From e5d6468fe9d8dced9af0c548a359a7dbeb31c931 Mon Sep 17 00:00:00 2001
From: Rocky Liao <rjliao@codeaurora.org>
Date: Wed, 25 Mar 2020 10:26:37 +0800
Subject: Bluetooth: hci_qca: Add support for Qualcomm Bluetooth SoC QCA6390

This patch adds support for QCA6390, including the devicetree and ACPI
compatible hwid matching, and patch/nvm downloading.

Signed-off-by: Rocky Liao <rjliao@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c   | 18 +++++++++++++-----
 drivers/bluetooth/btqca.h   |  3 ++-
 drivers/bluetooth/hci_qca.c | 40 +++++++++++++++++++++++++++++++++-------
 3 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index a16845c0751d..3ea866d44568 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -32,7 +32,7 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
 	 * VSE event. WCN3991 sends version command response as a payload to
 	 * command complete event.
 	 */
-	if (soc_type == QCA_WCN3991) {
+	if (soc_type >= QCA_WCN3991) {
 		event_type = 0;
 		rlen += 1;
 		rtype = EDL_PATCH_VER_REQ_CMD;
@@ -69,7 +69,7 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
 		goto out;
 	}
 
-	if (soc_type == QCA_WCN3991)
+	if (soc_type >= QCA_WCN3991)
 		memmove(&edl->data, &edl->data[1], sizeof(*ver));
 
 	ver = (struct qca_btsoc_version *)(edl->data);
@@ -217,7 +217,7 @@ static void qca_tlv_check_data(struct qca_fw_config *config,
 				tlv_nvm->data[0] |= 0x80;
 
 				/* UART Baud Rate */
-				if (soc_type == QCA_WCN3991)
+				if (soc_type >= QCA_WCN3991)
 					tlv_nvm->data[1] = nvm_baud_rate;
 				else
 					tlv_nvm->data[2] = nvm_baud_rate;
@@ -268,7 +268,7 @@ static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size,
 	 * VSE event. WCN3991 sends version command response as a payload to
 	 * command complete event.
 	 */
-	if (soc_type == QCA_WCN3991) {
+	if (soc_type >= QCA_WCN3991) {
 		event_type = 0;
 		rlen = sizeof(*edl);
 		rtype = EDL_PATCH_TLV_REQ_CMD;
@@ -301,7 +301,7 @@ static int qca_tlv_send_segment(struct hci_dev *hdev, int seg_size,
 		err = -EIO;
 	}
 
-	if (soc_type == QCA_WCN3991)
+	if (soc_type >= QCA_WCN3991)
 		goto out;
 
 	tlv_resp = (struct tlv_seg_resp *)(edl->data);
@@ -442,6 +442,11 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 			    (soc_ver & 0x0000000f);
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/crbtfw%02x.tlv", rom_ver);
+	} else if (soc_type == QCA_QCA6390) {
+		rom_ver = ((soc_ver & 0x00000f00) >> 0x04) |
+			    (soc_ver & 0x0000000f);
+		snprintf(config.fwname, sizeof(config.fwname),
+			 "qca/htbtfw%02x.tlv", rom_ver);
 	} else {
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/rampatch_%08x.bin", soc_ver);
@@ -464,6 +469,9 @@ int qca_uart_setup(struct hci_dev *hdev, uint8_t baudrate,
 	else if (qca_is_wcn399x(soc_type))
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/crnv%02x.bin", rom_ver);
+	else if (soc_type == QCA_QCA6390)
+		snprintf(config.fwname, sizeof(config.fwname),
+			 "qca/htnv%02x.bin", rom_ver);
 	else
 		snprintf(config.fwname, sizeof(config.fwname),
 			 "qca/nvm_%08x.bin", soc_ver);
diff --git a/drivers/bluetooth/btqca.h b/drivers/bluetooth/btqca.h
index e16a4d650597..6e1e62dd4b95 100644
--- a/drivers/bluetooth/btqca.h
+++ b/drivers/bluetooth/btqca.h
@@ -125,8 +125,9 @@ enum qca_btsoc_type {
 	QCA_AR3002,
 	QCA_ROME,
 	QCA_WCN3990,
-	QCA_WCN3991,
 	QCA_WCN3998,
+	QCA_WCN3991,
+	QCA_QCA6390,
 };
 
 #if IS_ENABLED(CONFIG_BT_QCA)
diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 439392b1c043..d0ac554584a4 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -26,6 +26,7 @@
 #include <linux/mod_devicetable.h>
 #include <linux/module.h>
 #include <linux/of_device.h>
+#include <linux/acpi.h>
 #include <linux/platform_device.h>
 #include <linux/regulator/consumer.h>
 #include <linux/serdev.h>
@@ -1596,7 +1597,7 @@ static int qca_setup(struct hci_uart *hu)
 	set_bit(HCI_QUIRK_SIMULTANEOUS_DISCOVERY, &hdev->quirks);
 
 	bt_dev_info(hdev, "setting up %s",
-		qca_is_wcn399x(soc_type) ? "wcn399x" : "ROME");
+		qca_is_wcn399x(soc_type) ? "wcn399x" : "ROME/QCA6390");
 
 retry:
 	ret = qca_power_on(hdev);
@@ -1665,10 +1666,10 @@ retry:
 	}
 
 	/* Setup bdaddr */
-	if (qca_is_wcn399x(soc_type))
-		hu->hdev->set_bdaddr = qca_set_bdaddr;
-	else
+	if (soc_type == QCA_ROME)
 		hu->hdev->set_bdaddr = qca_set_bdaddr_rome;
+	else
+		hu->hdev->set_bdaddr = qca_set_bdaddr;
 
 	return ret;
 }
@@ -1721,6 +1722,11 @@ static const struct qca_vreg_data qca_soc_data_wcn3998 = {
 	.num_vregs = 4,
 };
 
+static const struct qca_vreg_data qca_soc_data_qca6390 = {
+	.soc_type = QCA_QCA6390,
+	.num_vregs = 0,
+};
+
 static void qca_power_shutdown(struct hci_uart *hu)
 {
 	struct qca_serdev *qcadev;
@@ -1764,7 +1770,7 @@ static int qca_power_off(struct hci_dev *hdev)
 	enum qca_btsoc_type soc_type = qca_soc_type(hu);
 
 	/* Stop sending shutdown command if soc crashes. */
-	if (qca_is_wcn399x(soc_type)
+	if (soc_type != QCA_ROME
 		&& qca->memdump_state == QCA_MEMDUMP_IDLE) {
 		qca_send_pre_shutdown_cmd(hdev);
 		usleep_range(8000, 10000);
@@ -1900,7 +1906,11 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 			return err;
 		}
 	} else {
-		qcadev->btsoc_type = QCA_ROME;
+		if (data)
+			qcadev->btsoc_type = data->soc_type;
+		else
+			qcadev->btsoc_type = QCA_ROME;
+
 		qcadev->bt_en = devm_gpiod_get_optional(&serdev->dev, "enable",
 					       GPIOD_OUT_LOW);
 		if (!qcadev->bt_en) {
@@ -2044,21 +2054,37 @@ static int __maybe_unused qca_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(qca_pm_ops, qca_suspend, qca_resume);
 
+#ifdef CONFIG_OF
 static const struct of_device_id qca_bluetooth_of_match[] = {
 	{ .compatible = "qcom,qca6174-bt" },
+	{ .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390},
 	{ .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
 	{ .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
 	{ .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
 	{ /* sentinel */ }
 };
 MODULE_DEVICE_TABLE(of, qca_bluetooth_of_match);
+#endif
+
+#ifdef CONFIG_ACPI
+static const struct acpi_device_id qca_bluetooth_acpi_match[] = {
+	{ "QCOM6390", (kernel_ulong_t)&qca_soc_data_qca6390 },
+	{ "DLA16390", (kernel_ulong_t)&qca_soc_data_qca6390 },
+	{ "DLB16390", (kernel_ulong_t)&qca_soc_data_qca6390 },
+	{ "DLB26390", (kernel_ulong_t)&qca_soc_data_qca6390 },
+	{ },
+};
+MODULE_DEVICE_TABLE(acpi, qca_bluetooth_acpi_match);
+#endif
+
 
 static struct serdev_device_driver qca_serdev_driver = {
 	.probe = qca_serdev_probe,
 	.remove = qca_serdev_remove,
 	.driver = {
 		.name = "hci_uart_qca",
-		.of_match_table = qca_bluetooth_of_match,
+		.of_match_table = of_match_ptr(qca_bluetooth_of_match),
+		.acpi_match_table = ACPI_PTR(qca_bluetooth_acpi_match),
 		.pm = &qca_pm_ops,
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From 139dfad6cfa0ff816ea06d70132b164a44257c12 Mon Sep 17 00:00:00 2001
From: Rocky Liao <rjliao@codeaurora.org>
Date: Wed, 25 Mar 2020 10:26:38 +0800
Subject: dt-bindings: net: bluetooth: Add device tree bindings for QCA chip
 QCA6390

This patch adds compatible string for the QCA chip QCA6390.

Signed-off-by: Rocky Liao <rjliao@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
index beca6466d59a..badf597c0e58 100644
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
@@ -13,6 +13,7 @@ Required properties:
    * "qcom,wcn3990-bt"
    * "qcom,wcn3991-bt"
    * "qcom,wcn3998-bt"
+   * "qcom,qca6390-bt"
 
 Optional properties for compatible string qcom,qca6174-bt:
 
-- 
cgit v1.2.3-59-g8ed1b


From b86b0b150fed840c376145383ef5105116c81b0c Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 27 Mar 2020 11:32:14 -0700
Subject: Bluetooth: L2CAP: Fix handling LE modes by L2CAP_OPTIONS

L2CAP_OPTIONS shall only be used with BR/EDR modes.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_sock.c | 25 +++++++++++++++++++++----
 1 file changed, 21 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 117ba20ea194..cfb402645c26 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -424,6 +424,20 @@ static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
 			break;
 		}
 
+		/* Only BR/EDR modes are supported here */
+		switch (chan->mode) {
+		case L2CAP_MODE_BASIC:
+		case L2CAP_MODE_ERTM:
+		case L2CAP_MODE_STREAMING:
+			break;
+		default:
+			err = -EINVAL;
+			break;
+		}
+
+		if (err < 0)
+			break;
+
 		memset(&opts, 0, sizeof(opts));
 		opts.imtu     = chan->imtu;
 		opts.omtu     = chan->omtu;
@@ -698,10 +712,8 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
 			break;
 		}
 
-		chan->mode = opts.mode;
-		switch (chan->mode) {
-		case L2CAP_MODE_LE_FLOWCTL:
-			break;
+		/* Only BR/EDR modes are supported here */
+		switch (opts.mode) {
 		case L2CAP_MODE_BASIC:
 			clear_bit(CONF_STATE2_DEVICE, &chan->conf_state);
 			break;
@@ -715,6 +727,11 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
 			break;
 		}
 
+		if (err < 0)
+			break;
+
+		chan->mode = opts.mode;
+
 		BT_DBG("mode 0x%2.2x", chan->mode);
 
 		chan->imtu = opts.imtu;
-- 
cgit v1.2.3-59-g8ed1b


From 3ee7b7cd83900bb711efadbf16fa096a615a1566 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Fri, 27 Mar 2020 11:32:15 -0700
Subject: Bluetooth: Add BT_MODE socket option

This adds BT_MODE socket option which can be used to set L2CAP modes,
including modes only supported over LE which were not supported using
the L2CAP_OPTIONS.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/bluetooth.h |   8 +++
 net/bluetooth/l2cap_sock.c        | 113 +++++++++++++++++++++++++++++++++++++-
 2 files changed, 120 insertions(+), 1 deletion(-)

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 1576353a2773..3fa7b1e3c5d9 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -139,6 +139,14 @@ struct bt_voice {
 #define BT_PHY_LE_CODED_TX	0x00002000
 #define BT_PHY_LE_CODED_RX	0x00004000
 
+#define BT_MODE			15
+
+#define BT_MODE_BASIC		0x00
+#define BT_MODE_ERTM		0x01
+#define BT_MODE_STREAMING	0x02
+#define BT_MODE_LE_FLOWCTL	0x03
+#define BT_MODE_EXT_FLOWCTL	0x04
+
 __printf(1, 2)
 void bt_info(const char *fmt, ...);
 __printf(1, 2)
diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index cfb402645c26..1cea42ee1e92 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -395,6 +395,24 @@ static int l2cap_sock_getname(struct socket *sock, struct sockaddr *addr,
 	return sizeof(struct sockaddr_l2);
 }
 
+static int l2cap_get_mode(struct l2cap_chan *chan)
+{
+	switch (chan->mode) {
+	case L2CAP_MODE_BASIC:
+		return BT_MODE_BASIC;
+	case L2CAP_MODE_ERTM:
+		return BT_MODE_ERTM;
+	case L2CAP_MODE_STREAMING:
+		return BT_MODE_STREAMING;
+	case L2CAP_MODE_LE_FLOWCTL:
+		return BT_MODE_LE_FLOWCTL;
+	case L2CAP_MODE_EXT_FLOWCTL:
+		return BT_MODE_EXT_FLOWCTL;
+	}
+
+	return -EINVAL;
+}
+
 static int l2cap_sock_getsockopt_old(struct socket *sock, int optname,
 				     char __user *optval, int __user *optlen)
 {
@@ -522,7 +540,7 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
 	struct bt_security sec;
 	struct bt_power pwr;
 	u32 phys;
-	int len, err = 0;
+	int len, mode, err = 0;
 
 	BT_DBG("sk %p", sk);
 
@@ -638,6 +656,27 @@ static int l2cap_sock_getsockopt(struct socket *sock, int level, int optname,
 			err = -EFAULT;
 		break;
 
+	case BT_MODE:
+		if (!enable_ecred) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+
+		if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
+			err = -EINVAL;
+			break;
+		}
+
+		mode = l2cap_get_mode(chan);
+		if (mode < 0) {
+			err = mode;
+			break;
+		}
+
+		if (put_user(mode, (u8 __user *) optval))
+			err = -EFAULT;
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
@@ -780,6 +819,45 @@ static int l2cap_sock_setsockopt_old(struct socket *sock, int optname,
 	return err;
 }
 
+static int l2cap_set_mode(struct l2cap_chan *chan, u8 mode)
+{
+	switch (mode) {
+	case BT_MODE_BASIC:
+		if (bdaddr_type_is_le(chan->src_type))
+			return -EINVAL;
+		mode = L2CAP_MODE_BASIC;
+		clear_bit(CONF_STATE2_DEVICE, &chan->conf_state);
+		break;
+	case BT_MODE_ERTM:
+		if (!disable_ertm || bdaddr_type_is_le(chan->src_type))
+			return -EINVAL;
+		mode = L2CAP_MODE_ERTM;
+		break;
+	case BT_MODE_STREAMING:
+		if (!disable_ertm || bdaddr_type_is_le(chan->src_type))
+			return -EINVAL;
+		mode = L2CAP_MODE_STREAMING;
+		break;
+	case BT_MODE_LE_FLOWCTL:
+		if (!bdaddr_type_is_le(chan->src_type))
+			return -EINVAL;
+		mode = L2CAP_MODE_LE_FLOWCTL;
+		break;
+	case BT_MODE_EXT_FLOWCTL:
+		/* TODO: Add support for ECRED PDUs to BR/EDR */
+		if (!bdaddr_type_is_le(chan->src_type))
+			return -EINVAL;
+		mode = L2CAP_MODE_EXT_FLOWCTL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	chan->mode = mode;
+
+	return 0;
+}
+
 static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 				 char __user *optval, unsigned int optlen)
 {
@@ -985,6 +1063,39 @@ static int l2cap_sock_setsockopt(struct socket *sock, int level, int optname,
 
 		break;
 
+	case BT_MODE:
+		if (!enable_ecred) {
+			err = -ENOPROTOOPT;
+			break;
+		}
+
+		BT_DBG("sk->sk_state %u", sk->sk_state);
+
+		if (sk->sk_state != BT_BOUND) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (chan->chan_type != L2CAP_CHAN_CONN_ORIENTED) {
+			err = -EINVAL;
+			break;
+		}
+
+		if (get_user(opt, (u8 __user *) optval)) {
+			err = -EFAULT;
+			break;
+		}
+
+		BT_DBG("opt %u", opt);
+
+		err = l2cap_set_mode(chan, opt);
+		if (err)
+			break;
+
+		BT_DBG("mode 0x%2.2x", chan->mode);
+
+		break;
+
 	default:
 		err = -ENOPROTOOPT;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 92516cd97fd4d8ad5b1421a0d51771044f453a5f Mon Sep 17 00:00:00 2001
From: Sonny Sasaka <sonnysasaka@gmail.com>
Date: Fri, 27 Mar 2020 17:34:23 -0700
Subject: Bluetooth: Always request for user confirmation for Just Works

To improve security, always give the user-space daemon a chance to
accept or reject a Just Works pairing (LE). The daemon may decide to
auto-accept based on the user's intent.

Signed-off-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 1476a91ce935..d0b695ee49f6 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -855,6 +855,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	struct smp_chan *smp = chan->data;
 	u32 passkey = 0;
 	int ret = 0;
+	int err;
 
 	/* Initialize key for JUST WORKS */
 	memset(smp->tk, 0, sizeof(smp->tk));
@@ -883,9 +884,16 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	    hcon->io_capability == HCI_IO_NO_INPUT_OUTPUT)
 		smp->method = JUST_WORKS;
 
-	/* If Just Works, Continue with Zero TK */
+	/* If Just Works, Continue with Zero TK and ask user-space for
+	 * confirmation */
 	if (smp->method == JUST_WORKS) {
-		set_bit(SMP_FLAG_TK_VALID, &smp->flags);
+		err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst,
+						hcon->type,
+						hcon->dst_type,
+						passkey, 1);
+		if (err)
+			return SMP_UNSPECIFIED;
+		set_bit(SMP_FLAG_WAIT_USER, &smp->flags);
 		return 0;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From b25e4df4a83e516efbdeeefb5b2d3e259639a56e Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Thu, 2 Apr 2020 14:55:18 +0200
Subject: Bluetooth: hci_bcm: respect IRQ polarity from DT
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The IRQ polarity is configured in bcm_setup_sleep(). Make the
configured value match what is in the DeviceTree.

Cc: stable@vger.kernel.org
Fixes: f25a96c8eb46 ("Bluetooth: hci_bcm: enable IRQ capability from devicetree")
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index b236cb11c0dc..36b7f0d00c4b 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -1153,7 +1153,8 @@ static int bcm_of_probe(struct bcm_device *bdev)
 	device_property_read_u8_array(bdev->dev, "brcm,bt-pcm-int-params",
 				      bdev->pcm_int_params, 5);
 	bdev->irq = of_irq_get_byname(bdev->dev->of_node, "host-wakeup");
-
+	bdev->irq_active_low = irq_get_trigger_type(bdev->irq)
+			     & (IRQ_TYPE_EDGE_FALLING | IRQ_TYPE_LEVEL_LOW);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 81bd5d0c62437c02caac6b3f942fcda874063cb0 Mon Sep 17 00:00:00 2001
From: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Date: Thu, 2 Apr 2020 14:55:20 +0200
Subject: Bluetooth: hci_bcm: fix freeing not-requested IRQ
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When BT module can't be initialized, but it has an IRQ, unloading
the driver WARNs when trying to free not-yet-requested IRQ. Fix it by
noting whether the IRQ was requested.

WARNING: CPU: 2 PID: 214 at kernel/irq/devres.c:144 devm_free_irq+0x49/0x4ca
[...]
WARNING: CPU: 2 PID: 214 at kernel/irq/manage.c:1746 __free_irq+0x8b/0x27c
Trying to free already-free IRQ 264
Modules linked in: hci_uart(-) btbcm bluetooth ecdh_generic ecc libaes
CPU: 2 PID: 214 Comm: rmmod Tainted: G        W         5.6.1mq-00044-ga5f9ea098318-dirty #928
[...]
[<b016aefb>] (devm_free_irq) from [<af8ba1ff>] (bcm_close+0x97/0x118 [hci_uart])
[<af8ba1ff>] (bcm_close [hci_uart]) from [<af8b736f>] (hci_uart_unregister_device+0x33/0x3c [hci_uart])
[<af8b736f>] (hci_uart_unregister_device [hci_uart]) from [<b035930b>] (serdev_drv_remove+0x13/0x20)
[<b035930b>] (serdev_drv_remove) from [<b037093b>] (device_release_driver_internal+0x97/0x118)
[<b037093b>] (device_release_driver_internal) from [<b0370a0b>] (driver_detach+0x2f/0x58)
[<b0370a0b>] (driver_detach) from [<b036f855>] (bus_remove_driver+0x41/0x94)
[<b036f855>] (bus_remove_driver) from [<af8ba8db>] (bcm_deinit+0x1b/0x740 [hci_uart])
[<af8ba8db>] (bcm_deinit [hci_uart]) from [<af8ba86f>] (hci_uart_exit+0x13/0x30 [hci_uart])
[<af8ba86f>] (hci_uart_exit [hci_uart]) from [<b01900bd>] (sys_delete_module+0x109/0x1d0)
[<b01900bd>] (sys_delete_module) from [<b0101001>] (ret_fast_syscall+0x1/0x5a)
[...]

Cc: stable@vger.kernel.org
Fixes: 6cc4396c8829 ("Bluetooth: hci_bcm: Add wake-up capability")
Signed-off-by: Michał Mirosław <mirq-linux@rere.qmqm.pl>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_bcm.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 36b7f0d00c4b..19e4587f366c 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -118,6 +118,7 @@ struct bcm_device {
 	u32			oper_speed;
 	int			irq;
 	bool			irq_active_low;
+	bool			irq_acquired;
 
 #ifdef CONFIG_PM
 	struct hci_uart		*hu;
@@ -333,6 +334,8 @@ static int bcm_request_irq(struct bcm_data *bcm)
 		goto unlock;
 	}
 
+	bdev->irq_acquired = true;
+
 	device_init_wakeup(bdev->dev, true);
 
 	pm_runtime_set_autosuspend_delay(bdev->dev,
@@ -514,7 +517,7 @@ static int bcm_close(struct hci_uart *hu)
 	}
 
 	if (bdev) {
-		if (IS_ENABLED(CONFIG_PM) && bdev->irq > 0) {
+		if (IS_ENABLED(CONFIG_PM) && bdev->irq_acquired) {
 			devm_free_irq(bdev->dev, bdev->irq, bdev);
 			device_init_wakeup(bdev->dev, false);
 			pm_runtime_disable(bdev->dev);
-- 
cgit v1.2.3-59-g8ed1b


From 7fedd3bb6b77f9b6eefb0e4dcd8f79d0d00b86d7 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Mon, 23 Mar 2020 12:45:07 -0700
Subject: Bluetooth: Prioritize SCO traffic

When scheduling TX packets, send all SCO/eSCO packets first, check for
pending SCO/eSCO packets after every ACL/LE packet and send them if any
are pending.  This is done to make sure that we can meet SCO deadlines
on slow interfaces like UART.

If we were to queue up multiple ACL packets without checking for a SCO
packet, we might miss the SCO timing. For example:

The time it takes to send a maximum size ACL packet (1024 bytes):
t = 10/8 * 1024 bytes * 8 bits/byte * 1 packet / baudrate
        where 10/8 is uart overhead due to start/stop bits per byte

Substituting t = 3.75ms (SCO deadline) gives us a baudrate of 2730666.

At a baudrate of 3000000, if we didn't check for SCO packets within 1024
bytes, we would miss the 3.75ms timing window.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_core.c | 106 +++++++++++++++++++++++++----------------------
 1 file changed, 57 insertions(+), 49 deletions(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 2e7bc2da8371..5fb9db0b2b7b 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -4240,6 +4240,54 @@ static void __check_timeout(struct hci_dev *hdev, unsigned int cnt)
 	}
 }
 
+/* Schedule SCO */
+static void hci_sched_sco(struct hci_dev *hdev)
+{
+	struct hci_conn *conn;
+	struct sk_buff *skb;
+	int quote;
+
+	BT_DBG("%s", hdev->name);
+
+	if (!hci_conn_num(hdev, SCO_LINK))
+		return;
+
+	while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) {
+		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+			BT_DBG("skb %p len %d", skb, skb->len);
+			hci_send_frame(hdev, skb);
+
+			conn->sent++;
+			if (conn->sent == ~0)
+				conn->sent = 0;
+		}
+	}
+}
+
+static void hci_sched_esco(struct hci_dev *hdev)
+{
+	struct hci_conn *conn;
+	struct sk_buff *skb;
+	int quote;
+
+	BT_DBG("%s", hdev->name);
+
+	if (!hci_conn_num(hdev, ESCO_LINK))
+		return;
+
+	while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK,
+						     &quote))) {
+		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
+			BT_DBG("skb %p len %d", skb, skb->len);
+			hci_send_frame(hdev, skb);
+
+			conn->sent++;
+			if (conn->sent == ~0)
+				conn->sent = 0;
+		}
+	}
+}
+
 static void hci_sched_acl_pkt(struct hci_dev *hdev)
 {
 	unsigned int cnt = hdev->acl_cnt;
@@ -4271,6 +4319,10 @@ static void hci_sched_acl_pkt(struct hci_dev *hdev)
 			hdev->acl_cnt--;
 			chan->sent++;
 			chan->conn->sent++;
+
+			/* Send pending SCO packets right away */
+			hci_sched_sco(hdev);
+			hci_sched_esco(hdev);
 		}
 	}
 
@@ -4355,54 +4407,6 @@ static void hci_sched_acl(struct hci_dev *hdev)
 	}
 }
 
-/* Schedule SCO */
-static void hci_sched_sco(struct hci_dev *hdev)
-{
-	struct hci_conn *conn;
-	struct sk_buff *skb;
-	int quote;
-
-	BT_DBG("%s", hdev->name);
-
-	if (!hci_conn_num(hdev, SCO_LINK))
-		return;
-
-	while (hdev->sco_cnt && (conn = hci_low_sent(hdev, SCO_LINK, &quote))) {
-		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
-			BT_DBG("skb %p len %d", skb, skb->len);
-			hci_send_frame(hdev, skb);
-
-			conn->sent++;
-			if (conn->sent == ~0)
-				conn->sent = 0;
-		}
-	}
-}
-
-static void hci_sched_esco(struct hci_dev *hdev)
-{
-	struct hci_conn *conn;
-	struct sk_buff *skb;
-	int quote;
-
-	BT_DBG("%s", hdev->name);
-
-	if (!hci_conn_num(hdev, ESCO_LINK))
-		return;
-
-	while (hdev->sco_cnt && (conn = hci_low_sent(hdev, ESCO_LINK,
-						     &quote))) {
-		while (quote-- && (skb = skb_dequeue(&conn->data_q))) {
-			BT_DBG("skb %p len %d", skb, skb->len);
-			hci_send_frame(hdev, skb);
-
-			conn->sent++;
-			if (conn->sent == ~0)
-				conn->sent = 0;
-		}
-	}
-}
-
 static void hci_sched_le(struct hci_dev *hdev)
 {
 	struct hci_chan *chan;
@@ -4437,6 +4441,10 @@ static void hci_sched_le(struct hci_dev *hdev)
 			cnt--;
 			chan->sent++;
 			chan->conn->sent++;
+
+			/* Send pending SCO packets right away */
+			hci_sched_sco(hdev);
+			hci_sched_esco(hdev);
 		}
 	}
 
@@ -4459,9 +4467,9 @@ static void hci_tx_work(struct work_struct *work)
 
 	if (!hci_dev_test_flag(hdev, HCI_USER_CHANNEL)) {
 		/* Schedule queues and send stuff to HCI driver */
-		hci_sched_acl(hdev);
 		hci_sched_sco(hdev);
 		hci_sched_esco(hdev);
+		hci_sched_acl(hdev);
 		hci_sched_le(hdev);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1e5479be46a70389e1059818a2e9358858eaa5fc Mon Sep 17 00:00:00 2001
From: Alain Michaud <alainm@chromium.org>
Date: Fri, 3 Apr 2020 13:49:05 +0000
Subject: Bluetooth: fixing minor typo in comment

This changes a simple typo in hci_event.c

Signed-off-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 0a591be8b0ae..ddf77304aa8e 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5269,7 +5269,7 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 		case HCI_AUTO_CONN_ALWAYS:
 			/* Devices advertising with ADV_IND or ADV_DIRECT_IND
 			 * are triggering a connection attempt. This means
-			 * that incoming connectioms from slave device are
+			 * that incoming connections from slave device are
 			 * accepted and also outgoing connections to slave
 			 * devices are established when found.
 			 */
-- 
cgit v1.2.3-59-g8ed1b


From 1f8330ea1692c9c490b1e566e31d96d8cef99dd8 Mon Sep 17 00:00:00 2001
From: Sathish Narsimman <nsathish41@gmail.com>
Date: Fri, 3 Apr 2020 21:43:58 +0200
Subject: Bluetooth: add support to notify using SCO air mode

Notifying with HCI_NOTIFY_CONN_ADD for a SCO connection is too generic in
the case of mSBC audio. To differentiate the SCO air mode, introduce
HCI_NOTIFY_ENABLE_SCO_CVSD and HCI_NOTIFY_ENABLE_SCO_TRANSP.

Signed-off-by: Sathish Narsimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h |  3 +++
 net/bluetooth/hci_conn.c    | 25 +++++++++++++++++++++----
 net/bluetooth/hci_event.c   | 23 ++++++++++++++++++++++-
 3 files changed, 46 insertions(+), 5 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 5f60e135aeb6..9ff2f7a9e131 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -53,6 +53,9 @@
 #define HCI_NOTIFY_CONN_ADD		1
 #define HCI_NOTIFY_CONN_DEL		2
 #define HCI_NOTIFY_VOICE_SETTING	3
+#define HCI_NOTIFY_ENABLE_SCO_CVSD	4
+#define HCI_NOTIFY_ENABLE_SCO_TRANSP	5
+#define HCI_NOTIFY_DISABLE_SCO		6
 
 /* HCI bus types */
 #define HCI_VIRTUAL	0
diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index e245bc155cc2..07c34c55fc50 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -122,8 +122,18 @@ static void hci_conn_cleanup(struct hci_conn *conn)
 
 	hci_conn_hash_del(hdev, conn);
 
-	if (hdev->notify)
-		hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
+	if (conn->type == SCO_LINK || conn->type == ESCO_LINK) {
+		switch (conn->setting & SCO_AIRMODE_MASK) {
+		case SCO_AIRMODE_CVSD:
+		case SCO_AIRMODE_TRANSP:
+			if (hdev->notify)
+				hdev->notify(hdev, HCI_NOTIFY_DISABLE_SCO);
+			break;
+		}
+	} else {
+		if (hdev->notify)
+			hdev->notify(hdev, HCI_NOTIFY_CONN_DEL);
+	}
 
 	hci_conn_del_sysfs(conn);
 
@@ -577,8 +587,15 @@ struct hci_conn *hci_conn_add(struct hci_dev *hdev, int type, bdaddr_t *dst,
 	hci_dev_hold(hdev);
 
 	hci_conn_hash_add(hdev, conn);
-	if (hdev->notify)
-		hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
+
+	/* The SCO and eSCO connections will only be notified when their
+	 * setup has been completed. This is different to ACL links which
+	 * can be notified right away.
+	 */
+	if (conn->type != SCO_LINK && conn->type != ESCO_LINK) {
+		if (hdev->notify)
+			hdev->notify(hdev, HCI_NOTIFY_CONN_ADD);
+	}
 
 	hci_conn_init_sysfs(conn);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index ddf77304aa8e..af396cb69602 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2607,8 +2607,16 @@ static void hci_conn_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	if (ev->status) {
 		hci_connect_cfm(conn, ev->status);
 		hci_conn_del(conn);
-	} else if (ev->link_type != ACL_LINK)
+	} else if (ev->link_type == SCO_LINK) {
+		switch (conn->setting & SCO_AIRMODE_MASK) {
+		case SCO_AIRMODE_CVSD:
+			if (hdev->notify)
+				hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD);
+			break;
+		}
+
 		hci_connect_cfm(conn, ev->status);
+	}
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -4307,6 +4315,19 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
 		break;
 	}
 
+	bt_dev_dbg(hdev, "SCO connected with air mode: %02x", ev->air_mode);
+
+	switch (conn->setting & SCO_AIRMODE_MASK) {
+	case SCO_AIRMODE_CVSD:
+		if (hdev->notify)
+			hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_CVSD);
+		break;
+	case SCO_AIRMODE_TRANSP:
+		if (hdev->notify)
+			hdev->notify(hdev, HCI_NOTIFY_ENABLE_SCO_TRANSP);
+		break;
+	}
+
 	hci_connect_cfm(conn, ev->status);
 	if (ev->status)
 		hci_conn_del(conn);
-- 
cgit v1.2.3-59-g8ed1b


From baac6276c0a9f36f1fe1f00590ef00d2ba5ba626 Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <sathish.narasimman@intel.com>
Date: Fri, 3 Apr 2020 21:43:59 +0200
Subject: Bluetooth: btusb: handle mSBC audio over USB Endpoints

For an mSBC encoded audio stream over USB transport, the btusb driver
needs to be set to alternate setting 6 as per BT core spec 5.0. The
type of air mode is used to differentiate which alt setting is to be
used.

The changes are made considering some discussion over the similar
patch submitted earlier from Kuba Pawlak (link below)
https://www.spinics.net/lists/linux-bluetooth/msg64577.html

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Chethan T N <chethan.tumkur.narayan@intel.com>
Signed-off-by: Hsin-Yu Chao <hychao@chromium.org>
Signed-off-by: Amit K Bag <amit.k.bag@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/btusb.c | 156 +++++++++++++++++++++++++++++++++++-----------
 1 file changed, 119 insertions(+), 37 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 3bdec42c9612..110e96b245e5 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -492,6 +492,8 @@ struct btusb_data {
 	__u8 cmdreq;
 
 	unsigned int sco_num;
+	unsigned int air_mode;
+	bool usb_alt6_packet_flow;
 	int isoc_altsetting;
 	int suspend_count;
 
@@ -983,6 +985,42 @@ static void btusb_isoc_complete(struct urb *urb)
 	}
 }
 
+static inline void __fill_isoc_descriptor_msbc(struct urb *urb, int len,
+					       int mtu, struct btusb_data *data)
+{
+	int i, offset = 0;
+	unsigned int interval;
+
+	BT_DBG("len %d mtu %d", len, mtu);
+
+	/* For mSBC ALT 6 setting the host will send the packet at continuous
+	 * flow. As per core spec 5, vol 4, part B, table 2.1. For ALT setting
+	 * 6 the HCI PACKET INTERVAL should be 7.5ms for every usb packets.
+	 * To maintain the rate we send 63bytes of usb packets alternatively for
+	 * 7ms and 8ms to maintain the rate as 7.5ms.
+	 */
+	if (data->usb_alt6_packet_flow) {
+		interval = 7;
+		data->usb_alt6_packet_flow = false;
+	} else {
+		interval = 6;
+		data->usb_alt6_packet_flow = true;
+	}
+
+	for (i = 0; i < interval; i++) {
+		urb->iso_frame_desc[i].offset = offset;
+		urb->iso_frame_desc[i].length = offset;
+	}
+
+	if (len && i < BTUSB_MAX_ISOC_FRAMES) {
+		urb->iso_frame_desc[i].offset = offset;
+		urb->iso_frame_desc[i].length = len;
+		i++;
+	}
+
+	urb->number_of_packets = i;
+}
+
 static inline void __fill_isoc_descriptor(struct urb *urb, int len, int mtu)
 {
 	int i, offset = 0;
@@ -1386,9 +1424,13 @@ static struct urb *alloc_isoc_urb(struct hci_dev *hdev, struct sk_buff *skb)
 
 	urb->transfer_flags  = URB_ISO_ASAP;
 
-	__fill_isoc_descriptor(urb, skb->len,
-			       le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize));
-
+	if (data->isoc_altsetting == 6)
+		__fill_isoc_descriptor_msbc(urb, skb->len,
+					    le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize),
+					    data);
+	else
+		__fill_isoc_descriptor(urb, skb->len,
+				       le16_to_cpu(data->isoc_tx_ep->wMaxPacketSize));
 	skb->dev = (void *)hdev;
 
 	return urb;
@@ -1484,6 +1526,7 @@ static void btusb_notify(struct hci_dev *hdev, unsigned int evt)
 
 	if (hci_conn_num(hdev, SCO_LINK) != data->sco_num) {
 		data->sco_num = hci_conn_num(hdev, SCO_LINK);
+		data->air_mode = evt;
 		schedule_work(&data->work);
 	}
 }
@@ -1531,11 +1574,67 @@ static inline int __set_isoc_interface(struct hci_dev *hdev, int altsetting)
 	return 0;
 }
 
+static int btusb_switch_alt_setting(struct hci_dev *hdev, int new_alts)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+	int err;
+
+	if (data->isoc_altsetting != new_alts) {
+		unsigned long flags;
+
+		clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+		usb_kill_anchored_urbs(&data->isoc_anchor);
+
+		/* When isochronous alternate setting needs to be
+		 * changed, because SCO connection has been added
+		 * or removed, a packet fragment may be left in the
+		 * reassembling state. This could lead to wrongly
+		 * assembled fragments.
+		 *
+		 * Clear outstanding fragment when selecting a new
+		 * alternate setting.
+		 */
+		spin_lock_irqsave(&data->rxlock, flags);
+		kfree_skb(data->sco_skb);
+		data->sco_skb = NULL;
+		spin_unlock_irqrestore(&data->rxlock, flags);
+
+		err = __set_isoc_interface(hdev, new_alts);
+		if (err < 0)
+			return err;
+	}
+
+	if (!test_and_set_bit(BTUSB_ISOC_RUNNING, &data->flags)) {
+		if (btusb_submit_isoc_urb(hdev, GFP_KERNEL) < 0)
+			clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+		else
+			btusb_submit_isoc_urb(hdev, GFP_KERNEL);
+	}
+
+	return 0;
+}
+
+static struct usb_host_interface *btusb_find_altsetting(struct btusb_data *data,
+							int alt)
+{
+	struct usb_interface *intf = data->isoc;
+	int i;
+
+	BT_DBG("Looking for Alt no :%d", alt);
+
+	for (i = 0; i < intf->num_altsetting; i++) {
+		if (intf->altsetting[i].desc.bAlternateSetting == alt)
+			return &intf->altsetting[i];
+	}
+
+	return NULL;
+}
+
 static void btusb_work(struct work_struct *work)
 {
 	struct btusb_data *data = container_of(work, struct btusb_data, work);
 	struct hci_dev *hdev = data->hdev;
-	int new_alts;
+	int new_alts = 0;
 	int err;
 
 	if (data->sco_num > 0) {
@@ -1550,44 +1649,27 @@ static void btusb_work(struct work_struct *work)
 			set_bit(BTUSB_DID_ISO_RESUME, &data->flags);
 		}
 
-		if (hdev->voice_setting & 0x0020) {
-			static const int alts[3] = { 2, 4, 5 };
-
-			new_alts = alts[data->sco_num - 1];
-		} else {
-			new_alts = data->sco_num;
-		}
-
-		if (data->isoc_altsetting != new_alts) {
-			unsigned long flags;
+		if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_CVSD) {
+			if (hdev->voice_setting & 0x0020) {
+				static const int alts[3] = { 2, 4, 5 };
 
-			clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
-			usb_kill_anchored_urbs(&data->isoc_anchor);
-
-			/* When isochronous alternate setting needs to be
-			 * changed, because SCO connection has been added
-			 * or removed, a packet fragment may be left in the
-			 * reassembling state. This could lead to wrongly
-			 * assembled fragments.
-			 *
-			 * Clear outstanding fragment when selecting a new
-			 * alternate setting.
-			 */
-			spin_lock_irqsave(&data->rxlock, flags);
-			kfree_skb(data->sco_skb);
-			data->sco_skb = NULL;
-			spin_unlock_irqrestore(&data->rxlock, flags);
+				new_alts = alts[data->sco_num - 1];
+			} else {
+				new_alts = data->sco_num;
+			}
+		} else if (data->air_mode == HCI_NOTIFY_ENABLE_SCO_TRANSP) {
 
-			if (__set_isoc_interface(hdev, new_alts) < 0)
-				return;
-		}
+			data->usb_alt6_packet_flow = true;
 
-		if (!test_and_set_bit(BTUSB_ISOC_RUNNING, &data->flags)) {
-			if (btusb_submit_isoc_urb(hdev, GFP_KERNEL) < 0)
-				clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
+			/* Check if Alt 6 is supported for Transparent audio */
+			if (btusb_find_altsetting(data, 6))
+				new_alts = 6;
 			else
-				btusb_submit_isoc_urb(hdev, GFP_KERNEL);
+				bt_dev_err(hdev, "Device does not support ALT setting 6");
 		}
+
+		if (btusb_switch_alt_setting(hdev, new_alts) < 0)
+			bt_dev_err(hdev, "set USB alt:(%d) failed!", new_alts);
 	} else {
 		clear_bit(BTUSB_ISOC_RUNNING, &data->flags);
 		usb_kill_anchored_urbs(&data->isoc_anchor);
-- 
cgit v1.2.3-59-g8ed1b


From 3d2336042ae3555d4b77995402291c5795882d20 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:00 +0200
Subject: Bluetooth: Move debugfs configuration above the selftests

This is just a cosmetic cleanup to move the selftests configuration option
to the bottom of the list of options.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/Kconfig | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 165148c7c4ce..77703216a2e3 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -93,6 +93,14 @@ config BT_LEDS
 	  This option selects a few LED triggers for different
 	  Bluetooth events.
 
+config BT_DEBUGFS
+	bool "Export Bluetooth internals in debugfs"
+	depends on BT && DEBUG_FS
+	default y
+	help
+	  Provide extensive information about internal Bluetooth states
+	  in debugfs.
+
 config BT_SELFTEST
 	bool "Bluetooth self testing support"
 	depends on BT && DEBUG_KERNEL
@@ -120,12 +128,4 @@ config BT_SELFTEST_SMP
 	  Run test cases for SMP cryptographic functionality, including both
 	  legacy SMP as well as the Secure Connections features.
 
-config BT_DEBUGFS
-	bool "Export Bluetooth internals in debugfs"
-	depends on BT && DEBUG_FS
-	default y
-	help
-	  Provide extensive information about internal Bluetooth states
-	  in debugfs.
-
 source "drivers/bluetooth/Kconfig"
-- 
cgit v1.2.3-59-g8ed1b


From 145373cb1b1fcdba2059e945d0aa2613af2e84d1 Mon Sep 17 00:00:00 2001
From: Miao-chen Chou <mcchou@chromium.org>
Date: Fri, 3 Apr 2020 21:44:01 +0200
Subject: Bluetooth: Add framework for Microsoft vendor extension

Microsoft defined a set of HCI vendor extensions. Check the following
link for details:

https://docs.microsoft.com/en-us/windows-hardware/drivers/bluetooth/microsoft-defined-bluetooth-hci-commands-and-events

This provides the basic framework to enable the extension and read its
supported features. Drivers still have to declare support for this
extension before it can be utilized by the host stack.

Signed-off-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  13 ++++
 net/bluetooth/Kconfig            |   7 ++
 net/bluetooth/Makefile           |   1 +
 net/bluetooth/hci_core.c         |   5 ++
 net/bluetooth/hci_event.c        |   5 ++
 net/bluetooth/msft.c             | 141 +++++++++++++++++++++++++++++++++++++++
 net/bluetooth/msft.h             |  18 +++++
 7 files changed, 190 insertions(+)
 create mode 100644 net/bluetooth/msft.c
 create mode 100644 net/bluetooth/msft.h

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index d4e28773d378..3cb0f82d0c83 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -484,6 +484,11 @@ struct hci_dev {
 	struct led_trigger	*power_led;
 #endif
 
+#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+	__u16			msft_opcode;
+	void			*msft_data;
+#endif
+
 	int (*open)(struct hci_dev *hdev);
 	int (*close)(struct hci_dev *hdev);
 	int (*flush)(struct hci_dev *hdev);
@@ -1116,6 +1121,14 @@ int hci_recv_frame(struct hci_dev *hdev, struct sk_buff *skb);
 int hci_recv_diag(struct hci_dev *hdev, struct sk_buff *skb);
 __printf(2, 3) void hci_set_hw_info(struct hci_dev *hdev, const char *fmt, ...);
 __printf(2, 3) void hci_set_fw_info(struct hci_dev *hdev, const char *fmt, ...);
+
+static inline void hci_set_msft_opcode(struct hci_dev *hdev, __u16 opcode)
+{
+#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+	hdev->msft_opcode = opcode;
+#endif
+}
+
 int hci_dev_open(__u16 dev);
 int hci_dev_close(__u16 dev);
 int hci_dev_do_close(struct hci_dev *hdev);
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 77703216a2e3..9e25c6570170 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -93,6 +93,13 @@ config BT_LEDS
 	  This option selects a few LED triggers for different
 	  Bluetooth events.
 
+config BT_MSFTEXT
+	bool "Enable Microsoft extensions"
+	depends on BT
+	help
+	  This options enables support for the Microsoft defined HCI
+	  vendor extensions.
+
 config BT_DEBUGFS
 	bool "Export Bluetooth internals in debugfs"
 	depends on BT && DEBUG_FS
diff --git a/net/bluetooth/Makefile b/net/bluetooth/Makefile
index fda41c0b4781..41dd541a44a5 100644
--- a/net/bluetooth/Makefile
+++ b/net/bluetooth/Makefile
@@ -19,5 +19,6 @@ bluetooth-y := af_bluetooth.o hci_core.o hci_conn.o hci_event.o mgmt.o \
 bluetooth-$(CONFIG_BT_BREDR) += sco.o
 bluetooth-$(CONFIG_BT_HS) += a2mp.o amp.o
 bluetooth-$(CONFIG_BT_LEDS) += leds.o
+bluetooth-$(CONFIG_BT_MSFTEXT) += msft.o
 bluetooth-$(CONFIG_BT_DEBUGFS) += hci_debugfs.o
 bluetooth-$(CONFIG_BT_SELFTEST) += selftest.o
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 5fb9db0b2b7b..ef0ee3a3d9ed 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -44,6 +44,7 @@
 #include "hci_debugfs.h"
 #include "smp.h"
 #include "leds.h"
+#include "msft.h"
 
 static void hci_rx_work(struct work_struct *work);
 static void hci_cmd_work(struct work_struct *work);
@@ -1563,6 +1564,8 @@ setup_failed:
 	    hci_dev_test_flag(hdev, HCI_VENDOR_DIAG) && hdev->set_diag)
 		ret = hdev->set_diag(hdev, true);
 
+	msft_do_open(hdev);
+
 	clear_bit(HCI_INIT, &hdev->flags);
 
 	if (!ret) {
@@ -1758,6 +1761,8 @@ int hci_dev_do_close(struct hci_dev *hdev)
 
 	hci_sock_dev_event(hdev, HCI_DEV_DOWN);
 
+	msft_do_close(hdev);
+
 	if (hdev->flush)
 		hdev->flush(hdev);
 
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index af396cb69602..2803beaa1c44 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -35,6 +35,7 @@
 #include "a2mp.h"
 #include "amp.h"
 #include "smp.h"
+#include "msft.h"
 
 #define ZERO_KEY "\x00\x00\x00\x00\x00\x00\x00\x00" \
 		 "\x00\x00\x00\x00\x00\x00\x00\x00"
@@ -6166,6 +6167,10 @@ void hci_event_packet(struct hci_dev *hdev, struct sk_buff *skb)
 		hci_num_comp_blocks_evt(hdev, skb);
 		break;
 
+	case HCI_EV_VENDOR:
+		msft_vendor_evt(hdev, skb);
+		break;
+
 	default:
 		BT_DBG("%s event 0x%2.2x", hdev->name, event);
 		break;
diff --git a/net/bluetooth/msft.c b/net/bluetooth/msft.c
new file mode 100644
index 000000000000..d6c4e6b5ae77
--- /dev/null
+++ b/net/bluetooth/msft.c
@@ -0,0 +1,141 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google Corporation
+ */
+
+#include <net/bluetooth/bluetooth.h>
+#include <net/bluetooth/hci_core.h>
+
+#include "msft.h"
+
+#define MSFT_OP_READ_SUPPORTED_FEATURES		0x00
+struct msft_cp_read_supported_features {
+	__u8   sub_opcode;
+} __packed;
+struct msft_rp_read_supported_features {
+	__u8   status;
+	__u8   sub_opcode;
+	__le64 features;
+	__u8   evt_prefix_len;
+	__u8   evt_prefix[0];
+} __packed;
+
+struct msft_data {
+	__u64 features;
+	__u8  evt_prefix_len;
+	__u8  *evt_prefix;
+};
+
+static bool read_supported_features(struct hci_dev *hdev,
+				    struct msft_data *msft)
+{
+	struct msft_cp_read_supported_features cp;
+	struct msft_rp_read_supported_features *rp;
+	struct sk_buff *skb;
+
+	cp.sub_opcode = MSFT_OP_READ_SUPPORTED_FEATURES;
+
+	skb = __hci_cmd_sync(hdev, hdev->msft_opcode, sizeof(cp), &cp,
+			     HCI_CMD_TIMEOUT);
+	if (IS_ERR(skb)) {
+		bt_dev_err(hdev, "Failed to read MSFT supported features (%ld)",
+			   PTR_ERR(skb));
+		return false;
+	}
+
+	if (skb->len < sizeof(*rp)) {
+		bt_dev_err(hdev, "MSFT supported features length mismatch");
+		goto failed;
+	}
+
+	rp = (struct msft_rp_read_supported_features *)skb->data;
+
+	if (rp->sub_opcode != MSFT_OP_READ_SUPPORTED_FEATURES)
+		goto failed;
+
+	if (rp->evt_prefix_len > 0) {
+		msft->evt_prefix = kmemdup(rp->evt_prefix, rp->evt_prefix_len,
+					   GFP_KERNEL);
+		if (!msft->evt_prefix)
+			goto failed;
+	}
+
+	msft->evt_prefix_len = rp->evt_prefix_len;
+	msft->features = __le64_to_cpu(rp->features);
+
+	kfree_skb(skb);
+	return true;
+
+failed:
+	kfree_skb(skb);
+	return false;
+}
+
+void msft_do_open(struct hci_dev *hdev)
+{
+	struct msft_data *msft;
+
+	if (hdev->msft_opcode == HCI_OP_NOP)
+		return;
+
+	bt_dev_dbg(hdev, "Initialize MSFT extension");
+
+	msft = kzalloc(sizeof(*msft), GFP_KERNEL);
+	if (!msft)
+		return;
+
+	if (!read_supported_features(hdev, msft)) {
+		kfree(msft);
+		return;
+	}
+
+	hdev->msft_data = msft;
+}
+
+void msft_do_close(struct hci_dev *hdev)
+{
+	struct msft_data *msft = hdev->msft_data;
+
+	if (!msft)
+		return;
+
+	bt_dev_dbg(hdev, "Cleanup of MSFT extension");
+
+	hdev->msft_data = NULL;
+
+	kfree(msft->evt_prefix);
+	kfree(msft);
+}
+
+void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb)
+{
+	struct msft_data *msft = hdev->msft_data;
+	u8 event;
+
+	if (!msft)
+		return;
+
+	/* When the extension has defined an event prefix, check that it
+	 * matches, and otherwise just return.
+	 */
+	if (msft->evt_prefix_len > 0) {
+		if (skb->len < msft->evt_prefix_len)
+			return;
+
+		if (memcmp(skb->data, msft->evt_prefix, msft->evt_prefix_len))
+			return;
+
+		skb_pull(skb, msft->evt_prefix_len);
+	}
+
+	/* Every event starts at least with an event code and the rest of
+	 * the data is variable and depends on the event code.
+	 */
+	if (skb->len < 1)
+		return;
+
+	event = *skb->data;
+	skb_pull(skb, 1);
+
+	bt_dev_dbg(hdev, "MSFT vendor event %u", event);
+}
diff --git a/net/bluetooth/msft.h b/net/bluetooth/msft.h
new file mode 100644
index 000000000000..5aa9130e1f8a
--- /dev/null
+++ b/net/bluetooth/msft.h
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (C) 2020 Google Corporation
+ */
+
+#if IS_ENABLED(CONFIG_BT_MSFTEXT)
+
+void msft_do_open(struct hci_dev *hdev);
+void msft_do_close(struct hci_dev *hdev);
+void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb);
+
+#else
+
+static inline void msft_do_open(struct hci_dev *hdev) {}
+static inline void msft_do_close(struct hci_dev *hdev) {}
+static inline void msft_vendor_evt(struct hci_dev *hdev, struct sk_buff *skb) {}
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 7fd673bcdacc8528c7d9489d31f040eac7cca164 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:02 +0200
Subject: Bluetooth: btusb: Enable Intel events even if already in operational
 mode

In case the controller is already in operational mode, the Intel specific
events will not be enabled. Fix this by jumping to a common finish
section that will allow setting final details for the controller.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/btusb.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 110e96b245e5..43925bdeaa81 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2334,7 +2334,7 @@ static int btusb_setup_intel_new(struct hci_dev *hdev)
 	if (ver.fw_variant == 0x23) {
 		clear_bit(BTUSB_BOOTLOADER, &data->flags);
 		btintel_check_bdaddr(hdev);
-		return 0;
+		goto finish;
 	}
 
 	/* If the device is not in bootloader mode, then the only possible
@@ -2534,6 +2534,14 @@ done:
 	 */
 	btintel_load_ddc_config(hdev, fwname);
 
+	/* Read the Intel version information after loading the FW  */
+	err = btintel_read_version(hdev, &ver);
+	if (err)
+		return err;
+
+	btintel_version_info(hdev, &ver);
+
+finish:
 	/* Set the event mask for Intel specific vendor events. This enables
 	 * a few extra events that are useful during general operation. It
 	 * does not enable any debugging related events.
@@ -2543,13 +2551,6 @@ done:
 	 */
 	btintel_set_event_mask(hdev, false);
 
-	/* Read the Intel version information after loading the FW  */
-	err = btintel_read_version(hdev, &ver);
-	if (err)
-		return err;
-
-	btintel_version_info(hdev, &ver);
-
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From fc04590e3d39213a22b7afd46c4bd5d95a6cab1f Mon Sep 17 00:00:00 2001
From: Miao-chen Chou <mcchou@chromium.org>
Date: Fri, 3 Apr 2020 21:44:03 +0200
Subject: Bluetooth: btusb: Enable MSFT extension for Intel ThunderPeak devices

The Intel ThunderPeak BT controllers support the Microsoft vendor
extension and they are using 0xFC1E for VsMsftOpCode.

< HCI Command: Vendor (0x3f|0x001e) plen 1
        00
> HCI Event: Command Complete (0x0e) plen 15
      Vendor (0x3f|0x001e) ncmd 1
        Status: Success (0x00)
        00 3f 00 00 00 00 00 00 00 01 50

Signed-off-by: Miao-chen Chou <mcchou@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 drivers/bluetooth/btusb.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 43925bdeaa81..09913cadd1ca 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -2542,6 +2542,15 @@ done:
 	btintel_version_info(hdev, &ver);
 
 finish:
+	/* All Intel controllers that support the Microsoft vendor
+	 * extension are using 0xFC1E for VsMsftOpCode.
+	 */
+	switch (ver.hw_variant) {
+	case 0x12:	/* ThP */
+		hci_set_msft_opcode(hdev, 0xFC1E);
+		break;
+	}
+
 	/* Set the event mask for Intel specific vendor events. This enables
 	 * a few extra events that are useful during general operation. It
 	 * does not enable any debugging related events.
-- 
cgit v1.2.3-59-g8ed1b


From a479036041d6a1bcf98f72b16a425e8d45e20ae9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:04 +0200
Subject: Bluetooth: Add support for Read Local Simple Pairing Options

With the Read Local Simple Pairing Options command it is possible to
retrieve the support for max encryption key size supported by the
controller and also if the controller correctly verifies the ECDH public
key during pairing.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h      |  7 +++++++
 include/net/bluetooth/hci_core.h |  2 ++
 net/bluetooth/hci_core.c         |  4 ++++
 net/bluetooth/hci_event.c        | 21 +++++++++++++++++++++
 4 files changed, 34 insertions(+)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 9ff2f7a9e131..086a9e9d5d03 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -1275,6 +1275,13 @@ struct hci_rp_read_data_block_size {
 
 #define HCI_OP_READ_LOCAL_CODECS	0x100b
 
+#define HCI_OP_READ_LOCAL_PAIRING_OPTS	0x100c
+struct hci_rp_read_local_pairing_opts {
+	__u8     status;
+	__u8     pairing_opts;
+	__u8     max_key_size;
+} __packed;
+
 #define HCI_OP_READ_PAGE_SCAN_ACTIVITY	0x0c1b
 struct hci_rp_read_page_scan_activity {
 	__u8     status;
diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 3cb0f82d0c83..2f3275f1d1c4 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -312,6 +312,8 @@ struct hci_dev {
 	__u16		conn_info_max_age;
 	__u16		auth_payload_timeout;
 	__u8		min_enc_key_size;
+	__u8		max_enc_key_size;
+	__u8		pairing_opts;
 	__u8		ssp_debug_mode;
 	__u8		hw_error_code;
 	__u32		clock;
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index ef0ee3a3d9ed..589c4085499c 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -827,6 +827,10 @@ static int hci_init4_req(struct hci_request *req, unsigned long opt)
 	if (hdev->commands[29] & 0x20)
 		hci_req_add(req, HCI_OP_READ_LOCAL_CODECS, 0, NULL);
 
+	/* Read local pairing options if the HCI command is supported */
+	if (hdev->commands[41] & 0x08)
+		hci_req_add(req, HCI_OP_READ_LOCAL_PAIRING_OPTS, 0, NULL);
+
 	/* Get MWS transport configuration if the HCI command is supported */
 	if (hdev->commands[30] & 0x08)
 		hci_req_add(req, HCI_OP_GET_MWS_TRANSPORT_CONFIG, 0, NULL);
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 2803beaa1c44..51e6461f0b71 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -747,6 +747,23 @@ static void hci_cc_read_bd_addr(struct hci_dev *hdev, struct sk_buff *skb)
 		bacpy(&hdev->setup_addr, &rp->bdaddr);
 }
 
+static void hci_cc_read_local_pairing_opts(struct hci_dev *hdev,
+					   struct sk_buff *skb)
+{
+	struct hci_rp_read_local_pairing_opts *rp = (void *) skb->data;
+
+	BT_DBG("%s status 0x%2.2x", hdev->name, rp->status);
+
+	if (rp->status)
+		return;
+
+	if (hci_dev_test_flag(hdev, HCI_SETUP) ||
+	    hci_dev_test_flag(hdev, HCI_CONFIG)) {
+		hdev->pairing_opts = rp->pairing_opts;
+		hdev->max_enc_key_size = rp->max_key_size;
+	}
+}
+
 static void hci_cc_read_page_scan_activity(struct hci_dev *hdev,
 					   struct sk_buff *skb)
 {
@@ -3343,6 +3360,10 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 		hci_cc_read_bd_addr(hdev, skb);
 		break;
 
+	case HCI_OP_READ_LOCAL_PAIRING_OPTS:
+		hci_cc_read_local_pairing_opts(hdev, skb);
+		break;
+
 	case HCI_OP_READ_PAGE_SCAN_ACTIVITY:
 		hci_cc_read_page_scan_activity(hdev, skb);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From bc292258c580a82c9baef0a64f66971e010a40a9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:05 +0200
Subject: Bluetooth: Add support for reading security information

To allow userspace to make correct security policy decisions, the kernel
needs to export a few details of the supported security features and
encryption key size information. This command exports this information
and also allows future extensions if needed.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h |  7 ++++++
 net/bluetooth/mgmt.c         | 53 ++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 60 insertions(+)

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index f41cd87550dc..65dd6fd1fff3 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -674,6 +674,13 @@ struct mgmt_cp_set_blocked_keys {
 
 #define MGMT_OP_SET_WIDEBAND_SPEECH	0x0047
 
+#define MGMT_OP_READ_SECURITY_INFO	0x0048
+#define MGMT_READ_SECURITY_INFO_SIZE	0
+struct mgmt_rp_read_security_info {
+	__le16   sec_len;
+	__u8     sec[0];
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 6552003a170e..7b9eac339c87 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -108,6 +108,7 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_APPEARANCE,
 	MGMT_OP_SET_BLOCKED_KEYS,
 	MGMT_OP_SET_WIDEBAND_SPEECH,
+	MGMT_OP_READ_SECURITY_INFO,
 };
 
 static const u16 mgmt_events[] = {
@@ -155,6 +156,7 @@ static const u16 mgmt_untrusted_commands[] = {
 	MGMT_OP_READ_CONFIG_INFO,
 	MGMT_OP_READ_EXT_INDEX_LIST,
 	MGMT_OP_READ_EXT_INFO,
+	MGMT_OP_READ_SECURITY_INFO,
 };
 
 static const u16 mgmt_untrusted_events[] = {
@@ -3659,6 +3661,55 @@ unlock:
 	return err;
 }
 
+static int read_security_info(struct sock *sk, struct hci_dev *hdev,
+			      void *data, u16 data_len)
+{
+	char buf[16];
+	struct mgmt_rp_read_security_info *rp = (void *)buf;
+	u16 sec_len = 0;
+	u8 flags = 0;
+
+	bt_dev_dbg(hdev, "sock %p", sk);
+
+	memset(&buf, 0, sizeof(buf));
+
+	hci_dev_lock(hdev);
+
+	/* When the Read Simple Pairing Options command is supported, then
+	 * the remote public key validation is supported.
+	 */
+	if (hdev->commands[41] & 0x08)
+		flags |= 0x01;	/* Remote public key validation (BR/EDR) */
+
+	flags |= 0x02;		/* Remote public key validation (LE) */
+
+	/* When the Read Encryption Key Size command is supported, then the
+	 * encryption key size is enforced.
+	 */
+	if (hdev->commands[20] & 0x10)
+		flags |= 0x04;	/* Encryption key size enforcement (BR/EDR) */
+
+	flags |= 0x08;		/* Encryption key size enforcement (LE) */
+
+	sec_len = eir_append_data(rp->sec, sec_len, 0x01, &flags, 1);
+
+	/* When the Read Simple Pairing Options command is supported, then
+	 * also max encryption key size information is provided.
+	 */
+	if (hdev->commands[41] & 0x08)
+		sec_len = eir_append_le16(rp->sec, sec_len, 0x02,
+					  hdev->max_enc_key_size);
+
+	sec_len = eir_append_le16(rp->sec, sec_len, 0x03, SMP_MAX_ENC_KEY_SIZE);
+
+	rp->sec_len = cpu_to_le16(sec_len);
+
+	hci_dev_unlock(hdev);
+
+	return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_READ_SECURITY_INFO, 0,
+				 rp, sizeof(*rp) + sec_len);
+}
+
 static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 				         u16 opcode, struct sk_buff *skb)
 {
@@ -7099,6 +7150,8 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ set_blocked_keys,	   MGMT_OP_SET_BLOCKED_KEYS_SIZE,
 						HCI_MGMT_VAR_LEN },
 	{ set_wideband_speech,	   MGMT_SETTING_SIZE },
+	{ read_security_info,      MGMT_READ_SECURITY_INFO_SIZE,
+						HCI_MGMT_UNTRUSTED },
 };
 
 void mgmt_index_added(struct hci_dev *hdev)
-- 
cgit v1.2.3-59-g8ed1b


From 3679fe7d43c65e07f00afb216987f33e152ceb6f Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:06 +0200
Subject: Bluetooth: Increment management interface revision

Increment the mgmt revision due to the recently added new commands.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/mgmt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 7b9eac339c87..f8c0a4fc8090 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -38,7 +38,7 @@
 #include "mgmt_util.h"
 
 #define MGMT_VERSION	1
-#define MGMT_REVISION	16
+#define MGMT_REVISION	17
 
 static const u16 mgmt_commands[] = {
 	MGMT_OP_READ_INDEX_LIST,
-- 
cgit v1.2.3-59-g8ed1b


From d2a3f5f4635b7b0df8d4cd04ee0c75886ef699b9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Fri, 3 Apr 2020 21:44:07 +0200
Subject: Bluetooth: Add HCI device identifier for VIRTIO devices

This patch assigns the next free HCI device identifier to Bluetooth
devices based on VIRTIO devices.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 086a9e9d5d03..79de2a659dd6 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -68,6 +68,7 @@
 #define HCI_SPI		7
 #define HCI_I2C		8
 #define HCI_SMD		9
+#define HCI_VIRTIO	10
 
 /* HCI controller types */
 #define HCI_PRIMARY	0x00
-- 
cgit v1.2.3-59-g8ed1b


From 9556dfa28b4d84edfd5b96e684ed8e7a15a51b67 Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Thu, 26 Mar 2020 18:36:32 +0200
Subject: ath11k: Add sta debugfs support to configure ADDBA and DELBA

Add support to test aggregation procedures (addba/addba_resp/delba)
manually by adding the required callbacks in sta debugfs files.

To enable automatic aggregation in target,

    echo 0 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/
	     stations/XX:XX:XX:XX:XX:XX/aggr_mode

For manual mode,

    echo 1 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/
	     stations/XX:XX:XX:XX:XX:XX/aggr_mode

To send addba response,
    echo 0 25 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/
		stations/XX:XX:XX:XX:XX:XX/addba_resp

To send addba,
    echo 1 32 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/
		stations/XX:XX:XX:XX:XX:XX/addba

To send delba,
    echo 0 1 37 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/
		  stations/XX:XX:XX:XX:XX:XX/delba

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585213026-28406-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h        |   5 +
 drivers/net/wireless/ath/ath11k/debug.h       |   6 +
 drivers/net/wireless/ath/ath11k/debugfs_sta.c | 221 ++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/wmi.c         | 140 ++++++++++++++++
 drivers/net/wireless/ath/ath11k/wmi.h         |  38 +++++
 5 files changed, 410 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 6e7b8ecd09a6..96ca114c2c44 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -341,6 +341,11 @@ struct ath11k_sta {
 	u8 rssi_comb;
 	struct ath11k_htt_tx_stats *tx_stats;
 	struct ath11k_rx_peer_stats *rx_stats;
+
+#ifdef CONFIG_MAC80211_DEBUGFS
+	/* protected by conf_mutex */
+	bool aggr_mode;
+#endif
 };
 
 #define ATH11K_NUM_CHANS 41
diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h
index 97e7306c506d..4a3ff8227187 100644
--- a/drivers/net/wireless/ath/ath11k/debug.h
+++ b/drivers/net/wireless/ath/ath11k/debug.h
@@ -112,6 +112,12 @@ enum ath11k_pktlog_enum {
 	ATH11K_PKTLOG_TYPE_LITE_RX      = 24,
 };
 
+enum ath11k_dbg_aggr_mode {
+	ATH11K_DBG_AGGR_MODE_AUTO,
+	ATH11K_DBG_AGGR_MODE_MANUAL,
+	ATH11K_DBG_AGGR_MODE_MAX,
+};
+
 __printf(2, 3) void ath11k_info(struct ath11k_base *ab, const char *fmt, ...);
 __printf(2, 3) void ath11k_err(struct ath11k_base *ab, const char *fmt, ...);
 __printf(2, 3) void ath11k_warn(struct ath11k_base *ab, const char *fmt, ...);
diff --git a/drivers/net/wireless/ath/ath11k/debugfs_sta.c b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
index 389dac219238..68963cfc5097 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_sta.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
@@ -533,6 +533,222 @@ static const struct file_operations fops_peer_pktlog = {
 	.llseek = default_llseek,
 };
 
+static ssize_t ath11k_dbg_sta_write_delba(struct file *file,
+					  const char __user *user_buf,
+					  size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	u32 tid, initiator, reason;
+	int ret;
+	char buf[64] = {0};
+
+	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
+				     user_buf, count);
+	if (ret <= 0)
+		return ret;
+
+	ret = sscanf(buf, "%u %u %u", &tid, &initiator, &reason);
+	if (ret != 3)
+		return -EINVAL;
+
+	/* Valid TID values are 0 through 15 */
+	if (tid > HAL_DESC_REO_NON_QOS_TID - 1)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+	if (ar->state != ATH11K_STATE_ON ||
+	    arsta->aggr_mode != ATH11K_DBG_AGGR_MODE_MANUAL) {
+		ret = count;
+		goto out;
+	}
+
+	ret = ath11k_wmi_delba_send(ar, arsta->arvif->vdev_id, sta->addr,
+				    tid, initiator, reason);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to send delba: vdev_id %u peer %pM tid %u initiator %u reason %u\n",
+			    arsta->arvif->vdev_id, sta->addr, tid, initiator,
+			    reason);
+	}
+	ret = count;
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static const struct file_operations fops_delba = {
+	.write = ath11k_dbg_sta_write_delba,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+static ssize_t ath11k_dbg_sta_write_addba_resp(struct file *file,
+					       const char __user *user_buf,
+					       size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	u32 tid, status;
+	int ret;
+	char buf[64] = {0};
+
+	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
+				     user_buf, count);
+	if (ret <= 0)
+		return ret;
+
+	ret = sscanf(buf, "%u %u", &tid, &status);
+	if (ret != 2)
+		return -EINVAL;
+
+	/* Valid TID values are 0 through 15 */
+	if (tid > HAL_DESC_REO_NON_QOS_TID - 1)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+	if (ar->state != ATH11K_STATE_ON ||
+	    arsta->aggr_mode != ATH11K_DBG_AGGR_MODE_MANUAL) {
+		ret = count;
+		goto out;
+	}
+
+	ret = ath11k_wmi_addba_set_resp(ar, arsta->arvif->vdev_id, sta->addr,
+					tid, status);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to send addba response: vdev_id %u peer %pM tid %u status%u\n",
+			    arsta->arvif->vdev_id, sta->addr, tid, status);
+	}
+	ret = count;
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static const struct file_operations fops_addba_resp = {
+	.write = ath11k_dbg_sta_write_addba_resp,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+static ssize_t ath11k_dbg_sta_write_addba(struct file *file,
+					  const char __user *user_buf,
+					  size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	u32 tid, buf_size;
+	int ret;
+	char buf[64] = {0};
+
+	ret = simple_write_to_buffer(buf, sizeof(buf) - 1, ppos,
+				     user_buf, count);
+	if (ret <= 0)
+		return ret;
+
+	ret = sscanf(buf, "%u %u", &tid, &buf_size);
+	if (ret != 2)
+		return -EINVAL;
+
+	/* Valid TID values are 0 through 15 */
+	if (tid > HAL_DESC_REO_NON_QOS_TID - 1)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+	if (ar->state != ATH11K_STATE_ON ||
+	    arsta->aggr_mode != ATH11K_DBG_AGGR_MODE_MANUAL) {
+		ret = count;
+		goto out;
+	}
+
+	ret = ath11k_wmi_addba_send(ar, arsta->arvif->vdev_id, sta->addr,
+				    tid, buf_size);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to send addba request: vdev_id %u peer %pM tid %u buf_size %u\n",
+			    arsta->arvif->vdev_id, sta->addr, tid, buf_size);
+	}
+
+	ret = count;
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static const struct file_operations fops_addba = {
+	.write = ath11k_dbg_sta_write_addba,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
+static ssize_t ath11k_dbg_sta_read_aggr_mode(struct file *file,
+					     char __user *user_buf,
+					     size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	char buf[64];
+	int len = 0;
+
+	mutex_lock(&ar->conf_mutex);
+	len = scnprintf(buf, sizeof(buf) - len,
+			"aggregation mode: %s\n\n%s\n%s\n",
+			(arsta->aggr_mode == ATH11K_DBG_AGGR_MODE_AUTO) ?
+			"auto" : "manual", "auto = 0", "manual = 1");
+	mutex_unlock(&ar->conf_mutex);
+
+	return simple_read_from_buffer(user_buf, count, ppos, buf, len);
+}
+
+static ssize_t ath11k_dbg_sta_write_aggr_mode(struct file *file,
+					      const char __user *user_buf,
+					      size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	u32 aggr_mode;
+	int ret;
+
+	if (kstrtouint_from_user(user_buf, count, 0, &aggr_mode))
+		return -EINVAL;
+
+	if (aggr_mode >= ATH11K_DBG_AGGR_MODE_MAX)
+		return -EINVAL;
+
+	mutex_lock(&ar->conf_mutex);
+	if (ar->state != ATH11K_STATE_ON ||
+	    aggr_mode == arsta->aggr_mode) {
+		ret = count;
+		goto out;
+	}
+
+	ret = ath11k_wmi_addba_clear_resp(ar, arsta->arvif->vdev_id, sta->addr);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to clear addba session ret: %d\n",
+			    ret);
+		goto out;
+	}
+
+	arsta->aggr_mode = aggr_mode;
+out:
+	mutex_unlock(&ar->conf_mutex);
+	return ret;
+}
+
+static const struct file_operations fops_aggr_mode = {
+	.read = ath11k_dbg_sta_read_aggr_mode,
+	.write = ath11k_dbg_sta_write_aggr_mode,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 void ath11k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir)
 {
@@ -550,4 +766,9 @@ void ath11k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 
 	debugfs_create_file("peer_pktlog", 0644, dir, sta,
 			    &fops_peer_pktlog);
+
+	debugfs_create_file("aggr_mode", 0644, dir, sta, &fops_aggr_mode);
+	debugfs_create_file("addba", 0200, dir, sta, &fops_addba);
+	debugfs_create_file("addba_resp", 0200, dir, sta, &fops_addba_resp);
+	debugfs_create_file("delba", 0200, dir, sta, &fops_delba);
 }
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index e7ce36966d6a..49a17c85303a 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -2368,6 +2368,146 @@ int ath11k_wmi_send_dfs_phyerr_offload_enable_cmd(struct ath11k *ar,
 	return ret;
 }
 
+int ath11k_wmi_delba_send(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			  u32 tid, u32 initiator, u32 reason)
+{
+	struct ath11k_pdev_wmi *wmi = ar->wmi;
+	struct wmi_delba_send_cmd *cmd;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd));
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_delba_send_cmd *)skb->data;
+	cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_DELBA_SEND_CMD) |
+			FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+	cmd->vdev_id = vdev_id;
+	ether_addr_copy(cmd->peer_macaddr.addr, mac);
+	cmd->tid = tid;
+	cmd->initiator = initiator;
+	cmd->reasoncode = reason;
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+		   "wmi delba send vdev_id 0x%X mac_addr %pM tid %u initiator %u reason %u\n",
+		   vdev_id, mac, tid, initiator, reason);
+
+	ret = ath11k_wmi_cmd_send(wmi, skb, WMI_DELBA_SEND_CMDID);
+
+	if (ret) {
+		ath11k_warn(ar->ab,
+			    "failed to send WMI_DELBA_SEND_CMDID cmd\n");
+		dev_kfree_skb(skb);
+	}
+
+	return ret;
+}
+
+int ath11k_wmi_addba_set_resp(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			      u32 tid, u32 status)
+{
+	struct ath11k_pdev_wmi *wmi = ar->wmi;
+	struct wmi_addba_setresponse_cmd *cmd;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd));
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_addba_setresponse_cmd *)skb->data;
+	cmd->tlv_header =
+		FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ADDBA_SETRESPONSE_CMD) |
+		FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+	cmd->vdev_id = vdev_id;
+	ether_addr_copy(cmd->peer_macaddr.addr, mac);
+	cmd->tid = tid;
+	cmd->statuscode = status;
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+		   "wmi addba set resp vdev_id 0x%X mac_addr %pM tid %u status %u\n",
+		   vdev_id, mac, tid, status);
+
+	ret = ath11k_wmi_cmd_send(wmi, skb, WMI_ADDBA_SET_RESP_CMDID);
+
+	if (ret) {
+		ath11k_warn(ar->ab,
+			    "failed to send WMI_ADDBA_SET_RESP_CMDID cmd\n");
+		dev_kfree_skb(skb);
+	}
+
+	return ret;
+}
+
+int ath11k_wmi_addba_send(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			  u32 tid, u32 buf_size)
+{
+	struct ath11k_pdev_wmi *wmi = ar->wmi;
+	struct wmi_addba_send_cmd *cmd;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd));
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_addba_send_cmd *)skb->data;
+	cmd->tlv_header = FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ADDBA_SEND_CMD) |
+		FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+	cmd->vdev_id = vdev_id;
+	ether_addr_copy(cmd->peer_macaddr.addr, mac);
+	cmd->tid = tid;
+	cmd->buffersize = buf_size;
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+		   "wmi addba send vdev_id 0x%X mac_addr %pM tid %u bufsize %u\n",
+		   vdev_id, mac, tid, buf_size);
+
+	ret = ath11k_wmi_cmd_send(wmi, skb, WMI_ADDBA_SEND_CMDID);
+
+	if (ret) {
+		ath11k_warn(ar->ab,
+			    "failed to send WMI_ADDBA_SEND_CMDID cmd\n");
+		dev_kfree_skb(skb);
+	}
+
+	return ret;
+}
+
+int ath11k_wmi_addba_clear_resp(struct ath11k *ar, u32 vdev_id, const u8 *mac)
+{
+	struct ath11k_pdev_wmi *wmi = ar->wmi;
+	struct wmi_addba_clear_resp_cmd *cmd;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = ath11k_wmi_alloc_skb(wmi->wmi_ab, sizeof(*cmd));
+	if (!skb)
+		return -ENOMEM;
+
+	cmd = (struct wmi_addba_clear_resp_cmd *)skb->data;
+	cmd->tlv_header =
+		FIELD_PREP(WMI_TLV_TAG, WMI_TAG_ADDBA_CLEAR_RESP_CMD) |
+		FIELD_PREP(WMI_TLV_LEN, sizeof(*cmd) - TLV_HDR_SIZE);
+	cmd->vdev_id = vdev_id;
+	ether_addr_copy(cmd->peer_macaddr.addr, mac);
+
+	ath11k_dbg(ar->ab, ATH11K_DBG_WMI,
+		   "wmi addba clear resp vdev_id 0x%X mac_addr %pM\n",
+		   vdev_id, mac);
+
+	ret = ath11k_wmi_cmd_send(wmi, skb, WMI_ADDBA_CLEAR_RESP_CMDID);
+
+	if (ret) {
+		ath11k_warn(ar->ab,
+			    "failed to send WMI_ADDBA_CLEAR_RESP_CMDID cmd\n");
+		dev_kfree_skb(skb);
+	}
+
+	return ret;
+}
+
 int ath11k_wmi_pdev_peer_pktlog_filter(struct ath11k *ar, u8 *addr, u8 enable)
 {
 	struct ath11k_pdev_wmi *wmi = ar->wmi;
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index 510f9c6bc1d7..780e6620142d 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -3649,6 +3649,37 @@ struct wmi_therm_throt_level_config_info {
 	u32 prio;
 } __packed;
 
+struct wmi_delba_send_cmd {
+	u32 tlv_header;
+	u32 vdev_id;
+	struct wmi_mac_addr peer_macaddr;
+	u32 tid;
+	u32 initiator;
+	u32 reasoncode;
+} __packed;
+
+struct wmi_addba_setresponse_cmd {
+	u32 tlv_header;
+	u32 vdev_id;
+	struct wmi_mac_addr peer_macaddr;
+	u32 tid;
+	u32 statuscode;
+} __packed;
+
+struct wmi_addba_send_cmd {
+	u32 tlv_header;
+	u32 vdev_id;
+	struct wmi_mac_addr peer_macaddr;
+	u32 tid;
+	u32 buffersize;
+} __packed;
+
+struct wmi_addba_clear_resp_cmd {
+	u32 tlv_header;
+	u32 vdev_id;
+	struct wmi_mac_addr peer_macaddr;
+} __packed;
+
 struct wmi_pdev_pktlog_filter_info {
 	u32 tlv_header;
 	struct wmi_mac_addr peer_macaddr;
@@ -4822,6 +4853,13 @@ int ath11k_wmi_send_scan_chan_list_cmd(struct ath11k *ar,
 				       struct scan_chan_list_params *chan_list);
 int ath11k_wmi_send_dfs_phyerr_offload_enable_cmd(struct ath11k *ar,
 						  u32 pdev_id);
+int ath11k_wmi_addba_clear_resp(struct ath11k *ar, u32 vdev_id, const u8 *mac);
+int ath11k_wmi_addba_send(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			  u32 tid, u32 buf_size);
+int ath11k_wmi_addba_set_resp(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			      u32 tid, u32 status);
+int ath11k_wmi_delba_send(struct ath11k *ar, u32 vdev_id, const u8 *mac,
+			  u32 tid, u32 initiator, u32 reason);
 int ath11k_wmi_send_bcn_offload_control_cmd(struct ath11k *ar,
 					    u32 vdev_id, u32 bcn_ctrl_op);
 int
-- 
cgit v1.2.3-59-g8ed1b


From 3d1c60460fb2823a19ead9e6ec8f184dd7271aa7 Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Thu, 26 Mar 2020 18:36:36 +0200
Subject: ath10k: Fix the race condition in firmware dump work queue

There is a race condition when the user writes 'hw-restart' and
'hard' to the simulate_fw_crash debugfs file without any delay.
In this scenario, the firmware dump work queue (scheduled by
'hard') should be handled gracefully while the target is in
'hw-restart'.

Tested HW: QCA9984
Tested FW: 10.4-3.9.0.2-00044

Co-developed-by: Govindaraj Saminathan <gsamin@codeaurora.org>
Signed-off-by: Govindaraj Saminathan <gsamin@codeaurora.org>
Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585213077-28439-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index ded7a220a4aa..cd1c5d60261f 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -2074,6 +2074,7 @@ static void ath10k_pci_hif_stop(struct ath10k *ar)
 	ath10k_pci_irq_sync(ar);
 	napi_synchronize(&ar->napi);
 	napi_disable(&ar->napi);
+	cancel_work_sync(&ar_pci->dump_work);
 
 	/* Most likely the device has HTT Rx ring configured. The only way to
 	 * prevent the device from accessing (and possible corrupting) host
-- 
cgit v1.2.3-59-g8ed1b


From 21c1b063f4b98c14b2438734c93fe24d517233cb Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Thu, 26 Mar 2020 20:19:15 +0530
Subject: ath11k: add pktlog checksum in trace events to support pktlog

Pktlog data are different among the chipset & chipset versions.
As part of enhancing the user space script to decode the pktlog
trace events generated, it is desirable to know which chipset or
which chipset version has provided the events and thereby decode
the pktlogs appropriately.

Pktlog checksum helps to determine the chipset variant which is
given by the firmware in the struct wmi_ready_event.

Pktlog checksums are computed during the firmware build.
So, add that pktlog checksum to the pktlog trace events.

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585234155-30574-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h  |  1 +
 drivers/net/wireless/ath/ath11k/dp_rx.c |  3 ++-
 drivers/net/wireless/ath/ath11k/trace.h | 12 ++++++++----
 drivers/net/wireless/ath/ath11k/wmi.c   | 21 +++++++++++++--------
 drivers/net/wireless/ath/ath11k/wmi.h   |  8 +++++++-
 5 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 96ca114c2c44..b4c3e0418eef 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -655,6 +655,7 @@ struct ath11k_base {
 		/* protected by data_lock */
 		u32 fw_crash_counter;
 	} stats;
+	u32 pktlog_defs_checksum;
 };
 
 struct ath11k_fw_stats_pdev {
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index f74a0e74bf3e..a3f2c76b3471 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -1491,7 +1491,8 @@ static void ath11k_htt_pktlog(struct ath11k_base *ab, struct sk_buff *skb)
 		return;
 	}
 
-	trace_ath11k_htt_pktlog(ar, data->payload, hdr->size);
+	trace_ath11k_htt_pktlog(ar, data->payload, hdr->size,
+				ar->ab->pktlog_defs_checksum);
 }
 
 static void ath11k_htt_backpressure_event_handler(struct ath11k_base *ab,
diff --git a/drivers/net/wireless/ath/ath11k/trace.h b/drivers/net/wireless/ath/ath11k/trace.h
index 8700a622be7b..66d0aae7816c 100644
--- a/drivers/net/wireless/ath/ath11k/trace.h
+++ b/drivers/net/wireless/ath/ath11k/trace.h
@@ -21,14 +21,16 @@ static inline void trace_ ## name(proto) {}
 #define TRACE_SYSTEM ath11k
 
 TRACE_EVENT(ath11k_htt_pktlog,
-	    TP_PROTO(struct ath11k *ar, const void *buf, u16 buf_len),
+	    TP_PROTO(struct ath11k *ar, const void *buf, u16 buf_len,
+		     u32 pktlog_checksum),
 
-	TP_ARGS(ar, buf, buf_len),
+	TP_ARGS(ar, buf, buf_len, pktlog_checksum),
 
 	TP_STRUCT__entry(
 		__string(device, dev_name(ar->ab->dev))
 		__string(driver, dev_driver_string(ar->ab->dev))
 		__field(u16, buf_len)
+		__field(u32, pktlog_checksum)
 		__dynamic_array(u8, pktlog, buf_len)
 	),
 
@@ -36,14 +38,16 @@ TRACE_EVENT(ath11k_htt_pktlog,
 		__assign_str(device, dev_name(ar->ab->dev));
 		__assign_str(driver, dev_driver_string(ar->ab->dev));
 		__entry->buf_len = buf_len;
+		__entry->pktlog_checksum = pktlog_checksum;
 		memcpy(__get_dynamic_array(pktlog), buf, buf_len);
 	),
 
 	TP_printk(
-		"%s %s size %hu",
+		"%s %s size %hu pktlog_checksum %d",
 		__get_str(driver),
 		__get_str(device),
-		__entry->buf_len
+		__entry->buf_len,
+		__entry->pktlog_checksum
 	 )
 );
 
diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 49a17c85303a..09150de53321 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -87,8 +87,8 @@ static const struct wmi_tlv_policy wmi_tlv_policies[] = {
 		= { .min_len = sizeof(struct wmi_pdev_bss_chan_info_event) },
 	[WMI_TAG_VDEV_INSTALL_KEY_COMPLETE_EVENT]
 		= { .min_len = sizeof(struct wmi_vdev_install_key_compl_event) },
-	[WMI_TAG_READY_EVENT]
-		= {.min_len = sizeof(struct wmi_ready_event) },
+	[WMI_TAG_READY_EVENT] = {
+		.min_len = sizeof(struct wmi_ready_event_min) },
 	[WMI_TAG_SERVICE_AVAILABLE_EVENT]
 		= {.min_len = sizeof(struct wmi_service_available_event) },
 	[WMI_TAG_PEER_ASSOC_CONF_EVENT]
@@ -4991,7 +4991,7 @@ static int ath11k_wmi_tlv_rdy_parse(struct ath11k_base *ab, u16 tag, u16 len,
 				    const void *ptr, void *data)
 {
 	struct wmi_tlv_rdy_parse *rdy_parse = data;
-	struct wmi_ready_event *fixed_param;
+	struct wmi_ready_event fixed_param;
 	struct wmi_mac_addr *addr_list;
 	struct ath11k_pdev *pdev;
 	u32 num_mac_addr;
@@ -4999,11 +4999,16 @@ static int ath11k_wmi_tlv_rdy_parse(struct ath11k_base *ab, u16 tag, u16 len,
 
 	switch (tag) {
 	case WMI_TAG_READY_EVENT:
-		fixed_param = (struct wmi_ready_event *)ptr;
-		ab->wlan_init_status = fixed_param->status;
-		rdy_parse->num_extra_mac_addr = fixed_param->num_extra_mac_addr;
-
-		ether_addr_copy(ab->mac_addr, fixed_param->mac_addr.addr);
+		memset(&fixed_param, 0, sizeof(fixed_param));
+		memcpy(&fixed_param, (struct wmi_ready_event *)ptr,
+		       min_t(u16, sizeof(fixed_param), len));
+		ab->wlan_init_status = fixed_param.ready_event_min.status;
+		rdy_parse->num_extra_mac_addr =
+			fixed_param.ready_event_min.num_extra_mac_addr;
+
+		ether_addr_copy(ab->mac_addr,
+				fixed_param.ready_event_min.mac_addr.addr);
+		ab->pktlog_defs_checksum = fixed_param.pktlog_defs_checksum;
 		ab->wmi_ready = true;
 		break;
 	case WMI_TAG_ARRAY_FIXED_STRUCT:
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index 780e6620142d..ba05935b715a 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -2345,7 +2345,7 @@ struct wmi_mac_addr {
 	} __packed;
 } __packed;
 
-struct wmi_ready_event {
+struct wmi_ready_event_min {
 	struct wmi_abi_version fw_abi_vers;
 	struct wmi_mac_addr mac_addr;
 	u32 status;
@@ -2355,6 +2355,12 @@ struct wmi_ready_event {
 	u32 num_extra_peers;
 } __packed;
 
+struct wmi_ready_event {
+	struct wmi_ready_event_min ready_event_min;
+	u32 max_ast_index;
+	u32 pktlog_defs_checksum;
+} __packed;
+
 struct wmi_service_available_event {
 	u32 wmi_service_segment_offset;
 	u32 wmi_service_segment_bitmap[WMI_SERVICE_SEGMENT_BM_SIZE32];
-- 
cgit v1.2.3-59-g8ed1b


From 9a8074e3bcd7956ec6b4f7c26360af1b0b0abe38 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 27 Mar 2020 19:26:39 +0000
Subject: ath11k: fix error message to correctly report the command that failed

Currently the error message refers to the command WMI_TWT_DIeABLE_CMDID
which looks like a cut-n-paste mangled typo. Fix the message to match
the command WMI_BSS_COLOR_CHANGE_ENABLE_CMDID that failed.

Fixes: 5a032c8d1953 ("ath11k: add WMI calls required for handling BSS color")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200327192639.363354-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath11k/wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 09150de53321..8832b8c8e63f 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -2919,7 +2919,7 @@ int ath11k_wmi_send_bss_color_change_enable_cmd(struct ath11k *ar, u32 vdev_id,
 	ret = ath11k_wmi_cmd_send(wmi, skb,
 				  WMI_BSS_COLOR_CHANGE_ENABLE_CMDID);
 	if (ret) {
-		ath11k_warn(ab, "Failed to send WMI_TWT_DIeABLE_CMDID");
+		ath11k_warn(ab, "Failed to send WMI_BSS_COLOR_CHANGE_ENABLE_CMDID");
 		dev_kfree_skb(skb);
 	}
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From bdef56a36eeaccf236af43578f77938f3561a2b1 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Mon, 30 Mar 2020 16:46:46 +0530
Subject: ath11k: Increase the tx completion ring size

Increase the tx completion ring size to 0x8000. Also set the
idr size to be the same as the completion ring size.

This avoids backpressure on the TX Completion and
corresponding TCL Data ring during high data traffic.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585567006-9173-1-git-send-email-srirrama@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index 551f9c9fb847..d4e19dc4bce1 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -169,8 +169,8 @@ struct ath11k_pdev_dp {
 
 #define DP_WBM_RELEASE_RING_SIZE	64
 #define DP_TCL_DATA_RING_SIZE		512
-#define DP_TX_COMP_RING_SIZE		8192
-#define DP_TX_IDR_SIZE			(DP_TX_COMP_RING_SIZE << 1)
+#define DP_TX_COMP_RING_SIZE		32768
+#define DP_TX_IDR_SIZE			DP_TX_COMP_RING_SIZE
 #define DP_TCL_CMD_RING_SIZE		32
 #define DP_TCL_STATUS_RING_SIZE		32
 #define DP_REO_DST_RING_MAX		4
-- 
cgit v1.2.3-59-g8ed1b


From 800113ff4b1d277c2b66ffc04d4d38f202a0d187 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Mon, 30 Mar 2020 16:47:08 +0530
Subject: ath11k: Avoid mgmt tx count underflow

The mgmt tx count reference is incremented/decremented on every mgmt tx and on
tx completion event from firmware.
In case of an unexpected mgmt tx completion event from firmware,
the counter would underflow. Avoid this by decrementing
only when the tx count is greater than 0.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585567028-9242-1-git-send-email-srirrama@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/wmi.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 8832b8c8e63f..973b72a0ca69 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -3880,8 +3880,9 @@ static int wmi_process_mgmt_tx_comp(struct ath11k *ar, u32 desc_id,
 
 	ieee80211_tx_status_irqsafe(ar->hw, msdu);
 
-	WARN_ON_ONCE(atomic_read(&ar->num_pending_mgmt_tx) == 0);
-	atomic_dec(&ar->num_pending_mgmt_tx);
+	/* WARN when we received this event without doing any mgmt tx */
+	if (atomic_dec_if_positive(&ar->num_pending_mgmt_tx) < 0)
+		WARN_ON_ONCE(1);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3db24065c2c824e9ea419c453b810b5f301d91c8 Mon Sep 17 00:00:00 2001
From: Lei Wang <leiwa@codeaurora.org>
Date: Mon, 30 Mar 2020 18:56:31 +0530
Subject: ath10k: enable VHT160 and VHT80+80 modes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Set the right channel frequencies in VHT160 mode according to the VHT160
interoperability workaround added as part of IEEE Std 802.11™-2016 in
"Table 9-252—VHT Operation Information subfields"; band_center_freq2
corresponds to CCFS1 in Table 9-253. The previous implementation
(band_center_freq2 = 0 for VHT160) is only deprecated.

Enable VHT80+80 mode and set the proper peer RX nss value for VHT160 and
VHT80+80 mode.

Based on patches by Sebastian Gottschall:

https://lkml.kernel.org/r/20180704095444.662-1-s.gottschall@dd-wrt.com

https://lkml.kernel.org/r/20180704120519.6479-1-s.gottschall@dd-wrt.com

Tested: qca9984 with firmware ver 10.4-3.10-00047

Co-developed-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Signed-off-by: Sebastian Gottschall <s.gottschall@dd-wrt.com>
Co-developed-by: Rick Wu <rwu@codeaurora.org>
Signed-off-by: Rick Wu <rwu@codeaurora.org>
Signed-off-by: Lei Wang <leiwa@codeaurora.org>
Signed-off-by: Sowmiya Sree Elavalagan <ssreeela@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585574792-719-1-git-send-email-ssreeela@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/mac.c | 84 +++++++++++++++++++++++++----------
 drivers/net/wireless/ath/ath10k/wmi.c | 23 ++++++----
 drivers/net/wireless/ath/ath10k/wmi.h |  5 ++-
 3 files changed, 80 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 2d03b8dd3b8c..a59a7a5631a8 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2505,6 +2505,30 @@ ath10k_peer_assoc_h_vht_limit(u16 tx_mcs_set,
 	return tx_mcs_set;
 }
 
+static u32 get_160mhz_nss_from_maxrate(int rate)
+{
+	u32 nss;
+
+	switch (rate) {
+	case 780:
+		nss = 1;
+		break;
+	case 1560:
+		nss = 2;
+		break;
+	case 2106:
+		nss = 3; /* not support MCS9 from spec*/
+		break;
+	case 3120:
+		nss = 4;
+		break;
+	default:
+		 nss = 1;
+	}
+
+	return nss;
+}
+
 static void ath10k_peer_assoc_h_vht(struct ath10k *ar,
 				    struct ieee80211_vif *vif,
 				    struct ieee80211_sta *sta,
@@ -2512,6 +2536,7 @@ static void ath10k_peer_assoc_h_vht(struct ath10k *ar,
 {
 	const struct ieee80211_sta_vht_cap *vht_cap = &sta->vht_cap;
 	struct ath10k_vif *arvif = (void *)vif->drv_priv;
+	struct ath10k_hw_params *hw = &ar->hw_params;
 	struct cfg80211_chan_def def;
 	enum nl80211_band band;
 	const u16 *vht_mcs_mask;
@@ -2578,22 +2603,38 @@ static void ath10k_peer_assoc_h_vht(struct ath10k *ar,
 	arg->peer_vht_rates.tx_mcs_set = ath10k_peer_assoc_h_vht_limit(
 		__le16_to_cpu(vht_cap->vht_mcs.tx_mcs_map), vht_mcs_mask);
 
-	ath10k_dbg(ar, ATH10K_DBG_MAC, "mac vht peer %pM max_mpdu %d flags 0x%x\n",
-		   sta->addr, arg->peer_max_mpdu, arg->peer_flags);
+	/* Configure bandwidth-NSS mapping to FW
+	 * for the chip's tx chains setting on 160Mhz bw
+	 */
+	if (arg->peer_phymode == MODE_11AC_VHT160 ||
+	    arg->peer_phymode == MODE_11AC_VHT80_80) {
+		u32 rx_nss;
+		u32 max_rate;
 
-	if (arg->peer_vht_rates.rx_max_rate &&
-	    (sta->vht_cap.cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK)) {
-		switch (arg->peer_vht_rates.rx_max_rate) {
-		case 1560:
-			/* Must be 2x2 at 160Mhz is all it can do. */
-			arg->peer_bw_rxnss_override = 2;
-			break;
-		case 780:
-			/* Can only do 1x1 at 160Mhz (Long Guard Interval) */
-			arg->peer_bw_rxnss_override = 1;
-			break;
+		max_rate = arg->peer_vht_rates.rx_max_rate;
+		rx_nss = get_160mhz_nss_from_maxrate(max_rate);
+
+		if (rx_nss == 0)
+			rx_nss = arg->peer_num_spatial_streams;
+		else
+			rx_nss = min(arg->peer_num_spatial_streams, rx_nss);
+
+		max_rate = hw->vht160_mcs_tx_highest;
+		rx_nss = min(rx_nss, get_160mhz_nss_from_maxrate(max_rate));
+
+		arg->peer_bw_rxnss_override =
+			FIELD_PREP(WMI_PEER_NSS_MAP_ENABLE, 1) |
+			FIELD_PREP(WMI_PEER_NSS_160MHZ_MASK, (rx_nss - 1));
+
+		if (arg->peer_phymode == MODE_11AC_VHT80_80) {
+			arg->peer_bw_rxnss_override |=
+			FIELD_PREP(WMI_PEER_NSS_80_80MHZ_MASK, (rx_nss - 1));
 		}
 	}
+	ath10k_dbg(ar, ATH10K_DBG_MAC,
+		   "mac vht peer %pM max_mpdu %d flags 0x%x peer_rx_nss_override 0x%x\n",
+		   sta->addr, arg->peer_max_mpdu,
+		   arg->peer_flags, arg->peer_bw_rxnss_override);
 }
 
 static void ath10k_peer_assoc_h_qos(struct ath10k *ar,
@@ -2745,9 +2786,9 @@ static int ath10k_peer_assoc_prepare(struct ath10k *ar,
 	ath10k_peer_assoc_h_crypto(ar, vif, sta, arg);
 	ath10k_peer_assoc_h_rates(ar, vif, sta, arg);
 	ath10k_peer_assoc_h_ht(ar, vif, sta, arg);
+	ath10k_peer_assoc_h_phymode(ar, vif, sta, arg);
 	ath10k_peer_assoc_h_vht(ar, vif, sta, arg);
 	ath10k_peer_assoc_h_qos(ar, vif, sta, arg);
-	ath10k_peer_assoc_h_phymode(ar, vif, sta, arg);
 
 	return 0;
 }
@@ -4563,13 +4604,6 @@ static struct ieee80211_sta_vht_cap ath10k_create_vht_cap(struct ath10k *ar)
 		vht_cap.cap |= val;
 	}
 
-	/* Currently the firmware seems to be buggy, don't enable 80+80
-	 * mode until that's resolved.
-	 */
-	if ((ar->vht_cap_info & IEEE80211_VHT_CAP_SHORT_GI_160) &&
-	    (ar->vht_cap_info & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK) == 0)
-		vht_cap.cap |= IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ;
-
 	mcs_map = 0;
 	for (i = 0; i < 8; i++) {
 		if ((i < ar->num_rf_chains) && (ar->cfg_tx_chainmask & BIT(i)))
@@ -8625,7 +8659,9 @@ static const struct ieee80211_iface_combination ath10k_10_4_if_comb[] = {
 		.radar_detect_widths =	BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 					BIT(NL80211_CHAN_WIDTH_20) |
 					BIT(NL80211_CHAN_WIDTH_40) |
-					BIT(NL80211_CHAN_WIDTH_80),
+					BIT(NL80211_CHAN_WIDTH_80) |
+					BIT(NL80211_CHAN_WIDTH_80P80) |
+					BIT(NL80211_CHAN_WIDTH_160),
 #endif
 	},
 };
@@ -8643,7 +8679,9 @@ ieee80211_iface_combination ath10k_10_4_bcn_int_if_comb[] = {
 		.radar_detect_widths =  BIT(NL80211_CHAN_WIDTH_20_NOHT) |
 					BIT(NL80211_CHAN_WIDTH_20) |
 					BIT(NL80211_CHAN_WIDTH_40) |
-					BIT(NL80211_CHAN_WIDTH_80),
+					BIT(NL80211_CHAN_WIDTH_80) |
+					BIT(NL80211_CHAN_WIDTH_80P80) |
+					BIT(NL80211_CHAN_WIDTH_160),
 #endif
 	},
 };
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 2ea77bb880b1..db6f4c751485 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1714,12 +1714,23 @@ void ath10k_wmi_put_wmi_channel(struct wmi_channel *ch,
 	if (arg->chan_radar)
 		flags |= WMI_CHAN_FLAG_DFS;
 
+	ch->band_center_freq2 = 0;
 	ch->mhz = __cpu_to_le32(arg->freq);
 	ch->band_center_freq1 = __cpu_to_le32(arg->band_center_freq1);
 	if (arg->mode == MODE_11AC_VHT80_80)
 		ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq2);
-	else
-		ch->band_center_freq2 = 0;
+
+	if (arg->mode == MODE_11AC_VHT160) {
+		if (arg->freq > arg->band_center_freq1)
+			ch->band_center_freq1 =
+				__cpu_to_le32(arg->band_center_freq1 + 40);
+		else
+			ch->band_center_freq1 =
+				__cpu_to_le32(arg->band_center_freq1 - 40);
+
+		ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq1);
+	}
+
 	ch->min_power = arg->min_power;
 	ch->max_power = arg->max_power;
 	ch->reg_power = arg->max_reg_power;
@@ -7628,12 +7639,8 @@ ath10k_wmi_peer_assoc_fill_10_4(struct ath10k *ar, void *buf,
 	struct wmi_10_4_peer_assoc_complete_cmd *cmd = buf;
 
 	ath10k_wmi_peer_assoc_fill_10_2(ar, buf, arg);
-	if (arg->peer_bw_rxnss_override)
-		cmd->peer_bw_rxnss_override =
-			__cpu_to_le32((arg->peer_bw_rxnss_override - 1) |
-				      BIT(PEER_BW_RXNSS_OVERRIDE_OFFSET));
-	else
-		cmd->peer_bw_rxnss_override = 0;
+	cmd->peer_bw_rxnss_override =
+		__cpu_to_le32(arg->peer_bw_rxnss_override);
 }
 
 static int
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 6df415778374..5ba0c9a7d18c 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -6508,7 +6508,10 @@ struct wmi_10_2_peer_assoc_complete_cmd {
 	__le32 info0; /* WMI_PEER_ASSOC_INFO0_ */
 } __packed;
 
-#define PEER_BW_RXNSS_OVERRIDE_OFFSET  31
+/* NSS Mapping to FW */
+#define WMI_PEER_NSS_MAP_ENABLE	BIT(31)
+#define WMI_PEER_NSS_160MHZ_MASK	GENMASK(2, 0)
+#define WMI_PEER_NSS_80_80MHZ_MASK	GENMASK(5, 3)
 
 struct wmi_10_4_peer_assoc_complete_cmd {
 	struct wmi_10_2_peer_assoc_complete_cmd cmd;
-- 
cgit v1.2.3-59-g8ed1b


From 795def8b14ffa334881264823444eaab4d1879c3 Mon Sep 17 00:00:00 2001
From: Lei Wang <leiwa@codeaurora.org>
Date: Mon, 30 Mar 2020 18:56:32 +0530
Subject: ath10k: enable radar detection in secondary segment

Enable radar detection in the secondary segment for VHT160 and VHT80+80
modes on DFS channels. Otherwise, when a radar pulse is injected in the
secondary segment, the DUT can't detect it.

Tested: qca9984 with firmware ver 10.4-3.10-00047

Signed-off-by: Lei Wang <leiwa@codeaurora.org>
Signed-off-by: Sowmiya Sree Elavalagan <ssreeela@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585574792-719-2-git-send-email-ssreeela@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/wmi-tlv.c |  6 ++---
 drivers/net/wireless/ath/ath10k/wmi.c     | 39 ++++++++++++++++++++++---------
 drivers/net/wireless/ath/ath10k/wmi.h     |  5 ++--
 3 files changed, 34 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 4e68debda9bf..e1ab900f2662 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -2123,7 +2123,7 @@ ath10k_wmi_tlv_op_gen_vdev_start(struct ath10k *ar,
 	tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_CHANNEL);
 	tlv->len = __cpu_to_le16(sizeof(*ch));
 	ch = (void *)tlv->value;
-	ath10k_wmi_put_wmi_channel(ch, &arg->channel);
+	ath10k_wmi_put_wmi_channel(ar, ch, &arg->channel);
 
 	ptr += sizeof(*tlv);
 	ptr += sizeof(*ch);
@@ -2763,7 +2763,7 @@ ath10k_wmi_tlv_op_gen_scan_chan_list(struct ath10k *ar,
 		tlv->len = __cpu_to_le16(sizeof(*ci));
 		ci = (void *)tlv->value;
 
-		ath10k_wmi_put_wmi_channel(ci, ch);
+		ath10k_wmi_put_wmi_channel(ar, ci, ch);
 
 		chans += sizeof(*tlv);
 		chans += sizeof(*ci);
@@ -3450,7 +3450,7 @@ ath10k_wmi_tlv_op_gen_tdls_peer_update(struct ath10k *ar,
 		tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_CHANNEL);
 		tlv->len = __cpu_to_le16(sizeof(*chan));
 		chan = (void *)tlv->value;
-		ath10k_wmi_put_wmi_channel(chan, &chan_arg[i]);
+		ath10k_wmi_put_wmi_channel(ar, chan, &chan_arg[i]);
 
 		ptr += sizeof(*tlv);
 		ptr += sizeof(*chan);
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index db6f4c751485..4a3a698fe059 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -1694,10 +1694,11 @@ static const struct wmi_peer_flags_map wmi_10_2_peer_flags_map = {
 	.bw160 = WMI_10_2_PEER_160MHZ,
 };
 
-void ath10k_wmi_put_wmi_channel(struct wmi_channel *ch,
+void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch,
 				const struct wmi_channel_arg *arg)
 {
 	u32 flags = 0;
+	struct ieee80211_channel *chan = NULL;
 
 	memset(ch, 0, sizeof(*ch));
 
@@ -1717,20 +1718,36 @@ void ath10k_wmi_put_wmi_channel(struct wmi_channel *ch,
 	ch->band_center_freq2 = 0;
 	ch->mhz = __cpu_to_le32(arg->freq);
 	ch->band_center_freq1 = __cpu_to_le32(arg->band_center_freq1);
-	if (arg->mode == MODE_11AC_VHT80_80)
+	if (arg->mode == MODE_11AC_VHT80_80) {
 		ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq2);
+		chan = ieee80211_get_channel(ar->hw->wiphy,
+					     arg->band_center_freq2 - 10);
+	}
 
 	if (arg->mode == MODE_11AC_VHT160) {
-		if (arg->freq > arg->band_center_freq1)
-			ch->band_center_freq1 =
-				__cpu_to_le32(arg->band_center_freq1 + 40);
-		else
-			ch->band_center_freq1 =
-				__cpu_to_le32(arg->band_center_freq1 - 40);
+		u32 band_center_freq1;
+		u32 band_center_freq2;
+
+		if (arg->freq > arg->band_center_freq1) {
+			band_center_freq1 = arg->band_center_freq1 + 40;
+			band_center_freq2 = arg->band_center_freq1 - 40;
+		} else {
+			band_center_freq1 = arg->band_center_freq1 - 40;
+			band_center_freq2 = arg->band_center_freq1 + 40;
+		}
 
+		ch->band_center_freq1 =
+					__cpu_to_le32(band_center_freq1);
+		/* Minus 10 to get a defined 5G channel frequency*/
+		chan = ieee80211_get_channel(ar->hw->wiphy,
+					     band_center_freq2 - 10);
+		/* The center frequency of the entire VHT160 */
 		ch->band_center_freq2 = __cpu_to_le32(arg->band_center_freq1);
 	}
 
+	if (chan && chan->flags & IEEE80211_CHAN_RADAR)
+		flags |= WMI_CHAN_FLAG_DFS_CFREQ2;
+
 	ch->min_power = arg->min_power;
 	ch->max_power = arg->max_power;
 	ch->reg_power = arg->max_reg_power;
@@ -7176,7 +7193,7 @@ ath10k_wmi_op_gen_vdev_start(struct ath10k *ar,
 		memcpy(cmd->ssid.ssid, arg->ssid, arg->ssid_len);
 	}
 
-	ath10k_wmi_put_wmi_channel(&cmd->chan, &arg->channel);
+	ath10k_wmi_put_wmi_channel(ar, &cmd->chan, &arg->channel);
 
 	ath10k_dbg(ar, ATH10K_DBG_WMI,
 		   "wmi vdev %s id 0x%x flags: 0x%0X, freq %d, mode %d, ch_flags: 0x%0X, max_power: %d\n",
@@ -7548,7 +7565,7 @@ ath10k_wmi_op_gen_scan_chan_list(struct ath10k *ar,
 		ch = &arg->channels[i];
 		ci = &cmd->chan_info[i];
 
-		ath10k_wmi_put_wmi_channel(ci, ch);
+		ath10k_wmi_put_wmi_channel(ar, ci, ch);
 	}
 
 	return skb;
@@ -8952,7 +8969,7 @@ ath10k_wmi_10_4_gen_tdls_peer_update(struct ath10k *ar,
 
 	for (i = 0; i < cap->peer_chan_len; i++) {
 		chan = (struct wmi_channel *)&peer_cap->peer_chan_list[i];
-		ath10k_wmi_put_wmi_channel(chan, &chan_arg[i]);
+		ath10k_wmi_put_wmi_channel(ar, chan, &chan_arg[i]);
 	}
 
 	ath10k_dbg(ar, ATH10K_DBG_WMI,
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 5ba0c9a7d18c..209070714d1a 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -2094,7 +2094,8 @@ enum wmi_channel_change_cause {
 
 /* Indicate reason for channel switch */
 #define WMI_CHANNEL_CHANGE_CAUSE_CSA (1 << 13)
-
+/* DFS required on channel for 2nd segment of VHT160 and VHT80+80*/
+#define WMI_CHAN_FLAG_DFS_CFREQ2  (1 << 15)
 #define WMI_MAX_SPATIAL_STREAM        3 /* default max ss */
 
 /* HT Capabilities*/
@@ -7351,7 +7352,7 @@ void ath10k_wmi_put_start_scan_common(struct wmi_start_scan_common *cmn,
 				      const struct wmi_start_scan_arg *arg);
 void ath10k_wmi_set_wmm_param(struct wmi_wmm_params *params,
 			      const struct wmi_wmm_params_arg *arg);
-void ath10k_wmi_put_wmi_channel(struct wmi_channel *ch,
+void ath10k_wmi_put_wmi_channel(struct ath10k *ar, struct wmi_channel *ch,
 				const struct wmi_channel_arg *arg);
 int ath10k_wmi_start_scan_verify(const struct wmi_start_scan_arg *arg);
 
-- 
cgit v1.2.3-59-g8ed1b


From acb31476adc9ff271140cdd4d3c707ff0c97f5a4 Mon Sep 17 00:00:00 2001
From: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Date: Wed, 1 Apr 2020 15:48:10 +0530
Subject: ath10k: fix kernel null pointer dereference

Currently sta airtime is updated without any lock in case of
host based airtime calculation, which may result in accessing an
invalid sta pointer in case of continuous station connect/disconnect.

This patch fixes the kernel null pointer dereference by updating the
station airtime under the RCU read lock in case of host based airtime
calculation.

Proceeding with the analysis of "ARM Kernel Panic".
The APSS crash happened due to OOPS on CPU 0.
Crash Signature : Unable to handle kernel NULL pointer dereference
at virtual address 00000300
During the crash,
PC points to "ieee80211_sta_register_airtime+0x1c/0x448 [mac80211]"
LR points to "ath10k_txrx_tx_unref+0x17c/0x364 [ath10k_core]".
The Backtrace obtained is as follows:
[<bf880238>] (ieee80211_sta_register_airtime [mac80211]) from
[<bf945a38>] (ath10k_txrx_tx_unref+0x17c/0x364 [ath10k_core])
[<bf945a38>] (ath10k_txrx_tx_unref [ath10k_core]) from
[<bf9428e4>] (ath10k_htt_txrx_compl_task+0xa50/0xfc0 [ath10k_core])
[<bf9428e4>] (ath10k_htt_txrx_compl_task [ath10k_core]) from
[<bf9b9bc8>] (ath10k_pci_napi_poll+0x50/0xf8 [ath10k_pci])
[<bf9b9bc8>] (ath10k_pci_napi_poll [ath10k_pci]) from
[<c059e3b0>] (net_rx_action+0xac/0x160)
[<c059e3b0>] (net_rx_action) from [<c02329a4>] (__do_softirq+0x104/0x294)
[<c02329a4>] (__do_softirq) from [<c0232b64>] (run_ksoftirqd+0x30/0x90)
[<c0232b64>] (run_ksoftirqd) from [<c024e358>] (smpboot_thread_fn+0x25c/0x274)
[<c024e358>] (smpboot_thread_fn) from [<c02482fc>] (kthread+0xd8/0xec)

Tested HW: QCA9888
Tested FW: 10.4-3.10-00047

Signed-off-by: Venkateswara Naralasetty <vnaralas@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585736290-17661-1-git-send-email-vnaralas@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/txrx.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/txrx.c b/drivers/net/wireless/ath/ath10k/txrx.c
index 39abf8b12903..f46b9083bbf1 100644
--- a/drivers/net/wireless/ath/ath10k/txrx.c
+++ b/drivers/net/wireless/ath/ath10k/txrx.c
@@ -84,9 +84,11 @@ int ath10k_txrx_tx_unref(struct ath10k_htt *htt,
 		wake_up(&htt->empty_tx_wq);
 	spin_unlock_bh(&htt->tx_lock);
 
+	rcu_read_lock();
 	if (txq && txq->sta && skb_cb->airtime_est)
 		ieee80211_sta_register_airtime(txq->sta, txq->tid,
 					       skb_cb->airtime_est, 0);
+	rcu_read_unlock();
 
 	if (ar->bus_param.dev_type != ATH10K_DEV_TYPE_HL)
 		dma_unmap_single(dev, skb_cb->paddr, msdu->len, DMA_TO_DEVICE);
-- 
cgit v1.2.3-59-g8ed1b


From ced21a4c726bdc60b1680c050a284b08803bc64c Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 4 Apr 2020 12:18:34 +0800
Subject: ath9k: Fix use-after-free Read in htc_connect_service

The skb is consumed by htc_send_epid, so it must not be released again.

The case reported by syzbot:

https://lore.kernel.org/linux-usb/000000000000590f6b05a1c05d15@google.com
usb 1-1: ath9k_htc: Firmware ath9k_htc/htc_9271-1.4.0.fw requested
usb 1-1: ath9k_htc: Transferred FW: ath9k_htc/htc_9271-1.4.0.fw, size:
51008
usb 1-1: Service connection timeout for: 256
==================================================================
BUG: KASAN: use-after-free in atomic_read
include/asm-generic/atomic-instrumented.h:26 [inline]
BUG: KASAN: use-after-free in refcount_read include/linux/refcount.h:134
[inline]
BUG: KASAN: use-after-free in skb_unref include/linux/skbuff.h:1042
[inline]
BUG: KASAN: use-after-free in kfree_skb+0x32/0x3d0 net/core/skbuff.c:692
Read of size 4 at addr ffff8881d0957994 by task kworker/1:2/83

Call Trace:
kfree_skb+0x32/0x3d0 net/core/skbuff.c:692
htc_connect_service.cold+0xa9/0x109
drivers/net/wireless/ath/ath9k/htc_hst.c:282
ath9k_wmi_connect+0xd2/0x1a0 drivers/net/wireless/ath/ath9k/wmi.c:265
ath9k_init_htc_services.constprop.0+0xb4/0x650
drivers/net/wireless/ath/ath9k/htc_drv_init.c:146
ath9k_htc_probe_device+0x25a/0x1d80
drivers/net/wireless/ath/ath9k/htc_drv_init.c:959
ath9k_htc_hw_init+0x31/0x60
drivers/net/wireless/ath/ath9k/htc_hst.c:501
ath9k_hif_usb_firmware_cb+0x26b/0x500
drivers/net/wireless/ath/ath9k/hif_usb.c:1187
request_firmware_work_func+0x126/0x242
drivers/base/firmware_loader/main.c:976
process_one_work+0x94b/0x1620 kernel/workqueue.c:2264
worker_thread+0x96/0xe20 kernel/workqueue.c:2410
kthread+0x318/0x420 kernel/kthread.c:255
ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352

Allocated by task 83:
kmem_cache_alloc_node+0xdc/0x330 mm/slub.c:2814
__alloc_skb+0xba/0x5a0 net/core/skbuff.c:198
alloc_skb include/linux/skbuff.h:1081 [inline]
htc_connect_service+0x2cc/0x840
drivers/net/wireless/ath/ath9k/htc_hst.c:257
ath9k_wmi_connect+0xd2/0x1a0 drivers/net/wireless/ath/ath9k/wmi.c:265
ath9k_init_htc_services.constprop.0+0xb4/0x650
drivers/net/wireless/ath/ath9k/htc_drv_init.c:146
ath9k_htc_probe_device+0x25a/0x1d80
drivers/net/wireless/ath/ath9k/htc_drv_init.c:959
ath9k_htc_hw_init+0x31/0x60
drivers/net/wireless/ath/ath9k/htc_hst.c:501
ath9k_hif_usb_firmware_cb+0x26b/0x500
drivers/net/wireless/ath/ath9k/hif_usb.c:1187
request_firmware_work_func+0x126/0x242
drivers/base/firmware_loader/main.c:976
process_one_work+0x94b/0x1620 kernel/workqueue.c:2264
worker_thread+0x96/0xe20 kernel/workqueue.c:2410
kthread+0x318/0x420 kernel/kthread.c:255
ret_from_fork+0x24/0x30 arch/x86/entry/entry_64.S:352

Freed by task 0:
kfree_skb+0x102/0x3d0 net/core/skbuff.c:690
ath9k_htc_txcompletion_cb+0x1f8/0x2b0
drivers/net/wireless/ath/ath9k/htc_hst.c:356
hif_usb_regout_cb+0x10b/0x1b0
drivers/net/wireless/ath/ath9k/hif_usb.c:90
__usb_hcd_giveback_urb+0x29a/0x550 drivers/usb/core/hcd.c:1650
usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1716
dummy_timer+0x1258/0x32ae drivers/usb/gadget/udc/dummy_hcd.c:1966
call_timer_fn+0x195/0x6f0 kernel/time/timer.c:1404
expire_timers kernel/time/timer.c:1449 [inline]
__run_timers kernel/time/timer.c:1773 [inline]
__run_timers kernel/time/timer.c:1740 [inline]
run_timer_softirq+0x5f9/0x1500 kernel/time/timer.c:1786
__do_softirq+0x21e/0x950 kernel/softirq.c:292

Reported-and-tested-by: syzbot+9505af1ae303dabdc646@syzkaller.appspotmail.com
Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200404041838.10426-2-hqjagain@gmail.com
---
 drivers/net/wireless/ath/ath9k/htc_hst.c | 3 ---
 drivers/net/wireless/ath/ath9k/wmi.c     | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c
index d091c8ebdcf0..1bf63a4efb4c 100644
--- a/drivers/net/wireless/ath/ath9k/htc_hst.c
+++ b/drivers/net/wireless/ath/ath9k/htc_hst.c
@@ -170,7 +170,6 @@ static int htc_config_pipe_credits(struct htc_target *target)
 	time_left = wait_for_completion_timeout(&target->cmd_wait, HZ);
 	if (!time_left) {
 		dev_err(target->dev, "HTC credit config timeout\n");
-		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
 
@@ -206,7 +205,6 @@ static int htc_setup_complete(struct htc_target *target)
 	time_left = wait_for_completion_timeout(&target->cmd_wait, HZ);
 	if (!time_left) {
 		dev_err(target->dev, "HTC start timeout\n");
-		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
 
@@ -279,7 +277,6 @@ int htc_connect_service(struct htc_target *target,
 	if (!time_left) {
 		dev_err(target->dev, "Service connection timeout for: %d\n",
 			service_connreq->service_id);
-		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
 
diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index cdc146091194..d1f6710ca63b 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -336,7 +336,6 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
 		ath_dbg(common, WMI, "Timeout waiting for WMI command: %s\n",
 			wmi_cmd_to_name(cmd_id));
 		mutex_unlock(&wmi->op_mutex);
-		kfree_skb(skb);
 		return -ETIMEDOUT;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From abeaa85054ff8cfe8b99aafc5c70ea067e5d0908 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 4 Apr 2020 12:18:35 +0800
Subject: ath9k: Fix use-after-free Read in ath9k_wmi_ctrl_rx

Free wmi only after the cmd urb has been killed, as the urb callback
still accesses wmi.

The case reported by syzbot:
https://lore.kernel.org/linux-usb/0000000000000002fc05a1d61a68@google.com
BUG: KASAN: use-after-free in ath9k_wmi_ctrl_rx+0x416/0x500
drivers/net/wireless/ath/ath9k/wmi.c:215
Read of size 1 at addr ffff8881cef1417c by task swapper/1/0

Call Trace:
<IRQ>
ath9k_wmi_ctrl_rx+0x416/0x500 drivers/net/wireless/ath/ath9k/wmi.c:215
ath9k_htc_rx_msg+0x2da/0xaf0
drivers/net/wireless/ath/ath9k/htc_hst.c:459
ath9k_hif_usb_reg_in_cb+0x1ba/0x630
drivers/net/wireless/ath/ath9k/hif_usb.c:718
__usb_hcd_giveback_urb+0x29a/0x550 drivers/usb/core/hcd.c:1650
usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1716
dummy_timer+0x1258/0x32ae drivers/usb/gadget/udc/dummy_hcd.c:1966
call_timer_fn+0x195/0x6f0 kernel/time/timer.c:1404
expire_timers kernel/time/timer.c:1449 [inline]
__run_timers kernel/time/timer.c:1773 [inline]
__run_timers kernel/time/timer.c:1740 [inline]
run_timer_softirq+0x5f9/0x1500 kernel/time/timer.c:1786

Reported-and-tested-by: syzbot+5d338854440137ea0fef@syzkaller.appspotmail.com
Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200404041838.10426-3-hqjagain@gmail.com
---
 drivers/net/wireless/ath/ath9k/hif_usb.c      |  5 +++--
 drivers/net/wireless/ath/ath9k/hif_usb.h      |  1 +
 drivers/net/wireless/ath/ath9k/htc_drv_init.c | 10 +++++++---
 drivers/net/wireless/ath/ath9k/wmi.c          |  5 ++++-
 drivers/net/wireless/ath/ath9k/wmi.h          |  3 ++-
 5 files changed, 17 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index dd0c32379375..f227e19087ff 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -973,7 +973,7 @@ err:
 	return -ENOMEM;
 }
 
-static void ath9k_hif_usb_dealloc_urbs(struct hif_device_usb *hif_dev)
+void ath9k_hif_usb_dealloc_urbs(struct hif_device_usb *hif_dev)
 {
 	usb_kill_anchored_urbs(&hif_dev->regout_submitted);
 	ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev);
@@ -1341,8 +1341,9 @@ static void ath9k_hif_usb_disconnect(struct usb_interface *interface)
 
 	if (hif_dev->flags & HIF_USB_READY) {
 		ath9k_htc_hw_deinit(hif_dev->htc_handle, unplugged);
-		ath9k_htc_hw_free(hif_dev->htc_handle);
 		ath9k_hif_usb_dev_deinit(hif_dev);
+		ath9k_destoy_wmi(hif_dev->htc_handle->drv_priv);
+		ath9k_htc_hw_free(hif_dev->htc_handle);
 	}
 
 	usb_set_intfdata(interface, NULL);
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.h b/drivers/net/wireless/ath/ath9k/hif_usb.h
index 7846916aa01d..a94e7e1c86e9 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.h
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.h
@@ -133,5 +133,6 @@ struct hif_device_usb {
 
 int ath9k_hif_usb_init(void);
 void ath9k_hif_usb_exit(void);
+void ath9k_hif_usb_dealloc_urbs(struct hif_device_usb *hif_dev);
 
 #endif /* HTC_USB_H */
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index d961095ab01f..40a065028ebe 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -931,8 +931,9 @@ err_init:
 int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev,
 			   u16 devid, char *product, u32 drv_info)
 {
-	struct ieee80211_hw *hw;
+	struct hif_device_usb *hif_dev;
 	struct ath9k_htc_priv *priv;
+	struct ieee80211_hw *hw;
 	int ret;
 
 	hw = ieee80211_alloc_hw(sizeof(struct ath9k_htc_priv), &ath9k_htc_ops);
@@ -967,7 +968,10 @@ int ath9k_htc_probe_device(struct htc_target *htc_handle, struct device *dev,
 	return 0;
 
 err_init:
-	ath9k_deinit_wmi(priv);
+	ath9k_stop_wmi(priv);
+	hif_dev = (struct hif_device_usb *)htc_handle->hif_dev;
+	ath9k_hif_usb_dealloc_urbs(hif_dev);
+	ath9k_destoy_wmi(priv);
 err_free:
 	ieee80211_free_hw(hw);
 	return ret;
@@ -982,7 +986,7 @@ void ath9k_htc_disconnect_device(struct htc_target *htc_handle, bool hotunplug)
 			htc_handle->drv_priv->ah->ah_flags |= AH_UNPLUGGED;
 
 		ath9k_deinit_device(htc_handle->drv_priv);
-		ath9k_deinit_wmi(htc_handle->drv_priv);
+		ath9k_stop_wmi(htc_handle->drv_priv);
 		ieee80211_free_hw(htc_handle->drv_priv->hw);
 	}
 }
diff --git a/drivers/net/wireless/ath/ath9k/wmi.c b/drivers/net/wireless/ath/ath9k/wmi.c
index d1f6710ca63b..e7a3127395be 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.c
+++ b/drivers/net/wireless/ath/ath9k/wmi.c
@@ -112,14 +112,17 @@ struct wmi *ath9k_init_wmi(struct ath9k_htc_priv *priv)
 	return wmi;
 }
 
-void ath9k_deinit_wmi(struct ath9k_htc_priv *priv)
+void ath9k_stop_wmi(struct ath9k_htc_priv *priv)
 {
 	struct wmi *wmi = priv->wmi;
 
 	mutex_lock(&wmi->op_mutex);
 	wmi->stopped = true;
 	mutex_unlock(&wmi->op_mutex);
+}
 
+void ath9k_destoy_wmi(struct ath9k_htc_priv *priv)
+{
 	kfree(priv->wmi);
 }
 
diff --git a/drivers/net/wireless/ath/ath9k/wmi.h b/drivers/net/wireless/ath/ath9k/wmi.h
index 380175d5ecd7..d8b912206232 100644
--- a/drivers/net/wireless/ath/ath9k/wmi.h
+++ b/drivers/net/wireless/ath/ath9k/wmi.h
@@ -179,7 +179,6 @@ struct wmi {
 };
 
 struct wmi *ath9k_init_wmi(struct ath9k_htc_priv *priv);
-void ath9k_deinit_wmi(struct ath9k_htc_priv *priv);
 int ath9k_wmi_connect(struct htc_target *htc, struct wmi *wmi,
 		      enum htc_endpoint_id *wmi_ctrl_epid);
 int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
@@ -189,6 +188,8 @@ int ath9k_wmi_cmd(struct wmi *wmi, enum wmi_cmd_id cmd_id,
 void ath9k_wmi_event_tasklet(unsigned long data);
 void ath9k_fatal_work(struct work_struct *work);
 void ath9k_wmi_event_drain(struct ath9k_htc_priv *priv);
+void ath9k_stop_wmi(struct ath9k_htc_priv *priv);
+void ath9k_destoy_wmi(struct ath9k_htc_priv *priv);
 
 #define WMI_CMD(_wmi_cmd)						\
 	do {								\
-- 
cgit v1.2.3-59-g8ed1b


From e4ff08a4d727146bb6717a39a8d399d834654345 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 4 Apr 2020 12:18:36 +0800
Subject: ath9k: Fix use-after-free Write in ath9k_htc_rx_msg

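htc_process_conn_rsp uses the endpoint id from the response as an index
into target->endpoint[] without validating it, which can write out of
slab bounds. Check that epid is within range before using it.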

The case reported by syzbot:
https://lore.kernel.org/linux-usb/0000000000006ac55b05a1c05d72@google.com
BUG: KASAN: use-after-free in htc_process_conn_rsp
drivers/net/wireless/ath/ath9k/htc_hst.c:131 [inline]
BUG: KASAN: use-after-free in ath9k_htc_rx_msg+0xa25/0xaf0
drivers/net/wireless/ath/ath9k/htc_hst.c:443
Write of size 2 at addr ffff8881cea291f0 by task swapper/1/0

Call Trace:
 htc_process_conn_rsp drivers/net/wireless/ath/ath9k/htc_hst.c:131
[inline]
ath9k_htc_rx_msg+0xa25/0xaf0
drivers/net/wireless/ath/ath9k/htc_hst.c:443
ath9k_hif_usb_reg_in_cb+0x1ba/0x630
drivers/net/wireless/ath/ath9k/hif_usb.c:718
__usb_hcd_giveback_urb+0x29a/0x550 drivers/usb/core/hcd.c:1650
usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1716
dummy_timer+0x1258/0x32ae drivers/usb/gadget/udc/dummy_hcd.c:1966
call_timer_fn+0x195/0x6f0 kernel/time/timer.c:1404
expire_timers kernel/time/timer.c:1449 [inline]
__run_timers kernel/time/timer.c:1773 [inline]
__run_timers kernel/time/timer.c:1740 [inline]
run_timer_softirq+0x5f9/0x1500 kernel/time/timer.c:1786

Reported-and-tested-by: syzbot+b1c61e5f11be5782f192@syzkaller.appspotmail.com
Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200404041838.10426-4-hqjagain@gmail.com
---
 drivers/net/wireless/ath/ath9k/htc_hst.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/htc_hst.c b/drivers/net/wireless/ath/ath9k/htc_hst.c
index 1bf63a4efb4c..d2e062eaf561 100644
--- a/drivers/net/wireless/ath/ath9k/htc_hst.c
+++ b/drivers/net/wireless/ath/ath9k/htc_hst.c
@@ -113,6 +113,9 @@ static void htc_process_conn_rsp(struct htc_target *target,
 
 	if (svc_rspmsg->status == HTC_SERVICE_SUCCESS) {
 		epid = svc_rspmsg->endpoint_id;
+		if (epid < 0 || epid >= ENDPOINT_MAX)
+			return;
+
 		service_id = be16_to_cpu(svc_rspmsg->service_id);
 		max_msglen = be16_to_cpu(svc_rspmsg->max_msg_len);
 		endpoint = &target->endpoint[epid];
-- 
cgit v1.2.3-59-g8ed1b


From 19d6c375d671ce9949a864fb9a03e19f5487b4d3 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 4 Apr 2020 12:18:37 +0800
Subject: ath9k: Fix stack-out-of-bounds Write in ath9k_hif_usb_rx_cb

Add a bounds check on pool_index before accessing the stack array skb_pool.

The case reported by syzbot:
https://lore.kernel.org/linux-usb/0000000000003d7c1505a2168418@google.com
BUG: KASAN: stack-out-of-bounds in ath9k_hif_usb_rx_stream
drivers/net/wireless/ath/ath9k/hif_usb.c:626 [inline]
BUG: KASAN: stack-out-of-bounds in ath9k_hif_usb_rx_cb+0xdf6/0xf70
drivers/net/wireless/ath/ath9k/hif_usb.c:666
Write of size 8 at addr ffff8881db309a28 by task swapper/1/0

Call Trace:
ath9k_hif_usb_rx_stream drivers/net/wireless/ath/ath9k/hif_usb.c:626
[inline]
ath9k_hif_usb_rx_cb+0xdf6/0xf70
drivers/net/wireless/ath/ath9k/hif_usb.c:666
__usb_hcd_giveback_urb+0x1f2/0x470 drivers/usb/core/hcd.c:1648
usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1713
dummy_timer+0x1258/0x32ae drivers/usb/gadget/udc/dummy_hcd.c:1966
call_timer_fn+0x195/0x6f0 kernel/time/timer.c:1404
expire_timers kernel/time/timer.c:1449 [inline]
__run_timers kernel/time/timer.c:1773 [inline]
__run_timers kernel/time/timer.c:1740 [inline]
run_timer_softirq+0x5f9/0x1500 kernel/time/timer.c:1786

Reported-and-tested-by: syzbot+d403396d4df67ad0bd5f@syzkaller.appspotmail.com
Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200404041838.10426-5-hqjagain@gmail.com
---
 drivers/net/wireless/ath/ath9k/hif_usb.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index f227e19087ff..6049d3766c64 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -612,6 +612,11 @@ static void ath9k_hif_usb_rx_stream(struct hif_device_usb *hif_dev,
 			hif_dev->remain_skb = nskb;
 			spin_unlock(&hif_dev->rx_lock);
 		} else {
+			if (pool_index == MAX_PKT_NUM_IN_TRANSFER) {
+				dev_err(&hif_dev->udev->dev,
+					"ath9k_htc: over RX MAX_PKT_NUM\n");
+				goto err;
+			}
 			nskb = __dev_alloc_skb(pkt_len + 32, GFP_ATOMIC);
 			if (!nskb) {
 				dev_err(&hif_dev->udev->dev,
-- 
cgit v1.2.3-59-g8ed1b


From 2bbcaaee1fcbd83272e29f31e2bb7e70d8c49e05 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Sat, 4 Apr 2020 12:18:38 +0800
Subject: ath9k: Fix general protection fault in ath9k_hif_usb_rx_cb

In ath9k_hif_usb_rx_cb the interface number is assumed to be 0:
usb_ifnum_to_if(urb->dev, 0)
But that isn't always true.

The case reported by syzbot:
https://lore.kernel.org/linux-usb/000000000000666c9c05a1c05d12@google.com
usb 2-1: new high-speed USB device number 2 using dummy_hcd
usb 2-1: config 1 has an invalid interface number: 2 but max is 0
usb 2-1: config 1 has no interface number 0
usb 2-1: New USB device found, idVendor=0cf3, idProduct=9271, bcdDevice=
1.08
usb 2-1: New USB device strings: Mfr=1, Product=2, SerialNumber=3
general protection fault, probably for non-canonical address
0xdffffc0000000015: 0000 [#1] SMP KASAN
KASAN: null-ptr-deref in range [0x00000000000000a8-0x00000000000000af]
CPU: 0 PID: 0 Comm: swapper/0 Not tainted 5.6.0-rc5-syzkaller #0

Call Trace
__usb_hcd_giveback_urb+0x29a/0x550 drivers/usb/core/hcd.c:1650
usb_hcd_giveback_urb+0x368/0x420 drivers/usb/core/hcd.c:1716
dummy_timer+0x1258/0x32ae drivers/usb/gadget/udc/dummy_hcd.c:1966
call_timer_fn+0x195/0x6f0 kernel/time/timer.c:1404
expire_timers kernel/time/timer.c:1449 [inline]
__run_timers kernel/time/timer.c:1773 [inline]
__run_timers kernel/time/timer.c:1740 [inline]
run_timer_softirq+0x5f9/0x1500 kernel/time/timer.c:1786
__do_softirq+0x21e/0x950 kernel/softirq.c:292
invoke_softirq kernel/softirq.c:373 [inline]
irq_exit+0x178/0x1a0 kernel/softirq.c:413
exiting_irq arch/x86/include/asm/apic.h:546 [inline]
smp_apic_timer_interrupt+0x141/0x540 arch/x86/kernel/apic/apic.c:1146
apic_timer_interrupt+0xf/0x20 arch/x86/entry/entry_64.S:829

Reported-and-tested-by: syzbot+40d5d2e8a4680952f042@syzkaller.appspotmail.com
Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200404041838.10426-6-hqjagain@gmail.com
---
 drivers/net/wireless/ath/ath9k/hif_usb.c | 48 ++++++++++++++++++++++++--------
 drivers/net/wireless/ath/ath9k/hif_usb.h |  5 ++++
 2 files changed, 42 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.c b/drivers/net/wireless/ath/ath9k/hif_usb.c
index 6049d3766c64..4ed21dad6a8e 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.c
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.c
@@ -643,9 +643,9 @@ err:
 
 static void ath9k_hif_usb_rx_cb(struct urb *urb)
 {
-	struct sk_buff *skb = (struct sk_buff *) urb->context;
-	struct hif_device_usb *hif_dev =
-		usb_get_intfdata(usb_ifnum_to_if(urb->dev, 0));
+	struct rx_buf *rx_buf = (struct rx_buf *)urb->context;
+	struct hif_device_usb *hif_dev = rx_buf->hif_dev;
+	struct sk_buff *skb = rx_buf->skb;
 	int ret;
 
 	if (!skb)
@@ -685,14 +685,15 @@ resubmit:
 	return;
 free:
 	kfree_skb(skb);
+	kfree(rx_buf);
 }
 
 static void ath9k_hif_usb_reg_in_cb(struct urb *urb)
 {
-	struct sk_buff *skb = (struct sk_buff *) urb->context;
+	struct rx_buf *rx_buf = (struct rx_buf *)urb->context;
+	struct hif_device_usb *hif_dev = rx_buf->hif_dev;
+	struct sk_buff *skb = rx_buf->skb;
 	struct sk_buff *nskb;
-	struct hif_device_usb *hif_dev =
-		usb_get_intfdata(usb_ifnum_to_if(urb->dev, 0));
 	int ret;
 
 	if (!skb)
@@ -750,6 +751,7 @@ resubmit:
 	return;
 free:
 	kfree_skb(skb);
+	kfree(rx_buf);
 	urb->context = NULL;
 }
 
@@ -795,7 +797,7 @@ static int ath9k_hif_usb_alloc_tx_urbs(struct hif_device_usb *hif_dev)
 	init_usb_anchor(&hif_dev->mgmt_submitted);
 
 	for (i = 0; i < MAX_TX_URB_NUM; i++) {
-		tx_buf = kzalloc(sizeof(struct tx_buf), GFP_KERNEL);
+		tx_buf = kzalloc(sizeof(*tx_buf), GFP_KERNEL);
 		if (!tx_buf)
 			goto err;
 
@@ -832,8 +834,9 @@ static void ath9k_hif_usb_dealloc_rx_urbs(struct hif_device_usb *hif_dev)
 
 static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev)
 {
-	struct urb *urb = NULL;
+	struct rx_buf *rx_buf = NULL;
 	struct sk_buff *skb = NULL;
+	struct urb *urb = NULL;
 	int i, ret;
 
 	init_usb_anchor(&hif_dev->rx_submitted);
@@ -841,6 +844,12 @@ static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev)
 
 	for (i = 0; i < MAX_RX_URB_NUM; i++) {
 
+		rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL);
+		if (!rx_buf) {
+			ret = -ENOMEM;
+			goto err_rxb;
+		}
+
 		/* Allocate URB */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (urb == NULL) {
@@ -855,11 +864,14 @@ static int ath9k_hif_usb_alloc_rx_urbs(struct hif_device_usb *hif_dev)
 			goto err_skb;
 		}
 
+		rx_buf->hif_dev = hif_dev;
+		rx_buf->skb = skb;
+
 		usb_fill_bulk_urb(urb, hif_dev->udev,
 				  usb_rcvbulkpipe(hif_dev->udev,
 						  USB_WLAN_RX_PIPE),
 				  skb->data, MAX_RX_BUF_SIZE,
-				  ath9k_hif_usb_rx_cb, skb);
+				  ath9k_hif_usb_rx_cb, rx_buf);
 
 		/* Anchor URB */
 		usb_anchor_urb(urb, &hif_dev->rx_submitted);
@@ -885,6 +897,8 @@ err_submit:
 err_skb:
 	usb_free_urb(urb);
 err_urb:
+	kfree(rx_buf);
+err_rxb:
 	ath9k_hif_usb_dealloc_rx_urbs(hif_dev);
 	return ret;
 }
@@ -896,14 +910,21 @@ static void ath9k_hif_usb_dealloc_reg_in_urbs(struct hif_device_usb *hif_dev)
 
 static int ath9k_hif_usb_alloc_reg_in_urbs(struct hif_device_usb *hif_dev)
 {
-	struct urb *urb = NULL;
+	struct rx_buf *rx_buf = NULL;
 	struct sk_buff *skb = NULL;
+	struct urb *urb = NULL;
 	int i, ret;
 
 	init_usb_anchor(&hif_dev->reg_in_submitted);
 
 	for (i = 0; i < MAX_REG_IN_URB_NUM; i++) {
 
+		rx_buf = kzalloc(sizeof(*rx_buf), GFP_KERNEL);
+		if (!rx_buf) {
+			ret = -ENOMEM;
+			goto err_rxb;
+		}
+
 		/* Allocate URB */
 		urb = usb_alloc_urb(0, GFP_KERNEL);
 		if (urb == NULL) {
@@ -918,11 +939,14 @@ static int ath9k_hif_usb_alloc_reg_in_urbs(struct hif_device_usb *hif_dev)
 			goto err_skb;
 		}
 
+		rx_buf->hif_dev = hif_dev;
+		rx_buf->skb = skb;
+
 		usb_fill_int_urb(urb, hif_dev->udev,
 				  usb_rcvintpipe(hif_dev->udev,
 						  USB_REG_IN_PIPE),
 				  skb->data, MAX_REG_IN_BUF_SIZE,
-				  ath9k_hif_usb_reg_in_cb, skb, 1);
+				  ath9k_hif_usb_reg_in_cb, rx_buf, 1);
 
 		/* Anchor URB */
 		usb_anchor_urb(urb, &hif_dev->reg_in_submitted);
@@ -948,6 +972,8 @@ err_submit:
 err_skb:
 	usb_free_urb(urb);
 err_urb:
+	kfree(rx_buf);
+err_rxb:
 	ath9k_hif_usb_dealloc_reg_in_urbs(hif_dev);
 	return ret;
 }
diff --git a/drivers/net/wireless/ath/ath9k/hif_usb.h b/drivers/net/wireless/ath/ath9k/hif_usb.h
index a94e7e1c86e9..5985aa15ca93 100644
--- a/drivers/net/wireless/ath/ath9k/hif_usb.h
+++ b/drivers/net/wireless/ath/ath9k/hif_usb.h
@@ -86,6 +86,11 @@ struct tx_buf {
 	struct list_head list;
 };
 
+struct rx_buf {
+	struct sk_buff *skb;
+	struct hif_device_usb *hif_dev;
+};
+
 #define HIF_USB_TX_STOP  BIT(0)
 #define HIF_USB_TX_FLUSH BIT(1)
 
-- 
cgit v1.2.3-59-g8ed1b


From c2aa30db744d9cbdde127d4ed8aeea18273834c6 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Tue, 7 Apr 2020 12:26:27 +0800
Subject: Bluetooth: debugfs option to unset MITM flag

The BT qualification test SM/MAS/PKE/BV-01-C needs us to turn off
the MITM flag when pairing, and at the same time also set the io
capability to something other than no input no output.

Currently the MITM flag is only unset when the io capability is set
to no input no output, therefore the test cannot be executed.

This patch introduces a debugfs option to force MITM flag to be
turned off.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci.h |  1 +
 net/bluetooth/hci_debugfs.c | 46 +++++++++++++++++++++++++++++++++++++++++++++
 net/bluetooth/smp.c         | 15 ++++++++++-----
 3 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 79de2a659dd6..f4e8e2a0b7c1 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -298,6 +298,7 @@ enum {
 	HCI_FORCE_STATIC_ADDR,
 	HCI_LL_RPA_RESOLUTION,
 	HCI_CMD_PENDING,
+	HCI_FORCE_NO_MITM,
 
 	__HCI_NUM_FLAGS,
 };
diff --git a/net/bluetooth/hci_debugfs.c b/net/bluetooth/hci_debugfs.c
index 6b1314c738b8..5e8af2658e44 100644
--- a/net/bluetooth/hci_debugfs.c
+++ b/net/bluetooth/hci_debugfs.c
@@ -1075,6 +1075,50 @@ DEFINE_SIMPLE_ATTRIBUTE(auth_payload_timeout_fops,
 			auth_payload_timeout_get,
 			auth_payload_timeout_set, "%llu\n");
 
+static ssize_t force_no_mitm_read(struct file *file,
+				  char __user *user_buf,
+				  size_t count, loff_t *ppos)
+{
+	struct hci_dev *hdev = file->private_data;
+	char buf[3];
+
+	buf[0] = hci_dev_test_flag(hdev, HCI_FORCE_NO_MITM) ? 'Y' : 'N';
+	buf[1] = '\n';
+	buf[2] = '\0';
+	return simple_read_from_buffer(user_buf, count, ppos, buf, 2);
+}
+
+static ssize_t force_no_mitm_write(struct file *file,
+				   const char __user *user_buf,
+				   size_t count, loff_t *ppos)
+{
+	struct hci_dev *hdev = file->private_data;
+	char buf[32];
+	size_t buf_size = min(count, (sizeof(buf) - 1));
+	bool enable;
+
+	if (copy_from_user(buf, user_buf, buf_size))
+		return -EFAULT;
+
+	buf[buf_size] = '\0';
+	if (strtobool(buf, &enable))
+		return -EINVAL;
+
+	if (enable == hci_dev_test_flag(hdev, HCI_FORCE_NO_MITM))
+		return -EALREADY;
+
+	hci_dev_change_flag(hdev, HCI_FORCE_NO_MITM);
+
+	return count;
+}
+
+static const struct file_operations force_no_mitm_fops = {
+	.open		= simple_open,
+	.read		= force_no_mitm_read,
+	.write		= force_no_mitm_write,
+	.llseek		= default_llseek,
+};
+
 DEFINE_QUIRK_ATTRIBUTE(quirk_strict_duplicate_filter,
 		       HCI_QUIRK_STRICT_DUPLICATE_FILTER);
 DEFINE_QUIRK_ATTRIBUTE(quirk_simultaneous_discovery,
@@ -1134,6 +1178,8 @@ void hci_debugfs_create_le(struct hci_dev *hdev)
 			    &max_key_size_fops);
 	debugfs_create_file("auth_payload_timeout", 0644, hdev->debugfs, hdev,
 			    &auth_payload_timeout_fops);
+	debugfs_create_file("force_no_mitm", 0644, hdev->debugfs, hdev,
+			    &force_no_mitm_fops);
 
 	debugfs_create_file("quirk_strict_duplicate_filter", 0644,
 			    hdev->debugfs, hdev,
diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index d0b695ee49f6..a85e3e49cd0d 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2393,12 +2393,17 @@ int smp_conn_security(struct hci_conn *hcon, __u8 sec_level)
 			authreq |= SMP_AUTH_CT2;
 	}
 
-	/* Require MITM if IO Capability allows or the security level
-	 * requires it.
+	/* Don't attempt to set MITM if setting is overridden by debugfs
+	 * Needed to pass certification test SM/MAS/PKE/BV-01-C
 	 */
-	if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
-	    hcon->pending_sec_level > BT_SECURITY_MEDIUM)
-		authreq |= SMP_AUTH_MITM;
+	if (!hci_dev_test_flag(hcon->hdev, HCI_FORCE_NO_MITM)) {
+		/* Require MITM if IO Capability allows or the security level
+		 * requires it.
+		 */
+		if (hcon->io_capability != HCI_IO_NO_INPUT_OUTPUT ||
+		    hcon->pending_sec_level > BT_SECURITY_MEDIUM)
+			authreq |= SMP_AUTH_MITM;
+	}
 
 	if (hcon->role == HCI_ROLE_MASTER) {
 		struct smp_cmd_pairing cp;
-- 
cgit v1.2.3-59-g8ed1b


From d1d900f822b6b2874de9c1ef8094fc8df56a2f9f Mon Sep 17 00:00:00 2001
From: Guenter Roeck <linux@roeck-us.net>
Date: Mon, 6 Apr 2020 11:54:38 -0700
Subject: Bluetooth: Simplify / fix return values from tk_request

Some static checker run by 0day reports a variableScope warning.

net/bluetooth/smp.c:870:6: warning:
	The scope of the variable 'err' can be reduced. [variableScope]

There is no need for two separate variables holding return values.
Stick with the existing variable. While at it, don't pre-initialize
'ret' because it is set in each code path.

tk_request() is supposed to return a negative error code on errors,
not a bluetooth return code. The calling code converts the return
value to SMP_UNSPECIFIED if needed.

Fixes: 92516cd97fd4 ("Bluetooth: Always request for user confirmation for Just Works")
Cc: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index a85e3e49cd0d..daf198fb2b31 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -854,8 +854,7 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	struct l2cap_chan *chan = conn->smp;
 	struct smp_chan *smp = chan->data;
 	u32 passkey = 0;
-	int ret = 0;
-	int err;
+	int ret;
 
 	/* Initialize key for JUST WORKS */
 	memset(smp->tk, 0, sizeof(smp->tk));
@@ -887,12 +886,12 @@ static int tk_request(struct l2cap_conn *conn, u8 remote_oob, u8 auth,
 	/* If Just Works, Continue with Zero TK and ask user-space for
 	 * confirmation */
 	if (smp->method == JUST_WORKS) {
-		err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst,
+		ret = mgmt_user_confirm_request(hcon->hdev, &hcon->dst,
 						hcon->type,
 						hcon->dst_type,
 						passkey, 1);
-		if (err)
-			return SMP_UNSPECIFIED;
+		if (ret)
+			return ret;
 		set_bit(SMP_FLAG_WAIT_USER, &smp->flags);
 		return 0;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From fcd156ee8bc6a989dbf7e5efbccdc9cdb831fd27 Mon Sep 17 00:00:00 2001
From: Sathish Narasimman <nsathish41@gmail.com>
Date: Wed, 8 Apr 2020 10:57:03 +0530
Subject: Bluetooth: btusb: check for NULL in btusb_find_altsetting()

The new btusb_find_altsetting() dereferences intf without first checking
it for NULL. Add the missing check in this patch.

Signed-off-by: Sathish Narasimman <sathish.narasimman@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 09913cadd1ca..871162790a0e 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -1622,6 +1622,9 @@ static struct usb_host_interface *btusb_find_altsetting(struct btusb_data *data,
 
 	BT_DBG("Looking for Alt no :%d", alt);
 
+	if (!intf)
+		return NULL;
+
 	for (i = 0; i < intf->num_altsetting; i++) {
 		if (intf->altsetting[i].desc.bAlternateSetting == alt)
 			return &intf->altsetting[i];
-- 
cgit v1.2.3-59-g8ed1b


From ffee202a78c2980688bc5d2f7d56480e69a5e0c9 Mon Sep 17 00:00:00 2001
From: Sonny Sasaka <sonnysasaka@chromium.org>
Date: Mon, 6 Apr 2020 11:04:02 -0700
Subject: Bluetooth: Always request for user confirmation for Just Works (LE
 SC)

To improve security, always give the user-space daemon a chance to
accept or reject a Just Works pairing (LE). The daemon may decide to
auto-accept based on the user's intent.

This patch is similar to the previous patch but applies for LE Secure
Connections (SC).

Signed-off-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index daf198fb2b31..df22cbf94693 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -2201,7 +2201,7 @@ mackey_and_ltk:
 	if (err)
 		return SMP_UNSPECIFIED;
 
-	if (smp->method == JUST_WORKS || smp->method == REQ_OOB) {
+	if (smp->method == REQ_OOB) {
 		if (hcon->out) {
 			sc_dhkey_check(smp);
 			SMP_ALLOW_CMD(smp, SMP_CMD_DHKEY_CHECK);
@@ -2216,6 +2216,9 @@ mackey_and_ltk:
 	confirm_hint = 0;
 
 confirm:
+	if (smp->method == JUST_WORKS)
+		confirm_hint = 1;
+
 	err = mgmt_user_confirm_request(hcon->hdev, &hcon->dst, hcon->type,
 					hcon->dst_type, passkey, confirm_hint);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From d81686d3335648197c5da3992b151648706dc0f8 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 7 Apr 2020 08:12:30 +0300
Subject: ath10k: disable TX complete indication of htt for sdio

SDIO is a high latency bus: the whole content of every TX packet is
transferred from host memory to firmware memory over the SDIO bus, so the
firmware needs much more memory than on a low latency bus such as PCI-E,
where only the TX descriptor is transferred to firmware memory. Because
memory on SDIO chips is limited, reducing the complexity of the TX logic
helps TX efficiency: it shortens the TX cycle time of each packet and
leaves the firmware more memory for TX, since TX completion also needs
memory.

This patch disables the TX complete indication from firmware for HTT data
packets, so ath10k no longer receives a TX complete indication from
firmware for them. This saves the bus bandwidth used by TX complete
indications and makes the TX logic of the firmware simpler, which results
in a significant performance improvement on the TX path.

UDP TX throughput is 130 Mbps without this patch and reaches
400 Mbps with this patch.

The downside of this patch is that the command "iw wlan0 station dump"
will show 0 for "tx retries" and "tx failed", since the status of every
TX packet is reported as success.

This patch only affects SDIO chips; it does not affect PCI, SNOC, etc.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWPZ-1

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200212080415.31265-2-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.c   |  5 +----
 drivers/net/wireless/ath/ath10k/hif.h    |  9 ++++++++
 drivers/net/wireless/ath/ath10k/htc.c    | 10 +++++++++
 drivers/net/wireless/ath/ath10k/htc.h    |  3 +++
 drivers/net/wireless/ath/ath10k/htt.c    |  5 +++++
 drivers/net/wireless/ath/ath10k/htt.h    | 13 +++++++++++-
 drivers/net/wireless/ath/ath10k/htt_rx.c | 34 ++++++++++++++++++++++++++++++-
 drivers/net/wireless/ath/ath10k/htt_tx.c | 35 ++++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/hw.h     |  2 +-
 drivers/net/wireless/ath/ath10k/sdio.c   | 23 +++++++++++++++++++++
 10 files changed, 132 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 70f3bae92a85..4cd50a353047 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -723,10 +723,7 @@ static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode)
 	if (ret)
 		return ret;
 
-	/* Data transfer is not initiated, when reduced Tx completion
-	 * is used for SDIO. disable it until fixed
-	 */
-	param &= ~HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET;
+	param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET;
 
 	/* Alternate credit size of 1544 as used by SDIO firmware is
 	 * not big enough for mac80211 / native wifi frames. disable it
diff --git a/drivers/net/wireless/ath/ath10k/hif.h b/drivers/net/wireless/ath/ath10k/hif.h
index 496ee34a4d78..0dd8973d0acf 100644
--- a/drivers/net/wireless/ath/ath10k/hif.h
+++ b/drivers/net/wireless/ath/ath10k/hif.h
@@ -56,6 +56,8 @@ struct ath10k_hif_ops {
 
 	int (*swap_mailbox)(struct ath10k *ar);
 
+	int (*get_htt_tx_complete)(struct ath10k *ar);
+
 	int (*map_service_to_pipe)(struct ath10k *ar, u16 service_id,
 				   u8 *ul_pipe, u8 *dl_pipe);
 
@@ -144,6 +146,13 @@ static inline int ath10k_hif_swap_mailbox(struct ath10k *ar)
 	return 0;
 }
 
+static inline int ath10k_hif_get_htt_tx_complete(struct ath10k *ar)
+{
+	if (ar->hif.ops->get_htt_tx_complete)
+		return ar->hif.ops->get_htt_tx_complete(ar);
+	return 0;
+}
+
 static inline int ath10k_hif_map_service_to_pipe(struct ath10k *ar,
 						 u16 service_id,
 						 u8 *ul_pipe, u8 *dl_pipe)
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 2248d6c022f4..61ee413d902a 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -660,6 +660,16 @@ int ath10k_htc_wait_target(struct ath10k_htc *htc)
 	return 0;
 }
 
+void ath10k_htc_change_tx_credit_flow(struct ath10k_htc *htc,
+				      enum ath10k_htc_ep_id eid,
+				      bool enable)
+{
+	struct ath10k *ar = htc->ar;
+	struct ath10k_htc_ep *ep = &ar->htc.endpoint[eid];
+
+	ep->tx_credit_flow_enabled = enable;
+}
+
 int ath10k_htc_connect_service(struct ath10k_htc *htc,
 			       struct ath10k_htc_svc_conn_req *conn_req,
 			       struct ath10k_htc_svc_conn_resp *conn_resp)
diff --git a/drivers/net/wireless/ath/ath10k/htc.h b/drivers/net/wireless/ath/ath10k/htc.h
index 065c82d9d689..14e5c3f712c1 100644
--- a/drivers/net/wireless/ath/ath10k/htc.h
+++ b/drivers/net/wireless/ath/ath10k/htc.h
@@ -386,6 +386,9 @@ int ath10k_htc_start(struct ath10k_htc *htc);
 int ath10k_htc_connect_service(struct ath10k_htc *htc,
 			       struct ath10k_htc_svc_conn_req  *conn_req,
 			       struct ath10k_htc_svc_conn_resp *conn_resp);
+void ath10k_htc_change_tx_credit_flow(struct ath10k_htc *htc,
+				      enum ath10k_htc_ep_id eid,
+				      bool enable);
 int ath10k_htc_send(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid,
 		    struct sk_buff *packet);
 struct sk_buff *ath10k_htc_alloc_skb(struct ath10k *ar, int size);
diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c
index 7b75200ceae5..4354bf285ff1 100644
--- a/drivers/net/wireless/ath/ath10k/htt.c
+++ b/drivers/net/wireless/ath/ath10k/htt.c
@@ -10,6 +10,7 @@
 #include "htt.h"
 #include "core.h"
 #include "debug.h"
+#include "hif.h"
 
 static const enum htt_t2h_msg_type htt_main_t2h_msg_types[] = {
 	[HTT_MAIN_T2H_MSG_TYPE_VERSION_CONF] = HTT_T2H_MSG_TYPE_VERSION_CONF,
@@ -153,6 +154,10 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
 
 	htt->eid = conn_resp.eid;
 
+	htt->disable_tx_comp = ath10k_hif_get_htt_tx_complete(htt->ar);
+	if (htt->disable_tx_comp)
+		ath10k_htc_change_tx_credit_flow(&htt->ar->htc, htt->eid, true);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 4a12564fc30e..b88c2f3787d8 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -150,9 +150,19 @@ enum htt_data_tx_desc_flags1 {
 	HTT_DATA_TX_DESC_FLAGS1_MORE_IN_BATCH    = 1 << 12,
 	HTT_DATA_TX_DESC_FLAGS1_CKSUM_L3_OFFLOAD = 1 << 13,
 	HTT_DATA_TX_DESC_FLAGS1_CKSUM_L4_OFFLOAD = 1 << 14,
-	HTT_DATA_TX_DESC_FLAGS1_RSVD1            = 1 << 15
+	HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE      = 1 << 15
 };
 
+#define HTT_TX_CREDIT_DELTA_ABS_M      0xffff0000
+#define HTT_TX_CREDIT_DELTA_ABS_S      16
+#define HTT_TX_CREDIT_DELTA_ABS_GET(word) \
+	    (((word) & HTT_TX_CREDIT_DELTA_ABS_M) >> HTT_TX_CREDIT_DELTA_ABS_S)
+
+#define HTT_TX_CREDIT_SIGN_BIT_M       0x00000100
+#define HTT_TX_CREDIT_SIGN_BIT_S       8
+#define HTT_TX_CREDIT_SIGN_BIT_GET(word) \
+	    (((word) & HTT_TX_CREDIT_SIGN_BIT_M) >> HTT_TX_CREDIT_SIGN_BIT_S)
+
 enum htt_data_tx_ext_tid {
 	HTT_DATA_TX_EXT_TID_NON_QOS_MCAST_BCAST = 16,
 	HTT_DATA_TX_EXT_TID_MGMT                = 17,
@@ -2021,6 +2031,7 @@ struct ath10k_htt {
 	bool tx_mem_allocated;
 	const struct ath10k_htt_tx_ops *tx_ops;
 	const struct ath10k_htt_rx_ops *rx_ops;
+	bool disable_tx_comp;
 };
 
 struct ath10k_htt_tx_ops {
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index f883f2a724dd..64e45bfa5d05 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -3789,6 +3789,9 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 	}
 	case HTT_T2H_MSG_TYPE_MGMT_TX_COMPLETION: {
 		struct htt_tx_done tx_done = {};
+		struct ath10k_htt *htt = &ar->htt;
+		struct ath10k_htc *htc = &ar->htc;
+		struct ath10k_htc_ep *ep = &ar->htc.endpoint[htt->eid];
 		int status = __le32_to_cpu(resp->mgmt_tx_completion.status);
 		int info = __le32_to_cpu(resp->mgmt_tx_completion.info);
 
@@ -3814,6 +3817,12 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 			break;
 		}
 
+		if (htt->disable_tx_comp) {
+			spin_lock_bh(&htc->tx_lock);
+			ep->tx_credits++;
+			spin_unlock_bh(&htc->tx_lock);
+		}
+
 		status = ath10k_txrx_tx_unref(htt, &tx_done);
 		if (!status) {
 			spin_lock_bh(&htt->tx_lock);
@@ -3888,8 +3897,31 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 		skb_queue_tail(&htt->rx_in_ord_compl_q, skb);
 		return false;
 	}
-	case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND:
+	case HTT_T2H_MSG_TYPE_TX_CREDIT_UPDATE_IND: {
+		struct ath10k_htt *htt = &ar->htt;
+		struct ath10k_htc *htc = &ar->htc;
+		struct ath10k_htc_ep *ep = &ar->htc.endpoint[htt->eid];
+		u32 msg_word = __le32_to_cpu(*(__le32 *)resp);
+		int htt_credit_delta;
+
+		htt_credit_delta = HTT_TX_CREDIT_DELTA_ABS_GET(msg_word);
+		if (HTT_TX_CREDIT_SIGN_BIT_GET(msg_word))
+			htt_credit_delta = -htt_credit_delta;
+
+		ath10k_dbg(ar, ATH10K_DBG_HTT,
+			   "htt credit update delta %d\n",
+			   htt_credit_delta);
+
+		if (htt->disable_tx_comp) {
+			spin_lock_bh(&htc->tx_lock);
+			ep->tx_credits += htt_credit_delta;
+			spin_unlock_bh(&htc->tx_lock);
+			ath10k_dbg(ar, ATH10K_DBG_HTT,
+				   "htt credit total %d\n",
+				   ep->tx_credits);
+		}
 		break;
+	}
 	case HTT_T2H_MSG_TYPE_CHAN_CHANGE: {
 		u32 phymode = __le32_to_cpu(resp->chan_change.phymode);
 		u32 freq = __le32_to_cpu(resp->chan_change.freq);
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index e9d12ea708b6..bcecf05fe2fd 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -543,7 +543,39 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
 
 void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
 {
+	struct ath10k_htt *htt = &ar->htt;
+	struct htt_tx_done tx_done = {0};
+	struct htt_cmd_hdr *htt_hdr;
+	struct htt_data_tx_desc *desc_hdr = NULL;
+	u16 flags1 = 0;
+	u8 msg_type = 0;
+
+	if (htt->disable_tx_comp) {
+		htt_hdr = (struct htt_cmd_hdr *)skb->data;
+		msg_type = htt_hdr->msg_type;
+
+		if (msg_type == HTT_H2T_MSG_TYPE_TX_FRM) {
+			desc_hdr = (struct htt_data_tx_desc *)
+				(skb->data + sizeof(*htt_hdr));
+			flags1 = __le16_to_cpu(desc_hdr->flags1);
+		}
+	}
+
 	dev_kfree_skb_any(skb);
+
+	if ((!htt->disable_tx_comp) || (msg_type != HTT_H2T_MSG_TYPE_TX_FRM))
+		return;
+
+	ath10k_dbg(ar, ATH10K_DBG_HTT,
+		   "htt tx complete msdu id:%u ,flags1:%x\n",
+		   __le16_to_cpu(desc_hdr->id), flags1);
+
+	if (flags1 & HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE)
+		return;
+
+	tx_done.status = HTT_TX_COMPL_STATE_ACK;
+	tx_done.msdu_id = __le16_to_cpu(desc_hdr->id);
+	ath10k_txrx_tx_unref(&ar->htt, &tx_done);
 }
 
 void ath10k_htt_hif_tx_complete(struct ath10k *ar, struct sk_buff *skb)
@@ -1279,6 +1311,9 @@ static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txm
 		flags0 |= SM(ATH10K_HW_TXRX_MGMT,
 			     HTT_DATA_TX_DESC_FLAGS0_PKT_TYPE);
 		flags0 |= HTT_DATA_TX_DESC_FLAGS0_MAC_HDR_PRESENT;
+
+		if (htt->disable_tx_comp)
+			flags1 |= HTT_DATA_TX_DESC_FLAGS1_TX_COMPLETE;
 		break;
 	}
 
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 970c736ac6bb..2a7af5861788 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -765,7 +765,7 @@ ath10k_is_rssi_enable(struct ath10k_hw_params *hw,
 #define TARGET_TLV_NUM_TDLS_VDEVS		1
 #define TARGET_TLV_NUM_TIDS			((TARGET_TLV_NUM_PEERS) * 2)
 #define TARGET_TLV_NUM_MSDU_DESC		(1024 + 32)
-#define TARGET_TLV_NUM_MSDU_DESC_HL		64
+#define TARGET_TLV_NUM_MSDU_DESC_HL		1024
 #define TARGET_TLV_NUM_WOW_PATTERNS		22
 #define TARGET_TLV_MGMT_NUM_MSDU_DESC		(50)
 
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 1f709b65c29b..5a0db342e5ad 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -1752,6 +1752,28 @@ static int ath10k_sdio_hif_swap_mailbox(struct ath10k *ar)
 	return 0;
 }
 
+static int ath10k_sdio_get_htt_tx_complete(struct ath10k *ar)
+{
+	u32 addr, val;
+	int ret;
+
+	addr = host_interest_item_address(HI_ITEM(hi_acs_flags));
+
+	ret = ath10k_sdio_hif_diag_read32(ar, addr, &val);
+	if (ret) {
+		ath10k_warn(ar,
+			    "unable to read hi_acs_flags for htt tx comple : %d\n", ret);
+		return ret;
+	}
+
+	ret = (val & HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_FW_ACK);
+
+	ath10k_dbg(ar, ATH10K_DBG_SDIO, "sdio reduce tx complete fw%sack\n",
+		   ret ? " " : " not ");
+
+	return ret;
+}
+
 /* HIF start/stop */
 
 static int ath10k_sdio_hif_start(struct ath10k *ar)
@@ -2026,6 +2048,7 @@ static const struct ath10k_hif_ops ath10k_sdio_hif_ops = {
 	.start			= ath10k_sdio_hif_start,
 	.stop			= ath10k_sdio_hif_stop,
 	.swap_mailbox		= ath10k_sdio_hif_swap_mailbox,
+	.get_htt_tx_complete	= ath10k_sdio_get_htt_tx_complete,
 	.map_service_to_pipe	= ath10k_sdio_hif_map_service_to_pipe,
 	.get_default_pipe	= ath10k_sdio_hif_get_default_pipe,
 	.send_complete_check	= ath10k_sdio_hif_send_complete_check,
-- 
cgit v1.2.3-59-g8ed1b


From c61a748370438ca1ae8389071664b2520f16820c Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 7 Apr 2020 08:12:34 +0300
Subject: ath10k: change ATH10K_SDIO_BUS_REQUEST_MAX_NUM from 64 to 1024

SDIO bus bandwidth is low, so during high performance TX tests the driver
sometimes runs out of ath10k_sdio_bus_request entries and prints:
ath10k_sdio mmc1:0001:1: unable to allocate bus request for async request

Changing the number from 64 to 1024 prevents this from happening.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWP-1.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200212080415.31265-3-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/sdio.h | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
index 33195f49acab..1c987494ad22 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.h
+++ b/drivers/net/wireless/ath/ath10k/sdio.h
@@ -37,7 +37,7 @@
 	(ATH10K_SDIO_MAX_BUFFER_SIZE - sizeof(struct ath10k_htc_hdr))
 
 #define ATH10K_HIF_MBOX_NUM_MAX                 4
-#define ATH10K_SDIO_BUS_REQUEST_MAX_NUM         64
+#define ATH10K_SDIO_BUS_REQUEST_MAX_NUM         1024
 
 #define ATH10K_SDIO_HIF_COMMUNICATION_TIMEOUT_HZ (100 * HZ)
 
@@ -98,6 +98,7 @@
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF 0xFFFEFFFF
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON 0x10000
 
+/* TODO: remove this and use skb->cb instead, much cleaner approach */
 struct ath10k_sdio_bus_request {
 	struct list_head list;
 
-- 
cgit v1.2.3-59-g8ed1b


From 943d5d92c5e87aa8293aae6de2b3ee977aa7d3cf Mon Sep 17 00:00:00 2001
From: Daniels Umanovskis <du@axentia.se>
Date: Thu, 9 Apr 2020 13:18:29 +0200
Subject: Bluetooth: log advertisement packet length if it gets corrected

The error could indicate a problem with the Bluetooth device. It
is easier to investigate if the packet's actual length gets logged,
not just the fact that a discrepancy occurred.

Signed-off-by: Daniels Umanovskis <du@axentia.se>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 51e6461f0b71..966fc543c01d 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5396,7 +5396,8 @@ static void process_adv_report(struct hci_dev *hdev, u8 type, bdaddr_t *bdaddr,
 
 	/* Adjust for actual length */
 	if (len != real_len) {
-		bt_dev_err_ratelimited(hdev, "advertising data len corrected");
+		bt_dev_err_ratelimited(hdev, "advertising data len corrected %u -> %u",
+				       len, real_len);
 		len = real_len;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 04896832c94aae4842100cafb8d3a73e1bed3a45 Mon Sep 17 00:00:00 2001
From: "Ziqian SUN (Zamir)" <sztsian@gmail.com>
Date: Sat, 11 Apr 2020 09:34:27 +0800
Subject: Bluetooth: btrtl: Add support for RTL8761B

Add new compatible device RTL8761B. RTL8761B is a USB Bluetooth device,
with support of BLE and BR/EDR. The USB info is

T:  Bus=03 Lev=04 Prnt=04 Port=00 Cnt=01 Dev#= 29 Spd=12   MxCh= 0
D:  Ver= 1.10 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=0bda ProdID=8771 Rev= 2.00
S:  Manufacturer=Realtek
S:  Product=Bluetooth Radio
S:  SerialNumber=XXXXXXXXXXXX
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=500mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  16 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms

Signed-off-by: Ziqian SUN (Zamir) <sztsian@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btrtl.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btrtl.c b/drivers/bluetooth/btrtl.c
index 67f4bc21e7c5..3a9afc905f24 100644
--- a/drivers/bluetooth/btrtl.c
+++ b/drivers/bluetooth/btrtl.c
@@ -130,12 +130,19 @@ static const struct id_table ic_id_table[] = {
 	  .cfg_name = "rtl_bt/rtl8821c_config" },
 
 	/* 8761A */
-	{ IC_MATCH_FL_LMPSUBV, RTL_ROM_LMP_8761A, 0x0,
+	{ IC_INFO(RTL_ROM_LMP_8761A, 0xa),
 	  .config_needed = false,
 	  .has_rom_version = true,
 	  .fw_name  = "rtl_bt/rtl8761a_fw.bin",
 	  .cfg_name = "rtl_bt/rtl8761a_config" },
 
+	/* 8761B */
+	{ IC_INFO(RTL_ROM_LMP_8761A, 0xb),
+	  .config_needed = false,
+	  .has_rom_version = true,
+	  .fw_name  = "rtl_bt/rtl8761b_fw.bin",
+	  .cfg_name = "rtl_bt/rtl8761b_config" },
+
 	/* 8822C with UART interface */
 	{ .match_flags = IC_MATCH_FL_LMPSUBV | IC_MATCH_FL_HCIREV |
 			 IC_MATCH_FL_HCIBUS,
@@ -267,6 +274,7 @@ static int rtlbt_parse_firmware(struct hci_dev *hdev,
 		{ RTL_ROM_LMP_8723B, 9 },	/* 8723D */
 		{ RTL_ROM_LMP_8821A, 10 },	/* 8821C */
 		{ RTL_ROM_LMP_8822B, 13 },	/* 8822C */
+		{ RTL_ROM_LMP_8761A, 14 },	/* 8761B */
 	};
 
 	min_size = sizeof(struct rtl_epatch_header) + sizeof(extension_sig) + 3;
-- 
cgit v1.2.3-59-g8ed1b


From 1e744bf218b54d2e241aa6107484828d4f4a9fdc Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Wed, 8 Apr 2020 16:33:15 +0530
Subject: ath11k: fix duplication peer create on same radio

Add the pdev index information to the peer object to validate
peer creation. Ignore the peer creation request if the given
MAC address is already present in the peer list on the same radio.
If peer creation is allowed in that scenario, a FW assert will happen.
The scenario occurs in two cases where multiple AP VAPs are created on
the same radio:

1. when a testing tool sends association requests to two APs with the
   same MAC address
2. when a station roams from one AP VAP to another AP VAP.

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586343795-21422-1-git-send-email-periyasa@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/peer.c | 35 ++++++++++++++++++++++++++++++++--
 drivers/net/wireless/ath/ath11k/peer.h |  1 +
 2 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/peer.c b/drivers/net/wireless/ath/ath11k/peer.c
index f43deacc01bd..297172538620 100644
--- a/drivers/net/wireless/ath/ath11k/peer.c
+++ b/drivers/net/wireless/ath/ath11k/peer.c
@@ -17,7 +17,26 @@ struct ath11k_peer *ath11k_peer_find(struct ath11k_base *ab, int vdev_id,
 	list_for_each_entry(peer, &ab->peers, list) {
 		if (peer->vdev_id != vdev_id)
 			continue;
-		if (memcmp(peer->addr, addr, ETH_ALEN))
+		if (!ether_addr_equal(peer->addr, addr))
+			continue;
+
+		return peer;
+	}
+
+	return NULL;
+}
+
+static struct ath11k_peer *ath11k_peer_find_by_pdev_idx(struct ath11k_base *ab,
+							u8 pdev_idx, const u8 *addr)
+{
+	struct ath11k_peer *peer;
+
+	lockdep_assert_held(&ab->base_lock);
+
+	list_for_each_entry(peer, &ab->peers, list) {
+		if (peer->pdev_idx != pdev_idx)
+			continue;
+		if (!ether_addr_equal(peer->addr, addr))
 			continue;
 
 		return peer;
@@ -34,7 +53,7 @@ struct ath11k_peer *ath11k_peer_find_by_addr(struct ath11k_base *ab,
 	lockdep_assert_held(&ab->base_lock);
 
 	list_for_each_entry(peer, &ab->peers, list) {
-		if (memcmp(peer->addr, addr, ETH_ALEN))
+		if (!ether_addr_equal(peer->addr, addr))
 			continue;
 
 		return peer;
@@ -200,6 +219,17 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 		return -ENOBUFS;
 	}
 
+	spin_lock_bh(&ar->ab->base_lock);
+	peer = ath11k_peer_find_by_pdev_idx(ar->ab, ar->pdev_idx, param->peer_addr);
+	if (peer) {
+		spin_unlock_bh(&ar->ab->base_lock);
+		ath11k_info(ar->ab,
+			    "ignoring the peer %pM creation on same pdev idx %d\n",
+			    param->peer_addr, ar->pdev_idx);
+		return -EINVAL;
+	}
+	spin_unlock_bh(&ar->ab->base_lock);
+
 	ret = ath11k_wmi_send_peer_create_cmd(ar, param);
 	if (ret) {
 		ath11k_warn(ar->ab,
@@ -225,6 +255,7 @@ int ath11k_peer_create(struct ath11k *ar, struct ath11k_vif *arvif,
 		return -ENOENT;
 	}
 
+	peer->pdev_idx = ar->pdev_idx;
 	peer->sta = sta;
 	arvif->ast_hash = peer->ast_hash;
 
diff --git a/drivers/net/wireless/ath/ath11k/peer.h b/drivers/net/wireless/ath/ath11k/peer.h
index ccca1523a6ea..5d125ce8984e 100644
--- a/drivers/net/wireless/ath/ath11k/peer.h
+++ b/drivers/net/wireless/ath/ath11k/peer.h
@@ -13,6 +13,7 @@ struct ath11k_peer {
 	u8 addr[ETH_ALEN];
 	int peer_id;
 	u16 ast_hash;
+	u8 pdev_idx;
 
 	/* protected by ab->data_lock */
 	struct ieee80211_key_conf *keys[WMI_MAX_KEY_INDEX + 1];
-- 
cgit v1.2.3-59-g8ed1b


From bd902b1bdb25729be44c25630f44735fd6b8b254 Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Wed, 8 Apr 2020 16:35:57 +0530
Subject: ath11k: Modify the interrupt timer threshold

Set the interrupt timer threshold param to 256 to avoid a HW watchdog
in heavy multicast traffic scenarios.

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586343957-21474-1-git-send-email-periyasa@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/hal.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/hal.h b/drivers/net/wireless/ath/ath11k/hal.h
index 7722822a0456..780a3e11b609 100644
--- a/drivers/net/wireless/ath/ath11k/hal.h
+++ b/drivers/net/wireless/ath/ath11k/hal.h
@@ -599,7 +599,7 @@ struct hal_srng {
 /* Interrupt mitigation - timer threshold in us */
 #define HAL_SRNG_INT_TIMER_THRESHOLD_TX 1000
 #define HAL_SRNG_INT_TIMER_THRESHOLD_RX 500
-#define HAL_SRNG_INT_TIMER_THRESHOLD_OTHER 1000
+#define HAL_SRNG_INT_TIMER_THRESHOLD_OTHER 256
 
 /* HW SRNG configuration table */
 struct hal_srng_config {
-- 
cgit v1.2.3-59-g8ed1b


From a3baa8f084198949f3739651d96634d897f3224d Mon Sep 17 00:00:00 2001
From: Aloka Dixit <alokad@codeaurora.org>
Date: Wed, 8 Apr 2020 10:41:17 -0700
Subject: ath11k: Fix TWT radio count

TWT feature fails on radio2 because physical device count is
hardcoded to 2. Set value dynamically.

Signed-off-by: Aloka Dixit <alokad@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200408174117.22957-1-alokad@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/wmi.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/wmi.c b/drivers/net/wireless/ath/ath11k/wmi.c
index 973b72a0ca69..c2a972377687 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.c
+++ b/drivers/net/wireless/ath/ath11k/wmi.c
@@ -3245,7 +3245,7 @@ int ath11k_wmi_cmd_init(struct ath11k_base *ab)
 	config.beacon_tx_offload_max_vdev = ab->num_radios * TARGET_MAX_BCN_OFFLD;
 	config.rx_batchmode = TARGET_RX_BATCHMODE;
 	config.peer_map_unmap_v2_support = 1;
-	config.twt_ap_pdev_count = 2;
+	config.twt_ap_pdev_count = ab->num_radios;
 	config.twt_ap_sta_count = 1000;
 
 	memcpy(&wmi_sc->wlan_resource_config, &config, sizeof(config));
-- 
cgit v1.2.3-59-g8ed1b


From 05090864fc7ecfe72558087216fcccc5eb46add8 Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Thu, 9 Apr 2020 14:00:13 +0530
Subject: ath11k: set IRQ_DISABLE_UNLAZY flag for DP interrupts

Unlike CE interrupts, DP interrupts are not enabled/disabled at
source; they are enabled/disabled only at GIC level, therefore
it is required to set IRQ_DISABLE_UNLAZY flag to avoid spurious
interrupts.

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586421013-23025-1-git-send-email-mpubbise@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/ahb.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index 59342d2797ca..3b2b76d602f2 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -788,7 +788,7 @@ static int ath11k_ahb_ext_irq_config(struct ath11k_base *ab)
 			irq = platform_get_irq_byname(ab->pdev,
 						      irq_name[irq_idx]);
 			ab->irq_num[irq_idx] = irq;
-			irq_set_status_flags(irq, IRQ_NOAUTOEN);
+			irq_set_status_flags(irq, IRQ_NOAUTOEN | IRQ_DISABLE_UNLAZY);
 			ret = request_irq(irq, ath11k_ahb_ext_interrupt_handler,
 					  IRQF_TRIGGER_RISING,
 					  irq_name[irq_idx], irq_grp);
-- 
cgit v1.2.3-59-g8ed1b


From 7395fb496577f0f9abf7fd278f00a8941b2f7ad8 Mon Sep 17 00:00:00 2001
From: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Date: Thu, 9 Apr 2020 14:13:17 +0530
Subject: ath11k: rx path optimizations

During RX, accessing the reo dest ring descriptor directly is consuming
a lot of CPU cycles. Accessing the descriptor after copying it locally
has improved CPU usage by around ~10-15% while measuring throughput
in RX DBTC test cases(all radios are involved in the throughput
measurement).

HW tested: IPQ8074

Signed-off-by: Manikanta Pubbisetty <mpubbise@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586421797-885-1-git-send-email-mpubbise@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index a3f2c76b3471..203fd44ff352 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -2403,12 +2403,12 @@ int ath11k_dp_process_rx(struct ath11k_base *ab, int ring_id,
 
 try_again:
 	while ((rx_desc = ath11k_hal_srng_dst_get_next_entry(ab, srng))) {
-		struct hal_reo_dest_ring *desc = (struct hal_reo_dest_ring *)rx_desc;
+		struct hal_reo_dest_ring desc = *(struct hal_reo_dest_ring *)rx_desc;
 		enum hal_reo_dest_ring_push_reason push_reason;
 		u32 cookie;
 
 		cookie = FIELD_GET(BUFFER_ADDR_INFO1_SW_COOKIE,
-				   desc->buf_addr_info.info1);
+				   desc.buf_addr_info.info1);
 		buf_id = FIELD_GET(DP_RXDMA_BUF_COOKIE_BUF_ID,
 				   cookie);
 		mac_id = FIELD_GET(DP_RXDMA_BUF_COOKIE_PDEV_ID, cookie);
@@ -2436,7 +2436,7 @@ try_again:
 		total_msdu_reaped++;
 
 		push_reason = FIELD_GET(HAL_REO_DEST_RING_INFO0_PUSH_REASON,
-					desc->info0);
+					desc.info0);
 		if (push_reason !=
 		    HAL_REO_DEST_RING_PUSH_REASON_ROUTING_INSTRUCTION) {
 			dev_kfree_skb_any(msdu);
@@ -2444,15 +2444,15 @@ try_again:
 			continue;
 		}
 
-		rxcb->is_first_msdu = !!(desc->rx_msdu_info.info0 &
+		rxcb->is_first_msdu = !!(desc.rx_msdu_info.info0 &
 					 RX_MSDU_DESC_INFO0_FIRST_MSDU_IN_MPDU);
-		rxcb->is_last_msdu = !!(desc->rx_msdu_info.info0 &
+		rxcb->is_last_msdu = !!(desc.rx_msdu_info.info0 &
 					RX_MSDU_DESC_INFO0_LAST_MSDU_IN_MPDU);
-		rxcb->is_continuation = !!(desc->rx_msdu_info.info0 &
+		rxcb->is_continuation = !!(desc.rx_msdu_info.info0 &
 					   RX_MSDU_DESC_INFO0_MSDU_CONTINUATION);
 		rxcb->mac_id = mac_id;
 		rxcb->tid = FIELD_GET(HAL_REO_DEST_RING_INFO0_RX_QUEUE_NUM,
-				      desc->info0);
+				      desc.info0);
 
 		__skb_queue_tail(&msdu_list, msdu);
 
-- 
cgit v1.2.3-59-g8ed1b


From ca2c6881dccabe00a38cda00ddcccb55e6abe245 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 28 Mar 2020 11:05:24 +0800
Subject: rtw88: Make two functions static

Fix sparse warnings:

drivers/net/wireless/realtek/rtw88/fw.c:633:4: warning:
 symbol 'rtw_get_rsvd_page_probe_req_location' was not declared. Should it be static?
drivers/net/wireless/realtek/rtw88/fw.c:650:5: warning:
 symbol 'rtw_get_rsvd_page_probe_req_size' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200328030524.16032-1-yuehaibing@huawei.com
---
 drivers/net/wireless/realtek/rtw88/fw.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 05c430b3489c..9192ab26e39b 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -630,8 +630,8 @@ void rtw_fw_set_pg_info(struct rtw_dev *rtwdev)
 	rtw_fw_send_h2c_command(rtwdev, h2c_pkt);
 }
 
-u8 rtw_get_rsvd_page_probe_req_location(struct rtw_dev *rtwdev,
-					struct cfg80211_ssid *ssid)
+static u8 rtw_get_rsvd_page_probe_req_location(struct rtw_dev *rtwdev,
+					       struct cfg80211_ssid *ssid)
 {
 	struct rtw_rsvd_page *rsvd_pkt;
 	u8 location = 0;
@@ -647,8 +647,8 @@ u8 rtw_get_rsvd_page_probe_req_location(struct rtw_dev *rtwdev,
 	return location;
 }
 
-u16 rtw_get_rsvd_page_probe_req_size(struct rtw_dev *rtwdev,
-				     struct cfg80211_ssid *ssid)
+static u16 rtw_get_rsvd_page_probe_req_size(struct rtw_dev *rtwdev,
+					    struct cfg80211_ssid *ssid)
 {
 	struct rtw_rsvd_page *rsvd_pkt;
 	u16 size = 0;
-- 
cgit v1.2.3-59-g8ed1b


From c57673852062428cdeabdd6501ac8b8e4c302067 Mon Sep 17 00:00:00 2001
From: Jaehoon Chung <jh80.chung@samsung.com>
Date: Mon, 30 Mar 2020 14:25:28 +0900
Subject: brcmfmac: fix wrong location to get firmware feature

The sup_wpa feature is queried after the feature_disable flag is applied.
If the firmware supports the sup_wpa feature, it is always enabled
regardless of the feature_disable flag.

Fixes: b8a64f0e96c2 ("brcmfmac: support 4-way handshake offloading for WPA/WPA2-PSK")
Signed-off-by: Jaehoon Chung <jh80.chung@samsung.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200330052528.10503-1-jh80.chung@samsung.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
index 5da0dda0d899..0dcefbd0c000 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/feature.c
@@ -285,13 +285,14 @@ void brcmf_feat_attach(struct brcmf_pub *drvr)
 	if (!err)
 		ifp->drvr->feat_flags |= BIT(BRCMF_FEAT_SCAN_RANDOM_MAC);
 
+	brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_FWSUP, "sup_wpa");
+
 	if (drvr->settings->feature_disable) {
 		brcmf_dbg(INFO, "Features: 0x%02x, disable: 0x%02x\n",
 			  ifp->drvr->feat_flags,
 			  drvr->settings->feature_disable);
 		ifp->drvr->feat_flags &= ~drvr->settings->feature_disable;
 	}
-	brcmf_feat_iovar_int_get(ifp, BRCMF_FEAT_FWSUP, "sup_wpa");
 
 	brcmf_feat_firmware_overrides(drvr);
 
-- 
cgit v1.2.3-59-g8ed1b


From a24993e54b9cac81b2814da53a664261af10a829 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Thu, 2 Apr 2020 16:19:17 +0800
Subject: rtlwifi: rtl8723ae: fix spelling mistake "chang" -> "change"

There is a spelling mistake in a trace message. Fix it.

Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585815557-20212-1-git-send-email-hqjagain@gmail.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hal_btc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hal_btc.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hal_btc.c
index 680198280f8f..652d8ff9cccb 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hal_btc.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hal_btc.c
@@ -131,7 +131,7 @@ static bool rtl8723e_dm_bt_is_same_coexist_state(struct ieee80211_hw *hw)
 	    (rtlpriv->btcoexist.previous_state_h ==
 	     rtlpriv->btcoexist.cstate_h)) {
 		RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_DMESG,
-			 "[DM][BT], Coexist state do not chang!!\n");
+			 "[DM][BT], Coexist state do not change!!\n");
 		return true;
 	} else {
 		RT_TRACE(rtlpriv, COMP_BT_COEXIST, DBG_DMESG,
-- 
cgit v1.2.3-59-g8ed1b


From f9f46bca59d11d0fa04087c840e23ca94cd239b5 Mon Sep 17 00:00:00 2001
From: Qiujun Huang <hqjagain@gmail.com>
Date: Thu, 2 Apr 2020 22:17:58 +0800
Subject: rsi: fix a typo "throld" -> "threshold"

There is a typo in debug message. Fix it.
s/throld/threshold

Signed-off-by: Qiujun Huang <hqjagain@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1585837078-6149-1-git-send-email-hqjagain@gmail.com
---
 drivers/net/wireless/rsi/rsi_91x_mac80211.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/rsi/rsi_91x_mac80211.c b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
index 440088293aff..5c0adb0efc5d 100644
--- a/drivers/net/wireless/rsi/rsi_91x_mac80211.c
+++ b/drivers/net/wireless/rsi/rsi_91x_mac80211.c
@@ -832,7 +832,7 @@ static void rsi_mac80211_bss_info_changed(struct ieee80211_hw *hw,
 		common->cqm_info.last_cqm_event_rssi = 0;
 		common->cqm_info.rssi_thold = bss_conf->cqm_rssi_thold;
 		common->cqm_info.rssi_hyst = bss_conf->cqm_rssi_hyst;
-		rsi_dbg(INFO_ZONE, "RSSI throld & hysteresis are: %d %d\n",
+		rsi_dbg(INFO_ZONE, "RSSI threshold & hysteresis are: %d %d\n",
 			common->cqm_info.rssi_thold,
 			common->cqm_info.rssi_hyst);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 09667ea7ce6d6a90152aeba631f11b55f283a898 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sun, 5 Apr 2020 14:39:06 +0100
Subject: brcm80211: remove redundant pointer 'address'

Pointer 'address' is being assigned and updated in a few places
but it is never read. Hence the assignments are redundant and can
be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200405133906.381358-1-colin.king@canonical.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/commonring.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/commonring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/commonring.c
index 49db54d23e03..e44236cb210e 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/commonring.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/commonring.c
@@ -180,14 +180,8 @@ again:
 
 int brcmf_commonring_write_complete(struct brcmf_commonring *commonring)
 {
-	void *address;
-
-	address = commonring->buf_addr;
-	address += (commonring->f_ptr * commonring->item_len);
-	if (commonring->f_ptr > commonring->w_ptr) {
-		address = commonring->buf_addr;
+	if (commonring->f_ptr > commonring->w_ptr)
 		commonring->f_ptr = 0;
-	}
 
 	commonring->f_ptr = commonring->w_ptr;
 
-- 
cgit v1.2.3-59-g8ed1b


From 63e49a9fdac1b4e97ac26cb3fe953f210d83bc53 Mon Sep 17 00:00:00 2001
From: Giuseppe Marco Randazzo <gmrandazzo@gmail.com>
Date: Mon, 6 Apr 2020 00:06:59 +0200
Subject: p54usb: add AirVasT USB stick device-id

This patch adds the AirVasT USB wireless devices 124a:4026
to the list of supported devices. It's using the ISL3886
USB firmware. Without this modification, the WiFi adapter
is not recognized.

Cc: <stable@vger.kernel.org>
Signed-off-by: Giuseppe Marco Randazzo <gmrandazzo@gmail.com>
Signed-off-by: Christian Lamparter <chunkeey@gmail.com> [formatted, reworded]
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200405220659.45621-1-chunkeey@gmail.com
---
 drivers/net/wireless/intersil/p54/p54usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/intersil/p54/p54usb.c b/drivers/net/wireless/intersil/p54/p54usb.c
index b94764c88750..ff0e30c0c14c 100644
--- a/drivers/net/wireless/intersil/p54/p54usb.c
+++ b/drivers/net/wireless/intersil/p54/p54usb.c
@@ -61,6 +61,7 @@ static const struct usb_device_id p54u_table[] = {
 	{USB_DEVICE(0x0db0, 0x6826)},	/* MSI UB54G (MS-6826) */
 	{USB_DEVICE(0x107b, 0x55f2)},	/* Gateway WGU-210 (Gemtek) */
 	{USB_DEVICE(0x124a, 0x4023)},	/* Shuttle PN15, Airvast WM168g, IOGear GWU513 */
+	{USB_DEVICE(0x124a, 0x4026)},	/* AirVasT USB wireless device */
 	{USB_DEVICE(0x1435, 0x0210)},	/* Inventel UR054G */
 	{USB_DEVICE(0x15a9, 0x0002)},	/* Gemtek WUBI-100GW 802.11g */
 	{USB_DEVICE(0x1630, 0x0005)},	/* 2Wire 802.11g USB (v1) / Z-Com */
-- 
cgit v1.2.3-59-g8ed1b


From 6343a6d4b2130be9323f347d60af8a7ba8f7242c Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 7 Apr 2020 15:33:31 +0800
Subject: rtw88: Add delay on polling h2c command status bit

On some systems we can constantly see rtw88 complain:
[39584.721375] rtw_pci 0000:03:00.0: failed to send h2c command

Increase the interval between checks to give the status bit time to
actually change.

Use the read_poll_timeout() macro, which fits everything we need here.

Suggested-by: Kalle Valo <kvalo@codeaurora.org>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200407073331.397-1-kai.heng.feng@canonical.com
---
 drivers/net/wireless/realtek/rtw88/fw.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 9192ab26e39b..245da96dfddc 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -2,6 +2,8 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/iopoll.h>
+
 #include "main.h"
 #include "coex.h"
 #include "fw.h"
@@ -193,8 +195,8 @@ static void rtw_fw_send_h2c_command(struct rtw_dev *rtwdev,
 	u8 box;
 	u8 box_state;
 	u32 box_reg, box_ex_reg;
-	u32 h2c_wait;
 	int idx;
+	int ret;
 
 	rtw_dbg(rtwdev, RTW_DBG_FW,
 		"send H2C content %02x%02x%02x%02x %02x%02x%02x%02x\n",
@@ -226,12 +228,11 @@ static void rtw_fw_send_h2c_command(struct rtw_dev *rtwdev,
 		goto out;
 	}
 
-	h2c_wait = 20;
-	do {
-		box_state = rtw_read8(rtwdev, REG_HMETFR);
-	} while ((box_state >> box) & 0x1 && --h2c_wait > 0);
+	ret = read_poll_timeout(rtw_read8, box_state,
+				!((box_state >> box) & 0x1), 100, 3000, false,
+				rtwdev, REG_HMETFR);
 
-	if (!h2c_wait) {
+	if (ret) {
 		rtw_err(rtwdev, "failed to send h2c command\n");
 		goto out;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ec4d3e3a054578de34cd0b587ab8a1ac36f629d9 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 7 Apr 2020 14:00:43 -0500
Subject: b43legacy: Fix case where channel status is corrupted

This patch fixes commit 75388acd0cd8 ("add mac80211-based driver for
legacy BCM43xx devices")

In https://bugzilla.kernel.org/show_bug.cgi?id=207093, a defect in
b43legacy is reported. Upon testing, this problem exists on PPC and
X86 platforms, is present in the oldest kernel tested (3.2), and
has been present in the driver since it was first added to the kernel.

The problem is a corrupted channel status received from the device.
Both the internal card in a PowerBook G4 and the PCMCIA version
(Broadcom BCM4306 with PCI ID 14e4:4320) have the problem. Only Rev. 2
(revision 4 of the 802.11 core) of the chip has been tested. No other
devices using b43legacy are available for testing.

Various sources of the problem were considered. Buffer overrun and
other sources of corruption within the driver were rejected because
the faulty channel status is always the same, not a random value.
It was concluded that the faulty data is coming from the device, probably
due to a firmware bug. As that source is not available, the driver
must take appropriate action to recover.

At present, the driver reports the error, and then continues to process
the bad packet. This is believed to be a mistake, and the correct
action is to drop the corrupted packet.

Fixes: 75388acd0cd8 ("add mac80211-based driver for legacy BCM43xx devices")
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Reported-and-tested by: F. Erhard <erhard_f@mailbox.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200407190043.1686-1-Larry.Finger@lwfinger.net
---
 drivers/net/wireless/broadcom/b43legacy/xmit.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/broadcom/b43legacy/xmit.c b/drivers/net/wireless/broadcom/b43legacy/xmit.c
index e9b23c2e5bd4..efd63f4ce74f 100644
--- a/drivers/net/wireless/broadcom/b43legacy/xmit.c
+++ b/drivers/net/wireless/broadcom/b43legacy/xmit.c
@@ -558,6 +558,7 @@ void b43legacy_rx(struct b43legacy_wldev *dev,
 	default:
 		b43legacywarn(dev->wl, "Unexpected value for chanstat (0x%X)\n",
 		       chanstat);
+		goto drop;
 	}
 
 	memcpy(IEEE80211_SKB_RXCB(skb), &status, sizeof(status));
-- 
cgit v1.2.3-59-g8ed1b


From c960e2b384ef3cec4dd447ac90dbdc27a3c41a08 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Tue, 7 Apr 2020 21:32:33 +0200
Subject: qtnfmac: Simplify code in _attach functions

There is no need to re-implement 'netdev_alloc_skb_ip_align()' here.
Keep the code simple.

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200407193233.9439-1-christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c | 2 +-
 drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
index dbb241106d8a..eb67b66b846b 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/pearl_pcie.c
@@ -286,7 +286,7 @@ static int pearl_skb2rbd_attach(struct qtnf_pcie_pearl_state *ps, u16 index)
 	struct sk_buff *skb;
 	dma_addr_t paddr;
 
-	skb = __netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE, GFP_ATOMIC);
+	skb = netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE);
 	if (!skb) {
 		priv->rx_skb[index] = NULL;
 		return -ENOMEM;
diff --git a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
index dbf3c5fd751f..d1b850aa4657 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/pcie/topaz_pcie.c
@@ -247,7 +247,7 @@ topaz_skb2rbd_attach(struct qtnf_pcie_topaz_state *ts, u16 index, u32 wrap)
 	struct sk_buff *skb;
 	dma_addr_t paddr;
 
-	skb = __netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE, GFP_ATOMIC);
+	skb = netdev_alloc_skb_ip_align(NULL, SKB_BUF_SIZE);
 	if (!skb) {
 		ts->base.rx_skb[index] = NULL;
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From fd7fb0253cdf96241308336e7833186b928336a8 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Apr 2020 17:08:17 +0800
Subject: brcmsmac: make brcms_c_set_mac() void

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c:3773:5-8:
Unneeded variable: "err". Return "0" on line 3781

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410090817.26883-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
index 7f2c15c799d2..d88f8d456b94 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
@@ -3768,17 +3768,14 @@ static void brcms_c_set_ps_ctrl(struct brcms_c_info *wlc)
  * Write this BSS config's MAC address to core.
  * Updates RXE match engine.
  */
-static int brcms_c_set_mac(struct brcms_bss_cfg *bsscfg)
+static void brcms_c_set_mac(struct brcms_bss_cfg *bsscfg)
 {
-	int err = 0;
 	struct brcms_c_info *wlc = bsscfg->wlc;
 
 	/* enter the MAC addr into the RXE match registers */
 	brcms_c_set_addrmatch(wlc, RCM_MAC_OFFSET, wlc->pub->cur_etheraddr);
 
 	brcms_c_ampdu_macaddr_upd(wlc);
-
-	return err;
 }
 
 /* Write the BSS config's BSSID address to core (set_bssid in d11procs.tcl).
-- 
cgit v1.2.3-59-g8ed1b


From 6fc3b94ef5964736408cd4f8e85a816dcf1dd510 Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Fri, 10 Apr 2020 22:36:43 +0530
Subject: ath11k: Cleanup in pdev destroy and mac register during crash on
 recovery

Debugfs pdev entries should be cleaned up during the crash
on recovery. If not, mac register will fail for the reason
that it is already registered during core reconfigure.

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586538405-16226-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/debug.c | 3 +++
 drivers/net/wireless/ath/ath11k/mac.c   | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/debug.c b/drivers/net/wireless/ath/ath11k/debug.c
index 8d485171b0b3..825e7ba61f45 100644
--- a/drivers/net/wireless/ath/ath11k/debug.c
+++ b/drivers/net/wireless/ath/ath11k/debug.c
@@ -803,6 +803,9 @@ static const struct file_operations fops_soc_rx_stats = {
 
 int ath11k_debug_pdev_create(struct ath11k_base *ab)
 {
+	if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags))
+		return 0;
+
 	ab->debugfs_soc = debugfs_create_dir(ab->hw_params.name, ab->debugfs_ath11k);
 
 	if (IS_ERR_OR_NULL(ab->debugfs_soc)) {
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 9f8bc19cc5ae..4783394b8575 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -5891,6 +5891,9 @@ int ath11k_mac_register(struct ath11k_base *ab)
 	int i;
 	int ret;
 
+	if (test_bit(ATH11K_FLAG_REGISTERED, &ab->dev_flags))
+		return 0;
+
 	for (i = 0; i < ab->num_radios; i++) {
 		pdev = &ab->pdevs[i];
 		ar = pdev->ar;
-- 
cgit v1.2.3-59-g8ed1b


From 40c766d4a49cdfe30e0fb40825321b4d4de651aa Mon Sep 17 00:00:00 2001
From: Ritesh Singh <ritesi@codeaurora.org>
Date: Fri, 10 Apr 2020 22:36:44 +0530
Subject: ath11k: Fix fw assert by setting proper vht cap

A FW assert happens when a fixed vht-rate is set and a new station
then tries to assoc with mu_bfee cap, or when a sta is already
connected with mu_bfee cap, the fixed vht-rate is then set, and the
sta reconnects.

So to avoid this, reset the MU_BEAMFORMEE bit in vht->caps,
if mcs_index is invalid for nss 1.

Signed-off-by: Ritesh Singh <ritesi@codeaurora.org>
Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586538405-16226-2-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 4783394b8575..0834089a61e7 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -1142,6 +1142,10 @@ static void ath11k_peer_assoc_h_vht(struct ath11k *ar,
 	arg->tx_mcs_set &= ~IEEE80211_VHT_MCS_SUPPORT_0_11_MASK;
 	arg->tx_mcs_set |= IEEE80211_DISABLE_VHT_MCS_SUPPORT_0_11;
 
+	if ((arg->tx_mcs_set & IEEE80211_VHT_MCS_NOT_SUPPORTED) ==
+			IEEE80211_VHT_MCS_NOT_SUPPORTED)
+		arg->peer_vht_caps &= ~IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+
 	/* TODO:  Check */
 	arg->tx_max_mcs_nss = 0xFF;
 
-- 
cgit v1.2.3-59-g8ed1b


From ec48d28ba291943d4ae2f873a4330debddecbca6 Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Fri, 10 Apr 2020 22:36:45 +0530
Subject: ath11k: Fix rx_filter flags setting for per peer rx_stats

The rx_filter flags are set to the default filter flags during the
wifi up/down sequence even though the 'ext_rx_stats' debugfs entry
is set to 1, so we do not get proper per-peer rx_stats.
Fix this by setting the missing rx_filter when ext_rx_stats is
already set/enabled.

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586538405-16226-3-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h  |  1 +
 drivers/net/wireless/ath/ath11k/debug.c |  2 ++
 drivers/net/wireless/ath/ath11k/debug.h | 10 ++++++++++
 drivers/net/wireless/ath/ath11k/mac.c   |  4 +++-
 4 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index b4c3e0418eef..a8ef95f98616 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -392,6 +392,7 @@ struct ath11k_debug {
 	u32 pktlog_mode;
 	u32 pktlog_peer_valid;
 	u8 pktlog_peer_addr[ETH_ALEN];
+	u32 rx_filter;
 };
 
 struct ath11k_per_peer_tx_stats {
diff --git a/drivers/net/wireless/ath/ath11k/debug.c b/drivers/net/wireless/ath/ath11k/debug.c
index 825e7ba61f45..a2e3dfeae3d5 100644
--- a/drivers/net/wireless/ath/ath11k/debug.c
+++ b/drivers/net/wireless/ath/ath11k/debug.c
@@ -698,6 +698,8 @@ static ssize_t ath11k_write_extd_rx_stats(struct file *file,
 		tlv_filter = ath11k_mac_mon_status_filter_default;
 	}
 
+	ar->debug.rx_filter = tlv_filter.rx_filter;
+
 	ring_id = ar->dp.rx_mon_status_refill_ring.refill_buf_ring.ring_id;
 	ret = ath11k_dp_tx_htt_rx_filter_setup(ar->ab, ring_id, ar->dp.mac_id,
 					       HAL_RXDMA_MONITOR_STATUS,
diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h
index 4a3ff8227187..45454fcef346 100644
--- a/drivers/net/wireless/ath/ath11k/debug.h
+++ b/drivers/net/wireless/ath/ath11k/debug.h
@@ -188,6 +188,11 @@ static inline int ath11k_debug_is_extd_rx_stats_enabled(struct ath11k *ar)
 	return ar->debug.extd_rx_stats;
 }
 
+static inline int ath11k_debug_rx_filter(struct ath11k *ar)
+{
+	return ar->debug.rx_filter;
+}
+
 void ath11k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir);
 void
@@ -269,6 +274,11 @@ static inline bool ath11k_debug_is_pktlog_peer_valid(struct ath11k *ar, u8 *addr
 	return false;
 }
 
+static inline int ath11k_debug_rx_filter(struct ath11k *ar)
+{
+	return 0;
+}
+
 static inline void
 ath11k_accumulate_per_peer_tx_stats(struct ath11k_sta *arsta,
 				    struct ath11k_per_peer_tx_stats *peer_stats,
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 0834089a61e7..065b7d6d4ab2 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3881,8 +3881,10 @@ static int ath11k_mac_config_mon_status_default(struct ath11k *ar, bool enable)
 	struct htt_rx_ring_tlv_filter tlv_filter = {0};
 	u32 ring_id;
 
-	if (enable)
+	if (enable) {
 		tlv_filter = ath11k_mac_mon_status_filter_default;
+		tlv_filter.rx_filter = ath11k_debug_rx_filter(ar);
+	}
 
 	ring_id = ar->dp.rx_mon_status_refill_ring.refill_buf_ring.ring_id;
 
-- 
cgit v1.2.3-59-g8ed1b


From 8a7968bee8d08835caa0d7bc0c25d750a5b52389 Mon Sep 17 00:00:00 2001
From: Mamatha Telu <telumamatha36@gmail.com>
Date: Sun, 12 Apr 2020 23:54:35 +0530
Subject: ath10k: Fix typo in warning messages

Fix some typos:
  s/fnrom/from
  s/pkgs/pkts/
  s/AMSUs/AMSDUs/

Signed-off-by: Mamatha Telu <telumamatha36@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586715875-5182-1-git-send-email-telumamatha36@gmail.com
---
 drivers/net/wireless/ath/ath10k/debug.c | 2 +-
 drivers/net/wireless/ath/ath10k/sdio.c  | 2 +-
 drivers/net/wireless/ath/ath10k/wmi.c   | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index f811e6940fb0..69139c2e6f82 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -778,7 +778,7 @@ static ssize_t ath10k_mem_value_read(struct file *file,
 
 	ret = ath10k_hif_diag_read(ar, *ppos, buf, count);
 	if (ret) {
-		ath10k_warn(ar, "failed to read address 0x%08x via diagnose window fnrom debugfs: %d\n",
+		ath10k_warn(ar, "failed to read address 0x%08x via diagnose window from debugfs: %d\n",
 			    (u32)(*ppos), ret);
 		goto exit;
 	}
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 5a0db342e5ad..943db9f401d8 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -542,7 +542,7 @@ static int ath10k_sdio_mbox_rx_alloc(struct ath10k *ar,
 	int pkt_cnt = 0;
 
 	if (n_lookaheads > ATH10K_SDIO_MAX_RX_MSGS) {
-		ath10k_warn(ar, "the total number of pkgs to be fetched (%u) exceeds maximum %u\n",
+		ath10k_warn(ar, "the total number of pkts to be fetched (%u) exceeds maximum %u\n",
 			    n_lookaheads, ATH10K_SDIO_MAX_RX_MSGS);
 		ret = -ENOMEM;
 		goto err;
diff --git a/drivers/net/wireless/ath/ath10k/wmi.c b/drivers/net/wireless/ath/ath10k/wmi.c
index 4a3a698fe059..a81a1ab2de19 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.c
+++ b/drivers/net/wireless/ath/ath10k/wmi.c
@@ -8336,7 +8336,7 @@ ath10k_wmi_fw_pdev_rx_stats_fill(const struct ath10k_fw_stats_pdev *pdev,
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
 			 "MPDUs delivered to stack", pdev->loc_mpdus);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
-			 "Oversized AMSUs", pdev->oversize_amsdu);
+			 "Oversized AMSDUs", pdev->oversize_amsdu);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
 			 "PHY errors", pdev->phy_errs);
 	len += scnprintf(buf + len, buf_len - len, "%30s %10d\n",
-- 
cgit v1.2.3-59-g8ed1b


From e190bc05b191ff62157ca63322aedd14c7e87d32 Mon Sep 17 00:00:00 2001
From: Govindaraj Saminathan <gsamin@codeaurora.org>
Date: Mon, 13 Apr 2020 16:51:12 +0530
Subject: ath11k: cleanup reo command error code overwritten

The error code should not be overwritten. If no buffer is available,
return -EINVAL. For other failures, return the error code of the
actual failure.

Signed-off-by: Govindaraj Saminathan <gsamin@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586776872-25766-1-git-send-email-gsamin@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp_tx.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index 7aac4b0eea0c..6f40e72f41e6 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -543,8 +543,12 @@ int ath11k_dp_tx_send_reo_cmd(struct ath11k_base *ab, struct dp_rx_tid *rx_tid,
 	cmd_ring = &ab->hal.srng_list[dp->reo_cmd_ring.ring_id];
 	cmd_num = ath11k_hal_reo_cmd_send(ab, cmd_ring, type, cmd);
 
+	/* cmd_num should start from 1, during failure return the error code */
+	if (cmd_num < 0)
+		return cmd_num;
+
 	/* reo cmd ring descriptors has cmd_num starting from 1 */
-	if (cmd_num <= 0)
+	if (cmd_num == 0)
 		return -EINVAL;
 
 	if (!cb)
-- 
cgit v1.2.3-59-g8ed1b


From d687275b268b09c350b24b1947d1bf3496f49137 Mon Sep 17 00:00:00 2001
From: Sriram R <srirrama@codeaurora.org>
Date: Mon, 13 Apr 2020 18:27:02 +0530
Subject: ath11k: Add dynamic tcl ring selection logic with retry mechanism

IPQ8074 HW supports three TCL rings for tx. Currently these rings
are mapped based on the access categories, viz. VO, VI, BE, BK.
If one of the traffic types dominates, it could stress the same
TCL rings. Instead, it would be optimal to make use of all the rings
in a round-robin fashion, irrespective of the traffic type, so that
the load is evenly distributed among all the rings.
Also, if the selected ring is busy or full, a retry mechanism is
used to ensure that another available ring is selected without
dropping the packet.

In SMP systems, this change prevents a single CPU from getting hogged
when heavy traffic of the same category is transmitted.
In that case, the tx completion interrupts corresponding to the used
TCL ring would be more numerous, which causes the assigned CPU to get
hogged. Distributing tx packets across different TCL rings helps
balance this load.

Signed-off-by: Sriram R <srirrama@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586782622-22570-1-git-send-email-srirrama@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.h  |  3 +++
 drivers/net/wireless/ath/ath11k/dp_tx.c | 46 +++++++++++++++++++++++++++------
 2 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index a8ef95f98616..33237eaf0371 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -657,6 +657,9 @@ struct ath11k_base {
 		u32 fw_crash_counter;
 	} stats;
 	u32 pktlog_defs_checksum;
+
+	/* Round robbin based TCL ring selector */
+	atomic_t tcl_ring_selector;
 };
 
 struct ath11k_fw_stats_pdev {
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index 6f40e72f41e6..59018ccb14da 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -9,10 +9,6 @@
 #include "hw.h"
 #include "peer.h"
 
-/* NOTE: Any of the mapped ring id value must not exceed DP_TCL_NUM_RING_MAX */
-static const u8
-ath11k_txq_tcl_ring_map[ATH11K_HW_MAX_QUEUES] = { 0x0, 0x1, 0x2, 0x2 };
-
 static enum hal_tcl_encap_type
 ath11k_dp_tx_get_encap_type(struct ath11k_vif *arvif, struct sk_buff *skb)
 {
@@ -84,6 +80,8 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
 	u8 pool_id;
 	u8 hal_ring_id;
 	int ret;
+	u8 ring_selector = 0, ring_map = 0;
+	bool tcl_ring_retry;
 
 	if (test_bit(ATH11K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags))
 		return -ESHUTDOWN;
@@ -92,7 +90,20 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
 		return -ENOTSUPP;
 
 	pool_id = skb_get_queue_mapping(skb) & (ATH11K_HW_MAX_QUEUES - 1);
-	ti.ring_id = ath11k_txq_tcl_ring_map[pool_id];
+
+	/* Let the default ring selection be based on a round robin
+	 * fashion where one of the 3 tcl rings are selected based on
+	 * the tcl_ring_selector counter. In case that ring
+	 * is full/busy, we resort to other available rings.
+	 * If all rings are full, we drop the packet.
+	 * //TODO Add throttling logic when all rings are full
+	 */
+	ring_selector = atomic_inc_return(&ab->tcl_ring_selector);
+
+tcl_ring_sel:
+	tcl_ring_retry = false;
+	ti.ring_id = ring_selector % DP_TCL_NUM_RING_MAX;
+	ring_map |= BIT(ti.ring_id);
 
 	tx_ring = &dp->tx_ring[ti.ring_id];
 
@@ -101,8 +112,14 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
 			DP_TX_IDR_SIZE - 1, GFP_ATOMIC);
 	spin_unlock_bh(&tx_ring->tx_idr_lock);
 
-	if (ret < 0)
-		return -ENOSPC;
+	if (ret < 0) {
+		if (ring_map == (BIT(DP_TCL_NUM_RING_MAX) - 1))
+			return -ENOSPC;
+
+		/* Check if the next ring is available */
+		ring_selector++;
+		goto tcl_ring_sel;
+	}
 
 	ti.desc_id = FIELD_PREP(DP_TX_DESC_ID_MAC_ID, ar->pdev_idx) |
 		     FIELD_PREP(DP_TX_DESC_ID_MSDU_ID, ret) |
@@ -178,11 +195,21 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
 	if (!hal_tcl_desc) {
 		/* NOTE: It is highly unlikely we'll be running out of tcl_ring
 		 * desc because the desc is directly enqueued onto hw queue.
-		 * So add tx packet throttling logic in future if required.
 		 */
 		ath11k_hal_srng_access_end(ab, tcl_ring);
 		spin_unlock_bh(&tcl_ring->lock);
 		ret = -ENOMEM;
+
+		/* Checking for available tcl descritors in another ring in
+		 * case of failure due to full tcl ring now, is better than
+		 * checking this ring earlier for each pkt tx.
+		 * Restart ring selection if some rings are not checked yet.
+		 */
+		if (ring_map != (BIT(DP_TCL_NUM_RING_MAX) - 1)) {
+			tcl_ring_retry = true;
+			ring_selector++;
+		}
+
 		goto fail_unmap_dma;
 	}
 
@@ -206,6 +233,9 @@ fail_remove_idr:
 		   FIELD_GET(DP_TX_DESC_ID_MSDU_ID, ti.desc_id));
 	spin_unlock_bh(&tx_ring->tx_idr_lock);
 
+	if (tcl_ring_retry)
+		goto tcl_ring_sel;
+
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a69a1328fb03309555d70f4add76eae3780a6fba Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Apr 2020 17:08:50 +0800
Subject: ipw2x00: make ipw_qos_association_resp() void

Fix the following coccicheck warning:

drivers/net/wireless/intel/ipw2x00/ipw2200.c:7048:5-8: Unneeded
variable: "ret". Return "0" on line 7055

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410090850.27025-1-yanaijie@huawei.com
---
 drivers/net/wireless/intel/ipw2x00/ipw2200.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 60b5e08dd6df..201a1eb0e2f6 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -7042,23 +7042,22 @@ static int ipw_qos_association(struct ipw_priv *priv,
 * off the network from the associated setting, adjust the QoS
 * setting
 */
-static int ipw_qos_association_resp(struct ipw_priv *priv,
+static void ipw_qos_association_resp(struct ipw_priv *priv,
 				    struct libipw_network *network)
 {
-	int ret = 0;
 	unsigned long flags;
 	u32 size = sizeof(struct libipw_qos_parameters);
 	int set_qos_param = 0;
 
 	if ((priv == NULL) || (network == NULL) ||
 	    (priv->assoc_network == NULL))
-		return ret;
+		return;
 
 	if (!(priv->status & STATUS_ASSOCIATED))
-		return ret;
+		return;
 
 	if ((priv->ieee->iw_mode != IW_MODE_INFRA))
-		return ret;
+		return;
 
 	spin_lock_irqsave(&priv->ieee->lock, flags);
 	if (network->flags & NETWORK_HAS_QOS_PARAMETERS) {
@@ -7088,8 +7087,6 @@ static int ipw_qos_association_resp(struct ipw_priv *priv,
 
 	if (set_qos_param == 1)
 		schedule_work(&priv->qos_activate);
-
-	return ret;
 }
 
 static u32 ipw_qos_get_burst_duration(struct ipw_priv *priv)
-- 
cgit v1.2.3-59-g8ed1b


From 80efb443ea0346791a79dade0e65c4a252f0571f Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Apr 2020 17:09:10 +0800
Subject: cw1200: make cw1200_spi_irq_unsubscribe() void

Fix the following coccicheck warning:

drivers/net/wireless/st/cw1200/cw1200_spi.c:273:5-8: Unneeded variable:
"ret". Return "0" on line 279

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410090910.27132-1-yanaijie@huawei.com
---
 drivers/net/wireless/st/cw1200/cw1200_spi.c | 6 +-----
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/drivers/net/wireless/st/cw1200/cw1200_spi.c b/drivers/net/wireless/st/cw1200/cw1200_spi.c
index ef01caac629c..271ed2ce2d7f 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_spi.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_spi.c
@@ -268,15 +268,11 @@ exit:
 	return ret;
 }
 
-static int cw1200_spi_irq_unsubscribe(struct hwbus_priv *self)
+static void cw1200_spi_irq_unsubscribe(struct hwbus_priv *self)
 {
-	int ret = 0;
-
 	pr_debug("SW IRQ unsubscribe\n");
 	disable_irq_wake(self->func->irq);
 	free_irq(self->func->irq, self);
-
-	return ret;
 }
 
 static int cw1200_spi_off(const struct cw1200_platform_data_spi *pdata)
-- 
cgit v1.2.3-59-g8ed1b


From 2fd5fdca6a3ae0d7333c086e019ba6c4330fa0a8 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 10 Apr 2020 17:09:42 +0800
Subject: libertas: make lbs_init_mesh() void

Fix the following coccicheck warning:

drivers/net/wireless/marvell/libertas/mesh.c:833:5-8: Unneeded variable:
"ret". Return "0" on line 874

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Lubomir Rintel <lkundrak@v3.sk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410090942.27239-1-yanaijie@huawei.com
---
 drivers/net/wireless/marvell/libertas/mesh.c | 6 +-----
 drivers/net/wireless/marvell/libertas/mesh.h | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/mesh.c b/drivers/net/wireless/marvell/libertas/mesh.c
index 44c8a550da4c..f5b78257d551 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.c
+++ b/drivers/net/wireless/marvell/libertas/mesh.c
@@ -828,10 +828,8 @@ static void lbs_persist_config_remove(struct net_device *dev)
  * Check mesh FW version and appropriately send the mesh start
  * command
  */
-int lbs_init_mesh(struct lbs_private *priv)
+void lbs_init_mesh(struct lbs_private *priv)
 {
-	int ret = 0;
-
 	/* Determine mesh_fw_ver from fwrelease and fwcapinfo */
 	/* 5.0.16p0 9.0.0.p0 is known to NOT support any mesh */
 	/* 5.110.22 have mesh command with 0xa3 command id */
@@ -870,8 +868,6 @@ int lbs_init_mesh(struct lbs_private *priv)
 
 	/* Stop meshing until interface is brought up */
 	lbs_mesh_config(priv, CMD_ACT_MESH_CONFIG_STOP, 1);
-
-	return ret;
 }
 
 void lbs_start_mesh(struct lbs_private *priv)
diff --git a/drivers/net/wireless/marvell/libertas/mesh.h b/drivers/net/wireless/marvell/libertas/mesh.h
index 1561018f226f..d49717b20c09 100644
--- a/drivers/net/wireless/marvell/libertas/mesh.h
+++ b/drivers/net/wireless/marvell/libertas/mesh.h
@@ -16,7 +16,7 @@
 
 struct net_device;
 
-int lbs_init_mesh(struct lbs_private *priv);
+void lbs_init_mesh(struct lbs_private *priv);
 void lbs_start_mesh(struct lbs_private *priv);
 int lbs_deinit_mesh(struct lbs_private *priv);
 
-- 
cgit v1.2.3-59-g8ed1b


From b9ed7e9505ba6346a101384d21ddd1139ae69eef Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 10 Apr 2020 18:09:49 +0800
Subject: rtw88: make rtw_chip_ops::set_antenna return int

To support ieee80211_ops::set_antenna, the driver can decide if the
antenna mask is accepted, otherwise it can return an error code.
Because each chip could have different limitations, let the chip
check the mask and return.

Also the antenna mask for TRX from upper space is 32-bit long.
Change the antenna mask for rtw_chip_ops::set_antenna from u8 to u32.

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410100950.3199-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  9 +++++----
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 18 ++++++++++++------
 2 files changed, 17 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index c6b590fdb573..c9edcabd7c42 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -798,8 +798,9 @@ struct rtw_chip_ops {
 	void (*set_tx_power_index)(struct rtw_dev *rtwdev);
 	int (*rsvd_page_dump)(struct rtw_dev *rtwdev, u8 *buf, u32 offset,
 			      u32 size);
-	void (*set_antenna)(struct rtw_dev *rtwdev, u8 antenna_tx,
-			    u8 antenna_rx);
+	int (*set_antenna)(struct rtw_dev *rtwdev,
+			   u32 antenna_tx,
+			   u32 antenna_rx);
 	void (*cfg_ldo25)(struct rtw_dev *rtwdev, bool enable);
 	void (*false_alarm_statistics)(struct rtw_dev *rtwdev);
 	void (*phy_calibration)(struct rtw_dev *rtwdev);
@@ -1567,8 +1568,8 @@ struct rtw_hal {
 	u8 sec_ch_offset;
 	u8 rf_type;
 	u8 rf_path_num;
-	u8 antenna_tx;
-	u8 antenna_rx;
+	u32 antenna_tx;
+	u32 antenna_rx;
 	u8 bfee_sts_cap;
 
 	/* protect tx power section */
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 4dd7d4143b04..c02f3a730369 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -998,8 +998,9 @@ static bool rtw8822b_check_rf_path(u8 antenna)
 	}
 }
 
-static void rtw8822b_set_antenna(struct rtw_dev *rtwdev, u8 antenna_tx,
-				 u8 antenna_rx)
+static int rtw8822b_set_antenna(struct rtw_dev *rtwdev,
+				u32 antenna_tx,
+				u32 antenna_rx)
 {
 	struct rtw_hal *hal = &rtwdev->hal;
 
@@ -1007,16 +1008,21 @@ static void rtw8822b_set_antenna(struct rtw_dev *rtwdev, u8 antenna_tx,
 		antenna_tx, antenna_rx);
 
 	if (!rtw8822b_check_rf_path(antenna_tx)) {
-		rtw_info(rtwdev, "unsupport tx path, set to default path ab\n");
-		antenna_tx = BB_PATH_AB;
+		rtw_info(rtwdev, "unsupport tx path 0x%x\n", antenna_tx);
+		return -EINVAL;
 	}
+
 	if (!rtw8822b_check_rf_path(antenna_rx)) {
-		rtw_info(rtwdev, "unsupport rx path, set to default path ab\n");
-		antenna_rx = BB_PATH_AB;
+		rtw_info(rtwdev, "unsupport rx path 0x%x\n", antenna_rx);
+		return -EINVAL;
 	}
+
 	hal->antenna_tx = antenna_tx;
 	hal->antenna_rx = antenna_rx;
+
 	rtw8822b_config_trx_mode(rtwdev, antenna_tx, antenna_rx, false);
+
+	return 0;
 }
 
 static void rtw8822b_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
-- 
cgit v1.2.3-59-g8ed1b


From 297bcf8222f222fd7defead862de4b8e3ea0b08a Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 10 Apr 2020 18:09:50 +0800
Subject: rtw88: add support for set/get antennas

User space programs such as iw can set the antenna mask for the device.
So add set antenna support by configuring the TRX mode.

This is useful for tests that want to see the output of different
antenna configurations (e.g. path A vs. path B).

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410100950.3199-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac80211.c | 33 +++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/main.c     |  3 +++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 35 +++++++++++++++++++++++++++
 3 files changed, 71 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index d7d02e4c0184..a2e6ef4ad9ee 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -754,6 +754,37 @@ static int rtw_ops_set_bitrate_mask(struct ieee80211_hw *hw,
 	return 0;
 }
 
+static int rtw_ops_set_antenna(struct ieee80211_hw *hw,
+			       u32 tx_antenna,
+			       u32 rx_antenna)
+{
+	struct rtw_dev *rtwdev = hw->priv;
+	struct rtw_chip_info *chip = rtwdev->chip;
+	int ret;
+
+	if (!chip->ops->set_antenna)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&rtwdev->mutex);
+	ret = chip->ops->set_antenna(rtwdev, tx_antenna, rx_antenna);
+	mutex_unlock(&rtwdev->mutex);
+
+	return ret;
+}
+
+static int rtw_ops_get_antenna(struct ieee80211_hw *hw,
+			       u32 *tx_antenna,
+			       u32 *rx_antenna)
+{
+	struct rtw_dev *rtwdev = hw->priv;
+	struct rtw_hal *hal = &rtwdev->hal;
+
+	*tx_antenna = hal->antenna_tx;
+	*rx_antenna = hal->antenna_rx;
+
+	return 0;
+}
+
 #ifdef CONFIG_PM
 static int rtw_ops_suspend(struct ieee80211_hw *hw,
 			   struct cfg80211_wowlan *wowlan)
@@ -815,6 +846,8 @@ const struct ieee80211_ops rtw_ops = {
 	.sta_statistics		= rtw_ops_sta_statistics,
 	.flush			= rtw_ops_flush,
 	.set_bitrate_mask	= rtw_ops_set_bitrate_mask,
+	.set_antenna		= rtw_ops_set_antenna,
+	.get_antenna		= rtw_ops_get_antenna,
 #ifdef CONFIG_PM
 	.suspend		= rtw_ops_suspend,
 	.resume			= rtw_ops_resume,
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 7640e97706f5..1e1d2c774287 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1450,6 +1450,7 @@ EXPORT_SYMBOL(rtw_core_deinit);
 
 int rtw_register_hw(struct rtw_dev *rtwdev, struct ieee80211_hw *hw)
 {
+	struct rtw_hal *hal = &rtwdev->hal;
 	int max_tx_headroom = 0;
 	int ret;
 
@@ -1478,6 +1479,8 @@ int rtw_register_hw(struct rtw_dev *rtwdev, struct ieee80211_hw *hw)
 				     BIT(NL80211_IFTYPE_AP) |
 				     BIT(NL80211_IFTYPE_ADHOC) |
 				     BIT(NL80211_IFTYPE_MESH_POINT);
+	hw->wiphy->available_antennas_tx = hal->antenna_tx;
+	hw->wiphy->available_antennas_rx = hal->antenna_rx;
 
 	hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS |
 			    WIPHY_FLAG_TDLS_EXTERNAL_SETUP;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index dc07e6be38e8..c99b1de54bfc 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1890,6 +1890,40 @@ static void rtw8822c_set_tx_power_index(struct rtw_dev *rtwdev)
 	}
 }
 
+static int rtw8822c_set_antenna(struct rtw_dev *rtwdev,
+				u32 antenna_tx,
+				u32 antenna_rx)
+{
+	struct rtw_hal *hal = &rtwdev->hal;
+
+	switch (antenna_tx) {
+	case BB_PATH_A:
+	case BB_PATH_B:
+	case BB_PATH_AB:
+		break;
+	default:
+		rtw_info(rtwdev, "unsupport tx path 0x%x\n", antenna_tx);
+		return -EINVAL;
+	}
+
+	/* path B only is not available for RX */
+	switch (antenna_rx) {
+	case BB_PATH_A:
+	case BB_PATH_AB:
+		break;
+	default:
+		rtw_info(rtwdev, "unsupport rx path 0x%x\n", antenna_rx);
+		return -EINVAL;
+	}
+
+	hal->antenna_tx = antenna_tx;
+	hal->antenna_rx = antenna_rx;
+
+	rtw8822c_config_trx_mode(rtwdev, antenna_tx, antenna_rx, false);
+
+	return 0;
+}
+
 static void rtw8822c_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 {
 	u8 ldo_pwr;
@@ -3794,6 +3828,7 @@ static struct rtw_chip_ops rtw8822c_ops = {
 	.read_rf		= rtw_phy_read_rf,
 	.write_rf		= rtw_phy_write_rf_reg_mix,
 	.set_tx_power_index	= rtw8822c_set_tx_power_index,
+	.set_antenna		= rtw8822c_set_antenna,
 	.cfg_ldo25		= rtw8822c_cfg_ldo25,
 	.false_alarm_statistics	= rtw8822c_false_alarm_statistics,
 	.dpk_track		= rtw8822c_dpk_track,
-- 
cgit v1.2.3-59-g8ed1b


From 1c0e3c73e98dd55bc9551279fed6233997425c23 Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Sat, 11 Apr 2020 01:19:27 +0100
Subject: hostap: Add missing annotations for prism2_bss_list_proc_start() and
 prism2_bss_list_proc_stop

Sparse reports warnings at prism2_bss_list_proc_start() and prism2_bss_list_proc_stop()

warning: context imbalance in prism2_wds_proc_stop() - unexpected unlock
warning: context imbalance in prism2_bss_list_proc_start() - wrong count at exit

The root cause is the missing annotations at prism2_bss_list_proc_start()
and prism2_bss_list_proc_stop()

Add the missing __acquires(&local->lock) annotation to
prism2_bss_list_proc_start()
Add the missing __releases(&local->lock) annotation to
prism2_bss_list_proc_stop()

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200411001933.10072-4-jbi.octave@gmail.com
---
 drivers/net/wireless/intersil/hostap/hostap_proc.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/intersil/hostap/hostap_proc.c b/drivers/net/wireless/intersil/hostap/hostap_proc.c
index a2ee4693eaed..97c270845fd1 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_proc.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_proc.c
@@ -149,6 +149,7 @@ static int prism2_bss_list_proc_show(struct seq_file *m, void *v)
 }
 
 static void *prism2_bss_list_proc_start(struct seq_file *m, loff_t *_pos)
+	__acquires(&local->lock)
 {
 	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_lock_bh(&local->lock);
@@ -162,6 +163,7 @@ static void *prism2_bss_list_proc_next(struct seq_file *m, void *v, loff_t *_pos
 }
 
 static void prism2_bss_list_proc_stop(struct seq_file *m, void *v)
+	__releases(&local->lock)
 {
 	local_info_t *local = PDE_DATA(file_inode(m->file));
 	spin_unlock_bh(&local->lock);
-- 
cgit v1.2.3-59-g8ed1b


From 2fe5efb8a475c856cd72a37fd73d82f5b5b563e0 Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Sat, 11 Apr 2020 01:19:28 +0100
Subject: brcmsmac: Add missing annotation for brcms_rfkill_set_hw_state()

Sparse reports a warning at brcms_rfkill_set_hw_state()

warning: context imbalance in brcms_rfkill_set_hw_state()
	- unexpected unlock
The root cause is the missing annotation at brcms_rfkill_set_hw_state()
Add the missing __must_hold(&wl->lock) annotation

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200411001933.10072-5-jbi.octave@gmail.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index 8e8b685cfe09..c3dbeacea6ca 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -1717,6 +1717,7 @@ int brcms_check_firmwares(struct brcms_info *wl)
  * precondition: perimeter lock has been acquired
  */
 bool brcms_rfkill_set_hw_state(struct brcms_info *wl)
+	__must_hold(&wl->lock)
 {
 	bool blocked = brcms_c_check_radio_disabled(wl->wlc);
 
-- 
cgit v1.2.3-59-g8ed1b


From 40fb232c02d1b012c6c84b8c22465d01e20eddf9 Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Sat, 11 Apr 2020 01:19:29 +0100
Subject: brcmsmac: Add missing annotation for brcms_down()

Sparse reports a warning at brcms_down()

warning: context imbalance in brcms_down()
	- unexpected unlock
The root cause is the missing annotation at brcms_down()
Add the missing __must_hold(&wl->lock) annotation

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200411001933.10072-6-jbi.octave@gmail.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
index c3dbeacea6ca..648efcbc819f 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/mac80211_if.c
@@ -1431,6 +1431,7 @@ int brcms_up(struct brcms_info *wl)
  * precondition: perimeter lock has been acquired
  */
 void brcms_down(struct brcms_info *wl)
+	__must_hold(&wl->lock)
 {
 	uint callbacks, ret_val = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 99cd87d63c0b0724a8e4f1405107ca06c10341e8 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 13 Apr 2020 16:20:22 +0800
Subject: libertas: make lbs_process_event() void

Fix the following coccicheck warning:

drivers/net/wireless/marvell/libertas/cmdresp.c:225:5-8: Unneeded
variable: "ret". Return "0" on line 355

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200413082022.22380-1-yanaijie@huawei.com
---
 drivers/net/wireless/marvell/libertas/cmd.h     | 2 +-
 drivers/net/wireless/marvell/libertas/cmdresp.c | 5 +----
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/cmd.h b/drivers/net/wireless/marvell/libertas/cmd.h
index 80878561cb90..3c193074662b 100644
--- a/drivers/net/wireless/marvell/libertas/cmd.h
+++ b/drivers/net/wireless/marvell/libertas/cmd.h
@@ -76,7 +76,7 @@ void lbs_mac_event_disconnected(struct lbs_private *priv,
 
 /* Events */
 
-int lbs_process_event(struct lbs_private *priv, u32 event);
+void lbs_process_event(struct lbs_private *priv, u32 event);
 
 
 /* Actual commands */
diff --git a/drivers/net/wireless/marvell/libertas/cmdresp.c b/drivers/net/wireless/marvell/libertas/cmdresp.c
index b73d08381398..cb515c5584c1 100644
--- a/drivers/net/wireless/marvell/libertas/cmdresp.c
+++ b/drivers/net/wireless/marvell/libertas/cmdresp.c
@@ -220,9 +220,8 @@ done:
 	return ret;
 }
 
-int lbs_process_event(struct lbs_private *priv, u32 event)
+void lbs_process_event(struct lbs_private *priv, u32 event)
 {
-	int ret = 0;
 	struct cmd_header cmd;
 
 	switch (event) {
@@ -351,6 +350,4 @@ int lbs_process_event(struct lbs_private *priv, u32 event)
 		netdev_alert(priv->dev, "EVENT: unknown event id %d\n", event);
 		break;
 	}
-
-	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7b9ae69d5441d98ce55da7d651efff2c7ce27551 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 13 Apr 2020 16:20:43 +0800
Subject: orinoco: remove useless variable 'err' in spectrum_cs_suspend()

Fix the following coccicheck warning:

drivers/net/wireless/intersil/orinoco/spectrum_cs.c:281:5-8: Unneeded
variable: "err". Return "0" on line 286

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200413082043.22468-1-yanaijie@huawei.com
---
 drivers/net/wireless/intersil/orinoco/spectrum_cs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
index b60048c95e0a..291ef97ed45e 100644
--- a/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
+++ b/drivers/net/wireless/intersil/orinoco/spectrum_cs.c
@@ -278,12 +278,11 @@ static int
 spectrum_cs_suspend(struct pcmcia_device *link)
 {
 	struct orinoco_private *priv = link->priv;
-	int err = 0;
 
 	/* Mark the device as stopped, to block IO until later */
 	orinoco_down(priv);
 
-	return err;
+	return 0;
 }
 
 static int
-- 
cgit v1.2.3-59-g8ed1b


From e871b8bfedda84924ac5767883c80deae67a2657 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 13 Apr 2020 16:21:26 +0800
Subject: brcmsmac: make brcms_c_stf_ss_update() void

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c:309:5-13:
Unneeded variable: "ret_code". Return "0" on line 328

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200413082126.22572-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c | 7 ++-----
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h | 2 +-
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
index 0ab865de1491..79d4a7a4da8b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.c
@@ -304,9 +304,8 @@ int brcms_c_stf_txchain_set(struct brcms_c_info *wlc, s32 int_val, bool force)
  * update wlc->stf->ss_opmode which represents the operational stf_ss mode
  * we're using
  */
-int brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band)
+void brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band)
 {
-	int ret_code = 0;
 	u8 prev_stf_ss;
 	u8 upd_stf_ss;
 
@@ -325,7 +324,7 @@ int brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band)
 				    PHY_TXC1_MODE_SISO : PHY_TXC1_MODE_CDD;
 	} else {
 		if (wlc->band != band)
-			return ret_code;
+			return;
 		upd_stf_ss = (wlc->stf->txstreams == 1) ?
 				PHY_TXC1_MODE_SISO : band->band_stf_ss_mode;
 	}
@@ -333,8 +332,6 @@ int brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band)
 		wlc->stf->ss_opmode = upd_stf_ss;
 		brcms_b_band_stf_ss_set(wlc->hw, upd_stf_ss);
 	}
-
-	return ret_code;
 }
 
 int brcms_c_stf_attach(struct brcms_c_info *wlc)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
index ba9493009a33..aa4ab53bf634 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/stf.h
@@ -25,7 +25,7 @@ void brcms_c_stf_detach(struct brcms_c_info *wlc);
 void brcms_c_tempsense_upd(struct brcms_c_info *wlc);
 void brcms_c_stf_ss_algo_channel_get(struct brcms_c_info *wlc,
 				     u16 *ss_algo_channel, u16 chanspec);
-int brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band);
+void brcms_c_stf_ss_update(struct brcms_c_info *wlc, struct brcms_band *band);
 void brcms_c_stf_phy_txant_upd(struct brcms_c_info *wlc);
 int brcms_c_stf_txchain_set(struct brcms_c_info *wlc, s32 int_val, bool force);
 bool brcms_c_stf_stbc_rx_set(struct brcms_c_info *wlc, s32 int_val);
-- 
cgit v1.2.3-59-g8ed1b


From 5a652b49b41b1cbffe2beedbaf253f60f768fd92 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 14 Apr 2020 20:02:51 +0800
Subject: ipw2x00: make ipw_setup_deferred_work() void

This function actually needs no return value. So remove the unneeded
variable 'ret' and make it void.

This also fixes the following coccicheck warning:

drivers/net/wireless/intel/ipw2x00/ipw2200.c:10648:5-8: Unneeded
variable: "ret". Return "0" on line 10684

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200414120251.35869-1-yanaijie@huawei.com
---
 drivers/net/wireless/intel/ipw2x00/ipw2200.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 201a1eb0e2f6..923be3781c92 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -10640,10 +10640,8 @@ static void ipw_bg_link_down(struct work_struct *work)
 	mutex_unlock(&priv->mutex);
 }
 
-static int ipw_setup_deferred_work(struct ipw_priv *priv)
+static void ipw_setup_deferred_work(struct ipw_priv *priv)
 {
-	int ret = 0;
-
 	init_waitqueue_head(&priv->wait_command_queue);
 	init_waitqueue_head(&priv->wait_state);
 
@@ -10677,8 +10675,6 @@ static int ipw_setup_deferred_work(struct ipw_priv *priv)
 
 	tasklet_init(&priv->irq_tasklet,
 		     ipw_irq_tasklet, (unsigned long)priv);
-
-	return ret;
 }
 
 static void shim__set_security(struct net_device *dev,
@@ -11659,11 +11655,7 @@ static int ipw_pci_probe(struct pci_dev *pdev,
 	IPW_DEBUG_INFO("pci_resource_len = 0x%08x\n", length);
 	IPW_DEBUG_INFO("pci_resource_base = %p\n", base);
 
-	err = ipw_setup_deferred_work(priv);
-	if (err) {
-		IPW_ERROR("Unable to setup deferred work\n");
-		goto out_iounmap;
-	}
+	ipw_setup_deferred_work(priv);
 
 	ipw_sw_reset(priv, 1);
 
-- 
cgit v1.2.3-59-g8ed1b


From 55beec10710d10cb4a1cbbc5b1a0d9c9cfbd2c1e Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 9 Apr 2020 08:05:47 +0200
Subject: Bluetooth: Sort list of LE features constants

The list of LE features constants has gotten a bit confused. It lost the
order and gained duplicates. Clean this up.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index f4e8e2a0b7c1..ff42d05b3e72 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -460,12 +460,10 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
-#define HCI_LE_PHY_2M			0x01
-#define HCI_LE_PHY_CODED		0x08
-#define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
 #define HCI_LE_PHY_CODED		0x08
+#define HCI_LE_EXT_ADV			0x10
 #define HCI_LE_CHAN_SEL_ALG2		0x40
 #define HCI_LE_CIS_MASTER		0x10
 #define HCI_LE_CIS_SLAVE		0x20
-- 
cgit v1.2.3-59-g8ed1b


From 849c9c35e80d73c215c65b6023658b371bdeb5ed Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 9 Apr 2020 08:05:48 +0200
Subject: Bluetooth: Use extra variable to make code more readable

When starting active scanning for discovery, the whitelist does not need
to be used, so the filter_policy is 0x00. To make the code more readable,
use a named variable instead of just passing 0 as a parameter.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/hci_request.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 649e1e5ed446..9ea40106ef17 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -2723,6 +2723,8 @@ static int active_scan(struct hci_request *req, unsigned long opt)
 	uint16_t interval = opt;
 	struct hci_dev *hdev = req->hdev;
 	u8 own_addr_type;
+	/* White list is not used for discovery */
+	u8 filter_policy = 0x00;
 	int err;
 
 	BT_DBG("%s", hdev->name);
@@ -2744,7 +2746,7 @@ static int active_scan(struct hci_request *req, unsigned long opt)
 		own_addr_type = ADDR_LE_DEV_PUBLIC;
 
 	hci_req_start_scan(req, LE_SCAN_ACTIVE, interval, DISCOV_LE_SCAN_WIN,
-			   own_addr_type, 0);
+			   own_addr_type, filter_policy);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ff3b8df2bd758d97aa3dd7c021864be05fec9bd5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 9 Apr 2020 08:05:49 +0200
Subject: Bluetooth: Enable LE Enhanced Connection Complete event.

In case LL Privacy is supported by the controller, it is also a good
idea to use the LE Enhanced Connection Complete event for getting all
information about the new connection and its addresses.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h | 1 +
 net/bluetooth/hci_core.c    | 8 ++++++++
 2 files changed, 9 insertions(+)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index ff42d05b3e72..1da8cec8e210 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -460,6 +460,7 @@ enum {
 #define HCI_LE_SLAVE_FEATURES		0x08
 #define HCI_LE_PING			0x10
 #define HCI_LE_DATA_LEN_EXT		0x20
+#define HCI_LE_LL_PRIVACY		0x40
 #define HCI_LE_EXT_SCAN_POLICY		0x80
 #define HCI_LE_PHY_2M			0x01
 #define HCI_LE_PHY_CODED		0x08
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 589c4085499c..0d726d59a492 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -638,6 +638,14 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 		if (hdev->le_features[0] & HCI_LE_DATA_LEN_EXT)
 			events[0] |= 0x40;	/* LE Data Length Change */
 
+		/* If the controller supports LL Privacy feature, enable
+		 * the corresponding event.
+		 */
+		if (hdev->le_features[0] & HCI_LE_LL_PRIVACY)
+			events[1] |= 0x02;	/* LE Enhanced Connection
+						 * Complete
+						 */
+
 		/* If the controller supports Extended Scanner Filter
 		 * Policies, enable the correspondig event.
 		 */
-- 
cgit v1.2.3-59-g8ed1b


From 2eb71a3a68c387274cfc1bc43eac25138add528d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Thu, 9 Apr 2020 08:05:50 +0200
Subject: Bluetooth: Clear HCI_LL_RPA_RESOLUTION flag on reset

When the controller is being reset or power cycled, the flag
HCI_LL_RPA_RESOLUTION, which indicates whether controller-based address
resolution is active, also needs to be reset.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 2f3275f1d1c4..239ab72f16c6 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -645,6 +645,7 @@ extern struct mutex hci_cb_list_lock;
 	do {							\
 		hci_dev_clear_flag(hdev, HCI_LE_SCAN);		\
 		hci_dev_clear_flag(hdev, HCI_LE_ADV);		\
+		hci_dev_clear_flag(hdev, HCI_LL_RPA_RESOLUTION);\
 		hci_dev_clear_flag(hdev, HCI_PERIODIC_INQ);	\
 	} while (0)
 
-- 
cgit v1.2.3-59-g8ed1b


From f0f383347ced96416d5e3062f8bb2b0f99ac9d5b Mon Sep 17 00:00:00 2001
From: Nils ANDRÉ-CHANG <nils@nilsand.re>
Date: Sun, 12 Apr 2020 18:19:00 +0100
Subject: brcmfmac: remove leading space
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Signed-off-by: Nils ANDRÉ-CHANG <nils@nilsand.re>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200412171900.xzedxhzd56gox5kf@nixos
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index b684a5b6d904..22a17ae09e94 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -961,7 +961,7 @@ static const struct sdio_device_id brcmf_sdmmc_ids[] = {
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43340),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43341),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43362),
- 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364),
+	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43364),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4335_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_4339),
 	BRCMF_SDIO_DEVICE(SDIO_DEVICE_ID_BROADCOM_43430),
-- 
cgit v1.2.3-59-g8ed1b


From 7edc9079540b65026f3d3386b3642d1820d5fed5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 15 Apr 2020 17:35:16 +0200
Subject: Bluetooth: Enhanced Connection Complete event belongs to LL Privacy

The Enhanced Connection Complete event is used in conjunction with LL
Privacy and not Extended Advertising.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/hci_core.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 0d726d59a492..51d399273276 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -719,14 +719,6 @@ static int hci_init3_req(struct hci_request *req, unsigned long opt)
 						 * Report
 						 */
 
-		/* If the controller supports the LE Extended Create Connection
-		 * command, enable the corresponding event.
-		 */
-		if (use_ext_conn(hdev))
-			events[1] |= 0x02;      /* LE Enhanced Connection
-						 * Complete
-						 */
-
 		/* If the controller supports the LE Extended Advertising
 		 * command, enable the corresponding event.
 		 */
-- 
cgit v1.2.3-59-g8ed1b


From 34428dff3679f0c4c9b185ff8eccefd12a7f55f8 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 10 Feb 2020 09:04:15 +0200
Subject: igc: Add GSO partial support

Partial generic segmentation offload is a hybrid between TSO and GSO.
What it effectively does is take advantage of certain traits of TCP and
tunnels so that, instead of having to rewrite the packet headers for each
segment, only the inner-most transport header and possibly the outer-most
network header need to be updated.
This allows devices that do not support tunnel offload or tunnel
offloads with checksum to still make use of segmentation.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 69fa1ce1f927..46ab035c2032 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4727,6 +4727,16 @@ static int igc_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_HW_CSUM;
 	netdev->features |= NETIF_F_SCTP_CRC;
 
+#define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
+				  NETIF_F_GSO_GRE_CSUM | \
+				  NETIF_F_GSO_IPXIP4 | \
+				  NETIF_F_GSO_IPXIP6 | \
+				  NETIF_F_GSO_UDP_TUNNEL | \
+				  NETIF_F_GSO_UDP_TUNNEL_CSUM)
+
+	netdev->gso_partial_features = IGC_GSO_PARTIAL_FEATURES;
+	netdev->features |= NETIF_F_GSO_PARTIAL | IGC_GSO_PARTIAL_FEATURES;
+
 	/* setup the private structure */
 	err = igc_sw_init(adapter);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From ec50a9d437f05dd76444a65fdd3cfbfad90ee9d6 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 14 Feb 2020 15:52:02 -0800
Subject: igc: Add support for taprio offloading

Adds support for translating taprio schedules into i225 cycles. This
will allow schedules to run in the hardware, making schedule
enforcement more precise and saving CPU time.

Right now, only simple schedules are allowed; complex schedules are
rejected. "Simple" in this context means schedules in which each HW queue
is opened and closed only once in each cycle.

Changing schedules is not supported yet either.

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/Makefile      |   2 +-
 drivers/net/ethernet/intel/igc/igc.h         |   7 ++
 drivers/net/ethernet/intel/igc/igc_defines.h |  12 +++
 drivers/net/ethernet/intel/igc/igc_main.c    | 113 +++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_regs.h    |  12 +++
 drivers/net/ethernet/intel/igc/igc_tsn.c     | 140 +++++++++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_tsn.h     |   9 ++
 7 files changed, 294 insertions(+), 1 deletion(-)
 create mode 100644 drivers/net/ethernet/intel/igc/igc_tsn.c
 create mode 100644 drivers/net/ethernet/intel/igc/igc_tsn.h

diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index e3c164c12e10..3652f211f351 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -8,4 +8,4 @@
 obj-$(CONFIG_IGC) += igc.o
 
 igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
-igc_ethtool.o igc_ptp.o igc_dump.o
+igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index a1f845a2aa80..5e36822de5ec 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -70,6 +70,7 @@ extern char igc_driver_version[];
 #define IGC_FLAG_HAS_MSIX		BIT(13)
 #define IGC_FLAG_VLAN_PROMISC		BIT(15)
 #define IGC_FLAG_RX_LEGACY		BIT(16)
+#define IGC_FLAG_TSN_QBV_ENABLED	BIT(17)
 
 #define IGC_FLAG_RSS_FIELD_IPV4_UDP	BIT(6)
 #define IGC_FLAG_RSS_FIELD_IPV6_UDP	BIT(7)
@@ -287,6 +288,9 @@ struct igc_ring {
 	u8 reg_idx;                     /* physical index of the ring */
 	bool launchtime_enable;		/* true if LaunchTime is enabled */
 
+	u32 start_time;
+	u32 end_time;
+
 	/* everything past this point are written often */
 	u16 next_to_clean;
 	u16 next_to_use;
@@ -421,6 +425,9 @@ struct igc_adapter {
 	u32 max_frame_size;
 	u32 min_frame_size;
 
+	ktime_t base_time;
+	ktime_t cycle_time;
+
 	/* OS defined structs */
 	struct pci_dev *pdev;
 	/* lock for statistics */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 4ddccccf42cc..2da5a9b012af 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -377,6 +377,11 @@
 #define I225_TXPBSIZE_DEFAULT	0x04000014 /* TXPBSIZE default */
 #define IGC_RXPBS_CFG_TS_EN	0x80000000 /* Timestamp in Rx buffer */
 
+#define IGC_TXPBSIZE_TSN	0x04145145 /* 5k bytes buffer for each queue */
+
+#define IGC_DTXMXPKTSZ_TSN	0x19 /* 1600 bytes of max TX DMA packet size */
+#define IGC_DTXMXPKTSZ_DEFAULT	0x98 /* 9728-byte Jumbo frames */
+
 /* Time Sync Interrupt Causes */
 #define IGC_TSICR_SYS_WRAP	BIT(0) /* SYSTIM Wrap around. */
 #define IGC_TSICR_TXTS		BIT(1) /* Transmit Timestamp. */
@@ -431,6 +436,13 @@
 #define IGC_TSYNCTXCTL_START_SYNC		0x80000000  /* initiate sync */
 #define IGC_TSYNCTXCTL_TXSYNSIG			0x00000020  /* Sample TX tstamp in PHY sop */
 
+/* Transmit Scheduling */
+#define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
+#define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
+
+#define IGC_TXQCTL_STRICT_CYCLE		0x00000002
+#define IGC_TXQCTL_STRICT_END		0x00000004
+
 /* Receive Checksum Control */
 #define IGC_RXCSUM_CRCOFL	0x00000800   /* CRC32 offload enable */
 #define IGC_RXCSUM_PCSD		0x00002000   /* packet checksum disabled */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 46ab035c2032..12d672a6bc45 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -9,11 +9,13 @@
 #include <linux/udp.h>
 #include <linux/ip.h>
 #include <linux/pm_runtime.h>
+#include <net/pkt_sched.h>
 
 #include <net/ipv6.h>
 
 #include "igc.h"
 #include "igc_hw.h"
+#include "igc_tsn.h"
 
 #define DRV_VERSION	"0.0.1-k"
 #define DRV_SUMMARY	"Intel(R) 2.5G Ethernet Linux Driver"
@@ -106,6 +108,9 @@ void igc_reset(struct igc_adapter *adapter)
 	/* Re-enable PTP, where applicable. */
 	igc_ptp_reset(adapter);
 
+	/* Re-enable TSN offloading, where applicable. */
+	igc_tsn_offload_apply(adapter);
+
 	igc_get_phy_info(hw);
 }
 
@@ -4491,6 +4496,113 @@ static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 	}
 }
 
+static bool validate_schedule(const struct tc_taprio_qopt_offload *qopt)
+{
+	int queue_uses[IGC_MAX_TX_QUEUES] = { };
+	size_t n;
+
+	if (qopt->cycle_time_extension)
+		return false;
+
+	for (n = 0; n < qopt->num_entries; n++) {
+		const struct tc_taprio_sched_entry *e;
+		int i;
+
+		e = &qopt->entries[n];
+
+		/* i225 only supports "global" frame preemption
+		 * settings.
+		 */
+		if (e->command != TC_TAPRIO_CMD_SET_GATES)
+			return false;
+
+		for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
+			if (e->gate_mask & BIT(i))
+				queue_uses[i]++;
+
+			if (queue_uses[i] > 1)
+				return false;
+		}
+	}
+
+	return true;
+}
+
+static int igc_save_qbv_schedule(struct igc_adapter *adapter,
+				 struct tc_taprio_qopt_offload *qopt)
+{
+	u32 start_time = 0, end_time = 0;
+	size_t n;
+
+	if (!qopt->enable) {
+		adapter->base_time = 0;
+		return 0;
+	}
+
+	if (adapter->base_time)
+		return -EALREADY;
+
+	if (!validate_schedule(qopt))
+		return -EINVAL;
+
+	adapter->cycle_time = qopt->cycle_time;
+	adapter->base_time = qopt->base_time;
+
+	/* FIXME: be a little smarter about cases when the gate for a
+	 * queue stays open for more than one entry.
+	 */
+	for (n = 0; n < qopt->num_entries; n++) {
+		struct tc_taprio_sched_entry *e = &qopt->entries[n];
+		int i;
+
+		end_time += e->interval;
+
+		for (i = 0; i < IGC_MAX_TX_QUEUES; i++) {
+			struct igc_ring *ring = adapter->tx_ring[i];
+
+			if (!(e->gate_mask & BIT(i)))
+				continue;
+
+			ring->start_time = start_time;
+			ring->end_time = end_time;
+		}
+
+		start_time += e->interval;
+	}
+
+	return 0;
+}
+
+static int igc_tsn_enable_qbv_scheduling(struct igc_adapter *adapter,
+					 struct tc_taprio_qopt_offload *qopt)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err;
+
+	if (hw->mac.type != igc_i225)
+		return -EOPNOTSUPP;
+
+	err = igc_save_qbv_schedule(adapter, qopt);
+	if (err)
+		return err;
+
+	return igc_tsn_offload_apply(adapter);
+}
+
+static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			void *type_data)
+{
+	struct igc_adapter *adapter = netdev_priv(dev);
+
+	switch (type) {
+	case TC_SETUP_QDISC_TAPRIO:
+		return igc_tsn_enable_qbv_scheduling(adapter, type_data);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static const struct net_device_ops igc_netdev_ops = {
 	.ndo_open		= igc_open,
 	.ndo_stop		= igc_close,
@@ -4503,6 +4615,7 @@ static const struct net_device_ops igc_netdev_ops = {
 	.ndo_set_features	= igc_set_features,
 	.ndo_features_check	= igc_features_check,
 	.ndo_do_ioctl		= igc_ioctl,
+	.ndo_setup_tc		= igc_setup_tc,
 };
 
 /* PCIe configuration access */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index d4af53a80f11..6093cde2351c 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -231,6 +231,18 @@
 
 #define IGC_RXPBS	0x02404  /* Rx Packet Buffer Size - RW */
 
+/* Transmit Scheduling Registers */
+#define IGC_TQAVCTRL		0x3570
+#define IGC_TXQCTL(_n)		(0x3344 + 0x4 * (_n))
+#define IGC_BASET_L		0x3314
+#define IGC_BASET_H		0x3318
+#define IGC_QBVCYCLET		0x331C
+#define IGC_QBVCYCLET_S		0x3320
+
+#define IGC_STQT(_n)		(0x3324 + 0x4 * (_n))
+#define IGC_ENDQT(_n)		(0x3334 + 0x4 * (_n))
+#define IGC_DTXMXPKTSZ		0x355C
+
 /* System Time Registers */
 #define IGC_SYSTIML	0x0B600  /* System time register Low - RO */
 #define IGC_SYSTIMH	0x0B604  /* System time register High - RO */
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
new file mode 100644
index 000000000000..257fe970afe8
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2019 Intel Corporation */
+
+#include "igc.h"
+#include "igc_tsn.h"
+
+/* Returns the TSN specific registers to their default values after
+ * TSN offloading is disabled.
+ */
+static int igc_tsn_disable_offload(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tqavctrl;
+	int i;
+
+	if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED))
+		return 0;
+
+	adapter->cycle_time = 0;
+
+	wr32(IGC_TXPBS, I225_TXPBSIZE_DEFAULT);
+	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_DEFAULT);
+
+	tqavctrl = rd32(IGC_TQAVCTRL);
+	tqavctrl &= ~(IGC_TQAVCTRL_TRANSMIT_MODE_TSN |
+		      IGC_TQAVCTRL_ENHANCED_QAV);
+	wr32(IGC_TQAVCTRL, tqavctrl);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		ring->start_time = 0;
+		ring->end_time = 0;
+		ring->launchtime_enable = false;
+
+		wr32(IGC_TXQCTL(i), 0);
+		wr32(IGC_STQT(i), 0);
+		wr32(IGC_ENDQT(i), NSEC_PER_SEC);
+	}
+
+	wr32(IGC_QBVCYCLET_S, NSEC_PER_SEC);
+	wr32(IGC_QBVCYCLET, NSEC_PER_SEC);
+
+	adapter->flags &= ~IGC_FLAG_TSN_QBV_ENABLED;
+
+	return 0;
+}
+
+static int igc_tsn_enable_offload(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 tqavctrl, baset_l, baset_h;
+	u32 sec, nsec, cycle;
+	ktime_t base_time, systim;
+	int i;
+
+	if (adapter->flags & IGC_FLAG_TSN_QBV_ENABLED)
+		return 0;
+
+	cycle = adapter->cycle_time;
+	base_time = adapter->base_time;
+
+	wr32(IGC_TSAUXC, 0);
+	wr32(IGC_DTXMXPKTSZ, IGC_DTXMXPKTSZ_TSN);
+	wr32(IGC_TXPBS, IGC_TXPBSIZE_TSN);
+
+	tqavctrl = rd32(IGC_TQAVCTRL);
+	tqavctrl |= IGC_TQAVCTRL_TRANSMIT_MODE_TSN | IGC_TQAVCTRL_ENHANCED_QAV;
+	wr32(IGC_TQAVCTRL, tqavctrl);
+
+	wr32(IGC_QBVCYCLET_S, cycle);
+	wr32(IGC_QBVCYCLET, cycle);
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+		u32 txqctl = 0;
+
+		wr32(IGC_STQT(i), ring->start_time);
+		wr32(IGC_ENDQT(i), ring->end_time);
+
+		if (adapter->base_time) {
+			/* If we have a base_time we are in "taprio"
+			 * mode and we need to be strict about the
+			 * cycles: only transmit a packet if it can be
+			 * completed during that cycle.
+			 */
+			txqctl |= IGC_TXQCTL_STRICT_CYCLE |
+				IGC_TXQCTL_STRICT_END;
+		}
+
+		wr32(IGC_TXQCTL(i), txqctl);
+	}
+
+	nsec = rd32(IGC_SYSTIML);
+	sec = rd32(IGC_SYSTIMH);
+
+	systim = ktime_set(sec, nsec);
+
+	if (ktime_compare(systim, base_time) > 0) {
+		s64 n;
+
+		n = div64_s64(ktime_sub_ns(systim, base_time), cycle);
+		base_time = ktime_add_ns(base_time, (n + 1) * cycle);
+	}
+
+	baset_h = div_s64_rem(base_time, NSEC_PER_SEC, &baset_l);
+
+	wr32(IGC_BASET_H, baset_h);
+	wr32(IGC_BASET_L, baset_l);
+
+	adapter->flags |= IGC_FLAG_TSN_QBV_ENABLED;
+
+	return 0;
+}
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter)
+{
+	bool is_any_enabled = adapter->base_time;
+
+	if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled)
+		return 0;
+
+	if (!is_any_enabled) {
+		int err = igc_tsn_disable_offload(adapter);
+
+		if (err < 0)
+			return err;
+
+		/* The BASET registers aren't cleared when writing
+		 * into them, force a reset if the interface is
+		 * running.
+		 */
+		if (netif_running(adapter->netdev))
+			schedule_work(&adapter->reset_task);
+
+		return 0;
+	}
+
+	return igc_tsn_enable_offload(adapter);
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.h b/drivers/net/ethernet/intel/igc/igc_tsn.h
new file mode 100644
index 000000000000..f76bc86ddccd
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.h
@@ -0,0 +1,9 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2020 Intel Corporation */
+
+#ifndef _IGC_TSN_H_
+#define _IGC_TSN_H_
+
+int igc_tsn_offload_apply(struct igc_adapter *adapter);
+
+#endif /* _IGC_TSN_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 82faa9b799500f9e002067c6d8cb027ab12acca4 Mon Sep 17 00:00:00 2001
From: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Date: Fri, 14 Feb 2020 15:52:03 -0800
Subject: igc: Add support for ETF offloading

This adds support for ETF offloading for the i225 controller.

For i225, the LaunchTime feature is almost a subset of the Qbv
feature. The main change from the i210 is that the launchtime of each
packet is specified as an offset applied to the BASET register. BASET
is automatically incremented each cycle.

For i225, the approach chosen is to re-use most of the setup used for
taprio offloading. With a few changes:

 - The more or less obvious one is that when ETF is enabled, we should
 add the expected launchtime to the (advanced) transmit
 descriptor;

 - The less obvious one is that when taprio offloading is not enabled, we
 add a dummy schedule (all queues are open all the time, with a cycle
 time of 1 second).

Signed-off-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Reviewed-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h |  1 +
 drivers/net/ethernet/intel/igc/igc_main.c    | 70 ++++++++++++++++++++++++++--
 drivers/net/ethernet/intel/igc/igc_tsn.c     | 19 +++++++-
 3 files changed, 86 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 2da5a9b012af..1b0fd2ffd08d 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -440,6 +440,7 @@
 #define IGC_TQAVCTRL_TRANSMIT_MODE_TSN	0x00000001
 #define IGC_TQAVCTRL_ENHANCED_QAV	0x00000008
 
+#define IGC_TXQCTL_QUEUE_MODE_LAUNCHT	0x00000001
 #define IGC_TXQCTL_STRICT_CYCLE		0x00000002
 #define IGC_TXQCTL_STRICT_END		0x00000004
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 12d672a6bc45..896b314035c9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -869,6 +869,23 @@ static int igc_write_mc_addr_list(struct net_device *netdev)
 	return netdev_mc_count(netdev);
 }
 
+static __le32 igc_tx_launchtime(struct igc_adapter *adapter, ktime_t txtime)
+{
+	ktime_t cycle_time = adapter->cycle_time;
+	ktime_t base_time = adapter->base_time;
+	u32 launchtime;
+
+	/* FIXME: when using ETF together with taprio, we may have a
+	 * case where 'delta' is larger than the cycle_time, this may
+	 * cause problems if we don't read the current value of
+	 * IGC_BASET, as the value written into the launchtime
+	 * descriptor field may be misinterpreted.
+	 */
+	div_s64_rem(ktime_sub_ns(txtime, base_time), cycle_time, &launchtime);
+
+	return cpu_to_le32(launchtime);
+}
+
 static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 			    struct igc_tx_buffer *first,
 			    u32 vlan_macip_lens, u32 type_tucmd,
@@ -876,7 +893,6 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 {
 	struct igc_adv_tx_context_desc *context_desc;
 	u16 i = tx_ring->next_to_use;
-	struct timespec64 ts;
 
 	context_desc = IGC_TX_CTXTDESC(tx_ring, i);
 
@@ -898,9 +914,12 @@ static void igc_tx_ctxtdesc(struct igc_ring *tx_ring,
 	 * should have been handled by the upper layers.
 	 */
 	if (tx_ring->launchtime_enable) {
-		ts = ktime_to_timespec64(first->skb->tstamp);
+		struct igc_adapter *adapter = netdev_priv(tx_ring->netdev);
+		ktime_t txtime = first->skb->tstamp;
+
 		first->skb->tstamp = ktime_set(0, 0);
-		context_desc->launch_time = cpu_to_le32(ts.tv_nsec / 32);
+		context_desc->launch_time = igc_tx_launchtime(adapter,
+							      txtime);
 	} else {
 		context_desc->launch_time = 0;
 	}
@@ -4496,6 +4515,32 @@ static int igc_ioctl(struct net_device *netdev, struct ifreq *ifr, int cmd)
 	}
 }
 
+static int igc_save_launchtime_params(struct igc_adapter *adapter, int queue,
+				      bool enable)
+{
+	struct igc_ring *ring;
+	int i;
+
+	if (queue < 0 || queue >= adapter->num_tx_queues)
+		return -EINVAL;
+
+	ring = adapter->tx_ring[queue];
+	ring->launchtime_enable = enable;
+
+	if (adapter->base_time)
+		return 0;
+
+	adapter->cycle_time = NSEC_PER_SEC;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		ring = adapter->tx_ring[i];
+		ring->start_time = 0;
+		ring->end_time = NSEC_PER_SEC;
+	}
+
+	return 0;
+}
+
 static bool validate_schedule(const struct tc_taprio_qopt_offload *qopt)
 {
 	int queue_uses[IGC_MAX_TX_QUEUES] = { };
@@ -4528,6 +4573,22 @@ static bool validate_schedule(const struct tc_taprio_qopt_offload *qopt)
 	return true;
 }
 
+static int igc_tsn_enable_launchtime(struct igc_adapter *adapter,
+				     struct tc_etf_qopt_offload *qopt)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int err;
+
+	if (hw->mac.type != igc_i225)
+		return -EOPNOTSUPP;
+
+	err = igc_save_launchtime_params(adapter, qopt->queue, qopt->enable);
+	if (err)
+		return err;
+
+	return igc_tsn_offload_apply(adapter);
+}
+
 static int igc_save_qbv_schedule(struct igc_adapter *adapter,
 				 struct tc_taprio_qopt_offload *qopt)
 {
@@ -4598,6 +4659,9 @@ static int igc_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	case TC_SETUP_QDISC_TAPRIO:
 		return igc_tsn_enable_qbv_scheduling(adapter, type_data);
 
+	case TC_SETUP_QDISC_ETF:
+		return igc_tsn_enable_launchtime(adapter, type_data);
+
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/intel/igc/igc_tsn.c b/drivers/net/ethernet/intel/igc/igc_tsn.c
index 257fe970afe8..174103c4bea6 100644
--- a/drivers/net/ethernet/intel/igc/igc_tsn.c
+++ b/drivers/net/ethernet/intel/igc/igc_tsn.c
@@ -4,6 +4,20 @@
 #include "igc.h"
 #include "igc_tsn.h"
 
+static bool is_any_launchtime(struct igc_adapter *adapter)
+{
+	int i;
+
+	for (i = 0; i < adapter->num_tx_queues; i++) {
+		struct igc_ring *ring = adapter->tx_ring[i];
+
+		if (ring->launchtime_enable)
+			return true;
+	}
+
+	return false;
+}
+
 /* Returns the TSN specific registers to their default values after
  * TSN offloading is disabled.
  */
@@ -88,6 +102,9 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 				IGC_TXQCTL_STRICT_END;
 		}
 
+		if (ring->launchtime_enable)
+			txqctl |= IGC_TXQCTL_QUEUE_MODE_LAUNCHT;
+
 		wr32(IGC_TXQCTL(i), txqctl);
 	}
 
@@ -115,7 +132,7 @@ static int igc_tsn_enable_offload(struct igc_adapter *adapter)
 
 int igc_tsn_offload_apply(struct igc_adapter *adapter)
 {
-	bool is_any_enabled = adapter->base_time;
+	bool is_any_enabled = adapter->base_time || is_any_launchtime(adapter);
 
 	if (!(adapter->flags & IGC_FLAG_TSN_QBV_ENABLED) && !is_any_enabled)
 		return 0;
-- 
cgit v1.2.3-59-g8ed1b


From a34c7f5156654ebaf7eaace102938be7ff7036cb Mon Sep 17 00:00:00 2001
From: Kees Cook <keescook@chromium.org>
Date: Wed, 19 Feb 2020 22:23:02 -0800
Subject: e1000: Distribute switch variables for initialization
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Variables declared in a switch statement before any case statements
cannot be automatically initialized with compiler instrumentation (as
they are not part of any execution flow). With GCC's proposed automatic
stack variable initialization feature, this triggers a warning (and they
don't get initialized). Clang's automatic stack variable initialization
(via CONFIG_INIT_STACK_ALL=y) doesn't throw a warning, but it also
doesn't initialize such variables[1]. Note that these warnings (or silent
skipping) happen before the dead-store elimination optimization phase,
so even when the automatic initializations are later elided in favor of
direct initializations, the warnings remain.

To avoid these problems, move such variables into the "case" where
they're used or lift them up into the main function body.

drivers/net/ethernet/intel/e1000/e1000_main.c: In function ‘e1000_xmit_frame’:
drivers/net/ethernet/intel/e1000/e1000_main.c:3143:18: warning: statement will never be executed [-Wswitch-unreachable]
 3143 |     unsigned int pull_size;
      |                  ^~~~~~~~~

[1] https://bugs.llvm.org/show_bug.cgi?id=44916

Signed-off-by: Kees Cook <keescook@chromium.org>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index f7103356ef56..ac5146d53c4c 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -3136,8 +3136,9 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 		hdr_len = skb_transport_offset(skb) + tcp_hdrlen(skb);
 		if (skb->data_len && hdr_len == len) {
 			switch (hw->mac_type) {
+			case e1000_82544: {
 				unsigned int pull_size;
-			case e1000_82544:
+
 				/* Make sure we have room to chop off 4 bytes,
 				 * and that the end alignment will work out to
 				 * this hardware's requirements
@@ -3158,6 +3159,7 @@ static netdev_tx_t e1000_xmit_frame(struct sk_buff *skb,
 				}
 				len = skb_headlen(skb);
 				break;
+			}
 			default:
 				/* do nothing */
 				break;
-- 
cgit v1.2.3-59-g8ed1b


From f1fd45598205b3eb52779f5d92b7df9d42fb755b Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Thu, 27 Feb 2020 18:57:23 +0200
Subject: igc: Remove unused MDIC_DEST mask

Formally, the Destination bit should be kept reserved to
support legacy drivers and ignored on write/read
operations.
Not applicable for i225 parts.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 1b0fd2ffd08d..d6e07f81ca4c 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -510,7 +510,6 @@
 #define IGC_MDIC_READY		0x10000000
 #define IGC_MDIC_INT_EN		0x20000000
 #define IGC_MDIC_ERROR		0x40000000
-#define IGC_MDIC_DEST		0x80000000
 
 #define IGC_N0_QUEUE		-1
 
-- 
cgit v1.2.3-59-g8ed1b


From 3d1ce3fa83917b19e150352c5a0f9c6324b2da9b Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Thu, 27 Feb 2020 19:19:12 +0200
Subject: igc: Remove unused CTRL_EXT_LINK_MODE_MASK

We support only copper mode
Not applicable for i225 parts

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index d6e07f81ca4c..40d6f557079b 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -44,9 +44,6 @@
 /* Wake Up Packet Memory stores the first 128 bytes of the wake up packet */
 #define IGC_WUPM_BYTES	128
 
-/* Physical Func Reset Done Indication */
-#define IGC_CTRL_EXT_LINK_MODE_MASK	0x00C00000
-
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define COPPER_LINK_UP_LIMIT		10
 #define PHY_AUTO_NEG_LIMIT		45
-- 
cgit v1.2.3-59-g8ed1b


From 89d35511f38da851c71c3ad9d2b8197ee34e0846 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Fri, 28 Feb 2020 02:25:15 +0200
Subject: igc: Remove forward declaration

Move the igc_adapter and igc_ring structures up to avoid
forward declarations.
It is not necessary to forward declare these structures.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 391 +++++++++++++++++------------------
 1 file changed, 194 insertions(+), 197 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 5e36822de5ec..c7b0afd370d4 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -19,8 +19,200 @@
 /* forward declaration */
 void igc_set_ethtool_ops(struct net_device *);
 
-struct igc_adapter;
-struct igc_ring;
+/* Transmit and receive queues */
+#define IGC_MAX_RX_QUEUES		4
+#define IGC_MAX_TX_QUEUES		4
+
+#define MAX_Q_VECTORS			8
+#define MAX_STD_JUMBO_FRAME_SIZE	9216
+
+#define MAX_ETYPE_FILTER		(4 - 1)
+#define IGC_RETA_SIZE			128
+
+struct igc_tx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 restart_queue;
+	u64 restart_queue2;
+};
+
+struct igc_rx_queue_stats {
+	u64 packets;
+	u64 bytes;
+	u64 drops;
+	u64 csum_err;
+	u64 alloc_failed;
+};
+
+struct igc_rx_packet_stats {
+	u64 ipv4_packets;      /* IPv4 headers processed */
+	u64 ipv4e_packets;     /* IPv4E headers with extensions processed */
+	u64 ipv6_packets;      /* IPv6 headers processed */
+	u64 ipv6e_packets;     /* IPv6E headers with extensions processed */
+	u64 tcp_packets;       /* TCP headers processed */
+	u64 udp_packets;       /* UDP headers processed */
+	u64 sctp_packets;      /* SCTP headers processed */
+	u64 nfs_packets;       /* NFS headers processe */
+	u64 other_packets;
+};
+
+struct igc_ring_container {
+	struct igc_ring *ring;          /* pointer to linked list of rings */
+	unsigned int total_bytes;       /* total bytes processed this int */
+	unsigned int total_packets;     /* total packets processed this int */
+	u16 work_limit;                 /* total work allowed per interrupt */
+	u8 count;                       /* total number of rings in vector */
+	u8 itr;                         /* current ITR setting for ring */
+};
+
+struct igc_ring {
+	struct igc_q_vector *q_vector;  /* backlink to q_vector */
+	struct net_device *netdev;      /* back pointer to net_device */
+	struct device *dev;             /* device for dma mapping */
+	union {                         /* array of buffer info structs */
+		struct igc_tx_buffer *tx_buffer_info;
+		struct igc_rx_buffer *rx_buffer_info;
+	};
+	void *desc;                     /* descriptor ring memory */
+	unsigned long flags;            /* ring specific flags */
+	void __iomem *tail;             /* pointer to ring tail register */
+	dma_addr_t dma;                 /* phys address of the ring */
+	unsigned int size;              /* length of desc. ring in bytes */
+
+	u16 count;                      /* number of desc. in the ring */
+	u8 queue_index;                 /* logical index of the ring*/
+	u8 reg_idx;                     /* physical index of the ring */
+	bool launchtime_enable;         /* true if LaunchTime is enabled */
+
+	u32 start_time;
+	u32 end_time;
+
+	/* everything past this point are written often */
+	u16 next_to_clean;
+	u16 next_to_use;
+	u16 next_to_alloc;
+
+	union {
+		/* TX */
+		struct {
+			struct igc_tx_queue_stats tx_stats;
+			struct u64_stats_sync tx_syncp;
+			struct u64_stats_sync tx_syncp2;
+		};
+		/* RX */
+		struct {
+			struct igc_rx_queue_stats rx_stats;
+			struct igc_rx_packet_stats pkt_stats;
+			struct u64_stats_sync rx_syncp;
+			struct sk_buff *skb;
+		};
+	};
+} ____cacheline_internodealigned_in_smp;
+
+/* Board specific private data structure */
+struct igc_adapter {
+	struct net_device *netdev;
+
+	unsigned long state;
+	unsigned int flags;
+	unsigned int num_q_vectors;
+
+	struct msix_entry *msix_entries;
+
+	/* TX */
+	u16 tx_work_limit;
+	u32 tx_timeout_count;
+	int num_tx_queues;
+	struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES];
+
+	/* RX */
+	int num_rx_queues;
+	struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES];
+
+	struct timer_list watchdog_timer;
+	struct timer_list dma_err_timer;
+	struct timer_list phy_info_timer;
+
+	u32 wol;
+	u32 en_mng_pt;
+	u16 link_speed;
+	u16 link_duplex;
+
+	u8 port_num;
+
+	u8 __iomem *io_addr;
+	/* Interrupt Throttle Rate */
+	u32 rx_itr_setting;
+	u32 tx_itr_setting;
+
+	struct work_struct reset_task;
+	struct work_struct watchdog_task;
+	struct work_struct dma_err_task;
+	bool fc_autoneg;
+
+	u8 tx_timeout_factor;
+
+	int msg_enable;
+	u32 max_frame_size;
+	u32 min_frame_size;
+
+	ktime_t base_time;
+	ktime_t cycle_time;
+
+	/* OS defined structs */
+	struct pci_dev *pdev;
+	/* lock for statistics */
+	spinlock_t stats64_lock;
+	struct rtnl_link_stats64 stats64;
+
+	/* structs defined in igc_hw.h */
+	struct igc_hw hw;
+	struct igc_hw_stats stats;
+
+	struct igc_q_vector *q_vector[MAX_Q_VECTORS];
+	u32 eims_enable_mask;
+	u32 eims_other;
+
+	u16 tx_ring_count;
+	u16 rx_ring_count;
+
+	u32 tx_hwtstamp_timeouts;
+	u32 tx_hwtstamp_skipped;
+	u32 rx_hwtstamp_cleared;
+
+	u32 rss_queues;
+	u32 rss_indir_tbl_init;
+
+	/* RX network flow classification support */
+	struct hlist_head nfc_filter_list;
+	struct hlist_head cls_flower_list;
+	unsigned int nfc_filter_count;
+
+	/* lock for RX network flow classification filter */
+	spinlock_t nfc_lock;
+	bool etype_bitmap[MAX_ETYPE_FILTER];
+
+	struct igc_mac_addr *mac_table;
+
+	u8 rss_indir_tbl[IGC_RETA_SIZE];
+
+	unsigned long link_check_timeout;
+	struct igc_info ei;
+
+	struct ptp_clock *ptp_clock;
+	struct ptp_clock_info ptp_caps;
+	struct work_struct ptp_tx_work;
+	struct sk_buff *ptp_tx_skb;
+	struct hwtstamp_config tstamp_config;
+	unsigned long ptp_tx_start;
+	unsigned long last_rx_ptp_check;
+	unsigned long last_rx_timestamp;
+	unsigned int ptp_flags;
+	/* System time value lock */
+	spinlock_t tmreg_lock;
+	struct cyclecounter cc;
+	struct timecounter tc;
+};
 
 void igc_up(struct igc_adapter *adapter);
 void igc_down(struct igc_adapter *adapter);
@@ -50,7 +242,6 @@ extern char igc_driver_name[];
 extern char igc_driver_version[];
 
 #define IGC_REGS_LEN			740
-#define IGC_RETA_SIZE			128
 
 /* flags controlling PTP/1588 function */
 #define IGC_PTP_ENABLED		BIT(0)
@@ -100,13 +291,6 @@ extern char igc_driver_version[];
 #define IGC_MIN_RXD		80
 #define IGC_MAX_RXD		4096
 
-/* Transmit and receive queues */
-#define IGC_MAX_RX_QUEUES		4
-#define IGC_MAX_TX_QUEUES		4
-
-#define MAX_Q_VECTORS			8
-#define MAX_STD_JUMBO_FRAME_SIZE	9216
-
 /* Supported Rx Buffer Sizes */
 #define IGC_RXBUFFER_256		256
 #define IGC_RXBUFFER_2048		2048
@@ -233,86 +417,6 @@ struct igc_rx_buffer {
 	__u16 pagecnt_bias;
 };
 
-struct igc_tx_queue_stats {
-	u64 packets;
-	u64 bytes;
-	u64 restart_queue;
-	u64 restart_queue2;
-};
-
-struct igc_rx_queue_stats {
-	u64 packets;
-	u64 bytes;
-	u64 drops;
-	u64 csum_err;
-	u64 alloc_failed;
-};
-
-struct igc_rx_packet_stats {
-	u64 ipv4_packets;      /* IPv4 headers processed */
-	u64 ipv4e_packets;     /* IPv4E headers with extensions processed */
-	u64 ipv6_packets;      /* IPv6 headers processed */
-	u64 ipv6e_packets;     /* IPv6E headers with extensions processed */
-	u64 tcp_packets;       /* TCP headers processed */
-	u64 udp_packets;       /* UDP headers processed */
-	u64 sctp_packets;      /* SCTP headers processed */
-	u64 nfs_packets;       /* NFS headers processe */
-	u64 other_packets;
-};
-
-struct igc_ring_container {
-	struct igc_ring *ring;          /* pointer to linked list of rings */
-	unsigned int total_bytes;       /* total bytes processed this int */
-	unsigned int total_packets;     /* total packets processed this int */
-	u16 work_limit;                 /* total work allowed per interrupt */
-	u8 count;                       /* total number of rings in vector */
-	u8 itr;                         /* current ITR setting for ring */
-};
-
-struct igc_ring {
-	struct igc_q_vector *q_vector;  /* backlink to q_vector */
-	struct net_device *netdev;      /* back pointer to net_device */
-	struct device *dev;             /* device for dma mapping */
-	union {                         /* array of buffer info structs */
-		struct igc_tx_buffer *tx_buffer_info;
-		struct igc_rx_buffer *rx_buffer_info;
-	};
-	void *desc;                     /* descriptor ring memory */
-	unsigned long flags;            /* ring specific flags */
-	void __iomem *tail;             /* pointer to ring tail register */
-	dma_addr_t dma;                 /* phys address of the ring */
-	unsigned int size;              /* length of desc. ring in bytes */
-
-	u16 count;                      /* number of desc. in the ring */
-	u8 queue_index;                 /* logical index of the ring*/
-	u8 reg_idx;                     /* physical index of the ring */
-	bool launchtime_enable;		/* true if LaunchTime is enabled */
-
-	u32 start_time;
-	u32 end_time;
-
-	/* everything past this point are written often */
-	u16 next_to_clean;
-	u16 next_to_use;
-	u16 next_to_alloc;
-
-	union {
-		/* TX */
-		struct {
-			struct igc_tx_queue_stats tx_stats;
-			struct u64_stats_sync tx_syncp;
-			struct u64_stats_sync tx_syncp2;
-		};
-		/* RX */
-		struct {
-			struct igc_rx_queue_stats rx_stats;
-			struct igc_rx_packet_stats pkt_stats;
-			struct u64_stats_sync rx_syncp;
-			struct sk_buff *skb;
-		};
-	};
-} ____cacheline_internodealigned_in_smp;
-
 struct igc_q_vector {
 	struct igc_adapter *adapter;    /* backlink */
 	void __iomem *itr_register;
@@ -333,8 +437,6 @@ struct igc_q_vector {
 	struct igc_ring ring[] ____cacheline_internodealigned_in_smp;
 };
 
-#define MAX_ETYPE_FILTER		(4 - 1)
-
 enum igc_filter_match_flags {
 	IGC_FILTER_FLAG_ETHER_TYPE =	0x1,
 	IGC_FILTER_FLAG_VLAN_TCI   =	0x2,
@@ -378,111 +480,6 @@ struct igc_mac_addr {
 
 #define IGC_MAX_RXNFC_FILTERS		16
 
-/* Board specific private data structure */
-struct igc_adapter {
-	struct net_device *netdev;
-
-	unsigned long state;
-	unsigned int flags;
-	unsigned int num_q_vectors;
-
-	struct msix_entry *msix_entries;
-
-	/* TX */
-	u16 tx_work_limit;
-	u32 tx_timeout_count;
-	int num_tx_queues;
-	struct igc_ring *tx_ring[IGC_MAX_TX_QUEUES];
-
-	/* RX */
-	int num_rx_queues;
-	struct igc_ring *rx_ring[IGC_MAX_RX_QUEUES];
-
-	struct timer_list watchdog_timer;
-	struct timer_list dma_err_timer;
-	struct timer_list phy_info_timer;
-
-	u32 wol;
-	u32 en_mng_pt;
-	u16 link_speed;
-	u16 link_duplex;
-
-	u8 port_num;
-
-	u8 __iomem *io_addr;
-	/* Interrupt Throttle Rate */
-	u32 rx_itr_setting;
-	u32 tx_itr_setting;
-
-	struct work_struct reset_task;
-	struct work_struct watchdog_task;
-	struct work_struct dma_err_task;
-	bool fc_autoneg;
-
-	u8 tx_timeout_factor;
-
-	int msg_enable;
-	u32 max_frame_size;
-	u32 min_frame_size;
-
-	ktime_t base_time;
-	ktime_t cycle_time;
-
-	/* OS defined structs */
-	struct pci_dev *pdev;
-	/* lock for statistics */
-	spinlock_t stats64_lock;
-	struct rtnl_link_stats64 stats64;
-
-	/* structs defined in igc_hw.h */
-	struct igc_hw hw;
-	struct igc_hw_stats stats;
-
-	struct igc_q_vector *q_vector[MAX_Q_VECTORS];
-	u32 eims_enable_mask;
-	u32 eims_other;
-
-	u16 tx_ring_count;
-	u16 rx_ring_count;
-
-	u32 tx_hwtstamp_timeouts;
-	u32 tx_hwtstamp_skipped;
-	u32 rx_hwtstamp_cleared;
-
-	u32 rss_queues;
-	u32 rss_indir_tbl_init;
-
-	/* RX network flow classification support */
-	struct hlist_head nfc_filter_list;
-	struct hlist_head cls_flower_list;
-	unsigned int nfc_filter_count;
-
-	/* lock for RX network flow classification filter */
-	spinlock_t nfc_lock;
-	bool etype_bitmap[MAX_ETYPE_FILTER];
-
-	struct igc_mac_addr *mac_table;
-
-	u8 rss_indir_tbl[IGC_RETA_SIZE];
-
-	unsigned long link_check_timeout;
-	struct igc_info ei;
-
-	struct ptp_clock *ptp_clock;
-	struct ptp_clock_info ptp_caps;
-	struct work_struct ptp_tx_work;
-	struct sk_buff *ptp_tx_skb;
-	struct hwtstamp_config tstamp_config;
-	unsigned long ptp_tx_start;
-	unsigned long last_rx_ptp_check;
-	unsigned long last_rx_timestamp;
-	unsigned int ptp_flags;
-	/* System time value lock */
-	spinlock_t tmreg_lock;
-	struct cyclecounter cc;
-	struct timecounter tc;
-};
-
 /* igc_desc_unused - calculate if we have unused descriptors */
 static inline u16 igc_desc_unused(const struct igc_ring *ring)
 {
-- 
cgit v1.2.3-59-g8ed1b


From b8a61ea15cdf8524f91dbad90a4f1fae13b0120b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 28 Feb 2020 00:19:57 -0800
Subject: igc: Fix overwrites when dumping registers

This patch fixes some register overwriting when dumping registers via
ethtool.

We have a total of 16 RAL registers, starting at offset 139. So RAH
offset should be 139 + 16 = 155, not 145. As a result, some RAL registers
are overwritten. Likewise, RAH registers are also overwritten by TDBAL,
TDBAH, TDLEN, and TDH registers.

To fix this bug while preserving the ABI, this patch re-writes RAL and
RAH registers at the end of 'regs_buff' and bumps regs->version. It also
removes some pointless comments in the middle of igc_set_regs().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f530fc29b074..ff2a40496e4e 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -153,7 +153,7 @@ static void igc_get_regs(struct net_device *netdev,
 
 	memset(p, 0, IGC_REGS_LEN * sizeof(u32));
 
-	regs->version = (1u << 24) | (hw->revision_id << 16) | hw->device_id;
+	regs->version = (2u << 24) | (hw->revision_id << 16) | hw->device_id;
 
 	/* General Registers */
 	regs_buff[0] = rd32(IGC_CTRL);
@@ -306,6 +306,15 @@ static void igc_get_regs(struct net_device *netdev,
 		regs_buff[164 + i] = rd32(IGC_TDT(i));
 	for (i = 0; i < 4; i++)
 		regs_buff[168 + i] = rd32(IGC_TXDCTL(i));
+
+	/* XXX: Due to a bug few lines above, RAL and RAH registers are
+	 * overwritten. To preserve the ABI, we write these registers again in
+	 * regs_buff.
+	 */
+	for (i = 0; i < 16; i++)
+		regs_buff[172 + i] = rd32(IGC_RAL(i));
+	for (i = 0; i < 16; i++)
+		regs_buff[188 + i] = rd32(IGC_RAH(i));
 }
 
 static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-- 
cgit v1.2.3-59-g8ed1b


From 64900e8ff551dd6ae891651b6d74789378472ee1 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Fri, 28 Feb 2020 19:50:07 +0200
Subject: igc: Fix double definition

IGC_START_ITR has been defined twice.
This patch fixes it.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index c7b0afd370d4..4643f358b843 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -246,9 +246,6 @@ extern char igc_driver_version[];
 /* flags controlling PTP/1588 function */
 #define IGC_PTP_ENABLED		BIT(0)
 
-/* Interrupt defines */
-#define IGC_START_ITR			648 /* ~6000 ints/sec */
-
 /* Flags definitions */
 #define IGC_FLAG_HAS_MSI		BIT(0)
 #define IGC_FLAG_QUEUE_PAIRS		BIT(3)
@@ -270,6 +267,7 @@ extern char igc_driver_version[];
 #define IGC_MRQC_RSS_FIELD_IPV4_UDP	0x00400000
 #define IGC_MRQC_RSS_FIELD_IPV6_UDP	0x00800000
 
+/* Interrupt defines */
 #define IGC_START_ITR			648 /* ~6000 ints/sec */
 #define IGC_4K_ITR			980
 #define IGC_20K_ITR			196
-- 
cgit v1.2.3-59-g8ed1b


From 635071e2c48d4a9261a0df8401155dbd959efd3d Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Tue, 3 Mar 2020 02:28:08 +0200
Subject: igc: Enable NETIF_F_HW_TC flag

This assignment of the feature NETIF_F_HW_TC
occurs prior to the initial setup of the local
hw_features variable.

This ensures that NETIF_F_HW_TC is marked
as user changeable, and also enables it by
default when the driver loads.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 896b314035c9..800268da0834 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4903,6 +4903,7 @@ static int igc_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_RXCSUM;
 	netdev->features |= NETIF_F_HW_CSUM;
 	netdev->features |= NETIF_F_SCTP_CRC;
+	netdev->features |= NETIF_F_HW_TC;
 
 #define IGC_GSO_PARTIAL_FEATURES (NETIF_F_GSO_GRE | \
 				  NETIF_F_GSO_GRE_CSUM | \
-- 
cgit v1.2.3-59-g8ed1b


From 2e39d2c8ff9654ba508c973ade5df332f53f41cc Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Tue, 3 Mar 2020 20:26:10 +0200
Subject: igc: Remove copper fiber switch control

The i225 device supports copper mode only.
The PHY signal detect indication for the copper/fiber switch
is not applicable to the i225 part.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 2 --
 drivers/net/ethernet/intel/igc/igc_main.c    | 9 ---------
 2 files changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 40d6f557079b..42fe4d75cc0d 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -91,8 +91,6 @@
 #define IGC_CTRL_RFCE		0x08000000  /* Receive Flow Control enable */
 #define IGC_CTRL_TFCE		0x10000000  /* Transmit flow control enable */
 
-#define IGC_CONNSW_AUTOSENSE_EN	0x1
-
 /* As per the EAS the maximum supported size is 9.5KB (9728 bytes) */
 #define MAX_JUMBO_FRAME_SIZE	0x2600
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 800268da0834..44366c1bec19 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4033,7 +4033,6 @@ static void igc_watchdog_task(struct work_struct *work)
 	struct igc_hw *hw = &adapter->hw;
 	struct igc_phy_info *phy = &hw->phy;
 	u16 phy_data, retry_count = 20;
-	u32 connsw;
 	u32 link;
 	int i;
 
@@ -4046,14 +4045,6 @@ static void igc_watchdog_task(struct work_struct *work)
 			link = false;
 	}
 
-	/* Force link down if we have fiber to swap to */
-	if (adapter->flags & IGC_FLAG_MAS_ENABLE) {
-		if (hw->phy.media_type == igc_media_type_copper) {
-			connsw = rd32(IGC_CONNSW);
-			if (!(connsw & IGC_CONNSW_AUTOSENSE_EN))
-				link = 0;
-		}
-	}
 	if (link) {
 		/* Cancel scheduled suspend requests. */
 		pm_runtime_resume(netdev->dev.parent);
-- 
cgit v1.2.3-59-g8ed1b


From 27945ebe5b980f796fa04dd61511796ac5b80cc2 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 6 Mar 2020 15:54:03 -0800
Subject: igc: Fix NFC queue redirection support

The support for ethtool Network Flow Classification (NFC) queue
redirection based on destination MAC address is currently broken in IGC.
For instance, if we add the following rule, matching frames aren't
enqueued on the expected rx queue.

$ ethtool -N IFNAME flow-type ether dst 3c:fd:fe:9e:7f:71 queue 2

The issue here is due to the fact that igc_rar_set_index() is missing
code to enable the queue selection feature from Receive Address High
(RAH) register. This patch adds the missing code and fixes the issue.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h |  5 ++++-
 drivers/net/ethernet/intel/igc/igc_main.c    | 11 ++++++++---
 2 files changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 42fe4d75cc0d..af0c03d77a39 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -63,8 +63,11 @@
  * (RAR[15]) for our directed address used by controllers with
  * manageability enabled, allowing us room for 15 multicast addresses.
  */
+#define IGC_RAH_QSEL_MASK	0x000C0000
+#define IGC_RAH_QSEL_SHIFT	18
+#define IGC_RAH_QSEL_ENABLE	BIT(28)
 #define IGC_RAH_AV		0x80000000 /* Receive descriptor valid */
-#define IGC_RAH_POOL_1		0x00040000
+
 #define IGC_RAL_MAC_ADDR_LEN	4
 #define IGC_RAH_MAC_ADDR_LEN	2
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 44366c1bec19..85df9366e172 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -780,13 +780,18 @@ static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
 	rar_low = le32_to_cpup((__le32 *)(addr));
 	rar_high = le16_to_cpup((__le16 *)(addr + 4));
 
+	if (adapter->mac_table[index].state & IGC_MAC_STATE_QUEUE_STEERING) {
+		u8 queue = adapter->mac_table[index].queue;
+		u32 qsel = IGC_RAH_QSEL_MASK & (queue << IGC_RAH_QSEL_SHIFT);
+
+		rar_high |= qsel;
+		rar_high |= IGC_RAH_QSEL_ENABLE;
+	}
+
 	/* Indicate to hardware the Address is Valid. */
 	if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
 		if (is_valid_ether_addr(addr))
 			rar_high |= IGC_RAH_AV;
-
-		rar_high |= IGC_RAH_POOL_1 <<
-			adapter->mac_table[index].queue;
 	}
 
 	wr32(IGC_RAL(index), rar_low);
-- 
cgit v1.2.3-59-g8ed1b


From c24fd2481e0bd3d2c5755c7a3dc898ef249c0ddb Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 6 Mar 2020 16:36:42 -0800
Subject: igc: Remove dead code related to flower filter

IGC driver has no support for tc-flower filters so this patch removes
some leftover code, probably copied from IGB driver by mistake.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      | 1 -
 drivers/net/ethernet/intel/igc/igc_main.c | 3 ---
 2 files changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 4643f358b843..5f21dcfe99ce 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -185,7 +185,6 @@ struct igc_adapter {
 
 	/* RX network flow classification support */
 	struct hlist_head nfc_filter_list;
-	struct hlist_head cls_flower_list;
 	unsigned int nfc_filter_count;
 
 	/* lock for RX network flow classification filter */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 85df9366e172..6acb85842d0a 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3487,9 +3487,6 @@ static void igc_nfc_filter_exit(struct igc_adapter *adapter)
 	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
 		igc_erase_filter(adapter, rule);
 
-	hlist_for_each_entry(rule, &adapter->cls_flower_list, nfc_node)
-		igc_erase_filter(adapter, rule);
-
 	spin_unlock(&adapter->nfc_lock);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ac9156b27564a089ec52f526bfcb59f61c34e7c6 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Mon, 9 Mar 2020 16:10:40 -0700
Subject: igc: Fix default MAC address filter override

This patch fixes a bug when the user adds the first MAC address filter
via ethtool NFC mechanism.

When the first MAC address filter is added, it overwrites the default
MAC address filter configured at RAL[0] and RAH[0]. As a consequence,
frames addressed to the interface MAC address are not sent to host
anymore.

This patch fixes the bug by calling igc_set_default_mac_filter() during
adapter init so the position 0 of adapter->mac_table[] is assigned to
the default MAC address.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 6acb85842d0a..9d1792e80e2e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2354,7 +2354,9 @@ static void igc_configure(struct igc_adapter *adapter)
 	igc_setup_mrqc(adapter);
 	igc_setup_rctl(adapter);
 
+	igc_set_default_mac_filter(adapter);
 	igc_nfc_filter_restore(adapter);
+
 	igc_configure_tx(adapter);
 	igc_configure_rx(adapter);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5800091a206172e2016e84906035f1d757cc6547 Mon Sep 17 00:00:00 2001
From: David Bauer <mail@david-bauer.net>
Date: Fri, 17 Apr 2020 15:41:59 +0200
Subject: net: phy: at803x: add support for AR8032 PHY

This adds support for the Qualcomm Atheros AR8032 Fast Ethernet PHY.

It shares many similarities with the already supported AR8030 PHY but
additionally supports MII connection to the MAC.

Signed-off-by: David Bauer <mail@david-bauer.net>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 31f731e6df72..31b6edcc1fd1 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -106,6 +106,7 @@
 #define ATH9331_PHY_ID 0x004dd041
 #define ATH8030_PHY_ID 0x004dd076
 #define ATH8031_PHY_ID 0x004dd074
+#define ATH8032_PHY_ID 0x004dd023
 #define ATH8035_PHY_ID 0x004dd072
 #define AT803X_PHY_ID_MASK			0xffffffef
 
@@ -762,6 +763,21 @@ static struct phy_driver at803x_driver[] = {
 	.aneg_done		= at803x_aneg_done,
 	.ack_interrupt		= &at803x_ack_interrupt,
 	.config_intr		= &at803x_config_intr,
+}, {
+	/* Qualcomm Atheros AR8032 */
+	PHY_ID_MATCH_EXACT(ATH8032_PHY_ID),
+	.name			= "Qualcomm Atheros AR8032",
+	.probe			= at803x_probe,
+	.remove			= at803x_remove,
+	.config_init		= at803x_config_init,
+	.link_change_notify	= at803x_link_change_notify,
+	.set_wol		= at803x_set_wol,
+	.get_wol		= at803x_get_wol,
+	.suspend		= at803x_suspend,
+	.resume			= at803x_resume,
+	/* PHY_BASIC_FEATURES */
+	.ack_interrupt		= at803x_ack_interrupt,
+	.config_intr		= at803x_config_intr,
 }, {
 	/* ATHEROS AR9331 */
 	PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
@@ -778,6 +794,7 @@ module_phy_driver(at803x_driver);
 static struct mdio_device_id __maybe_unused atheros_tbl[] = {
 	{ ATH8030_PHY_ID, AT803X_PHY_ID_MASK },
 	{ ATH8031_PHY_ID, AT803X_PHY_ID_MASK },
+	{ PHY_ID_MATCH_EXACT(ATH8032_PHY_ID) },
 	{ ATH8035_PHY_ID, AT803X_PHY_ID_MASK },
 	{ PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) },
 	{ }
-- 
cgit v1.2.3-59-g8ed1b


From 007fc3c0ca478f3a8ad687cf9ecbe672d3a64700 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Apr 2020 11:33:41 -0700
Subject: net: dsa: b53: per-port interrupts are optional

Make use of platform_get_irq_byname_optional() to avoid printing
messages on the kernel console that interrupts cannot be found.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_srab.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_srab.c b/drivers/net/dsa/b53/b53_srab.c
index 0a1be5259be0..1207c3095027 100644
--- a/drivers/net/dsa/b53/b53_srab.c
+++ b/drivers/net/dsa/b53/b53_srab.c
@@ -524,7 +524,7 @@ static void b53_srab_prepare_irq(struct platform_device *pdev)
 
 		port->num = i;
 		port->dev = dev;
-		port->irq = platform_get_irq_byname(pdev, name);
+		port->irq = platform_get_irq_byname_optional(pdev, name);
 		kfree(name);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From c6f5f242f5ed58cc531e75507e8447a8c9b6cd30 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Apr 2020 11:34:20 -0700
Subject: net: phy: mdio-bcm-iproc: Do not show kernel pointer

Displaying the virtual address at which the MDIO base register address
has been mapped is not useful and is not visible with pointer hashing in
place. Replace the message with something indicating successful
registration instead.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bcm-iproc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c
index f1ded03f0229..38bf40e0d673 100644
--- a/drivers/net/phy/mdio-bcm-iproc.c
+++ b/drivers/net/phy/mdio-bcm-iproc.c
@@ -159,7 +159,7 @@ static int iproc_mdio_probe(struct platform_device *pdev)
 
 	platform_set_drvdata(pdev, priv);
 
-	dev_info(&pdev->dev, "Broadcom iProc MDIO bus at 0x%p\n", priv->base);
+	dev_info(&pdev->dev, "Broadcom iProc MDIO bus registered\n");
 
 	return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 123aff2a789c3975c2235653939ff00107d6156c Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Fri, 17 Apr 2020 11:38:02 -0700
Subject: net: phy: broadcom: Add support for BCM53125 internal PHYs

BCM53125 has internal Gigabit PHYs which support interrupts as well as
statistics. Make it possible to configure both of those features with a
PHY driver entry.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 14 ++++++++++++++
 include/linux/brcmphy.h    |  1 +
 2 files changed, 15 insertions(+)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index ae4873f2f86e..97201d5cf007 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -781,6 +781,19 @@ static struct phy_driver broadcom_drivers[] = {
 	.get_strings	= bcm_phy_get_strings,
 	.get_stats	= bcm53xx_phy_get_stats,
 	.probe		= bcm53xx_phy_probe,
+}, {
+	.phy_id		= PHY_ID_BCM53125,
+	.phy_id_mask	= 0xfffffff0,
+	.name		= "Broadcom BCM53125",
+	.flags		= PHY_IS_INTERNAL,
+	/* PHY_GBIT_FEATURES */
+	.get_sset_count	= bcm_phy_get_sset_count,
+	.get_strings	= bcm_phy_get_strings,
+	.get_stats	= bcm53xx_phy_get_stats,
+	.probe		= bcm53xx_phy_probe,
+	.config_init	= bcm54xx_config_init,
+	.ack_interrupt	= bcm_phy_ack_intr,
+	.config_intr	= bcm_phy_config_intr,
 }, {
 	.phy_id         = PHY_ID_BCM89610,
 	.phy_id_mask    = 0xfffffff0,
@@ -810,6 +823,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
 	{ PHY_ID_BCMAC131, 0xfffffff0 },
 	{ PHY_ID_BCM5241, 0xfffffff0 },
 	{ PHY_ID_BCM5395, 0xfffffff0 },
+	{ PHY_ID_BCM53125, 0xfffffff0 },
 	{ PHY_ID_BCM89610, 0xfffffff0 },
 	{ }
 };
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 6462c5447872..7e1d857c8468 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -15,6 +15,7 @@
 #define PHY_ID_BCMAC131			0x0143bc70
 #define PHY_ID_BCM5481			0x0143bca0
 #define PHY_ID_BCM5395			0x0143bcf0
+#define PHY_ID_BCM53125			0x03625f20
 #define PHY_ID_BCM54810			0x03625d00
 #define PHY_ID_BCM5482			0x0143bcb0
 #define PHY_ID_BCM5411			0x00206070
-- 
cgit v1.2.3-59-g8ed1b


From 89f9ffd3eb670bad1260bc579f5e13b8f2d5b3e0 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Apr 2020 22:03:08 +0300
Subject: net: mscc: ocelot: deal with problematic MAC_ETYPE VCAP IS2 rules

By default, the VCAP IS2 will produce a single match for each frame, on
the most specific classification.

Example: a ping packet (ICMP over IPv4 over Ethernet) sent from an IP
address of 10.0.0.1 and a MAC address of 96:18:82:00:04:01 will match
this rule:

tc filter add dev swp0 ingress protocol ipv4 \
	flower skip_sw src_ip 10.0.0.1 action drop

but not this one:

tc filter add dev swp0 ingress \
	flower skip_sw src_mac 96:18:82:00:04:01 action drop

Currently the driver does not really warn the user in any way about
this, and the behavior is rather strange anyway.

The current patch is a workaround to force matches on MAC_ETYPE keys
(DMAC and SMAC) for all packets irrespective of higher layer protocol.
The setting is made at the port level.

Of course this breaks all other non-src_mac and non-dst_mac matches, so
rule exclusivity checks have been added to the driver, in order to never
have rules of both types on any ingress port.

The bits that discard higher-level protocol information are set only
once a MAC_ETYPE rule is added to a filter block, and only for the ports
that are bound to that filter block. Then all further non-MAC_ETYPE
rules added to that filter block should be denied by the ports bound to
it.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_ace.c    | 103 +++++++++++++++++++++++++++++-
 drivers/net/ethernet/mscc/ocelot_ace.h    |   5 +-
 drivers/net/ethernet/mscc/ocelot_flower.c |   2 +-
 3 files changed, 106 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c
index 3bd286044480..8a2f7d13ef6d 100644
--- a/drivers/net/ethernet/mscc/ocelot_ace.c
+++ b/drivers/net/ethernet/mscc/ocelot_ace.c
@@ -706,13 +706,114 @@ ocelot_ace_rule_get_rule_index(struct ocelot_acl_block *block, int index)
 	return NULL;
 }
 
+/* If @on=false, then SNAP, ARP, IP and OAM frames will not match on keys based
+ * on destination and source MAC addresses, but only on higher-level protocol
+ * information. The only frame types to match on keys containing MAC addresses
+ * in this case are non-SNAP, non-ARP, non-IP and non-OAM frames.
+ *
+ * If @on=true, then the above frame types (SNAP, ARP, IP and OAM) will match
+ * on MAC_ETYPE keys such as destination and source MAC on this ingress port.
+ * However the setting has the side effect of making these frames not matching
+ * on any _other_ keys than MAC_ETYPE ones.
+ */
+static void ocelot_match_all_as_mac_etype(struct ocelot *ocelot, int port,
+					  bool on)
+{
+	u32 val = 0;
+
+	if (on)
+		val = ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS(3) |
+		      ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS(3) |
+		      ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS(3) |
+		      ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS(3) |
+		      ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS(3);
+
+	ocelot_rmw_gix(ocelot, val,
+		       ANA_PORT_VCAP_S2_CFG_S2_SNAP_DIS_M |
+		       ANA_PORT_VCAP_S2_CFG_S2_ARP_DIS_M |
+		       ANA_PORT_VCAP_S2_CFG_S2_IP_TCPUDP_DIS_M |
+		       ANA_PORT_VCAP_S2_CFG_S2_IP_OTHER_DIS_M |
+		       ANA_PORT_VCAP_S2_CFG_S2_OAM_DIS_M,
+		       ANA_PORT_VCAP_S2_CFG, port);
+}
+
+static bool ocelot_ace_is_problematic_mac_etype(struct ocelot_ace_rule *ace)
+{
+	if (ace->type != OCELOT_ACE_TYPE_ETYPE)
+		return false;
+	if (ether_addr_to_u64(ace->frame.etype.dmac.value) &
+	    ether_addr_to_u64(ace->frame.etype.dmac.mask))
+		return true;
+	if (ether_addr_to_u64(ace->frame.etype.smac.value) &
+	    ether_addr_to_u64(ace->frame.etype.smac.mask))
+		return true;
+	return false;
+}
+
+static bool ocelot_ace_is_problematic_non_mac_etype(struct ocelot_ace_rule *ace)
+{
+	if (ace->type == OCELOT_ACE_TYPE_SNAP)
+		return true;
+	if (ace->type == OCELOT_ACE_TYPE_ARP)
+		return true;
+	if (ace->type == OCELOT_ACE_TYPE_IPV4)
+		return true;
+	if (ace->type == OCELOT_ACE_TYPE_IPV6)
+		return true;
+	return false;
+}
+
+static bool ocelot_exclusive_mac_etype_ace_rules(struct ocelot *ocelot,
+						 struct ocelot_ace_rule *ace)
+{
+	struct ocelot_acl_block *block = &ocelot->acl_block;
+	struct ocelot_ace_rule *tmp;
+	unsigned long port;
+	int i;
+
+	if (ocelot_ace_is_problematic_mac_etype(ace)) {
+		/* Search for any non-MAC_ETYPE rules on the port */
+		for (i = 0; i < block->count; i++) {
+			tmp = ocelot_ace_rule_get_rule_index(block, i);
+			if (tmp->ingress_port_mask & ace->ingress_port_mask &&
+			    ocelot_ace_is_problematic_non_mac_etype(tmp))
+				return false;
+		}
+
+		for_each_set_bit(port, &ace->ingress_port_mask,
+				 ocelot->num_phys_ports)
+			ocelot_match_all_as_mac_etype(ocelot, port, true);
+	} else if (ocelot_ace_is_problematic_non_mac_etype(ace)) {
+		/* Search for any MAC_ETYPE rules on the port */
+		for (i = 0; i < block->count; i++) {
+			tmp = ocelot_ace_rule_get_rule_index(block, i);
+			if (tmp->ingress_port_mask & ace->ingress_port_mask &&
+			    ocelot_ace_is_problematic_mac_etype(tmp))
+				return false;
+		}
+
+		for_each_set_bit(port, &ace->ingress_port_mask,
+				 ocelot->num_phys_ports)
+			ocelot_match_all_as_mac_etype(ocelot, port, false);
+	}
+
+	return true;
+}
+
 int ocelot_ace_rule_offload_add(struct ocelot *ocelot,
-				struct ocelot_ace_rule *rule)
+				struct ocelot_ace_rule *rule,
+				struct netlink_ext_ack *extack)
 {
 	struct ocelot_acl_block *block = &ocelot->acl_block;
 	struct ocelot_ace_rule *ace;
 	int i, index;
 
+	if (!ocelot_exclusive_mac_etype_ace_rules(ocelot, rule)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Cannot mix MAC_ETYPE with non-MAC_ETYPE rules");
+		return -EBUSY;
+	}
+
 	/* Add rule to the linked list */
 	ocelot_ace_rule_add(ocelot, block, rule);
 
diff --git a/drivers/net/ethernet/mscc/ocelot_ace.h b/drivers/net/ethernet/mscc/ocelot_ace.h
index 29d22c566786..099e177f2617 100644
--- a/drivers/net/ethernet/mscc/ocelot_ace.h
+++ b/drivers/net/ethernet/mscc/ocelot_ace.h
@@ -194,7 +194,7 @@ struct ocelot_ace_rule {
 
 	enum ocelot_ace_action action;
 	struct ocelot_ace_stats stats;
-	u16 ingress_port_mask;
+	unsigned long ingress_port_mask;
 
 	enum ocelot_vcap_bit dmac_mc;
 	enum ocelot_vcap_bit dmac_bc;
@@ -215,7 +215,8 @@ struct ocelot_ace_rule {
 };
 
 int ocelot_ace_rule_offload_add(struct ocelot *ocelot,
-				struct ocelot_ace_rule *rule);
+				struct ocelot_ace_rule *rule,
+				struct netlink_ext_ack *extack);
 int ocelot_ace_rule_offload_del(struct ocelot *ocelot,
 				struct ocelot_ace_rule *rule);
 int ocelot_ace_rule_stats_update(struct ocelot *ocelot,
diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 341923311fec..954cb67eeaa2 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -205,7 +205,7 @@ int ocelot_cls_flower_replace(struct ocelot *ocelot, int port,
 		return ret;
 	}
 
-	return ocelot_ace_rule_offload_add(ocelot, ace);
+	return ocelot_ace_rule_offload_add(ocelot, ace, f->common.extack);
 }
 EXPORT_SYMBOL_GPL(ocelot_cls_flower_replace);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7070eea5e95a3c1ea30b9afcedd8d29efcc28477 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Apr 2020 22:07:55 +0300
Subject: enetc: permit configuration of rx-vlan-filter with ethtool

Each ENETC station interface (SI) has a VLAN filter list and a port
flag (PSIPVMR) by which it can be put in "VLAN promiscuous" mode, which
enables the reception of VLAN-tagged traffic even if it is not in the
VLAN filtering list.

Currently the handling of this setting works like this: the port starts
off as VLAN promiscuous, then it switches to enabling VLAN filtering as
soon as the first VLAN is installed in its filter via
.ndo_vlan_rx_add_vid. In practice that does not work out very well,
because more often than not, the first VLAN to be installed is out of
the control of the user: the 8021q module, if loaded, adds its rule for
802.1p (VID 0) traffic upon bringing the interface up.

What the user is currently seeing in ethtool is this:
ethtool -k eno2
rx-vlan-filter: on [fixed]

which doesn't match the intention of the code, but rather reflects the
practical reality of having the 8021q module install its VID, which has
the side effect of turning on VLAN filtering in this driver. All in all,
a slightly confusing experience.

So instead of letting this driver switch the VLAN filtering state by
itself, just wire it up with the rx-vlan-filter feature from ethtool,
and let it be user-configurable just through that knob, except for one
case, see below.

In promiscuous mode, it is more intuitive that all traffic is received,
including VLAN tagged traffic. It appears that it is necessary to set
the flag in PSIPVMR for that to be the case, so VLAN promiscuous mode is
also temporarily enabled. On exit from promiscuous mode, the setting
made by ethtool is restored.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Claudiu Manoil <claudiu.manoil@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_pf.c | 44 +++++++++----------------
 1 file changed, 16 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index 85e2b741df41..de1ad4975074 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -50,21 +50,6 @@ static void enetc_set_vlan_promisc(struct enetc_hw *hw, char si_map)
 	enetc_port_wr(hw, ENETC_PSIPVMR, ENETC_PSIPVMR_SET_VP(si_map) | val);
 }
 
-static bool enetc_si_vlan_promisc_is_on(struct enetc_pf *pf, int si_idx)
-{
-	return pf->vlan_promisc_simap & BIT(si_idx);
-}
-
-static bool enetc_vlan_filter_is_on(struct enetc_pf *pf)
-{
-	int i;
-
-	for_each_set_bit(i, pf->active_vlans, VLAN_N_VID)
-		return true;
-
-	return false;
-}
-
 static void enetc_enable_si_vlan_promisc(struct enetc_pf *pf, int si_idx)
 {
 	pf->vlan_promisc_simap |= BIT(si_idx);
@@ -204,6 +189,7 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
 	struct enetc_pf *pf = enetc_si_priv(priv->si);
+	char vlan_promisc_simap = pf->vlan_promisc_simap;
 	struct enetc_hw *hw = &priv->si->hw;
 	bool uprom = false, mprom = false;
 	struct enetc_mac_filter *filter;
@@ -216,16 +202,16 @@ static void enetc_pf_set_rx_mode(struct net_device *ndev)
 		psipmr = ENETC_PSIPMR_SET_UP(0) | ENETC_PSIPMR_SET_MP(0);
 		uprom = true;
 		mprom = true;
-		/* enable VLAN promisc mode for SI0 */
-		if (!enetc_si_vlan_promisc_is_on(pf, 0))
-			enetc_enable_si_vlan_promisc(pf, 0);
-
+		/* Enable VLAN promiscuous mode for SI0 (PF) */
+		vlan_promisc_simap |= BIT(0);
 	} else if (ndev->flags & IFF_ALLMULTI) {
 		/* enable multi cast promisc mode for SI0 (PF) */
 		psipmr = ENETC_PSIPMR_SET_MP(0);
 		mprom = true;
 	}
 
+	enetc_set_vlan_promisc(&pf->si->hw, vlan_promisc_simap);
+
 	/* first 2 filter entries belong to PF */
 	if (!uprom) {
 		/* Update unicast filters */
@@ -306,9 +292,6 @@ static int enetc_vlan_rx_add_vid(struct net_device *ndev, __be16 prot, u16 vid)
 	struct enetc_pf *pf = enetc_si_priv(priv->si);
 	int idx;
 
-	if (enetc_si_vlan_promisc_is_on(pf, 0))
-		enetc_disable_si_vlan_promisc(pf, 0);
-
 	__set_bit(vid, pf->active_vlans);
 
 	idx = enetc_vid_hash_idx(vid);
@@ -326,9 +309,6 @@ static int enetc_vlan_rx_del_vid(struct net_device *ndev, __be16 prot, u16 vid)
 	__clear_bit(vid, pf->active_vlans);
 	enetc_sync_vlan_ht_filter(pf, true);
 
-	if (!enetc_vlan_filter_is_on(pf))
-		enetc_enable_si_vlan_promisc(pf, 0);
-
 	return 0;
 }
 
@@ -677,6 +657,15 @@ static int enetc_pf_set_features(struct net_device *ndev,
 		enetc_enable_txvlan(&priv->si->hw, 0,
 				    !!(features & NETIF_F_HW_VLAN_CTAG_TX));
 
+	if (changed & NETIF_F_HW_VLAN_CTAG_FILTER) {
+		struct enetc_pf *pf = enetc_si_priv(priv->si);
+
+		if (!!(features & NETIF_F_HW_VLAN_CTAG_FILTER))
+			enetc_disable_si_vlan_promisc(pf, 0);
+		else
+			enetc_enable_si_vlan_promisc(pf, 0);
+	}
+
 	if (changed & NETIF_F_LOOPBACK)
 		enetc_set_loopback(ndev, !!(features & NETIF_F_LOOPBACK));
 
@@ -719,12 +708,11 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
 
 	ndev->hw_features = NETIF_F_SG | NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
 			    NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX |
-			    NETIF_F_LOOPBACK;
+			    NETIF_F_HW_VLAN_CTAG_FILTER | NETIF_F_LOOPBACK;
 	ndev->features = NETIF_F_HIGHDMA | NETIF_F_SG |
 			 NETIF_F_RXCSUM | NETIF_F_HW_CSUM |
 			 NETIF_F_HW_VLAN_CTAG_TX |
-			 NETIF_F_HW_VLAN_CTAG_RX |
-			 NETIF_F_HW_VLAN_CTAG_FILTER;
+			 NETIF_F_HW_VLAN_CTAG_RX;
 
 	if (si->num_rss)
 		ndev->hw_features |= NETIF_F_RXHASH;
-- 
cgit v1.2.3-59-g8ed1b


From 0360c046ca186be1953d185d5a3631e415381820 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:06:51 +0200
Subject: r8169: move setting OCP base to generic init code

Move setting the ocp_base to rtl_init_one(). Where supported the value
is always the same, and if not supported it doesn't hurt.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index bf5bf05970a2..f882e8c09987 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -75,6 +75,8 @@
 #define R8169_TX_RING_BYTES	(NUM_TX_DESC * sizeof(struct TxDesc))
 #define R8169_RX_RING_BYTES	(NUM_RX_DESC * sizeof(struct RxDesc))
 
+#define OCP_STD_PHY_BASE	0xa400
+
 #define RTL_CFG_NO_GBIT	1
 
 /* write/read MMIO register */
@@ -847,8 +849,6 @@ static void r8168_mac_ocp_modify(struct rtl8169_private *tp, u32 reg, u16 mask,
 	r8168_mac_ocp_write(tp, reg, (data & ~mask) | set);
 }
 
-#define OCP_STD_PHY_BASE	0xa400
-
 static void r8168g_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
 	if (reg == 0x1f) {
@@ -5189,8 +5189,6 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-	tp->ocp_base = OCP_STD_PHY_BASE;
-
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
 	if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
@@ -5215,8 +5213,6 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 
 static void rtl_hw_init_8125(struct rtl8169_private *tp)
 {
-	tp->ocp_base = OCP_STD_PHY_BASE;
-
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
 	if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
@@ -5353,6 +5349,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
 	tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 0 : 1;
 	tp->eee_adv = -1;
+	tp->ocp_base = OCP_STD_PHY_BASE;
 
 	/* Get the *optional* external "ether_clk" used on some boards */
 	rc = rtl_get_ether_clk(tp);
-- 
cgit v1.2.3-59-g8ed1b


From a9b3d56830a3d626a0d85adc285ea94e829b6e9d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:07:41 +0200
Subject: r8169: remove NETIF_F_HIGHDMA from vlan_features

NETIF_F_HIGHDMA is added to vlan_features by register_netdev(),
therefore we can omit this here.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f882e8c09987..2d6c94652dc7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5440,8 +5440,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_RXCSUM |
 			   NETIF_F_HW_VLAN_CTAG_TX | NETIF_F_HW_VLAN_CTAG_RX;
-	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO |
-		NETIF_F_HIGHDMA;
+	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
 	tp->cp_cmd |= RxChkSum;
-- 
cgit v1.2.3-59-g8ed1b


From 85ab8b245ec6572eb489d0fe76c5573851d7b7ad Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:08:43 +0200
Subject: r8169: preserve VLAN setting on RTL8125 in rtl_init_rxcfg

So far we set RX_VLAN_8125 unconditionally, even if
NETIF_F_HW_VLAN_CTAG_RX may not be set. Don't touch these bits,
and let only rtl8169_set_features() control them.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2d6c94652dc7..e37ff1a5161d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2397,6 +2397,8 @@ static void rtl_pll_power_up(struct rtl8169_private *tp)
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
+	u32 vlan;
+
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
@@ -2411,8 +2413,9 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
 		break;
 	case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
-		RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_VLAN_8125 |
-				      RX_DMA_BURST);
+		/* VLAN flags are controlled by NETIF_F_HW_VLAN_CTAG_RX */
+		vlan = RTL_R32(tp, RxConfig) & RX_VLAN_8125;
+		RTL_W32(tp, RxConfig, vlan | RX_FETCH_DFLT_8125 | RX_DMA_BURST);
 		break;
 	default:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
-- 
cgit v1.2.3-59-g8ed1b


From 145192f83a1184ca8f2ef4508e7a93bb783bb444 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:10:03 +0200
Subject: r8169: use rtl8169_set_features in rtl_init_one

At that place in rtl_init_one() we can safely use rtl8169_set_features()
to configure the chip according to the default features.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e37ff1a5161d..e8c55b795c76 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5446,10 +5446,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->vlan_features = NETIF_F_SG | NETIF_F_IP_CSUM | NETIF_F_TSO;
 	dev->priv_flags |= IFF_LIVE_ADDR_CHANGE;
 
-	tp->cp_cmd |= RxChkSum;
-	/* RTL8125 uses register RxConfig for VLAN offloading config */
-	if (!rtl_is_8125(tp))
-		tp->cp_cmd |= RxVlan;
 	/*
 	 * Pretend we are using VLANs; This bypasses a nasty bug where
 	 * Interrupts stop flowing on high load on 8110SCd controllers.
@@ -5481,6 +5477,9 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	dev->hw_features |= NETIF_F_RXALL;
 	dev->hw_features |= NETIF_F_RXFCS;
 
+	/* configure chip for default features */
+	rtl8169_set_features(dev, dev->features);
+
 	jumbo_max = rtl_jumbo_max(tp);
 	if (jumbo_max)
 		dev->max_mtu = jumbo_max;
-- 
cgit v1.2.3-59-g8ed1b


From 0623b98b41cd16073ee15f35e058194313e6dc51 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:10:44 +0200
Subject: r8169: improve rtl8169_tso_csum_v2

Simplify the code and avoid the overhead of calling vlan_get_protocol().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 15 +++++----------
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e8c55b795c76..2cd2b038e4eb 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4127,25 +4127,20 @@ static bool rtl8169_tso_csum_v2(struct rtl8169_private *tp,
 				struct sk_buff *skb, u32 *opts)
 {
 	u32 transport_offset = (u32)skb_transport_offset(skb);
-	u32 mss = skb_shinfo(skb)->gso_size;
+	struct skb_shared_info *shinfo = skb_shinfo(skb);
+	u32 mss = shinfo->gso_size;
 
 	if (mss) {
-		switch (vlan_get_protocol(skb)) {
-		case htons(ETH_P_IP):
+		if (shinfo->gso_type & SKB_GSO_TCPV4) {
 			opts[0] |= TD1_GTSENV4;
-			break;
-
-		case htons(ETH_P_IPV6):
+		} else if (shinfo->gso_type & SKB_GSO_TCPV6) {
 			if (skb_cow_head(skb, 0))
 				return false;
 
 			tcp_v6_gso_csum_prep(skb);
 			opts[0] |= TD1_GTSENV6;
-			break;
-
-		default:
+		} else {
 			WARN_ON_ONCE(1);
-			break;
 		}
 
 		opts[0] |= transport_offset << GTTCPHO_SHIFT;
-- 
cgit v1.2.3-59-g8ed1b


From 773235f4e1cc41ea98f520a0cfe0a51a58b2d411 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 23:11:32 +0200
Subject: r8169: add workaround for RTL8168evl TSO hw issues

Add workaround for hw issues with TSO on RTL8168evl. This workaround is
based on information I got from Realtek, and *should* allow TSO to be
safely enabled on this chip version.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 34 +++++++++++++++++++++++++++++++
 1 file changed, 34 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2cd2b038e4eb..a8696d958cd1 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4306,6 +4306,37 @@ err_stop_0:
 	return NETDEV_TX_BUSY;
 }
 
+static unsigned int rtl_last_frag_len(struct sk_buff *skb)
+{
+	struct skb_shared_info *info = skb_shinfo(skb);
+	unsigned int nr_frags = info->nr_frags;
+
+	if (!nr_frags)
+		return UINT_MAX;
+
+	return skb_frag_size(info->frags + nr_frags - 1);
+}
+
+/* Workaround for hw issues with TSO on RTL8168evl */
+static netdev_features_t rtl8168evl_fix_tso(struct sk_buff *skb,
+					    netdev_features_t features)
+{
+	/* IPv4 header has options field */
+	if (vlan_get_protocol(skb) == htons(ETH_P_IP) &&
+	    ip_hdrlen(skb) > sizeof(struct iphdr))
+		features &= ~NETIF_F_ALL_TSO;
+
+	/* IPv4 TCP header has options field */
+	else if (skb_shinfo(skb)->gso_type & SKB_GSO_TCPV4 &&
+		 tcp_hdrlen(skb) > sizeof(struct tcphdr))
+		features &= ~NETIF_F_ALL_TSO;
+
+	else if (rtl_last_frag_len(skb) <= 6)
+		features &= ~NETIF_F_ALL_TSO;
+
+	return features;
+}
+
 static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
 						struct net_device *dev,
 						netdev_features_t features)
@@ -4314,6 +4345,9 @@ static netdev_features_t rtl8169_features_check(struct sk_buff *skb,
 	struct rtl8169_private *tp = netdev_priv(dev);
 
 	if (skb_is_gso(skb)) {
+		if (tp->mac_version == RTL_GIGA_MAC_VER_34)
+			features = rtl8168evl_fix_tso(skb, features);
+
 		if (transport_offset > GTTCPHO_MAX &&
 		    rtl_chip_supports_csum_v2(tp))
 			features &= ~NETIF_F_ALL_TSO;
-- 
cgit v1.2.3-59-g8ed1b


From bb7fc863729b45f0fbcdea991d0465d855ffd831 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 5 Apr 2020 20:57:00 +0300
Subject: net/mlx5: Provide simplified command interfaces

Many mlx5_cmd_exec() callers are not interested in the output from that
command or have standard in/out structures. Those callers simply allocate
those structures on the stack and use sizeof() to provide in/out arguments.

For this naive approach, provide simplified versions of mlx5_cmd_exec().

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/driver.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6f8f79ef829b..1caddfa85c4d 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -903,6 +903,19 @@ int mlx5_cmd_exec_cb(struct mlx5_async_ctx *ctx, void *in, int in_size,
 
 int mlx5_cmd_exec(struct mlx5_core_dev *dev, void *in, int in_size, void *out,
 		  int out_size);
+
+#define mlx5_cmd_exec_inout(dev, ifc_cmd, in, out)                             \
+	({                                                                     \
+		mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(ifc_cmd##_in), out,    \
+			      MLX5_ST_SZ_BYTES(ifc_cmd##_out));                \
+	})
+
+#define mlx5_cmd_exec_in(dev, ifc_cmd, in)                                     \
+	({                                                                     \
+		u32 _out[MLX5_ST_SZ_DW(ifc_cmd##_out)] = {};                   \
+		mlx5_cmd_exec_inout(dev, ifc_cmd, in, _out);                   \
+	})
+
 int mlx5_cmd_exec_polling(struct mlx5_core_dev *dev, void *in, int in_size,
 			  void *out, int out_size);
 void mlx5_cmd_mbox_status(void *out, u8 *status, u32 *syndrome);
-- 
cgit v1.2.3-59-g8ed1b


From ec44e72b73b74af489196352152e53a20c8ad5eb Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 2 Apr 2020 15:17:40 +0300
Subject: net/mlx5: Open-code create and destroy QP calls

FPGA, IPoIB and SW steering don't need anything from the
mlx5_core_create_qp() and mlx5_core_destroy_qp() except calls
to mlx5_cmd_exec().

Let's open-code it, so we will be able to move qp.c to mlx5_ib.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.c    | 24 +++++++++--------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 30 ++++++++++------------
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 18 ++++++++-----
 3 files changed, 39 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 61021133029e..7c3e7232852e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -534,8 +534,9 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
 				    unsigned int tx_size, unsigned int rx_size)
 {
 	struct mlx5_fpga_device *fdev = conn->fdev;
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 	struct mlx5_core_dev *mdev = fdev->mdev;
-	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {0};
+	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 	void *in = NULL, *qpc;
 	int err, inlen;
 
@@ -600,10 +601,12 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
 	mlx5_fill_page_frag_array(&conn->qp.wq_ctrl.buf,
 				  (__be64 *)MLX5_ADDR_OF(create_qp_in, in, pas));
 
-	err = mlx5_core_create_qp(mdev, &conn->qp.mqp, in, inlen);
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
 	if (err)
 		goto err_sq_bufs;
 
+	conn->qp.mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
 	conn->qp.mqp.event = mlx5_fpga_conn_event;
 	mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
 
@@ -658,7 +661,14 @@ static void mlx5_fpga_conn_flush_send_bufs(struct mlx5_fpga_conn *conn)
 
 static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
 {
-	mlx5_core_destroy_qp(conn->fdev->mdev, &conn->qp.mqp);
+	struct mlx5_core_dev *dev = conn->fdev->mdev;
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+	struct mlx5_core_qp *qp = &conn->qp.mqp;
+
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	mlx5_cmd_exec_in(dev, destroy_qp, in);
+
 	mlx5_fpga_conn_free_recv_bufs(conn);
 	mlx5_fpga_conn_flush_send_bufs(conn);
 	kvfree(conn->qp.sq.bufs);
@@ -972,19 +982,11 @@ out:
 
 void mlx5_fpga_conn_destroy(struct mlx5_fpga_conn *conn)
 {
-	struct mlx5_fpga_device *fdev = conn->fdev;
-	struct mlx5_core_dev *mdev = fdev->mdev;
-	int err = 0;
-
 	conn->qp.active = false;
 	tasklet_disable(&conn->cq.tasklet);
 	synchronize_irq(conn->cq.mcq.irqn);
 
 	mlx5_fpga_destroy_qp(conn->fdev->mdev, conn->fpga_qpn);
-	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, NULL,
-				  &conn->qp.mqp);
-	if (err)
-		mlx5_fpga_warn(fdev, "qp_modify 2ERR failed: %d\n", err);
 	mlx5_fpga_conn_destroy_qp(conn);
 	mlx5_fpga_conn_destroy_cq(conn);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 673aaa815f57..8bca11cb1e19 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -219,17 +219,12 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
 
 int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
-	u32 *in = NULL;
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
 	void *addr_path;
 	int ret = 0;
-	int inlen;
 	void *qpc;
 
-	inlen = MLX5_ST_SZ_BYTES(create_qp_in);
-	in = kvzalloc(inlen, GFP_KERNEL);
-	if (!in)
-		return -ENOMEM;
-
 	qpc = MLX5_ADDR_OF(create_qp_in, in, qpc);
 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_UD);
 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
@@ -240,20 +235,23 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp
 	MLX5_SET(ads, addr_path, vhca_port_num, 1);
 	MLX5_SET(ads, addr_path, grh, 1);
 
-	ret = mlx5_core_create_qp(mdev, qp, in, inlen);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed creating IPoIB QP err : %d\n", ret);
-		goto out;
-	}
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+	ret = mlx5_cmd_exec_inout(mdev, create_qp, in, out);
+	if (ret)
+		return ret;
 
-out:
-	kvfree(in);
-	return ret;
+	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+	return 0;
 }
 
 void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
 {
-	mlx5_core_destroy_qp(mdev, qp);
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	mlx5_cmd_exec_in(mdev, destroy_qp, in);
 }
 
 int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index c0ab9cf74929..88bc94a8b8f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -108,6 +108,7 @@ static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 					 struct dr_qp_init_attr *attr)
 {
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 	u32 temp_qpc[MLX5_ST_SZ_DW(qpc)] = {};
 	struct mlx5_wq_param wqp;
 	struct mlx5dr_qp *dr_qp;
@@ -180,13 +181,12 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 				  (__be64 *)MLX5_ADDR_OF(create_qp_in,
 							 in, pas));
 
-	err = mlx5_core_create_qp(mdev, &dr_qp->mqp, in, inlen);
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	dr_qp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
 	kfree(in);
-
-	if (err) {
-		mlx5_core_warn(mdev, " Can't create QP\n");
+	if (err)
 		goto err_in;
-	}
 	dr_qp->mqp.event = dr_qp_event;
 	dr_qp->uar = attr->uar;
 
@@ -204,7 +204,13 @@ err_wq:
 static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 			  struct mlx5dr_qp *dr_qp)
 {
-	mlx5_core_destroy_qp(mdev, &dr_qp->mqp);
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+	struct mlx5_core_qp *qp = &dr_qp->mqp;
+
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	mlx5_cmd_exec_in(mdev, destroy_qp, in);
+
 	kfree(dr_qp->sq.wqe_head);
 	mlx5_wq_destroy(&dr_qp->wq_ctrl);
 	kfree(dr_qp);
-- 
cgit v1.2.3-59-g8ed1b


From 73a75b96fc9a79779ad7491b61d65f0bbae04d11 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 6 Apr 2020 08:40:52 +0300
Subject: net/mlx5: Remove empty QP and CQ events handlers

The QP and CQ event handlers do nothing except print some debug
messages. Nothing acts on these events or on the printed information,
so remove the handlers.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c   | 19 -------------------
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c    | 14 --------------
 2 files changed, 33 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 7c3e7232852e..1d49894399af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -362,23 +362,6 @@ static void mlx5_fpga_conn_arm_cq(struct mlx5_fpga_conn *conn)
 		    conn->fdev->conn_res.uar->map, conn->cq.wq.cc);
 }
 
-static void mlx5_fpga_conn_cq_event(struct mlx5_core_cq *mcq,
-				    enum mlx5_event event)
-{
-	struct mlx5_fpga_conn *conn;
-
-	conn = container_of(mcq, struct mlx5_fpga_conn, cq.mcq);
-	mlx5_fpga_warn(conn->fdev, "CQ event %u on CQ #%u\n", event, mcq->cqn);
-}
-
-static void mlx5_fpga_conn_event(struct mlx5_core_qp *mqp, int event)
-{
-	struct mlx5_fpga_conn *conn;
-
-	conn = container_of(mqp, struct mlx5_fpga_conn, qp.mqp);
-	mlx5_fpga_warn(conn->fdev, "QP event %u on QP #%u\n", event, mqp->qpn);
-}
-
 static inline void mlx5_fpga_conn_cqes(struct mlx5_fpga_conn *conn,
 				       unsigned int budget)
 {
@@ -493,7 +476,6 @@ static int mlx5_fpga_conn_create_cq(struct mlx5_fpga_conn *conn, int cq_size)
 	*conn->cq.mcq.arm_db    = 0;
 	conn->cq.mcq.vector     = 0;
 	conn->cq.mcq.comp       = mlx5_fpga_conn_cq_complete;
-	conn->cq.mcq.event      = mlx5_fpga_conn_cq_event;
 	conn->cq.mcq.irqn       = irqn;
 	conn->cq.mcq.uar        = fdev->conn_res.uar;
 	tasklet_init(&conn->cq.tasklet, mlx5_fpga_conn_cq_tasklet,
@@ -607,7 +589,6 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
 		goto err_sq_bufs;
 
 	conn->qp.mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
-	conn->qp.mqp.event = mlx5_fpga_conn_event;
 	mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
 
 	goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 88bc94a8b8f1..690e4181db4c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -100,11 +100,6 @@ static int dr_poll_cq(struct mlx5dr_cq *dr_cq, int ne)
 	return err == CQ_POLL_ERR ? err : npolled;
 }
 
-static void dr_qp_event(struct mlx5_core_qp *mqp, int event)
-{
-	pr_info("DR QP event %u on QP #%u\n", event, mqp->qpn);
-}
-
 static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 					 struct dr_qp_init_attr *attr)
 {
@@ -187,7 +182,6 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 	kfree(in);
 	if (err)
 		goto err_in;
-	dr_qp->mqp.event = dr_qp_event;
 	dr_qp->uar = attr->uar;
 
 	return dr_qp;
@@ -695,12 +689,6 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 	return 0;
 }
 
-static void dr_cq_event(struct mlx5_core_cq *mcq,
-			enum mlx5_event event)
-{
-	pr_info("CQ event %u on CQ #%u\n", event, mcq->cqn);
-}
-
 static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 				      struct mlx5_uars_page *uar,
 				      size_t ncqe)
@@ -761,8 +749,6 @@ static struct mlx5dr_cq *dr_create_cq(struct mlx5_core_dev *mdev,
 	pas = (__be64 *)MLX5_ADDR_OF(create_cq_in, in, pas);
 	mlx5_fill_page_frag_array(&cq->wq_ctrl.buf, pas);
 
-	cq->mcq.event = dr_cq_event;
-
 	err = mlx5_core_create_cq(mdev, &cq->mcq, in, inlen, out, sizeof(out));
 	kvfree(in);
 
-- 
cgit v1.2.3-59-g8ed1b


From acab4b88e93ceb467623b991bb4b1594c893667a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sat, 4 Apr 2020 18:25:48 +0300
Subject: net/mlx5: Open-code modify QP in steering module

Remove the dependency on qp.c from SW steering by open-coding
the modify QP interface.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c    | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 690e4181db4c..266b913d2f9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -585,8 +585,10 @@ static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
 	MLX5_SET(qpc, qpc, rre, 1);
 	MLX5_SET(qpc, qpc, rwe, 1);
 
-	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
-				   &dr_qp->mqp);
+	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 }
 
 static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
@@ -600,12 +602,13 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 
 	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
 
-	MLX5_SET(qpc, qpc, log_ack_req_freq, 0);
 	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
 	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
 
-	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, qpc,
-				   &dr_qp->mqp);
+	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
 }
 
 static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
@@ -636,8 +639,10 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 	MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, attr->port_num);
 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 
-	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
-				   &dr_qp->mqp);
+	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+
+	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 }
 
 static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
-- 
cgit v1.2.3-59-g8ed1b


From a6532fd925b981863161275dea6fd26b2e2c02e4 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 6 Apr 2020 09:42:43 +0300
Subject: net/mlx5: Open-code modify QP in the FPGA module

Remove the dependency on qp.c from the FPGA module by open-coding
the modify QP interface.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.c    | 84 ++++++++--------------
 1 file changed, 28 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index 1d49894399af..b00d834d2dbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -657,30 +657,29 @@ static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
 	mlx5_wq_destroy(&conn->qp.wq_ctrl);
 }
 
-static inline int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
 {
 	struct mlx5_core_dev *mdev = conn->fdev->mdev;
+	u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
 
 	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
 
-	return mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, NULL,
-				   &conn->qp.mqp);
+	MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+	MLX5_SET(qp_2rst_in, in, qpn, conn->qp.mqp.qpn);
+
+	return mlx5_cmd_exec_in(mdev, qp_2rst, in);
 }
 
-static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
 {
+	u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
 	struct mlx5_fpga_device *fdev = conn->fdev;
 	struct mlx5_core_dev *mdev = fdev->mdev;
-	u32 *qpc = NULL;
-	int err;
+	u32 *qpc;
 
 	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
 
-	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
-	if (!qpc) {
-		err = -ENOMEM;
-		goto out;
-	}
+	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 
 	MLX5_SET(qpc, qpc, st, MLX5_QP_ST_RC);
 	MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
@@ -691,32 +690,22 @@ static inline int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
 	MLX5_SET(qpc, qpc, cqn_rcv, conn->cq.mcq.cqn);
 	MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
 
-	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, qpc,
-				  &conn->qp.mqp);
-	if (err) {
-		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
-		goto out;
-	}
+	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+	MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.mqp.qpn);
 
-out:
-	kfree(qpc);
-	return err;
+	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 }
 
-static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
 {
+	u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
 	struct mlx5_fpga_device *fdev = conn->fdev;
 	struct mlx5_core_dev *mdev = fdev->mdev;
-	u32 *qpc = NULL;
-	int err;
+	u32 *qpc;
 
 	mlx5_fpga_dbg(conn->fdev, "QP RTR\n");
 
-	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
-	if (!qpc) {
-		err = -ENOMEM;
-		goto out;
-	}
+	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
 
 	MLX5_SET(qpc, qpc, mtu, MLX5_QPC_MTU_1K_BYTES);
 	MLX5_SET(qpc, qpc, log_msg_max, (u8)MLX5_CAP_GEN(mdev, log_max_msg));
@@ -736,33 +725,22 @@ static inline int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
 	       MLX5_ADDR_OF(fpga_qpc, conn->fpga_qpc, fpga_ip),
 	       MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
 
-	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, qpc,
-				  &conn->qp.mqp);
-	if (err) {
-		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
-		goto out;
-	}
+	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+	MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.mqp.qpn);
 
-out:
-	kfree(qpc);
-	return err;
+	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 }
 
-static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
+static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
 {
 	struct mlx5_fpga_device *fdev = conn->fdev;
+	u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
 	struct mlx5_core_dev *mdev = fdev->mdev;
-	u32 *qpc = NULL;
-	u32 opt_mask;
-	int err;
+	u32 *qpc;
 
 	mlx5_fpga_dbg(conn->fdev, "QP RTS\n");
 
-	qpc = kzalloc(MLX5_ST_SZ_BYTES(qpc), GFP_KERNEL);
-	if (!qpc) {
-		err = -ENOMEM;
-		goto out;
-	}
+	qpc = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
 
 	MLX5_SET(qpc, qpc, log_ack_req_freq, 8);
 	MLX5_SET(qpc, qpc, min_rnr_nak, 0x12);
@@ -772,17 +750,11 @@ static inline int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
 	MLX5_SET(qpc, qpc, retry_count, 7);
 	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
 
-	opt_mask = MLX5_QP_OPTPAR_RNR_TIMEOUT;
-	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, opt_mask, qpc,
-				  &conn->qp.mqp);
-	if (err) {
-		mlx5_fpga_warn(fdev, "qp_modify RST2INIT failed: %d\n", err);
-		goto out;
-	}
+	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+	MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.mqp.qpn);
+	MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT);
 
-out:
-	kfree(qpc);
-	return err;
+	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
 }
 
 static int mlx5_fpga_conn_connect(struct mlx5_fpga_conn *conn)
-- 
cgit v1.2.3-59-g8ed1b


From a452e0e43669d5223f3f2264d0d4f08acdba98c0 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 6 Apr 2020 13:43:14 +0300
Subject: net/mlx5: Open-code modify QP in the IPoIB module

Remove the dependency on qp.c from the IPoIB module by open-coding
the modify QP interface.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 76 ++++++++++++----------
 1 file changed, 42 insertions(+), 34 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 8bca11cb1e19..83b198d8e3d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -161,44 +161,54 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5i_priv *ipriv = priv->ppriv;
 	struct mlx5_core_qp *qp = &ipriv->qp;
-	struct mlx5_qp_context *context;
 	int ret;
 
-	/* QP states */
-	context = kzalloc(sizeof(*context), GFP_KERNEL);
-	if (!context)
-		return -ENOMEM;
+	{
+		u32 in[MLX5_ST_SZ_DW(rst2init_qp_in)] = {};
+		u32 *qpc;
 
-	context->flags = cpu_to_be32(MLX5_QP_PM_MIGRATED << 11);
-	context->pri_path.port = 1;
-	context->pri_path.pkey_index = cpu_to_be16(ipriv->pkey_index);
-	context->qkey = cpu_to_be32(IB_DEFAULT_Q_KEY);
+		qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RST2INIT_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp RST2INIT, err: %d\n", ret);
-		goto err_qp_modify_to_err;
-	}
-	memset(context, 0, sizeof(*context));
+		MLX5_SET(qpc, qpc, pm_state, MLX5_QP_PM_MIGRATED);
+		MLX5_SET(qpc, qpc, primary_address_path.pkey_index,
+			 ipriv->pkey_index);
+		MLX5_SET(qpc, qpc, primary_address_path.vhca_port_num, 1);
+		MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY);
 
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_INIT2RTR_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp INIT2RTR, err: %d\n", ret);
-		goto err_qp_modify_to_err;
+		MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
+		MLX5_SET(rst2init_qp_in, in, qpn, qp->qpn);
+		ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in);
+		if (ret)
+			goto err_qp_modify_to_err;
 	}
-
-	ret = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_RTR2RTS_QP, 0, context, qp);
-	if (ret) {
-		mlx5_core_err(mdev, "Failed to modify qp RTR2RTS, err: %d\n", ret);
-		goto err_qp_modify_to_err;
+	{
+		u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
+
+		MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
+		MLX5_SET(init2rtr_qp_in, in, qpn, qp->qpn);
+		ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
+		if (ret)
+			goto err_qp_modify_to_err;
+	}
+	{
+		u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
+
+		MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
+		MLX5_SET(rtr2rts_qp_in, in, qpn, qp->qpn);
+		ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
+		if (ret)
+			goto err_qp_modify_to_err;
 	}
-
-	kfree(context);
 	return 0;
 
 err_qp_modify_to_err:
-	mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2ERR_QP, 0, &context, qp);
-	kfree(context);
+	{
+		u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {};
+
+		MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP);
+		MLX5_SET(qp_2err_in, in, qpn, qp->qpn);
+		mlx5_cmd_exec_in(mdev, qp_2err, in);
+	}
 	return ret;
 }
 
@@ -206,13 +216,11 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
 {
 	struct mlx5i_priv *ipriv = priv->ppriv;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5_qp_context context;
-	int err;
+	u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
 
-	err = mlx5_core_qp_modify(mdev, MLX5_CMD_OP_2RST_QP, 0, &context,
-				  &ipriv->qp);
-	if (err)
-		mlx5_core_err(mdev, "Failed to modify qp 2RST, err: %d\n", err);
+	MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
+	MLX5_SET(qp_2rst_in, in, qpn, ipriv->qp.qpn);
+	mlx5_cmd_exec_in(mdev, qp_2rst, in);
 }
 
 #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
-- 
cgit v1.2.3-59-g8ed1b


From f93f4f4f31492468d5c6903e35cc0e31a9cb2c48 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 6 Apr 2020 11:17:44 +0300
Subject: net/mlx5: Remove extra indirection while storing QPN

The FPGA, SW steering and IPoIB modules need only the QPN from the
mlx5_core_qp struct, so reduce the memory footprint by storing the QPN
directly.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.c    | 23 ++++-----
 .../net/ethernet/mellanox/mlx5/core/fpga/conn.h    |  2 +-
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  | 60 +++++++++++-----------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  6 +--
 .../ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c | 19 +++----
 .../ethernet/mellanox/mlx5/core/steering/dr_send.c | 19 ++++---
 .../mellanox/mlx5/core/steering/dr_types.h         |  2 +-
 7 files changed, 64 insertions(+), 67 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
index b00d834d2dbf..182d3ac3e73f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.c
@@ -165,7 +165,7 @@ static void mlx5_fpga_conn_post_send(struct mlx5_fpga_conn *conn,
 	ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
 	ctrl->opmod_idx_opcode = cpu_to_be32(((conn->qp.sq.pc & 0xffff) << 8) |
 					     MLX5_OPCODE_SEND);
-	ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.mqp.qpn << 8));
+	ctrl->qpn_ds = cpu_to_be32(size | (conn->qp.qpn << 8));
 
 	conn->qp.sq.pc++;
 	conn->qp.sq.bufs[ix] = buf;
@@ -588,8 +588,8 @@ static int mlx5_fpga_conn_create_qp(struct mlx5_fpga_conn *conn,
 	if (err)
 		goto err_sq_bufs;
 
-	conn->qp.mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
-	mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.mqp.qpn);
+	conn->qp.qpn = MLX5_GET(create_qp_out, out, qpn);
+	mlx5_fpga_dbg(fdev, "Created QP #0x%x\n", conn->qp.qpn);
 
 	goto out;
 
@@ -644,10 +644,9 @@ static void mlx5_fpga_conn_destroy_qp(struct mlx5_fpga_conn *conn)
 {
 	struct mlx5_core_dev *dev = conn->fdev->mdev;
 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
-	struct mlx5_core_qp *qp = &conn->qp.mqp;
 
 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, qpn, conn->qp.qpn);
 	mlx5_cmd_exec_in(dev, destroy_qp, in);
 
 	mlx5_fpga_conn_free_recv_bufs(conn);
@@ -662,10 +661,10 @@ static int mlx5_fpga_conn_reset_qp(struct mlx5_fpga_conn *conn)
 	struct mlx5_core_dev *mdev = conn->fdev->mdev;
 	u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
 
-	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.mqp.qpn);
+	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to RST\n", conn->qp.qpn);
 
 	MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
-	MLX5_SET(qp_2rst_in, in, qpn, conn->qp.mqp.qpn);
+	MLX5_SET(qp_2rst_in, in, qpn, conn->qp.qpn);
 
 	return mlx5_cmd_exec_in(mdev, qp_2rst, in);
 }
@@ -677,7 +676,7 @@ static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
 	struct mlx5_core_dev *mdev = fdev->mdev;
 	u32 *qpc;
 
-	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.mqp.qpn);
+	mlx5_fpga_dbg(conn->fdev, "Modifying QP %u to INIT\n", conn->qp.qpn);
 
 	qpc = MLX5_ADDR_OF(rst2init_qp_in, in, qpc);
 
@@ -691,7 +690,7 @@ static int mlx5_fpga_conn_init_qp(struct mlx5_fpga_conn *conn)
 	MLX5_SET64(qpc, qpc, dbr_addr, conn->qp.wq_ctrl.db.dma);
 
 	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
-	MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.mqp.qpn);
+	MLX5_SET(rst2init_qp_in, in, qpn, conn->qp.qpn);
 
 	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 }
@@ -726,7 +725,7 @@ static int mlx5_fpga_conn_rtr_qp(struct mlx5_fpga_conn *conn)
 	       MLX5_FLD_SZ_BYTES(qpc, primary_address_path.rgid_rip));
 
 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
-	MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.mqp.qpn);
+	MLX5_SET(init2rtr_qp_in, in, qpn, conn->qp.qpn);
 
 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 }
@@ -751,7 +750,7 @@ static int mlx5_fpga_conn_rts_qp(struct mlx5_fpga_conn *conn)
 	MLX5_SET(qpc, qpc, rnr_retry, 7); /* Infinite retry if RNR NACK */
 
 	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
-	MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.mqp.qpn);
+	MLX5_SET(rtr2rts_qp_in, in, qpn, conn->qp.qpn);
 	MLX5_SET(rtr2rts_qp_in, in, opt_param_mask, MLX5_QP_OPTPAR_RNR_TIMEOUT);
 
 	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
@@ -894,7 +893,7 @@ struct mlx5_fpga_conn *mlx5_fpga_conn_create(struct mlx5_fpga_device *fdev,
 	MLX5_SET(fpga_qpc, conn->fpga_qpc, next_rcv_psn, 1);
 	MLX5_SET(fpga_qpc, conn->fpga_qpc, next_send_psn, 0);
 	MLX5_SET(fpga_qpc, conn->fpga_qpc, pkey, MLX5_FPGA_PKEY);
-	MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.mqp.qpn);
+	MLX5_SET(fpga_qpc, conn->fpga_qpc, remote_qpn, conn->qp.qpn);
 	MLX5_SET(fpga_qpc, conn->fpga_qpc, rnr_retry, 7);
 	MLX5_SET(fpga_qpc, conn->fpga_qpc, retry_count, 7);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
index 634ae10e287b..5116e869a6e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/conn.h
@@ -65,7 +65,7 @@ struct mlx5_fpga_conn {
 		int sgid_index;
 		struct mlx5_wq_qp wq;
 		struct mlx5_wq_ctrl wq_ctrl;
-		struct mlx5_core_qp mqp;
+		u32 qpn;
 		struct {
 			spinlock_t lock; /* Protects all SQ state */
 			unsigned int pc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 83b198d8e3d6..068578be00f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -160,7 +160,6 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5i_priv *ipriv = priv->ppriv;
-	struct mlx5_core_qp *qp = &ipriv->qp;
 	int ret;
 
 	{
@@ -176,7 +175,7 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
 		MLX5_SET(qpc, qpc, q_key, IB_DEFAULT_Q_KEY);
 
 		MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
-		MLX5_SET(rst2init_qp_in, in, qpn, qp->qpn);
+		MLX5_SET(rst2init_qp_in, in, qpn, ipriv->qpn);
 		ret = mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 		if (ret)
 			goto err_qp_modify_to_err;
@@ -185,7 +184,7 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
 		u32 in[MLX5_ST_SZ_DW(init2rtr_qp_in)] = {};
 
 		MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
-		MLX5_SET(init2rtr_qp_in, in, qpn, qp->qpn);
+		MLX5_SET(init2rtr_qp_in, in, qpn, ipriv->qpn);
 		ret = mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 		if (ret)
 			goto err_qp_modify_to_err;
@@ -194,7 +193,7 @@ int mlx5i_init_underlay_qp(struct mlx5e_priv *priv)
 		u32 in[MLX5_ST_SZ_DW(rtr2rts_qp_in)] = {};
 
 		MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
-		MLX5_SET(rtr2rts_qp_in, in, qpn, qp->qpn);
+		MLX5_SET(rtr2rts_qp_in, in, qpn, ipriv->qpn);
 		ret = mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
 		if (ret)
 			goto err_qp_modify_to_err;
@@ -206,7 +205,7 @@ err_qp_modify_to_err:
 		u32 in[MLX5_ST_SZ_DW(qp_2err_in)] = {};
 
 		MLX5_SET(qp_2err_in, in, opcode, MLX5_CMD_OP_2ERR_QP);
-		MLX5_SET(qp_2err_in, in, qpn, qp->qpn);
+		MLX5_SET(qp_2err_in, in, qpn, ipriv->qpn);
 		mlx5_cmd_exec_in(mdev, qp_2err, in);
 	}
 	return ret;
@@ -219,16 +218,17 @@ void mlx5i_uninit_underlay_qp(struct mlx5e_priv *priv)
 	u32 in[MLX5_ST_SZ_DW(qp_2rst_in)] = {};
 
 	MLX5_SET(qp_2rst_in, in, opcode, MLX5_CMD_OP_2RST_QP);
-	MLX5_SET(qp_2rst_in, in, qpn, ipriv->qp.qpn);
+	MLX5_SET(qp_2rst_in, in, qpn, ipriv->qpn);
 	mlx5_cmd_exec_in(mdev, qp_2rst, in);
 }
 
 #define MLX5_QP_ENHANCED_ULP_STATELESS_MODE 2
 
-int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv)
 {
 	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(create_qp_in)] = {};
+	struct mlx5i_priv *ipriv = priv->ppriv;
 	void *addr_path;
 	int ret = 0;
 	void *qpc;
@@ -244,21 +244,21 @@ int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp
 	MLX5_SET(ads, addr_path, grh, 1);
 
 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
-	ret = mlx5_cmd_exec_inout(mdev, create_qp, in, out);
+	ret = mlx5_cmd_exec_inout(priv->mdev, create_qp, in, out);
 	if (ret)
 		return ret;
 
-	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+	ipriv->qpn = MLX5_GET(create_qp_out, out, qpn);
 
 	return 0;
 }
 
-void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp)
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
 
 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, qpn, qpn);
 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
 }
 
@@ -279,13 +279,13 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 	struct mlx5i_priv *ipriv = priv->ppriv;
 	int err;
 
-	err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
+	err = mlx5i_create_underlay_qp(priv);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create underlay QP failed, %d\n", err);
 		return err;
 	}
 
-	err = mlx5i_create_tis(priv->mdev, ipriv->qp.qpn, &priv->tisn[0][0]);
+	err = mlx5i_create_tis(priv->mdev, ipriv->qpn, &priv->tisn[0][0]);
 	if (err) {
 		mlx5_core_warn(priv->mdev, "create tis failed, %d\n", err);
 		goto err_destroy_underlay_qp;
@@ -294,7 +294,7 @@ static int mlx5i_init_tx(struct mlx5e_priv *priv)
 	return 0;
 
 err_destroy_underlay_qp:
-	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+	mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
 	return err;
 }
 
@@ -303,7 +303,7 @@ static void mlx5i_cleanup_tx(struct mlx5e_priv *priv)
 	struct mlx5i_priv *ipriv = priv->ppriv;
 
 	mlx5e_destroy_tis(priv->mdev, priv->tisn[0][0]);
-	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+	mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
 }
 
 static int mlx5i_create_flow_steering(struct mlx5e_priv *priv)
@@ -506,12 +506,12 @@ int mlx5i_dev_init(struct net_device *dev)
 	struct mlx5i_priv    *ipriv  = priv->ppriv;
 
 	/* Set dev address using underlay QP */
-	dev->dev_addr[1] = (ipriv->qp.qpn >> 16) & 0xff;
-	dev->dev_addr[2] = (ipriv->qp.qpn >>  8) & 0xff;
-	dev->dev_addr[3] = (ipriv->qp.qpn) & 0xff;
+	dev->dev_addr[1] = (ipriv->qpn >> 16) & 0xff;
+	dev->dev_addr[2] = (ipriv->qpn >>  8) & 0xff;
+	dev->dev_addr[3] = (ipriv->qpn) & 0xff;
 
 	/* Add QPN to net-device mapping to HT */
-	mlx5i_pkey_add_qpn(dev ,ipriv->qp.qpn);
+	mlx5i_pkey_add_qpn(dev, ipriv->qpn);
 
 	return 0;
 }
@@ -538,7 +538,7 @@ void mlx5i_dev_cleanup(struct net_device *dev)
 	mlx5i_uninit_underlay_qp(priv);
 
 	/* Delete QPN to net-device mapping from HT */
-	mlx5i_pkey_del_qpn(dev, ipriv->qp.qpn);
+	mlx5i_pkey_del_qpn(dev, ipriv->qpn);
 }
 
 static int mlx5i_open(struct net_device *netdev)
@@ -558,7 +558,7 @@ static int mlx5i_open(struct net_device *netdev)
 		goto err_clear_state_opened_flag;
 	}
 
-	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
 	if (err) {
 		mlx5_core_warn(mdev, "attach underlay qp to ft failed, %d\n", err);
 		goto err_reset_qp;
@@ -575,7 +575,7 @@ static int mlx5i_open(struct net_device *netdev)
 	return 0;
 
 err_remove_fs_underlay_qp:
-	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
 err_reset_qp:
 	mlx5i_uninit_underlay_qp(epriv);
 err_clear_state_opened_flag:
@@ -601,7 +601,7 @@ static int mlx5i_close(struct net_device *netdev)
 	clear_bit(MLX5E_STATE_OPENED, &epriv->state);
 
 	netif_carrier_off(epriv->netdev);
-	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
 	mlx5e_deactivate_priv_channels(epriv);
 	mlx5e_close_channels(&epriv->channels);
 	mlx5i_uninit_underlay_qp(epriv);
@@ -620,11 +620,12 @@ static int mlx5i_attach_mcast(struct net_device *netdev, struct ib_device *hca,
 	struct mlx5i_priv    *ipriv = epriv->ppriv;
 	int err;
 
-	mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
-	err = mlx5_core_attach_mcg(mdev, gid, ipriv->qp.qpn);
+	mlx5_core_dbg(mdev, "attaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+		      gid->raw);
+	err = mlx5_core_attach_mcg(mdev, gid, ipriv->qpn);
 	if (err)
 		mlx5_core_warn(mdev, "failed attaching QPN 0x%x, MGID %pI6\n",
-			       ipriv->qp.qpn, gid->raw);
+			       ipriv->qpn, gid->raw);
 
 	if (set_qkey) {
 		mlx5_core_dbg(mdev, "%s setting qkey 0x%x\n",
@@ -643,12 +644,13 @@ static int mlx5i_detach_mcast(struct net_device *netdev, struct ib_device *hca,
 	struct mlx5i_priv    *ipriv = epriv->ppriv;
 	int err;
 
-	mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qp.qpn, gid->raw);
+	mlx5_core_dbg(mdev, "detaching QPN 0x%x, MGID %pI6\n", ipriv->qpn,
+		      gid->raw);
 
-	err = mlx5_core_detach_mcg(mdev, gid, ipriv->qp.qpn);
+	err = mlx5_core_detach_mcg(mdev, gid, ipriv->qpn);
 	if (err)
 		mlx5_core_dbg(mdev, "failed detaching QPN 0x%x, MGID %pI6\n",
-			      ipriv->qp.qpn, gid->raw);
+			      ipriv->qpn, gid->raw);
 
 	return err;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index de7e01a027bb..3483ba642cfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -51,7 +51,7 @@ extern const struct ethtool_ops mlx5i_pkey_ethtool_ops;
 /* ipoib rdma netdev's private data structure */
 struct mlx5i_priv {
 	struct rdma_netdev rn; /* keep this first */
-	struct mlx5_core_qp qp;
+	u32 qpn;
 	bool   sub_interface;
 	u32    qkey;
 	u16    pkey_index;
@@ -62,8 +62,8 @@ struct mlx5i_priv {
 int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn);
 
 /* Underlay QP create/destroy functions */
-int mlx5i_create_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
-void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, struct mlx5_core_qp *qp);
+int mlx5i_create_underlay_qp(struct mlx5e_priv *priv);
+void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn);
 
 /* Underlay QP state modification init/uninit functions */
 int mlx5i_init_underlay_qp(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index 96e64187c089..b9af37ad40bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -204,13 +204,13 @@ static int mlx5i_pkey_open(struct net_device *netdev)
 		goto err_release_lock;
 	}
 
-	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	err = mlx5_fs_add_rx_underlay_qpn(mdev, ipriv->qpn);
 	if (err) {
 		mlx5_core_warn(mdev, "attach child underlay qp to ft failed, %d\n", err);
 		goto err_unint_underlay_qp;
 	}
 
-	err = mlx5i_create_tis(mdev, ipriv->qp.qpn, &epriv->tisn[0][0]);
+	err = mlx5i_create_tis(mdev, ipriv->qpn, &epriv->tisn[0][0]);
 	if (err) {
 		mlx5_core_warn(mdev, "create child tis failed, %d\n", err);
 		goto err_remove_rx_uderlay_qp;
@@ -230,7 +230,7 @@ static int mlx5i_pkey_open(struct net_device *netdev)
 err_clear_state_opened_flag:
 	mlx5e_destroy_tis(mdev, epriv->tisn[0][0]);
 err_remove_rx_uderlay_qp:
-	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
 err_unint_underlay_qp:
 	mlx5i_uninit_underlay_qp(epriv);
 err_release_lock:
@@ -253,7 +253,7 @@ static int mlx5i_pkey_close(struct net_device *netdev)
 	clear_bit(MLX5E_STATE_OPENED, &priv->state);
 
 	netif_carrier_off(priv->netdev);
-	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qp.qpn);
+	mlx5_fs_remove_rx_underlay_qpn(mdev, ipriv->qpn);
 	mlx5i_uninit_underlay_qp(priv);
 	mlx5e_deactivate_priv_channels(priv);
 	mlx5e_close_channels(&priv->channels);
@@ -307,23 +307,20 @@ static void mlx5i_pkey_cleanup(struct mlx5e_priv *priv)
 
 static int mlx5i_pkey_init_tx(struct mlx5e_priv *priv)
 {
-	struct mlx5i_priv *ipriv = priv->ppriv;
 	int err;
 
-	err = mlx5i_create_underlay_qp(priv->mdev, &ipriv->qp);
-	if (err) {
+	err = mlx5i_create_underlay_qp(priv);
+	if (err)
 		mlx5_core_warn(priv->mdev, "create child underlay QP failed, %d\n", err);
-		return err;
-	}
 
-	return 0;
+	return err;
 }
 
 static void mlx5i_pkey_cleanup_tx(struct mlx5e_priv *priv)
 {
 	struct mlx5i_priv *ipriv = priv->ppriv;
 
-	mlx5i_destroy_underlay_qp(priv->mdev, &ipriv->qp);
+	mlx5i_destroy_underlay_qp(priv->mdev, ipriv->qpn);
 }
 
 static int mlx5i_pkey_init_rx(struct mlx5e_priv *priv)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index 266b913d2f9a..c4ed25bb9ac8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -178,7 +178,7 @@ static struct mlx5dr_qp *dr_create_rc_qp(struct mlx5_core_dev *mdev,
 
 	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
 	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
-	dr_qp->mqp.qpn = MLX5_GET(create_qp_out, out, qpn);
+	dr_qp->qpn = MLX5_GET(create_qp_out, out, qpn);
 	kfree(in);
 	if (err)
 		goto err_in;
@@ -199,10 +199,9 @@ static void dr_destroy_qp(struct mlx5_core_dev *mdev,
 			  struct mlx5dr_qp *dr_qp)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
-	struct mlx5_core_qp *qp = &dr_qp->mqp;
 
 	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, qpn, dr_qp->qpn);
 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
 
 	kfree(dr_qp->sq.wqe_head);
@@ -242,7 +241,7 @@ static void dr_rdma_segments(struct mlx5dr_qp *dr_qp, u64 remote_addr,
 		MLX5_WQE_CTRL_CQ_UPDATE : 0;
 	wq_ctrl->opmod_idx_opcode = cpu_to_be32(((dr_qp->sq.pc & 0xffff) << 8) |
 						opcode);
-	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->mqp.qpn << 8);
+	wq_ctrl->qpn_ds = cpu_to_be32(size | dr_qp->qpn << 8);
 	wq_raddr = (void *)(wq_ctrl + 1);
 	wq_raddr->raddr = cpu_to_be64(remote_addr);
 	wq_raddr->rkey = cpu_to_be32(rkey);
@@ -586,7 +585,7 @@ static int dr_modify_qp_rst2init(struct mlx5_core_dev *mdev,
 	MLX5_SET(qpc, qpc, rwe, 1);
 
 	MLX5_SET(rst2init_qp_in, in, opcode, MLX5_CMD_OP_RST2INIT_QP);
-	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->mqp.qpn);
+	MLX5_SET(rst2init_qp_in, in, qpn, dr_qp->qpn);
 
 	return mlx5_cmd_exec_in(mdev, rst2init_qp, in);
 }
@@ -600,13 +599,13 @@ static int dr_cmd_modify_qp_rtr2rts(struct mlx5_core_dev *mdev,
 
 	qpc  = MLX5_ADDR_OF(rtr2rts_qp_in, in, qpc);
 
-	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
 
 	MLX5_SET(qpc, qpc, retry_count, attr->retry_cnt);
 	MLX5_SET(qpc, qpc, rnr_retry, attr->rnr_retry);
 
 	MLX5_SET(rtr2rts_qp_in, in, opcode, MLX5_CMD_OP_RTR2RTS_QP);
-	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->mqp.qpn);
+	MLX5_SET(rtr2rts_qp_in, in, qpn, dr_qp->qpn);
 
 	return mlx5_cmd_exec_in(mdev, rtr2rts_qp, in);
 }
@@ -620,7 +619,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 
 	qpc = MLX5_ADDR_OF(init2rtr_qp_in, in, qpc);
 
-	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
 
 	MLX5_SET(qpc, qpc, mtu, attr->mtu);
 	MLX5_SET(qpc, qpc, log_msg_max, DR_CHUNK_SIZE_MAX - 1);
@@ -640,7 +639,7 @@ static int dr_cmd_modify_qp_init2rtr(struct mlx5_core_dev *mdev,
 	MLX5_SET(qpc, qpc, min_rnr_nak, 1);
 
 	MLX5_SET(init2rtr_qp_in, in, opcode, MLX5_CMD_OP_INIT2RTR_QP);
-	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->mqp.qpn);
+	MLX5_SET(init2rtr_qp_in, in, qpn, dr_qp->qpn);
 
 	return mlx5_cmd_exec_in(mdev, init2rtr_qp, in);
 }
@@ -668,7 +667,7 @@ static int dr_prepare_qp_to_rts(struct mlx5dr_domain *dmn)
 		return ret;
 
 	rtr_attr.mtu		= mtu;
-	rtr_attr.qp_num		= dr_qp->mqp.qpn;
+	rtr_attr.qp_num		= dr_qp->qpn;
 	rtr_attr.min_rnr_timer	= 12;
 	rtr_attr.port_num	= port;
 	rtr_attr.sgid_index	= gid_index;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 3fa739951b34..984783238baa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -990,7 +990,7 @@ struct mlx5dr_qp {
 	struct mlx5_wq_qp wq;
 	struct mlx5_uars_page *uar;
 	struct mlx5_wq_ctrl wq_ctrl;
-	struct mlx5_core_qp mqp;
+	u32 qpn;
 	struct {
 		unsigned int pc;
 		unsigned int cc;
-- 
cgit v1.2.3-59-g8ed1b


From 57a6c5e992f5d6ab92764a7eaaba855f6d4b2df8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 6 Apr 2020 15:53:06 +0300
Subject: net/mlx5: Replace hand written QP context struct with automatic
 getters

Make debugfs independent of the QP-related API by converting its code
to use MLX5_GET() directly to access the QP context instead of a
hand-written struct.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 51 +++++++++--------------
 1 file changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 04854e5fbcd7..d40c3d5bd496 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -202,42 +202,37 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 			 int index, int *is_str)
 {
-	int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
-	struct mlx5_qp_context *ctx;
+	u32 out[MLX5_ST_SZ_BYTES(query_qp_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
 	u64 param = 0;
-	u32 *out;
+	int state;
+	u32 *qpc;
 	int err;
-	int no_sq;
 
-	out = kzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return param;
-
-	err = mlx5_core_qp_query(dev, qp, out, outlen);
-	if (err) {
-		mlx5_core_warn(dev, "failed to query qp err=%d\n", err);
-		goto out;
-	}
+	MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
+	MLX5_SET(query_qp_in, in, qpn, qp->qpn);
+	err = mlx5_cmd_exec_inout(dev, query_qp, in, out);
+	if (err)
+		return 0;
 
 	*is_str = 0;
 
-	/* FIXME: use MLX5_GET rather than mlx5_qp_context manual struct */
-	ctx = (struct mlx5_qp_context *)MLX5_ADDR_OF(query_qp_out, out, qpc);
-
+	qpc = MLX5_ADDR_OF(query_qp_out, out, qpc);
 	switch (index) {
 	case QP_PID:
 		param = qp->pid;
 		break;
 	case QP_STATE:
-		param = (unsigned long)mlx5_qp_state_str(be32_to_cpu(ctx->flags) >> 28);
+		state = MLX5_GET(qpc, qpc, state);
+		param = (unsigned long)mlx5_qp_state_str(state);
 		*is_str = 1;
 		break;
 	case QP_XPORT:
-		param = (unsigned long)mlx5_qp_type_str((be32_to_cpu(ctx->flags) >> 16) & 0xff);
+		param = (unsigned long)mlx5_qp_type_str(MLX5_GET(qpc, qpc, st));
 		*is_str = 1;
 		break;
 	case QP_MTU:
-		switch (ctx->mtu_msgmax >> 5) {
+		switch (MLX5_GET(qpc, qpc, mtu)) {
 		case IB_MTU_256:
 			param = 256;
 			break;
@@ -258,29 +253,23 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 		}
 		break;
 	case QP_N_RECV:
-		param = 1 << ((ctx->rq_size_stride >> 3) & 0xf);
+		param = 1 << MLX5_GET(qpc, qpc, log_rq_size);
 		break;
 	case QP_RECV_SZ:
-		param = 1 << ((ctx->rq_size_stride & 7) + 4);
+		param = 1 << (MLX5_GET(qpc, qpc, log_rq_stride) + 4);
 		break;
 	case QP_N_SEND:
-		no_sq = be16_to_cpu(ctx->sq_crq_size) >> 15;
-		if (!no_sq)
-			param = 1 << (be16_to_cpu(ctx->sq_crq_size) >> 11);
-		else
-			param = 0;
+		if (!MLX5_GET(qpc, qpc, no_sq))
+			param = 1 << MLX5_GET(qpc, qpc, log_sq_size);
 		break;
 	case QP_LOG_PG_SZ:
-		param = (be32_to_cpu(ctx->log_pg_sz_remote_qpn) >> 24) & 0x1f;
-		param += 12;
+		param = MLX5_GET(qpc, qpc, log_page_size) + 12;
 		break;
 	case QP_RQPN:
-		param = be32_to_cpu(ctx->log_pg_sz_remote_qpn) & 0xffffff;
+		param = MLX5_GET(qpc, qpc, remote_qpn);
 		break;
 	}
 
-out:
-	kfree(out);
 	return param;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 66247fbb280c2a699a8621708c52dae6acd2e4bc Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Fri, 3 Apr 2020 11:28:28 +0300
Subject: net/mlx5: Remove Q counter low level helper APIs

mlx5 core users are encouraged to use the low-level API (mlx5_cmd_exec)
directly, without helper functions. Do this for Q counters: remove the
helper functions and call mlx5_cmd_exec directly from the users.

This helps reduce the total amount of code and the size of the
mlx5_core symbol table.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  | 55 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 39 +++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 35 +++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/qp.c       | 39 ---------------
 include/linux/mlx5/qp.h                            |  4 --
 5 files changed, 80 insertions(+), 92 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 6679756506e6..b02d027ebf3b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5439,15 +5439,21 @@ static bool is_mdev_switchdev_mode(const struct mlx5_core_dev *mdev)
 
 static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 {
+	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
 	int num_cnt_ports;
 	int i;
 
 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
 
+	MLX5_SET(dealloc_q_counter_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+
 	for (i = 0; i < num_cnt_ports; i++) {
-		if (dev->port[i].cnts.set_id_valid)
-			mlx5_core_dealloc_q_counter(dev->mdev,
-						    dev->port[i].cnts.set_id);
+		if (dev->port[i].cnts.set_id_valid) {
+			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+				 dev->port[i].cnts.set_id);
+			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+		}
 		kfree(dev->port[i].cnts.names);
 		kfree(dev->port[i].cnts.offsets);
 	}
@@ -5638,27 +5644,23 @@ static int mlx5_ib_query_q_counters(struct mlx5_core_dev *mdev,
 				    struct rdma_hw_stats *stats,
 				    u16 set_id)
 {
-	int outlen = MLX5_ST_SZ_BYTES(query_q_counter_out);
-	void *out;
+	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
 	__be32 val;
 	int ret, i;
 
-	out = kvzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
-	ret = mlx5_core_query_q_counter(mdev, set_id, 0, out, outlen);
+	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+	MLX5_SET(query_q_counter_in, in, counter_set_id, set_id);
+	ret = mlx5_cmd_exec_inout(mdev, query_q_counter, in, out);
 	if (ret)
-		goto free;
+		return ret;
 
 	for (i = 0; i < cnts->num_q_counters; i++) {
-		val = *(__be32 *)(out + cnts->offsets[i]);
+		val = *(__be32 *)((void *)out + cnts->offsets[i]);
 		stats->value[i] = (u64)be32_to_cpu(val);
 	}
 
-free:
-	kvfree(out);
-	return ret;
+	return 0;
 }
 
 static int mlx5_ib_query_ext_ppcnt_counters(struct mlx5_ib_dev *dev,
@@ -5765,6 +5767,20 @@ static int mlx5_ib_counter_update_stats(struct rdma_counter *counter)
 					counter->stats, counter->id);
 }
 
+static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
+{
+	struct mlx5_ib_dev *dev = to_mdev(counter->device);
+	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+	if (!counter->id)
+		return 0;
+
+	MLX5_SET(dealloc_q_counter_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter->id);
+	return mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
+}
+
 static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 				   struct ib_qp *qp)
 {
@@ -5788,7 +5804,7 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 	return 0;
 
 fail_set_counter:
-	mlx5_core_dealloc_q_counter(dev->mdev, cnt_set_id);
+	mlx5_ib_counter_dealloc(counter);
 	counter->id = 0;
 
 	return err;
@@ -5799,13 +5815,6 @@ static int mlx5_ib_counter_unbind_qp(struct ib_qp *qp)
 	return mlx5_ib_qp_set_counter(qp, NULL);
 }
 
-static int mlx5_ib_counter_dealloc(struct rdma_counter *counter)
-{
-	struct mlx5_ib_dev *dev = to_mdev(counter->device);
-
-	return mlx5_core_dealloc_q_counter(dev->mdev, counter->id);
-}
-
 static int mlx5_ib_rn_get_params(struct ib_device *device, u8 port_num,
 				 enum rdma_netdev_t type,
 				 struct rdma_netdev_alloc_params *params)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index dd7f338425eb..30970b405040 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -4997,29 +4997,40 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 
 void mlx5e_create_q_counters(struct mlx5e_priv *priv)
 {
+	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 	struct mlx5_core_dev *mdev = priv->mdev;
 	int err;
 
-	err = mlx5_core_alloc_q_counter(mdev, &priv->q_counter);
-	if (err) {
-		mlx5_core_warn(mdev, "alloc queue counter failed, %d\n", err);
-		priv->q_counter = 0;
-	}
+	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
+	err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+	if (!err)
+		priv->q_counter =
+			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 
-	err = mlx5_core_alloc_q_counter(mdev, &priv->drop_rq_q_counter);
-	if (err) {
-		mlx5_core_warn(mdev, "alloc drop RQ counter failed, %d\n", err);
-		priv->drop_rq_q_counter = 0;
-	}
+	err = mlx5_cmd_exec_inout(mdev, alloc_q_counter, in, out);
+	if (!err)
+		priv->drop_rq_q_counter =
+			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 }
 
 void mlx5e_destroy_q_counters(struct mlx5e_priv *priv)
 {
-	if (priv->q_counter)
-		mlx5_core_dealloc_q_counter(priv->mdev, priv->q_counter);
+	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)] = {};
+
+	MLX5_SET(dealloc_q_counter_in, in, opcode,
+		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
+	if (priv->q_counter) {
+		MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+			 priv->q_counter);
+		mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+	}
 
-	if (priv->drop_rq_q_counter)
-		mlx5_core_dealloc_q_counter(priv->mdev, priv->drop_rq_q_counter);
+	if (priv->drop_rq_q_counter) {
+		MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
+			 priv->drop_rq_q_counter);
+		mlx5_cmd_exec_in(priv->mdev, dealloc_q_counter, in);
+	}
 }
 
 static int mlx5e_nic_init(struct mlx5_core_dev *mdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 30b216d9284c..ff4002ebad90 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -411,18 +411,29 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(qcnt)
 static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(qcnt)
 {
 	struct mlx5e_qcounter_stats *qcnt = &priv->stats.qcnt;
-	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)];
-
-	if (priv->q_counter &&
-	    !mlx5_core_query_q_counter(priv->mdev, priv->q_counter, 0, out,
-				       sizeof(out)))
-		qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
-						  out, out_of_buffer);
-	if (priv->drop_rq_q_counter &&
-	    !mlx5_core_query_q_counter(priv->mdev, priv->drop_rq_q_counter, 0,
-				       out, sizeof(out)))
-		qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out, out,
-						    out_of_buffer);
+	u32 out[MLX5_ST_SZ_DW(query_q_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {};
+	int ret;
+
+	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
+
+	if (priv->q_counter) {
+		MLX5_SET(query_q_counter_in, in, counter_set_id,
+			 priv->q_counter);
+		ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+		if (!ret)
+			qcnt->rx_out_of_buffer = MLX5_GET(query_q_counter_out,
+							  out, out_of_buffer);
+	}
+
+	if (priv->drop_rq_q_counter) {
+		MLX5_SET(query_q_counter_in, in, counter_set_id,
+			 priv->drop_rq_q_counter);
+		ret = mlx5_cmd_exec_inout(priv->mdev, query_q_counter, in, out);
+		if (!ret)
+			qcnt->rx_if_down_packets = MLX5_GET(query_q_counter_out,
+							    out, out_of_buffer);
+	}
 }
 
 #define VNIC_ENV_OFF(c) MLX5_BYTE_OFF(query_vnic_env_out, c)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index c3aea4cc2fff..e36790ad5256 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -680,45 +680,6 @@ void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
 }
 EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
 
-int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id)
-{
-	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
-	int err;
-
-	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (!err)
-		*counter_id = MLX5_GET(alloc_q_counter_out, out,
-				       counter_set_id);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_alloc_q_counter);
-
-int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id)
-{
-	u32 in[MLX5_ST_SZ_DW(dealloc_q_counter_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(dealloc_q_counter_out)] = {0};
-
-	MLX5_SET(dealloc_q_counter_in, in, opcode,
-		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
-	MLX5_SET(dealloc_q_counter_in, in, counter_set_id, counter_id);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_dealloc_q_counter);
-
-int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
-			      int reset, void *out, int out_size)
-{
-	u32 in[MLX5_ST_SZ_DW(query_q_counter_in)] = {0};
-
-	MLX5_SET(query_q_counter_in, in, opcode, MLX5_CMD_OP_QUERY_Q_COUNTER);
-	MLX5_SET(query_q_counter_in, in, clear, reset);
-	MLX5_SET(query_q_counter_in, in, counter_set_id, counter_id);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_query_q_counter);
-
 struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
 						int res_num,
 						enum mlx5_res_type res_type)
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index ae63b1ae9004..4d25a3d24182 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -595,10 +595,6 @@ int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
 				struct mlx5_core_qp *sq);
 void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
 				  struct mlx5_core_qp *sq);
-int mlx5_core_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id);
-int mlx5_core_dealloc_q_counter(struct mlx5_core_dev *dev, u16 counter_id);
-int mlx5_core_query_q_counter(struct mlx5_core_dev *dev, u16 counter_id,
-			      int reset, void *out, int out_size);
 
 struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
 						int res_num,
-- 
cgit v1.2.3-59-g8ed1b


From bfd745f8f327c40d74c8207ca62db05a264b5b7c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Fri, 3 Apr 2020 13:31:18 +0300
Subject: RDMA/mlx5: Delete Q counter allocations command

Remove the mlx5_ib implementation of the Q counter allocation logic,
together with the boolean that tracked the validity of the counter.
The boolean is not needed, because counter_id == 0 already means that
the counter is not valid.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cmd.c     | 17 -----------------
 drivers/infiniband/hw/mlx5/cmd.h     |  2 --
 drivers/infiniband/hw/mlx5/main.c    | 31 ++++++++++++++++++++-----------
 drivers/infiniband/hw/mlx5/mlx5_ib.h |  1 -
 4 files changed, 20 insertions(+), 31 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cmd.c b/drivers/infiniband/hw/mlx5/cmd.c
index 4c26492ab8a3..a2fcbc49131e 100644
--- a/drivers/infiniband/hw/mlx5/cmd.c
+++ b/drivers/infiniband/hw/mlx5/cmd.c
@@ -327,23 +327,6 @@ int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid)
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
 }
 
-int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
-			     u16 uid)
-{
-	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {0};
-	int err;
-
-	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
-	MLX5_SET(alloc_q_counter_in, in, uid, uid);
-
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (!err)
-		*counter_id = MLX5_GET(alloc_q_counter_out, out,
-				       counter_set_id);
-	return err;
-}
-
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port)
 {
diff --git a/drivers/infiniband/hw/mlx5/cmd.h b/drivers/infiniband/hw/mlx5/cmd.h
index 945ebce73613..43079b18d9b4 100644
--- a/drivers/infiniband/hw/mlx5/cmd.h
+++ b/drivers/infiniband/hw/mlx5/cmd.h
@@ -61,8 +61,6 @@ int mlx5_cmd_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid,
 			u32 qpn, u16 uid);
 int mlx5_cmd_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn, u16 uid);
 int mlx5_cmd_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn, u16 uid);
-int mlx5_cmd_alloc_q_counter(struct mlx5_core_dev *dev, u16 *counter_id,
-			     u16 uid);
 int mlx5_cmd_mad_ifc(struct mlx5_core_dev *dev, const void *inb, void *outb,
 		     u16 opmod, u8 port);
 #endif /* MLX5_IB_CMD_H */
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index b02d027ebf3b..76ea756d846b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -5449,7 +5449,7 @@ static void mlx5_ib_dealloc_counters(struct mlx5_ib_dev *dev)
 		 MLX5_CMD_OP_DEALLOC_Q_COUNTER);
 
 	for (i = 0; i < num_cnt_ports; i++) {
-		if (dev->port[i].cnts.set_id_valid) {
+		if (dev->port[i].cnts.set_id) {
 			MLX5_SET(dealloc_q_counter_in, in, counter_set_id,
 				 dev->port[i].cnts.set_id);
 			mlx5_cmd_exec_in(dev->mdev, dealloc_q_counter, in);
@@ -5562,11 +5562,14 @@ static void mlx5_ib_fill_counters(struct mlx5_ib_dev *dev,
 
 static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 {
+	u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
 	int num_cnt_ports;
 	int err = 0;
 	int i;
 	bool is_shared;
 
+	MLX5_SET(alloc_q_counter_in, in, opcode, MLX5_CMD_OP_ALLOC_Q_COUNTER);
 	is_shared = MLX5_CAP_GEN(dev->mdev, log_max_uctx) != 0;
 	num_cnt_ports = is_mdev_switchdev_mode(dev->mdev) ? 1 : dev->num_ports;
 
@@ -5578,17 +5581,19 @@ static int mlx5_ib_alloc_counters(struct mlx5_ib_dev *dev)
 		mlx5_ib_fill_counters(dev, dev->port[i].cnts.names,
 				      dev->port[i].cnts.offsets);
 
-		err = mlx5_cmd_alloc_q_counter(dev->mdev,
-					       &dev->port[i].cnts.set_id,
-					       is_shared ?
-					       MLX5_SHARED_RESOURCE_UID : 0);
+		MLX5_SET(alloc_q_counter_in, in, uid,
+			 is_shared ? MLX5_SHARED_RESOURCE_UID : 0);
+
+		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 		if (err) {
 			mlx5_ib_warn(dev,
 				     "couldn't allocate queue counter for port %d, err %d\n",
 				     i + 1, err);
 			goto err_alloc;
 		}
-		dev->port[i].cnts.set_id_valid = true;
+
+		dev->port[i].cnts.set_id =
+			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 	}
 	return 0;
 
@@ -5785,16 +5790,20 @@ static int mlx5_ib_counter_bind_qp(struct rdma_counter *counter,
 				   struct ib_qp *qp)
 {
 	struct mlx5_ib_dev *dev = to_mdev(qp->device);
-	u16 cnt_set_id = 0;
 	int err;
 
 	if (!counter->id) {
-		err = mlx5_cmd_alloc_q_counter(dev->mdev,
-					       &cnt_set_id,
-					       MLX5_SHARED_RESOURCE_UID);
+		u32 out[MLX5_ST_SZ_DW(alloc_q_counter_out)] = {};
+		u32 in[MLX5_ST_SZ_DW(alloc_q_counter_in)] = {};
+
+		MLX5_SET(alloc_q_counter_in, in, opcode,
+			 MLX5_CMD_OP_ALLOC_Q_COUNTER);
+		MLX5_SET(alloc_q_counter_in, in, uid, MLX5_SHARED_RESOURCE_UID);
+		err = mlx5_cmd_exec_inout(dev->mdev, alloc_q_counter, in, out);
 		if (err)
 			return err;
-		counter->id = cnt_set_id;
+		counter->id =
+			MLX5_GET(alloc_q_counter_out, out, counter_set_id);
 	}
 
 	err = mlx5_ib_qp_set_counter(qp, counter);
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index a4e522385de0..cb2a021aa93c 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -780,7 +780,6 @@ struct mlx5_ib_counters {
 	u32 num_cong_counters;
 	u32 num_ext_ppcnt_counters;
 	u16 set_id;
-	bool set_id_valid;
 };
 
 struct mlx5_ib_multiport_info;
-- 
cgit v1.2.3-59-g8ed1b


From 9c275ee4ad82aeb1f51981fdc9ee16b74d4b101a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 7 Apr 2020 16:09:15 +0300
Subject: net/mlx5: Delete not-used cmd header

The structures defined in the cmd header are not used and can be safely
removed from the driver. This patch removes that file and deletes all
relevant includes.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/mad.c                       | 1 -
 drivers/infiniband/hw/mlx5/srq_cmd.c                   | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/cq.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/eq.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c     | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/fw.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/health.c       | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c          | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/mr.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c    | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/pd.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/qp.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/rl.c           | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/uar.c          | 1 -
 15 files changed, 15 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index 14e0c17de6a9..f0ab6d7d8497 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -30,7 +30,6 @@
  * SOFTWARE.
  */
 
-#include <linux/mlx5/cmd.h>
 #include <linux/mlx5/vport.h>
 #include <rdma/ib_mad.h>
 #include <rdma/ib_smi.h>
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 8fc3630a9d4c..88c0388f9fc6 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -5,7 +5,6 @@
 
 #include <linux/kernel.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_ib.h"
 #include "srq.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 818edc63e428..4477a590b308 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -34,7 +34,6 @@
 #include <linux/module.h>
 #include <linux/hardirq.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include <rdma/ib_verbs.h>
 #include <linux/mlx5/cq.h>
 #include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index cccea3a8eddd..bee419d01af2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -36,7 +36,6 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/eq.h>
-#include <linux/mlx5/cmd.h>
 #ifdef CONFIG_RFS_ACCEL
 #include <linux/cpu_rmap.h>
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index c0fd2212e890..09769401c313 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -31,7 +31,6 @@
  */
 
 #include <linux/etherdevice.h>
-#include <linux/mlx5/cmd.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/device.h>
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 90e3d0233101..3040e0466681 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -31,7 +31,6 @@
  */
 
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include <linux/mlx5/eswitch.h>
 #include <linux/module.h>
 #include "mlx5_core.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/health.c b/drivers/net/ethernet/mellanox/mlx5/core/health.c
index fa1665caac46..3ae355453464 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/health.c
@@ -36,7 +36,6 @@
 #include <linux/vmalloc.h>
 #include <linux/hardirq.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "lib/mlx5.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index 48b5c847b642..8809a65ecefb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -4,7 +4,6 @@
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/port.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 #include "lib/port_tun.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index ba2b09cc192f..6789fe658037 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include <rdma/ib_verbs.h>
 #include "mlx5_core.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 366f2cbfc6db..1feedf335dea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 91bd258ecf1b..a3959754b927 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -35,7 +35,6 @@
 #include <linux/module.h>
 #include <linux/delay.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 #include "lib/eq.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index bd830d8d6c5f..b92d6f621c83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
index e36790ad5256..d9df3a5dd532 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
@@ -32,7 +32,6 @@
 
 #include <linux/gfp.h>
 #include <linux/export.h>
-#include <linux/mlx5/cmd.h>
 #include <linux/mlx5/qp.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/transobj.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index f3b29d9ade1f..c9599f7c5696 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -33,7 +33,6 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 /* Scheduling element fw management */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 0d006224d7b0..816f9c434359 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -34,7 +34,6 @@
 #include <linux/module.h>
 #include <linux/io-mapping.h>
 #include <linux/mlx5/driver.h>
-#include <linux/mlx5/cmd.h>
 #include "mlx5_core.h"
 
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
-- 
cgit v1.2.3-59-g8ed1b


From 42f9bbd11278d0a270a75998f5c0d21e7b37c521 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Wed, 1 Apr 2020 11:48:47 +0300
Subject: RDMA/mlx5: Alphabetically sort build artifacts

Sort the .o objects in the Makefile to make adding new objects
less cumbersome.

Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/Makefile | 27 ++++++++++++++++++++-------
 1 file changed, 20 insertions(+), 7 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 2a334800f109..375b341be8a1 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -1,11 +1,24 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_MLX5_INFINIBAND)	+= mlx5_ib.o
+obj-$(CONFIG_MLX5_INFINIBAND) += mlx5_ib.o
+
+mlx5_ib-y := ah.o \
+	     cmd.o \
+	     cong.o \
+	     cq.o \
+	     doorbell.o \
+	     gsi.o \
+	     ib_virt.o \
+	     mad.o \
+	     main.o \
+	     mem.o \
+	     mr.o \
+	     qp.o \
+	     restrack.o \
+	     srq.o \
+	     srq_cmd.o
 
-mlx5_ib-y :=	main.o cq.o doorbell.o qp.o mem.o srq_cmd.o \
-		srq.o mr.o ah.o mad.o gsi.o ib_virt.o cmd.o \
-		cong.o restrack.o
 mlx5_ib-$(CONFIG_INFINIBAND_ON_DEMAND_PAGING) += odp.o
 mlx5_ib-$(CONFIG_MLX5_ESWITCH) += ib_rep.o
-mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o
-mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += flow.o
-mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += qos.o
+mlx5_ib-$(CONFIG_INFINIBAND_USER_ACCESS) += devx.o \
+					    flow.o \
+					    qos.o
-- 
cgit v1.2.3-59-g8ed1b


From 333fbaa0255b8d471fc7ae767ef3a1766c732d6d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sat, 4 Apr 2020 10:40:24 +0300
Subject: net/mlx5: Move QP logic to mlx5_ib

The mlx5_core doesn't need any functionality coded in qp.c, so move
that file to drivers/infiniband/ to be under mlx5_ib responsibility.

Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/Makefile               |   1 +
 drivers/infiniband/hw/mlx5/cq.c                   |   3 +-
 drivers/infiniband/hw/mlx5/devx.c                 |  10 +-
 drivers/infiniband/hw/mlx5/main.c                 |  10 +-
 drivers/infiniband/hw/mlx5/mlx5_ib.h              |   2 +
 drivers/infiniband/hw/mlx5/odp.c                  |   3 +-
 drivers/infiniband/hw/mlx5/qp.c                   |  47 +-
 drivers/infiniband/hw/mlx5/qp.h                   |  46 ++
 drivers/infiniband/hw/mlx5/qpc.c                  | 605 +++++++++++++++++++
 drivers/infiniband/hw/mlx5/srq_cmd.c              |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/Makefile  |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/main.c    |   4 -
 drivers/net/ethernet/mellanox/mlx5/core/qp.c      | 697 ----------------------
 include/linux/mlx5/cmd.h                          |  51 --
 include/linux/mlx5/driver.h                       |   2 -
 include/linux/mlx5/qp.h                           |  45 --
 17 files changed, 699 insertions(+), 836 deletions(-)
 create mode 100644 drivers/infiniband/hw/mlx5/qp.h
 create mode 100644 drivers/infiniband/hw/mlx5/qpc.c
 delete mode 100644 drivers/net/ethernet/mellanox/mlx5/core/qp.c
 delete mode 100644 include/linux/mlx5/cmd.h

diff --git a/drivers/infiniband/hw/mlx5/Makefile b/drivers/infiniband/hw/mlx5/Makefile
index 375b341be8a1..228be05fbaf8 100644
--- a/drivers/infiniband/hw/mlx5/Makefile
+++ b/drivers/infiniband/hw/mlx5/Makefile
@@ -13,6 +13,7 @@ mlx5_ib-y := ah.o \
 	     mem.o \
 	     mr.o \
 	     qp.o \
+	     qpc.o \
 	     restrack.o \
 	     srq.o \
 	     srq_cmd.o
diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 146ba2966744..32c05730dfe9 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -36,6 +36,7 @@
 #include <rdma/ib_cache.h>
 #include "mlx5_ib.h"
 #include "srq.h"
+#include "qp.h"
 
 static void mlx5_ib_cq_comp(struct mlx5_core_cq *cq, struct mlx5_eqe *eqe)
 {
@@ -484,7 +485,7 @@ repoll:
 		 * because CQs will be locked while QPs are removed
 		 * from the table.
 		 */
-		mqp = __mlx5_qp_lookup(dev->mdev, qpn);
+		mqp = radix_tree_lookup(&dev->qp_table.tree, qpn);
 		*cur_qp = to_mibqp(mqp);
 	}
 
diff --git a/drivers/infiniband/hw/mlx5/devx.c b/drivers/infiniband/hw/mlx5/devx.c
index 46e1ab771f10..35b98c2d64d5 100644
--- a/drivers/infiniband/hw/mlx5/devx.c
+++ b/drivers/infiniband/hw/mlx5/devx.c
@@ -14,6 +14,7 @@
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/fs.h>
 #include "mlx5_ib.h"
+#include "qp.h"
 #include <linux/xarray.h>
 
 #define UVERBS_MODULE_NAME mlx5_ib
@@ -1356,7 +1357,7 @@ static int devx_obj_cleanup(struct ib_uobject *uobject,
 	}
 
 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
-		ret = mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+		ret = mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
 		ret = mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
 	else
@@ -1450,9 +1451,8 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 
 	if (opcode == MLX5_CMD_OP_CREATE_DCT) {
 		obj->flags |= DEVX_OBJ_FLAGS_DCT;
-		err = mlx5_core_create_dct(dev->mdev, &obj->core_dct,
-					   cmd_in, cmd_in_len,
-					   cmd_out, cmd_out_len);
+		err = mlx5_core_create_dct(dev, &obj->core_dct, cmd_in,
+					   cmd_in_len, cmd_out, cmd_out_len);
 	} else if (opcode == MLX5_CMD_OP_CREATE_CQ) {
 		obj->flags |= DEVX_OBJ_FLAGS_CQ;
 		obj->core_cq.comp = devx_cq_comp;
@@ -1499,7 +1499,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_DEVX_OBJ_CREATE)(
 
 obj_destroy:
 	if (obj->flags & DEVX_OBJ_FLAGS_DCT)
-		mlx5_core_destroy_dct(obj->ib_dev->mdev, &obj->core_dct);
+		mlx5_core_destroy_dct(obj->ib_dev, &obj->core_dct);
 	else if (obj->flags & DEVX_OBJ_FLAGS_CQ)
 		mlx5_core_destroy_cq(obj->ib_dev->mdev, &obj->core_cq);
 	else
diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 76ea756d846b..f10675213115 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -59,6 +59,7 @@
 #include "ib_rep.h"
 #include "cmd.h"
 #include "srq.h"
+#include "qp.h"
 #include <linux/mlx5/fs_helpers.h>
 #include <linux/mlx5/accel.h>
 #include <rdma/uverbs_std_types.h>
@@ -4632,8 +4633,7 @@ static void delay_drop_handler(struct work_struct *work)
 	atomic_inc(&delay_drop->events_cnt);
 
 	mutex_lock(&delay_drop->lock);
-	err = mlx5_core_set_delay_drop(delay_drop->dev->mdev,
-				       delay_drop->timeout);
+	err = mlx5_core_set_delay_drop(delay_drop->dev, delay_drop->timeout);
 	if (err) {
 		mlx5_ib_warn(delay_drop->dev, "Failed to set delay drop, timeout=%u\n",
 			     delay_drop->timeout);
@@ -7193,6 +7193,9 @@ static const struct mlx5_ib_profile pf_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
 		     mlx5_ib_stage_roce_init,
 		     mlx5_ib_stage_roce_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_QP,
+		     mlx5_init_qp_table,
+		     mlx5_cleanup_qp_table),
 	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
 		     mlx5_init_srq_table,
 		     mlx5_cleanup_srq_table),
@@ -7250,6 +7253,9 @@ const struct mlx5_ib_profile raw_eth_profile = {
 	STAGE_CREATE(MLX5_IB_STAGE_ROCE,
 		     mlx5_ib_stage_raw_eth_roce_init,
 		     mlx5_ib_stage_raw_eth_roce_cleanup),
+	STAGE_CREATE(MLX5_IB_STAGE_QP,
+		     mlx5_init_qp_table,
+		     mlx5_cleanup_qp_table),
 	STAGE_CREATE(MLX5_IB_STAGE_SRQ,
 		     mlx5_init_srq_table,
 		     mlx5_cleanup_srq_table),
diff --git a/drivers/infiniband/hw/mlx5/mlx5_ib.h b/drivers/infiniband/hw/mlx5/mlx5_ib.h
index cb2a021aa93c..aaabb8a98eed 100644
--- a/drivers/infiniband/hw/mlx5/mlx5_ib.h
+++ b/drivers/infiniband/hw/mlx5/mlx5_ib.h
@@ -869,6 +869,7 @@ enum mlx5_ib_stages {
 	MLX5_IB_STAGE_CAPS,
 	MLX5_IB_STAGE_NON_DEFAULT_CB,
 	MLX5_IB_STAGE_ROCE,
+	MLX5_IB_STAGE_QP,
 	MLX5_IB_STAGE_SRQ,
 	MLX5_IB_STAGE_DEVICE_RESOURCES,
 	MLX5_IB_STAGE_DEVICE_NOTIFIER,
@@ -1064,6 +1065,7 @@ struct mlx5_ib_dev {
 	struct mlx5_dm		dm;
 	u16			devx_whitelist_uid;
 	struct mlx5_srq_table   srq_table;
+	struct mlx5_qp_table    qp_table;
 	struct mlx5_async_ctx   async_ctx;
 	struct mlx5_devx_event_table devx_event_table;
 	struct mlx5_var_table var_table;
diff --git a/drivers/infiniband/hw/mlx5/odp.c b/drivers/infiniband/hw/mlx5/odp.c
index 3de7606d4a1a..16af1105cfcf 100644
--- a/drivers/infiniband/hw/mlx5/odp.c
+++ b/drivers/infiniband/hw/mlx5/odp.c
@@ -36,6 +36,7 @@
 
 #include "mlx5_ib.h"
 #include "cmd.h"
+#include "qp.h"
 
 #include <linux/mlx5/eq.h>
 
@@ -1219,7 +1220,7 @@ static inline struct mlx5_core_rsc_common *odp_get_rsc(struct mlx5_ib_dev *dev,
 	case MLX5_WQE_PF_TYPE_REQ_SEND_OR_WRITE:
 	case MLX5_WQE_PF_TYPE_RESP:
 	case MLX5_WQE_PF_TYPE_REQ_READ_OR_ATOMIC:
-		common = mlx5_core_res_hold(dev->mdev, wq_num, MLX5_RES_QP);
+		common = mlx5_core_res_hold(dev, wq_num, MLX5_RES_QP);
 		break;
 	default:
 		break;
diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 1456db4b6295..3ecd1864b3c8 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -39,6 +39,7 @@
 #include "mlx5_ib.h"
 #include "ib_rep.h"
 #include "cmd.h"
+#include "qp.h"
 
 /* not supported currently */
 static int wq_signature;
@@ -1336,7 +1337,7 @@ static int create_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 	pas = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
 	mlx5_ib_populate_pas(dev, sq->ubuffer.umem, page_shift, pas, 0);
 
-	err = mlx5_core_create_sq_tracked(dev->mdev, in, inlen, &sq->base.mqp);
+	err = mlx5_core_create_sq_tracked(dev, in, inlen, &sq->base.mqp);
 
 	kvfree(in);
 
@@ -1356,7 +1357,7 @@ static void destroy_raw_packet_qp_sq(struct mlx5_ib_dev *dev,
 				     struct mlx5_ib_sq *sq)
 {
 	destroy_flow_rule_vport_sq(sq);
-	mlx5_core_destroy_sq_tracked(dev->mdev, &sq->base.mqp);
+	mlx5_core_destroy_sq_tracked(dev, &sq->base.mqp);
 	ib_umem_release(sq->ubuffer.umem);
 }
 
@@ -1426,7 +1427,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 	qp_pas = (__be64 *)MLX5_ADDR_OF(create_qp_in, qpin, pas);
 	memcpy(pas, qp_pas, rq_pas_size);
 
-	err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rq->base.mqp);
+	err = mlx5_core_create_rq_tracked(dev, in, inlen, &rq->base.mqp);
 
 	kvfree(in);
 
@@ -1436,7 +1437,7 @@ static int create_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 static void destroy_raw_packet_qp_rq(struct mlx5_ib_dev *dev,
 				     struct mlx5_ib_rq *rq)
 {
-	mlx5_core_destroy_rq_tracked(dev->mdev, &rq->base.mqp);
+	mlx5_core_destroy_rq_tracked(dev, &rq->base.mqp);
 }
 
 static bool tunnel_offload_supported(struct mlx5_core_dev *dev)
@@ -2347,7 +2348,7 @@ static int create_qp_common(struct mlx5_ib_dev *dev, struct ib_pd *pd,
 		err = create_raw_packet_qp(dev, qp, in, inlen, pd, udata,
 					   &resp);
 	} else {
-		err = mlx5_core_create_qp(dev->mdev, &base->mqp, in, inlen);
+		err = mlx5_core_create_qp(dev, &base->mqp, in, inlen);
 	}
 
 	if (err) {
@@ -2513,8 +2514,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (qp->state != IB_QPS_RESET) {
 		if (qp->ibqp.qp_type != IB_QPT_RAW_PACKET &&
 		    !(qp->flags & MLX5_IB_QP_UNDERLAY)) {
-			err = mlx5_core_qp_modify(dev->mdev,
-						  MLX5_CMD_OP_2RST_QP, 0,
+			err = mlx5_core_qp_modify(dev, MLX5_CMD_OP_2RST_QP, 0,
 						  NULL, &base->mqp);
 		} else {
 			struct mlx5_modify_raw_qp_param raw_qp_param = {
@@ -2555,7 +2555,7 @@ static void destroy_qp_common(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	    qp->flags & MLX5_IB_QP_UNDERLAY) {
 		destroy_raw_packet_qp(dev, qp);
 	} else {
-		err = mlx5_core_destroy_qp(dev->mdev, &base->mqp);
+		err = mlx5_core_destroy_qp(dev, &base->mqp);
 		if (err)
 			mlx5_ib_warn(dev, "failed to destroy QP 0x%x\n",
 				     base->mqp.qpn);
@@ -2818,7 +2818,7 @@ static int mlx5_ib_destroy_dct(struct mlx5_ib_qp *mqp)
 	if (mqp->state == IB_QPS_RTR) {
 		int err;
 
-		err = mlx5_core_destroy_dct(dev->mdev, &mqp->dct.mdct);
+		err = mlx5_core_destroy_dct(dev, &mqp->dct.mdct);
 		if (err) {
 			mlx5_ib_warn(dev, "failed to destroy DCT %d\n", err);
 			return err;
@@ -3462,10 +3462,9 @@ static int __mlx5_ib_qp_set_counter(struct ib_qp *qp,
 	base = &mqp->trans_qp.base;
 	context.qp_counter_set_usr_page &= cpu_to_be32(0xffffff);
 	context.qp_counter_set_usr_page |= cpu_to_be32(set_id << 24);
-	return mlx5_core_qp_modify(dev->mdev,
-				   MLX5_CMD_OP_RTS2RTS_QP,
-				   MLX5_QP_OPTPAR_COUNTER_SET_ID,
-				   &context, &base->mqp);
+	return mlx5_core_qp_modify(dev, MLX5_CMD_OP_RTS2RTS_QP,
+				   MLX5_QP_OPTPAR_COUNTER_SET_ID, &context,
+				   &base->mqp);
 }
 
 static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
@@ -3752,8 +3751,7 @@ static int __mlx5_ib_modify_qp(struct ib_qp *ibqp,
 
 		err = modify_raw_packet_qp(dev, qp, &raw_qp_param, tx_affinity);
 	} else {
-		err = mlx5_core_qp_modify(dev->mdev, op, optpar, context,
-					  &base->mqp);
+		err = mlx5_core_qp_modify(dev, op, optpar, context, &base->mqp);
 	}
 
 	if (err)
@@ -3927,7 +3925,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		MLX5_SET(dctc, dctc, my_addr_index, attr->ah_attr.grh.sgid_index);
 		MLX5_SET(dctc, dctc, hop_limit, attr->ah_attr.grh.hop_limit);
 
-		err = mlx5_core_create_dct(dev->mdev, &qp->dct.mdct, qp->dct.in,
+		err = mlx5_core_create_dct(dev, &qp->dct.mdct, qp->dct.in,
 					   MLX5_ST_SZ_BYTES(create_dct_in), out,
 					   sizeof(out));
 		if (err)
@@ -3935,7 +3933,7 @@ static int mlx5_ib_modify_dct(struct ib_qp *ibqp, struct ib_qp_attr *attr,
 		resp.dctn = qp->dct.mdct.mqp.qpn;
 		err = ib_copy_to_udata(udata, &resp, resp.response_length);
 		if (err) {
-			mlx5_core_destroy_dct(dev->mdev, &qp->dct.mdct);
+			mlx5_core_destroy_dct(dev, &qp->dct.mdct);
 			return err;
 		}
 	} else {
@@ -5697,8 +5695,7 @@ static int query_qp_attr(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	if (!outb)
 		return -ENOMEM;
 
-	err = mlx5_core_qp_query(dev->mdev, &qp->trans_qp.base.mqp, outb,
-				 outlen);
+	err = mlx5_core_qp_query(dev, &qp->trans_qp.base.mqp, outb, outlen);
 	if (err)
 		goto out;
 
@@ -5776,7 +5773,7 @@ static int mlx5_ib_dct_query_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *mqp,
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_core_dct_query(dev->mdev, dct, out, outlen);
+	err = mlx5_core_dct_query(dev, dct, out, outlen);
 	if (err)
 		goto out;
 
@@ -5962,7 +5959,7 @@ static int set_delay_drop(struct mlx5_ib_dev *dev)
 	if (dev->delay_drop.activate)
 		goto out;
 
-	err = mlx5_core_set_delay_drop(dev->mdev, dev->delay_drop.timeout);
+	err = mlx5_core_set_delay_drop(dev, dev->delay_drop.timeout);
 	if (err)
 		goto out;
 
@@ -6068,13 +6065,13 @@ static int  create_rq(struct mlx5_ib_rwq *rwq, struct ib_pd *pd,
 	}
 	rq_pas0 = (__be64 *)MLX5_ADDR_OF(wq, wq, pas);
 	mlx5_ib_populate_pas(dev, rwq->umem, rwq->page_shift, rq_pas0, 0);
-	err = mlx5_core_create_rq_tracked(dev->mdev, in, inlen, &rwq->core_qp);
+	err = mlx5_core_create_rq_tracked(dev, in, inlen, &rwq->core_qp);
 	if (!err && init_attr->create_flags & IB_WQ_FLAGS_DELAY_DROP) {
 		err = set_delay_drop(dev);
 		if (err) {
 			mlx5_ib_warn(dev, "Failed to enable delay drop err=%d\n",
 				     err);
-			mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+			mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
 		} else {
 			rwq->create_flags |= MLX5_IB_WQ_FLAGS_DELAY_DROP;
 		}
@@ -6256,7 +6253,7 @@ struct ib_wq *mlx5_ib_create_wq(struct ib_pd *pd,
 	return &rwq->ibwq;
 
 err_copy:
-	mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+	mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
 err_user_rq:
 	destroy_user_rq(dev, pd, rwq, udata);
 err:
@@ -6269,7 +6266,7 @@ void mlx5_ib_destroy_wq(struct ib_wq *wq, struct ib_udata *udata)
 	struct mlx5_ib_dev *dev = to_mdev(wq->device);
 	struct mlx5_ib_rwq *rwq = to_mrwq(wq);
 
-	mlx5_core_destroy_rq_tracked(dev->mdev, &rwq->core_qp);
+	mlx5_core_destroy_rq_tracked(dev, &rwq->core_qp);
 	destroy_user_rq(dev, wq->pd, rwq, udata);
 	kfree(rwq);
 }
diff --git a/drivers/infiniband/hw/mlx5/qp.h b/drivers/infiniband/hw/mlx5/qp.h
new file mode 100644
index 000000000000..ad9d76e3e18a
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qp.h
@@ -0,0 +1,46 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#ifndef _MLX5_IB_QP_H
+#define _MLX5_IB_QP_H
+
+#include "mlx5_ib.h"
+
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev);
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev);
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *qp,
+			 u32 *in, int inlen, u32 *out, int outlen);
+int mlx5_core_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+			u32 *in, int inlen);
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+			void *qpc, struct mlx5_core_qp *qp);
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp);
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct);
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+		       u32 *out, int outlen);
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+			u32 *out, int outlen);
+
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec);
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_qp *rq);
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *sq);
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_qp *sq);
+
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *rq);
+
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+						int res_num,
+						enum mlx5_res_type res_type);
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn);
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn);
+#endif /* _MLX5_IB_QP_H */
diff --git a/drivers/infiniband/hw/mlx5/qpc.c b/drivers/infiniband/hw/mlx5/qpc.c
new file mode 100644
index 000000000000..ea62735042f0
--- /dev/null
+++ b/drivers/infiniband/hw/mlx5/qpc.c
@@ -0,0 +1,605 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/*
+ * Copyright (c) 2013-2020, Mellanox Technologies inc. All rights reserved.
+ */
+
+#include <linux/gfp.h>
+#include <linux/mlx5/qp.h>
+#include <linux/mlx5/driver.h>
+#include "mlx5_ib.h"
+#include "qp.h"
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+			       struct mlx5_core_dct *dct);
+
+static struct mlx5_core_rsc_common *
+mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
+{
+	struct mlx5_core_rsc_common *common;
+	unsigned long flags;
+
+	spin_lock_irqsave(&table->lock, flags);
+
+	common = radix_tree_lookup(&table->tree, rsn);
+	if (common)
+		refcount_inc(&common->refcount);
+
+	spin_unlock_irqrestore(&table->lock, flags);
+
+	return common;
+}
+
+void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
+{
+	if (refcount_dec_and_test(&common->refcount))
+		complete(&common->free);
+}
+
+static u64 qp_allowed_event_types(void)
+{
+	u64 mask;
+
+	mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
+	       BIT(MLX5_EVENT_TYPE_COMM_EST) |
+	       BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
+	       BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+	       BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
+	       BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
+	       BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
+	       BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
+
+	return mask;
+}
+
+static u64 rq_allowed_event_types(void)
+{
+	u64 mask;
+
+	mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
+	       BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+
+	return mask;
+}
+
+static u64 sq_allowed_event_types(void)
+{
+	return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
+}
+
+static u64 dct_allowed_event_types(void)
+{
+	return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
+}
+
+static bool is_event_type_allowed(int rsc_type, int event_type)
+{
+	switch (rsc_type) {
+	case MLX5_EVENT_QUEUE_TYPE_QP:
+		return BIT(event_type) & qp_allowed_event_types();
+	case MLX5_EVENT_QUEUE_TYPE_RQ:
+		return BIT(event_type) & rq_allowed_event_types();
+	case MLX5_EVENT_QUEUE_TYPE_SQ:
+		return BIT(event_type) & sq_allowed_event_types();
+	case MLX5_EVENT_QUEUE_TYPE_DCT:
+		return BIT(event_type) & dct_allowed_event_types();
+	default:
+		WARN(1, "Event arrived for unknown resource type");
+		return false;
+	}
+}
+
+static int rsc_event_notifier(struct notifier_block *nb,
+			      unsigned long type, void *data)
+{
+	struct mlx5_core_rsc_common *common;
+	struct mlx5_qp_table *table;
+	struct mlx5_core_dct *dct;
+	u8 event_type = (u8)type;
+	struct mlx5_core_qp *qp;
+	struct mlx5_eqe *eqe;
+	u32 rsn;
+
+	switch (event_type) {
+	case MLX5_EVENT_TYPE_DCT_DRAINED:
+		eqe = data;
+		rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
+		rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
+		break;
+	case MLX5_EVENT_TYPE_PATH_MIG:
+	case MLX5_EVENT_TYPE_COMM_EST:
+	case MLX5_EVENT_TYPE_SQ_DRAINED:
+	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
+	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
+	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
+	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
+	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
+		eqe = data;
+		rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
+		rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
+		break;
+	default:
+		return NOTIFY_DONE;
+	}
+
+	table = container_of(nb, struct mlx5_qp_table, nb);
+	common = mlx5_get_rsc(table, rsn);
+	if (!common)
+		return NOTIFY_OK;
+
+	if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type))
+		goto out;
+
+	switch (common->res) {
+	case MLX5_RES_QP:
+	case MLX5_RES_RQ:
+	case MLX5_RES_SQ:
+		qp = (struct mlx5_core_qp *)common;
+		qp->event(qp, event_type);
+		break;
+	case MLX5_RES_DCT:
+		dct = (struct mlx5_core_dct *)common;
+		if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
+			complete(&dct->drained);
+		break;
+	default:
+		break;
+	}
+out:
+	mlx5_core_put_rsc(common);
+
+	return NOTIFY_OK;
+}
+
+static int create_resource_common(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_qp *qp, int rsc_type)
+{
+	struct mlx5_qp_table *table = &dev->qp_table;
+	int err;
+
+	qp->common.res = rsc_type;
+	spin_lock_irq(&table->lock);
+	err = radix_tree_insert(&table->tree,
+				qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
+				qp);
+	spin_unlock_irq(&table->lock);
+	if (err)
+		return err;
+
+	refcount_set(&qp->common.refcount, 1);
+	init_completion(&qp->common.free);
+	qp->pid = current->pid;
+
+	return 0;
+}
+
+static void destroy_resource_common(struct mlx5_ib_dev *dev,
+				    struct mlx5_core_qp *qp)
+{
+	struct mlx5_qp_table *table = &dev->qp_table;
+	unsigned long flags;
+
+	spin_lock_irqsave(&table->lock, flags);
+	radix_tree_delete(&table->tree,
+			  qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
+	spin_unlock_irqrestore(&table->lock, flags);
+	mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
+	wait_for_completion(&qp->common.free);
+}
+
+static int _mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_dct *dct, bool need_cleanup)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_dct_in)] = {};
+	struct mlx5_core_qp *qp = &dct->mqp;
+	int err;
+
+	err = mlx5_core_drain_dct(dev, dct);
+	if (err) {
+		if (dev->mdev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR)
+			goto destroy;
+
+		return err;
+	}
+	wait_for_completion(&dct->drained);
+destroy:
+	if (need_cleanup)
+		destroy_resource_common(dev, &dct->mqp);
+	MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
+	MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(destroy_dct_in, in, uid, qp->uid);
+	err = mlx5_cmd_exec_in(dev->mdev, destroy_dct, in);
+	return err;
+}
+
+int mlx5_core_create_dct(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+			 u32 *in, int inlen, u32 *out, int outlen)
+{
+	struct mlx5_core_qp *qp = &dct->mqp;
+	int err;
+
+	init_completion(&dct->drained);
+	MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
+
+	err = mlx5_cmd_exec(dev->mdev, in, inlen, out, outlen);
+	if (err)
+		return err;
+
+	qp->qpn = MLX5_GET(create_dct_out, out, dctn);
+	qp->uid = MLX5_GET(create_dct_in, in, uid);
+	err = create_resource_common(dev, qp, MLX5_RES_DCT);
+	if (err)
+		goto err_cmd;
+
+	return 0;
+err_cmd:
+	_mlx5_core_destroy_dct(dev, dct, false);
+	return err;
+}
+
+int mlx5_core_create_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+			u32 *in, int inlen)
+{
+	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {};
+	u32 din[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+	int err;
+
+	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
+
+	err = mlx5_cmd_exec(dev->mdev, in, inlen, out, sizeof(out));
+	if (err)
+		return err;
+
+	qp->uid = MLX5_GET(create_qp_in, in, uid);
+	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
+
+	err = create_resource_common(dev, qp, MLX5_RES_QP);
+	if (err)
+		goto err_cmd;
+
+	mlx5_debug_qp_add(dev->mdev, qp);
+
+	return 0;
+
+err_cmd:
+	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, din, uid, qp->uid);
+	mlx5_cmd_exec_in(dev->mdev, destroy_qp, din);
+	return err;
+}
+
+static int mlx5_core_drain_dct(struct mlx5_ib_dev *dev,
+			       struct mlx5_core_dct *dct)
+{
+	u32 in[MLX5_ST_SZ_DW(drain_dct_in)] = {};
+	struct mlx5_core_qp *qp = &dct->mqp;
+
+	MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
+	MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
+	MLX5_SET(drain_dct_in, in, uid, qp->uid);
+	return mlx5_cmd_exec_in(dev->mdev, drain_dct, in);
+}
+
+int mlx5_core_destroy_dct(struct mlx5_ib_dev *dev,
+			  struct mlx5_core_dct *dct)
+{
+	return _mlx5_core_destroy_dct(dev, dct, true);
+}
+
+/* Drop debugfs entry and registration, then return DESTROY_QP's status. */
+int mlx5_core_destroy_qp(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp)
+{
+	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)] = {};
+
+	mlx5_debug_qp_remove(dev->mdev, qp);
+	destroy_resource_common(dev, qp);
+
+	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
+	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
+	MLX5_SET(destroy_qp_in, in, uid, qp->uid);
+	/* Propagate a firmware failure instead of always reporting success. */
+	return mlx5_cmd_exec_in(dev->mdev, destroy_qp, in);
+}
+
+/* Program the delay-drop timeout; the command field takes usec / 100. */
+int mlx5_core_set_delay_drop(struct mlx5_ib_dev *dev, u32 timeout_usec)
+{
+	u32 params[MLX5_ST_SZ_DW(set_delay_drop_params_in)] = {};
+
+	MLX5_SET(set_delay_drop_params_in, params, delay_drop_timeout,
+		 timeout_usec / 100);
+	MLX5_SET(set_delay_drop_params_in, params, opcode,
+		 MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
+	return mlx5_cmd_exec_in(dev->mdev, set_delay_drop_params, params);
+}
+
+struct mbox_info {
+	u32 *in;	/* command input buffer, kzalloc'ed by mbox_alloc() */
+	u32 *out;	/* command output buffer, kzalloc'ed by mbox_alloc() */
+	int inlen;	/* size of @in in bytes */
+	int outlen;	/* size of @out in bytes */
+};
+
+/* Allocate zeroed in/out buffers; on failure free both, return -ENOMEM. */
+static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
+{
+	mbox->in = kzalloc(inlen, GFP_KERNEL);
+	mbox->out = kzalloc(outlen, GFP_KERNEL);
+	mbox->inlen = inlen;
+	mbox->outlen = outlen;
+	if (mbox->in && mbox->out)
+		return 0;
+
+	kfree(mbox->in);
+	kfree(mbox->out);
+	return -ENOMEM;
+}
+
+static void mbox_free(struct mbox_info *mbox)
+{
+	kfree(mbox->out);	/* release both buffers set up by mbox_alloc() */
+	kfree(mbox->in);
+}
+
+static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
+				u32 opt_param_mask, void *qpc,
+				struct mbox_info *mbox, u16 uid)
+{
+	mbox->out = NULL;
+	mbox->in = NULL;
+	/* Allocate and fill the mailbox for @opcode: 0, -ENOMEM or -EINVAL. */
+#define MBOX_ALLOC(mbox, typ)  \
+	mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
+	/* Header fields common to every transition: opcode, qpn and uid. */
+#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid)                            \
+	do {                                                                   \
+		MLX5_SET(typ##_in, in, opcode, _opcode);                       \
+		MLX5_SET(typ##_in, in, qpn, _qpn);                             \
+		MLX5_SET(typ##_in, in, uid, _uid);                             \
+	} while (0)
+	/* Same header plus opt_param_mask and a copy of the caller's QPC. */
+#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid)          \
+	do {                                                                   \
+		MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid);                   \
+		MLX5_SET(typ##_in, in, opt_param_mask, _opt_p);                \
+		memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc,                  \
+		       MLX5_ST_SZ_BYTES(qpc));                                 \
+	} while (0)
+
+	switch (opcode) {
+	/* 2RST & 2ERR: header only, @qpc and @opt_param_mask are not used */
+	case MLX5_CMD_OP_2RST_QP:
+		if (MBOX_ALLOC(mbox, qp_2rst))
+			return -ENOMEM;
+		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
+		break;
+	case MLX5_CMD_OP_2ERR_QP:
+		if (MBOX_ALLOC(mbox, qp_2err))
+			return -ENOMEM;
+		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
+		break;
+
+	/* MODIFY with QPC: header, opt_param_mask and the QPC copied from @qpc */
+	case MLX5_CMD_OP_RST2INIT_QP:
+		if (MBOX_ALLOC(mbox, rst2init_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	case MLX5_CMD_OP_INIT2RTR_QP:
+		if (MBOX_ALLOC(mbox, init2rtr_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	case MLX5_CMD_OP_RTR2RTS_QP:
+		if (MBOX_ALLOC(mbox, rtr2rts_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	case MLX5_CMD_OP_RTS2RTS_QP:
+		if (MBOX_ALLOC(mbox, rts2rts_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	case MLX5_CMD_OP_SQERR2RTS_QP:
+		if (MBOX_ALLOC(mbox, sqerr2rts_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	case MLX5_CMD_OP_INIT2INIT_QP:
+		if (MBOX_ALLOC(mbox, init2init_qp))
+			return -ENOMEM;
+		MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
+				  opt_param_mask, qpc, uid);
+		break;
+	default:
+		return -EINVAL;	/* unhandled opcode; nothing was allocated */
+	}
+	return 0;
+}
+
+/* Run the QP state transition @opcode using a temporary command mailbox. */
+int mlx5_core_qp_modify(struct mlx5_ib_dev *dev, u16 opcode, u32 opt_param_mask,
+			void *qpc, struct mlx5_core_qp *qp)
+{
+	struct mlx5_core_dev *mdev = dev->mdev;
+	struct mbox_info cmd;
+	int ret;
+
+	ret = modify_qp_mbox_alloc(mdev, opcode, qp->qpn, opt_param_mask, qpc,
+				   &cmd, qp->uid);
+	if (ret)
+		return ret;
+	ret = mlx5_cmd_exec(mdev, cmd.in, cmd.inlen, cmd.out, cmd.outlen);
+	mbox_free(&cmd);
+	return ret;
+}
+
+/* Set up the QP table and register its event notifier; returns 0. */
+int mlx5_init_qp_table(struct mlx5_ib_dev *dev)
+{
+	struct mlx5_qp_table *qpt = &dev->qp_table;
+
+	INIT_RADIX_TREE(&qpt->tree, GFP_ATOMIC);
+	spin_lock_init(&qpt->lock);
+	qpt->nb.notifier_call = rsc_event_notifier;
+	mlx5_qp_debugfs_init(dev->mdev);
+	/* Register only after the table itself has been initialised. */
+	mlx5_notifier_register(dev->mdev, &qpt->nb);
+	return 0;
+}
+
+/* Undo mlx5_init_qp_table(): drop the notifier, then the debugfs dir. */
+void mlx5_cleanup_qp_table(struct mlx5_ib_dev *dev)
+{
+	mlx5_notifier_unregister(dev->mdev, &dev->qp_table.nb);
+
+	mlx5_qp_debugfs_cleanup(dev->mdev);
+}
+
+int mlx5_core_qp_query(struct mlx5_ib_dev *dev, struct mlx5_core_qp *qp,
+		       u32 *out, int outlen)
+{
+	u32 query[MLX5_ST_SZ_DW(query_qp_in)] = {};	/* QUERY_QP input */
+
+	MLX5_SET(query_qp_in, query, qpn, qp->qpn);
+	MLX5_SET(query_qp_in, query, opcode, MLX5_CMD_OP_QUERY_QP);
+	return mlx5_cmd_exec(dev->mdev, query, sizeof(query), out, outlen);
+}
+
+/* Execute QUERY_DCT for @dct; the reply is stored in @out (@outlen bytes). */
+int mlx5_core_dct_query(struct mlx5_ib_dev *dev, struct mlx5_core_dct *dct,
+			u32 *out, int outlen)
+{
+	u32 query[MLX5_ST_SZ_DW(query_dct_in)] = {};
+
+	/* A DCT keeps its number in the embedded mlx5_core_qp. */
+	MLX5_SET(query_dct_in, query, dctn, dct->mqp.qpn);
+	MLX5_SET(query_dct_in, query, opcode, MLX5_CMD_OP_QUERY_DCT);
+
+	return mlx5_cmd_exec(dev->mdev, query, sizeof(query), out, outlen);
+}
+
+int mlx5_core_xrcd_alloc(struct mlx5_ib_dev *dev, u32 *xrcdn)
+{
+	u32 req[MLX5_ST_SZ_DW(alloc_xrcd_in)] = {};
+	u32 rsp[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {};
+	int ret;
+
+	MLX5_SET(alloc_xrcd_in, req, opcode, MLX5_CMD_OP_ALLOC_XRCD);
+	ret = mlx5_cmd_exec_inout(dev->mdev, alloc_xrcd, req, rsp);
+	if (!ret)	/* *xrcdn is only written when the command succeeds */
+		*xrcdn = MLX5_GET(alloc_xrcd_out, rsp, xrcd);
+	return ret;
+}
+
+int mlx5_core_xrcd_dealloc(struct mlx5_ib_dev *dev, u32 xrcdn)
+{
+	u32 req[MLX5_ST_SZ_DW(dealloc_xrcd_in)] = {};	/* DEALLOC_XRCD input */
+
+	MLX5_SET(dealloc_xrcd_in, req, xrcd, xrcdn);
+	MLX5_SET(dealloc_xrcd_in, req, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
+	return mlx5_cmd_exec_in(dev->mdev, dealloc_xrcd, req);
+}
+
+static void destroy_rq_tracked(struct mlx5_ib_dev *dev, u32 rqn, u16 uid)
+{
+	u32 cmd[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
+
+	MLX5_SET(destroy_rq_in, cmd, uid, uid);
+	MLX5_SET(destroy_rq_in, cmd, rqn, rqn);
+	MLX5_SET(destroy_rq_in, cmd, opcode, MLX5_CMD_OP_DESTROY_RQ);
+	mlx5_cmd_exec_in(dev->mdev, destroy_rq, cmd);	/* status is ignored */
+}
+
+/*
+ * Create an RQ and register it; a failed registration destroys the RQ.
+ */
+int mlx5_core_create_rq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *rq)
+{
+	u32 rqn;
+	int ret;
+
+	ret = mlx5_core_create_rq(dev->mdev, in, inlen, &rqn);
+	if (ret)
+		return ret;
+
+	/* Stored on @rq so the error path below can address the new RQ. */
+	rq->qpn = rqn;
+	rq->uid = MLX5_GET(create_rq_in, in, uid);
+
+	ret = create_resource_common(dev, rq, MLX5_RES_RQ);
+	if (ret)
+		destroy_rq_tracked(dev, rq->qpn, rq->uid);
+
+	return ret;
+}
+
+void mlx5_core_destroy_rq_tracked(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_qp *rq)
+{
+	destroy_resource_common(dev, rq);	/* unregister @rq first */
+	destroy_rq_tracked(dev, rq->qpn, rq->uid);	/* then DESTROY_RQ */
+}
+
+static void destroy_sq_tracked(struct mlx5_ib_dev *dev, u32 sqn, u16 uid)
+{
+	u32 cmd[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
+
+	MLX5_SET(destroy_sq_in, cmd, uid, uid);
+	MLX5_SET(destroy_sq_in, cmd, sqn, sqn);
+	MLX5_SET(destroy_sq_in, cmd, opcode, MLX5_CMD_OP_DESTROY_SQ);
+	mlx5_cmd_exec_in(dev->mdev, destroy_sq, cmd);	/* status is ignored */
+}
+
+/*
+ * Create an SQ and register it; a failed registration destroys the SQ.
+ */
+int mlx5_core_create_sq_tracked(struct mlx5_ib_dev *dev, u32 *in, int inlen,
+				struct mlx5_core_qp *sq)
+{
+	u32 rsp[MLX5_ST_SZ_DW(create_sq_out)] = {};
+	int ret;
+
+	MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
+	ret = mlx5_cmd_exec(dev->mdev, in, inlen, rsp, sizeof(rsp));
+	if (ret)
+		return ret;
+
+	/* Stored on @sq so the error path below can address the new SQ. */
+	sq->uid = MLX5_GET(create_sq_in, in, uid);
+	sq->qpn = MLX5_GET(create_sq_out, rsp, sqn);
+
+	ret = create_resource_common(dev, sq, MLX5_RES_SQ);
+	if (ret)
+		destroy_sq_tracked(dev, sq->qpn, sq->uid);
+
+	return ret;
+}
+
+void mlx5_core_destroy_sq_tracked(struct mlx5_ib_dev *dev,
+				  struct mlx5_core_qp *sq)
+{
+	destroy_resource_common(dev, sq);	/* unregister @sq first */
+	destroy_sq_tracked(dev, sq->qpn, sq->uid);	/* then DESTROY_SQ */
+}
+
+/* Look up resource @res_num of @res_type in the QP table via mlx5_get_rsc(). */
+struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_ib_dev *dev,
+						int res_num,
+						enum mlx5_res_type res_type)
+{
+	u32 key = (res_type << MLX5_USER_INDEX_LEN) | res_num;
+
+	return mlx5_get_rsc(&dev->qp_table, key);
+}
+
+void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
+{
+	mlx5_core_put_rsc(res);	/* plain forwarder to mlx5_core_put_rsc() */
+}
diff --git a/drivers/infiniband/hw/mlx5/srq_cmd.c b/drivers/infiniband/hw/mlx5/srq_cmd.c
index 88c0388f9fc6..c851570791af 100644
--- a/drivers/infiniband/hw/mlx5/srq_cmd.c
+++ b/drivers/infiniband/hw/mlx5/srq_cmd.c
@@ -7,6 +7,7 @@
 #include <linux/mlx5/driver.h>
 #include "mlx5_ib.h"
 #include "srq.h"
+#include "qp.h"
 
 static int get_pas_size(struct mlx5_srq_attr *in)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 6d32915000fc..d3c7dbd7f1d5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -12,7 +12,7 @@ obj-$(CONFIG_MLX5_CORE) += mlx5_core.o
 # mlx5 core basic
 #
 mlx5_core-y :=	main.o cmd.o debugfs.o fw.o eq.o uar.o pagealloc.o \
-		health.o mcg.o cq.o alloc.o qp.o port.o mr.o pd.o \
+		health.o mcg.o cq.o alloc.o port.o mr.o pd.o \
 		transobj.o vport.o sriov.o fs_cmd.o fs_core.o pci_irq.o \
 		fs_counters.o rl.o lag.o dev.o events.o wq.o lib/gid.o \
 		lib/devcom.o lib/pci_vsc.o lib/dm.o diag/fs_tracepoint.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index d40c3d5bd496..65fef5a86644 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -101,15 +101,15 @@ void mlx5_unregister_debugfs(void)
 
 void mlx5_qp_debugfs_init(struct mlx5_core_dev *dev)
 {
-	atomic_set(&dev->num_qps, 0);
-
 	dev->priv.qp_debugfs = debugfs_create_dir("QPs",  dev->priv.dbg_root);
 }
+EXPORT_SYMBOL(mlx5_qp_debugfs_init);
 
 void mlx5_qp_debugfs_cleanup(struct mlx5_core_dev *dev)
 {
 	debugfs_remove_recursive(dev->priv.qp_debugfs);
 }
+EXPORT_SYMBOL(mlx5_qp_debugfs_cleanup);
 
 void mlx5_eq_debugfs_init(struct mlx5_core_dev *dev)
 {
@@ -450,6 +450,7 @@ int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
 
 	return err;
 }
+EXPORT_SYMBOL(mlx5_debug_qp_add);
 
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
 {
@@ -459,6 +460,7 @@ void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp)
 	if (qp->dbg)
 		rem_res_tree(qp->dbg);
 }
+EXPORT_SYMBOL(mlx5_debug_qp_remove);
 
 int mlx5_debug_eq_add(struct mlx5_core_dev *dev, struct mlx5_eq *eq)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7af4210c1b96..6e19fa4d1310 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -836,8 +836,6 @@ static int mlx5_init_once(struct mlx5_core_dev *dev)
 
 	mlx5_cq_debugfs_init(dev);
 
-	mlx5_init_qp_table(dev);
-
 	mlx5_init_reserved_gids(dev);
 
 	mlx5_init_clock(dev);
@@ -896,7 +894,6 @@ err_rl_cleanup:
 err_tables_cleanup:
 	mlx5_geneve_destroy(dev->geneve);
 	mlx5_vxlan_destroy(dev->vxlan);
-	mlx5_cleanup_qp_table(dev);
 	mlx5_cq_debugfs_cleanup(dev);
 	mlx5_events_cleanup(dev);
 err_eq_cleanup:
@@ -924,7 +921,6 @@ static void mlx5_cleanup_once(struct mlx5_core_dev *dev)
 	mlx5_vxlan_destroy(dev->vxlan);
 	mlx5_cleanup_clock(dev);
 	mlx5_cleanup_reserved_gids(dev);
-	mlx5_cleanup_qp_table(dev);
 	mlx5_cq_debugfs_cleanup(dev);
 	mlx5_events_cleanup(dev);
 	mlx5_eq_table_cleanup(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/qp.c b/drivers/net/ethernet/mellanox/mlx5/core/qp.c
deleted file mode 100644
index d9df3a5dd532..000000000000
--- a/drivers/net/ethernet/mellanox/mlx5/core/qp.c
+++ /dev/null
@@ -1,697 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include <linux/gfp.h>
-#include <linux/export.h>
-#include <linux/mlx5/qp.h>
-#include <linux/mlx5/driver.h>
-#include <linux/mlx5/transobj.h>
-
-#include "mlx5_core.h"
-#include "lib/eq.h"
-
-static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
-			       struct mlx5_core_dct *dct);
-
-static struct mlx5_core_rsc_common *
-mlx5_get_rsc(struct mlx5_qp_table *table, u32 rsn)
-{
-	struct mlx5_core_rsc_common *common;
-	unsigned long flags;
-
-	spin_lock_irqsave(&table->lock, flags);
-
-	common = radix_tree_lookup(&table->tree, rsn);
-	if (common)
-		refcount_inc(&common->refcount);
-
-	spin_unlock_irqrestore(&table->lock, flags);
-
-	return common;
-}
-
-void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common)
-{
-	if (refcount_dec_and_test(&common->refcount))
-		complete(&common->free);
-}
-
-static u64 qp_allowed_event_types(void)
-{
-	u64 mask;
-
-	mask = BIT(MLX5_EVENT_TYPE_PATH_MIG) |
-	       BIT(MLX5_EVENT_TYPE_COMM_EST) |
-	       BIT(MLX5_EVENT_TYPE_SQ_DRAINED) |
-	       BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
-	       BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR) |
-	       BIT(MLX5_EVENT_TYPE_PATH_MIG_FAILED) |
-	       BIT(MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR) |
-	       BIT(MLX5_EVENT_TYPE_WQ_ACCESS_ERROR);
-
-	return mask;
-}
-
-static u64 rq_allowed_event_types(void)
-{
-	u64 mask;
-
-	mask = BIT(MLX5_EVENT_TYPE_SRQ_LAST_WQE) |
-	       BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
-
-	return mask;
-}
-
-static u64 sq_allowed_event_types(void)
-{
-	return BIT(MLX5_EVENT_TYPE_WQ_CATAS_ERROR);
-}
-
-static u64 dct_allowed_event_types(void)
-{
-	return BIT(MLX5_EVENT_TYPE_DCT_DRAINED);
-}
-
-static bool is_event_type_allowed(int rsc_type, int event_type)
-{
-	switch (rsc_type) {
-	case MLX5_EVENT_QUEUE_TYPE_QP:
-		return BIT(event_type) & qp_allowed_event_types();
-	case MLX5_EVENT_QUEUE_TYPE_RQ:
-		return BIT(event_type) & rq_allowed_event_types();
-	case MLX5_EVENT_QUEUE_TYPE_SQ:
-		return BIT(event_type) & sq_allowed_event_types();
-	case MLX5_EVENT_QUEUE_TYPE_DCT:
-		return BIT(event_type) & dct_allowed_event_types();
-	default:
-		WARN(1, "Event arrived for unknown resource type");
-		return false;
-	}
-}
-
-static int rsc_event_notifier(struct notifier_block *nb,
-			      unsigned long type, void *data)
-{
-	struct mlx5_core_rsc_common *common;
-	struct mlx5_qp_table *table;
-	struct mlx5_core_dev *dev;
-	struct mlx5_core_dct *dct;
-	u8 event_type = (u8)type;
-	struct mlx5_core_qp *qp;
-	struct mlx5_priv *priv;
-	struct mlx5_eqe *eqe;
-	u32 rsn;
-
-	switch (event_type) {
-	case MLX5_EVENT_TYPE_DCT_DRAINED:
-		eqe = data;
-		rsn = be32_to_cpu(eqe->data.dct.dctn) & 0xffffff;
-		rsn |= (MLX5_RES_DCT << MLX5_USER_INDEX_LEN);
-		break;
-	case MLX5_EVENT_TYPE_PATH_MIG:
-	case MLX5_EVENT_TYPE_COMM_EST:
-	case MLX5_EVENT_TYPE_SQ_DRAINED:
-	case MLX5_EVENT_TYPE_SRQ_LAST_WQE:
-	case MLX5_EVENT_TYPE_WQ_CATAS_ERROR:
-	case MLX5_EVENT_TYPE_PATH_MIG_FAILED:
-	case MLX5_EVENT_TYPE_WQ_INVAL_REQ_ERROR:
-	case MLX5_EVENT_TYPE_WQ_ACCESS_ERROR:
-		eqe = data;
-		rsn = be32_to_cpu(eqe->data.qp_srq.qp_srq_n) & 0xffffff;
-		rsn |= (eqe->data.qp_srq.type << MLX5_USER_INDEX_LEN);
-		break;
-	default:
-		return NOTIFY_DONE;
-	}
-
-	table = container_of(nb, struct mlx5_qp_table, nb);
-	priv  = container_of(table, struct mlx5_priv, qp_table);
-	dev   = container_of(priv, struct mlx5_core_dev, priv);
-
-	mlx5_core_dbg(dev, "event (%d) arrived on resource 0x%x\n", eqe->type, rsn);
-
-	common = mlx5_get_rsc(table, rsn);
-	if (!common) {
-		mlx5_core_dbg(dev, "Async event for unknown resource 0x%x\n", rsn);
-		return NOTIFY_OK;
-	}
-
-	if (!is_event_type_allowed((rsn >> MLX5_USER_INDEX_LEN), event_type)) {
-		mlx5_core_warn(dev, "event 0x%.2x is not allowed on resource 0x%.8x\n",
-			       event_type, rsn);
-		goto out;
-	}
-
-	switch (common->res) {
-	case MLX5_RES_QP:
-	case MLX5_RES_RQ:
-	case MLX5_RES_SQ:
-		qp = (struct mlx5_core_qp *)common;
-		qp->event(qp, event_type);
-		break;
-	case MLX5_RES_DCT:
-		dct = (struct mlx5_core_dct *)common;
-		if (event_type == MLX5_EVENT_TYPE_DCT_DRAINED)
-			complete(&dct->drained);
-		break;
-	default:
-		mlx5_core_warn(dev, "invalid resource type for 0x%x\n", rsn);
-	}
-out:
-	mlx5_core_put_rsc(common);
-
-	return NOTIFY_OK;
-}
-
-static int create_resource_common(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *qp,
-				  int rsc_type)
-{
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
-	int err;
-
-	qp->common.res = rsc_type;
-	spin_lock_irq(&table->lock);
-	err = radix_tree_insert(&table->tree,
-				qp->qpn | (rsc_type << MLX5_USER_INDEX_LEN),
-				qp);
-	spin_unlock_irq(&table->lock);
-	if (err)
-		return err;
-
-	refcount_set(&qp->common.refcount, 1);
-	init_completion(&qp->common.free);
-	qp->pid = current->pid;
-
-	return 0;
-}
-
-static void destroy_resource_common(struct mlx5_core_dev *dev,
-				    struct mlx5_core_qp *qp)
-{
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
-	unsigned long flags;
-
-	spin_lock_irqsave(&table->lock, flags);
-	radix_tree_delete(&table->tree,
-			  qp->qpn | (qp->common.res << MLX5_USER_INDEX_LEN));
-	spin_unlock_irqrestore(&table->lock, flags);
-	mlx5_core_put_rsc((struct mlx5_core_rsc_common *)qp);
-	wait_for_completion(&qp->common.free);
-}
-
-static int _mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
-				  struct mlx5_core_dct *dct, bool need_cleanup)
-{
-	u32 out[MLX5_ST_SZ_DW(destroy_dct_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_dct_in)]   = {0};
-	struct mlx5_core_qp *qp = &dct->mqp;
-	int err;
-
-	err = mlx5_core_drain_dct(dev, dct);
-	if (err) {
-		if (dev->state == MLX5_DEVICE_STATE_INTERNAL_ERROR) {
-			goto destroy;
-		} else {
-			mlx5_core_warn(
-				dev, "failed drain DCT 0x%x with error 0x%x\n",
-				qp->qpn, err);
-			return err;
-		}
-	}
-	wait_for_completion(&dct->drained);
-destroy:
-	if (need_cleanup)
-		destroy_resource_common(dev, &dct->mqp);
-	MLX5_SET(destroy_dct_in, in, opcode, MLX5_CMD_OP_DESTROY_DCT);
-	MLX5_SET(destroy_dct_in, in, dctn, qp->qpn);
-	MLX5_SET(destroy_dct_in, in, uid, qp->uid);
-	err = mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
-			    (void *)&out, sizeof(out));
-	return err;
-}
-
-int mlx5_core_create_dct(struct mlx5_core_dev *dev,
-			 struct mlx5_core_dct *dct,
-			 u32 *in, int inlen,
-			 u32 *out, int outlen)
-{
-	struct mlx5_core_qp *qp = &dct->mqp;
-	int err;
-
-	init_completion(&dct->drained);
-	MLX5_SET(create_dct_in, in, opcode, MLX5_CMD_OP_CREATE_DCT);
-
-	err = mlx5_cmd_exec(dev, in, inlen, out, outlen);
-	if (err) {
-		mlx5_core_warn(dev, "create DCT failed, ret %d\n", err);
-		return err;
-	}
-
-	qp->qpn = MLX5_GET(create_dct_out, out, dctn);
-	qp->uid = MLX5_GET(create_dct_in, in, uid);
-	err = create_resource_common(dev, qp, MLX5_RES_DCT);
-	if (err)
-		goto err_cmd;
-
-	return 0;
-err_cmd:
-	_mlx5_core_destroy_dct(dev, dct, false);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_create_dct);
-
-int mlx5_core_create_qp(struct mlx5_core_dev *dev,
-			struct mlx5_core_qp *qp,
-			u32 *in, int inlen)
-{
-	u32 out[MLX5_ST_SZ_DW(create_qp_out)] = {0};
-	u32 dout[MLX5_ST_SZ_DW(destroy_qp_out)];
-	u32 din[MLX5_ST_SZ_DW(destroy_qp_in)];
-	int err;
-
-	MLX5_SET(create_qp_in, in, opcode, MLX5_CMD_OP_CREATE_QP);
-
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-	if (err)
-		return err;
-
-	qp->uid = MLX5_GET(create_qp_in, in, uid);
-	qp->qpn = MLX5_GET(create_qp_out, out, qpn);
-	mlx5_core_dbg(dev, "qpn = 0x%x\n", qp->qpn);
-
-	err = create_resource_common(dev, qp, MLX5_RES_QP);
-	if (err)
-		goto err_cmd;
-
-	err = mlx5_debug_qp_add(dev, qp);
-	if (err)
-		mlx5_core_dbg(dev, "failed adding QP 0x%x to debug file system\n",
-			      qp->qpn);
-
-	atomic_inc(&dev->num_qps);
-
-	return 0;
-
-err_cmd:
-	memset(din, 0, sizeof(din));
-	memset(dout, 0, sizeof(dout));
-	MLX5_SET(destroy_qp_in, din, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, din, qpn, qp->qpn);
-	MLX5_SET(destroy_qp_in, din, uid, qp->uid);
-	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_create_qp);
-
-static int mlx5_core_drain_dct(struct mlx5_core_dev *dev,
-			       struct mlx5_core_dct *dct)
-{
-	u32 out[MLX5_ST_SZ_DW(drain_dct_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(drain_dct_in)]   = {0};
-	struct mlx5_core_qp *qp = &dct->mqp;
-
-	MLX5_SET(drain_dct_in, in, opcode, MLX5_CMD_OP_DRAIN_DCT);
-	MLX5_SET(drain_dct_in, in, dctn, qp->qpn);
-	MLX5_SET(drain_dct_in, in, uid, qp->uid);
-	return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
-			     (void *)&out, sizeof(out));
-}
-
-int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
-			  struct mlx5_core_dct *dct)
-{
-	return _mlx5_core_destroy_dct(dev, dct, true);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_destroy_dct);
-
-int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
-			 struct mlx5_core_qp *qp)
-{
-	u32 out[MLX5_ST_SZ_DW(destroy_qp_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_qp_in)]   = {0};
-	int err;
-
-	mlx5_debug_qp_remove(dev, qp);
-
-	destroy_resource_common(dev, qp);
-
-	MLX5_SET(destroy_qp_in, in, opcode, MLX5_CMD_OP_DESTROY_QP);
-	MLX5_SET(destroy_qp_in, in, qpn, qp->qpn);
-	MLX5_SET(destroy_qp_in, in, uid, qp->uid);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (err)
-		return err;
-
-	atomic_dec(&dev->num_qps);
-	return 0;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_destroy_qp);
-
-int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
-			     u32 timeout_usec)
-{
-	u32 out[MLX5_ST_SZ_DW(set_delay_drop_params_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(set_delay_drop_params_in)]   = {0};
-
-	MLX5_SET(set_delay_drop_params_in, in, opcode,
-		 MLX5_CMD_OP_SET_DELAY_DROP_PARAMS);
-	MLX5_SET(set_delay_drop_params_in, in, delay_drop_timeout,
-		 timeout_usec / 100);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_set_delay_drop);
-
-struct mbox_info {
-	u32 *in;
-	u32 *out;
-	int inlen;
-	int outlen;
-};
-
-static int mbox_alloc(struct mbox_info *mbox, int inlen, int outlen)
-{
-	mbox->inlen  = inlen;
-	mbox->outlen = outlen;
-	mbox->in = kzalloc(mbox->inlen, GFP_KERNEL);
-	mbox->out = kzalloc(mbox->outlen, GFP_KERNEL);
-	if (!mbox->in || !mbox->out) {
-		kfree(mbox->in);
-		kfree(mbox->out);
-		return -ENOMEM;
-	}
-
-	return 0;
-}
-
-static void mbox_free(struct mbox_info *mbox)
-{
-	kfree(mbox->in);
-	kfree(mbox->out);
-}
-
-static int modify_qp_mbox_alloc(struct mlx5_core_dev *dev, u16 opcode, int qpn,
-				u32 opt_param_mask, void *qpc,
-				struct mbox_info *mbox, u16 uid)
-{
-	mbox->out = NULL;
-	mbox->in = NULL;
-
-#define MBOX_ALLOC(mbox, typ)  \
-	mbox_alloc(mbox, MLX5_ST_SZ_BYTES(typ##_in), MLX5_ST_SZ_BYTES(typ##_out))
-
-#define MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid)                            \
-	do {                                                                   \
-		MLX5_SET(typ##_in, in, opcode, _opcode);                       \
-		MLX5_SET(typ##_in, in, qpn, _qpn);                             \
-		MLX5_SET(typ##_in, in, uid, _uid);                             \
-	} while (0)
-
-#define MOD_QP_IN_SET_QPC(typ, in, _opcode, _qpn, _opt_p, _qpc, _uid)          \
-	do {                                                                   \
-		MOD_QP_IN_SET(typ, in, _opcode, _qpn, _uid);                   \
-		MLX5_SET(typ##_in, in, opt_param_mask, _opt_p);                \
-		memcpy(MLX5_ADDR_OF(typ##_in, in, qpc), _qpc,                  \
-		       MLX5_ST_SZ_BYTES(qpc));                                 \
-	} while (0)
-
-	switch (opcode) {
-	/* 2RST & 2ERR */
-	case MLX5_CMD_OP_2RST_QP:
-		if (MBOX_ALLOC(mbox, qp_2rst))
-			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2rst, mbox->in, opcode, qpn, uid);
-		break;
-	case MLX5_CMD_OP_2ERR_QP:
-		if (MBOX_ALLOC(mbox, qp_2err))
-			return -ENOMEM;
-		MOD_QP_IN_SET(qp_2err, mbox->in, opcode, qpn, uid);
-		break;
-
-	/* MODIFY with QPC */
-	case MLX5_CMD_OP_RST2INIT_QP:
-		if (MBOX_ALLOC(mbox, rst2init_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(rst2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	case MLX5_CMD_OP_INIT2RTR_QP:
-		if (MBOX_ALLOC(mbox, init2rtr_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(init2rtr_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	case MLX5_CMD_OP_RTR2RTS_QP:
-		if (MBOX_ALLOC(mbox, rtr2rts_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(rtr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	case MLX5_CMD_OP_RTS2RTS_QP:
-		if (MBOX_ALLOC(mbox, rts2rts_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(rts2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	case MLX5_CMD_OP_SQERR2RTS_QP:
-		if (MBOX_ALLOC(mbox, sqerr2rts_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(sqerr2rts_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	case MLX5_CMD_OP_INIT2INIT_QP:
-		if (MBOX_ALLOC(mbox, init2init_qp))
-			return -ENOMEM;
-		MOD_QP_IN_SET_QPC(init2init_qp, mbox->in, opcode, qpn,
-				  opt_param_mask, qpc, uid);
-		break;
-	default:
-		mlx5_core_err(dev, "Unknown transition for modify QP: OP(0x%x) QPN(0x%x)\n",
-			      opcode, qpn);
-		return -EINVAL;
-	}
-	return 0;
-}
-
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
-			u32 opt_param_mask, void *qpc,
-			struct mlx5_core_qp *qp)
-{
-	struct mbox_info mbox;
-	int err;
-
-	err = modify_qp_mbox_alloc(dev, opcode, qp->qpn,
-				   opt_param_mask, qpc, &mbox, qp->uid);
-	if (err)
-		return err;
-
-	err = mlx5_cmd_exec(dev, mbox.in, mbox.inlen, mbox.out, mbox.outlen);
-	mbox_free(&mbox);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_qp_modify);
-
-void mlx5_init_qp_table(struct mlx5_core_dev *dev)
-{
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
-
-	memset(table, 0, sizeof(*table));
-	spin_lock_init(&table->lock);
-	INIT_RADIX_TREE(&table->tree, GFP_ATOMIC);
-	mlx5_qp_debugfs_init(dev);
-
-	table->nb.notifier_call = rsc_event_notifier;
-	mlx5_notifier_register(dev, &table->nb);
-}
-
-void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev)
-{
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
-
-	mlx5_notifier_unregister(dev, &table->nb);
-	mlx5_qp_debugfs_cleanup(dev);
-}
-
-int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-		       u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {0};
-
-	MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
-	MLX5_SET(query_qp_in, in, qpn, qp->qpn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_qp_query);
-
-int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
-			u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_dct_in)] = {0};
-	struct mlx5_core_qp *qp = &dct->mqp;
-
-	MLX5_SET(query_dct_in, in, opcode, MLX5_CMD_OP_QUERY_DCT);
-	MLX5_SET(query_dct_in, in, dctn, qp->qpn);
-
-	return mlx5_cmd_exec(dev, (void *)&in, sizeof(in),
-			     (void *)out, outlen);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_dct_query);
-
-int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn)
-{
-	u32 out[MLX5_ST_SZ_DW(alloc_xrcd_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(alloc_xrcd_in)]   = {0};
-	int err;
-
-	MLX5_SET(alloc_xrcd_in, in, opcode, MLX5_CMD_OP_ALLOC_XRCD);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-	if (!err)
-		*xrcdn = MLX5_GET(alloc_xrcd_out, out, xrcd);
-	return err;
-}
-EXPORT_SYMBOL_GPL(mlx5_core_xrcd_alloc);
-
-int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn)
-{
-	u32 out[MLX5_ST_SZ_DW(dealloc_xrcd_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(dealloc_xrcd_in)]   = {0};
-
-	MLX5_SET(dealloc_xrcd_in, in, opcode, MLX5_CMD_OP_DEALLOC_XRCD);
-	MLX5_SET(dealloc_xrcd_in, in, xrcd, xrcdn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-EXPORT_SYMBOL_GPL(mlx5_core_xrcd_dealloc);
-
-static void destroy_rq_tracked(struct mlx5_core_dev *dev, u32 rqn, u16 uid)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {};
-	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {};
-
-	MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
-	MLX5_SET(destroy_rq_in, in, rqn, rqn);
-	MLX5_SET(destroy_rq_in, in, uid, uid);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
-				struct mlx5_core_qp *rq)
-{
-	int err;
-	u32 rqn;
-
-	err = mlx5_core_create_rq(dev, in, inlen, &rqn);
-	if (err)
-		return err;
-
-	rq->uid = MLX5_GET(create_rq_in, in, uid);
-	rq->qpn = rqn;
-	err = create_resource_common(dev, rq, MLX5_RES_RQ);
-	if (err)
-		goto err_destroy_rq;
-
-	return 0;
-
-err_destroy_rq:
-	destroy_rq_tracked(dev, rq->qpn, rq->uid);
-
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_rq_tracked);
-
-void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *rq)
-{
-	destroy_resource_common(dev, rq);
-	destroy_rq_tracked(dev, rq->qpn, rq->uid);
-}
-EXPORT_SYMBOL(mlx5_core_destroy_rq_tracked);
-
-static void destroy_sq_tracked(struct mlx5_core_dev *dev, u32 sqn, u16 uid)
-{
-	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {};
-	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {};
-
-	MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
-	MLX5_SET(destroy_sq_in, in, sqn, sqn);
-	MLX5_SET(destroy_sq_in, in, uid, uid);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
-				struct mlx5_core_qp *sq)
-{
-	int err;
-	u32 sqn;
-
-	err = mlx5_core_create_sq(dev, in, inlen, &sqn);
-	if (err)
-		return err;
-
-	sq->uid = MLX5_GET(create_sq_in, in, uid);
-	sq->qpn = sqn;
-	err = create_resource_common(dev, sq, MLX5_RES_SQ);
-	if (err)
-		goto err_destroy_sq;
-
-	return 0;
-
-err_destroy_sq:
-	destroy_sq_tracked(dev, sq->qpn, sq->uid);
-
-	return err;
-}
-EXPORT_SYMBOL(mlx5_core_create_sq_tracked);
-
-void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *sq)
-{
-	destroy_resource_common(dev, sq);
-	destroy_sq_tracked(dev, sq->qpn, sq->uid);
-}
-EXPORT_SYMBOL(mlx5_core_destroy_sq_tracked);
-
-struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
-						int res_num,
-						enum mlx5_res_type res_type)
-{
-	u32 rsn = res_num | (res_type << MLX5_USER_INDEX_LEN);
-	struct mlx5_qp_table *table = &dev->priv.qp_table;
-
-	return mlx5_get_rsc(table, rsn);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_res_hold);
-
-void mlx5_core_res_put(struct mlx5_core_rsc_common *res)
-{
-	mlx5_core_put_rsc(res);
-}
-EXPORT_SYMBOL_GPL(mlx5_core_res_put);
diff --git a/include/linux/mlx5/cmd.h b/include/linux/mlx5/cmd.h
deleted file mode 100644
index 68cd08f02c2f..000000000000
--- a/include/linux/mlx5/cmd.h
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Copyright (c) 2013-2015, Mellanox Technologies. All rights reserved.
- *
- * This software is available to you under a choice of one of two
- * licenses.  You may choose to be licensed under the terms of the GNU
- * General Public License (GPL) Version 2, available from the file
- * COPYING in the main directory of this source tree, or the
- * OpenIB.org BSD license below:
- *
- *     Redistribution and use in source and binary forms, with or
- *     without modification, are permitted provided that the following
- *     conditions are met:
- *
- *      - Redistributions of source code must retain the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer.
- *
- *      - Redistributions in binary form must reproduce the above
- *        copyright notice, this list of conditions and the following
- *        disclaimer in the documentation and/or other materials
- *        provided with the distribution.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
- * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
- * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
- * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
- * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
- * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
- * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef MLX5_CMD_H
-#define MLX5_CMD_H
-
-#include <linux/types.h>
-
-struct manage_pages_layout {
-	u64	ptr;
-	u32	reserved;
-	u16	num_entries;
-	u16	func_id;
-};
-
-
-struct mlx5_cmd_alloc_uar_imm_out {
-	u32	rsvd[3];
-	u32	uarn;
-};
-
-#endif /* MLX5_CMD_H */
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 1caddfa85c4d..b60e5ab7906b 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -541,7 +541,6 @@ struct mlx5_priv {
 	struct mlx5_core_health health;
 
 	/* start: qp staff */
-	struct mlx5_qp_table	qp_table;
 	struct dentry	       *qp_debugfs;
 	struct dentry	       *eq_debugfs;
 	struct dentry	       *cq_debugfs;
@@ -687,7 +686,6 @@ struct mlx5_core_dev {
 	unsigned long		intf_state;
 	struct mlx5_priv	priv;
 	struct mlx5_profile	*profile;
-	atomic_t		num_qps;
 	u32			issi;
 	struct mlx5e_resources  mlx5e_res;
 	struct mlx5_dm          *dm;
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index 4d25a3d24182..ef127a156a62 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -553,53 +553,8 @@ struct mlx5_qp_context {
 	u8			rsvd1[24];
 };
 
-static inline struct mlx5_core_qp *__mlx5_qp_lookup(struct mlx5_core_dev *dev, u32 qpn)
-{
-	return radix_tree_lookup(&dev->priv.qp_table.tree, qpn);
-}
-
-int mlx5_core_create_dct(struct mlx5_core_dev *dev,
-			 struct mlx5_core_dct *qp,
-			 u32 *in, int inlen,
-			 u32 *out, int outlen);
-int mlx5_core_create_qp(struct mlx5_core_dev *dev,
-			struct mlx5_core_qp *qp,
-			u32 *in,
-			int inlen);
-int mlx5_core_qp_modify(struct mlx5_core_dev *dev, u16 opcode,
-			u32 opt_param_mask, void *qpc,
-			struct mlx5_core_qp *qp);
-int mlx5_core_destroy_qp(struct mlx5_core_dev *dev,
-			 struct mlx5_core_qp *qp);
-int mlx5_core_destroy_dct(struct mlx5_core_dev *dev,
-			  struct mlx5_core_dct *dct);
-int mlx5_core_qp_query(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
-		       u32 *out, int outlen);
-int mlx5_core_dct_query(struct mlx5_core_dev *dev, struct mlx5_core_dct *dct,
-			u32 *out, int outlen);
-
-int mlx5_core_set_delay_drop(struct mlx5_core_dev *dev,
-			     u32 timeout_usec);
-
-int mlx5_core_xrcd_alloc(struct mlx5_core_dev *dev, u32 *xrcdn);
-int mlx5_core_xrcd_dealloc(struct mlx5_core_dev *dev, u32 xrcdn);
-void mlx5_init_qp_table(struct mlx5_core_dev *dev);
-void mlx5_cleanup_qp_table(struct mlx5_core_dev *dev);
 int mlx5_debug_qp_add(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
 void mlx5_debug_qp_remove(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp);
-int mlx5_core_create_rq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
-				struct mlx5_core_qp *rq);
-void mlx5_core_destroy_rq_tracked(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *rq);
-int mlx5_core_create_sq_tracked(struct mlx5_core_dev *dev, u32 *in, int inlen,
-				struct mlx5_core_qp *sq);
-void mlx5_core_destroy_sq_tracked(struct mlx5_core_dev *dev,
-				  struct mlx5_core_qp *sq);
-
-struct mlx5_core_rsc_common *mlx5_core_res_hold(struct mlx5_core_dev *dev,
-						int res_num,
-						enum mlx5_res_type res_type);
-void mlx5_core_res_put(struct mlx5_core_rsc_common *res);
 
 static inline const char *mlx5_qp_type_str(int type)
 {
-- 
cgit v1.2.3-59-g8ed1b


From a2a322f447b91a9b85d332b345a3b508d97506a9 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 19 Mar 2020 11:43:59 +0200
Subject: net/mlx5: Refactor HCA capability set flow

Reduce the amount of kzalloc/kfree cycles by allocating
command structure in the parent function and leverage the
knowledge that set_caps() is called for HCA capabilities
only with specific HW structure as parameter to calculate
mailbox size.

Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 83 ++++++++++----------------
 1 file changed, 31 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 6e19fa4d1310..a000cd820ace 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -407,30 +407,28 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
 	return mlx5_core_get_caps_mode(dev, cap_type, HCA_CAP_OPMOD_GET_MAX);
 }
 
-static int set_caps(struct mlx5_core_dev *dev, void *in, int in_sz, int opmod)
+static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
 {
-	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {};
 
 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
-	return mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+	return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(set_hca_cap_in), out,
+			     sizeof(out));
 }
 
-static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
+static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
 {
-	void *set_ctx;
 	void *set_hca_cap;
-	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
 	int req_endianness;
 	int err;
 
-	if (MLX5_CAP_GEN(dev, atomic)) {
-		err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
-		if (err)
-			return err;
-	} else {
+	if (!MLX5_CAP_GEN(dev, atomic))
 		return 0;
-	}
+
+	err = mlx5_core_get_caps(dev, MLX5_CAP_ATOMIC);
+	if (err)
+		return err;
 
 	req_endianness =
 		MLX5_CAP_ATOMIC(dev,
@@ -439,27 +437,18 @@ static int handle_hca_cap_atomic(struct mlx5_core_dev *dev)
 	if (req_endianness != MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS)
 		return 0;
 
-	set_ctx = kzalloc(set_sz, GFP_KERNEL);
-	if (!set_ctx)
-		return -ENOMEM;
-
 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
 
 	/* Set requestor to host endianness */
 	MLX5_SET(atomic_caps, set_hca_cap, atomic_req_8B_endianness_mode,
 		 MLX5_ATOMIC_REQ_MODE_HOST_ENDIANNESS);
 
-	err = set_caps(dev, set_ctx, set_sz, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
-
-	kfree(set_ctx);
-	return err;
+	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ATOMIC);
 }
 
-static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
+static int handle_hca_cap_odp(struct mlx5_core_dev *dev, void *set_ctx)
 {
 	void *set_hca_cap;
-	void *set_ctx;
-	int set_sz;
 	bool do_set = false;
 	int err;
 
@@ -471,11 +460,6 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
 	if (err)
 		return err;
 
-	set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
-	set_ctx = kzalloc(set_sz, GFP_KERNEL);
-	if (!set_ctx)
-		return -ENOMEM;
-
 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
 	memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ODP],
 	       MLX5_ST_SZ_BYTES(odp_cap));
@@ -504,30 +488,21 @@ static int handle_hca_cap_odp(struct mlx5_core_dev *dev)
 	ODP_CAP_SET_MAX(dev, dc_odp_caps.read);
 	ODP_CAP_SET_MAX(dev, dc_odp_caps.atomic);
 
-	if (do_set)
-		err = set_caps(dev, set_ctx, set_sz,
-			       MLX5_SET_HCA_CAP_OP_MOD_ODP);
-
-	kfree(set_ctx);
+	if (!do_set)
+		return 0;
 
-	return err;
+	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ODP);
 }
 
-static int handle_hca_cap(struct mlx5_core_dev *dev)
+static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 {
-	void *set_ctx = NULL;
 	struct mlx5_profile *prof = dev->profile;
-	int err = -ENOMEM;
-	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
 	void *set_hca_cap;
-
-	set_ctx = kzalloc(set_sz, GFP_KERNEL);
-	if (!set_ctx)
-		goto query_ex;
+	int err;
 
 	err = mlx5_core_get_caps(dev, MLX5_CAP_GENERAL);
 	if (err)
-		goto query_ex;
+		return err;
 
 	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx,
 				   capability);
@@ -578,37 +553,41 @@ static int handle_hca_cap(struct mlx5_core_dev *dev)
 			 num_vhca_ports,
 			 MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
 
-	err = set_caps(dev, set_ctx, set_sz,
-		       MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
-
-query_ex:
-	kfree(set_ctx);
-	return err;
+	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
 static int set_hca_cap(struct mlx5_core_dev *dev)
 {
+	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
+	void *set_ctx;
 	int err;
 
-	err = handle_hca_cap(dev);
+	set_ctx = kzalloc(set_sz, GFP_KERNEL);
+	if (!set_ctx)
+		return -ENOMEM;
+
+	err = handle_hca_cap(dev, set_ctx);
 	if (err) {
 		mlx5_core_err(dev, "handle_hca_cap failed\n");
 		goto out;
 	}
 
-	err = handle_hca_cap_atomic(dev);
+	memset(set_ctx, 0, set_sz);
+	err = handle_hca_cap_atomic(dev, set_ctx);
 	if (err) {
 		mlx5_core_err(dev, "handle_hca_cap_atomic failed\n");
 		goto out;
 	}
 
-	err = handle_hca_cap_odp(dev);
+	memset(set_ctx, 0, set_sz);
+	err = handle_hca_cap_odp(dev, set_ctx);
 	if (err) {
 		mlx5_core_err(dev, "handle_hca_cap_odp failed\n");
 		goto out;
 	}
 
 out:
+	kfree(set_ctx);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 59e9e8e4fe83f68e599b87c06aaf239dcc64887b Mon Sep 17 00:00:00 2001
From: Mark Zhang <markz@mellanox.com>
Date: Tue, 14 Jan 2020 05:06:25 +0200
Subject: net/mlx5: Enable SW-defined RoCEv2 UDP source port

When this is enabled, the UDP source port for RoCEv2 packets is defined
by software instead of firmware.

Signed-off-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 32 ++++++++++++++++++++++++++
 include/linux/mlx5/mlx5_ifc.h                  |  5 +++-
 2 files changed, 36 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index a000cd820ace..0044aa5cc676 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -556,6 +556,31 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
+static int handle_hca_cap_roce(struct mlx5_core_dev *dev, void *set_ctx)
+{
+	void *set_hca_cap;
+	int err;
+
+	if (!MLX5_CAP_GEN(dev, roce))
+		return 0;
+
+	err = mlx5_core_get_caps(dev, MLX5_CAP_ROCE);
+	if (err)
+		return err;
+
+	if (MLX5_CAP_ROCE(dev, sw_r_roce_src_udp_port) ||
+	    !MLX5_CAP_ROCE_MAX(dev, sw_r_roce_src_udp_port))
+		return 0;
+
+	set_hca_cap = MLX5_ADDR_OF(set_hca_cap_in, set_ctx, capability);
+	memcpy(set_hca_cap, dev->caps.hca_cur[MLX5_CAP_ROCE],
+	       MLX5_ST_SZ_BYTES(roce_cap));
+	MLX5_SET(roce_cap, set_hca_cap, sw_r_roce_src_udp_port, 1);
+
+	err = set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_ROCE);
+	return err;
+}
+
 static int set_hca_cap(struct mlx5_core_dev *dev)
 {
 	int set_sz = MLX5_ST_SZ_BYTES(set_hca_cap_in);
@@ -586,6 +611,13 @@ static int set_hca_cap(struct mlx5_core_dev *dev)
 		goto out;
 	}
 
+	memset(set_ctx, 0, set_sz);
+	err = handle_hca_cap_roce(dev, set_ctx);
+	if (err) {
+		mlx5_core_err(dev, "handle_hca_cap_roce failed\n");
+		goto out;
+	}
+
 out:
 	kfree(set_ctx);
 	return err;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 69b27c7dfc3e..6fa24918eade 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -74,6 +74,7 @@ enum {
 	MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE        = 0x0,
 	MLX5_SET_HCA_CAP_OP_MOD_ODP                   = 0x2,
 	MLX5_SET_HCA_CAP_OP_MOD_ATOMIC                = 0x3,
+	MLX5_SET_HCA_CAP_OP_MOD_ROCE                  = 0x4,
 };
 
 enum {
@@ -903,7 +904,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 
 struct mlx5_ifc_roce_cap_bits {
 	u8         roce_apm[0x1];
-	u8         reserved_at_1[0x1f];
+	u8         reserved_at_1[0x3];
+	u8         sw_r_roce_src_udp_port[0x1];
+	u8         reserved_at_5[0x1b];
 
 	u8         reserved_at_20[0x60];
 
-- 
cgit v1.2.3-59-g8ed1b


From c2a3f8febc69f222d9fc3248bf774c8f0c5725f3 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 11 Mar 2020 11:35:06 +0200
Subject: igc: Add new device IDs for i225 part

Add new device IDs for the next step of i225

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 3 +++
 drivers/net/ethernet/intel/igc/igc_hw.h   | 3 +++
 drivers/net/ethernet/intel/igc/igc_main.c | 3 +++
 3 files changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index 5a506440560a..f7fb18d8d8f5 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -212,6 +212,9 @@ static s32 igc_get_invariants_base(struct igc_hw *hw)
 	case IGC_DEV_ID_I225_I:
 	case IGC_DEV_ID_I220_V:
 	case IGC_DEV_ID_I225_K:
+	case IGC_DEV_ID_I225_K2:
+	case IGC_DEV_ID_I225_LMVP:
+	case IGC_DEV_ID_I225_IT:
 	case IGC_DEV_ID_I225_BLANK_NVM:
 		mac->type = igc_i225;
 		break;
diff --git a/drivers/net/ethernet/intel/igc/igc_hw.h b/drivers/net/ethernet/intel/igc/igc_hw.h
index 90ac0e0144d8..af34ae310327 100644
--- a/drivers/net/ethernet/intel/igc/igc_hw.h
+++ b/drivers/net/ethernet/intel/igc/igc_hw.h
@@ -21,6 +21,9 @@
 #define IGC_DEV_ID_I225_I			0x15F8
 #define IGC_DEV_ID_I220_V			0x15F7
 #define IGC_DEV_ID_I225_K			0x3100
+#define IGC_DEV_ID_I225_K2			0x3101
+#define IGC_DEV_ID_I225_LMVP			0x5502
+#define IGC_DEV_ID_I225_IT			0x0D9F
 #define IGC_DEV_ID_I225_BLANK_NVM		0x15FD
 
 /* Function pointers for the MAC. */
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9d1792e80e2e..6a7c1b081792 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -47,6 +47,9 @@ static const struct pci_device_id igc_pci_tbl[] = {
 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_I), board_base },
 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I220_V), board_base },
 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_K2), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_LMVP), board_base },
+	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_IT), board_base },
 	{ PCI_VDEVICE(INTEL, IGC_DEV_ID_I225_BLANK_NVM), board_base },
 	/* required last entry */
 	{0, }
-- 
cgit v1.2.3-59-g8ed1b


From 632fbd5eb5b0e01f03f1acb90a2b9ac1352b5dc7 Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Thu, 12 Mar 2020 13:57:07 +0200
Subject: e1000e: fix S0ix flows for cable connected case

Added a fix to S0ix entry and exit flows for TGP and above
MAC types, to the case when the Ethernet cable is connected
and the link is up. With that the system is able to reach
SLP_S0 when going to freeze power state.

Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 54 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/intel/e1000e/regs.h   |  3 ++
 2 files changed, 57 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 177c6da80c57..e0b074820b47 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -6404,6 +6404,31 @@ static void e1000e_s0ix_entry_flow(struct e1000_adapter *adapter)
 	mac_data |= BIT(3);
 	ew32(CTRL_EXT, mac_data);
 
+	/* Disable disconnected cable conditioning for Power Gating */
+	mac_data = er32(DPGFR);
+	mac_data |= BIT(2);
+	ew32(DPGFR, mac_data);
+
+	/* Don't wake from dynamic Power Gating with clock request */
+	mac_data = er32(FEXTNVM12);
+	mac_data |= BIT(12);
+	ew32(FEXTNVM12, mac_data);
+
+	/* Ungate PGCB clock */
+	mac_data = er32(FEXTNVM9);
+	mac_data |= BIT(28);
+	ew32(FEXTNVM9, mac_data);
+
+	/* Enable K1 off to enable mPHY Power Gating */
+	mac_data = er32(FEXTNVM6);
+	mac_data |= BIT(31);
+	ew32(FEXTNVM6, mac_data);
+
+	/* Enable mPHY power gating for any link and speed */
+	mac_data = er32(FEXTNVM8);
+	mac_data |= BIT(9);
+	ew32(FEXTNVM8, mac_data);
+
 	/* Enable the Dynamic Clock Gating in the DMA and MAC */
 	mac_data = er32(CTRL_EXT);
 	mac_data |= E1000_CTRL_EXT_DMA_DYN_CLK_EN;
@@ -6433,6 +6458,35 @@ static void e1000e_s0ix_exit_flow(struct e1000_adapter *adapter)
 	mac_data |= BIT(0);
 	ew32(FEXTNVM7, mac_data);
 
+	/* Disable mPHY power gating for any link and speed */
+	mac_data = er32(FEXTNVM8);
+	mac_data &= ~BIT(9);
+	ew32(FEXTNVM8, mac_data);
+
+	/* Disable K1 off */
+	mac_data = er32(FEXTNVM6);
+	mac_data &= ~BIT(31);
+	ew32(FEXTNVM6, mac_data);
+
+	/* Disable Ungate PGCB clock */
+	mac_data = er32(FEXTNVM9);
+	mac_data &= ~BIT(28);
+	ew32(FEXTNVM9, mac_data);
+
+	/* Cancel not waking from dynamic
+	 * Power Gating with clock request
+	 */
+	mac_data = er32(FEXTNVM12);
+	mac_data &= ~BIT(12);
+	ew32(FEXTNVM12, mac_data);
+
+	/* Cancel disable disconnected cable conditioning
+	 * for Power Gating
+	 */
+	mac_data = er32(DPGFR);
+	mac_data &= ~BIT(2);
+	ew32(DPGFR, mac_data);
+
 	/* Disable Dynamic Power Gating */
 	mac_data = er32(CTRL_EXT);
 	mac_data &= 0xFFFFFFF7;
diff --git a/drivers/net/ethernet/intel/e1000e/regs.h b/drivers/net/ethernet/intel/e1000e/regs.h
index df59fd1d660c..8165ba2619a4 100644
--- a/drivers/net/ethernet/intel/e1000e/regs.h
+++ b/drivers/net/ethernet/intel/e1000e/regs.h
@@ -21,9 +21,12 @@
 #define E1000_FEXTNVM5	0x00014	/* Future Extended NVM 5 - RW */
 #define E1000_FEXTNVM6	0x00010	/* Future Extended NVM 6 - RW */
 #define E1000_FEXTNVM7	0x000E4	/* Future Extended NVM 7 - RW */
+#define E1000_FEXTNVM8	0x5BB0	/* Future Extended NVM 8 - RW */
 #define E1000_FEXTNVM9	0x5BB4	/* Future Extended NVM 9 - RW */
 #define E1000_FEXTNVM11	0x5BBC	/* Future Extended NVM 11 - RW */
+#define E1000_FEXTNVM12	0x5BC0	/* Future Extended NVM 12 - RW */
 #define E1000_PCIEANACFG	0x00F18	/* PCIE Analog Config */
+#define E1000_DPGFR	0x00FAC	/* Dynamic Power Gate Force Control Register */
 #define E1000_FCT	0x00030	/* Flow Control Type - RW */
 #define E1000_VET	0x00038	/* VLAN Ether Type - RW */
 #define E1000_ICR	0x000C0	/* Interrupt Cause Read - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From 0c2e060859aa1cb8b13376554d802a251f750fa9 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:51 -0700
Subject: igc: Remove duplicate code in MAC filtering logic

This patch does a code refactoring in the MAC address filtering logic to
get rid of some duplicate code.

The IGC driver has two functions to add MAC address filters that are
pretty much the same: igc_add_mac_filter() and
igc_add_mac_filter_flags(). The only difference is that the latter
allows the caller to specify the 'flags' parameter while the former has
it hard-coded as zero. The same applies to their filter deletion
counterparts.

So this patch refactors igc_add_mac_filter() and igc_del_mac_filter() so
they handle the 'flags' parameter, removes the _flags() functions, and
fixes callers accordingly.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 112 ++++--------------------------
 1 file changed, 13 insertions(+), 99 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 6a7c1b081792..ade460f08ed1 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2191,8 +2191,8 @@ static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
  * default for the destination address, if matching by source address
  * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
  */
-static int igc_add_mac_filter(struct igc_adapter *adapter,
-			      const u8 *addr, const u8 queue)
+static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+			      const u8 queue, const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -2207,12 +2207,12 @@ static int igc_add_mac_filter(struct igc_adapter *adapter,
 	 */
 	for (i = 0; i < rar_entries; i++) {
 		if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
-					       addr, 0))
+					       addr, flags))
 			continue;
 
 		ether_addr_copy(adapter->mac_table[i].addr, addr);
 		adapter->mac_table[i].queue = queue;
-		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE;
+		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
 
 		igc_rar_set_index(adapter, i);
 		return i;
@@ -2227,8 +2227,8 @@ static int igc_add_mac_filter(struct igc_adapter *adapter,
  * matching by source address is to be removed the flag
  * IGC_MAC_STATE_SRC_ADDR can be used.
  */
-static int igc_del_mac_filter(struct igc_adapter *adapter,
-			      const u8 *addr, const u8 queue)
+static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+			      const u8 queue, const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -2244,7 +2244,7 @@ static int igc_del_mac_filter(struct igc_adapter *adapter,
 	for (i = 0; i < rar_entries; i++) {
 		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
 			continue;
-		if (adapter->mac_table[i].state != 0)
+		if (flags && (adapter->mac_table[i].state & flags) != flags)
 			continue;
 		if (adapter->mac_table[i].queue != queue)
 			continue;
@@ -2276,7 +2276,7 @@ static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	int ret;
 
-	ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues);
+	ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
 
 	return min_t(int, ret, 0);
 }
@@ -2285,7 +2285,7 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	igc_del_mac_filter(adapter, addr, adapter->num_rx_queues);
+	igc_del_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
 
 	return 0;
 }
@@ -3720,104 +3720,18 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
 	return features;
 }
 
-/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
- * 'flags' is used to indicate what kind of match is made, match is by
- * default for the destination address, if matching by source address
- * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
- */
-static int igc_add_mac_filter_flags(struct igc_adapter *adapter,
-				    const u8 *addr, const u8 queue,
-				    const u8 flags)
-{
-	struct igc_hw *hw = &adapter->hw;
-	int rar_entries = hw->mac.rar_entry_count;
-	int i;
-
-	if (is_zero_ether_addr(addr))
-		return -EINVAL;
-
-	/* Search for the first empty entry in the MAC table.
-	 * Do not touch entries at the end of the table reserved for the VF MAC
-	 * addresses.
-	 */
-	for (i = 0; i < rar_entries; i++) {
-		if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
-					       addr, flags))
-			continue;
-
-		ether_addr_copy(adapter->mac_table[i].addr, addr);
-		adapter->mac_table[i].queue = queue;
-		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
-
-		igc_rar_set_index(adapter, i);
-		return i;
-	}
-
-	return -ENOSPC;
-}
-
 int igc_add_mac_steering_filter(struct igc_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags)
 {
-	return igc_add_mac_filter_flags(adapter, addr, queue,
-					IGC_MAC_STATE_QUEUE_STEERING | flags);
-}
-
-/* Remove a MAC filter for 'addr' directing matching traffic to
- * 'queue', 'flags' is used to indicate what kind of match need to be
- * removed, match is by default for the destination address, if
- * matching by source address is to be removed the flag
- * IGC_MAC_STATE_SRC_ADDR can be used.
- */
-static int igc_del_mac_filter_flags(struct igc_adapter *adapter,
-				    const u8 *addr, const u8 queue,
-				    const u8 flags)
-{
-	struct igc_hw *hw = &adapter->hw;
-	int rar_entries = hw->mac.rar_entry_count;
-	int i;
-
-	if (is_zero_ether_addr(addr))
-		return -EINVAL;
-
-	/* Search for matching entry in the MAC table based on given address
-	 * and queue. Do not touch entries at the end of the table reserved
-	 * for the VF MAC addresses.
-	 */
-	for (i = 0; i < rar_entries; i++) {
-		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
-			continue;
-		if ((adapter->mac_table[i].state & flags) != flags)
-			continue;
-		if (adapter->mac_table[i].queue != queue)
-			continue;
-		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
-			continue;
-
-		/* When a filter for the default address is "deleted",
-		 * we return it to its initial configuration
-		 */
-		if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
-			adapter->mac_table[i].state =
-				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
-		} else {
-			adapter->mac_table[i].state = 0;
-			adapter->mac_table[i].queue = 0;
-			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
-		}
-
-		igc_rar_set_index(adapter, i);
-		return 0;
-	}
-
-	return -ENOENT;
+	return igc_add_mac_filter(adapter, addr, queue,
+				  IGC_MAC_STATE_QUEUE_STEERING | flags);
 }
 
 int igc_del_mac_steering_filter(struct igc_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags)
 {
-	return igc_del_mac_filter_flags(adapter, addr, queue,
-					IGC_MAC_STATE_QUEUE_STEERING | flags);
+	return igc_del_mac_filter(adapter, addr, queue,
+				  IGC_MAC_STATE_QUEUE_STEERING | flags);
 }
 
 static void igc_tsync_interrupt(struct igc_adapter *adapter)
-- 
cgit v1.2.3-59-g8ed1b


From 23b7b511675669702ad32bf7f92dcf2ae05015ba Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:52 -0700
Subject: igc: Check unsupported flag in igc_add_mac_filter()

The IGC_MAC_STATE_SRC_ADDR flag is not supported by
igc_add_mac_filter(), so this patch adds a check for it and returns
-ENOTSUPP in case it is set.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ade460f08ed1..66b3a689bb05 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2200,6 +2200,8 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 
 	if (is_zero_ether_addr(addr))
 		return -EINVAL;
+	if (flags & IGC_MAC_STATE_SRC_ADDR)
+		return -ENOTSUPP;
 
 	/* Search for the first empty entry in the MAC table.
 	 * Do not touch entries at the end of the table reserved for the VF MAC
-- 
cgit v1.2.3-59-g8ed1b


From 58184b8ff0786b219772016a50ce07c6b3020846 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:53 -0700
Subject: igc: Change igc_add_mac_filter() returning value

In case of success, igc_add_mac_filter() returns the index in
adapter->mac_table where the requested filter was added. This
information, however, is not used by any caller of that function.
In fact, callers have extra code just to convert this returned
index into 0 (success).

So this patch changes the function to return 0 on success instead,
and cleans up the extra code.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 --
 drivers/net/ethernet/intel/igc/igc_main.c    | 7 ++-----
 2 files changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index ff2a40496e4e..c9f4552c018b 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1269,7 +1269,6 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 		err = igc_add_mac_steering_filter(adapter,
 						  input->filter.dst_addr,
 						  input->action, 0);
-		err = min_t(int, err, 0);
 		if (err)
 			return err;
 	}
@@ -1279,7 +1278,6 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 						  input->filter.src_addr,
 						  input->action,
 						  IGC_MAC_STATE_SRC_ADDR);
-		err = min_t(int, err, 0);
 		if (err)
 			return err;
 	}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 66b3a689bb05..7c060c731a7e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2217,7 +2217,7 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
 
 		igc_rar_set_index(adapter, i);
-		return i;
+		return 0;
 	}
 
 	return -ENOSPC;
@@ -2276,11 +2276,8 @@ static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
-	int ret;
-
-	ret = igc_add_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
 
-	return min_t(int, ret, 0);
+	return igc_add_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
 }
 
 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
-- 
cgit v1.2.3-59-g8ed1b


From ec00f1090735259151568f6b74d19c3230d65787 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:54 -0700
Subject: igc: Fix igc_uc_unsync()

In case igc_del_mac_filter() returns an error, that error is masked
since igc_uc_unsync() always returns 0 (success). This patch fixes
igc_uc_unsync() so it returns whatever value igc_del_mac_filter()
returns (0 on success, negative number on error).

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7c060c731a7e..dc4632428117 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2284,9 +2284,7 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	igc_del_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
-
-	return 0;
+	return igc_del_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 424045bec085575fe4818428e6a68dac7cca48f3 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 1 Apr 2020 14:36:45 -0700
Subject: igc: Refactor igc_rar_set_index()

Current igc_rar_set_index() implementation is a bit convoluted so this
patch does some code refactoring to improve it.

The helper igc_rar_set_index() is about writing MAC filter settings into
hardware registers. Logic such as address validation belongs to
functions higher in the call chain such as igc_set_mac() and
igc_add_mac_filter(). So this patch moves the is_valid_ether_addr() call
to igc_add_mac_filter(). No need to touch igc_set_mac() since it already
checks it.

The variables 'rar_low' and 'rar_high' represent the value in registers
RAL and RAH so we rename them to 'ral' and 'rah', respectively, to
match the registers names.

To make it explicit, filter settings are passed as arguments to the
function instead of reading them from adapter->mac_table "under the
hood". Also, the function was renamed to igc_set_mac_filter_hw to make
it more clear what it does.

Finally, the patch removes some wrfl() calls and comments not needed.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 73 ++++++++++++++++++-------------
 1 file changed, 42 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index dc4632428117..2f6c8f7fa6f4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -765,42 +765,52 @@ static void igc_setup_tctl(struct igc_adapter *adapter)
 }
 
 /**
- * igc_rar_set_index - Sync RAL[index] and RAH[index] registers with MAC table
- * @adapter: address of board private structure
- * @index: Index of the RAR entry which need to be synced with MAC table
+ * igc_set_mac_filter_hw() - Set MAC address filter in hardware
+ * @adapter: Pointer to adapter where the filter should be set
+ * @index: Filter index
+ * @addr: Destination MAC address
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
  */
-static void igc_rar_set_index(struct igc_adapter *adapter, u32 index)
+static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
+				  const u8 *addr, int queue)
 {
-	u8 *addr = adapter->mac_table[index].addr;
 	struct igc_hw *hw = &adapter->hw;
-	u32 rar_low, rar_high;
+	u32 ral, rah;
 
-	/* HW expects these to be in network order when they are plugged
-	 * into the registers which are little endian.  In order to guarantee
-	 * that ordering we need to do an leXX_to_cpup here in order to be
-	 * ready for the byteswap that occurs with writel
-	 */
-	rar_low = le32_to_cpup((__le32 *)(addr));
-	rar_high = le16_to_cpup((__le16 *)(addr + 4));
+	if (WARN_ON(index >= hw->mac.rar_entry_count))
+		return;
 
-	if (adapter->mac_table[index].state & IGC_MAC_STATE_QUEUE_STEERING) {
-		u8 queue = adapter->mac_table[index].queue;
-		u32 qsel = IGC_RAH_QSEL_MASK & (queue << IGC_RAH_QSEL_SHIFT);
+	ral = le32_to_cpup((__le32 *)(addr));
+	rah = le16_to_cpup((__le16 *)(addr + 4));
 
-		rar_high |= qsel;
-		rar_high |= IGC_RAH_QSEL_ENABLE;
+	if (queue >= 0) {
+		rah &= ~IGC_RAH_QSEL_MASK;
+		rah |= (queue << IGC_RAH_QSEL_SHIFT);
+		rah |= IGC_RAH_QSEL_ENABLE;
 	}
 
-	/* Indicate to hardware the Address is Valid. */
-	if (adapter->mac_table[index].state & IGC_MAC_STATE_IN_USE) {
-		if (is_valid_ether_addr(addr))
-			rar_high |= IGC_RAH_AV;
-	}
+	rah |= IGC_RAH_AV;
 
-	wr32(IGC_RAL(index), rar_low);
-	wrfl();
-	wr32(IGC_RAH(index), rar_high);
-	wrfl();
+	wr32(IGC_RAL(index), ral);
+	wr32(IGC_RAH(index), rah);
+}
+
+/**
+ * igc_clear_mac_filter_hw() - Clear MAC address filter in hardware
+ * @adapter: Pointer to adapter where the filter should be cleared
+ * @index: Filter index
+ */
+static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	if (WARN_ON(index >= hw->mac.rar_entry_count))
+		return;
+
+	wr32(IGC_RAL(index), 0);
+	wr32(IGC_RAH(index), 0);
 }
 
 /* Set default MAC address for the PF in the first RAR entry */
@@ -811,7 +821,7 @@ static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 	ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
 	mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
 
-	igc_rar_set_index(adapter, 0);
+	igc_set_mac_filter_hw(adapter, 0, mac_table->addr, -1);
 }
 
 /**
@@ -2198,7 +2208,7 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	int rar_entries = hw->mac.rar_entry_count;
 	int i;
 
-	if (is_zero_ether_addr(addr))
+	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 	if (flags & IGC_MAC_STATE_SRC_ADDR)
 		return -ENOTSUPP;
@@ -2216,7 +2226,7 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		adapter->mac_table[i].queue = queue;
 		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
 
-		igc_rar_set_index(adapter, i);
+		igc_set_mac_filter_hw(adapter, i, addr, queue);
 		return 0;
 	}
 
@@ -2260,13 +2270,14 @@ static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 			adapter->mac_table[i].state =
 				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
 			adapter->mac_table[i].queue = 0;
+			igc_set_mac_filter_hw(adapter, 0, addr, -1);
 		} else {
 			adapter->mac_table[i].state = 0;
 			adapter->mac_table[i].queue = 0;
 			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
+			igc_clear_mac_filter_hw(adapter, i);
 		}
 
-		igc_rar_set_index(adapter, i);
 		return 0;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From a73eb651005eef079a6bf3b448d1b6eb607bc80b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:56 -0700
Subject: igc: Improve address check in igc_del_mac_filter()

igc_add_mac_filter() doesn't allow filters with invalid MAC address to
be added to adapter->mac_table so, in igc_del_mac_filter(), we can early
return if MAC address is invalid. No need to traverse the table.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 2f6c8f7fa6f4..070df92bb4e9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2246,7 +2246,7 @@ static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	int rar_entries = hw->mac.rar_entry_count;
 	int i;
 
-	if (is_zero_ether_addr(addr))
+	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
 	/* Search for matching entry in the MAC table based on given address
-- 
cgit v1.2.3-59-g8ed1b


From c6aae5917b8a244dcb4114be806ba3ac52e3480a Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 1 Apr 2020 14:41:43 -0700
Subject: igc: Remove 'queue' check in igc_del_mac_filter()

igc_add_mac_filter() doesn't allow us to have more than one entry with
the same address and address type in adapter->mac_table so checking if
'queue' matches in igc_del_mac_filter() isn't necessary. This patch
removes that check.

This patch also takes the opportunity to improve the igc_del_mac_filter
documentation and remove comment which is not applicable to this I225
controller.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 25 +++++++++++--------------
 1 file changed, 11 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 070df92bb4e9..badb8ecf38dc 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2233,14 +2233,17 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	return -ENOSPC;
 }
 
-/* Remove a MAC filter for 'addr' directing matching traffic to
- * 'queue', 'flags' is used to indicate what kind of match need to be
- * removed, match is by default for the destination address, if
- * matching by source address is to be removed the flag
- * IGC_MAC_STATE_SRC_ADDR can be used.
+/**
+ * igc_del_mac_filter() - Delete MAC address filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @addr: MAC address
+ * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source
+ *         address
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
  */
 static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-			      const u8 queue, const u8 flags)
+			      const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -2249,17 +2252,11 @@ static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	/* Search for matching entry in the MAC table based on given address
-	 * and queue. Do not touch entries at the end of the table reserved
-	 * for the VF MAC addresses.
-	 */
 	for (i = 0; i < rar_entries; i++) {
 		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
 			continue;
 		if (flags && (adapter->mac_table[i].state & flags) != flags)
 			continue;
-		if (adapter->mac_table[i].queue != queue)
-			continue;
 		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
 			continue;
 
@@ -2295,7 +2292,7 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_del_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
+	return igc_del_mac_filter(adapter, addr, 0);
 }
 
 /**
@@ -3738,7 +3735,7 @@ int igc_add_mac_steering_filter(struct igc_adapter *adapter,
 int igc_del_mac_steering_filter(struct igc_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags)
 {
-	return igc_del_mac_filter(adapter, addr, queue,
+	return igc_del_mac_filter(adapter, addr,
 				  IGC_MAC_STATE_QUEUE_STEERING | flags);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From e9736fa407e53224e9f23a092b17d8f2d1eb705d Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 1 Apr 2020 14:43:58 -0700
Subject: igc: Remove IGC_MAC_STATE_QUEUE_STEERING

The IGC_MAC_STATE_QUEUE_STEERING bit in mac_table[i].state is
utilized to indicate that frames matching the filter are assigned to
mac_table[i].queue. This bit is not strictly necessary since we can
convey the same information as follows: queue == -1 means queue
assignment is disabled, otherwise it is enabled.

In addition to making the code simpler, this change fixes some awkward
situations where we pass a completely misleading 'queue' value such as in
igc_uc_sync().

So this patch removes IGC_MAC_STATE_QUEUE_STEERING and also takes the
opportunity to improve the igc_add_mac_filter documentation.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h      |  3 +--
 drivers/net/ethernet/intel/igc/igc_main.c | 32 ++++++++++++++++++-------------
 2 files changed, 20 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 5f21dcfe99ce..8d5ebe2103ee 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -466,14 +466,13 @@ struct igc_nfc_filter {
 
 struct igc_mac_addr {
 	u8 addr[ETH_ALEN];
-	u8 queue;
+	s8 queue;
 	u8 state; /* bitmask */
 };
 
 #define IGC_MAC_STATE_DEFAULT		0x1
 #define IGC_MAC_STATE_IN_USE		0x2
 #define IGC_MAC_STATE_SRC_ADDR		0x4
-#define IGC_MAC_STATE_QUEUE_STEERING	0x8
 
 #define IGC_MAX_RXNFC_FILTERS		16
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index badb8ecf38dc..e195400cd490 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -820,8 +820,9 @@ static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 
 	ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
 	mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
+	mac_table->queue = -1;
 
-	igc_set_mac_filter_hw(adapter, 0, mac_table->addr, -1);
+	igc_set_mac_filter_hw(adapter, 0, mac_table->addr, mac_table->queue);
 }
 
 /**
@@ -2196,13 +2197,20 @@ static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
 	return true;
 }
 
-/* Add a MAC filter for 'addr' directing matching traffic to 'queue',
- * 'flags' is used to indicate what kind of match is made, match is by
- * default for the destination address, if matching by source address
- * is desired the flag IGC_MAC_STATE_SRC_ADDR can be used.
+/**
+ * igc_add_mac_filter() - Add MAC address filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @addr: MAC address
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
+ * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source
+ *         address
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
  */
 static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-			      const u8 queue, const u8 flags)
+			      const s8 queue, const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -2266,11 +2274,11 @@ static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
 			adapter->mac_table[i].state =
 				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
-			adapter->mac_table[i].queue = 0;
+			adapter->mac_table[i].queue = -1;
 			igc_set_mac_filter_hw(adapter, 0, addr, -1);
 		} else {
 			adapter->mac_table[i].state = 0;
-			adapter->mac_table[i].queue = 0;
+			adapter->mac_table[i].queue = -1;
 			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
 			igc_clear_mac_filter_hw(adapter, i);
 		}
@@ -2285,7 +2293,7 @@ static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_add_mac_filter(adapter, addr, adapter->num_rx_queues, 0);
+	return igc_add_mac_filter(adapter, addr, -1, 0);
 }
 
 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
@@ -3728,15 +3736,13 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
 int igc_add_mac_steering_filter(struct igc_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags)
 {
-	return igc_add_mac_filter(adapter, addr, queue,
-				  IGC_MAC_STATE_QUEUE_STEERING | flags);
+	return igc_add_mac_filter(adapter, addr, queue, flags);
 }
 
 int igc_del_mac_steering_filter(struct igc_adapter *adapter,
 				const u8 *addr, u8 queue, u8 flags)
 {
-	return igc_del_mac_filter(adapter, addr,
-				  IGC_MAC_STATE_QUEUE_STEERING | flags);
+	return igc_del_mac_filter(adapter, addr, flags);
 }
 
 static void igc_tsync_interrupt(struct igc_adapter *adapter)
-- 
cgit v1.2.3-59-g8ed1b


From 83ba21b9ef7706413bdaf9fa8357b93c4986d8a0 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:00:59 -0700
Subject: igc: Remove igc_*_mac_steering_filter() wrappers

With the previous two patches, igc_add_mac_steering_filter() and
igc_del_mac_steering_filter() became pointless wrappers of
igc_add_mac_filter() and igc_del_mac_filter().

This patch removes these wrappers and updates callers to call
igc_add_mac_filter() and igc_del_mac_filter() directly.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  8 ++++----
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 20 ++++++++------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 20 ++++----------------
 3 files changed, 16 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 8d5ebe2103ee..8ddc39482a8e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -227,10 +227,10 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter);
 bool igc_has_link(struct igc_adapter *adapter);
 void igc_reset(struct igc_adapter *adapter);
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
-int igc_add_mac_steering_filter(struct igc_adapter *adapter,
-				const u8 *addr, u8 queue, u8 flags);
-int igc_del_mac_steering_filter(struct igc_adapter *adapter,
-				const u8 *addr, u8 queue, u8 flags);
+int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+		       const s8 queue, const u8 flags);
+int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+		       const u8 flags);
 void igc_update_stats(struct igc_adapter *adapter);
 
 /* igc_dump declarations */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index c9f4552c018b..0a8c4a7412a4 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1266,18 +1266,16 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
-		err = igc_add_mac_steering_filter(adapter,
-						  input->filter.dst_addr,
-						  input->action, 0);
+		err = igc_add_mac_filter(adapter, input->filter.dst_addr,
+					 input->action, 0);
 		if (err)
 			return err;
 	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-		err = igc_add_mac_steering_filter(adapter,
-						  input->filter.src_addr,
-						  input->action,
-						  IGC_MAC_STATE_SRC_ADDR);
+		err = igc_add_mac_filter(adapter, input->filter.src_addr,
+					 input->action,
+					 IGC_MAC_STATE_SRC_ADDR);
 		if (err)
 			return err;
 	}
@@ -1331,13 +1329,11 @@ int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 					   ntohs(input->filter.vlan_tci));
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
-		igc_del_mac_steering_filter(adapter, input->filter.src_addr,
-					    input->action,
-					    IGC_MAC_STATE_SRC_ADDR);
+		igc_del_mac_filter(adapter, input->filter.src_addr,
+				   IGC_MAC_STATE_SRC_ADDR);
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
-		igc_del_mac_steering_filter(adapter, input->filter.dst_addr,
-					    input->action, 0);
+		igc_del_mac_filter(adapter, input->filter.dst_addr, 0);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e195400cd490..3af6ce1712d5 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2209,8 +2209,8 @@ static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-			      const s8 queue, const u8 flags)
+int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+		       const s8 queue, const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -2250,8 +2250,8 @@ static int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-static int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-			      const u8 flags)
+int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+		       const u8 flags)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int rar_entries = hw->mac.rar_entry_count;
@@ -3733,18 +3733,6 @@ igc_features_check(struct sk_buff *skb, struct net_device *dev,
 	return features;
 }
 
-int igc_add_mac_steering_filter(struct igc_adapter *adapter,
-				const u8 *addr, u8 queue, u8 flags)
-{
-	return igc_add_mac_filter(adapter, addr, queue, flags);
-}
-
-int igc_del_mac_steering_filter(struct igc_adapter *adapter,
-				const u8 *addr, u8 queue, u8 flags)
-{
-	return igc_del_mac_filter(adapter, addr, flags);
-}
-
 static void igc_tsync_interrupt(struct igc_adapter *adapter)
 {
 	struct igc_hw *hw = &adapter->hw;
-- 
cgit v1.2.3-59-g8ed1b


From 794e5bc817bcc16ca955691e957e23908edbef9c Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:01:00 -0700
Subject: igc: Refactor igc_mac_entry_can_be_used()

The helper igc_mac_entry_can_be_used() implementation is a bit
convoluted since it does two different things: find a not-in-use slot
in mac_table or find an in-use slot where the address and address type
match. This patch does a code refactoring and breaks it up into two
helper functions.

With this patch we might traverse mac_table twice in some situations,
but this is not harmful performance-wise (mac_table has only 16 entries
and adding MAC filters is not a hot path), and it improves
igc_add_mac_filter() readability considerably.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 80 ++++++++++++++++++-------------
 1 file changed, 47 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3af6ce1712d5..79a9875e0767 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2176,25 +2176,44 @@ static void igc_nfc_filter_restore(struct igc_adapter *adapter)
 	spin_unlock(&adapter->nfc_lock);
 }
 
-/* If the filter to be added and an already existing filter express
- * the same address and address type, it should be possible to only
- * override the other configurations, for example the queue to steer
- * traffic.
- */
-static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
-				      const u8 *addr, const u8 flags)
+static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr,
+			       u8 flags)
 {
-	if (!(entry->state & IGC_MAC_STATE_IN_USE))
-		return true;
+	int max_entries = adapter->hw.mac.rar_entry_count;
+	struct igc_mac_addr *entry;
+	int i;
 
-	if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
-	    (flags & IGC_MAC_STATE_SRC_ADDR))
-		return false;
+	for (i = 0; i < max_entries; i++) {
+		entry = &adapter->mac_table[i];
 
-	if (!ether_addr_equal(addr, entry->addr))
-		return false;
+		if (!(entry->state & IGC_MAC_STATE_IN_USE))
+			continue;
+		if (!ether_addr_equal(addr, entry->addr))
+			continue;
+		if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
+		    (flags & IGC_MAC_STATE_SRC_ADDR))
+			continue;
 
-	return true;
+		return i;
+	}
+
+	return -1;
+}
+
+static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
+{
+	int max_entries = adapter->hw.mac.rar_entry_count;
+	struct igc_mac_addr *entry;
+	int i;
+
+	for (i = 0; i < max_entries; i++) {
+		entry = &adapter->mac_table[i];
+
+		if (!(entry->state & IGC_MAC_STATE_IN_USE))
+			return i;
+	}
+
+	return -1;
 }
 
 /**
@@ -2212,33 +2231,28 @@ static bool igc_mac_entry_can_be_used(const struct igc_mac_addr *entry,
 int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const s8 queue, const u8 flags)
 {
-	struct igc_hw *hw = &adapter->hw;
-	int rar_entries = hw->mac.rar_entry_count;
-	int i;
+	int index;
 
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 	if (flags & IGC_MAC_STATE_SRC_ADDR)
 		return -ENOTSUPP;
 
-	/* Search for the first empty entry in the MAC table.
-	 * Do not touch entries at the end of the table reserved for the VF MAC
-	 * addresses.
-	 */
-	for (i = 0; i < rar_entries; i++) {
-		if (!igc_mac_entry_can_be_used(&adapter->mac_table[i],
-					       addr, flags))
-			continue;
+	index = igc_find_mac_filter(adapter, addr, flags);
+	if (index >= 0)
+		goto update_queue_assignment;
 
-		ether_addr_copy(adapter->mac_table[i].addr, addr);
-		adapter->mac_table[i].queue = queue;
-		adapter->mac_table[i].state |= IGC_MAC_STATE_IN_USE | flags;
+	index = igc_get_avail_mac_filter_slot(adapter);
+	if (index < 0)
+		return -ENOSPC;
 
-		igc_set_mac_filter_hw(adapter, i, addr, queue);
-		return 0;
-	}
+	ether_addr_copy(adapter->mac_table[index].addr, addr);
+	adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE | flags;
+update_queue_assignment:
+	adapter->mac_table[index].queue = queue;
 
-	return -ENOSPC;
+	igc_set_mac_filter_hw(adapter, index, addr, queue);
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 5f930713728b5551f5c07b918cc8030dacb75fe1 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:01:01 -0700
Subject: igc: Refactor igc_del_mac_filter()

This patch does a code refactoring in igc_del_mac_filter() so it uses
the new helper igc_find_mac_filter() and improves the comment about the
special handling when deleting the default filter.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 45 +++++++++++++------------------
 1 file changed, 19 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 79a9875e0767..78753f12b8a0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2267,40 +2267,33 @@ update_queue_assignment:
 int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const u8 flags)
 {
-	struct igc_hw *hw = &adapter->hw;
-	int rar_entries = hw->mac.rar_entry_count;
-	int i;
+	struct igc_mac_addr *entry;
+	int index;
 
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	for (i = 0; i < rar_entries; i++) {
-		if (!(adapter->mac_table[i].state & IGC_MAC_STATE_IN_USE))
-			continue;
-		if (flags && (adapter->mac_table[i].state & flags) != flags)
-			continue;
-		if (!ether_addr_equal(adapter->mac_table[i].addr, addr))
-			continue;
+	index = igc_find_mac_filter(adapter, addr, flags);
+	if (index < 0)
+		return -ENOENT;
 
-		/* When a filter for the default address is "deleted",
-		 * we return it to its initial configuration
-		 */
-		if (adapter->mac_table[i].state & IGC_MAC_STATE_DEFAULT) {
-			adapter->mac_table[i].state =
-				IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
-			adapter->mac_table[i].queue = -1;
-			igc_set_mac_filter_hw(adapter, 0, addr, -1);
-		} else {
-			adapter->mac_table[i].state = 0;
-			adapter->mac_table[i].queue = -1;
-			memset(adapter->mac_table[i].addr, 0, ETH_ALEN);
-			igc_clear_mac_filter_hw(adapter, i);
-		}
+	entry = &adapter->mac_table[index];
 
-		return 0;
+	if (entry->state & IGC_MAC_STATE_DEFAULT) {
+		/* If this is the default filter, we don't actually delete it.
+		 * We just reset to its default value i.e. disable queue
+		 * assignment.
+		 */
+		entry->queue = -1;
+		igc_set_mac_filter_hw(adapter, 0, addr, entry->queue);
+	} else {
+		entry->state = 0;
+		entry->queue = -1;
+		memset(entry->addr, 0, ETH_ALEN);
+		igc_clear_mac_filter_hw(adapter, index);
 	}
 
-	return -ENOENT;
+	return 0;
 }
 
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
-- 
cgit v1.2.3-59-g8ed1b


From 949b922e8b1b2bebfec90c68edd888723ab8bc23 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Wed, 18 Mar 2020 16:01:02 -0700
Subject: igc: Add debug messages to MAC filter code

This patch adds log messages to functions related to the MAC address
filtering code to ease debugging.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 24 ++++++++++++++++++++++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 78753f12b8a0..9d5f8287c704 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -776,6 +776,7 @@ static void igc_setup_tctl(struct igc_adapter *adapter)
 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 				  const u8 *addr, int queue)
 {
+	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
 	u32 ral, rah;
 
@@ -795,6 +796,8 @@ static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 
 	wr32(IGC_RAL(index), ral);
 	wr32(IGC_RAH(index), rah);
+
+	netdev_dbg(dev, "MAC address filter set in HW: index %d", index);
 }
 
 /**
@@ -804,6 +807,7 @@ static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
  */
 static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 {
+	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
 
 	if (WARN_ON(index >= hw->mac.rar_entry_count))
@@ -811,18 +815,24 @@ static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 
 	wr32(IGC_RAL(index), 0);
 	wr32(IGC_RAH(index), 0);
+
+	netdev_dbg(dev, "MAC address filter cleared in HW: index %d", index);
 }
 
 /* Set default MAC address for the PF in the first RAR entry */
 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 {
 	struct igc_mac_addr *mac_table = &adapter->mac_table[0];
+	struct net_device *dev = adapter->netdev;
+	u8 *addr = adapter->hw.mac.addr;
+
+	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 
-	ether_addr_copy(mac_table->addr, adapter->hw.mac.addr);
+	ether_addr_copy(mac_table->addr, addr);
 	mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
 	mac_table->queue = -1;
 
-	igc_set_mac_filter_hw(adapter, 0, mac_table->addr, mac_table->queue);
+	igc_set_mac_filter_hw(adapter, 0, addr, mac_table->queue);
 }
 
 /**
@@ -2231,6 +2241,7 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
 int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const s8 queue, const u8 flags)
 {
+	struct net_device *dev = adapter->netdev;
 	int index;
 
 	if (!is_valid_ether_addr(addr))
@@ -2246,6 +2257,9 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	if (index < 0)
 		return -ENOSPC;
 
+	netdev_dbg(dev, "Add MAC address filter: index %d address %pM queue %d",
+		   index, addr, queue);
+
 	ether_addr_copy(adapter->mac_table[index].addr, addr);
 	adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE | flags;
 update_queue_assignment:
@@ -2267,6 +2281,7 @@ update_queue_assignment:
 int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const u8 flags)
 {
+	struct net_device *dev = adapter->netdev;
 	struct igc_mac_addr *entry;
 	int index;
 
@@ -2284,9 +2299,14 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		 * We just reset to its default value i.e. disable queue
 		 * assignment.
 		 */
+		netdev_dbg(dev, "Disable default MAC filter queue assignment");
+
 		entry->queue = -1;
 		igc_set_mac_filter_hw(adapter, 0, addr, entry->queue);
 	} else {
+		netdev_dbg(dev, "Delete MAC address filter: index %d address %pM",
+			   index, addr);
+
 		entry->state = 0;
 		entry->queue = -1;
 		memset(entry->addr, 0, ETH_ALEN);
-- 
cgit v1.2.3-59-g8ed1b


From 135e30180ff4f654794960aba04785fe7eef2e90 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Fri, 17 Apr 2020 22:50:52 +0300
Subject: net: dsa: sja1105: enable internal pull-down for RX_DV/CRS_DV/RX_CTL
 and RX_ER

Some boards do not have the RX_ER MII signal connected. Normally in such
a situation, those pins would be grounded, but some boards leave them
electrically floating.

When sending traffic to those switch ports, one can see that the
N_SOFERR statistics counter is incrementing once per packet. The
user manual states for this counter that it may count the number of
frames "that have the MII error input being asserted prior to or
up to the SOF delimiter byte". So the switch MAC is sampling an
electrically floating signal, and preventing proper traffic reception
because of that.

As a workaround, enable the internal weak pull-downs on the input pads
for the MII control signals. This way, a floating signal would be
internally tied to ground.

The logic levels of signals which _are_ externally driven should not be
bothered by this 40-50 KOhm internal resistor. So it is not an issue to
enable the internal pull-down unconditionally, irrespective of PHY
interface type (MII, RMII, RGMII, SGMII) and of board layout.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h          |  1 +
 drivers/net/dsa/sja1105/sja1105_clocking.c | 58 ++++++++++++++++++++++++++----
 drivers/net/dsa/sja1105/sja1105_spi.c      |  2 ++
 3 files changed, 54 insertions(+), 7 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 8b60dbd567f2..2f62942692ec 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -49,6 +49,7 @@ struct sja1105_regs {
 	u64 ptpschtm;
 	u64 ptpegr_ts[SJA1105_NUM_PORTS];
 	u64 pad_mii_tx[SJA1105_NUM_PORTS];
+	u64 pad_mii_rx[SJA1105_NUM_PORTS];
 	u64 pad_mii_id[SJA1105_NUM_PORTS];
 	u64 cgu_idiv[SJA1105_NUM_PORTS];
 	u64 mii_tx_clk[SJA1105_NUM_PORTS];
diff --git a/drivers/net/dsa/sja1105/sja1105_clocking.c b/drivers/net/dsa/sja1105/sja1105_clocking.c
index 0fdc2d55fff6..2a9b8a6a5306 100644
--- a/drivers/net/dsa/sja1105/sja1105_clocking.c
+++ b/drivers/net/dsa/sja1105/sja1105_clocking.c
@@ -7,12 +7,16 @@
 
 #define SJA1105_SIZE_CGU_CMD	4
 
-struct sja1105_cfg_pad_mii_tx {
+/* Common structure for CFG_PAD_MIIx_RX and CFG_PAD_MIIx_TX */
+struct sja1105_cfg_pad_mii {
 	u64 d32_os;
+	u64 d32_ih;
 	u64 d32_ipud;
+	u64 d10_ih;
 	u64 d10_os;
 	u64 d10_ipud;
 	u64 ctrl_os;
+	u64 ctrl_ih;
 	u64 ctrl_ipud;
 	u64 clk_os;
 	u64 clk_ih;
@@ -338,16 +342,19 @@ static int sja1105_cgu_rgmii_tx_clk_config(struct sja1105_private *priv,
 
 /* AGU */
 static void
-sja1105_cfg_pad_mii_tx_packing(void *buf, struct sja1105_cfg_pad_mii_tx *cmd,
-			       enum packing_op op)
+sja1105_cfg_pad_mii_packing(void *buf, struct sja1105_cfg_pad_mii *cmd,
+			    enum packing_op op)
 {
 	const int size = 4;
 
 	sja1105_packing(buf, &cmd->d32_os,   28, 27, size, op);
+	sja1105_packing(buf, &cmd->d32_ih,   26, 26, size, op);
 	sja1105_packing(buf, &cmd->d32_ipud, 25, 24, size, op);
 	sja1105_packing(buf, &cmd->d10_os,   20, 19, size, op);
+	sja1105_packing(buf, &cmd->d10_ih,   18, 18, size, op);
 	sja1105_packing(buf, &cmd->d10_ipud, 17, 16, size, op);
 	sja1105_packing(buf, &cmd->ctrl_os,  12, 11, size, op);
+	sja1105_packing(buf, &cmd->ctrl_ih,  10, 10, size, op);
 	sja1105_packing(buf, &cmd->ctrl_ipud, 9,  8, size, op);
 	sja1105_packing(buf, &cmd->clk_os,    4,  3, size, op);
 	sja1105_packing(buf, &cmd->clk_ih,    2,  2, size, op);
@@ -358,7 +365,7 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv,
 					   int port)
 {
 	const struct sja1105_regs *regs = priv->info->regs;
-	struct sja1105_cfg_pad_mii_tx pad_mii_tx;
+	struct sja1105_cfg_pad_mii pad_mii_tx = {0};
 	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
 
 	/* Payload */
@@ -375,12 +382,45 @@ static int sja1105_rgmii_cfg_pad_tx_config(struct sja1105_private *priv,
 	pad_mii_tx.clk_os    = 3; /* TX_CLK output stage */
 	pad_mii_tx.clk_ih    = 0; /* TX_CLK input hysteresis (default) */
 	pad_mii_tx.clk_ipud  = 2; /* TX_CLK input stage (default) */
-	sja1105_cfg_pad_mii_tx_packing(packed_buf, &pad_mii_tx, PACK);
+	sja1105_cfg_pad_mii_packing(packed_buf, &pad_mii_tx, PACK);
 
 	return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_tx[port],
 				packed_buf, SJA1105_SIZE_CGU_CMD);
 }
 
+static int sja1105_cfg_pad_rx_config(struct sja1105_private *priv, int port)
+{
+	const struct sja1105_regs *regs = priv->info->regs;
+	struct sja1105_cfg_pad_mii pad_mii_rx = {0};
+	u8 packed_buf[SJA1105_SIZE_CGU_CMD] = {0};
+
+	/* Payload */
+	pad_mii_rx.d32_ih    = 0; /* RXD[3:2] input stage hysteresis: */
+				  /*          non-Schmitt (default) */
+	pad_mii_rx.d32_ipud  = 2; /* RXD[3:2] input weak pull-up/down */
+				  /*          plain input (default) */
+	pad_mii_rx.d10_ih    = 0; /* RXD[1:0] input stage hysteresis: */
+				  /*          non-Schmitt (default) */
+	pad_mii_rx.d10_ipud  = 2; /* RXD[1:0] input weak pull-up/down */
+				  /*          plain input (default) */
+	pad_mii_rx.ctrl_ih   = 0; /* RX_DV/CRS_DV/RX_CTL and RX_ER */
+				  /* input stage hysteresis: */
+				  /* non-Schmitt (default) */
+	pad_mii_rx.ctrl_ipud = 3; /* RX_DV/CRS_DV/RX_CTL and RX_ER */
+				  /* input stage weak pull-up/down: */
+				  /* pull-down */
+	pad_mii_rx.clk_os    = 2; /* RX_CLK/RXC output stage: */
+				  /* medium noise/fast speed (default) */
+	pad_mii_rx.clk_ih    = 0; /* RX_CLK/RXC input hysteresis: */
+				  /* non-Schmitt (default) */
+	pad_mii_rx.clk_ipud  = 2; /* RX_CLK/RXC input pull-up/down: */
+				  /* plain input (default) */
+	sja1105_cfg_pad_mii_packing(packed_buf, &pad_mii_rx, PACK);
+
+	return sja1105_xfer_buf(priv, SPI_WRITE, regs->pad_mii_rx[port],
+				packed_buf, SJA1105_SIZE_CGU_CMD);
+}
+
 static void
 sja1105_cfg_pad_mii_id_packing(void *buf, struct sja1105_cfg_pad_mii_id *cmd,
 			       enum packing_op op)
@@ -669,10 +709,14 @@ int sja1105_clocking_setup_port(struct sja1105_private *priv, int port)
 			phy_mode);
 		return -EINVAL;
 	}
-	if (rc)
+	if (rc) {
 		dev_err(dev, "Clocking setup for port %d failed: %d\n",
 			port, rc);
-	return rc;
+		return rc;
+	}
+
+	/* Internally pull down the RX_DV/CRS_DV/RX_CTL and RX_ER inputs */
+	return sja1105_cfg_pad_rx_config(priv, port);
 }
 
 int sja1105_clocking_setup(struct sja1105_private *priv)
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 04bdb72ae6b6..43f14a5c2718 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -443,6 +443,7 @@ static struct sja1105_regs sja1105et_regs = {
 	.rgu = 0x100440,
 	/* UM10944.pdf, Table 86, ACU Register overview */
 	.pad_mii_tx = {0x100800, 0x100802, 0x100804, 0x100806, 0x100808},
+	.pad_mii_rx = {0x100801, 0x100803, 0x100805, 0x100807, 0x100809},
 	.rmii_pll1 = 0x10000A,
 	.cgu_idiv = {0x10000B, 0x10000C, 0x10000D, 0x10000E, 0x10000F},
 	.mac = {0x200, 0x202, 0x204, 0x206, 0x208},
@@ -475,6 +476,7 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.rgu = 0x100440,
 	/* UM10944.pdf, Table 86, ACU Register overview */
 	.pad_mii_tx = {0x100800, 0x100802, 0x100804, 0x100806, 0x100808},
+	.pad_mii_rx = {0x100801, 0x100803, 0x100805, 0x100807, 0x100809},
 	.pad_mii_id = {0x100810, 0x100811, 0x100812, 0x100813, 0x100814},
 	.sgmii = 0x1F0000,
 	.rmii_pll1 = 0x10000A,
-- 
cgit v1.2.3-59-g8ed1b


From 0673f976285e0570437612329962c339300d013e Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 18 Apr 2020 09:51:54 +0800
Subject: ptp_kvm: Make kvm_ptp_lock static

Fix sparse warning:

drivers/ptp/ptp_kvm.c:25:1: warning:
 symbol 'kvm_ptp_lock' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_kvm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_kvm.c b/drivers/ptp/ptp_kvm.c
index fc7d0b77e118..658d33fc3195 100644
--- a/drivers/ptp/ptp_kvm.c
+++ b/drivers/ptp/ptp_kvm.c
@@ -22,7 +22,7 @@ struct kvm_ptp_clock {
 	struct ptp_clock_info caps;
 };
 
-DEFINE_SPINLOCK(kvm_ptp_lock);
+static DEFINE_SPINLOCK(kvm_ptp_lock);
 
 static struct pvclock_vsyscall_time_info *hv_clock;
 
-- 
cgit v1.2.3-59-g8ed1b


From d30e1c3db96467f1f444d84023c2c37821202aeb Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Sat, 18 Apr 2020 10:01:49 +0800
Subject: ptp: idt82p33: Make two variables static

Fix sparse warnings:

drivers/ptp/ptp_idt82p33.c:26:5: warning: symbol 'sync_tod_timeout' was not declared. Should it be static?
drivers/ptp/ptp_idt82p33.c:31:5: warning: symbol 'phase_snap_threshold' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_idt82p33.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c
index b63ac240308b..31ea811b6d5f 100644
--- a/drivers/ptp/ptp_idt82p33.c
+++ b/drivers/ptp/ptp_idt82p33.c
@@ -23,12 +23,12 @@ MODULE_VERSION("1.0");
 MODULE_LICENSE("GPL");
 
 /* Module Parameters */
-u32 sync_tod_timeout = SYNC_TOD_TIMEOUT_SEC;
+static u32 sync_tod_timeout = SYNC_TOD_TIMEOUT_SEC;
 module_param(sync_tod_timeout, uint, 0);
 MODULE_PARM_DESC(sync_tod_timeout,
 "duration in second to keep SYNC_TOD on (set to 0 to keep it always on)");
 
-u32 phase_snap_threshold = SNAP_THRESHOLD_NS;
+static u32 phase_snap_threshold = SNAP_THRESHOLD_NS;
 module_param(phase_snap_threshold, uint, 0);
 MODULE_PARM_DESC(phase_snap_threshold,
 "threshold (150000ns by default) below which adjtime would ignore");
-- 
cgit v1.2.3-59-g8ed1b


From 6d92797716003a462bd75bbda3740718ce54bcdd Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 16:42:12 +0800
Subject: net: hns: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c:700:2-8: WARNING:
Assignment of 0/1 to bool variable
drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c:702:2-8: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
index 8aace2de0cc9..9a907947ba19 100644
--- a/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
+++ b/drivers/net/ethernet/hisilicon/hns/hns_dsaf_mac.c
@@ -697,9 +697,9 @@ hns_mac_register_phydev(struct mii_bus *mdio, struct hns_mac_cb *mac_cb,
 		return rc;
 
 	if (!strcmp(phy_type, phy_modes(PHY_INTERFACE_MODE_XGMII)))
-		is_c45 = 1;
+		is_c45 = true;
 	else if (!strcmp(phy_type, phy_modes(PHY_INTERFACE_MODE_SGMII)))
-		is_c45 = 0;
+		is_c45 = false;
 	else
 		return -ENODATA;
 
-- 
cgit v1.2.3-59-g8ed1b


From fee698d62b3b0621dba8b33454042a4fe82521f3 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 22:08:51 +0200
Subject: net: phy: realtek: add delay to resume path of certain internal PHY's

Internal PHYs from RTL8168h up may not be instantly ready after calling
genphy_resume(). So far the r8169 network driver adds the needed delay,
but it is better to handle this in the PHY driver. The network driver may
miss other places where the PHY is resumed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/realtek.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/realtek.c b/drivers/net/phy/realtek.c
index 2d99e9de6ee1..c7229d022a27 100644
--- a/drivers/net/phy/realtek.c
+++ b/drivers/net/phy/realtek.c
@@ -11,6 +11,7 @@
 #include <linux/bitops.h>
 #include <linux/phy.h>
 #include <linux/module.h>
+#include <linux/delay.h>
 
 #define RTL821x_PHYSR				0x11
 #define RTL821x_PHYSR_DUPLEX			BIT(13)
@@ -526,6 +527,16 @@ static int rtl8125_match_phy_device(struct phy_device *phydev)
 	       rtlgen_supports_2_5gbps(phydev);
 }
 
+static int rtlgen_resume(struct phy_device *phydev)
+{
+	int ret = genphy_resume(phydev);
+
+	/* Internal PHY's from RTL8168h up may not be instantly ready */
+	msleep(20);
+
+	return ret;
+}
+
 static struct phy_driver realtek_drvs[] = {
 	{
 		PHY_ID_MATCH_EXACT(0x00008201),
@@ -609,7 +620,7 @@ static struct phy_driver realtek_drvs[] = {
 		.match_phy_device = rtlgen_match_phy_device,
 		.read_status	= rtlgen_read_status,
 		.suspend	= genphy_suspend,
-		.resume		= genphy_resume,
+		.resume		= rtlgen_resume,
 		.read_page	= rtl821x_read_page,
 		.write_page	= rtl821x_write_page,
 		.read_mmd	= rtlgen_read_mmd,
@@ -621,7 +632,7 @@ static struct phy_driver realtek_drvs[] = {
 		.config_aneg	= rtl8125_config_aneg,
 		.read_status	= rtl8125_read_status,
 		.suspend	= genphy_suspend,
-		.resume		= genphy_resume,
+		.resume		= rtlgen_resume,
 		.read_page	= rtl821x_read_page,
 		.write_page	= rtl821x_write_page,
 		.read_mmd	= rtl8125_read_mmd,
-- 
cgit v1.2.3-59-g8ed1b


From 109f0cf23b094b0b780a22ce1c9ea267ebcd3974 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 18 Apr 2020 22:09:42 +0200
Subject: r8169: remove PHY resume delay that is handled in the PHY driver now

The Realtek PHY driver takes care of adding the needed delay now,
therefore we can remove the delay here.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a8696d958cd1..1bc415d00cb8 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2391,8 +2391,6 @@ static void rtl_pll_power_up(struct rtl8169_private *tp)
 	}
 
 	phy_resume(tp->phydev);
-	/* give MAC/PHY some time to resume */
-	msleep(20);
 }
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From c290d1ab12d3385da1d7be909e6b09caea3325bb Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Sat, 18 Apr 2020 20:17:13 -0700
Subject: net: phy: Propagate error from bus->reset

If a bus->reset() call for the mii_bus structure returns an error (e.g.:
-EPROBE_DEFER) we should propagate it accordingly.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 7a4eb3f2cb74..346e88435d29 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -627,8 +627,11 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 		gpiod_set_value_cansleep(gpiod, 0);
 	}
 
-	if (bus->reset)
-		bus->reset(bus);
+	if (bus->reset) {
+		err = bus->reset(bus);
+		if (err)
+			goto error_reset_gpiod;
+	}
 
 	for (i = 0; i < PHY_MAX_ADDR; i++) {
 		if ((bus->phy_mask & (1 << i)) == 0) {
@@ -657,7 +660,7 @@ error:
 		mdiodev->device_remove(mdiodev);
 		mdiodev->device_free(mdiodev);
 	}
-
+error_reset_gpiod:
 	/* Put PHYs in RESET to save power */
 	if (bus->reset_gpiod)
 		gpiod_set_value_cansleep(bus->reset_gpiod, 1);
-- 
cgit v1.2.3-59-g8ed1b


From cec2500d44751a7782d16ce99fca104e77e324a5 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 19 Apr 2020 10:01:05 +0300
Subject: mlxsw: spectrum_router: Re-increase scale of IPv6 nexthop groups

As explained in commit fc25996e6f46 ("mlxsw: spectrum_router: Increase
scale of IPv6 nexthop groups"), each nexthop group is hashed by XOR-ing
the interface indexes of all the member nexthop devices.

To avoid many different nexthop groups ending up using the same key, the
above commit started hashing the interface indexes themselves before
they are XOR-ed.

However, in cases in which there are many nexthop groups that all use
the same nexthop device and only differ in the gateway IP, we can still
end up in a situation in which all the groups are using the same key.
This eventually leads to -EBUSY error from rhashtable during insertion.

Improve the situation by also making the gateway IP part of the key.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Alex Veber <alexve@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Tested-by: Alex Veber <alexve@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index d5bca1be3ef5..71aee4914619 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -2999,6 +2999,7 @@ static u32 mlxsw_sp_nexthop_group_hash_obj(const void *data, u32 len, u32 seed)
 		for (i = 0; i < nh_grp->count; i++) {
 			nh = &nh_grp->nexthops[i];
 			val ^= jhash(&nh->ifindex, sizeof(nh->ifindex), seed);
+			val ^= jhash(&nh->gw_addr, sizeof(nh->gw_addr), seed);
 		}
 		return jhash(&val, sizeof(val), seed);
 	default:
@@ -3012,11 +3013,14 @@ mlxsw_sp_nexthop6_group_hash(struct mlxsw_sp_fib6_entry *fib6_entry, u32 seed)
 {
 	unsigned int val = fib6_entry->nrt6;
 	struct mlxsw_sp_rt6 *mlxsw_sp_rt6;
-	struct net_device *dev;
 
 	list_for_each_entry(mlxsw_sp_rt6, &fib6_entry->rt6_list, list) {
-		dev = mlxsw_sp_rt6->rt->fib6_nh->fib_nh_dev;
+		struct fib6_nh *fib6_nh = mlxsw_sp_rt6->rt->fib6_nh;
+		struct net_device *dev = fib6_nh->fib_nh_dev;
+		struct in6_addr *gw = &fib6_nh->fib_nh_gw6;
+
 		val ^= jhash(&dev->ifindex, sizeof(dev->ifindex), seed);
+		val ^= jhash(gw, sizeof(*gw), seed);
 	}
 
 	return jhash(&val, sizeof(val), seed);
-- 
cgit v1.2.3-59-g8ed1b


From b7f03b0b2a213474f452a802769ecd9fcee209a9 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 19 Apr 2020 10:01:06 +0300
Subject: mlxsw: reg: Increase register field length to 13 bits

The Infrastructure Entry Delete Register (IEDR) is used to delete
entries stored in the KVD linear database. Currently, it is only
possible to delete entries of size up to 2048. Future firmware versions
will support deletion of entries of size up to 4096.

Increase the size of the field so that the driver will be able to
perform such deletions in the future, when required.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 9b39b8e70519..3c3db1c874b6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -3203,7 +3203,7 @@ MLXSW_ITEM32_INDEXED(reg, iedr, rec_type, MLXSW_REG_IEDR_BASE_LEN, 24, 8,
  * Size of entries do be deleted. The unit is 1 entry, regardless of entry type.
  * Access: OP
  */
-MLXSW_ITEM32_INDEXED(reg, iedr, rec_size, MLXSW_REG_IEDR_BASE_LEN, 0, 11,
+MLXSW_ITEM32_INDEXED(reg, iedr, rec_size, MLXSW_REG_IEDR_BASE_LEN, 0, 13,
 		     MLXSW_REG_IEDR_REC_LEN, 0x00, false);
 
 /* reg_iedr_rec_index_start
-- 
cgit v1.2.3-59-g8ed1b


From cceadc831e728fde74c21813519962c648f5ca7c Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Sun, 19 Apr 2020 10:27:57 +0200
Subject: net: phy: mscc: use mdiobus_get_phy()

Don't use internal knowledge of the mdio bus core, instead use
mdiobus_get_phy() which does the same thing.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index acddef79f4e8..5391acdece05 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1292,7 +1292,7 @@ out:
  */
 static bool vsc8584_is_pkg_init(struct phy_device *phydev, bool reversed)
 {
-	struct mdio_device **map = phydev->mdio.bus->mdio_map;
+	struct mii_bus *bus = phydev->mdio.bus;
 	struct vsc8531_private *vsc8531;
 	struct phy_device *phy;
 	int i, addr;
@@ -1306,11 +1306,10 @@ static bool vsc8584_is_pkg_init(struct phy_device *phydev, bool reversed)
 		else
 			addr = vsc8531->base_addr + i;
 
-		if (!map[addr])
+		phy = mdiobus_get_phy(bus, addr);
+		if (!phy)
 			continue;
 
-		phy = container_of(map[addr], struct phy_device, mdio);
-
 		if ((phy->phy_id & phydev->drv->phy_id_mask) !=
 		    (phydev->drv->phy_id & phydev->drv->phy_id_mask))
 			continue;
-- 
cgit v1.2.3-59-g8ed1b


From b66c9b8de22b666718c2fcb0ae84ce620f9b81c0 Mon Sep 17 00:00:00 2001
From: Lourdes Pedrajas <lu@pplo.net>
Date: Sun, 19 Apr 2020 11:16:51 +0200
Subject: selftests: pmtu: implement IPIP, SIT and ip6tnl PMTU discovery tests

Add PMTU discovery tests for these encapsulations:

- IPIP
- SIT, mode ip6ip
- ip6tnl, modes ip6ip6 and ipip6

Signed-off-by: Lourdes Pedrajas <lu@pplo.net>
Reviewed-by: Stefano Brivio <sbrivio@redhat.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/pmtu.sh | 122 ++++++++++++++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/tools/testing/selftests/net/pmtu.sh b/tools/testing/selftests/net/pmtu.sh
index 71a62e7e35b1..77c09cd339c3 100755
--- a/tools/testing/selftests/net/pmtu.sh
+++ b/tools/testing/selftests/net/pmtu.sh
@@ -67,6 +67,10 @@
 #	Same as pmtu_ipv4_vxlan4, but using a generic UDP IPv4/IPv6
 #	encapsulation (GUE) over IPv4/IPv6, instead of VXLAN
 #
+# - pmtu_ipv{4,6}_ipv{4,6}_exception
+#	Same as pmtu_ipv4_vxlan4, but using a IPv4/IPv6 tunnel over IPv4/IPv6,
+#	instead of VXLAN
+#
 # - pmtu_vti4_exception
 #	Set up vti tunnel on top of veth, with xfrm states and policies, in two
 #	namespaces with matching endpoints. Check that route exception is not
@@ -151,6 +155,10 @@ tests="
 	pmtu_ipv6_gue4_exception	IPv6 over gue4: PMTU exceptions		1
 	pmtu_ipv4_gue6_exception	IPv4 over gue6: PMTU exceptions		1
 	pmtu_ipv6_gue6_exception	IPv6 over gue6: PMTU exceptions		1
+	pmtu_ipv4_ipv4_exception	IPv4 over IPv4: PMTU exceptions		1
+	pmtu_ipv6_ipv4_exception	IPv6 over IPv4: PMTU exceptions		1
+	pmtu_ipv4_ipv6_exception	IPv4 over IPv6: PMTU exceptions		1
+	pmtu_ipv6_ipv6_exception	IPv6 over IPv6: PMTU exceptions		1
 	pmtu_vti6_exception		vti6: PMTU exceptions			0
 	pmtu_vti4_exception		vti4: PMTU exceptions			0
 	pmtu_vti4_default_mtu		vti4: default MTU assignment		0
@@ -363,6 +371,62 @@ setup_gue66() {
 	setup_fou_or_gue 6 6 gue
 }
 
+setup_ipvX_over_ipvY() {
+	inner=${1}
+	outer=${2}
+
+	if [ "${outer}" -eq 4 ]; then
+		a_addr="${prefix4}.${a_r1}.1"
+		b_addr="${prefix4}.${b_r1}.1"
+		if [ "${inner}" -eq 4 ]; then
+			type="ipip"
+			mode="ipip"
+		else
+			type="sit"
+			mode="ip6ip"
+		fi
+	else
+		a_addr="${prefix6}:${a_r1}::1"
+		b_addr="${prefix6}:${b_r1}::1"
+		type="ip6tnl"
+		if [ "${inner}" -eq 4 ]; then
+			mode="ipip6"
+		else
+			mode="ip6ip6"
+		fi
+	fi
+
+	run_cmd ${ns_a} ip link add ip_a type ${type} local ${a_addr} remote ${b_addr} mode ${mode} || return 2
+	run_cmd ${ns_b} ip link add ip_b type ${type} local ${b_addr} remote ${a_addr} mode ${mode}
+
+	run_cmd ${ns_a} ip link set ip_a up
+	run_cmd ${ns_b} ip link set ip_b up
+
+	if [ "${inner}" = "4" ]; then
+		run_cmd ${ns_a} ip addr add ${tunnel4_a_addr}/${tunnel4_mask} dev ip_a
+		run_cmd ${ns_b} ip addr add ${tunnel4_b_addr}/${tunnel4_mask} dev ip_b
+	else
+		run_cmd ${ns_a} ip addr add ${tunnel6_a_addr}/${tunnel6_mask} dev ip_a
+		run_cmd ${ns_b} ip addr add ${tunnel6_b_addr}/${tunnel6_mask} dev ip_b
+	fi
+}
+
+setup_ip4ip4() {
+	setup_ipvX_over_ipvY 4 4
+}
+
+setup_ip6ip4() {
+	setup_ipvX_over_ipvY 6 4
+}
+
+setup_ip4ip6() {
+	setup_ipvX_over_ipvY 4 6
+}
+
+setup_ip6ip6() {
+	setup_ipvX_over_ipvY 6 6
+}
+
 setup_namespaces() {
 	for n in ${NS_A} ${NS_B} ${NS_R1} ${NS_R2}; do
 		ip netns add ${n} || return 1
@@ -908,6 +972,64 @@ test_pmtu_ipv6_gue6_exception() {
 	test_pmtu_ipvX_over_fouY_or_gueY 6 6 gue
 }
 
+test_pmtu_ipvX_over_ipvY_exception() {
+	inner=${1}
+	outer=${2}
+	ll_mtu=4000
+
+	setup namespaces routing ip${inner}ip${outer} || return 2
+
+	trace "${ns_a}" ip_a         "${ns_b}"  ip_b  \
+	      "${ns_a}" veth_A-R1    "${ns_r1}" veth_R1-A \
+	      "${ns_b}" veth_B-R1    "${ns_r1}" veth_R1-B
+
+	if [ ${inner} -eq 4 ]; then
+		ping=ping
+		dst=${tunnel4_b_addr}
+	else
+		ping=${ping6}
+		dst=${tunnel6_b_addr}
+	fi
+
+	if [ ${outer} -eq 4 ]; then
+		#                      IPv4 header
+		exp_mtu=$((${ll_mtu} - 20))
+	else
+		#                      IPv6 header   Option 4
+		exp_mtu=$((${ll_mtu} - 40          - 8))
+	fi
+
+	# Create route exception by exceeding link layer MTU
+	mtu "${ns_a}"  veth_A-R1 $((${ll_mtu} + 1000))
+	mtu "${ns_r1}" veth_R1-A $((${ll_mtu} + 1000))
+	mtu "${ns_b}"  veth_B-R1 ${ll_mtu}
+	mtu "${ns_r1}" veth_R1-B ${ll_mtu}
+
+	mtu "${ns_a}" ip_a $((${ll_mtu} + 1000)) || return
+	mtu "${ns_b}" ip_b $((${ll_mtu} + 1000)) || return
+	run_cmd ${ns_a} ${ping} -q -M want -i 0.1 -w 1 -s $((${ll_mtu} + 500)) ${dst}
+
+	# Check that exception was created
+	pmtu="$(route_get_dst_pmtu_from_exception "${ns_a}" ${dst})"
+	check_pmtu_value ${exp_mtu} "${pmtu}" "exceeding link layer MTU on ip${inner}ip${outer} interface"
+}
+
+test_pmtu_ipv4_ipv4_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 4 4
+}
+
+test_pmtu_ipv6_ipv4_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 6 4
+}
+
+test_pmtu_ipv4_ipv6_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 4 6
+}
+
+test_pmtu_ipv6_ipv6_exception() {
+	test_pmtu_ipvX_over_ipvY_exception 6 6
+}
+
 test_pmtu_vti4_exception() {
 	setup namespaces veth vti4 xfrm4 || return 2
 	trace "${ns_a}" veth_a    "${ns_b}" veth_b \
-- 
cgit v1.2.3-59-g8ed1b


From 92efe48e8fe2b6848ca56e14436c2b7d20c986d4 Mon Sep 17 00:00:00 2001
From: Dejin Zheng <zhengdejin5@gmail.com>
Date: Sun, 19 Apr 2020 20:02:53 +0800
Subject: net: ethernet: dnet: convert to
 devm_platform_get_and_ioremap_resource

Use devm_platform_get_and_ioremap_resource() to simplify the code. It
combines platform_get_resource() and devm_ioremap_resource(), and it also
provides the resource for use by the following code.

Signed-off-by: Dejin Zheng <zhengdejin5@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dnet.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/dnet.c b/drivers/net/ethernet/dnet.c
index 057a508dd6e2..db98274501a0 100644
--- a/drivers/net/ethernet/dnet.c
+++ b/drivers/net/ethernet/dnet.c
@@ -776,8 +776,7 @@ static int dnet_probe(struct platform_device *pdev)
 
 	spin_lock_init(&bp->lock);
 
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	bp->regs = devm_ioremap_resource(&pdev->dev, res);
+	bp->regs = devm_platform_get_and_ioremap_resource(pdev, 0, &res);
 	if (IS_ERR(bp->regs)) {
 		err = PTR_ERR(bp->regs);
 		goto err_out_free_dev;
-- 
cgit v1.2.3-59-g8ed1b


From 5333fdbed0c860c7a17dc12f328203d29947eff7 Mon Sep 17 00:00:00 2001
From: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Date: Sun, 19 Apr 2020 21:14:43 +0530
Subject: net: sun: Remove unneeded cast from memory allocation

Remove casting the values returned by memory allocation function.

Coccinelle emits WARNING: casting value returned by memory allocation
function to (struct cas_init_block *) is useless.

This issue was detected by using the Coccinelle software.

Signed-off-by: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/cassini.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index e6d1aa882fa5..3ee6ab104cb9 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -5059,7 +5059,7 @@ static int cas_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (cp->cas_flags & CAS_FLAG_SATURN)
 		cas_saturn_firmware_init(cp);
 
-	cp->init_block = (struct cas_init_block *)
+	cp->init_block =
 		pci_alloc_consistent(pdev, sizeof(struct cas_init_block),
 				     &cp->block_dvma);
 	if (!cp->init_block) {
-- 
cgit v1.2.3-59-g8ed1b


From 745e5ad5084db32e093a143de324afa4ed95f14d Mon Sep 17 00:00:00 2001
From: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Date: Sun, 19 Apr 2020 21:59:17 +0530
Subject: net: qed: Remove unneeded cast from memory allocation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove casting the values returned by memory allocation function.

Coccinelle emits WARNING: casting value returned by memory allocation
function to struct pointer is useless.

This issue was detected by using Coccinelle.

Signed-off-by: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Acked-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_roce.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 37e70562a964..475b89903f46 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -736,9 +736,9 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
 
 	p_ramrod = &p_ent->ramrod.roce_destroy_qp_resp;
 
-	p_ramrod_res = (struct roce_destroy_qp_resp_output_params *)
-	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev, sizeof(*p_ramrod_res),
-			       &ramrod_res_phys, GFP_KERNEL);
+	p_ramrod_res = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+					  sizeof(*p_ramrod_res),
+					  &ramrod_res_phys, GFP_KERNEL);
 
 	if (!p_ramrod_res) {
 		rc = -ENOMEM;
@@ -872,10 +872,10 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Send a query responder ramrod to FW to get RQ-PSN and state */
-	p_resp_ramrod_res = (struct roce_query_qp_resp_output_params *)
-	    dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
-			       sizeof(*p_resp_ramrod_res),
-			       &resp_ramrod_res_phys, GFP_KERNEL);
+	p_resp_ramrod_res =
+		dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+				   sizeof(*p_resp_ramrod_res),
+				   &resp_ramrod_res_phys, GFP_KERNEL);
 	if (!p_resp_ramrod_res) {
 		DP_NOTICE(p_hwfn,
 			  "qed query qp failed: cannot allocate memory (ramrod)\n");
@@ -920,8 +920,7 @@ int qed_roce_query_qp(struct qed_hwfn *p_hwfn,
 	}
 
 	/* Send a query requester ramrod to FW to get SQ-PSN and state */
-	p_req_ramrod_res = (struct roce_query_qp_req_output_params *)
-			   dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
+	p_req_ramrod_res = dma_alloc_coherent(&p_hwfn->cdev->pdev->dev,
 					      sizeof(*p_req_ramrod_res),
 					      &req_ramrod_res_phys,
 					      GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From 2ac1fa439ee97aaaa124c37340a316918bb0a8bc Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 19 Apr 2020 23:07:39 +0200
Subject: r8169: inline rtl8169_mark_as_last_descriptor

rtl8169_mark_as_last_descriptor() has just one user, so inline it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1bc415d00cb8..3b8ae49c3ea2 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3939,11 +3939,6 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
 	}
 }
 
-static inline void rtl8169_mark_as_last_descriptor(struct RxDesc *desc)
-{
-	desc->opts1 |= cpu_to_le32(RingEnd);
-}
-
 static int rtl8169_rx_fill(struct rtl8169_private *tp)
 {
 	unsigned int i;
@@ -3959,7 +3954,8 @@ static int rtl8169_rx_fill(struct rtl8169_private *tp)
 		tp->Rx_databuff[i] = data;
 	}
 
-	rtl8169_mark_as_last_descriptor(tp->RxDescArray + NUM_RX_DESC - 1);
+	/* mark as last descriptor in the ring */
+	tp->RxDescArray[NUM_RX_DESC - 1].opts1 |= cpu_to_le32(RingEnd);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 9d3679fe0f30b5a4c1ae6303611c9f2bbeb9961d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 19 Apr 2020 23:16:55 +0200
Subject: r8169: inline rtl8169_make_unusable_by_asic

Inline rtl8169_make_unusable_by_asic() and simplify it:
- Address field doesn't need to be poisoned because descriptor is
  owned by CPU now
- desc->opts1 is set by rtl8169_mark_to_asic() and rtl8169_rx_fill(),
  therefore we don't have to preserve any field parts.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 3b8ae49c3ea2..b8cc064ee9f5 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3882,12 +3882,6 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static inline void rtl8169_make_unusable_by_asic(struct RxDesc *desc)
-{
-	desc->addr = cpu_to_le64(0x0badbadbadbadbadull);
-	desc->opts1 &= ~cpu_to_le32(DescOwn | RsvdMask);
-}
-
 static inline void rtl8169_mark_to_asic(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
@@ -3935,7 +3929,8 @@ static void rtl8169_rx_clear(struct rtl8169_private *tp)
 			       R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
 		__free_pages(tp->Rx_databuff[i], get_order(R8169_RX_BUF_SIZE));
 		tp->Rx_databuff[i] = NULL;
-		rtl8169_make_unusable_by_asic(tp->RxDescArray + i);
+		tp->RxDescArray[i].addr = 0;
+		tp->RxDescArray[i].opts1 = 0;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:04:00 +0200
Subject: net: ethernet: fec: Replace interrupt driven MDIO with polled IO

Measurements of the MDIO bus have shown that driving the MDIO bus
using interrupts is slow. Back to back MDIO transactions take about
90us, with 25us spent performing the transaction, and the remainder of
the time the bus is idle.

Replacing the completion interrupt with polled IO results in back to
back transactions of 40us. The polling loop waiting for the hardware
to complete the transaction takes around 28us. Which suggests
interrupt handling has an overhead of 50us, and polled IO nearly
halves this overhead, and doubles the MDIO performance.

Suggested-by: Chris Healy <cphealy@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h      |  4 +-
 drivers/net/ethernet/freescale/fec_main.c | 67 ++++++++++++++++---------------
 2 files changed, 35 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e74dd1f86bba..a6cdd5b61921 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -376,8 +376,7 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
-#define FEC_NAPI_IMASK	FEC_ENET_MII
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */
@@ -543,7 +542,6 @@ struct fec_enet_private {
 	int	link;
 	int	full_duplex;
 	int	speed;
-	struct	completion mdio_done;
 	int	irq[FEC_IRQ_NUM];
 	bool	bufdesc_ex;
 	int	pause_flag;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index dc6f8763a5d4..2267bf75784e 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)
 	writel((__force u32)cpu_to_be32(temp_mac[1]),
 	       fep->hwp + FEC_ADDR_HIGH);
 
-	/* Clear any outstanding interrupt. */
-	writel(0xffffffff, fep->hwp + FEC_IEVENT);
+	/* Clear any outstanding interrupt, except MDIO. */
+	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);
 
 	fec_enet_bd_init(ndev);
 
@@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)
 	if (fep->link)
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	else
-		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+		writel(0, fep->hwp + FEC_IMASK);
 
 	/* Init the interrupt coalescing */
 	fec_enet_itr_coal_init(ndev);
@@ -1652,6 +1652,10 @@ fec_enet_interrupt(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
+
+	/* Don't clear MDIO events, we poll for those */
+	int_events &= ~FEC_ENET_MII;
+
 	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
@@ -1659,16 +1663,12 @@ fec_enet_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
-			/* Disable the NAPI interrupts */
-			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
+			/* Disable interrupts */
+			writel(0, fep->hwp + FEC_IMASK);
 			__napi_schedule(&fep->napi);
 		}
 	}
 
-	if (int_events & FEC_ENET_MII) {
-		ret = IRQ_HANDLED;
-		complete(&fep->mdio_done);
-	}
 	return ret;
 }
 
@@ -1818,11 +1818,24 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 		phy_print_status(phy_dev);
 }
 
+static int fec_enet_mdio_wait(struct fec_enet_private *fep)
+{
+	uint ievent;
+	int ret;
+
+	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,
+					ievent & FEC_ENET_MII, 2, 30000);
+
+	if (!ret)
+		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
+
+	return ret;
+}
+
 static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
-	unsigned long time_left;
 	int ret = 0, frame_start, frame_addr, frame_op;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1830,8 +1843,6 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	if (ret < 0)
 		return ret;
 
-	reinit_completion(&fep->mdio_done);
-
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1843,11 +1854,9 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		time_left = wait_for_completion_timeout(&fep->mdio_done,
-				usecs_to_jiffies(FEC_MII_TIMEOUT));
-		if (time_left == 0) {
+		ret = fec_enet_mdio_wait(fep);
+		if (ret) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
-			ret = -ETIMEDOUT;
 			goto out;
 		}
 
@@ -1866,11 +1875,9 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-	if (time_left == 0) {
+	ret = fec_enet_mdio_wait(fep);
+	if (ret) {
 		netdev_err(fep->netdev, "MDIO read timeout\n");
-		ret = -ETIMEDOUT;
 		goto out;
 	}
 
@@ -1888,7 +1895,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
-	unsigned long time_left;
 	int ret, frame_start, frame_addr;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1898,8 +1904,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	else
 		ret = 0;
 
-	reinit_completion(&fep->mdio_done);
-
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1911,11 +1915,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-		if (time_left == 0) {
+		ret = fec_enet_mdio_wait(fep);
+		if (ret) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
-			ret = -ETIMEDOUT;
 			goto out;
 		}
 	} else {
@@ -1931,12 +1933,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-	if (time_left == 0) {
+	ret = fec_enet_mdio_wait(fep);
+	if (ret)
 		netdev_err(fep->netdev, "MDIO write timeout\n");
-		ret  = -ETIMEDOUT;
-	}
 
 out:
 	pm_runtime_mark_last_busy(dev);
@@ -2132,6 +2131,9 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
+	/* Clear any pending transaction complete indication */
+	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
+
 	fep->mii_bus = mdiobus_alloc();
 	if (fep->mii_bus == NULL) {
 		err = -ENOMEM;
@@ -3674,7 +3676,6 @@ fec_probe(struct platform_device *pdev)
 		fep->irq[i] = irq;
 	}
 
-	init_completion(&fep->mdio_done);
 	ret = fec_enet_mii_init(pdev);
 	if (ret)
 		goto failed_mii_init;
-- 
cgit v1.2.3-59-g8ed1b


From 3e782985cb3ce00a32c372b37d8feefdae18ddf1 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:04:01 +0200
Subject: net: ethernet: fec: Allow configuration of MDIO bus speed

MDIO busses typically operate at 2.5MHz. However many devices can
operate at faster speeds. This then allows more MDIO transactions per
second, useful for Ethernet switch statistics, or Ethernet PHY TDR
data. Allow the bus speed to be configured, using the standard
"clock-frequency" property, which i2c busses use to indicate the bus
speed. Before using this property, ensure all devices on the bus do
actually support the requested clock speed.

Suggested-by: Chris Healy <Chris.Healy@zii.aero>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/fsl-fec.txt |  1 +
 Documentation/devicetree/bindings/net/mdio.yaml   |  6 ++++++
 drivers/net/ethernet/freescale/fec_main.c         | 11 ++++++++---
 3 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index ff8b0f211aa1..26c492a2e0e1 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -82,6 +82,7 @@ ethernet@83fec000 {
 	phy-supply = <&reg_fec_supply>;
 	phy-handle = <&ethphy>;
 	mdio {
+	        clock-frequency = <5000000>;
 		ethphy: ethernet-phy@6 {
 			compatible = "ethernet-phy-ieee802.3-c22";
 			reg = <6>;
diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index 50c3397a82bc..ab4a9df8b8e2 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -39,6 +39,12 @@ properties:
       and must therefore be appropriately determined based on all PHY
       requirements (maximum value of all per-PHY RESET pulse widths).
 
+  clock-frequency:
+    description:
+      Desired MDIO bus clock frequency in Hz. Values greater than IEEE 802.3
+      defined 2.5MHz should only be used when all devices on the bus support
+      the given clock speed.
+
 patternProperties:
   "^ethernet-phy@[0-9a-f]+$":
     type: object
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 2267bf75784e..832a24e2805c 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2067,6 +2067,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	struct device_node *node;
 	int err = -ENXIO;
 	u32 mii_speed, holdtime;
+	u32 bus_freq;
 
 	/*
 	 * The i.MX28 dual fec interfaces are not equal.
@@ -2094,15 +2095,20 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 		return -ENOENT;
 	}
 
+	bus_freq = 2500000; /* 2.5MHz by default */
+	node = of_get_child_by_name(pdev->dev.of_node, "mdio");
+	if (node)
+		of_property_read_u32(node, "clock-frequency", &bus_freq);
+
 	/*
-	 * Set MII speed to 2.5 MHz (= clk_get_rate() / 2 * phy_speed)
+	 * Set MII speed (= clk_get_rate() / 2 * phy_speed)
 	 *
 	 * The formula for FEC MDC is 'ref_freq / (MII_SPEED x 2)' while
 	 * for ENET-MAC is 'ref_freq / ((MII_SPEED + 1) x 2)'.  The i.MX28
 	 * Reference Manual has an error on this, and gets fixed on i.MX6Q
 	 * document.
 	 */
-	mii_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), 5000000);
+	mii_speed = DIV_ROUND_UP(clk_get_rate(fep->clk_ipg), bus_freq * 2);
 	if (fep->quirks & FEC_QUIRK_ENET_MAC)
 		mii_speed--;
 	if (mii_speed > 63) {
@@ -2148,7 +2154,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	fep->mii_bus->priv = fep;
 	fep->mii_bus->parent = &pdev->dev;
 
-	node = of_get_child_by_name(pdev->dev.of_node, "mdio");
 	err = of_mdiobus_register(fep->mii_bus, node);
 	of_node_put(node);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 3c01eb62d1bd85a5dd1d22d74339728666ae2c45 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:04:02 +0200
Subject: net: ethernet: fec: Allow the MDIO preamble to be disabled

An MDIO transaction normally starts with 32 1s as a preamble. However
not all devices require such a preamble. Add a device tree property
which allows the preamble to be suppressed. This will halve the size of
the MDIO transaction, allowing faster transactions. But it should only
be used when all devices on the bus support suppressed preamble.

Suggested-by: Chris Healy <Chris.Healy@zii.aero>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 6 ++++++
 drivers/net/ethernet/freescale/fec_main.c       | 9 ++++++++-
 2 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index ab4a9df8b8e2..cd6c6ae6dabb 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -45,6 +45,12 @@ properties:
       defined 2.5MHz should only be used when all devices on the bus support
       the given clock speed.
 
+  suppress-preamble:
+    description:
+      The 32 bit preamble should be suppressed. In order for this to
+      work, all devices on the bus must support suppressed preamble.
+    type: boolean
+
 patternProperties:
   "^ethernet-phy@[0-9a-f]+$":
     type: object
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 832a24e2805c..1ae075a246a3 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -2064,6 +2064,7 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	static struct mii_bus *fec0_mii_bus;
 	struct net_device *ndev = platform_get_drvdata(pdev);
 	struct fec_enet_private *fep = netdev_priv(ndev);
+	bool suppress_preamble = false;
 	struct device_node *node;
 	int err = -ENXIO;
 	u32 mii_speed, holdtime;
@@ -2097,8 +2098,11 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 
 	bus_freq = 2500000; /* 2.5MHz by default */
 	node = of_get_child_by_name(pdev->dev.of_node, "mdio");
-	if (node)
+	if (node) {
 		of_property_read_u32(node, "clock-frequency", &bus_freq);
+		suppress_preamble = of_property_read_bool(node,
+							  "suppress-preamble");
+	}
 
 	/*
 	 * Set MII speed (= clk_get_rate() / 2 * phy_speed)
@@ -2135,6 +2139,9 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 
 	fep->phy_speed = mii_speed << 1 | holdtime << 8;
 
+	if (suppress_preamble)
+		fep->phy_speed |= BIT(7);
+
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 	/* Clear any pending transaction complete indication */
-- 
cgit v1.2.3-59-g8ed1b


From eec517cdb4810b3843eb7707971de3164088bff1 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:11:50 +0200
Subject: net: Add IF_OPER_TESTING

RFC 2863 defines the operational state testing. Add support for this
state, both as an IF_LINK_MODE_ and a __LINK_STATE_.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h | 41 +++++++++++++++++++++++++++++++++++++++++
 include/uapi/linux/if.h   |  1 +
 net/core/dev.c            |  5 +++++
 net/core/link_watch.c     | 12 ++++++++++--
 net/core/rtnetlink.c      |  9 ++++++++-
 5 files changed, 65 insertions(+), 3 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 130a668049ab..0750b54b3765 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -288,6 +288,7 @@ enum netdev_state_t {
 	__LINK_STATE_NOCARRIER,
 	__LINK_STATE_LINKWATCH_PENDING,
 	__LINK_STATE_DORMANT,
+	__LINK_STATE_TESTING,
 };
 
 
@@ -3907,6 +3908,46 @@ static inline bool netif_dormant(const struct net_device *dev)
 }
 
 
+/**
+ *	netif_testing_on - mark device as under test.
+ *	@dev: network device
+ *
+ * Mark device as under test (as per RFC2863).
+ *
+ * The testing state indicates that some test(s) must be performed on
+ * the interface. After completion of the test, the interface state
+ * will change to up, dormant, or down, as appropriate.
+ */
+static inline void netif_testing_on(struct net_device *dev)
+{
+	if (!test_and_set_bit(__LINK_STATE_TESTING, &dev->state))
+		linkwatch_fire_event(dev);
+}
+
+/**
+ *	netif_testing_off - set device as not under test.
+ *	@dev: network device
+ *
+ * Device is not in testing state.
+ */
+static inline void netif_testing_off(struct net_device *dev)
+{
+	if (test_and_clear_bit(__LINK_STATE_TESTING, &dev->state))
+		linkwatch_fire_event(dev);
+}
+
+/**
+ *	netif_testing - test if device is under test
+ *	@dev: network device
+ *
+ * Check if device is under test
+ */
+static inline bool netif_testing(const struct net_device *dev)
+{
+	return test_bit(__LINK_STATE_TESTING, &dev->state);
+}
+
+
 /**
  *	netif_oper_up - test if device is operational
  *	@dev: network device
diff --git a/include/uapi/linux/if.h b/include/uapi/linux/if.h
index be714cd8c826..797ba2c1562a 100644
--- a/include/uapi/linux/if.h
+++ b/include/uapi/linux/if.h
@@ -178,6 +178,7 @@ enum {
 enum {
 	IF_LINK_MODE_DEFAULT,
 	IF_LINK_MODE_DORMANT,	/* limit upward transition to dormant */
+	IF_LINK_MODE_TESTING,	/* limit upward transition to testing */
 };
 
 /*
diff --git a/net/core/dev.c b/net/core/dev.c
index 522288177bbd..fb61522b1ce1 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -9136,6 +9136,11 @@ void netif_stacked_transfer_operstate(const struct net_device *rootdev,
 	else
 		netif_dormant_off(dev);
 
+	if (rootdev->operstate == IF_OPER_TESTING)
+		netif_testing_on(dev);
+	else
+		netif_testing_off(dev);
+
 	if (netif_carrier_ok(rootdev))
 		netif_carrier_on(dev);
 	else
diff --git a/net/core/link_watch.c b/net/core/link_watch.c
index f153e0601838..75431ca9300f 100644
--- a/net/core/link_watch.c
+++ b/net/core/link_watch.c
@@ -34,6 +34,9 @@ static DEFINE_SPINLOCK(lweventlist_lock);
 
 static unsigned char default_operstate(const struct net_device *dev)
 {
+	if (netif_testing(dev))
+		return IF_OPER_TESTING;
+
 	if (!netif_carrier_ok(dev))
 		return (dev->ifindex != dev_get_iflink(dev) ?
 			IF_OPER_LOWERLAYERDOWN : IF_OPER_DOWN);
@@ -55,11 +58,15 @@ static void rfc2863_policy(struct net_device *dev)
 	write_lock_bh(&dev_base_lock);
 
 	switch(dev->link_mode) {
+	case IF_LINK_MODE_TESTING:
+		if (operstate == IF_OPER_UP)
+			operstate = IF_OPER_TESTING;
+		break;
+
 	case IF_LINK_MODE_DORMANT:
 		if (operstate == IF_OPER_UP)
 			operstate = IF_OPER_DORMANT;
 		break;
-
 	case IF_LINK_MODE_DEFAULT:
 	default:
 		break;
@@ -74,7 +81,8 @@ static void rfc2863_policy(struct net_device *dev)
 void linkwatch_init_dev(struct net_device *dev)
 {
 	/* Handle pre-registration link state changes */
-	if (!netif_carrier_ok(dev) || netif_dormant(dev))
+	if (!netif_carrier_ok(dev) || netif_dormant(dev) ||
+	    netif_testing(dev))
 		rfc2863_policy(dev);
 }
 
diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index 709ebbf8ab5b..d6f4f4a9e8ba 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -829,11 +829,18 @@ static void set_operstate(struct net_device *dev, unsigned char transition)
 	switch (transition) {
 	case IF_OPER_UP:
 		if ((operstate == IF_OPER_DORMANT ||
+		     operstate == IF_OPER_TESTING ||
 		     operstate == IF_OPER_UNKNOWN) &&
-		    !netif_dormant(dev))
+		    !netif_dormant(dev) && !netif_testing(dev))
 			operstate = IF_OPER_UP;
 		break;
 
+	case IF_OPER_TESTING:
+		if (operstate == IF_OPER_UP ||
+		    operstate == IF_OPER_UNKNOWN)
+			operstate = IF_OPER_TESTING;
+		break;
+
 	case IF_OPER_DORMANT:
 		if (operstate == IF_OPER_UP ||
 		    operstate == IF_OPER_UNKNOWN)
-- 
cgit v1.2.3-59-g8ed1b


From db30a57779b18b7cef092c21887ed2d23ad2bd35 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:11:51 +0200
Subject: net: Add testing sysfs attribute

Similar to speed, duplex and dormant, report the testing status
in sysfs.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/ABI/testing/sysfs-class-net | 13 +++++++++++++
 net/core/net-sysfs.c                      | 15 ++++++++++++++-
 2 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/Documentation/ABI/testing/sysfs-class-net b/Documentation/ABI/testing/sysfs-class-net
index 664a8f6a634f..3b404577f380 100644
--- a/Documentation/ABI/testing/sysfs-class-net
+++ b/Documentation/ABI/testing/sysfs-class-net
@@ -124,6 +124,19 @@ Description:
 		authentication is performed (e.g: 802.1x). 'link_mode' attribute
 		will also reflect the dormant state.
 
+What:		/sys/class/net/<iface>/testing
+Date:		April 2020
+KernelVersion:	5.8
+Contact:	netdev@vger.kernel.org
+Description:
+		Indicates whether the interface is under test. Possible
+		values are:
+		0: interface is not being tested
+		1: interface is being tested
+
+		When an interface is under test, it cannot be expected
+		to pass packets as normal.
+
 What:		/sys/clas/net/<iface>/duplex
 Date:		October 2009
 KernelVersion:	2.6.33
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 4773ad6ec111..0d9e46de205e 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -243,6 +243,18 @@ static ssize_t duplex_show(struct device *dev,
 }
 static DEVICE_ATTR_RO(duplex);
 
+static ssize_t testing_show(struct device *dev,
+			    struct device_attribute *attr, char *buf)
+{
+	struct net_device *netdev = to_net_dev(dev);
+
+	if (netif_running(netdev))
+		return sprintf(buf, fmt_dec, !!netif_testing(netdev));
+
+	return -EINVAL;
+}
+static DEVICE_ATTR_RO(testing);
+
 static ssize_t dormant_show(struct device *dev,
 			    struct device_attribute *attr, char *buf)
 {
@@ -260,7 +272,7 @@ static const char *const operstates[] = {
 	"notpresent", /* currently unused */
 	"down",
 	"lowerlayerdown",
-	"testing", /* currently unused */
+	"testing",
 	"dormant",
 	"up"
 };
@@ -524,6 +536,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_speed.attr,
 	&dev_attr_duplex.attr,
 	&dev_attr_dormant.attr,
+	&dev_attr_testing.attr,
 	&dev_attr_operstate.attr,
 	&dev_attr_carrier_changes.attr,
 	&dev_attr_ifalias.attr,
-- 
cgit v1.2.3-59-g8ed1b


From 77e9b2ab451d32c81468ef679c377b2f831cd720 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Mon, 20 Apr 2020 00:11:52 +0200
Subject: net: ethtool: self_test: Mark interface in testing operative status

When an interface is executing a self test, put the interface into
operative status testing.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 89d0b1827aaf..593fa665f820 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1746,7 +1746,9 @@ static int ethtool_self_test(struct net_device *dev, char __user *useraddr)
 	if (!data)
 		return -ENOMEM;
 
+	netif_testing_on(dev);
 	ops->self_test(dev, &test, data);
+	netif_testing_off(dev);
 
 	ret = -EFAULT;
 	if (copy_to_user(useraddr, &test, sizeof(test)))
-- 
cgit v1.2.3-59-g8ed1b


From 736fc0e17fade807e59cd9001af88ec4bcca62ef Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 20 Apr 2020 10:17:26 +0800
Subject: net: hns3: split out hclge_fd_check_ether_tuple()

For readability and maintainability, this patch separates the
handling part of each flow type in hclge_fd_check_spec()
into standalone functions.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 285 +++++++++++++--------
 1 file changed, 173 insertions(+), 112 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a758f9ae32be..80d0651145df 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5244,157 +5244,158 @@ static int hclge_config_action(struct hclge_dev *hdev, u8 stage,
 	return hclge_fd_ad_config(hdev, stage, ad_data.ad_id, &ad_data);
 }
 
-static int hclge_fd_check_spec(struct hclge_dev *hdev,
-			       struct ethtool_rx_flow_spec *fs, u32 *unused)
+static int hclge_fd_check_tcpip4_tuple(struct ethtool_tcpip4_spec *spec,
+				       u32 *unused_tuple)
 {
-	struct ethtool_tcpip4_spec *tcp_ip4_spec;
-	struct ethtool_usrip4_spec *usr_ip4_spec;
-	struct ethtool_tcpip6_spec *tcp_ip6_spec;
-	struct ethtool_usrip6_spec *usr_ip6_spec;
-	struct ethhdr *ether_spec;
-
-	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+	if (!spec || !unused_tuple)
 		return -EINVAL;
 
-	if (!(fs->flow_type & hdev->fd_cfg.proto_support))
-		return -EOPNOTSUPP;
-
-	if ((fs->flow_type & FLOW_EXT) &&
-	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
-		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
-		return -EOPNOTSUPP;
-	}
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
 
-	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
-	case SCTP_V4_FLOW:
-	case TCP_V4_FLOW:
-	case UDP_V4_FLOW:
-		tcp_ip4_spec = &fs->h_u.tcp_ip4_spec;
-		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC);
+	if (!spec->ip4src)
+		*unused_tuple |= BIT(INNER_SRC_IP);
 
-		if (!tcp_ip4_spec->ip4src)
-			*unused |= BIT(INNER_SRC_IP);
+	if (!spec->ip4dst)
+		*unused_tuple |= BIT(INNER_DST_IP);
 
-		if (!tcp_ip4_spec->ip4dst)
-			*unused |= BIT(INNER_DST_IP);
+	if (!spec->psrc)
+		*unused_tuple |= BIT(INNER_SRC_PORT);
 
-		if (!tcp_ip4_spec->psrc)
-			*unused |= BIT(INNER_SRC_PORT);
+	if (!spec->pdst)
+		*unused_tuple |= BIT(INNER_DST_PORT);
 
-		if (!tcp_ip4_spec->pdst)
-			*unused |= BIT(INNER_DST_PORT);
+	if (!spec->tos)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
-		if (!tcp_ip4_spec->tos)
-			*unused |= BIT(INNER_IP_TOS);
+	return 0;
+}
 
-		break;
-	case IP_USER_FLOW:
-		usr_ip4_spec = &fs->h_u.usr_ip4_spec;
-		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
+static int hclge_fd_check_ip4_tuple(struct ethtool_usrip4_spec *spec,
+				    u32 *unused_tuple)
+{
+	if (!spec || !unused_tuple)
+		return -EINVAL;
 
-		if (!usr_ip4_spec->ip4src)
-			*unused |= BIT(INNER_SRC_IP);
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+		BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
-		if (!usr_ip4_spec->ip4dst)
-			*unused |= BIT(INNER_DST_IP);
+	if (!spec->ip4src)
+		*unused_tuple |= BIT(INNER_SRC_IP);
 
-		if (!usr_ip4_spec->tos)
-			*unused |= BIT(INNER_IP_TOS);
+	if (!spec->ip4dst)
+		*unused_tuple |= BIT(INNER_DST_IP);
 
-		if (!usr_ip4_spec->proto)
-			*unused |= BIT(INNER_IP_PROTO);
+	if (!spec->tos)
+		*unused_tuple |= BIT(INNER_IP_TOS);
 
-		if (usr_ip4_spec->l4_4_bytes)
-			return -EOPNOTSUPP;
+	if (!spec->proto)
+		*unused_tuple |= BIT(INNER_IP_PROTO);
 
-		if (usr_ip4_spec->ip_ver != ETH_RX_NFC_IP4)
-			return -EOPNOTSUPP;
+	if (spec->l4_4_bytes)
+		return -EOPNOTSUPP;
 
-		break;
-	case SCTP_V6_FLOW:
-	case TCP_V6_FLOW:
-	case UDP_V6_FLOW:
-		tcp_ip6_spec = &fs->h_u.tcp_ip6_spec;
-		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-			BIT(INNER_IP_TOS);
+	if (spec->ip_ver != ETH_RX_NFC_IP4)
+		return -EOPNOTSUPP;
 
-		/* check whether src/dst ip address used */
-		if (!tcp_ip6_spec->ip6src[0] && !tcp_ip6_spec->ip6src[1] &&
-		    !tcp_ip6_spec->ip6src[2] && !tcp_ip6_spec->ip6src[3])
-			*unused |= BIT(INNER_SRC_IP);
+	return 0;
+}
 
-		if (!tcp_ip6_spec->ip6dst[0] && !tcp_ip6_spec->ip6dst[1] &&
-		    !tcp_ip6_spec->ip6dst[2] && !tcp_ip6_spec->ip6dst[3])
-			*unused |= BIT(INNER_DST_IP);
+static int hclge_fd_check_tcpip6_tuple(struct ethtool_tcpip6_spec *spec,
+				       u32 *unused_tuple)
+{
+	if (!spec || !unused_tuple)
+		return -EINVAL;
 
-		if (!tcp_ip6_spec->psrc)
-			*unused |= BIT(INNER_SRC_PORT);
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+		BIT(INNER_IP_TOS);
 
-		if (!tcp_ip6_spec->pdst)
-			*unused |= BIT(INNER_DST_PORT);
+	/* check whether src/dst ip address used */
+	if (!spec->ip6src[0] && !spec->ip6src[1] &&
+	    !spec->ip6src[2] && !spec->ip6src[3])
+		*unused_tuple |= BIT(INNER_SRC_IP);
 
-		if (tcp_ip6_spec->tclass)
-			return -EOPNOTSUPP;
+	if (!spec->ip6dst[0] && !spec->ip6dst[1] &&
+	    !spec->ip6dst[2] && !spec->ip6dst[3])
+		*unused_tuple |= BIT(INNER_DST_IP);
 
-		break;
-	case IPV6_USER_FLOW:
-		usr_ip6_spec = &fs->h_u.usr_ip6_spec;
-		*unused |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
-			BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) |
-			BIT(INNER_DST_PORT);
+	if (!spec->psrc)
+		*unused_tuple |= BIT(INNER_SRC_PORT);
 
-		/* check whether src/dst ip address used */
-		if (!usr_ip6_spec->ip6src[0] && !usr_ip6_spec->ip6src[1] &&
-		    !usr_ip6_spec->ip6src[2] && !usr_ip6_spec->ip6src[3])
-			*unused |= BIT(INNER_SRC_IP);
+	if (!spec->pdst)
+		*unused_tuple |= BIT(INNER_DST_PORT);
 
-		if (!usr_ip6_spec->ip6dst[0] && !usr_ip6_spec->ip6dst[1] &&
-		    !usr_ip6_spec->ip6dst[2] && !usr_ip6_spec->ip6dst[3])
-			*unused |= BIT(INNER_DST_IP);
+	if (spec->tclass)
+		return -EOPNOTSUPP;
 
-		if (!usr_ip6_spec->l4_proto)
-			*unused |= BIT(INNER_IP_PROTO);
+	return 0;
+}
 
-		if (usr_ip6_spec->tclass)
-			return -EOPNOTSUPP;
+static int hclge_fd_check_ip6_tuple(struct ethtool_usrip6_spec *spec,
+				    u32 *unused_tuple)
+{
+	if (!spec || !unused_tuple)
+		return -EINVAL;
 
-		if (usr_ip6_spec->l4_4_bytes)
-			return -EOPNOTSUPP;
+	*unused_tuple |= BIT(INNER_SRC_MAC) | BIT(INNER_DST_MAC) |
+		BIT(INNER_IP_TOS) | BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
-		break;
-	case ETHER_FLOW:
-		ether_spec = &fs->h_u.ether_spec;
-		*unused |= BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
-			BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT) |
-			BIT(INNER_IP_TOS) | BIT(INNER_IP_PROTO);
+	/* check whether src/dst ip address used */
+	if (!spec->ip6src[0] && !spec->ip6src[1] &&
+	    !spec->ip6src[2] && !spec->ip6src[3])
+		*unused_tuple |= BIT(INNER_SRC_IP);
 
-		if (is_zero_ether_addr(ether_spec->h_source))
-			*unused |= BIT(INNER_SRC_MAC);
+	if (!spec->ip6dst[0] && !spec->ip6dst[1] &&
+	    !spec->ip6dst[2] && !spec->ip6dst[3])
+		*unused_tuple |= BIT(INNER_DST_IP);
 
-		if (is_zero_ether_addr(ether_spec->h_dest))
-			*unused |= BIT(INNER_DST_MAC);
+	if (!spec->l4_proto)
+		*unused_tuple |= BIT(INNER_IP_PROTO);
 
-		if (!ether_spec->h_proto)
-			*unused |= BIT(INNER_ETH_TYPE);
+	if (spec->tclass)
+		return -EOPNOTSUPP;
 
-		break;
-	default:
+	if (spec->l4_4_bytes)
 		return -EOPNOTSUPP;
-	}
 
+	return 0;
+}
+
+static int hclge_fd_check_ether_tuple(struct ethhdr *spec, u32 *unused_tuple)
+{
+	if (!spec || !unused_tuple)
+		return -EINVAL;
+
+	*unused_tuple |= BIT(INNER_SRC_IP) | BIT(INNER_DST_IP) |
+		BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT) |
+		BIT(INNER_IP_TOS) | BIT(INNER_IP_PROTO);
+
+	if (is_zero_ether_addr(spec->h_source))
+		*unused_tuple |= BIT(INNER_SRC_MAC);
+
+	if (is_zero_ether_addr(spec->h_dest))
+		*unused_tuple |= BIT(INNER_DST_MAC);
+
+	if (!spec->h_proto)
+		*unused_tuple |= BIT(INNER_ETH_TYPE);
+
+	return 0;
+}
+
+static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
+				    struct ethtool_rx_flow_spec *fs,
+				    u32 *unused_tuple)
+{
 	if ((fs->flow_type & FLOW_EXT)) {
 		if (fs->h_ext.vlan_etype)
 			return -EOPNOTSUPP;
 		if (!fs->h_ext.vlan_tci)
-			*unused |= BIT(INNER_VLAN_TAG_FST);
+			*unused_tuple |= BIT(INNER_VLAN_TAG_FST);
 
-		if (fs->m_ext.vlan_tci) {
-			if (be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)
-				return -EINVAL;
-		}
+		if (fs->m_ext.vlan_tci &&
+		    be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)
+			return -EINVAL;
 	} else {
-		*unused |= BIT(INNER_VLAN_TAG_FST);
+		*unused_tuple |= BIT(INNER_VLAN_TAG_FST);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
@@ -5402,14 +5403,74 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 			return -EOPNOTSUPP;
 
 		if (is_zero_ether_addr(fs->h_ext.h_dest))
-			*unused |= BIT(INNER_DST_MAC);
+			*unused_tuple |= BIT(INNER_DST_MAC);
 		else
-			*unused &= ~(BIT(INNER_DST_MAC));
+			*unused_tuple &= ~(BIT(INNER_DST_MAC));
 	}
 
 	return 0;
 }
 
+static int hclge_fd_check_spec(struct hclge_dev *hdev,
+			       struct ethtool_rx_flow_spec *fs,
+			       u32 *unused_tuple)
+{
+	int ret;
+
+	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+		return -EINVAL;
+
+	if (!(fs->flow_type & hdev->fd_cfg.proto_support))
+		return -EOPNOTSUPP;
+
+	if ((fs->flow_type & FLOW_EXT) &&
+	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
+		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case SCTP_V4_FLOW:
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+		ret = hclge_fd_check_tcpip4_tuple(&fs->h_u.tcp_ip4_spec,
+						  unused_tuple);
+		break;
+	case IP_USER_FLOW:
+		ret = hclge_fd_check_ip4_tuple(&fs->h_u.usr_ip4_spec,
+					       unused_tuple);
+		break;
+	case SCTP_V6_FLOW:
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+		ret = hclge_fd_check_tcpip6_tuple(&fs->h_u.tcp_ip6_spec,
+						  unused_tuple);
+		break;
+	case IPV6_USER_FLOW:
+		ret = hclge_fd_check_ip6_tuple(&fs->h_u.usr_ip6_spec,
+					       unused_tuple);
+		break;
+	case ETHER_FLOW:
+		if (hdev->fd_cfg.fd_mode !=
+			HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) {
+			dev_err(&hdev->pdev->dev,
+				"ETHER_FLOW is not supported in current fd mode!\n");
+			return -EOPNOTSUPP;
+		}
+
+		ret = hclge_fd_check_ether_tuple(&fs->h_u.ether_spec,
+						 unused_tuple);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	if (ret)
+		return ret;
+
+	return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple);
+}
+
 static bool hclge_fd_rule_exist(struct hclge_dev *hdev, u16 location)
 {
 	struct hclge_fd_rule *rule = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From fa663c096052ec8c5dfef29fc1cd30190e5aba93 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Mon, 20 Apr 2020 10:17:27 +0800
Subject: net: hns3: split out hclge_get_fd_rule_info()

hclge_get_fd_rule_info() is bloated, so this patch splits it
into several standalone helper functions for readability and
maintainability.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 303 +++++++++++----------
 1 file changed, 159 insertions(+), 144 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 80d0651145df..b1fe204c9e40 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5938,6 +5938,149 @@ static int hclge_get_fd_rule_cnt(struct hnae3_handle *handle,
 	return 0;
 }
 
+static void hclge_fd_get_tcpip4_info(struct hclge_fd_rule *rule,
+				     struct ethtool_tcpip4_spec *spec,
+				     struct ethtool_tcpip4_spec *spec_mask)
+{
+	spec->ip4src = cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
+	spec_mask->ip4src = rule->unused_tuple & BIT(INNER_SRC_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
+
+	spec->ip4dst = cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
+	spec_mask->ip4dst = rule->unused_tuple & BIT(INNER_DST_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
+
+	spec->psrc = cpu_to_be16(rule->tuples.src_port);
+	spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ?
+			0 : cpu_to_be16(rule->tuples_mask.src_port);
+
+	spec->pdst = cpu_to_be16(rule->tuples.dst_port);
+	spec_mask->pdst = rule->unused_tuple & BIT(INNER_DST_PORT) ?
+			0 : cpu_to_be16(rule->tuples_mask.dst_port);
+
+	spec->tos = rule->tuples.ip_tos;
+	spec_mask->tos = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+}
+
+static void hclge_fd_get_ip4_info(struct hclge_fd_rule *rule,
+				  struct ethtool_usrip4_spec *spec,
+				  struct ethtool_usrip4_spec *spec_mask)
+{
+	spec->ip4src = cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
+	spec_mask->ip4src = rule->unused_tuple & BIT(INNER_SRC_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
+
+	spec->ip4dst = cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
+	spec_mask->ip4dst = rule->unused_tuple & BIT(INNER_DST_IP) ?
+			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
+
+	spec->tos = rule->tuples.ip_tos;
+	spec_mask->tos = rule->unused_tuple & BIT(INNER_IP_TOS) ?
+			0 : rule->tuples_mask.ip_tos;
+
+	spec->proto = rule->tuples.ip_proto;
+	spec_mask->proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ?
+			0 : rule->tuples_mask.ip_proto;
+
+	spec->ip_ver = ETH_RX_NFC_IP4;
+}
+
+static void hclge_fd_get_tcpip6_info(struct hclge_fd_rule *rule,
+				     struct ethtool_tcpip6_spec *spec,
+				     struct ethtool_tcpip6_spec *spec_mask)
+{
+	cpu_to_be32_array(spec->ip6src,
+			  rule->tuples.src_ip, IPV6_SIZE);
+	cpu_to_be32_array(spec->ip6dst,
+			  rule->tuples.dst_ip, IPV6_SIZE);
+	if (rule->unused_tuple & BIT(INNER_SRC_IP))
+		memset(spec_mask->ip6src, 0, sizeof(spec_mask->ip6src));
+	else
+		cpu_to_be32_array(spec_mask->ip6src, rule->tuples_mask.src_ip,
+				  IPV6_SIZE);
+
+	if (rule->unused_tuple & BIT(INNER_DST_IP))
+		memset(spec_mask->ip6dst, 0, sizeof(spec_mask->ip6dst));
+	else
+		cpu_to_be32_array(spec_mask->ip6dst, rule->tuples_mask.dst_ip,
+				  IPV6_SIZE);
+
+	spec->psrc = cpu_to_be16(rule->tuples.src_port);
+	spec_mask->psrc = rule->unused_tuple & BIT(INNER_SRC_PORT) ?
+			0 : cpu_to_be16(rule->tuples_mask.src_port);
+
+	spec->pdst = cpu_to_be16(rule->tuples.dst_port);
+	spec_mask->pdst = rule->unused_tuple & BIT(INNER_DST_PORT) ?
+			0 : cpu_to_be16(rule->tuples_mask.dst_port);
+}
+
+static void hclge_fd_get_ip6_info(struct hclge_fd_rule *rule,
+				  struct ethtool_usrip6_spec *spec,
+				  struct ethtool_usrip6_spec *spec_mask)
+{
+	cpu_to_be32_array(spec->ip6src, rule->tuples.src_ip, IPV6_SIZE);
+	cpu_to_be32_array(spec->ip6dst, rule->tuples.dst_ip, IPV6_SIZE);
+	if (rule->unused_tuple & BIT(INNER_SRC_IP))
+		memset(spec_mask->ip6src, 0, sizeof(spec_mask->ip6src));
+	else
+		cpu_to_be32_array(spec_mask->ip6src,
+				  rule->tuples_mask.src_ip, IPV6_SIZE);
+
+	if (rule->unused_tuple & BIT(INNER_DST_IP))
+		memset(spec_mask->ip6dst, 0, sizeof(spec_mask->ip6dst));
+	else
+		cpu_to_be32_array(spec_mask->ip6dst,
+				  rule->tuples_mask.dst_ip, IPV6_SIZE);
+
+	spec->l4_proto = rule->tuples.ip_proto;
+	spec_mask->l4_proto = rule->unused_tuple & BIT(INNER_IP_PROTO) ?
+			0 : rule->tuples_mask.ip_proto;
+}
+
+static void hclge_fd_get_ether_info(struct hclge_fd_rule *rule,
+				    struct ethhdr *spec,
+				    struct ethhdr *spec_mask)
+{
+	ether_addr_copy(spec->h_source, rule->tuples.src_mac);
+	ether_addr_copy(spec->h_dest, rule->tuples.dst_mac);
+
+	if (rule->unused_tuple & BIT(INNER_SRC_MAC))
+		eth_zero_addr(spec_mask->h_source);
+	else
+		ether_addr_copy(spec_mask->h_source, rule->tuples_mask.src_mac);
+
+	if (rule->unused_tuple & BIT(INNER_DST_MAC))
+		eth_zero_addr(spec_mask->h_dest);
+	else
+		ether_addr_copy(spec_mask->h_dest, rule->tuples_mask.dst_mac);
+
+	spec->h_proto = cpu_to_be16(rule->tuples.ether_proto);
+	spec_mask->h_proto = rule->unused_tuple & BIT(INNER_ETH_TYPE) ?
+			0 : cpu_to_be16(rule->tuples_mask.ether_proto);
+}
+
+static void hclge_fd_get_ext_info(struct ethtool_rx_flow_spec *fs,
+				  struct hclge_fd_rule *rule)
+{
+	if (fs->flow_type & FLOW_EXT) {
+		fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1);
+		fs->m_ext.vlan_tci =
+				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
+				cpu_to_be16(VLAN_VID_MASK) :
+				cpu_to_be16(rule->tuples_mask.vlan_tag1);
+	}
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		ether_addr_copy(fs->h_ext.h_dest, rule->tuples.dst_mac);
+		if (rule->unused_tuple & BIT(INNER_DST_MAC))
+			eth_zero_addr(fs->m_u.ether_spec.h_dest);
+		else
+			ether_addr_copy(fs->m_u.ether_spec.h_dest,
+					rule->tuples_mask.dst_mac);
+	}
+}
+
 static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
 				  struct ethtool_rxnfc *cmd)
 {
@@ -5970,162 +6113,34 @@ static int hclge_get_fd_rule_info(struct hnae3_handle *handle,
 	case SCTP_V4_FLOW:
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
-		fs->h_u.tcp_ip4_spec.ip4src =
-				cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
-		fs->m_u.tcp_ip4_spec.ip4src =
-			rule->unused_tuple & BIT(INNER_SRC_IP) ?
-			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
-
-		fs->h_u.tcp_ip4_spec.ip4dst =
-				cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
-		fs->m_u.tcp_ip4_spec.ip4dst =
-			rule->unused_tuple & BIT(INNER_DST_IP) ?
-			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
-
-		fs->h_u.tcp_ip4_spec.psrc = cpu_to_be16(rule->tuples.src_port);
-		fs->m_u.tcp_ip4_spec.psrc =
-				rule->unused_tuple & BIT(INNER_SRC_PORT) ?
-				0 : cpu_to_be16(rule->tuples_mask.src_port);
-
-		fs->h_u.tcp_ip4_spec.pdst = cpu_to_be16(rule->tuples.dst_port);
-		fs->m_u.tcp_ip4_spec.pdst =
-				rule->unused_tuple & BIT(INNER_DST_PORT) ?
-				0 : cpu_to_be16(rule->tuples_mask.dst_port);
-
-		fs->h_u.tcp_ip4_spec.tos = rule->tuples.ip_tos;
-		fs->m_u.tcp_ip4_spec.tos =
-				rule->unused_tuple & BIT(INNER_IP_TOS) ?
-				0 : rule->tuples_mask.ip_tos;
-
+		hclge_fd_get_tcpip4_info(rule, &fs->h_u.tcp_ip4_spec,
+					 &fs->m_u.tcp_ip4_spec);
 		break;
 	case IP_USER_FLOW:
-		fs->h_u.usr_ip4_spec.ip4src =
-				cpu_to_be32(rule->tuples.src_ip[IPV4_INDEX]);
-		fs->m_u.tcp_ip4_spec.ip4src =
-			rule->unused_tuple & BIT(INNER_SRC_IP) ?
-			0 : cpu_to_be32(rule->tuples_mask.src_ip[IPV4_INDEX]);
-
-		fs->h_u.usr_ip4_spec.ip4dst =
-				cpu_to_be32(rule->tuples.dst_ip[IPV4_INDEX]);
-		fs->m_u.usr_ip4_spec.ip4dst =
-			rule->unused_tuple & BIT(INNER_DST_IP) ?
-			0 : cpu_to_be32(rule->tuples_mask.dst_ip[IPV4_INDEX]);
-
-		fs->h_u.usr_ip4_spec.tos = rule->tuples.ip_tos;
-		fs->m_u.usr_ip4_spec.tos =
-				rule->unused_tuple & BIT(INNER_IP_TOS) ?
-				0 : rule->tuples_mask.ip_tos;
-
-		fs->h_u.usr_ip4_spec.proto = rule->tuples.ip_proto;
-		fs->m_u.usr_ip4_spec.proto =
-				rule->unused_tuple & BIT(INNER_IP_PROTO) ?
-				0 : rule->tuples_mask.ip_proto;
-
-		fs->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
-
+		hclge_fd_get_ip4_info(rule, &fs->h_u.usr_ip4_spec,
+				      &fs->m_u.usr_ip4_spec);
 		break;
 	case SCTP_V6_FLOW:
 	case TCP_V6_FLOW:
 	case UDP_V6_FLOW:
-		cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6src,
-				  rule->tuples.src_ip, IPV6_SIZE);
-		if (rule->unused_tuple & BIT(INNER_SRC_IP))
-			memset(fs->m_u.tcp_ip6_spec.ip6src, 0,
-			       sizeof(int) * IPV6_SIZE);
-		else
-			cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6src,
-					  rule->tuples_mask.src_ip, IPV6_SIZE);
-
-		cpu_to_be32_array(fs->h_u.tcp_ip6_spec.ip6dst,
-				  rule->tuples.dst_ip, IPV6_SIZE);
-		if (rule->unused_tuple & BIT(INNER_DST_IP))
-			memset(fs->m_u.tcp_ip6_spec.ip6dst, 0,
-			       sizeof(int) * IPV6_SIZE);
-		else
-			cpu_to_be32_array(fs->m_u.tcp_ip6_spec.ip6dst,
-					  rule->tuples_mask.dst_ip, IPV6_SIZE);
-
-		fs->h_u.tcp_ip6_spec.psrc = cpu_to_be16(rule->tuples.src_port);
-		fs->m_u.tcp_ip6_spec.psrc =
-				rule->unused_tuple & BIT(INNER_SRC_PORT) ?
-				0 : cpu_to_be16(rule->tuples_mask.src_port);
-
-		fs->h_u.tcp_ip6_spec.pdst = cpu_to_be16(rule->tuples.dst_port);
-		fs->m_u.tcp_ip6_spec.pdst =
-				rule->unused_tuple & BIT(INNER_DST_PORT) ?
-				0 : cpu_to_be16(rule->tuples_mask.dst_port);
-
+		hclge_fd_get_tcpip6_info(rule, &fs->h_u.tcp_ip6_spec,
+					 &fs->m_u.tcp_ip6_spec);
 		break;
 	case IPV6_USER_FLOW:
-		cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6src,
-				  rule->tuples.src_ip, IPV6_SIZE);
-		if (rule->unused_tuple & BIT(INNER_SRC_IP))
-			memset(fs->m_u.usr_ip6_spec.ip6src, 0,
-			       sizeof(int) * IPV6_SIZE);
-		else
-			cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6src,
-					  rule->tuples_mask.src_ip, IPV6_SIZE);
-
-		cpu_to_be32_array(fs->h_u.usr_ip6_spec.ip6dst,
-				  rule->tuples.dst_ip, IPV6_SIZE);
-		if (rule->unused_tuple & BIT(INNER_DST_IP))
-			memset(fs->m_u.usr_ip6_spec.ip6dst, 0,
-			       sizeof(int) * IPV6_SIZE);
-		else
-			cpu_to_be32_array(fs->m_u.usr_ip6_spec.ip6dst,
-					  rule->tuples_mask.dst_ip, IPV6_SIZE);
-
-		fs->h_u.usr_ip6_spec.l4_proto = rule->tuples.ip_proto;
-		fs->m_u.usr_ip6_spec.l4_proto =
-				rule->unused_tuple & BIT(INNER_IP_PROTO) ?
-				0 : rule->tuples_mask.ip_proto;
-
-		break;
-	case ETHER_FLOW:
-		ether_addr_copy(fs->h_u.ether_spec.h_source,
-				rule->tuples.src_mac);
-		if (rule->unused_tuple & BIT(INNER_SRC_MAC))
-			eth_zero_addr(fs->m_u.ether_spec.h_source);
-		else
-			ether_addr_copy(fs->m_u.ether_spec.h_source,
-					rule->tuples_mask.src_mac);
-
-		ether_addr_copy(fs->h_u.ether_spec.h_dest,
-				rule->tuples.dst_mac);
-		if (rule->unused_tuple & BIT(INNER_DST_MAC))
-			eth_zero_addr(fs->m_u.ether_spec.h_dest);
-		else
-			ether_addr_copy(fs->m_u.ether_spec.h_dest,
-					rule->tuples_mask.dst_mac);
-
-		fs->h_u.ether_spec.h_proto =
-				cpu_to_be16(rule->tuples.ether_proto);
-		fs->m_u.ether_spec.h_proto =
-				rule->unused_tuple & BIT(INNER_ETH_TYPE) ?
-				0 : cpu_to_be16(rule->tuples_mask.ether_proto);
-
+		hclge_fd_get_ip6_info(rule, &fs->h_u.usr_ip6_spec,
+				      &fs->m_u.usr_ip6_spec);
 		break;
+	/* The flow type of fd rule has been checked before adding in to rule
+	 * list. As other flow types have been handled, it must be ETHER_FLOW
+	 * for the default case
+	 */
 	default:
-		spin_unlock_bh(&hdev->fd_rule_lock);
-		return -EOPNOTSUPP;
-	}
-
-	if (fs->flow_type & FLOW_EXT) {
-		fs->h_ext.vlan_tci = cpu_to_be16(rule->tuples.vlan_tag1);
-		fs->m_ext.vlan_tci =
-				rule->unused_tuple & BIT(INNER_VLAN_TAG_FST) ?
-				cpu_to_be16(VLAN_VID_MASK) :
-				cpu_to_be16(rule->tuples_mask.vlan_tag1);
+		hclge_fd_get_ether_info(rule, &fs->h_u.ether_spec,
+					&fs->m_u.ether_spec);
+		break;
 	}
 
-	if (fs->flow_type & FLOW_MAC_EXT) {
-		ether_addr_copy(fs->h_ext.h_dest, rule->tuples.dst_mac);
-		if (rule->unused_tuple & BIT(INNER_DST_MAC))
-			eth_zero_addr(fs->m_u.ether_spec.h_dest);
-		else
-			ether_addr_copy(fs->m_u.ether_spec.h_dest,
-					rule->tuples_mask.dst_mac);
-	}
+	hclge_fd_get_ext_info(fs, rule);
 
 	if (rule->action == HCLGE_FD_ACTION_DROP_PACKET) {
 		fs->ring_cookie = RX_CLS_FLOW_DISC;
-- 
cgit v1.2.3-59-g8ed1b


From e9368c4094f54c1af44a842814c1d87d4365c684 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Mon, 20 Apr 2020 10:17:28 +0800
Subject: net: hns3: remove an unnecessary case 0 in hclge_fd_convert_tuple()

Since the default case already covers case 0, remove this
redundant case 0.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index b1fe204c9e40..999f05686f06 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5006,8 +5006,6 @@ static bool hclge_fd_convert_tuple(u32 tuple_bit, u8 *key_x, u8 *key_y,
 		return true;
 
 	switch (tuple_bit) {
-	case 0:
-		return false;
 	case BIT(INNER_DST_MAC):
 		for (i = 0; i < ETH_ALEN; i++) {
 			calc_x(key_x[ETH_ALEN - 1 - i], rule->tuples.dst_mac[i],
-- 
cgit v1.2.3-59-g8ed1b


From 16505f878e3065f2c0457e38ec867cdd6d3ce243 Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Mon, 20 Apr 2020 10:17:29 +0800
Subject: net: hns3: remove useless proto_support field in struct hclge_fd_cfg

The proto_support field in struct hclge_fd_cfg records which protocols
the flow director table currently supports. It is unnecessary, since
checking for the unsupported ones is more efficient, so this patch
removes it.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 ++++++-----------
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h |  1 -
 2 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 999f05686f06..e238a9df6282 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4876,9 +4876,6 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
 		return -EOPNOTSUPP;
 	}
 
-	hdev->fd_cfg.proto_support =
-		TCP_V4_FLOW | UDP_V4_FLOW | SCTP_V4_FLOW | TCP_V6_FLOW |
-		UDP_V6_FLOW | SCTP_V6_FLOW | IPV4_USER_FLOW | IPV6_USER_FLOW;
 	key_cfg = &hdev->fd_cfg.key_cfg[HCLGE_FD_STAGE_1];
 	key_cfg->key_sel = HCLGE_FD_KEY_BASE_ON_TUPLE,
 	key_cfg->inner_sipv6_word_en = LOW_2_WORDS;
@@ -4892,11 +4889,9 @@ static int hclge_init_fd_config(struct hclge_dev *hdev)
 				BIT(INNER_SRC_PORT) | BIT(INNER_DST_PORT);
 
 	/* If use max 400bit key, we can support tuples for ether type */
-	if (hdev->fd_cfg.max_key_length == MAX_KEY_LENGTH) {
-		hdev->fd_cfg.proto_support |= ETHER_FLOW;
+	if (hdev->fd_cfg.fd_mode == HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
 		key_cfg->tuple_active |=
 				BIT(INNER_DST_MAC) | BIT(INNER_SRC_MAC);
-	}
 
 	/* roce_type is used to filter roce frames
 	 * dst_vport is used to specify the rule
@@ -5397,7 +5392,8 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
-		if (!(hdev->fd_cfg.proto_support & ETHER_FLOW))
+		if (hdev->fd_cfg.fd_mode !=
+		    HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
 			return -EOPNOTSUPP;
 
 		if (is_zero_ether_addr(fs->h_ext.h_dest))
@@ -5413,21 +5409,20 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 			       struct ethtool_rx_flow_spec *fs,
 			       u32 *unused_tuple)
 {
+	u32 flow_type;
 	int ret;
 
 	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
 		return -EINVAL;
 
-	if (!(fs->flow_type & hdev->fd_cfg.proto_support))
-		return -EOPNOTSUPP;
-
 	if ((fs->flow_type & FLOW_EXT) &&
 	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
 		dev_err(&hdev->pdev->dev, "user-def bytes are not supported\n");
 		return -EOPNOTSUPP;
 	}
 
-	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	flow_type = fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT);
+	switch (flow_type) {
 	case SCTP_V4_FLOW:
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 71df23d5f1b4..a58c26200ea0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -580,7 +580,6 @@ struct hclge_fd_key_cfg {
 struct hclge_fd_cfg {
 	u8 fd_mode;
 	u16 max_key_length; /* use bit as unit */
-	u32 proto_support;
 	u32 rule_num[MAX_STAGE_NUM]; /* rule entry number */
 	u16 cnt_num[MAX_STAGE_NUM]; /* rule hit counter number */
 	struct hclge_fd_key_cfg key_cfg[MAX_STAGE_NUM];
-- 
cgit v1.2.3-59-g8ed1b


From f84f6a8634f3424244e91fc3fd460152dae689cf Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Mon, 20 Apr 2020 10:17:30 +0800
Subject: net: hns3: remove two unused structures in hclge_cmd.h

struct hclge_mac_vlan_remove_cmd and struct hclge_mac_vlan_add_cmd are
unused, so remove them from hclge_cmd.h.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h | 25 ----------------------
 1 file changed, 25 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 96498d9b4754..90e422efe590 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -733,31 +733,6 @@ struct hclge_mac_mgr_tbl_entry_cmd {
 	u8      rsv3[2];
 };
 
-struct hclge_mac_vlan_add_cmd {
-	__le16  flags;
-	__le16  mac_addr_hi16;
-	__le32  mac_addr_lo32;
-	__le32  mac_addr_msk_hi32;
-	__le16  mac_addr_msk_lo16;
-	__le16  vlan_tag;
-	__le16  ingress_port;
-	__le16  egress_port;
-	u8      rsv[4];
-};
-
-#define HNS3_MAC_VLAN_CFG_FLAG_BIT 0
-struct hclge_mac_vlan_remove_cmd {
-	__le16  flags;
-	__le16  mac_addr_hi16;
-	__le32  mac_addr_lo32;
-	__le32  mac_addr_msk_hi32;
-	__le16  mac_addr_msk_lo16;
-	__le16  vlan_tag;
-	__le16  ingress_port;
-	__le16  egress_port;
-	u8      rsv[4];
-};
-
 struct hclge_vlan_filter_ctrl_cmd {
 	u8 vlan_type;
 	u8 vlan_fe;
-- 
cgit v1.2.3-59-g8ed1b


From 84944d5c4797df7aa0b2677283db5064a2560633 Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Mon, 20 Apr 2020 10:17:31 +0800
Subject: net: hns3: modify some unsuitable type declaration

In hclge_set_fd_key_config(), parameter 'stage_num' should be of
type enum HCLGE_FD_STAGE, and in hclge_config_key(), 'tuple_size'
should be of type u8. Also replace unsigned int with u32 for 'i'.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e238a9df6282..9edee7d94bbf 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -4822,7 +4822,8 @@ static int hclge_get_fd_allocation(struct hclge_dev *hdev,
 	return ret;
 }
 
-static int hclge_set_fd_key_config(struct hclge_dev *hdev, int stage_num)
+static int hclge_set_fd_key_config(struct hclge_dev *hdev,
+				   enum HCLGE_FD_STAGE stage_num)
 {
 	struct hclge_set_fd_key_config_cmd *req;
 	struct hclge_fd_key_cfg *stage;
@@ -5158,9 +5159,10 @@ static int hclge_config_key(struct hclge_dev *hdev, u8 stage,
 	struct hclge_fd_key_cfg *key_cfg = &hdev->fd_cfg.key_cfg[stage];
 	u8 key_x[MAX_KEY_BYTES], key_y[MAX_KEY_BYTES];
 	u8 *cur_key_x, *cur_key_y;
-	unsigned int i;
-	int ret, tuple_size;
 	u8 meta_data_region;
+	u8 tuple_size;
+	int ret;
+	u32 i;
 
 	memset(key_x, 0, sizeof(key_x));
 	memset(key_y, 0, sizeof(key_y));
-- 
cgit v1.2.3-59-g8ed1b


From 0b4bdc55df6163f2861fe935755e892963dc9512 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Mon, 20 Apr 2020 10:17:32 +0800
Subject: net: hns3: clean up some coding style issue

This patch removes some unnecessary blank lines and redundant
parentheses, and changes one tab to a space in
hclge_dbg_dump_reg_common().

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 10 +++-------
 2 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 17228288d4df..cfc9300ff715 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -143,7 +143,7 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
 		return;
 	}
 
-	buf_len	= sizeof(struct hclge_desc) * bd_num;
+	buf_len = sizeof(struct hclge_desc) * bd_num;
 	desc_src = kzalloc(buf_len, GFP_KERNEL);
 	if (!desc_src)
 		return;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 9edee7d94bbf..635aec2ffba4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5380,7 +5380,7 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 				    struct ethtool_rx_flow_spec *fs,
 				    u32 *unused_tuple)
 {
-	if ((fs->flow_type & FLOW_EXT)) {
+	if (fs->flow_type & FLOW_EXT) {
 		if (fs->h_ext.vlan_etype)
 			return -EOPNOTSUPP;
 		if (!fs->h_ext.vlan_tci)
@@ -5401,7 +5401,7 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 		if (is_zero_ether_addr(fs->h_ext.h_dest))
 			*unused_tuple |= BIT(INNER_DST_MAC);
 		else
-			*unused_tuple &= ~(BIT(INNER_DST_MAC));
+			*unused_tuple &= ~BIT(INNER_DST_MAC);
 	}
 
 	return 0;
@@ -5674,7 +5674,7 @@ static int hclge_fd_get_tuple(struct hclge_dev *hdev,
 		break;
 	}
 
-	if ((fs->flow_type & FLOW_EXT)) {
+	if (fs->flow_type & FLOW_EXT) {
 		rule->tuples.vlan_tag1 = be16_to_cpu(fs->h_ext.vlan_tci);
 		rule->tuples_mask.vlan_tag1 = be16_to_cpu(fs->m_ext.vlan_tci);
 	}
@@ -5785,7 +5785,6 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	}
 
 	rule->flow_type = fs->flow_type;
-
 	rule->location = fs->location;
 	rule->unused_tuple = unused;
 	rule->vf_id = dst_vport_id;
@@ -6273,7 +6272,6 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 	 */
 	if (hdev->fd_active_type == HCLGE_FD_EP_ACTIVE) {
 		spin_unlock_bh(&hdev->fd_rule_lock);
-
 		return -EOPNOTSUPP;
 	}
 
@@ -6287,14 +6285,12 @@ static int hclge_add_fd_entry_by_arfs(struct hnae3_handle *handle, u16 queue_id,
 		bit_id = find_first_zero_bit(hdev->fd_bmap, MAX_FD_FILTER_NUM);
 		if (bit_id >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) {
 			spin_unlock_bh(&hdev->fd_rule_lock);
-
 			return -ENOSPC;
 		}
 
 		rule = kzalloc(sizeof(*rule), GFP_ATOMIC);
 		if (!rule) {
 			spin_unlock_bh(&hdev->fd_rule_lock);
-
 			return -ENOMEM;
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From a3ca5e9048be07576259d2ba9628dc8187f7690e Mon Sep 17 00:00:00 2001
From: Guojia Liao <liaoguojia@huawei.com>
Date: Mon, 20 Apr 2020 10:17:33 +0800
Subject: net: hns3: add debug information for flow table when failed

Add debug information for failures when processing the flow table,
remove the redundant print when hclge_fd_check_spec() returns an
error, and raise the print level of the "FD not enabled" error.

Signed-off-by: Guojia Liao <liaoguojia@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 45 ++++++++++++++++------
 1 file changed, 34 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 635aec2ffba4..0618f22e6f14 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -5381,22 +5381,32 @@ static int hclge_fd_check_ext_tuple(struct hclge_dev *hdev,
 				    u32 *unused_tuple)
 {
 	if (fs->flow_type & FLOW_EXT) {
-		if (fs->h_ext.vlan_etype)
+		if (fs->h_ext.vlan_etype) {
+			dev_err(&hdev->pdev->dev, "vlan-etype is not supported!\n");
 			return -EOPNOTSUPP;
+		}
+
 		if (!fs->h_ext.vlan_tci)
 			*unused_tuple |= BIT(INNER_VLAN_TAG_FST);
 
 		if (fs->m_ext.vlan_tci &&
-		    be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID)
+		    be16_to_cpu(fs->h_ext.vlan_tci) >= VLAN_N_VID) {
+			dev_err(&hdev->pdev->dev,
+				"failed to config vlan_tci, invalid vlan_tci: %u, max is %u.\n",
+				ntohs(fs->h_ext.vlan_tci), VLAN_N_VID - 1);
 			return -EINVAL;
+		}
 	} else {
 		*unused_tuple |= BIT(INNER_VLAN_TAG_FST);
 	}
 
 	if (fs->flow_type & FLOW_MAC_EXT) {
 		if (hdev->fd_cfg.fd_mode !=
-		    HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1)
+		    HCLGE_FD_MODE_DEPTH_2K_WIDTH_400B_STAGE_1) {
+			dev_err(&hdev->pdev->dev,
+				"FLOW_MAC_EXT is not supported in current fd mode!\n");
 			return -EOPNOTSUPP;
+		}
 
 		if (is_zero_ether_addr(fs->h_ext.h_dest))
 			*unused_tuple |= BIT(INNER_DST_MAC);
@@ -5414,8 +5424,13 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 	u32 flow_type;
 	int ret;
 
-	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1])
+	if (fs->location >= hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1]) {
+		dev_err(&hdev->pdev->dev,
+			"failed to config fd rules, invalid rule location: %u, max is %u\n.",
+			fs->location,
+			hdev->fd_cfg.rule_num[HCLGE_FD_STAGE_1] - 1);
 		return -EINVAL;
+	}
 
 	if ((fs->flow_type & FLOW_EXT) &&
 	    (fs->h_ext.data[0] != 0 || fs->h_ext.data[1] != 0)) {
@@ -5457,11 +5472,18 @@ static int hclge_fd_check_spec(struct hclge_dev *hdev,
 						 unused_tuple);
 		break;
 	default:
+		dev_err(&hdev->pdev->dev,
+			"unsupported protocol type, protocol type = %#x\n",
+			flow_type);
 		return -EOPNOTSUPP;
 	}
 
-	if (ret)
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to check flow union tuple, ret = %d\n",
+			ret);
 		return ret;
+	}
 
 	return hclge_fd_check_ext_tuple(hdev, fs, unused_tuple);
 }
@@ -5729,22 +5751,23 @@ static int hclge_add_fd_entry(struct hnae3_handle *handle,
 	u8 action;
 	int ret;
 
-	if (!hnae3_dev_fd_supported(hdev))
+	if (!hnae3_dev_fd_supported(hdev)) {
+		dev_err(&hdev->pdev->dev,
+			"flow table director is not supported\n");
 		return -EOPNOTSUPP;
+	}
 
 	if (!hdev->fd_en) {
-		dev_warn(&hdev->pdev->dev,
-			 "Please enable flow director first\n");
+		dev_err(&hdev->pdev->dev,
+			"please enable flow director first\n");
 		return -EOPNOTSUPP;
 	}
 
 	fs = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
 	ret = hclge_fd_check_spec(hdev, fs, &unused);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "Check fd spec failed\n");
+	if (ret)
 		return ret;
-	}
 
 	if (fs->ring_cookie == RX_CLS_FLOW_DISC) {
 		action = HCLGE_FD_ACTION_DROP_PACKET;
-- 
cgit v1.2.3-59-g8ed1b


From 5cb51cfe8ad65117d4404b82fb8531768b149ad9 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Mon, 20 Apr 2020 10:17:34 +0800
Subject: net: hns3: add support for dumping MAC reg in debugfs

This patch adds support for dumping MAC reg in debugfs,
which will be helpful for debugging.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |   2 +-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 113 +++++++++++++++++++++
 2 files changed, 114 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index e1d88095a77e..c934f328c040 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -270,7 +270,7 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 		" [igu egu <port_id>] [rpu <tc_queue_num>]",
 		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
 	strncat(printf_buf + strlen(printf_buf),
-		" [rtc] [ppp] [rcb] [tqp <queue_num>]]\n",
+		" [rtc] [ppp] [rcb] [tqp <queue_num>] [mac]]\n",
 		HNS3_DBG_BUF_LEN - strlen(printf_buf) - 1);
 	dev_info(&h->pdev->dev, "%s", printf_buf);
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index cfc9300ff715..66c1ad3a156b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -173,6 +173,114 @@ static void hclge_dbg_dump_reg_common(struct hclge_dev *hdev,
 	kfree(desc_src);
 }
 
+static void hclge_dbg_dump_mac_enable_status(struct hclge_dev *hdev)
+{
+	struct hclge_config_mac_mode_cmd *req;
+	struct hclge_desc desc;
+	u32 loop_en;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAC_MODE, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump mac enable status, ret = %d\n", ret);
+		return;
+	}
+
+	req = (struct hclge_config_mac_mode_cmd *)desc.data;
+	loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
+
+	dev_info(&hdev->pdev->dev, "config_mac_trans_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_EN_B));
+	dev_info(&hdev->pdev->dev, "config_mac_rcv_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_EN_B));
+	dev_info(&hdev->pdev->dev, "config_pad_trans_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_PAD_TX_B));
+	dev_info(&hdev->pdev->dev, "config_pad_rcv_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_PAD_RX_B));
+	dev_info(&hdev->pdev->dev, "config_1588_trans_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_1588_TX_B));
+	dev_info(&hdev->pdev->dev, "config_1588_rcv_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_1588_RX_B));
+	dev_info(&hdev->pdev->dev, "config_mac_app_loop_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_APP_LP_B));
+	dev_info(&hdev->pdev->dev, "config_mac_line_loop_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_LINE_LP_B));
+	dev_info(&hdev->pdev->dev, "config_mac_fcs_tx_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_FCS_TX_B));
+	dev_info(&hdev->pdev->dev, "config_mac_rx_oversize_truncate_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_OVERSIZE_TRUNCATE_B));
+	dev_info(&hdev->pdev->dev, "config_mac_rx_fcs_strip_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_STRIP_B));
+	dev_info(&hdev->pdev->dev, "config_mac_rx_fcs_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_RX_FCS_B));
+	dev_info(&hdev->pdev->dev, "config_mac_tx_under_min_err_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_UNDER_MIN_ERR_B));
+	dev_info(&hdev->pdev->dev, "config_mac_tx_oversize_truncate_en: %#x\n",
+		 hnae3_get_bit(loop_en, HCLGE_MAC_TX_OVERSIZE_TRUNCATE_B));
+}
+
+static void hclge_dbg_dump_mac_frame_size(struct hclge_dev *hdev)
+{
+	struct hclge_config_max_frm_size_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_MAX_FRM_SIZE, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump mac frame size, ret = %d\n", ret);
+		return;
+	}
+
+	req = (struct hclge_config_max_frm_size_cmd *)desc.data;
+
+	dev_info(&hdev->pdev->dev, "max_frame_size: %u\n",
+		 le16_to_cpu(req->max_frm_size));
+	dev_info(&hdev->pdev->dev, "min_frame_size: %u\n", req->min_frm_size);
+}
+
+static void hclge_dbg_dump_mac_speed_duplex(struct hclge_dev *hdev)
+{
+#define HCLGE_MAC_SPEED_SHIFT	0
+#define HCLGE_MAC_SPEED_MASK	GENMASK(5, 0)
+#define HCLGE_MAC_DUPLEX_SHIFT	7
+
+	struct hclge_config_mac_speed_dup_cmd *req;
+	struct hclge_desc desc;
+	int ret;
+
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_CONFIG_SPEED_DUP, true);
+
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to dump mac speed duplex, ret = %d\n", ret);
+		return;
+	}
+
+	req = (struct hclge_config_mac_speed_dup_cmd *)desc.data;
+
+	dev_info(&hdev->pdev->dev, "speed: %#lx\n",
+		 hnae3_get_field(req->speed_dup, HCLGE_MAC_SPEED_MASK,
+				 HCLGE_MAC_SPEED_SHIFT));
+	dev_info(&hdev->pdev->dev, "duplex: %#x\n",
+		 hnae3_get_bit(req->speed_dup, HCLGE_MAC_DUPLEX_SHIFT));
+}
+
+static void hclge_dbg_dump_mac(struct hclge_dev *hdev)
+{
+	hclge_dbg_dump_mac_enable_status(hdev);
+
+	hclge_dbg_dump_mac_frame_size(hdev);
+
+	hclge_dbg_dump_mac_speed_duplex(hdev);
+}
+
 static void hclge_dbg_dump_dcb(struct hclge_dev *hdev, const char *cmd_buf)
 {
 	struct device *dev = &hdev->pdev->dev;
@@ -304,6 +412,11 @@ static void hclge_dbg_dump_reg_cmd(struct hclge_dev *hdev, const char *cmd_buf)
 		}
 	}
 
+	if (strncmp(cmd_buf, "mac", strlen("mac")) == 0) {
+		hclge_dbg_dump_mac(hdev);
+		has_dump = true;
+	}
+
 	if (strncmp(cmd_buf, "dcb", 3) == 0) {
 		hclge_dbg_dump_dcb(hdev, &cmd_buf[sizeof("dcb")]);
 		has_dump = true;
-- 
cgit v1.2.3-59-g8ed1b


From d8355240cf8fb8b9e002b5c8458578435cea85c2 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Mon, 20 Apr 2020 10:17:35 +0800
Subject: net: hns3: add trace event support for PF/VF mailbox

This patch adds trace event support for PF/VF mailbox.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/Makefile    |  1 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c |  7 ++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_trace.h   | 87 ++++++++++++++++++++++
 .../net/ethernet/hisilicon/hns3/hns3vf/Makefile    |  1 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c   |  7 ++
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h | 87 ++++++++++++++++++++++
 6 files changed, 190 insertions(+)
 create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h
 create mode 100644 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
index 0fb61d440d3b..6c28c8f6292c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/Makefile
@@ -4,6 +4,7 @@
 #
 
 ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(srctree)/$(src)
 
 obj-$(CONFIG_HNS3_HCLGE) += hclge.o
 hclge-objs = hclge_main.o hclge_cmd.o hclge_mdio.o hclge_tm.o hclge_mbx.o hclge_err.o  hclge_debugfs.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 7f24fcb4f96a..103c2ec777b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -5,6 +5,9 @@
 #include "hclge_mbx.h"
 #include "hnae3.h"
 
+#define CREATE_TRACE_POINTS
+#include "hclge_trace.h"
+
 static u16 hclge_errno_to_resp(int errno)
 {
 	return abs(errno);
@@ -90,6 +93,8 @@ static int hclge_send_mbx_msg(struct hclge_vport *vport, u8 *msg, u16 msg_len,
 
 	memcpy(&resp_pf_to_vf->msg.vf_mbx_msg_code, msg, msg_len);
 
+	trace_hclge_pf_mbx_send(hdev, resp_pf_to_vf);
+
 	status = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (status)
 		dev_err(&hdev->pdev->dev,
@@ -674,6 +679,8 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 
 		vport = &hdev->vport[req->mbx_src_vfid];
 
+		trace_hclge_pf_mbx_get(hdev, req);
+
 		switch (req->msg.code) {
 		case HCLGE_MBX_MAP_RING_TO_VECTOR:
 			ret = hclge_map_unmap_ring_to_vf_vector(vport, true,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h
new file mode 100644
index 000000000000..5b0b71bd6120
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_trace.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2018-2020 Hisilicon Limited. */
+
+/* This must be outside ifdef _HCLGE_TRACE_H */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns3
+
+#if !defined(_HCLGE_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _HCLGE_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#define PF_GET_MBX_LEN	(sizeof(struct hclge_mbx_vf_to_pf_cmd) / sizeof(u32))
+#define PF_SEND_MBX_LEN	(sizeof(struct hclge_mbx_pf_to_vf_cmd) / sizeof(u32))
+
+TRACE_EVENT(hclge_pf_mbx_get,
+	TP_PROTO(
+		struct hclge_dev *hdev,
+		struct hclge_mbx_vf_to_pf_cmd *req),
+	TP_ARGS(hdev, req),
+
+	TP_STRUCT__entry(
+		__field(u8, vfid)
+		__field(u8, code)
+		__field(u8, subcode)
+		__string(pciname, pci_name(hdev->pdev))
+		__string(devname, &hdev->vport[0].nic.kinfo.netdev->name)
+		__array(u32, mbx_data, PF_GET_MBX_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->vfid = req->mbx_src_vfid;
+		__entry->code = req->msg.code;
+		__entry->subcode = req->msg.subcode;
+		__assign_str(pciname, pci_name(hdev->pdev));
+		__assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name);
+		memcpy(__entry->mbx_data, req,
+		       sizeof(struct hclge_mbx_vf_to_pf_cmd));
+	),
+
+	TP_printk(
+		"%s %s vfid:%u code:%u subcode:%u data:%s",
+		__get_str(pciname), __get_str(devname), __entry->vfid,
+		__entry->code, __entry->subcode,
+		__print_array(__entry->mbx_data, PF_GET_MBX_LEN, sizeof(u32))
+	)
+);
+
+TRACE_EVENT(hclge_pf_mbx_send,
+	TP_PROTO(
+		struct hclge_dev *hdev,
+		struct hclge_mbx_pf_to_vf_cmd *req),
+	TP_ARGS(hdev, req),
+
+	TP_STRUCT__entry(
+		__field(u8, vfid)
+		__field(u16, code)
+		__string(pciname, pci_name(hdev->pdev))
+		__string(devname, &hdev->vport[0].nic.kinfo.netdev->name)
+		__array(u32, mbx_data, PF_SEND_MBX_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->vfid = req->dest_vfid;
+		__entry->code = req->msg.code;
+		__assign_str(pciname, pci_name(hdev->pdev));
+		__assign_str(devname, &hdev->vport[0].nic.kinfo.netdev->name);
+		memcpy(__entry->mbx_data, req,
+		       sizeof(struct hclge_mbx_pf_to_vf_cmd));
+	),
+
+	TP_printk(
+		"%s %s vfid:%u code:%u data:%s",
+		__get_str(pciname), __get_str(devname), __entry->vfid,
+		__entry->code,
+		__print_array(__entry->mbx_data, PF_SEND_MBX_LEN, sizeof(u32))
+	)
+);
+
+#endif /* _HCLGE_TRACE_H_ */
+
+/* This must be outside ifdef _HCLGE_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hclge_trace
+#include <trace/define_trace.h>
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
index 53804d95ea90..2c26ea607a53 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/Makefile
@@ -4,6 +4,7 @@
 #
 
 ccflags-y := -I $(srctree)/drivers/net/ethernet/hisilicon/hns3
+ccflags-y += -I $(srctree)/$(src)
 
 obj-$(CONFIG_HNS3_HCLGEVF) += hclgevf.o
 hclgevf-objs = hclgevf_main.o hclgevf_cmd.o hclgevf_mbx.o
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
index 9b8154955f91..5b2dcd97c107 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_mbx.c
@@ -5,6 +5,9 @@
 #include "hclgevf_main.h"
 #include "hnae3.h"
 
+#define CREATE_TRACE_POINTS
+#include "hclgevf_trace.h"
+
 static int hclgevf_resp_to_errno(u16 resp_code)
 {
 	return resp_code ? -resp_code : 0;
@@ -106,6 +109,8 @@ int hclgevf_send_mbx_msg(struct hclgevf_dev *hdev,
 
 	memcpy(&req->msg, send_msg, sizeof(struct hclge_vf_to_pf_msg));
 
+	trace_hclge_vf_mbx_send(hdev, req);
+
 	/* synchronous send */
 	if (need_resp) {
 		mutex_lock(&hdev->mbx_resp.mbx_mutex);
@@ -179,6 +184,8 @@ void hclgevf_mbx_handler(struct hclgevf_dev *hdev)
 			continue;
 		}
 
+		trace_hclge_vf_mbx_get(hdev, req);
+
 		/* synchronous messages are time critical and need preferential
 		 * treatment. Therefore, we need to acknowledge all the sync
 		 * responses as quickly as possible so that waiting tasks do not
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h
new file mode 100644
index 000000000000..e4bfb6191fef
--- /dev/null
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_trace.h
@@ -0,0 +1,87 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* Copyright (c) 2018-2019 Hisilicon Limited. */
+
+/* This must be outside ifdef _HCLGEVF_TRACE_H */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM hns3
+
+#if !defined(_HCLGEVF_TRACE_H_) || defined(TRACE_HEADER_MULTI_READ)
+#define _HCLGEVF_TRACE_H_
+
+#include <linux/tracepoint.h>
+
+#define VF_GET_MBX_LEN	(sizeof(struct hclge_mbx_pf_to_vf_cmd) / sizeof(u32))
+#define VF_SEND_MBX_LEN	(sizeof(struct hclge_mbx_vf_to_pf_cmd) / sizeof(u32))
+
+TRACE_EVENT(hclge_vf_mbx_get,
+	TP_PROTO(
+		struct hclgevf_dev *hdev,
+		struct hclge_mbx_pf_to_vf_cmd *req),
+	TP_ARGS(hdev, req),
+
+	TP_STRUCT__entry(
+		__field(u8, vfid)
+		__field(u16, code)
+		__string(pciname, pci_name(hdev->pdev))
+		__string(devname, &hdev->nic.kinfo.netdev->name)
+		__array(u32, mbx_data, VF_GET_MBX_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->vfid = req->dest_vfid;
+		__entry->code = req->msg.code;
+		__assign_str(pciname, pci_name(hdev->pdev));
+		__assign_str(devname, &hdev->nic.kinfo.netdev->name);
+		memcpy(__entry->mbx_data, req,
+		       sizeof(struct hclge_mbx_pf_to_vf_cmd));
+	),
+
+	TP_printk(
+		"%s %s vfid:%u code:%u data:%s",
+		__get_str(pciname), __get_str(devname), __entry->vfid,
+		__entry->code,
+		__print_array(__entry->mbx_data, VF_GET_MBX_LEN, sizeof(u32))
+	)
+);
+
+TRACE_EVENT(hclge_vf_mbx_send,
+	TP_PROTO(
+		struct hclgevf_dev *hdev,
+		struct hclge_mbx_vf_to_pf_cmd *req),
+	TP_ARGS(hdev, req),
+
+	TP_STRUCT__entry(
+		__field(u8, vfid)
+		__field(u8, code)
+		__field(u8, subcode)
+		__string(pciname, pci_name(hdev->pdev))
+		__string(devname, &hdev->nic.kinfo.netdev->name)
+		__array(u32, mbx_data, VF_SEND_MBX_LEN)
+	),
+
+	TP_fast_assign(
+		__entry->vfid = req->mbx_src_vfid;
+		__entry->code = req->msg.code;
+		__entry->subcode = req->msg.subcode;
+		__assign_str(pciname, pci_name(hdev->pdev));
+		__assign_str(devname, &hdev->nic.kinfo.netdev->name);
+		memcpy(__entry->mbx_data, req,
+		       sizeof(struct hclge_mbx_vf_to_pf_cmd));
+	),
+
+	TP_printk(
+		"%s %s vfid:%u code:%u subcode:%u data:%s",
+		__get_str(pciname), __get_str(devname), __entry->vfid,
+		__entry->code, __entry->subcode,
+		__print_array(__entry->mbx_data, VF_SEND_MBX_LEN, sizeof(u32))
+	)
+);
+
+#endif /* _HCLGEVF_TRACE_H_ */
+
+/* This must be outside ifdef _HCLGEVF_TRACE_H */
+#undef TRACE_INCLUDE_PATH
+#define TRACE_INCLUDE_PATH .
+#undef TRACE_INCLUDE_FILE
+#define TRACE_INCLUDE_FILE hclgevf_trace
+#include <trace/define_trace.h>
-- 
cgit v1.2.3-59-g8ed1b


From 82ebc889091a488b4dd95e682b3c3b889a50713c Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 20 Apr 2020 12:27:20 +0800
Subject: qed: use true,false for bool variables
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following coccicheck warning:

drivers/net/ethernet/qlogic/qed/qed_dev.c:4395:2-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/ethernet/qlogic/qed/qed_dev.c:1975:2-34: WARNING:
Assignment of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Acked-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 38a65b984e47..7119a18af19e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -1972,7 +1972,7 @@ static int qed_init_qm_sanity(struct qed_hwfn *p_hwfn)
 		return 0;
 
 	if (QED_IS_ROCE_PERSONALITY(p_hwfn)) {
-		p_hwfn->hw_info.multi_tc_roce_en = 0;
+		p_hwfn->hw_info.multi_tc_roce_en = false;
 		DP_NOTICE(p_hwfn,
 			  "multi-tc roce was disabled to reduce requested amount of pqs\n");
 		if (qed_init_qm_get_num_pqs(p_hwfn) <= RESC_NUM(p_hwfn, QED_PQ))
@@ -4392,7 +4392,7 @@ qed_get_hw_info(struct qed_hwfn *p_hwfn,
 	}
 
 	if (QED_IS_ROCE_PERSONALITY(p_hwfn))
-		p_hwfn->hw_info.multi_tc_roce_en = 1;
+		p_hwfn->hw_info.multi_tc_roce_en = true;
 
 	p_hwfn->hw_info.num_hw_tc = NUM_PHYS_TCS_4PORT_K2;
 	p_hwfn->hw_info.num_active_tc = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 8c8eea07c1fd46455b3a275a03c27326ddc42b20 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Wed, 23 Oct 2019 16:03:12 +0300
Subject: net/mlx5: Use the correct IPsec capability function for FPGA ops

Currently the IPsec acceleration capability function is also used
in code that is specific to IPsec FPGA capable devices.

This could cause a future bug, as the acceleration layer is agnostic
to the device implementing its API.

Fix by using the IPsec FPGA capability function instead of the
acceleration layer capability function for operations that relate
only to FPGA IPsec.

Downstream patches will add support for ConnectX IPsec; this change
avoids a future bug.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Reviewed-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h | 15 ++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c    |  5 +++--
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index b794888fa3ba..c8736b6b4172 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -602,7 +602,7 @@ static bool mlx5_is_fpga_ipsec_rule(struct mlx5_core_dev *dev,
 				    const u32 *match_c,
 				    const u32 *match_v)
 {
-	u32 ipsec_dev_caps = mlx5_accel_ipsec_device_caps(dev);
+	u32 ipsec_dev_caps = mlx5_fpga_ipsec_device_caps(dev);
 	bool ipv6_flow;
 
 	ipv6_flow = mlx5_fs_is_outer_ipv6_flow(dev, match_c, match_v);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index 382985e65b48..d01b1fc8e11b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -37,6 +37,7 @@
 #include "accel/ipsec.h"
 #include "fs_cmd.h"
 
+#ifdef CONFIG_MLX5_FPGA_IPSEC
 u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev);
 unsigned int mlx5_fpga_ipsec_counters_count(struct mlx5_core_dev *mdev);
 int mlx5_fpga_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
@@ -63,5 +64,17 @@ int mlx5_fpga_esp_modify_xfrm(struct mlx5_accel_esp_xfrm *xfrm,
 
 const struct mlx5_flow_cmds *
 mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type);
+#else
+static inline u32 mlx5_fpga_ipsec_device_caps(struct mlx5_core_dev *mdev)
+{
+	return 0;
+}
 
-#endif	/* __MLX5_FPGA_SADB_H__ */
+static inline const struct mlx5_flow_cmds *
+mlx5_fs_cmd_get_default_ipsec_fpga_cmds(enum fs_flow_table_type type)
+{
+	return mlx5_fs_cmd_get_default(type);
+}
+
+#endif /* CONFIG_MLX5_FPGA_IPSEC */
+#endif	/* __MLX5_FPGA_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d5defe09339a..2da45e9b9b6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -2359,7 +2359,7 @@ static struct mlx5_flow_root_namespace
 	struct mlx5_flow_root_namespace *root_ns;
 	struct mlx5_flow_namespace *ns;
 
-	if (mlx5_accel_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
+	if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE &&
 	    (table_type == FS_FT_NIC_RX || table_type == FS_FT_NIC_TX))
 		cmds = mlx5_fs_cmd_get_default_ipsec_fpga_cmds(table_type);
 
@@ -2943,7 +2943,8 @@ int mlx5_init_fs(struct mlx5_core_dev *dev)
 			goto err;
 	}
 
-	if (MLX5_IPSEC_DEV(dev) || MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
+	if (mlx5_fpga_ipsec_device_caps(steering->dev) & MLX5_ACCEL_IPSEC_CAP_DEVICE ||
+	    MLX5_CAP_FLOWTABLE_NIC_TX(dev, ft_support)) {
 		err = init_egress_root_ns(steering);
 		if (err)
 			goto err;
-- 
cgit v1.2.3-59-g8ed1b


From 9425c595bd513948537ef355c07a65595dd2c771 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Mon, 20 Jan 2020 15:03:00 +0200
Subject: net/mlx5e: en_accel, Add missing net/geneve.h include

The cited commit relies on <net/geneve.h> being included
implicitly before "en_accel/en_accel.h".
This forces every file that needs to include en_accel.h
to also redundantly include net/geneve.h.

Include net/geneve.h explicitly in "en_accel/en_accel.h" to avoid
the undesired constraint described above.

Fixes: e3cfc7e6b7bd ("net/mlx5e: TX, Add geneve tunnel stateless offload support")
Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 3022463f2284..a6f65d4b2f36 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -42,6 +42,8 @@
 #include "en/txrx.h"
 
 #if IS_ENABLED(CONFIG_GENEVE)
+#include <net/geneve.h>
+
 static inline bool mlx5_geneve_tx_allowed(struct mlx5_core_dev *mdev)
 {
 	return mlx5_tx_swp_supported(mdev);
-- 
cgit v1.2.3-59-g8ed1b


From 1dbd51d0a71a561056579e2d4f406e5ce5343af0 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Tue, 10 Dec 2019 13:20:55 +0200
Subject: net/mlx5: Refactor mlx5_accel_esp_create_hw_context parameter list

Currently FPGA IPsec is the only HW implementation of the IPsec
acceleration API, and so mlx5_accel_esp_create_hw_context was
wrongly tailored to this HW API, among other things in its parameter
list and in the endianness of some of its parameters.

This implementation might not be suitable for different HW.

Refactor by grouping all function arguments of
mlx5_accel_esp_create_hw_context into the common
mlx5_accel_esp_xfrm_attrs struct field of the mlx5_accel_esp_xfrm
struct, and correct the endianness according to the HW being called.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Reviewed-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c  | 20 +++++++++++++------
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.h  | 10 ++--------
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   | 23 +++++++++-------------
 include/linux/mlx5/accel.h                         | 12 +++++++++++
 4 files changed, 37 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index eddc34e4a762..a92cd88d369c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -57,13 +57,21 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 }
 
 void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				       struct mlx5_accel_esp_xfrm *xfrm,
-				       const __be32 saddr[4],
-				       const __be32 daddr[4],
-				       const __be32 spi, bool is_ipv6)
+				       struct mlx5_accel_esp_xfrm *xfrm)
 {
-	return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr, daddr,
-					     spi, is_ipv6);
+	__be32 saddr[4] = {}, daddr[4] = {};
+
+	if (!xfrm->attrs.is_ipv6) {
+		saddr[3] = xfrm->attrs.saddr.a4;
+		daddr[3] = xfrm->attrs.daddr.a4;
+	} else {
+		memcpy(saddr, xfrm->attrs.saddr.a6, sizeof(saddr));
+		memcpy(daddr, xfrm->attrs.daddr.a6, sizeof(daddr));
+	}
+
+	return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr,
+					     daddr, xfrm->attrs.spi,
+					     xfrm->attrs.is_ipv6);
 }
 
 void mlx5_accel_esp_free_hw_context(void *context)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index 530e428d46ab..f9b8e2a041c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -47,10 +47,7 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 				   unsigned int count);
 
 void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				       struct mlx5_accel_esp_xfrm *xfrm,
-				       const __be32 saddr[4],
-				       const __be32 daddr[4],
-				       const __be32 spi, bool is_ipv6);
+				       struct mlx5_accel_esp_xfrm *xfrm);
 void mlx5_accel_esp_free_hw_context(void *context);
 
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
@@ -63,10 +60,7 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
 static inline void *
 mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				 struct mlx5_accel_esp_xfrm *xfrm,
-				 const __be32 saddr[4],
-				 const __be32 daddr[4],
-				 const __be32 spi, bool is_ipv6)
+				 struct mlx5_accel_esp_xfrm *xfrm)
 {
 	return NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 29626c6c9c25..9e6c2216c93e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -199,6 +199,14 @@ mlx5e_ipsec_build_accel_xfrm_attrs(struct mlx5e_ipsec_sa_entry *sa_entry,
 	attrs->flags |= (x->props.mode == XFRM_MODE_TRANSPORT) ?
 			MLX5_ACCEL_ESP_FLAGS_TRANSPORT :
 			MLX5_ACCEL_ESP_FLAGS_TUNNEL;
+
+	/* spi */
+	attrs->spi = x->id.spi;
+
+	/* source , destination ips */
+	memcpy(&attrs->saddr, x->props.saddr.a6, sizeof(attrs->saddr));
+	memcpy(&attrs->daddr, x->id.daddr.a6, sizeof(attrs->daddr));
+	attrs->is_ipv6 = (x->props.family != AF_INET);
 }
 
 static inline int mlx5e_xfrm_validate_state(struct xfrm_state *x)
@@ -284,8 +292,6 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	struct net_device *netdev = x->xso.dev;
 	struct mlx5_accel_esp_xfrm_attrs attrs;
 	struct mlx5e_priv *priv;
-	__be32 saddr[4] = {0}, daddr[4] = {0}, spi;
-	bool is_ipv6 = false;
 	int err;
 
 	priv = netdev_priv(netdev);
@@ -331,20 +337,9 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	}
 
 	/* create hw context */
-	if (x->props.family == AF_INET) {
-		saddr[3] = x->props.saddr.a4;
-		daddr[3] = x->id.daddr.a4;
-	} else {
-		memcpy(saddr, x->props.saddr.a6, sizeof(saddr));
-		memcpy(daddr, x->id.daddr.a6, sizeof(daddr));
-		is_ipv6 = true;
-	}
-	spi = x->id.spi;
 	sa_entry->hw_context =
 			mlx5_accel_esp_create_hw_context(priv->mdev,
-							 sa_entry->xfrm,
-							 saddr, daddr, spi,
-							 is_ipv6);
+							 sa_entry->xfrm);
 	if (IS_ERR(sa_entry->hw_context)) {
 		err = PTR_ERR(sa_entry->hw_context);
 		goto err_xfrm;
diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index 5613e677a5f9..b919d143a9a6 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -92,6 +92,18 @@ struct mlx5_accel_esp_xfrm_attrs {
 	union {
 		struct aes_gcm_keymat aes_gcm;
 	} keymat;
+
+	union {
+		__be32 a4;
+		__be32 a6[4];
+	} saddr;
+
+	union {
+		__be32 a4;
+		__be32 a6[4];
+	} daddr;
+
+	u8 is_ipv6;
 };
 
 struct mlx5_accel_esp_xfrm {
-- 
cgit v1.2.3-59-g8ed1b


From 0aab3e1b04aeeb5682c1ae7c862f107334ab79c0 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Wed, 8 Jan 2020 11:48:37 +0200
Subject: net/mlx5e: IPSec, Expose IPsec HW stat only for supporting HW

The current HW counters are supported only by Innova. Split the IPsec
stats group into two groups, one for HW and one for SW, and expose
the HW counters to ethtool only if Innova HW is used for IPsec offload.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.h   | 25 ------
 .../mellanox/mlx5/core/en_accel/ipsec_stats.c      | 88 +++++++++++++---------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 29 ++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.h |  2 +
 4 files changed, 58 insertions(+), 86 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
index 93bf10e6508c..c85151a1e008 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.h
@@ -109,11 +109,6 @@ int mlx5e_ipsec_init(struct mlx5e_priv *priv);
 void mlx5e_ipsec_cleanup(struct mlx5e_priv *priv);
 void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv);
 
-int mlx5e_ipsec_get_count(struct mlx5e_priv *priv);
-int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data);
-void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv);
-int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data);
-
 struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *dev,
 					      unsigned int handle);
 
@@ -136,26 +131,6 @@ static inline void mlx5e_ipsec_build_netdev(struct mlx5e_priv *priv)
 {
 }
 
-static inline int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
-{
-	return 0;
-}
-
-static inline int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv,
-					  uint8_t *data)
-{
-	return 0;
-}
-
-static inline void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
-{
-}
-
-static inline int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
-{
-	return 0;
-}
-
 #endif
 
 #endif	/* __MLX5E_IPSEC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
index 6fea59223dc4..6c5c54bcd9be 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_stats.c
@@ -38,6 +38,7 @@
 #include "accel/ipsec.h"
 #include "fpga/sdk.h"
 #include "en_accel/ipsec.h"
+#include "fpga/ipsec.h"
 
 static const struct counter_desc mlx5e_ipsec_hw_stats_desc[] = {
 	{ MLX5E_DECLARE_STAT(struct mlx5e_ipsec_stats, ipsec_dec_in_packets) },
@@ -73,61 +74,74 @@ static const struct counter_desc mlx5e_ipsec_sw_stats_desc[] = {
 #define NUM_IPSEC_HW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_hw_stats_desc)
 #define NUM_IPSEC_SW_COUNTERS ARRAY_SIZE(mlx5e_ipsec_sw_stats_desc)
 
-#define NUM_IPSEC_COUNTERS (NUM_IPSEC_HW_COUNTERS + NUM_IPSEC_SW_COUNTERS)
-
-int mlx5e_ipsec_get_count(struct mlx5e_priv *priv)
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_sw)
 {
-	if (!priv->ipsec)
-		return 0;
-
-	return NUM_IPSEC_COUNTERS;
+	return NUM_IPSEC_SW_COUNTERS;
 }
 
-int mlx5e_ipsec_get_strings(struct mlx5e_priv *priv, uint8_t *data)
-{
-	unsigned int i, idx = 0;
+static inline MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_sw) {}
 
-	if (!priv->ipsec)
-		return 0;
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_sw)
+{
+	unsigned int i;
 
-	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       mlx5e_ipsec_hw_stats_desc[i].format);
+	if (priv->ipsec)
+		for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       mlx5e_ipsec_sw_stats_desc[i].format);
+	return idx;
+}
 
-	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
-		strcpy(data + (idx++) * ETH_GSTRING_LEN,
-		       mlx5e_ipsec_sw_stats_desc[i].format);
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_sw)
+{
+	int i;
 
-	return NUM_IPSEC_COUNTERS;
+	if (priv->ipsec)
+		for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
+			data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
+							      mlx5e_ipsec_sw_stats_desc, i);
+	return idx;
 }
 
-void mlx5e_ipsec_update_stats(struct mlx5e_priv *priv)
+static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec_hw)
 {
-	int ret;
+	return (mlx5_fpga_ipsec_device_caps(priv->mdev)) ? NUM_IPSEC_HW_COUNTERS : 0;
+}
 
-	if (!priv->ipsec)
-		return;
+static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec_hw)
+{
+	int ret = 0;
 
-	ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
-					     NUM_IPSEC_HW_COUNTERS);
+	if (priv->ipsec)
+		ret = mlx5_accel_ipsec_counters_read(priv->mdev, (u64 *)&priv->ipsec->stats,
+						     NUM_IPSEC_HW_COUNTERS);
 	if (ret)
 		memset(&priv->ipsec->stats, 0, sizeof(priv->ipsec->stats));
 }
 
-int mlx5e_ipsec_get_stats(struct mlx5e_priv *priv, u64 *data)
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec_hw)
 {
-	int i, idx = 0;
-
-	if (!priv->ipsec)
-		return 0;
+	unsigned int i;
 
-	for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
-						   mlx5e_ipsec_hw_stats_desc, i);
+	if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
+		for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+			strcpy(data + (idx++) * ETH_GSTRING_LEN,
+			       mlx5e_ipsec_hw_stats_desc[i].format);
 
-	for (i = 0; i < NUM_IPSEC_SW_COUNTERS; i++)
-		data[idx++] = MLX5E_READ_CTR_ATOMIC64(&priv->ipsec->sw_stats,
-						      mlx5e_ipsec_sw_stats_desc, i);
+	return idx;
+}
 
-	return NUM_IPSEC_COUNTERS;
+static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec_hw)
+{
+	int i;
+
+	if (priv->ipsec && mlx5_fpga_ipsec_device_caps(priv->mdev))
+		for (i = 0; i < NUM_IPSEC_HW_COUNTERS; i++)
+			data[idx++] = MLX5E_READ_CTR64_CPU(&priv->ipsec->stats,
+							   mlx5e_ipsec_hw_stats_desc,
+							   i);
+	return idx;
 }
+
+MLX5E_DEFINE_STATS_GRP(ipsec_sw, 0);
+MLX5E_DEFINE_STATS_GRP(ipsec_hw, 0);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index 30b216d9284c..6eb0e8236bbf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -32,8 +32,8 @@
 
 #include "lib/mlx5.h"
 #include "en.h"
-#include "en_accel/ipsec.h"
 #include "en_accel/tls.h"
+#include "en_accel/en_accel.h"
 
 static unsigned int stats_grps_num(struct mlx5e_priv *priv)
 {
@@ -1424,27 +1424,6 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(pme)
 
 static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(pme) { return; }
 
-static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(ipsec)
-{
-	return mlx5e_ipsec_get_count(priv);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STRS(ipsec)
-{
-	return idx + mlx5e_ipsec_get_strings(priv,
-					     data + idx * ETH_GSTRING_LEN);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(ipsec)
-{
-	return idx + mlx5e_ipsec_get_stats(priv, data + idx);
-}
-
-static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(ipsec)
-{
-	mlx5e_ipsec_update_stats(priv);
-}
-
 static MLX5E_DECLARE_STATS_GRP_OP_NUM_STATS(tls)
 {
 	return mlx5e_tls_get_count(priv);
@@ -1714,7 +1693,6 @@ MLX5E_DEFINE_STATS_GRP(pme, 0);
 MLX5E_DEFINE_STATS_GRP(channels, 0);
 MLX5E_DEFINE_STATS_GRP(per_port_buff_congest, 0);
 MLX5E_DEFINE_STATS_GRP(eth_ext, 0);
-static MLX5E_DEFINE_STATS_GRP(ipsec, 0);
 static MLX5E_DEFINE_STATS_GRP(tls, 0);
 
 /* The stats groups order is opposite to the update_stats() order calls */
@@ -1731,7 +1709,10 @@ mlx5e_stats_grp_t mlx5e_nic_stats_grps[] = {
 	&MLX5E_STATS_GRP(pcie),
 	&MLX5E_STATS_GRP(per_prio),
 	&MLX5E_STATS_GRP(pme),
-	&MLX5E_STATS_GRP(ipsec),
+#ifdef CONFIG_MLX5_EN_IPSEC
+	&MLX5E_STATS_GRP(ipsec_sw),
+	&MLX5E_STATS_GRP(ipsec_hw),
+#endif
 	&MLX5E_STATS_GRP(tls),
 	&MLX5E_STATS_GRP(channels),
 	&MLX5E_STATS_GRP(per_port_buff_congest),
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
index 092b39ffa32a..2b83ba990714 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.h
@@ -390,5 +390,7 @@ extern MLX5E_DECLARE_STATS_GRP(per_prio);
 extern MLX5E_DECLARE_STATS_GRP(pme);
 extern MLX5E_DECLARE_STATS_GRP(channels);
 extern MLX5E_DECLARE_STATS_GRP(per_port_buff_congest);
+extern MLX5E_DECLARE_STATS_GRP(ipsec_hw);
+extern MLX5E_DECLARE_STATS_GRP(ipsec_sw);
 
 #endif /* __MLX5_EN_STATS_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 7dfee4b1d79e1800818abcfb47747b162c9a2d31 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Wed, 23 Oct 2019 17:04:13 +0300
Subject: net/mlx5: IPsec, Refactor SA handle creation and destruction

Currently the SA handle is created and managed as part of the common
code for the different IPsec-supporting HW. This handle is passed to
HW to be used on Rx to identify the SA handle that was used to
return the xfrm state to the stack.

The above implementation poses a limitation on managing this handle.

Refactor by moving management of this field to the specific HW code.

Downstream patches will introduce ConnectX support for IPsec, which
will use this handle differently than the current implementation.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Reviewed-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.c  |  5 +-
 .../net/ethernet/mellanox/mlx5/core/accel/ipsec.h  |  6 +-
 .../ethernet/mellanox/mlx5/core/en_accel/ipsec.c   | 68 +++++++++-------------
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.c   | 29 ++++++++-
 .../net/ethernet/mellanox/mlx5/core/fpga/ipsec.h   |  3 +-
 5 files changed, 63 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
index a92cd88d369c..8a4985d8cbfe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c
@@ -57,7 +57,8 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 }
 
 void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				       struct mlx5_accel_esp_xfrm *xfrm)
+				       struct mlx5_accel_esp_xfrm *xfrm,
+				       u32 *sa_handle)
 {
 	__be32 saddr[4] = {}, daddr[4] = {};
 
@@ -71,7 +72,7 @@ void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
 
 	return mlx5_fpga_ipsec_create_sa_ctx(mdev, xfrm, saddr,
 					     daddr, xfrm->attrs.spi,
-					     xfrm->attrs.is_ipv6);
+					     xfrm->attrs.is_ipv6, sa_handle);
 }
 
 void mlx5_accel_esp_free_hw_context(void *context)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
index f9b8e2a041c1..e89747674712 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.h
@@ -47,7 +47,8 @@ int mlx5_accel_ipsec_counters_read(struct mlx5_core_dev *mdev, u64 *counters,
 				   unsigned int count);
 
 void *mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				       struct mlx5_accel_esp_xfrm *xfrm);
+				       struct mlx5_accel_esp_xfrm *xfrm,
+				       u32 *sa_handle);
 void mlx5_accel_esp_free_hw_context(void *context);
 
 int mlx5_accel_ipsec_init(struct mlx5_core_dev *mdev);
@@ -60,7 +61,8 @@ void mlx5_accel_ipsec_cleanup(struct mlx5_core_dev *mdev);
 
 static inline void *
 mlx5_accel_esp_create_hw_context(struct mlx5_core_dev *mdev,
-				 struct mlx5_accel_esp_xfrm *xfrm)
+				 struct mlx5_accel_esp_xfrm *xfrm,
+				 u32 *sa_handle)
 {
 	return NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
index 9e6c2216c93e..92eb3bad4acd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec.c
@@ -75,18 +75,23 @@ struct xfrm_state *mlx5e_ipsec_sadb_rx_lookup(struct mlx5e_ipsec *ipsec,
 	return ret;
 }
 
-static int mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry)
+static int  mlx5e_ipsec_sadb_rx_add(struct mlx5e_ipsec_sa_entry *sa_entry,
+				    unsigned int handle)
 {
 	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
+	struct mlx5e_ipsec_sa_entry *_sa_entry;
 	unsigned long flags;
-	int ret;
 
-	ret = ida_simple_get(&ipsec->halloc, 1, 0, GFP_KERNEL);
-	if (ret < 0)
-		return ret;
+	rcu_read_lock();
+	hash_for_each_possible_rcu(ipsec->sadb_rx, _sa_entry, hlist, handle)
+		if (_sa_entry->handle == handle) {
+			rcu_read_unlock();
+			return  -EEXIST;
+		}
+	rcu_read_unlock();
 
 	spin_lock_irqsave(&ipsec->sadb_rx_lock, flags);
-	sa_entry->handle = ret;
+	sa_entry->handle = handle;
 	hash_add_rcu(ipsec->sadb_rx, &sa_entry->hlist, sa_entry->handle);
 	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
 
@@ -103,15 +108,6 @@ static void mlx5e_ipsec_sadb_rx_del(struct mlx5e_ipsec_sa_entry *sa_entry)
 	spin_unlock_irqrestore(&ipsec->sadb_rx_lock, flags);
 }
 
-static void mlx5e_ipsec_sadb_rx_free(struct mlx5e_ipsec_sa_entry *sa_entry)
-{
-	struct mlx5e_ipsec *ipsec = sa_entry->ipsec;
-
-	/* xfrm already doing sync rcu between del and free callbacks */
-
-	ida_simple_remove(&ipsec->halloc, sa_entry->handle);
-}
-
 static bool mlx5e_ipsec_update_esn_state(struct mlx5e_ipsec_sa_entry *sa_entry)
 {
 	struct xfrm_replay_state_esn *replay_esn;
@@ -292,6 +288,7 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	struct net_device *netdev = x->xso.dev;
 	struct mlx5_accel_esp_xfrm_attrs attrs;
 	struct mlx5e_priv *priv;
+	unsigned int sa_handle;
 	int err;
 
 	priv = netdev_priv(netdev);
@@ -309,20 +306,6 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 	sa_entry->x = x;
 	sa_entry->ipsec = priv->ipsec;
 
-	/* Add the SA to handle processed incoming packets before the add SA
-	 * completion was received
-	 */
-	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
-		err = mlx5e_ipsec_sadb_rx_add(sa_entry);
-		if (err) {
-			netdev_info(netdev, "Failed adding to SADB_RX: %d\n", err);
-			goto err_entry;
-		}
-	} else {
-		sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
-				mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
-	}
-
 	/* check esn */
 	mlx5e_ipsec_update_esn_state(sa_entry);
 
@@ -333,30 +316,38 @@ static int mlx5e_xfrm_add_state(struct xfrm_state *x)
 					   MLX5_ACCEL_XFRM_FLAG_REQUIRE_METADATA);
 	if (IS_ERR(sa_entry->xfrm)) {
 		err = PTR_ERR(sa_entry->xfrm);
-		goto err_sadb_rx;
+		goto err_sa_entry;
 	}
 
 	/* create hw context */
 	sa_entry->hw_context =
 			mlx5_accel_esp_create_hw_context(priv->mdev,
-							 sa_entry->xfrm);
+							 sa_entry->xfrm,
+							 &sa_handle);
 	if (IS_ERR(sa_entry->hw_context)) {
 		err = PTR_ERR(sa_entry->hw_context);
 		goto err_xfrm;
 	}
 
+	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
+		err = mlx5e_ipsec_sadb_rx_add(sa_entry, sa_handle);
+		if (err)
+			goto err_hw_ctx;
+	} else {
+		sa_entry->set_iv_op = (x->props.flags & XFRM_STATE_ESN) ?
+				mlx5e_ipsec_set_iv_esn : mlx5e_ipsec_set_iv;
+	}
+
 	x->xso.offload_handle = (unsigned long)sa_entry;
 	goto out;
 
+err_hw_ctx:
+	mlx5_accel_esp_free_hw_context(sa_entry->hw_context);
 err_xfrm:
 	mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
-err_sadb_rx:
-	if (x->xso.flags & XFRM_OFFLOAD_INBOUND) {
-		mlx5e_ipsec_sadb_rx_del(sa_entry);
-		mlx5e_ipsec_sadb_rx_free(sa_entry);
-	}
-err_entry:
+err_sa_entry:
 	kfree(sa_entry);
+
 out:
 	return err;
 }
@@ -385,9 +376,6 @@ static void mlx5e_xfrm_free_state(struct xfrm_state *x)
 		mlx5_accel_esp_destroy_xfrm(sa_entry->xfrm);
 	}
 
-	if (x->xso.flags & XFRM_OFFLOAD_INBOUND)
-		mlx5e_ipsec_sadb_rx_free(sa_entry);
-
 	kfree(sa_entry);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index c8736b6b4172..0604216eb94f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -65,6 +65,7 @@ struct mlx5_fpga_esp_xfrm;
 struct mlx5_fpga_ipsec_sa_ctx {
 	struct rhash_head		hash;
 	struct mlx5_ifc_fpga_ipsec_sa	hw_sa;
+	u32				sa_handle;
 	struct mlx5_core_dev		*dev;
 	struct mlx5_fpga_esp_xfrm	*fpga_xfrm;
 };
@@ -119,6 +120,8 @@ struct mlx5_fpga_ipsec {
 	 */
 	struct rb_root rules_rb;
 	struct mutex rules_rb_lock; /* rules lock */
+
+	struct ida halloc;
 };
 
 static bool mlx5_fpga_is_ipsec_device(struct mlx5_core_dev *mdev)
@@ -666,7 +669,8 @@ void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 				    struct mlx5_accel_esp_xfrm *accel_xfrm,
 				    const __be32 saddr[4],
 				    const __be32 daddr[4],
-				    const __be32 spi, bool is_ipv6)
+				    const __be32 spi, bool is_ipv6,
+				    u32 *sa_handle)
 {
 	struct mlx5_fpga_ipsec_sa_ctx *sa_ctx;
 	struct mlx5_fpga_esp_xfrm *fpga_xfrm =
@@ -704,6 +708,17 @@ void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 		goto exists;
 	}
 
+	if (accel_xfrm->attrs.action & MLX5_ACCEL_ESP_ACTION_DECRYPT) {
+		err = ida_simple_get(&fipsec->halloc, 1, 0, GFP_KERNEL);
+		if (err < 0) {
+			context = ERR_PTR(err);
+			goto exists;
+		}
+
+		sa_ctx->sa_handle = err;
+		if (sa_handle)
+			*sa_handle = sa_ctx->sa_handle;
+	}
 	/* This is unbounded fpga_xfrm, try to add to hash */
 	mutex_lock(&fipsec->sa_hash_lock);
 
@@ -744,7 +759,8 @@ delete_hash:
 				       rhash_sa));
 unlock_hash:
 	mutex_unlock(&fipsec->sa_hash_lock);
-
+	if (accel_xfrm->attrs.action & MLX5_ACCEL_ESP_ACTION_DECRYPT)
+		ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle);
 exists:
 	mutex_unlock(&fpga_xfrm->lock);
 	kfree(sa_ctx);
@@ -816,7 +832,7 @@ mlx5_fpga_ipsec_fs_create_sa_ctx(struct mlx5_core_dev *mdev,
 	/* create */
 	return mlx5_fpga_ipsec_create_sa_ctx(mdev, accel_xfrm,
 					     saddr, daddr,
-					     spi, is_ipv6);
+					     spi, is_ipv6, NULL);
 }
 
 static void
@@ -836,6 +852,10 @@ mlx5_fpga_ipsec_release_sa_ctx(struct mlx5_fpga_ipsec_sa_ctx *sa_ctx)
 		return;
 	}
 
+	if (sa_ctx->fpga_xfrm->accel_xfrm.attrs.action &
+	    MLX5_ACCEL_ESP_ACTION_DECRYPT)
+		ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle);
+
 	mutex_lock(&fipsec->sa_hash_lock);
 	WARN_ON(rhashtable_remove_fast(&fipsec->sa_hash, &sa_ctx->hash,
 				       rhash_sa));
@@ -1299,6 +1319,8 @@ int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev)
 		goto err_destroy_hash;
 	}
 
+	ida_init(&fdev->ipsec->halloc);
+
 	return 0;
 
 err_destroy_hash:
@@ -1331,6 +1353,7 @@ void mlx5_fpga_ipsec_cleanup(struct mlx5_core_dev *mdev)
 	if (!mlx5_fpga_is_ipsec_device(mdev))
 		return;
 
+	ida_destroy(&fdev->ipsec->halloc);
 	destroy_rules_rb(&fdev->ipsec->rules_rb);
 	rhashtable_destroy(&fdev->ipsec->sa_hash);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
index d01b1fc8e11b..9ba637f0f0f2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.h
@@ -47,7 +47,8 @@ void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 				    struct mlx5_accel_esp_xfrm *accel_xfrm,
 				    const __be32 saddr[4],
 				    const __be32 daddr[4],
-				    const __be32 spi, bool is_ipv6);
+				    const __be32 spi, bool is_ipv6,
+				    u32 *sa_handle);
 void mlx5_fpga_ipsec_delete_sa_ctx(void *context);
 
 int mlx5_fpga_ipsec_init(struct mlx5_core_dev *mdev);
-- 
cgit v1.2.3-59-g8ed1b


From 82fe2996419830b0bb2c7e1f2fed2d3a8a1a65cd Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Tue, 18 Feb 2020 12:27:25 +0200
Subject: net/mlx5e: Set of completion request bit should not clear other
 adjacent bits

In notify HW (ring doorbell) flow, we set the bit to request a completion
on the TX descriptor.
When doing so, we should not unset other bits in the same byte.
Currently, this does not fix a real issue, as we still don't have a flow
where both MLX5_WQE_CTRL_CQ_UPDATE and any adjacent bit are set together.

Fixes: 542578c67936 ("net/mlx5e: Move helper functions to a new txrx datapath header")
Fixes: 864b2d715300 ("net/mlx5e: Generalize tx helper functions for different SQ types")
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index f07b1399744e..9f6967d76053 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -102,7 +102,7 @@ static inline void
 mlx5e_notify_hw(struct mlx5_wq_cyc *wq, u16 pc, void __iomem *uar_map,
 		struct mlx5_wqe_ctrl_seg *ctrl)
 {
-	ctrl->fm_ce_se = MLX5_WQE_CTRL_CQ_UPDATE;
+	ctrl->fm_ce_se |= MLX5_WQE_CTRL_CQ_UPDATE;
 	/* ensure wqe is visible to device before updating doorbell record */
 	dma_wmb();
 
-- 
cgit v1.2.3-59-g8ed1b


From d7a42ad062cc6b20b2c2a8c09dc61df2d4f5751f Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Wed, 25 Mar 2020 11:32:56 +0200
Subject: net/mlx5e: Allow partial data mask for tunnel options

We use mapping to save and restore the tunnel options.
Save also the tunnel options mask.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 32 +++++++++++++++++--------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a574c588269a..7d2b05576f44 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -171,6 +171,11 @@ struct tunnel_match_key {
 	int filter_ifindex;
 };
 
+struct tunnel_match_enc_opts {
+	struct flow_dissector_key_enc_opts key;
+	struct flow_dissector_key_enc_opts mask;
+};
+
 /* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
  * Upper TUNNEL_INFO_BITS for general tunnel info.
  * Lower ENC_OPTS_BITS bits for enc_opts.
@@ -1824,9 +1829,7 @@ enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
 			*dont_care = false;
 
 			if (opt->opt_class != U16_MAX ||
-			    opt->type != U8_MAX ||
-			    memchr_inv(opt->opt_data, 0xFF,
-				       opt->length * 4)) {
+			    opt->type != U8_MAX) {
 				NL_SET_ERR_MSG(extack,
 					       "Partial match of tunnel options in chain > 0 isn't supported");
 				netdev_warn(priv->netdev,
@@ -1863,6 +1866,7 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
 	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
 	struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts;
 	struct flow_match_enc_opts enc_opts_match;
+	struct tunnel_match_enc_opts tun_enc_opts;
 	struct mlx5_rep_uplink_priv *uplink_priv;
 	struct mlx5e_rep_priv *uplink_rpriv;
 	struct tunnel_match_key tunnel_key;
@@ -1905,8 +1909,14 @@ static int mlx5e_get_flow_tunnel_id(struct mlx5e_priv *priv,
 		goto err_enc_opts;
 
 	if (!enc_opts_is_dont_care) {
+		memset(&tun_enc_opts, 0, sizeof(tun_enc_opts));
+		memcpy(&tun_enc_opts.key, enc_opts_match.key,
+		       sizeof(*enc_opts_match.key));
+		memcpy(&tun_enc_opts.mask, enc_opts_match.mask,
+		       sizeof(*enc_opts_match.mask));
+
 		err = mapping_add(uplink_priv->tunnel_enc_opts_mapping,
-				  enc_opts_match.key, &enc_opts_id);
+				  &tun_enc_opts, &enc_opts_id);
 		if (err)
 			goto err_enc_opts;
 	}
@@ -4707,7 +4717,7 @@ void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv)
 
 int mlx5e_tc_esw_init(struct rhashtable *tc_ht)
 {
-	const size_t sz_enc_opts = sizeof(struct flow_dissector_key_enc_opts);
+	const size_t sz_enc_opts = sizeof(struct tunnel_match_enc_opts);
 	struct mlx5_rep_uplink_priv *uplink_priv;
 	struct mlx5e_rep_priv *priv;
 	struct mapping_ctx *mapping;
@@ -4802,7 +4812,7 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
 				 u32 tunnel_id)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct flow_dissector_key_enc_opts enc_opts = {};
+	struct tunnel_match_enc_opts enc_opts = {};
 	struct mlx5_rep_uplink_priv *uplink_priv;
 	struct mlx5e_rep_priv *uplink_rpriv;
 	struct metadata_dst *tun_dst;
@@ -4840,7 +4850,7 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
 		}
 	}
 
-	tun_dst = tun_rx_dst(enc_opts.len);
+	tun_dst = tun_rx_dst(enc_opts.key.len);
 	if (!tun_dst) {
 		WARN_ON_ONCE(true);
 		return false;
@@ -4854,9 +4864,11 @@ static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
 			   key32_to_tunnel_id(key.enc_key_id.keyid),
 			   TUNNEL_KEY);
 
-	if (enc_opts.len)
-		ip_tunnel_info_opts_set(&tun_dst->u.tun_info, enc_opts.data,
-					enc_opts.len, enc_opts.dst_opt_type);
+	if (enc_opts.key.len)
+		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+					enc_opts.key.data,
+					enc_opts.key.len,
+					enc_opts.key.dst_opt_type);
 
 	skb_dst_set(skb, (struct dst_entry *)tun_dst);
 	dev = dev_get_by_index(&init_net, key.filter_ifindex);
-- 
cgit v1.2.3-59-g8ed1b


From fa3748775b92692331cfcab6f7b09a04a23694d9 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 19 Mar 2020 17:32:27 +0200
Subject: net/mlx5e: Handle errors from netif_set_real_num_{tx,rx}_queues

netif_set_real_num_tx_queues and netif_set_real_num_rx_queues may fail.
Now that mlx5e supports handling errors in the preactivate hook, this
commit leverages that functionality to handle errors from those
functions and roll back all changes on failure.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 59 +++++++++++++++++-----
 2 files changed, 48 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 6d703ddee4e2..4ab78b5c2393 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -432,7 +432,7 @@ int mlx5e_ethtool_set_channels(struct mlx5e_priv *priv,
 
 	if (!test_bit(MLX5E_STATE_OPENED, &priv->state)) {
 		*cur_params = new_channels.params;
-		mlx5e_num_channels_changed(priv);
+		err = mlx5e_num_channels_changed(priv);
 		goto out;
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index f02150a97ac8..e057822898f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -2839,11 +2839,8 @@ void mlx5e_set_netdev_mtu_boundaries(struct mlx5e_priv *priv)
 				ETH_MAX_MTU);
 }
 
-static void mlx5e_netdev_set_tcs(struct net_device *netdev)
+static void mlx5e_netdev_set_tcs(struct net_device *netdev, u16 nch, u8 ntc)
 {
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-	int nch = priv->channels.params.num_channels;
-	int ntc = priv->channels.params.num_tc;
 	int tc;
 
 	netdev_reset_tc(netdev);
@@ -2860,15 +2857,47 @@ static void mlx5e_netdev_set_tcs(struct net_device *netdev)
 		netdev_set_tc_queue(netdev, tc, nch, 0);
 }
 
-static void mlx5e_update_netdev_queues(struct mlx5e_priv *priv, u16 count)
+static int mlx5e_update_netdev_queues(struct mlx5e_priv *priv)
 {
-	int num_txqs = count * priv->channels.params.num_tc;
-	int num_rxqs = count * priv->profile->rq_groups;
 	struct net_device *netdev = priv->netdev;
+	int num_txqs, num_rxqs, nch, ntc;
+	int old_num_txqs, old_ntc;
+	int err;
+
+	old_num_txqs = netdev->real_num_tx_queues;
+	old_ntc = netdev->num_tc;
 
-	mlx5e_netdev_set_tcs(netdev);
-	netif_set_real_num_tx_queues(netdev, num_txqs);
-	netif_set_real_num_rx_queues(netdev, num_rxqs);
+	nch = priv->channels.params.num_channels;
+	ntc = priv->channels.params.num_tc;
+	num_txqs = nch * ntc;
+	num_rxqs = nch * priv->profile->rq_groups;
+
+	mlx5e_netdev_set_tcs(netdev, nch, ntc);
+
+	err = netif_set_real_num_tx_queues(netdev, num_txqs);
+	if (err) {
+		netdev_warn(netdev, "netif_set_real_num_tx_queues failed, %d\n", err);
+		goto err_tcs;
+	}
+	err = netif_set_real_num_rx_queues(netdev, num_rxqs);
+	if (err) {
+		netdev_warn(netdev, "netif_set_real_num_rx_queues failed, %d\n", err);
+		goto err_txqs;
+	}
+
+	return 0;
+
+err_txqs:
+	/* netif_set_real_num_rx_queues could fail only when nch increased. Only
+	 * one of nch and ntc is changed in this function. That means, the call
+	 * to netif_set_real_num_tx_queues below should not fail, because it
+	 * decreases the number of TX queues.
+	 */
+	WARN_ON_ONCE(netif_set_real_num_tx_queues(netdev, old_num_txqs));
+
+err_tcs:
+	mlx5e_netdev_set_tcs(netdev, old_num_txqs / old_ntc, old_ntc);
+	return err;
 }
 
 static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
@@ -2895,8 +2924,12 @@ static void mlx5e_set_default_xps_cpumasks(struct mlx5e_priv *priv,
 int mlx5e_num_channels_changed(struct mlx5e_priv *priv)
 {
 	u16 count = priv->channels.params.num_channels;
+	int err;
+
+	err = mlx5e_update_netdev_queues(priv);
+	if (err)
+		return err;
 
-	mlx5e_update_netdev_queues(priv, count);
 	mlx5e_set_default_xps_cpumasks(priv, &priv->channels.params);
 
 	if (!netif_is_rxfh_configured(priv->netdev))
@@ -5358,9 +5391,11 @@ int mlx5e_attach_netdev(struct mlx5e_priv *priv)
 	 */
 	if (take_rtnl)
 		rtnl_lock();
-	mlx5e_num_channels_changed(priv);
+	err = mlx5e_num_channels_changed(priv);
 	if (take_rtnl)
 		rtnl_unlock();
+	if (err)
+		goto out;
 
 	err = profile->init_tx(priv);
 	if (err)
-- 
cgit v1.2.3-59-g8ed1b


From c89da067a2e4d0f94f0f314c2918dca50348789c Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Sun, 8 Mar 2020 21:41:22 -0500
Subject: net/mlx5: Read embedded cpu bit only once

Embedded CPU bit doesn't change with PCI resume/suspend.
Hence read it only once while probing the PCI device.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Bodong Wang <bodong@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 7af4210c1b96..5a97e98e937c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -782,7 +782,7 @@ static int mlx5_pci_init(struct mlx5_core_dev *dev, struct pci_dev *pdev,
 	}
 
 	mlx5_pci_vsc_init(dev);
-
+	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
 	return 0;
 
 err_clr_master:
@@ -1180,7 +1180,6 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 {
 	int err = 0;
 
-	dev->caps.embedded_cpu = mlx5_read_embedded_cpu(dev);
 	mutex_lock(&dev->intf_state_mutex);
 	if (test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
 		mlx5_core_warn(dev, "interface is up, NOP\n");
-- 
cgit v1.2.3-59-g8ed1b


From 6533380dfd003ea7636cb5672f4f85124b56328b Mon Sep 17 00:00:00 2001
From: Hu Haowen <xianfengting221@163.com>
Date: Wed, 1 Apr 2020 20:57:20 +0800
Subject: net/mlx5: improve some comments

Replaced "its" with "it's".

Signed-off-by: Hu Haowen <xianfengting221@163.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
index c9c9b479bda5..0a8adda073c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fw_tracer.c
@@ -684,7 +684,7 @@ static void mlx5_fw_tracer_handle_traces(struct work_struct *work)
 		get_block_timestamp(tracer, &tmp_trace_block[TRACES_PER_BLOCK - 1]);
 
 	while (block_timestamp > tracer->last_timestamp) {
-		/* Check block override if its not the first block */
+		/* Check block override if it's not the first block */
 		if (!tracer->last_timestamp) {
 			u64 *ts_event;
 			/* To avoid block override be the HW in case of buffer
-- 
cgit v1.2.3-59-g8ed1b


From 794867ee6730fe3f0d419d3911f35a725cafe3a8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 20 Apr 2020 22:52:59 +0200
Subject: r8169: change wmb to smp_wmb in rtl8169_start_xmit

A barrier is needed here to ensure that rtl_tx sees the descriptor
changes (DescOwn set) before the updated tp->cur_tx value. Else it may
wrongly assume that the transfer has been finished already. For this
purpose smp_wmb() is sufficient.

No separate barrier is needed for ordering the descriptor changes
with the MMIO doorbell write. The needed barrier is included in
the non-relaxed writel() used by rtl8169_doorbell().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b8cc064ee9f5..b7a2853e7396 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4249,8 +4249,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 
 	txd_first->opts1 |= cpu_to_le32(DescOwn | FirstFrag);
 
-	/* Force all memory writes to complete before notifying device */
-	wmb();
+	/* rtl_tx needs to see descriptor changes before updated tp->cur_tx */
+	smp_wmb();
 
 	tp->cur_tx += frags + 1;
 
-- 
cgit v1.2.3-59-g8ed1b


From d4d9b47e4b10679d5c794e62f9866ee5ee204c0d Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Apr 2020 00:51:17 +0300
Subject: net: bcmgenet: Drop ACPI_PTR() to avoid compiler warning

When compiled with CONFIG_ACPI=n, ACPI_PTR() will be a no-op, and thus the
genet_acpi_match table is defined but not used. The compiler warns about
such unused data. Drop ACPI_PTR() for good.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index d975338bf78d..9f2f0e681656 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3740,7 +3740,7 @@ static struct platform_driver bcmgenet_driver = {
 		.name	= "bcmgenet",
 		.of_match_table = bcmgenet_match,
 		.pm	= &bcmgenet_pm_ops,
-		.acpi_match_table = ACPI_PTR(genet_acpi_match),
+		.acpi_match_table = genet_acpi_match,
 	},
 };
 module_platform_driver(bcmgenet_driver);
-- 
cgit v1.2.3-59-g8ed1b


From 9a965942a9cabb2cdca1d7e99c985d6978ece96c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Apr 2020 00:51:18 +0300
Subject: net: bcmgenet: Drop useless OF code

There is nothing which needs a set of OF headers, followed by redundant
OF node ID check. Drop them for good.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 9f2f0e681656..ef275db018f7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -23,11 +23,6 @@
 #include <linux/dma-mapping.h>
 #include <linux/pm.h>
 #include <linux/clk.h>
-#include <linux/of.h>
-#include <linux/of_address.h>
-#include <linux/of_irq.h>
-#include <linux/of_net.h>
-#include <linux/of_platform.h>
 #include <net/arp.h>
 
 #include <linux/mii.h>
@@ -3417,8 +3412,6 @@ MODULE_DEVICE_TABLE(of, bcmgenet_match);
 static int bcmgenet_probe(struct platform_device *pdev)
 {
 	struct bcmgenet_platform_data *pd = pdev->dev.platform_data;
-	struct device_node *dn = pdev->dev.of_node;
-	const struct of_device_id *of_id = NULL;
 	const struct bcmgenet_plat_data *pdata;
 	struct bcmgenet_priv *priv;
 	struct net_device *dev;
@@ -3433,12 +3426,6 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
-	if (dn) {
-		of_id = of_match_node(bcmgenet_match, dn);
-		if (!of_id)
-			return -EINVAL;
-	}
-
 	priv = netdev_priv(dev);
 	priv->irq0 = platform_get_irq(pdev, 0);
 	if (priv->irq0 < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From c80d36ff63a5fb447616e03d4339d2eef7884d28 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Apr 2020 00:51:19 +0300
Subject: net: bcmgenet: Use devm_clk_get_optional() to get the clocks

Conversion to devm_clk_get_optional() makes it explicit that clocks are
optional. This change allows handling deferred probe in case clocks are
defined, but not yet probed. Due to the above changes, bail out in the
error case.

While here, check for a potential error when enabling the main clock.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ef275db018f7..86666e9ab3e7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3487,13 +3487,16 @@ static int bcmgenet_probe(struct platform_device *pdev)
 		priv->dma_max_burst_length = DMA_MAX_BURST_LENGTH;
 	}
 
-	priv->clk = devm_clk_get(&priv->pdev->dev, "enet");
+	priv->clk = devm_clk_get_optional(&priv->pdev->dev, "enet");
 	if (IS_ERR(priv->clk)) {
 		dev_dbg(&priv->pdev->dev, "failed to get enet clock\n");
-		priv->clk = NULL;
+		err = PTR_ERR(priv->clk);
+		goto err;
 	}
 
-	clk_prepare_enable(priv->clk);
+	err = clk_prepare_enable(priv->clk);
+	if (err)
+		goto err;
 
 	bcmgenet_set_hw_params(priv);
 
@@ -3511,16 +3514,18 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	priv->rx_buf_len = RX_BUF_LENGTH;
 	INIT_WORK(&priv->bcmgenet_irq_work, bcmgenet_irq_task);
 
-	priv->clk_wol = devm_clk_get(&priv->pdev->dev, "enet-wol");
+	priv->clk_wol = devm_clk_get_optional(&priv->pdev->dev, "enet-wol");
 	if (IS_ERR(priv->clk_wol)) {
 		dev_dbg(&priv->pdev->dev, "failed to get enet-wol clock\n");
-		priv->clk_wol = NULL;
+		err = PTR_ERR(priv->clk_wol);
+		goto err;
 	}
 
-	priv->clk_eee = devm_clk_get(&priv->pdev->dev, "enet-eee");
+	priv->clk_eee = devm_clk_get_optional(&priv->pdev->dev, "enet-eee");
 	if (IS_ERR(priv->clk_eee)) {
 		dev_dbg(&priv->pdev->dev, "failed to get enet-eee clock\n");
-		priv->clk_eee = NULL;
+		err = PTR_ERR(priv->clk_eee);
+		goto err;
 	}
 
 	/* If this is an internal GPHY, power it on now, before UniMAC is
-- 
cgit v1.2.3-59-g8ed1b


From d2af1420cbc84f5eabcca6b667d7fb2339c398e2 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Apr 2020 00:51:20 +0300
Subject: net: bcmgenet: Use get_unaligned_beXX() and put_unaligned_beXX()

It's convenient to use the get_unaligned_beXX() and put_unaligned_beXX()
helpers to get or set the MAC address instead of open-coded variants.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 86666e9ab3e7..2c9881032a24 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2702,9 +2702,8 @@ static void bcmgenet_umac_reset(struct bcmgenet_priv *priv)
 static void bcmgenet_set_hw_addr(struct bcmgenet_priv *priv,
 				 unsigned char *addr)
 {
-	bcmgenet_umac_writel(priv, (addr[0] << 24) | (addr[1] << 16) |
-			(addr[2] << 8) | addr[3], UMAC_MAC0);
-	bcmgenet_umac_writel(priv, (addr[4] << 8) | addr[5], UMAC_MAC1);
+	bcmgenet_umac_writel(priv, get_unaligned_be32(&addr[0]), UMAC_MAC0);
+	bcmgenet_umac_writel(priv, get_unaligned_be16(&addr[4]), UMAC_MAC1);
 }
 
 static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv,
@@ -2713,13 +2712,9 @@ static void bcmgenet_get_hw_addr(struct bcmgenet_priv *priv,
 	u32 addr_tmp;
 
 	addr_tmp = bcmgenet_umac_readl(priv, UMAC_MAC0);
-	addr[0] = addr_tmp >> 24;
-	addr[1] = (addr_tmp >> 16) & 0xff;
-	addr[2] = (addr_tmp >>	8) & 0xff;
-	addr[3] = addr_tmp & 0xff;
+	put_unaligned_be32(addr_tmp, &addr[0]);
 	addr_tmp = bcmgenet_umac_readl(priv, UMAC_MAC1);
-	addr[4] = (addr_tmp >> 8) & 0xff;
-	addr[5] = addr_tmp & 0xff;
+	put_unaligned_be16(addr_tmp, &addr[4]);
 }
 
 /* Returns a reusable dma control register value */
-- 
cgit v1.2.3-59-g8ed1b


From 7d3cca75c169710048b0fea08522f635efcea893 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Tue, 21 Apr 2020 00:51:21 +0300
Subject: net: bcmgenet: Drop too many parentheses in bcmgenet_probe()

No need to have parentheses around plain pointer variable or
negation operator. Drop them for good.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 2c9881032a24..20aba79becce 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3529,7 +3529,7 @@ static int bcmgenet_probe(struct platform_device *pdev)
 	if (device_get_phy_mode(&pdev->dev) == PHY_INTERFACE_MODE_INTERNAL)
 		bcmgenet_power_up(priv, GENET_POWER_PASSIVE);
 
-	if ((pd) && (!IS_ERR_OR_NULL(pd->mac_address)))
+	if (pd && !IS_ERR_OR_NULL(pd->mac_address))
 		ether_addr_copy(dev->dev_addr, pd->mac_address);
 	else
 		if (!device_get_mac_address(&pdev->dev, dev->dev_addr, ETH_ALEN))
-- 
cgit v1.2.3-59-g8ed1b


From b6246f4d8d0778fd045b84dbd7fc5aadd8f3136e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Mon, 20 Apr 2020 22:51:49 +0100
Subject: net: ipv4: remove redundant assignment to variable rc

The variable rc is being assigned with a value that is never read
and it is being updated later with a new value. The initialization is
redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/af_inet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index cf58e29cf746..c618e242490f 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1914,7 +1914,7 @@ static int __init inet_init(void)
 {
 	struct inet_protosw *q;
 	struct list_head *r;
-	int rc = -EINVAL;
+	int rc;
 
 	sock_skb_cb_check_size(sizeof(struct inet_skb_parm));
 
-- 
cgit v1.2.3-59-g8ed1b


From 2a7e978625e887cbb80cb2188cf5f955e4223fa1 Mon Sep 17 00:00:00 2001
From: Simon Wunderlich <sw@simonwunderlich.de>
Date: Mon, 13 Apr 2020 20:40:20 +0200
Subject: batman-adv: Start new development cycle

Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/main.h b/net/batman-adv/main.h
index 2a234d0ad445..61d8dbe8c954 100644
--- a/net/batman-adv/main.h
+++ b/net/batman-adv/main.h
@@ -13,7 +13,7 @@
 #define BATADV_DRIVER_DEVICE "batman-adv"
 
 #ifndef BATADV_SOURCE_VERSION
-#define BATADV_SOURCE_VERSION "2020.1"
+#define BATADV_SOURCE_VERSION "2020.2"
 #endif
 
 /* B.A.T.M.A.N. parameters */
-- 
cgit v1.2.3-59-g8ed1b


From c08dd06b3d25e8450a2e187ee6b7412d07c460af Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Thu, 26 Mar 2020 18:37:07 +0100
Subject: batman-adv: Fix spelling error in term buffer

checkpatch warns about a typo in the word bufFer which was introduced in
commit 2191c1bcbc64 ("batman-adv: kernel doc for types.h").

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/types.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/batman-adv/types.h b/net/batman-adv/types.h
index 4a17a66cc572..d152b8e81f61 100644
--- a/net/batman-adv/types.h
+++ b/net/batman-adv/types.h
@@ -1086,7 +1086,7 @@ struct batadv_priv_bla {
  * struct batadv_priv_debug_log - debug logging data
  */
 struct batadv_priv_debug_log {
-	/** @log_buff: buffer holding the logs (ring bufer) */
+	/** @log_buff: buffer holding the logs (ring buffer) */
 	char log_buff[BATADV_LOG_BUF_LEN];
 
 	/** @log_start: index of next character to read */
-- 
cgit v1.2.3-59-g8ed1b


From 9204a4f876b229505f4ab947124d1160b80bbb4c Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 13 Apr 2020 20:26:07 +0200
Subject: batman-adv: trace: Drop unneeded types.h include

The commit 04ae87a52074 ("ftrace: Rework event_create_dir()") restructured
various macros in the ftrace framework. These changes also had the nice
side effect that the linux/types.h include is no longer necessary to define
some of the types used by these macros.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/trace.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/batman-adv/trace.h b/net/batman-adv/trace.h
index f631b1e01b89..a87547570b4e 100644
--- a/net/batman-adv/trace.h
+++ b/net/batman-adv/trace.h
@@ -15,7 +15,6 @@
 #include <linux/percpu.h>
 #include <linux/printk.h>
 #include <linux/tracepoint.h>
-#include <linux/types.h>
 
 #undef TRACE_SYSTEM
 #define TRACE_SYSTEM batadv
-- 
cgit v1.2.3-59-g8ed1b


From 26893e7e928e1790852b072edb9bff3c1309e111 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 13 Apr 2020 21:23:29 +0200
Subject: batman-adv: Utilize prandom_u32_max for random [0, max) values

The kernel provides a function to create random values from 0 - (max-1)
since commit f337db64af05 ("random32: add prandom_u32_max and convert open
coded users"). Simply use this function to replace code sections which use
prandom_u32 and a handcrafted method to map it to the correct range.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
---
 net/batman-adv/bat_iv_ogm.c | 4 ++--
 net/batman-adv/bat_v_elp.c  | 2 +-
 net/batman-adv/bat_v_ogm.c  | 4 ++--
 3 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/batman-adv/bat_iv_ogm.c b/net/batman-adv/bat_iv_ogm.c
index a7c8dd7ae513..e87f19c82e8d 100644
--- a/net/batman-adv/bat_iv_ogm.c
+++ b/net/batman-adv/bat_iv_ogm.c
@@ -280,7 +280,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
 	unsigned int msecs;
 
 	msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
-	msecs += prandom_u32() % (2 * BATADV_JITTER);
+	msecs += prandom_u32_max(2 * BATADV_JITTER);
 
 	return jiffies + msecs_to_jiffies(msecs);
 }
@@ -288,7 +288,7 @@ batadv_iv_ogm_emit_send_time(const struct batadv_priv *bat_priv)
 /* when do we schedule a ogm packet to be sent */
 static unsigned long batadv_iv_ogm_fwd_send_time(void)
 {
-	return jiffies + msecs_to_jiffies(prandom_u32() % (BATADV_JITTER / 2));
+	return jiffies + msecs_to_jiffies(prandom_u32_max(BATADV_JITTER / 2));
 }
 
 /* apply hop penalty for a normal link */
diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 1e3172db7492..353e49c40e7f 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -49,7 +49,7 @@ static void batadv_v_elp_start_timer(struct batadv_hard_iface *hard_iface)
 	unsigned int msecs;
 
 	msecs = atomic_read(&hard_iface->bat_v.elp_interval) - BATADV_JITTER;
-	msecs += prandom_u32() % (2 * BATADV_JITTER);
+	msecs += prandom_u32_max(2 * BATADV_JITTER);
 
 	queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.elp_wq,
 			   msecs_to_jiffies(msecs));
diff --git a/net/batman-adv/bat_v_ogm.c b/net/batman-adv/bat_v_ogm.c
index 969466218999..0959d32be65c 100644
--- a/net/batman-adv/bat_v_ogm.c
+++ b/net/batman-adv/bat_v_ogm.c
@@ -88,7 +88,7 @@ static void batadv_v_ogm_start_queue_timer(struct batadv_hard_iface *hard_iface)
 	unsigned int msecs = BATADV_MAX_AGGREGATION_MS * 1000;
 
 	/* msecs * [0.9, 1.1] */
-	msecs += prandom_u32() % (msecs / 5) - (msecs / 10);
+	msecs += prandom_u32_max(msecs / 5) - (msecs / 10);
 	queue_delayed_work(batadv_event_workqueue, &hard_iface->bat_v.aggr_wq,
 			   msecs_to_jiffies(msecs / 1000));
 }
@@ -107,7 +107,7 @@ static void batadv_v_ogm_start_timer(struct batadv_priv *bat_priv)
 		return;
 
 	msecs = atomic_read(&bat_priv->orig_interval) - BATADV_JITTER;
-	msecs += prandom_u32() % (2 * BATADV_JITTER);
+	msecs += prandom_u32_max(2 * BATADV_JITTER);
 	queue_delayed_work(batadv_event_workqueue, &bat_priv->bat_v.ogm_wq,
 			   msecs_to_jiffies(msecs));
 }
-- 
cgit v1.2.3-59-g8ed1b


From 557e171434eb9bb43dbe71361775ae21ae95d4ed Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Thu, 16 Apr 2020 14:50:56 +0300
Subject: ath10k: rename ath10k_hif_swap_mailbox() to ath10k_hif_start_post()

Convert ath10k_hif_swap_mailbox() to a more generic op so that bus drivers can
do more than just swap the mailbox, for example set power save settings like in
the following sdio patch.

No functional changes, compile tested only.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587037859-28873-2-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.c | 2 +-
 drivers/net/wireless/ath/ath10k/hif.h  | 8 ++++----
 drivers/net/wireless/ath/ath10k/sdio.c | 4 ++--
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 52472bbcee1f..5926281c7e05 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -2714,7 +2714,7 @@ int ath10k_core_start(struct ath10k *ar, enum ath10k_firmware_mode mode,
 		goto err_hif_stop;
 	}
 
-	status = ath10k_hif_swap_mailbox(ar);
+	status = ath10k_hif_start_post(ar);
 	if (status) {
 		ath10k_err(ar, "failed to swap mailbox: %d\n", status);
 		goto err_hif_stop;
diff --git a/drivers/net/wireless/ath/ath10k/hif.h b/drivers/net/wireless/ath/ath10k/hif.h
index 0dd8973d0acf..2c5d61d98337 100644
--- a/drivers/net/wireless/ath/ath10k/hif.h
+++ b/drivers/net/wireless/ath/ath10k/hif.h
@@ -54,7 +54,7 @@ struct ath10k_hif_ops {
 	 */
 	void (*stop)(struct ath10k *ar);
 
-	int (*swap_mailbox)(struct ath10k *ar);
+	int (*start_post)(struct ath10k *ar);
 
 	int (*get_htt_tx_complete)(struct ath10k *ar);
 
@@ -139,10 +139,10 @@ static inline void ath10k_hif_stop(struct ath10k *ar)
 	return ar->hif.ops->stop(ar);
 }
 
-static inline int ath10k_hif_swap_mailbox(struct ath10k *ar)
+static inline int ath10k_hif_start_post(struct ath10k *ar)
 {
-	if (ar->hif.ops->swap_mailbox)
-		return ar->hif.ops->swap_mailbox(ar);
+	if (ar->hif.ops->start_post)
+		return ar->hif.ops->start_post(ar);
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 943db9f401d8..184b3545324e 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -1725,7 +1725,7 @@ static int ath10k_sdio_hif_diag_write_mem(struct ath10k *ar, u32 address,
 	return 0;
 }
 
-static int ath10k_sdio_hif_swap_mailbox(struct ath10k *ar)
+static int ath10k_sdio_hif_start_post(struct ath10k *ar)
 {
 	struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
 	u32 addr, val;
@@ -2047,7 +2047,7 @@ static const struct ath10k_hif_ops ath10k_sdio_hif_ops = {
 	.exchange_bmi_msg	= ath10k_sdio_bmi_exchange_msg,
 	.start			= ath10k_sdio_hif_start,
 	.stop			= ath10k_sdio_hif_stop,
-	.swap_mailbox		= ath10k_sdio_hif_swap_mailbox,
+	.start_post		= ath10k_sdio_hif_start_post,
 	.get_htt_tx_complete	= ath10k_sdio_get_htt_tx_complete,
 	.map_service_to_pipe	= ath10k_sdio_hif_map_service_to_pipe,
 	.get_default_pipe	= ath10k_sdio_hif_get_default_pipe,
-- 
cgit v1.2.3-59-g8ed1b


From 22f28076b6c3f86107424b3b1ddfd90f2628f354 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Thu, 16 Apr 2020 14:50:57 +0300
Subject: ath10k: improve power save performance for sdio

This patch sets a register to allow the mbox to enter sleep state when
there is no tx traffic, and wakes it up when tx traffic arrives. After
the mbox enters sleep state, the SoC is put to sleep by the firmware,
which saves power. Power consumption drops from about 90mW to about
10mW with this patch.

This patch only affects SDIO chips.

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00029.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587037859-28873-3-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/sdio.c | 130 ++++++++++++++++++++++++++-------
 drivers/net/wireless/ath/ath10k/sdio.h |  16 ++++
 2 files changed, 119 insertions(+), 27 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 184b3545324e..1626976293c7 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -1361,23 +1361,117 @@ static void ath10k_rx_indication_async_work(struct work_struct *work)
 		napi_schedule(&ar->napi);
 }
 
+static int ath10k_sdio_read_rtc_state(struct ath10k_sdio *ar_sdio, unsigned char *state)
+{
+	struct ath10k *ar = ar_sdio->ar;
+	unsigned char rtc_state = 0;
+	int ret = 0;
+
+	rtc_state = sdio_f0_readb(ar_sdio->func, ATH10K_CIS_RTC_STATE_ADDR, &ret);
+	if (ret) {
+		ath10k_warn(ar, "failed to read rtc state: %d\n", ret);
+		return ret;
+	}
+
+	*state = rtc_state & 0x3;
+
+	return ret;
+}
+
+static int ath10k_sdio_hif_set_mbox_sleep(struct ath10k *ar, bool enable_sleep)
+{
+	struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
+	u32 val;
+	int retry = ATH10K_CIS_READ_RETRY, ret = 0;
+	unsigned char rtc_state = 0;
+
+	sdio_claim_host(ar_sdio->func);
+
+	ret = ath10k_sdio_read32(ar, ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL, &val);
+	if (ret) {
+		ath10k_warn(ar, "failed to read fifo/chip control register: %d\n",
+			    ret);
+		goto release;
+	}
+
+	if (enable_sleep) {
+		val &= ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF;
+		ar_sdio->mbox_state = SDIO_MBOX_SLEEP_STATE;
+	} else {
+		val |= ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON;
+		ar_sdio->mbox_state = SDIO_MBOX_AWAKE_STATE;
+	}
+
+	ret = ath10k_sdio_write32(ar, ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL, val);
+	if (ret) {
+		ath10k_warn(ar, "failed to write to FIFO_TIMEOUT_AND_CHIP_CONTROL: %d",
+			    ret);
+	}
+
+	if (!enable_sleep) {
+		do {
+			udelay(ATH10K_CIS_READ_WAIT_4_RTC_CYCLE_IN_US);
+			ret = ath10k_sdio_read_rtc_state(ar_sdio, &rtc_state);
+
+			if (ret) {
+				ath10k_warn(ar, "failed to disable mbox sleep: %d", ret);
+				break;
+			}
+
+			ath10k_dbg(ar, ATH10K_DBG_SDIO, "sdio read rtc state: %d\n",
+				   rtc_state);
+
+			if (rtc_state == ATH10K_CIS_RTC_STATE_ON)
+				break;
+
+			udelay(ATH10K_CIS_XTAL_SETTLE_DURATION_IN_US);
+			retry--;
+		} while (retry > 0);
+	}
+
+release:
+	sdio_release_host(ar_sdio->func);
+
+	return ret;
+}
+
+static void ath10k_sdio_sleep_timer_handler(struct timer_list *t)
+{
+	struct ath10k_sdio *ar_sdio = from_timer(ar_sdio, t, sleep_timer);
+
+	ar_sdio->mbox_state = SDIO_MBOX_REQUEST_TO_SLEEP_STATE;
+	queue_work(ar_sdio->workqueue, &ar_sdio->wr_async_work);
+}
+
 static void ath10k_sdio_write_async_work(struct work_struct *work)
 {
 	struct ath10k_sdio *ar_sdio = container_of(work, struct ath10k_sdio,
 						   wr_async_work);
 	struct ath10k *ar = ar_sdio->ar;
 	struct ath10k_sdio_bus_request *req, *tmp_req;
+	struct ath10k_mbox_info *mbox_info = &ar_sdio->mbox_info;
 
 	spin_lock_bh(&ar_sdio->wr_async_lock);
 
 	list_for_each_entry_safe(req, tmp_req, &ar_sdio->wr_asyncq, list) {
 		list_del(&req->list);
 		spin_unlock_bh(&ar_sdio->wr_async_lock);
+
+		if (req->address >= mbox_info->htc_addr &&
+		    ar_sdio->mbox_state == SDIO_MBOX_SLEEP_STATE) {
+			ath10k_sdio_hif_set_mbox_sleep(ar, false);
+			mod_timer(&ar_sdio->sleep_timer, jiffies +
+				  msecs_to_jiffies(ATH10K_MIN_SLEEP_INACTIVITY_TIME_MS));
+		}
+
 		__ath10k_sdio_write_async(ar, req);
 		spin_lock_bh(&ar_sdio->wr_async_lock);
 	}
 
 	spin_unlock_bh(&ar_sdio->wr_async_lock);
+
+	if (ar_sdio->mbox_state == SDIO_MBOX_REQUEST_TO_SLEEP_STATE)
+		ath10k_sdio_hif_set_mbox_sleep(ar, true);
 }
 
 static int ath10k_sdio_prep_async_req(struct ath10k *ar, u32 addr,
@@ -1517,6 +1611,9 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar)
 
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "sdio power off\n");
 
+	del_timer_sync(&ar_sdio->sleep_timer);
+	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+
 	/* Disable the card */
 	sdio_claim_host(ar_sdio->func);
 
@@ -1617,33 +1714,6 @@ static int ath10k_sdio_hif_enable_intrs(struct ath10k *ar)
 	return ret;
 }
 
-static int ath10k_sdio_hif_set_mbox_sleep(struct ath10k *ar, bool enable_sleep)
-{
-	u32 val;
-	int ret;
-
-	ret = ath10k_sdio_read32(ar, ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL, &val);
-	if (ret) {
-		ath10k_warn(ar, "failed to read fifo/chip control register: %d\n",
-			    ret);
-		return ret;
-	}
-
-	if (enable_sleep)
-		val &= ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF;
-	else
-		val |= ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON;
-
-	ret = ath10k_sdio_write32(ar, ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL, val);
-	if (ret) {
-		ath10k_warn(ar, "failed to write to FIFO_TIMEOUT_AND_CHIP_CONTROL: %d",
-			    ret);
-		return ret;
-	}
-
-	return 0;
-}
-
 /* HIF diagnostics */
 
 static int ath10k_sdio_hif_diag_read(struct ath10k *ar, u32 address, void *buf,
@@ -1749,6 +1819,8 @@ static int ath10k_sdio_hif_start_post(struct ath10k *ar)
 		ar_sdio->swap_mbox = false;
 	}
 
+	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+
 	return 0;
 }
 
@@ -2076,6 +2148,8 @@ static int ath10k_sdio_pm_suspend(struct device *device)
 	if (!device_may_wakeup(ar->dev))
 		return 0;
 
+	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+
 	pm_flag = MMC_PM_KEEP_POWER;
 
 	ret = sdio_set_host_pm_flags(func, pm_flag);
@@ -2239,6 +2313,8 @@ static int ath10k_sdio_probe(struct sdio_func *func,
 		goto err_free_wq;
 	}
 
+	timer_setup(&ar_sdio->sleep_timer, ath10k_sdio_sleep_timer_handler, 0);
+
 	return 0;
 
 err_free_wq:
diff --git a/drivers/net/wireless/ath/ath10k/sdio.h b/drivers/net/wireless/ath/ath10k/sdio.h
index 1c987494ad22..29523600887d 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.h
+++ b/drivers/net/wireless/ath/ath10k/sdio.h
@@ -98,6 +98,20 @@
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_OFF 0xFFFEFFFF
 #define ATH10K_FIFO_TIMEOUT_AND_CHIP_CONTROL_DISABLE_SLEEP_ON 0x10000
 
+enum sdio_mbox_state {
+	SDIO_MBOX_UNKNOWN_STATE = 0,
+	SDIO_MBOX_REQUEST_TO_SLEEP_STATE = 1,
+	SDIO_MBOX_SLEEP_STATE = 2,
+	SDIO_MBOX_AWAKE_STATE = 3,
+};
+
+#define ATH10K_CIS_READ_WAIT_4_RTC_CYCLE_IN_US	125
+#define ATH10K_CIS_RTC_STATE_ADDR		0x1138
+#define ATH10K_CIS_RTC_STATE_ON			0x01
+#define ATH10K_CIS_XTAL_SETTLE_DURATION_IN_US	1500
+#define ATH10K_CIS_READ_RETRY			10
+#define ATH10K_MIN_SLEEP_INACTIVITY_TIME_MS	50
+
 /* TODO: remove this and use skb->cb instead, much cleaner approach */
 struct ath10k_sdio_bus_request {
 	struct list_head list;
@@ -218,6 +232,8 @@ struct ath10k_sdio {
 	spinlock_t wr_async_lock;
 
 	struct work_struct async_work_rx;
+	struct timer_list sleep_timer;
+	enum sdio_mbox_state mbox_state;
 };
 
 static inline struct ath10k_sdio *ath10k_sdio_priv(struct ath10k *ar)
-- 
cgit v1.2.3-59-g8ed1b


From 58921763210315fe96f590d9edb4f3952f8526ce Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Thu, 16 Apr 2020 14:50:58 +0300
Subject: ath10k: sdio: remove _hif_ prefix from functions not part of hif
 interface

The _hif_ prefix should be used only on functions that are part of
ath10k_hif_ops, so remove it from functions which should not have it.

No functional changes, compile tested only.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587037859-28873-4-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/sdio.c | 34 +++++++++++++++++-----------------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 1626976293c7..884e1a85e29f 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -1378,7 +1378,7 @@ static int ath10k_sdio_read_rtc_state(struct ath10k_sdio *ar_sdio, unsigned char
 	return ret;
 }
 
-static int ath10k_sdio_hif_set_mbox_sleep(struct ath10k *ar, bool enable_sleep)
+static int ath10k_sdio_set_mbox_sleep(struct ath10k *ar, bool enable_sleep)
 {
 	struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
 	u32 val;
@@ -1459,7 +1459,7 @@ static void ath10k_sdio_write_async_work(struct work_struct *work)
 
 		if (req->address >= mbox_info->htc_addr &&
 		    ar_sdio->mbox_state == SDIO_MBOX_SLEEP_STATE) {
-			ath10k_sdio_hif_set_mbox_sleep(ar, false);
+			ath10k_sdio_set_mbox_sleep(ar, false);
 			mod_timer(&ar_sdio->sleep_timer, jiffies +
 				  msecs_to_jiffies(ATH10K_MIN_SLEEP_INACTIVITY_TIME_MS));
 		}
@@ -1471,7 +1471,7 @@ static void ath10k_sdio_write_async_work(struct work_struct *work)
 	spin_unlock_bh(&ar_sdio->wr_async_lock);
 
 	if (ar_sdio->mbox_state == SDIO_MBOX_REQUEST_TO_SLEEP_STATE)
-		ath10k_sdio_hif_set_mbox_sleep(ar, true);
+		ath10k_sdio_set_mbox_sleep(ar, true);
 }
 
 static int ath10k_sdio_prep_async_req(struct ath10k *ar, u32 addr,
@@ -1538,7 +1538,7 @@ static void ath10k_sdio_irq_handler(struct sdio_func *func)
 
 /* sdio HIF functions */
 
-static int ath10k_sdio_hif_disable_intrs(struct ath10k *ar)
+static int ath10k_sdio_disable_intrs(struct ath10k *ar)
 {
 	struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
 	struct ath10k_sdio_irq_data *irq_data = &ar_sdio->irq_data;
@@ -1594,7 +1594,7 @@ static int ath10k_sdio_hif_power_up(struct ath10k *ar,
 
 	ar_sdio->is_disabled = false;
 
-	ret = ath10k_sdio_hif_disable_intrs(ar);
+	ret = ath10k_sdio_disable_intrs(ar);
 	if (ret)
 		return ret;
 
@@ -1612,7 +1612,7 @@ static void ath10k_sdio_hif_power_down(struct ath10k *ar)
 	ath10k_dbg(ar, ATH10K_DBG_BOOT, "sdio power off\n");
 
 	del_timer_sync(&ar_sdio->sleep_timer);
-	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+	ath10k_sdio_set_mbox_sleep(ar, true);
 
 	/* Disable the card */
 	sdio_claim_host(ar_sdio->func);
@@ -1666,7 +1666,7 @@ static int ath10k_sdio_hif_tx_sg(struct ath10k *ar, u8 pipe_id,
 	return 0;
 }
 
-static int ath10k_sdio_hif_enable_intrs(struct ath10k *ar)
+static int ath10k_sdio_enable_intrs(struct ath10k *ar)
 {
 	struct ath10k_sdio *ar_sdio = ath10k_sdio_priv(ar);
 	struct ath10k_sdio_irq_data *irq_data = &ar_sdio->irq_data;
@@ -1749,8 +1749,8 @@ out:
 	return ret;
 }
 
-static int ath10k_sdio_hif_diag_read32(struct ath10k *ar, u32 address,
-				       u32 *value)
+static int ath10k_sdio_diag_read32(struct ath10k *ar, u32 address,
+				   u32 *value)
 {
 	__le32 *val;
 	int ret;
@@ -1803,7 +1803,7 @@ static int ath10k_sdio_hif_start_post(struct ath10k *ar)
 
 	addr = host_interest_item_address(HI_ITEM(hi_acs_flags));
 
-	ret = ath10k_sdio_hif_diag_read32(ar, addr, &val);
+	ret = ath10k_sdio_diag_read32(ar, addr, &val);
 	if (ret) {
 		ath10k_warn(ar, "unable to read hi_acs_flags : %d\n", ret);
 		return ret;
@@ -1819,7 +1819,7 @@ static int ath10k_sdio_hif_start_post(struct ath10k *ar)
 		ar_sdio->swap_mbox = false;
 	}
 
-	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+	ath10k_sdio_set_mbox_sleep(ar, true);
 
 	return 0;
 }
@@ -1831,7 +1831,7 @@ static int ath10k_sdio_get_htt_tx_complete(struct ath10k *ar)
 
 	addr = host_interest_item_address(HI_ITEM(hi_acs_flags));
 
-	ret = ath10k_sdio_hif_diag_read32(ar, addr, &val);
+	ret = ath10k_sdio_diag_read32(ar, addr, &val);
 	if (ret) {
 		ath10k_warn(ar,
 			    "unable to read hi_acs_flags for htt tx comple : %d\n", ret);
@@ -1860,7 +1860,7 @@ static int ath10k_sdio_hif_start(struct ath10k *ar)
 	 * request before interrupts are disabled.
 	 */
 	msleep(20);
-	ret = ath10k_sdio_hif_disable_intrs(ar);
+	ret = ath10k_sdio_disable_intrs(ar);
 	if (ret)
 		return ret;
 
@@ -1882,19 +1882,19 @@ static int ath10k_sdio_hif_start(struct ath10k *ar)
 
 	sdio_release_host(ar_sdio->func);
 
-	ret = ath10k_sdio_hif_enable_intrs(ar);
+	ret = ath10k_sdio_enable_intrs(ar);
 	if (ret)
 		ath10k_warn(ar, "failed to enable sdio interrupts: %d\n", ret);
 
 	/* Enable sleep and then disable it again */
-	ret = ath10k_sdio_hif_set_mbox_sleep(ar, true);
+	ret = ath10k_sdio_set_mbox_sleep(ar, true);
 	if (ret)
 		return ret;
 
 	/* Wait for 20ms for the written value to take effect */
 	msleep(20);
 
-	ret = ath10k_sdio_hif_set_mbox_sleep(ar, false);
+	ret = ath10k_sdio_set_mbox_sleep(ar, false);
 	if (ret)
 		return ret;
 
@@ -2148,7 +2148,7 @@ static int ath10k_sdio_pm_suspend(struct device *device)
 	if (!device_may_wakeup(ar->dev))
 		return 0;
 
-	ath10k_sdio_hif_set_mbox_sleep(ar, true);
+	ath10k_sdio_set_mbox_sleep(ar, true);
 
 	pm_flag = MMC_PM_KEEP_POWER;
 
-- 
cgit v1.2.3-59-g8ed1b


From 96c64857983fdc623fa5899afdb0310bef196f68 Mon Sep 17 00:00:00 2001
From: Kalle Valo <kvalo@codeaurora.org>
Date: Thu, 16 Apr 2020 14:50:59 +0300
Subject: ath10k: hif: make send_complete_check op optional

That way we don't need to have an empty function in sdio.c.

No functional changes, compile tested only.

Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587037859-28873-5-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/hif.h  |  3 ++-
 drivers/net/wireless/ath/ath10k/sdio.c | 12 ------------
 drivers/net/wireless/ath/ath10k/usb.c  | 12 ------------
 3 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/hif.h b/drivers/net/wireless/ath/ath10k/hif.h
index 2c5d61d98337..9e45fd9073a6 100644
--- a/drivers/net/wireless/ath/ath10k/hif.h
+++ b/drivers/net/wireless/ath/ath10k/hif.h
@@ -170,7 +170,8 @@ static inline void ath10k_hif_get_default_pipe(struct ath10k *ar,
 static inline void ath10k_hif_send_complete_check(struct ath10k *ar,
 						  u8 pipe_id, int force)
 {
-	ar->hif.ops->send_complete_check(ar, pipe_id, force);
+	if (ar->hif.ops->send_complete_check)
+		ar->hif.ops->send_complete_check(ar, pipe_id, force);
 }
 
 static inline u16 ath10k_hif_get_free_queue_number(struct ath10k *ar,
diff --git a/drivers/net/wireless/ath/ath10k/sdio.c b/drivers/net/wireless/ath/ath10k/sdio.c
index 884e1a85e29f..e2aff2254a40 100644
--- a/drivers/net/wireless/ath/ath10k/sdio.c
+++ b/drivers/net/wireless/ath/ath10k/sdio.c
@@ -2101,17 +2101,6 @@ static void ath10k_sdio_hif_get_default_pipe(struct ath10k *ar,
 	*dl_pipe = 0;
 }
 
-/* This op is currently only used by htc_wait_target if the HTC ready
- * message times out. It is not applicable for SDIO since there is nothing
- * we can do if the HTC ready message does not arrive in time.
- * TODO: Make this op non mandatory by introducing a NULL check in the
- * hif op wrapper.
- */
-static void ath10k_sdio_hif_send_complete_check(struct ath10k *ar,
-						u8 pipe, int force)
-{
-}
-
 static const struct ath10k_hif_ops ath10k_sdio_hif_ops = {
 	.tx_sg			= ath10k_sdio_hif_tx_sg,
 	.diag_read		= ath10k_sdio_hif_diag_read,
@@ -2123,7 +2112,6 @@ static const struct ath10k_hif_ops ath10k_sdio_hif_ops = {
 	.get_htt_tx_complete	= ath10k_sdio_get_htt_tx_complete,
 	.map_service_to_pipe	= ath10k_sdio_hif_map_service_to_pipe,
 	.get_default_pipe	= ath10k_sdio_hif_get_default_pipe,
-	.send_complete_check	= ath10k_sdio_hif_send_complete_check,
 	.power_up		= ath10k_sdio_hif_power_up,
 	.power_down		= ath10k_sdio_hif_power_down,
 #ifdef CONFIG_PM
diff --git a/drivers/net/wireless/ath/ath10k/usb.c b/drivers/net/wireless/ath/ath10k/usb.c
index 1e0343081be9..b7daf344d012 100644
--- a/drivers/net/wireless/ath/ath10k/usb.c
+++ b/drivers/net/wireless/ath/ath10k/usb.c
@@ -693,17 +693,6 @@ static int ath10k_usb_hif_map_service_to_pipe(struct ath10k *ar, u16 svc_id,
 	return 0;
 }
 
-/* This op is currently only used by htc_wait_target if the HTC ready
- * message times out. It is not applicable for USB since there is nothing
- * we can do if the HTC ready message does not arrive in time.
- * TODO: Make this op non mandatory by introducing a NULL check in the
- * hif op wrapper.
- */
-static void ath10k_usb_hif_send_complete_check(struct ath10k *ar,
-					       u8 pipe, int force)
-{
-}
-
 static int ath10k_usb_hif_power_up(struct ath10k *ar,
 				   enum ath10k_firmware_mode fw_mode)
 {
@@ -737,7 +726,6 @@ static const struct ath10k_hif_ops ath10k_usb_hif_ops = {
 	.stop			= ath10k_usb_hif_stop,
 	.map_service_to_pipe	= ath10k_usb_hif_map_service_to_pipe,
 	.get_default_pipe	= ath10k_usb_hif_get_default_pipe,
-	.send_complete_check	= ath10k_usb_hif_send_complete_check,
 	.get_free_queue_number	= ath10k_usb_hif_get_free_queue_number,
 	.power_up		= ath10k_usb_hif_power_up,
 	.power_down		= ath10k_usb_hif_power_down,
-- 
cgit v1.2.3-59-g8ed1b


From bec095ab477dcc11fbe448c6fae6c2c61a876f37 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 15:02:32 +0800
Subject: rtlwifi: rtl8188ee: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c:70:1-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c:72:1-34: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200418070236.9620-2-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
index 4865639ac9ea..02b77521b5cd 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/sw.c
@@ -67,9 +67,9 @@ static int rtl88e_init_sw_vars(struct ieee80211_hw *hw)
 	char *fw_name;
 
 	rtl8188ee_bt_reg_init(hw);
-	rtlpriv->dm.dm_initialgain_enable = 1;
+	rtlpriv->dm.dm_initialgain_enable = true;
 	rtlpriv->dm.dm_flag = 0;
-	rtlpriv->dm.disable_framebursting = 0;
+	rtlpriv->dm.disable_framebursting = false;
 	rtlpriv->dm.thermalvalue = 0;
 	rtlpci->transmit_config = CFENDFORM | BIT(15);
 
-- 
cgit v1.2.3-59-g8ed1b


From 23c2ddb574c621cc2c5d9be0bb99a59f18f5863c Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 15:02:33 +0800
Subject: rtlwifi: rtl8723ae: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c:81:1-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c:83:1-34: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200418070236.9620-3-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
index ea86d5bf33d2..7828acb1de3f 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/sw.c
@@ -78,9 +78,9 @@ static int rtl8723e_init_sw_vars(struct ieee80211_hw *hw)
 
 	rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
 
-	rtlpriv->dm.dm_initialgain_enable = 1;
+	rtlpriv->dm.dm_initialgain_enable = true;
 	rtlpriv->dm.dm_flag = 0;
-	rtlpriv->dm.disable_framebursting = 0;
+	rtlpriv->dm.disable_framebursting = false;
 	rtlpriv->dm.thermalvalue = 0;
 	rtlpci->transmit_config = CFENDFORM | BIT(12) | BIT(13);
 
-- 
cgit v1.2.3-59-g8ed1b


From c13a83b01010c94ad7fe68161fd4dae3767d3ffe Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 15:02:34 +0800
Subject: rtlwifi: rtl8192ee: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c:78:1-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c:80:1-34: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200418070236.9620-4-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
index b337d599b6f4..7a16563b3a5d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/sw.c
@@ -75,9 +75,9 @@ static int rtl92ee_init_sw_vars(struct ieee80211_hw *hw)
 	rtlpci->msi_support = rtlpriv->cfg->mod_params->msi_support;
 	rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
 
-	rtlpriv->dm.dm_initialgain_enable = 1;
+	rtlpriv->dm.dm_initialgain_enable = true;
 	rtlpriv->dm.dm_flag = 0;
-	rtlpriv->dm.disable_framebursting = 0;
+	rtlpriv->dm.disable_framebursting = false;
 	rtlpci->transmit_config = CFENDFORM | BIT(15);
 
 	/*just 2.4G band*/
-- 
cgit v1.2.3-59-g8ed1b


From 47361089d987c367bedd778eba66843601d347df Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 15:02:35 +0800
Subject: rtlwifi: rtl8723be: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c:77:1-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c:79:1-34: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200418070236.9620-5-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
index 36209ac5b208..d220e8955e37 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723be/sw.c
@@ -74,9 +74,9 @@ static int rtl8723be_init_sw_vars(struct ieee80211_hw *hw)
 	rtl8723be_bt_reg_init(hw);
 	rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
 
-	rtlpriv->dm.dm_initialgain_enable = 1;
+	rtlpriv->dm.dm_initialgain_enable = true;
 	rtlpriv->dm.dm_flag = 0;
-	rtlpriv->dm.disable_framebursting = 0;
+	rtlpriv->dm.disable_framebursting = false;
 	rtlpriv->dm.thermalvalue = 0;
 	rtlpci->transmit_config = CFENDFORM | BIT(15) | BIT(24) | BIT(25);
 
-- 
cgit v1.2.3-59-g8ed1b


From e8277abd453d6824a92b42989a248969f4fbc988 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sat, 18 Apr 2020 15:02:36 +0800
Subject: rtlwifi: rtl8821ae: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c:79:1-34: WARNING:
Assignment of 0/1 to bool variable
drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c:81:1-34: WARNING:
Assignment of 0/1 to bool variable

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200418070236.9620-6-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
index d8df816753cb..950542a24e31 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8821ae/sw.c
@@ -76,9 +76,9 @@ static int rtl8821ae_init_sw_vars(struct ieee80211_hw *hw)
 	rtl8821ae_bt_reg_init(hw);
 	rtlpriv->btcoexist.btc_ops = rtl_btc_get_ops_pointer();
 
-	rtlpriv->dm.dm_initialgain_enable = 1;
+	rtlpriv->dm.dm_initialgain_enable = true;
 	rtlpriv->dm.dm_flag = 0;
-	rtlpriv->dm.disable_framebursting = 0;
+	rtlpriv->dm.disable_framebursting = false;
 	rtlpriv->dm.thermalvalue = 0;
 	rtlpci->transmit_config = CFENDFORM | BIT(15) | BIT(24) | BIT(25);
 
-- 
cgit v1.2.3-59-g8ed1b


From 887e74239805217c9c583584a382e66583e0556b Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 20 Apr 2020 12:26:58 +0800
Subject: rtlwifi: rtl8723ae: fix warning comparison to bool

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c:617:14-20: WARNING:
Comparison to bool
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c:622:13-19: WARNING:
Comparison to bool
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c:627:14-20: WARNING:
Comparison to bool
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c:632:13-19: WARNING:
Comparison to bool
drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c:937:5-13: WARNING:
Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420042658.18733-1-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
index 655460f61bbc..7a46c6a9deae 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8723ae/hw.c
@@ -614,22 +614,22 @@ static bool _rtl8723e_llt_table_init(struct ieee80211_hw *hw)
 
 	for (i = 0; i < (txpktbuf_bndy - 1); i++) {
 		status = _rtl8723e_llt_write(hw, i, i + 1);
-		if (true != status)
+		if (!status)
 			return status;
 	}
 
 	status = _rtl8723e_llt_write(hw, (txpktbuf_bndy - 1), 0xFF);
-	if (true != status)
+	if (!status)
 		return status;
 
 	for (i = txpktbuf_bndy; i < maxpage; i++) {
 		status = _rtl8723e_llt_write(hw, i, (i + 1));
-		if (true != status)
+		if (!status)
 			return status;
 	}
 
 	status = _rtl8723e_llt_write(hw, maxpage, txpktbuf_bndy);
-	if (true != status)
+	if (!status)
 		return status;
 
 	rtl_write_byte(rtlpriv, REG_CR, 0xff);
@@ -934,7 +934,7 @@ int rtl8723e_hw_init(struct ieee80211_hw *hw)
 
 	rtlpriv->intf_ops->disable_aspm(hw);
 	rtstatus = _rtl8712e_init_mac(hw);
-	if (rtstatus != true) {
+	if (!rtstatus) {
 		pr_err("Init MAC failed\n");
 		err = 1;
 		goto exit;
-- 
cgit v1.2.3-59-g8ed1b


From 811853da541a6a9be335c1f9dc9f20ca8bde65ed Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:47 +0800
Subject: rtw88: 8723d: Add basic chip capabilities

RTL8723DE is an 11n 1x1 2.4G single band chip with the
following capabilities:

 - TX/RX BD size: 16/8
 - TX/RX desc size: 40/24
 - physical/logical/protected efuse size: 512/512/96
 - TX gain index factor: 1
 - max TX power index: 0x3F
 - band: 2G
 - HT: supported
 - VHT: not supported

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h          |  2 +
 drivers/net/wireless/realtek/rtw88/pci.c           |  3 ++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c      | 44 ++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h      |  8 ++++
 .../net/wireless/realtek/rtw88/rtw8723d_table.c    |  7 ++++
 .../net/wireless/realtek/rtw88/rtw8723d_table.h    |  8 ++++
 6 files changed, 72 insertions(+)
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723d.c
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723d.h
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723d_table.h

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index c9edcabd7c42..be74533320ad 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -41,6 +41,7 @@ extern unsigned int rtw_debug_mask;
 extern const struct ieee80211_ops rtw_ops;
 extern struct rtw_chip_info rtw8822b_hw_spec;
 extern struct rtw_chip_info rtw8822c_hw_spec;
+extern struct rtw_chip_info rtw8723d_hw_spec;
 
 #define RTW_MAX_CHANNEL_NUM_2G 14
 #define RTW_MAX_CHANNEL_NUM_5G 49
@@ -183,6 +184,7 @@ enum rtw_wireless_set {
 enum rtw_chip_type {
 	RTW_CHIP_TYPE_8822B,
 	RTW_CHIP_TYPE_8822C,
+	RTW_CHIP_TYPE_8723D,
 };
 
 enum rtw_tx_queue_type {
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index e37c71495c0d..b3e76b579af9 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1572,6 +1572,9 @@ static const struct pci_device_id rtw_pci_id_table[] = {
 #endif
 #ifdef CONFIG_RTW88_8822CE
 	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xC822, rtw8822c_hw_spec) },
+#endif
+#ifdef CONFIG_RTW88_8723DE
+	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xD723, rtw8723d_hw_spec) },
 #endif
 	{},
 };
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
new file mode 100644
index 000000000000..cccf05ee6807
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#include "main.h"
+#include "coex.h"
+#include "fw.h"
+#include "tx.h"
+#include "rx.h"
+#include "phy.h"
+#include "rtw8723d.h"
+#include "rtw8723d_table.h"
+#include "mac.h"
+#include "reg.h"
+#include "debug.h"
+
+static struct rtw_chip_ops rtw8723d_ops = {
+	.set_antenna		= NULL,
+};
+
+struct rtw_chip_info rtw8723d_hw_spec = {
+	.ops = &rtw8723d_ops,
+	.id = RTW_CHIP_TYPE_8723D,
+	.fw_name = "rtw88/rtw8723d_fw.bin",
+	.tx_pkt_desc_sz = 40,
+	.tx_buf_desc_sz = 16,
+	.rx_pkt_desc_sz = 24,
+	.rx_buf_desc_sz = 8,
+	.phy_efuse_size = 512,
+	.log_efuse_size = 512,
+	.ptct_efuse_size = 96 + 1,
+	.txgi_factor = 1,
+	.is_pwr_by_rate_dec = true,
+	.max_power_index = 0x3f,
+	.csi_buf_pg_num = 0,
+	.band = RTW_BAND_2G,
+	.ht_supported = true,
+	.vht_supported = false,
+	.lps_deep_mode_supported = 0,
+	.sys_func_en = 0xFD,
+};
+EXPORT_SYMBOL(rtw8723d_hw_spec);
+
+MODULE_FIRMWARE("rtw88/rtw8723d_fw.bin");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
new file mode 100644
index 000000000000..0b784cfc34c6
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#ifndef __RTW8723D_H__
+#define __RTW8723D_H__
+
+#endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
new file mode 100644
index 000000000000..b22b4b0f2fcf
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
@@ -0,0 +1,7 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#include "main.h"
+#include "phy.h"
+#include "rtw8723d_table.h"
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h
new file mode 100644
index 000000000000..ea5933ffd043
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h
@@ -0,0 +1,8 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#ifndef __RTW8723D_TABLE_H__
+#define __RTW8723D_TABLE_H__
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 93ae973fb47df112326e9a3657302f990934b327 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:48 +0800
Subject: rtw88: 8723d: add beamform wrapper functions

8723D doesn't support beamforming because rtw88 only supports VHT
beamforming and 8723D doesn't have VHT capability. Even so,
BSS_CHANGED_MU_GROUPS is still marked as changed on disassociation.
So, add wrapper functions for all beamforming ops that check the op is
not NULL before calling it.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/bf.c       |  7 +++----
 drivers/net/wireless/realtek/rtw88/bf.h       | 22 ++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/mac80211.c |  7 ++-----
 drivers/net/wireless/realtek/rtw88/main.c     |  7 +++----
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |  3 +++
 5 files changed, 33 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/bf.c b/drivers/net/wireless/realtek/rtw88/bf.c
index b6d1d71f4d30..a5912da327e2 100644
--- a/drivers/net/wireless/realtek/rtw88/bf.c
+++ b/drivers/net/wireless/realtek/rtw88/bf.c
@@ -10,7 +10,6 @@
 void rtw_bf_disassoc(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 		     struct ieee80211_bss_conf *bss_conf)
 {
-	struct rtw_chip_info *chip = rtwdev->chip;
 	struct rtw_vif *rtwvif = (struct rtw_vif *)vif->drv_priv;
 	struct rtw_bfee *bfee = &rtwvif->bfee;
 	struct rtw_bf_info *bfinfo = &rtwdev->bf_info;
@@ -23,7 +22,7 @@ void rtw_bf_disassoc(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 	else if (bfee->role == RTW_BFEE_SU)
 		bfinfo->bfer_su_cnt--;
 
-	chip->ops->config_bfee(rtwdev, rtwvif, bfee, false);
+	rtw_chip_config_bfee(rtwdev, rtwvif, bfee, false);
 
 	bfee->role = RTW_BFEE_NONE;
 }
@@ -71,7 +70,7 @@ void rtw_bf_assoc(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 		bfee->aid = bss_conf->aid;
 		bfinfo->bfer_mu_cnt++;
 
-		chip->ops->config_bfee(rtwdev, rtwvif, bfee, true);
+		rtw_chip_config_bfee(rtwdev, rtwvif, bfee, true);
 	} else if ((ic_vht_cap->cap & IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE) &&
 		   (vht_cap->cap & IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE)) {
 		if (bfinfo->bfer_su_cnt >= chip->bfer_su_max_num) {
@@ -97,7 +96,7 @@ void rtw_bf_assoc(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 			}
 		}
 
-		chip->ops->config_bfee(rtwdev, rtwvif, bfee, true);
+		rtw_chip_config_bfee(rtwdev, rtwvif, bfee, true);
 	}
 
 out_unlock:
diff --git a/drivers/net/wireless/realtek/rtw88/bf.h b/drivers/net/wireless/realtek/rtw88/bf.h
index 96a8216dd11f..17855edb5006 100644
--- a/drivers/net/wireless/realtek/rtw88/bf.h
+++ b/drivers/net/wireless/realtek/rtw88/bf.h
@@ -89,4 +89,26 @@ void rtw_bf_set_gid_table(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 void rtw_bf_phy_init(struct rtw_dev *rtwdev);
 void rtw_bf_cfg_csi_rate(struct rtw_dev *rtwdev, u8 rssi, u8 cur_rate,
 			 u8 fixrate_en, u8 *new_rate);
+static inline void rtw_chip_config_bfee(struct rtw_dev *rtwdev, struct rtw_vif *vif,
+					struct rtw_bfee *bfee, bool enable)
+{
+	if (rtwdev->chip->ops->config_bfee)
+		rtwdev->chip->ops->config_bfee(rtwdev, vif, bfee, enable);
+}
+
+static inline void rtw_chip_set_gid_table(struct rtw_dev *rtwdev,
+					  struct ieee80211_vif *vif,
+					  struct ieee80211_bss_conf *conf)
+{
+	if (rtwdev->chip->ops->set_gid_table)
+		rtwdev->chip->ops->set_gid_table(rtwdev, vif, conf);
+}
+
+static inline void rtw_chip_cfg_csi_rate(struct rtw_dev *rtwdev, u8 rssi, u8 cur_rate,
+					 u8 fixrate_en, u8 *new_rate)
+{
+	if (rtwdev->chip->ops->cfg_csi_rate)
+		rtwdev->chip->ops->cfg_csi_rate(rtwdev, rssi, cur_rate,
+						fixrate_en, new_rate);
+}
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index a2e6ef4ad9ee..98d2ac22f6f6 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -375,11 +375,8 @@ static void rtw_ops_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_BEACON)
 		rtw_fw_download_rsvd_page(rtwdev);
 
-	if (changed & BSS_CHANGED_MU_GROUPS) {
-		struct rtw_chip_info *chip = rtwdev->chip;
-
-		chip->ops->set_gid_table(rtwdev, vif, conf);
-	}
+	if (changed & BSS_CHANGED_MU_GROUPS)
+		rtw_chip_set_gid_table(rtwdev, vif, conf);
 
 	if (changed & BSS_CHANGED_ERP_SLOT)
 		rtw_conf_tx(rtwdev, rtwvif);
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 1e1d2c774287..6dfe4895c352 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -137,7 +137,6 @@ struct rtw_watch_dog_iter_data {
 static void rtw_dynamic_csi_rate(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif)
 {
 	struct rtw_bf_info *bf_info = &rtwdev->bf_info;
-	struct rtw_chip_info *chip = rtwdev->chip;
 	u8 fix_rate_enable = 0;
 	u8 new_csi_rate_idx;
 
@@ -145,9 +144,9 @@ static void rtw_dynamic_csi_rate(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif)
 	    rtwvif->bfee.role != RTW_BFEE_MU)
 		return;
 
-	chip->ops->cfg_csi_rate(rtwdev, rtwdev->dm_info.min_rssi,
-				bf_info->cur_csi_rpt_rate,
-				fix_rate_enable, &new_csi_rate_idx);
+	rtw_chip_cfg_csi_rate(rtwdev, rtwdev->dm_info.min_rssi,
+			      bf_info->cur_csi_rpt_rate,
+			      fix_rate_enable, &new_csi_rate_idx);
 
 	if (new_csi_rate_idx != bf_info->cur_csi_rpt_rate)
 		bf_info->cur_csi_rpt_rate = new_csi_rate_idx;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index cccf05ee6807..5798a5804af3 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -16,6 +16,9 @@
 
 static struct rtw_chip_ops rtw8723d_ops = {
 	.set_antenna		= NULL,
+	.config_bfee		= NULL,
+	.set_gid_table		= NULL,
+	.cfg_csi_rate		= NULL,
 };
 
 struct rtw_chip_info rtw8723d_hw_spec = {
-- 
cgit v1.2.3-59-g8ed1b


From c57bd7c3af9974ad432c46c0373a70d75a2d9e08 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:49 +0800
Subject: rtw88: 8723d: Add power sequence

Add the corresponding power-on and power-off sequences for 8723D devices.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 403 ++++++++++++++++++++++++++
 2 files changed, 404 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index be74533320ad..e852ab194315 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -847,6 +847,7 @@ struct rtw_chip_ops {
 #define RTW_PWR_INTF_PCI_MSK	BIT(2)
 #define RTW_PWR_INTF_ALL_MSK	(BIT(0) | BIT(1) | BIT(2) | BIT(3))
 
+#define RTW_PWR_CUT_TEST_MSK	BIT(0)
 #define RTW_PWR_CUT_A_MSK	BIT(1)
 #define RTW_PWR_CUT_B_MSK	BIT(2)
 #define RTW_PWR_CUT_C_MSK	BIT(3)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 5798a5804af3..5b97730f1407 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -21,6 +21,407 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.cfg_csi_rate		= NULL,
 };
 
+static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(7), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
+	{0x004A,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), 0},
+	{0x0023,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), 0},
+	{0x0301,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd trans_cardemu_to_act_8723d[] = {
+	{0x0020,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0001,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_DELAY, 1, RTW_PWR_DELAY_MS},
+	{0x0000,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(5), 0},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, (BIT(4) | BIT(3) | BIT(2)), 0},
+	{0x0075,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0006,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
+	{0x0075,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0006,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, (BIT(1) | BIT(0)), 0},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(7), 0},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, (BIT(4) | BIT(3)), 0},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, BIT(0), 0},
+	{0x0010,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(6), BIT(6)},
+	{0x0049,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), BIT(1)},
+	{0x0063,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), BIT(1)},
+	{0x0062,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0058,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x005A,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), BIT(1)},
+	{0x0068,
+	 RTW_PWR_CUT_TEST_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(3), BIT(3)},
+	{0x0069,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(6), BIT(6)},
+	{0x001f,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
+	{0x0077,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
+	{0x001f,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x07},
+	{0x0077,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x07},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd *card_enable_flow_8723d[] = {
+	trans_carddis_to_cardemu_8723d,
+	trans_cardemu_to_act_8723d,
+	NULL
+};
+
+static const struct rtw_pwr_seq_cmd trans_act_to_lps_8723d[] = {
+	{0x0301,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0xFF},
+	{0x0522,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0xFF},
+	{0x05F8,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, 0xFF, 0},
+	{0x05F9,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, 0xFF, 0},
+	{0x05FA,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, 0xFF, 0},
+	{0x05FB,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, 0xFF, 0},
+	{0x0002,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0002,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_DELAY, 0, RTW_PWR_DELAY_US},
+	{0x0002,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0100,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x03},
+	{0x0101,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0093,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
+	{0x0553,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(5), BIT(5)},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd trans_act_to_pre_carddis_8723d[] = {
+	{0x0003,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(2), 0},
+	{0x0080,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8723d[] = {
+	{0x0002,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0049,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0006,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), BIT(1)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_POLLING, BIT(1), 0},
+	{0x0010,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(6), 0},
+	{0x0000,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(5), BIT(5)},
+	{0x0020,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8723d[] = {
+	{0x0007,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x20},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), BIT(3)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(2), BIT(2)},
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_PCI_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), BIT(3) | BIT(4)},
+	{0x004A,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_USB_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 1},
+	{0x0023,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), 0},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd trans_act_to_post_carddis_8723d[] = {
+	{0x001D,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x001D,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x001C,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x0E},
+	{0xFFFF,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_ALL_MSK,
+	 0,
+	 RTW_PWR_CMD_END, 0, 0},
+};
+
+static const struct rtw_pwr_seq_cmd *card_disable_flow_8723d[] = {
+	trans_act_to_lps_8723d,
+	trans_act_to_pre_carddis_8723d,
+	trans_act_to_cardemu_8723d,
+	trans_cardemu_to_carddis_8723d,
+	trans_act_to_post_carddis_8723d,
+	NULL
+};
+
 struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
@@ -41,6 +442,8 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.vht_supported = false,
 	.lps_deep_mode_supported = 0,
 	.sys_func_en = 0xFD,
+	.pwr_on_seq = card_enable_flow_8723d,
+	.pwr_off_seq = card_disable_flow_8723d,
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
-- 
cgit v1.2.3-59-g8ed1b


From e0c27cdbbd414877864773152ad0291913e18eae Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:50 +0800
Subject: rtw88: 8723d: Add RF read/write ops

8723D uses SIPI to read RF registers indirectly instead of reading them
directly, so introduce a new struct rtw_rf_sipi_addr and a new function
rtw_phy_read_rf_sipi(). Since other chips don't use the new function,
only 8723D needs to fill struct rtw_rf_sipi_addr in rtw_chip_info.

Because there are now two kinds of functions for reading RF registers,
change rtw_phy_read_rf() to chip->ops->read_rf() in
rtw_phy_write_rf_reg_sipi() so that we can switch to the proper RF read
function depending on the type of the chip.

Though 8723D is a 1x1 chip, it has two RF PHYs and we can switch to
one of them, and that one should be configured properly. Hence, add
fix_rf_phy_num to struct rtw_chip_info to allow the driver to set the
registers of either PHY for 8723D, even though it is only 1x1. Another
variable, rf_phy_num, is introduced to hold the limit on the number of
RF paths we can access, and its value is:
	rf_phy_num = (fix_rf_phy_num ? fix_rf_phy_num : rf_path_num)

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-5-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.c     |  5 +++
 drivers/net/wireless/realtek/rtw88/main.h     | 10 +++++
 drivers/net/wireless/realtek/rtw88/phy.c      | 56 +++++++++++++++++++++++++--
 drivers/net/wireless/realtek/rtw88/phy.h      |  6 +++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 12 ++++++
 5 files changed, 85 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index 6dfe4895c352..c851830132d0 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -472,6 +472,7 @@ static u8 hw_bw_cap_to_bitamp(u8 bw_cap)
 static void rtw_hw_config_rf_ant_num(struct rtw_dev *rtwdev, u8 hw_ant_num)
 {
 	struct rtw_hal *hal = &rtwdev->hal;
+	struct rtw_chip_info *chip = rtwdev->chip;
 
 	if (hw_ant_num == EFUSE_HW_CAP_IGNORE ||
 	    hw_ant_num >= hal->rf_path_num)
@@ -481,6 +482,8 @@ static void rtw_hw_config_rf_ant_num(struct rtw_dev *rtwdev, u8 hw_ant_num)
 	case 1:
 		hal->rf_type = RF_1T1R;
 		hal->rf_path_num = 1;
+		if (!chip->fix_rf_phy_num)
+			hal->rf_phy_num = hal->rf_path_num;
 		hal->antenna_tx = BB_PATH_A;
 		hal->antenna_rx = BB_PATH_A;
 		break;
@@ -1130,6 +1133,8 @@ static int rtw_chip_parameter_setup(struct rtw_dev *rtwdev)
 		hal->antenna_tx = BB_PATH_A;
 		hal->antenna_rx = BB_PATH_A;
 	}
+	hal->rf_phy_num = chip->fix_rf_phy_num ? chip->fix_rf_phy_num :
+			  hal->rf_path_num;
 
 	efuse->physical_size = chip->phy_efuse_size;
 	efuse->logical_size = chip->log_efuse_size;
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index e852ab194315..8f15fc113af0 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -529,6 +529,13 @@ struct rtw_reg_domain {
 	u8 domain;
 };
 
+struct rtw_rf_sipi_addr { /* per-RF-path register addresses used by rtw_phy_read_rf_sipi() */
+	u32 hssi_1; /* BIT(8) is read to choose between lssi_read_pi and lssi_read */
+	u32 hssi_2; /* receives the RF address (LSSI_READ_ADDR_MASK) and the read-edge bit */
+	u32 lssi_read; /* readback register used when hssi_1 BIT(8) reads as 0 */
+	u32 lssi_read_pi; /* readback register used when hssi_1 BIT(8) reads as non-zero */
+};
+
 struct rtw_backup_info {
 	u8 len;
 	u32 reg;
@@ -1087,6 +1094,8 @@ struct rtw_chip_info {
 	const struct rtw_hw_reg *dig;
 	u32 rf_base_addr[2];
 	u32 rf_sipi_addr[2];
+	const struct rtw_rf_sipi_addr *rf_sipi_read_addr;
+	u8 fix_rf_phy_num;
 
 	const struct rtw_table *mac_tbl;
 	const struct rtw_table *agc_tbl;
@@ -1571,6 +1580,7 @@ struct rtw_hal {
 	u8 sec_ch_offset;
 	u8 rf_type;
 	u8 rf_path_num;
+	u8 rf_phy_num;
 	u32 antenna_tx;
 	u32 antenna_rx;
 	u8 bfee_sts_cap;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 8793dd22188f..8489abfdc12e 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -679,7 +679,7 @@ u32 rtw_phy_read_rf(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 	const u32 *base_addr = chip->rf_base_addr;
 	u32 val, direct_addr;
 
-	if (rf_path >= hal->rf_path_num) {
+	if (rf_path >= hal->rf_phy_num) {
 		rtw_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
 		return INV_RF_DATA;
 	}
@@ -693,6 +693,54 @@ u32 rtw_phy_read_rf(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 	return val;
 }
 
+u32 rtw_phy_read_rf_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
+			 u32 addr, u32 mask)
+{ /* Indirect RF register read via the chip's SIPI read registers; returns the masked field or INV_RF_DATA on error. */
+	struct rtw_hal *hal = &rtwdev->hal;
+	struct rtw_chip_info *chip = rtwdev->chip;
+	const struct rtw_rf_sipi_addr *rf_sipi_addr; /* registers of the requested path */
+	const struct rtw_rf_sipi_addr *rf_sipi_addr_a; /* registers of path A, used for the edge toggle */
+	u32 val32;
+	u32 en_pi;
+	u32 r_addr;
+	u32 shift;
+
+	if (rf_path >= hal->rf_phy_num) { /* bound is rf_phy_num, not rf_path_num */
+		rtw_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
+		return INV_RF_DATA;
+	}
+
+	if (!chip->rf_sipi_read_addr) { /* chip info carries no SIPI read address table */
+		rtw_err(rtwdev, "rf_sipi_read_addr isn't defined\n");
+		return INV_RF_DATA;
+	}
+
+	rf_sipi_addr = &chip->rf_sipi_read_addr[rf_path];
+	rf_sipi_addr_a = &chip->rf_sipi_read_addr[RF_PATH_A];
+
+	addr &= 0xff; /* only the low 8 bits are used as the RF register address */
+
+	val32 = rtw_read32(rtwdev, rf_sipi_addr->hssi_2);
+	val32 = (val32 & ~LSSI_READ_ADDR_MASK) | (addr << 23); /* replace bits 30:23 with addr, keep the rest */
+	rtw_write32(rtwdev, rf_sipi_addr->hssi_2, val32);
+
+	/* toggle read edge of path A */
+	val32 = rtw_read32(rtwdev, rf_sipi_addr_a->hssi_2); /* always path A, whatever rf_path is */
+	rtw_write32(rtwdev, rf_sipi_addr_a->hssi_2, val32 & ~LSSI_READ_EDGE_MASK); /* edge bit cleared ... */
+	rtw_write32(rtwdev, rf_sipi_addr_a->hssi_2, val32 | LSSI_READ_EDGE_MASK); /* ... then set */
+
+	udelay(120); /* NOTE(review): presumably gives the hardware time to complete the read - confirm the required delay */
+
+	en_pi = rtw_read32_mask(rtwdev, rf_sipi_addr->hssi_1, BIT(8)); /* selects which readback register is used */
+	r_addr = en_pi ? rf_sipi_addr->lssi_read_pi : rf_sipi_addr->lssi_read;
+
+	val32 = rtw_read32_mask(rtwdev, r_addr, LSSI_READ_DATA_MASK); /* data field is the low 20 bits */
+
+	shift = __ffs(mask); /* NOTE(review): mask == 0 is not checked here; confirm callers never pass 0 */
+
+	return (val32 & mask) >> shift; /* selected field, shifted down by __ffs(mask) */
+}
+
 bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 			       u32 addr, u32 mask, u32 data)
 {
@@ -703,7 +751,7 @@ bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 	u32 old_data = 0;
 	u32 shift;
 
-	if (rf_path >= hal->rf_path_num) {
+	if (rf_path >= hal->rf_phy_num) {
 		rtw_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
 		return false;
 	}
@@ -712,7 +760,7 @@ bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 	mask &= RFREG_MASK;
 
 	if (mask != RFREG_MASK) {
-		old_data = rtw_phy_read_rf(rtwdev, rf_path, addr, RFREG_MASK);
+		old_data = chip->ops->read_rf(rtwdev, rf_path, addr, RFREG_MASK);
 
 		if (old_data == INV_RF_DATA) {
 			rtw_err(rtwdev, "Write fail, rf is disabled\n");
@@ -740,7 +788,7 @@ bool rtw_phy_write_rf_reg(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 	const u32 *base_addr = chip->rf_base_addr;
 	u32 direct_addr;
 
-	if (rf_path >= hal->rf_path_num) {
+	if (rf_path >= hal->rf_phy_num) {
 		rtw_err(rtwdev, "unsupported rf path (%d)\n", rf_path);
 		return false;
 	}
diff --git a/drivers/net/wireless/realtek/rtw88/phy.h b/drivers/net/wireless/realtek/rtw88/phy.h
index af916d8784cd..413bf7165cc0 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.h
+++ b/drivers/net/wireless/realtek/rtw88/phy.h
@@ -21,6 +21,8 @@ void rtw_phy_dynamic_mechanism(struct rtw_dev *rtwdev);
 u8 rtw_phy_rf_power_2_rssi(s8 *rf_power, u8 path_num);
 u32 rtw_phy_read_rf(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 		    u32 addr, u32 mask);
+u32 rtw_phy_read_rf_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
+			 u32 addr, u32 mask);
 bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 			       u32 addr, u32 mask, u32 data);
 bool rtw_phy_write_rf_reg(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
@@ -178,4 +180,8 @@ enum rtw_phy_cck_pd_lv {
 
 #define CCK_FA_AVG_RESET 0xffffffff
 
+#define LSSI_READ_ADDR_MASK	0x7f800000 /* bits 30:23: RF address field written by rtw_phy_read_rf_sipi() */
+#define LSSI_READ_EDGE_MASK	0x80000000 /* bit 31: read-edge bit, cleared then set by rtw_phy_read_rf_sipi() */
+#define LSSI_READ_DATA_MASK	0xfffff /* bits 19:0: data field of the readback register */
+
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 5b97730f1407..679c6c19516c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -15,6 +15,8 @@
 #include "debug.h"
 
 static struct rtw_chip_ops rtw8723d_ops = {
+	.read_rf		= rtw_phy_read_rf_sipi,
+	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_antenna		= NULL,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
@@ -422,6 +424,13 @@ static const struct rtw_pwr_seq_cmd *card_disable_flow_8723d[] = {
 	NULL
 };
 
+static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = { /* indexed by RF path; assigned to .rf_sipi_read_addr of rtw8723d_hw_spec */
+	[RF_PATH_A] = { .hssi_1 = 0x820, .lssi_read    = 0x8a0,
+			.hssi_2 = 0x824, .lssi_read_pi = 0x8b8},
+	[RF_PATH_B] = { .hssi_1 = 0x828, .lssi_read    = 0x8a4, /* two entries, matching .fix_rf_phy_num = 2 */
+			.hssi_2 = 0x82c, .lssi_read_pi = 0x8bc},
+};
+
 struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
@@ -444,6 +453,9 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.sys_func_en = 0xFD,
 	.pwr_on_seq = card_enable_flow_8723d,
 	.pwr_off_seq = card_disable_flow_8723d,
+	.rf_sipi_addr = {0x840, 0x844},
+	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
+	.fix_rf_phy_num = 2,
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9874f6851e47f674a23fc12969de31dbdf469f3d Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:51 +0800
Subject: rtw88: 8723d: Add mac/bb/rf/agc/power_limit tables

Add corresponding parameter tables for 8723D devices.
Since 8723D devices currently have only one RFE type, there is
only one entry in rtw8723d_rfe_defs.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-6-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c      |   11 +
 .../net/wireless/realtek/rtw88/rtw8723d_table.c    | 1189 ++++++++++++++++++++
 .../net/wireless/realtek/rtw88/rtw8723d_table.h    |    7 +
 3 files changed, 1207 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 679c6c19516c..4fe433549285 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -431,6 +431,11 @@ static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = {
 			.hssi_2 = 0x82c, .lssi_read_pi = 0x8bc},
 };
 
+static const struct rtw_rfe_def rtw8723d_rfe_defs[] = { /* assigned to .rfe_defs; .rfe_defs_size is ARRAY_SIZE() of this table */
+	[0] = { .phy_pg_tbl	= &rtw8723d_bb_pg_tbl, /* only index 0 is defined */
+		.txpwr_lmt_tbl	= &rtw8723d_txpwr_lmt_tbl,},
+};
+
 struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
@@ -456,6 +461,12 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.rf_sipi_addr = {0x840, 0x844},
 	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
 	.fix_rf_phy_num = 2,
+	.mac_tbl = &rtw8723d_mac_tbl,
+	.agc_tbl = &rtw8723d_agc_tbl,
+	.bb_tbl = &rtw8723d_bb_tbl,
+	.rf_tbl = {&rtw8723d_rf_a_tbl},
+	.rfe_defs = rtw8723d_rfe_defs,
+	.rfe_defs_size = ARRAY_SIZE(rtw8723d_rfe_defs),
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
index b22b4b0f2fcf..27a22b392df0 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.c
@@ -5,3 +5,1192 @@
 #include "main.h"
 #include "phy.h"
 #include "rtw8723d_table.h"
+
+static const u32 rtw8723d_mac[] = {
+	0x020, 0x00000013,
+	0x02F, 0x00000010,
+	0x077, 0x00000007,
+	0x421, 0x0000000F,
+	0x428, 0x0000000A,
+	0x429, 0x00000010,
+	0x430, 0x00000000,
+	0x431, 0x00000000,
+	0x432, 0x00000000,
+	0x433, 0x00000001,
+	0x434, 0x00000002,
+	0x435, 0x00000003,
+	0x436, 0x00000005,
+	0x437, 0x00000007,
+	0x438, 0x00000000,
+	0x439, 0x00000000,
+	0x43A, 0x00000000,
+	0x43B, 0x00000001,
+	0x43C, 0x00000002,
+	0x43D, 0x00000003,
+	0x43E, 0x00000005,
+	0x43F, 0x00000007,
+	0x440, 0x0000005D,
+	0x441, 0x00000001,
+	0x442, 0x00000000,
+	0x444, 0x00000010,
+	0x445, 0x00000000,
+	0x446, 0x00000000,
+	0x447, 0x00000000,
+	0x448, 0x00000000,
+	0x449, 0x000000F0,
+	0x44A, 0x0000000F,
+	0x44B, 0x0000003E,
+	0x44C, 0x00000010,
+	0x44D, 0x00000000,
+	0x44E, 0x00000000,
+	0x44F, 0x00000000,
+	0x450, 0x00000000,
+	0x451, 0x000000F0,
+	0x452, 0x0000000F,
+	0x453, 0x00000000,
+	0x456, 0x0000005E,
+	0x460, 0x00000066,
+	0x461, 0x00000066,
+	0x4C8, 0x000000FF,
+	0x4C9, 0x00000008,
+	0x4CC, 0x000000FF,
+	0x4CD, 0x000000FF,
+	0x4CE, 0x00000001,
+	0x500, 0x00000026,
+	0x501, 0x000000A2,
+	0x502, 0x0000002F,
+	0x503, 0x00000000,
+	0x504, 0x00000028,
+	0x505, 0x000000A3,
+	0x506, 0x0000005E,
+	0x507, 0x00000000,
+	0x508, 0x0000002B,
+	0x509, 0x000000A4,
+	0x50A, 0x0000005E,
+	0x50B, 0x00000000,
+	0x50C, 0x0000004F,
+	0x50D, 0x000000A4,
+	0x50E, 0x00000000,
+	0x50F, 0x00000000,
+	0x512, 0x0000001C,
+	0x514, 0x0000000A,
+	0x516, 0x0000000A,
+	0x525, 0x0000004F,
+	0x550, 0x00000010,
+	0x551, 0x00000010,
+	0x559, 0x00000002,
+	0x55C, 0x00000028,
+	0x55D, 0x000000FF,
+	0x605, 0x00000030,
+	0x608, 0x0000000E,
+	0x609, 0x0000002A,
+	0x620, 0x000000FF,
+	0x621, 0x000000FF,
+	0x622, 0x000000FF,
+	0x623, 0x000000FF,
+	0x624, 0x000000FF,
+	0x625, 0x000000FF,
+	0x626, 0x000000FF,
+	0x627, 0x000000FF,
+	0x638, 0x00000028,
+	0x63C, 0x0000000A,
+	0x63D, 0x0000000A,
+	0x63E, 0x0000000C,
+	0x63F, 0x0000000C,
+	0x640, 0x00000040,
+	0x642, 0x00000040,
+	0x643, 0x00000000,
+	0x652, 0x000000C8,
+	0x66A, 0x000000B0,
+	0x66E, 0x00000005,
+	0x700, 0x00000021,
+	0x701, 0x00000043,
+	0x702, 0x00000065,
+	0x703, 0x00000087,
+	0x708, 0x00000021,
+	0x709, 0x00000043,
+	0x70A, 0x00000065,
+	0x70B, 0x00000087,
+	0x765, 0x00000018,
+	0x76E, 0x00000004,
+	0x7C0, 0x00000038,
+	0x7C2, 0x0000000F,
+	0x7C3, 0x000000C0,
+	0x073, 0x00000004,
+	0x7C4, 0x00000077,
+	0x07C, 0x00000003,
+	0x016, 0x000000B3,
+};
+
+RTW_DECL_TABLE_PHY_COND(rtw8723d_mac, rtw_phy_cfg_mac);
+
+static const u32 rtw8723d_agc[] = {
+	0xC78, 0xFE000101,
+	0xC78, 0xFD010101,
+	0xC78, 0xFC020101,
+	0xC78, 0xFB030101,
+	0xC78, 0xFA040101,
+	0xC78, 0xF9050101,
+	0xC78, 0xF8060101,
+	0xC78, 0xF7070101,
+	0xC78, 0xF6080101,
+	0xC78, 0xF5090101,
+	0xC78, 0xF40A0101,
+	0xC78, 0xF30B0101,
+	0xC78, 0xF20C0101,
+	0xC78, 0xF10D0101,
+	0xC78, 0xF00E0101,
+	0xC78, 0xEF0F0101,
+	0xC78, 0xEE100101,
+	0xC78, 0xED110101,
+	0xC78, 0xEC120101,
+	0xC78, 0xEB130101,
+	0xC78, 0xEA140101,
+	0xC78, 0xE9150101,
+	0xC78, 0xE8160101,
+	0xC78, 0xE7170101,
+	0xC78, 0xE6180101,
+	0xC78, 0xE5190101,
+	0xC78, 0xE41A0101,
+	0xC78, 0xE31B0101,
+	0xC78, 0xE21C0101,
+	0xC78, 0xE11D0101,
+	0xC78, 0xE01E0101,
+	0xC78, 0x861F0101,
+	0xC78, 0x85200101,
+	0xC78, 0x84210101,
+	0xC78, 0x83220101,
+	0xC78, 0x82230101,
+	0xC78, 0x81240101,
+	0xC78, 0x80250101,
+	0xC78, 0x44260101,
+	0xC78, 0x43270101,
+	0xC78, 0x42280101,
+	0xC78, 0x41290101,
+	0xC78, 0x402A0101,
+	0xC78, 0x022B0101,
+	0xC78, 0x012C0101,
+	0xC78, 0x002D0101,
+	0xC78, 0xC52E0001,
+	0xC78, 0xC42F0001,
+	0xC78, 0xC3300001,
+	0xC78, 0xC2310001,
+	0xC78, 0xC1320001,
+	0xC78, 0xC0330001,
+	0xC78, 0x04340001,
+	0xC78, 0x03350001,
+	0xC78, 0x02360001,
+	0xC78, 0x01370001,
+	0xC78, 0x00380001,
+	0xC78, 0x00390001,
+	0xC78, 0x003A0001,
+	0xC78, 0x003B0001,
+	0xC78, 0x003C0001,
+	0xC78, 0x003D0001,
+	0xC78, 0x003E0001,
+	0xC78, 0x003F0001,
+	0xC78, 0x6F002001,
+	0xC78, 0x6F012001,
+	0xC78, 0x6F022001,
+	0xC78, 0x6F032001,
+	0xC78, 0x6F042001,
+	0xC78, 0x6F052001,
+	0xC78, 0x6F062001,
+	0xC78, 0x6F072001,
+	0xC78, 0x6F082001,
+	0xC78, 0x6F092001,
+	0xC78, 0x6F0A2001,
+	0xC78, 0x6F0B2001,
+	0xC78, 0x6F0C2001,
+	0xC78, 0x6F0D2001,
+	0xC78, 0x6F0E2001,
+	0xC78, 0x6F0F2001,
+	0xC78, 0x6F102001,
+	0xC78, 0x6F112001,
+	0xC78, 0x6F122001,
+	0xC78, 0x6F132001,
+	0xC78, 0x6F142001,
+	0xC78, 0x6F152001,
+	0xC78, 0x6F162001,
+	0xC78, 0x6F172001,
+	0xC78, 0x6F182001,
+	0xC78, 0x6F192001,
+	0xC78, 0x6F1A2001,
+	0xC78, 0x6F1B2001,
+	0xC78, 0x6F1C2001,
+	0xC78, 0x6F1D2001,
+	0xC78, 0x6F1E2001,
+	0xC78, 0x6F1F2001,
+	0xC78, 0x6F202001,
+	0xC78, 0x6F212001,
+	0xC78, 0x6F222001,
+	0xC78, 0x6F232001,
+	0xC78, 0x6E242001,
+	0xC78, 0x6D252001,
+	0xC78, 0x6C262001,
+	0xC78, 0x6B272001,
+	0xC78, 0x6A282001,
+	0xC78, 0x69292001,
+	0xC78, 0x4B2A2001,
+	0xC78, 0x4A2B2001,
+	0xC78, 0x492C2001,
+	0xC78, 0x482D2001,
+	0xC78, 0x472E2001,
+	0xC78, 0x462F2001,
+	0xC78, 0x45302001,
+	0xC78, 0x44312001,
+	0xC78, 0x43322001,
+	0xC78, 0x42332001,
+	0xC78, 0x41342001,
+	0xC78, 0x40352001,
+	0xC78, 0x02362001,
+	0xC78, 0x01372001,
+	0xC78, 0x00382001,
+	0xC78, 0x00392001,
+	0xC78, 0x003A2001,
+	0xC78, 0x003B2001,
+	0xC78, 0x003C2001,
+	0xC78, 0x003D2001,
+	0xC78, 0x003E2001,
+	0xC78, 0x003F2001,
+	0xC78, 0x7F003101,
+	0xC78, 0x7F013101,
+	0xC78, 0x7F023101,
+	0xC78, 0x7F033101,
+	0xC78, 0x7F043101,
+	0xC78, 0x7F053101,
+	0xC78, 0x7F063101,
+	0xC78, 0x7F073101,
+	0xC78, 0x7E083101,
+	0xC78, 0x7D093101,
+	0xC78, 0x7C0A3101,
+	0xC78, 0x7B0B3101,
+	0xC78, 0x7A0C3101,
+	0xC78, 0x790D3101,
+	0xC78, 0x780E3101,
+	0xC78, 0x770F3101,
+	0xC78, 0x76103101,
+	0xC78, 0x75113101,
+	0xC78, 0x74123101,
+	0xC78, 0x73133101,
+	0xC78, 0x72143101,
+	0xC78, 0x71153101,
+	0xC78, 0x70163101,
+	0xC78, 0x6F173101,
+	0xC78, 0x6E183101,
+	0xC78, 0x6D193101,
+	0xC78, 0x6C1A3101,
+	0xC78, 0x6B1B3101,
+	0xC78, 0x6A1C3101,
+	0xC78, 0x691D3101,
+	0xC78, 0x681E3101,
+	0xC78, 0x4B1F3101,
+	0xC78, 0x4A203101,
+	0xC78, 0x49213101,
+	0xC78, 0x48223101,
+	0xC78, 0x47233101,
+	0xC78, 0x46243101,
+	0xC78, 0x45253101,
+	0xC78, 0x44263101,
+	0xC78, 0x43273101,
+	0xC78, 0x42283101,
+	0xC78, 0x41293101,
+	0xC78, 0x402A3101,
+	0xC78, 0x022B3101,
+	0xC78, 0x012C3101,
+	0xC78, 0x002D3101,
+	0xC78, 0x002E3101,
+	0xC78, 0x002F3101,
+	0xC78, 0x00303101,
+	0xC78, 0x00313101,
+	0xC78, 0x00323101,
+	0xC78, 0x00333101,
+	0xC78, 0x00343101,
+	0xC78, 0x00353101,
+	0xC78, 0x00363101,
+	0xC78, 0x00373101,
+	0xC78, 0x00383101,
+	0xC78, 0x00393101,
+	0xC78, 0x003A3101,
+	0xC78, 0x003B3101,
+	0xC78, 0x003C3101,
+	0xC78, 0x003D3101,
+	0xC78, 0x003E3101,
+	0xC78, 0x003F3101,
+	0xC78, 0xFE403101,
+	0xC78, 0xFD413101,
+	0xC78, 0xFC423101,
+	0xC78, 0xFB433101,
+	0xC78, 0xFA443101,
+	0xC78, 0xF9453101,
+	0xC78, 0xF8463101,
+	0xC78, 0xF7473101,
+	0xC78, 0xF6483101,
+	0xC78, 0xF5493101,
+	0xC78, 0xF44A3101,
+	0xC78, 0xF34B3101,
+	0xC78, 0xF24C3101,
+	0xC78, 0xF14D3101,
+	0xC78, 0xF04E3101,
+	0xC78, 0xEF4F3101,
+	0xC78, 0xEE503101,
+	0xC78, 0xED513101,
+	0xC78, 0xEC523101,
+	0xC78, 0xEB533101,
+	0xC78, 0xEA543101,
+	0xC78, 0xE9553101,
+	0xC78, 0xE8563101,
+	0xC78, 0xE7573101,
+	0xC78, 0xE6583101,
+	0xC78, 0xE5593101,
+	0xC78, 0xE45A3101,
+	0xC78, 0xE35B3101,
+	0xC78, 0xE25C3101,
+	0xC78, 0xE15D3101,
+	0xC78, 0xE05E3101,
+	0xC78, 0x865F3101,
+	0xC78, 0x85603101,
+	0xC78, 0x84613101,
+	0xC78, 0x83623101,
+	0xC78, 0x82633101,
+	0xC78, 0x81643101,
+	0xC78, 0x80653101,
+	0xC78, 0x80663101,
+	0xC78, 0x80673101,
+	0xC78, 0x80683101,
+	0xC78, 0x80693101,
+	0xC78, 0x806A3101,
+	0xC78, 0x806B3101,
+	0xC78, 0x806C3101,
+	0xC78, 0x806D3101,
+	0xC78, 0x806E3101,
+	0xC78, 0x806F3101,
+	0xC78, 0x80703101,
+	0xC78, 0x80713101,
+	0xC78, 0x80723101,
+	0xC78, 0x80733101,
+	0xC78, 0x80743101,
+	0xC78, 0x80753101,
+	0xC78, 0x80763101,
+	0xC78, 0x80773101,
+	0xC78, 0x80783101,
+	0xC78, 0x80793101,
+	0xC78, 0x807A3101,
+	0xC78, 0x807B3101,
+	0xC78, 0x807C3101,
+	0xC78, 0x807D3101,
+	0xC78, 0x807E3101,
+	0xC78, 0x807F3101,
+	0xC78, 0xEF402001,
+	0xC78, 0xEF412001,
+	0xC78, 0xEF422001,
+	0xC78, 0xEF432001,
+	0xC78, 0xEF442001,
+	0xC78, 0xEF452001,
+	0xC78, 0xEF462001,
+	0xC78, 0xEF472001,
+	0xC78, 0xEF482001,
+	0xC78, 0xEF492001,
+	0xC78, 0xEF4A2001,
+	0xC78, 0xEF4B2001,
+	0xC78, 0xEF4C2001,
+	0xC78, 0xEF4D2001,
+	0xC78, 0xEF4E2001,
+	0xC78, 0xEF4F2001,
+	0xC78, 0xEF502001,
+	0xC78, 0xEF512001,
+	0xC78, 0xEF522001,
+	0xC78, 0xEF532001,
+	0xC78, 0xEF542001,
+	0xC78, 0xEF552001,
+	0xC78, 0xEF562001,
+	0xC78, 0xEF572001,
+	0xC78, 0xEF582001,
+	0xC78, 0xEF592001,
+	0xC78, 0xEF5A2001,
+	0xC78, 0xEF5B2001,
+	0xC78, 0xEF5C2001,
+	0xC78, 0xEF5D2001,
+	0xC78, 0xEF5E2001,
+	0xC78, 0xEF5F2001,
+	0xC78, 0xEF602001,
+	0xC78, 0xEE612001,
+	0xC78, 0xED622001,
+	0xC78, 0xEC632001,
+	0xC78, 0xEB642001,
+	0xC78, 0xEA652001,
+	0xC78, 0xE9662001,
+	0xC78, 0xE8672001,
+	0xC78, 0xCB682001,
+	0xC78, 0xCA692001,
+	0xC78, 0xC96A2001,
+	0xC78, 0xC86B2001,
+	0xC78, 0xC76C2001,
+	0xC78, 0xC66D2001,
+	0xC78, 0xC56E2001,
+	0xC78, 0xC46F2001,
+	0xC78, 0xC3702001,
+	0xC78, 0xC2712001,
+	0xC78, 0xC1722001,
+	0xC78, 0xC0732001,
+	0xC78, 0x82742001,
+	0xC78, 0x81752001,
+	0xC78, 0x80762001,
+	0xC78, 0x80772001,
+	0xC78, 0x80782001,
+	0xC78, 0x80792001,
+	0xC78, 0x807A2001,
+	0xC78, 0x807B2001,
+	0xC78, 0x807C2001,
+	0xC78, 0x807D2001,
+	0xC78, 0x807E2001,
+	0xC78, 0x807F2001,
+	0xC78, 0xFA001101,
+	0xC78, 0xF9011101,
+	0xC78, 0xF8021101,
+	0xC78, 0xF7031101,
+	0xC78, 0xF6041101,
+	0xC78, 0xF5051101,
+	0xC78, 0xF4061101,
+	0xC78, 0xD7071101,
+	0xC78, 0xD6081101,
+	0xC78, 0xD5091101,
+	0xC78, 0xD40A1101,
+	0xC78, 0x970B1101,
+	0xC78, 0x960C1101,
+	0xC78, 0x950D1101,
+	0xC78, 0x940E1101,
+	0xC78, 0x930F1101,
+	0xC78, 0x92101101,
+	0xC78, 0x91111101,
+	0xC78, 0x90121101,
+	0xC78, 0x8F131101,
+	0xC78, 0x8E141101,
+	0xC78, 0x8D151101,
+	0xC78, 0x8C161101,
+	0xC78, 0x8B171101,
+	0xC78, 0x8A181101,
+	0xC78, 0x89191101,
+	0xC78, 0x881A1101,
+	0xC78, 0x871B1101,
+	0xC78, 0x861C1101,
+	0xC78, 0x851D1101,
+	0xC78, 0x841E1101,
+	0xC78, 0x831F1101,
+	0xC78, 0x82201101,
+	0xC78, 0x81211101,
+	0xC78, 0x80221101,
+	0xC78, 0x43231101,
+	0xC78, 0x42241101,
+	0xC78, 0x41251101,
+	0xC78, 0x04261101,
+	0xC78, 0x03271101,
+	0xC78, 0x02281101,
+	0xC78, 0x01291101,
+	0xC78, 0x002A1101,
+	0xC78, 0xC42B1001,
+	0xC78, 0xC32C1001,
+	0xC78, 0xC22D1001,
+	0xC78, 0xC12E1001,
+	0xC78, 0xC02F1001,
+	0xC78, 0x85301001,
+	0xC78, 0x84311001,
+	0xC78, 0x83321001,
+	0xC78, 0x82331001,
+	0xC78, 0x81341001,
+	0xC78, 0x80351001,
+	0xC78, 0x05361001,
+	0xC78, 0x04371001,
+	0xC78, 0x03381001,
+	0xC78, 0x02391001,
+	0xC78, 0x013A1001,
+	0xC78, 0x003B1001,
+	0xC78, 0x003C1001,
+	0xC78, 0x003D1001,
+	0xC78, 0x003E1001,
+	0xC78, 0x003F1001,
+	0xC50, 0x69553422,
+	0xC50, 0x69553420,
+};
+
+RTW_DECL_TABLE_PHY_COND(rtw8723d_agc, rtw_phy_cfg_agc);
+
+static const u32 rtw8723d_bb[] = {
+	0x800, 0x80046C00,
+	0x804, 0x00000003,
+	0x808, 0x0000FC00,
+	0x80C, 0x0000000A,
+	0x810, 0x10001331,
+	0x814, 0x020C3D10,
+	0x818, 0x00200385,
+	0x81C, 0x00000000,
+	0x820, 0x01000100,
+	0x824, 0x00390204,
+	0x828, 0x00000000,
+	0x82C, 0x00000000,
+	0x830, 0x00000000,
+	0x834, 0x00000000,
+	0x838, 0x00000000,
+	0x83C, 0x00000000,
+	0x840, 0x00010000,
+	0x844, 0x00000000,
+	0x848, 0x00000000,
+	0x84C, 0x00000000,
+	0x850, 0x00000000,
+	0x854, 0x00000000,
+	0x858, 0x569A11A9,
+	0x85C, 0x01000014,
+	0x860, 0x66F60110,
+	0x864, 0x461F0641,
+	0x868, 0x00000000,
+	0x86C, 0x27272700,
+	0x870, 0x07000460,
+	0x874, 0x25004000,
+	0x878, 0x00000808,
+	0x87C, 0x004F0201,
+	0x880, 0xB2002E12,
+	0x884, 0x00000007,
+	0x888, 0x00000000,
+	0x88C, 0xCCC000C0,
+	0x890, 0x00000800,
+	0x894, 0xFFFFFFFE,
+	0x898, 0x40302010,
+	0x89C, 0x00706050,
+	0x900, 0x00000000,
+	0x904, 0x00000023,
+	0x908, 0x00000000,
+	0x90C, 0x81121111,
+	0x910, 0x00000402,
+	0x914, 0x00000300,
+	0x920, 0x18C6318C,
+	0x924, 0x0000018C,
+	0x948, 0x99000000,
+	0x94C, 0x00000010,
+	0x950, 0x00003800,
+	0x954, 0x5A380000,
+	0x958, 0x4BC6D87A,
+	0x95C, 0x04EB9B79,
+	0x96C, 0x00000003,
+	0x970, 0x00000000,
+	0x974, 0x00000000,
+	0x978, 0x00000000,
+	0x97C, 0x13000000,
+	0x980, 0x00000000,
+	0xA00, 0x00D046C8,
+	0xA04, 0x80FF800C,
+	0xA08, 0x8C838300,
+	0xA0C, 0x2E20100F,
+	0xA10, 0x9500BB78,
+	0xA14, 0x1114D028,
+	0xA18, 0x00881117,
+	0xA1C, 0x89140F00,
+	0xA20, 0xE82C0001,
+	0xA24, 0x64B80C1C,
+	0xA28, 0x00008810,
+	0xA2C, 0x00D30000,
+	0xA70, 0x101FBF00,
+	0xA74, 0x00000007,
+	0xA78, 0x00008900,
+	0xA7C, 0x225B0606,
+	0xA80, 0x2180FA74,
+	0xA84, 0x00200000,
+	0xA88, 0x040C0000,
+	0xA8C, 0x12345678,
+	0xA90, 0xABCDEF00,
+	0xA94, 0x001B1B89,
+	0xA98, 0x00000000,
+	0xA9C, 0x00020000,
+	0xAA0, 0x00000000,
+	0xAA4, 0x0000000C,
+	0xAA8, 0xCA100008,
+	0xAAC, 0x01235667,
+	0xAB0, 0x00000000,
+	0xAB4, 0x20201402,
+	0xB2C, 0x00000000,
+	0xC00, 0x48071D40,
+	0xC04, 0x03A05611,
+	0xC08, 0x000000E4,
+	0xC0C, 0x6C6C6C6C,
+	0xC10, 0x28800000,
+	0xC14, 0x40000100,
+	0xC18, 0x08800000,
+	0xC1C, 0x40000100,
+	0xC20, 0x00000000,
+	0xC24, 0x00000000,
+	0xC28, 0x00000000,
+	0xC2C, 0x00000000,
+	0xC30, 0x69E9AC48,
+	0xC34, 0x31000040,
+	0xC38, 0x21688080,
+	0xC3C, 0x000016D4,
+	0xC40, 0x1F78403F,
+	0xC44, 0x00010036,
+	0xC48, 0xEC020107,
+	0xC4C, 0x007F037F,
+	0xC50, 0x69553420,
+	0xC54, 0x43BC0094,
+	0xC58, 0x00015969,
+	0xC5C, 0x00310492,
+	0xC60, 0x00280A00,
+	0xC64, 0x7112848B,
+	0xC68, 0x47C074FF,
+	0xC6C, 0x00000036,
+	0xC70, 0x2C7F000D,
+	0xC74, 0x020600DB,
+	0xC78, 0x0000001F,
+	0xC7C, 0x00B91612,
+	0xC80, 0x390000E4,
+	0xC84, 0x21F60000,
+	0xC88, 0x40000100,
+	0xC8C, 0x20200000,
+	0xC90, 0x00091521,
+	0xC94, 0x00000000,
+	0xC98, 0x00121820,
+	0xC9C, 0x00007F7F,
+	0xCA0, 0x00012000,
+	0xCA4, 0x800000A0,
+	0xCA8, 0x84E6C606,
+	0xCAC, 0x00000060,
+	0xCB0, 0x00000000,
+	0xCB4, 0x00000000,
+	0xCB8, 0x00000000,
+	0xCBC, 0x28000000,
+	0xCC0, 0x0010A3D0,
+	0xCC4, 0x00000F7D,
+	0xCC8, 0x000442D6,
+	0xCCC, 0x00000000,
+	0xCD0, 0x000001C8,
+	0xCD4, 0x001C8000,
+	0xCD8, 0x00000100,
+	0xCDC, 0x40100000,
+	0xCE0, 0x00222220,
+	0xCE4, 0x20000000,
+	0xCE8, 0x37644302,
+	0xCEC, 0x2F97D40C,
+	0xD00, 0x00030740,
+	0xD04, 0x40020401,
+	0xD08, 0x0000907F,
+	0xD0C, 0x20010201,
+	0xD10, 0xA0633333,
+	0xD14, 0x3333BC53,
+	0xD18, 0x7A8F5B6F,
+	0xD2C, 0xCC979975,
+	0xD30, 0x00000000,
+	0xD34, 0x40608000,
+	0xD38, 0x88000000,
+	0xD3C, 0xC0127343,
+	0xD40, 0x00000000,
+	0xD44, 0x00000000,
+	0xD48, 0x00000000,
+	0xD4C, 0x00000000,
+	0xD50, 0x00000038,
+	0xD54, 0x00000000,
+	0xD58, 0x00000282,
+	0xD5C, 0x30032064,
+	0xD60, 0x4653DE68,
+	0xD64, 0x04518A3C,
+	0xD68, 0x00002101,
+	0xE00, 0x2D2D2D2D,
+	0xE04, 0x2D2D2D2D,
+	0xE08, 0x0390272D,
+	0xE10, 0x2D2D2D2D,
+	0xE14, 0x2D2D2D2D,
+	0xE18, 0x2D2D2D2D,
+	0xE1C, 0x2D2D2D2D,
+	0xE28, 0x00000000,
+	0xE30, 0x1000DC1F,
+	0xE34, 0x10008C1F,
+	0xE38, 0x02140102,
+	0xE3C, 0x681604C2,
+	0xE40, 0x01007C00,
+	0xE44, 0x01004800,
+	0xE48, 0xFB000000,
+	0xE4C, 0x000028D1,
+	0xE50, 0x1000DC1F,
+	0xE54, 0x10008C1F,
+	0xE58, 0x02140102,
+	0xE5C, 0x28160D05,
+	0xE60, 0x00000008,
+	0xE68, 0x001B25A4,
+	0xE6C, 0x01C00014,
+	0xE70, 0x01C00016,
+	0xE74, 0x02000014,
+	0xE78, 0x02000014,
+	0xE7C, 0x02000014,
+	0xE80, 0x02000014,
+	0xE84, 0x01C00014,
+	0xE88, 0x02000014,
+	0xE8C, 0x01C00014,
+	0xED0, 0x01C00014,
+	0xED4, 0x01C00014,
+	0xED8, 0x01C00014,
+	0xEDC, 0x00000014,
+	0xEE0, 0x00000014,
+	0xEE8, 0x21555448,
+	0xEEC, 0x03C00014,
+	0xF14, 0x00000003,
+	0xF00, 0x00100300,
+	0xF08, 0x0000800B,
+	0xF0C, 0x0000F007,
+	0xF10, 0x0000A487,
+	0xF1C, 0x80000064,
+	0xF38, 0x00030155,
+	0xF3C, 0x0000003A,
+	0xF4C, 0x13000000,
+	0xF50, 0x00000000,
+	0xF18, 0x00000000,
+};
+
+RTW_DECL_TABLE_PHY_COND(rtw8723d_bb, rtw_phy_cfg_bb);
+
+static const struct rtw_phy_pg_cfg_pair rtw8723d_bb_pg[] = {
+	{ 0, 0, 0, 0x00000e08, 0x0000ff00, 0x00003200, },
+	{ 0, 0, 0, 0x0000086c, 0xffffff00, 0x32323200, },
+	{ 0, 0, 0, 0x00000e00, 0xffffffff, 0x32343434, },
+	{ 0, 0, 0, 0x00000e04, 0xffffffff, 0x28303032, },
+	{ 0, 0, 0, 0x00000e10, 0xffffffff, 0x30323234, },
+	{ 0, 0, 0, 0x00000e14, 0xffffffff, 0x26282830, },
+};
+
+RTW_DECL_TABLE_BB_PG(rtw8723d_bb_pg);
+
+static const u32 rtw8723d_rf_a[] = {
+		0x050, 0x0001C000,
+		0x049, 0x0004AA00,
+		0x000, 0x00010000,
+		0x0B1, 0x00054573,
+		0x0B4, 0x000508AB,
+		0x0B7, 0x00014787,
+		0x0B8, 0x000064CB,
+		0x01B, 0x00073A40,
+		0x051, 0x00038CAF,
+		0x052, 0x000FCCA3,
+		0x053, 0x00090F38,
+		0x054, 0x00011083,
+		0x057, 0x000D0000,
+		0x08D, 0x00000A1A,
+		0x082, 0x00082AAC,
+		0x08E, 0x00076940,
+		0x08F, 0x00088400,
+		0x061, 0x00038CAF,
+		0x062, 0x000FCCA3,
+		0x063, 0x00090F38,
+		0x064, 0x00011083,
+		0x067, 0x000D0000,
+		0x092, 0x00082AAC,
+		0x0EF, 0x00000400,
+		0x030, 0x000008CA,
+		0x030, 0x000018CA,
+		0x030, 0x000028CA,
+		0x030, 0x000038CA,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00000400,
+		0x030, 0x000008CA,
+		0x030, 0x000018CA,
+		0x030, 0x000028CA,
+		0x030, 0x000038CA,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00000100,
+		0x033, 0x00000000,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000001,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000002,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000003,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000004,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000005,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000006,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000007,
+		0x03F, 0x0000CCA3,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00000100,
+		0x033, 0x00000000,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000001,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000002,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000003,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000004,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000005,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000006,
+		0x03F, 0x0000CCA3,
+		0x033, 0x00000007,
+		0x03F, 0x0000CCA3,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00000800,
+		0x030, 0x0000002D,
+		0x030, 0x0000122C,
+		0x030, 0x0000222F,
+		0x030, 0x0000326C,
+		0x030, 0x0000466B,
+		0x030, 0x0000566E,
+		0x030, 0x000066EB,
+		0x030, 0x000077EC,
+		0x030, 0x000087EF,
+		0x030, 0x000097F2,
+		0x030, 0x0000A7F5,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00000800,
+		0x030, 0x00000001,
+		0x030, 0x00001011,
+		0x030, 0x00002011,
+		0x030, 0x00003013,
+		0x030, 0x00004033,
+		0x030, 0x00005033,
+		0x030, 0x00006037,
+		0x030, 0x0000703F,
+		0x030, 0x0000803F,
+		0x030, 0x0000903F,
+		0x030, 0x0000A03F,
+		0x0EE, 0x00000000,
+		0x082, 0x00083B8C,
+		0x0ED, 0x00000008,
+		0x030, 0x000030F6,
+		0x030, 0x00002004,
+		0x030, 0x000010F6,
+		0x030, 0x000000F6,
+		0x0ED, 0x00000000,
+		0x092, 0x00083B8C,
+		0x0EC, 0x00000008,
+		0x030, 0x000030F6,
+		0x030, 0x00002004,
+		0x030, 0x000010F6,
+		0x030, 0x000000F6,
+		0x0EC, 0x00000000,
+		0x0EF, 0x00010000,
+		0x030, 0x0001C11C,
+		0x030, 0x000181F4,
+		0x030, 0x00014108,
+		0x030, 0x000101E4,
+		0x030, 0x0000C11C,
+		0x030, 0x000081F4,
+		0x030, 0x00004108,
+		0x030, 0x000001E4,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00010000,
+		0x030, 0x0001C11C,
+		0x030, 0x000181F4,
+		0x030, 0x00014108,
+		0x030, 0x000101E4,
+		0x030, 0x0000C11C,
+		0x030, 0x000081F4,
+		0x030, 0x00004108,
+		0x030, 0x000001E4,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00080000,
+		0x033, 0x00000007,
+		0x03E, 0x0000005F,
+		0x03F, 0x000B3FDB,
+		0x033, 0x00000004,
+		0x03E, 0x0000005D,
+		0x03F, 0x000BFFE0,
+		0x033, 0x00000005,
+		0x03E, 0x0000005D,
+		0x03F, 0x000FBFCE,
+		0x033, 0x00000006,
+		0x03E, 0x0000005F,
+		0x03F, 0x000A7FFB,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00000002,
+		0x030, 0x00000001,
+		0x030, 0x00002001,
+		0x030, 0x00004001,
+		0x030, 0x00007001,
+		0x030, 0x00006001,
+		0x030, 0x00020001,
+		0x030, 0x00022001,
+		0x030, 0x00024001,
+		0x030, 0x00027001,
+		0x030, 0x00026001,
+		0x030, 0x00034001,
+		0x030, 0x00037001,
+		0x030, 0x00036001,
+		0x030, 0x00008000,
+		0x030, 0x0000A000,
+		0x030, 0x0000C000,
+	0x83000100,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x0000E024,
+	0xA0000000,	0x00000000,
+		0x030, 0x0000E000,
+	0xB0000000,	0x00000000,
+		0x030, 0x0001C000,
+		0x030, 0x0001E000,
+		0x0EE, 0x00000000,
+		0x0EE, 0x00020000,
+		0x0EF, 0x00020000,
+		0x030, 0x00000F75,
+		0x030, 0x00002F55,
+		0x030, 0x00003F75,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00000000,
+		0x018, 0x00008401,
+		0xFFE, 0x00000000,
+};
+
+RTW_DECL_TABLE_RF_RADIO(rtw8723d_rf_a, A);
+
+static const struct rtw_txpwr_lmt_cfg_pair rtw8723d_txpwr_lmt[] = {
+	{0, 0, 0, 0, 1, 30, },
+	{2, 0, 0, 0, 1, 30, },
+	{1, 0, 0, 0, 1, 30, },
+	{0, 0, 0, 0, 2, 30, },
+	{2, 0, 0, 0, 2, 30, },
+	{1, 0, 0, 0, 2, 30, },
+	{0, 0, 0, 0, 3, 30, },
+	{2, 0, 0, 0, 3, 30, },
+	{1, 0, 0, 0, 3, 30, },
+	{0, 0, 0, 0, 4, 30, },
+	{2, 0, 0, 0, 4, 30, },
+	{1, 0, 0, 0, 4, 30, },
+	{0, 0, 0, 0, 5, 30, },
+	{2, 0, 0, 0, 5, 30, },
+	{1, 0, 0, 0, 5, 30, },
+	{0, 0, 0, 0, 6, 30, },
+	{2, 0, 0, 0, 6, 30, },
+	{1, 0, 0, 0, 6, 30, },
+	{0, 0, 0, 0, 7, 30, },
+	{2, 0, 0, 0, 7, 30, },
+	{1, 0, 0, 0, 7, 30, },
+	{0, 0, 0, 0, 8, 30, },
+	{2, 0, 0, 0, 8, 30, },
+	{1, 0, 0, 0, 8, 30, },
+	{0, 0, 0, 0, 9, 30, },
+	{2, 0, 0, 0, 9, 30, },
+	{1, 0, 0, 0, 9, 30, },
+	{0, 0, 0, 0, 10, 30, },
+	{2, 0, 0, 0, 10, 30, },
+	{1, 0, 0, 0, 10, 30, },
+	{0, 0, 0, 0, 11, 30, },
+	{2, 0, 0, 0, 11, 30, },
+	{1, 0, 0, 0, 11, 30, },
+	{0, 0, 0, 0, 12, 30, },
+	{2, 0, 0, 0, 12, 30, },
+	{1, 0, 0, 0, 12, 30, },
+	{0, 0, 0, 0, 13, 17, },
+	{2, 0, 0, 0, 13, 30, },
+	{1, 0, 0, 0, 13, 30, },
+	{0, 0, 0, 0, 14, 63, },
+	{2, 0, 0, 0, 14, 63, },
+	{1, 0, 0, 0, 14, 30, },
+	{0, 0, 0, 1, 1, 26, },
+	{2, 0, 0, 1, 1, 31, },
+	{1, 0, 0, 1, 1, 31, },
+	{0, 0, 0, 1, 2, 28, },
+	{2, 0, 0, 1, 2, 31, },
+	{1, 0, 0, 1, 2, 31, },
+	{0, 0, 0, 1, 3, 30, },
+	{2, 0, 0, 1, 3, 31, },
+	{1, 0, 0, 1, 3, 31, },
+	{0, 0, 0, 1, 4, 30, },
+	{2, 0, 0, 1, 4, 31, },
+	{1, 0, 0, 1, 4, 31, },
+	{0, 0, 0, 1, 5, 30, },
+	{2, 0, 0, 1, 5, 31, },
+	{1, 0, 0, 1, 5, 31, },
+	{0, 0, 0, 1, 6, 30, },
+	{2, 0, 0, 1, 6, 31, },
+	{1, 0, 0, 1, 6, 31, },
+	{0, 0, 0, 1, 7, 30, },
+	{2, 0, 0, 1, 7, 31, },
+	{1, 0, 0, 1, 7, 31, },
+	{0, 0, 0, 1, 8, 30, },
+	{2, 0, 0, 1, 8, 31, },
+	{1, 0, 0, 1, 8, 31, },
+	{0, 0, 0, 1, 9, 30, },
+	{2, 0, 0, 1, 9, 31, },
+	{1, 0, 0, 1, 9, 31, },
+	{0, 0, 0, 1, 10, 28, },
+	{2, 0, 0, 1, 10, 31, },
+	{1, 0, 0, 1, 10, 31, },
+	{0, 0, 0, 1, 11, 26, },
+	{2, 0, 0, 1, 11, 31, },
+	{1, 0, 0, 1, 11, 31, },
+	{0, 0, 0, 1, 12, 24, },
+	{2, 0, 0, 1, 12, 31, },
+	{1, 0, 0, 1, 12, 31, },
+	{0, 0, 0, 1, 13, 14, },
+	{2, 0, 0, 1, 13, 31, },
+	{1, 0, 0, 1, 13, 31, },
+	{0, 0, 0, 1, 14, 63, },
+	{2, 0, 0, 1, 14, 63, },
+	{1, 0, 0, 1, 14, 63, },
+	{0, 0, 0, 2, 1, 24, },
+	{2, 0, 0, 2, 1, 31, },
+	{1, 0, 0, 2, 1, 31, },
+	{0, 0, 0, 2, 2, 26, },
+	{2, 0, 0, 2, 2, 31, },
+	{1, 0, 0, 2, 2, 31, },
+	{0, 0, 0, 2, 3, 30, },
+	{2, 0, 0, 2, 3, 31, },
+	{1, 0, 0, 2, 3, 31, },
+	{0, 0, 0, 2, 4, 30, },
+	{2, 0, 0, 2, 4, 31, },
+	{1, 0, 0, 2, 4, 31, },
+	{0, 0, 0, 2, 5, 30, },
+	{2, 0, 0, 2, 5, 31, },
+	{1, 0, 0, 2, 5, 31, },
+	{0, 0, 0, 2, 6, 30, },
+	{2, 0, 0, 2, 6, 31, },
+	{1, 0, 0, 2, 6, 31, },
+	{0, 0, 0, 2, 7, 30, },
+	{2, 0, 0, 2, 7, 31, },
+	{1, 0, 0, 2, 7, 31, },
+	{0, 0, 0, 2, 8, 30, },
+	{2, 0, 0, 2, 8, 31, },
+	{1, 0, 0, 2, 8, 31, },
+	{0, 0, 0, 2, 9, 30, },
+	{2, 0, 0, 2, 9, 31, },
+	{1, 0, 0, 2, 9, 31, },
+	{0, 0, 0, 2, 10, 26, },
+	{2, 0, 0, 2, 10, 31, },
+	{1, 0, 0, 2, 10, 31, },
+	{0, 0, 0, 2, 11, 24, },
+	{2, 0, 0, 2, 11, 31, },
+	{1, 0, 0, 2, 11, 31, },
+	{0, 0, 0, 2, 12, 23, },
+	{2, 0, 0, 2, 12, 31, },
+	{1, 0, 0, 2, 12, 31, },
+	{0, 0, 0, 2, 13, 13, },
+	{2, 0, 0, 2, 13, 31, },
+	{1, 0, 0, 2, 13, 31, },
+	{0, 0, 0, 2, 14, 63, },
+	{2, 0, 0, 2, 14, 63, },
+	{1, 0, 0, 2, 14, 63, },
+	{0, 0, 0, 3, 1, 28, },
+	{2, 0, 0, 3, 1, 30, },
+	{1, 0, 0, 3, 1, 30, },
+	{0, 0, 0, 3, 2, 28, },
+	{2, 0, 0, 3, 2, 30, },
+	{1, 0, 0, 3, 2, 30, },
+	{0, 0, 0, 3, 3, 30, },
+	{2, 0, 0, 3, 3, 30, },
+	{1, 0, 0, 3, 3, 30, },
+	{0, 0, 0, 3, 4, 30, },
+	{2, 0, 0, 3, 4, 30, },
+	{1, 0, 0, 3, 4, 30, },
+	{0, 0, 0, 3, 5, 30, },
+	{2, 0, 0, 3, 5, 30, },
+	{1, 0, 0, 3, 5, 30, },
+	{0, 0, 0, 3, 6, 30, },
+	{2, 0, 0, 3, 6, 30, },
+	{1, 0, 0, 3, 6, 30, },
+	{0, 0, 0, 3, 7, 30, },
+	{2, 0, 0, 3, 7, 30, },
+	{1, 0, 0, 3, 7, 30, },
+	{0, 0, 0, 3, 8, 30, },
+	{2, 0, 0, 3, 8, 30, },
+	{1, 0, 0, 3, 8, 30, },
+	{0, 0, 0, 3, 9, 28, },
+	{2, 0, 0, 3, 9, 30, },
+	{1, 0, 0, 3, 9, 30, },
+	{0, 0, 0, 3, 10, 28, },
+	{2, 0, 0, 3, 10, 30, },
+	{1, 0, 0, 3, 10, 30, },
+	{0, 0, 0, 3, 11, 28, },
+	{2, 0, 0, 3, 11, 30, },
+	{1, 0, 0, 3, 11, 30, },
+	{0, 0, 0, 3, 12, 63, },
+	{2, 0, 0, 3, 12, 30, },
+	{1, 0, 0, 3, 12, 30, },
+	{0, 0, 0, 3, 13, 63, },
+	{2, 0, 0, 3, 13, 30, },
+	{1, 0, 0, 3, 13, 30, },
+	{0, 0, 0, 3, 14, 63, },
+	{2, 0, 0, 3, 14, 63, },
+	{1, 0, 0, 3, 14, 63, },
+	{0, 0, 1, 2, 1, 63, },
+	{2, 0, 1, 2, 1, 63, },
+	{1, 0, 1, 2, 1, 63, },
+	{0, 0, 1, 2, 2, 63, },
+	{2, 0, 1, 2, 2, 63, },
+	{1, 0, 1, 2, 2, 63, },
+	{0, 0, 1, 2, 3, 24, },
+	{2, 0, 1, 2, 3, 30, },
+	{1, 0, 1, 2, 3, 30, },
+	{0, 0, 1, 2, 4, 24, },
+	{2, 0, 1, 2, 4, 30, },
+	{1, 0, 1, 2, 4, 30, },
+	{0, 0, 1, 2, 5, 24, },
+	{2, 0, 1, 2, 5, 30, },
+	{1, 0, 1, 2, 5, 30, },
+	{0, 0, 1, 2, 6, 24, },
+	{2, 0, 1, 2, 6, 30, },
+	{1, 0, 1, 2, 6, 30, },
+	{0, 0, 1, 2, 7, 24, },
+	{2, 0, 1, 2, 7, 30, },
+	{1, 0, 1, 2, 7, 30, },
+	{0, 0, 1, 2, 8, 24, },
+	{2, 0, 1, 2, 8, 30, },
+	{1, 0, 1, 2, 8, 30, },
+	{0, 0, 1, 2, 9, 24, },
+	{2, 0, 1, 2, 9, 30, },
+	{1, 0, 1, 2, 9, 30, },
+	{0, 0, 1, 2, 10, 22, },
+	{2, 0, 1, 2, 10, 30, },
+	{1, 0, 1, 2, 10, 30, },
+	{0, 0, 1, 2, 11, 20, },
+	{2, 0, 1, 2, 11, 30, },
+	{1, 0, 1, 2, 11, 30, },
+	{0, 0, 1, 2, 12, 63, },
+	{2, 0, 1, 2, 12, 30, },
+	{1, 0, 1, 2, 12, 30, },
+	{0, 0, 1, 2, 13, 63, },
+	{2, 0, 1, 2, 13, 30, },
+	{1, 0, 1, 2, 13, 30, },
+	{0, 0, 1, 2, 14, 63, },
+	{2, 0, 1, 2, 14, 63, },
+	{1, 0, 1, 2, 14, 63, },
+	{0, 0, 1, 3, 1, 63, },
+	{2, 0, 1, 3, 1, 63, },
+	{1, 0, 1, 3, 1, 63, },
+	{0, 0, 1, 3, 2, 63, },
+	{2, 0, 1, 3, 2, 63, },
+	{1, 0, 1, 3, 2, 63, },
+	{0, 0, 1, 3, 3, 26, },
+	{2, 0, 1, 3, 3, 26, },
+	{1, 0, 1, 3, 3, 26, },
+	{0, 0, 1, 3, 4, 26, },
+	{2, 0, 1, 3, 4, 26, },
+	{1, 0, 1, 3, 4, 26, },
+	{0, 0, 1, 3, 5, 26, },
+	{2, 0, 1, 3, 5, 26, },
+	{1, 0, 1, 3, 5, 26, },
+	{0, 0, 1, 3, 6, 26, },
+	{2, 0, 1, 3, 6, 26, },
+	{1, 0, 1, 3, 6, 26, },
+	{0, 0, 1, 3, 7, 26, },
+	{2, 0, 1, 3, 7, 26, },
+	{1, 0, 1, 3, 7, 26, },
+	{0, 0, 1, 3, 8, 26, },
+	{2, 0, 1, 3, 8, 26, },
+	{1, 0, 1, 3, 8, 26, },
+	{0, 0, 1, 3, 9, 26, },
+	{2, 0, 1, 3, 9, 26, },
+	{1, 0, 1, 3, 9, 26, },
+	{0, 0, 1, 3, 10, 26, },
+	{2, 0, 1, 3, 10, 26, },
+	{1, 0, 1, 3, 10, 26, },
+	{0, 0, 1, 3, 11, 26, },
+	{2, 0, 1, 3, 11, 26, },
+	{1, 0, 1, 3, 11, 26, },
+	{0, 0, 1, 3, 12, 63, },
+	{2, 0, 1, 3, 12, 26, },
+	{1, 0, 1, 3, 12, 26, },
+	{0, 0, 1, 3, 13, 63, },
+	{2, 0, 1, 3, 13, 26, },
+	{1, 0, 1, 3, 13, 26, },
+	{0, 0, 1, 3, 14, 63, },
+	{2, 0, 1, 3, 14, 63, },
+	{1, 0, 1, 3, 14, 63, },
+};
+
+RTW_DECL_TABLE_TXPWR_LMT(rtw8723d_txpwr_lmt);
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h
index ea5933ffd043..4db996a1d982 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d_table.h
@@ -5,4 +5,11 @@
 #ifndef __RTW8723D_TABLE_H__
 #define __RTW8723D_TABLE_H__
 
+extern const struct rtw_table rtw8723d_mac_tbl;
+extern const struct rtw_table rtw8723d_agc_tbl;
+extern const struct rtw_table rtw8723d_bb_tbl;
+extern const struct rtw_table rtw8723d_bb_pg_tbl;
+extern const struct rtw_table rtw8723d_rf_a_tbl;
+extern const struct rtw_table rtw8723d_txpwr_lmt_tbl;
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 1afb5eb7a00dab15551bbfb24c4e8a750da21827 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:52 +0800
Subject: rtw88: 8723d: Add cfg_ldo25 to control LDO25

Implement rtw_chip_ops::cfg_ldo25 to enable/disable LDO25 with proper
voltage.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-7-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/reg.h      |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 15 +++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822b.c |  2 +-
 3 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 9d94534c9674..2afd547ebcc9 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -37,6 +37,11 @@
 #define REG_LDO_EFUSE_CTRL	0x0034
 #define BIT_MASK_EFUSE_BANK_SEL	(BIT(8) | BIT(9))
 
+#define BIT_LDO25_VOLTAGE_V25	0x03
+#define BIT_MASK_LDO25_VOLTAGE	GENMASK(6, 4)
+#define BIT_SHIFT_LDO25_VOLTAGE	4
+#define BIT_LDO25_EN		BIT(7)
+
 #define REG_GPIO_MUXCFG		0x0040
 #define BIT_FSPI_EN		BIT(19)
 #define BIT_BT_AOD_GPIO3	BIT(9)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 4fe433549285..04f8d73e4e6c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -14,10 +14,25 @@
 #include "reg.h"
 #include "debug.h"
 
+static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
+{
+	u8 ldo_pwr;
+
+	ldo_pwr = rtw_read8(rtwdev, REG_LDO_EFUSE_CTRL + 3);
+	if (enable) {
+		ldo_pwr &= ~BIT_MASK_LDO25_VOLTAGE;
+		ldo_pwr |= (BIT_LDO25_VOLTAGE_V25 << 4) | BIT_LDO25_EN;
+	} else {
+		ldo_pwr &= ~BIT_LDO25_EN;
+	}
+	rtw_write8(rtwdev, REG_LDO_EFUSE_CTRL + 3, ldo_pwr);
+}
+
 static struct rtw_chip_ops rtw8723d_ops = {
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_antenna		= NULL,
+	.cfg_ldo25		= rtw8723d_cfg_ldo25,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index c02f3a730369..9a2e18e7624f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -1030,7 +1030,7 @@ static void rtw8822b_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 	u8 ldo_pwr;
 
 	ldo_pwr = rtw_read8(rtwdev, REG_LDO_EFUSE_CTRL + 3);
-	ldo_pwr = enable ? ldo_pwr | BIT(7) : ldo_pwr & ~BIT(7);
+	ldo_pwr = enable ? ldo_pwr | BIT_LDO25_EN : ldo_pwr & ~BIT_LDO25_EN;
 	rtw_write8(rtwdev, REG_LDO_EFUSE_CTRL + 3, ldo_pwr);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 44baa97ca820dbc81dfde076937d15bc725a3a54 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:53 +0800
Subject: rtw88: 8723d: Add new chip op efuse_grant() to control efuse access

8723D devices need to grant efuse access before dumping physical efuse
map, other chips don't need it, so keep this ops as blank.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-8-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/efuse.c    |  4 ++++
 drivers/net/wireless/realtek/rtw88/main.h     | 13 +++++++++++++
 drivers/net/wireless/realtek/rtw88/reg.h      |  9 +++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 13 +++++++++++++
 4 files changed, 39 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/efuse.c b/drivers/net/wireless/realtek/rtw88/efuse.c
index 212c8376a8c9..df969d346b41 100644
--- a/drivers/net/wireless/realtek/rtw88/efuse.c
+++ b/drivers/net/wireless/realtek/rtw88/efuse.c
@@ -90,6 +90,8 @@ static int rtw_dump_physical_efuse_map(struct rtw_dev *rtwdev, u8 *map)
 	u32 addr;
 	u32 cnt;
 
+	rtw_chip_efuse_grant_on(rtwdev);
+
 	switch_efuse_bank(rtwdev);
 
 	/* disable 2.5V LDO */
@@ -113,6 +115,8 @@ static int rtw_dump_physical_efuse_map(struct rtw_dev *rtwdev, u8 *map)
 		*(map + addr) = (u8)(efuse_ctl & BIT_MASK_EF_DATA);
 	}
 
+	rtw_chip_efuse_grant_off(rtwdev);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 8f15fc113af0..74302181da53 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -811,6 +811,7 @@ struct rtw_chip_ops {
 			   u32 antenna_tx,
 			   u32 antenna_rx);
 	void (*cfg_ldo25)(struct rtw_dev *rtwdev, bool enable);
+	void (*efuse_grant)(struct rtw_dev *rtwdev, bool enable);
 	void (*false_alarm_statistics)(struct rtw_dev *rtwdev);
 	void (*phy_calibration)(struct rtw_dev *rtwdev);
 	void (*dpk_track)(struct rtw_dev *rtwdev);
@@ -1712,6 +1713,18 @@ static inline bool rtw_ssid_equal(struct cfg80211_ssid *a,
 	return true;
 }
 
+static inline void rtw_chip_efuse_grant_on(struct rtw_dev *rtwdev)
+{
+	if (rtwdev->chip->ops->efuse_grant)
+		rtwdev->chip->ops->efuse_grant(rtwdev, true);
+}
+
+static inline void rtw_chip_efuse_grant_off(struct rtw_dev *rtwdev)
+{
+	if (rtwdev->chip->ops->efuse_grant)
+		rtwdev->chip->ops->efuse_grant(rtwdev, false);
+}
+
 void rtw_get_channel_params(struct cfg80211_chan_def *chandef,
 			    struct rtw_channel_params *ch_param);
 bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target);
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 2afd547ebcc9..911d8e75db77 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -6,6 +6,7 @@
 #define __RTW_REG_DEF_H__
 
 #define REG_SYS_FUNC_EN		0x0002
+#define BIT_FEN_ELDR		BIT(12)
 #define BIT_FEN_CPUEN		BIT(2)
 #define BIT_FEN_BB_GLB_RST	BIT(1)
 #define BIT_FEN_BB_RSTB		BIT(0)
@@ -15,6 +16,10 @@
 #define REG_SYS_CLK_CTRL	0x0008
 #define BIT_CPU_CLK_EN		BIT(14)
 
+#define REG_SYS_CLKR		0x0008
+#define BIT_ANA8M		BIT(1)
+#define BIT_LOADER_CLK_EN	BIT(5)
+
 #define REG_RSV_CTRL		0x001C
 #define DISABLE_PI		0x3
 #define ENABLE_PI		0x2
@@ -87,6 +92,10 @@
 				 BIT_CHECK_SUM_OK)
 #define FW_READY_MASK		0xffff
 
+#define REG_EFUSE_ACCESS	0x00CF
+#define EFUSE_ACCESS_ON		0x69
+#define EFUSE_ACCESS_OFF	0x00
+
 #define REG_WLRF1		0x00EC
 #define REG_WIFI_BT_INFO	0x00AA
 #define BIT_BT_INT_EN		BIT(15)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 04f8d73e4e6c..756454d69fad 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -28,11 +28,24 @@ static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 	rtw_write8(rtwdev, REG_LDO_EFUSE_CTRL + 3, ldo_pwr);
 }
 
+static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
+{
+	if (on) {
+		rtw_write8(rtwdev, REG_EFUSE_ACCESS, EFUSE_ACCESS_ON);
+
+		rtw_write16_set(rtwdev, REG_SYS_FUNC_EN, BIT_FEN_ELDR);
+		rtw_write16_set(rtwdev, REG_SYS_CLKR, BIT_LOADER_CLK_EN | BIT_ANA8M);
+	} else {
+		rtw_write8(rtwdev, REG_EFUSE_ACCESS, EFUSE_ACCESS_OFF);
+	}
+}
+
 static struct rtw_chip_ops rtw8723d_ops = {
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_antenna		= NULL,
 	.cfg_ldo25		= rtw8723d_cfg_ldo25,
+	.efuse_grant		= rtw8723d_efuse_grant,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
-- 
cgit v1.2.3-59-g8ed1b


From ab0a031ecf2908c77833caebf0c86bab5e9f12b7 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 20 Apr 2020 13:50:54 +0800
Subject: rtw88: 8723d: Add read_efuse to recognize efuse info from map

The logical efuse map is decoded from physical map by parsing the
header format of the physical map. And each different type of chips
has different logical efuse layout. So add the logical map's layout
for parsing the efuse contents.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420055054.14592-9-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 43 +++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h | 39 ++++++++++++++++++++++++
 2 files changed, 82 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 756454d69fad..c25cabbab64d 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -14,6 +14,48 @@
 #include "reg.h"
 #include "debug.h"
 
+static void rtw8723de_efuse_parsing(struct rtw_efuse *efuse,
+				    struct rtw8723d_efuse *map)
+{
+	ether_addr_copy(efuse->addr, map->e.mac_addr);
+}
+
+static int rtw8723d_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
+{
+	struct rtw_efuse *efuse = &rtwdev->efuse;
+	struct rtw8723d_efuse *map;
+	int i;
+
+	map = (struct rtw8723d_efuse *)log_map;
+
+	efuse->rfe_option = 0;
+	efuse->rf_board_option = map->rf_board_option;
+	efuse->crystal_cap = map->xtal_k;
+	efuse->pa_type_2g = map->pa_type;
+	efuse->lna_type_2g = map->lna_type_2g[0];
+	efuse->channel_plan = map->channel_plan;
+	efuse->country_code[0] = map->country_code[0];
+	efuse->country_code[1] = map->country_code[1];
+	efuse->bt_setting = map->rf_bt_setting;
+	efuse->regd = map->rf_board_option & 0x7;
+	efuse->thermal_meter[0] = map->thermal_meter;
+	efuse->thermal_meter_k = map->thermal_meter;
+
+	for (i = 0; i < 4; i++)
+		efuse->txpwr_idx_table[i] = map->txpwr_idx_table[i];
+
+	switch (rtw_hci_type(rtwdev)) {
+	case RTW_HCI_TYPE_PCIE:
+		rtw8723de_efuse_parsing(efuse, map);
+		break;
+	default:
+		/* unsupported now */
+		return -ENOTSUPP;
+	}
+
+	return 0;
+}
+
 static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 {
 	u8 ldo_pwr;
@@ -41,6 +83,7 @@ static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
 }
 
 static struct rtw_chip_ops rtw8723d_ops = {
+	.read_efuse		= rtw8723d_read_efuse,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_antenna		= NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 0b784cfc34c6..1939d9897a26 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -5,4 +5,43 @@
 #ifndef __RTW8723D_H__
 #define __RTW8723D_H__
 
+struct rtw8723de_efuse {
+	u8 mac_addr[ETH_ALEN];		/* 0xd0 */
+	u8 vender_id[2];
+	u8 device_id[2];
+	u8 sub_vender_id[2];
+	u8 sub_device_id[2];
+};
+
+struct rtw8723d_efuse {
+	__le16 rtl_id;
+	u8 rsvd[2];
+	u8 afe;
+	u8 rsvd1[11];
+
+	/* power index for four RF paths */
+	struct rtw_txpwr_idx txpwr_idx_table[4];
+
+	u8 channel_plan;		/* 0xb8 */
+	u8 xtal_k;
+	u8 thermal_meter;
+	u8 iqk_lck;
+	u8 pa_type;			/* 0xbc */
+	u8 lna_type_2g[2];		/* 0xbd */
+	u8 lna_type_5g[2];
+	u8 rf_board_option;
+	u8 rf_feature_option;
+	u8 rf_bt_setting;
+	u8 eeprom_version;
+	u8 eeprom_customer_id;
+	u8 tx_bb_swing_setting_2g;
+	u8 res_c7;
+	u8 tx_pwr_calibrate_rate;
+	u8 rf_antenna_option;		/* 0xc9 */
+	u8 rfe_option;
+	u8 country_code[2];
+	u8 res[3];
+	struct rtw8723de_efuse e;
+};
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 5ad4d8957b69f3ebf95ac02212c388bda75aeb30 Mon Sep 17 00:00:00 2001
From: Tzu-En Huang <tehuang@realtek.com>
Date: Mon, 20 Apr 2020 18:52:07 +0800
Subject: rtw88: set power trim according to efuse PG values

8822C devices have power trim, thermal and PA bias values
programmed in efuse. Driver should configure the RF components
according to the values.

If the power trim is not configured, then the devices might have
distortion on the output tx power.

Signed-off-by: Tzu-En Huang <tehuang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420105207.31899-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/efuse.c    |  22 +++++
 drivers/net/wireless/realtek/rtw88/efuse.h    |   3 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 113 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.h |  28 +++++++
 4 files changed, 166 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/efuse.c b/drivers/net/wireless/realtek/rtw88/efuse.c
index df969d346b41..13d1c58d6de5 100644
--- a/drivers/net/wireless/realtek/rtw88/efuse.c
+++ b/drivers/net/wireless/realtek/rtw88/efuse.c
@@ -2,6 +2,8 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/iopoll.h>
+
 #include "main.h"
 #include "efuse.h"
 #include "reg.h"
@@ -120,6 +122,26 @@ static int rtw_dump_physical_efuse_map(struct rtw_dev *rtwdev, u8 *map)
 	return 0;
 }
 
+int rtw_read8_physical_efuse(struct rtw_dev *rtwdev, u16 addr, u8 *data)
+{
+	u32 efuse_ctl;
+	int ret;
+
+	rtw_write32_mask(rtwdev, REG_EFUSE_CTRL, 0x3ff00, addr);
+	rtw_write32_clr(rtwdev, REG_EFUSE_CTRL, BIT_EF_FLAG);
+
+	ret = read_poll_timeout(rtw_read32, efuse_ctl, efuse_ctl & BIT_EF_FLAG,
+				1000, 100000, false, rtwdev, REG_EFUSE_CTRL);
+	if (ret) {
+		*data = EFUSE_READ_FAIL;
+		return ret;
+	}
+
+	*data = rtw_read8(rtwdev, REG_EFUSE_CTRL);
+
+	return 0;
+}
+
 int rtw_parse_efuse_map(struct rtw_dev *rtwdev)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
diff --git a/drivers/net/wireless/realtek/rtw88/efuse.h b/drivers/net/wireless/realtek/rtw88/efuse.h
index 115bbe85946a..97a51f0b0e46 100644
--- a/drivers/net/wireless/realtek/rtw88/efuse.h
+++ b/drivers/net/wireless/realtek/rtw88/efuse.h
@@ -10,6 +10,8 @@
 #define EFUSE_HW_CAP_SUPP_BW80		7
 #define EFUSE_HW_CAP_SUPP_BW40		6
 
+#define EFUSE_READ_FAIL			0xff
+
 #define GET_EFUSE_HW_CAP_HCI(hw_cap)					       \
 	le32_get_bits(*((__le32 *)(hw_cap) + 0x01), GENMASK(3, 0))
 #define GET_EFUSE_HW_CAP_BW(hw_cap)					       \
@@ -22,5 +24,6 @@
 	le32_get_bits(*((__le32 *)(hw_cap) + 0x01), GENMASK(27, 26))
 
 int rtw_parse_efuse_map(struct rtw_dev *rtwdev);
+int rtw_read8_physical_efuse(struct rtw_dev *rtwdev, u16 addr, u8 *data);
 
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index c99b1de54bfc..ee0d39135617 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -15,6 +15,7 @@
 #include "debug.h"
 #include "util.h"
 #include "bf.h"
+#include "efuse.h"
 
 static void rtw8822c_config_trx_mode(struct rtw_dev *rtwdev, u8 tx_path,
 				     u8 rx_path, bool is_tx2_path);
@@ -1000,10 +1001,122 @@ static void rtw8822c_rf_x2_check(struct rtw_dev *rtwdev)
 	}
 }
 
+static void rtw8822c_set_power_trim(struct rtw_dev *rtwdev, s8 bb_gain[2][8])
+{
+#define RF_SET_POWER_TRIM(_path, _seq, _idx)					\
+		do {								\
+			rtw_write_rf(rtwdev, _path, 0x33, RFREG_MASK, _seq);	\
+			rtw_write_rf(rtwdev, _path, 0x3f, RFREG_MASK,		\
+				     bb_gain[_path][_idx]);			\
+		} while (0)
+	u8 path;
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		rtw_write_rf(rtwdev, path, 0xee, BIT(19), 1);
+		RF_SET_POWER_TRIM(path, 0x0, 0);
+		RF_SET_POWER_TRIM(path, 0x1, 1);
+		RF_SET_POWER_TRIM(path, 0x2, 2);
+		RF_SET_POWER_TRIM(path, 0x3, 2);
+		RF_SET_POWER_TRIM(path, 0x4, 3);
+		RF_SET_POWER_TRIM(path, 0x5, 4);
+		RF_SET_POWER_TRIM(path, 0x6, 5);
+		RF_SET_POWER_TRIM(path, 0x7, 6);
+		RF_SET_POWER_TRIM(path, 0x8, 7);
+		RF_SET_POWER_TRIM(path, 0x9, 3);
+		RF_SET_POWER_TRIM(path, 0xa, 4);
+		RF_SET_POWER_TRIM(path, 0xb, 5);
+		RF_SET_POWER_TRIM(path, 0xc, 6);
+		RF_SET_POWER_TRIM(path, 0xd, 7);
+		RF_SET_POWER_TRIM(path, 0xe, 7);
+		rtw_write_rf(rtwdev, path, 0xee, BIT(19), 0);
+	}
+#undef RF_SET_POWER_TRIM
+}
+
+static void rtw8822c_power_trim(struct rtw_dev *rtwdev)
+{
+	u8 pg_pwr = 0xff, i, path, idx;
+	s8 bb_gain[2][8] = {0};
+	u16 rf_efuse_2g[3] = {PPG_2GL_TXAB, PPG_2GM_TXAB, PPG_2GH_TXAB};
+	u16 rf_efuse_5g[2][5] = {{PPG_5GL1_TXA, PPG_5GL2_TXA, PPG_5GM1_TXA,
+				  PPG_5GM2_TXA, PPG_5GH1_TXA},
+				 {PPG_5GL1_TXB, PPG_5GL2_TXB, PPG_5GM1_TXB,
+				  PPG_5GM2_TXB, PPG_5GH1_TXB} };
+	bool set = false;
+
+	for (i = 0; i < ARRAY_SIZE(rf_efuse_2g); i++) {
+		rtw_read8_physical_efuse(rtwdev, rf_efuse_2g[i], &pg_pwr);
+		if (pg_pwr == EFUSE_READ_FAIL)
+			continue;
+		set = true;
+		bb_gain[RF_PATH_A][i] = FIELD_GET(PPG_2G_A_MASK, pg_pwr);
+		bb_gain[RF_PATH_B][i] = FIELD_GET(PPG_2G_B_MASK, pg_pwr);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(rf_efuse_5g[0]); i++) {
+		for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+			rtw_read8_physical_efuse(rtwdev, rf_efuse_5g[path][i],
+						 &pg_pwr);
+			if (pg_pwr == EFUSE_READ_FAIL)
+				continue;
+			set = true;
+			idx = i + ARRAY_SIZE(rf_efuse_2g);
+			bb_gain[path][idx] = FIELD_GET(PPG_5G_MASK, pg_pwr);
+		}
+	}
+	if (set)
+		rtw8822c_set_power_trim(rtwdev, bb_gain);
+
+	rtw_write32_mask(rtwdev, REG_DIS_DPD, DIS_DPD_MASK, DIS_DPD_RATEALL);
+}
+
+static void rtw8822c_thermal_trim(struct rtw_dev *rtwdev)
+{
+	u16 rf_efuse[2] = {PPG_THERMAL_A, PPG_THERMAL_B};
+	u8 pg_therm = 0xff, thermal[2] = {0}, path;
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		rtw_read8_physical_efuse(rtwdev, rf_efuse[path], &pg_therm);
+		if (pg_therm == EFUSE_READ_FAIL)
+			return;
+		/* Efuse value of BIT(0) shall be moved to BIT(3), and the
+		 * value of BIT(1) to BIT(3) should be right shifted 1 bit.
+		 */
+		thermal[path] = FIELD_GET(GENMASK(3, 1), pg_therm);
+		thermal[path] |= FIELD_PREP(BIT(3), pg_therm & BIT(0));
+		rtw_write_rf(rtwdev, path, 0x43, RF_THEMAL_MASK, thermal[path]);
+	}
+}
+
+static void rtw8822c_pa_bias(struct rtw_dev *rtwdev)
+{
+	u16 rf_efuse_2g[2] = {PPG_PABIAS_2GA, PPG_PABIAS_2GB};
+	u16 rf_efuse_5g[2] = {PPG_PABIAS_5GA, PPG_PABIAS_5GB};
+	u8 pg_pa_bias = 0xff, path;
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		rtw_read8_physical_efuse(rtwdev, rf_efuse_2g[path],
+					 &pg_pa_bias);
+		if (pg_pa_bias == EFUSE_READ_FAIL)
+			return;
+		pg_pa_bias = FIELD_GET(PPG_PABIAS_MASK, pg_pa_bias);
+		rtw_write_rf(rtwdev, path, 0x60, RF_PABIAS_2G_MASK, pg_pa_bias);
+	}
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		rtw_read8_physical_efuse(rtwdev, rf_efuse_5g[path],
+					 &pg_pa_bias);
+		pg_pa_bias = FIELD_GET(PPG_PABIAS_MASK, pg_pa_bias);
+		rtw_write_rf(rtwdev, path, 0x60, RF_PABIAS_5G_MASK, pg_pa_bias);
+	}
+}
+
 static void rtw8822c_rf_init(struct rtw_dev *rtwdev)
 {
 	rtw8822c_rf_dac_cal(rtwdev);
 	rtw8822c_rf_x2_check(rtwdev);
+	rtw8822c_thermal_trim(rtwdev);
+	rtw8822c_power_trim(rtwdev);
+	rtw8822c_pa_bias(rtwdev);
 }
 
 static void rtw8822c_pwrtrack_init(struct rtw_dev *rtwdev)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.h b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
index dfd8662a0c0e..32b4771e04d0 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.h
@@ -309,4 +309,32 @@ const struct rtw_table name ## _tbl = {			\
 #define BIT_GS_PWSF		GENMASK(27, 0)
 #define BIT_RPT_DGAIN		GENMASK(27, 16)
 #define BIT_TX_CFIR		GENMASK(31, 30)
+
+#define PPG_THERMAL_A 0x1ef
+#define PPG_THERMAL_B 0x1b0
+#define RF_THEMAL_MASK GENMASK(19, 16)
+#define PPG_2GL_TXAB 0x1d4
+#define PPG_2GM_TXAB 0x1ee
+#define PPG_2GH_TXAB 0x1d2
+#define PPG_2G_A_MASK GENMASK(3, 0)
+#define PPG_2G_B_MASK GENMASK(7, 4)
+#define PPG_5GL1_TXA 0x1ec
+#define PPG_5GL2_TXA 0x1e8
+#define PPG_5GM1_TXA 0x1e4
+#define PPG_5GM2_TXA 0x1e0
+#define PPG_5GH1_TXA 0x1dc
+#define PPG_5GL1_TXB 0x1eb
+#define PPG_5GL2_TXB 0x1e7
+#define PPG_5GM1_TXB 0x1e3
+#define PPG_5GM2_TXB 0x1df
+#define PPG_5GH1_TXB 0x1db
+#define PPG_5G_MASK GENMASK(4, 0)
+#define PPG_PABIAS_2GA 0x1d6
+#define PPG_PABIAS_2GB 0x1d5
+#define PPG_PABIAS_5GA 0x1d8
+#define PPG_PABIAS_5GB 0x1d7
+#define PPG_PABIAS_MASK GENMASK(3, 0)
+#define RF_PABIAS_2G_MASK GENMASK(15, 12)
+#define RF_PABIAS_5G_MASK GENMASK(19, 16)
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 1c79031f8a75c132bbd42e3ef20267af97b67466 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 19 Apr 2020 17:18:47 +0300
Subject: drivers: Remove inclusion of vermagic header

Get rid of linux/vermagic.h includes, so that MODULE_ARCH_VERMAGIC from
the arch header arch/x86/include/asm/module.h won't be redefined.

  In file included from ./include/linux/module.h:30,
                   from drivers/net/ethernet/3com/3c515.c:56:
  ./arch/x86/include/asm/module.h:73: warning: "MODULE_ARCH_VERMAGIC"
redefined
     73 | # define MODULE_ARCH_VERMAGIC MODULE_PROC_FAMILY
        |
  In file included from drivers/net/ethernet/3com/3c515.c:25:
  ./include/linux/vermagic.h:28: note: this is the location of the
previous definition
     28 | #define MODULE_ARCH_VERMAGIC ""
        |

Fixes: 6bba2e89a88c ("net/3com: Delete driver and module versions from 3com drivers")
Co-developed-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Borislav Petkov <bp@suse.de>
Acked-by: Shannon Nelson <snelson@pensando.io> # ionic
Acked-by: Sebastian Reichel <sre@kernel.org> # power
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bonding_priv.h               | 2 +-
 drivers/net/ethernet/3com/3c509.c                | 1 -
 drivers/net/ethernet/3com/3c515.c                | 1 -
 drivers/net/ethernet/adaptec/starfire.c          | 1 -
 drivers/net/ethernet/pensando/ionic/ionic_main.c | 2 +-
 drivers/power/supply/test_power.c                | 2 +-
 net/ethtool/ioctl.c                              | 3 +--
 7 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/bonding/bonding_priv.h b/drivers/net/bonding/bonding_priv.h
index 45b77bc8c7b3..48cdf3a49a7d 100644
--- a/drivers/net/bonding/bonding_priv.h
+++ b/drivers/net/bonding/bonding_priv.h
@@ -14,7 +14,7 @@
 
 #ifndef _BONDING_PRIV_H
 #define _BONDING_PRIV_H
-#include <linux/vermagic.h>
+#include <generated/utsrelease.h>
 
 #define DRV_NAME	"bonding"
 #define DRV_DESCRIPTION	"Ethernet Channel Bonding Driver"
diff --git a/drivers/net/ethernet/3com/3c509.c b/drivers/net/ethernet/3com/3c509.c
index b762176a1406..139d0120f511 100644
--- a/drivers/net/ethernet/3com/3c509.c
+++ b/drivers/net/ethernet/3com/3c509.c
@@ -85,7 +85,6 @@
 #include <linux/device.h>
 #include <linux/eisa.h>
 #include <linux/bitops.h>
-#include <linux/vermagic.h>
 
 #include <linux/uaccess.h>
 #include <asm/io.h>
diff --git a/drivers/net/ethernet/3com/3c515.c b/drivers/net/ethernet/3com/3c515.c
index 90312fcd6319..47b4215bb93b 100644
--- a/drivers/net/ethernet/3com/3c515.c
+++ b/drivers/net/ethernet/3com/3c515.c
@@ -22,7 +22,6 @@
 
 */
 
-#include <linux/vermagic.h>
 #define DRV_NAME		"3c515"
 
 #define CORKSCREW 1
diff --git a/drivers/net/ethernet/adaptec/starfire.c b/drivers/net/ethernet/adaptec/starfire.c
index 2db42211329f..a64191fc2af9 100644
--- a/drivers/net/ethernet/adaptec/starfire.c
+++ b/drivers/net/ethernet/adaptec/starfire.c
@@ -45,7 +45,6 @@
 #include <asm/processor.h>		/* Processor type for cache alignment. */
 #include <linux/uaccess.h>
 #include <asm/io.h>
-#include <linux/vermagic.h>
 
 /*
  * The current frame processor firmware fails to checksum a fragment
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 588c62e9add7..3ed150512091 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -6,7 +6,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/utsname.h>
-#include <linux/vermagic.h>
+#include <generated/utsrelease.h>
 
 #include "ionic.h"
 #include "ionic_bus.h"
diff --git a/drivers/power/supply/test_power.c b/drivers/power/supply/test_power.c
index 65c23ef6408d..b3c05ff05783 100644
--- a/drivers/power/supply/test_power.c
+++ b/drivers/power/supply/test_power.c
@@ -16,7 +16,7 @@
 #include <linux/power_supply.h>
 #include <linux/errno.h>
 #include <linux/delay.h>
-#include <linux/vermagic.h>
+#include <generated/utsrelease.h>
 
 enum test_power_id {
 	TEST_AC,
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 593fa665f820..226d5ecdd567 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -17,7 +17,6 @@
 #include <linux/phy.h>
 #include <linux/bitops.h>
 #include <linux/uaccess.h>
-#include <linux/vermagic.h>
 #include <linux/vmalloc.h>
 #include <linux/sfp.h>
 #include <linux/slab.h>
@@ -28,7 +27,7 @@
 #include <net/xdp_sock.h>
 #include <net/flow_offload.h>
 #include <linux/ethtool_netlink.h>
-
+#include <generated/utsrelease.h>
 #include "common.h"
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From cad99e506887e257ce8bce826ec25f7854b7e69a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 19 Apr 2020 17:18:48 +0300
Subject: net/hns: Remove custom driver version in favour of global one

Use globally defined kernel version instead of custom driver variant.

Reported-by: Borislav Petkov <bp@suse.de>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 3 ---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    | 4 ----
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 4 ----
 3 files changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index da98fd7c8eca..ac3a48a24d86 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -15,7 +15,6 @@
 #include <linux/aer.h>
 #include <linux/skbuff.h>
 #include <linux/sctp.h>
-#include <linux/vermagic.h>
 #include <net/gre.h>
 #include <net/ip6_checksum.h>
 #include <net/pkt_cls.h>
@@ -44,7 +43,6 @@ static void hns3_clear_all_ring(struct hnae3_handle *h, bool force);
 static void hns3_remove_hw_addr(struct net_device *netdev);
 
 static const char hns3_driver_name[] = "hns3";
-const char hns3_driver_version[] = VERMAGIC_STRING;
 static const char hns3_driver_string[] =
 			"Hisilicon Ethernet Network Driver for Hip08 Family";
 static const char hns3_copyright[] = "Copyright (c) 2017 Huawei Corporation.";
@@ -4765,4 +4763,3 @@ MODULE_DESCRIPTION("HNS3: Hisilicon Ethernet Driver");
 MODULE_AUTHOR("Huawei Tech. Co., Ltd.");
 MODULE_LICENSE("GPL");
 MODULE_ALIAS("pci:hns-nic");
-MODULE_VERSION(HNS3_MOD_VERSION);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index abefd7a179f7..4b3f0abf0715 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -8,10 +8,6 @@
 
 #include "hnae3.h"
 
-#define HNS3_MOD_VERSION "1.0"
-
-extern const char hns3_driver_version[];
-
 enum hns3_nic_state {
 	HNS3_NIC_STATE_TESTING,
 	HNS3_NIC_STATE_RESETTING,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 28b81f24afa1..6a0734be4a1a 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -546,10 +546,6 @@ static void hns3_get_drvinfo(struct net_device *netdev,
 		return;
 	}
 
-	strncpy(drvinfo->version, hns3_driver_version,
-		sizeof(drvinfo->version));
-	drvinfo->version[sizeof(drvinfo->version) - 1] = '\0';
-
 	strncpy(drvinfo->driver, h->pdev->driver->name,
 		sizeof(drvinfo->driver));
 	drvinfo->driver[sizeof(drvinfo->driver) - 1] = '\0';
-- 
cgit v1.2.3-59-g8ed1b


From b4f37219813fd126b2cda6d7805d8c61b8cf801a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 19 Apr 2020 17:18:49 +0300
Subject: net/nfp: Update driver to use global kernel version

Change nfp driver to use globally defined kernel version.

Reported-by: Borislav Petkov <bp@suse.de>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Reviewed-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_main.c        | 3 ---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 2 --
 2 files changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_main.c b/drivers/net/ethernet/netronome/nfp/nfp_main.c
index 4d282fc56009..7ff2ccbd43b0 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_main.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_main.c
@@ -14,7 +14,6 @@
 #include <linux/mutex.h>
 #include <linux/pci.h>
 #include <linux/firmware.h>
-#include <linux/vermagic.h>
 #include <linux/vmalloc.h>
 #include <net/devlink.h>
 
@@ -31,7 +30,6 @@
 #include "nfp_net.h"
 
 static const char nfp_driver_name[] = "nfp";
-const char nfp_driver_version[] = VERMAGIC_STRING;
 
 static const struct pci_device_id nfp_pci_device_ids[] = {
 	{ PCI_VENDOR_ID_NETRONOME, PCI_DEVICE_ID_NETRONOME_NFP6000,
@@ -920,4 +918,3 @@ MODULE_FIRMWARE("netronome/nic_AMDA0099-0001_1x10_1x25.nffw");
 MODULE_AUTHOR("Netronome Systems <oss-drivers@netronome.com>");
 MODULE_LICENSE("GPL");
 MODULE_DESCRIPTION("The Netronome Flow Processor (NFP) driver.");
-MODULE_VERSION(UTS_RELEASE);
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index 2779f1526d1e..a5aa3219d112 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -203,8 +203,6 @@ nfp_get_drvinfo(struct nfp_app *app, struct pci_dev *pdev,
 	char nsp_version[ETHTOOL_FWVERS_LEN] = {};
 
 	strlcpy(drvinfo->driver, pdev->driver->name, sizeof(drvinfo->driver));
-	strlcpy(drvinfo->version, nfp_driver_version, sizeof(drvinfo->version));
-
 	nfp_net_get_nspinfo(app, nsp_version);
 	snprintf(drvinfo->fw_version, sizeof(drvinfo->fw_version),
 		 "%s %s %s %s", vnic_version, nsp_version,
-- 
cgit v1.2.3-59-g8ed1b


From 51161bfc66a68d21f13d15a689b3ea7980457790 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Sun, 19 Apr 2020 18:55:06 +0300
Subject: kernel/module: Hide vermagic header file from general use
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

VERMAGIC* definitions are not supposed to be used by drivers (see bug
report [1]), so introduce a special define to guard inclusion of this
header file, and define it in kernel/module.c and in the internal
script (scripts/mod/modpost.c) that generates the *.mod.c files.

In-tree module build:
➜  kernel git:(vermagic) ✗ make clean
➜  kernel git:(vermagic) ✗ make M=drivers/infiniband/hw/mlx5
➜  kernel git:(vermagic) ✗ modinfo drivers/infiniband/hw/mlx5/mlx5_ib.ko
filename:	/images/leonro/src/kernel/drivers/infiniband/hw/mlx5/mlx5_ib.ko
<...>
vermagic:       5.6.0+ SMP mod_unload modversions

Out-of-tree module build:
➜  mlx5 make -C /images/leonro/src/kernel clean M=/tmp/mlx5
➜  mlx5 make -C /images/leonro/src/kernel M=/tmp/mlx5
➜  mlx5 modinfo /tmp/mlx5/mlx5_ib.ko
filename:       /tmp/mlx5/mlx5_ib.ko
<...>
vermagic:       5.6.0+ SMP mod_unload modversions

[1] https://lore.kernel.org/lkml/20200411155623.GA22175@zn.tnic
Reported-by: Borislav Petkov <bp@suse.de>
Acked-by: Borislav Petkov <bp@suse.de>
Acked-by: Jessica Yu <jeyu@kernel.org>
Co-developed-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Masahiro Yamada <masahiroy@kernel.org>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/vermagic.h | 5 +++++
 kernel/module.c          | 3 +++
 scripts/mod/modpost.c    | 1 +
 3 files changed, 9 insertions(+)

diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h
index 9aced11e9000..7768d20ada39 100644
--- a/include/linux/vermagic.h
+++ b/include/linux/vermagic.h
@@ -1,4 +1,9 @@
 /* SPDX-License-Identifier: GPL-2.0 */
+
+#ifndef INCLUDE_VERMAGIC
+#error "This header can be included from kernel/module.c or *.mod.c only"
+#endif
+
 #include <generated/utsrelease.h>
 
 /* Simply sanity version stamp for modules. */
diff --git a/kernel/module.c b/kernel/module.c
index 646f1e2330d2..8833e848b73c 100644
--- a/kernel/module.c
+++ b/kernel/module.c
@@ -4,6 +4,9 @@
    Copyright (C) 2001 Rusty Russell, 2002, 2010 Rusty Russell IBM.
 
 */
+
+#define INCLUDE_VERMAGIC
+
 #include <linux/export.h>
 #include <linux/extable.h>
 #include <linux/moduleloader.h>
diff --git a/scripts/mod/modpost.c b/scripts/mod/modpost.c
index 5c3c50c5ec52..7f7d4ee7b652 100644
--- a/scripts/mod/modpost.c
+++ b/scripts/mod/modpost.c
@@ -2251,6 +2251,7 @@ static void add_header(struct buffer *b, struct module *mod)
 	 * Include build-salt.h after module.h in order to
 	 * inherit the definitions.
 	 */
+	buf_printf(b, "#define INCLUDE_VERMAGIC\n");
 	buf_printf(b, "#include <linux/build-salt.h>\n");
 	buf_printf(b, "#include <linux/vermagic.h>\n");
 	buf_printf(b, "#include <linux/compiler.h>\n");
-- 
cgit v1.2.3-59-g8ed1b


From 2b49d128b3f8d8fff8972afcbc603802e5e40c6a Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:45 +0800
Subject: net: mscc: ocelot: move ocelot ptp clock code out of ocelot.c

The Ocelot PTP clock driver has so far been embedded in the ocelot.c
driver. It currently supports the basic gettime64/settime64/adjtime/adjfine
functions, which are used by both the Ocelot switch and the Felix switch.

This patch moves the current PTP clock code out of the ocelot.c driver
and maintains it as a separate file, ocelot_ptp.c.
For further feature implementation, the common code can be put
in ocelot_ptp.c, and the switch-specific code should live in the specific
switch driver. The interrupt implementation in the SoC differs
between Ocelot and Felix.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c           |  25 ++++
 drivers/net/ethernet/mscc/Makefile       |   2 +-
 drivers/net/ethernet/mscc/ocelot.c       | 206 -------------------------------
 drivers/net/ethernet/mscc/ocelot.h       |   3 +-
 drivers/net/ethernet/mscc/ocelot_board.c |  25 ++++
 drivers/net/ethernet/mscc/ocelot_ptp.c   | 203 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/mscc/ocelot_ptp.h   |  41 ------
 include/soc/mscc/ocelot.h                |   1 -
 include/soc/mscc/ocelot_ptp.h            |  52 ++++++++
 9 files changed, 307 insertions(+), 251 deletions(-)
 create mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.c
 delete mode 100644 drivers/net/ethernet/mscc/ocelot_ptp.h
 create mode 100644 include/soc/mscc/ocelot_ptp.h

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index d0a3764ff0cf..44015a24b087 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -7,6 +7,7 @@
 #include <soc/mscc/ocelot_sys.h>
 #include <soc/mscc/ocelot_dev.h>
 #include <soc/mscc/ocelot_ana.h>
+#include <soc/mscc/ocelot_ptp.h>
 #include <soc/mscc/ocelot.h>
 #include <linux/packing.h>
 #include <linux/module.h>
@@ -494,6 +495,21 @@ static int felix_init_structs(struct felix *felix, int num_phys_ports)
 	return 0;
 }
 
+static struct ptp_clock_info ocelot_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "felix ptp",
+	.max_adj	= 0x7fffffff,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.gettime64	= ocelot_ptp_gettime64,
+	.settime64	= ocelot_ptp_settime64,
+	.adjtime	= ocelot_ptp_adjtime,
+	.adjfine	= ocelot_ptp_adjfine,
+};
+
 /* Hardware initialization done here so that we can allocate structures with
  * devm without fear of dsa_register_switch returning -EPROBE_DEFER and causing
  * us to allocate structures twice (leak memory) and map PCI memory twice
@@ -510,6 +526,14 @@ static int felix_setup(struct dsa_switch *ds)
 		return err;
 
 	ocelot_init(ocelot);
+	if (ocelot->ptp) {
+		err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info);
+		if (err) {
+			dev_err(ocelot->dev,
+				"Timestamp initialization failed\n");
+			ocelot->ptp = 0;
+		}
+	}
 
 	for (port = 0; port < ds->num_ports; port++) {
 		ocelot_init_port(ocelot, port);
@@ -548,6 +572,7 @@ static void felix_teardown(struct dsa_switch *ds)
 	if (felix->info->mdio_bus_free)
 		felix->info->mdio_bus_free(ocelot);
 
+	ocelot_deinit_timestamp(ocelot);
 	/* stop workqueue thread */
 	ocelot_deinit(ocelot);
 }
diff --git a/drivers/net/ethernet/mscc/Makefile b/drivers/net/ethernet/mscc/Makefile
index 9a36c26095c8..91b33b55054e 100644
--- a/drivers/net/ethernet/mscc/Makefile
+++ b/drivers/net/ethernet/mscc/Makefile
@@ -1,5 +1,5 @@
 # SPDX-License-Identifier: (GPL-2.0 OR MIT)
 obj-$(CONFIG_MSCC_OCELOT_SWITCH) += mscc_ocelot_common.o
 mscc_ocelot_common-y := ocelot.o ocelot_io.o
-mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o
+mscc_ocelot_common-y += ocelot_regs.o ocelot_tc.o ocelot_police.o ocelot_ace.o ocelot_flower.o ocelot_ptp.o
 obj-$(CONFIG_MSCC_OCELOT_SWITCH_OCELOT) += ocelot_board.o
diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index a8c48a4a708f..7c4165af9f66 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -14,7 +14,6 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/phy.h>
-#include <linux/ptp_clock_kernel.h>
 #include <linux/skbuff.h>
 #include <linux/iopoll.h>
 #include <net/arp.h>
@@ -1991,200 +1990,6 @@ struct notifier_block ocelot_switchdev_blocking_nb __read_mostly = {
 };
 EXPORT_SYMBOL(ocelot_switchdev_blocking_nb);
 
-int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
-{
-	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
-	unsigned long flags;
-	time64_t s;
-	u32 val;
-	s64 ns;
-
-	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
-
-	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
-	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
-	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
-	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
-
-	s = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN) & 0xffff;
-	s <<= 32;
-	s += ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
-	ns = ocelot_read_rix(ocelot, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
-
-	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
-
-	/* Deal with negative values */
-	if (ns >= 0x3ffffff0 && ns <= 0x3fffffff) {
-		s--;
-		ns &= 0xf;
-		ns += 999999984;
-	}
-
-	set_normalized_timespec64(ts, s, ns);
-	return 0;
-}
-EXPORT_SYMBOL(ocelot_ptp_gettime64);
-
-static int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
-				const struct timespec64 *ts)
-{
-	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
-	unsigned long flags;
-	u32 val;
-
-	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
-
-	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
-	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
-	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
-
-	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
-
-	ocelot_write_rix(ocelot, lower_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_LSB,
-			 TOD_ACC_PIN);
-	ocelot_write_rix(ocelot, upper_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_MSB,
-			 TOD_ACC_PIN);
-	ocelot_write_rix(ocelot, ts->tv_nsec, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
-
-	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
-	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
-	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_LOAD);
-
-	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
-
-	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
-	return 0;
-}
-
-static int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
-{
-	if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
-		struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
-		unsigned long flags;
-		u32 val;
-
-		spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
-
-		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
-		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
-		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
-
-		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
-
-		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
-		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN);
-		ocelot_write_rix(ocelot, delta, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
-
-		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
-		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
-		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_DELTA);
-
-		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
-
-		spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
-	} else {
-		/* Fall back using ocelot_ptp_settime64 which is not exact. */
-		struct timespec64 ts;
-		u64 now;
-
-		ocelot_ptp_gettime64(ptp, &ts);
-
-		now = ktime_to_ns(timespec64_to_ktime(ts));
-		ts = ns_to_timespec64(now + delta);
-
-		ocelot_ptp_settime64(ptp, &ts);
-	}
-	return 0;
-}
-
-static int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
-{
-	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
-	u32 unit = 0, direction = 0;
-	unsigned long flags;
-	u64 adj = 0;
-
-	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
-
-	if (!scaled_ppm)
-		goto disable_adj;
-
-	if (scaled_ppm < 0) {
-		direction = PTP_CFG_CLK_ADJ_CFG_DIR;
-		scaled_ppm = -scaled_ppm;
-	}
-
-	adj = PSEC_PER_SEC << 16;
-	do_div(adj, scaled_ppm);
-	do_div(adj, 1000);
-
-	/* If the adjustment value is too large, use ns instead */
-	if (adj >= (1L << 30)) {
-		unit = PTP_CFG_CLK_ADJ_FREQ_NS;
-		do_div(adj, 1000);
-	}
-
-	/* Still too big */
-	if (adj >= (1L << 30))
-		goto disable_adj;
-
-	ocelot_write(ocelot, unit | adj, PTP_CLK_CFG_ADJ_FREQ);
-	ocelot_write(ocelot, PTP_CFG_CLK_ADJ_CFG_ENA | direction,
-		     PTP_CLK_CFG_ADJ_CFG);
-
-	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
-	return 0;
-
-disable_adj:
-	ocelot_write(ocelot, 0, PTP_CLK_CFG_ADJ_CFG);
-
-	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
-	return 0;
-}
-
-static struct ptp_clock_info ocelot_ptp_clock_info = {
-	.owner		= THIS_MODULE,
-	.name		= "ocelot ptp",
-	.max_adj	= 0x7fffffff,
-	.n_alarm	= 0,
-	.n_ext_ts	= 0,
-	.n_per_out	= 0,
-	.n_pins		= 0,
-	.pps		= 0,
-	.gettime64	= ocelot_ptp_gettime64,
-	.settime64	= ocelot_ptp_settime64,
-	.adjtime	= ocelot_ptp_adjtime,
-	.adjfine	= ocelot_ptp_adjfine,
-};
-
-static int ocelot_init_timestamp(struct ocelot *ocelot)
-{
-	struct ptp_clock *ptp_clock;
-
-	ocelot->ptp_info = ocelot_ptp_clock_info;
-	ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
-	if (IS_ERR(ptp_clock))
-		return PTR_ERR(ptp_clock);
-	/* Check if PHC support is missing at the configuration level */
-	if (!ptp_clock)
-		return 0;
-
-	ocelot->ptp_clock = ptp_clock;
-
-	ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
-	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
-	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
-
-	ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC);
-
-	/* There is no device reconfiguration, PTP Rx stamping is always
-	 * enabled.
-	 */
-	ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-
-	return 0;
-}
-
 /* Configure the maximum SDU (L2 payload) on RX to the value specified in @sdu.
  * The length of VLAN tags is accounted for automatically via DEV_MAC_TAGS_CFG.
  * In the special case that it's the NPI port that we're configuring, the
@@ -2530,15 +2335,6 @@ int ocelot_init(struct ocelot *ocelot)
 	queue_delayed_work(ocelot->stats_queue, &ocelot->stats_work,
 			   OCELOT_STATS_CHECK_DELAY);
 
-	if (ocelot->ptp) {
-		ret = ocelot_init_timestamp(ocelot);
-		if (ret) {
-			dev_err(ocelot->dev,
-				"Timestamp initialization failed\n");
-			return ret;
-		}
-	}
-
 	return 0;
 }
 EXPORT_SYMBOL(ocelot_init);
@@ -2551,8 +2347,6 @@ void ocelot_deinit(struct ocelot *ocelot)
 	cancel_delayed_work(&ocelot->stats_work);
 	destroy_workqueue(ocelot->stats_queue);
 	mutex_destroy(&ocelot->stats_lock);
-	if (ocelot->ptp_clock)
-		ptp_clock_unregister(ocelot->ptp_clock);
 
 	for (i = 0; i < ocelot->num_phys_ports; i++) {
 		port = ocelot->ports[i];
diff --git a/drivers/net/ethernet/mscc/ocelot.h b/drivers/net/ethernet/mscc/ocelot.h
index 641af929497f..f0a15aa187f2 100644
--- a/drivers/net/ethernet/mscc/ocelot.h
+++ b/drivers/net/ethernet/mscc/ocelot.h
@@ -15,18 +15,17 @@
 #include <linux/phy.h>
 #include <linux/phy/phy.h>
 #include <linux/platform_device.h>
-#include <linux/ptp_clock_kernel.h>
 #include <linux/regmap.h>
 
 #include <soc/mscc/ocelot_qsys.h>
 #include <soc/mscc/ocelot_sys.h>
 #include <soc/mscc/ocelot_dev.h>
 #include <soc/mscc/ocelot_ana.h>
+#include <soc/mscc/ocelot_ptp.h>
 #include <soc/mscc/ocelot.h>
 #include "ocelot_rew.h"
 #include "ocelot_qs.h"
 #include "ocelot_tc.h"
-#include "ocelot_ptp.h"
 
 #define OCELOT_BUFFER_CELL_SZ 60
 
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 0ac9fbf77a01..ee016f7ed934 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -366,6 +366,21 @@ static const struct vcap_props vsc7514_vcap_props[] = {
 	},
 };
 
+static struct ptp_clock_info ocelot_ptp_clock_info = {
+	.owner		= THIS_MODULE,
+	.name		= "ocelot ptp",
+	.max_adj	= 0x7fffffff,
+	.n_alarm	= 0,
+	.n_ext_ts	= 0,
+	.n_per_out	= 0,
+	.n_pins		= 0,
+	.pps		= 0,
+	.gettime64	= ocelot_ptp_gettime64,
+	.settime64	= ocelot_ptp_settime64,
+	.adjtime	= ocelot_ptp_adjtime,
+	.adjfine	= ocelot_ptp_adjfine,
+};
+
 static int mscc_ocelot_probe(struct platform_device *pdev)
 {
 	struct device_node *np = pdev->dev.of_node;
@@ -469,6 +484,15 @@ static int mscc_ocelot_probe(struct platform_device *pdev)
 	ocelot->vcap = vsc7514_vcap_props;
 
 	ocelot_init(ocelot);
+	if (ocelot->ptp) {
+		err = ocelot_init_timestamp(ocelot, &ocelot_ptp_clock_info);
+		if (err) {
+			dev_err(ocelot->dev,
+				"Timestamp initialization failed\n");
+			ocelot->ptp = 0;
+		}
+	}
+
 	/* No NPI port */
 	ocelot_configure_cpu(ocelot, -1, OCELOT_TAG_PREFIX_NONE,
 			     OCELOT_TAG_PREFIX_NONE);
@@ -574,6 +598,7 @@ static int mscc_ocelot_remove(struct platform_device *pdev)
 {
 	struct ocelot *ocelot = platform_get_drvdata(pdev);
 
+	ocelot_deinit_timestamp(ocelot);
 	ocelot_deinit(ocelot);
 	unregister_switchdev_blocking_notifier(&ocelot_switchdev_blocking_nb);
 	unregister_switchdev_notifier(&ocelot_switchdev_nb);
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
new file mode 100644
index 000000000000..69d4e5677343
--- /dev/null
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -0,0 +1,203 @@
+// SPDX-License-Identifier: (GPL-2.0 OR MIT)
+/* Microsemi Ocelot PTP clock driver
+ *
+ * Copyright (c) 2017 Microsemi Corporation
+ * Copyright 2020 NXP
+ */
+#include <soc/mscc/ocelot_ptp.h>
+#include <soc/mscc/ocelot_sys.h>
+#include <soc/mscc/ocelot.h>
+
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	time64_t s;
+	u32 val;
+	s64 ns;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_SAVE);
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	s = ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN) & 0xffff;
+	s <<= 32;
+	s += ocelot_read_rix(ocelot, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+	ns = ocelot_read_rix(ocelot, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+
+	/* Deal with negative values */
+	if (ns >= 0x3ffffff0 && ns <= 0x3fffffff) {
+		s--;
+		ns &= 0xf;
+		ns += 999999984;
+	}
+
+	set_normalized_timespec64(ts, s, ns);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_gettime64);
+
+int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
+			 const struct timespec64 *ts)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	unsigned long flags;
+	u32 val;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	ocelot_write_rix(ocelot, lower_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_LSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, upper_32_bits(ts->tv_sec), PTP_PIN_TOD_SEC_MSB,
+			 TOD_ACC_PIN);
+	ocelot_write_rix(ocelot, ts->tv_nsec, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+	val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+	val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK | PTP_PIN_CFG_DOM);
+	val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_LOAD);
+
+	ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_settime64);
+
+int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	if (delta > -(NSEC_PER_SEC / 2) && delta < (NSEC_PER_SEC / 2)) {
+		struct ocelot *ocelot = container_of(ptp, struct ocelot,
+						     ptp_info);
+		unsigned long flags;
+		u32 val;
+
+		spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK |
+			 PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_LSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, 0, PTP_PIN_TOD_SEC_MSB, TOD_ACC_PIN);
+		ocelot_write_rix(ocelot, delta, PTP_PIN_TOD_NSEC, TOD_ACC_PIN);
+
+		val = ocelot_read_rix(ocelot, PTP_PIN_CFG, TOD_ACC_PIN);
+		val &= ~(PTP_PIN_CFG_SYNC | PTP_PIN_CFG_ACTION_MASK |
+			 PTP_PIN_CFG_DOM);
+		val |= PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_DELTA);
+
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, TOD_ACC_PIN);
+
+		spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	} else {
+		/* Fall back using ocelot_ptp_settime64 which is not exact. */
+		struct timespec64 ts;
+		u64 now;
+
+		ocelot_ptp_gettime64(ptp, &ts);
+
+		now = ktime_to_ns(timespec64_to_ktime(ts));
+		ts = ns_to_timespec64(now + delta);
+
+		ocelot_ptp_settime64(ptp, &ts);
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_adjtime);
+
+int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	u32 unit = 0, direction = 0;
+	unsigned long flags;
+	u64 adj = 0;
+
+	spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+
+	if (!scaled_ppm)
+		goto disable_adj;
+
+	if (scaled_ppm < 0) {
+		direction = PTP_CFG_CLK_ADJ_CFG_DIR;
+		scaled_ppm = -scaled_ppm;
+	}
+
+	adj = PSEC_PER_SEC << 16;
+	do_div(adj, scaled_ppm);
+	do_div(adj, 1000);
+
+	/* If the adjustment value is too large, use ns instead */
+	if (adj >= (1L << 30)) {
+		unit = PTP_CFG_CLK_ADJ_FREQ_NS;
+		do_div(adj, 1000);
+	}
+
+	/* Still too big */
+	if (adj >= (1L << 30))
+		goto disable_adj;
+
+	ocelot_write(ocelot, unit | adj, PTP_CLK_CFG_ADJ_FREQ);
+	ocelot_write(ocelot, PTP_CFG_CLK_ADJ_CFG_ENA | direction,
+		     PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+
+disable_adj:
+	ocelot_write(ocelot, 0, PTP_CLK_CFG_ADJ_CFG);
+
+	spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_adjfine);
+
+int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info)
+{
+	struct ptp_clock *ptp_clock;
+
+	ocelot->ptp_info = *info;
+	ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
+	if (IS_ERR(ptp_clock))
+		return PTR_ERR(ptp_clock);
+	/* Check if PHC support is missing at the configuration level */
+	if (!ptp_clock)
+		return 0;
+
+	ocelot->ptp_clock = ptp_clock;
+
+	ocelot_write(ocelot, SYS_PTP_CFG_PTP_STAMP_WID(30), SYS_PTP_CFG);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_LOW);
+	ocelot_write(ocelot, 0xffffffff, ANA_TABLES_PTP_ID_HIGH);
+
+	ocelot_write(ocelot, PTP_CFG_MISC_PTP_EN, PTP_CFG_MISC);
+
+	/* There is no device reconfiguration, PTP Rx stamping is always
+	 * enabled.
+	 */
+	ocelot->hwtstamp_config.rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
+
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_init_timestamp);
+
+int ocelot_deinit_timestamp(struct ocelot *ocelot)
+{
+	if (ocelot->ptp_clock)
+		ptp_clock_unregister(ocelot->ptp_clock);
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_deinit_timestamp);
diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.h b/drivers/net/ethernet/mscc/ocelot_ptp.h
deleted file mode 100644
index 9ede14a12573..000000000000
--- a/drivers/net/ethernet/mscc/ocelot_ptp.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
-/*
- * Microsemi Ocelot Switch driver
- *
- * License: Dual MIT/GPL
- * Copyright (c) 2017 Microsemi Corporation
- */
-
-#ifndef _MSCC_OCELOT_PTP_H_
-#define _MSCC_OCELOT_PTP_H_
-
-#define PTP_PIN_CFG_RSZ			0x20
-#define PTP_PIN_TOD_SEC_MSB_RSZ		PTP_PIN_CFG_RSZ
-#define PTP_PIN_TOD_SEC_LSB_RSZ		PTP_PIN_CFG_RSZ
-#define PTP_PIN_TOD_NSEC_RSZ		PTP_PIN_CFG_RSZ
-
-#define PTP_PIN_CFG_DOM			BIT(0)
-#define PTP_PIN_CFG_SYNC		BIT(2)
-#define PTP_PIN_CFG_ACTION(x)		((x) << 3)
-#define PTP_PIN_CFG_ACTION_MASK		PTP_PIN_CFG_ACTION(0x7)
-
-enum {
-	PTP_PIN_ACTION_IDLE = 0,
-	PTP_PIN_ACTION_LOAD,
-	PTP_PIN_ACTION_SAVE,
-	PTP_PIN_ACTION_CLOCK,
-	PTP_PIN_ACTION_DELTA,
-	PTP_PIN_ACTION_NOSYNC,
-	PTP_PIN_ACTION_SYNC,
-};
-
-#define PTP_CFG_MISC_PTP_EN		BIT(2)
-
-#define PSEC_PER_SEC			1000000000000LL
-
-#define PTP_CFG_CLK_ADJ_CFG_ENA		BIT(0)
-#define PTP_CFG_CLK_ADJ_CFG_DIR		BIT(1)
-
-#define PTP_CFG_CLK_ADJ_FREQ_NS		BIT(30)
-
-#endif
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 6d6a3947c8b7..6fd88ee622cf 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -620,7 +620,6 @@ int ocelot_vlan_add(struct ocelot *ocelot, int port, u16 vid, bool pvid,
 int ocelot_vlan_del(struct ocelot *ocelot, int port, u16 vid);
 int ocelot_hwstamp_get(struct ocelot *ocelot, int port, struct ifreq *ifr);
 int ocelot_hwstamp_set(struct ocelot *ocelot, int port, struct ifreq *ifr);
-int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
 int ocelot_port_add_txtstamp_skb(struct ocelot_port *ocelot_port,
 				 struct sk_buff *skb);
 void ocelot_get_txtstamp(struct ocelot *ocelot);
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
new file mode 100644
index 000000000000..f01b0ce4e4cb
--- /dev/null
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -0,0 +1,52 @@
+/* SPDX-License-Identifier: (GPL-2.0 OR MIT) */
+/*
+ * Microsemi Ocelot Switch driver
+ *
+ * License: Dual MIT/GPL
+ * Copyright (c) 2017 Microsemi Corporation
+ * Copyright 2020 NXP
+ */
+
+#ifndef _MSCC_OCELOT_PTP_H_
+#define _MSCC_OCELOT_PTP_H_
+
+#include <linux/ptp_clock_kernel.h>
+#include <soc/mscc/ocelot.h>
+
+#define PTP_PIN_CFG_RSZ			0x20
+#define PTP_PIN_TOD_SEC_MSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_SEC_LSB_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_TOD_NSEC_RSZ		PTP_PIN_CFG_RSZ
+
+#define PTP_PIN_CFG_DOM			BIT(0)
+#define PTP_PIN_CFG_SYNC		BIT(2)
+#define PTP_PIN_CFG_ACTION(x)		((x) << 3)
+#define PTP_PIN_CFG_ACTION_MASK		PTP_PIN_CFG_ACTION(0x7)
+
+enum {
+	PTP_PIN_ACTION_IDLE = 0,
+	PTP_PIN_ACTION_LOAD,
+	PTP_PIN_ACTION_SAVE,
+	PTP_PIN_ACTION_CLOCK,
+	PTP_PIN_ACTION_DELTA,
+	PTP_PIN_ACTION_NOSYNC,
+	PTP_PIN_ACTION_SYNC,
+};
+
+#define PTP_CFG_MISC_PTP_EN		BIT(2)
+
+#define PSEC_PER_SEC			1000000000000LL
+
+#define PTP_CFG_CLK_ADJ_CFG_ENA		BIT(0)
+#define PTP_CFG_CLK_ADJ_CFG_DIR		BIT(1)
+
+#define PTP_CFG_CLK_ADJ_FREQ_NS		BIT(30)
+
+int ocelot_ptp_gettime64(struct ptp_clock_info *ptp, struct timespec64 *ts);
+int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
+			 const struct timespec64 *ts);
+int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta);
+int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
+int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info);
+int ocelot_deinit_timestamp(struct ocelot *ocelot);
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From d2b09a8e7bcbfa47e7161b20d6387ac968834c21 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:46 +0800
Subject: net: mscc: ocelot: fix timestamp info if ptp clock does not work

The timestamp info should report only software timestamping capabilities
if the PTP clock does not work.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 7c4165af9f66..a2b9b85612a4 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1349,6 +1349,12 @@ int ocelot_get_ts_info(struct ocelot *ocelot, int port,
 {
 	info->phc_index = ocelot->ptp_clock ?
 			  ptp_clock_index(ocelot->ptp_clock) : -1;
+	if (info->phc_index == -1) {
+		info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
+					 SOF_TIMESTAMPING_RX_SOFTWARE |
+					 SOF_TIMESTAMPING_SOFTWARE;
+		return 0;
+	}
 	info->so_timestamping |= SOF_TIMESTAMPING_TX_SOFTWARE |
 				 SOF_TIMESTAMPING_RX_SOFTWARE |
 				 SOF_TIMESTAMPING_SOFTWARE |
-- 
cgit v1.2.3-59-g8ed1b


From 3007bc7321e3c37de9d7d965cb9fb95aaa00113b Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:47 +0800
Subject: net: mscc: ocelot: redefine PTP pins

There are 5 PTP_PINS register groups on the Ocelot switch.
Apart from the one used for TOD operations, there are
4 register groups for programmable pins. So redefine the
enum to name the 4 programmable pins.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/soc/mscc/ocelot.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 6fd88ee622cf..7d44d3508869 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -440,10 +440,11 @@ enum ocelot_regfield {
 	REGFIELD_MAX
 };
 
-enum ocelot_clk_pins {
-	ALT_PPS_PIN	= 1,
-	EXT_CLK_PIN,
-	ALT_LDST_PIN,
+enum ocelot_ptp_pins {
+	PTP_PIN_0,
+	PTP_PIN_1,
+	PTP_PIN_2,
+	PTP_PIN_3,
 	TOD_ACC_PIN
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 94aca0824443d32987b31e656044ff7da425c523 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:48 +0800
Subject: net: mscc: ocelot: add wave programming registers definitions

Add wave programming registers definitions for Ocelot platforms.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c  | 2 ++
 drivers/net/ethernet/mscc/ocelot_regs.c | 2 ++
 include/soc/mscc/ocelot.h               | 2 ++
 include/soc/mscc/ocelot_ptp.h           | 2 ++
 4 files changed, 8 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index b4078f3c5c38..4fe707ef54b8 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -313,6 +313,8 @@ static const u32 vsc9959_ptp_regmap[] = {
 	REG(PTP_PIN_TOD_SEC_MSB,           0x000004),
 	REG(PTP_PIN_TOD_SEC_LSB,           0x000008),
 	REG(PTP_PIN_TOD_NSEC,              0x00000c),
+	REG(PTP_PIN_WF_HIGH_PERIOD,        0x000014),
+	REG(PTP_PIN_WF_LOW_PERIOD,         0x000018),
 	REG(PTP_CFG_MISC,                  0x0000a0),
 	REG(PTP_CLK_CFG_ADJ_CFG,           0x0000a4),
 	REG(PTP_CLK_CFG_ADJ_FREQ,          0x0000a8),
diff --git a/drivers/net/ethernet/mscc/ocelot_regs.c b/drivers/net/ethernet/mscc/ocelot_regs.c
index b88b5899b227..ed4dd01a41ad 100644
--- a/drivers/net/ethernet/mscc/ocelot_regs.c
+++ b/drivers/net/ethernet/mscc/ocelot_regs.c
@@ -239,6 +239,8 @@ static const u32 ocelot_ptp_regmap[] = {
 	REG(PTP_PIN_TOD_SEC_MSB,           0x000004),
 	REG(PTP_PIN_TOD_SEC_LSB,           0x000008),
 	REG(PTP_PIN_TOD_NSEC,              0x00000c),
+	REG(PTP_PIN_WF_HIGH_PERIOD,        0x000014),
+	REG(PTP_PIN_WF_LOW_PERIOD,         0x000018),
 	REG(PTP_CFG_MISC,                  0x0000a0),
 	REG(PTP_CLK_CFG_ADJ_CFG,           0x0000a4),
 	REG(PTP_CLK_CFG_ADJ_FREQ,          0x0000a8),
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 7d44d3508869..31193ad3a545 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -385,6 +385,8 @@ enum ocelot_reg {
 	PTP_PIN_TOD_SEC_MSB,
 	PTP_PIN_TOD_SEC_LSB,
 	PTP_PIN_TOD_NSEC,
+	PTP_PIN_WF_HIGH_PERIOD,
+	PTP_PIN_WF_LOW_PERIOD,
 	PTP_CFG_MISC,
 	PTP_CLK_CFG_ADJ_CFG,
 	PTP_CLK_CFG_ADJ_FREQ,
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index f01b0ce4e4cb..aae1570eecb1 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -17,6 +17,8 @@
 #define PTP_PIN_TOD_SEC_MSB_RSZ		PTP_PIN_CFG_RSZ
 #define PTP_PIN_TOD_SEC_LSB_RSZ		PTP_PIN_CFG_RSZ
 #define PTP_PIN_TOD_NSEC_RSZ		PTP_PIN_CFG_RSZ
+#define PTP_PIN_WF_HIGH_PERIOD_RSZ	PTP_PIN_CFG_RSZ
+#define PTP_PIN_WF_LOW_PERIOD_RSZ	PTP_PIN_CFG_RSZ
 
 #define PTP_PIN_CFG_DOM			BIT(0)
 #define PTP_PIN_CFG_SYNC		BIT(2)
-- 
cgit v1.2.3-59-g8ed1b


From cc2d87bb83407c7dfb0900d63b3fcfbf6a59202f Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:49 +0800
Subject: net: mscc: ocelot: support 4 PTP programmable pins

Support 4 PTP programmable pins with only PTP_PF_PEROUT function
for now. The PTP_PF_EXTTS function will be supported in the
future, and it should be implemented separately for Felix and
Ocelot, because of different hardware interrupt implementation
in them.

Since the hardware is not able to support an absolute start time,
the periodic clock request only allows start time 0 0. However, a
nonzero nsec is accepted in the PPS case for phase adjustment.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_ptp.c | 121 +++++++++++++++++++++++++++++++++
 include/soc/mscc/ocelot.h              |   3 +
 include/soc/mscc/ocelot_ptp.h          |   4 ++
 3 files changed, 128 insertions(+)

diff --git a/drivers/net/ethernet/mscc/ocelot_ptp.c b/drivers/net/ethernet/mscc/ocelot_ptp.c
index 69d4e5677343..a3088a1676ed 100644
--- a/drivers/net/ethernet/mscc/ocelot_ptp.c
+++ b/drivers/net/ethernet/mscc/ocelot_ptp.c
@@ -165,11 +165,132 @@ disable_adj:
 }
 EXPORT_SYMBOL(ocelot_ptp_adjfine);
 
+int ocelot_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+		      enum ptp_pin_function func, unsigned int chan)
+{
+	switch (func) {
+	case PTP_PF_NONE:
+	case PTP_PF_PEROUT:
+		break;
+	case PTP_PF_EXTTS:
+	case PTP_PF_PHYSYNC:
+		return -1;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_verify);
+
+int ocelot_ptp_enable(struct ptp_clock_info *ptp,
+		      struct ptp_clock_request *rq, int on)
+{
+	struct ocelot *ocelot = container_of(ptp, struct ocelot, ptp_info);
+	struct timespec64 ts_start, ts_period;
+	enum ocelot_ptp_pins ptp_pin;
+	unsigned long flags;
+	bool pps = false;
+	int pin = -1;
+	u32 val;
+	s64 ns;
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_PEROUT:
+		/* Reject requests with unsupported flags */
+		if (rq->perout.flags)
+			return -EOPNOTSUPP;
+
+		pin = ptp_find_pin(ocelot->ptp_clock, PTP_PF_PEROUT,
+				   rq->perout.index);
+		if (pin == 0)
+			ptp_pin = PTP_PIN_0;
+		else if (pin == 1)
+			ptp_pin = PTP_PIN_1;
+		else if (pin == 2)
+			ptp_pin = PTP_PIN_2;
+		else if (pin == 3)
+			ptp_pin = PTP_PIN_3;
+		else
+			return -EBUSY;
+
+		ts_start.tv_sec = rq->perout.start.sec;
+		ts_start.tv_nsec = rq->perout.start.nsec;
+		ts_period.tv_sec = rq->perout.period.sec;
+		ts_period.tv_nsec = rq->perout.period.nsec;
+
+		if (ts_period.tv_sec == 1 && ts_period.tv_nsec == 0)
+			pps = true;
+
+		if (ts_start.tv_sec || (ts_start.tv_nsec && !pps)) {
+			dev_warn(ocelot->dev,
+				 "Absolute start time not supported!\n");
+			dev_warn(ocelot->dev,
+				 "Accept nsec for PPS phase adjustment, otherwise start time should be 0 0.\n");
+			return -EINVAL;
+		}
+
+		/* Handle turning off */
+		if (!on) {
+			spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+			val = PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_IDLE);
+			ocelot_write_rix(ocelot, val, PTP_PIN_CFG, ptp_pin);
+			spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+			break;
+		}
+
+		/* Handle PPS request */
+		if (pps) {
+			spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+			/* Pulse generated perout.start.nsec after TOD has
+			 * increased seconds.
+			 * Pulse width is set to 1us.
+			 */
+			ocelot_write_rix(ocelot, ts_start.tv_nsec,
+					 PTP_PIN_WF_LOW_PERIOD, ptp_pin);
+			ocelot_write_rix(ocelot, 1000,
+					 PTP_PIN_WF_HIGH_PERIOD, ptp_pin);
+			val = PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_CLOCK);
+			val |= PTP_PIN_CFG_SYNC;
+			ocelot_write_rix(ocelot, val, PTP_PIN_CFG, ptp_pin);
+			spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+			break;
+		}
+
+		/* Handle periodic clock */
+		ns = timespec64_to_ns(&ts_period);
+		ns = ns >> 1;
+		if (ns > 0x3fffffff || ns <= 0x6)
+			return -EINVAL;
+
+		spin_lock_irqsave(&ocelot->ptp_clock_lock, flags);
+		ocelot_write_rix(ocelot, ns, PTP_PIN_WF_LOW_PERIOD, ptp_pin);
+		ocelot_write_rix(ocelot, ns, PTP_PIN_WF_HIGH_PERIOD, ptp_pin);
+		val = PTP_PIN_CFG_ACTION(PTP_PIN_ACTION_CLOCK);
+		ocelot_write_rix(ocelot, val, PTP_PIN_CFG, ptp_pin);
+		spin_unlock_irqrestore(&ocelot->ptp_clock_lock, flags);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+EXPORT_SYMBOL(ocelot_ptp_enable);
+
 int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info)
 {
 	struct ptp_clock *ptp_clock;
+	int i;
 
 	ocelot->ptp_info = *info;
+
+	for (i = 0; i < OCELOT_PTP_PINS_NUM; i++) {
+		struct ptp_pin_desc *p = &ocelot->ptp_pins[i];
+
+		snprintf(p->name, sizeof(p->name), "switch_1588_dat%d", i);
+		p->index = i;
+		p->func = PTP_PF_NONE;
+	}
+
+	ocelot->ptp_info.pin_config = &ocelot->ptp_pins[0];
+
 	ptp_clock = ptp_clock_register(&ocelot->ptp_info, ocelot->dev);
 	if (IS_ERR(ptp_clock))
 		return PTR_ERR(ptp_clock);
diff --git a/include/soc/mscc/ocelot.h b/include/soc/mscc/ocelot.h
index 31193ad3a545..a025fb798164 100644
--- a/include/soc/mscc/ocelot.h
+++ b/include/soc/mscc/ocelot.h
@@ -92,6 +92,8 @@
 #define OCELOT_SPEED_100		2
 #define OCELOT_SPEED_10			3
 
+#define OCELOT_PTP_PINS_NUM		4
+
 #define TARGET_OFFSET			24
 #define REG_MASK			GENMASK(TARGET_OFFSET - 1, 0)
 #define REG(reg, offset)		[reg & REG_MASK] = offset
@@ -552,6 +554,7 @@ struct ocelot {
 	struct mutex			ptp_lock;
 	/* Protects the PTP clock */
 	spinlock_t			ptp_clock_lock;
+	struct ptp_pin_desc		ptp_pins[OCELOT_PTP_PINS_NUM];
 };
 
 struct ocelot_policer {
diff --git a/include/soc/mscc/ocelot_ptp.h b/include/soc/mscc/ocelot_ptp.h
index aae1570eecb1..4a6b2f71b6b2 100644
--- a/include/soc/mscc/ocelot_ptp.h
+++ b/include/soc/mscc/ocelot_ptp.h
@@ -49,6 +49,10 @@ int ocelot_ptp_settime64(struct ptp_clock_info *ptp,
 			 const struct timespec64 *ts);
 int ocelot_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta);
 int ocelot_ptp_adjfine(struct ptp_clock_info *ptp, long scaled_ppm);
+int ocelot_ptp_verify(struct ptp_clock_info *ptp, unsigned int pin,
+		      enum ptp_pin_function func, unsigned int chan);
+int ocelot_ptp_enable(struct ptp_clock_info *ptp,
+		      struct ptp_clock_request *rq, int on);
 int ocelot_init_timestamp(struct ocelot *ocelot, struct ptp_clock_info *info);
 int ocelot_deinit_timestamp(struct ocelot *ocelot);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From aabb2bb07c963c67b8072aafcca3677e2b235be0 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:50 +0800
Subject: net: mscc: ocelot: enable PTP programmable pin

Enable PTP programmable pin.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_board.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index ee016f7ed934..67a8d61c926a 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -372,13 +372,15 @@ static struct ptp_clock_info ocelot_ptp_clock_info = {
 	.max_adj	= 0x7fffffff,
 	.n_alarm	= 0,
 	.n_ext_ts	= 0,
-	.n_per_out	= 0,
-	.n_pins		= 0,
+	.n_per_out	= OCELOT_PTP_PINS_NUM,
+	.n_pins		= OCELOT_PTP_PINS_NUM,
 	.pps		= 0,
 	.gettime64	= ocelot_ptp_gettime64,
 	.settime64	= ocelot_ptp_settime64,
 	.adjtime	= ocelot_ptp_adjtime,
 	.adjfine	= ocelot_ptp_adjfine,
+	.verify		= ocelot_ptp_verify,
+	.enable		= ocelot_ptp_enable,
 };
 
 static int mscc_ocelot_probe(struct platform_device *pdev)
-- 
cgit v1.2.3-59-g8ed1b


From 5287be405ca2263a9c524a6b0a7869c59760f4e6 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 20 Apr 2020 10:46:51 +0800
Subject: net: dsa: felix: enable PTP programmable pin

Enable PTP programmable pin.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 44015a24b087..9173b95551d1 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -501,13 +501,15 @@ static struct ptp_clock_info ocelot_ptp_clock_info = {
 	.max_adj	= 0x7fffffff,
 	.n_alarm	= 0,
 	.n_ext_ts	= 0,
-	.n_per_out	= 0,
-	.n_pins		= 0,
+	.n_per_out	= OCELOT_PTP_PINS_NUM,
+	.n_pins		= OCELOT_PTP_PINS_NUM,
 	.pps		= 0,
 	.gettime64	= ocelot_ptp_gettime64,
 	.settime64	= ocelot_ptp_settime64,
 	.adjtime	= ocelot_ptp_adjtime,
 	.adjfine	= ocelot_ptp_adjfine,
+	.verify		= ocelot_ptp_verify,
+	.enable		= ocelot_ptp_enable,
 };
 
 /* Hardware initialization done here so that we can allocate structures with
-- 
cgit v1.2.3-59-g8ed1b


From 8af40902f839f5431b89ce2a035ee01e51b31a1d Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 20 Apr 2020 20:37:18 +0800
Subject: ath11k: remove conversion to bool in ath11k_dp_rxdesc_mpdu_valid()

The '==' expression itself is bool, no need to convert it to bool again.
This fixes the following coccicheck warning:

drivers/net/wireless/ath/ath11k/dp_rx.c:255:46-51: WARNING: conversion
to bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420123718.3384-1-yanaijie@huawei.com
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 203fd44ff352..bbd7da48518f 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -252,7 +252,7 @@ static bool ath11k_dp_rxdesc_mpdu_valid(struct hal_rx_desc *rx_desc)
 	tlv_tag = FIELD_GET(HAL_TLV_HDR_TAG,
 			    __le32_to_cpu(rx_desc->mpdu_start_tag));
 
-	return tlv_tag == HAL_RX_MPDU_START ? true : false;
+	return tlv_tag == HAL_RX_MPDU_START;
 }
 
 static u32 ath11k_dp_rxdesc_get_ppduid(struct hal_rx_desc *rx_desc)
-- 
cgit v1.2.3-59-g8ed1b


From d81709346ceac7b9131b4a091197b9d7fc5a409f Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 20 Apr 2020 20:37:45 +0800
Subject: ath11k: remove conversion to bool in ath11k_debug_fw_stats_process()

The '==' expression itself is bool, no need to convert it to bool again.
This fixes the following coccicheck warning:

drivers/net/wireless/ath/ath11k/debug.c:198:57-62: WARNING: conversion
to bool not needed here
drivers/net/wireless/ath/ath11k/debug.c:218:58-63: WARNING: conversion
to bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200420123745.4159-1-yanaijie@huawei.com
---
 drivers/net/wireless/ath/ath11k/debug.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/debug.c b/drivers/net/wireless/ath/ath11k/debug.c
index a2e3dfeae3d5..3fd6b5af073b 100644
--- a/drivers/net/wireless/ath/ath11k/debug.c
+++ b/drivers/net/wireless/ath/ath11k/debug.c
@@ -195,7 +195,7 @@ void ath11k_debug_fw_stats_process(struct ath11k_base *ab, struct sk_buff *skb)
 				total_vdevs_started += ar->num_started_vdevs;
 		}
 
-		is_end = ((++num_vdev) == total_vdevs_started ? true : false);
+		is_end = ((++num_vdev) == total_vdevs_started);
 
 		list_splice_tail_init(&stats.vdevs,
 				      &ar->debug.fw_stats.vdevs);
@@ -215,7 +215,7 @@ void ath11k_debug_fw_stats_process(struct ath11k_base *ab, struct sk_buff *skb)
 		/* Mark end until we reached the count of all started VDEVs
 		 * within the PDEV
 		 */
-		is_end = ((++num_bcn) == ar->num_started_vdevs ? true : false);
+		is_end = ((++num_bcn) == ar->num_started_vdevs);
 
 		list_splice_tail_init(&stats.bcn,
 				      &ar->debug.fw_stats.bcn);
-- 
cgit v1.2.3-59-g8ed1b


From c8334512f3dd1b94844baca629f9bedca4271593 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 21 Apr 2020 15:09:35 +0300
Subject: ath10k: add htt TX bundle for sdio

The transmission utilization ratio of the sdio bus is low for small
packets, because the space and time cost on the sdio bus is the same
for large and small packets. So the data rate for large packets is
higher than for small packets.

Test result of different length of data:

data packet(byte)   cost time(us)   calculated rate(Mbps)
      256               28                73
      512               33               124
     1024               35               234
     1792               45               318
    14336              168               682
    28672              333               688
    57344              660               695

This patch changes TX from sending single packets to sending a large
bundle packet of at most 32 messages, which results in a significant
performance improvement on the TX path.

Also there's a fourth thread "ath10k_tx_complete_wq" added to ath10k as it
improves TCP RX throughput (values in Mbps):

                                       TCP-RX    TCP-TX    UDP-RX      UDP-TX
use workqueue_tx_complete              423       357       448         412
change it to ar->workqueue             410       360       449         414
change it to ar->workqueue_aux         405       339       446         401

This patch only affects sdio chips; it will not affect PCI, SNOC etc.
It only enables bundling for sdio chips.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWP-1.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410061400.14231-2-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.c   |  14 +-
 drivers/net/wireless/ath/ath10k/core.h   |   4 +-
 drivers/net/wireless/ath/ath10k/htc.c    | 372 ++++++++++++++++++++++++++++---
 drivers/net/wireless/ath/ath10k/htc.h    |  24 +-
 drivers/net/wireless/ath/ath10k/htt.c    |   8 +
 drivers/net/wireless/ath/ath10k/htt.h    |   4 +
 drivers/net/wireless/ath/ath10k/htt_rx.c |   1 +
 drivers/net/wireless/ath/ath10k/htt_tx.c |   8 +-
 8 files changed, 398 insertions(+), 37 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 5926281c7e05..8689c330fdd9 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -3288,6 +3288,11 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
 	if (!ar->workqueue_aux)
 		goto err_free_wq;
 
+	ar->workqueue_tx_complete =
+		create_singlethread_workqueue("ath10k_tx_complete_wq");
+	if (!ar->workqueue_tx_complete)
+		goto err_free_aux_wq;
+
 	mutex_init(&ar->conf_mutex);
 	mutex_init(&ar->dump_mutex);
 	spin_lock_init(&ar->data_lock);
@@ -3315,7 +3320,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
 
 	ret = ath10k_coredump_create(ar);
 	if (ret)
-		goto err_free_aux_wq;
+		goto err_free_tx_complete;
 
 	ret = ath10k_debug_create(ar);
 	if (ret)
@@ -3325,12 +3330,12 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
 
 err_free_coredump:
 	ath10k_coredump_destroy(ar);
-
+err_free_tx_complete:
+	destroy_workqueue(ar->workqueue_tx_complete);
 err_free_aux_wq:
 	destroy_workqueue(ar->workqueue_aux);
 err_free_wq:
 	destroy_workqueue(ar->workqueue);
-
 err_free_mac:
 	ath10k_mac_destroy(ar);
 
@@ -3346,6 +3351,9 @@ void ath10k_core_destroy(struct ath10k *ar)
 	flush_workqueue(ar->workqueue_aux);
 	destroy_workqueue(ar->workqueue_aux);
 
+	flush_workqueue(ar->workqueue_tx_complete);
+	destroy_workqueue(ar->workqueue_tx_complete);
+
 	ath10k_debug_destroy(ar);
 	ath10k_coredump_destroy(ar);
 	ath10k_htt_tx_destroy(&ar->htt);
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index bd8ef576c590..d6adcbaf9616 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -1091,7 +1091,7 @@ struct ath10k {
 	struct workqueue_struct *workqueue;
 	/* Auxiliary workqueue */
 	struct workqueue_struct *workqueue_aux;
-
+	struct workqueue_struct *workqueue_tx_complete;
 	/* prevents concurrent FW reconfiguration */
 	struct mutex conf_mutex;
 
@@ -1132,6 +1132,8 @@ struct ath10k {
 
 	struct work_struct register_work;
 	struct work_struct restart_work;
+	struct work_struct bundle_tx_work;
+	struct work_struct tx_complete_work;
 
 	/* cycle count is reported twice for each visited channel during scan.
 	 * access protected by data_lock
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 61ee413d902a..ed4e0add997e 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -51,10 +51,12 @@ void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep,
 				     struct sk_buff *skb)
 {
 	struct ath10k *ar = ep->htc->ar;
+	struct ath10k_htc_hdr *hdr;
 
 	ath10k_dbg(ar, ATH10K_DBG_HTC, "%s: ep %d skb %pK\n", __func__,
 		   ep->eid, skb);
 
+	hdr = (struct ath10k_htc_hdr *)skb->data;
 	ath10k_htc_restore_tx_skb(ep->htc, skb);
 
 	if (!ep->ep_ops.ep_tx_complete) {
@@ -63,6 +65,11 @@ void ath10k_htc_notify_tx_completion(struct ath10k_htc_ep *ep,
 		return;
 	}
 
+	if (hdr->flags & ATH10K_HTC_FLAG_SEND_BUNDLE) {
+		dev_kfree_skb_any(skb);
+		return;
+	}
+
 	ep->ep_ops.ep_tx_complete(ep->htc->ar, skb);
 }
 EXPORT_SYMBOL(ath10k_htc_notify_tx_completion);
@@ -78,7 +85,7 @@ static void ath10k_htc_prepare_tx_skb(struct ath10k_htc_ep *ep,
 	hdr->eid = ep->eid;
 	hdr->len = __cpu_to_le16(skb->len - sizeof(*hdr));
 	hdr->flags = 0;
-	if (ep->tx_credit_flow_enabled)
+	if (ep->tx_credit_flow_enabled && !ep->bundle_tx)
 		hdr->flags |= ATH10K_HTC_FLAG_NEED_CREDIT_UPDATE;
 
 	spin_lock_bh(&ep->htc->tx_lock);
@@ -86,6 +93,63 @@ static void ath10k_htc_prepare_tx_skb(struct ath10k_htc_ep *ep,
 	spin_unlock_bh(&ep->htc->tx_lock);
 }
 
+static int ath10k_htc_consume_credit(struct ath10k_htc_ep *ep,
+				     unsigned int len,
+				     bool consume)
+{
+	struct ath10k_htc *htc = ep->htc;
+	struct ath10k *ar = htc->ar;
+	enum ath10k_htc_ep_id eid = ep->eid;
+	int credits, ret = 0;
+
+	if (!ep->tx_credit_flow_enabled)
+		return 0;
+
+	credits = DIV_ROUND_UP(len, ep->tx_credit_size);
+	spin_lock_bh(&htc->tx_lock);
+
+	if (ep->tx_credits < credits) {
+		ath10k_dbg(ar, ATH10K_DBG_HTC,
+			   "htc insufficient credits ep %d required %d available %d consume %d\n",
+			   eid, credits, ep->tx_credits, consume);
+		ret = -EAGAIN;
+		goto unlock;
+	}
+
+	if (consume) {
+		ep->tx_credits -= credits;
+		ath10k_dbg(ar, ATH10K_DBG_HTC,
+			   "htc ep %d consumed %d credits total %d\n",
+			   eid, credits, ep->tx_credits);
+	}
+
+unlock:
+	spin_unlock_bh(&htc->tx_lock);
+	return ret;
+}
+
+static void ath10k_htc_release_credit(struct ath10k_htc_ep *ep, unsigned int len)
+{
+	struct ath10k_htc *htc = ep->htc;
+	struct ath10k *ar = htc->ar;
+	enum ath10k_htc_ep_id eid = ep->eid;
+	int credits;
+
+	if (!ep->tx_credit_flow_enabled)
+		return;
+
+	credits = DIV_ROUND_UP(len, ep->tx_credit_size);
+	spin_lock_bh(&htc->tx_lock);
+	ep->tx_credits += credits;
+	ath10k_dbg(ar, ATH10K_DBG_HTC,
+		   "htc ep %d reverted %d credits back total %d\n",
+		   eid, credits, ep->tx_credits);
+	spin_unlock_bh(&htc->tx_lock);
+
+	if (ep->ep_ops.ep_tx_credits)
+		ep->ep_ops.ep_tx_credits(htc->ar);
+}
+
 int ath10k_htc_send(struct ath10k_htc *htc,
 		    enum ath10k_htc_ep_id eid,
 		    struct sk_buff *skb)
@@ -95,8 +159,8 @@ int ath10k_htc_send(struct ath10k_htc *htc,
 	struct ath10k_skb_cb *skb_cb = ATH10K_SKB_CB(skb);
 	struct ath10k_hif_sg_item sg_item;
 	struct device *dev = htc->ar->dev;
-	int credits = 0;
 	int ret;
+	unsigned int skb_len;
 
 	if (htc->ar->state == ATH10K_STATE_WEDGED)
 		return -ECOMM;
@@ -108,23 +172,10 @@ int ath10k_htc_send(struct ath10k_htc *htc,
 
 	skb_push(skb, sizeof(struct ath10k_htc_hdr));
 
-	if (ep->tx_credit_flow_enabled) {
-		credits = DIV_ROUND_UP(skb->len, htc->target_credit_size);
-		spin_lock_bh(&htc->tx_lock);
-		if (ep->tx_credits < credits) {
-			ath10k_dbg(ar, ATH10K_DBG_HTC,
-				   "htc insufficient credits ep %d required %d available %d\n",
-				   eid, credits, ep->tx_credits);
-			spin_unlock_bh(&htc->tx_lock);
-			ret = -EAGAIN;
-			goto err_pull;
-		}
-		ep->tx_credits -= credits;
-		ath10k_dbg(ar, ATH10K_DBG_HTC,
-			   "htc ep %d consumed %d credits (total %d)\n",
-			   eid, credits, ep->tx_credits);
-		spin_unlock_bh(&htc->tx_lock);
-	}
+	skb_len = skb->len;
+	ret = ath10k_htc_consume_credit(ep, skb_len, true);
+	if (ret)
+		goto err_pull;
 
 	ath10k_htc_prepare_tx_skb(ep, skb);
 
@@ -155,17 +206,7 @@ err_unmap:
 	if (ar->bus_param.dev_type != ATH10K_DEV_TYPE_HL)
 		dma_unmap_single(dev, skb_cb->paddr, skb->len, DMA_TO_DEVICE);
 err_credits:
-	if (ep->tx_credit_flow_enabled) {
-		spin_lock_bh(&htc->tx_lock);
-		ep->tx_credits += credits;
-		ath10k_dbg(ar, ATH10K_DBG_HTC,
-			   "htc ep %d reverted %d credits back (total %d)\n",
-			   eid, credits, ep->tx_credits);
-		spin_unlock_bh(&htc->tx_lock);
-
-		if (ep->ep_ops.ep_tx_credits)
-			ep->ep_ops.ep_tx_credits(htc->ar);
-	}
+	ath10k_htc_release_credit(ep, skb_len);
 err_pull:
 	skb_pull(skb, sizeof(struct ath10k_htc_hdr));
 	return ret;
@@ -581,6 +622,273 @@ static u8 ath10k_htc_get_credit_allocation(struct ath10k_htc *htc,
 	return allocation;
 }
 
+static int ath10k_htc_send_bundle(struct ath10k_htc_ep *ep,
+				  struct sk_buff *bundle_skb,
+				  struct sk_buff_head *tx_save_head)
+{
+	struct ath10k_hif_sg_item sg_item;
+	struct ath10k_htc *htc = ep->htc;
+	struct ath10k *ar = htc->ar;
+	struct sk_buff *skb;
+	int ret, cn = 0;
+	unsigned int skb_len;
+
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "bundle skb len %d\n", bundle_skb->len);
+	skb_len = bundle_skb->len;
+	ret = ath10k_htc_consume_credit(ep, skb_len, true);
+
+	if (!ret) {
+		sg_item.transfer_id = ep->eid;
+		sg_item.transfer_context = bundle_skb;
+		sg_item.vaddr = bundle_skb->data;
+		sg_item.len = bundle_skb->len;
+
+		ret = ath10k_hif_tx_sg(htc->ar, ep->ul_pipe_id, &sg_item, 1);
+		if (ret)
+			ath10k_htc_release_credit(ep, skb_len);
+	}
+
+	if (ret)
+		dev_kfree_skb_any(bundle_skb);
+
+	for (cn = 0; (skb = skb_dequeue_tail(tx_save_head)); cn++) {
+		if (ret) {
+			skb_pull(skb, sizeof(struct ath10k_htc_hdr));
+			skb_queue_head(&ep->tx_req_head, skb);
+		} else {
+			skb_queue_tail(&ep->tx_complete_head, skb);
+		}
+	}
+
+	if (!ret)
+		queue_work(ar->workqueue_tx_complete, &ar->tx_complete_work);
+
+	ath10k_dbg(ar, ATH10K_DBG_HTC,
+		   "bundle tx status %d eid %d req count %d count %d len %d\n",
+		   ret, ep->eid, skb_queue_len(&ep->tx_req_head), cn, bundle_skb->len);
+	return ret;
+}
+
+static void ath10k_htc_send_one_skb(struct ath10k_htc_ep *ep, struct sk_buff *skb)
+{
+	struct ath10k_htc *htc = ep->htc;
+	struct ath10k *ar = htc->ar;
+	int ret;
+
+	ret = ath10k_htc_send(htc, ep->eid, skb);
+
+	if (ret)
+		skb_queue_head(&ep->tx_req_head, skb);
+
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "tx one status %d eid %d len %d pending count %d\n",
+		   ret, ep->eid, skb->len, skb_queue_len(&ep->tx_req_head));
+}
+
+static int ath10k_htc_send_bundle_skbs(struct ath10k_htc_ep *ep)
+{
+	struct ath10k_htc *htc = ep->htc;
+	struct sk_buff *bundle_skb, *skb;
+	struct sk_buff_head tx_save_head;
+	struct ath10k_htc_hdr *hdr;
+	u8 *bundle_buf;
+	int ret = 0, credit_pad, credit_remainder, trans_len, bundles_left = 0;
+
+	if (htc->ar->state == ATH10K_STATE_WEDGED)
+		return -ECOMM;
+
+	if (ep->tx_credit_flow_enabled &&
+	    ep->tx_credits < ATH10K_MIN_CREDIT_PER_HTC_TX_BUNDLE)
+		return 0;
+
+	bundles_left = ATH10K_MAX_MSG_PER_HTC_TX_BUNDLE * ep->tx_credit_size;
+	bundle_skb = dev_alloc_skb(bundles_left);
+
+	if (!bundle_skb)
+		return -ENOMEM;
+
+	bundle_buf = bundle_skb->data;
+	skb_queue_head_init(&tx_save_head);
+
+	while (true) {
+		skb = skb_dequeue(&ep->tx_req_head);
+		if (!skb)
+			break;
+
+		credit_pad = 0;
+		trans_len = skb->len + sizeof(*hdr);
+		credit_remainder = trans_len % ep->tx_credit_size;
+
+		if (credit_remainder != 0) {
+			credit_pad = ep->tx_credit_size - credit_remainder;
+			trans_len += credit_pad;
+		}
+
+		ret = ath10k_htc_consume_credit(ep,
+						bundle_buf + trans_len - bundle_skb->data,
+						false);
+		if (ret) {
+			skb_queue_head(&ep->tx_req_head, skb);
+			break;
+		}
+
+		if (bundles_left < trans_len) {
+			bundle_skb->len = bundle_buf - bundle_skb->data;
+			ret = ath10k_htc_send_bundle(ep, bundle_skb, &tx_save_head);
+
+			if (ret) {
+				skb_queue_head(&ep->tx_req_head, skb);
+				return ret;
+			}
+
+			if (skb_queue_len(&ep->tx_req_head) == 0) {
+				ath10k_htc_send_one_skb(ep, skb);
+				return ret;
+			}
+
+			if (ep->tx_credit_flow_enabled &&
+			    ep->tx_credits < ATH10K_MIN_CREDIT_PER_HTC_TX_BUNDLE) {
+				skb_queue_head(&ep->tx_req_head, skb);
+				return 0;
+			}
+
+			bundles_left =
+				ATH10K_MAX_MSG_PER_HTC_TX_BUNDLE * ep->tx_credit_size;
+			bundle_skb = dev_alloc_skb(bundles_left);
+
+			if (!bundle_skb) {
+				skb_queue_head(&ep->tx_req_head, skb);
+				return -ENOMEM;
+			}
+			bundle_buf = bundle_skb->data;
+			skb_queue_head_init(&tx_save_head);
+		}
+
+		skb_push(skb, sizeof(struct ath10k_htc_hdr));
+		ath10k_htc_prepare_tx_skb(ep, skb);
+
+		memcpy(bundle_buf, skb->data, skb->len);
+		hdr = (struct ath10k_htc_hdr *)bundle_buf;
+		hdr->flags |= ATH10K_HTC_FLAG_SEND_BUNDLE;
+		hdr->pad_len = __cpu_to_le16(credit_pad);
+		bundle_buf += trans_len;
+		bundles_left -= trans_len;
+		skb_queue_tail(&tx_save_head, skb);
+	}
+
+	if (bundle_buf != bundle_skb->data) {
+		bundle_skb->len = bundle_buf - bundle_skb->data;
+		ret = ath10k_htc_send_bundle(ep, bundle_skb, &tx_save_head);
+	} else {
+		dev_kfree_skb_any(bundle_skb);
+	}
+
+	return ret;
+}
+
+static void ath10k_htc_bundle_tx_work(struct work_struct *work)
+{
+	struct ath10k *ar = container_of(work, struct ath10k, bundle_tx_work);
+	struct ath10k_htc_ep *ep;
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ar->htc.endpoint); i++) {
+		ep = &ar->htc.endpoint[i];
+
+		if (!ep->bundle_tx)
+			continue;
+
+		ath10k_dbg(ar, ATH10K_DBG_HTC, "bundle tx work eid %d count %d\n",
+			   ep->eid, skb_queue_len(&ep->tx_req_head));
+
+		if (skb_queue_len(&ep->tx_req_head) >=
+		    ATH10K_MIN_MSG_PER_HTC_TX_BUNDLE) {
+			ath10k_htc_send_bundle_skbs(ep);
+		} else {
+			skb = skb_dequeue(&ep->tx_req_head);
+
+			if (!skb)
+				continue;
+			ath10k_htc_send_one_skb(ep, skb);
+		}
+	}
+}
+
+static void ath10k_htc_tx_complete_work(struct work_struct *work)
+{
+	struct ath10k *ar = container_of(work, struct ath10k, tx_complete_work);
+	struct ath10k_htc_ep *ep;
+	enum ath10k_htc_ep_id eid;
+	struct sk_buff *skb;
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(ar->htc.endpoint); i++) {
+		ep = &ar->htc.endpoint[i];
+		eid = ep->eid;
+		if (ep->bundle_tx && eid == ar->htt.eid) {
+			ath10k_dbg(ar, ATH10K_DBG_HTC, "bundle tx complete eid %d pending complete count%d\n",
+				   ep->eid, skb_queue_len(&ep->tx_complete_head));
+
+			while (true) {
+				skb = skb_dequeue(&ep->tx_complete_head);
+				if (!skb)
+					break;
+				ath10k_htc_notify_tx_completion(ep, skb);
+			}
+		}
+	}
+}
+
+int ath10k_htc_send_hl(struct ath10k_htc *htc,
+		       enum ath10k_htc_ep_id eid,
+		       struct sk_buff *skb)
+{
+	struct ath10k_htc_ep *ep = &htc->endpoint[eid];
+	struct ath10k *ar = htc->ar;
+
+	ath10k_dbg(ar, ATH10K_DBG_HTC, "htc send hl eid %d bundle %d tx count %d len %d\n",
+		   eid, ep->bundle_tx, skb_queue_len(&ep->tx_req_head), skb->len);
+
+	if (ep->bundle_tx) {
+		skb_queue_tail(&ep->tx_req_head, skb);
+		queue_work(ar->workqueue, &ar->bundle_tx_work);
+		return 0;
+	} else {
+		return ath10k_htc_send(htc, eid, skb);
+	}
+}
+
+void ath10k_htc_setup_tx_req(struct ath10k_htc_ep *ep)
+{
+	if (ep->htc->max_msgs_per_htc_bundle >= ATH10K_MIN_MSG_PER_HTC_TX_BUNDLE &&
+	    !ep->bundle_tx) {
+		ep->bundle_tx = true;
+		skb_queue_head_init(&ep->tx_req_head);
+		skb_queue_head_init(&ep->tx_complete_head);
+	}
+}
+
+void ath10k_htc_stop_hl(struct ath10k *ar)
+{
+	struct ath10k_htc_ep *ep;
+	int i;
+
+	cancel_work_sync(&ar->bundle_tx_work);
+	cancel_work_sync(&ar->tx_complete_work);
+
+	for (i = 0; i < ARRAY_SIZE(ar->htc.endpoint); i++) {
+		ep = &ar->htc.endpoint[i];
+
+		if (!ep->bundle_tx)
+			continue;
+
+		ath10k_dbg(ar, ATH10K_DBG_HTC, "stop tx work eid %d count %d\n",
+			   ep->eid, skb_queue_len(&ep->tx_req_head));
+
+		skb_queue_purge(&ep->tx_req_head);
+	}
+}
+
 int ath10k_htc_wait_target(struct ath10k_htc *htc)
 {
 	struct ath10k *ar = htc->ar;
@@ -657,6 +965,9 @@ int ath10k_htc_wait_target(struct ath10k_htc *htc)
 			   htc->max_msgs_per_htc_bundle);
 	}
 
+	INIT_WORK(&ar->bundle_tx_work, ath10k_htc_bundle_tx_work);
+	INIT_WORK(&ar->tx_complete_work, ath10k_htc_tx_complete_work);
+
 	return 0;
 }
 
@@ -801,6 +1112,7 @@ setup:
 	ep->max_tx_queue_depth = conn_req->max_send_queue_depth;
 	ep->max_ep_message_len = __le16_to_cpu(resp_msg->max_msg_size);
 	ep->tx_credits = tx_alloc;
+	ep->tx_credit_size = htc->target_credit_size;
 
 	/* copy all the callbacks */
 	ep->ep_ops = conn_req->ep_ops;
diff --git a/drivers/net/wireless/ath/ath10k/htc.h b/drivers/net/wireless/ath/ath10k/htc.h
index 14e5c3f712c1..d045dbc42158 100644
--- a/drivers/net/wireless/ath/ath10k/htc.h
+++ b/drivers/net/wireless/ath/ath10k/htc.h
@@ -83,8 +83,14 @@ struct ath10k_htc_hdr {
 		u8 seq_no; /* for tx */
 		u8 control_byte1;
 	} __packed;
-	u8 pad0;
-	u8 pad1;
+	union {
+		__le16 pad_len;
+		struct {
+			u8 pad0;
+			u8 pad1;
+		} __packed;
+	} __packed;
+
 } __packed __aligned(4);
 
 enum ath10k_ath10k_htc_msg_id {
@@ -121,6 +127,10 @@ enum ath10k_htc_conn_svc_status {
 	ATH10K_HTC_CONN_SVC_STATUS_NO_MORE_EP   = 4
 };
 
+#define ATH10K_MAX_MSG_PER_HTC_TX_BUNDLE        32
+#define ATH10K_MIN_MSG_PER_HTC_TX_BUNDLE        2
+#define ATH10K_MIN_CREDIT_PER_HTC_TX_BUNDLE     2
+
 enum ath10k_htc_setup_complete_flags {
 	ATH10K_HTC_SETUP_COMPLETE_FLAGS_RX_BNDL_EN = 1
 };
@@ -353,7 +363,12 @@ struct ath10k_htc_ep {
 
 	u8 seq_no; /* for debugging */
 	int tx_credits;
+	int tx_credit_size;
 	bool tx_credit_flow_enabled;
+	bool bundle_tx;
+	struct sk_buff_head tx_req_head;
+	struct sk_buff_head tx_complete_head;
+
 };
 
 struct ath10k_htc_svc_tx_credits {
@@ -382,6 +397,7 @@ struct ath10k_htc {
 
 int ath10k_htc_init(struct ath10k *ar);
 int ath10k_htc_wait_target(struct ath10k_htc *htc);
+void ath10k_htc_setup_tx_req(struct ath10k_htc_ep *ep);
 int ath10k_htc_start(struct ath10k_htc *htc);
 int ath10k_htc_connect_service(struct ath10k_htc *htc,
 			       struct ath10k_htc_svc_conn_req  *conn_req,
@@ -391,6 +407,10 @@ void ath10k_htc_change_tx_credit_flow(struct ath10k_htc *htc,
 				      bool enable);
 int ath10k_htc_send(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid,
 		    struct sk_buff *packet);
+void ath10k_htc_stop_hl(struct ath10k *ar);
+
+int ath10k_htc_send_hl(struct ath10k_htc *htc, enum ath10k_htc_ep_id eid,
+		       struct sk_buff *packet);
 struct sk_buff *ath10k_htc_alloc_skb(struct ath10k *ar, int size);
 void ath10k_htc_tx_completion_handler(struct ath10k *ar, struct sk_buff *skb);
 void ath10k_htc_rx_completion_handler(struct ath10k *ar, struct sk_buff *skb);
diff --git a/drivers/net/wireless/ath/ath10k/htt.c b/drivers/net/wireless/ath/ath10k/htt.c
index 4354bf285ff1..127b4e4980ef 100644
--- a/drivers/net/wireless/ath/ath10k/htt.c
+++ b/drivers/net/wireless/ath/ath10k/htt.c
@@ -135,6 +135,8 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
 {
 	struct ath10k_htc_svc_conn_req conn_req;
 	struct ath10k_htc_svc_conn_resp conn_resp;
+	struct ath10k *ar = htt->ar;
+	struct ath10k_htc_ep *ep;
 	int status;
 
 	memset(&conn_req, 0, sizeof(conn_req));
@@ -142,6 +144,7 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
 
 	conn_req.ep_ops.ep_tx_complete = ath10k_htt_htc_tx_complete;
 	conn_req.ep_ops.ep_rx_complete = ath10k_htt_htc_t2h_msg_handler;
+	conn_req.ep_ops.ep_tx_credits = ath10k_htt_op_ep_tx_credits;
 
 	/* connect to control service */
 	conn_req.service_id = ATH10K_HTC_SVC_ID_HTT_DATA_MSG;
@@ -154,6 +157,11 @@ int ath10k_htt_connect(struct ath10k_htt *htt)
 
 	htt->eid = conn_resp.eid;
 
+	if (ar->bus_param.dev_type == ATH10K_DEV_TYPE_HL) {
+		ep = &ar->htc.endpoint[htt->eid];
+		ath10k_htc_setup_tx_req(ep);
+	}
+
 	htt->disable_tx_comp = ath10k_hif_get_htt_tx_complete(htt->ar);
 	if (htt->disable_tx_comp)
 		ath10k_htc_change_tx_credit_flow(&htt->ar->htc, htt->eid, true);
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index b88c2f3787d8..462a25b3056d 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -2032,6 +2032,9 @@ struct ath10k_htt {
 	const struct ath10k_htt_tx_ops *tx_ops;
 	const struct ath10k_htt_rx_ops *rx_ops;
 	bool disable_tx_comp;
+	bool bundle_tx;
+	struct sk_buff_head tx_req_head;
+	struct sk_buff_head tx_complete_head;
 };
 
 struct ath10k_htt_tx_ops {
@@ -2278,6 +2281,7 @@ int ath10k_htt_tx_fetch_resp(struct ath10k *ar,
 			     __le16 fetch_seq_num,
 			     struct htt_tx_fetch_record *records,
 			     size_t num_records);
+void ath10k_htt_op_ep_tx_credits(struct ath10k *ar);
 
 void ath10k_htt_tx_txq_update(struct ieee80211_hw *hw,
 			      struct ieee80211_txq *txq);
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 64e45bfa5d05..816af1a8ad69 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -3919,6 +3919,7 @@ bool ath10k_htt_t2h_msg_handler(struct ath10k *ar, struct sk_buff *skb)
 			ath10k_dbg(ar, ATH10K_DBG_HTT,
 				   "htt credit total %d\n",
 				   ep->tx_credits);
+			ep->ep_ops.ep_tx_credits(htc->ar);
 		}
 		break;
 	}
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index bcecf05fe2fd..ff044426c337 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -531,6 +531,7 @@ void ath10k_htt_tx_destroy(struct ath10k_htt *htt)
 
 void ath10k_htt_tx_stop(struct ath10k_htt *htt)
 {
+	ath10k_htc_stop_hl(htt->ar);
 	idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar);
 	idr_destroy(&htt->pending_tx);
 }
@@ -541,6 +542,11 @@ void ath10k_htt_tx_free(struct ath10k_htt *htt)
 	ath10k_htt_tx_destroy(htt);
 }
 
+void ath10k_htt_op_ep_tx_credits(struct ath10k *ar)
+{
+	queue_work(ar->workqueue, &ar->bundle_tx_work);
+}
+
 void ath10k_htt_htc_tx_complete(struct ath10k *ar, struct sk_buff *skb)
 {
 	struct ath10k_htt *htt = &ar->htt;
@@ -1379,7 +1385,7 @@ static int ath10k_htt_tx_hl(struct ath10k_htt *htt, enum ath10k_hw_txrx_mode txm
 	 */
 	tx_desc->peerid = __cpu_to_le32(HTT_INVALID_PEERID);
 
-	res = ath10k_htc_send(&htt->ar->htc, htt->eid, msdu);
+	res = ath10k_htc_send_hl(&htt->ar->htc, htt->eid, msdu);
 
 out:
 	return res;
-- 
cgit v1.2.3-59-g8ed1b


From 2f918ea98606100f3a6d47db7ff7c200838ec4f3 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 21 Apr 2020 15:09:35 +0300
Subject: ath10k: enable alt data of TX path for sdio

The default credit size is 1792 bytes, but the IP MTU is 1500 bytes,
so about 290 bytes are wasted for each data packet on the SDIO
transfer path for TX bundles, which reduces the transmission utilization
ratio for data packets.

This patch enables the small credit size in firmware; firmware will then
use the new credit size of 1556 bytes, which increases the transmission
utilization ratio for data packets on the TX path. It results in a
significant performance improvement on the TX path.

This patch only affects SDIO chips; it does not affect PCI, SNOC, etc.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWP-1.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200410061400.14231-3-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.c |  8 ++++----
 drivers/net/wireless/ath/ath10k/htc.c  | 12 ++++++++++--
 drivers/net/wireless/ath/ath10k/htc.h  | 13 +++++++++++--
 3 files changed, 25 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index 8689c330fdd9..d96d178b4980 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -725,10 +725,10 @@ static int ath10k_init_sdio(struct ath10k *ar, enum ath10k_firmware_mode mode)
 
 	param |= HI_ACS_FLAGS_SDIO_REDUCE_TX_COMPL_SET;
 
-	/* Alternate credit size of 1544 as used by SDIO firmware is
-	 * not big enough for mac80211 / native wifi frames. disable it
-	 */
-	param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE;
+	if (mode == ATH10K_FIRMWARE_MODE_NORMAL)
+		param |= HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE;
+	else
+		param &= ~HI_ACS_FLAGS_ALT_DATA_CREDIT_SIZE;
 
 	if (mode == ATH10K_FIRMWARE_MODE_UTF)
 		param &= ~HI_ACS_FLAGS_SDIO_SWAP_MAILBOX_SET;
diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index ed4e0add997e..58ceba75d20a 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -957,12 +957,16 @@ int ath10k_htc_wait_target(struct ath10k_htc *htc)
 	 */
 	if (htc->control_resp_len >=
 	    sizeof(msg->hdr) + sizeof(msg->ready_ext)) {
+		htc->alt_data_credit_size =
+			__le16_to_cpu(msg->ready_ext.reserved) &
+			ATH10K_HTC_MSG_READY_EXT_ALT_DATA_MASK;
 		htc->max_msgs_per_htc_bundle =
 			min_t(u8, msg->ready_ext.max_msgs_per_htc_bundle,
 			      HTC_HOST_MAX_MSG_PER_RX_BUNDLE);
 		ath10k_dbg(ar, ATH10K_DBG_HTC,
-			   "Extended ready message. RX bundle size: %d\n",
-			   htc->max_msgs_per_htc_bundle);
+			   "Extended ready message RX bundle size %d alt size %d\n",
+			   htc->max_msgs_per_htc_bundle,
+			   htc->alt_data_credit_size);
 	}
 
 	INIT_WORK(&ar->bundle_tx_work, ath10k_htc_bundle_tx_work);
@@ -1114,6 +1118,10 @@ setup:
 	ep->tx_credits = tx_alloc;
 	ep->tx_credit_size = htc->target_credit_size;
 
+	if (conn_req->service_id == ATH10K_HTC_SVC_ID_HTT_DATA_MSG &&
+	    htc->alt_data_credit_size != 0)
+		ep->tx_credit_size = htc->alt_data_credit_size;
+
 	/* copy all the callbacks */
 	ep->ep_ops = conn_req->ep_ops;
 
diff --git a/drivers/net/wireless/ath/ath10k/htc.h b/drivers/net/wireless/ath/ath10k/htc.h
index d045dbc42158..0d180faf3b77 100644
--- a/drivers/net/wireless/ath/ath10k/htc.h
+++ b/drivers/net/wireless/ath/ath10k/htc.h
@@ -119,6 +119,8 @@ enum ath10k_htc_conn_flags {
 #define ATH10K_HTC_CONN_FLAGS_RECV_ALLOC_LSB  8
 };
 
+#define ATH10K_HTC_MSG_READY_EXT_ALT_DATA_MASK 0xFFF
+
 enum ath10k_htc_conn_svc_status {
 	ATH10K_HTC_CONN_SVC_STATUS_SUCCESS      = 0,
 	ATH10K_HTC_CONN_SVC_STATUS_NOT_FOUND    = 1,
@@ -155,8 +157,14 @@ struct ath10k_htc_ready_extended {
 	struct ath10k_htc_ready base;
 	u8 htc_version; /* @enum ath10k_htc_version */
 	u8 max_msgs_per_htc_bundle;
-	u8 pad0;
-	u8 pad1;
+	union {
+		__le16 reserved;
+		struct {
+			u8 pad0;
+			u8 pad1;
+		} __packed;
+	} __packed;
+
 } __packed;
 
 struct ath10k_htc_conn_svc {
@@ -393,6 +401,7 @@ struct ath10k_htc {
 	int total_transmit_credits;
 	int target_credit_size;
 	u8 max_msgs_per_htc_bundle;
+	int alt_data_credit_size;
 };
 
 int ath10k_htc_init(struct ath10k *ar);
-- 
cgit v1.2.3-59-g8ed1b


From dd7fc5545bbafdbd6c1efdc996b61883b285bdc5 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Tue, 21 Apr 2020 15:09:35 +0300
Subject: ath10k: add flush tx packets for SDIO chip

When a station is connected to an AP and running TX traffic such as
TCP/UDP, and the system enters the suspend state, mac80211 calls
ath10k_flush with the drop flag set. Currently it only sends a WMI peer
flush to firmware, and firmware flushes all pending TX packets. For
PCIe, firmware indicates the TX packet status to ath10k, and ath10k
then indicates TX complete with that status to mac80211, so all the
packets have been flushed at this moment. For SDIO chips it is
different: TX complete indication is disabled by default, ath10k has
a TX queue, and TX credit control is enabled with a total of 96 TX
credits. When credits are not sufficient, packets are buffered in
the TX queue of ath10k; the maximum number of packets is
TARGET_TLV_NUM_MSDU_DESC_HL, which is 1024. So for SDIO, when mac80211
calls ath10k_flush with the drop flag set, there may be pending packets
in the TX queue of ath10k, and if there are not sufficient TX credits,
the packets stay in the queue until a TX credit report arrives from
firmware. In a noisy environment the TX speed is low and the TX credit
report from firmware is delayed further, so num_pending_tx stays above
0 until all packets have been sent to firmware. After the 1st
ath10k_flush, mac80211 immediately calls the 2nd ath10k_flush without
the drop flag set, which calls ath10k_mac_wait_tx_complete and waits
until num_pending_tx becomes 0. In a noisy environment it can easily
wait close to 5 seconds, which makes suspend take a long time.

1st and 2nd callstack of ath10k_flush
[  303.740427] ath10k_sdio mmc1:0001:1: ath10k_flush drop:1, pending:0-0
[  303.740495] ------------[ cut here ]------------
[  303.740739] WARNING: CPU: 1 PID: 3921 at /mnt/host/source/src/third_party/kernel/v4.19/drivers/net/wireless/ath/ath10k/mac.c:7025 ath10k_flush+0x54/0x104 [ath10k_core]
[  303.740757] Modules linked in: bridge stp llc ath10k_sdio ath10k_core rfcomm uinput cros_ec_rpmsg mtk_seninf mtk_cam_isp mtk_vcodec_enc mtk_fd mtk_vcodec_dec mtk_vcodec_common mtk_dip mtk_mdp3 videobuf2_dma_contig videobuf2_memops v4l2_mem2mem videobuf2_v4l2 videobuf2_common hid_google_hammer hci_uart btqca bluetooth dw9768 ov8856 ecdh_generic ov02a10 v4l2_fwnode mtk_scp mtk_rpmsg rpmsg_core mtk_scp_ipi ipt_MASQUERADE fuse iio_trig_sysfs cros_ec_sensors_ring cros_ec_sensors_sync cros_ec_light_prox cros_ec_sensors industrialio_triggered_buffer
[  303.740914]  kfifo_buf cros_ec_activity cros_ec_sensors_core lzo_rle lzo_compress ath mac80211 zram cfg80211 joydev [last unloaded: ath10k_core]
[  303.741009] CPU: 1 PID: 3921 Comm: kworker/u16:10 Tainted: G        W         4.19.95 #2
[  303.741027] Hardware name: MediaTek krane sku176 board (DT)
[  303.741061] Workqueue: events_unbound async_run_entry_fn
[  303.741086] pstate: 60000005 (nZCv daif -PAN -UAO)
[  303.741166] pc : ath10k_flush+0x54/0x104 [ath10k_core]
[  303.741244] lr : ath10k_flush+0x54/0x104 [ath10k_core]
[  303.741260] sp : ffffffdf080e77a0
[  303.741276] x29: ffffffdf080e77a0 x28: ffffffdef3730040
[  303.741300] x27: ffffff907c2240a0 x26: ffffffde6ff39afc
[  303.741321] x25: ffffffdef3730040 x24: ffffff907bf61018
[  303.741343] x23: ffffff907c2240a0 x22: ffffffde6ff39a50
[  303.741364] x21: 0000000000000001 x20: ffffffde6ff39a50
[  303.741385] x19: ffffffde6bac2420 x18: 0000000000017200
[  303.741407] x17: ffffff907c24a000 x16: 0000000000000037
[  303.741428] x15: ffffff907b49a568 x14: ffffff907cf332c1
[  303.741476] x13: 00000000000922e4 x12: 0000000000000000
[  303.741497] x11: 0000000000000001 x10: 0000000000000007
[  303.741518] x9 : f2256b8c1de4bc00 x8 : f2256b8c1de4bc00
[  303.741539] x7 : ffffff907ab5e764 x6 : 0000000000000000
[  303.741560] x5 : 0000000000000080 x4 : 0000000000000001
[  303.741582] x3 : ffffffdf080e74a8 x2 : ffffff907aa91244
[  303.741603] x1 : ffffffdf080e74a8 x0 : 0000000000000024
[  303.741624] Call trace:
[  303.741701]  ath10k_flush+0x54/0x104 [ath10k_core]
[  303.741941]  __ieee80211_flush_queues+0x1dc/0x358 [mac80211]
[  303.742098]  ieee80211_flush_queues+0x34/0x44 [mac80211]
[  303.742253]  ieee80211_set_disassoc+0xc0/0x5ec [mac80211]
[  303.742399]  ieee80211_mgd_deauth+0x720/0x7d4 [mac80211]
[  303.742535]  ieee80211_deauth+0x24/0x30 [mac80211]
[  303.742720]  cfg80211_mlme_deauth+0x250/0x3bc [cfg80211]
[  303.742849]  cfg80211_mlme_down+0x90/0xd0 [cfg80211]
[  303.742971]  cfg80211_disconnect+0x340/0x3a0 [cfg80211]
[  303.743087]  __cfg80211_leave+0xe4/0x17c [cfg80211]
[  303.743203]  cfg80211_leave+0x38/0x50 [cfg80211]
[  303.743319]  wiphy_suspend+0x84/0x5bc [cfg80211]
[  303.743335]  dpm_run_callback+0x170/0x304
[  303.743346]  __device_suspend+0x2dc/0x3e8
[  303.743356]  async_suspend+0x2c/0xb0
[  303.743370]  async_run_entry_fn+0x48/0xf8
[  303.743383]  process_one_work+0x304/0x604
[  303.743394]  worker_thread+0x248/0x3f4
[  303.743403]  kthread+0x120/0x130
[  303.743416]  ret_from_fork+0x10/0x18

[  303.743812] ath10k_sdio mmc1:0001:1: ath10k_flush drop:0, pending:0-0
[  303.743858] ------------[ cut here ]------------
[  303.744057] WARNING: CPU: 1 PID: 3921 at /mnt/host/source/src/third_party/kernel/v4.19/drivers/net/wireless/ath/ath10k/mac.c:7025 ath10k_flush+0x54/0x104 [ath10k_core]
[  303.744075] Modules linked in: bridge stp llc ath10k_sdio ath10k_core rfcomm uinput cros_ec_rpmsg mtk_seninf mtk_cam_isp mtk_vcodec_enc mtk_fd mtk_vcodec_dec mtk_vcodec_common mtk_dip mtk_mdp3 videobuf2_dma_contig videobuf2_memops v4l2_mem2mem videobuf2_v4l2 videobuf2_common hid_google_hammer hci_uart btqca bluetooth dw9768 ov8856 ecdh_generic ov02a10 v4l2_fwnode mtk_scp mtk_rpmsg rpmsg_core mtk_scp_ipi ipt_MASQUERADE fuse iio_trig_sysfs cros_ec_sensors_ring cros_ec_sensors_sync cros_ec_light_prox cros_ec_sensors industrialio_triggered_buffer kfifo_buf cros_ec_activity cros_ec_sensors_core lzo_rle lzo_compress ath mac80211 zram cfg80211 joydev [last unloaded: ath10k_core]
[  303.744256] CPU: 1 PID: 3921 Comm: kworker/u16:10 Tainted: G        W         4.19.95 #2
[  303.744273] Hardware name: MediaTek krane sku176 board (DT)
[  303.744301] Workqueue: events_unbound async_run_entry_fn
[  303.744325] pstate: 60000005 (nZCv daif -PAN -UAO)
[  303.744403] pc : ath10k_flush+0x54/0x104 [ath10k_core]
[  303.744480] lr : ath10k_flush+0x54/0x104 [ath10k_core]
[  303.744496] sp : ffffffdf080e77a0
[  303.744512] x29: ffffffdf080e77a0 x28: ffffffdef3730040
[  303.744534] x27: ffffff907c2240a0 x26: ffffffde6ff39afc
[  303.744556] x25: ffffffdef3730040 x24: ffffff907bf61018
[  303.744577] x23: ffffff907c2240a0 x22: ffffffde6ff39a50
[  303.744598] x21: 0000000000000000 x20: ffffffde6ff39a50
[  303.744620] x19: ffffffde6bac2420 x18: 000000000001831c
[  303.744641] x17: ffffff907c24a000 x16: 0000000000000037
[  303.744662] x15: ffffff907b49a568 x14: ffffff907cf332c1
[  303.744683] x13: 00000000000922ea x12: 0000000000000000
[  303.744704] x11: 0000000000000001 x10: 0000000000000007
[  303.744747] x9 : f2256b8c1de4bc00 x8 : f2256b8c1de4bc00
[  303.744768] x7 : ffffff907ab5e764 x6 : 0000000000000000
[  303.744789] x5 : 0000000000000080 x4 : 0000000000000001
[  303.744810] x3 : ffffffdf080e74a8 x2 : ffffff907aa91244
[  303.744831] x1 : ffffffdf080e74a8 x0 : 0000000000000024
[  303.744853] Call trace:
[  303.744929]  ath10k_flush+0x54/0x104 [ath10k_core]
[  303.745098]  __ieee80211_flush_queues+0x1dc/0x358 [mac80211]
[  303.745277]  ieee80211_flush_queues+0x34/0x44 [mac80211]
[  303.745424]  ieee80211_set_disassoc+0x108/0x5ec [mac80211]
[  303.745569]  ieee80211_mgd_deauth+0x720/0x7d4 [mac80211]
[  303.745706]  ieee80211_deauth+0x24/0x30 [mac80211]
[  303.745853]  cfg80211_mlme_deauth+0x250/0x3bc [cfg80211]
[  303.745979]  cfg80211_mlme_down+0x90/0xd0 [cfg80211]
[  303.746103]  cfg80211_disconnect+0x340/0x3a0 [cfg80211]
[  303.746219]  __cfg80211_leave+0xe4/0x17c [cfg80211]
[  303.746335]  cfg80211_leave+0x38/0x50 [cfg80211]
[  303.746452]  wiphy_suspend+0x84/0x5bc [cfg80211]
[  303.746467]  dpm_run_callback+0x170/0x304
[  303.746477]  __device_suspend+0x2dc/0x3e8
[  303.746487]  async_suspend+0x2c/0xb0
[  303.746498]  async_run_entry_fn+0x48/0xf8
[  303.746510]  process_one_work+0x304/0x604
[  303.746521]  worker_thread+0x248/0x3f4
[  303.746530]  kthread+0x120/0x130
[  303.746542]  ret_from_fork+0x10/0x18

One sample's debugging log: it waited 3190 ms (5000 - 1810).

1st ath10k_flush, it has 120 packets in tx queue of ath10k:
<...>-1513  [000] .... 25374.786005: ath10k_log_err: ath10k_sdio mmc1:0001:1 ath10k_flush drop:1, pending:120-0
<...>-1513  [000] ...1 25374.788375: ath10k_log_warn: ath10k_sdio mmc1:0001:1 ath10k_htt_tx_mgmt_inc_pending htt->num_pending_mgmt_tx:0
<...>-1500  [001] .... 25374.790143: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:121

2nd ath10k_flush, it has 121 packets in tx queue of ath10k:
<...>-1513  [000] .... 25374.790571: ath10k_log_err: ath10k_sdio mmc1:0001:1 ath10k_flush drop:0, pending:121-0
<...>-1513  [000] .... 25374.791990: ath10k_log_err: ath10k_sdio mmc1:0001:1 ath10k_mac_wait_tx_complete state:1 pending:121-0
<...>-1508  [001] .... 25374.792696: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:46
<...>-1508  [001] .... 25374.792700: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:46
<...>-1508  [001] .... 25374.792729: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:121
<...>-1508  [001] .... 25374.792937: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:88, count:32, len:49792
<...>-1508  [001] .... 25374.793031: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:75, count:14, len:21784
kworker/u16:0-25773 [003] .... 25374.793701: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx complete, eid:1, pending complete count:46
<...>-1881  [000] .... 25375.073178: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:24
<...>-1881  [000] .... 25375.073182: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:24
<...>-1881  [000] .... 25375.073429: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:75
<...>-1879  [001] .... 25375.074090: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx complete, eid:1, pending complete count:24
<...>-1881  [000] .... 25375.074123: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:51, count:24, len:37344
<...>-1879  [001] .... 25375.270126: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:26
<...>-1879  [001] .... 25375.270130: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:26
<...>-1488  [000] .... 25375.270174: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:51
<...>-1488  [000] .... 25375.270529: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:25, count:26, len:40456
<...>-1879  [001] .... 25375.270693: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx complete, eid:1, pending complete count:26
<...>-1488  [001] .... 25377.775885: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:12
<...>-1488  [001] .... 25377.775890: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:12
<...>-1488  [001] .... 25377.775933: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:25
<...>-1488  [001] .... 25377.776059: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:13, count:12, len:18672
<...>-1879  [001] .... 25377.776100: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx complete, eid:1, pending complete count:12
<...>-1488  [001] .... 25377.878079: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:15
<...>-1488  [001] .... 25377.878087: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:15
<...>-1879  [000] .... 25377.878323: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:13
<...>-1879  [000] .... 25377.878487: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx status:0, eid:1, req count:0, count:13, len:20228
<...>-1879  [000] .... 25377.878497: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx complete, eid:1, pending complete count:13
<...>-1488  [001] .... 25377.919927: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit update: delta:11
<...>-1488  [001] .... 25377.919932: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 credit total:13
<...>-1488  [001] .... 25377.919976: ath10k_log_dbg: ath10k_sdio mmc1:0001:1 bundle tx work, eid:1, count:0
<...>-1881  [000] .... 25377.982645: ath10k_log_warn: ath10k_sdio mmc1:0001:1 HTT_T2H_MSG_TYPE_MGMT_TX_COMPLETION status:0
<...>-1513  [001] .... 25377.982973: ath10k_log_err: ath10k_sdio mmc1:0001:1 ath10k_mac_wait_tx_complete time_left:1810, pending:0-0

Flushing all pending TX packets in the 1st ath10k_flush reduces the wait
time of the 2nd ath10k_flush, so suspend takes much less time.

This patch only affects SDIO chips.

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200415233730.10581-1-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/htt.h    | 7 +++++++
 drivers/net/wireless/ath/ath10k/htt_tx.c | 8 +++++++-
 drivers/net/wireless/ath/ath10k/mac.c    | 1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 462a25b3056d..8f3710cf28f4 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -2049,6 +2049,7 @@ struct ath10k_htt_tx_ops {
 	int (*htt_h2t_aggr_cfg_msg)(struct ath10k_htt *htt,
 				    u8 max_subfrms_ampdu,
 				    u8 max_subfrms_amsdu);
+	void (*htt_flush_tx)(struct ath10k_htt *htt);
 };
 
 static inline int ath10k_htt_send_rx_ring_cfg(struct ath10k_htt *htt)
@@ -2088,6 +2089,12 @@ static inline int ath10k_htt_tx(struct ath10k_htt *htt,
 	return htt->tx_ops->htt_tx(htt, txmode, msdu);
 }
 
+static inline void ath10k_htt_flush_tx(struct ath10k_htt *htt)
+{
+	if (htt->tx_ops->htt_flush_tx)
+		htt->tx_ops->htt_flush_tx(htt);
+}
+
 static inline int ath10k_htt_alloc_txbuff(struct ath10k_htt *htt)
 {
 	if (!htt->tx_ops->htt_alloc_txbuff)
diff --git a/drivers/net/wireless/ath/ath10k/htt_tx.c b/drivers/net/wireless/ath/ath10k/htt_tx.c
index ff044426c337..4fd10ac3a941 100644
--- a/drivers/net/wireless/ath/ath10k/htt_tx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_tx.c
@@ -529,10 +529,15 @@ void ath10k_htt_tx_destroy(struct ath10k_htt *htt)
 	htt->tx_mem_allocated = false;
 }
 
-void ath10k_htt_tx_stop(struct ath10k_htt *htt)
+static void ath10k_htt_flush_tx_queue(struct ath10k_htt *htt)
 {
 	ath10k_htc_stop_hl(htt->ar);
 	idr_for_each(&htt->pending_tx, ath10k_htt_tx_clean_up_pending, htt->ar);
+}
+
+void ath10k_htt_tx_stop(struct ath10k_htt *htt)
+{
+	ath10k_htt_flush_tx_queue(htt);
 	idr_destroy(&htt->pending_tx);
 }
 
@@ -1825,6 +1830,7 @@ static const struct ath10k_htt_tx_ops htt_tx_ops_hl = {
 	.htt_send_frag_desc_bank_cfg = ath10k_htt_send_frag_desc_bank_cfg_32,
 	.htt_tx = ath10k_htt_tx_hl,
 	.htt_h2t_aggr_cfg_msg = ath10k_htt_h2t_aggr_cfg_msg_32,
+	.htt_flush_tx = ath10k_htt_flush_tx_queue,
 };
 
 void ath10k_htt_set_tx_ops(struct ath10k_htt *htt)
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index a59a7a5631a8..6791c0035be0 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -7224,6 +7224,7 @@ static void ath10k_flush(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 					ath10k_wmi_peer_flush(ar, arvif->vdev_id,
 							      arvif->bssid, bitmap);
 			}
+			ath10k_htt_flush_tx(&ar->htt);
 		}
 		return;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 3fef10ec321ced8e75cd0a28616402401cbbcaf4 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:25 +0200
Subject: Bluetooth: btbcm: Drop upper nibble version check from
 btbcm_initialize()

btbcm_initialize() must either return an error or fill the passed-in
fw_name; otherwise we end up passing uninitialized stack memory to
request_firmware().

Since we have a fallback hw_name of "BCM", not having a known version
in the subver field does not matter; drop the check so that we always
fill the passed-in fw_name.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 1f498f358f60..b9e1fe052148 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -440,10 +440,6 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
 			return err;
 	}
 
-	/* Upper nibble of rev should be between 0 and 3? */
-	if (((rev & 0xf000) >> 12) > 3)
-		return 0;
-
 	bcm_subver_table = (hdev->bus == HCI_USB) ? bcm_usb_subver_table :
 						    bcm_uart_subver_table;
 
-- 
cgit v1.2.3-59-g8ed1b


From f8c51d28e9d13f20c33f4f2f46f8e7d0b8476b9c Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:26 +0200
Subject: Bluetooth: btbcm: Move setting of USE_BDADDR_PROPERTY quirk to
 hci_bcm.c

btbcm_finalize() is currently only used by UART attached BCM devices.

Move the setting of the USE_BDADDR_PROPERTY quirk, which we only want
for UART attached devices to hci_bcm in preparation for using
btbcm_finalize() for USB attached devices too.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c   | 6 ------
 drivers/bluetooth/hci_bcm.c | 6 ++++++
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index b9e1fe052148..8052a0e8dbfb 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -488,12 +488,6 @@ int btbcm_finalize(struct hci_dev *hdev)
 
 	set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
 
-	/* Some devices ship with the controller default address.
-	 * Allow the bootloader to set a valid address through the
-	 * device tree.
-	 */
-	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hdev->quirks);
-
 	return 0;
 }
 EXPORT_SYMBOL_GPL(btbcm_finalize);
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 19e4587f366c..c42bf791a61b 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -623,6 +623,12 @@ finalize:
 	if (err)
 		return err;
 
+	/* Some devices ship with the controller default address.
+	 * Allow the bootloader to set a valid address through the
+	 * device tree.
+	 */
+	set_bit(HCI_QUIRK_USE_BDADDR_PROPERTY, &hu->hdev->quirks);
+
 	if (!bcm_request_irq(bcm))
 		err = bcm_setup_sleep(hu);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0287c5d84f5c0cde6c39362d56c7002dc4acedb3 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:27 +0200
Subject: Bluetooth: btbcm: Fold Patch loading + applying into
 btbcm_initialize()

Instead of having btbcm_initialize() fill a passed-in fw_name buffer
and then have its callers use that to request the firmware + load
it into the HCI, make btbcm_initialize() do this itself the first
time it is called (it gets called a second time to reset the HCI
after the firmware has been loaded).

This removes some code duplication and makes it easier for further
patches in this series to try more than 1 firmware filename.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c   | 50 +++++++++++++++++++++++++++------------------
 drivers/bluetooth/btbcm.h   |  6 ++----
 drivers/bluetooth/hci_bcm.c | 19 +++--------------
 3 files changed, 35 insertions(+), 40 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 8052a0e8dbfb..c22e90a5e288 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -27,6 +27,8 @@
 #define BDADDR_BCM4345C5 (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0xc5, 0x45, 0x43}})
 #define BDADDR_BCM43341B (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0x1b, 0x34, 0x43}})
 
+#define BCM_FW_NAME_LEN			64
+
 int btbcm_check_bdaddr(struct hci_dev *hdev)
 {
 	struct hci_rp_read_bd_addr *bda;
@@ -408,14 +410,15 @@ static const struct bcm_subver_table bcm_usb_subver_table[] = {
 	{ }
 };
 
-int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
-		     bool reinit)
+int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 {
 	u16 subver, rev, pid, vid;
 	const char *hw_name = "BCM";
 	struct sk_buff *skb;
 	struct hci_rp_read_local_version *ver;
 	const struct bcm_subver_table *bcm_subver_table;
+	char fw_name[BCM_FW_NAME_LEN];
+	const struct firmware *fw;
 	int i, err;
 
 	/* Reset */
@@ -434,7 +437,7 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
 	kfree_skb(skb);
 
 	/* Read controller information */
-	if (!reinit) {
+	if (!(*fw_load_done)) {
 		err = btbcm_read_info(hdev);
 		if (err)
 			return err;
@@ -460,27 +463,42 @@ int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
 		pid = get_unaligned_le16(skb->data + 3);
 		kfree_skb(skb);
 
-		snprintf(fw_name, len, "brcm/%s-%4.4x-%4.4x.hcd",
+		snprintf(fw_name, BCM_FW_NAME_LEN, "brcm/%s-%4.4x-%4.4x.hcd",
 			 hw_name, vid, pid);
 	} else {
-		snprintf(fw_name, len, "brcm/%s.hcd", hw_name);
+		snprintf(fw_name, BCM_FW_NAME_LEN, "brcm/%s.hcd", hw_name);
 	}
 
 	bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
 		    hw_name, (subver & 0xe000) >> 13,
 		    (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
 
+	if (*fw_load_done)
+		return 0;
+
+	err = request_firmware(&fw, fw_name, &hdev->dev);
+	if (err) {
+		bt_dev_info(hdev, "BCM: Patch %s not found", fw_name);
+		return 0;
+	}
+
+	err = btbcm_patchram(hdev, fw);
+	if (err)
+		bt_dev_info(hdev, "BCM: Patch failed (%d)", err);
+
+	release_firmware(fw);
+	*fw_load_done = true;
 	return 0;
 }
 EXPORT_SYMBOL_GPL(btbcm_initialize);
 
 int btbcm_finalize(struct hci_dev *hdev)
 {
-	char fw_name[64];
+	bool fw_load_done = true;
 	int err;
 
 	/* Re-initialize */
-	err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), true);
+	err = btbcm_initialize(hdev, &fw_load_done);
 	if (err)
 		return err;
 
@@ -494,28 +512,20 @@ EXPORT_SYMBOL_GPL(btbcm_finalize);
 
 int btbcm_setup_patchram(struct hci_dev *hdev)
 {
-	char fw_name[64];
-	const struct firmware *fw;
+	bool fw_load_done = false;
 	struct sk_buff *skb;
 	int err;
 
 	/* Initialize */
-	err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), false);
+	err = btbcm_initialize(hdev, &fw_load_done);
 	if (err)
 		return err;
 
-	err = request_firmware(&fw, fw_name, &hdev->dev);
-	if (err < 0) {
-		bt_dev_info(hdev, "BCM: Patch %s not found", fw_name);
+	if (!fw_load_done)
 		goto done;
-	}
 
-	btbcm_patchram(hdev, fw);
-
-	release_firmware(fw);
-
-	/* Re-initialize */
-	err = btbcm_initialize(hdev, fw_name, sizeof(fw_name), true);
+	/* Re-initialize after loading Patch */
+	err = btbcm_initialize(hdev, &fw_load_done);
 	if (err)
 		return err;
 
diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h
index 014ef847a486..8437caba421d 100644
--- a/drivers/bluetooth/btbcm.h
+++ b/drivers/bluetooth/btbcm.h
@@ -62,8 +62,7 @@ int btbcm_write_pcm_int_params(struct hci_dev *hdev,
 int btbcm_setup_patchram(struct hci_dev *hdev);
 int btbcm_setup_apple(struct hci_dev *hdev);
 
-int btbcm_initialize(struct hci_dev *hdev, char *fw_name, size_t len,
-		     bool reinit);
+int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done);
 int btbcm_finalize(struct hci_dev *hdev);
 
 #else
@@ -105,8 +104,7 @@ static inline int btbcm_setup_apple(struct hci_dev *hdev)
 	return 0;
 }
 
-static inline int btbcm_initialize(struct hci_dev *hdev, char *fw_name,
-				   size_t len, bool reinit)
+static inline int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 {
 	return 0;
 }
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index c42bf791a61b..61731cb451cb 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -553,8 +553,7 @@ static int bcm_flush(struct hci_uart *hu)
 static int bcm_setup(struct hci_uart *hu)
 {
 	struct bcm_data *bcm = hu->priv;
-	char fw_name[64];
-	const struct firmware *fw;
+	bool fw_load_done = false;
 	unsigned int speed;
 	int err;
 
@@ -563,21 +562,12 @@ static int bcm_setup(struct hci_uart *hu)
 	hu->hdev->set_diag = bcm_set_diag;
 	hu->hdev->set_bdaddr = btbcm_set_bdaddr;
 
-	err = btbcm_initialize(hu->hdev, fw_name, sizeof(fw_name), false);
+	err = btbcm_initialize(hu->hdev, &fw_load_done);
 	if (err)
 		return err;
 
-	err = request_firmware(&fw, fw_name, &hu->hdev->dev);
-	if (err < 0) {
-		bt_dev_info(hu->hdev, "BCM: Patch %s not found", fw_name);
+	if (!fw_load_done)
 		return 0;
-	}
-
-	err = btbcm_patchram(hu->hdev, fw);
-	if (err) {
-		bt_dev_info(hu->hdev, "BCM: Patch failed (%d)", err);
-		goto finalize;
-	}
 
 	/* Init speed if any */
 	if (hu->init_speed)
@@ -616,9 +606,6 @@ static int bcm_setup(struct hci_uart *hu)
 		btbcm_write_pcm_int_params(hu->hdev, &params);
 	}
 
-finalize:
-	release_firmware(fw);
-
 	err = btbcm_finalize(hu->hdev);
 	if (err)
 		return err;
-- 
cgit v1.2.3-59-g8ed1b


From 2fcdd562b91bdc29dddd406f7278102e4d90b1fa Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:28 +0200
Subject: Bluetooth: btbcm: Make btbcm_initialize() print local-name on re-init
 too

Make btbcm_initialize() get and print the device's local-name on re-init
too, this will make us also print the local-name after loading the
Patch on UART attached devices making things more consistent.

This also removes some code duplication from btbcm_setup_patchram()
and allows more code duplication removal there in a follow-up patch.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index c22e90a5e288..3404021b10bd 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -360,6 +360,13 @@ static int btbcm_read_info(struct hci_dev *hdev)
 	bt_dev_info(hdev, "BCM: features 0x%2.2x", skb->data[1]);
 	kfree_skb(skb);
 
+	return 0;
+}
+
+static int btbcm_print_local_name(struct hci_dev *hdev)
+{
+	struct sk_buff *skb;
+
 	/* Read Local Name */
 	skb = btbcm_read_local_name(hdev);
 	if (IS_ERR(skb))
@@ -442,6 +449,9 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 		if (err)
 			return err;
 	}
+	err = btbcm_print_local_name(hdev);
+	if (err)
+		return err;
 
 	bcm_subver_table = (hdev->bus == HCI_USB) ? bcm_usb_subver_table :
 						    bcm_uart_subver_table;
@@ -513,7 +523,6 @@ EXPORT_SYMBOL_GPL(btbcm_finalize);
 int btbcm_setup_patchram(struct hci_dev *hdev)
 {
 	bool fw_load_done = false;
-	struct sk_buff *skb;
 	int err;
 
 	/* Initialize */
@@ -529,14 +538,6 @@ int btbcm_setup_patchram(struct hci_dev *hdev)
 	if (err)
 		return err;
 
-	/* Read Local Name */
-	skb = btbcm_read_local_name(hdev);
-	if (IS_ERR(skb))
-		return PTR_ERR(skb);
-
-	bt_dev_info(hdev, "%s", (char *)(skb->data + 1));
-	kfree_skb(skb);
-
 done:
 	btbcm_check_bdaddr(hdev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0383f16a87c4dec6840cdbb80c2a30ecfdc2ffb0 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:29 +0200
Subject: Bluetooth: btbcm: Make btbcm_setup_patchram use btbcm_finalize

On UART attached devices we do:

1. btbcm_initialize()
2. Setup UART baudrate, etc.
3. btbcm_finalize()

After our previous changes we can now also use btbcm_finalize() from
the btbcm_setup_patchram() function used on USB devices without any
functional changes. This completes unifying the USB and UART paths
as much as possible.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c   | 27 ++++++++-------------------
 drivers/bluetooth/btbcm.h   |  4 ++--
 drivers/bluetooth/hci_bcm.c |  2 +-
 3 files changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 3404021b10bd..cc3628cace35 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -502,15 +502,16 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 }
 EXPORT_SYMBOL_GPL(btbcm_initialize);
 
-int btbcm_finalize(struct hci_dev *hdev)
+int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done)
 {
-	bool fw_load_done = true;
 	int err;
 
-	/* Re-initialize */
-	err = btbcm_initialize(hdev, &fw_load_done);
-	if (err)
-		return err;
+	/* Re-initialize if necessary */
+	if (*fw_load_done) {
+		err = btbcm_initialize(hdev, fw_load_done);
+		if (err)
+			return err;
+	}
 
 	btbcm_check_bdaddr(hdev);
 
@@ -530,20 +531,8 @@ int btbcm_setup_patchram(struct hci_dev *hdev)
 	if (err)
 		return err;
 
-	if (!fw_load_done)
-		goto done;
-
 	/* Re-initialize after loading Patch */
-	err = btbcm_initialize(hdev, &fw_load_done);
-	if (err)
-		return err;
-
-done:
-	btbcm_check_bdaddr(hdev);
-
-	set_bit(HCI_QUIRK_STRICT_DUPLICATE_FILTER, &hdev->quirks);
-
-	return 0;
+	return btbcm_finalize(hdev, &fw_load_done);
 }
 EXPORT_SYMBOL_GPL(btbcm_setup_patchram);
 
diff --git a/drivers/bluetooth/btbcm.h b/drivers/bluetooth/btbcm.h
index 8437caba421d..8bf01565fdfc 100644
--- a/drivers/bluetooth/btbcm.h
+++ b/drivers/bluetooth/btbcm.h
@@ -63,7 +63,7 @@ int btbcm_setup_patchram(struct hci_dev *hdev);
 int btbcm_setup_apple(struct hci_dev *hdev);
 
 int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done);
-int btbcm_finalize(struct hci_dev *hdev);
+int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done);
 
 #else
 
@@ -109,7 +109,7 @@ static inline int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 	return 0;
 }
 
-static inline int btbcm_finalize(struct hci_dev *hdev)
+static inline int btbcm_finalize(struct hci_dev *hdev, bool *fw_load_done)
 {
 	return 0;
 }
diff --git a/drivers/bluetooth/hci_bcm.c b/drivers/bluetooth/hci_bcm.c
index 61731cb451cb..8ea5ca8d71d6 100644
--- a/drivers/bluetooth/hci_bcm.c
+++ b/drivers/bluetooth/hci_bcm.c
@@ -606,7 +606,7 @@ static int bcm_setup(struct hci_uart *hu)
 		btbcm_write_pcm_int_params(hu->hdev, &params);
 	}
 
-	err = btbcm_finalize(hu->hdev);
+	err = btbcm_finalize(hu->hdev, &fw_load_done);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From f53b975cf113fa0dca9c7bba067c3d749682cc82 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:30 +0200
Subject: Bluetooth: btbcm: Bail sooner from btbcm_initialize() when not
 loading fw

If we have already loaded the firmware/patchram and btbcm_initialize()
is called to re-init the HCI after this then there is no need to get
the USB device-ids and build a firmware-filename out of these.

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index cc3628cace35..9fa153b35825 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -463,6 +463,13 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 		}
 	}
 
+	bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
+		    hw_name, (subver & 0xe000) >> 13,
+		    (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
+
+	if (*fw_load_done)
+		return 0;
+
 	if (hdev->bus == HCI_USB) {
 		/* Read USB Product Info */
 		skb = btbcm_read_usb_product(hdev);
@@ -479,13 +486,6 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 		snprintf(fw_name, BCM_FW_NAME_LEN, "brcm/%s.hcd", hw_name);
 	}
 
-	bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
-		    hw_name, (subver & 0xe000) >> 13,
-		    (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
-
-	if (*fw_load_done)
-		return 0;
-
 	err = request_firmware(&fw, fw_name, &hdev->dev);
 	if (err) {
 		bt_dev_info(hdev, "BCM: Patch %s not found", fw_name);
-- 
cgit v1.2.3-59-g8ed1b


From 74530a639adfa2b2162df6a688c6367ecae6a3ca Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:31 +0200
Subject: Bluetooth: btbcm: Try multiple Patch filenames when loading the Patch
 firmware

Currently the bcm_uart_subver_ and bcm_usb_subver_table-s lack entries
for various newer chipsets. This makes the code use just "BCM" as prefix
for the filename to pass to request-firmware, making it harder for users
to figure out which firmware they need. This is especially a problem with
UART attached devices where this leads to the filename being "BCM.hcd".

If we add new entries to the subver-tables now, then this will change
what firmware file the kernel looks for, e.g. currently linux-firmware
contains a brcm/BCM-0bb4-0306.hcd file. If we add the info for the
BCM20703A1 to the subver table, then this will change to
brcm/BCM20703A1-0bb4-0306.hcd. This will cause the file to no longer
get loaded breaking Bluetooth for existing users, going against the
no regressions policy.

To avoid this regression make the btbcm code try multiple filenames,
first try the fullname, e.g. BCM20703A1-0bb4-0306.hcd and if that is
not found, then fallback to the name with just BCM as prefix.

This commit also adds an info message which filename was used,
this makes the output look like this for example:

[   57.387867] Bluetooth: hci0: BCM20703A1
[   57.387870] Bluetooth: hci0: BCM20703A1 (001.001.005) build 0000
[   57.389438] Bluetooth: hci0: BCM20703A1 'brcm/BCM20703A1-0a5c-6410.hcd' Patch
[   58.681769] Bluetooth: hci0: BCM20703A1 Generic USB 20Mhz fcbga_BU
[   58.681772] Bluetooth: hci0: BCM20703A1 (001.001.005) build 0481

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 59 ++++++++++++++++++++++++++++++++++-------------
 1 file changed, 43 insertions(+), 16 deletions(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 9fa153b35825..739ba1200f5d 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -28,6 +28,9 @@
 #define BDADDR_BCM43341B (&(bdaddr_t) {{0xac, 0x1f, 0x00, 0x1b, 0x34, 0x43}})
 
 #define BCM_FW_NAME_LEN			64
+#define BCM_FW_NAME_COUNT_MAX		2
+/* For kmalloc-ing the fw-name array instead of putting it on the stack */
+typedef char bcm_fw_name[BCM_FW_NAME_LEN];
 
 int btbcm_check_bdaddr(struct hci_dev *hdev)
 {
@@ -420,11 +423,13 @@ static const struct bcm_subver_table bcm_usb_subver_table[] = {
 int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 {
 	u16 subver, rev, pid, vid;
-	const char *hw_name = "BCM";
 	struct sk_buff *skb;
 	struct hci_rp_read_local_version *ver;
 	const struct bcm_subver_table *bcm_subver_table;
-	char fw_name[BCM_FW_NAME_LEN];
+	const char *hw_name = NULL;
+	char postfix[16] = "";
+	int fw_name_count = 0;
+	bcm_fw_name *fw_name;
 	const struct firmware *fw;
 	int i, err;
 
@@ -464,7 +469,7 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 	}
 
 	bt_dev_info(hdev, "%s (%3.3u.%3.3u.%3.3u) build %4.4u",
-		    hw_name, (subver & 0xe000) >> 13,
+		    hw_name ? hw_name : "BCM", (subver & 0xe000) >> 13,
 		    (subver & 0x1f00) >> 8, (subver & 0x00ff), rev & 0x0fff);
 
 	if (*fw_load_done)
@@ -480,24 +485,46 @@ int btbcm_initialize(struct hci_dev *hdev, bool *fw_load_done)
 		pid = get_unaligned_le16(skb->data + 3);
 		kfree_skb(skb);
 
-		snprintf(fw_name, BCM_FW_NAME_LEN, "brcm/%s-%4.4x-%4.4x.hcd",
-			 hw_name, vid, pid);
-	} else {
-		snprintf(fw_name, BCM_FW_NAME_LEN, "brcm/%s.hcd", hw_name);
+		snprintf(postfix, sizeof(postfix), "-%4.4x-%4.4x", vid, pid);
 	}
 
-	err = request_firmware(&fw, fw_name, &hdev->dev);
-	if (err) {
-		bt_dev_info(hdev, "BCM: Patch %s not found", fw_name);
-		return 0;
+	fw_name = kmalloc(BCM_FW_NAME_COUNT_MAX * BCM_FW_NAME_LEN, GFP_KERNEL);
+	if (!fw_name)
+		return -ENOMEM;
+
+	if (hw_name) {
+		snprintf(fw_name[fw_name_count], BCM_FW_NAME_LEN,
+			 "brcm/%s%s.hcd", hw_name, postfix);
+		fw_name_count++;
 	}
 
-	err = btbcm_patchram(hdev, fw);
-	if (err)
-		bt_dev_info(hdev, "BCM: Patch failed (%d)", err);
+	snprintf(fw_name[fw_name_count], BCM_FW_NAME_LEN,
+		 "brcm/BCM%s.hcd", postfix);
+	fw_name_count++;
+
+	for (i = 0; i < fw_name_count; i++) {
+		err = firmware_request_nowarn(&fw, fw_name[i], &hdev->dev);
+		if (err == 0) {
+			bt_dev_info(hdev, "%s '%s' Patch",
+				    hw_name ? hw_name : "BCM", fw_name[i]);
+			*fw_load_done = true;
+			break;
+		}
+	}
+
+	if (*fw_load_done) {
+		err = btbcm_patchram(hdev, fw);
+		if (err)
+			bt_dev_info(hdev, "BCM: Patch failed (%d)", err);
+
+		release_firmware(fw);
+	} else {
+		bt_dev_err(hdev, "BCM: firmware Patch file not found, tried:");
+		for (i = 0; i < fw_name_count; i++)
+			bt_dev_err(hdev, "BCM: '%s'", fw_name[i]);
+	}
 
-	release_firmware(fw);
-	*fw_load_done = true;
+	kfree(fw_name);
 	return 0;
 }
 EXPORT_SYMBOL_GPL(btbcm_initialize);
-- 
cgit v1.2.3-59-g8ed1b


From c03ee9af4e07112bd3fc688daca9e654f41eca93 Mon Sep 17 00:00:00 2001
From: Hans de Goede <hdegoede@redhat.com>
Date: Fri, 17 Apr 2020 19:15:32 +0200
Subject: Bluetooth: btbcm: Add 2 missing models to subver tables

Currently the bcm_uart_subver_ and bcm_usb_subver_table-s lack entries
for the BCM4324B5 and BCM20703A1 chipsets. This makes the code use just
"BCM" as prefix for the filename to pass to request-firmware, making it
harder for users to figure out which firmware they need. This especially
is problematic with the UART attached BCM4324B5 where this leads to the
filename being just "BCM.hcd".

Add the 2 missing devices to subver tables. This has been tested on:

1. A Dell XPS15 9550 where this makes btbcm.c try to load
"BCM20703A1-0a5c-6410.hcd" before it tries to load "BCM-0a5c-6410.hcd".

2. A Thinkpad 8 where this makes btbcm.c try to load
"BCM4324B5.hcd" before it tries to load "BCM.hcd"

Signed-off-by: Hans de Goede <hdegoede@redhat.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index 739ba1200f5d..df7a8a22e53c 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -392,6 +392,7 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 	{ 0x410e, "BCM43341B0"	},	/* 002.001.014 */
 	{ 0x4204, "BCM2076B1"	},	/* 002.002.004 */
 	{ 0x4406, "BCM4324B3"	},	/* 002.004.006 */
+	{ 0x4606, "BCM4324B5"	},	/* 002.006.006 */
 	{ 0x6109, "BCM4335C0"	},	/* 003.001.009 */
 	{ 0x610c, "BCM4354"	},	/* 003.001.012 */
 	{ 0x2122, "BCM4343A0"	},	/* 001.001.034 */
@@ -407,6 +408,7 @@ static const struct bcm_subver_table bcm_uart_subver_table[] = {
 };
 
 static const struct bcm_subver_table bcm_usb_subver_table[] = {
+	{ 0x2105, "BCM20703A1"	},	/* 001.001.005 */
 	{ 0x210b, "BCM43142A0"	},	/* 001.001.011 */
 	{ 0x2112, "BCM4314A0"	},	/* 001.001.018 */
 	{ 0x2118, "BCM20702A0"	},	/* 001.001.024 */
-- 
cgit v1.2.3-59-g8ed1b


From 86b956de119c09818d0aabaf668280d8e4bd0d4b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 20 Apr 2020 19:27:41 +0300
Subject: net: mscc: ocelot: support matching on EtherType

Currently, the filter's protocol is ignored except for a few special
cases (IPv4 and IPv6).

The EtherType can be matched inside VCAP IS2 by using a MAC_ETYPE key.
So there are 2 cases in which EtherType matches are supported:

  - As part of a larger MAC_ETYPE rule, such as:

    tc filter add dev swp0 ingress protocol ip \
            flower skip_sw src_mac 42:be:24:9b:76:20 action drop

  - Standalone (matching on protocol only):

    tc filter add dev swp0 ingress protocol arp \
            flower skip_sw action drop

As before, if the protocol is not specified, it is implicitly "all" and
the EtherType mask in the MAC_ETYPE half key is set to zero.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_flower.c | 22 +++++++++++++++++++---
 1 file changed, 19 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 954cb67eeaa2..67f0f5455ff0 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -51,6 +51,8 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 {
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 	struct flow_dissector *dissector = rule->match.dissector;
+	u16 proto = ntohs(f->common.protocol);
+	bool match_protocol = true;
 
 	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_CONTROL) |
@@ -71,7 +73,6 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
 		struct flow_match_eth_addrs match;
-		u16 proto = ntohs(f->common.protocol);
 
 		/* The hw support mac matches only for MAC_ETYPE key,
 		 * therefore if other matches(port, tcp flags, etc) are added
@@ -114,6 +115,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 				match.key->ip_proto;
 			ace->frame.ipv4.proto.mask[0] =
 				match.mask->ip_proto;
+			match_protocol = false;
 		}
 		if (ntohs(match.key->n_proto) == ETH_P_IPV6) {
 			ace->type = OCELOT_ACE_TYPE_IPV6;
@@ -121,11 +123,12 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 				match.key->ip_proto;
 			ace->frame.ipv6.proto.mask[0] =
 				match.mask->ip_proto;
+			match_protocol = false;
 		}
 	}
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV4_ADDRS) &&
-	    ntohs(f->common.protocol) == ETH_P_IP) {
+	    proto == ETH_P_IP) {
 		struct flow_match_ipv4_addrs match;
 		u8 *tmp;
 
@@ -141,10 +144,11 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 
 		tmp = &ace->frame.ipv4.dip.mask.addr[0];
 		memcpy(tmp, &match.mask->dst, 4);
+		match_protocol = false;
 	}
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_IPV6_ADDRS) &&
-	    ntohs(f->common.protocol) == ETH_P_IPV6) {
+	    proto == ETH_P_IPV6) {
 		return -EOPNOTSUPP;
 	}
 
@@ -156,6 +160,7 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 		ace->frame.ipv4.sport.mask = ntohs(match.mask->src);
 		ace->frame.ipv4.dport.value = ntohs(match.key->dst);
 		ace->frame.ipv4.dport.mask = ntohs(match.mask->dst);
+		match_protocol = false;
 	}
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
@@ -167,9 +172,20 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 		ace->vlan.vid.mask = match.mask->vlan_id;
 		ace->vlan.pcp.value[0] = match.key->vlan_priority;
 		ace->vlan.pcp.mask[0] = match.mask->vlan_priority;
+		match_protocol = false;
 	}
 
 finished_key_parsing:
+	if (match_protocol && proto != ETH_P_ALL) {
+		/* TODO: support SNAP, LLC etc */
+		if (proto < ETH_P_802_3_MIN)
+			return -EOPNOTSUPP;
+		ace->type = OCELOT_ACE_TYPE_ETYPE;
+		*(u16 *)ace->frame.etype.etype.value = htons(proto);
+		*(u16 *)ace->frame.etype.etype.mask = 0xffff;
+	}
+	/* else, a rule of type OCELOT_ACE_TYPE_ANY is implicitly added */
+
 	ace->prio = f->common.prio;
 	ace->id = f->cookie;
 	return ocelot_flower_parse_action(f, ace);
-- 
cgit v1.2.3-59-g8ed1b


From 7dec902f4fc0cf1162e18030f2598440e311a2d2 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 20 Apr 2020 19:27:42 +0300
Subject: net: mscc: ocelot: refine the ocelot_ace_is_problematic_mac_etype
 function

The commit mentioned below was a bit too harsh, and while it restricted
the invalid key combinations which are known to not work, such as:

tc filter add dev swp0 ingress proto ip \
      flower src_ip 192.0.2.1 action drop
tc filter add dev swp0 ingress proto all \
      flower src_mac 00:11:22:33:44:55 action drop

it also restricted some which still should work, such as:

tc filter add dev swp0 ingress proto ip \
      flower src_ip 192.0.2.1 action drop
tc filter add dev swp0 ingress proto 0x22f0 \
      flower src_mac 00:11:22:33:44:55 action drop

What actually does not match "sanely" is a MAC_ETYPE rule on frames
having an EtherType of ARP, IPv4, IPv6, in addition to SNAP and OAM
frames (which the ocelot tc-flower implementation does not parse yet, so
the function might need to be revisited again in the future).

So just make the function recognize the problematic MAC_ETYPE rules by
EtherType - thus the VCAP IS2 can be forced to match even on those
packets.

This patch makes it possible for IP rules to live on a port together
with MAC_ETYPE rules that are non-all, non-arp, non-ip and non-ipv6.

Fixes: d4d0cb741d7b ("net: mscc: ocelot: deal with problematic MAC_ETYPE VCAP IS2 rules")
Reported-by: Allan W. Nielsen <allan.nielsen@microchip.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_ace.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_ace.c b/drivers/net/ethernet/mscc/ocelot_ace.c
index 8a2f7d13ef6d..dfd82a3baab2 100644
--- a/drivers/net/ethernet/mscc/ocelot_ace.c
+++ b/drivers/net/ethernet/mscc/ocelot_ace.c
@@ -739,14 +739,24 @@ static void ocelot_match_all_as_mac_etype(struct ocelot *ocelot, int port,
 
 static bool ocelot_ace_is_problematic_mac_etype(struct ocelot_ace_rule *ace)
 {
+	u16 proto, mask;
+
 	if (ace->type != OCELOT_ACE_TYPE_ETYPE)
 		return false;
-	if (ether_addr_to_u64(ace->frame.etype.dmac.value) &
-	    ether_addr_to_u64(ace->frame.etype.dmac.mask))
+
+	proto = ntohs(*(u16 *)ace->frame.etype.etype.value);
+	mask = ntohs(*(u16 *)ace->frame.etype.etype.mask);
+
+	/* ETH_P_ALL match, so all protocols below are included */
+	if (mask == 0)
 		return true;
-	if (ether_addr_to_u64(ace->frame.etype.smac.value) &
-	    ether_addr_to_u64(ace->frame.etype.smac.mask))
+	if (proto == ETH_P_ARP)
 		return true;
+	if (proto == ETH_P_IP)
+		return true;
+	if (proto == ETH_P_IPV6)
+		return true;
+
 	return false;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4faa2e06433fbba16a13a21e1380ee4d246b95fc Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Mon, 20 Apr 2020 19:27:43 +0300
Subject: net: mscc: ocelot: lift protocol restriction for flow_match_eth_addrs
 keys

An attempt was made in commit fe3490e6107e ("net: mscc: ocelot: Hardware
ofload for tc flower filter") to avoid clashes between MAC_ETYPE rules
and IP rules. Because the protocol blacklist should have included
ETH_P_ALL too, it created some confusion, but now the situation should
be dealt with a bit better by the patch immediately previous to this one
("net: mscc: ocelot: refine the ocelot_ace_is_problematic_mac_etype
function").

So now we can remove that check. MAC_ETYPE rules with a protocol of
ETH_P_IP, ETH_P_IPV6, ETH_P_ARP and ETH_P_ALL _are_ supported, with some
restrictions regarding per-port exclusivity which are enforced now.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot_flower.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot_flower.c b/drivers/net/ethernet/mscc/ocelot_flower.c
index 67f0f5455ff0..5ce172e22b43 100644
--- a/drivers/net/ethernet/mscc/ocelot_flower.c
+++ b/drivers/net/ethernet/mscc/ocelot_flower.c
@@ -87,11 +87,6 @@ static int ocelot_flower_parse(struct flow_cls_offload *f,
 		     BIT(FLOW_DISSECTOR_KEY_CONTROL)))
 			return -EOPNOTSUPP;
 
-		if (proto == ETH_P_IP ||
-		    proto == ETH_P_IPV6 ||
-		    proto == ETH_P_ARP)
-			return -EOPNOTSUPP;
-
 		flow_rule_match_eth_addrs(rule, &match);
 		ace->type = OCELOT_ACE_TYPE_ETYPE;
 		ether_addr_copy(ace->frame.etype.dmac.value,
-- 
cgit v1.2.3-59-g8ed1b


From f42ceca226cadf2c27709499af8643acd4281cd7 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 20 Apr 2020 11:07:21 -0700
Subject: dt-bindings: net: Correct description of 'broken-turn-around'

The turn around bytes (2) are placed between the control phase of the
MDIO transaction and the data phase, correct the wording to be more
exact.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ethernet-phy.yaml | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/ethernet-phy.yaml b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
index 5aa141ccc113..9b1f1147ca36 100644
--- a/Documentation/devicetree/bindings/net/ethernet-phy.yaml
+++ b/Documentation/devicetree/bindings/net/ethernet-phy.yaml
@@ -81,7 +81,8 @@ properties:
     $ref: /schemas/types.yaml#definitions/flag
     description:
       If set, indicates the PHY device does not correctly release
-      the turn around line low at the end of a MDIO transaction.
+      the turn around line low at end of the control phase of the
+      MDIO transaction.
 
   enet-phy-lane-swap:
     $ref: /schemas/types.yaml#definitions/flag
-- 
cgit v1.2.3-59-g8ed1b


From b92d905f2c9c5e66868fae26ba9ed32352df0f5a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 20 Apr 2020 11:07:22 -0700
Subject: dt-bindings: net: mdio: Document common properties

Some of the properties pertaining to the broken turn around or resets
were only documented in ethernet-phy.yaml while they are applicable
across all MDIO devices and not Ethernet PHYs specifically which are a
superset.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 28 +++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index cd6c6ae6dabb..37b9579b5651 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -62,6 +62,34 @@ patternProperties:
         description:
           The ID number for the PHY.
 
+      broken-turn-around:
+        $ref: /schemas/types.yaml#definitions/flag
+        description:
+          If set, indicates the MDIO device does not correctly release
+          the turn around line low at end of the control phase of the
+          MDIO transaction.
+
+      resets:
+        maxItems: 1
+
+      reset-names:
+        const: phy
+
+      reset-gpios:
+        maxItems: 1
+        description:
+          The GPIO phandle and specifier for the MDIO reset signal.
+
+      reset-assert-us:
+        description:
+          Delay after the reset was asserted in microseconds. If this
+          property is missing the delay will be skipped.
+
+      reset-deassert-us:
+        description:
+          Delay after the reset was deasserted in microseconds. If
+          this property is missing the delay will be skipped.
+
     required:
       - reg
 
-- 
cgit v1.2.3-59-g8ed1b


From 630c3ff8c3d554054229b8c1c3d3a2b9465ffa64 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 20 Apr 2020 11:07:23 -0700
Subject: dt-bindings: net: mdio: Make descriptions more general

A number of descriptions assume a PHY device, but since this binding
describes a MDIO bus which can have different kinds of MDIO devices
attached to it, rephrase some descriptions to be more general in that
regard.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/mdio.yaml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/mdio.yaml b/Documentation/devicetree/bindings/net/mdio.yaml
index 37b9579b5651..d6a3bf8550eb 100644
--- a/Documentation/devicetree/bindings/net/mdio.yaml
+++ b/Documentation/devicetree/bindings/net/mdio.yaml
@@ -31,13 +31,13 @@ properties:
     maxItems: 1
     description:
       The phandle and specifier for the GPIO that controls the RESET
-      lines of all PHYs on that MDIO bus.
+      lines of all devices on that MDIO bus.
 
   reset-delay-us:
     description:
-      RESET pulse width in microseconds. It applies to all PHY devices
-      and must therefore be appropriately determined based on all PHY
-      requirements (maximum value of all per-PHY RESET pulse widths).
+      RESET pulse width in microseconds. It applies to all MDIO devices
+      and must therefore be appropriately determined based on all devices
+      requirements (maximum value of all per-device RESET pulse widths).
 
   clock-frequency:
     description:
@@ -60,7 +60,7 @@ patternProperties:
         minimum: 0
         maximum: 31
         description:
-          The ID number for the PHY.
+          The ID number for the device.
 
       broken-turn-around:
         $ref: /schemas/types.yaml#definitions/flag
-- 
cgit v1.2.3-59-g8ed1b


From 0a32f1ff2a2e41404deaba5fb32f8a0d640c0974 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 20 Apr 2020 20:21:11 +0200
Subject: net: phy: broadcom: add helper to write/read RDB registers

RDB (Register Data Base) registers are used on newer Broadcom PHYs. Add
helper to read, write and modify these registers.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 80 +++++++++++++++++++++++++++++++++++++++++++
 drivers/net/phy/bcm-phy-lib.h |  9 +++++
 include/linux/brcmphy.h       |  3 ++
 3 files changed, 92 insertions(+)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index e77b274a09fd..d5f9a2701989 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -155,6 +155,86 @@ int bcm_phy_write_shadow(struct phy_device *phydev, u16 shadow,
 }
 EXPORT_SYMBOL_GPL(bcm_phy_write_shadow);
 
+int __bcm_phy_read_rdb(struct phy_device *phydev, u16 rdb)
+{
+	int val;
+
+	val = __phy_write(phydev, MII_BCM54XX_RDB_ADDR, rdb);
+	if (val < 0)
+		return val;
+
+	return __phy_read(phydev, MII_BCM54XX_RDB_DATA);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_read_rdb);
+
+int bcm_phy_read_rdb(struct phy_device *phydev, u16 rdb)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = __bcm_phy_read_rdb(phydev, rdb);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_read_rdb);
+
+int __bcm_phy_write_rdb(struct phy_device *phydev, u16 rdb, u16 val)
+{
+	int ret;
+
+	ret = __phy_write(phydev, MII_BCM54XX_RDB_ADDR, rdb);
+	if (ret < 0)
+		return ret;
+
+	return __phy_write(phydev, MII_BCM54XX_RDB_DATA, val);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_write_rdb);
+
+int bcm_phy_write_rdb(struct phy_device *phydev, u16 rdb, u16 val)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = __bcm_phy_write_rdb(phydev, rdb, val);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_write_rdb);
+
+int __bcm_phy_modify_rdb(struct phy_device *phydev, u16 rdb, u16 mask, u16 set)
+{
+	int new, ret;
+
+	ret = __phy_write(phydev, MII_BCM54XX_RDB_ADDR, rdb);
+	if (ret < 0)
+		return ret;
+
+	ret = __phy_read(phydev, MII_BCM54XX_RDB_DATA);
+	if (ret < 0)
+		return ret;
+
+	new = (ret & ~mask) | set;
+	if (new == ret)
+		return 0;
+
+	return __phy_write(phydev, MII_BCM54XX_RDB_DATA, new);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_modify_rdb);
+
+int bcm_phy_modify_rdb(struct phy_device *phydev, u16 rdb, u16 mask, u16 set)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = __bcm_phy_modify_rdb(phydev, rdb, mask, set);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_modify_rdb);
+
 int bcm_phy_enable_apd(struct phy_device *phydev, bool dll_pwr_down)
 {
 	int val;
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index 129df819be8c..4d3de91cda6c 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -48,6 +48,15 @@ int bcm_phy_write_shadow(struct phy_device *phydev, u16 shadow,
 			 u16 val);
 int bcm_phy_read_shadow(struct phy_device *phydev, u16 shadow);
 
+int __bcm_phy_write_rdb(struct phy_device *phydev, u16 rdb, u16 val);
+int bcm_phy_write_rdb(struct phy_device *phydev, u16 rdb, u16 val);
+int __bcm_phy_read_rdb(struct phy_device *phydev, u16 rdb);
+int bcm_phy_read_rdb(struct phy_device *phydev, u16 rdb);
+int __bcm_phy_modify_rdb(struct phy_device *phydev, u16 rdb, u16 mask,
+			 u16 set);
+int bcm_phy_modify_rdb(struct phy_device *phydev, u16 rdb, u16 mask,
+		       u16 set);
+
 int bcm_phy_ack_intr(struct phy_device *phydev);
 int bcm_phy_config_intr(struct phy_device *phydev);
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 7e1d857c8468..897b69309964 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -115,6 +115,9 @@
 #define MII_BCM54XX_SHD_VAL(x)	((x & 0x1f) << 10)
 #define MII_BCM54XX_SHD_DATA(x)	((x & 0x3ff) << 0)
 
+#define MII_BCM54XX_RDB_ADDR	0x1e
+#define MII_BCM54XX_RDB_DATA	0x1f
+
 /*
  * AUXILIARY CONTROL SHADOW ACCESS REGISTERS.  (PHY REG 0x18)
  */
-- 
cgit v1.2.3-59-g8ed1b


From 6937602ed3f9ebd46ed6a6b5e609c0ae4ed99008 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 20 Apr 2020 20:21:12 +0200
Subject: net: phy: add Broadcom BCM54140 support

The Broadcom BCM54140 is a Quad SGMII/QSGMII Copper/Fiber Gigabit
Ethernet transceiver.

This also adds support for tunables to set and get downshift and
energy detect auto power-down.

The PHY has four ports and each port has its own PHY address.
There are per-port registers as well as global registers.
Unfortunately, the global registers can only be accessed by reading
and writing from/to the PHY address of the first port. Further,
there is no way to find out what port you actually are by just
reading the per-port registers. We therefore have to scan the
bus on the PHY probe to determine the port and thus what address
we need to access the global registers.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig    |  10 +
 drivers/net/phy/Makefile   |   1 +
 drivers/net/phy/bcm54140.c | 481 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/brcmphy.h    |   1 +
 4 files changed, 493 insertions(+)
 create mode 100644 drivers/net/phy/bcm54140.c

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 3fa33d27eeba..cb7936b577de 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -346,6 +346,16 @@ config BROADCOM_PHY
 	  Currently supports the BCM5411, BCM5421, BCM5461, BCM54616S, BCM5464,
 	  BCM5481, BCM54810 and BCM5482 PHYs.
 
+config BCM54140_PHY
+	tristate "Broadcom BCM54140 PHY"
+	depends on PHYLIB
+	select BCM_NET_PHYLIB
+	help
+	  Support the Broadcom BCM54140 Quad SGMII/QSGMII PHY.
+
+	  This driver also supports the hardware monitoring of this PHY and
+	  exposes voltage and temperature sensors.
+
 config BCM84881_PHY
 	tristate "Broadcom BCM84881 PHY"
 	depends on PHYLIB
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index 2f5c7093a65b..cd345b75d127 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -68,6 +68,7 @@ obj-$(CONFIG_BCM87XX_PHY)	+= bcm87xx.o
 obj-$(CONFIG_BCM_CYGNUS_PHY)	+= bcm-cygnus.o
 obj-$(CONFIG_BCM_NET_PHYLIB)	+= bcm-phy-lib.o
 obj-$(CONFIG_BROADCOM_PHY)	+= broadcom.o
+obj-$(CONFIG_BCM54140_PHY)	+= bcm54140.o
 obj-$(CONFIG_BCM84881_PHY)	+= bcm84881.o
 obj-$(CONFIG_CICADA_PHY)	+= cicada.o
 obj-$(CONFIG_CORTINA_PHY)	+= cortina.o
diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
new file mode 100644
index 000000000000..0eeb60de67f8
--- /dev/null
+++ b/drivers/net/phy/bcm54140.c
@@ -0,0 +1,481 @@
+// SPDX-License-Identifier: GPL-2.0+
+/* Broadcom BCM54140 Quad SGMII/QSGMII Copper/Fiber Gigabit PHY
+ *
+ * Copyright (c) 2020 Michael Walle <michael@walle.cc>
+ */
+
+#include <linux/bitfield.h>
+#include <linux/brcmphy.h>
+#include <linux/module.h>
+#include <linux/phy.h>
+
+#include "bcm-phy-lib.h"
+
+/* RDB per-port registers
+ */
+#define BCM54140_RDB_ISR		0x00a	/* interrupt status */
+#define BCM54140_RDB_IMR		0x00b	/* interrupt mask */
+#define  BCM54140_RDB_INT_LINK		BIT(1)	/* link status changed */
+#define  BCM54140_RDB_INT_SPEED		BIT(2)	/* link speed change */
+#define  BCM54140_RDB_INT_DUPLEX	BIT(3)	/* duplex mode changed */
+#define BCM54140_RDB_SPARE1		0x012	/* spare control 1 */
+#define  BCM54140_RDB_SPARE1_LSLM	BIT(2)	/* link speed LED mode */
+#define BCM54140_RDB_SPARE2		0x014	/* spare control 2 */
+#define  BCM54140_RDB_SPARE2_WS_RTRY_DIS BIT(8) /* wirespeed retry disable */
+#define  BCM54140_RDB_SPARE2_WS_RTRY_LIMIT GENMASK(4, 2) /* retry limit */
+#define BCM54140_RDB_SPARE3		0x015	/* spare control 3 */
+#define  BCM54140_RDB_SPARE3_BIT0	BIT(0)
+#define BCM54140_RDB_LED_CTRL		0x019	/* LED control */
+#define  BCM54140_RDB_LED_CTRL_ACTLINK0	BIT(4)
+#define  BCM54140_RDB_LED_CTRL_ACTLINK1	BIT(8)
+#define BCM54140_RDB_C_APWR		0x01a	/* auto power down control */
+#define  BCM54140_RDB_C_APWR_SINGLE_PULSE	BIT(8)	/* single pulse */
+#define  BCM54140_RDB_C_APWR_APD_MODE_DIS	0 /* APD disable */
+#define  BCM54140_RDB_C_APWR_APD_MODE_EN	1 /* APD enable */
+#define  BCM54140_RDB_C_APWR_APD_MODE_DIS2	2 /* APD disable */
+#define  BCM54140_RDB_C_APWR_APD_MODE_EN_ANEG	3 /* APD enable w/ aneg */
+#define  BCM54140_RDB_C_APWR_APD_MODE_MASK	GENMASK(6, 5)
+#define  BCM54140_RDB_C_APWR_SLP_TIM_MASK BIT(4)/* sleep timer */
+#define  BCM54140_RDB_C_APWR_SLP_TIM_2_7 0	/* 2.7s */
+#define  BCM54140_RDB_C_APWR_SLP_TIM_5_4 1	/* 5.4s */
+#define BCM54140_RDB_C_PWR		0x02a	/* copper power control */
+#define  BCM54140_RDB_C_PWR_ISOLATE	BIT(5)	/* super isolate mode */
+#define BCM54140_RDB_C_MISC_CTRL	0x02f	/* misc copper control */
+#define  BCM54140_RDB_C_MISC_CTRL_WS_EN BIT(4)	/* wirespeed enable */
+
+/* RDB global registers
+ */
+#define BCM54140_RDB_TOP_IMR		0x82d	/* interrupt mask */
+#define  BCM54140_RDB_TOP_IMR_PORT0	BIT(4)
+#define  BCM54140_RDB_TOP_IMR_PORT1	BIT(5)
+#define  BCM54140_RDB_TOP_IMR_PORT2	BIT(6)
+#define  BCM54140_RDB_TOP_IMR_PORT3	BIT(7)
+
+#define BCM54140_DEFAULT_DOWNSHIFT 5
+#define BCM54140_MAX_DOWNSHIFT 9
+
+struct bcm54140_priv {
+	int port;
+	int base_addr;
+};
+
+static int bcm54140_base_read_rdb(struct phy_device *phydev, u16 rdb)
+{
+	struct bcm54140_priv *priv = phydev->priv;
+	struct mii_bus *bus = phydev->mdio.bus;
+	int ret;
+
+	mutex_lock(&bus->mdio_lock);
+	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_ADDR, rdb);
+	if (ret < 0)
+		goto out;
+
+	ret = __mdiobus_read(bus, priv->base_addr, MII_BCM54XX_RDB_DATA);
+
+out:
+	mutex_unlock(&bus->mdio_lock);
+	return ret;
+}
+
+static int bcm54140_base_write_rdb(struct phy_device *phydev,
+				   u16 rdb, u16 val)
+{
+	struct bcm54140_priv *priv = phydev->priv;
+	struct mii_bus *bus = phydev->mdio.bus;
+	int ret;
+
+	mutex_lock(&bus->mdio_lock);
+	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_ADDR, rdb);
+	if (ret < 0)
+		goto out;
+
+	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_DATA, val);
+
+out:
+	mutex_unlock(&bus->mdio_lock);
+	return ret;
+}
+
+/* Under some circumstances a core PLL may not lock, this will then prevent
+ * a successful link establishment. Restart the PLL after the voltages are
+ * stable to workaround this issue.
+ */
+static int bcm54140_b0_workaround(struct phy_device *phydev)
+{
+	int spare3;
+	int ret;
+
+	spare3 = bcm_phy_read_rdb(phydev, BCM54140_RDB_SPARE3);
+	if (spare3 < 0)
+		return spare3;
+
+	spare3 &= ~BCM54140_RDB_SPARE3_BIT0;
+
+	ret = bcm_phy_write_rdb(phydev, BCM54140_RDB_SPARE3, spare3);
+	if (ret)
+		return ret;
+
+	ret = phy_modify(phydev, MII_BMCR, 0, BMCR_PDOWN);
+	if (ret)
+		return ret;
+
+	ret = phy_modify(phydev, MII_BMCR, BMCR_PDOWN, 0);
+	if (ret)
+		return ret;
+
+	spare3 |= BCM54140_RDB_SPARE3_BIT0;
+
+	return bcm_phy_write_rdb(phydev, BCM54140_RDB_SPARE3, spare3);
+}
+
+/* The BCM54140 is a quad PHY where only the first port has access to the
+ * global register. Thus we need to find out its PHY address.
+ *
+ */
+static int bcm54140_get_base_addr_and_port(struct phy_device *phydev)
+{
+	struct bcm54140_priv *priv = phydev->priv;
+	struct mii_bus *bus = phydev->mdio.bus;
+	int addr, min_addr, max_addr;
+	int step = 1;
+	u32 phy_id;
+	int tmp;
+
+	min_addr = phydev->mdio.addr;
+	max_addr = phydev->mdio.addr;
+	addr = phydev->mdio.addr;
+
+	/* We scan forward and backwards and look for PHYs which have the
+	 * same phy_id like we do. Step 1 will scan forward, step 2
+	 * backwards. Once we are finished, we have a min_addr and
+	 * max_addr which resembles the range of PHY addresses of the same
+	 * type of PHY. There is one caveat; there may be many PHYs of
+	 * the same type, but we know that each PHY takes exactly 4
+	 * consecutive addresses. Therefore we can deduce our offset
+	 * to the base address of this quad PHY.
+	 */
+
+	while (1) {
+		if (step == 3) {
+			break;
+		} else if (step == 1) {
+			max_addr = addr;
+			addr++;
+		} else {
+			min_addr = addr;
+			addr--;
+		}
+
+		if (addr < 0 || addr >= PHY_MAX_ADDR) {
+			addr = phydev->mdio.addr;
+			step++;
+			continue;
+		}
+
+		/* read the PHY id */
+		tmp = mdiobus_read(bus, addr, MII_PHYSID1);
+		if (tmp < 0)
+			return tmp;
+		phy_id = tmp << 16;
+		tmp = mdiobus_read(bus, addr, MII_PHYSID2);
+		if (tmp < 0)
+			return tmp;
+		phy_id |= tmp;
+
+		/* see if it is still the same PHY */
+		if ((phy_id & phydev->drv->phy_id_mask) !=
+		    (phydev->drv->phy_id & phydev->drv->phy_id_mask)) {
+			addr = phydev->mdio.addr;
+			step++;
+		}
+	}
+
+	/* The range we get should be a multiple of four. Please note that both
+	 * the min_addr and max_addr are inclusive. So we have to add one if we
+	 * subtract them.
+	 */
+	if ((max_addr - min_addr + 1) % 4) {
+		dev_err(&phydev->mdio.dev,
+			"Detected Quad PHY IDs %d..%d doesn't make sense.\n",
+			min_addr, max_addr);
+		return -EINVAL;
+	}
+
+	priv->port = (phydev->mdio.addr - min_addr) % 4;
+	priv->base_addr = phydev->mdio.addr - priv->port;
+
+	return 0;
+}
+
+static int bcm54140_probe(struct phy_device *phydev)
+{
+	struct bcm54140_priv *priv;
+	int ret;
+
+	priv = devm_kzalloc(&phydev->mdio.dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	phydev->priv = priv;
+
+	ret = bcm54140_get_base_addr_and_port(phydev);
+	if (ret)
+		return ret;
+
+	phydev_dbg(phydev, "probed (port %d, base PHY address %d)\n",
+		   priv->port, priv->base_addr);
+
+	return 0;
+}
+
+static int bcm54140_config_init(struct phy_device *phydev)
+{
+	u16 reg = 0xffff;
+	int ret;
+
+	/* Apply hardware errata */
+	ret = bcm54140_b0_workaround(phydev);
+	if (ret)
+		return ret;
+
+	/* Unmask events we are interested in. */
+	reg &= ~(BCM54140_RDB_INT_DUPLEX |
+		 BCM54140_RDB_INT_SPEED |
+		 BCM54140_RDB_INT_LINK);
+	ret = bcm_phy_write_rdb(phydev, BCM54140_RDB_IMR, reg);
+	if (ret)
+		return ret;
+
+	/* LED1=LINKSPD[1], LED2=LINKSPD[2], LED3=LINK/ACTIVITY */
+	ret = bcm_phy_modify_rdb(phydev, BCM54140_RDB_SPARE1,
+				 0, BCM54140_RDB_SPARE1_LSLM);
+	if (ret)
+		return ret;
+
+	ret = bcm_phy_modify_rdb(phydev, BCM54140_RDB_LED_CTRL,
+				 0, BCM54140_RDB_LED_CTRL_ACTLINK0);
+	if (ret)
+		return ret;
+
+	/* disable super isolate mode */
+	return bcm_phy_modify_rdb(phydev, BCM54140_RDB_C_PWR,
+				  BCM54140_RDB_C_PWR_ISOLATE, 0);
+}
+
+static int bcm54140_did_interrupt(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = bcm_phy_read_rdb(phydev, BCM54140_RDB_ISR);
+
+	return (ret < 0) ? 0 : ret; /* a failed read reports "no interrupt" */
+}
+
+static int bcm54140_ack_intr(struct phy_device *phydev)
+{
+	int reg;
+
+	/* clear pending interrupts by reading the per-port status register */
+	reg = bcm_phy_read_rdb(phydev, BCM54140_RDB_ISR);
+	if (reg < 0)
+		return reg;
+
+	return 0;
+}
+
+static int bcm54140_config_intr(struct phy_device *phydev)
+{
+	struct bcm54140_priv *priv = phydev->priv;
+	static const u16 port_to_imr_bit[] = {
+		BCM54140_RDB_TOP_IMR_PORT0, BCM54140_RDB_TOP_IMR_PORT1,
+		BCM54140_RDB_TOP_IMR_PORT2, BCM54140_RDB_TOP_IMR_PORT3,
+	};
+	int reg;
+
+	if (priv->port >= ARRAY_SIZE(port_to_imr_bit))
+		return -EINVAL;
+
+	reg = bcm54140_base_read_rdb(phydev, BCM54140_RDB_TOP_IMR);
+	if (reg < 0)
+		return reg;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+		reg &= ~port_to_imr_bit[priv->port]; /* clear this port's mask bit */
+	else
+		reg |= port_to_imr_bit[priv->port]; /* set this port's mask bit */
+
+	return bcm54140_base_write_rdb(phydev, BCM54140_RDB_TOP_IMR, reg);
+}
+
+static int bcm54140_get_downshift(struct phy_device *phydev, u8 *data)
+{
+	int val;
+
+	val = bcm_phy_read_rdb(phydev, BCM54140_RDB_C_MISC_CTRL);
+	if (val < 0)
+		return val;
+
+	if (!(val & BCM54140_RDB_C_MISC_CTRL_WS_EN)) {
+		*data = DOWNSHIFT_DEV_DISABLE;
+		return 0;
+	}
+
+	val = bcm_phy_read_rdb(phydev, BCM54140_RDB_SPARE2);
+	if (val < 0)
+		return val;
+
+	if (val & BCM54140_RDB_SPARE2_WS_RTRY_DIS)
+		*data = 1;
+	else
+		*data = FIELD_GET(BCM54140_RDB_SPARE2_WS_RTRY_LIMIT, val) + 2;
+
+	return 0;
+}
+
+static int bcm54140_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+	u16 mask, set;
+	int ret;
+
+	if (cnt > BCM54140_MAX_DOWNSHIFT && cnt != DOWNSHIFT_DEV_DEFAULT_COUNT)
+		return -EINVAL;
+
+	if (!cnt)
+		return bcm_phy_modify_rdb(phydev, BCM54140_RDB_C_MISC_CTRL,
+					  BCM54140_RDB_C_MISC_CTRL_WS_EN, 0);
+
+	if (cnt == DOWNSHIFT_DEV_DEFAULT_COUNT)
+		cnt = BCM54140_DEFAULT_DOWNSHIFT;
+
+	if (cnt == 1) {
+		mask = 0;
+		set = BCM54140_RDB_SPARE2_WS_RTRY_DIS;
+	} else {
+		mask = BCM54140_RDB_SPARE2_WS_RTRY_DIS;
+		mask |= BCM54140_RDB_SPARE2_WS_RTRY_LIMIT;
+		set = FIELD_PREP(BCM54140_RDB_SPARE2_WS_RTRY_LIMIT, cnt - 2);
+	}
+	ret = bcm_phy_modify_rdb(phydev, BCM54140_RDB_SPARE2,
+				 mask, set);
+	if (ret)
+		return ret;
+
+	return bcm_phy_modify_rdb(phydev, BCM54140_RDB_C_MISC_CTRL,
+				  0, BCM54140_RDB_C_MISC_CTRL_WS_EN);
+}
+
+static int bcm54140_get_edpd(struct phy_device *phydev, u16 *tx_interval)
+{
+	int val;
+
+	val = bcm_phy_read_rdb(phydev, BCM54140_RDB_C_APWR);
+	if (val < 0)
+		return val;
+
+	switch (FIELD_GET(BCM54140_RDB_C_APWR_APD_MODE_MASK, val)) {
+	case BCM54140_RDB_C_APWR_APD_MODE_DIS:
+	case BCM54140_RDB_C_APWR_APD_MODE_DIS2:
+		*tx_interval = ETHTOOL_PHY_EDPD_DISABLE;
+		break;
+	case BCM54140_RDB_C_APWR_APD_MODE_EN:
+	case BCM54140_RDB_C_APWR_APD_MODE_EN_ANEG:
+		switch (FIELD_GET(BCM54140_RDB_C_APWR_SLP_TIM_MASK, val)) {
+		case BCM54140_RDB_C_APWR_SLP_TIM_2_7:
+			*tx_interval = 2700;
+			break;
+		case BCM54140_RDB_C_APWR_SLP_TIM_5_4:
+			*tx_interval = 5400;
+			break;
+		}
+	}
+
+	return 0;
+}
+
+static int bcm54140_set_edpd(struct phy_device *phydev, u16 tx_interval)
+{
+	u16 mask, set, tim;
+
+	mask = BCM54140_RDB_C_APWR_APD_MODE_MASK;
+	if (tx_interval == ETHTOOL_PHY_EDPD_DISABLE)
+		set = FIELD_PREP(BCM54140_RDB_C_APWR_APD_MODE_MASK,
+				 BCM54140_RDB_C_APWR_APD_MODE_DIS);
+	else
+		set = FIELD_PREP(BCM54140_RDB_C_APWR_APD_MODE_MASK,
+				 BCM54140_RDB_C_APWR_APD_MODE_EN_ANEG);
+
+	set |= BCM54140_RDB_C_APWR_SINGLE_PULSE; /* single pulse mode */
+
+	/* sleep timer; SLP_TIM_* are field values and must be shifted */
+	mask |= BCM54140_RDB_C_APWR_SLP_TIM_MASK;
+	switch (tx_interval) {
+	case ETHTOOL_PHY_EDPD_DFLT_TX_MSECS:
+	case ETHTOOL_PHY_EDPD_DISABLE:
+	case 2700:
+		tim = BCM54140_RDB_C_APWR_SLP_TIM_2_7;
+		break;
+	case 5400:
+		tim = BCM54140_RDB_C_APWR_SLP_TIM_5_4;
+		break;
+	default:
+		return -EINVAL;
+	}
+	set |= FIELD_PREP(BCM54140_RDB_C_APWR_SLP_TIM_MASK, tim);
+
+	return bcm_phy_modify_rdb(phydev, BCM54140_RDB_C_APWR, mask, set);
+}
+
+static int bcm54140_get_tunable(struct phy_device *phydev,
+				struct ethtool_tunable *tuna, void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return bcm54140_get_downshift(phydev, data);
+	case ETHTOOL_PHY_EDPD:
+		return bcm54140_get_edpd(phydev, data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int bcm54140_set_tunable(struct phy_device *phydev,
+				struct ethtool_tunable *tuna, const void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return bcm54140_set_downshift(phydev, *(const u8 *)data);
+	case ETHTOOL_PHY_EDPD:
+		return bcm54140_set_edpd(phydev, *(const u16 *)data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static struct phy_driver bcm54140_drivers[] = {
+	{
+		.phy_id         = PHY_ID_BCM54140,
+		.phy_id_mask    = 0xfffffff0,
+		.name           = "Broadcom BCM54140",
+		.features       = PHY_GBIT_FEATURES,
+		.config_init    = bcm54140_config_init,
+		.did_interrupt	= bcm54140_did_interrupt,
+		.ack_interrupt  = bcm54140_ack_intr,
+		.config_intr    = bcm54140_config_intr,
+		.probe		= bcm54140_probe,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.get_tunable	= bcm54140_get_tunable,
+		.set_tunable	= bcm54140_set_tunable,
+	},
+};
+module_phy_driver(bcm54140_drivers);
+
+static struct mdio_device_id __maybe_unused bcm54140_tbl[] = {
+	{ PHY_ID_BCM54140, 0xfffffff0 },
+	{ }
+};
+
+MODULE_AUTHOR("Michael Walle");
+MODULE_DESCRIPTION("Broadcom BCM54140 PHY driver");
+MODULE_DEVICE_TABLE(mdio, bcm54140_tbl);
+MODULE_LICENSE("GPL");
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 897b69309964..8be150e69c7c 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -25,6 +25,7 @@
 #define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM54612E		0x03625e60
 #define PHY_ID_BCM54616S		0x03625d10
+#define PHY_ID_BCM54140			0xae025019
 #define PHY_ID_BCM57780			0x03625d90
 #define PHY_ID_BCM89610			0x03625cd0
 
-- 
cgit v1.2.3-59-g8ed1b


From 4406d36dfdf1fbd954400e16ffeb915c1907d58a Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 20 Apr 2020 20:21:13 +0200
Subject: net: phy: bcm54140: add hwmon support

The PHY supports monitoring its die temperature as well as two analog
voltages. Add support for it.

Signed-off-by: Michael Walle <michael@walle.cc>
Acked-by: Guenter Roeck <linux@roeck-us.net>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/hwmon/bcm54140.rst |  45 +++++
 Documentation/hwmon/index.rst    |   1 +
 drivers/net/phy/Kconfig          |   1 +
 drivers/net/phy/bcm54140.c       | 396 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 443 insertions(+)
 create mode 100644 Documentation/hwmon/bcm54140.rst

diff --git a/Documentation/hwmon/bcm54140.rst b/Documentation/hwmon/bcm54140.rst
new file mode 100644
index 000000000000..bc6ea4b45966
--- /dev/null
+++ b/Documentation/hwmon/bcm54140.rst
@@ -0,0 +1,45 @@
+.. SPDX-License-Identifier: GPL-2.0-only
+
+Broadcom BCM54140 Quad SGMII/QSGMII PHY
+=======================================
+
+Supported chips:
+
+   * Broadcom BCM54140
+
+     Datasheet: not public
+
+Author: Michael Walle <michael@walle.cc>
+
+Description
+-----------
+
+The Broadcom BCM54140 is a Quad SGMII/QSGMII PHY which supports monitoring
+its die temperature as well as two analog voltages.
+
+The AVDDL is a 1.0V analogue voltage, the AVDDH is a 3.3V analogue voltage.
+Both voltages and the temperature are measured in a round-robin fashion.
+
+Sysfs entries
+-------------
+
+The following attributes are supported.
+
+======================= ========================================================
+in0_label		"AVDDL"
+in0_input		Measured AVDDL voltage.
+in0_min			Minimum AVDDL voltage.
+in0_max			Maximum AVDDL voltage.
+in0_alarm		AVDDL voltage alarm.
+
+in1_label		"AVDDH"
+in1_input		Measured AVDDH voltage.
+in1_min			Minimum AVDDH voltage.
+in1_max			Maximum AVDDH voltage.
+in1_alarm		AVDDH voltage alarm.
+
+temp1_input		Die temperature.
+temp1_min		Minimum die temperature.
+temp1_max		Maximum die temperature.
+temp1_alarm		Die temperature alarm.
+======================= ========================================================
diff --git a/Documentation/hwmon/index.rst b/Documentation/hwmon/index.rst
index 8ef62fd39787..1f0affb3b6e0 100644
--- a/Documentation/hwmon/index.rst
+++ b/Documentation/hwmon/index.rst
@@ -42,6 +42,7 @@ Hardware Monitoring Kernel Drivers
    asb100
    asc7621
    aspeed-pwm-tacho
+   bcm54140
    bel-pfe
    coretemp
    da9052
diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index cb7936b577de..bacfee41b564 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -349,6 +349,7 @@ config BROADCOM_PHY
 config BCM54140_PHY
 	tristate "Broadcom BCM54140 PHY"
 	depends on PHYLIB
+	depends on HWMON || HWMON=n
 	select BCM_NET_PHYLIB
 	help
 	  Support the Broadcom BCM54140 Quad SGMII/QSGMII PHY.
diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 0eeb60de67f8..aa854477e06a 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -6,6 +6,7 @@
 
 #include <linux/bitfield.h>
 #include <linux/brcmphy.h>
+#include <linux/hwmon.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 
@@ -50,6 +51,69 @@
 #define  BCM54140_RDB_TOP_IMR_PORT1	BIT(5)
 #define  BCM54140_RDB_TOP_IMR_PORT2	BIT(6)
 #define  BCM54140_RDB_TOP_IMR_PORT3	BIT(7)
+#define BCM54140_RDB_MON_CTRL		0x831	/* monitor control */
+#define  BCM54140_RDB_MON_CTRL_V_MODE	BIT(3)	/* voltage mode */
+#define  BCM54140_RDB_MON_CTRL_SEL_MASK	GENMASK(2, 1)
+#define  BCM54140_RDB_MON_CTRL_SEL_TEMP	0	/* measure temperature */
+#define  BCM54140_RDB_MON_CTRL_SEL_1V0	1	/* measure AVDDL 1.0V */
+#define  BCM54140_RDB_MON_CTRL_SEL_3V3	2	/* measure AVDDH 3.3V */
+#define  BCM54140_RDB_MON_CTRL_SEL_RR	3	/* measure all round-robin */
+#define  BCM54140_RDB_MON_CTRL_PWR_DOWN	BIT(0)	/* power-down monitor */
+#define BCM54140_RDB_MON_TEMP_VAL	0x832	/* temperature value */
+#define BCM54140_RDB_MON_TEMP_MAX	0x833	/* temperature high thresh */
+#define BCM54140_RDB_MON_TEMP_MIN	0x834	/* temperature low thresh */
+#define  BCM54140_RDB_MON_TEMP_DATA_MASK GENMASK(9, 0)
+#define BCM54140_RDB_MON_1V0_VAL	0x835	/* AVDDL 1.0V value */
+#define BCM54140_RDB_MON_1V0_MAX	0x836	/* AVDDL 1.0V high thresh */
+#define BCM54140_RDB_MON_1V0_MIN	0x837	/* AVDDL 1.0V low thresh */
+#define  BCM54140_RDB_MON_1V0_DATA_MASK	GENMASK(10, 0)
+#define BCM54140_RDB_MON_3V3_VAL	0x838	/* AVDDH 3.3V value */
+#define BCM54140_RDB_MON_3V3_MAX	0x839	/* AVDDH 3.3V high thresh */
+#define BCM54140_RDB_MON_3V3_MIN	0x83a	/* AVDDH 3.3V low thresh */
+#define  BCM54140_RDB_MON_3V3_DATA_MASK	GENMASK(11, 0)
+#define BCM54140_RDB_MON_ISR		0x83b	/* interrupt status */
+#define  BCM54140_RDB_MON_ISR_3V3	BIT(2)	/* AVDDH 3.3V alarm */
+#define  BCM54140_RDB_MON_ISR_1V0	BIT(1)	/* AVDDL 1.0V alarm */
+#define  BCM54140_RDB_MON_ISR_TEMP	BIT(0)	/* temperature alarm */
+
+/* According to the datasheet the formula is:
+ *   T = 413.35 - (0.49055 * bits[9:0])
+ */
+#define BCM54140_HWMON_TO_TEMP(v) (413350L - (v) * 491)
+#define BCM54140_HWMON_FROM_TEMP(v) DIV_ROUND_CLOSEST_ULL(413350L - (v), 491)
+
+/* According to the datasheet the formula is:
+ *   U = bits[10:0] / 1024 * 880 / 0.7
+ *
+ * Normalized:
+ *   U = bits[10:0] / 2048 * 2514
+ */
+#define BCM54140_HWMON_TO_IN_1V0(v) ((v) * 2514 >> 11)
+#define BCM54140_HWMON_FROM_IN_1V0(v) DIV_ROUND_CLOSEST_ULL(((v) << 11), 2514)
+
+/* According to the datasheet the formula is:
+ *   U = bits[11:0] / 1024 * 220 / 0.2
+ *
+ * Normalized:
+ *   U = bits[11:0] / 4096 * 4400
+ */
+#define BCM54140_HWMON_TO_IN_3V3(v) ((v) * 4400 >> 12)
+#define BCM54140_HWMON_FROM_IN_3V3(v) DIV_ROUND_CLOSEST_ULL(((v) << 12), 4400)
+
+#define BCM54140_HWMON_TO_IN(ch, v) ((ch) ? BCM54140_HWMON_TO_IN_3V3(v) \
+					  : BCM54140_HWMON_TO_IN_1V0(v))
+#define BCM54140_HWMON_FROM_IN(ch, v) ((ch) ? BCM54140_HWMON_FROM_IN_3V3(v) \
+					    : BCM54140_HWMON_FROM_IN_1V0(v))
+#define BCM54140_HWMON_IN_MASK(ch) ((ch) ? BCM54140_RDB_MON_3V3_DATA_MASK \
+					 : BCM54140_RDB_MON_1V0_DATA_MASK)
+#define BCM54140_HWMON_IN_VAL_REG(ch) ((ch) ? BCM54140_RDB_MON_3V3_VAL \
+					    : BCM54140_RDB_MON_1V0_VAL)
+#define BCM54140_HWMON_IN_MIN_REG(ch) ((ch) ? BCM54140_RDB_MON_3V3_MIN \
+					    : BCM54140_RDB_MON_1V0_MIN)
+#define BCM54140_HWMON_IN_MAX_REG(ch) ((ch) ? BCM54140_RDB_MON_3V3_MAX \
+					    : BCM54140_RDB_MON_1V0_MAX)
+#define BCM54140_HWMON_IN_ALARM_BIT(ch) ((ch) ? BCM54140_RDB_MON_ISR_3V3 \
+					      : BCM54140_RDB_MON_ISR_1V0)
 
 #define BCM54140_DEFAULT_DOWNSHIFT 5
 #define BCM54140_MAX_DOWNSHIFT 9
@@ -57,8 +121,328 @@
 struct bcm54140_priv {
 	int port;
 	int base_addr;
+#if IS_ENABLED(CONFIG_HWMON)
+	bool pkg_init;
+	/* protect the alarm bits */
+	struct mutex alarm_lock;
+	u16 alarm;
+#endif
 };
 
+#if IS_ENABLED(CONFIG_HWMON)
+static umode_t bcm54140_hwmon_is_visible(const void *data,
+					 enum hwmon_sensor_types type,
+					 u32 attr, int channel)
+{
+	switch (type) {
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_min:
+		case hwmon_in_max:
+			return 0644;
+		case hwmon_in_label:
+		case hwmon_in_input:
+		case hwmon_in_alarm:
+			return 0444;
+		default:
+			return 0;
+		}
+	case hwmon_temp:
+		switch (attr) {
+		case hwmon_temp_min:
+		case hwmon_temp_max:
+			return 0644;
+		case hwmon_temp_input:
+		case hwmon_temp_alarm:
+			return 0444;
+		default:
+			return 0;
+		}
+	default:
+		return 0;
+	}
+}
+
+static int bcm54140_hwmon_read_alarm(struct device *dev, unsigned int bit,
+				     long *val)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	struct bcm54140_priv *priv = phydev->priv;
+	int tmp, ret = 0;
+
+	mutex_lock(&priv->alarm_lock);
+
+	/* latch any alarm bits */
+	tmp = bcm_phy_read_rdb(phydev, BCM54140_RDB_MON_ISR);
+	if (tmp < 0) {
+		ret = tmp;
+		goto out;
+	}
+	priv->alarm |= tmp;
+
+	*val = !!(priv->alarm & bit);
+	priv->alarm &= ~bit;
+
+out:
+	mutex_unlock(&priv->alarm_lock);
+	return ret;
+}
+
+static int bcm54140_hwmon_read_temp(struct device *dev, u32 attr, long *val)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	int reg, tmp;	/* tmp is signed so a negative errno survives */
+
+	switch (attr) {
+	case hwmon_temp_input:
+		reg = BCM54140_RDB_MON_TEMP_VAL;
+		break;
+	case hwmon_temp_min:
+		reg = BCM54140_RDB_MON_TEMP_MIN;
+		break;
+	case hwmon_temp_max:
+		reg = BCM54140_RDB_MON_TEMP_MAX;
+		break;
+	case hwmon_temp_alarm:
+		return bcm54140_hwmon_read_alarm(dev,
+						 BCM54140_RDB_MON_ISR_TEMP,
+						 val);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	tmp = bcm_phy_read_rdb(phydev, reg);
+	if (tmp < 0)
+		return tmp;
+
+	*val = BCM54140_HWMON_TO_TEMP(tmp & BCM54140_RDB_MON_TEMP_DATA_MASK);
+
+	return 0;
+}
+
+static int bcm54140_hwmon_read_in(struct device *dev, u32 attr,
+				  int channel, long *val)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	int bit, reg, tmp;	/* tmp is signed so a negative errno survives */
+
+	switch (attr) {
+	case hwmon_in_input:
+		reg = BCM54140_HWMON_IN_VAL_REG(channel);
+		break;
+	case hwmon_in_min:
+		reg = BCM54140_HWMON_IN_MIN_REG(channel);
+		break;
+	case hwmon_in_max:
+		reg = BCM54140_HWMON_IN_MAX_REG(channel);
+		break;
+	case hwmon_in_alarm:
+		bit = BCM54140_HWMON_IN_ALARM_BIT(channel);
+		return bcm54140_hwmon_read_alarm(dev, bit, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	tmp = bcm_phy_read_rdb(phydev, reg);
+	if (tmp < 0)
+		return tmp;
+
+	tmp &= BCM54140_HWMON_IN_MASK(channel);
+	*val = BCM54140_HWMON_TO_IN(channel, tmp);
+
+	return 0;
+}
+
+static int bcm54140_hwmon_read(struct device *dev,
+			       enum hwmon_sensor_types type, u32 attr,
+			       int channel, long *val)
+{
+	switch (type) {
+	case hwmon_temp:
+		return bcm54140_hwmon_read_temp(dev, attr, val);
+	case hwmon_in:
+		return bcm54140_hwmon_read_in(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const char *const bcm54140_hwmon_in_labels[] = {
+	"AVDDL",
+	"AVDDH",
+};
+
+static int bcm54140_hwmon_read_string(struct device *dev,
+				      enum hwmon_sensor_types type, u32 attr,
+				      int channel, const char **str)
+{
+	switch (type) {
+	case hwmon_in:
+		switch (attr) {
+		case hwmon_in_label:
+			*str = bcm54140_hwmon_in_labels[channel];
+			return 0;
+		default:
+			return -EOPNOTSUPP;
+		}
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int bcm54140_hwmon_write_temp(struct device *dev, u32 attr,
+				     int channel, long val)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	u16 mask = BCM54140_RDB_MON_TEMP_DATA_MASK;
+	u16 reg;
+
+	val = clamp_val(val, BCM54140_HWMON_TO_TEMP(mask),
+			BCM54140_HWMON_TO_TEMP(0));
+
+	switch (attr) {
+	case hwmon_temp_min:
+		reg = BCM54140_RDB_MON_TEMP_MIN;
+		break;
+	case hwmon_temp_max:
+		reg = BCM54140_RDB_MON_TEMP_MAX;
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return bcm_phy_modify_rdb(phydev, reg, mask,
+				  BCM54140_HWMON_FROM_TEMP(val));
+}
+
+static int bcm54140_hwmon_write_in(struct device *dev, u32 attr,
+				   int channel, long val)
+{
+	struct phy_device *phydev = dev_get_drvdata(dev);
+	u16 mask = BCM54140_HWMON_IN_MASK(channel);
+	u16 reg;
+
+	val = clamp_val(val, 0, BCM54140_HWMON_TO_IN(channel, mask));
+
+	switch (attr) {
+	case hwmon_in_min:
+		reg = BCM54140_HWMON_IN_MIN_REG(channel);
+		break;
+	case hwmon_in_max:
+		reg = BCM54140_HWMON_IN_MAX_REG(channel);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return bcm_phy_modify_rdb(phydev, reg, mask,
+				  BCM54140_HWMON_FROM_IN(channel, val));
+}
+
+static int bcm54140_hwmon_write(struct device *dev,
+				enum hwmon_sensor_types type, u32 attr,
+				int channel, long val)
+{
+	switch (type) {
+	case hwmon_temp:
+		return bcm54140_hwmon_write_temp(dev, attr, channel, val);
+	case hwmon_in:
+		return bcm54140_hwmon_write_in(dev, attr, channel, val);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static const struct hwmon_channel_info *bcm54140_hwmon_info[] = {
+	HWMON_CHANNEL_INFO(temp,
+			   HWMON_T_INPUT | HWMON_T_MIN | HWMON_T_MAX |
+			   HWMON_T_ALARM),
+	HWMON_CHANNEL_INFO(in,
+			   HWMON_I_INPUT | HWMON_I_MIN | HWMON_I_MAX |
+			   HWMON_I_ALARM | HWMON_I_LABEL,
+			   HWMON_I_INPUT | HWMON_I_MIN | HWMON_I_MAX |
+			   HWMON_I_ALARM | HWMON_I_LABEL),
+	NULL
+};
+
+static const struct hwmon_ops bcm54140_hwmon_ops = {
+	.is_visible = bcm54140_hwmon_is_visible,
+	.read = bcm54140_hwmon_read,
+	.read_string = bcm54140_hwmon_read_string,
+	.write = bcm54140_hwmon_write,
+};
+
+static const struct hwmon_chip_info bcm54140_chip_info = {
+	.ops = &bcm54140_hwmon_ops,
+	.info = bcm54140_hwmon_info,
+};
+
+static int bcm54140_enable_monitoring(struct phy_device *phydev)
+{
+	u16 mask, set;
+
+	/* 3.3V voltage mode */
+	set = BCM54140_RDB_MON_CTRL_V_MODE;
+
+	/* select round-robin */
+	mask = BCM54140_RDB_MON_CTRL_SEL_MASK;
+	set |= FIELD_PREP(BCM54140_RDB_MON_CTRL_SEL_MASK,
+			  BCM54140_RDB_MON_CTRL_SEL_RR);
+
+	/* remove power-down bit */
+	mask |= BCM54140_RDB_MON_CTRL_PWR_DOWN;
+
+	return bcm_phy_modify_rdb(phydev, BCM54140_RDB_MON_CTRL, mask, set);
+}
+
+/* Check if one PHY has already done the init of the parts common to all PHYs
+ * in the Quad PHY package.
+ */
+static bool bcm54140_is_pkg_init(struct phy_device *phydev)
+{
+	struct bcm54140_priv *priv = phydev->priv;
+	struct mii_bus *bus = phydev->mdio.bus;
+	int base_addr = priv->base_addr;
+	struct phy_device *phy;
+	int i;
+
+	/* Quad PHY */
+	for (i = 0; i < 4; i++) {
+		phy = mdiobus_get_phy(bus, base_addr + i);
+		if (!phy)
+			continue;
+
+		if ((phy->phy_id & phydev->drv->phy_id_mask) !=
+		    (phydev->drv->phy_id & phydev->drv->phy_id_mask))
+			continue;
+
+		priv = phy->priv;
+
+		if (priv && priv->pkg_init)
+			return true;
+	}
+
+	return false;
+}
+
+static int bcm54140_probe_once(struct phy_device *phydev)
+{
+	struct device *hwmon;
+	int ret;
+
+	/* enable hardware monitoring */
+	ret = bcm54140_enable_monitoring(phydev);
+	if (ret)
+		return ret;
+
+	hwmon = devm_hwmon_device_register_with_info(&phydev->mdio.dev,
+						     "BCM54140", phydev,
+						     &bcm54140_chip_info,
+						     NULL);
+	return PTR_ERR_OR_ZERO(hwmon);
+}
+#endif
+
 static int bcm54140_base_read_rdb(struct phy_device *phydev, u16 rdb)
 {
 	struct bcm54140_priv *priv = phydev->priv;
@@ -222,6 +606,18 @@ static int bcm54140_probe(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
+#if IS_ENABLED(CONFIG_HWMON)
+	mutex_init(&priv->alarm_lock);
+
+	if (!bcm54140_is_pkg_init(phydev)) {
+		ret = bcm54140_probe_once(phydev);
+		if (ret)
+			return ret;
+	}
+
+	priv->pkg_init = true;
+#endif
+
 	phydev_dbg(phydev, "probed (port %d, base PHY address %d)\n",
 		   priv->port, priv->base_addr);
 
-- 
cgit v1.2.3-59-g8ed1b


From 38f961e744840db9044af68f4773ae5feae60a89 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 20 Apr 2020 23:29:05 +0200
Subject: net: phy: add device-managed devm_mdiobus_register

If there's no special ordering requirement for mdiobus_unregister(),
then driver code can be simplified by using a device-managed version
of mdiobus_register(). Prerequisite is that bus allocation has been
done device-managed too. Else mdiobus_free() may be called whilst
bus is still registered, resulting in a BUG_ON(). Therefore let
devm_mdiobus_register() return -EPERM if bus was allocated
non-managed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c |  8 +++++++-
 include/linux/phy.h        | 17 +++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 346e88435d29..26b00af94573 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -170,7 +170,12 @@ EXPORT_SYMBOL(mdiobus_alloc_size);
 
 static void _devm_mdiobus_free(struct device *dev, void *res)
 {
-	mdiobus_free(*(struct mii_bus **)res);
+	struct mii_bus *bus = *(struct mii_bus **)res;
+
+	if (bus->is_managed_registered && bus->state == MDIOBUS_REGISTERED)
+		mdiobus_unregister(bus);
+
+	mdiobus_free(bus);
 }
 
 static int devm_mdiobus_match(struct device *dev, void *res, void *data)
@@ -210,6 +215,7 @@ struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv)
 	if (bus) {
 		*ptr = bus;
 		devres_add(dev, ptr);
+		bus->is_managed = 1;
 	} else {
 		devres_free(ptr);
 	}
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2432ca463ddc..3941a6bcba10 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -241,6 +241,9 @@ struct mii_bus {
 	int (*reset)(struct mii_bus *bus);
 	struct mdio_bus_stats stats[PHY_MAX_ADDR];
 
+	unsigned int is_managed:1;	/* is device-managed */
+	unsigned int is_managed_registered:1;
+
 	/*
 	 * A lock to ensure that only one thing can read/write
 	 * the MDIO bus at a time
@@ -286,6 +289,20 @@ static inline struct mii_bus *mdiobus_alloc(void)
 
 int __mdiobus_register(struct mii_bus *bus, struct module *owner);
 #define mdiobus_register(bus) __mdiobus_register(bus, THIS_MODULE)
+/* Device-managed variant of mdiobus_register(). The bus must have been
+ * allocated device-managed (is_managed set), otherwise -EPERM is returned.
+ */
+static inline int devm_mdiobus_register(struct mii_bus *bus)
+{
+	int err = bus->is_managed ? mdiobus_register(bus) : -EPERM;
+
+	/* flag a successful registration; _devm_mdiobus_free() checks it */
+	if (!err)
+		bus->is_managed_registered = 1;
+
+	return err;
+}
+
 void mdiobus_unregister(struct mii_bus *bus);
 void mdiobus_free(struct mii_bus *bus);
 struct mii_bus *devm_mdiobus_alloc_size(struct device *dev, int sizeof_priv);
-- 
cgit v1.2.3-59-g8ed1b


From 0785dad48003408da27579845db2f83c024636df Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 20 Apr 2020 23:29:55 +0200
Subject: r8169: use devm_mdiobus_register

Use new function devm_mdiobus_register() to simplify the driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b7a2853e7396..4c616701856a 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5185,20 +5185,18 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 	new_bus->read = r8169_mdio_read_reg;
 	new_bus->write = r8169_mdio_write_reg;
 
-	ret = mdiobus_register(new_bus);
+	ret = devm_mdiobus_register(new_bus);
 	if (ret)
 		return ret;
 
 	tp->phydev = mdiobus_get_phy(new_bus, 0);
 	if (!tp->phydev) {
-		mdiobus_unregister(new_bus);
 		return -ENODEV;
 	} else if (!tp->phydev->drv) {
 		/* Most chip versions fail with the genphy driver.
 		 * Therefore ensure that the dedicated PHY driver is loaded.
 		 */
 		dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
-		mdiobus_unregister(new_bus);
 		return -EUNATCH;
 	}
 
@@ -5523,7 +5521,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	rc = register_netdev(dev);
 	if (rc)
-		goto err_mdio_unregister;
+		return rc;
 
 	netif_info(tp, probe, dev, "%s, %pM, XID %03x, IRQ %d\n",
 		   rtl_chip_infos[chipset].name, dev->dev_addr, xid,
@@ -5542,10 +5540,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 		pm_runtime_put_sync(&pdev->dev);
 
 	return 0;
-
-err_mdio_unregister:
-	mdiobus_unregister(tp->phydev->mdio.bus);
-	return rc;
 }
 
 static struct pci_driver rtl8169_pci_driver = {
-- 
cgit v1.2.3-59-g8ed1b


From beb97d3a3192c00575580af9073921c6283cf93d Mon Sep 17 00:00:00 2001
From: wenxu <wenxu@ucloud.cn>
Date: Tue, 21 Apr 2020 07:55:43 +0800
Subject: net/sched: act_ct: update nf_conn_acct for act_ct SW offload in
 flowtable

When act_ct does SW offload through the flowtable, the counter of the
conntrack entry is never updated. So update the nf_conn_acct counter in
the act_ct flowtable software offload path.

Signed-off-by: wenxu <wenxu@ucloud.cn>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_ct.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/sched/act_ct.c b/net/sched/act_ct.c
index 1a766393be62..9adff83b523b 100644
--- a/net/sched/act_ct.c
+++ b/net/sched/act_ct.c
@@ -30,6 +30,7 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_conntrack_helper.h>
+#include <net/netfilter/nf_conntrack_acct.h>
 #include <net/netfilter/ipv6/nf_defrag_ipv6.h>
 #include <uapi/linux/netfilter/nf_nat.h>
 
@@ -536,6 +537,7 @@ static bool tcf_ct_flow_table_lookup(struct tcf_ct_params *p,
 	flow_offload_refresh(nf_ft, flow);
 	nf_conntrack_get(&ct->ct_general);
 	nf_ct_set(skb, ct, ctinfo);
+	nf_ct_acct_update(ct, dir, skb->len);
 
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 540bde5c2c3da005b87b3edb394d6ca4f890777d Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 21 Apr 2020 11:09:12 +0800
Subject: ila: remove unused macro 'ILA_HASH_TABLE_SIZE'

net/ipv6/ila/ila_xlat.c:604:0: warning: macro "ILA_HASH_TABLE_SIZE" is not used [-Wunused-macros]

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ila/ila_xlat.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/ipv6/ila/ila_xlat.c b/net/ipv6/ila/ila_xlat.c
index 5fc1f4e0c0cf..a1ac0e3d8c60 100644
--- a/net/ipv6/ila/ila_xlat.c
+++ b/net/ipv6/ila/ila_xlat.c
@@ -601,8 +601,6 @@ out_ret:
 	return ret;
 }
 
-#define ILA_HASH_TABLE_SIZE 1024
-
 int ila_xlat_init_net(struct net *net)
 {
 	struct ila_net *ilan = net_generic(net, ila_net_id);
-- 
cgit v1.2.3-59-g8ed1b


From dfddb54043f0a377f642bd0e6a28aa40769e2e65 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Tue, 21 Apr 2020 13:10:54 +0530
Subject: net: qrtr: Add tracepoint support

Add tracepoint support for QRTR with NS as the first candidate. Later on
this can be extended to core QRTR and transport drivers.

The trace_printk() used in NS has been replaced by tracepoints.

Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/qrtr.h | 115 ++++++++++++++++++++++++++++++++++++++++++++
 net/qrtr/ns.c               |  20 ++++----
 2 files changed, 126 insertions(+), 9 deletions(-)
 create mode 100644 include/trace/events/qrtr.h

diff --git a/include/trace/events/qrtr.h b/include/trace/events/qrtr.h
new file mode 100644
index 000000000000..b1de14c3bb93
--- /dev/null
+++ b/include/trace/events/qrtr.h
@@ -0,0 +1,115 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#undef TRACE_SYSTEM
+#define TRACE_SYSTEM qrtr
+
+#if !defined(_TRACE_QRTR_H) || defined(TRACE_HEADER_MULTI_READ)
+#define _TRACE_QRTR_H
+
+#include <linux/qrtr.h>
+#include <linux/tracepoint.h>
+
+TRACE_EVENT(qrtr_ns_service_announce_new,
+
+	TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
+
+	TP_ARGS(service, instance, node, port),
+
+	TP_STRUCT__entry(
+		__field(__le32, service)
+		__field(__le32, instance)
+		__field(__le32, node)
+		__field(__le32, port)
+	),
+
+	TP_fast_assign(
+		__entry->service = service;
+		__entry->instance = instance;
+		__entry->node = node;
+		__entry->port = port;
+	),
+
+	TP_printk("advertising new server [%d:%x]@[%d:%d]",
+		  __entry->service, __entry->instance, __entry->node,
+		  __entry->port
+	)
+);
+
+TRACE_EVENT(qrtr_ns_service_announce_del,
+
+	TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
+
+	TP_ARGS(service, instance, node, port),
+
+	TP_STRUCT__entry(
+		__field(__le32, service)
+		__field(__le32, instance)
+		__field(__le32, node)
+		__field(__le32, port)
+	),
+
+	TP_fast_assign(
+		__entry->service = service;
+		__entry->instance = instance;
+		__entry->node = node;
+		__entry->port = port;
+	),
+
+	TP_printk("advertising removal of server [%d:%x]@[%d:%d]",
+		  __entry->service, __entry->instance, __entry->node,
+		  __entry->port
+	)
+);
+
+TRACE_EVENT(qrtr_ns_server_add,
+
+	TP_PROTO(__le32 service, __le32 instance, __le32 node, __le32 port),
+
+	TP_ARGS(service, instance, node, port),
+
+	TP_STRUCT__entry(
+		__field(__le32, service)
+		__field(__le32, instance)
+		__field(__le32, node)
+		__field(__le32, port)
+	),
+
+	TP_fast_assign(
+		__entry->service = service;
+		__entry->instance = instance;
+		__entry->node = node;
+		__entry->port = port;
+	),
+
+	TP_printk("add server [%d:%x]@[%d:%d]",
+		  __entry->service, __entry->instance, __entry->node,
+		  __entry->port
+	)
+);
+
+TRACE_EVENT(qrtr_ns_message,
+
+	TP_PROTO(const char * const ctrl_pkt_str, __u32 sq_node, __u32 sq_port),
+
+	TP_ARGS(ctrl_pkt_str, sq_node, sq_port),
+
+	TP_STRUCT__entry(
+		__string(ctrl_pkt_str, ctrl_pkt_str)
+		__field(__u32, sq_node)
+		__field(__u32, sq_port)
+	),
+
+	TP_fast_assign(
+		__assign_str(ctrl_pkt_str, ctrl_pkt_str);
+		__entry->sq_node = sq_node;
+		__entry->sq_port = sq_port;
+	),
+
+	TP_printk("%s from %d:%d",
+		  __get_str(ctrl_pkt_str), __entry->sq_node, __entry->sq_port
+	)
+);
+
+#endif /* _TRACE_QRTR_H */
+
+/* This part must be outside protection */
+#include <trace/define_trace.h>
diff --git a/net/qrtr/ns.c b/net/qrtr/ns.c
index e7d0fe3f4330..3ca196fc7f9b 100644
--- a/net/qrtr/ns.c
+++ b/net/qrtr/ns.c
@@ -12,6 +12,9 @@
 
 #include "qrtr.h"
 
+#define CREATE_TRACE_POINTS
+#include <trace/events/qrtr.h>
+
 static RADIX_TREE(nodes, GFP_KERNEL);
 
 static struct {
@@ -105,8 +108,8 @@ static int service_announce_new(struct sockaddr_qrtr *dest,
 	struct msghdr msg = { };
 	struct kvec iv;
 
-	trace_printk("advertising new server [%d:%x]@[%d:%d]\n",
-		     srv->service, srv->instance, srv->node, srv->port);
+	trace_qrtr_ns_service_announce_new(srv->service, srv->instance,
+					   srv->node, srv->port);
 
 	iv.iov_base = &pkt;
 	iv.iov_len = sizeof(pkt);
@@ -132,8 +135,8 @@ static int service_announce_del(struct sockaddr_qrtr *dest,
 	struct kvec iv;
 	int ret;
 
-	trace_printk("advertising removal of server [%d:%x]@[%d:%d]\n",
-		     srv->service, srv->instance, srv->node, srv->port);
+	trace_qrtr_ns_service_announce_del(srv->service, srv->instance,
+					   srv->node, srv->port);
 
 	iv.iov_base = &pkt;
 	iv.iov_len = sizeof(pkt);
@@ -244,8 +247,8 @@ static struct qrtr_server *server_add(unsigned int service,
 
 	radix_tree_insert(&node->servers, port, srv);
 
-	trace_printk("add server [%d:%x]@[%d:%d]\n", srv->service,
-		     srv->instance, srv->node, srv->port);
+	trace_qrtr_ns_server_add(srv->service, srv->instance,
+				 srv->node, srv->port);
 
 	return srv;
 
@@ -633,9 +636,8 @@ static void qrtr_ns_worker(struct work_struct *work)
 		cmd = le32_to_cpu(pkt->cmd);
 		if (cmd < ARRAY_SIZE(qrtr_ctrl_pkt_strings) &&
 		    qrtr_ctrl_pkt_strings[cmd])
-			trace_printk("%s from %d:%d\n",
-				     qrtr_ctrl_pkt_strings[cmd], sq.sq_node,
-				     sq.sq_port);
+			trace_qrtr_ns_message(qrtr_ctrl_pkt_strings[cmd],
+					      sq.sq_node, sq.sq_port);
 
 		ret = 0;
 		switch (cmd) {
-- 
cgit v1.2.3-59-g8ed1b


From 3c7b51bd39b2078870baeeb98ad8190f447c2ed2 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Tue, 21 Apr 2020 21:13:47 +0300
Subject: net: dsa: felix: allow flooding for all traffic classes

Right now it can be seen that the VSC9959 (Felix) switch will not flood
frames if they have a VLAN tag with a PCP of 1-7 (nonzero).

It turns out that Felix is quite different from its cousin, Ocelot, in
that frame flooding can be allowed/denied per traffic class. Where
Ocelot has 1 instance of the ANA_FLOODING register, Felix has 8.

The approach that this driver is going to take is "thanks, but no
thanks". We have no use case of limiting the flooding domain based on
traffic class, so we just want to allow packets to be flooded, no matter
what traffic class they have.

So we copy the line of code from ocelot.c which does the one-shot
initialization of the flooding PGIDs, and we add it to felix.c as well -
except replicated 8 times.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 7 +++++++
 drivers/net/dsa/ocelot/felix.h | 1 +
 2 files changed, 8 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 9173b95551d1..8a633ddce6c5 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -522,6 +522,7 @@ static int felix_setup(struct dsa_switch *ds)
 	struct ocelot *ocelot = ds->priv;
 	struct felix *felix = ocelot_to_felix(ocelot);
 	int port, err;
+	int tc;
 
 	err = felix_init_structs(felix, ds->num_ports);
 	if (err)
@@ -555,6 +556,12 @@ static int felix_setup(struct dsa_switch *ds)
 	ocelot_write_rix(ocelot,
 			 ANA_PGID_PGID_PGID(GENMASK(ocelot->num_phys_ports, 0)),
 			 ANA_PGID_PGID, PGID_UC);
+	/* Setup the per-traffic class flooding PGIDs */
+	for (tc = 0; tc < FELIX_NUM_TC; tc++)
+		ocelot_write_rix(ocelot, ANA_FLOODING_FLD_MULTICAST(PGID_MC) |
+				 ANA_FLOODING_FLD_BROADCAST(PGID_MC) |
+				 ANA_FLOODING_FLD_UNICAST(PGID_UC),
+				 ANA_FLOODING, tc);
 
 	ds->mtu_enforcement_ingress = true;
 	/* It looks like the MAC/PCS interrupt register - PM0_IEVENT (0x8040)
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index 82d46f260041..2ad793c0e1df 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -5,6 +5,7 @@
 #define _MSCC_FELIX_H
 
 #define ocelot_to_felix(o)		container_of((o), struct felix, ocelot)
+#define FELIX_NUM_TC			8
 
 /* Platform-specific information */
 struct felix_info {
-- 
cgit v1.2.3-59-g8ed1b


From 3f251d741150265cfa7c84d30d105612449601ab Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Tue, 21 Apr 2020 18:40:22 -0600
Subject: selftests: Add tests for vrf and xfrms

Add tests for vrf and xfrms with a second round after adding a
qdisc. There are a few known problems documented with the test
cases that fail. The fix is non-trivial; will come back to it
when time allows.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/Makefile          |   1 +
 tools/testing/selftests/net/vrf-xfrm-tests.sh | 436 ++++++++++++++++++++++++++
 2 files changed, 437 insertions(+)
 create mode 100755 tools/testing/selftests/net/vrf-xfrm-tests.sh

diff --git a/tools/testing/selftests/net/Makefile b/tools/testing/selftests/net/Makefile
index 3f386eb9e7d7..895ec992b2f1 100644
--- a/tools/testing/selftests/net/Makefile
+++ b/tools/testing/selftests/net/Makefile
@@ -16,6 +16,7 @@ TEST_PROGS += altnames.sh icmp_redirect.sh ip6_gre_headroom.sh
 TEST_PROGS += route_localnet.sh
 TEST_PROGS += reuseaddr_ports_exhausted.sh
 TEST_PROGS += txtimestamp.sh
+TEST_PROGS += vrf-xfrm-tests.sh
 TEST_PROGS_EXTENDED := in_netns.sh
 TEST_GEN_FILES =  socket nettest
 TEST_GEN_FILES += psock_fanout psock_tpacket msg_zerocopy reuseport_addr_any
diff --git a/tools/testing/selftests/net/vrf-xfrm-tests.sh b/tools/testing/selftests/net/vrf-xfrm-tests.sh
new file mode 100755
index 000000000000..184da81f554f
--- /dev/null
+++ b/tools/testing/selftests/net/vrf-xfrm-tests.sh
@@ -0,0 +1,436 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Various combinations of VRF with xfrms and qdisc.
+
+# Kselftest framework requirement - SKIP code is 4.
+ksft_skip=4
+
+PAUSE_ON_FAIL=no
+VERBOSE=0
+ret=0
+
+HOST1_4=192.168.1.1
+HOST2_4=192.168.1.2
+HOST1_6=2001:db8:1::1
+HOST2_6=2001:db8:1::2
+
+XFRM1_4=10.0.1.1
+XFRM2_4=10.0.1.2
+XFRM1_6=fc00:1000::1
+XFRM2_6=fc00:1000::2
+IF_ID=123
+
+VRF=red
+TABLE=300
+
+AUTH_1=0xd94fcfea65fddf21dc6e0d24a0253508
+AUTH_2=0xdc6e0d24a0253508d94fcfea65fddf21
+ENC_1=0xfc46c20f8048be9725930ff3fb07ac2a91f0347dffeacf62
+ENC_2=0x3fb07ac2a91f0347dffeacf62fc46c20f8048be9725930ff
+SPI_1=0x02122b77
+SPI_2=0x2b770212
+
+which ping6 > /dev/null 2>&1 && ping6=$(which ping6) || ping6=$(which ping)
+
+################################################################################
+#
+# Log one test result and update the pass/fail counters; rc is compared
+# against the expected exit code.
+log_test()
+{
+	local rc=$1 expected=$2 msg="$3"
+
+	if ! [ ${rc} -eq ${expected} ]; then
+		ret=1
+		nfail=$((nfail+1))
+		printf "TEST: %-60s  [FAIL]\n" "${msg}"
+		if [ "${PAUSE_ON_FAIL}" = "yes" ]; then
+			echo
+			echo "hit enter to continue, 'q' to quit"
+			read a
+			[ "$a" = "q" ] && exit 1
+		fi
+	else
+		printf "TEST: %-60s  [ OK ]\n" "${msg}"
+		nsuccess=$((nsuccess+1))
+	fi
+}
+
+# Run a command in the host1 namespace; returns the command's exit status.
+run_cmd_host1()
+{
+	local cmd="$*"
+	local out rc
+
+	if [ "$VERBOSE" = "1" ]; then
+		# pass the command as data, it is not a printf format string
+		printf "    COMMAND: %s\n" "$cmd"
+	fi
+
+	out=$(eval ip netns exec host1 $cmd 2>&1)
+	rc=$?
+	if [ "$VERBOSE" = "1" ]; then
+		if [ -n "$out" ]; then
+			echo
+			echo "    $out"
+		fi
+		echo
+	fi
+	return $rc
+}
+
+################################################################################
+# create namespaces for hosts and sws
+
+create_vrf()
+{
+	local ns=$1
+	local vrf=$2
+	local table=$3
+
+	if [ -n "${ns}" ]; then
+		ns="-netns ${ns}"
+	fi
+
+	ip ${ns} link add ${vrf} type vrf table ${table}
+	ip ${ns} link set ${vrf} up
+	ip ${ns} route add vrf ${vrf} unreachable default metric 8192
+	ip ${ns} -6 route add vrf ${vrf} unreachable default metric 8192
+
+	ip ${ns} addr add 127.0.0.1/8 dev ${vrf}
+	ip ${ns} -6 addr add ::1 dev ${vrf} nodad
+
+	ip ${ns} ru del pref 0
+	ip ${ns} ru add pref 32765 from all lookup local
+	ip ${ns} -6 ru del pref 0
+	ip ${ns} -6 ru add pref 32765 from all lookup local
+}
+
+create_ns()
+{
+	local ns=$1
+	local addr=$2
+	local addr6=$3
+
+	[ -z "${addr}" ] && addr="-"
+	[ -z "${addr6}" ] && addr6="-"
+
+	ip netns add ${ns}
+
+	ip -netns ${ns} link set lo up
+	if [ "${addr}" != "-" ]; then
+		ip -netns ${ns} addr add dev lo ${addr}
+	fi
+	if [ "${addr6}" != "-" ]; then
+		ip -netns ${ns} -6 addr add dev lo ${addr6}
+	fi
+
+	ip -netns ${ns} ro add unreachable default metric 8192
+	ip -netns ${ns} -6 ro add unreachable default metric 8192
+
+	ip netns exec ${ns} sysctl -qw net.ipv4.ip_forward=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.keep_addr_on_down=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.all.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.forwarding=1
+	ip netns exec ${ns} sysctl -qw net.ipv6.conf.default.accept_dad=0
+}
+
+# create veth pair to connect namespaces and apply addresses.
+connect_ns()
+{
+	local ns1=$1
+	local ns1_dev=$2
+	local ns1_addr=$3
+	local ns1_addr6=$4
+	local ns2=$5
+	local ns2_dev=$6
+	local ns2_addr=$7
+	local ns2_addr6=$8
+	local ns1arg
+	local ns2arg
+
+	if [ -n "${ns1}" ]; then
+		ns1arg="-netns ${ns1}"
+	fi
+	if [ -n "${ns2}" ]; then
+		ns2arg="-netns ${ns2}"
+	fi
+
+	ip ${ns1arg} li add ${ns1_dev} type veth peer name tmp
+	ip ${ns1arg} li set ${ns1_dev} up
+	ip ${ns1arg} li set tmp netns ${ns2} name ${ns2_dev}
+	ip ${ns2arg} li set ${ns2_dev} up
+
+	if [ "${ns1_addr}" != "-" ]; then
+		ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr}
+		ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr}
+	fi
+
+	if [ "${ns1_addr6}" != "-" ]; then
+		ip ${ns1arg} addr add dev ${ns1_dev} ${ns1_addr6} nodad
+		ip ${ns2arg} addr add dev ${ns2_dev} ${ns2_addr6} nodad
+	fi
+}
+
+################################################################################
+
+cleanup()
+{
+	ip netns del host1
+	ip netns del host2
+}
+
+setup()
+{
+	create_ns "host1"
+	create_ns "host2"
+
+	connect_ns "host1" eth0 ${HOST1_4}/24 ${HOST1_6}/64 \
+	           "host2" eth0 ${HOST2_4}/24 ${HOST2_6}/64
+
+	create_vrf "host1" ${VRF} ${TABLE}
+	ip -netns host1 link set dev eth0 master ${VRF}
+}
+
+cleanup_xfrm()
+{
+	for ns in host1 host2
+	do
+		for x in state policy
+		do
+			ip -netns ${ns} xfrm ${x} flush
+			ip -6 -netns ${ns} xfrm ${x} flush
+		done
+	done
+}
+
+setup_xfrm()
+{
+	local h1_4=$1
+	local h2_4=$2
+	local h1_6=$3
+	local h2_6=$4
+	local devarg="$5"
+
+	#
+	# policy
+	#
+
+	# host1 - IPv4 out
+	ip -netns host1 xfrm policy add \
+	  src ${h1_4} dst ${h2_4} ${devarg} dir out \
+	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+	# host2 - IPv4 in
+	ip -netns host2 xfrm policy add \
+	  src ${h1_4} dst ${h2_4} dir in \
+	  tmpl src ${HOST1_4} dst ${HOST2_4} proto esp mode tunnel
+
+	# host1 - IPv4 in
+	ip -netns host1 xfrm policy add \
+	  src ${h2_4} dst ${h1_4} ${devarg} dir in \
+	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+	# host2 - IPv4 out
+	ip -netns host2 xfrm policy add \
+	  src ${h2_4} dst ${h1_4} dir out \
+	  tmpl src ${HOST2_4} dst ${HOST1_4} proto esp mode tunnel
+
+
+	# host1 - IPv6 out
+	ip -6 -netns host1 xfrm policy add \
+	  src ${h1_6} dst ${h2_6} ${devarg} dir out \
+	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+	# host2 - IPv6 in
+	ip -6 -netns host2 xfrm policy add \
+	  src ${h1_6} dst ${h2_6} dir in \
+	  tmpl src ${HOST1_6} dst ${HOST2_6} proto esp mode tunnel
+
+	# host1 - IPv6 in
+	ip -6 -netns host1 xfrm policy add \
+	  src ${h2_6} dst ${h1_6} ${devarg} dir in \
+	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+	# host2 - IPv6 out
+	ip -6 -netns host2 xfrm policy add \
+	  src ${h2_6} dst ${h1_6} dir out \
+	  tmpl src ${HOST2_6} dst ${HOST1_6} proto esp mode tunnel
+
+	#
+	# state
+	#
+	ip -netns host1 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_4} dst ${h2_4} ${devarg}
+
+	ip -netns host2 xfrm state add src ${HOST1_4} dst ${HOST2_4} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_4} dst ${h2_4}
+
+
+	ip -netns host1 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_4} dst ${h1_4} ${devarg}
+
+	ip -netns host2 xfrm state add src ${HOST2_4} dst ${HOST1_4} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_4} dst ${h1_4}
+
+
+	ip -6 -netns host1 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_6} dst ${h2_6} ${devarg}
+
+	ip -6 -netns host2 xfrm state add src ${HOST1_6} dst ${HOST2_6} \
+	    proto esp spi ${SPI_1} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_1} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_1} \
+	    sel src ${h1_6} dst ${h2_6}
+
+
+	ip -6 -netns host1 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_6} dst ${h1_6} ${devarg}
+
+	ip -6 -netns host2 xfrm state add src ${HOST2_6} dst ${HOST1_6} \
+	    proto esp spi ${SPI_2} reqid 0 mode tunnel \
+	    replay-window 4 replay-oseq 0x4 \
+	    auth-trunc 'hmac(md5)' ${AUTH_2} 96 \
+	    enc 'cbc(des3_ede)' ${ENC_2} \
+	    sel src ${h2_6} dst ${h1_6}
+}
+
+cleanup_xfrm_dev()
+{
+	ip -netns host1 li del xfrm0
+	ip -netns host2 addr del ${XFRM2_4}/24 dev eth0
+	ip -netns host2 addr del ${XFRM2_6}/64 dev eth0
+}
+
+setup_xfrm_dev()
+{
+	local vrfarg="vrf ${VRF}"
+
+	ip -netns host1 li add type xfrm dev eth0 if_id ${IF_ID}
+	ip -netns host1 li set xfrm0 ${vrfarg} up
+	ip -netns host1 addr add ${XFRM1_4}/24 dev xfrm0
+	ip -netns host1 addr add ${XFRM1_6}/64 dev xfrm0
+
+	ip -netns host2 addr add ${XFRM2_4}/24 dev eth0
+	ip -netns host2 addr add ${XFRM2_6}/64 dev eth0
+
+	setup_xfrm ${XFRM1_4} ${XFRM2_4} ${XFRM1_6} ${XFRM2_6} "if_id ${IF_ID}"
+}
+
+run_tests()
+{
+	cleanup_xfrm
+
+	# no IPsec
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	log_test $? 0 "IPv4 no xfrm policy"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 no xfrm policy"
+
+	# xfrm without VRF in sel
+	setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6}
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	log_test $? 0 "IPv4 xfrm policy based on address"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 xfrm policy based on address"
+	cleanup_xfrm
+
+	# xfrm with VRF in sel
+	# Known failure: ipv4 resets the flow oif after the lookup. Fix is
+	# not straightforward. NOTE(review): setup_xfrm below is commented out as well, so the IPv6 check runs right after cleanup_xfrm with no xfrm policy installed - confirm this is intended.
+	# setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev ${VRF}"
+	# run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	# log_test $? 0 "IPv4 xfrm policy with VRF in selector"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	log_test $? 0 "IPv6 xfrm policy with VRF in selector"
+	cleanup_xfrm
+
+	# xfrm with enslaved device in sel
+	# Known failures: combined with the above, __xfrm{4,6}_selector_match
+	# needs to consider both l3mdev and enslaved device index.
+	# setup_xfrm ${HOST1_4} ${HOST2_4} ${HOST1_6} ${HOST2_6} "dev eth0"
+	# run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${HOST2_4}
+	# log_test $? 0 "IPv4 xfrm policy with enslaved device in selector"
+	# run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${HOST2_6}
+	# log_test $? 0 "IPv6 xfrm policy with enslaved device in selector"
+	# cleanup_xfrm
+
+	# xfrm device
+	setup_xfrm_dev
+	run_cmd_host1 ip vrf exec ${VRF} ping -c1 -w1 ${XFRM2_4}
+	log_test $? 0 "IPv4 xfrm policy with xfrm device"
+	run_cmd_host1 ip vrf exec ${VRF} ${ping6} -c1 -w1 ${XFRM2_6}
+	log_test $? 0 "IPv6 xfrm policy with xfrm device"
+	cleanup_xfrm_dev
+}
+
+################################################################################
+# usage
+
+# Print the command line help.
+usage()
+{
+        cat <<EOF
+usage: ${0##*/} OPTS
+
+        -h          Show this help
+        -p          Pause on fail
+        -v          verbose mode (show commands and output)
+EOF
+}
+
+################################################################################
+# main
+
+while getopts :hpv o	# 'h' must be listed, else -h ends up in the '*' arm
+do
+	case $o in
+		p) PAUSE_ON_FAIL=yes;;
+		v) VERBOSE=$(($VERBOSE + 1));;
+		h) usage; exit 0;;
+		*) usage; exit 1;;
+	esac
+done
+
+cleanup 2>/dev/null
+setup
+
+echo
+echo "No qdisc on VRF device"
+run_tests
+
+run_cmd_host1 tc qdisc add dev ${VRF} root netem delay 100ms
+echo
+echo "netem qdisc on VRF device"
+run_tests
+
+printf "\nTests passed: %3d\n" ${nsuccess}
+printf "Tests failed: %3d\n"   ${nfail}
+
+exit $ret
-- 
cgit v1.2.3-59-g8ed1b


From 58e64a312c8468f3e8adead24b71ebb3039b381e Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 22 Apr 2020 10:11:35 +0800
Subject: macvlan: silence RCU list debugging warning

macvlan_hash_lookup() uses hlist_for_each_entry_rcu() for traversal, which
should run either under RCU in the fast path or under the protection of
rtnl_mutex.

In the case of holding RTNL, we should add the corresponding lockdep
expression to silence the following false-positive warning:

=============================
WARNING: suspicious RCU usage
5.7.0-rc1-next-20200416-00003-ga3b8d28bc #1 Not tainted
-----------------------------
drivers/net/macvlan.c:126 RCU-list traversed in non-reader section!!

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index e7289d67268f..654c1fa11826 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -123,7 +123,8 @@ static struct macvlan_dev *macvlan_hash_lookup(const struct macvlan_port *port,
 	struct macvlan_dev *vlan;
 	u32 idx = macvlan_eth_hash(addr);
 
-	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[idx], hlist) {
+	hlist_for_each_entry_rcu(vlan, &port->vlan_hash[idx], hlist,
+				 lockdep_rtnl_is_held()) {
 		if (ether_addr_equal_64bits(vlan->dev->dev_addr, addr))
 			return vlan;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From c89f44ff10fd4cdcfbebf4854aa1282fdad8de5d Mon Sep 17 00:00:00 2001
From: "Chuah, Kim Tatt" <kim.tatt.chuah@intel.com>
Date: Wed, 22 Apr 2020 11:31:06 +0800
Subject: net: stmmac: Add support for VLAN promiscuous mode

For dwmac4, enable VLAN promiscuity when MAC controller is requested to
enter promiscuous mode.

Signed-off-by: Chuah, Kim Tatt <kim.tatt.chuah@intel.com>
Signed-off-by: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Tan, Tee Min <tee.min.tan@intel.com>
Signed-off-by: Wong Vee Khee <vee.khee.wong@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/common.h      |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4.h      |  1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c | 67 +++++++++++++++++++++++
 3 files changed, 69 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/common.h b/drivers/net/ethernet/stmicro/stmmac/common.h
index 6208a68a331d..127f75862962 100644
--- a/drivers/net/ethernet/stmicro/stmmac/common.h
+++ b/drivers/net/ethernet/stmicro/stmmac/common.h
@@ -473,6 +473,7 @@ struct mac_device_info {
 	unsigned int xlgmac;
 	unsigned int num_vlan;
 	u32 vlan_filter[32];
+	unsigned int promisc;
 };
 
 struct stmmac_rx_routing {
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
index 28cac28253b8..61f3249bd724 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4.h
@@ -90,6 +90,7 @@
 #define GMAC_VLAN_CSVL			BIT(19)
 #define GMAC_VLAN_VLC			GENMASK(17, 16)
 #define GMAC_VLAN_VLC_SHIFT		16
+#define GMAC_VLAN_VLHT			GENMASK(15, 0)
 
 /* MAC VLAN Tag */
 #define GMAC_VLAN_TAG_VID		GENMASK(15, 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
index 39692d15d80c..ecd834e0e121 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac4_core.c
@@ -450,6 +450,12 @@ static int dwmac4_add_hw_vlan_rx_fltr(struct net_device *dev,
 	if (vid > 4095)
 		return -EINVAL;
 
+	if (hw->promisc) {
+		netdev_err(dev,
+			   "Adding VLAN in promisc mode not supported\n");
+		return -EPERM;
+	}
+
 	/* Single Rx VLAN Filter */
 	if (hw->num_vlan == 1) {
 		/* For single VLAN filter, VID 0 means VLAN promiscuous */
@@ -499,6 +505,12 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
 {
 	int i, ret = 0;
 
+	if (hw->promisc) {
+		netdev_err(dev,
+			   "Deleting VLAN in promisc mode not supported\n");
+		return -EPERM;
+	}
+
 	/* Single Rx VLAN Filter */
 	if (hw->num_vlan == 1) {
 		if ((hw->vlan_filter[0] & GMAC_VLAN_TAG_VID) == vid) {
@@ -523,9 +535,45 @@ static int dwmac4_del_hw_vlan_rx_fltr(struct net_device *dev,
 	return ret;
 }
 
+static void dwmac4_vlan_promisc_enable(struct net_device *dev,
+				       struct mac_device_info *hw)
+{
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	u32 hash;
+	u32 val;
+	int i;
+
+	/* Single Rx VLAN Filter */
+	if (hw->num_vlan == 1) {
+		dwmac4_write_single_vlan(dev, 0);
+		return;
+	}
+
+	/* Extended Rx VLAN Filter Enable */
+	for (i = 0; i < hw->num_vlan; i++) {
+		if (hw->vlan_filter[i] & GMAC_VLAN_TAG_DATA_VEN) {
+			val = hw->vlan_filter[i] & ~GMAC_VLAN_TAG_DATA_VEN;
+			dwmac4_write_vlan_filter(dev, hw, i, val);
+		}
+	}
+
+	hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE);
+	if (hash & GMAC_VLAN_VLHT) {
+		value = readl(ioaddr + GMAC_VLAN_TAG);
+		if (value & GMAC_VLAN_VTHM) {
+			value &= ~GMAC_VLAN_VTHM;
+			writel(value, ioaddr + GMAC_VLAN_TAG);
+		}
+	}
+}
+
 static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev,
 					   struct mac_device_info *hw)
 {
+	void __iomem *ioaddr = hw->pcsr;
+	u32 value;
+	u32 hash;
 	u32 val;
 	int i;
 
@@ -542,6 +590,13 @@ static void dwmac4_restore_hw_vlan_rx_fltr(struct net_device *dev,
 			dwmac4_write_vlan_filter(dev, hw, i, val);
 		}
 	}
+
+	hash = readl(ioaddr + GMAC_VLAN_HASH_TABLE);
+	if (hash & GMAC_VLAN_VLHT) {
+		value = readl(ioaddr + GMAC_VLAN_TAG);
+		value |= GMAC_VLAN_VTHM;
+		writel(value, ioaddr + GMAC_VLAN_TAG);
+	}
 }
 
 static void dwmac4_set_filter(struct mac_device_info *hw,
@@ -624,6 +679,18 @@ static void dwmac4_set_filter(struct mac_device_info *hw,
 		value |= GMAC_PACKET_FILTER_VTFE;
 
 	writel(value, ioaddr + GMAC_PACKET_FILTER);
+
+	if (dev->flags & IFF_PROMISC) {
+		if (!hw->promisc) {
+			hw->promisc = 1;
+			dwmac4_vlan_promisc_enable(dev, hw);
+		}
+	} else {
+		if (hw->promisc) {
+			hw->promisc = 0;
+			dwmac4_restore_hw_vlan_rx_fltr(dev, hw);
+		}
+	}
 }
 
 static void dwmac4_flow_ctrl(struct mac_device_info *hw, unsigned int duplex,
-- 
cgit v1.2.3-59-g8ed1b


From 8518307dc2b2a97b235c74920b45020f9bebe33e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 22 Apr 2020 15:16:36 +0800
Subject: net: caif: use true,false for bool variables

Fix the following coccicheck warning:

net/caif/caif_dev.c:410:2-13: WARNING: Assignment of 0/1 to bool
variable
net/caif/caif_dev.c:445:2-13: WARNING: Assignment of 0/1 to bool
variable
net/caif/caif_dev.c:145:1-12: WARNING: Assignment of 0/1 to bool
variable
net/caif/caif_dev.c:223:1-12: WARNING: Assignment of 0/1 to bool
variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/caif_dev.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/caif/caif_dev.c b/net/caif/caif_dev.c
index 195d2d67be8a..c10e5a55758d 100644
--- a/net/caif/caif_dev.c
+++ b/net/caif/caif_dev.c
@@ -142,7 +142,7 @@ static void caif_flow_cb(struct sk_buff *skb)
 
 	spin_lock_bh(&caifd->flow_lock);
 	send_xoff = caifd->xoff;
-	caifd->xoff = 0;
+	caifd->xoff = false;
 	dtor = caifd->xoff_skb_dtor;
 
 	if (WARN_ON(caifd->xoff_skb != skb))
@@ -220,7 +220,7 @@ static int transmit(struct cflayer *layer, struct cfpkt *pkt)
 	pr_debug("queue has stopped(%d) or is full (%d > %d)\n",
 			netif_queue_stopped(caifd->netdev),
 			qlen, high);
-	caifd->xoff = 1;
+	caifd->xoff = true;
 	caifd->xoff_skb = skb;
 	caifd->xoff_skb_dtor = skb->destructor;
 	skb->destructor = caif_flow_cb;
@@ -407,7 +407,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 			break;
 		}
 
-		caifd->xoff = 0;
+		caifd->xoff = false;
 		cfcnfg_set_phy_state(cfg, &caifd->layer, true);
 		rcu_read_unlock();
 
@@ -442,7 +442,7 @@ static int caif_device_notify(struct notifier_block *me, unsigned long what,
 		if (caifd->xoff_skb_dtor != NULL && caifd->xoff_skb != NULL)
 			caifd->xoff_skb->destructor = caifd->xoff_skb_dtor;
 
-		caifd->xoff = 0;
+		caifd->xoff = false;
 		caifd->xoff_skb_dtor = NULL;
 		caifd->xoff_skb = NULL;
 
-- 
cgit v1.2.3-59-g8ed1b


From bcf3440c6dd78bfe5836ec0990fe36d7b4bb7d20 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 22 Apr 2020 09:21:37 +0200
Subject: net: phy: micrel: add phy-mode support for the KSZ9031 PHY

Add support for following phy-modes: rgmii, rgmii-id, rgmii-txid, rgmii-rxid.

This PHY has an internal RX delay of 1.2ns and no delay for TX.

The pad skew registers allow setting the total TX delay to a maximum of
1.38ns and the total RX delay to a maximum of 2.58ns (configurable 1.38ns +
built-in 1.2ns), with a minimal delay of 0ns.

According to the RGMII v1.3 specification the delay provided by PCB traces
should be between 1.5ns and 2.0ns. The RGMII v2.0 allows to provide this
delay by MAC or PHY. So, we configure this PHY to the best values we can
get by this HW: TX delay to 1.38ns (max supported value) and RX delay to
1.80ns (best calculated delay)

The phy-modes can still be fine tuned/overwritten by *-skew-ps
device tree properties described in:
Documentation/devicetree/bindings/net/micrel-ksz90x1.txt

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Philippe Schenker <philippe.schenker@toradex.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/micrel.c | 128 +++++++++++++++++++++++++++++++++++++++++++++--
 1 file changed, 123 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/micrel.c b/drivers/net/phy/micrel.c
index 3a4d83fa52dc..3fe552675dd2 100644
--- a/drivers/net/phy/micrel.c
+++ b/drivers/net/phy/micrel.c
@@ -19,6 +19,7 @@
  *			 ksz9477
  */
 
+#include <linux/bitfield.h>
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/phy.h>
@@ -490,9 +491,50 @@ static int ksz9021_config_init(struct phy_device *phydev)
 
 /* MMD Address 0x2 */
 #define MII_KSZ9031RN_CONTROL_PAD_SKEW	4
+#define MII_KSZ9031RN_RX_CTL_M		GENMASK(7, 4)
+#define MII_KSZ9031RN_TX_CTL_M		GENMASK(3, 0)
+
 #define MII_KSZ9031RN_RX_DATA_PAD_SKEW	5
+#define MII_KSZ9031RN_RXD3		GENMASK(15, 12)
+#define MII_KSZ9031RN_RXD2		GENMASK(11, 8)
+#define MII_KSZ9031RN_RXD1		GENMASK(7, 4)
+#define MII_KSZ9031RN_RXD0		GENMASK(3, 0)
+
 #define MII_KSZ9031RN_TX_DATA_PAD_SKEW	6
+#define MII_KSZ9031RN_TXD3		GENMASK(15, 12)
+#define MII_KSZ9031RN_TXD2		GENMASK(11, 8)
+#define MII_KSZ9031RN_TXD1		GENMASK(7, 4)
+#define MII_KSZ9031RN_TXD0		GENMASK(3, 0)
+
 #define MII_KSZ9031RN_CLK_PAD_SKEW	8
+#define MII_KSZ9031RN_GTX_CLK		GENMASK(9, 5)
+#define MII_KSZ9031RN_RX_CLK		GENMASK(4, 0)
+
+/* KSZ9031 has internal RGMII_IDRX = 1.2ns and RGMII_IDTX = 0ns. To
+ * provide different RGMII options we need to configure delay offset
+ * for each pad relative to the built-in delay.
+ */
+/* keep rx as "No delay adjustment" and set rx_clk to +0.60ns to get delays of
+ * 1.80ns
+ */
+#define RX_ID				0x7
+#define RX_CLK_ID			0x19
+
+/* set rx to +0.30ns and rx_clk to -0.90ns to compensate the
+ * internal 1.2ns delay.
+ */
+#define RX_ND				0xc
+#define RX_CLK_ND			0x0
+
+/* set tx to -0.42ns and tx_clk to +0.96ns to get 1.38ns delay */
+#define TX_ID				0x0
+#define TX_CLK_ID			0x1f
+
+/* set tx and tx_clk to "No delay adjustment" to keep 0ns
+ * delay
+ */
+#define TX_ND				0x7
+#define TX_CLK_ND			0xf
 
 /* MMD Address 0x1C */
 #define MII_KSZ9031RN_EDPD		0x23
@@ -501,7 +543,8 @@ static int ksz9021_config_init(struct phy_device *phydev)
 static int ksz9031_of_load_skew_values(struct phy_device *phydev,
 				       const struct device_node *of_node,
 				       u16 reg, size_t field_sz,
-				       const char *field[], u8 numfields)
+				       const char *field[], u8 numfields,
+				       bool *update)
 {
 	int val[4] = {-1, -2, -3, -4};
 	int matches = 0;
@@ -517,6 +560,8 @@ static int ksz9031_of_load_skew_values(struct phy_device *phydev,
 	if (!matches)
 		return 0;
 
+	*update |= true;
+
 	if (matches < numfields)
 		newval = phy_read_mmd(phydev, 2, reg);
 	else
@@ -565,6 +610,67 @@ static int ksz9031_enable_edpd(struct phy_device *phydev)
 			     reg | MII_KSZ9031RN_EDPD_ENABLE);
 }
 
+static int ksz9031_config_rgmii_delay(struct phy_device *phydev)
+{
+	u16 rx, tx, rx_clk, tx_clk;
+	int ret;
+
+	switch (phydev->interface) {
+	case PHY_INTERFACE_MODE_RGMII:
+		tx = TX_ND;
+		tx_clk = TX_CLK_ND;
+		rx = RX_ND;
+		rx_clk = RX_CLK_ND;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_ID:
+		tx = TX_ID;
+		tx_clk = TX_CLK_ID;
+		rx = RX_ID;
+		rx_clk = RX_CLK_ID;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+		tx = TX_ND;
+		tx_clk = TX_CLK_ND;
+		rx = RX_ID;
+		rx_clk = RX_CLK_ID;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		tx = TX_ID;
+		tx_clk = TX_CLK_ID;
+		rx = RX_ND;
+		rx_clk = RX_CLK_ND;
+		break;
+	default:
+		return 0;
+	}
+
+	ret = phy_write_mmd(phydev, 2, MII_KSZ9031RN_CONTROL_PAD_SKEW,
+			    FIELD_PREP(MII_KSZ9031RN_RX_CTL_M, rx) |
+			    FIELD_PREP(MII_KSZ9031RN_TX_CTL_M, tx));
+	if (ret < 0)
+		return ret;
+
+	ret = phy_write_mmd(phydev, 2, MII_KSZ9031RN_RX_DATA_PAD_SKEW,
+			    FIELD_PREP(MII_KSZ9031RN_RXD3, rx) |
+			    FIELD_PREP(MII_KSZ9031RN_RXD2, rx) |
+			    FIELD_PREP(MII_KSZ9031RN_RXD1, rx) |
+			    FIELD_PREP(MII_KSZ9031RN_RXD0, rx));
+	if (ret < 0)
+		return ret;
+
+	ret = phy_write_mmd(phydev, 2, MII_KSZ9031RN_TX_DATA_PAD_SKEW,
+			    FIELD_PREP(MII_KSZ9031RN_TXD3, tx) |
+			    FIELD_PREP(MII_KSZ9031RN_TXD2, tx) |
+			    FIELD_PREP(MII_KSZ9031RN_TXD1, tx) |
+			    FIELD_PREP(MII_KSZ9031RN_TXD0, tx));
+	if (ret < 0)
+		return ret;
+
+	return phy_write_mmd(phydev, 2, MII_KSZ9031RN_CLK_PAD_SKEW,
+			     FIELD_PREP(MII_KSZ9031RN_GTX_CLK, tx_clk) |
+			     FIELD_PREP(MII_KSZ9031RN_RX_CLK, rx_clk));
+}
+
 static int ksz9031_config_init(struct phy_device *phydev)
 {
 	const struct device *dev = &phydev->mdio.dev;
@@ -597,21 +703,33 @@ static int ksz9031_config_init(struct phy_device *phydev)
 	} while (!of_node && dev_walker);
 
 	if (of_node) {
+		bool update = false;
+
+		if (phy_interface_is_rgmii(phydev)) {
+			result = ksz9031_config_rgmii_delay(phydev);
+			if (result < 0)
+				return result;
+		}
+
 		ksz9031_of_load_skew_values(phydev, of_node,
 				MII_KSZ9031RN_CLK_PAD_SKEW, 5,
-				clk_skews, 2);
+				clk_skews, 2, &update);
 
 		ksz9031_of_load_skew_values(phydev, of_node,
 				MII_KSZ9031RN_CONTROL_PAD_SKEW, 4,
-				control_skews, 2);
+				control_skews, 2, &update);
 
 		ksz9031_of_load_skew_values(phydev, of_node,
 				MII_KSZ9031RN_RX_DATA_PAD_SKEW, 4,
-				rx_data_skews, 4);
+				rx_data_skews, 4, &update);
 
 		ksz9031_of_load_skew_values(phydev, of_node,
 				MII_KSZ9031RN_TX_DATA_PAD_SKEW, 4,
-				tx_data_skews, 4);
+				tx_data_skews, 4, &update);
+
+		if (update && phydev->interface != PHY_INTERFACE_MODE_RGMII)
+			phydev_warn(phydev,
+				    "*-skew-ps values should be used only with phy-mode = \"rgmii\"\n");
 
 		/* Silicon Errata Sheet (DS80000691D or DS80000692D):
 		 * When the device links in the 1000BASE-T slave mode only,
-- 
cgit v1.2.3-59-g8ed1b


From d0f0c55e7c4ca7c66877064d7c1f4795025e88f8 Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Wed, 22 Apr 2020 16:15:42 +0800
Subject: net: phy: Use IS_ERR() to check and simplify code

Use IS_ERR() and PTR_ERR() instead of PTR_ERR_OR_ZERO()
to simplify the code and avoid a redundant local variable
definition and check.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 26b00af94573..3e79b96fa344 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -42,14 +42,11 @@
 
 static int mdiobus_register_gpiod(struct mdio_device *mdiodev)
 {
-	int error;
-
 	/* Deassert the optional reset signal */
 	mdiodev->reset_gpio = gpiod_get_optional(&mdiodev->dev,
 						 "reset", GPIOD_OUT_LOW);
-	error = PTR_ERR_OR_ZERO(mdiodev->reset_gpio);
-	if (error)
-		return error;
+	if (IS_ERR(mdiodev->reset_gpio))
+		return PTR_ERR(mdiodev->reset_gpio);
 
 	if (mdiodev->reset_gpio)
 		gpiod_set_consumer_name(mdiodev->reset_gpio, "PHY reset");
-- 
cgit v1.2.3-59-g8ed1b


From d9cc193cf0bf471630e203c369e60ab3735dd621 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 22 Apr 2020 11:24:53 +0200
Subject: dt-bindings: net: phy: Add support for NXP TJA11xx

Document the NXP TJA11xx PHY bindings.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/nxp,tja11xx.yaml       | 61 ++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/nxp,tja11xx.yaml

diff --git a/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
new file mode 100644
index 000000000000..42be0255512b
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/nxp,tja11xx.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0+
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/nxp,tja11xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: NXP TJA11xx PHY
+
+maintainers:
+  - Andrew Lunn <andrew@lunn.ch>
+  - Florian Fainelli <f.fainelli@gmail.com>
+  - Heiner Kallweit <hkallweit1@gmail.com>
+
+description:
+  Bindings for NXP TJA11xx automotive PHYs
+
+allOf:
+  - $ref: ethernet-phy.yaml#
+
+patternProperties:
+  "^ethernet-phy@[0-9a-f]+$":
+    type: object
+    description: |
+      Some packages have multiple PHYs. Secondary PHY should be defined as
+      subnode of the first (parent) PHY.
+
+    properties:
+      reg:
+        minimum: 0
+        maximum: 31
+        description:
+          The ID number for the child PHY. Should be +1 of parent PHY.
+
+    required:
+      - reg
+
+examples:
+  - |
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        tja1101_phy0: ethernet-phy@4 {
+            reg = <0x4>;
+        };
+    };
+  - |
+    mdio {
+        #address-cells = <1>;
+        #size-cells = <0>;
+
+        tja1102_phy0: ethernet-phy@4 {
+            reg = <0x4>;
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            tja1102_phy1: ethernet-phy@5 {
+                reg = <0x5>;
+            };
+        };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From 8f469506de2ad5528dedbab4d9ba34838991d5d1 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 22 Apr 2020 11:24:54 +0200
Subject: net: phy: tja11xx: add initial TJA1102 support

TJA1102 is a dual T1 PHY chip. Both PHYs are separately addressable.
Both PHYs are similar but differ in the functionality they provide. For
example, PHY 1 has no PHY ID and no health monitor.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 91 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 47caae770ffc..971286f5e5b0 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -15,6 +15,7 @@
 #define PHY_ID_MASK			0xfffffff0
 #define PHY_ID_TJA1100			0x0180dc40
 #define PHY_ID_TJA1101			0x0180dd00
+#define PHY_ID_TJA1102			0x0180dc80
 
 #define MII_ECTRL			17
 #define MII_ECTRL_LINK_CONTROL		BIT(15)
@@ -40,6 +41,10 @@
 #define MII_INTSRC_TEMP_ERR		BIT(1)
 #define MII_INTSRC_UV_ERR		BIT(3)
 
+#define MII_INTEN			22
+#define MII_INTEN_LINK_FAIL		BIT(10)
+#define MII_INTEN_LINK_UP		BIT(9)
+
 #define MII_COMMSTAT			23
 #define MII_COMMSTAT_LINK_UP		BIT(15)
 
@@ -180,6 +185,7 @@ static int tja11xx_config_init(struct phy_device *phydev)
 			return ret;
 		break;
 	case PHY_ID_TJA1101:
+	case PHY_ID_TJA1102:
 		ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
 		if (ret)
 			return ret;
@@ -344,6 +350,55 @@ static int tja11xx_probe(struct phy_device *phydev)
 	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
 }
 
+static int tja1102_match_phy_device(struct phy_device *phydev, bool port0)
+{
+	int ret;
+
+	if ((phydev->phy_id & PHY_ID_MASK) != PHY_ID_TJA1102)
+		return 0;
+
+	ret = phy_read(phydev, MII_PHYSID2);
+	if (ret < 0)
+		return ret;
+
+	/* TJA1102 Port 1 has phyid 0 and doesn't support temperature
+	 * and undervoltage alarms.
+	 */
+	if (port0)
+		return ret ? 1 : 0;
+
+	return !ret;
+}
+
+static int tja1102_p0_match_phy_device(struct phy_device *phydev)
+{
+	return tja1102_match_phy_device(phydev, true);
+}
+
+static int tja1102_p1_match_phy_device(struct phy_device *phydev)
+{
+	return tja1102_match_phy_device(phydev, false);
+}
+
+static int tja11xx_ack_interrupt(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_INTSRC);
+
+	return (ret < 0) ? ret : 0;
+}
+
+static int tja11xx_config_intr(struct phy_device *phydev)
+{
+	int value = 0;
+
+	if (phydev->interrupts == PHY_INTERRUPT_ENABLED)
+		value = MII_INTEN_LINK_FAIL | MII_INTEN_LINK_UP;
+
+	return phy_write(phydev, MII_INTEN, value);
+}
+
 static struct phy_driver tja11xx_driver[] = {
 	{
 		PHY_ID_MATCH_MODEL(PHY_ID_TJA1100),
@@ -375,6 +430,41 @@ static struct phy_driver tja11xx_driver[] = {
 		.get_sset_count = tja11xx_get_sset_count,
 		.get_strings	= tja11xx_get_strings,
 		.get_stats	= tja11xx_get_stats,
+	}, {
+		.name		= "NXP TJA1102 Port 0",
+		.features       = PHY_BASIC_T1_FEATURES,
+		.probe		= tja11xx_probe,
+		.soft_reset	= tja11xx_soft_reset,
+		.config_init	= tja11xx_config_init,
+		.read_status	= tja11xx_read_status,
+		.match_phy_device = tja1102_p0_match_phy_device,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.set_loopback   = genphy_loopback,
+		/* Statistics */
+		.get_sset_count = tja11xx_get_sset_count,
+		.get_strings	= tja11xx_get_strings,
+		.get_stats	= tja11xx_get_stats,
+		.ack_interrupt	= tja11xx_ack_interrupt,
+		.config_intr	= tja11xx_config_intr,
+
+	}, {
+		.name		= "NXP TJA1102 Port 1",
+		.features       = PHY_BASIC_T1_FEATURES,
+		/* currently no probe for Port 1 is needed */
+		.soft_reset	= tja11xx_soft_reset,
+		.config_init	= tja11xx_config_init,
+		.read_status	= tja11xx_read_status,
+		.match_phy_device = tja1102_p1_match_phy_device,
+		.suspend	= genphy_suspend,
+		.resume		= genphy_resume,
+		.set_loopback   = genphy_loopback,
+		/* Statistics */
+		.get_sset_count = tja11xx_get_sset_count,
+		.get_strings	= tja11xx_get_strings,
+		.get_stats	= tja11xx_get_stats,
+		.ack_interrupt	= tja11xx_ack_interrupt,
+		.config_intr	= tja11xx_config_intr,
 	}
 };
 
@@ -383,6 +473,7 @@ module_phy_driver(tja11xx_driver);
 static struct mdio_device_id __maybe_unused tja11xx_tbl[] = {
 	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA1100) },
 	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA1101) },
+	{ PHY_ID_MATCH_MODEL(PHY_ID_TJA1102) },
 	{ }
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 5972157c2dde11698d7bcfc55621107d97121c87 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 22 Apr 2020 11:24:55 +0200
Subject: net: mdio: of: export part of of_mdiobus_register_phy()

This function will be needed in tja11xx driver for secondary PHY
support.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/of/of_mdio.c    | 73 ++++++++++++++++++++++++++++---------------------
 include/linux/of_mdio.h | 11 +++++++-
 2 files changed, 52 insertions(+), 32 deletions(-)

diff --git a/drivers/of/of_mdio.c b/drivers/of/of_mdio.c
index 9f982c0627a0..a04afe79529c 100644
--- a/drivers/of/of_mdio.c
+++ b/drivers/of/of_mdio.c
@@ -60,39 +60,15 @@ static struct mii_timestamper *of_find_mii_timestamper(struct device_node *node)
 	return register_mii_timestamper(arg.np, arg.args[0]);
 }
 
-static int of_mdiobus_register_phy(struct mii_bus *mdio,
-				    struct device_node *child, u32 addr)
+int of_mdiobus_phy_device_register(struct mii_bus *mdio, struct phy_device *phy,
+			      struct device_node *child, u32 addr)
 {
-	struct mii_timestamper *mii_ts;
-	struct phy_device *phy;
-	bool is_c45;
 	int rc;
-	u32 phy_id;
-
-	mii_ts = of_find_mii_timestamper(child);
-	if (IS_ERR(mii_ts))
-		return PTR_ERR(mii_ts);
-
-	is_c45 = of_device_is_compatible(child,
-					 "ethernet-phy-ieee802.3-c45");
-
-	if (!is_c45 && !of_get_phy_id(child, &phy_id))
-		phy = phy_device_create(mdio, addr, phy_id, 0, NULL);
-	else
-		phy = get_phy_device(mdio, addr, is_c45);
-	if (IS_ERR(phy)) {
-		if (mii_ts)
-			unregister_mii_timestamper(mii_ts);
-		return PTR_ERR(phy);
-	}
 
 	rc = of_irq_get(child, 0);
-	if (rc == -EPROBE_DEFER) {
-		if (mii_ts)
-			unregister_mii_timestamper(mii_ts);
-		phy_device_free(phy);
+	if (rc == -EPROBE_DEFER)
 		return rc;
-	}
+
 	if (rc > 0) {
 		phy->irq = rc;
 		mdio->irq[addr] = rc;
@@ -117,11 +93,48 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 	/* All data is now stored in the phy struct;
 	 * register it */
 	rc = phy_device_register(phy);
+	if (rc) {
+		of_node_put(child);
+		return rc;
+	}
+
+	dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
+		child, addr);
+	return 0;
+}
+EXPORT_SYMBOL(of_mdiobus_phy_device_register);
+
+static int of_mdiobus_register_phy(struct mii_bus *mdio,
+				    struct device_node *child, u32 addr)
+{
+	struct mii_timestamper *mii_ts;
+	struct phy_device *phy;
+	bool is_c45;
+	int rc;
+	u32 phy_id;
+
+	mii_ts = of_find_mii_timestamper(child);
+	if (IS_ERR(mii_ts))
+		return PTR_ERR(mii_ts);
+
+	is_c45 = of_device_is_compatible(child,
+					 "ethernet-phy-ieee802.3-c45");
+
+	if (!is_c45 && !of_get_phy_id(child, &phy_id))
+		phy = phy_device_create(mdio, addr, phy_id, 0, NULL);
+	else
+		phy = get_phy_device(mdio, addr, is_c45);
+	if (IS_ERR(phy)) {
+		if (mii_ts)
+			unregister_mii_timestamper(mii_ts);
+		return PTR_ERR(phy);
+	}
+
+	rc = of_mdiobus_phy_device_register(mdio, phy, child, addr);
 	if (rc) {
 		if (mii_ts)
 			unregister_mii_timestamper(mii_ts);
 		phy_device_free(phy);
-		of_node_put(child);
 		return rc;
 	}
 
@@ -132,8 +145,6 @@ static int of_mdiobus_register_phy(struct mii_bus *mdio,
 	if (mii_ts)
 		phy->mii_ts = mii_ts;
 
-	dev_dbg(&mdio->dev, "registered phy %pOFn at address %i\n",
-		child, addr);
 	return 0;
 }
 
diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h
index 491a2b7e77c1..0f61a4ac6bcf 100644
--- a/include/linux/of_mdio.h
+++ b/include/linux/of_mdio.h
@@ -30,7 +30,9 @@ extern struct mii_bus *of_mdio_find_bus(struct device_node *mdio_np);
 extern int of_phy_register_fixed_link(struct device_node *np);
 extern void of_phy_deregister_fixed_link(struct device_node *np);
 extern bool of_phy_is_fixed_link(struct device_node *np);
-
+extern int of_mdiobus_phy_device_register(struct mii_bus *mdio,
+				     struct phy_device *phy,
+				     struct device_node *child, u32 addr);
 
 static inline int of_mdio_parse_addr(struct device *dev,
 				     const struct device_node *np)
@@ -118,6 +120,13 @@ static inline bool of_phy_is_fixed_link(struct device_node *np)
 {
 	return false;
 }
+
+static inline int of_mdiobus_phy_device_register(struct mii_bus *mdio,
+					    struct phy_device *phy,
+					    struct device_node *child, u32 addr)
+{
+	return -ENOSYS;
+}
 #endif
 
 
-- 
cgit v1.2.3-59-g8ed1b


From 6a64d3cdc5ef89b2ac629701de5ffb3df1fb7937 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 22 Apr 2020 11:24:56 +0200
Subject: net: phy: tja11xx: add delayed registration of TJA1102 PHY1

TJA1102 is a dual PHY package with PHY0 having a proper PHYID and PHY1
having no ID. On one hand it is possible to force PHY detection by
compatible; on the other hand we should be able to reset the complete chip
before PHY1 is configured, and we need to define dependencies for proper
power management.

We can solve it by defining PHY1 as child of PHY0:
	tja1102_phy0: ethernet-phy@4 {
		reg = <0x4>;

		interrupts-extended = <&gpio5 8 IRQ_TYPE_LEVEL_LOW>;

		reset-gpios = <&gpio5 9 GPIO_ACTIVE_LOW>;
		reset-assert-us = <20>;
		reset-deassert-us = <2000>;

		tja1102_phy1: ethernet-phy@5 {
			reg = <0x5>;

			interrupts-extended = <&gpio5 8 IRQ_TYPE_LEVEL_LOW>;
		};
	};

The PHY1 should be a subnode of PHY0 and registered only after PHY0 was
completely reset and initialized.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 112 +++++++++++++++++++++++++++++++++++++++---
 1 file changed, 105 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 971286f5e5b0..cc766b2d4136 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -6,11 +6,14 @@
 #include <linux/delay.h>
 #include <linux/ethtool.h>
 #include <linux/kernel.h>
+#include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/hwmon.h>
 #include <linux/bitfield.h>
+#include <linux/of_mdio.h>
+#include <linux/of_irq.h>
 
 #define PHY_ID_MASK			0xfffffff0
 #define PHY_ID_TJA1100			0x0180dc40
@@ -57,6 +60,8 @@
 struct tja11xx_priv {
 	char		*hwmon_name;
 	struct device	*hwmon_dev;
+	struct phy_device *phydev;
+	struct work_struct phy_register_work;
 };
 
 struct tja11xx_phy_stats {
@@ -323,16 +328,12 @@ static const struct hwmon_chip_info tja11xx_hwmon_chip_info = {
 	.info		= tja11xx_hwmon_info,
 };
 
-static int tja11xx_probe(struct phy_device *phydev)
+static int tja11xx_hwmon_register(struct phy_device *phydev,
+				  struct tja11xx_priv *priv)
 {
 	struct device *dev = &phydev->mdio.dev;
-	struct tja11xx_priv *priv;
 	int i;
 
-	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
-	if (!priv)
-		return -ENOMEM;
-
 	priv->hwmon_name = devm_kstrdup(dev, dev_name(dev), GFP_KERNEL);
 	if (!priv->hwmon_name)
 		return -ENOMEM;
@@ -350,6 +351,103 @@ static int tja11xx_probe(struct phy_device *phydev)
 	return PTR_ERR_OR_ZERO(priv->hwmon_dev);
 }
 
+static int tja11xx_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct tja11xx_priv *priv;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->phydev = phydev;
+
+	return tja11xx_hwmon_register(phydev, priv);
+}
+
+static void tja1102_p1_register(struct work_struct *work)
+{
+	struct tja11xx_priv *priv = container_of(work, struct tja11xx_priv,
+						 phy_register_work);
+	struct phy_device *phydev_phy0 = priv->phydev;
+	struct mii_bus *bus = phydev_phy0->mdio.bus;
+	struct device *dev = &phydev_phy0->mdio.dev;
+	struct device_node *np = dev->of_node;
+	struct device_node *child;
+	int ret;
+
+	for_each_available_child_of_node(np, child) {
+		struct phy_device *phy;
+		int addr;
+
+		addr = of_mdio_parse_addr(dev, child);
+		if (addr < 0) {
+			dev_err(dev, "Can't parse addr\n");
+			continue;
+		} else if (addr != phydev_phy0->mdio.addr + 1) {
+			/* Currently we care only about double PHY chip TJA1102.
+			 * If some day NXP will decide to bring chips with more
+			 * PHYs, this logic should be reworked.
+			 */
+			dev_err(dev, "Unexpected address. Should be: %i\n",
+				phydev_phy0->mdio.addr + 1);
+			continue;
+		}
+
+		if (mdiobus_is_registered_device(bus, addr)) {
+			dev_err(dev, "device is already registered\n");
+			continue;
+		}
+
+		/* Real PHY ID of Port 1 is 0 */
+		phy = phy_device_create(bus, addr, PHY_ID_TJA1102, false, NULL);
+		if (IS_ERR(phy)) {
+			dev_err(dev, "Can't create PHY device for Port 1: %i\n",
+				addr);
+			continue;
+		}
+
+		/* Overwrite parent device. phy_device_create() set parent to
+		 * the mii_bus->dev, which is not correct in case.
+		 */
+		phy->mdio.dev.parent = dev;
+
+		ret = of_mdiobus_phy_device_register(bus, phy, child, addr);
+		if (ret) {
+			/* All resources needed for Port 1 should be already
+			 * available for Port 0. Both ports use the same
+			 * interrupt line, so -EPROBE_DEFER would make no sense
+			 * here.
+			 */
+			dev_err(dev, "Can't register Port 1. Unexpected error: %i\n",
+				ret);
+			phy_device_free(phy);
+		}
+	}
+}
+
+static int tja1102_p0_probe(struct phy_device *phydev)
+{
+	struct device *dev = &phydev->mdio.dev;
+	struct tja11xx_priv *priv;
+	int ret;
+
+	priv = devm_kzalloc(dev, sizeof(*priv), GFP_KERNEL);
+	if (!priv)
+		return -ENOMEM;
+
+	priv->phydev = phydev;
+	INIT_WORK(&priv->phy_register_work, tja1102_p1_register);
+
+	ret = tja11xx_hwmon_register(phydev, priv);
+	if (ret)
+		return ret;
+
+	schedule_work(&priv->phy_register_work);
+
+	return 0;
+}
+
 static int tja1102_match_phy_device(struct phy_device *phydev, bool port0)
 {
 	int ret;
@@ -433,7 +531,7 @@ static struct phy_driver tja11xx_driver[] = {
 	}, {
 		.name		= "NXP TJA1102 Port 0",
 		.features       = PHY_BASIC_T1_FEATURES,
-		.probe		= tja11xx_probe,
+		.probe		= tja1102_p0_probe,
 		.soft_reset	= tja11xx_soft_reset,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
-- 
cgit v1.2.3-59-g8ed1b


From 93e106da6a7514445c1e27fdbb6b9810f3df8452 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 22 Apr 2020 19:48:29 +0300
Subject: selftests: forwarding: pedit_dsfield: Add pedit munge ip6 dsfield

Extend the pedit_dsfield forwarding selftest with coverage of "pedit ex
munge ip6 dsfield set".

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../selftests/net/forwarding/pedit_dsfield.sh      | 66 ++++++++++++++++++++++
 1 file changed, 66 insertions(+)

diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index b50081855913..1181d647f6a7 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -20,10 +20,14 @@
 
 ALL_TESTS="
 	ping_ipv4
+	ping_ipv6
 	test_ip_dsfield
 	test_ip_dscp
 	test_ip_ecn
 	test_ip_dscp_ecn
+	test_ip6_dsfield
+	test_ip6_dscp
+	test_ip6_ecn
 "
 
 NUM_NETIFS=4
@@ -107,6 +111,11 @@ ping_ipv4()
 	ping_test $h1 192.0.2.2
 }
 
+ping_ipv6()
+{
+	ping6_test $h1 2001:db8:1::2
+}
+
 do_test_pedit_dsfield_common()
 {
 	local pedit_locus=$1; shift
@@ -228,6 +237,63 @@ test_ip_dscp_ecn()
 	do_test_ip_dscp_ecn "dev $swp2 egress"
 }
 
+do_test_ip6_dsfield()
+{
+	local locus=$1; shift
+	local dsfield
+
+	for dsfield in 0 1 2 3 128 252 253 254 255; do
+		do_test_pedit_dsfield "$locus"				\
+				  "ip6 traffic_class set $dsfield"	\
+				  ipv6 "ip_tos $dsfield"		\
+				  "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_dsfield()
+{
+	do_test_ip6_dsfield "dev $swp1 ingress"
+	do_test_ip6_dsfield "dev $swp2 egress"
+}
+
+do_test_ip6_dscp()
+{
+	local locus=$1; shift
+	local dscp
+
+	for dscp in 0 1 2 3 32 61 62 63; do
+		do_test_pedit_dsfield "$locus"				       \
+			    "ip6 traffic_class set $((dscp << 2)) retain 0xfc" \
+			    ipv6 "ip_tos $(((dscp << 2) | 1))"		       \
+			    "-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_dscp()
+{
+	do_test_ip6_dscp "dev $swp1 ingress"
+	do_test_ip6_dscp "dev $swp2 egress"
+}
+
+do_test_ip6_ecn()
+{
+	local locus=$1; shift
+	local ecn
+
+	for ecn in 0 1 2 3; do
+		do_test_pedit_dsfield "$locus"				\
+				"ip6 traffic_class set $ecn retain 0x3"	\
+				ipv6 "ip_tos $((124 | $ecn))"		\
+				"-6 -A 2001:db8:1::1 -B 2001:db8:1::2"
+	done
+}
+
+test_ip6_ecn()
+{
+	do_test_ip6_ecn "dev $swp1 ingress"
+	do_test_ip6_ecn "dev $swp2 egress"
+}
+
 trap cleanup EXIT
 
 setup_prepare
-- 
cgit v1.2.3-59-g8ed1b


From f132ccc56e35875655226915588cab63a16237ef Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Wed, 22 Apr 2020 19:48:30 +0300
Subject: selftests: tc-testing: Add a TDC test for pedit munge ip6 dsfield

Add a self-test for the IPv6 dsfield munge that iproute2 will support.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/actions/pedit.json         | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
index f8ea6f5fa8e9..72cdc3c800a5 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/actions/pedit.json
@@ -1471,6 +1471,31 @@
             "$TC actions flush action pedit"
         ]
     },
+    {
+        "id": "94bb",
+        "name": "Add pedit action with LAYERED_OP ip6 traffic_class",
+        "category": [
+            "actions",
+            "pedit",
+            "layered_op"
+        ],
+        "setup": [
+            [
+                "$TC actions flush action pedit",
+                0,
+                1,
+                255
+            ]
+        ],
+        "cmdUnderTest": "$TC actions add action pedit ex munge ip6 traffic_class set 0x40 continue",
+        "expExitCode": "0",
+        "verifyCmd": "$TC actions list action pedit",
+        "matchPattern": "ipv6\\+0: val 04000000 mask f00fffff",
+        "matchCount": "1",
+        "teardown": [
+            "$TC actions flush action pedit"
+        ]
+    },
     {
         "id": "6f5e",
         "name": "Add pedit action with LAYERED_OP ip6 flow_lbl",
-- 
cgit v1.2.3-59-g8ed1b


From 493f3cc7ee020a4c5da02f6502743d9ae7be50d6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Wed, 22 Apr 2020 17:08:22 -0600
Subject: selftests: A few improvements to fib_nexthops.sh

Add nodad when adding IPv6 addresses and remove the sleep.

A recent change to iproute2 moved the 'pref medium' to the prefix
(where it belongs). Change the expected route check to strip
'pref medium' to be compatible with old and new iproute2.

Add IPv4 runtime test with an IPv6 address as the gateway in
the default route.

Signed-off-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 25 ++++++++++++++-----------
 1 file changed, 14 insertions(+), 11 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 796670ebc65b..5890ba6d7ef6 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -150,31 +150,31 @@ setup()
 	$IP li add veth1 type veth peer name veth2
 	$IP li set veth1 up
 	$IP addr add 172.16.1.1/24 dev veth1
-	$IP -6 addr add 2001:db8:91::1/64 dev veth1
+	$IP -6 addr add 2001:db8:91::1/64 dev veth1 nodad
 
 	$IP li add veth3 type veth peer name veth4
 	$IP li set veth3 up
 	$IP addr add 172.16.2.1/24 dev veth3
-	$IP -6 addr add 2001:db8:92::1/64 dev veth3
+	$IP -6 addr add 2001:db8:92::1/64 dev veth3 nodad
 
 	$IP li set veth2 netns peer up
 	ip -netns peer addr add 172.16.1.2/24 dev veth2
-	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2
+	ip -netns peer -6 addr add 2001:db8:91::2/64 dev veth2 nodad
 
 	$IP li set veth4 netns peer up
 	ip -netns peer addr add 172.16.2.2/24 dev veth4
-	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4
+	ip -netns peer -6 addr add 2001:db8:92::2/64 dev veth4 nodad
 
 	ip -netns remote li add veth5 type veth peer name veth6
 	ip -netns remote li set veth5 up
 	ip -netns remote addr add dev veth5 172.16.101.1/24
-	ip -netns remote addr add dev veth5 2001:db8:101::1/64
+	ip -netns remote -6 addr add dev veth5 2001:db8:101::1/64 nodad
 	ip -netns remote ro add 172.16.0.0/22 via 172.16.101.2
 	ip -netns remote -6 ro add 2001:db8:90::/40 via 2001:db8:101::2
 
 	ip -netns remote li set veth6 netns peer up
 	ip -netns peer addr add dev veth6 172.16.101.2/24
-	ip -netns peer addr add dev veth6 2001:db8:101::2/64
+	ip -netns peer -6 addr add dev veth6 2001:db8:101::2/64 nodad
 	set +e
 }
 
@@ -248,7 +248,7 @@ check_route6()
 	local expected="$2"
 	local out
 
-	out=$($IP -6 route ls match ${pfx} 2>/dev/null)
+	out=$($IP -6 route ls match ${pfx} 2>/dev/null | sed -e 's/pref medium//')
 
 	check_output "${out}" "${expected}"
 }
@@ -423,8 +423,6 @@ ipv6_fcnal_runtime()
 	echo "IPv6 functional runtime"
 	echo "-----------------------"
 
-	sleep 5
-
 	#
 	# IPv6 - the basics
 	#
@@ -481,12 +479,12 @@ ipv6_fcnal_runtime()
 	run_cmd "$IP -6 nexthop add id 85 dev veth1"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 85"
 	log_test $? 0 "IPv6 route with device only nexthop"
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024 pref medium"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 85 dev veth1 metric 1024"
 
 	run_cmd "$IP nexthop add id 123 group 81/85"
 	run_cmd "$IP ro replace 2001:db8:101::1/128 nhid 123"
 	log_test $? 0 "IPv6 multipath route with nexthop mix - dev only + gw"
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1 pref medium"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 123 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop dev veth1 weight 1"
 
 	#
 	# IPv6 route with v4 nexthop - not allowed
@@ -843,6 +841,11 @@ ipv4_fcnal_runtime()
 		$IP neigh sh | grep 'dev veth1'
 	fi
 
+	run_cmd "$IP ro del 172.16.101.1/32 via inet6 ${lladdr} dev veth1"
+	run_cmd "$IP -4 ro add default via inet6 ${lladdr} dev veth1"
+	run_cmd "ip netns exec me ping -c1 -w1 172.16.101.1"
+	log_test $? 0 "IPv4 default route with IPv6 gateway"
+
 	#
 	# MPLS as an example of LWT encap
 	#
-- 
cgit v1.2.3-59-g8ed1b


From 788f87ac608c518b74f338acb95f197cf6e3d0c4 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 22 Apr 2020 15:05:09 +0300
Subject: xdp: export the DEV_MAP_BULK_SIZE macro

Export the DEV_MAP_BULK_SIZE macro to the header file so that drivers
can directly use it as the maximum number of xdp_frames received in the
.ndo_xdp_xmit() callback.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/xdp.h   | 2 ++
 kernel/bpf/devmap.c | 1 -
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 40c6d3398458..3cc6d5d84aa4 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -181,4 +181,6 @@ bool xdp_attachment_flags_ok(struct xdp_attachment_info *info,
 void xdp_attachment_setup(struct xdp_attachment_info *info,
 			  struct netdev_bpf *bpf);
 
+#define DEV_MAP_BULK_SIZE 16
+
 #endif /* __LINUX_NET_XDP_H__ */
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 58bdca5d978a..a51d9fb7a359 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -52,7 +52,6 @@
 #define DEV_CREATE_FLAG_MASK \
 	(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY)
 
-#define DEV_MAP_BULK_SIZE 16
 struct xdp_dev_bulk_queue {
 	struct xdp_frame *q[DEV_MAP_BULK_SIZE];
 	struct list_head flush_node;
-- 
cgit v1.2.3-59-g8ed1b


From 48c0481e5ad1d1eec6ccfaee6bb8a030fbbd07f7 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 22 Apr 2020 15:05:10 +0300
Subject: dpaa2-eth: return num_enqueued frames from enqueue callback

The enqueue dpaa2-eth callback now returns the number of successfully
enqueued frames. This is a preliminary patch necessary for adding
support for bulk ring mode enqueue.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 34 ++++++++++++++++--------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  5 ++--
 2 files changed, 26 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index b6c46639aa4c..7b41ece8f160 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016-2019 NXP
+ * Copyright 2016-2020 NXP
  */
 #include <linux/init.h>
 #include <linux/module.h>
@@ -268,7 +268,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 
 	fq = &priv->fq[queue_id];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, fd, 0);
+		err = priv->enqueue(priv, fq, fd, 0, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -847,7 +847,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 	 * the Tx confirmation callback for this frame
 	 */
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, prio);
+		err = priv->enqueue(priv, fq, &fd, prio, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -1937,7 +1937,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 
 	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0);
+		err = priv->enqueue(priv, fq, &fd, 0, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -2523,19 +2523,31 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 
 static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio)
+				       struct dpaa2_fd *fd, u8 prio,
+				       int *frames_enqueued)
 {
-	return dpaa2_io_service_enqueue_qd(fq->channel->dpio,
-					   priv->tx_qdid, prio,
-					   fq->tx_qdbin, fd);
+	int err;
+
+	err = dpaa2_io_service_enqueue_qd(fq->channel->dpio,
+					  priv->tx_qdid, prio,
+					  fq->tx_qdbin, fd);
+	if (!err && frames_enqueued)
+		*frames_enqueued = 1;
+	return err;
 }
 
 static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio)
+				       struct dpaa2_fd *fd, u8 prio,
+				       int *frames_enqueued)
 {
-	return dpaa2_io_service_enqueue_fq(fq->channel->dpio,
-					   fq->tx_fqid[prio], fd);
+	int err;
+
+	err = dpaa2_io_service_enqueue_fq(fq->channel->dpio,
+					  fq->tx_fqid[prio], fd);
+	if (!err && frames_enqueued)
+		*frames_enqueued = 1;
+	return err;
 }
 
 static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 7635db3ef903..085ff750e4b5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause) */
 /* Copyright 2014-2016 Freescale Semiconductor Inc.
- * Copyright 2016 NXP
+ * Copyright 2016-2020 NXP
  */
 
 #ifndef __DPAA2_ETH_H
@@ -371,7 +371,8 @@ struct dpaa2_eth_priv {
 	struct dpaa2_eth_fq fq[DPAA2_ETH_MAX_QUEUES];
 	int (*enqueue)(struct dpaa2_eth_priv *priv,
 		       struct dpaa2_eth_fq *fq,
-		       struct dpaa2_fd *fd, u8 prio);
+		       struct dpaa2_fd *fd, u8 prio,
+		       int *frames_enqueued);
 
 	u8 num_channels;
 	struct dpaa2_eth_channel *channel[DPAA2_ETH_MAX_DPCONS];
-- 
cgit v1.2.3-59-g8ed1b


From 6ff8044751bdc40fd3199813bfe9b93d056fc15d Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 22 Apr 2020 15:05:11 +0300
Subject: dpaa2-eth: use the bulk ring mode enqueue interface

Update the dpaa2-eth driver to use the bulk enqueue function introduced
with the change to QBMAN ring mode. At the moment, no functional changes
are made but rather the driver just transitions to the new interface
while still enqueuing just one frame at a time.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 35 ++++++++++++++----------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  1 +
 2 files changed, 22 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 7b41ece8f160..26c2868435d5 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -268,7 +268,7 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 
 	fq = &priv->fq[queue_id];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, fd, 0, NULL);
+		err = priv->enqueue(priv, fq, fd, 0, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -847,7 +847,7 @@ static netdev_tx_t dpaa2_eth_tx(struct sk_buff *skb, struct net_device *net_dev)
 	 * the Tx confirmation callback for this frame
 	 */
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, prio, NULL);
+		err = priv->enqueue(priv, fq, &fd, prio, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -1937,7 +1937,7 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 
 	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
 	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0, NULL);
+		err = priv->enqueue(priv, fq, &fd, 0, 1, NULL);
 		if (err != -EBUSY)
 			break;
 	}
@@ -2524,6 +2524,7 @@ static int set_buffer_layout(struct dpaa2_eth_priv *priv)
 static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 				       struct dpaa2_eth_fq *fq,
 				       struct dpaa2_fd *fd, u8 prio,
+				       u32 num_frames __always_unused,
 				       int *frames_enqueued)
 {
 	int err;
@@ -2536,18 +2537,24 @@ static inline int dpaa2_eth_enqueue_qd(struct dpaa2_eth_priv *priv,
 	return err;
 }
 
-static inline int dpaa2_eth_enqueue_fq(struct dpaa2_eth_priv *priv,
-				       struct dpaa2_eth_fq *fq,
-				       struct dpaa2_fd *fd, u8 prio,
-				       int *frames_enqueued)
+static inline int dpaa2_eth_enqueue_fq_multiple(struct dpaa2_eth_priv *priv,
+						struct dpaa2_eth_fq *fq,
+						struct dpaa2_fd *fd,
+						u8 prio, u32 num_frames,
+						int *frames_enqueued)
 {
 	int err;
 
-	err = dpaa2_io_service_enqueue_fq(fq->channel->dpio,
-					  fq->tx_fqid[prio], fd);
-	if (!err && frames_enqueued)
-		*frames_enqueued = 1;
-	return err;
+	err = dpaa2_io_service_enqueue_multiple_fq(fq->channel->dpio,
+						   fq->tx_fqid[prio],
+						   fd, num_frames);
+
+	if (err == 0)
+		return -EBUSY;
+
+	if (frames_enqueued)
+		*frames_enqueued = err;
+	return 0;
 }
 
 static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
@@ -2556,7 +2563,7 @@ static void set_enqueue_mode(struct dpaa2_eth_priv *priv)
 				   DPNI_ENQUEUE_FQID_VER_MINOR) < 0)
 		priv->enqueue = dpaa2_eth_enqueue_qd;
 	else
-		priv->enqueue = dpaa2_eth_enqueue_fq;
+		priv->enqueue = dpaa2_eth_enqueue_fq_multiple;
 }
 
 static int set_pause(struct dpaa2_eth_priv *priv)
@@ -2617,7 +2624,7 @@ static void update_tx_fqids(struct dpaa2_eth_priv *priv)
 		}
 	}
 
-	priv->enqueue = dpaa2_eth_enqueue_fq;
+	priv->enqueue = dpaa2_eth_enqueue_fq_multiple;
 
 	return;
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 085ff750e4b5..2440ba6b21ef 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -372,6 +372,7 @@ struct dpaa2_eth_priv {
 	int (*enqueue)(struct dpaa2_eth_priv *priv,
 		       struct dpaa2_eth_fq *fq,
 		       struct dpaa2_fd *fd, u8 prio,
+		       u32 num_frames,
 		       int *frames_enqueued);
 
 	u8 num_channels;
-- 
cgit v1.2.3-59-g8ed1b


From 6aa40b9e5b1ee732e07e406ffa6e17d152b3a216 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 22 Apr 2020 15:05:12 +0300
Subject: dpaa2-eth: split the .ndo_xdp_xmit callback into two stages

Instead of having a function that both creates a frame descriptor from
an xdp_frame and enqueues it, split this into two stages.
Add the dpaa2_eth_xdp_create_fd that just transforms an xdp_frame into a
FD while the actual enqueue callback is called directly from the ndo for
each frame.
This is particularly useful in conjunction with bulk enqueue.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 76 +++++++++++++-----------
 1 file changed, 40 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 26c2868435d5..9a0432cd893c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1880,20 +1880,16 @@ static int dpaa2_eth_xdp(struct net_device *dev, struct netdev_bpf *xdp)
 	return 0;
 }
 
-static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
-				    struct xdp_frame *xdpf)
+static int dpaa2_eth_xdp_create_fd(struct net_device *net_dev,
+				   struct xdp_frame *xdpf,
+				   struct dpaa2_fd *fd)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	struct device *dev = net_dev->dev.parent;
-	struct rtnl_link_stats64 *percpu_stats;
-	struct dpaa2_eth_drv_stats *percpu_extras;
 	unsigned int needed_headroom;
 	struct dpaa2_eth_swa *swa;
-	struct dpaa2_eth_fq *fq;
-	struct dpaa2_fd fd;
 	void *buffer_start, *aligned_start;
 	dma_addr_t addr;
-	int err, i;
 
 	/* We require a minimum headroom to be able to transmit the frame.
 	 * Otherwise return an error and let the original net_device handle it
@@ -1902,11 +1898,8 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 	if (xdpf->headroom < needed_headroom)
 		return -EINVAL;
 
-	percpu_stats = this_cpu_ptr(priv->percpu_stats);
-	percpu_extras = this_cpu_ptr(priv->percpu_extras);
-
 	/* Setup the FD fields */
-	memset(&fd, 0, sizeof(fd));
+	memset(fd, 0, sizeof(*fd));
 
 	/* Align FD address, if possible */
 	buffer_start = xdpf->data - needed_headroom;
@@ -1924,32 +1917,14 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 	addr = dma_map_single(dev, buffer_start,
 			      swa->xdp.dma_size,
 			      DMA_BIDIRECTIONAL);
-	if (unlikely(dma_mapping_error(dev, addr))) {
-		percpu_stats->tx_dropped++;
+	if (unlikely(dma_mapping_error(dev, addr)))
 		return -ENOMEM;
-	}
-
-	dpaa2_fd_set_addr(&fd, addr);
-	dpaa2_fd_set_offset(&fd, xdpf->data - buffer_start);
-	dpaa2_fd_set_len(&fd, xdpf->len);
-	dpaa2_fd_set_format(&fd, dpaa2_fd_single);
-	dpaa2_fd_set_ctrl(&fd, FD_CTRL_PTA);
-
-	fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
-	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, &fd, 0, 1, NULL);
-		if (err != -EBUSY)
-			break;
-	}
-	percpu_extras->tx_portal_busy += i;
-	if (unlikely(err < 0)) {
-		percpu_stats->tx_errors++;
-		/* let the Rx device handle the cleanup */
-		return err;
-	}
 
-	percpu_stats->tx_packets++;
-	percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
+	dpaa2_fd_set_addr(fd, addr);
+	dpaa2_fd_set_offset(fd, xdpf->data - buffer_start);
+	dpaa2_fd_set_len(fd, xdpf->len);
+	dpaa2_fd_set_format(fd, dpaa2_fd_single);
+	dpaa2_fd_set_ctrl(fd, FD_CTRL_PTA);
 
 	return 0;
 }
@@ -1957,6 +1932,11 @@ static int dpaa2_eth_xdp_xmit_frame(struct net_device *net_dev,
 static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_eth_fq *fq;
+	struct dpaa2_fd fd;
 	int drops = 0;
 	int i, err;
 
@@ -1966,14 +1946,38 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	if (!netif_running(net_dev))
 		return -ENETDOWN;
 
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
 	for (i = 0; i < n; i++) {
 		struct xdp_frame *xdpf = frames[i];
 
-		err = dpaa2_eth_xdp_xmit_frame(net_dev, xdpf);
+		/* create the FD from the xdp_frame */
+		err = dpaa2_eth_xdp_create_fd(net_dev, xdpf, &fd);
 		if (err) {
+			percpu_stats->tx_dropped++;
 			xdp_return_frame_rx_napi(xdpf);
 			drops++;
+			continue;
+		}
+
+		/* enqueue the newly created FD */
+		fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
+		for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
+			err = priv->enqueue(priv, fq, &fd, 0, 1);
+			if (err != -EBUSY)
+				break;
 		}
+
+		percpu_extras->tx_portal_busy += i;
+		if (unlikely(err < 0)) {
+			percpu_stats->tx_errors++;
+			xdp_return_frame_rx_napi(xdpf);
+			continue;
+		}
+
+		percpu_stats->tx_packets++;
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
 	}
 
 	return n - drops;
-- 
cgit v1.2.3-59-g8ed1b


From 8665d9780e6efafa3cd9865ae3a77826326fe8c6 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 22 Apr 2020 15:05:13 +0300
Subject: dpaa2-eth: use bulk enqueue in .ndo_xdp_xmit

Take advantage of the bulk enqueue feature in .ndo_xdp_xmit.
We cannot use XDP_XMIT_FLUSH since the architecture is not capable of
storing all the frames dequeued in a NAPI cycle, so we instead enqueue
all the frames received in a ndo_xdp_xmit call right away.

After setting up all FDs for the xdp_frames received, enqueue multiple
frames at a time until all are sent or the maximum number of retries is
hit.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 57 ++++++++++++------------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  2 +
 2 files changed, 30 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 9a0432cd893c..9d4061bba0b8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1933,12 +1933,12 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	int total_enqueued = 0, retries = 0, enqueued;
 	struct dpaa2_eth_drv_stats *percpu_extras;
 	struct rtnl_link_stats64 *percpu_stats;
+	int num_fds, i, err, max_retries;
 	struct dpaa2_eth_fq *fq;
-	struct dpaa2_fd fd;
-	int drops = 0;
-	int i, err;
+	struct dpaa2_fd *fds;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -1946,41 +1946,40 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 	if (!netif_running(net_dev))
 		return -ENETDOWN;
 
+	fq = &priv->fq[smp_processor_id()];
+	fds = fq->xdp_fds;
+
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
 	percpu_extras = this_cpu_ptr(priv->percpu_extras);
 
+	/* create a FD for each xdp_frame in the list received */
 	for (i = 0; i < n; i++) {
-		struct xdp_frame *xdpf = frames[i];
-
-		/* create the FD from the xdp_frame */
-		err = dpaa2_eth_xdp_create_fd(net_dev, xdpf, &fd);
-		if (err) {
-			percpu_stats->tx_dropped++;
-			xdp_return_frame_rx_napi(xdpf);
-			drops++;
-			continue;
-		}
-
-		/* enqueue the newly created FD */
-		fq = &priv->fq[smp_processor_id() % dpaa2_eth_queue_count(priv)];
-		for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-			err = priv->enqueue(priv, fq, &fd, 0, 1);
-			if (err != -EBUSY)
-				break;
-		}
+		err = dpaa2_eth_xdp_create_fd(net_dev, frames[i], &fds[i]);
+		if (err)
+			break;
+	}
+	num_fds = i;
 
-		percpu_extras->tx_portal_busy += i;
-		if (unlikely(err < 0)) {
-			percpu_stats->tx_errors++;
-			xdp_return_frame_rx_napi(xdpf);
+	/* try to enqueue all the FDs until the max number of retries is hit */
+	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+	while (total_enqueued < num_fds && retries < max_retries) {
+		err = priv->enqueue(priv, fq, &fds[total_enqueued],
+				    0, num_fds - total_enqueued, &enqueued);
+		if (err == -EBUSY) {
+			percpu_extras->tx_portal_busy += ++retries;
 			continue;
 		}
-
-		percpu_stats->tx_packets++;
-		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fd);
+		total_enqueued += enqueued;
 	}
 
-	return n - drops;
+	/* update statistics */
+	percpu_stats->tx_packets += total_enqueued;
+	for (i = 0; i < total_enqueued; i++)
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
+	for (i = total_enqueued; i < n; i++)
+		xdp_return_frame_rx_napi(frames[i]);
+
+	return total_enqueued;
 }
 
 static int update_xps(struct dpaa2_eth_priv *priv)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 2440ba6b21ef..289053099974 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -325,6 +325,8 @@ struct dpaa2_eth_fq {
 			const struct dpaa2_fd *fd,
 			struct dpaa2_eth_fq *fq);
 	struct dpaa2_eth_fq_stats stats;
+
+	struct dpaa2_fd xdp_fds[DEV_MAP_BULK_SIZE];
 };
 
 struct dpaa2_eth_ch_xdp {
-- 
cgit v1.2.3-59-g8ed1b


From 2b7aadd3b9e17e8b81eeb8d9cc46756ae4658265 Mon Sep 17 00:00:00 2001
From: Raz Bouganim <r-bouganim@ti.com>
Date: Tue, 21 Apr 2020 15:28:05 +0300
Subject: wlcore: Adding support for IGTK key in wlcore driver

This patch adds support for a new cipher suite, AES-CMAC, in the wlcore driver.
It is required to support PMF/WPA3 connections, which need to install the IGTK key.

Signed-off-by: Raz Bouganim <r-bouganim@ti.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587472085-755-1-git-send-email-r-bouganim@ti.com
---
 drivers/net/wireless/ti/wlcore/cmd.h  | 1 +
 drivers/net/wireless/ti/wlcore/main.c | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ti/wlcore/cmd.h b/drivers/net/wireless/ti/wlcore/cmd.h
index f2609d5b6bf7..9acd8a41ea61 100644
--- a/drivers/net/wireless/ti/wlcore/cmd.h
+++ b/drivers/net/wireless/ti/wlcore/cmd.h
@@ -458,6 +458,7 @@ enum wl1271_cmd_key_type {
 	KEY_TKIP = 2,
 	KEY_AES  = 3,
 	KEY_GEM  = 4,
+	KEY_IGTK  = 5,
 };
 
 struct wl1271_cmd_set_keys {
diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index f140f7d7f553..4421fc656b1c 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -3547,6 +3547,9 @@ int wlcore_set_key(struct wl1271 *wl, enum set_key_cmd cmd,
 	case WL1271_CIPHER_SUITE_GEM:
 		key_type = KEY_GEM;
 		break;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		key_type = KEY_IGTK;
+		break;
 	default:
 		wl1271_error("Unknown key algo 0x%x", key_conf->cipher);
 
@@ -6214,6 +6217,7 @@ static int wl1271_init_ieee80211(struct wl1271 *wl)
 		WLAN_CIPHER_SUITE_TKIP,
 		WLAN_CIPHER_SUITE_CCMP,
 		WL1271_CIPHER_SUITE_GEM,
+		WLAN_CIPHER_SUITE_AES_CMAC,
 	};
 
 	/* The tx descriptor buffer */
-- 
cgit v1.2.3-59-g8ed1b


From 15d2fcc6b2dea46986e55cd3808c0dbb480a6c8d Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:00 +0800
Subject: rtw88: add legacy firmware download for 8723D devices

The WLAN CPU of 8723D device is different from others, add legacy
firmware download function for it. A new variable wlan_cpu is used to
decide which firmware download function we should use.

Legacy firmware file contains 32 bytes header including version and
subversion. When downloading to wlan cpu, header is excluded.

Firmware is downloaded via beacon queue to reserved page that is a part of
TX buffer. Since 11N WLAN CPU uses different control registers, this patch
introduces related control registers.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c       |  21 +++-
 drivers/net/wireless/realtek/rtw88/fw.h       |  25 +++++
 drivers/net/wireless/realtek/rtw88/mac.c      | 146 +++++++++++++++++++++++++-
 drivers/net/wireless/realtek/rtw88/main.c     |  41 ++++++--
 drivers/net/wireless/realtek/rtw88/main.h     |  16 +++
 drivers/net/wireless/realtek/rtw88/reg.h      |  11 ++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8822b.c |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c |   1 +
 9 files changed, 252 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 245da96dfddc..209853fdcb42 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -1079,6 +1079,8 @@ int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr,
 	u8 bckp[2];
 	u8 val;
 	u16 rsvd_pg_head;
+	u32 bcn_valid_addr;
+	u32 bcn_valid_mask;
 	int ret;
 
 	lockdep_assert_held(&rtwdev->mutex);
@@ -1086,8 +1088,13 @@ int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr,
 	if (!size)
 		return -EINVAL;
 
-	pg_addr &= BIT_MASK_BCN_HEAD_1_V1;
-	rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2, pg_addr | BIT_BCN_VALID_V1);
+	if (rtw_chip_wcpu_11n(rtwdev)) {
+		rtw_write32_set(rtwdev, REG_DWBCN0_CTRL, BIT_BCN_VALID);
+	} else {
+		pg_addr &= BIT_MASK_BCN_HEAD_1_V1;
+		pg_addr |= BIT_BCN_VALID_V1;
+		rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2, pg_addr);
+	}
 
 	val = rtw_read8(rtwdev, REG_CR + 1);
 	bckp[0] = val;
@@ -1105,7 +1112,15 @@ int rtw_fw_write_data_rsvd_page(struct rtw_dev *rtwdev, u16 pg_addr,
 		goto restore;
 	}
 
-	if (!check_hw_ready(rtwdev, REG_FIFOPAGE_CTRL_2, BIT_BCN_VALID_V1, 1)) {
+	if (rtw_chip_wcpu_11n(rtwdev)) {
+		bcn_valid_addr = REG_DWBCN0_CTRL;
+		bcn_valid_mask = BIT_BCN_VALID;
+	} else {
+		bcn_valid_addr = REG_FIFOPAGE_CTRL_2;
+		bcn_valid_mask = BIT_BCN_VALID_V1;
+	}
+
+	if (!check_hw_ready(rtwdev, bcn_valid_addr, bcn_valid_mask, 1)) {
 		rtw_err(rtwdev, "error beacon valid\n");
 		ret = -EBUSY;
 	}
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index cdd244857048..2933ef741e53 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -19,6 +19,12 @@
 #define RSVD_PAGE_START_ADDR		0x780
 #define FIFO_DUMP_ADDR			0x8000
 
+#define DLFW_PAGE_SIZE_SHIFT_LEGACY	12
+#define DLFW_PAGE_SIZE_LEGACY		0x1000
+#define DLFW_BLK_SIZE_SHIFT_LEGACY	2
+#define DLFW_BLK_SIZE_LEGACY		4
+#define FW_START_ADDR_LEGACY		0x1000
+
 enum rtw_c2h_cmd_id {
 	C2H_BT_INFO = 0x09,
 	C2H_BT_MP_INFO = 0x0b,
@@ -192,6 +198,25 @@ struct rtw_fw_hdr {
 	__le32 imem_addr;
 } __packed;
 
+struct rtw_fw_hdr_legacy {
+	__le16 signature;
+	u8 category;
+	u8 function;
+	__le16 version;	/* 0x04 */
+	u8 subversion1;
+	u8 subversion2;
+	u8 month;	/* 0x08 */
+	u8 day;
+	u8 hour;
+	u8 minute;
+	__le16 size;
+	__le16 rsvd2;
+	__le32 idx;	/* 0x10 */
+	__le32 rsvd3;
+	__le32 rsvd4;	/* 0x18 */
+	__le32 rsvd5;
+} __packed;
+
 /* C2H */
 #define GET_CCX_REPORT_SEQNUM(c2h_payload)	(c2h_payload[8] & 0xfc)
 #define GET_CCX_REPORT_STATUS(c2h_payload)	(c2h_payload[9] & 0xc0)
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 7b245779ff90..6092604abfb9 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -650,7 +650,7 @@ static void download_firmware_end_flow(struct rtw_dev *rtwdev)
 	rtw_write16(rtwdev, REG_MCUFW_CTRL, fw_ctrl);
 }
 
-int rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
+int __rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
 {
 	struct rtw_backup_info bckp[DLFW_RESTORE_REG_NUM];
 	const u8 *data = fw->firmware->data;
@@ -704,6 +704,150 @@ dlfw_fail:
 	return ret;
 }
 
+static void en_download_firmware_legacy(struct rtw_dev *rtwdev, bool en)
+{
+	int try;
+
+	if (en) {
+		wlan_cpu_enable(rtwdev, false);
+		wlan_cpu_enable(rtwdev, true);
+
+		rtw_write8_set(rtwdev, REG_MCUFW_CTRL, BIT_MCUFWDL_EN);
+
+		for (try = 0; try < 10; try++) {
+			if (rtw_read8(rtwdev, REG_MCUFW_CTRL) & BIT_MCUFWDL_EN)
+				goto fwdl_ready;
+			rtw_write8_set(rtwdev, REG_MCUFW_CTRL, BIT_MCUFWDL_EN);
+			msleep(20);
+		}
+		rtw_err(rtwdev, "failed to check fw download ready\n");
+fwdl_ready:
+		rtw_write32_clr(rtwdev, REG_MCUFW_CTRL, BIT_ROM_DLEN);
+	} else {
+		rtw_write8_clr(rtwdev, REG_MCUFW_CTRL, BIT_MCUFWDL_EN);
+	}
+}
+
+static void
+write_firmware_page(struct rtw_dev *rtwdev, u32 page, const u8 *data, u32 size)
+{
+	u32 val32;
+	u32 block_nr;
+	u32 remain_size;
+	u32 write_addr = FW_START_ADDR_LEGACY;
+	const __le32 *ptr = (const __le32 *)data;
+	u32 block;
+	__le32 remain_data = 0;
+
+	block_nr = size >> DLFW_BLK_SIZE_SHIFT_LEGACY;
+	remain_size = size & (DLFW_BLK_SIZE_LEGACY - 1);
+
+	val32 = rtw_read32(rtwdev, REG_MCUFW_CTRL);
+	val32 &= ~BIT_ROM_PGE;
+	val32 |= (page << BIT_SHIFT_ROM_PGE) & BIT_ROM_PGE;
+	rtw_write32(rtwdev, REG_MCUFW_CTRL, val32);
+
+	for (block = 0; block < block_nr; block++) {
+		rtw_write32(rtwdev, write_addr, le32_to_cpu(*ptr));
+
+		write_addr += DLFW_BLK_SIZE_LEGACY;
+		ptr++;
+	}
+
+	if (remain_size) {
+		memcpy(&remain_data, ptr, remain_size);
+		rtw_write32(rtwdev, write_addr, le32_to_cpu(remain_data));
+	}
+}
+
+static int
+download_firmware_legacy(struct rtw_dev *rtwdev, const u8 *data, u32 size)
+{
+	u32 page;
+	u32 total_page;
+	u32 last_page_size;
+
+	data += sizeof(struct rtw_fw_hdr_legacy);
+	size -= sizeof(struct rtw_fw_hdr_legacy);
+
+	total_page = size >> DLFW_PAGE_SIZE_SHIFT_LEGACY;
+	last_page_size = size & (DLFW_PAGE_SIZE_LEGACY - 1);
+
+	rtw_write8_set(rtwdev, REG_MCUFW_CTRL, BIT_FWDL_CHK_RPT);
+
+	for (page = 0; page < total_page; page++) {
+		write_firmware_page(rtwdev, page, data, DLFW_PAGE_SIZE_LEGACY);
+		data += DLFW_PAGE_SIZE_LEGACY;
+	}
+	if (last_page_size)
+		write_firmware_page(rtwdev, page, data, last_page_size);
+
+	if (!check_hw_ready(rtwdev, REG_MCUFW_CTRL, BIT_FWDL_CHK_RPT, 1)) {
+		rtw_err(rtwdev, "failed to check download fimrware report\n");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int download_firmware_validate_legacy(struct rtw_dev *rtwdev)
+{
+	u32 val32;
+	int try;
+
+	val32 = rtw_read32(rtwdev, REG_MCUFW_CTRL);
+	val32 |= BIT_MCUFWDL_RDY;
+	val32 &= ~BIT_WINTINI_RDY;
+	rtw_write32(rtwdev, REG_MCUFW_CTRL, val32);
+
+	wlan_cpu_enable(rtwdev, false);
+	wlan_cpu_enable(rtwdev, true);
+
+	for (try = 0; try < 10; try++) {
+		val32 = rtw_read32(rtwdev, REG_MCUFW_CTRL);
+		if ((val32 & FW_READY_LEGACY) == FW_READY_LEGACY)
+			return 0;
+		msleep(20);
+	}
+
+	rtw_err(rtwdev, "failed to validate fimrware\n");
+	return -EINVAL;
+}
+
+int __rtw_download_firmware_legacy(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
+{
+	int ret = 0;
+
+	en_download_firmware_legacy(rtwdev, true);
+	ret = download_firmware_legacy(rtwdev, fw->firmware->data, fw->firmware->size);
+	en_download_firmware_legacy(rtwdev, false);
+	if (ret)
+		goto out;
+
+	ret = download_firmware_validate_legacy(rtwdev);
+	if (ret)
+		goto out;
+
+	/* reset desc and index */
+	rtw_hci_setup(rtwdev);
+
+	rtwdev->h2c.last_box_num = 0;
+	rtwdev->h2c.seq = 0;
+
+	set_bit(RTW_FLAG_FW_RUNNING, rtwdev->flags);
+
+out:
+	return ret;
+}
+
+int rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
+{
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return __rtw_download_firmware_legacy(rtwdev, fw);
+
+	return __rtw_download_firmware(rtwdev, fw);
+}
+
 static u32 get_priority_queues(struct rtw_dev *rtwdev, u32 queues)
 {
 	const struct rtw_rqpn *rqpn = rtwdev->fifo.rqpn;
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index c851830132d0..b0dadff0dc7b 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -1042,11 +1042,43 @@ static void rtw_unset_supported_band(struct ieee80211_hw *hw,
 	kfree(hw->wiphy->bands[NL80211_BAND_5GHZ]);
 }
 
+static void __update_firmware_info(struct rtw_dev *rtwdev,
+				   struct rtw_fw_state *fw)
+{
+	const struct rtw_fw_hdr *fw_hdr =
+				(const struct rtw_fw_hdr *)fw->firmware->data;
+
+	fw->h2c_version = le16_to_cpu(fw_hdr->h2c_fmt_ver);
+	fw->version = le16_to_cpu(fw_hdr->version);
+	fw->sub_version = fw_hdr->subversion;
+	fw->sub_index = fw_hdr->subindex;
+}
+
+static void __update_firmware_info_legacy(struct rtw_dev *rtwdev,
+					  struct rtw_fw_state *fw)
+{
+	struct rtw_fw_hdr_legacy *legacy =
+				(struct rtw_fw_hdr_legacy *)fw->firmware->data;
+
+	fw->h2c_version = 0;
+	fw->version = le16_to_cpu(legacy->version);
+	fw->sub_version = legacy->subversion1;
+	fw->sub_index = legacy->subversion2;
+}
+
+static void update_firmware_info(struct rtw_dev *rtwdev,
+				 struct rtw_fw_state *fw)
+{
+	if (rtw_chip_wcpu_11n(rtwdev))
+		__update_firmware_info_legacy(rtwdev, fw);
+	else
+		__update_firmware_info(rtwdev, fw);
+}
+
 static void rtw_load_firmware_cb(const struct firmware *firmware, void *context)
 {
 	struct rtw_fw_state *fw = context;
 	struct rtw_dev *rtwdev = fw->rtwdev;
-	const struct rtw_fw_hdr *fw_hdr;
 
 	if (!firmware || !firmware->data) {
 		rtw_err(rtwdev, "failed to request firmware\n");
@@ -1054,13 +1086,8 @@ static void rtw_load_firmware_cb(const struct firmware *firmware, void *context)
 		return;
 	}
 
-	fw_hdr = (const struct rtw_fw_hdr *)firmware->data;
-	fw->h2c_version = le16_to_cpu(fw_hdr->h2c_fmt_ver);
-	fw->version = le16_to_cpu(fw_hdr->version);
-	fw->sub_version = fw_hdr->subversion;
-	fw->sub_index = fw_hdr->subindex;
-
 	fw->firmware = firmware;
+	update_firmware_info(rtwdev, fw);
 	complete_all(&fw->completion);
 
 	rtw_info(rtwdev, "Firmware version %u.%u.%u, H2C version %u\n",
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 74302181da53..380a670eeeee 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1056,12 +1056,18 @@ struct rtw_pwr_track_tbl {
 	const u8 *pwrtrk_2g_ccka_p;
 };
 
+enum rtw_wlan_cpu {
+	RTW_WCPU_11AC,
+	RTW_WCPU_11N,
+};
+
 /* hardware configuration for each IC */
 struct rtw_chip_info {
 	struct rtw_chip_ops *ops;
 	u8 id;
 
 	const char *fw_name;
+	enum rtw_wlan_cpu wlan_cpu;
 	u8 tx_pkt_desc_sz;
 	u8 tx_buf_desc_sz;
 	u8 rx_pkt_desc_sz;
@@ -1725,6 +1731,16 @@ static inline void rtw_chip_efuse_grant_off(struct rtw_dev *rtwdev)
 		rtwdev->chip->ops->efuse_grant(rtwdev, false);
 }
 
+static inline bool rtw_chip_wcpu_11n(struct rtw_dev *rtwdev)
+{
+	return rtwdev->chip->wlan_cpu == RTW_WCPU_11N;
+}
+
+static inline bool rtw_chip_wcpu_11ac(struct rtw_dev *rtwdev)
+{
+	return rtwdev->chip->wlan_cpu == RTW_WCPU_11AC;
+}
+
 void rtw_get_channel_params(struct cfg80211_chan_def *chandef,
 			    struct rtw_channel_params *ch_param);
 bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target);
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 911d8e75db77..89868ac0748f 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -77,19 +77,28 @@
 #define BIT_ANA_PORT_EN		BIT(22)
 #define BIT_MAC_PORT_EN		BIT(21)
 #define BIT_BOOT_FSPI_EN	BIT(20)
+#define BIT_ROM_DLEN		BIT(19)
+#define BIT_ROM_PGE		GENMASK(18, 16)	/* legacy only */
+#define BIT_SHIFT_ROM_PGE	16
 #define BIT_FW_INIT_RDY		BIT(15)
 #define BIT_FW_DW_RDY		BIT(14)
 #define BIT_RPWM_TOGGLE		BIT(7)
+#define BIT_RAM_DL_SEL		BIT(7)	/* legacy only */
 #define BIT_DMEM_CHKSUM_OK	BIT(6)
+#define BIT_WINTINI_RDY		BIT(6)	/* legacy only */
 #define BIT_DMEM_DW_OK		BIT(5)
 #define BIT_IMEM_CHKSUM_OK	BIT(4)
 #define BIT_IMEM_DW_OK		BIT(3)
 #define BIT_IMEM_BOOT_LOAD_CHECKSUM_OK BIT(2)
+#define BIT_FWDL_CHK_RPT	BIT(2)	/* legacy only */
+#define BIT_MCUFWDL_RDY		BIT(1)	/* legacy only */
 #define BIT_MCUFWDL_EN		BIT(0)
 #define BIT_CHECK_SUM_OK	(BIT(4) | BIT(6))
 #define FW_READY		(BIT_FW_INIT_RDY | BIT_FW_DW_RDY |             \
 				 BIT_IMEM_DW_OK | BIT_DMEM_DW_OK |             \
 				 BIT_CHECK_SUM_OK)
+#define FW_READY_LEGACY		(BIT_MCUFWDL_RDY | BIT_FWDL_CHK_RPT |	       \
+				 BIT_WINTINI_RDY | BIT_RAM_DL_SEL)
 #define FW_READY_MASK		0xffff
 
 #define REG_EFUSE_ACCESS	0x00CF
@@ -197,6 +206,8 @@
 #define BIT_MASK_BCN_HEAD_1_V1	0xfff
 #define REG_AUTO_LLT_V1		0x0208
 #define BIT_AUTO_INIT_LLT_V1	BIT(0)
+#define REG_DWBCN0_CTRL		0x0208
+#define BIT_BCN_VALID		BIT(16)
 #define REG_TXDMA_OFFSET_CHK	0x020C
 #define REG_TXDMA_STATUS	0x0210
 #define BTI_PAGE_OVF		BIT(2)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index c25cabbab64d..5e8e0dd6456e 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -511,6 +511,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
 	.fw_name = "rtw88/rtw8723d_fw.bin",
+	.wlan_cpu = RTW_WCPU_11N,
 	.tx_pkt_desc_sz = 40,
 	.tx_buf_desc_sz = 16,
 	.rx_pkt_desc_sz = 24,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 9a2e18e7624f..ffee8111d145 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2408,6 +2408,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 	.ops = &rtw8822b_ops,
 	.id = RTW_CHIP_TYPE_8822B,
 	.fw_name = "rtw88/rtw8822b_fw.bin",
+	.wlan_cpu = RTW_WCPU_11AC,
 	.tx_pkt_desc_sz = 48,
 	.tx_buf_desc_sz = 16,
 	.rx_pkt_desc_sz = 24,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index ee0d39135617..8dd92136145d 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -4269,6 +4269,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.ops = &rtw8822c_ops,
 	.id = RTW_CHIP_TYPE_8822C,
 	.fw_name = "rtw88/rtw8822c_fw.bin",
+	.wlan_cpu = RTW_WCPU_11AC,
 	.tx_pkt_desc_sz = 48,
 	.tx_buf_desc_sz = 16,
 	.rx_pkt_desc_sz = 24,
-- 
cgit v1.2.3-59-g8ed1b


From e5f57ad06adec1dcbfe69f45792a8b9dd4798664 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:01 +0800
Subject: rtw88: no need to send additional information to legacy firmware

The firmware of 11AC devices needs more information to support more offload
functions, such as IQK. 11N devices such as 8723D do not support offloading
these functions to firmware, so there is no need to send this additional
information to firmware when it comes to 11N devices.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 209853fdcb42..dde7823143ea 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -271,6 +271,9 @@ rtw_fw_send_general_info(struct rtw_dev *rtwdev)
 	u8 h2c_pkt[H2C_PKT_SIZE] = {0};
 	u16 total_size = H2C_PKT_HDR_SIZE + 4;
 
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return;
+
 	rtw_h2c_pkt_set_header(h2c_pkt, H2C_PKT_GENERAL_INFO);
 
 	SET_PKT_H2C_TOTAL_LEN(h2c_pkt, total_size);
@@ -291,6 +294,9 @@ rtw_fw_send_phydm_info(struct rtw_dev *rtwdev)
 	u16 total_size = H2C_PKT_HDR_SIZE + 8;
 	u8 fw_rf_type = 0;
 
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return;
+
 	if (hal->rf_type == RF_1T1R)
 		fw_rf_type = FW_RF_1T1R;
 	else if (hal->rf_type == RF_2T2R)
-- 
cgit v1.2.3-59-g8ed1b


From 4e223a5f5342fab01ccebf87714401f559dcc791 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:02 +0800
Subject: rtw88: 8723d: Add mac power-on/-off function

The mac power-on flow consists of three steps:
1. pre_sys_cfg  (Before switching power state)
2. power_switch (Switching power state)
3. init_sys_cfg (Settings after switching power state)

When switching power state, driver will load and parse the power sequence
tables. For 8723D devices, the logics for parsing are most same except for
the polling function. 8723D devices need to toggle BIT_PFM_WOWL twice.

The settings after power state is switched for 8723D devices are quite
different with other devices, extract a legacy function for them.

For the power-off flow, 8723D devices have the same logic as existing chips.
But a warning is printed if we run the power-off sequence in power-off state:

   rtw_pci 0000:03:00.0: failed to poll offset=0x5f8 mask=0xff value=0x0

The scenario is that the user does 'ifconfig up', which runs the power-on
sequence to bring up the device and then runs the power-off sequence to enter
idle (IEEE80211_CONF_IDLE). Then the user does 'ifconfig down', which runs the
power-off sequence again, and the warning is shown. The original code checks
the power-on state to avoid running the power-on sequence twice, and this
commit extends it to check both power-on and power-off states.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c | 60 +++++++++++++++++++++++++++-----
 drivers/net/wireless/realtek/rtw88/reg.h | 10 ++++++
 2 files changed, 61 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 6092604abfb9..21b5c7173f0f 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -61,6 +61,14 @@ static int rtw_mac_pre_system_cfg(struct rtw_dev *rtwdev)
 
 	rtw_write8(rtwdev, REG_RSV_CTRL, 0);
 
+	if (rtw_chip_wcpu_11n(rtwdev)) {
+		if (rtw_read32(rtwdev, REG_SYS_CFG1) & BIT_LDO)
+			rtw_write8(rtwdev, REG_LDO_SWR_CTRL, LDO_SEL);
+		else
+			rtw_write8(rtwdev, REG_LDO_SWR_CTRL, SPS_SEL);
+		return 0;
+	}
+
 	switch (rtw_hci_type(rtwdev)) {
 	case RTW_HCI_TYPE_PCIE:
 		rtw_write32_set(rtwdev, REG_HCI_OPT_CTRL, BIT_BT_DIG_CLK_EN);
@@ -123,10 +131,19 @@ static int rtw_pwr_cmd_polling(struct rtw_dev *rtwdev,
 			if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_PCIE &&
 			    flag == 0) {
 				value = rtw_read8(rtwdev, REG_SYS_PW_CTRL);
-				value |= BIT(3);
+				if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D) {
+					value &= ~BIT_PFM_WOWL;
+					rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
+				}
+				value |= BIT_PFM_WOWL;
 				rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
-				value &= ~BIT(3);
+				value &= ~BIT_PFM_WOWL;
 				rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
+				if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D) {
+					value |= BIT_PFM_WOWL;
+					rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
+				}
+
 				cnt = RTW_PWR_POLLING_CNT;
 				flag = 1;
 			} else {
@@ -228,12 +245,14 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
 	u8 rpwm;
 	bool cur_pwr;
 
-	rpwm = rtw_read8(rtwdev, rtwdev->hci.rpwm_addr);
+	if (rtw_chip_wcpu_11ac(rtwdev)) {
+		rpwm = rtw_read8(rtwdev, rtwdev->hci.rpwm_addr);
 
-	/* Check FW still exist or not */
-	if (rtw_read16(rtwdev, REG_MCUFW_CTRL) == 0xC078) {
-		rpwm = (rpwm ^ BIT_RPWM_TOGGLE) & BIT_RPWM_TOGGLE;
-		rtw_write8(rtwdev, rtwdev->hci.rpwm_addr, rpwm);
+		/* Check FW still exist or not */
+		if (rtw_read16(rtwdev, REG_MCUFW_CTRL) == 0xC078) {
+			rpwm = (rpwm ^ BIT_RPWM_TOGGLE) & BIT_RPWM_TOGGLE;
+			rtw_write8(rtwdev, rtwdev->hci.rpwm_addr, rpwm);
+		}
 	}
 
 	if (rtw_read8(rtwdev, REG_CR) == 0xea)
@@ -244,7 +263,7 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
 	else
 		cur_pwr = true;
 
-	if (pwr_on && cur_pwr)
+	if (pwr_on == cur_pwr)
 		return -EALREADY;
 
 	pwr_seq = pwr_on ? chip->pwr_on_seq : chip->pwr_off_seq;
@@ -254,7 +273,7 @@ static int rtw_mac_power_switch(struct rtw_dev *rtwdev, bool pwr_on)
 	return 0;
 }
 
-static int rtw_mac_init_system_cfg(struct rtw_dev *rtwdev)
+static int __rtw_mac_init_system_cfg(struct rtw_dev *rtwdev)
 {
 	u8 sys_func_en = rtwdev->chip->sys_func_en;
 	u8 value8;
@@ -279,6 +298,29 @@ static int rtw_mac_init_system_cfg(struct rtw_dev *rtwdev)
 	return 0;
 }
 
+static int __rtw_mac_init_system_cfg_legacy(struct rtw_dev *rtwdev)
+{
+	rtw_write8(rtwdev, REG_CR, 0xff);
+	mdelay(2);
+	rtw_write8(rtwdev, REG_HWSEQ_CTRL, 0x7f);
+	mdelay(2);
+
+	rtw_write8_set(rtwdev, REG_SYS_CLKR, BIT_WAKEPAD_EN);
+	rtw_write16_clr(rtwdev, REG_GPIO_MUXCFG, BIT_EN_SIC);
+
+	rtw_write16(rtwdev, REG_CR, 0x2ff);
+
+	return 0;
+}
+
+static int rtw_mac_init_system_cfg(struct rtw_dev *rtwdev)
+{
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return __rtw_mac_init_system_cfg_legacy(rtwdev);
+
+	return __rtw_mac_init_system_cfg(rtwdev);
+}
+
 int rtw_mac_power_on(struct rtw_dev *rtwdev)
 {
 	int ret = 0;
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 89868ac0748f..c1e66d656307 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -13,11 +13,13 @@
 #define BIT_R_DIS_PRST		BIT(6)
 #define BIT_WLOCK_1C_B6		BIT(5)
 #define REG_SYS_PW_CTRL		0x0004
+#define BIT_PFM_WOWL		BIT(3)
 #define REG_SYS_CLK_CTRL	0x0008
 #define BIT_CPU_CLK_EN		BIT(14)
 
 #define REG_SYS_CLKR		0x0008
 #define BIT_ANA8M		BIT(1)
+#define BIT_WAKEPAD_EN		BIT(3)
 #define BIT_LOADER_CLK_EN	BIT(5)
 
 #define REG_RSV_CTRL		0x001C
@@ -49,6 +51,7 @@
 
 #define REG_GPIO_MUXCFG		0x0040
 #define BIT_FSPI_EN		BIT(19)
+#define BIT_EN_SIC		BIT(12)
 #define BIT_BT_AOD_GPIO3	BIT(9)
 #define BIT_BT_PTA_EN		BIT(5)
 #define BIT_WLRFE_4_5_EN	BIT(2)
@@ -73,6 +76,10 @@
 #define BIT_LTE_MUX_CTRL_PATH	BIT(26)
 #define REG_HCI_OPT_CTRL	0x0074
 
+#define REG_LDO_SWR_CTRL	0x007C
+#define LDO_SEL			0xC3
+#define SPS_SEL			0x83
+
 #define REG_MCUFW_CTRL		0x0080
 #define BIT_ANA_PORT_EN		BIT(22)
 #define BIT_MAC_PORT_EN		BIT(21)
@@ -110,6 +117,7 @@
 #define BIT_BT_INT_EN		BIT(15)
 #define REG_SYS_CFG1		0x00F0
 #define	BIT_RTL_ID		BIT(23)
+#define BIT_LDO			BIT(24)
 #define BIT_RF_TYPE_ID		BIT(27)
 #define BIT_SHIFT_VENDOR_ID	16
 #define BIT_MASK_VENDOR_ID	0xf
@@ -238,6 +246,8 @@
 #define REG_FWHW_TXQ_CTRL	0x0420
 #define BIT_EN_BCNQ_DL		BIT(22)
 #define BIT_EN_WR_FREE_TAIL	BIT(20)
+#define REG_HWSEQ_CTRL		0x0423
+
 #define REG_BCNQ_BDNY_V1	0x0424
 #define REG_LIFETIME_EN		0x0426
 #define BIT_BA_PARSER_EN	BIT(5)
-- 
cgit v1.2.3-59-g8ed1b


From fd9ead385102652b43f628ca700810d343c52437 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:03 +0800
Subject: rtw88: decompose while(1) loop of power sequence polling command

The power polling command is one kind of power sequence commands. It's used
to check hardware situation, and subsequent commands will be executed if
hardware is ready. A special case is PCIE must toggle BIT_PFM_WOWL and try
again if first try is failed.

In order to reduce indentation to understand the code easier, move polling
part to a separate function. Then, the 'while (1)...loop' is replaced by
two statements to do first try and retry.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-5-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c | 72 +++++++++++++++++---------------
 1 file changed, 38 insertions(+), 34 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 21b5c7173f0f..ac5d35153c8a 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -108,51 +108,55 @@ static int rtw_mac_pre_system_cfg(struct rtw_dev *rtwdev)
 	return 0;
 }
 
+static bool do_pwr_poll_cmd(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target)
+{
+	u32 cnt;
+
+	target &= mask;
+
+	for (cnt = 0; cnt < RTW_PWR_POLLING_CNT; cnt++) {
+		if ((rtw_read8(rtwdev, addr) & mask) == target)
+			return true;
+
+		udelay(50);
+	}
+
+	return false;
+}
+
 static int rtw_pwr_cmd_polling(struct rtw_dev *rtwdev,
 			       const struct rtw_pwr_seq_cmd *cmd)
 {
 	u8 value;
-	u8 flag = 0;
 	u32 offset;
-	u32 cnt = RTW_PWR_POLLING_CNT;
 
 	if (cmd->base == RTW_PWR_ADDR_SDIO)
 		offset = cmd->offset | SDIO_LOCAL_OFFSET;
 	else
 		offset = cmd->offset;
 
-	do {
-		cnt--;
-		value = rtw_read8(rtwdev, offset);
-		value &= cmd->mask;
-		if (value == (cmd->value & cmd->mask))
-			return 0;
-		if (cnt == 0) {
-			if (rtw_hci_type(rtwdev) == RTW_HCI_TYPE_PCIE &&
-			    flag == 0) {
-				value = rtw_read8(rtwdev, REG_SYS_PW_CTRL);
-				if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D) {
-					value &= ~BIT_PFM_WOWL;
-					rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
-				}
-				value |= BIT_PFM_WOWL;
-				rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
-				value &= ~BIT_PFM_WOWL;
-				rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
-				if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D) {
-					value |= BIT_PFM_WOWL;
-					rtw_write8(rtwdev, REG_SYS_PW_CTRL, value);
-				}
-
-				cnt = RTW_PWR_POLLING_CNT;
-				flag = 1;
-			} else {
-				return -EBUSY;
-			}
-		} else {
-			udelay(50);
-		}
-	} while (1);
+	if (do_pwr_poll_cmd(rtwdev, offset, cmd->mask, cmd->value))
+		return 0;
+
+	if (rtw_hci_type(rtwdev) != RTW_HCI_TYPE_PCIE)
+		goto err;
+
+	/* if PCIE, toggle BIT_PFM_WOWL and try again */
+	value = rtw_read8(rtwdev, REG_SYS_PW_CTRL);
+	if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D)
+		rtw_write8(rtwdev, REG_SYS_PW_CTRL, value & ~BIT_PFM_WOWL);
+	rtw_write8(rtwdev, REG_SYS_PW_CTRL, value | BIT_PFM_WOWL);
+	rtw_write8(rtwdev, REG_SYS_PW_CTRL, value & ~BIT_PFM_WOWL);
+	if (rtwdev->chip->id == RTW_CHIP_TYPE_8723D)
+		rtw_write8(rtwdev, REG_SYS_PW_CTRL, value | BIT_PFM_WOWL);
+
+	if (do_pwr_poll_cmd(rtwdev, offset, cmd->mask, cmd->value))
+		return 0;
+
+err:
+	rtw_err(rtwdev, "failed to poll offset=0x%x mask=0x%x value=0x%x\n",
+		offset, cmd->mask, cmd->value);
+	return -EBUSY;
 }
 
 static int rtw_sub_pwr_seq_parser(struct rtw_dev *rtwdev, u8 intf_mask,
-- 
cgit v1.2.3-59-g8ed1b


From 7907b52de08aeb27cea05bd3a2c825658f91f051 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:04 +0800
Subject: rtw88: 8723d: 11N chips don't support H2C queue

H2C queue is used to send commands to firmware. Since 8723D doesn't support
this queue, this commit checks the wlan_cpu flag to avoid setting H2C related
registers.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-6-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c |  6 +++++-
 drivers/net/wireless/realtek/rtw88/pci.c | 35 +++++++++++++++++++++-----------
 2 files changed, 28 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index ac5d35153c8a..f4a504b350cf 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -1016,7 +1016,8 @@ static int txdma_queue_mapping(struct rtw_dev *rtwdev)
 
 	rtw_write8(rtwdev, REG_CR, 0);
 	rtw_write8(rtwdev, REG_CR, MAC_TRX_ENABLE);
-	rtw_write32(rtwdev, REG_H2CQ_CSR, BIT_H2CQ_FULL);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		rtw_write32(rtwdev, REG_H2CQ_CSR, BIT_H2CQ_FULL);
 
 	return 0;
 }
@@ -1135,6 +1136,9 @@ static int init_h2c(struct rtw_dev *rtwdev)
 	u32 h2cq_free;
 	u32 wp, rp;
 
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return 0;
+
 	h2cq_addr = fifo->rsvd_h2cq_addr << TX_PAGE_SIZE_SHIFT;
 	h2cq_size = RSVD_PG_H2CQ_NUM << TX_PAGE_SIZE_SHIFT;
 
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index b3e76b579af9..8a8d746d3349 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -411,12 +411,14 @@ static void rtw_pci_reset_buf_desc(struct rtw_dev *rtwdev)
 	dma = rtwpci->tx_rings[RTW_TX_QUEUE_BCN].r.dma;
 	rtw_write32(rtwdev, RTK_PCI_TXBD_DESA_BCNQ, dma);
 
-	len = rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.len;
-	dma = rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.dma;
-	rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.rp = 0;
-	rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.wp = 0;
-	rtw_write16(rtwdev, RTK_PCI_TXBD_NUM_H2CQ, len & TRX_BD_IDX_MASK);
-	rtw_write32(rtwdev, RTK_PCI_TXBD_DESA_H2CQ, dma);
+	if (!rtw_chip_wcpu_11n(rtwdev)) {
+		len = rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.len;
+		dma = rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.dma;
+		rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.rp = 0;
+		rtwpci->tx_rings[RTW_TX_QUEUE_H2C].r.wp = 0;
+		rtw_write16(rtwdev, RTK_PCI_TXBD_NUM_H2CQ, len & TRX_BD_IDX_MASK);
+		rtw_write32(rtwdev, RTK_PCI_TXBD_DESA_H2CQ, dma);
+	}
 
 	len = rtwpci->tx_rings[RTW_TX_QUEUE_BK].r.len;
 	dma = rtwpci->tx_rings[RTW_TX_QUEUE_BK].r.dma;
@@ -471,8 +473,9 @@ static void rtw_pci_reset_buf_desc(struct rtw_dev *rtwdev)
 	rtw_write32(rtwdev, RTK_PCI_TXBD_RWPTR_CLR, 0xffffffff);
 
 	/* reset H2C Queue index in a single write */
-	rtw_write32_set(rtwdev, RTK_PCI_TXBD_H2CQ_CSR,
-			BIT_CLR_H2CQ_HOST_IDX | BIT_CLR_H2CQ_HW_IDX);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		rtw_write32_set(rtwdev, RTK_PCI_TXBD_H2CQ_CSR,
+				BIT_CLR_H2CQ_HOST_IDX | BIT_CLR_H2CQ_HW_IDX);
 }
 
 static void rtw_pci_reset_trx_ring(struct rtw_dev *rtwdev)
@@ -489,7 +492,9 @@ static void rtw_pci_enable_interrupt(struct rtw_dev *rtwdev,
 
 	rtw_write32(rtwdev, RTK_PCI_HIMR0, rtwpci->irq_mask[0]);
 	rtw_write32(rtwdev, RTK_PCI_HIMR1, rtwpci->irq_mask[1]);
-	rtw_write32(rtwdev, RTK_PCI_HIMR3, rtwpci->irq_mask[3]);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		rtw_write32(rtwdev, RTK_PCI_HIMR3, rtwpci->irq_mask[3]);
+
 	rtwpci->irq_enabled = true;
 
 	spin_unlock_irqrestore(&rtwpci->hwirq_lock, flags);
@@ -507,7 +512,9 @@ static void rtw_pci_disable_interrupt(struct rtw_dev *rtwdev,
 
 	rtw_write32(rtwdev, RTK_PCI_HIMR0, 0);
 	rtw_write32(rtwdev, RTK_PCI_HIMR1, 0);
-	rtw_write32(rtwdev, RTK_PCI_HIMR3, 0);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		rtw_write32(rtwdev, RTK_PCI_HIMR3, 0);
+
 	rtwpci->irq_enabled = false;
 
 out:
@@ -1012,13 +1019,17 @@ static void rtw_pci_irq_recognized(struct rtw_dev *rtwdev,
 
 	irq_status[0] = rtw_read32(rtwdev, RTK_PCI_HISR0);
 	irq_status[1] = rtw_read32(rtwdev, RTK_PCI_HISR1);
-	irq_status[3] = rtw_read32(rtwdev, RTK_PCI_HISR3);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		irq_status[3] = rtw_read32(rtwdev, RTK_PCI_HISR3);
+	else
+		irq_status[3] = 0;
 	irq_status[0] &= rtwpci->irq_mask[0];
 	irq_status[1] &= rtwpci->irq_mask[1];
 	irq_status[3] &= rtwpci->irq_mask[3];
 	rtw_write32(rtwdev, RTK_PCI_HISR0, irq_status[0]);
 	rtw_write32(rtwdev, RTK_PCI_HISR1, irq_status[1]);
-	rtw_write32(rtwdev, RTK_PCI_HISR3, irq_status[3]);
+	if (rtw_chip_wcpu_11ac(rtwdev))
+		rtw_write32(rtwdev, RTK_PCI_HISR3, irq_status[3]);
 
 	spin_unlock_irqrestore(&rtwpci->hwirq_lock, flags);
 }
-- 
cgit v1.2.3-59-g8ed1b


From ba9f0d1b8d9debf2e2d83db01d3b8f63fb75d9d5 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:05 +0800
Subject: rtw88: 8723d: implement set_tx_power_index ops

The txagc table is used to map rate_id and txagc register address and
mask, and ops set_tx_power_index uses this table to write TX power to
corresponding registers. Since 8723D is a 1x1 2.4G 11n chip, only CCK, OFDM
and HT_MCS 0-7 are listed in the table.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-7-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 61 +++++++++++++++++++++++++++
 1 file changed, 61 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 5e8e0dd6456e..f2d21272b237 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -14,6 +14,29 @@
 #include "reg.h"
 #include "debug.h"
 
+static const struct rtw_hw_reg rtw8723d_txagc[] = {
+	[DESC_RATE1M]	= { .addr = 0xe08, .mask = 0x0000ff00 },
+	[DESC_RATE2M]	= { .addr = 0x86c, .mask = 0x0000ff00 },
+	[DESC_RATE5_5M]	= { .addr = 0x86c, .mask = 0x00ff0000 },
+	[DESC_RATE11M]	= { .addr = 0x86c, .mask = 0xff000000 },
+	[DESC_RATE6M]	= { .addr = 0xe00, .mask = 0x000000ff },
+	[DESC_RATE9M]	= { .addr = 0xe00, .mask = 0x0000ff00 },
+	[DESC_RATE12M]	= { .addr = 0xe00, .mask = 0x00ff0000 },
+	[DESC_RATE18M]	= { .addr = 0xe00, .mask = 0xff000000 },
+	[DESC_RATE24M]	= { .addr = 0xe04, .mask = 0x000000ff },
+	[DESC_RATE36M]	= { .addr = 0xe04, .mask = 0x0000ff00 },
+	[DESC_RATE48M]	= { .addr = 0xe04, .mask = 0x00ff0000 },
+	[DESC_RATE54M]	= { .addr = 0xe04, .mask = 0xff000000 },
+	[DESC_RATEMCS0]	= { .addr = 0xe10, .mask = 0x000000ff },
+	[DESC_RATEMCS1]	= { .addr = 0xe10, .mask = 0x0000ff00 },
+	[DESC_RATEMCS2]	= { .addr = 0xe10, .mask = 0x00ff0000 },
+	[DESC_RATEMCS3]	= { .addr = 0xe10, .mask = 0xff000000 },
+	[DESC_RATEMCS4]	= { .addr = 0xe14, .mask = 0x000000ff },
+	[DESC_RATEMCS5]	= { .addr = 0xe14, .mask = 0x0000ff00 },
+	[DESC_RATEMCS6]	= { .addr = 0xe14, .mask = 0x00ff0000 },
+	[DESC_RATEMCS7]	= { .addr = 0xe14, .mask = 0xff000000 },
+};
+
 static void rtw8723de_efuse_parsing(struct rtw_efuse *efuse,
 				    struct rtw8723d_efuse *map)
 {
@@ -70,6 +93,43 @@ static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 	rtw_write8(rtwdev, REG_LDO_EFUSE_CTRL + 3, ldo_pwr);
 }
 
+static void
+rtw8723d_set_tx_power_index_by_rate(struct rtw_dev *rtwdev, u8 path, u8 rs)
+{
+	struct rtw_hal *hal = &rtwdev->hal;
+	const struct rtw_hw_reg *txagc;
+	u8 rate, pwr_index;
+	int j;
+
+	for (j = 0; j < rtw_rate_size[rs]; j++) {
+		rate = rtw_rate_section[rs][j];
+		pwr_index = hal->tx_pwr_tbl[path][rate];
+
+		if (rate >= ARRAY_SIZE(rtw8723d_txagc)) {
+			rtw_warn(rtwdev, "rate 0x%x isn't supported\n", rate);
+			continue;
+		}
+		txagc = &rtw8723d_txagc[rate];
+		if (!txagc->addr) {
+			rtw_warn(rtwdev, "rate 0x%x isn't defined\n", rate);
+			continue;
+		}
+
+		rtw_write32_mask(rtwdev, txagc->addr, txagc->mask, pwr_index);
+	}
+}
+
+static void rtw8723d_set_tx_power_index(struct rtw_dev *rtwdev)
+{
+	struct rtw_hal *hal = &rtwdev->hal;
+	int rs, path;
+
+	for (path = 0; path < hal->rf_path_num; path++) {
+		for (rs = 0; rs <= RTW_RATE_SECTION_HT_1S; rs++)
+			rtw8723d_set_tx_power_index_by_rate(rtwdev, path, rs);
+	}
+}
+
 static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
 {
 	if (on) {
@@ -86,6 +146,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.read_efuse		= rtw8723d_read_efuse,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
+	.set_tx_power_index	= rtw8723d_set_tx_power_index,
 	.set_antenna		= NULL,
 	.cfg_ldo25		= rtw8723d_cfg_ldo25,
 	.efuse_grant		= rtw8723d_efuse_grant,
-- 
cgit v1.2.3-59-g8ed1b


From d91277de23310c497212c2a2d2313e126cc3f2b8 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:06 +0800
Subject: rtw88: 8723d: Organize chip TX/RX FIFO

TX FIFO size is 32k and it was divided into 256 pages with 128 bytes.
A boundary is used to split pages into two parts, head part is used to
store TX packets coming from host, and tail part is reserved for special
purposes, such as beacon packet, null data packet and so on.

The TX packets coming from host have many categories, such as VO, VI, BE,
BK, MG and etc. When going into head part of TX FIFO, they are classified
to four priority queue named low, normal, high and extra priority queues.
Each priority queue occupies predefined number of page, if a certain
priority queue is full, TX packet will store into PUB priority queue.

Similarly, RX FIFO is 16k and split into two parts, head part is used to
store RX packets, and tail part is 128 bytes and used to store report.
Thus, we fill this boundary to register as well.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-8-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c      | 140 +++++++++++++++++---------
 drivers/net/wireless/realtek/rtw88/mac.h      |   1 +
 drivers/net/wireless/realtek/rtw88/reg.h      |  28 ++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |  31 ++++++
 4 files changed, 154 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index f4a504b350cf..645207a01525 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -1032,13 +1032,16 @@ static int set_trx_fifo_info(struct rtw_dev *rtwdev)
 	/* config rsvd page num */
 	fifo->rsvd_drv_pg_num = 8;
 	fifo->txff_pg_num = chip->txff_size >> 7;
-	fifo->rsvd_pg_num = fifo->rsvd_drv_pg_num +
-			   RSVD_PG_H2C_EXTRAINFO_NUM +
-			   RSVD_PG_H2C_STATICINFO_NUM +
-			   RSVD_PG_H2CQ_NUM +
-			   RSVD_PG_CPU_INSTRUCTION_NUM +
-			   RSVD_PG_FW_TXBUF_NUM +
-			   csi_buf_pg_num;
+	if (rtw_chip_wcpu_11n(rtwdev))
+		fifo->rsvd_pg_num = fifo->rsvd_drv_pg_num;
+	else
+		fifo->rsvd_pg_num = fifo->rsvd_drv_pg_num +
+				   RSVD_PG_H2C_EXTRAINFO_NUM +
+				   RSVD_PG_H2C_STATICINFO_NUM +
+				   RSVD_PG_H2CQ_NUM +
+				   RSVD_PG_CPU_INSTRUCTION_NUM +
+				   RSVD_PG_FW_TXBUF_NUM +
+				   csi_buf_pg_num;
 
 	if (fifo->rsvd_pg_num > fifo->txff_pg_num)
 		return -ENOMEM;
@@ -1047,18 +1050,20 @@ static int set_trx_fifo_info(struct rtw_dev *rtwdev)
 	fifo->rsvd_boundary = fifo->txff_pg_num - fifo->rsvd_pg_num;
 
 	cur_pg_addr = fifo->txff_pg_num;
-	cur_pg_addr -= csi_buf_pg_num;
-	fifo->rsvd_csibuf_addr = cur_pg_addr;
-	cur_pg_addr -= RSVD_PG_FW_TXBUF_NUM;
-	fifo->rsvd_fw_txbuf_addr = cur_pg_addr;
-	cur_pg_addr -= RSVD_PG_CPU_INSTRUCTION_NUM;
-	fifo->rsvd_cpu_instr_addr = cur_pg_addr;
-	cur_pg_addr -= RSVD_PG_H2CQ_NUM;
-	fifo->rsvd_h2cq_addr = cur_pg_addr;
-	cur_pg_addr -= RSVD_PG_H2C_STATICINFO_NUM;
-	fifo->rsvd_h2c_sta_info_addr = cur_pg_addr;
-	cur_pg_addr -= RSVD_PG_H2C_EXTRAINFO_NUM;
-	fifo->rsvd_h2c_info_addr = cur_pg_addr;
+	if (rtw_chip_wcpu_11ac(rtwdev)) {
+		cur_pg_addr -= csi_buf_pg_num;
+		fifo->rsvd_csibuf_addr = cur_pg_addr;
+		cur_pg_addr -= RSVD_PG_FW_TXBUF_NUM;
+		fifo->rsvd_fw_txbuf_addr = cur_pg_addr;
+		cur_pg_addr -= RSVD_PG_CPU_INSTRUCTION_NUM;
+		fifo->rsvd_cpu_instr_addr = cur_pg_addr;
+		cur_pg_addr -= RSVD_PG_H2CQ_NUM;
+		fifo->rsvd_h2cq_addr = cur_pg_addr;
+		cur_pg_addr -= RSVD_PG_H2C_STATICINFO_NUM;
+		fifo->rsvd_h2c_sta_info_addr = cur_pg_addr;
+		cur_pg_addr -= RSVD_PG_H2C_EXTRAINFO_NUM;
+		fifo->rsvd_h2c_info_addr = cur_pg_addr;
+	}
 	cur_pg_addr -= fifo->rsvd_drv_pg_num;
 	fifo->rsvd_drv_addr = cur_pg_addr;
 
@@ -1070,6 +1075,65 @@ static int set_trx_fifo_info(struct rtw_dev *rtwdev)
 	return 0;
 }
 
+static int __priority_queue_cfg(struct rtw_dev *rtwdev,
+				const struct rtw_page_table *pg_tbl,
+				u16 pubq_num)
+{
+	struct rtw_fifo_conf *fifo = &rtwdev->fifo;
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_1, pg_tbl->hq_num);
+	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_2, pg_tbl->lq_num);
+	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_3, pg_tbl->nq_num);
+	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_4, pg_tbl->exq_num);
+	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_5, pubq_num);
+	rtw_write32_set(rtwdev, REG_RQPN_CTRL_2, BIT_LD_RQPN);
+
+	rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2, fifo->rsvd_boundary);
+	rtw_write8_set(rtwdev, REG_FWHW_TXQ_CTRL + 2, BIT_EN_WR_FREE_TAIL >> 16);
+
+	rtw_write16(rtwdev, REG_BCNQ_BDNY_V1, fifo->rsvd_boundary);
+	rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2 + 2, fifo->rsvd_boundary);
+	rtw_write16(rtwdev, REG_BCNQ1_BDNY_V1, fifo->rsvd_boundary);
+	rtw_write32(rtwdev, REG_RXFF_BNDY, chip->rxff_size - C2H_PKT_BUF - 1);
+	rtw_write8_set(rtwdev, REG_AUTO_LLT_V1, BIT_AUTO_INIT_LLT_V1);
+
+	if (!check_hw_ready(rtwdev, REG_AUTO_LLT_V1, BIT_AUTO_INIT_LLT_V1, 0))
+		return -EBUSY;
+
+	rtw_write8(rtwdev, REG_CR + 3, 0);
+
+	return 0;
+}
+
+static int __priority_queue_cfg_legacy(struct rtw_dev *rtwdev,
+				       const struct rtw_page_table *pg_tbl,
+				       u16 pubq_num)
+{
+	struct rtw_fifo_conf *fifo = &rtwdev->fifo;
+	struct rtw_chip_info *chip = rtwdev->chip;
+	u32 val32;
+
+	val32 = BIT_RQPN_NE(pg_tbl->nq_num, pg_tbl->exq_num);
+	rtw_write32(rtwdev, REG_RQPN_NPQ, val32);
+	val32 = BIT_RQPN_HLP(pg_tbl->hq_num, pg_tbl->lq_num, pubq_num);
+	rtw_write32(rtwdev, REG_RQPN, val32);
+
+	rtw_write8(rtwdev, REG_TRXFF_BNDY, fifo->rsvd_boundary);
+	rtw_write16(rtwdev, REG_TRXFF_BNDY + 2, chip->rxff_size - REPORT_BUF - 1);
+	rtw_write8(rtwdev, REG_DWBCN0_CTRL + 1, fifo->rsvd_boundary);
+	rtw_write8(rtwdev, REG_BCNQ_BDNY, fifo->rsvd_boundary);
+	rtw_write8(rtwdev, REG_MGQ_BDNY, fifo->rsvd_boundary);
+	rtw_write8(rtwdev, REG_WMAC_LBK_BF_HD, fifo->rsvd_boundary);
+
+	rtw_write32_set(rtwdev, REG_AUTO_LLT, BIT_AUTO_INIT_LLT);
+
+	if (!check_hw_ready(rtwdev, REG_AUTO_LLT, BIT_AUTO_INIT_LLT, 0))
+		return -EBUSY;
+
+	return 0;
+}
+
 static int priority_queue_cfg(struct rtw_dev *rtwdev)
 {
 	struct rtw_fifo_conf *fifo = &rtwdev->fifo;
@@ -1102,28 +1166,10 @@ static int priority_queue_cfg(struct rtw_dev *rtwdev)
 
 	pubq_num = fifo->acq_pg_num - pg_tbl->hq_num - pg_tbl->lq_num -
 		   pg_tbl->nq_num - pg_tbl->exq_num - pg_tbl->gapq_num;
-	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_1, pg_tbl->hq_num);
-	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_2, pg_tbl->lq_num);
-	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_3, pg_tbl->nq_num);
-	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_4, pg_tbl->exq_num);
-	rtw_write16(rtwdev, REG_FIFOPAGE_INFO_5, pubq_num);
-	rtw_write32_set(rtwdev, REG_RQPN_CTRL_2, BIT_LD_RQPN);
-
-	rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2, fifo->rsvd_boundary);
-	rtw_write8_set(rtwdev, REG_FWHW_TXQ_CTRL + 2, BIT_EN_WR_FREE_TAIL >> 16);
-
-	rtw_write16(rtwdev, REG_BCNQ_BDNY_V1, fifo->rsvd_boundary);
-	rtw_write16(rtwdev, REG_FIFOPAGE_CTRL_2 + 2, fifo->rsvd_boundary);
-	rtw_write16(rtwdev, REG_BCNQ1_BDNY_V1, fifo->rsvd_boundary);
-	rtw_write32(rtwdev, REG_RXFF_BNDY, chip->rxff_size - C2H_PKT_BUF - 1);
-	rtw_write8_set(rtwdev, REG_AUTO_LLT_V1, BIT_AUTO_INIT_LLT_V1);
-
-	if (!check_hw_ready(rtwdev, REG_AUTO_LLT_V1, BIT_AUTO_INIT_LLT_V1, 0))
-		return -EBUSY;
-
-	rtw_write8(rtwdev, REG_CR + 3, 0);
-
-	return 0;
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return __priority_queue_cfg_legacy(rtwdev, pg_tbl, pubq_num);
+	else
+		return __priority_queue_cfg(rtwdev, pg_tbl, pubq_num);
 }
 
 static int init_h2c(struct rtw_dev *rtwdev)
@@ -1203,11 +1249,13 @@ static int rtw_drv_info_cfg(struct rtw_dev *rtwdev)
 	u8 value8;
 
 	rtw_write8(rtwdev, REG_RX_DRVINFO_SZ, PHY_STATUS_SIZE);
-	value8 = rtw_read8(rtwdev, REG_TRXFF_BNDY + 1);
-	value8 &= 0xF0;
-	/* For rxdesc len = 0 issue */
-	value8 |= 0xF;
-	rtw_write8(rtwdev, REG_TRXFF_BNDY + 1, value8);
+	if (rtw_chip_wcpu_11ac(rtwdev)) {
+		value8 = rtw_read8(rtwdev, REG_TRXFF_BNDY + 1);
+		value8 &= 0xF0;
+		/* For rxdesc len = 0 issue */
+		value8 |= 0xF;
+		rtw_write8(rtwdev, REG_TRXFF_BNDY + 1, value8);
+	}
 	rtw_write32_set(rtwdev, REG_RCR, BIT_APP_PHYSTS);
 	rtw_write32_clr(rtwdev, REG_WMAC_OPTION_FUNCTION + 4, BIT(8) | BIT(9));
 
diff --git a/drivers/net/wireless/realtek/rtw88/mac.h b/drivers/net/wireless/realtek/rtw88/mac.h
index 592dc830160c..ce64cdf7a565 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.h
+++ b/drivers/net/wireless/realtek/rtw88/mac.h
@@ -10,6 +10,7 @@
 #define SDIO_LOCAL_OFFSET	0x10250000
 #define DDMA_POLLING_COUNT	1000
 #define C2H_PKT_BUF		256
+#define REPORT_BUF		128
 #define PHY_STATUS_SIZE		4
 #define ILLEGAL_KEY_GROUP	0xFAAAAA00
 
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index c1e66d656307..00eb6b6a1f5b 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -209,6 +209,19 @@
 #define REG_HMEBOX2_EX		0x01F8
 #define REG_HMEBOX3_EX		0x01FC
 
+#define REG_RQPN		0x0200
+#define BIT_MASK_HPQ		0xff
+#define BIT_SHIFT_HPQ		0
+#define BIT_RQPN_HPQ(x)		(((x) & BIT_MASK_HPQ) << BIT_SHIFT_HPQ)
+#define BIT_MASK_LPQ		0xff
+#define BIT_SHIFT_LPQ		8
+#define BIT_RQPN_LPQ(x)		(((x) & BIT_MASK_LPQ) << BIT_SHIFT_LPQ)
+#define BIT_MASK_PUBQ		0xff
+#define BIT_SHIFT_PUBQ		16
+#define BIT_RQPN_PUBQ(x)	(((x) & BIT_MASK_PUBQ) << BIT_SHIFT_PUBQ)
+#define BIT_RQPN_HLP(h, l, p)	(BIT_LD_RQPN | BIT_RQPN_HPQ(h) |	       \
+				 BIT_RQPN_LPQ(l) | BIT_RQPN_PUBQ(p))
+
 #define REG_FIFOPAGE_CTRL_2	0x0204
 #define BIT_BCN_VALID_V1	BIT(15)
 #define BIT_MASK_BCN_HEAD_1_V1	0xfff
@@ -219,6 +232,18 @@
 #define REG_TXDMA_OFFSET_CHK	0x020C
 #define REG_TXDMA_STATUS	0x0210
 #define BTI_PAGE_OVF		BIT(2)
+
+#define REG_RQPN_NPQ		0x0214
+#define BIT_MASK_NPQ		0xff
+#define BIT_SHIFT_NPQ		0
+#define BIT_MASK_EPQ		0xff
+#define BIT_SHIFT_EPQ		16
+#define BIT_RQPN_NPQ(x)		(((x) & BIT_MASK_NPQ) << BIT_SHIFT_NPQ)
+#define BIT_RQPN_EPQ(x)		(((x) & BIT_MASK_EPQ) << BIT_SHIFT_EPQ)
+#define BIT_RQPN_NE(n, e)	(BIT_RQPN_NPQ(n) | BIT_RQPN_EPQ(e))
+
+#define REG_AUTO_LLT		0x0224
+#define BIT_AUTO_INIT_LLT	BIT(16)
 #define REG_RQPN_CTRL_1		0x0228
 #define REG_RQPN_CTRL_2		0x022C
 #define BIT_LD_RQPN		BIT(31)
@@ -249,6 +274,8 @@
 #define REG_HWSEQ_CTRL		0x0423
 
 #define REG_BCNQ_BDNY_V1	0x0424
+#define REG_BCNQ_BDNY		0x0424
+#define REG_MGQ_BDNY		0x0425
 #define REG_LIFETIME_EN		0x0426
 #define BIT_BA_PARSER_EN	BIT(5)
 #define REG_SPEC_SIFS		0x0428
@@ -264,6 +291,7 @@
 #define BIT_CHECK_CCK_EN	BIT(7)
 #define REG_AMPDU_MAX_TIME_V1	0x0455
 #define REG_BCNQ1_BDNY_V1	0x0456
+#define REG_WMAC_LBK_BF_HD	0x045D
 #define REG_TX_HANG_CTRL	0x045E
 #define BIT_EN_GNT_BT_AWAKE	BIT(3)
 #define BIT_EN_EOF_V1		BIT(2)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index f2d21272b237..c03ed91349e5 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -556,6 +556,32 @@ static const struct rtw_pwr_seq_cmd *card_disable_flow_8723d[] = {
 	NULL
 };
 
+static const struct rtw_page_table page_table_8723d[] = {
+	{12, 2, 2, 0, 1},
+	{12, 2, 2, 0, 1},
+	{12, 2, 2, 0, 1},
+	{12, 2, 2, 0, 1},
+	{12, 2, 2, 0, 1},
+};
+
+static const struct rtw_rqpn rqpn_table_8723d[] = {
+	{RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_NORMAL,
+	 RTW_DMA_MAPPING_LOW, RTW_DMA_MAPPING_LOW,
+	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
+	{RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_NORMAL,
+	 RTW_DMA_MAPPING_LOW, RTW_DMA_MAPPING_LOW,
+	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
+	{RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_NORMAL,
+	 RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_HIGH,
+	 RTW_DMA_MAPPING_HIGH, RTW_DMA_MAPPING_HIGH},
+	{RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_NORMAL,
+	 RTW_DMA_MAPPING_LOW, RTW_DMA_MAPPING_LOW,
+	 RTW_DMA_MAPPING_HIGH, RTW_DMA_MAPPING_HIGH},
+	{RTW_DMA_MAPPING_NORMAL, RTW_DMA_MAPPING_NORMAL,
+	 RTW_DMA_MAPPING_LOW, RTW_DMA_MAPPING_LOW,
+	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
+};
+
 static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = {
 	[RF_PATH_A] = { .hssi_1 = 0x820, .lssi_read    = 0x8a0,
 			.hssi_2 = 0x824, .lssi_read_pi = 0x8b8},
@@ -580,17 +606,22 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.phy_efuse_size = 512,
 	.log_efuse_size = 512,
 	.ptct_efuse_size = 96 + 1,
+	.txff_size = 32768,
+	.rxff_size = 16384,
 	.txgi_factor = 1,
 	.is_pwr_by_rate_dec = true,
 	.max_power_index = 0x3f,
 	.csi_buf_pg_num = 0,
 	.band = RTW_BAND_2G,
+	.page_size = 128,
 	.ht_supported = true,
 	.vht_supported = false,
 	.lps_deep_mode_supported = 0,
 	.sys_func_en = 0xFD,
 	.pwr_on_seq = card_enable_flow_8723d,
 	.pwr_off_seq = card_disable_flow_8723d,
+	.page_table = page_table_8723d,
+	.rqpn_table = rqpn_table_8723d,
 	.rf_sipi_addr = {0x840, 0x844},
 	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
 	.fix_rf_phy_num = 2,
-- 
cgit v1.2.3-59-g8ed1b


From 75e69fb11b40ba1256b14f943c7050682c1f5458 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Wed, 22 Apr 2020 11:46:07 +0800
Subject: rtw88: 8723d: initialize mac/bb/rf basic functions

Implement rtw_chip_ops::phy_set_param and ::mac_init to initialize
mac/bb/rf; they are used during interface up. The procedure covers the
power-on sequence registers, firmware download, loading of predefined
parameters, mac/bb/rf specific registers, etc.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422034607.28747-9-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |   1 +
 drivers/net/wireless/realtek/rtw88/reg.h      |  34 +++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 124 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |   3 +
 4 files changed, 162 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 380a670eeeee..157aca641f6d 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1475,6 +1475,7 @@ struct rtw_efuse {
 	u8 ant_div_cfg;
 	u8 ant_div_type;
 	u8 regd;
+	u8 afe;
 
 	u8 lna_type_2g;
 	u8 lna_type_5g;
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 00eb6b6a1f5b..9fdfcdc5c5cf 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -6,6 +6,7 @@
 #define __RTW_REG_DEF_H__
 
 #define REG_SYS_FUNC_EN		0x0002
+#define BIT_FEN_EN_25_1		BIT(13)
 #define BIT_FEN_ELDR		BIT(12)
 #define BIT_FEN_CPUEN		BIT(2)
 #define BIT_FEN_BB_GLB_RST	BIT(1)
@@ -40,6 +41,11 @@
 #define BIT_MASK_EF_ADDR	0x3ff
 #define BIT_MASK_EF_DATA	0xff
 #define BITS_EF_ADDR		(BIT_MASK_EF_ADDR << BIT_SHIFT_EF_ADDR)
+#define BITS_PLL		0xf0
+
+#define REG_AFE_CTRL3		0x2c
+#define BIT_MASK_XTAL		0x00FFF000
+#define BIT_XTAL_GMP_BIT4	BIT(28)
 
 #define REG_LDO_EFUSE_CTRL	0x0034
 #define BIT_MASK_EFUSE_BANK_SEL	(BIT(8) | BIT(9))
@@ -61,6 +67,7 @@
 #define BIT_PAPE_SEL_EN		BIT(25)
 #define BIT_DPDT_WL_SEL		BIT(24)
 #define BIT_DPDT_SEL_EN		BIT(23)
+#define REG_LEDCFG2		0x004E
 #define REG_PAD_CTRL1		0x0064
 #define BIT_PAPE_WLBT_SEL	BIT(29)
 #define BIT_LNAON_WLBT_SEL	BIT(28)
@@ -76,9 +83,15 @@
 #define BIT_LTE_MUX_CTRL_PATH	BIT(26)
 #define REG_HCI_OPT_CTRL	0x0074
 
+#define REG_AFE_CTRL_4		0x0078
+#define BIT_CK320M_AFE_EN	BIT(4)
+#define BIT_EN_SYN		BIT(15)
+
 #define REG_LDO_SWR_CTRL	0x007C
 #define LDO_SEL			0xC3
 #define SPS_SEL			0x83
+#define BIT_XTA1		BIT(29)
+#define BIT_XTA0		BIT(28)
 
 #define REG_MCUFW_CTRL		0x0080
 #define BIT_ANA_PORT_EN		BIT(22)
@@ -197,6 +210,7 @@
 #define BIT_FS_RXDONE		BIT(16)
 #define REG_PKTBUF_DBG_CTRL	0x0140
 #define REG_C2HEVT		0x01A0
+#define REG_MCUTST_1		0x01C0
 #define REG_MCUTST_II		0x01C4
 #define REG_WOWLAN_WAKE_REASON	0x01C7
 #define REG_HMETFR		0x01CC
@@ -230,6 +244,7 @@
 #define REG_DWBCN0_CTRL		0x0208
 #define BIT_BCN_VALID		BIT(16)
 #define REG_TXDMA_OFFSET_CHK	0x020C
+#define BIT_DROP_DATA_EN	BIT(9)
 #define REG_TXDMA_STATUS	0x0210
 #define BTI_PAGE_OVF		BIT(2)
 
@@ -291,6 +306,7 @@
 #define BIT_CHECK_CCK_EN	BIT(7)
 #define REG_AMPDU_MAX_TIME_V1	0x0455
 #define REG_BCNQ1_BDNY_V1	0x0456
+#define REG_AMPDU_MAX_TIME	0x0456
 #define REG_WMAC_LBK_BF_HD	0x045D
 #define REG_TX_HANG_CTRL	0x045E
 #define BIT_EN_GNT_BT_AWAKE	BIT(3)
@@ -306,7 +322,10 @@
 #define REG_QUEUE_CTRL		0x04C6
 #define BIT_PTA_WL_TX_EN	BIT(4)
 #define BIT_PTA_EDCCA_EN	BIT(5)
+#define REG_SINGLE_AMPDU_CTRL	0x04C7
+#define BIT_EN_SINGLE_APMDU	BIT(7)
 #define REG_PROT_MODE_CTRL	0x04C8
+#define REG_MAX_AGGR_NUM	0x04CA
 #define REG_BAR_MODE_CTRL	0x04CC
 #define REG_PRECNT_CTRL		0x04E5
 #define BIT_BTCCA_CTRL		(BIT(0) | BIT(1))
@@ -326,6 +345,7 @@
 #define BIT_SHIFT_SIFS_OFDM_CTX	8
 #define BIT_SHIFT_SIFS_CCK_TRX	16
 #define BIT_SHIFT_SIFS_OFDM_TRX	24
+#define REG_AGGR_BREAK_TIME	0x051A
 #define REG_SLOT		0x051B
 #define REG_TX_PTCL_CTRL	0x0520
 #define BIT_SIFS_BK_EN		BIT(12)
@@ -337,18 +357,23 @@
 #define REG_TBTT_PROHIBIT	0x0540
 #define BIT_SHIFT_TBTT_HOLD_TIME_AP 8
 #define REG_RD_NAV_NXT		0x0544
+#define REG_NAV_PROT_LEN	0x0546
 #define REG_BCN_CTRL		0x0550
 #define BIT_DIS_TSF_UDT		BIT(4)
 #define BIT_EN_BCN_FUNCTION	BIT(3)
+#define BIT_EN_TXBCN_RPT	BIT(2)
 #define REG_BCN_CTRL_CLINT0	0x0551
 #define REG_DRVERLYINT		0x0558
 #define REG_BCNDMATIM		0x0559
+#define REG_ATIMWND		0x055A
 #define REG_USTIME_TSF		0x055C
 #define REG_BCN_MAX_ERR		0x055D
 #define REG_RXTSF_OFFSET_CCK	0x055E
 #define REG_MISC_CTRL		0x0577
 #define BIT_EN_FREE_CNT		BIT(3)
 #define BIT_DIS_SECOND_CCA	(BIT(0) | BIT(1))
+#define REG_HIQ_NO_LMT_EN	0x5A7
+#define BIT_HIQ_NO_LMT_EN_ROOT	BIT(0)
 #define REG_TIMER0_SRC_SEL	0x05B4
 #define BIT_TSFT_SEL_TIMER0	(BIT(4) | BIT(5) | BIT(6))
 
@@ -374,6 +399,7 @@
 #define BIT_HTC_LOC_CTRL	BIT(14)
 #define BIT_RPFM_CAM_ENABLE	BIT(12)
 #define BIT_TA_BCN		BIT(11)
+#define BIT_RCR_ADF		BIT(11)
 #define BIT_DISDECMYPKT		BIT(10)
 #define BIT_AICV		BIT(9)
 #define BIT_ACRC32		BIT(8)
@@ -391,6 +417,7 @@
 #define REG_MAR			0x0620
 #define REG_USTIME_EDCA		0x0638
 #define REG_ACKTO_CCK		0x0639
+#define REG_MAC_SPEC_SIFS	0x063A
 #define REG_RESP_SIFS_CCK	0x063C
 #define REG_RESP_SIFS_OFDM	0x063E
 #define REG_ACKTO		0x0640
@@ -433,12 +460,19 @@
 #define BIT_LTE_COEX_EN		BIT(7)
 #define REG_BT_STAT_CTRL	0x0778
 #define REG_BT_TDMA_TIME	0x0790
+#define REG_LTR_IDLE_LATENCY	0x0798
+#define REG_LTR_ACTIVE_LATENCY	0x079C
+#define REG_LTR_CTRL_BASIC	0x07A4
 #define REG_WMAC_OPTION_FUNCTION 0x07D0
 #define REG_WMAC_OPTION_FUNCTION_1 0x07D4
 
+#define REG_FPGA0_RFMOD		0x0800
+#define BIT_CCKEN		BIT(24)
+#define BIT_OFDMEN		BIT(25)
 #define REG_RX_GAIN_EN		0x081c
 
 #define REG_RFE_CTRL_E		0x0974
+#define REG_2ND_CCA_CTRL	0x0976
 
 #define REG_DIS_DPD		0x0a70
 #define DIS_DPD_MASK		GENMASK(9, 0)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index c03ed91349e5..8ca4d5794434 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -37,6 +37,98 @@ static const struct rtw_hw_reg rtw8723d_txagc[] = {
 	[DESC_RATEMCS7]	= { .addr = 0xe14, .mask = 0xff000000 },
 };
 
+#define WLAN_TXQ_RPT_EN		0x1F
+#define WLAN_SLOT_TIME		0x09
+#define WLAN_RL_VAL		0x3030
+#define WLAN_BAR_VAL		0x0201ffff
+#define BIT_MASK_TBTT_HOLD	0x00000fff
+#define BIT_SHIFT_TBTT_HOLD	8
+#define BIT_MASK_TBTT_SETUP	0x000000ff
+#define BIT_SHIFT_TBTT_SETUP	0
+#define BIT_MASK_TBTT_MASK	((BIT_MASK_TBTT_HOLD << BIT_SHIFT_TBTT_HOLD) | \
+				 (BIT_MASK_TBTT_SETUP << BIT_SHIFT_TBTT_SETUP))
+#define TBTT_TIME(s, h)((((s) & BIT_MASK_TBTT_SETUP) << BIT_SHIFT_TBTT_SETUP) |\
+			(((h) & BIT_MASK_TBTT_HOLD) << BIT_SHIFT_TBTT_HOLD))
+#define WLAN_TBTT_TIME_NORMAL	TBTT_TIME(0x04, 0x80)
+#define WLAN_TBTT_TIME_STOP_BCN	TBTT_TIME(0x04, 0x64)
+#define WLAN_PIFS_VAL		0
+#define WLAN_AGG_BRK_TIME	0x16
+#define WLAN_NAV_PROT_LEN	0x0040
+#define WLAN_SPEC_SIFS		0x100a
+#define WLAN_RX_PKT_LIMIT	0x17
+#define WLAN_MAX_AGG_NR		0x0A
+#define WLAN_AMPDU_MAX_TIME	0x1C
+#define WLAN_ANT_SEL		0x82
+#define WLAN_LTR_IDLE_LAT	0x883C883C
+#define WLAN_LTR_ACT_LAT	0x880B880B
+#define WLAN_LTR_CTRL1		0xCB004010
+#define WLAN_LTR_CTRL2		0x01233425
+
+static void rtw8723d_phy_set_param(struct rtw_dev *rtwdev)
+{
+	u8 xtal_cap;
+	u32 val32;
+
+	/* power on BB/RF domain */
+	rtw_write16_set(rtwdev, REG_SYS_FUNC_EN,
+			BIT_FEN_EN_25_1 | BIT_FEN_BB_GLB_RST | BIT_FEN_BB_RSTB);
+	rtw_write8_set(rtwdev, REG_RF_CTRL,
+		       BIT_RF_EN | BIT_RF_RSTB | BIT_RF_SDM_RSTB);
+	rtw_write8(rtwdev, REG_AFE_CTRL1 + 1, 0x80);
+
+	rtw_phy_load_tables(rtwdev);
+
+	/* post init after header files config */
+	rtw_write32_clr(rtwdev, REG_RCR, BIT_RCR_ADF);
+	rtw_write8_set(rtwdev, REG_HIQ_NO_LMT_EN, BIT_HIQ_NO_LMT_EN_ROOT);
+	rtw_write16_set(rtwdev, REG_AFE_CTRL_4, BIT_CK320M_AFE_EN | BIT_EN_SYN);
+
+	xtal_cap = rtwdev->efuse.crystal_cap & 0x3F;
+	rtw_write32_mask(rtwdev, REG_AFE_CTRL3, BIT_MASK_XTAL,
+			 xtal_cap | (xtal_cap << 6));
+	rtw_write32_set(rtwdev, REG_FPGA0_RFMOD, BIT_CCKEN | BIT_OFDMEN);
+	if ((rtwdev->efuse.afe >> 4) == 14) {
+		rtw_write32_set(rtwdev, REG_AFE_CTRL3, BIT_XTAL_GMP_BIT4);
+		rtw_write32_clr(rtwdev, REG_AFE_CTRL1, BITS_PLL);
+		rtw_write32_set(rtwdev, REG_LDO_SWR_CTRL, BIT_XTA1);
+		rtw_write32_clr(rtwdev, REG_LDO_SWR_CTRL, BIT_XTA0);
+	}
+
+	rtw_write8(rtwdev, REG_SLOT, WLAN_SLOT_TIME);
+	rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 1, WLAN_TXQ_RPT_EN);
+	rtw_write16(rtwdev, REG_RETRY_LIMIT, WLAN_RL_VAL);
+	rtw_write32(rtwdev, REG_BAR_MODE_CTRL, WLAN_BAR_VAL);
+	rtw_write8(rtwdev, REG_ATIMWND, 0x2);
+	rtw_write8(rtwdev, REG_BCN_CTRL,
+		   BIT_DIS_TSF_UDT | BIT_EN_BCN_FUNCTION | BIT_EN_TXBCN_RPT);
+	val32 = rtw_read32(rtwdev, REG_TBTT_PROHIBIT);
+	val32 &= ~BIT_MASK_TBTT_MASK;
+	val32 |= WLAN_TBTT_TIME_STOP_BCN;
+	rtw_write8(rtwdev, REG_TBTT_PROHIBIT, val32);
+	rtw_write8(rtwdev, REG_PIFS, WLAN_PIFS_VAL);
+	rtw_write8(rtwdev, REG_AGGR_BREAK_TIME, WLAN_AGG_BRK_TIME);
+	rtw_write16(rtwdev, REG_NAV_PROT_LEN, WLAN_NAV_PROT_LEN);
+	rtw_write16(rtwdev, REG_MAC_SPEC_SIFS, WLAN_SPEC_SIFS);
+	rtw_write16(rtwdev, REG_SIFS, WLAN_SPEC_SIFS);
+	rtw_write16(rtwdev, REG_SIFS + 2, WLAN_SPEC_SIFS);
+	rtw_write8(rtwdev, REG_SINGLE_AMPDU_CTRL, BIT_EN_SINGLE_APMDU);
+	rtw_write8(rtwdev, REG_RX_PKT_LIMIT, WLAN_RX_PKT_LIMIT);
+	rtw_write8(rtwdev, REG_MAX_AGGR_NUM, WLAN_MAX_AGG_NR);
+	rtw_write8(rtwdev, REG_AMPDU_MAX_TIME, WLAN_AMPDU_MAX_TIME);
+	rtw_write8(rtwdev, REG_LEDCFG2, WLAN_ANT_SEL);
+
+	rtw_write32(rtwdev, REG_LTR_IDLE_LATENCY, WLAN_LTR_IDLE_LAT);
+	rtw_write32(rtwdev, REG_LTR_ACTIVE_LATENCY, WLAN_LTR_ACT_LAT);
+	rtw_write32(rtwdev, REG_LTR_CTRL_BASIC, WLAN_LTR_CTRL1);
+	rtw_write32(rtwdev, REG_LTR_CTRL_BASIC + 4, WLAN_LTR_CTRL2);
+
+	rtw_phy_init(rtwdev);
+
+	rtw_write16_set(rtwdev, REG_TXDMA_OFFSET_CHK, BIT_DROP_DATA_EN);
+	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x50);
+	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x20);
+}
+
 static void rtw8723de_efuse_parsing(struct rtw_efuse *efuse,
 				    struct rtw8723d_efuse *map)
 {
@@ -63,6 +155,7 @@ static int rtw8723d_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
 	efuse->regd = map->rf_board_option & 0x7;
 	efuse->thermal_meter[0] = map->thermal_meter;
 	efuse->thermal_meter_k = map->thermal_meter;
+	efuse->afe = map->afe;
 
 	for (i = 0; i < 4; i++)
 		efuse->txpwr_idx_table[i] = map->txpwr_idx_table[i];
@@ -79,6 +172,35 @@ static int rtw8723d_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
 	return 0;
 }
 
+#define BIT_CFENDFORM		BIT(9)
+#define BIT_WMAC_TCR_ERR0	BIT(12)
+#define BIT_WMAC_TCR_ERR1	BIT(13)
+#define BIT_TCR_CFG		(BIT_CFENDFORM | BIT_WMAC_TCR_ERR0 |	       \
+				 BIT_WMAC_TCR_ERR1)
+#define WLAN_RX_FILTER0		0xFFFF
+#define WLAN_RX_FILTER1		0x400
+#define WLAN_RX_FILTER2		0xFFFF
+#define WLAN_RCR_CFG		0x700060CE
+
+static int rtw8723d_mac_init(struct rtw_dev *rtwdev)
+{
+	rtw_write8(rtwdev, REG_FWHW_TXQ_CTRL + 1, WLAN_TXQ_RPT_EN);
+	rtw_write32(rtwdev, REG_TCR, BIT_TCR_CFG);
+
+	rtw_write16(rtwdev, REG_RXFLTMAP0, WLAN_RX_FILTER0);
+	rtw_write16(rtwdev, REG_RXFLTMAP1, WLAN_RX_FILTER1);
+	rtw_write16(rtwdev, REG_RXFLTMAP2, WLAN_RX_FILTER2);
+	rtw_write32(rtwdev, REG_RCR, WLAN_RCR_CFG);
+
+	rtw_write32(rtwdev, REG_INT_MIG, 0);
+	rtw_write32(rtwdev, REG_MCUTST_1, 0x0);
+
+	rtw_write8(rtwdev, REG_MISC_CTRL, BIT_DIS_SECOND_CCA);
+	rtw_write8(rtwdev, REG_2ND_CCA_CTRL, 0);
+
+	return 0;
+}
+
 static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 {
 	u8 ldo_pwr;
@@ -143,7 +265,9 @@ static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
 }
 
 static struct rtw_chip_ops rtw8723d_ops = {
+	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
+	.mac_init		= rtw8723d_mac_init,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_tx_power_index	= rtw8723d_set_tx_power_index,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 1939d9897a26..6321dea83519 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -44,4 +44,7 @@ struct rtw8723d_efuse {
 	struct rtw8723de_efuse e;
 };
 
+#define REG_OFDM0_XAAGC1	0x0c50
+#define REG_OFDM0_XBAGC1	0x0c58
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From aac392d8553f3fcc8dd42fc8f7af8eb0593ce9ca Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Wed, 22 Apr 2020 00:28:32 +0530
Subject: ath10k: Fix the invalid tx/rx chainmask configuration

The driver allows an invalid tx/rx chainmask configuration (other than
1, 3, 7, 15) to be set by the user. The invalid chainmask values cause
the firmware to crash.

Hence, reject the invalid chainmask values in the driver by not
sending the pdev set command to the firmware.

Tested hardware: QCA9888
Tested firmware: 10.4-3.10-00047

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587495512-29813-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/mac.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 6791c0035be0..5de7910c24e7 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -4529,17 +4529,18 @@ static int ath10k_get_antenna(struct ieee80211_hw *hw, u32 *tx_ant, u32 *rx_ant)
 	return 0;
 }
 
-static void ath10k_check_chain_mask(struct ath10k *ar, u32 cm, const char *dbg)
+static bool ath10k_check_chain_mask(struct ath10k *ar, u32 cm, const char *dbg)
 {
 	/* It is not clear that allowing gaps in chainmask
 	 * is helpful.  Probably it will not do what user
 	 * is hoping for, so warn in that case.
 	 */
 	if (cm == 15 || cm == 7 || cm == 3 || cm == 1 || cm == 0)
-		return;
+		return true;
 
-	ath10k_warn(ar, "mac %s antenna chainmask may be invalid: 0x%x.  Suggested values: 15, 7, 3, 1 or 0.\n",
+	ath10k_warn(ar, "mac %s antenna chainmask is invalid: 0x%x.  Suggested values: 15, 7, 3, 1 or 0.\n",
 		    dbg, cm);
+	return false;
 }
 
 static int ath10k_mac_get_vht_cap_bf_sts(struct ath10k *ar)
@@ -4722,11 +4723,15 @@ static void ath10k_mac_setup_ht_vht_cap(struct ath10k *ar)
 static int __ath10k_set_antenna(struct ath10k *ar, u32 tx_ant, u32 rx_ant)
 {
 	int ret;
+	bool is_valid_tx_chain_mask, is_valid_rx_chain_mask;
 
 	lockdep_assert_held(&ar->conf_mutex);
 
-	ath10k_check_chain_mask(ar, tx_ant, "tx");
-	ath10k_check_chain_mask(ar, rx_ant, "rx");
+	is_valid_tx_chain_mask = ath10k_check_chain_mask(ar, tx_ant, "tx");
+	is_valid_rx_chain_mask = ath10k_check_chain_mask(ar, rx_ant, "rx");
+
+	if (!is_valid_tx_chain_mask || !is_valid_rx_chain_mask)
+		return -EINVAL;
 
 	ar->cfg_tx_chainmask = tx_ant;
 	ar->cfg_rx_chainmask = rx_ant;
-- 
cgit v1.2.3-59-g8ed1b


From 8347784d6f5fae467e82522029ab1290673c50d6 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Wed, 22 Apr 2020 16:47:19 +0800
Subject: ath10k: drop the TX packet which size exceed credit size for sdio

SDIO chips use DMA buffers to receive TX packets from ath10k, and each
buffer has a size limit: if the packet size exceeds the credit size,
it triggers an error in the firmware.

Tested with QCA6174 SDIO with firmware
WLAN.RMH.4.4.1-00017-QCARMSWP-1.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200422084719.3479-1-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/htc.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/htc.c b/drivers/net/wireless/ath/ath10k/htc.c
index 58ceba75d20a..31df6dd04bf6 100644
--- a/drivers/net/wireless/ath/ath10k/htc.c
+++ b/drivers/net/wireless/ath/ath10k/htc.c
@@ -846,6 +846,11 @@ int ath10k_htc_send_hl(struct ath10k_htc *htc,
 	struct ath10k_htc_ep *ep = &htc->endpoint[eid];
 	struct ath10k *ar = htc->ar;
 
+	if (sizeof(struct ath10k_htc_hdr) + skb->len > ep->tx_credit_size) {
+		ath10k_dbg(ar, ATH10K_DBG_HTC, "tx exceed max len %d\n", skb->len);
+		return -ENOMEM;
+	}
+
 	ath10k_dbg(ar, ATH10K_DBG_HTC, "htc send hl eid %d bundle %d tx count %d len %d\n",
 		   eid, ep->bundle_tx, skb_queue_len(&ep->tx_req_head), skb->len);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5d1c9a114a6efba2c8391e39d4ac3e4e5c7b6d32 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Tue, 7 Apr 2020 18:59:51 +0300
Subject: net/mlx5: Update vport.c to new cmd interface

Do mass update of vport.c to reuse newly introduced
mlx5_cmd_exec_in*() interfaces.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/ib_virt.c            |   2 +-
 drivers/infiniband/hw/mlx5/mad.c                |   4 +-
 drivers/net/ethernet/mellanox/mlx5/core/vport.c | 142 +++++++++++-------------
 include/linux/mlx5/vport.h                      |   3 +-
 4 files changed, 71 insertions(+), 80 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/ib_virt.c b/drivers/infiniband/hw/mlx5/ib_virt.c
index b61165359954..46b2d370fb3f 100644
--- a/drivers/infiniband/hw/mlx5/ib_virt.c
+++ b/drivers/infiniband/hw/mlx5/ib_virt.c
@@ -134,7 +134,7 @@ int mlx5_ib_get_vf_stats(struct ib_device *device, int vf,
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_core_query_vport_counter(mdev, true, vf, port, out, out_sz);
+	err = mlx5_core_query_vport_counter(mdev, true, vf, port, out);
 	if (err)
 		goto ex;
 
diff --git a/drivers/infiniband/hw/mlx5/mad.c b/drivers/infiniband/hw/mlx5/mad.c
index f0ab6d7d8497..454ce5de2de7 100644
--- a/drivers/infiniband/hw/mlx5/mad.c
+++ b/drivers/infiniband/hw/mlx5/mad.c
@@ -187,8 +187,8 @@ static int process_pma_cmd(struct mlx5_ib_dev *dev, u8 port_num,
 			goto done;
 		}
 
-		err = mlx5_core_query_vport_counter(mdev, 0, 0,
-						    mdev_port_num, out_cnt, sz);
+		err = mlx5_core_query_vport_counter(mdev, 0, 0, mdev_port_num,
+						    out_cnt);
 		if (!err)
 			pma_cnt_ext_assign(pma_cnt_ext, out_cnt);
 	} else {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/vport.c b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
index 23f879da9104..c107d92dc118 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/vport.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/vport.c
@@ -40,10 +40,11 @@
 /* Mutex to hold while enabling or disabling RoCE */
 static DEFINE_MUTEX(mlx5_roce_en_lock);
 
-static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
-				   u16 vport, u32 *out, int outlen)
+u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 {
-	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_vport_state_in)] = {};
+	int err;
 
 	MLX5_SET(query_vport_state_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_VPORT_STATE);
@@ -52,14 +53,9 @@ static int _mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod,
 	if (vport)
 		MLX5_SET(query_vport_state_in, in, other_vport, 1);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
-}
-
-u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
-{
-	u32 out[MLX5_ST_SZ_DW(query_vport_state_out)] = {0};
-
-	_mlx5_query_vport_state(mdev, opmod, vport, out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, query_vport_state, in, out);
+	if (err)
+		return 0;
 
 	return MLX5_GET(query_vport_state_out, out, state);
 }
@@ -67,8 +63,7 @@ u8 mlx5_query_vport_state(struct mlx5_core_dev *mdev, u8 opmod, u16 vport)
 int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 				  u16 vport, u8 other_vport, u8 state)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(modify_vport_state_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_vport_state_in)] = {};
 
 	MLX5_SET(modify_vport_state_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_VPORT_STATE);
@@ -77,13 +72,13 @@ int mlx5_modify_vport_admin_state(struct mlx5_core_dev *mdev, u8 opmod,
 	MLX5_SET(modify_vport_state_in, in, other_vport, other_vport);
 	MLX5_SET(modify_vport_state_in, in, admin_state, state);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, modify_vport_state, in);
 }
 
 static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
-					u32 *out, int outlen)
+					u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
 
 	MLX5_SET(query_nic_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_NIC_VPORT_CONTEXT);
@@ -91,26 +86,16 @@ static int mlx5_query_nic_vport_context(struct mlx5_core_dev *mdev, u16 vport,
 	if (vport)
 		MLX5_SET(query_nic_vport_context_in, in, other_vport, 1);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
-}
-
-static int mlx5_modify_nic_vport_context(struct mlx5_core_dev *mdev, void *in,
-					 int inlen)
-{
-	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
-
-	MLX5_SET(modify_nic_vport_context_in, in, opcode,
-		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
-	return mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
 }
 
 int mlx5_query_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				    u16 vport, u8 *min_inline)
 {
-	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
 	int err;
 
-	err = mlx5_query_nic_vport_context(mdev, vport, out, sizeof(out));
+	err = mlx5_query_nic_vport_context(mdev, vport, out);
 	if (!err)
 		*min_inline = MLX5_GET(query_nic_vport_context_out, out,
 				       nic_vport_context.min_wqe_inline_mode);
@@ -139,8 +124,7 @@ EXPORT_SYMBOL_GPL(mlx5_query_min_inline);
 int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     u16 vport, u8 min_inline)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {0};
-	int inlen = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in);
+	u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
 	void *nic_vport_ctx;
 
 	MLX5_SET(modify_nic_vport_context_in, in,
@@ -152,23 +136,20 @@ int mlx5_modify_nic_vport_min_inline(struct mlx5_core_dev *mdev,
 				     in, nic_vport_context);
 	MLX5_SET(nic_vport_context, nic_vport_ctx,
 		 min_wqe_inline_mode, min_inline);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	return mlx5_modify_nic_vport_context(mdev, in, inlen);
+	return mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 }
 
 int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				     u16 vport, bool other, u8 *addr)
 {
-	int outlen = MLX5_ST_SZ_BYTES(query_nic_vport_context_out);
+	u32 out[MLX5_ST_SZ_DW(query_nic_vport_context_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(query_nic_vport_context_in)] = {};
 	u8 *out_addr;
-	u32 *out;
 	int err;
 
-	out = kvzalloc(outlen, GFP_KERNEL);
-	if (!out)
-		return -ENOMEM;
-
 	out_addr = MLX5_ADDR_OF(query_nic_vport_context_out, out,
 				nic_vport_context.permanent_address);
 
@@ -177,11 +158,10 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 	MLX5_SET(query_nic_vport_context_in, in, vport_number, vport);
 	MLX5_SET(query_nic_vport_context_in, in, other_vport, other);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+	err = mlx5_cmd_exec_inout(mdev, query_nic_vport_context, in, out);
 	if (!err)
 		ether_addr_copy(addr, &out_addr[2]);
 
-	kvfree(out);
 	return err;
 }
 EXPORT_SYMBOL_GPL(mlx5_query_nic_vport_mac_address);
@@ -216,8 +196,10 @@ int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *mdev,
 				permanent_address);
 
 	ether_addr_copy(&perm_mac[2], addr);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	kvfree(in);
 
@@ -235,7 +217,7 @@ int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu)
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	err = mlx5_query_nic_vport_context(mdev, 0, out);
 	if (!err)
 		*mtu = MLX5_GET(query_nic_vport_context_out, out,
 				nic_vport_context.mtu);
@@ -257,8 +239,10 @@ int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu)
 
 	MLX5_SET(modify_nic_vport_context_in, in, field_select.mtu, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.mtu, mtu);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	kvfree(in);
 	return err;
@@ -292,7 +276,7 @@ int mlx5_query_nic_vport_mac_list(struct mlx5_core_dev *dev,
 		req_list_size = max_list_size;
 	}
 
-	out_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
+	out_sz = MLX5_ST_SZ_BYTES(query_nic_vport_context_in) +
 			req_list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
 	out = kzalloc(out_sz, GFP_KERNEL);
@@ -332,7 +316,7 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 				   u8 addr_list[][ETH_ALEN],
 				   int list_size)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)];
+	u32 out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {};
 	void *nic_vport_ctx;
 	int max_list_size;
 	int in_sz;
@@ -350,7 +334,6 @@ int mlx5_modify_nic_vport_mac_list(struct mlx5_core_dev *dev,
 	in_sz = MLX5_ST_SZ_BYTES(modify_nic_vport_context_in) +
 		list_size * MLX5_ST_SZ_BYTES(mac_address_layout);
 
-	memset(out, 0, sizeof(out));
 	in = kzalloc(in_sz, GFP_KERNEL);
 	if (!in)
 		return -ENOMEM;
@@ -442,7 +425,7 @@ int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev,
 	if (!out)
 		return -ENOMEM;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	mlx5_query_nic_vport_context(mdev, 0, out);
 
 	*system_image_guid = MLX5_GET64(query_nic_vport_context_out, out,
 					nic_vport_context.system_image_guid);
@@ -462,7 +445,7 @@ int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid)
 	if (!out)
 		return -ENOMEM;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	mlx5_query_nic_vport_context(mdev, 0, out);
 
 	*node_guid = MLX5_GET64(query_nic_vport_context_out, out,
 				nic_vport_context.node_guid);
@@ -498,8 +481,10 @@ int mlx5_modify_nic_vport_node_guid(struct mlx5_core_dev *mdev,
 	nic_vport_context = MLX5_ADDR_OF(modify_nic_vport_context_in,
 					 in, nic_vport_context);
 	MLX5_SET64(nic_vport_context, nic_vport_context, node_guid, node_guid);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	kvfree(in);
 
@@ -516,7 +501,7 @@ int mlx5_query_nic_vport_qkey_viol_cntr(struct mlx5_core_dev *mdev,
 	if (!out)
 		return -ENOMEM;
 
-	mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	mlx5_query_nic_vport_context(mdev, 0, out);
 
 	*qkey_viol_cntr = MLX5_GET(query_nic_vport_context_out, out,
 				   nic_vport_context.qkey_violation_counter);
@@ -664,7 +649,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 				 struct mlx5_hca_vport_context *rep)
 {
 	int out_sz = MLX5_ST_SZ_BYTES(query_hca_vport_context_out);
-	int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {0};
+	int in[MLX5_ST_SZ_DW(query_hca_vport_context_in)] = {};
 	int is_group_manager;
 	void *out;
 	void *ctx;
@@ -691,7 +676,7 @@ int mlx5_query_hca_vport_context(struct mlx5_core_dev *dev,
 	if (MLX5_CAP_GEN(dev, num_ports) == 2)
 		MLX5_SET(query_hca_vport_context_in, in, port_num, port_num);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out,  out_sz);
+	err = mlx5_cmd_exec_inout(dev, query_hca_vport_context, in, out);
 	if (err)
 		goto ex;
 
@@ -788,7 +773,7 @@ int mlx5_query_nic_vport_promisc(struct mlx5_core_dev *mdev,
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_query_nic_vport_context(mdev, vport, out, outlen);
+	err = mlx5_query_nic_vport_context(mdev, vport, out);
 	if (err)
 		goto out;
 
@@ -825,8 +810,10 @@ int mlx5_modify_nic_vport_promisc(struct mlx5_core_dev *mdev,
 		 nic_vport_context.promisc_mc, promisc_mc);
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 nic_vport_context.promisc_all, promisc_all);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	kvfree(in);
 
@@ -865,8 +852,10 @@ int mlx5_nic_vport_update_local_lb(struct mlx5_core_dev *mdev, bool enable)
 	if (MLX5_CAP_GEN(mdev, disable_local_lb_uc))
 		MLX5_SET(modify_nic_vport_context_in, in,
 			 field_select.disable_uc_local_lb, 1);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	if (!err)
 		mlx5_core_dbg(mdev, "%s local_lb\n",
@@ -888,7 +877,7 @@ int mlx5_nic_vport_query_local_lb(struct mlx5_core_dev *mdev, bool *status)
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_query_nic_vport_context(mdev, 0, out, outlen);
+	err = mlx5_query_nic_vport_context(mdev, 0, out);
 	if (err)
 		goto out;
 
@@ -925,8 +914,10 @@ static int mlx5_nic_vport_update_roce_state(struct mlx5_core_dev *mdev,
 	MLX5_SET(modify_nic_vport_context_in, in, field_select.roce_en, 1);
 	MLX5_SET(modify_nic_vport_context_in, in, nic_vport_context.roce_en,
 		 state);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(mdev, in, inlen);
+	err = mlx5_cmd_exec_in(mdev, modify_nic_vport_context, in);
 
 	kvfree(in);
 
@@ -965,16 +956,15 @@ int mlx5_nic_vport_disable_roce(struct mlx5_core_dev *mdev)
 	mutex_unlock(&mlx5_roce_en_lock);
 	return err;
 }
-EXPORT_SYMBOL_GPL(mlx5_nic_vport_disable_roce);
+EXPORT_SYMBOL(mlx5_nic_vport_disable_roce);
 
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
-				  int vf, u8 port_num, void *out,
-				  size_t out_sz)
+				  int vf, u8 port_num, void *out)
 {
-	int	in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
-	int	is_group_manager;
-	void   *in;
-	int	err;
+	int in_sz = MLX5_ST_SZ_BYTES(query_vport_counter_in);
+	int is_group_manager;
+	void *in;
+	int err;
 
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
 	in = kvzalloc(in_sz, GFP_KERNEL);
@@ -997,7 +987,7 @@ int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
 	if (MLX5_CAP_GEN(dev, num_ports) == 2)
 		MLX5_SET(query_vport_counter_in, in, port_num, port_num);
 
-	err = mlx5_cmd_exec(dev, in, in_sz, out,  out_sz);
+	err = mlx5_cmd_exec_inout(dev, query_vport_counter, in, out);
 free:
 	kvfree(in);
 	return err;
@@ -1008,8 +998,8 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
 				u8 other_vport, u64 *rx_discard_vport_down,
 				u64 *tx_discard_vport_down)
 {
-	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(query_vnic_env_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
 	int err;
 
 	MLX5_SET(query_vnic_env_in, in, opcode,
@@ -1018,7 +1008,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
 	MLX5_SET(query_vnic_env_in, in, vport_number, vport);
 	MLX5_SET(query_vnic_env_in, in, other_vport, other_vport);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
 	if (err)
 		return err;
 
@@ -1035,11 +1025,10 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 				       struct mlx5_hca_vport_context *req)
 {
 	int in_sz = MLX5_ST_SZ_BYTES(modify_hca_vport_context_in);
-	u8 out[MLX5_ST_SZ_BYTES(modify_hca_vport_context_out)];
 	int is_group_manager;
+	void *ctx;
 	void *in;
 	int err;
-	void *ctx;
 
 	mlx5_core_dbg(dev, "vf %d\n", vf);
 	is_group_manager = MLX5_CAP_GEN(dev, vport_group_manager);
@@ -1047,7 +1036,6 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 	if (!in)
 		return -ENOMEM;
 
-	memset(out, 0, sizeof(out));
 	MLX5_SET(modify_hca_vport_context_in, in, opcode, MLX5_CMD_OP_MODIFY_HCA_VPORT_CONTEXT);
 	if (other_vport) {
 		if (is_group_manager) {
@@ -1074,7 +1062,7 @@ int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 	MLX5_SET(hca_vport_context, ctx, cap_mask1, req->cap_mask1);
 	MLX5_SET(hca_vport_context, ctx, cap_mask1_field_select,
 		 req->cap_mask1_perm);
-	err = mlx5_cmd_exec(dev, in, in_sz, out, sizeof(out));
+	err = mlx5_cmd_exec_in(dev, modify_hca_vport_context, in);
 ex:
 	kfree(in);
 	return err;
@@ -1103,8 +1091,10 @@ int mlx5_nic_vport_affiliate_multiport(struct mlx5_core_dev *master_mdev,
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 nic_vport_context.affiliation_criteria,
 		 MLX5_CAP_GEN(port_mdev, affiliate_nic_vport_criteria));
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+	err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
 	if (err)
 		mlx5_nic_vport_disable_roce(port_mdev);
 
@@ -1129,8 +1119,10 @@ int mlx5_nic_vport_unaffiliate_multiport(struct mlx5_core_dev *port_mdev)
 		 nic_vport_context.affiliated_vhca_id, 0);
 	MLX5_SET(modify_nic_vport_context_in, in,
 		 nic_vport_context.affiliation_criteria, 0);
+	MLX5_SET(modify_nic_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_MODIFY_NIC_VPORT_CONTEXT);
 
-	err = mlx5_modify_nic_vport_context(port_mdev, in, inlen);
+	err = mlx5_cmd_exec_in(port_mdev, modify_nic_vport_context, in);
 	if (!err)
 		mlx5_nic_vport_disable_roce(port_mdev);
 
@@ -1170,4 +1162,4 @@ u16 mlx5_eswitch_get_total_vports(const struct mlx5_core_dev *dev)
 {
 	return MLX5_SPECIAL_VPORTS(dev) + mlx5_core_max_vfs(dev);
 }
-EXPORT_SYMBOL(mlx5_eswitch_get_total_vports);
+EXPORT_SYMBOL_GPL(mlx5_eswitch_get_total_vports);
diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h
index 16060fb9b5e5..8170da1e9f70 100644
--- a/include/linux/mlx5/vport.h
+++ b/include/linux/mlx5/vport.h
@@ -127,8 +127,7 @@ int mlx5_query_vport_down_stats(struct mlx5_core_dev *mdev, u16 vport,
 				u8 other_vport, u64 *rx_discard_vport_down,
 				u64 *tx_discard_vport_down);
 int mlx5_core_query_vport_counter(struct mlx5_core_dev *dev, u8 other_vport,
-				  int vf, u8 port_num, void *out,
-				  size_t out_sz);
+				  int vf, u8 port_num, void *out);
 int mlx5_core_modify_hca_vport_context(struct mlx5_core_dev *dev,
 				       u8 other_vport, u8 port_num,
 				       int vf,
-- 
cgit v1.2.3-59-g8ed1b


From d1f620500cde5c72c7b96a19474733c4c6c67f38 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 11:39:14 +0300
Subject: net/mlx5: Update cq.c to new cmd interface

Do a mass update of cq.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cq.c       | 22 +++++++++-------------
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c  |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en/health.c    |  2 +-
 include/linux/mlx5/cq.h                            |  2 +-
 4 files changed, 12 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cq.c b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
index 4477a590b308..8379b24cb838 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cq.c
@@ -90,8 +90,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen, u32 *out, int outlen)
 {
 	int eqn = MLX5_GET(cqc, MLX5_ADDR_OF(create_cq_in, in, cq_context), c_eqn);
-	u32 dout[MLX5_ST_SZ_DW(destroy_cq_out)];
-	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)];
+	u32 din[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
 	struct mlx5_eq_comp *eq;
 	int err;
 
@@ -141,20 +140,17 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 err_cq_add:
 	mlx5_eq_del_cq(&eq->core, cq);
 err_cmd:
-	memset(din, 0, sizeof(din));
-	memset(dout, 0, sizeof(dout));
 	MLX5_SET(destroy_cq_in, din, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, din, cqn, cq->cqn);
 	MLX5_SET(destroy_cq_in, din, uid, cq->uid);
-	mlx5_cmd_exec(dev, din, sizeof(din), dout, sizeof(dout));
+	mlx5_cmd_exec_in(dev, destroy_cq, din);
 	return err;
 }
 EXPORT_SYMBOL(mlx5_core_create_cq);
 
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_cq_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_cq_in)] = {};
 	int err;
 
 	mlx5_eq_del_cq(mlx5_get_async_eq(dev), cq);
@@ -163,7 +159,7 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 	MLX5_SET(destroy_cq_in, in, opcode, MLX5_CMD_OP_DESTROY_CQ);
 	MLX5_SET(destroy_cq_in, in, cqn, cq->cqn);
 	MLX5_SET(destroy_cq_in, in, uid, cq->uid);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_in(dev, destroy_cq, in);
 	if (err)
 		return err;
 
@@ -178,20 +174,20 @@ int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq)
 EXPORT_SYMBOL(mlx5_core_destroy_cq);
 
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-		       u32 *out, int outlen)
+		       u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_cq_in)] = {};
 
 	MLX5_SET(query_cq_in, in, opcode, MLX5_CMD_OP_QUERY_CQ);
 	MLX5_SET(query_cq_in, in, cqn, cq->cqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec_inout(dev, query_cq, in, out);
 }
 EXPORT_SYMBOL(mlx5_core_query_cq);
 
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_cq_out)] = {};
 
 	MLX5_SET(modify_cq_in, in, opcode, MLX5_CMD_OP_MODIFY_CQ);
 	MLX5_SET(modify_cq_in, in, uid, cq->uid);
@@ -204,7 +200,7 @@ int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
 				   u16 cq_period,
 				   u16 cq_max_count)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_cq_in)] = {};
 	void *cqc;
 
 	MLX5_SET(modify_cq_in, in, cqn, cq->cqn);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 65fef5a86644..c05e6a2c9126 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -333,7 +333,7 @@ static u64 cq_read_field(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 	if (!out)
 		return param;
 
-	err = mlx5_core_query_cq(dev, cq, out, outlen);
+	err = mlx5_core_query_cq(dev, cq, out);
 	if (err) {
 		mlx5_core_warn(dev, "failed to query cq\n");
 		goto out;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
index 3a199a03d929..7283443868f3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/health.c
@@ -43,7 +43,7 @@ int mlx5e_reporter_cq_diagnose(struct mlx5e_cq *cq, struct devlink_fmsg *fmsg)
 	void *cqc;
 	int err;
 
-	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out, sizeof(out));
+	err = mlx5_core_query_cq(priv->mdev, &cq->mcq, out);
 	if (err)
 		return err;
 
diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h
index 40748fc1b11b..b5a9399e07ee 100644
--- a/include/linux/mlx5/cq.h
+++ b/include/linux/mlx5/cq.h
@@ -188,7 +188,7 @@ int mlx5_core_create_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen, u32 *out, int outlen);
 int mlx5_core_destroy_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq);
 int mlx5_core_query_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
-		       u32 *out, int outlen);
+		       u32 *out);
 int mlx5_core_modify_cq(struct mlx5_core_dev *dev, struct mlx5_core_cq *cq,
 			u32 *in, int inlen);
 int mlx5_core_modify_cq_moderation(struct mlx5_core_dev *dev,
-- 
cgit v1.2.3-59-g8ed1b


From e36fb468d23967683c1f0de644da0928563e604d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 11:55:06 +0300
Subject: net/mlx5: Update debugfs.c to new cmd interface

Do a mass update of debugfs.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 15 ++++-----------
 1 file changed, 4 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index c05e6a2c9126..6409090b3ec5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -273,20 +273,11 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 	return param;
 }
 
-static int mlx5_core_eq_query(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
-			      u32 *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
-
-	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
-	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
 static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 			 int index)
 {
 	int outlen = MLX5_ST_SZ_BYTES(query_eq_out);
+	u32 in[MLX5_ST_SZ_DW(query_eq_in)] = {};
 	u64 param = 0;
 	void *ctx;
 	u32 *out;
@@ -296,7 +287,9 @@ static u64 eq_read_field(struct mlx5_core_dev *dev, struct mlx5_eq *eq,
 	if (!out)
 		return param;
 
-	err = mlx5_core_eq_query(dev, eq, out, outlen);
+	MLX5_SET(query_eq_in, in, opcode, MLX5_CMD_OP_QUERY_EQ);
+	MLX5_SET(query_eq_in, in, eq_number, eq->eqn);
+	err = mlx5_cmd_exec_inout(dev, query_eq, in, out);
 	if (err) {
 		mlx5_core_warn(dev, "failed to query eq\n");
 		goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 9aa536ad45ec390d32c563321b26ccd8de52a2d9 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 11:56:42 +0300
Subject: net/mlx5: Update ecpf.c to new cmd interface

Do a mass update of ecpf.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/ecpf.c | 30 ++++++--------------------
 1 file changed, 6 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
index d2228e37450f..a894ea98c95a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ecpf.c
@@ -8,33 +8,13 @@ bool mlx5_read_embedded_cpu(struct mlx5_core_dev *dev)
 	return (ioread32be(&dev->iseg->initializing) >> MLX5_ECPU_BIT_NUM) & 1;
 }
 
-static int mlx5_peer_pf_enable_hca(struct mlx5_core_dev *dev)
-{
-	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {};
-
-	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
-	MLX5_SET(enable_hca_in, in, function_id, 0);
-	MLX5_SET(enable_hca_in, in, embedded_cpu_function, 0);
-	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
-}
-
-static int mlx5_peer_pf_disable_hca(struct mlx5_core_dev *dev)
-{
-	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {};
-
-	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
-	MLX5_SET(disable_hca_in, in, function_id, 0);
-	MLX5_SET(disable_hca_in, in, embedded_cpu_function, 0);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
 static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
 {
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
 	int err;
 
-	err = mlx5_peer_pf_enable_hca(dev);
+	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
+	err = mlx5_cmd_exec_in(dev, enable_hca, in);
 	if (err)
 		mlx5_core_err(dev, "Failed to enable peer PF HCA err(%d)\n",
 			      err);
@@ -44,9 +24,11 @@ static int mlx5_peer_pf_init(struct mlx5_core_dev *dev)
 
 static void mlx5_peer_pf_cleanup(struct mlx5_core_dev *dev)
 {
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
 	int err;
 
-	err = mlx5_peer_pf_disable_hca(dev);
+	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
+	err = mlx5_cmd_exec_in(dev, disable_hca, in);
 	if (err) {
 		mlx5_core_err(dev, "Failed to disable peer PF HCA err(%d)\n",
 			      err);
-- 
cgit v1.2.3-59-g8ed1b


From 49d7fcd127c1ee011aee252985749eb33593488c Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 12:25:08 +0300
Subject: net/mlx5: Update eq.c to new cmd interface

Do a mass update of eq.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eq.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eq.c b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
index bee419d01af2..4d974b5405b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eq.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eq.c
@@ -101,12 +101,11 @@ struct mlx5_eq_table {
 
 static int mlx5_cmd_destroy_eq(struct mlx5_core_dev *dev, u8 eqn)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_eq_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_eq_in)] = {};
 
 	MLX5_SET(destroy_eq_in, in, opcode, MLX5_CMD_OP_DESTROY_EQ);
 	MLX5_SET(destroy_eq_in, in, eq_number, eqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_eq, in);
 }
 
 /* caller must eventually call mlx5_cq_put on the returned cq */
-- 
cgit v1.2.3-59-g8ed1b


From a184cda1bb31eb14720c5f09d9698ab1666aa371 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 12:26:41 +0300
Subject: net/mlx5: Update statistics to new cmd interface

Do a mass update of the statistics code to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/monitor_stats.c | 46 +++++++---------------
 drivers/net/ethernet/mellanox/mlx5/core/en_stats.c | 17 +++-----
 2 files changed, 19 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
index 7cd5b02e0f10..8fe8b4d6ad1c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/monitor_stats.c
@@ -38,12 +38,11 @@ int mlx5e_monitor_counter_supported(struct mlx5e_priv *priv)
 
 void mlx5e_monitor_counter_arm(struct mlx5e_priv *priv)
 {
-	u32  in[MLX5_ST_SZ_DW(arm_monitor_counter_in)]  = {};
-	u32 out[MLX5_ST_SZ_DW(arm_monitor_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(arm_monitor_counter_in)] = {};
 
 	MLX5_SET(arm_monitor_counter_in, in, opcode,
 		 MLX5_CMD_OP_ARM_MONITOR_COUNTER);
-	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(priv->mdev, arm_monitor_counter, in);
 }
 
 static void mlx5e_monitor_counters_work(struct work_struct *work)
@@ -66,19 +65,6 @@ static int mlx5e_monitor_event_handler(struct notifier_block *nb,
 	return NOTIFY_OK;
 }
 
-static void mlx5e_monitor_counter_start(struct mlx5e_priv *priv)
-{
-	MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
-		     MONITOR_COUNTER);
-	mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
-}
-
-static void mlx5e_monitor_counter_stop(struct mlx5e_priv *priv)
-{
-	mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
-	cancel_work_sync(&priv->monitor_counters_work);
-}
-
 static int fill_monitor_counter_ppcnt_set1(int cnt, u32 *in)
 {
 	enum mlx5_monitor_counter_ppcnt ppcnt_cnt;
@@ -118,8 +104,7 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
 	int num_q_counters      = MLX5_CAP_GEN(mdev, num_q_monitor_counters);
 	int num_ppcnt_counters  = !MLX5_CAP_PCAM_REG(mdev, ppcnt) ? 0 :
 				  MLX5_CAP_GEN(mdev, num_ppcnt_monitor_counters);
-	u32  in[MLX5_ST_SZ_DW(set_monitor_counter_in)]  = {};
-	u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
 	int q_counter = priv->q_counter;
 	int cnt	= 0;
 
@@ -136,34 +121,31 @@ static void mlx5e_set_monitor_counter(struct mlx5e_priv *priv)
 	MLX5_SET(set_monitor_counter_in, in, opcode,
 		 MLX5_CMD_OP_SET_MONITOR_COUNTER);
 
-	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(mdev, set_monitor_counter, in);
 }
 
 /* check if mlx5e_monitor_counter_supported before calling this function*/
 void mlx5e_monitor_counter_init(struct mlx5e_priv *priv)
 {
 	INIT_WORK(&priv->monitor_counters_work, mlx5e_monitor_counters_work);
-	mlx5e_monitor_counter_start(priv);
+	MLX5_NB_INIT(&priv->monitor_counters_nb, mlx5e_monitor_event_handler,
+		     MONITOR_COUNTER);
+	mlx5_eq_notifier_register(priv->mdev, &priv->monitor_counters_nb);
+
 	mlx5e_set_monitor_counter(priv);
 	mlx5e_monitor_counter_arm(priv);
 	queue_work(priv->wq, &priv->update_stats_work);
 }
 
-static void mlx5e_monitor_counter_disable(struct mlx5e_priv *priv)
+/* check if mlx5e_monitor_counter_supported before calling this function*/
+void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
 {
-	u32  in[MLX5_ST_SZ_DW(set_monitor_counter_in)]  = {};
-	u32 out[MLX5_ST_SZ_DW(set_monitor_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(set_monitor_counter_in)] = {};
 
-	MLX5_SET(set_monitor_counter_in, in, num_of_counters, 0);
 	MLX5_SET(set_monitor_counter_in, in, opcode,
 		 MLX5_CMD_OP_SET_MONITOR_COUNTER);
 
-	mlx5_cmd_exec(priv->mdev, in, sizeof(in), out, sizeof(out));
-}
-
-/* check if mlx5e_monitor_counter_supported before calling this function*/
-void mlx5e_monitor_counter_cleanup(struct mlx5e_priv *priv)
-{
-	mlx5e_monitor_counter_disable(priv);
-	mlx5e_monitor_counter_stop(priv);
+	mlx5_cmd_exec_in(priv->mdev, set_monitor_counter, in);
+	mlx5_eq_notifier_unregister(priv->mdev, &priv->monitor_counters_nb);
+	cancel_work_sync(&priv->monitor_counters_work);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
index ff4002ebad90..e91a8b22eba6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_stats.c
@@ -491,18 +491,14 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vnic_env)
 static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vnic_env)
 {
 	u32 *out = (u32 *)priv->stats.vnic.query_vnic_env_out;
-	int outlen = MLX5_ST_SZ_BYTES(query_vnic_env_out);
-	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_vnic_env_in)] = {};
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	if (!MLX5_CAP_GEN(priv->mdev, nic_receive_steering_discard))
 		return;
 
-	MLX5_SET(query_vnic_env_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_VNIC_ENV);
-	MLX5_SET(query_vnic_env_in, in, op_mod, 0);
-	MLX5_SET(query_vnic_env_in, in, other_vport, 0);
-	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+	MLX5_SET(query_vnic_env_in, in, opcode, MLX5_CMD_OP_QUERY_VNIC_ENV);
+	mlx5_cmd_exec_inout(mdev, query_vnic_env, in, out);
 }
 
 #define VPORT_COUNTER_OFF(c) MLX5_BYTE_OFF(query_vport_counter_out, c)
@@ -577,15 +573,12 @@ static MLX5E_DECLARE_STATS_GRP_OP_FILL_STATS(vport)
 
 static MLX5E_DECLARE_STATS_GRP_OP_UPDATE_STATS(vport)
 {
-	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
 	u32 *out = (u32 *)priv->stats.vport.query_vport_out;
-	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
 	struct mlx5_core_dev *mdev = priv->mdev;
 
 	MLX5_SET(query_vport_counter_in, in, opcode, MLX5_CMD_OP_QUERY_VPORT_COUNTER);
-	MLX5_SET(query_vport_counter_in, in, op_mod, 0);
-	MLX5_SET(query_vport_counter_in, in, other_vport, 0);
-	mlx5_cmd_exec(mdev, in, sizeof(in), out, outlen);
+	mlx5_cmd_exec_inout(mdev, query_vport_counter, in, out);
 }
 
 #define PPORT_802_3_OFF(c) \
-- 
cgit v1.2.3-59-g8ed1b


From e08a6832f9c19a1b514675ee53a34736647f918a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 12:30:08 +0300
Subject: net/mlx5: Update eswitch to new cmd interface

Do a mass update of the eswitch code to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 38 ++++++----------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  6 +---
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 18 +++++-----
 3 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 7f618a443bfd..c5eb4e7754a9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -84,8 +84,7 @@ mlx5_eswitch_get_vport(struct mlx5_eswitch *esw, u16 vport_num)
 static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
 					u32 events_mask)
 {
-	int in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)]   = {0};
-	int out[MLX5_ST_SZ_DW(modify_nic_vport_context_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_nic_vport_context_in)] = {};
 	void *nic_vport_ctx;
 
 	MLX5_SET(modify_nic_vport_context_in, in,
@@ -108,40 +107,24 @@ static int arm_vport_context_events_cmd(struct mlx5_core_dev *dev, u16 vport,
 		MLX5_SET(nic_vport_context, nic_vport_ctx,
 			 event_on_promisc_change, 1);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_nic_vport_context, in);
 }
 
 /* E-Switch vport context HW commands */
 int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
-					  bool other_vport,
-					  void *in, int inlen)
+					  bool other_vport, void *in)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_esw_vport_context_out)] = {0};
-
 	MLX5_SET(modify_esw_vport_context_in, in, opcode,
 		 MLX5_CMD_OP_MODIFY_ESW_VPORT_CONTEXT);
 	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
 	MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport);
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
-}
-
-int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
-					 bool other_vport,
-					 void *out, int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
-
-	MLX5_SET(query_esw_vport_context_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
-	MLX5_SET(modify_esw_vport_context_in, in, vport_number, vport);
-	MLX5_SET(modify_esw_vport_context_in, in, other_vport, other_vport);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec_in(dev, modify_esw_vport_context, in);
 }
 
 static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 				  u16 vlan, u8 qos, u8 set_flags)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
 
 	if (!MLX5_CAP_ESW(dev, vport_cvlan_strip) ||
 	    !MLX5_CAP_ESW(dev, vport_cvlan_insert_if_not_exist))
@@ -170,8 +153,7 @@ static int modify_esw_vport_cvlan(struct mlx5_core_dev *dev, u16 vport,
 	MLX5_SET(modify_esw_vport_context_in, in,
 		 field_select.vport_cvlan_insert, 1);
 
-	return mlx5_eswitch_modify_esw_vport_context(dev, vport, true,
-						     in, sizeof(in));
+	return mlx5_eswitch_modify_esw_vport_context(dev, vport, true, in);
 }
 
 /* E-Switch FDB */
@@ -1901,7 +1883,7 @@ const u32 *mlx5_esw_query_functions(struct mlx5_core_dev *dev)
 	MLX5_SET(query_esw_functions_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_ESW_FUNCTIONS);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	err = mlx5_cmd_exec_inout(dev, query_esw_functions, in, out);
 	if (!err)
 		return out;
 
@@ -2783,8 +2765,8 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 {
 	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
 	int outlen = MLX5_ST_SZ_BYTES(query_vport_counter_out);
-	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {0};
-	struct mlx5_vport_drop_stats stats = {0};
+	u32 in[MLX5_ST_SZ_DW(query_vport_counter_in)] = {};
+	struct mlx5_vport_drop_stats stats = {};
 	int err = 0;
 	u32 *out;
 
@@ -2801,7 +2783,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 	MLX5_SET(query_vport_counter_in, in, vport_number, vport->vport);
 	MLX5_SET(query_vport_counter_in, in, other_vport, 1);
 
-	err = mlx5_cmd_exec(esw->dev, in, sizeof(in), out, outlen);
+	err = mlx5_cmd_exec_inout(esw->dev, query_vport_counter, in, out);
 	if (err)
 		goto free_out;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 39f42f985fbd..fa2ad172f08c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -329,11 +329,7 @@ int mlx5_eswitch_get_vport_stats(struct mlx5_eswitch *esw,
 void mlx5_eswitch_del_send_to_vport_rule(struct mlx5_flow_handle *rule);
 
 int mlx5_eswitch_modify_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
-					  bool other_vport,
-					  void *in, int inlen);
-int mlx5_eswitch_query_esw_vport_context(struct mlx5_core_dev *dev, u16 vport,
-					 bool other_vport,
-					 void *out, int outlen);
+					  bool other_vport, void *in);
 
 struct mlx5_flow_spec;
 struct mlx5_esw_flow_attr;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index f171eb2234b0..dc098bb58973 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -790,7 +790,8 @@ static bool mlx5_eswitch_reg_c1_loopback_supported(struct mlx5_eswitch *esw)
 static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
 {
 	u32 out[MLX5_ST_SZ_DW(query_esw_vport_context_out)] = {};
-	u32 in[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+	u32 min[MLX5_ST_SZ_DW(modify_esw_vport_context_in)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_esw_vport_context_in)] = {};
 	u8 curr, wanted;
 	int err;
 
@@ -798,8 +799,9 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
 	    !mlx5_eswitch_vport_match_metadata_enabled(esw))
 		return 0;
 
-	err = mlx5_eswitch_query_esw_vport_context(esw->dev, 0, false,
-						   out, sizeof(out));
+	MLX5_SET(query_esw_vport_context_in, in, opcode,
+		 MLX5_CMD_OP_QUERY_ESW_VPORT_CONTEXT);
+	err = mlx5_cmd_exec_inout(esw->dev, query_esw_vport_context, in, out);
 	if (err)
 		return err;
 
@@ -814,14 +816,12 @@ static int esw_set_passing_vport_metadata(struct mlx5_eswitch *esw, bool enable)
 	else
 		curr &= ~wanted;
 
-	MLX5_SET(modify_esw_vport_context_in, in,
+	MLX5_SET(modify_esw_vport_context_in, min,
 		 esw_vport_context.fdb_to_vport_reg_c_id, curr);
-
-	MLX5_SET(modify_esw_vport_context_in, in,
+	MLX5_SET(modify_esw_vport_context_in, min,
 		 field_select.fdb_to_vport_reg_c_id, 1);
 
-	err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, in,
-						    sizeof(in));
+	err = mlx5_eswitch_modify_esw_vport_context(esw->dev, 0, false, min);
 	if (!err) {
 		if (enable && (curr & MLX5_FDB_TO_VPORT_REG_C_1))
 			esw->flags |= MLX5_ESWITCH_REG_C1_LOOPBACK_ENABLED;
@@ -1474,7 +1474,7 @@ query_vports:
 out:
 	*mode = mlx5_mode;
 	return 0;
-}       
+}
 
 static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
 {
-- 
cgit v1.2.3-59-g8ed1b


From b316e1866fa39aadc1888fc68b73729343cc4058 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 15:59:30 +0300
Subject: net/mlx5: Update FPGA to new cmd interface

Do mass update of FPGA to reuse newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c | 28 ++++++++++------------
 1 file changed, 13 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
index 09769401c313..9a37077152aa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/cmd.c
@@ -142,15 +142,15 @@ int mlx5_fpga_query(struct mlx5_core_dev *dev, struct mlx5_fpga_query *query)
 int mlx5_fpga_create_qp(struct mlx5_core_dev *dev, void *fpga_qpc,
 			u32 *fpga_qpn)
 {
-	u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)];
+	u32 out[MLX5_ST_SZ_DW(fpga_create_qp_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(fpga_create_qp_in)] = {};
 	int ret;
 
 	MLX5_SET(fpga_create_qp_in, in, opcode, MLX5_CMD_OP_FPGA_CREATE_QP);
 	memcpy(MLX5_ADDR_OF(fpga_create_qp_in, in, fpga_qpc), fpga_qpc,
 	       MLX5_FLD_SZ_BYTES(fpga_create_qp_in, fpga_qpc));
 
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	ret = mlx5_cmd_exec_inout(dev, fpga_create_qp, in, out);
 	if (ret)
 		return ret;
 
@@ -164,8 +164,7 @@ int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
 			enum mlx5_fpga_qpc_field_select fields,
 			void *fpga_qpc)
 {
-	u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(fpga_modify_qp_out)];
+	u32 in[MLX5_ST_SZ_DW(fpga_modify_qp_in)] = {};
 
 	MLX5_SET(fpga_modify_qp_in, in, opcode, MLX5_CMD_OP_FPGA_MODIFY_QP);
 	MLX5_SET(fpga_modify_qp_in, in, field_select, fields);
@@ -173,20 +172,20 @@ int mlx5_fpga_modify_qp(struct mlx5_core_dev *dev, u32 fpga_qpn,
 	memcpy(MLX5_ADDR_OF(fpga_modify_qp_in, in, fpga_qpc), fpga_qpc,
 	       MLX5_FLD_SZ_BYTES(fpga_modify_qp_in, fpga_qpc));
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, fpga_modify_qp, in);
 }
 
 int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
 		       u32 fpga_qpn, void *fpga_qpc)
 {
-	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)];
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_in)] = {};
 	int ret;
 
 	MLX5_SET(fpga_query_qp_in, in, opcode, MLX5_CMD_OP_FPGA_QUERY_QP);
 	MLX5_SET(fpga_query_qp_in, in, fpga_qpn, fpga_qpn);
 
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	ret = mlx5_cmd_exec_inout(dev, fpga_query_qp, in, out);
 	if (ret)
 		return ret;
 
@@ -197,20 +196,19 @@ int mlx5_fpga_query_qp(struct mlx5_core_dev *dev,
 
 int mlx5_fpga_destroy_qp(struct mlx5_core_dev *dev, u32 fpga_qpn)
 {
-	u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(fpga_destroy_qp_out)];
+	u32 in[MLX5_ST_SZ_DW(fpga_destroy_qp_in)] = {};
 
 	MLX5_SET(fpga_destroy_qp_in, in, opcode, MLX5_CMD_OP_FPGA_DESTROY_QP);
 	MLX5_SET(fpga_destroy_qp_in, in, fpga_qpn, fpga_qpn);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, fpga_destroy_qp, in);
 }
 
 int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
 				bool clear, struct mlx5_fpga_qp_counters *data)
 {
-	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)];
+	u32 out[MLX5_ST_SZ_DW(fpga_query_qp_counters_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(fpga_query_qp_counters_in)] = {};
 	int ret;
 
 	MLX5_SET(fpga_query_qp_counters_in, in, opcode,
@@ -218,7 +216,7 @@ int mlx5_fpga_query_qp_counters(struct mlx5_core_dev *dev, u32 fpga_qpn,
 	MLX5_SET(fpga_query_qp_counters_in, in, clear, clear);
 	MLX5_SET(fpga_query_qp_counters_in, in, fpga_qpn, fpga_qpn);
 
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	ret = mlx5_cmd_exec_inout(dev, fpga_query_qp_counters, in, out);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From 31a0956ea91567b8f0e39d8420253646304b8571 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:01:30 +0300
Subject: net/mlx5: Update fs_core to new cmd interface

Do mass update of fs_core to reuse newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 80 +++++++++---------------
 1 file changed, 31 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 90048697b2ff..304d1e4f0541 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -155,8 +155,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 				   struct mlx5_flow_table *ft, u32 underlay_qpn,
 				   bool disconnect)
 {
-	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(set_flow_table_root_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_flow_table_root_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
 	if ((MLX5_CAP_GEN(dev, port_type) == MLX5_CAP_PORT_TYPE_IB) &&
@@ -167,13 +166,10 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 		 MLX5_CMD_OP_SET_FLOW_TABLE_ROOT);
 	MLX5_SET(set_flow_table_root_in, in, table_type, ft->type);
 
-	if (disconnect) {
+	if (disconnect)
 		MLX5_SET(set_flow_table_root_in, in, op_mod, 1);
-		MLX5_SET(set_flow_table_root_in, in, table_id, 0);
-	} else {
-		MLX5_SET(set_flow_table_root_in, in, op_mod, 0);
+	else
 		MLX5_SET(set_flow_table_root_in, in, table_id, ft->id);
-	}
 
 	MLX5_SET(set_flow_table_root_in, in, underlay_qpn, underlay_qpn);
 	if (ft->vport) {
@@ -181,7 +177,7 @@ static int mlx5_cmd_update_root_ft(struct mlx5_flow_root_namespace *ns,
 		MLX5_SET(set_flow_table_root_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, set_flow_table_root, in);
 }
 
 static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
@@ -192,8 +188,8 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 	int en_encap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_REFORMAT);
 	int en_decap = !!(ft->flags & MLX5_FLOW_TABLE_TUNNEL_EN_DECAP);
 	int term = !!(ft->flags & MLX5_FLOW_TABLE_TERMINATION);
-	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(create_flow_table_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 	int err;
 
@@ -239,7 +235,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 		break;
 	}
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, create_flow_table, in, out);
 	if (!err)
 		ft->id = MLX5_GET(create_flow_table_out, out,
 				  table_id);
@@ -249,8 +245,7 @@ static int mlx5_cmd_create_flow_table(struct mlx5_flow_root_namespace *ns,
 static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 				       struct mlx5_flow_table *ft)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(destroy_flow_table_in, in, opcode,
@@ -262,15 +257,14 @@ static int mlx5_cmd_destroy_flow_table(struct mlx5_flow_root_namespace *ns,
 		MLX5_SET(destroy_flow_table_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_flow_table, in);
 }
 
 static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
 				      struct mlx5_flow_table *ft,
 				      struct mlx5_flow_table *next_ft)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(modify_flow_table_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_flow_table_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(modify_flow_table_in, in, opcode,
@@ -310,7 +304,7 @@ static int mlx5_cmd_modify_flow_table(struct mlx5_flow_root_namespace *ns,
 		}
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_flow_table, in);
 }
 
 static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
@@ -318,8 +312,7 @@ static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
 				      u32 *in,
 				      struct mlx5_flow_group *fg)
 {
-	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {0};
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	u32 out[MLX5_ST_SZ_DW(create_flow_group_out)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 	int err;
 
@@ -332,7 +325,7 @@ static int mlx5_cmd_create_flow_group(struct mlx5_flow_root_namespace *ns,
 		MLX5_SET(create_flow_group_in, in, other_vport, 1);
 	}
 
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, create_flow_group, in, out);
 	if (!err)
 		fg->id = MLX5_GET(create_flow_group_out, out,
 				  group_id);
@@ -343,8 +336,7 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
 				       struct mlx5_flow_table *ft,
 				       struct mlx5_flow_group *fg)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(destroy_flow_group_in, in, opcode,
@@ -357,7 +349,7 @@ static int mlx5_cmd_destroy_flow_group(struct mlx5_flow_root_namespace *ns,
 		MLX5_SET(destroy_flow_group_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_flow_group, in);
 }
 
 static int mlx5_set_extended_dest(struct mlx5_core_dev *dev,
@@ -600,8 +592,7 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
 			       struct mlx5_flow_table *ft,
 			       struct fs_fte *fte)
 {
-	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(delete_fte_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
 	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
@@ -613,22 +604,22 @@ static int mlx5_cmd_delete_fte(struct mlx5_flow_root_namespace *ns,
 		MLX5_SET(delete_fte_in, in, other_vport, 1);
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, delete_fte, in);
 }
 
 int mlx5_cmd_fc_bulk_alloc(struct mlx5_core_dev *dev,
 			   enum mlx5_fc_bulk_alloc_bitmask alloc_bitmask,
 			   u32 *id)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_flow_counter_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_flow_counter_in)] = {};
 	int err;
 
 	MLX5_SET(alloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_FLOW_COUNTER);
 	MLX5_SET(alloc_flow_counter_in, in, flow_counter_bulk, alloc_bitmask);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, alloc_flow_counter, in, out);
 	if (!err)
 		*id = MLX5_GET(alloc_flow_counter_out, out, flow_counter_id);
 	return err;
@@ -641,21 +632,20 @@ int mlx5_cmd_fc_alloc(struct mlx5_core_dev *dev, u32 *id)
 
 int mlx5_cmd_fc_free(struct mlx5_core_dev *dev, u32 id)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(dealloc_flow_counter_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_flow_counter_in)] = {};
 
 	MLX5_SET(dealloc_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_FLOW_COUNTER);
 	MLX5_SET(dealloc_flow_counter_in, in, flow_counter_id, id);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, dealloc_flow_counter, in);
 }
 
 int mlx5_cmd_fc_query(struct mlx5_core_dev *dev, u32 id,
 		      u64 *packets, u64 *bytes)
 {
 	u32 out[MLX5_ST_SZ_BYTES(query_flow_counter_out) +
-		MLX5_ST_SZ_BYTES(traffic_counter)]   = {0};
-	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+		MLX5_ST_SZ_BYTES(traffic_counter)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
 	void *stats;
 	int err = 0;
 
@@ -683,11 +673,10 @@ int mlx5_cmd_fc_bulk_query(struct mlx5_core_dev *dev, u32 base_id, int bulk_len,
 			   u32 *out)
 {
 	int outlen = mlx5_cmd_fc_get_bulk_query_out_len(bulk_len);
-	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_flow_counter_in)] = {};
 
 	MLX5_SET(query_flow_counter_in, in, opcode,
 		 MLX5_CMD_OP_QUERY_FLOW_COUNTER);
-	MLX5_SET(query_flow_counter_in, in, op_mod, 0);
 	MLX5_SET(query_flow_counter_in, in, flow_counter_id, base_id);
 	MLX5_SET(query_flow_counter_in, in, num_of_counters, bulk_len);
 	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
@@ -700,7 +689,7 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 					  enum mlx5_flow_namespace_type namespace,
 					  struct mlx5_pkt_reformat *pkt_reformat)
 {
-	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)];
+	u32 out[MLX5_ST_SZ_DW(alloc_packet_reformat_context_out)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 	void *packet_reformat_context_in;
 	int max_encap_size;
@@ -732,7 +721,6 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 				reformat_data);
 	inlen = reformat - (void *)in  + size;
 
-	memset(in, 0, inlen);
 	MLX5_SET(alloc_packet_reformat_context_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_SET(packet_reformat_context_in, packet_reformat_context_in,
@@ -741,7 +729,6 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 		 reformat_type, reformat_type);
 	memcpy(reformat, reformat_data, size);
 
-	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
 	pkt_reformat->id = MLX5_GET(alloc_packet_reformat_context_out,
@@ -753,17 +740,15 @@ static int mlx5_cmd_packet_reformat_alloc(struct mlx5_flow_root_namespace *ns,
 static void mlx5_cmd_packet_reformat_dealloc(struct mlx5_flow_root_namespace *ns,
 					     struct mlx5_pkt_reformat *pkt_reformat)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)];
+	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_PACKET_REFORMAT_CONTEXT);
 	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
 		 pkt_reformat->id);
 
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, dealloc_packet_reformat_context, in);
 }
 
 static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
@@ -771,7 +756,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 					void *modify_actions,
 					struct mlx5_modify_hdr *modify_hdr)
 {
-	u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)];
+	u32 out[MLX5_ST_SZ_DW(alloc_modify_header_context_out)] = {};
 	int max_actions, actions_size, inlen, err;
 	struct mlx5_core_dev *dev = ns->dev;
 	void *actions_in;
@@ -821,7 +806,6 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 	actions_in = MLX5_ADDR_OF(alloc_modify_header_context_in, in, actions);
 	memcpy(actions_in, modify_actions, actions_size);
 
-	memset(out, 0, sizeof(out));
 	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
 
 	modify_hdr->id = MLX5_GET(alloc_modify_header_context_out, out, modify_header_id);
@@ -832,17 +816,15 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 static void mlx5_cmd_modify_header_dealloc(struct mlx5_flow_root_namespace *ns,
 					   struct mlx5_modify_hdr *modify_hdr)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)];
-	u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)];
+	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
 	struct mlx5_core_dev *dev = ns->dev;
 
-	memset(in, 0, sizeof(in));
 	MLX5_SET(dealloc_modify_header_context_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT);
 	MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
 		 modify_hdr->id);
 
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, dealloc_modify_header_context, in);
 }
 
 static const struct mlx5_flow_cmds mlx5_flow_cmds = {
-- 
cgit v1.2.3-59-g8ed1b


From 59ad21c21fc4025831fbd0c291e2db1247fddf4d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:08:35 +0300
Subject: net/mlx5: Update fw.c to new cmd interface

Do mass update of fw.c to reuse newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fw.c | 33 +++++++++++-----------------
 1 file changed, 13 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fw.c b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
index 3040e0466681..a5fbe7343508 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fw.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fw.c
@@ -67,26 +67,19 @@ enum {
 	MCQI_FW_STORED_VERSION  = 1,
 };
 
-static int mlx5_cmd_query_adapter(struct mlx5_core_dev *dev, u32 *out,
-				  int outlen)
-{
-	u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {0};
-
-	MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
-}
-
 int mlx5_query_board_id(struct mlx5_core_dev *dev)
 {
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+	u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
 	int err;
 
 	out = kzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_cmd_query_adapter(dev, out, outlen);
+	MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+	err = mlx5_cmd_exec_inout(dev, query_adapter, in, out);
 	if (err)
 		goto out;
 
@@ -105,13 +98,15 @@ int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id)
 {
 	u32 *out;
 	int outlen = MLX5_ST_SZ_BYTES(query_adapter_out);
+	u32 in[MLX5_ST_SZ_DW(query_adapter_in)] = {};
 	int err;
 
 	out = kzalloc(outlen, GFP_KERNEL);
 	if (!out)
 		return -ENOMEM;
 
-	err = mlx5_cmd_query_adapter(mdev, out, outlen);
+	MLX5_SET(query_adapter_in, in, opcode, MLX5_CMD_OP_QUERY_ADAPTER);
+	err = mlx5_cmd_exec_inout(mdev, query_adapter, in, out);
 	if (err)
 		goto out;
 
@@ -259,8 +254,7 @@ int mlx5_query_hca_caps(struct mlx5_core_dev *dev)
 
 int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
 {
-	u32 out[MLX5_ST_SZ_DW(init_hca_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(init_hca_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(init_hca_in)] = {};
 	int i;
 
 	MLX5_SET(init_hca_in, in, opcode, MLX5_CMD_OP_INIT_HCA);
@@ -271,16 +265,15 @@ int mlx5_cmd_init_hca(struct mlx5_core_dev *dev, uint32_t *sw_owner_id)
 				       sw_owner_id[i]);
 	}
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, init_hca, in);
 }
 
 int mlx5_cmd_teardown_hca(struct mlx5_core_dev *dev)
 {
-	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
 
 	MLX5_SET(teardown_hca_in, in, opcode, MLX5_CMD_OP_TEARDOWN_HCA);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, teardown_hca, in);
 }
 
 int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
@@ -315,8 +308,8 @@ int mlx5_cmd_force_teardown_hca(struct mlx5_core_dev *dev)
 int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
 {
 	unsigned long end, delay_ms = MLX5_FAST_TEARDOWN_WAIT_MS;
-	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(teardown_hca_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(teardown_hca_in)] = {};
 	int state;
 	int ret;
 
@@ -329,7 +322,7 @@ int mlx5_cmd_fast_teardown_hca(struct mlx5_core_dev *dev)
 	MLX5_SET(teardown_hca_in, in, profile,
 		 MLX5_TEARDOWN_HCA_IN_PROFILE_PREPARE_FAST_TEARDOWN);
 
-	ret = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	ret = mlx5_cmd_exec_inout(dev, teardown_hca, in, out);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From 5d19395f6988c057cb9461075f308a6c16a8c253 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:17:01 +0300
Subject: net/mlx5: Update lag.c to new cmd interface

Do mass update of lag.c to reuse newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 52 +++++++++------------------
 1 file changed, 17 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index 93052b07c76c..c6ad5ca46877 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -47,8 +47,7 @@ static DEFINE_MUTEX(lag_mutex);
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 			       u8 remap_port2)
 {
-	u32   in[MLX5_ST_SZ_DW(create_lag_in)]   = {0};
-	u32   out[MLX5_ST_SZ_DW(create_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(create_lag_in, in, ctx);
 
 	MLX5_SET(create_lag_in, in, opcode, MLX5_CMD_OP_CREATE_LAG);
@@ -56,14 +55,13 @@ static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, create_lag, in);
 }
 
 static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 			       u8 remap_port2)
 {
-	u32   in[MLX5_ST_SZ_DW(modify_lag_in)]   = {0};
-	u32   out[MLX5_ST_SZ_DW(modify_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_lag_in)] = {};
 	void *lag_ctx = MLX5_ADDR_OF(modify_lag_in, in, ctx);
 
 	MLX5_SET(modify_lag_in, in, opcode, MLX5_CMD_OP_MODIFY_LAG);
@@ -72,52 +70,29 @@ static int mlx5_cmd_modify_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_1, remap_port1);
 	MLX5_SET(lagc, lag_ctx, tx_remap_affinity_2, remap_port2);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
-}
-
-static int mlx5_cmd_destroy_lag(struct mlx5_core_dev *dev)
-{
-	u32  in[MLX5_ST_SZ_DW(destroy_lag_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_lag_out)] = {0};
-
-	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
-
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_lag, in);
 }
 
 int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev)
 {
-	u32  in[MLX5_ST_SZ_DW(create_vport_lag_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(create_vport_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_vport_lag_in)] = {};
 
 	MLX5_SET(create_vport_lag_in, in, opcode, MLX5_CMD_OP_CREATE_VPORT_LAG);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, create_vport_lag, in);
 }
 EXPORT_SYMBOL(mlx5_cmd_create_vport_lag);
 
 int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev)
 {
-	u32  in[MLX5_ST_SZ_DW(destroy_vport_lag_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_vport_lag_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_vport_lag_in)] = {};
 
 	MLX5_SET(destroy_vport_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_VPORT_LAG);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_vport_lag, in);
 }
 EXPORT_SYMBOL(mlx5_cmd_destroy_vport_lag);
 
-static int mlx5_cmd_query_cong_counter(struct mlx5_core_dev *dev,
-				       bool reset, void *out, int out_size)
-{
-	u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = { };
-
-	MLX5_SET(query_cong_statistics_in, in, opcode,
-		 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
-	MLX5_SET(query_cong_statistics_in, in, clear, reset);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, out_size);
-}
-
 int mlx5_lag_dev_get_netdev_idx(struct mlx5_lag *ldev,
 				struct net_device *ndev)
 {
@@ -232,12 +207,14 @@ int mlx5_activate_lag(struct mlx5_lag *ldev,
 static int mlx5_deactivate_lag(struct mlx5_lag *ldev)
 {
 	struct mlx5_core_dev *dev0 = ldev->pf[MLX5_LAG_P1].dev;
+	u32 in[MLX5_ST_SZ_DW(destroy_lag_in)] = {};
 	bool roce_lag = __mlx5_lag_is_roce(ldev);
 	int err;
 
 	ldev->flags &= ~MLX5_LAG_MODE_FLAGS;
 
-	err = mlx5_cmd_destroy_lag(dev0);
+	MLX5_SET(destroy_lag_in, in, opcode, MLX5_CMD_OP_DESTROY_LAG);
+	err = mlx5_cmd_exec_in(dev0, destroy_lag, in);
 	if (err) {
 		if (roce_lag) {
 			mlx5_core_err(dev0,
@@ -758,7 +735,12 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 	}
 
 	for (i = 0; i < num_ports; ++i) {
-		ret = mlx5_cmd_query_cong_counter(mdev[i], false, out, outlen);
+		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
+
+		MLX5_SET(query_cong_statistics_in, in, opcode,
+			 MLX5_CMD_OP_QUERY_CONG_STATISTICS);
+		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
+					  out);
 		if (ret)
 			goto unlock;
 
-- 
cgit v1.2.3-59-g8ed1b


From bb7664d369bfeb2754af8b972ddaa5734e2864a8 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:30:39 +0300
Subject: net/mlx5: Update gid.c new cmd interface

Do a mass update of gid.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
index 7722a3f9bb68..a68738c8f4bc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/gid.c
@@ -124,8 +124,7 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
 			   const u8 *mac, bool vlan, u16 vlan_id, u8 port_num)
 {
 #define MLX5_SET_RA(p, f, v) MLX5_SET(roce_addr_layout, p, f, v)
-	u32  in[MLX5_ST_SZ_DW(set_roce_address_in)] = {0};
-	u32 out[MLX5_ST_SZ_DW(set_roce_address_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_roce_address_in)] = {};
 	void *in_addr = MLX5_ADDR_OF(set_roce_address_in, in, roce_address);
 	char *addr_l3_addr = MLX5_ADDR_OF(roce_addr_layout, in_addr,
 					  source_l3_address);
@@ -153,6 +152,6 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
 
 	MLX5_SET(set_roce_address_in, in, roce_address_index, index);
 	MLX5_SET(set_roce_address_in, in, opcode, MLX5_CMD_OP_SET_ROCE_ADDRESS);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, set_roce_address, in);
 }
 EXPORT_SYMBOL(mlx5_core_roce_gid_set);
-- 
cgit v1.2.3-59-g8ed1b


From 9d6ed27163163b3236f38f89e3968601f7de6a95 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:31:21 +0300
Subject: net/mlx5: Update mpfs.c new cmd interface

Do a mass update of mpfs.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
index 3118e8d66407..fd8449ff9e17 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/mpfs.c
@@ -40,8 +40,7 @@
 /* HW L2 Table (MPFS) management */
 static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
 {
-	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(set_l2_table_entry_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_l2_table_entry_in)] = {};
 	u8 *in_mac_addr;
 
 	MLX5_SET(set_l2_table_entry_in, in, opcode, MLX5_CMD_OP_SET_L2_TABLE_ENTRY);
@@ -50,17 +49,16 @@ static int set_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index, u8 *mac)
 	in_mac_addr = MLX5_ADDR_OF(set_l2_table_entry_in, in, mac_address);
 	ether_addr_copy(&in_mac_addr[2], mac);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, set_l2_table_entry, in);
 }
 
 static int del_l2table_entry_cmd(struct mlx5_core_dev *dev, u32 index)
 {
-	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(delete_l2_table_entry_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(delete_l2_table_entry_in)] = {};
 
 	MLX5_SET(delete_l2_table_entry_in, in, opcode, MLX5_CMD_OP_DELETE_L2_TABLE_ENTRY);
 	MLX5_SET(delete_l2_table_entry_in, in, table_index, index);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, delete_l2_table_entry, in);
 }
 
 /* UC L2 table hash node */
-- 
cgit v1.2.3-59-g8ed1b


From 253e790e204f90adf10797ccea58910518a2c77e Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:32:06 +0300
Subject: net/mlx5: Update vxlan.c new cmd interface

Do a mass update of vxlan.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
index 148b55c3db7a..82c766a95165 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/vxlan.c
@@ -60,24 +60,22 @@ static inline u8 mlx5_vxlan_max_udp_ports(struct mlx5_core_dev *mdev)
 
 static int mlx5_vxlan_core_add_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(add_vxlan_udp_dport_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(add_vxlan_udp_dport_in)] = {};
 
 	MLX5_SET(add_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_ADD_VXLAN_UDP_DPORT);
 	MLX5_SET(add_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, add_vxlan_udp_dport, in);
 }
 
 static int mlx5_vxlan_core_del_port_cmd(struct mlx5_core_dev *mdev, u16 port)
 {
-	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(delete_vxlan_udp_dport_in)] = {};
 
 	MLX5_SET(delete_vxlan_udp_dport_in, in, opcode,
 		 MLX5_CMD_OP_DELETE_VXLAN_UDP_DPORT);
 	MLX5_SET(delete_vxlan_udp_dport_in, in, vxlan_udp_port, port);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, delete_vxlan_udp_dport, in);
 }
 
 static struct mlx5_vxlan_port*
-- 
cgit v1.2.3-59-g8ed1b


From 3ac0e69e69ad17fe792ec68e651c6535d74fcffd Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:33:38 +0300
Subject: net/mlx5: Update main.c new cmd interface

Do a mass update of main.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 35 ++++++++++----------------
 1 file changed, 13 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 0044aa5cc676..061b69ea9cc4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -206,8 +206,7 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
 {
 	int driver_ver_sz = MLX5_FLD_SZ_BYTES(set_driver_version_in,
 					      driver_version);
-	u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {0};
-	u8 out[MLX5_ST_SZ_BYTES(set_driver_version_out)] = {0};
+	u8 in[MLX5_ST_SZ_BYTES(set_driver_version_in)] = {};
 	int remaining_size = driver_ver_sz;
 	char *string;
 
@@ -234,7 +233,7 @@ static void mlx5_set_driver_version(struct mlx5_core_dev *dev)
 	MLX5_SET(set_driver_version_in, in, opcode,
 		 MLX5_CMD_OP_SET_DRIVER_VERSION);
 
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, set_driver_version, in);
 }
 
 static int set_dma_caps(struct pci_dev *pdev)
@@ -366,7 +365,7 @@ static int mlx5_core_get_caps_mode(struct mlx5_core_dev *dev,
 
 	MLX5_SET(query_hca_cap_in, in, opcode, MLX5_CMD_OP_QUERY_HCA_CAP);
 	MLX5_SET(query_hca_cap_in, in, op_mod, opmod);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, out_sz);
+	err = mlx5_cmd_exec_inout(dev, query_hca_cap, in, out);
 	if (err) {
 		mlx5_core_warn(dev,
 			       "QUERY_HCA_CAP : type(%x) opmode(%x) Failed(%d)\n",
@@ -409,12 +408,9 @@ int mlx5_core_get_caps(struct mlx5_core_dev *dev, enum mlx5_cap_type cap_type)
 
 static int set_caps(struct mlx5_core_dev *dev, void *in, int opmod)
 {
-	u32 out[MLX5_ST_SZ_DW(set_hca_cap_out)] = {};
-
 	MLX5_SET(set_hca_cap_in, in, opcode, MLX5_CMD_OP_SET_HCA_CAP);
 	MLX5_SET(set_hca_cap_in, in, op_mod, opmod << 1);
-	return mlx5_cmd_exec(dev, in, MLX5_ST_SZ_BYTES(set_hca_cap_in), out,
-			     sizeof(out));
+	return mlx5_cmd_exec_in(dev, set_hca_cap, in);
 }
 
 static int handle_hca_cap_atomic(struct mlx5_core_dev *dev, void *set_ctx)
@@ -653,26 +649,24 @@ static int mlx5_core_set_hca_defaults(struct mlx5_core_dev *dev)
 
 int mlx5_core_enable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-	u32 out[MLX5_ST_SZ_DW(enable_hca_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(enable_hca_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(enable_hca_in)] = {};
 
 	MLX5_SET(enable_hca_in, in, opcode, MLX5_CMD_OP_ENABLE_HCA);
 	MLX5_SET(enable_hca_in, in, function_id, func_id);
 	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
 		 dev->caps.embedded_cpu);
-	return mlx5_cmd_exec(dev, &in, sizeof(in), &out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, enable_hca, in);
 }
 
 int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 {
-	u32 out[MLX5_ST_SZ_DW(disable_hca_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(disable_hca_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(disable_hca_in)] = {};
 
 	MLX5_SET(disable_hca_in, in, opcode, MLX5_CMD_OP_DISABLE_HCA);
 	MLX5_SET(disable_hca_in, in, function_id, func_id);
 	MLX5_SET(enable_hca_in, in, embedded_cpu_function,
 		 dev->caps.embedded_cpu);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, disable_hca, in);
 }
 
 u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
@@ -697,14 +691,13 @@ u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
 
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
-	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)]   = {0};
-	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {0};
+	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
+	u32 query_in[MLX5_ST_SZ_DW(query_issi_in)] = {};
 	u32 sup_issi;
 	int err;
 
 	MLX5_SET(query_issi_in, query_in, opcode, MLX5_CMD_OP_QUERY_ISSI);
-	err = mlx5_cmd_exec(dev, query_in, sizeof(query_in),
-			    query_out, sizeof(query_out));
+	err = mlx5_cmd_exec_inout(dev, query_issi, query_in, query_out);
 	if (err) {
 		u32 syndrome;
 		u8 status;
@@ -724,13 +717,11 @@ static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 	sup_issi = MLX5_GET(query_issi_out, query_out, supported_issi_dw0);
 
 	if (sup_issi & (1 << 1)) {
-		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)]   = {0};
-		u32 set_out[MLX5_ST_SZ_DW(set_issi_out)] = {0};
+		u32 set_in[MLX5_ST_SZ_DW(set_issi_in)] = {};
 
 		MLX5_SET(set_issi_in, set_in, opcode, MLX5_CMD_OP_SET_ISSI);
 		MLX5_SET(set_issi_in, set_in, current_issi, 1);
-		err = mlx5_cmd_exec(dev, set_in, sizeof(set_in),
-				    set_out, sizeof(set_out));
+		err = mlx5_cmd_exec_in(dev, set_issi, set_in);
 		if (err) {
 			mlx5_core_err(dev, "Failed to set ISSI to 1 err(%d)\n",
 				      err);
-- 
cgit v1.2.3-59-g8ed1b


From 62a9fec040831c84a79721ec288851a3fd6f8ec0 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:43:58 +0300
Subject: net/mlx5: Update mcg.c new cmd interface

Do a mass update of mcg.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/mcg.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
index 6789fe658037..e019d68062d8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mcg.c
@@ -38,28 +38,26 @@
 
 int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-	u32 out[MLX5_ST_SZ_DW(attach_to_mcg_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(attach_to_mcg_in)] = {};
 	void *gid;
 
 	MLX5_SET(attach_to_mcg_in, in, opcode, MLX5_CMD_OP_ATTACH_TO_MCG);
 	MLX5_SET(attach_to_mcg_in, in, qpn, qpn);
 	gid = MLX5_ADDR_OF(attach_to_mcg_in, in, multicast_gid);
 	memcpy(gid, mgid, sizeof(*mgid));
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, attach_to_mcg, in);
 }
 EXPORT_SYMBOL(mlx5_core_attach_mcg);
 
 int mlx5_core_detach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn)
 {
-	u32 out[MLX5_ST_SZ_DW(detach_from_mcg_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(detach_from_mcg_in)] = {};
 	void *gid;
 
 	MLX5_SET(detach_from_mcg_in, in, opcode, MLX5_CMD_OP_DETACH_FROM_MCG);
 	MLX5_SET(detach_from_mcg_in, in, qpn, qpn);
 	gid = MLX5_ADDR_OF(detach_from_mcg_in, in, multicast_gid);
 	memcpy(gid, mgid, sizeof(*mgid));
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, detach_from_mcg, in);
 }
 EXPORT_SYMBOL(mlx5_core_detach_mcg);
-- 
cgit v1.2.3-59-g8ed1b


From adda874c957c86c7407930142d944c1546c38260 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 16:50:37 +0300
Subject: net/mlx5: Update mr.c new cmd interface

Do a mass update of mr.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/mr.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mr.c b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
index 1feedf335dea..9eb51f06d3ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mr.c
@@ -39,7 +39,7 @@ int mlx5_core_create_mkey(struct mlx5_core_dev *dev,
 			  struct mlx5_core_mkey *mkey,
 			  u32 *in, int inlen)
 {
-	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {0};
+	u32 lout[MLX5_ST_SZ_DW(create_mkey_out)] = {};
 	u32 mkey_index;
 	void *mkc;
 	int err;
@@ -65,19 +65,18 @@ EXPORT_SYMBOL(mlx5_core_create_mkey);
 int mlx5_core_destroy_mkey(struct mlx5_core_dev *dev,
 			   struct mlx5_core_mkey *mkey)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_mkey_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_mkey_in)] = {};
 
 	MLX5_SET(destroy_mkey_in, in, opcode, MLX5_CMD_OP_DESTROY_MKEY);
 	MLX5_SET(destroy_mkey_in, in, mkey_index, mlx5_mkey_to_idx(mkey->key));
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_mkey, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_mkey);
 
 int mlx5_core_query_mkey(struct mlx5_core_dev *dev, struct mlx5_core_mkey *mkey,
 			 u32 *out, int outlen)
 {
-	u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(query_mkey_in)] = {};
 
 	memset(out, 0, outlen);
 	MLX5_SET(query_mkey_in, in, opcode, MLX5_CMD_OP_QUERY_MKEY);
@@ -99,8 +98,8 @@ static inline u32 mlx5_get_psv(u32 *out, int psv_index)
 int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 			 int npsvs, u32 *sig_index)
 {
-	u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(create_psv_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(create_psv_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_psv_in)] = {};
 	int i, err;
 
 	if (npsvs > MLX5_MAX_PSVS)
@@ -110,7 +109,7 @@ int mlx5_core_create_psv(struct mlx5_core_dev *dev, u32 pdn,
 	MLX5_SET(create_psv_in, in, pd, pdn);
 	MLX5_SET(create_psv_in, in, num_psv, npsvs);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, create_psv, in, out);
 	if (err)
 		return err;
 
@@ -123,11 +122,10 @@ EXPORT_SYMBOL(mlx5_core_create_psv);
 
 int mlx5_core_destroy_psv(struct mlx5_core_dev *dev, int psv_num)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_psv_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(destroy_psv_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_psv_in)] = {};
 
 	MLX5_SET(destroy_psv_in, in, opcode, MLX5_CMD_OP_DESTROY_PSV);
 	MLX5_SET(destroy_psv_in, in, psvn, psv_num);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_psv, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_psv);
-- 
cgit v1.2.3-59-g8ed1b


From 86d41641ddd66fc9b2cc0aef0909cee68493a78a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 17:02:07 +0300
Subject: net/mlx5: Update pagealloc.c new cmd interface

Do a mass update of pagealloc.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index a3959754b927..3d6f617abb7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -135,8 +135,8 @@ static struct fw_page *find_fw_page(struct mlx5_core_dev *dev, u64 addr)
 static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 				s32 *npages, int boot)
 {
-	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(query_pages_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(query_pages_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_pages_in)] = {};
 	int err;
 
 	MLX5_SET(query_pages_in, in, opcode, MLX5_CMD_OP_QUERY_PAGES);
@@ -145,7 +145,7 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 		 MLX5_QUERY_PAGES_IN_OP_MOD_INIT_PAGES);
 	MLX5_SET(query_pages_in, in, embedded_cpu_function, mlx5_core_is_ecpf(dev));
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, query_pages, in, out);
 	if (err)
 		return err;
 
@@ -256,8 +256,7 @@ err_mapping:
 static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
 			     bool ec_function)
 {
-	u32 out[MLX5_ST_SZ_DW(manage_pages_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(manage_pages_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
 	int err;
 
 	MLX5_SET(manage_pages_in, in, opcode, MLX5_CMD_OP_MANAGE_PAGES);
@@ -265,7 +264,7 @@ static void page_notify_fail(struct mlx5_core_dev *dev, u16 func_id,
 	MLX5_SET(manage_pages_in, in, function_id, func_id);
 	MLX5_SET(manage_pages_in, in, embedded_cpu_function, ec_function);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_in(dev, manage_pages, in);
 	if (err)
 		mlx5_core_warn(dev, "page notify failed func_id(%d) err(%d)\n",
 			       func_id, err);
@@ -373,7 +372,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 			 int *nclaimed, bool ec_function)
 {
 	int outlen = MLX5_ST_SZ_BYTES(manage_pages_out);
-	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(manage_pages_in)] = {};
 	int num_claimed;
 	u32 *out;
 	int err;
-- 
cgit v1.2.3-59-g8ed1b


From 9b3ca3ec03169e895c414c4e98a49c0cd0cd95ec Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 17:04:30 +0300
Subject: net/mlx5: Update pd.c new cmd interface

Do a mass update of pd.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pd.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pd.c b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
index b92d6f621c83..aabc53ad8bdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pd.c
@@ -37,12 +37,12 @@
 
 int mlx5_core_alloc_pd(struct mlx5_core_dev *dev, u32 *pdn)
 {
-	u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(alloc_pd_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_pd_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_pd_in)] = {};
 	int err;
 
 	MLX5_SET(alloc_pd_in, in, opcode, MLX5_CMD_OP_ALLOC_PD);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, alloc_pd, in, out);
 	if (!err)
 		*pdn = MLX5_GET(alloc_pd_out, out, pd);
 	return err;
@@ -51,11 +51,10 @@ EXPORT_SYMBOL(mlx5_core_alloc_pd);
 
 int mlx5_core_dealloc_pd(struct mlx5_core_dev *dev, u32 pdn)
 {
-	u32 out[MLX5_ST_SZ_DW(dealloc_pd_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_pd_in)] = {};
 
 	MLX5_SET(dealloc_pd_in, in, opcode, MLX5_CMD_OP_DEALLOC_PD);
 	MLX5_SET(dealloc_pd_in, in, pd, pdn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, dealloc_pd, in);
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_pd);
-- 
cgit v1.2.3-59-g8ed1b


From 1fb5193434555df68eb065705baf223bca80ad0a Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 17:05:05 +0300
Subject: net/mlx5: Update uar.c new cmd interface

Do a mass update of uar.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/uar.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/uar.c b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
index 816f9c434359..da481a7c12f4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/uar.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/uar.c
@@ -38,12 +38,12 @@
 
 int mlx5_cmd_alloc_uar(struct mlx5_core_dev *dev, u32 *uarn)
 {
-	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)]   = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_uar_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_uar_in)] = {};
 	int err;
 
 	MLX5_SET(alloc_uar_in, in, opcode, MLX5_CMD_OP_ALLOC_UAR);
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, alloc_uar, in, out);
 	if (!err)
 		*uarn = MLX5_GET(alloc_uar_out, out, uar);
 	return err;
@@ -52,12 +52,11 @@ EXPORT_SYMBOL(mlx5_cmd_alloc_uar);
 
 int mlx5_cmd_free_uar(struct mlx5_core_dev *dev, u32 uarn)
 {
-	u32 out[MLX5_ST_SZ_DW(dealloc_uar_out)] = {0};
-	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)]   = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_uar_in)] = {};
 
 	MLX5_SET(dealloc_uar_in, in, opcode, MLX5_CMD_OP_DEALLOC_UAR);
 	MLX5_SET(dealloc_uar_in, in, uar, uarn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, dealloc_uar, in);
 }
 EXPORT_SYMBOL(mlx5_cmd_free_uar);
 
-- 
cgit v1.2.3-59-g8ed1b


From fa8110f4451c189b1efd4de6bef02ac6efb1244f Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 17:05:31 +0300
Subject: net/mlx5: Update rl.c new cmd interface

Do a mass update of rl.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/rl.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/rl.c b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
index c9599f7c5696..99039c47ef33 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/rl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/rl.c
@@ -39,8 +39,8 @@
 int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 				       void *ctx, u32 *element_id)
 {
-	u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
+	u32 in[MLX5_ST_SZ_DW(create_scheduling_element_in)] = {};
 	void *schedc;
 	int err;
 
@@ -52,7 +52,7 @@ int mlx5_create_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 		 hierarchy);
 	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, create_scheduling_element, in, out);
 	if (err)
 		return err;
 
@@ -65,8 +65,7 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 				       void *ctx, u32 element_id,
 				       u32 modify_bitmask)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_scheduling_element_in)] = {};
 	void *schedc;
 
 	schedc = MLX5_ADDR_OF(modify_scheduling_element_in, in,
@@ -81,14 +80,13 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 		 hierarchy);
 	memcpy(schedc, ctx, MLX5_ST_SZ_BYTES(scheduling_context));
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_scheduling_element, in);
 }
 
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)]  = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_scheduling_element_in)] = {};
 
 	MLX5_SET(destroy_scheduling_element_in, in, opcode,
 		 MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT);
@@ -97,7 +95,7 @@ int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 	MLX5_SET(destroy_scheduling_element_in, in, scheduling_hierarchy,
 		 hierarchy);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, destroy_scheduling_element, in);
 }
 
 static bool mlx5_rl_are_equal_raw(struct mlx5_rl_entry *entry, void *rl_in,
@@ -144,8 +142,7 @@ static struct mlx5_rl_entry *find_rl_entry(struct mlx5_rl_table *table,
 static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
 				      struct mlx5_rl_entry *entry, bool set)
 {
-	u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)]   = {};
-	u32 out[MLX5_ST_SZ_DW(set_pp_rate_limit_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(set_pp_rate_limit_in)] = {};
 	void *pp_context;
 
 	pp_context = MLX5_ADDR_OF(set_pp_rate_limit_in, in, ctx);
@@ -155,7 +152,7 @@ static int mlx5_set_pp_rate_limit_cmd(struct mlx5_core_dev *dev,
 	MLX5_SET(set_pp_rate_limit_in, in, rate_limit_index, entry->index);
 	if (set)
 		memcpy(pp_context, entry->rl_raw, sizeof(entry->rl_raw));
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, set_pp_rate_limit, in);
 }
 
 bool mlx5_rl_is_in_range(struct mlx5_core_dev *dev, u32 rate)
-- 
cgit v1.2.3-59-g8ed1b


From 2276a0dfc17be2cd89cfb8ed0a7ef6fad417aa3e Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 21:00:23 +0300
Subject: net/mlx5: Update port.c new cmd interface

Do a mass update of port.c to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/port.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/port.c b/drivers/net/ethernet/mellanox/mlx5/core/port.c
index cc262b30aed5..9f829e68fc73 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/port.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/port.c
@@ -763,24 +763,23 @@ EXPORT_SYMBOL_GPL(mlx5_query_port_ets_rate_limit);
 
 int mlx5_set_port_wol(struct mlx5_core_dev *mdev, u8 wol_mode)
 {
-	u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(set_wol_rol_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(set_wol_rol_in)] = {};
 
 	MLX5_SET(set_wol_rol_in, in, opcode, MLX5_CMD_OP_SET_WOL_ROL);
 	MLX5_SET(set_wol_rol_in, in, wol_mode_valid, 1);
 	MLX5_SET(set_wol_rol_in, in, wol_mode, wol_mode);
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, set_wol_rol, in);
 }
 EXPORT_SYMBOL_GPL(mlx5_set_port_wol);
 
 int mlx5_query_port_wol(struct mlx5_core_dev *mdev, u8 *wol_mode)
 {
-	u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(query_wol_rol_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(query_wol_rol_in)] = {};
 	int err;
 
 	MLX5_SET(query_wol_rol_in, in, opcode, MLX5_CMD_OP_QUERY_WOL_ROL);
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, query_wol_rol, in, out);
 	if (!err)
 		*wol_mode = MLX5_GET(query_wol_rol_out, out, wol_mode);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7ba294e43595ab728adf2baf0aab378095768bfe Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 21:02:08 +0300
Subject: net/mlx5: Update SW steering new cmd interface

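Do a mass update of the SW steering code to use the newly introduced
mlx5_cmd_exec_in*() interfaces.

While at it, set the opcode in mlx5dr_cmd_destroy_flow_group() through
the destroy_flow_group_in layout instead of create_flow_group_in.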

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/steering/dr_cmd.c  | 33 +++++++++-------------
 1 file changed, 14 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
index 461b39376daf..6bd34b293007 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_cmd.c
@@ -18,7 +18,7 @@ int mlx5dr_cmd_query_esw_vport_context(struct mlx5_core_dev *mdev,
 	MLX5_SET(query_esw_vport_context_in, in, other_vport, other_vport);
 	MLX5_SET(query_esw_vport_context_in, in, vport_number, vport_number);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, query_esw_vport_context, in, out);
 	if (err)
 		return err;
 
@@ -51,7 +51,7 @@ int mlx5dr_cmd_query_gvmi(struct mlx5_core_dev *mdev, bool other_vport,
 		 MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE << 1 |
 		 HCA_CAP_OPMOD_GET_CUR);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, out_size);
+	err = mlx5_cmd_exec_inout(mdev, query_hca_cap, in, out);
 	if (err) {
 		kfree(out);
 		return err;
@@ -141,7 +141,7 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
 	MLX5_SET(query_flow_table_in, in, table_type, type);
 	MLX5_SET(query_flow_table_in, in, table_id, table_id);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, query_flow_table, in, out);
 	if (err)
 		return err;
 
@@ -158,12 +158,11 @@ int mlx5dr_cmd_query_flow_table(struct mlx5_core_dev *dev,
 
 int mlx5dr_cmd_sync_steering(struct mlx5_core_dev *mdev)
 {
-	u32 out[MLX5_ST_SZ_DW(sync_steering_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(sync_steering_in)] = {};
 
 	MLX5_SET(sync_steering_in, in, opcode, MLX5_CMD_OP_SYNC_STEERING);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, sync_steering, in);
 }
 
 int mlx5dr_cmd_set_fte_modify_and_vport(struct mlx5_core_dev *mdev,
@@ -214,14 +213,13 @@ int mlx5dr_cmd_del_flow_table_entry(struct mlx5_core_dev *mdev,
 				    u32 table_type,
 				    u32 table_id)
 {
-	u32 out[MLX5_ST_SZ_DW(delete_fte_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(delete_fte_in)] = {};
 
 	MLX5_SET(delete_fte_in, in, opcode, MLX5_CMD_OP_DELETE_FLOW_TABLE_ENTRY);
 	MLX5_SET(delete_fte_in, in, table_type, table_type);
 	MLX5_SET(delete_fte_in, in, table_id, table_id);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, delete_fte, in);
 }
 
 int mlx5dr_cmd_alloc_modify_header(struct mlx5_core_dev *mdev,
@@ -263,7 +261,6 @@ out:
 int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
 				     u32 modify_header_id)
 {
-	u32 out[MLX5_ST_SZ_DW(dealloc_modify_header_context_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(dealloc_modify_header_context_in)] = {};
 
 	MLX5_SET(dealloc_modify_header_context_in, in, opcode,
@@ -271,7 +268,7 @@ int mlx5dr_cmd_dealloc_modify_header(struct mlx5_core_dev *mdev,
 	MLX5_SET(dealloc_modify_header_context_in, in, modify_header_id,
 		 modify_header_id);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, dealloc_modify_header_context, in);
 }
 
 int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
@@ -292,7 +289,7 @@ int mlx5dr_cmd_create_empty_flow_group(struct mlx5_core_dev *mdev,
 	MLX5_SET(create_flow_group_in, in, table_type, table_type);
 	MLX5_SET(create_flow_group_in, in, table_id, table_id);
 
-	err = mlx5_cmd_exec(mdev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, create_flow_group, in, out);
 	if (err)
 		goto out;
 
@@ -309,14 +306,14 @@ int mlx5dr_cmd_destroy_flow_group(struct mlx5_core_dev *mdev,
 				  u32 group_id)
 {
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_group_in)] = {};
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_group_out)] = {};
 
-	MLX5_SET(create_flow_group_in, in, opcode, MLX5_CMD_OP_DESTROY_FLOW_GROUP);
+	MLX5_SET(destroy_flow_group_in, in, opcode,
+		 MLX5_CMD_OP_DESTROY_FLOW_GROUP);
 	MLX5_SET(destroy_flow_group_in, in, table_type, table_type);
 	MLX5_SET(destroy_flow_group_in, in, table_id, table_id);
 	MLX5_SET(destroy_flow_group_in, in, group_id, group_id);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, destroy_flow_group, in);
 }
 
 int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
@@ -360,7 +357,7 @@ int mlx5dr_cmd_create_flow_table(struct mlx5_core_dev *mdev,
 	MLX5_SET(create_flow_table_in, in, flow_table_context.reformat_en,
 		 attr->reformat_en);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, create_flow_table, in, out);
 	if (err)
 		return err;
 
@@ -379,7 +376,6 @@ int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
 				  u32 table_id,
 				  u32 table_type)
 {
-	u32 out[MLX5_ST_SZ_DW(destroy_flow_table_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(destroy_flow_table_in)] = {};
 
 	MLX5_SET(destroy_flow_table_in, in, opcode,
@@ -387,7 +383,7 @@ int mlx5dr_cmd_destroy_flow_table(struct mlx5_core_dev *mdev,
 	MLX5_SET(destroy_flow_table_in, in, table_type, table_type);
 	MLX5_SET(destroy_flow_table_in, in, table_id, table_id);
 
-	return mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	return mlx5_cmd_exec_in(mdev, destroy_flow_table, in);
 }
 
 int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
@@ -434,7 +430,6 @@ int mlx5dr_cmd_create_reformat_ctx(struct mlx5_core_dev *mdev,
 void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
 				     u32 reformat_id)
 {
-	u32 out[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_out)] = {};
 	u32 in[MLX5_ST_SZ_DW(dealloc_packet_reformat_context_in)] = {};
 
 	MLX5_SET(dealloc_packet_reformat_context_in, in, opcode,
@@ -442,7 +437,7 @@ void mlx5dr_cmd_destroy_reformat_ctx(struct mlx5_core_dev *mdev,
 	MLX5_SET(dealloc_packet_reformat_context_in, in, packet_reformat_id,
 		 reformat_id);
 
-	mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(mdev, dealloc_packet_reformat_context, in);
 }
 
 int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
@@ -458,7 +453,7 @@ int mlx5dr_cmd_query_gid(struct mlx5_core_dev *mdev, u8 vhca_port_num,
 	MLX5_SET(query_roce_address_in, in, roce_address_index, index);
 	MLX5_SET(query_roce_address_in, in, vhca_port_num, vhca_port_num);
 
-	err = mlx5_cmd_exec(mdev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(mdev, query_roce_address, in, out);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From e0b4b4722dfac09658d1519b296cf8dc349a2451 Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Thu, 9 Apr 2020 21:03:33 +0300
Subject: net/mlx5: Update transobj.c new cmd interface

Do a mass update of transobj.c to reuse the newly introduced
mlx5_cmd_exec_in*() interfaces.

Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/qp.c                    |  32 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   6 +-
 .../net/ethernet/mellanox/mlx5/core/en_common.c    |   7 +-
 .../net/ethernet/mellanox/mlx5/core/en_ethtool.c   |   2 +-
 .../ethernet/mellanox/mlx5/core/en_fs_ethtool.c    |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  29 +++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   6 +-
 drivers/net/ethernet/mellanox/mlx5/core/transobj.c | 113 ++++++++-------------
 include/linux/mlx5/transobj.h                      |  19 ++--
 9 files changed, 85 insertions(+), 131 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/qp.c b/drivers/infiniband/hw/mlx5/qp.c
index 3ecd1864b3c8..af599c8b88aa 100644
--- a/drivers/infiniband/hw/mlx5/qp.c
+++ b/drivers/infiniband/hw/mlx5/qp.c
@@ -1255,7 +1255,7 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_sq *sq, u32 tdn,
 				    struct ib_pd *pd)
 {
-	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
 	void *tisc = MLX5_ADDR_OF(create_tis_in, in, ctx);
 
 	MLX5_SET(create_tis_in, in, uid, to_mpd(pd)->uid);
@@ -1263,7 +1263,7 @@ static int create_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
 	if (qp->flags & MLX5_IB_QP_UNDERLAY)
 		MLX5_SET(tisc, tisc, underlay_qpn, qp->underlay_qpn);
 
-	return mlx5_core_create_tis(dev->mdev, in, sizeof(in), &sq->tisn);
+	return mlx5_core_create_tis(dev->mdev, in, &sq->tisn);
 }
 
 static void destroy_raw_packet_qp_tis(struct mlx5_ib_dev *dev,
@@ -1460,9 +1460,8 @@ static void destroy_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 
 static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 				    struct mlx5_ib_rq *rq, u32 tdn,
-				    u32 *qp_flags_en,
-				    struct ib_pd *pd,
-				    u32 *out, int outlen)
+				    u32 *qp_flags_en, struct ib_pd *pd,
+				    u32 *out)
 {
 	u8 lb_flag = 0;
 	u32 *in;
@@ -1495,9 +1494,8 @@ static int create_raw_packet_qp_tir(struct mlx5_ib_dev *dev,
 	}
 
 	MLX5_SET(tirc, tirc, self_lb_block, lb_flag);
-
-	err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen);
-
+	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+	err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
 	rq->tirn = MLX5_GET(create_tir_out, out, tirn);
 	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
 		err = mlx5_ib_enable_lb(dev, false, true);
@@ -1557,9 +1555,8 @@ static int create_raw_packet_qp(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 		if (err)
 			goto err_destroy_sq;
 
-		err = create_raw_packet_qp_tir(
-			dev, rq, tdn, &qp->flags_en, pd, out,
-			MLX5_ST_SZ_BYTES(create_tir_out));
+		err = create_raw_packet_qp_tir(dev, rq, tdn, &qp->flags_en, pd,
+					       out);
 		if (err)
 			goto err_destroy_rq;
 
@@ -1854,7 +1851,8 @@ static int create_rss_raw_qp_tir(struct mlx5_ib_dev *dev, struct mlx5_ib_qp *qp,
 	MLX5_SET(rx_hash_field_select, hfso, selected_fields, selected_fields);
 
 create_tir:
-	err = mlx5_core_create_tir_out(dev->mdev, in, inlen, out, outlen);
+	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+	err = mlx5_cmd_exec_inout(dev->mdev, create_tir, in, out);
 
 	qp->rss_qp.tirn = MLX5_GET(create_tir_out, out, tirn);
 	if (!err && MLX5_GET(tirc, tirc, self_lb_block)) {
@@ -2933,7 +2931,7 @@ static int modify_raw_packet_eth_prio(struct mlx5_core_dev *dev,
 	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
 	MLX5_SET(tisc, tisc, prio, ((sl & 0x7) << 1));
 
-	err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+	err = mlx5_core_modify_tis(dev, sq->tisn, in);
 
 	kvfree(in);
 
@@ -2960,7 +2958,7 @@ static int modify_raw_packet_tx_affinity(struct mlx5_core_dev *dev,
 	tisc = MLX5_ADDR_OF(modify_tis_in, in, ctx);
 	MLX5_SET(tisc, tisc, lag_tx_port_affinity, tx_affinity);
 
-	err = mlx5_core_modify_tis(dev, sq->tisn, in, inlen);
+	err = mlx5_core_modify_tis(dev, sq->tisn, in);
 
 	kvfree(in);
 
@@ -3240,7 +3238,7 @@ static int modify_raw_packet_qp_rq(
 				"RAW PACKET QP counters are not supported on current FW\n");
 	}
 
-	err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in, inlen);
+	err = mlx5_core_modify_rq(dev->mdev, rq->base.mqp.qpn, in);
 	if (err)
 		goto out;
 
@@ -3303,7 +3301,7 @@ static int modify_raw_packet_qp_sq(
 		MLX5_SET(sqc, sqc, packet_pacing_rate_limit_index, rl_index);
 	}
 
-	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in, inlen);
+	err = mlx5_core_modify_sq(dev, sq->base.mqp.qpn, in);
 	if (err) {
 		/* Remove new rate from table if failed */
 		if (new_rate_added)
@@ -6444,7 +6442,7 @@ int mlx5_ib_modify_wq(struct ib_wq *wq, struct ib_wq_attr *wq_attr,
 				"Receive WQ counters are not supported on current FW\n");
 	}
 
-	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in, inlen);
+	err = mlx5_core_modify_rq(dev->mdev, rwq->core_qp.qpn, in);
 	if (!err)
 		rwq->ibwq.state = (wq_state == MLX5_RQC_STATE_ERR) ? IB_WQS_ERR : wq_state;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 12a61bf82c14..1599b05f3c5a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1012,7 +1012,7 @@ int mlx5e_redirect_rqt(struct mlx5e_priv *priv, u32 rqtn, int sz,
 void mlx5e_build_indir_tir_ctx_hash(struct mlx5e_rss_params *rss_params,
 				    const struct mlx5e_tirc_config *ttconfig,
 				    void *tirc, bool inner);
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen);
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in);
 struct mlx5e_tirc_config mlx5e_tirc_get_default_config(enum mlx5e_traffic_types tt);
 
 struct mlx5e_xsk_param;
@@ -1102,8 +1102,8 @@ void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
 void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
 #endif
 
-int mlx5e_create_tir(struct mlx5_core_dev *mdev,
-		     struct mlx5e_tir *tir, u32 *in, int inlen);
+int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
+		     u32 *in);
 void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index f7890e0ce96c..af3228b3f303 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -36,12 +36,11 @@
  * Global resources are common to all the netdevices crated on the same nic.
  */
 
-int mlx5e_create_tir(struct mlx5_core_dev *mdev,
-		     struct mlx5e_tir *tir, u32 *in, int inlen)
+int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir, u32 *in)
 {
 	int err;
 
-	err = mlx5_core_create_tir(mdev, in, inlen, &tir->tirn);
+	err = mlx5_core_create_tir(mdev, in, &tir->tirn);
 	if (err)
 		return err;
 
@@ -167,7 +166,7 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 	mutex_lock(&mdev->mlx5e_res.td.list_lock);
 	list_for_each_entry(tir, &mdev->mlx5e_res.td.tirs_list, list) {
 		tirn = tir->tirn;
-		err = mlx5_core_modify_tir(mdev, tirn, in, inlen);
+		err = mlx5_core_modify_tir(mdev, tirn, in);
 		if (err)
 			goto out;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
index 6d703ddee4e2..de8250820b06 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_ethtool.c
@@ -1204,7 +1204,7 @@ int mlx5e_set_rxfh(struct net_device *dev, const u32 *indir,
 	}
 
 	if (hash_changed)
-		mlx5e_modify_tirs_hash(priv, in, inlen);
+		mlx5e_modify_tirs_hash(priv, in);
 
 	mutex_unlock(&priv->state_lock);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
index 3bc2ac3d53fc..83c9b2bbc4af 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_fs_ethtool.c
@@ -858,7 +858,7 @@ static int mlx5e_set_rss_hash_opt(struct mlx5e_priv *priv,
 		goto out;
 
 	priv->rss_params.rx_hash_fields[tt] = rx_hash_field;
-	mlx5e_modify_tirs_hash(priv, in, inlen);
+	mlx5e_modify_tirs_hash(priv, in);
 
 out:
 	mutex_unlock(&priv->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 30970b405040..05dbe8b9caac 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -721,7 +721,7 @@ int mlx5e_modify_rq_state(struct mlx5e_rq *rq, int curr_state, int next_state)
 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
 	MLX5_SET(rqc, rqc, state, next_state);
 
-	err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 
 	kvfree(in);
 
@@ -752,7 +752,7 @@ static int mlx5e_modify_rq_scatter_fcs(struct mlx5e_rq *rq, bool enable)
 	MLX5_SET(rqc, rqc, scatter_fcs, enable);
 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 
-	err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 
 	kvfree(in);
 
@@ -781,7 +781,7 @@ static int mlx5e_modify_rq_vsd(struct mlx5e_rq *rq, bool vsd)
 	MLX5_SET(rqc, rqc, vsd, vsd);
 	MLX5_SET(rqc, rqc, state, MLX5_RQC_STATE_RDY);
 
-	err = mlx5_core_modify_rq(mdev, rq->rqn, in, inlen);
+	err = mlx5_core_modify_rq(mdev, rq->rqn, in);
 
 	kvfree(in);
 
@@ -1259,7 +1259,7 @@ int mlx5e_modify_sq(struct mlx5_core_dev *mdev, u32 sqn,
 		MLX5_SET(sqc,  sqc, packet_pacing_rate_limit_index, p->rl_index);
 	}
 
-	err = mlx5_core_modify_sq(mdev, sqn, in, inlen);
+	err = mlx5_core_modify_sq(mdev, sqn, in);
 
 	kvfree(in);
 
@@ -2698,7 +2698,7 @@ static void mlx5e_update_rx_hash_fields(struct mlx5e_tirc_config *ttconfig,
 	ttconfig->rx_hash_fields = rx_hash_fields;
 }
 
-void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
+void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in)
 {
 	void *tirc = MLX5_ADDR_OF(modify_tir_in, in, ctx);
 	struct mlx5e_rss_params *rss = &priv->rss_params;
@@ -2714,7 +2714,7 @@ void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
 		mlx5e_update_rx_hash_fields(&ttconfig, tt,
 					    rss->rx_hash_fields[tt]);
 		mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, false);
-		mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in, inlen);
+		mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
 	}
 
 	if (!mlx5e_tunnel_inner_ft_supported(priv->mdev))
@@ -2725,8 +2725,7 @@ void mlx5e_modify_tirs_hash(struct mlx5e_priv *priv, void *in, int inlen)
 		mlx5e_update_rx_hash_fields(&ttconfig, tt,
 					    rss->rx_hash_fields[tt]);
 		mlx5e_build_indir_tir_ctx_hash(rss, &ttconfig, tirc, true);
-		mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in,
-				     inlen);
+		mlx5_core_modify_tir(mdev, priv->inner_indir_tir[tt].tirn, in);
 	}
 }
 
@@ -2752,15 +2751,13 @@ static int mlx5e_modify_tirs_lro(struct mlx5e_priv *priv)
 	mlx5e_build_tir_ctx_lro(&priv->channels.params, tirc);
 
 	for (tt = 0; tt < MLX5E_NUM_INDIR_TIRS; tt++) {
-		err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in,
-					   inlen);
+		err = mlx5_core_modify_tir(mdev, priv->indir_tir[tt].tirn, in);
 		if (err)
 			goto free_in;
 	}
 
 	for (ix = 0; ix < priv->max_nch; ix++) {
-		err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn,
-					   in, inlen);
+		err = mlx5_core_modify_tir(mdev, priv->direct_tir[ix].tirn, in);
 		if (err)
 			goto free_in;
 	}
@@ -3214,7 +3211,7 @@ int mlx5e_create_tis(struct mlx5_core_dev *mdev, void *in, u32 *tisn)
 	if (mlx5_lag_is_lacp_owner(mdev))
 		MLX5_SET(tisc, tisc, strict_lag_tx_port_affinity, 1);
 
-	return mlx5_core_create_tis(mdev, in, MLX5_ST_SZ_BYTES(create_tis_in), tisn);
+	return mlx5_core_create_tis(mdev, in, tisn);
 }
 
 void mlx5e_destroy_tis(struct mlx5_core_dev *mdev, u32 tisn)
@@ -3332,7 +3329,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 		tir = &priv->indir_tir[tt];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 		mlx5e_build_indir_tir_ctx(priv, tt, tirc);
-		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+		err = mlx5e_create_tir(priv->mdev, tir, in);
 		if (err) {
 			mlx5_core_warn(priv->mdev, "create indirect tirs failed, %d\n", err);
 			goto err_destroy_inner_tirs;
@@ -3347,7 +3344,7 @@ int mlx5e_create_indirect_tirs(struct mlx5e_priv *priv, bool inner_ttc)
 		tir = &priv->inner_indir_tir[i];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 		mlx5e_build_inner_indir_tir_ctx(priv, i, tirc);
-		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+		err = mlx5e_create_tir(priv->mdev, tir, in);
 		if (err) {
 			mlx5_core_warn(priv->mdev, "create inner indirect tirs failed, %d\n", err);
 			goto err_destroy_inner_tirs;
@@ -3390,7 +3387,7 @@ int mlx5e_create_direct_tirs(struct mlx5e_priv *priv, struct mlx5e_tir *tirs)
 		tir = &tirs[ix];
 		tirc = MLX5_ADDR_OF(create_tir_in, in, ctx);
 		mlx5e_build_direct_tir_ctx(priv, tir->rqt.rqtn, tirc);
-		err = mlx5e_create_tir(priv->mdev, tir, in, inlen);
+		err = mlx5e_create_tir(priv->mdev, tir, in);
 		if (unlikely(err))
 			goto err_destroy_ch_tirs;
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 438128dde187..88c0e460e995 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -568,7 +568,7 @@ struct mlx5_core_dev *mlx5e_hairpin_get_mdev(struct net *net, int ifindex)
 
 static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
 {
-	u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(create_tir_in)] = {};
 	void *tirc;
 	int err;
 
@@ -582,7 +582,7 @@ static int mlx5e_hairpin_create_transport(struct mlx5e_hairpin *hp)
 	MLX5_SET(tirc, tirc, inline_rqn, hp->pair->rqn[0]);
 	MLX5_SET(tirc, tirc, transport_domain, hp->tdn);
 
-	err = mlx5_core_create_tir(hp->func_mdev, in, MLX5_ST_SZ_BYTES(create_tir_in), &hp->tirn);
+	err = mlx5_core_create_tir(hp->func_mdev, in, &hp->tirn);
 	if (err)
 		goto create_tir_err;
 
@@ -666,7 +666,7 @@ static int mlx5e_hairpin_create_indirect_tirs(struct mlx5e_hairpin *hp)
 		mlx5e_build_indir_tir_ctx_hash(&priv->rss_params, &ttconfig, tirc, false);
 
 		err = mlx5_core_create_tir(hp->func_mdev, in,
-					   MLX5_ST_SZ_BYTES(create_tir_in), &hp->indir_tirn[tt]);
+					   &hp->indir_tirn[tt]);
 		if (err) {
 			mlx5_core_warn(hp->func_mdev, "create indirect tirs failed, %d\n", err);
 			goto err_destroy_tirs;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
index b1068500f1df..01cc00ad8acf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/transobj.c
@@ -36,14 +36,14 @@
 
 int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn)
 {
-	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(alloc_transport_domain_out)] = {};
+	u32 in[MLX5_ST_SZ_DW(alloc_transport_domain_in)] = {};
 	int err;
 
 	MLX5_SET(alloc_transport_domain_in, in, opcode,
 		 MLX5_CMD_OP_ALLOC_TRANSPORT_DOMAIN);
 
-	err = mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, alloc_transport_domain, in, out);
 	if (!err)
 		*tdn = MLX5_GET(alloc_transport_domain_out, out,
 				transport_domain);
@@ -54,19 +54,18 @@ EXPORT_SYMBOL(mlx5_core_alloc_transport_domain);
 
 void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn)
 {
-	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(dealloc_transport_domain_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(dealloc_transport_domain_in)] = {};
 
 	MLX5_SET(dealloc_transport_domain_in, in, opcode,
 		 MLX5_CMD_OP_DEALLOC_TRANSPORT_DOMAIN);
 	MLX5_SET(dealloc_transport_domain_in, in, transport_domain, tdn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, dealloc_transport_domain, in);
 }
 EXPORT_SYMBOL(mlx5_core_dealloc_transport_domain);
 
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_rq_out)] = {};
 	int err;
 
 	MLX5_SET(create_rq_in, in, opcode, MLX5_CMD_OP_CREATE_RQ);
@@ -78,44 +77,39 @@ int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *rqn)
 }
 EXPORT_SYMBOL(mlx5_core_create_rq);
 
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen)
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_rq_out)];
-
 	MLX5_SET(modify_rq_in, in, rqn, rqn);
 	MLX5_SET(modify_rq_in, in, opcode, MLX5_CMD_OP_MODIFY_RQ);
 
-	memset(out, 0, sizeof(out));
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_rq, in);
 }
 EXPORT_SYMBOL(mlx5_core_modify_rq);
 
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_rq_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_rq_in)] = {};
 
 	MLX5_SET(destroy_rq_in, in, opcode, MLX5_CMD_OP_DESTROY_RQ);
 	MLX5_SET(destroy_rq_in, in, rqn, rqn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, destroy_rq, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rq);
 
 int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {0};
-	int outlen = MLX5_ST_SZ_BYTES(query_rq_out);
+	u32 in[MLX5_ST_SZ_DW(query_rq_in)] = {};
 
 	MLX5_SET(query_rq_in, in, opcode, MLX5_CMD_OP_QUERY_RQ);
 	MLX5_SET(query_rq_in, in, rqn, rqn);
 
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec_inout(dev, query_rq, in, out);
 }
 EXPORT_SYMBOL(mlx5_core_query_rq);
 
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_sq_out)] = {};
 	int err;
 
 	MLX5_SET(create_sq_in, in, opcode, MLX5_CMD_OP_CREATE_SQ);
@@ -126,34 +120,30 @@ int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen, u32 *sqn)
 	return err;
 }
 
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen)
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_sq_out)] = {0};
-
 	MLX5_SET(modify_sq_in, in, sqn, sqn);
 	MLX5_SET(modify_sq_in, in, opcode, MLX5_CMD_OP_MODIFY_SQ);
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_sq, in);
 }
 EXPORT_SYMBOL(mlx5_core_modify_sq);
 
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_sq_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_sq_in)] = {};
 
 	MLX5_SET(destroy_sq_in, in, opcode, MLX5_CMD_OP_DESTROY_SQ);
 	MLX5_SET(destroy_sq_in, in, sqn, sqn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, destroy_sq, in);
 }
 
 int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out)
 {
-	u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {0};
-	int outlen = MLX5_ST_SZ_BYTES(query_sq_out);
+	u32 in[MLX5_ST_SZ_DW(query_sq_in)] = {};
 
 	MLX5_SET(query_sq_in, in, opcode, MLX5_CMD_OP_QUERY_SQ);
 	MLX5_SET(query_sq_in, in, sqn, sqn);
-	return mlx5_cmd_exec(dev, in, sizeof(in), out, outlen);
+	return mlx5_cmd_exec_inout(dev, query_sq, in, out);
 }
 EXPORT_SYMBOL(mlx5_core_query_sq);
 
@@ -182,24 +172,13 @@ out:
 }
 EXPORT_SYMBOL_GPL(mlx5_core_query_sq_state);
 
-int mlx5_core_create_tir_out(struct mlx5_core_dev *dev,
-			     u32 *in, int inlen,
-			     u32 *out, int outlen)
-{
-	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
-
-	return mlx5_cmd_exec(dev, in, inlen, out, outlen);
-}
-EXPORT_SYMBOL(mlx5_core_create_tir_out);
-
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			 u32 *tirn)
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn)
 {
 	u32 out[MLX5_ST_SZ_DW(create_tir_out)] = {};
 	int err;
 
-	err = mlx5_core_create_tir_out(dev, in, inlen,
-				       out, sizeof(out));
+	MLX5_SET(create_tir_in, in, opcode, MLX5_CMD_OP_CREATE_TIR);
+	err = mlx5_cmd_exec_inout(dev, create_tir, in, out);
 	if (!err)
 		*tirn = MLX5_GET(create_tir_out, out, tirn);
 
@@ -207,35 +186,30 @@ int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
 }
 EXPORT_SYMBOL(mlx5_core_create_tir);
 
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
-			 int inlen)
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_tir_out)] = {0};
-
 	MLX5_SET(modify_tir_in, in, tirn, tirn);
 	MLX5_SET(modify_tir_in, in, opcode, MLX5_CMD_OP_MODIFY_TIR);
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_tir, in);
 }
 
 void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_tir_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_tir_in)] = {};
 
 	MLX5_SET(destroy_tir_in, in, opcode, MLX5_CMD_OP_DESTROY_TIR);
 	MLX5_SET(destroy_tir_in, in, tirn, tirn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, destroy_tir, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tir);
 
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			 u32 *tisn)
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_tis_out)] = {};
 	int err;
 
 	MLX5_SET(create_tis_in, in, opcode, MLX5_CMD_OP_CREATE_TIS);
-	err = mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	err = mlx5_cmd_exec_inout(dev, create_tis, in, out);
 	if (!err)
 		*tisn = MLX5_GET(create_tis_out, out, tisn);
 
@@ -243,33 +217,29 @@ int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
 }
 EXPORT_SYMBOL(mlx5_core_create_tis);
 
-int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
-			 int inlen)
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_tis_out)] = {0};
-
 	MLX5_SET(modify_tis_in, in, tisn, tisn);
 	MLX5_SET(modify_tis_in, in, opcode, MLX5_CMD_OP_MODIFY_TIS);
 
-	return mlx5_cmd_exec(dev, in, inlen, out, sizeof(out));
+	return mlx5_cmd_exec_in(dev, modify_tis, in);
 }
 EXPORT_SYMBOL(mlx5_core_modify_tis);
 
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_tis_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_tis_in)] = {};
 
 	MLX5_SET(destroy_tis_in, in, opcode, MLX5_CMD_OP_DESTROY_TIS);
 	MLX5_SET(destroy_tis_in, in, tisn, tisn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, destroy_tis, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_tis);
 
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *rqtn)
 {
-	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(create_rqt_out)] = {};
 	int err;
 
 	MLX5_SET(create_rqt_in, in, opcode, MLX5_CMD_OP_CREATE_RQT);
@@ -284,7 +254,7 @@ EXPORT_SYMBOL(mlx5_core_create_rqt);
 int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
 			 int inlen)
 {
-	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {0};
+	u32 out[MLX5_ST_SZ_DW(modify_rqt_out)] = {};
 
 	MLX5_SET(modify_rqt_in, in, rqtn, rqtn);
 	MLX5_SET(modify_rqt_in, in, opcode, MLX5_CMD_OP_MODIFY_RQT);
@@ -293,12 +263,11 @@ int mlx5_core_modify_rqt(struct mlx5_core_dev *dev, u32 rqtn, u32 *in,
 
 void mlx5_core_destroy_rqt(struct mlx5_core_dev *dev, u32 rqtn)
 {
-	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)]   = {0};
-	u32 out[MLX5_ST_SZ_DW(destroy_rqt_out)] = {0};
+	u32 in[MLX5_ST_SZ_DW(destroy_rqt_in)] = {};
 
 	MLX5_SET(destroy_rqt_in, in, opcode, MLX5_CMD_OP_DESTROY_RQT);
 	MLX5_SET(destroy_rqt_in, in, rqtn, rqtn);
-	mlx5_cmd_exec(dev, in, sizeof(in), out, sizeof(out));
+	mlx5_cmd_exec_in(dev, destroy_rqt, in);
 }
 EXPORT_SYMBOL(mlx5_core_destroy_rqt);
 
@@ -383,7 +352,7 @@ static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
 				  int curr_state, int next_state,
 				  u16 peer_vhca, u32 peer_sq)
 {
-	u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {0};
+	u32 in[MLX5_ST_SZ_DW(modify_rq_in)] = {};
 	void *rqc;
 
 	rqc = MLX5_ADDR_OF(modify_rq_in, in, ctx);
@@ -396,8 +365,7 @@ static int mlx5_hairpin_modify_rq(struct mlx5_core_dev *func_mdev, u32 rqn,
 	MLX5_SET(modify_rq_in, in, rq_state, curr_state);
 	MLX5_SET(rqc, rqc, state, next_state);
 
-	return mlx5_core_modify_rq(func_mdev, rqn,
-				   in, MLX5_ST_SZ_BYTES(modify_rq_in));
+	return mlx5_core_modify_rq(func_mdev, rqn, in);
 }
 
 static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
@@ -417,8 +385,7 @@ static int mlx5_hairpin_modify_sq(struct mlx5_core_dev *peer_mdev, u32 sqn,
 	MLX5_SET(modify_sq_in, in, sq_state, curr_state);
 	MLX5_SET(sqc, sqc, state, next_state);
 
-	return mlx5_core_modify_sq(peer_mdev, sqn,
-				   in, MLX5_ST_SZ_BYTES(modify_sq_in));
+	return mlx5_core_modify_sq(peer_mdev, sqn, in);
 }
 
 static int mlx5_hairpin_pair_queues(struct mlx5_hairpin *hp)
diff --git a/include/linux/mlx5/transobj.h b/include/linux/mlx5/transobj.h
index dc6b1e7cb8c4..028f442530cf 100644
--- a/include/linux/mlx5/transobj.h
+++ b/include/linux/mlx5/transobj.h
@@ -39,27 +39,20 @@ int mlx5_core_alloc_transport_domain(struct mlx5_core_dev *dev, u32 *tdn);
 void mlx5_core_dealloc_transport_domain(struct mlx5_core_dev *dev, u32 tdn);
 int mlx5_core_create_rq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			u32 *rqn);
-int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in, int inlen);
+int mlx5_core_modify_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *in);
 void mlx5_core_destroy_rq(struct mlx5_core_dev *dev, u32 rqn);
 int mlx5_core_query_rq(struct mlx5_core_dev *dev, u32 rqn, u32 *out);
 int mlx5_core_create_sq(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			u32 *sqn);
-int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in, int inlen);
+int mlx5_core_modify_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *in);
 void mlx5_core_destroy_sq(struct mlx5_core_dev *dev, u32 sqn);
 int mlx5_core_query_sq(struct mlx5_core_dev *dev, u32 sqn, u32 *out);
 int mlx5_core_query_sq_state(struct mlx5_core_dev *dev, u32 sqn, u8 *state);
-int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			 u32 *tirn);
-int mlx5_core_create_tir_out(struct mlx5_core_dev *dev,
-			     u32 *in, int inlen,
-			     u32 *out, int outlen);
-int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in,
-			 int inlen);
+int mlx5_core_create_tir(struct mlx5_core_dev *dev, u32 *in, u32 *tirn);
+int mlx5_core_modify_tir(struct mlx5_core_dev *dev, u32 tirn, u32 *in);
 void mlx5_core_destroy_tir(struct mlx5_core_dev *dev, u32 tirn);
-int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, int inlen,
-			 u32 *tisn);
-int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in,
-			 int inlen);
+int mlx5_core_create_tis(struct mlx5_core_dev *dev, u32 *in, u32 *tisn);
+int mlx5_core_modify_tis(struct mlx5_core_dev *dev, u32 tisn, u32 *in);
 void mlx5_core_destroy_tis(struct mlx5_core_dev *dev, u32 tisn);
 int mlx5_core_create_rqt(struct mlx5_core_dev *dev, u32 *in, int inlen,
 			 u32 *rqtn);
-- 
cgit v1.2.3-59-g8ed1b


From b75326c201242de9495ff98e5d5cff41d7fc0d9d Mon Sep 17 00:00:00 2001
From: Fernando Gont <fgont@si6networks.com>
Date: Sun, 19 Apr 2020 09:24:57 -0300
Subject: ipv6: Honor all IPv6 PIO Valid Lifetime values

RFC4862 5.5.3 e) prevents received Router Advertisements from reducing
the Valid Lifetime of configured addresses to less than two hours, thus
preventing hosts from reacting to the information provided by a router
that has positive knowledge that a prefix has become invalid.

This patch makes hosts honor all Valid Lifetime values, as per
draft-gont-6man-slaac-renum-06, Section 4.2. This is meant to help
mitigate the problem discussed in draft-ietf-v6ops-slaac-renum.

Note: Attacks aiming at disabling an advertised prefix via a Valid
Lifetime of 0 are not really more harmful than other attacks
that can be performed via forged RA messages, such as those
aiming at completely disabling a next-hop router via an RA that
advertises a Router Lifetime of 0, or performing a Denial of
Service (DoS) attack by advertising illegitimate prefixes via
forged PIOs.  In scenarios where RA-based attacks are of concern,
proper mitigations such as RA-Guard [RFC6105] [RFC7113] should
be implemented.

Signed-off-by: Fernando Gont <fgont@si6networks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/addrconf.h |  2 --
 net/ipv6/addrconf.c    | 27 +++++++--------------------
 2 files changed, 7 insertions(+), 22 deletions(-)

diff --git a/include/net/addrconf.h b/include/net/addrconf.h
index e0eabe58aa8b..fdb07105384c 100644
--- a/include/net/addrconf.h
+++ b/include/net/addrconf.h
@@ -6,8 +6,6 @@
 #define RTR_SOLICITATION_INTERVAL	(4*HZ)
 #define RTR_SOLICITATION_MAX_INTERVAL	(3600*HZ)	/* 1 hour */
 
-#define MIN_VALID_LIFETIME		(2*3600)	/* 2 hours */
-
 #define TEMP_VALID_LIFETIME		(7*86400)
 #define TEMP_PREFERRED_LIFETIME		(86400)
 #define REGEN_MAX_RETRY			(3)
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 24e319dfb510..27b4fb6e452b 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2564,7 +2564,7 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 				 __u32 valid_lft, u32 prefered_lft)
 {
 	struct inet6_ifaddr *ifp = ipv6_get_ifaddr(net, addr, dev, 1);
-	int create = 0, update_lft = 0;
+	int create = 0;
 
 	if (!ifp && valid_lft) {
 		int max_addresses = in6_dev->cnf.max_addresses;
@@ -2608,32 +2608,19 @@ int addrconf_prefix_rcv_add_addr(struct net *net, struct net_device *dev,
 		unsigned long now;
 		u32 stored_lft;
 
-		/* update lifetime (RFC2462 5.5.3 e) */
+		/* Update lifetime (RFC4862 5.5.3 e)
+		 * We deviate from RFC4862 by honoring all Valid Lifetimes to
+		 * improve the reaction of SLAAC to renumbering events
+		 * (draft-gont-6man-slaac-renum-06, Section 4.2)
+		 */
 		spin_lock_bh(&ifp->lock);
 		now = jiffies;
 		if (ifp->valid_lft > (now - ifp->tstamp) / HZ)
 			stored_lft = ifp->valid_lft - (now - ifp->tstamp) / HZ;
 		else
 			stored_lft = 0;
-		if (!create && stored_lft) {
-			const u32 minimum_lft = min_t(u32,
-				stored_lft, MIN_VALID_LIFETIME);
-			valid_lft = max(valid_lft, minimum_lft);
-
-			/* RFC4862 Section 5.5.3e:
-			 * "Note that the preferred lifetime of the
-			 *  corresponding address is always reset to
-			 *  the Preferred Lifetime in the received
-			 *  Prefix Information option, regardless of
-			 *  whether the valid lifetime is also reset or
-			 *  ignored."
-			 *
-			 * So we should always update prefered_lft here.
-			 */
-			update_lft = 1;
-		}
 
-		if (update_lft) {
+		if (!create && stored_lft) {
 			ifp->valid_lft = valid_lft;
 			ifp->prefered_lft = prefered_lft;
 			ifp->tstamp = now;
-- 
cgit v1.2.3-59-g8ed1b


From e131a5634830047923c694b4ce0c3b31745ff01b Mon Sep 17 00:00:00 2001
From: Alexander Lobakin <bloodyreaper@yandex.ru>
Date: Tue, 21 Apr 2020 16:41:08 +0300
Subject: net: dsa: add GRO support via gro_cells

gro_cells lib is used by different encapsulating netdevices, such as
geneve, macsec, vxlan etc. to speed up decapsulated traffic processing.
CPU tag is a sort of "encapsulation", and we can use the same mechs to
greatly improve overall DSA performance.
skbs are passed to the GRO layer after removing CPU tags, so we don't
need any new packet offload types as I originally proposed in
the first GRO-over-DSA variant [1].

The size of struct gro_cells is sizeof(void *), so hot struct
dsa_slave_priv becomes only 4/8 bytes bigger, and all critical fields
remain in one 32-byte cacheline.
The other positive side effect is that drivers for network devices
that can be shipped as CPU ports of DSA-driven switches can now use
napi_gro_frags() to pass skbs to kernel. Packets built that way are
completely non-linear and are likely being dropped without GRO.

This was tested on a to-be-mainlined-soon Ethernet driver that uses
napi_gro_frags(), and the overall performance was on par with the
variant from [1], sometimes even better due to minimal overhead.
net.core.gro_normal_batch tuning may help to push it to the limit
on particular setups and platforms.

iperf3 IPoE VLAN NAT TCP forwarding (port1.218 -> port0) setup
on 1.2 GHz MIPS board:

5.7-rc2 baseline:

[ID]  Interval         Transfer     Bitrate        Retr
[ 5]  0.00-120.01 sec  9.00 GBytes  644 Mbits/sec  413  sender
[ 5]  0.00-120.00 sec  8.99 GBytes  644 Mbits/sec       receiver

Iface      RX packets  TX packets
eth0       7097731     7097702
port0      426050      6671829
port1      6671681     425862
port1.218  6671677     425851

With this patch:

[ID]  Interval         Transfer     Bitrate        Retr
[ 5]  0.00-120.01 sec  12.2 GBytes  870 Mbits/sec  122  sender
[ 5]  0.00-120.00 sec  12.2 GBytes  870 Mbits/sec       receiver

Iface      RX packets  TX packets
eth0       9474792     9474777
port0      455200      353288
port1      9019592     455035
port1.218  353144      455024

v2:
 - Add some performance examples in the commit message;
 - No functional changes.

[1] https://lore.kernel.org/netdev/20191230143028.27313-1-alobakin@dlink.ru/

Signed-off-by: Alexander Lobakin <bloodyreaper@yandex.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/Kconfig    |  1 +
 net/dsa/dsa.c      |  2 +-
 net/dsa/dsa_priv.h |  3 +++
 net/dsa/slave.c    | 10 +++++++++-
 4 files changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/dsa/Kconfig b/net/dsa/Kconfig
index 92663dcb3aa2..739613070d07 100644
--- a/net/dsa/Kconfig
+++ b/net/dsa/Kconfig
@@ -9,6 +9,7 @@ menuconfig NET_DSA
 	tristate "Distributed Switch Architecture"
 	depends on HAVE_NET_DSA
 	depends on BRIDGE || BRIDGE=n
+	select GRO_CELLS
 	select NET_SWITCHDEV
 	select PHYLINK
 	select NET_DEVLINK
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index ee2610c4d46a..0384a911779e 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -234,7 +234,7 @@ static int dsa_switch_rcv(struct sk_buff *skb, struct net_device *dev,
 	if (dsa_skb_defer_rx_timestamp(p, skb))
 		return 0;
 
-	netif_receive_skb(skb);
+	gro_cells_receive(&p->gcells, skb);
 
 	return 0;
 }
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 904cc7c9b882..6d9a1ef65fa0 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -11,6 +11,7 @@
 #include <linux/netdevice.h>
 #include <linux/netpoll.h>
 #include <net/dsa.h>
+#include <net/gro_cells.h>
 
 enum {
 	DSA_NOTIFIER_AGEING_TIME,
@@ -77,6 +78,8 @@ struct dsa_slave_priv {
 
 	struct pcpu_sw_netstats	*stats64;
 
+	struct gro_cells	gcells;
+
 	/* DSA port data, such as switch, port index, etc. */
 	struct dsa_port		*dp;
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index e94eb1aac602..f2c241cf3a80 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1762,6 +1762,11 @@ int dsa_slave_create(struct dsa_port *port)
 		free_netdev(slave_dev);
 		return -ENOMEM;
 	}
+
+	ret = gro_cells_init(&p->gcells, slave_dev);
+	if (ret)
+		goto out_free;
+
 	p->dp = port;
 	INIT_LIST_HEAD(&p->mall_tc_list);
 	p->xmit = cpu_dp->tag_ops->xmit;
@@ -1781,7 +1786,7 @@ int dsa_slave_create(struct dsa_port *port)
 	ret = dsa_slave_phy_setup(slave_dev);
 	if (ret) {
 		netdev_err(master, "error %d setting up slave phy\n", ret);
-		goto out_free;
+		goto out_gcells;
 	}
 
 	dsa_slave_notify(slave_dev, DSA_PORT_REGISTER);
@@ -1800,6 +1805,8 @@ out_phy:
 	phylink_disconnect_phy(p->dp->pl);
 	rtnl_unlock();
 	phylink_destroy(p->dp->pl);
+out_gcells:
+	gro_cells_destroy(&p->gcells);
 out_free:
 	free_percpu(p->stats64);
 	free_netdev(slave_dev);
@@ -1820,6 +1827,7 @@ void dsa_slave_destroy(struct net_device *slave_dev)
 	dsa_slave_notify(slave_dev, DSA_PORT_UNREGISTER);
 	unregister_netdev(slave_dev);
 	phylink_destroy(dp->pl);
+	gro_cells_destroy(&p->gcells);
 	free_percpu(p->stats64);
 	free_netdev(slave_dev);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2196d831205bad6bc5cd328baf0ae02234629695 Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Wed, 22 Apr 2020 06:16:06 -0700
Subject: qed: Enable device error reporting capability.

The patch enables the device to send error messages to the root port when
an error is detected.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 96356e897c80..38a1d26ca9db 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -49,6 +49,7 @@
 #include <linux/qed/qed_if.h>
 #include <linux/qed/qed_ll2_if.h>
 #include <net/devlink.h>
+#include <linux/aer.h>
 
 #include "qed.h"
 #include "qed_sriov.h"
@@ -129,6 +130,8 @@ static void qed_free_pci(struct qed_dev *cdev)
 {
 	struct pci_dev *pdev = cdev->pdev;
 
+	pci_disable_pcie_error_reporting(pdev);
+
 	if (cdev->doorbells && cdev->db_size)
 		iounmap(cdev->doorbells);
 	if (cdev->regview)
@@ -231,6 +234,12 @@ static int qed_init_pci(struct qed_dev *cdev, struct pci_dev *pdev)
 		return -ENOMEM;
 	}
 
+	/* AER (Advanced Error reporting) configuration */
+	rc = pci_enable_pcie_error_reporting(pdev);
+	if (rc)
+		DP_VERBOSE(cdev, NETIF_MSG_DRV,
+			   "Failed to configure PCIe AER [%d]\n", rc);
+
 	return 0;
 
 err2:
-- 
cgit v1.2.3-59-g8ed1b


From 731815e720ae7e47a19753e00ea80651b2d52b3b Mon Sep 17 00:00:00 2001
From: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Date: Wed, 22 Apr 2020 06:16:07 -0700
Subject: qede: Add support for handling the pcie errors.

Error recovery is handled by the management firmware (MFW) with the help
of the qed/qede drivers. Upon detecting an error, the driver informs the
MFW about the event, which in turn starts a recovery process. The MFW then
sends an ERROR_RECOVERY notification to the driver, which performs the
required cleanup/recovery on the driver side.

Signed-off-by: Sudarsana Reddy Kalluru <skalluru@marvell.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |  1 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 68 +++++++++++++++++++++++++++-
 2 files changed, 68 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 234c6f30effb..1a708f95ce94 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -485,6 +485,7 @@ struct qede_fastpath {
 
 #define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
+#define QEDE_SP_AER			7
 
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 34fa3917eb33..9b456198cb50 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -60,6 +60,7 @@
 #include <net/ip6_checksum.h>
 #include <linux/bitops.h>
 #include <linux/vmalloc.h>
+#include <linux/aer.h>
 #include "qede.h"
 #include "qede_ptp.h"
 
@@ -124,6 +125,8 @@ static const struct pci_device_id qede_pci_tbl[] = {
 MODULE_DEVICE_TABLE(pci, qede_pci_tbl);
 
 static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state);
 
 #define TX_TIMEOUT		(5 * HZ)
 
@@ -203,6 +206,10 @@ static int qede_sriov_configure(struct pci_dev *pdev, int num_vfs_param)
 }
 #endif
 
+static const struct pci_error_handlers qede_err_handler = {
+	.error_detected = qede_io_error_detected,
+};
+
 static struct pci_driver qede_pci_driver = {
 	.name = "qede",
 	.id_table = qede_pci_tbl,
@@ -212,6 +219,7 @@ static struct pci_driver qede_pci_driver = {
 #ifdef CONFIG_QED_SRIOV
 	.sriov_configure = qede_sriov_configure,
 #endif
+	.err_handler = &qede_err_handler,
 };
 
 static struct qed_eth_cb_ops qede_ll_ops = {
@@ -974,7 +982,8 @@ static void qede_sp_task(struct work_struct *work)
 		/* SRIOV must be disabled outside the lock to avoid a deadlock.
 		 * The recovery of the active VFs is currently not supported.
 		 */
-		qede_sriov_configure(edev->pdev, 0);
+		if (pci_num_vf(edev->pdev))
+			qede_sriov_configure(edev->pdev, 0);
 #endif
 		qede_lock(edev);
 		qede_recovery_handler(edev);
@@ -994,6 +1003,17 @@ static void qede_sp_task(struct work_struct *work)
 	}
 #endif
 	__qede_unlock(edev);
+
+	if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
+#ifdef CONFIG_QED_SRIOV
+		/* SRIOV must be disabled outside the lock to avoid a deadlock.
+		 * The recovery of the active VFs is currently not supported.
+		 */
+		if (pci_num_vf(edev->pdev))
+			qede_sriov_configure(edev->pdev, 0);
+#endif
+		edev->ops->common->recovery_process(edev->cdev);
+	}
 }
 
 static void qede_update_pf_params(struct qed_dev *cdev)
@@ -2579,3 +2599,49 @@ static void qede_get_eth_tlv_data(void *dev, void *data)
 	etlv->num_txqs_full_set = true;
 	etlv->num_rxqs_full_set = true;
 }
+
+/**
+ * qede_io_error_detected - called when PCI error is detected
+ * @pdev: Pointer to PCI device
+ * @state: The current pci connection state
+ *
+ * This function is called after a PCI bus error affecting
+ * this device has been detected.
+ */
+static pci_ers_result_t
+qede_io_error_detected(struct pci_dev *pdev, pci_channel_state_t state)
+{
+	struct net_device *dev = pci_get_drvdata(pdev);
+	struct qede_dev *edev = netdev_priv(dev);
+
+	if (!edev)
+		return PCI_ERS_RESULT_NONE;
+
+	DP_NOTICE(edev, "IO error detected [%d]\n", state);
+
+	__qede_lock(edev);
+	if (edev->state == QEDE_STATE_RECOVERY) {
+		DP_NOTICE(edev, "Device already in the recovery state\n");
+		__qede_unlock(edev);
+		return PCI_ERS_RESULT_NONE;
+	}
+
+	/* PF handles the recovery of its VFs */
+	if (IS_VF(edev)) {
+		DP_VERBOSE(edev, QED_MSG_IOV,
+			   "VF recovery is handled by its PF\n");
+		__qede_unlock(edev);
+		return PCI_ERS_RESULT_RECOVERED;
+	}
+
+	/* Close OS Tx */
+	netif_tx_disable(edev->ndev);
+	netif_carrier_off(edev->ndev);
+
+	set_bit(QEDE_SP_AER, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+
+	__qede_unlock(edev);
+
+	return PCI_ERS_RESULT_CAN_RECOVER;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 6f8b12d661d09b488b9ac879b8eafbd2cc4a1450 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Apr 2020 09:13:27 -0700
Subject: net: napi: add hard irqs deferral feature

Back in commit 3b47d30396ba ("net: gro: add a per device gro flush timer")
we added the ability to arm one high resolution timer, that we used
to keep not-complete packets in GRO engine a bit longer, hoping that further
frames might be added to them.

Since then, we added the napi_complete_done() interface, and commit
364b6055738b ("net: busy-poll: return busypolling status to drivers")
allowed drivers to avoid re-arming NIC interrupts if we made a promise
that their NAPI poll() handler would be called in the near future.

This infrastructure can be leveraged, thanks to a new device parameter,
which allows arming the napi hrtimer instead of re-arming the device
hard IRQ.

We have noticed that on some servers with 32 RX queues or more, the chit-chat
between the NIC and the host caused by IRQ delivery and re-arming could hurt
throughput by ~20% on 100Gbit NIC.

In contrast, hrtimers are using local (percpu) resources and might have lower
cost.

The new tunable, named napi_defer_hard_irqs, is placed in the same hierarchy
as gro_flush_timeout (/sys/class/net/ethX/)

By default, both gro_flush_timeout and napi_defer_hard_irqs are zero.

This patch does not change the prior behavior of gro_flush_timeout
if used alone : NIC hard irqs should be rearmed as before.

One concrete usage can be :

echo 20000 >/sys/class/net/eth1/gro_flush_timeout
echo 10 >/sys/class/net/eth1/napi_defer_hard_irqs

If at least one packet is retired, then we will reset napi counter
to 10 (napi_defer_hard_irqs), ensuring at least 10 periodic scans
of the queue.

On busy queues, this should avoid NIC hard IRQ, while before this patch IRQ
avoidance was only possible if napi->poll() was exhausting its budget
and not calling napi_complete_done().

This feature also can be used to work around some non-optimal NIC irq
coalescing strategies.

Having the ability to insert XX usec delays between each napi->poll()
can increase cache efficiency, since we increase batch sizes.

It also keeps the serving cpus from staying idle too long, reducing tail
latencies.

Co-developed-by: Luigi Rizzo <lrizzo@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  2 ++
 net/core/dev.c            | 29 ++++++++++++++++++-----------
 net/core/net-sysfs.c      | 18 ++++++++++++++++++
 3 files changed, 38 insertions(+), 11 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 0750b54b3765..5a8d40f1ffe2 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -329,6 +329,7 @@ struct napi_struct {
 
 	unsigned long		state;
 	int			weight;
+	int			defer_hard_irqs_count;
 	unsigned long		gro_bitmask;
 	int			(*poll)(struct napi_struct *, int);
 #ifdef CONFIG_NETPOLL
@@ -1995,6 +1996,7 @@ struct net_device {
 
 	struct bpf_prog __rcu	*xdp_prog;
 	unsigned long		gro_flush_timeout;
+	int			napi_defer_hard_irqs;
 	rx_handler_func_t __rcu	*rx_handler;
 	void __rcu		*rx_handler_data;
 
diff --git a/net/core/dev.c b/net/core/dev.c
index fb61522b1ce1..67585484ad32 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6227,7 +6227,8 @@ EXPORT_SYMBOL(__napi_schedule_irqoff);
 
 bool napi_complete_done(struct napi_struct *n, int work_done)
 {
-	unsigned long flags, val, new;
+	unsigned long flags, val, new, timeout = 0;
+	bool ret = true;
 
 	/*
 	 * 1) Don't let napi dequeue from the cpu poll list
@@ -6239,20 +6240,23 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 				 NAPIF_STATE_IN_BUSY_POLL)))
 		return false;
 
-	if (n->gro_bitmask) {
-		unsigned long timeout = 0;
-
-		if (work_done)
+	if (work_done) {
+		if (n->gro_bitmask)
 			timeout = n->dev->gro_flush_timeout;
-
+		n->defer_hard_irqs_count = n->dev->napi_defer_hard_irqs;
+	}
+	if (n->defer_hard_irqs_count > 0) {
+		n->defer_hard_irqs_count--;
+		timeout = n->dev->gro_flush_timeout;
+		if (timeout)
+			ret = false;
+	}
+	if (n->gro_bitmask) {
 		/* When the NAPI instance uses a timeout and keeps postponing
 		 * it, we need to bound somehow the time packets are kept in
 		 * the GRO layer
 		 */
 		napi_gro_flush(n, !!timeout);
-		if (timeout)
-			hrtimer_start(&n->timer, ns_to_ktime(timeout),
-				      HRTIMER_MODE_REL_PINNED);
 	}
 
 	gro_normal_list(n);
@@ -6284,7 +6288,10 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 		return false;
 	}
 
-	return true;
+	if (timeout)
+		hrtimer_start(&n->timer, ns_to_ktime(timeout),
+			      HRTIMER_MODE_REL_PINNED);
+	return ret;
 }
 EXPORT_SYMBOL(napi_complete_done);
 
@@ -6464,7 +6471,7 @@ static enum hrtimer_restart napi_watchdog(struct hrtimer *timer)
 	/* Note : we use a relaxed variant of napi_schedule_prep() not setting
 	 * NAPI_STATE_MISSED, since we do not react to a device IRQ.
 	 */
-	if (napi->gro_bitmask && !napi_disable_pending(napi) &&
+	if (!napi_disable_pending(napi) &&
 	    !test_and_set_bit(NAPI_STATE_SCHED, &napi->state))
 		__napi_schedule_irqoff(napi);
 
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 0d9e46de205e..f3b650cd0923 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -382,6 +382,23 @@ static ssize_t gro_flush_timeout_store(struct device *dev,
 }
 NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
 
+static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
+{
+	dev->napi_defer_hard_irqs = val;
+	return 0;
+}
+
+static ssize_t napi_defer_hard_irqs_store(struct device *dev,
+					  struct device_attribute *attr,
+					  const char *buf, size_t len)
+{
+	if (!capable(CAP_NET_ADMIN))
+		return -EPERM;
+
+	return netdev_store(dev, attr, buf, len, change_napi_defer_hard_irqs);
+}
+NETDEVICE_SHOW_RW(napi_defer_hard_irqs, fmt_dec);
+
 static ssize_t ifalias_store(struct device *dev, struct device_attribute *attr,
 			     const char *buf, size_t len)
 {
@@ -545,6 +562,7 @@ static struct attribute *net_class_attrs[] __ro_after_init = {
 	&dev_attr_flags.attr,
 	&dev_attr_tx_queue_len.attr,
 	&dev_attr_gro_flush_timeout.attr,
+	&dev_attr_napi_defer_hard_irqs.attr,
 	&dev_attr_phys_port_id.attr,
 	&dev_attr_phys_port_name.attr,
 	&dev_attr_phys_switch_id.attr,
-- 
cgit v1.2.3-59-g8ed1b


From 7e417a66b86c110f4b282945dac82e21e0b08328 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Apr 2020 09:13:28 -0700
Subject: net: napi: use READ_ONCE()/WRITE_ONCE()

gro_flush_timeout and napi_defer_hard_irqs can be read
from napi_complete_done() while other cpus write the value,
without explicit synchronization.

Use READ_ONCE()/WRITE_ONCE() to annotate the races.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/dev.c       | 6 +++---
 net/core/net-sysfs.c | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 67585484ad32..afff16849c26 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -6242,12 +6242,12 @@ bool napi_complete_done(struct napi_struct *n, int work_done)
 
 	if (work_done) {
 		if (n->gro_bitmask)
-			timeout = n->dev->gro_flush_timeout;
-		n->defer_hard_irqs_count = n->dev->napi_defer_hard_irqs;
+			timeout = READ_ONCE(n->dev->gro_flush_timeout);
+		n->defer_hard_irqs_count = READ_ONCE(n->dev->napi_defer_hard_irqs);
 	}
 	if (n->defer_hard_irqs_count > 0) {
 		n->defer_hard_irqs_count--;
-		timeout = n->dev->gro_flush_timeout;
+		timeout = READ_ONCE(n->dev->gro_flush_timeout);
 		if (timeout)
 			ret = false;
 	}
diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index f3b650cd0923..880e89c894f6 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -367,7 +367,7 @@ NETDEVICE_SHOW_RW(tx_queue_len, fmt_dec);
 
 static int change_gro_flush_timeout(struct net_device *dev, unsigned long val)
 {
-	dev->gro_flush_timeout = val;
+	WRITE_ONCE(dev->gro_flush_timeout, val);
 	return 0;
 }
 
@@ -384,7 +384,7 @@ NETDEVICE_SHOW_RW(gro_flush_timeout, fmt_ulong);
 
 static int change_napi_defer_hard_irqs(struct net_device *dev, unsigned long val)
 {
-	dev->napi_defer_hard_irqs = val;
+	WRITE_ONCE(dev->napi_defer_hard_irqs, val);
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From cf4058dbaa18bf8e55b7cb8c04e1c313298cd5b1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 22 Apr 2020 09:13:29 -0700
Subject: net/mlx4_en: use napi_complete_done() in TX completion

In order to benefit from the new napi_defer_hard_irqs feature,
we need to use napi_complete_done() variant in this driver.

RX path is already using it, this patch implements TX completion side.

mlx4_en_process_tx_cq() now returns the amount of retired packets,
instead of a boolean, so that mlx4_en_poll_tx_cq() can pass
this value to napi_complete_done().

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_rx.c   |  2 +-
 drivers/net/ethernet/mellanox/mlx4/en_tx.c   | 20 ++++++++++----------
 drivers/net/ethernet/mellanox/mlx4/mlx4_en.h |  4 ++--
 3 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index db3552f2d087..787139219813 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -946,7 +946,7 @@ int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget)
 		xdp_tx_cq = priv->tx_cq[TX_XDP][cq->ring];
 		if (xdp_tx_cq->xdp_busy) {
 			clean_complete = mlx4_en_process_tx_cq(dev, xdp_tx_cq,
-							       budget);
+							       budget) < budget;
 			xdp_tx_cq->xdp_busy = !clean_complete;
 		}
 	}
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_tx.c b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
index 4d5ca302c067..a99d3ed49ed6 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_tx.c
@@ -382,8 +382,8 @@ int mlx4_en_free_tx_buf(struct net_device *dev, struct mlx4_en_tx_ring *ring)
 	return cnt;
 }
 
-bool mlx4_en_process_tx_cq(struct net_device *dev,
-			   struct mlx4_en_cq *cq, int napi_budget)
+int mlx4_en_process_tx_cq(struct net_device *dev,
+			  struct mlx4_en_cq *cq, int napi_budget)
 {
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	struct mlx4_cq *mcq = &cq->mcq;
@@ -405,7 +405,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 	u32 ring_cons;
 
 	if (unlikely(!priv->port_up))
-		return true;
+		return 0;
 
 	netdev_txq_bql_complete_prefetchw(ring->tx_queue);
 
@@ -480,7 +480,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 	WRITE_ONCE(ring->cons, ring_cons + txbbs_skipped);
 
 	if (cq->type == TX_XDP)
-		return done < budget;
+		return done;
 
 	netdev_tx_completed_queue(ring->tx_queue, packets, bytes);
 
@@ -492,7 +492,7 @@ bool mlx4_en_process_tx_cq(struct net_device *dev,
 		ring->wake_queue++;
 	}
 
-	return done < budget;
+	return done;
 }
 
 void mlx4_en_tx_irq(struct mlx4_cq *mcq)
@@ -512,14 +512,14 @@ int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget)
 	struct mlx4_en_cq *cq = container_of(napi, struct mlx4_en_cq, napi);
 	struct net_device *dev = cq->dev;
 	struct mlx4_en_priv *priv = netdev_priv(dev);
-	bool clean_complete;
+	int work_done;
 
-	clean_complete = mlx4_en_process_tx_cq(dev, cq, budget);
-	if (!clean_complete)
+	work_done = mlx4_en_process_tx_cq(dev, cq, budget);
+	if (work_done >= budget)
 		return budget;
 
-	napi_complete(napi);
-	mlx4_en_arm_cq(priv, cq);
+	if (napi_complete_done(napi, work_done))
+		mlx4_en_arm_cq(priv, cq);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
index 630f15977f09..9f5603612960 100644
--- a/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
+++ b/drivers/net/ethernet/mellanox/mlx4/mlx4_en.h
@@ -737,8 +737,8 @@ int mlx4_en_process_rx_cq(struct net_device *dev,
 			  int budget);
 int mlx4_en_poll_rx_cq(struct napi_struct *napi, int budget);
 int mlx4_en_poll_tx_cq(struct napi_struct *napi, int budget);
-bool mlx4_en_process_tx_cq(struct net_device *dev,
-			   struct mlx4_en_cq *cq, int napi_budget);
+int mlx4_en_process_tx_cq(struct net_device *dev,
+			  struct mlx4_en_cq *cq, int napi_budget);
 u32 mlx4_en_free_tx_desc(struct mlx4_en_priv *priv,
 			 struct mlx4_en_tx_ring *ring,
 			 int index, u64 timestamp,
-- 
cgit v1.2.3-59-g8ed1b


From 79d6e755a45486ffb14bf0ed752e6ace20334cda Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:13 +0300
Subject: net: ethernet: ti: cpts: use dev_yy() api for logs

Use dev_yy() API instead of pr_yy() for log outputs.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 729ce09dded9..445f445185df 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -71,7 +71,7 @@ static int cpts_purge_events(struct cpts *cpts)
 	}
 
 	if (removed)
-		pr_debug("cpts: event pool cleaned up %d\n", removed);
+		dev_dbg(cpts->dev, "cpts: event pool cleaned up %d\n", removed);
 	return removed ? 0 : -1;
 }
 
@@ -150,7 +150,7 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 			break;
 
 		if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
-			pr_err("cpts: event pool empty\n");
+			dev_warn(cpts->dev, "cpts: event pool empty\n");
 			return -1;
 		}
 
@@ -178,7 +178,7 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		case CPTS_EV_HW:
 			break;
 		default:
-			pr_err("cpts: unknown event type\n");
+			dev_err(cpts->dev, "cpts: unknown event type\n");
 			break;
 		}
 		if (type == match)
@@ -196,7 +196,7 @@ static u64 cpts_systim_read(const struct cyclecounter *cc)
 
 	cpts_write32(cpts, TS_PUSH, ts_push);
 	if (cpts_fifo_read(cpts, CPTS_EV_PUSH))
-		pr_err("cpts: unable to obtain a time stamp\n");
+		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
 
 	list_for_each_safe(this, next, &cpts->events) {
 		event = list_entry(this, struct cpts_event, list);
@@ -307,8 +307,8 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	}
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
-	pr_debug("cpts overflow check at %lld.%09ld\n",
-		 (long long)ts.tv_sec, ts.tv_nsec);
+	dev_dbg(cpts->dev, "cpts overflow check at %lld.%09ld\n",
+		(long long)ts.tv_sec, ts.tv_nsec);
 	return (long)delay;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From e66dccced0cfd59a4dc4c16409b713332b882fa6 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:14 +0300
Subject: net: ethernet: ti: cpts: separate hw counter read from timecounter

Currently, the CPTS HW time reading code is implemented in the
timecounter->cyclecounter .read() callback and performs the following
operations:
timecounter_read() ->cc.read() -> cpts_systim_read()
 - request current CPTS HW time CPTS_TS_PUSH.TS_PUSH = 1
 - poll CPTS FIFO for CPTS_EV_PUSH event with current HW timestamp

This approach needs to be changed for the future switch to the PTP PHC
.gettimex64() callback, which requires separating the request for the
current CPTS HW time from the processing of the CPTS FIFO, and for the
follow-up patch, which improves the .adjfreq() implementation.

This patch moves code accessing CPTS HW out of timecounter code as
follows:
- convert HW timestamp of every CPTS event to PTP time (ns) and store it as
part of struct cpts_event;
- add CPTS context field to store current CPTS HW time (counter) value and
update it on CPTS_EV_PUSH reception;
- move code accessing CPTS HW out of timecounter code and use current CPTS
HW time (counter) from CPTS context instead;
- ensure timecounter->cycle_last is updated on CPTS_EV_PUSH reception.

After this change CPTS timecounter will only perform timekeeper role
without actually accessing CPTS HW.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 53 +++++++++++++++++++++---------------------
 drivers/net/ethernet/ti/cpts.h |  2 ++
 2 files changed, 29 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 445f445185df..f40a864d8c36 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -112,10 +112,8 @@ static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
 					(struct cpts_skb_cb_data *)skb->cb;
 
 		if (cpts_match(skb, class, seqid, mtype)) {
-			u64 ns = timecounter_cyc2time(&cpts->tc, event->low);
-
 			memset(&ssh, 0, sizeof(ssh));
-			ssh.hwtstamp = ns_to_ktime(ns);
+			ssh.hwtstamp = ns_to_ktime(event->timestamp);
 			skb_tstamp_tx(skb, &ssh);
 			found = true;
 			__skb_unlink(skb, &cpts->txq);
@@ -158,8 +156,16 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		event->tmo = jiffies + 2;
 		event->high = hi;
 		event->low = lo;
+		event->timestamp = timecounter_cyc2time(&cpts->tc, event->low);
 		type = event_type(event);
+
+		dev_dbg(cpts->dev, "CPTS_EV: %d high:%08X low:%08x\n",
+			type, event->high, event->low);
 		switch (type) {
+		case CPTS_EV_PUSH:
+			WRITE_ONCE(cpts->cur_timestamp, lo);
+			timecounter_read(&cpts->tc);
+			break;
 		case CPTS_EV_TX:
 			if (cpts_match_tx_ts(cpts, event)) {
 				/* if the new event matches an existing skb,
@@ -168,7 +174,6 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 				break;
 			}
 			/* fall through */
-		case CPTS_EV_PUSH:
 		case CPTS_EV_RX:
 			list_del_init(&event->list);
 			list_add_tail(&event->list, &cpts->events);
@@ -189,26 +194,17 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 
 static u64 cpts_systim_read(const struct cyclecounter *cc)
 {
-	u64 val = 0;
-	struct cpts_event *event;
-	struct list_head *this, *next;
 	struct cpts *cpts = container_of(cc, struct cpts, cc);
 
-	cpts_write32(cpts, TS_PUSH, ts_push);
-	if (cpts_fifo_read(cpts, CPTS_EV_PUSH))
-		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
+	return READ_ONCE(cpts->cur_timestamp);
+}
 
-	list_for_each_safe(this, next, &cpts->events) {
-		event = list_entry(this, struct cpts_event, list);
-		if (event_type(event) == CPTS_EV_PUSH) {
-			list_del_init(&event->list);
-			list_add(&event->list, &cpts->pool);
-			val = event->low;
-			break;
-		}
-	}
+static void cpts_update_cur_time(struct cpts *cpts, int match)
+{
+	cpts_write32(cpts, TS_PUSH, ts_push);
 
-	return val;
+	if (cpts_fifo_read(cpts, match) && match != -1)
+		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
 }
 
 /* PTP clock operations */
@@ -232,7 +228,7 @@ static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
 	spin_lock_irqsave(&cpts->lock, flags);
 
-	timecounter_read(&cpts->tc);
+	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
 
 	cpts->cc.mult = neg_adj ? mult - diff : mult + diff;
 
@@ -260,6 +256,9 @@ static int cpts_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
 
 	spin_lock_irqsave(&cpts->lock, flags);
+
+	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
+
 	ns = timecounter_read(&cpts->tc);
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
@@ -294,11 +293,14 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 {
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
 	unsigned long delay = cpts->ov_check_period;
-	struct timespec64 ts;
 	unsigned long flags;
+	u64 ns;
 
 	spin_lock_irqsave(&cpts->lock, flags);
-	ts = ns_to_timespec64(timecounter_read(&cpts->tc));
+
+	cpts_update_cur_time(cpts, -1);
+
+	ns = timecounter_read(&cpts->tc);
 
 	if (!skb_queue_empty(&cpts->txq)) {
 		cpts_purge_txq(cpts);
@@ -307,8 +309,7 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	}
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
-	dev_dbg(cpts->dev, "cpts overflow check at %lld.%09ld\n",
-		(long long)ts.tv_sec, ts.tv_nsec);
+	dev_dbg(cpts->dev, "cpts overflow check at %lld\n", ns);
 	return (long)delay;
 }
 
@@ -390,7 +391,7 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
 		seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK;
 		if (ev_type == event_type(event) &&
 		    cpts_match(skb, class, seqid, mtype)) {
-			ns = timecounter_cyc2time(&cpts->tc, event->low);
+			ns = event->timestamp;
 			list_del_init(&event->list);
 			list_add(&event->list, &cpts->pool);
 			break;
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index bb997c11ee15..32ecd1ce4d3b 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -94,6 +94,7 @@ struct cpts_event {
 	unsigned long tmo;
 	u32 high;
 	u32 low;
+	u64 timestamp;
 };
 
 struct cpts {
@@ -114,6 +115,7 @@ struct cpts {
 	struct cpts_event pool_data[CPTS_MAX_EVENTS];
 	unsigned long ov_check_period;
 	struct sk_buff_head txq;
+	u64 cur_timestamp;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
cgit v1.2.3-59-g8ed1b


From 0d6df3e613b74fe1a88de89cda63a0352e1dc4eb Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:15 +0300
Subject: net: ethernet: ti: cpts: move tc mult update in cpts_fifo_read()

Now the CPTS driver .adjfreq() generates a request to read the CPTS current
time (CPTS_EV_PUSH) with the intention to process all pending events using
the previous frequency adjustment values before switching to the new ones.
So CPTS_EV_PUSH works as a marker to switch to the new frequency adjustment
values. Current code assumes that the whole job is done in .adjfreq(), but
after enabling IRQ this will not be true any more.

Hence save new frequency adjustment values (mult) and perform actual freq
adjustment in cpts_fifo_read() immediately after CPTS_EV_PUSH is received.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 8 ++++++--
 drivers/net/ethernet/ti/cpts.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index f40a864d8c36..a2974b542bed 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -165,6 +165,10 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		case CPTS_EV_PUSH:
 			WRITE_ONCE(cpts->cur_timestamp, lo);
 			timecounter_read(&cpts->tc);
+			if (cpts->mult_new) {
+				cpts->cc.mult = cpts->mult_new;
+				cpts->mult_new = 0;
+			}
 			break;
 		case CPTS_EV_TX:
 			if (cpts_match_tx_ts(cpts, event)) {
@@ -228,9 +232,9 @@ static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
 	spin_lock_irqsave(&cpts->lock, flags);
 
-	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
+	cpts->mult_new = neg_adj ? mult - diff : mult + diff;
 
-	cpts->cc.mult = neg_adj ? mult - diff : mult + diff;
+	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
 
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 32ecd1ce4d3b..421630049ee7 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -116,6 +116,7 @@ struct cpts {
 	unsigned long ov_check_period;
 	struct sk_buff_head txq;
 	u64 cur_timestamp;
+	u32 mult_new;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
cgit v1.2.3-59-g8ed1b


From 856e59ab7e6d3c85ee739f3f53341d47c88d454e Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:16 +0300
Subject: net: ethernet: ti: cpts: switch to use new .gettimex64() interface

The CPTS HW latches and saves the CPTS counter value in the CPTS fifo
immediately after writing to CPSW_CPTS_PUSH.TS_PUSH (bit 0), so the total
time that the driver needs to read the CPTS timestamp is the time required
for the CPSW_CPTS_PUSH write to actually reach HW.

Hence switch CPTS driver to implement new .gettimex64() callback for more
precise measurement of the offset between a PHC and the system clock which
is measured as time between
  write(CPSW_CPTS_PUSH)
  read(CPSW_CPTS_PUSH)

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index a2974b542bed..1f738bb3df74 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -203,9 +203,13 @@ static u64 cpts_systim_read(const struct cyclecounter *cc)
 	return READ_ONCE(cpts->cur_timestamp);
 }
 
-static void cpts_update_cur_time(struct cpts *cpts, int match)
+static void cpts_update_cur_time(struct cpts *cpts, int match,
+				 struct ptp_system_timestamp *sts)
 {
+	ptp_read_system_prets(sts);
 	cpts_write32(cpts, TS_PUSH, ts_push);
+	cpts_read32(cpts, ts_push);
+	ptp_read_system_postts(sts);
 
 	if (cpts_fifo_read(cpts, match) && match != -1)
 		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
@@ -234,7 +238,7 @@ static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 
 	cpts->mult_new = neg_adj ? mult - diff : mult + diff;
 
-	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
+	cpts_update_cur_time(cpts, CPTS_EV_PUSH, NULL);
 
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
@@ -253,15 +257,17 @@ static int cpts_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	return 0;
 }
 
-static int cpts_ptp_gettime(struct ptp_clock_info *ptp, struct timespec64 *ts)
+static int cpts_ptp_gettimeex(struct ptp_clock_info *ptp,
+			      struct timespec64 *ts,
+			      struct ptp_system_timestamp *sts)
 {
-	u64 ns;
-	unsigned long flags;
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
+	unsigned long flags;
+	u64 ns;
 
 	spin_lock_irqsave(&cpts->lock, flags);
 
-	cpts_update_cur_time(cpts, CPTS_EV_PUSH);
+	cpts_update_cur_time(cpts, CPTS_EV_PUSH, sts);
 
 	ns = timecounter_read(&cpts->tc);
 	spin_unlock_irqrestore(&cpts->lock, flags);
@@ -302,7 +308,7 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 
 	spin_lock_irqsave(&cpts->lock, flags);
 
-	cpts_update_cur_time(cpts, -1);
+	cpts_update_cur_time(cpts, -1, NULL);
 
 	ns = timecounter_read(&cpts->tc);
 
@@ -326,7 +332,7 @@ static const struct ptp_clock_info cpts_info = {
 	.pps		= 0,
 	.adjfreq	= cpts_ptp_adjfreq,
 	.adjtime	= cpts_ptp_adjtime,
-	.gettime64	= cpts_ptp_gettime,
+	.gettimex64	= cpts_ptp_gettimeex,
 	.settime64	= cpts_ptp_settime,
 	.enable		= cpts_ptp_enable,
 	.do_aux_work	= cpts_overflow_check,
-- 
cgit v1.2.3-59-g8ed1b


From 3bfd41b57811d76412af57f4884e28ad78c2ab2f Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:17 +0300
Subject: net: ethernet: ti: cpts: optimize packet to event matching

Now the CPTS driver performs packet (skb) parsing every time when it needs
to match packet to CPTS event (including ptp_classify_raw() calls).

This patch optimizes the matching process by parsing the packet only once
upon arrival and storing PTP specific data in skb->cb using the same format
as in the CPTS HW event. As a result, all future matching reduces to
comparing two u32 values.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 91 +++++++++++++++++++++++++++---------------
 1 file changed, 58 insertions(+), 33 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 1f738bb3df74..6efb809d58ed 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -23,15 +23,13 @@
 #define CPTS_SKB_TX_WORK_TIMEOUT 1 /* jiffies */
 
 struct cpts_skb_cb_data {
+	u32 skb_mtype_seqid;
 	unsigned long tmo;
 };
 
 #define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
-static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
-		      u16 ts_seqid, u8 ts_msgtype);
-
 static int event_expired(struct cpts_event *event)
 {
 	return time_after(jiffies, event->tmo);
@@ -97,29 +95,29 @@ static void cpts_purge_txq(struct cpts *cpts)
 static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
 {
 	struct sk_buff *skb, *tmp;
-	u16 seqid;
-	u8 mtype;
 	bool found = false;
+	u32 mtype_seqid;
 
-	mtype = (event->high >> MESSAGE_TYPE_SHIFT) & MESSAGE_TYPE_MASK;
-	seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK;
+	mtype_seqid = event->high &
+		      ((MESSAGE_TYPE_MASK << MESSAGE_TYPE_SHIFT) |
+		       (SEQUENCE_ID_MASK << SEQUENCE_ID_SHIFT) |
+		       (EVENT_TYPE_MASK << EVENT_TYPE_SHIFT));
 
 	/* no need to grab txq.lock as access is always done under cpts->lock */
 	skb_queue_walk_safe(&cpts->txq, skb, tmp) {
 		struct skb_shared_hwtstamps ssh;
-		unsigned int class = ptp_classify_raw(skb);
 		struct cpts_skb_cb_data *skb_cb =
 					(struct cpts_skb_cb_data *)skb->cb;
 
-		if (cpts_match(skb, class, seqid, mtype)) {
+		if (mtype_seqid == skb_cb->skb_mtype_seqid) {
 			memset(&ssh, 0, sizeof(ssh));
 			ssh.hwtstamp = ns_to_ktime(event->timestamp);
 			skb_tstamp_tx(skb, &ssh);
 			found = true;
 			__skb_unlink(skb, &cpts->txq);
 			dev_consume_skb_any(skb);
-			dev_dbg(cpts->dev, "match tx timestamp mtype %u seqid %04x\n",
-				mtype, seqid);
+			dev_dbg(cpts->dev, "match tx timestamp mtype_seqid %08x\n",
+				mtype_seqid);
 			break;
 		}
 
@@ -338,12 +336,15 @@ static const struct ptp_clock_info cpts_info = {
 	.do_aux_work	= cpts_overflow_check,
 };
 
-static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
-		      u16 ts_seqid, u8 ts_msgtype)
+static int cpts_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid)
 {
-	u16 *seqid;
-	unsigned int offset = 0;
+	unsigned int ptp_class = ptp_classify_raw(skb);
 	u8 *msgtype, *data = skb->data;
+	unsigned int offset = 0;
+	u16 *seqid;
+
+	if (ptp_class == PTP_CLASS_NONE)
+		return 0;
 
 	if (ptp_class & PTP_CLASS_VLAN)
 		offset += VLAN_HLEN;
@@ -371,22 +372,20 @@ static int cpts_match(struct sk_buff *skb, unsigned int ptp_class,
 		msgtype = data + offset;
 
 	seqid = (u16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+	*mtype_seqid = (*msgtype & MESSAGE_TYPE_MASK) << MESSAGE_TYPE_SHIFT;
+	*mtype_seqid |= (ntohs(*seqid) & SEQUENCE_ID_MASK) << SEQUENCE_ID_SHIFT;
 
-	return (ts_msgtype == (*msgtype & 0xf) && ts_seqid == ntohs(*seqid));
+	return 1;
 }
 
-static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
+static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb,
+			int ev_type, u32 skb_mtype_seqid)
 {
-	u64 ns = 0;
-	struct cpts_event *event;
 	struct list_head *this, *next;
-	unsigned int class = ptp_classify_raw(skb);
+	struct cpts_event *event;
 	unsigned long flags;
-	u16 seqid;
-	u8 mtype;
-
-	if (class == PTP_CLASS_NONE)
-		return 0;
+	u32 mtype_seqid;
+	u64 ns = 0;
 
 	spin_lock_irqsave(&cpts->lock, flags);
 	cpts_fifo_read(cpts, -1);
@@ -397,10 +396,13 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
 			list_add(&event->list, &cpts->pool);
 			continue;
 		}
-		mtype = (event->high >> MESSAGE_TYPE_SHIFT) & MESSAGE_TYPE_MASK;
-		seqid = (event->high >> SEQUENCE_ID_SHIFT) & SEQUENCE_ID_MASK;
-		if (ev_type == event_type(event) &&
-		    cpts_match(skb, class, seqid, mtype)) {
+
+		mtype_seqid = event->high &
+			      ((MESSAGE_TYPE_MASK << MESSAGE_TYPE_SHIFT) |
+			       (SEQUENCE_ID_MASK << SEQUENCE_ID_SHIFT) |
+			       (EVENT_TYPE_MASK << EVENT_TYPE_SHIFT));
+
+		if (mtype_seqid == skb_mtype_seqid) {
 			ns = event->timestamp;
 			list_del_init(&event->list);
 			list_add(&event->list, &cpts->pool);
@@ -427,10 +429,21 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb, int ev_type)
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
-	u64 ns;
+	struct cpts_skb_cb_data *skb_cb = (struct cpts_skb_cb_data *)skb->cb;
 	struct skb_shared_hwtstamps *ssh;
+	int ret;
+	u64 ns;
+
+	ret = cpts_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid);
+	if (!ret)
+		return;
+
+	skb_cb->skb_mtype_seqid |= (CPTS_EV_RX << EVENT_TYPE_SHIFT);
 
-	ns = cpts_find_ts(cpts, skb, CPTS_EV_RX);
+	dev_dbg(cpts->dev, "%s mtype seqid %08x\n",
+		__func__, skb_cb->skb_mtype_seqid);
+
+	ns = cpts_find_ts(cpts, skb, CPTS_EV_RX, skb_cb->skb_mtype_seqid);
 	if (!ns)
 		return;
 	ssh = skb_hwtstamps(skb);
@@ -441,12 +454,24 @@ EXPORT_SYMBOL_GPL(cpts_rx_timestamp);
 
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
-	u64 ns;
+	struct cpts_skb_cb_data *skb_cb = (struct cpts_skb_cb_data *)skb->cb;
 	struct skb_shared_hwtstamps ssh;
+	int ret;
+	u64 ns;
 
 	if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
 		return;
-	ns = cpts_find_ts(cpts, skb, CPTS_EV_TX);
+
+	ret = cpts_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid);
+	if (!ret)
+		return;
+
+	skb_cb->skb_mtype_seqid |= (CPTS_EV_TX << EVENT_TYPE_SHIFT);
+
+	dev_dbg(cpts->dev, "%s mtype seqid %08x\n",
+		__func__, skb_cb->skb_mtype_seqid);
+
+	ns = cpts_find_ts(cpts, skb, CPTS_EV_TX, skb_cb->skb_mtype_seqid);
 	if (!ns)
 		return;
 	memset(&ssh, 0, sizeof(ssh));
-- 
cgit v1.2.3-59-g8ed1b


From c8f8e47efe66dae775b617982e47a4564d7c4dda Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:18 +0300
Subject: net: ethernet: ti: cpts: move tx timestamp processing to ptp worker
 only

Now the tx timestamp processing happens from different contexts - softirq
and thread/PTP worker. Enabling IRQ will add one more hard_irq context.
This makes the overall deferred TX timestamp processing and locking
overcomplicated. Always move tx timestamp processing to the PTP worker
instead.

napi_rx->cpts_tx_timestamp
 if ptp_packet then
    push to txq
    ptp_schedule_worker()

do_aux_work->cpts_overflow_check
 cpts_process_events()

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 165 +++++++++++++++++++++++------------------
 1 file changed, 94 insertions(+), 71 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 6efb809d58ed..55ba6b425fb5 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -21,6 +21,8 @@
 #include "cpts.h"
 
 #define CPTS_SKB_TX_WORK_TIMEOUT 1 /* jiffies */
+#define CPTS_SKB_RX_TX_TMO 100 /*ms */
+#define CPTS_EVENT_RX_TX_TIMEOUT (100) /* ms */
 
 struct cpts_skb_cb_data {
 	u32 skb_mtype_seqid;
@@ -92,46 +94,6 @@ static void cpts_purge_txq(struct cpts *cpts)
 		dev_dbg(cpts->dev, "txq cleaned up %d\n", removed);
 }
 
-static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
-{
-	struct sk_buff *skb, *tmp;
-	bool found = false;
-	u32 mtype_seqid;
-
-	mtype_seqid = event->high &
-		      ((MESSAGE_TYPE_MASK << MESSAGE_TYPE_SHIFT) |
-		       (SEQUENCE_ID_MASK << SEQUENCE_ID_SHIFT) |
-		       (EVENT_TYPE_MASK << EVENT_TYPE_SHIFT));
-
-	/* no need to grab txq.lock as access is always done under cpts->lock */
-	skb_queue_walk_safe(&cpts->txq, skb, tmp) {
-		struct skb_shared_hwtstamps ssh;
-		struct cpts_skb_cb_data *skb_cb =
-					(struct cpts_skb_cb_data *)skb->cb;
-
-		if (mtype_seqid == skb_cb->skb_mtype_seqid) {
-			memset(&ssh, 0, sizeof(ssh));
-			ssh.hwtstamp = ns_to_ktime(event->timestamp);
-			skb_tstamp_tx(skb, &ssh);
-			found = true;
-			__skb_unlink(skb, &cpts->txq);
-			dev_consume_skb_any(skb);
-			dev_dbg(cpts->dev, "match tx timestamp mtype_seqid %08x\n",
-				mtype_seqid);
-			break;
-		}
-
-		if (time_after(jiffies, skb_cb->tmo)) {
-			/* timeout any expired skbs over 1s */
-			dev_dbg(cpts->dev, "expiring tx timestamp from txq\n");
-			__skb_unlink(skb, &cpts->txq);
-			dev_consume_skb_any(skb);
-		}
-	}
-
-	return found;
-}
-
 /*
  * Returns zero if matching event type was found.
  */
@@ -151,7 +113,6 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		}
 
 		event = list_first_entry(&cpts->pool, struct cpts_event, list);
-		event->tmo = jiffies + 2;
 		event->high = hi;
 		event->low = lo;
 		event->timestamp = timecounter_cyc2time(&cpts->tc, event->low);
@@ -169,14 +130,10 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 			}
 			break;
 		case CPTS_EV_TX:
-			if (cpts_match_tx_ts(cpts, event)) {
-				/* if the new event matches an existing skb,
-				 * then don't queue it
-				 */
-				break;
-			}
-			/* fall through */
 		case CPTS_EV_RX:
+			event->tmo = jiffies +
+				msecs_to_jiffies(CPTS_EVENT_RX_TX_TIMEOUT);
+
 			list_del_init(&event->list);
 			list_add_tail(&event->list, &cpts->events);
 			break;
@@ -297,6 +254,84 @@ static int cpts_ptp_enable(struct ptp_clock_info *ptp,
 	return -EOPNOTSUPP;
 }
 
+static bool cpts_match_tx_ts(struct cpts *cpts, struct cpts_event *event)
+{
+	struct sk_buff_head txq_list;
+	struct sk_buff *skb, *tmp;
+	unsigned long flags;
+	bool found = false;
+	u32 mtype_seqid;
+
+	mtype_seqid = event->high &
+		      ((MESSAGE_TYPE_MASK << MESSAGE_TYPE_SHIFT) |
+		       (SEQUENCE_ID_MASK << SEQUENCE_ID_SHIFT) |
+		       (EVENT_TYPE_MASK << EVENT_TYPE_SHIFT));
+
+	__skb_queue_head_init(&txq_list);
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
+	skb_queue_splice_init(&cpts->txq, &txq_list);
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
+
+	skb_queue_walk_safe(&txq_list, skb, tmp) {
+		struct skb_shared_hwtstamps ssh;
+		struct cpts_skb_cb_data *skb_cb =
+					(struct cpts_skb_cb_data *)skb->cb;
+
+		if (mtype_seqid == skb_cb->skb_mtype_seqid) {
+			memset(&ssh, 0, sizeof(ssh));
+			ssh.hwtstamp = ns_to_ktime(event->timestamp);
+			skb_tstamp_tx(skb, &ssh);
+			found = true;
+			__skb_unlink(skb, &txq_list);
+			dev_consume_skb_any(skb);
+			dev_dbg(cpts->dev, "match tx timestamp mtype_seqid %08x\n",
+				mtype_seqid);
+			break;
+		}
+
+		if (time_after(jiffies, skb_cb->tmo)) {
+			/* timeout any expired skbs over 1s */
+			dev_dbg(cpts->dev, "expiring tx timestamp from txq\n");
+			__skb_unlink(skb, &txq_list);
+			dev_consume_skb_any(skb);
+		}
+	}
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
+	skb_queue_splice(&txq_list, &cpts->txq);
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
+
+	return found;
+}
+
+static void cpts_process_events(struct cpts *cpts)
+{
+	struct list_head *this, *next;
+	struct cpts_event *event;
+	LIST_HEAD(events_free);
+	unsigned long flags;
+	LIST_HEAD(events);
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	list_splice_init(&cpts->events, &events);
+	spin_unlock_irqrestore(&cpts->lock, flags);
+
+	list_for_each_safe(this, next, &events) {
+		event = list_entry(this, struct cpts_event, list);
+		if (cpts_match_tx_ts(cpts, event) ||
+		    time_after(jiffies, event->tmo)) {
+			list_del_init(&event->list);
+			list_add(&event->list, &events_free);
+		}
+	}
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	list_splice_tail(&events, &cpts->events);
+	list_splice_tail(&events_free, &cpts->pool);
+	spin_unlock_irqrestore(&cpts->lock, flags);
+}
+
 static long cpts_overflow_check(struct ptp_clock_info *ptp)
 {
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
@@ -305,17 +340,20 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	u64 ns;
 
 	spin_lock_irqsave(&cpts->lock, flags);
-
 	cpts_update_cur_time(cpts, -1, NULL);
+	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	ns = timecounter_read(&cpts->tc);
 
+	cpts_process_events(cpts);
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
 	if (!skb_queue_empty(&cpts->txq)) {
 		cpts_purge_txq(cpts);
 		if (!skb_queue_empty(&cpts->txq))
 			delay = CPTS_SKB_TX_WORK_TIMEOUT;
 	}
-	spin_unlock_irqrestore(&cpts->lock, flags);
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
 
 	dev_dbg(cpts->dev, "cpts overflow check at %lld\n", ns);
 	return (long)delay;
@@ -409,19 +447,6 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb,
 			break;
 		}
 	}
-
-	if (ev_type == CPTS_EV_TX && !ns) {
-		struct cpts_skb_cb_data *skb_cb =
-				(struct cpts_skb_cb_data *)skb->cb;
-		/* Not found, add frame to queue for processing later.
-		 * The periodic FIFO check will handle this.
-		 */
-		skb_get(skb);
-		/* get the timestamp for timeouts */
-		skb_cb->tmo = jiffies + msecs_to_jiffies(100);
-		__skb_queue_tail(&cpts->txq, skb);
-		ptp_schedule_worker(cpts->clock, 0);
-	}
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	return ns;
@@ -455,9 +480,7 @@ EXPORT_SYMBOL_GPL(cpts_rx_timestamp);
 void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 	struct cpts_skb_cb_data *skb_cb = (struct cpts_skb_cb_data *)skb->cb;
-	struct skb_shared_hwtstamps ssh;
 	int ret;
-	u64 ns;
 
 	if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
 		return;
@@ -471,12 +494,12 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	dev_dbg(cpts->dev, "%s mtype seqid %08x\n",
 		__func__, skb_cb->skb_mtype_seqid);
 
-	ns = cpts_find_ts(cpts, skb, CPTS_EV_TX, skb_cb->skb_mtype_seqid);
-	if (!ns)
-		return;
-	memset(&ssh, 0, sizeof(ssh));
-	ssh.hwtstamp = ns_to_ktime(ns);
-	skb_tstamp_tx(skb, &ssh);
+	/* Always defer TX TS processing to PTP worker */
+	skb_get(skb);
+	/* get the timestamp for timeouts */
+	skb_cb->tmo = jiffies + msecs_to_jiffies(CPTS_SKB_RX_TX_TMO);
+	skb_queue_tail(&cpts->txq, skb);
+	ptp_schedule_worker(cpts->clock, 0);
 }
 EXPORT_SYMBOL_GPL(cpts_tx_timestamp);
 
-- 
cgit v1.2.3-59-g8ed1b


From ba10742840fbc6e475dc05c7515fd91b7c88e1b2 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:19 +0300
Subject: net: ethernet: ti: cpts: rework locking

Now a spinlock is used to synchronize everything, which is not required.
Add a mutex and use it to sync access to the PTP interface and PTP worker,
and use the spinlock only to sync FIFO/events processing.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 53 ++++++++++++++++++++++++------------------
 drivers/net/ethernet/ti/cpts.h |  3 ++-
 2 files changed, 32 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 55ba6b425fb5..8db9efdf1708 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -99,9 +99,12 @@ static void cpts_purge_txq(struct cpts *cpts)
  */
 static int cpts_fifo_read(struct cpts *cpts, int match)
 {
+	struct cpts_event *event;
+	unsigned long flags;
 	int i, type = -1;
 	u32 hi, lo;
-	struct cpts_event *event;
+
+	spin_lock_irqsave(&cpts->lock, flags);
 
 	for (i = 0; i < CPTS_FIFO_DEPTH; i++) {
 		if (cpts_fifo_pop(cpts, &hi, &lo))
@@ -109,7 +112,7 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 
 		if (list_empty(&cpts->pool) && cpts_purge_events(cpts)) {
 			dev_warn(cpts->dev, "cpts: event pool empty\n");
-			return -1;
+			break;
 		}
 
 		event = list_first_entry(&cpts->pool, struct cpts_event, list);
@@ -148,6 +151,9 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 		if (type == match)
 			break;
 	}
+
+	spin_unlock_irqrestore(&cpts->lock, flags);
+
 	return type == match ? 0 : -1;
 }
 
@@ -161,10 +167,15 @@ static u64 cpts_systim_read(const struct cyclecounter *cc)
 static void cpts_update_cur_time(struct cpts *cpts, int match,
 				 struct ptp_system_timestamp *sts)
 {
+	unsigned long flags;
+
+	/* use spin_lock_irqsave() here as it has to run very fast */
+	spin_lock_irqsave(&cpts->lock, flags);
 	ptp_read_system_prets(sts);
 	cpts_write32(cpts, TS_PUSH, ts_push);
 	cpts_read32(cpts, ts_push);
 	ptp_read_system_postts(sts);
+	spin_unlock_irqrestore(&cpts->lock, flags);
 
 	if (cpts_fifo_read(cpts, match) && match != -1)
 		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
@@ -174,11 +185,10 @@ static void cpts_update_cur_time(struct cpts *cpts, int match,
 
 static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
-	u64 adj;
-	u32 diff, mult;
-	int neg_adj = 0;
-	unsigned long flags;
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
+	int neg_adj = 0;
+	u32 diff, mult;
+	u64 adj;
 
 	if (ppb < 0) {
 		neg_adj = 1;
@@ -189,25 +199,23 @@ static int cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 	adj *= ppb;
 	diff = div_u64(adj, 1000000000ULL);
 
-	spin_lock_irqsave(&cpts->lock, flags);
+	mutex_lock(&cpts->ptp_clk_mutex);
 
 	cpts->mult_new = neg_adj ? mult - diff : mult + diff;
 
 	cpts_update_cur_time(cpts, CPTS_EV_PUSH, NULL);
 
-	spin_unlock_irqrestore(&cpts->lock, flags);
-
+	mutex_unlock(&cpts->ptp_clk_mutex);
 	return 0;
 }
 
 static int cpts_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
 {
-	unsigned long flags;
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
 
-	spin_lock_irqsave(&cpts->lock, flags);
+	mutex_lock(&cpts->ptp_clk_mutex);
 	timecounter_adjtime(&cpts->tc, delta);
-	spin_unlock_irqrestore(&cpts->lock, flags);
+	mutex_unlock(&cpts->ptp_clk_mutex);
 
 	return 0;
 }
@@ -217,15 +225,14 @@ static int cpts_ptp_gettimeex(struct ptp_clock_info *ptp,
 			      struct ptp_system_timestamp *sts)
 {
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
-	unsigned long flags;
 	u64 ns;
 
-	spin_lock_irqsave(&cpts->lock, flags);
+	mutex_lock(&cpts->ptp_clk_mutex);
 
 	cpts_update_cur_time(cpts, CPTS_EV_PUSH, sts);
 
 	ns = timecounter_read(&cpts->tc);
-	spin_unlock_irqrestore(&cpts->lock, flags);
+	mutex_unlock(&cpts->ptp_clk_mutex);
 
 	*ts = ns_to_timespec64(ns);
 
@@ -235,15 +242,14 @@ static int cpts_ptp_gettimeex(struct ptp_clock_info *ptp,
 static int cpts_ptp_settime(struct ptp_clock_info *ptp,
 			    const struct timespec64 *ts)
 {
-	u64 ns;
-	unsigned long flags;
 	struct cpts *cpts = container_of(ptp, struct cpts, info);
+	u64 ns;
 
 	ns = timespec64_to_ns(ts);
 
-	spin_lock_irqsave(&cpts->lock, flags);
+	mutex_lock(&cpts->ptp_clk_mutex);
 	timecounter_init(&cpts->tc, &cpts->cc, ns);
-	spin_unlock_irqrestore(&cpts->lock, flags);
+	mutex_unlock(&cpts->ptp_clk_mutex);
 
 	return 0;
 }
@@ -339,10 +345,9 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	unsigned long flags;
 	u64 ns;
 
-	spin_lock_irqsave(&cpts->lock, flags);
-	cpts_update_cur_time(cpts, -1, NULL);
-	spin_unlock_irqrestore(&cpts->lock, flags);
+	mutex_lock(&cpts->ptp_clk_mutex);
 
+	cpts_update_cur_time(cpts, -1, NULL);
 	ns = timecounter_read(&cpts->tc);
 
 	cpts_process_events(cpts);
@@ -356,6 +361,7 @@ static long cpts_overflow_check(struct ptp_clock_info *ptp)
 	spin_unlock_irqrestore(&cpts->txq.lock, flags);
 
 	dev_dbg(cpts->dev, "cpts overflow check at %lld\n", ns);
+	mutex_unlock(&cpts->ptp_clk_mutex);
 	return (long)delay;
 }
 
@@ -425,8 +431,8 @@ static u64 cpts_find_ts(struct cpts *cpts, struct sk_buff *skb,
 	u32 mtype_seqid;
 	u64 ns = 0;
 
-	spin_lock_irqsave(&cpts->lock, flags);
 	cpts_fifo_read(cpts, -1);
+	spin_lock_irqsave(&cpts->lock, flags);
 	list_for_each_safe(this, next, &cpts->events) {
 		event = list_entry(this, struct cpts_event, list);
 		if (event_expired(event)) {
@@ -703,6 +709,7 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	cpts->dev = dev;
 	cpts->reg = (struct cpsw_cpts __iomem *)regs;
 	spin_lock_init(&cpts->lock);
+	mutex_init(&cpts->ptp_clk_mutex);
 
 	ret = cpts_of_parse(cpts, node);
 	if (ret)
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 421630049ee7..f16e14d67f5f 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -104,7 +104,7 @@ struct cpts {
 	int rx_enable;
 	struct ptp_clock_info info;
 	struct ptp_clock *clock;
-	spinlock_t lock; /* protects time registers */
+	spinlock_t lock; /* protects fifo/events */
 	u32 cc_mult; /* for the nominal frequency */
 	struct cyclecounter cc;
 	struct timecounter tc;
@@ -117,6 +117,7 @@ struct cpts {
 	struct sk_buff_head txq;
 	u64 cur_timestamp;
 	u32 mult_new;
+	struct mutex ptp_clk_mutex; /* sync PTP interface and worker */
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
-- 
cgit v1.2.3-59-g8ed1b


From 85624412a03dc61eabddeb1cfbbc8325e3544694 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:20 +0300
Subject: net: ethernet: ti: cpts: add irq support

Add CPTS IRQ support, but do not enable it. By default, the CPTS driver
will continue working using polling mode which is required for CPTS to
continue working on platforms other than CPSW, like Keystone 2.

The CPTS IRQ support is required to enable support for HW_TS_PUSH events.
The CPSW CPTS IRQ and HW_TS_PUSH events support will be enabled in follow
up patches.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpts.c | 23 ++++++++++++++++++++++-
 drivers/net/ethernet/ti/cpts.h | 16 ++++++++++++++++
 2 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 8db9efdf1708..339796c87bf6 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -99,6 +99,7 @@ static void cpts_purge_txq(struct cpts *cpts)
  */
 static int cpts_fifo_read(struct cpts *cpts, int match)
 {
+	bool need_schedule = false;
 	struct cpts_event *event;
 	unsigned long flags;
 	int i, type = -1;
@@ -131,6 +132,8 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 				cpts->cc.mult = cpts->mult_new;
 				cpts->mult_new = 0;
 			}
+			if (!cpts->irq_poll)
+				complete(&cpts->ts_push_complete);
 			break;
 		case CPTS_EV_TX:
 		case CPTS_EV_RX:
@@ -139,6 +142,7 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 
 			list_del_init(&event->list);
 			list_add_tail(&event->list, &cpts->events);
+			need_schedule = true;
 			break;
 		case CPTS_EV_ROLL:
 		case CPTS_EV_HALF:
@@ -154,9 +158,18 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
+	if (!cpts->irq_poll && need_schedule)
+		ptp_schedule_worker(cpts->clock, 0);
+
 	return type == match ? 0 : -1;
 }
 
+void cpts_misc_interrupt(struct cpts *cpts)
+{
+	cpts_fifo_read(cpts, -1);
+}
+EXPORT_SYMBOL_GPL(cpts_misc_interrupt);
+
 static u64 cpts_systim_read(const struct cyclecounter *cc)
 {
 	struct cpts *cpts = container_of(cc, struct cpts, cc);
@@ -169,6 +182,8 @@ static void cpts_update_cur_time(struct cpts *cpts, int match,
 {
 	unsigned long flags;
 
+	reinit_completion(&cpts->ts_push_complete);
+
 	/* use spin_lock_irqsave() here as it has to run very fast */
 	spin_lock_irqsave(&cpts->lock, flags);
 	ptp_read_system_prets(sts);
@@ -177,8 +192,12 @@ static void cpts_update_cur_time(struct cpts *cpts, int match,
 	ptp_read_system_postts(sts);
 	spin_unlock_irqrestore(&cpts->lock, flags);
 
-	if (cpts_fifo_read(cpts, match) && match != -1)
+	if (cpts->irq_poll && cpts_fifo_read(cpts, match) && match != -1)
 		dev_err(cpts->dev, "cpts: unable to obtain a time stamp\n");
+
+	if (!cpts->irq_poll &&
+	    !wait_for_completion_timeout(&cpts->ts_push_complete, HZ))
+		dev_err(cpts->dev, "cpts: obtain a time stamp timeout\n");
 }
 
 /* PTP clock operations */
@@ -708,8 +727,10 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 
 	cpts->dev = dev;
 	cpts->reg = (struct cpsw_cpts __iomem *)regs;
+	cpts->irq_poll = true;
 	spin_lock_init(&cpts->lock);
 	mutex_init(&cpts->ptp_clk_mutex);
+	init_completion(&cpts->ts_push_complete);
 
 	ret = cpts_of_parse(cpts, node);
 	if (ret)
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index f16e14d67f5f..473d0622e861 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -118,6 +118,8 @@ struct cpts {
 	u64 cur_timestamp;
 	u32 mult_new;
 	struct mutex ptp_clk_mutex; /* sync PTP interface and worker */
+	bool irq_poll;
+	struct completion	ts_push_complete;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
@@ -127,6 +129,7 @@ void cpts_unregister(struct cpts *cpts);
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 			 struct device_node *node);
 void cpts_release(struct cpts *cpts);
+void cpts_misc_interrupt(struct cpts *cpts);
 
 static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
@@ -138,6 +141,11 @@ static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
 	return true;
 }
 
+static inline void cpts_set_irqpoll(struct cpts *cpts, bool en)
+{
+	cpts->irq_poll = en;
+}
+
 #else
 struct cpts;
 
@@ -173,6 +181,14 @@ static inline bool cpts_can_timestamp(struct cpts *cpts, struct sk_buff *skb)
 {
 	return false;
 }
+
+static inline void cpts_misc_interrupt(struct cpts *cpts)
+{
+}
+
+static inline void cpts_set_irqpoll(struct cpts *cpts, bool en)
+{
+}
 #endif
 
 
-- 
cgit v1.2.3-59-g8ed1b


From b78aba495df0eaee4f4a779b2354d6e2a43a3d70 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:21 +0300
Subject: net: ethernet: ti: cpts: add support for HW_TS_PUSH events

Since CPTS IRQ support is in place, the HW_TS_PUSH events can be added.
PWM capable DmTimers can be used to generate input signals for CPTS on TI
AM335x/AM437x/DRA7 SoCs to be timestamped:
AM335x/AM437x: timer4 - timer7
DRA7/AM57xx: timer13 - timer16

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw_priv.c   |  5 +++-
 drivers/net/ethernet/ti/cpts.c        | 49 ++++++++++++++++++++++++++++++++++-
 drivers/net/ethernet/ti/cpts.h        |  5 ++--
 drivers/net/ethernet/ti/netcp_ethss.c |  3 ++-
 4 files changed, 57 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 97a058ca60ac..099208927400 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -28,6 +28,8 @@
 #include "cpsw_sl.h"
 #include "davinci_cpdma.h"
 
+#define CPTS_N_ETX_TS 4
+
 int (*cpsw_slave_index)(struct cpsw_common *cpsw, struct cpsw_priv *priv);
 
 void cpsw_intr_enable(struct cpsw_common *cpsw)
@@ -522,7 +524,8 @@ int cpsw_init_common(struct cpsw_common *cpsw, void __iomem *ss_regs,
 	if (!cpts_node)
 		cpts_node = cpsw->dev->of_node;
 
-	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpts_node);
+	cpsw->cpts = cpts_create(cpsw->dev, cpts_regs, cpts_node,
+				 CPTS_N_ETX_TS);
 	if (IS_ERR(cpsw->cpts)) {
 		ret = PTR_ERR(cpsw->cpts);
 		cpdma_ctlr_destroy(cpsw->dma);
diff --git a/drivers/net/ethernet/ti/cpts.c b/drivers/net/ethernet/ti/cpts.c
index 339796c87bf6..7c55d395de2c 100644
--- a/drivers/net/ethernet/ti/cpts.c
+++ b/drivers/net/ethernet/ti/cpts.c
@@ -32,6 +32,11 @@ struct cpts_skb_cb_data {
 #define cpts_read32(c, r)	readl_relaxed(&c->reg->r)
 #define cpts_write32(c, v, r)	writel_relaxed(v, &c->reg->r)
 
+static int cpts_event_port(struct cpts_event *event)
+{
+	return (event->high >> PORT_NUMBER_SHIFT) & PORT_NUMBER_MASK;
+}
+
 static int event_expired(struct cpts_event *event)
 {
 	return time_after(jiffies, event->tmo);
@@ -99,6 +104,7 @@ static void cpts_purge_txq(struct cpts *cpts)
  */
 static int cpts_fifo_read(struct cpts *cpts, int match)
 {
+	struct ptp_clock_event pevent;
 	bool need_schedule = false;
 	struct cpts_event *event;
 	unsigned long flags;
@@ -146,7 +152,12 @@ static int cpts_fifo_read(struct cpts *cpts, int match)
 			break;
 		case CPTS_EV_ROLL:
 		case CPTS_EV_HALF:
+			break;
 		case CPTS_EV_HW:
+			pevent.timestamp = event->timestamp;
+			pevent.type = PTP_CLOCK_EXTTS;
+			pevent.index = cpts_event_port(event) - 1;
+			ptp_clock_event(cpts->clock, &pevent);
 			break;
 		default:
 			dev_err(cpts->dev, "cpts: unknown event type\n");
@@ -273,9 +284,42 @@ static int cpts_ptp_settime(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+static int cpts_extts_enable(struct cpts *cpts, u32 index, int on)
+{
+	u32 v;
+
+	if (((cpts->hw_ts_enable & BIT(index)) >> index) == on)
+		return 0;
+
+	mutex_lock(&cpts->ptp_clk_mutex);
+
+	v = cpts_read32(cpts, control);
+	if (on) {
+		v |= BIT(8 + index);
+		cpts->hw_ts_enable |= BIT(index);
+	} else {
+		v &= ~BIT(8 + index);
+		cpts->hw_ts_enable &= ~BIT(index);
+	}
+	cpts_write32(cpts, v, control);
+
+	mutex_unlock(&cpts->ptp_clk_mutex);
+
+	return 0;
+}
+
 static int cpts_ptp_enable(struct ptp_clock_info *ptp,
 			   struct ptp_clock_request *rq, int on)
 {
+	struct cpts *cpts = container_of(ptp, struct cpts, info);
+
+	switch (rq->type) {
+	case PTP_CLK_REQ_EXTTS:
+		return cpts_extts_enable(cpts, rq->extts.index, on);
+	default:
+		break;
+	}
+
 	return -EOPNOTSUPP;
 }
 
@@ -716,7 +760,7 @@ of_error:
 }
 
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 struct device_node *node)
+			 struct device_node *node, u32 n_ext_ts)
 {
 	struct cpts *cpts;
 	int ret;
@@ -755,6 +799,9 @@ struct cpts *cpts_create(struct device *dev, void __iomem *regs,
 	cpts->cc.mask = CLOCKSOURCE_MASK(32);
 	cpts->info = cpts_info;
 
+	if (n_ext_ts)
+		cpts->info.n_ext_ts = n_ext_ts;
+
 	cpts_calc_mult_shift(cpts);
 	/* save cc.mult original value as it can be modified
 	 * by cpts_ptp_adjfreq().
diff --git a/drivers/net/ethernet/ti/cpts.h b/drivers/net/ethernet/ti/cpts.h
index 473d0622e861..07222f651d2e 100644
--- a/drivers/net/ethernet/ti/cpts.h
+++ b/drivers/net/ethernet/ti/cpts.h
@@ -120,6 +120,7 @@ struct cpts {
 	struct mutex ptp_clk_mutex; /* sync PTP interface and worker */
 	bool irq_poll;
 	struct completion	ts_push_complete;
+	u32 hw_ts_enable;
 };
 
 void cpts_rx_timestamp(struct cpts *cpts, struct sk_buff *skb);
@@ -127,7 +128,7 @@ void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb);
 int cpts_register(struct cpts *cpts);
 void cpts_unregister(struct cpts *cpts);
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 struct device_node *node);
+			 struct device_node *node, u32 n_ext_ts);
 void cpts_release(struct cpts *cpts);
 void cpts_misc_interrupt(struct cpts *cpts);
 
@@ -158,7 +159,7 @@ static inline void cpts_tx_timestamp(struct cpts *cpts, struct sk_buff *skb)
 
 static inline
 struct cpts *cpts_create(struct device *dev, void __iomem *regs,
-			 struct device_node *node)
+			 struct device_node *node, u32 n_ext_ts)
 {
 	return NULL;
 }
diff --git a/drivers/net/ethernet/ti/netcp_ethss.c b/drivers/net/ethernet/ti/netcp_ethss.c
index fb36115e9c51..9d6e27fb710e 100644
--- a/drivers/net/ethernet/ti/netcp_ethss.c
+++ b/drivers/net/ethernet/ti/netcp_ethss.c
@@ -3716,7 +3716,8 @@ static int gbe_probe(struct netcp_device *netcp_device, struct device *dev,
 	if (!cpts_node)
 		cpts_node = of_node_get(node);
 
-	gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg, cpts_node);
+	gbe_dev->cpts = cpts_create(gbe_dev->dev, gbe_dev->cpts_reg,
+				    cpts_node, 0);
 	of_node_put(cpts_node);
 	if (IS_ENABLED(CONFIG_TI_CPTS) && IS_ERR(gbe_dev->cpts)) {
 		ret = PTR_ERR(gbe_dev->cpts);
-- 
cgit v1.2.3-59-g8ed1b


From 84ea9c0a95d7b3e554d6c3d7d719cc57be22e7ad Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Thu, 23 Apr 2020 17:20:22 +0300
Subject: net: ethernet: ti: cpsw: enable cpts irq

The CPSW misc IRQ needs to be enabled for CPTS event_pend IRQ processing.
This patch adds the corresponding support to the CPSW driver.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw.c      | 21 +++++++++++++++++++++
 drivers/net/ethernet/ti/cpsw_new.c  | 20 ++++++++++++++++++++
 drivers/net/ethernet/ti/cpsw_priv.c | 12 ++++++++++++
 drivers/net/ethernet/ti/cpsw_priv.h |  2 ++
 4 files changed, 55 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index c2c5bf87da01..09f98fa2fb4e 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -1569,6 +1569,12 @@ static int cpsw_probe(struct platform_device *pdev)
 		return irq;
 	cpsw->irqs_table[1] = irq;
 
+	/* get misc irq*/
+	irq = platform_get_irq(pdev, 3);
+	if (irq <= 0)
+		return irq;
+	cpsw->misc_irq = irq;
+
 	/*
 	 * This may be required here for child devices.
 	 */
@@ -1703,6 +1709,21 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_unregister_netdev_ret;
 	}
 
+	if (!cpsw->cpts)
+		goto skip_cpts;
+
+	ret = devm_request_irq(&pdev->dev, cpsw->misc_irq, cpsw_misc_interrupt,
+			       0, dev_name(&pdev->dev), cpsw);
+	if (ret < 0) {
+		dev_err(dev, "error attaching misc irq (%d)\n", ret);
+		goto clean_unregister_netdev_ret;
+	}
+
+	/* Enable misc CPTS evnt_pend IRQ */
+	cpts_set_irqpoll(cpsw->cpts, false);
+	writel(0x10, &cpsw->wr_regs->misc_en);
+
+skip_cpts:
 	cpsw_notice(priv, probe,
 		    "initialized device (regs %pa, irq %d, pool size %d)\n",
 		    &ss_res->start, cpsw->irqs_table[0], descs_pool_size);
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 9209e613257d..33c8dd686206 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1896,6 +1896,11 @@ static int cpsw_probe(struct platform_device *pdev)
 		return irq;
 	cpsw->irqs_table[1] = irq;
 
+	irq = platform_get_irq_byname(pdev, "misc");
+	if (irq <= 0)
+		return irq;
+	cpsw->misc_irq = irq;
+
 	platform_set_drvdata(pdev, cpsw);
 	/* This may be required here for child devices. */
 	pm_runtime_enable(dev);
@@ -1975,6 +1980,21 @@ static int cpsw_probe(struct platform_device *pdev)
 		goto clean_unregister_netdev;
 	}
 
+	if (!cpsw->cpts)
+		goto skip_cpts;
+
+	ret = devm_request_irq(dev, cpsw->misc_irq, cpsw_misc_interrupt,
+			       0, dev_name(&pdev->dev), cpsw);
+	if (ret < 0) {
+		dev_err(dev, "error attaching misc irq (%d)\n", ret);
+		goto clean_unregister_netdev;
+	}
+
+	/* Enable misc CPTS evnt_pend IRQ */
+	cpts_set_irqpoll(cpsw->cpts, false);
+	writel(0x10, &cpsw->wr_regs->misc_en);
+
+skip_cpts:
 	ret = cpsw_register_notifiers(cpsw);
 	if (ret)
 		goto clean_unregister_netdev;
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index 099208927400..9d098c802c6d 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -114,6 +114,18 @@ irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
+irqreturn_t cpsw_misc_interrupt(int irq, void *dev_id)
+{
+	struct cpsw_common *cpsw = dev_id;
+
+	writel(0, &cpsw->wr_regs->misc_en);
+	cpdma_ctlr_eoi(cpsw->dma, CPDMA_EOI_MISC);
+	cpts_misc_interrupt(cpsw->cpts);
+	writel(0x10, &cpsw->wr_regs->misc_en);
+
+	return IRQ_HANDLED;
+}
+
 int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget)
 {
 	struct cpsw_common	*cpsw = napi_to_cpsw(napi_tx);
diff --git a/drivers/net/ethernet/ti/cpsw_priv.h b/drivers/net/ethernet/ti/cpsw_priv.h
index b8d7b924ee3d..bf4e179b4ca4 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.h
+++ b/drivers/net/ethernet/ti/cpsw_priv.h
@@ -350,6 +350,7 @@ struct cpsw_common {
 	bool				rx_irq_disabled;
 	bool				tx_irq_disabled;
 	u32 irqs_table[IRQ_NUM];
+	int misc_irq;
 	struct cpts			*cpts;
 	struct devlink *devlink;
 	int				rx_ch_num, tx_ch_num;
@@ -442,6 +443,7 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 		 struct page *page, int port);
 irqreturn_t cpsw_tx_interrupt(int irq, void *dev_id);
 irqreturn_t cpsw_rx_interrupt(int irq, void *dev_id);
+irqreturn_t cpsw_misc_interrupt(int irq, void *dev_id);
 int cpsw_tx_mq_poll(struct napi_struct *napi_tx, int budget);
 int cpsw_tx_poll(struct napi_struct *napi_tx, int budget);
 int cpsw_rx_mq_poll(struct napi_struct *napi_rx, int budget);
-- 
cgit v1.2.3-59-g8ed1b


From 3c9143d96852485725d330b9164062d8f2d90a38 Mon Sep 17 00:00:00 2001
From: Xu Wang <vulab@iscas.ac.cn>
Date: Thu, 23 Apr 2020 13:43:13 +0800
Subject: net: sched : Remove unnecessary cast in kfree

Remove unnecessary casts in the argument to kfree.

Signed-off-by: Xu Wang <vulab@iscas.ac.cn>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/em_ipt.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/em_ipt.c b/net/sched/em_ipt.c
index eecfe072c508..18755d29fd15 100644
--- a/net/sched/em_ipt.c
+++ b/net/sched/em_ipt.c
@@ -199,7 +199,7 @@ static void em_ipt_destroy(struct tcf_ematch *em)
 		im->match->destroy(&par);
 	}
 	module_put(im->match->me);
-	kfree((void *)im);
+	kfree(im);
 }
 
 static int em_ipt_match(struct sk_buff *skb, struct tcf_ematch *em,
-- 
cgit v1.2.3-59-g8ed1b


From 8ffe2df6426f874659a3aa1654f45ba83fa91f87 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Thu, 23 Apr 2020 15:27:40 +0800
Subject: qed: Make ll2_cbs static
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following sparse warning:

drivers/net/ethernet/qlogic/qed/qed_ll2.c:2334:20: warning: symbol 'll2_cbs'
was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Acked-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_ll2.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_ll2.c b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
index 037e5978787e..4afd8572ada6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_ll2.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_ll2.c
@@ -2331,7 +2331,7 @@ static void qed_ll2_register_cb_ops(struct qed_dev *cdev,
 	cdev->ll2->cb_cookie = cookie;
 }
 
-struct qed_ll2_cbs ll2_cbs = {
+static struct qed_ll2_cbs ll2_cbs = {
 	.rx_comp_cb = &qed_ll2b_complete_rx_packet,
 	.rx_release_cb = &qed_ll2b_release_rx_packet,
 	.tx_comp_cb = &qed_ll2b_complete_tx_packet,
-- 
cgit v1.2.3-59-g8ed1b


From efcd549da9d7e8194b4d2d2f35eff8ce7b4da684 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 23 Apr 2020 15:10:16 +0100
Subject: net: phy: bcm54140: fix less than zero comparison on an unsigned

Currently the unsigned variable tmp is being checked for a negative
error return from the call to bcm_phy_read_rdb and this can never
be true since tmp is unsigned.  Fix this by making tmp a plain int.

Addresses-Coverity: ("Unsigned compared against 0")
Fixes: 4406d36dfdf1 ("net: phy: bcm54140: add hwmon support")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michael Walle <michael@walle.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index aa854477e06a..7341f0126cc4 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -191,7 +191,8 @@ out:
 static int bcm54140_hwmon_read_temp(struct device *dev, u32 attr, long *val)
 {
 	struct phy_device *phydev = dev_get_drvdata(dev);
-	u16 reg, tmp;
+	u16 reg;
+	int tmp;
 
 	switch (attr) {
 	case hwmon_temp_input:
@@ -224,7 +225,8 @@ static int bcm54140_hwmon_read_in(struct device *dev, u32 attr,
 				  int channel, long *val)
 {
 	struct phy_device *phydev = dev_get_drvdata(dev);
-	u16 bit, reg, tmp;
+	u16 bit, reg;
+	int tmp;
 
 	switch (attr) {
 	case hwmon_in_input:
-- 
cgit v1.2.3-59-g8ed1b


From c7c4c44c9a95d87e50ced38f7480e779cb472174 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 24 Apr 2020 08:08:02 +0800
Subject: net: openvswitch: expand the meters supported number

In the kernel datapath of Open vSwitch, there are only 1024
buckets of meters in one datapath. Installing more than
1024 (e.g. 8192) meters may lead to a performance drop.
But in some cases, for example when Open vSwitch is used as an
edge gateway, there should be at least 20K meters, where meters
are used for IP address bandwidth limitation.

[Open vSwitch userspace datapath has this issue too.]

To make meters more scalable, this patch uses a meter array instead
of hash tables, and expands/shrinks the array when necessary. So we
can install more meters than before in the datapath.
By introducing the struct *dp_meter_instance, it is easy to
expand the meter array by changing the *ti pointer in the struct
*dp_meter_table.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/datapath.h |   2 +-
 net/openvswitch/meter.c    | 240 ++++++++++++++++++++++++++++++++++-----------
 net/openvswitch/meter.h    |  16 ++-
 3 files changed, 195 insertions(+), 63 deletions(-)

diff --git a/net/openvswitch/datapath.h b/net/openvswitch/datapath.h
index e239a46c2f94..2016dd107939 100644
--- a/net/openvswitch/datapath.h
+++ b/net/openvswitch/datapath.h
@@ -82,7 +82,7 @@ struct datapath {
 	u32 max_headroom;
 
 	/* Switch meters. */
-	struct hlist_head *meters;
+	struct dp_meter_table meter_tbl;
 };
 
 /**
diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 5010d1ddd4bd..f806ded1dd0a 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -19,8 +19,6 @@
 #include "datapath.h"
 #include "meter.h"
 
-#define METER_HASH_BUCKETS 1024
-
 static const struct nla_policy meter_policy[OVS_METER_ATTR_MAX + 1] = {
 	[OVS_METER_ATTR_ID] = { .type = NLA_U32, },
 	[OVS_METER_ATTR_KBPS] = { .type = NLA_FLAG },
@@ -39,6 +37,11 @@ static const struct nla_policy band_policy[OVS_BAND_ATTR_MAX + 1] = {
 	[OVS_BAND_ATTR_STATS] = { .len = sizeof(struct ovs_flow_stats) },
 };
 
+static u32 meter_hash(struct dp_meter_instance *ti, u32 id)
+{
+	return id % ti->n_meters;
+}
+
 static void ovs_meter_free(struct dp_meter *meter)
 {
 	if (!meter)
@@ -47,40 +50,153 @@ static void ovs_meter_free(struct dp_meter *meter)
 	kfree_rcu(meter, rcu);
 }
 
-static struct hlist_head *meter_hash_bucket(const struct datapath *dp,
-					    u32 meter_id)
-{
-	return &dp->meters[meter_id & (METER_HASH_BUCKETS - 1)];
-}
-
 /* Call with ovs_mutex or RCU read lock. */
-static struct dp_meter *lookup_meter(const struct datapath *dp,
+static struct dp_meter *lookup_meter(const struct dp_meter_table *tbl,
 				     u32 meter_id)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter_id);
 	struct dp_meter *meter;
-	struct hlist_head *head;
 
-	head = meter_hash_bucket(dp, meter_id);
-	hlist_for_each_entry_rcu(meter, head, dp_hash_node,
-				lockdep_ovsl_is_held()) {
-		if (meter->id == meter_id)
-			return meter;
-	}
+	meter = rcu_dereference_ovsl(ti->dp_meters[hash]);
+	if (meter && likely(meter->id == meter_id))
+		return meter;
+
 	return NULL;
 }
 
-static void attach_meter(struct datapath *dp, struct dp_meter *meter)
+static struct dp_meter_instance *dp_meter_instance_alloc(const u32 size)
+{
+	struct dp_meter_instance *ti;
+
+	ti = kvzalloc(sizeof(*ti) +
+		      sizeof(struct dp_meter *) * size,
+		      GFP_KERNEL);
+	if (!ti)
+		return NULL;
+
+	ti->n_meters = size;
+
+	return ti;
+}
+
+static void dp_meter_instance_free(struct dp_meter_instance *ti)
+{
+	kvfree(ti);
+}
+
+static void dp_meter_instance_free_rcu(struct rcu_head *rcu)
+{
+	struct dp_meter_instance *ti;
+
+	ti = container_of(rcu, struct dp_meter_instance, rcu);
+	kvfree(ti);
+}
+
+static int
+dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
+{
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	int n_meters = min(size, ti->n_meters);
+	struct dp_meter_instance *new_ti;
+	int i;
+
+	new_ti = dp_meter_instance_alloc(size);
+	if (!new_ti)
+		return -ENOMEM;
+
+	for (i = 0; i < n_meters; i++)
+		new_ti->dp_meters[i] =
+			rcu_dereference_ovsl(ti->dp_meters[i]);
+
+	rcu_assign_pointer(tbl->ti, new_ti);
+	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
+
+	return 0;
+}
+
+static void dp_meter_instance_insert(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
+{
+	u32 hash;
+
+	hash = meter_hash(ti, meter->id);
+	rcu_assign_pointer(ti->dp_meters[hash], meter);
+}
+
+static void dp_meter_instance_remove(struct dp_meter_instance *ti,
+				     struct dp_meter *meter)
 {
-	struct hlist_head *head = meter_hash_bucket(dp, meter->id);
+	u32 hash;
 
-	hlist_add_head_rcu(&meter->dp_hash_node, head);
+	hash = meter_hash(ti, meter->id);
+	RCU_INIT_POINTER(ti->dp_meters[hash], NULL);
 }
 
-static void detach_meter(struct dp_meter *meter)
+static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
+	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
+	u32 hash = meter_hash(ti, meter->id);
+
+	/* In generally, slots selected should be empty, because
+	 * OvS uses id-pool to fetch a available id.
+	 */
+	if (unlikely(rcu_dereference_ovsl(ti->dp_meters[hash])))
+		return -EBUSY;
+
+	dp_meter_instance_insert(ti, meter);
+
+	/* That function is thread-safe. */
+	if (++tbl->count >= ti->n_meters)
+		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
+			goto expand_err;
+
+	return 0;
+
+expand_err:
+	dp_meter_instance_remove(ti, meter);
+	tbl->count--;
+	return -ENOMEM;
+}
+
+static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
+{
+	struct dp_meter_instance *ti;
+
 	ASSERT_OVSL();
-	if (meter)
-		hlist_del_rcu(&meter->dp_hash_node);
+	if (!meter)
+		return 0;
+
+	ti = rcu_dereference_ovsl(tbl->ti);
+	dp_meter_instance_remove(ti, meter);
+
+	tbl->count--;
+
+	/* Shrink the meter array if necessary. */
+	if (ti->n_meters > DP_METER_ARRAY_SIZE_MIN &&
+	    tbl->count <= (ti->n_meters / 4)) {
+		int half_size = ti->n_meters / 2;
+		int i;
+
+		/* Avoid hash collision, don't move slots to other place.
+		 * Make sure there are no references of meters in array
+		 * which will be released.
+		 */
+		for (i = half_size; i < ti->n_meters; i++)
+			if (rcu_dereference_ovsl(ti->dp_meters[i]))
+				goto out;
+
+		if (dp_meter_instance_realloc(tbl, half_size))
+			goto shrink_err;
+	}
+
+out:
+	return 0;
+
+shrink_err:
+	dp_meter_instance_insert(ti, meter);
+	tbl->count++;
+	return -ENOMEM;
 }
 
 static struct sk_buff *
@@ -273,6 +389,7 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	struct sk_buff *reply;
 	struct ovs_header *ovs_reply_header;
 	struct ovs_header *ovs_header = info->userhdr;
+	struct dp_meter_table *meter_tbl;
 	struct datapath *dp;
 	int err;
 	u32 meter_id;
@@ -300,12 +417,18 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
+	meter_tbl = &dp->meter_tbl;
 	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
-	/* Cannot fail after this. */
-	old_meter = lookup_meter(dp, meter_id);
-	detach_meter(old_meter);
-	attach_meter(dp, meter);
+	old_meter = lookup_meter(meter_tbl, meter_id);
+	err = detach_meter(meter_tbl, old_meter);
+	if (err)
+		goto exit_unlock;
+
+	err = attach_meter(meter_tbl, meter);
+	if (err)
+		goto exit_unlock;
+
 	ovs_unlock();
 
 	/* Build response with the meter_id and stats from
@@ -337,14 +460,14 @@ exit_free_meter:
 
 static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
@@ -365,7 +488,7 @@ static int ovs_meter_cmd_get(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	/* Locate meter, copy stats. */
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (!meter) {
 		err = -ENOENT;
 		goto exit_unlock;
@@ -390,18 +513,17 @@ exit_unlock:
 
 static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 {
-	struct nlattr **a = info->attrs;
-	u32 meter_id;
 	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
+	struct nlattr **a = info->attrs;
+	struct dp_meter *old_meter;
+	struct sk_buff *reply;
 	struct datapath *dp;
+	u32 meter_id;
 	int err;
-	struct sk_buff *reply;
-	struct dp_meter *old_meter;
 
 	if (!a[OVS_METER_ATTR_ID])
 		return -EINVAL;
-	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_DEL,
 					  &ovs_reply_header);
@@ -416,14 +538,19 @@ static int ovs_meter_cmd_del(struct sk_buff *skb, struct genl_info *info)
 		goto exit_unlock;
 	}
 
-	old_meter = lookup_meter(dp, meter_id);
+	meter_id = nla_get_u32(a[OVS_METER_ATTR_ID]);
+	old_meter = lookup_meter(&dp->meter_tbl, meter_id);
 	if (old_meter) {
 		spin_lock_bh(&old_meter->lock);
 		err = ovs_meter_cmd_reply_stats(reply, meter_id, old_meter);
 		WARN_ON(err);
 		spin_unlock_bh(&old_meter->lock);
-		detach_meter(old_meter);
+
+		err = detach_meter(&dp->meter_tbl, old_meter);
+		if (err)
+			goto exit_unlock;
 	}
+
 	ovs_unlock();
 	ovs_meter_free(old_meter);
 	genlmsg_end(reply, ovs_reply_header);
@@ -443,16 +570,16 @@ exit_unlock:
 bool ovs_meter_execute(struct datapath *dp, struct sk_buff *skb,
 		       struct sw_flow_key *key, u32 meter_id)
 {
-	struct dp_meter *meter;
-	struct dp_meter_band *band;
 	long long int now_ms = div_u64(ktime_get_ns(), 1000 * 1000);
 	long long int long_delta_ms;
-	u32 delta_ms;
-	u32 cost;
+	struct dp_meter_band *band;
+	struct dp_meter *meter;
 	int i, band_exceeded_max = -1;
 	u32 band_exceeded_rate = 0;
+	u32 delta_ms;
+	u32 cost;
 
-	meter = lookup_meter(dp, meter_id);
+	meter = lookup_meter(&dp->meter_tbl, meter_id);
 	/* Do not drop the packet when there is no meter. */
 	if (!meter)
 		return false;
@@ -570,32 +697,27 @@ struct genl_family dp_meter_genl_family __ro_after_init = {
 
 int ovs_meters_init(struct datapath *dp)
 {
-	int i;
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti;
 
-	dp->meters = kmalloc_array(METER_HASH_BUCKETS,
-				   sizeof(struct hlist_head), GFP_KERNEL);
-
-	if (!dp->meters)
+	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
+	if (!ti)
 		return -ENOMEM;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++)
-		INIT_HLIST_HEAD(&dp->meters[i]);
+	rcu_assign_pointer(tbl->ti, ti);
+	tbl->count = 0;
 
 	return 0;
 }
 
 void ovs_meters_exit(struct datapath *dp)
 {
+	struct dp_meter_table *tbl = &dp->meter_tbl;
+	struct dp_meter_instance *ti = rcu_dereference_raw(tbl->ti);
 	int i;
 
-	for (i = 0; i < METER_HASH_BUCKETS; i++) {
-		struct hlist_head *head = &dp->meters[i];
-		struct dp_meter *meter;
-		struct hlist_node *n;
-
-		hlist_for_each_entry_safe(meter, n, head, dp_hash_node)
-			kfree(meter);
-	}
+	for (i = 0; i < ti->n_meters; i++)
+		ovs_meter_free(ti->dp_meters[i]);
 
-	kfree(dp->meters);
+	dp_meter_instance_free(ti);
 }
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f645913870bd..f52052d30a16 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -13,11 +13,13 @@
 #include <linux/openvswitch.h>
 #include <linux/genetlink.h>
 #include <linux/skbuff.h>
+#include <linux/bits.h>
 
 #include "flow.h"
 struct datapath;
 
 #define DP_MAX_BANDS		1
+#define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
 
 struct dp_meter_band {
 	u32 type;
@@ -30,9 +32,6 @@ struct dp_meter_band {
 struct dp_meter {
 	spinlock_t lock;    /* Per meter lock */
 	struct rcu_head rcu;
-	struct hlist_node dp_hash_node; /*Element in datapath->meters
-					 * hash table.
-					 */
 	u32 id;
 	u16 kbps:1, keep_stats:1;
 	u16 n_bands;
@@ -42,6 +41,17 @@ struct dp_meter {
 	struct dp_meter_band bands[];
 };
 
+struct dp_meter_instance {
+	struct rcu_head rcu;
+	u32 n_meters;
+	struct dp_meter __rcu *dp_meters[];
+};
+
+struct dp_meter_table {
+	struct dp_meter_instance __rcu *ti;
+	u32 count;
+};
+
 extern struct genl_family dp_meter_genl_family;
 int ovs_meters_init(struct datapath *dp);
 void ovs_meters_exit(struct datapath *dp);
-- 
cgit v1.2.3-59-g8ed1b


From eb58eebc7fb5e23c9cc7d557c0a9236630591526 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 24 Apr 2020 08:08:03 +0800
Subject: net: openvswitch: set max limitation to meters
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Don't allow the user to create an unlimited number of meters, which
may consume a large amount of kernel memory. The maximum number
supported is decided by physical memory, with 20K meters as the default.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 57 ++++++++++++++++++++++++++++++++++++++++---------
 net/openvswitch/meter.h |  2 ++
 2 files changed, 49 insertions(+), 10 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index f806ded1dd0a..372f4565872d 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -12,6 +12,7 @@
 #include <linux/openvswitch.h>
 #include <linux/netlink.h>
 #include <linux/rculist.h>
+#include <linux/swap.h>
 
 #include <net/netlink.h>
 #include <net/genetlink.h>
@@ -137,6 +138,7 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 {
 	struct dp_meter_instance *ti = rcu_dereference_ovsl(tbl->ti);
 	u32 hash = meter_hash(ti, meter->id);
+	int err;
 
 	/* In generally, slots selected should be empty, because
 	 * OvS uses id-pool to fetch a available id.
@@ -147,16 +149,24 @@ static int attach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
 	dp_meter_instance_insert(ti, meter);
 
 	/* That function is thread-safe. */
-	if (++tbl->count >= ti->n_meters)
-		if (dp_meter_instance_realloc(tbl, ti->n_meters * 2))
-			goto expand_err;
+	tbl->count++;
+	if (tbl->count >= tbl->max_meters_allowed) {
+		err = -EFBIG;
+		goto attach_err;
+	}
+
+	if (tbl->count >= ti->n_meters &&
+	    dp_meter_instance_realloc(tbl, ti->n_meters * 2)) {
+		err = -ENOMEM;
+		goto attach_err;
+	}
 
 	return 0;
 
-expand_err:
+attach_err:
 	dp_meter_instance_remove(ti, meter);
 	tbl->count--;
-	return -ENOMEM;
+	return err;
 }
 
 static int detach_meter(struct dp_meter_table *tbl, struct dp_meter *meter)
@@ -266,18 +276,32 @@ error:
 
 static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 {
-	struct sk_buff *reply;
+	struct ovs_header *ovs_header = info->userhdr;
 	struct ovs_header *ovs_reply_header;
 	struct nlattr *nla, *band_nla;
-	int err;
+	struct sk_buff *reply;
+	struct datapath *dp;
+	int err = -EMSGSIZE;
 
 	reply = ovs_meter_cmd_reply_start(info, OVS_METER_CMD_FEATURES,
 					  &ovs_reply_header);
 	if (IS_ERR(reply))
 		return PTR_ERR(reply);
 
-	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS, U32_MAX) ||
-	    nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
+	ovs_lock();
+	dp = get_dp(sock_net(skb->sk), ovs_header->dp_ifindex);
+	if (!dp) {
+		err = -ENODEV;
+		goto exit_unlock;
+	}
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_METERS,
+			dp->meter_tbl.max_meters_allowed))
+		goto exit_unlock;
+
+	ovs_unlock();
+
+	if (nla_put_u32(reply, OVS_METER_ATTR_MAX_BANDS, DP_MAX_BANDS))
 		goto nla_put_failure;
 
 	nla = nla_nest_start_noflag(reply, OVS_METER_ATTR_BANDS);
@@ -296,9 +320,10 @@ static int ovs_meter_cmd_features(struct sk_buff *skb, struct genl_info *info)
 	genlmsg_end(reply, ovs_reply_header);
 	return genlmsg_reply(reply, info);
 
+exit_unlock:
+	ovs_unlock();
 nla_put_failure:
 	nlmsg_free(reply);
-	err = -EMSGSIZE;
 	return err;
 }
 
@@ -699,15 +724,27 @@ int ovs_meters_init(struct datapath *dp)
 {
 	struct dp_meter_table *tbl = &dp->meter_tbl;
 	struct dp_meter_instance *ti;
+	unsigned long free_mem_bytes;
 
 	ti = dp_meter_instance_alloc(DP_METER_ARRAY_SIZE_MIN);
 	if (!ti)
 		return -ENOMEM;
 
+	/* Allow meters in a datapath to use ~3.12% of physical memory. */
+	free_mem_bytes = nr_free_buffer_pages() * (PAGE_SIZE >> 5);
+	tbl->max_meters_allowed = min(free_mem_bytes / sizeof(struct dp_meter),
+				      DP_METER_NUM_MAX);
+	if (!tbl->max_meters_allowed)
+		goto out_err;
+
 	rcu_assign_pointer(tbl->ti, ti);
 	tbl->count = 0;
 
 	return 0;
+
+out_err:
+	dp_meter_instance_free(ti);
+	return -ENOMEM;
 }
 
 void ovs_meters_exit(struct datapath *dp)
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index f52052d30a16..61a3ca43cd77 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -20,6 +20,7 @@ struct datapath;
 
 #define DP_MAX_BANDS		1
 #define DP_METER_ARRAY_SIZE_MIN	BIT_ULL(10)
+#define DP_METER_NUM_MAX	(200000UL)
 
 struct dp_meter_band {
 	u32 type;
@@ -50,6 +51,7 @@ struct dp_meter_instance {
 struct dp_meter_table {
 	struct dp_meter_instance __rcu *ti;
 	u32 count;
+	u32 max_meters_allowed;
 };
 
 extern struct genl_family dp_meter_genl_family;
-- 
cgit v1.2.3-59-g8ed1b


From a8e387384f554ea0b63889a7682ffcddee24df8b Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 24 Apr 2020 08:08:04 +0800
Subject: net: openvswitch: remove the unnecessary check

"meter" is already checked before ovs_meter_cmd_reply_stats() is
invoked, so don't check it again in that function.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 372f4565872d..b7893b0d6423 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -242,12 +242,11 @@ static int ovs_meter_cmd_reply_stats(struct sk_buff *reply, u32 meter_id,
 	if (nla_put_u32(reply, OVS_METER_ATTR_ID, meter_id))
 		goto error;
 
-	if (!meter)
-		return 0;
-
 	if (nla_put(reply, OVS_METER_ATTR_STATS,
-		    sizeof(struct ovs_flow_stats), &meter->stats) ||
-	    nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
+		    sizeof(struct ovs_flow_stats), &meter->stats))
+		goto error;
+
+	if (nla_put_u64_64bit(reply, OVS_METER_ATTR_USED, meter->used,
 			      OVS_METER_ATTR_PAD))
 		goto error;
 
-- 
cgit v1.2.3-59-g8ed1b


From c77350089052cafa8125169e37463ab7028d6a18 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 24 Apr 2020 08:08:05 +0800
Subject: net: openvswitch: make EINVAL return value more obvious

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index b7893b0d6423..e36b464b32a5 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -419,9 +419,8 @@ static int ovs_meter_cmd_set(struct sk_buff *skb, struct genl_info *info)
 	u32 meter_id;
 	bool failed;
 
-	if (!a[OVS_METER_ATTR_ID]) {
-		return -ENODEV;
-	}
+	if (!a[OVS_METER_ATTR_ID])
+		return -EINVAL;
 
 	meter = dp_meter_create(a);
 	if (IS_ERR_OR_NULL(meter))
-- 
cgit v1.2.3-59-g8ed1b


From e57358873bb5d6caa882b9684f59140912b37dde Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Fri, 24 Apr 2020 08:08:06 +0800
Subject: net: openvswitch: use u64 for meter bucket

When the meter rate is set to 4 Gbps or more, the 32-bit bucket
computation overflows and the meters don't work as expected.

Cc: Pravin B Shelar <pshelar@ovn.org>
Cc: Andy Zhou <azhou@ovn.org>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Acked-by: Pravin B Shelar <pshelar@ovn.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 2 +-
 net/openvswitch/meter.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index e36b464b32a5..915f31123f23 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -392,7 +392,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 *
 		 * Start with a full bucket.
 		 */
-		band->bucket = (band->burst_size + band->rate) * 1000;
+		band->bucket = (band->burst_size + band->rate) * 1000ULL;
 		band_max_delta_t = band->bucket / band->rate;
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
diff --git a/net/openvswitch/meter.h b/net/openvswitch/meter.h
index 61a3ca43cd77..0c33889a8515 100644
--- a/net/openvswitch/meter.h
+++ b/net/openvswitch/meter.h
@@ -26,7 +26,7 @@ struct dp_meter_band {
 	u32 type;
 	u32 rate;
 	u32 burst_size;
-	u32 bucket; /* 1/1000 packets, or in bits */
+	u64 bucket; /* 1/1000 packets, or in bits */
 	struct ovs_flow_stats stats;
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 5cc58a9ecfa1bbf5fb587ec65e42f15dd5051238 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 23 Mar 2020 16:24:00 +0100
Subject: mac80211_hwsim: notify wmediumd of used MAC addresses

Currently, wmediumd requires each used MAC address to be configured
as a station in the virtual air, but that doesn't make sense as any
station could have multiple MAC addresses, and even have randomized
ones in scanning, etc.

Add some code here to tell wmediumd of used MAC addresses, binding
them to the hardware address. Combined with a wmediumd patch that
makes it track the addresses this allows configuring just the radio
address (42:00:00:00:nn:00 unless the radio was manually created)
in wmediumd as a station, and all addresses that the station uses
are added/removed dynamically.

Tested with random scan, which without this and the corresponding
wmediumd change doesn't get anything through as the sender doesn't
exist as far as wmediumd is concerned (it's random).

Link: https://lore.kernel.org/r/20200323162358.b397b1a1acef.Ice0536e34e5d96c51f97c374ea8af9551347c7e8@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 51 +++++++++++++++++++++++++++++++++++
 drivers/net/wireless/mac80211_hwsim.h |  8 ++++++
 2 files changed, 59 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 7c4b7c31d07a..f1c08b31c564 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1068,6 +1068,47 @@ static int hwsim_unicast_netgroup(struct mac80211_hwsim_data *data,
 	return res;
 }
 
+static void mac80211_hwsim_config_mac_nl(struct ieee80211_hw *hw,
+					 const u8 *addr, bool add)
+{
+	struct mac80211_hwsim_data *data = hw->priv;
+	u32 _portid = READ_ONCE(data->wmediumd);
+	struct sk_buff *skb;
+	void *msg_head;
+
+	if (!_portid && !hwsim_virtio_enabled)
+		return;
+
+	skb = genlmsg_new(GENLMSG_DEFAULT_SIZE, GFP_ATOMIC);
+	if (!skb)
+		return;
+
+	msg_head = genlmsg_put(skb, 0, 0, &hwsim_genl_family, 0,
+			       add ? HWSIM_CMD_ADD_MAC_ADDR :
+				     HWSIM_CMD_DEL_MAC_ADDR);
+	if (!msg_head) {
+		pr_debug("mac80211_hwsim: problem with msg_head\n");
+		goto nla_put_failure;
+	}
+
+	if (nla_put(skb, HWSIM_ATTR_ADDR_TRANSMITTER,
+		    ETH_ALEN, data->addresses[1].addr))
+		goto nla_put_failure;
+
+	if (nla_put(skb, HWSIM_ATTR_ADDR_RECEIVER, ETH_ALEN, addr))
+		goto nla_put_failure;
+
+	genlmsg_end(skb, msg_head);
+
+	if (hwsim_virtio_enabled)
+		hwsim_tx_virtio(data, skb);
+	else
+		hwsim_unicast_netgroup(data, skb, _portid);
+	return;
+nla_put_failure:
+	nlmsg_free(skb);
+}
+
 static inline u16 trans_tx_rate_flags_ieee2hwsim(struct ieee80211_tx_rate *rate)
 {
 	u16 result = 0;
@@ -1545,6 +1586,9 @@ static int mac80211_hwsim_add_interface(struct ieee80211_hw *hw,
 		  vif->addr);
 	hwsim_set_magic(vif);
 
+	if (vif->type != NL80211_IFTYPE_MONITOR)
+		mac80211_hwsim_config_mac_nl(hw, vif->addr, true);
+
 	vif->cab_queue = 0;
 	vif->hw_queue[IEEE80211_AC_VO] = 0;
 	vif->hw_queue[IEEE80211_AC_VI] = 1;
@@ -1584,6 +1628,8 @@ static void mac80211_hwsim_remove_interface(
 		  vif->addr);
 	hwsim_check_magic(vif);
 	hwsim_clear_magic(vif);
+	if (vif->type != NL80211_IFTYPE_MONITOR)
+		mac80211_hwsim_config_mac_nl(hw, vif->addr, false);
 }
 
 static void mac80211_hwsim_tx_frame(struct ieee80211_hw *hw,
@@ -2104,6 +2150,8 @@ static void hw_scan_work(struct work_struct *work)
 		hwsim->hw_scan_vif = NULL;
 		hwsim->tmp_chan = NULL;
 		mutex_unlock(&hwsim->mutex);
+		mac80211_hwsim_config_mac_nl(hwsim->hw, hwsim->scan_addr,
+					     false);
 		return;
 	}
 
@@ -2177,6 +2225,7 @@ static int mac80211_hwsim_hw_scan(struct ieee80211_hw *hw,
 	memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
 	mutex_unlock(&hwsim->mutex);
 
+	mac80211_hwsim_config_mac_nl(hw, hwsim->scan_addr, true);
 	wiphy_dbg(hw->wiphy, "hwsim hw_scan request\n");
 
 	ieee80211_queue_delayed_work(hwsim->hw, &hwsim->hw_scan, 0);
@@ -2220,6 +2269,7 @@ static void mac80211_hwsim_sw_scan(struct ieee80211_hw *hw,
 	pr_debug("hwsim sw_scan request, prepping stuff\n");
 
 	memcpy(hwsim->scan_addr, mac_addr, ETH_ALEN);
+	mac80211_hwsim_config_mac_nl(hw, hwsim->scan_addr, true);
 	hwsim->scanning = true;
 	memset(hwsim->survey_data, 0, sizeof(hwsim->survey_data));
 
@@ -2236,6 +2286,7 @@ static void mac80211_hwsim_sw_scan_complete(struct ieee80211_hw *hw,
 
 	pr_debug("hwsim sw_scan_complete\n");
 	hwsim->scanning = false;
+	mac80211_hwsim_config_mac_nl(hw, hwsim->scan_addr, false);
 	eth_zero_addr(hwsim->scan_addr);
 
 	mutex_unlock(&hwsim->mutex);
diff --git a/drivers/net/wireless/mac80211_hwsim.h b/drivers/net/wireless/mac80211_hwsim.h
index 28ade92adcb4..9dceed77c5d6 100644
--- a/drivers/net/wireless/mac80211_hwsim.h
+++ b/drivers/net/wireless/mac80211_hwsim.h
@@ -75,6 +75,12 @@ enum hwsim_tx_control_flags {
  * @HWSIM_CMD_DEL_RADIO: destroy a radio, reply is multicasted
  * @HWSIM_CMD_GET_RADIO: fetch information about existing radios, uses:
  *	%HWSIM_ATTR_RADIO_ID
+ * @HWSIM_CMD_ADD_MAC_ADDR: add a receive MAC address (given in the
+ *	%HWSIM_ATTR_ADDR_RECEIVER attribute) to a device identified by
+ *	%HWSIM_ATTR_ADDR_TRANSMITTER. This lets wmediumd forward frames
+ *	to this receiver address for a given station.
+ * @HWSIM_CMD_DEL_MAC_ADDR: remove the MAC address again, the attributes
+ *	are the same as to @HWSIM_CMD_ADD_MAC_ADDR.
  * @__HWSIM_CMD_MAX: enum limit
  */
 enum {
@@ -85,6 +91,8 @@ enum {
 	HWSIM_CMD_NEW_RADIO,
 	HWSIM_CMD_DEL_RADIO,
 	HWSIM_CMD_GET_RADIO,
+	HWSIM_CMD_ADD_MAC_ADDR,
+	HWSIM_CMD_DEL_MAC_ADDR,
 	__HWSIM_CMD_MAX,
 };
 #define HWSIM_CMD_MAX (_HWSIM_CMD_MAX - 1)
-- 
cgit v1.2.3-59-g8ed1b


From 1db364c88695272e3410eb4b5d4595c8cb15db30 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 12:38:04 +0200
Subject: mac80211: mlme: remove duplicate AID bookkeeping

Maintain the connection AID only in sdata->vif.bss_conf.aid, not
also in sdata->u.mgd.aid.

Keep setting that where we set ifmgd->aid before, which has the
side effect of exposing the AID to the driver before the station
entry (AP) is marked associated, in case it needs it then.

Requested-by: Felix Fietkau <nbd@nbd.name>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Tested-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/r/20200417123802.085d4a322b0c.I2e7a2ceceea8c6880219f9e9ee4d4ac985fd295a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/debugfs_netdev.c | 2 +-
 net/mac80211/ieee80211_i.h    | 2 --
 net/mac80211/mlme.c           | 7 +++----
 net/mac80211/tdls.c           | 3 +--
 net/mac80211/tx.c             | 2 +-
 5 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/net/mac80211/debugfs_netdev.c b/net/mac80211/debugfs_netdev.c
index 3dbe7c5cefd1..d7e955127d5c 100644
--- a/net/mac80211/debugfs_netdev.c
+++ b/net/mac80211/debugfs_netdev.c
@@ -236,7 +236,7 @@ IEEE80211_IF_FILE_R(hw_queues);
 
 /* STA attributes */
 IEEE80211_IF_FILE(bssid, u.mgd.bssid, MAC);
-IEEE80211_IF_FILE(aid, u.mgd.aid, DEC);
+IEEE80211_IF_FILE(aid, vif.bss_conf.aid, DEC);
 IEEE80211_IF_FILE(beacon_timeout, u.mgd.beacon_timeout, JIFFIES_TO_MS);
 
 static int ieee80211_set_smps(struct ieee80211_sub_if_data *sdata,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index f8ed4f621f7f..934a91bef575 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -450,8 +450,6 @@ struct ieee80211_if_managed {
 
 	u8 bssid[ETH_ALEN] __aligned(2);
 
-	u16 aid;
-
 	bool powersave; /* powersave requested for this iface */
 	bool broken_ap; /* AP is broken -- turn off powersave */
 	bool have_beacon;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 16d75da0996a..7139335f29c0 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3249,7 +3249,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		return false;
 	}
 
-	ifmgd->aid = aid;
+	sdata->vif.bss_conf.aid = aid;
 	ifmgd->tdls_chan_switch_prohibited =
 		elems->ext_capab && elems->ext_capab_len >= 5 &&
 		(elems->ext_capab[4] & WLAN_EXT_CAPA5_TDLS_CH_SW_PROHIBITED);
@@ -3521,9 +3521,8 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		bss_conf->protected_keep_alive = false;
 	}
 
-	/* set AID and assoc capability,
+	/* set assoc capability (AID was already set earlier),
 	 * ieee80211_set_associated() will tell the driver */
-	bss_conf->aid = aid;
 	bss_conf->assoc_capability = capab_info;
 	ieee80211_set_associated(sdata, cbss, changed);
 
@@ -3948,7 +3947,7 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 					  mgmt->bssid, bssid);
 
 	if (ieee80211_hw_check(&local->hw, PS_NULLFUNC_STACK) &&
-	    ieee80211_check_tim(elems.tim, elems.tim_len, ifmgd->aid)) {
+	    ieee80211_check_tim(elems.tim, elems.tim_len, bss_conf->aid)) {
 		if (local->hw.conf.dynamic_ps_timeout > 0) {
 			if (local->hw.conf.flags & IEEE80211_CONF_PS) {
 				local->hw.conf.flags &= ~IEEE80211_CONF_PS;
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index fca1f5477396..7ff22f9d6e80 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -226,12 +226,11 @@ static void ieee80211_tdls_add_link_ie(struct ieee80211_sub_if_data *sdata,
 static void
 ieee80211_tdls_add_aid(struct ieee80211_sub_if_data *sdata, struct sk_buff *skb)
 {
-	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	u8 *pos = skb_put(skb, 4);
 
 	*pos++ = WLAN_EID_AID;
 	*pos++ = 2; /* len */
-	put_unaligned_le16(ifmgd->aid, pos);
+	put_unaligned_le16(sdata->vif.bss_conf.aid, pos);
 }
 
 /* translate numbering in the WMM parameter IE to the mac80211 notation */
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 82846aca86d9..3dc1990e15c5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5006,7 +5006,7 @@ struct sk_buff *ieee80211_pspoll_get(struct ieee80211_hw *hw,
 	pspoll = skb_put_zero(skb, sizeof(*pspoll));
 	pspoll->frame_control = cpu_to_le16(IEEE80211_FTYPE_CTL |
 					    IEEE80211_STYPE_PSPOLL);
-	pspoll->aid = cpu_to_le16(ifmgd->aid);
+	pspoll->aid = cpu_to_le16(sdata->vif.bss_conf.aid);
 
 	/* aid in PS-Poll has its two MSBs each set to 1 */
 	pspoll->aid |= cpu_to_le16(1 << 15 | 1 << 14);
-- 
cgit v1.2.3-59-g8ed1b


From 90e8f58dfc04d1bd48ca155cc55ebf7ba1824864 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 11:18:31 +0200
Subject: mac80211: fix drv_config_iface_filter() behaviour

There are two bugs with this, first, it shouldn't be called
on an interface that's down, and secondly, it should then be
called when the interface comes up.

Note that the only current user (iwlwifi) doesn't seem to
care about either of these scenarios.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200417111830.401d82c7a0bf.I5dc7d718816460c2d8d89c7af6c215f9e2b3078f@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c   | 15 +++++++++------
 net/mac80211/iface.c |  5 +++++
 2 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 0f72813fed53..b90f2131ec7a 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3421,12 +3421,15 @@ static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
 		if (!local->open_count)
 			break;
 
-		if (sdata->vif.probe_req_reg == 1)
-			drv_config_iface_filter(local, sdata, FIF_PROBE_REQ,
-						FIF_PROBE_REQ);
-		else if (sdata->vif.probe_req_reg == 0)
-			drv_config_iface_filter(local, sdata, 0,
-						FIF_PROBE_REQ);
+		if (ieee80211_sdata_running(sdata)) {
+			if (sdata->vif.probe_req_reg == 1)
+				drv_config_iface_filter(local, sdata,
+							FIF_PROBE_REQ,
+							FIF_PROBE_REQ);
+			else if (sdata->vif.probe_req_reg == 0)
+				drv_config_iface_filter(local, sdata, 0,
+							FIF_PROBE_REQ);
+		}
 
 		ieee80211_configure_filter(local);
 		break;
diff --git a/net/mac80211/iface.c b/net/mac80211/iface.c
index d069825705d6..f900c84fb40f 100644
--- a/net/mac80211/iface.c
+++ b/net/mac80211/iface.c
@@ -644,6 +644,11 @@ int ieee80211_do_open(struct wireless_dev *wdev, bool coming_up)
 			local->fif_probe_req++;
 		}
 
+		if (sdata->vif.probe_req_reg)
+			drv_config_iface_filter(local, sdata,
+						FIF_PROBE_REQ,
+						FIF_PROBE_REQ);
+
 		if (sdata->vif.type != NL80211_IFTYPE_P2P_DEVICE &&
 		    sdata->vif.type != NL80211_IFTYPE_NAN)
 			changed |= ieee80211_reset_erp_info(sdata);
-- 
cgit v1.2.3-59-g8ed1b


From de2cc97acba036847cdfb74e336f6e560eb6907c Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Fri, 17 Apr 2020 13:21:33 +0300
Subject: iwlwifi: scan: remove support for fw scan api v13

The fw already supports scan api v14 and the firmware version that
supports only v13 was not published, so we can remove support for v13 in
the driver.

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.11883315579a.I4f59100e457c1079c5e4c90e4930d1fa62b7ddd7@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/scan.h | 26 -----------------
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c    | 36 ------------------------
 2 files changed, 62 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
index 3d770f406c38..5cc33a1b7172 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/scan.h
@@ -1050,20 +1050,6 @@ struct iwl_scan_req_params_v12 {
 	struct iwl_scan_probe_params_v3 probe_params;
 } __packed; /* SCAN_REQUEST_PARAMS_API_S_VER_12 */
 
-/**
- * struct iwl_scan_req_params_v13
- * @general_params: &struct iwl_scan_general_params_v10
- * @channel_params: &struct iwl_scan_channel_params_v4
- * @periodic_params: &struct iwl_scan_periodic_parms_v1
- * @probe_params: &struct iwl_scan_probe_params_v4
- */
-struct iwl_scan_req_params_v13 {
-	struct iwl_scan_general_params_v10 general_params;
-	struct iwl_scan_channel_params_v4 channel_params;
-	struct iwl_scan_periodic_parms_v1 periodic_params;
-	struct iwl_scan_probe_params_v4 probe_params;
-} __packed; /* SCAN_REQUEST_PARAMS_API_S_VER_13 */
-
 /**
  * struct iwl_scan_req_params_v14
  * @general_params: &struct iwl_scan_general_params_v10
@@ -1090,18 +1076,6 @@ struct iwl_scan_req_umac_v12 {
 	struct iwl_scan_req_params_v12 scan_params;
 } __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_12 */
 
-/**
- * struct iwl_scan_req_umac_v13
- * @uid: scan id, &enum iwl_umac_scan_uid_offsets
- * @ooc_priority: out of channel priority - &enum iwl_scan_priority
- * @scan_params: scan parameters
- */
-struct iwl_scan_req_umac_v13 {
-	__le32 uid;
-	__le32 ooc_priority;
-	struct iwl_scan_req_params_v13 scan_params;
-} __packed; /* SCAN_REQUEST_CMD_UMAC_API_S_VER_13 */
-
 /**
  * struct iwl_scan_req_umac_v14
  * @uid: scan id, &enum iwl_umac_scan_uid_offsets
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index 7a6ad1ff7055..bc48113f0568 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -2051,40 +2051,6 @@ static int iwl_mvm_scan_umac_v12(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	return 0;
 }
 
-static int iwl_mvm_scan_umac_v13(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-				 struct iwl_mvm_scan_params *params, int type,
-				 int uid)
-{
-	struct iwl_scan_req_umac_v13 *cmd = mvm->scan_cmd;
-	struct iwl_scan_req_params_v13 *scan_p = &cmd->scan_params;
-	int ret;
-	u16 gen_flags;
-	u32 bitmap_ssid = 0;
-
-	mvm->scan_uid_status[uid] = type;
-
-	cmd->ooc_priority = cpu_to_le32(iwl_mvm_scan_umac_ooc_priority(params));
-	cmd->uid = cpu_to_le32(uid);
-
-	gen_flags = iwl_mvm_scan_umac_flags_v2(mvm, params, vif, type);
-	iwl_mvm_scan_umac_fill_general_p_v10(mvm, params, vif,
-					     &scan_p->general_params,
-					     gen_flags);
-
-	 ret = iwl_mvm_fill_scan_sched_params(params,
-					      scan_p->periodic_params.schedule,
-					      &scan_p->periodic_params.delay);
-	if (ret)
-		return ret;
-
-	iwl_mvm_scan_umac_fill_probe_p_v4(params, &scan_p->probe_params,
-					  &bitmap_ssid);
-	iwl_mvm_scan_umac_fill_ch_p_v4(mvm, params, vif,
-				       &scan_p->channel_params, bitmap_ssid);
-
-	return 0;
-}
-
 static int iwl_mvm_scan_umac_v14(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				 struct iwl_mvm_scan_params *params, int type,
 				 int uid)
@@ -2235,7 +2201,6 @@ struct iwl_scan_umac_handler {
 static const struct iwl_scan_umac_handler iwl_scan_umac_handlers[] = {
 	/* set the newest version first to shorten the list traverse time */
 	IWL_SCAN_UMAC_HANDLER(14),
-	IWL_SCAN_UMAC_HANDLER(13),
 	IWL_SCAN_UMAC_HANDLER(12),
 };
 
@@ -2594,7 +2559,6 @@ static int iwl_scan_req_umac_get_size(u8 scan_ver)
 {
 	switch (scan_ver) {
 		IWL_SCAN_REQ_UMAC_HANDLE_SIZE(14);
-		IWL_SCAN_REQ_UMAC_HANDLE_SIZE(13);
 		IWL_SCAN_REQ_UMAC_HANDLE_SIZE(12);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 4d797fce783a8eb11dd23463828db84743795046 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Wed, 1 Apr 2020 17:25:47 +0300
Subject: cfg80211: Unprotected Beacon frame RX indication

Extend cfg80211_rx_unprot_mlme_mgmt() to cover indication of unprotected
Beacon frames in addition to the previously used Deauthentication and
Disassociation frames. The Beacon frame case is quite similar, but has
a couple of exceptions: it is used both with fully unprotected and with
incorrectly protected frames, and there is a rate limit on the events to
avoid unnecessarily flooding netlink events in case something goes wrong.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200401142548.6990-1-jouni@codeaurora.org
[add missing kernel-doc]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 ++++++++++--
 include/uapi/linux/nl80211.h |  7 +++++++
 net/wireless/nl80211.c       | 13 +++++++++++--
 net/wireless/sme.c           |  2 ++
 4 files changed, 30 insertions(+), 4 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 70e48f66dac8..775952677b3d 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5045,6 +5045,8 @@ struct cfg80211_cqm_config;
  * @pmsr_list: (private) peer measurement requests
  * @pmsr_lock: (private) peer measurements requests/results lock
  * @pmsr_free_wk: (private) peer measurements cleanup work
+ * @unprot_beacon_reported: (private) timestamp of last
+ *	unprotected beacon report
  */
 struct wireless_dev {
 	struct wiphy *wiphy;
@@ -5121,6 +5123,8 @@ struct wireless_dev {
 	struct list_head pmsr_list;
 	spinlock_t pmsr_lock;
 	struct work_struct pmsr_free_wk;
+
+	unsigned long unprot_beacon_reported;
 };
 
 static inline u8 *wdev_address(struct wireless_dev *wdev)
@@ -6135,12 +6139,16 @@ void cfg80211_tx_mlme_mgmt(struct net_device *dev, const u8 *buf, size_t len);
 /**
  * cfg80211_rx_unprot_mlme_mgmt - notification of unprotected mlme mgmt frame
  * @dev: network device
- * @buf: deauthentication frame (header + body)
+ * @buf: received management frame (header + body)
  * @len: length of the frame data
  *
  * This function is called whenever a received deauthentication or dissassoc
  * frame has been dropped in station mode because of MFP being used but the
- * frame was not protected. This function may sleep.
+ * frame was not protected. This is also used to notify reception of a Beacon
+ * frame that was dropped because it did not include a valid MME MIC while
+ * beacon protection was enabled (BIGTK configured in station mode).
+ *
+ * This function may sleep.
  */
 void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev,
 				  const u8 *buf, size_t len);
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 2b691161830f..afdd9802ccb8 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1151,6 +1151,11 @@
  * @NL80211_CMD_SET_TID_CONFIG: Data frame TID specific configuration
  *	is passed using %NL80211_ATTR_TID_CONFIG attribute.
  *
+ * @NL80211_CMD_UNPROT_BEACON: Unprotected or incorrectly protected Beacon
+ *	frame. This event is used to indicate that a received Beacon frame was
+ *	dropped because it did not include a valid MME MIC while beacon
+ *	protection was enabled (BIGTK configured in station mode).
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1377,6 +1382,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_SET_TID_CONFIG,
 
+	NL80211_CMD_UNPROT_BEACON,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 692bcd35f809..2127e5344b1a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -15542,10 +15542,19 @@ void cfg80211_rx_unprot_mlme_mgmt(struct net_device *dev, const u8 *buf,
 	if (WARN_ON(len < 2))
 		return;
 
-	if (ieee80211_is_deauth(mgmt->frame_control))
+	if (ieee80211_is_deauth(mgmt->frame_control)) {
 		cmd = NL80211_CMD_UNPROT_DEAUTHENTICATE;
-	else
+	} else if (ieee80211_is_disassoc(mgmt->frame_control)) {
 		cmd = NL80211_CMD_UNPROT_DISASSOCIATE;
+	} else if (ieee80211_is_beacon(mgmt->frame_control)) {
+		if (wdev->unprot_beacon_reported &&
+		    elapsed_jiffies_msecs(wdev->unprot_beacon_reported) < 10000)
+			return;
+		cmd = NL80211_CMD_UNPROT_BEACON;
+		wdev->unprot_beacon_reported = jiffies;
+	} else {
+		return;
+	}
 
 	trace_cfg80211_rx_unprot_mlme_mgmt(dev, buf, len);
 	nl80211_send_mlme_event(rdev, dev, buf, len, cmd, GFP_ATOMIC, -1,
diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index ac3e60aa1fc8..3554c0d951f4 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -694,6 +694,7 @@ void __cfg80211_connect_result(struct net_device *dev,
 		return;
 	}
 
+	wdev->unprot_beacon_reported = 0;
 	nl80211_send_connect_result(wiphy_to_rdev(wdev->wiphy), dev, cr,
 				    GFP_KERNEL);
 
@@ -921,6 +922,7 @@ void __cfg80211_roamed(struct wireless_dev *wdev,
 	cfg80211_hold_bss(bss_from_pub(info->bss));
 	wdev->current_bss = bss_from_pub(info->bss);
 
+	wdev->unprot_beacon_reported = 0;
 	nl80211_send_roamed(wiphy_to_rdev(wdev->wiphy),
 			    wdev->netdev, info, GFP_KERNEL);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9eaf183af741e3d8393eb571ac8aec9ee7d6530e Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Wed, 1 Apr 2020 17:25:48 +0300
Subject: mac80211: Report beacon protection failures to user space

Report received Beacon frames that do not have a valid MME MIC when
beacon protection is enabled. This covers both the cases of no MME in
the received frame and invalid MIC in the MME.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200401142548.6990-2-jouni@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 91a13aee4378..a724551b8ddf 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -1984,8 +1984,12 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 
 		if (mmie_keyidx < NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS ||
 		    mmie_keyidx >= NUM_DEFAULT_KEYS + NUM_DEFAULT_MGMT_KEYS +
-		    NUM_DEFAULT_BEACON_KEYS)
+		    NUM_DEFAULT_BEACON_KEYS) {
+			cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+						     skb->data,
+						     skb->len);
 			return RX_DROP_MONITOR; /* unexpected BIP keyidx */
+		}
 
 		rx->key = ieee80211_rx_get_bigtk(rx, mmie_keyidx);
 		if (!rx->key)
@@ -2131,6 +2135,10 @@ ieee80211_rx_h_decrypt(struct ieee80211_rx_data *rx)
 	/* either the frame has been decrypted or will be dropped */
 	status->flag |= RX_FLAG_DECRYPTED;
 
+	if (unlikely(ieee80211_is_beacon(fc) && result == RX_DROP_UNUSABLE))
+		cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+					     skb->data, skb->len);
+
 	return result;
 }
 
@@ -2411,8 +2419,12 @@ static int ieee80211_drop_unencrypted_mgmt(struct ieee80211_rx_data *rx)
 			return -EACCES;
 		}
 		if (unlikely(ieee80211_is_beacon(fc) && rx->key &&
-			     ieee80211_get_mmie_keyidx(rx->skb) < 0))
+			     ieee80211_get_mmie_keyidx(rx->skb) < 0)) {
+			cfg80211_rx_unprot_mlme_mgmt(rx->sdata->dev,
+						     rx->skb->data,
+						     rx->skb->len);
 			return -EACCES;
+		}
 		/*
 		 * When using MFP, Action frames are not allowed prior to
 		 * having configured keys.
-- 
cgit v1.2.3-59-g8ed1b


From 6cd536fe62ef58d7c4eac2da07ab0ed7fd19010d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 12:43:01 +0200
Subject: cfg80211: change internal management frame registration API

Almost all drivers below cfg80211 get the API wrong (except for
mac80211) and are unable to cope with multiple registrations for
the same frame type, which is valid due to the match filter.
This seems to indicate the API is wrong, and we should maintain
the full information in cfg80211 instead of the drivers.

Change the API to no longer inform the driver about individual
registrations and unregistrations, but rather every time about
the entire state of the entire wiphy and single wdev, whenever
it may have changed. This also simplifies the code in cfg80211
as it no longer has to track exactly what was unregistered and
can free things immediately.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200417124300.f47f3828afc8.I7f81ef59c2c5a340d7075fb3c6d0e08e8aeffe07@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath6kl/cfg80211.c         | 26 ++++---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c         | 19 ++---
 drivers/net/wireless/marvell/mwifiex/cfg80211.c    | 16 ++---
 drivers/net/wireless/quantenna/qtnfmac/cfg80211.c  | 83 +++++++++++-----------
 include/net/cfg80211.h                             | 23 ++++--
 include/net/mac80211.h                             |  2 +-
 net/mac80211/cfg.c                                 | 50 ++++++-------
 net/mac80211/ieee80211_i.h                         |  2 +-
 net/wireless/core.c                                |  7 +-
 net/wireless/core.h                                |  6 +-
 net/wireless/mlme.c                                | 72 ++++++++-----------
 net/wireless/rdev-ops.h                            | 11 +--
 net/wireless/trace.h                               | 20 +++---
 13 files changed, 159 insertions(+), 178 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/cfg80211.c b/drivers/net/wireless/ath/ath6kl/cfg80211.c
index 37cf602d8adf..67f8f2aa7a53 100644
--- a/drivers/net/wireless/ath/ath6kl/cfg80211.c
+++ b/drivers/net/wireless/ath/ath6kl/cfg80211.c
@@ -3249,22 +3249,19 @@ static int ath6kl_get_antenna(struct wiphy *wiphy,
 	return 0;
 }
 
-static void ath6kl_mgmt_frame_register(struct wiphy *wiphy,
-				       struct wireless_dev *wdev,
-				       u16 frame_type, bool reg)
+static void ath6kl_update_mgmt_frame_registrations(struct wiphy *wiphy,
+						   struct wireless_dev *wdev,
+						   struct mgmt_frame_regs *upd)
 {
 	struct ath6kl_vif *vif = ath6kl_vif_from_wdev(wdev);
 
-	ath6kl_dbg(ATH6KL_DBG_WLAN_CFG, "%s: frame_type=0x%x reg=%d\n",
-		   __func__, frame_type, reg);
-	if (frame_type == IEEE80211_STYPE_PROBE_REQ) {
-		/*
-		 * Note: This notification callback is not allowed to sleep, so
-		 * we cannot send WMI_PROBE_REQ_REPORT_CMD here. Instead, we
-		 * hardcode target to report Probe Request frames all the time.
-		 */
-		vif->probe_req_report = reg;
-	}
+	/*
+	 * FIXME: send WMI_PROBE_REQ_REPORT_CMD here instead of hardcoding
+	 *	  the reporting in the target all the time, this callback
+	 *	  *is* allowed to sleep after all.
+	 */
+	vif->probe_req_report =
+		upd->interface_stypes & BIT(IEEE80211_STYPE_PROBE_REQ >> 4);
 }
 
 static int ath6kl_cfg80211_sscan_start(struct wiphy *wiphy,
@@ -3464,7 +3461,8 @@ static struct cfg80211_ops ath6kl_cfg80211_ops = {
 	.remain_on_channel = ath6kl_remain_on_channel,
 	.cancel_remain_on_channel = ath6kl_cancel_remain_on_channel,
 	.mgmt_tx = ath6kl_mgmt_tx,
-	.mgmt_frame_register = ath6kl_mgmt_frame_register,
+	.update_mgmt_frame_registrations =
+		ath6kl_update_mgmt_frame_registrations,
 	.get_antenna = ath6kl_get_antenna,
 	.sched_scan_start = ath6kl_cfg80211_sscan_start,
 	.sched_scan_stop = ath6kl_cfg80211_sscan_stop,
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 2ba165330038..fa846471dac2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4979,21 +4979,15 @@ brcmf_cfg80211_change_station(struct wiphy *wiphy, struct net_device *ndev,
 }
 
 static void
-brcmf_cfg80211_mgmt_frame_register(struct wiphy *wiphy,
-				   struct wireless_dev *wdev,
-				   u16 frame_type, bool reg)
+brcmf_cfg80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
+					       struct wireless_dev *wdev,
+					       struct mgmt_frame_regs *upd)
 {
 	struct brcmf_cfg80211_vif *vif;
-	u16 mgmt_type;
 
-	brcmf_dbg(TRACE, "Enter, frame_type %04x, reg=%d\n", frame_type, reg);
-
-	mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
 	vif = container_of(wdev, struct brcmf_cfg80211_vif, wdev);
-	if (reg)
-		vif->mgmt_rx_reg |= BIT(mgmt_type);
-	else
-		vif->mgmt_rx_reg &= ~BIT(mgmt_type);
+
+	vif->mgmt_rx_reg = upd->interface_stypes;
 }
 
 
@@ -5408,7 +5402,8 @@ static struct cfg80211_ops brcmf_cfg80211_ops = {
 	.change_station = brcmf_cfg80211_change_station,
 	.sched_scan_start = brcmf_cfg80211_sched_scan_start,
 	.sched_scan_stop = brcmf_cfg80211_sched_scan_stop,
-	.mgmt_frame_register = brcmf_cfg80211_mgmt_frame_register,
+	.update_mgmt_frame_registrations =
+		brcmf_cfg80211_update_mgmt_frame_registrations,
 	.mgmt_tx = brcmf_cfg80211_mgmt_tx,
 	.remain_on_channel = brcmf_p2p_remain_on_channel,
 	.cancel_remain_on_channel = brcmf_cfg80211_cancel_remain_on_channel,
diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 1566d2197906..21a17d4017c4 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -269,17 +269,12 @@ mwifiex_cfg80211_mgmt_tx(struct wiphy *wiphy, struct wireless_dev *wdev,
  * CFG802.11 operation handler to register a mgmt frame.
  */
 static void
-mwifiex_cfg80211_mgmt_frame_register(struct wiphy *wiphy,
-				     struct wireless_dev *wdev,
-				     u16 frame_type, bool reg)
+mwifiex_cfg80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
+						 struct wireless_dev *wdev,
+						 struct mgmt_frame_regs *upd)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(wdev->netdev);
-	u32 mask;
-
-	if (reg)
-		mask = priv->mgmt_frame_mask | BIT(frame_type >> 4);
-	else
-		mask = priv->mgmt_frame_mask & ~BIT(frame_type >> 4);
+	u32 mask = upd->interface_stypes;
 
 	if (mask != priv->mgmt_frame_mask) {
 		priv->mgmt_frame_mask = mask;
@@ -4189,7 +4184,8 @@ static struct cfg80211_ops mwifiex_cfg80211_ops = {
 	.del_key = mwifiex_cfg80211_del_key,
 	.set_default_mgmt_key = mwifiex_cfg80211_set_default_mgmt_key,
 	.mgmt_tx = mwifiex_cfg80211_mgmt_tx,
-	.mgmt_frame_register = mwifiex_cfg80211_mgmt_frame_register,
+	.update_mgmt_frame_registrations =
+		mwifiex_cfg80211_update_mgmt_frame_registrations,
 	.remain_on_channel = mwifiex_cfg80211_remain_on_channel,
 	.cancel_remain_on_channel = mwifiex_cfg80211_cancel_remain_on_channel,
 	.set_default_key = mwifiex_cfg80211_set_default_key,
diff --git a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
index 8be17106008d..54cdf3ad09d7 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
+++ b/drivers/net/wireless/quantenna/qtnfmac/cfg80211.c
@@ -389,55 +389,57 @@ static int qtnf_set_wiphy_params(struct wiphy *wiphy, u32 changed)
 }
 
 static void
-qtnf_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev,
-			 u16 frame_type, bool reg)
+qtnf_update_mgmt_frame_registrations(struct wiphy *wiphy,
+				     struct wireless_dev *wdev,
+				     struct mgmt_frame_regs *upd)
 {
 	struct qtnf_vif *vif = qtnf_netdev_get_priv(wdev->netdev);
-	u16 mgmt_type;
-	u16 new_mask;
-	u16 qlink_frame_type = 0;
+	u16 new_mask = upd->interface_stypes;
+	u16 old_mask = vif->mgmt_frames_bitmask;
+	static const struct {
+		u16 mask, qlink_type;
+	} updates[] = {
+		{
+			.mask = BIT(IEEE80211_STYPE_REASSOC_REQ >> 4) |
+				BIT(IEEE80211_STYPE_ASSOC_REQ >> 4),
+			.qlink_type = QLINK_MGMT_FRAME_ASSOC_REQ,
+		},
+		{
+			.mask = BIT(IEEE80211_STYPE_AUTH >> 4),
+			.qlink_type = QLINK_MGMT_FRAME_AUTH,
+		},
+		{
+			.mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4),
+			.qlink_type = QLINK_MGMT_FRAME_PROBE_REQ,
+		},
+		{
+			.mask = BIT(IEEE80211_STYPE_ACTION >> 4),
+			.qlink_type = QLINK_MGMT_FRAME_ACTION,
+		},
+	};
+	unsigned int i;
 
-	mgmt_type = (frame_type & IEEE80211_FCTL_STYPE) >> 4;
+	if (new_mask == old_mask)
+		return;
 
-	if (reg)
-		new_mask = vif->mgmt_frames_bitmask | BIT(mgmt_type);
-	else
-		new_mask = vif->mgmt_frames_bitmask & ~BIT(mgmt_type);
+	for (i = 0; i < ARRAY_SIZE(updates); i++) {
+		u16 mask = updates[i].mask;
+		u16 qlink_frame_type = updates[i].qlink_type;
+		bool reg;
 
-	if (new_mask == vif->mgmt_frames_bitmask)
-		return;
+		/* the ! are here due to the assoc/reassoc merge */
+		if (!(new_mask & mask) == !(old_mask & mask))
+			continue;
 
-	switch (frame_type & IEEE80211_FCTL_STYPE) {
-	case IEEE80211_STYPE_REASSOC_REQ:
-	case IEEE80211_STYPE_ASSOC_REQ:
-		qlink_frame_type = QLINK_MGMT_FRAME_ASSOC_REQ;
-		break;
-	case IEEE80211_STYPE_AUTH:
-		qlink_frame_type = QLINK_MGMT_FRAME_AUTH;
-		break;
-	case IEEE80211_STYPE_PROBE_REQ:
-		qlink_frame_type = QLINK_MGMT_FRAME_PROBE_REQ;
-		break;
-	case IEEE80211_STYPE_ACTION:
-		qlink_frame_type = QLINK_MGMT_FRAME_ACTION;
-		break;
-	default:
-		pr_warn("VIF%u.%u: unsupported frame type: %X\n",
-			vif->mac->macid, vif->vifid,
-			(frame_type & IEEE80211_FCTL_STYPE) >> 4);
-		return;
-	}
+		reg = new_mask & mask;
 
-	if (qtnf_cmd_send_register_mgmt(vif, qlink_frame_type, reg)) {
-		pr_warn("VIF%u.%u: failed to %sregister mgmt frame type 0x%x\n",
-			vif->mac->macid, vif->vifid, reg ? "" : "un",
-			frame_type);
-		return;
+		if (qtnf_cmd_send_register_mgmt(vif, qlink_frame_type, reg))
+			pr_warn("VIF%u.%u: failed to %sregister qlink frame type 0x%x\n",
+				vif->mac->macid, vif->vifid, reg ? "" : "un",
+				qlink_frame_type);
 	}
 
 	vif->mgmt_frames_bitmask = new_mask;
-	pr_debug("VIF%u.%u: %sregistered mgmt frame type 0x%x\n",
-		 vif->mac->macid, vif->vifid, reg ? "" : "un", frame_type);
 }
 
 static int
@@ -1017,7 +1019,8 @@ static struct cfg80211_ops qtn_cfg80211_ops = {
 	.change_beacon		= qtnf_change_beacon,
 	.stop_ap		= qtnf_stop_ap,
 	.set_wiphy_params	= qtnf_set_wiphy_params,
-	.mgmt_frame_register	= qtnf_mgmt_frame_register,
+	.update_mgmt_frame_registrations =
+		qtnf_update_mgmt_frame_registrations,
 	.mgmt_tx		= qtnf_mgmt_tx,
 	.change_station		= qtnf_change_station,
 	.del_station		= qtnf_del_station,
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 775952677b3d..bc273f6d60f2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3384,6 +3384,17 @@ struct cfg80211_update_owe_info {
 	size_t ie_len;
 };
 
+/**
+ * struct mgmt_frame_regs - management frame registrations data
+ * @global_stypes: bitmap of management frame subtypes registered
+ *	for the entire device
+ * @interface_stypes: bitmap of management frame subtypes registered
+ *	for the given interface
+ */
+struct mgmt_frame_regs {
+	u32 global_stypes, interface_stypes;
+};
+
 /**
  * struct cfg80211_ops - backend description for wireless configuration
  *
@@ -3608,8 +3619,8 @@ struct cfg80211_update_owe_info {
  *	The driver should not call cfg80211_sched_scan_stopped() for a requested
  *	stop (when this method returns 0).
  *
- * @mgmt_frame_register: Notify driver that a management frame type was
- *	registered. The callback is allowed to sleep.
+ * @update_mgmt_frame_registrations: Notify the driver that management frame
+ *	registrations were updated. The callback is allowed to sleep.
  *
  * @set_antenna: Set antenna configuration (tx_ant, rx_ant) on the device.
  *	Parameters are bitmaps of allowed antennas to use for TX/RX. Drivers may
@@ -3932,9 +3943,9 @@ struct cfg80211_ops {
 				      struct net_device *dev,
 				      u32 rate, u32 pkts, u32 intvl);
 
-	void	(*mgmt_frame_register)(struct wiphy *wiphy,
-				       struct wireless_dev *wdev,
-				       u16 frame_type, bool reg);
+	void	(*update_mgmt_frame_registrations)(struct wiphy *wiphy,
+						   struct wireless_dev *wdev,
+						   struct mgmt_frame_regs *upd);
 
 	int	(*set_antenna)(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant);
 	int	(*get_antenna)(struct wiphy *wiphy, u32 *tx_ant, u32 *rx_ant);
@@ -5015,6 +5026,7 @@ struct cfg80211_cqm_config;
  *	by cfg80211 on change_interface
  * @mgmt_registrations: list of registrations for management frames
  * @mgmt_registrations_lock: lock for the list
+ * @mgmt_registrations_update_wk: update work to defer from atomic context
  * @mtx: mutex used to lock data in this struct, may be used by drivers
  *	and some API functions require it held
  * @beacon_interval: beacon interval used on this device for transmitting
@@ -5060,6 +5072,7 @@ struct wireless_dev {
 
 	struct list_head mgmt_registrations;
 	spinlock_t mgmt_registrations_lock;
+	struct work_struct mgmt_registrations_update_wk;
 
 	struct mutex mtx;
 
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index b6b4de0e4b5e..f6dc5a38720f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1647,7 +1647,7 @@ struct ieee80211_vif {
 	struct dentry *debugfs_dir;
 #endif
 
-	unsigned int probe_req_reg;
+	bool probe_req_reg;
 
 	bool txqs_stopped[IEEE80211_NUM_ACS];
 
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index b90f2131ec7a..e62b4764e82e 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3398,44 +3398,35 @@ int ieee80211_attach_ack_skb(struct ieee80211_local *local, struct sk_buff *skb,
 	return 0;
 }
 
-static void ieee80211_mgmt_frame_register(struct wiphy *wiphy,
+static void
+ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
 					  struct wireless_dev *wdev,
-					  u16 frame_type, bool reg)
+					  struct mgmt_frame_regs *upd)
 {
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
+	u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4);
+	bool global_change, intf_change;
 
-	switch (frame_type) {
-	case IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ:
-		if (reg) {
-			local->probe_req_reg++;
-			sdata->vif.probe_req_reg++;
-		} else {
-			if (local->probe_req_reg)
-				local->probe_req_reg--;
+	global_change =
+		local->probe_req_reg != !!(upd->global_stypes & preq_mask);
+	local->probe_req_reg = upd->global_stypes & preq_mask;
 
-			if (sdata->vif.probe_req_reg)
-				sdata->vif.probe_req_reg--;
-		}
+	intf_change = sdata->vif.probe_req_reg !=
+				!!(upd->interface_stypes & preq_mask);
+	sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask;
 
-		if (!local->open_count)
-			break;
+	if (!local->open_count)
+		return;
 
-		if (ieee80211_sdata_running(sdata)) {
-			if (sdata->vif.probe_req_reg == 1)
-				drv_config_iface_filter(local, sdata,
-							FIF_PROBE_REQ,
-							FIF_PROBE_REQ);
-			else if (sdata->vif.probe_req_reg == 0)
-				drv_config_iface_filter(local, sdata, 0,
-							FIF_PROBE_REQ);
-		}
+	if (intf_change && ieee80211_sdata_running(sdata))
+		drv_config_iface_filter(local, sdata,
+					sdata->vif.probe_req_reg ?
+						FIF_PROBE_REQ : 0,
+					FIF_PROBE_REQ);
 
+	if (global_change)
 		ieee80211_configure_filter(local);
-		break;
-	default:
-		break;
-	}
 }
 
 static int ieee80211_set_antenna(struct wiphy *wiphy, u32 tx_ant, u32 rx_ant)
@@ -4020,7 +4011,8 @@ const struct cfg80211_ops mac80211_config_ops = {
 	.mgmt_tx_cancel_wait = ieee80211_mgmt_tx_cancel_wait,
 	.set_cqm_rssi_config = ieee80211_set_cqm_rssi_config,
 	.set_cqm_rssi_range_config = ieee80211_set_cqm_rssi_range_config,
-	.mgmt_frame_register = ieee80211_mgmt_frame_register,
+	.update_mgmt_frame_registrations =
+		ieee80211_update_mgmt_frame_registrations,
 	.set_antenna = ieee80211_set_antenna,
 	.get_antenna = ieee80211_get_antenna,
 	.set_rekey_data = ieee80211_set_rekey_data,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 934a91bef575..da41ee996d3d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1167,7 +1167,7 @@ struct ieee80211_local {
 	/* number of interfaces with corresponding FIF_ flags */
 	int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
 	    fif_probe_req;
-	int probe_req_reg;
+	bool probe_req_reg;
 	unsigned int filter_flags; /* FIF_* */
 
 	bool wiphy_ciphers_allocated;
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 341402b4f178..5757dea2aa94 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -480,9 +480,6 @@ use_default_name:
 	INIT_LIST_HEAD(&rdev->bss_list);
 	INIT_LIST_HEAD(&rdev->sched_scan_req_list);
 	INIT_WORK(&rdev->scan_done_wk, __cfg80211_scan_done);
-	INIT_LIST_HEAD(&rdev->mlme_unreg);
-	spin_lock_init(&rdev->mlme_unreg_lock);
-	INIT_WORK(&rdev->mlme_unreg_wk, cfg80211_mlme_unreg_wk);
 	INIT_DELAYED_WORK(&rdev->dfs_update_channels_wk,
 			  cfg80211_dfs_channels_update_work);
 #ifdef CONFIG_CFG80211_WEXT
@@ -1030,7 +1027,6 @@ void wiphy_unregister(struct wiphy *wiphy)
 	cancel_delayed_work_sync(&rdev->dfs_update_channels_wk);
 	flush_work(&rdev->destroy_work);
 	flush_work(&rdev->sched_scan_stop_wk);
-	flush_work(&rdev->mlme_unreg_wk);
 	flush_work(&rdev->propagate_radar_detect_wk);
 	flush_work(&rdev->propagate_cac_done_wk);
 
@@ -1094,6 +1090,7 @@ static void __cfg80211_unregister_wdev(struct wireless_dev *wdev, bool sync)
 	rdev->devlist_generation++;
 
 	cfg80211_mlme_purge_registrations(wdev);
+	flush_work(&wdev->mgmt_registrations_update_wk);
 
 	switch (wdev->iftype) {
 	case NL80211_IFTYPE_P2P_DEVICE:
@@ -1238,6 +1235,8 @@ void cfg80211_init_wdev(struct cfg80211_registered_device *rdev,
 	spin_lock_init(&wdev->event_lock);
 	INIT_LIST_HEAD(&wdev->mgmt_registrations);
 	spin_lock_init(&wdev->mgmt_registrations_lock);
+	INIT_WORK(&wdev->mgmt_registrations_update_wk,
+		  cfg80211_mgmt_registrations_update_wk);
 	INIT_LIST_HEAD(&wdev->pmsr_list);
 	spin_lock_init(&wdev->pmsr_lock);
 	INIT_WORK(&wdev->pmsr_free_wk, cfg80211_pmsr_free_wk);
diff --git a/net/wireless/core.h b/net/wireless/core.h
index bb897a803ffe..30fb2c35ae43 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -60,10 +60,6 @@ struct cfg80211_registered_device {
 	struct list_head beacon_registrations;
 	spinlock_t beacon_registrations_lock;
 
-	struct list_head mlme_unreg;
-	spinlock_t mlme_unreg_lock;
-	struct work_struct mlme_unreg_wk;
-
 	/* protected by RTNL only */
 	int num_running_ifaces;
 	int num_running_monitor_ifaces;
@@ -386,7 +382,7 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
 				u16 frame_type, const u8 *match_data,
 				int match_len, struct netlink_ext_ack *extack);
-void cfg80211_mlme_unreg_wk(struct work_struct *wk);
+void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk);
 void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index e4805a3bd310..2e1a21e90b83 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -429,43 +429,37 @@ struct cfg80211_mgmt_registration {
 	u8 match[];
 };
 
-static void
-cfg80211_process_mlme_unregistrations(struct cfg80211_registered_device *rdev)
+static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
 {
+	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct wireless_dev *tmp;
 	struct cfg80211_mgmt_registration *reg;
+	struct mgmt_frame_regs upd = {};
 
 	ASSERT_RTNL();
 
-	spin_lock_bh(&rdev->mlme_unreg_lock);
-	while ((reg = list_first_entry_or_null(&rdev->mlme_unreg,
-					       struct cfg80211_mgmt_registration,
-					       list))) {
-		list_del(&reg->list);
-		spin_unlock_bh(&rdev->mlme_unreg_lock);
-
-		if (rdev->ops->mgmt_frame_register) {
-			u16 frame_type = le16_to_cpu(reg->frame_type);
+	rcu_read_lock();
+	list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) {
+		list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) {
+			u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4);
 
-			rdev_mgmt_frame_register(rdev, reg->wdev,
-						 frame_type, false);
+			upd.global_stypes |= mask;
+			if (tmp == wdev)
+				upd.interface_stypes |= mask;
 		}
-
-		kfree(reg);
-
-		spin_lock_bh(&rdev->mlme_unreg_lock);
 	}
-	spin_unlock_bh(&rdev->mlme_unreg_lock);
+	rcu_read_unlock();
+
+	rdev_update_mgmt_frame_registrations(rdev, wdev, &upd);
 }
 
-void cfg80211_mlme_unreg_wk(struct work_struct *wk)
+void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
 {
-	struct cfg80211_registered_device *rdev;
-
-	rdev = container_of(wk, struct cfg80211_registered_device,
-			    mlme_unreg_wk);
+	struct wireless_dev *wdev = container_of(wk, struct wireless_dev,
+						 mgmt_registrations_update_wk);
 
 	rtnl_lock();
-	cfg80211_process_mlme_unregistrations(rdev);
+	cfg80211_mgmt_registrations_update(wdev);
 	rtnl_unlock();
 }
 
@@ -473,8 +467,6 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 				u16 frame_type, const u8 *match_data,
 				int match_len, struct netlink_ext_ack *extack)
 {
-	struct wiphy *wiphy = wdev->wiphy;
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct cfg80211_mgmt_registration *reg, *nreg;
 	int err = 0;
 	u16 mgmt_type;
@@ -534,10 +526,8 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 		}
 	}
 
-	if (err) {
-		kfree(nreg);
+	if (err)
 		goto out;
-	}
 
 	memcpy(nreg->match, match_data, match_len);
 	nreg->match_len = match_len;
@@ -547,15 +537,12 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 	list_add(&nreg->list, &wdev->mgmt_registrations);
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
-	/* process all unregistrations to avoid driver confusion */
-	cfg80211_process_mlme_unregistrations(rdev);
-
-	if (rdev->ops->mgmt_frame_register)
-		rdev_mgmt_frame_register(rdev, wdev, frame_type, true);
+	cfg80211_mgmt_registrations_update(wdev);
 
 	return 0;
 
  out:
+	kfree(nreg);
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
 	return err;
@@ -574,11 +561,9 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 			continue;
 
 		list_del(&reg->list);
-		spin_lock(&rdev->mlme_unreg_lock);
-		list_add_tail(&reg->list, &rdev->mlme_unreg);
-		spin_unlock(&rdev->mlme_unreg_lock);
+		kfree(reg);
 
-		schedule_work(&rdev->mlme_unreg_wk);
+		schedule_work(&wdev->mgmt_registrations_update_wk);
 	}
 
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
@@ -594,15 +579,16 @@ void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlportid)
 
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev)
 {
-	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wdev->wiphy);
+	struct cfg80211_mgmt_registration *reg, *tmp;
 
 	spin_lock_bh(&wdev->mgmt_registrations_lock);
-	spin_lock(&rdev->mlme_unreg_lock);
-	list_splice_tail_init(&wdev->mgmt_registrations, &rdev->mlme_unreg);
-	spin_unlock(&rdev->mlme_unreg_lock);
+	list_for_each_entry_safe(reg, tmp, &wdev->mgmt_registrations, list) {
+		list_del(&reg->list);
+		kfree(reg);
+	}
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
-	cfg80211_process_mlme_unregistrations(rdev);
+	cfg80211_mgmt_registrations_update(wdev);
 }
 
 int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index 99462f0c4e08..df5142e86c4f 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -819,13 +819,16 @@ rdev_set_cqm_txe_config(struct cfg80211_registered_device *rdev,
 }
 
 static inline void
-rdev_mgmt_frame_register(struct cfg80211_registered_device *rdev,
-			 struct wireless_dev *wdev, u16 frame_type, bool reg)
+rdev_update_mgmt_frame_registrations(struct cfg80211_registered_device *rdev,
+				     struct wireless_dev *wdev,
+				     struct mgmt_frame_regs *upd)
 {
 	might_sleep();
 
-	trace_rdev_mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
-	rdev->ops->mgmt_frame_register(&rdev->wiphy, wdev , frame_type, reg);
+	trace_rdev_update_mgmt_frame_registrations(&rdev->wiphy, wdev, upd);
+	if (rdev->ops->update_mgmt_frame_registrations)
+		rdev->ops->update_mgmt_frame_registrations(&rdev->wiphy, wdev,
+							   upd);
 	trace_rdev_return_void(&rdev->wiphy);
 }
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 839df54cee21..ee736620f1e3 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -1582,25 +1582,25 @@ TRACE_EVENT(rdev_set_bitrate_mask,
 		  WIPHY_PR_ARG, NETDEV_PR_ARG, MAC_PR_ARG(peer))
 );
 
-TRACE_EVENT(rdev_mgmt_frame_register,
+TRACE_EVENT(rdev_update_mgmt_frame_registrations,
 	TP_PROTO(struct wiphy *wiphy, struct wireless_dev *wdev,
-		 u16 frame_type, bool reg),
-	TP_ARGS(wiphy, wdev, frame_type, reg),
+		 struct mgmt_frame_regs *upd),
+	TP_ARGS(wiphy, wdev, upd),
 	TP_STRUCT__entry(
 		WIPHY_ENTRY
 		WDEV_ENTRY
-		__field(u16, frame_type)
-		__field(bool, reg)
+		__field(u16, global_stypes)
+		__field(u16, interface_stypes)
 	),
 	TP_fast_assign(
 		WIPHY_ASSIGN;
 		WDEV_ASSIGN;
-		__entry->frame_type = frame_type;
-		__entry->reg = reg;
+		__entry->global_stypes = upd->global_stypes;
+		__entry->interface_stypes = upd->interface_stypes;
 	),
-	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", frame_type: 0x%.2x, reg: %s ",
-		  WIPHY_PR_ARG, WDEV_PR_ARG, __entry->frame_type,
-		  __entry->reg ? "true" : "false")
+	TP_printk(WIPHY_PR_FMT ", " WDEV_PR_FMT ", global: 0x%.2x, intf: 0x%.2x",
+		  WIPHY_PR_ARG, WDEV_PR_ARG,
+		  __entry->global_stypes, __entry->interface_stypes)
 );
 
 TRACE_EVENT(rdev_return_int_tx_rx,
-- 
cgit v1.2.3-59-g8ed1b


From 9dba48a6ece79da064655736dc7347a5fcadedef Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 12:40:15 +0200
Subject: cfg80211: support multicast RX registration

For DPP, there's a need to receive multicast action frames,
but many drivers need a special filter configuration for this.

Support announcing from userspace in the management registration
that multicast RX is required, with an extended feature flag if
the driver handles this.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200417124013.c46238801048.Ib041d437ce0bff28a0c6d5dc915f68f1d8591002@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  4 ++++
 include/uapi/linux/nl80211.h | 13 +++++++++++++
 net/wireless/core.h          |  3 ++-
 net/wireless/mlme.c          | 38 ++++++++++++++++++++++++++++++--------
 net/wireless/nl80211.c       | 10 ++++++++++
 5 files changed, 59 insertions(+), 9 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index bc273f6d60f2..dbb9675fe38f 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -3390,9 +3390,13 @@ struct cfg80211_update_owe_info {
  *	for the entire device
  * @interface_stypes: bitmap of management frame subtypes registered
  *	for the given interface
+ * @global_mcast_stypes: mcast RX is needed globally for these subtypes
+ * @interface_mcast_stypes: mcast RX is needed on this interface
+ *	for these subtypes
  */
 struct mgmt_frame_regs {
 	u32 global_stypes, interface_stypes;
+	u32 global_mcast_stypes, interface_mcast_stypes;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index afdd9802ccb8..e0dc89eceab8 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -687,6 +687,10 @@
  *	four bytes for vendor frames including the OUI. The registration
  *	cannot be dropped, but is removed automatically when the netlink
  *	socket is closed. Multiple registrations can be made.
+ *	The %NL80211_ATTR_RECEIVE_MULTICAST flag attribute can be given if
+ *	%NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS is available, in which
+ *	case the registration can also be modified to include/exclude the
+ *	flag, rather than requiring unregistration to change it.
  * @NL80211_CMD_REGISTER_ACTION: Alias for @NL80211_CMD_REGISTER_FRAME for
  *	backward compatibility
  * @NL80211_CMD_FRAME: Management frame TX request and RX notification. This
@@ -2477,6 +2481,9 @@ enum nl80211_commands {
  *	no roaming occurs between the reauth threshold and PMK expiration,
  *	disassociation is still forced.
  *
+ * @NL80211_ATTR_RECEIVE_MULTICAST: multicast flag for the
+ *	%NL80211_CMD_REGISTER_FRAME command, see the description there.
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2952,6 +2959,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_PMK_LIFETIME,
 	NL80211_ATTR_PMK_REAUTH_THRESHOLD,
 
+	NL80211_ATTR_RECEIVE_MULTICAST,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
@@ -5691,6 +5700,9 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_DEL_IBSS_STA: The driver supports removing stations
  *      in IBSS mode, essentially by dropping their state.
  *
+ * @NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS: management frame registrations
+ *	are possible for multicast frames and those will be reported properly.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5742,6 +5754,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH,
 	NL80211_EXT_FEATURE_PROTECTED_TWT,
 	NL80211_EXT_FEATURE_DEL_IBSS_STA,
+	NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 30fb2c35ae43..639d41896573 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -381,7 +381,8 @@ void cfg80211_mlme_down(struct cfg80211_registered_device *rdev,
 			struct net_device *dev);
 int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_pid,
 				u16 frame_type, const u8 *match_data,
-				int match_len, struct netlink_ext_ack *extack);
+				int match_len, bool multicast_rx,
+				struct netlink_ext_ack *extack);
 void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk);
 void cfg80211_mlme_unregister_socket(struct wireless_dev *wdev, u32 nlpid);
 void cfg80211_mlme_purge_registrations(struct wireless_dev *wdev);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 2e1a21e90b83..409497a3527d 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -426,6 +426,8 @@ struct cfg80211_mgmt_registration {
 
 	__le16 frame_type;
 
+	bool multicast_rx;
+
 	u8 match[];
 };
 
@@ -442,10 +444,18 @@ static void cfg80211_mgmt_registrations_update(struct wireless_dev *wdev)
 	list_for_each_entry_rcu(tmp, &rdev->wiphy.wdev_list, list) {
 		list_for_each_entry_rcu(reg, &tmp->mgmt_registrations, list) {
 			u32 mask = BIT(le16_to_cpu(reg->frame_type) >> 4);
+			u32 mcast_mask = 0;
+
+			if (reg->multicast_rx)
+				mcast_mask = mask;
 
 			upd.global_stypes |= mask;
-			if (tmp == wdev)
+			upd.global_mcast_stypes |= mcast_mask;
+
+			if (tmp == wdev) {
 				upd.interface_stypes |= mask;
+				upd.interface_mcast_stypes |= mcast_mask;
+			}
 		}
 	}
 	rcu_read_unlock();
@@ -465,11 +475,13 @@ void cfg80211_mgmt_registrations_update_wk(struct work_struct *wk)
 
 int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 				u16 frame_type, const u8 *match_data,
-				int match_len, struct netlink_ext_ack *extack)
+				int match_len, bool multicast_rx,
+				struct netlink_ext_ack *extack)
 {
 	struct cfg80211_mgmt_registration *reg, *nreg;
 	int err = 0;
 	u16 mgmt_type;
+	bool update_multicast = false;
 
 	if (!wdev->wiphy->mgmt_stypes)
 		return -EOPNOTSUPP;
@@ -520,6 +532,11 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 			continue;
 
 		if (memcmp(reg->match, match_data, mlen) == 0) {
+			if (reg->multicast_rx != multicast_rx) {
+				update_multicast = true;
+				reg->multicast_rx = multicast_rx;
+				break;
+			}
 			NL_SET_ERR_MSG(extack, "Match already configured");
 			err = -EALREADY;
 			break;
@@ -529,12 +546,17 @@ int cfg80211_mlme_register_mgmt(struct wireless_dev *wdev, u32 snd_portid,
 	if (err)
 		goto out;
 
-	memcpy(nreg->match, match_data, match_len);
-	nreg->match_len = match_len;
-	nreg->nlportid = snd_portid;
-	nreg->frame_type = cpu_to_le16(frame_type);
-	nreg->wdev = wdev;
-	list_add(&nreg->list, &wdev->mgmt_registrations);
+	if (update_multicast) {
+		kfree(nreg);
+	} else {
+		memcpy(nreg->match, match_data, match_len);
+		nreg->match_len = match_len;
+		nreg->nlportid = snd_portid;
+		nreg->frame_type = cpu_to_le16(frame_type);
+		nreg->wdev = wdev;
+		nreg->multicast_rx = multicast_rx;
+		list_add(&nreg->list, &wdev->mgmt_registrations);
+	}
 	spin_unlock_bh(&wdev->mgmt_registrations_lock);
 
 	cfg80211_mgmt_registrations_update(wdev);
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 2127e5344b1a..73a3e885d4dd 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -661,6 +661,7 @@ const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_CONTROL_PORT_NO_PREAUTH] = { .type = NLA_FLAG },
 	[NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1),
 	[NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100),
+	[NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
 };
 
 /* policy for the key attributes */
@@ -10773,9 +10774,18 @@ static int nl80211_register_mgmt(struct sk_buff *skb, struct genl_info *info)
 	if (!rdev->ops->mgmt_tx)
 		return -EOPNOTSUPP;
 
+	if (info->attrs[NL80211_ATTR_RECEIVE_MULTICAST] &&
+	    !wiphy_ext_feature_isset(&rdev->wiphy,
+				     NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS)) {
+		GENL_SET_ERR_MSG(info,
+				 "multicast RX registrations are not supported");
+		return -EOPNOTSUPP;
+	}
+
 	return cfg80211_mlme_register_mgmt(wdev, info->snd_portid, frame_type,
 					   nla_data(info->attrs[NL80211_ATTR_FRAME_MATCH]),
 					   nla_len(info->attrs[NL80211_ATTR_FRAME_MATCH]),
+					   info->attrs[NL80211_ATTR_RECEIVE_MULTICAST],
 					   info->extack);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 155d7c733807190258639c66b36340948f369349 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Mon, 20 Apr 2020 14:06:00 +0200
Subject: nl80211: allow client-only BIGTK support

The current NL80211_EXT_FEATURE_BEACON_PROTECTION feature flag
requires both AP and client support. Add a new one called
NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT that enables support
only in client (and P2P-client) modes.

Link: https://lore.kernel.org/r/20200420140559.6ba704053a5a.Ifeb869fb0b48e52fe0cb9c15572b93ac8a924f8d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h |  3 +++
 net/wireless/nl80211.c       | 19 +++++++++++++++----
 2 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e0dc89eceab8..9679d561f7d0 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5690,6 +5690,8 @@ enum nl80211_feature_flags {
  *
  * @NL80211_EXT_FEATURE_BEACON_PROTECTION: The driver supports Beacon protection
  *	and can receive key configuration for BIGTK using key indexes 6 and 7.
+ * @NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT: The driver supports Beacon
+ *	protection as a client only and cannot transmit protected beacons.
  *
  * @NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH: The driver can disable the
  *	forwarding of preauth frames over the control port. They are then
@@ -5755,6 +5757,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_PROTECTED_TWT,
 	NL80211_EXT_FEATURE_DEL_IBSS_STA,
 	NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS,
+	NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 73a3e885d4dd..d470d77d2eb6 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -3905,14 +3905,25 @@ static int nl80211_get_key(struct sk_buff *skb, struct genl_info *info)
 	};
 	void *hdr;
 	struct sk_buff *msg;
+	bool bigtk_support = false;
+
+	if (wiphy_ext_feature_isset(&rdev->wiphy,
+				    NL80211_EXT_FEATURE_BEACON_PROTECTION))
+		bigtk_support = true;
+
+	if ((dev->ieee80211_ptr->iftype == NL80211_IFTYPE_STATION ||
+	     dev->ieee80211_ptr->iftype == NL80211_IFTYPE_P2P_CLIENT) &&
+	    wiphy_ext_feature_isset(&rdev->wiphy,
+				    NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
+		bigtk_support = true;
 
 	if (info->attrs[NL80211_ATTR_KEY_IDX]) {
 		key_idx = nla_get_u8(info->attrs[NL80211_ATTR_KEY_IDX]);
-		if (key_idx > 5 &&
-		    !wiphy_ext_feature_isset(
-			    &rdev->wiphy,
-			    NL80211_EXT_FEATURE_BEACON_PROTECTION))
+
+		if (key_idx >= 6 && key_idx <= 7 && !bigtk_support) {
+			GENL_SET_ERR_MSG(info, "BIGTK not supported");
 			return -EINVAL;
+		}
 	}
 
 	if (info->attrs[NL80211_ATTR_MAC])
-- 
cgit v1.2.3-59-g8ed1b


From 873b1cf61105a67f01f6fc3758405edb1bd1ba35 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Tue, 21 Apr 2020 17:48:15 +0300
Subject: mac80211: Process multicast RX registration for Action frames

Convert a user space registration for processing multicast Action frames
(NL80211_CMD_REGISTER_FRAME with NL80211_ATTR_RECEIVE_MULTICAST) to a
new enum ieee80211_filter_flags bit FIF_MCAST_ACTION so that drivers can
update their RX filter parameters appropriately, if needed.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200421144815.19175-1-jouni@codeaurora.org
[rename variables to rx_mcast_action_reg indicating action frames only]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  6 ++++++
 net/mac80211/cfg.c         | 14 +++++++++++---
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/main.c        |  3 +++
 4 files changed, 21 insertions(+), 3 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f6dc5a38720f..f12fe3b0a868 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1620,6 +1620,8 @@ enum ieee80211_vif_flags {
  *	monitor interface (if that is requested.)
  * @probe_req_reg: probe requests should be reported to mac80211 for this
  *	interface.
+ * @rx_mcast_action_reg: multicast Action frames should be reported to mac80211
+ *	for this interface.
  * @drv_priv: data area for driver use, will always be aligned to
  *	sizeof(void \*).
  * @txq: the multicast data TX queue (if driver uses the TXQ abstraction)
@@ -1648,6 +1650,7 @@ struct ieee80211_vif {
 #endif
 
 	bool probe_req_reg;
+	bool rx_mcast_action_reg;
 
 	bool txqs_stopped[IEEE80211_NUM_ACS];
 
@@ -3091,6 +3094,8 @@ void ieee80211_free_txskb(struct ieee80211_hw *hw, struct sk_buff *skb);
  * @FIF_PSPOLL: pass PS Poll frames
  *
  * @FIF_PROBE_REQ: pass probe request frames
+ *
+ * @FIF_MCAST_ACTION: pass multicast Action frames
  */
 enum ieee80211_filter_flags {
 	FIF_ALLMULTI		= 1<<1,
@@ -3101,6 +3106,7 @@ enum ieee80211_filter_flags {
 	FIF_OTHER_BSS		= 1<<6,
 	FIF_PSPOLL		= 1<<7,
 	FIF_PROBE_REQ		= 1<<8,
+	FIF_MCAST_ACTION	= 1<<9,
 };
 
 /**
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index e62b4764e82e..f0d43b9cfa43 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3406,15 +3406,23 @@ ieee80211_update_mgmt_frame_registrations(struct wiphy *wiphy,
 	struct ieee80211_local *local = wiphy_priv(wiphy);
 	struct ieee80211_sub_if_data *sdata = IEEE80211_WDEV_TO_SUB_IF(wdev);
 	u32 preq_mask = BIT(IEEE80211_STYPE_PROBE_REQ >> 4);
+	u32 action_mask = BIT(IEEE80211_STYPE_ACTION >> 4);
 	bool global_change, intf_change;
 
 	global_change =
-		local->probe_req_reg != !!(upd->global_stypes & preq_mask);
+		(local->probe_req_reg != !!(upd->global_stypes & preq_mask)) ||
+		(local->rx_mcast_action_reg !=
+		 !!(upd->global_mcast_stypes & action_mask));
 	local->probe_req_reg = upd->global_stypes & preq_mask;
+	local->rx_mcast_action_reg = upd->global_mcast_stypes & action_mask;
 
-	intf_change = sdata->vif.probe_req_reg !=
-				!!(upd->interface_stypes & preq_mask);
+	intf_change = (sdata->vif.probe_req_reg !=
+		       !!(upd->interface_stypes & preq_mask)) ||
+		(sdata->vif.rx_mcast_action_reg !=
+		 !!(upd->interface_mcast_stypes & action_mask));
 	sdata->vif.probe_req_reg = upd->interface_stypes & preq_mask;
+	sdata->vif.rx_mcast_action_reg =
+		upd->interface_mcast_stypes & action_mask;
 
 	if (!local->open_count)
 		return;
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index da41ee996d3d..9407cf44305c 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1168,6 +1168,7 @@ struct ieee80211_local {
 	int fif_fcsfail, fif_plcpfail, fif_control, fif_other_bss, fif_pspoll,
 	    fif_probe_req;
 	bool probe_req_reg;
+	bool rx_mcast_action_reg;
 	unsigned int filter_flags; /* FIF_* */
 
 	bool wiphy_ciphers_allocated;
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 0e9ad60fb2b3..a0cb052ea30d 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -64,6 +64,9 @@ void ieee80211_configure_filter(struct ieee80211_local *local)
 	if (local->fif_pspoll)
 		new_flags |= FIF_PSPOLL;
 
+	if (local->rx_mcast_action_reg)
+		new_flags |= FIF_MCAST_ACTION;
+
 	spin_lock_bh(&local->filter_lock);
 	changed_flags = local->filter_flags ^ new_flags;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9166cc49767a646990a73380480356416b7794eb Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Mar 2020 15:09:32 +0200
Subject: mac80211: implement Operating Mode Notification extended NSS support

Somehow we missed this for a long time, but similar to the extended
NSS support in VHT capabilities, we need to have this in Operating
Mode notification.

Implement it by
 * parsing the 160/80+80 bit there and setting the bandwidth
   appropriately
 * having callers of ieee80211_get_vht_max_nss() pass in the current
   max NSS value as received in the operating mode notification in
   order to modify it appropriately depending on the extended NSS
   bits.

This updates all drivers that use it, i.e. only iwlwifi/mvm.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.098483728cfa.I4e8c25d3288441759c2793247197229f0696a37d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c |  6 +++---
 include/linux/ieee80211.h                   | 12 +++++++++---
 net/mac80211/vht.c                          | 10 ++++++++--
 net/wireless/util.c                         | 26 ++++++++++++++------------
 4 files changed, 34 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index c1aba2bf73cf..a8c13f6fbce0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -1,10 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /******************************************************************************
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * Contact Information:
  *  Intel Linux Wireless <linuxwifi@intel.com>
@@ -1430,7 +1429,8 @@ static u32 rs_bw_from_sta_bw(struct ieee80211_sta *sta)
 		 */
 		if (ieee80211_get_vht_max_nss(&vht_cap,
 					      IEEE80211_VHT_CHANWIDTH_160MHZ,
-					      0, true) < sta->rx_nss)
+					      0, true,
+					      sta->rx_nss) < sta->rx_nss)
 			return RATE_MCS_CHAN_WIDTH_80;
 		return RATE_MCS_CHAN_WIDTH_160;
 	case IEEE80211_STA_RX_BW_80:
diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 16268ef1cbcc..c326aec535c6 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -9,7 +9,7 @@
  * Copyright (c) 2006, Michael Wu <flamingice@sourmilk.net>
  * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (c) 2018 - 2019 Intel Corporation
+ * Copyright (c) 2018 - 2020 Intel Corporation
  */
 
 #ifndef LINUX_IEEE80211_H
@@ -859,6 +859,7 @@ enum ieee80211_ht_chanwidth_values {
  * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ: 40 MHz channel width
  * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ: 80 MHz channel width
  * @IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ: 160 MHz or 80+80 MHz channel width
+ * @IEEE80211_OPMODE_NOTIF_BW_160_80P80: 160 / 80+80 MHz indicator flag
  * @IEEE80211_OPMODE_NOTIF_RX_NSS_MASK: number of spatial streams mask
  *	(the NSS value is the value of this field + 1)
  * @IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT: number of spatial streams shift
@@ -866,11 +867,12 @@ enum ieee80211_ht_chanwidth_values {
  *	using a beamforming steering matrix
  */
 enum ieee80211_vht_opmode_bits {
-	IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK	= 3,
+	IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK	= 0x03,
 	IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ	= 0,
 	IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ	= 1,
 	IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ	= 2,
 	IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ	= 3,
+	IEEE80211_OPMODE_NOTIF_BW_160_80P80	= 0x04,
 	IEEE80211_OPMODE_NOTIF_RX_NSS_MASK	= 0x70,
 	IEEE80211_OPMODE_NOTIF_RX_NSS_SHIFT	= 4,
 	IEEE80211_OPMODE_NOTIF_RX_NSS_TYPE_BF	= 0x80,
@@ -1731,6 +1733,9 @@ struct ieee80211_mu_edca_param_set {
  * @ext_nss_bw_capable: indicates whether or not the local transmitter
  *	(rate scaling algorithm) can deal with the new logic
  *	(dot11VHTExtendedNSSBWCapable)
+ * @max_vht_nss: current maximum NSS as advertised by the STA in
+ *	operating mode notification, can be 0 in which case the
+ *	capability data will be used to derive this (from MCS support)
  *
  * Due to the VHT Extended NSS Bandwidth Support, the maximum NSS can
  * vary for a given BW/MCS. This function parses the data.
@@ -1739,7 +1744,8 @@ struct ieee80211_mu_edca_param_set {
  */
 int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 			      enum ieee80211_vht_chanwidth bw,
-			      int mcs, bool ext_nss_bw_capable);
+			      int mcs, bool ext_nss_bw_capable,
+			      unsigned int max_vht_nss);
 
 /* 802.11ax HE MAC capabilities */
 #define IEEE80211_HE_MAC_CAP0_HTC_HE				0x01
diff --git a/net/mac80211/vht.c b/net/mac80211/vht.c
index 632f07401850..9c6045f9c24d 100644
--- a/net/mac80211/vht.c
+++ b/net/mac80211/vht.c
@@ -4,7 +4,7 @@
  *
  * Portions of this file
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -575,15 +575,21 @@ u32 __ieee80211_vht_handle_opmode(struct ieee80211_sub_if_data *sdata,
 
 	switch (opmode & IEEE80211_OPMODE_NOTIF_CHANWIDTH_MASK) {
 	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_20MHZ:
+		/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
 		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_20;
 		break;
 	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_40MHZ:
+		/* ignore IEEE80211_OPMODE_NOTIF_BW_160_80P80 must not be set */
 		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_40;
 		break;
 	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_80MHZ:
-		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
+		if (opmode & IEEE80211_OPMODE_NOTIF_BW_160_80P80)
+			sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
+		else
+			sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_80;
 		break;
 	case IEEE80211_OPMODE_NOTIF_CHANWIDTH_160MHZ:
+		/* legacy only, no longer used by newer spec */
 		sta->cur_max_bandwidth = IEEE80211_STA_RX_BW_160;
 		break;
 	}
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 6590efbbcbb9..123d6ce79b8e 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -5,7 +5,7 @@
  * Copyright 2007-2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 #include <linux/export.h>
 #include <linux/bitops.h>
@@ -2030,10 +2030,10 @@ EXPORT_SYMBOL(cfg80211_send_layer2_update);
 
 int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 			      enum ieee80211_vht_chanwidth bw,
-			      int mcs, bool ext_nss_bw_capable)
+			      int mcs, bool ext_nss_bw_capable,
+			      unsigned int max_vht_nss)
 {
 	u16 map = le16_to_cpu(cap->supp_mcs.rx_mcs_map);
-	int max_vht_nss = 0;
 	int ext_nss_bw;
 	int supp_width;
 	int i, mcs_encoding;
@@ -2041,7 +2041,7 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 	if (map == 0xffff)
 		return 0;
 
-	if (WARN_ON(mcs > 9))
+	if (WARN_ON(mcs > 9 || max_vht_nss > 8))
 		return 0;
 	if (mcs <= 7)
 		mcs_encoding = 0;
@@ -2050,16 +2050,18 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 	else
 		mcs_encoding = 2;
 
-	/* find max_vht_nss for the given MCS */
-	for (i = 7; i >= 0; i--) {
-		int supp = (map >> (2 * i)) & 3;
+	if (!max_vht_nss) {
+		/* find max_vht_nss for the given MCS */
+		for (i = 7; i >= 0; i--) {
+			int supp = (map >> (2 * i)) & 3;
 
-		if (supp == 3)
-			continue;
+			if (supp == 3)
+				continue;
 
-		if (supp >= mcs_encoding) {
-			max_vht_nss = i + 1;
-			break;
+			if (supp >= mcs_encoding) {
+				max_vht_nss = i + 1;
+				break;
+			}
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d46b4ab870fa29445b701e922e9aa36b15f833ea Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 26 Mar 2020 15:09:33 +0200
Subject: mac80211: add twt_protected flag to the bss_conf structure

Add a flag to the BSS conf whether the BSS and STA support protected TWT.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.1dcb2d16fa74.I74d7c007dad2601d2e39f54612fe6554dd5ab386@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 2 ++
 net/mac80211/mlme.c    | 9 +++++++++
 2 files changed, 11 insertions(+)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f12fe3b0a868..5fb80dd8bbbc 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -508,6 +508,7 @@ struct ieee80211_ftm_responder_params {
  *	mode only, set if the AP advertises TWT responder role)
  * @twt_responder: does this BSS support TWT requester (relevant for managed
  *	mode only, set if the AP advertises TWT responder role)
+ * @twt_protected: does this BSS support protected TWT frames
  * @assoc: association status
  * @ibss_joined: indicates whether this station is part of an IBSS
  *	or not
@@ -618,6 +619,7 @@ struct ieee80211_bss_conf {
 	bool he_support;
 	bool twt_requester;
 	bool twt_responder;
+	bool twt_protected;
 	/* association related data */
 	bool assoc, ibss_joined;
 	bool ibss_creator;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 7139335f29c0..b77787995723 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3384,10 +3384,19 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 						  sta);
 
 		bss_conf->he_support = sta->sta.he_cap.has_he;
+		if (elems->rsnx && elems->rsnx_len &&
+		    (elems->rsnx[0] & WLAN_RSNX_CAPA_PROTECTED_TWT) &&
+		    wiphy_ext_feature_isset(local->hw.wiphy,
+					    NL80211_EXT_FEATURE_PROTECTED_TWT))
+			bss_conf->twt_protected = true;
+		else
+			bss_conf->twt_protected = false;
+
 		changed |= ieee80211_recalc_twt_req(sdata, sta, elems);
 	} else {
 		bss_conf->he_support = false;
 		bss_conf->twt_requester = false;
+		bss_conf->twt_protected = false;
 	}
 
 	if (bss_conf->he_support) {
-- 
cgit v1.2.3-59-g8ed1b


From a4055e74a2ff7c70ccdb6c36254ad5181464f211 Mon Sep 17 00:00:00 2001
From: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Date: Thu, 26 Mar 2020 15:09:34 +0200
Subject: mac80211: Don't destroy auth data in case of anti-clogging

SAE AP may reject authentication with WLAN_STATUS_ANTI_CLOG_REQUIRED.
As the user space will immediately continue the authentication flow,
there is no need to destroy the authentication data in this case.
This saves unneeded station removal and releasing the channel.

Signed-off-by: Andrei Otcheretianski <andrei.otcheretianski@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.7483996157a8.I8040a842874aaf6d209df3fc8a2acb97a0bf508b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index b77787995723..56d61bc9954d 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2948,10 +2948,15 @@ static void ieee80211_rx_mgmt_auth(struct ieee80211_sub_if_data *sdata,
 	}
 
 	if (status_code != WLAN_STATUS_SUCCESS) {
+		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
+
+		if (auth_alg == WLAN_AUTH_SAE &&
+		    status_code == WLAN_STATUS_ANTI_CLOG_REQUIRED)
+			return;
+
 		sdata_info(sdata, "%pM denied authentication (status %d)\n",
 			   mgmt->sa, status_code);
 		ieee80211_destroy_auth_data(sdata, false);
-		cfg80211_rx_mlme_mgmt(sdata->dev, (u8 *)mgmt, len);
 		event.u.mlme.status = MLME_DENIED;
 		event.u.mlme.reason = status_code;
 		drv_event_callback(sdata->local, sdata, &event);
-- 
cgit v1.2.3-59-g8ed1b


From 2a392596d8811c6d58c014ec881b159c75a0cf45 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 26 Mar 2020 15:09:35 +0200
Subject: cfg80211: Parse HE membership selector

This extends the support for drivers that rebuild IEs in the
FW (same as with HT/VHT).

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.20feaabfb484.I886252639604c8e3e84b8ef97962f1b0e4beec81@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 1 +
 include/net/cfg80211.h    | 3 ++-
 net/wireless/nl80211.c    | 2 ++
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c326aec535c6..38f513ce7528 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1067,6 +1067,7 @@ struct ieee80211_mgmt {
 /* Supported rates membership selectors */
 #define BSS_MEMBERSHIP_SELECTOR_HT_PHY	127
 #define BSS_MEMBERSHIP_SELECTOR_VHT_PHY	126
+#define BSS_MEMBERSHIP_SELECTOR_HE_PHY	122
 
 /* mgmt header + 1 byte category code */
 #define IEEE80211_MIN_ACTION_SIZE offsetof(struct ieee80211_mgmt, u.action.u)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index dbb9675fe38f..e288fdcb3df2 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1054,6 +1054,7 @@ enum cfg80211_ap_settings_flags {
  * @ht_required: stations must support HT
  * @vht_required: stations must support VHT
  * @twt_responder: Enable Target Wait Time
+ * @he_required: stations must support HE
  * @flags: flags, as defined in enum cfg80211_ap_settings_flags
  * @he_obss_pd: OBSS Packet Detection settings
  * @he_bss_color: BSS Color settings
@@ -1083,7 +1084,7 @@ struct cfg80211_ap_settings {
 	const struct ieee80211_vht_cap *vht_cap;
 	const struct ieee80211_he_cap_elem *he_cap;
 	const struct ieee80211_he_operation *he_oper;
-	bool ht_required, vht_required;
+	bool ht_required, vht_required, he_required;
 	bool twt_responder;
 	u32 flags;
 	struct ieee80211_he_obss_pd he_obss_pd;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index d470d77d2eb6..3d27b24c68b2 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -4738,6 +4738,8 @@ static void nl80211_check_ap_rate_selectors(struct cfg80211_ap_settings *params,
 			params->ht_required = true;
 		if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_VHT_PHY)
 			params->vht_required = true;
+		if (rates[2 + i] == BSS_MEMBERSHIP_SELECTOR_HE_PHY)
+			params->he_required = true;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4826e721103acf42421304330cf48a642fa163bb Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 26 Mar 2020 15:09:36 +0200
Subject: mac80211: Skip entries with HE membership selector

When parsing supported rates IE.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.ed3e66f8c197.I93aad0e5ddb7ce79f05f8153922acb9aa5076d38@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 56d61bc9954d..c77f47b41356 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3154,15 +3154,16 @@ static void ieee80211_get_rates(struct ieee80211_supported_band *sband,
 			*have_higher_than_11mbit = true;
 
 		/*
-		 * Skip HT and VHT BSS membership selectors since they're not
-		 * rates.
+		 * Skip HT, VHT and HE BSS membership selectors since they're
+		 * not rates.
 		 *
 		 * Note: Even though the membership selector and the basic
 		 *	 rate flag share the same bit, they are not exactly
 		 *	 the same.
 		 */
 		if (supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HT_PHY) ||
-		    supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY))
+		    supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_VHT_PHY) ||
+		    supp_rates[i] == (0x80 | BSS_MEMBERSHIP_SELECTOR_HE_PHY))
 			continue;
 
 		for (j = 0; j < sband->n_bitrates; j++) {
-- 
cgit v1.2.3-59-g8ed1b


From 31d8bb4e07f80935ee9bf599a9d99de7ca90fc5a Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Thu, 26 Mar 2020 15:09:37 +0200
Subject: mac80211: agg-tx: refactor sending addba

Move the arming of the timer and the sending of the ADDBA request into
a helper function so that different places can reuse the same logic.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.58a337eb90a1.I75934e6464535fbf43969acc796bc886291e79a5@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-tx.c | 67 ++++++++++++++++++++++++++++++---------------------
 1 file changed, 39 insertions(+), 28 deletions(-)

diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 33da6f738c99..32f40c4f3120 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -448,6 +448,43 @@ static void sta_addba_resp_timer_expired(struct timer_list *t)
 	ieee80211_stop_tx_ba_session(&sta->sta, tid);
 }
 
+static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
+					      struct tid_ampdu_tx *tid_tx)
+{
+	struct ieee80211_sub_if_data *sdata = sta->sdata;
+	struct ieee80211_local *local = sta->local;
+	u8 tid = tid_tx->tid;
+	u16 buf_size;
+
+	/* activate the timer for the recipient's addBA response */
+	mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
+	ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n",
+	       sta->sta.addr, tid);
+
+	spin_lock_bh(&sta->lock);
+	sta->ampdu_mlme.last_addba_req_time[tid] = jiffies;
+	sta->ampdu_mlme.addba_req_num[tid]++;
+	spin_unlock_bh(&sta->lock);
+
+	if (sta->sta.he_cap.has_he) {
+		buf_size = local->hw.max_tx_aggregation_subframes;
+	} else {
+		/*
+		 * We really should use what the driver told us it will
+		 * transmit as the maximum, but certain APs (e.g. the
+		 * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
+		 * will crash when we use a lower number.
+		 */
+		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
+	}
+
+	/* send AddBA request */
+	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
+				     tid_tx->dialog_token,
+				     sta->tid_seq[tid] >> 4,
+				     buf_size, tid_tx->timeout);
+}
+
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 {
 	struct tid_ampdu_tx *tid_tx;
@@ -462,7 +499,6 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 		.timeout = 0,
 	};
 	int ret;
-	u16 buf_size;
 
 	tid_tx = rcu_dereference_protected_tid_tx(sta, tid);
 
@@ -508,32 +544,7 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 		return;
 	}
 
-	/* activate the timer for the recipient's addBA response */
-	mod_timer(&tid_tx->addba_resp_timer, jiffies + ADDBA_RESP_INTERVAL);
-	ht_dbg(sdata, "activated addBA response timer on %pM tid %d\n",
-	       sta->sta.addr, tid);
-
-	spin_lock_bh(&sta->lock);
-	sta->ampdu_mlme.last_addba_req_time[tid] = jiffies;
-	sta->ampdu_mlme.addba_req_num[tid]++;
-	spin_unlock_bh(&sta->lock);
-
-	if (sta->sta.he_cap.has_he) {
-		buf_size = local->hw.max_tx_aggregation_subframes;
-	} else {
-		/*
-		 * We really should use what the driver told us it will
-		 * transmit as the maximum, but certain APs (e.g. the
-		 * LinkSys WRT120N with FW v1.0.07 build 002 Jun 18 2012)
-		 * will crash when we use a lower number.
-		 */
-		buf_size = IEEE80211_MAX_AMPDU_BUF_HT;
-	}
-
-	/* send AddBA request */
-	ieee80211_send_addba_request(sdata, sta->sta.addr, tid,
-				     tid_tx->dialog_token, params.ssn,
-				     buf_size, tid_tx->timeout);
+	ieee80211_send_addba_with_timeout(sta, tid_tx);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 0c197f16f7bc5ddb43073690a80fb15998ad61e4 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Thu, 26 Mar 2020 15:09:38 +0200
Subject: mac80211: agg-tx: add an option to defer ADDBA transmit

The driver tells mac80211 to send ADDBA with the SSN (starting sequence
number) from the head of the queue, while the transmission of all the
frames in the queue may take a while, which causes the peer to time out.
In order to fix this scenario, add an option to defer the ADDBA transmit
until the queue is drained.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.0f27423fec75.If67daab123a27c1cbddef000d6a3f212aa6309ef@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h  |  6 +++++-
 net/mac80211/agg-tx.c   | 12 +++++++++++-
 net/mac80211/sta_info.h |  2 ++
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 5fb80dd8bbbc..f3147633dda2 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -3125,7 +3125,10 @@ enum ieee80211_filter_flags {
  * @IEEE80211_AMPDU_RX_START: start RX aggregation
  * @IEEE80211_AMPDU_RX_STOP: stop RX aggregation
  * @IEEE80211_AMPDU_TX_START: start TX aggregation, the driver must either
- *	call ieee80211_start_tx_ba_cb_irqsafe() or return the special
+ *	call ieee80211_start_tx_ba_cb_irqsafe() or
+ *	call ieee80211_start_tx_ba_cb_irqsafe() with status
+ *	%IEEE80211_AMPDU_TX_START_DELAY_ADDBA to delay addba after
+ *	ieee80211_start_tx_ba_cb_irqsafe is called, or just return the special
  *	status %IEEE80211_AMPDU_TX_START_IMMEDIATE.
  * @IEEE80211_AMPDU_TX_OPERATIONAL: TX aggregation has become operational
  * @IEEE80211_AMPDU_TX_STOP_CONT: stop TX aggregation but continue transmitting
@@ -3151,6 +3154,7 @@ enum ieee80211_ampdu_mlme_action {
 };
 
 #define IEEE80211_AMPDU_TX_START_IMMEDIATE 1
+#define IEEE80211_AMPDU_TX_START_DELAY_ADDBA 2
 
 /**
  * struct ieee80211_ampdu_params - AMPDU action parameters
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index 32f40c4f3120..c2d5f512526d 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -483,6 +483,8 @@ static void ieee80211_send_addba_with_timeout(struct sta_info *sta,
 				     tid_tx->dialog_token,
 				     sta->tid_seq[tid] >> 4,
 				     buf_size, tid_tx->timeout);
+
+	WARN_ON(test_and_set_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state));
 }
 
 void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
@@ -521,7 +523,9 @@ void ieee80211_tx_ba_session_handle_start(struct sta_info *sta, int tid)
 
 	params.ssn = sta->tid_seq[tid] >> 4;
 	ret = drv_ampdu_action(local, sdata, &params);
-	if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
+	if (ret == IEEE80211_AMPDU_TX_START_DELAY_ADDBA) {
+		return;
+	} else if (ret == IEEE80211_AMPDU_TX_START_IMMEDIATE) {
 		/*
 		 * We didn't send the request yet, so don't need to check
 		 * here if we already got a response, just mark as driver
@@ -765,6 +769,12 @@ void ieee80211_start_tx_ba_cb(struct sta_info *sta, int tid,
 	if (WARN_ON(test_and_set_bit(HT_AGG_STATE_DRV_READY, &tid_tx->state)))
 		return;
 
+	if (!test_bit(HT_AGG_STATE_SENT_ADDBA, &tid_tx->state)) {
+		ieee80211_send_addba_with_timeout(sta, tid_tx);
+		/* RESPONSE_RECEIVED state would trigger the flow again */
+		return;
+	}
+
 	if (test_bit(HT_AGG_STATE_RESPONSE_RECEIVED, &tid_tx->state))
 		ieee80211_agg_tx_operational(local, sta, tid);
 }
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index 36f1abaab9ff..a5de3aa6ea42 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -3,6 +3,7 @@
  * Copyright 2002-2005, Devicescape Software, Inc.
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
+ * Copyright(c) 2020 Intel Corporation
  */
 
 #ifndef STA_INFO_H
@@ -116,6 +117,7 @@ enum ieee80211_sta_info_flags {
 #define HT_AGG_STATE_WANT_STOP		5
 #define HT_AGG_STATE_START_CB		6
 #define HT_AGG_STATE_STOP_CB		7
+#define HT_AGG_STATE_SENT_ADDBA		8
 
 DECLARE_EWMA(avg_signal, 10, 8)
 enum ieee80211_agg_stop_reason {
-- 
cgit v1.2.3-59-g8ed1b


From 302ff8b7a2b01cfb7645f112bb259af1c146c57a Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 26 Mar 2020 15:09:39 +0200
Subject: mac80211: Fail association when AP has no legacy rates

The MLME logic had a workaround that allowed continuing an
association with an AP even if the AP did not provide any basic
rates in its supported rates in the association response, assuming
that the first (non basic) legacy rate could be used as a basic rate.
However, this did not consider the case where the AP (which is
obviously buggy) did not provide any legacy rate.

Fix this by failing the association, as this can result in
an unexpected failure in the low level driver and FW, e.g., in
rate scale logic etc.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.d70a1450d83f.I6e6ce5efda351a8544c0e7bfeee260fe3360d401@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c77f47b41356..59a35c7997c3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5036,8 +5036,16 @@ static int ieee80211_prep_connection(struct ieee80211_sub_if_data *sdata,
 		 * doesn't happen any more, but keep the workaround so
 		 * in case some *other* APs are buggy in different ways
 		 * we can connect -- with a warning.
+		 * Allow this workaround only in case the AP provided at least
+		 * one rate.
 		 */
-		if (!basic_rates && min_rate_index >= 0) {
+		if (min_rate_index < 0) {
+			sdata_info(sdata,
+				   "No legacy rates in association response\n");
+
+			sta_info_free(local, new_sta);
+			return -EINVAL;
+		} else if (!basic_rates) {
 			sdata_info(sdata,
 				   "No basic rates, using min rate instead\n");
 			basic_rates = BIT(min_rate_index);
-- 
cgit v1.2.3-59-g8ed1b


From dba25b04c61170cf8592f87df2bb086201047473 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Mar 2020 15:09:40 +0200
Subject: mac80211: minstrel_ht_assign_best_tp_rates: remove redundant test

We know this pointer isn't NULL and in fact dereferenced it before,
remove the redundant test.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.adf551928846.Iae9015573d6c350cc1b12a311d6d13d086beec6c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rc80211_minstrel_ht.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/rc80211_minstrel_ht.c b/net/mac80211/rc80211_minstrel_ht.c
index 694a31978a04..5547111d22bf 100644
--- a/net/mac80211/rc80211_minstrel_ht.c
+++ b/net/mac80211/rc80211_minstrel_ht.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (C) 2010-2013 Felix Fietkau <nbd@openwrt.org>
+ * Copyright (C) 2019-2020 Intel Corporation
  */
 #include <linux/netdevice.h>
 #include <linux/types.h>
@@ -490,7 +491,7 @@ minstrel_ht_assign_best_tp_rates(struct minstrel_ht_sta *mi,
 	tmp_prob = mi->groups[tmp_group].rates[tmp_idx].prob_avg;
 	tmp_mcs_tp = minstrel_ht_get_tp_avg(mi, tmp_group, tmp_idx, tmp_prob);
 
-	if (tmp_cck_tp_rate && tmp_cck_tp > tmp_mcs_tp) {
+	if (tmp_cck_tp > tmp_mcs_tp) {
 		for(i = 0; i < MAX_THR_RATES; i++) {
 			minstrel_ht_sort_best_tp_rates(mi, tmp_cck_tp_rate[i],
 						       tmp_mcs_tp_rate);
-- 
cgit v1.2.3-59-g8ed1b


From bdee75d2ac23a1db30d3c689665a584c20220f97 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 26 Mar 2020 15:09:41 +0200
Subject: mac80211_hwsim: indicate in IBSS that we have transmitted beacons

This is actually true because there's no functional beacon distribution
and lets us get active scanning working - without it, mac80211 doesn't
respond to probe requests.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200326150855.554d1199b309.Id86fd36e3d88d2a75d6e0c6618fd93ce8fe84065@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index f1c08b31c564..05e8203aa6d9 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -2464,6 +2464,11 @@ static void mac80211_hwsim_get_et_stats(struct ieee80211_hw *hw,
 	WARN_ON(i != MAC80211_HWSIM_SSTATS_LEN);
 }
 
+static int mac80211_hwsim_tx_last_beacon(struct ieee80211_hw *hw)
+{
+	return 1;
+}
+
 #define HWSIM_COMMON_OPS					\
 	.tx = mac80211_hwsim_tx,				\
 	.start = mac80211_hwsim_start,				\
@@ -2474,6 +2479,7 @@ static void mac80211_hwsim_get_et_stats(struct ieee80211_hw *hw,
 	.config = mac80211_hwsim_config,			\
 	.configure_filter = mac80211_hwsim_configure_filter,	\
 	.bss_info_changed = mac80211_hwsim_bss_info_changed,	\
+	.tx_last_beacon = mac80211_hwsim_tx_last_beacon,	\
 	.sta_add = mac80211_hwsim_sta_add,			\
 	.sta_remove = mac80211_hwsim_sta_remove,		\
 	.sta_notify = mac80211_hwsim_sta_notify,		\
-- 
cgit v1.2.3-59-g8ed1b


From b572510100165ba037ba43dbbb0f05e8da12c741 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Wed, 1 Apr 2020 18:18:02 -0700
Subject: ieee80211: share 802.11 unit conversion helpers

MHZ_TO_KHZ and KHZ_TO_MHZ are useful to drivers and
elsewhere so export these in the common ieee80211 header.
Move the power helpers also because we might as well.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200402011810.22947-2-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 10 ++++++++++
 include/net/regulatory.h  |  7 -------
 2 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 38f513ce7528..a561db435a4b 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3330,6 +3330,16 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size)
 #define TU_TO_JIFFIES(x)	(usecs_to_jiffies((x) * 1024))
 #define TU_TO_EXP_TIME(x)	(jiffies + TU_TO_JIFFIES(x))
 
+/* convert frequencies */
+#define MHZ_TO_KHZ(freq) ((freq) * 1000)
+#define KHZ_TO_MHZ(freq) ((freq) / 1000)
+
+/* convert powers */
+#define DBI_TO_MBI(gain) ((gain) * 100)
+#define MBI_TO_DBI(gain) ((gain) / 100)
+#define DBM_TO_MBM(gain) ((gain) * 100)
+#define MBM_TO_DBM(gain) ((gain) / 100)
+
 /**
  * ieee80211_action_contains_tpc - checks if the frame contains TPC element
  * @skb: the skb containing the frame, length will be checked
diff --git a/include/net/regulatory.h b/include/net/regulatory.h
index 3469750df0f4..09a3099886e5 100644
--- a/include/net/regulatory.h
+++ b/include/net/regulatory.h
@@ -231,13 +231,6 @@ struct ieee80211_regdomain {
 	struct ieee80211_reg_rule reg_rules[];
 };
 
-#define MHZ_TO_KHZ(freq) ((freq) * 1000)
-#define KHZ_TO_MHZ(freq) ((freq) / 1000)
-#define DBI_TO_MBI(gain) ((gain) * 100)
-#define MBI_TO_DBI(gain) ((gain) / 100)
-#define DBM_TO_MBM(gain) ((gain) * 100)
-#define MBM_TO_DBM(gain) ((gain) / 100)
-
 #define REG_RULE_EXT(start, end, bw, gain, eirp, dfs_cac, reg_flags)	\
 {									\
 	.freq_range.start_freq_khz = MHZ_TO_KHZ(start),			\
-- 
cgit v1.2.3-59-g8ed1b


From 934f4c7dd3a544bb8000f7436f1f0e12e04ebc37 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Wed, 1 Apr 2020 18:18:03 -0700
Subject: cfg80211: express channels with a KHz component

Some bands (S1G) define channels centered on a non-integer
MHz. Give ieee80211_channel and cfg80211_chan_def a
freq_offset component where the final frequency can be
expressed as:

MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset;

Also provide some helper functions to do the frequency
conversion and test for equality.

Retain the existing interface to frequency and channel
conversion helpers, and expose new ones which handle
frequencies in units of KHz.

Some internal functions (net/wireless/chan.c) pass around
a frequency value. Convert these to units of KHz.

mesh, ibss, wext, etc. are currently ignored.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200402011810.22947-3-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 92 ++++++++++++++++++++++++++++++++++++++++++++++----
 net/wireless/chan.c    | 68 +++++++++++++++++++++----------------
 net/wireless/reg.c     | 40 +++++++++++-----------
 net/wireless/scan.c    |  4 +--
 net/wireless/trace.h   | 21 +++++++++---
 net/wireless/util.c    | 32 +++++++++++-------
 6 files changed, 182 insertions(+), 75 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e288fdcb3df2..a82fc59a1d82 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -128,6 +128,7 @@ enum ieee80211_channel_flags {
  * with cfg80211.
  *
  * @center_freq: center frequency in MHz
+ * @freq_offset: offset from @center_freq, in KHz
  * @hw_value: hardware-specific value for the channel
  * @flags: channel flags from &enum ieee80211_channel_flags.
  * @orig_flags: channel flags at registration time, used by regulatory
@@ -149,6 +150,7 @@ enum ieee80211_channel_flags {
 struct ieee80211_channel {
 	enum nl80211_band band;
 	u32 center_freq;
+	u16 freq_offset;
 	u16 hw_value;
 	u32 flags;
 	int max_antenna_gain;
@@ -617,6 +619,7 @@ struct key_params {
  *	If edmg is requested (i.e. the .channels member is non-zero),
  *	chan will define the primary channel and all other
  *	parameters are ignored.
+ * @freq1_offset: offset from @center_freq1, in KHz
  */
 struct cfg80211_chan_def {
 	struct ieee80211_channel *chan;
@@ -624,6 +627,7 @@ struct cfg80211_chan_def {
 	u32 center_freq1;
 	u32 center_freq2;
 	struct ieee80211_edmg edmg;
+	u16 freq1_offset;
 };
 
 /**
@@ -713,6 +717,7 @@ cfg80211_chandef_identical(const struct cfg80211_chan_def *chandef1,
 	return (chandef1->chan == chandef2->chan &&
 		chandef1->width == chandef2->width &&
 		chandef1->center_freq1 == chandef2->center_freq1 &&
+		chandef1->freq1_offset == chandef2->freq1_offset &&
 		chandef1->center_freq2 == chandef2->center_freq2);
 }
 
@@ -5177,30 +5182,92 @@ static inline void *wdev_priv(struct wireless_dev *wdev)
  * cfg80211 offers a number of utility functions that can be useful.
  */
 
+/**
+ * ieee80211_channel_equal - compare two struct ieee80211_channel
+ *
+ * @a: 1st struct ieee80211_channel
+ * @b: 2nd struct ieee80211_channel
+ * Return: true if center frequency of @a == @b
+ */
+static inline bool
+ieee80211_channel_equal(struct ieee80211_channel *a,
+			struct ieee80211_channel *b)
+{
+	return (a->center_freq == b->center_freq &&
+		a->freq_offset == b->freq_offset);
+}
+
+/**
+ * ieee80211_channel_to_khz - convert ieee80211_channel to frequency in KHz
+ * @chan: struct ieee80211_channel to convert
+ * Return: The corresponding frequency (in KHz)
+ */
+static inline u32
+ieee80211_channel_to_khz(const struct ieee80211_channel *chan)
+{
+	return MHZ_TO_KHZ(chan->center_freq) + chan->freq_offset;
+}
+
+/**
+ * ieee80211_channel_to_freq_khz - convert channel number to frequency
+ * @chan: channel number
+ * @band: band, necessary due to channel number overlap
+ * Return: The corresponding frequency (in KHz), or 0 if the conversion failed.
+ */
+u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band);
+
 /**
  * ieee80211_channel_to_frequency - convert channel number to frequency
  * @chan: channel number
  * @band: band, necessary due to channel number overlap
  * Return: The corresponding frequency (in MHz), or 0 if the conversion failed.
  */
-int ieee80211_channel_to_frequency(int chan, enum nl80211_band band);
+static inline int
+ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
+{
+	return KHZ_TO_MHZ(ieee80211_channel_to_freq_khz(chan, band));
+}
+
+/**
+ * ieee80211_freq_khz_to_channel - convert frequency to channel number
+ * @freq: center frequency in KHz
+ * Return: The corresponding channel, or 0 if the conversion failed.
+ */
+int ieee80211_freq_khz_to_channel(u32 freq);
 
 /**
  * ieee80211_frequency_to_channel - convert frequency to channel number
- * @freq: center frequency
+ * @freq: center frequency in MHz
  * Return: The corresponding channel, or 0 if the conversion failed.
  */
-int ieee80211_frequency_to_channel(int freq);
+static inline int
+ieee80211_frequency_to_channel(int freq)
+{
+	return ieee80211_freq_khz_to_channel(MHZ_TO_KHZ(freq));
+}
+
+/**
+ * ieee80211_get_channel_khz - get channel struct from wiphy for specified
+ * frequency
+ * @wiphy: the struct wiphy to get the channel for
+ * @freq: the center frequency (in KHz) of the channel
+ * Return: The channel struct from @wiphy at @freq.
+ */
+struct ieee80211_channel *
+ieee80211_get_channel_khz(struct wiphy *wiphy, u32 freq);
 
 /**
  * ieee80211_get_channel - get channel struct from wiphy for specified frequency
  *
  * @wiphy: the struct wiphy to get the channel for
- * @freq: the center frequency of the channel
- *
+ * @freq: the center frequency (in MHz) of the channel
  * Return: The channel struct from @wiphy at @freq.
  */
-struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq);
+static inline struct ieee80211_channel *
+ieee80211_get_channel(struct wiphy *wiphy, int freq)
+{
+	return ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(freq));
+}
 
 /**
  * ieee80211_get_response_rate - get basic rate for a given rate
@@ -7228,6 +7295,19 @@ bool ieee80211_operating_class_to_band(u8 operating_class,
 bool ieee80211_chandef_to_operating_class(struct cfg80211_chan_def *chandef,
 					  u8 *op_class);
 
+/**
+ * ieee80211_chandef_to_khz - convert chandef to frequency in KHz
+ *
+ * @chandef: the chandef to convert
+ *
+ * Returns the center frequency of chandef (1st segment) in KHz.
+ */
+static inline u32
+ieee80211_chandef_to_khz(const struct cfg80211_chan_def *chandef)
+{
+	return MHZ_TO_KHZ(chandef->center_freq1) + chandef->freq1_offset;
+}
+
 /*
  * cfg80211_tdls_oper_request - request userspace to perform TDLS operation
  * @dev: the device on which the operation is requested
diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index fcac5c6366e1..d60e50a3b910 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -27,6 +27,7 @@ void cfg80211_chandef_create(struct cfg80211_chan_def *chandef,
 		return;
 
 	chandef->chan = chan;
+	chandef->freq1_offset = chan->freq_offset;
 	chandef->center_freq2 = 0;
 	chandef->edmg.bw_config = 0;
 	chandef->edmg.channels = 0;
@@ -153,7 +154,8 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
 	case NL80211_CHAN_WIDTH_10:
 	case NL80211_CHAN_WIDTH_20:
 	case NL80211_CHAN_WIDTH_20_NOHT:
-		if (chandef->center_freq1 != control_freq)
+		if (ieee80211_chandef_to_khz(chandef) !=
+		    ieee80211_channel_to_khz(chandef->chan))
 			return false;
 		if (chandef->center_freq2)
 			return false;
@@ -386,10 +388,11 @@ static u32 cfg80211_get_start_freq(u32 center_freq,
 {
 	u32 start_freq;
 
-	if (bandwidth <= 20)
+	bandwidth = MHZ_TO_KHZ(bandwidth);
+	if (bandwidth <= MHZ_TO_KHZ(20))
 		start_freq = center_freq;
 	else
-		start_freq = center_freq - bandwidth/2 + 10;
+		start_freq = center_freq - bandwidth / 2 + MHZ_TO_KHZ(10);
 
 	return start_freq;
 }
@@ -399,10 +402,11 @@ static u32 cfg80211_get_end_freq(u32 center_freq,
 {
 	u32 end_freq;
 
-	if (bandwidth <= 20)
+	bandwidth = MHZ_TO_KHZ(bandwidth);
+	if (bandwidth <= MHZ_TO_KHZ(20))
 		end_freq = center_freq;
 	else
-		end_freq = center_freq + bandwidth/2 - 10;
+		end_freq = center_freq + bandwidth / 2 - MHZ_TO_KHZ(10);
 
 	return end_freq;
 }
@@ -417,8 +421,8 @@ static int cfg80211_get_chans_dfs_required(struct wiphy *wiphy,
 	start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
 	end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
 
-	for (freq = start_freq; freq <= end_freq; freq += 20) {
-		c = ieee80211_get_channel(wiphy, freq);
+	for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+		c = ieee80211_get_channel_khz(wiphy, freq);
 		if (!c)
 			return -EINVAL;
 
@@ -449,8 +453,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
 			return -EINVAL;
 
 		ret = cfg80211_get_chans_dfs_required(wiphy,
-						      chandef->center_freq1,
-						      width);
+					ieee80211_chandef_to_khz(chandef),
+					width);
 		if (ret < 0)
 			return ret;
 		else if (ret > 0)
@@ -460,8 +464,8 @@ int cfg80211_chandef_dfs_required(struct wiphy *wiphy,
 			return 0;
 
 		ret = cfg80211_get_chans_dfs_required(wiphy,
-						      chandef->center_freq2,
-						      width);
+					MHZ_TO_KHZ(chandef->center_freq2),
+					width);
 		if (ret < 0)
 			return ret;
 		else if (ret > 0)
@@ -503,8 +507,8 @@ static int cfg80211_get_chans_dfs_usable(struct wiphy *wiphy,
 	 * DFS_AVAILABLE). Return number of usable channels
 	 * (require CAC). Allow DFS and non-DFS channel mix.
 	 */
-	for (freq = start_freq; freq <= end_freq; freq += 20) {
-		c = ieee80211_get_channel(wiphy, freq);
+	for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+		c = ieee80211_get_channel_khz(wiphy, freq);
 		if (!c)
 			return -EINVAL;
 
@@ -536,8 +540,9 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
 	if (width < 0)
 		return false;
 
-	r1 = cfg80211_get_chans_dfs_usable(wiphy, chandef->center_freq1,
-					  width);
+	r1 = cfg80211_get_chans_dfs_usable(wiphy,
+					   MHZ_TO_KHZ(chandef->center_freq1),
+					   width);
 
 	if (r1 < 0)
 		return false;
@@ -546,8 +551,8 @@ bool cfg80211_chandef_dfs_usable(struct wiphy *wiphy,
 	case NL80211_CHAN_WIDTH_80P80:
 		WARN_ON(!chandef->center_freq2);
 		r2 = cfg80211_get_chans_dfs_usable(wiphy,
-						   chandef->center_freq2,
-						   width);
+					MHZ_TO_KHZ(chandef->center_freq2),
+					width);
 		if (r2 < 0)
 			return false;
 		break;
@@ -694,8 +699,8 @@ static bool cfg80211_get_chans_dfs_available(struct wiphy *wiphy,
 	 * If any channel in between is disabled or has not
 	 * had gone through CAC return false
 	 */
-	for (freq = start_freq; freq <= end_freq; freq += 20) {
-		c = ieee80211_get_channel(wiphy, freq);
+	for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+		c = ieee80211_get_channel_khz(wiphy, freq);
 		if (!c)
 			return false;
 
@@ -724,7 +729,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
 	if (width < 0)
 		return false;
 
-	r = cfg80211_get_chans_dfs_available(wiphy, chandef->center_freq1,
+	r = cfg80211_get_chans_dfs_available(wiphy,
+					     MHZ_TO_KHZ(chandef->center_freq1),
 					     width);
 
 	/* If any of channels unavailable for cf1 just return */
@@ -735,8 +741,8 @@ static bool cfg80211_chandef_dfs_available(struct wiphy *wiphy,
 	case NL80211_CHAN_WIDTH_80P80:
 		WARN_ON(!chandef->center_freq2);
 		r = cfg80211_get_chans_dfs_available(wiphy,
-						     chandef->center_freq2,
-						     width);
+					MHZ_TO_KHZ(chandef->center_freq2),
+					width);
 		break;
 	default:
 		WARN_ON(chandef->center_freq2);
@@ -757,8 +763,8 @@ static unsigned int cfg80211_get_chans_dfs_cac_time(struct wiphy *wiphy,
 	start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
 	end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
 
-	for (freq = start_freq; freq <= end_freq; freq += 20) {
-		c = ieee80211_get_channel(wiphy, freq);
+	for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+		c = ieee80211_get_channel_khz(wiphy, freq);
 		if (!c)
 			return 0;
 
@@ -790,14 +796,14 @@ cfg80211_chandef_dfs_cac_time(struct wiphy *wiphy,
 		return 0;
 
 	t1 = cfg80211_get_chans_dfs_cac_time(wiphy,
-					     chandef->center_freq1,
+					     MHZ_TO_KHZ(chandef->center_freq1),
 					     width);
 
 	if (!chandef->center_freq2)
 		return t1;
 
 	t2 = cfg80211_get_chans_dfs_cac_time(wiphy,
-					     chandef->center_freq2,
+					     MHZ_TO_KHZ(chandef->center_freq2),
 					     width);
 
 	return max(t1, t2);
@@ -813,8 +819,8 @@ static bool cfg80211_secondary_chans_ok(struct wiphy *wiphy,
 	start_freq = cfg80211_get_start_freq(center_freq, bandwidth);
 	end_freq = cfg80211_get_end_freq(center_freq, bandwidth);
 
-	for (freq = start_freq; freq <= end_freq; freq += 20) {
-		c = ieee80211_get_channel(wiphy, freq);
+	for (freq = start_freq; freq <= end_freq; freq += MHZ_TO_KHZ(20)) {
+		c = ieee80211_get_channel_khz(wiphy, freq);
 		if (!c || c->flags & prohibited_flags)
 			return false;
 	}
@@ -976,13 +982,15 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		prohibited_flags |= IEEE80211_CHAN_NO_OFDM;
 
 
-	if (!cfg80211_secondary_chans_ok(wiphy, chandef->center_freq1,
+	if (!cfg80211_secondary_chans_ok(wiphy,
+					 ieee80211_chandef_to_khz(chandef),
 					 width, prohibited_flags))
 		return false;
 
 	if (!chandef->center_freq2)
 		return true;
-	return cfg80211_secondary_chans_ok(wiphy, chandef->center_freq2,
+	return cfg80211_secondary_chans_ok(wiphy,
+					   MHZ_TO_KHZ(chandef->center_freq2),
 					   width, prohibited_flags);
 }
 EXPORT_SYMBOL(cfg80211_chandef_usable);
diff --git a/net/wireless/reg.c b/net/wireless/reg.c
index d476d4da0d09..0d74a31ef0ab 100644
--- a/net/wireless/reg.c
+++ b/net/wireless/reg.c
@@ -1658,22 +1658,23 @@ static uint32_t reg_rule_to_chan_bw_flags(const struct ieee80211_regdomain *regd
 					  const struct ieee80211_channel *chan)
 {
 	const struct ieee80211_freq_range *freq_range = NULL;
-	u32 max_bandwidth_khz, bw_flags = 0;
+	u32 max_bandwidth_khz, center_freq_khz, bw_flags = 0;
 
 	freq_range = &reg_rule->freq_range;
 
 	max_bandwidth_khz = freq_range->max_bandwidth_khz;
+	center_freq_khz = ieee80211_channel_to_khz(chan);
 	/* Check if auto calculation requested */
 	if (reg_rule->flags & NL80211_RRF_AUTO_BW)
 		max_bandwidth_khz = reg_get_max_bandwidth(regd, reg_rule);
 
 	/* If we get a reg_rule we can assume that at least 5Mhz fit */
 	if (!cfg80211_does_bw_fit_range(freq_range,
-					MHZ_TO_KHZ(chan->center_freq),
+					center_freq_khz,
 					MHZ_TO_KHZ(10)))
 		bw_flags |= IEEE80211_CHAN_NO_10MHZ;
 	if (!cfg80211_does_bw_fit_range(freq_range,
-					MHZ_TO_KHZ(chan->center_freq),
+					center_freq_khz,
 					MHZ_TO_KHZ(20)))
 		bw_flags |= IEEE80211_CHAN_NO_20MHZ;
 
@@ -1710,7 +1711,7 @@ static void handle_channel(struct wiphy *wiphy,
 
 	flags = chan->orig_flags;
 
-	reg_rule = freq_reg_info(wiphy, MHZ_TO_KHZ(chan->center_freq));
+	reg_rule = freq_reg_info(wiphy, ieee80211_channel_to_khz(chan));
 	if (IS_ERR(reg_rule)) {
 		/*
 		 * We will disable all channels that do not match our
@@ -1729,13 +1730,13 @@ static void handle_channel(struct wiphy *wiphy,
 		if (lr->initiator == NL80211_REGDOM_SET_BY_DRIVER &&
 		    request_wiphy && request_wiphy == wiphy &&
 		    request_wiphy->regulatory_flags & REGULATORY_STRICT_REG) {
-			pr_debug("Disabling freq %d MHz for good\n",
-				 chan->center_freq);
+			pr_debug("Disabling freq %d.%03d MHz for good\n",
+				 chan->center_freq, chan->freq_offset);
 			chan->orig_flags |= IEEE80211_CHAN_DISABLED;
 			chan->flags = chan->orig_flags;
 		} else {
-			pr_debug("Disabling freq %d MHz\n",
-				 chan->center_freq);
+			pr_debug("Disabling freq %d.%03d MHz\n",
+				 chan->center_freq, chan->freq_offset);
 			chan->flags |= IEEE80211_CHAN_DISABLED;
 		}
 		return;
@@ -1936,7 +1937,7 @@ static void handle_reg_beacon(struct wiphy *wiphy, unsigned int chan_idx,
 	sband = wiphy->bands[reg_beacon->chan.band];
 	chan = &sband->channels[chan_idx];
 
-	if (likely(chan->center_freq != reg_beacon->chan.center_freq))
+	if (likely(!ieee80211_channel_equal(chan, &reg_beacon->chan)))
 		return;
 
 	if (chan->beacon_found)
@@ -2269,18 +2270,18 @@ static void handle_channel_custom(struct wiphy *wiphy,
 	u32 bw_flags = 0;
 	const struct ieee80211_reg_rule *reg_rule = NULL;
 	const struct ieee80211_power_rule *power_rule = NULL;
-	u32 bw;
+	u32 bw, center_freq_khz;
 
+	center_freq_khz = ieee80211_channel_to_khz(chan);
 	for (bw = MHZ_TO_KHZ(20); bw >= min_bw; bw = bw / 2) {
-		reg_rule = freq_reg_info_regd(MHZ_TO_KHZ(chan->center_freq),
-					      regd, bw);
+		reg_rule = freq_reg_info_regd(center_freq_khz, regd, bw);
 		if (!IS_ERR(reg_rule))
 			break;
 	}
 
 	if (IS_ERR_OR_NULL(reg_rule)) {
-		pr_debug("Disabling freq %d MHz as custom regd has no rule that fits it\n",
-			 chan->center_freq);
+		pr_debug("Disabling freq %d.%03d MHz as custom regd has no rule that fits it\n",
+			 chan->center_freq, chan->freq_offset);
 		if (wiphy->regulatory_flags & REGULATORY_WIPHY_SELF_MANAGED) {
 			chan->flags |= IEEE80211_CHAN_DISABLED;
 		} else {
@@ -3337,8 +3338,8 @@ static bool pending_reg_beacon(struct ieee80211_channel *beacon_chan)
 	struct reg_beacon *pending_beacon;
 
 	list_for_each_entry(pending_beacon, &reg_pending_beacons, list)
-		if (beacon_chan->center_freq ==
-		    pending_beacon->chan.center_freq)
+		if (ieee80211_channel_equal(beacon_chan,
+					    &pending_beacon->chan))
 			return true;
 	return false;
 }
@@ -3367,9 +3368,10 @@ int regulatory_hint_found_beacon(struct wiphy *wiphy,
 	if (!reg_beacon)
 		return -ENOMEM;
 
-	pr_debug("Found new beacon on frequency: %d MHz (Ch %d) on %s\n",
-		 beacon_chan->center_freq,
-		 ieee80211_frequency_to_channel(beacon_chan->center_freq),
+	pr_debug("Found new beacon on frequency: %d.%03d MHz (Ch %d) on %s\n",
+		 beacon_chan->center_freq, beacon_chan->freq_offset,
+		 ieee80211_freq_khz_to_channel(
+			 ieee80211_channel_to_khz(beacon_chan)),
 		 wiphy_name(wiphy));
 
 	memcpy(&reg_beacon->chan, beacon_chan,
diff --git a/net/wireless/scan.c b/net/wireless/scan.c
index 4000382aef48..74ea4cfb39fb 100644
--- a/net/wireless/scan.c
+++ b/net/wireless/scan.c
@@ -1322,8 +1322,8 @@ cfg80211_get_bss_channel(struct wiphy *wiphy, const u8 *ie, size_t ielen,
 		return channel;
 	}
 
-	freq = ieee80211_channel_to_frequency(channel_number, channel->band);
-	alt_channel = ieee80211_get_channel(wiphy, freq);
+	freq = ieee80211_channel_to_freq_khz(channel_number, channel->band);
+	alt_channel = ieee80211_get_channel_khz(wiphy, freq);
 	if (!alt_channel) {
 		if (channel->band == NL80211_BAND_2GHZ) {
 			/*
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index ee736620f1e3..53c887ea67c7 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -112,24 +112,29 @@
 	} while (0)
 
 #define CHAN_ENTRY __field(enum nl80211_band, band) \
-		   __field(u32, center_freq)
+		   __field(u32, center_freq)		\
+		   __field(u16, freq_offset)
 #define CHAN_ASSIGN(chan)					  \
 	do {							  \
 		if (chan) {					  \
 			__entry->band = chan->band;		  \
 			__entry->center_freq = chan->center_freq; \
+			__entry->freq_offset = chan->freq_offset; \
 		} else {					  \
 			__entry->band = 0;			  \
 			__entry->center_freq = 0;		  \
+			__entry->freq_offset = 0;		  \
 		}						  \
 	} while (0)
-#define CHAN_PR_FMT "band: %d, freq: %u"
-#define CHAN_PR_ARG __entry->band, __entry->center_freq
+#define CHAN_PR_FMT "band: %d, freq: %u.%03u"
+#define CHAN_PR_ARG __entry->band, __entry->center_freq, __entry->freq_offset
 
 #define CHAN_DEF_ENTRY __field(enum nl80211_band, band)		\
 		       __field(u32, control_freq)			\
+		       __field(u32, freq_offset)			\
 		       __field(u32, width)				\
 		       __field(u32, center_freq1)			\
+		       __field(u32, freq1_offset)			\
 		       __field(u32, center_freq2)
 #define CHAN_DEF_ASSIGN(chandef)					\
 	do {								\
@@ -137,21 +142,27 @@
 			__entry->band = (chandef)->chan->band;		\
 			__entry->control_freq =				\
 				(chandef)->chan->center_freq;		\
+			__entry->freq_offset =				\
+				(chandef)->chan->freq_offset;		\
 			__entry->width = (chandef)->width;		\
 			__entry->center_freq1 = (chandef)->center_freq1;\
+			__entry->freq1_offset = (chandef)->freq1_offset;\
 			__entry->center_freq2 = (chandef)->center_freq2;\
 		} else {						\
 			__entry->band = 0;				\
 			__entry->control_freq = 0;			\
+			__entry->freq_offset = 0;			\
 			__entry->width = 0;				\
 			__entry->center_freq1 = 0;			\
+			__entry->freq1_offset = 0;			\
 			__entry->center_freq2 = 0;			\
 		}							\
 	} while (0)
 #define CHAN_DEF_PR_FMT							\
-	"band: %d, control freq: %u, width: %d, cf1: %u, cf2: %u"
+	"band: %d, control freq: %u.%03u, width: %d, cf1: %u.%03u, cf2: %u"
 #define CHAN_DEF_PR_ARG __entry->band, __entry->control_freq,		\
-			__entry->width, __entry->center_freq1,		\
+			__entry->freq_offset, __entry->width,		\
+			__entry->center_freq1, __entry->freq1_offset,	\
 			__entry->center_freq2
 
 #define SINFO_ENTRY __field(int, generation)	    \
diff --git a/net/wireless/util.c b/net/wireless/util.c
index 123d6ce79b8e..df75e58eca5d 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -72,7 +72,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
 }
 EXPORT_SYMBOL(ieee80211_mandatory_rates);
 
-int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
+u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
 {
 	/* see 802.11 17.3.8.3.2 and Annex J
 	 * there are overlapping channel numbers in 5GHz and 2GHz bands */
@@ -81,15 +81,15 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
 	switch (band) {
 	case NL80211_BAND_2GHZ:
 		if (chan == 14)
-			return 2484;
+			return MHZ_TO_KHZ(2484);
 		else if (chan < 14)
-			return 2407 + chan * 5;
+			return MHZ_TO_KHZ(2407 + chan * 5);
 		break;
 	case NL80211_BAND_5GHZ:
 		if (chan >= 182 && chan <= 196)
-			return 4000 + chan * 5;
+			return MHZ_TO_KHZ(4000 + chan * 5);
 		else
-			return 5000 + chan * 5;
+			return MHZ_TO_KHZ(5000 + chan * 5);
 		break;
 	case NL80211_BAND_6GHZ:
 		/* see 802.11ax D4.1 27.3.22.2 */
@@ -98,17 +98,20 @@ int ieee80211_channel_to_frequency(int chan, enum nl80211_band band)
 		break;
 	case NL80211_BAND_60GHZ:
 		if (chan < 7)
-			return 56160 + chan * 2160;
+			return MHZ_TO_KHZ(56160 + chan * 2160);
 		break;
 	default:
 		;
 	}
 	return 0; /* not supported */
 }
-EXPORT_SYMBOL(ieee80211_channel_to_frequency);
+EXPORT_SYMBOL(ieee80211_channel_to_freq_khz);
 
-int ieee80211_frequency_to_channel(int freq)
+int ieee80211_freq_khz_to_channel(u32 freq)
 {
+	/* TODO: just handle MHz for now */
+	freq = KHZ_TO_MHZ(freq);
+
 	/* see 802.11 17.3.8.3.2 and Annex J */
 	if (freq == 2484)
 		return 14;
@@ -126,9 +129,10 @@ int ieee80211_frequency_to_channel(int freq)
 	else
 		return 0;
 }
-EXPORT_SYMBOL(ieee80211_frequency_to_channel);
+EXPORT_SYMBOL(ieee80211_freq_khz_to_channel);
 
-struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq)
+struct ieee80211_channel *ieee80211_get_channel_khz(struct wiphy *wiphy,
+						    u32 freq)
 {
 	enum nl80211_band band;
 	struct ieee80211_supported_band *sband;
@@ -141,14 +145,16 @@ struct ieee80211_channel *ieee80211_get_channel(struct wiphy *wiphy, int freq)
 			continue;
 
 		for (i = 0; i < sband->n_channels; i++) {
-			if (sband->channels[i].center_freq == freq)
-				return &sband->channels[i];
+			struct ieee80211_channel *chan = &sband->channels[i];
+
+			if (ieee80211_channel_to_khz(chan) == freq)
+				return chan;
 		}
 	}
 
 	return NULL;
 }
-EXPORT_SYMBOL(ieee80211_get_channel);
+EXPORT_SYMBOL(ieee80211_get_channel_khz);
 
 static void set_mandatory_flags_band(struct ieee80211_supported_band *sband)
 {
-- 
cgit v1.2.3-59-g8ed1b


From b6011960f392d1de619f10aa5d088c27f1e7526c Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Wed, 1 Apr 2020 18:18:04 -0700
Subject: mac80211: handle channel frequency offset

cfg80211_chan_def and ieee80211_channel recently gained a
frequency offset component. Handle this where it makes
sense (potentially required by S1G channels).

For IBSS, TDLS, CSA, and ROC we return -EOPNOTSUPP if a
channel with frequency offset is passed, since they may or
may not work. Once someone tests and verifies these
commands work on those types of channels, we can remove
that error.

join_ocb and join_mesh look harmless because they use a
simple ieee80211_vif_use_channel(), which is using an
already verified channel, so we let those through.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200402011810.22947-4-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c        |  6 ++++++
 net/mac80211/chan.c       |  1 +
 net/mac80211/ibss.c       |  5 +++++
 net/mac80211/main.c       |  8 +++++---
 net/mac80211/mlme.c       | 16 ++++++++++++----
 net/mac80211/offchannel.c |  4 ++++
 net/mac80211/scan.c       |  1 +
 net/mac80211/tdls.c       |  4 ++++
 net/mac80211/trace.h      | 41 +++++++++++++++++++++++++++++++++--------
 9 files changed, 71 insertions(+), 15 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index f0d43b9cfa43..ae3e06375a28 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3287,6 +3287,12 @@ __ieee80211_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 		goto out;
 	}
 
+	if (params->chandef.chan->freq_offset) {
+		/* this may work, but is untested */
+		err = -EOPNOTSUPP;
+		goto out;
+	}
+
 	chanctx = container_of(conf, struct ieee80211_chanctx, conf);
 
 	ch_switch.timestamp = 0;
diff --git a/net/mac80211/chan.c b/net/mac80211/chan.c
index 9c94baaf693c..e6e192f53e4e 100644
--- a/net/mac80211/chan.c
+++ b/net/mac80211/chan.c
@@ -533,6 +533,7 @@ static void ieee80211_del_chanctx(struct ieee80211_local *local,
 		struct cfg80211_chan_def *chandef = &local->_oper_chandef;
 		chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
 		chandef->center_freq1 = chandef->chan->center_freq;
+		chandef->freq1_offset = chandef->chan->freq_offset;
 		chandef->center_freq2 = 0;
 
 		/* NOTE: Disabling radar is only valid here for
diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index d40744903fa9..2479cd48fed0 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -1758,6 +1758,11 @@ int ieee80211_ibss_join(struct ieee80211_sub_if_data *sdata,
 	int i;
 	int ret;
 
+	if (params->chandef.chan->freq_offset) {
+		/* this may work, but is untested */
+		return -EOPNOTSUPP;
+	}
+
 	ret = cfg80211_chandef_dfs_required(local->hw.wiphy,
 					    &params->chandef,
 					    sdata->wdev.iftype);
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index a0cb052ea30d..dfcee5e462da 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -107,13 +107,15 @@ static u32 ieee80211_hw_conf_chan(struct ieee80211_local *local)
 		chandef.chan = local->tmp_channel;
 		chandef.width = NL80211_CHAN_WIDTH_20_NOHT;
 		chandef.center_freq1 = chandef.chan->center_freq;
+		chandef.freq1_offset = chandef.chan->freq_offset;
 	} else
 		chandef = local->_oper_chandef;
 
 	WARN(!cfg80211_chandef_valid(&chandef),
-	     "control:%d MHz width:%d center: %d/%d MHz",
-	     chandef.chan->center_freq, chandef.width,
-	     chandef.center_freq1, chandef.center_freq2);
+	     "control:%d.%03d MHz width:%d center: %d.%03d/%d MHz",
+	     chandef.chan->center_freq, chandef.chan->freq_offset,
+	     chandef.width, chandef.center_freq1, chandef.freq1_offset,
+	     chandef.center_freq2);
 
 	if (!cfg80211_chandef_identical(&chandef, &local->_oper_chandef))
 		local->hw.conf.flags |= IEEE80211_CONF_OFFCHANNEL;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 59a35c7997c3..acc8adf50d69 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -162,6 +162,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	chandef->chan = channel;
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
 	chandef->center_freq1 = channel->center_freq;
+	chandef->freq1_offset = channel->freq_offset;
 
 	if (!ht_oper || !sta_ht_cap.ht_supported) {
 		ret = IEEE80211_STA_DISABLE_HT |
@@ -396,9 +397,12 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		return 0;
 
 	sdata_info(sdata,
-		   "AP %pM changed bandwidth, new config is %d MHz, width %d (%d/%d MHz)\n",
-		   ifmgd->bssid, chandef.chan->center_freq, chandef.width,
-		   chandef.center_freq1, chandef.center_freq2);
+		   "AP %pM changed bandwidth, new config is %d.%03d MHz, "
+		   "width %d (%d.%03d/%d MHz)\n",
+		   ifmgd->bssid, chandef.chan->center_freq,
+		   chandef.chan->freq_offset, chandef.width,
+		   chandef.center_freq1, chandef.freq1_offset,
+		   chandef.center_freq2);
 
 	if (flags != (ifmgd->flags & (IEEE80211_STA_DISABLE_HT |
 				      IEEE80211_STA_DISABLE_VHT |
@@ -1364,10 +1368,14 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	if (!cfg80211_chandef_usable(local->hw.wiphy, &csa_ie.chandef,
 				     IEEE80211_CHAN_DISABLED)) {
 		sdata_info(sdata,
-			   "AP %pM switches to unsupported channel (%d MHz, width:%d, CF1/2: %d/%d MHz), disconnecting\n",
+			   "AP %pM switches to unsupported channel "
+			   "(%d.%03d MHz, width:%d, CF1/2: %d.%03d/%d MHz), "
+			   "disconnecting\n",
 			   ifmgd->associated->bssid,
 			   csa_ie.chandef.chan->center_freq,
+			   csa_ie.chandef.chan->freq_offset,
 			   csa_ie.chandef.width, csa_ie.chandef.center_freq1,
+			   csa_ie.chandef.freq1_offset,
 			   csa_ie.chandef.center_freq2);
 		ieee80211_queue_work(&local->hw,
 				     &ifmgd->csa_connection_drop_work);
diff --git a/net/mac80211/offchannel.c b/net/mac80211/offchannel.c
index c710504ccf1a..db3b8bf75656 100644
--- a/net/mac80211/offchannel.c
+++ b/net/mac80211/offchannel.c
@@ -557,6 +557,10 @@ static int ieee80211_start_roc_work(struct ieee80211_local *local,
 
 	lockdep_assert_held(&local->mtx);
 
+	if (channel->freq_offset)
+		/* this may work, but is untested */
+		return -EOPNOTSUPP;
+
 	if (local->use_chanctx && !local->ops->remain_on_channel)
 		return -EOPNOTSUPP;
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index fdac8192a519..4d14118dddca 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -896,6 +896,7 @@ static void ieee80211_scan_state_set_channel(struct ieee80211_local *local,
 
 	local->scan_chandef.chan = chan;
 	local->scan_chandef.center_freq1 = chan->center_freq;
+	local->scan_chandef.freq1_offset = chan->freq_offset;
 	local->scan_chandef.center_freq2 = 0;
 	switch (scan_req->scan_width) {
 	case NL80211_BSS_CHAN_WIDTH_5:
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 7ff22f9d6e80..8ad420db3766 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1566,6 +1566,10 @@ ieee80211_tdls_channel_switch(struct wiphy *wiphy, struct net_device *dev,
 	u32 ch_sw_tm_ie;
 	int ret;
 
+	if (chandef->chan->freq_offset)
+		/* this may work, but is untested */
+		return -EOPNOTSUPP;
+
 	mutex_lock(&local->sta_mtx);
 	sta = sta_info_get(sdata, addr);
 	if (!sta) {
diff --git a/net/mac80211/trace.h b/net/mac80211/trace.h
index 427f51a0a994..1b4709694d2a 100644
--- a/net/mac80211/trace.h
+++ b/net/mac80211/trace.h
@@ -37,32 +37,42 @@
 #define VIF_PR_ARG	__get_str(vif_name), __entry->vif_type, __entry->p2p ? "/p2p" : ""
 
 #define CHANDEF_ENTRY	__field(u32, control_freq)					\
+			__field(u32, freq_offset)					\
 			__field(u32, chan_width)					\
 			__field(u32, center_freq1)					\
+			__field(u32, freq1_offset)					\
 			__field(u32, center_freq2)
 #define CHANDEF_ASSIGN(c)							\
 			__entry->control_freq = (c) ? ((c)->chan ? (c)->chan->center_freq : 0) : 0;	\
+			__entry->freq_offset = (c) ? ((c)->chan ? (c)->chan->freq_offset : 0) : 0;	\
 			__entry->chan_width = (c) ? (c)->width : 0;			\
 			__entry->center_freq1 = (c) ? (c)->center_freq1 : 0;		\
+			__entry->freq1_offset = (c) ? (c)->freq1_offset : 0;		\
 			__entry->center_freq2 = (c) ? (c)->center_freq2 : 0;
-#define CHANDEF_PR_FMT	" control:%d MHz width:%d center: %d/%d MHz"
-#define CHANDEF_PR_ARG	__entry->control_freq, __entry->chan_width,			\
-			__entry->center_freq1, __entry->center_freq2
+#define CHANDEF_PR_FMT	" control:%d.%03d MHz width:%d center: %d.%03d/%d MHz"
+#define CHANDEF_PR_ARG	__entry->control_freq, __entry->freq_offset, __entry->chan_width, \
+			__entry->center_freq1, __entry->freq1_offset, __entry->center_freq2
 
 #define MIN_CHANDEF_ENTRY								\
 			__field(u32, min_control_freq)					\
+			__field(u32, min_freq_offset)					\
 			__field(u32, min_chan_width)					\
 			__field(u32, min_center_freq1)					\
+			__field(u32, min_freq1_offset)					\
 			__field(u32, min_center_freq2)
 
 #define MIN_CHANDEF_ASSIGN(c)								\
 			__entry->min_control_freq = (c)->chan ? (c)->chan->center_freq : 0;	\
+			__entry->min_freq_offset = (c)->chan ? (c)->chan->freq_offset : 0;	\
 			__entry->min_chan_width = (c)->width;				\
 			__entry->min_center_freq1 = (c)->center_freq1;			\
+			__entry->min_freq1_offset = (c)->freq1_offset;			\
 			__entry->min_center_freq2 = (c)->center_freq2;
-#define MIN_CHANDEF_PR_FMT	" min_control:%d MHz min_width:%d min_center: %d/%d MHz"
-#define MIN_CHANDEF_PR_ARG	__entry->min_control_freq, __entry->min_chan_width,	\
-			__entry->min_center_freq1, __entry->min_center_freq2
+#define MIN_CHANDEF_PR_FMT	" min_control:%d.%03d MHz min_width:%d min_center: %d.%03d/%d MHz"
+#define MIN_CHANDEF_PR_ARG	__entry->min_control_freq, __entry->min_freq_offset,	\
+			__entry->min_chan_width,					\
+			__entry->min_center_freq1, __entry->min_freq1_offset,		\
+			__entry->min_center_freq2
 
 #define CHANCTX_ENTRY	CHANDEF_ENTRY							\
 			MIN_CHANDEF_ENTRY						\
@@ -412,6 +422,7 @@ TRACE_EVENT(drv_bss_info_changed,
 		__field(s32, cqm_rssi_hyst)
 		__field(u32, channel_width)
 		__field(u32, channel_cfreq1)
+		__field(u32, channel_cfreq1_offset)
 		__dynamic_array(u32, arp_addr_list,
 				info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
 					IEEE80211_BSS_ARP_ADDR_LIST_LEN :
@@ -452,6 +463,7 @@ TRACE_EVENT(drv_bss_info_changed,
 		__entry->cqm_rssi_hyst = info->cqm_rssi_hyst;
 		__entry->channel_width = info->chandef.width;
 		__entry->channel_cfreq1 = info->chandef.center_freq1;
+		__entry->channel_cfreq1_offset = info->chandef.freq1_offset;
 		__entry->arp_addr_cnt = info->arp_addr_cnt;
 		memcpy(__get_dynamic_array(arp_addr_list), info->arp_addr_list,
 		       sizeof(u32) * (info->arp_addr_cnt > IEEE80211_BSS_ARP_ADDR_LIST_LEN ?
@@ -1223,6 +1235,7 @@ TRACE_EVENT(drv_remain_on_channel,
 		LOCAL_ENTRY
 		VIF_ENTRY
 		__field(int, center_freq)
+		__field(int, freq_offset)
 		__field(unsigned int, duration)
 		__field(u32, type)
 	),
@@ -1231,14 +1244,16 @@ TRACE_EVENT(drv_remain_on_channel,
 		LOCAL_ASSIGN;
 		VIF_ASSIGN;
 		__entry->center_freq = chan->center_freq;
+		__entry->freq_offset = chan->freq_offset;
 		__entry->duration = duration;
 		__entry->type = type;
 	),
 
 	TP_printk(
-		LOCAL_PR_FMT  VIF_PR_FMT " freq:%dMHz duration:%dms type=%d",
+		LOCAL_PR_FMT  VIF_PR_FMT " freq:%d.%03dMHz duration:%dms type=%d",
 		LOCAL_PR_ARG, VIF_PR_ARG,
-		__entry->center_freq, __entry->duration, __entry->type
+		__entry->center_freq, __entry->freq_offset,
+		__entry->duration, __entry->type
 	)
 );
 
@@ -1546,8 +1561,10 @@ struct trace_vif_entry {
 
 struct trace_chandef_entry {
 	u32 control_freq;
+	u32 freq_offset;
 	u32 chan_width;
 	u32 center_freq1;
+	u32 freq1_offset;
 	u32 center_freq2;
 } __packed;
 
@@ -1597,18 +1614,26 @@ TRACE_EVENT(drv_switch_vif_chanctx,
 					sizeof(local_vifs[i].vif.vif_name));
 				SWITCH_ENTRY_ASSIGN(old_chandef.control_freq,
 						old_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(old_chandef.freq_offset,
+						old_ctx->def.chan->freq_offset);
 				SWITCH_ENTRY_ASSIGN(old_chandef.chan_width,
 						    old_ctx->def.width);
 				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq1,
 						    old_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(old_chandef.freq1_offset,
+						    old_ctx->def.freq1_offset);
 				SWITCH_ENTRY_ASSIGN(old_chandef.center_freq2,
 						    old_ctx->def.center_freq2);
 				SWITCH_ENTRY_ASSIGN(new_chandef.control_freq,
 						new_ctx->def.chan->center_freq);
+				SWITCH_ENTRY_ASSIGN(new_chandef.freq_offset,
+						new_ctx->def.chan->freq_offset);
 				SWITCH_ENTRY_ASSIGN(new_chandef.chan_width,
 						    new_ctx->def.width);
 				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq1,
 						    new_ctx->def.center_freq1);
+				SWITCH_ENTRY_ASSIGN(new_chandef.freq1_offset,
+						    new_ctx->def.freq1_offset);
 				SWITCH_ENTRY_ASSIGN(new_chandef.center_freq2,
 						    new_ctx->def.center_freq2);
 			}
-- 
cgit v1.2.3-59-g8ed1b


From 3b23c184f72acddad39c40373f165e1a9e384758 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Wed, 1 Apr 2020 18:18:05 -0700
Subject: mac80211: add freq_offset to RX status

RX status needs a KHz component, so add freq_offset. We
can reduce the bits for the frequency since 60 GHz isn't
supported.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200402011810.22947-5-thomas@adapt-ip.com
[fix commit message]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h | 10 +++++++++-
 net/mac80211/mlme.c    |  6 ++++--
 net/mac80211/rx.c      |  1 +
 net/mac80211/scan.c    |  3 ++-
 4 files changed, 16 insertions(+), 4 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index f3147633dda2..2936049f918e 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -1335,6 +1335,7 @@ enum mac80211_rx_encoding {
  * @freq: frequency the radio was tuned to when receiving this frame, in MHz
  *	This field must be set for management frames, but isn't strictly needed
  *	for data (other) frames - for those it only affects radiotap reporting.
+ * @freq_offset: @freq has a positive offset of 500 KHz.
  * @signal: signal strength when receiving this frame, either in dBm, in dB or
  *	unspecified depending on the hardware capabilities flags
  *	@IEEE80211_HW_SIGNAL_*
@@ -1365,7 +1366,7 @@ struct ieee80211_rx_status {
 	u32 device_timestamp;
 	u32 ampdu_reference;
 	u32 flag;
-	u16 freq;
+	u16 freq: 13, freq_offset: 1;
 	u8 enc_flags;
 	u8 encoding:2, bw:3, he_ru:3;
 	u8 he_gi:2, he_dcm:1;
@@ -1381,6 +1382,13 @@ struct ieee80211_rx_status {
 	u8 zero_length_psdu_type;
 };
 
+static inline u32
+ieee80211_rx_status_to_khz(struct ieee80211_rx_status *rx_status)
+{
+	return MHZ_TO_KHZ(rx_status->freq) +
+	       (rx_status->freq_offset ? 500 : 0);
+}
+
 /**
  * struct ieee80211_vendor_radiotap - vendor radiotap data information
  * @present: presence bitmap for this vendor namespace
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index acc8adf50d69..a259b4487b60 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3683,7 +3683,8 @@ static void ieee80211_rx_bss_info(struct ieee80211_sub_if_data *sdata,
 
 	sdata_assert_lock(sdata);
 
-	channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+	channel = ieee80211_get_channel_khz(local->hw.wiphy,
+					ieee80211_rx_status_to_khz(rx_status));
 	if (!channel)
 		return;
 
@@ -3899,7 +3900,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 		return;
 	}
 
-	if (rx_status->freq != chanctx_conf->def.chan->center_freq) {
+	if (ieee80211_rx_status_to_khz(rx_status) !=
+	    ieee80211_channel_to_khz(chanctx_conf->def.chan)) {
 		rcu_read_unlock();
 		return;
 	}
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index a724551b8ddf..eaf8931e4627 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -412,6 +412,7 @@ ieee80211_add_rx_radiotap_header(struct ieee80211_local *local,
 	pos++;
 
 	/* IEEE80211_RADIOTAP_CHANNEL */
+	/* TODO: frequency offset in KHz */
 	put_unaligned_le16(status->freq, pos);
 	pos += 2;
 	if (status->bw == RATE_INFO_BW_10)
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 4d14118dddca..5db15996524f 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -275,7 +275,8 @@ void ieee80211_scan_rx(struct ieee80211_local *local, struct sk_buff *skb)
 			return;
 	}
 
-	channel = ieee80211_get_channel(local->hw.wiphy, rx_status->freq);
+	channel = ieee80211_get_channel_khz(local->hw.wiphy,
+					ieee80211_rx_status_to_khz(rx_status));
 
 	if (!channel || channel->flags & IEEE80211_CHAN_DISABLED)
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From be689f68d040702a3521035d267949d3927971f0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Apr 2020 12:01:04 +0200
Subject: cfg80211: reject channels/chandefs with KHz offset >= 1000

This should be covered by the next MHz, make sure that the
numbers are always normalized.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200424120103.12b91ecf75f9.I4bf499d58404283bbfacb517d614a816763bccf2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/chan.c | 3 +++
 net/wireless/core.c | 3 +++
 2 files changed, 6 insertions(+)

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index d60e50a3b910..e111c08daa0e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -147,6 +147,9 @@ bool cfg80211_chandef_valid(const struct cfg80211_chan_def *chandef)
 	if (!chandef->chan)
 		return false;
 
+	if (chandef->freq1_offset >= 1000)
+		return false;
+
 	control_freq = chandef->chan->center_freq;
 
 	switch (chandef->width) {
diff --git a/net/wireless/core.c b/net/wireless/core.c
index 5757dea2aa94..b795f363d004 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -834,6 +834,9 @@ int wiphy_register(struct wiphy *wiphy)
 			sband->channels[i].orig_mpwr =
 				sband->channels[i].max_power;
 			sband->channels[i].band = band;
+
+			if (WARN_ON(sband->channels[i].freq_offset >= 1000))
+				return -EINVAL;
 		}
 
 		for (i = 0; i < sband->n_iftype_data; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From b6b5c42e3bab939d357d800fd313e3c995164065 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Apr 2020 12:39:46 +0200
Subject: mac80211: fix two missing documentation entries

Add documentation for two struct entries that were missing.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200424123945.6b23a26ab5e7.I664440ab5f33442df8103253bf5b9fe84be8d58c@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h  | 2 ++
 net/mac80211/sta_info.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 2936049f918e..ecb219e3ec4f 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -820,6 +820,8 @@ enum mac80211_tx_info_flags {
  * @IEEE80211_TX_CTRL_AMSDU: This frame is an A-MSDU frame
  * @IEEE80211_TX_CTRL_FAST_XMIT: This frame is going through the fast_xmit path
  * @IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP: This frame skips mesh path lookup
+ * @IEEE80211_TX_CTRL_HW_80211_ENCAP: This frame uses hardware encapsulation
+ *	(header conversion)
  *
  * These flags are used in tx_info->control.flags.
  */
diff --git a/net/mac80211/sta_info.h b/net/mac80211/sta_info.h
index a5de3aa6ea42..49728047dfad 100644
--- a/net/mac80211/sta_info.h
+++ b/net/mac80211/sta_info.h
@@ -69,6 +69,8 @@
  * @WLAN_STA_MPSP_RECIPIENT: local STA is recipient of a MPSP.
  * @WLAN_STA_PS_DELIVER: station woke up, but we're still blocking TX
  *	until pending frames are delivered
+ * @WLAN_STA_USES_ENCRYPTION: This station was configured for encryption,
+ *	so drop all packets without a key later.
  *
  * @NUM_WLAN_STA_FLAGS: number of defined flags
  */
-- 
cgit v1.2.3-59-g8ed1b


From e73f94d1b6f05f6f22434c63de255a9dec6fd23d Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 21:14:37 +0800
Subject: batman-adv: remove unused inline function batadv_arp_change_timeout

There are no callers in-tree.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/distributed-arp-table.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/batman-adv/distributed-arp-table.h b/net/batman-adv/distributed-arp-table.h
index 2bff2f4a325c..4e031661682a 100644
--- a/net/batman-adv/distributed-arp-table.h
+++ b/net/batman-adv/distributed-arp-table.h
@@ -163,11 +163,6 @@ static inline void batadv_dat_init_own_addr(struct batadv_priv *bat_priv,
 {
 }
 
-static inline void batadv_arp_change_timeout(struct net_device *soft_iface,
-					     const char *name)
-{
-}
-
 static inline int batadv_dat_init(struct batadv_priv *bat_priv)
 {
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From c2cf318df87c3745fd0cf76c6a6ec2b85380dbdf Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Fri, 17 Apr 2020 13:21:35 +0300
Subject: iwlwifi: nvm: use iwl_nl80211_band_from_channel_idx

Use iwl_nl80211_band_from_channel_idx in iwl_parse_nvm_mcc_info

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.a64a018f244e.Ie75ac5bb0f0f524d26944800138855ef2228339a@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index 9e9810d2b262..6047b98691eb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -1168,8 +1168,7 @@ iwl_parse_nvm_mcc_info(struct device *dev, const struct iwl_cfg *cfg,
 
 	for (ch_idx = 0; ch_idx < num_of_ch; ch_idx++) {
 		ch_flags = (u16)__le32_to_cpup(channels + ch_idx);
-		band = (ch_idx < NUM_2GHZ_CHANNELS) ?
-		       NL80211_BAND_2GHZ : NL80211_BAND_5GHZ;
+		band = iwl_nl80211_band_from_channel_idx(ch_idx);
 		center_freq = ieee80211_channel_to_frequency(nvm_chan[ch_idx],
 							     band);
 		new_rule = false;
-- 
cgit v1.2.3-59-g8ed1b


From 4af119509a4807ac450634c73d38757aaf0b3f98 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Tue, 21 Apr 2020 13:33:47 +0300
Subject: iwlwifi: move API version lookup to common code

The API version lookup is parsed from a TLV and should be in shared code
to make it reusable across all opmodes.

Also change the function names from mvm to fw, since this is not
mvm-specific anymore.

Additionally, since this function is not just a single line of code, it
shouldn't be inline.  Convert them to actual functions.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200421133326.cf94672dfcdb.I5ede9cc25ee8de7b8d2b5c574f917a18971da734@changeid
---
 drivers/net/wireless/intel/iwlwifi/Makefile        |  3 +-
 drivers/net/wireless/intel/iwlwifi/fw/img.c        | 99 ++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/img.h        | 19 +----
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c |  4 +-
 .../net/wireless/intel/iwlwifi/mvm/ftm-responder.c |  4 +-
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        |  6 +-
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h       |  4 +-
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c       | 24 +-----
 drivers/net/wireless/intel/iwlwifi/mvm/scan.c      |  8 +-
 9 files changed, 118 insertions(+), 53 deletions(-)
 create mode 100644 drivers/net/wireless/intel/iwlwifi/fw/img.c

diff --git a/drivers/net/wireless/intel/iwlwifi/Makefile b/drivers/net/wireless/intel/iwlwifi/Makefile
index 0aae3fa4128c..fbcd1405aeea 100644
--- a/drivers/net/wireless/intel/iwlwifi/Makefile
+++ b/drivers/net/wireless/intel/iwlwifi/Makefile
@@ -13,7 +13,8 @@ iwlwifi-$(CONFIG_IWLDVM) += cfg/1000.o cfg/2000.o cfg/5000.o cfg/6000.o
 iwlwifi-$(CONFIG_IWLMVM) += cfg/7000.o cfg/8000.o cfg/9000.o cfg/22000.o
 iwlwifi-objs		+= iwl-dbg-tlv.o
 iwlwifi-objs		+= iwl-trans.o
-iwlwifi-objs		+= fw/notif-wait.o
+
+iwlwifi-objs		+= fw/img.o fw/notif-wait.o
 iwlwifi-objs		+= fw/dbg.o
 iwlwifi-$(CONFIG_IWLMVM) += fw/paging.o fw/smem.o fw/init.o
 iwlwifi-$(CONFIG_ACPI) += fw/acpi.o
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.c b/drivers/net/wireless/intel/iwlwifi/fw/img.c
new file mode 100644
index 000000000000..de8cff463dbe
--- /dev/null
+++ b/drivers/net/wireless/intel/iwlwifi/fw/img.c
@@ -0,0 +1,99 @@
+/******************************************************************************
+ *
+ * This file is provided under a dual BSD/GPLv2 license.  When using or
+ * redistributing this file, you may do so under either license.
+ *
+ * GPL LICENSE SUMMARY
+ *
+ * Copyright(c) 2019 Intel Corporation
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of version 2 of the GNU General Public License as
+ * published by the Free Software Foundation.
+ *
+ * This program is distributed in the hope that it will be useful, but
+ * WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * General Public License for more details.
+ *
+ * The full GNU General Public License is included in this distribution
+ * in the file called COPYING.
+ *
+ * Contact Information:
+ *  Intel Linux Wireless <linuxwifi@intel.com>
+ * Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+ *
+ * BSD LICENSE
+ *
+ * Copyright(c) 2019 Intel Corporation
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ *
+ *  * Redistributions of source code must retain the above copyright
+ *    notice, this list of conditions and the following disclaimer.
+ *  * Redistributions in binary form must reproduce the above copyright
+ *    notice, this list of conditions and the following disclaimer in
+ *    the documentation and/or other materials provided with the
+ *    distribution.
+ *  * Neither the name Intel Corporation nor the names of its
+ *    contributors may be used to endorse or promote products derived
+ *    from this software without specific prior written permission.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+ * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+ * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+ * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+ * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+ * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+ * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+ * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+ * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ *****************************************************************************/
+
+#include "img.h"
+
+u8 iwl_fw_lookup_cmd_ver(const struct iwl_fw *fw, u8 grp, u8 cmd)
+{
+	const struct iwl_fw_cmd_version *entry;
+	unsigned int i;
+
+	if (!fw->ucode_capa.cmd_versions ||
+	    !fw->ucode_capa.n_cmd_versions)
+		return IWL_FW_CMD_VER_UNKNOWN;
+
+	entry = fw->ucode_capa.cmd_versions;
+	for (i = 0; i < fw->ucode_capa.n_cmd_versions; i++, entry++) {
+		if (entry->group == grp && entry->cmd == cmd)
+			return entry->cmd_ver;
+	}
+
+	return IWL_FW_CMD_VER_UNKNOWN;
+}
+EXPORT_SYMBOL_GPL(iwl_fw_lookup_cmd_ver);
+
+u8 iwl_fw_lookup_notif_ver(const struct iwl_fw *fw, u8 grp, u8 cmd, u8 def)
+{
+	const struct iwl_fw_cmd_version *entry;
+	unsigned int i;
+
+	if (!fw->ucode_capa.cmd_versions ||
+	    !fw->ucode_capa.n_cmd_versions)
+		return def;
+
+	entry = fw->ucode_capa.cmd_versions;
+	for (i = 0; i < fw->ucode_capa.n_cmd_versions; i++, entry++) {
+		if (entry->group == grp && entry->cmd == cmd) {
+			if (entry->notif_ver == IWL_FW_CMD_VER_UNKNOWN)
+				return def;
+			return entry->notif_ver;
+		}
+	}
+
+	return def;
+}
+EXPORT_SYMBOL_GPL(iwl_fw_lookup_notif_ver);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/img.h b/drivers/net/wireless/intel/iwlwifi/fw/img.h
index 90ca5f929cf9..a8630bf90b63 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/img.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/img.h
@@ -313,22 +313,7 @@ iwl_get_ucode_image(const struct iwl_fw *fw, enum iwl_ucode_type ucode_type)
 	return &fw->img[ucode_type];
 }
 
-static inline u8 iwl_mvm_lookup_cmd_ver(const struct iwl_fw *fw, u8 grp, u8 cmd)
-{
-	const struct iwl_fw_cmd_version *entry;
-	unsigned int i;
-
-	if (!fw->ucode_capa.cmd_versions ||
-	    !fw->ucode_capa.n_cmd_versions)
-		return IWL_FW_CMD_VER_UNKNOWN;
-
-	entry = fw->ucode_capa.cmd_versions;
-	for (i = 0; i < fw->ucode_capa.n_cmd_versions; i++, entry++) {
-		if (entry->group == grp && entry->cmd == cmd)
-			return entry->cmd_ver;
-	}
-
-	return IWL_FW_CMD_VER_UNKNOWN;
-}
+u8 iwl_fw_lookup_cmd_ver(const struct iwl_fw *fw, u8 grp, u8 cmd);
 
+u8 iwl_fw_lookup_notif_ver(const struct iwl_fw *fw, u8 grp, u8 cmd, u8 def);
 #endif  /* __iwl_fw_img_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index 9e21f5e5d364..cdb87139100d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -508,8 +508,8 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 		return -EBUSY;
 
 	if (new_api) {
-		u8 cmd_ver = iwl_mvm_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
-						    TOF_RANGE_REQ_CMD);
+		u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
+						   TOF_RANGE_REQ_CMD);
 
 		if (cmd_ver == 8)
 			err = iwl_mvm_ftm_start_v8(mvm, vif, req);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
index 834564198409..0b6c32098b5a 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-responder.c
@@ -136,8 +136,8 @@ iwl_mvm_ftm_responder_cmd(struct iwl_mvm *mvm,
 				    IWL_TOF_RESPONDER_CMD_VALID_STA_ID),
 		.sta_id = mvmvif->bcast_sta.sta_id,
 	};
-	u8 cmd_ver = iwl_mvm_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
-					    TOF_RESPONDER_CONFIG_CMD);
+	u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
+					   TOF_RESPONDER_CONFIG_CMD);
 	int err;
 
 	lockdep_assert_held(&mvm->mutex);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index a4038f289ab3..8fe78ce37771 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -102,9 +102,9 @@ static int iwl_set_soc_latency(struct iwl_mvm *mvm)
 	if (!mvm->trans->trans_cfg->integrated)
 		cmd.flags = cpu_to_le32(SOC_CONFIG_CMD_FLAGS_DISCRETE);
 
-	if (iwl_mvm_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
-				   SCAN_REQ_UMAC) >= 2 &&
-	    (mvm->trans->trans_cfg->low_latency_xtal))
+	if (iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
+				  SCAN_REQ_UMAC) >= 2 &&
+	    mvm->trans->trans_cfg->low_latency_xtal)
 		cmd.flags |= cpu_to_le32(SOC_CONFIG_CMD_FLAGS_LOW_LATENCY);
 
 	cmd.latency = cpu_to_le32(mvm->trans->trans_cfg->xtal_latency);
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index afcf2b98a9cb..9e2a0858108c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -2149,8 +2149,8 @@ iwl_mvm_set_chan_info_chandef(struct iwl_mvm *mvm,
 
 static inline int iwl_umac_scan_get_max_profiles(const struct iwl_fw *fw)
 {
-	u8 ver = iwl_mvm_lookup_cmd_ver(fw, IWL_ALWAYS_LONG_GROUP,
-					SCAN_OFFLOAD_UPDATE_PROFILES_CMD);
+	u8 ver = iwl_fw_lookup_cmd_ver(fw, IWL_ALWAYS_LONG_GROUP,
+				       SCAN_OFFLOAD_UPDATE_PROFILES_CMD);
 	return (ver == IWL_FW_CMD_VER_UNKNOWN || ver < 3) ?
 		IWL_SCAN_MAX_PROFILES : IWL_SCAN_MAX_PROFILES_V2;
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index dfe02440d474..b00f4a8b8424 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -612,27 +612,6 @@ static const struct iwl_fw_runtime_ops iwl_mvm_fwrt_ops = {
 	.d3_debug_enable = iwl_mvm_d3_debug_enable,
 };
 
-static u8 iwl_mvm_lookup_notif_ver(struct iwl_mvm *mvm, u8 grp, u8 cmd, u8 def)
-{
-	const struct iwl_fw_cmd_version *entry;
-	unsigned int i;
-
-	if (!mvm->fw->ucode_capa.cmd_versions ||
-	    !mvm->fw->ucode_capa.n_cmd_versions)
-		return def;
-
-	entry = mvm->fw->ucode_capa.cmd_versions;
-	for (i = 0; i < mvm->fw->ucode_capa.n_cmd_versions; i++, entry++) {
-		if (entry->group == grp && entry->cmd == cmd) {
-			if (entry->notif_ver == IWL_FW_CMD_VER_UNKNOWN)
-				return def;
-			return entry->notif_ver;
-		}
-	}
-
-	return def;
-}
-
 static struct iwl_op_mode *
 iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 		      const struct iwl_fw *fw, struct dentry *dbgfs_dir)
@@ -745,7 +724,8 @@ iwl_op_mode_mvm_start(struct iwl_trans *trans, const struct iwl_cfg *cfg,
 	INIT_DELAYED_WORK(&mvm->cs_tx_unblock_dwork, iwl_mvm_tx_unblock_dwork);
 
 	mvm->cmd_ver.d0i3_resp =
-		iwl_mvm_lookup_notif_ver(mvm, LEGACY_GROUP, D0I3_END_CMD, 0);
+		iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP, D0I3_END_CMD,
+					0);
 	/* we only support version 1 */
 	if (WARN_ON_ONCE(mvm->cmd_ver.d0i3_resp > 1))
 		goto out_free;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
index bc48113f0568..51a061b138ba 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/scan.c
@@ -2228,8 +2228,8 @@ static int iwl_mvm_build_scan_cmd(struct iwl_mvm *mvm,
 
 	hcmd->id = iwl_cmd_id(SCAN_REQ_UMAC, IWL_ALWAYS_LONG_GROUP, 0);
 
-	scan_ver = iwl_mvm_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
-					  SCAN_REQ_UMAC);
+	scan_ver = iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
+					 SCAN_REQ_UMAC);
 
 	for (i = 0; i < ARRAY_SIZE(iwl_scan_umac_handlers); i++) {
 		const struct iwl_scan_umac_handler *ver_handler =
@@ -2568,8 +2568,8 @@ static int iwl_scan_req_umac_get_size(u8 scan_ver)
 int iwl_mvm_scan_size(struct iwl_mvm *mvm)
 {
 	int base_size, tail_size;
-	u8 scan_ver = iwl_mvm_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
-					     SCAN_REQ_UMAC);
+	u8 scan_ver = iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
+					    SCAN_REQ_UMAC);
 
 	base_size = iwl_scan_req_umac_get_size(scan_ver);
 	if (base_size)
-- 
cgit v1.2.3-59-g8ed1b


From 7a99c877ae8e2f1b4bd9811addd337900d24b3ae Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Fri, 17 Apr 2020 13:21:36 +0300
Subject: iwlwifi: dbg: support multiple dumps in legacy dump flow

Support multiple debug data collection triggers in legacy flow.
Utilize the already existing Yoyo infra so the change is rather simple.

Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.5be6a1923cbe.I10701236b03f66328041f2a38f5f0f22a26fd40b@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c       | 97 +++++++++++++++--------
 drivers/net/wireless/intel/iwlwifi/fw/dbg.h       | 11 ---
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h   | 14 +++-
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c |  1 -
 4 files changed, 72 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 14ac7153a3e7..8daa83cdc72c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -818,7 +818,8 @@ static void iwl_dump_paging(struct iwl_fw_runtime *fwrt,
 
 static struct iwl_fw_error_dump_file *
 iwl_fw_error_dump_file(struct iwl_fw_runtime *fwrt,
-		       struct iwl_fw_dump_ptrs *fw_error_dump)
+		       struct iwl_fw_dump_ptrs *fw_error_dump,
+		       struct iwl_fwrt_dump_data *data)
 {
 	struct iwl_fw_error_dump_file *dump_file;
 	struct iwl_fw_error_dump_data *dump_data;
@@ -900,15 +901,15 @@ iwl_fw_error_dump_file(struct iwl_fw_runtime *fwrt,
 	}
 
 	/* If we only want a monitor dump, reset the file length */
-	if (fwrt->dump.monitor_only) {
+	if (data->monitor_only) {
 		file_len = sizeof(*dump_file) + sizeof(*dump_data) * 2 +
 			   sizeof(*dump_info) + sizeof(*dump_smem_cfg);
 	}
 
 	if (iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_ERROR_INFO) &&
-	    fwrt->dump.desc)
+	    data->desc)
 		file_len += sizeof(*dump_data) + sizeof(*dump_trig) +
-			    fwrt->dump.desc->len;
+			data->desc->len;
 
 	dump_file = vzalloc(file_len);
 	if (!dump_file)
@@ -984,19 +985,19 @@ iwl_fw_error_dump_file(struct iwl_fw_runtime *fwrt,
 		iwl_read_radio_regs(fwrt, &dump_data);
 
 	if (iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_ERROR_INFO) &&
-	    fwrt->dump.desc) {
+	    data->desc) {
 		dump_data->type = cpu_to_le32(IWL_FW_ERROR_DUMP_ERROR_INFO);
 		dump_data->len = cpu_to_le32(sizeof(*dump_trig) +
-					     fwrt->dump.desc->len);
+					     data->desc->len);
 		dump_trig = (void *)dump_data->data;
-		memcpy(dump_trig, &fwrt->dump.desc->trig_desc,
-		       sizeof(*dump_trig) + fwrt->dump.desc->len);
+		memcpy(dump_trig, &data->desc->trig_desc,
+		       sizeof(*dump_trig) + data->desc->len);
 
 		dump_data = iwl_fw_error_next_data(dump_data);
 	}
 
 	/* In case we only want monitor dump, skip to dump trasport data */
-	if (fwrt->dump.monitor_only)
+	if (data->monitor_only)
 		goto out;
 
 	if (iwl_fw_dbg_type_on(fwrt, IWL_FW_ERROR_DUMP_MEM)) {
@@ -2172,7 +2173,21 @@ static u32 iwl_dump_ini_file_gen(struct iwl_fw_runtime *fwrt,
 	return le32_to_cpu(hdr->file_len);
 }
 
-static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
+static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt,
+					 const struct iwl_fw_dump_desc **desc)
+{
+	if (desc && *desc != &iwl_dump_desc_assert)
+		kfree(*desc);
+
+	*desc = NULL;
+	fwrt->dump.lmac_err_id[0] = 0;
+	if (fwrt->smem_cfg.num_lmacs > 1)
+		fwrt->dump.lmac_err_id[1] = 0;
+	fwrt->dump.umac_err_id = 0;
+}
+
+static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt,
+			      struct iwl_fwrt_dump_data *dump_data)
 {
 	struct iwl_fw_dump_ptrs fw_error_dump = {};
 	struct iwl_fw_error_dump_file *dump_file;
@@ -2180,11 +2195,11 @@ static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
 	u32 file_len;
 	u32 dump_mask = fwrt->fw->dbg.dump_mask;
 
-	dump_file = iwl_fw_error_dump_file(fwrt, &fw_error_dump);
+	dump_file = iwl_fw_error_dump_file(fwrt, &fw_error_dump, dump_data);
 	if (!dump_file)
-		goto out;
+		return;
 
-	if (fwrt->dump.monitor_only)
+	if (dump_data->monitor_only)
 		dump_mask &= IWL_FW_ERROR_DUMP_FW_MONITOR;
 
 	fw_error_dump.trans_ptr = iwl_trans_dump_data(fwrt->trans, dump_mask);
@@ -2213,9 +2228,6 @@ static void iwl_fw_error_dump(struct iwl_fw_runtime *fwrt)
 	}
 	vfree(fw_error_dump.fwrt_ptr);
 	vfree(fw_error_dump.trans_ptr);
-
-out:
-	iwl_fw_free_dump_desc(fwrt);
 }
 
 static void iwl_dump_ini_list_free(struct list_head *list)
@@ -2244,7 +2256,7 @@ static void iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt,
 	u32 file_len = iwl_dump_ini_file_gen(fwrt, dump_data, &dump_list);
 
 	if (!file_len)
-		goto out;
+		return;
 
 	sg_dump_data = alloc_sgtable(file_len);
 	if (sg_dump_data) {
@@ -2261,9 +2273,6 @@ static void iwl_fw_error_ini_dump(struct iwl_fw_runtime *fwrt,
 			       GFP_KERNEL);
 	}
 	iwl_dump_ini_list_free(&dump_list);
-
-out:
-	iwl_fw_error_dump_data_free(dump_data);
 }
 
 const struct iwl_fw_dump_desc iwl_dump_desc_assert = {
@@ -2278,27 +2287,40 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
 			    bool monitor_only,
 			    unsigned int delay)
 {
+	struct iwl_fwrt_wk_data *wk_data;
+	unsigned long idx;
+
 	if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
-		iwl_fw_free_dump_desc(fwrt);
+		iwl_fw_free_dump_desc(fwrt, &desc);
 		return 0;
 	}
 
-	/* use wks[0] since dump flow prior to ini does not need to support
-	 * consecutive triggers collection
+	/*
+	 * Check there is an available worker.
+	 * ffz return value is undefined if no zero exists,
+	 * so check against ~0UL first.
 	 */
-	if (test_and_set_bit(fwrt->dump.wks[0].idx, &fwrt->dump.active_wks))
+	if (fwrt->dump.active_wks == ~0UL)
 		return -EBUSY;
 
-	if (WARN_ON(fwrt->dump.desc))
-		iwl_fw_free_dump_desc(fwrt);
+	idx = ffz(fwrt->dump.active_wks);
+
+	if (idx >= IWL_FW_RUNTIME_DUMP_WK_NUM ||
+	    test_and_set_bit(fwrt->dump.wks[idx].idx, &fwrt->dump.active_wks))
+		return -EBUSY;
+
+	wk_data = &fwrt->dump.wks[idx];
+
+	if (WARN_ON(wk_data->dump_data.desc))
+		iwl_fw_free_dump_desc(fwrt, &wk_data->dump_data.desc);
+
+	wk_data->dump_data.desc = desc;
+	wk_data->dump_data.monitor_only = monitor_only;
 
 	IWL_WARN(fwrt, "Collecting data: trigger %d fired.\n",
 		 le32_to_cpu(desc->trig_desc.type));
 
-	fwrt->dump.desc = desc;
-	fwrt->dump.monitor_only = monitor_only;
-
-	schedule_delayed_work(&fwrt->dump.wks[0].wk, usecs_to_jiffies(delay));
+	schedule_delayed_work(&wk_data->wk, usecs_to_jiffies(delay));
 
 	return 0;
 }
@@ -2504,14 +2526,14 @@ IWL_EXPORT_SYMBOL(iwl_fw_start_dbg_conf);
 static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
 {
 	struct iwl_fw_dbg_params params = {0};
+	struct iwl_fwrt_dump_data *dump_data =
+		&fwrt->dump.wks[wk_idx].dump_data;
 
 	if (!test_bit(wk_idx, &fwrt->dump.active_wks))
 		return;
 
-	if (fwrt->ops && fwrt->ops->fw_running &&
-	    !fwrt->ops->fw_running(fwrt->ops_ctx)) {
-		IWL_ERR(fwrt, "Firmware not running - cannot dump error\n");
-		iwl_fw_free_dump_desc(fwrt);
+	if (!test_bit(STATUS_DEVICE_ENABLED, &fwrt->trans->status)) {
+		IWL_ERR(fwrt, "Device is not enabled - cannot dump error\n");
 		goto out;
 	}
 
@@ -2527,12 +2549,17 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
 	if (iwl_trans_dbg_ini_valid(fwrt->trans))
 		iwl_fw_error_ini_dump(fwrt, &fwrt->dump.wks[wk_idx].dump_data);
 	else
-		iwl_fw_error_dump(fwrt);
+		iwl_fw_error_dump(fwrt, &fwrt->dump.wks[wk_idx].dump_data);
 	IWL_DEBUG_FW_INFO(fwrt, "WRT: Data collection done\n");
 
 	iwl_fw_dbg_stop_restart_recording(fwrt, &params, false);
 
 out:
+	if (iwl_trans_dbg_ini_valid(fwrt->trans))
+		iwl_fw_error_dump_data_free(dump_data);
+	else
+		iwl_fw_free_dump_desc(fwrt, &dump_data->desc);
+
 	clear_bit(wk_idx, &fwrt->dump.active_wks);
 }
 
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
index 9d3513213f5f..11558df36b94 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.h
@@ -98,17 +98,6 @@ struct iwl_fw_dbg_params {
 
 extern const struct iwl_fw_dump_desc iwl_dump_desc_assert;
 
-static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt)
-{
-	if (fwrt->dump.desc != &iwl_dump_desc_assert)
-		kfree(fwrt->dump.desc);
-	fwrt->dump.desc = NULL;
-	fwrt->dump.lmac_err_id[0] = 0;
-	if (fwrt->smem_cfg.num_lmacs > 1)
-		fwrt->dump.lmac_err_id[1] = 0;
-	fwrt->dump.umac_err_id = 0;
-}
-
 int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
 			    const struct iwl_fw_dump_desc *desc,
 			    bool monitor_only, unsigned int delay);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index da0d90e2b537..9906d9b9bdd5 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -98,8 +98,16 @@ struct iwl_fwrt_shared_mem_cfg {
  * @fw_pkt: packet received from FW
  */
 struct iwl_fwrt_dump_data {
-	struct iwl_fw_ini_trigger_tlv *trig;
-	struct iwl_rx_packet *fw_pkt;
+	union {
+		struct {
+			struct iwl_fw_ini_trigger_tlv *trig;
+			struct iwl_rx_packet *fw_pkt;
+		};
+		struct {
+			const struct iwl_fw_dump_desc *desc;
+			bool monitor_only;
+		};
+	};
 };
 
 /**
@@ -162,8 +170,6 @@ struct iwl_fw_runtime {
 
 	/* debug */
 	struct {
-		const struct iwl_fw_dump_desc *desc;
-		bool monitor_only;
 		struct iwl_fwrt_wk_data wks[IWL_FW_RUNTIME_DUMP_WK_NUM];
 		unsigned long active_wks;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 7aa1350b093e..853ba7b8bf3f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1264,7 +1264,6 @@ static void iwl_mvm_mac_stop(struct ieee80211_hw *hw)
 
 	cancel_delayed_work_sync(&mvm->cs_tx_unblock_dwork);
 	cancel_delayed_work_sync(&mvm->scan_timeout_dwork);
-	iwl_fw_free_dump_desc(&mvm->fwrt);
 
 	mutex_lock(&mvm->mutex);
 	__iwl_mvm_mac_stop(mvm);
-- 
cgit v1.2.3-59-g8ed1b


From 250380c9b8e5a1d893a8012a33667343dc75e17e Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 17 Apr 2020 13:21:37 +0300
Subject: iwlwifi: support version 9 of WOWLAN_GET_STATUS notification

Add support for the new WOWLAN_GET_STATUS notification that contains a
new element that informs the driver of TIDs whose BA sessions were
closed during suspend.

Note that the new functionality of handling these closed sessions is not
implemented in this patch.  It only aligns to the new API version.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.b02153b94c1d.Ieb6291586d60f372d5a505604b18227ef97e7202@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/d3.h | 39 +++++++++++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c    | 29 +++++++++++++------
 2 files changed, 58 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
index 3643b6ba6385..c4562e1f8d18 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/d3.h
@@ -618,7 +618,7 @@ struct iwl_wowlan_status_v6 {
  * @wake_packet_bufsize: wakeup packet buffer size
  * @wake_packet: wakeup packet
  */
-struct iwl_wowlan_status {
+struct iwl_wowlan_status_v7 {
 	struct iwl_wowlan_gtk_status gtk[WOWLAN_GTK_KEYS_NUM];
 	struct iwl_wowlan_igtk_status igtk[WOWLAN_IGTK_KEYS_NUM];
 	__le64 replay_ctr;
@@ -634,6 +634,43 @@ struct iwl_wowlan_status {
 	u8 wake_packet[]; /* can be truncated from _length to _bufsize */
 } __packed; /* WOWLAN_STATUSES_API_S_VER_7 */
 
+/**
+ * struct iwl_wowlan_status - WoWLAN status
+ * @gtk: GTK data
+ * @igtk: IGTK data
+ * @replay_ctr: GTK rekey replay counter
+ * @pattern_number: number of the matched pattern
+ * @non_qos_seq_ctr: non-QoS sequence counter to use next
+ * @qos_seq_ctr: QoS sequence counters to use next
+ * @wakeup_reasons: wakeup reasons, see &enum iwl_wowlan_wakeup_reason
+ * @num_of_gtk_rekeys: number of GTK rekeys
+ * @transmitted_ndps: number of transmitted neighbor discovery packets
+ * @received_beacons: number of received beacons
+ * @wake_packet_length: wakeup packet length
+ * @wake_packet_bufsize: wakeup packet buffer size
+ * @tid_tear_down: bit mask of tids whose BA sessions were closed
+ *		   in suspend state
+ * @reserved: unused
+ * @wake_packet: wakeup packet
+ */
+struct iwl_wowlan_status {
+	struct iwl_wowlan_gtk_status gtk[WOWLAN_GTK_KEYS_NUM];
+	struct iwl_wowlan_igtk_status igtk[WOWLAN_IGTK_KEYS_NUM];
+	__le64 replay_ctr;
+	__le16 pattern_number;
+	__le16 non_qos_seq_ctr;
+	__le16 qos_seq_ctr[8];
+	__le32 wakeup_reasons;
+	__le32 num_of_gtk_rekeys;
+	__le32 transmitted_ndps;
+	__le32 received_beacons;
+	__le32 wake_packet_length;
+	__le32 wake_packet_bufsize;
+	u8 tid_tear_down;
+	u8 reserved[3];
+	u8 wake_packet[]; /* can be truncated from _length to _bufsize */
+} __packed; /* WOWLAN_STATUSES_API_S_VER_9 */
+
 static inline u8 iwlmvm_wowlan_gtk_idx(struct iwl_wowlan_gtk_status *gtk)
 {
 	return gtk->key_flags & IWL_WOWLAN_GTK_IDX_MASK;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 122ca7624073..222775714859 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1517,12 +1517,14 @@ out:
 
 struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
 {
-	struct iwl_wowlan_status *v7, *status;
+	struct iwl_wowlan_status_v7 *v7;
+	struct iwl_wowlan_status *status;
 	struct iwl_host_cmd cmd = {
 		.id = WOWLAN_GET_STATUSES,
 		.flags = CMD_WANT_SKB,
 	};
-	int ret, len, status_size;
+	int ret, len, status_size, data_size;
+	u8 notif_ver;
 
 	lockdep_assert_held(&mvm->mutex);
 
@@ -1532,13 +1534,12 @@ struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
 		return ERR_PTR(ret);
 	}
 
+	len = iwl_rx_packet_payload_len(cmd.resp_pkt);
 	if (!fw_has_api(&mvm->fw->ucode_capa,
 			IWL_UCODE_TLV_API_WOWLAN_KEY_MATERIAL)) {
 		struct iwl_wowlan_status_v6 *v6 = (void *)cmd.resp_pkt->data;
-		int data_size;
 
 		status_size = sizeof(*v6);
-		len = iwl_rx_packet_payload_len(cmd.resp_pkt);
 
 		if (len < status_size) {
 			IWL_ERR(mvm, "Invalid WoWLAN status response!\n");
@@ -1593,23 +1594,33 @@ struct iwl_wowlan_status *iwl_mvm_send_wowlan_get_status(struct iwl_mvm *mvm)
 	}
 
 	v7 = (void *)cmd.resp_pkt->data;
-	status_size = sizeof(*v7);
-	len = iwl_rx_packet_payload_len(cmd.resp_pkt);
+	notif_ver = iwl_fw_lookup_notif_ver(mvm->fw, LEGACY_GROUP,
+					    WOWLAN_GET_STATUSES, 0);
+
+	status_size = sizeof(*status);
+
+	if (notif_ver == IWL_FW_CMD_VER_UNKNOWN || notif_ver < 9)
+		status_size = sizeof(*v7);
 
 	if (len < status_size) {
 		IWL_ERR(mvm, "Invalid WoWLAN status response!\n");
 		status = ERR_PTR(-EIO);
 		goto out_free_resp;
 	}
+	data_size = ALIGN(le32_to_cpu(v7->wake_packet_bufsize), 4);
 
-	if (len != (status_size +
-		    ALIGN(le32_to_cpu(v7->wake_packet_bufsize), 4))) {
+	if (len != (status_size + data_size)) {
 		IWL_ERR(mvm, "Invalid WoWLAN status response!\n");
 		status = ERR_PTR(-EIO);
 		goto out_free_resp;
 	}
 
-	status = kmemdup(v7, len, GFP_KERNEL);
+	status = kzalloc(sizeof(*status) + data_size, GFP_KERNEL);
+	if (!status)
+		goto out_free_resp;
+
+	memcpy(status, v7, status_size);
+	memcpy(status->wake_packet, (u8 *)v7 + status_size, data_size);
 
 out_free_resp:
 	iwl_free_resp(&cmd);
-- 
cgit v1.2.3-59-g8ed1b


From df67a1bea0378488a0454f2be2609349fda86727 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 13:21:38 +0300
Subject: iwlwifi: pcie: use seq_file for tx_queue debugfs file

On newer hardware, the tx_queue debugfs file would need to
allocate 37.5 KiB of data since there are 512 queues, which is
too much. Rather than resorting to kludges like kvmalloc(),
use the seq_file API to print out the data.

While at it, also fix a NULL pointer dereference here, the
txq pointer from trans_pcie->txqs[] may be NULL if that
queue hasn't been allocated.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.491cf8224c49.I7f154d81e5becef3b5ff22d7c6e36170bde0d7d5@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 121 +++++++++++++++++-------
 1 file changed, 89 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index e4cbd8daa7c6..3c33c01cda60 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -70,6 +70,7 @@
 #include <linux/vmalloc.h>
 #include <linux/module.h>
 #include <linux/wait.h>
+#include <linux/seq_file.h>
 
 #include "iwl-drv.h"
 #include "iwl-trans.h"
@@ -2544,44 +2545,94 @@ static const struct file_operations iwl_dbgfs_##name##_ops = {		\
 	.llseek = generic_file_llseek,					\
 };
 
-static ssize_t iwl_dbgfs_tx_queue_read(struct file *file,
-				       char __user *user_buf,
-				       size_t count, loff_t *ppos)
+struct iwl_dbgfs_tx_queue_priv {
+	struct iwl_trans *trans;
+};
+
+struct iwl_dbgfs_tx_queue_state {
+	loff_t pos;
+};
+
+static void *iwl_dbgfs_tx_queue_seq_start(struct seq_file *seq, loff_t *pos)
 {
-	struct iwl_trans *trans = file->private_data;
+	struct iwl_dbgfs_tx_queue_priv *priv = seq->private;
+	struct iwl_dbgfs_tx_queue_state *state;
+
+	if (*pos >= priv->trans->trans_cfg->base_params->num_of_queues)
+		return NULL;
+
+	state = kmalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+	state->pos = *pos;
+	return state;
+}
+
+static void *iwl_dbgfs_tx_queue_seq_next(struct seq_file *seq,
+					 void *v, loff_t *pos)
+{
+	struct iwl_dbgfs_tx_queue_priv *priv = seq->private;
+	struct iwl_dbgfs_tx_queue_state *state = v;
+
+	*pos = ++state->pos;
+
+	if (*pos >= priv->trans->trans_cfg->base_params->num_of_queues)
+		return NULL;
+
+	return state;
+}
+
+static void iwl_dbgfs_tx_queue_seq_stop(struct seq_file *seq, void *v)
+{
+	kfree(v);
+}
+
+static int iwl_dbgfs_tx_queue_seq_show(struct seq_file *seq, void *v)
+{
+	struct iwl_dbgfs_tx_queue_priv *priv = seq->private;
+	struct iwl_dbgfs_tx_queue_state *state = v;
+	struct iwl_trans *trans = priv->trans;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq;
-	char *buf;
-	int pos = 0;
-	int cnt;
-	int ret;
-	size_t bufsz;
+	struct iwl_txq *txq = trans_pcie->txq[state->pos];
+
+	seq_printf(seq, "hwq %.3u: used=%d stopped=%d ",
+		   (unsigned int)state->pos,
+		   !!test_bit(state->pos, trans_pcie->queue_used),
+		   !!test_bit(state->pos, trans_pcie->queue_stopped));
+	if (txq)
+		seq_printf(seq,
+			   "read=%u write=%u need_update=%d frozen=%d",
+			   txq->read_ptr, txq->write_ptr,
+			   txq->need_update, txq->frozen);
+	else
+		seq_puts(seq, "(unallocated)");
 
-	bufsz = sizeof(char) * 75 *
-		trans->trans_cfg->base_params->num_of_queues;
+	if (state->pos == trans_pcie->cmd_queue)
+		seq_puts(seq, " (HCMD)");
+	seq_puts(seq, "\n");
 
-	if (!trans_pcie->txq_memory)
-		return -EAGAIN;
+	return 0;
+}
 
-	buf = kzalloc(bufsz, GFP_KERNEL);
-	if (!buf)
+static const struct seq_operations iwl_dbgfs_tx_queue_seq_ops = {
+	.start = iwl_dbgfs_tx_queue_seq_start,
+	.next = iwl_dbgfs_tx_queue_seq_next,
+	.stop = iwl_dbgfs_tx_queue_seq_stop,
+	.show = iwl_dbgfs_tx_queue_seq_show,
+};
+
+static int iwl_dbgfs_tx_queue_open(struct inode *inode, struct file *filp)
+{
+	struct iwl_dbgfs_tx_queue_priv *priv;
+
+	priv = __seq_open_private(filp, &iwl_dbgfs_tx_queue_seq_ops,
+				  sizeof(*priv));
+
+	if (!priv)
 		return -ENOMEM;
 
-	for (cnt = 0;
-	     cnt < trans->trans_cfg->base_params->num_of_queues;
-	     cnt++) {
-		txq = trans_pcie->txq[cnt];
-		pos += scnprintf(buf + pos, bufsz - pos,
-				"hwq %.2d: read=%u write=%u use=%d stop=%d need_update=%d frozen=%d%s\n",
-				cnt, txq->read_ptr, txq->write_ptr,
-				!!test_bit(cnt, trans_pcie->queue_used),
-				 !!test_bit(cnt, trans_pcie->queue_stopped),
-				 txq->need_update, txq->frozen,
-				 (cnt == trans_pcie->cmd_queue ? " HCMD" : ""));
-	}
-	ret = simple_read_from_buffer(user_buf, count, ppos, buf, pos);
-	kfree(buf);
-	return ret;
+	priv->trans = inode->i_private;
+	return 0;
 }
 
 static ssize_t iwl_dbgfs_rx_queue_read(struct file *file,
@@ -2914,9 +2965,15 @@ static ssize_t iwl_dbgfs_monitor_data_read(struct file *file,
 DEBUGFS_READ_WRITE_FILE_OPS(interrupt);
 DEBUGFS_READ_FILE_OPS(fh_reg);
 DEBUGFS_READ_FILE_OPS(rx_queue);
-DEBUGFS_READ_FILE_OPS(tx_queue);
 DEBUGFS_WRITE_FILE_OPS(csr);
 DEBUGFS_READ_WRITE_FILE_OPS(rfkill);
+static const struct file_operations iwl_dbgfs_tx_queue_ops = {
+	.owner = THIS_MODULE,
+	.open = iwl_dbgfs_tx_queue_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
 
 static const struct file_operations iwl_dbgfs_monitor_data_ops = {
 	.read = iwl_dbgfs_monitor_data_read,
-- 
cgit v1.2.3-59-g8ed1b


From 95a9e44f8fb2626e4d0cb642ae6b5f6f30c5fb58 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 13:21:39 +0300
Subject: iwlwifi: pcie: add n_window/ampdu to tx_queue debugfs

Add the n_window and ampdu parameters so we can see them.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.a2cc1f36008f.Iea23802bb64a08de410223e9af4431dfcadf121b@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 3c33c01cda60..06785c46c50d 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -2601,9 +2601,10 @@ static int iwl_dbgfs_tx_queue_seq_show(struct seq_file *seq, void *v)
 		   !!test_bit(state->pos, trans_pcie->queue_stopped));
 	if (txq)
 		seq_printf(seq,
-			   "read=%u write=%u need_update=%d frozen=%d",
+			   "read=%u write=%u need_update=%d frozen=%d n_window=%d ampdu=%d",
 			   txq->read_ptr, txq->write_ptr,
-			   txq->need_update, txq->frozen);
+			   txq->need_update, txq->frozen,
+			   txq->n_window, txq->ampdu);
 	else
 		seq_puts(seq, "(unallocated)");
 
-- 
cgit v1.2.3-59-g8ed1b


From 161158d7af3f67b15ee681c2b26b99ba461da9a6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 13:21:40 +0300
Subject: iwlwifi: pcie: gen2: minor code cleanups in byte table update

One line should be indented less, otherwise it looks like it
belongs inside the parentheses, which clearly it doesn't; also
some variables can move into their respective if branches.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.a4858aa0441b.I0e70e4a5493fe6b8db6390f9349ff0e7888ab240@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 86fc00167817..a30f6b080201 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -90,9 +90,7 @@ static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie,
 					  struct iwl_txq *txq, u16 byte_cnt,
 					  int num_tbs)
 {
-	struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;
 	struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
-	struct iwl_gen3_bc_tbl *scd_bc_tbl_gen3 = txq->bc_tbl.addr;
 	int idx = iwl_pcie_get_cmd_index(txq, txq->write_ptr);
 	u8 filled_tfd_size, num_fetch_chunks;
 	u16 len = byte_cnt;
@@ -102,7 +100,7 @@ static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie,
 		return;
 
 	filled_tfd_size = offsetof(struct iwl_tfh_tfd, tbs) +
-				   num_tbs * sizeof(struct iwl_tfh_tb);
+			  num_tbs * sizeof(struct iwl_tfh_tb);
 	/*
 	 * filled_tfd_size contains the number of filled bytes in the TFD.
 	 * Dividing it by 64 will give the number of chunks to fetch
@@ -114,12 +112,16 @@ static void iwl_pcie_gen2_update_byte_tbl(struct iwl_trans_pcie *trans_pcie,
 	num_fetch_chunks = DIV_ROUND_UP(filled_tfd_size, 64) - 1;
 
 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
+		struct iwl_gen3_bc_tbl *scd_bc_tbl_gen3 = txq->bc_tbl.addr;
+
 		/* Starting from AX210, the HW expects bytes */
 		WARN_ON(trans_pcie->bc_table_dword);
 		WARN_ON(len > 0x3FFF);
 		bc_ent = cpu_to_le16(len | (num_fetch_chunks << 14));
 		scd_bc_tbl_gen3->tfd_offset[idx] = bc_ent;
 	} else {
+		struct iwlagn_scd_bc_tbl *scd_bc_tbl = txq->bc_tbl.addr;
+
 		/* Before AX210, the HW expects DW */
 		WARN_ON(!trans_pcie->bc_table_dword);
 		len = DIV_ROUND_UP(len, 4);
-- 
cgit v1.2.3-59-g8ed1b


From a548c69d2e0f010cd7b77404f94660a6c789abc8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 17 Apr 2020 13:21:41 +0300
Subject: iwlwifi: mvm: add DCM flag to rate pretty-print

It's useful to know if DCM was enabled, add this flag
to the rate pretty-printer.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.891bb9741eca.Ia66448f7e00be9e4c9ea7147b90d4fcd5f1d3845@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/rs.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
index c1aba2bf73cf..1b6cbcf57d5f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs.c
@@ -3740,11 +3740,12 @@ int rs_pretty_print_rate(char *buf, int bufsz, const u32 rate)
 	}
 
 	return scnprintf(buf, bufsz,
-			 "0x%x: %s | ANT: %s BW: %s MCS: %d NSS: %d %s%s%s%s",
+			 "0x%x: %s | ANT: %s BW: %s MCS: %d NSS: %d %s%s%s%s%s",
 			 rate, type, rs_pretty_ant(ant), bw, mcs, nss,
 			 (rate & RATE_MCS_SGI_MSK) ? "SGI " : "NGI ",
 			 (rate & RATE_MCS_STBC_MSK) ? "STBC " : "",
 			 (rate & RATE_MCS_LDPC_MSK) ? "LDPC " : "",
+			 (rate & RATE_HE_DUAL_CARRIER_MODE_MSK) ? "DCM " : "",
 			 (rate & RATE_MCS_BF_MSK) ? "BF " : "");
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f05f8edd90f1f637b60c4ed07a4f387052c84cbb Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Fri, 17 Apr 2020 13:21:42 +0300
Subject: iwlwifi: yoyo: support IWL_FW_INI_TIME_POINT_HOST_ALIVE_TIMEOUT time
 point

Allow the driver to perform dump collection in case of alive notification
timeout in yoyo mode.

Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200417131727.bd46e6240590.Ibda6d9d330a1ae49670152cede34629b280f6cf9@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 42 +++++++++++++++++++----------
 1 file changed, 28 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 8daa83cdc72c..39c8332be3ac 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2329,26 +2329,40 @@ IWL_EXPORT_SYMBOL(iwl_fw_dbg_collect_desc);
 int iwl_fw_dbg_error_collect(struct iwl_fw_runtime *fwrt,
 			     enum iwl_fw_dbg_trigger trig_type)
 {
-	int ret;
-	struct iwl_fw_dump_desc *iwl_dump_error_desc;
-
 	if (!test_bit(STATUS_DEVICE_ENABLED, &fwrt->trans->status))
 		return -EIO;
 
-	iwl_dump_error_desc = kmalloc(sizeof(*iwl_dump_error_desc), GFP_KERNEL);
-	if (!iwl_dump_error_desc)
-		return -ENOMEM;
+	if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
+		if (trig_type != FW_DBG_TRIGGER_ALIVE_TIMEOUT)
+			return -EIO;
 
-	iwl_dump_error_desc->trig_desc.type = cpu_to_le32(trig_type);
-	iwl_dump_error_desc->len = 0;
+		iwl_dbg_tlv_time_point(fwrt,
+				       IWL_FW_INI_TIME_POINT_HOST_ALIVE_TIMEOUT,
+				       NULL);
+	} else {
+		struct iwl_fw_dump_desc *iwl_dump_error_desc;
+		int ret;
 
-	ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc, false, 0);
-	if (ret)
-		kfree(iwl_dump_error_desc);
-	else
-		iwl_trans_sync_nmi(fwrt->trans);
+		iwl_dump_error_desc =
+			kmalloc(sizeof(*iwl_dump_error_desc), GFP_KERNEL);
 
-	return ret;
+		if (!iwl_dump_error_desc)
+			return -ENOMEM;
+
+		iwl_dump_error_desc->trig_desc.type = cpu_to_le32(trig_type);
+		iwl_dump_error_desc->len = 0;
+
+		ret = iwl_fw_dbg_collect_desc(fwrt, iwl_dump_error_desc,
+					      false, 0);
+		if (ret) {
+			kfree(iwl_dump_error_desc);
+			return ret;
+		}
+	}
+
+	iwl_trans_sync_nmi(fwrt->trans);
+
+	return 0;
 }
 IWL_EXPORT_SYMBOL(iwl_fw_dbg_error_collect);
 
-- 
cgit v1.2.3-59-g8ed1b


From 63417549fc8ea5d84cf7172e72a7452938755874 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 18 Apr 2020 11:08:46 +0300
Subject: iwlwifi: pcie: move iwl_pcie_ctxt_info_alloc_dma() to user

There's no need for this to be an inline in the header file,
only the ctxt-info.c file ever uses it. Move it there.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.818a06457888.Ib4f55280cd14d7edab37f2992b381c9b6ca4cd7a@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c | 15 +++++++++++++++
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h  | 16 ----------------
 2 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index acd01d86f101..b65405009d02 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -93,6 +93,21 @@ static void *iwl_pcie_ctxt_info_dma_alloc_coherent(struct iwl_trans *trans,
 	return _iwl_pcie_ctxt_info_dma_alloc_coherent(trans, size, phys, 0);
 }
 
+static int iwl_pcie_ctxt_info_alloc_dma(struct iwl_trans *trans,
+					const struct fw_desc *sec,
+					struct iwl_dram_data *dram)
+{
+	dram->block = iwl_pcie_ctxt_info_dma_alloc_coherent(trans, sec->len,
+							    &dram->physical);
+	if (!dram->block)
+		return -ENOMEM;
+
+	dram->size = sec->len;
+	memcpy(dram->block, sec->data, sec->len);
+
+	return 0;
+}
+
 void iwl_pcie_ctxt_info_free_paging(struct iwl_trans *trans)
 {
 	struct iwl_self_init_dram *dram = &trans->init_dram;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 595e6873d56e..abe649af689c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -792,22 +792,6 @@ static inline int iwl_pcie_get_num_sections(const struct fw_img *fw,
 	return i;
 }
 
-static inline int iwl_pcie_ctxt_info_alloc_dma(struct iwl_trans *trans,
-					       const struct fw_desc *sec,
-					       struct iwl_dram_data *dram)
-{
-	dram->block = dma_alloc_coherent(trans->dev, sec->len,
-					 &dram->physical,
-					 GFP_KERNEL);
-	if (!dram->block)
-		return -ENOMEM;
-
-	dram->size = sec->len;
-	memcpy(dram->block, sec->data, sec->len);
-
-	return 0;
-}
-
 static inline void iwl_pcie_ctxt_info_free_fw_img(struct iwl_trans *trans)
 {
 	struct iwl_self_init_dram *dram = &trans->init_dram;
-- 
cgit v1.2.3-59-g8ed1b


From c4ace42659b572c597e37a91902036378fe0f973 Mon Sep 17 00:00:00 2001
From: Gil Adam <gil.adam@intel.com>
Date: Sat, 18 Apr 2020 11:08:47 +0300
Subject: iwlwifi: mvm: add framework for specific phy configuration

Add framework for supporting specific PHY filter configuration,
which allows for application of various FW defined PHY filters
(one per antenna). Change phy_cfg_cmd to the new API (ver3).
Reading of configuration from platform's ACPI tables to be added
later when tables are defined.

Signed-off-by: Gil Adam <gil.adam@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.242a8f979592.I13c77a8a8dbf1a169b5052c7af1f8401ff3991ad@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/config.h | 39 ++++++++++++++--
 drivers/net/wireless/intel/iwlwifi/mvm/constants.h |  4 ++
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        | 54 ++++++++++++++++++++--
 3 files changed, 91 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/config.h b/drivers/net/wireless/intel/iwlwifi/fw/api/config.h
index 5e88fa2e6fb7..546fa60ed9fd 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/config.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/config.h
@@ -8,7 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright (C) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -119,16 +119,49 @@ enum iwl_calib_cfg {
 	IWL_CALIB_CFG_AGC_IDX			= BIT(18),
 };
 
+/**
+ * struct iwl_phy_specific_cfg - specific PHY filter configuration
+ *
+ * Sent as part of the phy configuration command (v3) to configure specific FW
+ * defined PHY filters that can be applied to each antenna.
+ *
+ * @filter_cfg_chain_a: filter config id for LMAC1 chain A
+ * @filter_cfg_chain_b: filter config id for LMAC1 chain B
+ * @filter_cfg_chain_c: filter config id for LMAC2 chain A
+ * @filter_cfg_chain_d: filter config id for LMAC2 chain B
+ * values: 0 - no filter; 0xffffffff - reserved; otherwise - filter id
+ */
+struct iwl_phy_specific_cfg {
+	__le32 filter_cfg_chain_a;
+	__le32 filter_cfg_chain_b;
+	__le32 filter_cfg_chain_c;
+	__le32 filter_cfg_chain_d;
+} __packed; /* PHY_SPECIFIC_CONFIGURATION_API_VER_1*/
+
 /**
  * struct iwl_phy_cfg_cmd - Phy configuration command
+ *
  * @phy_cfg: PHY configuration value, uses &enum iwl_fw_phy_cfg
  * @calib_control: calibration control data
  */
-struct iwl_phy_cfg_cmd {
+struct iwl_phy_cfg_cmd_v1 {
 	__le32	phy_cfg;
 	struct iwl_calib_ctrl calib_control;
 } __packed;
 
+/**
+ * struct iwl_phy_cfg_cmd_v3 - Phy configuration command (v3)
+ *
+ * @phy_cfg: PHY configuration value, uses &enum iwl_fw_phy_cfg
+ * @calib_control: calibration control data
+ * @phy_specific_cfg: configure predefined PHY filters
+ */
+struct iwl_phy_cfg_cmd_v3 {
+	__le32	phy_cfg;
+	struct iwl_calib_ctrl calib_control;
+	struct iwl_phy_specific_cfg phy_specific_cfg;
+} __packed; /* PHY_CONFIGURATION_CMD_API_S_VER_3 */
+
 /*
  * enum iwl_dc2dc_config_id - flag ids
  *
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
index 58df25e2fb32..b0268f44b2ea 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/constants.h
@@ -155,5 +155,9 @@
 #define IWL_MVM_USE_TWT				false
 #define IWL_MVM_AMPDU_CONSEC_DROPS_DELBA	10
 #define IWL_MVM_USE_NSSN_SYNC			0
+#define IWL_MVM_PHY_FILTER_CHAIN_A		0
+#define IWL_MVM_PHY_FILTER_CHAIN_B		0
+#define IWL_MVM_PHY_FILTER_CHAIN_C		0
+#define IWL_MVM_PHY_FILTER_CHAIN_D		0
 
 #endif /* __MVM_CONSTANTS_H */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 8fe78ce37771..2bc15ef13bb5 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -550,10 +550,49 @@ error:
 	return ret;
 }
 
+#ifdef CONFIG_ACPI
+static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm,
+				    struct iwl_phy_specific_cfg *phy_filters)
+{
+	/*
+	 * TODO: read specific phy config from BIOS
+	 * ACPI table for this feature has not been defined yet,
+	 * so for now we use hardcoded values.
+	 */
+
+	if (IWL_MVM_PHY_FILTER_CHAIN_A) {
+		phy_filters->filter_cfg_chain_a =
+			cpu_to_le32(IWL_MVM_PHY_FILTER_CHAIN_A);
+	}
+	if (IWL_MVM_PHY_FILTER_CHAIN_B) {
+		phy_filters->filter_cfg_chain_b =
+			cpu_to_le32(IWL_MVM_PHY_FILTER_CHAIN_B);
+	}
+	if (IWL_MVM_PHY_FILTER_CHAIN_C) {
+		phy_filters->filter_cfg_chain_c =
+			cpu_to_le32(IWL_MVM_PHY_FILTER_CHAIN_C);
+	}
+	if (IWL_MVM_PHY_FILTER_CHAIN_D) {
+		phy_filters->filter_cfg_chain_d =
+			cpu_to_le32(IWL_MVM_PHY_FILTER_CHAIN_D);
+	}
+}
+
+#else /* CONFIG_ACPI */
+
+static void iwl_mvm_phy_filter_init(struct iwl_mvm *mvm,
+				    struct iwl_phy_specific_cfg *phy_filters)
+{
+}
+#endif /* CONFIG_ACPI */
+
 static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 {
-	struct iwl_phy_cfg_cmd phy_cfg_cmd;
+	struct iwl_phy_cfg_cmd_v3 phy_cfg_cmd;
 	enum iwl_ucode_type ucode_type = mvm->fwrt.cur_fw_img;
+	struct iwl_phy_specific_cfg phy_filters = {};
+	u8 cmd_ver;
+	size_t cmd_size;
 
 	if (iwl_mvm_has_unified_ucode(mvm) &&
 	    !mvm->trans->cfg->tx_with_siso_diversity)
@@ -580,11 +619,20 @@ static int iwl_send_phy_cfg_cmd(struct iwl_mvm *mvm)
 	phy_cfg_cmd.calib_control.flow_trigger =
 		mvm->fw->default_calib[ucode_type].flow_trigger;
 
+	cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
+					PHY_CONFIGURATION_CMD);
+	if (cmd_ver == 3) {
+		iwl_mvm_phy_filter_init(mvm, &phy_filters);
+		memcpy(&phy_cfg_cmd.phy_specific_cfg, &phy_filters,
+		       sizeof(struct iwl_phy_specific_cfg));
+	}
+
 	IWL_DEBUG_INFO(mvm, "Sending Phy CFG command: 0x%x\n",
 		       phy_cfg_cmd.phy_cfg);
-
+	cmd_size = (cmd_ver == 3) ? sizeof(struct iwl_phy_cfg_cmd_v3) :
+				    sizeof(struct iwl_phy_cfg_cmd_v1);
 	return iwl_mvm_send_cmd_pdu(mvm, PHY_CONFIGURATION_CMD, 0,
-				    sizeof(phy_cfg_cmd), &phy_cfg_cmd);
+				    cmd_size, &phy_cfg_cmd);
 }
 
 int iwl_run_init_mvm_ucode(struct iwl_mvm *mvm, bool read_nvm)
-- 
cgit v1.2.3-59-g8ed1b


From 9dede812455041eef3fa308af015825ce9c701d0 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 18 Apr 2020 11:08:48 +0300
Subject: iwlwifi: remove deprecated and unused iwl_mvm_keyinfo struct

This struct hasn't been used in years and is just a remnant of an API
support removal that missed this structure.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.93860da2d12a.Ifeca3b3313e3f14330317bc3e3d62f7d991ec955@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/sta.h | 26 -------------------------
 1 file changed, 26 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
index 970e9e508ad0..c010e6febbf4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/sta.h
@@ -245,32 +245,6 @@ enum iwl_sta_sleep_flag {
 #define STA_KEY_LEN_WEP40 (5)
 #define STA_KEY_LEN_WEP104 (13)
 
-/**
- * struct iwl_mvm_keyinfo - key information
- * @key_flags: type &enum iwl_sta_key_flag
- * @tkip_rx_tsc_byte2: TSC[2] for key mix ph1 detection
- * @reserved1: reserved
- * @tkip_rx_ttak: 10-byte unicast TKIP TTAK for Rx
- * @key_offset: key offset in the fw's key table
- * @reserved2: reserved
- * @key: 16-byte unicast decryption key
- * @tx_secur_seq_cnt: initial RSC / PN needed for replay check
- * @hw_tkip_mic_rx_key: byte: MIC Rx Key - used for TKIP only
- * @hw_tkip_mic_tx_key: byte: MIC Tx Key - used for TKIP only
- */
-struct iwl_mvm_keyinfo {
-	__le16 key_flags;
-	u8 tkip_rx_tsc_byte2;
-	u8 reserved1;
-	__le16 tkip_rx_ttak[5];
-	u8 key_offset;
-	u8 reserved2;
-	u8 key[16];
-	__le64 tx_secur_seq_cnt;
-	__le64 hw_tkip_mic_rx_key;
-	__le64 hw_tkip_mic_tx_key;
-} __packed;
-
 #define IWL_ADD_STA_STATUS_MASK		0xFF
 #define IWL_ADD_STA_BAID_VALID_MASK	0x8000
 #define IWL_ADD_STA_BAID_MASK		0x7F00
-- 
cgit v1.2.3-59-g8ed1b


From 9efab1ad3ffb5b5ecbe24ea5ace420a9b7466338 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Sat, 18 Apr 2020 11:08:49 +0300
Subject: iwlwifi: remove fw_monitor module parameter

This module parameter is no longer useful now that other debug
infrastructure was added to iwlwifi.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.03bd49c3432b.Ie62047d3b364b19c8c3584ea37790220466f2a8d@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c       |  4 ----
 drivers/net/wireless/intel/iwlwifi/iwl-modparams.h |  2 --
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c    | 15 +--------------
 3 files changed, 1 insertion(+), 20 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index ff52e69c1c80..9ff12c207f25 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -1872,10 +1872,6 @@ module_param_named(power_level, iwlwifi_mod_params.power_level, int, 0444);
 MODULE_PARM_DESC(power_level,
 		 "default power save level (range from 1 - 5, default: 1)");
 
-module_param_named(fw_monitor, iwlwifi_mod_params.fw_monitor, bool, 0444);
-MODULE_PARM_DESC(fw_monitor,
-		 "firmware monitor - to debug FW (default: false - needs lots of memory)");
-
 module_param_named(disable_11ac, iwlwifi_mod_params.disable_11ac, bool, 0444);
 MODULE_PARM_DESC(disable_11ac, "Disable VHT capabilities (default: false)");
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
index 82e5cac23d8d..b094cc1e9be0 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
@@ -115,7 +115,6 @@ enum iwl_uapsd_disable {
  * @nvm_file: specifies a external NVM file
  * @uapsd_disable: disable U-APSD, see &enum iwl_uapsd_disable, default =
  *	IWL_DISABLE_UAPSD_BSS | IWL_DISABLE_UAPSD_P2P_CLIENT
- * @fw_monitor: allow to use firmware monitor
  * @disable_11ac: disable VHT capabilities, default = false.
  * @remove_when_gone: remove an inaccessible device from the PCIe bus.
  * @enable_ini: enable new FW debug infratructure (INI TLVs)
@@ -135,7 +134,6 @@ struct iwl_mod_params {
 	int antenna_coupling;
 	char *nvm_file;
 	u32 uapsd_disable;
-	bool fw_monitor;
 	bool disable_11ac;
 	/**
 	 * @disable_11ax: disable HE capabilities, default = false
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 06785c46c50d..a0daae058c1c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1019,21 +1019,8 @@ static int iwl_pcie_load_given_ucode(struct iwl_trans *trans,
 			return ret;
 	}
 
-	/* supported for 7000 only for the moment */
-	if (iwlwifi_mod_params.fw_monitor &&
-	    trans->trans_cfg->device_family == IWL_DEVICE_FAMILY_7000) {
-		struct iwl_dram_data *fw_mon = &trans->dbg.fw_mon;
-
-		iwl_pcie_alloc_fw_monitor(trans, 0);
-		if (fw_mon->size) {
-			iwl_write_prph(trans, MON_BUFF_BASE_ADDR,
-				       fw_mon->physical >> 4);
-			iwl_write_prph(trans, MON_BUFF_END_ADDR,
-				       (fw_mon->physical + fw_mon->size) >> 4);
-		}
-	} else if (iwl_pcie_dbg_on(trans)) {
+	if (iwl_pcie_dbg_on(trans))
 		iwl_pcie_apply_destination(trans);
-	}
 
 	iwl_enable_interrupts(trans);
 
-- 
cgit v1.2.3-59-g8ed1b


From 28dd7ccdc56fbde66d49a36dc1fce06730586681 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sat, 18 Apr 2020 11:08:50 +0300
Subject: iwlwifi: acpi: read TAS table from ACPI and send it to the FW

Read the Time Averaged SAR (TAS) table from ACPI and, if the TAS
feature is enabled in the FW, send the FW the black list of
countries in which TAS is disabled.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.40a327d32cfd.I7203f3afc8186cca34c48a1a116baac1df5eff4e@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c       | 76 ++++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h       | 17 +++++
 .../net/wireless/intel/iwlwifi/fw/api/nvm-reg.h    | 15 +++++
 drivers/net/wireless/intel/iwlwifi/fw/file.h       |  1 +
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        | 39 +++++++++++
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c       |  1 +
 6 files changed, 149 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index ba2aff3af0fe..344eba82a902 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -151,6 +151,82 @@ found:
 }
 IWL_EXPORT_SYMBOL(iwl_acpi_get_wifi_pkg);
 
+int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
+		     __le32 *black_list_array,
+		     int *black_list_size)
+{
+	union acpi_object *wifi_pkg, *data;
+	int ret, tbl_rev, i;
+	bool enabled;
+
+	data = iwl_acpi_get_object(fwrt->dev, ACPI_WTAS_METHOD);
+	if (IS_ERR(data))
+		return PTR_ERR(data);
+
+	wifi_pkg = iwl_acpi_get_wifi_pkg(fwrt->dev, data,
+					 ACPI_WTAS_WIFI_DATA_SIZE,
+					 &tbl_rev);
+	if (IS_ERR(wifi_pkg)) {
+		ret = PTR_ERR(wifi_pkg);
+		goto out_free;
+	}
+
+	if (wifi_pkg->package.elements[0].type != ACPI_TYPE_INTEGER ||
+	    tbl_rev != 0) {
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	enabled = !!wifi_pkg->package.elements[0].integer.value;
+
+	if (!enabled) {
+		*black_list_size = -1;
+		IWL_DEBUG_RADIO(fwrt, "TAS not enabled\n");
+		ret = 0;
+		goto out_free;
+	}
+
+	if (wifi_pkg->package.elements[1].type != ACPI_TYPE_INTEGER ||
+	    wifi_pkg->package.elements[1].integer.value >
+	    APCI_WTAS_BLACK_LIST_MAX) {
+		IWL_DEBUG_RADIO(fwrt, "TAS invalid array size %llu\n",
+				wifi_pkg->package.elements[1].integer.value);
+		ret = -EINVAL;
+		goto out_free;
+	}
+	*black_list_size = wifi_pkg->package.elements[1].integer.value;
+
+	IWL_DEBUG_RADIO(fwrt, "TAS array size %d\n", *black_list_size);
+	if (*black_list_size > APCI_WTAS_BLACK_LIST_MAX) {
+		IWL_DEBUG_RADIO(fwrt, "TAS invalid array size value %u\n",
+				*black_list_size);
+		ret = -EINVAL;
+		goto out_free;
+	}
+
+	for (i = 0; i < *black_list_size; i++) {
+		u32 country;
+
+		if (wifi_pkg->package.elements[2 + i].type !=
+		    ACPI_TYPE_INTEGER) {
+			IWL_DEBUG_RADIO(fwrt,
+					"TAS invalid array elem %d\n", 2 + i);
+			ret = -EINVAL;
+			goto out_free;
+		}
+
+		country = wifi_pkg->package.elements[2 + i].integer.value;
+		black_list_array[i] = cpu_to_le32(country);
+		IWL_DEBUG_RADIO(fwrt, "TAS black list country %d\n", country);
+	}
+
+	ret = 0;
+out_free:
+	kfree(data);
+	return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_tas);
+
 int iwl_acpi_get_mcc(struct device *dev, char *mcc)
 {
 	union acpi_object *wifi_pkg, *data;
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 5590e5cc8fbb..6a646dc524e1 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -64,6 +64,7 @@
 #include "fw/api/commands.h"
 #include "fw/api/power.h"
 #include "fw/api/phy.h"
+#include "fw/api/nvm-reg.h"
 #include "fw/img.h"
 #include "iwl-trans.h"
 
@@ -75,6 +76,7 @@
 #define ACPI_SPLC_METHOD	"SPLC"
 #define ACPI_ECKV_METHOD	"ECKV"
 #define ACPI_PPAG_METHOD	"PPAG"
+#define ACPI_WTAS_METHOD	"WTAS"
 
 #define ACPI_WIFI_DOMAIN	(0x07)
 
@@ -96,6 +98,12 @@
 #define ACPI_SPLC_WIFI_DATA_SIZE	2
 #define ACPI_ECKV_WIFI_DATA_SIZE	2
 
+/*
+ * 1 type, 1 enabled, 1 black list size, 16 black list array
+ */
+#define APCI_WTAS_BLACK_LIST_MAX	16
+#define ACPI_WTAS_WIFI_DATA_SIZE	(3 + APCI_WTAS_BLACK_LIST_MAX)
+
 #define ACPI_WGDS_NUM_BANDS		2
 #define ACPI_WGDS_TABLE_SIZE		3
 
@@ -174,6 +182,9 @@ int iwl_validate_sar_geo_profile(struct iwl_fw_runtime *fwrt,
 int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 		     struct iwl_per_chain_offset_group *table);
 
+int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt, __le32 *black_list_array,
+		     int *black_list_size);
+
 #else /* CONFIG_ACPI */
 
 static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
@@ -250,5 +261,11 @@ static inline int iwl_sar_geo_init(struct iwl_fw_runtime *fwrt,
 	return -ENOENT;
 }
 
+static inline int iwl_acpi_get_tas(struct iwl_fw_runtime *fwrt,
+				   __le32 *black_list_array,
+				   int *black_list_size)
+{
+	return -ENOENT;
+}
 #endif /* CONFIG_ACPI */
 #endif /* __iwl_fw_acpi__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 97b49843e318..2d230a7893c2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -80,6 +80,11 @@ enum iwl_regulatory_and_nvm_subcmd_ids {
 	 * response is &struct iwl_nvm_get_info_rsp
 	 */
 	NVM_GET_INFO = 0x2,
+
+	/**
+	 * @TAS_CONFIG: &struct iwl_tas_config_cmd
+	 */
+	TAS_CONFIG = 0x3,
 };
 
 /**
@@ -431,4 +436,14 @@ enum iwl_mcc_source {
 	MCC_SOURCE_GETTING_MCC_TEST_MODE = 0x11,
 };
 
+#define IWL_TAS_BLACK_LIST_MAX 16
+/**
+ * struct iwl_tas_config_cmd - configures the TAS
+ * @black_list_size: size of relevant field in black_list_array
+ * @black_list_array: black list countries (without TAS)
+ */
+struct iwl_tas_config_cmd {
+	__le32 black_list_size;
+	__le32 black_list_array[IWL_TAS_BLACK_LIST_MAX];
+} __packed; /* TAS_CONFIG_CMD_API_S_VER_2 */
 #endif /* __iwl_fw_api_nvm_reg_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/file.h b/drivers/net/wireless/intel/iwlwifi/fw/file.h
index 35f42e529a6d..1fb45fd30ffa 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/file.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/file.h
@@ -449,6 +449,7 @@ enum iwl_ucode_tlv_capa {
 	IWL_UCODE_TLV_CAPA_CS_MODIFY			= (__force iwl_ucode_tlv_capa_t)49,
 	IWL_UCODE_TLV_CAPA_SET_LTR_GEN2			= (__force iwl_ucode_tlv_capa_t)50,
 	IWL_UCODE_TLV_CAPA_SET_PPAG			= (__force iwl_ucode_tlv_capa_t)52,
+	IWL_UCODE_TLV_CAPA_TAS_CFG			= (__force iwl_ucode_tlv_capa_t)53,
 	IWL_UCODE_TLV_CAPA_SESSION_PROT_CMD		= (__force iwl_ucode_tlv_capa_t)54,
 
 	/* set 2 */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 2bc15ef13bb5..bf3eaadfb343 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -979,6 +979,40 @@ static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
 	return iwl_mvm_ppag_send_cmd(mvm);
 }
 
+static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
+{
+	int ret;
+	struct iwl_tas_config_cmd cmd = {};
+	int list_size;
+
+	BUILD_BUG_ON(ARRAY_SIZE(cmd.black_list_array) <
+		     APCI_WTAS_BLACK_LIST_MAX);
+
+	if (!fw_has_capa(&mvm->fw->ucode_capa, IWL_UCODE_TLV_CAPA_TAS_CFG)) {
+		IWL_DEBUG_RADIO(mvm, "TAS not enabled in FW\n");
+		return;
+	}
+
+	ret = iwl_acpi_get_tas(&mvm->fwrt, cmd.black_list_array, &list_size);
+	if (ret < 0) {
+		IWL_DEBUG_RADIO(mvm,
+				"TAS table invalid or unavailable. (%d)\n",
+				ret);
+		return;
+	}
+
+	if (list_size < 0)
+		return;
+
+	/* list size if TAS enabled can only be non-negative */
+	cmd.black_list_size = cpu_to_le32((u32)list_size);
+
+	ret = iwl_mvm_send_cmd_pdu(mvm, WIDE_ID(REGULATORY_AND_NVM_GROUP,
+						TAS_CONFIG),
+				   0, sizeof(cmd), &cmd);
+	if (ret < 0)
+		IWL_DEBUG_RADIO(mvm, "failed to send TAS_CONFIG (%d)\n", ret);
+}
 #else /* CONFIG_ACPI */
 
 inline int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm,
@@ -1006,6 +1040,10 @@ static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
 {
 	return 0;
 }
+
+static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
+{
+}
 #endif /* CONFIG_ACPI */
 
 void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
@@ -1333,6 +1371,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 	if (ret < 0)
 		goto error;
 
+	iwl_mvm_tas_init(mvm);
 	iwl_mvm_leds_sync(mvm);
 
 	IWL_DEBUG_INFO(mvm, "RT uCode started.\n");
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index b00f4a8b8424..d0afc806706d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -505,6 +505,7 @@ static const struct iwl_hcmd_names iwl_mvm_prot_offload_names[] = {
 static const struct iwl_hcmd_names iwl_mvm_regulatory_and_nvm_names[] = {
 	HCMD_NAME(NVM_ACCESS_COMPLETE),
 	HCMD_NAME(NVM_GET_INFO),
+	HCMD_NAME(TAS_CONFIG),
 };
 
 static const struct iwl_hcmd_arr iwl_mvm_groups[] = {
-- 
cgit v1.2.3-59-g8ed1b


From e819a80a9764aea789ec6a25d3858d2a5d9ac7bc Mon Sep 17 00:00:00 2001
From: Ihab Zhaika <ihab.zhaika@intel.com>
Date: Sat, 18 Apr 2020 11:08:51 +0300
Subject: iwlwifi: add new cards for AX family

Add a few PCI IDs for the AX family.

Signed-off-by: Ihab Zhaika <ihab.zhaika@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.5eae2261b70c.I0369619a562c4e4008e2f0a3afb9ed5d4c9b49d4@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 21 ++++++++++++++++++---
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  1 +
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   |  7 ++++++-
 3 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index bc49cdd819df..1af4ba2d30cb 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -90,7 +90,8 @@
 #define IWL_22000_SO_A_GF_A_FW_PRE      "iwlwifi-so-a0-gf-a0-"
 #define IWL_22000_TY_A_GF_A_FW_PRE      "iwlwifi-ty-a0-gf-a0-"
 #define IWL_22000_SO_A_GF4_A_FW_PRE     "iwlwifi-so-a0-gf4-a0-"
-#define IWL_22000_SOSNJ_A_GF4_A_FW_PRE  "iwlwifi-SoSnj-a0-gf4-a0-"
+#define IWL_SNJ_A_GF4_A_FW_PRE		"iwlwifi-SoSnj-a0-gf4-a0-"
+#define IWL_SNJ_A_GF_A_FW_PRE		"iwlwifi-SoSnj-a0-gf-a0-"
 
 #define IWL_22000_HR_MODULE_FIRMWARE(api) \
 	IWL_22000_HR_FW_PRE __stringify(api) ".ucode"
@@ -120,6 +121,10 @@
 	IWL_22000_SO_A_GF_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_22000_TY_A_GF_A_MODULE_FIRMWARE(api) \
 	IWL_22000_TY_A_GF_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_SNJ_A_GF4_A_MODULE_FIRMWARE(api) \
+	IWL_SNJ_A_GF4_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_SNJ_A_GF_A_MODULE_FIRMWARE(api) \
+	IWL_SNJ_A_GF_A_FW_PRE __stringify(api) ".ucode"
 
 static const struct iwl_base_params iwl_22000_base_params = {
 	.eeprom_size = OTP_LOW_IMAGE_SIZE_32K,
@@ -553,8 +558,16 @@ const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0 = {
 };
 
 const struct iwl_cfg iwlax411_2ax_cfg_sosnj_gf4_a0 = {
-	.name = "Intel(R) Wi-Fi 7 AX411 160MHz",
-	.fw_name_pre = IWL_22000_SOSNJ_A_GF4_A_FW_PRE,
+	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
+	.fw_name_pre = IWL_SNJ_A_GF4_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+};
+
+const struct iwl_cfg iwlax211_cfg_snj_gf_a0 = {
+	.name = "Intel(R) Wi-Fi 6 AX211 160MHz",
+	.fw_name_pre = IWL_SNJ_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -573,3 +586,5 @@ MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_22000_SO_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_22000_SO_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_22000_TY_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SNJ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SNJ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index d5d984d7ce83..3ed8ead8a9bb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -625,6 +625,7 @@ extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0;
 extern const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0;
 extern const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0;
 extern const struct iwl_cfg iwlax411_2ax_cfg_sosnj_gf4_a0;
+extern const struct iwl_cfg iwlax211_cfg_snj_gf_a0;
 #endif /* CPTCFG_IWLMVM || CPTCFG_IWLFMAC */
 
 #endif /* __IWL_CONFIG_H__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 6744c0281ffb..ab849aa4434e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -539,12 +539,17 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2725, 0x0310, iwlax210_2ax_cfg_ty_gf_a0)},
 	{IWL_PCI_DEVICE(0x2725, 0x0510, iwlax210_2ax_cfg_ty_gf_a0)},
 	{IWL_PCI_DEVICE(0x2725, 0x0A10, iwlax210_2ax_cfg_ty_gf_a0)},
-	{IWL_PCI_DEVICE(0x2725, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0)},
+	{IWL_PCI_DEVICE(0x2725, 0x00B0, iwlax411_2ax_cfg_sosnj_gf4_a0)},
+	{IWL_PCI_DEVICE(0x2726, 0x0090, iwlax211_cfg_snj_gf_a0)},
+	{IWL_PCI_DEVICE(0x2726, 0x00B0, iwlax411_2ax_cfg_sosnj_gf4_a0)},
+	{IWL_PCI_DEVICE(0x2726, 0x0510, iwlax211_cfg_snj_gf_a0)},
 	{IWL_PCI_DEVICE(0x7A70, 0x0090, iwlax211_2ax_cfg_so_gf_a0)},
+	{IWL_PCI_DEVICE(0x7A70, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0)},
 	{IWL_PCI_DEVICE(0x7A70, 0x0310, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7A70, 0x0510, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7A70, 0x0A10, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0090, iwlax211_2ax_cfg_so_gf_a0)},
+	{IWL_PCI_DEVICE(0x7AF0, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0310, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0510, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0A10, iwlax211_2ax_cfg_so_gf_a0)},
-- 
cgit v1.2.3-59-g8ed1b


From 4ee27edd389174b3e1a9c84a1b3a678d9e9f0934 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 18 Apr 2020 11:08:52 +0300
Subject: iwlwifi: pcie: add cfgs for SoCs with device ID 0x4DF0

A new device ID needs to be added to the list to support new SoCs.
Add it and support all subsystem IDs that other Qu devices support.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.5e5ce668ff8b.I20a9c8b3470aaabaa54361a5641637e5a14d8321@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index ab849aa4434e..bc2cdf40028e 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -526,6 +526,7 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x06F0, PCI_ANY_ID, iwl_qu_trans_cfg)},
 	{IWL_PCI_DEVICE(0x34F0, PCI_ANY_ID, iwl_qu_trans_cfg)},
 	{IWL_PCI_DEVICE(0x3DF0, PCI_ANY_ID, iwl_qu_trans_cfg)},
+	{IWL_PCI_DEVICE(0x4DF0, PCI_ANY_ID, iwl_qu_trans_cfg)},
 
 	{IWL_PCI_DEVICE(0x43F0, PCI_ANY_ID, iwl_qu_long_latency_trans_cfg)},
 	{IWL_PCI_DEVICE(0xA0F0, PCI_ANY_ID, iwl_qu_long_latency_trans_cfg)},
@@ -662,6 +663,19 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x3DF0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 
+	IWL_DEV_INFO(0x4DF0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x0310, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
+	IWL_DEV_INFO(0x4DF0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
+
 	IWL_DEV_INFO(0x2720, 0x0000, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
 	IWL_DEV_INFO(0x2720, 0x0040, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
 	IWL_DEV_INFO(0x2720, 0x0044, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-- 
cgit v1.2.3-59-g8ed1b


From 2d39683e739940b852e579b8d5005c25a8912c0a Mon Sep 17 00:00:00 2001
From: Ihab Zhaika <ihab.zhaika@intel.com>
Date: Sat, 18 Apr 2020 11:08:53 +0300
Subject: iwlwifi: update few product names in AX family

Update the product names of a few structs in the AX family.

Signed-off-by: Ihab Zhaika <ihab.zhaika@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.738dabad8732.I5673eaf8a016b8aa27ab8bab02121108fa723783@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 1af4ba2d30cb..35fa89d28fef 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -527,14 +527,14 @@ const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0 = {
 };
 
 const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0 = {
-	.name = "Intel(R) Wi-Fi 7 AX210 160MHz",
+	.name = "Intel(R) Wi-Fi 6 AX210 160MHz",
 	.fw_name_pre = IWL_22000_SO_A_HR_B_FW_PRE,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
 const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0 = {
-	.name = "Intel(R) Wi-Fi 7 AX211 160MHz",
+	.name = "Intel(R) Wi-Fi 6 AX211 160MHz",
 	.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
@@ -542,7 +542,7 @@ const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0 = {
 };
 
 const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
-	.name = "Intel(R) Wi-Fi 7 AX210 160MHz",
+	.name = "Intel(R) Wi-Fi 6 AX210 160MHz",
 	.fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
@@ -550,7 +550,7 @@ const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
 };
 
 const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0 = {
-	.name = "Intel(R) Wi-Fi 7 AX411 160MHz",
+	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
 	.fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
-- 
cgit v1.2.3-59-g8ed1b


From 0928df0a868c010c1dfb5269a23ffa2f9adc876b Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 18 Apr 2020 11:08:54 +0300
Subject: iwlwifi: mvm: tell firmware about required LTR delay

Some (integrated) devices need a longer LTR delay than the firmware
would typically apply, so tell it about that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.24276ae2ad61.I8831a538f75893d5cee47b4a81f4b9b7fd0e8bea@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  |  2 ++
 drivers/net/wireless/intel/iwlwifi/fw/api/soc.h | 12 ++++++++----
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 17 ++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c     | 14 ++++++++++++++
 4 files changed, 36 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 35fa89d28fef..1f30d8fdf35d 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -243,6 +243,7 @@ const struct iwl_cfg_trans_params iwl_qu_trans_cfg = {
 	.base_params = &iwl_22000_base_params,
 	.integrated = true,
 	.xtal_latency = 5000,
+	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_200US,
 };
 
 const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
@@ -255,6 +256,7 @@ const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
 	.integrated = true,
 	.xtal_latency = 12000,
 	.low_latency_xtal = true,
+	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US,
 };
 
 const struct iwl_cfg_trans_params iwl_qnj_trans_cfg = {
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/soc.h b/drivers/net/wireless/intel/iwlwifi/fw/api/soc.h
index aadca78e9846..0c6d7b3e1324 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/soc.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/soc.h
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2019        Intel Deutschland GmbH
+ * Copyright(c) 2012 - 2014, 2019 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2019        Intel Deutschland GmbH
+ * Copyright(c) 2012 - 2014, 2019 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -68,6 +66,12 @@
 #define SOC_CONFIG_CMD_FLAGS_DISCRETE		BIT(0)
 #define SOC_CONFIG_CMD_FLAGS_LOW_LATENCY	BIT(1)
 
+#define SOC_FLAGS_LTR_APPLY_DELAY_MASK		0xc
+#define SOC_FLAGS_LTR_APPLY_DELAY_NONE		0
+#define SOC_FLAGS_LTR_APPLY_DELAY_200		1
+#define SOC_FLAGS_LTR_APPLY_DELAY_2500		2
+#define SOC_FLAGS_LTR_APPLY_DELAY_1820		3
+
 /**
  * struct iwl_soc_configuration_cmd - Set device stabilization latency
  *
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 3ed8ead8a9bb..9b31fcc37ace 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -5,9 +5,8 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
  * Copyright (C) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -27,9 +26,8 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright (C) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -284,6 +282,13 @@ struct iwl_pwr_tx_backoff {
 	u32 backoff;
 };
 
+enum iwl_cfg_trans_ltr_delay {
+	IWL_CFG_TRANS_LTR_DELAY_NONE	= 0,
+	IWL_CFG_TRANS_LTR_DELAY_200US	= 1,
+	IWL_CFG_TRANS_LTR_DELAY_2500US	= 2,
+	IWL_CFG_TRANS_LTR_DELAY_1820US	= 3,
+};
+
 /**
  * struct iwl_cfg_trans - information needed to start the trans
  *
@@ -304,6 +309,7 @@ struct iwl_pwr_tx_backoff {
  * @mq_rx_supported: multi-queue rx support
  * @integrated: discrete or integrated
  * @low_latency_xtal: use the low latency xtal if supported
+ * @ltr_delay: LTR delay parameter, &enum iwl_cfg_trans_ltr_delay.
  */
 struct iwl_cfg_trans_params {
 	const struct iwl_base_params *base_params;
@@ -317,7 +323,8 @@ struct iwl_cfg_trans_params {
 	    mq_rx_supported:1,
 	    integrated:1,
 	    low_latency_xtal:1,
-	    bisr_workaround:1;
+	    bisr_workaround:1,
+	    ltr_delay:2;
 };
 
 /**
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index bf3eaadfb343..d6598339c55c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -102,6 +102,20 @@ static int iwl_set_soc_latency(struct iwl_mvm *mvm)
 	if (!mvm->trans->trans_cfg->integrated)
 		cmd.flags = cpu_to_le32(SOC_CONFIG_CMD_FLAGS_DISCRETE);
 
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_NONE !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_NONE);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_200US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_200);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_2500US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_2500);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_1820US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_1820);
+
+	if (mvm->trans->trans_cfg->ltr_delay != IWL_CFG_TRANS_LTR_DELAY_NONE &&
+	    !WARN_ON(!mvm->trans->trans_cfg->integrated))
+		cmd.flags |= le32_encode_bits(mvm->trans->trans_cfg->ltr_delay,
+					      SOC_FLAGS_LTR_APPLY_DELAY_MASK);
+
 	if (iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
 				  SCAN_REQ_UMAC) >= 2 &&
 	    mvm->trans->trans_cfg->low_latency_xtal)
-- 
cgit v1.2.3-59-g8ed1b


From 9c9613f0ee07b8da049914d92fc735a0e954d777 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 18 Apr 2020 11:08:55 +0300
Subject: iwlwifi: pcie: add new structure for Qu devices with medium latency

Some Qu devices require an intermediate amount of time to wake up and
for LTR notifications, so add a new structure with the correct values
for them and change the corresponding devices to use it.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.d6df2bcee78f.Ie008b0c8f03340a466c1ef981bfd25359c9de90d@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 22 +++++++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  3 ++-
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   |  7 ++++---
 3 files changed, 23 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 1f30d8fdf35d..7db4472a1ec5 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -234,6 +234,15 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
 		},							\
 	}
 
+const struct iwl_cfg_trans_params iwl_qnj_trans_cfg = {
+	.mq_rx_supported = true,
+	.use_tfh = true,
+	.rf_id = true,
+	.gen2 = true,
+	.device_family = IWL_DEVICE_FAMILY_22000,
+	.base_params = &iwl_22000_base_params,
+};
+
 const struct iwl_cfg_trans_params iwl_qu_trans_cfg = {
 	.mq_rx_supported = true,
 	.use_tfh = true,
@@ -246,7 +255,7 @@ const struct iwl_cfg_trans_params iwl_qu_trans_cfg = {
 	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_200US,
 };
 
-const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
+const struct iwl_cfg_trans_params iwl_qu_medium_latency_trans_cfg = {
 	.mq_rx_supported = true,
 	.use_tfh = true,
 	.rf_id = true,
@@ -254,18 +263,21 @@ const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
 	.device_family = IWL_DEVICE_FAMILY_22000,
 	.base_params = &iwl_22000_base_params,
 	.integrated = true,
-	.xtal_latency = 12000,
-	.low_latency_xtal = true,
-	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US,
+	.xtal_latency = 1820,
+	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_1820US,
 };
 
-const struct iwl_cfg_trans_params iwl_qnj_trans_cfg = {
+const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg = {
 	.mq_rx_supported = true,
 	.use_tfh = true,
 	.rf_id = true,
 	.gen2 = true,
 	.device_family = IWL_DEVICE_FAMILY_22000,
 	.base_params = &iwl_22000_base_params,
+	.integrated = true,
+	.xtal_latency = 12000,
+	.low_latency_xtal = true,
+	.ltr_delay = IWL_CFG_TRANS_LTR_DELAY_2500US,
 };
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 9b31fcc37ace..efb10a7f4d4f 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -513,9 +513,10 @@ struct iwl_dev_info {
 extern const struct iwl_cfg_trans_params iwl9000_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl9560_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl9560_shared_clk_trans_cfg;
+extern const struct iwl_cfg_trans_params iwl_qnj_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_qu_trans_cfg;
+extern const struct iwl_cfg_trans_params iwl_qu_medium_latency_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_qu_long_latency_trans_cfg;
-extern const struct iwl_cfg_trans_params iwl_qnj_trans_cfg;
 extern const struct iwl_cfg_trans_params iwl_ax200_trans_cfg;
 extern const char iwl9162_name[];
 extern const char iwl9260_name[];
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index bc2cdf40028e..2d78f8504bd5 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -524,9 +524,10 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 /* Qu devices */
 	{IWL_PCI_DEVICE(0x02F0, PCI_ANY_ID, iwl_qu_trans_cfg)},
 	{IWL_PCI_DEVICE(0x06F0, PCI_ANY_ID, iwl_qu_trans_cfg)},
-	{IWL_PCI_DEVICE(0x34F0, PCI_ANY_ID, iwl_qu_trans_cfg)},
-	{IWL_PCI_DEVICE(0x3DF0, PCI_ANY_ID, iwl_qu_trans_cfg)},
-	{IWL_PCI_DEVICE(0x4DF0, PCI_ANY_ID, iwl_qu_trans_cfg)},
+
+	{IWL_PCI_DEVICE(0x34F0, PCI_ANY_ID, iwl_qu_medium_latency_trans_cfg)},
+	{IWL_PCI_DEVICE(0x3DF0, PCI_ANY_ID, iwl_qu_medium_latency_trans_cfg)},
+	{IWL_PCI_DEVICE(0x4DF0, PCI_ANY_ID, iwl_qu_medium_latency_trans_cfg)},
 
 	{IWL_PCI_DEVICE(0x43F0, PCI_ANY_ID, iwl_qu_long_latency_trans_cfg)},
 	{IWL_PCI_DEVICE(0xA0F0, PCI_ANY_ID, iwl_qu_long_latency_trans_cfg)},
-- 
cgit v1.2.3-59-g8ed1b


From 62bee4862bfa03c4d4ec9205111be99210923f49 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Sat, 18 Apr 2020 11:08:56 +0300
Subject: iwlwifi: pcie: add new structs for So devices with long latency

Some So devices have a longer wake latency.  To support this properly,
add new cfg structs for them so the driver will inform the FW about
the need to use another xtal and use a higher wait value during state
transitions.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.daf515618f57.I80e60006b108e1586e3c56669635c670597fe08d@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 20 ++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  2 ++
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 10 +++++-----
 3 files changed, 27 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 7db4472a1ec5..2f741f5e3a7d 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -555,6 +555,16 @@ const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0 = {
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
+const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0_long = {
+	.name = "Intel(R) Wi-Fi 6 AX211 160MHz",
+	.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+	.trans.xtal_latency = 12000,
+	.trans.low_latency_xtal = true,
+};
+
 const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX210 160MHz",
 	.fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE,
@@ -571,6 +581,16 @@ const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0 = {
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
+const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0_long = {
+	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
+	.fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
+	.uhb_supported = true,
+	IWL_DEVICE_AX210,
+	.num_rbds = IWL_NUM_RBDS_AX210_HE,
+	.trans.xtal_latency = 12000,
+	.trans.low_latency_xtal = true,
+};
+
 const struct iwl_cfg iwlax411_2ax_cfg_sosnj_gf4_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
 	.fw_name_pre = IWL_SNJ_A_GF4_A_FW_PRE,
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index efb10a7f4d4f..3a9a33851793 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -630,8 +630,10 @@ extern const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_b0;
 extern const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0;
 extern const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0;
 extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0;
+extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0_long;
 extern const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0;
 extern const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0;
+extern const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0_long;
 extern const struct iwl_cfg iwlax411_2ax_cfg_sosnj_gf4_a0;
 extern const struct iwl_cfg iwlax211_cfg_snj_gf_a0;
 #endif /* CPTCFG_IWLMVM || CPTCFG_IWLFMAC */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 2d78f8504bd5..2083eb4f2f15 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -545,11 +545,11 @@ static const struct pci_device_id iwl_hw_card_ids[] = {
 	{IWL_PCI_DEVICE(0x2726, 0x0090, iwlax211_cfg_snj_gf_a0)},
 	{IWL_PCI_DEVICE(0x2726, 0x00B0, iwlax411_2ax_cfg_sosnj_gf4_a0)},
 	{IWL_PCI_DEVICE(0x2726, 0x0510, iwlax211_cfg_snj_gf_a0)},
-	{IWL_PCI_DEVICE(0x7A70, 0x0090, iwlax211_2ax_cfg_so_gf_a0)},
-	{IWL_PCI_DEVICE(0x7A70, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0)},
-	{IWL_PCI_DEVICE(0x7A70, 0x0310, iwlax211_2ax_cfg_so_gf_a0)},
-	{IWL_PCI_DEVICE(0x7A70, 0x0510, iwlax211_2ax_cfg_so_gf_a0)},
-	{IWL_PCI_DEVICE(0x7A70, 0x0A10, iwlax211_2ax_cfg_so_gf_a0)},
+	{IWL_PCI_DEVICE(0x7A70, 0x0090, iwlax211_2ax_cfg_so_gf_a0_long)},
+	{IWL_PCI_DEVICE(0x7A70, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0_long)},
+	{IWL_PCI_DEVICE(0x7A70, 0x0310, iwlax211_2ax_cfg_so_gf_a0_long)},
+	{IWL_PCI_DEVICE(0x7A70, 0x0510, iwlax211_2ax_cfg_so_gf_a0_long)},
+	{IWL_PCI_DEVICE(0x7A70, 0x0A10, iwlax211_2ax_cfg_so_gf_a0_long)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0090, iwlax211_2ax_cfg_so_gf_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x00B0, iwlax411_2ax_cfg_so_gf4_a0)},
 	{IWL_PCI_DEVICE(0x7AF0, 0x0310, iwlax211_2ax_cfg_so_gf_a0)},
-- 
cgit v1.2.3-59-g8ed1b


From cbc636557d2d20c4fb808c14df545b3c407a53d6 Mon Sep 17 00:00:00 2001
From: Gil Adam <gil.adam@intel.com>
Date: Sat, 18 Apr 2020 11:08:57 +0300
Subject: iwlwifi: debug: set NPK buffer in context info

When the buffer destination for ini debug is configured
to NPK (TB22DTF), set the appropriate bit in the context
info struct.

Signed-off-by: Gil Adam <gil.adam@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200418110539.3c9f0fa6033f.Id1d6c191f85efe0d6cf35434bfb186ffd46ff64c@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c   | 26 ++++++++----
 .../wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c   | 47 ++++++++++++----------
 2 files changed, 43 insertions(+), 30 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index bf2f00b89214..9eb8fbfaa2a2 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,7 +28,7 @@
  *
  * BSD LICENSE
  *
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -170,14 +170,24 @@ static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
 
 	if (le32_to_cpu(tlv->length) != sizeof(*alloc) ||
 	    (buf_location != IWL_FW_INI_LOCATION_SRAM_PATH &&
-	     buf_location != IWL_FW_INI_LOCATION_DRAM_PATH))
+	     buf_location != IWL_FW_INI_LOCATION_DRAM_PATH &&
+	     buf_location != IWL_FW_INI_LOCATION_NPK_PATH)) {
+		IWL_ERR(trans,
+			"WRT: Invalid allocation TLV\n");
+		return -EINVAL;
+	}
+
+	if ((buf_location == IWL_FW_INI_LOCATION_SRAM_PATH ||
+	     buf_location == IWL_FW_INI_LOCATION_NPK_PATH) &&
+	     alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1) {
+		IWL_ERR(trans,
+			"WRT: Allocation TLV for SMEM/NPK path must have id %u (current: %u)\n",
+			IWL_FW_INI_ALLOCATION_ID_DBGC1, alloc_id);
 		return -EINVAL;
+	}
 
-	if ((buf_location == IWL_FW_INI_LOCATION_SRAM_PATH &&
-	     alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1) ||
-	    (buf_location == IWL_FW_INI_LOCATION_DRAM_PATH &&
-	     (alloc_id == IWL_FW_INI_ALLOCATION_INVALID ||
-	      alloc_id >= IWL_FW_INI_ALLOCATION_NUM))) {
+	if (alloc_id == IWL_FW_INI_ALLOCATION_INVALID ||
+	    alloc_id >= IWL_FW_INI_ALLOCATION_NUM) {
 		IWL_ERR(trans,
 			"WRT: Invalid allocation id %u for allocation TLV\n",
 			alloc_id);
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index 01f248ba8fec..27e94e6140b3 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -18,7 +18,7 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -84,32 +84,35 @@ iwl_pcie_ctxt_info_dbg_enable(struct iwl_trans *trans,
 
 	fw_mon_cfg = &trans->dbg.fw_mon_cfg[alloc_id];
 
-	if (le32_to_cpu(fw_mon_cfg->buf_location) ==
-	    IWL_FW_INI_LOCATION_SRAM_PATH) {
+	switch (le32_to_cpu(fw_mon_cfg->buf_location)) {
+	case IWL_FW_INI_LOCATION_SRAM_PATH:
 		dbg_flags |= IWL_PRPH_SCRATCH_EDBG_DEST_INTERNAL;
-
 		IWL_DEBUG_FW(trans,
-			     "WRT: Applying SMEM buffer destination\n");
-
-		goto out;
-	}
-
-	if (le32_to_cpu(fw_mon_cfg->buf_location) ==
-	    IWL_FW_INI_LOCATION_DRAM_PATH &&
-	    trans->dbg.fw_mon_ini[alloc_id].num_frags) {
-		struct iwl_dram_data *frag =
-			&trans->dbg.fw_mon_ini[alloc_id].frags[0];
-
-		dbg_flags |= IWL_PRPH_SCRATCH_EDBG_DEST_DRAM;
+				"WRT: Applying SMEM buffer destination\n");
+		break;
 
+	case IWL_FW_INI_LOCATION_NPK_PATH:
+		dbg_flags |= IWL_PRPH_SCRATCH_EDBG_DEST_TB22DTF;
 		IWL_DEBUG_FW(trans,
-			     "WRT: Applying DRAM destination (alloc_id=%u)\n",
-			     alloc_id);
+			     "WRT: Applying NPK buffer destination\n");
+		break;
 
-		dbg_cfg->hwm_base_addr = cpu_to_le64(frag->physical);
-		dbg_cfg->hwm_size = cpu_to_le32(frag->size);
+	case IWL_FW_INI_LOCATION_DRAM_PATH:
+		if (trans->dbg.fw_mon_ini[alloc_id].num_frags) {
+			struct iwl_dram_data *frag =
+				&trans->dbg.fw_mon_ini[alloc_id].frags[0];
+			dbg_flags |= IWL_PRPH_SCRATCH_EDBG_DEST_DRAM;
+			dbg_cfg->hwm_base_addr = cpu_to_le64(frag->physical);
+			dbg_cfg->hwm_size = cpu_to_le32(frag->size);
+			IWL_DEBUG_FW(trans,
+				     "WRT: Applying DRAM destination (alloc_id=%u, num_frags=%u)\n",
+				     alloc_id,
+				     trans->dbg.fw_mon_ini[alloc_id].num_frags);
+		}
+		break;
+	default:
+		IWL_ERR(trans, "WRT: Invalid buffer destination\n");
 	}
-
 out:
 	if (dbg_flags)
 		*control_flags |= IWL_PRPH_SCRATCH_EARLY_DEBUG_EN | dbg_flags;
-- 
cgit v1.2.3-59-g8ed1b


From 8146458fcd7942861c754b85b5464ef2a8cfacbb Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Fri, 24 Apr 2020 18:43:41 +0300
Subject: mlxsw: spectrum_span: Reduce nesting in
 mlxsw_sp_span_entry_configure()

Use early return to avoid unnecessary nesting.

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 29 ++++++++++++++--------
 1 file changed, 19 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 9fb2e9d93929..e7be1bfe7f75 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -622,18 +622,27 @@ mlxsw_sp_span_entry_configure(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_span_entry *span_entry,
 			      struct mlxsw_sp_span_parms sparms)
 {
-	if (sparms.dest_port) {
-		if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
-			netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
-				   sparms.dest_port->dev->name);
-			sparms.dest_port = NULL;
-		} else if (span_entry->ops->configure(span_entry, sparms)) {
-			netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
-				   sparms.dest_port->dev->name);
-			sparms.dest_port = NULL;
-		}
+	int err;
+
+	if (!sparms.dest_port)
+		goto set_parms;
+
+	if (sparms.dest_port->mlxsw_sp != mlxsw_sp) {
+		netdev_err(span_entry->to_dev, "Cannot mirror to %s, which belongs to a different mlxsw instance",
+			   sparms.dest_port->dev->name);
+		sparms.dest_port = NULL;
+		goto set_parms;
+	}
+
+	err = span_entry->ops->configure(span_entry, sparms);
+	if (err) {
+		netdev_err(span_entry->to_dev, "Failed to offload mirror to %s",
+			   sparms.dest_port->dev->name);
+		sparms.dest_port = NULL;
+		goto set_parms;
 	}
 
+set_parms:
 	span_entry->parms = sparms;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 7f9b099bd9d3143d3c1a0ea74b586c5189e58750 Mon Sep 17 00:00:00 2001
From: Amit Cohen <amitc@mellanox.com>
Date: Fri, 24 Apr 2020 18:43:42 +0300
Subject: mlxsw: spectrum_span: Rename parms() to parms_set()

Use a more meaningful name for the parms() function.

Signed-off-by: Amit Cohen <amitc@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 14 +++++++-------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h |  4 ++--
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index e7be1bfe7f75..eb4a1c0f2788 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -130,7 +130,7 @@ mlxsw_sp_span_entry_phys_deconfigure(struct mlxsw_sp_span_entry *span_entry)
 static const
 struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_phys = {
 	.can_handle = mlxsw_sp_port_dev_check,
-	.parms = mlxsw_sp_span_entry_phys_parms,
+	.parms_set = mlxsw_sp_span_entry_phys_parms,
 	.configure = mlxsw_sp_span_entry_phys_configure,
 	.deconfigure = mlxsw_sp_span_entry_phys_deconfigure,
 };
@@ -418,7 +418,7 @@ mlxsw_sp_span_entry_gretap4_deconfigure(struct mlxsw_sp_span_entry *span_entry)
 
 static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap4 = {
 	.can_handle = netif_is_gretap,
-	.parms = mlxsw_sp_span_entry_gretap4_parms,
+	.parms_set = mlxsw_sp_span_entry_gretap4_parms,
 	.configure = mlxsw_sp_span_entry_gretap4_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap4_deconfigure,
 };
@@ -519,7 +519,7 @@ mlxsw_sp_span_entry_gretap6_deconfigure(struct mlxsw_sp_span_entry *span_entry)
 static const
 struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_gretap6 = {
 	.can_handle = netif_is_ip6gretap,
-	.parms = mlxsw_sp_span_entry_gretap6_parms,
+	.parms_set = mlxsw_sp_span_entry_gretap6_parms,
 	.configure = mlxsw_sp_span_entry_gretap6_configure,
 	.deconfigure = mlxsw_sp_span_entry_gretap6_deconfigure,
 };
@@ -575,7 +575,7 @@ mlxsw_sp_span_entry_vlan_deconfigure(struct mlxsw_sp_span_entry *span_entry)
 static const
 struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_vlan = {
 	.can_handle = mlxsw_sp_span_vlan_can_handle,
-	.parms = mlxsw_sp_span_entry_vlan_parms,
+	.parms_set = mlxsw_sp_span_entry_vlan_parms,
 	.configure = mlxsw_sp_span_entry_vlan_configure,
 	.deconfigure = mlxsw_sp_span_entry_vlan_deconfigure,
 };
@@ -612,7 +612,7 @@ mlxsw_sp_span_entry_nop_deconfigure(struct mlxsw_sp_span_entry *span_entry)
 }
 
 static const struct mlxsw_sp_span_entry_ops mlxsw_sp_span_entry_ops_nop = {
-	.parms = mlxsw_sp_span_entry_nop_parms,
+	.parms_set = mlxsw_sp_span_entry_nop_parms,
 	.configure = mlxsw_sp_span_entry_nop_configure,
 	.deconfigure = mlxsw_sp_span_entry_nop_deconfigure,
 };
@@ -970,7 +970,7 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 		return -EOPNOTSUPP;
 	}
 
-	err = ops->parms(to_dev, &sparms);
+	err = ops->parms_set(to_dev, &sparms);
 	if (err)
 		return err;
 
@@ -1026,7 +1026,7 @@ static void mlxsw_sp_span_respin_work(struct work_struct *work)
 		if (!curr->ref_count)
 			continue;
 
-		err = curr->ops->parms(curr->to_dev, &sparms);
+		err = curr->ops->parms_set(curr->to_dev, &sparms);
 		if (err)
 			continue;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 59724335525f..01273e54ba20 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -49,8 +49,8 @@ struct mlxsw_sp_span_entry {
 
 struct mlxsw_sp_span_entry_ops {
 	bool (*can_handle)(const struct net_device *to_dev);
-	int (*parms)(const struct net_device *to_dev,
-		     struct mlxsw_sp_span_parms *sparmsp);
+	int (*parms_set)(const struct net_device *to_dev,
+			 struct mlxsw_sp_span_parms *sparmsp);
 	int (*configure)(struct mlxsw_sp_span_entry *span_entry,
 			 struct mlxsw_sp_span_parms sparms);
 	void (*deconfigure)(struct mlxsw_sp_span_entry *span_entry);
-- 
cgit v1.2.3-59-g8ed1b


From c0c2899cf66ee7a68e5b3a7a135089622e005008 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 24 Apr 2020 18:43:43 +0300
Subject: mlxsw: spectrum_span: Remove unnecessary debug prints

To the best of my knowledge, these debug prints were never used. Remove
them.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index eb4a1c0f2788..14c5edc71239 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -978,9 +978,6 @@ int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
 	if (!span_entry)
 		return -ENOBUFS;
 
-	netdev_dbg(from->dev, "Adding inspected port to SPAN entry %d\n",
-		   span_entry->id);
-
 	err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
 	if (err)
 		goto err_port_bind;
@@ -1004,8 +1001,6 @@ void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
 		return;
 	}
 
-	netdev_dbg(from->dev, "removing inspected port from SPAN entry %d\n",
-		   span_entry->id);
 	mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4c00dafc59c7cc25a381abf7671b203a2fcfca71 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 24 Apr 2020 18:43:44 +0300
Subject: mlxsw: spectrum_span: Use 'refcount_t' for reference counting

'refcount_t' is very useful for catching over/under flows. Convert the
SPAN agent objects to use it instead of 'int' for their reference count.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 16 ++++++++--------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h |  3 ++-
 2 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 14c5edc71239..235556be58f5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -3,6 +3,7 @@
 
 #include <linux/if_bridge.h>
 #include <linux/list.h>
+#include <linux/refcount.h>
 #include <linux/rtnetlink.h>
 #include <linux/workqueue.h>
 #include <net/arp.h>
@@ -664,7 +665,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
 
 	/* find a free entry to use */
 	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
-		if (!mlxsw_sp->span->entries[i].ref_count) {
+		if (!refcount_read(&mlxsw_sp->span->entries[i].ref_count)) {
 			span_entry = &mlxsw_sp->span->entries[i];
 			break;
 		}
@@ -674,7 +675,7 @@ mlxsw_sp_span_entry_create(struct mlxsw_sp *mlxsw_sp,
 
 	atomic_inc(&mlxsw_sp->span->active_entries_count);
 	span_entry->ops = ops;
-	span_entry->ref_count = 1;
+	refcount_set(&span_entry->ref_count, 1);
 	span_entry->to_dev = to_dev;
 	mlxsw_sp_span_entry_configure(mlxsw_sp, span_entry, sparms);
 
@@ -697,7 +698,7 @@ mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
 	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
 
-		if (curr->ref_count && curr->to_dev == to_dev)
+		if (refcount_read(&curr->ref_count) && curr->to_dev == to_dev)
 			return curr;
 	}
 	return NULL;
@@ -718,7 +719,7 @@ mlxsw_sp_span_entry_find_by_id(struct mlxsw_sp *mlxsw_sp, int span_id)
 	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
 
-		if (curr->ref_count && curr->id == span_id)
+		if (refcount_read(&curr->ref_count) && curr->id == span_id)
 			return curr;
 	}
 	return NULL;
@@ -735,7 +736,7 @@ mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
 	span_entry = mlxsw_sp_span_entry_find_by_port(mlxsw_sp, to_dev);
 	if (span_entry) {
 		/* Already exists, just take a reference */
-		span_entry->ref_count++;
+		refcount_inc(&span_entry->ref_count);
 		return span_entry;
 	}
 
@@ -745,8 +746,7 @@ mlxsw_sp_span_entry_get(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_span_entry *span_entry)
 {
-	WARN_ON(!span_entry->ref_count);
-	if (--span_entry->ref_count == 0)
+	if (refcount_dec_and_test(&span_entry->ref_count))
 		mlxsw_sp_span_entry_destroy(mlxsw_sp, span_entry);
 	return 0;
 }
@@ -1018,7 +1018,7 @@ static void mlxsw_sp_span_respin_work(struct work_struct *work)
 		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
 		struct mlxsw_sp_span_parms sparms = {NULL};
 
-		if (!curr->ref_count)
+		if (!refcount_read(&curr->ref_count))
 			continue;
 
 		err = curr->ops->parms_set(curr->to_dev, &sparms);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 01273e54ba20..d23abdf957fa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -6,6 +6,7 @@
 
 #include <linux/types.h>
 #include <linux/if_ether.h>
+#include <linux/refcount.h>
 
 #include "spectrum_router.h"
 
@@ -43,7 +44,7 @@ struct mlxsw_sp_span_entry {
 	const struct mlxsw_sp_span_entry_ops *ops;
 	struct mlxsw_sp_span_parms parms;
 	struct list_head bound_ports_list;
-	int ref_count;
+	refcount_t ref_count;
 	int id;
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 4780dbdbd957c204b62680161f39bb0bc4daf3a0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 24 Apr 2020 18:43:45 +0300
Subject: mlxsw: spectrum_span: Replace zero-length array with flexible-array
 member

In a similar fashion to commit e99f8e7f88b5 ("mlxsw: Replace zero-length
array with flexible-array member"), use a flexible-array member to get a
compiler warning in case the flexible array does not occur last in the
structure.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 235556be58f5..ae3c8a1e9a43 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -22,7 +22,7 @@ struct mlxsw_sp_span {
 	struct mlxsw_sp *mlxsw_sp;
 	atomic_t active_entries_count;
 	int entries_count;
-	struct mlxsw_sp_span_entry entries[0];
+	struct mlxsw_sp_span_entry entries[];
 };
 
 static void mlxsw_sp_span_respin_work(struct work_struct *work);
-- 
cgit v1.2.3-59-g8ed1b


From b70ba69ef1f72e58c8e43b5689a37a66a7b31d11 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 23 Apr 2020 16:57:45 +0200
Subject: net: sched: report ndo_setup_tc failures via extack

Help end-users of the 'tc' command to see if the driver's ndo_setup_tc
function call fails. Troubleshooting when this happens is non-trivial
(see full process here[1]), and results in net_device getting assigned
the 'qdisc noop', which will drop all TX packets on the interface.

[1]: https://github.com/xdp-project/xdp-project/blob/master/areas/arm64/board_nxp_ls1088/nxp-board04-troubleshoot-qdisc.org

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 55bd1429678f..11b683c45c28 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -735,8 +735,11 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 	INIT_LIST_HEAD(&bo.cb_list);
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
-	if (err < 0)
+	if (err < 0) {
+		if (err != -EOPNOTSUPP)
+			NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
 		return err;
+	}
 
 	return tcf_block_setup(block, &bo);
 }
-- 
cgit v1.2.3-59-g8ed1b


From b89c1e6bdc73f5775e118eb2ab778e75b262b30c Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 23 Apr 2020 16:57:50 +0200
Subject: dpaa2-eth: fix return codes used in ndo_setup_tc

A driver's ndo_setup_tc call should return -EOPNOTSUPP when it cannot
support the qdisc type. Other return values will result in failing the
qdisc setup. This leads to qdisc noop getting assigned, which will
drop all TX packets on the interface.

Fixes: ab1e6de2bd49 ("dpaa2-eth: Add mqprio support")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 9d4061bba0b8..d271c016229d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2021,7 +2021,7 @@ static int dpaa2_eth_setup_tc(struct net_device *net_dev,
 	int i;
 
 	if (type != TC_SETUP_QDISC_MQPRIO)
-		return -EINVAL;
+		return -EOPNOTSUPP;
 
 	mqprio->hw = TC_MQPRIO_HW_OFFLOAD_TCS;
 	num_queues = dpaa2_eth_queue_count(priv);
@@ -2033,7 +2033,7 @@ static int dpaa2_eth_setup_tc(struct net_device *net_dev,
 	if (num_tc  > dpaa2_eth_tc_count(priv)) {
 		netdev_err(net_dev, "Max %d traffic classes supported\n",
 			   dpaa2_eth_tc_count(priv));
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
 	if (!num_tc) {
-- 
cgit v1.2.3-59-g8ed1b


From 5c05c1dbb177293636a3f5ea4caa872dfcf50ccd Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 23 Apr 2020 17:02:56 +0100
Subject: net: phylink, dsa: eliminate phylink_fixed_state_cb()

Move the callback into the phylink_config structure, rather than
providing a callback to set this up.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phylink.c | 46 +++++++++++++++-------------------------------
 include/linux/phylink.h   |  6 +++---
 net/dsa/slave.c           | 20 +++++++++++---------
 3 files changed, 29 insertions(+), 43 deletions(-)

diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 34ca12aec61b..0f23bec431c1 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -480,8 +480,8 @@ static void phylink_get_fixed_state(struct phylink *pl,
 				    struct phylink_link_state *state)
 {
 	*state = pl->link_config;
-	if (pl->get_fixed_state)
-		pl->get_fixed_state(pl->netdev, state);
+	if (pl->config->get_fixed_state)
+		pl->config->get_fixed_state(pl->config, state);
 	else if (pl->link_gpio)
 		state->link = !!gpiod_get_value_cansleep(pl->link_gpio);
 
@@ -1044,32 +1044,6 @@ void phylink_disconnect_phy(struct phylink *pl)
 }
 EXPORT_SYMBOL_GPL(phylink_disconnect_phy);
 
-/**
- * phylink_fixed_state_cb() - allow setting a fixed link callback
- * @pl: a pointer to a &struct phylink returned from phylink_create()
- * @cb: callback to execute to determine the fixed link state.
- *
- * The MAC driver should call this driver when the state of its link
- * can be determined through e.g: an out of band MMIO register.
- */
-int phylink_fixed_state_cb(struct phylink *pl,
-			   void (*cb)(struct net_device *dev,
-				      struct phylink_link_state *state))
-{
-	/* It does not make sense to let the link be overriden unless we use
-	 * MLO_AN_FIXED
-	 */
-	if (pl->cfg_link_an_mode != MLO_AN_FIXED)
-		return -EINVAL;
-
-	mutex_lock(&pl->state_mutex);
-	pl->get_fixed_state = cb;
-	mutex_unlock(&pl->state_mutex);
-
-	return 0;
-}
-EXPORT_SYMBOL_GPL(phylink_fixed_state_cb);
-
 /**
  * phylink_mac_change() - notify phylink of a change in MAC state
  * @pl: a pointer to a &struct phylink returned from phylink_create()
@@ -1106,6 +1080,8 @@ static irqreturn_t phylink_link_handler(int irq, void *data)
  */
 void phylink_start(struct phylink *pl)
 {
+	bool poll = false;
+
 	ASSERT_RTNL();
 
 	phylink_info(pl, "configuring for %s/%s link mode\n",
@@ -1142,10 +1118,18 @@ void phylink_start(struct phylink *pl)
 				irq = 0;
 		}
 		if (irq <= 0)
-			mod_timer(&pl->link_poll, jiffies + HZ);
+			poll = true;
+	}
+
+	switch (pl->cfg_link_an_mode) {
+	case MLO_AN_FIXED:
+		poll |= pl->config->poll_fixed_state;
+		break;
+	case MLO_AN_INBAND:
+		poll |= pl->config->pcs_poll;
+		break;
 	}
-	if ((pl->cfg_link_an_mode == MLO_AN_FIXED && pl->get_fixed_state) ||
-	    pl->config->pcs_poll)
+	if (poll)
 		mod_timer(&pl->link_poll, jiffies + HZ);
 	if (pl->phydev)
 		phy_start(pl->phydev);
diff --git a/include/linux/phylink.h b/include/linux/phylink.h
index 3f8d37ec5503..cc5b452a184e 100644
--- a/include/linux/phylink.h
+++ b/include/linux/phylink.h
@@ -67,6 +67,9 @@ struct phylink_config {
 	struct device *dev;
 	enum phylink_op_type type;
 	bool pcs_poll;
+	bool poll_fixed_state;
+	void (*get_fixed_state)(struct phylink_config *config,
+				struct phylink_link_state *state);
 };
 
 /**
@@ -366,9 +369,6 @@ void phylink_destroy(struct phylink *);
 int phylink_connect_phy(struct phylink *, struct phy_device *);
 int phylink_of_phy_connect(struct phylink *, struct device_node *, u32 flags);
 void phylink_disconnect_phy(struct phylink *);
-int phylink_fixed_state_cb(struct phylink *,
-			   void (*cb)(struct net_device *dev,
-				      struct phylink_link_state *));
 
 void phylink_mac_change(struct phylink *, bool up);
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index f2c241cf3a80..1035230771ae 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1590,10 +1590,10 @@ void dsa_port_phylink_mac_change(struct dsa_switch *ds, int port, bool up)
 }
 EXPORT_SYMBOL_GPL(dsa_port_phylink_mac_change);
 
-static void dsa_slave_phylink_fixed_state(struct net_device *dev,
+static void dsa_slave_phylink_fixed_state(struct phylink_config *config,
 					  struct phylink_link_state *state)
 {
-	struct dsa_port *dp = dsa_slave_to_port(dev);
+	struct dsa_port *dp = container_of(config, struct dsa_port, pl_config);
 	struct dsa_switch *ds = dp->ds;
 
 	/* No need to check that this operation is valid, the callback would
@@ -1633,6 +1633,15 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 	dp->pl_config.dev = &slave_dev->dev;
 	dp->pl_config.type = PHYLINK_NETDEV;
 
+	/* The get_fixed_state callback takes precedence over polling the
+	 * link GPIO in PHYLINK (see phylink_get_fixed_state).  Only set
+	 * this if the switch provides such a callback.
+	 */
+	if (ds->ops->phylink_fixed_state) {
+		dp->pl_config.get_fixed_state = dsa_slave_phylink_fixed_state;
+		dp->pl_config.poll_fixed_state = true;
+	}
+
 	dp->pl = phylink_create(&dp->pl_config, of_fwnode_handle(port_dn), mode,
 				&dsa_port_phylink_mac_ops);
 	if (IS_ERR(dp->pl)) {
@@ -1641,13 +1650,6 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 		return PTR_ERR(dp->pl);
 	}
 
-	/* Register only if the switch provides such a callback, since this
-	 * callback takes precedence over polling the link GPIO in PHYLINK
-	 * (see phylink_get_fixed_state).
-	 */
-	if (ds->ops->phylink_fixed_state)
-		phylink_fixed_state_cb(dp->pl, dsa_slave_phylink_fixed_state);
-
 	if (ds->ops->get_phy_flags)
 		phy_flags = ds->ops->get_phy_flags(ds, dp->index);
 
-- 
cgit v1.2.3-59-g8ed1b


From d70c47c8dc6902db19555b7ff7e6eeb264d4ac06 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 23 Apr 2020 21:34:33 +0200
Subject: net: phy: make phy_suspend a no-op if PHY is suspended already

Gently handle the case that phy_suspend() is called whilst PHY is in
power-down.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index ac2784192472..206d98502b13 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1524,6 +1524,9 @@ int phy_suspend(struct phy_device *phydev)
 	struct phy_driver *phydrv = phydev->drv;
 	int ret;
 
+	if (phydev->suspended)
+		return 0;
+
 	/* If the device has WOL enabled, we cannot suspend the PHY */
 	phy_ethtool_get_wol(phydev, &wol);
 	if (wol.wolopts || (netdev && netdev->wol_enabled))
-- 
cgit v1.2.3-59-g8ed1b


From 3194915486b2bc3f77745774f1731b78f32ff688 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 23 Apr 2020 21:35:36 +0200
Subject: net: phy: remove genphy_no_soft_reset

Since 6e2d85ec0559 ("net: phy: Stop with excessive soft reset")
we don't need genphy_no_soft_reset() any longer. Not setting
callback soft_reset results in a no-op now.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/cortina.c    | 1 -
 drivers/net/phy/marvell10g.c | 2 --
 drivers/net/phy/phy-c45.c    | 1 -
 drivers/net/phy/phy_device.c | 1 -
 drivers/net/phy/teranetics.c | 1 -
 include/linux/phy.h          | 4 ----
 6 files changed, 10 deletions(-)

diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index 856cdc36aacd..aac51362c0fe 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -82,7 +82,6 @@ static struct phy_driver cortina_driver[] = {
 	.features       = PHY_10GBIT_FEATURES,
 	.config_aneg	= gen10g_config_aneg,
 	.read_status	= cortina_read_status,
-	.soft_reset	= genphy_no_soft_reset,
 	.probe		= cortina_probe,
 },
 };
diff --git a/drivers/net/phy/marvell10g.c b/drivers/net/phy/marvell10g.c
index 95e3f4644aeb..80cbc77ffd55 100644
--- a/drivers/net/phy/marvell10g.c
+++ b/drivers/net/phy/marvell10g.c
@@ -727,7 +727,6 @@ static struct phy_driver mv3310_drivers[] = {
 		.phy_id_mask	= MARVELL_PHY_ID_MASK,
 		.name		= "mv88x3310",
 		.get_features	= mv3310_get_features,
-		.soft_reset	= genphy_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.probe		= mv3310_probe,
 		.suspend	= mv3310_suspend,
@@ -745,7 +744,6 @@ static struct phy_driver mv3310_drivers[] = {
 		.probe		= mv3310_probe,
 		.suspend	= mv3310_suspend,
 		.resume		= mv3310_resume,
-		.soft_reset	= genphy_no_soft_reset,
 		.config_init	= mv3310_config_init,
 		.config_aneg	= mv3310_config_aneg,
 		.aneg_done	= mv3310_aneg_done,
diff --git a/drivers/net/phy/phy-c45.c b/drivers/net/phy/phy-c45.c
index 67ba47ae5284..defe09d94422 100644
--- a/drivers/net/phy/phy-c45.c
+++ b/drivers/net/phy/phy-c45.c
@@ -564,6 +564,5 @@ struct phy_driver genphy_c45_driver = {
 	.phy_id         = 0xffffffff,
 	.phy_id_mask    = 0xffffffff,
 	.name           = "Generic Clause 45 PHY",
-	.soft_reset	= genphy_no_soft_reset,
 	.read_status    = genphy_c45_read_status,
 };
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 206d98502b13..c8f8fd9908fe 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -2630,7 +2630,6 @@ static struct phy_driver genphy_driver = {
 	.phy_id		= 0xffffffff,
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Generic PHY",
-	.soft_reset	= genphy_no_soft_reset,
 	.get_features	= genphy_read_abilities,
 	.suspend	= genphy_suspend,
 	.resume		= genphy_resume,
diff --git a/drivers/net/phy/teranetics.c b/drivers/net/phy/teranetics.c
index beb054b931ee..8057ea8dbc21 100644
--- a/drivers/net/phy/teranetics.c
+++ b/drivers/net/phy/teranetics.c
@@ -78,7 +78,6 @@ static struct phy_driver teranetics_driver[] = {
 	.phy_id_mask	= 0xffffffff,
 	.name		= "Teranetics TN2020",
 	.features       = PHY_10GBIT_FEATURES,
-	.soft_reset	= genphy_no_soft_reset,
 	.aneg_done	= teranetics_aneg_done,
 	.config_aneg    = gen10g_config_aneg,
 	.read_status	= teranetics_read_status,
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 3941a6bcba10..e2bfb9240587 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1251,10 +1251,6 @@ static inline int genphy_config_aneg(struct phy_device *phydev)
 	return __genphy_config_aneg(phydev, false);
 }
 
-static inline int genphy_no_soft_reset(struct phy_device *phydev)
-{
-	return 0;
-}
 static inline int genphy_no_ack_interrupt(struct phy_device *phydev)
 {
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 9576e9fa1c02aea3cf1e42eadcbeb12ccf5e87de Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 23 Apr 2020 21:38:42 +0200
Subject: net: phy: clear phydev->suspended after soft reset

If a soft reset is triggered whilst PHY is in power-down, then
phydev->suspended will remain set. Seems we didn't face any issue yet
caused by this, but better reset the suspended flag after soft reset.

See also the following from 22.2.4.1.1
Resetting a PHY is accomplished by setting bit 0.15 to a logic one.
This action shall set the status and control registers to their default
states.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c8f8fd9908fe..7e1ddd5745d2 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1082,8 +1082,12 @@ int phy_init_hw(struct phy_device *phydev)
 	if (!phydev->drv)
 		return 0;
 
-	if (phydev->drv->soft_reset)
+	if (phydev->drv->soft_reset) {
 		ret = phydev->drv->soft_reset(phydev);
+		/* see comment in genphy_soft_reset for an explanation */
+		if (!ret)
+			phydev->suspended = 0;
+	}
 
 	if (ret < 0)
 		return ret;
@@ -2157,6 +2161,12 @@ int genphy_soft_reset(struct phy_device *phydev)
 	if (ret < 0)
 		return ret;
 
+	/* Clause 22 states that setting bit BMCR_RESET sets control registers
+	 * to their default value. Therefore the POWER DOWN bit is supposed to
+	 * be cleared after soft reset.
+	 */
+	phydev->suspended = 0;
+
 	ret = phy_poll_reset(phydev);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 10395e99f4a6f67d53b89f143f610ee954c24531 Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Fri, 24 Apr 2020 17:00:15 +0800
Subject: net/mlxfw: Remove unneeded semicolon

Fixes coccicheck warning:

drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c:79:2-3: Unneeded semicolon
drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c:162:2-3: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
index 046a0cb82ed8..7a04c626a2aa 100644
--- a/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
+++ b/drivers/net/ethernet/mellanox/mlxfw/mlxfw_fsm.c
@@ -76,7 +76,7 @@ static int mlxfw_fsm_state_err(struct mlxfw_dev *mlxfw_dev,
 	case MLXFW_FSM_STATE_ERR_MAX:
 		MLXFW_ERR_MSG(mlxfw_dev, extack, "unknown error", err);
 		break;
-	};
+	}
 
 	return mlxfw_fsm_state_errno[fsm_state_err];
 };
@@ -159,7 +159,7 @@ mlxfw_fsm_reactivate_err(struct mlxfw_dev *mlxfw_dev,
 	case MLXFW_FSM_REACTIVATE_STATUS_MAX:
 		MLXFW_REACT_ERR("unexpected error", err);
 		break;
-	};
+	}
 	return -EREMOTEIO;
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From d9e4171a4a2cf20e803eb31a92d6854ea7002c38 Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Fri, 24 Apr 2020 17:04:28 +0800
Subject: net: atlantic: Remove unneeded semicolon

Fixes coccicheck warning:

drivers/net/ethernet/aquantia/atlantic/aq_macsec.c:404:2-3: Unneeded semicolon
drivers/net/ethernet/aquantia/atlantic/aq_macsec.c:420:2-3: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Acked-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_macsec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
index 0b3e234a54aa..91870ceaf3fe 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
@@ -401,7 +401,7 @@ static u32 aq_sc_idx_max(const enum aq_macsec_sc_sa sc_sa)
 		break;
 	default:
 		break;
-	};
+	}
 
 	return result;
 }
@@ -417,7 +417,7 @@ static u32 aq_to_hw_sc_idx(const u32 sc_idx, const enum aq_macsec_sc_sa sc_sa)
 		return sc_idx;
 	default:
 		WARN_ONCE(true, "Invalid sc_sa");
-	};
+	}
 
 	return sc_idx;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7f023ec91c3cb188ab8a52478f3145ccd4daef68 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 17:04:50 +0800
Subject: net: sched: remove unused inline function qdisc_reset_all_tx

There are no callers in-tree anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index 25d2ec4c8f00..1862bf5a105b 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -710,11 +710,6 @@ static inline void qdisc_reset_all_tx_gt(struct net_device *dev, unsigned int i)
 	}
 }
 
-static inline void qdisc_reset_all_tx(struct net_device *dev)
-{
-	qdisc_reset_all_tx_gt(dev, 0);
-}
-
 /* Are all TX queues of the device empty?  */
 static inline bool qdisc_all_tx_empty(const struct net_device *dev)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 5d7163a117876f95f44f6f9fb9f028daead42243 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 17:06:29 +0800
Subject: net: ipv6: remove unused inline function ip6_set_txhash

Commit 877d1f6291f8 ("net: Set sk_txhash from a random number")
left this behind; remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 1bf8065fe871..955badd1e8ff 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -908,7 +908,6 @@ static inline int ip6_default_np_autolabel(struct net *net)
 	}
 }
 #else
-static inline void ip6_set_txhash(struct sock *sk) { }
 static inline __be32 ip6_make_flowlabel(struct net *net, struct sk_buff *skb,
 					__be32 flowlabel, bool autolabel,
 					struct flowi6 *fl6)
-- 
cgit v1.2.3-59-g8ed1b


From 6033cebdfff9b10192eb254e8cc60fedd595ea7f Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 24 Apr 2020 16:03:48 +0800
Subject: ptp: idt82p33: remove unnecessary comparison

The type of loaddr is u8, which is always <= 0xff, so the comparison
loaddr <= 0xff is always true and can be removed.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_idt82p33.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_idt82p33.c b/drivers/ptp/ptp_idt82p33.c
index 31ea811b6d5f..179f6c472e50 100644
--- a/drivers/ptp/ptp_idt82p33.c
+++ b/drivers/ptp/ptp_idt82p33.c
@@ -881,7 +881,7 @@ static int idt82p33_load_firmware(struct idt82p33 *idt82p33)
 
 			/* Page size 128, last 4 bytes of page skipped */
 			if (((loaddr > 0x7b) && (loaddr <= 0x7f))
-			     || ((loaddr > 0xfb) && (loaddr <= 0xff)))
+			     || loaddr > 0xfb)
 				continue;
 
 			err = idt82p33_write(idt82p33, _ADDR(page, loaddr),
-- 
cgit v1.2.3-59-g8ed1b


From 1ac0e6c292983c2665383ae9efcfe1f8b53271e0 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:06 +0800
Subject: net: hns3: refine for unicast MAC VLAN space management

Currently, firmware helps manage the unicast MAC VLAN table
space for each PF. The PF just needs to tell firmware its wanted
space when initializing, and it is unnecessary to free it when
un-initializing. So this patch removes the umv space free handle,
and removes the forward declaration of hclge_set_umv_space()
by defining hclge_init_umv_space() after it.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 72 ++++++++--------------
 1 file changed, 24 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 0618f22e6f14..ccf269a9a3b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -62,8 +62,6 @@ static int hclge_init_vlan_config(struct hclge_dev *hdev);
 static void hclge_sync_vlan_filter(struct hclge_dev *hdev);
 static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev);
 static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle);
-static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
-			       u16 *allocated_size, bool is_alloc);
 static void hclge_rfs_filter_expire(struct hclge_dev *hdev);
 static void hclge_clear_arfs_rules(struct hnae3_handle *handle);
 static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
@@ -7196,50 +7194,6 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
 	return cfg_status;
 }
 
-static int hclge_init_umv_space(struct hclge_dev *hdev)
-{
-	u16 allocated_size = 0;
-	int ret;
-
-	ret = hclge_set_umv_space(hdev, hdev->wanted_umv_size, &allocated_size,
-				  true);
-	if (ret)
-		return ret;
-
-	if (allocated_size < hdev->wanted_umv_size)
-		dev_warn(&hdev->pdev->dev,
-			 "Alloc umv space failed, want %u, get %u\n",
-			 hdev->wanted_umv_size, allocated_size);
-
-	mutex_init(&hdev->umv_mutex);
-	hdev->max_umv_size = allocated_size;
-	/* divide max_umv_size by (hdev->num_req_vfs + 2), in order to
-	 * preserve some unicast mac vlan table entries shared by pf
-	 * and its vfs.
-	 */
-	hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_req_vfs + 2);
-	hdev->share_umv_size = hdev->priv_umv_size +
-			hdev->max_umv_size % (hdev->num_req_vfs + 2);
-
-	return 0;
-}
-
-static int hclge_uninit_umv_space(struct hclge_dev *hdev)
-{
-	int ret;
-
-	if (hdev->max_umv_size > 0) {
-		ret = hclge_set_umv_space(hdev, hdev->max_umv_size, NULL,
-					  false);
-		if (ret)
-			return ret;
-		hdev->max_umv_size = 0;
-	}
-	mutex_destroy(&hdev->umv_mutex);
-
-	return 0;
-}
-
 static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
 			       u16 *allocated_size, bool is_alloc)
 {
@@ -7268,6 +7222,30 @@ static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
 	return 0;
 }
 
+static int hclge_init_umv_space(struct hclge_dev *hdev)
+{
+	u16 allocated_size = 0;
+	int ret;
+
+	ret = hclge_set_umv_space(hdev, hdev->wanted_umv_size, &allocated_size,
+				  true);
+	if (ret)
+		return ret;
+
+	if (allocated_size < hdev->wanted_umv_size)
+		dev_warn(&hdev->pdev->dev,
+			 "failed to alloc umv space, want %u, get %u\n",
+			 hdev->wanted_umv_size, allocated_size);
+
+	mutex_init(&hdev->umv_mutex);
+	hdev->max_umv_size = allocated_size;
+	hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_alloc_vport + 1);
+	hdev->share_umv_size = hdev->priv_umv_size +
+			hdev->max_umv_size % (hdev->num_alloc_vport + 1);
+
+	return 0;
+}
+
 static void hclge_reset_umv_space(struct hclge_dev *hdev)
 {
 	struct hclge_vport *vport;
@@ -10041,8 +10019,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
 
-	hclge_uninit_umv_space(hdev);
-
 	/* Disable MISC vector(vector0) */
 	hclge_enable_vector(&hdev->misc_vector, false);
 	synchronize_irq(hdev->misc_vector.vector_irq);
-- 
cgit v1.2.3-59-g8ed1b


From c1c5f66ee0dea782de9c95e6d63f19c355389ee8 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:07 +0800
Subject: net: hns3: remove unnecessary parameter 'is_alloc' in
 hclge_set_umv_space()

Since hclge_set_umv_space() is only called by hclge_init_umv_space(),
the parameter 'is_alloc' is redundant.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index ccf269a9a3b1..fe6e60a41925 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7195,7 +7195,7 @@ static int hclge_add_mac_vlan_tbl(struct hclge_vport *vport,
 }
 
 static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
-			       u16 *allocated_size, bool is_alloc)
+			       u16 *allocated_size)
 {
 	struct hclge_umv_spc_alc_cmd *req;
 	struct hclge_desc desc;
@@ -7203,20 +7203,17 @@ static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
 
 	req = (struct hclge_umv_spc_alc_cmd *)desc.data;
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_MAC_VLAN_ALLOCATE, false);
-	if (!is_alloc)
-		hnae3_set_bit(req->allocate, HCLGE_UMV_SPC_ALC_B, 1);
 
 	req->space_size = cpu_to_le32(space_size);
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret) {
-		dev_err(&hdev->pdev->dev,
-			"%s umv space failed for cmd_send, ret =%d\n",
-			is_alloc ? "allocate" : "free", ret);
+		dev_err(&hdev->pdev->dev, "failed to set umv space, ret = %d\n",
+			ret);
 		return ret;
 	}
 
-	if (is_alloc && allocated_size)
+	if (allocated_size)
 		*allocated_size = le32_to_cpu(desc.data[1]);
 
 	return 0;
@@ -7227,8 +7224,7 @@ static int hclge_init_umv_space(struct hclge_dev *hdev)
 	u16 allocated_size = 0;
 	int ret;
 
-	ret = hclge_set_umv_space(hdev, hdev->wanted_umv_size, &allocated_size,
-				  true);
+	ret = hclge_set_umv_space(hdev, hdev->wanted_umv_size, &allocated_size);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From 4c58f592470192d2b5ce4cfd2f7ff0ea2624c073 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:08 +0800
Subject: net: hns3: replace num_req_vfs with num_alloc_vport in
 hclge_reset_umv_space()

To be consistent with the calculation in hclge_init_umv_space(), replace
num_req_vfs + 2 with num_alloc_vport + 1 in hclge_reset_umv_space().

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index fe6e60a41925..a268004c8e0e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7254,7 +7254,7 @@ static void hclge_reset_umv_space(struct hclge_dev *hdev)
 
 	mutex_lock(&hdev->umv_mutex);
 	hdev->share_umv_size = hdev->priv_umv_size +
-			hdev->max_umv_size % (hdev->num_req_vfs + 2);
+			hdev->max_umv_size % (hdev->num_alloc_vport + 1);
 	mutex_unlock(&hdev->umv_mutex);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ee4bcd3b7ae40bd77732eb1ba14aa26d6c514525 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:09 +0800
Subject: net: hns3: refactor the MAC address configure

Currently, the HNS3 driver syncs and unsyncs MAC addresses in function
hns3_set_rx_mode(). For the PF, it adds and deletes MAC addresses
directly in the path of dev_set_rx_mode(). If that fails, it won't retry
until the next call of hns3_set_rx_mode(). On the other hand, if requests
to add and remove the same address arrive many times at a short interval,
each request must be handled one by one; they can't be merged. For the
VF, it sends mailbox messages to the PF to request adding or deleting MAC
addresses in the path of function hns3_set_rx_mode(), regardless of
whether the address is configured successfully.

This patch refines it by recording the MAC addresses in function
hns3_set_rx_mode(), and updating the MAC addresses in the service task.
If that fails, it will be retried by the next run of the periodic service
task. It also uses a state to mark each MAC address in the MAC list,
which helps merge configuration requests for the same address. With
these changes, when a global reset or IMP reset occurs, we can restore
the MAC table from the MAC list.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    |  79 +--
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 592 +++++++++++++++++----
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  27 +-
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c |  42 +-
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 313 ++++++++++-
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  |  25 +
 6 files changed, 860 insertions(+), 218 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index ac3a48a24d86..341e8b5cd219 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -40,7 +40,6 @@
 	} while (0)
 
 static void hns3_clear_all_ring(struct hnae3_handle *h, bool force);
-static void hns3_remove_hw_addr(struct net_device *netdev);
 
 static const char hns3_driver_name[] = "hns3";
 static const char hns3_driver_string[] =
@@ -548,6 +547,13 @@ static int hns3_nic_uc_unsync(struct net_device *netdev,
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 
+	/* need to ignore the request of removing the device address,
+	 * because we store the device address and the other addresses of
+	 * the uc list in the function's mac filter list.
+	 */
+	if (ether_addr_equal(addr, netdev->dev_addr))
+		return 0;
+
 	if (h->ae_algo->ops->rm_uc_addr)
 		return h->ae_algo->ops->rm_uc_addr(h, addr);
 
@@ -3907,9 +3913,11 @@ static int hns3_init_mac_addr(struct net_device *netdev)
 		eth_hw_addr_random(netdev);
 		dev_warn(priv->dev, "using random MAC address %pM\n",
 			 netdev->dev_addr);
-	} else {
+	} else if (!ether_addr_equal(netdev->dev_addr, mac_addr_temp)) {
 		ether_addr_copy(netdev->dev_addr, mac_addr_temp);
 		ether_addr_copy(netdev->perm_addr, mac_addr_temp);
+	} else {
+		return 0;
 	}
 
 	if (h->ae_algo->ops->set_mac_addr)
@@ -4119,8 +4127,6 @@ static void hns3_client_uninit(struct hnae3_handle *handle, bool reset)
 	struct hns3_nic_priv *priv = netdev_priv(netdev);
 	int ret;
 
-	hns3_remove_hw_addr(netdev);
-
 	if (netdev->reg_state != NETREG_UNINITIALIZED)
 		unregister_netdev(netdev);
 
@@ -4191,56 +4197,6 @@ static int hns3_client_setup_tc(struct hnae3_handle *handle, u8 tc)
 	return hns3_nic_set_real_num_queue(ndev);
 }
 
-static int hns3_recover_hw_addr(struct net_device *ndev)
-{
-	struct netdev_hw_addr_list *list;
-	struct netdev_hw_addr *ha, *tmp;
-	int ret = 0;
-
-	netif_addr_lock_bh(ndev);
-	/* go through and sync uc_addr entries to the device */
-	list = &ndev->uc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list) {
-		ret = hns3_nic_uc_sync(ndev, ha->addr);
-		if (ret)
-			goto out;
-	}
-
-	/* go through and sync mc_addr entries to the device */
-	list = &ndev->mc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list) {
-		ret = hns3_nic_mc_sync(ndev, ha->addr);
-		if (ret)
-			goto out;
-	}
-
-out:
-	netif_addr_unlock_bh(ndev);
-	return ret;
-}
-
-static void hns3_remove_hw_addr(struct net_device *netdev)
-{
-	struct netdev_hw_addr_list *list;
-	struct netdev_hw_addr *ha, *tmp;
-
-	hns3_nic_uc_unsync(netdev, netdev->dev_addr);
-
-	netif_addr_lock_bh(netdev);
-	/* go through and unsync uc_addr entries to the device */
-	list = &netdev->uc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		hns3_nic_uc_unsync(netdev, ha->addr);
-
-	/* go through and unsync mc_addr entries to the device */
-	list = &netdev->mc;
-	list_for_each_entry_safe(ha, tmp, &list->list, list)
-		if (ha->refcount > 1)
-			hns3_nic_mc_unsync(netdev, ha->addr);
-
-	netif_addr_unlock_bh(netdev);
-}
-
 static void hns3_clear_tx_ring(struct hns3_enet_ring *ring)
 {
 	while (ring->next_to_clean != ring->next_to_use) {
@@ -4411,10 +4367,8 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 	 * from table space. Hence, for function reset software intervention is
 	 * required to delete the entries
 	 */
-	if (hns3_dev_ongoing_func_reset(ae_dev)) {
-		hns3_remove_hw_addr(ndev);
+	if (hns3_dev_ongoing_func_reset(ae_dev))
 		hns3_del_all_fd_rules(ndev, false);
-	}
 
 	if (!netif_running(ndev))
 		return 0;
@@ -4482,6 +4436,9 @@ static int hns3_reset_notify_init_enet(struct hnae3_handle *handle)
 		goto err_init_irq_fail;
 	}
 
+	if (!hns3_is_phys_func(handle->pdev))
+		hns3_init_mac_addr(netdev);
+
 	ret = hns3_client_start(handle);
 	if (ret) {
 		dev_err(priv->dev, "hns3_client_start fail! ret=%d\n", ret);
@@ -4513,14 +4470,6 @@ static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
 	bool vlan_filter_enable;
 	int ret;
 
-	ret = hns3_init_mac_addr(netdev);
-	if (ret)
-		return ret;
-
-	ret = hns3_recover_hw_addr(netdev);
-	if (ret)
-		return ret;
-
 	ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
 	if (ret)
 		return ret;
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index a268004c8e0e..c3205ae620ce 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -68,6 +68,8 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 						   unsigned long *addr);
 static int hclge_set_default_loopback(struct hclge_dev *hdev);
 
+static void hclge_sync_mac_table(struct hclge_dev *hdev);
+
 static struct hnae3_ae_algo ae_algo;
 
 static struct workqueue_struct *hclge_wq;
@@ -1685,6 +1687,7 @@ static int hclge_alloc_vport(struct hclge_dev *hdev)
 		INIT_LIST_HEAD(&vport->vlan_list);
 		INIT_LIST_HEAD(&vport->uc_mac_list);
 		INIT_LIST_HEAD(&vport->mc_mac_list);
+		spin_lock_init(&vport->mac_list_lock);
 
 		if (i == 0)
 			ret = hclge_vport_setup(vport, tqp_main_vport);
@@ -3971,6 +3974,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev)
 	 * updated when it is triggered by mbx.
 	 */
 	hclge_update_link_status(hdev);
+	hclge_sync_mac_table(hdev);
 
 	if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
 		delta = jiffies - hdev->last_serv_processed;
@@ -6922,8 +6926,16 @@ static void hclge_ae_stop(struct hnae3_handle *handle)
 
 int hclge_vport_start(struct hclge_vport *vport)
 {
+	struct hclge_dev *hdev = vport->back;
+
 	set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
 	vport->last_active_jiffies = jiffies;
+
+	if (test_bit(vport->vport_id, hdev->vport_config_block))
+		hclge_restore_mac_table_common(vport);
+
+	clear_bit(vport->vport_id, hdev->vport_config_block);
+
 	return 0;
 }
 
@@ -7291,12 +7303,106 @@ static void hclge_update_umv_space(struct hclge_vport *vport, bool is_free)
 	mutex_unlock(&hdev->umv_mutex);
 }
 
+static struct hclge_mac_node *hclge_find_mac_node(struct list_head *list,
+						  const u8 *mac_addr)
+{
+	struct hclge_mac_node *mac_node, *tmp;
+
+	list_for_each_entry_safe(mac_node, tmp, list, node)
+		if (ether_addr_equal(mac_addr, mac_node->mac_addr))
+			return mac_node;
+
+	return NULL;
+}
+
+static void hclge_update_mac_node(struct hclge_mac_node *mac_node,
+				  enum HCLGE_MAC_NODE_STATE state)
+{
+	switch (state) {
+	/* from set_rx_mode or tmp_add_list */
+	case HCLGE_MAC_TO_ADD:
+		if (mac_node->state == HCLGE_MAC_TO_DEL)
+			mac_node->state = HCLGE_MAC_ACTIVE;
+		break;
+	/* only from set_rx_mode */
+	case HCLGE_MAC_TO_DEL:
+		if (mac_node->state == HCLGE_MAC_TO_ADD) {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else {
+			mac_node->state = HCLGE_MAC_TO_DEL;
+		}
+		break;
+	/* only from tmp_add_list, the mac_node->state won't be
+	 * ACTIVE.
+	 */
+	case HCLGE_MAC_ACTIVE:
+		if (mac_node->state == HCLGE_MAC_TO_ADD)
+			mac_node->state = HCLGE_MAC_ACTIVE;
+
+		break;
+	}
+}
+
+int hclge_update_mac_list(struct hclge_vport *vport,
+			  enum HCLGE_MAC_NODE_STATE state,
+			  enum HCLGE_MAC_ADDR_TYPE mac_type,
+			  const unsigned char *addr)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_mac_node *mac_node;
+	struct list_head *list;
+
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+		&vport->uc_mac_list : &vport->mc_mac_list;
+
+	spin_lock_bh(&vport->mac_list_lock);
+
+	/* if the mac addr is already in the mac list, no need to add a new
+	 * one into it, just check the mac addr state, convert it to a
+	 * new state, or just remove it, or do nothing.
+	 */
+	mac_node = hclge_find_mac_node(list, addr);
+	if (mac_node) {
+		hclge_update_mac_node(mac_node, state);
+		spin_unlock_bh(&vport->mac_list_lock);
+		set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state);
+		return 0;
+	}
+
+	/* if this address is never added, unnecessary to delete */
+	if (state == HCLGE_MAC_TO_DEL) {
+		spin_unlock_bh(&vport->mac_list_lock);
+		dev_err(&hdev->pdev->dev,
+			"failed to delete address %pM from mac list\n",
+			addr);
+		return -ENOENT;
+	}
+
+	mac_node = kzalloc(sizeof(*mac_node), GFP_ATOMIC);
+	if (!mac_node) {
+		spin_unlock_bh(&vport->mac_list_lock);
+		return -ENOMEM;
+	}
+
+	set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state);
+
+	mac_node->state = state;
+	ether_addr_copy(mac_node->mac_addr, addr);
+	list_add_tail(&mac_node->node, list);
+
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	return 0;
+}
+
 static int hclge_add_uc_addr(struct hnae3_handle *handle,
 			     const unsigned char *addr)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 
-	return hclge_add_uc_addr_common(vport, addr);
+	return hclge_update_mac_list(vport, HCLGE_MAC_TO_ADD, HCLGE_MAC_ADDR_UC,
+				     addr);
 }
 
 int hclge_add_uc_addr_common(struct hclge_vport *vport,
@@ -7367,7 +7473,8 @@ static int hclge_rm_uc_addr(struct hnae3_handle *handle,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 
-	return hclge_rm_uc_addr_common(vport, addr);
+	return hclge_update_mac_list(vport, HCLGE_MAC_TO_DEL, HCLGE_MAC_ADDR_UC,
+				     addr);
 }
 
 int hclge_rm_uc_addr_common(struct hclge_vport *vport,
@@ -7392,6 +7499,8 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 	ret = hclge_remove_mac_vlan_tbl(vport, &req);
 	if (!ret)
 		hclge_update_umv_space(vport, true);
+	else if (ret == -ENOENT)
+		ret = 0;
 
 	return ret;
 }
@@ -7401,7 +7510,8 @@ static int hclge_add_mc_addr(struct hnae3_handle *handle,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 
-	return hclge_add_mc_addr_common(vport, addr);
+	return hclge_update_mac_list(vport, HCLGE_MAC_TO_ADD, HCLGE_MAC_ADDR_MC,
+				     addr);
 }
 
 int hclge_add_mc_addr_common(struct hclge_vport *vport,
@@ -7444,7 +7554,8 @@ static int hclge_rm_mc_addr(struct hnae3_handle *handle,
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
 
-	return hclge_rm_mc_addr_common(vport, addr);
+	return hclge_update_mac_list(vport, HCLGE_MAC_TO_DEL, HCLGE_MAC_ADDR_MC,
+				     addr);
 }
 
 int hclge_rm_mc_addr_common(struct hclge_vport *vport,
@@ -7479,111 +7590,328 @@ int hclge_rm_mc_addr_common(struct hclge_vport *vport,
 			/* Not all the vfid is zero, update the vfid */
 			status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 
-	} else {
-		/* Maybe this mac address is in mta table, but it cannot be
-		 * deleted here because an entry of mta represents an address
-		 * range rather than a specific address. the delete action to
-		 * all entries will take effect in update_mta_status called by
-		 * hns3_nic_set_rx_mode.
-		 */
+	} else if (status == -ENOENT) {
 		status = 0;
 	}
 
 	return status;
 }
 
-void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
-			       enum HCLGE_MAC_ADDR_TYPE mac_type)
+static void hclge_sync_vport_mac_list(struct hclge_vport *vport,
+				      struct list_head *list,
+				      int (*sync)(struct hclge_vport *,
+						  const unsigned char *))
 {
-	struct hclge_vport_mac_addr_cfg *mac_cfg;
-	struct list_head *list;
+	struct hclge_mac_node *mac_node, *tmp;
+	int ret;
 
-	if (!vport->vport_id)
-		return;
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		ret = sync(vport, mac_node->mac_addr);
+		if (!ret) {
+			mac_node->state = HCLGE_MAC_ACTIVE;
+		} else {
+			set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
+				&vport->state);
+			break;
+		}
+	}
+}
 
-	mac_cfg = kzalloc(sizeof(*mac_cfg), GFP_KERNEL);
-	if (!mac_cfg)
-		return;
+static void hclge_unsync_vport_mac_list(struct hclge_vport *vport,
+					struct list_head *list,
+					int (*unsync)(struct hclge_vport *,
+						      const unsigned char *))
+{
+	struct hclge_mac_node *mac_node, *tmp;
+	int ret;
 
-	mac_cfg->hd_tbl_status = true;
-	memcpy(mac_cfg->mac_addr, mac_addr, ETH_ALEN);
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		ret = unsync(vport, mac_node->mac_addr);
+		if (!ret || ret == -ENOENT) {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else {
+			set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
+				&vport->state);
+			break;
+		}
+	}
+}
 
-	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
-	       &vport->uc_mac_list : &vport->mc_mac_list;
+static void hclge_sync_from_add_list(struct list_head *add_list,
+				     struct list_head *mac_list)
+{
+	struct hclge_mac_node *mac_node, *tmp, *new_node;
 
-	list_add_tail(&mac_cfg->node, list);
+	list_for_each_entry_safe(mac_node, tmp, add_list, node) {
+		/* if the mac address from tmp_add_list is not in the
+		 * uc/mc_mac_list, it means a TO_DEL request was received
+		 * during the time window of adding the mac address into the
+		 * mac table. If the mac_node state is ACTIVE, change it to
+		 * TO_DEL so that it will be removed next time. Otherwise it
+		 * must be TO_ADD, i.e. this address hasn't been added into
+		 * the mac table, so just remove the mac node.
+		 */
+		new_node = hclge_find_mac_node(mac_list, mac_node->mac_addr);
+		if (new_node) {
+			hclge_update_mac_node(new_node, mac_node->state);
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else if (mac_node->state == HCLGE_MAC_ACTIVE) {
+			mac_node->state = HCLGE_MAC_TO_DEL;
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, mac_list);
+		} else {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		}
+	}
 }
 
-void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
-			      bool is_write_tbl,
-			      enum HCLGE_MAC_ADDR_TYPE mac_type)
+static void hclge_sync_from_del_list(struct list_head *del_list,
+				     struct list_head *mac_list)
 {
-	struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp;
-	struct list_head *list;
-	bool uc_flag, mc_flag;
+	struct hclge_mac_node *mac_node, *tmp, *new_node;
 
-	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
-	       &vport->uc_mac_list : &vport->mc_mac_list;
+	list_for_each_entry_safe(mac_node, tmp, del_list, node) {
+		new_node = hclge_find_mac_node(mac_list, mac_node->mac_addr);
+		if (new_node) {
+			/* If the mac addr exists in the mac list, it means
+			 * a new TO_ADD request was received during the time
+			 * window of configuring the mac address. Since the mac
+			 * node state is TO_ADD and the address is already in
+			 * the hardware (due to delete failure), we just need
+			 * to change the mac node state to ACTIVE.
+			 */
+			new_node->state = HCLGE_MAC_ACTIVE;
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else {
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, mac_list);
+		}
+	}
+}
 
-	uc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_UC;
-	mc_flag = is_write_tbl && mac_type == HCLGE_MAC_ADDR_MC;
+static void hclge_sync_vport_mac_table(struct hclge_vport *vport,
+				       enum HCLGE_MAC_ADDR_TYPE mac_type)
+{
+	struct hclge_mac_node *mac_node, *tmp, *new_node;
+	struct list_head tmp_add_list, tmp_del_list;
+	struct list_head *list;
 
-	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
-		if (ether_addr_equal(mac_cfg->mac_addr, mac_addr)) {
-			if (uc_flag && mac_cfg->hd_tbl_status)
-				hclge_rm_uc_addr_common(vport, mac_addr);
+	INIT_LIST_HEAD(&tmp_add_list);
+	INIT_LIST_HEAD(&tmp_del_list);
 
-			if (mc_flag && mac_cfg->hd_tbl_status)
-				hclge_rm_mc_addr_common(vport, mac_addr);
+	/* move the mac addr to the tmp_add_list and tmp_del_list, then
+	 * we can add/delete these mac addr outside the spin lock
+	 */
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+		&vport->uc_mac_list : &vport->mc_mac_list;
 
-			list_del(&mac_cfg->node);
-			kfree(mac_cfg);
+	spin_lock_bh(&vport->mac_list_lock);
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		switch (mac_node->state) {
+		case HCLGE_MAC_TO_DEL:
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, &tmp_del_list);
+			break;
+		case HCLGE_MAC_TO_ADD:
+			new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+			if (!new_node)
+				goto stop_traverse;
+			ether_addr_copy(new_node->mac_addr, mac_node->mac_addr);
+			new_node->state = mac_node->state;
+			list_add_tail(&new_node->node, &tmp_add_list);
+			break;
+		default:
 			break;
 		}
 	}
+
+stop_traverse:
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	/* delete first, in order to get max mac table space for adding */
+	if (mac_type == HCLGE_MAC_ADDR_UC) {
+		hclge_unsync_vport_mac_list(vport, &tmp_del_list,
+					    hclge_rm_uc_addr_common);
+		hclge_sync_vport_mac_list(vport, &tmp_add_list,
+					  hclge_add_uc_addr_common);
+	} else {
+		hclge_unsync_vport_mac_list(vport, &tmp_del_list,
+					    hclge_rm_mc_addr_common);
+		hclge_sync_vport_mac_list(vport, &tmp_add_list,
+					  hclge_add_mc_addr_common);
+	}
+
+	/* if some mac addresses failed to be added/deleted, move them back
+	 * to the mac_list, and retry next time.
+	 */
+	spin_lock_bh(&vport->mac_list_lock);
+
+	hclge_sync_from_del_list(&tmp_del_list, list);
+	hclge_sync_from_add_list(&tmp_add_list, list);
+
+	spin_unlock_bh(&vport->mac_list_lock);
+}
+
+static bool hclge_need_sync_mac_table(struct hclge_vport *vport)
+{
+	struct hclge_dev *hdev = vport->back;
+
+	if (test_bit(vport->vport_id, hdev->vport_config_block))
+		return false;
+
+	if (test_and_clear_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state))
+		return true;
+
+	return false;
+}
+
+static void hclge_sync_mac_table(struct hclge_dev *hdev)
+{
+	int i;
+
+	for (i = 0; i < hdev->num_alloc_vport; i++) {
+		struct hclge_vport *vport = &hdev->vport[i];
+
+		if (!hclge_need_sync_mac_table(vport))
+			continue;
+
+		hclge_sync_vport_mac_table(vport, HCLGE_MAC_ADDR_UC);
+		hclge_sync_vport_mac_table(vport, HCLGE_MAC_ADDR_MC);
+	}
 }
 
 void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
 				  enum HCLGE_MAC_ADDR_TYPE mac_type)
 {
-	struct hclge_vport_mac_addr_cfg *mac_cfg, *tmp;
-	struct list_head *list;
+	int (*unsync)(struct hclge_vport *vport, const unsigned char *addr);
+	struct hclge_mac_node *mac_cfg, *tmp;
+	struct hclge_dev *hdev = vport->back;
+	struct list_head tmp_del_list, *list;
+	int ret;
 
-	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
-	       &vport->uc_mac_list : &vport->mc_mac_list;
+	if (mac_type == HCLGE_MAC_ADDR_UC) {
+		list = &vport->uc_mac_list;
+		unsync = hclge_rm_uc_addr_common;
+	} else {
+		list = &vport->mc_mac_list;
+		unsync = hclge_rm_mc_addr_common;
+	}
 
-	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
-		if (mac_type == HCLGE_MAC_ADDR_UC && mac_cfg->hd_tbl_status)
-			hclge_rm_uc_addr_common(vport, mac_cfg->mac_addr);
+	INIT_LIST_HEAD(&tmp_del_list);
 
-		if (mac_type == HCLGE_MAC_ADDR_MC && mac_cfg->hd_tbl_status)
-			hclge_rm_mc_addr_common(vport, mac_cfg->mac_addr);
+	if (!is_del_list)
+		set_bit(vport->vport_id, hdev->vport_config_block);
 
-		mac_cfg->hd_tbl_status = false;
-		if (is_del_list) {
+	spin_lock_bh(&vport->mac_list_lock);
+
+	list_for_each_entry_safe(mac_cfg, tmp, list, node) {
+		switch (mac_cfg->state) {
+		case HCLGE_MAC_TO_DEL:
+		case HCLGE_MAC_ACTIVE:
 			list_del(&mac_cfg->node);
-			kfree(mac_cfg);
+			list_add_tail(&mac_cfg->node, &tmp_del_list);
+			break;
+		case HCLGE_MAC_TO_ADD:
+			if (is_del_list) {
+				list_del(&mac_cfg->node);
+				kfree(mac_cfg);
+			}
+			break;
 		}
 	}
+
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	list_for_each_entry_safe(mac_cfg, tmp, &tmp_del_list, node) {
+		ret = unsync(vport, mac_cfg->mac_addr);
+		if (!ret || ret == -ENOENT) {
+			/* clear all mac addr from hardware, but keep these
+			 * mac addr in the mac list, and restore them after
+			 * the vf reset has finished.
+			 */
+			if (!is_del_list &&
+			    mac_cfg->state == HCLGE_MAC_ACTIVE) {
+				mac_cfg->state = HCLGE_MAC_TO_ADD;
+			} else {
+				list_del(&mac_cfg->node);
+				kfree(mac_cfg);
+			}
+		} else if (is_del_list) {
+			mac_cfg->state = HCLGE_MAC_TO_DEL;
+		}
+	}
+
+	spin_lock_bh(&vport->mac_list_lock);
+
+	hclge_sync_from_del_list(&tmp_del_list, list);
+
+	spin_unlock_bh(&vport->mac_list_lock);
+}
+
+/* remove all mac addresses when uninitializing */
+static void hclge_uninit_vport_mac_list(struct hclge_vport *vport,
+					enum HCLGE_MAC_ADDR_TYPE mac_type)
+{
+	struct hclge_mac_node *mac_node, *tmp;
+	struct hclge_dev *hdev = vport->back;
+	struct list_head tmp_del_list, *list;
+
+	INIT_LIST_HEAD(&tmp_del_list);
+
+	list = (mac_type == HCLGE_MAC_ADDR_UC) ?
+		&vport->uc_mac_list : &vport->mc_mac_list;
+
+	spin_lock_bh(&vport->mac_list_lock);
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		switch (mac_node->state) {
+		case HCLGE_MAC_TO_DEL:
+		case HCLGE_MAC_ACTIVE:
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, &tmp_del_list);
+			break;
+		case HCLGE_MAC_TO_ADD:
+			list_del(&mac_node->node);
+			kfree(mac_node);
+			break;
+		}
+	}
+
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	if (mac_type == HCLGE_MAC_ADDR_UC)
+		hclge_unsync_vport_mac_list(vport, &tmp_del_list,
+					    hclge_rm_uc_addr_common);
+	else
+		hclge_unsync_vport_mac_list(vport, &tmp_del_list,
+					    hclge_rm_mc_addr_common);
+
+	if (!list_empty(&tmp_del_list))
+		dev_warn(&hdev->pdev->dev,
+			 "uninit %s mac list for vport %u not completely.\n",
+			 mac_type == HCLGE_MAC_ADDR_UC ? "uc" : "mc",
+			 vport->vport_id);
+
+	list_for_each_entry_safe(mac_node, tmp, &tmp_del_list, node) {
+		list_del(&mac_node->node);
+		kfree(mac_node);
+	}
 }
 
-void hclge_uninit_vport_mac_table(struct hclge_dev *hdev)
+static void hclge_uninit_mac_table(struct hclge_dev *hdev)
 {
-	struct hclge_vport_mac_addr_cfg *mac, *tmp;
 	struct hclge_vport *vport;
 	int i;
 
 	for (i = 0; i < hdev->num_alloc_vport; i++) {
 		vport = &hdev->vport[i];
-		list_for_each_entry_safe(mac, tmp, &vport->uc_mac_list, node) {
-			list_del(&mac->node);
-			kfree(mac);
-		}
-
-		list_for_each_entry_safe(mac, tmp, &vport->mc_mac_list, node) {
-			list_del(&mac->node);
-			kfree(mac);
-		}
+		hclge_uninit_vport_mac_list(vport, HCLGE_MAC_ADDR_UC);
+		hclge_uninit_vport_mac_list(vport, HCLGE_MAC_ADDR_MC);
 	}
 }
 
@@ -7747,12 +8075,57 @@ static void hclge_get_mac_addr(struct hnae3_handle *handle, u8 *p)
 	ether_addr_copy(p, hdev->hw.mac.mac_addr);
 }
 
+int hclge_update_mac_node_for_dev_addr(struct hclge_vport *vport,
+				       const u8 *old_addr, const u8 *new_addr)
+{
+	struct list_head *list = &vport->uc_mac_list;
+	struct hclge_mac_node *old_node, *new_node;
+
+	new_node = hclge_find_mac_node(list, new_addr);
+	if (!new_node) {
+		new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+		if (!new_node)
+			return -ENOMEM;
+
+		new_node->state = HCLGE_MAC_TO_ADD;
+		ether_addr_copy(new_node->mac_addr, new_addr);
+		list_add(&new_node->node, list);
+	} else {
+		if (new_node->state == HCLGE_MAC_TO_DEL)
+			new_node->state = HCLGE_MAC_ACTIVE;
+
+		/* make sure the new addr is at the list head, so that the
+		 * dev addr is not left out of the mac table due to the umv
+		 * space limitation when it is re-added after a global/imp
+		 * reset, in which the hardware clears the mac table.
+		 */
+		list_move(&new_node->node, list);
+	}
+
+	if (old_addr && !ether_addr_equal(old_addr, new_addr)) {
+		old_node = hclge_find_mac_node(list, old_addr);
+		if (old_node) {
+			if (old_node->state == HCLGE_MAC_TO_ADD) {
+				list_del(&old_node->node);
+				kfree(old_node);
+			} else {
+				old_node->state = HCLGE_MAC_TO_DEL;
+			}
+		}
+	}
+
+	set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state);
+
+	return 0;
+}
+
 static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
 			      bool is_first)
 {
 	const unsigned char *new_addr = (const unsigned char *)p;
 	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_dev *hdev = vport->back;
+	unsigned char *old_addr = NULL;
 	int ret;
 
 	/* mac addr check */
@@ -7760,39 +8133,42 @@ static int hclge_set_mac_addr(struct hnae3_handle *handle, void *p,
 	    is_broadcast_ether_addr(new_addr) ||
 	    is_multicast_ether_addr(new_addr)) {
 		dev_err(&hdev->pdev->dev,
-			"Change uc mac err! invalid mac:%pM.\n",
+			"change uc mac err! invalid mac: %pM.\n",
 			 new_addr);
 		return -EINVAL;
 	}
 
-	if ((!is_first || is_kdump_kernel()) &&
-	    hclge_rm_uc_addr(handle, hdev->hw.mac.mac_addr))
-		dev_warn(&hdev->pdev->dev,
-			 "remove old uc mac address fail.\n");
-
-	ret = hclge_add_uc_addr(handle, new_addr);
+	ret = hclge_pause_addr_cfg(hdev, new_addr);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"add uc mac address fail, ret =%d.\n",
+			"failed to configure mac pause address, ret = %d\n",
 			ret);
-
-		if (!is_first &&
-		    hclge_add_uc_addr(handle, hdev->hw.mac.mac_addr))
-			dev_err(&hdev->pdev->dev,
-				"restore uc mac address fail.\n");
-
-		return -EIO;
+		return ret;
 	}
 
-	ret = hclge_pause_addr_cfg(hdev, new_addr);
+	if (!is_first)
+		old_addr = hdev->hw.mac.mac_addr;
+
+	spin_lock_bh(&vport->mac_list_lock);
+	ret = hclge_update_mac_node_for_dev_addr(vport, old_addr, new_addr);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-			"configure mac pause address fail, ret =%d.\n",
-			ret);
-		return -EIO;
-	}
+			"failed to change the mac addr:%pM, ret = %d\n",
+			new_addr, ret);
+		spin_unlock_bh(&vport->mac_list_lock);
+
+		if (!is_first)
+			hclge_pause_addr_cfg(hdev, old_addr);
 
+		return ret;
+	}
+	/* we must update dev addr with spin lock protect, preventing dev addr
+	 * being removed by set_rx_mode path.
+	 */
 	ether_addr_copy(hdev->hw.mac.mac_addr, new_addr);
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	hclge_task_schedule(hdev, 0);
 
 	return 0;
 }
@@ -8408,6 +8784,37 @@ static void hclge_restore_vlan_table(struct hnae3_handle *handle)
 	}
 }
 
+/* For global reset and imp reset, hardware will clear the mac table,
+ * so we change the mac address state from ACTIVE to TO_ADD, then they
+ * can be restored in the service task after reset complete. Furthermore,
+ * the mac addresses with state TO_DEL or DEL_FAIL are unnecessary to
+ * be restored after reset, so just remove these mac nodes from mac_list.
+ */
+static void hclge_mac_node_convert_for_reset(struct list_head *list)
+{
+	struct hclge_mac_node *mac_node, *tmp;
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		if (mac_node->state == HCLGE_MAC_ACTIVE) {
+			mac_node->state = HCLGE_MAC_TO_ADD;
+		} else if (mac_node->state == HCLGE_MAC_TO_DEL) {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		}
+	}
+}
+
+void hclge_restore_mac_table_common(struct hclge_vport *vport)
+{
+	spin_lock_bh(&vport->mac_list_lock);
+
+	hclge_mac_node_convert_for_reset(&vport->uc_mac_list);
+	hclge_mac_node_convert_for_reset(&vport->mc_mac_list);
+	set_bit(HCLGE_VPORT_STATE_MAC_TBL_CHANGE, &vport->state);
+
+	spin_unlock_bh(&vport->mac_list_lock);
+}
+
 int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -9899,6 +10306,15 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 	set_bit(HCLGE_STATE_DOWN, &hdev->state);
 
 	hclge_stats_clear(hdev);
+	/* NOTE: a PF reset does not need to clear or restore the PF and VF
+	 * table entries, so the in-memory table should not be cleaned here.
+	 */
+	if (hdev->reset_type == HNAE3_IMP_RESET ||
+	    hdev->reset_type == HNAE3_GLOBAL_RESET) {
+		bitmap_set(hdev->vport_config_block, 0, hdev->num_alloc_vport);
+		hclge_reset_umv_space(hdev);
+	}
+
 	memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
 	memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full));
 
@@ -9914,8 +10330,6 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 		return ret;
 	}
 
-	hclge_reset_umv_space(hdev);
-
 	ret = hclge_mac_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Mac init error, ret = %d\n", ret);
@@ -10011,6 +10425,7 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_clear_vf_vlan(hdev);
 	hclge_misc_affinity_teardown(hdev);
 	hclge_state_uninit(hdev);
+	hclge_uninit_mac_table(hdev);
 
 	if (mac->phydev)
 		mdiobus_unregister(mac->mdio_bus);
@@ -10028,7 +10443,6 @@ static void hclge_uninit_ae_dev(struct hnae3_ae_dev *ae_dev)
 	hclge_misc_irq_uninit(hdev);
 	hclge_pci_uninit(hdev);
 	mutex_destroy(&hdev->vport_lock);
-	hclge_uninit_vport_mac_table(hdev);
 	hclge_uninit_vport_vlan_table(hdev);
 	ae_dev->priv = NULL;
 }
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index a58c26200ea0..5fcbc3d23f21 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -630,9 +630,15 @@ struct hclge_fd_ad_data {
 	u16 rule_id;
 };
 
-struct hclge_vport_mac_addr_cfg {
+enum HCLGE_MAC_NODE_STATE {
+	HCLGE_MAC_TO_ADD,
+	HCLGE_MAC_TO_DEL,
+	HCLGE_MAC_ACTIVE
+};
+
+struct hclge_mac_node {
 	struct list_head node;
-	int hd_tbl_status;
+	enum HCLGE_MAC_NODE_STATE state;
 	u8 mac_addr[ETH_ALEN];
 };
 
@@ -805,6 +811,8 @@ struct hclge_dev {
 	unsigned long vlan_table[VLAN_N_VID][BITS_TO_LONGS(HCLGE_VPORT_NUM)];
 	unsigned long vf_vlan_full[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
 
+	unsigned long vport_config_block[BITS_TO_LONGS(HCLGE_VPORT_NUM)];
+
 	struct hclge_fd_cfg fd_cfg;
 	struct hlist_head fd_rule_list;
 	spinlock_t fd_rule_lock; /* protect fd_rule_list and fd_bmap */
@@ -866,6 +874,7 @@ struct hclge_rss_tuple_cfg {
 
 enum HCLGE_VPORT_STATE {
 	HCLGE_VPORT_STATE_ALIVE,
+	HCLGE_VPORT_STATE_MAC_TBL_CHANGE,
 	HCLGE_VPORT_STATE_MAX
 };
 
@@ -922,6 +931,7 @@ struct hclge_vport {
 	u32 mps; /* Max packet size */
 	struct hclge_vf_info vf_info;
 
+	spinlock_t mac_list_lock; /* protect mac address need to add/delete */
 	struct list_head uc_mac_list;   /* Store VF unicast table */
 	struct list_head mc_mac_list;   /* Store VF multicast table */
 	struct list_head vlan_list;     /* Store VF vlan table */
@@ -977,16 +987,17 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf);
 u16 hclge_covert_handle_qid_global(struct hnae3_handle *handle, u16 queue_id);
 int hclge_notify_client(struct hclge_dev *hdev,
 			enum hnae3_reset_notify_type type);
-void hclge_add_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
-			       enum HCLGE_MAC_ADDR_TYPE mac_type);
-void hclge_rm_vport_mac_table(struct hclge_vport *vport, const u8 *mac_addr,
-			      bool is_write_tbl,
-			      enum HCLGE_MAC_ADDR_TYPE mac_type);
+int hclge_update_mac_list(struct hclge_vport *vport,
+			  enum HCLGE_MAC_NODE_STATE state,
+			  enum HCLGE_MAC_ADDR_TYPE mac_type,
+			  const unsigned char *addr);
+int hclge_update_mac_node_for_dev_addr(struct hclge_vport *vport,
+				       const u8 *old_addr, const u8 *new_addr);
 void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
 				  enum HCLGE_MAC_ADDR_TYPE mac_type);
-void hclge_uninit_vport_mac_table(struct hclge_dev *hdev);
 void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
 void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
+void hclge_restore_mac_table_common(struct hclge_vport *vport);
 int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 				    struct hclge_vlan_info *vlan_info);
 int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 103c2ec777b0..0efc04562ba6 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -275,26 +275,17 @@ static int hclge_set_vf_uc_mac_addr(struct hclge_vport *vport,
 		if (!is_valid_ether_addr(mac_addr))
 			return -EINVAL;
 
-		hclge_rm_uc_addr_common(vport, old_addr);
-		status = hclge_add_uc_addr_common(vport, mac_addr);
-		if (status) {
-			hclge_add_uc_addr_common(vport, old_addr);
-		} else {
-			hclge_rm_vport_mac_table(vport, mac_addr,
-						 false, HCLGE_MAC_ADDR_UC);
-			hclge_add_vport_mac_table(vport, mac_addr,
-						  HCLGE_MAC_ADDR_UC);
-		}
+		spin_lock_bh(&vport->mac_list_lock);
+		status = hclge_update_mac_node_for_dev_addr(vport, old_addr,
+							    mac_addr);
+		spin_unlock_bh(&vport->mac_list_lock);
+		hclge_task_schedule(hdev, 0);
 	} else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_UC_ADD) {
-		status = hclge_add_uc_addr_common(vport, mac_addr);
-		if (!status)
-			hclge_add_vport_mac_table(vport, mac_addr,
-						  HCLGE_MAC_ADDR_UC);
+		status = hclge_update_mac_list(vport, HCLGE_MAC_TO_ADD,
+					       HCLGE_MAC_ADDR_UC, mac_addr);
 	} else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_UC_REMOVE) {
-		status = hclge_rm_uc_addr_common(vport, mac_addr);
-		if (!status)
-			hclge_rm_vport_mac_table(vport, mac_addr,
-						 false, HCLGE_MAC_ADDR_UC);
+		status = hclge_update_mac_list(vport, HCLGE_MAC_TO_DEL,
+					       HCLGE_MAC_ADDR_UC, mac_addr);
 	} else {
 		dev_err(&hdev->pdev->dev,
 			"failed to set unicast mac addr, unknown subcode %u\n",
@@ -310,18 +301,13 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
 {
 	const u8 *mac_addr = (const u8 *)(mbx_req->msg.data);
 	struct hclge_dev *hdev = vport->back;
-	int status;
 
 	if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_MC_ADD) {
-		status = hclge_add_mc_addr_common(vport, mac_addr);
-		if (!status)
-			hclge_add_vport_mac_table(vport, mac_addr,
-						  HCLGE_MAC_ADDR_MC);
+		hclge_update_mac_list(vport, HCLGE_MAC_TO_ADD,
+				      HCLGE_MAC_ADDR_MC, mac_addr);
 	} else if (mbx_req->msg.subcode == HCLGE_MBX_MAC_VLAN_MC_REMOVE) {
-		status = hclge_rm_mc_addr_common(vport, mac_addr);
-		if (!status)
-			hclge_rm_vport_mac_table(vport, mac_addr,
-						 false, HCLGE_MAC_ADDR_MC);
+		hclge_update_mac_list(vport, HCLGE_MAC_TO_DEL,
+				      HCLGE_MAC_ADDR_MC, mac_addr);
 	} else {
 		dev_err(&hdev->pdev->dev,
 			"failed to set mcast mac addr, unknown subcode %u\n",
@@ -329,7 +315,7 @@ static int hclge_set_vf_mc_mac_addr(struct hclge_vport *vport,
 		return -EIO;
 	}
 
-	return status;
+	return 0;
 }
 
 int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index e02d427131ee..05d485a48706 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1245,10 +1245,12 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
 	int status;
 
 	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST, 0);
-	send_msg.subcode = is_first ? HCLGE_MBX_MAC_VLAN_UC_ADD :
-			HCLGE_MBX_MAC_VLAN_UC_MODIFY;
+	send_msg.subcode = HCLGE_MBX_MAC_VLAN_UC_MODIFY;
 	ether_addr_copy(send_msg.data, new_mac_addr);
-	ether_addr_copy(&send_msg.data[ETH_ALEN], old_mac_addr);
+	if (is_first && !hdev->has_pf_mac)
+		eth_zero_addr(&send_msg.data[ETH_ALEN]);
+	else
+		ether_addr_copy(&send_msg.data[ETH_ALEN], old_mac_addr);
 	status = hclgevf_send_mbx_msg(hdev, &send_msg, true, NULL, 0);
 	if (!status)
 		ether_addr_copy(hdev->hw.mac.mac_addr, new_mac_addr);
@@ -1256,54 +1258,302 @@ static int hclgevf_set_mac_addr(struct hnae3_handle *handle, void *p,
 	return status;
 }
 
-static int hclgevf_add_uc_addr(struct hnae3_handle *handle,
-			       const unsigned char *addr)
+static struct hclgevf_mac_addr_node *
+hclgevf_find_mac_node(struct list_head *list, const u8 *mac_addr)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp;
+
+	list_for_each_entry_safe(mac_node, tmp, list, node)
+		if (ether_addr_equal(mac_addr, mac_node->mac_addr))
+			return mac_node;
+
+	return NULL;
+}
+
+static void hclgevf_update_mac_node(struct hclgevf_mac_addr_node *mac_node,
+				    enum HCLGEVF_MAC_NODE_STATE state)
+{
+	switch (state) {
+	/* from set_rx_mode or tmp_add_list */
+	case HCLGEVF_MAC_TO_ADD:
+		if (mac_node->state == HCLGEVF_MAC_TO_DEL)
+			mac_node->state = HCLGEVF_MAC_ACTIVE;
+		break;
+	/* only from set_rx_mode */
+	case HCLGEVF_MAC_TO_DEL:
+		if (mac_node->state == HCLGEVF_MAC_TO_ADD) {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else {
+			mac_node->state = HCLGEVF_MAC_TO_DEL;
+		}
+		break;
+	/* only from tmp_add_list, the mac_node->state won't be
+	 * HCLGEVF_MAC_ACTIVE
+	 */
+	case HCLGEVF_MAC_ACTIVE:
+		if (mac_node->state == HCLGEVF_MAC_TO_ADD)
+			mac_node->state = HCLGEVF_MAC_ACTIVE;
+		break;
+	}
+}
+
+static int hclgevf_update_mac_list(struct hnae3_handle *handle,
+				   enum HCLGEVF_MAC_NODE_STATE state,
+				   enum HCLGEVF_MAC_ADDR_TYPE mac_type,
+				   const unsigned char *addr)
 {
 	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hclge_vf_to_pf_msg send_msg;
+	struct hclgevf_mac_addr_node *mac_node;
+	struct list_head *list;
 
-	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST,
-			       HCLGE_MBX_MAC_VLAN_UC_ADD);
-	ether_addr_copy(send_msg.data, addr);
-	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+	list = (mac_type == HCLGEVF_MAC_ADDR_UC) ?
+	       &hdev->mac_table.uc_mac_list : &hdev->mac_table.mc_mac_list;
+
+	spin_lock_bh(&hdev->mac_table.mac_list_lock);
+
+	/* if the mac addr is already in the mac list, no need to add a new
+	 * one into it, just check the mac addr state, convert it to a new
+	 * state, or just remove it, or do nothing.
+	 */
+	mac_node = hclgevf_find_mac_node(list, addr);
+	if (mac_node) {
+		hclgevf_update_mac_node(mac_node, state);
+		spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+		return 0;
+	}
+	/* if this address is never added, unnecessary to delete */
+	if (state == HCLGEVF_MAC_TO_DEL) {
+		spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+		return -ENOENT;
+	}
+
+	mac_node = kzalloc(sizeof(*mac_node), GFP_ATOMIC);
+	if (!mac_node) {
+		spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+		return -ENOMEM;
+	}
+
+	mac_node->state = state;
+	ether_addr_copy(mac_node->mac_addr, addr);
+	list_add_tail(&mac_node->node, list);
+
+	spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+	return 0;
+}
+
+static int hclgevf_add_uc_addr(struct hnae3_handle *handle,
+			       const unsigned char *addr)
+{
+	return hclgevf_update_mac_list(handle, HCLGEVF_MAC_TO_ADD,
+				       HCLGEVF_MAC_ADDR_UC, addr);
 }
 
 static int hclgevf_rm_uc_addr(struct hnae3_handle *handle,
 			      const unsigned char *addr)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hclge_vf_to_pf_msg send_msg;
-
-	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_UNICAST,
-			       HCLGE_MBX_MAC_VLAN_UC_REMOVE);
-	ether_addr_copy(send_msg.data, addr);
-	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+	return hclgevf_update_mac_list(handle, HCLGEVF_MAC_TO_DEL,
+				       HCLGEVF_MAC_ADDR_UC, addr);
 }
 
 static int hclgevf_add_mc_addr(struct hnae3_handle *handle,
 			       const unsigned char *addr)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
-	struct hclge_vf_to_pf_msg send_msg;
-
-	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_MULTICAST,
-			       HCLGE_MBX_MAC_VLAN_MC_ADD);
-	ether_addr_copy(send_msg.data, addr);
-	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+	return hclgevf_update_mac_list(handle, HCLGEVF_MAC_TO_ADD,
+				       HCLGEVF_MAC_ADDR_MC, addr);
 }
 
 static int hclgevf_rm_mc_addr(struct hnae3_handle *handle,
 			      const unsigned char *addr)
 {
-	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+	return hclgevf_update_mac_list(handle, HCLGEVF_MAC_TO_DEL,
+				       HCLGEVF_MAC_ADDR_MC, addr);
+}
+
+static int hclgevf_add_del_mac_addr(struct hclgevf_dev *hdev,
+				    struct hclgevf_mac_addr_node *mac_node,
+				    enum HCLGEVF_MAC_ADDR_TYPE mac_type)
+{
 	struct hclge_vf_to_pf_msg send_msg;
+	u8 code, subcode;
 
-	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_SET_MULTICAST,
-			       HCLGE_MBX_MAC_VLAN_MC_REMOVE);
-	ether_addr_copy(send_msg.data, addr);
+	if (mac_type == HCLGEVF_MAC_ADDR_UC) {
+		code = HCLGE_MBX_SET_UNICAST;
+		if (mac_node->state == HCLGEVF_MAC_TO_ADD)
+			subcode = HCLGE_MBX_MAC_VLAN_UC_ADD;
+		else
+			subcode = HCLGE_MBX_MAC_VLAN_UC_REMOVE;
+	} else {
+		code = HCLGE_MBX_SET_MULTICAST;
+		if (mac_node->state == HCLGEVF_MAC_TO_ADD)
+			subcode = HCLGE_MBX_MAC_VLAN_MC_ADD;
+		else
+			subcode = HCLGE_MBX_MAC_VLAN_MC_REMOVE;
+	}
+
+	hclgevf_build_send_msg(&send_msg, code, subcode);
+	ether_addr_copy(send_msg.data, mac_node->mac_addr);
 	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
 }
 
+static void hclgevf_config_mac_list(struct hclgevf_dev *hdev,
+				    struct list_head *list,
+				    enum HCLGEVF_MAC_ADDR_TYPE mac_type)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp;
+	int ret;
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		ret = hclgevf_add_del_mac_addr(hdev, mac_node, mac_type);
+		if  (ret) {
+			dev_err(&hdev->pdev->dev,
+				"failed to configure mac %pM, state = %d, ret = %d\n",
+				mac_node->mac_addr, mac_node->state, ret);
+			return;
+		}
+		if (mac_node->state == HCLGEVF_MAC_TO_ADD) {
+			mac_node->state = HCLGEVF_MAC_ACTIVE;
+		} else {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		}
+	}
+}
+
+static void hclgevf_sync_from_add_list(struct list_head *add_list,
+				       struct list_head *mac_list)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp, *new_node;
+
+	list_for_each_entry_safe(mac_node, tmp, add_list, node) {
+		/* if the mac address from tmp_add_list is not in the
+		 * uc/mc_mac_list, it means we have received a TO_DEL request
+		 * during the time window of sending mac config request to PF.
+		 * If mac_node state is ACTIVE, then change its state to TO_DEL,
+		 * then it will be removed at next time. If is TO_ADD, it means
+		 * send TO_ADD request failed, so just remove the mac node.
+		 */
+		new_node = hclgevf_find_mac_node(mac_list, mac_node->mac_addr);
+		if (new_node) {
+			hclgevf_update_mac_node(new_node, mac_node->state);
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else if (mac_node->state == HCLGEVF_MAC_ACTIVE) {
+			mac_node->state = HCLGEVF_MAC_TO_DEL;
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, mac_list);
+		} else {
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		}
+	}
+}
+
+static void hclgevf_sync_from_del_list(struct list_head *del_list,
+				       struct list_head *mac_list)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp, *new_node;
+
+	list_for_each_entry_safe(mac_node, tmp, del_list, node) {
+		new_node = hclgevf_find_mac_node(mac_list, mac_node->mac_addr);
+		if (new_node) {
+			/* If the mac addr exists in the mac list, it means we
+			 * received a new TO_ADD request during the time window
+			 * of sending mac addr configure request to PF, so just
+			 * change the mac state to ACTIVE.
+			 */
+			new_node->state = HCLGEVF_MAC_ACTIVE;
+			list_del(&mac_node->node);
+			kfree(mac_node);
+		} else {
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, mac_list);
+		}
+	}
+}
+
+static void hclgevf_clear_list(struct list_head *list)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp;
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		list_del(&mac_node->node);
+		kfree(mac_node);
+	}
+}
+
+static void hclgevf_sync_mac_list(struct hclgevf_dev *hdev,
+				  enum HCLGEVF_MAC_ADDR_TYPE mac_type)
+{
+	struct hclgevf_mac_addr_node *mac_node, *tmp, *new_node;
+	struct list_head tmp_add_list, tmp_del_list;
+	struct list_head *list;
+
+	INIT_LIST_HEAD(&tmp_add_list);
+	INIT_LIST_HEAD(&tmp_del_list);
+
+	/* move the mac addr to the tmp_add_list and tmp_del_list, then
+	 * we can add/delete these mac addr outside the spin lock
+	 */
+	list = (mac_type == HCLGEVF_MAC_ADDR_UC) ?
+		&hdev->mac_table.uc_mac_list : &hdev->mac_table.mc_mac_list;
+
+	spin_lock_bh(&hdev->mac_table.mac_list_lock);
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		switch (mac_node->state) {
+		case HCLGEVF_MAC_TO_DEL:
+			list_del(&mac_node->node);
+			list_add_tail(&mac_node->node, &tmp_del_list);
+			break;
+		case HCLGEVF_MAC_TO_ADD:
+			new_node = kzalloc(sizeof(*new_node), GFP_ATOMIC);
+			if (!new_node)
+				goto stop_traverse;
+
+			ether_addr_copy(new_node->mac_addr, mac_node->mac_addr);
+			new_node->state = mac_node->state;
+			list_add_tail(&new_node->node, &tmp_add_list);
+			break;
+		default:
+			break;
+		}
+	}
+
+stop_traverse:
+	spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+
+	/* delete first, in order to get max mac table space for adding */
+	hclgevf_config_mac_list(hdev, &tmp_del_list, mac_type);
+	hclgevf_config_mac_list(hdev, &tmp_add_list, mac_type);
+
+	/* if some mac addresses were added/deleted fail, move back to the
+	 * mac_list, and retry at next time.
+	 */
+	spin_lock_bh(&hdev->mac_table.mac_list_lock);
+
+	hclgevf_sync_from_del_list(&tmp_del_list, list);
+	hclgevf_sync_from_add_list(&tmp_add_list, list);
+
+	spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+}
+
+static void hclgevf_sync_mac_table(struct hclgevf_dev *hdev)
+{
+	hclgevf_sync_mac_list(hdev, HCLGEVF_MAC_ADDR_UC);
+	hclgevf_sync_mac_list(hdev, HCLGEVF_MAC_ADDR_MC);
+}
+
+static void hclgevf_uninit_mac_list(struct hclgevf_dev *hdev)
+{
+	spin_lock_bh(&hdev->mac_table.mac_list_lock);
+
+	hclgevf_clear_list(&hdev->mac_table.uc_mac_list);
+	hclgevf_clear_list(&hdev->mac_table.mc_mac_list);
+
+	spin_unlock_bh(&hdev->mac_table.mac_list_lock);
+}
+
 static int hclgevf_set_vlan_filter(struct hnae3_handle *handle,
 				   __be16 proto, u16 vlan_id,
 				   bool is_kill)
@@ -1951,6 +2201,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
 
 	hclgevf_sync_vlan_filter(hdev);
 
+	hclgevf_sync_mac_table(hdev);
+
 	hdev->last_serv_processed = jiffies;
 
 out:
@@ -2313,6 +2565,10 @@ static void hclgevf_state_init(struct hclgevf_dev *hdev)
 	mutex_init(&hdev->mbx_resp.mbx_mutex);
 	sema_init(&hdev->reset_sem, 1);
 
+	spin_lock_init(&hdev->mac_table.mac_list_lock);
+	INIT_LIST_HEAD(&hdev->mac_table.uc_mac_list);
+	INIT_LIST_HEAD(&hdev->mac_table.mc_mac_list);
+
 	/* bring the device down */
 	set_bit(HCLGEVF_STATE_DOWN, &hdev->state);
 }
@@ -2846,6 +3102,7 @@ static void hclgevf_uninit_hdev(struct hclgevf_dev *hdev)
 
 	hclgevf_pci_uninit(hdev);
 	hclgevf_cmd_uninit(hdev);
+	hclgevf_uninit_mac_list(hdev);
 }
 
 static int hclgevf_init_ae_dev(struct hnae3_ae_dev *ae_dev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 3b88d866facc..0222d9b87a42 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -234,6 +234,29 @@ struct hclgevf_rst_stats {
 	u32 rst_fail_cnt;		/* the number of VF reset fail */
 };
 
+enum HCLGEVF_MAC_ADDR_TYPE {
+	HCLGEVF_MAC_ADDR_UC,
+	HCLGEVF_MAC_ADDR_MC
+};
+
+enum HCLGEVF_MAC_NODE_STATE {
+	HCLGEVF_MAC_TO_ADD,
+	HCLGEVF_MAC_TO_DEL,
+	HCLGEVF_MAC_ACTIVE
+};
+
+struct hclgevf_mac_addr_node {
+	struct list_head node;
+	enum HCLGEVF_MAC_NODE_STATE state;
+	u8 mac_addr[ETH_ALEN];
+};
+
+struct hclgevf_mac_table_cfg {
+	spinlock_t mac_list_lock; /* protect mac address need to add/delete */
+	struct list_head uc_mac_list;
+	struct list_head mc_mac_list;
+};
+
 struct hclgevf_dev {
 	struct pci_dev *pdev;
 	struct hnae3_ae_dev *ae_dev;
@@ -282,6 +305,8 @@ struct hclgevf_dev {
 
 	unsigned long vlan_del_fail_bmap[BITS_TO_LONGS(VLAN_N_VID)];
 
+	struct hclgevf_mac_table_cfg mac_table;
+
 	bool mbx_event_pending;
 	struct hclgevf_mbx_resp_status mbx_resp; /* mailbox response */
 	struct hclgevf_mbx_arq_ring arq; /* mailbox async rx queue */
-- 
cgit v1.2.3-59-g8ed1b


From f671237a4b4521dfde5f96c2b088287712e72f4b Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:10 +0800
Subject: net: hns3: add support for dumping UC and MC MAC list

This patch adds support for dumping the entries of the UC and MC MAC
lists, which helps to check whether a MAC address has been added into
hardware or not.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c |  2 +
 .../ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 51 ++++++++++++++++++++++
 2 files changed, 53 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
index c934f328c040..fe7fb565da19 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_debugfs.c
@@ -262,6 +262,8 @@ static void hns3_dbg_help(struct hnae3_handle *h)
 	dev_info(&h->pdev->dev, "dump mac tnl status\n");
 	dev_info(&h->pdev->dev, "dump loopback\n");
 	dev_info(&h->pdev->dev, "dump qs shaper [qs id]\n");
+	dev_info(&h->pdev->dev, "dump uc mac list <func id>\n");
+	dev_info(&h->pdev->dev, "dump mc mac list <func id>\n");
 
 	memset(printf_buf, 0, HNS3_DBG_BUF_LEN);
 	strncat(printf_buf, "dump reg [[bios common] [ssu <port_id>]",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 66c1ad3a156b..6cfa8253eefc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1441,6 +1441,49 @@ static void hclge_dbg_dump_qs_shaper(struct hclge_dev *hdev,
 	hclge_dbg_dump_qs_shaper_single(hdev, qsid);
 }
 
+static int hclge_dbg_dump_mac_list(struct hclge_dev *hdev, const char *cmd_buf,
+				   bool is_unicast)
+{
+	struct hclge_mac_node *mac_node, *tmp;
+	struct hclge_vport *vport;
+	struct list_head *list;
+	u32 func_id;
+	int ret;
+
+	ret = kstrtouint(cmd_buf, 0, &func_id);
+	if (ret < 0) {
+		dev_err(&hdev->pdev->dev,
+			"dump mac list: bad command string, ret = %d\n", ret);
+		return -EINVAL;
+	}
+
+	if (func_id >= hdev->num_alloc_vport) {
+		dev_err(&hdev->pdev->dev,
+			"function id(%u) is out of range(0-%u)\n", func_id,
+			hdev->num_alloc_vport - 1);
+		return -EINVAL;
+	}
+
+	vport = &hdev->vport[func_id];
+
+	list = is_unicast ? &vport->uc_mac_list : &vport->mc_mac_list;
+
+	dev_info(&hdev->pdev->dev, "vport %u %s mac list:\n",
+		 func_id, is_unicast ? "uc" : "mc");
+	dev_info(&hdev->pdev->dev, "mac address              state\n");
+
+	spin_lock_bh(&vport->mac_list_lock);
+
+	list_for_each_entry_safe(mac_node, tmp, list, node) {
+		dev_info(&hdev->pdev->dev, "%pM         %d\n",
+			 mac_node->mac_addr, mac_node->state);
+	}
+
+	spin_unlock_bh(&vport->mac_list_lock);
+
+	return 0;
+}
+
 int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 {
 #define DUMP_REG	"dump reg"
@@ -1485,6 +1528,14 @@ int hclge_dbg_run_cmd(struct hnae3_handle *handle, const char *cmd_buf)
 	} else if (strncmp(cmd_buf, "dump qs shaper", 14) == 0) {
 		hclge_dbg_dump_qs_shaper(hdev,
 					 &cmd_buf[sizeof("dump qs shaper")]);
+	} else if (strncmp(cmd_buf, "dump uc mac list", 16) == 0) {
+		hclge_dbg_dump_mac_list(hdev,
+					&cmd_buf[sizeof("dump uc mac list")],
+					true);
+	} else if (strncmp(cmd_buf, "dump mc mac list", 16) == 0) {
+		hclge_dbg_dump_mac_list(hdev,
+					&cmd_buf[sizeof("dump mc mac list")],
+					false);
 	} else {
 		dev_info(&hdev->pdev->dev, "unknown command\n");
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From c631c696823cdddbf3c683c3b78812ecba31c350 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:11 +0800
Subject: net: hns3: refactor the promisc mode setting

As the HNS3 driver doesn't update the MAC address directly in
function hns3_set_rx_mode() now, it can't know whether the
MAC table is full from __dev_uc_sync() and __dev_mc_sync(),
so it's senseless to handle the overflow promisc here.

This patch removes the handling of overflow promisc from function
hns3_set_rx_mode(), and updates the promisc mode in the service
task.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  3 +
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 42 +++-------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  1 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  2 +-
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 89 ++++++++++++++++++----
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  4 +
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 26 +++++++
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h  |  1 +
 8 files changed, 122 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5587605d6deb..a56f8d623349 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -270,6 +270,8 @@ struct hnae3_ae_dev {
  *   Set loopback
  * set_promisc_mode
  *   Set promisc mode
+ * request_update_promisc_mode
+ *   request to hclge(vf) to update promisc mode
  * set_mtu()
  *   set mtu
  * get_pauseparam()
@@ -408,6 +410,7 @@ struct hnae3_ae_ops {
 
 	int (*set_promisc_mode)(struct hnae3_handle *handle, bool en_uc_pmc,
 				bool en_mc_pmc);
+	void (*request_update_promisc_mode)(struct hnae3_handle *handle);
 	int (*set_mtu)(struct hnae3_handle *handle, int new_mtu);
 
 	void (*get_pauseparam)(struct hnae3_handle *handle,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 341e8b5cd219..6b9535c17647 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -601,34 +601,25 @@ static void hns3_nic_set_rx_mode(struct net_device *netdev)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
 	u8 new_flags;
-	int ret;
 
 	new_flags = hns3_get_netdev_flags(netdev);
 
-	ret = __dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync);
-	if (ret) {
-		netdev_err(netdev, "sync uc address fail\n");
-		if (ret == -ENOSPC)
-			new_flags |= HNAE3_OVERFLOW_UPE;
-	}
-
-	if (netdev->flags & IFF_MULTICAST) {
-		ret = __dev_mc_sync(netdev, hns3_nic_mc_sync,
-				    hns3_nic_mc_unsync);
-		if (ret) {
-			netdev_err(netdev, "sync mc address fail\n");
-			if (ret == -ENOSPC)
-				new_flags |= HNAE3_OVERFLOW_MPE;
-		}
-	}
+	__dev_uc_sync(netdev, hns3_nic_uc_sync, hns3_nic_uc_unsync);
+	__dev_mc_sync(netdev, hns3_nic_mc_sync, hns3_nic_mc_unsync);
 
 	/* User mode Promisc mode enable and vlan filtering is disabled to
-	 * let all packets in. MAC-VLAN Table overflow Promisc enabled and
-	 * vlan fitering is enabled
+	 * let all packets in.
 	 */
-	hns3_enable_vlan_filter(netdev, new_flags & HNAE3_VLAN_FLTR);
 	h->netdev_flags = new_flags;
-	hns3_update_promisc_mode(netdev, new_flags);
+	hns3_request_update_promisc_mode(h);
+}
+
+void hns3_request_update_promisc_mode(struct hnae3_handle *handle)
+{
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+	if (ops->request_update_promisc_mode)
+		ops->request_update_promisc_mode(handle);
 }
 
 int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags)
@@ -4467,15 +4458,6 @@ err_put_ring:
 static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
 {
 	struct net_device *netdev = handle->kinfo.netdev;
-	bool vlan_filter_enable;
-	int ret;
-
-	ret = hns3_update_promisc_mode(netdev, handle->netdev_flags);
-	if (ret)
-		return ret;
-
-	vlan_filter_enable = netdev->flags & IFF_PROMISC ? false : true;
-	hns3_enable_vlan_filter(netdev, vlan_filter_enable);
 
 	if (handle->ae_algo->ops->restore_vlan_table)
 		handle->ae_algo->ops->restore_vlan_table(handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 4b3f0abf0715..53bc0edf9b6f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -658,6 +658,7 @@ void hns3_set_vector_coalesce_rl(struct hns3_enet_tqp_vector *tqp_vector,
 
 void hns3_enable_vlan_filter(struct net_device *netdev, bool enable);
 int hns3_update_promisc_mode(struct net_device *netdev, u8 promisc_flags);
+void hns3_request_update_promisc_mode(struct hnae3_handle *handle);
 
 #ifdef CONFIG_HNS3_DCB
 void hns3_dcbnl_setup(struct hnae3_handle *handle);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 6a0734be4a1a..4d9c85f049dc 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -99,7 +99,7 @@ static int hns3_lp_setup(struct net_device *ndev, enum hnae3_loop loop, bool en)
 		h->ae_algo->ops->set_promisc_mode(h, true, true);
 	} else {
 		/* recover promisc mode before loopback test */
-		hns3_update_promisc_mode(ndev, h->netdev_flags);
+		hns3_request_update_promisc_mode(h);
 		vlan_filter_enable = ndev->flags & IFF_PROMISC ? false : true;
 		hns3_enable_vlan_filter(ndev, vlan_filter_enable);
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c3205ae620ce..71ff0fa64f46 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -69,6 +69,7 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 static int hclge_set_default_loopback(struct hclge_dev *hdev);
 
 static void hclge_sync_mac_table(struct hclge_dev *hdev);
+static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
 
@@ -3975,6 +3976,7 @@ static void hclge_periodic_service_task(struct hclge_dev *hdev)
 	 */
 	hclge_update_link_status(hdev);
 	hclge_sync_mac_table(hdev);
+	hclge_sync_promisc_mode(hdev);
 
 	if (time_is_after_jiffies(hdev->last_serv_processed + HZ)) {
 		delta = jiffies - hdev->last_serv_processed;
@@ -4724,7 +4726,8 @@ static int hclge_cmd_set_promisc_mode(struct hclge_dev *hdev,
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
 		dev_err(&hdev->pdev->dev,
-			"Set promisc mode fail, status is %d.\n", ret);
+			"failed to set vport %d promisc mode, ret = %d.\n",
+			param->vf_id, ret);
 
 	return ret;
 }
@@ -4774,6 +4777,14 @@ static int hclge_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 					    en_bc_pmc);
 }
 
+static void hclge_request_update_promisc_mode(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+}
+
 static int hclge_get_fd_mode(struct hclge_dev *hdev, u8 *fd_mode)
 {
 	struct hclge_get_fd_mode_cmd *req;
@@ -6972,17 +6983,11 @@ static int hclge_get_mac_vlan_cmd_status(struct hclge_vport *vport,
 	}
 
 	if (op == HCLGE_MAC_VLAN_ADD) {
-		if ((!resp_code) || (resp_code == 1)) {
+		if (!resp_code || resp_code == 1)
 			return 0;
-		} else if (resp_code == HCLGE_ADD_UC_OVERFLOW) {
-			dev_err(&hdev->pdev->dev,
-				"add mac addr failed for uc_overflow.\n");
-			return -ENOSPC;
-		} else if (resp_code == HCLGE_ADD_MC_OVERFLOW) {
-			dev_err(&hdev->pdev->dev,
-				"add mac addr failed for mc_overflow.\n");
+		else if (resp_code == HCLGE_ADD_UC_OVERFLOW ||
+			 resp_code == HCLGE_ADD_MC_OVERFLOW)
 			return -ENOSPC;
-		}
 
 		dev_err(&hdev->pdev->dev,
 			"add mac addr failed for undefined, code=%u.\n",
@@ -7448,8 +7453,9 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 			return ret;
 		}
 
-		dev_err(&hdev->pdev->dev, "UC MAC table full(%u)\n",
-			hdev->priv_umv_size);
+		if (!(vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE))
+			dev_err(&hdev->pdev->dev, "UC MAC table full(%u)\n",
+				hdev->priv_umv_size);
 
 		return -ENOSPC;
 	}
@@ -7543,7 +7549,9 @@ int hclge_add_mc_addr_common(struct hclge_vport *vport,
 		return status;
 	status = hclge_add_mac_vlan_tbl(vport, &req, desc);
 
-	if (status == -ENOSPC)
+	/* if already overflow, not to print each time */
+	if (status == -ENOSPC &&
+	    !(vport->overflow_promisc_flags & HNAE3_OVERFLOW_MPE))
 		dev_err(&hdev->pdev->dev, "mc mac vlan table is full\n");
 
 	return status;
@@ -7638,12 +7646,16 @@ static void hclge_unsync_vport_mac_list(struct hclge_vport *vport,
 	}
 }
 
-static void hclge_sync_from_add_list(struct list_head *add_list,
+static bool hclge_sync_from_add_list(struct list_head *add_list,
 				     struct list_head *mac_list)
 {
 	struct hclge_mac_node *mac_node, *tmp, *new_node;
+	bool all_added = true;
 
 	list_for_each_entry_safe(mac_node, tmp, add_list, node) {
+		if (mac_node->state == HCLGE_MAC_TO_ADD)
+			all_added = false;
+
 		/* if the mac address from tmp_add_list is not in the
 		 * uc/mc_mac_list, it means have received a TO_DEL request
 		 * during the time window of adding the mac address into mac
@@ -7666,6 +7678,8 @@ static void hclge_sync_from_add_list(struct list_head *add_list,
 			kfree(mac_node);
 		}
 	}
+
+	return all_added;
 }
 
 static void hclge_sync_from_del_list(struct list_head *del_list,
@@ -7693,12 +7707,30 @@ static void hclge_sync_from_del_list(struct list_head *del_list,
 	}
 }
 
+static void hclge_update_overflow_flags(struct hclge_vport *vport,
+					enum HCLGE_MAC_ADDR_TYPE mac_type,
+					bool is_all_added)
+{
+	if (mac_type == HCLGE_MAC_ADDR_UC) {
+		if (is_all_added)
+			vport->overflow_promisc_flags &= ~HNAE3_OVERFLOW_UPE;
+		else
+			vport->overflow_promisc_flags |= HNAE3_OVERFLOW_UPE;
+	} else {
+		if (is_all_added)
+			vport->overflow_promisc_flags &= ~HNAE3_OVERFLOW_MPE;
+		else
+			vport->overflow_promisc_flags |= HNAE3_OVERFLOW_MPE;
+	}
+}
+
 static void hclge_sync_vport_mac_table(struct hclge_vport *vport,
 				       enum HCLGE_MAC_ADDR_TYPE mac_type)
 {
 	struct hclge_mac_node *mac_node, *tmp, *new_node;
 	struct list_head tmp_add_list, tmp_del_list;
 	struct list_head *list;
+	bool all_added;
 
 	INIT_LIST_HEAD(&tmp_add_list);
 	INIT_LIST_HEAD(&tmp_del_list);
@@ -7752,9 +7784,11 @@ stop_traverse:
 	spin_lock_bh(&vport->mac_list_lock);
 
 	hclge_sync_from_del_list(&tmp_del_list, list);
-	hclge_sync_from_add_list(&tmp_add_list, list);
+	all_added = hclge_sync_from_add_list(&tmp_add_list, list);
 
 	spin_unlock_bh(&vport->mac_list_lock);
+
+	hclge_update_overflow_flags(vport, mac_type, all_added);
 }
 
 static bool hclge_need_sync_mac_table(struct hclge_vport *vport)
@@ -11052,6 +11086,30 @@ static int hclge_gro_en(struct hnae3_handle *handle, bool enable)
 	return hclge_config_gro(hdev, enable);
 }
 
+static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = &hdev->vport[0];
+	struct hnae3_handle *handle = &vport->nic;
+	u8 tmp_flags = 0;
+	int ret;
+
+	if (vport->last_promisc_flags != vport->overflow_promisc_flags) {
+		set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+		vport->last_promisc_flags = vport->overflow_promisc_flags;
+	}
+
+	if (test_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state)) {
+		tmp_flags = handle->netdev_flags | vport->last_promisc_flags;
+		ret = hclge_set_promisc_mode(handle, tmp_flags & HNAE3_UPE,
+					     tmp_flags & HNAE3_MPE);
+		if (!ret) {
+			clear_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+			hclge_enable_vlan_filter(handle,
+						 tmp_flags & HNAE3_VLAN_FLTR);
+		}
+	}
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -11064,6 +11122,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_vector = hclge_get_vector,
 	.put_vector = hclge_put_vector,
 	.set_promisc_mode = hclge_set_promisc_mode,
+	.request_update_promisc_mode = hclge_request_update_promisc_mode,
 	.set_loopback = hclge_set_loopback,
 	.start = hclge_ae_start,
 	.stop = hclge_ae_stop,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 5fcbc3d23f21..85180f4a9b4d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -217,6 +217,7 @@ enum HCLGE_DEV_STATE {
 	HCLGE_STATE_STATISTICS_UPDATING,
 	HCLGE_STATE_CMD_DISABLE,
 	HCLGE_STATE_LINK_UPDATING,
+	HCLGE_STATE_PROMISC_CHANGED,
 	HCLGE_STATE_RST_FAIL,
 	HCLGE_STATE_MAX
 };
@@ -931,6 +932,9 @@ struct hclge_vport {
 	u32 mps; /* Max packet size */
 	struct hclge_vf_info vf_info;
 
+	u8 overflow_promisc_flags;
+	u8 last_promisc_flags;
+
 	spinlock_t mac_list_lock; /* protect mac address need to add/detele */
 	struct list_head uc_mac_list;   /* Store VF unicast table */
 	struct list_head mc_mac_list;   /* Store VF multicast table */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 05d485a48706..fea197fd77cb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1164,6 +1164,27 @@ static int hclgevf_set_promisc_mode(struct hnae3_handle *handle, bool en_uc_pmc,
 					    en_bc_pmc);
 }
 
+static void hclgevf_request_update_promisc_mode(struct hnae3_handle *handle)
+{
+	struct hclgevf_dev *hdev = hclgevf_ae_get_hdev(handle);
+
+	set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state);
+}
+
+static void hclgevf_sync_promisc_mode(struct hclgevf_dev *hdev)
+{
+	struct hnae3_handle *handle = &hdev->nic;
+	bool en_uc_pmc = handle->netdev_flags & HNAE3_UPE;
+	bool en_mc_pmc = handle->netdev_flags & HNAE3_MPE;
+	int ret;
+
+	if (test_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state)) {
+		ret = hclgevf_set_promisc_mode(handle, en_uc_pmc, en_mc_pmc);
+		if (!ret)
+			clear_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state);
+	}
+}
+
 static int hclgevf_tqp_enable(struct hclgevf_dev *hdev, unsigned int tqp_id,
 			      int stream_id, bool enable)
 {
@@ -2203,6 +2224,8 @@ static void hclgevf_periodic_service_task(struct hclgevf_dev *hdev)
 
 	hclgevf_sync_mac_table(hdev);
 
+	hclgevf_sync_promisc_mode(hdev);
+
 	hdev->last_serv_processed = jiffies;
 
 out:
@@ -2986,6 +3009,8 @@ static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
 		return ret;
 	}
 
+	set_bit(HCLGEVF_STATE_PROMISC_CHANGED, &hdev->state);
+
 	dev_info(&hdev->pdev->dev, "Reset done\n");
 
 	return 0;
@@ -3470,6 +3495,7 @@ static const struct hnae3_ae_ops hclgevf_ops = {
 	.set_timer_task = hclgevf_set_timer_task,
 	.get_link_mode = hclgevf_get_link_mode,
 	.set_promisc_mode = hclgevf_set_promisc_mode,
+	.request_update_promisc_mode = hclgevf_request_update_promisc_mode,
 };
 
 static struct hnae3_ae_algo ae_algovf = {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 0222d9b87a42..f19583c4bc9b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -148,6 +148,7 @@ enum hclgevf_states {
 	HCLGEVF_STATE_MBX_HANDLING,
 	HCLGEVF_STATE_CMD_DISABLE,
 	HCLGEVF_STATE_LINK_UPDATING,
+	HCLGEVF_STATE_PROMISC_CHANGED,
 	HCLGEVF_STATE_RST_FAIL,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 7d0b345156d0678fbbbf885f991c6d83f23d70f1 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:12 +0800
Subject: net: hns3: use mutex vport_lock instead of mutex umv_lock

Currently, the driver uses mutex umv_lock to protect the variable
vport->share_umv_size. And there is already a mutex vport_lock
being defined in the driver, which is designed to protect the
resource of vport. So we can use vport_lock instead of umv_lock.

Furthermore, there is a time window in which share_umv_size is not
protected, between checking the UMV space and doing the MAC
configuration in hclge_add_uc_addr_common(). The locked range
should be extended to cover it.

This patch uses mutex vport_lock instead of mutex umv_lock to
protect share_umv_size, and adjusts the mutex's range.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 31 +++++++++++++---------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 -
 2 files changed, 19 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 71ff0fa64f46..177ef5e93d06 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7250,7 +7250,6 @@ static int hclge_init_umv_space(struct hclge_dev *hdev)
 			 "failed to alloc umv space, want %u, get %u\n",
 			 hdev->wanted_umv_size, allocated_size);
 
-	mutex_init(&hdev->umv_mutex);
 	hdev->max_umv_size = allocated_size;
 	hdev->priv_umv_size = hdev->max_umv_size / (hdev->num_alloc_vport + 1);
 	hdev->share_umv_size = hdev->priv_umv_size +
@@ -7269,21 +7268,25 @@ static void hclge_reset_umv_space(struct hclge_dev *hdev)
 		vport->used_umv_num = 0;
 	}
 
-	mutex_lock(&hdev->umv_mutex);
+	mutex_lock(&hdev->vport_lock);
 	hdev->share_umv_size = hdev->priv_umv_size +
 			hdev->max_umv_size % (hdev->num_alloc_vport + 1);
-	mutex_unlock(&hdev->umv_mutex);
+	mutex_unlock(&hdev->vport_lock);
 }
 
-static bool hclge_is_umv_space_full(struct hclge_vport *vport)
+static bool hclge_is_umv_space_full(struct hclge_vport *vport, bool need_lock)
 {
 	struct hclge_dev *hdev = vport->back;
 	bool is_full;
 
-	mutex_lock(&hdev->umv_mutex);
+	if (need_lock)
+		mutex_lock(&hdev->vport_lock);
+
 	is_full = (vport->used_umv_num >= hdev->priv_umv_size &&
 		   hdev->share_umv_size == 0);
-	mutex_unlock(&hdev->umv_mutex);
+
+	if (need_lock)
+		mutex_unlock(&hdev->vport_lock);
 
 	return is_full;
 }
@@ -7292,7 +7295,6 @@ static void hclge_update_umv_space(struct hclge_vport *vport, bool is_free)
 {
 	struct hclge_dev *hdev = vport->back;
 
-	mutex_lock(&hdev->umv_mutex);
 	if (is_free) {
 		if (vport->used_umv_num > hdev->priv_umv_size)
 			hdev->share_umv_size++;
@@ -7305,7 +7307,6 @@ static void hclge_update_umv_space(struct hclge_vport *vport, bool is_free)
 			hdev->share_umv_size--;
 		vport->used_umv_num++;
 	}
-	mutex_unlock(&hdev->umv_mutex);
 }
 
 static struct hclge_mac_node *hclge_find_mac_node(struct list_head *list,
@@ -7446,12 +7447,15 @@ int hclge_add_uc_addr_common(struct hclge_vport *vport,
 	 */
 	ret = hclge_lookup_mac_vlan_tbl(vport, &req, &desc, false);
 	if (ret == -ENOENT) {
-		if (!hclge_is_umv_space_full(vport)) {
+		mutex_lock(&hdev->vport_lock);
+		if (!hclge_is_umv_space_full(vport, false)) {
 			ret = hclge_add_mac_vlan_tbl(vport, &req, NULL);
 			if (!ret)
 				hclge_update_umv_space(vport, false);
+			mutex_unlock(&hdev->vport_lock);
 			return ret;
 		}
+		mutex_unlock(&hdev->vport_lock);
 
 		if (!(vport->overflow_promisc_flags & HNAE3_OVERFLOW_UPE))
 			dev_err(&hdev->pdev->dev, "UC MAC table full(%u)\n",
@@ -7503,10 +7507,13 @@ int hclge_rm_uc_addr_common(struct hclge_vport *vport,
 	hnae3_set_bit(req.entry_type, HCLGE_MAC_VLAN_BIT0_EN_B, 0);
 	hclge_prepare_mac_addr(&req, addr, false);
 	ret = hclge_remove_mac_vlan_tbl(vport, &req);
-	if (!ret)
+	if (!ret) {
+		mutex_lock(&hdev->vport_lock);
 		hclge_update_umv_space(vport, true);
-	else if (ret == -ENOENT)
+		mutex_unlock(&hdev->vport_lock);
+	} else if (ret == -ENOENT) {
 		ret = 0;
+	}
 
 	return ret;
 }
@@ -10163,7 +10170,7 @@ static int hclge_set_vf_spoofchk(struct hnae3_handle *handle, int vf,
 		dev_warn(&hdev->pdev->dev,
 			 "vf %d vlan table is full, enable spoof check may cause its packet send fail\n",
 			 vf);
-	else if (enable && hclge_is_umv_space_full(vport))
+	else if (enable && hclge_is_umv_space_full(vport, true))
 		dev_warn(&hdev->pdev->dev,
 			 "vf %d mac table is full, enable spoof check may cause its packet send fail\n",
 			 vf);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 85180f4a9b4d..8e69651f8bf1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -831,7 +831,6 @@ struct hclge_dev {
 	u16 priv_umv_size;
 	/* unicast mac vlan space shared by PF and its VFs */
 	u16 share_umv_size;
-	struct mutex umv_mutex; /* protect share_umv_size */
 
 	DECLARE_KFIFO(mac_tnl_log, struct hclge_mac_tnl_stats,
 		      HCLGE_MAC_TNL_LOG_SIZE);
-- 
cgit v1.2.3-59-g8ed1b


From 039ba863e8d71c52b1f5faf26b0f458eec33d5e7 Mon Sep 17 00:00:00 2001
From: Jian Shen <shenjian15@huawei.com>
Date: Fri, 24 Apr 2020 10:23:13 +0800
Subject: net: hns3: optimize the filter table entries handling when resetting

Currently, the PF driver removes all (including its VFs') MAC/VLAN
flow director table entries when resetting, and restores them after
reset completed.

In fact, the hardware clears all table entries only on IMP reset
and global reset. So the driver only needs to restore the table
entries in these cases, and needs to do nothing for PF reset, FLR
or other function-level resets.

This patch optimizes it by removing the unnecessary clearing and
restoring of table entries from the reset flow, and doing the
restore after the reset has completed.

Signed-off-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h    |  5 ++
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |  5 --
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c    | 33 --------
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h    |  9 ---
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 89 +++++++++++-----------
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.h    |  1 +
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 28 ++++++-
 .../ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c  | 29 ++++---
 8 files changed, 94 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 948e67ef30fd..21a736174fda 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -45,6 +45,7 @@ enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_GET_MEDIA_TYPE,       /* (VF -> PF) get media type */
 	HCLGE_MBX_PUSH_PROMISC_INFO,	/* (PF -> VF) push vf promisc info */
 	HCLGE_MBX_VF_UNINIT,            /* (VF -> PF) vf is unintializing */
+	HCLGE_MBX_HANDLE_VF_TBL,	/* (VF -> PF) store/clear hw table */
 
 	HCLGE_MBX_GET_VF_FLR_STATUS = 200, /* (M7 -> PF) get vf flr status */
 	HCLGE_MBX_PUSH_LINK_STATUS,	/* (M7 -> PF) get port link status */
@@ -70,6 +71,10 @@ enum hclge_mbx_vlan_cfg_subcode {
 	HCLGE_MBX_GET_PORT_BASE_VLAN_STATE,	/* get port based vlan state */
 };
 
+enum hclge_mbx_tbl_cfg_subcode {
+	HCLGE_MBX_VPORT_LIST_CLEAR,
+};
+
 #define HCLGE_MBX_MAX_MSG_SIZE	14
 #define HCLGE_MBX_MAX_RESP_DATA_SIZE	8U
 #define HCLGE_MBX_MAX_RING_CHAIN_PARAM_NUM	4
diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index a56f8d623349..6291aa9f06b0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -233,7 +233,6 @@ struct hnae3_ae_dev {
 	struct list_head node;
 	u32 flag;
 	unsigned long hw_err_reset_req;
-	enum hnae3_reset_type reset_type;
 	void *priv;
 };
 
@@ -356,8 +355,6 @@ struct hnae3_ae_dev {
  *   Set vlan filter config of Ports
  * set_vf_vlan_filter()
  *   Set vlan filter config of vf
- * restore_vlan_table()
- *   Restore vlan filter entries after reset
  * enable_hw_strip_rxvtag()
  *   Enable/disable hardware strip vlan tag of packets received
  * set_gro_en
@@ -528,7 +525,6 @@ struct hnae3_ae_ops {
 				struct ethtool_rxnfc *cmd);
 	int (*get_fd_all_rules)(struct hnae3_handle *handle,
 				struct ethtool_rxnfc *cmd, u32 *rule_locs);
-	int (*restore_fd_rules)(struct hnae3_handle *handle);
 	void (*enable_fd)(struct hnae3_handle *handle, bool enable);
 	int (*add_arfs_entry)(struct hnae3_handle *handle, u16 queue_id,
 			      u16 flow_id, struct flow_keys *fkeys);
@@ -542,7 +538,6 @@ struct hnae3_ae_ops {
 	void (*set_timer_task)(struct hnae3_handle *handle, bool enable);
 	int (*mac_connect_phy)(struct hnae3_handle *handle);
 	void (*mac_disconnect_phy)(struct hnae3_handle *handle);
-	void (*restore_vlan_table)(struct hnae3_handle *handle);
 	int (*get_vf_config)(struct hnae3_handle *handle, int vf,
 			     struct ifla_vf_info *ivf);
 	int (*set_vf_link_state)(struct hnae3_handle *handle, int vf,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 6b9535c17647..c79d6a391105 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -2102,7 +2102,6 @@ static int hns3_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	ae_dev->pdev = pdev;
 	ae_dev->flag = ent->driver_data;
-	ae_dev->reset_type = HNAE3_NONE_RESET;
 	hns3_get_dev_capability(pdev, ae_dev);
 	pci_set_drvdata(pdev, ae_dev);
 
@@ -3936,17 +3935,6 @@ static void hns3_uninit_phy(struct net_device *netdev)
 		h->ae_algo->ops->mac_disconnect_phy(h);
 }
 
-static int hns3_restore_fd_rules(struct net_device *netdev)
-{
-	struct hnae3_handle *h = hns3_get_handle(netdev);
-	int ret = 0;
-
-	if (h->ae_algo->ops->restore_fd_rules)
-		ret = h->ae_algo->ops->restore_fd_rules(h);
-
-	return ret;
-}
-
 static void hns3_del_all_fd_rules(struct net_device *netdev, bool clear_list)
 {
 	struct hnae3_handle *h = hns3_get_handle(netdev);
@@ -4346,7 +4334,6 @@ static void hns3_restore_coal(struct hns3_nic_priv *priv)
 
 static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(handle->pdev);
 	struct hnae3_knic_private_info *kinfo = &handle->kinfo;
 	struct net_device *ndev = kinfo->netdev;
 	struct hns3_nic_priv *priv = netdev_priv(ndev);
@@ -4354,13 +4341,6 @@ static int hns3_reset_notify_down_enet(struct hnae3_handle *handle)
 	if (test_and_set_bit(HNS3_NIC_STATE_RESETTING, &priv->state))
 		return 0;
 
-	/* it is cumbersome for hardware to pick-and-choose entries for deletion
-	 * from table space. Hence, for function reset software intervention is
-	 * required to delete the entries
-	 */
-	if (hns3_dev_ongoing_func_reset(ae_dev))
-		hns3_del_all_fd_rules(ndev, false);
-
 	if (!netif_running(ndev))
 		return 0;
 
@@ -4455,16 +4435,6 @@ err_put_ring:
 	return ret;
 }
 
-static int hns3_reset_notify_restore_enet(struct hnae3_handle *handle)
-{
-	struct net_device *netdev = handle->kinfo.netdev;
-
-	if (handle->ae_algo->ops->restore_vlan_table)
-		handle->ae_algo->ops->restore_vlan_table(handle);
-
-	return hns3_restore_fd_rules(netdev);
-}
-
 static int hns3_reset_notify_uninit_enet(struct hnae3_handle *handle)
 {
 	struct net_device *netdev = handle->kinfo.netdev;
@@ -4514,9 +4484,6 @@ static int hns3_reset_notify(struct hnae3_handle *handle,
 	case HNAE3_UNINIT_CLIENT:
 		ret = hns3_reset_notify_uninit_enet(handle);
 		break;
-	case HNAE3_RESTORE_CLIENT:
-		ret = hns3_reset_notify_restore_enet(handle);
-		break;
 	default:
 		break;
 	}
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 53bc0edf9b6f..240ba06cd0eb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -576,15 +576,6 @@ static inline void hns3_write_reg(void __iomem *base, u32 reg, u32 value)
 	writel(value, reg_addr + reg);
 }
 
-static inline bool hns3_dev_ongoing_func_reset(struct hnae3_ae_dev *ae_dev)
-{
-	return (ae_dev && (ae_dev->reset_type == HNAE3_FUNC_RESET ||
-			   ae_dev->reset_type == HNAE3_FLR_RESET ||
-			   ae_dev->reset_type == HNAE3_VF_FUNC_RESET ||
-			   ae_dev->reset_type == HNAE3_VF_FULL_RESET ||
-			   ae_dev->reset_type == HNAE3_VF_PF_FUNC_RESET));
-}
-
 #define hns3_read_dev(a, reg) \
 	hns3_read_reg((a)->io_base, (reg))
 
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 177ef5e93d06..c74990a59e10 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -69,6 +69,7 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 static int hclge_set_default_loopback(struct hclge_dev *hdev);
 
 static void hclge_sync_mac_table(struct hclge_dev *hdev);
+static void hclge_restore_hw_table(struct hclge_dev *hdev);
 static void hclge_sync_promisc_mode(struct hclge_dev *hdev);
 
 static struct hnae3_ae_algo ae_algo;
@@ -3731,22 +3732,13 @@ static int hclge_reset_stack(struct hclge_dev *hdev)
 	if (ret)
 		return ret;
 
-	ret = hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
-	if (ret)
-		return ret;
-
-	return hclge_notify_client(hdev, HNAE3_RESTORE_CLIENT);
+	return hclge_notify_client(hdev, HNAE3_INIT_CLIENT);
 }
 
 static int hclge_reset_prepare(struct hclge_dev *hdev)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	int ret;
 
-	/* Initialize ae_dev reset status as well, in case enet layer wants to
-	 * know if device is undergoing reset
-	 */
-	ae_dev->reset_type = hdev->reset_type;
 	hdev->rst_stats.reset_cnt++;
 	/* perform reset of the stack & ae device for a client */
 	ret = hclge_notify_roce_client(hdev, HNAE3_DOWN_CLIENT);
@@ -3808,7 +3800,6 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev)
 	hdev->last_reset_time = jiffies;
 	hdev->rst_stats.reset_fail_cnt = 0;
 	hdev->rst_stats.reset_done_cnt++;
-	ae_dev->reset_type = HNAE3_NONE_RESET;
 	clear_bit(HCLGE_STATE_RST_FAIL, &hdev->state);
 
 	/* if default_reset_request has a higher level reset request,
@@ -6942,8 +6933,14 @@ int hclge_vport_start(struct hclge_vport *vport)
 	set_bit(HCLGE_VPORT_STATE_ALIVE, &vport->state);
 	vport->last_active_jiffies = jiffies;
 
-	if (test_bit(vport->vport_id, hdev->vport_config_block))
-		hclge_restore_mac_table_common(vport);
+	if (test_bit(vport->vport_id, hdev->vport_config_block)) {
+		if (vport->vport_id) {
+			hclge_restore_mac_table_common(vport);
+			hclge_restore_vport_vlan_table(vport);
+		} else {
+			hclge_restore_hw_table(hdev);
+		}
+	}
 
 	clear_bit(vport->vport_id, hdev->vport_config_block);
 
@@ -8789,39 +8786,34 @@ void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev)
 	}
 }
 
-static void hclge_restore_vlan_table(struct hnae3_handle *handle)
+void hclge_restore_vport_vlan_table(struct hclge_vport *vport)
 {
-	struct hclge_vport *vport = hclge_get_vport(handle);
 	struct hclge_vport_vlan_cfg *vlan, *tmp;
 	struct hclge_dev *hdev = vport->back;
 	u16 vlan_proto;
-	u16 state, vlan_id;
-	int i;
+	u16 vlan_id;
+	u16 state;
+	int ret;
 
-	for (i = 0; i < hdev->num_alloc_vport; i++) {
-		vport = &hdev->vport[i];
-		vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto;
-		vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
-		state = vport->port_base_vlan_cfg.state;
+	vlan_proto = vport->port_base_vlan_cfg.vlan_info.vlan_proto;
+	vlan_id = vport->port_base_vlan_cfg.vlan_info.vlan_tag;
+	state = vport->port_base_vlan_cfg.state;
 
-		if (state != HNAE3_PORT_BASE_VLAN_DISABLE) {
-			hclge_set_vlan_filter_hw(hdev, htons(vlan_proto),
-						 vport->vport_id, vlan_id,
-						 false);
-			continue;
-		}
-
-		list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
-			int ret;
+	if (state != HNAE3_PORT_BASE_VLAN_DISABLE) {
+		clear_bit(vport->vport_id, hdev->vlan_table[vlan_id]);
+		hclge_set_vlan_filter_hw(hdev, htons(vlan_proto),
+					 vport->vport_id, vlan_id,
+					 false);
+		return;
+	}
 
-			if (!vlan->hd_tbl_status)
-				continue;
-			ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
-						       vport->vport_id,
-						       vlan->vlan_id, false);
-			if (ret)
-				break;
-		}
+	list_for_each_entry_safe(vlan, tmp, &vport->vlan_list, node) {
+		ret = hclge_set_vlan_filter_hw(hdev, htons(ETH_P_8021Q),
+					       vport->vport_id,
+					       vlan->vlan_id, false);
+		if (ret)
+			break;
+		vlan->hd_tbl_status = true;
 	}
 }
 
@@ -8856,6 +8848,18 @@ void hclge_restore_mac_table_common(struct hclge_vport *vport)
 	spin_unlock_bh(&vport->mac_list_lock);
 }
 
+static void hclge_restore_hw_table(struct hclge_dev *hdev)
+{
+	struct hclge_vport *vport = &hdev->vport[0];
+	struct hnae3_handle *handle = &vport->nic;
+
+	hclge_restore_mac_table_common(vport);
+	hclge_restore_vport_vlan_table(vport);
+	set_bit(HCLGE_STATE_PROMISC_CHANGED, &hdev->state);
+
+	hclge_restore_fd_entries(handle);
+}
+
 int hclge_en_hw_strip_rxvtag(struct hnae3_handle *handle, bool enable)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -10352,13 +10356,12 @@ static int hclge_reset_ae_dev(struct hnae3_ae_dev *ae_dev)
 	 */
 	if (hdev->reset_type == HNAE3_IMP_RESET ||
 	    hdev->reset_type == HNAE3_GLOBAL_RESET) {
+		memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
+		memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full));
 		bitmap_set(hdev->vport_config_block, 0, hdev->num_alloc_vport);
 		hclge_reset_umv_space(hdev);
 	}
 
-	memset(hdev->vlan_table, 0, sizeof(hdev->vlan_table));
-	memset(hdev->vf_vlan_full, 0, sizeof(hdev->vf_vlan_full));
-
 	ret = hclge_cmd_init(hdev);
 	if (ret) {
 		dev_err(&pdev->dev, "Cmd queue init failed\n");
@@ -11191,7 +11194,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.get_fd_rule_cnt = hclge_get_fd_rule_cnt,
 	.get_fd_rule_info = hclge_get_fd_rule_info,
 	.get_fd_all_rules = hclge_get_all_rules,
-	.restore_fd_rules = hclge_restore_fd_entries,
 	.enable_fd = hclge_enable_fd,
 	.add_arfs_entry = hclge_add_fd_entry_by_arfs,
 	.dbg_run_cmd = hclge_dbg_run_cmd,
@@ -11204,7 +11206,6 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.set_timer_task = hclge_set_timer_task,
 	.mac_connect_phy = hclge_mac_connect_phy,
 	.mac_disconnect_phy = hclge_mac_disconnect_phy,
-	.restore_vlan_table = hclge_restore_vlan_table,
 	.get_vf_config = hclge_get_vf_config,
 	.set_vf_link_state = hclge_set_vf_link_state,
 	.set_vf_spoofchk = hclge_set_vf_spoofchk,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 8e69651f8bf1..913c4f677404 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -1001,6 +1001,7 @@ void hclge_rm_vport_all_mac_table(struct hclge_vport *vport, bool is_del_list,
 void hclge_rm_vport_all_vlan_table(struct hclge_vport *vport, bool is_del_list);
 void hclge_uninit_vport_vlan_table(struct hclge_dev *hdev);
 void hclge_restore_mac_table_common(struct hclge_vport *vport);
+void hclge_restore_vport_vlan_table(struct hclge_vport *vport);
 int hclge_update_port_base_vlan_cfg(struct hclge_vport *vport, u16 state,
 				    struct hclge_vlan_info *vlan_info);
 int hclge_push_vf_port_base_vlan_info(struct hclge_vport *vport, u8 vfid,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index 0efc04562ba6..ac70fafd15d5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -629,6 +629,23 @@ static void hclge_handle_ncsi_error(struct hclge_dev *hdev)
 	ae_dev->ops->reset_event(hdev->pdev, NULL);
 }
 
+static void hclge_handle_vf_tbl(struct hclge_vport *vport,
+				struct hclge_mbx_vf_to_pf_cmd *mbx_req)
+{
+	struct hclge_dev *hdev = vport->back;
+	struct hclge_vf_vlan_cfg *msg_cmd;
+
+	msg_cmd = (struct hclge_vf_vlan_cfg *)&mbx_req->msg;
+	if (msg_cmd->subcode == HCLGE_MBX_VPORT_LIST_CLEAR) {
+		hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_UC);
+		hclge_rm_vport_all_mac_table(vport, true, HCLGE_MAC_ADDR_MC);
+		hclge_rm_vport_all_vlan_table(vport, true);
+	} else {
+		dev_warn(&hdev->pdev->dev, "Invalid cmd(%u)\n",
+			 msg_cmd->subcode);
+	}
+}
+
 void hclge_mbx_handler(struct hclge_dev *hdev)
 {
 	struct hclge_cmq_ring *crq = &hdev->hw.cmq.crq;
@@ -636,6 +653,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 	struct hclge_mbx_vf_to_pf_cmd *req;
 	struct hclge_vport *vport;
 	struct hclge_desc *desc;
+	bool is_del = false;
 	unsigned int flag;
 	int ret = 0;
 
@@ -753,11 +771,12 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 			break;
 		case HCLGE_MBX_GET_VF_FLR_STATUS:
 		case HCLGE_MBX_VF_UNINIT:
-			hclge_rm_vport_all_mac_table(vport, true,
+			is_del = req->msg.code == HCLGE_MBX_VF_UNINIT;
+			hclge_rm_vport_all_mac_table(vport, is_del,
 						     HCLGE_MAC_ADDR_UC);
-			hclge_rm_vport_all_mac_table(vport, true,
+			hclge_rm_vport_all_mac_table(vport, is_del,
 						     HCLGE_MAC_ADDR_MC);
-			hclge_rm_vport_all_vlan_table(vport, true);
+			hclge_rm_vport_all_vlan_table(vport, is_del);
 			break;
 		case HCLGE_MBX_GET_MEDIA_TYPE:
 			hclge_get_vf_media_type(vport, &resp_msg);
@@ -771,6 +790,9 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 		case HCLGE_MBX_NCSI_ERROR:
 			hclge_handle_ncsi_error(hdev);
 			break;
+		case HCLGE_MBX_HANDLE_VF_TBL:
+			hclge_handle_vf_tbl(vport, req);
+			break;
 		default:
 			dev_err(&hdev->pdev->dev,
 				"un-supported mailbox message, code = %u\n",
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index fea197fd77cb..32341dcaa6c1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -1777,10 +1777,6 @@ static int hclgevf_reset_stack(struct hclgevf_dev *hdev)
 	if (ret)
 		return ret;
 
-	ret = hclgevf_notify_client(hdev, HNAE3_RESTORE_CLIENT);
-	if (ret)
-		return ret;
-
 	/* clear handshake status with IMP */
 	hclgevf_reset_handshake(hdev, false);
 
@@ -1860,13 +1856,8 @@ static void hclgevf_reset_err_handle(struct hclgevf_dev *hdev)
 
 static int hclgevf_reset_prepare(struct hclgevf_dev *hdev)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	int ret;
 
-	/* Initialize ae_dev reset status as well, in case enet layer wants to
-	 * know if device is undergoing reset
-	 */
-	ae_dev->reset_type = hdev->reset_type;
 	hdev->rst_stats.rst_cnt++;
 
 	rtnl_lock();
@@ -1881,7 +1872,6 @@ static int hclgevf_reset_prepare(struct hclgevf_dev *hdev)
 
 static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev)
 {
-	struct hnae3_ae_dev *ae_dev = pci_get_drvdata(hdev->pdev);
 	int ret;
 
 	hdev->rst_stats.hw_rst_done_cnt++;
@@ -1896,7 +1886,6 @@ static int hclgevf_reset_rebuild(struct hclgevf_dev *hdev)
 	}
 
 	hdev->last_reset_time = jiffies;
-	ae_dev->reset_type = HNAE3_NONE_RESET;
 	hdev->rst_stats.rst_done_cnt++;
 	hdev->rst_stats.rst_fail_cnt = 0;
 	clear_bit(HCLGEVF_STATE_RST_FAIL, &hdev->state);
@@ -2974,6 +2963,15 @@ static int hclgevf_pci_reset(struct hclgevf_dev *hdev)
 	return ret;
 }
 
+static int hclgevf_clear_vport_list(struct hclgevf_dev *hdev)
+{
+	struct hclge_vf_to_pf_msg send_msg;
+
+	hclgevf_build_send_msg(&send_msg, HCLGE_MBX_HANDLE_VF_TBL,
+			       HCLGE_MBX_VPORT_LIST_CLEAR);
+	return hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
+}
+
 static int hclgevf_reset_hdev(struct hclgevf_dev *hdev)
 {
 	struct pci_dev *pdev = hdev->pdev;
@@ -3083,6 +3081,15 @@ static int hclgevf_init_hdev(struct hclgevf_dev *hdev)
 		goto err_config;
 	}
 
+	/* ensure the VF table list is empty before init */
+	ret = hclgevf_clear_vport_list(hdev);
+	if (ret) {
+		dev_err(&pdev->dev,
+			"failed to clear tbl list configuration, ret = %d.\n",
+			ret);
+		goto err_config;
+	}
+
 	ret = hclgevf_init_vlan_config(hdev);
 	if (ret) {
 		dev_err(&hdev->pdev->dev,
-- 
cgit v1.2.3-59-g8ed1b


From dce38b74b2b57c6aeb9eafa9fb08451a7bb022dc Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Fri, 24 Apr 2020 17:08:50 +0800
Subject: net: phy: dp83867: Remove unneeded semicolon

Fixes coccicheck warning:

drivers/net/phy/dp83867.c:368:2-3: Unneeded semicolon
drivers/net/phy/dp83867.c:403:2-3: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83867.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/dp83867.c b/drivers/net/phy/dp83867.c
index b55e3c0403ed..4017ae1692d8 100644
--- a/drivers/net/phy/dp83867.c
+++ b/drivers/net/phy/dp83867.c
@@ -365,7 +365,7 @@ static int dp83867_get_downshift(struct phy_device *phydev, u8 *data)
 		break;
 	default:
 		return -EINVAL;
-	};
+	}
 
 	*data = enable ? count : DOWNSHIFT_DEV_DISABLE;
 
@@ -400,7 +400,7 @@ static int dp83867_set_downshift(struct phy_device *phydev, u8 cnt)
 			phydev_err(phydev,
 				   "Downshift count must be 1, 2, 4 or 8\n");
 			return -EINVAL;
-	};
+	}
 
 	val = DP83867_DOWNSHIFT_EN;
 	val |= FIELD_PREP(DP83867_DOWNSHIFT_ATTEMPT_MASK, count);
-- 
cgit v1.2.3-59-g8ed1b


From ae23aae229b8a253a3d5d51acf3bd6103a519d85 Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Fri, 24 Apr 2020 17:13:35 +0800
Subject: octeontx2-pf: Remove unneeded semicolon

Fixes coccicheck warning:

drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h:312:2-3: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 018c283a0ac4..0b1c653b3449 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -309,7 +309,7 @@ static inline void __iomem *otx2_get_regaddr(struct otx2_nic *nic, u64 offset)
 	default:
 		blkaddr = BLKADDR_RVUM;
 		break;
-	};
+	}
 
 	offset &= ~(RVU_FUNC_BLKADDR_MASK << RVU_FUNC_BLKADDR_SHIFT);
 	offset |= (blkaddr << RVU_FUNC_BLKADDR_SHIFT);
-- 
cgit v1.2.3-59-g8ed1b


From 460fd830dd9d68e07c4d15363fd764944090e1f8 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Fri, 24 Apr 2020 12:33:18 +0300
Subject: dpaa2-eth: add channel stat to debugfs

Compute the average number of frames processed for each CDAN (Channel
Data Availability Notification) and export it to debugfs detailed
channel stats.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c | 9 ++++++---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c         | 1 +
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h         | 2 ++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c     | 2 +-
 4 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
index a9afe46b837f..80291afff3ea 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -127,16 +127,19 @@ static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset)
 	int i;
 
 	seq_printf(file, "Channel stats for %s:\n", priv->net_dev->name);
-	seq_printf(file, "%s%16s%16s%16s%16s\n",
-		   "CHID", "CPU", "Deq busy", "CDANs", "Buf count");
+	seq_printf(file, "%s%16s%16s%16s%16s%16s%16s\n",
+		   "CHID", "CPU", "Deq busy", "Frames", "CDANs",
+		   "Avg Frm/CDAN", "Buf count");
 
 	for (i = 0; i < priv->num_channels; i++) {
 		ch = priv->channel[i];
-		seq_printf(file, "%4d%16d%16llu%16llu%16d\n",
+		seq_printf(file, "%4d%16d%16llu%16llu%16llu%16llu%16d\n",
 			   ch->ch_id,
 			   ch->nctx.desired_cpu,
 			   ch->stats.dequeue_portal_busy,
+			   ch->stats.frames,
 			   ch->stats.cdan,
+			   ch->stats.frames / ch->stats.cdan,
 			   ch->buf_count);
 	}
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index d271c016229d..8ec435ba7d27 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -493,6 +493,7 @@ static int consume_frames(struct dpaa2_eth_channel *ch,
 		return 0;
 
 	fq->stats.frames += cleaned;
+	ch->stats.frames += cleaned;
 
 	/* A dequeue operation only pulls frames from a single queue
 	 * into the store. Return the frame queue as an out param.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 289053099974..43cd8409f2e9 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -288,6 +288,8 @@ struct dpaa2_eth_ch_stats {
 	__u64 xdp_tx;
 	__u64 xdp_tx_err;
 	__u64 xdp_redirect;
+	/* Must be last, does not show up in ethtool stats */
+	__u64 frames;
 };
 
 /* Maximum number of queues associated with a DPNI */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 94347c695233..bd13ee48d623 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -277,7 +277,7 @@ static void dpaa2_eth_get_ethtool_stats(struct net_device *net_dev,
 	/* Per-channel stats */
 	for (k = 0; k < priv->num_channels; k++) {
 		ch_stats = &priv->channel[k]->stats;
-		for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64); j++)
+		for (j = 0; j < sizeof(*ch_stats) / sizeof(__u64) - 1; j++)
 			*((__u64 *)data + i + j) += *((__u64 *)ch_stats + j);
 	}
 	i += j;
-- 
cgit v1.2.3-59-g8ed1b


From 071c8ed6e88d2ac0a5f26948fb9c288fd4dd6e40 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 24 Apr 2020 12:31:50 +0200
Subject: tcp: mptcp: use mptcp receive buffer space to select rcv window

In MPTCP, the receive window is shared across all subflows, because it
refers to the mptcp-level sequence space.

MPTCP receivers already place incoming packets on the mptcp socket
receive queue and will charge it to the mptcp socket rcvbuf until
userspace consumes the data.

Update __tcp_select_window to use the occupancy of the parent/mptcp
socket instead of the subflow socket in case the tcp socket is part
of a logical mptcp connection.

This commit doesn't change choice of initial window for passive or active
connections.
While it would be possible to change those as well, this adds complexity
(especially when handling MP_JOIN requests).  Furthermore, the MPTCP RFC
specifically says that a MPTCP sender 'MUST NOT use the RCV.WND field
of a TCP segment at the connection level if it does not also carry a DSS
option with a Data ACK field.'

SYN/SYNACK packets do not carry a DSS option with a Data ACK field.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h   |  3 +++
 net/ipv4/tcp_output.c |  8 ++++++--
 net/mptcp/subflow.c   | 18 ++++++++++++++++++
 3 files changed, 27 insertions(+), 2 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index 0e7c5471010b..5288fba56e55 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -68,6 +68,8 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
 	return tcp_rsk(req)->is_mptcp;
 }
 
+void mptcp_space(const struct sock *ssk, int *space, int *full_space);
+
 void mptcp_parse_option(const struct sk_buff *skb, const unsigned char *ptr,
 			int opsize, struct tcp_options_received *opt_rx);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
@@ -197,6 +199,7 @@ static inline bool mptcp_sk_is_subflow(const struct sock *sk)
 	return false;
 }
 
+static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
 static inline void mptcp_seq_show(struct seq_file *seq) { }
 #endif /* CONFIG_MPTCP */
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 2f45cde168c4..ba4482130f08 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2772,8 +2772,12 @@ u32 __tcp_select_window(struct sock *sk)
 	int mss = icsk->icsk_ack.rcv_mss;
 	int free_space = tcp_space(sk);
 	int allowed_space = tcp_full_space(sk);
-	int full_space = min_t(int, tp->window_clamp, allowed_space);
-	int window;
+	int full_space, window;
+
+	if (sk_is_mptcp(sk))
+		mptcp_space(sk, &free_space, &allowed_space);
+
+	full_space = min_t(int, tp->window_clamp, allowed_space);
 
 	if (unlikely(mss > full_space)) {
 		mss = full_space;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index fabd06f2ff45..87c094702d63 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -821,6 +821,24 @@ bool mptcp_subflow_data_available(struct sock *sk)
 	return subflow->data_avail;
 }
 
+/* If ssk has an mptcp parent socket, use the mptcp rcvbuf occupancy,
+ * not the ssk one.
+ *
+ * In mptcp, rwin is about the mptcp-level connection data.
+ *
+ * Data that is still on the ssk rx queue can thus be ignored,
+ * as far as the mptcp peer is concerned, that data is still in flight.
+ * DSS ACK is updated when skb is moved to the mptcp rx queue.
+ */
+void mptcp_space(const struct sock *ssk, int *space, int *full_space)
+{
+	const struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(ssk);
+	const struct sock *sk = subflow->conn;
+
+	*space = tcp_space(sk);
+	*full_space = tcp_full_space(sk);
+}
+
 static void subflow_data_ready(struct sock *sk)
 {
 	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-- 
cgit v1.2.3-59-g8ed1b


From f30e472071c827e2c2f191c8a011b7e371e17af5 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Fri, 24 Apr 2020 12:43:09 +0000
Subject: hsr: remove unnecessary code in hsr_dev_change_mtu()

In hsr_dev_change_mtu(), the 'dev' and 'master->dev' pointers are the
same, so the 'master' variable and the code that looks it up are unnecessary.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_device.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/net/hsr/hsr_device.c b/net/hsr/hsr_device.c
index fc7027314ad8..cd99f548e440 100644
--- a/net/hsr/hsr_device.c
+++ b/net/hsr/hsr_device.c
@@ -125,13 +125,11 @@ int hsr_get_max_mtu(struct hsr_priv *hsr)
 static int hsr_dev_change_mtu(struct net_device *dev, int new_mtu)
 {
 	struct hsr_priv *hsr;
-	struct hsr_port *master;
 
 	hsr = netdev_priv(dev);
-	master = hsr_port_get_hsr(hsr, HSR_PT_MASTER);
 
 	if (new_mtu > hsr_get_max_mtu(hsr)) {
-		netdev_info(master->dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
+		netdev_info(dev, "A HSR master's MTU cannot be greater than the smallest MTU of its slaves minus the HSR Tag length (%d octets).\n",
 			    HSR_HLEN);
 		return -EINVAL;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 3e14462f1bee6d74eea6425d7965357d55151f80 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 24 Apr 2020 20:52:26 +0800
Subject: ptp: clockmatrix: remove unnecessary comparison

The type of loaddr is u8 which is always '<=' 0xff, so the
loaddr <= 0xff is always true, we can remove this comparison.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Reviewed-by: Vincent Cheng  <vincent.cheng.xh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clockmatrix.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index 032e112c3dd9..a3f608832660 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -780,7 +780,7 @@ static int idtcm_load_firmware(struct idtcm *idtcm,
 
 			/* Page size 128, last 4 bytes of page skipped */
 			if (((loaddr > 0x7b) && (loaddr <= 0x7f))
-			     || ((loaddr > 0xfb) && (loaddr <= 0xff)))
+			     || loaddr > 0xfb)
 				continue;
 
 			err = idtcm_write(idtcm, regaddr, 0, &val, sizeof(val));
-- 
cgit v1.2.3-59-g8ed1b


From 308de89fedf4240d2270d00a36b90a1394fef6a7 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 21:11:34 +0800
Subject: liquidio: remove unused inline functions

Commit b6334be64d6f ("net/liquidio: Delete driver version assignment")
left these inline functions behind unused; remove them.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/liquidio/octeon_device.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/octeon_device.h b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
index 3d01d3602d8f..fb380b4f3e02 100644
--- a/drivers/net/ethernet/cavium/liquidio/octeon_device.h
+++ b/drivers/net/ethernet/cavium/liquidio/octeon_device.h
@@ -712,18 +712,6 @@ struct octeon_device *lio_get_device(u32 octeon_id);
  */
 int lio_get_device_id(void *dev);
 
-static inline u16 OCTEON_MAJOR_REV(struct octeon_device *oct)
-{
-	u16 rev = (oct->rev_id & 0xC) >> 2;
-
-	return (rev == 0) ? 1 : rev;
-}
-
-static inline u16 OCTEON_MINOR_REV(struct octeon_device *oct)
-{
-	return oct->rev_id & 0x3;
-}
-
 /** Read windowed register.
  *  @param  oct   -  pointer to the Octeon device.
  *  @param  addr  -  Address of the register to read.
-- 
cgit v1.2.3-59-g8ed1b


From 163749ad8436b3206709172406d68869c40bc176 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 21:12:56 +0800
Subject: qlcnic: remove unused inline function qlcnic_hw_write_wx_2M

There's no callers in-tree anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
index 134611aa2c9a..d838774af5a6 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic.h
@@ -1880,12 +1880,6 @@ static inline void qlcnic_write_crb(struct qlcnic_adapter *adapter, char *buf,
 	adapter->ahw->hw_ops->write_crb(adapter, buf, offset, size);
 }
 
-static inline int qlcnic_hw_write_wx_2M(struct qlcnic_adapter *adapter,
-					ulong off, u32 data)
-{
-	return adapter->ahw->hw_ops->write_reg(adapter, off, data);
-}
-
 static inline int qlcnic_get_mac_address(struct qlcnic_adapter *adapter,
 					 u8 *mac, u8 function)
 {
-- 
cgit v1.2.3-59-g8ed1b


From df346f1aac6cef551b69d788b022a942270dc17b Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 24 Apr 2020 21:13:34 +0800
Subject: dccp: remove unused inline function dccp_set_seqno

There's no callers in-tree since commit 792b48780e8b ("dccp: Implement
both feature-local and feature-remote Sequence Window feature")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dccp/dccp.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/dccp/dccp.h b/net/dccp/dccp.h
index 9c3b27c257bb..7dce4f6c7025 100644
--- a/net/dccp/dccp.h
+++ b/net/dccp/dccp.h
@@ -108,11 +108,6 @@ extern int  sysctl_dccp_sync_ratelimit;
 #define ADD48(a, b)	 (((a) + (b)) & UINT48_MAX)
 #define SUB48(a, b)	 ADD48((a), COMPLEMENT48(b))
 
-static inline void dccp_set_seqno(u64 *seqno, u64 value)
-{
-	*seqno = value & UINT48_MAX;
-}
-
 static inline void dccp_inc_seqno(u64 *seqno)
 {
 	*seqno = ADD48(*seqno, 1);
-- 
cgit v1.2.3-59-g8ed1b


From c90af587a9eee697e2d89683113707cada70116a Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Fri, 24 Apr 2020 21:53:14 +0800
Subject: net/mlx4_core: Add missing iounmap() in error path

This fixes the following coccicheck warning:

drivers/net/ethernet/mellanox/mlx4/crdump.c:200:2-8: ERROR: missing iounmap;
ioremap on line 190 and execution via conditional on line 198

Fixes: 7ef19d3b1d5e ("devlink: report error once U32_MAX snapshot ids have been used")
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/crdump.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/mellanox/mlx4/crdump.c b/drivers/net/ethernet/mellanox/mlx4/crdump.c
index 73eae80e1cb7..ac5468b77488 100644
--- a/drivers/net/ethernet/mellanox/mlx4/crdump.c
+++ b/drivers/net/ethernet/mellanox/mlx4/crdump.c
@@ -197,6 +197,7 @@ int mlx4_crdump_collect(struct mlx4_dev *dev)
 	err = devlink_region_snapshot_id_get(devlink, &id);
 	if (err) {
 		mlx4_err(dev, "crdump: devlink get snapshot id err %d\n", err);
+		iounmap(cr_space);
 		return err;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From a425b6e1c69ba907b72b737a4d44f8cfbc43ce3c Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Sat, 25 Apr 2020 01:21:09 +0000
Subject: hinic: add mailbox function support

Virtual functions and physical functions can communicate with each
other through the mailbox channel supported by the hardware.

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/Makefile        |    2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h  |    2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  |   25 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |    2 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h  |    3 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c   |   46 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |   18 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c | 1213 +++++++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.h |  154 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |    1 +
 10 files changed, 1456 insertions(+), 10 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile
index fe88ab88cacc..a73862a64690 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_HINIC) += hinic.o
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
 	   hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
 	   hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
-	   hinic_common.o hinic_ethtool.o
+	   hinic_common.o hinic_ethtool.o hinic_hw_mbox.o
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
index cdec1d0a3962..7e84e4e33fff 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_csr.h
@@ -10,7 +10,7 @@
 /* HW interface registers */
 #define HINIC_CSR_FUNC_ATTR0_ADDR                       0x0
 #define HINIC_CSR_FUNC_ATTR1_ADDR                       0x4
-
+#define HINIC_CSR_FUNC_ATTR2_ADDR			0x8
 #define HINIC_CSR_FUNC_ATTR4_ADDR                       0x10
 #define HINIC_CSR_FUNC_ATTR5_ADDR                       0x14
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index c7c75b772a86..f2cf6f7ffc34 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -676,10 +676,23 @@ static int init_pfhwdev(struct hinic_pfhwdev *pfhwdev)
 		return err;
 	}
 
-	hinic_register_mgmt_msg_cb(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC,
-				   pfhwdev, nic_mgmt_msg_handler);
+	err = hinic_func_to_func_init(hwdev);
+	if (err) {
+		dev_err(&hwif->pdev->dev, "Failed to init mailbox\n");
+		hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
+		return err;
+	}
+
+	if (!HINIC_IS_VF(hwif))
+		hinic_register_mgmt_msg_cb(&pfhwdev->pf_to_mgmt,
+					   HINIC_MOD_L2NIC, pfhwdev,
+					   nic_mgmt_msg_handler);
+	else
+		hinic_register_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC,
+					  nic_mgmt_msg_handler);
 
 	hinic_set_pf_action(hwif, HINIC_PF_MGMT_ACTIVE);
+
 	return 0;
 }
 
@@ -693,7 +706,13 @@ static void free_pfhwdev(struct hinic_pfhwdev *pfhwdev)
 
 	hinic_set_pf_action(hwdev->hwif, HINIC_PF_MGMT_INIT);
 
-	hinic_unregister_mgmt_msg_cb(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC);
+	if (!HINIC_IS_VF(hwdev->hwif))
+		hinic_unregister_mgmt_msg_cb(&pfhwdev->pf_to_mgmt,
+					     HINIC_MOD_L2NIC);
+	else
+		hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC);
+
+	hinic_func_to_func_free(hwdev);
 
 	hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
 }
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 66fd2340d447..2574086aa314 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -16,6 +16,7 @@
 #include "hinic_hw_mgmt.h"
 #include "hinic_hw_qp.h"
 #include "hinic_hw_io.h"
+#include "hinic_hw_mbox.h"
 
 #define HINIC_MAX_QPS   32
 
@@ -225,6 +226,7 @@ struct hinic_hwdev {
 
 	struct hinic_aeqs               aeqs;
 	struct hinic_func_to_io         func_to_io;
+	struct hinic_mbox_func_to_func  *func_to_func;
 
 	struct hinic_cap                nic_cap;
 };
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
index d35f2068ee0c..d73256da4b80 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
@@ -143,8 +143,9 @@ enum hinic_eq_type {
 };
 
 enum hinic_aeq_type {
+	HINIC_MBX_FROM_FUNC = 1,
 	HINIC_MSG_FROM_MGMT_CPU = 2,
-
+	HINIC_MBX_SEND_RSLT = 5,
 	HINIC_MAX_AEQ_EVENTS,
 };
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index 07bbfbf68577..3fbd2eb80582 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -115,8 +115,12 @@ int hinic_msix_attr_cnt_clear(struct hinic_hwif *hwif, u16 msix_index)
  **/
 void hinic_set_pf_action(struct hinic_hwif *hwif, enum hinic_pf_action action)
 {
-	u32 attr5 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR5_ADDR);
+	u32 attr5;
 
+	if (HINIC_IS_VF(hwif))
+		return;
+
+	attr5 = hinic_hwif_read_reg(hwif, HINIC_CSR_FUNC_ATTR5_ADDR);
 	attr5 = HINIC_FA5_CLEAR(attr5, PF_ACTION);
 	attr5 |= HINIC_FA5_SET(action, PF_ACTION);
 
@@ -203,7 +207,8 @@ static int hwif_ready(struct hinic_hwif *hwif)
  * @attr0: the first attribute that was read from the hw
  * @attr1: the second attribute that was read from the hw
  **/
-static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1)
+static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1,
+			  u32 attr2)
 {
 	hwif->attr.func_idx     = HINIC_FA0_GET(attr0, FUNC_IDX);
 	hwif->attr.pf_idx       = HINIC_FA0_GET(attr0, PF_IDX);
@@ -214,6 +219,8 @@ static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1)
 	hwif->attr.num_ceqs = BIT(HINIC_FA1_GET(attr1, CEQS_PER_FUNC));
 	hwif->attr.num_irqs = BIT(HINIC_FA1_GET(attr1, IRQS_PER_FUNC));
 	hwif->attr.num_dma_attr = BIT(HINIC_FA1_GET(attr1, DMA_ATTR_PER_FUNC));
+	hwif->attr.global_vf_id_of_pf = HINIC_FA2_GET(attr2,
+						      GLOBAL_VF_ID_OF_PF);
 }
 
 /**
@@ -222,7 +229,7 @@ static void set_hwif_attr(struct hinic_hwif *hwif, u32 attr0, u32 attr1)
  **/
 static void read_hwif_attr(struct hinic_hwif *hwif)
 {
-	u32 addr, attr0, attr1;
+	u32 addr, attr0, attr1, attr2;
 
 	addr   = HINIC_CSR_FUNC_ATTR0_ADDR;
 	attr0  = hinic_hwif_read_reg(hwif, addr);
@@ -230,7 +237,10 @@ static void read_hwif_attr(struct hinic_hwif *hwif)
 	addr   = HINIC_CSR_FUNC_ATTR1_ADDR;
 	attr1  = hinic_hwif_read_reg(hwif, addr);
 
-	set_hwif_attr(hwif, attr0, attr1);
+	addr   = HINIC_CSR_FUNC_ATTR2_ADDR;
+	attr2  = hinic_hwif_read_reg(hwif, addr);
+
+	set_hwif_attr(hwif, attr0, attr1, attr2);
 }
 
 /**
@@ -309,6 +319,34 @@ static void dma_attr_init(struct hinic_hwif *hwif)
 		     HINIC_PCIE_SNOOP, HINIC_PCIE_TPH_DISABLE);
 }
 
+u16 hinic_glb_pf_vf_offset(struct hinic_hwif *hwif)
+{
+	if (!hwif)
+		return 0;
+
+	return hwif->attr.global_vf_id_of_pf;
+}
+
+u16 hinic_global_func_id_hw(struct hinic_hwif *hwif)
+{
+	u32 addr, attr0;
+
+	addr   = HINIC_CSR_FUNC_ATTR0_ADDR;
+	attr0  = hinic_hwif_read_reg(hwif, addr);
+
+	return HINIC_FA0_GET(attr0, FUNC_IDX);
+}
+
+u16 hinic_pf_id_of_vf_hw(struct hinic_hwif *hwif)
+{
+	u32 addr, attr0;
+
+	addr   = HINIC_CSR_FUNC_ATTR0_ADDR;
+	attr0  = hinic_hwif_read_reg(hwif, addr);
+
+	return HINIC_FA0_GET(attr0, PF_IDX);
+}
+
 /**
  * hinic_init_hwif - initialize the hw interface
  * @hwif: the HW interface of a pci function device
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index c7bb9ceca72c..53bb89c1dd26 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -35,6 +35,7 @@
 #define HINIC_FA0_FUNC_IDX_SHIFT                                0
 #define HINIC_FA0_PF_IDX_SHIFT                                  10
 #define HINIC_FA0_PCI_INTF_IDX_SHIFT                            14
+#define HINIC_FA0_VF_IN_PF_SHIFT				16
 /* reserved members - off 16 */
 #define HINIC_FA0_FUNC_TYPE_SHIFT                               24
 
@@ -42,6 +43,7 @@
 #define HINIC_FA0_PF_IDX_MASK                                   0xF
 #define HINIC_FA0_PCI_INTF_IDX_MASK                             0x3
 #define HINIC_FA0_FUNC_TYPE_MASK                                0x1
+#define HINIC_FA0_VF_IN_PF_MASK					0xFF
 
 #define HINIC_FA0_GET(val, member)                              \
 	(((val) >> HINIC_FA0_##member##_SHIFT) & HINIC_FA0_##member##_MASK)
@@ -64,6 +66,12 @@
 #define HINIC_FA1_GET(val, member)                              \
 	(((val) >> HINIC_FA1_##member##_SHIFT) & HINIC_FA1_##member##_MASK)
 
+#define HINIC_FA2_GLOBAL_VF_ID_OF_PF_SHIFT	16
+#define HINIC_FA2_GLOBAL_VF_ID_OF_PF_MASK	0x3FF
+
+#define HINIC_FA2_GET(val, member)				\
+	(((val) >> HINIC_FA2_##member##_SHIFT) & HINIC_FA2_##member##_MASK)
+
 #define HINIC_FA4_OUTBOUND_STATE_SHIFT                          0
 #define HINIC_FA4_DB_STATE_SHIFT                                1
 
@@ -140,6 +148,7 @@
 #define HINIC_HWIF_PPF_IDX(hwif)        ((hwif)->attr.ppf_idx)
 
 #define HINIC_FUNC_TYPE(hwif)           ((hwif)->attr.func_type)
+#define HINIC_IS_VF(hwif)               (HINIC_FUNC_TYPE(hwif) == HINIC_VF)
 #define HINIC_IS_PF(hwif)               (HINIC_FUNC_TYPE(hwif) == HINIC_PF)
 #define HINIC_IS_PPF(hwif)              (HINIC_FUNC_TYPE(hwif) == HINIC_PPF)
 
@@ -173,6 +182,7 @@ enum hinic_pcie_tph {
 
 enum hinic_func_type {
 	HINIC_PF        = 0,
+	HINIC_VF	    = 1,
 	HINIC_PPF       = 2,
 };
 
@@ -223,6 +233,8 @@ struct hinic_func_attr {
 	u8                      num_ceqs;
 
 	u8                      num_dma_attr;
+
+	u16						global_vf_id_of_pf;
 };
 
 struct hinic_hwif {
@@ -271,6 +283,12 @@ enum hinic_db_state hinic_db_state_get(struct hinic_hwif *hwif);
 void hinic_db_state_set(struct hinic_hwif *hwif,
 			enum hinic_db_state db_state);
 
+u16 hinic_glb_pf_vf_offset(struct hinic_hwif *hwif);
+
+u16 hinic_global_func_id_hw(struct hinic_hwif *hwif);
+
+u16 hinic_pf_id_of_vf_hw(struct hinic_hwif *hwif);
+
 int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev);
 
 void hinic_free_hwif(struct hinic_hwif *hwif);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
new file mode 100644
index 000000000000..f8626dfd192e
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
@@ -0,0 +1,1213 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+#include <linux/pci.h>
+#include <linux/delay.h>
+#include <linux/types.h>
+#include <linux/completion.h>
+#include <linux/semaphore.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#include "hinic_hw_if.h"
+#include "hinic_hw_mgmt.h"
+#include "hinic_hw_csr.h"
+#include "hinic_hw_dev.h"
+#include "hinic_hw_mbox.h"
+
+#define HINIC_MBOX_INT_DST_FUNC_SHIFT				0
+#define HINIC_MBOX_INT_DST_AEQN_SHIFT				10
+#define HINIC_MBOX_INT_SRC_RESP_AEQN_SHIFT			12
+#define HINIC_MBOX_INT_STAT_DMA_SHIFT				14
+/* The size of the data to be sent (in units of 4 bytes) */
+#define HINIC_MBOX_INT_TX_SIZE_SHIFT				20
+/* SO_RO(strong order, relax order) */
+#define HINIC_MBOX_INT_STAT_DMA_SO_RO_SHIFT			25
+#define HINIC_MBOX_INT_WB_EN_SHIFT				28
+
+#define HINIC_MBOX_INT_DST_FUNC_MASK				0x3FF
+#define HINIC_MBOX_INT_DST_AEQN_MASK				0x3
+#define HINIC_MBOX_INT_SRC_RESP_AEQN_MASK			0x3
+#define HINIC_MBOX_INT_STAT_DMA_MASK				0x3F
+#define HINIC_MBOX_INT_TX_SIZE_MASK				0x1F
+#define HINIC_MBOX_INT_STAT_DMA_SO_RO_MASK			0x3
+#define HINIC_MBOX_INT_WB_EN_MASK				0x1
+
+#define HINIC_MBOX_INT_SET(val, field)	\
+			(((val) & HINIC_MBOX_INT_##field##_MASK) << \
+			HINIC_MBOX_INT_##field##_SHIFT)
+
+enum hinic_mbox_tx_status {
+	TX_NOT_DONE = 1,
+};
+
+#define HINIC_MBOX_CTRL_TRIGGER_AEQE_SHIFT			0
+
+/* specifies the state of the Tx request for the message data.
+ * 0 - Tx request is done;
+ * 1 - Tx request is in process.
+ */
+#define HINIC_MBOX_CTRL_TX_STATUS_SHIFT				1
+
+#define HINIC_MBOX_CTRL_TRIGGER_AEQE_MASK			0x1
+#define HINIC_MBOX_CTRL_TX_STATUS_MASK				0x1
+
+#define HINIC_MBOX_CTRL_SET(val, field)	\
+			(((val) & HINIC_MBOX_CTRL_##field##_MASK) << \
+			HINIC_MBOX_CTRL_##field##_SHIFT)
+
+#define HINIC_MBOX_HEADER_MSG_LEN_SHIFT				0
+#define HINIC_MBOX_HEADER_MODULE_SHIFT				11
+#define HINIC_MBOX_HEADER_SEG_LEN_SHIFT				16
+#define HINIC_MBOX_HEADER_NO_ACK_SHIFT				22
+#define HINIC_MBOX_HEADER_SEQID_SHIFT				24
+#define HINIC_MBOX_HEADER_LAST_SHIFT				30
+
+/* specifies the mailbox message direction
+ * 0 - send
+ * 1 - receive
+ */
+#define HINIC_MBOX_HEADER_DIRECTION_SHIFT			31
+#define HINIC_MBOX_HEADER_CMD_SHIFT				32
+#define HINIC_MBOX_HEADER_MSG_ID_SHIFT				40
+#define HINIC_MBOX_HEADER_STATUS_SHIFT				48
+#define HINIC_MBOX_HEADER_SRC_GLB_FUNC_IDX_SHIFT		54
+
+#define HINIC_MBOX_HEADER_MSG_LEN_MASK				0x7FF
+#define HINIC_MBOX_HEADER_MODULE_MASK				0x1F
+#define HINIC_MBOX_HEADER_SEG_LEN_MASK				0x3F
+#define HINIC_MBOX_HEADER_NO_ACK_MASK				0x1
+#define HINIC_MBOX_HEADER_SEQID_MASK				0x3F
+#define HINIC_MBOX_HEADER_LAST_MASK				0x1
+#define HINIC_MBOX_HEADER_DIRECTION_MASK			0x1
+#define HINIC_MBOX_HEADER_CMD_MASK				0xFF
+#define HINIC_MBOX_HEADER_MSG_ID_MASK				0xFF
+#define HINIC_MBOX_HEADER_STATUS_MASK				0x3F
+#define HINIC_MBOX_HEADER_SRC_GLB_FUNC_IDX_MASK			0x3FF
+
+#define HINIC_MBOX_HEADER_GET(val, field)	\
+			(((val) >> HINIC_MBOX_HEADER_##field##_SHIFT) & \
+			HINIC_MBOX_HEADER_##field##_MASK)
+#define HINIC_MBOX_HEADER_SET(val, field)	\
+			((u64)((val) & HINIC_MBOX_HEADER_##field##_MASK) << \
+			HINIC_MBOX_HEADER_##field##_SHIFT)
+
+#define MBOX_SEGLEN_MASK			\
+		HINIC_MBOX_HEADER_SET(HINIC_MBOX_HEADER_SEG_LEN_MASK, SEG_LEN)
+
+#define HINIC_MBOX_SEG_LEN			48
+#define HINIC_MBOX_COMP_TIME			8000U
+#define MBOX_MSG_POLLING_TIMEOUT		8000
+
+#define HINIC_MBOX_DATA_SIZE			2040
+
+#define MBOX_MAX_BUF_SZ				2048UL
+#define MBOX_HEADER_SZ				8
+
+#define MBOX_INFO_SZ				4
+
+/* MBOX size is 64B, 8B for mbox_header, 4B reserved */
+#define MBOX_SEG_LEN				48
+#define MBOX_SEG_LEN_ALIGN			4
+#define MBOX_WB_STATUS_LEN			16UL
+
+/* mbox write back status is 16B, only first 4B is used */
+#define MBOX_WB_STATUS_ERRCODE_MASK		0xFFFF
+#define MBOX_WB_STATUS_MASK			0xFF
+#define MBOX_WB_ERROR_CODE_MASK			0xFF00
+#define MBOX_WB_STATUS_FINISHED_SUCCESS		0xFF
+#define MBOX_WB_STATUS_FINISHED_WITH_ERR	0xFE
+#define MBOX_WB_STATUS_NOT_FINISHED		0x00
+
+#define MBOX_STATUS_FINISHED(wb)	\
+	(((wb) & MBOX_WB_STATUS_MASK) != MBOX_WB_STATUS_NOT_FINISHED)
+#define MBOX_STATUS_SUCCESS(wb)		\
+	(((wb) & MBOX_WB_STATUS_MASK) == MBOX_WB_STATUS_FINISHED_SUCCESS)
+#define MBOX_STATUS_ERRCODE(wb)		\
+	((wb) & MBOX_WB_ERROR_CODE_MASK)
+
+#define SEQ_ID_START_VAL			0
+#define SEQ_ID_MAX_VAL				42
+
+#define DST_AEQ_IDX_DEFAULT_VAL			0
+#define SRC_AEQ_IDX_DEFAULT_VAL			0
+#define NO_DMA_ATTRIBUTE_VAL			0
+
+#define HINIC_MGMT_RSP_AEQN			0
+#define HINIC_MBOX_RSP_AEQN			2
+#define HINIC_MBOX_RECV_AEQN			0
+
+#define MBOX_MSG_NO_DATA_LEN			1
+
+#define MBOX_BODY_FROM_HDR(header)	((u8 *)(header) + MBOX_HEADER_SZ)
+#define MBOX_AREA(hwif)			\
+	((hwif)->cfg_regs_bar + HINIC_FUNC_CSR_MAILBOX_DATA_OFF)
+
+#define IS_PF_OR_PPF_SRC(src_func_idx)	((src_func_idx) < HINIC_MAX_PF_FUNCS)
+
+#define MBOX_RESPONSE_ERROR		0x1
+#define MBOX_MSG_ID_MASK		0xFF
+#define MBOX_MSG_ID(func_to_func)	((func_to_func)->send_msg_id)
+#define MBOX_MSG_ID_INC(func_to_func_mbox) (MBOX_MSG_ID(func_to_func_mbox) = \
+			(MBOX_MSG_ID(func_to_func_mbox) + 1) & MBOX_MSG_ID_MASK)
+
+#define FUNC_ID_OFF_SET_8B		8
+#define FUNC_ID_OFF_SET_10B		10
+
+/* maximum number of messages waiting to be processed for one function */
+#define HINIC_MAX_MSG_CNT_TO_PROCESS	10
+
+#define HINIC_QUEUE_MIN_DEPTH		6
+#define HINIC_QUEUE_MAX_DEPTH		12
+#define HINIC_MAX_RX_BUFFER_SIZE		15
+
+enum hinic_hwif_direction_type {
+	HINIC_HWIF_DIRECT_SEND	= 0,
+	HINIC_HWIF_RESPONSE	= 1,
+};
+
+enum mbox_send_mod {
+	MBOX_SEND_MSG_INT,
+};
+
+enum mbox_seg_type {
+	NOT_LAST_SEG,
+	LAST_SEG,
+};
+
+enum mbox_ordering_type {
+	STRONG_ORDER,
+};
+
+enum mbox_write_back_type {
+	WRITE_BACK = 1,
+};
+
+enum mbox_aeq_trig_type {
+	NOT_TRIGGER,
+	TRIGGER,
+};
+
+/**
+ * hinic_register_pf_mbox_cb - install the PF mailbox callback of a module
+ * @hwdev:	the pointer to hw device
+ * @mod:	module whose mailbox messages the callback handles
+ * @callback:	handler stored in the per-module PF callback table
+ * Return: 0 on success, -EFAULT if @mod is not below HINIC_MOD_MAX
+ */
+int hinic_register_pf_mbox_cb(struct hinic_hwdev *hwdev,
+			      enum hinic_mod_type mod,
+			      hinic_pf_mbox_cb callback)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+
+	if (mod >= HINIC_MOD_MAX)
+		return -EFAULT;
+
+	/* fill the table slot, then flag the module as registered */
+	f2f->pf_mbox_cb[mod] = callback;
+	set_bit(HINIC_PF_MBOX_CB_REG, &f2f->pf_mbox_cb_state[mod]);
+
+	return 0;
+}
+
+/**
+ * hinic_register_vf_mbox_cb - install the VF mailbox callback of a module
+ * @hwdev:	the pointer to hw device
+ * @mod:	module whose mailbox messages the callback handles
+ * @callback:	handler stored in the per-module VF callback table
+ * Return: 0 on success, -EFAULT if @mod is not below HINIC_MOD_MAX
+ */
+int hinic_register_vf_mbox_cb(struct hinic_hwdev *hwdev,
+			      enum hinic_mod_type mod,
+			      hinic_vf_mbox_cb callback)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+
+	if (mod >= HINIC_MOD_MAX)
+		return -EFAULT;
+
+	/* fill the table slot, then flag the module as registered */
+	f2f->vf_mbox_cb[mod] = callback;
+	set_bit(HINIC_VF_MBOX_CB_REG, &f2f->vf_mbox_cb_state[mod]);
+
+	return 0;
+}
+
+/**
+ * hinic_unregister_pf_mbox_cb - remove the PF mailbox callback of a module
+ * @hwdev:	the pointer to hw device
+ * @mod:	module whose callback is removed
+ */
+void hinic_unregister_pf_mbox_cb(struct hinic_hwdev *hwdev,
+				 enum hinic_mod_type mod)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+
+	clear_bit(HINIC_PF_MBOX_CB_REG, &f2f->pf_mbox_cb_state[mod]);
+
+	/* sleep until a callback that is still running has returned */
+	while (test_bit(HINIC_PF_MBOX_CB_RUNNING, &f2f->pf_mbox_cb_state[mod]))
+		usleep_range(900, 1000);
+
+	f2f->pf_mbox_cb[mod] = NULL;
+}
+
+/**
+ * hinic_unregister_vf_mbox_cb - remove the VF mailbox callback of a module
+ * @hwdev:	the pointer to hw device
+ * @mod:	module whose callback is removed
+ */
+void hinic_unregister_vf_mbox_cb(struct hinic_hwdev *hwdev,
+				 enum hinic_mod_type mod)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+
+	clear_bit(HINIC_VF_MBOX_CB_REG, &f2f->vf_mbox_cb_state[mod]);
+
+	/* sleep until a callback that is still running has returned */
+	while (test_bit(HINIC_VF_MBOX_CB_RUNNING, &f2f->vf_mbox_cb_state[mod]))
+		usleep_range(900, 1000);
+
+	f2f->vf_mbox_cb[mod] = NULL;
+}
+
+static int recv_vf_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+				struct hinic_recv_mbox *recv_mbox,
+				void *buf_out, u16 *out_size)
+{
+	enum hinic_mod_type mod = recv_mbox->mod;
+	hinic_vf_mbox_cb handler;
+	int err = -EINVAL;
+
+	if (mod >= HINIC_MOD_MAX) {
+		dev_err(&func_to_func->hwif->pdev->dev, "Receive illegal mbox message, mod = %d\n",
+			mod);
+		return err;
+	}
+
+	set_bit(HINIC_VF_MBOX_CB_RUNNING, &func_to_func->vf_mbox_cb_state[mod]);
+
+	handler = func_to_func->vf_mbox_cb[mod];
+	if (!handler || !test_bit(HINIC_VF_MBOX_CB_REG,
+				  &func_to_func->vf_mbox_cb_state[mod])) {
+		dev_err(&func_to_func->hwif->pdev->dev, "VF mbox cb is not registered\n");
+	} else {
+		handler(func_to_func->hwdev, recv_mbox->cmd, recv_mbox->mbox,
+			recv_mbox->mbox_len, buf_out, out_size);
+		err = 0;
+	}
+
+	clear_bit(HINIC_VF_MBOX_CB_RUNNING,
+		  &func_to_func->vf_mbox_cb_state[mod]);
+
+	return err;
+}
+
+static int
+recv_pf_from_vf_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+			     struct hinic_recv_mbox *recv_mbox,
+			     u16 src_func_idx, void *buf_out,
+			     u16 *out_size)
+{
+	enum hinic_mod_type mod = recv_mbox->mod;
+	hinic_pf_mbox_cb handler;
+	int err = -EINVAL;
+	u16 vf;
+
+	if (mod >= HINIC_MOD_MAX) {
+		dev_err(&func_to_func->hwif->pdev->dev, "Receive illegal mbox message, mod = %d\n",
+			mod);
+		return err;
+	}
+
+	set_bit(HINIC_PF_MBOX_CB_RUNNING,
+		&func_to_func->pf_mbox_cb_state[mod]);
+
+	handler = func_to_func->pf_mbox_cb[mod];
+	if (!handler || !test_bit(HINIC_PF_MBOX_CB_REG,
+				  &func_to_func->pf_mbox_cb_state[mod])) {
+		dev_err(&func_to_func->hwif->pdev->dev, "PF mbox mod(0x%x) cb is not registered\n",
+			mod);
+	} else {
+		/* the callback gets the VF id relative to this PF */
+		vf = src_func_idx - hinic_glb_pf_vf_offset(func_to_func->hwif);
+		err = handler(func_to_func->hwdev, vf, recv_mbox->cmd,
+			      recv_mbox->mbox, recv_mbox->mbox_len,
+			      buf_out, out_size);
+	}
+
+	clear_bit(HINIC_PF_MBOX_CB_RUNNING,
+		  &func_to_func->pf_mbox_cb_state[mod]);
+
+	return err;
+}
+
+static bool check_mbox_seq_id_and_seg_len(struct hinic_recv_mbox *recv_mbox,
+					  u8 seq_id, u8 seg_len)
+{
+	/* a segment must also end inside the MBOX_MAX_BUF_SZ reassembly
+	 * buffer: seq_id 42 with a full 48B segment would end at byte 2064
+	 */
+	if (seq_id > SEQ_ID_MAX_VAL || seg_len > MBOX_SEG_LEN ||
+	    seq_id * MBOX_SEG_LEN + seg_len > MBOX_MAX_BUF_SZ)
+		return false;
+
+	/* seq 0 starts a new message, others must follow the previous one */
+	if (seq_id != 0 && seq_id != recv_mbox->seq_id + 1)
+		return false;
+
+	recv_mbox->seq_id = seq_id;
+	return true;
+}
+
+static void resp_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+			      struct hinic_recv_mbox *recv_mbox)
+{
+	spin_lock(&func_to_func->mbox_lock);
+	if (recv_mbox->msg_info.msg_id != func_to_func->send_msg_id ||
+	    func_to_func->event_flag != EVENT_START)
+		dev_err(&func_to_func->hwif->pdev->dev,
+			"Mbox response timeout, current send msg id(0x%x), recv msg id(0x%x), status(0x%x)\n",
+			func_to_func->send_msg_id, recv_mbox->msg_info.msg_id,
+			recv_mbox->msg_info.status);
+	else
+		complete(&recv_mbox->recv_done);
+	spin_unlock(&func_to_func->mbox_lock);
+}
+
+static void recv_func_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+				   struct hinic_recv_mbox *recv_mbox,
+				   u16 src_func_idx);
+
+static void recv_func_mbox_work_handler(struct work_struct *work)
+{
+	struct hinic_mbox_work *item =
+			container_of(work, struct hinic_mbox_work, work);
+	struct hinic_mbox_func_to_func *f2f = item->func_to_func;
+
+	/* the handler frees item->recv_mbox together with its buffers */
+	recv_func_mbox_handler(f2f, item->recv_mbox, item->src_func_idx);
+
+	/* the message is processed: drop the per-source pending counter
+	 * that recv_mbox_handler() raised when it queued this work item
+	 */
+	atomic_dec(&f2f->mbox_send[item->src_func_idx].msg_cnt);
+
+	kfree(item);
+}
+
+static void recv_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+			      void *header, struct hinic_recv_mbox *recv_mbox)
+{
+	u64 hdr = *((u64 *)header);
+	u16 src_func_idx = HINIC_MBOX_HEADER_GET(hdr, SRC_GLB_FUNC_IDX);
+	u8 seg_len = HINIC_MBOX_HEADER_GET(hdr, SEG_LEN);
+	u8 seq_id = HINIC_MBOX_HEADER_GET(hdr, SEQID);
+	struct hinic_recv_mbox *msg_copy;
+	struct hinic_mbox_work *work_item;
+
+	if (!check_mbox_seq_id_and_seg_len(recv_mbox, seq_id, seg_len)) {
+		dev_err(&func_to_func->hwif->pdev->dev,
+			"Mailbox sequence and segment check fail, src func id: 0x%x, front id: 0x%x, current id: 0x%x, seg len: 0x%x\n",
+			src_func_idx, recv_mbox->seq_id, seq_id, seg_len);
+		recv_mbox->seq_id = SEQ_ID_MAX_VAL;
+		return;
+	}
+
+	/* each segment has its own MBOX_SEG_LEN sized place in the buffer */
+	memcpy((u8 *)recv_mbox->mbox + seq_id * MBOX_SEG_LEN,
+	       MBOX_BODY_FROM_HDR(header), seg_len);
+
+	/* the message is only interpreted once its last segment is here */
+	if (!HINIC_MBOX_HEADER_GET(hdr, LAST))
+		return;
+
+	recv_mbox->cmd = HINIC_MBOX_HEADER_GET(hdr, CMD);
+	recv_mbox->mod = HINIC_MBOX_HEADER_GET(hdr, MODULE);
+	recv_mbox->mbox_len = HINIC_MBOX_HEADER_GET(hdr, MSG_LEN);
+	recv_mbox->ack_type = HINIC_MBOX_HEADER_GET(hdr, NO_ACK);
+	recv_mbox->msg_info.msg_id = HINIC_MBOX_HEADER_GET(hdr, MSG_ID);
+	recv_mbox->msg_info.status = HINIC_MBOX_HEADER_GET(hdr, STATUS);
+	recv_mbox->seq_id = SEQ_ID_MAX_VAL;
+
+	/* a response is handed to the waiting sender, no work item needed */
+	if (HINIC_MBOX_HEADER_GET(hdr, DIRECTION) == HINIC_HWIF_RESPONSE) {
+		resp_mbox_handler(func_to_func, recv_mbox);
+		return;
+	}
+
+	if (atomic_read(&recv_mbox->msg_cnt) > HINIC_MAX_MSG_CNT_TO_PROCESS) {
+		dev_warn(&func_to_func->hwif->pdev->dev,
+			 "This function(%u) have %d message wait to process,can't add to work queue\n",
+			 src_func_idx, atomic_read(&recv_mbox->msg_cnt));
+		return;
+	}
+
+	/* the work item gets its own copy of the message and buffers, which
+	 * recv_func_mbox_handler() frees after processing
+	 */
+	msg_copy = kzalloc(sizeof(*msg_copy), GFP_KERNEL);
+	if (!msg_copy)
+		return;
+
+	memcpy(msg_copy, recv_mbox, sizeof(*msg_copy));
+
+	msg_copy->mbox = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+	if (!msg_copy->mbox)
+		goto err_free_copy;
+
+	memcpy(msg_copy->mbox, recv_mbox->mbox, MBOX_MAX_BUF_SZ);
+
+	msg_copy->buf_out = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+	if (!msg_copy->buf_out)
+		goto err_free_mbox;
+
+	work_item = kzalloc(sizeof(*work_item), GFP_KERNEL);
+	if (!work_item)
+		goto err_free_buf_out;
+
+	work_item->func_to_func = func_to_func;
+	work_item->recv_mbox = msg_copy;
+	work_item->src_func_idx = src_func_idx;
+
+	/* decremented by recv_func_mbox_work_handler() after processing */
+	atomic_inc(&recv_mbox->msg_cnt);
+	INIT_WORK(&work_item->work, recv_func_mbox_work_handler);
+	queue_work(func_to_func->workq, &work_item->work);
+
+	return;
+
+err_free_buf_out:
+	kfree(msg_copy->buf_out);
+
+err_free_mbox:
+	kfree(msg_copy->mbox);
+
+err_free_copy:
+	kfree(msg_copy);
+}
+
+void hinic_mbox_func_aeqe_handler(void *handle, void *header, u8 size)
+{
+	struct hinic_hwdev *hwdev = handle;
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+	u64 hdr = *((u64 *)header);
+	u64 src = HINIC_MBOX_HEADER_GET(hdr, SRC_GLB_FUNC_IDX);
+	struct hinic_recv_mbox *slot;
+
+	/* src selects one of the HINIC_MAX_FUNCTIONS receive slots */
+	if (src >= HINIC_MAX_FUNCTIONS) {
+		dev_err(&f2f->hwif->pdev->dev,
+			"Mailbox source function id:%u is invalid\n", (u32)src);
+		return;
+	}
+
+	/* direct sends and responses are reassembled in separate slots */
+	if (HINIC_MBOX_HEADER_GET(hdr, DIRECTION) == HINIC_HWIF_DIRECT_SEND)
+		slot = &f2f->mbox_send[src];
+	else
+		slot = &f2f->mbox_resp[src];
+
+	/* @size is not used, the header describes the segment */
+	recv_mbox_handler(f2f, (u64 *)header, slot);
+}
+
+void hinic_mbox_self_aeqe_handler(void *handle, void *header, u8 size)
+{
+	struct hinic_hwdev *hwdev = handle;
+	struct hinic_send_mbox *send_mbox = &hwdev->func_to_func->send_mbox;
+
+	/* @header and @size are unused: the event itself is the signal that
+	 * wakes the sender blocked in wait_for_mbox_seg_completion()
+	 */
+	complete(&send_mbox->send_done);
+}
+
+static void clear_mbox_status(struct hinic_send_mbox *mbox)
+{
+	u64 *wb = mbox->wb_status;
+
+	*wb = 0;
+	wmb(); /* clear mailbox write back status */
+}
+
+static void mbox_copy_header(struct hinic_hwdev *hwdev,
+			     struct hinic_send_mbox *mbox, u64 *header)
+{
+	u32 *word = (u32 *)header;
+	u32 off;
+
+	for (off = 0; off < MBOX_HEADER_SZ; off += sizeof(u32))
+		__raw_writel(*word++, mbox->data + off); /* 4B per write */
+}
+
+static void mbox_copy_send_data(struct hinic_hwdev *hwdev,
+				struct hinic_send_mbox *mbox, void *seg,
+				u16 seg_len)
+{
+	u8 padded[MBOX_SEG_LEN] = {0};
+	u32 word_sz = sizeof(u32);
+	u32 nr_words = ALIGN((u32)seg_len, word_sz) / word_sz;
+	u32 *src = seg;
+	u32 n;
+
+	/* Whole 32-bit words are written, so a segment whose length is not
+	 * a multiple of 4 is first copied into a zero padded bounce buffer.
+	 */
+	if (seg_len % word_sz) {
+		memcpy(padded, seg, seg_len);
+		src = (u32 *)padded;
+	}
+
+	for (n = 0; n < nr_words; n++)
+		__raw_writel(src[n],
+			     mbox->data + MBOX_HEADER_SZ + n * sizeof(u32));
+}
+
+static void write_mbox_msg_attr(struct hinic_mbox_func_to_func *func_to_func,
+				u16 dst_func, u16 dst_aeqn, u16 seg_len,
+				int poll)
+{
+	u16 rsp_aeq = dst_aeqn ? HINIC_MBOX_RSP_AEQN : 0;
+	u32 tx_size, int_attr, ctrl;
+
+	/* the Tx size always covers a full segment plus header and info, in
+	 * units of 4 bytes; @seg_len is not used here
+	 */
+	tx_size = ALIGN(MBOX_SEG_LEN + MBOX_HEADER_SZ + MBOX_INFO_SZ,
+			MBOX_SEG_LEN_ALIGN) >> 2;
+
+	int_attr = HINIC_MBOX_INT_SET(dst_func, DST_FUNC) |
+		   HINIC_MBOX_INT_SET(dst_aeqn, DST_AEQN) |
+		   HINIC_MBOX_INT_SET(rsp_aeq, SRC_RESP_AEQN) |
+		   HINIC_MBOX_INT_SET(NO_DMA_ATTRIBUTE_VAL, STAT_DMA) |
+		   HINIC_MBOX_INT_SET(tx_size, TX_SIZE) |
+		   HINIC_MBOX_INT_SET(STRONG_ORDER, STAT_DMA_SO_RO) |
+		   HINIC_MBOX_INT_SET(WRITE_BACK, WB_EN);
+
+	hinic_hwif_write_reg(func_to_func->hwif,
+			     HINIC_FUNC_CSR_MAILBOX_INT_OFFSET_OFF, int_attr);
+
+	wmb(); /* writing the mbox int attributes */
+
+	ctrl = HINIC_MBOX_CTRL_SET(TX_NOT_DONE, TX_STATUS) |
+	       HINIC_MBOX_CTRL_SET(poll ? NOT_TRIGGER : TRIGGER, TRIGGER_AEQE);
+	hinic_hwif_write_reg(func_to_func->hwif,
+			     HINIC_FUNC_CSR_MAILBOX_CONTROL_OFF, ctrl);
+}
+
+void dump_mox_reg(struct hinic_hwdev *hwdev)
+{
+	struct hinic_hwif *hwif = hwdev->hwif;
+	u32 ctrl, int_off;
+
+	/* log the two mailbox registers, as done on send timeouts */
+	ctrl = hinic_hwif_read_reg(hwif, HINIC_FUNC_CSR_MAILBOX_CONTROL_OFF);
+	dev_err(&hwif->pdev->dev, "Mailbox control reg: 0x%x\n", ctrl);
+
+	int_off = hinic_hwif_read_reg(hwif,
+				      HINIC_FUNC_CSR_MAILBOX_INT_OFFSET_OFF);
+	dev_err(&hwif->pdev->dev, "Mailbox interrupt offset: 0x%x\n", int_off);
+}
+
+static u16 get_mbox_status(struct hinic_send_mbox *mbox)
+{
+	/* only the low 16 bits of the first big-endian u64 are decoded */
+	u64 status = be64_to_cpu(*mbox->wb_status);
+
+	rmb(); /* verify reading before check */
+
+	return (u16)(status & MBOX_WB_STATUS_ERRCODE_MASK);
+}
+
+static int
+wait_for_mbox_seg_completion(struct hinic_mbox_func_to_func *func_to_func,
+			     int poll, u16 *wb_status)
+{
+	struct hinic_send_mbox *send_mbox = &func_to_func->send_mbox;
+	struct hinic_hwdev *hwdev = func_to_func->hwdev;
+	u32 tries;
+	ulong jif;
+
+	if (!poll) {
+		/* hinic_mbox_self_aeqe_handler() completes send_done */
+		jif = msecs_to_jiffies(HINIC_MBOX_COMP_TIME);
+		if (!wait_for_completion_timeout(&send_mbox->send_done, jif)) {
+			dev_err(&hwdev->hwif->pdev->dev, "Send mailbox segment timeout\n");
+			dump_mox_reg(hwdev);
+			return -ETIMEDOUT;
+		}
+
+		*wb_status = get_mbox_status(send_mbox);
+		return 0;
+	}
+
+	/* polling mode: sample the write back status, sleeping 900-1000us
+	 * between two reads, at most MBOX_MSG_POLLING_TIMEOUT times
+	 */
+	for (tries = 0; tries < MBOX_MSG_POLLING_TIMEOUT; tries++) {
+		*wb_status = get_mbox_status(send_mbox);
+		if (MBOX_STATUS_FINISHED(*wb_status))
+			return 0;
+
+		usleep_range(900, 1000);
+	}
+
+	dev_err(&hwdev->hwif->pdev->dev, "Send mailbox segment timeout, wb status: 0x%x\n",
+		*wb_status);
+	dump_mox_reg(hwdev);
+
+	return -ETIMEDOUT;
+}
+
+static int send_mbox_seg(struct hinic_mbox_func_to_func *func_to_func,
+			 u64 header, u16 dst_func, void *seg, u16 seg_len,
+			 int poll, void *msg_info)
+{
+	struct hinic_send_mbox *send_mbox = &func_to_func->send_mbox;
+	struct hinic_hwdev *hwdev = func_to_func->hwdev;
+	u16 dst_aeqn = 0, wb_status = 0, errcode;
+
+	/* with fewer than 4 AEQs everything goes to AEQ 0; otherwise direct
+	 * sends use HINIC_MBOX_RECV_AEQN, responses HINIC_MBOX_RSP_AEQN
+	 */
+	if (hwdev->hwif->attr.num_aeqs >= 4) {
+		if (HINIC_MBOX_HEADER_GET(header, DIRECTION) ==
+		    HINIC_HWIF_DIRECT_SEND)
+			dst_aeqn = HINIC_MBOX_RECV_AEQN;
+		else
+			dst_aeqn = HINIC_MBOX_RSP_AEQN;
+	}
+
+	if (!poll)
+		init_completion(&send_mbox->send_done);
+
+	clear_mbox_status(send_mbox);
+	mbox_copy_header(hwdev, send_mbox, &header);
+	mbox_copy_send_data(hwdev, send_mbox, seg, seg_len);
+
+	write_mbox_msg_attr(func_to_func, dst_func, dst_aeqn, seg_len, poll);
+
+	wmb(); /* writing the mbox msg attributes */
+
+	if (wait_for_mbox_seg_completion(func_to_func, poll, &wb_status))
+		return -ETIMEDOUT;
+
+	if (MBOX_STATUS_SUCCESS(wb_status))
+		return 0;
+
+	dev_err(&hwdev->hwif->pdev->dev, "Send mailbox segment to function %d error, wb status: 0x%x\n",
+		dst_func, wb_status);
+	errcode = MBOX_STATUS_ERRCODE(wb_status);
+
+	return errcode ? errcode : -EFAULT;
+}
+
+static int send_mbox_to_func(struct hinic_mbox_func_to_func *func_to_func,
+			     enum hinic_mod_type mod, u16 cmd, void *msg,
+			     u16 msg_len, u16 dst_func,
+			     enum hinic_hwif_direction_type direction,
+			     enum hinic_mbox_ack_type ack_type,
+			     struct mbox_msg_info *msg_info)
+{
+	struct hinic_hwdev *hwdev = func_to_func->hwdev;
+	u16 remaining = msg_len, chunk = MBOX_SEG_LEN;
+	u8 *cursor = msg;
+	u32 seq_id = 0;
+	u64 header;
+	int err;
+
+	/* senders are serialized, one message is segmented at a time */
+	down(&func_to_func->msg_send_sem);
+
+	/* header of the first segment: full length, sequence 0, not last */
+	header = HINIC_MBOX_HEADER_SET(msg_len, MSG_LEN) |
+		 HINIC_MBOX_HEADER_SET(mod, MODULE) |
+		 HINIC_MBOX_HEADER_SET(chunk, SEG_LEN) |
+		 HINIC_MBOX_HEADER_SET(ack_type, NO_ACK) |
+		 HINIC_MBOX_HEADER_SET(SEQ_ID_START_VAL, SEQID) |
+		 HINIC_MBOX_HEADER_SET(NOT_LAST_SEG, LAST) |
+		 HINIC_MBOX_HEADER_SET(direction, DIRECTION) |
+		 HINIC_MBOX_HEADER_SET(cmd, CMD) |
+		 HINIC_MBOX_HEADER_SET(msg_info->msg_id, MSG_ID) |
+		 HINIC_MBOX_HEADER_SET(msg_info->status, STATUS) |
+		 HINIC_MBOX_HEADER_SET(hinic_global_func_id_hw(hwdev->hwif),
+				       SRC_GLB_FUNC_IDX);
+
+	do {
+		/* what is left fits into one segment: mark it as the last */
+		if (remaining <= HINIC_MBOX_SEG_LEN) {
+			header &= ~MBOX_SEGLEN_MASK;
+			header |= HINIC_MBOX_HEADER_SET(remaining, SEG_LEN) |
+				  HINIC_MBOX_HEADER_SET(LAST_SEG, LAST);
+			chunk = remaining;
+		}
+
+		/* MBOX_SEND_MSG_INT (0) is passed as the poll argument */
+		err = send_mbox_seg(func_to_func, header, dst_func, cursor,
+				    chunk, MBOX_SEND_MSG_INT, msg_info);
+		if (err) {
+			dev_err(&hwdev->hwif->pdev->dev, "Failed to send mbox seg, seq_id=0x%llx\n",
+				HINIC_MBOX_HEADER_GET(header, SEQID));
+			break;
+		}
+
+		remaining -= HINIC_MBOX_SEG_LEN;
+		cursor += HINIC_MBOX_SEG_LEN;
+
+		seq_id++;
+		header &= ~(HINIC_MBOX_HEADER_SET(HINIC_MBOX_HEADER_SEQID_MASK,
+						  SEQID));
+		header |= HINIC_MBOX_HEADER_SET(seq_id, SEQID);
+	} while (!(HINIC_MBOX_HEADER_GET(header, LAST)));
+
+	up(&func_to_func->msg_send_sem);
+
+	return err;
+}
+
+static void
+response_for_recv_func_mbox(struct hinic_mbox_func_to_func *func_to_func,
+			    struct hinic_recv_mbox *recv_mbox, int err,
+			    u16 out_size, u16 src_func_idx)
+{
+	struct mbox_msg_info msg_info = {0};
+
+	if (recv_mbox->ack_type != MBOX_ACK)
+		return;
+
+	msg_info.msg_id = recv_mbox->msg_info.msg_id;
+	if (err == HINIC_MBOX_PF_BUSY_ACTIVE_FW ||
+	    err == HINIC_MBOX_VF_CMD_ERROR)
+		msg_info.status = err;
+	else if (err)
+		msg_info.status = HINIC_MBOX_PF_SEND_ERR;
+
+	/* without data to return, or on error, a 1 byte reply is sent */
+	if (!out_size || err)
+		out_size = MBOX_MSG_NO_DATA_LEN;
+
+	send_mbox_to_func(func_to_func, recv_mbox->mod, recv_mbox->cmd,
+			  recv_mbox->buf_out, out_size, src_func_idx,
+			  HINIC_HWIF_RESPONSE, MBOX_ACK,
+			  &msg_info);
+}
+
+static void recv_func_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
+				   struct hinic_recv_mbox *recv_mbox,
+				   u16 src_func_idx)
+{
+	struct hinic_hwif *hwif = func_to_func->hwif;
+	void *buf_out = recv_mbox->buf_out;
+	u16 out_size = MBOX_MAX_BUF_SZ;
+	int err = 0;
+
+	if (HINIC_IS_VF(hwif))
+		err = recv_vf_mbox_handler(func_to_func, recv_mbox, buf_out,
+					   &out_size);
+	else if (!IS_PF_OR_PPF_SRC(src_func_idx))
+		err = recv_pf_from_vf_mbox_handler(func_to_func, recv_mbox,
+						   src_func_idx, buf_out,
+						   &out_size);
+	else
+		dev_warn(&hwif->pdev->dev,
+			 "Unsupported pf2pf mbox msg\n");
+
+	response_for_recv_func_mbox(func_to_func, recv_mbox, err, out_size,
+				    src_func_idx);
+
+	/* this copy of the message was allocated by recv_mbox_handler() */
+	kfree(recv_mbox->buf_out);
+	kfree(recv_mbox->mbox);
+	kfree(recv_mbox);
+}
+
+static void set_mbox_to_func_event(struct hinic_mbox_func_to_func *func_to_func,
+				   enum mbox_event_state event_flag)
+{
+	spin_lock(&func_to_func->mbox_lock); /* also held by resp_mbox_handler() */
+	func_to_func->event_flag = event_flag;
+	spin_unlock(&func_to_func->mbox_lock);
+}
+
+static int mbox_resp_info_handler(struct hinic_mbox_func_to_func *func_to_func,
+				  struct hinic_recv_mbox *mbox_for_resp,
+				  enum hinic_mod_type mod, u16 cmd,
+				  void *buf_out, u16 *out_size)
+{
+	u8 status = mbox_for_resp->msg_info.status;
+	u16 resp_len = mbox_for_resp->mbox_len;
+
+	/* a non-zero response status is handed back to the caller as it is */
+	if (status) {
+		if (status != HINIC_MBOX_PF_BUSY_ACTIVE_FW)
+			dev_err(&func_to_func->hwif->pdev->dev, "Mbox response error(0x%x)\n",
+				status);
+		return status;
+	}
+
+	if (!buf_out || !out_size)
+		return 0;
+
+	if (*out_size < resp_len) {
+		dev_err(&func_to_func->hwif->pdev->dev,
+			"Invalid response mbox message length: %d for mod %d cmd %d, should less than: %d\n",
+			resp_len, mod, cmd, *out_size);
+		return -EFAULT;
+	}
+
+	if (resp_len)
+		memcpy(buf_out, mbox_for_resp->mbox, resp_len);
+
+	*out_size = resp_len;
+	return 0;
+}
+
+int hinic_mbox_to_func(struct hinic_mbox_func_to_func *func_to_func,
+		       enum hinic_mod_type mod, u16 cmd, u16 dst_func,
+		       void *buf_in, u16 in_size, void *buf_out,
+		       u16 *out_size, u32 timeout)
+{
+	struct hinic_recv_mbox *mbox_for_resp;
+	struct mbox_msg_info msg_info = {0};
+	ulong timeo;
+	int err;
+
+	/* dst_func indexes the HINIC_MAX_FUNCTIONS response slots */
+	if (dst_func >= HINIC_MAX_FUNCTIONS)
+		return -EINVAL;
+
+	mbox_for_resp = &func_to_func->mbox_resp[dst_func];
+
+	down(&func_to_func->mbox_send_sem);
+	init_completion(&mbox_for_resp->recv_done);
+	msg_info.msg_id = MBOX_MSG_ID_INC(func_to_func);
+	set_mbox_to_func_event(func_to_func, EVENT_START);
+
+	err = send_mbox_to_func(func_to_func, mod, cmd, buf_in, in_size,
+				dst_func, HINIC_HWIF_DIRECT_SEND, MBOX_ACK,
+				&msg_info);
+	if (err) {
+		dev_err(&func_to_func->hwif->pdev->dev, "Send mailbox failed, msg_id: %d\n",
+			msg_info.msg_id);
+		set_mbox_to_func_event(func_to_func, EVENT_FAIL);
+		goto err_send_mbox;
+	}
+
+	timeo = msecs_to_jiffies(timeout ? timeout : HINIC_MBOX_COMP_TIME);
+	if (!wait_for_completion_timeout(&mbox_for_resp->recv_done, timeo)) {
+		set_mbox_to_func_event(func_to_func, EVENT_TIMEOUT);
+		dev_err(&func_to_func->hwif->pdev->dev,
+			"Send mbox msg timeout, msg_id: %d\n", msg_info.msg_id);
+		err = -ETIMEDOUT;
+		goto err_send_mbox;
+	}
+
+	set_mbox_to_func_event(func_to_func, EVENT_END);
+	err = mbox_resp_info_handler(func_to_func, mbox_for_resp, mod, cmd,
+				     buf_out, out_size);
+
+err_send_mbox:
+	up(&func_to_func->mbox_send_sem);
+
+	return err;
+}
+
+static int mbox_func_params_valid(struct hinic_mbox_func_to_func *func_to_func,
+				  void *buf_in, u16 in_size)
+{
+	/* only the payload length is validated, @buf_in is not inspected */
+	if (in_size <= HINIC_MBOX_DATA_SIZE)
+		return 0;
+
+	dev_err(&func_to_func->hwif->pdev->dev,
+		"Mbox msg len(%d) exceed limit(%d)\n",
+		in_size, HINIC_MBOX_DATA_SIZE);
+	return -EINVAL;
+}
+
+int hinic_mbox_to_pf(struct hinic_hwdev *hwdev,
+		     enum hinic_mod_type mod, u8 cmd, void *buf_in,
+		     u16 in_size, void *buf_out, u16 *out_size, u32 timeout)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+	int ret = mbox_func_params_valid(f2f, buf_in, in_size);
+
+	if (ret)
+		return ret;
+
+	if (HINIC_FUNC_TYPE(hwdev->hwif) != HINIC_VF) {
+		dev_err(&hwdev->hwif->pdev->dev, "Params error, func_type: %d\n",
+			HINIC_FUNC_TYPE(hwdev->hwif));
+		return -EINVAL;
+	}
+
+	return hinic_mbox_to_func(f2f, mod, cmd,
+				  hinic_pf_id_of_vf_hw(hwdev->hwif), buf_in,
+				  in_size, buf_out, out_size, timeout);
+}
+
+int hinic_mbox_to_vf(struct hinic_hwdev *hwdev,
+		     enum hinic_mod_type mod, u16 vf_id, u8 cmd, void *buf_in,
+		     u16 in_size, void *buf_out, u16 *out_size, u32 timeout)
+{
+	struct hinic_mbox_func_to_func *f2f;
+	u16 glb_func_id;
+	int ret;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	f2f = hwdev->func_to_func;
+	ret = mbox_func_params_valid(f2f, buf_in, in_size);
+	if (ret)
+		return ret;
+
+	if (HINIC_FUNC_TYPE(hwdev->hwif) == HINIC_VF) {
+		dev_err(&hwdev->hwif->pdev->dev, "Params error, func_type: %d\n",
+			HINIC_FUNC_TYPE(hwdev->hwif));
+		return -EINVAL;
+	}
+
+	if (vf_id == 0) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"VF id(%d) error!\n", vf_id);
+		return -EINVAL;
+	}
+
+	/* the destination is addressed by its global function id: the
+	 * offset of this PF's VFs plus the PF relative vf_id
+	 */
+	glb_func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+
+	return hinic_mbox_to_func(f2f, mod, cmd, glb_func_id, buf_in,
+				  in_size, buf_out, out_size, timeout);
+}
+
+static int init_mbox_info(struct hinic_recv_mbox *mbox_info)
+{
+	/* SEQ_ID_MAX_VAL means "no message in progress": from this state
+	 * check_mbox_seq_id_and_seg_len() only accepts sequence id 0
+	 */
+	mbox_info->seq_id = SEQ_ID_MAX_VAL;
+
+	/* buffer the incoming segments are copied into */
+	mbox_info->mbox = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+	if (!mbox_info->mbox)
+		return -ENOMEM;
+
+	/* second buffer of the same size, kept in ->buf_out */
+	mbox_info->buf_out = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+	if (!mbox_info->buf_out) {
+		/* undo the first allocation before reporting the failure */
+		kfree(mbox_info->mbox);
+		return -ENOMEM;
+	}
+
+	/* no work item is pending for this slot yet */
+	atomic_set(&mbox_info->msg_cnt, 0);
+
+	return 0;
+}
+
+static void clean_mbox_info(struct hinic_recv_mbox *mbox_info)
+{
+	kfree(mbox_info->buf_out);	/* both were set by init_mbox_info() */
+	kfree(mbox_info->mbox);
+}
+
+static int alloc_mbox_info(struct hinic_hwdev *hwdev,
+			   struct hinic_recv_mbox *mbox_info)
+{
+	u16 nr_ready, idx;
+	int err = 0;
+
+	for (nr_ready = 0; nr_ready < HINIC_MAX_FUNCTIONS; nr_ready++) {
+		err = init_mbox_info(&mbox_info[nr_ready]);
+		if (err)
+			break;
+	}
+
+	if (!err)
+		return 0;
+
+	dev_err(&hwdev->hwif->pdev->dev, "Failed to init function %d mbox info\n",
+		nr_ready);
+	/* release the slots that were initialized before the failure */
+	for (idx = 0; idx < nr_ready; idx++)
+		clean_mbox_info(&mbox_info[idx]);
+
+	return err;
+}
+
+static void free_mbox_info(struct hinic_recv_mbox *mbox_info)
+{
+	struct hinic_recv_mbox *end = mbox_info + HINIC_MAX_FUNCTIONS;
+
+	while (mbox_info < end)
+		clean_mbox_info(mbox_info++);
+}
+
+static void prepare_send_mbox(struct hinic_mbox_func_to_func *func_to_func)
+{
+	struct hinic_hwif *hwif = func_to_func->hwif;
+
+	func_to_func->send_mbox.data = MBOX_AREA(hwif);
+}
+
+static int alloc_mbox_wb_status(struct hinic_mbox_func_to_func *func_to_func)
+{
+	struct hinic_send_mbox *send_mbox = &func_to_func->send_mbox;
+	struct hinic_hwif *hwif = func_to_func->hwdev->hwif;
+
+	/* MBOX_WB_STATUS_LEN bytes of coherent memory for the status */
+	send_mbox->wb_vaddr = dma_alloc_coherent(&hwif->pdev->dev,
+						 MBOX_WB_STATUS_LEN,
+						 &send_mbox->wb_paddr,
+						 GFP_KERNEL);
+	if (!send_mbox->wb_vaddr)
+		return -ENOMEM;
+
+	send_mbox->wb_status = send_mbox->wb_vaddr;
+
+	/* program the DMA address of the write back area into the mailbox
+	 * result registers, high half first
+	 */
+	hinic_hwif_write_reg(hwif, HINIC_FUNC_CSR_MAILBOX_RESULT_H_OFF,
+			     upper_32_bits(send_mbox->wb_paddr));
+	hinic_hwif_write_reg(hwif, HINIC_FUNC_CSR_MAILBOX_RESULT_L_OFF,
+			     lower_32_bits(send_mbox->wb_paddr));
+
+	return 0;
+}
+
+static void free_mbox_wb_status(struct hinic_mbox_func_to_func *func_to_func)
+{
+	struct hinic_send_mbox *send_mbox = &func_to_func->send_mbox;
+	struct hinic_hwif *hwif = func_to_func->hwdev->hwif;
+
+	/* zero both result address registers first, then release the
+	 * coherent write back area they pointed to
+	 */
+	hinic_hwif_write_reg(hwif, HINIC_FUNC_CSR_MAILBOX_RESULT_H_OFF, 0);
+	hinic_hwif_write_reg(hwif, HINIC_FUNC_CSR_MAILBOX_RESULT_L_OFF, 0);
+
+	dma_free_coherent(&hwif->pdev->dev, MBOX_WB_STATUS_LEN,
+			  send_mbox->wb_vaddr, send_mbox->wb_paddr);
+}
+
+static int comm_pf_mbox_handler(void *handle, u16 vf_id, u8 cmd, void *buf_in,
+				u16 in_size, void *buf_out, u16 *out_size)
+{
+	struct hinic_hwdev *hwdev = handle;
+	struct hinic_pfhwdev *pfhwdev;
+	int err;
+
+	/* START_FLR gets an empty reply, hinic_msg_to_mgmt() is not called */
+	if (cmd == HINIC_COMM_CMD_START_FLR) {
+		*out_size = 0;
+		return 0;
+	}
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+	err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+				cmd, buf_in, in_size, buf_out, out_size,
+				HINIC_MGMT_MSG_SYNC);
+	if (err && err != HINIC_MBOX_PF_BUSY_ACTIVE_FW)
+		dev_err(&hwdev->hwif->pdev->dev,
+			"PF mbox common callback handler err: %d\n", err);
+
+	return err;
+}
+
+int hinic_func_to_func_init(struct hinic_hwdev *hwdev)
+{
+	struct hinic_mbox_func_to_func *func_to_func;
+	int err;
+
+	func_to_func = kzalloc(sizeof(*func_to_func), GFP_KERNEL);
+	if (!func_to_func)
+		return -ENOMEM;
+
+	hwdev->func_to_func = func_to_func;
+	func_to_func->hwdev = hwdev;
+	func_to_func->hwif = hwdev->hwif;
+	sema_init(&func_to_func->mbox_send_sem, 1);
+	sema_init(&func_to_func->msg_send_sem, 1);
+	spin_lock_init(&func_to_func->mbox_lock);
+	func_to_func->workq = create_singlethread_workqueue(HINIC_MBOX_WQ_NAME);
+	if (!func_to_func->workq) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to initialize MBOX workqueue\n");
+		err = -ENOMEM;
+		goto err_create_mbox_workq;
+	}
+
+	err = alloc_mbox_info(hwdev, func_to_func->mbox_send);
+	if (err) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to alloc mem for mbox_active\n");
+		goto err_alloc_mbox_for_send;
+	}
+
+	err = alloc_mbox_info(hwdev, func_to_func->mbox_resp);
+	if (err) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to alloc mem for mbox_passive\n");
+		goto err_alloc_mbox_for_resp;
+	}
+
+	err = alloc_mbox_wb_status(func_to_func);
+	if (err) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to alloc mbox write back status\n");
+		goto err_alloc_wb_status;
+	}
+
+	prepare_send_mbox(func_to_func);
+
+	hinic_aeq_register_hw_cb(&hwdev->aeqs, HINIC_MBX_FROM_FUNC,
+				 hwdev, hinic_mbox_func_aeqe_handler);
+	hinic_aeq_register_hw_cb(&hwdev->aeqs, HINIC_MBX_SEND_RSLT,
+				 hwdev, hinic_mbox_self_aeqe_handler);
+
+	if (!HINIC_IS_VF(hwdev->hwif))
+		hinic_register_pf_mbox_cb(hwdev, HINIC_MOD_COMM,
+					  comm_pf_mbox_handler);
+
+	return 0;
+
+err_alloc_wb_status:
+	free_mbox_info(func_to_func->mbox_resp);
+
+err_alloc_mbox_for_resp:
+	free_mbox_info(func_to_func->mbox_send);
+
+err_alloc_mbox_for_send:
+	destroy_workqueue(func_to_func->workq);
+
+err_create_mbox_workq:
+	/* do not leave a pointer to the freed context behind in @hwdev */
+	hwdev->func_to_func = NULL;
+	kfree(func_to_func);
+
+	return err;
+}
+
+void hinic_func_to_func_free(struct hinic_hwdev *hwdev)
+{
+	struct hinic_mbox_func_to_func *f2f = hwdev->func_to_func;
+
+	hinic_aeq_unregister_hw_cb(&hwdev->aeqs, HINIC_MBX_FROM_FUNC);
+	hinic_aeq_unregister_hw_cb(&hwdev->aeqs, HINIC_MBX_SEND_RSLT);
+	hinic_unregister_pf_mbox_cb(hwdev, HINIC_MOD_COMM);
+
+	/* the workqueue has to go first: queued work items still use the
+	 * mailbox resources that are released below
+	 */
+	destroy_workqueue(f2f->workq);
+
+	free_mbox_wb_status(f2f);
+	free_mbox_info(f2f->mbox_resp);
+	free_mbox_info(f2f->mbox_send);
+
+	kfree(f2f);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.h
new file mode 100644
index 000000000000..7b18559bfe80
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.h
@@ -0,0 +1,154 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+
+#ifndef HINIC_MBOX_H_
+#define HINIC_MBOX_H_
+
+#define HINIC_MBOX_PF_SEND_ERR		0x1
+#define HINIC_MBOX_PF_BUSY_ACTIVE_FW	0x2
+#define HINIC_MBOX_VF_CMD_ERROR		0x3
+
+#define HINIC_MAX_FUNCTIONS		512
+
+#define HINIC_MAX_PF_FUNCS		16
+
+#define HINIC_MBOX_WQ_NAME		"hinic_mbox"
+
+#define HINIC_FUNC_CSR_MAILBOX_DATA_OFF			0x80
+#define HINIC_FUNC_CSR_MAILBOX_CONTROL_OFF		0x0100
+#define HINIC_FUNC_CSR_MAILBOX_INT_OFFSET_OFF		0x0104
+#define HINIC_FUNC_CSR_MAILBOX_RESULT_H_OFF		0x0108
+#define HINIC_FUNC_CSR_MAILBOX_RESULT_L_OFF		0x010C
+
+enum hinic_mbox_ack_type {
+	MBOX_ACK,
+	MBOX_NO_ACK,
+};
+
+struct mbox_msg_info {
+	u8 msg_id;
+	u8 status;
+};
+
+struct hinic_recv_mbox {
+	struct completion	recv_done;
+	void			*mbox;
+	u8			cmd;
+	enum hinic_mod_type	mod;
+	u16			mbox_len;
+	void			*buf_out;
+	enum hinic_mbox_ack_type ack_type;
+	struct mbox_msg_info	msg_info;
+	u8			seq_id;
+	atomic_t		msg_cnt;
+};
+
+struct hinic_send_mbox {
+	struct completion	send_done;
+	u8			*data;
+
+	u64			*wb_status;
+	void			*wb_vaddr;
+	dma_addr_t		wb_paddr;
+};
+
+typedef void (*hinic_vf_mbox_cb)(void *handle, u8 cmd, void *buf_in,
+				u16 in_size, void *buf_out, u16 *out_size);
+typedef int (*hinic_pf_mbox_cb)(void *handle, u16 vf_id, u8 cmd, void *buf_in,
+				u16 in_size, void *buf_out, u16 *out_size);
+
+enum mbox_event_state {
+	EVENT_START = 0,
+	EVENT_FAIL,
+	EVENT_TIMEOUT,
+	EVENT_END,
+};
+
+enum hinic_mbox_cb_state {
+	HINIC_VF_MBOX_CB_REG = 0,
+	HINIC_VF_MBOX_CB_RUNNING,
+	HINIC_PF_MBOX_CB_REG,
+	HINIC_PF_MBOX_CB_RUNNING,
+	HINIC_PPF_MBOX_CB_REG,
+	HINIC_PPF_MBOX_CB_RUNNING,
+	HINIC_PPF_TO_PF_MBOX_CB_REG,
+	HINIC_PPF_TO_PF_MBOX_CB_RUNNIG,
+};
+
+struct hinic_mbox_func_to_func {
+	struct hinic_hwdev	*hwdev;
+	struct hinic_hwif		*hwif;
+
+	struct semaphore	mbox_send_sem;
+	struct semaphore	msg_send_sem;
+	struct hinic_send_mbox	send_mbox;
+
+	struct workqueue_struct *workq;
+
+	struct hinic_recv_mbox	mbox_resp[HINIC_MAX_FUNCTIONS];
+	struct hinic_recv_mbox	mbox_send[HINIC_MAX_FUNCTIONS];
+
+	hinic_vf_mbox_cb	vf_mbox_cb[HINIC_MOD_MAX];
+	hinic_pf_mbox_cb	pf_mbox_cb[HINIC_MOD_MAX];
+	unsigned long		pf_mbox_cb_state[HINIC_MOD_MAX];
+	unsigned long		vf_mbox_cb_state[HINIC_MOD_MAX];
+
+	u8 send_msg_id;
+	enum mbox_event_state event_flag;
+
+	/* lock for mbox event flag */
+	spinlock_t mbox_lock;
+};
+
+struct hinic_mbox_work {
+	struct work_struct work;
+	u16 src_func_idx;
+	struct hinic_mbox_func_to_func *func_to_func;
+	struct hinic_recv_mbox *recv_mbox;
+};
+
+struct vf_cmd_msg_handle {
+	u8 cmd;
+	int (*cmd_msg_handler)(void *hwdev, u16 vf_id,
+			       void *buf_in, u16 in_size,
+			       void *buf_out, u16 *out_size);
+};
+
+int hinic_register_pf_mbox_cb(struct hinic_hwdev *hwdev,
+			      enum hinic_mod_type mod,
+			      hinic_pf_mbox_cb callback);
+
+int hinic_register_vf_mbox_cb(struct hinic_hwdev *hwdev,
+			      enum hinic_mod_type mod,
+			      hinic_vf_mbox_cb callback);
+
+void hinic_unregister_pf_mbox_cb(struct hinic_hwdev *hwdev,
+				 enum hinic_mod_type mod);
+
+void hinic_unregister_vf_mbox_cb(struct hinic_hwdev *hwdev,
+				 enum hinic_mod_type mod);
+
+void hinic_mbox_func_aeqe_handler(void *handle, void *header, u8 size);
+
+void hinic_mbox_self_aeqe_handler(void *handle, void *header, u8 size);
+
+int hinic_func_to_func_init(struct hinic_hwdev *hwdev);
+
+void hinic_func_to_func_free(struct hinic_hwdev *hwdev);
+
+int hinic_mbox_to_pf(struct hinic_hwdev *hwdev, enum hinic_mod_type mod,
+		     u8 cmd, void *buf_in, u16 in_size, void *buf_out,
+		     u16 *out_size, u32 timeout);
+
+int hinic_mbox_to_func(struct hinic_mbox_func_to_func *func_to_func,
+		       enum hinic_mod_type mod, u16 cmd, u16 dst_func,
+		       void *buf_in, u16 in_size, void *buf_out,
+		       u16 *out_size, u32 timeout);
+
+int hinic_mbox_to_vf(struct hinic_hwdev *hwdev,
+		     enum hinic_mod_type mod, u16 vf_id, u8 cmd, void *buf_in,
+		     u16 in_size, void *buf_out, u16 *out_size, u32 timeout);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
index 182fba17b643..a5ab044f98cc 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
@@ -60,6 +60,7 @@ enum hinic_cfg_cmd {
 };
 
 enum hinic_comm_cmd {
+	HINIC_COMM_CMD_START_FLR          = 0x1,
 	HINIC_COMM_CMD_IO_STATUS_GET    = 0x3,
 
 	HINIC_COMM_CMD_CMDQ_CTXT_SET    = 0x10,
-- 
cgit v1.2.3-59-g8ed1b


From 7dd29ee128654702bd493ecec0bb22c2c5f0f395 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Sat, 25 Apr 2020 01:21:10 +0000
Subject: hinic: add sriov feature support

Add basic SR-IOV support, including initialization and the tx/rx
capabilities of virtual functions.

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/Makefile        |   2 +-
 drivers/net/ethernet/huawei/hinic/hinic_dev.h     |   3 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c |  18 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h |   2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 123 ++--
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  46 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c  |  98 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h  |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c   |  49 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h   |  23 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c |  17 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h |  11 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c   |   7 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h   |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c   |   9 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h   |   6 +-
 drivers/net/ethernet/huawei/hinic/hinic_main.c    |  77 ++-
 drivers/net/ethernet/huawei/hinic/hinic_port.c    |  76 +--
 drivers/net/ethernet/huawei/hinic/hinic_port.h    |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_rx.c      |  15 +-
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c   | 698 ++++++++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_sriov.h   |  79 +++
 drivers/net/ethernet/huawei/hinic/hinic_tx.c      |  17 +-
 23 files changed, 1195 insertions(+), 193 deletions(-)
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_sriov.c
 create mode 100644 drivers/net/ethernet/huawei/hinic/hinic_sriov.h

diff --git a/drivers/net/ethernet/huawei/hinic/Makefile b/drivers/net/ethernet/huawei/hinic/Makefile
index a73862a64690..32a011ca44c3 100644
--- a/drivers/net/ethernet/huawei/hinic/Makefile
+++ b/drivers/net/ethernet/huawei/hinic/Makefile
@@ -4,4 +4,4 @@ obj-$(CONFIG_HINIC) += hinic.o
 hinic-y := hinic_main.o hinic_tx.o hinic_rx.o hinic_port.o hinic_hw_dev.o \
 	   hinic_hw_io.o hinic_hw_qp.o hinic_hw_cmdq.o hinic_hw_wq.o \
 	   hinic_hw_mgmt.o hinic_hw_api_cmd.o hinic_hw_eqs.o hinic_hw_if.o \
-	   hinic_common.o hinic_ethtool.o hinic_hw_mbox.o
+	   hinic_common.o hinic_ethtool.o hinic_hw_mbox.o hinic_sriov.o
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index a209b14160cc..a621ebbf7610 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -16,6 +16,7 @@
 #include "hinic_hw_dev.h"
 #include "hinic_tx.h"
 #include "hinic_rx.h"
+#include "hinic_sriov.h"
 
 #define HINIC_DRV_NAME          "hinic"
 
@@ -23,6 +24,7 @@ enum hinic_flags {
 	HINIC_LINK_UP = BIT(0),
 	HINIC_INTF_UP = BIT(1),
 	HINIC_RSS_ENABLE = BIT(2),
+	HINIC_LINK_DOWN = BIT(3),
 };
 
 struct hinic_rx_mode_work {
@@ -78,6 +80,7 @@ struct hinic_dev {
 	struct hinic_rss_type		rss_type;
 	u8				*rss_hkey_user;
 	s32				*rss_indir_user;
+	struct hinic_sriov_info sriov_info;
 };
 
 #endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 5f2d57d1b2d3..33c5333657c1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -64,7 +64,7 @@
 #define CMDQ_WQE_SIZE                   64
 #define CMDQ_DEPTH                      SZ_4K
 
-#define CMDQ_WQ_PAGE_SIZE               SZ_4K
+#define CMDQ_WQ_PAGE_SIZE               SZ_256K
 
 #define WQE_LCMD_SIZE                   64
 #define WQE_SCMD_SIZE                   64
@@ -705,7 +705,7 @@ static void cmdq_init_queue_ctxt(struct hinic_cmdq_ctxt *cmdq_ctxt,
 	/* The data in the HW is in Big Endian Format */
 	wq_first_page_paddr = be64_to_cpu(*wq->block_vaddr);
 
-	pfn = CMDQ_PFN(wq_first_page_paddr, wq->wq_page_size);
+	pfn = CMDQ_PFN(wq_first_page_paddr, SZ_4K);
 
 	ctxt_info->curr_wqe_page_pfn =
 		HINIC_CMDQ_CTXT_PAGE_INFO_SET(pfn, CURR_WQE_PAGE_PFN)   |
@@ -714,16 +714,19 @@ static void cmdq_init_queue_ctxt(struct hinic_cmdq_ctxt *cmdq_ctxt,
 		HINIC_CMDQ_CTXT_PAGE_INFO_SET(1, CEQ_EN)                |
 		HINIC_CMDQ_CTXT_PAGE_INFO_SET(cmdq->wrapped, WRAPPED);
 
-	/* block PFN - Read Modify Write */
-	cmdq_first_block_paddr = cmdq_pages->page_paddr;
+	if (wq->num_q_pages != 1) {
+		/* block PFN - Read Modify Write */
+		cmdq_first_block_paddr = cmdq_pages->page_paddr;
 
-	pfn = CMDQ_PFN(cmdq_first_block_paddr, wq->wq_page_size);
+		pfn = CMDQ_PFN(cmdq_first_block_paddr, wq->wq_page_size);
+	}
 
 	ctxt_info->wq_block_pfn =
 		HINIC_CMDQ_CTXT_BLOCK_INFO_SET(pfn, WQ_BLOCK_PFN) |
 		HINIC_CMDQ_CTXT_BLOCK_INFO_SET(atomic_read(&wq->cons_idx), CI);
 
 	cmdq_ctxt->func_idx = HINIC_HWIF_FUNC_IDX(cmdqs->hwif);
+	cmdq_ctxt->ppf_idx = HINIC_HWIF_PPF_IDX(cmdqs->hwif);
 	cmdq_ctxt->cmdq_type  = cmdq->cmdq_type;
 }
 
@@ -795,11 +798,6 @@ static int init_cmdqs_ctxt(struct hinic_hwdev *hwdev,
 	size_t cmdq_ctxts_size;
 	int err;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI function type\n");
-		return -EINVAL;
-	}
-
 	cmdq_ctxts_size = HINIC_MAX_CMDQ_TYPES * sizeof(*cmdq_ctxts);
 	cmdq_ctxts = devm_kzalloc(&pdev->dev, cmdq_ctxts_size, GFP_KERNEL);
 	if (!cmdq_ctxts)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
index 7a434b653faa..3e4b0aef9fe6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.h
@@ -122,7 +122,7 @@ struct hinic_cmdq_ctxt {
 
 	u16     func_idx;
 	u8      cmdq_type;
-	u8      rsvd1[1];
+	u8      ppf_idx;
 
 	u8      rsvd2[4];
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index f2cf6f7ffc34..e5cab58e4ddd 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -15,7 +15,9 @@
 #include <linux/jiffies.h>
 #include <linux/log2.h>
 #include <linux/err.h>
+#include <linux/netdevice.h>
 
+#include "hinic_sriov.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
 #include "hinic_hw_mgmt.h"
@@ -46,20 +48,6 @@ enum hw_ioctxt_set_cmdq_depth {
 	HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT,
 };
 
-/* HW struct */
-struct hinic_dev_cap {
-	u8      status;
-	u8      version;
-	u8      rsvd0[6];
-
-	u8      rsvd1[5];
-	u8      intr_type;
-	u8      rsvd2[66];
-	u16     max_sqs;
-	u16     max_rqs;
-	u8      rsvd3[208];
-};
-
 /**
  * get_capability - convert device capabilities to NIC capabilities
  * @hwdev: the HW device to set and convert device capabilities for
@@ -67,16 +55,13 @@ struct hinic_dev_cap {
  *
  * Return 0 - Success, negative - Failure
  **/
-static int get_capability(struct hinic_hwdev *hwdev,
-			  struct hinic_dev_cap *dev_cap)
+static int parse_capability(struct hinic_hwdev *hwdev,
+			    struct hinic_dev_cap *dev_cap)
 {
 	struct hinic_cap *nic_cap = &hwdev->nic_cap;
 	int num_aeqs, num_ceqs, num_irqs;
 
-	if (!HINIC_IS_PF(hwdev->hwif) && !HINIC_IS_PPF(hwdev->hwif))
-		return -EINVAL;
-
-	if (dev_cap->intr_type != INTR_MSIX_TYPE)
+	if (!HINIC_IS_VF(hwdev->hwif) && dev_cap->intr_type != INTR_MSIX_TYPE)
 		return -EFAULT;
 
 	num_aeqs = HINIC_HWIF_NUM_AEQS(hwdev->hwif);
@@ -89,13 +74,19 @@ static int get_capability(struct hinic_hwdev *hwdev,
 	if (nic_cap->num_qps > HINIC_Q_CTXT_MAX)
 		nic_cap->num_qps = HINIC_Q_CTXT_MAX;
 
-	nic_cap->max_qps = dev_cap->max_sqs + 1;
-	if (nic_cap->max_qps != (dev_cap->max_rqs + 1))
-		return -EFAULT;
+	if (!HINIC_IS_VF(hwdev->hwif))
+		nic_cap->max_qps = dev_cap->max_sqs + 1;
+	else
+		nic_cap->max_qps = dev_cap->max_sqs;
 
 	if (nic_cap->num_qps > nic_cap->max_qps)
 		nic_cap->num_qps = nic_cap->max_qps;
 
+	if (!HINIC_IS_VF(hwdev->hwif)) {
+		nic_cap->max_vf = dev_cap->max_vf;
+		nic_cap->max_vf_qps = dev_cap->max_vf_sqs + 1;
+	}
+
 	return 0;
 }
 
@@ -105,27 +96,26 @@ static int get_capability(struct hinic_hwdev *hwdev,
  *
  * Return 0 - Success, negative - Failure
  **/
-static int get_cap_from_fw(struct hinic_pfhwdev *pfhwdev)
+static int get_capability(struct hinic_pfhwdev *pfhwdev)
 {
 	struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_dev_cap dev_cap;
-	u16 in_len, out_len;
+	u16 out_len;
 	int err;
 
-	in_len = 0;
 	out_len = sizeof(dev_cap);
 
 	err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_CFGM,
-				HINIC_CFG_NIC_CAP, &dev_cap, in_len, &dev_cap,
-				&out_len, HINIC_MGMT_MSG_SYNC);
+				HINIC_CFG_NIC_CAP, &dev_cap, sizeof(dev_cap),
+				&dev_cap, &out_len, HINIC_MGMT_MSG_SYNC);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to get capability from FW\n");
 		return err;
 	}
 
-	return get_capability(hwdev, &dev_cap);
+	return parse_capability(hwdev, &dev_cap);
 }
 
 /**
@@ -144,15 +134,14 @@ static int get_dev_cap(struct hinic_hwdev *hwdev)
 	switch (HINIC_FUNC_TYPE(hwif)) {
 	case HINIC_PPF:
 	case HINIC_PF:
+	case HINIC_VF:
 		pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
-
-		err = get_cap_from_fw(pfhwdev);
+		err = get_capability(pfhwdev);
 		if (err) {
-			dev_err(&pdev->dev, "Failed to get capability from FW\n");
+			dev_err(&pdev->dev, "Failed to get capability\n");
 			return err;
 		}
 		break;
-
 	default:
 		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
 		return -EINVAL;
@@ -225,15 +214,8 @@ static void disable_msix(struct hinic_hwdev *hwdev)
 int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
 		       void *buf_in, u16 in_size, void *buf_out, u16 *out_size)
 {
-	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
 
 	return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC, cmd,
@@ -252,14 +234,9 @@ static int init_fw_ctxt(struct hinic_hwdev *hwdev)
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_cmd_fw_ctxt fw_ctxt;
-	u16 out_size;
+	u16 out_size = sizeof(fw_ctxt);
 	int err;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	fw_ctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
 	fw_ctxt.rx_buf_sz = HINIC_RX_BUF_SZ;
 
@@ -288,14 +265,8 @@ static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth,
 {
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_cmd_hw_ioctxt hw_ioctxt;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
 	hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwif);
 
@@ -374,11 +345,6 @@ static int clear_io_resources(struct hinic_hwdev *hwdev)
 	struct hinic_pfhwdev *pfhwdev;
 	int err;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	/* sleep 100ms to wait for firmware stopping I/O */
 	msleep(100);
 
@@ -410,14 +376,8 @@ static int set_resources_state(struct hinic_hwdev *hwdev,
 {
 	struct hinic_cmd_set_res_state res_state;
 	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	res_state.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
 	res_state.state = state;
 
@@ -441,8 +401,8 @@ static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
 {
 	struct hinic_cmd_base_qpn cmd_base_qpn;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(cmd_base_qpn);
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
 	int err;
 
 	cmd_base_qpn.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
@@ -488,7 +448,7 @@ int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
 	num_ceqs = HINIC_HWIF_NUM_CEQS(hwif);
 
 	ceq_msix_entries = &hwdev->msix_entries[num_aeqs];
-
+	func_to_io->hwdev = hwdev;
 	err = hinic_io_init(func_to_io, hwif, nic_cap->max_qps, num_ceqs,
 			    ceq_msix_entries);
 	if (err) {
@@ -558,17 +518,10 @@ void hinic_hwdev_cb_register(struct hinic_hwdev *hwdev,
 					     u16 in_size, void *buf_out,
 					     u16 *out_size))
 {
-	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 	struct hinic_nic_cb *nic_cb;
 	u8 cmd_cb;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "unsupported PCI Function type\n");
-		return;
-	}
-
 	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
 
 	cmd_cb = cmd - HINIC_MGMT_MSG_CMD_BASE;
@@ -588,15 +541,12 @@ void hinic_hwdev_cb_unregister(struct hinic_hwdev *hwdev,
 			       enum hinic_mgmt_msg_cmd cmd)
 {
 	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 	struct hinic_nic_cb *nic_cb;
 	u8 cmd_cb;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "unsupported PCI Function type\n");
+	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif))
 		return;
-	}
 
 	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
 
@@ -742,12 +692,6 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
 		return ERR_PTR(err);
 	}
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		err = -EFAULT;
-		goto err_func_type;
-	}
-
 	pfhwdev = devm_kzalloc(&pdev->dev, sizeof(*pfhwdev), GFP_KERNEL);
 	if (!pfhwdev) {
 		err = -ENOMEM;
@@ -791,6 +735,12 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
 		goto err_dev_cap;
 	}
 
+	err = hinic_vf_func_init(hwdev);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to init nic mbox\n");
+		goto err_vf_func_init;
+	}
+
 	err = init_fw_ctxt(hwdev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to init function table\n");
@@ -807,6 +757,8 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
 
 err_resources_state:
 err_init_fw_ctxt:
+	hinic_vf_func_free(hwdev);
+err_vf_func_init:
 err_dev_cap:
 	free_pfhwdev(pfhwdev);
 
@@ -818,7 +770,6 @@ err_aeqs_init:
 
 err_init_msix:
 err_pfhwdev_alloc:
-err_func_type:
 	hinic_free_hwif(hwif);
 	return ERR_PTR(err);
 }
@@ -949,15 +900,9 @@ int hinic_hwdev_hw_ci_addr_set(struct hinic_hwdev *hwdev, struct hinic_sq *sq,
 {
 	struct hinic_qp *qp = container_of(sq, struct hinic_qp, sq);
 	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_pfhwdev *pfhwdev;
 	struct hinic_cmd_hw_ci hw_ci;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "Unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	hw_ci.dma_attr_off  = 0;
 	hw_ci.pending_limit = pending_limit;
 	hw_ci.coalesc_timer = coalesc_timer;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 2574086aa314..531d1072e0df 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -23,12 +23,20 @@
 #define HINIC_MGMT_NUM_MSG_CMD  (HINIC_MGMT_MSG_CMD_MAX - \
 				 HINIC_MGMT_MSG_CMD_BASE)
 
+#define HINIC_PF_SET_VF_ALREADY				0x4
+#define HINIC_MGMT_STATUS_EXIST				0x6
+
 struct hinic_cap {
 	u16     max_qps;
 	u16     num_qps;
+	u8		max_vf;
+	u16     max_vf_qps;
 };
 
 enum hinic_port_cmd {
+	HINIC_PORT_CMD_VF_REGISTER = 0x0,
+	HINIC_PORT_CMD_VF_UNREGISTER = 0x1,
+
 	HINIC_PORT_CMD_CHANGE_MTU       = 2,
 
 	HINIC_PORT_CMD_ADD_VLAN         = 3,
@@ -84,10 +92,18 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
 
+	HINIC_PORT_CMD_SET_VF_VLAN	= 106,
+
+	HINIC_PORT_CMD_CLR_VF_VLAN,
+
 	HINIC_PORT_CMD_SET_TSO          = 112,
 
 	HINIC_PORT_CMD_SET_RQ_IQ_MAP	= 115,
 
+	HINIC_PORT_CMD_LINK_STATUS_REPORT = 160,
+
+	HINIC_PORT_CMD_UPDATE_MAC = 164,
+
 	HINIC_PORT_CMD_GET_CAP          = 170,
 
 	HINIC_PORT_CMD_SET_LRO_TIMER	= 244,
@@ -192,6 +208,17 @@ struct hinic_cmd_set_res_state {
 	u32     rsvd2;
 };
 
+struct hinic_ceq_ctrl_reg {
+	u8 status;
+	u8 version;
+	u8 rsvd0[6];
+
+	u16 func_id;
+	u16 q_id;
+	u32 ctrl0;
+	u32 ctrl1;
+};
+
 struct hinic_cmd_base_qpn {
 	u8      status;
 	u8      version;
@@ -248,6 +275,25 @@ struct hinic_pfhwdev {
 	struct hinic_nic_cb             nic_cb[HINIC_MGMT_NUM_MSG_CMD];
 };
 
+struct hinic_dev_cap {
+	u8      status;
+	u8      version;
+	u8      rsvd0[6];
+
+	u8      rsvd1[5];
+	u8      intr_type;
+	u8	max_cos_id;
+	u8	er_id;
+	u8	port_id;
+	u8      max_vf;
+	u8      rsvd2[62];
+	u16     max_sqs;
+	u16	max_rqs;
+	u16	max_vf_sqs;
+	u16     max_vf_rqs;
+	u8      rsvd3[204];
+};
+
 void hinic_hwdev_cb_register(struct hinic_hwdev *hwdev,
 			     enum hinic_mgmt_msg_cmd cmd, void *handle,
 			     void (*handler)(void *handle, void *buf_in,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
index c0b6bcb067cd..397936cac304 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.c
@@ -17,6 +17,7 @@
 #include <asm/byteorder.h>
 #include <asm/barrier.h>
 
+#include "hinic_hw_dev.h"
 #include "hinic_hw_csr.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
@@ -416,11 +417,11 @@ static irqreturn_t ceq_interrupt(int irq, void *data)
 	return IRQ_HANDLED;
 }
 
-static void set_ctrl0(struct hinic_eq *eq)
+static u32 get_ctrl0_val(struct hinic_eq *eq, u32 addr)
 {
 	struct msix_entry *msix_entry = &eq->msix_entry;
 	enum hinic_eq_type type = eq->type;
-	u32 addr, val, ctrl0;
+	u32 val, ctrl0;
 
 	if (type == HINIC_AEQ) {
 		/* RMW Ctrl0 */
@@ -440,9 +441,7 @@ static void set_ctrl0(struct hinic_eq *eq)
 			HINIC_AEQ_CTRL_0_SET(EQ_INT_MODE_ARMED, INT_MODE);
 
 		val |= ctrl0;
-
-		hinic_hwif_write_reg(eq->hwif, addr, val);
-	} else if (type == HINIC_CEQ) {
+	} else {
 		/* RMW Ctrl0 */
 		addr = HINIC_CSR_CEQ_CTRL_0_ADDR(eq->q_id);
 
@@ -462,16 +461,28 @@ static void set_ctrl0(struct hinic_eq *eq)
 			HINIC_CEQ_CTRL_0_SET(EQ_INT_MODE_ARMED, INTR_MODE);
 
 		val |= ctrl0;
-
-		hinic_hwif_write_reg(eq->hwif, addr, val);
 	}
+	return val;
 }
 
-static void set_ctrl1(struct hinic_eq *eq)
+static void set_ctrl0(struct hinic_eq *eq)
 {
+	u32 val, addr;
+
+	if (eq->type == HINIC_AEQ)
+		addr = HINIC_CSR_AEQ_CTRL_0_ADDR(eq->q_id);
+	else
+		addr = HINIC_CSR_CEQ_CTRL_0_ADDR(eq->q_id);
+
+	val = get_ctrl0_val(eq, addr);
+
+	hinic_hwif_write_reg(eq->hwif, addr, val);
+}
+
+static u32 get_ctrl1_val(struct hinic_eq *eq, u32 addr)
+{
+	u32 page_size_val, elem_size, val, ctrl1;
 	enum hinic_eq_type type = eq->type;
-	u32 page_size_val, elem_size;
-	u32 addr, val, ctrl1;
 
 	if (type == HINIC_AEQ) {
 		/* RMW Ctrl1 */
@@ -491,9 +502,7 @@ static void set_ctrl1(struct hinic_eq *eq)
 			HINIC_AEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
 
 		val |= ctrl1;
-
-		hinic_hwif_write_reg(eq->hwif, addr, val);
-	} else if (type == HINIC_CEQ) {
+	} else {
 		/* RMW Ctrl1 */
 		addr = HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id);
 
@@ -508,19 +517,70 @@ static void set_ctrl1(struct hinic_eq *eq)
 			HINIC_CEQ_CTRL_1_SET(page_size_val, PAGE_SIZE);
 
 		val |= ctrl1;
+	}
+	return val;
+}
 
-		hinic_hwif_write_reg(eq->hwif, addr, val);
+static void set_ctrl1(struct hinic_eq *eq)
+{
+	u32 addr, val;
+
+	if (eq->type == HINIC_AEQ)
+		addr = HINIC_CSR_AEQ_CTRL_1_ADDR(eq->q_id);
+	else
+		addr = HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id);
+
+	val = get_ctrl1_val(eq, addr);
+
+	hinic_hwif_write_reg(eq->hwif, addr, val);
+}
+
+static int set_ceq_ctrl_reg(struct hinic_eq *eq)
+{
+	struct hinic_ceq_ctrl_reg ceq_ctrl = {0};
+	struct hinic_hwdev *hwdev = eq->hwdev;
+	u16 out_size = sizeof(ceq_ctrl);
+	u16 in_size = sizeof(ceq_ctrl);
+	struct hinic_pfhwdev *pfhwdev;
+	u32 addr;
+	int err;
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+	addr = HINIC_CSR_CEQ_CTRL_0_ADDR(eq->q_id);
+	ceq_ctrl.ctrl0 = get_ctrl0_val(eq, addr);
+	addr = HINIC_CSR_CEQ_CTRL_1_ADDR(eq->q_id);
+	ceq_ctrl.ctrl1 = get_ctrl1_val(eq, addr);
+
+	ceq_ctrl.func_id = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+	ceq_ctrl.q_id = eq->q_id;
+
+	err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+				HINIC_COMM_CMD_CEQ_CTRL_REG_WR_BY_UP,
+				&ceq_ctrl, in_size,
+				&ceq_ctrl, &out_size, HINIC_MGMT_MSG_SYNC);
+	if (err || !out_size || ceq_ctrl.status) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"Failed to set ceq %d ctrl reg, err: %d status: 0x%x, out_size: 0x%x\n",
+			eq->q_id, err, ceq_ctrl.status, out_size);
+		return -EFAULT;
 	}
+
+	return 0;
 }
 
 /**
  * set_eq_ctrls - setting eq's ctrl registers
  * @eq: the Event Queue for setting
  **/
-static void set_eq_ctrls(struct hinic_eq *eq)
+static int set_eq_ctrls(struct hinic_eq *eq)
 {
+	if (HINIC_IS_VF(eq->hwif) && eq->type == HINIC_CEQ)
+		return set_ceq_ctrl_reg(eq);
+
 	set_ctrl0(eq);
 	set_ctrl1(eq);
+	return 0;
 }
 
 /**
@@ -703,7 +763,12 @@ static int init_eq(struct hinic_eq *eq, struct hinic_hwif *hwif,
 		return -EINVAL;
 	}
 
-	set_eq_ctrls(eq);
+	err = set_eq_ctrls(eq);
+	if (err) {
+		dev_err(&pdev->dev, "Failed to set eq ctrls\n");
+		return err;
+	}
+
 	eq_update_ci(eq, EQ_ARMED);
 
 	err = alloc_eq_pages(eq);
@@ -859,6 +924,7 @@ int hinic_ceqs_init(struct hinic_ceqs *ceqs, struct hinic_hwif *hwif,
 	ceqs->num_ceqs = num_ceqs;
 
 	for (q_id = 0; q_id < num_ceqs; q_id++) {
+		ceqs->ceq[q_id].hwdev = ceqs->hwdev;
 		err = init_eq(&ceqs->ceq[q_id], hwif, HINIC_CEQ, q_id, q_len,
 			      page_size, msix_entries[q_id]);
 		if (err) {
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
index d73256da4b80..74b9ff90640c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_eqs.h
@@ -172,7 +172,7 @@ struct hinic_eq_work {
 
 struct hinic_eq {
 	struct hinic_hwif       *hwif;
-
+	struct hinic_hwdev      *hwdev;
 	enum hinic_eq_type      type;
 	int                     q_id;
 	u32                     q_len;
@@ -220,7 +220,7 @@ struct hinic_ceq_cb {
 
 struct hinic_ceqs {
 	struct hinic_hwif       *hwif;
-
+	struct hinic_hwdev		*hwdev;
 	struct hinic_eq         ceq[HINIC_MAX_CEQS];
 	int                     num_ceqs;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index d66f86fa3f46..a4581c988a63 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -15,6 +15,7 @@
 #include <linux/io.h>
 #include <linux/err.h>
 
+#include "hinic_hw_dev.h"
 #include "hinic_hw_if.h"
 #include "hinic_hw_eqs.h"
 #include "hinic_hw_wqe.h"
@@ -34,6 +35,8 @@
 #define DB_IDX(db, db_base)             \
 	(((unsigned long)(db) - (unsigned long)(db_base)) / HINIC_DB_PAGE_SIZE)
 
+#define HINIC_PAGE_SIZE_HW(pg_size)	((u8)ilog2((u32)((pg_size) >> 12)))
+
 enum io_cmd {
 	IO_CMD_MODIFY_QUEUE_CTXT = 0,
 	IO_CMD_CLEAN_QUEUE_CTXT,
@@ -484,6 +487,33 @@ void hinic_io_destroy_qps(struct hinic_func_to_io *func_to_io, int num_qps)
 	devm_kfree(&pdev->dev, func_to_io->qps);
 }
 
+int hinic_set_wq_page_size(struct hinic_hwdev *hwdev, u16 func_idx,
+			   u32 page_size)
+{
+	struct hinic_wq_page_size page_size_info = {0};
+	u16 out_size = sizeof(page_size_info);
+	struct hinic_pfhwdev *pfhwdev;
+	int err;
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+	page_size_info.func_idx = func_idx;
+	page_size_info.ppf_idx = HINIC_HWIF_PPF_IDX(hwdev->hwif);
+	page_size_info.page_size = HINIC_PAGE_SIZE_HW(page_size);
+
+	err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+				HINIC_COMM_CMD_PAGESIZE_SET, &page_size_info,
+				sizeof(page_size_info), &page_size_info,
+				&out_size, HINIC_MGMT_MSG_SYNC);
+	if (err || !out_size || page_size_info.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set wq page size, err: %d, status: 0x%x, out_size: 0x%0x\n",
+			err, page_size_info.status, out_size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
 /**
  * hinic_io_init - Initialize the IO components
  * @func_to_io: func to io channel that holds the IO components
@@ -506,6 +536,7 @@ int hinic_io_init(struct hinic_func_to_io *func_to_io,
 	func_to_io->hwif = hwif;
 	func_to_io->qps = NULL;
 	func_to_io->max_qps = max_qps;
+	func_to_io->ceqs.hwdev = func_to_io->hwdev;
 
 	err = hinic_ceqs_init(&func_to_io->ceqs, hwif, num_ceqs,
 			      HINIC_DEFAULT_CEQ_LEN, HINIC_EQ_PAGE_SIZE,
@@ -541,6 +572,14 @@ int hinic_io_init(struct hinic_func_to_io *func_to_io,
 		func_to_io->cmdq_db_area[cmdq] = db_area;
 	}
 
+	err = hinic_set_wq_page_size(func_to_io->hwdev,
+				     HINIC_HWIF_FUNC_IDX(hwif),
+				     HINIC_DEFAULT_WQ_PAGE_SIZE);
+	if (err) {
+		dev_err(&func_to_io->hwif->pdev->dev, "Failed to set wq page size\n");
+		goto init_wq_pg_size_err;
+	}
+
 	err = hinic_init_cmdqs(&func_to_io->cmdqs, hwif,
 			       func_to_io->cmdq_db_area);
 	if (err) {
@@ -551,6 +590,11 @@ int hinic_io_init(struct hinic_func_to_io *func_to_io,
 	return 0;
 
 err_init_cmdqs:
+	if (!HINIC_IS_VF(func_to_io->hwif))
+		hinic_set_wq_page_size(func_to_io->hwdev,
+				       HINIC_HWIF_FUNC_IDX(hwif),
+				       HINIC_HW_WQ_PAGE_SIZE);
+init_wq_pg_size_err:
 err_db_area:
 	for (type = HINIC_CMDQ_SYNC; type < cmdq; type++)
 		return_db_area(func_to_io, func_to_io->cmdq_db_area[type]);
@@ -575,6 +619,11 @@ void hinic_io_free(struct hinic_func_to_io *func_to_io)
 
 	hinic_free_cmdqs(&func_to_io->cmdqs);
 
+	if (!HINIC_IS_VF(func_to_io->hwif))
+		hinic_set_wq_page_size(func_to_io->hwdev,
+				       HINIC_HWIF_FUNC_IDX(func_to_io->hwif),
+				       HINIC_HW_WQ_PAGE_SIZE);
+
 	for (cmdq = HINIC_CMDQ_SYNC; cmdq < HINIC_MAX_CMDQ_TYPES; cmdq++)
 		return_db_area(func_to_io, func_to_io->cmdq_db_area[cmdq]);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
index cac2b722e7dc..28c0594f636d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
@@ -20,6 +20,8 @@
 
 #define HINIC_DB_PAGE_SIZE      SZ_4K
 #define HINIC_DB_SIZE           SZ_4M
+#define HINIC_HW_WQ_PAGE_SIZE	SZ_4K
+#define HINIC_DEFAULT_WQ_PAGE_SIZE SZ_256K
 
 #define HINIC_DB_MAX_AREAS      (HINIC_DB_SIZE / HINIC_DB_PAGE_SIZE)
 
@@ -47,7 +49,7 @@ struct hinic_free_db_area {
 
 struct hinic_func_to_io {
 	struct hinic_hwif       *hwif;
-
+	struct hinic_hwdev      *hwdev;
 	struct hinic_ceqs       ceqs;
 
 	struct hinic_wqs        wqs;
@@ -69,8 +71,27 @@ struct hinic_func_to_io {
 	void __iomem                    *cmdq_db_area[HINIC_MAX_CMDQ_TYPES];
 
 	struct hinic_cmdqs              cmdqs;
+
+	u16			max_vfs;
+	struct vf_data_storage	*vf_infos;
+	u8			link_status;
 };
 
+struct hinic_wq_page_size {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_idx;
+	u8	ppf_idx;
+	u8	page_size;
+
+	u32	rsvd1;
+};
+
+int hinic_set_wq_page_size(struct hinic_hwdev *hwdev, u16 func_idx,
+			   u32 page_size);
+
 int hinic_io_create_qps(struct hinic_func_to_io *func_to_io,
 			u16 base_qpn, int num_qps,
 			struct msix_entry *sq_msix_entries,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
index 8995e32dd1c0..eef855f11a01 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.c
@@ -353,7 +353,11 @@ int hinic_msg_to_mgmt(struct hinic_pf_to_mgmt *pf_to_mgmt,
 		return -EINVAL;
 	}
 
-	return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size,
+	if (HINIC_IS_VF(hwif))
+		return hinic_mbox_to_pf(pf_to_mgmt->hwdev, mod, cmd, buf_in,
+					in_size, buf_out, out_size, 0);
+	else
+		return msg_to_mgmt_sync(pf_to_mgmt, mod, cmd, buf_in, in_size,
 				buf_out, out_size, MGMT_DIRECT_SEND,
 				MSG_NOT_RESP);
 }
@@ -390,8 +394,8 @@ static void mgmt_recv_msg_handler(struct hinic_pf_to_mgmt *pf_to_mgmt,
 			    recv_msg->msg, recv_msg->msg_len,
 			    buf_out, &out_size);
 	else
-		dev_err(&pdev->dev, "No MGMT msg handler, mod = %d\n",
-			recv_msg->mod);
+		dev_err(&pdev->dev, "No MGMT msg handler, mod: %d, cmd: %d\n",
+			recv_msg->mod, recv_msg->cmd);
 
 	mgmt_cb->state &= ~HINIC_MGMT_CB_RUNNING;
 
@@ -553,6 +557,10 @@ int hinic_pf_to_mgmt_init(struct hinic_pf_to_mgmt *pf_to_mgmt,
 	int err;
 
 	pf_to_mgmt->hwif = hwif;
+	pf_to_mgmt->hwdev = hwdev;
+
+	if (HINIC_IS_VF(hwif))
+		return 0;
 
 	sema_init(&pf_to_mgmt->sync_msg_lock, 1);
 	pf_to_mgmt->sync_msg_id = 0;
@@ -584,6 +592,9 @@ void hinic_pf_to_mgmt_free(struct hinic_pf_to_mgmt *pf_to_mgmt)
 	struct hinic_pfhwdev *pfhwdev = mgmt_to_pfhwdev(pf_to_mgmt);
 	struct hinic_hwdev *hwdev = &pfhwdev->hwdev;
 
+	if (HINIC_IS_VF(hwdev->hwif))
+		return;
+
 	hinic_aeq_unregister_hw_cb(&hwdev->aeqs, HINIC_MSG_FROM_MGMT_CPU);
 	hinic_api_cmd_free(pf_to_mgmt->cmd_chain);
 }
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
index a5ab044f98cc..c2b142c08b0e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mgmt.h
@@ -62,6 +62,7 @@ enum hinic_cfg_cmd {
 enum hinic_comm_cmd {
 	HINIC_COMM_CMD_START_FLR          = 0x1,
 	HINIC_COMM_CMD_IO_STATUS_GET    = 0x3,
+	HINIC_COMM_CMD_DMA_ATTR_SET	    = 0x4,
 
 	HINIC_COMM_CMD_CMDQ_CTXT_SET    = 0x10,
 	HINIC_COMM_CMD_CMDQ_CTXT_GET    = 0x11,
@@ -75,7 +76,13 @@ enum hinic_comm_cmd {
 
 	HINIC_COMM_CMD_IO_RES_CLEAR     = 0x29,
 
-	HINIC_COMM_CMD_MAX              = 0x32,
+	HINIC_COMM_CMD_CEQ_CTRL_REG_WR_BY_UP = 0x33,
+
+	HINIC_COMM_CMD_L2NIC_RESET		= 0x4b,
+
+	HINIC_COMM_CMD_PAGESIZE_SET	= 0x50,
+
+	HINIC_COMM_CMD_MAX              = 0x51,
 };
 
 enum hinic_mgmt_cb_state {
@@ -108,7 +115,7 @@ struct hinic_mgmt_cb {
 
 struct hinic_pf_to_mgmt {
 	struct hinic_hwif               *hwif;
-
+	struct hinic_hwdev		*hwdev;
 	struct semaphore                sync_msg_lock;
 	u16                             sync_msg_id;
 	u8                              *sync_msg_buf;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index be364b7a7019..20c5c8ea452e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -108,7 +108,12 @@ void hinic_sq_prepare_ctxt(struct hinic_sq_ctxt *sq_ctxt,
 	wq_page_pfn_hi = upper_32_bits(wq_page_pfn);
 	wq_page_pfn_lo = lower_32_bits(wq_page_pfn);
 
-	wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
+	/* If only one page, use 0-level CLA */
+	if (wq->num_q_pages == 1)
+		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq_page_addr);
+	else
+		wq_block_pfn = HINIC_WQ_BLOCK_PFN(wq->block_paddr);
+
 	wq_block_pfn_hi = upper_32_bits(wq_block_pfn);
 	wq_block_pfn_lo = lower_32_bits(wq_block_pfn);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index 79091e131418..c30d092e48d5 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -38,8 +38,8 @@
 #define HINIC_SQ_WQEBB_SIZE                     64
 #define HINIC_RQ_WQEBB_SIZE                     32
 
-#define HINIC_SQ_PAGE_SIZE                      SZ_4K
-#define HINIC_RQ_PAGE_SIZE                      SZ_4K
+#define HINIC_SQ_PAGE_SIZE                      SZ_256K
+#define HINIC_RQ_PAGE_SIZE                      SZ_256K
 
 #define HINIC_SQ_DEPTH                          SZ_4K
 #define HINIC_RQ_DEPTH                          SZ_4K
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
index 03363216ff59..5dc3743f8091 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.c
@@ -503,7 +503,7 @@ err_alloc_wq_pages:
  * Return 0 - Success, negative - Failure
  **/
 int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
-		      u16 wqebb_size, u16 wq_page_size, u16 q_depth,
+		      u16 wqebb_size, u32 wq_page_size, u16 q_depth,
 		      u16 max_wqe_size)
 {
 	struct hinic_hwif *hwif = wqs->hwif;
@@ -600,7 +600,7 @@ void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq)
  **/
 int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 			 struct hinic_wq *wq, struct hinic_hwif *hwif,
-			 int cmdq_blocks, u16 wqebb_size, u16 wq_page_size,
+			 int cmdq_blocks, u16 wqebb_size, u32 wq_page_size,
 			 u16 q_depth, u16 max_wqe_size)
 {
 	struct pci_dev *pdev = hwif->pdev;
@@ -768,7 +768,10 @@ struct hinic_hw_wqe *hinic_get_wqe(struct hinic_wq *wq, unsigned int wqe_size,
 
 	*prod_idx = curr_prod_idx;
 
-	if (curr_pg != end_pg) {
+	/* If we only have one page, we still need to get the shadow wqe
+	 * when the wqe rolls over the end of the page
+	 */
+	if (curr_pg != end_pg || MASKED_WQE_IDX(wq, end_prod_idx) < *prod_idx) {
 		void *shadow_addr = &wq->shadow_wqe[curr_pg * wq->max_wqe_size];
 
 		copy_wqe_to_shadow(wq, shadow_addr, num_wqebbs, *prod_idx);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
index 811eef744140..b06f8c0255de 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_wq.h
@@ -26,7 +26,7 @@ struct hinic_wq {
 	int             block_idx;
 
 	u16             wqebb_size;
-	u16             wq_page_size;
+	u32             wq_page_size;
 	u16             q_depth;
 	u16             max_wqe_size;
 	u16             num_wqebbs_per_page;
@@ -76,7 +76,7 @@ struct hinic_cmdq_pages {
 
 int hinic_wqs_cmdq_alloc(struct hinic_cmdq_pages *cmdq_pages,
 			 struct hinic_wq *wq, struct hinic_hwif *hwif,
-			 int cmdq_blocks, u16 wqebb_size, u16 wq_page_size,
+			 int cmdq_blocks, u16 wqebb_size, u32 wq_page_size,
 			 u16 q_depth, u16 max_wqe_size);
 
 void hinic_wqs_cmdq_free(struct hinic_cmdq_pages *cmdq_pages,
@@ -88,7 +88,7 @@ int hinic_wqs_alloc(struct hinic_wqs *wqs, int num_wqs,
 void hinic_wqs_free(struct hinic_wqs *wqs);
 
 int hinic_wq_allocate(struct hinic_wqs *wqs, struct hinic_wq *wq,
-		      u16 wqebb_size, u16 wq_page_size, u16 q_depth,
+		      u16 wqebb_size, u32 wq_page_size, u16 q_depth,
 		      u16 max_wqe_size);
 
 void hinic_wq_free(struct hinic_wqs *wqs, struct hinic_wq *wq);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 13560975c103..cd71249f9b1c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -29,6 +29,7 @@
 #include "hinic_tx.h"
 #include "hinic_rx.h"
 #include "hinic_dev.h"
+#include "hinic_sriov.h"
 
 MODULE_AUTHOR("Huawei Technologies CO., Ltd");
 MODULE_DESCRIPTION("Huawei Intelligent NIC driver");
@@ -46,6 +47,7 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)");
 #define HINIC_DEV_ID_DUAL_PORT_100GE        0x0200
 #define HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ   0x0205
 #define HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ    0x0210
+#define HINIC_DEV_ID_VF    0x375e
 
 #define HINIC_WQ_NAME                   "hinic_dev"
 
@@ -65,6 +67,8 @@ MODULE_PARM_DESC(rx_weight, "Number Rx packets for NAPI budget (default=64)");
 #define rx_mode_work_to_nic_dev(rx_mode_work) \
 		container_of(rx_mode_work, struct hinic_dev, rx_mode_work)
 
+#define HINIC_WAIT_SRIOV_CFG_TIMEOUT	15000
+
 static int change_mac_addr(struct net_device *netdev, const u8 *addr);
 
 static int set_features(struct hinic_dev *nic_dev,
@@ -423,8 +427,9 @@ static int hinic_open(struct net_device *netdev)
 		goto err_func_port_state;
 	}
 
-	/* Wait up to 3 sec between port enable to link state */
-	msleep(3000);
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		/* Wait up to 3 sec between port enable to link state */
+		msleep(3000);
 
 	down(&nic_dev->mgmt_lock);
 
@@ -434,6 +439,9 @@ static int hinic_open(struct net_device *netdev)
 		goto err_port_link;
 	}
 
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		hinic_notify_all_vfs_link_changed(nic_dev->hwdev, link_state);
+
 	if (link_state == HINIC_LINK_STATE_UP)
 		nic_dev->flags |= HINIC_LINK_UP;
 
@@ -497,6 +505,9 @@ static int hinic_close(struct net_device *netdev)
 
 	up(&nic_dev->mgmt_lock);
 
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		hinic_notify_all_vfs_link_changed(nic_dev->hwdev, 0);
+
 	err = hinic_port_set_func_state(nic_dev, HINIC_FUNC_PORT_DISABLE);
 	if (err) {
 		netif_err(nic_dev, drv, netdev,
@@ -685,7 +696,7 @@ static int hinic_vlan_rx_add_vid(struct net_device *netdev,
 	}
 
 	err = hinic_port_add_mac(nic_dev, netdev->dev_addr, vid);
-	if (err) {
+	if (err && err != HINIC_PF_SET_VF_ALREADY) {
 		netif_err(nic_dev, drv, netdev, "Failed to set mac\n");
 		goto err_add_mac;
 	}
@@ -737,8 +748,6 @@ static void set_rx_mode(struct work_struct *work)
 	struct hinic_rx_mode_work *rx_mode_work = work_to_rx_mode_work(work);
 	struct hinic_dev *nic_dev = rx_mode_work_to_nic_dev(rx_mode_work);
 
-	netif_info(nic_dev, drv, nic_dev->netdev, "set rx mode work\n");
-
 	hinic_port_set_rx_mode(nic_dev, rx_mode_work->rx_mode);
 
 	__dev_uc_sync(nic_dev->netdev, add_mac_addr, remove_mac_addr);
@@ -896,6 +905,10 @@ static void link_status_event_handler(void *handle, void *buf_in, u16 in_size,
 		netif_info(nic_dev, drv, nic_dev->netdev, "HINIC_Link is DOWN\n");
 	}
 
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		hinic_notify_all_vfs_link_changed(nic_dev->hwdev,
+						  link_status->link);
+
 	ret_link_status = buf_out;
 	ret_link_status->status = 0;
 
@@ -969,7 +982,9 @@ static int nic_dev_init(struct pci_dev *pdev)
 	}
 
 	hinic_set_ethtool_ops(netdev);
+
 	netdev->netdev_ops = &hinic_netdev_ops;
+
 	netdev->max_mtu = ETH_MAX_MTU;
 
 	nic_dev = netdev_priv(netdev);
@@ -981,6 +996,8 @@ static int nic_dev_init(struct pci_dev *pdev)
 	nic_dev->rxqs = NULL;
 	nic_dev->tx_weight = tx_weight;
 	nic_dev->rx_weight = rx_weight;
+	nic_dev->sriov_info.hwdev = hwdev;
+	nic_dev->sriov_info.pdev = pdev;
 
 	sema_init(&nic_dev->mgmt_lock, 1);
 
@@ -1007,11 +1024,25 @@ static int nic_dev_init(struct pci_dev *pdev)
 	pci_set_drvdata(pdev, netdev);
 
 	err = hinic_port_get_mac(nic_dev, netdev->dev_addr);
-	if (err)
-		dev_warn(&pdev->dev, "Failed to get mac address\n");
+	if (err) {
+		dev_err(&pdev->dev, "Failed to get mac address\n");
+		goto err_get_mac;
+	}
+
+	if (!is_valid_ether_addr(netdev->dev_addr)) {
+		if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) {
+			dev_err(&pdev->dev, "Invalid MAC address\n");
+			err = -EIO;
+			goto err_add_mac;
+		}
+
+		dev_info(&pdev->dev, "Invalid MAC address %pM, using random\n",
+			 netdev->dev_addr);
+		eth_hw_addr_random(netdev);
+	}
 
 	err = hinic_port_add_mac(nic_dev, netdev->dev_addr, 0);
-	if (err) {
+	if (err && err != HINIC_PF_SET_VF_ALREADY) {
 		dev_err(&pdev->dev, "Failed to add mac\n");
 		goto err_add_mac;
 	}
@@ -1053,6 +1084,7 @@ err_set_features:
 	cancel_work_sync(&rx_mode_work->work);
 
 err_set_mtu:
+err_get_mac:
 err_add_mac:
 	pci_set_drvdata(pdev, NULL);
 	destroy_workqueue(nic_dev->workq);
@@ -1126,12 +1158,37 @@ err_pci_regions:
 	return err;
 }
 
+#define HINIC_WAIT_SRIOV_CFG_TIMEOUT	15000
+
+static void wait_sriov_cfg_complete(struct hinic_dev *nic_dev)
+{
+	struct hinic_sriov_info *sriov_info = &nic_dev->sriov_info;
+	u32 loop_cnt = 0;
+
+	set_bit(HINIC_FUNC_REMOVE, &sriov_info->state);
+	usleep_range(9900, 10000);
+
+	while (loop_cnt < HINIC_WAIT_SRIOV_CFG_TIMEOUT) {
+		if (!test_bit(HINIC_SRIOV_ENABLE, &sriov_info->state) &&
+		    !test_bit(HINIC_SRIOV_DISABLE, &sriov_info->state))
+			return;
+
+		usleep_range(9900, 10000);
+		loop_cnt++;
+	}
+}
+
 static void hinic_remove(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	struct hinic_rx_mode_work *rx_mode_work;
 
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) {
+		wait_sriov_cfg_complete(nic_dev);
+		hinic_pci_sriov_disable(pdev);
+	}
+
 	unregister_netdev(netdev);
 
 	hinic_hwdev_cb_unregister(nic_dev->hwdev,
@@ -1144,6 +1201,8 @@ static void hinic_remove(struct pci_dev *pdev)
 
 	destroy_workqueue(nic_dev->workq);
 
+	hinic_vf_func_free(nic_dev->hwdev);
+
 	hinic_free_hwdev(nic_dev->hwdev);
 
 	free_netdev(netdev);
@@ -1164,6 +1223,7 @@ static const struct pci_device_id hinic_pci_table[] = {
 	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE), 0},
 	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_DUAL_PORT_100GE_MEZZ), 0},
 	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_QUAD_PORT_25GE_MEZZ), 0},
+	{ PCI_VDEVICE(HUAWEI, HINIC_DEV_ID_VF), 0},
 	{ 0, 0}
 };
 MODULE_DEVICE_TABLE(pci, hinic_pci_table);
@@ -1174,6 +1234,7 @@ static struct pci_driver hinic_driver = {
 	.probe          = hinic_probe,
 	.remove         = hinic_remove,
 	.shutdown       = hinic_shutdown,
+	.sriov_configure = hinic_pci_sriov_configure,
 };
 
 module_pci_driver(hinic_driver);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 1e389a004e50..b7fe0adcc29a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -37,20 +37,14 @@ enum mac_op {
 static int change_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		      u16 vlan_id, enum mac_op op)
 {
-	struct net_device *netdev = nic_dev->netdev;
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_port_mac_cmd port_mac_cmd;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(port_mac_cmd);
 	struct pci_dev *pdev = hwif->pdev;
 	enum hinic_port_cmd cmd;
-	u16 out_size;
 	int err;
 
-	if (vlan_id >= VLAN_N_VID) {
-		netif_err(nic_dev, drv, netdev, "Invalid VLAN number\n");
-		return -EINVAL;
-	}
-
 	if (op == MAC_SET)
 		cmd = HINIC_PORT_CMD_SET_MAC;
 	else
@@ -63,12 +57,25 @@ static int change_mac(struct hinic_dev *nic_dev, const u8 *addr,
 	err = hinic_port_msg_cmd(hwdev, cmd, &port_mac_cmd,
 				 sizeof(port_mac_cmd),
 				 &port_mac_cmd, &out_size);
-	if (err || (out_size != sizeof(port_mac_cmd)) || port_mac_cmd.status) {
+	if (err || out_size != sizeof(port_mac_cmd) ||
+	    (port_mac_cmd.status  &&
+	    port_mac_cmd.status != HINIC_PF_SET_VF_ALREADY &&
+	    port_mac_cmd.status != HINIC_MGMT_STATUS_EXIST)) {
 		dev_err(&pdev->dev, "Failed to change MAC, ret = %d\n",
 			port_mac_cmd.status);
 		return -EFAULT;
 	}
 
+	if (cmd == HINIC_PORT_CMD_SET_MAC && port_mac_cmd.status ==
+	    HINIC_PF_SET_VF_ALREADY) {
+		dev_warn(&pdev->dev, "PF has already set VF mac, Ignore set operation\n");
+		return HINIC_PF_SET_VF_ALREADY;
+	}
+
+	if (cmd == HINIC_PORT_CMD_SET_MAC && port_mac_cmd.status ==
+	    HINIC_MGMT_STATUS_EXIST)
+		dev_warn(&pdev->dev, "MAC is repeated. Ignore set operation\n");
+
 	return 0;
 }
 
@@ -112,8 +119,8 @@ int hinic_port_get_mac(struct hinic_dev *nic_dev, u8 *addr)
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_port_mac_cmd port_mac_cmd;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(port_mac_cmd);
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
 	int err;
 
 	port_mac_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
@@ -144,9 +151,9 @@ int hinic_port_set_mtu(struct hinic_dev *nic_dev, int new_mtu)
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_port_mtu_cmd port_mtu_cmd;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(port_mtu_cmd);
 	struct pci_dev *pdev = hwif->pdev;
 	int err, max_frame;
-	u16 out_size;
 
 	if (new_mtu < HINIC_MIN_MTU_SIZE) {
 		netif_err(nic_dev, drv, netdev, "mtu < MIN MTU size");
@@ -248,14 +255,9 @@ int hinic_port_link_state(struct hinic_dev *nic_dev,
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_port_link_cmd link_cmd;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(link_cmd);
 	int err;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "unsupported PCI Function type\n");
-		return -EINVAL;
-	}
-
 	link_cmd.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
 
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_LINK_STATE,
@@ -284,13 +286,11 @@ int hinic_port_set_state(struct hinic_dev *nic_dev, enum hinic_port_state state)
 	struct hinic_port_state_cmd port_state;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(port_state);
 	int err;
 
-	if (!HINIC_IS_PF(hwif) && !HINIC_IS_PPF(hwif)) {
-		dev_err(&pdev->dev, "unsupported PCI Function type\n");
-		return -EINVAL;
-	}
+	if (HINIC_IS_VF(hwdev->hwif))
+		return 0;
 
 	port_state.state = state;
 
@@ -320,7 +320,7 @@ int hinic_port_set_func_state(struct hinic_dev *nic_dev,
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(func_state);
 	int err;
 
 	func_state.func_idx = HINIC_HWIF_FUNC_IDX(hwif);
@@ -351,7 +351,7 @@ int hinic_port_get_cap(struct hinic_dev *nic_dev,
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(*port_cap);
 	int err;
 
 	port_cap->func_idx = HINIC_HWIF_FUNC_IDX(hwif);
@@ -382,7 +382,7 @@ int hinic_port_set_tso(struct hinic_dev *nic_dev, enum hinic_tso_state state)
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_tso_config tso_cfg = {0};
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(tso_cfg);
 	int err;
 
 	tso_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -405,9 +405,9 @@ int hinic_set_rx_csum_offload(struct hinic_dev *nic_dev, u32 en)
 {
 	struct hinic_checksum_offload rx_csum_cfg = {0};
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	u16 out_size = sizeof(rx_csum_cfg);
 	struct hinic_hwif *hwif;
 	struct pci_dev *pdev;
-	u16 out_size;
 	int err;
 
 	if (!hwdev)
@@ -443,6 +443,7 @@ int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en)
 	if (!hwdev)
 		return -EINVAL;
 
+	out_size = sizeof(vlan_cfg);
 	hwif = hwdev->hwif;
 	pdev = hwif->pdev;
 	vlan_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -465,8 +466,8 @@ int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs)
 {
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
-	struct pci_dev *pdev = hwif->pdev;
 	struct hinic_rq_num rq_num = { 0 };
+	struct pci_dev *pdev = hwif->pdev;
 	u16 out_size = sizeof(rq_num);
 	int err;
 
@@ -491,8 +492,8 @@ static int hinic_set_rx_lro(struct hinic_dev *nic_dev, u8 ipv4_en, u8 ipv6_en,
 			    u8 max_wqe_num)
 {
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
-	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_lro_config lro_cfg = { 0 };
+	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
 	u16 out_size = sizeof(lro_cfg);
 	int err;
@@ -568,6 +569,9 @@ int hinic_set_rx_lro_state(struct hinic_dev *nic_dev, u8 lro_en,
 	if (err)
 		return err;
 
+	if (HINIC_IS_VF(nic_dev->hwdev->hwif))
+		return 0;
+
 	err = hinic_set_rx_lro_timer(nic_dev, lro_timer);
 	if (err)
 		return err;
@@ -741,9 +745,9 @@ int hinic_get_rss_type(struct hinic_dev *nic_dev, u32 tmpl_idx,
 {
 	struct hinic_rss_context_table ctx_tbl = { 0 };
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	u16 out_size = sizeof(ctx_tbl);
 	struct hinic_hwif *hwif;
 	struct pci_dev *pdev;
-	u16 out_size = sizeof(ctx_tbl);
 	int err;
 
 	if (!hwdev || !rss_type)
@@ -784,7 +788,7 @@ int hinic_rss_set_template_tbl(struct hinic_dev *nic_dev, u32 template_id,
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_rss_key rss_key = { 0 };
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(rss_key);
 	int err;
 
 	rss_key.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -809,9 +813,9 @@ int hinic_rss_get_template_tbl(struct hinic_dev *nic_dev, u32 tmpl_idx,
 {
 	struct hinic_rss_template_key temp_key = { 0 };
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	u16 out_size = sizeof(temp_key);
 	struct hinic_hwif *hwif;
 	struct pci_dev *pdev;
-	u16 out_size = sizeof(temp_key);
 	int err;
 
 	if (!hwdev || !temp)
@@ -844,7 +848,7 @@ int hinic_rss_set_hash_engine(struct hinic_dev *nic_dev, u8 template_id,
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(rss_engine);
 	int err;
 
 	rss_engine.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -868,9 +872,9 @@ int hinic_rss_get_hash_engine(struct hinic_dev *nic_dev, u8 tmpl_idx, u8 *type)
 {
 	struct hinic_rss_engine_type hash_type = { 0 };
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
+	u16 out_size = sizeof(hash_type);
 	struct hinic_hwif *hwif;
 	struct pci_dev *pdev;
-	u16 out_size = sizeof(hash_type);
 	int err;
 
 	if (!hwdev || !type)
@@ -901,7 +905,7 @@ int hinic_rss_cfg(struct hinic_dev *nic_dev, u8 rss_en, u8 template_id)
 	struct hinic_rss_config rss_cfg = { 0 };
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
+	u16 out_size = sizeof(rss_cfg);
 	int err;
 
 	rss_cfg.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -927,8 +931,8 @@ int hinic_rss_template_alloc(struct hinic_dev *nic_dev, u8 *tmpl_idx)
 	struct hinic_rss_template_mgmt template_mgmt = { 0 };
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(template_mgmt);
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
 	int err;
 
 	template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -953,8 +957,8 @@ int hinic_rss_template_free(struct hinic_dev *nic_dev, u8 tmpl_idx)
 	struct hinic_rss_template_mgmt template_mgmt = { 0 };
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_hwif *hwif = hwdev->hwif;
+	u16 out_size = sizeof(template_mgmt);
 	struct pci_dev *pdev = hwif->pdev;
-	u16 out_size;
 	int err;
 
 	template_mgmt.func_id = HINIC_HWIF_FUNC_IDX(hwif);
@@ -1043,9 +1047,9 @@ int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver)
 {
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 	struct hinic_version_info up_ver = {0};
+	u16 out_size = sizeof(up_ver);
 	struct hinic_hwif *hwif;
 	struct pci_dev *pdev;
-	u16 out_size;
 	int err;
 
 	if (!hwdev)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 44772fd47fc1..5ad04fb6722a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -148,9 +148,9 @@ struct hinic_port_link_status {
 	u8      version;
 	u8      rsvd0[6];
 
-	u16     rsvd1;
+	u16     func_id;
 	u8      link;
-	u8      rsvd2;
+	u8      port_id;
 };
 
 struct hinic_port_func_state_cmd {
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_rx.c b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
index 815649e37cb1..af20d0dd6de7 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_rx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_rx.c
@@ -432,9 +432,11 @@ static int rx_poll(struct napi_struct *napi, int budget)
 		return budget;
 
 	napi_complete(napi);
-	hinic_hwdev_set_msix_state(nic_dev->hwdev,
-				   rq->msix_entry,
-				   HINIC_MSIX_ENABLE);
+
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		hinic_hwdev_set_msix_state(nic_dev->hwdev,
+					   rq->msix_entry,
+					   HINIC_MSIX_ENABLE);
 
 	return pkts;
 }
@@ -461,9 +463,10 @@ static irqreturn_t rx_irq(int irq, void *data)
 
 	/* Disable the interrupt until napi will be completed */
 	nic_dev = netdev_priv(rxq->netdev);
-	hinic_hwdev_set_msix_state(nic_dev->hwdev,
-				   rq->msix_entry,
-				   HINIC_MSIX_DISABLE);
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		hinic_hwdev_set_msix_state(nic_dev->hwdev,
+					   rq->msix_entry,
+					   HINIC_MSIX_DISABLE);
 
 	nic_dev = netdev_priv(rxq->netdev);
 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, rq->msix_entry);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
new file mode 100644
index 000000000000..d1c4e1428b38
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -0,0 +1,698 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+
+#include <linux/pci.h>
+#include <linux/if_vlan.h>
+#include <linux/interrupt.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+
+#include "hinic_hw_dev.h"
+#include "hinic_dev.h"
+#include "hinic_hw_mbox.h"
+#include "hinic_hw_cmdq.h"
+#include "hinic_port.h"
+#include "hinic_sriov.h"
+
+static unsigned char set_vf_link_state;
+module_param(set_vf_link_state, byte, 0444);
+MODULE_PARM_DESC(set_vf_link_state, "Set vf link state, 0 represents link auto, 1 represents link always up, 2 represents link always down. - default is 0.");
+
+#define HINIC_VLAN_PRIORITY_SHIFT 13
+#define HINIC_ADD_VLAN_IN_MAC 0x8000
+
+int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr, u16 vlan_id,
+		  u16 func_id)
+{
+	struct hinic_port_mac_cmd mac_info = {0};
+	u16 out_size = sizeof(mac_info);
+	int err;
+
+	mac_info.func_idx = func_id;
+	mac_info.vlan_id = vlan_id;
+	memcpy(mac_info.mac, mac_addr, ETH_ALEN);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_MAC, &mac_info,
+				 sizeof(mac_info), &mac_info, &out_size);
+	if (err || out_size != sizeof(mac_info) ||
+	    (mac_info.status && mac_info.status != HINIC_PF_SET_VF_ALREADY &&
+	    mac_info.status != HINIC_MGMT_STATUS_EXIST)) {
+		dev_err(&hwdev->func_to_io.hwif->pdev->dev, "Failed to change MAC, ret = %d\n",
+			mac_info.status);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static void hinic_notify_vf_link_status(struct hinic_hwdev *hwdev, u16 vf_id,
+					u8 link_status)
+{
+	struct vf_data_storage *vf_infos = hwdev->func_to_io.vf_infos;
+	struct hinic_port_link_status link = {0};
+	u16 out_size = sizeof(link);
+	int err;
+
+	if (vf_infos[HW_VF_ID_TO_OS(vf_id)].registered) {
+		link.link = link_status;
+		link.func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+		err = hinic_mbox_to_vf(hwdev, HINIC_MOD_L2NIC,
+				       vf_id, HINIC_PORT_CMD_LINK_STATUS_REPORT,
+				       &link, sizeof(link),
+				       &link, &out_size, 0);
+		if (err || !out_size || link.status)
+			dev_err(&hwdev->hwif->pdev->dev,
+				"Send link change event to VF %d failed, err: %d, status: 0x%x, out_size: 0x%x\n",
+				HW_VF_ID_TO_OS(vf_id), err,
+				link.status, out_size);
+	}
+}
+
+/* send link change event mbox msg to active vfs under the pf */
+void hinic_notify_all_vfs_link_changed(struct hinic_hwdev *hwdev,
+				       u8 link_status)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	u16 i;
+
+	nic_io->link_status = link_status;
+	for (i = 1; i <= nic_io->max_vfs; i++) {
+		if (!nic_io->vf_infos[HW_VF_ID_TO_OS(i)].link_forced)
+			hinic_notify_vf_link_status(hwdev, i,  link_status);
+	}
+}
+
+u16 hinic_vf_info_vlanprio(struct hinic_hwdev *hwdev, int vf_id)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	u16 pf_vlan, vlanprio;
+	u8 pf_qos;
+
+	pf_vlan = nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_vlan;
+	pf_qos = nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_qos;
+	vlanprio = pf_vlan | pf_qos << HINIC_VLAN_PRIORITY_SHIFT;
+
+	return vlanprio;
+}
+
+int hinic_set_vf_vlan(struct hinic_hwdev *hwdev, bool add, u16 vid,
+		      u8 qos, int vf_id)
+{
+	struct hinic_vf_vlan_config vf_vlan = {0};
+	u16 out_size = sizeof(vf_vlan);
+	int err;
+	u8 cmd;
+
+	/* VLAN 0 is a special case, don't allow it to be removed */
+	if (!vid && !add)
+		return 0;
+
+	vf_vlan.func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+	vf_vlan.vlan_id = vid;
+	vf_vlan.qos = qos;
+
+	if (add)
+		cmd = HINIC_PORT_CMD_SET_VF_VLAN;
+	else
+		cmd = HINIC_PORT_CMD_CLR_VF_VLAN;
+
+	err = hinic_port_msg_cmd(hwdev, cmd, &vf_vlan,
+				 sizeof(vf_vlan), &vf_vlan, &out_size);
+	if (err || !out_size || vf_vlan.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set VF %d vlan, err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), err, vf_vlan.status, out_size);
+		return -EFAULT;
+	}
+
+	return 0;
+}
+
+static int hinic_init_vf_config(struct hinic_hwdev *hwdev, u16 vf_id)
+{
+	struct vf_data_storage *vf_info;
+	u16 func_id, vlan_id;
+	int err = 0;
+
+	vf_info = hwdev->func_to_io.vf_infos + HW_VF_ID_TO_OS(vf_id);
+	if (vf_info->pf_set_mac) {
+		func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+
+		vlan_id = 0;
+
+		err = hinic_set_mac(hwdev, vf_info->vf_mac_addr, vlan_id,
+				    func_id);
+		if (err) {
+			dev_err(&hwdev->func_to_io.hwif->pdev->dev, "Failed to set VF %d MAC\n",
+				HW_VF_ID_TO_OS(vf_id));
+			return err;
+		}
+	}
+
+	if (hinic_vf_info_vlanprio(hwdev, vf_id)) {
+		err = hinic_set_vf_vlan(hwdev, true, vf_info->pf_vlan,
+					vf_info->pf_qos, vf_id);
+		if (err) {
+			dev_err(&hwdev->hwif->pdev->dev, "Failed to add VF %d VLAN_QOS\n",
+				HW_VF_ID_TO_OS(vf_id));
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+int hinic_register_vf_msg_handler(void *hwdev, u16 vf_id,
+				  void *buf_in, u16 in_size,
+				  void *buf_out, u16 *out_size)
+{
+	struct hinic_register_vf *register_info = buf_out;
+	struct hinic_hwdev *hw_dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+	int err;
+
+	nic_io = &hw_dev->func_to_io;
+	if (vf_id > nic_io->max_vfs) {
+		dev_err(&hw_dev->hwif->pdev->dev, "Register VF id %d exceed limit[0-%d]\n",
+			HW_VF_ID_TO_OS(vf_id), HW_VF_ID_TO_OS(nic_io->max_vfs));
+		register_info->status = EFAULT;
+		return -EFAULT;
+	}
+
+	*out_size = sizeof(*register_info);
+	err = hinic_init_vf_config(hw_dev, vf_id);
+	if (err) {
+		register_info->status = EFAULT;
+		return err;
+	}
+
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].registered = true;
+
+	return 0;
+}
+
+int hinic_unregister_vf_msg_handler(void *hwdev, u16 vf_id,
+				    void *buf_in, u16 in_size,
+				    void *buf_out, u16 *out_size)
+{
+	struct hinic_hwdev *hw_dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+
+	nic_io = &hw_dev->func_to_io;
+	*out_size = 0;
+	if (vf_id > nic_io->max_vfs)
+		return 0;
+
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].registered = false;
+
+	return 0;
+}
+
+int hinic_change_vf_mtu_msg_handler(void *hwdev, u16 vf_id,
+				    void *buf_in, u16 in_size,
+				    void *buf_out, u16 *out_size)
+{
+	struct hinic_hwdev *hw_dev = hwdev;
+	int err;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_CHANGE_MTU, buf_in,
+				 in_size, buf_out, out_size);
+	if (err) {
+		dev_err(&hw_dev->hwif->pdev->dev, "Failed to set VF %u mtu\n",
+			vf_id);
+		return err;
+	}
+
+	return 0;
+}
+
+int hinic_get_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+				 void *buf_in, u16 in_size,
+				 void *buf_out, u16 *out_size)
+{
+	struct hinic_port_mac_cmd *mac_info = buf_out;
+	struct hinic_hwdev *dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+	struct vf_data_storage *vf_info;
+
+	nic_io = &dev->func_to_io;
+	vf_info = nic_io->vf_infos + HW_VF_ID_TO_OS(vf_id);
+
+	memcpy(mac_info->mac, vf_info->vf_mac_addr, ETH_ALEN);
+	mac_info->status = 0;
+	*out_size = sizeof(*mac_info);
+
+	return 0;
+}
+
+int hinic_set_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+				 void *buf_in, u16 in_size,
+				 void *buf_out, u16 *out_size)
+{
+	struct hinic_port_mac_cmd *mac_out = buf_out;
+	struct hinic_port_mac_cmd *mac_in = buf_in;
+	struct hinic_hwdev *hw_dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+	struct vf_data_storage *vf_info;
+	int err;
+
+	nic_io =  &hw_dev->func_to_io;
+	vf_info = nic_io->vf_infos + HW_VF_ID_TO_OS(vf_id);
+	if (vf_info->pf_set_mac && !(vf_info->trust) &&
+	    is_valid_ether_addr(mac_in->mac)) {
+		dev_warn(&hw_dev->hwif->pdev->dev, "PF has already set VF %d MAC address\n",
+			 HW_VF_ID_TO_OS(vf_id));
+		mac_out->status = HINIC_PF_SET_VF_ALREADY;
+		*out_size = sizeof(*mac_out);
+		return 0;
+	}
+
+	err = hinic_port_msg_cmd(hw_dev, HINIC_PORT_CMD_SET_MAC, buf_in,
+				 in_size, buf_out, out_size);
+	if ((err &&  err != HINIC_MBOX_PF_BUSY_ACTIVE_FW) || !(*out_size)) {
+		dev_err(&hw_dev->hwif->pdev->dev,
+			"Failed to set VF %d MAC address, err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), err, mac_out->status, *out_size);
+		return -EFAULT;
+	}
+
+	return err;
+}
+
+int hinic_del_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+				 void *buf_in, u16 in_size,
+				 void *buf_out, u16 *out_size)
+{
+	struct hinic_port_mac_cmd *mac_out = buf_out;
+	struct hinic_port_mac_cmd *mac_in = buf_in;
+	struct hinic_hwdev *hw_dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+	struct vf_data_storage *vf_info;
+	int err;
+
+	nic_io = &hw_dev->func_to_io;
+	vf_info = nic_io->vf_infos + HW_VF_ID_TO_OS(vf_id);
+	if (vf_info->pf_set_mac  && is_valid_ether_addr(mac_in->mac) &&
+	    !memcmp(vf_info->vf_mac_addr, mac_in->mac, ETH_ALEN)) {
+		dev_warn(&hw_dev->hwif->pdev->dev, "PF has already set VF mac.\n");
+		mac_out->status = HINIC_PF_SET_VF_ALREADY;
+		*out_size = sizeof(*mac_out);
+		return 0;
+	}
+
+	err = hinic_port_msg_cmd(hw_dev, HINIC_PORT_CMD_DEL_MAC, buf_in,
+				 in_size, buf_out, out_size);
+	if ((err && err != HINIC_MBOX_PF_BUSY_ACTIVE_FW) || !(*out_size)) {
+		dev_err(&hw_dev->hwif->pdev->dev, "Failed to delete VF %d MAC, err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), err, mac_out->status, *out_size);
+		return -EFAULT;
+	}
+
+	return err;
+}
+
+int hinic_get_vf_link_status_msg_handler(void *hwdev, u16 vf_id,
+					 void *buf_in, u16 in_size,
+					 void *buf_out, u16 *out_size)
+{
+	struct hinic_port_link_cmd *get_link = buf_out;
+	struct hinic_hwdev *hw_dev = hwdev;
+	struct vf_data_storage *vf_infos;
+	struct hinic_func_to_io *nic_io;
+	bool link_forced, link_up;
+
+	nic_io = &hw_dev->func_to_io;
+	vf_infos = nic_io->vf_infos;
+	link_forced = vf_infos[HW_VF_ID_TO_OS(vf_id)].link_forced;
+	link_up = vf_infos[HW_VF_ID_TO_OS(vf_id)].link_up;
+
+	if (link_forced)
+		get_link->state = link_up ?
+			HINIC_LINK_STATE_UP : HINIC_LINK_STATE_DOWN;
+	else
+		get_link->state = nic_io->link_status;
+
+	get_link->status = 0;
+	*out_size = sizeof(*get_link);
+
+	return 0;
+}
+
+struct vf_cmd_msg_handle nic_vf_cmd_msg_handler[] = {
+	{HINIC_PORT_CMD_VF_REGISTER, hinic_register_vf_msg_handler},
+	{HINIC_PORT_CMD_VF_UNREGISTER, hinic_unregister_vf_msg_handler},
+	{HINIC_PORT_CMD_CHANGE_MTU, hinic_change_vf_mtu_msg_handler},
+	{HINIC_PORT_CMD_GET_MAC, hinic_get_vf_mac_msg_handler},
+	{HINIC_PORT_CMD_SET_MAC, hinic_set_vf_mac_msg_handler},
+	{HINIC_PORT_CMD_DEL_MAC, hinic_del_vf_mac_msg_handler},
+	{HINIC_PORT_CMD_GET_LINK_STATE, hinic_get_vf_link_status_msg_handler},
+};
+
+#define CHECK_IPSU_15BIT	0X8000
+
+struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev)
+{
+	struct net_device *netdev = pci_get_drvdata(pdev);
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+
+	return &nic_dev->sriov_info;
+}
+
+int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	int err;
+
+	err = hinic_set_vf_vlan(hwdev, false,
+				nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_vlan,
+				nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_qos,
+				vf_id);
+	if (err)
+		return err;
+
+	dev_info(&hwdev->hwif->pdev->dev, "Remove VLAN %d on VF %d\n",
+		 nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_vlan,
+		 HW_VF_ID_TO_OS(vf_id));
+
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_vlan = 0;
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_qos = 0;
+
+	return 0;
+}
+
+/* pf receive message from vf */
+int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
+			u16 in_size, void *buf_out, u16 *out_size)
+{
+	struct vf_cmd_msg_handle *vf_msg_handle;
+	struct hinic_hwdev *dev = hwdev;
+	struct hinic_func_to_io *nic_io;
+	struct hinic_pfhwdev *pfhwdev;
+	u32 i, cmd_number;
+	int err = 0;
+
+	if (!hwdev)
+		return -EFAULT;
+
+	cmd_number = ARRAY_SIZE(nic_vf_cmd_msg_handler);
+	pfhwdev = container_of(dev, struct hinic_pfhwdev, hwdev);
+	nic_io = &dev->func_to_io;
+	for (i = 0; i < cmd_number; i++) {
+		vf_msg_handle = &nic_vf_cmd_msg_handler[i];
+		if (cmd == vf_msg_handle->cmd &&
+		    vf_msg_handle->cmd_msg_handler) {
+			err = vf_msg_handle->cmd_msg_handler(hwdev, vf_id,
+							     buf_in, in_size,
+							     buf_out,
+							     out_size);
+			break;
+		}
+	}
+	/* no usable table entry: pass the command on via hinic_msg_to_mgmt() */
+	if (i == cmd_number)
+		err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC,
+					cmd, buf_in, in_size, buf_out,
+					out_size, HINIC_MGMT_MSG_SYNC);
+
+	if (err &&  err != HINIC_MBOX_PF_BUSY_ACTIVE_FW)
+		dev_err(&nic_io->hwif->pdev->dev, "PF receive VF L2NIC cmd: %d process error, err:%d\n",
+			cmd, err);
+	return err;
+}
+
+static int cfg_mbx_pf_proc_vf_msg(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
+				  u16 in_size, void *buf_out, u16 *out_size)
+{
+	struct hinic_dev_cap *dev_cap = buf_out;
+	struct hinic_hwdev *dev = hwdev;
+	struct hinic_cap *cap;
+
+	cap = &dev->nic_cap;
+	memset(dev_cap, 0, sizeof(*dev_cap));
+
+	dev_cap->max_vf = cap->max_vf;
+	dev_cap->max_sqs = cap->max_vf_qps;
+	dev_cap->max_rqs = cap->max_vf_qps;
+
+	*out_size = sizeof(*dev_cap);
+
+	return 0;
+}
+
+static int hinic_init_vf_infos(struct hinic_func_to_io *nic_io, u16 vf_id)
+{
+	struct vf_data_storage *vf_infos = nic_io->vf_infos;
+
+	if (set_vf_link_state > HINIC_IFLA_VF_LINK_STATE_DISABLE) {
+		dev_warn(&nic_io->hwif->pdev->dev, "Module Parameter set_vf_link_state value %d is out of range, resetting to %d\n",
+			 set_vf_link_state, HINIC_IFLA_VF_LINK_STATE_AUTO);
+		set_vf_link_state = HINIC_IFLA_VF_LINK_STATE_AUTO;
+	}
+
+	switch (set_vf_link_state) {
+	case HINIC_IFLA_VF_LINK_STATE_AUTO:
+		vf_infos[vf_id].link_forced = false;
+		break;
+	case HINIC_IFLA_VF_LINK_STATE_ENABLE:
+		vf_infos[vf_id].link_forced = true;
+		vf_infos[vf_id].link_up = true;
+		break;
+	case HINIC_IFLA_VF_LINK_STATE_DISABLE:
+		vf_infos[vf_id].link_forced = true;
+		vf_infos[vf_id].link_up = false;
+		break;
+	default:
+		dev_err(&nic_io->hwif->pdev->dev, "Invalid input parameter set_vf_link_state: %d\n",
+			set_vf_link_state);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
+{
+	struct vf_data_storage *vf_infos;
+	u16 func_id;
+
+	func_id = hinic_glb_pf_vf_offset(nic_dev->hwdev->hwif) + vf_id;
+	vf_infos = nic_dev->hwdev->func_to_io.vf_infos + HW_VF_ID_TO_OS(vf_id);
+	if (vf_infos->pf_set_mac)
+		hinic_port_del_mac(nic_dev, vf_infos->vf_mac_addr, 0);
+
+	if (hinic_vf_info_vlanprio(nic_dev->hwdev, vf_id))
+		hinic_kill_vf_vlan(nic_dev->hwdev, vf_id);
+
+	memset(vf_infos, 0, sizeof(*vf_infos));
+	/* set vf_infos to default */
+	hinic_init_vf_infos(&nic_dev->hwdev->func_to_io, HW_VF_ID_TO_OS(vf_id));
+}
+
+int hinic_deinit_vf_hw(struct hinic_sriov_info *sriov_info, u16 start_vf_id,
+		       u16 end_vf_id)
+{
+	struct hinic_dev *nic_dev;
+	u16 func_idx, idx;
+
+	nic_dev = container_of(sriov_info, struct hinic_dev, sriov_info);
+
+	for (idx = start_vf_id; idx <= end_vf_id; idx++) {
+		func_idx = hinic_glb_pf_vf_offset(nic_dev->hwdev->hwif) + idx;
+		hinic_set_wq_page_size(nic_dev->hwdev, func_idx,
+				       HINIC_HW_WQ_PAGE_SIZE);
+		hinic_clear_vf_infos(nic_dev, idx);
+	}
+
+	return 0;
+}
+
+int hinic_vf_func_init(struct hinic_hwdev *hwdev)
+{
+	struct hinic_register_vf register_info = {0};
+	u16 out_size = sizeof(register_info);
+	struct hinic_func_to_io *nic_io;
+	int err = 0;
+	u32 size, i;
+
+	nic_io = &hwdev->func_to_io;
+
+	if (HINIC_IS_VF(hwdev->hwif)) {
+		err = hinic_mbox_to_pf(hwdev, HINIC_MOD_L2NIC,
+				       HINIC_PORT_CMD_VF_REGISTER,
+				       &register_info, sizeof(register_info),
+				       &register_info, &out_size, 0);
+		if (err || register_info.status || !out_size) {
+			dev_err(&hwdev->hwif->pdev->dev,
+				"Failed to register VF, err: %d, status: 0x%x, out size: 0x%x\n",
+				err, register_info.status, out_size);
+			hinic_unregister_vf_mbox_cb(hwdev, HINIC_MOD_L2NIC);
+			return -EIO;
+		}
+	} else {
+		err = hinic_register_pf_mbox_cb(hwdev, HINIC_MOD_CFGM,
+						cfg_mbx_pf_proc_vf_msg);
+		if (err) {
+			dev_err(&hwdev->hwif->pdev->dev,
+				"Register PF mailbox callback failed\n");
+			return err;
+		}
+		nic_io->max_vfs = hwdev->nic_cap.max_vf;
+		size = sizeof(*nic_io->vf_infos) * nic_io->max_vfs;
+		if (size != 0) {
+			nic_io->vf_infos = kzalloc(size, GFP_KERNEL);
+			if (!nic_io->vf_infos) {
+				err = -ENOMEM;
+				goto out_free_nic_io;
+			}
+
+			for (i = 0; i < nic_io->max_vfs; i++) {
+				err = hinic_init_vf_infos(nic_io, i);
+				if (err)
+					goto err_init_vf_infos;
+			}
+
+			err = hinic_register_pf_mbox_cb(hwdev, HINIC_MOD_L2NIC,
+							nic_pf_mbox_handler);
+			if (err)
+				goto err_register_pf_mbox_cb;
+		}
+	}
+
+	return 0;
+
+err_register_pf_mbox_cb:
+err_init_vf_infos:
+	kfree(nic_io->vf_infos);
+out_free_nic_io:
+	return err;
+}
+
+void hinic_vf_func_free(struct hinic_hwdev *hwdev)
+{
+	struct hinic_register_vf unregister = {0};
+	u16 out_size = sizeof(unregister);
+	int err;
+
+	if (HINIC_IS_VF(hwdev->hwif)) {
+		err = hinic_mbox_to_pf(hwdev, HINIC_MOD_L2NIC,
+				       HINIC_PORT_CMD_VF_UNREGISTER,
+				       &unregister, sizeof(unregister),
+				       &unregister, &out_size, 0);
+		if (err || !out_size || unregister.status)
+			dev_err(&hwdev->hwif->pdev->dev, "Failed to unregister VF, err: %d, status: 0x%x, out_size: 0x%x\n",
+				err, unregister.status, out_size);
+	} else {
+		if (hwdev->func_to_io.vf_infos) {
+			hinic_unregister_pf_mbox_cb(hwdev, HINIC_MOD_L2NIC);
+			kfree(hwdev->func_to_io.vf_infos);
+		}
+	}
+}
+
+int hinic_init_vf_hw(struct hinic_hwdev *hwdev, u16 start_vf_id, u16 end_vf_id)
+{
+	u16 i, func_idx;
+	int err;
+
+	/* vf use 256K as default wq page size, and can't change it */
+	for (i = start_vf_id; i <= end_vf_id; i++) {
+		func_idx = hinic_glb_pf_vf_offset(hwdev->hwif) + i;
+		err = hinic_set_wq_page_size(hwdev, func_idx,
+					     HINIC_DEFAULT_WQ_PAGE_SIZE);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+int hinic_pci_sriov_disable(struct pci_dev *pdev)
+{
+	struct hinic_sriov_info *sriov_info;
+	u16 tmp_vfs;
+
+	sriov_info = hinic_get_sriov_info_by_pcidev(pdev);
+	/* if SR-IOV is already disabled then nothing will be done */
+	if (!sriov_info->sriov_enabled)
+		return 0;
+
+	set_bit(HINIC_SRIOV_DISABLE, &sriov_info->state);
+
+	/* If our VFs are assigned we cannot shut down SR-IOV
+	 * without causing issues, so just leave the hardware
+	 * available but disabled
+	 */
+	if (pci_vfs_assigned(sriov_info->pdev)) {
+		clear_bit(HINIC_SRIOV_DISABLE, &sriov_info->state);
+		dev_warn(&pdev->dev, "Unloading driver while VFs are assigned - VFs will not be deallocated\n");
+		return -EPERM;
+	}
+	sriov_info->sriov_enabled = false;
+
+	/* disable iov and allow time for transactions to clear */
+	pci_disable_sriov(sriov_info->pdev);
+
+	tmp_vfs = (u16)sriov_info->num_vfs;
+	sriov_info->num_vfs = 0;
+	hinic_deinit_vf_hw(sriov_info, OS_VF_ID_TO_HW(0),
+			   OS_VF_ID_TO_HW(tmp_vfs - 1));
+
+	clear_bit(HINIC_SRIOV_DISABLE, &sriov_info->state);
+
+	return 0;
+}
+
+int hinic_pci_sriov_enable(struct pci_dev *pdev, int num_vfs)
+{
+	struct hinic_sriov_info *sriov_info;
+	int err;
+
+	sriov_info = hinic_get_sriov_info_by_pcidev(pdev);
+
+	if (test_and_set_bit(HINIC_SRIOV_ENABLE, &sriov_info->state)) {
+		dev_err(&pdev->dev,
+			"SR-IOV enable in process, please wait, num_vfs %d\n",
+			num_vfs);
+		return -EPERM;
+	}
+
+	err = hinic_init_vf_hw(sriov_info->hwdev, OS_VF_ID_TO_HW(0),
+			       OS_VF_ID_TO_HW((u16)num_vfs - 1));
+	if (err) {
+		dev_err(&sriov_info->pdev->dev,
+			"Failed to init vf in hardware before enable sriov, error %d\n",
+			err);
+		clear_bit(HINIC_SRIOV_ENABLE, &sriov_info->state);
+		return err;
+	}
+
+	err = pci_enable_sriov(sriov_info->pdev, num_vfs);
+	if (err) {
+		dev_err(&pdev->dev,
+			"Failed to enable SR-IOV, error %d\n", err);
+		clear_bit(HINIC_SRIOV_ENABLE, &sriov_info->state);
+		return err;
+	}
+
+	sriov_info->sriov_enabled = true;
+	sriov_info->num_vfs = num_vfs;
+	clear_bit(HINIC_SRIOV_ENABLE, &sriov_info->state);
+
+	return num_vfs;
+}
+
+int hinic_pci_sriov_configure(struct pci_dev *dev, int num_vfs)
+{
+	struct hinic_sriov_info *sriov_info;
+
+	sriov_info = hinic_get_sriov_info_by_pcidev(dev);
+
+	if (test_bit(HINIC_FUNC_REMOVE, &sriov_info->state))
+		return -EBUSY;
+
+	if (!num_vfs)
+		return hinic_pci_sriov_disable(dev);
+	else
+		return hinic_pci_sriov_enable(dev, num_vfs);
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.h b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
new file mode 100644
index 000000000000..4889eabe7b7c
--- /dev/null
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
@@ -0,0 +1,79 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Huawei HiNIC PCI Express Linux driver
+ * Copyright(c) 2017 Huawei Technologies Co., Ltd
+ */
+
+#ifndef HINIC_SRIOV_H
+#define HINIC_SRIOV_H
+
+#include "hinic_hw_dev.h"
+
+#define OS_VF_ID_TO_HW(os_vf_id) ((os_vf_id) + 1)
+#define HW_VF_ID_TO_OS(hw_vf_id) ((hw_vf_id) - 1)
+
+enum hinic_sriov_state {
+	HINIC_SRIOV_DISABLE,
+	HINIC_SRIOV_ENABLE,
+	HINIC_FUNC_REMOVE,
+};
+
+enum {
+	HINIC_IFLA_VF_LINK_STATE_AUTO,	/* link state of the uplink */
+	HINIC_IFLA_VF_LINK_STATE_ENABLE,	/* link always up */
+	HINIC_IFLA_VF_LINK_STATE_DISABLE,	/* link always down */
+};
+
+struct hinic_sriov_info {
+	struct pci_dev *pdev;
+	struct hinic_hwdev *hwdev;
+	bool sriov_enabled;
+	unsigned int num_vfs;
+	unsigned long state;
+};
+
+struct vf_data_storage {
+	u8 vf_mac_addr[ETH_ALEN];
+	bool registered;
+	bool pf_set_mac;
+	u16 pf_vlan;
+	u8 pf_qos;
+	u32 max_rate;
+	u32 min_rate;
+
+	bool link_forced;
+	bool link_up;		/* only valid if VF link is forced */
+	bool spoofchk;
+	bool trust;
+};
+
+struct hinic_register_vf {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+};
+
+struct hinic_vf_vlan_config {
+	u8 status;
+	u8 version;
+	u8 rsvd0[6];
+
+	u16 func_id;
+	u16 vlan_id;
+	u8  qos;
+	u8  rsvd1[7];
+};
+
+void hinic_notify_all_vfs_link_changed(struct hinic_hwdev *hwdev,
+				       u8 link_status);
+
+int hinic_pci_sriov_disable(struct pci_dev *dev);
+
+int hinic_pci_sriov_enable(struct pci_dev *dev, int num_vfs);
+
+int hinic_vf_func_init(struct hinic_hwdev *hwdev);
+
+void hinic_vf_func_free(struct hinic_hwdev *hwdev);
+
+int hinic_pci_sriov_configure(struct pci_dev *dev, int num_vfs);
+
+#endif
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_tx.c b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
index 365016450bdb..4c66a0bc1b28 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_tx.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_tx.c
@@ -673,9 +673,11 @@ static int free_tx_poll(struct napi_struct *napi, int budget)
 
 	if (pkts < budget) {
 		napi_complete(napi);
-		hinic_hwdev_set_msix_state(nic_dev->hwdev,
-					   sq->msix_entry,
-					   HINIC_MSIX_ENABLE);
+		if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+			hinic_hwdev_set_msix_state(nic_dev->hwdev,
+						   sq->msix_entry,
+						   HINIC_MSIX_ENABLE);
+
 		return pkts;
 	}
 
@@ -701,10 +703,11 @@ static irqreturn_t tx_irq(int irq, void *data)
 
 	nic_dev = netdev_priv(txq->netdev);
 
-	/* Disable the interrupt until napi will be completed */
-	hinic_hwdev_set_msix_state(nic_dev->hwdev,
-				   txq->sq->msix_entry,
-				   HINIC_MSIX_DISABLE);
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+		/* Disable the interrupt until napi will be completed */
+		hinic_hwdev_set_msix_state(nic_dev->hwdev,
+					   txq->sq->msix_entry,
+					   HINIC_MSIX_DISABLE);
 
 	hinic_hwdev_msix_cnt_set(nic_dev->hwdev, txq->sq->msix_entry);
 
-- 
cgit v1.2.3-59-g8ed1b


From 1f62cfa19a619f82c098468660b7950477101d45 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Sat, 25 Apr 2020 01:21:11 +0000
Subject: hinic: add net_device_ops associated with vf

adds ndo_set_vf_mac/ndo_set_vf_vlan/ndo_get_vf_config and
ndo_set_vf_trust to configure netdev of virtual function

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_main.c  |  43 +++-
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 318 ++++++++++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_sriov.h |  23 ++
 3 files changed, 383 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index cd71249f9b1c..b66bb86cff96 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -779,8 +779,26 @@ static void hinic_set_rx_mode(struct net_device *netdev)
 static void hinic_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u16 sw_pi, hw_ci, sw_ci;
+	struct hinic_sq *sq;
+	u16 num_sqs, q_id;
+
+	num_sqs = hinic_hwdev_num_qps(nic_dev->hwdev);
 
 	netif_err(nic_dev, drv, netdev, "Tx timeout\n");
+
+	for (q_id = 0; q_id < num_sqs; q_id++) {
+		if (!netif_xmit_stopped(netdev_get_tx_queue(netdev, q_id)))
+			continue;
+
+		sq = hinic_hwdev_get_sq(nic_dev->hwdev, q_id);
+		sw_pi = atomic_read(&sq->wq->prod_idx) & sq->wq->mask;
+		hw_ci = be16_to_cpu(*(u16 *)(sq->hw_ci_addr)) & sq->wq->mask;
+		sw_ci = atomic_read(&sq->wq->cons_idx) & sq->wq->mask;
+		netif_err(nic_dev, drv, netdev, "Txq%d: sw_pi: %d, hw_ci: %d, sw_ci: %d, napi->state: 0x%lx\n",
+			  q_id, sw_pi, hw_ci, sw_ci,
+			  nic_dev->txqs[q_id].napi.state);
+	}
 }
 
 static void hinic_get_stats64(struct net_device *netdev,
@@ -846,6 +864,26 @@ static const struct net_device_ops hinic_netdev_ops = {
 	.ndo_get_stats64 = hinic_get_stats64,
 	.ndo_fix_features = hinic_fix_features,
 	.ndo_set_features = hinic_set_features,
+	.ndo_set_vf_mac	= hinic_ndo_set_vf_mac,
+	.ndo_set_vf_vlan = hinic_ndo_set_vf_vlan,
+	.ndo_get_vf_config = hinic_ndo_get_vf_config,
+	.ndo_set_vf_trust = hinic_ndo_set_vf_trust,
+};
+
+static const struct net_device_ops hinicvf_netdev_ops = {
+	.ndo_open = hinic_open,
+	.ndo_stop = hinic_close,
+	.ndo_change_mtu = hinic_change_mtu,
+	.ndo_set_mac_address = hinic_set_mac_addr,
+	.ndo_validate_addr = eth_validate_addr,
+	.ndo_vlan_rx_add_vid = hinic_vlan_rx_add_vid,
+	.ndo_vlan_rx_kill_vid = hinic_vlan_rx_kill_vid,
+	.ndo_set_rx_mode = hinic_set_rx_mode,
+	.ndo_start_xmit = hinic_xmit_frame,
+	.ndo_tx_timeout = hinic_tx_timeout,
+	.ndo_get_stats64 = hinic_get_stats64,
+	.ndo_fix_features = hinic_fix_features,
+	.ndo_set_features = hinic_set_features,
 };
 
 static void netdev_features_init(struct net_device *netdev)
@@ -983,7 +1021,10 @@ static int nic_dev_init(struct pci_dev *pdev)
 
 	hinic_set_ethtool_ops(netdev);
 
-	netdev->netdev_ops = &hinic_netdev_ops;
+	if (!HINIC_IS_VF(hwdev->hwif))
+		netdev->netdev_ops = &hinic_netdev_ops;
+	else
+		netdev->netdev_ops = &hinicvf_netdev_ops;
 
 	netdev->max_mtu = ETH_MAX_MTU;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index d1c4e1428b38..b24788e9733c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -359,6 +359,168 @@ struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev)
 	return &nic_dev->sriov_info;
 }
 
+static int hinic_check_mac_info(u8 status, u16 vlan_id)
+{
+	if ((status && status != HINIC_MGMT_STATUS_EXIST &&
+	     status != HINIC_PF_SET_VF_ALREADY) ||
+	    (vlan_id & CHECK_IPSU_15BIT &&
+	     status == HINIC_MGMT_STATUS_EXIST))
+		return -EINVAL;
+
+	return 0;
+}
+
+#define HINIC_VLAN_ID_MASK	0x7FFF
+
+int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac, u8 *new_mac,
+		     u16 vlan_id, u16 func_id)
+{
+	struct hinic_port_mac_update mac_info = {0};
+	u16 out_size = sizeof(mac_info);
+	int err;
+
+	if (!hwdev || !old_mac || !new_mac)
+		return -EINVAL;
+
+	if ((vlan_id & HINIC_VLAN_ID_MASK) >= VLAN_N_VID) {
+		dev_err(&hwdev->hwif->pdev->dev, "Invalid VLAN number: %d\n",
+			(vlan_id & HINIC_VLAN_ID_MASK));
+		return -EINVAL;
+	}
+
+	mac_info.func_id = func_id;
+	mac_info.vlan_id = vlan_id;
+	memcpy(mac_info.old_mac, old_mac, ETH_ALEN);
+	memcpy(mac_info.new_mac, new_mac, ETH_ALEN);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_UPDATE_MAC, &mac_info,
+				 sizeof(mac_info), &mac_info, &out_size);
+
+	if (err || !out_size ||
+	    hinic_check_mac_info(mac_info.status, mac_info.vlan_id)) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"Failed to update MAC, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, mac_info.status, out_size);
+		return -EINVAL;
+	}
+
+	if (mac_info.status == HINIC_PF_SET_VF_ALREADY) {
+		dev_warn(&hwdev->hwif->pdev->dev,
+			 "PF has already set VF MAC. Ignore update operation\n");
+		return HINIC_PF_SET_VF_ALREADY;
+	}
+
+	if (mac_info.status == HINIC_MGMT_STATUS_EXIST)
+		dev_warn(&hwdev->hwif->pdev->dev, "MAC is repeated. Ignore update operation\n");
+
+	return 0;
+}
+
+void hinic_get_vf_config(struct hinic_hwdev *hwdev, u16 vf_id,
+			 struct ifla_vf_info *ivi)
+{
+	struct vf_data_storage *vfinfo;
+
+	vfinfo = hwdev->func_to_io.vf_infos + HW_VF_ID_TO_OS(vf_id);
+
+	ivi->vf = HW_VF_ID_TO_OS(vf_id);
+	memcpy(ivi->mac, vfinfo->vf_mac_addr, ETH_ALEN);
+	ivi->vlan = vfinfo->pf_vlan;
+	ivi->qos = vfinfo->pf_qos;
+	ivi->spoofchk = vfinfo->spoofchk;
+	ivi->trusted = vfinfo->trust;
+	ivi->max_tx_rate = vfinfo->max_rate;
+	ivi->min_tx_rate = vfinfo->min_rate;
+
+	if (!vfinfo->link_forced)
+		ivi->linkstate = IFLA_VF_LINK_STATE_AUTO;
+	else if (vfinfo->link_up)
+		ivi->linkstate = IFLA_VF_LINK_STATE_ENABLE;
+	else
+		ivi->linkstate = IFLA_VF_LINK_STATE_DISABLE;
+}
+
+int hinic_ndo_get_vf_config(struct net_device *netdev,
+			    int vf, struct ifla_vf_info *ivi)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info;
+
+	sriov_info = &nic_dev->sriov_info;
+	if (vf >= sriov_info->num_vfs)
+		return -EINVAL;
+
+	hinic_get_vf_config(sriov_info->hwdev, OS_VF_ID_TO_HW(vf), ivi);
+
+	return 0;
+}
+
+int hinic_set_vf_mac(struct hinic_hwdev *hwdev, int vf, unsigned char *mac_addr)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	struct vf_data_storage *vf_info;
+	u16 func_id;
+	int err;
+
+	vf_info = nic_io->vf_infos + HW_VF_ID_TO_OS(vf);
+
+	/* duplicate request, so just return success */
+	if (vf_info->pf_set_mac &&
+	    !memcmp(vf_info->vf_mac_addr, mac_addr, ETH_ALEN))
+		return 0;
+
+	vf_info->pf_set_mac = true;
+
+	func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf;
+	err = hinic_update_mac(hwdev, vf_info->vf_mac_addr,
+			       mac_addr, 0, func_id);
+	if (err) {
+		vf_info->pf_set_mac = false;
+		return err;
+	}
+
+	memcpy(vf_info->vf_mac_addr, mac_addr, ETH_ALEN);
+
+	return 0;
+}
+
+/* ndo_set_vf_mac: validate the request and set @mac for OS VF index @vf. */
+int hinic_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info = &nic_dev->sriov_info;
+	int err;
+
+	if (!is_valid_ether_addr(mac) || vf >= sriov_info->num_vfs)
+		return -EINVAL;
+
+	err = hinic_set_vf_mac(sriov_info->hwdev, OS_VF_ID_TO_HW(vf), mac);
+	if (err)
+		return err;
+
+	netif_info(nic_dev, drv, netdev, "Setting MAC %pM on VF %d\n", mac, vf);
+	netif_info(nic_dev, drv, netdev, "Reload the VF driver to make this change effective.\n");
+
+	return 0;
+}
+
+int hinic_add_vf_vlan(struct hinic_hwdev *hwdev, int vf_id, u16 vlan, u8 qos)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	int err;
+
+	err = hinic_set_vf_vlan(hwdev, true, vlan, qos, vf_id);
+	if (err)
+		return err;
+
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_vlan = vlan;
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].pf_qos = qos;
+
+	dev_info(&hwdev->hwif->pdev->dev, "Setting VLAN %d, QOS 0x%x on VF %d\n",
+		 vlan, qos, HW_VF_ID_TO_OS(vf_id));
+	return 0;
+}
+
 int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
 {
 	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
@@ -381,6 +543,159 @@ int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
 	return 0;
 }
 
+int hinic_update_mac_vlan(struct hinic_dev *nic_dev, u16 old_vlan, u16 new_vlan,
+			  int vf_id)
+{
+	struct vf_data_storage *vf_info;
+	u16 vlan_id;
+	int err;
+
+	if (!nic_dev || old_vlan >= VLAN_N_VID || new_vlan >= VLAN_N_VID)
+		return -EINVAL;
+
+	vf_info = nic_dev->hwdev->func_to_io.vf_infos + HW_VF_ID_TO_OS(vf_id);
+	if (!vf_info->pf_set_mac)
+		return 0;
+
+	vlan_id = old_vlan;
+	if (vlan_id)
+		vlan_id |= HINIC_ADD_VLAN_IN_MAC;
+
+	err = hinic_port_del_mac(nic_dev, vf_info->vf_mac_addr, vlan_id);
+	if (err) {
+		dev_err(&nic_dev->hwdev->hwif->pdev->dev, "Failed to delete VF %d MAC %pM vlan %d\n",
+			HW_VF_ID_TO_OS(vf_id), vf_info->vf_mac_addr, old_vlan);
+		return err;
+	}
+
+	vlan_id = new_vlan;
+	if (vlan_id)
+		vlan_id |= HINIC_ADD_VLAN_IN_MAC;
+
+	err = hinic_port_add_mac(nic_dev, vf_info->vf_mac_addr, vlan_id);
+	if (err) {
+		dev_err(&nic_dev->hwdev->hwif->pdev->dev, "Failed to add VF %d MAC %pM vlan %d\n",
+			HW_VF_ID_TO_OS(vf_id), vf_info->vf_mac_addr, new_vlan);
+		goto out;
+	}
+
+	return 0;
+
+out:
+	vlan_id = old_vlan;
+	if (vlan_id)
+		vlan_id |= HINIC_ADD_VLAN_IN_MAC;
+	hinic_port_add_mac(nic_dev, vf_info->vf_mac_addr, vlan_id);
+
+	return err;
+}
+
+static int set_hw_vf_vlan(struct hinic_dev *nic_dev,
+			  u16 cur_vlanprio, int vf, u16 vlan, u8 qos)
+{
+	u16 old_vlan = cur_vlanprio & VLAN_VID_MASK;
+	int err = 0;
+
+	if (vlan || qos) {
+		if (cur_vlanprio) {
+			err = hinic_kill_vf_vlan(nic_dev->hwdev,
+						 OS_VF_ID_TO_HW(vf));
+			if (err) {
+				dev_err(&nic_dev->sriov_info.pdev->dev, "Failed to delete vf %d old vlan %d\n",
+					vf, old_vlan);
+				goto out;
+			}
+		}
+		err = hinic_add_vf_vlan(nic_dev->hwdev,
+					OS_VF_ID_TO_HW(vf), vlan, qos);
+		if (err) {
+			dev_err(&nic_dev->sriov_info.pdev->dev, "Failed to add vf %d new vlan %d\n",
+				vf, vlan);
+			goto out;
+		}
+	} else {
+		err = hinic_kill_vf_vlan(nic_dev->hwdev, OS_VF_ID_TO_HW(vf));
+		if (err) {
+			dev_err(&nic_dev->sriov_info.pdev->dev, "Failed to delete vf %d vlan %d\n",
+				vf, old_vlan);
+			goto out;
+		}
+	}
+
+	err = hinic_update_mac_vlan(nic_dev, old_vlan, vlan,
+				    OS_VF_ID_TO_HW(vf));
+
+out:
+	return err;
+}
+
+int hinic_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+			  __be16 vlan_proto)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info;
+	u16 vlanprio, cur_vlanprio;
+
+	sriov_info = &nic_dev->sriov_info;
+	if (vf >= sriov_info->num_vfs || vlan > 4095 || qos > 7)
+		return -EINVAL;
+	if (vlan_proto != htons(ETH_P_8021Q))
+		return -EPROTONOSUPPORT;
+	vlanprio = vlan | qos << HINIC_VLAN_PRIORITY_SHIFT;
+	cur_vlanprio = hinic_vf_info_vlanprio(nic_dev->hwdev,
+					      OS_VF_ID_TO_HW(vf));
+	/* duplicate request, so just return success */
+	if (vlanprio == cur_vlanprio)
+		return 0;
+
+	return set_hw_vf_vlan(nic_dev, cur_vlanprio, vf, vlan, qos);
+}
+
+int hinic_set_vf_trust(struct hinic_hwdev *hwdev, u16 vf_id, bool trust)
+{
+	struct vf_data_storage *vf_infos;
+	struct hinic_func_to_io *nic_io;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	nic_io = &hwdev->func_to_io;
+	vf_infos = nic_io->vf_infos;
+	vf_infos[vf_id].trust = trust;
+
+	return 0;
+}
+
+int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
+{
+	struct hinic_dev *adapter = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info;
+	struct hinic_func_to_io *nic_io;
+	bool cur_trust;
+	int err;
+
+	sriov_info = &adapter->sriov_info;
+	nic_io = &adapter->hwdev->func_to_io;
+
+	if (vf >= sriov_info->num_vfs)
+		return -EINVAL;
+
+	cur_trust = nic_io->vf_infos[vf].trust;
+	/* same request, so just return success */
+	if ((setting && cur_trust) || (!setting && !cur_trust))
+		return 0;
+
+	err = hinic_set_vf_trust(adapter->hwdev, vf, setting);
+	if (!err)
+		dev_info(&sriov_info->pdev->dev, "Set VF %d trusted %s succeed\n",
+			 vf, setting ? "on" : "off");
+	else
+		dev_err(&sriov_info->pdev->dev, "Failed set VF %d trusted %s\n",
+			vf, setting ? "on" : "off");
+
+	return err;
+}
+
 /* pf receive message from vf */
 int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
 			u16 in_size, void *buf_out, u16 *out_size)
@@ -484,6 +799,9 @@ void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
 	if (hinic_vf_info_vlanprio(nic_dev->hwdev, vf_id))
 		hinic_kill_vf_vlan(nic_dev->hwdev, vf_id);
 
+	/* hinic_set_vf_trust() indexes vf_infos[] directly, so pass the OS id */
+	if (vf_infos->trust)
+		hinic_set_vf_trust(nic_dev->hwdev, HW_VF_ID_TO_OS(vf_id), false);
 	memset(vf_infos, 0, sizeof(*vf_infos));
 	/* set vf_infos to default */
 	hinic_init_vf_infos(&nic_dev->hwdev->func_to_io, HW_VF_ID_TO_OS(vf_id));
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.h b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
index 4889eabe7b7c..64affc7474b5 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
@@ -52,6 +52,19 @@ struct hinic_register_vf {
 	u8	rsvd0[6];
 };
 
+struct hinic_port_mac_update {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	vlan_id;
+	u16	rsvd1;
+	u8	old_mac[ETH_ALEN];
+	u16	rsvd2;
+	u8	new_mac[ETH_ALEN];
+};
+
 struct hinic_vf_vlan_config {
 	u8 status;
 	u8 version;
@@ -63,6 +76,16 @@ struct hinic_vf_vlan_config {
 	u8  rsvd1[7];
 };
 
+int hinic_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac);
+
+int hinic_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
+			  __be16 vlan_proto);
+
+int hinic_ndo_get_vf_config(struct net_device *netdev,
+			    int vf, struct ifla_vf_info *ivi);
+
+int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting);
+
 void hinic_notify_all_vfs_link_changed(struct hinic_hwdev *hwdev,
 				       u8 link_status);
 
-- 
cgit v1.2.3-59-g8ed1b


From 4b36a0dff794a00989a50581aed2f94c88b57107 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Sat, 25 Apr 2020 11:39:47 +0800
Subject: net: openvswitch: suitable access to the dp_meters

To fix the following sparse warning:
| net/openvswitch/meter.c:109:38: sparse: sparse: incorrect type
| in assignment (different address spaces) ...
| net/openvswitch/meter.c:720:45: sparse: sparse: incorrect type
| in argument 1 (different address spaces) ...

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 915f31123f23..612ad5586ce9 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -107,8 +107,8 @@ dp_meter_instance_realloc(struct dp_meter_table *tbl, u32 size)
 		return -ENOMEM;
 
 	for (i = 0; i < n_meters; i++)
-		new_ti->dp_meters[i] =
-			rcu_dereference_ovsl(ti->dp_meters[i]);
+		if (rcu_dereference_ovsl(ti->dp_meters[i]))
+			new_ti->dp_meters[i] = ti->dp_meters[i];
 
 	rcu_assign_pointer(tbl->ti, new_ti);
 	call_rcu(&ti->rcu, dp_meter_instance_free_rcu);
@@ -752,7 +752,7 @@ void ovs_meters_exit(struct datapath *dp)
 	int i;
 
 	for (i = 0; i < ti->n_meters; i++)
-		ovs_meter_free(ti->dp_meters[i]);
+		ovs_meter_free(rcu_dereference_raw(ti->dp_meters[i]));
 
 	dp_meter_instance_free(ti);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 659d4587fe7233bfdff303744b20d6f41ad04362 Mon Sep 17 00:00:00 2001
From: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Date: Sat, 25 Apr 2020 11:39:48 +0800
Subject: net: openvswitch: use div_u64() for 64-by-32 divisions

Compile the kernel for arm 32 platform, the build warning found.
To fix that, should use div_u64() for divisions.
| net/openvswitch/meter.c:396: undefined reference to `__udivdi3'

[add more commit msg, change reported tag, and use div_u64 instead
of do_div by Tonghao]

Fixes: e57358873bb5d6ca ("net: openvswitch: use u64 for meter bucket")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Tested-by: Tonghao Zhang <xiangxia.m.yue@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/openvswitch/meter.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/openvswitch/meter.c b/net/openvswitch/meter.c
index 612ad5586ce9..3d3d8e094546 100644
--- a/net/openvswitch/meter.c
+++ b/net/openvswitch/meter.c
@@ -393,7 +393,7 @@ static struct dp_meter *dp_meter_create(struct nlattr **a)
 		 * Start with a full bucket.
 		 */
 		band->bucket = (band->burst_size + band->rate) * 1000ULL;
-		band_max_delta_t = band->bucket / band->rate;
+		band_max_delta_t = div_u64(band->bucket, band->rate);
 		if (band_max_delta_t > meter->max_delta_t)
 			meter->max_delta_t = band_max_delta_t;
 		band++;
-- 
cgit v1.2.3-59-g8ed1b


From 3fd8dc269ff0647819589c21b2ce60af6fc0a455 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sun, 26 Apr 2020 10:13:48 +0800
Subject: net: hns3: remove an unnecessary check in hclge_set_umv_space()

Since hclge_set_umv_space() is only called by hclge_init_umv_space(),
parameter 'allocated_size' will not be NULL.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index c74990a59e10..e2fec832fdf0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -7227,8 +7227,7 @@ static int hclge_set_umv_space(struct hclge_dev *hdev, u16 space_size,
 		return ret;
 	}
 
-	if (allocated_size)
-		*allocated_size = le32_to_cpu(desc.data[1]);
+	*allocated_size = le32_to_cpu(desc.data[1]);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From b0b3fb6759220d4fa359e9ac486859c9d422c204 Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Sat, 18 Apr 2020 09:37:35 +0800
Subject: bpf: Remove set but not used variable 'dst_known'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes gcc '-Wunused-but-set-variable' warning:

kernel/bpf/verifier.c:5603:18: warning: variable ‘dst_known’
set but not used [-Wunused-but-set-variable], delete this
variable.

Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200418013735.67882-1-maowenan@huawei.com
---
 kernel/bpf/verifier.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index fa1d8245b925..15ba8bf92ca9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -5609,7 +5609,7 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 {
 	struct bpf_reg_state *regs = cur_regs(env);
 	u8 opcode = BPF_OP(insn->code);
-	bool src_known, dst_known;
+	bool src_known;
 	s64 smin_val, smax_val;
 	u64 umin_val, umax_val;
 	s32 s32_min_val, s32_max_val;
@@ -5631,7 +5631,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 
 	if (alu32) {
 		src_known = tnum_subreg_is_const(src_reg.var_off);
-		dst_known = tnum_subreg_is_const(dst_reg->var_off);
 		if ((src_known &&
 		     (s32_min_val != s32_max_val || u32_min_val != u32_max_val)) ||
 		    s32_min_val > s32_max_val || u32_min_val > u32_max_val) {
@@ -5643,7 +5642,6 @@ static int adjust_scalar_min_max_vals(struct bpf_verifier_env *env,
 		}
 	} else {
 		src_known = tnum_is_const(src_reg.var_off);
-		dst_known = tnum_is_const(dst_reg->var_off);
 		if ((src_known &&
 		     (smin_val != smax_val || umin_val != umax_val)) ||
 		    smin_val > smax_val || umin_val > umax_val) {
-- 
cgit v1.2.3-59-g8ed1b


From 93e516894752e8b2ae3c2e7671e3ea33e27e3898 Mon Sep 17 00:00:00 2001
From: Jagadeesh Pagadala <jagdsh.linux@gmail.com>
Date: Sun, 19 Apr 2020 11:09:17 +0530
Subject: tools/bpf/bpftool: Remove duplicate headers

Code cleanup: Remove duplicate headers which are included twice.

Signed-off-by: Jagadeesh Pagadala <jagdsh.linux@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/1587274757-14101-1-git-send-email-jagdsh.linux@gmail.com
---
 tools/bpf/bpftool/btf.c        | 1 -
 tools/bpf/bpftool/gen.c        | 1 -
 tools/bpf/bpftool/jit_disasm.c | 1 -
 3 files changed, 3 deletions(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index bcaf55b59498..41a1346934a1 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -15,7 +15,6 @@
 #include <linux/hashtable.h>
 #include <sys/types.h>
 #include <sys/stat.h>
-#include <unistd.h>
 
 #include "json_writer.h"
 #include "main.h"
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index f8113b3646f5..0e5f0236cc76 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -17,7 +17,6 @@
 #include <sys/types.h>
 #include <sys/stat.h>
 #include <sys/mman.h>
-#include <unistd.h>
 #include <bpf/btf.h>
 
 #include "bpf/libbpf_internal.h"
diff --git a/tools/bpf/bpftool/jit_disasm.c b/tools/bpf/bpftool/jit_disasm.c
index f7f5885aa3ba..e7e7eee9f172 100644
--- a/tools/bpf/bpftool/jit_disasm.c
+++ b/tools/bpf/bpftool/jit_disasm.c
@@ -15,7 +15,6 @@
 #include <stdio.h>
 #include <stdarg.h>
 #include <stdint.h>
-#include <stdio.h>
 #include <stdlib.h>
 #include <assert.h>
 #include <unistd.h>
-- 
cgit v1.2.3-59-g8ed1b


From 0456ea170cd665ddbb9503be92e39f96055dd5fa Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Mon, 20 Apr 2020 10:46:10 -0700
Subject: bpf: Enable more helpers for
 BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}

Currently the following prog types don't fall back to bpf_base_func_proto()
(instead they have cgroup_base_func_proto which has a limited set of
helpers from bpf_base_func_proto):
* BPF_PROG_TYPE_CGROUP_DEVICE
* BPF_PROG_TYPE_CGROUP_SYSCTL
* BPF_PROG_TYPE_CGROUP_SOCKOPT

I don't see any specific reason why we shouldn't use bpf_base_func_proto():
every other type of program (except bpf-lirc and, understandably, tracing)
uses it, so let's fall back to bpf_base_func_proto() for those prog types
as well.

This basically boils down to adding access to the following helpers:
* BPF_FUNC_get_prandom_u32
* BPF_FUNC_get_smp_processor_id
* BPF_FUNC_get_numa_node_id
* BPF_FUNC_tail_call
* BPF_FUNC_ktime_get_ns
* BPF_FUNC_spin_lock (CAP_SYS_ADMIN)
* BPF_FUNC_spin_unlock (CAP_SYS_ADMIN)
* BPF_FUNC_jiffies64 (CAP_SYS_ADMIN)

I've also added bpf_perf_event_output() because it's really handy for
logging and debugging.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200420174610.77494-1-sdf@google.com
---
 include/linux/bpf.h                                |  1 +
 kernel/bpf/cgroup.c                                | 20 +++---------------
 net/core/filter.c                                  |  2 +-
 .../testing/selftests/bpf/verifier/event_output.c  | 24 ++++++++++++++++++++++
 4 files changed, 29 insertions(+), 18 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index fd2b2322412d..25da6ff2a880 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1523,6 +1523,7 @@ extern const struct bpf_func_proto bpf_strtoul_proto;
 extern const struct bpf_func_proto bpf_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_jiffies64_proto;
 extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
+extern const struct bpf_func_proto bpf_event_output_data_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index cb305e71e7de..4d748c5785bc 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1060,30 +1060,16 @@ static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
-	case BPF_FUNC_map_lookup_elem:
-		return &bpf_map_lookup_elem_proto;
-	case BPF_FUNC_map_update_elem:
-		return &bpf_map_update_elem_proto;
-	case BPF_FUNC_map_delete_elem:
-		return &bpf_map_delete_elem_proto;
-	case BPF_FUNC_map_push_elem:
-		return &bpf_map_push_elem_proto;
-	case BPF_FUNC_map_pop_elem:
-		return &bpf_map_pop_elem_proto;
-	case BPF_FUNC_map_peek_elem:
-		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_get_current_uid_gid:
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_local_storage:
 		return &bpf_get_local_storage_proto;
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
-	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
-			return bpf_get_trace_printk_proto();
-		/* fall through */
+	case BPF_FUNC_perf_event_output:
+		return &bpf_event_output_data_proto;
 	default:
-		return NULL;
+		return bpf_base_func_proto(func_id);
 	}
 }
 
diff --git a/net/core/filter.c b/net/core/filter.c
index 7d6ceaa54d21..a943df3ad8b0 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4214,7 +4214,7 @@ BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags
 	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
 }
 
-static const struct bpf_func_proto bpf_event_output_data_proto =  {
+const struct bpf_func_proto bpf_event_output_data_proto =  {
 	.func		= bpf_event_output_data,
 	.gpl_only       = true,
 	.ret_type       = RET_INTEGER,
diff --git a/tools/testing/selftests/bpf/verifier/event_output.c b/tools/testing/selftests/bpf/verifier/event_output.c
index 130553e19eca..99f8f582c02b 100644
--- a/tools/testing/selftests/bpf/verifier/event_output.c
+++ b/tools/testing/selftests/bpf/verifier/event_output.c
@@ -92,3 +92,27 @@
 	.result = ACCEPT,
 	.retval = 1,
 },
+{
+	"perfevent for cgroup dev",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_DEVICE,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"perfevent for cgroup sysctl",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_SYSCTL,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
+{
+	"perfevent for cgroup sockopt",
+	.insns =  { __PERF_EVENT_INSNS__ },
+	.prog_type = BPF_PROG_TYPE_CGROUP_SOCKOPT,
+	.fixup_map_event_output = { 4 },
+	.result = ACCEPT,
+	.retval = 1,
+},
-- 
cgit v1.2.3-59-g8ed1b


From ae460c022453337850bdc36a36bf7596a6cfcf99 Mon Sep 17 00:00:00 2001
From: Yoshiki Komachi <komachi.yoshiki@gmail.com>
Date: Tue, 21 Apr 2020 09:05:27 +0900
Subject: bpf_helpers.h: Add note for building with vmlinux.h or linux/types.h

The following error was shown when a bpf program was compiled without
vmlinux.h auto-generated from BTF:

 # clang -I./linux/tools/lib/ -I/lib/modules/$(uname -r)/build/include/ \
   -O2 -Wall -target bpf -emit-llvm -c bpf_prog.c -o bpf_prog.bc
 ...
 In file included from linux/tools/lib/bpf/bpf_helpers.h:5:
 linux/tools/lib/bpf/bpf_helper_defs.h:56:82: error: unknown type name '__u64'
 ...

It seems that bpf programs are intended to be built together with
vmlinux.h (which has all the __u64 and other typedefs). But users may
mistakenly think "include <linux/types.h>" is missing, because
vmlinux.h is not familiar to non-bpf developers. IMO, an explicit
comment should therefore be added to bpf_helpers.h, as this patch
does.

Signed-off-by: Yoshiki Komachi <komachi.yoshiki@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/1587427527-29399-1-git-send-email-komachi.yoshiki@gmail.com
---
 tools/lib/bpf/bpf_helpers.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index f69cc208778a..60aad054eea1 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -2,6 +2,12 @@
 #ifndef __BPF_HELPERS__
 #define __BPF_HELPERS__
 
+/*
+ * Note that bpf programs need to include either
+ * vmlinux.h (auto-generated from BTF) or linux/types.h
+ * in advance since bpf_helper_defs.h uses such types
+ * as __u64.
+ */
 #include "bpf_helper_defs.h"
 
 #define __uint(name, val) int (*name)[val]
-- 
cgit v1.2.3-59-g8ed1b


From 745abfaa9eafa597d31fdf24a3249e5206a98768 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Mon, 20 Apr 2020 17:28:04 -0700
Subject: bpf, riscv: Fix tail call count off by one in RV32 BPF JIT

This patch fixes an off by one error in the RV32 JIT handling for BPF
tail call. Currently, the code decrements TCC before checking if it
is less than zero. This limits the maximum number of tail calls to 32
instead of 33 as in other JITs. The fix is to instead check the old
value of TCC before decrementing.

Fixes: 5f316b65e99f ("riscv, bpf: Add RV32G eBPF JIT")
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Xi Wang <xi.wang@gmail.com>
Link: https://lore.kernel.org/bpf/20200421002804.5118-1-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp32.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 302934177760..11083d4d5f2d 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -770,12 +770,13 @@ static int emit_bpf_tail_call(int insn, struct rv_jit_context *ctx)
 	emit_bcc(BPF_JGE, lo(idx_reg), RV_REG_T1, off, ctx);
 
 	/*
-	 * if ((temp_tcc = tcc - 1) < 0)
+	 * temp_tcc = tcc - 1;
+	 * if (tcc < 0)
 	 *   goto out;
 	 */
 	emit(rv_addi(RV_REG_T1, RV_REG_TCC, -1), ctx);
 	off = (tc_ninsn - (ctx->ninsns - start_insn)) << 2;
-	emit_bcc(BPF_JSLT, RV_REG_T1, RV_REG_ZERO, off, ctx);
+	emit_bcc(BPF_JSLT, RV_REG_TCC, RV_REG_ZERO, off, ctx);
 
 	/*
 	 * prog = array->ptrs[index];
-- 
cgit v1.2.3-59-g8ed1b


From 6890896bd765b0504761c61901c9804fca23bfb2 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 24 Apr 2020 16:59:41 -0700
Subject: bpf: Fix missing bpf_base_func_proto in cgroup_base_func_proto for
 CGROUP_NET=n

linux-next build bot reported compile issue [1] with one of its
configs. It looks like when we have CONFIG_NET=n and
CONFIG_BPF{,_SYSCALL}=y, we are missing the bpf_base_func_proto
definition (from net/core/filter.c) in cgroup_base_func_proto.

I'm reshuffling the code a bit to make it work. The common helpers
are moved into kernel/bpf/helpers.c and the bpf_base_func_proto is
exported from there.
Also, bpf_get_raw_cpu_id goes into kernel/bpf/core.c akin to existing
bpf_user_rnd_u32.

[1] https://lore.kernel.org/linux-next/CAKH8qBsBvKHswiX1nx40LgO+BGeTmb1NX8tiTttt_0uu6T3dCA@mail.gmail.com/T/#mff8b0c083314c68c2e2ef0211cb11bc20dc13c72

Fixes: 0456ea170cd6 ("bpf: Enable more helpers for BPF_PROG_TYPE_CGROUP_{DEVICE,SYSCTL,SOCKOPT}")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200424235941.58382-1-sdf@google.com
---
 include/linux/bpf.h    |  8 ++++++
 include/linux/filter.h |  2 --
 kernel/bpf/core.c      |  5 ++++
 kernel/bpf/helpers.c   | 73 ++++++++++++++++++++++++++++++++++++++++++++++
 net/core/filter.c      | 78 +-------------------------------------------------
 5 files changed, 87 insertions(+), 79 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 25da6ff2a880..5147e11e53ff 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1215,6 +1215,7 @@ int btf_check_type_match(struct bpf_verifier_env *env, struct bpf_prog *prog,
 
 struct bpf_prog *bpf_prog_by_id(u32 id);
 
+const struct bpf_func_proto *bpf_base_func_proto(enum bpf_func_id func_id);
 #else /* !CONFIG_BPF_SYSCALL */
 static inline struct bpf_prog *bpf_prog_get(u32 ufd)
 {
@@ -1365,6 +1366,12 @@ static inline struct bpf_prog *bpf_prog_by_id(u32 id)
 {
 	return ERR_PTR(-ENOTSUPP);
 }
+
+static inline const struct bpf_func_proto *
+bpf_base_func_proto(enum bpf_func_id func_id)
+{
+	return NULL;
+}
 #endif /* CONFIG_BPF_SYSCALL */
 
 static inline struct bpf_prog *bpf_prog_get_type(u32 ufd,
@@ -1531,6 +1538,7 @@ const struct bpf_func_proto *bpf_tracing_func_proto(
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
+u64 bpf_get_raw_cpu_id(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
 
 #if defined(CONFIG_NET)
 bool bpf_sock_common_is_valid_access(int off, int size,
diff --git a/include/linux/filter.h b/include/linux/filter.h
index 9b5aa5c483cc..af37318bb1c5 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -863,8 +863,6 @@ int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog);
 int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog,
 			      bpf_aux_classic_check_t trans, bool save_orig);
 void bpf_prog_destroy(struct bpf_prog *fp);
-const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id);
 
 int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk);
 int sk_attach_bpf(u32 ufd, struct sock *sk);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 916f5132a984..0cc91805069a 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2136,6 +2136,11 @@ BPF_CALL_0(bpf_user_rnd_u32)
 	return res;
 }
 
+BPF_CALL_0(bpf_get_raw_cpu_id)
+{
+	return raw_smp_processor_id();
+}
+
 /* Weak definitions of helper functions in case we don't have bpf syscall. */
 const struct bpf_func_proto bpf_map_lookup_elem_proto __weak;
 const struct bpf_func_proto bpf_map_update_elem_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bafc53ddd350..dbba4f41d508 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -562,3 +562,76 @@ const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto = {
 	.arg3_type      = ARG_PTR_TO_UNINIT_MEM,
 	.arg4_type      = ARG_CONST_SIZE,
 };
+
+static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
+	.func		= bpf_get_raw_cpu_id,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
+BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map,
+	   u64, flags, void *, data, u64, size)
+{
+	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
+		return -EINVAL;
+
+	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
+}
+
+const struct bpf_func_proto bpf_event_output_data_proto =  {
+	.func		= bpf_event_output_data,
+	.gpl_only       = true,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_CTX,
+	.arg2_type      = ARG_CONST_MAP_PTR,
+	.arg3_type      = ARG_ANYTHING,
+	.arg4_type      = ARG_PTR_TO_MEM,
+	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+};
+
+const struct bpf_func_proto *
+bpf_base_func_proto(enum bpf_func_id func_id)
+{
+	switch (func_id) {
+	case BPF_FUNC_map_lookup_elem:
+		return &bpf_map_lookup_elem_proto;
+	case BPF_FUNC_map_update_elem:
+		return &bpf_map_update_elem_proto;
+	case BPF_FUNC_map_delete_elem:
+		return &bpf_map_delete_elem_proto;
+	case BPF_FUNC_map_push_elem:
+		return &bpf_map_push_elem_proto;
+	case BPF_FUNC_map_pop_elem:
+		return &bpf_map_pop_elem_proto;
+	case BPF_FUNC_map_peek_elem:
+		return &bpf_map_peek_elem_proto;
+	case BPF_FUNC_get_prandom_u32:
+		return &bpf_get_prandom_u32_proto;
+	case BPF_FUNC_get_smp_processor_id:
+		return &bpf_get_raw_smp_processor_id_proto;
+	case BPF_FUNC_get_numa_node_id:
+		return &bpf_get_numa_node_id_proto;
+	case BPF_FUNC_tail_call:
+		return &bpf_tail_call_proto;
+	case BPF_FUNC_ktime_get_ns:
+		return &bpf_ktime_get_ns_proto;
+	default:
+		break;
+	}
+
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
+	switch (func_id) {
+	case BPF_FUNC_spin_lock:
+		return &bpf_spin_lock_proto;
+	case BPF_FUNC_spin_unlock:
+		return &bpf_spin_unlock_proto;
+	case BPF_FUNC_trace_printk:
+		return bpf_get_trace_printk_proto();
+	case BPF_FUNC_jiffies64:
+		return &bpf_jiffies64_proto;
+	default:
+		return NULL;
+	}
+}
diff --git a/net/core/filter.c b/net/core/filter.c
index a943df3ad8b0..a605626142b6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -256,17 +256,6 @@ BPF_CALL_2(bpf_skb_load_helper_32_no_cache, const struct sk_buff *, skb,
 					  offset);
 }
 
-BPF_CALL_0(bpf_get_raw_cpu_id)
-{
-	return raw_smp_processor_id();
-}
-
-static const struct bpf_func_proto bpf_get_raw_smp_processor_id_proto = {
-	.func		= bpf_get_raw_cpu_id,
-	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
-};
-
 static u32 convert_skb_access(int skb_field, int dst_reg, int src_reg,
 			      struct bpf_insn *insn_buf)
 {
@@ -4205,26 +4194,6 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-BPF_CALL_5(bpf_event_output_data, void *, ctx, struct bpf_map *, map, u64, flags,
-	   void *, data, u64, size)
-{
-	if (unlikely(flags & ~(BPF_F_INDEX_MASK)))
-		return -EINVAL;
-
-	return bpf_event_output(map, flags, data, size, NULL, 0, NULL);
-}
-
-const struct bpf_func_proto bpf_event_output_data_proto =  {
-	.func		= bpf_event_output_data,
-	.gpl_only       = true,
-	.ret_type       = RET_INTEGER,
-	.arg1_type      = ARG_PTR_TO_CTX,
-	.arg2_type      = ARG_CONST_MAP_PTR,
-	.arg3_type      = ARG_ANYTHING,
-	.arg4_type      = ARG_PTR_TO_MEM,
-	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
-};
-
 BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	   int, level, int, optname, char *, optval, int, optlen)
 {
@@ -5983,52 +5952,7 @@ bool bpf_helper_changes_pkt_data(void *func)
 	return false;
 }
 
-const struct bpf_func_proto *
-bpf_base_func_proto(enum bpf_func_id func_id)
-{
-	switch (func_id) {
-	case BPF_FUNC_map_lookup_elem:
-		return &bpf_map_lookup_elem_proto;
-	case BPF_FUNC_map_update_elem:
-		return &bpf_map_update_elem_proto;
-	case BPF_FUNC_map_delete_elem:
-		return &bpf_map_delete_elem_proto;
-	case BPF_FUNC_map_push_elem:
-		return &bpf_map_push_elem_proto;
-	case BPF_FUNC_map_pop_elem:
-		return &bpf_map_pop_elem_proto;
-	case BPF_FUNC_map_peek_elem:
-		return &bpf_map_peek_elem_proto;
-	case BPF_FUNC_get_prandom_u32:
-		return &bpf_get_prandom_u32_proto;
-	case BPF_FUNC_get_smp_processor_id:
-		return &bpf_get_raw_smp_processor_id_proto;
-	case BPF_FUNC_get_numa_node_id:
-		return &bpf_get_numa_node_id_proto;
-	case BPF_FUNC_tail_call:
-		return &bpf_tail_call_proto;
-	case BPF_FUNC_ktime_get_ns:
-		return &bpf_ktime_get_ns_proto;
-	default:
-		break;
-	}
-
-	if (!capable(CAP_SYS_ADMIN))
-		return NULL;
-
-	switch (func_id) {
-	case BPF_FUNC_spin_lock:
-		return &bpf_spin_lock_proto;
-	case BPF_FUNC_spin_unlock:
-		return &bpf_spin_unlock_proto;
-	case BPF_FUNC_trace_printk:
-		return bpf_get_trace_printk_proto();
-	case BPF_FUNC_jiffies64:
-		return &bpf_jiffies64_proto;
-	default:
-		return NULL;
-	}
-}
+const struct bpf_func_proto bpf_event_output_data_proto __weak;
 
 static const struct bpf_func_proto *
 sock_filter_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
-- 
cgit v1.2.3-59-g8ed1b


From 6f3f65d80dac8f2bafce2213005821fccdce194c Mon Sep 17 00:00:00 2001
From: Lorenzo Colitti <lorenzo@google.com>
Date: Mon, 20 Apr 2020 11:34:08 -0700
Subject: net: bpf: Allow TC programs to call BPF_FUNC_skb_change_head
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This allows TC eBPF programs to modify and forward (redirect) packets
from interfaces without ethernet headers (for example cellular)
to interfaces with (for example ethernet/wifi).

The lack of this appears to simply be an oversight.

Tested:
  in active use in Android R on 4.14+ devices for ipv6
  cellular to wifi tethering offload.

Signed-off-by: Lorenzo Colitti <lorenzo@google.com>
Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index a605626142b6..da3b7a72c37c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6137,6 +6137,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_adjust_room_proto;
 	case BPF_FUNC_skb_change_tail:
 		return &bpf_skb_change_tail_proto;
+	case BPF_FUNC_skb_change_head:
+		return &bpf_skb_change_head_proto;
 	case BPF_FUNC_skb_get_tunnel_key:
 		return &bpf_skb_get_tunnel_key_proto;
 	case BPF_FUNC_skb_set_tunnel_key:
-- 
cgit v1.2.3-59-g8ed1b


From 082b57e3eb09810d357083cca5ee2df02c16aec9 Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Mon, 20 Apr 2020 11:47:50 -0700
Subject: net: bpf: Make bpf_ktime_get_ns() available to non GPL programs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The entire implementation is in kernel/bpf/helpers.c:

BPF_CALL_0(bpf_ktime_get_ns) {
       /* NMI safe access to clock monotonic */
       return ktime_get_mono_fast_ns();
}

const struct bpf_func_proto bpf_ktime_get_ns_proto = {
       .func           = bpf_ktime_get_ns,
       .gpl_only       = false,
       .ret_type       = RET_INTEGER,
};

and this was presumably marked GPL due to kernel/time/timekeeping.c:
  EXPORT_SYMBOL_GPL(ktime_get_mono_fast_ns);

and while that may make sense for kernel modules (although even that
is doubtful), there is currently AFAICT no other source of time
available to ebpf.

Furthermore this is really just equivalent to clock_gettime(CLOCK_MONOTONIC)
which is exposed to userspace (via vdso even to make it performant)...

As such, I see no reason to keep the GPL restriction.
(In the future I'd like to have access to time from Apache licensed ebpf code)

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/helpers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index dbba4f41d508..9a6b23387d02 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -151,7 +151,7 @@ BPF_CALL_0(bpf_ktime_get_ns)
 
 const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.func		= bpf_ktime_get_ns,
-	.gpl_only	= true,
+	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 0a05861f80fe7d4dcfdabcc98d9854947573e072 Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Wed, 22 Apr 2020 01:29:27 +0200
Subject: xsk: Fix typo in xsk_umem_consume_tx and xsk_generic_xmit comments

s/backpreassure/backpressure/

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/20200421232927.21082-1-tklauser@distanz.ch
---
 net/xdp/xsk.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index c350108aa38d..f6e6609f70a3 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -322,7 +322,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 		if (!xskq_cons_peek_desc(xs->tx, desc, umem))
 			continue;
 
-		/* This is the backpreassure mechanism for the Tx path.
+		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
@@ -406,7 +406,7 @@ static int xsk_generic_xmit(struct sock *sk)
 		addr = desc.addr;
 		buffer = xdp_umem_get_data(xs->umem, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
-		/* This is the backpreassure mechanism for the Tx path.
+		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
 		 * if there is space in it. This avoids having to implement
 		 * any buffering in the Tx path.
-- 
cgit v1.2.3-59-g8ed1b


From 71d19214776e61b33da48f7c1b46e522c7f78221 Mon Sep 17 00:00:00 2001
From: Maciej Żenczykowski <maze@google.com>
Date: Sun, 26 Apr 2020 09:15:25 -0700
Subject: bpf: add bpf_ktime_get_boot_ns()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On a device like a cellphone, which is constantly suspending
and resuming, CLOCK_MONOTONIC is not particularly useful for
keeping track of or reacting to external network events.
Instead you want to use CLOCK_BOOTTIME.

Hence add bpf_ktime_get_boot_ns() as a mirror of bpf_ktime_get_ns()
based around CLOCK_BOOTTIME instead of CLOCK_MONOTONIC.

Signed-off-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 drivers/media/rc/bpf-lirc.c    |  2 ++
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 13 ++++++++++++-
 kernel/bpf/core.c              |  1 +
 kernel/bpf/helpers.c           | 14 ++++++++++++++
 kernel/trace/bpf_trace.c       |  2 ++
 tools/include/uapi/linux/bpf.h | 13 ++++++++++++-
 7 files changed, 44 insertions(+), 2 deletions(-)

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 0f3417d161b8..069c42f22a8c 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -103,6 +103,8 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	case BPF_FUNC_ktime_get_boot_ns:
+		return &bpf_ktime_get_boot_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_prandom_u32:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 5147e11e53ff..10960cfabea4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1509,6 +1509,7 @@ extern const struct bpf_func_proto bpf_get_smp_processor_id_proto;
 extern const struct bpf_func_proto bpf_get_numa_node_id_proto;
 extern const struct bpf_func_proto bpf_tail_call_proto;
 extern const struct bpf_func_proto bpf_ktime_get_ns_proto;
+extern const struct bpf_func_proto bpf_ktime_get_boot_ns_proto;
 extern const struct bpf_func_proto bpf_get_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_get_current_uid_gid_proto;
 extern const struct bpf_func_proto bpf_get_current_comm_proto;
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7bbf1b65be10..4a6c47f3febe 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -652,6 +652,8 @@ union bpf_attr {
  * u64 bpf_ktime_get_ns(void)
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
+ * 		Does not include time the system was suspended.
+ * 		See: clock_gettime(CLOCK_MONOTONIC)
  * 	Return
  * 		Current *ktime*.
  *
@@ -3025,6 +3027,14 @@ union bpf_attr {
  *		* **-EOPNOTSUPP**	Unsupported operation, for example a
  *					call from outside of TC ingress.
  *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *
+ * u64 bpf_ktime_get_boot_ns(void)
+ * 	Description
+ * 		Return the time elapsed since system boot, in nanoseconds.
+ * 		Does include the time the system was suspended.
+ * 		See: clock_gettime(CLOCK_BOOTTIME)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3151,7 +3161,8 @@ union bpf_attr {
 	FN(xdp_output),			\
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
-	FN(sk_assign),
+	FN(sk_assign),			\
+	FN(ktime_get_boot_ns),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 0cc91805069a..6aa11de67315 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -2156,6 +2156,7 @@ const struct bpf_func_proto bpf_get_prandom_u32_proto __weak;
 const struct bpf_func_proto bpf_get_smp_processor_id_proto __weak;
 const struct bpf_func_proto bpf_get_numa_node_id_proto __weak;
 const struct bpf_func_proto bpf_ktime_get_ns_proto __weak;
+const struct bpf_func_proto bpf_ktime_get_boot_ns_proto __weak;
 
 const struct bpf_func_proto bpf_get_current_pid_tgid_proto __weak;
 const struct bpf_func_proto bpf_get_current_uid_gid_proto __weak;
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 9a6b23387d02..5c0290e0696e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -155,6 +155,18 @@ const struct bpf_func_proto bpf_ktime_get_ns_proto = {
 	.ret_type	= RET_INTEGER,
 };
 
+BPF_CALL_0(bpf_ktime_get_boot_ns)
+{
+	/* NMI safe access to clock boottime */
+	return ktime_get_boot_fast_ns();
+}
+
+const struct bpf_func_proto bpf_ktime_get_boot_ns_proto = {
+	.func		= bpf_ktime_get_boot_ns,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+};
+
 BPF_CALL_0(bpf_get_current_pid_tgid)
 {
 	struct task_struct *task = current;
@@ -615,6 +627,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	case BPF_FUNC_ktime_get_boot_ns:
+		return &bpf_ktime_get_boot_ns_proto;
 	default:
 		break;
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index ca1796747a77..e875c95d3ced 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -797,6 +797,8 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_map_peek_elem_proto;
 	case BPF_FUNC_ktime_get_ns:
 		return &bpf_ktime_get_ns_proto;
+	case BPF_FUNC_ktime_get_boot_ns:
+		return &bpf_ktime_get_boot_ns_proto;
 	case BPF_FUNC_tail_call:
 		return &bpf_tail_call_proto;
 	case BPF_FUNC_get_current_pid_tgid:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 7bbf1b65be10..4a6c47f3febe 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -652,6 +652,8 @@ union bpf_attr {
  * u64 bpf_ktime_get_ns(void)
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
+ * 		Does not include time the system was suspended.
+ * 		See: clock_gettime(CLOCK_MONOTONIC)
  * 	Return
  * 		Current *ktime*.
  *
@@ -3025,6 +3027,14 @@ union bpf_attr {
  *		* **-EOPNOTSUPP**	Unsupported operation, for example a
  *					call from outside of TC ingress.
  *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *
+ * u64 bpf_ktime_get_boot_ns(void)
+ * 	Description
+ * 		Return the time elapsed since system boot, in nanoseconds.
+ * 		Does include the time the system was suspended.
+ * 		See: clock_gettime(CLOCK_BOOTTIME)
+ * 	Return
+ * 		Current *ktime*.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3151,7 +3161,8 @@ union bpf_attr {
 	FN(xdp_output),			\
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
-	FN(sk_assign),
+	FN(sk_assign),			\
+	FN(ktime_get_boot_ns),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3-59-g8ed1b


From 6f8a57ccf8511724e6f48d732cb2940889789ab2 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Thu, 23 Apr 2020 12:58:50 -0700
Subject: bpf: Make verifier log more relevant by default

To make BPF verifier verbose log more relevant and easier to use to debug
verification failures, "pop" parts of log that were successfully verified.
This has effect of leaving only verifier logs that correspond to code branches
that lead to verification failure, which in practice should result in much
shorter and more relevant verifier log dumps. This behavior is made the
default behavior and can be overridden to do exhaustive logging by specifying
BPF_LOG_LEVEL2 log level.

Using BPF_LOG_LEVEL2 to disable this behavior is not ideal, because in some
cases it's good to have BPF_LOG_LEVEL2 per-instruction register dump
verbosity, but still have only relevant verifier branches logged. But for this
patch, I didn't want to add any new flags. It might be worth-while to just
rethink how BPF verifier logging is performed and requested and streamline it
a bit. But this trimming of successfully verified branches seems to be useful
and a good default behavior.

To test this, I modified runqslower slightly to introduce read of
uninitialized stack variable. Log (**truncated in the middle** to save many
lines out of this commit message) BEFORE this change:

; int handle__sched_switch(u64 *ctx)
0: (bf) r6 = r1
; struct task_struct *prev = (struct task_struct *)ctx[1];
1: (79) r1 = *(u64 *)(r6 +8)
func 'sched_switch' arg1 has btf_id 151 type STRUCT 'task_struct'
2: (b7) r2 = 0
; struct event event = {};
3: (7b) *(u64 *)(r10 -24) = r2
last_idx 3 first_idx 0
regs=4 stack=0 before 2: (b7) r2 = 0
4: (7b) *(u64 *)(r10 -32) = r2
5: (7b) *(u64 *)(r10 -40) = r2
6: (7b) *(u64 *)(r10 -48) = r2
; if (prev->state == TASK_RUNNING)

[ ... instruction dump from insn #7 through #50 are cut out ... ]

51: (b7) r2 = 16
52: (85) call bpf_get_current_comm#16
last_idx 52 first_idx 42
regs=4 stack=0 before 51: (b7) r2 = 16
; bpf_perf_event_output(ctx, &events, BPF_F_CURRENT_CPU,
53: (bf) r1 = r6
54: (18) r2 = 0xffff8881f3868800
56: (18) r3 = 0xffffffff
58: (bf) r4 = r7
59: (b7) r5 = 32
60: (85) call bpf_perf_event_output#25
last_idx 60 first_idx 53
regs=20 stack=0 before 59: (b7) r5 = 32
61: (bf) r2 = r10
; event.pid = pid;
62: (07) r2 += -16
; bpf_map_delete_elem(&start, &pid);
63: (18) r1 = 0xffff8881f3868000
65: (85) call bpf_map_delete_elem#3
; }
66: (b7) r0 = 0
67: (95) exit

from 44 to 66: safe

from 34 to 66: safe

from 11 to 28: R1_w=inv0 R2_w=inv0 R6_w=ctx(id=0,off=0,imm=0) R10=fp0 fp-8=mmmm???? fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000
; bpf_map_update_elem(&start, &pid, &ts, 0);
28: (bf) r2 = r10
;
29: (07) r2 += -16
; tsp = bpf_map_lookup_elem(&start, &pid);
30: (18) r1 = 0xffff8881f3868000
32: (85) call bpf_map_lookup_elem#1
invalid indirect read from stack off -16+0 size 4
processed 65 insns (limit 1000000) max_states_per_insn 1 total_states 5 peak_states 5 mark_read 4

Notice how there is a successful code path from instruction 0 through 67, few
successfully verified jumps (44->66, 34->66), and only after that 11->28 jump
plus error on instruction #32.

AFTER this change (full verifier log, **no truncation**):

; int handle__sched_switch(u64 *ctx)
0: (bf) r6 = r1
; struct task_struct *prev = (struct task_struct *)ctx[1];
1: (79) r1 = *(u64 *)(r6 +8)
func 'sched_switch' arg1 has btf_id 151 type STRUCT 'task_struct'
2: (b7) r2 = 0
; struct event event = {};
3: (7b) *(u64 *)(r10 -24) = r2
last_idx 3 first_idx 0
regs=4 stack=0 before 2: (b7) r2 = 0
4: (7b) *(u64 *)(r10 -32) = r2
5: (7b) *(u64 *)(r10 -40) = r2
6: (7b) *(u64 *)(r10 -48) = r2
; if (prev->state == TASK_RUNNING)
7: (79) r2 = *(u64 *)(r1 +16)
; if (prev->state == TASK_RUNNING)
8: (55) if r2 != 0x0 goto pc+19
 R1_w=ptr_task_struct(id=0,off=0,imm=0) R2_w=inv0 R6_w=ctx(id=0,off=0,imm=0) R10=fp0 fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000
; trace_enqueue(prev->tgid, prev->pid);
9: (61) r1 = *(u32 *)(r1 +1184)
10: (63) *(u32 *)(r10 -4) = r1
; if (!pid || (targ_pid && targ_pid != pid))
11: (15) if r1 == 0x0 goto pc+16

from 11 to 28: R1_w=inv0 R2_w=inv0 R6_w=ctx(id=0,off=0,imm=0) R10=fp0 fp-8=mmmm???? fp-24_w=00000000 fp-32_w=00000000 fp-40_w=00000000 fp-48_w=00000000
; bpf_map_update_elem(&start, &pid, &ts, 0);
28: (bf) r2 = r10
;
29: (07) r2 += -16
; tsp = bpf_map_lookup_elem(&start, &pid);
30: (18) r1 = 0xffff8881db3ce800
32: (85) call bpf_map_lookup_elem#1
invalid indirect read from stack off -16+0 size 4
processed 65 insns (limit 1000000) max_states_per_insn 1 total_states 5 peak_states 5 mark_read 4

Notice how in this case, there are 0-11 instructions + jump from 11 to
28 is recorded + 28-32 instructions with error on insn #32.

test_verifier test runner was updated to specify BPF_LOG_LEVEL2 for
VERBOSE_ACCEPT expected result due to potentially "incomplete" success verbose
log at BPF_LOG_LEVEL1.

On success, verbose log will only have a summary of number of processed
instructions, etc, but no branch tracing log. Having just a last successful
branch tracing seemed weird and confusing. Having small and clean summary log
in success case seems quite logical and nice, though.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200423195850.1259827-1-andriin@fb.com
---
 kernel/bpf/verifier.c                       | 29 +++++++++++++++++++++++++----
 tools/testing/selftests/bpf/test_verifier.c |  7 ++++++-
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 15ba8bf92ca9..91728e0f27eb 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -168,6 +168,8 @@ struct bpf_verifier_stack_elem {
 	int insn_idx;
 	int prev_insn_idx;
 	struct bpf_verifier_stack_elem *next;
+	/* length of verifier log at the time this state was pushed on stack */
+	u32 log_pos;
 };
 
 #define BPF_COMPLEXITY_LIMIT_JMP_SEQ	8192
@@ -283,6 +285,18 @@ void bpf_verifier_vlog(struct bpf_verifier_log *log, const char *fmt,
 		log->ubuf = NULL;
 }
 
+static void bpf_vlog_reset(struct bpf_verifier_log *log, u32 new_pos)
+{
+	char zero = 0;
+
+	if (!bpf_verifier_log_needed(log))
+		return;
+
+	log->len_used = new_pos;
+	if (put_user(zero, log->ubuf + new_pos))
+		log->ubuf = NULL;
+}
+
 /* log_level controls verbosity level of eBPF verifier.
  * bpf_verifier_log_write() is used to dump the verification trace to the log,
  * so the user can figure out what's wrong with the program
@@ -846,7 +860,7 @@ static void update_branch_counts(struct bpf_verifier_env *env, struct bpf_verifi
 }
 
 static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
-		     int *insn_idx)
+		     int *insn_idx, bool pop_log)
 {
 	struct bpf_verifier_state *cur = env->cur_state;
 	struct bpf_verifier_stack_elem *elem, *head = env->head;
@@ -860,6 +874,8 @@ static int pop_stack(struct bpf_verifier_env *env, int *prev_insn_idx,
 		if (err)
 			return err;
 	}
+	if (pop_log)
+		bpf_vlog_reset(&env->log, head->log_pos);
 	if (insn_idx)
 		*insn_idx = head->insn_idx;
 	if (prev_insn_idx)
@@ -887,6 +903,7 @@ static struct bpf_verifier_state *push_stack(struct bpf_verifier_env *env,
 	elem->insn_idx = insn_idx;
 	elem->prev_insn_idx = prev_insn_idx;
 	elem->next = env->head;
+	elem->log_pos = env->log.len_used;
 	env->head = elem;
 	env->stack_size++;
 	err = copy_verifier_state(&elem->st, cur);
@@ -915,7 +932,7 @@ err:
 	free_verifier_state(env->cur_state, true);
 	env->cur_state = NULL;
 	/* pop all elements and return */
-	while (!pop_stack(env, NULL, NULL));
+	while (!pop_stack(env, NULL, NULL, false));
 	return NULL;
 }
 
@@ -8407,6 +8424,7 @@ static bool reg_type_mismatch(enum bpf_reg_type src, enum bpf_reg_type prev)
 
 static int do_check(struct bpf_verifier_env *env)
 {
+	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
 	struct bpf_verifier_state *state = env->cur_state;
 	struct bpf_insn *insns = env->prog->insnsi;
 	struct bpf_reg_state *regs;
@@ -8683,7 +8701,7 @@ static int do_check(struct bpf_verifier_env *env)
 process_bpf_exit:
 				update_branch_counts(env, env->cur_state);
 				err = pop_stack(env, &prev_insn_idx,
-						&env->insn_idx);
+						&env->insn_idx, pop_log);
 				if (err < 0) {
 					if (err != -ENOENT)
 						return err;
@@ -10206,6 +10224,7 @@ static void sanitize_insn_aux_data(struct bpf_verifier_env *env)
 
 static int do_check_common(struct bpf_verifier_env *env, int subprog)
 {
+	bool pop_log = !(env->log.level & BPF_LOG_LEVEL2);
 	struct bpf_verifier_state *state;
 	struct bpf_reg_state *regs;
 	int ret, i;
@@ -10268,7 +10287,9 @@ out:
 		free_verifier_state(env->cur_state, true);
 		env->cur_state = NULL;
 	}
-	while (!pop_stack(env, NULL, NULL));
+	while (!pop_stack(env, NULL, NULL, false));
+	if (!ret && pop_log)
+		bpf_vlog_reset(&env->log, 0);
 	free_states(env);
 	if (ret)
 		/* clean aux data in case subprog was rejected */
diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 87eaa49609a0..ad6939c67c5e 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -943,7 +943,12 @@ static void do_test_single(struct bpf_test *test, bool unpriv,
 	attr.insns = prog;
 	attr.insns_cnt = prog_len;
 	attr.license = "GPL";
-	attr.log_level = verbose || expected_ret == VERBOSE_ACCEPT ? 1 : 4;
+	if (verbose)
+		attr.log_level = 1;
+	else if (expected_ret == VERBOSE_ACCEPT)
+		attr.log_level = 2;
+	else
+		attr.log_level = 4;
 	attr.prog_flags = pflags;
 
 	fd_prog = bpf_load_program_xattr(&attr, bpf_vlog, sizeof(bpf_vlog));
-- 
cgit v1.2.3-59-g8ed1b


From 234589012ba0e5bf448e3fdbbac0f4c265dbdd7b Mon Sep 17 00:00:00 2001
From: Lorenz Bauer <lmb@cloudflare.com>
Date: Fri, 24 Apr 2020 19:55:55 +0100
Subject: selftests/bpf: Add cls_redirect classifier

cls_redirect is a TC clsact based replacement for the glb-redirect iptables
module available at [1]. It enables what GitHub calls "second chance"
flows [2], similarly proposed by the Beamer paper [3]. In contrast to
glb-redirect, it also supports migrating UDP flows as long as connected
sockets are used. cls_redirect is in production at Cloudflare, as part of
our own L4 load balancer.

We have modified the encapsulation format slightly from glb-redirect:
glbgue_chained_routing.private_data_type has been repurposed to form a
version field and several flags. Both have been arranged in a way that
a private_data_type value of zero matches the current glb-redirect
behaviour. This means that cls_redirect will understand packets in
glb-redirect format, but not vice versa.

The test suite only covers basic features. For example, cls_redirect will
correctly forward path MTU discovery packets, but this is not exercised.
It is also possible to switch the encapsulation format to GRE on the last
hop, which is also not tested.

There are two major distinctions from glb-redirect: first, cls_redirect
relies on receiving encapsulated packets directly from a router. This is
because we don't have access to the neighbour tables from BPF, yet. See
forward_to_next_hop for details. Second, cls_redirect performs decapsulation
instead of using separate ipip and sit tunnel devices. This
avoids issues with the sit tunnel [4] and makes deploying the classifier
easier: decapsulated packets appear on the same interface, so existing
firewall rules continue to work as expected.

The code base started its life on v4.19, so there are most likely still
holdovers from old workarounds. In no particular order:

- The function buf_off is required to defeat a clang optimization
  that leads to the verifier rejecting the program due to pointer
  arithmetic in the wrong order.

- The function pkt_parse_ipv6 is force inlined, because it would
  otherwise be rejected due to returning a pointer to stack memory.

- The functions fill_tuple and classify_tcp contain kludges, because
  we've run out of function arguments.

- The logic in general is rather nested, due to verifier restrictions.
  I think this is either because the verifier loses track of constants
  on the stack, or because it can't track enum like variables.

1: https://github.com/github/glb-director/tree/master/src/glb-redirect
2: https://github.com/github/glb-director/blob/master/docs/development/second-chance-design.md
3: https://www.usenix.org/conference/nsdi18/presentation/olteanu
4: https://github.com/github/glb-director/issues/64

Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200424185556.7358-2-lmb@cloudflare.com
---
 .../selftests/bpf/prog_tests/cls_redirect.c        |  456 +++++++++
 .../selftests/bpf/progs/test_cls_redirect.c        | 1058 ++++++++++++++++++++
 .../selftests/bpf/progs/test_cls_redirect.h        |   54 +
 tools/testing/selftests/bpf/test_progs.h           |    7 +
 4 files changed, 1575 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cls_redirect.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_cls_redirect.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_cls_redirect.h

diff --git a/tools/testing/selftests/bpf/prog_tests/cls_redirect.c b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
new file mode 100644
index 000000000000..f259085cca6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cls_redirect.c
@@ -0,0 +1,456 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2020 Cloudflare
+
+#define _GNU_SOURCE
+
+#include <arpa/inet.h>
+#include <string.h>
+
+#include <linux/pkt_cls.h>
+
+#include <test_progs.h>
+
+#include "progs/test_cls_redirect.h"
+#include "test_cls_redirect.skel.h"
+
+#define ENCAP_IP INADDR_LOOPBACK
+#define ENCAP_PORT (1234)
+
+struct addr_port {
+	in_port_t port;
+	union {
+		struct in_addr in_addr;
+		struct in6_addr in6_addr;
+	};
+};
+
+struct tuple {
+	int family;
+	struct addr_port src;
+	struct addr_port dst;
+};
+
+static int start_server(const struct sockaddr *addr, socklen_t len, int type)
+{
+	int fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		return -1;
+	if (CHECK_FAIL(bind(fd, addr, len) == -1))
+		goto err;
+	if (type == SOCK_STREAM && CHECK_FAIL(listen(fd, 128) == -1))
+		goto err;
+
+	return fd;
+
+err:
+	close(fd);
+	return -1;
+}
+
+static int connect_to_server(const struct sockaddr *addr, socklen_t len,
+			     int type)
+{
+	int fd = socket(addr->sa_family, type, 0);
+	if (CHECK_FAIL(fd == -1))
+		return -1;
+	if (CHECK_FAIL(connect(fd, addr, len)))
+		goto err;
+
+	return fd;
+
+err:
+	close(fd);
+	return -1;
+}
+
+static bool fill_addr_port(const struct sockaddr *sa, struct addr_port *ap)
+{
+	const struct sockaddr_in6 *in6;
+	const struct sockaddr_in *in;
+
+	switch (sa->sa_family) {
+	case AF_INET:
+		in = (const struct sockaddr_in *)sa;
+		ap->in_addr = in->sin_addr;
+		ap->port = in->sin_port;
+		return true;
+
+	case AF_INET6:
+		in6 = (const struct sockaddr_in6 *)sa;
+		ap->in6_addr = in6->sin6_addr;
+		ap->port = in6->sin6_port;
+		return true;
+
+	default:
+		return false;
+	}
+}
+
+static bool set_up_conn(const struct sockaddr *addr, socklen_t len, int type,
+			int *server, int *conn, struct tuple *tuple)
+{
+	struct sockaddr_storage ss;
+	socklen_t slen = sizeof(ss);
+	struct sockaddr *sa = (struct sockaddr *)&ss;
+
+	*server = start_server(addr, len, type);
+	if (*server < 0)
+		return false;
+
+	if (CHECK_FAIL(getsockname(*server, sa, &slen)))
+		goto close_server;
+
+	*conn = connect_to_server(sa, slen, type);
+	if (*conn < 0)
+		goto close_server;
+
+	/* We want to simulate packets arriving at conn, so we have to
+	 * swap src and dst.
+	 */
+	slen = sizeof(ss);
+	if (CHECK_FAIL(getsockname(*conn, sa, &slen)))
+		goto close_conn;
+
+	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->dst)))
+		goto close_conn;
+
+	slen = sizeof(ss);
+	if (CHECK_FAIL(getpeername(*conn, sa, &slen)))
+		goto close_conn;
+
+	if (CHECK_FAIL(!fill_addr_port(sa, &tuple->src)))
+		goto close_conn;
+
+	tuple->family = ss.ss_family;
+	return true;
+
+close_conn:
+	close(*conn);
+	*conn = -1;
+close_server:
+	close(*server);
+	*server = -1;
+	return false;
+}
+
+static socklen_t prepare_addr(struct sockaddr_storage *addr, int family)
+{
+	/* Write a zeroed loopback sockaddr of the given family into *addr and
+	 * return its length; logs to stderr and returns 0 otherwise. */
+	struct sockaddr_in6 *addr6 = (struct sockaddr_in6 *)addr;
+	struct sockaddr_in *addr4 = (struct sockaddr_in *)addr;
+
+	switch (family) {
+	case AF_INET:
+		memset(addr4, 0, sizeof(*addr4));
+		addr4->sin_family = family;
+		addr4->sin_addr.s_addr = htonl(INADDR_LOOPBACK);
+		return sizeof(*addr4);
+	case AF_INET6:
+		memset(addr6, 0, sizeof(*addr6));
+		addr6->sin6_family = family;
+		addr6->sin6_addr = in6addr_loopback;
+		return sizeof(*addr6);
+	default:
+		fprintf(stderr, "Invalid family %d\n", family);
+		return 0;
+	}
+}
+
+static bool was_decapsulated(struct bpf_prog_test_run_attr *tattr)
+{
+	return tattr->data_size_out < tattr->data_size_in;
+}
+
+enum type {
+	UDP,
+	TCP,
+	__NR_KIND,
+};
+
+enum hops {
+	NO_HOPS,
+	ONE_HOP,
+};
+
+enum flags {
+	NONE,
+	SYN,
+	ACK,
+};
+
+enum conn {
+	KNOWN_CONN,
+	UNKNOWN_CONN,
+};
+
+enum result {
+	ACCEPT,
+	FORWARD,
+};
+
+struct test_cfg {
+	enum type type;
+	enum result result;
+	enum conn conn;
+	enum hops hops;
+	enum flags flags;
+};
+
+static int test_str(void *buf, size_t len, const struct test_cfg *test,
+		    int family)
+{
+	/* Render a human-readable subtest name for one test case and
+	 * address family into buf via snprintf and return its result.
+	 *
+	 * Each field prints a default label unless it holds the one value
+	 * that is checked for explicitly: "IPv4" unless AF_INET6, "TCP"
+	 * unless UDP, "accept" unless FORWARD, "known" unless
+	 * UNKNOWN_CONN, "no hops" unless ONE_HOP, and "none" unless the
+	 * flags are SYN or ACK.
+	 */
+	const char *l3 = family == AF_INET6 ? "IPv6" : "IPv4";
+	const char *l4 = test->type == UDP ? "UDP" : "TCP";
+	const char *verdict = test->result == FORWARD ? "forward" : "accept";
+	const char *state = test->conn == UNKNOWN_CONN ? "unknown" : "known";
+	const char *path = test->hops == ONE_HOP ? "one hop" : "no hops";
+	const char *tcp_flags;
+
+	switch (test->flags) {
+	case SYN:
+		tcp_flags = "SYN";
+		break;
+	case ACK:
+		tcp_flags = "ACK";
+		break;
+	default:
+		tcp_flags = "none";
+		break;
+	}
+
+	return snprintf(buf, len, "%s %s %s %s (%s, flags: %s)", l3, l4,
+			verdict, state, path, tcp_flags);
+}
+
+static struct test_cfg tests[] = {
+	{ TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, SYN },
+	{ TCP, ACCEPT, UNKNOWN_CONN, NO_HOPS, ACK },
+	{ TCP, FORWARD, UNKNOWN_CONN, ONE_HOP, ACK },
+	{ TCP, ACCEPT, KNOWN_CONN, ONE_HOP, ACK },
+	{ UDP, ACCEPT, UNKNOWN_CONN, NO_HOPS, NONE },
+	{ UDP, FORWARD, UNKNOWN_CONN, ONE_HOP, NONE },
+	{ UDP, ACCEPT, KNOWN_CONN, ONE_HOP, NONE },
+};
+
+static void encap_init(encap_headers_t *encap, uint8_t hop_count, uint8_t proto)
+{
+	const uint8_t hlen =
+		(sizeof(struct guehdr) / sizeof(uint32_t)) + hop_count;
+	*encap = (encap_headers_t){
+		.eth = { .h_proto = htons(ETH_P_IP) },
+		.ip = {
+			.ihl = 5,
+			.version = 4,
+			.ttl = IPDEFTTL,
+			.protocol = IPPROTO_UDP,
+			.daddr = htonl(ENCAP_IP)
+		},
+		.udp = {
+			.dest = htons(ENCAP_PORT),
+		},
+		.gue = {
+			.hlen = hlen,
+			.proto_ctype = proto
+		},
+		.unigue = {
+			.hop_count = hop_count
+		},
+	};
+}
+
+static size_t build_input(const struct test_cfg *test, void *const buf,
+			  const struct tuple *tuple)
+{
+	in_port_t sport = tuple->src.port;
+	encap_headers_t encap;
+	struct iphdr ip;
+	struct ipv6hdr ipv6;
+	struct tcphdr tcp;
+	struct udphdr udp;
+	struct in_addr next_hop;
+	uint8_t *p = buf;
+	int proto;
+
+	proto = IPPROTO_IPIP;
+	if (tuple->family == AF_INET6)
+		proto = IPPROTO_IPV6;
+
+	encap_init(&encap, test->hops == ONE_HOP ? 1 : 0, proto);
+	p = mempcpy(p, &encap, sizeof(encap));
+
+	if (test->hops == ONE_HOP) {
+		next_hop = (struct in_addr){ .s_addr = htonl(0x7f000002) };
+		p = mempcpy(p, &next_hop, sizeof(next_hop));
+	}
+
+	proto = IPPROTO_TCP;
+	if (test->type == UDP)
+		proto = IPPROTO_UDP;
+
+	switch (tuple->family) {
+	case AF_INET:
+		ip = (struct iphdr){
+			.ihl = 5,
+			.version = 4,
+			.ttl = IPDEFTTL,
+			.protocol = proto,
+			.saddr = tuple->src.in_addr.s_addr,
+			.daddr = tuple->dst.in_addr.s_addr,
+		};
+		p = mempcpy(p, &ip, sizeof(ip));
+		break;
+	case AF_INET6:
+		ipv6 = (struct ipv6hdr){
+			.version = 6,
+			.hop_limit = IPDEFTTL,
+			.nexthdr = proto,
+			.saddr = tuple->src.in6_addr,
+			.daddr = tuple->dst.in6_addr,
+		};
+		p = mempcpy(p, &ipv6, sizeof(ipv6));
+		break;
+	default:
+		return 0;
+	}
+
+	if (test->conn == UNKNOWN_CONN)
+		sport--;
+
+	switch (test->type) {
+	case TCP:
+		tcp = (struct tcphdr){
+			.source = sport,
+			.dest = tuple->dst.port,
+		};
+		if (test->flags == SYN)
+			tcp.syn = true;
+		if (test->flags == ACK)
+			tcp.ack = true;
+		p = mempcpy(p, &tcp, sizeof(tcp));
+		break;
+	case UDP:
+		udp = (struct udphdr){
+			.source = sport,
+			.dest = tuple->dst.port,
+		};
+		p = mempcpy(p, &udp, sizeof(udp));
+		break;
+	default:
+		return 0;
+	}
+
+	return (void *)p - buf;
+}
+
+static void close_fds(int *fds, int n)
+{
+	/* Close each of the first n entries that is greater than zero. */
+	for (int k = 0; k < n; k++) {
+		if (fds[k] > 0)
+			close(fds[k]);
+	}
+}
+
+void test_cls_redirect(void)
+{
+	struct test_cls_redirect *skel = NULL;
+	struct bpf_prog_test_run_attr tattr = {};
+	int families[] = { AF_INET, AF_INET6 };
+	struct sockaddr_storage ss;
+	struct sockaddr *addr;
+	socklen_t slen;
+	int i, j, err;
+
+	int servers[__NR_KIND][ARRAY_SIZE(families)] = {};
+	int conns[__NR_KIND][ARRAY_SIZE(families)] = {};
+	struct tuple tuples[__NR_KIND][ARRAY_SIZE(families)];
+
+	skel = test_cls_redirect__open();
+	if (CHECK_FAIL(!skel))
+		return;
+
+	skel->rodata->ENCAPSULATION_IP = htonl(ENCAP_IP);
+	skel->rodata->ENCAPSULATION_PORT = htons(ENCAP_PORT);
+
+	if (CHECK_FAIL(test_cls_redirect__load(skel)))
+		goto cleanup;
+
+	addr = (struct sockaddr *)&ss;
+	for (i = 0; i < ARRAY_SIZE(families); i++) {
+		slen = prepare_addr(&ss, families[i]);
+		if (CHECK_FAIL(!slen))
+			goto cleanup;
+
+		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_DGRAM,
+					    &servers[UDP][i], &conns[UDP][i],
+					    &tuples[UDP][i])))
+			goto cleanup;
+
+		if (CHECK_FAIL(!set_up_conn(addr, slen, SOCK_STREAM,
+					    &servers[TCP][i], &conns[TCP][i],
+					    &tuples[TCP][i])))
+			goto cleanup;
+	}
+
+	tattr.prog_fd = bpf_program__fd(skel->progs.cls_redirect);
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct test_cfg *test = &tests[i];
+
+		for (j = 0; j < ARRAY_SIZE(families); j++) {
+			struct tuple *tuple = &tuples[test->type][j];
+			char input[256];
+			char tmp[256];
+
+			test_str(tmp, sizeof(tmp), test, tuple->family);
+			if (!test__start_subtest(tmp))
+				continue;
+
+			tattr.data_out = tmp;
+			tattr.data_size_out = sizeof(tmp);
+
+			tattr.data_in = input;
+			tattr.data_size_in = build_input(test, input, tuple);
+			if (CHECK_FAIL(!tattr.data_size_in))
+				continue;
+
+			err = bpf_prog_test_run_xattr(&tattr);
+			if (CHECK_FAIL(err))
+				continue;
+
+			if (tattr.retval != TC_ACT_REDIRECT) {
+				PRINT_FAIL("expected TC_ACT_REDIRECT, got %d\n",
+					   tattr.retval);
+				continue;
+			}
+
+			switch (test->result) {
+			case ACCEPT:
+				if (CHECK_FAIL(!was_decapsulated(&tattr)))
+					continue;
+				break;
+			case FORWARD:
+				if (CHECK_FAIL(was_decapsulated(&tattr)))
+					continue;
+				break;
+			default:
+				PRINT_FAIL("unknown result %d\n", test->result);
+				continue;
+			}
+		}
+	}
+
+cleanup:
+	test_cls_redirect__destroy(skel);
+	close_fds((int *)servers, sizeof(servers) / sizeof(servers[0][0]));
+	close_fds((int *)conns, sizeof(conns) / sizeof(conns[0][0]));
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
new file mode 100644
index 000000000000..1668b993eb86
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -0,0 +1,1058 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+// Copyright (c) 2019, 2020 Cloudflare
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/icmp.h>
+#include <linux/icmpv6.h>
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/pkt_cls.h>
+#include <linux/tcp.h>
+#include <linux/udp.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#include "test_cls_redirect.h"
+
+#define offsetofend(TYPE, MEMBER) \
+	(offsetof(TYPE, MEMBER) + sizeof((((TYPE *)0)->MEMBER)))
+
+#define IP_OFFSET_MASK (0x1FFF)
+#define IP_MF (0x2000)
+
+char _license[] SEC("license") = "Dual BSD/GPL";
+
+/**
+ * Destination port and IP used for UDP encapsulation.
+ */
+static volatile const __be16 ENCAPSULATION_PORT;
+static volatile const __be32 ENCAPSULATION_IP;
+
+typedef struct {
+	uint64_t processed_packets_total;
+	uint64_t l3_protocol_packets_total_ipv4;
+	uint64_t l3_protocol_packets_total_ipv6;
+	uint64_t l4_protocol_packets_total_tcp;
+	uint64_t l4_protocol_packets_total_udp;
+	uint64_t accepted_packets_total_syn;
+	uint64_t accepted_packets_total_syn_cookies;
+	uint64_t accepted_packets_total_last_hop;
+	uint64_t accepted_packets_total_icmp_echo_request;
+	uint64_t accepted_packets_total_established;
+	uint64_t forwarded_packets_total_gue;
+	uint64_t forwarded_packets_total_gre;
+
+	uint64_t errors_total_unknown_l3_proto;
+	uint64_t errors_total_unknown_l4_proto;
+	uint64_t errors_total_malformed_ip;
+	uint64_t errors_total_fragmented_ip;
+	uint64_t errors_total_malformed_icmp;
+	uint64_t errors_total_unwanted_icmp;
+	uint64_t errors_total_malformed_icmp_pkt_too_big;
+	uint64_t errors_total_malformed_tcp;
+	uint64_t errors_total_malformed_udp;
+	uint64_t errors_total_icmp_echo_replies;
+	uint64_t errors_total_malformed_encapsulation;
+	uint64_t errors_total_encap_adjust_failed;
+	uint64_t errors_total_encap_buffer_too_small;
+	uint64_t errors_total_redirect_loop;
+} metrics_t;
+
+typedef enum {
+	INVALID = 0,
+	UNKNOWN,
+	ECHO_REQUEST,
+	SYN,
+	SYN_COOKIE,
+	ESTABLISHED,
+} verdict_t;
+
+typedef struct {
+	uint16_t src, dst;
+} flow_ports_t;
+
+/* flow_ports_t must be exactly as large as the span that runs from the
+ * start of sport to the end of dport in struct bpf_sock_tuple. */
+_Static_assert(
+	sizeof(flow_ports_t) == offsetofend(struct bpf_sock_tuple, ipv4.dport) -
+				offsetof(struct bpf_sock_tuple, ipv4.sport),
+	"flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+_Static_assert(
+	sizeof(flow_ports_t) == offsetofend(struct bpf_sock_tuple, ipv6.dport) -
+				offsetof(struct bpf_sock_tuple, ipv6.sport),
+	"flow_ports_t must match sport and dport in struct bpf_sock_tuple");
+
+typedef int ret_t;
+
+/* This is a bit of a hack. We need a return value which allows us to
+ * indicate that the regular flow of the program should continue,
+ * while allowing functions to use XDP_PASS and XDP_DROP, etc.
+ */
+static const ret_t CONTINUE_PROCESSING = -1;
+
+/* Convenience macro to call functions which return ret_t.
+ */
+#define MAYBE_RETURN(x)                           \
+	do {                                      \
+		ret_t __ret = x;                  \
+		if (__ret != CONTINUE_PROCESSING) \
+			return __ret;             \
+	} while (0)
+
+/* Linux packet pointers are either aligned to NET_IP_ALIGN (aka 2 bytes),
+ * or not aligned if the arch supports efficient unaligned access.
+ *
+ * Since the verifier ensures that eBPF packet accesses follow these rules,
+ * we can tell LLVM to emit code as if we always had a larger alignment.
+ * It will yell at us if we end up on a platform where this is not valid.
+ */
+typedef uint8_t *net_ptr __attribute__((align_value(8)));
+
+typedef struct buf {
+	struct __sk_buff *skb;
+	net_ptr head;
+	/* NB: tail mustn't have alignment other than 1, otherwise
+	 * LLVM will go and eliminate code, e.g. when checking packet lengths.
+	 */
+	uint8_t *const tail;
+} buf_t;
+
+static size_t buf_off(const buf_t *buf)
+{
+	/* Clang seems to optimize constructs like
+	 *    a - b + c
+	 * if c is known:
+	 *    r? = c
+	 *    r? -= b
+	 *    r? += a
+	 *
+	 * This is a problem if a and b are packet pointers,
+	 * since the verifier allows subtracting two pointers to
+	 * get a scalar, but not a scalar and a pointer.
+	 *
+	 * Use inline asm to break this optimization.
+	 */
+	size_t off = (size_t)buf->head;
+	asm("%0 -= %1" : "+r"(off) : "r"(buf->skb->data));
+	return off;
+}
+
+static bool buf_copy(buf_t *buf, void *dst, size_t len)
+{
+	if (bpf_skb_load_bytes(buf->skb, buf_off(buf), dst, len)) {
+		return false;
+	}
+
+	buf->head += len;
+	return true;
+}
+
+static bool buf_skip(buf_t *buf, const size_t len)
+{
+	/* Check whether off + len is valid in the non-linear part. */
+	if (buf_off(buf) + len > buf->skb->len) {
+		return false;
+	}
+
+	buf->head += len;
+	return true;
+}
+
+/* Returns a pointer to the start of buf, or NULL if len is
+ * larger than the remaining data. Consumes len bytes on a successful
+ * call.
+ *
+ * If scratch is not NULL, the function will attempt to load non-linear
+ * data via bpf_skb_load_bytes. On success, scratch is returned.
+ */
+static void *buf_assign(buf_t *buf, const size_t len, void *scratch)
+{
+	if (buf->head + len > buf->tail) {
+		if (scratch == NULL) {
+			return NULL;
+		}
+
+		return buf_copy(buf, scratch, len) ? scratch : NULL;
+	}
+
+	void *ptr = buf->head;
+	buf->head += len;
+	return ptr;
+}
+
+static bool pkt_skip_ipv4_options(buf_t *buf, const struct iphdr *ipv4)
+{
+	if (ipv4->ihl <= 5) {
+		return true;
+	}
+
+	return buf_skip(buf, (ipv4->ihl - 5) * 4);
+}
+
+static bool ipv4_is_fragment(const struct iphdr *ip)
+{
+	uint16_t frag_off = ip->frag_off & bpf_htons(IP_OFFSET_MASK);
+	return (ip->frag_off & bpf_htons(IP_MF)) != 0 || frag_off > 0;
+}
+
+static struct iphdr *pkt_parse_ipv4(buf_t *pkt, struct iphdr *scratch)
+{
+	struct iphdr *ipv4 = buf_assign(pkt, sizeof(*ipv4), scratch);
+	if (ipv4 == NULL) {
+		return NULL;
+	}
+
+	if (ipv4->ihl < 5) {
+		return NULL;
+	}
+
+	if (!pkt_skip_ipv4_options(pkt, ipv4)) {
+		return NULL;
+	}
+
+	return ipv4;
+}
+
+/* Parse the L4 ports from a packet, assuming a layout like TCP or UDP. */
+static bool pkt_parse_icmp_l4_ports(buf_t *pkt, flow_ports_t *ports)
+{
+	if (!buf_copy(pkt, ports, sizeof(*ports))) {
+		return false;
+	}
+
+	/* Ports in the L4 headers are reversed, since we are parsing an ICMP
+	 * payload which is going towards the eyeball.
+	 */
+	uint16_t dst = ports->src;
+	ports->src = ports->dst;
+	ports->dst = dst;
+	return true;
+}
+
+static uint16_t pkt_checksum_fold(uint32_t csum)
+{
+	/* The highest reasonable value for an IPv4 header
+	 * checksum requires two folds, so we just do that always.
+	 */
+	csum = (csum & 0xffff) + (csum >> 16);
+	csum = (csum & 0xffff) + (csum >> 16);
+	return (uint16_t)~csum;
+}
+
+static void pkt_ipv4_checksum(struct iphdr *iph)
+{
+	iph->check = 0;
+
+	/* An IP header without options is 20 bytes. Two of those
+	 * are the checksum, which we always set to zero. Hence,
+	 * the maximum accumulated value is 18 / 2 * 0xffff = 0x8fff7,
+	 * which fits in 32 bit.
+	 */
+	_Static_assert(sizeof(struct iphdr) == 20, "iphdr must be 20 bytes");
+	uint32_t acc = 0;
+	uint16_t *ipw = (uint16_t *)iph;
+
+#pragma clang loop unroll(full)
+	for (size_t i = 0; i < sizeof(struct iphdr) / 2; i++) {
+		acc += ipw[i];
+	}
+
+	iph->check = pkt_checksum_fold(acc);
+}
+
+static bool pkt_skip_ipv6_extension_headers(buf_t *pkt,
+					    const struct ipv6hdr *ipv6,
+					    uint8_t *upper_proto,
+					    bool *is_fragment)
+{
+	/* We understand five extension headers.
+	 * https://tools.ietf.org/html/rfc8200#section-4.1 states that all
+	 * headers should occur once, except Destination Options, which may
+	 * occur twice. Hence we give up after 6 headers.
+	 */
+	struct {
+		uint8_t next;
+		uint8_t len;
+	} exthdr = {
+		.next = ipv6->nexthdr,
+	};
+	*is_fragment = false;
+
+#pragma clang loop unroll(full)
+	for (int i = 0; i < 6; i++) {
+		switch (exthdr.next) {
+		case IPPROTO_FRAGMENT:
+			*is_fragment = true;
+			/* NB: We don't check that hdrlen == 0 as per spec. */
+			/* fallthrough; */
+
+		case IPPROTO_HOPOPTS:
+		case IPPROTO_ROUTING:
+		case IPPROTO_DSTOPTS:
+		case IPPROTO_MH:
+			if (!buf_copy(pkt, &exthdr, sizeof(exthdr))) {
+				return false;
+			}
+
+			/* hdrlen is in 8-octet units, and excludes the first 8 octets. */
+			if (!buf_skip(pkt,
+				      (exthdr.len + 1) * 8 - sizeof(exthdr))) {
+				return false;
+			}
+
+			/* Decode next header */
+			break;
+
+		default:
+			/* The next header is not one of the known extension
+			 * headers, treat it as the upper layer header.
+			 *
+			 * This handles IPPROTO_NONE.
+			 *
+			 * Encapsulating Security Payload (50) and Authentication
+			 * Header (51) also end up here (and will trigger an
+			 * unknown proto error later). They have a custom header
+			 * format and seem too esoteric to care about.
+			 */
+			*upper_proto = exthdr.next;
+			return true;
+		}
+	}
+
+	/* We never found an upper layer header. */
+	return false;
+}
+
+/* This function has to be inlined, because the verifier otherwise rejects it
+ * due to returning a pointer to the stack. This is technically correct, since
+ * scratch is allocated on the stack. However, this usage should be safe since
+ * it's the caller's stack after all.
+ */
+static inline __attribute__((__always_inline__)) struct ipv6hdr *
+pkt_parse_ipv6(buf_t *pkt, struct ipv6hdr *scratch, uint8_t *proto,
+	       bool *is_fragment)
+{
+	struct ipv6hdr *ipv6 = buf_assign(pkt, sizeof(*ipv6), scratch);
+	if (ipv6 == NULL) {
+		return NULL;
+	}
+
+	if (!pkt_skip_ipv6_extension_headers(pkt, ipv6, proto, is_fragment)) {
+		return NULL;
+	}
+
+	return ipv6;
+}
+
+/* Global metrics, per CPU
+ */
+struct bpf_map_def metrics_map SEC("maps") = {
+	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
+	.key_size = sizeof(unsigned int),
+	.value_size = sizeof(metrics_t),
+	.max_entries = 1,
+};
+
+static metrics_t *get_global_metrics(void)
+{
+	unsigned int key = 0; /* same type as metrics_map.key_size; slot 0 is the only entry */
+	return bpf_map_lookup_elem(&metrics_map, &key);
+}
+
+static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
+{
+	const int payload_off =
+		sizeof(*encap) +
+		sizeof(struct in_addr) * encap->unigue.hop_count;
+	int32_t encap_overhead = payload_off - sizeof(struct ethhdr);
+
+	// Changing the ethertype if the encapsulated packet is ipv6
+	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+		encap->eth.h_proto = bpf_htons(ETH_P_IPV6);
+	}
+
+	if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
+				BPF_F_ADJ_ROOM_FIXED_GSO)) {
+		return TC_ACT_SHOT;
+	}
+
+	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
+}
+
+static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
+			      struct in_addr *next_hop, metrics_t *metrics)
+{
+	metrics->forwarded_packets_total_gre++;
+
+	const int payload_off =
+		sizeof(*encap) +
+		sizeof(struct in_addr) * encap->unigue.hop_count;
+	int32_t encap_overhead =
+		payload_off - sizeof(struct ethhdr) - sizeof(struct iphdr);
+	int32_t delta = sizeof(struct gre_base_hdr) - encap_overhead;
+	uint16_t proto = ETH_P_IP;
+
+	/* Loop protection: the inner packet's TTL is decremented as a safeguard
+	 * against any forwarding loop. As the only interesting field is the TTL
+	 * (hop limit for IPv6), it is easier to use bpf_skb_load_bytes/bpf_skb_store_bytes
+	 * as they handle split packets if needed (no need for the data to be
+	 * in the linear section).
+	 */
+	if (encap->gue.proto_ctype == IPPROTO_IPV6) {
+		proto = ETH_P_IPV6;
+		uint8_t ttl;
+		int rc;
+
+		rc = bpf_skb_load_bytes(
+			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+			&ttl, 1);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		if (ttl == 0) {
+			metrics->errors_total_redirect_loop++;
+			return TC_ACT_SHOT;
+		}
+
+		ttl--;
+		rc = bpf_skb_store_bytes(
+			skb, payload_off + offsetof(struct ipv6hdr, hop_limit),
+			&ttl, 1, 0);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+	} else {
+		uint8_t ttl;
+		int rc;
+
+		rc = bpf_skb_load_bytes(
+			skb, payload_off + offsetof(struct iphdr, ttl), &ttl,
+			1);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		if (ttl == 0) {
+			metrics->errors_total_redirect_loop++;
+			return TC_ACT_SHOT;
+		}
+
+		/* IPv4 also has a checksum to patch. While the TTL is only one byte,
+		 * this function only works for 2 and 4 bytes arguments (the result is
+		 * the same).
+		 */
+		rc = bpf_l3_csum_replace(
+			skb, payload_off + offsetof(struct iphdr, check), ttl,
+			ttl - 1, 2);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+
+		ttl--;
+		rc = bpf_skb_store_bytes(
+			skb, payload_off + offsetof(struct iphdr, ttl), &ttl, 1,
+			0);
+		if (rc != 0) {
+			metrics->errors_total_malformed_encapsulation++;
+			return TC_ACT_SHOT;
+		}
+	}
+
+	if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
+				BPF_F_ADJ_ROOM_FIXED_GSO)) {
+		metrics->errors_total_encap_adjust_failed++;
+		return TC_ACT_SHOT;
+	}
+
+	if (bpf_skb_pull_data(skb, sizeof(encap_gre_t))) {
+		metrics->errors_total_encap_buffer_too_small++;
+		return TC_ACT_SHOT;
+	}
+
+	buf_t pkt = {
+		.skb = skb,
+		.head = (uint8_t *)(long)skb->data,
+		.tail = (uint8_t *)(long)skb->data_end,
+	};
+
+	encap_gre_t *encap_gre = buf_assign(&pkt, sizeof(encap_gre_t), NULL);
+	if (encap_gre == NULL) {
+		metrics->errors_total_encap_buffer_too_small++;
+		return TC_ACT_SHOT;
+	}
+
+	encap_gre->ip.protocol = IPPROTO_GRE;
+	encap_gre->ip.daddr = next_hop->s_addr;
+	encap_gre->ip.saddr = ENCAPSULATION_IP;
+	encap_gre->ip.tot_len =
+		bpf_htons(bpf_ntohs(encap_gre->ip.tot_len) + delta);
+	encap_gre->gre.flags = 0;
+	encap_gre->gre.protocol = bpf_htons(proto);
+	pkt_ipv4_checksum((void *)&encap_gre->ip);
+
+	return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t forward_to_next_hop(struct __sk_buff *skb, encap_headers_t *encap,
+				 struct in_addr *next_hop, metrics_t *metrics)
+{
+	/* swap L2 addresses */
+	/* This assumes that packets are received from a router.
+	 * So just swapping the MAC addresses here will make the packet go back to
+	 * the router, which will send it to the appropriate machine.
+	 */
+	unsigned char temp[ETH_ALEN];
+	memcpy(temp, encap->eth.h_dest, sizeof(temp));
+	memcpy(encap->eth.h_dest, encap->eth.h_source,
+	       sizeof(encap->eth.h_dest));
+	memcpy(encap->eth.h_source, temp, sizeof(encap->eth.h_source));
+
+	if (encap->unigue.next_hop == encap->unigue.hop_count - 1 &&
+	    encap->unigue.last_hop_gre) {
+		return forward_with_gre(skb, encap, next_hop, metrics);
+	}
+
+	metrics->forwarded_packets_total_gue++;
+	uint32_t old_saddr = encap->ip.saddr;
+	encap->ip.saddr = encap->ip.daddr;
+	encap->ip.daddr = next_hop->s_addr;
+	if (encap->unigue.next_hop < encap->unigue.hop_count) {
+		encap->unigue.next_hop++;
+	}
+
+	/* Remove ip->saddr, add next_hop->s_addr */
+	const uint64_t off = offsetof(typeof(*encap), ip.check);
+	int ret = bpf_l3_csum_replace(skb, off, old_saddr, next_hop->s_addr, 4);
+	if (ret < 0) {
+		return TC_ACT_SHOT;
+	}
+
+	return bpf_redirect(skb->ifindex, 0);
+}
+
+static ret_t skip_next_hops(buf_t *pkt, int n)
+{
+	switch (n) {
+	case 1:
+		if (!buf_skip(pkt, sizeof(struct in_addr)))
+			return TC_ACT_SHOT;
+	case 0: /* case 1 falls through to here once the hop has been skipped */
+		return CONTINUE_PROCESSING;
+
+	default: /* any other n is rejected */
+		return TC_ACT_SHOT;
+	}
+}
+
+/* Get the next hop from the GLB header.
+ *
+ * Sets next_hop->s_addr to 0 if there are no more hops left.
+ * pkt is positioned just after the variable length GLB header
+ * iff the call is successful.
+ */
+static ret_t get_next_hop(buf_t *pkt, encap_headers_t *encap,
+			  struct in_addr *next_hop)
+{
+	if (encap->unigue.next_hop > encap->unigue.hop_count) {
+		return TC_ACT_SHOT;
+	}
+
+	/* Skip "used" next hops. */
+	MAYBE_RETURN(skip_next_hops(pkt, encap->unigue.next_hop));
+
+	if (encap->unigue.next_hop == encap->unigue.hop_count) {
+		/* No more next hops, we are at the end of the GLB header. */
+		next_hop->s_addr = 0;
+		return CONTINUE_PROCESSING;
+	}
+
+	if (!buf_copy(pkt, next_hop, sizeof(*next_hop))) {
+		return TC_ACT_SHOT;
+	}
+
+	/* Skip the remaining next hops (may be zero). */
+	return skip_next_hops(pkt, encap->unigue.hop_count -
+					   encap->unigue.next_hop - 1);
+}
+
+/* Fill a bpf_sock_tuple to be used with the socket lookup functions.
+ * This is a kludge that lets us work around verifier limitations:
+ *
+ *    fill_tuple(&t, foo, sizeof(struct iphdr), 123, 321)
+ *
+ * clang will substitute a constant for sizeof, which allows the verifier
+ * to track its value. Based on this, it can figure out the constant
+ * return value, and calling code works while still being "generic" to
+ * IPv4 and IPv6. Returns the tuple size, or 0 for an unknown iphlen.
+ */
+static uint64_t fill_tuple(struct bpf_sock_tuple *tuple, void *iph,
+			   uint64_t iphlen, uint16_t sport, uint16_t dport)
+{
+	switch (iphlen) {
+	case sizeof(struct iphdr): {
+		struct iphdr *ipv4 = (struct iphdr *)iph;
+		tuple->ipv4.daddr = ipv4->daddr;
+		tuple->ipv4.saddr = ipv4->saddr;
+		tuple->ipv4.sport = sport;
+		tuple->ipv4.dport = dport;
+		return sizeof(tuple->ipv4);
+	}
+
+	case sizeof(struct ipv6hdr): {
+		struct ipv6hdr *ipv6 = (struct ipv6hdr *)iph;
+		memcpy(&tuple->ipv6.daddr, &ipv6->daddr,
+		       sizeof(tuple->ipv6.daddr));
+		memcpy(&tuple->ipv6.saddr, &ipv6->saddr,
+		       sizeof(tuple->ipv6.saddr));
+		tuple->ipv6.sport = sport;
+		tuple->ipv6.dport = dport;
+		return sizeof(tuple->ipv6);
+	}
+
+	default:
+		return 0;
+	}
+}
+
+static verdict_t classify_tcp(struct __sk_buff *skb,
+			      struct bpf_sock_tuple *tuple, uint64_t tuplen,
+			      void *iph, struct tcphdr *tcp)
+{
+	struct bpf_sock *sk =
+		bpf_skc_lookup_tcp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+	if (sk == NULL) {
+		return UNKNOWN;
+	}
+
+	if (sk->state != BPF_TCP_LISTEN) {
+		bpf_sk_release(sk);
+		return ESTABLISHED;
+	}
+
+	if (iph != NULL && tcp != NULL) {
+		/* Kludge: we've run out of arguments, but need the length of the ip header. */
+		uint64_t iphlen = sizeof(struct iphdr);
+		if (tuplen == sizeof(tuple->ipv6)) {
+			iphlen = sizeof(struct ipv6hdr);
+		}
+
+		if (bpf_tcp_check_syncookie(sk, iph, iphlen, tcp,
+					    sizeof(*tcp)) == 0) {
+			bpf_sk_release(sk);
+			return SYN_COOKIE;
+		}
+	}
+
+	bpf_sk_release(sk);
+	return UNKNOWN;
+}
+
+static verdict_t classify_udp(struct __sk_buff *skb,
+			      struct bpf_sock_tuple *tuple, uint64_t tuplen)
+{
+	struct bpf_sock *sk =
+		bpf_sk_lookup_udp(skb, tuple, tuplen, BPF_F_CURRENT_NETNS, 0);
+	if (sk == NULL) {
+		return UNKNOWN;
+	}
+
+	if (sk->state == BPF_TCP_ESTABLISHED) {
+		bpf_sk_release(sk);
+		return ESTABLISHED;
+	}
+
+	bpf_sk_release(sk);
+	return UNKNOWN;
+}
+
+static verdict_t classify_icmp(struct __sk_buff *skb, uint8_t proto,
+			       struct bpf_sock_tuple *tuple, uint64_t tuplen,
+			       metrics_t *metrics)
+{
+	switch (proto) {
+	case IPPROTO_TCP:
+		return classify_tcp(skb, tuple, tuplen, NULL, NULL);
+
+	case IPPROTO_UDP:
+		return classify_udp(skb, tuple, tuplen);
+
+	default:
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+}
+
+static verdict_t process_icmpv4(buf_t *pkt, metrics_t *metrics)
+{
+	struct icmphdr icmp;
+	if (!buf_copy(pkt, &icmp, sizeof(icmp))) {
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+
+	/* We should never receive encapsulated echo replies. */
+	if (icmp.type == ICMP_ECHOREPLY) {
+		metrics->errors_total_icmp_echo_replies++;
+		return INVALID;
+	}
+
+	if (icmp.type == ICMP_ECHO) {
+		return ECHO_REQUEST;
+	}
+
+	if (icmp.type != ICMP_DEST_UNREACH || icmp.code != ICMP_FRAG_NEEDED) {
+		metrics->errors_total_unwanted_icmp++;
+		return INVALID;
+	}
+
+	struct iphdr _ip4;
+	const struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+	if (ipv4 == NULL) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	/* The source address in the outer IP header is from the entity that
+	 * originated the ICMP message. Use the original IP header to restore
+	 * the correct flow tuple.
+	 */
+	struct bpf_sock_tuple tuple;
+	tuple.ipv4.saddr = ipv4->daddr;
+	tuple.ipv4.daddr = ipv4->saddr;
+
+	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv4.sport)) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	return classify_icmp(pkt->skb, ipv4->protocol, &tuple,
+			     sizeof(tuple.ipv4), metrics);
+}
+
+static verdict_t process_icmpv6(buf_t *pkt, metrics_t *metrics)
+{
+	struct icmp6hdr icmp6;
+	if (!buf_copy(pkt, &icmp6, sizeof(icmp6))) {
+		metrics->errors_total_malformed_icmp++;
+		return INVALID;
+	}
+
+	/* We should never receive encapsulated echo replies. */
+	if (icmp6.icmp6_type == ICMPV6_ECHO_REPLY) {
+		metrics->errors_total_icmp_echo_replies++;
+		return INVALID;
+	}
+
+	if (icmp6.icmp6_type == ICMPV6_ECHO_REQUEST) {
+		return ECHO_REQUEST;
+	}
+
+	if (icmp6.icmp6_type != ICMPV6_PKT_TOOBIG) {
+		metrics->errors_total_unwanted_icmp++;
+		return INVALID;
+	}
+
+	bool is_fragment;
+	uint8_t l4_proto;
+	struct ipv6hdr _ipv6;
+	const struct ipv6hdr *ipv6 =
+		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+	if (ipv6 == NULL) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	if (is_fragment) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	/* Swap source and dest addresses. */
+	struct bpf_sock_tuple tuple;
+	memcpy(&tuple.ipv6.saddr, &ipv6->daddr, sizeof(tuple.ipv6.saddr));
+	memcpy(&tuple.ipv6.daddr, &ipv6->saddr, sizeof(tuple.ipv6.daddr));
+
+	if (!pkt_parse_icmp_l4_ports(pkt, (flow_ports_t *)&tuple.ipv6.sport)) {
+		metrics->errors_total_malformed_icmp_pkt_too_big++;
+		return INVALID;
+	}
+
+	return classify_icmp(pkt->skb, l4_proto, &tuple, sizeof(tuple.ipv6),
+			     metrics);
+}
+
+static verdict_t process_tcp(buf_t *pkt, void *iph, uint64_t iphlen,
+			     metrics_t *metrics)
+{
+	metrics->l4_protocol_packets_total_tcp++;
+
+	struct tcphdr _tcp;
+	struct tcphdr *tcp = buf_assign(pkt, sizeof(_tcp), &_tcp);
+	if (tcp == NULL) {
+		metrics->errors_total_malformed_tcp++;
+		return INVALID;
+	}
+
+	if (tcp->syn) {
+		return SYN;
+	}
+
+	struct bpf_sock_tuple tuple;
+	uint64_t tuplen =
+		fill_tuple(&tuple, iph, iphlen, tcp->source, tcp->dest);
+	return classify_tcp(pkt->skb, &tuple, tuplen, iph, tcp);
+}
+
+static verdict_t process_udp(buf_t *pkt, void *iph, uint64_t iphlen,
+			     metrics_t *metrics)
+{
+	metrics->l4_protocol_packets_total_udp++;
+
+	struct udphdr _udp;
+	struct udphdr *udph = buf_assign(pkt, sizeof(_udp), &_udp);
+	if (udph == NULL) {
+		metrics->errors_total_malformed_udp++;
+		return INVALID;
+	}
+
+	struct bpf_sock_tuple tuple;
+	uint64_t tuplen =
+		fill_tuple(&tuple, iph, iphlen, udph->source, udph->dest);
+	return classify_udp(pkt->skb, &tuple, tuplen);
+}
+
+static verdict_t process_ipv4(buf_t *pkt, metrics_t *metrics)
+{
+	metrics->l3_protocol_packets_total_ipv4++;
+
+	struct iphdr _ip4;
+	struct iphdr *ipv4 = pkt_parse_ipv4(pkt, &_ip4);
+	if (ipv4 == NULL) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv4->version != 4) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv4_is_fragment(ipv4)) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	switch (ipv4->protocol) {
+	case IPPROTO_ICMP:
+		return process_icmpv4(pkt, metrics);
+
+	case IPPROTO_TCP:
+		return process_tcp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+	case IPPROTO_UDP:
+		return process_udp(pkt, ipv4, sizeof(*ipv4), metrics);
+
+	default:
+		metrics->errors_total_unknown_l4_proto++;
+		return INVALID;
+	}
+}
+
+static verdict_t process_ipv6(buf_t *pkt, metrics_t *metrics)
+{
+	metrics->l3_protocol_packets_total_ipv6++;
+
+	uint8_t l4_proto;
+	bool is_fragment;
+	struct ipv6hdr _ipv6;
+	struct ipv6hdr *ipv6 =
+		pkt_parse_ipv6(pkt, &_ipv6, &l4_proto, &is_fragment);
+	if (ipv6 == NULL) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (ipv6->version != 6) {
+		metrics->errors_total_malformed_ip++;
+		return INVALID;
+	}
+
+	if (is_fragment) {
+		metrics->errors_total_fragmented_ip++;
+		return INVALID;
+	}
+
+	switch (l4_proto) {
+	case IPPROTO_ICMPV6:
+		return process_icmpv6(pkt, metrics);
+
+	case IPPROTO_TCP:
+		return process_tcp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+	case IPPROTO_UDP:
+		return process_udp(pkt, ipv6, sizeof(*ipv6), metrics);
+
+	default:
+		metrics->errors_total_unknown_l4_proto++;
+		return INVALID;
+	}
+}
+
+SEC("classifier/cls_redirect")
+int cls_redirect(struct __sk_buff *skb)
+{
+	metrics_t *metrics = get_global_metrics();
+	if (metrics == NULL) {
+		return TC_ACT_SHOT;
+	}
+
+	metrics->processed_packets_total++;
+
+	/* Pass bogus packets as long as we're not sure they're
+	 * destined for us.
+	 */
+	if (skb->protocol != bpf_htons(ETH_P_IP)) {
+		return TC_ACT_OK;
+	}
+
+	encap_headers_t *encap;
+
+	/* Make sure that all encapsulation headers are available in
+	 * the linear portion of the skb. This makes it easy to manipulate them.
+	 */
+	if (bpf_skb_pull_data(skb, sizeof(*encap))) {
+		return TC_ACT_OK;
+	}
+
+	buf_t pkt = {
+		.skb = skb,
+		.head = (uint8_t *)(long)skb->data,
+		.tail = (uint8_t *)(long)skb->data_end,
+	};
+
+	encap = buf_assign(&pkt, sizeof(*encap), NULL);
+	if (encap == NULL) {
+		return TC_ACT_OK;
+	}
+
+	if (encap->ip.ihl != 5) {
+		/* We never have any options. */
+		return TC_ACT_OK;
+	}
+
+	if (encap->ip.daddr != ENCAPSULATION_IP ||
+	    encap->ip.protocol != IPPROTO_UDP) {
+		return TC_ACT_OK;
+	}
+
+	/* TODO Check UDP length? */
+	if (encap->udp.dest != ENCAPSULATION_PORT) {
+		return TC_ACT_OK;
+	}
+
+	/* We now know that the packet is destined to us, we can
+	 * drop bogus ones.
+	 */
+	if (ipv4_is_fragment((void *)&encap->ip)) {
+		metrics->errors_total_fragmented_ip++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.variant != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.control != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.flags != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->gue.hlen !=
+	    sizeof(encap->unigue) / 4 + encap->unigue.hop_count) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->unigue.version != 0) {
+		metrics->errors_total_malformed_encapsulation++;
+		return TC_ACT_SHOT;
+	}
+
+	if (encap->unigue.reserved != 0) {
+		return TC_ACT_SHOT;
+	}
+
+	struct in_addr next_hop;
+	MAYBE_RETURN(get_next_hop(&pkt, encap, &next_hop));
+
+	if (next_hop.s_addr == 0) {
+		metrics->accepted_packets_total_last_hop++;
+		return accept_locally(skb, encap);
+	}
+
+	verdict_t verdict;
+	switch (encap->gue.proto_ctype) {
+	case IPPROTO_IPIP:
+		verdict = process_ipv4(&pkt, metrics);
+		break;
+
+	case IPPROTO_IPV6:
+		verdict = process_ipv6(&pkt, metrics);
+		break;
+
+	default:
+		metrics->errors_total_unknown_l3_proto++;
+		return TC_ACT_SHOT;
+	}
+
+	switch (verdict) {
+	case INVALID:
+		/* metrics have already been bumped */
+		return TC_ACT_SHOT;
+
+	case UNKNOWN:
+		return forward_to_next_hop(skb, encap, &next_hop, metrics);
+
+	case ECHO_REQUEST:
+		metrics->accepted_packets_total_icmp_echo_request++;
+		break;
+
+	case SYN:
+		if (encap->unigue.forward_syn) {
+			return forward_to_next_hop(skb, encap, &next_hop,
+						   metrics);
+		}
+
+		metrics->accepted_packets_total_syn++;
+		break;
+
+	case SYN_COOKIE:
+		metrics->accepted_packets_total_syn_cookies++;
+		break;
+
+	case ESTABLISHED:
+		metrics->accepted_packets_total_established++;
+		break;
+	}
+
+	return accept_locally(skb, encap);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.h b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
new file mode 100644
index 000000000000..76eab0aacba0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright 2019, 2020 Cloudflare */
+
+#include <stdbool.h>
+#include <stddef.h>
+#include <stdint.h>
+#include <string.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/udp.h>
+
+struct gre_base_hdr {
+	uint16_t flags;
+	uint16_t protocol;
+} __attribute__((packed));
+
+struct guehdr {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	uint8_t hlen : 5, control : 1, variant : 2;
+#else
+	uint8_t variant : 2, control : 1, hlen : 5;
+#endif
+	uint8_t proto_ctype;
+	uint16_t flags;
+};
+
+struct unigue {
+#if __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+	uint8_t _r : 2, last_hop_gre : 1, forward_syn : 1, version : 4;
+#else
+	uint8_t version : 4, forward_syn : 1, last_hop_gre : 1, _r : 2;
+#endif
+	uint8_t reserved;
+	uint8_t next_hop;
+	uint8_t hop_count;
+	// Next hops go here
+} __attribute__((packed));
+
+typedef struct {
+	struct ethhdr eth;
+	struct iphdr ip;
+	struct gre_base_hdr gre;
+} __attribute__((packed)) encap_gre_t;
+
+typedef struct {
+	struct ethhdr eth;
+	struct iphdr ip;
+	struct udphdr udp;
+	struct guehdr gue;
+	struct unigue unigue;
+} __attribute__((packed)) encap_headers_t;
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index f4aff6b8284b..10188cc8e9e0 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -105,6 +105,13 @@ struct ipv6_packet {
 } __packed;
 extern struct ipv6_packet pkt_v6;
 
+#define PRINT_FAIL(format...)                                                  \
+	({                                                                     \
+		test__fail();                                                  \
+		fprintf(stdout, "%s:FAIL:%d ", __func__, __LINE__);            \
+		fprintf(stdout, ##format);                                     \
+	})
+
 #define _CHECK(condition, tag, duration, format...) ({			\
 	int __ret = !!(condition);					\
 	int __save_errno = errno;					\
-- 
cgit v1.2.3-59-g8ed1b


From 74f99482eae03195ced512b440b31d62bdb6e943 Mon Sep 17 00:00:00 2001
From: Bodong Wang <bodong@mellanox.com>
Date: Tue, 21 Apr 2020 10:04:16 -0500
Subject: netfilter: nf_conntrack: add IPS_HW_OFFLOAD status bit

This bit indicates that the conntrack entry is offloaded to hardware
flow table. nf_conntrack entry will be tagged with [HW_OFFLOAD] if
it's offloaded to hardware.

cat /proc/net/nf_conntrack
	ipv4 2 tcp 6 \
	src=1.1.1.17 dst=1.1.1.16 sport=56394 dport=5001 \
	src=1.1.1.16 dst=1.1.1.17 sport=5001 dport=56394 [HW_OFFLOAD] \
	mark=0 zone=0 use=3

Note that HW_OFFLOAD/OFFLOAD/ASSURED are mutually exclusive.

Changelog:

* V1->V2:
- Remove check of lastused from stats. It was meant for cases such
  as removing driver module while traffic still running. Better to
  handle such cases from garbage collector.

Signed-off-by: Bodong Wang <bodong@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_conntrack_common.h | 8 ++++++--
 net/netfilter/nf_conntrack_standalone.c            | 4 +++-
 net/netfilter/nf_flow_table_offload.c              | 3 +++
 3 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_conntrack_common.h b/include/uapi/linux/netfilter/nf_conntrack_common.h
index b6f0bb1dc799..4b3395082d15 100644
--- a/include/uapi/linux/netfilter/nf_conntrack_common.h
+++ b/include/uapi/linux/netfilter/nf_conntrack_common.h
@@ -114,15 +114,19 @@ enum ip_conntrack_status {
 	IPS_OFFLOAD_BIT = 14,
 	IPS_OFFLOAD = (1 << IPS_OFFLOAD_BIT),
 
+	/* Conntrack has been offloaded to hardware. */
+	IPS_HW_OFFLOAD_BIT = 15,
+	IPS_HW_OFFLOAD = (1 << IPS_HW_OFFLOAD_BIT),
+
 	/* Be careful here, modifying these bits can make things messy,
 	 * so don't let users modify them directly.
 	 */
 	IPS_UNCHANGEABLE_MASK = (IPS_NAT_DONE_MASK | IPS_NAT_MASK |
 				 IPS_EXPECTED | IPS_CONFIRMED | IPS_DYING |
 				 IPS_SEQ_ADJUST | IPS_TEMPLATE | IPS_UNTRACKED |
-				 IPS_OFFLOAD),
+				 IPS_OFFLOAD | IPS_HW_OFFLOAD),
 
-	__IPS_MAX_BIT = 15,
+	__IPS_MAX_BIT = 16,
 };
 
 /* Connection tracking event types */
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9b57330c81f8..5a3e6c43ee68 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -348,7 +348,9 @@ static int ct_seq_show(struct seq_file *s, void *v)
 	if (seq_print_acct(s, ct, IP_CT_DIR_REPLY))
 		goto release;
 
-	if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
+	if (test_bit(IPS_HW_OFFLOAD_BIT, &ct->status))
+		seq_puts(s, "[HW_OFFLOAD] ");
+	else if (test_bit(IPS_OFFLOAD_BIT, &ct->status))
 		seq_puts(s, "[OFFLOAD] ");
 	else if (test_bit(IPS_ASSURED_BIT, &ct->status))
 		seq_puts(s, "[ASSURED] ");
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index e3b099c14eff..a2abb0feab7f 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -754,12 +754,15 @@ static void flow_offload_work_add(struct flow_offload_work *offload)
 	err = flow_offload_rule_add(offload, flow_rule);
 	if (err < 0)
 		set_bit(NF_FLOW_HW_REFRESH, &offload->flow->flags);
+	else
+		set_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
 
 	nf_flow_offload_destroy(flow_rule);
 }
 
 static void flow_offload_work_del(struct flow_offload_work *offload)
 {
+	clear_bit(IPS_HW_OFFLOAD_BIT, &offload->flow->ct->status);
 	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_ORIGINAL);
 	flow_offload_tuple_del(offload, FLOW_OFFLOAD_DIR_REPLY);
 	set_bit(NF_FLOW_HW_DEAD, &offload->flow->flags);
-- 
cgit v1.2.3-59-g8ed1b


From 5cb899dd5ba430bd2debf6e4ce7b5cece9e9025d Mon Sep 17 00:00:00 2001
From: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Date: Wed, 22 Apr 2020 16:16:18 +0530
Subject: ath11k: fix reo flush send

We send the reo flush command for a deleted peer tid
after the ageout period reaches 1 second. This handling
causes the reo ring to get full when more than 128 clients are
disconnected continuously. So add a count for the flush list
and trigger the reo flush command once the list count reaches
the threshold value, which is configured as 64 (half of the reo ring).
This avoids the situation where the reo ring gets full.

Signed-off-by: Karthikeyan Periyasamy <periyasa@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587552378-4884-1-git-send-email-periyasa@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp.c    | 2 ++
 drivers/net/wireless/ath/ath11k/dp.h    | 9 ++++++++-
 drivers/net/wireless/ath/ath11k/dp_rx.c | 6 +++++-
 3 files changed, 15 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 50350f77b309..8d6fb848f8c4 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -880,6 +880,8 @@ int ath11k_dp_alloc(struct ath11k_base *ab)
 	INIT_LIST_HEAD(&dp->reo_cmd_cache_flush_list);
 	spin_lock_init(&dp->reo_cmd_lock);
 
+	dp->reo_cmd_cache_flush_count = 0;
+
 	ret = ath11k_wbm_idle_ring_setup(ab, &n_link_desc);
 	if (ret) {
 		ath11k_warn(ab, "failed to setup wbm_idle_ring: %d\n", ret);
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index d4e19dc4bce1..222de10e4b93 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -36,6 +36,7 @@ struct dp_rx_tid {
 	struct ath11k_base *ab;
 };
 
+#define DP_REO_DESC_FREE_THRESHOLD  64
 #define DP_REO_DESC_FREE_TIMEOUT_MS 1000
 
 struct dp_reo_cache_flush_elem {
@@ -222,7 +223,13 @@ struct ath11k_dp {
 	struct hal_wbm_idle_scatter_list scatter_list[DP_IDLE_SCATTER_BUFS_MAX];
 	struct list_head reo_cmd_list;
 	struct list_head reo_cmd_cache_flush_list;
-	/* protects access to reo_cmd_list and reo_cmd_cache_flush_list */
+	u32 reo_cmd_cache_flush_count;
+	/**
+	 * protects access to below fields,
+	 * - reo_cmd_list
+	 * - reo_cmd_cache_flush_list
+	 * - reo_cmd_cache_flush_count
+	 */
 	spinlock_t reo_cmd_lock;
 };
 
diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index bbd7da48518f..d3d2a335cc40 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -565,6 +565,7 @@ void ath11k_dp_reo_cmd_list_cleanup(struct ath11k_base *ab)
 	list_for_each_entry_safe(cmd_cache, tmp_cache,
 				 &dp->reo_cmd_cache_flush_list, list) {
 		list_del(&cmd_cache->list);
+		dp->reo_cmd_cache_flush_count--;
 		dma_unmap_single(ab->dev, cmd_cache->data.paddr,
 				 cmd_cache->data.size, DMA_BIDIRECTIONAL);
 		kfree(cmd_cache->data.vaddr);
@@ -651,15 +652,18 @@ static void ath11k_dp_rx_tid_del_func(struct ath11k_dp *dp, void *ctx,
 
 	spin_lock_bh(&dp->reo_cmd_lock);
 	list_add_tail(&elem->list, &dp->reo_cmd_cache_flush_list);
+	dp->reo_cmd_cache_flush_count++;
 	spin_unlock_bh(&dp->reo_cmd_lock);
 
 	/* Flush and invalidate aged REO desc from HW cache */
 	spin_lock_bh(&dp->reo_cmd_lock);
 	list_for_each_entry_safe(elem, tmp, &dp->reo_cmd_cache_flush_list,
 				 list) {
-		if (time_after(jiffies, elem->ts +
+		if (dp->reo_cmd_cache_flush_count > DP_REO_DESC_FREE_THRESHOLD ||
+		    time_after(jiffies, elem->ts +
 			       msecs_to_jiffies(DP_REO_DESC_FREE_TIMEOUT_MS))) {
 			list_del(&elem->list);
+			dp->reo_cmd_cache_flush_count--;
 			spin_unlock_bh(&dp->reo_cmd_lock);
 
 			ath11k_dp_reo_cache_flush(ab, &elem->data);
-- 
cgit v1.2.3-59-g8ed1b


From 4913e675630ec1a15c92651f426a63755c71b91b Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Thu, 23 Apr 2020 10:27:58 +0800
Subject: ath10k: enable rx duration report default for wmi tlv

When running the command "iw dev wlan0 station dump", the rx duration
is 0. When firmware indicates WMI_UPDATE_STATS_EVENTID, the extended flag
of stats is not set by default, so firmware does not report rx duration.

one sample:
localhost # iw wlan0 station dump
Station c4:04:15:5d:97:22 (on wlan0)
        inactive time:  48 ms
        rx bytes:       21670
        rx packets:     147
        tx bytes:       11529
        tx packets:     100
        tx retries:     88
        tx failed:      36
        beacon loss:    1
        beacon rx:      31
        rx drop misc:   47
        signal:         -72 [-74, -75] dBm
        signal avg:     -71 [-74, -75] dBm
        beacon signal avg:      -71 dBm
        tx bitrate:     54.0 MBit/s MCS 3 40MHz
        rx bitrate:     1.0 MBit/s
	rx duration:    0 us

This patch enables the firmware's extended flag of stats by setting flag
WMI_TLV_STAT_PEER_EXTD of ar->fw_stats_req_mask which is set in
ath10k_core_init_firmware_features via WMI_REQUEST_STATS_CMDID.

After applying this patch, rx duration shows a value with the command:
Station c4:04:15:5d:97:22 (on wlan0)
        inactive time:  883 ms
        rx bytes:       44289
        rx packets:     265
        tx bytes:       10838
        tx packets:     93
        tx retries:     899
        tx failed:      103
        beacon loss:    0
        beacon rx:      78
        rx drop misc:   46
        signal:         -71 [-74, -76] dBm
        signal avg:     -70 [-74, -76] dBm
        beacon signal avg:      -70 dBm
        tx bitrate:     54.0 MBit/s MCS 3 40MHz
        rx bitrate:     1.0 MBit/s
        rx duration:    358004 us

This patch does not have side effects for any chip, because function
ath10k_debug_fw_stats_request is already exported to debugfs
"fw_stats" and WMI_REQUEST_STATS_CMDID is safely sent after the condition
checked by ath10k_peer_stats_enabled in ath10k_sta_statistics.

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200423022758.5365-1-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/debug.c | 2 +-
 drivers/net/wireless/ath/ath10k/debug.h | 8 ++++++++
 drivers/net/wireless/ath/ath10k/mac.c   | 2 ++
 3 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/debug.c b/drivers/net/wireless/ath/ath10k/debug.c
index 69139c2e6f82..e8250a665433 100644
--- a/drivers/net/wireless/ath/ath10k/debug.c
+++ b/drivers/net/wireless/ath/ath10k/debug.c
@@ -349,7 +349,7 @@ free:
 	spin_unlock_bh(&ar->data_lock);
 }
 
-static int ath10k_debug_fw_stats_request(struct ath10k *ar)
+int ath10k_debug_fw_stats_request(struct ath10k *ar)
 {
 	unsigned long timeout, time_left;
 	int ret;
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 82f7eb8583d9..4cbfd9279d6f 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -125,6 +125,9 @@ static inline int ath10k_debug_is_extd_tx_stats_enabled(struct ath10k *ar)
 {
 	return ar->debug.enable_extd_tx_stats;
 }
+
+int ath10k_debug_fw_stats_request(struct ath10k *ar);
+
 #else
 
 static inline int ath10k_debug_start(struct ath10k *ar)
@@ -192,6 +195,11 @@ static inline int ath10k_debug_is_extd_tx_stats_enabled(struct ath10k *ar)
 	return 0;
 }
 
+static inline int ath10k_debug_fw_stats_request(struct ath10k *ar)
+{
+	return 0;
+}
+
 #define ATH10K_DFS_STAT_INC(ar, c) do { } while (0)
 
 #define ath10k_debug_get_et_strings NULL
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 5de7910c24e7..98065b97b982 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8311,6 +8311,8 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw,
 	if (!ath10k_peer_stats_enabled(ar))
 		return;
 
+	ath10k_debug_fw_stats_request(ar);
+
 	sinfo->rx_duration = arsta->rx_duration;
 	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_DURATION);
 
-- 
cgit v1.2.3-59-g8ed1b


From 59a022cc14cf84c6405efc1571045683c258a1f5 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Thu, 23 Apr 2020 10:41:34 +0800
Subject: ath10k: add statistics of tx retries and tx failed when tx complete
 disable

When tx complete is disabled, all tx status will be set with status
HTT_TX_COMPL_STATE_ACK and indicated to mac80211 by ieee80211_tx_status,
so it does not have the statistics for retries and failed packets.
The counts of tx retries and tx failed in the output of command
"iw wlan0 station dump" are both 0. If tx complete is not disabled, then
firmware reports the tx status and ath10k indicates the status to
mac80211, then mac80211 saves the statistics and command
"iw wlan0 station dump" shows them.

for example:
localhost ~ # iw dev wlan0 station dump
Station 3c:28:6d:96:fd:69 (on wlan0)
	inactive time:	5 ms
	rx bytes:	1325012
	rx packets:	6477
	tx bytes:	85264
	tx packets:	518
	tx retries:	0
	tx failed:	0

This patch only affects chips with tx complete disabled, e.g. SDIO.

With this patch, the output of command "iw dev wlan0 station dump" is:
Station c4:04:15:5d:97:22 (on wlan0)
        inactive time:  608 ms
        rx bytes:       180366
        rx packets:     991
        tx bytes:       98765577
        tx packets:     64624
        tx retries:     14682
        tx failed:      47086

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200423024134.10601-1-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.h   | 2 ++
 drivers/net/wireless/ath/ath10k/htt_rx.c | 7 +++++++
 drivers/net/wireless/ath/ath10k/mac.c    | 7 +++++++
 3 files changed, 16 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index d6adcbaf9616..07935d39d6d6 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -500,6 +500,8 @@ struct ath10k_sta {
 	u16 peer_id;
 	struct rate_info txrate;
 	struct ieee80211_tx_info tx_info;
+	u32 tx_retries;
+	u32 tx_failed;
 	u32 last_tx_bitrate;
 
 	struct work_struct update_wk;
diff --git a/drivers/net/wireless/ath/ath10k/htt_rx.c b/drivers/net/wireless/ath/ath10k/htt_rx.c
index 816af1a8ad69..d787cbead56a 100644
--- a/drivers/net/wireless/ath/ath10k/htt_rx.c
+++ b/drivers/net/wireless/ath/ath10k/htt_rx.c
@@ -3574,6 +3574,13 @@ ath10k_update_per_peer_tx_stats(struct ath10k *ar,
 		ieee80211_tx_rate_update(ar->hw, sta, &arsta->tx_info);
 	}
 
+	if (ar->htt.disable_tx_comp) {
+		arsta->tx_retries += peer_stats->retry_pkts;
+		arsta->tx_failed += peer_stats->failed_pkts;
+		ath10k_dbg(ar, ATH10K_DBG_HTT, "htt tx retries %d tx failed %d\n",
+			   arsta->tx_retries, arsta->tx_failed);
+	}
+
 	if (ath10k_debug_is_extd_tx_stats_enabled(ar))
 		ath10k_accumulate_per_peer_tx_stats(ar, arsta, peer_stats,
 						    rate_idx);
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 98065b97b982..a1147ccc09bf 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8328,6 +8328,13 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw,
 	}
 	sinfo->txrate.flags = arsta->txrate.flags;
 	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+
+	if (ar->htt.disable_tx_comp) {
+		sinfo->tx_retries = arsta->tx_retries;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_RETRIES);
+		sinfo->tx_failed = arsta->tx_failed;
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
+	}
 }
 
 static const struct ieee80211_ops ath10k_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From 26363af5643490a817272e1cc6f1d3f1d550a699 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2020 08:43:35 +0200
Subject: mm: remove watermark_boost_factor_sysctl_handler

watermark_boost_factor_sysctl_handler is just a pointless wrapper for
proc_dointvec_minmax, so remove it and use proc_dointvec_minmax
directly.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Rientjes <rientjes@google.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/mmzone.h |  2 --
 kernel/sysctl.c        |  2 +-
 mm/page_alloc.c        | 12 ------------
 3 files changed, 1 insertion(+), 15 deletions(-)

diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index 1b9de7d220fb..f37bb8f187fc 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -911,8 +911,6 @@ static inline int is_highmem(struct zone *zone)
 struct ctl_table;
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
-int watermark_boost_factor_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 8a176d8727a3..99d27acf4646 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -1491,7 +1491,7 @@ static struct ctl_table vm_table[] = {
 		.data		= &watermark_boost_factor,
 		.maxlen		= sizeof(watermark_boost_factor),
 		.mode		= 0644,
-		.proc_handler	= watermark_boost_factor_sysctl_handler,
+		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
 	},
 	{
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 69827d4fa052..62c1550cd43e 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -7978,18 +7978,6 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 	return 0;
 }
 
-int watermark_boost_factor_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
-{
-	int rc;
-
-	rc = proc_dointvec_minmax(table, write, buffer, length, ppos);
-	if (rc)
-		return rc;
-
-	return 0;
-}
-
 int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
 	void __user *buffer, size_t *length, loff_t *ppos)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 2374c09b1c8a883bb9b4b2fc3756703eeb618f4a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2020 08:43:36 +0200
Subject: sysctl: remove all extern declaration from sysctl.c

Extern declarations in .c files are bad style and can lead to
mismatches.  Use existing definitions in headers where they exist,
and otherwise move the external declarations to suitable header
files.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/coredump.h |  4 ++++
 include/linux/file.h     |  2 ++
 include/linux/mm.h       |  2 ++
 include/linux/mmzone.h   |  2 ++
 include/linux/pid.h      |  3 +++
 include/linux/sysctl.h   |  8 ++++++++
 kernel/sysctl.c          | 45 +++------------------------------------------
 7 files changed, 24 insertions(+), 42 deletions(-)

diff --git a/include/linux/coredump.h b/include/linux/coredump.h
index abf4b4e65dbb..7a899e83835d 100644
--- a/include/linux/coredump.h
+++ b/include/linux/coredump.h
@@ -22,4 +22,8 @@ extern void do_coredump(const kernel_siginfo_t *siginfo);
 static inline void do_coredump(const kernel_siginfo_t *siginfo) {}
 #endif
 
+extern int core_uses_pid;
+extern char core_pattern[];
+extern unsigned int core_pipe_limit;
+
 #endif /* _LINUX_COREDUMP_H */
diff --git a/include/linux/file.h b/include/linux/file.h
index 142d102f285e..122f80084a3e 100644
--- a/include/linux/file.h
+++ b/include/linux/file.h
@@ -94,4 +94,6 @@ extern void fd_install(unsigned int fd, struct file *file);
 extern void flush_delayed_fput(void);
 extern void __fput_sync(struct file *);
 
+extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
+
 #endif /* __LINUX_FILE_H */
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 5a323422d783..9c4e7e76dedd 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -3140,5 +3140,7 @@ unsigned long wp_shared_mapping_range(struct address_space *mapping,
 				      pgoff_t first_index, pgoff_t nr);
 #endif
 
+extern int sysctl_nr_trim_pages;
+
 #endif /* __KERNEL__ */
 #endif /* _LINUX_MM_H */
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index f37bb8f187fc..b2af594ef0f7 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -909,6 +909,7 @@ static inline int is_highmem(struct zone *zone)
 
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
+
 int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
 					void __user *, size_t *, loff_t *);
 int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
@@ -925,6 +926,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
 
 extern int numa_zonelist_order_handler(struct ctl_table *, int,
 			void __user *, size_t *, loff_t *);
+extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
 
diff --git a/include/linux/pid.h b/include/linux/pid.h
index cc896f0fc4e3..93543cbc0e6b 100644
--- a/include/linux/pid.h
+++ b/include/linux/pid.h
@@ -108,6 +108,9 @@ extern void transfer_pid(struct task_struct *old, struct task_struct *new,
 struct pid_namespace;
 extern struct pid_namespace init_pid_ns;
 
+extern int pid_max;
+extern int pid_max_min, pid_max_max;
+
 /*
  * look up a PID in the hash table. Must be called with the tasklist_lock
  * or rcu_read_lock() held.
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 02fa84493f23..36143ca40b56 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -207,7 +207,15 @@ void unregister_sysctl_table(struct ctl_table_header * table);
 
 extern int sysctl_init(void);
 
+extern int pwrsw_enabled;
+extern int unaligned_enabled;
+extern int unaligned_dump_stack;
+extern int no_unaligned_warning;
+
 extern struct ctl_table sysctl_mount_point[];
+extern struct ctl_table random_table[];
+extern struct ctl_table firmware_config_table[];
+extern struct ctl_table epoll_table[];
 
 #else /* CONFIG_SYSCTL */
 static inline struct ctl_table_header *register_sysctl_table(struct ctl_table * table)
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 99d27acf4646..31b934865ebc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -68,6 +68,9 @@
 #include <linux/bpf.h>
 #include <linux/mount.h>
 #include <linux/userfaultfd_k.h>
+#include <linux/coredump.h>
+#include <linux/latencytop.h>
+#include <linux/pid.h>
 
 #include "../lib/kstrtox.h"
 
@@ -103,22 +106,6 @@
 
 #if defined(CONFIG_SYSCTL)
 
-/* External variables not in a header file. */
-extern int suid_dumpable;
-#ifdef CONFIG_COREDUMP
-extern int core_uses_pid;
-extern char core_pattern[];
-extern unsigned int core_pipe_limit;
-#endif
-extern int pid_max;
-extern int pid_max_min, pid_max_max;
-extern int percpu_pagelist_fraction;
-extern int latencytop_enabled;
-extern unsigned int sysctl_nr_open_min, sysctl_nr_open_max;
-#ifndef CONFIG_MMU
-extern int sysctl_nr_trim_pages;
-#endif
-
 /* Constants used for minimum and  maximum */
 #ifdef CONFIG_LOCKUP_DETECTOR
 static int sixty = 60;
@@ -160,24 +147,6 @@ static unsigned long hung_task_timeout_max = (LONG_MAX/HZ);
 #ifdef CONFIG_INOTIFY_USER
 #include <linux/inotify.h>
 #endif
-#ifdef CONFIG_SPARC
-#endif
-
-#ifdef CONFIG_PARISC
-extern int pwrsw_enabled;
-#endif
-
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
-extern int unaligned_enabled;
-#endif
-
-#ifdef CONFIG_IA64
-extern int unaligned_dump_stack;
-#endif
-
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
-extern int no_unaligned_warning;
-#endif
 
 #ifdef CONFIG_PROC_SYSCTL
 
@@ -243,14 +212,6 @@ static struct ctl_table vm_table[];
 static struct ctl_table fs_table[];
 static struct ctl_table debug_table[];
 static struct ctl_table dev_table[];
-extern struct ctl_table random_table[];
-#ifdef CONFIG_EPOLL
-extern struct ctl_table epoll_table[];
-#endif
-
-#ifdef CONFIG_FW_LOADER_USER_HELPER
-extern struct ctl_table firmware_config_table[];
-#endif
 
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
-- 
cgit v1.2.3-59-g8ed1b


From f461d2dcd511c020a26d4d791fae595c65ed09b6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2020 08:43:37 +0200
Subject: sysctl: avoid forward declarations

Move the sysctl tables to the end of the file to avoid lots of pointless
forward declarations.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 kernel/sysctl.c | 3573 +++++++++++++++++++++++++++----------------------------
 1 file changed, 1768 insertions(+), 1805 deletions(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 31b934865ebc..3fafca3ced98 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -176,79 +176,13 @@ enum sysctl_writes_mode {
 };
 
 static enum sysctl_writes_mode sysctl_writes_strict = SYSCTL_WRITES_STRICT;
-
-static int proc_do_cad_pid(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
-static int proc_taint(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_COMPACTION
-static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
-					       int write, void __user *buffer,
-					       size_t *lenp, loff_t *ppos);
-#endif
-#endif
-
-#ifdef CONFIG_PRINTK
-static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
-static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-#ifdef CONFIG_COREDUMP
-static int proc_dostring_coredump(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-static int proc_dopipe_max_size(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos);
-
-#ifdef CONFIG_MAGIC_SYSRQ
-static int sysrq_sysctl_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos);
-#endif
-
-static struct ctl_table kern_table[];
-static struct ctl_table vm_table[];
-static struct ctl_table fs_table[];
-static struct ctl_table debug_table[];
-static struct ctl_table dev_table[];
+#endif /* CONFIG_PROC_SYSCTL */
 
 #if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
     defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
 int sysctl_legacy_va_layout;
 #endif
 
-/* The default sysctl tables: */
-
-static struct ctl_table sysctl_base_table[] = {
-	{
-		.procname	= "kernel",
-		.mode		= 0555,
-		.child		= kern_table,
-	},
-	{
-		.procname	= "vm",
-		.mode		= 0555,
-		.child		= vm_table,
-	},
-	{
-		.procname	= "fs",
-		.mode		= 0555,
-		.child		= fs_table,
-	},
-	{
-		.procname	= "debug",
-		.mode		= 0555,
-		.child		= debug_table,
-	},
-	{
-		.procname	= "dev",
-		.mode		= 0555,
-		.child		= dev_table,
-	},
-	{ }
-};
-
 #ifdef CONFIG_SCHED_DEBUG
 static int min_sched_granularity_ns = 100000;		/* 100 usecs */
 static int max_sched_granularity_ns = NSEC_PER_SEC;	/* 1 second */
@@ -265,1676 +199,12 @@ static int min_extfrag_threshold;
 static int max_extfrag_threshold = 1000;
 #endif
 
-static struct ctl_table kern_table[] = {
-	{
-		.procname	= "sched_child_runs_first",
-		.data		= &sysctl_sched_child_runs_first,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SCHED_DEBUG
-	{
-		.procname	= "sched_min_granularity_ns",
-		.data		= &sysctl_sched_min_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
-	{
-		.procname	= "sched_latency_ns",
-		.data		= &sysctl_sched_latency,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_granularity_ns,
-		.extra2		= &max_sched_granularity_ns,
-	},
-	{
-		.procname	= "sched_wakeup_granularity_ns",
-		.data		= &sysctl_sched_wakeup_granularity,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_wakeup_granularity_ns,
-		.extra2		= &max_wakeup_granularity_ns,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "sched_tunable_scaling",
-		.data		= &sysctl_sched_tunable_scaling,
-		.maxlen		= sizeof(enum sched_tunable_scaling),
-		.mode		= 0644,
-		.proc_handler	= sched_proc_update_handler,
-		.extra1		= &min_sched_tunable_scaling,
-		.extra2		= &max_sched_tunable_scaling,
-	},
-	{
-		.procname	= "sched_migration_cost_ns",
-		.data		= &sysctl_sched_migration_cost,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "sched_nr_migrate",
-		.data		= &sysctl_sched_nr_migrate,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SCHEDSTATS
-	{
-		.procname	= "sched_schedstats",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_schedstats,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SCHEDSTATS */
-#endif /* CONFIG_SMP */
-#ifdef CONFIG_NUMA_BALANCING
-	{
-		.procname	= "numa_balancing_scan_delay_ms",
-		.data		= &sysctl_numa_balancing_scan_delay,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_period_min_ms",
-		.data		= &sysctl_numa_balancing_scan_period_min,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_period_max_ms",
-		.data		= &sysctl_numa_balancing_scan_period_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "numa_balancing_scan_size_mb",
-		.data		= &sysctl_numa_balancing_scan_size,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "numa_balancing",
-		.data		= NULL, /* filled in by handler */
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_numa_balancing,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_NUMA_BALANCING */
-#endif /* CONFIG_SCHED_DEBUG */
-	{
-		.procname	= "sched_rt_period_us",
-		.data		= &sysctl_sched_rt_period,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_rt_handler,
-	},
-	{
-		.procname	= "sched_rt_runtime_us",
-		.data		= &sysctl_sched_rt_runtime,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sched_rt_handler,
-	},
-	{
-		.procname	= "sched_rr_timeslice_ms",
-		.data		= &sysctl_sched_rr_timeslice,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sched_rr_handler,
-	},
-#ifdef CONFIG_UCLAMP_TASK
-	{
-		.procname	= "sched_util_clamp_min",
-		.data		= &sysctl_sched_uclamp_util_min,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_sched_uclamp_handler,
-	},
-	{
-		.procname	= "sched_util_clamp_max",
-		.data		= &sysctl_sched_uclamp_util_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_sched_uclamp_handler,
-	},
-#endif
-#ifdef CONFIG_SCHED_AUTOGROUP
-	{
-		.procname	= "sched_autogroup_enabled",
-		.data		= &sysctl_sched_autogroup_enabled,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_CFS_BANDWIDTH
-	{
-		.procname	= "sched_cfs_bandwidth_slice_us",
-		.data		= &sysctl_sched_cfs_bandwidth_slice,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-#endif
-#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
-	{
-		.procname	= "sched_energy_aware",
-		.data		= &sysctl_sched_energy_aware,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= sched_energy_aware_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_PROVE_LOCKING
-	{
-		.procname	= "prove_locking",
-		.data		= &prove_locking,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_LOCK_STAT
-	{
-		.procname	= "lock_stat",
-		.data		= &lock_stat,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "panic",
-		.data		= &panic_timeout,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_COREDUMP
-	{
-		.procname	= "core_uses_pid",
-		.data		= &core_uses_pid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "core_pattern",
-		.data		= core_pattern,
-		.maxlen		= CORENAME_MAX_SIZE,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring_coredump,
-	},
-	{
-		.procname	= "core_pipe_limit",
-		.data		= &core_pipe_limit,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_PROC_SYSCTL
-	{
-		.procname	= "tainted",
-		.maxlen 	= sizeof(long),
-		.mode		= 0644,
-		.proc_handler	= proc_taint,
-	},
-	{
-		.procname	= "sysctl_writes_strict",
-		.data		= &sysctl_writes_strict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &neg_one,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_LATENCYTOP
-	{
-		.procname	= "latencytop",
-		.data		= &latencytop_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_latencytop,
-	},
-#endif
-#ifdef CONFIG_BLK_DEV_INITRD
-	{
-		.procname	= "real-root-dev",
-		.data		= &real_root_dev,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "print-fatal-signals",
-		.data		= &print_fatal_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_SPARC
-	{
-		.procname	= "reboot-cmd",
-		.data		= reboot_command,
-		.maxlen		= 256,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-	{
-		.procname	= "stop-a",
-		.data		= &stop_a_enabled,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "scons-poweroff",
-		.data		= &scons_pwroff,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_SPARC64
-	{
-		.procname	= "tsb-ratio",
-		.data		= &sysctl_tsb_ratio,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_PARISC
-	{
-		.procname	= "soft-power",
-		.data		= &pwrsw_enabled,
-		.maxlen		= sizeof (int),
-	 	.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
-	{
-		.procname	= "unaligned-trap",
-		.data		= &unaligned_enabled,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "ctrl-alt-del",
-		.data		= &C_A_D,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_FUNCTION_TRACER
-	{
-		.procname	= "ftrace_enabled",
-		.data		= &ftrace_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= ftrace_enable_sysctl,
-	},
-#endif
-#ifdef CONFIG_STACK_TRACER
-	{
-		.procname	= "stack_tracer_enabled",
-		.data		= &stack_tracer_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= stack_trace_sysctl,
-	},
-#endif
-#ifdef CONFIG_TRACING
-	{
-		.procname	= "ftrace_dump_on_oops",
-		.data		= &ftrace_dump_on_oops,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "traceoff_on_warning",
-		.data		= &__disable_trace_on_warning,
-		.maxlen		= sizeof(__disable_trace_on_warning),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "tracepoint_printk",
-		.data		= &tracepoint_printk,
-		.maxlen		= sizeof(tracepoint_printk),
-		.mode		= 0644,
-		.proc_handler	= tracepoint_printk_sysctl,
-	},
-#endif
-#ifdef CONFIG_KEXEC_CORE
-	{
-		.procname	= "kexec_load_disabled",
-		.data		= &kexec_load_disabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_MODULES
-	{
-		.procname	= "modprobe",
-		.data		= &modprobe_path,
-		.maxlen		= KMOD_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-	{
-		.procname	= "modules_disabled",
-		.data		= &modules_disabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_UEVENT_HELPER
-	{
-		.procname	= "hotplug",
-		.data		= &uevent_helper,
-		.maxlen		= UEVENT_HELPER_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-#endif
-#ifdef CONFIG_CHR_DEV_SG
-	{
-		.procname	= "sg-big-buff",
-		.data		= &sg_big_buff,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_BSD_PROCESS_ACCT
-	{
-		.procname	= "acct",
-		.data		= &acct_parm,
-		.maxlen		= 3*sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_MAGIC_SYSRQ
-	{
-		.procname	= "sysrq",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= sysrq_sysctl_handler,
-	},
-#endif
-#ifdef CONFIG_PROC_SYSCTL
-	{
-		.procname	= "cad_pid",
-		.data		= NULL,
-		.maxlen		= sizeof (int),
-		.mode		= 0600,
-		.proc_handler	= proc_do_cad_pid,
-	},
-#endif
-	{
-		.procname	= "threads-max",
-		.data		= NULL,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= sysctl_max_threads,
-	},
-	{
-		.procname	= "random",
-		.mode		= 0555,
-		.child		= random_table,
-	},
-	{
-		.procname	= "usermodehelper",
-		.mode		= 0555,
-		.child		= usermodehelper_table,
-	},
-#ifdef CONFIG_FW_LOADER_USER_HELPER
-	{
-		.procname	= "firmware_config",
-		.mode		= 0555,
-		.child		= firmware_config_table,
-	},
-#endif
-	{
-		.procname	= "overflowuid",
-		.data		= &overflowuid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-	{
-		.procname	= "overflowgid",
-		.data		= &overflowgid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-#ifdef CONFIG_S390
-	{
-		.procname	= "userprocess_debug",
-		.data		= &show_unhandled_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "pid_max",
-		.data		= &pid_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &pid_max_min,
-		.extra2		= &pid_max_max,
-	},
-	{
-		.procname	= "panic_on_oops",
-		.data		= &panic_on_oops,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "panic_print",
-		.data		= &panic_print,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#if defined CONFIG_PRINTK
-	{
-		.procname	= "printk",
-		.data		= &console_loglevel,
-		.maxlen		= 4*sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "printk_ratelimit",
-		.data		= &printk_ratelimit_state.interval,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "printk_ratelimit_burst",
-		.data		= &printk_ratelimit_state.burst,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "printk_delay",
-		.data		= &printk_delay_msec,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &ten_thousand,
-	},
-	{
-		.procname	= "printk_devkmsg",
-		.data		= devkmsg_log_str,
-		.maxlen		= DEVKMSG_STR_MAX_SIZE,
-		.mode		= 0644,
-		.proc_handler	= devkmsg_sysctl_set_loglvl,
-	},
-	{
-		.procname	= "dmesg_restrict",
-		.data		= &dmesg_restrict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_sysadmin,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "kptr_restrict",
-		.data		= &kptr_restrict,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_sysadmin,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-#endif
-	{
-		.procname	= "ngroups_max",
-		.data		= &ngroups_max,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "cap_last_cap",
-		.data		= (void *)&cap_last_cap,
-		.maxlen		= sizeof(int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-#if defined(CONFIG_LOCKUP_DETECTOR)
-	{
-		.procname       = "watchdog",
-		.data		= &watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler   = proc_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "watchdog_thresh",
-		.data		= &watchdog_thresh,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_thresh,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &sixty,
-	},
-	{
-		.procname       = "nmi_watchdog",
-		.data		= &nmi_watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
-		.proc_handler   = proc_nmi_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "watchdog_cpumask",
-		.data		= &watchdog_cpumask_bits,
-		.maxlen		= NR_CPUS,
-		.mode		= 0644,
-		.proc_handler	= proc_watchdog_cpumask,
-	},
-#ifdef CONFIG_SOFTLOCKUP_DETECTOR
-	{
-		.procname       = "soft_watchdog",
-		.data		= &soft_watchdog_user_enabled,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler   = proc_soft_watchdog,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "softlockup_panic",
-		.data		= &softlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "softlockup_all_cpu_backtrace",
-		.data		= &sysctl_softlockup_all_cpu_backtrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SMP */
-#endif
-#ifdef CONFIG_HARDLOCKUP_DETECTOR
-	{
-		.procname	= "hardlockup_panic",
-		.data		= &hardlockup_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#ifdef CONFIG_SMP
-	{
-		.procname	= "hardlockup_all_cpu_backtrace",
-		.data		= &sysctl_hardlockup_all_cpu_backtrace,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif /* CONFIG_SMP */
-#endif
-#endif
-
-#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
-	{
-		.procname       = "unknown_nmi_panic",
-		.data           = &unknown_nmi_panic,
-		.maxlen         = sizeof (int),
-		.mode           = 0644,
-		.proc_handler   = proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_X86)
-	{
-		.procname	= "panic_on_unrecovered_nmi",
-		.data		= &panic_on_unrecovered_nmi,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "panic_on_io_nmi",
-		.data		= &panic_on_io_nmi,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#ifdef CONFIG_DEBUG_STACKOVERFLOW
-	{
-		.procname	= "panic_on_stackoverflow",
-		.data		= &sysctl_panic_on_stackoverflow,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "bootloader_type",
-		.data		= &bootloader_type,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "bootloader_version",
-		.data		= &bootloader_version,
-		.maxlen		= sizeof (int),
-		.mode		= 0444,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "io_delay_type",
-		.data		= &io_delay_type,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_MMU)
-	{
-		.procname	= "randomize_va_space",
-		.data		= &randomize_va_space,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if defined(CONFIG_S390) && defined(CONFIG_SMP)
-	{
-		.procname	= "spin_retry",
-		.data		= &spin_retry,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
-	{
-		.procname	= "acpi_video_flags",
-		.data		= &acpi_realmode_flags,
-		.maxlen		= sizeof (unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif
-#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
-	{
-		.procname	= "ignore-unaligned-usertrap",
-		.data		= &no_unaligned_warning,
-		.maxlen		= sizeof (int),
-	 	.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_IA64
-	{
-		.procname	= "unaligned-dump-stack",
-		.data		= &unaligned_dump_stack,
-		.maxlen		= sizeof (int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_DETECT_HUNG_TASK
-	{
-		.procname	= "hung_task_panic",
-		.data		= &sysctl_hung_task_panic,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "hung_task_check_count",
-		.data		= &sysctl_hung_task_check_count,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "hung_task_timeout_secs",
-		.data		= &sysctl_hung_task_timeout_secs,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_dohung_task_timeout_secs,
-		.extra2		= &hung_task_timeout_max,
-	},
-	{
-		.procname	= "hung_task_check_interval_secs",
-		.data		= &sysctl_hung_task_check_interval_secs,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= proc_dohung_task_timeout_secs,
-		.extra2		= &hung_task_timeout_max,
-	},
-	{
-		.procname	= "hung_task_warnings",
-		.data		= &sysctl_hung_task_warnings,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &neg_one,
-	},
-#endif
-#ifdef CONFIG_RT_MUTEXES
-	{
-		.procname	= "max_lock_depth",
-		.data		= &max_lock_depth,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-	{
-		.procname	= "poweroff_cmd",
-		.data		= &poweroff_cmd,
-		.maxlen		= POWEROFF_CMD_PATH_LEN,
-		.mode		= 0644,
-		.proc_handler	= proc_dostring,
-	},
-#ifdef CONFIG_KEYS
-	{
-		.procname	= "keys",
-		.mode		= 0555,
-		.child		= key_sysctls,
-	},
-#endif
-#ifdef CONFIG_PERF_EVENTS
-	/*
-	 * User-space scripts rely on the existence of this file
-	 * as a feature check for perf_events being enabled.
-	 *
-	 * So it's an ABI, do not remove!
-	 */
-	{
-		.procname	= "perf_event_paranoid",
-		.data		= &sysctl_perf_event_paranoid,
-		.maxlen		= sizeof(sysctl_perf_event_paranoid),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "perf_event_mlock_kb",
-		.data		= &sysctl_perf_event_mlock,
-		.maxlen		= sizeof(sysctl_perf_event_mlock),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "perf_event_max_sample_rate",
-		.data		= &sysctl_perf_event_sample_rate,
-		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
-		.mode		= 0644,
-		.proc_handler	= perf_proc_update_handler,
-		.extra1		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "perf_cpu_time_max_percent",
-		.data		= &sysctl_perf_cpu_time_max_percent,
-		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
-		.mode		= 0644,
-		.proc_handler	= perf_cpu_time_max_percent_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "perf_event_max_stack",
-		.data		= &sysctl_perf_event_max_stack,
-		.maxlen		= sizeof(sysctl_perf_event_max_stack),
-		.mode		= 0644,
-		.proc_handler	= perf_event_max_stack_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &six_hundred_forty_kb,
-	},
-	{
-		.procname	= "perf_event_max_contexts_per_stack",
-		.data		= &sysctl_perf_event_max_contexts_per_stack,
-		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
-		.mode		= 0644,
-		.proc_handler	= perf_event_max_stack_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_thousand,
-	},
-#endif
-	{
-		.procname	= "panic_on_warn",
-		.data		= &panic_on_warn,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
-	{
-		.procname	= "timer_migration",
-		.data		= &sysctl_timer_migration,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= timer_migration_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_BPF_SYSCALL
-	{
-		.procname	= "unprivileged_bpf_disabled",
-		.data		= &sysctl_unprivileged_bpf_disabled,
-		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
-		.mode		= 0644,
-		/* only handle a transition from default "0" to "1" */
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "bpf_stats_enabled",
-		.data		= &bpf_stats_enabled_key.key,
-		.maxlen		= sizeof(bpf_stats_enabled_key),
-		.mode		= 0644,
-		.proc_handler	= proc_do_static_key,
-	},
-#endif
-#if defined(CONFIG_TREE_RCU)
-	{
-		.procname	= "panic_on_rcu_stall",
-		.data		= &sysctl_panic_on_rcu_stall,
-		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
-	{
-		.procname	= "stack_erasing",
-		.data		= NULL,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= stack_erasing_sysctl,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table vm_table[] = {
-	{
-		.procname	= "overcommit_memory",
-		.data		= &sysctl_overcommit_memory,
-		.maxlen		= sizeof(sysctl_overcommit_memory),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "panic_on_oom",
-		.data		= &sysctl_panic_on_oom,
-		.maxlen		= sizeof(sysctl_panic_on_oom),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "oom_kill_allocating_task",
-		.data		= &sysctl_oom_kill_allocating_task,
-		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "oom_dump_tasks",
-		.data		= &sysctl_oom_dump_tasks,
-		.maxlen		= sizeof(sysctl_oom_dump_tasks),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-	{
-		.procname	= "overcommit_ratio",
-		.data		= &sysctl_overcommit_ratio,
-		.maxlen		= sizeof(sysctl_overcommit_ratio),
-		.mode		= 0644,
-		.proc_handler	= overcommit_ratio_handler,
-	},
-	{
-		.procname	= "overcommit_kbytes",
-		.data		= &sysctl_overcommit_kbytes,
-		.maxlen		= sizeof(sysctl_overcommit_kbytes),
-		.mode		= 0644,
-		.proc_handler	= overcommit_kbytes_handler,
-	},
-	{
-		.procname	= "page-cluster", 
-		.data		= &page_cluster,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "dirty_background_ratio",
-		.data		= &dirty_background_ratio,
-		.maxlen		= sizeof(dirty_background_ratio),
-		.mode		= 0644,
-		.proc_handler	= dirty_background_ratio_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "dirty_background_bytes",
-		.data		= &dirty_background_bytes,
-		.maxlen		= sizeof(dirty_background_bytes),
-		.mode		= 0644,
-		.proc_handler	= dirty_background_bytes_handler,
-		.extra1		= &one_ul,
-	},
-	{
-		.procname	= "dirty_ratio",
-		.data		= &vm_dirty_ratio,
-		.maxlen		= sizeof(vm_dirty_ratio),
-		.mode		= 0644,
-		.proc_handler	= dirty_ratio_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "dirty_bytes",
-		.data		= &vm_dirty_bytes,
-		.maxlen		= sizeof(vm_dirty_bytes),
-		.mode		= 0644,
-		.proc_handler	= dirty_bytes_handler,
-		.extra1		= &dirty_bytes_min,
-	},
-	{
-		.procname	= "dirty_writeback_centisecs",
-		.data		= &dirty_writeback_interval,
-		.maxlen		= sizeof(dirty_writeback_interval),
-		.mode		= 0644,
-		.proc_handler	= dirty_writeback_centisecs_handler,
-	},
-	{
-		.procname	= "dirty_expire_centisecs",
-		.data		= &dirty_expire_interval,
-		.maxlen		= sizeof(dirty_expire_interval),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "dirtytime_expire_seconds",
-		.data		= &dirtytime_expire_interval,
-		.maxlen		= sizeof(dirtytime_expire_interval),
-		.mode		= 0644,
-		.proc_handler	= dirtytime_interval_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "swappiness",
-		.data		= &vm_swappiness,
-		.maxlen		= sizeof(vm_swappiness),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-#ifdef CONFIG_HUGETLB_PAGE
-	{
-		.procname	= "nr_hugepages",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= hugetlb_sysctl_handler,
-	},
-#ifdef CONFIG_NUMA
-	{
-		.procname       = "nr_hugepages_mempolicy",
-		.data           = NULL,
-		.maxlen         = sizeof(unsigned long),
-		.mode           = 0644,
-		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
-	},
-	{
-		.procname		= "numa_stat",
-		.data			= &sysctl_vm_numa_stat,
-		.maxlen			= sizeof(int),
-		.mode			= 0644,
-		.proc_handler	= sysctl_vm_numa_stat_handler,
-		.extra1			= SYSCTL_ZERO,
-		.extra2			= SYSCTL_ONE,
-	},
-#endif
-	 {
-		.procname	= "hugetlb_shm_group",
-		.data		= &sysctl_hugetlb_shm_group,
-		.maxlen		= sizeof(gid_t),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	 },
-	{
-		.procname	= "nr_overcommit_hugepages",
-		.data		= NULL,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= hugetlb_overcommit_handler,
-	},
-#endif
-	{
-		.procname	= "lowmem_reserve_ratio",
-		.data		= &sysctl_lowmem_reserve_ratio,
-		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
-		.mode		= 0644,
-		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
-	},
-	{
-		.procname	= "drop_caches",
-		.data		= &sysctl_drop_caches,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= drop_caches_sysctl_handler,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= &four,
-	},
-#ifdef CONFIG_COMPACTION
-	{
-		.procname	= "compact_memory",
-		.data		= &sysctl_compact_memory,
-		.maxlen		= sizeof(int),
-		.mode		= 0200,
-		.proc_handler	= sysctl_compaction_handler,
-	},
-	{
-		.procname	= "extfrag_threshold",
-		.data		= &sysctl_extfrag_threshold,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &min_extfrag_threshold,
-		.extra2		= &max_extfrag_threshold,
-	},
-	{
-		.procname	= "compact_unevictable_allowed",
-		.data		= &sysctl_compact_unevictable_allowed,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-
-#endif /* CONFIG_COMPACTION */
-	{
-		.procname	= "min_free_kbytes",
-		.data		= &min_free_kbytes,
-		.maxlen		= sizeof(min_free_kbytes),
-		.mode		= 0644,
-		.proc_handler	= min_free_kbytes_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "watermark_boost_factor",
-		.data		= &watermark_boost_factor,
-		.maxlen		= sizeof(watermark_boost_factor),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "watermark_scale_factor",
-		.data		= &watermark_scale_factor,
-		.maxlen		= sizeof(watermark_scale_factor),
-		.mode		= 0644,
-		.proc_handler	= watermark_scale_factor_sysctl_handler,
-		.extra1		= SYSCTL_ONE,
-		.extra2		= &one_thousand,
-	},
-	{
-		.procname	= "percpu_pagelist_fraction",
-		.data		= &percpu_pagelist_fraction,
-		.maxlen		= sizeof(percpu_pagelist_fraction),
-		.mode		= 0644,
-		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-	},
-#ifdef CONFIG_MMU
-	{
-		.procname	= "max_map_count",
-		.data		= &sysctl_max_map_count,
-		.maxlen		= sizeof(sysctl_max_map_count),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-#else
-	{
-		.procname	= "nr_trim_pages",
-		.data		= &sysctl_nr_trim_pages,
-		.maxlen		= sizeof(sysctl_nr_trim_pages),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-	{
-		.procname	= "laptop_mode",
-		.data		= &laptop_mode,
-		.maxlen		= sizeof(laptop_mode),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "block_dump",
-		.data		= &block_dump,
-		.maxlen		= sizeof(block_dump),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "vfs_cache_pressure",
-		.data		= &sysctl_vfs_cache_pressure,
-		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
-    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
-	{
-		.procname	= "legacy_va_layout",
-		.data		= &sysctl_legacy_va_layout,
-		.maxlen		= sizeof(sysctl_legacy_va_layout),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-#ifdef CONFIG_NUMA
-	{
-		.procname	= "zone_reclaim_mode",
-		.data		= &node_reclaim_mode,
-		.maxlen		= sizeof(node_reclaim_mode),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-	{
-		.procname	= "min_unmapped_ratio",
-		.data		= &sysctl_min_unmapped_ratio,
-		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
-		.mode		= 0644,
-		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-	{
-		.procname	= "min_slab_ratio",
-		.data		= &sysctl_min_slab_ratio,
-		.maxlen		= sizeof(sysctl_min_slab_ratio),
-		.mode		= 0644,
-		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &one_hundred,
-	},
-#endif
-#ifdef CONFIG_SMP
-	{
-		.procname	= "stat_interval",
-		.data		= &sysctl_stat_interval,
-		.maxlen		= sizeof(sysctl_stat_interval),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_jiffies,
-	},
-	{
-		.procname	= "stat_refresh",
-		.data		= NULL,
-		.maxlen		= 0,
-		.mode		= 0600,
-		.proc_handler	= vmstat_refresh,
-	},
-#endif
-#ifdef CONFIG_MMU
-	{
-		.procname	= "mmap_min_addr",
-		.data		= &dac_mmap_min_addr,
-		.maxlen		= sizeof(unsigned long),
-		.mode		= 0644,
-		.proc_handler	= mmap_min_addr_handler,
-	},
-#endif
-#ifdef CONFIG_NUMA
-	{
-		.procname	= "numa_zonelist_order",
-		.data		= &numa_zonelist_order,
-		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
-		.mode		= 0644,
-		.proc_handler	= numa_zonelist_order_handler,
-	},
-#endif
-#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
-   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
-	{
-		.procname	= "vdso_enabled",
-#ifdef CONFIG_X86_32
-		.data		= &vdso32_enabled,
-		.maxlen		= sizeof(vdso32_enabled),
-#else
-		.data		= &vdso_enabled,
-		.maxlen		= sizeof(vdso_enabled),
-#endif
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-		.extra1		= SYSCTL_ZERO,
-	},
-#endif
-#ifdef CONFIG_HIGHMEM
-	{
-		.procname	= "highmem_is_dirtyable",
-		.data		= &vm_highmem_is_dirtyable,
-		.maxlen		= sizeof(vm_highmem_is_dirtyable),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-#ifdef CONFIG_MEMORY_FAILURE
-	{
-		.procname	= "memory_failure_early_kill",
-		.data		= &sysctl_memory_failure_early_kill,
-		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "memory_failure_recovery",
-		.data		= &sysctl_memory_failure_recovery,
-		.maxlen		= sizeof(sysctl_memory_failure_recovery),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{
-		.procname	= "user_reserve_kbytes",
-		.data		= &sysctl_user_reserve_kbytes,
-		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "admin_reserve_kbytes",
-		.data		= &sysctl_admin_reserve_kbytes,
-		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
-	{
-		.procname	= "mmap_rnd_bits",
-		.data		= &mmap_rnd_bits,
-		.maxlen		= sizeof(mmap_rnd_bits),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&mmap_rnd_bits_min,
-		.extra2		= (void *)&mmap_rnd_bits_max,
-	},
-#endif
-#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
-	{
-		.procname	= "mmap_rnd_compat_bits",
-		.data		= &mmap_rnd_compat_bits,
-		.maxlen		= sizeof(mmap_rnd_compat_bits),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= (void *)&mmap_rnd_compat_bits_min,
-		.extra2		= (void *)&mmap_rnd_compat_bits_max,
-	},
-#endif
-#ifdef CONFIG_USERFAULTFD
-	{
-		.procname	= "unprivileged_userfaultfd",
-		.data		= &sysctl_unprivileged_userfaultfd,
-		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table fs_table[] = {
-	{
-		.procname	= "inode-nr",
-		.data		= &inodes_stat,
-		.maxlen		= 2*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_inodes,
-	},
-	{
-		.procname	= "inode-state",
-		.data		= &inodes_stat,
-		.maxlen		= 7*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_inodes,
-	},
-	{
-		.procname	= "file-nr",
-		.data		= &files_stat,
-		.maxlen		= sizeof(files_stat),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_files,
-	},
-	{
-		.procname	= "file-max",
-		.data		= &files_stat.max_files,
-		.maxlen		= sizeof(files_stat.max_files),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-		.extra1		= &zero_ul,
-		.extra2		= &long_max,
-	},
-	{
-		.procname	= "nr_open",
-		.data		= &sysctl_nr_open,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &sysctl_nr_open_min,
-		.extra2		= &sysctl_nr_open_max,
-	},
-	{
-		.procname	= "dentry-state",
-		.data		= &dentry_stat,
-		.maxlen		= 6*sizeof(long),
-		.mode		= 0444,
-		.proc_handler	= proc_nr_dentry,
-	},
-	{
-		.procname	= "overflowuid",
-		.data		= &fs_overflowuid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-	{
-		.procname	= "overflowgid",
-		.data		= &fs_overflowgid,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= &minolduid,
-		.extra2		= &maxolduid,
-	},
-#ifdef CONFIG_FILE_LOCKING
-	{
-		.procname	= "leases-enable",
-		.data		= &leases_enable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_DNOTIFY
-	{
-		.procname	= "dir-notify-enable",
-		.data		= &dir_notify_enable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_MMU
-#ifdef CONFIG_FILE_LOCKING
-	{
-		.procname	= "lease-break-time",
-		.data		= &lease_break_time,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec,
-	},
-#endif
-#ifdef CONFIG_AIO
-	{
-		.procname	= "aio-nr",
-		.data		= &aio_nr,
-		.maxlen		= sizeof(aio_nr),
-		.mode		= 0444,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "aio-max-nr",
-		.data		= &aio_max_nr,
-		.maxlen		= sizeof(aio_max_nr),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-#endif /* CONFIG_AIO */
-#ifdef CONFIG_INOTIFY_USER
-	{
-		.procname	= "inotify",
-		.mode		= 0555,
-		.child		= inotify_table,
-	},
-#endif	
-#ifdef CONFIG_EPOLL
-	{
-		.procname	= "epoll",
-		.mode		= 0555,
-		.child		= epoll_table,
-	},
-#endif
-#endif
-	{
-		.procname	= "protected_symlinks",
-		.data		= &sysctl_protected_symlinks,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "protected_hardlinks",
-		.data		= &sysctl_protected_hardlinks,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-	{
-		.procname	= "protected_fifos",
-		.data		= &sysctl_protected_fifos,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "protected_regular",
-		.data		= &sysctl_protected_regular,
-		.maxlen		= sizeof(int),
-		.mode		= 0600,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-	{
-		.procname	= "suid_dumpable",
-		.data		= &suid_dumpable,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax_coredump,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
-	},
-#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
-	{
-		.procname	= "binfmt_misc",
-		.mode		= 0555,
-		.child		= sysctl_mount_point,
-	},
-#endif
-	{
-		.procname	= "pipe-max-size",
-		.data		= &pipe_max_size,
-		.maxlen		= sizeof(pipe_max_size),
-		.mode		= 0644,
-		.proc_handler	= proc_dopipe_max_size,
-	},
-	{
-		.procname	= "pipe-user-pages-hard",
-		.data		= &pipe_user_pages_hard,
-		.maxlen		= sizeof(pipe_user_pages_hard),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "pipe-user-pages-soft",
-		.data		= &pipe_user_pages_soft,
-		.maxlen		= sizeof(pipe_user_pages_soft),
-		.mode		= 0644,
-		.proc_handler	= proc_doulongvec_minmax,
-	},
-	{
-		.procname	= "mount-max",
-		.data		= &sysctl_mount_max,
-		.maxlen		= sizeof(unsigned int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec_minmax,
-		.extra1		= SYSCTL_ONE,
-	},
-	{ }
-};
-
-static struct ctl_table debug_table[] = {
-#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
-	{
-		.procname	= "exception-trace",
-		.data		= &show_unhandled_signals,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_dointvec
-	},
-#endif
-#if defined(CONFIG_OPTPROBES)
-	{
-		.procname	= "kprobes-optimization",
-		.data		= &sysctl_kprobes_optimization,
-		.maxlen		= sizeof(int),
-		.mode		= 0644,
-		.proc_handler	= proc_kprobes_optimization_handler,
-		.extra1		= SYSCTL_ZERO,
-		.extra2		= SYSCTL_ONE,
-	},
-#endif
-	{ }
-};
-
-static struct ctl_table dev_table[] = {
-	{ }
-};
-
-int __init sysctl_init(void)
-{
-	struct ctl_table_header *hdr;
-
-	hdr = register_sysctl_table(sysctl_base_table);
-	kmemleak_not_leak(hdr);
-	return 0;
-}
-
-#endif /* CONFIG_SYSCTL */
-
-/*
- * /proc/sys support
- */
-
+#endif /* CONFIG_SYSCTL */
+
+/*
+ * /proc/sys support
+ */
+
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(char *data, int maxlen, int write,
@@ -3301,101 +1571,1794 @@ int proc_dostring(struct ctl_table *table, int write,
 	return -ENOSYS;
 }
 
-int proc_dointvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_dointvec(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_douintvec(struct ctl_table *table, int write,
+		  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_minmax(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_douintvec_minmax(struct ctl_table *table, int write,
+			  void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_jiffies(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
+{
+	return -ENOSYS;
+}
+
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_douintvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_doulongvec_minmax(struct ctl_table *table, int write,
+		    void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_dointvec_minmax(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
+				      void __user *buffer,
+				      size_t *lenp, loff_t *ppos)
 {
-	return -ENOSYS;
+    return -ENOSYS;
 }
 
-int proc_douintvec_minmax(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_do_large_bitmap(struct ctl_table *table, int write,
+			 void __user *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
-int proc_dointvec_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+#endif /* CONFIG_PROC_SYSCTL */
+
+#if defined(CONFIG_SYSCTL)
+int proc_do_static_key(struct ctl_table *table, int write,
+		       void __user *buffer, size_t *lenp,
+		       loff_t *ppos)
 {
-	return -ENOSYS;
+	struct static_key *key = (struct static_key *)table->data;
+	static DEFINE_MUTEX(static_key_mutex);
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&static_key_mutex);
+	val = static_key_enabled(key);
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret) {
+		if (val)
+			static_key_enable(key);
+		else
+			static_key_disable(key);
+	}
+	mutex_unlock(&static_key_mutex);
+	return ret;
 }
 
-int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table kern_table[] = {
+	{
+		.procname	= "sched_child_runs_first",
+		.data		= &sysctl_sched_child_runs_first,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SCHED_DEBUG
+	{
+		.procname	= "sched_min_granularity_ns",
+		.data		= &sysctl_sched_min_granularity,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
+	},
+	{
+		.procname	= "sched_latency_ns",
+		.data		= &sysctl_sched_latency,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_granularity_ns,
+		.extra2		= &max_sched_granularity_ns,
+	},
+	{
+		.procname	= "sched_wakeup_granularity_ns",
+		.data		= &sysctl_sched_wakeup_granularity,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_wakeup_granularity_ns,
+		.extra2		= &max_wakeup_granularity_ns,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "sched_tunable_scaling",
+		.data		= &sysctl_sched_tunable_scaling,
+		.maxlen		= sizeof(enum sched_tunable_scaling),
+		.mode		= 0644,
+		.proc_handler	= sched_proc_update_handler,
+		.extra1		= &min_sched_tunable_scaling,
+		.extra2		= &max_sched_tunable_scaling,
+	},
+	{
+		.procname	= "sched_migration_cost_ns",
+		.data		= &sysctl_sched_migration_cost,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "sched_nr_migrate",
+		.data		= &sysctl_sched_nr_migrate,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SCHEDSTATS
+	{
+		.procname	= "sched_schedstats",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_schedstats,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SCHEDSTATS */
+#endif /* CONFIG_SMP */
+#ifdef CONFIG_NUMA_BALANCING
+	{
+		.procname	= "numa_balancing_scan_delay_ms",
+		.data		= &sysctl_numa_balancing_scan_delay,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_min_ms",
+		.data		= &sysctl_numa_balancing_scan_period_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_period_max_ms",
+		.data		= &sysctl_numa_balancing_scan_period_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "numa_balancing_scan_size_mb",
+		.data		= &sysctl_numa_balancing_scan_size,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "numa_balancing",
+		.data		= NULL, /* filled in by handler */
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_numa_balancing,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_NUMA_BALANCING */
+#endif /* CONFIG_SCHED_DEBUG */
+	{
+		.procname	= "sched_rt_period_us",
+		.data		= &sysctl_sched_rt_period,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_rt_handler,
+	},
+	{
+		.procname	= "sched_rt_runtime_us",
+		.data		= &sysctl_sched_rt_runtime,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rt_handler,
+	},
+	{
+		.procname	= "sched_rr_timeslice_ms",
+		.data		= &sysctl_sched_rr_timeslice,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sched_rr_handler,
+	},
+#ifdef CONFIG_UCLAMP_TASK
+	{
+		.procname	= "sched_util_clamp_min",
+		.data		= &sysctl_sched_uclamp_util_min,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sched_uclamp_handler,
+	},
+	{
+		.procname	= "sched_util_clamp_max",
+		.data		= &sysctl_sched_uclamp_util_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_sched_uclamp_handler,
+	},
+#endif
+#ifdef CONFIG_SCHED_AUTOGROUP
+	{
+		.procname	= "sched_autogroup_enabled",
+		.data		= &sysctl_sched_autogroup_enabled,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_CFS_BANDWIDTH
+	{
+		.procname	= "sched_cfs_bandwidth_slice_us",
+		.data		= &sysctl_sched_cfs_bandwidth_slice,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+	},
+#endif
+#if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
+	{
+		.procname	= "sched_energy_aware",
+		.data		= &sysctl_sched_energy_aware,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= sched_energy_aware_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_PROVE_LOCKING
+	{
+		.procname	= "prove_locking",
+		.data		= &prove_locking,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_LOCK_STAT
+	{
+		.procname	= "lock_stat",
+		.data		= &lock_stat,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "panic",
+		.data		= &panic_timeout,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_COREDUMP
+	{
+		.procname	= "core_uses_pid",
+		.data		= &core_uses_pid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "core_pattern",
+		.data		= core_pattern,
+		.maxlen		= CORENAME_MAX_SIZE,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring_coredump,
+	},
+	{
+		.procname	= "core_pipe_limit",
+		.data		= &core_pipe_limit,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+	{
+		.procname	= "tainted",
+		.maxlen 	= sizeof(long),
+		.mode		= 0644,
+		.proc_handler	= proc_taint,
+	},
+	{
+		.procname	= "sysctl_writes_strict",
+		.data		= &sysctl_writes_strict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &neg_one,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_LATENCYTOP
+	{
+		.procname	= "latencytop",
+		.data		= &latencytop_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_latencytop,
+	},
+#endif
+#ifdef CONFIG_BLK_DEV_INITRD
+	{
+		.procname	= "real-root-dev",
+		.data		= &real_root_dev,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "print-fatal-signals",
+		.data		= &print_fatal_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_SPARC
+	{
+		.procname	= "reboot-cmd",
+		.data		= reboot_command,
+		.maxlen		= 256,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		.procname	= "stop-a",
+		.data		= &stop_a_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "scons-poweroff",
+		.data		= &scons_pwroff,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_SPARC64
+	{
+		.procname	= "tsb-ratio",
+		.data		= &sysctl_tsb_ratio,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_PARISC
+	{
+		.procname	= "soft-power",
+		.data		= &pwrsw_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_ALLOW
+	{
+		.procname	= "unaligned-trap",
+		.data		= &unaligned_enabled,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "ctrl-alt-del",
+		.data		= &C_A_D,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_FUNCTION_TRACER
+	{
+		.procname	= "ftrace_enabled",
+		.data		= &ftrace_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= ftrace_enable_sysctl,
+	},
+#endif
+#ifdef CONFIG_STACK_TRACER
+	{
+		.procname	= "stack_tracer_enabled",
+		.data		= &stack_tracer_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= stack_trace_sysctl,
+	},
+#endif
+#ifdef CONFIG_TRACING
+	{
+		.procname	= "ftrace_dump_on_oops",
+		.data		= &ftrace_dump_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "traceoff_on_warning",
+		.data		= &__disable_trace_on_warning,
+		.maxlen		= sizeof(__disable_trace_on_warning),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "tracepoint_printk",
+		.data		= &tracepoint_printk,
+		.maxlen		= sizeof(tracepoint_printk),
+		.mode		= 0644,
+		.proc_handler	= tracepoint_printk_sysctl,
+	},
+#endif
+#ifdef CONFIG_KEXEC_CORE
+	{
+		.procname	= "kexec_load_disabled",
+		.data		= &kexec_load_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_MODULES
+	{
+		.procname	= "modprobe",
+		.data		= &modprobe_path,
+		.maxlen		= KMOD_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+	{
+		.procname	= "modules_disabled",
+		.data		= &modules_disabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_UEVENT_HELPER
+	{
+		.procname	= "hotplug",
+		.data		= &uevent_helper,
+		.maxlen		= UEVENT_HELPER_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+#endif
+#ifdef CONFIG_CHR_DEV_SG
+	{
+		.procname	= "sg-big-buff",
+		.data		= &sg_big_buff,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_BSD_PROCESS_ACCT
+	{
+		.procname	= "acct",
+		.data		= &acct_parm,
+		.maxlen		= 3*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_MAGIC_SYSRQ
+	{
+		.procname	= "sysrq",
+		.data		= NULL,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= sysrq_sysctl_handler,
+	},
+#endif
+#ifdef CONFIG_PROC_SYSCTL
+	{
+		.procname	= "cad_pid",
+		.data		= NULL,
+		.maxlen		= sizeof (int),
+		.mode		= 0600,
+		.proc_handler	= proc_do_cad_pid,
+	},
+#endif
+	{
+		.procname	= "threads-max",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= sysctl_max_threads,
+	},
+	{
+		.procname	= "random",
+		.mode		= 0555,
+		.child		= random_table,
+	},
+	{
+		.procname	= "usermodehelper",
+		.mode		= 0555,
+		.child		= usermodehelper_table,
+	},
+#ifdef CONFIG_FW_LOADER_USER_HELPER
+	{
+		.procname	= "firmware_config",
+		.mode		= 0555,
+		.child		= firmware_config_table,
+	},
+#endif
+	{
+		.procname	= "overflowuid",
+		.data		= &overflowuid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+	{
+		.procname	= "overflowgid",
+		.data		= &overflowgid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+#ifdef CONFIG_S390
+	{
+		.procname	= "userprocess_debug",
+		.data		= &show_unhandled_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "pid_max",
+		.data		= &pid_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &pid_max_min,
+		.extra2		= &pid_max_max,
+	},
+	{
+		.procname	= "panic_on_oops",
+		.data		= &panic_on_oops,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "panic_print",
+		.data		= &panic_print,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#if defined CONFIG_PRINTK
+	{
+		.procname	= "printk",
+		.data		= &console_loglevel,
+		.maxlen		= 4*sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "printk_ratelimit",
+		.data		= &printk_ratelimit_state.interval,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "printk_ratelimit_burst",
+		.data		= &printk_ratelimit_state.burst,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "printk_delay",
+		.data		= &printk_delay_msec,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &ten_thousand,
+	},
+	{
+		.procname	= "printk_devkmsg",
+		.data		= devkmsg_log_str,
+		.maxlen		= DEVKMSG_STR_MAX_SIZE,
+		.mode		= 0644,
+		.proc_handler	= devkmsg_sysctl_set_loglvl,
+	},
+	{
+		.procname	= "dmesg_restrict",
+		.data		= &dmesg_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "kptr_restrict",
+		.data		= &kptr_restrict,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_sysadmin,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+#endif
+	{
+		.procname	= "ngroups_max",
+		.data		= &ngroups_max,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "cap_last_cap",
+		.data		= (void *)&cap_last_cap,
+		.maxlen		= sizeof(int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+#if defined(CONFIG_LOCKUP_DETECTOR)
+	{
+		.procname       = "watchdog",
+		.data		= &watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "watchdog_thresh",
+		.data		= &watchdog_thresh,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_thresh,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &sixty,
+	},
+	{
+		.procname       = "nmi_watchdog",
+		.data		= &nmi_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= NMI_WATCHDOG_SYSCTL_PERM,
+		.proc_handler   = proc_nmi_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "watchdog_cpumask",
+		.data		= &watchdog_cpumask_bits,
+		.maxlen		= NR_CPUS,
+		.mode		= 0644,
+		.proc_handler	= proc_watchdog_cpumask,
+	},
+#ifdef CONFIG_SOFTLOCKUP_DETECTOR
+	{
+		.procname       = "soft_watchdog",
+		.data		= &soft_watchdog_user_enabled,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler   = proc_soft_watchdog,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "softlockup_panic",
+		.data		= &softlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "softlockup_all_cpu_backtrace",
+		.data		= &sysctl_softlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SMP */
+#endif
+#ifdef CONFIG_HARDLOCKUP_DETECTOR
+	{
+		.procname	= "hardlockup_panic",
+		.data		= &hardlockup_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#ifdef CONFIG_SMP
+	{
+		.procname	= "hardlockup_all_cpu_backtrace",
+		.data		= &sysctl_hardlockup_all_cpu_backtrace,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_SMP */
+#endif
+#endif
+
+#if defined(CONFIG_X86_LOCAL_APIC) && defined(CONFIG_X86)
+	{
+		.procname       = "unknown_nmi_panic",
+		.data           = &unknown_nmi_panic,
+		.maxlen         = sizeof (int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_X86)
+	{
+		.procname	= "panic_on_unrecovered_nmi",
+		.data		= &panic_on_unrecovered_nmi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "panic_on_io_nmi",
+		.data		= &panic_on_io_nmi,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#ifdef CONFIG_DEBUG_STACKOVERFLOW
+	{
+		.procname	= "panic_on_stackoverflow",
+		.data		= &sysctl_panic_on_stackoverflow,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "bootloader_type",
+		.data		= &bootloader_type,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "bootloader_version",
+		.data		= &bootloader_version,
+		.maxlen		= sizeof (int),
+		.mode		= 0444,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "io_delay_type",
+		.data		= &io_delay_type,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_MMU)
+	{
+		.procname	= "randomize_va_space",
+		.data		= &randomize_va_space,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if defined(CONFIG_S390) && defined(CONFIG_SMP)
+	{
+		.procname	= "spin_retry",
+		.data		= &spin_retry,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#if	defined(CONFIG_ACPI_SLEEP) && defined(CONFIG_X86)
+	{
+		.procname	= "acpi_video_flags",
+		.data		= &acpi_realmode_flags,
+		.maxlen		= sizeof (unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#endif
+#ifdef CONFIG_SYSCTL_ARCH_UNALIGN_NO_WARN
+	{
+		.procname	= "ignore-unaligned-usertrap",
+		.data		= &no_unaligned_warning,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_IA64
+	{
+		.procname	= "unaligned-dump-stack",
+		.data		= &unaligned_dump_stack,
+		.maxlen		= sizeof (int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_DETECT_HUNG_TASK
+	{
+		.procname	= "hung_task_panic",
+		.data		= &sysctl_hung_task_panic,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "hung_task_check_count",
+		.data		= &sysctl_hung_task_check_count,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "hung_task_timeout_secs",
+		.data		= &sysctl_hung_task_timeout_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
+	{
+		.procname	= "hung_task_check_interval_secs",
+		.data		= &sysctl_hung_task_check_interval_secs,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_dohung_task_timeout_secs,
+		.extra2		= &hung_task_timeout_max,
+	},
+	{
+		.procname	= "hung_task_warnings",
+		.data		= &sysctl_hung_task_warnings,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &neg_one,
+	},
+#endif
+#ifdef CONFIG_RT_MUTEXES
+	{
+		.procname	= "max_lock_depth",
+		.data		= &max_lock_depth,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+	{
+		.procname	= "poweroff_cmd",
+		.data		= &poweroff_cmd,
+		.maxlen		= POWEROFF_CMD_PATH_LEN,
+		.mode		= 0644,
+		.proc_handler	= proc_dostring,
+	},
+#ifdef CONFIG_KEYS
+	{
+		.procname	= "keys",
+		.mode		= 0555,
+		.child		= key_sysctls,
+	},
+#endif
+#ifdef CONFIG_PERF_EVENTS
+	/*
+	 * User-space scripts rely on the existence of this file
+	 * as a feature check for perf_events being enabled.
+	 *
+	 * So it's an ABI, do not remove!
+	 */
+	{
+		.procname	= "perf_event_paranoid",
+		.data		= &sysctl_perf_event_paranoid,
+		.maxlen		= sizeof(sysctl_perf_event_paranoid),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "perf_event_mlock_kb",
+		.data		= &sysctl_perf_event_mlock,
+		.maxlen		= sizeof(sysctl_perf_event_mlock),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "perf_event_max_sample_rate",
+		.data		= &sysctl_perf_event_sample_rate,
+		.maxlen		= sizeof(sysctl_perf_event_sample_rate),
+		.mode		= 0644,
+		.proc_handler	= perf_proc_update_handler,
+		.extra1		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "perf_cpu_time_max_percent",
+		.data		= &sysctl_perf_cpu_time_max_percent,
+		.maxlen		= sizeof(sysctl_perf_cpu_time_max_percent),
+		.mode		= 0644,
+		.proc_handler	= perf_cpu_time_max_percent_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "perf_event_max_stack",
+		.data		= &sysctl_perf_event_max_stack,
+		.maxlen		= sizeof(sysctl_perf_event_max_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &six_hundred_forty_kb,
+	},
+	{
+		.procname	= "perf_event_max_contexts_per_stack",
+		.data		= &sysctl_perf_event_max_contexts_per_stack,
+		.maxlen		= sizeof(sysctl_perf_event_max_contexts_per_stack),
+		.mode		= 0644,
+		.proc_handler	= perf_event_max_stack_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_thousand,
+	},
+#endif
+	{
+		.procname	= "panic_on_warn",
+		.data		= &panic_on_warn,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#if defined(CONFIG_SMP) && defined(CONFIG_NO_HZ_COMMON)
+	{
+		.procname	= "timer_migration",
+		.data		= &sysctl_timer_migration,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= timer_migration_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_BPF_SYSCALL
+	{
+		.procname	= "unprivileged_bpf_disabled",
+		.data		= &sysctl_unprivileged_bpf_disabled,
+		.maxlen		= sizeof(sysctl_unprivileged_bpf_disabled),
+		.mode		= 0644,
+		/* only handle a transition from default "0" to "1" */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "bpf_stats_enabled",
+		.data		= &bpf_stats_enabled_key.key,
+		.maxlen		= sizeof(bpf_stats_enabled_key),
+		.mode		= 0644,
+		.proc_handler	= proc_do_static_key,
+	},
+#endif
+#if defined(CONFIG_TREE_RCU)
+	{
+		.procname	= "panic_on_rcu_stall",
+		.data		= &sysctl_panic_on_rcu_stall,
+		.maxlen		= sizeof(sysctl_panic_on_rcu_stall),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_STACKLEAK_RUNTIME_DISABLE
+	{
+		.procname	= "stack_erasing",
+		.data		= NULL,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= stack_erasing_sysctl,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
 
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table vm_table[] = {
+	{
+		.procname	= "overcommit_memory",
+		.data		= &sysctl_overcommit_memory,
+		.maxlen		= sizeof(sysctl_overcommit_memory),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "panic_on_oom",
+		.data		= &sysctl_panic_on_oom,
+		.maxlen		= sizeof(sysctl_panic_on_oom),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "oom_kill_allocating_task",
+		.data		= &sysctl_oom_kill_allocating_task,
+		.maxlen		= sizeof(sysctl_oom_kill_allocating_task),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "oom_dump_tasks",
+		.data		= &sysctl_oom_dump_tasks,
+		.maxlen		= sizeof(sysctl_oom_dump_tasks),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+	{
+		.procname	= "overcommit_ratio",
+		.data		= &sysctl_overcommit_ratio,
+		.maxlen		= sizeof(sysctl_overcommit_ratio),
+		.mode		= 0644,
+		.proc_handler	= overcommit_ratio_handler,
+	},
+	{
+		.procname	= "overcommit_kbytes",
+		.data		= &sysctl_overcommit_kbytes,
+		.maxlen		= sizeof(sysctl_overcommit_kbytes),
+		.mode		= 0644,
+		.proc_handler	= overcommit_kbytes_handler,
+	},
+	{
+		.procname	= "page-cluster",
+		.data		= &page_cluster,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "dirty_background_ratio",
+		.data		= &dirty_background_ratio,
+		.maxlen		= sizeof(dirty_background_ratio),
+		.mode		= 0644,
+		.proc_handler	= dirty_background_ratio_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "dirty_background_bytes",
+		.data		= &dirty_background_bytes,
+		.maxlen		= sizeof(dirty_background_bytes),
+		.mode		= 0644,
+		.proc_handler	= dirty_background_bytes_handler,
+		.extra1		= &one_ul,
+	},
+	{
+		.procname	= "dirty_ratio",
+		.data		= &vm_dirty_ratio,
+		.maxlen		= sizeof(vm_dirty_ratio),
+		.mode		= 0644,
+		.proc_handler	= dirty_ratio_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "dirty_bytes",
+		.data		= &vm_dirty_bytes,
+		.maxlen		= sizeof(vm_dirty_bytes),
+		.mode		= 0644,
+		.proc_handler	= dirty_bytes_handler,
+		.extra1		= &dirty_bytes_min,
+	},
+	{
+		.procname	= "dirty_writeback_centisecs",
+		.data		= &dirty_writeback_interval,
+		.maxlen		= sizeof(dirty_writeback_interval),
+		.mode		= 0644,
+		.proc_handler	= dirty_writeback_centisecs_handler,
+	},
+	{
+		.procname	= "dirty_expire_centisecs",
+		.data		= &dirty_expire_interval,
+		.maxlen		= sizeof(dirty_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "dirtytime_expire_seconds",
+		.data		= &dirtytime_expire_interval,
+		.maxlen		= sizeof(dirtytime_expire_interval),
+		.mode		= 0644,
+		.proc_handler	= dirtytime_interval_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "swappiness",
+		.data		= &vm_swappiness,
+		.maxlen		= sizeof(vm_swappiness),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+#ifdef CONFIG_HUGETLB_PAGE
+	{
+		.procname	= "nr_hugepages",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= hugetlb_sysctl_handler,
+	},
+#ifdef CONFIG_NUMA
+	{
+		.procname       = "nr_hugepages_mempolicy",
+		.data           = NULL,
+		.maxlen         = sizeof(unsigned long),
+		.mode           = 0644,
+		.proc_handler   = &hugetlb_mempolicy_sysctl_handler,
+	},
+	{
+		.procname		= "numa_stat",
+		.data			= &sysctl_vm_numa_stat,
+		.maxlen			= sizeof(int),
+		.mode			= 0644,
+		.proc_handler	= sysctl_vm_numa_stat_handler,
+		.extra1			= SYSCTL_ZERO,
+		.extra2			= SYSCTL_ONE,
+	},
+#endif
+	 {
+		.procname	= "hugetlb_shm_group",
+		.data		= &sysctl_hugetlb_shm_group,
+		.maxlen		= sizeof(gid_t),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	 },
+	{
+		.procname	= "nr_overcommit_hugepages",
+		.data		= NULL,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= hugetlb_overcommit_handler,
+	},
+#endif
+	{
+		.procname	= "lowmem_reserve_ratio",
+		.data		= &sysctl_lowmem_reserve_ratio,
+		.maxlen		= sizeof(sysctl_lowmem_reserve_ratio),
+		.mode		= 0644,
+		.proc_handler	= lowmem_reserve_ratio_sysctl_handler,
+	},
+	{
+		.procname	= "drop_caches",
+		.data		= &sysctl_drop_caches,
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= drop_caches_sysctl_handler,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &four,
+	},
+#ifdef CONFIG_COMPACTION
+	{
+		.procname	= "compact_memory",
+		.data		= &sysctl_compact_memory,
+		.maxlen		= sizeof(int),
+		.mode		= 0200,
+		.proc_handler	= sysctl_compaction_handler,
+	},
+	{
+		.procname	= "extfrag_threshold",
+		.data		= &sysctl_extfrag_threshold,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &min_extfrag_threshold,
+		.extra2		= &max_extfrag_threshold,
+	},
+	{
+		.procname	= "compact_unevictable_allowed",
+		.data		= &sysctl_compact_unevictable_allowed,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_warn_RT_change,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+
+#endif /* CONFIG_COMPACTION */
+	{
+		.procname	= "min_free_kbytes",
+		.data		= &min_free_kbytes,
+		.maxlen		= sizeof(min_free_kbytes),
+		.mode		= 0644,
+		.proc_handler	= min_free_kbytes_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "watermark_boost_factor",
+		.data		= &watermark_boost_factor,
+		.maxlen		= sizeof(watermark_boost_factor),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "watermark_scale_factor",
+		.data		= &watermark_scale_factor,
+		.maxlen		= sizeof(watermark_scale_factor),
+		.mode		= 0644,
+		.proc_handler	= watermark_scale_factor_sysctl_handler,
+		.extra1		= SYSCTL_ONE,
+		.extra2		= &one_thousand,
+	},
+	{
+		.procname	= "percpu_pagelist_fraction",
+		.data		= &percpu_pagelist_fraction,
+		.maxlen		= sizeof(percpu_pagelist_fraction),
+		.mode		= 0644,
+		.proc_handler	= percpu_pagelist_fraction_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+	},
+#ifdef CONFIG_MMU
+	{
+		.procname	= "max_map_count",
+		.data		= &sysctl_max_map_count,
+		.maxlen		= sizeof(sysctl_max_map_count),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#else
+	{
+		.procname	= "nr_trim_pages",
+		.data		= &sysctl_nr_trim_pages,
+		.maxlen		= sizeof(sysctl_nr_trim_pages),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+	{
+		.procname	= "laptop_mode",
+		.data		= &laptop_mode,
+		.maxlen		= sizeof(laptop_mode),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "block_dump",
+		.data		= &block_dump,
+		.maxlen		= sizeof(block_dump),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "vfs_cache_pressure",
+		.data		= &sysctl_vfs_cache_pressure,
+		.maxlen		= sizeof(sysctl_vfs_cache_pressure),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+	},
+#if defined(HAVE_ARCH_PICK_MMAP_LAYOUT) || \
+    defined(CONFIG_ARCH_WANT_DEFAULT_TOPDOWN_MMAP_LAYOUT)
+	{
+		.procname	= "legacy_va_layout",
+		.data		= &sysctl_legacy_va_layout,
+		.maxlen		= sizeof(sysctl_legacy_va_layout),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+#ifdef CONFIG_NUMA
+	{
+		.procname	= "zone_reclaim_mode",
+		.data		= &node_reclaim_mode,
+		.maxlen		= sizeof(node_reclaim_mode),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+	},
+	{
+		.procname	= "min_unmapped_ratio",
+		.data		= &sysctl_min_unmapped_ratio,
+		.maxlen		= sizeof(sysctl_min_unmapped_ratio),
+		.mode		= 0644,
+		.proc_handler	= sysctl_min_unmapped_ratio_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+	{
+		.procname	= "min_slab_ratio",
+		.data		= &sysctl_min_slab_ratio,
+		.maxlen		= sizeof(sysctl_min_slab_ratio),
+		.mode		= 0644,
+		.proc_handler	= sysctl_min_slab_ratio_sysctl_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &one_hundred,
+	},
+#endif
+#ifdef CONFIG_SMP
+	{
+		.procname	= "stat_interval",
+		.data		= &sysctl_stat_interval,
+		.maxlen		= sizeof(sysctl_stat_interval),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_jiffies,
+	},
+	{
+		.procname	= "stat_refresh",
+		.data		= NULL,
+		.maxlen		= 0,
+		.mode		= 0600,
+		.proc_handler	= vmstat_refresh,
+	},
+#endif
+#ifdef CONFIG_MMU
+	{
+		.procname	= "mmap_min_addr",
+		.data		= &dac_mmap_min_addr,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= mmap_min_addr_handler,
+	},
+#endif
+#ifdef CONFIG_NUMA
+	{
+		.procname	= "numa_zonelist_order",
+		.data		= &numa_zonelist_order,
+		.maxlen		= NUMA_ZONELIST_ORDER_LEN,
+		.mode		= 0644,
+		.proc_handler	= numa_zonelist_order_handler,
+	},
+#endif
+#if (defined(CONFIG_X86_32) && !defined(CONFIG_UML))|| \
+   (defined(CONFIG_SUPERH) && defined(CONFIG_VSYSCALL))
+	{
+		.procname	= "vdso_enabled",
+#ifdef CONFIG_X86_32
+		.data		= &vdso32_enabled,
+		.maxlen		= sizeof(vdso32_enabled),
+#else
+		.data		= &vdso_enabled,
+		.maxlen		= sizeof(vdso_enabled),
+#endif
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+		.extra1		= SYSCTL_ZERO,
+	},
+#endif
+#ifdef CONFIG_HIGHMEM
+	{
+		.procname	= "highmem_is_dirtyable",
+		.data		= &vm_highmem_is_dirtyable,
+		.maxlen		= sizeof(vm_highmem_is_dirtyable),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+#ifdef CONFIG_MEMORY_FAILURE
+	{
+		.procname	= "memory_failure_early_kill",
+		.data		= &sysctl_memory_failure_early_kill,
+		.maxlen		= sizeof(sysctl_memory_failure_early_kill),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "memory_failure_recovery",
+		.data		= &sysctl_memory_failure_recovery,
+		.maxlen		= sizeof(sysctl_memory_failure_recovery),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{
+		.procname	= "user_reserve_kbytes",
+		.data		= &sysctl_user_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_user_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "admin_reserve_kbytes",
+		.data		= &sysctl_admin_reserve_kbytes,
+		.maxlen		= sizeof(sysctl_admin_reserve_kbytes),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_BITS
+	{
+		.procname	= "mmap_rnd_bits",
+		.data		= &mmap_rnd_bits,
+		.maxlen		= sizeof(mmap_rnd_bits),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&mmap_rnd_bits_min,
+		.extra2		= (void *)&mmap_rnd_bits_max,
+	},
+#endif
+#ifdef CONFIG_HAVE_ARCH_MMAP_RND_COMPAT_BITS
+	{
+		.procname	= "mmap_rnd_compat_bits",
+		.data		= &mmap_rnd_compat_bits,
+		.maxlen		= sizeof(mmap_rnd_compat_bits),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= (void *)&mmap_rnd_compat_bits_min,
+		.extra2		= (void *)&mmap_rnd_compat_bits_max,
+	},
+#endif
+#ifdef CONFIG_USERFAULTFD
+	{
+		.procname	= "unprivileged_userfaultfd",
+		.data		= &sysctl_unprivileged_userfaultfd,
+		.maxlen		= sizeof(sysctl_unprivileged_userfaultfd),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif
+	{ }
+};
 
-int proc_doulongvec_minmax(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table fs_table[] = {	/* referenced as .child of the "fs" entry in sysctl_base_table */
+	{
+		.procname	= "inode-nr",
+		.data		= &inodes_stat,
+		.maxlen		= 2*sizeof(long),	/* same .data as "inode-state", but only 2 longs' worth */
+		.mode		= 0444,	/* read-only for everyone */
+		.proc_handler	= proc_nr_inodes,
+	},
+	{
+		.procname	= "inode-state",
+		.data		= &inodes_stat,
+		.maxlen		= 7*sizeof(long),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_inodes,
+	},
+	{
+		.procname	= "file-nr",
+		.data		= &files_stat,
+		.maxlen		= sizeof(files_stat),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_files,
+	},
+	{
+		.procname	= "file-max",
+		.data		= &files_stat.max_files,	/* one member of the struct exposed whole by "file-nr" */
+		.maxlen		= sizeof(files_stat.max_files),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+		.extra1		= &zero_ul,
+		.extra2		= &long_max,
+	},
+	{
+		.procname	= "nr_open",
+		.data		= &sysctl_nr_open,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &sysctl_nr_open_min,
+		.extra2		= &sysctl_nr_open_max,
+	},
+	{
+		.procname	= "dentry-state",
+		.data		= &dentry_stat,
+		.maxlen		= 6*sizeof(long),
+		.mode		= 0444,
+		.proc_handler	= proc_nr_dentry,
+	},
+	{
+		.procname	= "overflowuid",
+		.data		= &fs_overflowuid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,
+		.extra2		= &maxolduid,
+	},
+	{
+		.procname	= "overflowgid",
+		.data		= &fs_overflowgid,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= &minolduid,	/* same bounds objects as "overflowuid" above */
+		.extra2		= &maxolduid,
+	},
+#ifdef CONFIG_FILE_LOCKING
+	{
+		.procname	= "leases-enable",
+		.data		= &leases_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_DNOTIFY
+	{
+		.procname	= "dir-notify-enable",
+		.data		= &dir_notify_enable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif
+#ifdef CONFIG_MMU	/* closed by the bare #endif just before "protected_symlinks" */
+#ifdef CONFIG_FILE_LOCKING
+	{
+		.procname	= "lease-break-time",
+		.data		= &lease_break_time,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec,
+	},
+#endif /* CONFIG_FILE_LOCKING */
+#ifdef CONFIG_AIO
+	{
+		.procname	= "aio-nr",
+		.data		= &aio_nr,
+		.maxlen		= sizeof(aio_nr),
+		.mode		= 0444,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "aio-max-nr",
+		.data		= &aio_max_nr,
+		.maxlen		= sizeof(aio_max_nr),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+#endif /* CONFIG_AIO */
+#ifdef CONFIG_INOTIFY_USER
+	{
+		.procname	= "inotify",
+		.mode		= 0555,
+		.child		= inotify_table,	/* sub-table entry: no .data or .proc_handler */
+	},
+#endif	
+#ifdef CONFIG_EPOLL
+	{
+		.procname	= "epoll",
+		.mode		= 0555,
+		.child		= epoll_table,
+	},
+#endif /* CONFIG_EPOLL */
+#endif /* CONFIG_MMU */
+	{
+		.procname	= "protected_symlinks",
+		.data		= &sysctl_protected_symlinks,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,	/* owner read/write only, unlike the 0644 entries above */
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "protected_hardlinks",
+		.data		= &sysctl_protected_hardlinks,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+	{
+		.procname	= "protected_fifos",
+		.data		= &sysctl_protected_fifos,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,	/* upper bound object differs from the SYSCTL_ONE used by the two entries above */
+	},
+	{
+		.procname	= "protected_regular",
+		.data		= &sysctl_protected_regular,
+		.maxlen		= sizeof(int),
+		.mode		= 0600,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+	{
+		.procname	= "suid_dumpable",
+		.data		= &suid_dumpable,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax_coredump,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= &two,
+	},
+#if defined(CONFIG_BINFMT_MISC) || defined(CONFIG_BINFMT_MISC_MODULE)
+	{
+		.procname	= "binfmt_misc",
+		.mode		= 0555,
+		.child		= sysctl_mount_point,
+	},
+#endif
+	{
+		.procname	= "pipe-max-size",
+		.data		= &pipe_max_size,
+		.maxlen		= sizeof(pipe_max_size),
+		.mode		= 0644,
+		.proc_handler	= proc_dopipe_max_size,
+	},
+	{
+		.procname	= "pipe-user-pages-hard",
+		.data		= &pipe_user_pages_hard,
+		.maxlen		= sizeof(pipe_user_pages_hard),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "pipe-user-pages-soft",
+		.data		= &pipe_user_pages_soft,
+		.maxlen		= sizeof(pipe_user_pages_soft),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
+	{
+		.procname	= "mount-max",
+		.data		= &sysctl_mount_max,
+		.maxlen		= sizeof(unsigned int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec_minmax,
+		.extra1		= SYSCTL_ONE,	/* only .extra1 is set; no .extra2 for this entry */
+	},
+	{ }	/* empty last entry */
+};
 
-int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-				      void __user *buffer,
-				      size_t *lenp, loff_t *ppos)
-{
-    return -ENOSYS;
-}
+static struct ctl_table debug_table[] = {	/* referenced as .child of the "debug" entry in sysctl_base_table */
+#ifdef CONFIG_SYSCTL_EXCEPTION_TRACE
+	{
+		.procname	= "exception-trace",
+		.data		= &show_unhandled_signals,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_dointvec
+	},
+#endif /* CONFIG_SYSCTL_EXCEPTION_TRACE */
+#if defined(CONFIG_OPTPROBES)
+	{
+		.procname	= "kprobes-optimization",
+		.data		= &sysctl_kprobes_optimization,
+		.maxlen		= sizeof(int),
+		.mode		= 0644,
+		.proc_handler	= proc_kprobes_optimization_handler,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
+#endif /* CONFIG_OPTPROBES */
+	{ }	/* empty last entry; the only entry when both options above are off */
+};
 
-int proc_do_large_bitmap(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
-{
-	return -ENOSYS;
-}
+static struct ctl_table dev_table[] = {	/* referenced as .child of the "dev" entry in sysctl_base_table */
+	{ }	/* the table holds nothing but this empty entry */
+};
 
-#endif /* CONFIG_PROC_SYSCTL */
+static struct ctl_table sysctl_base_table[] = {	/* passed to register_sysctl_table() in sysctl_init() */
+	{
+		.procname	= "kernel",
+		.mode		= 0555,	/* every entry here has only a name, mode 0555 and a .child table */
+		.child		= kern_table,
+	},
+	{
+		.procname	= "vm",
+		.mode		= 0555,
+		.child		= vm_table,
+	},
+	{
+		.procname	= "fs",
+		.mode		= 0555,
+		.child		= fs_table,
+	},
+	{
+		.procname	= "debug",
+		.mode		= 0555,
+		.child		= debug_table,
+	},
+	{
+		.procname	= "dev",
+		.mode		= 0555,
+		.child		= dev_table,
+	},
+	{ }	/* empty last entry */
+};
 
-#if defined(CONFIG_SYSCTL)
-int proc_do_static_key(struct ctl_table *table, int write,
-		       void __user *buffer, size_t *lenp,
-		       loff_t *ppos)
+int __init sysctl_init(void)	/* registers sysctl_base_table; always returns 0 */
 {
-	struct static_key *key = (struct static_key *)table->data;
-	static DEFINE_MUTEX(static_key_mutex);
-	int val, ret;
-	struct ctl_table tmp = {
-		.data   = &val,
-		.maxlen = sizeof(val),
-		.mode   = table->mode,
-		.extra1 = SYSCTL_ZERO,
-		.extra2 = SYSCTL_ONE,
-	};
-
-	if (write && !capable(CAP_SYS_ADMIN))
-		return -EPERM;
+	struct ctl_table_header *hdr;
 
-	mutex_lock(&static_key_mutex);
-	val = static_key_enabled(key);
-	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
-	if (write && !ret) {
-		if (val)
-			static_key_enable(key);
-		else
-			static_key_disable(key);
-	}
-	mutex_unlock(&static_key_mutex);
-	return ret;
+	hdr = register_sysctl_table(sysctl_base_table);
+	kmemleak_not_leak(hdr);	/* hdr is kept nowhere else in this function; presumably that is why kmemleak is told about it - confirm */
+	return 0;	/* the register_sysctl_table() result is not checked for failure */
 }
-#endif
+#endif /* CONFIG_SYSCTL */
 /*
  * No sense putting this after each symbol definition, twice,
  * exception granted :-)
-- 
cgit v1.2.3-59-g8ed1b


From 32927393dc1ccd60fb2bdc05b9e8e88753761469 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2020 08:43:38 +0200
Subject: sysctl: pass kernel pointers to ->proc_handler

Instead of having all the sysctl handlers deal with user pointers, which
is rather hairy in terms of the BPF interaction, copy the input to and
from userspace in common code.  This also means that the strings are
always NUL-terminated by the common code, making the API a little bit
safer.

As most handlers just pass the data through to one of the common
handlers, a lot of the changes are mechanical.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 arch/arm64/kernel/armv8_deprecated.c    |   2 +-
 arch/arm64/kernel/fpsimd.c              |   3 +-
 arch/mips/lasat/sysctl.c                |  13 +-
 arch/s390/appldata/appldata_base.c      |  11 +-
 arch/s390/kernel/debug.c                |   2 +-
 arch/s390/kernel/topology.c             |   2 +-
 arch/s390/mm/cmm.c                      |  12 +-
 arch/x86/kernel/itmt.c                  |   3 +-
 drivers/cdrom/cdrom.c                   |   2 +-
 drivers/char/random.c                   |   2 +-
 drivers/macintosh/mac_hid.c             |   3 +-
 drivers/parport/procfs.c                |  39 +++---
 fs/dcache.c                             |   2 +-
 fs/drop_caches.c                        |   2 +-
 fs/file_table.c                         |   4 +-
 fs/fscache/main.c                       |   3 +-
 fs/inode.c                              |   2 +-
 fs/proc/proc_sysctl.c                   |  47 ++++---
 fs/quota/dquot.c                        |   2 +-
 fs/xfs/xfs_sysctl.c                     |   4 +-
 include/linux/bpf-cgroup.h              |   9 +-
 include/linux/compaction.h              |   2 +-
 include/linux/fs.h                      |   6 +-
 include/linux/ftrace.h                  |   3 +-
 include/linux/hugetlb.h                 |  15 +-
 include/linux/kprobes.h                 |   2 +-
 include/linux/latencytop.h              |   4 +-
 include/linux/mm.h                      |  12 +-
 include/linux/mmzone.h                  |  23 ++-
 include/linux/nmi.h                     |  15 +-
 include/linux/perf_event.h              |  13 +-
 include/linux/printk.h                  |   2 +-
 include/linux/sched/sysctl.h            |  44 ++----
 include/linux/security.h                |   2 +-
 include/linux/sysctl.h                  |  53 +++----
 include/linux/timer.h                   |   3 +-
 include/linux/vmstat.h                  |   8 +-
 include/linux/writeback.h               |  28 ++--
 ipc/ipc_sysctl.c                        |  10 +-
 ipc/mq_sysctl.c                         |   4 +-
 kernel/bpf/cgroup.c                     |  35 ++---
 kernel/events/callchain.c               |   2 +-
 kernel/events/core.c                    |   6 +-
 kernel/kprobes.c                        |   2 +-
 kernel/latencytop.c                     |   4 +-
 kernel/pid_namespace.c                  |   2 +-
 kernel/printk/printk.c                  |   2 +-
 kernel/sched/core.c                     |   9 +-
 kernel/sched/fair.c                     |   3 +-
 kernel/sched/rt.c                       |  10 +-
 kernel/sched/topology.c                 |   2 +-
 kernel/seccomp.c                        |   2 +-
 kernel/sysctl.c                         | 239 ++++++++++++--------------------
 kernel/time/timer.c                     |   3 +-
 kernel/trace/trace.c                    |   2 +-
 kernel/umh.c                            |   2 +-
 kernel/utsname_sysctl.c                 |   2 +-
 kernel/watchdog.c                       |  12 +-
 mm/compaction.c                         |   2 +-
 mm/hugetlb.c                            |   9 +-
 mm/page-writeback.c                     |  16 +--
 mm/page_alloc.c                         |  30 ++--
 mm/util.c                               |  10 +-
 mm/vmstat.c                             |   4 +-
 net/bridge/br_netfilter_hooks.c         |   2 +-
 net/core/neighbour.c                    |  28 ++--
 net/core/sysctl_net_core.c              |  27 ++--
 net/decnet/dn_dev.c                     |   7 +-
 net/decnet/sysctl_net_decnet.c          |  27 ++--
 net/ipv4/devinet.c                      |   9 +-
 net/ipv4/route.c                        |   3 +-
 net/ipv4/sysctl_net_ipv4.c              |  38 ++---
 net/ipv6/addrconf.c                     |  33 ++---
 net/ipv6/ndisc.c                        |   3 +-
 net/ipv6/route.c                        |   5 +-
 net/ipv6/sysctl_net_ipv6.c              |   3 +-
 net/mpls/af_mpls.c                      |   5 +-
 net/netfilter/ipvs/ip_vs_ctl.c          |   6 +-
 net/netfilter/nf_conntrack_standalone.c |   2 +-
 net/netfilter/nf_log.c                  |   2 +-
 net/phonet/sysctl.c                     |   3 +-
 net/rds/tcp.c                           |   6 +-
 net/sctp/sysctl.c                       |  32 ++---
 net/sunrpc/sysctl.c                     |  29 ++--
 net/sunrpc/xprtrdma/svc_rdma.c          |   7 +-
 security/apparmor/lsm.c                 |   2 +-
 security/min_addr.c                     |   2 +-
 security/yama/yama_lsm.c                |   2 +-
 88 files changed, 458 insertions(+), 653 deletions(-)

diff --git a/arch/arm64/kernel/armv8_deprecated.c b/arch/arm64/kernel/armv8_deprecated.c
index c19aa81ddc8c..7364de008bab 100644
--- a/arch/arm64/kernel/armv8_deprecated.c
+++ b/arch/arm64/kernel/armv8_deprecated.c
@@ -203,7 +203,7 @@ static void __init register_insn_emulation(struct insn_emulation_ops *ops)
 }
 
 static int emulation_proc_handler(struct ctl_table *table, int write,
-				  void __user *buffer, size_t *lenp,
+				  void *buffer, size_t *lenp,
 				  loff_t *ppos)
 {
 	int ret = 0;
diff --git a/arch/arm64/kernel/fpsimd.c b/arch/arm64/kernel/fpsimd.c
index 94289d126993..35cb5e66c504 100644
--- a/arch/arm64/kernel/fpsimd.c
+++ b/arch/arm64/kernel/fpsimd.c
@@ -341,8 +341,7 @@ static unsigned int find_supported_vector_length(unsigned int vl)
 #ifdef CONFIG_SYSCTL
 
 static int sve_proc_do_default_vl(struct ctl_table *table, int write,
-				  void __user *buffer, size_t *lenp,
-				  loff_t *ppos)
+				  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	int vl = sve_default_vl;
diff --git a/arch/mips/lasat/sysctl.c b/arch/mips/lasat/sysctl.c
index e666fe26c50d..2119541a5b8b 100644
--- a/arch/mips/lasat/sysctl.c
+++ b/arch/mips/lasat/sysctl.c
@@ -95,16 +95,15 @@ int proc_lasat_ip(struct ctl_table *table, int write,
 		len = 0;
 		p = buffer;
 		while (len < *lenp) {
-			if (get_user(c, p++))
-				return -EFAULT;
+			c = *p;
+			p++;
 			if (c == 0 || c == '\n')
 				break;
 			len++;
 		}
 		if (len >= sizeof(ipbuf)-1)
 			len = sizeof(ipbuf) - 1;
-		if (copy_from_user(ipbuf, buffer, len))
-			return -EFAULT;
+		memcpy(ipbuf, buffer, len);
 		ipbuf[len] = 0;
 		*ppos += *lenp;
 		/* Now see if we can convert it to a valid IP */
@@ -122,11 +121,9 @@ int proc_lasat_ip(struct ctl_table *table, int write,
 		if (len > *lenp)
 			len = *lenp;
 		if (len)
-			if (copy_to_user(buffer, ipbuf, len))
-				return -EFAULT;
+			memcpy(buffer, ipbuf, len);
 		if (len < *lenp) {
-			if (put_user('\n', ((char *) buffer) + len))
-				return -EFAULT;
+			*((char *)buffer + len) = '\n';
 			len++;
 		}
 		*lenp = len;
diff --git a/arch/s390/appldata/appldata_base.c b/arch/s390/appldata/appldata_base.c
index aa738cad1338..d74a4c7d5df6 100644
--- a/arch/s390/appldata/appldata_base.c
+++ b/arch/s390/appldata/appldata_base.c
@@ -51,10 +51,9 @@ static struct platform_device *appldata_pdev;
  */
 static const char appldata_proc_name[APPLDATA_PROC_NAME_LENGTH] = "appldata";
 static int appldata_timer_handler(struct ctl_table *ctl, int write,
-				  void __user *buffer, size_t *lenp, loff_t *ppos);
+				  void *buffer, size_t *lenp, loff_t *ppos);
 static int appldata_interval_handler(struct ctl_table *ctl, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
+				     void *buffer, size_t *lenp, loff_t *ppos);
 
 static struct ctl_table_header *appldata_sysctl_header;
 static struct ctl_table appldata_table[] = {
@@ -217,7 +216,7 @@ static void __appldata_vtimer_setup(int cmd)
  */
 static int
 appldata_timer_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int timer_active = appldata_timer_active;
 	int rc;
@@ -250,7 +249,7 @@ appldata_timer_handler(struct ctl_table *ctl, int write,
  */
 static int
 appldata_interval_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int interval = appldata_interval;
 	int rc;
@@ -280,7 +279,7 @@ appldata_interval_handler(struct ctl_table *ctl, int write,
  */
 static int
 appldata_generic_handler(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct appldata_ops *ops = NULL, *tmp_ops;
 	struct list_head *lh;
diff --git a/arch/s390/kernel/debug.c b/arch/s390/kernel/debug.c
index 6d321f5f101d..636446003a06 100644
--- a/arch/s390/kernel/debug.c
+++ b/arch/s390/kernel/debug.c
@@ -867,7 +867,7 @@ static int debug_active = 1;
  * if debug_active is already off
  */
 static int s390dbf_procactive(struct ctl_table *table, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+			      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!write || debug_stoppable || !debug_active)
 		return proc_dointvec(table, write, buffer, lenp, ppos);
diff --git a/arch/s390/kernel/topology.c b/arch/s390/kernel/topology.c
index 5f70cefc13e4..332b542548cd 100644
--- a/arch/s390/kernel/topology.c
+++ b/arch/s390/kernel/topology.c
@@ -594,7 +594,7 @@ static int __init topology_setup(char *str)
 early_param("topology", topology_setup);
 
 static int topology_ctl_handler(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int enabled = topology_is_enabled();
 	int new_mode;
diff --git a/arch/s390/mm/cmm.c b/arch/s390/mm/cmm.c
index ae989b740376..36bce727897b 100644
--- a/arch/s390/mm/cmm.c
+++ b/arch/s390/mm/cmm.c
@@ -245,7 +245,7 @@ static int cmm_skip_blanks(char *cp, char **endp)
 }
 
 static int cmm_pages_handler(struct ctl_table *ctl, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long nr = cmm_get_pages();
 	struct ctl_table ctl_entry = {
@@ -264,7 +264,7 @@ static int cmm_pages_handler(struct ctl_table *ctl, int write,
 }
 
 static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
-				   void __user *buffer, size_t *lenp,
+				   void *buffer, size_t *lenp,
 				   loff_t *ppos)
 {
 	long nr = cmm_get_timed_pages();
@@ -284,7 +284,7 @@ static int cmm_timed_pages_handler(struct ctl_table *ctl, int write,
 }
 
 static int cmm_timeout_handler(struct ctl_table *ctl, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos)
+			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char buf[64], *p;
 	long nr, seconds;
@@ -297,8 +297,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
 
 	if (write) {
 		len = min(*lenp, sizeof(buf));
-		if (copy_from_user(buf, buffer, len))
-			return -EFAULT;
+		memcpy(buf, buffer, len);
 		buf[len - 1] = '\0';
 		cmm_skip_blanks(buf, &p);
 		nr = simple_strtoul(p, &p, 0);
@@ -311,8 +310,7 @@ static int cmm_timeout_handler(struct ctl_table *ctl, int write,
 			      cmm_timeout_pages, cmm_timeout_seconds);
 		if (len > *lenp)
 			len = *lenp;
-		if (copy_to_user(buffer, buf, len))
-			return -EFAULT;
+		memcpy(buffer, buf, len);
 		*lenp = len;
 		*ppos += len;
 	}
diff --git a/arch/x86/kernel/itmt.c b/arch/x86/kernel/itmt.c
index 1cb3ca9bba49..1afbdd1dd777 100644
--- a/arch/x86/kernel/itmt.c
+++ b/arch/x86/kernel/itmt.c
@@ -39,8 +39,7 @@ static bool __read_mostly sched_itmt_capable;
 unsigned int __read_mostly sysctl_sched_itmt_enabled;
 
 static int sched_itmt_update_handler(struct ctl_table *table, int write,
-				     void __user *buffer, size_t *lenp,
-				     loff_t *ppos)
+				     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned int old_sysctl;
 	int ret;
diff --git a/drivers/cdrom/cdrom.c b/drivers/cdrom/cdrom.c
index faca0f346fff..e3bbe108eb54 100644
--- a/drivers/cdrom/cdrom.c
+++ b/drivers/cdrom/cdrom.c
@@ -3631,7 +3631,7 @@ static void cdrom_update_settings(void)
 }
 
 static int cdrom_sysctl_handler(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	
diff --git a/drivers/char/random.c b/drivers/char/random.c
index 0d10e31fd342..1e0db78b83ba 100644
--- a/drivers/char/random.c
+++ b/drivers/char/random.c
@@ -2057,7 +2057,7 @@ static char sysctl_bootid[16];
  * sysctl system call, as 16 bytes of binary data.
  */
 static int proc_do_uuid(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+			void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table fake_table;
 	unsigned char buf[64], tmp_uuid[16], *uuid;
diff --git a/drivers/macintosh/mac_hid.c b/drivers/macintosh/mac_hid.c
index 7af0c536d568..28b8581b44dd 100644
--- a/drivers/macintosh/mac_hid.c
+++ b/drivers/macintosh/mac_hid.c
@@ -183,8 +183,7 @@ static void mac_hid_stop_emulation(void)
 }
 
 static int mac_hid_toggle_emumouse(struct ctl_table *table, int write,
-				   void __user *buffer, size_t *lenp,
-				   loff_t *ppos)
+				   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
 	int old_val = *valp;
diff --git a/drivers/parport/procfs.c b/drivers/parport/procfs.c
index 48804049d697..ee7b5daabfd4 100644
--- a/drivers/parport/procfs.c
+++ b/drivers/parport/procfs.c
@@ -34,7 +34,7 @@
 #define PARPORT_MAX_SPINTIME_VALUE 1000
 
 static int do_active_device(struct ctl_table *table, int write,
-		      void __user *result, size_t *lenp, loff_t *ppos)
+		      void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport *port = (struct parport *)table->extra1;
 	char buffer[256];
@@ -65,13 +65,13 @@ static int do_active_device(struct ctl_table *table, int write,
 		*lenp = len;
 
 	*ppos += len;
-
-	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 
 #ifdef CONFIG_PARPORT_1284
 static int do_autoprobe(struct ctl_table *table, int write,
-			void __user *result, size_t *lenp, loff_t *ppos)
+			void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport_device_info *info = table->extra2;
 	const char *str;
@@ -108,13 +108,13 @@ static int do_autoprobe(struct ctl_table *table, int write,
 
 	*ppos += len;
 
-	return copy_to_user (result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 #endif /* IEEE1284.3 support. */
 
 static int do_hardware_base_addr(struct ctl_table *table, int write,
-				 void __user *result,
-				 size_t *lenp, loff_t *ppos)
+				 void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport *port = (struct parport *)table->extra1;
 	char buffer[20];
@@ -136,13 +136,12 @@ static int do_hardware_base_addr(struct ctl_table *table, int write,
 		*lenp = len;
 
 	*ppos += len;
-
-	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 
 static int do_hardware_irq(struct ctl_table *table, int write,
-			   void __user *result,
-			   size_t *lenp, loff_t *ppos)
+			   void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport *port = (struct parport *)table->extra1;
 	char buffer[20];
@@ -164,13 +163,12 @@ static int do_hardware_irq(struct ctl_table *table, int write,
 		*lenp = len;
 
 	*ppos += len;
-
-	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 
 static int do_hardware_dma(struct ctl_table *table, int write,
-			   void __user *result,
-			   size_t *lenp, loff_t *ppos)
+			   void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport *port = (struct parport *)table->extra1;
 	char buffer[20];
@@ -192,13 +190,12 @@ static int do_hardware_dma(struct ctl_table *table, int write,
 		*lenp = len;
 
 	*ppos += len;
-
-	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 
 static int do_hardware_modes(struct ctl_table *table, int write,
-			     void __user *result,
-			     size_t *lenp, loff_t *ppos)
+			     void *result, size_t *lenp, loff_t *ppos)
 {
 	struct parport *port = (struct parport *)table->extra1;
 	char buffer[40];
@@ -231,8 +228,8 @@ static int do_hardware_modes(struct ctl_table *table, int write,
 		*lenp = len;
 
 	*ppos += len;
-
-	return copy_to_user(result, buffer, len) ? -EFAULT : 0;
+	memcpy(result, buffer, len);
+	return 0;
 }
 
 #define PARPORT_PORT_DIR(CHILD) { .procname = NULL, .mode = 0555, .child = CHILD }
diff --git a/fs/dcache.c b/fs/dcache.c
index b280e07e162b..8dd4d8d7bd0b 100644
--- a/fs/dcache.c
+++ b/fs/dcache.c
@@ -165,7 +165,7 @@ static long get_nr_dentry_negative(void)
 	return sum < 0 ? 0 : sum;
 }
 
-int proc_nr_dentry(struct ctl_table *table, int write, void __user *buffer,
+int proc_nr_dentry(struct ctl_table *table, int write, void *buffer,
 		   size_t *lenp, loff_t *ppos)
 {
 	dentry_stat.nr_dentry = get_nr_dentry();
diff --git a/fs/drop_caches.c b/fs/drop_caches.c
index dc1a1d5d825b..f00fcc4a4f72 100644
--- a/fs/drop_caches.c
+++ b/fs/drop_caches.c
@@ -47,7 +47,7 @@ static void drop_pagecache_sb(struct super_block *sb, void *unused)
 }
 
 int drop_caches_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int ret;
 
diff --git a/fs/file_table.c b/fs/file_table.c
index 30d55c9a1744..3b612535391f 100644
--- a/fs/file_table.c
+++ b/fs/file_table.c
@@ -80,14 +80,14 @@ EXPORT_SYMBOL_GPL(get_max_files);
  */
 #if defined(CONFIG_SYSCTL) && defined(CONFIG_PROC_FS)
 int proc_nr_files(struct ctl_table *table, int write,
-                     void __user *buffer, size_t *lenp, loff_t *ppos)
+                     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	files_stat.nr_files = get_nr_files();
 	return proc_doulongvec_minmax(table, write, buffer, lenp, ppos);
 }
 #else
 int proc_nr_files(struct ctl_table *table, int write,
-                     void __user *buffer, size_t *lenp, loff_t *ppos)
+                     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
diff --git a/fs/fscache/main.c b/fs/fscache/main.c
index 59c2494efda3..c1e6cc9091aa 100644
--- a/fs/fscache/main.c
+++ b/fs/fscache/main.c
@@ -51,8 +51,7 @@ static unsigned fscache_op_max_active = 2;
 static struct ctl_table_header *fscache_sysctl_header;
 
 static int fscache_max_active_sysctl(struct ctl_table *table, int write,
-				     void __user *buffer,
-				     size_t *lenp, loff_t *ppos)
+				     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct workqueue_struct **wqp = table->extra1;
 	unsigned int *datap = table->data;
diff --git a/fs/inode.c b/fs/inode.c
index 93d9252a00ab..cc6e701b7e5d 100644
--- a/fs/inode.c
+++ b/fs/inode.c
@@ -108,7 +108,7 @@ long get_nr_dirty_inodes(void)
  */
 #ifdef CONFIG_SYSCTL
 int proc_nr_inodes(struct ctl_table *table, int write,
-		   void __user *buffer, size_t *lenp, loff_t *ppos)
+		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	inodes_stat.nr_inodes = get_nr_inodes();
 	inodes_stat.nr_unused = get_nr_inodes_unused();
diff --git a/fs/proc/proc_sysctl.c b/fs/proc/proc_sysctl.c
index b6f5d459b087..df2143e05c57 100644
--- a/fs/proc/proc_sysctl.c
+++ b/fs/proc/proc_sysctl.c
@@ -539,13 +539,13 @@ out:
 	return err;
 }
 
-static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
+static ssize_t proc_sys_call_handler(struct file *filp, void __user *ubuf,
 		size_t count, loff_t *ppos, int write)
 {
 	struct inode *inode = file_inode(filp);
 	struct ctl_table_header *head = grab_header(inode);
 	struct ctl_table *table = PROC_I(inode)->sysctl_entry;
-	void *new_buf = NULL;
+	void *kbuf;
 	ssize_t error;
 
 	if (IS_ERR(head))
@@ -564,27 +564,38 @@ static ssize_t proc_sys_call_handler(struct file *filp, void __user *buf,
 	if (!table->proc_handler)
 		goto out;
 
-	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, &count,
-					   ppos, &new_buf);
+	if (write) {
+		kbuf = memdup_user_nul(ubuf, count);
+		if (IS_ERR(kbuf)) {
+			error = PTR_ERR(kbuf);
+			goto out;
+		}
+	} else {
+		error = -ENOMEM;
+		kbuf = kzalloc(count, GFP_KERNEL);
+		if (!kbuf)
+			goto out;
+	}
+
+	error = BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, &kbuf, &count,
+					   ppos);
 	if (error)
-		goto out;
+		goto out_free_buf;
 
 	/* careful: calling conventions are nasty here */
-	if (new_buf) {
-		mm_segment_t old_fs;
-
-		old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		error = table->proc_handler(table, write, (void __user *)new_buf,
-					    &count, ppos);
-		set_fs(old_fs);
-		kfree(new_buf);
-	} else {
-		error = table->proc_handler(table, write, buf, &count, ppos);
+	error = table->proc_handler(table, write, kbuf, &count, ppos);
+	if (error)
+		goto out_free_buf;
+
+	if (!write) {
+		error = -EFAULT;
+		if (copy_to_user(ubuf, kbuf, count))
+			goto out_free_buf;
 	}
 
-	if (!error)
-		error = count;
+	error = count;
+out_free_buf:
+	kfree(kbuf);
 out:
 	sysctl_head_finish(head);
 
diff --git a/fs/quota/dquot.c b/fs/quota/dquot.c
index b6a4f692d345..7b4bac91146b 100644
--- a/fs/quota/dquot.c
+++ b/fs/quota/dquot.c
@@ -2841,7 +2841,7 @@ const struct quotactl_ops dquot_quotactl_sysfile_ops = {
 EXPORT_SYMBOL(dquot_quotactl_sysfile_ops);
 
 static int do_proc_dqstats(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
+		     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned int type = (unsigned long *)table->data - dqstats.stat;
 	s64 value = percpu_counter_sum(&dqstats.counter[type]);
diff --git a/fs/xfs/xfs_sysctl.c b/fs/xfs/xfs_sysctl.c
index 31b3bdbd2eba..021ef96d0542 100644
--- a/fs/xfs/xfs_sysctl.c
+++ b/fs/xfs/xfs_sysctl.c
@@ -13,7 +13,7 @@ STATIC int
 xfs_stats_clear_proc_handler(
 	struct ctl_table	*ctl,
 	int			write,
-	void			__user *buffer,
+	void			*buffer,
 	size_t			*lenp,
 	loff_t			*ppos)
 {
@@ -33,7 +33,7 @@ STATIC int
 xfs_panic_mask_proc_handler(
 	struct ctl_table	*ctl,
 	int			write,
-	void			__user *buffer,
+	void			*buffer,
 	size_t			*lenp,
 	loff_t			*ppos)
 {
diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index c11b413d5b1a..0b41fd5fc96b 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -138,8 +138,7 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 				   struct ctl_table *table, int write,
-				   void __user *buf, size_t *pcount,
-				   loff_t *ppos, void **new_buf,
+				   void **buf, size_t *pcount, loff_t *ppos,
 				   enum bpf_attach_type type);
 
 int __cgroup_bpf_run_filter_setsockopt(struct sock *sock, int *level,
@@ -302,12 +301,12 @@ int bpf_percpu_cgroup_storage_update(struct bpf_map *map, void *key,
 })
 
 
-#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos, nbuf)  \
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head, table, write, buf, count, pos)  \
 ({									       \
 	int __ret = 0;							       \
 	if (cgroup_bpf_enabled)						       \
 		__ret = __cgroup_bpf_run_filter_sysctl(head, table, write,     \
-						       buf, count, pos, nbuf,  \
+						       buf, count, pos,        \
 						       BPF_CGROUP_SYSCTL);     \
 	__ret;								       \
 })
@@ -429,7 +428,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 #define BPF_CGROUP_RUN_PROG_UDP6_RECVMSG_LOCK(sk, uaddr) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; })
-#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos,nbuf) ({ 0; })
+#define BPF_CGROUP_RUN_PROG_SYSCTL(head,table,write,buf,count,pos) ({ 0; })
 #define BPF_CGROUP_GETSOCKOPT_MAX_OPTLEN(optlen) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_GETSOCKOPT(sock, level, optname, optval, \
 				       optlen, max_optlen, retval) ({ retval; })
diff --git a/include/linux/compaction.h b/include/linux/compaction.h
index 4b898cdbdf05..a0eabfbeb0e1 100644
--- a/include/linux/compaction.h
+++ b/include/linux/compaction.h
@@ -86,7 +86,7 @@ static inline unsigned long compact_gap(unsigned int order)
 #ifdef CONFIG_COMPACTION
 extern int sysctl_compact_memory;
 extern int sysctl_compaction_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos);
+			void *buffer, size_t *length, loff_t *ppos);
 extern int sysctl_extfrag_threshold;
 extern int sysctl_compact_unevictable_allowed;
 
diff --git a/include/linux/fs.h b/include/linux/fs.h
index 4f6f59b4f22a..9b028d260649 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -3536,11 +3536,11 @@ ssize_t simple_attr_write(struct file *file, const char __user *buf,
 
 struct ctl_table;
 int proc_nr_files(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
+		  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_dentry(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos);
+		  void *buffer, size_t *lenp, loff_t *ppos);
 int proc_nr_inodes(struct ctl_table *table, int write,
-		   void __user *buffer, size_t *lenp, loff_t *ppos);
+		   void *buffer, size_t *lenp, loff_t *ppos);
 int __init get_filesystem_list(char *buf);
 
 #define __FMODE_EXEC		((__force int) FMODE_EXEC)
diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h
index db95244a62d4..ddfc377de0d2 100644
--- a/include/linux/ftrace.h
+++ b/include/linux/ftrace.h
@@ -1005,8 +1005,7 @@ extern void disable_trace_on_warning(void);
 extern int __disable_trace_on_warning;
 
 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp,
-			     loff_t *ppos);
+			     void *buffer, size_t *lenp, loff_t *ppos);
 
 #else /* CONFIG_TRACING */
 static inline void  disable_trace_on_warning(void) { }
diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h
index 43a1cef8f0f1..92c21c5ccc58 100644
--- a/include/linux/hugetlb.h
+++ b/include/linux/hugetlb.h
@@ -105,14 +105,13 @@ struct hugepage_subpool *hugepage_new_subpool(struct hstate *h, long max_hpages,
 void hugepage_put_subpool(struct hugepage_subpool *spool);
 
 void reset_vma_resv_huge_pages(struct vm_area_struct *vma);
-int hugetlb_sysctl_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
-int hugetlb_overcommit_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
-int hugetlb_treat_movable_handler(struct ctl_table *, int, void __user *, size_t *, loff_t *);
-
-#ifdef CONFIG_NUMA
-int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-#endif
+int hugetlb_sysctl_handler(struct ctl_table *, int, void *, size_t *, loff_t *);
+int hugetlb_overcommit_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int hugetlb_treat_movable_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int hugetlb_mempolicy_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 
 int copy_hugetlb_page_range(struct mm_struct *, struct mm_struct *, struct vm_area_struct *);
 long follow_hugetlb_page(struct mm_struct *, struct vm_area_struct *,
diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h
index 04bdaf01112c..594265bfd390 100644
--- a/include/linux/kprobes.h
+++ b/include/linux/kprobes.h
@@ -312,7 +312,7 @@ DEFINE_INSN_CACHE_OPS(optinsn);
 #ifdef CONFIG_SYSCTL
 extern int sysctl_kprobes_optimization;
 extern int proc_kprobes_optimization_handler(struct ctl_table *table,
-					     int write, void __user *buffer,
+					     int write, void *buffer,
 					     size_t *length, loff_t *ppos);
 #endif
 extern void wait_for_kprobe_optimizer(void);
diff --git a/include/linux/latencytop.h b/include/linux/latencytop.h
index 9022f0c2e2e4..abe3d95f795b 100644
--- a/include/linux/latencytop.h
+++ b/include/linux/latencytop.h
@@ -38,8 +38,8 @@ account_scheduler_latency(struct task_struct *task, int usecs, int inter)
 
 void clear_tsk_latency_tracing(struct task_struct *p);
 
-extern int sysctl_latencytop(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos);
+int sysctl_latencytop(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
 
 #else
 
diff --git a/include/linux/mm.h b/include/linux/mm.h
index 9c4e7e76dedd..a7b1ef8ed970 100644
--- a/include/linux/mm.h
+++ b/include/linux/mm.h
@@ -201,10 +201,10 @@ extern int sysctl_overcommit_memory;
 extern int sysctl_overcommit_ratio;
 extern unsigned long sysctl_overcommit_kbytes;
 
-extern int overcommit_ratio_handler(struct ctl_table *, int, void __user *,
-				    size_t *, loff_t *);
-extern int overcommit_kbytes_handler(struct ctl_table *, int, void __user *,
-				    size_t *, loff_t *);
+int overcommit_ratio_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int overcommit_kbytes_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 
 #define nth_page(page,n) pfn_to_page(page_to_pfn((page)) + (n))
 
@@ -2957,8 +2957,8 @@ extern bool process_shares_mm(struct task_struct *p, struct mm_struct *mm);
 
 #ifdef CONFIG_SYSCTL
 extern int sysctl_drop_caches;
-int drop_caches_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
+int drop_caches_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 #endif
 
 void drop_slab(void);
diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h
index b2af594ef0f7..93cf20f41e26 100644
--- a/include/linux/mmzone.h
+++ b/include/linux/mmzone.h
@@ -910,22 +910,21 @@ static inline int is_highmem(struct zone *zone)
 /* These two functions are used to setup the per zone pages min values */
 struct ctl_table;
 
-int min_free_kbytes_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-int watermark_scale_factor_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
+int min_free_kbytes_sysctl_handler(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int watermark_scale_factor_sysctl_handler(struct ctl_table *, int, void *,
+		size_t *, loff_t *);
 extern int sysctl_lowmem_reserve_ratio[MAX_NR_ZONES];
-int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
+int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *, int, void *,
+		size_t *, loff_t *);
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
+		void *, size_t *, loff_t *);
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *, int,
-			void __user *, size_t *, loff_t *);
+		void *, size_t *, loff_t *);
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *, int,
-			void __user *, size_t *, loff_t *);
-
-extern int numa_zonelist_order_handler(struct ctl_table *, int,
-			void __user *, size_t *, loff_t *);
+		void *, size_t *, loff_t *);
+int numa_zonelist_order_handler(struct ctl_table *, int,
+		void *, size_t *, loff_t *);
 extern int percpu_pagelist_fraction;
 extern char numa_zonelist_order[];
 #define NUMA_ZONELIST_ORDER_LEN	16
diff --git a/include/linux/nmi.h b/include/linux/nmi.h
index 9003e29cde46..750c7f395ca9 100644
--- a/include/linux/nmi.h
+++ b/include/linux/nmi.h
@@ -202,16 +202,11 @@ static inline void watchdog_update_hrtimer_threshold(u64 period) { }
 #endif
 
 struct ctl_table;
-extern int proc_watchdog(struct ctl_table *, int ,
-			 void __user *, size_t *, loff_t *);
-extern int proc_nmi_watchdog(struct ctl_table *, int ,
-			     void __user *, size_t *, loff_t *);
-extern int proc_soft_watchdog(struct ctl_table *, int ,
-			      void __user *, size_t *, loff_t *);
-extern int proc_watchdog_thresh(struct ctl_table *, int ,
-				void __user *, size_t *, loff_t *);
-extern int proc_watchdog_cpumask(struct ctl_table *, int,
-				 void __user *, size_t *, loff_t *);
+int proc_watchdog(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_nmi_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
+int proc_soft_watchdog(struct ctl_table *, int , void *, size_t *, loff_t *);
+int proc_watchdog_thresh(struct ctl_table *, int , void *, size_t *, loff_t *);
+int proc_watchdog_cpumask(struct ctl_table *, int, void *, size_t *, loff_t *);
 
 #ifdef CONFIG_HAVE_ACPI_APEI_NMI
 #include <asm/nmi.h>
diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h
index 9c3e7619c929..347ea379622a 100644
--- a/include/linux/perf_event.h
+++ b/include/linux/perf_event.h
@@ -1280,15 +1280,12 @@ extern int sysctl_perf_cpu_time_max_percent;
 
 extern void perf_sample_event_took(u64 sample_len_ns);
 
-extern int perf_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-extern int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
+int perf_proc_update_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp, loff_t *ppos);
+		void *buffer, size_t *lenp, loff_t *ppos);
 
 /* Access to perf_event_open(2) syscall. */
 #define PERF_SECURITY_OPEN		0
diff --git a/include/linux/printk.h b/include/linux/printk.h
index e061635e0409..fcde0772ec98 100644
--- a/include/linux/printk.h
+++ b/include/linux/printk.h
@@ -189,7 +189,7 @@ extern int printk_delay_msec;
 extern int dmesg_restrict;
 
 extern int
-devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf,
+devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void *buf,
 			  size_t *lenp, loff_t *ppos);
 
 extern void wake_up_klogd(void);
diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h
index d4f6215ee03f..7b4d3a49b6c5 100644
--- a/include/linux/sched/sysctl.h
+++ b/include/linux/sched/sysctl.h
@@ -12,9 +12,8 @@ extern unsigned int  sysctl_hung_task_panic;
 extern unsigned long sysctl_hung_task_timeout_secs;
 extern unsigned long sysctl_hung_task_check_interval_secs;
 extern int sysctl_hung_task_warnings;
-extern int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
-					 void __user *buffer,
-					 size_t *lenp, loff_t *ppos);
+int proc_dohung_task_timeout_secs(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 #else
 /* Avoid need for ifdefs elsewhere in the code */
 enum { sysctl_hung_task_timeout_secs = 0 };
@@ -43,8 +42,7 @@ extern __read_mostly unsigned int sysctl_sched_migration_cost;
 extern __read_mostly unsigned int sysctl_sched_nr_migrate;
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos);
+		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
 /*
@@ -72,33 +70,21 @@ extern unsigned int sysctl_sched_autogroup_enabled;
 extern int sysctl_sched_rr_timeslice;
 extern int sched_rr_timeslice;
 
-extern int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
-extern int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-
-#ifdef CONFIG_UCLAMP_TASK
-extern int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
-				       void __user *buffer, size_t *lenp,
-				       loff_t *ppos);
-#endif
-
-extern int sysctl_numa_balancing(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos);
-
-extern int sysctl_schedstats(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos);
+int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+int sysctl_numa_balancing(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
 
 #if defined(CONFIG_ENERGY_MODEL) && defined(CONFIG_CPU_FREQ_GOV_SCHEDUTIL)
 extern unsigned int sysctl_sched_energy_aware;
-extern int sched_energy_aware_handler(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos);
+int sched_energy_aware_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 #endif /* _LINUX_SCHED_SYSCTL_H */
diff --git a/include/linux/security.h b/include/linux/security.h
index a8d9310472df..6aa229b252ce 100644
--- a/include/linux/security.h
+++ b/include/linux/security.h
@@ -211,7 +211,7 @@ struct request_sock;
 
 #ifdef CONFIG_MMU
 extern int mmap_min_addr_handler(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp, loff_t *ppos);
+				 void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 /* security_inode_init_security callback function to write xattrs */
diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h
index 36143ca40b56..f2401e45a3c2 100644
--- a/include/linux/sysctl.h
+++ b/include/linux/sysctl.h
@@ -44,35 +44,26 @@ struct ctl_dir;
 
 extern const int sysctl_vals[];
 
-typedef int proc_handler (struct ctl_table *ctl, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos);
-
-extern int proc_dostring(struct ctl_table *, int,
-			 void __user *, size_t *, loff_t *);
-extern int proc_dointvec(struct ctl_table *, int,
-			 void __user *, size_t *, loff_t *);
-extern int proc_douintvec(struct ctl_table *, int,
-			 void __user *, size_t *, loff_t *);
-extern int proc_dointvec_minmax(struct ctl_table *, int,
-				void __user *, size_t *, loff_t *);
-extern int proc_douintvec_minmax(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos);
-extern int proc_dointvec_jiffies(struct ctl_table *, int,
-				 void __user *, size_t *, loff_t *);
-extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int,
-					void __user *, size_t *, loff_t *);
-extern int proc_dointvec_ms_jiffies(struct ctl_table *, int,
-				    void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_minmax(struct ctl_table *, int,
-				  void __user *, size_t *, loff_t *);
-extern int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int,
-				      void __user *, size_t *, loff_t *);
-extern int proc_do_large_bitmap(struct ctl_table *, int,
-				void __user *, size_t *, loff_t *);
-extern int proc_do_static_key(struct ctl_table *table, int write,
-			      void __user *buffer, size_t *lenp,
-			      loff_t *ppos);
+typedef int proc_handler(struct ctl_table *ctl, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+
+int proc_dostring(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dointvec(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_douintvec(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dointvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_douintvec_minmax(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
+int proc_dointvec_jiffies(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_dointvec_userhz_jiffies(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int proc_dointvec_ms_jiffies(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
+int proc_doulongvec_minmax(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int, void *,
+		size_t *, loff_t *);
+int proc_do_large_bitmap(struct ctl_table *, int, void *, size_t *, loff_t *);
+int proc_do_static_key(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
 
 /*
  * Register a set of sysctl names by calling register_sysctl_table
@@ -246,7 +237,7 @@ static inline void setup_sysctl_set(struct ctl_table_set *p,
 
 #endif /* CONFIG_SYSCTL */
 
-int sysctl_max_threads(struct ctl_table *table, int write,
-		       void __user *buffer, size_t *lenp, loff_t *ppos);
+int sysctl_max_threads(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos);
 
 #endif /* _LINUX_SYSCTL_H */
diff --git a/include/linux/timer.h b/include/linux/timer.h
index 0dc19a8c39c9..07910ae5ddd9 100644
--- a/include/linux/timer.h
+++ b/include/linux/timer.h
@@ -201,8 +201,7 @@ struct ctl_table;
 
 extern unsigned int sysctl_timer_migration;
 int timer_migration_handler(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp,
-			    loff_t *ppos);
+			    void *buffer, size_t *lenp, loff_t *ppos);
 #endif
 
 unsigned long __round_jiffies(unsigned long j, int cpu);
diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h
index 292485f3d24d..cb507151710f 100644
--- a/include/linux/vmstat.h
+++ b/include/linux/vmstat.h
@@ -16,8 +16,8 @@ extern int sysctl_stat_interval;
 #define DISABLE_NUMA_STAT   0
 extern int sysctl_vm_numa_stat;
 DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key);
-extern int sysctl_vm_numa_stat_handler(struct ctl_table *table,
-		int write, void __user *buffer, size_t *length, loff_t *ppos);
+int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *length, loff_t *ppos);
 #endif
 
 struct reclaim_stat {
@@ -274,8 +274,8 @@ void cpu_vm_stats_fold(int cpu);
 void refresh_zone_stat_thresholds(void);
 
 struct ctl_table;
-int vmstat_refresh(struct ctl_table *, int write,
-		   void __user *buffer, size_t *lenp, loff_t *ppos);
+int vmstat_refresh(struct ctl_table *, int write, void *buffer, size_t *lenp,
+		loff_t *ppos);
 
 void drain_zonestat(struct zone *zone, struct per_cpu_pageset *);
 
diff --git a/include/linux/writeback.h b/include/linux/writeback.h
index a19d845dd7eb..f8a7e1a850fb 100644
--- a/include/linux/writeback.h
+++ b/include/linux/writeback.h
@@ -362,24 +362,18 @@ extern int vm_highmem_is_dirtyable;
 extern int block_dump;
 extern int laptop_mode;
 
-extern int dirty_background_ratio_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-extern int dirty_background_bytes_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-extern int dirty_ratio_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
-extern int dirty_bytes_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos);
+int dirty_background_ratio_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+int dirty_background_bytes_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+int dirty_ratio_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
+int dirty_bytes_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 int dirtytime_interval_handler(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos);
-
-struct ctl_table;
-int dirty_writeback_centisecs_handler(struct ctl_table *, int,
-				      void __user *, size_t *, loff_t *);
+		void *buffer, size_t *lenp, loff_t *ppos);
+int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
+		void *buffer, size_t *lenp, loff_t *ppos);
 
 void global_dirty_limits(unsigned long *pbackground, unsigned long *pdirty);
 unsigned long wb_calc_thresh(struct bdi_writeback *wb, unsigned long thresh);
diff --git a/ipc/ipc_sysctl.c b/ipc/ipc_sysctl.c
index affd66537e87..d1b8644bfb88 100644
--- a/ipc/ipc_sysctl.c
+++ b/ipc/ipc_sysctl.c
@@ -24,7 +24,7 @@ static void *get_ipc(struct ctl_table *table)
 
 #ifdef CONFIG_PROC_SYSCTL
 static int proc_ipc_dointvec(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
 
@@ -35,7 +35,7 @@ static int proc_ipc_dointvec(struct ctl_table *table, int write,
 }
 
 static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
 
@@ -46,7 +46,7 @@ static int proc_ipc_dointvec_minmax(struct ctl_table *table, int write,
 }
 
 static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ipc_namespace *ns = current->nsproxy->ipc_ns;
 	int err = proc_ipc_dointvec_minmax(table, write, buffer, lenp, ppos);
@@ -59,7 +59,7 @@ static int proc_ipc_dointvec_minmax_orphans(struct ctl_table *table, int write,
 }
 
 static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
 	memcpy(&ipc_table, table, sizeof(ipc_table));
@@ -70,7 +70,7 @@ static int proc_ipc_doulongvec_minmax(struct ctl_table *table, int write,
 }
 
 static int proc_ipc_auto_msgmni(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table ipc_table;
 	int dummy = 0;
diff --git a/ipc/mq_sysctl.c b/ipc/mq_sysctl.c
index 7c00f28923a8..72a92a08c848 100644
--- a/ipc/mq_sysctl.c
+++ b/ipc/mq_sysctl.c
@@ -19,7 +19,7 @@ static void *get_mq(struct ctl_table *table)
 }
 
 static int proc_mq_dointvec(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
+			    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table mq_table;
 	memcpy(&mq_table, table, sizeof(mq_table));
@@ -29,7 +29,7 @@ static int proc_mq_dointvec(struct ctl_table *table, int write,
 }
 
 static int proc_mq_dointvec_minmax(struct ctl_table *table, int write,
-	void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table mq_table;
 	memcpy(&mq_table, table, sizeof(mq_table));
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index cb305e71e7de..977bc69bb1c5 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1137,16 +1137,13 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
  * @head: sysctl table header
  * @table: sysctl table
  * @write: sysctl is being read (= 0) or written (= 1)
- * @buf: pointer to buffer passed by user space
+ * @buf: pointer to buffer (in and out)
  * @pcount: value-result argument: value is size of buffer pointed to by @buf,
  *	result is size of @new_buf if program set new value, initial value
  *	otherwise
  * @ppos: value-result argument: value is position at which read from or write
  *	to sysctl is happening, result is new position if program overrode it,
  *	initial value otherwise
- * @new_buf: pointer to pointer to new buffer that will be allocated if program
- *	overrides new value provided by user space on sysctl write
- *	NOTE: it's caller responsibility to free *new_buf if it was set
  * @type: type of program to be executed
  *
  * Program is run when sysctl is being accessed, either read or written, and
@@ -1157,8 +1154,7 @@ const struct bpf_verifier_ops cg_dev_verifier_ops = {
  */
 int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 				   struct ctl_table *table, int write,
-				   void __user *buf, size_t *pcount,
-				   loff_t *ppos, void **new_buf,
+				   void **buf, size_t *pcount, loff_t *ppos,
 				   enum bpf_attach_type type)
 {
 	struct bpf_sysctl_kern ctx = {
@@ -1173,36 +1169,28 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 		.new_updated = 0,
 	};
 	struct cgroup *cgrp;
+	loff_t pos = 0;
 	int ret;
 
 	ctx.cur_val = kmalloc_track_caller(ctx.cur_len, GFP_KERNEL);
-	if (ctx.cur_val) {
-		mm_segment_t old_fs;
-		loff_t pos = 0;
-
-		old_fs = get_fs();
-		set_fs(KERNEL_DS);
-		if (table->proc_handler(table, 0, (void __user *)ctx.cur_val,
-					&ctx.cur_len, &pos)) {
-			/* Let BPF program decide how to proceed. */
-			ctx.cur_len = 0;
-		}
-		set_fs(old_fs);
-	} else {
+	if (!ctx.cur_val ||
+	    table->proc_handler(table, 0, ctx.cur_val, &ctx.cur_len, &pos)) {
 		/* Let BPF program decide how to proceed. */
 		ctx.cur_len = 0;
 	}
 
-	if (write && buf && *pcount) {
+	if (write && *buf && *pcount) {
 		/* BPF program should be able to override new value with a
 		 * buffer bigger than provided by user.
 		 */
 		ctx.new_val = kmalloc_track_caller(PAGE_SIZE, GFP_KERNEL);
 		ctx.new_len = min_t(size_t, PAGE_SIZE, *pcount);
-		if (!ctx.new_val ||
-		    copy_from_user(ctx.new_val, buf, ctx.new_len))
+		if (ctx.new_val) {
+			memcpy(ctx.new_val, *buf, ctx.new_len);
+		} else {
 			/* Let BPF program decide how to proceed. */
 			ctx.new_len = 0;
+		}
 	}
 
 	rcu_read_lock();
@@ -1213,7 +1201,8 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 	kfree(ctx.cur_val);
 
 	if (ret == 1 && ctx.new_updated) {
-		*new_buf = ctx.new_val;
+		kfree(*buf);
+		*buf = ctx.new_val;
 		*pcount = ctx.new_len;
 	} else {
 		kfree(ctx.new_val);
diff --git a/kernel/events/callchain.c b/kernel/events/callchain.c
index c2b41a263166..bdb1533ada81 100644
--- a/kernel/events/callchain.c
+++ b/kernel/events/callchain.c
@@ -236,7 +236,7 @@ exit_put:
  * sysctl_perf_event_max_contexts_per_stack.
  */
 int perf_event_max_stack_handler(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp, loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *value = table->data;
 	int new_value = *value, ret;
diff --git a/kernel/events/core.c b/kernel/events/core.c
index bc9b98a9af9a..f86d46f2c4d9 100644
--- a/kernel/events/core.c
+++ b/kernel/events/core.c
@@ -437,8 +437,7 @@ static void update_perf_cpu_limits(void)
 static bool perf_rotate_context(struct perf_cpu_context *cpuctx);
 
 int perf_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	int perf_cpu = sysctl_perf_cpu_time_max_percent;
@@ -462,8 +461,7 @@ int perf_proc_update_handler(struct ctl_table *table, int write,
 int sysctl_perf_cpu_time_max_percent __read_mostly = DEFAULT_CPU_TIME_MAX_PERCENT;
 
 int perf_cpu_time_max_percent_handler(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 
diff --git a/kernel/kprobes.c b/kernel/kprobes.c
index 2625c241ac00..ffbe03a45c16 100644
--- a/kernel/kprobes.c
+++ b/kernel/kprobes.c
@@ -892,7 +892,7 @@ static void unoptimize_all_kprobes(void)
 static DEFINE_MUTEX(kprobe_sysctl_mutex);
 int sysctl_kprobes_optimization;
 int proc_kprobes_optimization_handler(struct ctl_table *table, int write,
-				      void __user *buffer, size_t *length,
+				      void *buffer, size_t *length,
 				      loff_t *ppos)
 {
 	int ret;
diff --git a/kernel/latencytop.c b/kernel/latencytop.c
index 8d1c15832e55..166d7bf49666 100644
--- a/kernel/latencytop.c
+++ b/kernel/latencytop.c
@@ -269,8 +269,8 @@ static int __init init_lstats_procfs(void)
 	return 0;
 }
 
-int sysctl_latencytop(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+int sysctl_latencytop(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int err;
 
diff --git a/kernel/pid_namespace.c b/kernel/pid_namespace.c
index 01f8ba32cc0c..3ccaba5f15c0 100644
--- a/kernel/pid_namespace.c
+++ b/kernel/pid_namespace.c
@@ -263,7 +263,7 @@ void zap_pid_ns_processes(struct pid_namespace *pid_ns)
 
 #ifdef CONFIG_CHECKPOINT_RESTORE
 static int pid_ns_ctl_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct pid_namespace *pid_ns = task_active_pid_ns(current);
 	struct ctl_table tmp = *table;
diff --git a/kernel/printk/printk.c b/kernel/printk/printk.c
index 9a9b6156270b..471f649b5868 100644
--- a/kernel/printk/printk.c
+++ b/kernel/printk/printk.c
@@ -173,7 +173,7 @@ __setup("printk.devkmsg=", control_devkmsg);
 char devkmsg_log_str[DEVKMSG_STR_MAX_SIZE] = "ratelimit";
 
 int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+			      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char old_str[DEVKMSG_STR_MAX_SIZE];
 	unsigned int old;
diff --git a/kernel/sched/core.c b/kernel/sched/core.c
index 3a61a3b8eaa9..5c589a2e4d19 100644
--- a/kernel/sched/core.c
+++ b/kernel/sched/core.c
@@ -1110,8 +1110,7 @@ static void uclamp_update_root_tg(void) { }
 #endif
 
 int sysctl_sched_uclamp_handler(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	bool update_root_tg = false;
 	int old_min, old_max;
@@ -2723,7 +2722,7 @@ void set_numabalancing_state(bool enabled)
 
 #ifdef CONFIG_PROC_SYSCTL
 int sysctl_numa_balancing(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
 	int err;
@@ -2797,8 +2796,8 @@ static void __init init_schedstats(void)
 }
 
 #ifdef CONFIG_PROC_SYSCTL
-int sysctl_schedstats(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+int sysctl_schedstats(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
 	int err;
diff --git a/kernel/sched/fair.c b/kernel/sched/fair.c
index 02f323b85b6d..b6077fd5b32f 100644
--- a/kernel/sched/fair.c
+++ b/kernel/sched/fair.c
@@ -645,8 +645,7 @@ struct sched_entity *__pick_last_entity(struct cfs_rq *cfs_rq)
  */
 
 int sched_proc_update_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	unsigned int factor = get_update_sysctl_factor();
diff --git a/kernel/sched/rt.c b/kernel/sched/rt.c
index df11d88c9895..45da29de3ecc 100644
--- a/kernel/sched/rt.c
+++ b/kernel/sched/rt.c
@@ -2714,9 +2714,8 @@ static void sched_rt_do_global(void)
 	def_rt_bandwidth.rt_period = ns_to_ktime(global_rt_period());
 }
 
-int sched_rt_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+int sched_rt_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int old_period, old_runtime;
 	static DEFINE_MUTEX(mutex);
@@ -2754,9 +2753,8 @@ undo:
 	return ret;
 }
 
-int sched_rr_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+int sched_rr_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	static DEFINE_MUTEX(mutex);
diff --git a/kernel/sched/topology.c b/kernel/sched/topology.c
index 8344757bba6e..fa64b2ee9fe6 100644
--- a/kernel/sched/topology.c
+++ b/kernel/sched/topology.c
@@ -209,7 +209,7 @@ bool sched_energy_update;
 
 #ifdef CONFIG_PROC_SYSCTL
 int sched_energy_aware_handler(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret, state;
 
diff --git a/kernel/seccomp.c b/kernel/seccomp.c
index 55a6184f5990..d653d8426de9 100644
--- a/kernel/seccomp.c
+++ b/kernel/seccomp.c
@@ -1776,7 +1776,7 @@ static void audit_actions_logged(u32 actions_logged, u32 old_actions_logged,
 }
 
 static int seccomp_actions_logged_handler(struct ctl_table *ro_table, int write,
-					  void __user *buffer, size_t *lenp,
+					  void *buffer, size_t *lenp,
 					  loff_t *ppos)
 {
 	int ret;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 3fafca3ced98..e961286d0e14 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -208,12 +208,10 @@ static int max_extfrag_threshold = 1000;
 #ifdef CONFIG_PROC_SYSCTL
 
 static int _proc_do_string(char *data, int maxlen, int write,
-			   char __user *buffer,
-			   size_t *lenp, loff_t *ppos)
+		char *buffer, size_t *lenp, loff_t *ppos)
 {
 	size_t len;
-	char __user *p;
-	char c;
+	char c, *p;
 
 	if (!data || !maxlen || !*lenp) {
 		*lenp = 0;
@@ -238,8 +236,7 @@ static int _proc_do_string(char *data, int maxlen, int write,
 		*ppos += *lenp;
 		p = buffer;
 		while ((p - buffer) < *lenp && len < maxlen - 1) {
-			if (get_user(c, p++))
-				return -EFAULT;
+			c = *(p++);
 			if (c == 0 || c == '\n')
 				break;
 			data[len++] = c;
@@ -261,11 +258,9 @@ static int _proc_do_string(char *data, int maxlen, int write,
 		if (len > *lenp)
 			len = *lenp;
 		if (len)
-			if (copy_to_user(buffer, data, len))
-				return -EFAULT;
+			memcpy(buffer, data, len);
 		if (len < *lenp) {
-			if (put_user('\n', buffer + len))
-				return -EFAULT;
+			buffer[len] = '\n';
 			len++;
 		}
 		*lenp = len;
@@ -326,13 +321,13 @@ static bool proc_first_pos_non_zero_ignore(loff_t *ppos,
  * Returns 0 on success.
  */
 int proc_dostring(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (write)
 		proc_first_pos_non_zero_ignore(ppos, table);
 
-	return _proc_do_string((char *)(table->data), table->maxlen, write,
-			       (char __user *)buffer, lenp, ppos);
+	return _proc_do_string(table->data, table->maxlen, write, buffer, lenp,
+			ppos);
 }
 
 static size_t proc_skip_spaces(char **buf)
@@ -463,11 +458,10 @@ static int proc_get_long(char **buf, size_t *size,
  * @val: the integer to be converted
  * @neg: sign of the number, %TRUE for negative
  *
- * In case of success %0 is returned and @buf and @size are updated with
- * the amount of bytes written.
+ * @buf and @size are updated with the amount of bytes written; the output
+ * is truncated to the space left in @size, so this helper cannot fail.
  */
-static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
-			  bool neg)
+static void proc_put_long(void **buf, size_t *size, unsigned long val, bool neg)
 {
 	int len;
 	char tmp[TMPBUFLEN], *p = tmp;
@@ -476,24 +470,22 @@ static int proc_put_long(void __user **buf, size_t *size, unsigned long val,
 	len = strlen(tmp);
 	if (len > *size)
 		len = *size;
-	if (copy_to_user(*buf, tmp, len))
-		return -EFAULT;
+	memcpy(*buf, tmp, len);
 	*size -= len;
 	*buf += len;
-	return 0;
 }
 #undef TMPBUFLEN
 
-static int proc_put_char(void __user **buf, size_t *size, char c)
+static void proc_put_char(void **buf, size_t *size, char c)
 {
 	if (*size) {
-		char __user **buffer = (char __user **)buf;
-		if (put_user(c, *buffer))
-			return -EFAULT;
-		(*size)--, (*buffer)++;
+		char **buffer = (char **)buf;
+		**buffer = c;
+
+		(*size)--;
+		(*buffer)++;
 		*buf = *buffer;
 	}
-	return 0;
 }
 
 static int do_proc_dointvec_conv(bool *negp, unsigned long *lvalp,
@@ -541,7 +533,7 @@ static int do_proc_douintvec_conv(unsigned long *lvalp,
 static const char proc_wspace_sep[] = { ' ', '\t', '\n' };
 
 static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
-		  int write, void __user *buffer,
+		  int write, void *buffer,
 		  size_t *lenp, loff_t *ppos,
 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
@@ -549,7 +541,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 {
 	int *i, vleft, first = 1, err = 0;
 	size_t left;
-	char *kbuf = NULL, *p;
+	char *p;
 	
 	if (!tbl_data || !table->maxlen || !*lenp || (*ppos && !write)) {
 		*lenp = 0;
@@ -569,9 +561,7 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
-		p = kbuf = memdup_user_nul(buffer, left);
-		if (IS_ERR(kbuf))
-			return PTR_ERR(kbuf);
+		p = buffer;
 	}
 
 	for (; left && vleft--; i++, first=0) {
@@ -598,24 +588,17 @@ static int __do_proc_dointvec(void *tbl_data, struct ctl_table *table,
 				break;
 			}
 			if (!first)
-				err = proc_put_char(&buffer, &left, '\t');
-			if (err)
-				break;
-			err = proc_put_long(&buffer, &left, lval, neg);
-			if (err)
-				break;
+				proc_put_char(&buffer, &left, '\t');
+			proc_put_long(&buffer, &left, lval, neg);
 		}
 	}
 
 	if (!write && !first && left && !err)
-		err = proc_put_char(&buffer, &left, '\n');
+		proc_put_char(&buffer, &left, '\n');
 	if (write && !err && left)
 		left -= proc_skip_spaces(&p);
-	if (write) {
-		kfree(kbuf);
-		if (first)
-			return err ? : -EINVAL;
-	}
+	if (write && first)
+		return err ? : -EINVAL;
 	*lenp -= left;
 out:
 	*ppos += *lenp;
@@ -623,7 +606,7 @@ out:
 }
 
 static int do_proc_dointvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos,
+		  void *buffer, size_t *lenp, loff_t *ppos,
 		  int (*conv)(bool *negp, unsigned long *lvalp, int *valp,
 			      int write, void *data),
 		  void *data)
@@ -634,7 +617,7 @@ static int do_proc_dointvec(struct ctl_table *table, int write,
 
 static int do_proc_douintvec_w(unsigned int *tbl_data,
 			       struct ctl_table *table,
-			       void __user *buffer,
+			       void *buffer,
 			       size_t *lenp, loff_t *ppos,
 			       int (*conv)(unsigned long *lvalp,
 					   unsigned int *valp,
@@ -645,7 +628,7 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
 	int err = 0;
 	size_t left;
 	bool neg;
-	char *kbuf = NULL, *p;
+	char *p = buffer;
 
 	left = *lenp;
 
@@ -655,10 +638,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
 	if (left > PAGE_SIZE - 1)
 		left = PAGE_SIZE - 1;
 
-	p = kbuf = memdup_user_nul(buffer, left);
-	if (IS_ERR(kbuf))
-		return -EINVAL;
-
 	left -= proc_skip_spaces(&p);
 	if (!left) {
 		err = -EINVAL;
@@ -682,7 +661,6 @@ static int do_proc_douintvec_w(unsigned int *tbl_data,
 		left -= proc_skip_spaces(&p);
 
 out_free:
-	kfree(kbuf);
 	if (err)
 		return -EINVAL;
 
@@ -694,7 +672,7 @@ bail_early:
 	return err;
 }
 
-static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
+static int do_proc_douintvec_r(unsigned int *tbl_data, void *buffer,
 			       size_t *lenp, loff_t *ppos,
 			       int (*conv)(unsigned long *lvalp,
 					   unsigned int *valp,
@@ -712,11 +690,11 @@ static int do_proc_douintvec_r(unsigned int *tbl_data, void __user *buffer,
 		goto out;
 	}
 
-	err = proc_put_long(&buffer, &left, lval, false);
-	if (err || !left)
+	proc_put_long(&buffer, &left, lval, false);
+	if (!left)
 		goto out;
 
-	err = proc_put_char(&buffer, &left, '\n');
+	proc_put_char(&buffer, &left, '\n');
 
 out:
 	*lenp -= left;
@@ -726,7 +704,7 @@ out:
 }
 
 static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
-			       int write, void __user *buffer,
+			       int write, void *buffer,
 			       size_t *lenp, loff_t *ppos,
 			       int (*conv)(unsigned long *lvalp,
 					   unsigned int *valp,
@@ -762,7 +740,7 @@ static int __do_proc_douintvec(void *tbl_data, struct ctl_table *table,
 }
 
 static int do_proc_douintvec(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos,
+			     void *buffer, size_t *lenp, loff_t *ppos,
 			     int (*conv)(unsigned long *lvalp,
 					 unsigned int *valp,
 					 int write, void *data),
@@ -785,16 +763,15 @@ static int do_proc_douintvec(struct ctl_table *table, int write,
  *
  * Returns 0 on success.
  */
-int proc_dointvec(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_dointvec(struct ctl_table *table, int write, void *buffer,
+		  size_t *lenp, loff_t *ppos)
 {
 	return do_proc_dointvec(table, write, buffer, lenp, ppos, NULL, NULL);
 }
 
 #ifdef CONFIG_COMPACTION
 static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
-					       int write, void __user *buffer,
-					       size_t *lenp, loff_t *ppos)
+		int write, void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret, old;
 
@@ -826,8 +803,8 @@ static int proc_dointvec_minmax_warn_RT_change(struct ctl_table *table,
  *
  * Returns 0 on success.
  */
-int proc_douintvec(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_douintvec(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 				 do_proc_douintvec_conv, NULL);
@@ -838,7 +815,7 @@ int proc_douintvec(struct ctl_table *table, int write,
  * This means we can safely use a temporary.
  */
 static int proc_taint(struct ctl_table *table, int write,
-			       void __user *buffer, size_t *lenp, loff_t *ppos)
+			       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
 	unsigned long tmptaint = get_taint();
@@ -870,7 +847,7 @@ static int proc_taint(struct ctl_table *table, int write,
 
 #ifdef CONFIG_PRINTK
 static int proc_dointvec_minmax_sysadmin(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (write && !capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -936,7 +913,7 @@ static int do_proc_dointvec_minmax_conv(bool *negp, unsigned long *lvalp,
  * Returns 0 on success or -EINVAL on write when the range check fails.
  */
 int proc_dointvec_minmax(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct do_proc_dointvec_minmax_conv_param param = {
 		.min = (int *) table->extra1,
@@ -1005,7 +982,7 @@ static int do_proc_douintvec_minmax_conv(unsigned long *lvalp,
  * Returns 0 on success or -ERANGE on write when the range check fails.
  */
 int proc_douintvec_minmax(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct do_proc_douintvec_minmax_conv_param param = {
 		.min = (unsigned int *) table->extra1,
@@ -1036,7 +1013,7 @@ static int do_proc_dopipe_max_size_conv(unsigned long *lvalp,
 }
 
 static int proc_dopipe_max_size(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return do_proc_douintvec(table, write, buffer, lenp, ppos,
 				 do_proc_dopipe_max_size_conv, NULL);
@@ -1057,7 +1034,7 @@ static void validate_coredump_safety(void)
 }
 
 static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int error = proc_dointvec_minmax(table, write, buffer, lenp, ppos);
 	if (!error)
@@ -1067,7 +1044,7 @@ static int proc_dointvec_minmax_coredump(struct ctl_table *table, int write,
 
 #ifdef CONFIG_COREDUMP
 static int proc_dostring_coredump(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int error = proc_dostring(table, write, buffer, lenp, ppos);
 	if (!error)
@@ -1078,7 +1055,7 @@ static int proc_dostring_coredump(struct ctl_table *table, int write,
 
 #ifdef CONFIG_MAGIC_SYSRQ
 static int sysrq_sysctl_handler(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int tmp, ret;
 
@@ -1096,16 +1073,14 @@ static int sysrq_sysctl_handler(struct ctl_table *table, int write,
 }
 #endif
 
-static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int write,
-				     void __user *buffer,
-				     size_t *lenp, loff_t *ppos,
-				     unsigned long convmul,
-				     unsigned long convdiv)
+static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table,
+		int write, void *buffer, size_t *lenp, loff_t *ppos,
+		unsigned long convmul, unsigned long convdiv)
 {
 	unsigned long *i, *min, *max;
 	int vleft, first = 1, err = 0;
 	size_t left;
-	char *kbuf = NULL, *p;
+	char *p;
 
 	if (!data || !table->maxlen || !*lenp || (*ppos && !write)) {
 		*lenp = 0;
@@ -1124,9 +1099,7 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 
 		if (left > PAGE_SIZE - 1)
 			left = PAGE_SIZE - 1;
-		p = kbuf = memdup_user_nul(buffer, left);
-		if (IS_ERR(kbuf))
-			return PTR_ERR(kbuf);
+		p = buffer;
 	}
 
 	for (; left && vleft--; i++, first = 0) {
@@ -1154,26 +1127,18 @@ static int __do_proc_doulongvec_minmax(void *data, struct ctl_table *table, int
 			*i = val;
 		} else {
 			val = convdiv * (*i) / convmul;
-			if (!first) {
-				err = proc_put_char(&buffer, &left, '\t');
-				if (err)
-					break;
-			}
-			err = proc_put_long(&buffer, &left, val, false);
-			if (err)
-				break;
+			if (!first)
+				proc_put_char(&buffer, &left, '\t');
+			proc_put_long(&buffer, &left, val, false);
 		}
 	}
 
 	if (!write && !first && left && !err)
-		err = proc_put_char(&buffer, &left, '\n');
+		proc_put_char(&buffer, &left, '\n');
 	if (write && !err)
 		left -= proc_skip_spaces(&p);
-	if (write) {
-		kfree(kbuf);
-		if (first)
-			return err ? : -EINVAL;
-	}
+	if (write && first)
+		return err ? : -EINVAL;
 	*lenp -= left;
 out:
 	*ppos += *lenp;
@@ -1181,10 +1146,8 @@ out:
 }
 
 static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
-				     void __user *buffer,
-				     size_t *lenp, loff_t *ppos,
-				     unsigned long convmul,
-				     unsigned long convdiv)
+		void *buffer, size_t *lenp, loff_t *ppos, unsigned long convmul,
+		unsigned long convdiv)
 {
 	return __do_proc_doulongvec_minmax(table->data, table, write,
 			buffer, lenp, ppos, convmul, convdiv);
@@ -1207,7 +1170,7 @@ static int do_proc_doulongvec_minmax(struct ctl_table *table, int write,
  * Returns 0 on success.
  */
 int proc_doulongvec_minmax(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
     return do_proc_doulongvec_minmax(table, write, buffer, lenp, ppos, 1l, 1l);
 }
@@ -1230,8 +1193,7 @@ int proc_doulongvec_minmax(struct ctl_table *table, int write,
  * Returns 0 on success.
  */
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-				      void __user *buffer,
-				      size_t *lenp, loff_t *ppos)
+				      void *buffer, size_t *lenp, loff_t *ppos)
 {
     return do_proc_doulongvec_minmax(table, write, buffer,
 				     lenp, ppos, HZ, 1000l);
@@ -1325,7 +1287,7 @@ static int do_proc_dointvec_ms_jiffies_conv(bool *negp, unsigned long *lvalp,
  * Returns 0 on success.
  */
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
     return do_proc_dointvec(table,write,buffer,lenp,ppos,
 		    	    do_proc_dointvec_jiffies_conv,NULL);
@@ -1347,7 +1309,7 @@ int proc_dointvec_jiffies(struct ctl_table *table, int write,
  * Returns 0 on success.
  */
 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp, loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
     return do_proc_dointvec(table,write,buffer,lenp,ppos,
 		    	    do_proc_dointvec_userhz_jiffies_conv,NULL);
@@ -1369,15 +1331,15 @@ int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
  *
  * Returns 0 on success.
  */
-int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+int proc_dointvec_ms_jiffies(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	return do_proc_dointvec(table, write, buffer, lenp, ppos,
 				do_proc_dointvec_ms_jiffies_conv, NULL);
 }
 
-static int proc_do_cad_pid(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+static int proc_do_cad_pid(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	struct pid *new_pid;
 	pid_t tmp;
@@ -1416,7 +1378,7 @@ static int proc_do_cad_pid(struct ctl_table *table, int write,
  * Returns 0 on success.
  */
 int proc_do_large_bitmap(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err = 0;
 	bool first = 1;
@@ -1432,7 +1394,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 	}
 
 	if (write) {
-		char *kbuf, *p;
+		char *p = buffer;
 		size_t skipped = 0;
 
 		if (left > PAGE_SIZE - 1) {
@@ -1441,15 +1403,9 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 			skipped = *lenp - left;
 		}
 
-		p = kbuf = memdup_user_nul(buffer, left);
-		if (IS_ERR(kbuf))
-			return PTR_ERR(kbuf);
-
 		tmp_bitmap = bitmap_zalloc(bitmap_len, GFP_KERNEL);
-		if (!tmp_bitmap) {
-			kfree(kbuf);
+		if (!tmp_bitmap)
 			return -ENOMEM;
-		}
 		proc_skip_char(&p, &left, '\n');
 		while (!err && left) {
 			unsigned long val_a, val_b;
@@ -1513,7 +1469,6 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 			first = 0;
 			proc_skip_char(&p, &left, '\n');
 		}
-		kfree(kbuf);
 		left += skipped;
 	} else {
 		unsigned long bit_a, bit_b = 0;
@@ -1525,27 +1480,17 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 			bit_b = find_next_zero_bit(bitmap, bitmap_len,
 						   bit_a + 1) - 1;
 
-			if (!first) {
-				err = proc_put_char(&buffer, &left, ',');
-				if (err)
-					break;
-			}
-			err = proc_put_long(&buffer, &left, bit_a, false);
-			if (err)
-				break;
+			if (!first)
+				proc_put_char(&buffer, &left, ',');
+			proc_put_long(&buffer, &left, bit_a, false);
 			if (bit_a != bit_b) {
-				err = proc_put_char(&buffer, &left, '-');
-				if (err)
-					break;
-				err = proc_put_long(&buffer, &left, bit_b, false);
-				if (err)
-					break;
+				proc_put_char(&buffer, &left, '-');
+				proc_put_long(&buffer, &left, bit_b, false);
 			}
 
 			first = 0; bit_b++;
 		}
-		if (!err)
-			err = proc_put_char(&buffer, &left, '\n');
+		proc_put_char(&buffer, &left, '\n');
 	}
 
 	if (!err) {
@@ -1566,68 +1511,67 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 #else /* CONFIG_PROC_SYSCTL */
 
 int proc_dostring(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_dointvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_douintvec(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_dointvec_minmax(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+		    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_douintvec_minmax(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_dointvec_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+		    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_dointvec_userhz_jiffies(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+		    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_dointvec_ms_jiffies(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_doulongvec_minmax(struct ctl_table *table, int write,
-		    void __user *buffer, size_t *lenp, loff_t *ppos)
+		    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
 
 int proc_doulongvec_ms_jiffies_minmax(struct ctl_table *table, int write,
-				      void __user *buffer,
-				      size_t *lenp, loff_t *ppos)
+				      void *buffer, size_t *lenp, loff_t *ppos)
 {
-    return -ENOSYS;
+	return -ENOSYS;
 }
 
 int proc_do_large_bitmap(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return -ENOSYS;
 }
@@ -1636,8 +1580,7 @@ int proc_do_large_bitmap(struct ctl_table *table, int write,
 
 #if defined(CONFIG_SYSCTL)
 int proc_do_static_key(struct ctl_table *table, int write,
-		       void __user *buffer, size_t *lenp,
-		       loff_t *ppos)
+		       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct static_key *key = (struct static_key *)table->data;
 	static DEFINE_MUTEX(static_key_mutex);
diff --git a/kernel/time/timer.c b/kernel/time/timer.c
index a5221abb4594..398e6eadb861 100644
--- a/kernel/time/timer.c
+++ b/kernel/time/timer.c
@@ -249,8 +249,7 @@ void timers_update_nohz(void)
 }
 
 int timer_migration_handler(struct ctl_table *table, int write,
-			    void __user *buffer, size_t *lenp,
-			    loff_t *ppos)
+			    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/kernel/trace/trace.c b/kernel/trace/trace.c
index 8d2b98812625..167a74a15b1a 100644
--- a/kernel/trace/trace.c
+++ b/kernel/trace/trace.c
@@ -2661,7 +2661,7 @@ static void output_printk(struct trace_event_buffer *fbuffer)
 }
 
 int tracepoint_printk_sysctl(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp,
+			     void *buffer, size_t *lenp,
 			     loff_t *ppos)
 {
 	int save_tracepoint_printk;
diff --git a/kernel/umh.c b/kernel/umh.c
index 7f255b5a8845..9788ed481a6a 100644
--- a/kernel/umh.c
+++ b/kernel/umh.c
@@ -630,7 +630,7 @@ int call_usermodehelper(const char *path, char **argv, char **envp, int wait)
 EXPORT_SYMBOL(call_usermodehelper);
 
 static int proc_cap_handler(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table t;
 	unsigned long cap_array[_KERNEL_CAPABILITY_U32S];
diff --git a/kernel/utsname_sysctl.c b/kernel/utsname_sysctl.c
index 3732c888a949..4ca61d49885b 100644
--- a/kernel/utsname_sysctl.c
+++ b/kernel/utsname_sysctl.c
@@ -30,7 +30,7 @@ static void *get_uts(struct ctl_table *table)
  *	to observe. Should this be in kernel/sys.c ????
  */
 static int proc_do_uts_string(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table uts_table;
 	int r;
diff --git a/kernel/watchdog.c b/kernel/watchdog.c
index b6b1f54a7837..53ff2c81b084 100644
--- a/kernel/watchdog.c
+++ b/kernel/watchdog.c
@@ -661,7 +661,7 @@ static void proc_watchdog_update(void)
  * proc_soft_watchdog | soft_watchdog_user_enabled | SOFT_WATCHDOG_ENABLED
  */
 static int proc_watchdog_common(int which, struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err, old, *param = table->data;
 
@@ -688,7 +688,7 @@ static int proc_watchdog_common(int which, struct ctl_table *table, int write,
  * /proc/sys/kernel/watchdog
  */
 int proc_watchdog(struct ctl_table *table, int write,
-		  void __user *buffer, size_t *lenp, loff_t *ppos)
+		  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return proc_watchdog_common(NMI_WATCHDOG_ENABLED|SOFT_WATCHDOG_ENABLED,
 				    table, write, buffer, lenp, ppos);
@@ -698,7 +698,7 @@ int proc_watchdog(struct ctl_table *table, int write,
  * /proc/sys/kernel/nmi_watchdog
  */
 int proc_nmi_watchdog(struct ctl_table *table, int write,
-		      void __user *buffer, size_t *lenp, loff_t *ppos)
+		      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!nmi_watchdog_available && write)
 		return -ENOTSUPP;
@@ -710,7 +710,7 @@ int proc_nmi_watchdog(struct ctl_table *table, int write,
  * /proc/sys/kernel/soft_watchdog
  */
 int proc_soft_watchdog(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+			void *buffer, size_t *lenp, loff_t *ppos)
 {
 	return proc_watchdog_common(SOFT_WATCHDOG_ENABLED,
 				    table, write, buffer, lenp, ppos);
@@ -720,7 +720,7 @@ int proc_soft_watchdog(struct ctl_table *table, int write,
  * /proc/sys/kernel/watchdog_thresh
  */
 int proc_watchdog_thresh(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err, old;
 
@@ -743,7 +743,7 @@ int proc_watchdog_thresh(struct ctl_table *table, int write,
  * been brought online, if desired.
  */
 int proc_watchdog_cpumask(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int err;
 
diff --git a/mm/compaction.c b/mm/compaction.c
index 46f0fcc93081..d8cfb7b99a83 100644
--- a/mm/compaction.c
+++ b/mm/compaction.c
@@ -2463,7 +2463,7 @@ int sysctl_compact_memory;
  * /proc/sys/vm/compact_memory
  */
 int sysctl_compaction_handler(struct ctl_table *table, int write,
-			void __user *buffer, size_t *length, loff_t *ppos)
+			void *buffer, size_t *length, loff_t *ppos)
 {
 	if (write)
 		compact_nodes();
diff --git a/mm/hugetlb.c b/mm/hugetlb.c
index cd459155d28a..2277c5728b1f 100644
--- a/mm/hugetlb.c
+++ b/mm/hugetlb.c
@@ -3352,7 +3352,7 @@ static unsigned int cpuset_mems_nr(unsigned int *array)
 #ifdef CONFIG_SYSCTL
 static int hugetlb_sysctl_handler_common(bool obey_mempolicy,
 			 struct ctl_table *table, int write,
-			 void __user *buffer, size_t *length, loff_t *ppos)
+			 void *buffer, size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
 	unsigned long tmp = h->max_huge_pages;
@@ -3375,7 +3375,7 @@ out:
 }
 
 int hugetlb_sysctl_handler(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *length, loff_t *ppos)
+			  void *buffer, size_t *length, loff_t *ppos)
 {
 
 	return hugetlb_sysctl_handler_common(false, table, write,
@@ -3384,7 +3384,7 @@ int hugetlb_sysctl_handler(struct ctl_table *table, int write,
 
 #ifdef CONFIG_NUMA
 int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *length, loff_t *ppos)
+			  void *buffer, size_t *length, loff_t *ppos)
 {
 	return hugetlb_sysctl_handler_common(true, table, write,
 							buffer, length, ppos);
@@ -3392,8 +3392,7 @@ int hugetlb_mempolicy_sysctl_handler(struct ctl_table *table, int write,
 #endif /* CONFIG_NUMA */
 
 int hugetlb_overcommit_handler(struct ctl_table *table, int write,
-			void __user *buffer,
-			size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	struct hstate *h = &default_hstate;
 	unsigned long tmp;
diff --git a/mm/page-writeback.c b/mm/page-writeback.c
index 7326b54ab728..d3ee4c4dafac 100644
--- a/mm/page-writeback.c
+++ b/mm/page-writeback.c
@@ -512,8 +512,7 @@ bool node_dirty_ok(struct pglist_data *pgdat)
 }
 
 int dirty_background_ratio_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -524,8 +523,7 @@ int dirty_background_ratio_handler(struct ctl_table *table, int write,
 }
 
 int dirty_background_bytes_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -535,9 +533,8 @@ int dirty_background_bytes_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
-int dirty_ratio_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+int dirty_ratio_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int old_ratio = vm_dirty_ratio;
 	int ret;
@@ -551,8 +548,7 @@ int dirty_ratio_handler(struct ctl_table *table, int write,
 }
 
 int dirty_bytes_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *lenp,
-		loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned long old_bytes = vm_dirty_bytes;
 	int ret;
@@ -1972,7 +1968,7 @@ bool wb_over_bg_thresh(struct bdi_writeback *wb)
  * sysctl handler for /proc/sys/vm/dirty_writeback_centisecs
  */
 int dirty_writeback_centisecs_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	unsigned int old_interval = dirty_writeback_interval;
 	int ret;
diff --git a/mm/page_alloc.c b/mm/page_alloc.c
index 62c1550cd43e..0c43e9ae5004 100644
--- a/mm/page_alloc.c
+++ b/mm/page_alloc.c
@@ -5546,21 +5546,11 @@ char numa_zonelist_order[] = "Node";
  * sysctl handler for numa_zonelist_order
  */
 int numa_zonelist_order_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length,
-		loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
-	char *str;
-	int ret;
-
-	if (!write)
-		return proc_dostring(table, write, buffer, length, ppos);
-	str = memdup_user_nul(buffer, 16);
-	if (IS_ERR(str))
-		return PTR_ERR(str);
-
-	ret = __parse_numa_zonelist_order(str);
-	kfree(str);
-	return ret;
+	if (write)
+		return __parse_numa_zonelist_order(buffer);
+	return proc_dostring(table, write, buffer, length, ppos);
 }
 
 
@@ -7963,7 +7953,7 @@ core_initcall(init_per_zone_wmark_min)
  *	changes.
  */
 int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
 
@@ -7979,7 +7969,7 @@ int min_free_kbytes_sysctl_handler(struct ctl_table *table, int write,
 }
 
 int watermark_scale_factor_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
 
@@ -8009,7 +7999,7 @@ static void setup_min_unmapped_ratio(void)
 
 
 int sysctl_min_unmapped_ratio_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
 
@@ -8036,7 +8026,7 @@ static void setup_min_slab_ratio(void)
 }
 
 int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int rc;
 
@@ -8060,7 +8050,7 @@ int sysctl_min_slab_ratio_sysctl_handler(struct ctl_table *table, int write,
  * if in function of the boot time zone sizes.
  */
 int lowmem_reserve_ratio_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	proc_dointvec_minmax(table, write, buffer, length, ppos);
 	setup_per_zone_lowmem_reserve();
@@ -8082,7 +8072,7 @@ static void __zone_pcp_update(struct zone *zone)
  * pagelist can have before it gets flushed back to buddy allocator.
  */
 int percpu_pagelist_fraction_sysctl_handler(struct ctl_table *table, int write,
-	void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	struct zone *zone;
 	int old_percpu_pagelist_fraction;
diff --git a/mm/util.c b/mm/util.c
index 988d11e6c17c..8defc8ec141f 100644
--- a/mm/util.c
+++ b/mm/util.c
@@ -717,9 +717,8 @@ int sysctl_max_map_count __read_mostly = DEFAULT_MAX_MAP_COUNT;
 unsigned long sysctl_user_reserve_kbytes __read_mostly = 1UL << 17; /* 128MB */
 unsigned long sysctl_admin_reserve_kbytes __read_mostly = 1UL << 13; /* 8MB */
 
-int overcommit_ratio_handler(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp,
-			     loff_t *ppos)
+int overcommit_ratio_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -729,9 +728,8 @@ int overcommit_ratio_handler(struct ctl_table *table, int write,
 	return ret;
 }
 
-int overcommit_kbytes_handler(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp,
-			     loff_t *ppos)
+int overcommit_kbytes_handler(struct ctl_table *table, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/mm/vmstat.c b/mm/vmstat.c
index 96d21a792b57..c03a8c914922 100644
--- a/mm/vmstat.c
+++ b/mm/vmstat.c
@@ -76,7 +76,7 @@ static void invalid_numa_statistics(void)
 static DEFINE_MUTEX(vm_numa_stat_lock);
 
 int sysctl_vm_numa_stat_handler(struct ctl_table *table, int write,
-		void __user *buffer, size_t *length, loff_t *ppos)
+		void *buffer, size_t *length, loff_t *ppos)
 {
 	int ret, oldval;
 
@@ -1751,7 +1751,7 @@ static void refresh_vm_stats(struct work_struct *work)
 }
 
 int vmstat_refresh(struct ctl_table *table, int write,
-		   void __user *buffer, size_t *lenp, loff_t *ppos)
+		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	long val;
 	int err;
diff --git a/net/bridge/br_netfilter_hooks.c b/net/bridge/br_netfilter_hooks.c
index 59980ecfc962..04c3f9a82650 100644
--- a/net/bridge/br_netfilter_hooks.c
+++ b/net/bridge/br_netfilter_hooks.c
@@ -1027,7 +1027,7 @@ int br_nf_hook_thresh(unsigned int hook, struct net *net,
 #ifdef CONFIG_SYSCTL
 static
 int brnf_sysctl_call_tables(struct ctl_table *ctl, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
+			    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index 39d37d0ef575..3f2263e79e4b 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -3379,7 +3379,7 @@ EXPORT_SYMBOL(neigh_app_ns);
 static int unres_qlen_max = INT_MAX / SKB_TRUESIZE(ETH_FRAME_LEN);
 
 static int proc_unres_qlen(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int size, ret;
 	struct ctl_table tmp = *ctl;
@@ -3443,8 +3443,8 @@ static void neigh_proc_update(struct ctl_table *ctl, int write)
 }
 
 static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
-					   void __user *buffer,
-					   size_t *lenp, loff_t *ppos)
+					   void *buffer, size_t *lenp,
+					   loff_t *ppos)
 {
 	struct ctl_table tmp = *ctl;
 	int ret;
@@ -3457,8 +3457,8 @@ static int neigh_proc_dointvec_zero_intmax(struct ctl_table *ctl, int write,
 	return ret;
 }
 
-int neigh_proc_dointvec(struct ctl_table *ctl, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+int neigh_proc_dointvec(struct ctl_table *ctl, int write, void *buffer,
+			size_t *lenp, loff_t *ppos)
 {
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
 
@@ -3467,8 +3467,7 @@ int neigh_proc_dointvec(struct ctl_table *ctl, int write,
 }
 EXPORT_SYMBOL(neigh_proc_dointvec);
 
-int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
-				void __user *buffer,
+int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write, void *buffer,
 				size_t *lenp, loff_t *ppos)
 {
 	int ret = proc_dointvec_jiffies(ctl, write, buffer, lenp, ppos);
@@ -3479,8 +3478,8 @@ int neigh_proc_dointvec_jiffies(struct ctl_table *ctl, int write,
 EXPORT_SYMBOL(neigh_proc_dointvec_jiffies);
 
 static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
-					      void __user *buffer,
-					      size_t *lenp, loff_t *ppos)
+					      void *buffer, size_t *lenp,
+					      loff_t *ppos)
 {
 	int ret = proc_dointvec_userhz_jiffies(ctl, write, buffer, lenp, ppos);
 
@@ -3489,8 +3488,7 @@ static int neigh_proc_dointvec_userhz_jiffies(struct ctl_table *ctl, int write,
 }
 
 int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
-				   void __user *buffer,
-				   size_t *lenp, loff_t *ppos)
+				   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = proc_dointvec_ms_jiffies(ctl, write, buffer, lenp, ppos);
 
@@ -3500,8 +3498,8 @@ int neigh_proc_dointvec_ms_jiffies(struct ctl_table *ctl, int write,
 EXPORT_SYMBOL(neigh_proc_dointvec_ms_jiffies);
 
 static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
-					  void __user *buffer,
-					  size_t *lenp, loff_t *ppos)
+					  void *buffer, size_t *lenp,
+					  loff_t *ppos)
 {
 	int ret = proc_unres_qlen(ctl, write, buffer, lenp, ppos);
 
@@ -3510,8 +3508,8 @@ static int neigh_proc_dointvec_unres_qlen(struct ctl_table *ctl, int write,
 }
 
 static int neigh_proc_base_reachable_time(struct ctl_table *ctl, int write,
-					  void __user *buffer,
-					  size_t *lenp, loff_t *ppos)
+					  void *buffer, size_t *lenp,
+					  loff_t *ppos)
 {
 	struct neigh_parms *p = ctl->extra2;
 	int ret;
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 9f9e00ba3ad7..0ddb13a6282b 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -45,7 +45,7 @@ EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
 
 #ifdef CONFIG_RPS
 static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned int orig_size, size;
 	int ret, i;
@@ -115,8 +115,7 @@ static int rps_sock_flow_sysctl(struct ctl_table *table, int write,
 static DEFINE_MUTEX(flow_limit_update_mutex);
 
 static int flow_limit_cpu_sysctl(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct sd_flow_limit *cur;
 	struct softnet_data *sd;
@@ -180,10 +179,7 @@ write_unlock:
 		}
 		if (len < *lenp)
 			kbuf[len++] = '\n';
-		if (copy_to_user(buffer, kbuf, len)) {
-			ret = -EFAULT;
-			goto done;
-		}
+		memcpy(buffer, kbuf, len);
 		*lenp = len;
 		*ppos += len;
 	}
@@ -194,8 +190,7 @@ done:
 }
 
 static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
-				       void __user *buffer, size_t *lenp,
-				       loff_t *ppos)
+				       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	unsigned int old, *ptr;
 	int ret;
@@ -217,7 +212,7 @@ static int flow_limit_table_len_sysctl(struct ctl_table *table, int write,
 
 #ifdef CONFIG_NET_SCHED
 static int set_default_qdisc(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char id[IFNAMSIZ];
 	struct ctl_table tbl = {
@@ -236,7 +231,7 @@ static int set_default_qdisc(struct ctl_table *table, int write,
 #endif
 
 static int proc_do_dev_weight(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
@@ -251,7 +246,7 @@ static int proc_do_dev_weight(struct ctl_table *table, int write,
 }
 
 static int proc_do_rss_key(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table fake_table;
 	char buf[NETDEV_RSS_KEY_LEN * 3];
@@ -264,7 +259,7 @@ static int proc_do_rss_key(struct ctl_table *table, int write,
 
 #ifdef CONFIG_BPF_JIT
 static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
-					   void __user *buffer, size_t *lenp,
+					   void *buffer, size_t *lenp,
 					   loff_t *ppos)
 {
 	int ret, jit_enable = *(int *)table->data;
@@ -291,8 +286,7 @@ static int proc_dointvec_minmax_bpf_enable(struct ctl_table *table, int write,
 # ifdef CONFIG_HAVE_EBPF_JIT
 static int
 proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
-				    void __user *buffer, size_t *lenp,
-				    loff_t *ppos)
+				    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
@@ -303,8 +297,7 @@ proc_dointvec_minmax_bpf_restricted(struct ctl_table *table, int write,
 
 static int
 proc_dolongvec_minmax_bpf_restricted(struct ctl_table *table, int write,
-				     void __user *buffer, size_t *lenp,
-				     loff_t *ppos)
+				     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!capable(CAP_SYS_ADMIN))
 		return -EPERM;
diff --git a/net/decnet/dn_dev.c b/net/decnet/dn_dev.c
index cca7ae712995..65abcf1b3210 100644
--- a/net/decnet/dn_dev.c
+++ b/net/decnet/dn_dev.c
@@ -160,8 +160,8 @@ static int max_t3[] = { 8191 }; /* Must fit in 16 bits when multiplied by BCT3MU
 static int min_priority[1];
 static int max_priority[] = { 127 }; /* From DECnet spec */
 
-static int dn_forwarding_proc(struct ctl_table *, int,
-			void __user *, size_t *, loff_t *);
+static int dn_forwarding_proc(struct ctl_table *, int, void *, size_t *,
+		loff_t *);
 static struct dn_dev_sysctl_table {
 	struct ctl_table_header *sysctl_header;
 	struct ctl_table dn_dev_vars[5];
@@ -245,8 +245,7 @@ static void dn_dev_sysctl_unregister(struct dn_dev_parms *parms)
 }
 
 static int dn_forwarding_proc(struct ctl_table *table, int write,
-				void __user *buffer,
-				size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 #ifdef CONFIG_DECNET_ROUTER
 	struct net_device *dev = table->extra1;
diff --git a/net/decnet/sysctl_net_decnet.c b/net/decnet/sysctl_net_decnet.c
index 55bf64a22b59..deae519bdeec 100644
--- a/net/decnet/sysctl_net_decnet.c
+++ b/net/decnet/sysctl_net_decnet.c
@@ -134,8 +134,7 @@ static int parse_addr(__le16 *addr, char *str)
 }
 
 static int dn_node_address_handler(struct ctl_table *table, int write,
-				void __user *buffer,
-				size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char addr[DN_ASCBUF_LEN];
 	size_t len;
@@ -148,10 +147,7 @@ static int dn_node_address_handler(struct ctl_table *table, int write,
 
 	if (write) {
 		len = (*lenp < DN_ASCBUF_LEN) ? *lenp : (DN_ASCBUF_LEN-1);
-
-		if (copy_from_user(addr, buffer, len))
-			return -EFAULT;
-
+		memcpy(addr, buffer, len);
 		addr[len] = 0;
 		strip_it(addr);
 
@@ -173,11 +169,9 @@ static int dn_node_address_handler(struct ctl_table *table, int write,
 	len = strlen(addr);
 	addr[len++] = '\n';
 
-	if (len > *lenp) len = *lenp;
-
-	if (copy_to_user(buffer, addr, len))
-		return -EFAULT;
-
+	if (len > *lenp)
+		len = *lenp;
+	memcpy(buffer, addr, len);
 	*lenp = len;
 	*ppos += len;
 
@@ -185,8 +179,7 @@ static int dn_node_address_handler(struct ctl_table *table, int write,
 }
 
 static int dn_def_dev_handler(struct ctl_table *table, int write,
-				void __user *buffer,
-				size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	size_t len;
 	struct net_device *dev;
@@ -201,9 +194,7 @@ static int dn_def_dev_handler(struct ctl_table *table, int write,
 		if (*lenp > 16)
 			return -E2BIG;
 
-		if (copy_from_user(devname, buffer, *lenp))
-			return -EFAULT;
-
+		memcpy(devname, buffer, *lenp);
 		devname[*lenp] = 0;
 		strip_it(devname);
 
@@ -238,9 +229,7 @@ static int dn_def_dev_handler(struct ctl_table *table, int write,
 
 	if (len > *lenp) len = *lenp;
 
-	if (copy_to_user(buffer, devname, len))
-		return -EFAULT;
-
+	memcpy(buffer, devname, len);
 	*lenp = len;
 	*ppos += len;
 
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index 30fa42f5997d..a118978d222c 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2361,8 +2361,7 @@ static int devinet_conf_ifindex(struct net *net, struct ipv4_devconf *cnf)
 }
 
 static int devinet_conf_proc(struct ctl_table *ctl, int write,
-			     void __user *buffer,
-			     size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int old_value = *(int *)ctl->data;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
@@ -2414,8 +2413,7 @@ static int devinet_conf_proc(struct ctl_table *ctl, int write,
 }
 
 static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
-				  void __user *buffer,
-				  size_t *lenp, loff_t *ppos)
+				  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
@@ -2458,8 +2456,7 @@ static int devinet_sysctl_forward(struct ctl_table *ctl, int write,
 }
 
 static int ipv4_doint_and_flush(struct ctl_table *ctl, int write,
-				void __user *buffer,
-				size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
diff --git a/net/ipv4/route.c b/net/ipv4/route.c
index 788c69d9bfe0..041f4dcac440 100644
--- a/net/ipv4/route.c
+++ b/net/ipv4/route.c
@@ -3336,8 +3336,7 @@ static int ip_rt_gc_elasticity __read_mostly	= 8;
 static int ip_min_valid_pmtu __read_mostly	= IPV4_MIN_MTU;
 
 static int ipv4_sysctl_rtcache_flush(struct ctl_table *__ctl, int write,
-					void __user *buffer,
-					size_t *lenp, loff_t *ppos)
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = (struct net *)__ctl->extra1;
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 81b267e990a1..868e317cc324 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -71,8 +71,7 @@ static void set_local_port_range(struct net *net, int range[2])
 
 /* Validate changes from /proc interface. */
 static int ipv4_local_port_range(struct ctl_table *table, int write,
-				 void __user *buffer,
-				 size_t *lenp, loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net =
 		container_of(table->data, struct net, ipv4.ip_local_ports.range);
@@ -107,7 +106,7 @@ static int ipv4_local_port_range(struct ctl_table *table, int write,
 
 /* Validate changes from /proc interface. */
 static int ipv4_privileged_ports(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
 	    ipv4.sysctl_ip_prot_sock);
@@ -168,8 +167,7 @@ static void set_ping_group_range(struct ctl_table *table, kgid_t low, kgid_t hig
 
 /* Validate changes from /proc interface. */
 static int ipv4_ping_group_range(struct ctl_table *table, int write,
-				 void __user *buffer,
-				 size_t *lenp, loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct user_namespace *user_ns = current_user_ns();
 	int ret;
@@ -204,8 +202,7 @@ static int ipv4_ping_group_range(struct ctl_table *table, int write,
 }
 
 static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
-				    void __user *buffer,
-				    size_t *lenp, loff_t *ppos)
+				    void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net;
 	int ret;
@@ -221,7 +218,7 @@ static int ipv4_fwd_update_priority(struct ctl_table *table, int write,
 }
 
 static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
-				       void __user *buffer, size_t *lenp, loff_t *ppos)
+				       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = container_of(ctl->data, struct net,
 				       ipv4.tcp_congestion_control);
@@ -241,9 +238,8 @@ static int proc_tcp_congestion_control(struct ctl_table *ctl, int write,
 }
 
 static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
-						 int write,
-						 void __user *buffer, size_t *lenp,
-						 loff_t *ppos)
+						 int write, void *buffer,
+						 size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX, };
 	int ret;
@@ -258,9 +254,8 @@ static int proc_tcp_available_congestion_control(struct ctl_table *ctl,
 }
 
 static int proc_allowed_congestion_control(struct ctl_table *ctl,
-					   int write,
-					   void __user *buffer, size_t *lenp,
-					   loff_t *ppos)
+					   int write, void *buffer,
+					   size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table tbl = { .maxlen = TCP_CA_BUF_MAX };
 	int ret;
@@ -296,8 +291,7 @@ static int sscanf_key(char *buf, __le32 *key)
 }
 
 static int proc_tcp_fastopen_key(struct ctl_table *table, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
 	    ipv4.sysctl_tcp_fastopen);
@@ -399,7 +393,7 @@ static void proc_configure_early_demux(int enabled, int protocol)
 }
 
 static int proc_tcp_early_demux(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = 0;
 
@@ -415,7 +409,7 @@ static int proc_tcp_early_demux(struct ctl_table *table, int write,
 }
 
 static int proc_udp_early_demux(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret = 0;
 
@@ -431,8 +425,7 @@ static int proc_udp_early_demux(struct ctl_table *table, int write,
 }
 
 static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
-					     int write,
-					     void __user *buffer,
+					     int write, void *buffer,
 					     size_t *lenp, loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
@@ -447,8 +440,7 @@ static int proc_tfo_blackhole_detect_timeout(struct ctl_table *table,
 }
 
 static int proc_tcp_available_ulp(struct ctl_table *ctl,
-				  int write,
-				  void __user *buffer, size_t *lenp,
+				  int write, void *buffer, size_t *lenp,
 				  loff_t *ppos)
 {
 	struct ctl_table tbl = { .maxlen = TCP_ULP_BUF_MAX, };
@@ -466,7 +458,7 @@ static int proc_tcp_available_ulp(struct ctl_table *ctl,
 
 #ifdef CONFIG_IP_ROUTE_MULTIPATH
 static int proc_fib_multipath_hash_policy(struct ctl_table *table, int write,
-					  void __user *buffer, size_t *lenp,
+					  void *buffer, size_t *lenp,
 					  loff_t *ppos)
 {
 	struct net *net = container_of(table->data, struct net,
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 24e319dfb510..9d0e89bccb90 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6108,9 +6108,8 @@ static void ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 
 #ifdef CONFIG_SYSCTL
 
-static
-int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
-			   void __user *buffer, size_t *lenp, loff_t *ppos)
+static int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
@@ -6134,9 +6133,8 @@ int addrconf_sysctl_forward(struct ctl_table *ctl, int write,
 	return ret;
 }
 
-static
-int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+static int addrconf_sysctl_mtu(struct ctl_table *ctl, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct inet6_dev *idev = ctl->extra1;
 	int min_mtu = IPV6_MIN_MTU;
@@ -6206,9 +6204,8 @@ static int addrconf_disable_ipv6(struct ctl_table *table, int *p, int newf)
 	return 0;
 }
 
-static
-int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
-			    void __user *buffer, size_t *lenp, loff_t *ppos)
+static int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
@@ -6232,9 +6229,8 @@ int addrconf_sysctl_disable(struct ctl_table *ctl, int write,
 	return ret;
 }
 
-static
-int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+static int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
+		void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int ret;
@@ -6275,7 +6271,7 @@ int addrconf_sysctl_proxy_ndp(struct ctl_table *ctl, int write,
 }
 
 static int addrconf_sysctl_addr_gen_mode(struct ctl_table *ctl, int write,
-					 void __user *buffer, size_t *lenp,
+					 void *buffer, size_t *lenp,
 					 loff_t *ppos)
 {
 	int ret = 0;
@@ -6337,7 +6333,7 @@ out:
 }
 
 static int addrconf_sysctl_stable_secret(struct ctl_table *ctl, int write,
-					 void __user *buffer, size_t *lenp,
+					 void *buffer, size_t *lenp,
 					 loff_t *ppos)
 {
 	int err;
@@ -6404,8 +6400,7 @@ out:
 
 static
 int addrconf_sysctl_ignore_routes_with_linkdown(struct ctl_table *ctl,
-						int write,
-						void __user *buffer,
+						int write, void *buffer,
 						size_t *lenp,
 						loff_t *ppos)
 {
@@ -6505,10 +6500,8 @@ int addrconf_disable_policy(struct ctl_table *ctl, int *valp, int val)
 	return 0;
 }
 
-static
-int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
-				   void __user *buffer, size_t *lenp,
-				   loff_t *ppos)
+static int addrconf_sysctl_disable_policy(struct ctl_table *ctl, int write,
+				   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = ctl->data;
 	int val = *valp;
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 1ecd4e9b0bdf..58f1255295d3 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1835,7 +1835,8 @@ static void ndisc_warn_deprecated_sysctl(struct ctl_table *ctl,
 	}
 }
 
-int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void __user *buffer, size_t *lenp, loff_t *ppos)
+int ndisc_ifinfo_sysctl_change(struct ctl_table *ctl, int write, void *buffer,
+		size_t *lenp, loff_t *ppos)
 {
 	struct net_device *dev = ctl->extra1;
 	struct inet6_dev *idev;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 310cbddaa533..acdb31e38412 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6088,9 +6088,8 @@ static int rt6_stats_seq_show(struct seq_file *seq, void *v)
 
 #ifdef CONFIG_SYSCTL
 
-static
-int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
-			      void __user *buffer, size_t *lenp, loff_t *ppos)
+static int ipv6_sysctl_rtcache_flush(struct ctl_table *ctl, int write,
+			      void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net;
 	int delay;
diff --git a/net/ipv6/sysctl_net_ipv6.c b/net/ipv6/sysctl_net_ipv6.c
index 63b657aa8d29..fac2135aa47b 100644
--- a/net/ipv6/sysctl_net_ipv6.c
+++ b/net/ipv6/sysctl_net_ipv6.c
@@ -26,8 +26,7 @@ static int auto_flowlabels_min;
 static int auto_flowlabels_max = IP6_AUTO_FLOW_LABEL_MAX;
 
 static int proc_rt6_multipath_hash_policy(struct ctl_table *table, int write,
-					  void __user *buffer, size_t *lenp,
-					  loff_t *ppos)
+					  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net;
 	int ret;
diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index 4701edffb1f7..a42e4ed5ab0e 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1362,8 +1362,7 @@ done:
 	(&((struct mpls_dev *)0)->field)
 
 static int mpls_conf_proc(struct ctl_table *ctl, int write,
-			  void __user *buffer,
-			  size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int oval = *(int *)ctl->data;
 	int ret = proc_dointvec(ctl, write, buffer, lenp, ppos);
@@ -2594,7 +2593,7 @@ nolabels:
 }
 
 static int mpls_platform_labels(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = table->data;
 	int platform_labels = net->mpls.platform_labels;
diff --git a/net/netfilter/ipvs/ip_vs_ctl.c b/net/netfilter/ipvs/ip_vs_ctl.c
index 8d14a1acbc37..412656c34f20 100644
--- a/net/netfilter/ipvs/ip_vs_ctl.c
+++ b/net/netfilter/ipvs/ip_vs_ctl.c
@@ -1736,7 +1736,7 @@ static int three = 3;
 
 static int
 proc_do_defense_mode(struct ctl_table *table, int write,
-		     void __user *buffer, size_t *lenp, loff_t *ppos)
+		     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct netns_ipvs *ipvs = table->extra2;
 	int *valp = table->data;
@@ -1763,7 +1763,7 @@ proc_do_defense_mode(struct ctl_table *table, int write,
 
 static int
 proc_do_sync_threshold(struct ctl_table *table, int write,
-		       void __user *buffer, size_t *lenp, loff_t *ppos)
+		       void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
 	int val[2];
@@ -1788,7 +1788,7 @@ proc_do_sync_threshold(struct ctl_table *table, int write,
 
 static int
 proc_do_sync_ports(struct ctl_table *table, int write,
-		   void __user *buffer, size_t *lenp, loff_t *ppos)
+		   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int *valp = table->data;
 	int val = *valp;
diff --git a/net/netfilter/nf_conntrack_standalone.c b/net/netfilter/nf_conntrack_standalone.c
index 9b57330c81f8..31b027b12ff3 100644
--- a/net/netfilter/nf_conntrack_standalone.c
+++ b/net/netfilter/nf_conntrack_standalone.c
@@ -517,7 +517,7 @@ static unsigned int nf_conntrack_htable_size_user __read_mostly;
 
 static int
 nf_conntrack_hash_sysctl(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/net/netfilter/nf_log.c b/net/netfilter/nf_log.c
index bb25d4c794c7..6cb9f9474b05 100644
--- a/net/netfilter/nf_log.c
+++ b/net/netfilter/nf_log.c
@@ -414,7 +414,7 @@ static struct ctl_table nf_log_sysctl_ftable[] = {
 };
 
 static int nf_log_proc_dostring(struct ctl_table *table, int write,
-			 void __user *buffer, size_t *lenp, loff_t *ppos)
+			 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	const struct nf_logger *logger;
 	char buf[NFLOGGER_NAME_LEN];
diff --git a/net/phonet/sysctl.c b/net/phonet/sysctl.c
index 251e750fd9aa..0d0bf41381c2 100644
--- a/net/phonet/sysctl.c
+++ b/net/phonet/sysctl.c
@@ -49,8 +49,7 @@ void phonet_get_local_port_range(int *min, int *max)
 }
 
 static int proc_local_port_range(struct ctl_table *table, int write,
-				void __user *buffer,
-				size_t *lenp, loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 	int range[2] = {local_port_range[0], local_port_range[1]};
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 66121bc6f34e..46782fac4c16 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -62,8 +62,7 @@ static atomic_t rds_tcp_unloading = ATOMIC_INIT(0);
 static struct kmem_cache *rds_tcp_conn_slab;
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *fpos);
+				 void *buffer, size_t *lenp, loff_t *fpos);
 
 static int rds_tcp_min_sndbuf = SOCK_MIN_SNDBUF;
 static int rds_tcp_min_rcvbuf = SOCK_MIN_RCVBUF;
@@ -676,8 +675,7 @@ static void rds_tcp_sysctl_reset(struct net *net)
 }
 
 static int rds_tcp_skbuf_handler(struct ctl_table *ctl, int write,
-				 void __user *buffer, size_t *lenp,
-				 loff_t *fpos)
+				 void *buffer, size_t *lenp, loff_t *fpos)
 {
 	struct net *net = current->nsproxy->net_ns;
 	int err;
diff --git a/net/sctp/sysctl.c b/net/sctp/sysctl.c
index 4740aa70e652..c16c80963e55 100644
--- a/net/sctp/sysctl.c
+++ b/net/sctp/sysctl.c
@@ -43,20 +43,15 @@ static unsigned long max_autoclose_max =
 	? UINT_MAX : MAX_SCHEDULE_TIMEOUT / HZ;
 
 static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos);
+				 void *buffer, size_t *lenp, loff_t *ppos);
 static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos);
-static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos);
+				void *buffer, size_t *lenp, loff_t *ppos);
+static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write, void *buffer,
+				size_t *lenp, loff_t *ppos);
 static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
-				   void __user *buffer, size_t *lenp,
-				   loff_t *ppos);
+				   void *buffer, size_t *lenp, loff_t *ppos);
 static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
-			     void __user *buffer, size_t *lenp,
-			     loff_t *ppos);
+			     void *buffer, size_t *lenp, loff_t *ppos);
 
 static struct ctl_table sctp_table[] = {
 	{
@@ -343,8 +338,7 @@ static struct ctl_table sctp_net_table[] = {
 };
 
 static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos)
+				 void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = current->nsproxy->net_ns;
 	struct ctl_table tbl;
@@ -389,8 +383,7 @@ static int proc_sctp_do_hmac_alg(struct ctl_table *ctl, int write,
 }
 
 static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = current->nsproxy->net_ns;
 	unsigned int min = *(unsigned int *) ctl->extra1;
@@ -418,8 +411,7 @@ static int proc_sctp_do_rto_min(struct ctl_table *ctl, int write,
 }
 
 static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
-				void __user *buffer, size_t *lenp,
-				loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = current->nsproxy->net_ns;
 	unsigned int min = *(unsigned int *) ctl->extra1;
@@ -447,8 +439,7 @@ static int proc_sctp_do_rto_max(struct ctl_table *ctl, int write,
 }
 
 static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
-				   void __user *buffer, size_t *lenp,
-				   loff_t *ppos)
+				   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (write)
 		pr_warn_once("Changing rto_alpha or rto_beta may lead to "
@@ -458,8 +449,7 @@ static int proc_sctp_do_alpha_beta(struct ctl_table *ctl, int write,
 }
 
 static int proc_sctp_do_auth(struct ctl_table *ctl, int write,
-			     void __user *buffer, size_t *lenp,
-			     loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct net *net = current->nsproxy->net_ns;
 	struct ctl_table tbl;
diff --git a/net/sunrpc/sysctl.c b/net/sunrpc/sysctl.c
index d75f17b56f0e..999eee1ed61c 100644
--- a/net/sunrpc/sysctl.c
+++ b/net/sunrpc/sysctl.c
@@ -60,7 +60,7 @@ rpc_unregister_sysctl(void)
 }
 
 static int proc_do_xprt(struct ctl_table *table, int write,
-			void __user *buffer, size_t *lenp, loff_t *ppos)
+			void *buffer, size_t *lenp, loff_t *ppos)
 {
 	char tmpbuf[256];
 	size_t len;
@@ -70,15 +70,15 @@ static int proc_do_xprt(struct ctl_table *table, int write,
 		return 0;
 	}
 	len = svc_print_xprts(tmpbuf, sizeof(tmpbuf));
-	return simple_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
+	return memory_read_from_buffer(buffer, *lenp, ppos, tmpbuf, len);
 }
 
 static int
-proc_dodebug(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+proc_dodebug(struct ctl_table *table, int write, void *buffer, size_t *lenp,
+	     loff_t *ppos)
 {
-	char		tmpbuf[20], c, *s = NULL;
-	char __user *p;
+	char		tmpbuf[20], *s = NULL;
+	char *p;
 	unsigned int	value;
 	size_t		left, len;
 
@@ -90,18 +90,17 @@ proc_dodebug(struct ctl_table *table, int write,
 	left = *lenp;
 
 	if (write) {
-		if (!access_ok(buffer, left))
-			return -EFAULT;
 		p = buffer;
-		while (left && __get_user(c, p) >= 0 && isspace(c))
-			left--, p++;
+		while (left && isspace(*p)) {
+			left--;
+			p++;
+		}
 		if (!left)
 			goto done;
 
 		if (left > sizeof(tmpbuf) - 1)
 			return -EINVAL;
-		if (copy_from_user(tmpbuf, p, left))
-			return -EFAULT;
+		memcpy(tmpbuf, p, left);
 		tmpbuf[left] = '\0';
 
 		value = simple_strtol(tmpbuf, &s, 0);
@@ -121,11 +120,9 @@ proc_dodebug(struct ctl_table *table, int write,
 		len = sprintf(tmpbuf, "0x%04x", *(unsigned int *) table->data);
 		if (len > left)
 			len = left;
-		if (copy_to_user(buffer, tmpbuf, len))
-			return -EFAULT;
+		memcpy(buffer, tmpbuf, len);
 		if ((left -= len) > 0) {
-			if (put_user('\n', (char __user *)buffer + len))
-				return -EFAULT;
+			*((char *)buffer + len) = '\n';
 			left--;
 		}
 	}
diff --git a/net/sunrpc/xprtrdma/svc_rdma.c b/net/sunrpc/xprtrdma/svc_rdma.c
index 97bca509a391..526da5d4710b 100644
--- a/net/sunrpc/xprtrdma/svc_rdma.c
+++ b/net/sunrpc/xprtrdma/svc_rdma.c
@@ -80,8 +80,7 @@ atomic_t rdma_stat_sq_prod;
  * current value.
  */
 static int read_reset_stat(struct ctl_table *table, int write,
-			   void __user *buffer, size_t *lenp,
-			   loff_t *ppos)
+			   void *buffer, size_t *lenp, loff_t *ppos)
 {
 	atomic_t *stat = (atomic_t *)table->data;
 
@@ -103,8 +102,8 @@ static int read_reset_stat(struct ctl_table *table, int write,
 		len -= *ppos;
 		if (len > *lenp)
 			len = *lenp;
-		if (len && copy_to_user(buffer, str_buf, len))
-			return -EFAULT;
+		if (len)
+			memcpy(buffer, str_buf, len);
 		*lenp = len;
 		*ppos += len;
 	}
diff --git a/security/apparmor/lsm.c b/security/apparmor/lsm.c
index b621ad74f54a..27e371b44dad 100644
--- a/security/apparmor/lsm.c
+++ b/security/apparmor/lsm.c
@@ -1696,7 +1696,7 @@ static int __init alloc_buffers(void)
 
 #ifdef CONFIG_SYSCTL
 static int apparmor_dointvec(struct ctl_table *table, int write,
-			     void __user *buffer, size_t *lenp, loff_t *ppos)
+			     void *buffer, size_t *lenp, loff_t *ppos)
 {
 	if (!policy_admin_capable(NULL))
 		return -EPERM;
diff --git a/security/min_addr.c b/security/min_addr.c
index 94d2b0cf0e7b..88c9a6a21f47 100644
--- a/security/min_addr.c
+++ b/security/min_addr.c
@@ -30,7 +30,7 @@ static void update_mmap_min_addr(void)
  * calls update_mmap_min_addr() so non MAP_FIXED hints get rounded properly
  */
 int mmap_min_addr_handler(struct ctl_table *table, int write,
-			  void __user *buffer, size_t *lenp, loff_t *ppos)
+			  void *buffer, size_t *lenp, loff_t *ppos)
 {
 	int ret;
 
diff --git a/security/yama/yama_lsm.c b/security/yama/yama_lsm.c
index 94dc346370b1..536c99646f6a 100644
--- a/security/yama/yama_lsm.c
+++ b/security/yama/yama_lsm.c
@@ -430,7 +430,7 @@ static struct security_hook_list yama_hooks[] __lsm_ro_after_init = {
 
 #ifdef CONFIG_SYSCTL
 static int yama_dointvec_minmax(struct ctl_table *table, int write,
-				void __user *buffer, size_t *lenp, loff_t *ppos)
+				void *buffer, size_t *lenp, loff_t *ppos)
 {
 	struct ctl_table table_copy;
 
-- 
cgit v1.2.3-59-g8ed1b


From e62905ae34eaf5fe2cfb254be5e0c097b3b1f798 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Thu, 23 Apr 2020 11:39:20 +0200
Subject: xfrm interface: don't take extra reference to netdev

I don't see any reason to do this. Maybe needed before
commit 56c5ee1a5823 ("xfrm interface: fix memory leak on creation").

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/xfrm/xfrm_interface.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index 3361e3ac5714..eb9928c0a87c 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -145,7 +145,6 @@ static int xfrmi_create(struct net_device *dev)
 	if (err < 0)
 		goto out;
 
-	dev_hold(dev);
 	xfrmi_link(xfrmn, xi);
 
 	return 0;
@@ -175,7 +174,6 @@ static void xfrmi_dev_uninit(struct net_device *dev)
 	struct xfrmi_net *xfrmn = net_generic(xi->net, xfrmi_net_id);
 
 	xfrmi_unlink(xfrmn, xi);
-	dev_put(dev);
 }
 
 static void xfrmi_scrub_packet(struct sk_buff *skb, bool xnet)
-- 
cgit v1.2.3-59-g8ed1b


From e411eb257b331bf44cbe8845b5351260c8222c6c Mon Sep 17 00:00:00 2001
From: Mao Wenan <maowenan@huawei.com>
Date: Sun, 26 Apr 2020 14:36:35 +0800
Subject: libbpf: Return err if bpf_object__load failed

bpf_object__load() can return various error codes; when it fails to load
the object, bpf_prog_load_xattr() must return that err instead of -EINVAL.

Signed-off-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200426063635.130680-3-maowenan@huawei.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8f480e29a6b0..8e1dc6980fac 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7006,7 +7006,7 @@ int bpf_prog_load_xattr(const struct bpf_prog_load_attr *attr,
 	err = bpf_object__load(obj);
 	if (err) {
 		bpf_object__close(obj);
-		return -EINVAL;
+		return err;
 	}
 
 	*pobj = obj;
-- 
cgit v1.2.3-59-g8ed1b


From 0767ec04289757c0edc2322957ba51d97446eaa4 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 24 Apr 2020 06:59:14 +0200
Subject: net: ag71xx: extend link validation to support other SoCs

Most (all?) QCA SoCs have two MACs with different supported link
capabilities. Extend ag71xx_mac_validate() to properly validate these
variants.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/ag71xx.c | 43 +++++++++++++++++++++++++++++++----
 1 file changed, 38 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/atheros/ag71xx.c b/drivers/net/ethernet/atheros/ag71xx.c
index 02b7705393ca..112edbd30823 100644
--- a/drivers/net/ethernet/atheros/ag71xx.c
+++ b/drivers/net/ethernet/atheros/ag71xx.c
@@ -871,13 +871,40 @@ static void ag71xx_mac_validate(struct phylink_config *config,
 			    unsigned long *supported,
 			    struct phylink_link_state *state)
 {
+	struct ag71xx *ag = netdev_priv(to_net_dev(config->dev));
 	__ETHTOOL_DECLARE_LINK_MODE_MASK(mask) = { 0, };
 
-	if (state->interface != PHY_INTERFACE_MODE_NA &&
-	    state->interface != PHY_INTERFACE_MODE_GMII &&
-	    state->interface != PHY_INTERFACE_MODE_MII) {
-		bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
-		return;
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_NA:
+		break;
+	case PHY_INTERFACE_MODE_MII:
+		if ((ag71xx_is(ag, AR9330) && ag->mac_idx == 0) ||
+		    ag71xx_is(ag, AR9340) ||
+		    ag71xx_is(ag, QCA9530) ||
+		    (ag71xx_is(ag, QCA9550) && ag->mac_idx == 1))
+			break;
+		goto unsupported;
+	case PHY_INTERFACE_MODE_GMII:
+		if ((ag71xx_is(ag, AR9330) && ag->mac_idx == 1) ||
+		    (ag71xx_is(ag, AR9340) && ag->mac_idx == 1) ||
+		    (ag71xx_is(ag, QCA9530) && ag->mac_idx == 1))
+			break;
+		goto unsupported;
+	case PHY_INTERFACE_MODE_SGMII:
+		if (ag71xx_is(ag, QCA9550) && ag->mac_idx == 0)
+			break;
+		goto unsupported;
+	case PHY_INTERFACE_MODE_RMII:
+		if (ag71xx_is(ag, AR9340) && ag->mac_idx == 0)
+			break;
+		goto unsupported;
+	case PHY_INTERFACE_MODE_RGMII:
+		if ((ag71xx_is(ag, AR9340) && ag->mac_idx == 0) ||
+		    (ag71xx_is(ag, QCA9550) && ag->mac_idx == 1))
+			break;
+		goto unsupported;
+	default:
+		goto unsupported;
 	}
 
 	phylink_set(mask, MII);
@@ -889,6 +916,8 @@ static void ag71xx_mac_validate(struct phylink_config *config,
 	phylink_set(mask, 100baseT_Full);
 
 	if (state->interface == PHY_INTERFACE_MODE_NA ||
+	    state->interface == PHY_INTERFACE_MODE_SGMII ||
+	    state->interface == PHY_INTERFACE_MODE_RGMII ||
 	    state->interface == PHY_INTERFACE_MODE_GMII) {
 		phylink_set(mask, 1000baseT_Full);
 		phylink_set(mask, 1000baseX_Full);
@@ -898,6 +927,10 @@ static void ag71xx_mac_validate(struct phylink_config *config,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
 	bitmap_and(state->advertising, state->advertising, mask,
 		   __ETHTOOL_LINK_MODE_MASK_NBITS);
+
+	return;
+unsupported:
+	bitmap_zero(supported, __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
 static void ag71xx_mac_pcs_get_state(struct phylink_config *config,
-- 
cgit v1.2.3-59-g8ed1b


From 3608a199749873edc992c74adf077c3a848121ad Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Fri, 24 Apr 2020 07:21:16 +0200
Subject: dt-bindings: net: convert qca,ar71xx documentation to yaml

Now that we have the DT validation in place, let's convert the device tree
bindings for the Atheros AR71XX over to a YAML schema.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/qca,ar71xx.txt         |  45 -----
 .../devicetree/bindings/net/qca,ar71xx.yaml        | 216 +++++++++++++++++++++
 2 files changed, 216 insertions(+), 45 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/qca,ar71xx.txt
 create mode 100644 Documentation/devicetree/bindings/net/qca,ar71xx.yaml

diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.txt b/Documentation/devicetree/bindings/net/qca,ar71xx.txt
deleted file mode 100644
index 2a33e71ba72b..000000000000
--- a/Documentation/devicetree/bindings/net/qca,ar71xx.txt
+++ /dev/null
@@ -1,45 +0,0 @@
-Required properties:
-- compatible:	Should be "qca,<soc>-eth". Currently support compatibles are:
-		qca,ar7100-eth - Atheros AR7100
-		qca,ar7240-eth - Atheros AR7240
-		qca,ar7241-eth - Atheros AR7241
-		qca,ar7242-eth - Atheros AR7242
-		qca,ar9130-eth - Atheros AR9130
-		qca,ar9330-eth - Atheros AR9330
-		qca,ar9340-eth - Atheros AR9340
-		qca,qca9530-eth - Qualcomm Atheros QCA9530
-		qca,qca9550-eth - Qualcomm Atheros QCA9550
-		qca,qca9560-eth - Qualcomm Atheros QCA9560
-
-- reg : Address and length of the register set for the device
-- interrupts : Should contain eth interrupt
-- phy-mode : See ethernet.txt file in the same directory
-- clocks: the clock used by the core
-- clock-names: the names of the clock listed in the clocks property. These are
-	"eth" and "mdio".
-- resets: Should contain phandles to the reset signals
-- reset-names: Should contain the names of reset signal listed in the resets
-		property. These are "mac" and "mdio"
-
-Optional properties:
-- phy-handle : phandle to the PHY device connected to this device.
-- fixed-link : Assume a fixed link. See fixed-link.txt in the same directory.
-  Use instead of phy-handle.
-
-Optional subnodes:
-- mdio : specifies the mdio bus, used as a container for phy nodes
-  according to phy.txt in the same directory
-
-Example:
-
-ethernet@1a000000 {
-	compatible = "qca,ar9330-eth";
-	reg = <0x1a000000 0x200>;
-	interrupts = <5>;
-	resets = <&rst 13>, <&rst 23>;
-	reset-names = "mac", "mdio";
-	clocks = <&pll ATH79_CLK_AHB>, <&pll ATH79_CLK_MDIO>;
-	clock-names = "eth", "mdio";
-
-	phy-mode = "gmii";
-};
diff --git a/Documentation/devicetree/bindings/net/qca,ar71xx.yaml b/Documentation/devicetree/bindings/net/qca,ar71xx.yaml
new file mode 100644
index 000000000000..f99a5aabe923
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qca,ar71xx.yaml
@@ -0,0 +1,216 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qca,ar71xx.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: QCA AR71XX MAC
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+maintainers:
+  - Oleksij Rempel <o.rempel@pengutronix.de>
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+          - enum:
+              - qca,ar7100-eth   # Atheros AR7100
+              - qca,ar7240-eth   # Atheros AR7240
+              - qca,ar7241-eth   # Atheros AR7241
+              - qca,ar7242-eth   # Atheros AR7242
+              - qca,ar9130-eth   # Atheros AR9130
+              - qca,ar9330-eth   # Atheros AR9330
+              - qca,ar9340-eth   # Atheros AR9340
+              - qca,qca9530-eth  # Qualcomm Atheros QCA9530
+              - qca,qca9550-eth  # Qualcomm Atheros QCA9550
+              - qca,qca9560-eth  # Qualcomm Atheros QCA9560
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  '#address-cells':
+    description: number of address cells for the MDIO bus
+    const: 1
+
+  '#size-cells':
+    description: number of size cells on the MDIO bus
+    const: 0
+
+  clocks:
+    items:
+      - description: MAC main clock
+      - description: MDIO clock
+
+  clock-names:
+    items:
+      - const: eth
+      - const: mdio
+
+  resets:
+    items:
+      - description: MAC reset
+      - description: MDIO reset
+
+  reset-names:
+    items:
+      - const: mac
+      - const: mdio
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - phy-mode
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+
+examples:
+  # Lager board
+  - |
+    eth0: ethernet@19000000 {
+        compatible = "qca,ar9330-eth";
+        reg = <0x19000000 0x200>;
+        interrupts = <4>;
+        resets = <&rst 9>, <&rst 22>;
+        reset-names = "mac", "mdio";
+        clocks = <&pll 1>, <&pll 2>;
+        clock-names = "eth", "mdio";
+        qca,ethcfg = <&ethcfg>;
+        phy-mode = "mii";
+        phy-handle = <&phy_port4>;
+    };
+
+    eth1: ethernet@1a000000 {
+        compatible = "qca,ar9330-eth";
+        reg = <0x1a000000 0x200>;
+        interrupts = <5>;
+        resets = <&rst 13>, <&rst 23>;
+        reset-names = "mac", "mdio";
+        clocks = <&pll 1>, <&pll 2>;
+        clock-names = "eth", "mdio";
+
+        phy-mode = "gmii";
+
+        status = "disabled";
+
+        fixed-link {
+            speed = <1000>;
+            full-duplex;
+        };
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            switch10: switch@10 {
+                #address-cells = <1>;
+                #size-cells = <0>;
+
+                compatible = "qca,ar9331-switch";
+                reg = <0x10>;
+                resets = <&rst 8>;
+                reset-names = "switch";
+
+                interrupt-parent = <&miscintc>;
+                interrupts = <12>;
+
+                interrupt-controller;
+                #interrupt-cells = <1>;
+
+                ports {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    switch_port0: port@0 {
+                        reg = <0x0>;
+                        label = "cpu";
+                        ethernet = <&eth1>;
+
+                        phy-mode = "gmii";
+
+                        fixed-link {
+                            speed = <1000>;
+                            full-duplex;
+                        };
+                    };
+
+                    switch_port1: port@1 {
+                        reg = <0x1>;
+                        phy-handle = <&phy_port0>;
+                        phy-mode = "internal";
+
+                        status = "disabled";
+                    };
+
+                    switch_port2: port@2 {
+                        reg = <0x2>;
+                        phy-handle = <&phy_port1>;
+                        phy-mode = "internal";
+
+                        status = "disabled";
+                    };
+
+                    switch_port3: port@3 {
+                        reg = <0x3>;
+                        phy-handle = <&phy_port2>;
+                        phy-mode = "internal";
+
+                        status = "disabled";
+                    };
+
+                    switch_port4: port@4 {
+                        reg = <0x4>;
+                        phy-handle = <&phy_port3>;
+                        phy-mode = "internal";
+
+                        status = "disabled";
+                    };
+                };
+
+                mdio {
+                    #address-cells = <1>;
+                    #size-cells = <0>;
+
+                    interrupt-parent = <&switch10>;
+
+                    phy_port0: phy@0 {
+                        reg = <0x0>;
+                        interrupts = <0>;
+                        status = "disabled";
+                    };
+
+                    phy_port1: phy@1 {
+                        reg = <0x1>;
+                        interrupts = <0>;
+                        status = "disabled";
+                    };
+
+                    phy_port2: phy@2 {
+                        reg = <0x2>;
+                        interrupts = <0>;
+                        status = "disabled";
+                    };
+
+                    phy_port3: phy@3 {
+                        reg = <0x3>;
+                        interrupts = <0>;
+                        status = "disabled";
+                    };
+
+                    phy_port4: phy@4 {
+                        reg = <0x4>;
+                        interrupts = <0>;
+                        status = "disabled";
+                    };
+                };
+            };
+        };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From 7d3118016787b5c05da94b3bcdb96c9d6ff82c44 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 25 Apr 2020 12:28:14 +0100
Subject: net: rtnetlink: remove redundant assignment to variable err

The variable err is being initialized with a value that is never read
and it is being updated later with a new value. The initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/rtnetlink.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/rtnetlink.c b/net/core/rtnetlink.c
index d6f4f4a9e8ba..2269199c5891 100644
--- a/net/core/rtnetlink.c
+++ b/net/core/rtnetlink.c
@@ -3997,8 +3997,8 @@ static int rtnl_fdb_del(struct sk_buff *skb, struct nlmsghdr *nlh,
 	struct ndmsg *ndm;
 	struct nlattr *tb[NDA_MAX+1];
 	struct net_device *dev;
-	int err = -EINVAL;
 	__u8 *addr;
+	int err;
 	u16 vid;
 
 	if (!netlink_capable(skb, CAP_NET_ADMIN))
-- 
cgit v1.2.3-59-g8ed1b


From 4714d13791f831d253852c8b5d657270becb8b2a Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:21:58 +0200
Subject: bridge: uapi: mrp: Add mrp attributes.

Add new nested netlink attribute to configure the MRP. These attributes are used
by the userspace to add/delete/configure MRP instances and by the kernel to
notify the userspace when the MRP ring gets open/closed. MRP nested attribute
has the following attributes:

IFLA_BRIDGE_MRP_INSTANCE - the parameter type is br_mrp_instance which contains
  the instance id, and the ifindex of the two ports. The ports can't be part of
  multiple instances. This is used to create/delete MRP instances.

IFLA_BRIDGE_MRP_PORT_STATE - the parameter type is u32, which can be forwarding,
  blocking or disabled.

IFLA_BRIDGE_MRP_PORT_ROLE - the parameter type is br_mrp_port_role which
  contains the instance id and the role. The role can be primary or secondary.

IFLA_BRIDGE_MRP_RING_STATE - the parameter type is br_mrp_ring_state which
  contains the instance id and the state. The state can be open or closed.

IFLA_BRIDGE_MRP_RING_ROLE - the parameter type is br_mrp_ring_role which
  contains the instance id and the ring role. The role can be MRM or MRC.

IFLA_BRIDGE_MRP_START_TEST - the parameter type is br_mrp_start_test which
  contains the instance id, the interval at which to send the MRP_Test frames,
  how many test frames can be missed before declaring the ring open and the
  period, which represents how long to send the test frames.

Also add the file include/uapi/linux/mrp_bridge.h which defines all the types
used by MRP that are also needed by the userspace.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h  | 42 +++++++++++++++++++++
 include/uapi/linux/if_ether.h   |  1 +
 include/uapi/linux/mrp_bridge.h | 84 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 127 insertions(+)
 create mode 100644 include/uapi/linux/mrp_bridge.h

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index bfe621ea51b3..bd8c95488f16 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -120,6 +120,7 @@ enum {
 	IFLA_BRIDGE_MODE,
 	IFLA_BRIDGE_VLAN_INFO,
 	IFLA_BRIDGE_VLAN_TUNNEL_INFO,
+	IFLA_BRIDGE_MRP,
 	__IFLA_BRIDGE_MAX,
 };
 #define IFLA_BRIDGE_MAX (__IFLA_BRIDGE_MAX - 1)
@@ -157,6 +158,47 @@ struct bridge_vlan_xstats {
 	__u32 pad2;
 };
 
+enum {
+	IFLA_BRIDGE_MRP_UNSPEC,
+	IFLA_BRIDGE_MRP_INSTANCE,
+	IFLA_BRIDGE_MRP_PORT_STATE,
+	IFLA_BRIDGE_MRP_PORT_ROLE,
+	IFLA_BRIDGE_MRP_RING_STATE,
+	IFLA_BRIDGE_MRP_RING_ROLE,
+	IFLA_BRIDGE_MRP_START_TEST,
+	__IFLA_BRIDGE_MRP_MAX,
+};
+
+struct br_mrp_instance {
+	__u32 ring_id;
+	__u32 p_ifindex;
+	__u32 s_ifindex;
+};
+
+struct br_mrp_port_role {
+	__u32 ring_id;
+	__u32 role;
+};
+
+struct br_mrp_ring_state {
+	__u32 ring_id;
+	__u32 ring_state;
+};
+
+struct br_mrp_ring_role {
+	__u32 ring_id;
+	__u32 ring_role;
+};
+
+struct br_mrp_start_test {
+	__u32 ring_id;
+	__u32 interval;
+	__u32 max_miss;
+	__u32 period;
+};
+
+#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
+
 struct bridge_stp_xstats {
 	__u64 transition_blk;
 	__u64 transition_fwd;
diff --git a/include/uapi/linux/if_ether.h b/include/uapi/linux/if_ether.h
index f6ceb2e63d1e..d6de2b167448 100644
--- a/include/uapi/linux/if_ether.h
+++ b/include/uapi/linux/if_ether.h
@@ -92,6 +92,7 @@
 #define ETH_P_PREAUTH	0x88C7		/* 802.11 Preauthentication */
 #define ETH_P_TIPC	0x88CA		/* TIPC 			*/
 #define ETH_P_LLDP	0x88CC		/* Link Layer Discovery Protocol */
+#define ETH_P_MRP	0x88E3		/* Media Redundancy Protocol	*/
 #define ETH_P_MACSEC	0x88E5		/* 802.1ae MACsec */
 #define ETH_P_8021AH	0x88E7          /* 802.1ah Backbone Service Tag */
 #define ETH_P_MVRP	0x88F5          /* 802.1Q MVRP                  */
diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
new file mode 100644
index 000000000000..2600cdf5a284
--- /dev/null
+++ b/include/uapi/linux/mrp_bridge.h
@@ -0,0 +1,84 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+
+#ifndef _UAPI_LINUX_MRP_BRIDGE_H_
+#define _UAPI_LINUX_MRP_BRIDGE_H_
+
+#include <linux/types.h>
+#include <linux/if_ether.h>
+
+#define MRP_MAX_FRAME_LENGTH		200
+#define MRP_DEFAULT_PRIO		0x8000
+#define MRP_DOMAIN_UUID_LENGTH		16
+#define MRP_VERSION			1
+#define MRP_FRAME_PRIO			7
+
+enum br_mrp_ring_role_type {
+	BR_MRP_RING_ROLE_DISABLED,
+	BR_MRP_RING_ROLE_MRC,
+	BR_MRP_RING_ROLE_MRM,
+};
+
+enum br_mrp_ring_state_type {
+	BR_MRP_RING_STATE_OPEN,
+	BR_MRP_RING_STATE_CLOSED,
+};
+
+enum br_mrp_port_state_type {
+	BR_MRP_PORT_STATE_DISABLED,
+	BR_MRP_PORT_STATE_BLOCKED,
+	BR_MRP_PORT_STATE_FORWARDING,
+	BR_MRP_PORT_STATE_NOT_CONNECTED,
+};
+
+enum br_mrp_port_role_type {
+	BR_MRP_PORT_ROLE_PRIMARY,
+	BR_MRP_PORT_ROLE_SECONDARY,
+	BR_MRP_PORT_ROLE_NONE,
+};
+
+enum br_mrp_tlv_header_type {
+	BR_MRP_TLV_HEADER_END = 0x0,
+	BR_MRP_TLV_HEADER_COMMON = 0x1,
+	BR_MRP_TLV_HEADER_RING_TEST = 0x2,
+	BR_MRP_TLV_HEADER_RING_TOPO = 0x3,
+	BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4,
+	BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5,
+};
+
+struct br_mrp_tlv_hdr {
+	__u8 type;
+	__u8 length;
+};
+
+struct br_mrp_end_hdr {
+	struct br_mrp_tlv_hdr hdr;
+};
+
+struct br_mrp_common_hdr {
+	__u16 seq_id;
+	__u8 domain[MRP_DOMAIN_UUID_LENGTH];
+};
+
+struct br_mrp_ring_test_hdr {
+	__u16 prio;
+	__u8 sa[ETH_ALEN];
+	__u16 port_role;
+	__u16 state;
+	__u16 transitions;
+	__u32 timestamp;
+};
+
+struct br_mrp_ring_topo_hdr {
+	__u16 prio;
+	__u8 sa[ETH_ALEN];
+	__u16 interval;
+};
+
+struct br_mrp_ring_link_hdr {
+	__u8 sa[ETH_ALEN];
+	__u16 port_role;
+	__u16 interval;
+	__u16 blocked;
+};
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 2cc974f83fb505751b7fbcf8dee27bdcc7054a7e Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:21:59 +0200
Subject: bridge: mrp: Update Kconfig

Add the option BRIDGE_MRP to allow MRP support to be built in or left out.
The default value is N.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/Kconfig | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/net/bridge/Kconfig b/net/bridge/Kconfig
index e4fb050e2078..51a6414145d2 100644
--- a/net/bridge/Kconfig
+++ b/net/bridge/Kconfig
@@ -61,3 +61,15 @@ config BRIDGE_VLAN_FILTERING
 	  Say N to exclude this support and reduce the binary size.
 
 	  If unsure, say Y.
+
+config BRIDGE_MRP
+	bool "MRP protocol"
+	depends on BRIDGE
+	default n
+	help
+	  If you say Y here, then the Ethernet bridge will be able to run MRP
+	  protocol to detect loops
+
+	  Say N to exclude this support and reduce the binary size.
+
+	  If unsure, say N.
-- 
cgit v1.2.3-59-g8ed1b


From 4b8d7d4c599182393421c190bae3604b4db9629a Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:00 +0200
Subject: bridge: mrp: Extend bridge interface

To integrate MRP into the bridge, first the bridge needs to be aware of ports
that are part of an MRP ring and which rings are on the bridge.
Therefore extend bridge interface with the following:
- add new flag(BR_MRP_AWARE) to the net bridge ports, this bit will be
  set when the port is added to an MRP instance. In this way it knows if
  the frame was received on MRP ring port
- add new flag(BR_MRP_LOST_CONT) to the net bridge ports, this bit will be set
  when the port lost the continuity of MRP Test frames.
- add a list of MRP instances

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_bridge.h | 2 ++
 net/bridge/br_private.h   | 4 ++++
 2 files changed, 6 insertions(+)

diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h
index 9e57c4411734..b3a8d3054af0 100644
--- a/include/linux/if_bridge.h
+++ b/include/linux/if_bridge.h
@@ -47,6 +47,8 @@ struct br_ip_list {
 #define BR_BCAST_FLOOD		BIT(14)
 #define BR_NEIGH_SUPPRESS	BIT(15)
 #define BR_ISOLATED		BIT(16)
+#define BR_MRP_AWARE		BIT(17)
+#define BR_MRP_LOST_CONT	BIT(18)
 
 #define BR_DEFAULT_AGEING_TIME	(300 * HZ)
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 1f97703a52ff..835a70f8d3ea 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -428,6 +428,10 @@ struct net_bridge {
 	int offload_fwd_mark;
 #endif
 	struct hlist_head		fdb_list;
+
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+	struct list_head		__rcu mrp_list;
+#endif
 };
 
 struct br_input_skb_cb {
-- 
cgit v1.2.3-59-g8ed1b


From 3e54442c93845316762b1b3c75e654463fd1b715 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:01 +0200
Subject: net: bridge: Add port attribute IFLA_BRPORT_MRP_RING_OPEN

This patch adds a new port attribute, IFLA_BRPORT_MRP_RING_OPEN, which allows
notifying userspace when the port has lost the continuity of MRP frames.

This attribute is set by the kernel whenever the SW or HW detects that the ring
is being opened or closed.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_link.h       | 1 +
 net/bridge/br_netlink.c            | 3 +++
 tools/include/uapi/linux/if_link.h | 1 +
 3 files changed, 5 insertions(+)

diff --git a/include/uapi/linux/if_link.h b/include/uapi/linux/if_link.h
index 127c704eeba9..a009365ad67b 100644
--- a/include/uapi/linux/if_link.h
+++ b/include/uapi/linux/if_link.h
@@ -343,6 +343,7 @@ enum {
 	IFLA_BRPORT_NEIGH_SUPPRESS,
 	IFLA_BRPORT_ISOLATED,
 	IFLA_BRPORT_BACKUP_PORT,
+	IFLA_BRPORT_MRP_RING_OPEN,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 43dab4066f91..4084f1ef8641 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -151,6 +151,7 @@ static inline size_t br_port_info_size(void)
 		+ nla_total_size(sizeof(u8))	/* IFLA_BRPORT_MULTICAST_ROUTER */
 #endif
 		+ nla_total_size(sizeof(u16))	/* IFLA_BRPORT_GROUP_FWD_MASK */
+		+ nla_total_size(sizeof(u8))	/* IFLA_BRPORT_MRP_RING_OPEN */
 		+ 0;
 }
 
@@ -213,6 +214,8 @@ static int br_port_fill_attrs(struct sk_buff *skb,
 	    nla_put_u16(skb, IFLA_BRPORT_GROUP_FWD_MASK, p->group_fwd_mask) ||
 	    nla_put_u8(skb, IFLA_BRPORT_NEIGH_SUPPRESS,
 		       !!(p->flags & BR_NEIGH_SUPPRESS)) ||
+	    nla_put_u8(skb, IFLA_BRPORT_MRP_RING_OPEN, !!(p->flags &
+							  BR_MRP_LOST_CONT)) ||
 	    nla_put_u8(skb, IFLA_BRPORT_ISOLATED, !!(p->flags & BR_ISOLATED)))
 		return -EMSGSIZE;
 
diff --git a/tools/include/uapi/linux/if_link.h b/tools/include/uapi/linux/if_link.h
index ca6665ea758a..cafedbbfefbe 100644
--- a/tools/include/uapi/linux/if_link.h
+++ b/tools/include/uapi/linux/if_link.h
@@ -343,6 +343,7 @@ enum {
 	IFLA_BRPORT_NEIGH_SUPPRESS,
 	IFLA_BRPORT_ISOLATED,
 	IFLA_BRPORT_BACKUP_PORT,
+	IFLA_BRPORT_MRP_RING_OPEN,
 	__IFLA_BRPORT_MAX
 };
 #define IFLA_BRPORT_MAX (__IFLA_BRPORT_MAX - 1)
-- 
cgit v1.2.3-59-g8ed1b


From 2f1a11ae11d222b3a3b41d09a85cb2bf8f83db49 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:02 +0200
Subject: bridge: mrp: Add MRP interface.

Define the MRP interface.
This interface is used by the netlink to update the MRP instances and by the MRP
to make the calls to switchdev to offload it to HW.

It defines an MRP instance, 'struct br_mrp'. The instances are kept in a list
that is part of 'struct net_bridge'. Each instance has 2 ring ports,
a bridge and an ID.

In case the HW can't generate MRP Test frames then the SW will generate those.

br_mrp_add - adds a new MRP instance.

br_mrp_del - deletes an existing MRP instance. Each instance has an ID(ring_id).

br_mrp_set_port_state - changes the port state. The port can be in forwarding
  state, which means that the frames can pass through or in blocked state which
  means that the frames can't pass through except MRP frames. This will
  eventually call the switchdev API to notify the HW. This information is used
  also by the SW bridge to know how to forward frames in case the HW doesn't
  have this capability.

br_mrp_set_port_role - a port role can be primary or secondary. This
  information is required to be pushed to HW in case the HW can generate
  MRP_Test frames, because the MRP_Test frames contain a field with this
  information. Otherwise the HW will not be able to generate the frames
  correctly.

br_mrp_set_ring_state - a ring can be in state open or closed. State open means
  that the mrp port stopped receiving MRP_Test frames, while closed means that
  the mrp port received MRP_Test frames. Similar to br_mrp_set_port_role, this
  information is pushed to HW because the MRP_Test frames contain this
  information.

br_mrp_set_ring_role - a ring can have the following roles: MRM or MRC. For the
  role MRM it is expected that the HW can terminate the MRP frames, notify the
  SW that it stopped receiving MRP_Test frames and trap all the other MRP
  frames.  While for MRC mode it is expected that the HW can forward the MRP
  frames only between the MRP ports and copy MRP_Topology frames to CPU. In
  case the HW doesn't support a role it needs to return an error code different
  than -EOPNOTSUPP.

br_mrp_start_test - this starts/stops the generation of MRP_Test frames. To stop
  the generation of frames the interval needs to have a value of 0. In this case
  the userspace needs to know if the HW supports this or not, so as not to have
  duplicate frames (generated by HW and SW). If the HW supports this then
  the SW will not generate any more frames and will expect that the HW will
  notify when it stopped receiving MRP frames using the function
  br_mrp_port_open.

br_mrp_port_open - this function is used by drivers to notify the userspace via
  a netlink callback that one of the ports stopped receiving MRP_Test frames.
  This function is called only when the node has the role MRM. It is not
  supposed to be called from userspace.

br_mrp_switchdev_add - this corresponds to the function br_mrp_add,
  and will notify the HW that a MRP instance is added. The function gets
  as parameter the MRP instance.

br_mrp_switchdev_del - this corresponds to the function br_mrp_del,
  and will notify the HW that a MRP instance is removed. The function
  gets as parameter the ID of the MRP instance that is removed.

br_mrp_port_switchdev_set_state - this corresponds to the function
  br_mrp_set_port_state. It would notify the HW if it should block or not
  non-MRP frames.

br_mrp_port_switchdev_set_role - this corresponds to the function
  br_mrp_set_port_role. It would set the port role, primary or secondary.

br_mrp_switchdev_set_ring_role - this corresponds to the function
  br_mrp_set_ring_role and would set one of the roles MRM or MRC.

br_mrp_switchdev_set_ring_state - this corresponds to the function
  br_mrp_set_ring_state and would set the ring to be open or closed.

br_mrp_switchdev_send_ring_test - this corresponds to the function
  br_mrp_start_test. This will notify the HW to start or stop generating
  MRP_Test frames. Value 0 for the interval parameter means to stop generating
  the frames.

br_mrp_port_open - this function is used to notify the userspace that the port
  lost the continuity of MRP Test frames.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private_mrp.h | 63 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 63 insertions(+)
 create mode 100644 net/bridge/br_private_mrp.h

diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
new file mode 100644
index 000000000000..2921a4b59f8e
--- /dev/null
+++ b/net/bridge/br_private_mrp.h
@@ -0,0 +1,63 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+
+#ifndef _BR_PRIVATE_MRP_H_
+#define _BR_PRIVATE_MRP_H_
+
+#include "br_private.h"
+#include <uapi/linux/mrp_bridge.h>
+
+struct br_mrp {
+	/* list of mrp instances */
+	struct list_head		__rcu list;
+
+	struct net_bridge_port __rcu	*p_port;
+	struct net_bridge_port __rcu	*s_port;
+
+	u32				ring_id;
+
+	enum br_mrp_ring_role_type	ring_role;
+	u8				ring_role_offloaded;
+	enum br_mrp_ring_state_type	ring_state;
+	u32				ring_transitions;
+
+	struct delayed_work		test_work;
+	u32				test_interval;
+	unsigned long			test_end;
+	u32				test_count_miss;
+	u32				test_max_miss;
+
+	u32				seq_id;
+
+	struct rcu_head			rcu;
+};
+
+/* br_mrp.c */
+int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance);
+int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance);
+int br_mrp_set_port_state(struct net_bridge_port *p,
+			  enum br_mrp_port_state_type state);
+int br_mrp_set_port_role(struct net_bridge_port *p,
+			 struct br_mrp_port_role *role);
+int br_mrp_set_ring_state(struct net_bridge *br,
+			  struct br_mrp_ring_state *state);
+int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role);
+int br_mrp_start_test(struct net_bridge *br, struct br_mrp_start_test *test);
+
+/* br_mrp_switchdev.c */
+int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp);
+int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp);
+int br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
+				   enum br_mrp_ring_role_type role);
+int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp,
+				    enum br_mrp_ring_state_type state);
+int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
+				    u32 interval, u8 max_miss, u32 period);
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
+				    enum br_mrp_port_state_type state);
+int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
+				   enum br_mrp_port_role_type role);
+
+/* br_mrp_netlink.c  */
+int br_mrp_port_open(struct net_device *dev, u8 loc);
+
+#endif /* _BR_PRIVATE_MRP_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From c284b54590083017193a836362daa4489e782028 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:03 +0200
Subject: switchdev: mrp: Extend switchdev API to offload MRP

Extend switchdev API to add support for MRP. The HW is notified in
following cases:

SWITCHDEV_OBJ_ID_MRP: This is used when a MRP instance is added/removed
  from the MRP ring.

SWITCHDEV_OBJ_ID_RING_ROLE_MRP: This is used when the role of the node
  changes. The current supported roles are MRM and MRC.

SWITCHDEV_OBJ_ID_RING_TEST_MRP: This is used to start/stop sending
  MRP_Test frames on the mrp ring ports. This is called only on nodes that have
  the role MRM. In case this fails then the SW will generate the frames.

SWITCHDEV_OBJ_ID_RING_STATE_MRP: This is used when the ring changes its state
  to open or closed. This is required to notify HW because the MRP_Test frame
  contains the field MRP_InState which contains this information.

SWITCHDEV_ATTR_ID_MRP_PORT_STATE: This is used when the port's state is
  changed. It can be in blocking/forwarding mode.

SWITCHDEV_ATTR_ID_MRP_PORT_ROLE: This is used when port's role changes. The
  roles of the port can be primary/secondary. This is required to notify HW
  because the MRP_Test frame contains the field MRP_PortRole that contains this
  information.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 62 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index aee86a189432..ae7aeb0d1f9c 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -40,6 +40,10 @@ enum switchdev_attr_id {
 	SWITCHDEV_ATTR_ID_BRIDGE_VLAN_FILTERING,
 	SWITCHDEV_ATTR_ID_BRIDGE_MC_DISABLED,
 	SWITCHDEV_ATTR_ID_BRIDGE_MROUTER,
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+	SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
+	SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
+#endif
 };
 
 struct switchdev_attr {
@@ -55,6 +59,11 @@ struct switchdev_attr {
 		clock_t ageing_time;			/* BRIDGE_AGEING_TIME */
 		bool vlan_filtering;			/* BRIDGE_VLAN_FILTERING */
 		bool mc_disabled;			/* MC_DISABLED */
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+		u8 mrp_port_state;			/* MRP_PORT_STATE */
+		u8 mrp_port_role;			/* MRP_PORT_ROLE */
+		u8 mrp_ring_state;			/* MRP_RING_STATE */
+#endif
 	} u;
 };
 
@@ -63,6 +72,12 @@ enum switchdev_obj_id {
 	SWITCHDEV_OBJ_ID_PORT_VLAN,
 	SWITCHDEV_OBJ_ID_PORT_MDB,
 	SWITCHDEV_OBJ_ID_HOST_MDB,
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+	SWITCHDEV_OBJ_ID_MRP,
+	SWITCHDEV_OBJ_ID_RING_TEST_MRP,
+	SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
+	SWITCHDEV_OBJ_ID_RING_STATE_MRP,
+#endif
 };
 
 struct switchdev_obj {
@@ -94,6 +109,53 @@ struct switchdev_obj_port_mdb {
 #define SWITCHDEV_OBJ_PORT_MDB(OBJ) \
 	container_of((OBJ), struct switchdev_obj_port_mdb, obj)
 
+
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+/* SWITCHDEV_OBJ_ID_MRP */
+struct switchdev_obj_mrp {
+	struct switchdev_obj obj;
+	struct net_device *p_port;
+	struct net_device *s_port;
+	u32 ring_id;
+};
+
+#define SWITCHDEV_OBJ_MRP(OBJ) \
+	container_of((OBJ), struct switchdev_obj_mrp, obj)
+
+/* SWITCHDEV_OBJ_ID_RING_TEST_MRP */
+struct switchdev_obj_ring_test_mrp {
+	struct switchdev_obj obj;
+	/* The value is in us and a value of 0 represents to stop */
+	u32 interval;
+	u8 max_miss;
+	u32 ring_id;
+	u32 period;
+};
+
+#define SWITCHDEV_OBJ_RING_TEST_MRP(OBJ) \
+	container_of((OBJ), struct switchdev_obj_ring_test_mrp, obj)
+
+/* SWITCHDEV_OBJ_ID_RING_ROLE_MRP */
+struct switchdev_obj_ring_role_mrp {
+	struct switchdev_obj obj;
+	u8 ring_role;
+	u32 ring_id;
+};
+
+#define SWITCHDEV_OBJ_RING_ROLE_MRP(OBJ) \
+	container_of((OBJ), struct switchdev_obj_ring_role_mrp, obj)
+
+struct switchdev_obj_ring_state_mrp {
+	struct switchdev_obj obj;
+	u8 ring_state;
+	u32 ring_id;
+};
+
+#define SWITCHDEV_OBJ_RING_STATE_MRP(OBJ) \
+	container_of((OBJ), struct switchdev_obj_ring_state_mrp, obj)
+
+#endif
+
 typedef int switchdev_obj_dump_cb_t(struct switchdev_obj *obj);
 
 enum switchdev_notifier_type {
-- 
cgit v1.2.3-59-g8ed1b


From fadd409136f0f21192d80816edd9529f27d88c17 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:04 +0200
Subject: bridge: switchdev: mrp: Implement MRP API for switchdev

Implement the MRP api for switchdev.
These functions will just eventually call the switchdev functions:
switchdev_port_obj_add/del and switchdev_port_attr_set.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/Makefile           |   2 +
 net/bridge/br_mrp_switchdev.c | 140 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 142 insertions(+)
 create mode 100644 net/bridge/br_mrp_switchdev.c

diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 49da7ae6f077..3cacf9dd78d5 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -25,3 +25,5 @@ bridge-$(CONFIG_BRIDGE_VLAN_FILTERING) += br_vlan.o br_vlan_tunnel.o br_vlan_opt
 bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
 
 obj-$(CONFIG_NETFILTER) += netfilter/
+
+bridge-$(CONFIG_BRIDGE_MRP)	+= br_mrp_switchdev.o
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
new file mode 100644
index 000000000000..51cb1d5a24b4
--- /dev/null
+++ b/net/bridge/br_mrp_switchdev.c
@@ -0,0 +1,140 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/switchdev.h>
+
+#include "br_private_mrp.h"
+
+int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp)
+{
+	struct switchdev_obj_mrp mrp_obj = {
+		.obj.orig_dev = br->dev,
+		.obj.id = SWITCHDEV_OBJ_ID_MRP,
+		.p_port = rtnl_dereference(mrp->p_port)->dev,
+		.s_port = rtnl_dereference(mrp->s_port)->dev,
+		.ring_id = mrp->ring_id,
+	};
+	int err;
+
+	err = switchdev_port_obj_add(br->dev, &mrp_obj.obj, NULL);
+
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+int br_mrp_switchdev_del(struct net_bridge *br, struct br_mrp *mrp)
+{
+	struct switchdev_obj_mrp mrp_obj = {
+		.obj.orig_dev = br->dev,
+		.obj.id = SWITCHDEV_OBJ_ID_MRP,
+		.p_port = NULL,
+		.s_port = NULL,
+		.ring_id = mrp->ring_id,
+	};
+	int err;
+
+	err = switchdev_port_obj_del(br->dev, &mrp_obj.obj);
+
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+int br_mrp_switchdev_set_ring_role(struct net_bridge *br,
+				   struct br_mrp *mrp,
+				   enum br_mrp_ring_role_type role)
+{
+	struct switchdev_obj_ring_role_mrp mrp_role = {
+		.obj.orig_dev = br->dev,
+		.obj.id = SWITCHDEV_OBJ_ID_RING_ROLE_MRP,
+		.ring_role = role,
+		.ring_id = mrp->ring_id,
+	};
+	int err;
+
+	if (role == BR_MRP_RING_ROLE_DISABLED)
+		err = switchdev_port_obj_del(br->dev, &mrp_role.obj);
+	else
+		err = switchdev_port_obj_add(br->dev, &mrp_role.obj, NULL);
+
+	return err;
+}
+
+int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
+				    struct br_mrp *mrp, u32 interval,
+				    u8 max_miss, u32 period)
+{
+	struct switchdev_obj_ring_test_mrp test = {
+		.obj.orig_dev = br->dev,
+		.obj.id = SWITCHDEV_OBJ_ID_RING_TEST_MRP,
+		.interval = interval,
+		.max_miss = max_miss,
+		.ring_id = mrp->ring_id,
+		.period = period,
+	};
+	int err;
+
+	if (interval == 0)
+		err = switchdev_port_obj_del(br->dev, &test.obj);
+	else
+		err = switchdev_port_obj_add(br->dev, &test.obj, NULL);
+
+	return err;
+}
+
+int br_mrp_switchdev_set_ring_state(struct net_bridge *br,
+				    struct br_mrp *mrp,
+				    enum br_mrp_ring_state_type state)
+{
+	struct switchdev_obj_ring_state_mrp mrp_state = {
+		.obj.orig_dev = br->dev,
+		.obj.id = SWITCHDEV_OBJ_ID_RING_STATE_MRP,
+		.ring_state = state,
+		.ring_id = mrp->ring_id,
+	};
+	int err;
+
+	err = switchdev_port_obj_add(br->dev, &mrp_state.obj, NULL);
+
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
+
+int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
+				    enum br_mrp_port_state_type state)
+{
+	struct switchdev_attr attr = {
+		.orig_dev = p->dev,
+		.id = SWITCHDEV_ATTR_ID_MRP_PORT_STATE,
+		.u.mrp_port_state = state,
+	};
+	int err;
+
+	err = switchdev_port_attr_set(p->dev, &attr);
+	if (err && err != -EOPNOTSUPP)
+		br_warn(p->br, "error setting offload MRP state on port %u(%s)\n",
+			(unsigned int)p->port_no, p->dev->name);
+
+	return err;
+}
+
+int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
+				   enum br_mrp_port_role_type role)
+{
+	struct switchdev_attr attr = {
+		.orig_dev = p->dev,
+		.id = SWITCHDEV_ATTR_ID_MRP_PORT_ROLE,
+		.u.mrp_port_role = role,
+	};
+	int err;
+
+	err = switchdev_port_attr_set(p->dev, &attr);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 9a9f26e8f7ea300e8efffcae036dbef239be433a Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:05 +0200
Subject: bridge: mrp: Connect MRP API with the switchdev API

Implement the MRP API.

In case the HW can't generate MRP Test frames then the SW will try to generate
the frames. In case the SW also fails to generate the frames then an
error is returned to the userspace. The userspace is responsible to generate all
the other MRP frames regardless if the test frames are generated by HW or SW.

The forwarding/termination of MRP frames is happening in the kernel and is done
by the MRP instance. The userspace application doesn't do the forwarding.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/Makefile         |   2 +-
 net/bridge/br_mrp.c         | 559 ++++++++++++++++++++++++++++++++++++++++++++
 net/bridge/br_mrp_netlink.c |  29 +++
 3 files changed, 589 insertions(+), 1 deletion(-)
 create mode 100644 net/bridge/br_mrp.c
 create mode 100644 net/bridge/br_mrp_netlink.c

diff --git a/net/bridge/Makefile b/net/bridge/Makefile
index 3cacf9dd78d5..ccb394236fbd 100644
--- a/net/bridge/Makefile
+++ b/net/bridge/Makefile
@@ -26,4 +26,4 @@ bridge-$(CONFIG_NET_SWITCHDEV) += br_switchdev.o
 
 obj-$(CONFIG_NETFILTER) += netfilter/
 
-bridge-$(CONFIG_BRIDGE_MRP)	+= br_mrp_switchdev.o
+bridge-$(CONFIG_BRIDGE_MRP)	+= br_mrp_switchdev.o br_mrp.o br_mrp_netlink.o
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
new file mode 100644
index 000000000000..d7bc09de4c13
--- /dev/null
+++ b/net/bridge/br_mrp.c
@@ -0,0 +1,559 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <linux/mrp_bridge.h>
+#include "br_private_mrp.h"
+
+static const u8 mrp_test_dmac[ETH_ALEN] = { 0x1, 0x15, 0x4e, 0x0, 0x0, 0x1 };
+
+static struct net_bridge_port *br_mrp_get_port(struct net_bridge *br,
+					       u32 ifindex)
+{
+	struct net_bridge_port *res = NULL;
+	struct net_bridge_port *port;
+
+	list_for_each_entry(port, &br->port_list, list) {
+		if (port->dev->ifindex == ifindex) {
+			res = port;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id)
+{
+	struct br_mrp *res = NULL;
+	struct br_mrp *mrp;
+
+	list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+				lockdep_rtnl_is_held()) {
+		if (mrp->ring_id == ring_id) {
+			res = mrp;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static struct br_mrp *br_mrp_find_port(struct net_bridge *br,
+				       struct net_bridge_port *p)
+{
+	struct br_mrp *res = NULL;
+	struct br_mrp *mrp;
+
+	list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+				lockdep_rtnl_is_held()) {
+		if (rcu_access_pointer(mrp->p_port) == p ||
+		    rcu_access_pointer(mrp->s_port) == p) {
+			res = mrp;
+			break;
+		}
+	}
+
+	return res;
+}
+
+static int br_mrp_next_seq(struct br_mrp *mrp)
+{
+	mrp->seq_id++;
+	return mrp->seq_id;
+}
+
+static struct sk_buff *br_mrp_skb_alloc(struct net_bridge_port *p,
+					const u8 *src, const u8 *dst)
+{
+	struct ethhdr *eth_hdr;
+	struct sk_buff *skb;
+	u16 *version;
+
+	skb = dev_alloc_skb(MRP_MAX_FRAME_LENGTH);
+	if (!skb)
+		return NULL;
+
+	skb->dev = p->dev;
+	skb->protocol = htons(ETH_P_MRP);
+	skb->priority = MRP_FRAME_PRIO;
+	skb_reserve(skb, sizeof(*eth_hdr));
+
+	eth_hdr = skb_push(skb, sizeof(*eth_hdr));
+	ether_addr_copy(eth_hdr->h_dest, dst);
+	ether_addr_copy(eth_hdr->h_source, src);
+	eth_hdr->h_proto = htons(ETH_P_MRP);
+
+	version = skb_put(skb, sizeof(*version));
+	*version = cpu_to_be16(MRP_VERSION);
+
+	return skb;
+}
+
+static void br_mrp_skb_tlv(struct sk_buff *skb,
+			   enum br_mrp_tlv_header_type type,
+			   u8 length)
+{
+	struct br_mrp_tlv_hdr *hdr;
+
+	hdr = skb_put(skb, sizeof(*hdr));
+	hdr->type = type;
+	hdr->length = length;
+}
+
+static void br_mrp_skb_common(struct sk_buff *skb, struct br_mrp *mrp)
+{
+	struct br_mrp_common_hdr *hdr;
+
+	br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_COMMON, sizeof(*hdr));
+
+	hdr = skb_put(skb, sizeof(*hdr));
+	hdr->seq_id = cpu_to_be16(br_mrp_next_seq(mrp));
+	memset(hdr->domain, 0xff, MRP_DOMAIN_UUID_LENGTH);
+}
+
+static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
+					     struct net_bridge_port *p,
+					     enum br_mrp_port_role_type port_role)
+{
+	struct br_mrp_ring_test_hdr *hdr = NULL;
+	struct sk_buff *skb = NULL;
+
+	if (!p)
+		return NULL;
+
+	skb = br_mrp_skb_alloc(p, p->dev->dev_addr, mrp_test_dmac);
+	if (!skb)
+		return NULL;
+
+	br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_RING_TEST, sizeof(*hdr));
+	hdr = skb_put(skb, sizeof(*hdr));
+
+	hdr->prio = cpu_to_be16(MRP_DEFAULT_PRIO);
+	ether_addr_copy(hdr->sa, p->br->dev->dev_addr);
+	hdr->port_role = cpu_to_be16(port_role);
+	hdr->state = cpu_to_be16(mrp->ring_state);
+	hdr->transitions = cpu_to_be16(mrp->ring_transitions);
+	hdr->timestamp = cpu_to_be32(jiffies_to_msecs(jiffies));
+
+	br_mrp_skb_common(skb, mrp);
+	br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_END, 0x0);
+
+	return skb;
+}
+
+static void br_mrp_test_work_expired(struct work_struct *work)
+{
+	struct delayed_work *del_work = to_delayed_work(work);
+	struct br_mrp *mrp = container_of(del_work, struct br_mrp, test_work);
+	struct net_bridge_port *p;
+	bool notify_open = false;
+	struct sk_buff *skb;
+
+	if (time_before_eq(mrp->test_end, jiffies))
+		return;
+
+	if (mrp->test_count_miss < mrp->test_max_miss) {
+		mrp->test_count_miss++;
+	} else {
+		/* Notify that the ring is open only if the ring state is
+		 * closed, otherwise it would continue to notify at every
+		 * interval.
+		 */
+		if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED)
+			notify_open = true;
+	}
+
+	rcu_read_lock();
+
+	p = rcu_dereference(mrp->p_port);
+	if (p) {
+		skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_PRIMARY);
+		if (!skb)
+			goto out;
+
+		skb_reset_network_header(skb);
+		dev_queue_xmit(skb);
+
+		if (notify_open && !mrp->ring_role_offloaded)
+			br_mrp_port_open(p->dev, true);
+	}
+
+	p = rcu_dereference(mrp->s_port);
+	if (p) {
+		skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_SECONDARY);
+		if (!skb)
+			goto out;
+
+		skb_reset_network_header(skb);
+		dev_queue_xmit(skb);
+
+		if (notify_open && !mrp->ring_role_offloaded)
+			br_mrp_port_open(p->dev, true);
+	}
+
+out:
+	rcu_read_unlock();
+
+	queue_delayed_work(system_wq, &mrp->test_work,
+			   usecs_to_jiffies(mrp->test_interval));
+}
+
+/* Deletes the MRP instance.
+ * note: called under rtnl_lock
+ */
+static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
+{
+	struct net_bridge_port *p;
+
+	/* Stop sending MRP_Test frames */
+	cancel_delayed_work_sync(&mrp->test_work);
+	br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0);
+
+	br_mrp_switchdev_del(br, mrp);
+
+	/* Reset the ports */
+	p = rtnl_dereference(mrp->p_port);
+	if (p) {
+		spin_lock_bh(&br->lock);
+		p->state = BR_STATE_FORWARDING;
+		p->flags &= ~BR_MRP_AWARE;
+		spin_unlock_bh(&br->lock);
+		br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+		rcu_assign_pointer(mrp->p_port, NULL);
+	}
+
+	p = rtnl_dereference(mrp->s_port);
+	if (p) {
+		spin_lock_bh(&br->lock);
+		p->state = BR_STATE_FORWARDING;
+		p->flags &= ~BR_MRP_AWARE;
+		spin_unlock_bh(&br->lock);
+		br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+		rcu_assign_pointer(mrp->s_port, NULL);
+	}
+
+	list_del_rcu(&mrp->list);
+	kfree_rcu(mrp, rcu);
+}
+
+/* Adds a new MRP instance.
+ * note: called under rtnl_lock
+ */
+int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
+{
+	struct net_bridge_port *p;
+	struct br_mrp *mrp;
+	int err;
+
+	/* If the ring exists, it is not possible to create another one with the
+	 * same ring_id
+	 */
+	mrp = br_mrp_find_id(br, instance->ring_id);
+	if (mrp)
+		return -EINVAL;
+
+	if (!br_mrp_get_port(br, instance->p_ifindex) ||
+	    !br_mrp_get_port(br, instance->s_ifindex))
+		return -EINVAL;
+
+	mrp = kzalloc(sizeof(*mrp), GFP_KERNEL);
+	if (!mrp)
+		return -ENOMEM;
+
+	mrp->ring_id = instance->ring_id;
+
+	p = br_mrp_get_port(br, instance->p_ifindex);
+	spin_lock_bh(&br->lock);
+	p->state = BR_STATE_FORWARDING;
+	p->flags |= BR_MRP_AWARE;
+	spin_unlock_bh(&br->lock);
+	rcu_assign_pointer(mrp->p_port, p);
+
+	p = br_mrp_get_port(br, instance->s_ifindex);
+	spin_lock_bh(&br->lock);
+	p->state = BR_STATE_FORWARDING;
+	p->flags |= BR_MRP_AWARE;
+	spin_unlock_bh(&br->lock);
+	rcu_assign_pointer(mrp->s_port, p);
+
+	INIT_DELAYED_WORK(&mrp->test_work, br_mrp_test_work_expired);
+	list_add_tail_rcu(&mrp->list, &br->mrp_list);
+
+	err = br_mrp_switchdev_add(br, mrp);
+	if (err)
+		goto delete_mrp;
+
+	return 0;
+
+delete_mrp:
+	br_mrp_del_impl(br, mrp);
+
+	return err;
+}
+
+/* Deletes the MRP instance from which the port is part of
+ * note: called under rtnl_lock
+ */
+void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p)
+{
+	struct br_mrp *mrp = br_mrp_find_port(br, p);
+
+	/* If the port is not part of a MRP instance just bail out */
+	if (!mrp)
+		return;
+
+	br_mrp_del_impl(br, mrp);
+}
+
+/* Deletes existing MRP instance based on ring_id
+ * note: called under rtnl_lock
+ */
+int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance)
+{
+	struct br_mrp *mrp = br_mrp_find_id(br, instance->ring_id);
+
+	if (!mrp)
+		return -EINVAL;
+
+	br_mrp_del_impl(br, mrp);
+
+	return 0;
+}
+
+/* Set port state, port state can be forwarding, blocked or disabled
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_port_state(struct net_bridge_port *p,
+			  enum br_mrp_port_state_type state)
+{
+	if (!p || !(p->flags & BR_MRP_AWARE))
+		return -EINVAL;
+
+	spin_lock_bh(&p->br->lock);
+
+	if (state == BR_MRP_PORT_STATE_FORWARDING)
+		p->state = BR_STATE_FORWARDING;
+	else
+		p->state = BR_STATE_BLOCKING;
+
+	spin_unlock_bh(&p->br->lock);
+
+	br_mrp_port_switchdev_set_state(p, state);
+
+	return 0;
+}
+
+/* Set port role, port role can be primary or secondary
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_port_role(struct net_bridge_port *p,
+			 struct br_mrp_port_role *role)
+{
+	struct br_mrp *mrp;
+
+	if (!p || !(p->flags & BR_MRP_AWARE))
+		return -EINVAL;
+
+	mrp = br_mrp_find_id(p->br, role->ring_id);
+
+	if (!mrp)
+		return -EINVAL;
+
+	if (role->role == BR_MRP_PORT_ROLE_PRIMARY)
+		rcu_assign_pointer(mrp->p_port, p);
+	else
+		rcu_assign_pointer(mrp->s_port, p);
+
+	br_mrp_port_switchdev_set_role(p, role->role);
+
+	return 0;
+}
+
+/* Set ring state, ring state can be only Open or Closed
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_ring_state(struct net_bridge *br,
+			  struct br_mrp_ring_state *state)
+{
+	struct br_mrp *mrp = br_mrp_find_id(br, state->ring_id);
+
+	if (!mrp)
+		return -EINVAL;
+
+	if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED &&
+	    state->ring_state != BR_MRP_RING_STATE_CLOSED)
+		mrp->ring_transitions++;
+
+	mrp->ring_state = state->ring_state;
+
+	br_mrp_switchdev_set_ring_state(br, mrp, state->ring_state);
+
+	return 0;
+}
+
+/* Set ring role, ring role can be only MRM(Media Redundancy Manager) or
+ * MRC(Media Redundancy Client).
+ * note: already called with rtnl_lock
+ */
+int br_mrp_set_ring_role(struct net_bridge *br,
+			 struct br_mrp_ring_role *role)
+{
+	struct br_mrp *mrp = br_mrp_find_id(br, role->ring_id);
+	int err;
+
+	if (!mrp)
+		return -EINVAL;
+
+	mrp->ring_role = role->ring_role;
+
+	/* If there is an error just bail out */
+	err = br_mrp_switchdev_set_ring_role(br, mrp, role->ring_role);
+	if (err && err != -EOPNOTSUPP)
+		return err;
+
+	/* Now detect if the HW actually applied the role or not. If the HW
+	 * applied the role it means that the SW will not need to do those
+	 * operations anymore. For example if the role is MRM then the HW will
+	 * notify the SW when the ring is open, but if the role is not pushed to
+	 * the HW the SW will need to detect when the ring is open
+	 */
+	mrp->ring_role_offloaded = err == -EOPNOTSUPP ? 0 : 1;
+
+	return 0;
+}
+
+/* Start to generate MRP test frames, the frames are generated by HW and if it
+ * fails, they are generated by the SW.
+ * note: already called with rtnl_lock
+ */
+int br_mrp_start_test(struct net_bridge *br,
+		      struct br_mrp_start_test *test)
+{
+	struct br_mrp *mrp = br_mrp_find_id(br, test->ring_id);
+
+	if (!mrp)
+		return -EINVAL;
+
+	/* Try to push it to the HW and if it fails then continue to generate in
+	 * SW and if that also fails then return error
+	 */
+	if (!br_mrp_switchdev_send_ring_test(br, mrp, test->interval,
+					     test->max_miss, test->period))
+		return 0;
+
+	mrp->test_interval = test->interval;
+	mrp->test_end = jiffies + usecs_to_jiffies(test->period);
+	mrp->test_max_miss = test->max_miss;
+	mrp->test_count_miss = 0;
+	queue_delayed_work(system_wq, &mrp->test_work,
+			   usecs_to_jiffies(test->interval));
+
+	return 0;
+}
+
+/* Process only MRP Test frame. All the other MRP frames are processed by
+ * userspace application
+ * note: already called with rcu_read_lock
+ */
+static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port,
+			       struct sk_buff *skb)
+{
+	const struct br_mrp_tlv_hdr *hdr;
+	struct br_mrp_tlv_hdr _hdr;
+
+	/* Each MRP header starts with a version field which is 16 bits.
+	 * Therefore skip the version and get directly the TLV header.
+	 */
+	hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+	if (!hdr)
+		return;
+
+	if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST)
+		return;
+
+	mrp->test_count_miss = 0;
+
+	/* Notify the userspace that the ring is closed only when the ring is
+	 * not closed
+	 */
+	if (mrp->ring_state != BR_MRP_RING_STATE_CLOSED)
+		br_mrp_port_open(port->dev, false);
+}
+
+/* This will just forward the frame to the other mrp ring port(MRC role) or will
+ * not do anything.
+ * note: already called with rcu_read_lock
+ */
+static int br_mrp_rcv(struct net_bridge_port *p,
+		      struct sk_buff *skb, struct net_device *dev)
+{
+	struct net_device *s_dev, *p_dev, *d_dev;
+	struct net_bridge_port *p_port, *s_port;
+	struct net_bridge *br;
+	struct sk_buff *nskb;
+	struct br_mrp *mrp;
+
+	/* If port is disabled don't accept any frames */
+	if (p->state == BR_STATE_DISABLED)
+		return 0;
+
+	br = p->br;
+	mrp =  br_mrp_find_port(br, p);
+	if (unlikely(!mrp))
+		return 0;
+
+	p_port = rcu_dereference(mrp->p_port);
+	if (!p_port)
+		return 0;
+
+	s_port = rcu_dereference(mrp->s_port);
+	if (!s_port)
+		return 0;
+
+	/* If the role is MRM then don't forward the frames */
+	if (mrp->ring_role == BR_MRP_RING_ROLE_MRM) {
+		br_mrp_mrm_process(mrp, p, skb);
+		return 1;
+	}
+
+	/* Clone the frame and forward it on the other MRP port */
+	nskb = skb_clone(skb, GFP_ATOMIC);
+	if (!nskb)
+		return 0;
+
+	p_dev = p_port->dev;
+	s_dev = s_port->dev;
+
+	if (p_dev == dev)
+		d_dev = s_dev;
+	else
+		d_dev = p_dev;
+
+	nskb->dev = d_dev;
+	skb_push(nskb, ETH_HLEN);
+	dev_queue_xmit(nskb);
+
+	return 1;
+}
+
+/* Check if the frame was received on a port that is part of MRP ring
+ * and if the frame has MRP eth. In that case process the frame otherwise do
+ * normal forwarding.
+ * note: already called with rcu_read_lock
+ */
+int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	/* If there is no MRP instance do normal forwarding */
+	if (likely(!(p->flags & BR_MRP_AWARE)))
+		goto out;
+
+	if (unlikely(skb->protocol == htons(ETH_P_MRP)))
+		return br_mrp_rcv(p, skb, p->dev);
+
+out:
+	return 0;
+}
+
+bool br_mrp_enabled(struct net_bridge *br)
+{
+	return !list_empty(&br->mrp_list);
+}
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
new file mode 100644
index 000000000000..b982db14bbf4
--- /dev/null
+++ b/net/bridge/br_mrp_netlink.c
@@ -0,0 +1,29 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+
+#include <net/genetlink.h>
+
+#include <uapi/linux/mrp_bridge.h>
+#include "br_private.h"
+#include "br_private_mrp.h"
+
+int br_mrp_port_open(struct net_device *dev, u8 loc)
+{
+	struct net_bridge_port *p;
+	int err = 0;
+
+	p = br_port_get_rcu(dev);
+	if (!p) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	if (loc)
+		p->flags |= BR_MRP_LOST_CONT;
+	else
+		p->flags &= ~BR_MRP_LOST_CONT;
+
+	br_ifinfo_notify(RTM_NEWLINK, NULL, p);
+
+out:
+	return err;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 4d02b8f075153508562803e590f76c4dfe5f4b66 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:06 +0200
Subject: bridge: mrp: Implement netlink interface to configure MRP

Implement netlink interface to configure MRP. The implementation
will do sanity checks over the attributes and then eventually call the MRP
interface.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp_netlink.c | 91 +++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 91 insertions(+)

diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index b982db14bbf4..503896638be0 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -6,6 +6,97 @@
 #include "br_private.h"
 #include "br_private_mrp.h"
 
+static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_INSTANCE]	= { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_mrp_instance)},
+	[IFLA_BRIDGE_MRP_PORT_STATE]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_PORT_ROLE]	= { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_mrp_port_role)},
+	[IFLA_BRIDGE_MRP_RING_STATE]	= { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_mrp_ring_state)},
+	[IFLA_BRIDGE_MRP_RING_ROLE]	= { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_mrp_ring_role)},
+	[IFLA_BRIDGE_MRP_START_TEST]	= { .type = NLA_EXACT_LEN,
+				    .len = sizeof(struct br_mrp_start_test)},
+};
+
+int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1];
+	int err;
+
+	if (br->stp_enabled != BR_NO_STP) {
+		NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled\n");
+		return -EINVAL;
+	}
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_MAX, attr,
+			       br_mrp_policy, extack);
+	if (err)
+		return err;
+
+	if (tb[IFLA_BRIDGE_MRP_INSTANCE]) {
+		struct br_mrp_instance *instance =
+			nla_data(tb[IFLA_BRIDGE_MRP_INSTANCE]);
+
+		if (cmd == RTM_SETLINK)
+			err = br_mrp_add(br, instance);
+		else
+			err = br_mrp_del(br, instance);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) {
+		enum br_mrp_port_state_type state =
+			nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE]);
+
+		err = br_mrp_set_port_state(p, state);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) {
+		struct br_mrp_port_role *role =
+			nla_data(tb[IFLA_BRIDGE_MRP_PORT_ROLE]);
+
+		err = br_mrp_set_port_role(p, role);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_MRP_RING_STATE]) {
+		struct br_mrp_ring_state *state =
+			nla_data(tb[IFLA_BRIDGE_MRP_RING_STATE]);
+
+		err = br_mrp_set_ring_state(br, state);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) {
+		struct br_mrp_ring_role *role =
+			nla_data(tb[IFLA_BRIDGE_MRP_RING_ROLE]);
+
+		err = br_mrp_set_ring_role(br, role);
+		if (err)
+			return err;
+	}
+
+	if (tb[IFLA_BRIDGE_MRP_START_TEST]) {
+		struct br_mrp_start_test *test =
+			nla_data(tb[IFLA_BRIDGE_MRP_START_TEST]);
+
+		err = br_mrp_start_test(br, test);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
 int br_mrp_port_open(struct net_device *dev, u8 loc)
 {
 	struct net_bridge_port *p;
-- 
cgit v1.2.3-59-g8ed1b


From 6536993371fab3de4e8379649b60e94d03e6ff37 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:07 +0200
Subject: bridge: mrp: Integrate MRP into the bridge

To integrate MRP into the bridge, the bridge needs to do the following:
- detect if the MRP frame was received on an MRP ring port; in that case it is
  processed, otherwise it is just forwarded as usual.
- enable parsing of MRP
- previously, whenever the bridge was set up, it would set all the ports in
  forwarding state. Add an extra check to not set ports in forwarding state if
  the port is an MRP ring port. The reason for this change is that if the MRP
  instance initially sets the port in blocked state, setting the bridge up
  would overwrite this setting.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_device.c  |  3 +++
 net/bridge/br_if.c      |  2 ++
 net/bridge/br_input.c   |  3 +++
 net/bridge/br_netlink.c |  5 +++++
 net/bridge/br_private.h | 31 +++++++++++++++++++++++++++++++
 5 files changed, 44 insertions(+)

diff --git a/net/bridge/br_device.c b/net/bridge/br_device.c
index 0e3dbc5f3c34..8ec1362588af 100644
--- a/net/bridge/br_device.c
+++ b/net/bridge/br_device.c
@@ -463,6 +463,9 @@ void br_dev_setup(struct net_device *dev)
 	spin_lock_init(&br->lock);
 	INIT_LIST_HEAD(&br->port_list);
 	INIT_HLIST_HEAD(&br->fdb_list);
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+	INIT_LIST_HEAD(&br->mrp_list);
+#endif
 	spin_lock_init(&br->hash_lock);
 
 	br->bridge_id.prio[0] = 0x80;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index 4fe30b182ee7..ca685c0cdf95 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -333,6 +333,8 @@ static void del_nbp(struct net_bridge_port *p)
 	br_stp_disable_port(p);
 	spin_unlock_bh(&br->lock);
 
+	br_mrp_port_del(br, p);
+
 	br_ifinfo_notify(RTM_DELLINK, NULL, p);
 
 	list_del_rcu(&p->list);
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index fcc260840028..d5c34f36f0f4 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -342,6 +342,9 @@ rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 		}
 	}
 
+	if (unlikely(br_mrp_process(p, skb)))
+		return RX_HANDLER_PASS;
+
 forward:
 	switch (p->state) {
 	case BR_STATE_FORWARDING:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 4084f1ef8641..1a5e681a626a 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -672,6 +672,11 @@ static int br_afspec(struct net_bridge *br,
 			if (err)
 				return err;
 			break;
+		case IFLA_BRIDGE_MRP:
+			err = br_mrp_parse(br, p, attr, cmd, extack);
+			if (err)
+				return err;
+			break;
 		}
 	}
 
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 835a70f8d3ea..5835828320b6 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1308,6 +1308,37 @@ unsigned long br_timer_value(const struct timer_list *timer);
 extern int (*br_fdb_test_addr_hook)(struct net_device *dev, unsigned char *addr);
 #endif
 
+/* br_mrp.c */
+#if IS_ENABLED(CONFIG_BRIDGE_MRP)
+int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack);
+int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb);
+bool br_mrp_enabled(struct net_bridge *br);
+void br_mrp_port_del(struct net_bridge *br, struct net_bridge_port *p);
+#else
+static inline int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
+			       struct nlattr *attr, int cmd,
+			       struct netlink_ext_ack *extack)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb)
+{
+	return 0;
+}
+
+static inline bool br_mrp_enabled(struct net_bridge *br)
+{
+	return 0;
+}
+
+static inline void br_mrp_port_del(struct net_bridge *br,
+				   struct net_bridge_port *p)
+{
+}
+#endif
+
 /* br_netlink.c */
 extern struct rtnl_link_ops br_link_ops;
 int br_netlink_init(void);
-- 
cgit v1.2.3-59-g8ed1b


From 419dba8a49d7cc355e5b495d20dea8217369ed63 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sun, 26 Apr 2020 15:22:08 +0200
Subject: net: bridge: Add checks for enabling the STP.

It is not possible to have MRP and STP running at the same time on the
bridge, therefore add a check when enabling STP to see whether MRP is already
enabled. In that case return an error.

Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_ioctl.c    |  3 +--
 net/bridge/br_netlink.c  |  4 +++-
 net/bridge/br_private.h  |  3 ++-
 net/bridge/br_stp.c      |  6 ++++++
 net/bridge/br_stp_if.c   | 11 ++++++++++-
 net/bridge/br_sysfs_br.c |  4 +---
 6 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/net/bridge/br_ioctl.c b/net/bridge/br_ioctl.c
index ae22d784b88a..5e71fc8b826f 100644
--- a/net/bridge/br_ioctl.c
+++ b/net/bridge/br_ioctl.c
@@ -242,8 +242,7 @@ static int old_dev_ioctl(struct net_device *dev, struct ifreq *rq, int cmd)
 		if (!ns_capable(dev_net(dev)->user_ns, CAP_NET_ADMIN))
 			return -EPERM;
 
-		br_stp_set_enabled(br, args[1]);
-		ret = 0;
+		ret = br_stp_set_enabled(br, args[1], NULL);
 		break;
 
 	case BRCTL_SET_BRIDGE_PRIORITY:
diff --git a/net/bridge/br_netlink.c b/net/bridge/br_netlink.c
index 1a5e681a626a..a774e19c41bb 100644
--- a/net/bridge/br_netlink.c
+++ b/net/bridge/br_netlink.c
@@ -1109,7 +1109,9 @@ static int br_changelink(struct net_device *brdev, struct nlattr *tb[],
 	if (data[IFLA_BR_STP_STATE]) {
 		u32 stp_enabled = nla_get_u32(data[IFLA_BR_STP_STATE]);
 
-		br_stp_set_enabled(br, stp_enabled);
+		err = br_stp_set_enabled(br, stp_enabled, extack);
+		if (err)
+			return err;
 	}
 
 	if (data[IFLA_BR_PRIORITY]) {
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 5835828320b6..c35647cb138a 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1283,7 +1283,8 @@ int br_set_ageing_time(struct net_bridge *br, clock_t ageing_time);
 /* br_stp_if.c */
 void br_stp_enable_bridge(struct net_bridge *br);
 void br_stp_disable_bridge(struct net_bridge *br);
-void br_stp_set_enabled(struct net_bridge *br, unsigned long val);
+int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+		       struct netlink_ext_ack *extack);
 void br_stp_enable_port(struct net_bridge_port *p);
 void br_stp_disable_port(struct net_bridge_port *p);
 bool br_stp_recalculate_bridge_id(struct net_bridge *br);
diff --git a/net/bridge/br_stp.c b/net/bridge/br_stp.c
index 1f14b8455345..3e88be7aa269 100644
--- a/net/bridge/br_stp.c
+++ b/net/bridge/br_stp.c
@@ -36,6 +36,12 @@ void br_set_state(struct net_bridge_port *p, unsigned int state)
 	};
 	int err;
 
+	/* Don't change the state of the ports if they are driven by a different
+	 * protocol.
+	 */
+	if (p->flags & BR_MRP_AWARE)
+		return;
+
 	p->state = state;
 	err = switchdev_port_attr_set(p->dev, &attr);
 	if (err && err != -EOPNOTSUPP)
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index d174d3a566aa..a42850b7eb9a 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -196,10 +196,17 @@ static void br_stp_stop(struct net_bridge *br)
 	br->stp_enabled = BR_NO_STP;
 }
 
-void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
+int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
+		       struct netlink_ext_ack *extack)
 {
 	ASSERT_RTNL();
 
+	if (br_mrp_enabled(br)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "STP can't be enabled if MRP is already enabled\n");
+		return -EINVAL;
+	}
+
 	if (val) {
 		if (br->stp_enabled == BR_NO_STP)
 			br_stp_start(br);
@@ -207,6 +214,8 @@ void br_stp_set_enabled(struct net_bridge *br, unsigned long val)
 		if (br->stp_enabled != BR_NO_STP)
 			br_stp_stop(br);
 	}
+
+	return 0;
 }
 
 /* called under bridge lock */
diff --git a/net/bridge/br_sysfs_br.c b/net/bridge/br_sysfs_br.c
index 9ab0f00b1081..7db06e3f642a 100644
--- a/net/bridge/br_sysfs_br.c
+++ b/net/bridge/br_sysfs_br.c
@@ -126,9 +126,7 @@ static ssize_t stp_state_show(struct device *d,
 
 static int set_stp_state(struct net_bridge *br, unsigned long val)
 {
-	br_stp_set_enabled(br, val);
-
-	return 0;
+	return br_stp_set_enabled(br, val, NULL);
 }
 
 static ssize_t stp_state_store(struct device *d,
-- 
cgit v1.2.3-59-g8ed1b


From 975e8505e6701c77b43bcdd5acc48d3735063b68 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 26 Apr 2020 23:35:59 +0200
Subject: r8169: improve handling CPCMD_MASK

It's sufficient to do the masking once in probe() for clearing
unwanted bits that may have been set by the BIOS.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 4c616701856a..06a877fe74ba 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3845,7 +3845,6 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 {
 	rtl_unlock_config_regs(tp);
 
-	tp->cp_cmd &= CPCMD_MASK;
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
@@ -5424,7 +5423,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	tp->mac_version = chipset;
 
-	tp->cp_cmd = RTL_R16(tp, CPlusCmd);
+	tp->cp_cmd = RTL_R16(tp, CPlusCmd) & CPCMD_MASK;
 
 	if (sizeof(dma_addr_t) > 4 && tp->mac_version >= RTL_GIGA_MAC_VER_18 &&
 	    !dma_set_mask_and_coherent(&pdev->dev, DMA_BIT_MASK(64)))
-- 
cgit v1.2.3-59-g8ed1b


From 10478283f210e64ac682083083437dd5f89b7c4a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 26 Apr 2020 23:36:56 +0200
Subject: r8169: improve configuring RxConfig register

Two bits in RxConfig are controlled by the following dev->feature's:
- NETIF_F_RXALL
- NETIF_F_HW_VLAN_CTAG_RX (since RTL8125)

We have to take care that RxConfig gets fully configured in
rtl_hw_start() after e.g. resume from hibernation. Therefore:

- Factor out setting the feature-controlled RxConfig bits to a new
  function rtl_set_rx_config_features() that is called from
  rtl8169_set_features() and rtl_hw_start().
- Don't deal with RX_VLAN_8125 in rtl_init_rxcfg(), it will be set
  by rtl_set_rx_config_features().
- Don't handle NETIF_F_RXALL in rtl_set_rx_mode().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 38 ++++++++++++++++---------------
 1 file changed, 20 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 06a877fe74ba..f70e36c20431 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -388,10 +388,12 @@ enum rtl_register_content {
 	/* rx_mode_bits */
 	AcceptErr	= 0x20,
 	AcceptRunt	= 0x10,
+#define RX_CONFIG_ACCEPT_ERR_MASK	0x30
 	AcceptBroadcast	= 0x08,
 	AcceptMulticast	= 0x04,
 	AcceptMyPhys	= 0x02,
 	AcceptAllPhys	= 0x01,
+#define RX_CONFIG_ACCEPT_OK_MASK	0x0f
 #define RX_CONFIG_ACCEPT_MASK		0x3f
 
 	/* TxConfigBits */
@@ -1497,19 +1499,15 @@ static netdev_features_t rtl8169_fix_features(struct net_device *dev,
 	return features;
 }
 
-static int rtl8169_set_features(struct net_device *dev,
-				netdev_features_t features)
+static void rtl_set_rx_config_features(struct rtl8169_private *tp,
+				       netdev_features_t features)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-	u32 rx_config;
-
-	rtl_lock_work(tp);
+	u32 rx_config = RTL_R32(tp, RxConfig);
 
-	rx_config = RTL_R32(tp, RxConfig);
 	if (features & NETIF_F_RXALL)
-		rx_config |= (AcceptErr | AcceptRunt);
+		rx_config |= RX_CONFIG_ACCEPT_ERR_MASK;
 	else
-		rx_config &= ~(AcceptErr | AcceptRunt);
+		rx_config &= ~RX_CONFIG_ACCEPT_ERR_MASK;
 
 	if (rtl_is_8125(tp)) {
 		if (features & NETIF_F_HW_VLAN_CTAG_RX)
@@ -1519,6 +1517,16 @@ static int rtl8169_set_features(struct net_device *dev,
 	}
 
 	RTL_W32(tp, RxConfig, rx_config);
+}
+
+static int rtl8169_set_features(struct net_device *dev,
+				netdev_features_t features)
+{
+	struct rtl8169_private *tp = netdev_priv(dev);
+
+	rtl_lock_work(tp);
+
+	rtl_set_rx_config_features(tp, features);
 
 	if (features & NETIF_F_RXCSUM)
 		tp->cp_cmd |= RxChkSum;
@@ -2395,8 +2403,6 @@ static void rtl_pll_power_up(struct rtl8169_private *tp)
 
 static void rtl_init_rxcfg(struct rtl8169_private *tp)
 {
-	u32 vlan;
-
 	switch (tp->mac_version) {
 	case RTL_GIGA_MAC_VER_02 ... RTL_GIGA_MAC_VER_06:
 	case RTL_GIGA_MAC_VER_10 ... RTL_GIGA_MAC_VER_17:
@@ -2411,9 +2417,7 @@ static void rtl_init_rxcfg(struct rtl8169_private *tp)
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_MULTI_EN | RX_DMA_BURST | RX_EARLY_OFF);
 		break;
 	case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
-		/* VLAN flags are controlled by NETIF_F_HW_VLAN_CTAG_RX */
-		vlan = RTL_R32(tp, RxConfig) & RX_VLAN_8125;
-		RTL_W32(tp, RxConfig, vlan | RX_FETCH_DFLT_8125 | RX_DMA_BURST);
+		RTL_W32(tp, RxConfig, RX_FETCH_DFLT_8125 | RX_DMA_BURST);
 		break;
 	default:
 		RTL_W32(tp, RxConfig, RX128_INT_EN | RX_DMA_BURST);
@@ -2680,14 +2684,11 @@ static void rtl_set_rx_mode(struct net_device *dev)
 		}
 	}
 
-	if (dev->features & NETIF_F_RXALL)
-		rx_mode |= (AcceptErr | AcceptRunt);
-
 	RTL_W32(tp, MAR0 + 4, mc_filter[1]);
 	RTL_W32(tp, MAR0 + 0, mc_filter[0]);
 
 	tmp = RTL_R32(tp, RxConfig);
-	RTL_W32(tp, RxConfig, (tmp & ~RX_CONFIG_ACCEPT_MASK) | rx_mode);
+	RTL_W32(tp, RxConfig, (tmp & ~RX_CONFIG_ACCEPT_OK_MASK) | rx_mode);
 }
 
 DECLARE_RTL_COND(rtl_csiar_cond)
@@ -3866,6 +3867,7 @@ static void rtl_hw_start(struct  rtl8169_private *tp)
 	RTL_W8(tp, ChipCmd, CmdTxEnb | CmdRxEnb);
 	rtl_init_rxcfg(tp);
 	rtl_set_tx_config_registers(tp);
+	rtl_set_rx_config_features(tp, tp->dev->features);
 	rtl_set_rx_mode(tp->dev);
 	rtl_irq_enable(tp);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 49c958ccd2433114ca8a96c996b5016aa89c7ba5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:12:58 +0300
Subject: mlxsw: spectrum_acl: Move block helpers into inline header functions

The struct is defined in the header, no need to have the helpers
in the c file. Move the helpers to the header.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     | 60 ++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 43 ----------------
 2 files changed, 51 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index ca56e72cb4b7..f158cd98f8d8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -677,12 +677,57 @@ struct mlxsw_sp_acl_block {
 };
 
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
-struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block);
-unsigned int
-mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block);
-void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block);
-void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block);
-bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block);
+
+static inline struct mlxsw_sp *
+mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block)
+{
+	return block->mlxsw_sp;
+}
+
+static inline unsigned int
+mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block)
+{
+	return block ? block->rule_count : 0;
+}
+
+static inline void
+mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block)
+{
+	if (block)
+		block->disable_count++;
+}
+
+static inline void
+mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block)
+{
+	if (block)
+		block->disable_count--;
+}
+
+static inline bool
+mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block)
+{
+	return block->disable_count;
+}
+
+static inline bool
+mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block)
+{
+	return block->egress_binding_count;
+}
+
+static inline bool
+mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block)
+{
+	return block->ingress_binding_count;
+}
+
+static inline bool
+mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block)
+{
+	return block->ingress_binding_count && block->egress_binding_count;
+}
+
 struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
 						     struct net *net);
 void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block);
@@ -695,9 +740,6 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_acl_block *block,
 			      struct mlxsw_sp_port *mlxsw_sp_port,
 			      bool ingress);
-bool mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block);
-bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block);
-bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block);
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_acl_block *block, u32 chain_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 01cff711bbd2..bb06c007b3f2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -94,49 +94,6 @@ struct mlxsw_sp_fid *mlxsw_sp_acl_dummy_fid(struct mlxsw_sp *mlxsw_sp)
 	return mlxsw_sp->acl->dummy_fid;
 }
 
-struct mlxsw_sp *mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block)
-{
-	return block->mlxsw_sp;
-}
-
-unsigned int
-mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block)
-{
-	return block ? block->rule_count : 0;
-}
-
-void mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block)
-{
-	if (block)
-		block->disable_count++;
-}
-
-void mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block)
-{
-	if (block)
-		block->disable_count--;
-}
-
-bool mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block)
-{
-	return block->disable_count;
-}
-
-bool mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block)
-{
-	return block->egress_binding_count;
-}
-
-bool mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block)
-{
-	return block->ingress_binding_count;
-}
-
-bool mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block)
-{
-	return block->ingress_binding_count && block->egress_binding_count;
-}
-
 static bool
 mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 3bc3ffb6e911f9de099d81187ae99b45966cbd05 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:12:59 +0300
Subject: mlxsw: spectrum: Rename acl_block to flow_block

The acl_block structure is going to be used for non-acl case - matchall
offload. So rename it accordingly.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 71 ++++++++++---------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     | 64 ++++++++---------
 .../ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c    | 14 ++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 82 +++++++++++-----------
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  | 24 +++----
 5 files changed, 128 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 24ca8d5bc564..f64e8da21d4a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1544,23 +1544,23 @@ static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 }
 
 static int
-mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_acl_block *acl_block,
+mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_flow_block *flow_block,
 			     struct flow_cls_offload *f)
 {
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_acl_block_mlxsw_sp(acl_block);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block);
 
 	switch (f->command) {
 	case FLOW_CLS_REPLACE:
-		return mlxsw_sp_flower_replace(mlxsw_sp, acl_block, f);
+		return mlxsw_sp_flower_replace(mlxsw_sp, flow_block, f);
 	case FLOW_CLS_DESTROY:
-		mlxsw_sp_flower_destroy(mlxsw_sp, acl_block, f);
+		mlxsw_sp_flower_destroy(mlxsw_sp, flow_block, f);
 		return 0;
 	case FLOW_CLS_STATS:
-		return mlxsw_sp_flower_stats(mlxsw_sp, acl_block, f);
+		return mlxsw_sp_flower_stats(mlxsw_sp, flow_block, f);
 	case FLOW_CLS_TMPLT_CREATE:
-		return mlxsw_sp_flower_tmplt_create(mlxsw_sp, acl_block, f);
+		return mlxsw_sp_flower_tmplt_create(mlxsw_sp, flow_block, f);
 	case FLOW_CLS_TMPLT_DESTROY:
-		mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, acl_block, f);
+		mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, flow_block, f);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -1607,16 +1607,16 @@ static int mlxsw_sp_setup_tc_block_cb_matchall_eg(enum tc_setup_type type,
 static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
 					     void *type_data, void *cb_priv)
 {
-	struct mlxsw_sp_acl_block *acl_block = cb_priv;
+	struct mlxsw_sp_flow_block *flow_block = cb_priv;
 
 	switch (type) {
 	case TC_SETUP_CLSMATCHALL:
 		return 0;
 	case TC_SETUP_CLSFLOWER:
-		if (mlxsw_sp_acl_block_disabled(acl_block))
+		if (mlxsw_sp_flow_block_disabled(flow_block))
 			return -EOPNOTSUPP;
 
-		return mlxsw_sp_setup_tc_cls_flower(acl_block, type_data);
+		return mlxsw_sp_setup_tc_cls_flower(flow_block, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1624,9 +1624,9 @@ static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
 
 static void mlxsw_sp_tc_block_flower_release(void *cb_priv)
 {
-	struct mlxsw_sp_acl_block *acl_block = cb_priv;
+	struct mlxsw_sp_flow_block *flow_block = cb_priv;
 
-	mlxsw_sp_acl_block_destroy(acl_block);
+	mlxsw_sp_flow_block_destroy(flow_block);
 }
 
 static LIST_HEAD(mlxsw_sp_block_cb_list);
@@ -1636,7 +1636,7 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 			            struct flow_block_offload *f, bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_acl_block *acl_block;
+	struct mlxsw_sp_flow_block *flow_block;
 	struct flow_block_cb *block_cb;
 	bool register_block = false;
 	int err;
@@ -1645,31 +1645,31 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 					mlxsw_sp_setup_tc_block_cb_flower,
 					mlxsw_sp);
 	if (!block_cb) {
-		acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, f->net);
-		if (!acl_block)
+		flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, f->net);
+		if (!flow_block)
 			return -ENOMEM;
 		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
-					       mlxsw_sp, acl_block,
+					       mlxsw_sp, flow_block,
 					       mlxsw_sp_tc_block_flower_release);
 		if (IS_ERR(block_cb)) {
-			mlxsw_sp_acl_block_destroy(acl_block);
+			mlxsw_sp_flow_block_destroy(flow_block);
 			err = PTR_ERR(block_cb);
 			goto err_cb_register;
 		}
 		register_block = true;
 	} else {
-		acl_block = flow_block_cb_priv(block_cb);
+		flow_block = flow_block_cb_priv(block_cb);
 	}
 	flow_block_cb_incref(block_cb);
-	err = mlxsw_sp_acl_block_bind(mlxsw_sp, acl_block,
-				      mlxsw_sp_port, ingress, f->extack);
+	err = mlxsw_sp_flow_block_bind(mlxsw_sp, flow_block,
+				       mlxsw_sp_port, ingress, f->extack);
 	if (err)
 		goto err_block_bind;
 
 	if (ingress)
-		mlxsw_sp_port->ing_acl_block = acl_block;
+		mlxsw_sp_port->ing_flow_block = flow_block;
 	else
-		mlxsw_sp_port->eg_acl_block = acl_block;
+		mlxsw_sp_port->eg_flow_block = flow_block;
 
 	if (register_block) {
 		flow_block_cb_add(block_cb, f);
@@ -1687,10 +1687,11 @@ err_cb_register:
 
 static void
 mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct flow_block_offload *f, bool ingress)
+				      struct flow_block_offload *f,
+				      bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_acl_block *acl_block;
+	struct mlxsw_sp_flow_block *flow_block;
 	struct flow_block_cb *block_cb;
 	int err;
 
@@ -1701,13 +1702,13 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 		return;
 
 	if (ingress)
-		mlxsw_sp_port->ing_acl_block = NULL;
+		mlxsw_sp_port->ing_flow_block = NULL;
 	else
-		mlxsw_sp_port->eg_acl_block = NULL;
+		mlxsw_sp_port->eg_flow_block = NULL;
 
-	acl_block = flow_block_cb_priv(block_cb);
-	err = mlxsw_sp_acl_block_unbind(mlxsw_sp, acl_block,
-					mlxsw_sp_port, ingress);
+	flow_block = flow_block_cb_priv(block_cb);
+	err = mlxsw_sp_flow_block_unbind(mlxsw_sp, flow_block,
+					 mlxsw_sp_port, ingress);
 	if (!err && !flow_block_cb_decref(block_cb)) {
 		flow_block_cb_remove(block_cb, f);
 		list_del(&block_cb->driver_list);
@@ -1797,17 +1798,17 @@ static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
 
 	if (!enable) {
-		if (mlxsw_sp_acl_block_rule_count(mlxsw_sp_port->ing_acl_block) ||
-		    mlxsw_sp_acl_block_rule_count(mlxsw_sp_port->eg_acl_block) ||
+		if (mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->ing_flow_block) ||
+		    mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->eg_flow_block) ||
 		    !list_empty(&mlxsw_sp_port->mall_tc_list)) {
 			netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n");
 			return -EINVAL;
 		}
-		mlxsw_sp_acl_block_disable_inc(mlxsw_sp_port->ing_acl_block);
-		mlxsw_sp_acl_block_disable_inc(mlxsw_sp_port->eg_acl_block);
+		mlxsw_sp_flow_block_disable_inc(mlxsw_sp_port->ing_flow_block);
+		mlxsw_sp_flow_block_disable_inc(mlxsw_sp_port->eg_flow_block);
 	} else {
-		mlxsw_sp_acl_block_disable_dec(mlxsw_sp_port->ing_acl_block);
-		mlxsw_sp_acl_block_disable_dec(mlxsw_sp_port->eg_acl_block);
+		mlxsw_sp_flow_block_disable_dec(mlxsw_sp_port->ing_flow_block);
+		mlxsw_sp_flow_block_disable_dec(mlxsw_sp_port->eg_flow_block);
 	}
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index f158cd98f8d8..65b1a2d87c2d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -287,8 +287,8 @@ struct mlxsw_sp_port {
 	struct mlxsw_sp_port_vlan *default_vlan;
 	struct mlxsw_sp_qdisc_state *qdisc;
 	unsigned acl_rule_count;
-	struct mlxsw_sp_acl_block *ing_acl_block;
-	struct mlxsw_sp_acl_block *eg_acl_block;
+	struct mlxsw_sp_flow_block *ing_flow_block;
+	struct mlxsw_sp_flow_block *eg_flow_block;
 	struct {
 		struct delayed_work shaper_dw;
 		struct hwtstamp_config hwtstamp_config;
@@ -654,7 +654,7 @@ struct mlxsw_sp_acl_rule_info {
 	unsigned int counter_index;
 };
 
-struct mlxsw_sp_acl_block;
+struct mlxsw_sp_flow_block;
 struct mlxsw_sp_acl_ruleset;
 
 /* spectrum_acl.c */
@@ -663,7 +663,7 @@ enum mlxsw_sp_acl_profile {
 	MLXSW_SP_ACL_PROFILE_MR,
 };
 
-struct mlxsw_sp_acl_block {
+struct mlxsw_sp_flow_block {
 	struct list_head binding_list;
 	struct mlxsw_sp_acl_ruleset *ruleset_zero;
 	struct mlxsw_sp *mlxsw_sp;
@@ -679,74 +679,74 @@ struct mlxsw_sp_acl_block {
 struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
 
 static inline struct mlxsw_sp *
-mlxsw_sp_acl_block_mlxsw_sp(struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_mlxsw_sp(struct mlxsw_sp_flow_block *block)
 {
 	return block->mlxsw_sp;
 }
 
 static inline unsigned int
-mlxsw_sp_acl_block_rule_count(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_rule_count(const struct mlxsw_sp_flow_block *block)
 {
 	return block ? block->rule_count : 0;
 }
 
 static inline void
-mlxsw_sp_acl_block_disable_inc(struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_disable_inc(struct mlxsw_sp_flow_block *block)
 {
 	if (block)
 		block->disable_count++;
 }
 
 static inline void
-mlxsw_sp_acl_block_disable_dec(struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_disable_dec(struct mlxsw_sp_flow_block *block)
 {
 	if (block)
 		block->disable_count--;
 }
 
 static inline bool
-mlxsw_sp_acl_block_disabled(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_disabled(const struct mlxsw_sp_flow_block *block)
 {
 	return block->disable_count;
 }
 
 static inline bool
-mlxsw_sp_acl_block_is_egress_bound(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_is_egress_bound(const struct mlxsw_sp_flow_block *block)
 {
 	return block->egress_binding_count;
 }
 
 static inline bool
-mlxsw_sp_acl_block_is_ingress_bound(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_is_ingress_bound(const struct mlxsw_sp_flow_block *block)
 {
 	return block->ingress_binding_count;
 }
 
 static inline bool
-mlxsw_sp_acl_block_is_mixed_bound(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_flow_block_is_mixed_bound(const struct mlxsw_sp_flow_block *block)
 {
 	return block->ingress_binding_count && block->egress_binding_count;
 }
 
-struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
-						     struct net *net);
-void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block);
-int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress,
-			    struct netlink_ext_ack *extack);
-int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_acl_block *block,
-			      struct mlxsw_sp_port *mlxsw_sp_port,
-			      bool ingress);
+struct mlxsw_sp_flow_block *mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp,
+						       struct net *net);
+void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block);
+int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_flow_block *block,
+			     struct mlxsw_sp_port *mlxsw_sp_port,
+			     bool ingress,
+			     struct netlink_ext_ack *extack);
+int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_flow_block *block,
+			       struct mlxsw_sp_port *mlxsw_sp_port,
+			       bool ingress);
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block, u32 chain_index,
+			    struct mlxsw_sp_flow_block *block, u32 chain_index,
 			    enum mlxsw_sp_acl_profile profile);
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
-			 struct mlxsw_sp_acl_block *block, u32 chain_index,
+			 struct mlxsw_sp_flow_block *block, u32 chain_index,
 			 enum mlxsw_sp_acl_profile profile,
 			 struct mlxsw_afk_element_usage *tmplt_elusage);
 void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
@@ -778,7 +778,7 @@ int mlxsw_sp_acl_rulei_act_drop(struct mlxsw_sp_acl_rule_info *rulei,
 int mlxsw_sp_acl_rulei_act_trap(struct mlxsw_sp_acl_rule_info *rulei);
 int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_acl_rule_info *rulei,
-				  struct mlxsw_sp_acl_block *block,
+				  struct mlxsw_sp_flow_block *block,
 				  struct net_device *out_dev,
 				  struct netlink_ext_ack *extack);
 int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
@@ -901,19 +901,19 @@ extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block,
+			    struct mlxsw_sp_flow_block *block,
 			    struct flow_cls_offload *f);
 void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_acl_block *block,
+			     struct mlxsw_sp_flow_block *block,
 			     struct flow_cls_offload *f);
 int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_acl_block *block,
+			  struct mlxsw_sp_flow_block *block,
 			  struct flow_cls_offload *f);
 int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_acl_block *block,
+				 struct mlxsw_sp_flow_block *block,
 				 struct flow_cls_offload *f);
 void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
-				   struct mlxsw_sp_acl_block *block,
+				   struct mlxsw_sp_flow_block *block,
 				   struct flow_cls_offload *f);
 
 /* spectrum_qdisc.c */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
index e31ec75ac035..a11d911302f1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum2_mr_tcam.c
@@ -9,7 +9,7 @@
 
 struct mlxsw_sp2_mr_tcam {
 	struct mlxsw_sp *mlxsw_sp;
-	struct mlxsw_sp_acl_block *acl_block;
+	struct mlxsw_sp_flow_block *flow_block;
 	struct mlxsw_sp_acl_ruleset *ruleset4;
 	struct mlxsw_sp_acl_ruleset *ruleset6;
 };
@@ -61,7 +61,7 @@ static int mlxsw_sp2_mr_tcam_ipv4_init(struct mlxsw_sp2_mr_tcam *mr_tcam)
 				     mlxsw_sp2_mr_tcam_usage_ipv4,
 				     ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv4));
 	mr_tcam->ruleset4 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp,
-						     mr_tcam->acl_block,
+						     mr_tcam->flow_block,
 						     MLXSW_SP_L3_PROTO_IPV4,
 						     MLXSW_SP_ACL_PROFILE_MR,
 						     &elusage);
@@ -111,7 +111,7 @@ static int mlxsw_sp2_mr_tcam_ipv6_init(struct mlxsw_sp2_mr_tcam *mr_tcam)
 				     mlxsw_sp2_mr_tcam_usage_ipv6,
 				     ARRAY_SIZE(mlxsw_sp2_mr_tcam_usage_ipv6));
 	mr_tcam->ruleset6 = mlxsw_sp_acl_ruleset_get(mr_tcam->mlxsw_sp,
-						     mr_tcam->acl_block,
+						     mr_tcam->flow_block,
 						     MLXSW_SP_L3_PROTO_IPV6,
 						     MLXSW_SP_ACL_PROFILE_MR,
 						     &elusage);
@@ -289,8 +289,8 @@ static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 	int err;
 
 	mr_tcam->mlxsw_sp = mlxsw_sp;
-	mr_tcam->acl_block = mlxsw_sp_acl_block_create(mlxsw_sp, NULL);
-	if (!mr_tcam->acl_block)
+	mr_tcam->flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, NULL);
+	if (!mr_tcam->flow_block)
 		return -ENOMEM;
 
 	err = mlxsw_sp2_mr_tcam_ipv4_init(mr_tcam);
@@ -306,7 +306,7 @@ static int mlxsw_sp2_mr_tcam_init(struct mlxsw_sp *mlxsw_sp, void *priv)
 err_ipv6_init:
 	mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam);
 err_ipv4_init:
-	mlxsw_sp_acl_block_destroy(mr_tcam->acl_block);
+	mlxsw_sp_flow_block_destroy(mr_tcam->flow_block);
 	return err;
 }
 
@@ -316,7 +316,7 @@ static void mlxsw_sp2_mr_tcam_fini(void *priv)
 
 	mlxsw_sp2_mr_tcam_ipv6_fini(mr_tcam);
 	mlxsw_sp2_mr_tcam_ipv4_fini(mr_tcam);
-	mlxsw_sp_acl_block_destroy(mr_tcam->acl_block);
+	mlxsw_sp_flow_block_destroy(mr_tcam->flow_block);
 }
 
 const struct mlxsw_sp_mr_tcam_ops mlxsw_sp2_mr_tcam_ops = {
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index bb06c007b3f2..f9524cb95e9f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -40,7 +40,7 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
 	return acl->afk;
 }
 
-struct mlxsw_sp_acl_block_binding {
+struct mlxsw_sp_flow_block_binding {
 	struct list_head list;
 	struct net_device *dev;
 	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -48,7 +48,7 @@ struct mlxsw_sp_acl_block_binding {
 };
 
 struct mlxsw_sp_acl_ruleset_ht_key {
-	struct mlxsw_sp_acl_block *block;
+	struct mlxsw_sp_flow_block *block;
 	u32 chain_index;
 	const struct mlxsw_sp_acl_profile_ops *ops;
 };
@@ -103,8 +103,8 @@ mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
 
 static int
 mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_acl_block *block,
-			  struct mlxsw_sp_acl_block_binding *binding)
+			  struct mlxsw_sp_flow_block *block,
+			  struct mlxsw_sp_flow_block_binding *binding)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
@@ -115,8 +115,8 @@ mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
 
 static void
 mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block,
-			    struct mlxsw_sp_acl_block_binding *binding)
+			    struct mlxsw_sp_flow_block *block,
+			    struct mlxsw_sp_flow_block_binding *binding)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
@@ -126,7 +126,7 @@ mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
 }
 
 static bool
-mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_acl_block *block)
+mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_flow_block *block)
 {
 	return block->ruleset_zero;
 }
@@ -134,9 +134,9 @@ mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_acl_block *block)
 static int
 mlxsw_sp_acl_ruleset_block_bind(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_ruleset *ruleset,
-				struct mlxsw_sp_acl_block *block)
+				struct mlxsw_sp_flow_block *block)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 	int err;
 
 	block->ruleset_zero = ruleset;
@@ -159,19 +159,19 @@ rollback:
 static void
 mlxsw_sp_acl_ruleset_block_unbind(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_acl_ruleset *ruleset,
-				  struct mlxsw_sp_acl_block *block)
+				  struct mlxsw_sp_flow_block *block)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 
 	list_for_each_entry(binding, &block->binding_list, list)
 		mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
 	block->ruleset_zero = NULL;
 }
 
-struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
-						     struct net *net)
+struct mlxsw_sp_flow_block *
+mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net)
 {
-	struct mlxsw_sp_acl_block *block;
+	struct mlxsw_sp_flow_block *block;
 
 	block = kzalloc(sizeof(*block), GFP_KERNEL);
 	if (!block)
@@ -182,17 +182,17 @@ struct mlxsw_sp_acl_block *mlxsw_sp_acl_block_create(struct mlxsw_sp *mlxsw_sp,
 	return block;
 }
 
-void mlxsw_sp_acl_block_destroy(struct mlxsw_sp_acl_block *block)
+void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block)
 {
 	WARN_ON(!list_empty(&block->binding_list));
 	kfree(block);
 }
 
-static struct mlxsw_sp_acl_block_binding *
-mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block,
-			  struct mlxsw_sp_port *mlxsw_sp_port, bool ingress)
+static struct mlxsw_sp_flow_block_binding *
+mlxsw_sp_flow_block_lookup(struct mlxsw_sp_flow_block *block,
+			   struct mlxsw_sp_port *mlxsw_sp_port, bool ingress)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 
 	list_for_each_entry(binding, &block->binding_list, list)
 		if (binding->mlxsw_sp_port == mlxsw_sp_port &&
@@ -201,16 +201,16 @@ mlxsw_sp_acl_block_lookup(struct mlxsw_sp_acl_block *block,
 	return NULL;
 }
 
-int mlxsw_sp_acl_block_bind(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block,
-			    struct mlxsw_sp_port *mlxsw_sp_port,
-			    bool ingress,
-			    struct netlink_ext_ack *extack)
+int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_flow_block *block,
+			     struct mlxsw_sp_port *mlxsw_sp_port,
+			     bool ingress,
+			     struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 	int err;
 
-	if (WARN_ON(mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress)))
+	if (WARN_ON(mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress)))
 		return -EEXIST;
 
 	if (ingress && block->ingress_blocker_rule_count) {
@@ -247,14 +247,14 @@ err_ruleset_bind:
 	return err;
 }
 
-int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
-			      struct mlxsw_sp_acl_block *block,
-			      struct mlxsw_sp_port *mlxsw_sp_port,
-			      bool ingress)
+int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_flow_block *block,
+			       struct mlxsw_sp_port *mlxsw_sp_port,
+			       bool ingress)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 
-	binding = mlxsw_sp_acl_block_lookup(block, mlxsw_sp_port, ingress);
+	binding = mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress);
 	if (!binding)
 		return -ENOENT;
 
@@ -274,7 +274,7 @@ int mlxsw_sp_acl_block_unbind(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block, u32 chain_index,
+			    struct mlxsw_sp_flow_block *block, u32 chain_index,
 			    const struct mlxsw_sp_acl_profile_ops *ops,
 			    struct mlxsw_afk_element_usage *tmplt_elusage)
 {
@@ -345,7 +345,7 @@ static void mlxsw_sp_acl_ruleset_ref_dec(struct mlxsw_sp *mlxsw_sp,
 
 static struct mlxsw_sp_acl_ruleset *
 __mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl,
-			      struct mlxsw_sp_acl_block *block, u32 chain_index,
+			      struct mlxsw_sp_flow_block *block, u32 chain_index,
 			      const struct mlxsw_sp_acl_profile_ops *ops)
 {
 	struct mlxsw_sp_acl_ruleset_ht_key ht_key;
@@ -360,7 +360,7 @@ __mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp_acl *acl,
 
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block, u32 chain_index,
+			    struct mlxsw_sp_flow_block *block, u32 chain_index,
 			    enum mlxsw_sp_acl_profile profile)
 {
 	const struct mlxsw_sp_acl_profile_ops *ops;
@@ -378,7 +378,7 @@ mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
-			 struct mlxsw_sp_acl_block *block, u32 chain_index,
+			 struct mlxsw_sp_flow_block *block, u32 chain_index,
 			 enum mlxsw_sp_acl_profile profile,
 			 struct mlxsw_afk_element_usage *tmplt_elusage)
 {
@@ -541,11 +541,11 @@ int mlxsw_sp_acl_rulei_act_fwd(struct mlxsw_sp *mlxsw_sp,
 
 int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 				  struct mlxsw_sp_acl_rule_info *rulei,
-				  struct mlxsw_sp_acl_block *block,
+				  struct mlxsw_sp_flow_block *block,
 				  struct net_device *out_dev,
 				  struct netlink_ext_ack *extack)
 {
-	struct mlxsw_sp_acl_block_binding *binding;
+	struct mlxsw_sp_flow_block_binding *binding;
 	struct mlxsw_sp_port *in_port;
 
 	if (!list_is_singular(&block->binding_list)) {
@@ -553,7 +553,7 @@ int mlxsw_sp_acl_rulei_act_mirror(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 	binding = list_first_entry(&block->binding_list,
-				   struct mlxsw_sp_acl_block_binding, list);
+				   struct mlxsw_sp_flow_block_binding, list);
 	in_port = binding->mlxsw_sp_port;
 
 	return mlxsw_afa_block_append_mirror(rulei->act_block,
@@ -775,7 +775,7 @@ int mlxsw_sp_acl_rule_add(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
-	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
+	struct mlxsw_sp_flow_block *block = ruleset->ht_key.block;
 	int err;
 
 	err = ops->rule_add(mlxsw_sp, ruleset->priv, rule->priv, rule->rulei);
@@ -819,7 +819,7 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = rule->ruleset;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
-	struct mlxsw_sp_acl_block *block = ruleset->ht_key.block;
+	struct mlxsw_sp_flow_block *block = ruleset->ht_key.block;
 
 	block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
 	block->ingress_blocker_rule_count -= rule->rulei->ingress_bind_blocker;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 51117a5a6bbf..89c2e9820e95 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -15,7 +15,7 @@
 #include "core_acl_flex_keys.h"
 
 static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
-					 struct mlxsw_sp_acl_block *block,
+					 struct mlxsw_sp_flow_block *block,
 					 struct mlxsw_sp_acl_rule_info *rulei,
 					 struct flow_action *flow_action,
 					 struct netlink_ext_ack *extack)
@@ -53,11 +53,11 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 		case FLOW_ACTION_DROP: {
 			bool ingress;
 
-			if (mlxsw_sp_acl_block_is_mixed_bound(block)) {
+			if (mlxsw_sp_flow_block_is_mixed_bound(block)) {
 				NL_SET_ERR_MSG_MOD(extack, "Drop action is not supported when block is bound to ingress and egress");
 				return -EOPNOTSUPP;
 			}
-			ingress = mlxsw_sp_acl_block_is_ingress_bound(block);
+			ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
 			err = mlxsw_sp_acl_rulei_act_drop(rulei, ingress,
 							  act->cookie, extack);
 			if (err) {
@@ -106,7 +106,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 			struct mlxsw_sp_fid *fid;
 			u16 fid_index;
 
-			if (mlxsw_sp_acl_block_is_egress_bound(block)) {
+			if (mlxsw_sp_flow_block_is_egress_bound(block)) {
 				NL_SET_ERR_MSG_MOD(extack, "Redirect action is not supported on egress");
 				return -EOPNOTSUPP;
 			}
@@ -190,7 +190,7 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 
 static int mlxsw_sp_flower_parse_meta(struct mlxsw_sp_acl_rule_info *rulei,
 				      struct flow_cls_offload *f,
-				      struct mlxsw_sp_acl_block *block)
+				      struct mlxsw_sp_flow_block *block)
 {
 	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
 	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -371,7 +371,7 @@ static int mlxsw_sp_flower_parse_ip(struct mlxsw_sp *mlxsw_sp,
 }
 
 static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_acl_block *block,
+				 struct mlxsw_sp_flow_block *block,
 				 struct mlxsw_sp_acl_rule_info *rulei,
 				 struct flow_cls_offload *f)
 {
@@ -460,7 +460,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 		struct flow_match_vlan match;
 
 		flow_rule_match_vlan(rule, &match);
-		if (mlxsw_sp_acl_block_is_egress_bound(block)) {
+		if (mlxsw_sp_flow_block_is_egress_bound(block)) {
 			NL_SET_ERR_MSG_MOD(f->common.extack, "vlan_id key is not supported on egress");
 			return -EOPNOTSUPP;
 		}
@@ -505,7 +505,7 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 }
 
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_acl_block *block,
+			    struct mlxsw_sp_flow_block *block,
 			    struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_rule_info *rulei;
@@ -552,7 +552,7 @@ err_rule_create:
 }
 
 void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_acl_block *block,
+			     struct mlxsw_sp_flow_block *block,
 			     struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
@@ -574,7 +574,7 @@ void mlxsw_sp_flower_destroy(struct mlxsw_sp *mlxsw_sp,
 }
 
 int mlxsw_sp_flower_stats(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_acl_block *block,
+			  struct mlxsw_sp_flow_block *block,
 			  struct flow_cls_offload *f)
 {
 	enum flow_action_hw_stats used_hw_stats = FLOW_ACTION_HW_STATS_DISABLED;
@@ -611,7 +611,7 @@ err_rule_get_stats:
 }
 
 int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
-				 struct mlxsw_sp_acl_block *block,
+				 struct mlxsw_sp_flow_block *block,
 				 struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
@@ -632,7 +632,7 @@ int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
 }
 
 void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
-				   struct mlxsw_sp_acl_block *block,
+				   struct mlxsw_sp_flow_block *block,
 				   struct flow_cls_offload *f)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset;
-- 
cgit v1.2.3-59-g8ed1b


From d52238eb7bcf53225841217af12fe1383205fcaa Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:00 +0300
Subject: mlxsw: spectrum: Push flow_block related functions into a separate
 file

The flow_block code currently lives in spectrum_acl.c, mixed in with the
ACL code. However, it is not specific to the ACL part, so move it into a
separate file.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile       |   1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  34 ++++--
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 131 +--------------------
 .../net/ethernet/mellanox/mlxsw/spectrum_flow.c    | 120 +++++++++++++++++++
 4 files changed, 151 insertions(+), 135 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 0e86a581d45b..59cbf02d6731 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -21,6 +21,7 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_acl_atcam.o spectrum_acl_erp.o \
 				   spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
 				   spectrum_acl_bloom_filter.o spectrum_acl.o \
+				   spectrum_flow.o \
 				   spectrum_flower.o spectrum_cnt.o \
 				   spectrum_fid.o spectrum_ipip.o \
 				   spectrum_acl_flex_actions.o \
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 65b1a2d87c2d..d4ef079aab4b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -654,15 +654,7 @@ struct mlxsw_sp_acl_rule_info {
 	unsigned int counter_index;
 };
 
-struct mlxsw_sp_flow_block;
-struct mlxsw_sp_acl_ruleset;
-
-/* spectrum_acl.c */
-enum mlxsw_sp_acl_profile {
-	MLXSW_SP_ACL_PROFILE_FLOWER,
-	MLXSW_SP_ACL_PROFILE_MR,
-};
-
+/* spectrum_flow.c */
 struct mlxsw_sp_flow_block {
 	struct list_head binding_list;
 	struct mlxsw_sp_acl_ruleset *ruleset_zero;
@@ -676,7 +668,12 @@ struct mlxsw_sp_flow_block {
 	struct net *net;
 };
 
-struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
+struct mlxsw_sp_flow_block_binding {
+	struct list_head list;
+	struct net_device *dev;
+	struct mlxsw_sp_port *mlxsw_sp_port;
+	bool ingress;
+};
 
 static inline struct mlxsw_sp *
 mlxsw_sp_flow_block_mlxsw_sp(struct mlxsw_sp_flow_block *block)
@@ -740,6 +737,23 @@ int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
 			       struct mlxsw_sp_flow_block *block,
 			       struct mlxsw_sp_port *mlxsw_sp_port,
 			       bool ingress);
+
+/* spectrum_acl.c */
+struct mlxsw_sp_acl_ruleset;
+
+enum mlxsw_sp_acl_profile {
+	MLXSW_SP_ACL_PROFILE_FLOWER,
+	MLXSW_SP_ACL_PROFILE_MR,
+};
+
+struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl);
+
+int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_flow_block *block,
+			      struct mlxsw_sp_flow_block_binding *binding);
+void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_flow_block *block,
+				 struct mlxsw_sp_flow_block_binding *binding);
 struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_lookup(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_flow_block *block, u32 chain_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index f9524cb95e9f..800eaa6be3c0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -40,13 +40,6 @@ struct mlxsw_afk *mlxsw_sp_acl_afk(struct mlxsw_sp_acl *acl)
 	return acl->afk;
 }
 
-struct mlxsw_sp_flow_block_binding {
-	struct list_head list;
-	struct net_device *dev;
-	struct mlxsw_sp_port *mlxsw_sp_port;
-	bool ingress;
-};
-
 struct mlxsw_sp_acl_ruleset_ht_key {
 	struct mlxsw_sp_flow_block *block;
 	u32 chain_index;
@@ -101,10 +94,9 @@ mlxsw_sp_acl_ruleset_is_singular(const struct mlxsw_sp_acl_ruleset *ruleset)
 	return ruleset->ref_count == 2;
 }
 
-static int
-mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
-			  struct mlxsw_sp_flow_block *block,
-			  struct mlxsw_sp_flow_block_binding *binding)
+int mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
+			      struct mlxsw_sp_flow_block *block,
+			      struct mlxsw_sp_flow_block_binding *binding)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
@@ -113,10 +105,9 @@ mlxsw_sp_acl_ruleset_bind(struct mlxsw_sp *mlxsw_sp,
 				 binding->mlxsw_sp_port, binding->ingress);
 }
 
-static void
-mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
-			    struct mlxsw_sp_flow_block *block,
-			    struct mlxsw_sp_flow_block_binding *binding)
+void mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
+				 struct mlxsw_sp_flow_block *block,
+				 struct mlxsw_sp_flow_block_binding *binding)
 {
 	struct mlxsw_sp_acl_ruleset *ruleset = block->ruleset_zero;
 	const struct mlxsw_sp_acl_profile_ops *ops = ruleset->ht_key.ops;
@@ -125,12 +116,6 @@ mlxsw_sp_acl_ruleset_unbind(struct mlxsw_sp *mlxsw_sp,
 			    binding->mlxsw_sp_port, binding->ingress);
 }
 
-static bool
-mlxsw_sp_acl_ruleset_block_bound(const struct mlxsw_sp_flow_block *block)
-{
-	return block->ruleset_zero;
-}
-
 static int
 mlxsw_sp_acl_ruleset_block_bind(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_acl_ruleset *ruleset,
@@ -168,110 +153,6 @@ mlxsw_sp_acl_ruleset_block_unbind(struct mlxsw_sp *mlxsw_sp,
 	block->ruleset_zero = NULL;
 }
 
-struct mlxsw_sp_flow_block *
-mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net)
-{
-	struct mlxsw_sp_flow_block *block;
-
-	block = kzalloc(sizeof(*block), GFP_KERNEL);
-	if (!block)
-		return NULL;
-	INIT_LIST_HEAD(&block->binding_list);
-	block->mlxsw_sp = mlxsw_sp;
-	block->net = net;
-	return block;
-}
-
-void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block)
-{
-	WARN_ON(!list_empty(&block->binding_list));
-	kfree(block);
-}
-
-static struct mlxsw_sp_flow_block_binding *
-mlxsw_sp_flow_block_lookup(struct mlxsw_sp_flow_block *block,
-			   struct mlxsw_sp_port *mlxsw_sp_port, bool ingress)
-{
-	struct mlxsw_sp_flow_block_binding *binding;
-
-	list_for_each_entry(binding, &block->binding_list, list)
-		if (binding->mlxsw_sp_port == mlxsw_sp_port &&
-		    binding->ingress == ingress)
-			return binding;
-	return NULL;
-}
-
-int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_flow_block *block,
-			     struct mlxsw_sp_port *mlxsw_sp_port,
-			     bool ingress,
-			     struct netlink_ext_ack *extack)
-{
-	struct mlxsw_sp_flow_block_binding *binding;
-	int err;
-
-	if (WARN_ON(mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress)))
-		return -EEXIST;
-
-	if (ingress && block->ingress_blocker_rule_count) {
-		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to ingress because it contains unsupported rules");
-		return -EOPNOTSUPP;
-	}
-
-	if (!ingress && block->egress_blocker_rule_count) {
-		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
-		return -EOPNOTSUPP;
-	}
-
-	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
-	if (!binding)
-		return -ENOMEM;
-	binding->mlxsw_sp_port = mlxsw_sp_port;
-	binding->ingress = ingress;
-
-	if (mlxsw_sp_acl_ruleset_block_bound(block)) {
-		err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding);
-		if (err)
-			goto err_ruleset_bind;
-	}
-
-	if (ingress)
-		block->ingress_binding_count++;
-	else
-		block->egress_binding_count++;
-	list_add(&binding->list, &block->binding_list);
-	return 0;
-
-err_ruleset_bind:
-	kfree(binding);
-	return err;
-}
-
-int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp_flow_block *block,
-			       struct mlxsw_sp_port *mlxsw_sp_port,
-			       bool ingress)
-{
-	struct mlxsw_sp_flow_block_binding *binding;
-
-	binding = mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress);
-	if (!binding)
-		return -ENOENT;
-
-	list_del(&binding->list);
-
-	if (ingress)
-		block->ingress_binding_count--;
-	else
-		block->egress_binding_count--;
-
-	if (mlxsw_sp_acl_ruleset_block_bound(block))
-		mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
-
-	kfree(binding);
-	return 0;
-}
-
 static struct mlxsw_sp_acl_ruleset *
 mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_flow_block *block, u32 chain_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
new file mode 100644
index 000000000000..655e1df5c95a
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -0,0 +1,120 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2020 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/slab.h>
+#include <linux/errno.h>
+#include <linux/list.h>
+#include <net/net_namespace.h>
+
+#include "spectrum.h"
+
+struct mlxsw_sp_flow_block *
+mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net)
+{
+	struct mlxsw_sp_flow_block *block;
+
+	block = kzalloc(sizeof(*block), GFP_KERNEL);
+	if (!block)
+		return NULL;
+	INIT_LIST_HEAD(&block->binding_list);
+	block->mlxsw_sp = mlxsw_sp;
+	block->net = net;
+	return block;
+}
+
+void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block)
+{
+	WARN_ON(!list_empty(&block->binding_list));
+	kfree(block);
+}
+
+static struct mlxsw_sp_flow_block_binding *
+mlxsw_sp_flow_block_lookup(struct mlxsw_sp_flow_block *block,
+			   struct mlxsw_sp_port *mlxsw_sp_port, bool ingress)
+{
+	struct mlxsw_sp_flow_block_binding *binding;
+
+	list_for_each_entry(binding, &block->binding_list, list)
+		if (binding->mlxsw_sp_port == mlxsw_sp_port &&
+		    binding->ingress == ingress)
+			return binding;
+	return NULL;
+}
+
+static bool
+mlxsw_sp_flow_block_ruleset_bound(const struct mlxsw_sp_flow_block *block)
+{
+	return block->ruleset_zero;
+}
+
+int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_flow_block *block,
+			     struct mlxsw_sp_port *mlxsw_sp_port,
+			     bool ingress,
+			     struct netlink_ext_ack *extack)
+{
+	struct mlxsw_sp_flow_block_binding *binding;
+	int err;
+
+	if (WARN_ON(mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress)))
+		return -EEXIST;
+
+	if (ingress && block->ingress_blocker_rule_count) {
+		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to ingress because it contains unsupported rules");
+		return -EOPNOTSUPP;
+	}
+
+	if (!ingress && block->egress_blocker_rule_count) {
+		NL_SET_ERR_MSG_MOD(extack, "Block cannot be bound to egress because it contains unsupported rules");
+		return -EOPNOTSUPP;
+	}
+
+	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
+	if (!binding)
+		return -ENOMEM;
+	binding->mlxsw_sp_port = mlxsw_sp_port;
+	binding->ingress = ingress;
+
+	if (mlxsw_sp_flow_block_ruleset_bound(block)) {
+		err = mlxsw_sp_acl_ruleset_bind(mlxsw_sp, block, binding);
+		if (err)
+			goto err_ruleset_bind;
+	}
+
+	if (ingress)
+		block->ingress_binding_count++;
+	else
+		block->egress_binding_count++;
+	list_add(&binding->list, &block->binding_list);
+	return 0;
+
+err_ruleset_bind:
+	kfree(binding);
+	return err;
+}
+
+int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
+			       struct mlxsw_sp_flow_block *block,
+			       struct mlxsw_sp_port *mlxsw_sp_port,
+			       bool ingress)
+{
+	struct mlxsw_sp_flow_block_binding *binding;
+
+	binding = mlxsw_sp_flow_block_lookup(block, mlxsw_sp_port, ingress);
+	if (!binding)
+		return -ENOENT;
+
+	list_del(&binding->list);
+
+	if (ingress)
+		block->ingress_binding_count--;
+	else
+		block->egress_binding_count--;
+
+	if (mlxsw_sp_flow_block_ruleset_bound(block))
+		mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
+
+	kfree(binding);
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From d7fcc986224d77c4ab66f436e7eaa11170c509af Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:01 +0300
Subject: mlxsw: spectrum: Push matchall bits into a separate file

As with flower, move the matchall-related code into a separate file.
Do some small renaming along the way (consistent "mall" prefixes;
drop "_tc_" and "_port_" where suitable).

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/Makefile       |   2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 186 +------------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  28 +--
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 202 +++++++++++++++++++++
 4 files changed, 214 insertions(+), 204 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c

diff --git a/drivers/net/ethernet/mellanox/mlxsw/Makefile b/drivers/net/ethernet/mellanox/mlxsw/Makefile
index 59cbf02d6731..4aeabb35c943 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/Makefile
+++ b/drivers/net/ethernet/mellanox/mlxsw/Makefile
@@ -21,7 +21,7 @@ mlxsw_spectrum-objs		:= spectrum.o spectrum_buffers.o \
 				   spectrum_acl_atcam.o spectrum_acl_erp.o \
 				   spectrum1_acl_tcam.o spectrum2_acl_tcam.o \
 				   spectrum_acl_bloom_filter.o spectrum_acl.o \
-				   spectrum_flow.o \
+				   spectrum_flow.o spectrum_matchall.o \
 				   spectrum_flower.o spectrum_cnt.o \
 				   spectrum_fid.o spectrum_ipip.o \
 				   spectrum_acl_flex_actions.o \
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index f64e8da21d4a..ff25f8fc55e9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -25,9 +25,7 @@
 #include <linux/log2.h>
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
-#include <net/tc_act/tc_mirred.h>
 #include <net/netevent.h>
-#include <net/tc_act/tc_sample.h>
 #include <net/addrconf.h>
 
 #include "spectrum.h"
@@ -582,16 +580,6 @@ static int mlxsw_sp_base_mac_get(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 }
 
-static int mlxsw_sp_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
-				    bool enable, u32 rate)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	char mpsc_pl[MLXSW_REG_MPSC_LEN];
-
-	mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl);
-}
-
 static int mlxsw_sp_port_admin_status_set(struct mlxsw_sp_port *mlxsw_sp_port,
 					  bool is_up)
 {
@@ -1362,181 +1350,15 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 	return 0;
 }
 
-static struct mlxsw_sp_port_mall_tc_entry *
-mlxsw_sp_port_mall_tc_entry_find(struct mlxsw_sp_port *port,
-				 unsigned long cookie) {
-	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
-
-	list_for_each_entry(mall_tc_entry, &port->mall_tc_list, list)
-		if (mall_tc_entry->cookie == cookie)
-			return mall_tc_entry;
-
-	return NULL;
-}
-
-static int
-mlxsw_sp_port_add_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct mlxsw_sp_port_mall_mirror_tc_entry *mirror,
-				      const struct flow_action_entry *act,
-				      bool ingress)
-{
-	enum mlxsw_sp_span_type span_type;
-
-	if (!act->dev) {
-		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
-		return -EINVAL;
-	}
-
-	mirror->ingress = ingress;
-	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, act->dev, span_type,
-					true, &mirror->span_id);
-}
-
-static void
-mlxsw_sp_port_del_cls_matchall_mirror(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct mlxsw_sp_port_mall_mirror_tc_entry *mirror)
-{
-	enum mlxsw_sp_span_type span_type;
-
-	span_type = mirror->ingress ?
-			MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
-				 span_type, true);
-}
-
-static int
-mlxsw_sp_port_add_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct tc_cls_matchall_offload *cls,
-				      const struct flow_action_entry *act,
-				      bool ingress)
-{
-	int err;
-
-	if (!mlxsw_sp_port->sample)
-		return -EOPNOTSUPP;
-	if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) {
-		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
-		return -EEXIST;
-	}
-	if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-		netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
-		return -EOPNOTSUPP;
-	}
-
-	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
-			   act->sample.psample_group);
-	mlxsw_sp_port->sample->truncate = act->sample.truncate;
-	mlxsw_sp_port->sample->trunc_size = act->sample.trunc_size;
-	mlxsw_sp_port->sample->rate = act->sample.rate;
-
-	err = mlxsw_sp_port_sample_set(mlxsw_sp_port, true, act->sample.rate);
-	if (err)
-		goto err_port_sample_set;
-	return 0;
-
-err_port_sample_set:
-	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
-	return err;
-}
-
-static void
-mlxsw_sp_port_del_cls_matchall_sample(struct mlxsw_sp_port *mlxsw_sp_port)
-{
-	if (!mlxsw_sp_port->sample)
-		return;
-
-	mlxsw_sp_port_sample_set(mlxsw_sp_port, false, 1);
-	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
-}
-
-static int mlxsw_sp_port_add_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  struct tc_cls_matchall_offload *f,
-					  bool ingress)
-{
-	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
-	__be16 protocol = f->common.protocol;
-	struct flow_action_entry *act;
-	int err;
-
-	if (!flow_offload_has_one_action(&f->rule->action)) {
-		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
-		return -EOPNOTSUPP;
-	}
-
-	mall_tc_entry = kzalloc(sizeof(*mall_tc_entry), GFP_KERNEL);
-	if (!mall_tc_entry)
-		return -ENOMEM;
-	mall_tc_entry->cookie = f->cookie;
-
-	act = &f->rule->action.entries[0];
-
-	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
-		struct mlxsw_sp_port_mall_mirror_tc_entry *mirror;
-
-		mall_tc_entry->type = MLXSW_SP_PORT_MALL_MIRROR;
-		mirror = &mall_tc_entry->mirror;
-		err = mlxsw_sp_port_add_cls_matchall_mirror(mlxsw_sp_port,
-							    mirror, act,
-							    ingress);
-	} else if (act->id == FLOW_ACTION_SAMPLE &&
-		   protocol == htons(ETH_P_ALL)) {
-		mall_tc_entry->type = MLXSW_SP_PORT_MALL_SAMPLE;
-		err = mlxsw_sp_port_add_cls_matchall_sample(mlxsw_sp_port, f,
-							    act, ingress);
-	} else {
-		err = -EOPNOTSUPP;
-	}
-
-	if (err)
-		goto err_add_action;
-
-	list_add_tail(&mall_tc_entry->list, &mlxsw_sp_port->mall_tc_list);
-	return 0;
-
-err_add_action:
-	kfree(mall_tc_entry);
-	return err;
-}
-
-static void mlxsw_sp_port_del_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					   struct tc_cls_matchall_offload *f)
-{
-	struct mlxsw_sp_port_mall_tc_entry *mall_tc_entry;
-
-	mall_tc_entry = mlxsw_sp_port_mall_tc_entry_find(mlxsw_sp_port,
-							 f->cookie);
-	if (!mall_tc_entry) {
-		netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
-		return;
-	}
-	list_del(&mall_tc_entry->list);
-
-	switch (mall_tc_entry->type) {
-	case MLXSW_SP_PORT_MALL_MIRROR:
-		mlxsw_sp_port_del_cls_matchall_mirror(mlxsw_sp_port,
-						      &mall_tc_entry->mirror);
-		break;
-	case MLXSW_SP_PORT_MALL_SAMPLE:
-		mlxsw_sp_port_del_cls_matchall_sample(mlxsw_sp_port);
-		break;
-	default:
-		WARN_ON(1);
-	}
-
-	kfree(mall_tc_entry);
-}
-
 static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
 					  struct tc_cls_matchall_offload *f,
 					  bool ingress)
 {
 	switch (f->command) {
 	case TC_CLSMATCHALL_REPLACE:
-		return mlxsw_sp_port_add_cls_matchall(mlxsw_sp_port, f,
-						      ingress);
+		return mlxsw_sp_mall_replace(mlxsw_sp_port, f, ingress);
 	case TC_CLSMATCHALL_DESTROY:
-		mlxsw_sp_port_del_cls_matchall(mlxsw_sp_port, f);
+		mlxsw_sp_mall_destroy(mlxsw_sp_port, f);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -1800,7 +1622,7 @@ static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
 	if (!enable) {
 		if (mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->ing_flow_block) ||
 		    mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->eg_flow_block) ||
-		    !list_empty(&mlxsw_sp_port->mall_tc_list)) {
+		    !list_empty(&mlxsw_sp_port->mall_list)) {
 			netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n");
 			return -EINVAL;
 		}
@@ -3696,7 +3518,7 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	mlxsw_sp_port->mapping = *port_mapping;
 	mlxsw_sp_port->link.autoneg = 1;
 	INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list);
-	INIT_LIST_HEAD(&mlxsw_sp_port->mall_tc_list);
+	INIT_LIST_HEAD(&mlxsw_sp_port->mall_list);
 
 	mlxsw_sp_port->pcpu_stats =
 		netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d4ef079aab4b..5c2f1af53e53 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -109,25 +109,6 @@ struct mlxsw_sp_mid {
 	unsigned long *ports_in_mid; /* bits array */
 };
 
-enum mlxsw_sp_port_mall_action_type {
-	MLXSW_SP_PORT_MALL_MIRROR,
-	MLXSW_SP_PORT_MALL_SAMPLE,
-};
-
-struct mlxsw_sp_port_mall_mirror_tc_entry {
-	int span_id;
-	bool ingress;
-};
-
-struct mlxsw_sp_port_mall_tc_entry {
-	struct list_head list;
-	unsigned long cookie;
-	enum mlxsw_sp_port_mall_action_type type;
-	union {
-		struct mlxsw_sp_port_mall_mirror_tc_entry mirror;
-	};
-};
-
 struct mlxsw_sp_sb;
 struct mlxsw_sp_bridge;
 struct mlxsw_sp_router;
@@ -274,8 +255,7 @@ struct mlxsw_sp_port {
 					       * the same localport can have
 					       * different mapping.
 					       */
-	/* TC handles */
-	struct list_head mall_tc_list;
+	struct list_head mall_list;
 	struct {
 		#define MLXSW_HW_STATS_UPDATE_TIME HZ
 		struct rtnl_link_stats64 stats;
@@ -913,6 +893,12 @@ extern const struct mlxsw_afa_ops mlxsw_sp2_act_afa_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
+/* spectrum_matchall.c */
+int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_cls_matchall_offload *f, bool ingress);
+void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_cls_matchall_offload *f);
+
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_flow_block *block,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
new file mode 100644
index 000000000000..56f21cfdb48e
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -0,0 +1,202 @@
+// SPDX-License-Identifier: BSD-3-Clause OR GPL-2.0
+/* Copyright (c) 2017-2020 Mellanox Technologies. All rights reserved */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/netdevice.h>
+#include <net/flow_offload.h>
+
+#include "spectrum.h"
+#include "spectrum_span.h"
+#include "reg.h"
+
+enum mlxsw_sp_mall_action_type {
+	MLXSW_SP_MALL_ACTION_TYPE_MIRROR,
+	MLXSW_SP_MALL_ACTION_TYPE_SAMPLE,
+};
+
+struct mlxsw_sp_mall_mirror_entry {
+	int span_id;
+	bool ingress;
+};
+
+struct mlxsw_sp_mall_entry {
+	struct list_head list;
+	unsigned long cookie;
+	enum mlxsw_sp_mall_action_type type;
+	union {
+		struct mlxsw_sp_mall_mirror_entry mirror;
+	};
+};
+
+static struct mlxsw_sp_mall_entry *
+mlxsw_sp_mall_entry_find(struct mlxsw_sp_port *port, unsigned long cookie)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+
+	list_for_each_entry(mall_entry, &port->mall_list, list)
+		if (mall_entry->cookie == cookie)
+			return mall_entry;
+
+	return NULL;
+}
+
+static int
+mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_mall_mirror_entry *mirror,
+			      const struct flow_action_entry *act,
+			      bool ingress)
+{
+	enum mlxsw_sp_span_type span_type;
+
+	if (!act->dev) {
+		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
+		return -EINVAL;
+	}
+
+	mirror->ingress = ingress;
+	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
+	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, act->dev, span_type,
+					true, &mirror->span_id);
+}
+
+static void
+mlxsw_sp_mall_port_mirror_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct mlxsw_sp_mall_mirror_entry *mirror)
+{
+	enum mlxsw_sp_span_type span_type;
+
+	span_type = mirror->ingress ? MLXSW_SP_SPAN_INGRESS :
+				      MLXSW_SP_SPAN_EGRESS;
+	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
+				 span_type, true);
+}
+
+static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
+					 bool enable, u32 rate)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	char mpsc_pl[MLXSW_REG_MPSC_LEN];
+
+	mlxsw_reg_mpsc_pack(mpsc_pl, mlxsw_sp_port->local_port, enable, rate);
+	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpsc), mpsc_pl);
+}
+
+static int
+mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
+			      struct tc_cls_matchall_offload *cls,
+			      const struct flow_action_entry *act, bool ingress)
+{
+	int err;
+
+	if (!mlxsw_sp_port->sample)
+		return -EOPNOTSUPP;
+	if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) {
+		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
+		return -EEXIST;
+	}
+	if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
+		netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
+			   act->sample.psample_group);
+	mlxsw_sp_port->sample->truncate = act->sample.truncate;
+	mlxsw_sp_port->sample->trunc_size = act->sample.trunc_size;
+	mlxsw_sp_port->sample->rate = act->sample.rate;
+
+	err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true,
+					    act->sample.rate);
+	if (err)
+		goto err_port_sample_set;
+	return 0;
+
+err_port_sample_set:
+	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+	return err;
+}
+
+static void
+mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	if (!mlxsw_sp_port->sample)
+		return;
+
+	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
+	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+}
+
+int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
+			  struct tc_cls_matchall_offload *f, bool ingress)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+	__be16 protocol = f->common.protocol;
+	struct flow_action_entry *act;
+	int err;
+
+	if (!flow_offload_has_one_action(&f->rule->action)) {
+		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL);
+	if (!mall_entry)
+		return -ENOMEM;
+	mall_entry->cookie = f->cookie;
+
+	act = &f->rule->action.entries[0];
+
+	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
+		struct mlxsw_sp_mall_mirror_entry *mirror;
+
+		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
+		mirror = &mall_entry->mirror;
+		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mirror, act,
+						    ingress);
+	} else if (act->id == FLOW_ACTION_SAMPLE &&
+		   protocol == htons(ETH_P_ALL)) {
+		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
+		err = mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, f, act,
+						    ingress);
+	} else {
+		err = -EOPNOTSUPP;
+	}
+
+	if (err)
+		goto err_add_action;
+
+	list_add_tail(&mall_entry->list, &mlxsw_sp_port->mall_list);
+	return 0;
+
+err_add_action:
+	kfree(mall_entry);
+	return err;
+}
+
+void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+			   struct tc_cls_matchall_offload *f)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+
+	mall_entry = mlxsw_sp_mall_entry_find(mlxsw_sp_port, f->cookie);
+	if (!mall_entry) {
+		netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
+		return;
+	}
+	list_del(&mall_entry->list);
+
+	switch (mall_entry->type) {
+	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
+		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port,
+					      &mall_entry->mirror);
+		break;
+	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
+		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
+		break;
+	default:
+		WARN_ON(1);
+	}
+
+	kfree(mall_entry);
+}
-- 
cgit v1.2.3-59-g8ed1b


From 6c8cd435b58780ded6e3e06d722249ec181efb36 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:02 +0300
Subject: mlxsw: spectrum_acl: Use block variable in mlxsw_sp_acl_rule_del()

In a couple of places in mlxsw_sp_acl_rule_del(), the block variable is
not used directly even though it could be. Use it directly.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index 800eaa6be3c0..c61f78e30397 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -704,14 +704,13 @@ void mlxsw_sp_acl_rule_del(struct mlxsw_sp *mlxsw_sp,
 
 	block->egress_blocker_rule_count -= rule->rulei->egress_bind_blocker;
 	block->ingress_blocker_rule_count -= rule->rulei->ingress_bind_blocker;
-	ruleset->ht_key.block->rule_count--;
+	block->rule_count--;
 	mutex_lock(&mlxsw_sp->acl->rules_lock);
 	list_del(&rule->list);
 	mutex_unlock(&mlxsw_sp->acl->rules_lock);
 	if (!ruleset->ht_key.chain_index &&
 	    mlxsw_sp_acl_ruleset_is_singular(ruleset))
-		mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset,
-						  ruleset->ht_key.block);
+		mlxsw_sp_acl_ruleset_block_unbind(mlxsw_sp, ruleset, block);
 	rhashtable_remove_fast(&ruleset->rule_ht, &rule->ht_node,
 			       mlxsw_sp_acl_rule_ht_params);
 	ops->rule_del(mlxsw_sp, rule->priv);
-- 
cgit v1.2.3-59-g8ed1b


From 780ba878a1b024e176dc27c980e1600a23d7b5c5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:03 +0300
Subject: mlxsw: spectrum_matchall: Pass mall_entry as arg to
 mlxsw_sp_mall_port_mirror_add()

In preparation for future changes, have the
mlxsw_sp_mall_port_mirror_add() function accept mall_entry, including
the "to_dev" originally obtained from the act pointer.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 34 +++++++++++-----------
 1 file changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 56f21cfdb48e..b57267f0c9a1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -16,6 +16,7 @@ enum mlxsw_sp_mall_action_type {
 };
 
 struct mlxsw_sp_mall_mirror_entry {
+	const struct net_device *to_dev;
 	int span_id;
 	bool ingress;
 };
@@ -43,32 +44,34 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_port *port, unsigned long cookie)
 
 static int
 mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_mirror_entry *mirror,
-			      const struct flow_action_entry *act,
+			      struct mlxsw_sp_mall_entry *mall_entry,
 			      bool ingress)
 {
 	enum mlxsw_sp_span_type span_type;
 
-	if (!act->dev) {
+	if (!mall_entry->mirror.to_dev) {
 		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
 		return -EINVAL;
 	}
 
-	mirror->ingress = ingress;
-	span_type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	return mlxsw_sp_span_mirror_add(mlxsw_sp_port, act->dev, span_type,
-					true, &mirror->span_id);
+	mall_entry->mirror.ingress = ingress;
+	span_type = mall_entry->mirror.ingress ? MLXSW_SP_SPAN_INGRESS :
+						 MLXSW_SP_SPAN_EGRESS;
+	return mlxsw_sp_span_mirror_add(mlxsw_sp_port,
+					mall_entry->mirror.to_dev,
+					span_type, true,
+					&mall_entry->mirror.span_id);
 }
 
 static void
 mlxsw_sp_mall_port_mirror_del(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_mirror_entry *mirror)
+			      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	enum mlxsw_sp_span_type span_type;
 
-	span_type = mirror->ingress ? MLXSW_SP_SPAN_INGRESS :
-				      MLXSW_SP_SPAN_EGRESS;
-	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mirror->span_id,
+	span_type = mall_entry->mirror.ingress ? MLXSW_SP_SPAN_INGRESS :
+						 MLXSW_SP_SPAN_EGRESS;
+	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mall_entry->mirror.span_id,
 				 span_type, true);
 }
 
@@ -148,11 +151,9 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	act = &f->rule->action.entries[0];
 
 	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
-		struct mlxsw_sp_mall_mirror_entry *mirror;
-
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
-		mirror = &mall_entry->mirror;
-		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mirror, act,
+		mall_entry->mirror.to_dev = act->dev;
+		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry,
 						    ingress);
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
@@ -188,8 +189,7 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 
 	switch (mall_entry->type) {
 	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
-		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port,
-					      &mall_entry->mirror);
+		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry);
 		break;
 	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
 		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
-- 
cgit v1.2.3-59-g8ed1b


From c7ea0e162fc84602fde67d41edf72ae3f4f4a14f Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:04 +0300
Subject: mlxsw: spectrum_matchall: Pass mall_entry as arg to
 mlxsw_sp_mall_port_sample_add()

In preparation for future changes, have the
mlxsw_sp_mall_port_sample_add() function accept mall_entry, including
all needed info originally obtained from the cls and act pointers.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 35 ++++++++++++----------
 1 file changed, 19 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index b57267f0c9a1..adaaee208655 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -27,6 +27,7 @@ struct mlxsw_sp_mall_entry {
 	enum mlxsw_sp_mall_action_type type;
 	union {
 		struct mlxsw_sp_mall_mirror_entry mirror;
+		struct mlxsw_sp_port_sample sample;
 	};
 };
 
@@ -87,8 +88,7 @@ static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
 
 static int
 mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct tc_cls_matchall_offload *cls,
-			      const struct flow_action_entry *act, bool ingress)
+			      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	int err;
 
@@ -98,19 +98,14 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
 		return -EEXIST;
 	}
-	if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-		netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
-		return -EOPNOTSUPP;
-	}
-
 	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
-			   act->sample.psample_group);
-	mlxsw_sp_port->sample->truncate = act->sample.truncate;
-	mlxsw_sp_port->sample->trunc_size = act->sample.trunc_size;
-	mlxsw_sp_port->sample->rate = act->sample.rate;
+			   mall_entry->sample.psample_group);
+	mlxsw_sp_port->sample->truncate = mall_entry->sample.truncate;
+	mlxsw_sp_port->sample->trunc_size = mall_entry->sample.trunc_size;
+	mlxsw_sp_port->sample->rate = mall_entry->sample.rate;
 
 	err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true,
-					    act->sample.rate);
+					    mall_entry->sample.rate);
 	if (err)
 		goto err_port_sample_set;
 	return 0;
@@ -157,20 +152,28 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 						    ingress);
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
+		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
+			netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_SAMPLE;
-		err = mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, f, act,
-						    ingress);
+		mall_entry->sample.psample_group = act->sample.psample_group;
+		mall_entry->sample.truncate = act->sample.truncate;
+		mall_entry->sample.trunc_size = act->sample.trunc_size;
+		mall_entry->sample.rate = act->sample.rate;
+		err = mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry);
 	} else {
 		err = -EOPNOTSUPP;
 	}
 
 	if (err)
-		goto err_add_action;
+		goto errout;
 
 	list_add_tail(&mall_entry->list, &mlxsw_sp_port->mall_list);
 	return 0;
 
-err_add_action:
+errout:
 	kfree(mall_entry);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 47fa15eae487f3f454d004894671ebea53e77bde Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:05 +0300
Subject: mlxsw: spectrum_matchall: Move ingress indication into mall_entry

Instead of having it in mirror_entry structure, move it to mall_entry
and set it during rule insertion.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_matchall.c    | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index adaaee208655..c05e28971d06 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -18,13 +18,13 @@ enum mlxsw_sp_mall_action_type {
 struct mlxsw_sp_mall_mirror_entry {
 	const struct net_device *to_dev;
 	int span_id;
-	bool ingress;
 };
 
 struct mlxsw_sp_mall_entry {
 	struct list_head list;
 	unsigned long cookie;
 	enum mlxsw_sp_mall_action_type type;
+	bool ingress;
 	union {
 		struct mlxsw_sp_mall_mirror_entry mirror;
 		struct mlxsw_sp_port_sample sample;
@@ -45,8 +45,7 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_port *port, unsigned long cookie)
 
 static int
 mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
-			      struct mlxsw_sp_mall_entry *mall_entry,
-			      bool ingress)
+			      struct mlxsw_sp_mall_entry *mall_entry)
 {
 	enum mlxsw_sp_span_type span_type;
 
@@ -55,9 +54,8 @@ mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 		return -EINVAL;
 	}
 
-	mall_entry->mirror.ingress = ingress;
-	span_type = mall_entry->mirror.ingress ? MLXSW_SP_SPAN_INGRESS :
-						 MLXSW_SP_SPAN_EGRESS;
+	span_type = mall_entry->ingress ? MLXSW_SP_SPAN_INGRESS :
+					  MLXSW_SP_SPAN_EGRESS;
 	return mlxsw_sp_span_mirror_add(mlxsw_sp_port,
 					mall_entry->mirror.to_dev,
 					span_type, true,
@@ -70,8 +68,8 @@ mlxsw_sp_mall_port_mirror_del(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	enum mlxsw_sp_span_type span_type;
 
-	span_type = mall_entry->mirror.ingress ? MLXSW_SP_SPAN_INGRESS :
-						 MLXSW_SP_SPAN_EGRESS;
+	span_type = mall_entry->ingress ? MLXSW_SP_SPAN_INGRESS :
+					  MLXSW_SP_SPAN_EGRESS;
 	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mall_entry->mirror.span_id,
 				 span_type, true);
 }
@@ -142,14 +140,14 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (!mall_entry)
 		return -ENOMEM;
 	mall_entry->cookie = f->cookie;
+	mall_entry->ingress = ingress;
 
 	act = &f->rule->action.entries[0];
 
 	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
-		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry,
-						    ingress);
+		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry);
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
 		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-- 
cgit v1.2.3-59-g8ed1b


From dd0fbc89d274e392a077c5dc9a21d581de3252d1 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:06 +0300
Subject: mlxsw: spectrum_matchall: Push per-port rule add/del into separate
 functions

As the replace/destroy is going to be used later on per-block, push
the per-port rule addition/deletion into separate functions.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 48 ++++++++++++++++------
 1 file changed, 35 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index c05e28971d06..41301027a47c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -123,6 +123,37 @@ mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
 	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
 }
 
+static int
+mlxsw_sp_mall_port_rule_add(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct mlxsw_sp_mall_entry *mall_entry)
+{
+	switch (mall_entry->type) {
+	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
+		return mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry);
+	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
+		return mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry);
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+}
+
+static void
+mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct mlxsw_sp_mall_entry *mall_entry)
+{
+	switch (mall_entry->type) {
+	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
+		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry);
+		break;
+	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
+		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
+		break;
+	default:
+		WARN_ON(1);
+	}
+}
+
 int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 			  struct tc_cls_matchall_offload *f, bool ingress)
 {
@@ -147,7 +178,6 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
-		err = mlxsw_sp_mall_port_mirror_add(mlxsw_sp_port, mall_entry);
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
 		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
@@ -160,11 +190,12 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 		mall_entry->sample.truncate = act->sample.truncate;
 		mall_entry->sample.trunc_size = act->sample.trunc_size;
 		mall_entry->sample.rate = act->sample.rate;
-		err = mlxsw_sp_mall_port_sample_add(mlxsw_sp_port, mall_entry);
 	} else {
 		err = -EOPNOTSUPP;
+		goto errout;
 	}
 
+	err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
 	if (err)
 		goto errout;
 
@@ -186,18 +217,9 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 		netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
 		return;
 	}
-	list_del(&mall_entry->list);
 
-	switch (mall_entry->type) {
-	case MLXSW_SP_MALL_ACTION_TYPE_MIRROR:
-		mlxsw_sp_mall_port_mirror_del(mlxsw_sp_port, mall_entry);
-		break;
-	case MLXSW_SP_MALL_ACTION_TYPE_SAMPLE:
-		mlxsw_sp_mall_port_sample_del(mlxsw_sp_port);
-		break;
-	default:
-		WARN_ON(1);
-	}
+	mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
 
+	list_del(&mall_entry->list);
 	kfree(mall_entry);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 481ff57aadf5ea36bb3c5a9e659a2e1c5ecc6725 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:07 +0300
Subject: mlxsw: spectrum: Avoid copying sample values and use RCU pointer
 directly instead

Currently, only the psample_group is accessed using RCU on the RX path.
However, it is possible (though unlikely) that the other sample values
change during RX processing. Fix this by having the port->sample struct
accessed as an RCU pointer, containing all sample values including the
psample_group pointer. That avoids the extra per-port allocation, the
copying of the values, and the race condition described above.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 30 +++++-----------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  4 +--
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 17 +++++-------
 3 files changed, 14 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ff25f8fc55e9..5952ec26c169 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3527,13 +3527,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 		goto err_alloc_stats;
 	}
 
-	mlxsw_sp_port->sample = kzalloc(sizeof(*mlxsw_sp_port->sample),
-					GFP_KERNEL);
-	if (!mlxsw_sp_port->sample) {
-		err = -ENOMEM;
-		goto err_alloc_sample;
-	}
-
 	INIT_DELAYED_WORK(&mlxsw_sp_port->periodic_hw_stats.update_dw,
 			  &update_stats_cache);
 
@@ -3720,8 +3713,6 @@ err_dev_addr_init:
 err_port_swid_set:
 	mlxsw_sp_port_module_unmap(mlxsw_sp_port);
 err_port_module_map:
-	kfree(mlxsw_sp_port->sample);
-err_alloc_sample:
 	free_percpu(mlxsw_sp_port->pcpu_stats);
 err_alloc_stats:
 	free_netdev(dev);
@@ -3749,7 +3740,6 @@ static void mlxsw_sp_port_remove(struct mlxsw_sp *mlxsw_sp, u8 local_port)
 	mlxsw_sp_port_tc_mc_mode_set(mlxsw_sp_port, false);
 	mlxsw_sp_port_swid_set(mlxsw_sp_port, MLXSW_PORT_SWID_DISABLED_PORT);
 	mlxsw_sp_port_module_unmap(mlxsw_sp_port);
-	kfree(mlxsw_sp_port->sample);
 	free_percpu(mlxsw_sp_port->pcpu_stats);
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp_port->vlans_list));
 	free_netdev(mlxsw_sp_port->dev);
@@ -4236,7 +4226,7 @@ static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
 {
 	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
-	struct psample_group *psample_group;
+	struct mlxsw_sp_port_sample *sample;
 	u32 size;
 
 	if (unlikely(!mlxsw_sp_port)) {
@@ -4244,22 +4234,14 @@ static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
 				     local_port);
 		goto out;
 	}
-	if (unlikely(!mlxsw_sp_port->sample)) {
-		dev_warn_ratelimited(mlxsw_sp->bus_info->dev, "Port %d: sample skb received on unsupported port\n",
-				     local_port);
-		goto out;
-	}
-
-	size = mlxsw_sp_port->sample->truncate ?
-		  mlxsw_sp_port->sample->trunc_size : skb->len;
 
 	rcu_read_lock();
-	psample_group = rcu_dereference(mlxsw_sp_port->sample->psample_group);
-	if (!psample_group)
+	sample = rcu_dereference(mlxsw_sp_port->sample);
+	if (!sample)
 		goto out_unlock;
-	psample_sample_packet(psample_group, skb, size,
-			      mlxsw_sp_port->dev->ifindex, 0,
-			      mlxsw_sp_port->sample->rate);
+	size = sample->truncate ? sample->trunc_size : skb->len;
+	psample_sample_packet(sample->psample_group, skb, size,
+			      mlxsw_sp_port->dev->ifindex, 0, sample->rate);
 out_unlock:
 	rcu_read_unlock();
 out:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 5c2f1af53e53..4cdb7f1d7436 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -192,7 +192,7 @@ struct mlxsw_sp_port_pcpu_stats {
 };
 
 struct mlxsw_sp_port_sample {
-	struct psample_group __rcu *psample_group;
+	struct psample_group *psample_group;
 	u32 trunc_size;
 	u32 rate;
 	bool truncate;
@@ -262,7 +262,7 @@ struct mlxsw_sp_port {
 		struct mlxsw_sp_port_xstats xstats;
 		struct delayed_work update_dw;
 	} periodic_hw_stats;
-	struct mlxsw_sp_port_sample *sample;
+	struct mlxsw_sp_port_sample __rcu *sample;
 	struct list_head vlans_list;
 	struct mlxsw_sp_port_vlan *default_vlan;
 	struct mlxsw_sp_qdisc_state *qdisc;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 41301027a47c..bda5fb34162a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -29,6 +29,7 @@ struct mlxsw_sp_mall_entry {
 		struct mlxsw_sp_mall_mirror_entry mirror;
 		struct mlxsw_sp_port_sample sample;
 	};
+	struct rcu_head rcu;
 };
 
 static struct mlxsw_sp_mall_entry *
@@ -90,17 +91,11 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 {
 	int err;
 
-	if (!mlxsw_sp_port->sample)
-		return -EOPNOTSUPP;
-	if (rtnl_dereference(mlxsw_sp_port->sample->psample_group)) {
+	if (rtnl_dereference(mlxsw_sp_port->sample)) {
 		netdev_err(mlxsw_sp_port->dev, "sample already active\n");
 		return -EEXIST;
 	}
-	rcu_assign_pointer(mlxsw_sp_port->sample->psample_group,
-			   mall_entry->sample.psample_group);
-	mlxsw_sp_port->sample->truncate = mall_entry->sample.truncate;
-	mlxsw_sp_port->sample->trunc_size = mall_entry->sample.trunc_size;
-	mlxsw_sp_port->sample->rate = mall_entry->sample.rate;
+	rcu_assign_pointer(mlxsw_sp_port->sample, &mall_entry->sample);
 
 	err = mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, true,
 					    mall_entry->sample.rate);
@@ -109,7 +104,7 @@ mlxsw_sp_mall_port_sample_add(struct mlxsw_sp_port *mlxsw_sp_port,
 	return 0;
 
 err_port_sample_set:
-	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
 	return err;
 }
 
@@ -120,7 +115,7 @@ mlxsw_sp_mall_port_sample_del(struct mlxsw_sp_port *mlxsw_sp_port)
 		return;
 
 	mlxsw_sp_mall_port_sample_set(mlxsw_sp_port, false, 1);
-	RCU_INIT_POINTER(mlxsw_sp_port->sample->psample_group, NULL);
+	RCU_INIT_POINTER(mlxsw_sp_port->sample, NULL);
 }
 
 static int
@@ -221,5 +216,5 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
 	mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
 
 	list_del(&mall_entry->list);
-	kfree(mall_entry);
+	kfree_rcu(mall_entry, rcu); /* sample RX packets may be in-flight */
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3c650136afba8233e738849149b578d0ad6d2023 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:08 +0300
Subject: mlxsw: spectrum_matchall: Process matchall events from the same cb as
 flower

Currently there are two callbacks registered: one for matchall,
one for flower. This causes the user to see "in_hw_count 2" in TC filter
dump. Because of this and also as a preparation for future matchall
offload for rules equivalent to flower-all-match, move the processing of
shared block into matchall.c. Leave only one cb for mlxsw driver
per-block.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 125 +++++----------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  12 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flow.c    |  17 ++-
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    |  90 ++++++++++++---
 4 files changed, 124 insertions(+), 120 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5952ec26c169..ceaf73ac2008 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1350,15 +1350,15 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 	return 0;
 }
 
-static int mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_port *mlxsw_sp_port,
-					  struct tc_cls_matchall_offload *f,
-					  bool ingress)
+static int
+mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_flow_block *flow_block,
+			       struct tc_cls_matchall_offload *f)
 {
 	switch (f->command) {
 	case TC_CLSMATCHALL_REPLACE:
-		return mlxsw_sp_mall_replace(mlxsw_sp_port, f, ingress);
+		return mlxsw_sp_mall_replace(flow_block, f);
 	case TC_CLSMATCHALL_DESTROY:
-		mlxsw_sp_mall_destroy(mlxsw_sp_port, f);
+		mlxsw_sp_mall_destroy(flow_block, f);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -1389,62 +1389,25 @@ mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_flow_block *flow_block,
 	}
 }
 
-static int mlxsw_sp_setup_tc_block_cb_matchall(enum tc_setup_type type,
-					       void *type_data,
-					       void *cb_priv, bool ingress)
+static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type,
+				      void *type_data, void *cb_priv)
 {
-	struct mlxsw_sp_port *mlxsw_sp_port = cb_priv;
-
-	switch (type) {
-	case TC_SETUP_CLSMATCHALL:
-		if (!tc_cls_can_offload_and_chain0(mlxsw_sp_port->dev,
-						   type_data))
-			return -EOPNOTSUPP;
+	struct mlxsw_sp_flow_block *flow_block = cb_priv;
 
-		return mlxsw_sp_setup_tc_cls_matchall(mlxsw_sp_port, type_data,
-						      ingress);
-	case TC_SETUP_CLSFLOWER:
-		return 0;
-	default:
+	if (mlxsw_sp_flow_block_disabled(flow_block))
 		return -EOPNOTSUPP;
-	}
-}
-
-static int mlxsw_sp_setup_tc_block_cb_matchall_ig(enum tc_setup_type type,
-						  void *type_data,
-						  void *cb_priv)
-{
-	return mlxsw_sp_setup_tc_block_cb_matchall(type, type_data,
-						   cb_priv, true);
-}
-
-static int mlxsw_sp_setup_tc_block_cb_matchall_eg(enum tc_setup_type type,
-						  void *type_data,
-						  void *cb_priv)
-{
-	return mlxsw_sp_setup_tc_block_cb_matchall(type, type_data,
-						   cb_priv, false);
-}
-
-static int mlxsw_sp_setup_tc_block_cb_flower(enum tc_setup_type type,
-					     void *type_data, void *cb_priv)
-{
-	struct mlxsw_sp_flow_block *flow_block = cb_priv;
 
 	switch (type) {
 	case TC_SETUP_CLSMATCHALL:
-		return 0;
+		return mlxsw_sp_setup_tc_cls_matchall(flow_block, type_data);
 	case TC_SETUP_CLSFLOWER:
-		if (mlxsw_sp_flow_block_disabled(flow_block))
-			return -EOPNOTSUPP;
-
 		return mlxsw_sp_setup_tc_cls_flower(flow_block, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
 }
 
-static void mlxsw_sp_tc_block_flower_release(void *cb_priv)
+static void mlxsw_sp_tc_block_release(void *cb_priv)
 {
 	struct mlxsw_sp_flow_block *flow_block = cb_priv;
 
@@ -1453,9 +1416,9 @@ static void mlxsw_sp_tc_block_flower_release(void *cb_priv)
 
 static LIST_HEAD(mlxsw_sp_block_cb_list);
 
-static int
-mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-			            struct flow_block_offload *f, bool ingress)
+static int mlxsw_sp_setup_tc_block_bind(struct mlxsw_sp_port *mlxsw_sp_port,
+					struct flow_block_offload *f,
+					bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_flow_block *flow_block;
@@ -1463,16 +1426,15 @@ mlxsw_sp_setup_tc_block_flower_bind(struct mlxsw_sp_port *mlxsw_sp_port,
 	bool register_block = false;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f->block,
-					mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_setup_tc_block_cb,
 					mlxsw_sp);
 	if (!block_cb) {
 		flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, f->net);
 		if (!flow_block)
 			return -ENOMEM;
-		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb_flower,
+		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb,
 					       mlxsw_sp, flow_block,
-					       mlxsw_sp_tc_block_flower_release);
+					       mlxsw_sp_tc_block_release);
 		if (IS_ERR(block_cb)) {
 			mlxsw_sp_flow_block_destroy(flow_block);
 			err = PTR_ERR(block_cb);
@@ -1507,18 +1469,16 @@ err_cb_register:
 	return err;
 }
 
-static void
-mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
-				      struct flow_block_offload *f,
-				      bool ingress)
+static void mlxsw_sp_setup_tc_block_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
+					   struct flow_block_offload *f,
+					   bool ingress)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	struct mlxsw_sp_flow_block *flow_block;
 	struct flow_block_cb *block_cb;
 	int err;
 
-	block_cb = flow_block_cb_lookup(f->block,
-					mlxsw_sp_setup_tc_block_cb_flower,
+	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_setup_tc_block_cb,
 					mlxsw_sp);
 	if (!block_cb)
 		return;
@@ -1540,51 +1500,22 @@ mlxsw_sp_setup_tc_block_flower_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
 static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
 				   struct flow_block_offload *f)
 {
-	struct flow_block_cb *block_cb;
-	flow_setup_cb_t *cb;
 	bool ingress;
-	int err;
 
-	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS) {
-		cb = mlxsw_sp_setup_tc_block_cb_matchall_ig;
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		ingress = true;
-	} else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS) {
-		cb = mlxsw_sp_setup_tc_block_cb_matchall_eg;
+	else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
 		ingress = false;
-	} else {
+	else
 		return -EOPNOTSUPP;
-	}
 
 	f->driver_block_list = &mlxsw_sp_block_cb_list;
 
 	switch (f->command) {
 	case FLOW_BLOCK_BIND:
-		if (flow_block_cb_is_busy(cb, mlxsw_sp_port,
-					  &mlxsw_sp_block_cb_list))
-			return -EBUSY;
-
-		block_cb = flow_block_cb_alloc(cb, mlxsw_sp_port,
-					       mlxsw_sp_port, NULL);
-		if (IS_ERR(block_cb))
-			return PTR_ERR(block_cb);
-		err = mlxsw_sp_setup_tc_block_flower_bind(mlxsw_sp_port, f,
-							  ingress);
-		if (err) {
-			flow_block_cb_free(block_cb);
-			return err;
-		}
-		flow_block_cb_add(block_cb, f);
-		list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
-		return 0;
+		return mlxsw_sp_setup_tc_block_bind(mlxsw_sp_port, f, ingress);
 	case FLOW_BLOCK_UNBIND:
-		mlxsw_sp_setup_tc_block_flower_unbind(mlxsw_sp_port,
-						      f, ingress);
-		block_cb = flow_block_cb_lookup(f->block, cb, mlxsw_sp_port);
-		if (!block_cb)
-			return -ENOENT;
-
-		flow_block_cb_remove(block_cb, f);
-		list_del(&block_cb->driver_list);
+		mlxsw_sp_setup_tc_block_unbind(mlxsw_sp_port, f, ingress);
 		return 0;
 	default:
 		return -EOPNOTSUPP;
@@ -1621,8 +1552,7 @@ static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
 
 	if (!enable) {
 		if (mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->ing_flow_block) ||
-		    mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->eg_flow_block) ||
-		    !list_empty(&mlxsw_sp_port->mall_list)) {
+		    mlxsw_sp_flow_block_rule_count(mlxsw_sp_port->eg_flow_block)) {
 			netdev_err(dev, "Active offloaded tc filters, can't turn hw_tc_offload off\n");
 			return -EINVAL;
 		}
@@ -3518,7 +3448,6 @@ static int mlxsw_sp_port_create(struct mlxsw_sp *mlxsw_sp, u8 local_port,
 	mlxsw_sp_port->mapping = *port_mapping;
 	mlxsw_sp_port->link.autoneg = 1;
 	INIT_LIST_HEAD(&mlxsw_sp_port->vlans_list);
-	INIT_LIST_HEAD(&mlxsw_sp_port->mall_list);
 
 	mlxsw_sp_port->pcpu_stats =
 		netdev_alloc_pcpu_stats(struct mlxsw_sp_port_pcpu_stats);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 4cdb7f1d7436..57d320728914 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -255,7 +255,6 @@ struct mlxsw_sp_port {
 					       * the same localport can have
 					       * different mapping.
 					       */
-	struct list_head mall_list;
 	struct {
 		#define MLXSW_HW_STATS_UPDATE_TIME HZ
 		struct rtnl_link_stats64 stats;
@@ -637,6 +636,7 @@ struct mlxsw_sp_acl_rule_info {
 /* spectrum_flow.c */
 struct mlxsw_sp_flow_block {
 	struct list_head binding_list;
+	struct list_head mall_list;
 	struct mlxsw_sp_acl_ruleset *ruleset_zero;
 	struct mlxsw_sp *mlxsw_sp;
 	unsigned int rule_count;
@@ -894,10 +894,14 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_matchall.c */
-int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
-			  struct tc_cls_matchall_offload *f, bool ingress);
-void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
+			  struct tc_cls_matchall_offload *f);
+void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 			   struct tc_cls_matchall_offload *f);
+int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
+			    struct mlxsw_sp_port *mlxsw_sp_port);
+void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
+			       struct mlxsw_sp_port *mlxsw_sp_port);
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index 655e1df5c95a..51de6aca1930 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -18,6 +18,7 @@ mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net)
 	if (!block)
 		return NULL;
 	INIT_LIST_HEAD(&block->binding_list);
+	INIT_LIST_HEAD(&block->mall_list);
 	block->mlxsw_sp = mlxsw_sp;
 	block->net = net;
 	return block;
@@ -70,9 +71,15 @@ int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 	}
 
+	err = mlxsw_sp_mall_port_bind(block, mlxsw_sp_port);
+	if (err)
+		return err;
+
 	binding = kzalloc(sizeof(*binding), GFP_KERNEL);
-	if (!binding)
-		return -ENOMEM;
+	if (!binding) {
+		err = -ENOMEM;
+		goto err_binding_alloc;
+	}
 	binding->mlxsw_sp_port = mlxsw_sp_port;
 	binding->ingress = ingress;
 
@@ -91,6 +98,9 @@ int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
 
 err_ruleset_bind:
 	kfree(binding);
+err_binding_alloc:
+	mlxsw_sp_mall_port_unbind(block, mlxsw_sp_port);
+
 	return err;
 }
 
@@ -116,5 +126,8 @@ int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
 		mlxsw_sp_acl_ruleset_unbind(mlxsw_sp, block, binding);
 
 	kfree(binding);
+
+	mlxsw_sp_mall_port_unbind(block, mlxsw_sp_port);
+
 	return 0;
 }
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index bda5fb34162a..889da63072be 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -33,11 +33,11 @@ struct mlxsw_sp_mall_entry {
 };
 
 static struct mlxsw_sp_mall_entry *
-mlxsw_sp_mall_entry_find(struct mlxsw_sp_port *port, unsigned long cookie)
+mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie)
 {
 	struct mlxsw_sp_mall_entry *mall_entry;
 
-	list_for_each_entry(mall_entry, &port->mall_list, list)
+	list_for_each_entry(mall_entry, &block->mall_list, list)
 		if (mall_entry->cookie == cookie)
 			return mall_entry;
 
@@ -149,16 +149,27 @@ mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
-int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
-			  struct tc_cls_matchall_offload *f, bool ingress)
+int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
+			  struct tc_cls_matchall_offload *f)
 {
+	struct mlxsw_sp_flow_block_binding *binding;
 	struct mlxsw_sp_mall_entry *mall_entry;
 	__be16 protocol = f->common.protocol;
 	struct flow_action_entry *act;
 	int err;
 
 	if (!flow_offload_has_one_action(&f->rule->action)) {
-		netdev_err(mlxsw_sp_port->dev, "only singular actions are supported\n");
+		NL_SET_ERR_MSG(f->common.extack, "Only singular actions are supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (f->common.chain_index) {
+		NL_SET_ERR_MSG(f->common.extack, "Only chain 0 is supported");
+		return -EOPNOTSUPP;
+	}
+
+	if (mlxsw_sp_flow_block_is_mixed_bound(block)) {
+		NL_SET_ERR_MSG(f->common.extack, "Only not mixed bound blocks are supported");
 		return -EOPNOTSUPP;
 	}
 
@@ -166,7 +177,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	if (!mall_entry)
 		return -ENOMEM;
 	mall_entry->cookie = f->cookie;
-	mall_entry->ingress = ingress;
+	mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
 
 	act = &f->rule->action.entries[0];
 
@@ -176,7 +187,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
 		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
-			netdev_err(mlxsw_sp_port->dev, "sample rate not supported\n");
+			NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported");
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
@@ -190,31 +201,78 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_port *mlxsw_sp_port,
 		goto errout;
 	}
 
-	err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
-	if (err)
-		goto errout;
+	list_for_each_entry(binding, &block->binding_list, list) {
+		err = mlxsw_sp_mall_port_rule_add(binding->mlxsw_sp_port,
+						  mall_entry);
+		if (err)
+			goto rollback;
+	}
 
-	list_add_tail(&mall_entry->list, &mlxsw_sp_port->mall_list);
+	block->rule_count++;
+	if (mall_entry->ingress)
+		block->egress_blocker_rule_count++;
+	else
+		block->ingress_blocker_rule_count++;
+	list_add_tail(&mall_entry->list, &block->mall_list);
 	return 0;
 
+rollback:
+	list_for_each_entry_continue_reverse(binding, &block->binding_list,
+					     list)
+		mlxsw_sp_mall_port_rule_del(binding->mlxsw_sp_port, mall_entry);
 errout:
 	kfree(mall_entry);
 	return err;
 }
 
-void mlxsw_sp_mall_destroy(struct mlxsw_sp_port *mlxsw_sp_port,
+void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 			   struct tc_cls_matchall_offload *f)
 {
+	struct mlxsw_sp_flow_block_binding *binding;
 	struct mlxsw_sp_mall_entry *mall_entry;
 
-	mall_entry = mlxsw_sp_mall_entry_find(mlxsw_sp_port, f->cookie);
+	mall_entry = mlxsw_sp_mall_entry_find(block, f->cookie);
 	if (!mall_entry) {
-		netdev_dbg(mlxsw_sp_port->dev, "tc entry not found on port\n");
+		NL_SET_ERR_MSG(f->common.extack, "Entry not found");
 		return;
 	}
 
-	mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
-
 	list_del(&mall_entry->list);
+	if (mall_entry->ingress)
+		block->egress_blocker_rule_count--;
+	else
+		block->ingress_blocker_rule_count--;
+	block->rule_count--;
+	list_for_each_entry(binding, &block->binding_list, list)
+		mlxsw_sp_mall_port_rule_del(binding->mlxsw_sp_port, mall_entry);
 	kfree_rcu(mall_entry, rcu); /* sample RX packets may be in-flight */
 }
+
+int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
+			    struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+	int err;
+
+	list_for_each_entry(mall_entry, &block->mall_list, list) {
+		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
+		if (err)
+			goto rollback;
+	}
+	return 0;
+
+rollback:
+	list_for_each_entry_continue_reverse(mall_entry, &block->mall_list,
+					     list)
+		mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
+	return err;
+}
+
+void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
+			       struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+
+	list_for_each_entry(mall_entry, &block->mall_list, list)
+		mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
+}
-- 
cgit v1.2.3-59-g8ed1b


From 19f06771ca3dd8346972bc7627f9bcb7b6a8ce0b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:09 +0300
Subject: mlxsw: spectrum: Move flow offload binding into spectrum_flow.c

Move the code taking care of setup of flow offload into spectrum_flow.c.
Do small renaming of callbacks on the way.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 173 -------------------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  11 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flow.c    | 188 ++++++++++++++++++++-
 3 files changed, 181 insertions(+), 191 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ceaf73ac2008..f78bde8bc16e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -1350,178 +1350,6 @@ static int mlxsw_sp_port_kill_vid(struct net_device *dev,
 	return 0;
 }
 
-static int
-mlxsw_sp_setup_tc_cls_matchall(struct mlxsw_sp_flow_block *flow_block,
-			       struct tc_cls_matchall_offload *f)
-{
-	switch (f->command) {
-	case TC_CLSMATCHALL_REPLACE:
-		return mlxsw_sp_mall_replace(flow_block, f);
-	case TC_CLSMATCHALL_DESTROY:
-		mlxsw_sp_mall_destroy(flow_block, f);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int
-mlxsw_sp_setup_tc_cls_flower(struct mlxsw_sp_flow_block *flow_block,
-			     struct flow_cls_offload *f)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block);
-
-	switch (f->command) {
-	case FLOW_CLS_REPLACE:
-		return mlxsw_sp_flower_replace(mlxsw_sp, flow_block, f);
-	case FLOW_CLS_DESTROY:
-		mlxsw_sp_flower_destroy(mlxsw_sp, flow_block, f);
-		return 0;
-	case FLOW_CLS_STATS:
-		return mlxsw_sp_flower_stats(mlxsw_sp, flow_block, f);
-	case FLOW_CLS_TMPLT_CREATE:
-		return mlxsw_sp_flower_tmplt_create(mlxsw_sp, flow_block, f);
-	case FLOW_CLS_TMPLT_DESTROY:
-		mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, flow_block, f);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlxsw_sp_setup_tc_block_cb(enum tc_setup_type type,
-				      void *type_data, void *cb_priv)
-{
-	struct mlxsw_sp_flow_block *flow_block = cb_priv;
-
-	if (mlxsw_sp_flow_block_disabled(flow_block))
-		return -EOPNOTSUPP;
-
-	switch (type) {
-	case TC_SETUP_CLSMATCHALL:
-		return mlxsw_sp_setup_tc_cls_matchall(flow_block, type_data);
-	case TC_SETUP_CLSFLOWER:
-		return mlxsw_sp_setup_tc_cls_flower(flow_block, type_data);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void mlxsw_sp_tc_block_release(void *cb_priv)
-{
-	struct mlxsw_sp_flow_block *flow_block = cb_priv;
-
-	mlxsw_sp_flow_block_destroy(flow_block);
-}
-
-static LIST_HEAD(mlxsw_sp_block_cb_list);
-
-static int mlxsw_sp_setup_tc_block_bind(struct mlxsw_sp_port *mlxsw_sp_port,
-					struct flow_block_offload *f,
-					bool ingress)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_flow_block *flow_block;
-	struct flow_block_cb *block_cb;
-	bool register_block = false;
-	int err;
-
-	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_setup_tc_block_cb,
-					mlxsw_sp);
-	if (!block_cb) {
-		flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, f->net);
-		if (!flow_block)
-			return -ENOMEM;
-		block_cb = flow_block_cb_alloc(mlxsw_sp_setup_tc_block_cb,
-					       mlxsw_sp, flow_block,
-					       mlxsw_sp_tc_block_release);
-		if (IS_ERR(block_cb)) {
-			mlxsw_sp_flow_block_destroy(flow_block);
-			err = PTR_ERR(block_cb);
-			goto err_cb_register;
-		}
-		register_block = true;
-	} else {
-		flow_block = flow_block_cb_priv(block_cb);
-	}
-	flow_block_cb_incref(block_cb);
-	err = mlxsw_sp_flow_block_bind(mlxsw_sp, flow_block,
-				       mlxsw_sp_port, ingress, f->extack);
-	if (err)
-		goto err_block_bind;
-
-	if (ingress)
-		mlxsw_sp_port->ing_flow_block = flow_block;
-	else
-		mlxsw_sp_port->eg_flow_block = flow_block;
-
-	if (register_block) {
-		flow_block_cb_add(block_cb, f);
-		list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
-	}
-
-	return 0;
-
-err_block_bind:
-	if (!flow_block_cb_decref(block_cb))
-		flow_block_cb_free(block_cb);
-err_cb_register:
-	return err;
-}
-
-static void mlxsw_sp_setup_tc_block_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
-					   struct flow_block_offload *f,
-					   bool ingress)
-{
-	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
-	struct mlxsw_sp_flow_block *flow_block;
-	struct flow_block_cb *block_cb;
-	int err;
-
-	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_setup_tc_block_cb,
-					mlxsw_sp);
-	if (!block_cb)
-		return;
-
-	if (ingress)
-		mlxsw_sp_port->ing_flow_block = NULL;
-	else
-		mlxsw_sp_port->eg_flow_block = NULL;
-
-	flow_block = flow_block_cb_priv(block_cb);
-	err = mlxsw_sp_flow_block_unbind(mlxsw_sp, flow_block,
-					 mlxsw_sp_port, ingress);
-	if (!err && !flow_block_cb_decref(block_cb)) {
-		flow_block_cb_remove(block_cb, f);
-		list_del(&block_cb->driver_list);
-	}
-}
-
-static int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
-				   struct flow_block_offload *f)
-{
-	bool ingress;
-
-	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		ingress = true;
-	else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
-		ingress = false;
-	else
-		return -EOPNOTSUPP;
-
-	f->driver_block_list = &mlxsw_sp_block_cb_list;
-
-	switch (f->command) {
-	case FLOW_BLOCK_BIND:
-		return mlxsw_sp_setup_tc_block_bind(mlxsw_sp_port, f, ingress);
-	case FLOW_BLOCK_UNBIND:
-		mlxsw_sp_setup_tc_block_unbind(mlxsw_sp_port, f, ingress);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			     void *type_data)
 {
@@ -1545,7 +1373,6 @@ static int mlxsw_sp_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	}
 }
 
-
 static int mlxsw_sp_feature_hw_tc(struct net_device *dev, bool enable)
 {
 	struct mlxsw_sp_port *mlxsw_sp_port = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 57d320728914..a12ca673c224 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -708,15 +708,8 @@ mlxsw_sp_flow_block_is_mixed_bound(const struct mlxsw_sp_flow_block *block)
 struct mlxsw_sp_flow_block *mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp,
 						       struct net *net);
 void mlxsw_sp_flow_block_destroy(struct mlxsw_sp_flow_block *block);
-int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_flow_block *block,
-			     struct mlxsw_sp_port *mlxsw_sp_port,
-			     bool ingress,
-			     struct netlink_ext_ack *extack);
-int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp_flow_block *block,
-			       struct mlxsw_sp_port *mlxsw_sp_port,
-			       bool ingress);
+int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct flow_block_offload *f);
 
 /* spectrum_acl.c */
 struct mlxsw_sp_acl_ruleset;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index 51de6aca1930..ecab581ff956 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -49,11 +49,11 @@ mlxsw_sp_flow_block_ruleset_bound(const struct mlxsw_sp_flow_block *block)
 	return block->ruleset_zero;
 }
 
-int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
-			     struct mlxsw_sp_flow_block *block,
-			     struct mlxsw_sp_port *mlxsw_sp_port,
-			     bool ingress,
-			     struct netlink_ext_ack *extack)
+static int mlxsw_sp_flow_block_bind(struct mlxsw_sp *mlxsw_sp,
+				    struct mlxsw_sp_flow_block *block,
+				    struct mlxsw_sp_port *mlxsw_sp_port,
+				    bool ingress,
+				    struct netlink_ext_ack *extack)
 {
 	struct mlxsw_sp_flow_block_binding *binding;
 	int err;
@@ -104,10 +104,10 @@ err_binding_alloc:
 	return err;
 }
 
-int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
-			       struct mlxsw_sp_flow_block *block,
-			       struct mlxsw_sp_port *mlxsw_sp_port,
-			       bool ingress)
+static int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
+				      struct mlxsw_sp_flow_block *block,
+				      struct mlxsw_sp_port *mlxsw_sp_port,
+				      bool ingress)
 {
 	struct mlxsw_sp_flow_block_binding *binding;
 
@@ -131,3 +131,173 @@ int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
 
 	return 0;
 }
+
+static int mlxsw_sp_flow_block_mall_cb(struct mlxsw_sp_flow_block *flow_block,
+				       struct tc_cls_matchall_offload *f)
+{
+	switch (f->command) {
+	case TC_CLSMATCHALL_REPLACE:
+		return mlxsw_sp_mall_replace(flow_block, f);
+	case TC_CLSMATCHALL_DESTROY:
+		mlxsw_sp_mall_destroy(flow_block, f);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlxsw_sp_flow_block_flower_cb(struct mlxsw_sp_flow_block *flow_block,
+					 struct flow_cls_offload *f)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block);
+
+	switch (f->command) {
+	case FLOW_CLS_REPLACE:
+		return mlxsw_sp_flower_replace(mlxsw_sp, flow_block, f);
+	case FLOW_CLS_DESTROY:
+		mlxsw_sp_flower_destroy(mlxsw_sp, flow_block, f);
+		return 0;
+	case FLOW_CLS_STATS:
+		return mlxsw_sp_flower_stats(mlxsw_sp, flow_block, f);
+	case FLOW_CLS_TMPLT_CREATE:
+		return mlxsw_sp_flower_tmplt_create(mlxsw_sp, flow_block, f);
+	case FLOW_CLS_TMPLT_DESTROY:
+		mlxsw_sp_flower_tmplt_destroy(mlxsw_sp, flow_block, f);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int mlxsw_sp_flow_block_cb(enum tc_setup_type type,
+				  void *type_data, void *cb_priv)
+{
+	struct mlxsw_sp_flow_block *flow_block = cb_priv;
+
+	if (mlxsw_sp_flow_block_disabled(flow_block))
+		return -EOPNOTSUPP;
+
+	switch (type) {
+	case TC_SETUP_CLSMATCHALL:
+		return mlxsw_sp_flow_block_mall_cb(flow_block, type_data);
+	case TC_SETUP_CLSFLOWER:
+		return mlxsw_sp_flow_block_flower_cb(flow_block, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static void mlxsw_sp_tc_block_release(void *cb_priv)
+{
+	struct mlxsw_sp_flow_block *flow_block = cb_priv;
+
+	mlxsw_sp_flow_block_destroy(flow_block);
+}
+
+static LIST_HEAD(mlxsw_sp_block_cb_list);
+
+static int mlxsw_sp_setup_tc_block_bind(struct mlxsw_sp_port *mlxsw_sp_port,
+					struct flow_block_offload *f,
+					bool ingress)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_flow_block *flow_block;
+	struct flow_block_cb *block_cb;
+	bool register_block = false;
+	int err;
+
+	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_flow_block_cb,
+					mlxsw_sp);
+	if (!block_cb) {
+		flow_block = mlxsw_sp_flow_block_create(mlxsw_sp, f->net);
+		if (!flow_block)
+			return -ENOMEM;
+		block_cb = flow_block_cb_alloc(mlxsw_sp_flow_block_cb,
+					       mlxsw_sp, flow_block,
+					       mlxsw_sp_tc_block_release);
+		if (IS_ERR(block_cb)) {
+			mlxsw_sp_flow_block_destroy(flow_block);
+			err = PTR_ERR(block_cb);
+			goto err_cb_register;
+		}
+		register_block = true;
+	} else {
+		flow_block = flow_block_cb_priv(block_cb);
+	}
+	flow_block_cb_incref(block_cb);
+	err = mlxsw_sp_flow_block_bind(mlxsw_sp, flow_block,
+				       mlxsw_sp_port, ingress, f->extack);
+	if (err)
+		goto err_block_bind;
+
+	if (ingress)
+		mlxsw_sp_port->ing_flow_block = flow_block;
+	else
+		mlxsw_sp_port->eg_flow_block = flow_block;
+
+	if (register_block) {
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &mlxsw_sp_block_cb_list);
+	}
+
+	return 0;
+
+err_block_bind:
+	if (!flow_block_cb_decref(block_cb))
+		flow_block_cb_free(block_cb);
+err_cb_register:
+	return err;
+}
+
+static void mlxsw_sp_setup_tc_block_unbind(struct mlxsw_sp_port *mlxsw_sp_port,
+					   struct flow_block_offload *f,
+					   bool ingress)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_flow_block *flow_block;
+	struct flow_block_cb *block_cb;
+	int err;
+
+	block_cb = flow_block_cb_lookup(f->block, mlxsw_sp_flow_block_cb,
+					mlxsw_sp);
+	if (!block_cb)
+		return;
+
+	if (ingress)
+		mlxsw_sp_port->ing_flow_block = NULL;
+	else
+		mlxsw_sp_port->eg_flow_block = NULL;
+
+	flow_block = flow_block_cb_priv(block_cb);
+	err = mlxsw_sp_flow_block_unbind(mlxsw_sp, flow_block,
+					 mlxsw_sp_port, ingress);
+	if (!err && !flow_block_cb_decref(block_cb)) {
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+	}
+}
+
+int mlxsw_sp_setup_tc_block(struct mlxsw_sp_port *mlxsw_sp_port,
+			    struct flow_block_offload *f)
+{
+	bool ingress;
+
+	if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		ingress = true;
+	else if (f->binder_type == FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS)
+		ingress = false;
+	else
+		return -EOPNOTSUPP;
+
+	f->driver_block_list = &mlxsw_sp_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		return mlxsw_sp_setup_tc_block_bind(mlxsw_sp_port, f, ingress);
+	case FLOW_BLOCK_UNBIND:
+		mlxsw_sp_setup_tc_block_unbind(mlxsw_sp_port, f, ingress);
+		return 0;
+	default:
+		return -EOPNOTSUPP;
+	}
+}
-- 
cgit v1.2.3-59-g8ed1b


From 075c8aa79d541ea08c67a2e6d955f6457e98c21c Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Mon, 27 Apr 2020 18:13:10 +0300
Subject: selftests: forwarding: tc_actions.sh: add matchall mirror test

Add test for matchall classifier with mirred egress mirror action.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../testing/selftests/net/forwarding/tc_actions.sh | 26 +++++++++++++++-------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/tc_actions.sh b/tools/testing/selftests/net/forwarding/tc_actions.sh
index 813d02d1939d..d9eca227136b 100755
--- a/tools/testing/selftests/net/forwarding/tc_actions.sh
+++ b/tools/testing/selftests/net/forwarding/tc_actions.sh
@@ -2,7 +2,8 @@
 # SPDX-License-Identifier: GPL-2.0
 
 ALL_TESTS="gact_drop_and_ok_test mirred_egress_redirect_test \
-	mirred_egress_mirror_test gact_trap_test"
+	mirred_egress_mirror_test matchall_mirred_egress_mirror_test \
+	gact_trap_test"
 NUM_NETIFS=4
 source tc_common.sh
 source lib.sh
@@ -50,6 +51,9 @@ switch_destroy()
 mirred_egress_test()
 {
 	local action=$1
+	local protocol=$2
+	local classifier=$3
+	local classifier_args=$4
 
 	RET=0
 
@@ -62,9 +66,9 @@ mirred_egress_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_fail $? "Matched without redirect rule inserted"
 
-	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
-		$tcflags dst_ip 192.0.2.2 action mirred egress $action \
-		dev $swp2
+	tc filter add dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+		$classifier $tcflags $classifier_args \
+		action mirred egress $action dev $swp2
 
 	$MZ $h1 -c 1 -p 64 -a $h1mac -b $h2mac -A 192.0.2.1 -B 192.0.2.2 \
 		-t ip -q
@@ -72,10 +76,11 @@ mirred_egress_test()
 	tc_check_packets "dev $h2 ingress" 101 1
 	check_err $? "Did not match incoming $action packet"
 
-	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+	tc filter del dev $swp1 ingress protocol $protocol pref 1 handle 101 \
+		$classifier
 	tc filter del dev $h2 ingress protocol ip pref 1 handle 101 flower
 
-	log_test "mirred egress $action ($tcflags)"
+	log_test "mirred egress $classifier $action ($tcflags)"
 }
 
 gact_drop_and_ok_test()
@@ -187,12 +192,17 @@ cleanup()
 
 mirred_egress_redirect_test()
 {
-	mirred_egress_test "redirect"
+	mirred_egress_test "redirect" "ip" "flower" "dst_ip 192.0.2.2"
 }
 
 mirred_egress_mirror_test()
 {
-	mirred_egress_test "mirror"
+	mirred_egress_test "mirror" "ip" "flower" "dst_ip 192.0.2.2"
+}
+
+matchall_mirred_egress_mirror_test()
+{
+	mirred_egress_test "mirror" "all" "matchall" ""
 }
 
 trap cleanup EXIT
-- 
cgit v1.2.3-59-g8ed1b


From fdb9c405e35bdc6e305b9b4e20ebc141ed14fc81 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2020 21:55:33 +0200
Subject: netfilter: nf_tables: allow up to 64 bytes in the set element data
 area

So far, the set elements could store up to 128-bits in the data area.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  4 ++++
 net/netfilter/nf_tables_api.c     | 38 ++++++++++++++++++++++++++------------
 2 files changed, 30 insertions(+), 12 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4ff7c81e6717..d4e29c952c40 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -243,6 +243,10 @@ struct nft_set_elem {
 		u32		buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
 		struct nft_data	val;
 	} key_end;
+	union {
+		u32		buf[NFT_DATA_VALUE_MAXLEN / sizeof(u32)];
+		struct nft_data val;
+	} data;
 	void			*priv;
 };
 
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 9780bd93b7e4..3558e76e2733 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -4669,6 +4669,25 @@ static int nft_setelem_parse_key(struct nft_ctx *ctx, struct nft_set *set,
 	return 0;
 }
 
+static int nft_setelem_parse_data(struct nft_ctx *ctx, struct nft_set *set,
+				  struct nft_data_desc *desc,
+				  struct nft_data *data,
+				  struct nlattr *attr)
+{
+	int err;
+
+	err = nft_data_init(ctx, data, NFT_DATA_VALUE_MAXLEN, desc, attr);
+	if (err < 0)
+		return err;
+
+	if (desc->type != NFT_DATA_VERDICT && desc->len != set->dlen) {
+		nft_data_release(data, desc->type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nft_get_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 			    const struct nlattr *attr)
 {
@@ -4946,7 +4965,6 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	struct nft_expr *expr = NULL;
 	struct nft_userdata *udata;
 	struct nft_data_desc desc;
-	struct nft_data data;
 	enum nft_registers dreg;
 	struct nft_trans *trans;
 	u32 flags = 0;
@@ -5072,15 +5090,11 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 	}
 
 	if (nla[NFTA_SET_ELEM_DATA] != NULL) {
-		err = nft_data_init(ctx, &data, sizeof(data), &desc,
-				    nla[NFTA_SET_ELEM_DATA]);
+		err = nft_setelem_parse_data(ctx, set, &desc, &elem.data.val,
+					     nla[NFTA_SET_ELEM_DATA]);
 		if (err < 0)
 			goto err_parse_key_end;
 
-		err = -EINVAL;
-		if (set->dtype != NFT_DATA_VERDICT && desc.len != set->dlen)
-			goto err_parse_data;
-
 		dreg = nft_type_to_reg(set->dtype);
 		list_for_each_entry(binding, &set->bindings, list) {
 			struct nft_ctx bind_ctx = {
@@ -5094,14 +5108,14 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 				continue;
 
 			err = nft_validate_register_store(&bind_ctx, dreg,
-							  &data,
+							  &elem.data.val,
 							  desc.type, desc.len);
 			if (err < 0)
 				goto err_parse_data;
 
 			if (desc.type == NFT_DATA_VERDICT &&
-			    (data.verdict.code == NFT_GOTO ||
-			     data.verdict.code == NFT_JUMP))
+			    (elem.data.val.verdict.code == NFT_GOTO ||
+			     elem.data.val.verdict.code == NFT_JUMP))
 				nft_validate_state_update(ctx->net,
 							  NFT_VALIDATE_NEED);
 		}
@@ -5123,7 +5137,7 @@ static int nft_add_set_elem(struct nft_ctx *ctx, struct nft_set *set,
 
 	err = -ENOMEM;
 	elem.priv = nft_set_elem_init(set, &tmpl, elem.key.val.data,
-				      elem.key_end.val.data, data.data,
+				      elem.key_end.val.data, elem.data.val.data,
 				      timeout, expiration, GFP_KERNEL);
 	if (elem.priv == NULL)
 		goto err_parse_data;
@@ -5201,7 +5215,7 @@ err_trans:
 	nf_tables_set_elem_destroy(ctx, set, elem.priv);
 err_parse_data:
 	if (nla[NFTA_SET_ELEM_DATA] != NULL)
-		nft_data_release(&data, desc.type);
+		nft_data_release(&elem.data.val, desc.type);
 err_parse_key_end:
 	nft_data_release(&elem.key_end.val, NFT_DATA_VALUE);
 err_parse_key:
-- 
cgit v1.2.3-59-g8ed1b


From 8c1b2bf16d5944cd5c3a8a72e24ed9e22360c1af Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 24 Apr 2020 08:43:34 +0200
Subject: bpf, cgroup: Remove unused exports

Except for a few of the networking hooks called from modular ipv4
or ipv6 code, all of the hooks are only called from code that is
guaranteed to be built-in.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/20200424064338.538313-2-hch@lst.de
---
 kernel/bpf/cgroup.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 4d748c5785bc..fc7c7002fd37 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -1054,7 +1054,6 @@ int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor,
 
 	return !allow;
 }
-EXPORT_SYMBOL(__cgroup_bpf_check_dev_permission);
 
 static const struct bpf_func_proto *
 cgroup_base_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
@@ -1207,7 +1206,6 @@ int __cgroup_bpf_run_filter_sysctl(struct ctl_table_header *head,
 
 	return ret == 1 ? 0 : -EPERM;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_sysctl);
 
 #ifdef CONFIG_NET
 static bool __cgroup_bpf_prog_array_is_empty(struct cgroup *cgrp,
@@ -1312,7 +1310,6 @@ out:
 		sockopt_free_buf(&ctx);
 	return ret;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_setsockopt);
 
 int __cgroup_bpf_run_filter_getsockopt(struct sock *sk, int level,
 				       int optname, char __user *optval,
@@ -1399,7 +1396,6 @@ out:
 	sockopt_free_buf(&ctx);
 	return ret;
 }
-EXPORT_SYMBOL(__cgroup_bpf_run_filter_getsockopt);
 #endif
 
 static ssize_t sysctl_cpy_dir(const struct ctl_dir *dir, char **bufp,
-- 
cgit v1.2.3-59-g8ed1b


From 0c2006b29e5f62784c70209e71da7876267e0e2d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 27 Apr 2020 21:07:00 +0200
Subject: r8169: improve error message if no dedicated PHY driver is found

There are a number of consumer mainboards where the BIOS leaves the PHY
in a state where it reports an invalid PHY ID. To detect such cases,
add the PHY ID to the error message if no dedicated PHY driver is found.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f70e36c20431..68d5255568a5 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5197,7 +5197,8 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 		/* Most chip versions fail with the genphy driver.
 		 * Therefore ensure that the dedicated PHY driver is loaded.
 		 */
-		dev_err(&pdev->dev, "realtek.ko not loaded, maybe it needs to be added to initramfs?\n");
+		dev_err(&pdev->dev, "no dedicated PHY driver found for PHY ID 0x%08x, maybe realtek.ko needs to be added to initramfs?\n",
+			tp->phydev->phy_id);
 		return -EUNATCH;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 2ac757e4152e3322a04a6dfb3d1fa010d3521abf Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 09:33:43 +0000
Subject: net: ethernet: ti: fix return value check in
 k3_cppi_desc_pool_create_name()

In case of error, the function gen_pool_create() returns a NULL pointer,
not an ERR_PTR(). The IS_ERR() test in the return value check should be
replaced with a NULL test.

Fixes: 93a76530316a ("net: ethernet: ti: introduce am65x/j721e gigabit eth subsystem driver")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/k3-cppi-desc-pool.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c
index ad7cfc1316ce..38cc12f9f133 100644
--- a/drivers/net/ethernet/ti/k3-cppi-desc-pool.c
+++ b/drivers/net/ethernet/ti/k3-cppi-desc-pool.c
@@ -64,8 +64,8 @@ k3_cppi_desc_pool_create_name(struct device *dev, size_t size,
 		return ERR_PTR(-ENOMEM);
 
 	pool->gen_pool = gen_pool_create(ilog2(pool->desc_size), -1);
-	if (IS_ERR(pool->gen_pool)) {
-		ret = PTR_ERR(pool->gen_pool);
+	if (!pool->gen_pool) {
+		ret = -ENOMEM;
 		dev_err(pool->dev, "pool create failed %d\n", ret);
 		kfree_const(pool_name);
 		goto gen_pool_create_fail;
-- 
cgit v1.2.3-59-g8ed1b


From 0d7c83463fdf7841350f37960a7abadd3e650b41 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2020 21:55:34 +0200
Subject: netfilter: nft_nat: return EOPNOTSUPP if type or flags are not
 supported

Instead of EINVAL which should be used for malformed netlink messages.

Fixes: eb31628e37a0 ("netfilter: nf_tables: Add support for IPv6 NAT")
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 8b44a4de5329..bb49a217635e 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -129,7 +129,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		priv->type = NF_NAT_MANIP_DST;
 		break;
 	default:
-		return -EINVAL;
+		return -EOPNOTSUPP;
 	}
 
 	if (tb[NFTA_NAT_FAMILY] == NULL)
@@ -196,7 +196,7 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 	if (tb[NFTA_NAT_FLAGS]) {
 		priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
 		if (priv->flags & ~NF_NAT_RANGE_MASK)
-			return -EINVAL;
+			return -EOPNOTSUPP;
 	}
 
 	return nf_ct_netns_get(ctx->net, family);
-- 
cgit v1.2.3-59-g8ed1b


From 4566aa440008103c9bd364f38c03ca5309acc8f4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2020 21:55:35 +0200
Subject: netfilter: nft_nat: set flags from initialization path

This patch sets the NAT flags from the control plane path.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index bb49a217635e..5c7ff213c030 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -55,7 +55,6 @@ static void nft_nat_eval(const struct nft_expr *expr,
 			       &regs->data[priv->sreg_addr_max],
 			       sizeof(range.max_addr.ip6));
 		}
-		range.flags |= NF_NAT_RANGE_MAP_IPS;
 	}
 
 	if (priv->sreg_proto_min) {
@@ -63,10 +62,9 @@ static void nft_nat_eval(const struct nft_expr *expr,
 			&regs->data[priv->sreg_proto_min]);
 		range.max_proto.all = (__force __be16)nft_reg_load16(
 			&regs->data[priv->sreg_proto_max]);
-		range.flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
-	range.flags |= priv->flags;
+	range.flags = priv->flags;
 
 	regs->verdict.code = nf_nat_setup_info(ct, &range, priv->type);
 }
@@ -169,6 +167,8 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		} else {
 			priv->sreg_addr_max = priv->sreg_addr_min;
 		}
+
+		priv->flags |= NF_NAT_RANGE_MAP_IPS;
 	}
 
 	plen = sizeof_field(struct nf_nat_range, min_addr.all);
@@ -191,10 +191,12 @@ static int nft_nat_init(const struct nft_ctx *ctx, const struct nft_expr *expr,
 		} else {
 			priv->sreg_proto_max = priv->sreg_proto_min;
 		}
+
+		priv->flags |= NF_NAT_RANGE_PROTO_SPECIFIED;
 	}
 
 	if (tb[NFTA_NAT_FLAGS]) {
-		priv->flags = ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
+		priv->flags |= ntohl(nla_get_be32(tb[NFTA_NAT_FLAGS]));
 		if (priv->flags & ~NF_NAT_RANGE_MASK)
 			return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From acd766e31bb96b90c2dc4954f86e573c9ac16c66 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2020 21:55:36 +0200
Subject: netfilter: nft_nat: add helper function to set up NAT address and
 protocol

This patch adds nft_nat_setup_addr() and nft_nat_setup_proto() to set up
the NAT mangling.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nft_nat.c | 56 ++++++++++++++++++++++++++++++-------------------
 1 file changed, 34 insertions(+), 22 deletions(-)

diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 5c7ff213c030..7442aa8b1555 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -30,6 +30,36 @@ struct nft_nat {
 	u16			flags;
 };
 
+static void nft_nat_setup_addr(struct nf_nat_range2 *range,
+			       const struct nft_regs *regs,
+			       const struct nft_nat *priv)
+{
+	switch (priv->family) {
+	case AF_INET:
+		range->min_addr.ip = (__force __be32)
+				regs->data[priv->sreg_addr_min];
+		range->max_addr.ip = (__force __be32)
+				regs->data[priv->sreg_addr_max];
+		break;
+	case AF_INET6:
+		memcpy(range->min_addr.ip6, &regs->data[priv->sreg_addr_min],
+		       sizeof(range->min_addr.ip6));
+		memcpy(range->max_addr.ip6, &regs->data[priv->sreg_addr_max],
+		       sizeof(range->max_addr.ip6));
+		break;
+	}
+}
+
+static void nft_nat_setup_proto(struct nf_nat_range2 *range,
+				const struct nft_regs *regs,
+				const struct nft_nat *priv)
+{
+	range->min_proto.all = (__force __be16)
+		nft_reg_load16(&regs->data[priv->sreg_proto_min]);
+	range->max_proto.all = (__force __be16)
+		nft_reg_load16(&regs->data[priv->sreg_proto_max]);
+}
+
 static void nft_nat_eval(const struct nft_expr *expr,
 			 struct nft_regs *regs,
 			 const struct nft_pktinfo *pkt)
@@ -40,29 +70,11 @@ static void nft_nat_eval(const struct nft_expr *expr,
 	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
-	if (priv->sreg_addr_min) {
-		if (priv->family == AF_INET) {
-			range.min_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_min];
-			range.max_addr.ip = (__force __be32)
-					regs->data[priv->sreg_addr_max];
-
-		} else {
-			memcpy(range.min_addr.ip6,
-			       &regs->data[priv->sreg_addr_min],
-			       sizeof(range.min_addr.ip6));
-			memcpy(range.max_addr.ip6,
-			       &regs->data[priv->sreg_addr_max],
-			       sizeof(range.max_addr.ip6));
-		}
-	}
+	if (priv->sreg_addr_min)
+		nft_nat_setup_addr(&range, regs, priv);
 
-	if (priv->sreg_proto_min) {
-		range.min_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_min]);
-		range.max_proto.all = (__force __be16)nft_reg_load16(
-			&regs->data[priv->sreg_proto_max]);
-	}
+	if (priv->sreg_proto_min)
+		nft_nat_setup_proto(&range, regs, priv);
 
 	range.flags = priv->flags;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3ff7ddb1353da9b535e65702704cbadea1da9a00 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 24 Apr 2020 21:55:37 +0200
Subject: netfilter: nft_nat: add netmap support

This patch allows you to NAT the network address prefix onto another
network address prefix, a.k.a. netmapping.

Userspace must specify the NF_NAT_RANGE_NETMAP flag and the prefix
address through the NFTA_NAT_REG_ADDR_MIN and NFTA_NAT_REG_ADDR_MAX
netlink attributes.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/uapi/linux/netfilter/nf_nat.h |  4 ++-
 net/netfilter/nft_nat.c               | 46 ++++++++++++++++++++++++++++++++++-
 2 files changed, 48 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/netfilter/nf_nat.h b/include/uapi/linux/netfilter/nf_nat.h
index 4a95c0db14d4..a64586e77b24 100644
--- a/include/uapi/linux/netfilter/nf_nat.h
+++ b/include/uapi/linux/netfilter/nf_nat.h
@@ -11,6 +11,7 @@
 #define NF_NAT_RANGE_PERSISTENT			(1 << 3)
 #define NF_NAT_RANGE_PROTO_RANDOM_FULLY		(1 << 4)
 #define NF_NAT_RANGE_PROTO_OFFSET		(1 << 5)
+#define NF_NAT_RANGE_NETMAP			(1 << 6)
 
 #define NF_NAT_RANGE_PROTO_RANDOM_ALL		\
 	(NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PROTO_RANDOM_FULLY)
@@ -18,7 +19,8 @@
 #define NF_NAT_RANGE_MASK					\
 	(NF_NAT_RANGE_MAP_IPS | NF_NAT_RANGE_PROTO_SPECIFIED |	\
 	 NF_NAT_RANGE_PROTO_RANDOM | NF_NAT_RANGE_PERSISTENT |	\
-	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET)
+	 NF_NAT_RANGE_PROTO_RANDOM_FULLY | NF_NAT_RANGE_PROTO_OFFSET | \
+	 NF_NAT_RANGE_NETMAP)
 
 struct nf_nat_ipv4_range {
 	unsigned int			flags;
diff --git a/net/netfilter/nft_nat.c b/net/netfilter/nft_nat.c
index 7442aa8b1555..23a7bfd10521 100644
--- a/net/netfilter/nft_nat.c
+++ b/net/netfilter/nft_nat.c
@@ -60,6 +60,46 @@ static void nft_nat_setup_proto(struct nf_nat_range2 *range,
 		nft_reg_load16(&regs->data[priv->sreg_proto_max]);
 }
 
+static void nft_nat_setup_netmap(struct nf_nat_range2 *range,
+				 const struct nft_pktinfo *pkt,
+				 const struct nft_nat *priv)
+{
+	struct sk_buff *skb = pkt->skb;
+	union nf_inet_addr new_addr;
+	__be32 netmask;
+	int i, len = 0;
+
+	switch (priv->type) {
+	case NFT_NAT_SNAT:
+		if (nft_pf(pkt) == NFPROTO_IPV4) {
+			new_addr.ip = ip_hdr(skb)->saddr;
+			len = sizeof(struct in_addr);
+		} else {
+			new_addr.in6 = ipv6_hdr(skb)->saddr;
+			len = sizeof(struct in6_addr);
+		}
+		break;
+	case NFT_NAT_DNAT:
+		if (nft_pf(pkt) == NFPROTO_IPV4) {
+			new_addr.ip = ip_hdr(skb)->daddr;
+			len = sizeof(struct in_addr);
+		} else {
+			new_addr.in6 = ipv6_hdr(skb)->daddr;
+			len = sizeof(struct in6_addr);
+		}
+		break;
+	}
+
+	for (i = 0; i < len / sizeof(__be32); i++) {
+		netmask = ~(range->min_addr.ip6[i] ^ range->max_addr.ip6[i]);
+		new_addr.ip6[i] &= ~netmask;
+		new_addr.ip6[i] |= range->min_addr.ip6[i] & netmask;
+	}
+
+	range->min_addr = new_addr;
+	range->max_addr = new_addr;
+}
+
 static void nft_nat_eval(const struct nft_expr *expr,
 			 struct nft_regs *regs,
 			 const struct nft_pktinfo *pkt)
@@ -70,8 +110,12 @@ static void nft_nat_eval(const struct nft_expr *expr,
 	struct nf_nat_range2 range;
 
 	memset(&range, 0, sizeof(range));
-	if (priv->sreg_addr_min)
+
+	if (priv->sreg_addr_min) {
 		nft_nat_setup_addr(&range, regs, priv);
+		if (priv->flags & NF_NAT_RANGE_NETMAP)
+			nft_nat_setup_netmap(&range, pkt, priv);
+	}
 
 	if (priv->sreg_proto_min)
 		nft_nat_setup_proto(&range, regs, priv);
-- 
cgit v1.2.3-59-g8ed1b


From 3d8bf50860c7de09c9713b97ec2f87ad42338c7e Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 24 Apr 2020 18:12:55 +0800
Subject: rtw88: fix sparse warnings for download firmware routine

sparse warnings: (new ones prefixed by >>)

>> drivers/net/wireless/realtek/rtw88/mac.c:653:5: sparse: sparse:
symbol '__rtw_download_firmware' was not declared. Should it be static?
>> drivers/net/wireless/realtek/rtw88/mac.c:817:5: sparse: sparse:
symbol '__rtw_download_firmware_legacy' was not declared. Should it be
static?

Fixes: 15d2fcc6b2de ("rtw88: add legacy firmware download for 8723D devices")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424101255.28239-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 645207a01525..6969379ba37e 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -696,7 +696,8 @@ static void download_firmware_end_flow(struct rtw_dev *rtwdev)
 	rtw_write16(rtwdev, REG_MCUFW_CTRL, fw_ctrl);
 }
 
-int __rtw_download_firmware(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
+static int __rtw_download_firmware(struct rtw_dev *rtwdev,
+				   struct rtw_fw_state *fw)
 {
 	struct rtw_backup_info bckp[DLFW_RESTORE_REG_NUM];
 	const u8 *data = fw->firmware->data;
@@ -860,7 +861,8 @@ static int download_firmware_validate_legacy(struct rtw_dev *rtwdev)
 	return -EINVAL;
 }
 
-int __rtw_download_firmware_legacy(struct rtw_dev *rtwdev, struct rtw_fw_state *fw)
+static int __rtw_download_firmware_legacy(struct rtw_dev *rtwdev,
+					  struct rtw_fw_state *fw)
 {
 	int ret = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 2aad9f81d34c3ecb6b5dbfd7c64a76298d40c361 Mon Sep 17 00:00:00 2001
From: John Oldman <john.oldman@polehill.co.uk>
Date: Fri, 24 Apr 2020 18:50:43 +0100
Subject: ssb: sprom: fix block comments coding style issues

Fixed coding style issues

Signed-off-by: John Oldman <john.oldman@polehill.co.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424175043.16261-1-john.oldman@polehill.co.uk
---
 drivers/ssb/sprom.c | 12 ++++++++----
 1 file changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/ssb/sprom.c b/drivers/ssb/sprom.c
index 52d2e0f33be7..42d620cee8a9 100644
--- a/drivers/ssb/sprom.c
+++ b/drivers/ssb/sprom.c
@@ -78,7 +78,8 @@ ssize_t ssb_attr_sprom_show(struct ssb_bus *bus, char *buf,
 
 	/* Use interruptible locking, as the SPROM write might
 	 * be holding the lock for several seconds. So allow userspace
-	 * to cancel operation. */
+	 * to cancel operation.
+	 */
 	err = -ERESTARTSYS;
 	if (mutex_lock_interruptible(&bus->sprom_mutex))
 		goto out_kfree;
@@ -121,7 +122,8 @@ ssize_t ssb_attr_sprom_store(struct ssb_bus *bus,
 
 	/* Use interruptible locking, as the SPROM write might
 	 * be holding the lock for several seconds. So allow userspace
-	 * to cancel operation. */
+	 * to cancel operation.
+	 */
 	err = -ERESTARTSYS;
 	if (mutex_lock_interruptible(&bus->sprom_mutex))
 		goto out_kfree;
@@ -188,9 +190,11 @@ int ssb_fill_sprom_with_fallback(struct ssb_bus *bus, struct ssb_sprom *out)
 bool ssb_is_sprom_available(struct ssb_bus *bus)
 {
 	/* status register only exists on chipcomon rev >= 11 and we need check
-	   for >= 31 only */
+	 * for >= 31 only
+	 */
 	/* this routine differs from specs as we do not access SPROM directly
-	   on PCMCIA */
+	 * on PCMCIA
+	 */
 	if (bus->bustype == SSB_BUSTYPE_PCI &&
 	    bus->chipco.dev &&	/* can be unavailable! */
 	    bus->chipco.dev->id.revision >= 31)
-- 
cgit v1.2.3-59-g8ed1b


From 86501437d88532952a21b427d2ffd1683870d369 Mon Sep 17 00:00:00 2001
From: John Oldman <john.oldman@polehill.co.uk>
Date: Sat, 25 Apr 2020 16:52:33 +0100
Subject: ssb: scan: fix block comments coding style issues

Fixed coding style issues

Signed-off-by: John Oldman <john.oldman@polehill.co.uk>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200425155233.19624-1-john.oldman@polehill.co.uk
---
 drivers/ssb/scan.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/ssb/scan.c b/drivers/ssb/scan.c
index 6ceee98ed6ff..b97a5c32d44a 100644
--- a/drivers/ssb/scan.c
+++ b/drivers/ssb/scan.c
@@ -400,7 +400,8 @@ int ssb_bus_scan(struct ssb_bus *bus,
 #ifdef CONFIG_SSB_DRIVER_PCICORE
 			if (bus->bustype == SSB_BUSTYPE_PCI) {
 				/* Ignore PCI cores on PCI-E cards.
-				 * Ignore PCI-E cores on PCI cards. */
+				 * Ignore PCI-E cores on PCI cards.
+				 */
 				if (dev->id.coreid == SSB_DEV_PCI) {
 					if (pci_is_pcie(bus->host_pci))
 						continue;
@@ -421,7 +422,8 @@ int ssb_bus_scan(struct ssb_bus *bus,
 				if (bus->host_pci->vendor == PCI_VENDOR_ID_BROADCOM &&
 				    (bus->host_pci->device & 0xFF00) == 0x4300) {
 					/* This is a dangling ethernet core on a
-					 * wireless device. Ignore it. */
+					 * wireless device. Ignore it.
+					 */
 					continue;
 				}
 			}
-- 
cgit v1.2.3-59-g8ed1b


From d6cae2bc195b558ba79315eae699138ebdf41b57 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:18 +0300
Subject: ath9k: fix AR9002 ADC and NF calibrations

ADC calibration is only required for an 80 MHz sampling rate (i.e. for
40 MHz channels), when the chip utilizes the pair of ADCs in interleaved
mode. Calibration on a 20 MHz channel will never be completed.

Previous channel check is trying to exclude all channels where the
calibration will get stuck. It effectively blocks the calibration run
for HT20 channels, but fails to exclude 20 MHz channels without HT (e.g.
legacy mode channels).

Fix this issue by reworking the channel check to explicitly allow ADCs
gain & DC offset calibrations for HT40 channels only. Also update the
complicated comment to make it clear that these calibrations are for
multi-ADC mode only.

A stuck ADC calibration blocks the NF calibration, which can make it
impossible to work in a noisy environment: too much Rx attenuation,
invalid RSSI value, etc. So this change is actually more of an NF
calibration fix than an ADC calibration fix.

Run tested with AR9220.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-2-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/ar9002_calib.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index fd9db8ca99d7..14eee06744ed 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -37,9 +37,8 @@ static bool ar9002_hw_is_cal_supported(struct ath_hw *ah,
 		break;
 	case ADC_GAIN_CAL:
 	case ADC_DC_CAL:
-		/* Run ADC Gain Cal for non-CCK & non 2GHz-HT20 only */
-		if (!((IS_CHAN_2GHZ(chan) || IS_CHAN_A_FAST_CLOCK(ah, chan)) &&
-		      IS_CHAN_HT20(chan)))
+		/* Run even/odd ADCs calibrations for HT40 channels only */
+		if (IS_CHAN_HT40(chan))
 			supported = true;
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 93f8d4223163c6ba4f2ec8ade4832ef40e2b1d0c Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:19 +0300
Subject: ath9k: remove needless NFCAL_PENDING flag setting

The NFCAL_PENDING flag is set by the ath9k_hw_start_nfcal() routine,
so there is no reason to set it manually after calling it during the
AR9002 calibrations initialization.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-3-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/ar9002_calib.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index 14eee06744ed..0f7c5812e5c2 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -857,9 +857,6 @@ static bool ar9002_hw_init_cal(struct ath_hw *ah, struct ath9k_channel *chan)
 	ath9k_hw_loadnf(ah, chan);
 	ath9k_hw_start_nfcal(ah, true);
 
-	if (ah->caldata)
-		set_bit(NFCAL_PENDING, &ah->caldata->cal_flags);
-
 	ah->cal_list = ah->cal_list_last = ah->cal_list_curr = NULL;
 
 	/* Enable IQ, ADC Gain and ADC DC offset CALs */
-- 
cgit v1.2.3-59-g8ed1b


From 41ba50fd6cac084a93e7651c528ef4bc37996ee2 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:20 +0300
Subject: ath9k: do not miss longcal on AR9002

Each of AGC & I/Q calibrations can take a long time. Long calibration
and NF calibration in particular are forbidden to run in parallel with
ADC & I/Q calibrations. So, the chip might not be ready to perform the
long calibration at the time of request, and a request to perform the
long calibration may be lost.

In order to fix this, preserve the long calibration request as a
calibration state flag and restore the long calibration request each
time the calibration function is called again (i.e. on each subsequent
invocation of the short calibration).

This feature will be twice useful after the next change, which will
make it possible to start the long calibration before all ADCs & I/Q
calibrations are completed.

Run tested with AR9220.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-4-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/ar9002_calib.c | 10 +++++++++-
 drivers/net/wireless/ath/ath9k/hw.h           |  1 +
 2 files changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index 0f7c5812e5c2..ad8db7720993 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -663,8 +663,13 @@ static int ar9002_hw_calibrate(struct ath_hw *ah, struct ath9k_channel *chan,
 	int ret;
 
 	nfcal = !!(REG_READ(ah, AR_PHY_AGC_CONTROL) & AR_PHY_AGC_CONTROL_NF);
-	if (ah->caldata)
+	if (ah->caldata) {
 		nfcal_pending = test_bit(NFCAL_PENDING, &ah->caldata->cal_flags);
+		if (longcal)		/* Remember to not miss */
+			set_bit(LONGCAL_PENDING, &ah->caldata->cal_flags);
+		else if (test_bit(LONGCAL_PENDING, &ah->caldata->cal_flags))
+			longcal = true;	/* Respin a previous one */
+	}
 
 	percal_pending = (currCal &&
 			  (currCal->calState == CAL_RUNNING ||
@@ -700,6 +705,9 @@ static int ar9002_hw_calibrate(struct ath_hw *ah, struct ath9k_channel *chan,
 		}
 
 		if (longcal) {
+			if (ah->caldata)
+				clear_bit(LONGCAL_PENDING,
+					  &ah->caldata->cal_flags);
 			ath9k_hw_start_nfcal(ah, false);
 			/* Do periodic PAOffset Cal */
 			ar9002_hw_pa_cal(ah, false);
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index 2e4489700a85..c99f3c77c823 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -427,6 +427,7 @@ enum ath9k_cal_flags {
 	TXIQCAL_DONE,
 	TXCLCAL_DONE,
 	SW_PKDET_DONE,
+	LONGCAL_PENDING,
 };
 
 struct ath9k_hw_cal_data {
-- 
cgit v1.2.3-59-g8ed1b


From 2bb7027b64b68bf8620b849d6ec1c223572c7e92 Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:21 +0300
Subject: ath9k: interleaved NF calibration on AR9002

NF calibration and other elements of long calibration are usually faster
than ADCs & I/Q calibrations because they do not depend on reception of
an OFDM signal. Moreover, sometimes I/Q calibration can not be completed
at all without a preceding NF calibration. This is due to AGC, which has
a habit of blocking a weak signal without regular NF calibration. Thus, we
do not need to defer the long calibration forever.

So, if the long calibration is requested, then defer the ADCs & I/Q
calibration(s) and run the longcal (the NF calibration in particular) to
obtain fresh noise data.

Run tested with AR9220.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-5-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/ar9002_calib.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index ad8db7720993..68188f500949 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -680,8 +680,12 @@ static int ar9002_hw_calibrate(struct ath_hw *ah, struct ath9k_channel *chan,
 			return 0;
 
 		ah->cal_list_curr = currCal = currCal->calNext;
-		if (currCal->calState == CAL_WAITING)
-			ath9k_hw_reset_calibration(ah, currCal);
+		percal_pending = currCal->calState == CAL_WAITING;
+	}
+
+	/* Do not start a next calibration if the longcal is in action */
+	if (percal_pending && !nfcal && !longcal) {
+		ath9k_hw_reset_calibration(ah, currCal);
 
 		return 0;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From ded6ff15a1911af7dd641b4cc1a1a2e161f08e1f Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:22 +0300
Subject: ath9k: invalidate all calibrations at once

Previously after the calibration validity period is over,
calibrations are invalidated in a one-at-a-time manner. So, for AR9002
family, which has three calibrations, the full recalibration interval
becomes 3 x ATH_RESTART_CALINTERVAL. And each next calibration will be
separated by the ATH_RESTART_CALINTERVAL time from a previous one.

It seems like it is better to do whole recalibration at once. Also, this
change makes the driver behaviour a little simpler. So, invalidate all
calibrations at once at the end of the calibration validity interval.

This change affects only AR9002 chips family, since the AR9003 utilize
only a single calibration.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-6-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/calib.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c
index 695c779ae8cf..2ac3eefd3851 100644
--- a/drivers/net/wireless/ath/ath9k/calib.c
+++ b/drivers/net/wireless/ath/ath9k/calib.c
@@ -209,14 +209,17 @@ bool ath9k_hw_reset_calvalid(struct ath_hw *ah)
 		return true;
 	}
 
-	if (!(ah->supp_cals & currCal->calData->calType))
-		return true;
+	currCal = ah->cal_list;
+	do {
+		ath_dbg(common, CALIBRATE, "Resetting Cal %d state for channel %u\n",
+			currCal->calData->calType,
+			ah->curchan->chan->center_freq);
 
-	ath_dbg(common, CALIBRATE, "Resetting Cal %d state for channel %u\n",
-		currCal->calData->calType, ah->curchan->chan->center_freq);
+		ah->caldata->CalValid &= ~currCal->calData->calType;
+		currCal->calState = CAL_WAITING;
 
-	ah->caldata->CalValid &= ~currCal->calData->calType;
-	currCal->calState = CAL_WAITING;
+		currCal = currCal->calNext;
+	} while (currCal != ah->cal_list);
 
 	return false;
 }
-- 
cgit v1.2.3-59-g8ed1b


From d8d20845c7f129359c845c526929056651d4e5df Mon Sep 17 00:00:00 2001
From: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Date: Fri, 24 Apr 2020 03:49:23 +0300
Subject: ath9k: add calibration timeout for AR9002
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

ADC & I/Q calibrations could take infinite time to complete, since they
depend on received frames. In particular the I/Q mismatch calibration
requires receiving of OFDM frames for completion. But in the 2.4GHz
band, a station could receive only CCK frames for a very long time.

And while we wait for the completion of one of the mentioned
calibrations, the NF calibration is blocked. Moreover, in some
environments, I/Q calibration is unable to complete until a correct
noise calibration will be performed due to AGC behaviour.

In order to avoid delaying NF calibration forever, limit the maximum
duration of ADCs & I/Q calibrations. If the calibration is not completed
within the maximum time, it will be interrupted and the next calibration
will be performed. The code that selects the next calibration has been
reworked into a loop so an incomplete calibration will be respun later.

A maximum calibration time of 30 seconds was selected to give the
calibration enough time to complete and to not interfere with the long
(NF) calibration.

Run tested with AR9220.

Signed-off-by: Sergey Ryazanov <ryazanov.s.a@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424004923.17129-7-ryazanov.s.a@gmail.com
---
 drivers/net/wireless/ath/ath9k/ar9002_calib.c | 25 +++++++++++++++++++++++--
 drivers/net/wireless/ath/ath9k/calib.c        |  1 +
 drivers/net/wireless/ath/ath9k/hw.h           |  1 +
 3 files changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/ar9002_calib.c b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
index 68188f500949..fd53b5f9e9b5 100644
--- a/drivers/net/wireless/ath/ath9k/ar9002_calib.c
+++ b/drivers/net/wireless/ath/ath9k/ar9002_calib.c
@@ -19,6 +19,8 @@
 #include "ar9002_phy.h"
 
 #define AR9285_CLCAL_REDO_THRESH    1
+/* AGC & I/Q calibrations time limit, ms */
+#define AR9002_CAL_MAX_TIME		30000
 
 enum ar9002_cal_types {
 	ADC_GAIN_CAL = BIT(0),
@@ -104,6 +106,14 @@ static bool ar9002_hw_per_calibration(struct ath_hw *ah,
 			} else {
 				ar9002_hw_setup_calibration(ah, currCal);
 			}
+		} else if (time_after(jiffies, ah->cal_start_time +
+				      msecs_to_jiffies(AR9002_CAL_MAX_TIME))) {
+			REG_CLR_BIT(ah, AR_PHY_TIMING_CTRL4(0),
+				    AR_PHY_TIMING_CTRL4_DO_CAL);
+			ath_dbg(ath9k_hw_common(ah), CALIBRATE,
+				"calibration timeout\n");
+			currCal->calState = CAL_WAITING;	/* Try later */
+			iscaldone = true;
 		}
 	} else if (!(caldata->CalValid & currCal->calData->calType)) {
 		ath9k_hw_reset_calibration(ah, currCal);
@@ -679,8 +689,19 @@ static int ar9002_hw_calibrate(struct ath_hw *ah, struct ath9k_channel *chan,
 		if (!ar9002_hw_per_calibration(ah, chan, rxchainmask, currCal))
 			return 0;
 
-		ah->cal_list_curr = currCal = currCal->calNext;
-		percal_pending = currCal->calState == CAL_WAITING;
+		/* Looking for next waiting calibration if any */
+		for (currCal = currCal->calNext; currCal != ah->cal_list_curr;
+		     currCal = currCal->calNext) {
+			if (currCal->calState == CAL_WAITING)
+				break;
+		}
+		if (currCal->calState == CAL_WAITING) {
+			percal_pending = true;
+			ah->cal_list_curr = currCal;
+		} else {
+			percal_pending = false;
+			ah->cal_list_curr = ah->cal_list;
+		}
 	}
 
 	/* Do not start a next calibration if the longcal is in action */
diff --git a/drivers/net/wireless/ath/ath9k/calib.c b/drivers/net/wireless/ath/ath9k/calib.c
index 2ac3eefd3851..0422a33395b7 100644
--- a/drivers/net/wireless/ath/ath9k/calib.c
+++ b/drivers/net/wireless/ath/ath9k/calib.c
@@ -176,6 +176,7 @@ void ath9k_hw_reset_calibration(struct ath_hw *ah,
 
 	ath9k_hw_setup_calibration(ah, currCal);
 
+	ah->cal_start_time = jiffies;
 	currCal->calState = CAL_RUNNING;
 
 	for (i = 0; i < AR5416_MAX_CHAINS; i++) {
diff --git a/drivers/net/wireless/ath/ath9k/hw.h b/drivers/net/wireless/ath/ath9k/hw.h
index c99f3c77c823..023599e10dd5 100644
--- a/drivers/net/wireless/ath/ath9k/hw.h
+++ b/drivers/net/wireless/ath/ath9k/hw.h
@@ -834,6 +834,7 @@ struct ath_hw {
 
 	/* Calibration */
 	u32 supp_cals;
+	unsigned long cal_start_time;
 	struct ath9k_cal_list iq_caldata;
 	struct ath9k_cal_list adcgain_caldata;
 	struct ath9k_cal_list adcdc_caldata;
-- 
cgit v1.2.3-59-g8ed1b


From c26b01d5ec1ab4dbfbdeb93ef4bc9e34951b6688 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sun, 26 Apr 2020 17:40:37 +0800
Subject: ath5k: remove conversion to bool in ath5k_ani_calibration()

The '>' expression itself is bool, no need to convert it to bool again.
This fixes the following coccicheck warning:

drivers/net/wireless/ath/ath5k/ani.c:504:56-61: WARNING: conversion to
bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426094037.23048-1-yanaijie@huawei.com
---
 drivers/net/wireless/ath/ath5k/ani.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath5k/ani.c b/drivers/net/wireless/ath/ath5k/ani.c
index 0624333f5430..850c608b43a3 100644
--- a/drivers/net/wireless/ath/ath5k/ani.c
+++ b/drivers/net/wireless/ath/ath5k/ani.c
@@ -501,7 +501,7 @@ ath5k_ani_calibration(struct ath5k_hw *ah)
 
 	if (as->ofdm_errors > ofdm_high || as->cck_errors > cck_high) {
 		/* too many PHY errors - we have to raise immunity */
-		bool ofdm_flag = as->ofdm_errors > ofdm_high ? true : false;
+		bool ofdm_flag = as->ofdm_errors > ofdm_high;
 		ath5k_ani_raise_immunity(ah, as, ofdm_flag);
 		ath5k_ani_period_restart(as);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2289bef25e32808bb6d748edc667ca297792bf8f Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Mon, 27 Apr 2020 16:04:13 +0800
Subject: ath10k: enable firmware peer stats info for wmi tlv

For the wmi tlv type, firmware disables peer stats info by default. After
enabling it, firmware will report WMI_TLV_PEER_STATS_INFO_EVENTID if
ath10k sends WMI_TLV_REQUEST_PEER_STATS_INFO_CMDID to firmware.

Enabling it only sets a flag in firmware; firmware will not report
it without receiving the request WMI command.

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427080416.8265-2-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/mac.c     | 5 +++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 1 +
 drivers/net/wireless/ath/ath10k/wmi-tlv.h | 1 +
 drivers/net/wireless/ath/ath10k/wmi.h     | 1 +
 4 files changed, 8 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index a1147ccc09bf..0fb082c9d04b 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -2959,6 +2959,11 @@ static void ath10k_bss_assoc(struct ieee80211_hw *hw,
 	arvif->aid = bss_conf->aid;
 	ether_addr_copy(arvif->bssid, bss_conf->bssid);
 
+	ret = ath10k_wmi_pdev_set_param(ar,
+					ar->wmi.pdev_param->peer_stats_info_enable, 1);
+	if (ret)
+		ath10k_warn(ar, "failed to enable peer stats info: %d\n", ret);
+
 	ret = ath10k_wmi_vdev_up(ar, arvif->vdev_id, arvif->aid, arvif->bssid);
 	if (ret) {
 		ath10k_warn(ar, "failed to set vdev %d up: %d\n",
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index e1ab900f2662..27aaa48615d2 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -4269,6 +4269,7 @@ static struct wmi_pdev_param_map wmi_tlv_pdev_param_map = {
 	.arp_dstaddr = WMI_PDEV_PARAM_UNSUPPORTED,
 	.rfkill_config = WMI_TLV_PDEV_PARAM_HW_RFKILL_CONFIG,
 	.rfkill_enable = WMI_TLV_PDEV_PARAM_RFKILL_ENABLE,
+	.peer_stats_info_enable = WMI_TLV_PDEV_PARAM_PEER_STATS_INFO_ENABLE,
 };
 
 static struct wmi_peer_param_map wmi_tlv_peer_param_map = {
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index 4972dc12991c..cd400b19a64d 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -451,6 +451,7 @@ enum wmi_tlv_pdev_param {
 	WMI_TLV_PDEV_PARAM_VDEV_RATE_STATS_UPDATE_PERIOD,
 	WMI_TLV_PDEV_PARAM_TXPOWER_REASON_NONE,
 	WMI_TLV_PDEV_PARAM_TXPOWER_REASON_SAR,
+	WMI_TLV_PDEV_PARAM_PEER_STATS_INFO_ENABLE = 0x8b,
 	WMI_TLV_PDEV_PARAM_TXPOWER_REASON_MAX,
 };
 
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 209070714d1a..46740e16f3ce 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -3798,6 +3798,7 @@ struct wmi_pdev_param_map {
 	u32 enable_btcoex;
 	u32 rfkill_config;
 	u32 rfkill_enable;
+	u32 peer_stats_info_enable;
 };
 
 #define WMI_PDEV_PARAM_UNSUPPORTED 0
-- 
cgit v1.2.3-59-g8ed1b


From 0f7cb26830a6e740455a7064e46ff1e926197ecb Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Mon, 27 Apr 2020 16:04:14 +0800
Subject: ath10k: add rx bitrate report for SDIO

For SDIO chips, the rx indication is struct htt_rx_indication_hl, which
does not include the bitrate info that PCIe has. For PCIe, the driver uses
ath10k_htt_rx_h_rates to parse the bitrate info in struct rx_ppdu_start
and then reports it to mac80211 via ieee80211_rx_status.

SDIO does not have the same info as PCIe, so the iw command can not get
the rx bitrate via "iw wlan0 station dump".

for example, it always show 6.0 MBit/s
localhost ~ # iw wlan0 link
Connected to 3c:28:6d:96:fd:69 (on wlan0)
	SSID: kukui_test
	freq: 5180
	RX: 111800 bytes (595 packets)
	TX: 35419 bytes (202 packets)
	signal: -41 dBm
	rx bitrate: 6.0 MBit/s

This patch sends WMI_TLV_REQUEST_PEER_STATS_INFO_CMDID to firmware
from ath10k_sta_statistics and saves the rx bitrate from the WMI event
WMI_TLV_PEER_STATS_INFO_EVENTID.

This patch only affects SDIO chips: ath10k_mac_sta_get_peer_stats_info
checks supports_peer_stats_info of hw_params, and this patch only enables
it for "qca6174 hw3.2 sdio".

Tested with QCA6174 SDIO firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427080416.8265-3-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.c    |   2 +
 drivers/net/wireless/ath/ath10k/core.h    |   3 +
 drivers/net/wireless/ath/ath10k/hw.h      |   3 +
 drivers/net/wireless/ath/ath10k/mac.c     |  40 ++++++++++
 drivers/net/wireless/ath/ath10k/wmi-ops.h |  30 ++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 118 ++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.h | 100 +++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/wmi.h     |   8 ++
 8 files changed, 304 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.c b/drivers/net/wireless/ath/ath10k/core.c
index d96d178b4980..22b6937ac225 100644
--- a/drivers/net/wireless/ath/ath10k/core.c
+++ b/drivers/net/wireless/ath/ath10k/core.c
@@ -190,6 +190,7 @@ static const struct ath10k_hw_params ath10k_hw_params_list[] = {
 		.uart_pin_workaround = true,
 		.tx_stats_over_pktlog = false,
 		.bmi_large_size_download = true,
+		.supports_peer_stats_info = true,
 	},
 	{
 		.id = QCA6174_HW_2_1_VERSION,
@@ -3277,6 +3278,7 @@ struct ath10k *ath10k_core_create(size_t priv_size, struct device *dev,
 	init_completion(&ar->thermal.wmi_sync);
 	init_completion(&ar->bss_survey_done);
 	init_completion(&ar->peer_delete_done);
+	init_completion(&ar->peer_stats_info_complete);
 
 	INIT_DELAYED_WORK(&ar->scan.timeout, ath10k_scan_timeout_work);
 
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 07935d39d6d6..11d9132be4fd 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -504,6 +504,8 @@ struct ath10k_sta {
 	u32 tx_failed;
 	u32 last_tx_bitrate;
 
+	u32 rx_rate_code;
+	u32 rx_bitrate_kbps;
 	struct work_struct update_wk;
 	u64 rx_duration;
 	struct ath10k_htt_tx_stats *tx_stats;
@@ -1089,6 +1091,7 @@ struct ath10k {
 	int last_wmi_vdev_start_status;
 	struct completion vdev_setup_done;
 	struct completion vdev_delete_done;
+	struct completion peer_stats_info_complete;
 
 	struct workqueue_struct *workqueue;
 	/* Auxiliary workqueue */
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index 2a7af5861788..d9907a4648a8 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -623,6 +623,9 @@ struct ath10k_hw_params {
 
 	/* tx stats support over pktlog */
 	bool tx_stats_over_pktlog;
+
+	/* provides bitrates for sta_statistics using WMI_TLV_PEER_STATS_INFO_EVENTID */
+	bool supports_peer_stats_info;
 };
 
 struct htt_rx_desc;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0fb082c9d04b..d0401d3adde4 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8305,6 +8305,44 @@ static void ath10k_mac_op_sta_pre_rcu_remove(struct ieee80211_hw *hw,
 			peer->removed = true;
 }
 
+static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar,
+					       struct ieee80211_sta *sta,
+					       struct station_info *sinfo)
+{
+	struct ath10k_sta *arsta = (struct ath10k_sta *)sta->drv_priv;
+	struct ath10k_peer *peer;
+	unsigned long time_left;
+	int ret;
+
+	if (!(ar->hw_params.supports_peer_stats_info &&
+	      arsta->arvif->vdev_type == WMI_VDEV_TYPE_STA))
+		return;
+
+	spin_lock_bh(&ar->data_lock);
+	peer = ath10k_peer_find(ar, arsta->arvif->vdev_id, sta->addr);
+	spin_unlock_bh(&ar->data_lock);
+	if (!peer)
+		return;
+
+	reinit_completion(&ar->peer_stats_info_complete);
+
+	ret = ath10k_wmi_request_peer_stats_info(ar,
+						 arsta->arvif->vdev_id,
+						 WMI_REQUEST_ONE_PEER_STATS_INFO,
+						 arsta->arvif->bssid,
+						 0);
+	if (ret && ret != -EOPNOTSUPP) {
+		ath10k_warn(ar, "could not request peer stats info: %d\n", ret);
+		return;
+	}
+
+	time_left = wait_for_completion_timeout(&ar->peer_stats_info_complete, 3 * HZ);
+	if (time_left == 0) {
+		ath10k_warn(ar, "timed out waiting peer stats info\n");
+		return;
+	}
+}
+
 static void ath10k_sta_statistics(struct ieee80211_hw *hw,
 				  struct ieee80211_vif *vif,
 				  struct ieee80211_sta *sta,
@@ -8340,6 +8378,8 @@ static void ath10k_sta_statistics(struct ieee80211_hw *hw,
 		sinfo->tx_failed = arsta->tx_failed;
 		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_FAILED);
 	}
+
+	ath10k_mac_sta_get_peer_stats_info(ar, sta, sinfo);
 }
 
 static const struct ieee80211_ops ath10k_ops = {
diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h
index 1491c25518bb..6b730f59fd5b 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-ops.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h
@@ -126,6 +126,13 @@ struct wmi_ops {
 	struct sk_buff *(*gen_pdev_set_wmm)(struct ath10k *ar,
 					    const struct wmi_wmm_params_all_arg *arg);
 	struct sk_buff *(*gen_request_stats)(struct ath10k *ar, u32 stats_mask);
+	struct sk_buff *(*gen_request_peer_stats_info)(struct ath10k *ar,
+						       u32 vdev_id,
+						       enum
+						       wmi_peer_stats_info_request_type
+						       type,
+						       u8 *addr,
+						       u32 reset);
 	struct sk_buff *(*gen_force_fw_hang)(struct ath10k *ar,
 					     enum wmi_force_fw_hang_type type,
 					     u32 delay_ms);
@@ -1064,6 +1071,29 @@ ath10k_wmi_request_stats(struct ath10k *ar, u32 stats_mask)
 	return ath10k_wmi_cmd_send(ar, skb, ar->wmi.cmd->request_stats_cmdid);
 }
 
+static inline int
+ath10k_wmi_request_peer_stats_info(struct ath10k *ar,
+				   u32 vdev_id,
+				   enum wmi_peer_stats_info_request_type type,
+				   u8 *addr,
+				   u32 reset)
+{
+	struct sk_buff *skb;
+
+	if (!ar->wmi.ops->gen_request_peer_stats_info)
+		return -EOPNOTSUPP;
+
+	skb = ar->wmi.ops->gen_request_peer_stats_info(ar,
+						       vdev_id,
+						       type,
+						       addr,
+						       reset);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	return ath10k_wmi_cmd_send(ar, skb, ar->wmi.cmd->request_peer_stats_info_cmdid);
+}
+
 static inline int
 ath10k_wmi_force_fw_hang(struct ath10k *ar,
 			 enum wmi_force_fw_hang_type type, u32 delay_ms)
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 27aaa48615d2..eec1f1f27dec 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -219,6 +219,89 @@ static void ath10k_wmi_tlv_event_vdev_delete_resp(struct ath10k *ar,
 	complete(&ar->vdev_delete_done);
 }
 
+static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16 len,
+						const void *ptr, void *data)
+{
+	const struct wmi_tlv_peer_stats_info *stat = ptr;
+	struct ieee80211_sta *sta;
+	struct ath10k_sta *arsta;
+
+	if (tag != WMI_TLV_TAG_STRUCT_PEER_STATS_INFO)
+		return -EPROTO;
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi tlv stats peer addr %pMF rx rate code 0x%x bit rate %d kbps\n",
+		   stat->peer_macaddr.addr,
+		   __le32_to_cpu(stat->last_rx_rate_code),
+		   __le32_to_cpu(stat->last_rx_bitrate_kbps));
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi tlv stats tx rate code 0x%x bit rate %d kbps\n",
+		   __le32_to_cpu(stat->last_tx_rate_code),
+		   __le32_to_cpu(stat->last_tx_bitrate_kbps));
+
+	sta = ieee80211_find_sta_by_ifaddr(ar->hw, stat->peer_macaddr.addr, NULL);
+	if (!sta) {
+		ath10k_warn(ar, "not found station for peer stats\n");
+		return -EINVAL;
+	}
+
+	arsta = (struct ath10k_sta *)sta->drv_priv;
+	arsta->rx_rate_code = __le32_to_cpu(stat->last_rx_rate_code);
+	arsta->rx_bitrate_kbps = __le32_to_cpu(stat->last_rx_bitrate_kbps);
+
+	return 0;
+}
+
+static int ath10k_wmi_tlv_op_pull_peer_stats_info(struct ath10k *ar,
+						  struct sk_buff *skb)
+{
+	const void **tb;
+	const struct wmi_tlv_peer_stats_info_ev *ev;
+	const void *data;
+	u32 num_peer_stats;
+	int ret;
+
+	tb = ath10k_wmi_tlv_parse_alloc(ar, skb->data, skb->len, GFP_ATOMIC);
+	if (IS_ERR(tb)) {
+		ret = PTR_ERR(tb);
+		ath10k_warn(ar, "failed to parse tlv: %d\n", ret);
+		return ret;
+	}
+
+	ev = tb[WMI_TLV_TAG_STRUCT_PEER_STATS_INFO_EVENT];
+	data = tb[WMI_TLV_TAG_ARRAY_STRUCT];
+
+	if (!ev || !data) {
+		kfree(tb);
+		return -EPROTO;
+	}
+
+	num_peer_stats = __le32_to_cpu(ev->num_peers);
+
+	ath10k_dbg(ar, ATH10K_DBG_WMI,
+		   "wmi tlv peer stats info update peer vdev id %d peers %i more data %d\n",
+		   __le32_to_cpu(ev->vdev_id),
+		   num_peer_stats,
+		   __le32_to_cpu(ev->more_data));
+
+	ret = ath10k_wmi_tlv_iter(ar, data, ath10k_wmi_tlv_len(data),
+				  ath10k_wmi_tlv_parse_peer_stats_info, NULL);
+	if (ret)
+		ath10k_warn(ar, "failed to parse stats info tlv: %d\n", ret);
+
+	kfree(tb);
+	return 0;
+}
+
+static void ath10k_wmi_tlv_event_peer_stats_info(struct ath10k *ar,
+						 struct sk_buff *skb)
+{
+	ath10k_dbg(ar, ATH10K_DBG_WMI, "WMI_PEER_STATS_INFO_EVENTID\n");
+	ath10k_wmi_tlv_op_pull_peer_stats_info(ar, skb);
+	complete(&ar->peer_stats_info_complete);
+}
+
 static int ath10k_wmi_tlv_event_diag_data(struct ath10k *ar,
 					  struct sk_buff *skb)
 {
@@ -576,6 +659,9 @@ static void ath10k_wmi_tlv_op_rx(struct ath10k *ar, struct sk_buff *skb)
 	case WMI_TLV_UPDATE_STATS_EVENTID:
 		ath10k_wmi_event_update_stats(ar, skb);
 		break;
+	case WMI_TLV_PEER_STATS_INFO_EVENTID:
+		ath10k_wmi_tlv_event_peer_stats_info(ar, skb);
+		break;
 	case WMI_TLV_VDEV_START_RESP_EVENTID:
 		ath10k_wmi_event_vdev_start_resp(ar, skb);
 		break;
@@ -2897,6 +2983,36 @@ ath10k_wmi_tlv_op_gen_request_stats(struct ath10k *ar, u32 stats_mask)
 	return skb;
 }
 
+static struct sk_buff *
+ath10k_wmi_tlv_op_gen_request_peer_stats_info(struct ath10k *ar,
+					      u32 vdev_id,
+					      enum wmi_peer_stats_info_request_type type,
+					      u8 *addr,
+					      u32 reset)
+{
+	struct wmi_tlv_request_peer_stats_info *cmd;
+	struct wmi_tlv *tlv;
+	struct sk_buff *skb;
+
+	skb = ath10k_wmi_alloc_skb(ar, sizeof(*tlv) + sizeof(*cmd));
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	tlv = (void *)skb->data;
+	tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_REQUEST_PEER_STATS_INFO_CMD);
+	tlv->len = __cpu_to_le16(sizeof(*cmd));
+	cmd = (void *)tlv->value;
+	cmd->vdev_id = __cpu_to_le32(vdev_id);
+	cmd->request_type = __cpu_to_le32(type);
+
+	if (type == WMI_REQUEST_ONE_PEER_STATS_INFO)
+		ether_addr_copy(cmd->peer_macaddr.addr, addr);
+
+	cmd->reset_after_request = reset;
+	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv request peer stats info\n");
+	return skb;
+}
+
 static int
 ath10k_wmi_mgmt_tx_alloc_msdu_id(struct ath10k *ar, struct sk_buff *skb,
 				 dma_addr_t paddr)
@@ -4113,6 +4229,7 @@ static struct wmi_cmd_map wmi_tlv_cmd_map = {
 	.vdev_spectral_scan_configure_cmdid = WMI_TLV_SPECTRAL_SCAN_CONF_CMDID,
 	.vdev_spectral_scan_enable_cmdid = WMI_TLV_SPECTRAL_SCAN_ENABLE_CMDID,
 	.request_stats_cmdid = WMI_TLV_REQUEST_STATS_CMDID,
+	.request_peer_stats_info_cmdid = WMI_TLV_REQUEST_PEER_STATS_INFO_CMDID,
 	.set_arp_ns_offload_cmdid = WMI_TLV_SET_ARP_NS_OFFLOAD_CMDID,
 	.network_list_offload_config_cmdid =
 				WMI_TLV_NETWORK_LIST_OFFLOAD_CONFIG_CMDID,
@@ -4417,6 +4534,7 @@ static const struct wmi_ops wmi_tlv_ops = {
 	.gen_beacon_dma = ath10k_wmi_tlv_op_gen_beacon_dma,
 	.gen_pdev_set_wmm = ath10k_wmi_tlv_op_gen_pdev_set_wmm,
 	.gen_request_stats = ath10k_wmi_tlv_op_gen_request_stats,
+	.gen_request_peer_stats_info = ath10k_wmi_tlv_op_gen_request_peer_stats_info,
 	.gen_force_fw_hang = ath10k_wmi_tlv_op_gen_force_fw_hang,
 	/* .gen_mgmt_tx = not implemented; HTT is used */
 	.gen_mgmt_tx_send = ath10k_wmi_tlv_op_gen_mgmt_tx_send,
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index cd400b19a64d..2153e2d9a955 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -198,6 +198,12 @@ enum wmi_tlv_cmd_id {
 	WMI_TLV_REQUEST_LINK_STATS_CMDID,
 	WMI_TLV_START_LINK_STATS_CMDID,
 	WMI_TLV_CLEAR_LINK_STATS_CMDID,
+	WMI_TLV_CGET_FW_MEM_DUMP_CMDID,
+	WMI_TLV_CDEBUG_MESG_FLUSH_CMDID,
+	WMI_TLV_CDIAG_EVENT_LOG_CONFIG_CMDID,
+	WMI_TLV_CREQUEST_WLAN_STATS_CMDID,
+	WMI_TLV_CREQUEST_RCPI_CMDID,
+	WMI_TLV_REQUEST_PEER_STATS_INFO_CMDID,
 	WMI_TLV_SET_ARP_NS_OFFLOAD_CMDID = WMI_TLV_CMD(WMI_TLV_GRP_ARP_NS_OFL),
 	WMI_TLV_ADD_PROACTIVE_ARP_RSP_PATTERN_CMDID,
 	WMI_TLV_DEL_PROACTIVE_ARP_RSP_PATTERN_CMDID,
@@ -338,6 +344,13 @@ enum wmi_tlv_event_id {
 	WMI_TLV_IFACE_LINK_STATS_EVENTID,
 	WMI_TLV_PEER_LINK_STATS_EVENTID,
 	WMI_TLV_RADIO_LINK_STATS_EVENTID,
+	WMI_TLV_UPDATE_FW_MEM_DUMP_EVENTID,
+	WMI_TLV_DIAG_EVENT_LOG_SUPPORTED_EVENTID,
+	WMI_TLV_INST_RSSI_STATS_EVENTID,
+	WMI_TLV_RADIO_TX_POWER_LEVEL_STATS_EVENTID,
+	WMI_TLV_REPORT_STATS_EVENTID,
+	WMI_TLV_UPDATE_RCPI_EVENTID,
+	WMI_TLV_PEER_STATS_INFO_EVENTID,
 	WMI_TLV_NLO_MATCH_EVENTID = WMI_TLV_EV(WMI_TLV_GRP_NLO_OFL),
 	WMI_TLV_NLO_SCAN_COMPLETE_EVENTID,
 	WMI_TLV_APFIND_EVENTID,
@@ -2082,6 +2095,85 @@ struct wmi_tlv_stats_ev {
 	__le32 num_peer_stats_extd;
 } __packed;
 
+struct wmi_tlv_peer_stats_info_ev {
+	__le32 vdev_id;
+	__le32 num_peers;
+	__le32 more_data;
+} __packed;
+
+#define WMI_TLV_MAX_CHAINS 8
+
+struct wmi_tlv_peer_stats_info {
+	struct wmi_mac_addr peer_macaddr;
+	struct {
+		/* lower 32 bits of the tx_bytes value */
+		__le32 low_32;
+		/* upper 32 bits of the tx_bytes value */
+		__le32 high_32;
+	} __packed tx_bytes;
+	struct {
+		/* lower 32 bits of the tx_packets value */
+		__le32 low_32;
+		/* upper 32 bits of the tx_packets value */
+		__le32 high_32;
+	} __packed tx_packets;
+	struct {
+		/* lower 32 bits of the rx_bytes value */
+		__le32 low_32;
+		/* upper 32 bits of the rx_bytes value */
+		__le32 high_32;
+	} __packed rx_bytes;
+	struct {
+		/* lower 32 bits of the rx_packets value */
+		__le32 low_32;
+		/* upper 32 bits of the rx_packets value */
+		__le32 high_32;
+	} __packed rx_packets;
+	__le32 tx_retries;
+	__le32 tx_failed;
+
+	/* rate information, it is output of WMI_ASSEMBLE_RATECODE_V1
+	 *  (in format of 0x1000RRRR)
+	 * The rate-code is a 4-bytes field in which,
+	 * for given rate, nss and preamble
+	 *
+	 * b'31-b'29 unused / reserved
+	 * b'28      indicate the version of rate-code (1 = RATECODE_V1)
+	 * b'27-b'11 unused / reserved
+	 * b'10-b'8  indicate the preamble (0 OFDM, 1 CCK, 2 HT, 3 VHT)
+	 * b'7-b'5   indicate the NSS (0 - 1x1, 1 - 2x2, 2 - 3x3, 3 - 4x4)
+	 * b'4-b'0   indicate the rate, which is indicated as follows:
+	 *	    OFDM :     0: OFDM 48 Mbps
+	 *		       1: OFDM 24 Mbps
+	 *		       2: OFDM 12 Mbps
+	 *		       3: OFDM 6 Mbps
+	 *		       4: OFDM 54 Mbps
+	 *		       5: OFDM 36 Mbps
+	 *		       6: OFDM 18 Mbps
+	 *		       7: OFDM 9 Mbps
+	 *	   CCK (pream == 1)
+	 *		       0: CCK 11 Mbps Long
+	 *		       1: CCK 5.5 Mbps Long
+	 *		       2: CCK 2 Mbps Long
+	 *		       3: CCK 1 Mbps Long
+	 *		       4: CCK 11 Mbps Short
+	 *		       5: CCK 5.5 Mbps Short
+	 *		       6: CCK 2 Mbps Short
+	 *	   HT/VHT (pream == 2/3)
+	 *		       0..7: MCS0..MCS7 (HT)
+	 *		       0..9: MCS0..MCS9 (11AC VHT)
+	 *		       0..11: MCS0..MCS11 (11AX VHT)
+	 * rate-code of the last transmission
+	 */
+	__le32 last_tx_rate_code;
+	__le32 last_rx_rate_code;
+	__le32 last_tx_bitrate_kbps;
+	__le32 last_rx_bitrate_kbps;
+	__le32 peer_rssi;
+	__le32 tx_succeed;
+	__le32 peer_rssi_per_chain[WMI_TLV_MAX_CHAINS];
+} __packed;
+
 struct wmi_tlv_p2p_noa_ev {
 	__le32 vdev_id;
 } __packed;
@@ -2098,6 +2190,14 @@ struct wmi_tlv_wow_add_del_event_cmd {
 	__le32 event_bitmap;
 } __packed;
 
+struct wmi_tlv_request_peer_stats_info {
+	__le32 request_type;
+	__le32 vdev_id;
+	/* peer MAC address */
+	struct wmi_mac_addr peer_macaddr;
+	__le32 reset_after_request;
+} __packed;
+
 /* Command to set/unset chip in quiet mode */
 struct wmi_tlv_set_quiet_cmd {
 	__le32 vdev_id;
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 46740e16f3ce..0f05405bebc0 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -940,6 +940,7 @@ struct wmi_cmd_map {
 	u32 vdev_spectral_scan_configure_cmdid;
 	u32 vdev_spectral_scan_enable_cmdid;
 	u32 request_stats_cmdid;
+	u32 request_peer_stats_info_cmdid;
 	u32 set_arp_ns_offload_cmdid;
 	u32 network_list_offload_config_cmdid;
 	u32 gtk_offload_cmdid;
@@ -4579,6 +4580,13 @@ struct wmi_request_stats_cmd {
 	struct wlan_inst_rssi_args inst_rssi_args;
 } __packed;
 
+enum wmi_peer_stats_info_request_type {
+	/* request stats of one specified peer */
+	WMI_REQUEST_ONE_PEER_STATS_INFO = 0x01,
+	/* request stats of all peers belong to specified VDEV */
+	WMI_REQUEST_VDEV_ALL_PEER_STATS_INFO = 0x02,
+};
+
 /* Suspend option */
 enum {
 	/* suspend */
-- 
cgit v1.2.3-59-g8ed1b


From 3344b99d69ab6b479c5a54c7b72c325aaa4bdad0 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Mon, 27 Apr 2020 16:04:15 +0800
Subject: ath10k: add bitrate parse for peer stats info

The rate code and rate kbps reported by WMI_TLV_PEER_STATS_INFO_EVENTID
from firmware contain all the bitrate info, which includes OFDM, CCK
and HT/VHT, and mac80211 needs the struct rate_info, which includes the
parameters below:

flags: bitflag of flags from &enum rate_info_flags
mcs: mcs index if struct describes an HT/VHT/HE rate
legacy: bitrate in 100kbit/s for 802.11abg
nss: number of streams (VHT & HE only)
bw: bandwidth (from &enum rate_info_bw)

For OFDM/CCK, the rate kbps indicates the bitrate; for HT/VHT, mac80211
needs the above 5 parameters to calculate the bitrate and show it in iw.

After parsing the bitrate info, iw shows the correct rx bitrate:

localhost ~ # iw wlan0 link
	rx bitrate: 234.0 MBit/s VHT-MCS 3 80MHz VHT-NSS 2
	rx bitrate: 40.5 MBit/s MCS 2 40MHz
	rx bitrate: 72.2 MBit/s MCS 7 short GI
	rx bitrate: 54.0 MBit/s
	rx bitrate: 48.0 MBit/s

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427080416.8265-4-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.h    |  20 ++++
 drivers/net/wireless/ath/ath10k/mac.c     | 161 ++++++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.h |   9 ++
 3 files changed, 190 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 11d9132be4fd..1700bf59e8fa 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -149,6 +149,26 @@ static inline u32 host_interest_item_address(u32 item_offset)
 	return QCA988X_HOST_INTEREST_ADDRESS + item_offset;
 }
 
+enum ath10k_phy_mode {
+	ATH10K_PHY_MODE_LEGACY = 0,
+	ATH10K_PHY_MODE_HT = 1,
+	ATH10K_PHY_MODE_VHT = 2,
+};
+
+/* Data rate 100KBPS based on IE Index */
+struct ath10k_index_ht_data_rate_type {
+	u8   beacon_rate_index;
+	u16  supported_rate[4];
+};
+
+/* Data rate 100KBPS based on IE Index */
+struct ath10k_index_vht_data_rate_type {
+	u8   beacon_rate_index;
+	u16  supported_VHT80_rate[2];
+	u16  supported_VHT40_rate[2];
+	u16  supported_VHT20_rate[2];
+};
+
 struct ath10k_bmi {
 	bool done_sent;
 };
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index d0401d3adde4..38fc8cb3aac9 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8305,6 +8305,157 @@ static void ath10k_mac_op_sta_pre_rcu_remove(struct ieee80211_hw *hw,
 			peer->removed = true;
 }
 
+/* HT MCS parameters with Nss = 1 */
+static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss1[] = {
+	/* MCS  L20   L40   S20  S40 */
+	{0,  { 65,  135,  72,  150} },
+	{1,  { 130, 270,  144, 300} },
+	{2,  { 195, 405,  217, 450} },
+	{3,  { 260, 540,  289, 600} },
+	{4,  { 390, 810,  433, 900} },
+	{5,  { 520, 1080, 578, 1200} },
+	{6,  { 585, 1215, 650, 1350} },
+	{7,  { 650, 1350, 722, 1500} }
+};
+
+/* HT MCS parameters with Nss = 2 */
+static const struct ath10k_index_ht_data_rate_type supported_ht_mcs_rate_nss2[] = {
+	/* MCS  L20    L40   S20   S40 */
+	{0,  {130,  270,  144,  300} },
+	{1,  {260,  540,  289,  600} },
+	{2,  {390,  810,  433,  900} },
+	{3,  {520,  1080, 578,  1200} },
+	{4,  {780,  1620, 867,  1800} },
+	{5,  {1040, 2160, 1156, 2400} },
+	{6,  {1170, 2430, 1300, 2700} },
+	{7,  {1300, 2700, 1444, 3000} }
+};
+
+/* MCS parameters with Nss = 1 */
+static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss1[] = {
+	/* MCS  L80    S80     L40   S40    L20   S20 */
+	{0,  {293,  325},  {135,  150},  {65,   72} },
+	{1,  {585,  650},  {270,  300},  {130,  144} },
+	{2,  {878,  975},  {405,  450},  {195,  217} },
+	{3,  {1170, 1300}, {540,  600},  {260,  289} },
+	{4,  {1755, 1950}, {810,  900},  {390,  433} },
+	{5,  {2340, 2600}, {1080, 1200}, {520,  578} },
+	{6,  {2633, 2925}, {1215, 1350}, {585,  650} },
+	{7,  {2925, 3250}, {1350, 1500}, {650,  722} },
+	{8,  {3510, 3900}, {1620, 1800}, {780,  867} },
+	{9,  {3900, 4333}, {1800, 2000}, {780,  867} }
+};
+
+/*MCS parameters with Nss = 2 */
+static const struct ath10k_index_vht_data_rate_type supported_vht_mcs_rate_nss2[] = {
+	/* MCS  L80    S80     L40   S40    L20   S20 */
+	{0,  {585,  650},  {270,  300},  {130,  144} },
+	{1,  {1170, 1300}, {540,  600},  {260,  289} },
+	{2,  {1755, 1950}, {810,  900},  {390,  433} },
+	{3,  {2340, 2600}, {1080, 1200}, {520,  578} },
+	{4,  {3510, 3900}, {1620, 1800}, {780,  867} },
+	{5,  {4680, 5200}, {2160, 2400}, {1040, 1156} },
+	{6,  {5265, 5850}, {2430, 2700}, {1170, 1300} },
+	{7,  {5850, 6500}, {2700, 3000}, {1300, 1444} },
+	{8,  {7020, 7800}, {3240, 3600}, {1560, 1733} },
+	{9,  {7800, 8667}, {3600, 4000}, {1560, 1733} }
+};
+
+static void ath10k_mac_get_rate_flags_ht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs,
+					 u8 *flags, u8 *bw)
+{
+	struct ath10k_index_ht_data_rate_type *mcs_rate;
+
+	mcs_rate = (struct ath10k_index_ht_data_rate_type *)
+		   ((nss == 1) ? &supported_ht_mcs_rate_nss1 :
+		   &supported_ht_mcs_rate_nss2);
+
+	if (rate == mcs_rate[mcs].supported_rate[0]) {
+		*bw = RATE_INFO_BW_20;
+	} else if (rate == mcs_rate[mcs].supported_rate[1]) {
+		*bw |= RATE_INFO_BW_40;
+	} else if (rate == mcs_rate[mcs].supported_rate[2]) {
+		*bw |= RATE_INFO_BW_20;
+		*flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else if (rate == mcs_rate[mcs].supported_rate[3]) {
+		*bw |= RATE_INFO_BW_40;
+		*flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else {
+		ath10k_warn(ar, "invalid ht params rate %d 100kbps nss %d mcs %d",
+			    rate, nss, mcs);
+	}
+}
+
+static void ath10k_mac_get_rate_flags_vht(struct ath10k *ar, u32 rate, u8 nss, u8 mcs,
+					  u8 *flags, u8 *bw)
+{
+	struct ath10k_index_vht_data_rate_type *mcs_rate;
+
+	mcs_rate = (struct ath10k_index_vht_data_rate_type *)
+		   ((nss == 1) ? &supported_vht_mcs_rate_nss1 :
+		   &supported_vht_mcs_rate_nss2);
+
+	if (rate == mcs_rate[mcs].supported_VHT80_rate[0]) {
+		*bw = RATE_INFO_BW_80;
+	} else if (rate == mcs_rate[mcs].supported_VHT80_rate[1]) {
+		*bw = RATE_INFO_BW_80;
+		*flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else if (rate == mcs_rate[mcs].supported_VHT40_rate[0]) {
+		*bw = RATE_INFO_BW_40;
+	} else if (rate == mcs_rate[mcs].supported_VHT40_rate[1]) {
+		*bw = RATE_INFO_BW_40;
+		*flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else if (rate == mcs_rate[mcs].supported_VHT20_rate[0]) {
+		*bw = RATE_INFO_BW_20;
+	} else if (rate == mcs_rate[mcs].supported_VHT20_rate[1]) {
+		*bw = RATE_INFO_BW_20;
+		*flags |= RATE_INFO_FLAGS_SHORT_GI;
+	} else {
+		ath10k_warn(ar, "invalid vht params rate %d 100kbps nss %d mcs %d",
+			    rate, nss, mcs);
+	}
+}
+
+static void ath10k_mac_get_rate_flags(struct ath10k *ar, u32 rate,
+				      enum ath10k_phy_mode mode, u8 nss, u8 mcs,
+				      u8 *flags, u8 *bw)
+{
+	if (mode == ATH10K_PHY_MODE_HT) {
+		*flags = RATE_INFO_FLAGS_MCS;
+		ath10k_mac_get_rate_flags_ht(ar, rate, nss, mcs, flags, bw);
+	} else if (mode == ATH10K_PHY_MODE_VHT) {
+		*flags = RATE_INFO_FLAGS_VHT_MCS;
+		ath10k_mac_get_rate_flags_vht(ar, rate, nss, mcs, flags, bw);
+	}
+}
+
+static void ath10k_mac_parse_bitrate(struct ath10k *ar, u32 rate_code,
+				     u32 bitrate_kbps, struct rate_info *rate)
+{
+	enum ath10k_phy_mode mode = ATH10K_PHY_MODE_LEGACY;
+	enum wmi_rate_preamble preamble = WMI_TLV_GET_HW_RC_PREAM_V1(rate_code);
+	u8 nss = WMI_TLV_GET_HW_RC_NSS_V1(rate_code) + 1;
+	u8 mcs = WMI_TLV_GET_HW_RC_RATE_V1(rate_code);
+	u8 flags = 0, bw = 0;
+
+	if (preamble == WMI_RATE_PREAMBLE_HT)
+		mode = ATH10K_PHY_MODE_HT;
+	else if (preamble == WMI_RATE_PREAMBLE_VHT)
+		mode = ATH10K_PHY_MODE_VHT;
+
+	ath10k_mac_get_rate_flags(ar, bitrate_kbps / 100, mode, nss, mcs, &flags, &bw);
+
+	ath10k_dbg(ar, ATH10K_DBG_MAC,
+		   "mac parse bitrate preamble %d mode %d nss %d mcs %d flags %x bw %d\n",
+		   preamble, mode, nss, mcs, flags, bw);
+
+	rate->flags = flags;
+	rate->bw = bw;
+	rate->legacy = bitrate_kbps / 100;
+	rate->nss = nss;
+	rate->mcs = mcs;
+}
+
 static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar,
 					       struct ieee80211_sta *sta,
 					       struct station_info *sinfo)
@@ -8341,6 +8492,16 @@ static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar,
 		ath10k_warn(ar, "timed out waiting peer stats info\n");
 		return;
 	}
+
+	if (arsta->rx_rate_code != 0 && arsta->rx_bitrate_kbps != 0) {
+		ath10k_mac_parse_bitrate(ar, arsta->rx_rate_code,
+					 arsta->rx_bitrate_kbps,
+					 &sinfo->rxrate);
+
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_RX_BITRATE);
+		arsta->rx_rate_code = 0;
+		arsta->rx_bitrate_kbps = 0;
+	}
 }
 
 static void ath10k_sta_statistics(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index 2153e2d9a955..6e0537dabd1d 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -2174,6 +2174,15 @@ struct wmi_tlv_peer_stats_info {
 	__le32 peer_rssi_per_chain[WMI_TLV_MAX_CHAINS];
 } __packed;
 
+#define HW_RATECODE_PREAM_V1_MASK GENMASK(10, 8)
+#define WMI_TLV_GET_HW_RC_PREAM_V1(rc) FIELD_GET(HW_RATECODE_PREAM_V1_MASK, rc)
+
+#define HW_RATECODE_NSS_V1_MASK GENMASK(7, 5)
+#define WMI_TLV_GET_HW_RC_NSS_V1(rc) FIELD_GET(HW_RATECODE_NSS_V1_MASK, rc)
+
+#define HW_RATECODE_RATE_V1_MASK GENMASK(4, 0)
+#define WMI_TLV_GET_HW_RC_RATE_V1(rc) FIELD_GET(HW_RATECODE_RATE_V1_MASK, rc)
+
 struct wmi_tlv_p2p_noa_ev {
 	__le32 vdev_id;
 } __packed;
-- 
cgit v1.2.3-59-g8ed1b


From 4cc02c7c14944b16020c8da44572b3c5d189d386 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Mon, 27 Apr 2020 16:04:16 +0800
Subject: ath10k: correct tx bitrate of iw for SDIO

In legacy mode the tx bitrate is sometimes shown incorrectly, for example:
iw wlan0 link
Connected to 8c:21:0a:b3:5a:64 (on wlan0)
        SSID: tplinkgw
        freq: 2462
        RX: 19672 bytes (184 packets)
        TX: 9851 bytes (87 packets)
        signal: -51 dBm
        rx bitrate: 54.0 MBit/s
        tx bitrate: 2.8 MBit/s

This patch uses the tx bitrate info reported by firmware in
WMI_TLV_PEER_STATS_INFO_EVENTID, so that the tx bitrate is shown
correctly.

iw wlan0 link
Connected to 8c:21:0a:b3:5a:64 (on wlan0)
        SSID: tplinkgw
        freq: 2462
        RX: 13973 bytes (120 packets)
        TX: 6737 bytes (57 packets)
        signal: -52 dBm
        rx bitrate: 54.0 MBit/s
        tx bitrate: 54.0 MBit/s

This patch only affects SDIO chips: ath10k_mac_sta_get_peer_stats_info
checks bitrate_statistics of hw_params, which is enabled only for
"qca6174 hw3.2 sdio".

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427080416.8265-5-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.h    |  2 ++
 drivers/net/wireless/ath/ath10k/mac.c     | 10 ++++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.c |  2 ++
 3 files changed, 14 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index 1700bf59e8fa..ad6ef8d492c8 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -526,6 +526,8 @@ struct ath10k_sta {
 
 	u32 rx_rate_code;
 	u32 rx_bitrate_kbps;
+	u32 tx_rate_code;
+	u32 tx_bitrate_kbps;
 	struct work_struct update_wk;
 	u64 rx_duration;
 	struct ath10k_htt_tx_stats *tx_stats;
diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 38fc8cb3aac9..0b7d510d2725 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -8502,6 +8502,16 @@ static void ath10k_mac_sta_get_peer_stats_info(struct ath10k *ar,
 		arsta->rx_rate_code = 0;
 		arsta->rx_bitrate_kbps = 0;
 	}
+
+	if (arsta->tx_rate_code != 0 && arsta->tx_bitrate_kbps != 0) {
+		ath10k_mac_parse_bitrate(ar, arsta->tx_rate_code,
+					 arsta->tx_bitrate_kbps,
+					 &sinfo->txrate);
+
+		sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+		arsta->tx_rate_code = 0;
+		arsta->tx_bitrate_kbps = 0;
+	}
 }
 
 static void ath10k_sta_statistics(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index eec1f1f27dec..9187b62b331c 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -249,6 +249,8 @@ static int ath10k_wmi_tlv_parse_peer_stats_info(struct ath10k *ar, u16 tag, u16
 	arsta = (struct ath10k_sta *)sta->drv_priv;
 	arsta->rx_rate_code = __le32_to_cpu(stat->last_rx_rate_code);
 	arsta->rx_bitrate_kbps = __le32_to_cpu(stat->last_rx_bitrate_kbps);
+	arsta->tx_rate_code = __le32_to_cpu(stat->last_tx_rate_code);
+	arsta->tx_bitrate_kbps = __le32_to_cpu(stat->last_tx_bitrate_kbps);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 69c93f9674c97dc439cdc0527811f8ad104c2e35 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 09:24:17 +0000
Subject: ath11k: use GFP_ATOMIC under spin lock

A spin lock is taken here so we should use GFP_ATOMIC.

Fixes: d5c65159f289 ("ath11k: driver for Qualcomm IEEE 802.11ax devices")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427092417.56236-1-weiyongjun1@huawei.com
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index d3d2a335cc40..47ad3bd9e1c6 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -896,7 +896,7 @@ int ath11k_peer_rx_tid_setup(struct ath11k *ar, const u8 *peer_mac, int vdev_id,
 	else
 		hw_desc_sz = ath11k_hal_reo_qdesc_size(DP_BA_WIN_SZ_MAX, tid);
 
-	vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_KERNEL);
+	vaddr = kzalloc(hw_desc_sz + HAL_LINK_DESC_ALIGN - 1, GFP_ATOMIC);
 	if (!vaddr) {
 		spin_unlock_bh(&ab->base_lock);
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 0146dca70b877b73c5fd9c67912b8a0ca8a7bac7 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 27 Apr 2020 17:59:34 +0200
Subject: xfrm: add support for UDPv6 encapsulation of ESP

This patch adds support for encapsulation of ESP over UDPv6. The code
is very similar to the IPv4 encapsulation implementation, and allows
to easily add espintcp on IPv6 as a follow-up.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ipv6_stubs.h  |   3 +
 include/net/xfrm.h        |   5 +
 net/ipv4/udp.c            |  10 +-
 net/ipv6/af_inet6.c       |   4 +
 net/ipv6/ah6.c            |   1 +
 net/ipv6/esp6.c           | 226 +++++++++++++++++++++++++++++++++++++++++-----
 net/ipv6/esp6_offload.c   |   7 +-
 net/ipv6/ip6_vti.c        |  18 +++-
 net/ipv6/ipcomp6.c        |   1 +
 net/ipv6/xfrm6_input.c    | 106 +++++++++++++++++++++-
 net/ipv6/xfrm6_protocol.c |  48 ++++++++++
 net/xfrm/xfrm_interface.c |   3 +
 12 files changed, 395 insertions(+), 37 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 3e7d2c0e79ca..f033a17b53b6 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -56,6 +56,9 @@ struct ipv6_stub {
 	void (*ndisc_send_na)(struct net_device *dev, const struct in6_addr *daddr,
 			      const struct in6_addr *solicited_addr,
 			      bool router, bool solicited, bool override, bool inc_opt);
+#if IS_ENABLED(CONFIG_XFRM)
+	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+#endif
 	struct neigh_table *nd_tbl;
 };
 extern const struct ipv6_stub *ipv6_stub __read_mostly;
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8f71c111e65a..2577666c34c8 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1406,6 +1406,8 @@ struct xfrm4_protocol {
 
 struct xfrm6_protocol {
 	int (*handler)(struct sk_buff *skb);
+	int (*input_handler)(struct sk_buff *skb, int nexthdr, __be32 spi,
+			     int encap_type);
 	int (*cb_handler)(struct sk_buff *skb, int err);
 	int (*err_handler)(struct sk_buff *skb, struct inet6_skb_parm *opt,
 			   u8 type, u8 code, int offset, __be32 info);
@@ -1590,6 +1592,8 @@ int xfrm6_extract_header(struct sk_buff *skb);
 int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t);
+int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+		    int encap_type);
 int xfrm6_transport_finish(struct sk_buff *skb, int async);
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t);
 int xfrm6_rcv(struct sk_buff *skb);
@@ -1610,6 +1614,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 
 #ifdef CONFIG_XFRM
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname,
 		     u8 __user *optval, int optlen);
 #else
diff --git a/net/ipv4/udp.c b/net/ipv4/udp.c
index 32564b350823..1b7ebbcae497 100644
--- a/net/ipv4/udp.c
+++ b/net/ipv4/udp.c
@@ -112,6 +112,9 @@
 #include <net/sock_reuseport.h>
 #include <net/addrconf.h>
 #include <net/udp_tunnel.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
 
 struct udp_table udp_table __read_mostly;
 EXPORT_SYMBOL(udp_table);
@@ -2563,7 +2566,12 @@ int udp_lib_setsockopt(struct sock *sk, int level, int optname,
 #ifdef CONFIG_XFRM
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
-			up->encap_rcv = xfrm4_udp_encap_rcv;
+#if IS_ENABLED(CONFIG_IPV6)
+			if (sk->sk_family == AF_INET6)
+				up->encap_rcv = ipv6_stub->xfrm6_udp_encap_rcv;
+			else
+#endif
+				up->encap_rcv = xfrm4_udp_encap_rcv;
 #endif
 			fallthrough;
 		case UDP_ENCAP_L2TPINUDP:
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 345baa0a754f..b0b99c08350a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -60,6 +60,7 @@
 #include <net/calipso.h>
 #include <net/seg6.h>
 #include <net/rpl.h>
+#include <net/xfrm.h>
 
 #include <linux/uaccess.h>
 #include <linux/mroute6.h>
@@ -961,6 +962,9 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.ip6_del_rt	   = ip6_del_rt,
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
+#if IS_ENABLED(CONFIG_XFRM)
+	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+#endif
 	.nd_tbl	= &nd_tbl,
 };
 
diff --git a/net/ipv6/ah6.c b/net/ipv6/ah6.c
index 45e2adc56610..d88d97617f7e 100644
--- a/net/ipv6/ah6.c
+++ b/net/ipv6/ah6.c
@@ -767,6 +767,7 @@ static const struct xfrm_type ah6_type = {
 
 static struct xfrm6_protocol ah6_protocol = {
 	.handler	=	xfrm6_rcv,
+	.input_handler	=	xfrm_input,
 	.cb_handler	=	ah6_rcv_cb,
 	.err_handler	=	ah6_err,
 	.priority	=	0,
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index 11143d039f16..e8800968e209 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -26,10 +26,12 @@
 #include <linux/random.h>
 #include <linux/slab.h>
 #include <linux/spinlock.h>
+#include <net/ip6_checksum.h>
 #include <net/ip6_route.h>
 #include <net/icmp.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
+#include <net/udp.h>
 #include <linux/icmpv6.h>
 
 #include <linux/highmem.h>
@@ -39,6 +41,11 @@ struct esp_skb_cb {
 	void *tmp;
 };
 
+struct esp_output_extra {
+	__be32 seqhi;
+	u32 esphoff;
+};
+
 #define ESP_SKB_CB(__skb) ((struct esp_skb_cb *)&((__skb)->cb[0]))
 
 /*
@@ -72,9 +79,9 @@ static void *esp_alloc_tmp(struct crypto_aead *aead, int nfrags, int seqihlen)
 	return kmalloc(len, GFP_ATOMIC);
 }
 
-static inline __be32 *esp_tmp_seqhi(void *tmp)
+static inline void *esp_tmp_extra(void *tmp)
 {
-	return PTR_ALIGN((__be32 *)tmp, __alignof__(__be32));
+	return PTR_ALIGN(tmp, __alignof__(struct esp_output_extra));
 }
 
 static inline u8 *esp_tmp_iv(struct crypto_aead *aead, void *tmp, int seqhilen)
@@ -104,16 +111,17 @@ static inline struct scatterlist *esp_req_sg(struct crypto_aead *aead,
 
 static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 {
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
 	struct crypto_aead *aead = x->data;
-	int seqhilen = 0;
+	int extralen = 0;
 	u8 *iv;
 	struct aead_request *req;
 	struct scatterlist *sg;
 
 	if (x->props.flags & XFRM_STATE_ESN)
-		seqhilen += sizeof(__be32);
+		extralen += sizeof(*extra);
 
-	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	iv = esp_tmp_iv(aead, tmp, extralen);
 	req = esp_tmp_req(aead, iv);
 
 	/* Unref skb_frag_pages in the src scatterlist if necessary.
@@ -124,6 +132,23 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 			put_page(sg_page(sg));
 }
 
+static void esp_output_encap_csum(struct sk_buff *skb)
+{
+	/* UDP encap with IPv6 requires a valid checksum */
+	if (*skb_mac_header(skb) == IPPROTO_UDP) {
+		struct udphdr *uh = udp_hdr(skb);
+		struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		int len = ntohs(uh->len);
+		unsigned int offset = skb_transport_offset(skb);
+		__wsum csum = skb_checksum(skb, offset, skb->len - offset, 0);
+
+		uh->check = csum_ipv6_magic(&ip6h->saddr, &ip6h->daddr,
+					    len, IPPROTO_UDP, csum);
+		if (uh->check == 0)
+			uh->check = CSUM_MANGLED_0;
+	}
+}
+
 static void esp_output_done(struct crypto_async_request *base, int err)
 {
 	struct sk_buff *skb = base->data;
@@ -143,6 +168,8 @@ static void esp_output_done(struct crypto_async_request *base, int err)
 	esp_ssg_unref(x, tmp);
 	kfree(tmp);
 
+	esp_output_encap_csum(skb);
+
 	if (xo && (xo->flags & XFRM_DEV_RESUME)) {
 		if (err) {
 			XFRM_INC_STATS(xs_net(x), LINUX_MIB_XFRMOUTSTATEPROTOERROR);
@@ -163,7 +190,7 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 {
 	struct ip_esp_hdr *esph = (void *)(skb->data + offset);
 	void *tmp = ESP_SKB_CB(skb)->tmp;
-	__be32 *seqhi = esp_tmp_seqhi(tmp);
+	__be32 *seqhi = esp_tmp_extra(tmp);
 
 	esph->seq_no = esph->spi;
 	esph->spi = *seqhi;
@@ -171,27 +198,36 @@ static void esp_restore_header(struct sk_buff *skb, unsigned int offset)
 
 static void esp_output_restore_header(struct sk_buff *skb)
 {
-	esp_restore_header(skb, skb_transport_offset(skb) - sizeof(__be32));
+	void *tmp = ESP_SKB_CB(skb)->tmp;
+	struct esp_output_extra *extra = esp_tmp_extra(tmp);
+
+	esp_restore_header(skb, skb_transport_offset(skb) + extra->esphoff -
+				sizeof(__be32));
 }
 
 static struct ip_esp_hdr *esp_output_set_esn(struct sk_buff *skb,
 					     struct xfrm_state *x,
 					     struct ip_esp_hdr *esph,
-					     __be32 *seqhi)
+					     struct esp_output_extra *extra)
 {
 	/* For ESN we move the header forward by 4 bytes to
 	 * accomodate the high bits.  We will move it back after
 	 * encryption.
 	 */
 	if ((x->props.flags & XFRM_STATE_ESN)) {
+		__u32 seqhi;
 		struct xfrm_offload *xo = xfrm_offload(skb);
 
-		esph = (void *)(skb_transport_header(skb) - sizeof(__be32));
-		*seqhi = esph->spi;
 		if (xo)
-			esph->seq_no = htonl(xo->seq.hi);
+			seqhi = xo->seq.hi;
 		else
-			esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.hi);
+			seqhi = XFRM_SKB_CB(skb)->seq.output.hi;
+
+		extra->esphoff = (unsigned char *)esph -
+				 skb_transport_header(skb);
+		esph = (struct ip_esp_hdr *)((unsigned char *)esph - 4);
+		extra->seqhi = esph->spi;
+		esph->seq_no = htonl(seqhi);
 	}
 
 	esph->spi = x->id.spi;
@@ -207,15 +243,84 @@ static void esp_output_done_esn(struct crypto_async_request *base, int err)
 	esp_output_done(base, err);
 }
 
+static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
+					       int encap_type,
+					       struct esp_info *esp,
+					       __be16 sport,
+					       __be16 dport)
+{
+	struct udphdr *uh;
+	__be32 *udpdata32;
+	unsigned int len;
+
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len > U16_MAX)
+		return ERR_PTR(-EMSGSIZE);
+
+	uh = (struct udphdr *)esp->esph;
+	uh->source = sport;
+	uh->dest = dport;
+	uh->len = htons(len);
+	uh->check = 0;
+
+	*skb_mac_header(skb) = IPPROTO_UDP;
+
+	if (encap_type == UDP_ENCAP_ESPINUDP_NON_IKE) {
+		udpdata32 = (__be32 *)(uh + 1);
+		udpdata32[0] = udpdata32[1] = 0;
+		return (struct ip_esp_hdr *)(udpdata32 + 2);
+	}
+
+	return (struct ip_esp_hdr *)(uh + 1);
+}
+
+static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
+			    struct esp_info *esp)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct ip_esp_hdr *esph;
+	__be16 sport, dport;
+	int encap_type;
+
+	spin_lock_bh(&x->lock);
+	sport = encap->encap_sport;
+	dport = encap->encap_dport;
+	encap_type = encap->encap_type;
+	spin_unlock_bh(&x->lock);
+
+	switch (encap_type) {
+	default:
+	case UDP_ENCAP_ESPINUDP:
+	case UDP_ENCAP_ESPINUDP_NON_IKE:
+		esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
+		break;
+	}
+
+	if (IS_ERR(esph))
+		return PTR_ERR(esph);
+
+	esp->esph = esph;
+
+	return 0;
+}
+
 int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info *esp)
 {
 	u8 *tail;
 	u8 *vaddr;
 	int nfrags;
+	int esph_offset;
 	struct page *page;
 	struct sk_buff *trailer;
 	int tailen = esp->tailen;
 
+	if (x->encap) {
+		int err = esp6_output_encap(x, skb, esp);
+
+		if (err < 0)
+			return err;
+	}
+
 	if (!skb_cloned(skb)) {
 		if (tailen <= skb_tailroom(skb)) {
 			nfrags = 1;
@@ -274,10 +379,13 @@ int esp6_output_head(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	}
 
 cow:
+	esph_offset = (unsigned char *)esp->esph - skb_transport_header(skb);
+
 	nfrags = skb_cow_data(skb, tailen, &trailer);
 	if (nfrags < 0)
 		goto out;
 	tail = skb_tail_pointer(trailer);
+	esp->esph = (struct ip_esp_hdr *)(skb_transport_header(skb) + esph_offset);
 
 skip_cow:
 	esp_output_fill_trailer(tail, esp->tfclen, esp->plen, esp->proto);
@@ -295,20 +403,20 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	void *tmp;
 	int ivlen;
 	int assoclen;
-	int seqhilen;
-	__be32 *seqhi;
+	int extralen;
 	struct page *page;
 	struct ip_esp_hdr *esph;
 	struct aead_request *req;
 	struct crypto_aead *aead;
 	struct scatterlist *sg, *dsg;
+	struct esp_output_extra *extra;
 	int err = -ENOMEM;
 
 	assoclen = sizeof(struct ip_esp_hdr);
-	seqhilen = 0;
+	extralen = 0;
 
 	if (x->props.flags & XFRM_STATE_ESN) {
-		seqhilen += sizeof(__be32);
+		extralen += sizeof(*extra);
 		assoclen += sizeof(__be32);
 	}
 
@@ -316,12 +424,12 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	alen = crypto_aead_authsize(aead);
 	ivlen = crypto_aead_ivsize(aead);
 
-	tmp = esp_alloc_tmp(aead, esp->nfrags + 2, seqhilen);
+	tmp = esp_alloc_tmp(aead, esp->nfrags + 2, extralen);
 	if (!tmp)
 		goto error;
 
-	seqhi = esp_tmp_seqhi(tmp);
-	iv = esp_tmp_iv(aead, tmp, seqhilen);
+	extra = esp_tmp_extra(tmp);
+	iv = esp_tmp_iv(aead, tmp, extralen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
 
@@ -330,7 +438,8 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	else
 		dsg = &sg[esp->nfrags];
 
-	esph = esp_output_set_esn(skb, x, ip_esp_hdr(skb), seqhi);
+	esph = esp_output_set_esn(skb, x, esp->esph, extra);
+	esp->esph = esph;
 
 	sg_init_table(sg, esp->nfrags);
 	err = skb_to_sgvec(skb, sg,
@@ -394,6 +503,7 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	case 0:
 		if ((x->props.flags & XFRM_STATE_ESN))
 			esp_output_restore_header(skb);
+		esp_output_encap_csum(skb);
 	}
 
 	if (sg != dsg)
@@ -438,11 +548,13 @@ static int esp6_output(struct xfrm_state *x, struct sk_buff *skb)
 	esp.plen = esp.clen - skb->len - esp.tfclen;
 	esp.tailen = esp.tfclen + esp.plen + alen;
 
+	esp.esph = ip_esp_hdr(skb);
+
 	esp.nfrags = esp6_output_head(x, skb, &esp);
 	if (esp.nfrags < 0)
 		return esp.nfrags;
 
-	esph = ip_esp_hdr(skb);
+	esph = esp.esph;
 	esph->spi = x->id.spi;
 
 	esph->seq_no = htonl(XFRM_SKB_CB(skb)->seq.output.low);
@@ -517,6 +629,56 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 	if (unlikely(err < 0))
 		goto out;
 
+	if (x->encap) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		struct xfrm_encap_tmpl *encap = x->encap;
+		struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
+		__be16 source;
+
+		switch (x->encap->encap_type) {
+		case UDP_ENCAP_ESPINUDP:
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			source = uh->source;
+			break;
+		default:
+			WARN_ON_ONCE(1);
+			err = -EINVAL;
+			goto out;
+		}
+
+		/*
+		 * 1) if the NAT-T peer's IP or port changed then
+		 *    advertise the change to the keying daemon.
+		 *    This is an inbound SA, so just compare
+		 *    SRC ports.
+		 */
+		if (!ipv6_addr_equal(&ip6h->saddr, &x->props.saddr.in6) ||
+		    source != encap->encap_sport) {
+			xfrm_address_t ipaddr;
+
+			memcpy(&ipaddr.a6, &ip6h->saddr.s6_addr, sizeof(ipaddr.a6));
+			km_new_mapping(x, &ipaddr, source);
+
+			/* XXX: perhaps add an extra
+			 * policy check here, to see
+			 * if we should allow or
+			 * reject a packet from a
+			 * different source
+			 * address/port.
+			 */
+		}
+
+		/*
+		 * 2) ignore UDP/TCP checksums in case
+		 *    of NAT-T in Transport Mode, or
+		 *    perform other post-processing fixes
+		 *    as per draft-ietf-ipsec-udp-encaps-06,
+		 *    section 3.1.2
+		 */
+		if (x->props.mode == XFRM_MODE_TRANSPORT)
+			skb->ip_summed = CHECKSUM_UNNECESSARY;
+	}
+
 	skb_postpull_rcsum(skb, skb_network_header(skb),
 			   skb_network_header_len(skb));
 	skb_pull_rcsum(skb, hlen);
@@ -632,7 +794,7 @@ skip_cow:
 		goto out;
 
 	ESP_SKB_CB(skb)->tmp = tmp;
-	seqhi = esp_tmp_seqhi(tmp);
+	seqhi = esp_tmp_extra(tmp);
 	iv = esp_tmp_iv(aead, tmp, seqhilen);
 	req = esp_tmp_req(aead, iv);
 	sg = esp_req_sg(aead, req);
@@ -836,9 +998,6 @@ static int esp6_init_state(struct xfrm_state *x)
 	u32 align;
 	int err;
 
-	if (x->encap)
-		return -EINVAL;
-
 	x->data = NULL;
 
 	if (x->aead)
@@ -867,6 +1026,22 @@ static int esp6_init_state(struct xfrm_state *x)
 		break;
 	}
 
+	if (x->encap) {
+		struct xfrm_encap_tmpl *encap = x->encap;
+
+		switch (encap->encap_type) {
+		default:
+			err = -EINVAL;
+			goto error;
+		case UDP_ENCAP_ESPINUDP:
+			x->props.header_len += sizeof(struct udphdr);
+			break;
+		case UDP_ENCAP_ESPINUDP_NON_IKE:
+			x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
+			break;
+		}
+	}
+
 	align = ALIGN(crypto_aead_blocksize(aead), 4);
 	x->props.trailer_len = align + 1 + crypto_aead_authsize(aead);
 
@@ -893,6 +1068,7 @@ static const struct xfrm_type esp6_type = {
 
 static struct xfrm6_protocol esp6_protocol = {
 	.handler	=	xfrm6_rcv,
+	.input_handler	=	xfrm_input,
 	.cb_handler	=	esp6_rcv_cb,
 	.err_handler	=	esp6_err,
 	.priority	=	0,
diff --git a/net/ipv6/esp6_offload.c b/net/ipv6/esp6_offload.c
index 8eab2c869d61..06163cc15844 100644
--- a/net/ipv6/esp6_offload.c
+++ b/net/ipv6/esp6_offload.c
@@ -271,7 +271,6 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 	int alen;
 	int blksize;
 	struct xfrm_offload *xo;
-	struct ip_esp_hdr *esph;
 	struct crypto_aead *aead;
 	struct esp_info esp;
 	bool hw_offload = true;
@@ -312,13 +311,13 @@ static int esp6_xmit(struct xfrm_state *x, struct sk_buff *skb,  netdev_features
 
 	seq = xo->seq.low;
 
-	esph = ip_esp_hdr(skb);
-	esph->spi = x->id.spi;
+	esp.esph = ip_esp_hdr(skb);
+	esp.esph->spi = x->id.spi;
 
 	skb_push(skb, -skb_network_offset(skb));
 
 	if (xo->flags & XFRM_GSO_SEGMENT) {
-		esph->seq_no = htonl(seq);
+		esp.esph->seq_no = htonl(seq);
 
 		if (!skb_is_gso(skb))
 			xo->seq.low++;
diff --git a/net/ipv6/ip6_vti.c b/net/ipv6/ip6_vti.c
index cc6180e08a4f..1147f647b9a0 100644
--- a/net/ipv6/ip6_vti.c
+++ b/net/ipv6/ip6_vti.c
@@ -296,7 +296,8 @@ static void vti6_dev_uninit(struct net_device *dev)
 	dev_put(dev);
 }
 
-static int vti6_rcv(struct sk_buff *skb)
+static int vti6_input_proto(struct sk_buff *skb, int nexthdr, __be32 spi,
+			    int encap_type)
 {
 	struct ip6_tnl *t;
 	const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
@@ -323,7 +324,10 @@ static int vti6_rcv(struct sk_buff *skb)
 
 		rcu_read_unlock();
 
-		return xfrm6_rcv_tnl(skb, t);
+		XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = t;
+		XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+		XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+		return xfrm_input(skb, nexthdr, spi, encap_type);
 	}
 	rcu_read_unlock();
 	return -EINVAL;
@@ -332,6 +336,13 @@ discard:
 	return 0;
 }
 
+static int vti6_rcv(struct sk_buff *skb)
+{
+	int nexthdr = skb_network_header(skb)[IP6CB(skb)->nhoff];
+
+	return vti6_input_proto(skb, nexthdr, 0, 0);
+}
+
 static int vti6_rcv_cb(struct sk_buff *skb, int err)
 {
 	unsigned short family;
@@ -1185,6 +1196,7 @@ static struct pernet_operations vti6_net_ops = {
 
 static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
 	.handler	=	vti6_rcv,
+	.input_handler	=	vti6_input_proto,
 	.cb_handler	=	vti6_rcv_cb,
 	.err_handler	=	vti6_err,
 	.priority	=	100,
@@ -1192,6 +1204,7 @@ static struct xfrm6_protocol vti_esp6_protocol __read_mostly = {
 
 static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
 	.handler	=	vti6_rcv,
+	.input_handler	=	vti6_input_proto,
 	.cb_handler	=	vti6_rcv_cb,
 	.err_handler	=	vti6_err,
 	.priority	=	100,
@@ -1199,6 +1212,7 @@ static struct xfrm6_protocol vti_ah6_protocol __read_mostly = {
 
 static struct xfrm6_protocol vti_ipcomp6_protocol __read_mostly = {
 	.handler	=	vti6_rcv,
+	.input_handler	=	vti6_input_proto,
 	.cb_handler	=	vti6_rcv_cb,
 	.err_handler	=	vti6_err,
 	.priority	=	100,
diff --git a/net/ipv6/ipcomp6.c b/net/ipv6/ipcomp6.c
index 3752bd3e92ce..99668bfebd85 100644
--- a/net/ipv6/ipcomp6.c
+++ b/net/ipv6/ipcomp6.c
@@ -183,6 +183,7 @@ static const struct xfrm_type ipcomp6_type = {
 
 static struct xfrm6_protocol ipcomp6_protocol = {
 	.handler	= xfrm6_rcv,
+	.input_handler	= xfrm_input,
 	.cb_handler	= ipcomp6_rcv_cb,
 	.err_handler	= ipcomp6_err,
 	.priority	= 0,
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index a52cb3fc6df5..56f52353b324 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -35,9 +35,12 @@ EXPORT_SYMBOL(xfrm6_rcv_spi);
 static int xfrm6_transport_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
 {
-	if (xfrm_trans_queue(skb, ip6_rcv_finish))
-		__kfree_skb(skb);
-	return -1;
+	if (xfrm_trans_queue(skb, ip6_rcv_finish)) {
+		kfree_skb(skb);
+		return NET_RX_DROP;
+	}
+
+	return 0;
 }
 
 int xfrm6_transport_finish(struct sk_buff *skb, int async)
@@ -60,13 +63,106 @@ int xfrm6_transport_finish(struct sk_buff *skb, int async)
 	if (xo && (xo->flags & XFRM_GRO)) {
 		skb_mac_header_rebuild(skb);
 		skb_reset_transport_header(skb);
-		return -1;
+		return 0;
 	}
 
 	NF_HOOK(NFPROTO_IPV6, NF_INET_PRE_ROUTING,
 		dev_net(skb->dev), NULL, skb, skb->dev, NULL,
 		xfrm6_transport_finish2);
-	return -1;
+	return 0;
+}
+
+/* If it's a keepalive packet, then just eat it.
+ * If it's an encapsulated packet, then pass it to the
+ * IPsec xfrm input.
+ * Returns 0 if skb passed to xfrm or was dropped.
+ * Returns >0 if skb should be passed to UDP.
+ * Returns <0 if skb should be resubmitted (-ret is protocol)
+ */
+int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb)
+{
+	struct udp_sock *up = udp_sk(sk);
+	struct udphdr *uh;
+	struct ipv6hdr *ip6h;
+	int len;
+	int ip6hlen = sizeof(struct ipv6hdr);
+
+	__u8 *udpdata;
+	__be32 *udpdata32;
+	__u16 encap_type = up->encap_type;
+
+	/* if this is not encapsulated socket, then just return now */
+	if (!encap_type)
+		return 1;
+
+	/* If this is a paged skb, make sure we pull up
+	 * whatever data we need to look at. */
+	len = skb->len - sizeof(struct udphdr);
+	if (!pskb_may_pull(skb, sizeof(struct udphdr) + min(len, 8)))
+		return 1;
+
+	/* Now we can get the pointers */
+	uh = udp_hdr(skb);
+	udpdata = (__u8 *)uh + sizeof(struct udphdr);
+	udpdata32 = (__be32 *)udpdata;
+
+	switch (encap_type) {
+	default:
+	case UDP_ENCAP_ESPINUDP:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			goto drop;
+		} else if (len > sizeof(struct ip_esp_hdr) && udpdata32[0] != 0) {
+			/* ESP Packet without Non-ESP header */
+			len = sizeof(struct udphdr);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	case UDP_ENCAP_ESPINUDP_NON_IKE:
+		/* Check if this is a keepalive packet.  If so, eat it. */
+		if (len == 1 && udpdata[0] == 0xff) {
+			goto drop;
+		} else if (len > 2 * sizeof(u32) + sizeof(struct ip_esp_hdr) &&
+			   udpdata32[0] == 0 && udpdata32[1] == 0) {
+
+			/* ESP Packet with Non-IKE marker */
+			len = sizeof(struct udphdr) + 2 * sizeof(u32);
+		} else
+			/* Must be an IKE packet.. pass it through */
+			return 1;
+		break;
+	}
+
+	/* At this point we are sure that this is an ESPinUDP packet,
+	 * so we need to remove 'len' bytes from the packet (the UDP
+	 * header and optional ESP marker bytes) and then modify the
+	 * protocol to ESP, and then call into the transform receiver.
+	 */
+	if (skb_unclone(skb, GFP_ATOMIC))
+		goto drop;
+
+	/* Now we can update and verify the packet length... */
+	ip6h = ipv6_hdr(skb);
+	ip6h->payload_len = htons(ntohs(ip6h->payload_len) - len);
+	if (skb->len < ip6hlen + len) {
+		/* packet is too small!?! */
+		goto drop;
+	}
+
+	/* pull the data buffer up to the ESP header and set the
+	 * transport header to point to ESP.  Keep UDP on the stack
+	 * for later.
+	 */
+	__skb_pull(skb, len);
+	skb_reset_transport_header(skb);
+
+	/* process ESP */
+	return xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, encap_type);
+
+drop:
+	kfree_skb(skb);
+	return 0;
 }
 
 int xfrm6_rcv_tnl(struct sk_buff *skb, struct ip6_tnl *t)
diff --git a/net/ipv6/xfrm6_protocol.c b/net/ipv6/xfrm6_protocol.c
index 34cb65c7d5a7..ea2f805d3b01 100644
--- a/net/ipv6/xfrm6_protocol.c
+++ b/net/ipv6/xfrm6_protocol.c
@@ -14,6 +14,7 @@
 #include <linux/mutex.h>
 #include <linux/skbuff.h>
 #include <linux/icmpv6.h>
+#include <net/ip6_route.h>
 #include <net/ipv6.h>
 #include <net/protocol.h>
 #include <net/xfrm.h>
@@ -58,6 +59,53 @@ static int xfrm6_rcv_cb(struct sk_buff *skb, u8 protocol, int err)
 	return 0;
 }
 
+int xfrm6_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
+		    int encap_type)
+{
+	int ret;
+	struct xfrm6_protocol *handler;
+	struct xfrm6_protocol __rcu **head = proto_handlers(nexthdr);
+
+	XFRM_TUNNEL_SKB_CB(skb)->tunnel.ip6 = NULL;
+	XFRM_SPI_SKB_CB(skb)->family = AF_INET6;
+	XFRM_SPI_SKB_CB(skb)->daddroff = offsetof(struct ipv6hdr, daddr);
+
+	if (!head)
+		goto out;
+
+	if (!skb_dst(skb)) {
+		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
+		int flags = RT6_LOOKUP_F_HAS_SADDR;
+		struct dst_entry *dst;
+		struct flowi6 fl6 = {
+			.flowi6_iif   = skb->dev->ifindex,
+			.daddr        = ip6h->daddr,
+			.saddr        = ip6h->saddr,
+			.flowlabel    = ip6_flowinfo(ip6h),
+			.flowi6_mark  = skb->mark,
+			.flowi6_proto = ip6h->nexthdr,
+		};
+
+		dst = ip6_route_input_lookup(dev_net(skb->dev), skb->dev, &fl6,
+					     skb, flags);
+		if (dst->error)
+			goto drop;
+		skb_dst_set(skb, dst);
+	}
+
+	for_each_protocol_rcu(*head, handler)
+		if ((ret = handler->input_handler(skb, nexthdr, spi, encap_type)) != -EINVAL)
+			return ret;
+
+out:
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+EXPORT_SYMBOL(xfrm6_rcv_encap);
+
 static int xfrm6_esp_rcv(struct sk_buff *skb)
 {
 	int ret;
diff --git a/net/xfrm/xfrm_interface.c b/net/xfrm/xfrm_interface.c
index eb9928c0a87c..02f8f46d0cc5 100644
--- a/net/xfrm/xfrm_interface.c
+++ b/net/xfrm/xfrm_interface.c
@@ -755,6 +755,7 @@ static struct pernet_operations xfrmi_net_ops = {
 
 static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
 	.handler	=	xfrm6_rcv,
+	.input_handler	=	xfrm_input,
 	.cb_handler	=	xfrmi_rcv_cb,
 	.err_handler	=	xfrmi6_err,
 	.priority	=	10,
@@ -762,6 +763,7 @@ static struct xfrm6_protocol xfrmi_esp6_protocol __read_mostly = {
 
 static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
 	.handler	=	xfrm6_rcv,
+	.input_handler	=	xfrm_input,
 	.cb_handler	=	xfrmi_rcv_cb,
 	.err_handler	=	xfrmi6_err,
 	.priority	=	10,
@@ -769,6 +771,7 @@ static struct xfrm6_protocol xfrmi_ah6_protocol __read_mostly = {
 
 static struct xfrm6_protocol xfrmi_ipcomp6_protocol __read_mostly = {
 	.handler	=	xfrm6_rcv,
+	.input_handler	=	xfrm_input,
 	.cb_handler	=	xfrmi_rcv_cb,
 	.err_handler	=	xfrmi6_err,
 	.priority	=	10,
-- 
cgit v1.2.3-59-g8ed1b


From 26333c37fc285e7372f1b9461f3ae0ba3dc699c9 Mon Sep 17 00:00:00 2001
From: Sabrina Dubroca <sd@queasysnail.net>
Date: Mon, 27 Apr 2020 17:59:35 +0200
Subject: xfrm: add IPv6 support for espintcp

This extends espintcp to support IPv6, building on the existing code
and the new UDPv6 encapsulation support. Most of the code is either
reused directly (stream parser, ULP) or very similar to the IPv4
variant (net/ipv6/esp6.c changes).

The separation of config options for IPv4 and IPv6 espintcp requires a
bit of Kconfig gymnastics to enable the core code.

Signed-off-by: Sabrina Dubroca <sd@queasysnail.net>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ipv6_stubs.h |   2 +
 net/ipv4/Kconfig         |   1 +
 net/ipv6/Kconfig         |  12 +++
 net/ipv6/af_inet6.c      |   1 +
 net/ipv6/esp6.c          | 188 ++++++++++++++++++++++++++++++++++++++++++++++-
 net/xfrm/Kconfig         |   3 +
 net/xfrm/Makefile        |   2 +-
 net/xfrm/espintcp.c      |  56 +++++++++++---
 8 files changed, 252 insertions(+), 13 deletions(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index f033a17b53b6..1e9e0cf7dc75 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -58,6 +58,8 @@ struct ipv6_stub {
 			      bool router, bool solicited, bool override, bool inc_opt);
 #if IS_ENABLED(CONFIG_XFRM)
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
+	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
+			       int encap_type);
 #endif
 	struct neigh_table *nd_tbl;
 };
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 25a8888826b8..014aaa17dc79 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -384,6 +384,7 @@ config INET_ESPINTCP
 	depends on XFRM && INET_ESP
 	select STREAM_PARSER
 	select NET_SOCK_MSG
+	select XFRM_ESPINTCP
 	help
 	  Support for RFC 8229 encapsulation of ESP and IKE over
 	  TCP/IPv4 sockets.
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2ccaee98fddb..468a2faadc7d 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -88,6 +88,18 @@ config INET6_ESP_OFFLOAD
 
 	  If unsure, say N.
 
+config INET6_ESPINTCP
+	bool "IPv6: ESP in TCP encapsulation (RFC 8229)"
+	depends on XFRM && INET6_ESP
+	select STREAM_PARSER
+	select NET_SOCK_MSG
+	select XFRM_ESPINTCP
+	help
+	  Support for RFC 8229 encapsulation of ESP and IKE over
+	  TCP/IPv6 sockets.
+
+	  If unsure, say N.
+
 config INET6_IPCOMP
 	tristate "IPv6: IPComp transformation"
 	select INET6_XFRM_TUNNEL
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index b0b99c08350a..cbbb00bad20e 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -964,6 +964,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.ndisc_send_na = ndisc_send_na,
 #if IS_ENABLED(CONFIG_XFRM)
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
+	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
 	.nd_tbl	= &nd_tbl,
 };
diff --git a/net/ipv6/esp6.c b/net/ipv6/esp6.c
index e8800968e209..c43592771126 100644
--- a/net/ipv6/esp6.c
+++ b/net/ipv6/esp6.c
@@ -33,6 +33,9 @@
 #include <net/protocol.h>
 #include <net/udp.h>
 #include <linux/icmpv6.h>
+#include <net/tcp.h>
+#include <net/espintcp.h>
+#include <net/inet6_hashtables.h>
 
 #include <linux/highmem.h>
 
@@ -132,6 +135,132 @@ static void esp_ssg_unref(struct xfrm_state *x, void *tmp)
 			put_page(sg_page(sg));
 }
 
+#ifdef CONFIG_INET6_ESPINTCP
+struct esp_tcp_sk {
+	struct sock *sk;
+	struct rcu_head rcu;
+};
+
+static void esp_free_tcp_sk(struct rcu_head *head)
+{
+	struct esp_tcp_sk *esk = container_of(head, struct esp_tcp_sk, rcu);
+
+	sock_put(esk->sk);
+	kfree(esk);
+}
+
+static struct sock *esp6_find_tcp_sk(struct xfrm_state *x)
+{
+	struct xfrm_encap_tmpl *encap = x->encap;
+	struct esp_tcp_sk *esk;
+	__be16 sport, dport;
+	struct sock *nsk;
+	struct sock *sk;
+
+	sk = rcu_dereference(x->encap_sk);
+	if (sk && sk->sk_state == TCP_ESTABLISHED)
+		return sk;
+
+	spin_lock_bh(&x->lock);
+	sport = encap->encap_sport;
+	dport = encap->encap_dport;
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (sk && sk == nsk) {
+		esk = kmalloc(sizeof(*esk), GFP_ATOMIC);
+		if (!esk) {
+			spin_unlock_bh(&x->lock);
+			return ERR_PTR(-ENOMEM);
+		}
+		RCU_INIT_POINTER(x->encap_sk, NULL);
+		esk->sk = sk;
+		call_rcu(&esk->rcu, esp_free_tcp_sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	sk = __inet6_lookup_established(xs_net(x), &tcp_hashinfo, &x->id.daddr.in6,
+					dport, &x->props.saddr.in6, ntohs(sport), 0, 0);
+	if (!sk)
+		return ERR_PTR(-ENOENT);
+
+	if (!tcp_is_ulp_esp(sk)) {
+		sock_put(sk);
+		return ERR_PTR(-EINVAL);
+	}
+
+	spin_lock_bh(&x->lock);
+	nsk = rcu_dereference_protected(x->encap_sk,
+					lockdep_is_held(&x->lock));
+	if (encap->encap_sport != sport ||
+	    encap->encap_dport != dport) {
+		sock_put(sk);
+		sk = nsk ?: ERR_PTR(-EREMCHG);
+	} else if (sk == nsk) {
+		sock_put(sk);
+	} else {
+		rcu_assign_pointer(x->encap_sk, sk);
+	}
+	spin_unlock_bh(&x->lock);
+
+	return sk;
+}
+
+static int esp_output_tcp_finish(struct xfrm_state *x, struct sk_buff *skb)
+{
+	struct sock *sk;
+	int err;
+
+	rcu_read_lock();
+
+	sk = esp6_find_tcp_sk(x);
+	err = PTR_ERR_OR_ZERO(sk);
+	if (err)
+		goto out;
+
+	bh_lock_sock(sk);
+	if (sock_owned_by_user(sk))
+		err = espintcp_queue_out(sk, skb);
+	else
+		err = espintcp_push_skb(sk, skb);
+	bh_unlock_sock(sk);
+
+out:
+	rcu_read_unlock();
+	return err;
+}
+
+static int esp_output_tcp_encap_cb(struct net *net, struct sock *sk,
+				   struct sk_buff *skb)
+{
+	struct dst_entry *dst = skb_dst(skb);
+	struct xfrm_state *x = dst->xfrm;
+
+	return esp_output_tcp_finish(x, skb);
+}
+
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	local_bh_disable();
+	err = xfrm_trans_queue_net(xs_net(x), skb, esp_output_tcp_encap_cb);
+	local_bh_enable();
+
+	/* EINPROGRESS just happens to do the right thing.  It
+	 * actually means that the skb has been consumed and
+	 * isn't coming back.
+	 */
+	return err ?: -EINPROGRESS;
+}
+#else
+static int esp_output_tail_tcp(struct xfrm_state *x, struct sk_buff *skb)
+{
+	kfree_skb(skb);
+
+	return -EOPNOTSUPP;
+}
+#endif
+
 static void esp_output_encap_csum(struct sk_buff *skb)
 {
 	/* UDP encap with IPv6 requires a valid checksum */
@@ -181,7 +310,11 @@ static void esp_output_done(struct crypto_async_request *base, int err)
 		secpath_reset(skb);
 		xfrm_dev_resume(skb);
 	} else {
-		xfrm_output_resume(skb, err);
+		if (!err &&
+		    x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+			esp_output_tail_tcp(x, skb);
+		else
+			xfrm_output_resume(skb, err);
 	}
 }
 
@@ -274,6 +407,41 @@ static struct ip_esp_hdr *esp6_output_udp_encap(struct sk_buff *skb,
 	return (struct ip_esp_hdr *)(uh + 1);
 }
 
+#ifdef CONFIG_INET6_ESPINTCP
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+						struct sk_buff *skb,
+						struct esp_info *esp)
+{
+	__be16 *lenp = (void *)esp->esph;
+	struct ip_esp_hdr *esph;
+	unsigned int len;
+	struct sock *sk;
+
+	len = skb->len + esp->tailen - skb_transport_offset(skb);
+	if (len > IP_MAX_MTU)
+		return ERR_PTR(-EMSGSIZE);
+
+	rcu_read_lock();
+	sk = esp6_find_tcp_sk(x);
+	rcu_read_unlock();
+
+	if (IS_ERR(sk))
+		return ERR_CAST(sk);
+
+	*lenp = htons(len);
+	esph = (struct ip_esp_hdr *)(lenp + 1);
+
+	return esph;
+}
+#else
+static struct ip_esp_hdr *esp6_output_tcp_encap(struct xfrm_state *x,
+						struct sk_buff *skb,
+						struct esp_info *esp)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+#endif
+
 static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
 			    struct esp_info *esp)
 {
@@ -294,6 +462,9 @@ static int esp6_output_encap(struct xfrm_state *x, struct sk_buff *skb,
 	case UDP_ENCAP_ESPINUDP_NON_IKE:
 		esph = esp6_output_udp_encap(skb, encap_type, esp, sport, dport);
 		break;
+	case TCP_ENCAP_ESPINTCP:
+		esph = esp6_output_tcp_encap(x, skb, esp);
+		break;
 	}
 
 	if (IS_ERR(esph))
@@ -509,6 +680,9 @@ int esp6_output_tail(struct xfrm_state *x, struct sk_buff *skb, struct esp_info
 	if (sg != dsg)
 		esp_ssg_unref(x, tmp);
 
+	if (!err && x->encap && x->encap->encap_type == TCP_ENCAP_ESPINTCP)
+		err = esp_output_tail_tcp(x, skb);
+
 error_free:
 	kfree(tmp);
 error:
@@ -633,9 +807,13 @@ int esp6_input_done2(struct sk_buff *skb, int err)
 		const struct ipv6hdr *ip6h = ipv6_hdr(skb);
 		struct xfrm_encap_tmpl *encap = x->encap;
 		struct udphdr *uh = (void *)(skb_network_header(skb) + hdr_len);
+		struct tcphdr *th = (void *)(skb_network_header(skb) + hdr_len);
 		__be16 source;
 
 		switch (x->encap->encap_type) {
+		case TCP_ENCAP_ESPINTCP:
+			source = th->source;
+			break;
 		case UDP_ENCAP_ESPINUDP:
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 			source = uh->source;
@@ -1039,6 +1217,14 @@ static int esp6_init_state(struct xfrm_state *x)
 		case UDP_ENCAP_ESPINUDP_NON_IKE:
 			x->props.header_len += sizeof(struct udphdr) + 2 * sizeof(u32);
 			break;
+#ifdef CONFIG_INET6_ESPINTCP
+		case TCP_ENCAP_ESPINTCP:
+			/* only the length field, TCP encap is done by
+			 * the socket
+			 */
+			x->props.header_len += 2;
+			break;
+#endif
 		}
 	}
 
diff --git a/net/xfrm/Kconfig b/net/xfrm/Kconfig
index 6921a18201a0..b7fd9c838416 100644
--- a/net/xfrm/Kconfig
+++ b/net/xfrm/Kconfig
@@ -99,4 +99,7 @@ config NET_KEY_MIGRATE
 
 	  If unsure, say N.
 
+config XFRM_ESPINTCP
+	bool
+
 endif # INET
diff --git a/net/xfrm/Makefile b/net/xfrm/Makefile
index 212a4fcb4a88..2d4bb4b9f75e 100644
--- a/net/xfrm/Makefile
+++ b/net/xfrm/Makefile
@@ -11,4 +11,4 @@ obj-$(CONFIG_XFRM_ALGO) += xfrm_algo.o
 obj-$(CONFIG_XFRM_USER) += xfrm_user.o
 obj-$(CONFIG_XFRM_IPCOMP) += xfrm_ipcomp.o
 obj-$(CONFIG_XFRM_INTERFACE) += xfrm_interface.o
-obj-$(CONFIG_INET_ESPINTCP) += espintcp.o
+obj-$(CONFIG_XFRM_ESPINTCP) += espintcp.o
diff --git a/net/xfrm/espintcp.c b/net/xfrm/espintcp.c
index 037ea156d2f9..2132a3b6df0f 100644
--- a/net/xfrm/espintcp.c
+++ b/net/xfrm/espintcp.c
@@ -6,6 +6,9 @@
 #include <net/espintcp.h>
 #include <linux/skmsg.h>
 #include <net/inet_common.h>
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ipv6_stubs.h>
+#endif
 
 static void handle_nonesp(struct espintcp_ctx *ctx, struct sk_buff *skb,
 			  struct sock *sk)
@@ -31,7 +34,12 @@ static void handle_esp(struct sk_buff *skb, struct sock *sk)
 	rcu_read_lock();
 	skb->dev = dev_get_by_index_rcu(sock_net(sk), skb->skb_iif);
 	local_bh_disable();
-	xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+#if IS_ENABLED(CONFIG_IPV6)
+	if (sk->sk_family == AF_INET6)
+		ipv6_stub->xfrm6_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
+	else
+#endif
+		xfrm4_rcv_encap(skb, IPPROTO_ESP, 0, TCP_ENCAP_ESPINTCP);
 	local_bh_enable();
 	rcu_read_unlock();
 }
@@ -347,6 +355,9 @@ unlock:
 
 static struct proto espintcp_prot __ro_after_init;
 static struct proto_ops espintcp_ops __ro_after_init;
+static struct proto espintcp6_prot;
+static struct proto_ops espintcp6_ops;
+static DEFINE_MUTEX(tcpv6_prot_mutex);
 
 static void espintcp_data_ready(struct sock *sk)
 {
@@ -384,10 +395,14 @@ static void espintcp_destruct(struct sock *sk)
 
 bool tcp_is_ulp_esp(struct sock *sk)
 {
-	return sk->sk_prot == &espintcp_prot;
+	return sk->sk_prot == &espintcp_prot || sk->sk_prot == &espintcp6_prot;
 }
 EXPORT_SYMBOL_GPL(tcp_is_ulp_esp);
 
+static void build_protos(struct proto *espintcp_prot,
+			 struct proto_ops *espintcp_ops,
+			 const struct proto *orig_prot,
+			 const struct proto_ops *orig_ops);
 static int espintcp_init_sk(struct sock *sk)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
@@ -415,8 +430,19 @@ static int espintcp_init_sk(struct sock *sk)
 	strp_check_rcv(&ctx->strp);
 	skb_queue_head_init(&ctx->ike_queue);
 	skb_queue_head_init(&ctx->out_queue);
-	sk->sk_prot = &espintcp_prot;
-	sk->sk_socket->ops = &espintcp_ops;
+
+	if (sk->sk_family == AF_INET) {
+		sk->sk_prot = &espintcp_prot;
+		sk->sk_socket->ops = &espintcp_ops;
+	} else {
+		mutex_lock(&tcpv6_prot_mutex);
+		if (!espintcp6_prot.recvmsg)
+			build_protos(&espintcp6_prot, &espintcp6_ops, sk->sk_prot, sk->sk_socket->ops);
+		mutex_unlock(&tcpv6_prot_mutex);
+
+		sk->sk_prot = &espintcp6_prot;
+		sk->sk_socket->ops = &espintcp6_ops;
+	}
 	ctx->saved_data_ready = sk->sk_data_ready;
 	ctx->saved_write_space = sk->sk_write_space;
 	sk->sk_data_ready = espintcp_data_ready;
@@ -489,6 +515,20 @@ static __poll_t espintcp_poll(struct file *file, struct socket *sock,
 	return mask;
 }
 
+static void build_protos(struct proto *espintcp_prot,
+			 struct proto_ops *espintcp_ops,
+			 const struct proto *orig_prot,
+			 const struct proto_ops *orig_ops)
+{
+	memcpy(espintcp_prot, orig_prot, sizeof(struct proto));
+	memcpy(espintcp_ops, orig_ops, sizeof(struct proto_ops));
+	espintcp_prot->sendmsg = espintcp_sendmsg;
+	espintcp_prot->recvmsg = espintcp_recvmsg;
+	espintcp_prot->close = espintcp_close;
+	espintcp_prot->release_cb = espintcp_release;
+	espintcp_ops->poll = espintcp_poll;
+}
+
 static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
 	.name = "espintcp",
 	.owner = THIS_MODULE,
@@ -497,13 +537,7 @@ static struct tcp_ulp_ops espintcp_ulp __read_mostly = {
 
 void __init espintcp_init(void)
 {
-	memcpy(&espintcp_prot, &tcp_prot, sizeof(tcp_prot));
-	memcpy(&espintcp_ops, &inet_stream_ops, sizeof(inet_stream_ops));
-	espintcp_prot.sendmsg = espintcp_sendmsg;
-	espintcp_prot.recvmsg = espintcp_recvmsg;
-	espintcp_prot.close = espintcp_close;
-	espintcp_prot.release_cb = espintcp_release;
-	espintcp_ops.poll = espintcp_poll;
+	build_protos(&espintcp_prot, &espintcp_ops, &tcp_prot, &inet_stream_ops);
 
 	tcp_register_ulp(&espintcp_ulp);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 220915857e29795ae5ba4222806268b4a99c19c1 Mon Sep 17 00:00:00 2001
From: Alain Michaud <alainm@chromium.org>
Date: Thu, 23 Apr 2020 14:43:27 +0000
Subject: Bluetooth: Adding driver and quirk defs for multi-role LE

This change adds the relevant driver and quirk to allow drivers to
report the le_states as being trustworthy.

This has historically been disabled as controllers did not reliably
support this. In particular, this will be used to relax this condition
for controllers that have been well tested and reliable.

	/* Most controller will fail if we try to create new connections
	 * while we have an existing one in slave role.
	 */
	if (hdev->conn_hash.le_num_slave > 0)
		return NULL;

Signed-off-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c   | 1 +
 include/net/bluetooth/hci.h | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 871162790a0e..9a0ac333c886 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -58,6 +58,7 @@ static struct usb_driver btusb_driver;
 #define BTUSB_CW6622		0x100000
 #define BTUSB_MEDIATEK		0x200000
 #define BTUSB_WIDEBAND_SPEECH	0x400000
+#define BTUSB_VALID_LE_STATES   0x800000
 
 static const struct usb_device_id btusb_table[] = {
 	/* Generic Bluetooth USB device */
diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index 1da8cec8e210..e5bc1dfe809a 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -218,6 +218,15 @@ enum {
 	 * This quirk must be set before hci_register_dev is called.
 	 */
 	HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED,
+
+	/* When this quirk is set, the controller has validated that
+	 * LE states reported through the HCI_LE_READ_SUPPORTED_STATES are
+	 * valid.  This mechanism is necessary as many controllers have
+	 * been seen as having trouble initiating a connectable
+	 * advertisement despite the state combination being reported as
+	 * supported.
+	 */
+	HCI_QUIRK_VALID_LE_STATES,
 };
 
 /* HCI device flags */
-- 
cgit v1.2.3-59-g8ed1b


From 4364f2e91f0d44fa0e233d2a55e3ec35053d9bd9 Mon Sep 17 00:00:00 2001
From: Alain Michaud <alainm@chromium.org>
Date: Thu, 23 Apr 2020 14:43:29 +0000
Subject: Bluetooth: allow scatternet connections if supported.

This change allows scatternet connections to be created if the
controller reports support and the HCI_QUIRK_VALID_LE_STATES indicates
that the reported LE states can be trusted.

Signed-off-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 966fc543c01d..006c24e04b44 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -5288,7 +5288,9 @@ static struct hci_conn *check_pending_le_conn(struct hci_dev *hdev,
 	/* Most controller will fail if we try to create new connections
 	 * while we have an existing one in slave role.
 	 */
-	if (hdev->conn_hash.le_num_slave > 0)
+	if (hdev->conn_hash.le_num_slave > 0 &&
+	    (!test_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks) ||
+	     !(hdev->le_states[3] & 0x10)))
 		return NULL;
 
 	/* If we're not connectable only connect devices that we have in
-- 
cgit v1.2.3-59-g8ed1b


From aff8c489256ea1e32b35a007906a16dce7c6b4db Mon Sep 17 00:00:00 2001
From: Alain Michaud <alainm@chromium.org>
Date: Thu, 23 Apr 2020 14:43:31 +0000
Subject: Bluetooth: btusb: Adding support for LE scatternet to Jfp and ThP

This change adds support for LE scatternet connections to Intel's JfP
and ThP controllers.

Signed-off-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 9a0ac333c886..8ae3ad7a6013 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -336,7 +336,8 @@ static const struct usb_device_id blacklist_table[] = {
 
 	/* Intel Bluetooth devices */
 	{ USB_DEVICE(0x8087, 0x0025), .driver_info = BTUSB_INTEL_NEW |
-						     BTUSB_WIDEBAND_SPEECH },
+						     BTUSB_WIDEBAND_SPEECH |
+						     BTUSB_VALID_LE_STATES },
 	{ USB_DEVICE(0x8087, 0x0026), .driver_info = BTUSB_INTEL_NEW |
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW |
@@ -349,7 +350,8 @@ static const struct usb_device_id blacklist_table[] = {
 	{ USB_DEVICE(0x8087, 0x0aa7), .driver_info = BTUSB_INTEL |
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x8087, 0x0aaa), .driver_info = BTUSB_INTEL_NEW |
-						     BTUSB_WIDEBAND_SPEECH },
+						     BTUSB_WIDEBAND_SPEECH |
+						     BTUSB_VALID_LE_STATES },
 
 	/* Other Intel Bluetooth devices */
 	{ USB_VENDOR_AND_INTERFACE_INFO(0x8087, 0xe0, 0x01, 0x01),
@@ -3973,6 +3975,9 @@ static int btusb_probe(struct usb_interface *intf,
 	if (id->driver_info & BTUSB_WIDEBAND_SPEECH)
 		set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
 
+	if (id->driver_info & BTUSB_VALID_LE_STATES)
+		set_bit(HCI_QUIRK_VALID_LE_STATES, &hdev->quirks);
+
 	if (id->driver_info & BTUSB_DIGIANSWER) {
 		data->cmdreq_type = USB_TYPE_VENDOR;
 		set_bit(HCI_QUIRK_RESET_ON_CLOSE, &hdev->quirks);
-- 
cgit v1.2.3-59-g8ed1b


From 65749009242bbf67d5be129b7b67c4f9bfd93360 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 23 Apr 2020 01:34:28 +0000
Subject: dt-bindings: net: bluetooth: Add device tree bindings for QCA9377

QCA9377 is a QCA ROME device frequently found in Android TV boxes.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
index badf597c0e58..f21ea24fb0ca 100644
--- a/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
+++ b/Documentation/devicetree/bindings/net/qualcomm-bluetooth.txt
@@ -10,6 +10,7 @@ device the slave device is attached to.
 Required properties:
  - compatible: should contain one of the following:
    * "qcom,qca6174-bt"
+   * "qcom,qca9377-bt"
    * "qcom,wcn3990-bt"
    * "qcom,wcn3991-bt"
    * "qcom,wcn3998-bt"
@@ -21,6 +22,10 @@ Optional properties for compatible string qcom,qca6174-bt:
  - clocks: clock provided to the controller (SUSCLK_32KHZ)
  - firmware-name: specify the name of nvm firmware to load
 
+Optional properties for compatible string qcom,qca9377-bt:
+
+ - max-speed: see Documentation/devicetree/bindings/serial/serial.yaml
+
 Required properties for compatible string qcom,wcn399x-bt:
 
  - vddio-supply: VDD_IO supply regulator handle.
-- 
cgit v1.2.3-59-g8ed1b


From 31d4ab856e2d57a8877d90012b42d029f1fe0fe9 Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 23 Apr 2020 01:34:29 +0000
Subject: Bluetooth: hci_qca: add compatible for QCA9377

Add a compatible so QCA9377 devices can be defined in device-tree.

Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index d0ac554584a4..072983dc07e3 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -2058,6 +2058,7 @@ static SIMPLE_DEV_PM_OPS(qca_pm_ops, qca_suspend, qca_resume);
 static const struct of_device_id qca_bluetooth_of_match[] = {
 	{ .compatible = "qcom,qca6174-bt" },
 	{ .compatible = "qcom,qca6390-bt", .data = &qca_soc_data_qca6390},
+	{ .compatible = "qcom,qca9377-bt" },
 	{ .compatible = "qcom,wcn3990-bt", .data = &qca_soc_data_wcn3990},
 	{ .compatible = "qcom,wcn3991-bt", .data = &qca_soc_data_wcn3991},
 	{ .compatible = "qcom,wcn3998-bt", .data = &qca_soc_data_wcn3998},
-- 
cgit v1.2.3-59-g8ed1b


From 37aee136f8c4e4989099e7d4972d617ea7ad3e5c Mon Sep 17 00:00:00 2001
From: Christian Hewitt <christianshewitt@gmail.com>
Date: Thu, 23 Apr 2020 01:34:30 +0000
Subject: Bluetooth: hci_qca: allow max-speed to be set for QCA9377 devices

Move the read of max-speed from device-tree out of the qca_is_wcn399x
if block so oper_speed can be set for QCA9377 devices as well.

Suggested-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Christian Hewitt <christianshewitt@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 16 +++++++++-------
 1 file changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 072983dc07e3..b3fd07a6f812 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -597,10 +597,12 @@ static int qca_open(struct hci_uart *hu)
 
 	if (hu->serdev) {
 		qcadev = serdev_device_get_drvdata(hu->serdev);
-		if (qca_is_wcn399x(qcadev->btsoc_type)) {
+
+		if (qca_is_wcn399x(qcadev->btsoc_type))
 			hu->init_speed = qcadev->init_speed;
+
+		if (qcadev->oper_speed)
 			hu->oper_speed = qcadev->oper_speed;
-		}
 	}
 
 	timer_setup(&qca->wake_retrans_timer, hci_ibs_wake_retrans_timeout, 0);
@@ -1871,6 +1873,11 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 	serdev_device_set_drvdata(serdev, qcadev);
 	device_property_read_string(&serdev->dev, "firmware-name",
 					 &qcadev->firmware_name);
+	device_property_read_u32(&serdev->dev, "max-speed",
+				 &qcadev->oper_speed);
+	if (!qcadev->oper_speed)
+		BT_DBG("UART will pick default operating speed");
+
 	if (data && qca_is_wcn399x(data->soc_type)) {
 		qcadev->btsoc_type = data->soc_type;
 		qcadev->bt_power = devm_kzalloc(&serdev->dev,
@@ -1895,11 +1902,6 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 			return PTR_ERR(qcadev->susclk);
 		}
 
-		device_property_read_u32(&serdev->dev, "max-speed",
-					 &qcadev->oper_speed);
-		if (!qcadev->oper_speed)
-			BT_DBG("UART will pick default operating speed");
-
 		err = hci_uart_register_device(&qcadev->serdev_hu, &qca_proto);
 		if (err) {
 			BT_ERR("wcn3990 serdev registration failed");
-- 
cgit v1.2.3-59-g8ed1b


From b26d1e2b60284dc9f66ffad9ccd5c5da1100bb4b Mon Sep 17 00:00:00 2001
From: Veronika Kabatova <vkabatov@redhat.com>
Date: Tue, 28 Apr 2020 19:37:42 +0200
Subject: selftests/bpf: Copy runqslower to OUTPUT directory

$(OUTPUT)/runqslower makefile target doesn't actually create runqslower
binary in the $(OUTPUT) directory. As lib.mk expects all
TEST_GEN_PROGS_EXTENDED (which runqslower is a part of) to be present in
the OUTPUT directory, this results in an error when running e.g. `make
install`:

rsync: link_stat "tools/testing/selftests/bpf/runqslower" failed: No
       such file or directory (2)

Copy the binary into the OUTPUT directory after building it to fix the
error.

Fixes: 3a0d3092a4ed ("selftests/bpf: Build runqslower from selftests")
Signed-off-by: Veronika Kabatova <vkabatov@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200428173742.2988395-1-vkabatov@redhat.com
---
 tools/testing/selftests/bpf/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 7729892e0b04..4e654d41c7af 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -141,7 +141,8 @@ VMLINUX_BTF := $(abspath $(firstword $(wildcard $(VMLINUX_BTF_PATHS))))
 $(OUTPUT)/runqslower: $(BPFOBJ)
 	$(Q)$(MAKE) $(submake_extras) -C $(TOOLSDIR)/bpf/runqslower	\
 		    OUTPUT=$(SCRATCH_DIR)/ VMLINUX_BTF=$(VMLINUX_BTF)   \
-		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR)
+		    BPFOBJ=$(BPFOBJ) BPF_INCLUDE=$(INCLUDE_DIR) &&	\
+		    cp $(SCRATCH_DIR)/runqslower $@
 
 $(TEST_GEN_PROGS) $(TEST_GEN_PROGS_EXTENDED): $(OUTPUT)/test_stub.o $(BPFOBJ)
 
-- 
cgit v1.2.3-59-g8ed1b


From d65dbedfd298344747033f17c1efd2afc8082bc7 Mon Sep 17 00:00:00 2001
From: Huy Nguyen <huyn@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:02 -0700
Subject: net/mlx5: Add support for COPY steering action

Add COPY type to modify_header action. IPsec feature is the first
feature that needs COPY steering action.

Signed-off-by: Huy Nguyen <huyn@mellanox.com>
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Acked-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/flow.c                          | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c         | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c            | 6 +++---
 drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c       | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c           | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c   | 2 +-
 include/linux/mlx5/mlx5_ifc.h                              | 8 ++++----
 8 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 862b7bf3e646..69cb7e6e8955 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -427,7 +427,7 @@ static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
 
 	num_actions = uverbs_attr_ptr_get_array_size(
 		attrs, MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
-		MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto));
+		MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto));
 	if (num_actions < 0)
 		return num_actions;
 
@@ -648,7 +648,7 @@ DECLARE_UVERBS_NAMED_METHOD(
 			UA_MANDATORY),
 	UVERBS_ATTR_PTR_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_ACTIONS_PRM,
 			   UVERBS_ATTR_MIN_SIZE(MLX5_UN_SZ_BYTES(
-				   set_action_in_add_action_in_auto)),
+				   set_add_copy_action_in_auto)),
 			   UA_MANDATORY,
 			   UA_ALLOC_AND_COPY),
 	UVERBS_ATTR_CONST_IN(MLX5_IB_ATTR_CREATE_MODIFY_HEADER_FT_TYPE,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index ad3e3a65d403..91464f70a3fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -385,7 +385,7 @@ mlx5_tc_ct_entry_create_nat(struct mlx5_tc_ct_priv *ct_priv,
 	char *modact;
 	int err, i;
 
-	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
 
 	flow_action_for_each(i, act, flow_action) {
 		switch (act->id) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 88c0e460e995..12c5ca5b93ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -61,7 +61,7 @@
 #include "lib/geneve.h"
 #include "diag/en_tc_tracepoint.h"
 
-#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)
+#define MLX5_MH_ACT_SZ MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)
 
 struct mlx5_nic_flow_attr {
 	u32 action;
@@ -2660,7 +2660,7 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
 	set_vals = &hdrs[0].vals;
 	add_vals = &hdrs[1].vals;
 
-	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
 
 	for (i = 0; i < ARRAY_SIZE(fields); i++) {
 		bool skip;
@@ -2793,7 +2793,7 @@ int alloc_mod_hdr_actions(struct mlx5_core_dev *mdev,
 	if (mod_hdr_acts->num_actions < mod_hdr_acts->max_actions)
 		return 0;
 
-	action_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto);
+	action_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto);
 
 	max_hw_actions = mlx5e_flow_namespace_max_modify_action(mdev,
 								namespace);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c
index 029001040737..d5bf908dfecd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.c
@@ -274,7 +274,7 @@ mlx5_esw_chains_destroy_fdb_table(struct mlx5_eswitch *esw,
 static int
 create_fdb_chain_restore(struct fdb_chain *fdb_chain)
 {
-	char modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)];
+	char modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)];
 	struct mlx5_eswitch *esw = fdb_chain->esw;
 	struct mlx5_modify_hdr *mod_hdr;
 	u32 index;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index dc098bb58973..703f307c5967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1490,7 +1490,7 @@ static void esw_destroy_restore_table(struct mlx5_eswitch *esw)
 
 static int esw_create_restore_table(struct mlx5_eswitch *esw)
 {
-	u8 modact[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+	u8 modact[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
 	struct mlx5_flow_table_attr ft_attr = {};
 	struct mlx5_core_dev *dev = esw->dev;
@@ -1900,7 +1900,7 @@ static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
 static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
 						     struct mlx5_vport *vport)
 {
-	u8 action[MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto)] = {};
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
 	struct mlx5_flow_act flow_act = {};
 	int err = 0;
 	u32 key;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 304d1e4f0541..1a8e826ac86b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -791,7 +791,7 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 		return -EOPNOTSUPP;
 	}
 
-	actions_size = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) * num_actions;
+	actions_size = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) * num_actions;
 	inlen = MLX5_ST_SZ_BYTES(alloc_modify_header_context_in) + actions_size;
 
 	in = kzalloc(inlen, GFP_KERNEL);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
index 3b3f5b9d4f95..8887b2440c7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/fs_dr.c
@@ -576,7 +576,7 @@ static int mlx5_cmd_dr_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 	struct mlx5dr_action *action;
 	size_t actions_sz;
 
-	actions_sz = MLX5_UN_SZ_BYTES(set_action_in_add_action_in_auto) *
+	actions_sz = MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto) *
 		num_actions;
 	action = mlx5dr_action_create_modify_header(dr_domain, 0,
 						    actions_sz,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6fa24918eade..3ad2c51ccde9 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -5670,9 +5670,9 @@ struct mlx5_ifc_copy_action_in_bits {
 	u8         reserved_at_38[0x8];
 };
 
-union mlx5_ifc_set_action_in_add_action_in_auto_bits {
-	struct mlx5_ifc_set_action_in_bits set_action_in;
-	struct mlx5_ifc_add_action_in_bits add_action_in;
+union mlx5_ifc_set_add_copy_action_in_auto_bits {
+	struct mlx5_ifc_set_action_in_bits  set_action_in;
+	struct mlx5_ifc_add_action_in_bits  add_action_in;
 	struct mlx5_ifc_copy_action_in_bits copy_action_in;
 	u8         reserved_at_0[0x40];
 };
@@ -5746,7 +5746,7 @@ struct mlx5_ifc_alloc_modify_header_context_in_bits {
 	u8         reserved_at_68[0x10];
 	u8         num_of_actions[0x8];
 
-	union mlx5_ifc_set_action_in_add_action_in_auto_bits actions[0];
+	union mlx5_ifc_set_add_copy_action_in_auto_bits actions[0];
 };
 
 struct mlx5_ifc_dealloc_modify_header_context_out_bits {
-- 
cgit v1.2.3-59-g8ed1b


From 2b58f6d9df50f534fe465113b69de60a2ef0e74a Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:03 -0700
Subject: net/mlx5: Introduce IPsec Connect-X offload hardware bits and
 structures

Add IPsec offload related IFC structs, layouts and enumerations.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   |  4 +++
 include/linux/mlx5/mlx5_ifc.h | 78 +++++++++++++++++++++++++++++++++++++++++--
 2 files changed, 79 insertions(+), 3 deletions(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 2b90097a6cf9..7b57877e501e 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -1107,6 +1107,7 @@ enum mlx5_cap_type {
 	MLX5_CAP_TLS,
 	MLX5_CAP_VDPA_EMULATION = 0x13,
 	MLX5_CAP_DEV_EVENT = 0x14,
+	MLX5_CAP_IPSEC,
 	/* NUM OF CAP Types */
 	MLX5_CAP_NUM
 };
@@ -1324,6 +1325,9 @@ enum mlx5_qcam_feature_groups {
 	MLX5_GET64(device_virtio_emulation_cap, \
 		(mdev)->caps.hca_cur[MLX5_CAP_VDPA_EMULATION], cap)
 
+#define MLX5_CAP_IPSEC(mdev, cap)\
+	MLX5_GET(ipsec_cap, (mdev)->caps.hca_cur[MLX5_CAP_IPSEC], cap)
+
 enum {
 	MLX5_CMD_STAT_OK			= 0x0,
 	MLX5_CMD_STAT_INT_ERR			= 0x1,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 3ad2c51ccde9..cf971d341189 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -886,7 +886,8 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits {
 	u8         tunnel_stateless_vxlan_gpe[0x1];
 	u8         tunnel_stateless_ipv4_over_vxlan[0x1];
 	u8         tunnel_stateless_ip_over_ip[0x1];
-	u8         reserved_at_2a[0x6];
+	u8         insert_trailer[0x1];
+	u8         reserved_at_2b[0x5];
 	u8         max_vxlan_udp_ports[0x8];
 	u8         reserved_at_38[0x6];
 	u8         max_geneve_opt_len[0x1];
@@ -1100,6 +1101,23 @@ struct mlx5_ifc_tls_cap_bits {
 	u8         reserved_at_20[0x7e0];
 };
 
+struct mlx5_ifc_ipsec_cap_bits {
+	u8         ipsec_full_offload[0x1];
+	u8         ipsec_crypto_offload[0x1];
+	u8         ipsec_esn[0x1];
+	u8         ipsec_crypto_esp_aes_gcm_256_encrypt[0x1];
+	u8         ipsec_crypto_esp_aes_gcm_128_encrypt[0x1];
+	u8         ipsec_crypto_esp_aes_gcm_256_decrypt[0x1];
+	u8         ipsec_crypto_esp_aes_gcm_128_decrypt[0x1];
+	u8         reserved_at_7[0x4];
+	u8         log_max_ipsec_offload[0x5];
+	u8         reserved_at_10[0x10];
+
+	u8         min_log_ipsec_full_replay_window[0x8];
+	u8         max_log_ipsec_full_replay_window[0x8];
+	u8         reserved_at_30[0x7d0];
+};
+
 enum {
 	MLX5_WQ_TYPE_LINKED_LIST  = 0x0,
 	MLX5_WQ_TYPE_CYCLIC       = 0x1,
@@ -1464,7 +1482,8 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         reserved_at_460[0x3];
 	u8         log_max_uctx[0x5];
-	u8         reserved_at_468[0x3];
+	u8         reserved_at_468[0x2];
+	u8         ipsec_offload[0x1];
 	u8         log_max_umem[0x5];
 	u8         max_num_eqs[0x10];
 
@@ -4143,7 +4162,8 @@ enum {
 	MLX5_SET_FTE_MODIFY_ENABLE_MASK_ACTION    = 0x0,
 	MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_TAG  = 0x1,
 	MLX5_SET_FTE_MODIFY_ENABLE_MASK_DESTINATION_LIST    = 0x2,
-	MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS    = 0x3
+	MLX5_SET_FTE_MODIFY_ENABLE_MASK_FLOW_COUNTERS    = 0x3,
+	MLX5_SET_FTE_MODIFY_ENABLE_MASK_IPSEC_OBJ_ID    = 0x4
 };
 
 struct mlx5_ifc_set_fte_out_bits {
@@ -10468,10 +10488,62 @@ struct mlx5_ifc_affiliated_event_header_bits {
 
 enum {
 	MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = BIT(0xc),
+	MLX5_HCA_CAP_GENERAL_OBJECT_TYPES_IPSEC = BIT(0x13),
 };
 
 enum {
 	MLX5_GENERAL_OBJECT_TYPES_ENCRYPTION_KEY = 0xc,
+	MLX5_GENERAL_OBJECT_TYPES_IPSEC = 0x13,
+};
+
+enum {
+	MLX5_IPSEC_OBJECT_ICV_LEN_16B,
+	MLX5_IPSEC_OBJECT_ICV_LEN_12B,
+	MLX5_IPSEC_OBJECT_ICV_LEN_8B,
+};
+
+struct mlx5_ifc_ipsec_obj_bits {
+	u8         modify_field_select[0x40];
+	u8         full_offload[0x1];
+	u8         reserved_at_41[0x1];
+	u8         esn_en[0x1];
+	u8         esn_overlap[0x1];
+	u8         reserved_at_44[0x2];
+	u8         icv_length[0x2];
+	u8         reserved_at_48[0x4];
+	u8         aso_return_reg[0x4];
+	u8         reserved_at_50[0x10];
+
+	u8         esn_msb[0x20];
+
+	u8         reserved_at_80[0x8];
+	u8         dekn[0x18];
+
+	u8         salt[0x20];
+
+	u8         implicit_iv[0x40];
+
+	u8         reserved_at_100[0x700];
+};
+
+struct mlx5_ifc_create_ipsec_obj_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+	struct mlx5_ifc_ipsec_obj_bits ipsec_object;
+};
+
+enum {
+	MLX5_MODIFY_IPSEC_BITMASK_ESN_OVERLAP = BIT(0),
+	MLX5_MODIFY_IPSEC_BITMASK_ESN_MSB = BIT(1),
+};
+
+struct mlx5_ifc_query_ipsec_obj_out_bits {
+	struct mlx5_ifc_general_obj_out_cmd_hdr_bits general_obj_out_cmd_hdr;
+	struct mlx5_ifc_ipsec_obj_bits ipsec_object;
+};
+
+struct mlx5_ifc_modify_ipsec_obj_in_bits {
+	struct mlx5_ifc_general_obj_in_cmd_hdr_bits general_obj_in_cmd_hdr;
+	struct mlx5_ifc_ipsec_obj_bits ipsec_object;
 };
 
 struct mlx5_ifc_encryption_key_obj_bits {
-- 
cgit v1.2.3-59-g8ed1b


From dff8e2d15283dd92582ddeec25ca86e4cf2618c7 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:04 -0700
Subject: net/mlx5: Use aligned variable while allocating ICM memory

The alignment value is part of the input structure, so use it and spare
the extra memory allocation when it is not needed.
Now, use this new ability when allocating ICM for Direct-Rule
insertion.
Signed-off-by: Ariel Levkovich <lariel@mellanox.com>
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                  |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c   | 15 ++++--
 .../mellanox/mlx5/core/steering/dr_icm_pool.c      | 53 ++++++++++------------
 include/linux/mlx5/driver.h                        |  3 +-
 4 files changed, 38 insertions(+), 35 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index f10675213115..65e0e24d463b 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -2444,7 +2444,7 @@ static int handle_alloc_dm_sw_icm(struct ib_ucontext *ctx,
 	act_size = roundup_pow_of_two(act_size);
 
 	dm->size = act_size;
-	err = mlx5_dm_sw_icm_alloc(dev, type, act_size,
+	err = mlx5_dm_sw_icm_alloc(dev, type, act_size, attr->alignment,
 				   to_mucontext(ctx)->devx_uid, &dm->dev_addr,
 				   &dm->icm_dm.obj_id);
 	if (err)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
index 6cbccba56f70..3d5e57ff558c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/dm.c
@@ -90,7 +90,8 @@ void mlx5_dm_cleanup(struct mlx5_core_dev *dev)
 }
 
 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
-			 u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id)
+			 u64 length, u32 log_alignment, u16 uid,
+			 phys_addr_t *addr, u32 *obj_id)
 {
 	u32 num_blocks = DIV_ROUND_UP_ULL(length, MLX5_SW_ICM_BLOCK_SIZE(dev));
 	u32 out[MLX5_ST_SZ_DW(general_obj_out_cmd_hdr)] = {};
@@ -99,6 +100,7 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
 	unsigned long *block_map;
 	u64 icm_start_addr;
 	u32 log_icm_size;
+	u64 align_mask;
 	u32 max_blocks;
 	u64 block_idx;
 	void *sw_icm;
@@ -136,11 +138,14 @@ int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
 		return -EOPNOTSUPP;
 
 	max_blocks = BIT(log_icm_size - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev));
+
+	if (log_alignment < MLX5_LOG_SW_ICM_BLOCK_SIZE(dev))
+		log_alignment = MLX5_LOG_SW_ICM_BLOCK_SIZE(dev);
+	align_mask = BIT(log_alignment - MLX5_LOG_SW_ICM_BLOCK_SIZE(dev)) - 1;
+
 	spin_lock(&dm->lock);
-	block_idx = bitmap_find_next_zero_area(block_map,
-					       max_blocks,
-					       0,
-					       num_blocks, 0);
+	block_idx = bitmap_find_next_zero_area(block_map, max_blocks, 0,
+					       num_blocks, align_mask);
 
 	if (block_idx < max_blocks)
 		bitmap_set(block_map,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
index 30d2d7376f56..cc33515b9aba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_icm_pool.c
@@ -95,13 +95,12 @@ static int dr_icm_create_dm_mkey(struct mlx5_core_dev *mdev,
 }
 
 static struct mlx5dr_icm_mr *
-dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
-		      enum mlx5_sw_icm_type type,
-		      size_t align_base)
+dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool)
 {
 	struct mlx5_core_dev *mdev = pool->dmn->mdev;
+	enum mlx5_sw_icm_type dm_type;
 	struct mlx5dr_icm_mr *icm_mr;
-	size_t align_diff;
+	size_t log_align_base;
 	int err;
 
 	icm_mr = kvzalloc(sizeof(*icm_mr), GFP_KERNEL);
@@ -111,14 +110,22 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
 	icm_mr->pool = pool;
 	INIT_LIST_HEAD(&icm_mr->mr_list);
 
-	icm_mr->dm.type = type;
-
-	/* 2^log_biggest_table * entry-size * double-for-alignment */
 	icm_mr->dm.length = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
-							       pool->icm_type) * 2;
+							       pool->icm_type);
+
+	if (pool->icm_type == DR_ICM_TYPE_STE) {
+		dm_type = MLX5_SW_ICM_TYPE_STEERING;
+		log_align_base = ilog2(icm_mr->dm.length);
+	} else {
+		dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
+		/* Align base is 64B */
+		log_align_base = ilog2(DR_ICM_MODIFY_HDR_ALIGN_BASE);
+	}
+	icm_mr->dm.type = dm_type;
 
-	err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
-				   &icm_mr->dm.addr, &icm_mr->dm.obj_id);
+	err = mlx5_dm_sw_icm_alloc(mdev, icm_mr->dm.type, icm_mr->dm.length,
+				   log_align_base, 0, &icm_mr->dm.addr,
+				   &icm_mr->dm.obj_id);
 	if (err) {
 		mlx5dr_err(pool->dmn, "Failed to allocate SW ICM memory, err (%d)\n", err);
 		goto free_icm_mr;
@@ -137,15 +144,18 @@ dr_icm_pool_mr_create(struct mlx5dr_icm_pool *pool,
 
 	icm_mr->icm_start_addr = icm_mr->dm.addr;
 
-	/* align_base is always a power of 2 */
-	align_diff = icm_mr->icm_start_addr & (align_base - 1);
-	if (align_diff)
-		icm_mr->used_length = align_base - align_diff;
+	if (icm_mr->icm_start_addr & (BIT(log_align_base) - 1)) {
+		mlx5dr_err(pool->dmn, "Failed to get Aligned ICM mem (asked: %zu)\n",
+			   log_align_base);
+		goto free_mkey;
+	}
 
 	list_add_tail(&icm_mr->mr_list, &pool->icm_mr_list);
 
 	return icm_mr;
 
+free_mkey:
+	mlx5_core_destroy_mkey(mdev, &icm_mr->mkey);
 free_dm:
 	mlx5_dm_sw_icm_dealloc(mdev, icm_mr->dm.type, icm_mr->dm.length, 0,
 			       icm_mr->dm.addr, icm_mr->dm.obj_id);
@@ -200,24 +210,11 @@ static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket)
 	struct mlx5dr_icm_pool *pool = bucket->pool;
 	struct mlx5dr_icm_mr *icm_mr = NULL;
 	struct mlx5dr_icm_chunk *chunk;
-	enum mlx5_sw_icm_type dm_type;
-	size_t align_base;
 	int i, err = 0;
 
 	mr_req_size = bucket->num_of_entries * bucket->entry_size;
 	mr_row_size = mlx5dr_icm_pool_chunk_size_to_byte(pool->max_log_chunk_sz,
 							 pool->icm_type);
-
-	if (pool->icm_type == DR_ICM_TYPE_STE) {
-		dm_type = MLX5_SW_ICM_TYPE_STEERING;
-		/* Align base is the biggest chunk size / row size */
-		align_base = mr_row_size;
-	} else {
-		dm_type = MLX5_SW_ICM_TYPE_HEADER_MODIFY;
-		/* Align base is 64B */
-		align_base = DR_ICM_MODIFY_HDR_ALIGN_BASE;
-	}
-
 	mutex_lock(&pool->mr_mutex);
 	if (!list_empty(&pool->icm_mr_list)) {
 		icm_mr = list_last_entry(&pool->icm_mr_list,
@@ -228,7 +225,7 @@ static int dr_icm_chunks_create(struct mlx5dr_icm_bucket *bucket)
 	}
 
 	if (!icm_mr || mr_free_size < mr_row_size) {
-		icm_mr = dr_icm_pool_mr_create(pool, dm_type, align_base);
+		icm_mr = dr_icm_pool_mr_create(pool);
 		if (!icm_mr) {
 			err = -ENOMEM;
 			goto out_err;
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b60e5ab7906b..b46537a81703 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1080,7 +1080,8 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev);
 void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up);
 int mlx5_dm_sw_icm_alloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
-			 u64 length, u16 uid, phys_addr_t *addr, u32 *obj_id);
+			 u64 length, u32 log_alignment, u16 uid,
+			 phys_addr_t *addr, u32 *obj_id);
 int mlx5_dm_sw_icm_dealloc(struct mlx5_core_dev *dev, enum mlx5_sw_icm_type type,
 			   u64 length, u16 uid, phys_addr_t addr, u32 obj_id);
 
-- 
cgit v1.2.3-59-g8ed1b


From 244faedfd4d8e8c8e9f3c628d29bb74196b49743 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:05 -0700
Subject: net/mlx5: Refactor imm_inval_pkey field in cqe struct

The imm_inval_pkey field can hold four different types of data.
Depending on the usage, the data can be one of the following:
- Immediate field of the received message
- Invalidate rkey
- Pkey of the packet
- Flow table metadata

The current implementation doesn't reflect the intended usage of the
field at usage time.

Reflect the different types by replacing this field with a union, and
modify the code where this field is used to reflect its intended
usage.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Huy Nguyen <huyn@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/cq.c                 | 8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 include/linux/mlx5/device.h                     | 7 ++++++-
 3 files changed, 11 insertions(+), 6 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/cq.c b/drivers/infiniband/hw/mlx5/cq.c
index 32c05730dfe9..0c18cb6a2f14 100644
--- a/drivers/infiniband/hw/mlx5/cq.c
+++ b/drivers/infiniband/hw/mlx5/cq.c
@@ -202,7 +202,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 	case MLX5_CQE_RESP_WR_IMM:
 		wc->opcode	= IB_WC_RECV_RDMA_WITH_IMM;
 		wc->wc_flags	= IB_WC_WITH_IMM;
-		wc->ex.imm_data = cqe->imm_inval_pkey;
+		wc->ex.imm_data = cqe->immediate;
 		break;
 	case MLX5_CQE_RESP_SEND:
 		wc->opcode   = IB_WC_RECV;
@@ -214,12 +214,12 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 	case MLX5_CQE_RESP_SEND_IMM:
 		wc->opcode	= IB_WC_RECV;
 		wc->wc_flags	= IB_WC_WITH_IMM;
-		wc->ex.imm_data = cqe->imm_inval_pkey;
+		wc->ex.imm_data = cqe->immediate;
 		break;
 	case MLX5_CQE_RESP_SEND_INV:
 		wc->opcode	= IB_WC_RECV;
 		wc->wc_flags	= IB_WC_WITH_INVALIDATE;
-		wc->ex.invalidate_rkey = be32_to_cpu(cqe->imm_inval_pkey);
+		wc->ex.invalidate_rkey = be32_to_cpu(cqe->inval_rkey);
 		break;
 	}
 	wc->src_qp	   = be32_to_cpu(cqe->flags_rqpn) & 0xffffff;
@@ -227,7 +227,7 @@ static void handle_responder(struct ib_wc *wc, struct mlx5_cqe64 *cqe,
 	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
 	wc->wc_flags |= g ? IB_WC_GRH : 0;
 	if (unlikely(is_qp1(qp->ibqp.qp_type))) {
-		u16 pkey = be32_to_cpu(cqe->imm_inval_pkey) & 0xffff;
+		u16 pkey = be32_to_cpu(cqe->pkey) & 0xffff;
 
 		ib_find_cached_pkey(&dev->ib_dev, qp->port, pkey,
 				    &wc->pkey_index);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 12c5ca5b93ca..5b632434866f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4891,7 +4891,7 @@ bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
 	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
 	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
 		reg_c0 = 0;
-	reg_c1 = be32_to_cpu(cqe->imm_inval_pkey);
+	reg_c1 = be32_to_cpu(cqe->ft_metadata);
 
 	if (!reg_c0)
 		return true;
diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 7b57877e501e..746e17473d72 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -767,7 +767,12 @@ struct mlx5_cqe64 {
 	u8		l4_l3_hdr_type;
 	__be16		vlan_info;
 	__be32		srqn; /* [31:24]: lro_num_seg, [23:0]: srqn */
-	__be32		imm_inval_pkey;
+	union {
+		__be32 immediate;
+		__be32 inval_rkey;
+		__be32 pkey;
+		__be32 ft_metadata;
+	};
 	u8		rsvd40[4];
 	__be32		byte_cnt;
 	__be32		timestamp_h;
-- 
cgit v1.2.3-59-g8ed1b


From 06939536263d684073a30543930622eede633af1 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:06 -0700
Subject: net/mlx5: Add structure layout and defines for MFRL register

Add needed structure layouts and defines for MFRL (Management Firmware
Reset Level) register. This structure will be used for the firmware
upgrade and reset flow in the downstream patches.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  1 +
 include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index b46537a81703..d82dbbab8179 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -130,6 +130,7 @@ enum {
 	MLX5_REG_NODE_DESC	 = 0x6001,
 	MLX5_REG_HOST_ENDIANNESS = 0x7004,
 	MLX5_REG_MCIA		 = 0x9014,
+	MLX5_REG_MFRL		 = 0x9028,
 	MLX5_REG_MLCR		 = 0x902b,
 	MLX5_REG_MTRC_CAP	 = 0x9040,
 	MLX5_REG_MTRC_CONF	 = 0x9041,
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index cf971d341189..9e6a3cec1e32 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -9703,6 +9703,29 @@ struct mlx5_ifc_mcda_reg_bits {
 	u8         data[0][0x20];
 };
 
+enum {
+	MLX5_MFRL_REG_RESET_TYPE_FULL_CHIP = BIT(0),
+	MLX5_MFRL_REG_RESET_TYPE_NET_PORT_ALIVE = BIT(1),
+};
+
+enum {
+	MLX5_MFRL_REG_RESET_LEVEL0 = BIT(0),
+	MLX5_MFRL_REG_RESET_LEVEL3 = BIT(3),
+	MLX5_MFRL_REG_RESET_LEVEL6 = BIT(6),
+};
+
+struct mlx5_ifc_mfrl_reg_bits {
+	u8         reserved_at_0[0x20];
+
+	u8         reserved_at_20[0x2];
+	u8         pci_sync_for_fw_update_start[0x1];
+	u8         pci_sync_for_fw_update_resp[0x2];
+	u8         rst_type_sel[0x3];
+	u8         reserved_at_28[0x8];
+	u8         reset_type[0x8];
+	u8         reset_level[0x8];
+};
+
 struct mlx5_ifc_mirc_reg_bits {
 	u8         reserved_at_0[0x18];
 	u8         status_code[0x8];
@@ -9766,6 +9789,7 @@ union mlx5_ifc_ports_control_registers_document_bits {
 	struct mlx5_ifc_mcc_reg_bits mcc_reg;
 	struct mlx5_ifc_mcda_reg_bits mcda_reg;
 	struct mlx5_ifc_mirc_reg_bits mirc_reg;
+	struct mlx5_ifc_mfrl_reg_bits mfrl_reg;
 	u8         reserved_at_0[0x60e0];
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 3df0107784ceb388039b1fe510a8c7b8816de8f0 Mon Sep 17 00:00:00 2001
From: Moshe Shemesh <moshe@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:07 -0700
Subject: net/mlx5: Add structure and defines for pci sync for fw update event

Add needed structure layouts and defines for pci sync for fw update
event. The downstream patches will include event handlers for this event
type.

Signed-off-by: Moshe Shemesh <moshe@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   | 15 +++++++++++++++
 include/linux/mlx5/mlx5_ifc.h |  4 +++-
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index 746e17473d72..de93f0b67973 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -364,6 +364,7 @@ enum {
 enum {
 	MLX5_GENERAL_SUBTYPE_DELAY_DROP_TIMEOUT = 0x1,
 	MLX5_GENERAL_SUBTYPE_PCI_POWER_CHANGE_EVENT = 0x5,
+	MLX5_GENERAL_SUBTYPE_PCI_SYNC_FOR_FW_UPDATE_EVENT = 0x8,
 };
 
 enum {
@@ -689,6 +690,19 @@ struct mlx5_eqe_temp_warning {
 	__be64 sensor_warning_lsb;
 } __packed;
 
+#define SYNC_RST_STATE_MASK    0xf
+
+enum sync_rst_state_type {
+	MLX5_SYNC_RST_STATE_RESET_REQUEST	= 0x0,
+	MLX5_SYNC_RST_STATE_RESET_NOW		= 0x1,
+	MLX5_SYNC_RST_STATE_RESET_ABORT		= 0x2,
+};
+
+struct mlx5_eqe_sync_fw_update {
+	u8 reserved_at_0[3];
+	u8 sync_rst_state;
+};
+
 union ev_data {
 	__be32				raw[7];
 	struct mlx5_eqe_cmd		cmd;
@@ -707,6 +721,7 @@ union ev_data {
 	struct mlx5_eqe_dct             dct;
 	struct mlx5_eqe_temp_warning	temp_warning;
 	struct mlx5_eqe_xrq_err		xrq_err;
+	struct mlx5_eqe_sync_fw_update	sync_fw_update;
 } __packed;
 
 struct mlx5_eqe {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 9e6a3cec1e32..058ded202b65 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1317,7 +1317,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         wol_p[0x1];
 
 	u8         stat_rate_support[0x10];
-	u8         reserved_at_1f0[0xc];
+	u8         reserved_at_1f0[0x1];
+	u8         pci_sync_for_fw_update_event[0x1];
+	u8         reserved_at_1f2[0xa];
 	u8         cqe_version[0x4];
 
 	u8         compact_address_vector[0x1];
-- 
cgit v1.2.3-59-g8ed1b


From ee5cdf7a5e8945372c7496e98de2b364e095b60b Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:08 -0700
Subject: net/mlx5: Introduce TLS RX offload hardware bits

Add TLS RX offload related IFC hardware fields and enumerations.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/device.h   | 18 ++++++++++++++++--
 include/linux/mlx5/mlx5_ifc.h |  5 +++--
 2 files changed, 19 insertions(+), 4 deletions(-)

diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h
index de93f0b67973..1bc27aca648b 100644
--- a/include/linux/mlx5/device.h
+++ b/include/linux/mlx5/device.h
@@ -450,10 +450,12 @@ enum {
 
 enum {
 	MLX5_OPC_MOD_TLS_TIS_STATIC_PARAMS = 0x1,
+	MLX5_OPC_MOD_TLS_TIR_STATIC_PARAMS = 0x2,
 };
 
 enum {
 	MLX5_OPC_MOD_TLS_TIS_PROGRESS_PARAMS = 0x1,
+	MLX5_OPC_MOD_TLS_TIR_PROGRESS_PARAMS = 0x2,
 };
 
 enum {
@@ -764,7 +766,7 @@ struct mlx5_err_cqe {
 };
 
 struct mlx5_cqe64 {
-	u8		outer_l3_tunneled;
+	u8		tls_outer_l3_tunneled;
 	u8		rsvd0;
 	__be16		wqe_id;
 	u8		lro_tcppsh_abort_dupack;
@@ -854,7 +856,12 @@ static inline u8 get_cqe_l3_hdr_type(struct mlx5_cqe64 *cqe)
 
 static inline bool cqe_is_tunneled(struct mlx5_cqe64 *cqe)
 {
-	return cqe->outer_l3_tunneled & 0x1;
+	return cqe->tls_outer_l3_tunneled & 0x1;
+}
+
+static inline u8 get_cqe_tls_offload(struct mlx5_cqe64 *cqe)
+{
+	return (cqe->tls_outer_l3_tunneled >> 3) & 0x3;
 }
 
 static inline bool cqe_has_vlan(struct mlx5_cqe64 *cqe)
@@ -942,6 +949,13 @@ enum {
 	CQE_L4_OK	= 1 << 2,
 };
 
+enum {
+	CQE_TLS_OFFLOAD_NOT_DECRYPTED		= 0x0,
+	CQE_TLS_OFFLOAD_DECRYPTED		= 0x1,
+	CQE_TLS_OFFLOAD_RESYNC			= 0x2,
+	CQE_TLS_OFFLOAD_ERROR			= 0x3,
+};
+
 struct mlx5_sig_err_cqe {
 	u8		rsvd0[16];
 	__be32		expected_trans_sig;
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 058ded202b65..6a6bb5dc7916 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1491,7 +1491,7 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 
 	u8         reserved_at_480[0x1];
 	u8         tls_tx[0x1];
-	u8         reserved_at_482[0x1];
+	u8         tls_rx[0x1];
 	u8         log_max_l2_table[0x5];
 	u8         reserved_at_488[0x8];
 	u8         log_uar_page_sz[0x10];
@@ -3136,7 +3136,8 @@ struct mlx5_ifc_tirc_bits {
 	u8         reserved_at_0[0x20];
 
 	u8         disp_type[0x4];
-	u8         reserved_at_24[0x1c];
+	u8         tls_en[0x1];
+	u8         reserved_at_25[0x1b];
 
 	u8         reserved_at_40[0x40];
 
-- 
cgit v1.2.3-59-g8ed1b


From 0e1533bb9cce2c6b2aecdfddfcc0de3beeaddc7b Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:09 -0700
Subject: net/mlx5: Add release all pages capability bit

Add a bit in HCA capabilities layout to indicate if release all pages is
supported.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index 6a6bb5dc7916..fb243848132d 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1244,7 +1244,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_130[0xa];
 	u8         log_max_ra_res_dc[0x6];
 
-	u8         reserved_at_140[0x9];
+	u8         reserved_at_140[0x6];
+	u8         release_all_pages[0x1];
+	u8         reserved_at_147[0x2];
 	u8         roce_accl[0x1];
 	u8         log_max_ra_req_qp[0x6];
 	u8         reserved_at_150[0xa];
-- 
cgit v1.2.3-59-g8ed1b


From 2dc8b5246d2c94f732c02e7a688d8a9c0c65361f Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Fri, 24 Apr 2020 12:45:10 -0700
Subject: net/mlx5: TX WQE Add trailer insertion field

Add a new TX WQE field for ConnectX-6 Dx trailer insertion support.
When it is set, the HW adds a trailer to the packet. The WQE trailer
association flags tell the HW which header the trailer belongs to.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/qp.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index ef127a156a62..f23eb18526fe 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -229,6 +229,11 @@ enum {
 
 enum {
 	MLX5_ETH_WQE_SVLAN              = 1 << 0,
+	MLX5_ETH_WQE_TRAILER_HDR_OUTER_IP_ASSOC = 1 << 26,
+	MLX5_ETH_WQE_TRAILER_HDR_OUTER_L4_ASSOC = 1 << 27,
+	MLX5_ETH_WQE_TRAILER_HDR_INNER_IP_ASSOC = 3 << 26,
+	MLX5_ETH_WQE_TRAILER_HDR_INNER_L4_ASSOC = 1 << 28,
+	MLX5_ETH_WQE_INSERT_TRAILER     = 1 << 30,
 	MLX5_ETH_WQE_INSERT_VLAN        = 1 << 15,
 };
 
@@ -257,6 +262,7 @@ struct mlx5_wqe_eth_seg {
 			__be16 type;
 			__be16 vlan_tci;
 		} insert;
+		__be32 trailer;
 	};
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From a6bbdf2e750f245d219d39f3c3d06ace2c5871e6 Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Tue, 28 Apr 2020 17:07:09 +0800
Subject: libbpf: Remove unneeded semicolon in btf_dump_emit_type

Fixes the following coccicheck warning:

 tools/lib/bpf/btf_dump.c:661:4-5: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1588064829-70613-1-git-send-email-zou_wei@huawei.com
---
 tools/lib/bpf/btf_dump.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/btf_dump.c b/tools/lib/bpf/btf_dump.c
index 0c28ee82834b..de07e559a11d 100644
--- a/tools/lib/bpf/btf_dump.c
+++ b/tools/lib/bpf/btf_dump.c
@@ -658,7 +658,7 @@ static void btf_dump_emit_type(struct btf_dump *d, __u32 id, __u32 cont_id)
 			if (!btf_dump_is_blacklisted(d, id)) {
 				btf_dump_emit_typedef_def(d, id, t, 0);
 				btf_dump_printf(d, ";\n\n");
-			};
+			}
 			tstate->fwd_emitted = 1;
 			break;
 		default:
-- 
cgit v1.2.3-59-g8ed1b


From 11dd74b338bf83f8bca70b57bad33a903fedfa6e Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Mon, 27 Apr 2020 13:56:45 -0700
Subject: net: ipv6: new arg skip_notify to ip6_rt_del

Used in subsequent work to skip route delete
notifications on nexthop deletes.

Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h  |  2 +-
 include/net/ipv6_stubs.h |  2 +-
 net/ipv4/nexthop.c       |  2 +-
 net/ipv6/addrconf.c      | 12 ++++++------
 net/ipv6/addrconf_core.c |  3 ++-
 net/ipv6/anycast.c       |  4 ++--
 net/ipv6/ndisc.c         |  2 +-
 net/ipv6/route.c         | 11 +++++++----
 8 files changed, 21 insertions(+), 17 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index 9947eb1e9eb6..e525f003e619 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -123,7 +123,7 @@ int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack);
 int ip6_ins_rt(struct net *net, struct fib6_info *f6i);
-int ip6_del_rt(struct net *net, struct fib6_info *f6i);
+int ip6_del_rt(struct net *net, struct fib6_info *f6i, bool skip_notify);
 
 void rt6_flush_exceptions(struct fib6_info *f6i);
 void rt6_age_exceptions(struct fib6_info *f6i, struct fib6_gc_args *gc_args,
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 3e7d2c0e79ca..a5f7c12c326a 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -48,7 +48,7 @@ struct ipv6_stub {
 			    struct netlink_ext_ack *extack);
 	void (*fib6_nh_release)(struct fib6_nh *fib6_nh);
 	void (*fib6_update_sernum)(struct net *net, struct fib6_info *rt);
-	int (*ip6_del_rt)(struct net *net, struct fib6_info *rt);
+	int (*ip6_del_rt)(struct net *net, struct fib6_info *rt, bool skip_notify);
 	void (*fib6_rt_update)(struct net *net, struct fib6_info *rt,
 			       struct nl_info *info);
 
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index fdfca534d094..9999687ad6dc 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -784,7 +784,7 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
 	list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
 		/* __ip6_del_rt does a release, so do a hold here */
 		fib6_info_hold(f6i);
-		ipv6_stub->ip6_del_rt(net, f6i);
+		ipv6_stub->ip6_del_rt(net, f6i, false);
 	}
 }
 
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 27b4fb6e452b..2c4f20ec1e2a 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -1238,7 +1238,7 @@ cleanup_prefix_route(struct inet6_ifaddr *ifp, unsigned long expires,
 					ifp->idev->dev, 0, RTF_DEFAULT, true);
 	if (f6i) {
 		if (del_rt)
-			ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+			ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
 		else {
 			if (!(f6i->fib6_flags & RTF_EXPIRES))
 				fib6_set_expires(f6i, expires);
@@ -2718,7 +2718,7 @@ void addrconf_prefix_rcv(struct net_device *dev, u8 *opt, int len, bool sllao)
 		if (rt) {
 			/* Autoconf prefix route */
 			if (valid_lft == 0) {
-				ip6_del_rt(net, rt);
+				ip6_del_rt(net, rt, false);
 				rt = NULL;
 			} else if (addrconf_finite_timeout(rt_expires)) {
 				/* not infinity */
@@ -3813,7 +3813,7 @@ restart:
 		spin_unlock_bh(&ifa->lock);
 
 		if (rt)
-			ip6_del_rt(net, rt);
+			ip6_del_rt(net, rt, false);
 
 		if (state != INET6_IFADDR_STATE_DEAD) {
 			__ipv6_ifa_notify(RTM_DELADDR, ifa);
@@ -4652,7 +4652,7 @@ static int modify_prefix_route(struct inet6_ifaddr *ifp,
 	prio = ifp->rt_priority ? : IP6_RT_PRIO_ADDRCONF;
 	if (f6i->fib6_metric != prio) {
 		/* delete old one */
-		ip6_del_rt(dev_net(ifp->idev->dev), f6i);
+		ip6_del_rt(dev_net(ifp->idev->dev), f6i, false);
 
 		/* add new one */
 		addrconf_prefix_route(modify_peer ? &ifp->peer_addr : &ifp->addr,
@@ -6073,10 +6073,10 @@ static void __ipv6_ifa_notify(int event, struct inet6_ifaddr *ifp)
 						       ifp->idev->dev, 0, 0,
 						       false);
 			if (rt)
-				ip6_del_rt(net, rt);
+				ip6_del_rt(net, rt, false);
 		}
 		if (ifp->rt) {
-			ip6_del_rt(net, ifp->rt);
+			ip6_del_rt(net, ifp->rt, false);
 			ifp->rt = NULL;
 		}
 		rt_genid_bump_ipv6(net);
diff --git a/net/ipv6/addrconf_core.c b/net/ipv6/addrconf_core.c
index ea00ce3d4117..9ebf3fe0d2b1 100644
--- a/net/ipv6/addrconf_core.c
+++ b/net/ipv6/addrconf_core.c
@@ -185,7 +185,8 @@ static int eafnosupport_fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 	return -EAFNOSUPPORT;
 }
 
-static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt)
+static int eafnosupport_ip6_del_rt(struct net *net, struct fib6_info *rt,
+				   bool skip_notify)
 {
 	return -EAFNOSUPPORT;
 }
diff --git a/net/ipv6/anycast.c b/net/ipv6/anycast.c
index fed91ab7ec46..893261230ffc 100644
--- a/net/ipv6/anycast.c
+++ b/net/ipv6/anycast.c
@@ -364,7 +364,7 @@ int __ipv6_dev_ac_dec(struct inet6_dev *idev, const struct in6_addr *addr)
 	ipv6_del_acaddr_hash(aca);
 	addrconf_leave_solict(idev, &aca->aca_addr);
 
-	ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
+	ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
 
 	aca_put(aca);
 	return 0;
@@ -393,7 +393,7 @@ void ipv6_ac_destroy_dev(struct inet6_dev *idev)
 
 		addrconf_leave_solict(idev, &aca->aca_addr);
 
-		ip6_del_rt(dev_net(idev->dev), aca->aca_rt);
+		ip6_del_rt(dev_net(idev->dev), aca->aca_rt, false);
 
 		aca_put(aca);
 
diff --git a/net/ipv6/ndisc.c b/net/ipv6/ndisc.c
index 1ecd4e9b0bdf..2d09c4da03ee 100644
--- a/net/ipv6/ndisc.c
+++ b/net/ipv6/ndisc.c
@@ -1302,7 +1302,7 @@ static void ndisc_router_discovery(struct sk_buff *skb)
 		}
 	}
 	if (rt && lifetime == 0) {
-		ip6_del_rt(net, rt);
+		ip6_del_rt(net, rt, false);
 		rt = NULL;
 	}
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 310cbddaa533..486c36a14f24 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -984,7 +984,7 @@ int rt6_route_rcv(struct net_device *dev, u8 *opt, int len,
 					gwaddr, dev);
 
 	if (rt && !lifetime) {
-		ip6_del_rt(net, rt);
+		ip6_del_rt(net, rt, false);
 		rt = NULL;
 	}
 
@@ -3729,9 +3729,12 @@ out:
 	return err;
 }
 
-int ip6_del_rt(struct net *net, struct fib6_info *rt)
+int ip6_del_rt(struct net *net, struct fib6_info *rt, bool skip_notify)
 {
-	struct nl_info info = { .nl_net = net };
+	struct nl_info info = {
+		.nl_net = net,
+		.skip_notify = skip_notify
+	};
 
 	return __ip6_del_rt(rt, &info);
 }
@@ -4252,7 +4255,7 @@ restart:
 		    (!idev || idev->cnf.accept_ra != 2) &&
 		    fib6_info_hold_safe(rt)) {
 			rcu_read_unlock();
-			ip6_del_rt(net, rt);
+			ip6_del_rt(net, rt, false);
 			goto restart;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 4f80116d3df3b23ee4b83ea8557629e1799bc230 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Mon, 27 Apr 2020 13:56:46 -0700
Subject: net: ipv4: add sysctl for nexthop api compatibility mode

Current route nexthop API maintains user space compatibility
with old route API by default. Dumps and netlink notifications
support both new and old API format. In systems which have
moved to the new API, this compatibility mode cancels some
of the performance benefits provided by the new nexthop API.

This patch adds new sysctl nexthop_compat_mode which is on
by default but provides the ability to turn off compatibility
mode allowing systems to run entirely with the new routing
API. Old route API behaviour and support is not modified by this
sysctl.

Uses a single sysctl to cover both ipv4 and ipv6 following
other sysctls. Covers dumps and delete notifications as
suggested by David Ahern.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.txt | 12 ++++++++++++
 include/net/netns/ipv4.h               |  2 ++
 net/ipv4/af_inet.c                     |  1 +
 net/ipv4/fib_semantics.c               |  3 +++
 net/ipv4/nexthop.c                     |  5 +++--
 net/ipv4/sysctl_net_ipv4.c             |  9 +++++++++
 net/ipv6/route.c                       |  3 ++-
 7 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
index 9375324aa8e1..5cdc37c34830 100644
--- a/Documentation/networking/ip-sysctl.txt
+++ b/Documentation/networking/ip-sysctl.txt
@@ -1560,6 +1560,18 @@ skip_notify_on_dev_down - BOOLEAN
 	on userspace caches to track link events and evict routes.
 	Default: false (generate message)
 
+nexthop_compat_mode - BOOLEAN
+	New nexthop API provides a means for managing nexthops independent of
+	prefixes. Backwards compatibility with old route format is enabled by
+	default which means route dumps and notifications contain the new
+	nexthop attribute but also the full, expanded nexthop definition.
+	Further, updates or deletes of a nexthop configuration generate route
+	notifications for each fib entry using the nexthop. Once a system
+	understands the new API, this sysctl can be disabled to achieve full
+	performance benefits of the new API by disabling the nexthop expansion
+	and extraneous notifications.
+	Default: true (backward compat mode)
+
 IPv6 Fragmentation:
 
 ip6frag_high_thresh - INTEGER
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 154b8f01499b..5acdb4d414c4 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -111,6 +111,8 @@ struct netns_ipv4 {
 	int sysctl_tcp_early_demux;
 	int sysctl_udp_early_demux;
 
+	int sysctl_nexthop_compat_mode;
+
 	int sysctl_fwmark_reflect;
 	int sysctl_tcp_fwmark_accept;
 #ifdef CONFIG_NET_L3_MASTER_DEV
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index c618e242490f..6177c4ba0037 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -1835,6 +1835,7 @@ static __net_init int inet_init_net(struct net *net)
 	net->ipv4.sysctl_ip_early_demux = 1;
 	net->ipv4.sysctl_udp_early_demux = 1;
 	net->ipv4.sysctl_tcp_early_demux = 1;
+	net->ipv4.sysctl_nexthop_compat_mode = 1;
 #ifdef CONFIG_SYSCTL
 	net->ipv4.sysctl_ip_prot_sock = PROT_SOCK;
 #endif
diff --git a/net/ipv4/fib_semantics.c b/net/ipv4/fib_semantics.c
index 55ca2e521828..e53871e4a097 100644
--- a/net/ipv4/fib_semantics.c
+++ b/net/ipv4/fib_semantics.c
@@ -1780,6 +1780,8 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 		if (nexthop_is_blackhole(fi->nh))
 			rtm->rtm_type = RTN_BLACKHOLE;
+		if (!fi->fib_net->ipv4.sysctl_nexthop_compat_mode)
+			goto offload;
 	}
 
 	if (nhs == 1) {
@@ -1805,6 +1807,7 @@ int fib_dump_info(struct sk_buff *skb, u32 portid, u32 seq, int event,
 			goto nla_put_failure;
 	}
 
+offload:
 	if (fri->offload)
 		rtm->rtm_flags |= RTM_F_OFFLOAD;
 	if (fri->trap)
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 9999687ad6dc..3957364d556c 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -784,7 +784,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
 	list_for_each_entry_safe(f6i, tmp, &nh->f6i_list, nh_list) {
 		/* __ip6_del_rt does a release, so do a hold here */
 		fib6_info_hold(f6i);
-		ipv6_stub->ip6_del_rt(net, f6i, false);
+		ipv6_stub->ip6_del_rt(net, f6i,
+				      !net->ipv4.sysctl_nexthop_compat_mode);
 	}
 }
 
@@ -1041,7 +1042,7 @@ out:
 	if (!rc) {
 		nh_base_seq_inc(net);
 		nexthop_notify(RTM_NEWNEXTHOP, new_nh, &cfg->nlinfo);
-		if (replace_notify)
+		if (replace_notify && net->ipv4.sysctl_nexthop_compat_mode)
 			nexthop_replace_notify(net, new_nh, &cfg->nlinfo);
 	}
 
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 81b267e990a1..95ad71e76cc3 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -710,6 +710,15 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode           = 0644,
 		.proc_handler   = proc_tcp_early_demux
 	},
+	{
+		.procname       = "nexthop_compat_mode",
+		.data           = &init_net.ipv4.sysctl_nexthop_compat_mode,
+		.maxlen         = sizeof(int),
+		.mode           = 0644,
+		.proc_handler   = proc_dointvec_minmax,
+		.extra1		= SYSCTL_ZERO,
+		.extra2		= SYSCTL_ONE,
+	},
 	{
 		.procname	= "ip_default_ttl",
 		.data		= &init_net.ipv4.sysctl_ip_default_ttl,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 486c36a14f24..803212aae4ca 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -5557,7 +5557,8 @@ static int rt6_fill_node(struct net *net, struct sk_buff *skb,
 		if (nexthop_is_blackhole(rt->nh))
 			rtm->rtm_type = RTN_BLACKHOLE;
 
-		if (rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
+		if (net->ipv4.sysctl_nexthop_compat_mode &&
+		    rt6_fill_node_nexthop(skb, rt->nh, &nh_flags) < 0)
 			goto nla_put_failure;
 
 		rtm->rtm_flags |= nh_flags;
-- 
cgit v1.2.3-59-g8ed1b


From 4dddb5be136a7b151c11f0fbe350feff75a89867 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Mon, 27 Apr 2020 13:56:47 -0700
Subject: selftests: net: add new testcases for nexthop API compat mode sysctl

New tests to check route dump and notifications with
net.ipv4.nexthop_compat_mode on and off.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 198 +++++++++++++++++++++++++++-
 1 file changed, 196 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index b785241127df..dd0e5fec6367 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode"
 
 ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
 TESTS="${ALL_TESTS}"
@@ -253,6 +253,33 @@ check_route6()
 	check_output "${out}" "${expected}"
 }
 
+start_ip_monitor()
+{
+	local mtype=$1
+
+	# start the monitor in the background
+	tmpfile=`mktemp /var/run/nexthoptestXXX`
+	mpid=`($IP monitor $mtype > $tmpfile & echo $!) 2>/dev/null`
+	sleep 0.2
+	echo "$mpid $tmpfile"
+}
+
+stop_ip_monitor()
+{
+	local mpid=$1
+	local tmpfile=$2
+	local el=$3
+
+	# check the monitor results
+	kill $mpid
+	lines=`wc -l $tmpfile | cut "-d " -f1`
+	test $lines -eq $el
+	rc=$?
+	rm -rf $tmpfile
+
+	return $rc
+}
+
 ################################################################################
 # basic operations (add, delete, replace) on nexthops and nexthop groups
 #
@@ -883,6 +910,173 @@ ipv4_fcnal_runtime()
 	log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
 }
 
+sysctl_nexthop_compat_mode_check()
+{
+	local sysctlname="net.ipv4.nexthop_compat_mode"
+	local lprefix=$1
+
+	IPE="ip netns exec me"
+
+	$IPE sysctl -q $sysctlname 2>&1 >/dev/null
+	if [ $? -ne 0 ]; then
+		echo "SKIP: kernel lacks nexthop compat mode sysctl control"
+		return $ksft_skip
+	fi
+
+	out=$($IPE sysctl $sysctlname 2>/dev/null)
+	log_test $? 0 "$lprefix default nexthop compat mode check"
+	check_output "${out}" "$sysctlname = 1"
+}
+
+sysctl_nexthop_compat_mode_set()
+{
+	local sysctlname="net.ipv4.nexthop_compat_mode"
+	local mode=$1
+	local lprefix=$2
+
+	IPE="ip netns exec me"
+
+	out=$($IPE sysctl -w $sysctlname=$mode)
+	log_test $? 0 "$lprefix set compat mode - $mode"
+	check_output "${out}" "net.ipv4.nexthop_compat_mode = $mode"
+}
+
+ipv6_compat_mode()
+{
+	local rc
+
+	echo
+	echo "IPv6 nexthop api compat mode test"
+	echo "--------------------------------"
+
+	sysctl_nexthop_compat_mode_check "IPv6"
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 62/63"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+	# route add notification should contain expanded nexthops
+	stop_ip_monitor $ipmout 3
+	log_test $? 0 "IPv6 compat mode on - route add notification"
+
+	# route dump should contain expanded nexthops
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
+	log_test $? 0 "IPv6 compat mode on - route dump"
+
+	# change in nexthop group should generate route notification
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 62/64"
+	stop_ip_monitor $ipmout 3
+
+	log_test $? 0 "IPv6 compat mode on - nexthop change"
+
+	# set compat mode off
+	sysctl_nexthop_compat_mode_set 0 "IPv6"
+
+	run_cmd "$IP -6 ro del 2001:db8:101::1/128 nhid 122"
+
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::3 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 62/63"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 122"
+	# route add notification should not contain expanded nexthops
+	stop_ip_monitor $ipmout 1
+	log_test $? 0 "IPv6 compat mode off - route add notification"
+
+	# route dump should not contain expanded nexthops
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium"
+	log_test $? 0 "IPv6 compat mode off - route dump"
+
+	# change in nexthop group should not generate route notification
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::4 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 62/64"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv6 compat mode off - nexthop change"
+
+	# nexthop delete should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop del id 122"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv6 compat mode off - nexthop delete"
+
+	# set compat mode back on
+	sysctl_nexthop_compat_mode_set 1 "IPv6"
+}
+
+ipv4_compat_mode()
+{
+	local rc
+
+	echo
+	echo "IPv4 nexthop api compat mode"
+	echo "----------------------------"
+
+	sysctl_nexthop_compat_mode_check "IPv4"
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	run_cmd "$IP nexthop add id 21 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 22 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 122 group 21/22"
+	ipmout=$(start_ip_monitor route)
+
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+	stop_ip_monitor $ipmout 3
+
+	# route add notification should contain expanded nexthops
+	log_test $? 0 "IPv4 compat mode on - route add notification"
+
+	# route dump should contain expanded nexthops
+	check_route "172.16.101.1" "172.16.101.1 nhid 122 nexthop via 172.16.1.2 dev veth1 weight 1 nexthop via 172.16.1.2 dev veth1 weight 1"
+	log_test $? 0 "IPv4 compat mode on - route dump"
+
+	# change in nexthop group should generate route notification
+	run_cmd "$IP nexthop add id 23 via 172.16.1.3 dev veth1"
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 21/23"
+	stop_ip_monitor $ipmout 3
+	log_test $? 0 "IPv4 compat mode on - nexthop change"
+
+	sysctl_nexthop_compat_mode_set 0 "IPv4"
+
+	# cleanup
+	run_cmd "$IP ro del 172.16.101.1/32 nhid 122"
+
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP ro add 172.16.101.1/32 nhid 122"
+	stop_ip_monitor $ipmout 1
+	# route add notification should not contain expanded nexthops
+	log_test $? 0 "IPv4 compat mode off - route add notification"
+
+	# route dump should not contain expanded nexthops
+	check_route "172.16.101.1" "172.16.101.1 nhid 122"
+	log_test $? 0 "IPv4 compat mode off - route dump"
+
+	# change in nexthop group should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop replace id 122 group 21/22"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv4 compat mode off - nexthop change"
+
+	# nexthop delete should not generate route notification
+	ipmout=$(start_ip_monitor route)
+	run_cmd "$IP nexthop del id 122"
+	stop_ip_monitor $ipmout 0
+	log_test $? 0 "IPv4 compat mode off - nexthop delete"
+
+	sysctl_nexthop_compat_mode_set 1 "IPv4"
+}
+
 basic()
 {
 	echo
-- 
cgit v1.2.3-59-g8ed1b


From 1a89595c2272aa9b4cd3fda562545dc1d9cd89ed Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Apr 2020 18:03:47 -0700
Subject: kselftest: factor out list manipulation to a helper

Kees suggested factoring out the list append code into a macro, since
the following commits need it too and would otherwise duplicate the code.

Suggested-by: Kees Cook <keescook@chromium.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/kselftest_harness.h | 42 ++++++++++++++++-------------
 1 file changed, 24 insertions(+), 18 deletions(-)

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 2bb8c81fc0b4..77f754854f0d 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -631,6 +631,29 @@
 	} \
 } while (0); OPTIONAL_HANDLER(_assert)
 
+/* List helpers */
+#define __LIST_APPEND(head, item) \
+{ \
+	/* Circular linked list where only prev is circular. */ \
+	if (head == NULL) { \
+		head = item; \
+		item->next = NULL; \
+		item->prev = item; \
+		return;	\
+	} \
+	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) { \
+		item->next = NULL; \
+		item->prev = head->prev; \
+		item->prev->next = item; \
+		head->prev = item; \
+	} else { \
+		item->next = head; \
+		item->next->prev = item; \
+		item->prev = item; \
+		head = item; \
+	} \
+}
+
 /* Contains all the information for test execution and status checking. */
 struct __test_metadata {
 	const char *name;
@@ -667,24 +690,7 @@ static int __constructor_order;
 static inline void __register_test(struct __test_metadata *t)
 {
 	__test_count++;
-	/* Circular linked list where only prev is circular. */
-	if (__test_list == NULL) {
-		__test_list = t;
-		t->next = NULL;
-		t->prev = t;
-		return;
-	}
-	if (__constructor_order == _CONSTRUCTOR_ORDER_FORWARD) {
-		t->next = NULL;
-		t->prev = __test_list->prev;
-		t->prev->next = t;
-		__test_list->prev = t;
-	} else {
-		t->next = __test_list;
-		t->next->prev = t;
-		t->prev = t;
-		__test_list = t;
-	}
+	__LIST_APPEND(__test_list, t);
 }
 
 static inline int __bail(int for_realz, bool no_print, __u8 step)
-- 
cgit v1.2.3-59-g8ed1b


From 142aca6b388c8ab83dc41bd71150cb23115bd285 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Apr 2020 18:03:48 -0700
Subject: kselftest: create fixture objects

Grouping tests by fixture will allow us to parametrize
test runs. Create full objects for fixtures.

Add a "global" fixture for tests without a fixture.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/kselftest_harness.h | 51 +++++++++++++++++++++--------
 1 file changed, 38 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index 77f754854f0d..de283fd6fc4d 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -169,8 +169,10 @@
 #define __TEST_IMPL(test_name, _signal) \
 	static void test_name(struct __test_metadata *_metadata); \
 	static struct __test_metadata _##test_name##_object = \
-		{ .name = "global." #test_name, \
-		  .fn = &test_name, .termsig = _signal, \
+		{ .name = #test_name, \
+		  .fn = &test_name, \
+		  .fixture = &_fixture_global, \
+		  .termsig = _signal, \
 		  .timeout = TEST_TIMEOUT_DEFAULT, }; \
 	static void __attribute__((constructor)) _register_##test_name(void) \
 	{ \
@@ -212,10 +214,12 @@
  * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
  */
 #define FIXTURE(fixture_name) \
+	static struct __fixture_metadata _##fixture_name##_fixture_object = \
+		{ .name =  #fixture_name, }; \
 	static void __attribute__((constructor)) \
 	_register_##fixture_name##_data(void) \
 	{ \
-		__fixture_count++; \
+		__register_fixture(&_##fixture_name##_fixture_object); \
 	} \
 	FIXTURE_DATA(fixture_name)
 
@@ -309,8 +313,9 @@
 	} \
 	static struct __test_metadata \
 		      _##fixture_name##_##test_name##_object = { \
-		.name = #fixture_name "." #test_name, \
+		.name = #test_name, \
 		.fn = &wrapper_##fixture_name##_##test_name, \
+		.fixture = &_##fixture_name##_fixture_object, \
 		.termsig = signal, \
 		.timeout = tmout, \
 	 }; \
@@ -654,11 +659,34 @@
 	} \
 }
 
+/* Contains all the information about a fixture. */
+struct __fixture_metadata {
+	const char *name;
+	struct __fixture_metadata *prev, *next;
+} _fixture_global __attribute__((unused)) = {
+	.name = "global",
+	.prev = &_fixture_global,
+};
+
+static struct __fixture_metadata *__fixture_list = &_fixture_global;
+static unsigned int __fixture_count;
+static int __constructor_order;
+
+#define _CONSTRUCTOR_ORDER_FORWARD   1
+#define _CONSTRUCTOR_ORDER_BACKWARD -1
+
+static inline void __register_fixture(struct __fixture_metadata *f)
+{
+	__fixture_count++;
+	__LIST_APPEND(__fixture_list, f);
+}
+
 /* Contains all the information for test execution and status checking. */
 struct __test_metadata {
 	const char *name;
 	void (*fn)(struct __test_metadata *);
 	pid_t pid;	/* pid of test when being run */
+	struct __fixture_metadata *fixture;
 	int termsig;
 	int passed;
 	int trigger; /* extra handler after the evaluation */
@@ -672,11 +700,6 @@ struct __test_metadata {
 /* Storage for the (global) tests to be run. */
 static struct __test_metadata *__test_list;
 static unsigned int __test_count;
-static unsigned int __fixture_count;
-static int __constructor_order;
-
-#define _CONSTRUCTOR_ORDER_FORWARD   1
-#define _CONSTRUCTOR_ORDER_BACKWARD -1
 
 /*
  * Since constructors are called in reverse order, reverse the test
@@ -796,11 +819,12 @@ void __wait_for_test(struct __test_metadata *t)
 	}
 }
 
-void __run_test(struct __test_metadata *t)
+void __run_test(struct __fixture_metadata *f,
+		struct __test_metadata *t)
 {
 	t->passed = 1;
 	t->trigger = 0;
-	printf("[ RUN      ] %s\n", t->name);
+	printf("[ RUN      ] %s.%s\n", f->name, t->name);
 	t->pid = fork();
 	if (t->pid < 0) {
 		printf("ERROR SPAWNING TEST CHILD\n");
@@ -812,7 +836,8 @@ void __run_test(struct __test_metadata *t)
 	} else {
 		__wait_for_test(t);
 	}
-	printf("[     %4s ] %s\n", (t->passed ? "OK" : "FAIL"), t->name);
+	printf("[     %4s ] %s.%s\n", (t->passed ? "OK" : "FAIL"),
+	       f->name, t->name);
 }
 
 static int test_harness_run(int __attribute__((unused)) argc,
@@ -828,7 +853,7 @@ static int test_harness_run(int __attribute__((unused)) argc,
 	       __test_count, __fixture_count + 1);
 	for (t = __test_list; t; t = t->next) {
 		count++;
-		__run_test(t);
+		__run_test(t->fixture, t);
 		if (t->passed)
 			pass_count++;
 		else
-- 
cgit v1.2.3-59-g8ed1b


From e7f304607778e31bfd8e6b00ce2a8f990b265e14 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Apr 2020 18:03:49 -0700
Subject: kselftest: run tests by fixture

Now that all tests have a fixture object move from a global
list of tests to a list of tests per fixture.

Order of tests may change as we will now group and run tests
fixture by fixture, rather than in declaration order.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/kselftest_harness.h | 23 ++++++++++++++---------
 1 file changed, 14 insertions(+), 9 deletions(-)

diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index de283fd6fc4d..fa7185e45472 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -659,9 +659,12 @@
 	} \
 }
 
+struct __test_metadata;
+
 /* Contains all the information about a fixture. */
 struct __fixture_metadata {
 	const char *name;
+	struct __test_metadata *tests;
 	struct __fixture_metadata *prev, *next;
 } _fixture_global __attribute__((unused)) = {
 	.name = "global",
@@ -698,7 +701,6 @@ struct __test_metadata {
 };
 
 /* Storage for the (global) tests to be run. */
-static struct __test_metadata *__test_list;
 static unsigned int __test_count;
 
 /*
@@ -713,7 +715,7 @@ static unsigned int __test_count;
 static inline void __register_test(struct __test_metadata *t)
 {
 	__test_count++;
-	__LIST_APPEND(__test_list, t);
+	__LIST_APPEND(t->fixture->tests, t);
 }
 
 static inline int __bail(int for_realz, bool no_print, __u8 step)
@@ -843,6 +845,7 @@ void __run_test(struct __fixture_metadata *f,
 static int test_harness_run(int __attribute__((unused)) argc,
 			    char __attribute__((unused)) **argv)
 {
+	struct __fixture_metadata *f;
 	struct __test_metadata *t;
 	int ret = 0;
 	unsigned int count = 0;
@@ -851,13 +854,15 @@ static int test_harness_run(int __attribute__((unused)) argc,
 	/* TODO(wad) add optional arguments similar to gtest. */
 	printf("[==========] Running %u tests from %u test cases.\n",
 	       __test_count, __fixture_count + 1);
-	for (t = __test_list; t; t = t->next) {
-		count++;
-		__run_test(t->fixture, t);
-		if (t->passed)
-			pass_count++;
-		else
-			ret = 1;
+	for (f = __fixture_list; f; f = f->next) {
+		for (t = f->tests; t; t = t->next) {
+			count++;
+			__run_test(f, t);
+			if (t->passed)
+				pass_count++;
+			else
+				ret = 1;
+		}
 	}
 	printf("[==========] %u / %u tests passed.\n", pass_count, count);
 	printf("[  %s  ]\n", (ret ? "FAILED" : "PASSED"));
-- 
cgit v1.2.3-59-g8ed1b


From 74bc7c97fa88ae334752e7b45702d23813df8873 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Apr 2020 18:03:50 -0700
Subject: kselftest: add fixture variants

Allow users to build parameterized variants of fixtures.

If fixtures want variants, they call FIXTURE_VARIANT() to declare
the structure to fill for each variant. Each fixture will be re-run
for each of the variants defined by calling FIXTURE_VARIANT_ADD()
with the differing parameters initializing the structure.

Since tests are being re-run, additional initialization (steps,
no_print) is also added.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/dev-tools/kselftest.rst       |   3 +-
 tools/testing/selftests/kselftest_harness.h | 148 +++++++++++++++++++++++-----
 2 files changed, 124 insertions(+), 27 deletions(-)

diff --git a/Documentation/dev-tools/kselftest.rst b/Documentation/dev-tools/kselftest.rst
index 61ae13c44f91..5d1f56fcd2e7 100644
--- a/Documentation/dev-tools/kselftest.rst
+++ b/Documentation/dev-tools/kselftest.rst
@@ -301,7 +301,8 @@ Helpers
 
 .. kernel-doc:: tools/testing/selftests/kselftest_harness.h
     :functions: TH_LOG TEST TEST_SIGNAL FIXTURE FIXTURE_DATA FIXTURE_SETUP
-                FIXTURE_TEARDOWN TEST_F TEST_HARNESS_MAIN
+                FIXTURE_TEARDOWN TEST_F TEST_HARNESS_MAIN FIXTURE_VARIANT
+                FIXTURE_VARIANT_ADD
 
 Operators
 ---------
diff --git a/tools/testing/selftests/kselftest_harness.h b/tools/testing/selftests/kselftest_harness.h
index fa7185e45472..c9f03ef93338 100644
--- a/tools/testing/selftests/kselftest_harness.h
+++ b/tools/testing/selftests/kselftest_harness.h
@@ -168,9 +168,15 @@
 
 #define __TEST_IMPL(test_name, _signal) \
 	static void test_name(struct __test_metadata *_metadata); \
+	static inline void wrapper_##test_name( \
+		struct __test_metadata *_metadata, \
+		struct __fixture_variant_metadata *variant) \
+	{ \
+		test_name(_metadata); \
+	} \
 	static struct __test_metadata _##test_name##_object = \
 		{ .name = #test_name, \
-		  .fn = &test_name, \
+		  .fn = &wrapper_##test_name, \
 		  .fixture = &_fixture_global, \
 		  .termsig = _signal, \
 		  .timeout = TEST_TIMEOUT_DEFAULT, }; \
@@ -214,6 +220,7 @@
  * populated and cleaned up using FIXTURE_SETUP() and FIXTURE_TEARDOWN().
  */
 #define FIXTURE(fixture_name) \
+	FIXTURE_VARIANT(fixture_name); \
 	static struct __fixture_metadata _##fixture_name##_fixture_object = \
 		{ .name =  #fixture_name, }; \
 	static void __attribute__((constructor)) \
@@ -245,7 +252,10 @@
 #define FIXTURE_SETUP(fixture_name) \
 	void fixture_name##_setup( \
 		struct __test_metadata __attribute__((unused)) *_metadata, \
-		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+		const FIXTURE_VARIANT(fixture_name) \
+			__attribute__((unused)) *variant)
+
 /**
  * FIXTURE_TEARDOWN(fixture_name)
  * *_metadata* is included so that EXPECT_* and ASSERT_* work correctly.
@@ -267,6 +277,59 @@
 		struct __test_metadata __attribute__((unused)) *_metadata, \
 		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
 
+/**
+ * FIXTURE_VARIANT(fixture_name) - Optionally called once per fixture
+ * to declare fixture variant
+ *
+ * @fixture_name: fixture name
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_VARIANT(datatype name) {
+ *       type property1;
+ *       ...
+ *     };
+ *
+ * Defines type of constant parameters provided to FIXTURE_SETUP() and TEST_F()
+ * as *variant*. Variants allow the same tests to be run with different
+ * arguments.
+ */
+#define FIXTURE_VARIANT(fixture_name) struct _fixture_variant_##fixture_name
+
+/**
+ * FIXTURE_VARIANT_ADD(fixture_name, variant_name) - Called once per fixture
+ * variant to setup and register the data
+ *
+ * @fixture_name: fixture name
+ * @variant_name: name of the parameter set
+ *
+ * .. code-block:: c
+ *
+ *     FIXTURE_VARIANT_ADD(fixture_name, variant_name) {
+ *       .property1 = val1,
+ *       ...
+ *     };
+ *
+ * Defines a variant of the test fixture, provided to FIXTURE_SETUP() and
+ * TEST_F() as *variant*. Tests of each fixture will be run once for each
+ * variant.
+ */
+#define FIXTURE_VARIANT_ADD(fixture_name, variant_name) \
+	extern FIXTURE_VARIANT(fixture_name) \
+		_##fixture_name##_##variant_name##_variant; \
+	static struct __fixture_variant_metadata \
+		_##fixture_name##_##variant_name##_object = \
+		{ .name = #variant_name, \
+		  .data = &_##fixture_name##_##variant_name##_variant}; \
+	static void __attribute__((constructor)) \
+		_register_##fixture_name##_##variant_name(void) \
+	{ \
+		__register_fixture_variant(&_##fixture_name##_fixture_object, \
+			&_##fixture_name##_##variant_name##_object);	\
+	} \
+	FIXTURE_VARIANT(fixture_name) \
+		_##fixture_name##_##variant_name##_variant =
+
 /**
  * TEST_F(fixture_name, test_name) - Emits test registration and helpers for
  * fixture-based test cases
@@ -297,18 +360,20 @@
 #define __TEST_F_IMPL(fixture_name, test_name, signal, tmout) \
 	static void fixture_name##_##test_name( \
 		struct __test_metadata *_metadata, \
-		FIXTURE_DATA(fixture_name) *self); \
+		FIXTURE_DATA(fixture_name) *self, \
+		const FIXTURE_VARIANT(fixture_name) *variant); \
 	static inline void wrapper_##fixture_name##_##test_name( \
-		struct __test_metadata *_metadata) \
+		struct __test_metadata *_metadata, \
+		struct __fixture_variant_metadata *variant) \
 	{ \
 		/* fixture data is alloced, setup, and torn down per call. */ \
 		FIXTURE_DATA(fixture_name) self; \
 		memset(&self, 0, sizeof(FIXTURE_DATA(fixture_name))); \
-		fixture_name##_setup(_metadata, &self); \
+		fixture_name##_setup(_metadata, &self, variant->data); \
 		/* Let setup failure terminate early. */ \
 		if (!_metadata->passed) \
 			return; \
-		fixture_name##_##test_name(_metadata, &self); \
+		fixture_name##_##test_name(_metadata, &self, variant->data); \
 		fixture_name##_teardown(_metadata, &self); \
 	} \
 	static struct __test_metadata \
@@ -326,7 +391,9 @@
 	} \
 	static void fixture_name##_##test_name( \
 		struct __test_metadata __attribute__((unused)) *_metadata, \
-		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self)
+		FIXTURE_DATA(fixture_name) __attribute__((unused)) *self, \
+		const FIXTURE_VARIANT(fixture_name) \
+			__attribute__((unused)) *variant)
 
 /**
  * TEST_HARNESS_MAIN - Simple wrapper to run the test harness
@@ -660,11 +727,13 @@
 }
 
 struct __test_metadata;
+struct __fixture_variant_metadata;
 
 /* Contains all the information about a fixture. */
 struct __fixture_metadata {
 	const char *name;
 	struct __test_metadata *tests;
+	struct __fixture_variant_metadata *variant;
 	struct __fixture_metadata *prev, *next;
 } _fixture_global __attribute__((unused)) = {
 	.name = "global",
@@ -672,7 +741,6 @@ struct __fixture_metadata {
 };
 
 static struct __fixture_metadata *__fixture_list = &_fixture_global;
-static unsigned int __fixture_count;
 static int __constructor_order;
 
 #define _CONSTRUCTOR_ORDER_FORWARD   1
@@ -680,14 +748,27 @@ static int __constructor_order;
 
 static inline void __register_fixture(struct __fixture_metadata *f)
 {
-	__fixture_count++;
 	__LIST_APPEND(__fixture_list, f);
 }
 
+struct __fixture_variant_metadata {
+	const char *name;
+	const void *data;
+	struct __fixture_variant_metadata *prev, *next;
+};
+
+static inline void
+__register_fixture_variant(struct __fixture_metadata *f,
+			   struct __fixture_variant_metadata *variant)
+{
+	__LIST_APPEND(f->variant, variant);
+}
+
 /* Contains all the information for test execution and status checking. */
 struct __test_metadata {
 	const char *name;
-	void (*fn)(struct __test_metadata *);
+	void (*fn)(struct __test_metadata *,
+		   struct __fixture_variant_metadata *);
 	pid_t pid;	/* pid of test when being run */
 	struct __fixture_metadata *fixture;
 	int termsig;
@@ -700,9 +781,6 @@ struct __test_metadata {
 	struct __test_metadata *prev, *next;
 };
 
-/* Storage for the (global) tests to be run. */
-static unsigned int __test_count;
-
 /*
  * Since constructors are called in reverse order, reverse the test
  * list so tests are run in source declaration order.
@@ -714,7 +792,6 @@ static unsigned int __test_count;
  */
 static inline void __register_test(struct __test_metadata *t)
 {
-	__test_count++;
 	__LIST_APPEND(t->fixture->tests, t);
 }
 
@@ -822,46 +899,65 @@ void __wait_for_test(struct __test_metadata *t)
 }
 
 void __run_test(struct __fixture_metadata *f,
+		struct __fixture_variant_metadata *variant,
 		struct __test_metadata *t)
 {
+	/* reset test struct */
 	t->passed = 1;
 	t->trigger = 0;
-	printf("[ RUN      ] %s.%s\n", f->name, t->name);
+	t->step = 0;
+	t->no_print = 0;
+
+	printf("[ RUN      ] %s%s%s.%s\n",
+	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
 	t->pid = fork();
 	if (t->pid < 0) {
 		printf("ERROR SPAWNING TEST CHILD\n");
 		t->passed = 0;
 	} else if (t->pid == 0) {
-		t->fn(t);
+		t->fn(t, variant);
 		/* return the step that failed or 0 */
 		_exit(t->passed ? 0 : t->step);
 	} else {
 		__wait_for_test(t);
 	}
-	printf("[     %4s ] %s.%s\n", (t->passed ? "OK" : "FAIL"),
-	       f->name, t->name);
+	printf("[     %4s ] %s%s%s.%s\n", (t->passed ? "OK" : "FAIL"),
+	       f->name, variant->name[0] ? "." : "", variant->name, t->name);
 }
 
 static int test_harness_run(int __attribute__((unused)) argc,
 			    char __attribute__((unused)) **argv)
 {
+	struct __fixture_variant_metadata no_variant = { .name = "", };
+	struct __fixture_variant_metadata *v;
 	struct __fixture_metadata *f;
 	struct __test_metadata *t;
 	int ret = 0;
+	unsigned int case_count = 0, test_count = 0;
 	unsigned int count = 0;
 	unsigned int pass_count = 0;
 
+	for (f = __fixture_list; f; f = f->next) {
+		for (v = f->variant ?: &no_variant; v; v = v->next) {
+			case_count++;
+			for (t = f->tests; t; t = t->next)
+				test_count++;
+		}
+	}
+
 	/* TODO(wad) add optional arguments similar to gtest. */
 	printf("[==========] Running %u tests from %u test cases.\n",
-	       __test_count, __fixture_count + 1);
+	       test_count, case_count);
 	for (f = __fixture_list; f; f = f->next) {
-		for (t = f->tests; t; t = t->next) {
-			count++;
-			__run_test(f, t);
-			if (t->passed)
-				pass_count++;
-			else
-				ret = 1;
+		for (v = f->variant ?: &no_variant; v; v = v->next) {
+			for (t = f->tests; t; t = t->next) {
+				count++;
+				__run_test(f, v, t);
+				if (t->passed)
+					pass_count++;
+				else
+					ret = 1;
+			}
 		}
 	}
 	printf("[==========] %u / %u tests passed.\n", pass_count, count);
-- 
cgit v1.2.3-59-g8ed1b


From 0feba2219b7348dce7d59312f4701a4805768f2d Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 27 Apr 2020 18:03:51 -0700
Subject: selftests: tls: run all tests for TLS 1.2 and TLS 1.3

TLS 1.2 and TLS 1.3 differ in the implementation.
Use fixture parameters to run all tests for both
versions, and remove the one-off TLS 1.2 test.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Kees Cook <keescook@chromium.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/tls.c | 93 +++++++--------------------------------
 1 file changed, 17 insertions(+), 76 deletions(-)

diff --git a/tools/testing/selftests/net/tls.c b/tools/testing/selftests/net/tls.c
index 0ea44d975b6c..c5282e62df75 100644
--- a/tools/testing/selftests/net/tls.c
+++ b/tools/testing/selftests/net/tls.c
@@ -101,6 +101,21 @@ FIXTURE(tls)
 	bool notls;
 };
 
+FIXTURE_VARIANT(tls)
+{
+	unsigned int tls_version;
+};
+
+FIXTURE_VARIANT_ADD(tls, 12)
+{
+	.tls_version = TLS_1_2_VERSION,
+};
+
+FIXTURE_VARIANT_ADD(tls, 13)
+{
+	.tls_version = TLS_1_3_VERSION,
+};
+
 FIXTURE_SETUP(tls)
 {
 	struct tls12_crypto_info_aes_gcm_128 tls12;
@@ -112,7 +127,7 @@ FIXTURE_SETUP(tls)
 	len = sizeof(addr);
 
 	memset(&tls12, 0, sizeof(tls12));
-	tls12.info.version = TLS_1_3_VERSION;
+	tls12.info.version = variant->tls_version;
 	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
 
 	addr.sin_family = AF_INET;
@@ -733,7 +748,7 @@ TEST_F(tls, bidir)
 		struct tls12_crypto_info_aes_gcm_128 tls12;
 
 		memset(&tls12, 0, sizeof(tls12));
-		tls12.info.version = TLS_1_3_VERSION;
+		tls12.info.version = variant->tls_version;
 		tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
 
 		ret = setsockopt(self->fd, SOL_TLS, TLS_RX, &tls12,
@@ -1258,78 +1273,4 @@ TEST(keysizes) {
 	close(cfd);
 }
 
-TEST(tls12) {
-	int fd, cfd;
-	bool notls;
-
-	struct tls12_crypto_info_aes_gcm_128 tls12;
-	struct sockaddr_in addr;
-	socklen_t len;
-	int sfd, ret;
-
-	notls = false;
-	len = sizeof(addr);
-
-	memset(&tls12, 0, sizeof(tls12));
-	tls12.info.version = TLS_1_2_VERSION;
-	tls12.info.cipher_type = TLS_CIPHER_AES_GCM_128;
-
-	addr.sin_family = AF_INET;
-	addr.sin_addr.s_addr = htonl(INADDR_ANY);
-	addr.sin_port = 0;
-
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	sfd = socket(AF_INET, SOCK_STREAM, 0);
-
-	ret = bind(sfd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-	ret = listen(sfd, 10);
-	ASSERT_EQ(ret, 0);
-
-	ret = getsockname(sfd, &addr, &len);
-	ASSERT_EQ(ret, 0);
-
-	ret = connect(fd, &addr, sizeof(addr));
-	ASSERT_EQ(ret, 0);
-
-	ret = setsockopt(fd, IPPROTO_TCP, TCP_ULP, "tls", sizeof("tls"));
-	if (ret != 0) {
-		notls = true;
-		printf("Failure setting TCP_ULP, testing without tls\n");
-	}
-
-	if (!notls) {
-		ret = setsockopt(fd, SOL_TLS, TLS_TX, &tls12,
-				 sizeof(tls12));
-		ASSERT_EQ(ret, 0);
-	}
-
-	cfd = accept(sfd, &addr, &len);
-	ASSERT_GE(cfd, 0);
-
-	if (!notls) {
-		ret = setsockopt(cfd, IPPROTO_TCP, TCP_ULP, "tls",
-				 sizeof("tls"));
-		ASSERT_EQ(ret, 0);
-
-		ret = setsockopt(cfd, SOL_TLS, TLS_RX, &tls12,
-				 sizeof(tls12));
-		ASSERT_EQ(ret, 0);
-	}
-
-	close(sfd);
-
-	char const *test_str = "test_read";
-	int send_len = 10;
-	char buf[10];
-
-	send_len = strlen(test_str) + 1;
-	EXPECT_EQ(send(fd, test_str, send_len, 0), send_len);
-	EXPECT_NE(recv(cfd, buf, send_len, 0), -1);
-	EXPECT_EQ(memcmp(buf, test_str, send_len), 0);
-
-	close(fd);
-	close(cfd);
-}
-
 TEST_HARNESS_MAIN
-- 
cgit v1.2.3-59-g8ed1b


From 9d42205036d4be7e45fb4dafe5173cd804fd9d5f Mon Sep 17 00:00:00 2001
From: ChenTao <chentao107@huawei.com>
Date: Tue, 28 Apr 2020 09:48:04 +0800
Subject: net: phy: bcm54140: Make a bunch of functions static

Fix the following warning:

drivers/net/phy/bcm54140.c:663:5: warning:
symbol 'bcm54140_did_interrupt' was not declared. Should it be static?
drivers/net/phy/bcm54140.c:672:5: warning:
symbol 'bcm54140_ack_intr' was not declared. Should it be static?
drivers/net/phy/bcm54140.c:684:5: warning:
symbol 'bcm54140_config_intr' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: ChenTao <chentao107@huawei.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 7341f0126cc4..c009ac2856a5 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -660,7 +660,7 @@ static int bcm54140_config_init(struct phy_device *phydev)
 				  BCM54140_RDB_C_PWR_ISOLATE, 0);
 }
 
-int bcm54140_did_interrupt(struct phy_device *phydev)
+static int bcm54140_did_interrupt(struct phy_device *phydev)
 {
 	int ret;
 
@@ -669,7 +669,7 @@ int bcm54140_did_interrupt(struct phy_device *phydev)
 	return (ret < 0) ? 0 : ret;
 }
 
-int bcm54140_ack_intr(struct phy_device *phydev)
+static int bcm54140_ack_intr(struct phy_device *phydev)
 {
 	int reg;
 
@@ -681,7 +681,7 @@ int bcm54140_ack_intr(struct phy_device *phydev)
 	return 0;
 }
 
-int bcm54140_config_intr(struct phy_device *phydev)
+static int bcm54140_config_intr(struct phy_device *phydev)
 {
 	struct bcm54140_priv *priv = phydev->priv;
 	static const u16 port_to_imr_bit[] = {
-- 
cgit v1.2.3-59-g8ed1b


From 88fb831f773e6c51ec528eacb524f00f518f7e54 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 28 Apr 2020 10:42:22 -0700
Subject: dpaa2-eth: Use proper division helper in dpaa2_dbg_ch_show

When building arm32 allmodconfig:

ERROR: modpost: "__aeabi_uldivmod"
[drivers/net/ethernet/freescale/dpaa2/fsl-dpaa2-eth.ko] undefined!

frames and cdan are both of type __u64 (unsigned long long) so we need
to use div64_u64 to avoid this issue.

Fixes: 460fd830dd9d ("dpaa2-eth: add channel stat to debugfs")
Link: https://github.com/ClangBuiltLinux/linux/issues/1012
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reported-by: kernelci.org bot <bot@kernelci.org>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
index 80291afff3ea..0a31e4268dfb 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -139,7 +139,7 @@ static int dpaa2_dbg_ch_show(struct seq_file *file, void *offset)
 			   ch->stats.dequeue_portal_busy,
 			   ch->stats.frames,
 			   ch->stats.cdan,
-			   ch->stats.frames / ch->stats.cdan,
+			   div64_u64(ch->stats.frames, ch->stats.cdan),
 			   ch->buf_count);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 790ab249b55d75fdb427b92f81964cd7cb525eec Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Tue, 28 Apr 2020 19:58:33 +0200
Subject: net: ethernet: fec: Prevent MII event after MII_SPEED write

The change to polled IO for MDIO completion assumes that MII events
are only generated for MDIO transactions. However on some SoCs writing
to the MII_SPEED register can also trigger an MII event. As a result,
the next MDIO read has a pending MII event, and immediately reads the
data registers before they contain useful data. When the read does
complete, another MII event is posted, which results in the next read
also going wrong, and the cycle continues.

By writing 0 to the MII_DATA register before writing to the speed
register, this MII event for the MII_SPEED is suppressed, and polled
IO works as expected.

Fixes: 29ae6bd1b0d8 ("net: ethernet: fec: Replace interrupt driven MDIO with polled IO")
Reported-by: Andy Duan <fugang.duan@nxp.com>
Suggested-by: Andy Duan <fugang.duan@nxp.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1ae075a246a3..aa5e744ec098 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -996,6 +996,9 @@ fec_restart(struct net_device *ndev)
 		writel(0x0, fep->hwp + FEC_X_CNTRL);
 	}
 
+	/* Prevent an MII event being reported when changing speed */
+	writel(0, fep->hwp + FEC_MII_DATA);
+
 	/* Set MII speed */
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
@@ -1182,6 +1185,10 @@ fec_stop(struct net_device *ndev)
 		writel(val, fep->hwp + FEC_ECNTRL);
 		fec_enet_stop_mode(fep, true);
 	}
+
+	/* Prevent an MII event being reported when changing speed */
+	writel(0, fep->hwp + FEC_MII_DATA);
+
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 	/* We have to keep ENET enabled to have MII interrupt stay working */
@@ -2142,6 +2149,16 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	if (suppress_preamble)
 		fep->phy_speed |= BIT(7);
 
+	/* Clear MMFR to avoid generating an MII event when writing MSCR.
+	 * MII event generation condition:
+	 * - writing MSCR:
+	 *	- mmfr[31:0]_not_zero & mscr[7:0]_is_zero &
+	 *	  mscr_reg_data_in[7:0] != 0
+	 * - writing MMFR:
+	 *	- mscr[7:0]_not_zero
+	 */
+	writel(0, fep->hwp + FEC_MII_DATA);
+
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 	/* Clear any pending transaction complete indication */
-- 
cgit v1.2.3-59-g8ed1b


From da50d57abd7ecaba151600e726ccb944e7ddf81a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:16 +0200
Subject: docs: networking: convert caif files to ReST

There are two text files for caif, plus one already converted
file.

Convert the two remaining ones to ReST, create a new index.rst
file for CAIF, adding it to the main networking documentation
index.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/caif/Linux-CAIF.txt  | 175 --------------------
 Documentation/networking/caif/caif.rst        |   2 -
 Documentation/networking/caif/index.rst       |  13 ++
 Documentation/networking/caif/linux_caif.rst  | 195 ++++++++++++++++++++++
 Documentation/networking/caif/spi_porting.rst | 229 ++++++++++++++++++++++++++
 Documentation/networking/caif/spi_porting.txt | 208 -----------------------
 Documentation/networking/index.rst            |   1 +
 drivers/net/caif/Kconfig                      |   2 +-
 8 files changed, 439 insertions(+), 386 deletions(-)
 delete mode 100644 Documentation/networking/caif/Linux-CAIF.txt
 create mode 100644 Documentation/networking/caif/index.rst
 create mode 100644 Documentation/networking/caif/linux_caif.rst
 create mode 100644 Documentation/networking/caif/spi_porting.rst
 delete mode 100644 Documentation/networking/caif/spi_porting.txt

diff --git a/Documentation/networking/caif/Linux-CAIF.txt b/Documentation/networking/caif/Linux-CAIF.txt
deleted file mode 100644
index 0aa4bd381bec..000000000000
--- a/Documentation/networking/caif/Linux-CAIF.txt
+++ /dev/null
@@ -1,175 +0,0 @@
-Linux CAIF
-===========
-copyright (C) ST-Ericsson AB 2010
-Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
-License terms: GNU General Public License (GPL) version 2
-
-
-Introduction
-------------
-CAIF is a MUX protocol used by ST-Ericsson cellular modems for
-communication between Modem and host. The host processes can open virtual AT
-channels, initiate GPRS Data connections, Video channels and Utility Channels.
-The Utility Channels are general purpose pipes between modem and host.
-
-ST-Ericsson modems support a number of transports between modem
-and host. Currently, UART and Loopback are available for Linux.
-
-
-Architecture:
-------------
-The implementation of CAIF is divided into:
-* CAIF Socket Layer and GPRS IP Interface.
-* CAIF Core Protocol Implementation
-* CAIF Link Layer, implemented as NET devices.
-
-
-  RTNL
-   !
-   !	      +------+	 +------+
-   !	     +------+!	+------+!
-   !	     !	IP  !!	!Socket!!
-   +-------> !interf!+	! API  !+	<- CAIF Client APIs
-   !	     +------+	+------!
-   !		!	    !
-   !		+-----------+
-   !		      !
-   !		   +------+		<- CAIF Core Protocol
-   !		   ! CAIF !
-   !		   ! Core !
-   !		   +------+
-   !	   +----------!---------+
-   !	   !	      !		!
-   !	+------+   +-----+   +------+
-   +--> ! HSI  !   ! TTY !   ! USB  !	<- Link Layer (Net Devices)
-	+------+   +-----+   +------+
-
-
-
-I M P L E M E N T A T I O N
-===========================
-
-
-CAIF Core Protocol Layer
-=========================================
-
-CAIF Core layer implements the CAIF protocol as defined by ST-Ericsson.
-It implements the CAIF protocol stack in a layered approach, where
-each layer described in the specification is implemented as a separate layer.
-The architecture is inspired by the design patterns "Protocol Layer" and
-"Protocol Packet".
-
-== CAIF structure ==
-The Core CAIF implementation contains:
-      -	Simple implementation of CAIF.
-      -	Layered architecture (a la Streams), each layer in the CAIF
-	specification is implemented in a separate c-file.
-      -	Clients must call configuration function to add PHY layer.
-      -	Clients must implement CAIF layer to consume/produce
-	CAIF payload with receive and transmit functions.
-      -	Clients must call configuration function to add and connect the
-	Client layer.
-      - When receiving / transmitting CAIF Packets (cfpkt), ownership is passed
-	to the called function (except for framing layers' receive function)
-
-Layered Architecture
---------------------
-The CAIF protocol can be divided into two parts: Support functions and Protocol
-Implementation. The support functions include:
-
-      - CFPKT CAIF Packet. Implementation of CAIF Protocol Packet. The
-	CAIF Packet has functions for creating, destroying and adding content
-	and for adding/extracting header and trailers to protocol packets.
-
-The CAIF Protocol implementation contains:
-
-      - CFCNFG CAIF Configuration layer. Configures the CAIF Protocol
-	Stack and provides a Client interface for adding Link-Layer and
-	Driver interfaces on top of the CAIF Stack.
-
-      - CFCTRL CAIF Control layer. Encodes and Decodes control messages
-	such as enumeration and channel setup. Also matches request and
-	response messages.
-
-      - CFSERVL General CAIF Service Layer functionality; handles flow
-	control and remote shutdown requests.
-
-      - CFVEI CAIF VEI layer. Handles CAIF AT Channels on VEI (Virtual
-	External Interface). This layer encodes/decodes VEI frames.
-
-      - CFDGML CAIF Datagram layer. Handles CAIF Datagram layer (IP
-	traffic), encodes/decodes Datagram frames.
-
-      - CFMUX CAIF Mux layer. Handles multiplexing between multiple
-	physical bearers and multiple channels such as VEI, Datagram, etc.
-	The MUX keeps track of the existing CAIF Channels and
-	Physical Instances and selects the appropriate instance based
-	on Channel-Id and Physical-ID.
-
-      - CFFRML CAIF Framing layer. Handles Framing i.e. Frame length
-	and frame checksum.
-
-      - CFSERL CAIF Serial layer. Handles concatenation/split of frames
-	into CAIF Frames with correct length.
-
-
-
-		    +---------+
-		    | Config  |
-		    | CFCNFG  |
-		    +---------+
-			 !
-    +---------+	    +---------+	    +---------+
-    |	AT    |	    | Control |	    | Datagram|
-    | CFVEIL  |	    | CFCTRL  |	    | CFDGML  |
-    +---------+	    +---------+	    +---------+
-	   \_____________!______________/
-			 !
-		    +---------+
-		    |	MUX   |
-		    |	      |
-		    +---------+
-		    _____!_____
-		   /	       \
-	    +---------+	    +---------+
-	    | CFFRML  |	    | CFFRML  |
-	    | Framing |	    | Framing |
-	    +---------+	    +---------+
-		 !		!
-	    +---------+	    +---------+
-	    |	      |	    | Serial  |
-	    |	      |	    | CFSERL  |
-	    +---------+	    +---------+
-
-
-In this layered approach the following "rules" apply.
-      - All layers embed the same structure "struct cflayer"
-      - A layer does not depend on any other layer's private data.
-      - Layers are stacked by setting the pointers
-		  layer->up , layer->dn
-      -	In order to send data upwards, each layer should do
-		 layer->up->receive(layer->up, packet);
-      - In order to send data downwards, each layer should do
-		 layer->dn->transmit(layer->dn, packet);
-
-
-CAIF Socket and IP interface
-===========================
-
-The IP interface and CAIF socket API are implemented on top of the
-CAIF Core protocol. The IP Interface and CAIF socket have an instance of
-'struct cflayer', just like the CAIF Core protocol stack.
-Net device and Socket implement the 'receive()' function defined by
-'struct cflayer', just like the rest of the CAIF stack. In this way, transmit and
-receive of packets is handled as by the rest of the layers: the 'dn->transmit()'
-function is called in order to transmit data.
-
-Configuration of Link Layer
----------------------------
-The Link Layer is implemented as Linux network devices (struct net_device).
-Payload handling and registration is done using standard Linux mechanisms.
-
-The CAIF Protocol relies on a loss-less link layer without implementing
-retransmission. This implies that packet drops must not happen.
-Therefore a flow-control mechanism is implemented where the physical
-interface can initiate flow stop for all CAIF Channels.
diff --git a/Documentation/networking/caif/caif.rst b/Documentation/networking/caif/caif.rst
index 07afc8063d4d..a07213030ccf 100644
--- a/Documentation/networking/caif/caif.rst
+++ b/Documentation/networking/caif/caif.rst
@@ -1,5 +1,3 @@
-:orphan:
-
 .. SPDX-License-Identifier: GPL-2.0
 .. include:: <isonum.txt>
 
diff --git a/Documentation/networking/caif/index.rst b/Documentation/networking/caif/index.rst
new file mode 100644
index 000000000000..86e5b7832ec3
--- /dev/null
+++ b/Documentation/networking/caif/index.rst
@@ -0,0 +1,13 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+CAIF
+====
+
+Contents:
+
+.. toctree::
+   :maxdepth: 2
+
+   linux_caif
+   caif
+   spi_porting
diff --git a/Documentation/networking/caif/linux_caif.rst b/Documentation/networking/caif/linux_caif.rst
new file mode 100644
index 000000000000..a0480862ab8c
--- /dev/null
+++ b/Documentation/networking/caif/linux_caif.rst
@@ -0,0 +1,195 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==========
+Linux CAIF
+==========
+
+Copyright |copy| ST-Ericsson AB 2010
+
+:Author: Sjur Brendeland/ sjur.brandeland@stericsson.com
+:License terms: GNU General Public License (GPL) version 2
+
+
+Introduction
+============
+
+CAIF is a MUX protocol used by ST-Ericsson cellular modems for
+communication between Modem and host. The host processes can open virtual AT
+channels, initiate GPRS Data connections, Video channels and Utility Channels.
+The Utility Channels are general purpose pipes between modem and host.
+
+ST-Ericsson modems support a number of transports between modem
+and host. Currently, UART and Loopback are available for Linux.
+
+
+Architecture
+============
+
+The implementation of CAIF is divided into:
+
+* CAIF Socket Layer and GPRS IP Interface.
+* CAIF Core Protocol Implementation
+* CAIF Link Layer, implemented as NET devices.
+
+::
+
+  RTNL
+   !
+   !	      +------+	 +------+
+   !	     +------+!	+------+!
+   !	     !	IP  !!	!Socket!!
+   +-------> !interf!+	! API  !+	<- CAIF Client APIs
+   !	     +------+	+------!
+   !		!	    !
+   !		+-----------+
+   !		      !
+   !		   +------+		<- CAIF Core Protocol
+   !		   ! CAIF !
+   !		   ! Core !
+   !		   +------+
+   !	   +----------!---------+
+   !	   !	      !		!
+   !	+------+   +-----+   +------+
+   +--> ! HSI  !   ! TTY !   ! USB  !	<- Link Layer (Net Devices)
+	+------+   +-----+   +------+
+
+
+
+Implementation
+==============
+
+
+CAIF Core Protocol Layer
+------------------------
+
+CAIF Core layer implements the CAIF protocol as defined by ST-Ericsson.
+It implements the CAIF protocol stack in a layered approach, where
+each layer described in the specification is implemented as a separate layer.
+The architecture is inspired by the design patterns "Protocol Layer" and
+"Protocol Packet".
+
+CAIF structure
+^^^^^^^^^^^^^^
+
+The Core CAIF implementation contains:
+
+      -	Simple implementation of CAIF.
+      -	Layered architecture (a la Streams), each layer in the CAIF
+	specification is implemented in a separate c-file.
+      -	Clients must call configuration function to add PHY layer.
+      -	Clients must implement CAIF layer to consume/produce
+	CAIF payload with receive and transmit functions.
+      -	Clients must call configuration function to add and connect the
+	Client layer.
+      - When receiving / transmitting CAIF Packets (cfpkt), ownership is passed
+	to the called function (except for framing layers' receive function)
+
+Layered Architecture
+====================
+
+The CAIF protocol can be divided into two parts: Support functions and Protocol
+Implementation. The support functions include:
+
+      - CFPKT CAIF Packet. Implementation of CAIF Protocol Packet. The
+	CAIF Packet has functions for creating, destroying and adding content
+	and for adding/extracting header and trailers to protocol packets.
+
+The CAIF Protocol implementation contains:
+
+      - CFCNFG CAIF Configuration layer. Configures the CAIF Protocol
+	Stack and provides a Client interface for adding Link-Layer and
+	Driver interfaces on top of the CAIF Stack.
+
+      - CFCTRL CAIF Control layer. Encodes and Decodes control messages
+	such as enumeration and channel setup. Also matches request and
+	response messages.
+
+      - CFSERVL General CAIF Service Layer functionality; handles flow
+	control and remote shutdown requests.
+
+      - CFVEI CAIF VEI layer. Handles CAIF AT Channels on VEI (Virtual
+	External Interface). This layer encodes/decodes VEI frames.
+
+      - CFDGML CAIF Datagram layer. Handles CAIF Datagram layer (IP
+	traffic), encodes/decodes Datagram frames.
+
+      - CFMUX CAIF Mux layer. Handles multiplexing between multiple
+	physical bearers and multiple channels such as VEI, Datagram, etc.
+	The MUX keeps track of the existing CAIF Channels and
+	Physical Instances and selects the appropriate instance based
+	on Channel-Id and Physical-ID.
+
+      - CFFRML CAIF Framing layer. Handles Framing i.e. Frame length
+	and frame checksum.
+
+      - CFSERL CAIF Serial layer. Handles concatenation/split of frames
+	into CAIF Frames with correct length.
+
+::
+
+		    +---------+
+		    | Config  |
+		    | CFCNFG  |
+		    +---------+
+			 !
+    +---------+	    +---------+	    +---------+
+    |	AT    |	    | Control |	    | Datagram|
+    | CFVEIL  |	    | CFCTRL  |	    | CFDGML  |
+    +---------+	    +---------+	    +---------+
+	   \_____________!______________/
+			 !
+		    +---------+
+		    |	MUX   |
+		    |	      |
+		    +---------+
+		    _____!_____
+		   /	       \
+	    +---------+	    +---------+
+	    | CFFRML  |	    | CFFRML  |
+	    | Framing |	    | Framing |
+	    +---------+	    +---------+
+		 !		!
+	    +---------+	    +---------+
+	    |	      |	    | Serial  |
+	    |	      |	    | CFSERL  |
+	    +---------+	    +---------+
+
+
+In this layered approach the following "rules" apply.
+
+      - All layers embed the same structure "struct cflayer"
+      - A layer does not depend on any other layer's private data.
+      - Layers are stacked by setting the pointers::
+
+		  layer->up , layer->dn
+
+      -	In order to send data upwards, each layer should do::
+
+		 layer->up->receive(layer->up, packet);
+
+      - In order to send data downwards, each layer should do::
+
+		 layer->dn->transmit(layer->dn, packet);
+
+
+CAIF Socket and IP interface
+============================
+
+The IP interface and CAIF socket API are implemented on top of the
+CAIF Core protocol. The IP Interface and CAIF socket have an instance of
+'struct cflayer', just like the CAIF Core protocol stack.
+Net device and Socket implement the 'receive()' function defined by
+'struct cflayer', just like the rest of the CAIF stack. In this way, transmission
+and reception of packets are handled as in the rest of the layers: the
+'dn->transmit()' function is called in order to transmit data.
+
+Configuration of Link Layer
+---------------------------
+The Link Layer is implemented as Linux network devices (struct net_device).
+Payload handling and registration is done using standard Linux mechanisms.
+
+The CAIF Protocol relies on a loss-less link layer without implementing
+retransmission. This implies that packet drops must not happen.
+Therefore a flow-control mechanism is implemented where the physical
+interface can initiate flow stop for all CAIF Channels.
diff --git a/Documentation/networking/caif/spi_porting.rst b/Documentation/networking/caif/spi_porting.rst
new file mode 100644
index 000000000000..d49f874b20ac
--- /dev/null
+++ b/Documentation/networking/caif/spi_porting.rst
@@ -0,0 +1,229 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+CAIF SPI porting
+================
+
+CAIF SPI basics
+===============
+
+Running CAIF over SPI needs some extra setup, owing to the nature of SPI.
+Two extra GPIOs have been added in order to negotiate the transfers
+between the master and the slave. The minimum requirement for running
+CAIF over SPI is a SPI slave chip and two GPIOs (more details below).
+Please note that running as a slave implies that you need to keep up
+with the master clock. An overrun or underrun event is fatal.
+
+CAIF SPI framework
+==================
+
+To make porting as easy as possible, the CAIF SPI has been divided in
+two parts. The first part (called the interface part) deals with all
+generic functionality such as length framing, SPI frame negotiation
+and SPI frame delivery and transmission. The other part is the CAIF
+SPI slave device part, which is the module that you have to write if
+you want to run SPI CAIF on a new hardware. This part takes care of
+the physical hardware, both with regard to SPI and to GPIOs.
+
+- Implementing a CAIF SPI device:
+
+	- Functionality provided by the CAIF SPI slave device:
+
+	In order to implement a SPI device you will, as a minimum,
+	need to implement the following
+	functions:
+
+	::
+
+	    int (*init_xfer) (struct cfspi_xfer * xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface to give
+	you a chance to set up your hardware to be ready to receive
+	a stream of data from the master. The xfer structure contains
+	both physical and logical addresses, as well as the total length
+	of the transfer in both directions. The dev parameter can be used
+	to map to different CAIF SPI slave devices.
+
+	::
+
+	    void (*sig_xfer) (bool xfer, struct cfspi_dev *dev):
+
+	This function is called by the CAIF SPI interface when the output
+	(SPI_INT) GPIO needs to change state. The boolean value of the xfer
+	variable indicates whether the GPIO should be asserted (HIGH) or
+	deasserted (LOW). The dev parameter can be used to map to different CAIF
+	SPI slave devices.
+
+	- Functionality provided by the CAIF SPI interface:
+
+	::
+
+	    void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	signal a change of state of the input GPIO (SS) to the interface.
+	Only active edges are mandatory to be reported.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	::
+
+	    void (*xfer_done_cb) (struct cfspi_ifc *ifc);
+
+	This function is called by the CAIF SPI slave device in order to
+	report that a transfer is completed. This function should only be
+	called once both the transmission and the reception are completed.
+	This function can be called from IRQ context (recommended in order
+	not to introduce latency). The ifc parameter should be the pointer
+	returned from the platform probe function in the SPI device structure.
+
+	- Connecting the bits and pieces:
+
+		- Filling in the SPI slave device structure:
+
+		  Connect the necessary callback functions.
+
+		  Indicate clock speed (used to calculate toggle delays).
+
+		  Choose a suitable name (helps debugging if you use several CAIF
+		  SPI slave devices).
+
+		  Assign your private data (can be used to map to your
+		  structure).
+
+		- Filling in the SPI slave platform device structure:
+
+		  Add name of driver to connect to ("cfspi_sspi").
+
+		  Assign the SPI slave device structure as platform data.
+
+Padding
+=======
+
+In order to optimize throughput, a number of SPI padding options are provided.
+Padding can be enabled independently for uplink and downlink transfers.
+Padding can be enabled for the head, the tail and for the total frame size.
+The padding needs to be correctly configured on both sides of the link.
+The padding can be changed via module parameters in cfspi_sspi.c or via
+the sysfs directory of the cfspi_sspi driver (before device registration).
+
+- CAIF SPI device template::
+
+    /*
+    *	Copyright (C) ST-Ericsson AB 2010
+    *	Author: Daniel Martensson / Daniel.Martensson@stericsson.com
+    *	License terms: GNU General Public License (GPL), version 2.
+    *
+    */
+
+    #include <linux/init.h>
+    #include <linux/module.h>
+    #include <linux/device.h>
+    #include <linux/wait.h>
+    #include <linux/interrupt.h>
+    #include <linux/dma-mapping.h>
+    #include <net/caif/caif_spi.h>
+
+    MODULE_LICENSE("GPL");
+
+    struct sspi_struct {
+	    struct cfspi_dev sdev;
+	    struct cfspi_xfer *xfer;
+    };
+
+    static struct sspi_struct slave;
+    static struct platform_device slave_device;
+
+    static irqreturn_t sspi_irq(int irq, void *arg)
+    {
+	    /* You only need to trigger on an edge to the active state of the
+	    * SS signal. Once an edge is detected, the ss_cb() function should be
+	    * called with the parameter assert set to true. It is OK
+	    * (and even advised) to call the ss_cb() function in IRQ context in
+	    * order not to add any delay. */
+
+	    return IRQ_HANDLED;
+    }
+
+    static void sspi_complete(void *context)
+    {
+	    /* Normally the DMA or the SPI framework will call you back
+	    * in something similar to this. The only thing you need to
+	    * do is to call the xfer_done_cb() function, providing the pointer
+	    * to the CAIF SPI interface. It is OK to call this function
+	    * from IRQ context. */
+    }
+
+    static int sspi_init_xfer(struct cfspi_xfer *xfer, struct cfspi_dev *dev)
+    {
+	    /* Store transfer info. For a normal implementation you should
+	    * set up your DMA here and make sure that you are ready to
+	    * receive the data from the master SPI. */
+
+	    struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+
+	    sspi->xfer = xfer;
+
+	    return 0;
+    }
+
+    void sspi_sig_xfer(bool xfer, struct cfspi_dev *dev)
+    {
+	    /* If xfer is true then you should assert the SPI_INT to indicate to
+	    * the master that you are ready to receive the data from the master
+	    * SPI. If xfer is false then you should de-assert SPI_INT to indicate
+	    * that the transfer is done.
+	    */
+
+	    struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
+    }
+
+    static void sspi_release(struct device *dev)
+    {
+	    /*
+	    * Here you should release your SPI device resources.
+	    */
+    }
+
+    static int __init sspi_init(void)
+    {
+	    /* Here you should initialize your SPI device by providing the
+	    * necessary functions, clock speed, name and private data. Once
+	    * done, you can register your device with the
+	    * platform_device_register() function. This function will return
+	    * with the CAIF SPI interface initialized. This is probably also
+	    * the place where you should set up your GPIOs, interrupts and SPI
+	    * resources. */
+
+	    int res = 0;
+
+	    /* Initialize slave device. */
+	    slave.sdev.init_xfer = sspi_init_xfer;
+	    slave.sdev.sig_xfer = sspi_sig_xfer;
+	    slave.sdev.clk_mhz = 13;
+	    slave.sdev.priv = &slave;
+	    slave.sdev.name = "spi_sspi";
+	    slave_device.dev.release = sspi_release;
+
+	    /* Initialize platform device. */
+	    slave_device.name = "cfspi_sspi";
+	    slave_device.dev.platform_data = &slave.sdev;
+
+	    /* Register platform device. */
+	    res = platform_device_register(&slave_device);
+	    if (res) {
+		    printk(KERN_WARNING "sspi_init: failed to register dev.\n");
+		    return -ENODEV;
+	    }
+
+	    return res;
+    }
+
+    static void __exit sspi_exit(void)
+    {
+	    platform_device_del(&slave_device);
+    }
+
+    module_init(sspi_init);
+    module_exit(sspi_exit);
diff --git a/Documentation/networking/caif/spi_porting.txt b/Documentation/networking/caif/spi_porting.txt
deleted file mode 100644
index 9efd0687dc4c..000000000000
--- a/Documentation/networking/caif/spi_porting.txt
+++ /dev/null
@@ -1,208 +0,0 @@
-- CAIF SPI porting -
-
-- CAIF SPI basics:
-
-Running CAIF over SPI needs some extra setup, owing to the nature of SPI.
-Two extra GPIOs have been added in order to negotiate the transfers
- between the master and the slave. The minimum requirement for running
-CAIF over SPI is a SPI slave chip and two GPIOs (more details below).
-Please note that running as a slave implies that you need to keep up
-with the master clock. An overrun or underrun event is fatal.
-
-- CAIF SPI framework:
-
-To make porting as easy as possible, the CAIF SPI has been divided in
-two parts. The first part (called the interface part) deals with all
-generic functionality such as length framing, SPI frame negotiation
-and SPI frame delivery and transmission. The other part is the CAIF
-SPI slave device part, which is the module that you have to write if
-you want to run SPI CAIF on a new hardware. This part takes care of
-the physical hardware, both with regard to SPI and to GPIOs.
-
-- Implementing a CAIF SPI device:
-
-	- Functionality provided by the CAIF SPI slave device:
-
-	In order to implement a SPI device you will, as a minimum,
-	need to implement the following
-	functions:
-
-	int (*init_xfer) (struct cfspi_xfer * xfer, struct cfspi_dev *dev):
-
-	This function is called by the CAIF SPI interface to give
-	you a chance to set up your hardware to be ready to receive
-	a stream of data from the master. The xfer structure contains
-	both physical and logical addresses, as well as the total length
-	of the transfer in both directions.The dev parameter can be used
-	to map to different CAIF SPI slave devices.
-
-	void (*sig_xfer) (bool xfer, struct cfspi_dev *dev):
-
-	This function is called by the CAIF SPI interface when the output
-	(SPI_INT) GPIO needs to change state. The boolean value of the xfer
-	variable indicates whether the GPIO should be asserted (HIGH) or
-	deasserted (LOW). The dev parameter can be used to map to different CAIF
-	SPI slave devices.
-
-	- Functionality provided by the CAIF SPI interface:
-
-	void (*ss_cb) (bool assert, struct cfspi_ifc *ifc);
-
-	This function is called by the CAIF SPI slave device in order to
-	signal a change of state of the input GPIO (SS) to the interface.
-	Only active edges are mandatory to be reported.
-	This function can be called from IRQ context (recommended in order
-	not to introduce latency). The ifc parameter should be the pointer
-	returned from the platform probe function in the SPI device structure.
-
-	void (*xfer_done_cb) (struct cfspi_ifc *ifc);
-
-	This function is called by the CAIF SPI slave device in order to
-	report that a transfer is completed. This function should only be
-	called once both the transmission and the reception are completed.
-	This function can be called from IRQ context (recommended in order
-	not to introduce latency). The ifc parameter should be the pointer
-	returned from the platform probe function in the SPI device structure.
-
-	- Connecting the bits and pieces:
-
-		- Filling in the SPI slave device structure:
-
-		Connect the necessary callback functions.
-		Indicate clock speed (used to calculate toggle delays).
-		Chose a suitable name (helps debugging if you use several CAIF
-		SPI slave devices).
-		Assign your private data (can be used to map to your structure).
-
-		- Filling in the SPI slave platform device structure:
-		Add name of driver to connect to ("cfspi_sspi").
-		Assign the SPI slave device structure as platform data.
-
-- Padding:
-
-In order to optimize throughput, a number of SPI padding options are provided.
-Padding can be enabled independently for uplink and downlink transfers.
-Padding can be enabled for the head, the tail and for the total frame size.
-The padding needs to be correctly configured on both sides of the link.
-The padding can be changed via module parameters in cfspi_sspi.c or via
-the sysfs directory of the cfspi_sspi driver (before device registration).
-
-- CAIF SPI device template:
-
-/*
- *	Copyright (C) ST-Ericsson AB 2010
- *	Author: Daniel Martensson / Daniel.Martensson@stericsson.com
- *	License terms: GNU General Public License (GPL), version 2.
- *
- */
-
-#include <linux/init.h>
-#include <linux/module.h>
-#include <linux/device.h>
-#include <linux/wait.h>
-#include <linux/interrupt.h>
-#include <linux/dma-mapping.h>
-#include <net/caif/caif_spi.h>
-
-MODULE_LICENSE("GPL");
-
-struct sspi_struct {
-	struct cfspi_dev sdev;
-	struct cfspi_xfer *xfer;
-};
-
-static struct sspi_struct slave;
-static struct platform_device slave_device;
-
-static irqreturn_t sspi_irq(int irq, void *arg)
-{
-	/* You only need to trigger on an edge to the active state of the
-	 * SS signal. Once a edge is detected, the ss_cb() function should be
-	 * called with the parameter assert set to true. It is OK
-	 * (and even advised) to call the ss_cb() function in IRQ context in
-	 * order not to add any delay. */
-
-	return IRQ_HANDLED;
-}
-
-static void sspi_complete(void *context)
-{
-	/* Normally the DMA or the SPI framework will call you back
-	 * in something similar to this. The only thing you need to
-	 * do is to call the xfer_done_cb() function, providing the pointer
-	 * to the CAIF SPI interface. It is OK to call this function
-	 * from IRQ context. */
-}
-
-static int sspi_init_xfer(struct cfspi_xfer *xfer, struct cfspi_dev *dev)
-{
-	/* Store transfer info. For a normal implementation you should
-	 * set up your DMA here and make sure that you are ready to
-	 * receive the data from the master SPI. */
-
-	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
-
-	sspi->xfer = xfer;
-
-	return 0;
-}
-
-void sspi_sig_xfer(bool xfer, struct cfspi_dev *dev)
-{
-	/* If xfer is true then you should assert the SPI_INT to indicate to
-	 * the master that you are ready to receive the data from the master
-	 * SPI. If xfer is false then you should de-assert SPI_INT to indicate
-	 * that the transfer is done.
-	 */
-
-	struct sspi_struct *sspi = (struct sspi_struct *)dev->priv;
-}
-
-static void sspi_release(struct device *dev)
-{
-	/*
-	 * Here you should release your SPI device resources.
-	 */
-}
-
-static int __init sspi_init(void)
-{
-	/* Here you should initialize your SPI device by providing the
-	 * necessary functions, clock speed, name and private data. Once
-	 * done, you can register your device with the
-	 * platform_device_register() function. This function will return
-	 * with the CAIF SPI interface initialized. This is probably also
-	 * the place where you should set up your GPIOs, interrupts and SPI
-	 * resources. */
-
-	int res = 0;
-
-	/* Initialize slave device. */
-	slave.sdev.init_xfer = sspi_init_xfer;
-	slave.sdev.sig_xfer = sspi_sig_xfer;
-	slave.sdev.clk_mhz = 13;
-	slave.sdev.priv = &slave;
-	slave.sdev.name = "spi_sspi";
-	slave_device.dev.release = sspi_release;
-
-	/* Initialize platform device. */
-	slave_device.name = "cfspi_sspi";
-	slave_device.dev.platform_data = &slave.sdev;
-
-	/* Register platform device. */
-	res = platform_device_register(&slave_device);
-	if (res) {
-		printk(KERN_WARNING "sspi_init: failed to register dev.\n");
-		return -ENODEV;
-	}
-
-	return res;
-}
-
-static void __exit sspi_exit(void)
-{
-	platform_device_del(&slave_device);
-}
-
-module_init(sspi_init);
-module_exit(sspi_exit);
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 6538ede29661..5b3421ec25ec 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -15,6 +15,7 @@ Contents:
    device_drivers/index
    dsa/index
    devlink/index
+   caif/index
    ethtool-netlink
    ieee802154
    j1939
diff --git a/drivers/net/caif/Kconfig b/drivers/net/caif/Kconfig
index 661c25eb1c46..1538ad194cf4 100644
--- a/drivers/net/caif/Kconfig
+++ b/drivers/net/caif/Kconfig
@@ -28,7 +28,7 @@ config CAIF_SPI_SLAVE
 	  The CAIF Link layer SPI Protocol driver for Slave SPI interface.
 	  This driver implements a platform driver to accommodate for a
 	  platform specific SPI device. A sample CAIF SPI Platform device is
-	  provided in <file:Documentation/networking/caif/spi_porting.txt>.
+	  provided in <file:Documentation/networking/caif/spi_porting.rst>.
 
 config CAIF_SPI_SYNC
 	bool "Next command and length in start of frame"
-- 
cgit v1.2.3-59-g8ed1b


From a434aaba17f56c0a25edff4104dd5f9d5b3ceba2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:17 +0200
Subject: docs: networking: convert 6pack.txt to ReST

- add SPDX header;
- use title markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/6pack.rst | 191 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/6pack.txt | 175 ---------------------------------
 Documentation/networking/index.rst |   1 +
 drivers/net/hamradio/Kconfig       |   2 +-
 4 files changed, 193 insertions(+), 176 deletions(-)
 create mode 100644 Documentation/networking/6pack.rst
 delete mode 100644 Documentation/networking/6pack.txt

diff --git a/Documentation/networking/6pack.rst b/Documentation/networking/6pack.rst
new file mode 100644
index 000000000000..bc5bf1f1a98f
--- /dev/null
+++ b/Documentation/networking/6pack.rst
@@ -0,0 +1,191 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============
+6pack Protocol
+==============
+
+This is the 6pack-mini-HOWTO, written by
+
+Andreas Könsgen DG3KQ
+
+:Internet: ajk@comnets.uni-bremen.de
+:AMPR-net: dg3kq@db0pra.ampr.org
+:AX.25:    dg3kq@db0ach.#nrw.deu.eu
+
+Last update: April 7, 1998
+
+1. What is 6pack, and what are the advantages to KISS?
+======================================================
+
+6pack is a transmission protocol for data exchange between the PC and
+the TNC over a serial line. It can be used as an alternative to KISS.
+
+6pack has two major advantages:
+
+- The PC is given full control over the radio
+  channel. Special control data is exchanged between the PC and the TNC so
+  that the PC knows at any time if the TNC is receiving data, if a TNC
+  buffer underrun or overrun has occurred, if the PTT is
+  set and so on. This control data is processed at a higher priority than
+  normal data, so a data stream can be interrupted at any time to issue an
+  important event. This helps to improve the channel access and timing
+  algorithms as everything is computed in the PC. It would even be possible
+  to experiment with something completely different from the known CSMA and
+  DAMA channel access methods.
+  This kind of real-time control is especially important to supply several
+  TNCs that are connected between each other and the PC by a daisy chain
+  (however, this feature is not supported yet by the Linux 6pack driver).
+
+- Each packet transferred over the serial line is supplied with a checksum,
+  so it is easy to detect errors due to problems on the serial line.
+  Received packets that are corrupt are not passed on to the AX.25 layer.
+  Damaged packets that the TNC has received from the PC are not transmitted.
+
+More details about 6pack are described in the file 6pack.ps that is located
+in the doc directory of the AX.25 utilities package.
+
+2. Who has developed the 6pack protocol?
+========================================
+
+The 6pack protocol has been developed by Ekki Plicht DF4OR, Henning Rech
+DF9IC and Gunter Jost DK7WJ. A driver for 6pack, written by Gunter Jost and
+Matthias Welwarsky DG2FEF, comes along with the PC version of FlexNet.
+They have also written a firmware for TNCs to perform the 6pack
+protocol (see section 4 below).
+
+3. Where can I get the latest version of 6pack for Linux?
+=========================================================
+
+At the moment, the 6pack stuff can be obtained via anonymous ftp from
+db0bm.automation.fh-aachen.de. In the directory /incoming/dg3kq,
+there is a file named 6pack.tgz.
+
+4. Preparing the TNC for 6pack operation
+========================================
+
+To be able to use 6pack, a special firmware for the TNC is needed. The EPROM
+of a newly bought TNC does not contain 6pack, so you will have to
+program an EPROM yourself. The image file for 6pack EPROMs should be
+available on any packet radio box where PC/FlexNet can be found. The name of
+the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet
+team. It can be used under the terms of the license that comes along
+with PC/FlexNet. Please do not ask me about the internals of this file as I
+don't know anything about it. I used a textual description of the 6pack
+protocol to program the Linux driver.
+
+TNCs contain a 64kByte EPROM, the lower half of which is used for
+the firmware/KISS. The upper half is either empty or is sometimes
+programmed with software called TAPR. In the latter case, the TNC
+is supplied with a DIP switch so you can easily change between the
+two systems. When programming a new EPROM, one of the systems is replaced
+by 6pack. It is useful to replace TAPR, as this software is rarely used
+nowadays. If your TNC is not equipped with the switch mentioned above, you
+can build in one yourself that switches over the highest address pin
+of the EPROM between HIGH and LOW level. After having inserted the new EPROM
+and switched to 6pack, apply power to the TNC for a first test. The connect
+and the status LED are lit for about a second if the firmware initialises
+the TNC correctly.
+
+5. Building and installing the 6pack driver
+===========================================
+
+The driver has been tested with kernel version 2.1.90. Use with older
+kernels may lead to a compilation error because the interface to a kernel
+function has been changed in the 2.1.8x kernels.
+
+How to turn on 6pack support:
+-----------------------------
+
+- In the linux kernel configuration program, select the code maturity level
+  options menu and turn on the prompting for development drivers.
+
+- Select the amateur radio support menu and turn on the serial port 6pack
+  driver.
+
+- Compile and install the kernel and the modules.
+
+To use the driver, the kissattach program delivered with the AX.25 utilities
+has to be modified.
+
+- Do a cd to the directory that holds the kissattach sources. Edit the
+  kissattach.c file. At the top, insert the following lines::
+
+    #ifndef N_6PACK
+    #define N_6PACK (N_AX25+1)
+    #endif
+
+  Then find the line::
+
+    int disc = N_AX25;
+
+  and replace N_AX25 by N_6PACK.
+
+- Recompile kissattach. Rename it to spattach to avoid confusion.
+
+Installing the driver:
+----------------------
+
+- Do an insmod 6pack. Look at your /var/log/messages file to check if the
+  module has printed its initialization message.
+
+- Do a spattach as you would launch kissattach when starting a KISS port.
+  Check if the kernel prints the message '6pack: TNC found'.
+
+- From here, everything should work as if you were setting up a KISS port.
+  The only difference is that the network device that represents
+  the 6pack port is called sp instead of sl or ax. So, sp0 would be the
+  first 6pack port.
+
+Although the driver has been tested on various platforms, I still declare it
+ALPHA. BE CAREFUL! Sync your disks before insmoding the 6pack module
+and spattaching. Watch out if your computer behaves strangely. Read section
+6 of this file about known problems.
+
+Note that the connect and status LEDs of the TNC are controlled in a
+different way than they are when the TNC is used with PC/FlexNet. When using
+FlexNet, the connect LED is on if there is a connection; the status LED is
+on if there is data in the buffer of the PC's AX.25 engine that has to be
+transmitted. Under Linux, the 6pack layer is beyond the AX.25 layer,
+so the 6pack driver doesn't know anything about connects or data that
+has not yet been transmitted. Therefore the LEDs are controlled
+as they are in KISS mode: The connect LED is turned on if data is transferred
+from the PC to the TNC over the serial line, the status LED if data is
+sent to the PC.
+
+6. Known problems
+=================
+
+When testing the driver with 2.0.3x kernels and
+operating with data rates on the radio channel of 9600 Baud or higher,
+the driver may, on certain systems, sometimes print the message '6pack:
+bad checksum', which is due to data loss if the other station sends two
+or more subsequent packets. I have been told that this is due to a problem
+with the serial driver of 2.0.3x kernels. I don't know yet if the problem
+still exists with 2.1.x kernels, as I have heard that the serial driver
+code has been changed with 2.1.x.
+
+When shutting down the sp interface with ifconfig, the kernel crashes if
+there is still an AX.25 connection left over which an IP connection was
+running, even if that IP connection is already closed. The problem does not
+occur when there is a bare AX.25 connection still running. I don't know if
+this is a problem of the 6pack driver or something else in the kernel.
+
+The driver has been tested as a module, not yet as a kernel-builtin driver.
+
+The 6pack protocol supports daisy-chaining of TNCs in a token ring, which is
+connected to one serial port of the PC. This feature is not implemented
+and at least at the moment I won't be able to do it because I do not have
+the opportunity to build a TNC daisy-chain and test it.
+
+Some of the comments in the source code are inaccurate. They are left from
+the SLIP/KISS driver, from which the 6pack driver has been derived.
+I haven't modified or removed them yet -- sorry! The code itself needs
+some cleaning and optimizing. This will be done in a later release.
+
+If you encounter a bug or if you have a question or suggestion concerning the
+driver, feel free to mail me, using the addresses given at the beginning of
+this file.
+
+Have fun!
+
+Andreas
diff --git a/Documentation/networking/6pack.txt b/Documentation/networking/6pack.txt
deleted file mode 100644
index 8f339428fdf4..000000000000
--- a/Documentation/networking/6pack.txt
+++ /dev/null
@@ -1,175 +0,0 @@
-This is the 6pack-mini-HOWTO, written by
-
-Andreas Könsgen DG3KQ
-Internet: ajk@comnets.uni-bremen.de
-AMPR-net: dg3kq@db0pra.ampr.org
-AX.25:    dg3kq@db0ach.#nrw.deu.eu
-
-Last update: April 7, 1998
-
-1. What is 6pack, and what are the advantages to KISS?
-
-6pack is a transmission protocol for data exchange between the PC and
-the TNC over a serial line. It can be used as an alternative to KISS.
-
-6pack has two major advantages:
-- The PC is given full control over the radio
-  channel. Special control data is exchanged between the PC and the TNC so
-  that the PC knows at any time if the TNC is receiving data, if a TNC
-  buffer underrun or overrun has occurred, if the PTT is
-  set and so on. This control data is processed at a higher priority than
-  normal data, so a data stream can be interrupted at any time to issue an
-  important event. This helps to improve the channel access and timing 
-  algorithms as everything is computed in the PC. It would even be possible 
-  to experiment with something completely different from the known CSMA and 
-  DAMA channel access methods.
-  This kind of real-time control is especially important to supply several
-  TNCs that are connected between each other and the PC by a daisy chain
-  (however, this feature is not supported yet by the Linux 6pack driver).
-
-- Each packet transferred over the serial line is supplied with a checksum,
-  so it is easy to detect errors due to problems on the serial line.
-  Received packets that are corrupt are not passed on to the AX.25 layer.
-  Damaged packets that the TNC has received from the PC are not transmitted.
-
-More details about 6pack are described in the file 6pack.ps that is located
-in the doc directory of the AX.25 utilities package.
-
-2. Who has developed the 6pack protocol?
-
-The 6pack protocol has been developed by Ekki Plicht DF4OR, Henning Rech
-DF9IC and Gunter Jost DK7WJ. A driver for 6pack, written by Gunter Jost and
-Matthias Welwarsky DG2FEF, comes along with the PC version of FlexNet.
-They have also written a firmware for TNCs to perform the 6pack
-protocol (see section 4 below).
-
-3. Where can I get the latest version of 6pack for LinuX?
-
-At the moment, the 6pack stuff can obtained via anonymous ftp from
-db0bm.automation.fh-aachen.de. In the directory /incoming/dg3kq,
-there is a file named 6pack.tgz.
-
-4. Preparing the TNC for 6pack operation
-
-To be able to use 6pack, a special firmware for the TNC is needed. The EPROM
-of a newly bought TNC does not contain 6pack, so you will have to
-program an EPROM yourself. The image file for 6pack EPROMs should be
-available on any packet radio box where PC/FlexNet can be found. The name of
-the file is 6pack.bin. This file is copyrighted and maintained by the FlexNet
-team. It can be used under the terms of the license that comes along
-with PC/FlexNet. Please do not ask me about the internals of this file as I
-don't know anything about it. I used a textual description of the 6pack
-protocol to program the Linux driver.
-
-TNCs contain a 64kByte EPROM, the lower half of which is used for
-the firmware/KISS. The upper half is either empty or is sometimes
-programmed with software called TAPR. In the latter case, the TNC
-is supplied with a DIP switch so you can easily change between the
-two systems. When programming a new EPROM, one of the systems is replaced
-by 6pack. It is useful to replace TAPR, as this software is rarely used
-nowadays. If your TNC is not equipped with the switch mentioned above, you
-can build in one yourself that switches over the highest address pin
-of the EPROM between HIGH and LOW level. After having inserted the new EPROM
-and switched to 6pack, apply power to the TNC for a first test. The connect
-and the status LED are lit for about a second if the firmware initialises
-the TNC correctly.
-
-5. Building and installing the 6pack driver
-
-The driver has been tested with kernel version 2.1.90. Use with older
-kernels may lead to a compilation error because the interface to a kernel
-function has been changed in the 2.1.8x kernels.
-
-How to turn on 6pack support:
-
-- In the linux kernel configuration program, select the code maturity level
-  options menu and turn on the prompting for development drivers.
-
-- Select the amateur radio support menu and turn on the serial port 6pack
-  driver.
-
-- Compile and install the kernel and the modules.
-
-To use the driver, the kissattach program delivered with the AX.25 utilities
-has to be modified.
-
-- Do a cd to the directory that holds the kissattach sources. Edit the
-  kissattach.c file. At the top, insert the following lines:
-
-  #ifndef N_6PACK
-  #define N_6PACK (N_AX25+1)
-  #endif
-
-  Then find the line
-   
-  int disc = N_AX25;
-
-  and replace N_AX25 by N_6PACK.
-
-- Recompile kissattach. Rename it to spattach to avoid confusions.
-
-Installing the driver:
-
-- Do an insmod 6pack. Look at your /var/log/messages file to check if the 
-  module has printed its initialization message.
-
-- Do a spattach as you would launch kissattach when starting a KISS port.
-  Check if the kernel prints the message '6pack: TNC found'. 
-
-- From here, everything should work as if you were setting up a KISS port.
-  The only difference is that the network device that represents
-  the 6pack port is called sp instead of sl or ax. So, sp0 would be the
-  first 6pack port.
-
-Although the driver has been tested on various platforms, I still declare it
-ALPHA. BE CAREFUL! Sync your disks before insmoding the 6pack module
-and spattaching. Watch out if your computer behaves strangely. Read section
-6 of this file about known problems.
-
-Note that the connect and status LEDs of the TNC are controlled in a
-different way than they are when the TNC is used with PC/FlexNet. When using
-FlexNet, the connect LED is on if there is a connection; the status LED is
-on if there is data in the buffer of the PC's AX.25 engine that has to be
-transmitted. Under Linux, the 6pack layer is beyond the AX.25 layer,
-so the 6pack driver doesn't know anything about connects or data that
-has not yet been transmitted. Therefore the LEDs are controlled
-as they are in KISS mode: The connect LED is turned on if data is transferred
-from the PC to the TNC over the serial line, the status LED if data is
-sent to the PC.
-
-6. Known problems
-
-When testing the driver with 2.0.3x kernels and
-operating with data rates on the radio channel of 9600 Baud or higher,
-the driver may, on certain systems, sometimes print the message '6pack:
-bad checksum', which is due to data loss if the other station sends two
-or more subsequent packets. I have been told that this is due to a problem
-with the serial driver of 2.0.3x kernels. I don't know yet if the problem
-still exists with 2.1.x kernels, as I have heard that the serial driver
-code has been changed with 2.1.x.
-
-When shutting down the sp interface with ifconfig, the kernel crashes if
-there is still an AX.25 connection left over which an IP connection was
-running, even if that IP connection is already closed. The problem does not
-occur when there is a bare AX.25 connection still running. I don't know if
-this is a problem of the 6pack driver or something else in the kernel.
-
-The driver has been tested as a module, not yet as a kernel-builtin driver.
-
-The 6pack protocol supports daisy-chaining of TNCs in a token ring, which is
-connected to one serial port of the PC. This feature is not implemented
-and at least at the moment I won't be able to do it because I do not have
-the opportunity to build a TNC daisy-chain and test it.
-
-Some of the comments in the source code are inaccurate. They are left from
-the SLIP/KISS driver, from which the 6pack driver has been derived.
-I haven't modified or removed them yet -- sorry! The code itself needs
-some cleaning and optimizing. This will be done in a later release.
-
-If you encounter a bug or if you have a question or suggestion concerning the
-driver, feel free to mail me, using the addresses given at the beginning of
-this file.
-
-Have fun!
-
-Andreas
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5b3421ec25ec..dc37fc8d5bee 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -37,6 +37,7 @@ Contents:
    tls-offload
    nfc
    6lowpan
+   6pack
 
 .. only::  subproject and html
 
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
index 8e05b5c31a77..bf306fed04cc 100644
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -30,7 +30,7 @@ config 6PACK
 
 	  Note that this driver is still experimental and might cause
 	  problems. For details about the features and the usage of the
-	  driver, read <file:Documentation/networking/6pack.txt>.
+	  driver, read <file:Documentation/networking/6pack.rst>.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called 6pack.
-- 
cgit v1.2.3-59-g8ed1b


From 5a7f3132121bbcafd61f616170a08e511d675347 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:18 +0200
Subject: docs: networking: convert altera_tse.txt to ReST

- add SPDX header;
- use copyright symbol;
- adjust titles and chapters, adding proper markups;
- mark lists as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/altera_tse.rst | 286 ++++++++++++++++++++++++++++++++
 Documentation/networking/altera_tse.txt | 263 -----------------------------
 Documentation/networking/index.rst      |   1 +
 3 files changed, 287 insertions(+), 263 deletions(-)
 create mode 100644 Documentation/networking/altera_tse.rst
 delete mode 100644 Documentation/networking/altera_tse.txt

diff --git a/Documentation/networking/altera_tse.rst b/Documentation/networking/altera_tse.rst
new file mode 100644
index 000000000000..7a7040072e58
--- /dev/null
+++ b/Documentation/networking/altera_tse.rst
@@ -0,0 +1,286 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+=======================================
+Altera Triple-Speed Ethernet MAC driver
+=======================================
+
+Copyright |copy| 2008-2014 Altera Corporation
+
+This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers
+using the SGDMA and MSGDMA soft DMA IP components. The driver uses the
+platform bus to obtain component resources. The designs used to test this
+driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board,
+and tested with ARM and NIOS processor hosts separately. The anticipated use
+cases are simple communications between an embedded system and an external peer
+for status and simple configuration of the embedded system.
+
+For more information visit www.altera.com and www.rocketboards.org. Support
+forums for the driver may be found on www.rocketboards.org, and a design used
+to test this driver may be found there as well. Support is also available from
+the maintainer of this driver, found in MAINTAINERS.
+
+The Triple-Speed Ethernet, SGDMA, and MSGDMA components are all soft IP
+components that can be assembled and built into an FPGA using the Altera
+Quartus toolchain. Quartus 13.1 and 14.0 were used to build the design that
+this driver was tested against. The sopc2dts tool is used to create the
+device tree for the driver, and may be found at rocketboards.org.
+
+The driver probe function examines the device tree and determines if the
+Triple-Speed Ethernet instance is using an SGDMA or MSGDMA component. The
+probe function then installs the appropriate set of DMA routines to
+initialize, setup transmits, receives, and interrupt handling primitives for
+the respective configurations.
+
+The SGDMA component is to be deprecated in the near future (over the next 1-2
+years as of this writing in early 2014) in favor of the MSGDMA component.
+SGDMA support is included for existing designs and reference in case a
+developer wishes to support their own soft DMA logic and driver support. Any
+new designs should not use the SGDMA.
+
+The SGDMA supports only a single transmit or receive operation at a time, and
+therefore will not perform as well compared to the MSGDMA soft IP. Please
+visit www.altera.com for known, documented SGDMA errata.
+
+Scatter-gather DMA is not supported by the SGDMA or MSGDMA at this time.
+Scatter-gather DMA will be added to a future maintenance update to this
+driver.
+
+Jumbo frames are not supported at this time.
+
+The driver limits PHY operations to 10/100Mbps, and has not yet been fully
+tested for 1Gbps. This support will be added in a future maintenance update.
+
+1. Kernel Configuration
+=======================
+
+The kernel configuration option is ALTERA_TSE:
+
+ Device Drivers ---> Network device support ---> Ethernet driver support --->
+ Altera Triple-Speed Ethernet MAC support (ALTERA_TSE)
+
+2. Driver parameters list
+=========================
+
+	- debug: message level (0: no output, 16: all);
+	- dma_rx_num: Number of descriptors in the RX list (default is 64);
+	- dma_tx_num: Number of descriptors in the TX list (default is 64).
+
+3. Command line options
+=======================
+
+Driver parameters can also be passed on the command line by using::
+
+	altera_tse=dma_rx_num:128,dma_tx_num:512
+
+4. Driver information and notes
+===============================
+
+4.1. Transmit process
+---------------------
+When the driver's transmit routine is called by the kernel, it sets up a
+transmit descriptor by calling the underlying DMA transmit routine (SGDMA or
+MSGDMA), and initiates a transmit operation. Once the transmit is complete, an
+interrupt is driven by the transmit DMA logic. The driver handles the transmit
+completion in the context of the interrupt handling chain by recycling
+resource required to send and track the requested transmit operation.
+
+4.2. Receive process
+--------------------
+The driver will post receive buffers to the receive DMA logic during driver
+initialization. Receive buffers may or may not be queued depending upon the
+underlying DMA logic (MSGDMA is able to queue receive buffers, SGDMA is not able
+to queue receive buffers to the SGDMA receive logic). When a packet is
+received, the DMA logic generates an interrupt. The driver handles a receive
+interrupt by obtaining the DMA receive logic status, reaping receive
+completions until no more receive completions are available.
+
+4.3. Interrupt Mitigation
+-------------------------
+The driver is able to mitigate the number of its DMA interrupts
+using NAPI for receive operations. Interrupt mitigation is not yet supported
+for transmit operations, but will be added in a future maintenance release.
+
+4.4. Ethtool support
+--------------------
+Ethtool is supported. Driver statistics and internal errors can be obtained
+using the ``ethtool -S ethX`` command. It is possible to dump registers etc.
+
+4.5. PHY Support
+----------------
+The driver is compatible with PAL to work with PHY and GPHY devices.
+
+4.6. List of source files:
+--------------------------
+ - Kconfig
+ - Makefile
+ - altera_tse_main.c: main network device driver
+ - altera_tse_ethtool.c: ethtool support
+ - altera_tse.h: private driver structure and common definitions
+ - altera_msgdma.h: MSGDMA implementation function definitions
+ - altera_sgdma.h: SGDMA implementation function definitions
+ - altera_msgdma.c: MSGDMA implementation
+ - altera_sgdma.c: SGDMA implementation
+ - altera_sgdmahw.h: SGDMA register and descriptor definitions
+ - altera_msgdmahw.h: MSGDMA register and descriptor definitions
+ - altera_utils.c: Driver utility functions
+ - altera_utils.h: Driver utility function definitions
+
+5. Debug Information
+====================
+
+The driver exports debug information such as internal statistics,
+debug information, MAC and DMA registers etc.
+
+A user may use the ethtool support to get statistics:
+e.g. using: ethtool -S ethX (that shows the statistics counters)
+or sees the MAC registers: e.g. using: ethtool -d ethX
+
+The developer can also use the "debug" module parameter to get
+further debug information.
+
+6. Statistics Support
+=====================
+
+The controller and driver support a mix of IEEE standard defined statistics,
+RFC defined statistics, and driver or Altera defined statistics. The four
+specifications containing the standard definitions for these statistics are
+as follows:
+
+ - IEEE 802.3-2012 - IEEE Standard for Ethernet.
+ - RFC 2863 found at http://www.rfc-editor.org/rfc/rfc2863.txt.
+ - RFC 2819 found at http://www.rfc-editor.org/rfc/rfc2819.txt.
+ - Altera Triple Speed Ethernet User Guide, found at http://www.altera.com
+
+The statistics supported by the TSE and the device driver are as follows:
+
+"tx_packets" is equivalent to aFramesTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.2. This statistic is the count of frames that are successfully
+transmitted.
+
+"rx_packets" is equivalent to aFramesReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.5. This statistic is the count of frames that are successfully
+received. This count does not include any error packets such as CRC errors,
+length errors, or alignment errors.
+
+"rx_crc_errors" is equivalent to aFrameCheckSequenceErrors defined in IEEE
+802.3-2012, Section 5.2.2.1.6. This statistic is the count of frames that are
+an integral number of bytes in length and do not pass the CRC test as the frame
+is received.
+
+"rx_align_errors" is equivalent to aAlignmentErrors defined in IEEE 802.3-2012,
+Section 5.2.2.1.7. This statistic is the count of frames that are not an
+integral number of bytes in length and do not pass the CRC test as the frame is
+received.
+
+"tx_bytes" is equivalent to aOctetsTransmittedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.8. This statistic is the count of data and pad bytes
+successfully transmitted from the interface.
+
+"rx_bytes" is equivalent to aOctetsReceivedOK defined in IEEE 802.3-2012,
+Section 5.2.2.1.14. This statistic is the count of data and pad bytes
+successfully received by the controller.
+
+"tx_pause" is equivalent to aPAUSEMACCtrlFramesTransmitted defined in IEEE
+802.3-2012, Section 30.3.4.2. This statistic is a count of PAUSE frames
+transmitted from the network controller.
+
+"rx_pause" is equivalent to aPAUSEMACCtrlFramesReceived defined in IEEE
+802.3-2012, Section 30.3.4.3. This statistic is a count of PAUSE frames
+received by the network controller.
+
+"rx_errors" is equivalent to ifInErrors defined in RFC 2863. This statistic is
+a count of the number of packets received containing errors that prevented the
+packet from being delivered to a higher level protocol.
+
+"tx_errors" is equivalent to ifOutErrors defined in RFC 2863. This statistic
+is a count of the number of packets that could not be transmitted due to errors.
+
+"rx_unicast" is equivalent to ifInUcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were not addressed
+to the broadcast address or a multicast group.
+
+"rx_multicast" is equivalent to ifInMulticastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+a multicast address group.
+
+"rx_broadcast" is equivalent to ifInBroadcastPkts defined in RFC 2863. This
+statistic is a count of the number of packets received that were addressed to
+the broadcast address.
+
+"tx_discards" is equivalent to ifOutDiscards defined in RFC 2863. This
+statistic is the number of outbound packets not transmitted even though an
+error was not detected. An example of a reason this might occur is to free up
+internal buffer space.
+
+"tx_unicast" is equivalent to ifOutUcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were not addressed to
+a multicast group or broadcast address.
+
+"tx_multicast" is equivalent to ifOutMulticastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+multicast group.
+
+"tx_broadcast" is equivalent to ifOutBroadcastPkts defined in RFC 2863. This
+statistic counts the number of packets transmitted that were addressed to a
+broadcast address.
+
+"ether_drops" is equivalent to etherStatsDropEvents defined in RFC 2819.
+This statistic counts the number of packets dropped due to lack of internal
+controller resources.
+
+"rx_total_bytes" is equivalent to etherStatsOctets defined in RFC 2819.
+This statistic counts the total number of bytes received by the controller,
+including error and discarded packets.
+
+"rx_total_packets" is equivalent to etherStatsPkts defined in RFC 2819.
+This statistic counts the total number of packets received by the controller,
+including error, discarded, unicast, multicast, and broadcast packets.
+
+"rx_undersize" is equivalent to etherStatsUndersizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets received less
+than 64 bytes long.
+
+"rx_oversize" is equivalent to etherStatsOversizePkts defined in RFC 2819.
+This statistic counts the number of correctly formed packets greater than 1518
+bytes long.
+
+"rx_64_bytes" is equivalent to etherStatsPkts64Octets defined in RFC 2819.
+This statistic counts the total number of packets received that were 64 octets
+in length.
+
+"rx_65_127_bytes" is equivalent to etherStatsPkts65to127Octets defined in RFC
+2819. This statistic counts the total number of packets received that were
+between 65 and 127 octets in length inclusive.
+
+"rx_128_255_bytes" is equivalent to etherStatsPkts128to255Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 128 and 255 octets in length inclusive.
+
+"rx_256_511_bytes" is equivalent to etherStatsPkts256to511Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 256 and 511 octets in length inclusive.
+
+"rx_512_1023_bytes" is equivalent to etherStatsPkts512to1023Octets defined in
+RFC 2819. This statistic is the total number of packets received that were
+between 512 and 1023 octets in length inclusive.
+
+"rx_1024_1518_bytes" is equivalent to etherStatsPkts1024to1518Octets defined
+in RFC 2819. This statistic is the total number of packets received that were
+between 1024 and 1518 octets in length inclusive.
+
+"rx_gte_1519_bytes" is a statistic defined specific to the behavior of the
+Altera TSE. This statistic counts the number of received good and errored
+frames between the length of 1519 and the maximum frame length configured
+in the frm_length register. See the Altera TSE User Guide for more details.
+
+"rx_jabbers" is equivalent to etherStatsJabbers defined in RFC 2819. This
+statistic is the total number of packets received that were longer than 1518
+octets, and had either a bad CRC with an integral number of octets (CRC Error)
+or a bad CRC with a non-integral number of octets (Alignment Error).
+
+"rx_runts" is equivalent to etherStatsFragments defined in RFC 2819. This
+statistic is the total number of packets received that were less than 64 octets
+in length and had either a bad CRC with an integral number of octets (CRC
+error) or a bad CRC with a non-integral number of octets (Alignment Error).
diff --git a/Documentation/networking/altera_tse.txt b/Documentation/networking/altera_tse.txt
deleted file mode 100644
index 50b8589d12fd..000000000000
--- a/Documentation/networking/altera_tse.txt
+++ /dev/null
@@ -1,263 +0,0 @@
-       Altera Triple-Speed Ethernet MAC driver
-
-Copyright (C) 2008-2014 Altera Corporation
-
-This is the driver for the Altera Triple-Speed Ethernet (TSE) controllers
-using the SGDMA and MSGDMA soft DMA IP components. The driver uses the
-platform bus to obtain component resources. The designs used to test this
-driver were built for a Cyclone(R) V SOC FPGA board, a Cyclone(R) V FPGA board,
-and tested with ARM and NIOS processor hosts separately. The anticipated use
-cases are simple communications between an embedded system and an external peer
-for status and simple configuration of the embedded system.
-
-For more information visit www.altera.com and www.rocketboards.org. Support
-forums for the driver may be found on www.rocketboards.org, and a design used
-to test this driver may be found there as well. Support is also available from
-the maintainer of this driver, found in MAINTAINERS.
-
-The Triple-Speed Ethernet, SGDMA, and MSGDMA components are all soft IP
-components that can be assembled and built into an FPGA using the Altera
-Quartus toolchain. Quartus 13.1 and 14.0 were used to build the design that
-this driver was tested against. The sopc2dts tool is used to create the
-device tree for the driver, and may be found at rocketboards.org.
-
-The driver probe function examines the device tree and determines if the
-Triple-Speed Ethernet instance is using an SGDMA or MSGDMA component. The
-probe function then installs the appropriate set of DMA routines to
-initialize, setup transmits, receives, and interrupt handling primitives for
-the respective configurations.
-
-The SGDMA component is to be deprecated in the near future (over the next 1-2
-years as of this writing in early 2014) in favor of the MSGDMA component.
-SGDMA support is included for existing designs and reference in case a
-developer wishes to support their own soft DMA logic and driver support. Any
-new designs should not use the SGDMA.
-
-The SGDMA supports only a single transmit or receive operation at a time, and
-therefore will not perform as well compared to the MSGDMA soft IP. Please
-visit www.altera.com for known, documented SGDMA errata.
-
-Scatter-gather DMA is not supported by the SGDMA or MSGDMA at this time.
-Scatter-gather DMA will be added to a future maintenance update to this
-driver.
-
-Jumbo frames are not supported at this time.
-
-The driver limits PHY operations to 10/100Mbps, and has not yet been fully
-tested for 1Gbps. This support will be added in a future maintenance update.
-
-1) Kernel Configuration
-The kernel configuration option is ALTERA_TSE:
- Device Drivers ---> Network device support ---> Ethernet driver support --->
- Altera Triple-Speed Ethernet MAC support (ALTERA_TSE)
-
-2) Driver parameters list:
-	debug: message level (0: no output, 16: all);
-	dma_rx_num: Number of descriptors in the RX list (default is 64);
-	dma_tx_num: Number of descriptors in the TX list (default is 64).
-
-3) Command line options
-Driver parameters can be also passed in command line by using:
-	altera_tse=dma_rx_num:128,dma_tx_num:512
-
-4) Driver information and notes
-
-4.1) Transmit process
-When the driver's transmit routine is called by the kernel, it sets up a
-transmit descriptor by calling the underlying DMA transmit routine (SGDMA or
-MSGDMA), and initiates a transmit operation. Once the transmit is complete, an
-interrupt is driven by the transmit DMA logic. The driver handles the transmit
-completion in the context of the interrupt handling chain by recycling
-resource required to send and track the requested transmit operation.
-
-4.2) Receive process
-The driver will post receive buffers to the receive DMA logic during driver
-initialization. Receive buffers may or may not be queued depending upon the
-underlying DMA logic (MSGDMA is able queue receive buffers, SGDMA is not able
-to queue receive buffers to the SGDMA receive logic). When a packet is
-received, the DMA logic generates an interrupt. The driver handles a receive
-interrupt by obtaining the DMA receive logic status, reaping receive
-completions until no more receive completions are available.
-
-4.3) Interrupt Mitigation
-The driver is able to mitigate the number of its DMA interrupts
-using NAPI for receive operations. Interrupt mitigation is not yet supported
-for transmit operations, but will be added in a future maintenance release.
-
-4.4) Ethtool support
-Ethtool is supported. Driver statistics and internal errors can be taken using:
-ethtool -S ethX command. It is possible to dump registers etc.
-
-4.5) PHY Support
-The driver is compatible with PAL to work with PHY and GPHY devices.
-
-4.7) List of source files:
- o Kconfig
- o Makefile
- o altera_tse_main.c: main network device driver
- o altera_tse_ethtool.c: ethtool support
- o altera_tse.h: private driver structure and common definitions
- o altera_msgdma.h: MSGDMA implementation function definitions
- o altera_sgdma.h: SGDMA implementation function definitions
- o altera_msgdma.c: MSGDMA implementation
- o altera_sgdma.c: SGDMA implementation
- o altera_sgdmahw.h: SGDMA register and descriptor definitions
- o altera_msgdmahw.h: MSGDMA register and descriptor definitions
- o altera_utils.c: Driver utility functions
- o altera_utils.h: Driver utility function definitions
-
-5) Debug Information
-
-The driver exports debug information such as internal statistics,
-debug information, MAC and DMA registers etc.
-
-A user may use the ethtool support to get statistics:
-e.g. using: ethtool -S ethX (that shows the statistics counters)
-or sees the MAC registers: e.g. using: ethtool -d ethX
-
-The developer can also use the "debug" module parameter to get
-further debug information.
-
-6) Statistics Support
-
-The controller and driver support a mix of IEEE standard defined statistics,
-RFC defined statistics, and driver or Altera defined statistics. The four
-specifications containing the standard definitions for these statistics are
-as follows:
-
- o IEEE 802.3-2012 - IEEE Standard for Ethernet.
- o RFC 2863 found at http://www.rfc-editor.org/rfc/rfc2863.txt.
- o RFC 2819 found at http://www.rfc-editor.org/rfc/rfc2819.txt.
- o Altera Triple Speed Ethernet User Guide, found at http://www.altera.com
-
-The statistics supported by the TSE and the device driver are as follows:
-
-"tx_packets" is equivalent to aFramesTransmittedOK defined in IEEE 802.3-2012,
-Section 5.2.2.1.2. This statistics is the count of frames that are successfully
-transmitted.
-
-"rx_packets" is equivalent to aFramesReceivedOK defined in IEEE 802.3-2012,
-Section 5.2.2.1.5. This statistic is the count of frames that are successfully
-received. This count does not include any error packets such as CRC errors,
-length errors, or alignment errors.
-
-"rx_crc_errors" is equivalent to aFrameCheckSequenceErrors defined in IEEE
-802.3-2012, Section 5.2.2.1.6. This statistic is the count of frames that are
-an integral number of bytes in length and do not pass the CRC test as the frame
-is received.
-
-"rx_align_errors" is equivalent to aAlignmentErrors defined in IEEE 802.3-2012,
-Section 5.2.2.1.7. This statistic is the count of frames that are not an
-integral number of bytes in length and do not pass the CRC test as the frame is
-received.
-
-"tx_bytes" is equivalent to aOctetsTransmittedOK defined in IEEE 802.3-2012,
-Section 5.2.2.1.8. This statistic is the count of data and pad bytes
-successfully transmitted from the interface.
-
-"rx_bytes" is equivalent to aOctetsReceivedOK defined in IEEE 802.3-2012,
-Section 5.2.2.1.14. This statistic is the count of data and pad bytes
-successfully received by the controller.
-
-"tx_pause" is equivalent to aPAUSEMACCtrlFramesTransmitted defined in IEEE
-802.3-2012, Section 30.3.4.2. This statistic is a count of PAUSE frames
-transmitted from the network controller.
-
-"rx_pause" is equivalent to aPAUSEMACCtrlFramesReceived defined in IEEE
-802.3-2012, Section 30.3.4.3. This statistic is a count of PAUSE frames
-received by the network controller.
-
-"rx_errors" is equivalent to ifInErrors defined in RFC 2863. This statistic is
-a count of the number of packets received containing errors that prevented the
-packet from being delivered to a higher level protocol.
-
-"tx_errors" is equivalent to ifOutErrors defined in RFC 2863. This statistic
-is a count of the number of packets that could not be transmitted due to errors.
-
-"rx_unicast" is equivalent to ifInUcastPkts defined in RFC 2863. This
-statistic is a count of the number of packets received that were not addressed
-to the broadcast address or a multicast group.
-
-"rx_multicast" is equivalent to ifInMulticastPkts defined in RFC 2863. This
-statistic is a count of the number of packets received that were addressed to
-a multicast address group.
-
-"rx_broadcast" is equivalent to ifInBroadcastPkts defined in RFC 2863. This
-statistic is a count of the number of packets received that were addressed to
-the broadcast address.
-
-"tx_discards" is equivalent to ifOutDiscards defined in RFC 2863. This
-statistic is the number of outbound packets not transmitted even though an
-error was not detected. An example of a reason this might occur is to free up
-internal buffer space.
-
-"tx_unicast" is equivalent to ifOutUcastPkts defined in RFC 2863. This
-statistic counts the number of packets transmitted that were not addressed to
-a multicast group or broadcast address.
-
-"tx_multicast" is equivalent to ifOutMulticastPkts defined in RFC 2863. This
-statistic counts the number of packets transmitted that were addressed to a
-multicast group.
-
-"tx_broadcast" is equivalent to ifOutBroadcastPkts defined in RFC 2863. This
-statistic counts the number of packets transmitted that were addressed to a
-broadcast address.
-
-"ether_drops" is equivalent to etherStatsDropEvents defined in RFC 2819.
-This statistic counts the number of packets dropped due to lack of internal
-controller resources.
-
-"rx_total_bytes" is equivalent to etherStatsOctets defined in RFC 2819.
-This statistic counts the total number of bytes received by the controller,
-including error and discarded packets.
-
-"rx_total_packets" is equivalent to etherStatsPkts defined in RFC 2819.
-This statistic counts the total number of packets received by the controller,
-including error, discarded, unicast, multicast, and broadcast packets.
-
-"rx_undersize" is equivalent to etherStatsUndersizePkts defined in RFC 2819.
-This statistic counts the number of correctly formed packets received less
-than 64 bytes long.
-
-"rx_oversize" is equivalent to etherStatsOversizePkts defined in RFC 2819.
-This statistic counts the number of correctly formed packets greater than 1518
-bytes long.
-
-"rx_64_bytes" is equivalent to etherStatsPkts64Octets defined in RFC 2819.
-This statistic counts the total number of packets received that were 64 octets
-in length.
-
-"rx_65_127_bytes" is equivalent to etherStatsPkts65to127Octets defined in RFC
-2819. This statistic counts the total number of packets received that were
-between 65 and 127 octets in length inclusive.
-
-"rx_128_255_bytes" is equivalent to etherStatsPkts128to255Octets defined in
-RFC 2819. This statistic is the total number of packets received that were
-between 128 and 255 octets in length inclusive.
-
-"rx_256_511_bytes" is equivalent to etherStatsPkts256to511Octets defined in
-RFC 2819. This statistic is the total number of packets received that were
-between 256 and 511 octets in length inclusive.
-
-"rx_512_1023_bytes" is equivalent to etherStatsPkts512to1023Octets defined in
-RFC 2819. This statistic is the total number of packets received that were
-between 512 and 1023 octets in length inclusive.
-
-"rx_1024_1518_bytes" is equivalent to etherStatsPkts1024to1518Octets define
-in RFC 2819. This statistic is the total number of packets received that were
-between 1024 and 1518 octets in length inclusive.
-
-"rx_gte_1519_bytes" is a statistic defined specific to the behavior of the
-Altera TSE. This statistics counts the number of received good and errored
-frames between the length of 1519 and the maximum frame length configured
-in the frm_length register. See the Altera TSE User Guide for More details.
-
-"rx_jabbers" is equivalent to etherStatsJabbers defined in RFC 2819. This
-statistic is the total number of packets received that were longer than 1518
-octets, and had either a bad CRC with an integral number of octets (CRC Error)
-or a bad CRC with a non-integral number of octets (Alignment Error).
-
-"rx_runts" is equivalent to etherStatsFragments defined in RFC 2819. This
-statistic is the total number of packets received that were less than 64 octets
-in length and had either a bad CRC with an integral number of octets (CRC
-error) or a bad CRC with a non-integral number of octets (Alignment Error).
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index dc37fc8d5bee..96ffad845fd9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -38,6 +38,7 @@ Contents:
    nfc
    6lowpan
    6pack
+   altera_tse
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From aa92320b3e38f2b64b2d91a20761db1683e6c531 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:19 +0200
Subject: docs: networking: convert arcnet-hardware.txt to ReST

- add SPDX header;
- add document title markup;
- add notes markups;
- mark tables as such;
- mark code blocks and literals as such;
- adjust identation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/arcnet-hardware.rst | 3234 ++++++++++++++++++++++++++
 Documentation/networking/arcnet-hardware.txt | 3133 -------------------------
 Documentation/networking/index.rst           |    1 +
 3 files changed, 3235 insertions(+), 3133 deletions(-)
 create mode 100644 Documentation/networking/arcnet-hardware.rst
 delete mode 100644 Documentation/networking/arcnet-hardware.txt

diff --git a/Documentation/networking/arcnet-hardware.rst b/Documentation/networking/arcnet-hardware.rst
new file mode 100644
index 000000000000..b5a1a020c824
--- /dev/null
+++ b/Documentation/networking/arcnet-hardware.rst
@@ -0,0 +1,3234 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+ARCnet Hardware
+===============
+
+.. note::
+
+   1) This file is a supplement to arcnet.txt.  Please read that for general
+      driver configuration help.
+   2) This file is no longer Linux-specific.  It should probably be moved out
+      of the kernel sources.  Ideas?
+
+Because so many people (myself included) seem to have obtained ARCnet cards
+without manuals, this file contains a quick introduction to ARCnet hardware,
+some cabling tips, and a listing of all jumper settings I can find. Please
+e-mail apenwarr@worldvisions.ca with any settings for your particular card,
+or any other information you have!
+
+
+Introduction to ARCnet
+======================
+
+ARCnet is a network type which works in a way similar to popular Ethernet
+networks but which is also different in some very important ways.
+
+First of all, you can get ARCnet cards in at least two speeds: 2.5 Mbps
+(slower than Ethernet) and 100 Mbps (faster than normal Ethernet).  In fact,
+there are others as well, but these are less common.  The different hardware
+types, as far as I'm aware, are not compatible and so you cannot wire a
+100 Mbps card to a 2.5 Mbps card, and so on.  From what I hear, my driver does
+work with 100 Mbps cards, but I haven't been able to verify this myself,
+since I only have the 2.5 Mbps variety.  It is probably not going to saturate
+your 100 Mbps card.  Stop complaining. :)
+
+You also cannot connect an ARCnet card to any kind of Ethernet card and
+expect it to work.
+
+There are two "types" of ARCnet - STAR topology and BUS topology.  This
+refers to how the cards are meant to be wired together.  According to most
+available documentation, you can only connect STAR cards to STAR cards and
+BUS cards to BUS cards.  That makes sense, right?  Well, it's not quite
+true; see below under "Cabling."
+
+Once you get past these little stumbling blocks, ARCnet is actually quite a
+well-designed standard.  It uses something called "modified token passing"
+which makes it completely incompatible with so-called "Token Ring" cards,
+but which makes transfers much more reliable than Ethernet does.  In fact,
+ARCnet will guarantee that a packet arrives safely at the destination, and
+even if it can't possibly be delivered properly (i.e. because of a cable
+break, or because the destination computer does not exist) it will at least
+tell the sender about it.
+
+Because of the carefully defined action of the "token", it will always make
+a pass around the "ring" within a maximum length of time.  This makes it
+useful for realtime networks.
+
+In addition, all known ARCnet cards have an (almost) identical programming
+interface.  This means that with one ARCnet driver you can support any
+card, whereas with Ethernet each manufacturer uses what is sometimes a
+completely different programming interface, leading to a lot of different,
+sometimes very similar, Ethernet drivers.  Of course, always using the same
+programming interface also means that when high-performance hardware
+facilities like PCI bus mastering DMA appear, it's hard to take advantage of
+them.  Let's not go into that.
+
+One thing that makes ARCnet cards difficult to program for, however, is the
+limit on their packet sizes; standard ARCnet can only send packets that are
+up to 508 bytes in length.  This is smaller than the Internet "bare minimum"
+of 576 bytes, let alone the Ethernet MTU of 1500.  To compensate, an extra
+level of encapsulation is defined by RFC1201, which I call "packet
+splitting," that allows "virtual packets" to grow as large as 64K each,
+although they are generally kept down to the Ethernet-style 1500 bytes.
+
+For more information on the advantages and disadvantages (mostly the
+advantages) of ARCnet networks, you might try the "ARCnet Trade Association"
+WWW page:
+
+	http://www.arcnet.com
+
+
+Cabling ARCnet Networks
+=======================
+
+This section was rewritten by
+
+	Vojtech Pavlik     <vojtech@suse.cz>
+
+using information from several people, including:
+
+	- Avery Pennarun     <apenwarr@worldvisions.ca>
+	- Stephen A. Wood    <saw@hallc1.cebaf.gov>
+	- John Paul Morrison <jmorriso@bogomips.ee.ubc.ca>
+	- Joachim Koenig     <jojo@repas.de>
+
+and Avery touched it up a bit, at Vojtech's request.
+
+ARCnet (the classic 2.5 Mbps version) can be connected by two different
+types of cabling: coax and twisted pair.  The other ARCnet-type networks
+(100 Mbps TCNS and 320 kbps - 32 Mbps ARCnet Plus) use different types of
+cabling (Type1, Fiber, C1, C4, C5).
+
+For a coax network, you "should" use 93 Ohm RG-62 cable.  But other cables
+also work fine, because ARCnet is a very stable network. I personally use 75
+Ohm TV antenna cable.
+
+Cards for coax cabling are shipped in two different variants: for BUS and
+STAR network topologies.  They are mostly the same.  The only difference
+lies in the hybrid chip installed.  BUS cards use high impedance output,
+while STAR use low impedance.  A low impedance card (STAR) is electrically
+equivalent to a high impedance one with a terminator installed.
+
+Usually, the ARCnet networks are built up from STAR cards and hubs.  There
+are two types of hubs - active and passive.  Passive hubs are small boxes
+with four BNC connectors containing four 47 Ohm resistors::
+
+	   |         | wires
+	   R         + junction
+	-R-+-R-      R 47 Ohm resistors
+	   R
+	   |
+
+The shielding is connected together.  Active hubs are much more complicated;
+they are powered and contain electronics to amplify the signal and send it
+to other segments of the net.  They usually have eight connectors.  Active
+hubs come in two variants - dumb and smart.  The dumb variant just
+amplifies, but the smart one decodes to digital and encodes back all packets
+coming through.  This is much better if you have several hubs in the net,
+since many dumb active hubs may worsen the signal quality.
+
+And now to the cabling.  What you can connect together:
+
+1. A card to a card.  This is the simplest way of creating a 2-computer
+   network.
+
+2. A card to a passive hub.  Remember that all unused connectors on the hub
+   must be properly terminated with 93 Ohm (or something else if you don't
+   have the right ones) terminators.
+
+	(Avery's note: oops, I didn't know that.  Mine (TV cable) works
+	anyway, though.)
+
+3. A card to an active hub.  There is no need to terminate the unused
+   connectors except some kind of aesthetic feeling.  But, there may not be
+   more than eleven active hubs between any two computers.  That of course
+   doesn't limit the number of active hubs on the network.
+
+4. An active hub to another.
+
+5. An active hub to passive hub.
+
+Remember that you cannot connect two passive hubs together.  The power loss
+implied by such a connection is too high for the net to operate reliably.
+
+An example of a typical ARCnet network::
+
+	   R                     S - STAR type card
+    S------H--------A-------S    R - Terminator
+	   |        |            H - Hub
+	   |        |            A - Active hub
+	   |   S----H----S
+	   S        |
+		    |
+		    S
+
+The BUS topology is very similar to the one used by Ethernet.  The only
+difference is in cable and terminators: they should be 93 Ohm.  Ethernet
+uses 50 Ohm impedance. You use T connectors to put the computers on a single
+line of cable, the bus. You have to put terminators at both ends of the
+cable. A typical BUS ARCnet network looks like::
+
+    RT----T------T------T------T------TR
+     B    B      B      B      B      B
+
+  B - BUS type card
+  R - Terminator
+  T - T connector
+
+But that is not all! The two types can be connected together.  According to
+the official documentation the only way of connecting them is using an active
+hub::
+
+	 A------T------T------TR
+	 |      B      B      B
+     S---H---S
+	 |
+	 S
+
+The official docs also state that you can use STAR cards at the ends of
+BUS network in place of a BUS card and a terminator::
+
+     S------T------T------S
+	    B      B
+
+But, according to my own experiments, you can simply hang a BUS type card
+anywhere in the middle of a cable in a STAR topology network.  And more - you
+can use the bus card in place of any star card if you use a terminator. Then
+you can build very complicated networks fulfilling all your needs!  An
+example::
+
+				  S
+				  |
+	   RT------T-------T------H------S
+	    B      B       B      |
+				  |       R
+    S------A------T-------T-------A-------H------TR
+	   |      B       B       |       |      B
+	   |   S                 BT       |
+	   |   |                  |  S----A-----S
+    S------H---A----S             |       |
+	   |   |      S------T----H---S   |
+	   S   S             B    R       S
+
+A basically different cabling scheme is used with Twisted Pair cabling. Each
+of the TP cards has two RJ (phone-cord style) connectors.  The cards are
+then daisy-chained together using a cable connecting every two neighboring
+cards.  The ends are terminated with RJ 93 Ohm terminators which plug into
+the empty connectors of cards on the ends of the chain.  An example::
+
+	  ___________   ___________
+      _R_|_         _|_|_         _|_R_
+     |     |       |     |       |     |
+     |Card |       |Card |       |Card |
+     |_____|       |_____|       |_____|
+
+
+There are also hubs for the TP topology.  There is nothing difficult
+involved in using them; you just connect a TP chain to a hub on any end or
+even at both.  This way you can create almost any network configuration.
+The maximum of 11 hubs between any two computers on the net applies here as
+well.  An example::
+
+    RP-------P--------P--------H-----P------P-----PR
+			       |
+      RP-----H--------P--------H-----P------PR
+	     |                 |
+	     PR                PR
+
+    R - RJ Terminator
+    P - TP Card
+    H - TP Hub
+
+Like any network, ARCnet has a limited cable length.  These are the maximum
+cable lengths between two active ends (an active end being an active hub or
+a STAR card).
+
+		========== ======= ===========
+		RG-62       93 Ohm up to 650 m
+		RG-59/U     75 Ohm up to 457 m
+		RG-11/U     75 Ohm up to 533 m
+		IBM Type 1 150 Ohm up to 200 m
+		IBM Type 3 100 Ohm up to 100 m
+		========== ======= ===========
+
+The maximum length of all cables connected to a passive hub is limited to 65
+meters for RG-62 cabling; less for others.  You can see that using passive
+hubs in a large network is a bad idea. The maximum length of a single "BUS
+Trunk" is about 300 meters for RG-62. The maximum distance between the two
+most distant points of the net is limited to 3000 meters. The maximum length
+of a TP cable between two cards/hubs is 650 meters.
+
+
+Setting the Jumpers
+===================
+
+All ARCnet cards should have a total of four or five different settings:
+
+  - the I/O address:  this is the "port" your ARCnet card is on.  Probed
+    values in the Linux ARCnet driver are only from 0x200 through 0x3F0. (If
+    your card has additional ones, which is possible, please tell me.) This
+    should not be the same as any other device on your system.  According to
+    a doc I got from Novell, MS Windows prefers values of 0x300 or more,
+    eating net connections on my system (at least) otherwise.  My guess is
+    this may be because, if your card is at 0x2E0, probing for a serial port
+    at 0x2E8 will reset the card and probably mess things up royally.
+
+	- Avery's favourite: 0x300.
+
+  - the IRQ: on  8-bit cards, it might be 2 (9), 3, 4, 5, or 7.
+	     on 16-bit cards, it might be 2 (9), 3, 4, 5, 7, or 10-15.
+
+    Make sure this is different from any other card on your system.  Note
+    that IRQ2 is the same as IRQ9, as far as Linux is concerned.  You can
+    "cat /proc/interrupts" for a somewhat complete list of which ones are in
+    use at any given time.  Here is a list of common usages from Vojtech
+    Pavlik <vojtech@suse.cz>:
+
+	("Not on bus" means there is no way for a card to generate this
+	interrupt)
+
+	======   =========================================================
+	IRQ  0   Timer 0 (Not on bus)
+	IRQ  1   Keyboard (Not on bus)
+	IRQ  2   IRQ Controller 2 (Not on bus, nor does interrupt the CPU)
+	IRQ  3   COM2
+	IRQ  4   COM1
+	IRQ  5   FREE (LPT2 if you have it; sometimes COM3; maybe PLIP)
+	IRQ  6   Floppy disk controller
+	IRQ  7   FREE (LPT1 if you don't use the polling driver; PLIP)
+	IRQ  8   Realtime Clock Interrupt (Not on bus)
+	IRQ  9   FREE (VGA vertical sync interrupt if enabled)
+	IRQ 10   FREE
+	IRQ 11   FREE
+	IRQ 12   FREE
+	IRQ 13   Numeric Coprocessor (Not on bus)
+	IRQ 14   Fixed Disk Controller
+	IRQ 15   FREE (Fixed Disk Controller 2 if you have it)
+	======   =========================================================
+
+
+	.. note::
+
+	   IRQ 9 is used on some video cards for the "vertical retrace"
+	   interrupt.  This interrupt would have been handy for things like
+	   video games, as it occurs exactly once per screen refresh, but
+	   unfortunately IBM cancelled this feature starting with the original
+	   VGA and thus many VGA/SVGA cards do not support it.  For this
+	   reason, no modern software uses this interrupt and it can almost
+	   always be safely disabled, if your video card supports it at all.
+
+	If your card for some reason CANNOT disable this IRQ (usually there
+	is a jumper), one solution would be to clip the printed circuit
+	contact on the board: it's the fourth contact from the left on the
+	back side.  I take no responsibility if you try this.
+
+	- Avery's favourite: IRQ2 (actually IRQ9).  Watch that VGA, though.
+
+  - the memory address:  Unlike most cards, ARCnets use "shared memory" for
+    copying buffers around.  Make SURE it doesn't conflict with any other
+    used memory in your system!
+
+    ::
+
+	A0000		- VGA graphics memory (ok if you don't have VGA)
+	B0000		- Monochrome text mode
+	C0000		\  One of these is your VGA BIOS - usually C0000.
+	E0000		/
+	F0000		- System BIOS
+
+    Anything less than 0xA0000 is, well, a BAD idea since it isn't above
+    640k.
+
+	- Avery's favourite: 0xD0000
+
+  - the station address:  Every ARCnet card has its own "unique" network
+    address from 0 to 255.  Unlike Ethernet, you can set this address
+    yourself with a jumper or switch (or on some cards, with special
+    software).  Since it's only 8 bits, you can only have 254 ARCnet cards
+    on a network.  DON'T use 0 or 255, since these are reserved (although
+    neat stuff will probably happen if you DO use them).  By the way, if you
+    haven't already guessed, don't set this the same as any other ARCnet on
+    your network!
+
+	- Avery's favourite:  3 and 4.  Not that it matters.
+
+  - There may be ETS1 and ETS2 settings.  These may or may not make a
+    difference on your card (many manuals call them "reserved"), but are
+    used to change the delays used when powering up a computer on the
+    network.  This is only necessary when wiring VERY long range ARCnet
+    networks, on the order of 4km or so; in any case, the only real
+    requirement here is that all cards on the network with ETS1 and ETS2
+    jumpers have them in the same position.  Chris Hindy <chrish@io.org>
+    sent in a chart with actual values for this:
+
+	======= ======= =============== ====================
+	ET1	ET2	Response Time	Reconfiguration Time
+	======= ======= =============== ====================
+	open	open	74.7us		840us
+	open	closed	283.4us		1680us
+	closed	open	561.8us		1680us
+	closed	closed	1118.6us	1680us
+	======= ======= =============== ====================
+
+    Make sure you set ETS1 and ETS2 to the SAME VALUE for all cards on your
+    network.
+
+Also, on many cards (not mine, though) there are red and green LEDs.
+Vojtech Pavlik <vojtech@suse.cz> tells me this is what they mean:
+
+	=============== =============== =====================================
+	GREEN           RED             Status
+	=============== =============== =====================================
+	OFF             OFF             Power off
+	OFF             Short flashes   Cabling problems (broken cable or not
+					terminated)
+	OFF (short)     ON              Card init
+	ON              ON              Normal state - everything OK, nothing
+					happens
+	ON              Long flashes    Data transfer
+	ON              OFF             Never happens (maybe when wrong ID)
+	=============== =============== =====================================
+
+
+The following is all the specific information people have sent me about
+their own particular ARCnet cards.  It is officially a mess, and contains
+huge amounts of duplicated information.  I have no time to fix it.  If you
+want to, PLEASE DO!  Just send me a 'diff -u' of all your changes.
+
+The model # is listed right above specifics for that card, so you should be
+able to use your text viewer's "search" function to find the entry you want.
+If you don't KNOW what kind of card you have, try looking through the
+various diagrams to see if you can tell.
+
+If your model isn't listed and/or has different settings, PLEASE PLEASE
+tell me.  I had to figure mine out without the manual, and it WASN'T FUN!
+
+Even if your ARCnet model isn't listed, but has the same jumpers as another
+model that is, please e-mail me to say so.
+
+Cards Listed in this file (in this order, mostly):
+
+	=============== ======================= ====
+	Manufacturer	Model #			Bits
+	=============== ======================= ====
+	SMC		PC100			8
+	SMC		PC110			8
+	SMC		PC120			8
+	SMC		PC130			8
+	SMC		PC270E			8
+	SMC		PC500			16
+	SMC		PC500Longboard		16
+	SMC		PC550Longboard		16
+	SMC		PC600			16
+	SMC		PC710			8
+	SMC?		LCS-8830(-T)		8/16
+	Puredata	PDI507			8
+	CNet Tech	CN120-Series		8
+	CNet Tech	CN160-Series		16
+	Lantech?	UM9065L chipset		8
+	Acer		5210-003		8
+	Datapoint?	LAN-ARC-8		8
+	Topware		TA-ARC/10		8
+	Thomas-Conrad	500-6242-0097 REV A	8
+	Waterloo?	(C)1985 Waterloo Micro. 8
+	No Name		--			8/16
+	No Name		Taiwan R.O.C?		8
+	No Name		Model 9058		8
+	Tiara		Tiara Lancard?		8
+	=============== ======================= ====
+
+
+* SMC = Standard Microsystems Corp.
+* CNet Tech = CNet Technology, Inc.
+
+Unclassified Stuff
+==================
+
+  - Please send any other information you can find.
+
+  - And some other stuff (more info is welcome!)::
+
+     From: root@ultraworld.xs4all.nl (Timo Hilbrink)
+     To: apenwarr@foxnet.net (Avery Pennarun)
+     Date: Wed, 26 Oct 1994 02:10:32 +0000 (GMT)
+     Reply-To: timoh@xs4all.nl
+
+     [...parts deleted...]
+
+     About the jumpers: On my PC130 there is one more jumper, located near the
+     cable-connector and it's for changing to star or bus topology;
+     closed: star - open: bus
+     On the PC500 are some more jumper-pins, one block labeled with RX,PDN,TXI
+     and another with ALE,LA17,LA18,LA19 these are undocumented..
+
+     [...more parts deleted...]
+
+     --- CUT ---
+
+Standard Microsystems Corp (SMC)
+================================
+
+PC100, PC110, PC120, PC130 (8-bit cards) and PC500, PC600 (16-bit cards)
+------------------------------------------------------------------------
+
+  - mainly from Avery Pennarun <apenwarr@worldvisions.ca>.  Values depicted
+    are from Avery's setup.
+  - special thanks to Timo Hilbrink <timoh@xs4all.nl> for noting that PC120,
+    130, 500, and 600 all have the same switches as Avery's PC100.
+    PC500/600 have several extra, undocumented pins though. (?)
+  - PC110 settings were verified by Stephen A. Wood <saw@cebaf.gov>
+  - Also, the JP- and S-numbers probably don't match your card exactly.  Try
+    to find jumpers/switches with the same number of settings - it's
+    probably more reliable.
+
+::
+
+	     JP5		       [|]    :    :    :    :
+	(IRQ Setting)		      IRQ2  IRQ3 IRQ4 IRQ5 IRQ7
+			Put exactly one jumper on exactly one set of pins.
+
+
+				  1  2   3  4  5  6   7  8  9 10
+	     S1                /----------------------------------\
+	(I/O and Memory        |  1  1 * 0  0  0  0 * 1  1  0  1  |
+	 addresses)            \----------------------------------/
+				  |--|   |--------|   |--------|
+				  (a)       (b)           (m)
+
+			WARNING.  It's very important when setting these which way
+			you're holding the card, and which way you think is '1'!
+
+			If you suspect that your settings are not being made
+			correctly, try reversing the direction or inverting the
+			switch positions.
+
+			a: The first digit of the I/O address.
+				Setting		Value
+				-------		-----
+				00		0
+				01		1
+				10		2
+				11		3
+
+			b: The second digit of the I/O address.
+				Setting		Value
+				-------		-----
+				0000		0
+				0001		1
+				0010		2
+				...		...
+				1110		E
+				1111		F
+
+			The I/O address is in the form ab0.  For example, if
+			a is 0x2 and b is 0xE, the address will be 0x2E0.
+
+			DO NOT SET THIS LESS THAN 0x200!!!!!
+
+
+			m: The first digit of the memory address.
+				Setting		Value
+				-------		-----
+				0000		0
+				0001		1
+				0010		2
+				...		...
+				1110		E
+				1111		F
+
+			The memory address is in the form m0000.  For example, if
+			m is D, the address will be 0xD0000.
+
+			DO NOT SET THIS TO C0000, F0000, OR LESS THAN A0000!
+
+				  1  2  3  4  5  6  7  8
+	     S2                /--------------------------\
+	(Station Address)      |  1  1  0  0  0  0  0  0  |
+			       \--------------------------/
+
+				Setting		Value
+				-------		-----
+				00000000	00
+				10000000	01
+				01000000	02
+				...
+				01111111	FE
+				11111111	FF
+
+			Note that this is binary with the digits reversed!
+
+			DO NOT SET THIS TO 0 OR 255 (0xFF)!
+
+
+PC130E/PC270E (8-bit cards)
+---------------------------
+
+  - from Juergen Seifert <seifert@htwm.de>
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original SMC Manual
+
+	     "Configuration Guide for ARCNET(R)-PC130E/PC270 Network
+	     Controller Boards Pub. # 900.044A June, 1989"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+SMC is a registered trademark of the Standard Microsystems Corporation
+
+The PC130E is an enhanced version of the PC130 board and is equipped with a
+standard BNC female connector for connection to RG-62/U coax cable.
+Since this board is designed both for point-to-point connection in star
+networks and for connection to bus networks, it is downwardly compatible
+with all the other standard boards designed for coax networks (that is,
+the PC120, PC110 and PC100 star topology boards and the PC220, PC210 and
+PC200 bus topology boards).
+
+The PC270E is an enhanced version of the PC260 board and is equipped with two
+modular RJ11-type jacks for connection to twisted pair wiring.
+It can be used in a star or a daisy-chained network.
+
+::
+
+	 8 7 6 5 4 3 2 1
+    ________________________________________________________________
+   |   |       S1        |                                          |
+   |   |_________________|                                          |
+   |    Offs|Base |I/O Addr                                         |
+   |     RAM Addr |                                              ___|
+   |         ___  ___                                       CR3 |___|
+   |        |   \/   |                                      CR4 |___|
+   |        |  PROM  |                                           ___|
+   |        |        |                                        N |   | 8
+   |        | SOCKET |                                        o |   | 7
+   |        |________|                                        d |   | 6
+   |                   ___________________                    e |   | 5
+   |                  |                   |                   A | S | 4
+   |       |oo| EXT2  |                   |                   d | 2 | 3
+   |       |oo| EXT1  |       SMC         |                   d |   | 2
+   |       |oo| ROM   |      90C63        |                   r |___| 1
+   |       |oo| IRQ7  |                   |               |o|  _____|
+   |       |oo| IRQ5  |                   |               |o| | J1  |
+   |       |oo| IRQ4  |                   |              STAR |_____|
+   |       |oo| IRQ3  |                   |                   | J2  |
+   |       |oo| IRQ2  |___________________|                   |_____|
+   |___                                               ______________|
+       |                                             |
+       |_____________________________________________|
+
+Legend::
+
+  SMC 90C63	ARCNET Controller / Transceiver /Logic
+  S1	1-3:	I/O Base Address Select
+	4-6:	Memory Base Address Select
+	7-8:	RAM Offset Select
+  S2	1-8:	Node ID Select
+  EXT		Extended Timeout Select
+  ROM		ROM Enable Select
+  STAR		Selected - Star Topology	(PC130E only)
+		Deselected - Bus Topology	(PC130E only)
+  CR3/CR4	Diagnostic LEDs
+  J1		BNC RG62/U Connector		(PC130E only)
+  J1		6-position Telephone Jack	(PC270E only)
+  J2		6-position Telephone Jack	(PC270E only)
+
+Setting one of the switches to Off/Open means "1", On/Closed means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in group S2 are used to set the node ID.
+These switches work in a way similar to the PC100-series cards; see that
+entry for more information.
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first three switches in switch group S1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch | Hex I/O
+   1 2 3  | Address
+   -------|--------
+   0 0 0  |  260
+   0 0 1  |  290
+   0 1 0  |  2E0  (Manufacturer's default)
+   0 1 1  |  2F0
+   1 0 0  |  300
+   1 0 1  |  350
+   1 1 0  |  380
+   1 1 1  |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 4-6 of switch group S1 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 7 and 8 of group S1.
+
+::
+
+   Switch     | Hex RAM | Hex ROM
+   4 5 6  7 8 | Address | Address *)
+   -----------|---------|-----------
+   0 0 0  0 0 |  C0000  |  C2000
+   0 0 0  0 1 |  C0800  |  C2000
+   0 0 0  1 0 |  C1000  |  C2000
+   0 0 0  1 1 |  C1800  |  C2000
+	      |         |
+   0 0 1  0 0 |  C4000  |  C6000
+   0 0 1  0 1 |  C4800  |  C6000
+   0 0 1  1 0 |  C5000  |  C6000
+   0 0 1  1 1 |  C5800  |  C6000
+	      |         |
+   0 1 0  0 0 |  CC000  |  CE000
+   0 1 0  0 1 |  CC800  |  CE000
+   0 1 0  1 0 |  CD000  |  CE000
+   0 1 0  1 1 |  CD800  |  CE000
+	      |         |
+   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
+   0 1 1  0 1 |  D0800  |  D2000
+   0 1 1  1 0 |  D1000  |  D2000
+   0 1 1  1 1 |  D1800  |  D2000
+	      |         |
+   1 0 0  0 0 |  D4000  |  D6000
+   1 0 0  0 1 |  D4800  |  D6000
+   1 0 0  1 0 |  D5000  |  D6000
+   1 0 0  1 1 |  D5800  |  D6000
+	      |         |
+   1 0 1  0 0 |  D8000  |  DA000
+   1 0 1  0 1 |  D8800  |  DA000
+   1 0 1  1 0 |  D9000  |  DA000
+   1 0 1  1 1 |  D9800  |  DA000
+	      |         |
+   1 1 0  0 0 |  DC000  |  DE000
+   1 1 0  0 1 |  DC800  |  DE000
+   1 1 0  1 0 |  DD000  |  DE000
+   1 1 0  1 1 |  DD800  |  DE000
+	      |         |
+   1 1 1  0 0 |  E0000  |  E2000
+   1 1 1  0 1 |  E0800  |  E2000
+   1 1 1  1 0 |  E1000  |  E2000
+   1 1 1  1 1 |  E1800  |  E2000
+
+  *) To enable the 8K Boot PROM install the jumper ROM.
+     The default is jumper ROM not installed.
+
+
+Setting the Timeouts and Interrupt
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumpers labeled EXT1 and EXT2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+To select a hardware interrupt level set one (only one!) of the jumpers
+IRQ2, IRQ3, IRQ4, IRQ5, IRQ7. The Manufacturer's default is IRQ2.
+
+
+Configuring the PC130E for Star or Bus Topology
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The single jumper labeled STAR is used to configure the PC130E board for
+star or bus topology.
+When the jumper is installed, the board may be used in a star network, when
+it is removed, the board can be used in a bus topology.
+
+
+Diagnostic LEDs
+^^^^^^^^^^^^^^^
+
+Two diagnostic LEDs are visible on the rear bracket of the board.
+The green LED monitors the network activity: the red one shows the
+board activity::
+
+ Green  | Status               Red      | Status
+ -------|-------------------   ---------|-------------------
+  on    | normal activity      flash/on | data transfer
+  blink | reconfiguration      off      | no data transfer;
+  off   | defective board or            | incorrect memory or
+	| node ID is zero               | I/O address
+
+
+PC500/PC550 Longboard (16-bit cards)
+------------------------------------
+
+  - from Juergen Seifert <seifert@htwm.de>
+
+
+  .. note::
+
+      There is another Version of the PC500 called Short Version, which
+      is different in hard- and software! The most important differences
+      are:
+
+      - The long board has no Shared memory.
+      - On the long board the selection of the interrupt is done by binary
+	coded switch, on the short board directly by jumper.
+
+[Avery's note: pay special attention to that: the long board HAS NO SHARED
+MEMORY.  This means the current Linux-ARCnet driver can't use these cards.
+I have obtained a PC500Longboard and will be doing some experiments on it in
+the future, but don't hold your breath.  Thanks again to Juergen Seifert for
+his advice about this!]
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original SMC Manual
+
+	 "Configuration Guide for SMC ARCNET-PC500/PC550
+	 Series Network Controller Boards Pub. # 900.033 Rev. A
+	 November, 1989"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+SMC is a registered trademark of the Standard Microsystems Corporation
+
+The PC500 is equipped with a standard BNC female connector for connection
+to RG-62/U coax cable.
+The board is designed both for point-to-point connection in star networks
+and for connection to bus networks.
+
+The PC550 is equipped with two modular RJ11-type jacks for connection
+to twisted pair wiring.
+It can be used in a star or a daisy-chained (BUS) network.
+
+::
+
+       1
+       0 9 8 7 6 5 4 3 2 1     6 5 4 3 2 1
+    ____________________________________________________________________
+   < |         SW1         | |     SW2     |                            |
+   > |_____________________| |_____________|                            |
+   <   IRQ    |I/O Addr                                                 |
+   >                                                                 ___|
+   <                                                            CR4 |___|
+   >                                                            CR3 |___|
+   <                                                                 ___|
+   >                                                              N |   | 8
+   <                                                              o |   | 7
+   >                                                              d | S | 6
+   <                                                              e | W | 5
+   >                                                              A | 3 | 4
+   <                                                              d |   | 3
+   >                                                              d |   | 2
+   <                                                              r |___| 1
+   >                                                        |o|    _____|
+   <                                                        |o|   | J1  |
+   >  3 1                                                   JP6   |_____|
+   < |o|o| JP2                                                    | J2  |
+   > |o|o|                                                        |_____|
+   <  4 2__                                               ______________|
+   >    |  |                                             |
+   <____|  |_____________________________________________|
+
+Legend::
+
+  SW1	1-6:	I/O Base Address Select
+	7-10:	Interrupt Select
+  SW2	1-6:	Reserved for Future Use
+  SW3	1-8:	Node ID Select
+  JP2	1-4:	Extended Timeout Select
+  JP6		Selected - Star Topology	(PC500 only)
+		Deselected - Bus Topology	(PC500 only)
+  CR3	Green	Monitors Network Activity
+  CR4	Red	Monitors Board Activity
+  J1		BNC RG62/U Connector		(PC500 only)
+  J1		6-position Telephone Jack	(PC550 only)
+  J2		6-position Telephone Jack	(PC550 only)
+
+Setting one of the switches to Off/Open means "1", On/Closed means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in group SW3 are used to set the node ID. Each node
+attached to the network must have a unique node ID which must be
+different from 0.
+Switch 1 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1".
+These values are::
+
+    Switch | Value
+    -------|-------
+      1    |   1
+      2    |   2
+      3    |   4
+      4    |   8
+      5    |  16
+      6    |  32
+      7    |  64
+      8    | 128
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   8 7 6 5 4 3 2 1 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first six switches in switch group SW1 are used to select one
+of 32 possible I/O Base addresses using the following table::
+
+   Switch       | Hex I/O
+   6 5  4 3 2 1 | Address
+   -------------|--------
+   0 1  0 0 0 0 |  200
+   0 1  0 0 0 1 |  210
+   0 1  0 0 1 0 |  220
+   0 1  0 0 1 1 |  230
+   0 1  0 1 0 0 |  240
+   0 1  0 1 0 1 |  250
+   0 1  0 1 1 0 |  260
+   0 1  0 1 1 1 |  270
+   0 1  1 0 0 0 |  280
+   0 1  1 0 0 1 |  290
+   0 1  1 0 1 0 |  2A0
+   0 1  1 0 1 1 |  2B0
+   0 1  1 1 0 0 |  2C0
+   0 1  1 1 0 1 |  2D0
+   0 1  1 1 1 0 |  2E0 (Manufacturer's default)
+   0 1  1 1 1 1 |  2F0
+   1 1  0 0 0 0 |  300
+   1 1  0 0 0 1 |  310
+   1 1  0 0 1 0 |  320
+   1 1  0 0 1 1 |  330
+   1 1  0 1 0 0 |  340
+   1 1  0 1 0 1 |  350
+   1 1  0 1 1 0 |  360
+   1 1  0 1 1 1 |  370
+   1 1  1 0 0 0 |  380
+   1 1  1 0 0 1 |  390
+   1 1  1 0 1 0 |  3A0
+   1 1  1 0 1 1 |  3B0
+   1 1  1 1 0 0 |  3C0
+   1 1  1 1 0 1 |  3D0
+   1 1  1 1 1 0 |  3E0
+   1 1  1 1 1 1 |  3F0
+
+
+Setting the Interrupt
+^^^^^^^^^^^^^^^^^^^^^
+
+Switches seven through ten of switch group SW1 are used to select the
+interrupt level. The interrupt level is binary coded, so selections
+from 0 to 15 would be possible, but only the following eight values will
+be supported: 3, 4, 5, 7, 9, 10, 11, 12.
+
+::
+
+   Switch   | IRQ
+   10 9 8 7 |
+   ---------|--------
+    0 0 1 1 |  3
+    0 1 0 0 |  4
+    0 1 0 1 |  5
+    0 1 1 1 |  7
+    1 0 0 1 |  9 (=2) (default)
+    1 0 1 0 | 10
+    1 0 1 1 | 11
+    1 1 0 0 | 12
+
+
+Setting the Timeouts
+^^^^^^^^^^^^^^^^^^^^
+
+The two jumpers JP2 (1-4) are used to determine the timeout parameters.
+These two jumpers are normally left open.
+Refer to the COM9026 Data Sheet for alternate configurations.
+
+
+Configuring the PC500 for Star or Bus Topology
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The single jumper labeled JP6 is used to configure the PC500 board for
+star or bus topology.
+When the jumper is installed, the board may be used in a star network, when
+it is removed, the board can be used in a bus topology.
+
+
+Diagnostic LEDs
+^^^^^^^^^^^^^^^
+
+Two diagnostic LEDs are visible on the rear bracket of the board.
+The green LED monitors the network activity: the red one shows the
+board activity::
+
+ Green  | Status               Red      | Status
+ -------|-------------------   ---------|-------------------
+  on    | normal activity      flash/on | data transfer
+  blink | reconfiguration      off      | no data transfer;
+  off   | defective board or            | incorrect memory or
+	| node ID is zero               | I/O address
+
+
+PC710 (8-bit card)
+------------------
+
+  - from J.S. van Oosten <jvoosten@compiler.tdcnet.nl>
+
+Note: this data is gathered by experimenting and looking at info of other
+cards. However, I'm sure I got 99% of the settings right.
+
+The SMC710 card resembles the PC270 card, but is much more basic (i.e. no
+LEDs, RJ11 jacks, etc.) and 8 bit. Here's a little drawing::
+
+    _______________________________________
+   | +---------+  +---------+              |____
+   | |   S2    |  |   S1    |              |
+   | +---------+  +---------+              |
+   |                                       |
+   |  +===+    __                          |
+   |  | R |   |  | X-tal                 ###___
+   |  | O |   |__|                      ####__'|
+   |  | M |    ||                        ###
+   |  +===+                                |
+   |                                       |
+   |   .. JP1   +----------+               |
+   |   ..       | big chip |               |
+   |   ..       |  90C63   |               |
+   |   ..       |          |               |
+   |   ..       +----------+               |
+    -------                     -----------
+	   |||||||||||||||||||||
+
+The row of jumpers at JP1 actually consists of 8 jumpers, (sometimes
+labelled) the same as on the PC270, from top to bottom: EXT2, EXT1, ROM,
+IRQ7, IRQ5, IRQ4, IRQ3, IRQ2 (gee, wonder what they would do? :-) )
+
+S1 and S2 perform the same function as on the PC270, only their numbers
+are swapped (S1 is the node address, S2 sets IO- and RAM-address).
+
+I know it works when connected to a PC110 type ARCnet board.
+
+
+*****************************************************************************
+
+Possibly SMC
+============
+
+LCS-8830(-T) (8 and 16-bit cards)
+---------------------------------
+
+  - from Mathias Katzer <mkatzer@HRZ.Uni-Bielefeld.DE>
+  - Marek Michalkiewicz <marekm@i17linuxb.ists.pwr.wroc.pl> says the
+    LCS-8830 is slightly different from LCS-8830-T.  These are 8 bit, BUS
+    only (the JP0 jumper is hardwired), and BNC only.
+
+This is an LCS-8830-T made by SMC, I think ('SMC' only appears on one PLCC,
+nowhere else, not even on the few Xeroxed sheets from the manual).
+
+SMC ARCnet Board Type LCS-8830-T::
+
+     ------------------------------------
+    |                                    |
+    |              JP3 88  8 JP2         |
+    |       #####      | \               |
+    |       #####    ET1 ET2          ###|
+    |                              8  ###|
+    |  U3   SW 1                  JP0 ###|  Phone Jacks
+    |  --                             ###|
+    | |  |                               |
+    | |  |   SW2                         |
+    | |  |                               |
+    | |  |  #####                        |
+    |  --   #####                       ####  BNC Connector
+    |                                   ####
+    |   888888 JP1                       |
+    |   234567                           |
+     --                           -------
+       |||||||||||||||||||||||||||
+	--------------------------
+
+
+  SW1: DIP-Switches for Station Address
+  SW2: DIP-Switches for Memory Base and I/O Base addresses
+
+  JP0: If closed, internal termination on (default open)
+  JP1: IRQ Jumpers
+  JP2: Boot-ROM enabled if closed
+  JP3: Jumpers for response timeout
+
+  U3: Boot-ROM Socket
+
+
+  ET1 ET2     Response Time     Idle Time    Reconfiguration Time
+
+		 78                86               840
+   X            285               316              1680
+       X        563               624              1680
+   X   X       1130              1237              1680
+
+  (X means closed jumper)
+
+  (DIP-Switch downwards means "0")
+
+The station address is binary-coded with SW1.
+
+The I/O base address is coded with DIP-Switches 6,7 and 8 of SW2:
+
+========	========
+Switches        Base
+678             Address
+========	========
+000		260-26f
+100		290-29f
+010		2e0-2ef
+110		2f0-2ff
+001		300-30f
+101		350-35f
+011		380-38f
+111 		3e0-3ef
+========	========
+
+
+DIP Switches 1-5 of SW2 encode the RAM and ROM Address Range:
+
+========        ============= ================
+Switches        RAM           ROM
+12345           Address Range  Address Range
+========        ============= ================
+00000		C:0000-C:07ff	C:2000-C:3fff
+10000		C:0800-C:0fff
+01000		C:1000-C:17ff
+11000		C:1800-C:1fff
+00100		C:4000-C:47ff	C:6000-C:7fff
+10100		C:4800-C:4fff
+01100		C:5000-C:57ff
+11100		C:5800-C:5fff
+00010		C:C000-C:C7ff	C:E000-C:ffff
+10010		C:C800-C:Cfff
+01010		C:D000-C:D7ff
+11010		C:D800-C:Dfff
+00110		D:0000-D:07ff	D:2000-D:3fff
+10110		D:0800-D:0fff
+01110		D:1000-D:17ff
+11110		D:1800-D:1fff
+00001		D:4000-D:47ff	D:6000-D:7fff
+10001		D:4800-D:4fff
+01001		D:5000-D:57ff
+11001		D:5800-D:5fff
+00101		D:8000-D:87ff	D:A000-D:bfff
+10101		D:8800-D:8fff
+01101		D:9000-D:97ff
+11101		D:9800-D:9fff
+00011		D:C000-D:c7ff	D:E000-D:ffff
+10011		D:C800-D:cfff
+01011		D:D000-D:d7ff
+11011		D:D800-D:dfff
+00111		E:0000-E:07ff	E:2000-E:3fff
+10111		E:0800-E:0fff
+01111		E:1000-E:17ff
+11111		E:1800-E:1fff
+========        ============= ================
+
+
+PureData Corp
+=============
+
+PDI507 (8-bit card)
+--------------------
+
+  - from Mark Rejhon <mdrejhon@magi.com> (slight modifications by Avery)
+  - Avery's note: I think PDI508 cards (but definitely NOT PDI508Plus cards)
+    are mostly the same as this.  PDI508Plus cards appear to be mainly
+    software-configured.
+
+Jumpers:
+
+	There is a jumper array at the bottom of the card, near the edge
+	connector.  This array is labelled J1.  They control the IRQs and
+	something else.  Put only one jumper on the IRQ pins.
+
+	ETS1, ETS2 are for timing on very long distance networks.  See the
+	more general information near the top of this file.
+
+	There is a J2 jumper on two pins.  A jumper should be put on them,
+	since it was already there when I got the card.  I don't know what
+	this jumper is for though.
+
+	There is a two-jumper array for J3.  I don't know what it is for,
+	but there were already two jumpers on it when I got the card.  It's
+	a six pin grid in a two-by-three fashion.  The jumpers were
+	configured as follows::
+
+	   .-------.
+	 o | o   o |
+	   :-------:    ------> Accessible end of card with connectors
+	 o | o   o |             in this direction ------->
+	   `-------'
+
+Carl de Billy <CARL@carainfo.com> explains J3 and J4:
+
+   J3 Diagram::
+
+	   .-------.
+	 o | o   o |
+	   :-------:    TWIST Technology
+	 o | o   o |
+	   `-------'
+	   .-------.
+	   | o   o | o
+	   :-------:    COAX Technology
+	   | o   o | o
+	   `-------'
+
+  - If using coax cable in a bus topology the J4 jumper must be removed;
+    place it on one pin.
+
+  - If using bus topology with twisted pair wiring move the J3
+    jumpers so they connect the middle pin and the pins closest to the RJ11
+    Connectors.  Also the J4 jumper must be removed; place it on one pin of
+    J4 jumper for storage.
+
+  - If using star topology with twisted pair wiring move the J3
+    jumpers so they connect the middle pin and the pins closest to the RJ11
+    connectors.
+
+
+DIP Switches:
+
+	The DIP switches on the end of the card that is accessible while it
+	is installed are used to set the ARCnet address.  There are 8
+	switches.  Use an address from 1 to 254.
+
+	==========      =========================
+	Switch No.	ARCnet address
+	12345678
+	==========      =========================
+	00000000	FF  	(Don't use this!)
+	00000001	FE
+	00000010	FD
+	...
+	11111101	2
+	11111110	1
+	11111111	0	(Don't use this!)
+	==========      =========================
+
+	There is another array of eight DIP switches at the top of the
+	card.  There are five labelled MS0-MS4 which seem to control the
+	memory address, and another three labelled IO0-IO2 which seem to
+	control the base I/O address of the card.
+
+	This was difficult to test by trial and error, and the I/O addresses
+	are in a weird order.  This was tested by setting the DIP switches,
+	rebooting the computer, and attempting to load ARCETHER at various
+	addresses (mostly between 0x200 and 0x400).  The address that caused
+	the red transmit LED to blink is the one that I thought works.
+
+	Also, the address 0x3D0 seems to have a special meaning, since the
+	ARCETHER packet driver loaded fine, but without the red LED
+	blinking.  I don't know what 0x3D0 is for though.  I recommend using
+	an address of 0x300 since Windows may not like addresses below
+	0x300.
+
+	=============   ===========
+	IO Switch No.   I/O address
+	210
+	=============   ===========
+	111             0x260
+	110             0x290
+	101             0x2E0
+	100             0x2F0
+	011             0x300
+	010             0x350
+	001             0x380
+	000             0x3E0
+	=============   ===========
+
+	The memory switches set a reserved address space of 0x1000 bytes
+	(0x100 segment units, or 4k).  For example if I set an address of
+	0xD000, it will use up addresses 0xD000 to 0xD100.
+
+	The memory switches were tested by booting using QEMM386 stealth,
+	and using LOADHI to see what address automatically became excluded
+	from the upper memory regions, and then attempting to load ARCETHER
+	using these addresses.
+
+	I recommend using an ARCnet memory address of 0xD000, and putting
+	the EMS page frame at 0xC000 while using QEMM stealth mode.  That
+	way, you get contiguous high memory from 0xD100 almost all the way
+	to the end of the megabyte.
+
+	Memory Switch 0 (MS0) didn't seem to work properly when set to OFF
+	on my card.  It could be malfunctioning on my card.  Experiment with
+	it ON first, and if it doesn't work, set it to OFF.  (It may be a
+	modifier for the 0x200 bit?)
+
+	=============   ============================================
+	MS Switch No.
+	43210           Memory address
+	=============   ============================================
+	00001           0xE100  (guessed - was not detected by QEMM)
+	00011           0xE000  (guessed - was not detected by QEMM)
+	00101           0xDD00
+	00111           0xDC00
+	01001           0xD900
+	01011           0xD800
+	01101           0xD500
+	01111           0xD400
+	10001           0xD100
+	10011           0xD000
+	10101           0xCD00
+	10111           0xCC00
+	11001           0xC900 (guessed - crashes tested system)
+	11011           0xC800 (guessed - crashes tested system)
+	11101           0xC500 (guessed - crashes tested system)
+	11111           0xC400 (guessed - crashes tested system)
+	=============   ============================================
+
+CNet Technology Inc.
+====================
+
+120 Series (8-bit cards)
+------------------------
+  - from Juergen Seifert <seifert@htwm.de>
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original CNet Manual
+
+	      "ARCNET USER'S MANUAL for
+	      CN120A
+	      CN120AB
+	      CN120TP
+	      CN120ST
+	      CN120SBT
+	      P/N:12-01-0007
+	      Revision 3.00"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+
+- P/N 120A   ARCNET 8 bit XT/AT Star
+- P/N 120AB  ARCNET 8 bit XT/AT Bus
+- P/N 120TP  ARCNET 8 bit XT/AT Twisted Pair
+- P/N 120ST  ARCNET 8 bit XT/AT Star, Twisted Pair
+- P/N 120SBT ARCNET 8 bit XT/AT Star, Bus, Twisted Pair
+
+::
+
+    __________________________________________________________________
+   |                                                                  |
+   |                                                               ___|
+   |                                                          LED |___|
+   |                                                               ___|
+   |                                                            N |   | ID7
+   |                                                            o |   | ID6
+   |                                                            d | S | ID5
+   |                                                            e | W | ID4
+   |                     ___________________                    A | 2 | ID3
+   |                    |                   |                   d |   | ID2
+   |                    |                   |  1 2 3 4 5 6 7 8  d |   | ID1
+   |                    |                   | _________________ r |___| ID0
+   |                    |      90C65        ||       SW1       |  ____|
+   |  JP 8 7            |                   ||_________________| |    |
+   |    |o|o|  JP1      |                   |                    | J2 |
+   |    |o|o|  |oo|     |                   |         JP 1 1 1   |    |
+   |   ______________   |                   |            0 1 2   |____|
+   |  |  PROM        |  |___________________|           |o|o|o|  _____|
+   |  >  SOCKET      |  JP 6 5 4 3 2                    |o|o|o| | J1  |
+   |  |______________|    |o|o|o|o|o|                   |o|o|o| |_____|
+   |_____                 |o|o|o|o|o|                   ______________|
+	 |                                             |
+	 |_____________________________________________|
+
+Legend::
+
+  90C65       ARCNET Probe
+  SW1 1-5:    Base Memory Address Select
+      6-8:    Base I/O Address Select
+  SW2 1-8:    Node ID Select (ID0-ID7)
+  JP1     ROM Enable Select
+  JP2     IRQ2
+  JP3     IRQ3
+  JP4     IRQ4
+  JP5     IRQ5
+  JP6     IRQ7
+  JP7/JP8     ET1, ET2 Timeout Parameters
+  JP10/JP11   Coax / Twisted Pair Select  (CN120ST/SBT only)
+  JP12        Terminator Select       (CN120AB/ST/SBT only)
+  J1      BNC RG62/U Connector        (all except CN120TP)
+  J2      Two 6-position Telephone Jack   (CN120TP/ST/SBT only)
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have a unique node ID which must be different from 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1".
+These values are:
+
+   =======  ======  =====
+   Switch   Label   Value
+   =======  ======  =====
+     1      ID0       1
+     2      ID1       2
+     3      ID2       4
+     4      ID3       8
+     5      ID4      16
+     6      ID5      32
+     7      ID6      64
+     8      ID7     128
+   =======  ======  =====
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   8 7 6 5 4 3 2 1 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch      | Hex I/O
+    6   7   8  | Address
+   ------------|--------
+   ON  ON  ON  |  260
+   OFF ON  ON  |  290
+   ON  OFF ON  |  2E0  (Manufacturer's default)
+   OFF OFF ON  |  2F0
+   ON  ON  OFF |  300
+   OFF ON  OFF |  350
+   ON  OFF OFF |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 8K or memory base + 0x2000.
+Switches 1-5 of switch block SW1 select the Memory Base address.
+
+::
+
+   Switch              | Hex RAM | Hex ROM
+    1   2   3   4   5  | Address | Address *)
+   --------------------|---------|-----------
+   ON  ON  ON  ON  ON  |  C0000  |  C2000
+   ON  ON  OFF ON  ON  |  C4000  |  C6000
+   ON  ON  ON  OFF ON  |  CC000  |  CE000
+   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
+   ON  ON  ON  ON  OFF |  D4000  |  D6000
+   ON  ON  OFF ON  OFF |  D8000  |  DA000
+   ON  ON  ON  OFF OFF |  DC000  |  DE000
+   ON  ON  OFF OFF OFF |  E0000  |  E2000
+
+  *) To enable the Boot ROM install the jumper JP1
+
+.. note::
+
+      Since the switches 1 and 2 are always set to ON it may be possible
+      that they can be used to add an offset of 2K, 4K or 6K to the base
+      address, but this feature is not documented in the manual and I
+      haven't tested it yet.
+
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To select a hardware interrupt level install one (only one!) of the jumpers
+JP2, JP3, JP4, JP5, JP6. JP2 is the default::
+
+   Jumper | IRQ
+   -------|-----
+     2    |  2
+     3    |  3
+     4    |  4
+     5    |  5
+     6    |  7
+
+
+Setting the Internal Terminator on CN120AB/TP/SBT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumper JP12 is used to enable the internal terminator::
+
+			 -----
+       0                |  0  |
+     -----   ON         |     |  ON
+    |  0  |             |  0  |
+    |     |  OFF         -----   OFF
+    |  0  |                0
+     -----
+   Terminator          Terminator
+    disabled            enabled
+
+
+Selecting the Connector Type on CN120ST/SBT
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+::
+
+     JP10    JP11        JP10    JP11
+			 -----   -----
+       0       0        |  0  | |  0  |
+     -----   -----      |     | |     |
+    |  0  | |  0  |     |  0  | |  0  |
+    |     | |     |      -----   -----
+    |  0  | |  0  |        0       0
+     -----   -----
+     Coaxial Cable       Twisted Pair Cable
+       (Default)
+
+
+Setting the Timeout Parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumpers labeled EXT1 and EXT2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+
+CNet Technology Inc.
+====================
+
+160 Series (16-bit cards)
+-------------------------
+  - from Juergen Seifert <seifert@htwm.de>
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the following Original CNet Manual
+
+	      "ARCNET USER'S MANUAL for
+	      CN160A CN160AB CN160TP
+	      P/N:12-01-0006 Revision 3.00"
+
+ARCNET is a registered trademark of the Datapoint Corporation
+
+- P/N 160A   ARCNET 16 bit XT/AT Star
+- P/N 160AB  ARCNET 16 bit XT/AT Bus
+- P/N 160TP  ARCNET 16 bit XT/AT Twisted Pair
+
+::
+
+   ___________________________________________________________________
+  <                             _________________________          ___|
+  >               |oo| JP2     |                         |    LED |___|
+  <               |oo| JP1     |        9026             |    LED |___|
+  >                            |_________________________|         ___|
+  <                                                             N |   | ID7
+  >                                                      1      o |   | ID6
+  <                                    1 2 3 4 5 6 7 8 9 0      d | S | ID5
+  >         _______________           _____________________     e | W | ID4
+  <        |     PROM      |         |         SW1         |    A | 2 | ID3
+  >        >    SOCKET     |         |_____________________|    d |   | ID2
+  <        |_______________|          | IO-Base   | MEM   |     d |   | ID1
+  >                                                             r |___| ID0
+  <                                                               ____|
+  >                                                              |    |
+  <                                                              | J1 |
+  >                                                              |    |
+  <                                                              |____|
+  >                            1 1 1 1                                |
+  <  3 4 5 6 7      JP     8 9 0 1 2 3                                |
+  > |o|o|o|o|o|           |o|o|o|o|o|o|                               |
+  < |o|o|o|o|o| __        |o|o|o|o|o|o|                    ___________|
+  >            |  |                                       |
+  <____________|  |_______________________________________|
+
+Legend::
+
+  9026            ARCNET Probe
+  SW1 1-6:    Base I/O Address Select
+      7-10:   Base Memory Address Select
+  SW2 1-8:    Node ID Select (ID0-ID7)
+  JP1/JP2     ET1, ET2 Timeout Parameters
+  JP3-JP13    Interrupt Select
+  J1      BNC RG62/U Connector        (CN160A/AB only)
+  J1      Two 6-position Telephone Jack   (CN160TP only)
+  LED
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have a unique node ID which must be different from 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1".
+These values are::
+
+   Switch | Label | Value
+   -------|-------|-------
+     1    | ID0   |   1
+     2    | ID1   |   2
+     3    | ID2   |   4
+     4    | ID3   |   8
+     5    | ID4   |  16
+     6    | ID5   |  32
+     7    | ID6   |  64
+     8    | ID7   | 128
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   8 7 6 5 4 3 2 1 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first six switches in switch block SW1 are used to select the I/O Base
+address using the following table::
+
+	     Switch        | Hex I/O
+    1   2   3   4   5   6  | Address
+   ------------------------|--------
+   OFF ON  ON  OFF OFF ON  |  260
+   OFF ON  OFF ON  ON  OFF |  290
+   OFF ON  OFF OFF OFF ON  |  2E0  (Manufacturer's default)
+   OFF ON  OFF OFF OFF OFF |  2F0
+   OFF OFF ON  ON  ON  ON  |  300
+   OFF OFF ON  OFF ON  OFF |  350
+   OFF OFF OFF ON  ON  ON  |  380
+   OFF OFF OFF OFF OFF ON  |  3E0
+
+Note: Other IO-Base addresses seem to be selectable, but only the above
+      combinations are documented.
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The switches 7-10 of switch block SW1 are used to select the Memory
+Base address of the RAM (2K) and the PROM::
+
+   Switch          | Hex RAM | Hex ROM
+    7   8   9  10  | Address | Address
+   ----------------|---------|-----------
+   OFF OFF ON  ON  |  C0000  |  C8000
+   OFF OFF ON  OFF |  D0000  |  D8000 (Default)
+   OFF OFF OFF ON  |  E0000  |  E8000
+
+.. note::
+
+      Other MEM-Base addresses seem to be selectable, but only the above
+      combinations are documented.
+
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To select a hardware interrupt level install one (only one!) of the jumpers
+JP3 through JP13 using the following table::
+
+   Jumper | IRQ
+   -------|-----------------
+     3    |  14
+     4    |  15
+     5    |  12
+     6    |  11
+     7    |  10
+     8    |   3
+     9    |   4
+    10    |   5
+    11    |   6
+    12    |   7
+    13    |   2 (=9) Default!
+
+.. note::
+
+       - Do not use JP11=IRQ6, it may conflict with your Floppy Disk
+	 Controller
+       - Use JP3=IRQ14 only if you don't have an IDE, MFM, or RLL
+	 hard disk; it may conflict with their controllers
+
+
+Setting the Timeout Parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumpers labeled JP1 and JP2 are used to determine the timeout
+parameters. These two jumpers are normally left open.
+
+
+Lantech
+=======
+
+8-bit card, unknown model
+-------------------------
+  - from Vlad Lungu <vlungu@ugal.ro> - his e-mail address seemed broken at
+    the time I tried to reach him.  Sorry Vlad, if you didn't get my reply.
+
+::
+
+   ________________________________________________________________
+   |   1         8                                                 |
+   |   ___________                                               __|
+   |   |   SW1    |                                         LED |__|
+   |   |__________|                                                |
+   |                                                            ___|
+   |                _____________________                       |S | 8
+   |                |                   |                       |W |
+   |                |                   |                       |2 |
+   |                |                   |                       |__| 1
+   |                |      UM9065L      |     |o|  JP4         ____|____
+   |                |                   |     |o|              |  CN    |
+   |                |                   |                      |________|
+   |                |                   |                          |
+   |                |___________________|                          |
+   |                                                               |
+   |                                                               |
+   |      _____________                                            |
+   |      |            |                                           |
+   |      |    PROM    |        |ooooo|  JP6                       |
+   |      |____________|        |ooooo|                            |
+   |_____________                                             _   _|
+		|____________________________________________| |__|
+
+
+UM9065L : ARCnet Controller
+
+SW 1    : Shared Memory Address and I/O Base
+
+::
+
+	ON=0
+
+	12345|Memory Address
+	-----|--------------
+	00001|  D4000
+	00010|  CC000
+	00110|  D0000
+	01110|  D1000
+	01101|  D9000
+	10010|  CC800
+	10011|  DC800
+	11110|  D1800
+
+It seems that the bits are considered in reverse order.  Also, you must
+observe that some of those addresses are unusual and I didn't probe them; I
+used a memory dump in DOS to identify them.  For the 00000 configuration and
+some others that I didn't write here the card seems to conflict with the
+video card (an S3 GENDAC). I leave the full decoding of those addresses to
+you.
+
+::
+
+	678| I/O Address
+	---|------------
+	000|    260
+	001|    failed probe
+	010|    2E0
+	011|    380
+	100|    290
+	101|    350
+	110|    failed probe
+	111|    3E0
+
+  SW 2  : Node ID (binary coded)
+
+  JP 4  : Boot PROM enable   CLOSE - enabled
+			     OPEN  - disabled
+
+  JP 6  : IRQ set (ONLY ONE jumper on 1-5 for IRQ 2-6)
+
+
+Acer
+====
+
+8-bit card, Model 5210-003
+--------------------------
+
+  - from Vojtech Pavlik <vojtech@suse.cz> using portions of the existing
+    arcnet-hardware file.
+
+This is a 90C26 based card.  Its configuration seems similar to the SMC
+PC100, but has some additional jumpers I don't know the meaning of.
+
+::
+
+	       __
+	      |  |
+   ___________|__|_________________________
+  |         |      |                       |
+  |         | BNC  |                       |
+  |         |______|                    ___|
+  |  _____________________             |___
+  | |                     |                |
+  | | Hybrid IC           |                |
+  | |                     |       o|o J1   |
+  | |_____________________|       8|8      |
+  |                               8|8 J5   |
+  |                               o|o      |
+  |                               8|8      |
+  |__                             8|8      |
+ (|__| LED                        o|o      |
+  |                               8|8      |
+  |                               8|8 J15  |
+  |                                        |
+  |                    _____               |
+  |                   |     |   _____      |
+  |                   |     |  |     |  ___|
+  |                   |     |  |     | |
+  |  _____            | ROM |  | UFS | |
+  | |     |           |     |  |     | |
+  | |     |     ___   |     |  |     | |
+  | |     |    |   |  |__.__|  |__.__| |
+  | | NCR |    |XTL|   _____    _____  |
+  | |     |    |___|  |     |  |     | |
+  | |90C26|           |     |  |     | |
+  | |     |           | RAM |  | UFS | |
+  | |     | J17 o|o   |     |  |     | |
+  | |     | J16 o|o   |     |  |     | |
+  | |__.__|           |__.__|  |__.__| |
+  |  ___                               |
+  | |   |8                             |
+  | |SW2|                              |
+  | |   |                              |
+  | |___|1                             |
+  |  ___                               |
+  | |   |10           J18 o|o          |
+  | |   |                 o|o          |
+  | |SW1|                 o|o          |
+  | |   |             J21 o|o          |
+  | |___|1                             |
+  |                                    |
+  |____________________________________|
+
+
+Legend::
+
+  90C26       ARCNET Chip
+  XTL         20 MHz Crystal
+  SW1 1-6     Base I/O Address Select
+      7-10    Memory Address Select
+  SW2 1-8     Node ID Select (ID0-ID7)
+  J1-J5       IRQ Select
+  J6-J21      Unknown (Probably extra timeouts & ROM enable ...)
+  LED1        Activity LED
+  BNC         Coax connector (STAR ARCnet)
+  RAM         2k of SRAM
+  ROM         Boot ROM socket
+  UFS         Unidentified Flying Sockets
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have a unique node ID which must not be 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+Setting one of the switches to OFF means "1", ON means "0".
+
+The node ID is the sum of the values of all switches set to "1".
+These values are::
+
+   Switch | Value
+   -------|-------
+     1    |   1
+     2    |   2
+     3    |   4
+     4    |   8
+     5    |  16
+     6    |  32
+     7    |  64
+     8    | 128
+
+Don't set this to 0 or 255; these values are reserved.
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The switches 1 to 6 of switch block SW1 are used to select one
+of 32 possible I/O Base addresses using the following tables::
+
+	  | Hex
+   Switch | Value
+   -------|-------
+     1    | 200
+     2    | 100
+     3    |  80
+     4    |  40
+     5    |  20
+     6    |  10
+
+The I/O address is the sum of the values of all switches set to "1".
+Remember that the I/O address space below 0x200 is RESERVED for the
+mainboard, so switch 1 should be ALWAYS SET TO OFF.
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of sixteen positions. However, the addresses below
+A0000 are likely to cause system hang because there's main RAM.
+
+Switches 7-10 of switch block SW1 select the Memory Base address::
+
+   Switch          | Hex RAM
+    7   8   9  10  | Address
+   ----------------|---------
+   OFF OFF OFF OFF |  F0000 (conflicts with main BIOS)
+   OFF OFF OFF ON  |  E0000
+   OFF OFF ON  OFF |  D0000
+   OFF OFF ON  ON  |  C0000 (conflicts with video BIOS)
+   OFF ON  OFF OFF |  B0000 (conflicts with mono video)
+   OFF ON  OFF ON  |  A0000 (conflicts with graphics)
+
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means
+shorted, OFF means open::
+
+    Jumper              |  IRQ
+    1   2   3   4   5   |
+   ----------------------------
+    ON  OFF OFF OFF OFF |  7
+    OFF ON  OFF OFF OFF |  5
+    OFF OFF ON  OFF OFF |  4
+    OFF OFF OFF ON  OFF |  3
+    OFF OFF OFF OFF ON  |  2
+
+
+Unknown jumpers & sockets
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+I know nothing about these. I just guess that J16&J17 are timeout
+jumpers and maybe one of J18-J21 selects ROM. Also J6-J10 and
+J11-J15 are connecting IRQ2-7 to some pins on the UFSs. I can't
+guess the purpose.
+
+Datapoint?
+==========
+
+LAN-ARC-8, an 8-bit card
+------------------------
+
+  - from Vojtech Pavlik <vojtech@suse.cz>
+
+This is another SMC 90C65-based ARCnet card. I couldn't identify the
+manufacturer, but it might be DataPoint, because the card has the
+original arcNet logo in its upper right corner.
+
+::
+
+	  _______________________________________________________
+	 |                         _________                     |
+	 |                        |   SW2   | ON      arcNet     |
+	 |                        |_________| OFF             ___|
+	 |  _____________         1 ______  8                |   | 8
+	 | |             | SW1     | XTAL | ____________     | S |
+	 | > RAM (2k)    |         |______||            |    | W |
+	 | |_____________|                 |      H     |    | 3 |
+	 |                        _________|_____ y     |    |___| 1
+	 |  _________            |         |     |b     |        |
+	 | |_________|           |         |     |r     |        |
+	 |                       |     SMC |     |i     |        |
+	 |                       |    90C65|     |d     |        |
+	 |  _________            |         |     |      |        |
+	 | |   SW1   | ON        |         |     |I     |        |
+	 | |_________| OFF       |_________|_____/C     |   _____|
+	 |  1       8                      |            |  |     |___
+	 |  ______________                 |            |  | BNC |___|
+	 | |              |                |____________|  |_____|
+	 | > EPROM SOCKET |              _____________           |
+	 | |______________|             |_____________|          |
+	 |                                         ______________|
+	 |                                        |
+	 |________________________________________|
+
+Legend::
+
+  90C65       ARCNET Chip
+  SW1 1-5:    Base Memory Address Select
+      6-8:    Base I/O Address Select
+  SW2 1-8:    Node ID Select
+  SW3 1-5:    IRQ Select
+      6-7:    Extra Timeout
+      8  :    ROM Enable
+  BNC         Coax connector
+  XTAL        20 MHz Crystal
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have a unique node ID which must not be 0.
+Switch 1 serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1".
+These values are::
+
+   Switch | Value
+   -------|-------
+     1    |   1
+     2    |   2
+     3    |   4
+     4    |   8
+     5    |  16
+     6    |  32
+     7    |  64
+     8    | 128
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch      | Hex I/O
+    6   7   8  | Address
+   ------------|--------
+   ON  ON  ON  |  260
+   OFF ON  ON  |  290
+   ON  OFF ON  |  2E0  (Manufacturer's default)
+   OFF OFF ON  |  2F0
+   ON  ON  OFF |  300
+   OFF ON  OFF |  350
+   ON  OFF OFF |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+
+Switches 3-5 of switch block SW1 select the Memory Base address.
+
+::
+
+   Switch              | Hex RAM | Hex ROM
+    1   2   3   4   5  | Address | Address *)
+   --------------------|---------|-----------
+   ON  ON  ON  ON  ON  |  C0000  |  C2000
+   ON  ON  OFF ON  ON  |  C4000  |  C6000
+   ON  ON  ON  OFF ON  |  CC000  |  CE000
+   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
+   ON  ON  ON  ON  OFF |  D4000  |  D6000
+   ON  ON  OFF ON  OFF |  D8000  |  DA000
+   ON  ON  ON  OFF OFF |  DC000  |  DE000
+   ON  ON  OFF OFF OFF |  E0000  |  E2000
+
+  *) To enable the Boot ROM set the switch 8 of switch block SW3 to position ON.
+
+The switches 1 and 2 probably add 0x0800 and 0x1000 to RAM base address.
+
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Switches 1-5 of the switch block SW3 control the IRQ level::
+
+    Switch              |  IRQ
+    1   2   3   4   5   |
+   ----------------------------
+    ON  OFF OFF OFF OFF |  3
+    OFF ON  OFF OFF OFF |  4
+    OFF OFF ON  OFF OFF |  5
+    OFF OFF OFF ON  OFF |  7
+    OFF OFF OFF OFF ON  |  2
+
+
+Setting the Timeout Parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The switches 6-7 of the switch block SW3 are used to determine the timeout
+parameters.  These two switches are normally left in the OFF position.
+
+
+Topware
+=======
+
+8-bit card, TA-ARC/10
+---------------------
+
+  - from Vojtech Pavlik <vojtech@suse.cz>
+
+This is another very similar 90C65 card. Most of the switches and jumpers
+are the same as on other clones.
+
+::
+
+   _____________________________________________________________________
+  |  ___________   |                         |            ______        |
+  | |SW2 NODE ID|  |                         |           | XTAL |       |
+  | |___________|  |  Hybrid IC              |           |______|       |
+  |  ___________   |                         |                        __|
+  | |SW1 MEM+I/O|  |_________________________|                   LED1|__|)
+  | |___________|           1 2                                         |
+  |                     J3 |o|o| TIMEOUT                          ______|
+  |     ______________     |o|o|                                 |      |
+  |    |              |  ___________________                     | RJ   |
+  |    > EPROM SOCKET | |                   \                    |------|
+  |J2  |______________| |                    |                   |      |
+  ||o|                  |                    |                   |______|
+  ||o| ROM ENABLE       |        SMC         |    _________             |
+  |     _____________   |       90C65        |   |_________|       _____|
+  |    |             |  |                    |                    |     |___
+  |    > RAM (2k)    |  |                    |                    | BNC |___|
+  |    |_____________|  |                    |                    |_____|
+  |                     |____________________|                          |
+  | ________ IRQ 2 3 4 5 7                  ___________                 |
+  ||________|   |o|o|o|o|o|                |___________|                |
+  |________   J1|o|o|o|o|o|                               ______________|
+	   |                                             |
+	   |_____________________________________________|
+
+Legend::
+
+  90C65       ARCNET Chip
+  XTAL        20 MHz Crystal
+  SW1 1-5     Base Memory Address Select
+      6-8     Base I/O Address Select
+  SW2 1-8     Node ID Select (ID0-ID7)
+  J1          IRQ Select
+  J2          ROM Enable
+  J3          Extra Timeout
+  LED1        Activity LED
+  BNC         Coax connector (BUS ARCnet)
+  RJ          Twisted Pair Connector (daisy chain)
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached to
+the network must have a unique node ID which must not be 0.  Switch 1 (ID0)
+serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1".
+These values are::
+
+   Switch | Label | Value
+   -------|-------|-------
+     1    | ID0   |   1
+     2    | ID1   |   2
+     3    | ID2   |   4
+     4    | ID3   |   8
+     5    | ID4   |  16
+     6    | ID5   |  32
+     7    | ID6   |  64
+     8    | ID7   | 128
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch      | Hex I/O
+    6   7   8  | Address
+   ------------|--------
+   ON  ON  ON  |  260  (Manufacturer's default)
+   OFF ON  ON  |  290
+   ON  OFF ON  |  2E0
+   OFF OFF ON  |  2F0
+   ON  ON  OFF |  300
+   OFF ON  OFF |  350
+   ON  OFF OFF |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+
+Switches 3-5 of switch block SW1 select the Memory Base address.
+
+::
+
+   Switch              | Hex RAM | Hex ROM
+    1   2   3   4   5  | Address | Address *)
+   --------------------|---------|-----------
+   ON  ON  ON  ON  ON  |  C0000  |  C2000
+   ON  ON  OFF ON  ON  |  C4000  |  C6000  (Manufacturer's default)
+   ON  ON  ON  OFF ON  |  CC000  |  CE000
+   ON  ON  OFF OFF ON  |  D0000  |  D2000
+   ON  ON  ON  ON  OFF |  D4000  |  D6000
+   ON  ON  OFF ON  OFF |  D8000  |  DA000
+   ON  ON  ON  OFF OFF |  DC000  |  DE000
+   ON  ON  OFF OFF OFF |  E0000  |  E2000
+
+   *) To enable the Boot ROM short the jumper J2.
+
+The switches 1 and 2 of SW1 probably add 0x0800 and 0x1000 to the RAM base address.
+
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Jumpers 1-5 of the jumper block J1 control the IRQ level.  ON means
+shorted, OFF means open::
+
+    Jumper              |  IRQ
+    1   2   3   4   5   |
+   ----------------------------
+    ON  OFF OFF OFF OFF |  2
+    OFF ON  OFF OFF OFF |  3
+    OFF OFF ON  OFF OFF |  4
+    OFF OFF OFF ON  OFF |  5
+    OFF OFF OFF OFF ON  |  7
+
+
+Setting the Timeout Parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumpers J3 are used to set the timeout parameters. These two
+jumpers are normally left open.
+
+Thomas-Conrad
+=============
+
+Model #500-6242-0097 REV A (8-bit card)
+---------------------------------------
+
+  - from Lars Karlsson <100617.3473@compuserve.com>
+
+::
+
+     ________________________________________________________
+   |          ________   ________                           |_____
+   |         |........| |........|                            |
+   |         |________| |________|                         ___|
+   |            SW 3       SW 1                           |   |
+   |         Base I/O   Base Addr.                Station |   |
+   |                                              address |   |
+   |    ______                                    switch  |   |
+   |   |      |                                           |   |
+   |   |      |                                           |___|
+   |   |      |                                 ______        |___._
+   |   |______|                                |______|         ____| BNC
+   |                                            Jumper-        _____| Connector
+   |   Main chip                                block  _    __|   '
+   |                                                  | |  |    RJ Connector
+   |                                                  |_|  |    with 110 Ohm
+   |                                                       |__  Terminator
+   |    ___________                                         __|
+   |   |...........|                                       |    RJ-jack
+   |   |...........|    _____                              |    (unused)
+   |   |___________|   |_____|                             |__
+   |  Boot PROM socket IRQ-jumpers                            |_  Diagnostic
+   |________                                       __          _| LED (red)
+	    | | | | | | | | | | | | | | | | | | | |  |        |
+	    | | | | | | | | | | | | | | | | | | | |  |________|
+							      |
+							      |
+
+And here are the settings for some of the switches and jumpers on the cards.
+
+::
+
+	    I/O
+
+	   1 2 3 4 5 6 7 8
+
+  2E0----- 0 0 0 1 0 0 0 1
+  2F0----- 0 0 0 1 0 0 0 0
+  300----- 0 0 0 0 1 1 1 1
+  350----- 0 0 0 0 1 1 1 0
+
+"0" in the above example means the switch is off; "1" means that it is on.
+
+::
+
+      ShMem address.
+
+	1 2 3 4 5 6 7 8
+
+  CX00--0 0 1 1 | |   |
+  DX00--0 0 1 0       |
+  X000--------- 1 1   |
+  X400--------- 1 0   |
+  X800--------- 0 1   |
+  XC00--------- 0 0
+  ENHANCED----------- 1
+  COMPATIBLE--------- 0
+
+::
+
+	 IRQ
+
+
+     3 4 5 7 2
+     . . . . .
+     . . . . .
+
+
+There is a DIP-switch with 8 switches, used to set the shared memory address
+to be used. The first 6 switches set the address, the 7th doesn't have any
+function, and the 8th switch is used to select "compatible" or "enhanced".
+When I got my two cards, one of them had this switch set to "enhanced". That
+card didn't work at all, it wasn't even recognized by the driver. The other
+card had this switch set to "compatible" and it behaved absolutely normally. I
+guess that the switch on one of the cards must have been changed accidentally
+when the card was taken out of its former host. The question remains
+unanswered, what is the purpose of the "enhanced" position?
+
+[Avery's note: "enhanced" probably either disables shared memory (use IO
+ports instead) or disables IO ports (use memory addresses instead).  This
+varies by the type of card involved.  I fail to see how either of these
+enhances anything.  Send me more detailed information about this mode, or
+just use "compatible" mode instead.]
+
+Waterloo Microsystems Inc. ??
+=============================
+
+8-bit card (C) 1985
+-------------------
+  - from Robert Michael Best <rmb117@cs.usask.ca>
+
+[Avery's note: these don't work with my driver for some reason.  These cards
+SEEM to have settings similar to the PDI508Plus, which is
+software-configured and doesn't work with my driver either.  The "Waterloo
+chip" is a boot PROM, probably designed specifically for the University of
+Waterloo.  If you have any further information about this card, please
+e-mail me.]
+
+The probe has not been able to detect the card on any of the J2 settings,
+and I tried them again with the "Waterloo" chip removed.
+
+::
+
+   _____________________________________________________________________
+  | \/  \/              ___  __ __                                      |
+  | C4  C4     |^|     | M ||  ^  ||^|                                  |
+  | --  --     |_|     | 5 ||     || | C3                               |
+  | \/  \/      C10    |___||     ||_|                                  |
+  | C4  C4             _  _ |     |                 ??                  |
+  | --  --            | \/ ||     |                                     |
+  |                   |    ||     |                                     |
+  |                   |    ||  C1 |                                     |
+  |                   |    ||     |  \/                            _____|
+  |                   | C6 ||     |  C9                           |     |___
+  |                   |    ||     |  --                           | BNC |___|
+  |                   |    ||     |          >C7|                 |_____|
+  |                   |    ||     |                                     |
+  | __ __             |____||_____|       1 2 3     6                   |
+  ||  ^  |     >C4|                      |o|o|o|o|o|o| J2    >C4|       |
+  ||     |                               |o|o|o|o|o|o|                  |
+  || C2  |     >C4|                                          >C4|       |
+  ||     |                                   >C8|                       |
+  ||     |       2 3 4 5 6 7  IRQ                            >C4|       |
+  ||_____|      |o|o|o|o|o|o| J3                                        |
+  |_______      |o|o|o|o|o|o|                            _______________|
+	  |                                             |
+	  |_____________________________________________|
+
+  C1 -- "COM9026
+	 SMC 8638"
+	In a chip socket.
+
+  C2 -- "@Copyright
+	 Waterloo Microsystems Inc.
+	 1985"
+	In a chip Socket with info printed on a label covering a round window
+	showing the circuit inside. (The window indicates it is an EPROM chip.)
+
+  C3 -- "COM9032
+	 SMC 8643"
+	In a chip socket.
+
+  C4 -- "74LS"
+	9 total no sockets.
+
+  M5 -- "50006-136
+	 20.000000 MHZ
+	 MTQ-T1-S3
+	 0 M-TRON 86-40"
+	Metallic case with 4 pins, no socket.
+
+  C6 -- "MOSTEK@TC8643
+	 MK6116N-20
+	 MALAYSIA"
+	No socket.
+
+  C7 -- No stamp or label but in a 20 pin chip socket.
+
+  C8 -- "PAL10L8CN
+	 8623"
+	In a 20 pin socket.
+
+  C9 -- "PAl16R4A-2CN
+	 8641"
+	In a 20 pin socket.
+
+  C10 -- "M8640
+	    NMC
+	  9306N"
+	 In an 8 pin socket.
+
+  ?? -- Some components on a smaller board and attached with 20 pins all
+	along the side closest to the BNC connector.  They are coated in a dark
+	resin.
+
+On the board there are two jumper banks labeled J2 and J3. The
+manufacturer didn't put a J1 on the board. The two boards I have both
+came with a jumper box for each bank.
+
+::
+
+  J2 -- Numbered 1 2 3 4 5 6.
+	4 and 5 are not stamped due to solder points.
+
+  J3 -- IRQ 2 3 4 5 6 7
+
+The board itself has a maple leaf stamped just above the irq jumpers
+and "-2 46-86" beside C2. Between C1 and C6 "ASS 'Y 300163" and "@1986
+CORMAN CUSTOM ELECTRONICS CORP." stamped just below the BNC connector.
+Below that "MADE IN CANADA"
+
+No Name
+=======
+
+8-bit cards, 16-bit cards
+-------------------------
+
+  - from Juergen Seifert <seifert@htwm.de>
+
+I have named this ARCnet card "NONAME", since there is no name of any
+manufacturer on the Installation manual or on the shipping box. The only
+hint to the existence of a manufacturer at all is written in copper:
+"Made in Taiwan".
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the Original
+
+		    "ARCnet Installation Manual"
+
+::
+
+    ________________________________________________________________
+   | |STAR| BUS| T/P|                                               |
+   | |____|____|____|                                               |
+   |                            _____________________               |
+   |                           |                     |              |
+   |                           |                     |              |
+   |                           |                     |              |
+   |                           |        SMC          |              |
+   |                           |                     |              |
+   |                           |       COM90C65      |              |
+   |                           |                     |              |
+   |                           |                     |              |
+   |                           |__________-__________|              |
+   |                                                           _____|
+   |      _______________                                     |  CN |
+   |     | PROM          |                                    |_____|
+   |     > SOCKET        |                                          |
+   |     |_______________|         1 2 3 4 5 6 7 8  1 2 3 4 5 6 7 8 |
+   |                               _______________  _______________ |
+   |           |o|o|o|o|o|o|o|o|  |      SW1      ||      SW2      ||
+   |           |o|o|o|o|o|o|o|o|  |_______________||_______________||
+   |___         2 3 4 5 7 E E R        Node ID       IOB__|__MEM____|
+       |        \ IRQ   / T T O                      |
+       |__________________1_2_M______________________|
+
+Legend::
+
+  COM90C65:       ARCnet Probe
+  S1  1-8:    Node ID Select
+  S2  1-3:    I/O Base Address Select
+      4-6:    Memory Base Address Select
+      7-8:    RAM Offset Select
+  ET1, ET2    Extended Timeout Select
+  ROM     ROM Enable Select
+  CN              RG62 Coax Connector
+  STAR| BUS | T/P Three fields for placing a sign (colored circle)
+		  indicating the topology of the card
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in group SW1 are used to set the node ID.
+Each node attached to the network must have a unique node ID which
+must be different from 0.
+Switch 8 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are::
+
+    Switch | Value
+    -------|-------
+      8    |   1
+      7    |   2
+      6    |   4
+      5    |   8
+      4    |  16
+      3    |  32
+      2    |  64
+      1    | 128
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   1 2 3 4 5 6 7 8 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first three switches in switch group SW2 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+   Switch      | Hex I/O
+    1   2   3  | Address
+   ------------|--------
+   ON  ON  ON  |  260
+   ON  ON  OFF |  290
+   ON  OFF ON  |  2E0  (Manufacturer's default)
+   ON  OFF OFF |  2F0
+   OFF ON  ON  |  300
+   OFF ON  OFF |  350
+   OFF OFF ON  |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 4-6 of switch group SW2 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 7 and 8 of group SW2.
+
+::
+
+   Switch     | Hex RAM | Hex ROM
+   4 5 6  7 8 | Address | Address *)
+   -----------|---------|-----------
+   0 0 0  0 0 |  C0000  |  C2000
+   0 0 0  0 1 |  C0800  |  C2000
+   0 0 0  1 0 |  C1000  |  C2000
+   0 0 0  1 1 |  C1800  |  C2000
+	      |         |
+   0 0 1  0 0 |  C4000  |  C6000
+   0 0 1  0 1 |  C4800  |  C6000
+   0 0 1  1 0 |  C5000  |  C6000
+   0 0 1  1 1 |  C5800  |  C6000
+	      |         |
+   0 1 0  0 0 |  CC000  |  CE000
+   0 1 0  0 1 |  CC800  |  CE000
+   0 1 0  1 0 |  CD000  |  CE000
+   0 1 0  1 1 |  CD800  |  CE000
+	      |         |
+   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
+   0 1 1  0 1 |  D0800  |  D2000
+   0 1 1  1 0 |  D1000  |  D2000
+   0 1 1  1 1 |  D1800  |  D2000
+	      |         |
+   1 0 0  0 0 |  D4000  |  D6000
+   1 0 0  0 1 |  D4800  |  D6000
+   1 0 0  1 0 |  D5000  |  D6000
+   1 0 0  1 1 |  D5800  |  D6000
+	      |         |
+   1 0 1  0 0 |  D8000  |  DA000
+   1 0 1  0 1 |  D8800  |  DA000
+   1 0 1  1 0 |  D9000  |  DA000
+   1 0 1  1 1 |  D9800  |  DA000
+	      |         |
+   1 1 0  0 0 |  DC000  |  DE000
+   1 1 0  0 1 |  DC800  |  DE000
+   1 1 0  1 0 |  DD000  |  DE000
+   1 1 0  1 1 |  DD800  |  DE000
+	      |         |
+   1 1 1  0 0 |  E0000  |  E2000
+   1 1 1  0 1 |  E0800  |  E2000
+   1 1 1  1 0 |  E1000  |  E2000
+   1 1 1  1 1 |  E1800  |  E2000
+
+   *) To enable the 8K Boot PROM install the jumper ROM.
+      The default is jumper ROM not installed.
+
+
+Setting Interrupt Request Lines (IRQ)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To select a hardware interrupt level set one (only one!) of the jumpers
+IRQ2, IRQ3, IRQ4, IRQ5 or IRQ7. The manufacturer's default is IRQ2.
+
+
+Setting the Timeouts
+^^^^^^^^^^^^^^^^^^^^
+
+The two jumpers labeled ET1 and ET2 are used to determine the timeout
+parameters (response and reconfiguration time). Every node in a network
+must be set to the same timeout values.
+
+::
+
+   ET1 ET2 | Response Time (us) | Reconfiguration Time (ms)
+   --------|--------------------|--------------------------
+   Off Off |        78          |          840   (Default)
+   Off On  |       285          |         1680
+   On  Off |       563          |         1680
+   On  On  |      1130          |         1680
+
+On means jumper installed, Off means jumper not installed.
+
+
+16-BIT ARCNET
+-------------
+
+The manual of my 8-Bit NONAME ARCnet Card contains another description
+of a 16-Bit Coax / Twisted Pair Card. This description is incomplete,
+because two pages are missing from the manual booklet. (The table
+of contents reports pages ... 2-9, 2-11, 2-12, 3-1, ... but inside
+the booklet there is a different way of counting ... 2-9, 2-10, A-1,
+(empty page), 3-1, ..., 3-18, A-1 (again), A-2)
+Also, the picture of the board layout is not as good as the picture of the
+8-Bit card, because there aren't any labels like "SW1" written on the
+picture.
+
+Should somebody have such a board, please feel free to complete this
+description or to send a mail to me!
+
+This description has been written by Juergen Seifert <seifert@htwm.de>
+using information from the Original
+
+		    "ARCnet Installation Manual"
+
+::
+
+   ___________________________________________________________________
+  <                    _________________  _________________           |
+  >                   |       SW?       ||      SW?        |          |
+  <                   |_________________||_________________|          |
+  >                       ____________________                        |
+  <                      |                    |                       |
+  >                      |                    |                       |
+  <                      |                    |                       |
+  >                      |                    |                       |
+  <                      |                    |                       |
+  >                      |                    |                       |
+  <                      |                    |                       |
+  >                      |____________________|                       |
+  <                                                               ____|
+  >                       ____________________                   |    |
+  <                      |                    |                  | J1 |
+  >                      |                    <                  |    |
+  <                      |____________________|  ? ? ? ? ? ?     |____|
+  >                                             |o|o|o|o|o|o|         |
+  <                                             |o|o|o|o|o|o|         |
+  >                                                                   |
+  <             __                                         ___________|
+  >            |  |                                       |
+  <____________|  |_______________________________________|
+
+
+Setting one of the switches to Off means "1", On means "0".
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in group SW2 are used to set the node ID.
+Each node attached to the network must have a unique node ID which
+must be different from 0.
+Switch 8 serves as the least significant bit (LSB).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are::
+
+    Switch | Value
+    -------|-------
+      8    |   1
+      7    |   2
+      6    |   4
+      5    |   8
+      4    |  16
+      3    |  32
+      2    |  64
+      1    | 128
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   1 2 3 4 5 6 7 8 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The first three switches in switch group SW1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+   Switch      | Hex I/O
+    3   2   1  | Address
+   ------------|--------
+   ON  ON  ON  |  260
+   ON  ON  OFF |  290
+   ON  OFF ON  |  2E0  (Manufacturer's default)
+   ON  OFF OFF |  2F0
+   OFF ON  ON  |  300
+   OFF ON  OFF |  350
+   OFF OFF ON  |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 6-8 of switch group SW1 select the Base of the 16K block.
+Within that 16K address space, the buffer may be assigned any one of four
+positions, determined by the offset, switches 4 and 5 of group SW1::
+
+   Switch     | Hex RAM | Hex ROM
+   8 7 6  5 4 | Address | Address
+   -----------|---------|-----------
+   0 0 0  0 0 |  C0000  |  C2000
+   0 0 0  0 1 |  C0800  |  C2000
+   0 0 0  1 0 |  C1000  |  C2000
+   0 0 0  1 1 |  C1800  |  C2000
+	      |         |
+   0 0 1  0 0 |  C4000  |  C6000
+   0 0 1  0 1 |  C4800  |  C6000
+   0 0 1  1 0 |  C5000  |  C6000
+   0 0 1  1 1 |  C5800  |  C6000
+	      |         |
+   0 1 0  0 0 |  CC000  |  CE000
+   0 1 0  0 1 |  CC800  |  CE000
+   0 1 0  1 0 |  CD000  |  CE000
+   0 1 0  1 1 |  CD800  |  CE000
+	      |         |
+   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
+   0 1 1  0 1 |  D0800  |  D2000
+   0 1 1  1 0 |  D1000  |  D2000
+   0 1 1  1 1 |  D1800  |  D2000
+	      |         |
+   1 0 0  0 0 |  D4000  |  D6000
+   1 0 0  0 1 |  D4800  |  D6000
+   1 0 0  1 0 |  D5000  |  D6000
+   1 0 0  1 1 |  D5800  |  D6000
+	      |         |
+   1 0 1  0 0 |  D8000  |  DA000
+   1 0 1  0 1 |  D8800  |  DA000
+   1 0 1  1 0 |  D9000  |  DA000
+   1 0 1  1 1 |  D9800  |  DA000
+	      |         |
+   1 1 0  0 0 |  DC000  |  DE000
+   1 1 0  0 1 |  DC800  |  DE000
+   1 1 0  1 0 |  DD000  |  DE000
+   1 1 0  1 1 |  DD800  |  DE000
+	      |         |
+   1 1 1  0 0 |  E0000  |  E2000
+   1 1 1  0 1 |  E0800  |  E2000
+   1 1 1  1 0 |  E1000  |  E2000
+   1 1 1  1 1 |  E1800  |  E2000
+
+
+Setting Interrupt Request Lines (IRQ)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+??????????????????????????????????????
+
+
+Setting the Timeouts
+^^^^^^^^^^^^^^^^^^^^
+
+??????????????????????????????????????
+
+
+8-bit cards ("Made in Taiwan R.O.C.")
+-------------------------------------
+
+  - from Vojtech Pavlik <vojtech@suse.cz>
+
+I have named this ARCnet card "NONAME", since I got only the card with
+no manual at all and the only text identifying the manufacturer is
+"MADE IN TAIWAN R.O.C" printed on the card.
+
+::
+
+	  ____________________________________________________________
+	 |                 1 2 3 4 5 6 7 8                            |
+	 | |o|o| JP1       o|o|o|o|o|o|o|o| ON                        |
+	 |  +              o|o|o|o|o|o|o|o|                        ___|
+	 |  _____________  o|o|o|o|o|o|o|o| OFF         _____     |   | ID7
+	 | |             | SW1                         |     |    |   | ID6
+	 | > RAM (2k)    |        ____________________ |  H  |    | S | ID5
+	 | |_____________|       |                    ||  y  |    | W | ID4
+	 |                       |                    ||  b  |    | 2 | ID3
+	 |                       |                    ||  r  |    |   | ID2
+	 |                       |                    ||  i  |    |   | ID1
+	 |                       |       90C65        ||  d  |    |___| ID0
+	 |      SW3              |                    ||     |        |
+	 | |o|o|o|o|o|o|o|o| ON  |                    ||  I  |        |
+	 | |o|o|o|o|o|o|o|o|     |                    ||  C  |        |
+	 | |o|o|o|o|o|o|o|o| OFF |____________________||     |   _____|
+	 |  1 2 3 4 5 6 7 8                            |     |  |     |___
+	 |  ______________                             |     |  | BNC |___|
+	 | |              |                            |_____|  |_____|
+	 | > EPROM SOCKET |                                           |
+	 | |______________|                                           |
+	 |                                              ______________|
+	 |                                             |
+	 |_____________________________________________|
+
+Legend::
+
+  90C65       ARCNET Chip
+  SW1 1-5:    Base Memory Address Select
+      6-8:    Base I/O Address Select
+  SW2 1-8:    Node ID Select (ID0-ID7)
+  SW3 1-5:    IRQ Select
+      6-7:    Extra Timeout
+      8  :    ROM Enable
+  JP1         Led connector
+  BNC         Coax connector
+
+Although the jumpers SW1 and SW3 are marked SW, not JP, they are jumpers, not
+switches.
+
+Setting the jumpers to ON means connecting the upper two pins, off the bottom
+two - or - in case of IRQ setting, connecting none of them at all.
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in SW2 are used to set the node ID. Each node attached
+to the network must have a unique node ID which must not be 0.
+Switch 1 (ID0) serves as the least significant bit (LSB).
+
+Setting one of the switches to Off means "1", On means "0".
+
+The node ID is the sum of the values of all switches set to "1"
+These values are::
+
+   Switch | Label | Value
+   -------|-------|-------
+     1    | ID0   |   1
+     2    | ID1   |   2
+     3    | ID2   |   4
+     4    | ID3   |   8
+     5    | ID4   |  16
+     6    | ID5   |  32
+     7    | ID6   |  64
+     8    | ID7   | 128
+
+Some Examples::
+
+    Switch         | Hex     | Decimal
+   8 7 6 5 4 3 2 1 | Node ID | Node ID
+   ----------------|---------|---------
+   0 0 0 0 0 0 0 0 |    not allowed
+   0 0 0 0 0 0 0 1 |    1    |    1
+   0 0 0 0 0 0 1 0 |    2    |    2
+   0 0 0 0 0 0 1 1 |    3    |    3
+       . . .       |         |
+   0 1 0 1 0 1 0 1 |   55    |   85
+       . . .       |         |
+   1 0 1 0 1 0 1 0 |   AA    |  170
+       . . .       |         |
+   1 1 1 1 1 1 0 1 |   FD    |  253
+   1 1 1 1 1 1 1 0 |   FE    |  254
+   1 1 1 1 1 1 1 1 |   FF    |  255
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The last three switches in switch block SW1 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch      | Hex I/O
+    6   7   8  | Address
+   ------------|--------
+   ON  ON  ON  |  260
+   OFF ON  ON  |  290
+   ON  OFF ON  |  2E0  (Manufacturer's default)
+   OFF OFF ON  |  2F0
+   ON  ON  OFF |  300
+   OFF ON  OFF |  350
+   ON  OFF OFF |  380
+   OFF OFF OFF |  3E0
+
+
+Setting the Base Memory (RAM) buffer Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer (RAM) requires 2K. The base of this buffer can be
+located in any of eight positions. The address of the Boot Prom is
+memory base + 0x2000.
+
+Jumpers 3-5 of jumper block SW1 select the Memory Base address.
+
+::
+
+   Switch              | Hex RAM | Hex ROM
+    1   2   3   4   5  | Address | Address *)
+   --------------------|---------|-----------
+   ON  ON  ON  ON  ON  |  C0000  |  C2000
+   ON  ON  OFF ON  ON  |  C4000  |  C6000
+   ON  ON  ON  OFF ON  |  CC000  |  CE000
+   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
+   ON  ON  ON  ON  OFF |  D4000  |  D6000
+   ON  ON  OFF ON  OFF |  D8000  |  DA000
+   ON  ON  ON  OFF OFF |  DC000  |  DE000
+   ON  ON  OFF OFF OFF |  E0000  |  E2000
+
+  *) To enable the Boot ROM set the jumper 8 of jumper block SW3 to position ON.
+
+Jumpers 1 and 2 probably add 0x0800, 0x1000 and 0x1800 to the RAM address.
+
+Setting the Interrupt Line
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Jumpers 1-5 of the jumper block SW3 control the IRQ level::
+
+    Jumper              |  IRQ
+    1   2   3   4   5   |
+   ----------------------------
+    ON  OFF OFF OFF OFF |  2
+    OFF ON  OFF OFF OFF |  3
+    OFF OFF ON  OFF OFF |  4
+    OFF OFF OFF ON  OFF |  5
+    OFF OFF OFF OFF ON  |  7
+
+
+Setting the Timeout Parameters
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The jumpers 6-7 of the jumper block SW3 are used to determine the timeout
+parameters. These two jumpers are normally left in the OFF position.
+
+
+
+(Generic Model 9058)
+--------------------
+  - from Andrew J. Kroll <ag784@freenet.buffalo.edu>
+  - Sorry this sat in my to-do box for so long, Andrew! (yikes - over a
+    year!)
+
+::
+
+								      _____
+								     |    <
+								     | .---'
+    ________________________________________________________________ | |
+   |                           |     SW2     |                      |  |
+   |   ___________             |_____________|                      |  |
+   |  |           |              1 2 3 4 5 6                     ___|  |
+   |  >  6116 RAM |         _________                         8 |   |  |
+   |  |___________|        |20MHzXtal|                        7 |   |  |
+   |                       |_________|       __________       6 | S |  |
+   |    74LS373                             |          |-     5 | W |  |
+   |   _________                            |      E   |-     4 |   |  |
+   |   >_______|              ______________|..... P   |-     3 | 3 |  |
+   |                         |              |    : O   |-     2 |   |  |
+   |                         |              |    : X   |-     1 |___|  |
+   |   ________________      |              |    : Y   |-           |  |
+   |  |      SW1       |     |      SL90C65 |    :     |-           |  |
+   |  |________________|     |              |    : B   |-           |  |
+   |    1 2 3 4 5 6 7 8      |              |    : O   |-           |  |
+   |                         |_________o____|..../ A   |-    _______|  |
+   |    ____________________                |      R   |-   |       |------,
+   |   |                    |               |      D   |-   |  BNC  |   #  |
+   |   > 2764 PROM SOCKET   |               |__________|-   |_______|------'
+   |   |____________________|              _________                |  |
+   |                                       >________| <- 74LS245    |  |
+   |                                                                |  |
+   |___                                               ______________|  |
+       |H H H H H H H H H H H H H H H H H H H H H H H|               | |
+       |U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U|               | |
+								      \|
+
+Legend::
+
+  SL90C65 	ARCNET Controller / Transceiver /Logic
+  SW1	1-5:	IRQ Select
+	  6:	ET1
+	  7:	ET2
+	  8:	ROM ENABLE
+  SW2	1-3:    Memory Buffer/PROM Address
+	4-6:	I/O Address Map
+  SW3	1-8:	Node ID Select
+  BNC		BNC RG62/U Connection
+		*I* have had success using RG59B/U with *NO* terminators!
+		What gives?!
+
+SW1: Timeouts, Interrupt and ROM
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+To select a hardware interrupt level set one (only one!) of the dip switches
+up (on) SW1...(switches 1-5)
+IRQ3, IRQ4, IRQ5, IRQ7, IRQ2. The Manufacturer's default is IRQ2.
+
+The switches on SW1 labeled EXT1 (switch 6) and EXT2 (switch 7)
+are used to determine the timeout parameters. These two dip switches
+are normally left off (down).
+
+   To enable the 8K Boot PROM, set SW1 switch 8 (labeled ROM) to on (UP).
+   The default is jumper ROM not installed.
+
+
+Setting the I/O Base Address
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The last three switches in switch group SW2 are used to select one
+of eight possible I/O Base addresses using the following table::
+
+
+   Switch | Hex I/O
+   4 5 6  | Address
+   -------|--------
+   0 0 0  |  260
+   0 0 1  |  290
+   0 1 0  |  2E0  (Manufacturer's default)
+   0 1 1  |  2F0
+   1 0 0  |  300
+   1 0 1  |  350
+   1 1 0  |  380
+   1 1 1  |  3E0
+
+
+Setting the Base Memory Address (RAM & ROM)
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The memory buffer requires 2K of a 16K block of RAM. The base of this
+16K block can be located in any of eight positions.
+Switches 1-3 of switch group SW2 select the Base of the 16K block.
+(0 = DOWN, 1 = UP)
+I could, however, only verify two settings...
+
+
+::
+
+   Switch| Hex RAM | Hex ROM
+   1 2 3 | Address | Address
+   ------|---------|-----------
+   0 0 0 |  E0000  |  E2000
+   0 0 1 |  D0000  |  D2000  (Manufacturer's default)
+   0 1 0 |  ?????  |  ?????
+   0 1 1 |  ?????  |  ?????
+   1 0 0 |  ?????  |  ?????
+   1 0 1 |  ?????  |  ?????
+   1 1 0 |  ?????  |  ?????
+   1 1 1 |  ?????  |  ?????
+
+
+Setting the Node ID
+^^^^^^^^^^^^^^^^^^^
+
+The eight switches in group SW3 are used to set the node ID.
+Each node attached to the network must have a unique node ID which
+must be different from 0.
+Switch 1 serves as the least significant bit (LSB).
+Switches in the DOWN position are OFF (0) and in the UP position are ON (1).
+
+The node ID is the sum of the values of all switches set to "1"
+These values are::
+
+    Switch | Value
+    -------|-------
+      1    |   1
+      2    |   2
+      3    |   4
+      4    |   8
+      5    |  16
+      6    |  32
+      7    |  64
+      8    | 128
+
+Some Examples::
+
+      Switch#     |   Hex   | Decimal
+  8 7 6 5 4 3 2 1 | Node ID | Node ID
+  ----------------|---------|---------
+  0 0 0 0 0 0 0 0 |    not allowed  <-.
+  0 0 0 0 0 0 0 1 |    1    |    1    |
+  0 0 0 0 0 0 1 0 |    2    |    2    |
+  0 0 0 0 0 0 1 1 |    3    |    3    |
+      . . .       |         |         |
+  0 1 0 1 0 1 0 1 |   55    |   85    |
+      . . .       |         |         + Don't use 0 or 255!
+  1 0 1 0 1 0 1 0 |   AA    |  170    |
+      . . .       |         |         |
+  1 1 1 1 1 1 0 1 |   FD    |  253    |
+  1 1 1 1 1 1 1 0 |   FE    |  254    |
+  1 1 1 1 1 1 1 1 |   FF    |  255  <-'
+
+
+Tiara
+=====
+
+(model unknown)
+---------------
+
+  - from Christoph Lameter <christoph@lameter.com>
+
+
+Here is information about my card as far as I could figure it out::
+
+
+  ----------------------------------------------- tiara
+  Tiara LanCard of Tiara Computer Systems.
+
+  +----------------------------------------------+
+  !           ! Transmitter Unit !               !
+  !           +------------------+             -------
+  !          MEM                              Coax Connector
+  !  ROM    7654321 <- I/O                     -------
+  !  :  :   +--------+                           !
+  !  :  :   ! 90C66LJ!                         +++
+  !  :  :   !        !                         !D  Switch to set
+  !  :  :   !        !                         !I  the Nodenumber
+  !  :  :   +--------+                         !P
+  !                                            !++
+  !         234567 <- IRQ                      !
+  +------------!!!!!!!!!!!!!!!!!!!!!!!!--------+
+	       !!!!!!!!!!!!!!!!!!!!!!!!
+
+- 0 = Jumper Installed
+- 1 = Open
+
+Top Jumper line Bit 7 = ROM Enable 654=Memory location 321=I/O
+
+Settings for Memory Location (Top Jumper Line)
+
+===     ================
+456     Address selected
+===     ================
+000	C0000
+001     C4000
+010     CC000
+011     D0000
+100     D4000
+101     D8000
+110     DC000
+111     E0000
+===     ================
+
+Settings for I/O Address (Top Jumper Line)
+
+===     ====
+123     Port
+===     ====
+000	260
+001	290
+010	2E0
+011	2F0
+100	300
+101	350
+110	380
+111	3E0
+===     ====
+
+Settings for IRQ Selection (Lower Jumper Line)
+
+====== =====
+234567
+====== =====
+011111 IRQ 2
+101111 IRQ 3
+110111 IRQ 4
+111011 IRQ 5
+111110 IRQ 7
+====== =====
+
+Other Cards
+===========
+
+I have no information on other models of ARCnet cards at the moment.  Please
+send any and all info to:
+
+	apenwarr@worldvisions.ca
+
+Thanks.
diff --git a/Documentation/networking/arcnet-hardware.txt b/Documentation/networking/arcnet-hardware.txt
deleted file mode 100644
index 731de411513c..000000000000
--- a/Documentation/networking/arcnet-hardware.txt
+++ /dev/null
@@ -1,3133 +0,0 @@
- 
------------------------------------------------------------------------------
-1) This file is a supplement to arcnet.txt.  Please read that for general
-   driver configuration help.
------------------------------------------------------------------------------
-2) This file is no longer Linux-specific.  It should probably be moved out of
-   the kernel sources.  Ideas?
------------------------------------------------------------------------------
-
-Because so many people (myself included) seem to have obtained ARCnet cards
-without manuals, this file contains a quick introduction to ARCnet hardware,
-some cabling tips, and a listing of all jumper settings I can find. Please
-e-mail apenwarr@worldvisions.ca with any settings for your particular card,
-or any other information you have!
-
-
-INTRODUCTION TO ARCNET
-----------------------
-
-ARCnet is a network type which works in a way similar to popular Ethernet
-networks but which is also different in some very important ways.
-
-First of all, you can get ARCnet cards in at least two speeds: 2.5 Mbps
-(slower than Ethernet) and 100 Mbps (faster than normal Ethernet).  In fact,
-there are others as well, but these are less common.  The different hardware
-types, as far as I'm aware, are not compatible and so you cannot wire a
-100 Mbps card to a 2.5 Mbps card, and so on.  From what I hear, my driver does
-work with 100 Mbps cards, but I haven't been able to verify this myself,
-since I only have the 2.5 Mbps variety.  It is probably not going to saturate
-your 100 Mbps card.  Stop complaining. :)
-
-You also cannot connect an ARCnet card to any kind of Ethernet card and
-expect it to work.  
-
-There are two "types" of ARCnet - STAR topology and BUS topology.  This
-refers to how the cards are meant to be wired together.  According to most
-available documentation, you can only connect STAR cards to STAR cards and
-BUS cards to BUS cards.  That makes sense, right?  Well, it's not quite
-true; see below under "Cabling."
-
-Once you get past these little stumbling blocks, ARCnet is actually quite a
-well-designed standard.  It uses something called "modified token passing"
-which makes it completely incompatible with so-called "Token Ring" cards,
-but which makes transfers much more reliable than Ethernet does.  In fact,
-ARCnet will guarantee that a packet arrives safely at the destination, and
-even if it can't possibly be delivered properly (ie. because of a cable
-break, or because the destination computer does not exist) it will at least
-tell the sender about it.
-
-Because of the carefully defined action of the "token", it will always make
-a pass around the "ring" within a maximum length of time.  This makes it
-useful for realtime networks.
-
-In addition, all known ARCnet cards have an (almost) identical programming
-interface.  This means that with one ARCnet driver you can support any
-card, whereas with Ethernet each manufacturer uses what is sometimes a
-completely different programming interface, leading to a lot of different,
-sometimes very similar, Ethernet drivers.  Of course, always using the same
-programming interface also means that when high-performance hardware
-facilities like PCI bus mastering DMA appear, it's hard to take advantage of
-them.  Let's not go into that.
-
-One thing that makes ARCnet cards difficult to program for, however, is the
-limit on their packet sizes; standard ARCnet can only send packets that are
-up to 508 bytes in length.  This is smaller than the Internet "bare minimum"
-of 576 bytes, let alone the Ethernet MTU of 1500.  To compensate, an extra
-level of encapsulation is defined by RFC1201, which I call "packet
-splitting," that allows "virtual packets" to grow as large as 64K each,
-although they are generally kept down to the Ethernet-style 1500 bytes.
-
-For more information on the advantages and disadvantages (mostly the
-advantages) of ARCnet networks, you might try the "ARCnet Trade Association"
-WWW page:
-	http://www.arcnet.com
-
-
-CABLING ARCNET NETWORKS
------------------------
-
-This section was rewritten by 
-        Vojtech Pavlik     <vojtech@suse.cz>
-using information from several people, including:
-        Avery Pennarun     <apenwarr@worldvisions.ca>
- 	Stephen A. Wood    <saw@hallc1.cebaf.gov>
- 	John Paul Morrison <jmorriso@bogomips.ee.ubc.ca>
- 	Joachim Koenig     <jojo@repas.de>
-and Avery touched it up a bit, at Vojtech's request.
-
-ARCnet (the classic 2.5 Mbps version) can be connected by two different
-types of cabling: coax and twisted pair.  The other ARCnet-type networks
-(100 Mbps TCNS and 320 kbps - 32 Mbps ARCnet Plus) use different types of
-cabling (Type1, Fiber, C1, C4, C5).
-
-For a coax network, you "should" use 93 Ohm RG-62 cable.  But other cables
-also work fine, because ARCnet is a very stable network. I personally use 75
-Ohm TV antenna cable.
-
-Cards for coax cabling are shipped in two different variants: for BUS and
-STAR network topologies.  They are mostly the same.  The only difference
-lies in the hybrid chip installed.  BUS cards use high impedance output,
-while STAR use low impedance.  Low impedance card (STAR) is electrically
-equal to a high impedance one with a terminator installed.
-
-Usually, the ARCnet networks are built up from STAR cards and hubs.  There
-are two types of hubs - active and passive.  Passive hubs are small boxes
-with four BNC connectors containing four 47 Ohm resistors:
-
-   |         | wires
-   R         + junction
--R-+-R-      R 47 Ohm resistors
-   R
-   |
-
-The shielding is connected together.  Active hubs are much more complicated;
-they are powered and contain electronics to amplify the signal and send it
-to other segments of the net.  They usually have eight connectors.  Active
-hubs come in two variants - dumb and smart.  The dumb variant just
-amplifies, but the smart one decodes to digital and encodes back all packets
-coming through.  This is much better if you have several hubs in the net,
-since many dumb active hubs may worsen the signal quality.
-
-And now to the cabling.  What you can connect together:
-
-1. A card to a card.  This is the simplest way of creating a 2-computer
-   network.
-
-2. A card to a passive hub.  Remember that all unused connectors on the hub
-   must be properly terminated with 93 Ohm (or something else if you don't
-   have the right ones) terminators.
-   	(Avery's note: oops, I didn't know that.  Mine (TV cable) works
-	anyway, though.)
-
-3. A card to an active hub.  There is no need to terminate the unused
-   connectors, except for aesthetic reasons.  But, there may not be
-   more than eleven active hubs between any two computers.  That of course
-   doesn't limit the number of active hubs on the network.
-   
-4. An active hub to another.
-
-5. An active hub to passive hub.
-
-Remember that you cannot connect two passive hubs together.  The power loss
-implied by such a connection is too high for the net to operate reliably.
-
-An example of a typical ARCnet network:
-
-           R                     S - STAR type card              
-    S------H--------A-------S    R - Terminator
-           |        |            H - Hub                         
-           |        |            A - Active hub                  
-           |   S----H----S                                       
-           S        |                                            
-                    |                                            
-                    S                                            
-                                                                          
-The BUS topology is very similar to the one used by Ethernet.  The only
-difference is in cable and terminators: they should be 93 Ohm.  Ethernet
-uses 50 Ohm impedance. You use T connectors to put the computers on a single
-line of cable, the bus. You have to put terminators at both ends of the
-cable. A typical BUS ARCnet network looks like:
-
-    RT----T------T------T------T------TR
-     B    B      B      B      B      B
-
-  B - BUS type card
-  R - Terminator
-  T - T connector
-
-But that is not all! The two types can be connected together.  According to
-the official documentation the only way of connecting them is using an active
-hub:
-
-         A------T------T------TR
-         |      B      B      B
-     S---H---S
-         |
-         S
-
-The official docs also state that you can use STAR cards at the ends of
-BUS network in place of a BUS card and a terminator:
-
-     S------T------T------S
-            B      B
-
-But, according to my own experiments, you can simply hang a BUS type card
-anywhere in the middle of a cable in a STAR topology network.  And more - you
-can use the bus card in place of any star card if you use a terminator. Then
-you can build very complicated networks fulfilling all your needs!  An
-example:
-
-                                  S
-                                  |
-           RT------T-------T------H------S
-            B      B       B      |
-                                  |       R
-    S------A------T-------T-------A-------H------TR                    
-           |      B       B       |       |      B                         
-           |   S                 BT       |                                 
-           |   |                  |  S----A-----S
-    S------H---A----S             |       | 
-           |   |      S------T----H---S   |
-           S   S             B    R       S  
-                                                               
-A basically different cabling scheme is used with Twisted Pair cabling. Each
-of the TP cards has two RJ (phone-cord style) connectors.  The cards are
-then daisy-chained together using a cable connecting every two neighboring
-cards.  The ends are terminated with RJ 93 Ohm terminators which plug into
-the empty connectors of cards on the ends of the chain.  An example:
-
-          ___________   ___________
-      _R_|_         _|_|_         _|_R_  
-     |     |       |     |       |     |      
-     |Card |       |Card |       |Card |     
-     |_____|       |_____|       |_____|          
-
-
-There are also hubs for the TP topology.  There is nothing difficult
-involved in using them; you just connect a TP chain to a hub on any end or
-even at both.  This way you can create almost any network configuration. 
-The maximum of 11 hubs between any two computers on the net applies here as
-well.  An example:
-
-    RP-------P--------P--------H-----P------P-----PR
-                               |
-      RP-----H--------P--------H-----P------PR
-             |                 |
-             PR                PR
-
-    R - RJ Terminator
-    P - TP Card
-    H - TP Hub
-
-Like any network, ARCnet has a limited cable length.  These are the maximum
-cable lengths between two active ends (an active end being an active hub or
-a STAR card).
-
-		RG-62       93 Ohm up to 650 m
-		RG-59/U     75 Ohm up to 457 m
-		RG-11/U     75 Ohm up to 533 m
-		IBM Type 1 150 Ohm up to 200 m
-		IBM Type 3 100 Ohm up to 100 m
-
-The maximum length of all cables connected to a passive hub is limited to 65
-meters for RG-62 cabling; less for others.  You can see that using passive
-hubs in a large network is a bad idea. The maximum length of a single "BUS
-Trunk" is about 300 meters for RG-62. The maximum distance between the two
-most distant points of the net is limited to 3000 meters. The maximum length
-of a TP cable between two cards/hubs is 650 meters.
-
-
-SETTING THE JUMPERS
--------------------
-
-All ARCnet cards should have a total of four or five different settings:
-
-  - the I/O address:  this is the "port" your ARCnet card is on.  Probed
-    values in the Linux ARCnet driver are only from 0x200 through 0x3F0. (If
-    your card has additional ones, which is possible, please tell me.) This
-    should not be the same as any other device on your system.  According to
-    a doc I got from Novell, MS Windows prefers values of 0x300 or more,
-    eating net connections on my system (at least) otherwise.  My guess is
-    this may be because, if your card is at 0x2E0, probing for a serial port
-    at 0x2E8 will reset the card and probably mess things up royally.
-	- Avery's favourite: 0x300.
-
-  - the IRQ: on  8-bit cards, it might be 2 (9), 3, 4, 5, or 7.
-             on 16-bit cards, it might be 2 (9), 3, 4, 5, 7, or 10-15.
-             
-    Make sure this is different from any other card on your system.  Note
-    that IRQ2 is the same as IRQ9, as far as Linux is concerned.  You can
-    "cat /proc/interrupts" for a somewhat complete list of which ones are in
-    use at any given time.  Here is a list of common usages from Vojtech
-    Pavlik <vojtech@suse.cz>:
-    	("Not on bus" means there is no way for a card to generate this
-	interrupt)
-	IRQ  0 - Timer 0 (Not on bus)
-	IRQ  1 - Keyboard (Not on bus)
-	IRQ  2 - IRQ Controller 2 (Not on bus, nor does it interrupt the CPU)
-	IRQ  3 - COM2
-	IRQ  4 - COM1
-	IRQ  5 - FREE (LPT2 if you have it; sometimes COM3; maybe PLIP)
-	IRQ  6 - Floppy disk controller
-	IRQ  7 - FREE (LPT1 if you don't use the polling driver; PLIP) 
-	IRQ  8 - Realtime Clock Interrupt (Not on bus)
-	IRQ  9 - FREE (VGA vertical sync interrupt if enabled)
-	IRQ 10 - FREE
-	IRQ 11 - FREE
-	IRQ 12 - FREE
-	IRQ 13 - Numeric Coprocessor (Not on bus)
-	IRQ 14 - Fixed Disk Controller
-	IRQ 15 - FREE (Fixed Disk Controller 2 if you have it) 
-	
-	Note: IRQ 9 is used on some video cards for the "vertical retrace"
-	interrupt.  This interrupt would have been handy for things like
-	video games, as it occurs exactly once per screen refresh, but
-	unfortunately IBM cancelled this feature starting with the original
-	VGA and thus many VGA/SVGA cards do not support it.  For this
-	reason, no modern software uses this interrupt and it can almost
-	always be safely disabled, if your video card supports it at all.
-	
-	If your card for some reason CANNOT disable this IRQ (usually there
-	is a jumper), one solution would be to clip the printed circuit
-	contact on the board: it's the fourth contact from the left on the
-	back side.  I take no responsibility if you try this.
-
-	- Avery's favourite: IRQ2 (actually IRQ9).  Watch that VGA, though.
-
-  - the memory address:  Unlike most cards, ARCnets use "shared memory" for
-    copying buffers around.  Make SURE it doesn't conflict with any other
-    used memory in your system!
-	A0000		- VGA graphics memory (ok if you don't have VGA)
-        B0000		- Monochrome text mode
-        C0000		\  One of these is your VGA BIOS - usually C0000.
-        E0000		/
-        F0000		- System BIOS
-
-    Anything less than 0xA0000 is, well, a BAD idea since it isn't above
-    640k.
-	- Avery's favourite: 0xD0000
-
-  - the station address:  Every ARCnet card has its own "unique" network
-    address from 0 to 255.  Unlike Ethernet, you can set this address
-    yourself with a jumper or switch (or on some cards, with special
-    software).  Since it's only 8 bits, you can only have 254 ARCnet cards
-    on a network.  DON'T use 0 or 255, since these are reserved (although
-    neat stuff will probably happen if you DO use them).  By the way, if you
-    haven't already guessed, don't set this the same as any other ARCnet on
-    your network!
-	- Avery's favourite:  3 and 4.  Not that it matters.
-
-  - There may be ETS1 and ETS2 settings.  These may or may not make a
-    difference on your card (many manuals call them "reserved"), but are
-    used to change the delays used when powering up a computer on the
-    network.  This is only necessary when wiring VERY long range ARCnet
-    networks, on the order of 4km or so; in any case, the only real
-    requirement here is that all cards on the network with ETS1 and ETS2
-    jumpers have them in the same position.  Chris Hindy <chrish@io.org>
-    sent in a chart with actual values for this:
-	ET1	ET2	Response Time	Reconfiguration Time
-	---	---	-------------	--------------------
-	open	open	74.7us		840us
-	open	closed	283.4us		1680us
-	closed	open	561.8us		1680us
-	closed	closed	1118.6us	1680us
-    
-    Make sure you set ETS1 and ETS2 to the SAME VALUE for all cards on your
-    network.
-    
-Also, on many cards (not mine, though) there are red and green LEDs. 
-Vojtech Pavlik <vojtech@suse.cz> tells me this is what they mean:
-	GREEN           RED             Status
-	-----		---		------
-	OFF             OFF             Power off
-	OFF             Short flashes   Cabling problems (broken cable or not
-					  terminated)
-	OFF (short)     ON              Card init
-	ON              ON              Normal state - everything OK, nothing
-					  happens
-	ON              Long flashes    Data transfer
-	ON              OFF             Never happens (maybe when wrong ID)
-
-
-The following is all the specific information people have sent me about
-their own particular ARCnet cards.  It is officially a mess, and contains
-huge amounts of duplicated information.  I have no time to fix it.  If you
-want to, PLEASE DO!  Just send me a 'diff -u' of all your changes.
-
-The model # is listed right above specifics for that card, so you should be
-able to use your text viewer's "search" function to find the entry you want. 
-If you don't KNOW what kind of card you have, try looking through the
-various diagrams to see if you can tell.
-
-If your model isn't listed and/or has different settings, PLEASE PLEASE
-tell me.  I had to figure mine out without the manual, and it WASN'T FUN!
-
-Even if your ARCnet model isn't listed, but has the same jumpers as another
-model that is, please e-mail me to say so.
-
-Cards Listed in this file (in this order, mostly):
-
-	Manufacturer	Model #			Bits
-	------------	-------			----
-	SMC		PC100			8
-	SMC		PC110			8
-	SMC		PC120			8
-	SMC		PC130			8
-	SMC		PC270E			8
-	SMC		PC500			16
-	SMC		PC500Longboard		16
-	SMC		PC550Longboard		16
-	SMC		PC600			16
-	SMC		PC710			8
-	SMC?		LCS-8830(-T)		8/16
-	Puredata	PDI507			8
-	CNet Tech	CN120-Series		8
-	CNet Tech	CN160-Series		16
-	Lantech?	UM9065L chipset		8
-	Acer		5210-003		8
-	Datapoint?	LAN-ARC-8		8
-	Topware		TA-ARC/10		8
-	Thomas-Conrad	500-6242-0097 REV A	8
-	Waterloo?	(C)1985 Waterloo Micro. 8
-	No Name		--			8/16
-	No Name		Taiwan R.O.C?		8
-	No Name		Model 9058		8
-	Tiara		Tiara Lancard?		8
-	
-
-** SMC = Standard Microsystems Corp.
-** CNet Tech = CNet Technology, Inc.
-
-
-Unclassified Stuff
-------------------
-  - Please send any other information you can find.
-  
-  - And some other stuff (more info is welcome!):
-     From: root@ultraworld.xs4all.nl (Timo Hilbrink)
-     To: apenwarr@foxnet.net (Avery Pennarun)
-     Date: Wed, 26 Oct 1994 02:10:32 +0000 (GMT)
-     Reply-To: timoh@xs4all.nl
-
-     [...parts deleted...]
-
-     About the jumpers: On my PC130 there is one more jumper, located near the
-     cable-connector and it's for changing to star or bus topology; 
-     closed: star - open: bus
-     On the PC500 are some more jumper-pins, one block labeled with RX,PDN,TXI
-     and another with ALE,LA17,LA18,LA19 these are undocumented..
-
-     [...more parts deleted...]
-
-     --- CUT ---
-
-
-** Standard Microsystems Corp (SMC) **
-PC100, PC110, PC120, PC130 (8-bit cards)
-PC500, PC600 (16-bit cards)
----------------------------------
-  - mainly from Avery Pennarun <apenwarr@worldvisions.ca>.  Values depicted
-    are from Avery's setup.
-  - special thanks to Timo Hilbrink <timoh@xs4all.nl> for noting that PC120,
-    130, 500, and 600 all have the same switches as Avery's PC100. 
-    PC500/600 have several extra, undocumented pins though. (?)
-  - PC110 settings were verified by Stephen A. Wood <saw@cebaf.gov>
-  - Also, the JP- and S-numbers probably don't match your card exactly.  Try
-    to find jumpers/switches with the same number of settings - it's
-    probably more reliable.
-  
-
-     JP5		       [|]    :    :    :    :
-(IRQ Setting)		      IRQ2  IRQ3 IRQ4 IRQ5 IRQ7
-		Put exactly one jumper on exactly one set of pins.
-
-
-                          1  2   3  4  5  6   7  8  9 10
-     S1                /----------------------------------\
-(I/O and Memory        |  1  1 * 0  0  0  0 * 1  1  0  1  |
- addresses)            \----------------------------------/
-                          |--|   |--------|   |--------|
-                          (a)       (b)           (m)
-                          
-                WARNING.  It's very important when setting these which way
-                you're holding the card, and which way you think is '1'!
-                
-                If you suspect that your settings are not being made
-		correctly, try reversing the direction or inverting the
-		switch positions.
-
-		a: The first digit of the I/O address.
-			Setting		Value
-			-------		-----
-			00		0
-			01		1
-			10		2
-			11		3
-
-		b: The second digit of the I/O address.
-			Setting		Value
-			-------		-----
-			0000		0
-			0001		1
-			0010		2
-			...		...
-			1110		E
-			1111		F
-
-		The I/O address is in the form ab0.  For example, if
-		a is 0x2 and b is 0xE, the address will be 0x2E0.
-
-		DO NOT SET THIS LESS THAN 0x200!!!!!
-
-
-		m: The first digit of the memory address.
-			Setting		Value
-			-------		-----
-			0000		0
-			0001		1
-			0010		2
-			...		...
-			1110		E
-			1111		F
-
-		The memory address is in the form m0000.  For example, if
-		m is D, the address will be 0xD0000.
-
-		DO NOT SET THIS TO C0000, F0000, OR LESS THAN A0000!
-
-                          1  2  3  4  5  6  7  8
-     S2                /--------------------------\
-(Station Address)      |  1  1  0  0  0  0  0  0  |
-                       \--------------------------/
-
-			Setting		Value
-			-------		-----
-			00000000	00
-			10000000	01
-			01000000	02
-			...
-			01111111	FE
-			11111111	FF
-
-		Note that this is binary with the digits reversed!
-
-		DO NOT SET THIS TO 0 OR 255 (0xFF)!
-
-
-*****************************************************************************
-
-** Standard Microsystems Corp (SMC) **
-PC130E/PC270E (8-bit cards)
----------------------------
-  - from Juergen Seifert <seifert@htwm.de>
-
-
-STANDARD MICROSYSTEMS CORPORATION (SMC) ARCNET(R)-PC130E/PC270E
-===============================================================
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the following Original SMC Manual 
-
-             "Configuration Guide for
-             ARCNET(R)-PC130E/PC270
-            Network Controller Boards
-                Pub. # 900.044A
-                   June, 1989"
-
-ARCNET is a registered trademark of the Datapoint Corporation
-SMC is a registered trademark of the Standard Microsystems Corporation  
-
-The PC130E, an enhanced version of the PC130 board, is equipped with a 
-standard BNC female connector for connection to RG-62/U coax cable.
-Since this board is designed both for point-to-point connection in star
-networks and for connection to bus networks, it is downwardly compatible 
-with all the other standard boards designed for coax networks (that is,
-the PC120, PC110 and PC100 star topology boards and the PC220, PC210 and 
-PC200 bus topology boards).
-
-The PC270E, an enhanced version of the PC260 board, is equipped with two 
-modular RJ11-type jacks for connection to twisted pair wiring.
-It can be used in a star or a daisy-chained network.
-
-
-         8 7 6 5 4 3 2 1
-    ________________________________________________________________
-   |   |       S1        |                                          |
-   |   |_________________|                                          |
-   |    Offs|Base |I/O Addr                                         |
-   |     RAM Addr |                                              ___|
-   |         ___  ___                                       CR3 |___|
-   |        |   \/   |                                      CR4 |___|
-   |        |  PROM  |                                           ___|
-   |        |        |                                        N |   | 8
-   |        | SOCKET |                                        o |   | 7
-   |        |________|                                        d |   | 6
-   |                   ___________________                    e |   | 5
-   |                  |                   |                   A | S | 4
-   |       |oo| EXT2  |                   |                   d | 2 | 3
-   |       |oo| EXT1  |       SMC         |                   d |   | 2
-   |       |oo| ROM   |      90C63        |                   r |___| 1
-   |       |oo| IRQ7  |                   |               |o|  _____|
-   |       |oo| IRQ5  |                   |               |o| | J1  |
-   |       |oo| IRQ4  |                   |              STAR |_____|
-   |       |oo| IRQ3  |                   |                   | J2  |
-   |       |oo| IRQ2  |___________________|                   |_____|
-   |___                                               ______________|
-       |                                             |
-       |_____________________________________________|
-
-Legend:
-
-SMC 90C63	ARCNET Controller / Transceiver /Logic
-S1	1-3:	I/O Base Address Select
-	4-6:	Memory Base Address Select
-	7-8:	RAM Offset Select
-S2	1-8:	Node ID Select
-EXT		Extended Timeout Select
-ROM		ROM Enable Select
-STAR		Selected - Star Topology	(PC130E only)
-		Deselected - Bus Topology	(PC130E only)
-CR3/CR4		Diagnostic LEDs
-J1		BNC RG62/U Connector		(PC130E only)
-J1		6-position Telephone Jack	(PC270E only)
-J2		6-position Telephone Jack	(PC270E only)
-
-Setting one of the switches to Off/Open means "1", On/Closed means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in group S2 are used to set the node ID.
-These switches work in a way similar to the PC100-series cards; see that
-entry for more information.
-
-
-Setting the I/O Base Address
-----------------------------
-
-The first three switches in switch group S1 are used to select one
-of eight possible I/O Base addresses using the following table
-
-
-   Switch | Hex I/O
-   1 2 3  | Address
-   -------|--------
-   0 0 0  |  260
-   0 0 1  |  290
-   0 1 0  |  2E0  (Manufacturer's default)
-   0 1 1  |  2F0
-   1 0 0  |  300
-   1 0 1  |  350
-   1 1 0  |  380
-   1 1 1  |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer requires 2K of a 16K block of RAM. The base of this
-16K block can be located in any of eight positions.
-Switches 4-6 of switch group S1 select the Base of the 16K block.
-Within that 16K address space, the buffer may be assigned any one of four 
-positions, determined by the offset, switches 7 and 8 of group S1.
-
-   Switch     | Hex RAM | Hex ROM
-   4 5 6  7 8 | Address | Address *)
-   -----------|---------|-----------
-   0 0 0  0 0 |  C0000  |  C2000
-   0 0 0  0 1 |  C0800  |  C2000
-   0 0 0  1 0 |  C1000  |  C2000
-   0 0 0  1 1 |  C1800  |  C2000
-              |         |
-   0 0 1  0 0 |  C4000  |  C6000
-   0 0 1  0 1 |  C4800  |  C6000
-   0 0 1  1 0 |  C5000  |  C6000
-   0 0 1  1 1 |  C5800  |  C6000
-              |         |
-   0 1 0  0 0 |  CC000  |  CE000
-   0 1 0  0 1 |  CC800  |  CE000
-   0 1 0  1 0 |  CD000  |  CE000
-   0 1 0  1 1 |  CD800  |  CE000
-              |         |
-   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
-   0 1 1  0 1 |  D0800  |  D2000
-   0 1 1  1 0 |  D1000  |  D2000
-   0 1 1  1 1 |  D1800  |  D2000
-              |         |
-   1 0 0  0 0 |  D4000  |  D6000
-   1 0 0  0 1 |  D4800  |  D6000
-   1 0 0  1 0 |  D5000  |  D6000
-   1 0 0  1 1 |  D5800  |  D6000
-              |         |
-   1 0 1  0 0 |  D8000  |  DA000
-   1 0 1  0 1 |  D8800  |  DA000
-   1 0 1  1 0 |  D9000  |  DA000
-   1 0 1  1 1 |  D9800  |  DA000
-              |         |
-   1 1 0  0 0 |  DC000  |  DE000
-   1 1 0  0 1 |  DC800  |  DE000
-   1 1 0  1 0 |  DD000  |  DE000
-   1 1 0  1 1 |  DD800  |  DE000
-              |         |
-   1 1 1  0 0 |  E0000  |  E2000
-   1 1 1  0 1 |  E0800  |  E2000
-   1 1 1  1 0 |  E1000  |  E2000
-   1 1 1  1 1 |  E1800  |  E2000
-  
-*) To enable the 8K Boot PROM install the jumper ROM.
-   The default is jumper ROM not installed.
-
-
-Setting the Timeouts and Interrupt
-----------------------------------
-
-The jumpers labeled EXT1 and EXT2 are used to determine the timeout 
-parameters. These two jumpers are normally left open.
-
-To select a hardware interrupt level set one (only one!) of the jumpers
-IRQ2, IRQ3, IRQ4, IRQ5, IRQ7. The Manufacturer's default is IRQ2.
- 
-
-Configuring the PC130E for Star or Bus Topology
------------------------------------------------
-
-The single jumper labeled STAR is used to configure the PC130E board for 
-star or bus topology.
-When the jumper is installed, the board may be used in a star network, when 
-it is removed, the board can be used in a bus topology.
-
-
-Diagnostic LEDs
----------------
-
-Two diagnostic LEDs are visible on the rear bracket of the board.
-The green LED monitors the network activity: the red one shows the
-board activity:
-
- Green  | Status               Red      | Status
- -------|-------------------   ---------|-------------------
-  on    | normal activity      flash/on | data transfer
-  blink | reconfiguration      off      | no data transfer;
-  off   | defective board or            | incorrect memory or
-        | node ID is zero               | I/O address
-
-
-*****************************************************************************
-
-** Standard Microsystems Corp (SMC) **
-PC500/PC550 Longboard (16-bit cards)
--------------------------------------
-  - from Juergen Seifert <seifert@htwm.de>
-
-
-STANDARD MICROSYSTEMS CORPORATION (SMC) ARCNET-PC500/PC550 Long Board
-=====================================================================
-
-Note: There is another Version of the PC500 called Short Version, which 
-      is different in hard- and software! The most important differences
-      are:
-      - The long board has no Shared memory.
-      - On the long board the selection of the interrupt is done by binary
-        coded switch, on the short board directly by jumper.
-        
-[Avery's note: pay special attention to that: the long board HAS NO SHARED
-MEMORY.  This means the current Linux-ARCnet driver can't use these cards. 
-I have obtained a PC500Longboard and will be doing some experiments on it in
-the future, but don't hold your breath.  Thanks again to Juergen Seifert for
-his advice about this!]
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the following Original SMC Manual 
-
-             "Configuration Guide for
-             SMC ARCNET-PC500/PC550
-         Series Network Controller Boards
-             Pub. # 900.033 Rev. A
-                November, 1989"
-
-ARCNET is a registered trademark of the Datapoint Corporation
-SMC is a registered trademark of the Standard Microsystems Corporation  
-
-The PC500 is equipped with a standard BNC female connector for connection
-to RG-62/U coax cable.
-The board is designed both for point-to-point connection in star networks
-and for connection to bus networks.
-
-The PC550 is equipped with two modular RJ11-type jacks for connection
-to twisted pair wiring.
-It can be used in a star or a daisy-chained (BUS) network.
-
-       1 
-       0 9 8 7 6 5 4 3 2 1     6 5 4 3 2 1
-    ____________________________________________________________________
-   < |         SW1         | |     SW2     |                            |
-   > |_____________________| |_____________|                            |
-   <   IRQ    |I/O Addr                                                 |
-   >                                                                 ___|
-   <                                                            CR4 |___|
-   >                                                            CR3 |___|
-   <                                                                 ___|
-   >                                                              N |   | 8
-   <                                                              o |   | 7
-   >                                                              d | S | 6
-   <                                                              e | W | 5
-   >                                                              A | 3 | 4
-   <                                                              d |   | 3
-   >                                                              d |   | 2
-   <                                                              r |___| 1
-   >                                                        |o|    _____|
-   <                                                        |o|   | J1  |
-   >  3 1                                                   JP6   |_____|
-   < |o|o| JP2                                                    | J2  |
-   > |o|o|                                                        |_____|
-   <  4 2__                                               ______________|
-   >    |  |                                             |
-   <____|  |_____________________________________________|
-
-Legend:
-
-SW1	1-6:	I/O Base Address Select
-	7-10:	Interrupt Select
-SW2	1-6:	Reserved for Future Use
-SW3	1-8:	Node ID Select
-JP2	1-4:	Extended Timeout Select
-JP6		Selected - Star Topology	(PC500 only)
-		Deselected - Bus Topology	(PC500 only)
-CR3	Green	Monitors Network Activity
-CR4	Red	Monitors Board Activity
-J1		BNC RG62/U Connector		(PC500 only)
-J1		6-position Telephone Jack	(PC550 only)
-J2		6-position Telephone Jack	(PC550 only)
-
-Setting one of the switches to Off/Open means "1", On/Closed means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in group SW3 are used to set the node ID. Each node
-attached to the network must have a unique node ID which must be
-different from 0.
-Switch 1 serves as the least significant bit (LSB).
-
-The node ID is the sum of the values of all switches set to "1".
-These values are:
-
-    Switch | Value
-    -------|-------
-      1    |   1
-      2    |   2
-      3    |   4
-      4    |   8
-      5    |  16
-      6    |  32
-      7    |  64
-      8    | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   8 7 6 5 4 3 2 1 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255 
-
-
-Setting the I/O Base Address
-----------------------------
-
-The first six switches in switch group SW1 are used to select one
-of 32 possible I/O Base addresses using the following table:
-
-   Switch       | Hex I/O
-   6 5  4 3 2 1 | Address
-   -------------|--------
-   0 1  0 0 0 0 |  200
-   0 1  0 0 0 1 |  210
-   0 1  0 0 1 0 |  220
-   0 1  0 0 1 1 |  230
-   0 1  0 1 0 0 |  240
-   0 1  0 1 0 1 |  250
-   0 1  0 1 1 0 |  260
-   0 1  0 1 1 1 |  270
-   0 1  1 0 0 0 |  280
-   0 1  1 0 0 1 |  290
-   0 1  1 0 1 0 |  2A0
-   0 1  1 0 1 1 |  2B0
-   0 1  1 1 0 0 |  2C0
-   0 1  1 1 0 1 |  2D0
-   0 1  1 1 1 0 |  2E0 (Manufacturer's default)
-   0 1  1 1 1 1 |  2F0
-   1 1  0 0 0 0 |  300
-   1 1  0 0 0 1 |  310
-   1 1  0 0 1 0 |  320
-   1 1  0 0 1 1 |  330
-   1 1  0 1 0 0 |  340
-   1 1  0 1 0 1 |  350
-   1 1  0 1 1 0 |  360
-   1 1  0 1 1 1 |  370
-   1 1  1 0 0 0 |  380
-   1 1  1 0 0 1 |  390
-   1 1  1 0 1 0 |  3A0
-   1 1  1 0 1 1 |  3B0
-   1 1  1 1 0 0 |  3C0
-   1 1  1 1 0 1 |  3D0
-   1 1  1 1 1 0 |  3E0
-   1 1  1 1 1 1 |  3F0
-
-
-Setting the Interrupt
----------------------
-
-Switches seven through ten of switch group SW1 are used to select the 
-interrupt level. The interrupt level is binary coded, so selections 
-from 0 to 15 would be possible, but only the following eight values will
-be supported: 3, 4, 5, 7, 9, 10, 11, 12.
-
-   Switch   | IRQ
-   10 9 8 7 | 
-   ---------|-------- 
-    0 0 1 1 |  3
-    0 1 0 0 |  4
-    0 1 0 1 |  5
-    0 1 1 1 |  7
-    1 0 0 1 |  9 (=2) (default)
-    1 0 1 0 | 10
-    1 0 1 1 | 11
-    1 1 0 0 | 12
-
-
-Setting the Timeouts 
---------------------
-
-The two jumpers JP2 (1-4) are used to determine the timeout parameters. 
-These two jumpers are normally left open.
-Refer to the COM9026 Data Sheet for alternate configurations.
-
-
-Configuring the PC500 for Star or Bus Topology
-----------------------------------------------
-
-The single jumper labeled JP6 is used to configure the PC500 board for 
-star or bus topology.
-When the jumper is installed, the board may be used in a star network, when 
-it is removed, the board can be used in a bus topology.
-
-
-Diagnostic LEDs
----------------
-
-Two diagnostic LEDs are visible on the rear bracket of the board.
-The green LED monitors the network activity; the red one shows the
-board activity:
-
- Green  | Status               Red      | Status
- -------|-------------------   ---------|-------------------
-  on    | normal activity      flash/on | data transfer
-  blink | reconfiguration      off      | no data transfer;
-  off   | defective board or            | incorrect memory or
-        | node ID is zero               | I/O address
-
-
-*****************************************************************************
-
-** SMC **
-PC710 (8-bit card)
-------------------
-  - from J.S. van Oosten <jvoosten@compiler.tdcnet.nl>
-  
-Note: this data is gathered by experimenting and looking at info of other
-cards. However, I'm sure I got 99% of the settings right.
-
-The SMC710 card resembles the PC270 card, but is much more basic (i.e. no
-LEDs, RJ11 jacks, etc.) and 8 bit. Here's a little drawing:
-
-    _______________________________________   
-   | +---------+  +---------+              |____
-   | |   S2    |  |   S1    |              |
-   | +---------+  +---------+              |
-   |                                       |
-   |  +===+    __                          |
-   |  | R |   |  | X-tal                 ###___
-   |  | O |   |__|                      ####__'|
-   |  | M |    ||                        ###
-   |  +===+                                |
-   |                                       |
-   |   .. JP1   +----------+               |
-   |   ..       | big chip |               |   
-   |   ..       |  90C63   |               |
-   |   ..       |          |               |
-   |   ..       +----------+               |
-    -------                     -----------
-           |||||||||||||||||||||
-
-The row of jumpers at JP1 actually consists of 8 jumpers, (sometimes
-labelled) the same as on the PC270, from top to bottom: EXT2, EXT1, ROM,
-IRQ7, IRQ5, IRQ4, IRQ3, IRQ2 (gee, wonder what they would do? :-) )
-
-S1 and S2 perform the same function as on the PC270, only their numbers
-are swapped (S1 is the nodeaddress, S2 sets IO- and RAM-address).
-
-I know it works when connected to a PC110 type ARCnet board.
-
-	
-*****************************************************************************
-
-** Possibly SMC **
-LCS-8830(-T) (8 and 16-bit cards)
----------------------------------
-  - from Mathias Katzer <mkatzer@HRZ.Uni-Bielefeld.DE>
-  - Marek Michalkiewicz <marekm@i17linuxb.ists.pwr.wroc.pl> says the
-    LCS-8830 is slightly different from LCS-8830-T.  These are 8 bit, BUS
-    only (the JP0 jumper is hardwired), and BNC only.
-	
-This is a LCS-8830-T made by SMC, I think ('SMC' only appears on one PLCC,
-nowhere else, not even on the few Xeroxed sheets from the manual).
-
-SMC ARCnet Board Type LCS-8830-T
-
-   ------------------------------------
-  |                                    |
-  |              JP3 88  8 JP2         |
-  |       #####      | \               |
-  |       #####    ET1 ET2          ###|
-  |                              8  ###|
-  |  U3   SW 1                  JP0 ###|  Phone Jacks
-  |  --                             ###|
-  | |  |                               |
-  | |  |   SW2                         |
-  | |  |                               |
-  | |  |  #####                        |
-  |  --   #####                       ####  BNC Connector 
-  |                                   ####
-  |   888888 JP1                       |
-  |   234567                           |
-   --                           -------
-     |||||||||||||||||||||||||||
-      --------------------------
-
-
-SW1: DIP-Switches for Station Address
-SW2: DIP-Switches for Memory Base and I/O Base addresses
-
-JP0: If closed, internal termination on (default open)
-JP1: IRQ Jumpers
-JP2: Boot-ROM enabled if closed
-JP3: Jumpers for response timeout
- 
-U3: Boot-ROM Socket          
-
-
-ET1 ET2     Response Time     Idle Time    Reconfiguration Time
-
-               78                86               840
- X            285               316              1680
-     X        563               624              1680
- X   X       1130              1237              1680
-
-(X means closed jumper)
-
-(DIP-Switch downwards means "0")
-
-The station address is binary-coded with SW1.
-
-The I/O base address is coded with DIP-Switches 6,7 and 8 of SW2:
-
-Switches        Base
-678             Address
-000		260-26f
-100		290-29f
-010		2e0-2ef
-110		2f0-2ff
-001		300-30f
-101		350-35f
-011		380-38f
-111 		3e0-3ef
-
-
-DIP Switches 1-5 of SW2 encode the RAM and ROM Address Range:
-
-Switches        RAM           ROM
-12345           Address Range  Address Range
-00000		C:0000-C:07ff	C:2000-C:3fff
-10000		C:0800-C:0fff
-01000		C:1000-C:17ff
-11000		C:1800-C:1fff
-00100		C:4000-C:47ff	C:6000-C:7fff
-10100		C:4800-C:4fff
-01100		C:5000-C:57ff 
-11100		C:5800-C:5fff
-00010		C:C000-C:C7ff	C:E000-C:ffff
-10010		C:C800-C:Cfff
-01010		C:D000-C:D7ff
-11010		C:D800-C:Dfff
-00110		D:0000-D:07ff	D:2000-D:3fff
-10110		D:0800-D:0fff
-01110		D:1000-D:17ff
-11110		D:1800-D:1fff
-00001		D:4000-D:47ff	D:6000-D:7fff
-10001		D:4800-D:4fff
-01001		D:5000-D:57ff
-11001		D:5800-D:5fff
-00101		D:8000-D:87ff	D:A000-D:bfff
-10101		D:8800-D:8fff
-01101		D:9000-D:97ff
-11101		D:9800-D:9fff 
-00011		D:C000-D:c7ff	D:E000-D:ffff
-10011		D:C800-D:cfff
-01011		D:D000-D:d7ff
-11011		D:D800-D:dfff
-00111		E:0000-E:07ff	E:2000-E:3fff
-10111		E:0800-E:0fff
-01111		E:1000-E:17ff
-11111		E:1800-E:1fff
-
-
-*****************************************************************************
-
-** PureData Corp **
-PDI507 (8-bit card)
---------------------
-  - from Mark Rejhon <mdrejhon@magi.com> (slight modifications by Avery)
-  - Avery's note: I think PDI508 cards (but definitely NOT PDI508Plus cards)
-    are mostly the same as this.  PDI508Plus cards appear to be mainly
-    software-configured.
-
-Jumpers:
-	There is a jumper array at the bottom of the card, near the edge
-        connector.  This array is labelled J1.  They control the IRQs and
-        something else.  Put only one jumper on the IRQ pins.
-
-	ETS1, ETS2 are for timing on very long distance networks.  See the
-	more general information near the top of this file.
-
-	There is a J2 jumper on two pins.  A jumper should be put on them,
-        since it was already there when I got the card.  I don't know what
-        this jumper is for though.
-
-	There is a two-jumper array for J3.  I don't know what it is for,
-        but there were already two jumpers on it when I got the card.  It's
-        a six pin grid in a two-by-three fashion.  The jumpers were
-        configured as follows:
-
-	   .-------.
-	 o | o   o |
-	   :-------:    ------> Accessible end of card with connectors
-	 o | o   o |             in this direction ------->
-	   `-------'
-
-Carl de Billy <CARL@carainfo.com> explains J3 and J4:
-
-	J3 Diagram:
-
-           .-------.
-         o | o   o |
-           :-------:    TWIST Technology
-         o | o   o |
-           `-------'
-           .-------.
-           | o   o | o
-           :-------:    COAX Technology
-           | o   o | o
-           `-------'
-
-  - If using coax cable in a bus topology the J4 jumper must be removed;
-    place it on one pin.
-
-  - If using bus topology with twisted pair wiring move the J3 
-    jumpers so they connect the middle pin and the pins closest to the RJ11
-    Connectors.  Also the J4 jumper must be removed; place it on one pin of
-    J4 jumper for storage.
-
-  - If using a star topology with twisted pair wiring, move the J3
-    jumpers so they connect the middle pin and the pins closest to the RJ11
-    connectors.
-
-
-DIP Switches:
-
-	The DIP switches on the end of the card that is accessible while
-        it is installed are used to set the ARCnet address.  There are 8
-        switches.  Use an address from 1 to 254.
-
-	Switch No.
-	12345678	ARCnet address
-	-----------------------------------------
-	00000000	FF  	(Don't use this!)
-	00000001	FE
-	00000010	FD
-	....
-	11111101	2	
-	11111110	1
-	11111111	0	(Don't use this!)
-
-	There is another array of eight DIP switches at the top of the
-        card.  There are five labelled MS0-MS4 which seem to control the
-        memory address, and another three labelled IO0-IO2 which seem to
-        control the base I/O address of the card.
-
-	This was difficult to test by trial and error, and the I/O addresses
-        are in a weird order.  This was tested by setting the DIP switches,
-        rebooting the computer, and attempting to load ARCETHER at various
-        addresses (mostly between 0x200 and 0x400).  The address that caused
-        the red transmit LED to blink, is the one that I thought works.
-
-	Also, the address 0x3D0 seems to have a special meaning, since the
-        ARCETHER packet driver loaded fine, but without the red LED
-        blinking.  I don't know what 0x3D0 is for though.  I recommend using
-        an address of 0x300 since Windows may not like addresses below
-        0x300.
-
-	IO Switch No.
-	210             I/O address
-	-------------------------------
-	111             0x260
-	110             0x290
-	101             0x2E0
-	100             0x2F0
-	011             0x300
-	010             0x350
-	001             0x380
-	000             0x3E0
-
-	The memory switches set a reserved address space of 0x1000 bytes
-        (0x100 segment units, or 4k).  For example if I set an address of
-        0xD000, it will use up addresses 0xD000 to 0xD100.
-
-	The memory switches were tested by booting using QEMM386 stealth,
-        and using LOADHI to see what address automatically became excluded
-        from the upper memory regions, and then attempting to load ARCETHER
-        using these addresses.
-
-	I recommend using an ARCnet memory address of 0xD000, and putting
-        the EMS page frame at 0xC000 while using QEMM stealth mode.  That
-        way, you get contiguous high memory from 0xD100 almost all the way
-        to the end of the megabyte.
-
-	Memory Switch 0 (MS0) didn't seem to work properly when set to OFF
-        on my card.  It could be malfunctioning on my card.  Experiment with
-        it ON first, and if it doesn't work, set it to OFF.  (It may be a
-        modifier for the 0x200 bit?)
-
-	MS Switch No.
-	43210           Memory address
-	--------------------------------
-	00001           0xE100  (guessed - was not detected by QEMM)
-	00011           0xE000  (guessed - was not detected by QEMM)
-	00101           0xDD00
-	00111           0xDC00
-	01001           0xD900
-	01011           0xD800
-	01101           0xD500
-	01111           0xD400
-	10001           0xD100
-	10011           0xD000
-	10101           0xCD00
-	10111           0xCC00
-	11001           0xC900 (guessed - crashes tested system)
-	11011           0xC800 (guessed - crashes tested system)
-	11101           0xC500 (guessed - crashes tested system)
-	11111           0xC400 (guessed - crashes tested system)
-	
-	
-*****************************************************************************
-
-** CNet Technology Inc. **
-120 Series (8-bit cards)
-------------------------
-  - from Juergen Seifert <seifert@htwm.de>
-
-
-CNET TECHNOLOGY INC. (CNet) ARCNET 120A SERIES
-==============================================
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the following Original CNet Manual 
-
-              "ARCNET
-            USER'S MANUAL 
-                for
-               CN120A
-               CN120AB
-               CN120TP
-               CN120ST
-               CN120SBT
-             P/N:12-01-0007
-             Revision 3.00"
-
-ARCNET is a registered trademark of the Datapoint Corporation
-
-P/N 120A   ARCNET 8 bit XT/AT Star
-P/N 120AB  ARCNET 8 bit XT/AT Bus
-P/N 120TP  ARCNET 8 bit XT/AT Twisted Pair
-P/N 120ST  ARCNET 8 bit XT/AT Star, Twisted Pair
-P/N 120SBT ARCNET 8 bit XT/AT Star, Bus, Twisted Pair
-
-    __________________________________________________________________
-   |                                                                  |
-   |                                                               ___|
-   |                                                          LED |___|
-   |                                                               ___|
-   |                                                            N |   | ID7
-   |                                                            o |   | ID6
-   |                                                            d | S | ID5
-   |                                                            e | W | ID4
-   |                     ___________________                    A | 2 | ID3
-   |                    |                   |                   d |   | ID2
-   |                    |                   |  1 2 3 4 5 6 7 8  d |   | ID1
-   |                    |                   | _________________ r |___| ID0
-   |                    |      90C65        ||       SW1       |  ____|
-   |  JP 8 7            |                   ||_________________| |    |
-   |    |o|o|  JP1      |                   |                    | J2 |
-   |    |o|o|  |oo|     |                   |         JP 1 1 1   |    |
-   |   ______________   |                   |            0 1 2   |____|
-   |  |  PROM        |  |___________________|           |o|o|o|  _____|
-   |  >  SOCKET      |  JP 6 5 4 3 2                    |o|o|o| | J1  |
-   |  |______________|    |o|o|o|o|o|                   |o|o|o| |_____|
-   |_____                 |o|o|o|o|o|                   ______________|
-         |                                             |
-         |_____________________________________________|
-
-Legend:
-
-90C65       ARCNET Probe
-S1  1-5:    Base Memory Address Select
-    6-8:    Base I/O Address Select
-S2  1-8:    Node ID Select (ID0-ID7)
-JP1     ROM Enable Select
-JP2     IRQ2
-JP3     IRQ3
-JP4     IRQ4
-JP5     IRQ5
-JP6     IRQ7
-JP7/JP8     ET1, ET2 Timeout Parameters
-JP10/JP11   Coax / Twisted Pair Select  (CN120ST/SBT only)
-JP12        Terminator Select       (CN120AB/ST/SBT only)
-J1      BNC RG62/U Connector        (all except CN120TP)
-J2      Two 6-position Telephone Jack   (CN120TP/ST/SBT only)
-
-Setting one of the switches to Off means "1", On means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached
-to the network must have a unique node ID which must be different from 0.
-Switch 1 (ID0) serves as the least significant bit (LSB).
-
-The node ID is the sum of the values of all switches set to "1".
-These values are:
-
-   Switch | Label | Value
-   -------|-------|-------
-     1    | ID0   |   1
-     2    | ID1   |   2
-     3    | ID2   |   4
-     4    | ID3   |   8
-     5    | ID4   |  16
-     6    | ID5   |  32
-     7    | ID6   |  64
-     8    | ID7   | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   8 7 6 5 4 3 2 1 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255
-
-
-Setting the I/O Base Address
-----------------------------
-
-The last three switches in switch block SW1 are used to select one
-of eight possible I/O Base addresses using the following table:
-
-
-   Switch      | Hex I/O
-    6   7   8  | Address
-   ------------|--------
-   ON  ON  ON  |  260
-   OFF ON  ON  |  290
-   ON  OFF ON  |  2E0  (Manufacturer's default)
-   OFF OFF ON  |  2F0
-   ON  ON  OFF |  300
-   OFF ON  OFF |  350
-   ON  OFF OFF |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer (RAM) requires 2K. The base of this buffer can be 
-located in any of eight positions. The address of the Boot Prom is
-memory base + 8K or memory base + 0x2000.
-Switches 1-5 of switch block SW1 select the Memory Base address.
-
-   Switch              | Hex RAM | Hex ROM
-    1   2   3   4   5  | Address | Address *)
-   --------------------|---------|-----------
-   ON  ON  ON  ON  ON  |  C0000  |  C2000
-   ON  ON  OFF ON  ON  |  C4000  |  C6000
-   ON  ON  ON  OFF ON  |  CC000  |  CE000
-   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
-   ON  ON  ON  ON  OFF |  D4000  |  D6000
-   ON  ON  OFF ON  OFF |  D8000  |  DA000
-   ON  ON  ON  OFF OFF |  DC000  |  DE000
-   ON  ON  OFF OFF OFF |  E0000  |  E2000
-  
-*) To enable the Boot ROM install the jumper JP1
-
-Note: Since the switches 1 and 2 are always set to ON it may be possible
-      that they can be used to add an offset of 2K, 4K or 6K to the base
-      address, but this feature is not documented in the manual and I
-      haven't tested it yet.
-
-
-Setting the Interrupt Line
---------------------------
-
-To select a hardware interrupt level install one (only one!) of the jumpers
-JP2, JP3, JP4, JP5, JP6. JP2 is the default.
-
-   Jumper | IRQ     
-   -------|-----
-     2    |  2
-     3    |  3
-     4    |  4
-     5    |  5
-     6    |  7
-
-
-Setting the Internal Terminator on CN120AB/TP/SBT
---------------------------------------------------
-
-The jumper JP12 is used to enable the internal terminator. 
-
-                         -----
-       0                |  0  |     
-     -----   ON         |     |  ON
-    |  0  |             |  0  |
-    |     |  OFF         -----   OFF
-    |  0  |                0
-     -----
-   Terminator          Terminator 
-    disabled            enabled
-  
-
-Selecting the Connector Type on CN120ST/SBT
--------------------------------------------
-
-     JP10    JP11        JP10    JP11
-                         -----   -----
-       0       0        |  0  | |  0  |       
-     -----   -----      |     | |     |
-    |  0  | |  0  |     |  0  | |  0  |
-    |     | |     |      -----   -----
-    |  0  | |  0  |        0       0 
-     -----   -----
-     Coaxial Cable       Twisted Pair Cable 
-       (Default)
-
-
-Setting the Timeout Parameters
-------------------------------
-
-The jumpers labeled EXT1 and EXT2 are used to determine the timeout 
-parameters. These two jumpers are normally left open.
-
-
-
-*****************************************************************************
-
-** CNet Technology Inc. **
-160 Series (16-bit cards)
--------------------------
-  - from Juergen Seifert <seifert@htwm.de>
-
-CNET TECHNOLOGY INC. (CNet) ARCNET 160A SERIES
-==============================================
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the following Original CNet Manual 
-
-              "ARCNET
-            USER'S MANUAL 
-                for
-               CN160A
-               CN160AB
-               CN160TP
-             P/N:12-01-0006
-             Revision 3.00"
-
-ARCNET is a registered trademark of the Datapoint Corporation
-
-P/N 160A   ARCNET 16 bit XT/AT Star
-P/N 160AB  ARCNET 16 bit XT/AT Bus
-P/N 160TP  ARCNET 16 bit XT/AT Twisted Pair
-
-   ___________________________________________________________________
-  <                             _________________________          ___|
-  >               |oo| JP2     |                         |    LED |___|
-  <               |oo| JP1     |        9026             |    LED |___|
-  >                            |_________________________|         ___|
-  <                                                             N |   | ID7
-  >                                                      1      o |   | ID6
-  <                                    1 2 3 4 5 6 7 8 9 0      d | S | ID5
-  >         _______________           _____________________     e | W | ID4
-  <        |     PROM      |         |         SW1         |    A | 2 | ID3
-  >        >    SOCKET     |         |_____________________|    d |   | ID2
-  <        |_______________|          | IO-Base   | MEM   |     d |   | ID1
-  >                                                             r |___| ID0
-  <                                                               ____|
-  >                                                              |    |
-  <                                                              | J1 |
-  >                                                              |    |
-  <                                                              |____|
-  >                            1 1 1 1                                |
-  <  3 4 5 6 7      JP     8 9 0 1 2 3                                |
-  > |o|o|o|o|o|           |o|o|o|o|o|o|                               |
-  < |o|o|o|o|o| __        |o|o|o|o|o|o|                    ___________|
-  >            |  |                                       |
-  <____________|  |_______________________________________|
-
-Legend:
-
-9026            ARCNET Probe
-SW1 1-6:    Base I/O Address Select
-    7-10:   Base Memory Address Select
-SW2 1-8:    Node ID Select (ID0-ID7)
-JP1/JP2     ET1, ET2 Timeout Parameters
-JP3-JP13    Interrupt Select
-J1      BNC RG62/U Connector        (CN160A/AB only)
-J1      Two 6-position Telephone Jack   (CN160TP only)
-LED
-
-Setting one of the switches to Off means "1", On means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached
-to the network must have a unique node ID which must be different from 0.
-Switch 1 (ID0) serves as the least significant bit (LSB).
-
-The node ID is the sum of the values of all switches set to "1".
-These values are:
-
-   Switch | Label | Value
-   -------|-------|-------
-     1    | ID0   |   1
-     2    | ID1   |   2
-     3    | ID2   |   4
-     4    | ID3   |   8
-     5    | ID4   |  16
-     6    | ID5   |  32
-     7    | ID6   |  64
-     8    | ID7   | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   8 7 6 5 4 3 2 1 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255
-
-
-Setting the I/O Base Address
-----------------------------
-
-The first six switches in switch block SW1 are used to select the I/O Base
-address using the following table:
-
-             Switch        | Hex I/O
-    1   2   3   4   5   6  | Address
-   ------------------------|--------
-   OFF ON  ON  OFF OFF ON  |  260
-   OFF ON  OFF ON  ON  OFF |  290
-   OFF ON  OFF OFF OFF ON  |  2E0  (Manufacturer's default)
-   OFF ON  OFF OFF OFF OFF |  2F0
-   OFF OFF ON  ON  ON  ON  |  300
-   OFF OFF ON  OFF ON  OFF |  350
-   OFF OFF OFF ON  ON  ON  |  380
-   OFF OFF OFF OFF OFF ON  |  3E0
-
-Note: Other IO-Base addresses seem to be selectable, but only the above
-      combinations are documented.
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The switches 7-10 of switch block SW1 are used to select the Memory
-Base address of the RAM (2K) and the PROM.
-
-   Switch          | Hex RAM | Hex ROM
-    7   8   9  10  | Address | Address
-   ----------------|---------|-----------
-   OFF OFF ON  ON  |  C0000  |  C8000
-   OFF OFF ON  OFF |  D0000  |  D8000 (Default)
-   OFF OFF OFF ON  |  E0000  |  E8000
-
-Note: Other MEM-Base addresses seem to be selectable, but only the above
-      combinations are documented.
-
-
-Setting the Interrupt Line
---------------------------
-
-To select a hardware interrupt level install one (only one!) of the jumpers
-JP3 through JP13 using the following table:
-
-   Jumper | IRQ     
-   -------|-----------------
-     3    |  14
-     4    |  15
-     5    |  12
-     6    |  11
-     7    |  10
-     8    |   3
-     9    |   4
-    10    |   5
-    11    |   6
-    12    |   7
-    13    |   2 (=9) Default!
-
-Note:  - Do not use JP11=IRQ6, it may conflict with your Floppy Disk
-         Controller
-       - Use JP3=IRQ14 only, if you don't have an IDE-, MFM-, or RLL-
-         Hard Disk, it may conflict with their controllers
-
-
-Setting the Timeout Parameters
-------------------------------
-
-The jumpers labeled JP1 and JP2 are used to determine the timeout
-parameters. These two jumpers are normally left open.
-
-
-*****************************************************************************
-
-** Lantech **
-8-bit card, unknown model
--------------------------
-  - from Vlad Lungu <vlungu@ugal.ro> - his e-mail address seemed broken at
-    the time I tried to reach him.  Sorry Vlad, if you didn't get my reply.
-
-   ________________________________________________________________
-   |   1         8                                                 |
-   |   ___________                                               __|
-   |   |   SW1    |                                         LED |__|
-   |   |__________|                                                |
-   |                                                            ___|
-   |                _____________________                       |S | 8
-   |                |                   |                       |W |
-   |                |                   |                       |2 |
-   |                |                   |                       |__| 1
-   |                |      UM9065L      |     |o|  JP4         ____|____
-   |                |                   |     |o|              |  CN    |
-   |                |                   |                      |________|
-   |                |                   |                          |
-   |                |___________________|                          |
-   |                                                               |
-   |                                                               |
-   |      _____________                                            |
-   |      |            |                                           |
-   |      |    PROM    |        |ooooo|  JP6                       |
-   |      |____________|        |ooooo|                            |
-   |_____________                                             _   _|
-                |____________________________________________| |__|
-
-
-UM9065L : ARCnet Controller
-
-SW 1    : Shared Memory Address and I/O Base
-
-        ON=0
-
-        12345|Memory Address
-        -----|--------------
-        00001|  D4000
-        00010|  CC000
-        00110|  D0000
-        01110|  D1000
-        01101|  D9000
-        10010|  CC800
-        10011|  DC800
-        11110|  D1800
-
-It seems that the bits are considered in reverse order.  Also, you must
-observe that some of those addresses are unusual and I didn't probe them; I
-used a memory dump in DOS to identify them.  For the 00000 configuration and
-some others that I didn't write here the card seems to conflict with the
-video card (an S3 GENDAC). I leave the full decoding of those addresses to
-you.
-
-        678| I/O Address
-        ---|------------
-        000|    260
-        001|    failed probe
-        010|    2E0
-        011|    380
-        100|    290
-        101|    350
-        110|    failed probe
-        111|    3E0
-
-SW 2  : Node ID (binary coded)
-
-JP 4  : Boot PROM enable   CLOSE - enabled
-                           OPEN  - disabled
-
-JP 6  : IRQ set (ONLY ONE jumper on 1-5 for IRQ 2-6)
-
-
-*****************************************************************************
-
-** Acer **
-8-bit card, Model 5210-003
---------------------------
-  - from Vojtech Pavlik <vojtech@suse.cz> using portions of the existing
-    arcnet-hardware file.
-
-This is a 90C26 based card.  Its configuration seems similar to the SMC
-PC100, but has some additional jumpers I don't know the meaning of.
-
-               __
-              |  |
-   ___________|__|_________________________
-  |         |      |                       |
-  |         | BNC  |                       |
-  |         |______|                    ___|
-  |  _____________________             |___  
-  | |                     |                |
-  | | Hybrid IC           |                |
-  | |                     |       o|o J1   |
-  | |_____________________|       8|8      |
-  |                               8|8 J5   |
-  |                               o|o      |
-  |                               8|8      |
-  |__                             8|8      |
- (|__| LED                        o|o      |
-  |                               8|8      |
-  |                               8|8 J15  |
-  |                                        |
-  |                    _____               |
-  |                   |     |   _____      |
-  |                   |     |  |     |  ___|
-  |                   |     |  |     | |    
-  |  _____            | ROM |  | UFS | |    
-  | |     |           |     |  |     | |   
-  | |     |     ___   |     |  |     | |   
-  | |     |    |   |  |__.__|  |__.__| |   
-  | | NCR |    |XTL|   _____    _____  |   
-  | |     |    |___|  |     |  |     | |   
-  | |90C26|           |     |  |     | |   
-  | |     |           | RAM |  | UFS | |   
-  | |     | J17 o|o   |     |  |     | |   
-  | |     | J16 o|o   |     |  |     | |   
-  | |__.__|           |__.__|  |__.__| |   
-  |  ___                               |   
-  | |   |8                             |   
-  | |SW2|                              |   
-  | |   |                              |   
-  | |___|1                             |   
-  |  ___                               |   
-  | |   |10           J18 o|o          |   
-  | |   |                 o|o          |   
-  | |SW1|                 o|o          |   
-  | |   |             J21 o|o          |   
-  | |___|1                             |   
-  |                                    |   
-  |____________________________________|   
-
-
-Legend:
-
-90C26       ARCNET Chip
-XTL         20 MHz Crystal
-SW1 1-6     Base I/O Address Select
-    7-10    Memory Address Select
-SW2 1-8     Node ID Select (ID0-ID7)
-J1-J5       IRQ Select
-J6-J21      Unknown (Probably extra timeouts & ROM enable ...)
-LED1        Activity LED 
-BNC         Coax connector (STAR ARCnet)
-RAM         2k of SRAM
-ROM         Boot ROM socket
-UFS         Unidentified Flying Sockets
-
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached
-to the network must have a unique node ID which must not be 0.
-Switch 1 (ID0) serves as the least significant bit (LSB).
-
-Setting one of the switches to OFF means "1", ON means "0".
-
-The node ID is the sum of the values of all switches set to "1"
-These values are:
-
-   Switch | Value
-   -------|-------
-     1    |   1
-     2    |   2
-     3    |   4
-     4    |   8
-     5    |  16
-     6    |  32
-     7    |  64
-     8    | 128
-
-Don't set this to 0 or 255; these values are reserved.
-
-
-Setting the I/O Base Address
-----------------------------
-
-The switches 1 to 6 of switch block SW1 are used to select one
-of 32 possible I/O Base addresses using the following tables
-   
-          | Hex
-   Switch | Value
-   -------|-------
-     1    | 200  
-     2    | 100  
-     3    |  80  
-     4    |  40  
-     5    |  20  
-     6    |  10 
-
-The I/O address is the sum of all switches set to "1". Remember that
-the I/O address space below 0x200 is RESERVED for the mainboard, so
-switch 1 should be ALWAYS SET TO OFF.
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer (RAM) requires 2K. The base of this buffer can be
-located in any of sixteen positions. However, the addresses below
-A0000 are likely to cause system hang because there's main RAM.
-
-Jumpers 7-10 of switch block SW1 select the Memory Base address.
-
-   Switch          | Hex RAM
-    7   8   9  10  | Address
-   ----------------|---------
-   OFF OFF OFF OFF |  F0000 (conflicts with main BIOS)
-   OFF OFF OFF ON  |  E0000 
-   OFF OFF ON  OFF |  D0000
-   OFF OFF ON  ON  |  C0000 (conflicts with video BIOS)
-   OFF ON  OFF OFF |  B0000 (conflicts with mono video)
-   OFF ON  OFF ON  |  A0000 (conflicts with graphics)
-
-
-Setting the Interrupt Line
---------------------------
-
-Jumpers 1-5 of the jumper block J1 control the IRQ level. ON means 
-shorted, OFF means open.
-
-    Jumper              |  IRQ
-    1   2   3   4   5   |
-   ----------------------------
-    ON  OFF OFF OFF OFF |  7
-    OFF ON  OFF OFF OFF |  5
-    OFF OFF ON  OFF OFF |  4
-    OFF OFF OFF ON  OFF |  3
-    OFF OFF OFF OFF ON  |  2
-
-
-Unknown jumpers & sockets
--------------------------
-
-I know nothing about these. I just guess that J16&J17 are timeout
-jumpers and maybe one of J18-J21 selects ROM. Also J6-J10 and
-J11-J15 are connecting IRQ2-7 to some pins on the UFSs. I can't
-guess the purpose.
-
-
-*****************************************************************************
-
-** Datapoint? **
-LAN-ARC-8, an 8-bit card
-------------------------
-  - from Vojtech Pavlik <vojtech@suse.cz>
-
-This is another SMC 90C65-based ARCnet card. I couldn't identify the
-manufacturer, but it might be DataPoint, because the card has the
-original arcNet logo in its upper right corner.
-
-          _______________________________________________________
-         |                         _________                     |
-         |                        |   SW2   | ON      arcNet     |
-         |                        |_________| OFF             ___|
-         |  _____________         1 ______  8                |   | 8  
-         | |             | SW1     | XTAL | ____________     | S |    
-         | > RAM (2k)    |         |______||            |    | W |    
-         | |_____________|                 |      H     |    | 3 |    
-         |                        _________|_____ y     |    |___| 1  
-         |  _________            |         |     |b     |        |    
-         | |_________|           |         |     |r     |        |    
-         |                       |     SMC |     |i     |        |    
-         |                       |    90C65|     |d     |        |      
-         |  _________            |         |     |      |        |
-         | |   SW1   | ON        |         |     |I     |        |
-         | |_________| OFF       |_________|_____/C     |   _____|
-         |  1       8                      |            |  |     |___
-         |  ______________                 |            |  | BNC |___|
-         | |              |                |____________|  |_____|
-         | > EPROM SOCKET |              _____________           |
-         | |______________|             |_____________|          |
-         |                                         ______________|
-         |                                        | 
-         |________________________________________|
-
-Legend:
-
-90C65       ARCNET Chip 
-SW1 1-5:    Base Memory Address Select
-    6-8:    Base I/O Address Select
-SW2 1-8:    Node ID Select
-SW3 1-5:    IRQ Select   
-    6-7:    Extra Timeout
-    8  :    ROM Enable   
-BNC         Coax connector
-XTAL        20 MHz Crystal
-
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached
-to the network must have a unique node ID which must not be 0.
-Switch 1 serves as the least significant bit (LSB).
-
-Setting one of the switches to Off means "1", On means "0".
-
-The node ID is the sum of the values of all switches set to "1"  
-These values are:
-
-   Switch | Value
-   -------|-------
-     1    |   1
-     2    |   2
-     3    |   4
-     4    |   8
-     5    |  16
-     6    |  32
-     7    |  64
-     8    | 128
-
-
-Setting the I/O Base Address
-----------------------------
-
-The last three switches in switch block SW1 are used to select one
-of eight possible I/O Base addresses using the following table
-
-
-   Switch      | Hex I/O
-    6   7   8  | Address
-   ------------|--------
-   ON  ON  ON  |  260
-   OFF ON  ON  |  290
-   ON  OFF ON  |  2E0  (Manufacturer's default)
-   OFF OFF ON  |  2F0
-   ON  ON  OFF |  300
-   OFF ON  OFF |  350
-   ON  OFF OFF |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer (RAM) requires 2K. The base of this buffer can be 
-located in any of eight positions. The address of the Boot Prom is
-memory base + 0x2000.
-Switches 3-5 of switch block SW1 select the Memory Base address.
-
-   Switch              | Hex RAM | Hex ROM
-    1   2   3   4   5  | Address | Address *)
-   --------------------|---------|-----------
-   ON  ON  ON  ON  ON  |  C0000  |  C2000
-   ON  ON  OFF ON  ON  |  C4000  |  C6000
-   ON  ON  ON  OFF ON  |  CC000  |  CE000
-   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
-   ON  ON  ON  ON  OFF |  D4000  |  D6000
-   ON  ON  OFF ON  OFF |  D8000  |  DA000
-   ON  ON  ON  OFF OFF |  DC000  |  DE000
-   ON  ON  OFF OFF OFF |  E0000  |  E2000
-  
-*) To enable the Boot ROM set the switch 8 of switch block SW3 to position ON.
-
-The switches 1 and 2 probably add 0x0800 and 0x1000 to RAM base address.
-
-
-Setting the Interrupt Line
---------------------------
-
-Switches 1-5 of the switch block SW3 control the IRQ level.
-
-    Jumper              |  IRQ
-    1   2   3   4   5   |
-   ----------------------------
-    ON  OFF OFF OFF OFF |  3
-    OFF ON  OFF OFF OFF |  4
-    OFF OFF ON  OFF OFF |  5
-    OFF OFF OFF ON  OFF |  7
-    OFF OFF OFF OFF ON  |  2
-
-
-Setting the Timeout Parameters
-------------------------------
-
-The switches 6-7 of the switch block SW3 are used to determine the timeout
-parameters.  These two switches are normally left in the OFF position.
-
-
-*****************************************************************************
-
-** Topware **
-8-bit card, TA-ARC/10
--------------------------
-  - from Vojtech Pavlik <vojtech@suse.cz>
-
-This is another very similar 90C65 card. Most of the switches and jumpers
-are the same as on other clones.
-
- _____________________________________________________________________
-|  ___________   |                         |            ______        |
-| |SW2 NODE ID|  |                         |           | XTAL |       |
-| |___________|  |  Hybrid IC              |           |______|       |
-|  ___________   |                         |                        __|    
-| |SW1 MEM+I/O|  |_________________________|                   LED1|__|)   
-| |___________|           1 2                                         |     
-|                     J3 |o|o| TIMEOUT                          ______|    
-|     ______________     |o|o|                                 |      |    
-|    |              |  ___________________                     | RJ   |    
-|    > EPROM SOCKET | |                   \                    |------|     
-|J2  |______________| |                    |                   |      |    
-||o|                  |                    |                   |______|
-||o| ROM ENABLE       |        SMC         |    _________             |
-|     _____________   |       90C65        |   |_________|       _____|    
-|    |             |  |                    |                    |     |___ 
-|    > RAM (2k)    |  |                    |                    | BNC |___|
-|    |_____________|  |                    |                    |_____|    
-|                     |____________________|                          |    
-| ________ IRQ 2 3 4 5 7                  ___________                 |
-||________|   |o|o|o|o|o|                |___________|                |
-|________   J1|o|o|o|o|o|                               ______________|
-         |                                             |
-         |_____________________________________________|
-
-Legend:
-
-90C65       ARCNET Chip
-XTAL        20 MHz Crystal
-SW1 1-5     Base Memory Address Select
-    6-8     Base I/O Address Select
-SW2 1-8     Node ID Select (ID0-ID7)
-J1          IRQ Select
-J2          ROM Enable
-J3          Extra Timeout
-LED1        Activity LED 
-BNC         Coax connector (BUS ARCnet)
-RJ          Twisted Pair Connector (daisy chain)
-
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached to
-the network must have a unique node ID which must not be 0.  Switch 1 (ID0)
-serves as the least significant bit (LSB).
-
-Setting one of the switches to Off means "1", On means "0".
-
-The node ID is the sum of the values of all switches set to "1"
-These values are:
-
-   Switch | Label | Value
-   -------|-------|-------
-     1    | ID0   |   1
-     2    | ID1   |   2
-     3    | ID2   |   4
-     4    | ID3   |   8
-     5    | ID4   |  16
-     6    | ID5   |  32
-     7    | ID6   |  64
-     8    | ID7   | 128
-
-Setting the I/O Base Address
-----------------------------
-
-The last three switches in switch block SW1 are used to select one
-of eight possible I/O Base addresses using the following table:
-
-
-   Switch      | Hex I/O
-    6   7   8  | Address
-   ------------|--------
-   ON  ON  ON  |  260  (Manufacturer's default)
-   OFF ON  ON  |  290
-   ON  OFF ON  |  2E0                         
-   OFF OFF ON  |  2F0
-   ON  ON  OFF |  300
-   OFF ON  OFF |  350
-   ON  OFF OFF |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer (RAM) requires 2K. The base of this buffer can be
-located in any of eight positions. The address of the Boot Prom is
-memory base + 0x2000.
-Switches 3-5 of switch block SW1 select the Memory Base address.
-
-   Switch              | Hex RAM | Hex ROM
-    1   2   3   4   5  | Address | Address *)
-   --------------------|---------|-----------
-   ON  ON  ON  ON  ON  |  C0000  |  C2000
-   ON  ON  OFF ON  ON  |  C4000  |  C6000  (Manufacturer's default) 
-   ON  ON  ON  OFF ON  |  CC000  |  CE000
-   ON  ON  OFF OFF ON  |  D0000  |  D2000  
-   ON  ON  ON  ON  OFF |  D4000  |  D6000
-   ON  ON  OFF ON  OFF |  D8000  |  DA000
-   ON  ON  ON  OFF OFF |  DC000  |  DE000
-   ON  ON  OFF OFF OFF |  E0000  |  E2000
-
-*) To enable the Boot ROM short the jumper J2.
-
-The switches 1 and 2 probably add 0x0800 and 0x1000 to RAM address.
-
-
-Setting the Interrupt Line
---------------------------
-
-Jumpers 1-5 of the jumper block J1 control the IRQ level.  ON means
-shorted, OFF means open.
-
-    Jumper              |  IRQ
-    1   2   3   4   5   |
-   ----------------------------
-    ON  OFF OFF OFF OFF |  2
-    OFF ON  OFF OFF OFF |  3
-    OFF OFF ON  OFF OFF |  4
-    OFF OFF OFF ON  OFF |  5
-    OFF OFF OFF OFF ON  |  7
-
-
-Setting the Timeout Parameters
-------------------------------
-
-The jumpers J3 are used to set the timeout parameters. These two 
-jumpers are normally left open.
-
-  
-*****************************************************************************
-
-** Thomas-Conrad **
-Model #500-6242-0097 REV A (8-bit card)
----------------------------------------
-  - from Lars Karlsson <100617.3473@compuserve.com>
-
-     ________________________________________________________
-   |          ________   ________                           |_____
-   |         |........| |........|                            |
-   |         |________| |________|                         ___|
-   |            SW 3       SW 1                           |   |
-   |         Base I/O   Base Addr.                Station |   |
-   |                                              address |   |
-   |    ______                                    switch  |   |
-   |   |      |                                           |   |
-   |   |      |                                           |___|    
-   |   |      |                                 ______        |___._
-   |   |______|                                |______|         ____| BNC
-   |                                            Jumper-        _____| Connector
-   |   Main chip                                block  _    __|   '  
-   |                                                  | |  |    RJ Connector
-   |                                                  |_|  |    with 110 Ohm
-   |                                                       |__  Terminator
-   |    ___________                                         __|
-   |   |...........|                                       |    RJ-jack
-   |   |...........|    _____                              |    (unused)
-   |   |___________|   |_____|                             |__
-   |  Boot PROM socket IRQ-jumpers                            |_  Diagnostic
-   |________                                       __          _| LED (red)
-            | | | | | | | | | | | | | | | | | | | |  |        |
-            | | | | | | | | | | | | | | | | | | | |  |________|
-                                                              |
-                                                              |
-
-And here are the settings for some of the switches and jumpers on the cards.
-
-
-          I/O
-
-         1 2 3 4 5 6 7 8
-
-2E0----- 0 0 0 1 0 0 0 1
-2F0----- 0 0 0 1 0 0 0 0
-300----- 0 0 0 0 1 1 1 1
-350----- 0 0 0 0 1 1 1 0
-
-"0" in the above example means the switch is off; "1" means that it is on.
-
-
-    ShMem address.
-
-      1 2 3 4 5 6 7 8
-
-CX00--0 0 1 1 | |   |
-DX00--0 0 1 0       |
-X000--------- 1 1   |
-X400--------- 1 0   |
-X800--------- 0 1   |
-XC00--------- 0 0   
-ENHANCED----------- 1
-COMPATIBLE--------- 0
-
-
-       IRQ
-
-
-   3 4 5 7 2
-   . . . . .
-   . . . . .
-
-
-There is a DIP-switch with 8 switches, used to set the shared memory address
-to be used. The first 6 switches set the address, the 7th doesn't have any
-function, and the 8th switch is used to select "compatible" or "enhanced".
-When I got my two cards, one of them had this switch set to "enhanced". That
-card didn't work at all, it wasn't even recognized by the driver. The other
-card had this switch set to "compatible" and it behaved absolutely normally. I
-guess that the switch on one of the cards must have been changed accidentally
-when the card was taken out of its former host. The question remains
-unanswered: what is the purpose of the "enhanced" position?
-
-[Avery's note: "enhanced" probably either disables shared memory (use IO
-ports instead) or disables IO ports (use memory addresses instead).  This
-varies by the type of card involved.  I fail to see how either of these
-enhance anything.  Send me more detailed information about this mode, or
-just use "compatible" mode instead.]
-
-
-*****************************************************************************
-
-** Waterloo Microsystems Inc. ?? **
-8-bit card (C) 1985
--------------------
-  - from Robert Michael Best <rmb117@cs.usask.ca>
-
-[Avery's note: these don't work with my driver for some reason.  These cards
-SEEM to have settings similar to the PDI508Plus, which is
-software-configured and doesn't work with my driver either.  The "Waterloo
-chip" is a boot PROM, probably designed specifically for the University of
-Waterloo.  If you have any further information about this card, please
-e-mail me.]
-
-The probe has not been able to detect the card on any of the J2 settings,
-and I tried them again with the "Waterloo" chip removed.
- 
- _____________________________________________________________________
-| \/  \/              ___  __ __                                      |
-| C4  C4     |^|     | M ||  ^  ||^|                                  |
-| --  --     |_|     | 5 ||     || | C3                               |
-| \/  \/      C10    |___||     ||_|                                  | 
-| C4  C4             _  _ |     |                 ??                  | 
-| --  --            | \/ ||     |                                     | 
-|                   |    ||     |                                     | 
-|                   |    ||  C1 |                                     | 
-|                   |    ||     |  \/                            _____|    
-|                   | C6 ||     |  C9                           |     |___ 
-|                   |    ||     |  --                           | BNC |___| 
-|                   |    ||     |          >C7|                 |_____|
-|                   |    ||     |                                     |
-| __ __             |____||_____|       1 2 3     6                   |
-||  ^  |     >C4|                      |o|o|o|o|o|o| J2    >C4|       |
-||     |                               |o|o|o|o|o|o|                  |
-|| C2  |     >C4|                                          >C4|       |
-||     |                                   >C8|                       |
-||     |       2 3 4 5 6 7  IRQ                            >C4|       |
-||_____|      |o|o|o|o|o|o| J3                                        |
-|_______      |o|o|o|o|o|o|                            _______________|
-        |                                             |
-        |_____________________________________________|
-
-C1 -- "COM9026
-       SMC 8638"
-      In a chip socket.
-
-C2 -- "@Copyright
-       Waterloo Microsystems Inc.
-       1985"
-      In a chip Socket with info printed on a label covering a round window
-      showing the circuit inside. (The window indicates it is an EPROM chip.)
-
-C3 -- "COM9032
-       SMC 8643"
-      In a chip socket.
-
-C4 -- "74LS"
-      9 total no sockets.
-
-M5 -- "50006-136
-       20.000000 MHZ
-       MTQ-T1-S3
-       0 M-TRON 86-40"
-      Metallic case with 4 pins, no socket.
-
-C6 -- "MOSTEK@TC8643
-       MK6116N-20
-       MALAYSIA"
-      No socket.
-
-C7 -- No stamp or label but in a 20 pin chip socket.
-
-C8 -- "PAL10L8CN
-       8623"
-      In a 20 pin socket.
-
-C9 -- "PAl16R4A-2CN
-       8641"
-      In a 20 pin socket.
-
-C10 -- "M8640
-          NMC
-        9306N"
-       In an 8 pin socket.
-
-?? -- Some components on a smaller board and attached with 20 pins all 
-      along the side closest to the BNC connector.  They are coated in a dark
-      resin.
-
-On the board there are two jumper banks labeled J2 and J3. The 
-manufacturer didn't put a J1 on the board. The two boards I have both 
-came with a jumper box for each bank.
-
-J2 -- Numbered 1 2 3 4 5 6. 
-      4 and 5 are not stamped due to solder points.
-       
-J3 -- IRQ 2 3 4 5 6 7
-
-The board itself has a maple leaf stamped just above the irq jumpers 
-and "-2 46-86" beside C2. Between C1 and C6 "ASS 'Y 300163" and "@1986 
-CORMAN CUSTOM ELECTRONICS CORP." stamped just below the BNC connector.
-Below that "MADE IN CANADA"
-
-  
-*****************************************************************************
-
-** No Name **
-8-bit cards, 16-bit cards
--------------------------
-  - from Juergen Seifert <seifert@htwm.de>
-  
-NONAME 8-BIT ARCNET
-===================
-
-I have named this ARCnet card "NONAME", since there is no manufacturer's
-name in the Installation manual or on the shipping box. The only
-hint of the existence of a manufacturer at all is written in copper:
-it is "Made in Taiwan".
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the Original
-                    "ARCnet Installation Manual"
-
-
-    ________________________________________________________________
-   | |STAR| BUS| T/P|                                               |
-   | |____|____|____|                                               |
-   |                            _____________________               |
-   |                           |                     |              |
-   |                           |                     |              |
-   |                           |                     |              |
-   |                           |        SMC          |              |
-   |                           |                     |              |
-   |                           |       COM90C65      |              |
-   |                           |                     |              |
-   |                           |                     |              |
-   |                           |__________-__________|              |
-   |                                                           _____|
-   |      _______________                                     |  CN |
-   |     | PROM          |                                    |_____|
-   |     > SOCKET        |                                          |
-   |     |_______________|         1 2 3 4 5 6 7 8  1 2 3 4 5 6 7 8 |
-   |                               _______________  _______________ |
-   |           |o|o|o|o|o|o|o|o|  |      SW1      ||      SW2      ||
-   |           |o|o|o|o|o|o|o|o|  |_______________||_______________||
-   |___         2 3 4 5 7 E E R        Node ID       IOB__|__MEM____|
-       |        \ IRQ   / T T O                      |
-       |__________________1_2_M______________________|
-
-Legend:
-
-COM90C65:       ARCnet Probe
-SW1 1-8:    Node ID Select
-SW2 1-3:    I/O Base Address Select
-    4-6:    Memory Base Address Select
-    7-8:    RAM Offset Select
-ET1, ET2    Extended Timeout Select
-ROM     ROM Enable Select
-CN              RG62 Coax Connector
-STAR| BUS | T/P Three fields for placing a sign (colored circle)
-                indicating the topology of the card
-
-Setting one of the switches to Off means "1", On means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in group SW1 are used to set the node ID.
-Each node attached to the network must have a unique node ID which
-must be different from 0.
-Switch 8 serves as the least significant bit (LSB).
-
-The node ID is the sum of the values of all switches set to "1"  
-These values are:
-
-    Switch | Value
-    -------|-------
-      8    |   1
-      7    |   2
-      6    |   4
-      5    |   8
-      4    |  16
-      3    |  32
-      2    |  64
-      1    | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   1 2 3 4 5 6 7 8 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255
-
-
-Setting the I/O Base Address
-----------------------------
-
-The first three switches in switch group SW2 are used to select one
-of eight possible I/O Base addresses using the following table
-
-   Switch      | Hex I/O
-    1   2   3  | Address
-   ------------|--------
-   ON  ON  ON  |  260
-   ON  ON  OFF |  290
-   ON  OFF ON  |  2E0  (Manufacturer's default)
-   ON  OFF OFF |  2F0
-   OFF ON  ON  |  300
-   OFF ON  OFF |  350
-   OFF OFF ON  |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer requires 2K of a 16K block of RAM. The base of this
-16K block can be located in any of eight positions.
-Switches 4-6 of switch group SW2 select the Base of the 16K block.
-Within that 16K address space, the buffer may be assigned any one of four
-positions, determined by the offset, switches 7 and 8 of group SW2.
-
-   Switch     | Hex RAM | Hex ROM
-   4 5 6  7 8 | Address | Address *)
-   -----------|---------|-----------
-   0 0 0  0 0 |  C0000  |  C2000
-   0 0 0  0 1 |  C0800  |  C2000
-   0 0 0  1 0 |  C1000  |  C2000
-   0 0 0  1 1 |  C1800  |  C2000
-              |         |
-   0 0 1  0 0 |  C4000  |  C6000
-   0 0 1  0 1 |  C4800  |  C6000
-   0 0 1  1 0 |  C5000  |  C6000
-   0 0 1  1 1 |  C5800  |  C6000
-              |         |
-   0 1 0  0 0 |  CC000  |  CE000
-   0 1 0  0 1 |  CC800  |  CE000
-   0 1 0  1 0 |  CD000  |  CE000
-   0 1 0  1 1 |  CD800  |  CE000
-              |         |
-   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
-   0 1 1  0 1 |  D0800  |  D2000
-   0 1 1  1 0 |  D1000  |  D2000
-   0 1 1  1 1 |  D1800  |  D2000
-              |         |
-   1 0 0  0 0 |  D4000  |  D6000
-   1 0 0  0 1 |  D4800  |  D6000
-   1 0 0  1 0 |  D5000  |  D6000
-   1 0 0  1 1 |  D5800  |  D6000
-              |         |
-   1 0 1  0 0 |  D8000  |  DA000
-   1 0 1  0 1 |  D8800  |  DA000
-   1 0 1  1 0 |  D9000  |  DA000
-   1 0 1  1 1 |  D9800  |  DA000
-              |         |
-   1 1 0  0 0 |  DC000  |  DE000
-   1 1 0  0 1 |  DC800  |  DE000
-   1 1 0  1 0 |  DD000  |  DE000
-   1 1 0  1 1 |  DD800  |  DE000
-              |         |
-   1 1 1  0 0 |  E0000  |  E2000
-   1 1 1  0 1 |  E0800  |  E2000
-   1 1 1  1 0 |  E1000  |  E2000
-   1 1 1  1 1 |  E1800  |  E2000
-  
-*) To enable the 8K Boot PROM install the jumper ROM.
-   The default is jumper ROM not installed.
-
-
-Setting Interrupt Request Lines (IRQ)
--------------------------------------
-
-To select a hardware interrupt level set one (only one!) of the jumpers
-IRQ2, IRQ3, IRQ4, IRQ5 or IRQ7. The manufacturer's default is IRQ2.
- 
-
-Setting the Timeouts
---------------------
-
-The two jumpers labeled ET1 and ET2 are used to determine the timeout
-parameters (response and reconfiguration time). Every node in a network
-must be set to the same timeout values.
-
-   ET1 ET2 | Response Time (us) | Reconfiguration Time (ms)
-   --------|--------------------|--------------------------
-   Off Off |        78          |          840   (Default)
-   Off On  |       285          |         1680
-   On  Off |       563          |         1680
-   On  On  |      1130          |         1680
-
-On means jumper installed, Off means jumper not installed
-
-
-NONAME 16-BIT ARCNET
-====================
-
-The manual of my 8-Bit NONAME ARCnet Card contains another description
-of a 16-Bit Coax / Twisted Pair Card. This description is incomplete,
-because two pages are missing from the manual booklet. (The table
-of contents reports pages ... 2-9, 2-11, 2-12, 3-1, ... but inside
-the booklet there is a different way of counting ... 2-9, 2-10, A-1,
-(empty page), 3-1, ..., 3-18, A-1 (again), A-2)
-Also the picture of the board layout is not as good as the picture of
-8-Bit card, because there isn't any letter like "SW1" written to the
-picture.
-Should somebody have such a board, please feel free to complete this
-description or to send a mail to me!
-
-This description has been written by Juergen Seifert <seifert@htwm.de>
-using information from the Original
-                    "ARCnet Installation Manual"
-
-
-   ___________________________________________________________________
-  <                    _________________  _________________           |
-  >                   |       SW?       ||      SW?        |          |
-  <                   |_________________||_________________|          |
-  >                       ____________________                        |
-  <                      |                    |                       |
-  >                      |                    |                       |
-  <                      |                    |                       |
-  >                      |                    |                       |
-  <                      |                    |                       |
-  >                      |                    |                       |
-  <                      |                    |                       |
-  >                      |____________________|                       |
-  <                                                               ____|
-  >                       ____________________                   |    |
-  <                      |                    |                  | J1 |
-  >                      |                    <                  |    |
-  <                      |____________________|  ? ? ? ? ? ?     |____|
-  >                                             |o|o|o|o|o|o|         |
-  <                                             |o|o|o|o|o|o|         |
-  >                                                                   |
-  <             __                                         ___________|
-  >            |  |                                       |
-  <____________|  |_______________________________________|
-
-
-Setting one of the switches to Off means "1", On means "0".
-
-
-Setting the Node ID
--------------------
-
-The eight switches in group SW2 are used to set the node ID.
-Each node attached to the network must have an unique node ID which
-must be different from 0.
-Switch 8 serves as the least significant bit (LSB).
-
-The node ID is the sum of the values of all switches set to "1"  
-These values are:
-
-    Switch | Value
-    -------|-------
-      8    |   1
-      7    |   2
-      6    |   4
-      5    |   8
-      4    |  16
-      3    |  32
-      2    |  64
-      1    | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   1 2 3 4 5 6 7 8 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255
-
-
-Setting the I/O Base Address
-----------------------------
-
-The first three switches in switch group SW1 are used to select one
-of eight possible I/O Base addresses using the following table
-
-   Switch      | Hex I/O
-    3   2   1  | Address
-   ------------|--------
-   ON  ON  ON  |  260
-   ON  ON  OFF |  290
-   ON  OFF ON  |  2E0  (Manufacturer's default)
-   ON  OFF OFF |  2F0
-   OFF ON  ON  |  300
-   OFF ON  OFF |  350
-   OFF OFF ON  |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer requires 2K of a 16K block of RAM. The base of this
-16K block can be located in any of eight positions.
-Switches 6-8 of switch group SW1 select the Base of the 16K block.
-Within that 16K address space, the buffer may be assigned any one of four
-positions, determined by the offset, switches 4 and 5 of group SW1.
-
-   Switch     | Hex RAM | Hex ROM
-   8 7 6  5 4 | Address | Address
-   -----------|---------|-----------
-   0 0 0  0 0 |  C0000  |  C2000
-   0 0 0  0 1 |  C0800  |  C2000
-   0 0 0  1 0 |  C1000  |  C2000
-   0 0 0  1 1 |  C1800  |  C2000
-              |         |
-   0 0 1  0 0 |  C4000  |  C6000
-   0 0 1  0 1 |  C4800  |  C6000
-   0 0 1  1 0 |  C5000  |  C6000
-   0 0 1  1 1 |  C5800  |  C6000
-              |         |
-   0 1 0  0 0 |  CC000  |  CE000
-   0 1 0  0 1 |  CC800  |  CE000
-   0 1 0  1 0 |  CD000  |  CE000
-   0 1 0  1 1 |  CD800  |  CE000
-              |         |
-   0 1 1  0 0 |  D0000  |  D2000  (Manufacturer's default)
-   0 1 1  0 1 |  D0800  |  D2000
-   0 1 1  1 0 |  D1000  |  D2000
-   0 1 1  1 1 |  D1800  |  D2000
-              |         |
-   1 0 0  0 0 |  D4000  |  D6000
-   1 0 0  0 1 |  D4800  |  D6000
-   1 0 0  1 0 |  D5000  |  D6000
-   1 0 0  1 1 |  D5800  |  D6000
-              |         |
-   1 0 1  0 0 |  D8000  |  DA000
-   1 0 1  0 1 |  D8800  |  DA000
-   1 0 1  1 0 |  D9000  |  DA000
-   1 0 1  1 1 |  D9800  |  DA000
-              |         |
-   1 1 0  0 0 |  DC000  |  DE000
-   1 1 0  0 1 |  DC800  |  DE000
-   1 1 0  1 0 |  DD000  |  DE000
-   1 1 0  1 1 |  DD800  |  DE000
-              |         |
-   1 1 1  0 0 |  E0000  |  E2000
-   1 1 1  0 1 |  E0800  |  E2000
-   1 1 1  1 0 |  E1000  |  E2000
-   1 1 1  1 1 |  E1800  |  E2000
-  
-
-Setting Interrupt Request Lines (IRQ)
--------------------------------------
-
-??????????????????????????????????????
-
-
-Setting the Timeouts
---------------------
-
-??????????????????????????????????????
-
-
-*****************************************************************************
-
-** No Name **
-8-bit cards ("Made in Taiwan R.O.C.")
------------
-  - from Vojtech Pavlik <vojtech@suse.cz>
-
-I have named this ARCnet card "NONAME", since I got only the card with
-no manual at all and the only text identifying the manufacturer is 
-"MADE IN TAIWAN R.O.C" printed on the card.
-
-          ____________________________________________________________
-         |                 1 2 3 4 5 6 7 8                            |
-         | |o|o| JP1       o|o|o|o|o|o|o|o| ON                        |
-         |  +              o|o|o|o|o|o|o|o|                        ___|
-         |  _____________  o|o|o|o|o|o|o|o| OFF         _____     |   | ID7
-         | |             | SW1                         |     |    |   | ID6
-         | > RAM (2k)    |        ____________________ |  H  |    | S | ID5
-         | |_____________|       |                    ||  y  |    | W | ID4
-         |                       |                    ||  b  |    | 2 | ID3
-         |                       |                    ||  r  |    |   | ID2
-         |                       |                    ||  i  |    |   | ID1
-         |                       |       90C65        ||  d  |    |___| ID0
-         |      SW3              |                    ||     |        |      
-         | |o|o|o|o|o|o|o|o| ON  |                    ||  I  |        |
-         | |o|o|o|o|o|o|o|o|     |                    ||  C  |        |
-         | |o|o|o|o|o|o|o|o| OFF |____________________||     |   _____|
-         |  1 2 3 4 5 6 7 8                            |     |  |     |___
-         |  ______________                             |     |  | BNC |___|
-         | |              |                            |_____|  |_____|
-         | > EPROM SOCKET |                                           |
-         | |______________|                                           |
-         |                                              ______________|
-         |                                             |
-         |_____________________________________________|
-
-Legend:
-
-90C65       ARCNET Chip 
-SW1 1-5:    Base Memory Address Select
-    6-8:    Base I/O Address Select
-SW2 1-8:    Node ID Select (ID0-ID7)
-SW3 1-5:    IRQ Select   
-    6-7:    Extra Timeout
-    8  :    ROM Enable   
-JP1         Led connector
-BNC         Coax connector
-
-Although the jumpers SW1 and SW3 are marked SW, not JP, they are jumpers, not 
-switches.
-
-Setting the jumpers to ON means connecting the upper two pins, off the bottom 
-two - or - in case of IRQ setting, connecting none of them at all.
-
-Setting the Node ID
--------------------
-
-The eight switches in SW2 are used to set the node ID. Each node attached
-to the network must have an unique node ID which must not be 0.
-Switch 1 (ID0) serves as the least significant bit (LSB).
-
-Setting one of the switches to Off means "1", On means "0".
-
-The node ID is the sum of the values of all switches set to "1"  
-These values are:
-
-   Switch | Label | Value
-   -------|-------|-------
-     1    | ID0   |   1
-     2    | ID1   |   2
-     3    | ID2   |   4
-     4    | ID3   |   8
-     5    | ID4   |  16
-     6    | ID5   |  32
-     7    | ID6   |  64
-     8    | ID7   | 128
-
-Some Examples:
-
-    Switch         | Hex     | Decimal 
-   8 7 6 5 4 3 2 1 | Node ID | Node ID
-   ----------------|---------|---------
-   0 0 0 0 0 0 0 0 |    not allowed
-   0 0 0 0 0 0 0 1 |    1    |    1 
-   0 0 0 0 0 0 1 0 |    2    |    2
-   0 0 0 0 0 0 1 1 |    3    |    3
-       . . .       |         |
-   0 1 0 1 0 1 0 1 |   55    |   85
-       . . .       |         |
-   1 0 1 0 1 0 1 0 |   AA    |  170
-       . . .       |         |  
-   1 1 1 1 1 1 0 1 |   FD    |  253
-   1 1 1 1 1 1 1 0 |   FE    |  254
-   1 1 1 1 1 1 1 1 |   FF    |  255
-
-
-Setting the I/O Base Address
-----------------------------
-
-The last three switches in switch block SW1 are used to select one
-of eight possible I/O Base addresses using the following table
-
-
-   Switch      | Hex I/O
-    6   7   8  | Address
-   ------------|--------
-   ON  ON  ON  |  260
-   OFF ON  ON  |  290
-   ON  OFF ON  |  2E0  (Manufacturer's default)
-   OFF OFF ON  |  2F0
-   ON  ON  OFF |  300
-   OFF ON  OFF |  350
-   ON  OFF OFF |  380
-   OFF OFF OFF |  3E0
-
-
-Setting the Base Memory (RAM) buffer Address
---------------------------------------------
-
-The memory buffer (RAM) requires 2K. The base of this buffer can be 
-located in any of eight positions. The address of the Boot Prom is
-memory base + 0x2000.
-Jumpers 3-5 of jumper block SW1 select the Memory Base address.
-
-   Switch              | Hex RAM | Hex ROM
-    1   2   3   4   5  | Address | Address *)
-   --------------------|---------|-----------
-   ON  ON  ON  ON  ON  |  C0000  |  C2000
-   ON  ON  OFF ON  ON  |  C4000  |  C6000
-   ON  ON  ON  OFF ON  |  CC000  |  CE000
-   ON  ON  OFF OFF ON  |  D0000  |  D2000  (Manufacturer's default)
-   ON  ON  ON  ON  OFF |  D4000  |  D6000
-   ON  ON  OFF ON  OFF |  D8000  |  DA000
-   ON  ON  ON  OFF OFF |  DC000  |  DE000
-   ON  ON  OFF OFF OFF |  E0000  |  E2000
-  
-*) To enable the Boot ROM set the jumper 8 of jumper block SW3 to position ON.
-
-The jumpers 1 and 2 probably add 0x0800, 0x1000 and 0x1800 to RAM adders.
-
-Setting the Interrupt Line
---------------------------
-
-Jumpers 1-5 of the jumper block SW3 control the IRQ level.
-
-    Jumper              |  IRQ
-    1   2   3   4   5   |
-   ----------------------------
-    ON  OFF OFF OFF OFF |  2
-    OFF ON  OFF OFF OFF |  3
-    OFF OFF ON  OFF OFF |  4
-    OFF OFF OFF ON  OFF |  5
-    OFF OFF OFF OFF ON  |  7
-
-
-Setting the Timeout Parameters
-------------------------------
-
-The jumpers 6-7 of the jumper block SW3 are used to determine the timeout 
-parameters. These two jumpers are normally left in the OFF position.
-
-
-*****************************************************************************
-
-** No Name **
-(Generic Model 9058)
---------------------
-  - from Andrew J. Kroll <ag784@freenet.buffalo.edu>
-  - Sorry this sat in my to-do box for so long, Andrew! (yikes - over a
-    year!)
-                                                                      _____
-                                                                     |    <
-                                                                     | .---'
-    ________________________________________________________________ | |
-   |                           |     SW2     |                      |  |
-   |   ___________             |_____________|                      |  |
-   |  |           |              1 2 3 4 5 6                     ___|  |
-   |  >  6116 RAM |         _________                         8 |   |  |
-   |  |___________|        |20MHzXtal|                        7 |   |  |
-   |                       |_________|       __________       6 | S |  |
-   |    74LS373                             |          |-     5 | W |  |
-   |   _________                            |      E   |-     4 |   |  |
-   |   >_______|              ______________|..... P   |-     3 | 3 |  |
-   |                         |              |    : O   |-     2 |   |  |
-   |                         |              |    : X   |-     1 |___|  |
-   |   ________________      |              |    : Y   |-           |  |
-   |  |      SW1       |     |      SL90C65 |    :     |-           |  |
-   |  |________________|     |              |    : B   |-           |  |
-   |    1 2 3 4 5 6 7 8      |              |    : O   |-           |  |
-   |                         |_________o____|..../ A   |-    _______|  |
-   |    ____________________                |      R   |-   |       |------,   
-   |   |                    |               |      D   |-   |  BNC  |   #  |
-   |   > 2764 PROM SOCKET   |               |__________|-   |_______|------'
-   |   |____________________|              _________                |  |
-   |                                       >________| <- 74LS245    |  |
-   |                                                                |  |
-   |___                                               ______________|  |
-       |H H H H H H H H H H H H H H H H H H H H H H H|               | |
-       |U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U_U|               | |
-                                                                      \|
-Legend:
-
-SL90C65 	ARCNET Controller / Transceiver /Logic
-SW1	1-5:	IRQ Select
-	  6:	ET1
-	  7:	ET2
-	  8:	ROM ENABLE 
-SW2	1-3:    Memory Buffer/PROM Address
-	3-6:	I/O Address Map
-SW3	1-8:	Node ID Select
-BNC		BNC RG62/U Connection 
-		*I* have had success using RG59B/U with *NO* terminators!
-		What gives?!
-
-SW1: Timeouts, Interrupt and ROM
----------------------------------
-
-To select a hardware interrupt level set one (only one!) of the dip switches
-up (on) SW1...(switches 1-5)
-IRQ3, IRQ4, IRQ5, IRQ7, IRQ2. The Manufacturer's default is IRQ2.
-
-The switches on SW1 labeled EXT1 (switch 6) and EXT2 (switch 7)
-are used to determine the timeout parameters. These two dip switches
-are normally left off (down).
-
-   To enable the 8K Boot PROM position SW1 switch 8 on (UP) labeled ROM.
-   The default is jumper ROM not installed.
-
-
-Setting the I/O Base Address
-----------------------------
-
-The last three switches in switch group SW2 are used to select one
-of eight possible I/O Base addresses using the following table
-
-
-   Switch | Hex I/O
-   4 5 6  | Address
-   -------|--------
-   0 0 0  |  260
-   0 0 1  |  290
-   0 1 0  |  2E0  (Manufacturer's default)
-   0 1 1  |  2F0
-   1 0 0  |  300
-   1 0 1  |  350
-   1 1 0  |  380
-   1 1 1  |  3E0
-
-
-Setting the Base Memory Address (RAM & ROM)
--------------------------------------------
-
-The memory buffer requires 2K of a 16K block of RAM. The base of this
-16K block can be located in any of eight positions.
-Switches 1-3 of switch group SW2 select the Base of the 16K block.
-(0 = DOWN, 1 = UP)
-I could, however, only verify two settings...
-
-   Switch| Hex RAM | Hex ROM
-   1 2 3 | Address | Address
-   ------|---------|-----------
-   0 0 0 |  E0000  |  E2000
-   0 0 1 |  D0000  |  D2000  (Manufacturer's default)
-   0 1 0 |  ?????  |  ?????
-   0 1 1 |  ?????  |  ?????  
-   1 0 0 |  ?????  |  ?????
-   1 0 1 |  ?????  |  ?????
-   1 1 0 |  ?????  |  ?????
-   1 1 1 |  ?????  |  ?????
-
-
-Setting the Node ID
--------------------
-
-The eight switches in group SW3 are used to set the node ID.
-Each node attached to the network must have an unique node ID which
-must be different from 0.
-Switch 1 serves as the least significant bit (LSB).
-switches in the DOWN position are OFF (0) and in the UP position are ON (1)
-
-The node ID is the sum of the values of all switches set to "1"  
-These values are:
-    Switch | Value
-    -------|-------
-      1    |   1
-      2    |   2
-      3    |   4
-      4    |   8
-      5    |  16
-      6    |  32
-      7    |  64
-      8    | 128
-
-Some Examples:
-
-    Switch#     |   Hex   | Decimal 
-8 7 6 5 4 3 2 1 | Node ID | Node ID
-----------------|---------|---------
-0 0 0 0 0 0 0 0 |    not allowed  <-.
-0 0 0 0 0 0 0 1 |    1    |    1    | 
-0 0 0 0 0 0 1 0 |    2    |    2    |
-0 0 0 0 0 0 1 1 |    3    |    3    |
-    . . .       |         |         |
-0 1 0 1 0 1 0 1 |   55    |   85    |
-    . . .       |         |         + Don't use 0 or 255!
-1 0 1 0 1 0 1 0 |   AA    |  170    |
-    . . .       |         |         |
-1 1 1 1 1 1 0 1 |   FD    |  253    |
-1 1 1 1 1 1 1 0 |   FE    |  254    |
-1 1 1 1 1 1 1 1 |   FF    |  255  <-'
-  
-
-*****************************************************************************
-
-** Tiara **
-(model unknown)
--------------------------
-  - from Christoph Lameter <christoph@lameter.com>
-  
-
-Here is information about my card as far as I could figure it out:
------------------------------------------------ tiara
-Tiara LanCard of Tiara Computer Systems.
-
-+----------------------------------------------+
-!           ! Transmitter Unit !               !
-!           +------------------+             -------
-!          MEM                              Coax Connector
-!  ROM    7654321 <- I/O                     -------
-!  :  :   +--------+                           !
-!  :  :   ! 90C66LJ!                         +++
-!  :  :   !        !                         !D  Switch to set
-!  :  :   !        !                         !I  the Nodenumber
-!  :  :   +--------+                         !P
-!                                            !++
-!         234567 <- IRQ                      !
-+------------!!!!!!!!!!!!!!!!!!!!!!!!--------+
-             !!!!!!!!!!!!!!!!!!!!!!!!
-
-0 = Jumper Installed
-1 = Open
-
-Top Jumper line Bit 7 = ROM Enable 654=Memory location 321=I/O
-
-Settings for Memory Location (Top Jumper Line)
-456     Address selected
-000	C0000
-001     C4000
-010     CC000
-011     D0000
-100     D4000
-101     D8000
-110     DC000     
-111     E0000
-
-Settings for I/O Address (Top Jumper Line)
-123     Port
-000	260
-001	290
-010	2E0
-011	2F0
-100	300
-101	350
-110	380
-111	3E0
-
-Settings for IRQ Selection (Lower Jumper Line)
-234567
-011111 IRQ 2
-101111 IRQ 3
-110111 IRQ 4
-111011 IRQ 5
-111110 IRQ 7
-
-*****************************************************************************
-
-
-Other Cards
------------
-
-I have no information on other models of ARCnet cards at the moment.  Please
-send any and all info to:
-	apenwarr@worldvisions.ca
-
-Thanks.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 96ffad845fd9..5da18e024fcb 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -39,6 +39,7 @@ Contents:
    6lowpan
    6pack
    altera_tse
+   arcnet-hardware
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 08bab46f00d0f0fe9709a05b7cdfe909a4258b01 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:20 +0200
Subject: docs: networking: convert arcnet.txt to ReST

- add SPDX header;
- use document title markup;
- add notes markups;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/arcnet.rst | 594 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/arcnet.txt | 556 ---------------------------------
 Documentation/networking/index.rst  |   1 +
 drivers/net/arcnet/Kconfig          |   6 +-
 4 files changed, 598 insertions(+), 559 deletions(-)
 create mode 100644 Documentation/networking/arcnet.rst
 delete mode 100644 Documentation/networking/arcnet.txt

diff --git a/Documentation/networking/arcnet.rst b/Documentation/networking/arcnet.rst
new file mode 100644
index 000000000000..e93d9820f0f1
--- /dev/null
+++ b/Documentation/networking/arcnet.rst
@@ -0,0 +1,594 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======
+ARCnet
+======
+
+.. note::
+
+   See also arcnet-hardware.rst in this directory for jumper-setting
+   and cabling information if you're like many of us and didn't happen to get a
+   manual with your ARCnet card.
+
+Since no one seems to listen to me otherwise, perhaps a poem will get your
+attention::
+
+		This driver's getting fat and beefy,
+		But my cat is still named Fifi.
+
+Hmm, I think I'm allowed to call that a poem, even though it's only two
+lines.  Hey, I'm in Computer Science, not English.  Give me a break.
+
+The point is:  I REALLY REALLY REALLY REALLY REALLY want to hear from you if
+you test this and get it working.  Or if you don't.  Or anything.
+
+ARCnet 0.32 ALPHA first made it into the Linux kernel 1.1.80 - this was
+nice, but after that even FEWER people started writing to me because they
+didn't even have to install the patch.  <sigh>
+
+Come on, be a sport!  Send me a success report!
+
+(hey, that was even better than my original poem... this is getting bad!)
+
+
+.. warning::
+
+   If you don't e-mail me about your success/failure soon, I may be forced to
+   start SINGING.  And we don't want that, do we?
+
+   (You know, it might be argued that I'm pushing this point a little too much.
+   If you think so, why not flame me in a quick little e-mail?  Please also
+   include the type of card(s) you're using, software, size of network, and
+   whether it's working or not.)
+
+   My e-mail address is: apenwarr@worldvisions.ca
+
+These are the ARCnet drivers for Linux.
+
+This new release (2.91) has been put together by David Woodhouse
+<dwmw2@infradead.org>, in an attempt to tidy up the driver after adding support
+for yet another chipset. Now the generic support has been separated from the
+individual chipset drivers, and the source files aren't quite so packed with
+#ifdefs! I've changed this file a bit, but kept it in the first person from
+Avery, because I didn't want to completely rewrite it.
+
+The previous release resulted from many months of on-and-off effort from me
+(Avery Pennarun), many bug reports/fixes and suggestions from others, and in
+particular a lot of input and coding from Tomasz Motylewski.  Starting with
+ARCnet 2.10 ALPHA, Tomasz's all-new-and-improved RFC1051 support has been
+included and seems to be working fine!
+
+
+Where do I discuss these drivers?
+---------------------------------
+
+Tomasz has been so kind as to set up a new and improved mailing list.
+Subscribe by sending a message with the BODY "subscribe linux-arcnet YOUR
+REAL NAME" to listserv@tichy.ch.uj.edu.pl.  Then, to submit messages to the
+list, mail to linux-arcnet@tichy.ch.uj.edu.pl.
+
+There are archives of the mailing list at:
+
+	http://epistolary.org/mailman/listinfo.cgi/arcnet
+
+The people on linux-net@vger.kernel.org (now defunct, replaced by
+netdev@vger.kernel.org) have also been known to be very helpful, especially
+when we're talking about ALPHA Linux kernels that may or may not work right
+in the first place.
+
+
+Other Drivers and Info
+----------------------
+
+You can try my ARCNET page on the World Wide Web at:
+
+	http://www.qis.net/~jschmitz/arcnet/
+
+Also, SMC (one of the companies that makes ARCnet cards) has a WWW site you
+might be interested in, which includes several drivers for various cards
+including ARCnet.  Try:
+
+	http://www.smc.com/
+
+Performance Technologies makes various network software that supports
+ARCnet:
+
+	http://www.perftech.com/ or ftp to ftp.perftech.com.
+
+Novell makes a networking stack for DOS which includes ARCnet drivers.  Try
+FTPing to ftp.novell.com.
+
+You can get the Crynwr packet driver collection (including arcether.com, the
+one you'll want to use with ARCnet cards) from
+oak.oakland.edu:/simtel/msdos/pktdrvr. It won't work perfectly on a 386+
+without patches, though, and also doesn't like several cards.  Fixed
+versions are available on my WWW page, or via e-mail if you don't have WWW
+access.
+
+
+Installing the Driver
+---------------------
+
+All you will need to do in order to install the driver is::
+
+	make config
+		(be sure to choose ARCnet in the network devices
+		and at least one chipset driver.)
+	make clean
+	make zImage
+
+If you obtained this ARCnet package as an upgrade to the ARCnet driver in
+your current kernel, you will need to first copy arcnet.c over the one in
+the linux/drivers/net directory.
+
+You will know the driver is installed properly if you get some ARCnet
+messages when you reboot into the new Linux kernel.
+
+There are four chipset options:
+
+ 1. Standard ARCnet COM90xx chipset.
+
+This is the normal ARCnet card, which you've probably got. This is the only
+chipset driver which will autoprobe if not told where the card is.
+It takes the following options on the command line::
+
+ com90xx=[<io>[,<irq>[,<shmem>]]][,<name>] | <name>
+
+If you load the chipset support as a module, the options are::
+
+ io=<io> irq=<irq> shmem=<shmem> device=<name>
+
+To disable the autoprobe, just specify "com90xx=" on the kernel command line.
+To specify the name alone, but allow autoprobe, just put "com90xx=<name>"
+
+ 2. ARCnet COM20020 chipset.
+
+This is the new chipset from SMC with support for promiscuous mode (packet
+sniffing), extra diagnostic information, etc. Unfortunately, there is no
+sensible method of autoprobing for these cards. You must specify the I/O
+address on the kernel command line.
+
+The command line options are::
+
+ com20020=<io>[,<irq>[,<node_ID>[,backplane[,CKP[,timeout]]]]][,name]
+
+If you load the chipset support as a module, the options are::
+
+ io=<io> irq=<irq> node=<node_ID> backplane=<backplane> clock=<CKP>
+ timeout=<timeout> device=<name>
+
+The COM20020 chipset allows you to set the node ID in software, overriding the
+default which is still set in DIP switches on the card. If you don't have the
+COM20020 data sheets, and you don't know what the other three options refer
+to, then they won't interest you - forget them.
+
+ 3. ARCnet COM90xx chipset in IO-mapped mode.
+
+This will also work with the normal ARCnet cards, but doesn't use the shared
+memory. It performs less well than the above driver, but is provided in case
+you have a card which doesn't support shared memory, or (strangely) in case
+you have so many ARCnet cards in your machine that you run out of shmem slots.
+If you don't give the IO address on the kernel command line, then the driver
+will not find the card.
+
+The command line options are::
+
+ com90io=<io>[,<irq>][,<name>]
+
+If you load the chipset support as a module, the options are
+``io=<io> irq=<irq> device=<name>``.
+
+ 4. ARCnet RIM I cards.
+
+These are COM90xx chips which are _completely_ memory mapped. The support for
+these is not tested. If you have one, please mail the author with a success
+report. All options must be specified, except the device name.
+Command line options::
+
+ arcrimi=<shmem>,<irq>,<node_ID>[,<name>]
+
+If you load the chipset support as a module, the options are::
+
+ shmem=<shmem> irq=<irq> node=<node_ID> device=<name>
+
+
+Loadable Module Support
+-----------------------
+
+Configure and rebuild Linux.  When asked, answer 'm' to "Generic ARCnet
+support" and to support for your ARCnet chipset if you want to use the
+loadable module. You can also say 'y' to "Generic ARCnet support" and 'm'
+to the chipset support if you wish.
+
+::
+
+	make config
+	make clean
+	make zImage
+	make modules
+
+If you're using a loadable module, you need to use insmod to load it, and
+you can specify various characteristics of your card on the command
+line.  (In recent versions of the driver, autoprobing is much more reliable
+and works as a module, so most of this is now unnecessary.)
+
+For example::
+
+	cd /usr/src/linux/modules
+	insmod arcnet.o
+	insmod com90xx.o
+	insmod com20020.o io=0x2e0 device=eth1
+
+
+Using the Driver
+----------------
+
+If you build your kernel with ARCnet COM90xx support included, it should
+probe for your card automatically when you boot. If you use a different
+chipset driver compiled into the kernel, you must give the necessary options
+on the kernel command line, as detailed above.
+
+Go read the NET-2-HOWTO and ETHERNET-HOWTO for Linux; they should be
+available where you picked up this driver.  Think of your ARCnet as a
+souped-up (or down, as the case may be) Ethernet card.
+
+By the way, be sure to change all references from "eth0" to "arc0" in the
+HOWTOs.  Remember that ARCnet isn't a "true" Ethernet, and the device name
+is DIFFERENT.
+
+
+Multiple Cards in One Computer
+------------------------------
+
+Linux has pretty good support for this now, but since I've been busy, the
+ARCnet driver has somewhat suffered in this respect. COM90xx support, if
+compiled into the kernel, will (try to) autodetect all the installed cards.
+
+If you have other cards, with support compiled into the kernel, then you can
+just repeat the options on the kernel command line, e.g.::
+
+	LILO: linux com20020=0x2e0 com20020=0x380 com90io=0x260
+
+If you have the chipset support built as a loadable module, then you need to
+do something like this::
+
+	insmod -o arc0 com90xx
+	insmod -o arc1 com20020 io=0x2e0
+	insmod -o arc2 com90xx
+
+The ARCnet drivers will now sort out their names automatically.
+
+
+How do I get it to work with...?
+--------------------------------
+
+NFS:
+	Should be fine linux->linux, just pretend you're using Ethernet cards.
+	oak.oakland.edu:/simtel/msdos/nfs has some nice DOS clients.  There
+	is also a DOS-based NFS server called SOSS.  It doesn't multitask
+	quite the way Linux does (actually, it doesn't multitask AT ALL) but
+	you never know what you might need.
+
+	With AmiTCP (and possibly others), you may need to set the following
+	options in your Amiga nfstab:  MD 1024 MR 1024 MW 1024
+	(Thanks to Christian Gottschling <ferksy@indigo.tng.oche.de>
+	for this.)
+
+	Probably these refer to maximum NFS data/read/write block sizes.  I
+	don't know why the defaults on the Amiga didn't work; write to me if
+	you know more.
+
+DOS:
+	If you're using the freeware arcether.com, you might want to install
+	the driver patch from my web page.  It helps with PC/TCP, and also
+	can get arcether to load if it timed out too quickly during
+	initialization.  In fact, if you use it on a 386+ you REALLY need
+	the patch, really.
+
+Windows:
+	See DOS :)  Trumpet Winsock works fine with either the Novell or
+	Arcether client, assuming you remember to load winpkt of course.
+
+LAN Manager and Windows for Workgroups:
+	These programs use protocols that
+	are incompatible with the Internet standard.  They try to pretend
+	the cards are Ethernet, and confuse everyone else on the network.
+
+	However, v2.00 and higher of the Linux ARCnet driver supports this
+	protocol via the 'arc0e' device.  See the section "Using
+	Multiprotocol ARCnet" below for more information.
+
+	Using the freeware Samba server and clients for Linux, you can now
+	interface quite nicely with TCP/IP-based WfWg or Lan Manager
+	networks.
+
+Windows 95:
+	Tools are included with Win95 that let you use either the LANMAN
+	style network drivers (NDIS) or Novell drivers (ODI) to handle your
+	ARCnet packets.  If you use ODI, you'll need to use the 'arc0'
+	device with Linux.  If you use NDIS, then try the 'arc0e' device.
+	See the "Using Multiprotocol ARCnet" section below if you need arc0e,
+	you're completely insane, and/or you need to build some kind of
+	hybrid network that uses both encapsulation types.
+
+OS/2:
+	I've been told it works under Warp Connect with an ARCnet driver from
+	SMC.  You need to use the 'arc0e' interface for this.  If you get
+	the SMC driver to work with the TCP/IP stuff included in the
+	"normal" Warp Bonus Pack, let me know.
+
+	ftp.microsoft.com also has a freeware "Lan Manager for OS/2" client
+	which should use the same protocol as WfWg does.  I had no luck
+	installing it under Warp, however.  Please mail me with any results.
+
+NetBSD/AmiTCP:
+	These use an old version of the Internet standard ARCnet
+	protocol (RFC1051) which is compatible with the Linux driver v2.10
+	ALPHA and above using the arc0s device. (See "Multiprotocol ARCnet"
+	below.)  ** Newer versions of NetBSD apparently support RFC1201.
+
+
+Using Multiprotocol ARCnet
+--------------------------
+
+The ARCnet driver v2.10 ALPHA supports three protocols, each on its own
+"virtual network device":
+
+	======  ===============================================================
+	arc0	RFC1201 protocol, the official Internet standard which just
+		happens to be 100% compatible with Novell's TRXNET driver.
+		Version 1.00 of the ARCnet driver supported _only_ this
+		protocol.  arc0 is the fastest of the three protocols (for
+		whatever reason), and allows larger packets to be used
+		because it supports RFC1201 "packet splitting" operations.
+		Unless you have a specific need to use a different protocol,
+		I strongly suggest that you stick with this one.
+
+	arc0e	"Ethernet-Encapsulation" which sends packets over ARCnet
+		that are actually a lot like Ethernet packets, including the
+		6-byte hardware addresses.  This protocol is compatible with
+		Microsoft's NDIS ARCnet driver, like the one in WfWg and
+		LANMAN.  Because the MTU of 493 is actually smaller than the
+		one "required" by TCP/IP (576), there is a chance that some
+		network operations will not function properly.  The Linux
+		TCP/IP layer can compensate in most cases, however, by
+		automatically fragmenting the TCP/IP packets to make them
+		fit.  arc0e also works slightly more slowly than arc0, for
+		reasons yet to be determined.  (Probably it's the smaller
+		MTU that does it.)
+
+	arc0s	The "[s]imple" RFC1051 protocol is the "previous" Internet
+		standard that is completely incompatible with the new
+		standard.  Some software today, however, continues to
+		support the old standard (and only the old standard)
+		including NetBSD and AmiTCP.  RFC1051 also does not support
+		RFC1201's packet splitting, and the MTU of 507 is still
+		smaller than the Internet "requirement," so it's quite
+		possible that you may run into problems.  It's also slower
+		than RFC1201 by about 25%, for the same reason as arc0e.
+
+		The arc0s support was contributed by Tomasz Motylewski
+		and modified somewhat by me.  Bugs are probably my fault.
+	======  ===============================================================
+
+You can choose not to compile arc0e and arc0s into the driver if you want -
+this will save you a bit of memory and avoid confusion when, e.g., trying to
+use the "NFS-root" stuff in recent Linux kernels.
+
+The arc0e and arc0s devices are created automatically when you first
+ifconfig the arc0 device.  To actually use them, though, you need to also
+ifconfig the other virtual devices you need.  There are a number of ways you
+can set up your network then:
+
+
+1. Single Protocol.
+
+   This is the simplest way to configure your network: use just one of the
+   three available protocols.  As mentioned above, it's a good idea to use
+   only arc0 unless you have a good reason (like some other software, e.g.
+   WfWg, that only works with arc0e).
+
+   If you need only arc0, then the following commands should get you going::
+
+	ifconfig arc0 MY.IP.ADD.RESS
+	route add MY.IP.ADD.RESS arc0
+	route add -net SUB.NET.ADD.RESS arc0
+	[add other local routes here]
+
+   If you need arc0e (and only arc0e), it's a little different::
+
+	ifconfig arc0 MY.IP.ADD.RESS
+	ifconfig arc0e MY.IP.ADD.RESS
+	route add MY.IP.ADD.RESS arc0e
+	route add -net SUB.NET.ADD.RESS arc0e
+
+   arc0s works much the same way as arc0e.
+
+
+2. More than one protocol on the same wire.
+
+   Now things start getting confusing.  To even try it, you may need to be
+   partly crazy.  Here's what *I* did. :) Note that I don't include arc0s in
+   my home network; I don't have any NetBSD or AmiTCP computers, so I only
+   use arc0s during limited testing.
+
+   I have three computers on my home network; two Linux boxes (which prefer
+   RFC1201 protocol, for reasons listed above), and one XT that can't run
+   Linux but runs the free Microsoft LANMAN Client instead.
+
+   Worse, one of the Linux computers (freedom) also has a modem and acts as
+   a router to my Internet provider.  The other Linux box (insight) also has
+   its own IP address and needs to use freedom as its default gateway.  The
+   XT (patience), however, does not have its own Internet IP address and so
+   I assigned it one on a "private subnet" (as defined by RFC1597).
+
+   To start with, take a simple network with just insight and freedom.
+   Insight needs to:
+
+	- talk to freedom via RFC1201 (arc0) protocol, because I like it
+	  more and it's faster.
+	- use freedom as its Internet gateway.
+
+   That's pretty easy to do.  Set up insight like this::
+
+	ifconfig arc0 insight
+	route add insight arc0
+	route add freedom arc0	/* I would use the subnet here (like I said
+					to do in "single protocol" above),
+					but the rest of the subnet
+					unfortunately lies across the PPP
+					link on freedom, which confuses
+					things. */
+	route add default gw freedom
+
+   And freedom gets configured like so::
+
+	ifconfig arc0 freedom
+	route add freedom arc0
+	route add insight arc0
+	/* and default gateway is configured by pppd */
+
+   Great, now insight talks to freedom directly on arc0, and sends packets
+   to the Internet through freedom.  If you didn't know how to do the above,
+   you should probably stop reading this section now because it only gets
+   worse.
+
+   Now, how do I add patience into the network?  It will be using LANMAN
+   Client, which means I need the arc0e device.  It needs to be able to talk
+   to both insight and freedom, and also use freedom as a gateway to the
+   Internet.  (Recall that patience has a "private IP address" which won't
+   work on the Internet; that's okay, I configured Linux IP masquerading on
+   freedom for this subnet).
+
+   So patience (necessarily; I don't have another IP number from my
+   provider) has an IP address on a different subnet than freedom and
+   insight, but needs to use freedom as an Internet gateway.  Worse, most
+   DOS networking programs, including LANMAN, have braindead networking
+   schemes that rely completely on the netmask and a 'default gateway' to
+   determine how to route packets.  This means that to get to freedom or
+   insight, patience WILL send through its default gateway, regardless of
+   the fact that both freedom and insight (courtesy of the arc0e device)
+   could understand a direct transmission.
+
+   I compensate by giving freedom an extra IP address - aliased 'gatekeeper' -
+   that is on my private subnet, the same subnet that patience is on.  I
+   then define gatekeeper to be the default gateway for patience.
+
+   To configure freedom (in addition to the commands above)::
+
+	ifconfig arc0e gatekeeper
+	route add gatekeeper arc0e
+	route add patience arc0e
+
+   This way, freedom will send all packets for patience through arc0e,
+   giving its IP address as gatekeeper (on the private subnet).  When it
+   talks to insight or the Internet, it will use its "freedom" Internet IP
+   address.
+
+   You will notice that we haven't configured the arc0e device on insight.
+   This would work, but is not really necessary, and would require me to
+   assign insight another special IP number from my private subnet.  Since
+   both insight and patience are using freedom as their default gateway, the
+   two can already talk to each other.
+
+   It's quite fortunate that I set things up like this the first time (cough
+   cough) because it's really handy when I boot insight into DOS.  There, it
+   runs the Novell ODI protocol stack, which only works with RFC1201 ARCnet.
+   In this mode it would be impossible for insight to communicate directly
+   with patience, since the Novell stack is incompatible with Microsoft's
+   Ethernet-Encap.  Without changing any settings on freedom or patience, I
+   simply set freedom as the default gateway for insight (now in DOS,
+   remember) and all the forwarding happens "automagically" between the two
+   hosts that would normally not be able to communicate at all.
+
+   For those who like diagrams, I have created two "virtual subnets" on the
+   same physical ARCnet wire.  You can picture it like this::
+
+
+	  [RFC1201 NETWORK]                   [ETHER-ENCAP NETWORK]
+      (registered Internet subnet)           (RFC1597 private subnet)
+
+			     (IP Masquerade)
+	  /---------------\         *            /---------------\
+	  |               |         *            |               |
+	  |               +-Freedom-*-Gatekeeper-+               |
+	  |               |    |    *            |               |
+	  \-------+-------/    |    *            \-------+-------/
+		  |            |                         |
+	       Insight         |                      Patience
+			   (Internet)
+
+
+
+It works: what now?
+-------------------
+
+Send mail describing your setup, preferably including driver version, kernel
+version, ARCnet card model, CPU type, number of systems on your network, and
+list of software in use to me at the following address:
+
+	apenwarr@worldvisions.ca
+
+I do send (sometimes automated) replies to all messages I receive.  My email
+can be weird (and also usually gets forwarded all over the place along the
+way to me), so if you don't get a reply within a reasonable time, please
+resend.
+
+
+It doesn't work: what now?
+--------------------------
+
+Do the same as above, but also include the output of the ifconfig and route
+commands, as well as any pertinent log entries (i.e. anything that starts
+with "arcnet:" and has shown up since the last reboot) in your mail.
+
+If you want to try fixing it yourself (I strongly recommend that you mail me
+about the problem first, since it might already have been solved) you may
+want to try some of the debug levels available.  For heavy testing on
+D_DURING or more, it would be a REALLY good idea to kill your klogd daemon
+first!  D_DURING displays 4-5 lines for each packet sent or received.  D_TX,
+D_RX, and D_SKB actually DISPLAY each packet as it is sent or received,
+which is obviously quite big.
+
+Starting with v2.40 ALPHA, the autoprobe routines have changed
+significantly.  In particular, they won't tell you why the card was not
+found unless you turn on the D_INIT_REASONS debugging flag.
+
+Once the driver is running, you can run the arcdump shell script (available
+from me or in the full ARCnet package, if you have it) as root to list the
+contents of the arcnet buffers at any time.  To make any sense at all out of
+this, you should grab the pertinent RFCs (some are listed near the top of
+arcnet.c).  arcdump assumes your card is at 0xD0000.  If it isn't, edit the
+script.
+
+Buffers 0 and 1 are used for receiving, and Buffers 2 and 3 are for sending.
+Ping-pong buffers are implemented both ways.
+
+If your debug level includes D_DURING and you did NOT define SLOW_XMIT_COPY,
+the buffers are cleared to a constant value of 0x42 every time the card is
+reset (which should only happen when you do an ifconfig up, or when Linux
+decides that the driver is broken).  During a transmit, unused parts of the
+buffer will be cleared to 0x42 as well.  This is to make it easier to figure
+out which bytes are being used by a packet.
+
+You can change the debug level without recompiling the kernel by typing::
+
+	ifconfig arc0 down metric 1xxx
+	/etc/rc.d/rc.inet1
+
+where "xxx" is the debug level you want.  For example, "metric 1015" would put
+you at debug level 15.  Debug level 7 is currently the default.
+
+Note that the debug level is (starting with v1.90 ALPHA) a binary
+combination of different debug flags; so debug level 7 is really 1+2+4 or
+D_NORMAL+D_EXTRA+D_INIT.  To include D_DURING, you would add 16 to this,
+resulting in debug level 23.
+
+If you don't understand that, you probably don't want to know anyway.
+E-mail me about your problem.
+
+
+I want to send money: what now?
+-------------------------------
+
+Go take a nap or something.  You'll feel better in the morning.
diff --git a/Documentation/networking/arcnet.txt b/Documentation/networking/arcnet.txt
deleted file mode 100644
index aff97f47c05c..000000000000
--- a/Documentation/networking/arcnet.txt
+++ /dev/null
@@ -1,556 +0,0 @@
-----------------------------------------------------------------------------
-NOTE:  See also arcnet-hardware.txt in this directory for jumper-setting
-and cabling information if you're like many of us and didn't happen to get a
-manual with your ARCnet card.
-----------------------------------------------------------------------------
-
-Since no one seems to listen to me otherwise, perhaps a poem will get your
-attention:
-		This driver's getting fat and beefy,
-		But my cat is still named Fifi.
-
-Hmm, I think I'm allowed to call that a poem, even though it's only two
-lines.  Hey, I'm in Computer Science, not English.  Give me a break.
-
-The point is:  I REALLY REALLY REALLY REALLY REALLY want to hear from you if
-you test this and get it working.  Or if you don't.  Or anything.
-
-ARCnet 0.32 ALPHA first made it into the Linux kernel 1.1.80 - this was
-nice, but after that even FEWER people started writing to me because they
-didn't even have to install the patch.  <sigh>
-
-Come on, be a sport!  Send me a success report!
-
-(hey, that was even better than my original poem... this is getting bad!)
-
-
---------
-WARNING:
---------
-
-If you don't e-mail me about your success/failure soon, I may be forced to
-start SINGING.  And we don't want that, do we?
-
-(You know, it might be argued that I'm pushing this point a little too much. 
-If you think so, why not flame me in a quick little e-mail?  Please also
-include the type of card(s) you're using, software, size of network, and
-whether it's working or not.)
-
-My e-mail address is: apenwarr@worldvisions.ca
-
-
----------------------------------------------------------------------------
-
-			
-These are the ARCnet drivers for Linux.
-
-
-This new release (2.91) has been put together by David Woodhouse 
-<dwmw2@infradead.org>, in an attempt to tidy up the driver after adding support
-for yet another chipset. Now the generic support has been separated from the
-individual chipset drivers, and the source files aren't quite so packed with
-#ifdefs! I've changed this file a bit, but kept it in the first person from
-Avery, because I didn't want to completely rewrite it.
-
-The previous release resulted from many months of on-and-off effort from me
-(Avery Pennarun), many bug reports/fixes and suggestions from others, and in
-particular a lot of input and coding from Tomasz Motylewski.  Starting with
-ARCnet 2.10 ALPHA, Tomasz's all-new-and-improved RFC1051 support has been
-included and seems to be working fine!
-
-
-Where do I discuss these drivers?
----------------------------------
-
-Tomasz has been so kind as to set up a new and improved mailing list. 
-Subscribe by sending a message with the BODY "subscribe linux-arcnet YOUR
-REAL NAME" to listserv@tichy.ch.uj.edu.pl.  Then, to submit messages to the
-list, mail to linux-arcnet@tichy.ch.uj.edu.pl.
-
-There are archives of the mailing list at:
-	http://epistolary.org/mailman/listinfo.cgi/arcnet
-
-The people on linux-net@vger.kernel.org (now defunct, replaced by
-netdev@vger.kernel.org) have also been known to be very helpful, especially
-when we're talking about ALPHA Linux kernels that may or may not work right
-in the first place.
-
-
-Other Drivers and Info
-----------------------
-
-You can try my ARCNET page on the World Wide Web at:
-	http://www.qis.net/~jschmitz/arcnet/	
-
-Also, SMC (one of the companies that makes ARCnet cards) has a WWW site you
-might be interested in, which includes several drivers for various cards
-including ARCnet.  Try:
-	http://www.smc.com/
-	
-Performance Technologies makes various network software that supports
-ARCnet:
-	http://www.perftech.com/ or ftp to ftp.perftech.com.
-	
-Novell makes a networking stack for DOS which includes ARCnet drivers.  Try
-FTPing to ftp.novell.com.
-
-You can get the Crynwr packet driver collection (including arcether.com, the
-one you'll want to use with ARCnet cards) from
-oak.oakland.edu:/simtel/msdos/pktdrvr. It won't work perfectly on a 386+
-without patches, though, and also doesn't like several cards.  Fixed
-versions are available on my WWW page, or via e-mail if you don't have WWW
-access. 
-
-
-Installing the Driver
----------------------
-
-All you will need to do in order to install the driver is:
-	make config
-		(be sure to choose ARCnet in the network devices 
-		and at least one chipset driver.)
-	make clean
-	make zImage
-	
-If you obtained this ARCnet package as an upgrade to the ARCnet driver in
-your current kernel, you will need to first copy arcnet.c over the one in
-the linux/drivers/net directory.
-
-You will know the driver is installed properly if you get some ARCnet
-messages when you reboot into the new Linux kernel.
-
-There are four chipset options:
-
- 1. Standard ARCnet COM90xx chipset.
-
-This is the normal ARCnet card, which you've probably got. This is the only
-chipset driver which will autoprobe if not told where the card is.
-It following options on the command line:
- com90xx=[<io>[,<irq>[,<shmem>]]][,<name>] | <name>
-
-If you load the chipset support as a module, the options are:
- io=<io> irq=<irq> shmem=<shmem> device=<name>
-
-To disable the autoprobe, just specify "com90xx=" on the kernel command line.
-To specify the name alone, but allow autoprobe, just put "com90xx=<name>"
-
- 2. ARCnet COM20020 chipset.
-
-This is the new chipset from SMC with support for promiscuous mode (packet 
-sniffing), extra diagnostic information, etc. Unfortunately, there is no
-sensible method of autoprobing for these cards. You must specify the I/O
-address on the kernel command line.
-The command line options are:
- com20020=<io>[,<irq>[,<node_ID>[,backplane[,CKP[,timeout]]]]][,name]
-
-If you load the chipset support as a module, the options are:
- io=<io> irq=<irq> node=<node_ID> backplane=<backplane> clock=<CKP>
- timeout=<timeout> device=<name>
-
-The COM20020 chipset allows you to set the node ID in software, overriding the
-default which is still set in DIP switches on the card. If you don't have the
-COM20020 data sheets, and you don't know what the other three options refer
-to, then they won't interest you - forget them.
-
- 3. ARCnet COM90xx chipset in IO-mapped mode.
-
-This will also work with the normal ARCnet cards, but doesn't use the shared
-memory. It performs less well than the above driver, but is provided in case
-you have a card which doesn't support shared memory, or (strangely) in case
-you have so many ARCnet cards in your machine that you run out of shmem slots.
-If you don't give the IO address on the kernel command line, then the driver
-will not find the card.
-The command line options are:
- com90io=<io>[,<irq>][,<name>] 
-
-If you load the chipset support as a module, the options are:
- io=<io> irq=<irq> device=<name>
-
- 4. ARCnet RIM I cards.
-
-These are COM90xx chips which are _completely_ memory mapped. The support for
-these is not tested. If you have one, please mail the author with a success 
-report. All options must be specified, except the device name.
-Command line options:
- arcrimi=<shmem>,<irq>,<node_ID>[,<name>]
-
-If you load the chipset support as a module, the options are:
- shmem=<shmem> irq=<irq> node=<node_ID> device=<name>
-
-
-Loadable Module Support
------------------------
-
-Configure and rebuild Linux.  When asked, answer 'm' to "Generic ARCnet 
-support" and to support for your ARCnet chipset if you want to use the
-loadable module. You can also say 'y' to "Generic ARCnet support" and 'm' 
-to the chipset support if you wish.
-
-	make config
-	make clean	
-	make zImage
-	make modules
-	
-If you're using a loadable module, you need to use insmod to load it, and
-you can specify various characteristics of your card on the command
-line.  (In recent versions of the driver, autoprobing is much more reliable
-and works as a module, so most of this is now unnecessary.)
-
-For example:
-	cd /usr/src/linux/modules
-	insmod arcnet.o
-	insmod com90xx.o
-	insmod com20020.o io=0x2e0 device=eth1
-	
-
-Using the Driver
-----------------
-
-If you build your kernel with ARCnet COM90xx support included, it should 
-probe for your card automatically when you boot. If you use a different
-chipset driver complied into the kernel, you must give the necessary options
-on the kernel command line, as detailed above.
-
-Go read the NET-2-HOWTO and ETHERNET-HOWTO for Linux; they should be
-available where you picked up this driver.  Think of your ARCnet as a
-souped-up (or down, as the case may be) Ethernet card.
-
-By the way, be sure to change all references from "eth0" to "arc0" in the
-HOWTOs.  Remember that ARCnet isn't a "true" Ethernet, and the device name
-is DIFFERENT.
-
-
-Multiple Cards in One Computer
-------------------------------
-
-Linux has pretty good support for this now, but since I've been busy, the
-ARCnet driver has somewhat suffered in this respect. COM90xx support, if 
-compiled into the kernel, will (try to) autodetect all the installed cards. 
-
-If you have other cards, with support compiled into the kernel, then you can 
-just repeat the options on the kernel command line, e.g.:
-LILO: linux com20020=0x2e0 com20020=0x380 com90io=0x260
-
-If you have the chipset support built as a loadable module, then you need to 
-do something like this:
-	insmod -o arc0 com90xx
-	insmod -o arc1 com20020 io=0x2e0
-	insmod -o arc2 com90xx
-The ARCnet drivers will now sort out their names automatically.
-
-
-How do I get it to work with...?
---------------------------------
-
-NFS: Should be fine linux->linux, just pretend you're using Ethernet cards. 
-        oak.oakland.edu:/simtel/msdos/nfs has some nice DOS clients.  There
-        is also a DOS-based NFS server called SOSS.  It doesn't multitask
-        quite the way Linux does (actually, it doesn't multitask AT ALL) but
-        you never know what you might need.
-        
-        With AmiTCP (and possibly others), you may need to set the following
-        options in your Amiga nfstab:  MD 1024 MR 1024 MW 1024
-        (Thanks to Christian Gottschling <ferksy@indigo.tng.oche.de>
-	for this.)
-	
-	Probably these refer to maximum NFS data/read/write block sizes.  I
-	don't know why the defaults on the Amiga didn't work; write to me if
-	you know more.
-
-DOS: If you're using the freeware arcether.com, you might want to install
-        the driver patch from my web page.  It helps with PC/TCP, and also
-        can get arcether to load if it timed out too quickly during
-        initialization.  In fact, if you use it on a 386+ you REALLY need
-        the patch, really.
-	
-Windows:  See DOS :)  Trumpet Winsock works fine with either the Novell or
-	Arcether client, assuming you remember to load winpkt of course.
-
-LAN Manager and Windows for Workgroups: These programs use protocols that
-        are incompatible with the Internet standard.  They try to pretend
-        the cards are Ethernet, and confuse everyone else on the network. 
-        
-        However, v2.00 and higher of the Linux ARCnet driver supports this
-        protocol via the 'arc0e' device.  See the section on "Multiprotocol
-        Support" for more information.
-
-	Using the freeware Samba server and clients for Linux, you can now
-	interface quite nicely with TCP/IP-based WfWg or Lan Manager
-	networks.
-	
-Windows 95: Tools are included with Win95 that let you use either the LANMAN
-	style network drivers (NDIS) or Novell drivers (ODI) to handle your
-	ARCnet packets.  If you use ODI, you'll need to use the 'arc0'
-	device with Linux.  If you use NDIS, then try the 'arc0e' device. 
-	See the "Multiprotocol Support" section below if you need arc0e,
-	you're completely insane, and/or you need to build some kind of
-	hybrid network that uses both encapsulation types.
-
-OS/2: I've been told it works under Warp Connect with an ARCnet driver from
-	SMC.  You need to use the 'arc0e' interface for this.  If you get
-	the SMC driver to work with the TCP/IP stuff included in the
-	"normal" Warp Bonus Pack, let me know.
-
-	ftp.microsoft.com also has a freeware "Lan Manager for OS/2" client
-	which should use the same protocol as WfWg does.  I had no luck
-	installing it under Warp, however.  Please mail me with any results.
-
-NetBSD/AmiTCP: These use an old version of the Internet standard ARCnet
-	protocol (RFC1051) which is compatible with the Linux driver v2.10
-	ALPHA and above using the arc0s device. (See "Multiprotocol ARCnet"
-	below.)  ** Newer versions of NetBSD apparently support RFC1201.
-
-
-Using Multiprotocol ARCnet
---------------------------
-
-The ARCnet driver v2.10 ALPHA supports three protocols, each on its own
-"virtual network device":
-
-	arc0  - RFC1201 protocol, the official Internet standard which just
-		happens to be 100% compatible with Novell's TRXNET driver. 
-		Version 1.00 of the ARCnet driver supported _only_ this
-		protocol.  arc0 is the fastest of the three protocols (for
-		whatever reason), and allows larger packets to be used
-		because it supports RFC1201 "packet splitting" operations. 
-		Unless you have a specific need to use a different protocol,
-		I strongly suggest that you stick with this one.
-		
-	arc0e - "Ethernet-Encapsulation" which sends packets over ARCnet
-		that are actually a lot like Ethernet packets, including the
-		6-byte hardware addresses.  This protocol is compatible with
-		Microsoft's NDIS ARCnet driver, like the one in WfWg and
-		LANMAN.  Because the MTU of 493 is actually smaller than the
-		one "required" by TCP/IP (576), there is a chance that some
-		network operations will not function properly.  The Linux
-		TCP/IP layer can compensate in most cases, however, by
-		automatically fragmenting the TCP/IP packets to make them
-		fit.  arc0e also works slightly more slowly than arc0, for
-		reasons yet to be determined.  (Probably it's the smaller
-		MTU that does it.)
-		
-	arc0s - The "[s]imple" RFC1051 protocol is the "previous" Internet
-		standard that is completely incompatible with the new
-		standard.  Some software today, however, continues to
-		support the old standard (and only the old standard)
-		including NetBSD and AmiTCP.  RFC1051 also does not support
-		RFC1201's packet splitting, and the MTU of 507 is still
-		smaller than the Internet "requirement," so it's quite
-		possible that you may run into problems.  It's also slower
-		than RFC1201 by about 25%, for the same reason as arc0e.
-		
-		The arc0s support was contributed by Tomasz Motylewski
-		and modified somewhat by me.  Bugs are probably my fault.
-
-You can choose not to compile arc0e and arc0s into the driver if you want -
-this will save you a bit of memory and avoid confusion when eg. trying to
-use the "NFS-root" stuff in recent Linux kernels.
-
-The arc0e and arc0s devices are created automatically when you first
-ifconfig the arc0 device.  To actually use them, though, you need to also
-ifconfig the other virtual devices you need.  There are a number of ways you
-can set up your network then:
-
-
-1. Single Protocol.
-
-   This is the simplest way to configure your network: use just one of the
-   two available protocols.  As mentioned above, it's a good idea to use
-   only arc0 unless you have a good reason (like some other software, ie.
-   WfWg, that only works with arc0e).
-   
-   If you need only arc0, then the following commands should get you going:
-   	ifconfig arc0 MY.IP.ADD.RESS
-   	route add MY.IP.ADD.RESS arc0
-   	route add -net SUB.NET.ADD.RESS arc0
-   	[add other local routes here]
-   	
-   If you need arc0e (and only arc0e), it's a little different:
-   	ifconfig arc0 MY.IP.ADD.RESS
-   	ifconfig arc0e MY.IP.ADD.RESS
-   	route add MY.IP.ADD.RESS arc0e
-   	route add -net SUB.NET.ADD.RESS arc0e
-   
-   arc0s works much the same way as arc0e.
-
-
-2. More than one protocol on the same wire.
-
-   Now things start getting confusing.  To even try it, you may need to be
-   partly crazy.  Here's what *I* did. :) Note that I don't include arc0s in
-   my home network; I don't have any NetBSD or AmiTCP computers, so I only
-   use arc0s during limited testing.
-
-   I have three computers on my home network; two Linux boxes (which prefer
-   RFC1201 protocol, for reasons listed above), and one XT that can't run
-   Linux but runs the free Microsoft LANMAN Client instead.
-
-   Worse, one of the Linux computers (freedom) also has a modem and acts as
-   a router to my Internet provider.  The other Linux box (insight) also has
-   its own IP address and needs to use freedom as its default gateway.  The
-   XT (patience), however, does not have its own Internet IP address and so
-   I assigned it one on a "private subnet" (as defined by RFC1597).
-
-   To start with, take a simple network with just insight and freedom. 
-   Insight needs to:
-   	- talk to freedom via RFC1201 (arc0) protocol, because I like it
-	  more and it's faster.
-	- use freedom as its Internet gateway.
-	
-   That's pretty easy to do.  Set up insight like this:
-   	ifconfig arc0 insight
-   	route add insight arc0
-   	route add freedom arc0	/* I would use the subnet here (like I said
-					to to in "single protocol" above),
-   					but the rest of the subnet
-   					unfortunately lies across the PPP
-   					link on freedom, which confuses
-   					things. */
-   	route add default gw freedom
-   	
-   And freedom gets configured like so:
-   	ifconfig arc0 freedom
-   	route add freedom arc0
-   	route add insight arc0
-   	/* and default gateway is configured by pppd */
-   	
-   Great, now insight talks to freedom directly on arc0, and sends packets
-   to the Internet through freedom.  If you didn't know how to do the above,
-   you should probably stop reading this section now because it only gets
-   worse.
-
-   Now, how do I add patience into the network?  It will be using LANMAN
-   Client, which means I need the arc0e device.  It needs to be able to talk
-   to both insight and freedom, and also use freedom as a gateway to the
-   Internet.  (Recall that patience has a "private IP address" which won't
-   work on the Internet; that's okay, I configured Linux IP masquerading on
-   freedom for this subnet).
-   
-   So patience (necessarily; I don't have another IP number from my
-   provider) has an IP address on a different subnet than freedom and
-   insight, but needs to use freedom as an Internet gateway.  Worse, most
-   DOS networking programs, including LANMAN, have braindead networking
-   schemes that rely completely on the netmask and a 'default gateway' to
-   determine how to route packets.  This means that to get to freedom or
-   insight, patience WILL send through its default gateway, regardless of
-   the fact that both freedom and insight (courtesy of the arc0e device)
-   could understand a direct transmission.
-   
-   I compensate by giving freedom an extra IP address - aliased 'gatekeeper'
-   - that is on my private subnet, the same subnet that patience is on.  I
-   then define gatekeeper to be the default gateway for patience.
-   
-   To configure freedom (in addition to the commands above):
-   	ifconfig arc0e gatekeeper
-   	route add gatekeeper arc0e
-   	route add patience arc0e
-   
-   This way, freedom will send all packets for patience through arc0e,
-   giving its IP address as gatekeeper (on the private subnet).  When it
-   talks to insight or the Internet, it will use its "freedom" Internet IP
-   address.
-   
-   You will notice that we haven't configured the arc0e device on insight. 
-   This would work, but is not really necessary, and would require me to
-   assign insight another special IP number from my private subnet.  Since
-   both insight and patience are using freedom as their default gateway, the
-   two can already talk to each other.
-   
-   It's quite fortunate that I set things up like this the first time (cough
-   cough) because it's really handy when I boot insight into DOS.  There, it
-   runs the Novell ODI protocol stack, which only works with RFC1201 ARCnet. 
-   In this mode it would be impossible for insight to communicate directly
-   with patience, since the Novell stack is incompatible with Microsoft's
-   Ethernet-Encap.  Without changing any settings on freedom or patience, I
-   simply set freedom as the default gateway for insight (now in DOS,
-   remember) and all the forwarding happens "automagically" between the two
-   hosts that would normally not be able to communicate at all.
-   
-   For those who like diagrams, I have created two "virtual subnets" on the
-   same physical ARCnet wire.  You can picture it like this:
-   
-                                                    
-          [RFC1201 NETWORK]                   [ETHER-ENCAP NETWORK]
-      (registered Internet subnet)           (RFC1597 private subnet)
-  
-                             (IP Masquerade)
-          /---------------\         *            /---------------\
-          |               |         *            |               |
-          |               +-Freedom-*-Gatekeeper-+               |
-          |               |    |    *            |               |
-          \-------+-------/    |    *            \-------+-------/
-                  |            |                         |
-               Insight         |                      Patience
-                           (Internet)
-
-
-
-It works: what now?
--------------------
-
-Send mail describing your setup, preferably including driver version, kernel
-version, ARCnet card model, CPU type, number of systems on your network, and
-list of software in use to me at the following address:
-	apenwarr@worldvisions.ca
-
-I do send (sometimes automated) replies to all messages I receive.  My email
-can be weird (and also usually gets forwarded all over the place along the
-way to me), so if you don't get a reply within a reasonable time, please
-resend.
-
-
-It doesn't work: what now?
---------------------------
-
-Do the same as above, but also include the output of the ifconfig and route
-commands, as well as any pertinent log entries (ie. anything that starts
-with "arcnet:" and has shown up since the last reboot) in your mail.
-
-If you want to try fixing it yourself (I strongly recommend that you mail me
-about the problem first, since it might already have been solved) you may
-want to try some of the debug levels available.  For heavy testing on
-D_DURING or more, it would be a REALLY good idea to kill your klogd daemon
-first!  D_DURING displays 4-5 lines for each packet sent or received.  D_TX,
-D_RX, and D_SKB actually DISPLAY each packet as it is sent or received,
-which is obviously quite big.
-
-Starting with v2.40 ALPHA, the autoprobe routines have changed
-significantly.  In particular, they won't tell you why the card was not
-found unless you turn on the D_INIT_REASONS debugging flag.
-
-Once the driver is running, you can run the arcdump shell script (available
-from me or in the full ARCnet package, if you have it) as root to list the
-contents of the arcnet buffers at any time.  To make any sense at all out of
-this, you should grab the pertinent RFCs. (some are listed near the top of
-arcnet.c).  arcdump assumes your card is at 0xD0000.  If it isn't, edit the
-script.
-
-Buffers 0 and 1 are used for receiving, and Buffers 2 and 3 are for sending. 
-Ping-pong buffers are implemented both ways.
-
-If your debug level includes D_DURING and you did NOT define SLOW_XMIT_COPY,
-the buffers are cleared to a constant value of 0x42 every time the card is
-reset (which should only happen when you do an ifconfig up, or when Linux
-decides that the driver is broken).  During a transmit, unused parts of the
-buffer will be cleared to 0x42 as well.  This is to make it easier to figure
-out which bytes are being used by a packet.
-
-You can change the debug level without recompiling the kernel by typing:
-	ifconfig arc0 down metric 1xxx
-	/etc/rc.d/rc.inet1
-where "xxx" is the debug level you want.  For example, "metric 1015" would put
-you at debug level 15.  Debug level 7 is currently the default.
-
-Note that the debug level is (starting with v1.90 ALPHA) a binary
-combination of different debug flags; so debug level 7 is really 1+2+4 or
-D_NORMAL+D_EXTRA+D_INIT.  To include D_DURING, you would add 16 to this,
-resulting in debug level 23.
-
-If you don't understand that, you probably don't want to know anyway. 
-E-mail me about your problem.
-
-
-I want to send money: what now?
--------------------------------
-
-Go take a nap or something.  You'll feel better in the morning.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5da18e024fcb..3e0a4bb23ef9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -40,6 +40,7 @@ Contents:
    6pack
    altera_tse
    arcnet-hardware
+   arcnet
 
 .. only::  subproject and html
 
diff --git a/drivers/net/arcnet/Kconfig b/drivers/net/arcnet/Kconfig
index 27551bf3d7e4..43eef60653b2 100644
--- a/drivers/net/arcnet/Kconfig
+++ b/drivers/net/arcnet/Kconfig
@@ -9,7 +9,7 @@ menuconfig ARCNET
 	---help---
 	  If you have a network card of this type, say Y and check out the
 	  (arguably) beautiful poetry in
-	  <file:Documentation/networking/arcnet.txt>.
+	  <file:Documentation/networking/arcnet.rst>.
 
 	  You need both this driver, and the driver for the particular ARCnet
 	  chipset of your card. If you don't know, then it's probably a
@@ -28,7 +28,7 @@ config ARCNET_1201
 	  arc0 device.  You need to say Y here to communicate with
 	  industry-standard RFC1201 implementations, like the arcether.com
 	  packet driver or most DOS/Windows ODI drivers.  Please read the
-	  ARCnet documentation in <file:Documentation/networking/arcnet.txt>
+	  ARCnet documentation in <file:Documentation/networking/arcnet.rst>
 	  for more information about using arc0.
 
 config ARCNET_1051
@@ -42,7 +42,7 @@ config ARCNET_1051
 	  industry-standard RFC1201 implementations, like the arcether.com
 	  packet driver or most DOS/Windows ODI drivers. RFC1201 is included
 	  automatically as the arc0 device. Please read the ARCnet
-	  documentation in <file:Documentation/networking/arcnet.txt> for more
+	  documentation in <file:Documentation/networking/arcnet.rst> for more
 	  information about using arc0e and arc0s.
 
 config ARCNET_RAW
-- 
cgit v1.2.3-59-g8ed1b


From ff2269f16a1e1a7f8bbe72920d3d285ba3943572 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:21 +0200
Subject: docs: networking: convert atm.txt to ReST

There isn't much to be done here. Just:

- add SPDX header;
- add a document title.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/atm.rst   | 14 ++++++++++++++
 Documentation/networking/atm.txt   |  8 --------
 Documentation/networking/index.rst |  1 +
 net/atm/Kconfig                    |  2 +-
 4 files changed, 16 insertions(+), 9 deletions(-)
 create mode 100644 Documentation/networking/atm.rst
 delete mode 100644 Documentation/networking/atm.txt

diff --git a/Documentation/networking/atm.rst b/Documentation/networking/atm.rst
new file mode 100644
index 000000000000..c1df8c038525
--- /dev/null
+++ b/Documentation/networking/atm.rst
@@ -0,0 +1,14 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+ATM
+===
+
+In order to use anything but the most primitive functions of ATM,
+several user-mode programs are required to assist the kernel. These
+programs and related material can be found via the ATM on Linux Web
+page at http://linux-atm.sourceforge.net/
+
+If you encounter problems with ATM, please report them on the ATM
+on Linux mailing list. Subscription information, archives, etc.,
+can be found on http://linux-atm.sourceforge.net/
diff --git a/Documentation/networking/atm.txt b/Documentation/networking/atm.txt
deleted file mode 100644
index 82921cee77fe..000000000000
--- a/Documentation/networking/atm.txt
+++ /dev/null
@@ -1,8 +0,0 @@
-In order to use anything but the most primitive functions of ATM,
-several user-mode programs are required to assist the kernel. These
-programs and related material can be found via the ATM on Linux Web
-page at http://linux-atm.sourceforge.net/
-
-If you encounter problems with ATM, please report them on the ATM
-on Linux mailing list. Subscription information, archives, etc.,
-can be found on http://linux-atm.sourceforge.net/
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 3e0a4bb23ef9..841f3c3905d5 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -41,6 +41,7 @@ Contents:
    altera_tse
    arcnet-hardware
    arcnet
+   atm
 
 .. only::  subproject and html
 
diff --git a/net/atm/Kconfig b/net/atm/Kconfig
index 271f682e8438..e61dcc9f85b2 100644
--- a/net/atm/Kconfig
+++ b/net/atm/Kconfig
@@ -16,7 +16,7 @@ config ATM
 	  of your ATM card below.
 
 	  Note that you need a set of user-space programs to actually make use
-	  of ATM.  See the file <file:Documentation/networking/atm.txt> for
+	  of ATM.  See the file <file:Documentation/networking/atm.rst> for
 	  further details.
 
 config ATM_CLIP
-- 
cgit v1.2.3-59-g8ed1b


From 20b943f075574c233de51fa2f0124a97f0298be1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:22 +0200
Subject: docs: networking: convert ax25.txt to ReST

There isn't much to be done here. Just:

- add SPDX header;
- add a document title.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ax25.rst  | 16 ++++++++++++++++
 Documentation/networking/ax25.txt  | 10 ----------
 Documentation/networking/index.rst |  1 +
 net/ax25/Kconfig                   |  6 +++---
 4 files changed, 20 insertions(+), 13 deletions(-)
 create mode 100644 Documentation/networking/ax25.rst
 delete mode 100644 Documentation/networking/ax25.txt

diff --git a/Documentation/networking/ax25.rst b/Documentation/networking/ax25.rst
new file mode 100644
index 000000000000..824afd7002db
--- /dev/null
+++ b/Documentation/networking/ax25.rst
@@ -0,0 +1,16 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+AX.25
+=====
+
+To use the amateur radio protocols within Linux you will need to get a
+suitable copy of the AX.25 Utilities. More detailed information about
+AX.25, NET/ROM and ROSE, associated programs and utilities can be
+found on http://www.linux-ax25.org.
+
+There is an active mailing list for discussing Linux amateur radio matters
+called linux-hams@vger.kernel.org. To subscribe to it, send a message to
+majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body
+of the message, the subject field is ignored.  You don't need to be
+subscribed to post but of course that means you might miss an answer.
diff --git a/Documentation/networking/ax25.txt b/Documentation/networking/ax25.txt
deleted file mode 100644
index 8257dbf9be57..000000000000
--- a/Documentation/networking/ax25.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-To use the amateur radio protocols within Linux you will need to get a
-suitable copy of the AX.25 Utilities. More detailed information about
-AX.25, NET/ROM and ROSE, associated programs and and utilities can be
-found on http://www.linux-ax25.org.
-
-There is an active mailing list for discussing Linux amateur radio matters
-called linux-hams@vger.kernel.org. To subscribe to it, send a message to
-majordomo@vger.kernel.org with the words "subscribe linux-hams" in the body
-of the message, the subject field is ignored.  You don't need to be
-subscribed to post but of course that means you might miss an answer.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 841f3c3905d5..6a5858b27cf6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -42,6 +42,7 @@ Contents:
    arcnet-hardware
    arcnet
    atm
+   ax25
 
 .. only::  subproject and html
 
diff --git a/net/ax25/Kconfig b/net/ax25/Kconfig
index 043fd5437809..97d686d115c0 100644
--- a/net/ax25/Kconfig
+++ b/net/ax25/Kconfig
@@ -40,7 +40,7 @@ config AX25
 	  radio as well as information about how to configure an AX.25 port is
 	  contained in the AX25-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>. You might also want to
-	  check out the file <file:Documentation/networking/ax25.txt> in the
+	  check out the file <file:Documentation/networking/ax25.rst> in the
 	  kernel source. More information about digital amateur radio in
 	  general is on the WWW at
 	  <http://www.tapr.org/>.
@@ -88,7 +88,7 @@ config NETROM
 	  users as well as information about how to configure an AX.25 port is
 	  contained in the Linux Ham Wiki, available from
 	  <http://www.linux-ax25.org>. You also might want to check out the
-	  file <file:Documentation/networking/ax25.txt>. More information about
+	  file <file:Documentation/networking/ax25.rst>. More information about
 	  digital amateur radio in general is on the WWW at
 	  <http://www.tapr.org/>.
 
@@ -107,7 +107,7 @@ config ROSE
 	  users as well as information about how to configure an AX.25 port is
 	  contained in the Linux Ham Wiki, available from
 	  <http://www.linux-ax25.org>.  You also might want to check out the
-	  file <file:Documentation/networking/ax25.txt>. More information about
+	  file <file:Documentation/networking/ax25.rst>. More information about
 	  digital amateur radio in general is on the WWW at
 	  <http://www.tapr.org/>.
 
-- 
cgit v1.2.3-59-g8ed1b


From b5fcf32d7d4b647c0f3aa612d91d25996a49bcd9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:23 +0200
Subject: docs: networking: convert baycom.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/baycom.rst | 174 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/baycom.txt | 158 --------------------------------
 Documentation/networking/index.rst  |   1 +
 drivers/net/hamradio/Kconfig        |   8 +-
 4 files changed, 179 insertions(+), 162 deletions(-)
 create mode 100644 Documentation/networking/baycom.rst
 delete mode 100644 Documentation/networking/baycom.txt

diff --git a/Documentation/networking/baycom.rst b/Documentation/networking/baycom.rst
new file mode 100644
index 000000000000..fe2d010f0e86
--- /dev/null
+++ b/Documentation/networking/baycom.rst
@@ -0,0 +1,174 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+Linux Drivers for Baycom Modems
+===============================
+
+Thomas M. Sailer, HB9JNX/AE4WA, <sailer@ife.ee.ethz.ch>
+
+The drivers for the baycom modems have been split into
+separate drivers as they did not share any code, and the driver
+and device names have changed.
+
+This document describes the Linux Kernel Drivers for simple Baycom style
+amateur radio modems.
+
+The following drivers are available:
+====================================
+
+baycom_ser_fdx:
+  This driver supports the SER12 modems either full or half duplex.
+  Its baud rate may be changed via the ``baud`` module parameter,
+  therefore it supports just about every bit bang modem on a
+  serial port. Its devices are called bcsf0 through bcsf3.
+  This is the recommended driver for SER12 type modems,
+  however if you have a broken UART clone that does not have working
+  delta status bits, you may try baycom_ser_hdx.
+
+baycom_ser_hdx:
+  This is an alternative driver for SER12 type modems.
+  It only supports half duplex, and only 1200 baud. Its devices
+  are called bcsh0 through bcsh3. Use this driver only if baycom_ser_fdx
+  does not work with your UART.
+
+baycom_par:
+  This driver supports the par96 and picpar modems.
+  Its devices are called bcp0 through bcp3.
+
+baycom_epp:
+  This driver supports the EPP modem.
+  Its devices are called bce0 through bce3.
+  This driver is work-in-progress.
+
+The following modems are supported:
+
+======= ========================================================================
+ser12   This is a very simple 1200 baud AFSK modem. The modem consists only
+	of a modulator/demodulator chip, usually a TI TCM3105. The computer
+	is responsible for regenerating the receiver bit clock, as well as
+	for handling the HDLC protocol. The modem connects to a serial port,
+	hence the name. Since the serial port is not used as an async serial
+	port, the kernel driver for serial ports cannot be used, and this
+	driver only supports standard serial hardware (8250, 16450, 16550)
+
+par96   This is a modem for 9600 baud FSK compatible to the G3RUH standard.
+	The modem does all the filtering and regenerates the receiver clock.
+	Data is transferred from and to the PC via a shift register.
+	The shift register is filled with 16 bits and an interrupt is signalled.
+	The PC then empties the shift register in a burst. This modem connects
+	to the parallel port, hence the name. The modem leaves the
+	implementation of the HDLC protocol and the scrambler polynomial to
+	the PC.
+
+picpar  This is a redesign of the par96 modem by Henning Rech, DF9IC. The modem
+	is protocol compatible to par96, but uses only three low power ICs
+	and can therefore be fed from the parallel port and does not require
+	an additional power supply. Furthermore, it incorporates a carrier
+	detect circuitry.
+
+EPP     This is a high-speed modem adaptor that connects to an enhanced parallel
+	port.
+
+	Its target audience is users working over a high speed hub (76.8kbit/s).
+
+eppfpga This is a redesign of the EPP adaptor.
+======= ========================================================================
+
+All of the above modems only support half duplex communications. However,
+the driver supports the KISS (see below) fullduplex command. It then simply
+starts to send as soon as there's a packet to transmit and does not care
+about DCD, i.e. it starts to send even if there's someone else on the channel.
+This command is required by some implementations of the DAMA channel
+access protocol.
+
+
+The Interface of the drivers
+============================
+
+Unlike previous drivers, these drivers are no longer character devices,
+but they are now true kernel network interfaces. Installation is therefore
+simple. Once installed, four interfaces named bc{sf,sh,p,e}[0-3] are available.
+sethdlc from the ax25 utilities may be used to set driver states etc.
+Users of userland AX.25 stacks may use the net2kiss utility (also available
+in the ax25 utilities package) to convert packets of a network interface
+to a KISS stream on a pseudo tty. There's also a patch available from
+me for WAMPES which allows attaching a kernel network interface directly.
+
+
+Configuring the driver
+======================
+
+Every time a driver is inserted into the kernel, it has to know which
+modems it should access at which ports. This can be done with the setbaycom
+utility. If you are only using one modem, you can also configure the
+driver from the insmod command line (or by means of an option line in
+``/etc/modprobe.d/*.conf``).
+
+Examples::
+
+  modprobe baycom_ser_fdx mode="ser12*" iobase=0x3f8 irq=4
+  sethdlc -i bcsf0 -p mode "ser12*" io 0x3f8 irq 4
+
+Both lines configure the first port to drive a ser12 modem at the first
+serial port (COM1 under DOS). The * in the mode parameter instructs the driver
+to use the software DCD algorithm (see below)::
+
+  insmod baycom_par mode="picpar" iobase=0x378
+  sethdlc -i bcp0 -p mode "picpar" io 0x378
+
+Both lines configure the first port to drive a picpar modem at the
+first parallel port (LPT1 under DOS). (Note: picpar implies
+hardware DCD, par96 implies software DCD).
+
+The channel access parameters can be set with sethdlc -a or kissparms.
+Note that both utilities interpret the values slightly differently.
+
+
+Hardware DCD versus Software DCD
+================================
+
+To avoid collisions on the air, the driver must know when the channel is
+busy. This is the task of the DCD circuitry/software. The driver may either
+utilise a software DCD algorithm (options=1) or use a DCD signal from
+the hardware (options=0).
+
+======= =================================================================
+ser12   if software DCD is utilised, the radio's squelch should always be
+	open. It is highly recommended to use the software DCD algorithm,
+	as it is much faster than most hardware squelch circuitry. The
+	disadvantage is a slightly higher load on the system.
+
+par96   the software DCD algorithm for this type of modem is rather poor.
+	The modem simply does not provide enough information to implement
+	a reasonable DCD algorithm in software. Therefore, if your radio
+	feeds the DCD input of the PAR96 modem, the use of the hardware
+	DCD circuitry is recommended.
+
+picpar  the picpar modem features a builtin DCD hardware, which is highly
+	recommended.
+======= =================================================================
+
+
+
+Compatibility with the rest of the Linux kernel
+===============================================
+
+The serial driver and the baycom serial drivers compete
+for the same hardware resources. Of course only one driver can access a given
+interface at a time. The serial driver grabs all interfaces it can find at
+startup time. Therefore the baycom drivers subsequently won't be able to
+access a serial port. You might therefore find it necessary to release
+a port owned by the serial driver with 'setserial /dev/ttyS# uart none', where
+# is the number of the interface. The baycom drivers do not reserve any
+ports at startup, unless one is specified on the 'insmod' command line. Another
+method to solve the problem is to compile all drivers as modules and
+leave it to kmod to load the correct driver depending on the application.
+
+The parallel port drivers (baycom_par, baycom_epp) now use the parport subsystem
+to arbitrate the ports between different client drivers.
+
+vy 73s de
+
+Tom Sailer, sailer@ife.ee.ethz.ch
+
+hb9jnx @ hb9w.ampr.org
diff --git a/Documentation/networking/baycom.txt b/Documentation/networking/baycom.txt
deleted file mode 100644
index 688f18fd4467..000000000000
--- a/Documentation/networking/baycom.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-		    LINUX DRIVERS FOR BAYCOM MODEMS
-
-       Thomas M. Sailer, HB9JNX/AE4WA, <sailer@ife.ee.ethz.ch>
-
-!!NEW!! (04/98) The drivers for the baycom modems have been split into
-separate drivers as they did not share any code, and the driver
-and device names have changed.
-
-This document describes the Linux Kernel Drivers for simple Baycom style
-amateur radio modems. 
-
-The following drivers are available:
-
-baycom_ser_fdx:
-  This driver supports the SER12 modems either full or half duplex.
-  Its baud rate may be changed via the `baud' module parameter,
-  therefore it supports just about every bit bang modem on a
-  serial port. Its devices are called bcsf0 through bcsf3.
-  This is the recommended driver for SER12 type modems,
-  however if you have a broken UART clone that does not have working
-  delta status bits, you may try baycom_ser_hdx. 
-
-baycom_ser_hdx: 
-  This is an alternative driver for SER12 type modems.
-  It only supports half duplex, and only 1200 baud. Its devices
-  are called bcsh0 through bcsh3. Use this driver only if baycom_ser_fdx
-  does not work with your UART.
-
-baycom_par:
-  This driver supports the par96 and picpar modems.
-  Its devices are called bcp0 through bcp3.
-
-baycom_epp:
-  This driver supports the EPP modem.
-  Its devices are called bce0 through bce3.
-  This driver is work-in-progress.
-
-The following modems are supported:
-
-ser12:  This is a very simple 1200 baud AFSK modem. The modem consists only
-        of a modulator/demodulator chip, usually a TI TCM3105. The computer
-        is responsible for regenerating the receiver bit clock, as well as
-        for handling the HDLC protocol. The modem connects to a serial port,
-        hence the name. Since the serial port is not used as an async serial
-        port, the kernel driver for serial ports cannot be used, and this
-        driver only supports standard serial hardware (8250, 16450, 16550)
-
-par96:  This is a modem for 9600 baud FSK compatible to the G3RUH standard.
-        The modem does all the filtering and regenerates the receiver clock.
-        Data is transferred from and to the PC via a shift register.
-        The shift register is filled with 16 bits and an interrupt is signalled.
-        The PC then empties the shift register in a burst. This modem connects
-        to the parallel port, hence the name. The modem leaves the 
-        implementation of the HDLC protocol and the scrambler polynomial to
-        the PC.
-
-picpar: This is a redesign of the par96 modem by Henning Rech, DF9IC. The modem
-        is protocol compatible to par96, but uses only three low power ICs
-        and can therefore be fed from the parallel port and does not require
-        an additional power supply. Furthermore, it incorporates a carrier
-        detect circuitry.
-
-EPP:    This is a high-speed modem adaptor that connects to an enhanced parallel port.
-        Its target audience is users working over a high speed hub (76.8kbit/s).
-
-eppfpga: This is a redesign of the EPP adaptor.
-
-
-
-All of the above modems only support half duplex communications. However,
-the driver supports the KISS (see below) fullduplex command. It then simply
-starts to send as soon as there's a packet to transmit and does not care
-about DCD, i.e. it starts to send even if there's someone else on the channel.
-This command is required by some implementations of the DAMA channel 
-access protocol.
-
-
-The Interface of the drivers
-
-Unlike previous drivers, these drivers are no longer character devices,
-but they are now true kernel network interfaces. Installation is therefore
-simple. Once installed, four interfaces named bc{sf,sh,p,e}[0-3] are available.
-sethdlc from the ax25 utilities may be used to set driver states etc.
-Users of userland AX.25 stacks may use the net2kiss utility (also available
-in the ax25 utilities package) to convert packets of a network interface
-to a KISS stream on a pseudo tty. There's also a patch available from
-me for WAMPES which allows attaching a kernel network interface directly.
-
-
-Configuring the driver
-
-Every time a driver is inserted into the kernel, it has to know which
-modems it should access at which ports. This can be done with the setbaycom
-utility. If you are only using one modem, you can also configure the
-driver from the insmod command line (or by means of an option line in
-/etc/modprobe.d/*.conf).
-
-Examples:
-  modprobe baycom_ser_fdx mode="ser12*" iobase=0x3f8 irq=4
-  sethdlc -i bcsf0 -p mode "ser12*" io 0x3f8 irq 4
-
-Both lines configure the first port to drive a ser12 modem at the first
-serial port (COM1 under DOS). The * in the mode parameter instructs the driver to use
-the software DCD algorithm (see below).
-
-  insmod baycom_par mode="picpar" iobase=0x378
-  sethdlc -i bcp0 -p mode "picpar" io 0x378
-
-Both lines configure the first port to drive a picpar modem at the
-first parallel port (LPT1 under DOS). (Note: picpar implies
-hardware DCD, par96 implies software DCD).
-
-The channel access parameters can be set with sethdlc -a or kissparms.
-Note that both utilities interpret the values slightly differently.
-
-
-Hardware DCD versus Software DCD
-
-To avoid collisions on the air, the driver must know when the channel is
-busy. This is the task of the DCD circuitry/software. The driver may either
-utilise a software DCD algorithm (options=1) or use a DCD signal from
-the hardware (options=0).
-
-ser12:  if software DCD is utilised, the radio's squelch should always be
-        open. It is highly recommended to use the software DCD algorithm,
-        as it is much faster than most hardware squelch circuitry. The
-        disadvantage is a slightly higher load on the system.
-
-par96:  the software DCD algorithm for this type of modem is rather poor.
-        The modem simply does not provide enough information to implement
-        a reasonable DCD algorithm in software. Therefore, if your radio
-        feeds the DCD input of the PAR96 modem, the use of the hardware
-        DCD circuitry is recommended.
-
-picpar: the picpar modem features a builtin DCD hardware, which is highly
-        recommended.
-
-
-
-Compatibility with the rest of the Linux kernel
-
-The serial driver and the baycom serial drivers compete
-for the same hardware resources. Of course only one driver can access a given
-interface at a time. The serial driver grabs all interfaces it can find at
-startup time. Therefore the baycom drivers subsequently won't be able to
-access a serial port. You might therefore find it necessary to release
-a port owned by the serial driver with 'setserial /dev/ttyS# uart none', where
-# is the number of the interface. The baycom drivers do not reserve any
-ports at startup, unless one is specified on the 'insmod' command line. Another
-method to solve the problem is to compile all drivers as modules and
-leave it to kmod to load the correct driver depending on the application.
-
-The parallel port drivers (baycom_par, baycom_epp) now use the parport subsystem
-to arbitrate the ports between different client drivers.
-
-vy 73s de
-Tom Sailer, sailer@ife.ee.ethz.ch
-hb9jnx @ hb9w.ampr.org
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 6a5858b27cf6..fbf845fbaff7 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -43,6 +43,7 @@ Contents:
    arcnet
    atm
    ax25
+   baycom
 
 .. only::  subproject and html
 
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
index bf306fed04cc..fe409819b56d 100644
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -127,7 +127,7 @@ config BAYCOM_SER_FDX
 	  your serial interface chip. To configure the driver, use the sethdlc
 	  utility available in the standard ax25 utilities package. For
 	  information on the modems, see <http://www.baycom.de/> and
-	  <file:Documentation/networking/baycom.txt>.
+	  <file:Documentation/networking/baycom.rst>.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called baycom_ser_fdx.  This is recommended.
@@ -145,7 +145,7 @@ config BAYCOM_SER_HDX
 	  the driver, use the sethdlc utility available in the standard ax25
 	  utilities package. For information on the modems, see
 	  <http://www.baycom.de/> and
-	  <file:Documentation/networking/baycom.txt>.
+	  <file:Documentation/networking/baycom.rst>.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called baycom_ser_hdx.  This is recommended.
@@ -160,7 +160,7 @@ config BAYCOM_PAR
 	  par96 designs. To configure the driver, use the sethdlc utility
 	  available in the standard ax25 utilities package. For information on
 	  the modems, see <http://www.baycom.de/> and the file
-	  <file:Documentation/networking/baycom.txt>.
+	  <file:Documentation/networking/baycom.rst>.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called baycom_par.  This is recommended.
@@ -175,7 +175,7 @@ config BAYCOM_EPP
 	  designs. To configure the driver, use the sethdlc utility available
 	  in the standard ax25 utilities package. For information on the
 	  modems, see <http://www.baycom.de/> and the file
-	  <file:Documentation/networking/baycom.txt>.
+	  <file:Documentation/networking/baycom.rst>.
 
 	  To compile this driver as a module, choose M here: the module
 	  will be called baycom_epp.  This is recommended.
-- 
cgit v1.2.3-59-g8ed1b


From a362032eca22d03071c4613f6ca503be982bf375 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:24 +0200
Subject: docs: networking: convert bonding.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- mark tables as such;
- add notes markups;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.rst               | 2890 ++++++++++++++++++++
 Documentation/networking/bonding.txt               | 2837 -------------------
 .../networking/device_drivers/intel/e100.rst       |    2 +-
 .../networking/device_drivers/intel/ixgb.rst       |    2 +-
 Documentation/networking/index.rst                 |    1 +
 drivers/net/Kconfig                                |    2 +-
 6 files changed, 2894 insertions(+), 2840 deletions(-)
 create mode 100644 Documentation/networking/bonding.rst
 delete mode 100644 Documentation/networking/bonding.txt

diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
new file mode 100644
index 000000000000..dd49f95d28d3
--- /dev/null
+++ b/Documentation/networking/bonding.rst
@@ -0,0 +1,2890 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Linux Ethernet Bonding Driver HOWTO
+===================================
+
+Latest update: 27 April 2011
+
+Initial release: Thomas Davis <tadavis at lbl.gov>
+
+Corrections, HA extensions: 2000/10/03-15:
+
+  - Willy Tarreau <willy at meta-x.org>
+  - Constantine Gavrilov <const-g at xpert.com>
+  - Chad N. Tindel <ctindel at ieee dot org>
+  - Janice Girouard <girouard at us dot ibm dot com>
+  - Jay Vosburgh <fubar at us dot ibm dot com>
+
+Reorganized and updated Feb 2005 by Jay Vosburgh
+Added Sysfs information: 2006/04/24
+
+  - Mitch Williams <mitch.a.williams at intel.com>
+
+Introduction
+============
+
+The Linux bonding driver provides a method for aggregating
+multiple network interfaces into a single logical "bonded" interface.
+The behavior of the bonded interfaces depends upon the mode; generally
+speaking, modes provide either hot standby or load balancing services.
+Additionally, link integrity monitoring may be performed.
+
+The bonding driver originally came from Donald Becker's
+beowulf patches for kernel 2.0. It has changed quite a bit since, and
+the original tools from extreme-linux and beowulf sites will not work
+with this version of the driver.
+
+For new versions of the driver, updated userspace tools, and
+who to ask for help, please follow the links at the end of this file.
+
+.. Table of Contents
+
+   1. Bonding Driver Installation
+
+   2. Bonding Driver Options
+
+   3. Configuring Bonding Devices
+   3.1	Configuration with Sysconfig Support
+   3.1.1		Using DHCP with Sysconfig
+   3.1.2		Configuring Multiple Bonds with Sysconfig
+   3.2	Configuration with Initscripts Support
+   3.2.1		Using DHCP with Initscripts
+   3.2.2		Configuring Multiple Bonds with Initscripts
+   3.3	Configuring Bonding Manually with Ifenslave
+   3.3.1		Configuring Multiple Bonds Manually
+   3.4	Configuring Bonding Manually via Sysfs
+   3.5	Configuration with Interfaces Support
+   3.6	Overriding Configuration for Special Cases
+   3.7 Configuring LACP for 802.3ad mode in a more secure way
+
+   4. Querying Bonding Configuration
+   4.1	Bonding Configuration
+   4.2	Network Configuration
+
+   5. Switch Configuration
+
+   6. 802.1q VLAN Support
+
+   7. Link Monitoring
+   7.1	ARP Monitor Operation
+   7.2	Configuring Multiple ARP Targets
+   7.3	MII Monitor Operation
+
+   8. Potential Trouble Sources
+   8.1	Adventures in Routing
+   8.2	Ethernet Device Renaming
+   8.3	Painfully Slow Or No Failed Link Detection By Miimon
+
+   9. SNMP agents
+
+   10. Promiscuous mode
+
+   11. Configuring Bonding for High Availability
+   11.1	High Availability in a Single Switch Topology
+   11.2	High Availability in a Multiple Switch Topology
+   11.2.1		HA Bonding Mode Selection for Multiple Switch Topology
+   11.2.2		HA Link Monitoring for Multiple Switch Topology
+
+   12. Configuring Bonding for Maximum Throughput
+   12.1	Maximum Throughput in a Single Switch Topology
+   12.1.1		MT Bonding Mode Selection for Single Switch Topology
+   12.1.2		MT Link Monitoring for Single Switch Topology
+   12.2	Maximum Throughput in a Multiple Switch Topology
+   12.2.1		MT Bonding Mode Selection for Multiple Switch Topology
+   12.2.2		MT Link Monitoring for Multiple Switch Topology
+
+   13. Switch Behavior Issues
+   13.1	Link Establishment and Failover Delays
+   13.2	Duplicated Incoming Packets
+
+   14. Hardware Specific Considerations
+   14.1	IBM BladeCenter
+
+   15. Frequently Asked Questions
+
+   16. Resources and Links
+
+
+1. Bonding Driver Installation
+==============================
+
+Most popular distro kernels ship with the bonding driver
+already available as a module. If your distro does not, or you
+have need to compile bonding from source (e.g., configuring and
+installing a mainline kernel from kernel.org), you'll need to perform
+the following steps:
+
+1.1 Configure and build the kernel with bonding
+-----------------------------------------------
+
+The current version of the bonding driver is available in the
+drivers/net/bonding subdirectory of the most recent kernel source
+(which is available on http://kernel.org).  Most users "rolling their
+own" will want to use the most recent kernel from kernel.org.
+
+Configure kernel with "make menuconfig" (or "make xconfig" or
+"make config"), then select "Bonding driver support" in the "Network
+device support" section.  It is recommended that you configure the
+driver as a module since it is currently the only way to pass parameters
+to the driver or configure more than one bonding device.
+
+Build and install the new kernel and modules.
+
+1.2 Bonding Control Utility
+---------------------------
+
+It is recommended to configure bonding via iproute2 (netlink)
+or sysfs; the old ifenslave control utility is obsolete.
+
+2. Bonding Driver Options
+=========================
+
+Options for the bonding driver are supplied as parameters to the
+bonding module at load time, or are specified via sysfs.
+
+Module options may be given as command line arguments to the
+insmod or modprobe command, but are usually specified in either the
+``/etc/modprobe.d/*.conf`` configuration files, or in a distro-specific
+configuration file (some of which are detailed in the next section).
+
+Details on bonding support for sysfs are provided in the
+"Configuring Bonding Manually via Sysfs" section, below.
+
+The available bonding driver parameters are listed below. If a
+parameter is not specified the default value is used.  When initially
+configuring a bond, it is recommended "tail -f /var/log/messages" be
+run in a separate window to watch for bonding driver error messages.
+
+It is critical that either the miimon or arp_interval and
+arp_ip_target parameters be specified, otherwise serious network
+degradation will occur during link failures.  Very few devices do not
+support at least miimon, so there is really no reason not to use it.
+
+Options with textual values will accept either the text name
+or, for backwards compatibility, the option value.  E.g.,
+"mode=802.3ad" and "mode=4" set the same mode.
+
+The parameters are as follows:
+
+active_slave
+
+	Specifies the new active slave for modes that support it
+	(active-backup, balance-alb and balance-tlb).  Possible values
+	are the name of any currently enslaved interface, or an empty
+	string.  If a name is given, the slave and its link must be up in order
+	to be selected as the new active slave.  If an empty string is
+	specified, the current active slave is cleared, and a new active
+	slave is selected automatically.
+
+	Note that this is only available through the sysfs interface. No module
+	parameter by this name exists.
+
+	The normal value of this option is the name of the currently
+	active slave, or the empty string if there is no active slave or
+	the current mode does not use an active slave.
+
+ad_actor_sys_prio
+
+	In an AD system, this specifies the system priority. The allowed range
+	is 1 - 65535. If the value is not specified, it takes 65535 as the
+	default value.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
+ad_actor_system
+
+	In an AD system, this specifies the mac-address for the actor in
+	protocol packet exchanges (LACPDUs). The value cannot be NULL or
+	multicast. It is preferred to have the local-admin bit set for this
+	mac but the driver does not enforce it. If the value is not given then
+	the system defaults to using the master's mac address as the actor's
+	system address.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
+ad_select
+
+	Specifies the 802.3ad aggregation selection logic to use.  The
+	possible values and their effects are:
+
+	stable or 0
+
+		The active aggregator is chosen by largest aggregate
+		bandwidth.
+
+		Reselection of the active aggregator occurs only when all
+		slaves of the active aggregator are down or the active
+		aggregator has no slaves.
+
+		This is the default value.
+
+	bandwidth or 1
+
+		The active aggregator is chosen by largest aggregate
+		bandwidth.  Reselection occurs if:
+
+		- A slave is added to or removed from the bond
+
+		- Any slave's link state changes
+
+		- Any slave's 802.3ad association state changes
+
+		- The bond's administrative state changes to up
+
+	count or 2
+
+		The active aggregator is chosen by the largest number of
+		ports (slaves).  Reselection occurs as described under the
+		"bandwidth" setting, above.
+
+	The bandwidth and count selection policies permit failover of
+	802.3ad aggregations when partial failure of the active aggregator
+	occurs.  This keeps the aggregator with the highest availability
+	(either in bandwidth or in number of ports) active at all times.
+
+	This option was added in bonding version 3.4.0.
+
+ad_user_port_key
+
+	In an AD system, the port-key has three parts as shown below -
+
+	   =====  ============
+	   Bits   Use
+	   =====  ============
+	   00     Duplex
+	   01-05  Speed
+	   06-15  User-defined
+	   =====  ============
+
+	This defines the upper 10 bits of the port key. The values can be
+	from 0 - 1023. If not given, the system defaults to 0.
+
+	This parameter has effect only in 802.3ad mode and is available through
+	SysFs interface.
+
+all_slaves_active
+
+	Specifies that duplicate frames (received on inactive ports) should be
+	dropped (0) or delivered (1).
+
+	Normally, bonding will drop duplicate frames (received on inactive
+	ports), which is desirable for most users. But there are some times
+	it is nice to allow duplicate frames to be delivered.
+
+	The default value is 0 (drop duplicate frames received on inactive
+	ports).
+
+arp_interval
+
+	Specifies the ARP link monitoring frequency in milliseconds.
+
+	The ARP monitor works by periodically checking the slave
+	devices to determine whether they have sent or received
+	traffic recently (the precise criteria depend upon the
+	bonding mode, and the state of the slave).  Regular traffic is
+	generated via ARP probes issued for the addresses specified by
+	the arp_ip_target option.
+
+	This behavior can be modified by the arp_validate option,
+	below.
+
+	If ARP monitoring is used in an etherchannel compatible mode
+	(modes 0 and 2), the switch should be configured in a mode
+	that evenly distributes packets across all links. If the
+	switch is configured to distribute the packets in an XOR
+	fashion, all replies from the ARP targets will be received on
+	the same link which could cause the other team members to
+	fail.  ARP monitoring should not be used in conjunction with
+	miimon.  A value of 0 disables ARP monitoring.  The default
+	value is 0.
+
+arp_ip_target
+
+	Specifies the IP addresses to use as ARP monitoring peers when
+	arp_interval is > 0.  These are the targets of the ARP request
+	sent to determine the health of the link to the targets.
+	Specify these values in ddd.ddd.ddd.ddd format.  Multiple IP
+	addresses must be separated by a comma.  At least one IP
+	address must be given for ARP monitoring to function.  The
+	maximum number of targets that can be specified is 16.  The
+	default value is no IP addresses.
+
+arp_validate
+
+	Specifies whether or not ARP probes and replies should be
+	validated in any mode that supports arp monitoring, or whether
+	non-ARP traffic should be filtered (disregarded) for link
+	monitoring purposes.
+
+	Possible values are:
+
+	none or 0
+
+		No validation or filtering is performed.
+
+	active or 1
+
+		Validation is performed only for the active slave.
+
+	backup or 2
+
+		Validation is performed only for backup slaves.
+
+	all or 3
+
+		Validation is performed for all slaves.
+
+	filter or 4
+
+		Filtering is applied to all slaves. No validation is
+		performed.
+
+	filter_active or 5
+
+		Filtering is applied to all slaves, validation is performed
+		only for the active slave.
+
+	filter_backup or 6
+
+		Filtering is applied to all slaves, validation is performed
+		only for backup slaves.
+
+	Validation:
+
+	Enabling validation causes the ARP monitor to examine the incoming
+	ARP requests and replies, and only consider a slave to be up if it
+	is receiving the appropriate ARP traffic.
+
+	For an active slave, the validation checks ARP replies to confirm
+	that they were generated by an arp_ip_target.  Since backup slaves
+	do not typically receive these replies, the validation performed
+	for backup slaves is on the broadcast ARP request sent out via the
+	active slave.  It is possible that some switch or network
+	configurations may result in situations wherein the backup slaves
+	do not receive the ARP requests; in such a situation, validation
+	of backup slaves must be disabled.
+
+	The validation of ARP requests on backup slaves is mainly helping
+	bonding to decide which slaves are more likely to work in case of
+	the active slave failure; it doesn't really guarantee that the
+	backup slave will work if it's selected as the next active slave.
+
+	Validation is useful in network configurations in which multiple
+	bonding hosts are concurrently issuing ARPs to one or more targets
+	beyond a common switch.  Should the link between the switch and
+	target fail (but not the switch itself), the probe traffic
+	generated by the multiple bonding instances will fool the standard
+	ARP monitor into considering the links as still up.  Use of
+	validation can resolve this, as the ARP monitor will only consider
+	ARP requests and replies associated with its own instance of
+	bonding.
+
+	Filtering:
+
+	Enabling filtering causes the ARP monitor to only use incoming ARP
+	packets for link availability purposes.  Arriving packets that are
+	not ARPs are delivered normally, but do not count when determining
+	if a slave is available.
+
+	Filtering operates by only considering the reception of ARP
+	packets (any ARP packet, regardless of source or destination) when
+	determining if a slave has received traffic for link availability
+	purposes.
+
+	Filtering is useful in network configurations in which significant
+	levels of third party broadcast traffic would fool the standard
+	ARP monitor into considering the links as still up.  Use of
+	filtering can resolve this, as only ARP traffic is considered for
+	link availability purposes.
+
+	This option was added in bonding version 3.1.0.
+
+arp_all_targets
+
+	Specifies the quantity of arp_ip_targets that must be reachable
+	in order for the ARP monitor to consider a slave as being up.
+	This option affects only active-backup mode for slaves with
+	arp_validate enabled.
+
+	Possible values are:
+
+	any or 0
+
+		consider the slave up only when any of the arp_ip_targets
+		is reachable
+
+	all or 1
+
+		consider the slave up only when all of the arp_ip_targets
+		are reachable
+
+downdelay
+
+	Specifies the time, in milliseconds, to wait before disabling
+	a slave after a link failure has been detected.  This option
+	is only valid for the miimon link monitor.  The downdelay
+	value should be a multiple of the miimon value; if not, it
+	will be rounded down to the nearest multiple.  The default
+	value is 0.
+
+fail_over_mac
+
+	Specifies whether active-backup mode should set all slaves to
+	the same MAC address at enslavement (the traditional
+	behavior), or, when enabled, perform special handling of the
+	bond's MAC address in accordance with the selected policy.
+
+	Possible values are:
+
+	none or 0
+
+		This setting disables fail_over_mac, and causes
+		bonding to set all slaves of an active-backup bond to
+		the same MAC address at enslavement time.  This is the
+		default.
+
+	active or 1
+
+		The "active" fail_over_mac policy indicates that the
+		MAC address of the bond should always be the MAC
+		address of the currently active slave.  The MAC
+		address of the slaves is not changed; instead, the MAC
+		address of the bond changes during a failover.
+
+		This policy is useful for devices that cannot ever
+		alter their MAC address, or for devices that refuse
+		incoming broadcasts with their own source MAC (which
+		interferes with the ARP monitor).
+
+		The down side of this policy is that every device on
+		the network must be updated via gratuitous ARP,
+		vs. just updating a switch or set of switches (which
+		often takes place for any traffic, not just ARP
+		traffic, if the switch snoops incoming traffic to
+		update its tables) for the traditional method.  If the
+		gratuitous ARP is lost, communication may be
+		disrupted.
+
+		When this policy is used in conjunction with the mii
+		monitor, devices which assert link up prior to being
+		able to actually transmit and receive are particularly
+		susceptible to loss of the gratuitous ARP, and an
+		appropriate updelay setting may be required.
+
+	follow or 2
+
+		The "follow" fail_over_mac policy causes the MAC
+		address of the bond to be selected normally (normally
+		the MAC address of the first slave added to the bond).
+		However, the second and subsequent slaves are not set
+		to this MAC address while they are in a backup role; a
+		slave is programmed with the bond's MAC address at
+		failover time (and the formerly active slave receives
+		the newly active slave's MAC address).
+
+		This policy is useful for multiport devices that
+		either become confused or incur a performance penalty
+		when multiple ports are programmed with the same MAC
+		address.
+
+
+	The default policy is none, unless the first slave cannot
+	change its MAC address, in which case the active policy is
+	selected by default.
+
+	This option may be modified via sysfs only when no slaves are
+	present in the bond.
+
+	This option was added in bonding version 3.2.0.  The "follow"
+	policy was added in bonding version 3.3.0.
+
+lacp_rate
+
+	Option specifying the rate in which we'll ask our link partner
+	to transmit LACPDU packets in 802.3ad mode.  Possible values
+	are:
+
+	slow or 0
+		Request partner to transmit LACPDUs every 30 seconds
+
+	fast or 1
+		Request partner to transmit LACPDUs every 1 second
+
+	The default is slow.
+
+max_bonds
+
+	Specifies the number of bonding devices to create for this
+	instance of the bonding driver.  E.g., if max_bonds is 3, and
+	the bonding driver is not already loaded, then bond0, bond1
+	and bond2 will be created.  The default value is 1.  Specifying
+	a value of 0 will load bonding, but will not create any devices.
+
+miimon
+
+	Specifies the MII link monitoring frequency in milliseconds.
+	This determines how often the link state of each slave is
+	inspected for link failures.  A value of zero disables MII
+	link monitoring.  A value of 100 is a good starting point.
+	The use_carrier option, below, affects how the link state is
+	determined.  See the High Availability section for additional
+	information.  The default value is 0.
+
+min_links
+
+	Specifies the minimum number of links that must be active before
+	asserting carrier. It is similar to the Cisco EtherChannel min-links
+	feature. This allows setting the minimum number of member ports that
+	must be up (link-up state) before marking the bond device as up
+	(carrier on). This is useful for situations where higher level services
+	such as clustering want to ensure a minimum number of low bandwidth
+	links are active before switchover. This option only affects 802.3ad
+	mode.
+
+	The default value is 0. This will cause carrier to be asserted (for
+	802.3ad mode) whenever there is an active aggregator, regardless of the
+	number of available links in that aggregator. Note that, because an
+	aggregator cannot be active without at least one available link,
+	setting this option to 0 or to 1 has the exact same effect.
+
+mode
+
+	Specifies one of the bonding policies. The default is
+	balance-rr (round robin).  Possible values are:
+
+	balance-rr or 0
+
+		Round-robin policy: Transmit packets in sequential
+		order from the first available slave through the
+		last.  This mode provides load balancing and fault
+		tolerance.
+
+	active-backup or 1
+
+		Active-backup policy: Only one slave in the bond is
+		active.  A different slave becomes active if, and only
+		if, the active slave fails.  The bond's MAC address is
+		externally visible on only one port (network adapter)
+		to avoid confusing the switch.
+
+		In bonding version 2.6.2 or later, when a failover
+		occurs in active-backup mode, bonding will issue one
+		or more gratuitous ARPs on the newly active slave.
+		One gratuitous ARP is issued for the bonding master
+		interface and each VLAN interface configured above
+		it, provided that the interface has at least one IP
+		address configured.  Gratuitous ARPs issued for VLAN
+		interfaces are tagged with the appropriate VLAN id.
+
+		This mode provides fault tolerance.  The primary
+		option, documented below, affects the behavior of this
+		mode.
+
+	balance-xor or 2
+
+		XOR policy: Transmit based on the selected transmit
+		hash policy.  The default policy is a simple [(source
+		MAC address XOR'd with destination MAC address XOR
+		packet type ID) modulo slave count].  Alternate transmit
+		policies may be selected via the xmit_hash_policy option,
+		described below.
+
+		This mode provides load balancing and fault tolerance.
+
+	broadcast or 3
+
+		Broadcast policy: transmits everything on all slave
+		interfaces.  This mode provides fault tolerance.
+
+	802.3ad or 4
+
+		IEEE 802.3ad Dynamic link aggregation.  Creates
+		aggregation groups that share the same speed and
+		duplex settings.  Utilizes all slaves in the active
+		aggregator according to the 802.3ad specification.
+
+		Slave selection for outgoing traffic is done according
+		to the transmit hash policy, which may be changed from
+		the default simple XOR policy via the xmit_hash_policy
+		option, documented below.  Note that not all transmit
+		policies may be 802.3ad compliant, particularly in
+		regards to the packet mis-ordering requirements of
+		section 43.2.4 of the 802.3ad standard.  Differing
+		peer implementations will have varying tolerances for
+		noncompliance.
+
+		Prerequisites:
+
+		1. Ethtool support in the base drivers for retrieving
+		the speed and duplex of each slave.
+
+		2. A switch that supports IEEE 802.3ad Dynamic link
+		aggregation.
+
+		Most switches will require some type of configuration
+		to enable 802.3ad mode.
+
+	balance-tlb or 5
+
+		Adaptive transmit load balancing: channel bonding that
+		does not require any special switch support.
+
+		In tlb_dynamic_lb=1 mode, the outgoing traffic is
+		distributed according to the current load (computed
+		relative to the speed) on each slave.
+
+		In tlb_dynamic_lb=0 mode, the load balancing based on
+		current load is disabled and the load is distributed
+		only using the hash distribution.
+
+		Incoming traffic is received by the current slave.
+		If the receiving slave fails, another slave takes over
+		the MAC address of the failed receiving slave.
+
+		Prerequisite:
+
+		Ethtool support in the base drivers for retrieving the
+		speed of each slave.
+
+	balance-alb or 6
+
+		Adaptive load balancing: includes balance-tlb plus
+		receive load balancing (rlb) for IPV4 traffic, and
+		does not require any special switch support.  The
+		receive load balancing is achieved by ARP negotiation.
+		The bonding driver intercepts the ARP Replies sent by
+		the local system on their way out and overwrites the
+		source hardware address with the unique hardware
+		address of one of the slaves in the bond such that
+		different peers use different hardware addresses for
+		the server.
+
+		Receive traffic from connections created by the server
+		is also balanced.  When the local system sends an ARP
+		Request the bonding driver copies and saves the peer's
+		IP information from the ARP packet.  When the ARP
+		Reply arrives from the peer, its hardware address is
+		retrieved and the bonding driver initiates an ARP
+		reply to this peer assigning it to one of the slaves
+		in the bond.  A problematic outcome of using ARP
+		negotiation for balancing is that each time that an
+		ARP request is broadcast it uses the hardware address
+		of the bond.  Hence, peers learn the hardware address
+		of the bond and the balancing of receive traffic
+		collapses to the current slave.  This is handled by
+		sending updates (ARP Replies) to all the peers with
+		their individually assigned hardware address such that
+		the traffic is redistributed.  Receive traffic is also
+		redistributed when a new slave is added to the bond
+		and when an inactive slave is re-activated.  The
+		receive load is distributed sequentially (round robin)
+		among the group of highest speed slaves in the bond.
+
+		When a link is reconnected or a new slave joins the
+		bond the receive traffic is redistributed among all
+		active slaves in the bond by initiating ARP Replies
+		with the selected MAC address to each of the
+		clients. The updelay parameter (detailed below) must
+		be set to a value equal or greater than the switch's
+		forwarding delay so that the ARP Replies sent to the
+		peers will not be blocked by the switch.
+
+		Prerequisites:
+
+		1. Ethtool support in the base drivers for retrieving
+		the speed of each slave.
+
+		2. Base driver support for setting the hardware
+		address of a device while it is open.  This is
+		required so that there will always be one slave in the
+		team using the bond hardware address (the
+		curr_active_slave) while having a unique hardware
+		address for each slave in the bond.  If the
+		curr_active_slave fails its hardware address is
+		swapped with the new curr_active_slave that was
+		chosen.
+
+num_grat_arp,
+num_unsol_na
+
+	Specify the number of peer notifications (gratuitous ARPs and
+	unsolicited IPv6 Neighbor Advertisements) to be issued after a
+	failover event.  As soon as the link is up on the new slave
+	(possibly immediately) a peer notification is sent on the
+	bonding device and each VLAN sub-device. This is repeated at
+	the rate specified by peer_notif_delay if the number is
+	greater than 1.
+
+	The valid range is 0 - 255; the default value is 1.  These options
+	affect only the active-backup mode.  These options were added for
+	bonding versions 3.3.0 and 3.4.0 respectively.
+
+	From Linux 3.0 and bonding version 3.7.1, these notifications
+	are generated by the ipv4 and ipv6 code and the numbers of
+	repetitions cannot be set independently.
+
+packets_per_slave
+
+	Specify the number of packets to transmit through a slave before
+	moving to the next one. When set to 0 then a slave is chosen at
+	random.
+
+	The valid range is 0 - 65535; the default value is 1. This option
+	has effect only in balance-rr mode.
+
+peer_notif_delay
+
+	Specify the delay, in milliseconds, between each peer
+	notification (gratuitous ARP and unsolicited IPv6 Neighbor
+	Advertisement) when they are issued after a failover event.
+	This delay should be a multiple of the link monitor interval
+	(arp_interval or miimon, whichever is active). The default
+	value is 0 which means to match the value of the link monitor
+	interval.
+
+primary
+
+	A string (eth0, eth2, etc) specifying which slave is the
+	primary device.  The specified device will always be the
+	active slave while it is available.  Only when the primary is
+	off-line will alternate devices be used.  This is useful when
+	one slave is preferred over another, e.g., when one slave has
+	higher throughput than another.
+
+	The primary option is only valid for active-backup (1),
+	balance-tlb (5) and balance-alb (6) mode.
+
+primary_reselect
+
+	Specifies the reselection policy for the primary slave.  This
+	affects how the primary slave is chosen to become the active slave
+	when failure of the active slave or recovery of the primary slave
+	occurs.  This option is designed to prevent flip-flopping between
+	the primary slave and other slaves.  Possible values are:
+
+	always or 0 (default)
+
+		The primary slave becomes the active slave whenever it
+		comes back up.
+
+	better or 1
+
+		The primary slave becomes the active slave when it comes
+		back up, if the speed and duplex of the primary slave are
+		better than the speed and duplex of the current active
+		slave.
+
+	failure or 2
+
+		The primary slave becomes the active slave only if the
+		current active slave fails and the primary slave is up.
+
+	The primary_reselect setting is ignored in two cases:
+
+		If no slaves are active, the first slave to recover is
+		made the active slave.
+
+		When initially enslaved, the primary slave is always made
+		the active slave.
+
+	Changing the primary_reselect policy via sysfs will cause an
+	immediate selection of the best active slave according to the new
+	policy.  This may or may not result in a change of the active
+	slave, depending upon the circumstances.
+
+	This option was added for bonding version 3.6.0.
+
+tlb_dynamic_lb
+
+	Specifies if dynamic shuffling of flows is enabled in tlb
+	mode. The value has no effect on any other modes.
+
+	The default behavior of tlb mode is to shuffle active flows across
+	slaves based on the load in that interval. This gives nice lb
+	characteristics but can cause packet reordering. If re-ordering is
+	a concern use this variable to disable flow shuffling and rely on
+	load balancing provided solely by the hash distribution.
+	xmit-hash-policy can be used to select the appropriate hashing for
+	the setup.
+
+	The sysfs entry can be used to change the setting per bond device
+	and the initial value is derived from the module parameter. The
+	sysfs entry is allowed to be changed only if the bond device is
+	down.
+
+	The default value is "1" that enables flow shuffling while value "0"
+	disables it. This option was added in bonding driver 3.7.1.
+
+
+updelay
+
+	Specifies the time, in milliseconds, to wait before enabling a
+	slave after a link recovery has been detected.  This option is
+	only valid for the miimon link monitor.  The updelay value
+	should be a multiple of the miimon value; if not, it will be
+	rounded down to the nearest multiple.  The default value is 0.
+
+use_carrier
+
+	Specifies whether or not miimon should use MII or ETHTOOL
+	ioctls vs. netif_carrier_ok() to determine the link
+	status. The MII or ETHTOOL ioctls are less efficient and
+	utilize a deprecated calling sequence within the kernel.  The
+	netif_carrier_ok() relies on the device driver to maintain its
+	state with netif_carrier_on/off; at this writing, most, but
+	not all, device drivers support this facility.
+
+	If bonding insists that the link is up when it should not be,
+	it may be that your network device driver does not support
+	netif_carrier_on/off.  The default state for netif_carrier is
+	"carrier on," so if a driver does not support netif_carrier,
+	it will appear as if the link is always up.  In this case,
+	setting use_carrier to 0 will cause bonding to revert to the
+	MII / ETHTOOL ioctl method to determine the link state.
+
+	A value of 1 enables the use of netif_carrier_ok(), a value of
+	0 will use the deprecated MII / ETHTOOL ioctls.  The default
+	value is 1.
+
+xmit_hash_policy
+
+	Selects the transmit hash policy to use for slave selection in
+	balance-xor, 802.3ad, and tlb modes.  Possible values are:
+
+	layer2
+
+		Uses XOR of hardware MAC addresses and packet type ID
+		field to generate the hash. The formula is
+
+		hash = source MAC XOR destination MAC XOR packet type ID
+		slave number = hash modulo slave count
+
+		This algorithm will place all traffic to a particular
+		network peer on the same slave.
+
+		This algorithm is 802.3ad compliant.
+
+	layer2+3
+
+		This policy uses a combination of layer2 and layer3
+		protocol information to generate the hash.
+
+		Uses XOR of hardware MAC addresses and IP addresses to
+		generate the hash.  The formula is
+
+		hash = source MAC XOR destination MAC XOR packet type ID
+		hash = hash XOR source IP XOR destination IP
+		hash = hash XOR (hash RSHIFT 16)
+		hash = hash XOR (hash RSHIFT 8)
+		And then hash is reduced modulo slave count.
+
+		If the protocol is IPv6 then the source and destination
+		addresses are first hashed using ipv6_addr_hash.
+
+		This algorithm will place all traffic to a particular
+		network peer on the same slave.  For non-IP traffic,
+		the formula is the same as for the layer2 transmit
+		hash policy.
+
+		This policy is intended to provide a more balanced
+		distribution of traffic than layer2 alone, especially
+		in environments where a layer3 gateway device is
+		required to reach most destinations.
+
+		This algorithm is 802.3ad compliant.
+
+	layer3+4
+
+		This policy uses upper layer protocol information,
+		when available, to generate the hash.  This allows for
+		traffic to a particular network peer to span multiple
+		slaves, although a single connection will not span
+		multiple slaves.
+
+		The formula for unfragmented TCP and UDP packets is
+
+		hash = source port, destination port (as in the header)
+		hash = hash XOR source IP XOR destination IP
+		hash = hash XOR (hash RSHIFT 16)
+		hash = hash XOR (hash RSHIFT 8)
+		And then hash is reduced modulo slave count.
+
+		If the protocol is IPv6 then the source and destination
+		addresses are first hashed using ipv6_addr_hash.
+
+		For fragmented TCP or UDP packets and all other IPv4 and
+		IPv6 protocol traffic, the source and destination port
+		information is omitted.  For non-IP traffic, the
+		formula is the same as for the layer2 transmit hash
+		policy.
+
+		This algorithm is not fully 802.3ad compliant.  A
+		single TCP or UDP conversation containing both
+		fragmented and unfragmented packets will see packets
+		striped across two interfaces.  This may result in out
+		of order delivery.  Most traffic types will not meet
+		these criteria, as TCP rarely fragments traffic, and
+		most UDP traffic is not involved in extended
+		conversations.  Other implementations of 802.3ad may
+		or may not tolerate this noncompliance.
+
+	encap2+3
+
+		This policy uses the same formula as layer2+3 but it
+		relies on skb_flow_dissect to obtain the header fields
+		which might result in the use of inner headers if an
+		encapsulation protocol is used. For example this will
+		improve the performance for tunnel users because the
+		packets will be distributed according to the encapsulated
+		flows.
+
+	encap3+4
+
+		This policy uses the same formula as layer3+4 but it
+		relies on skb_flow_dissect to obtain the header fields
+		which might result in the use of inner headers if an
+		encapsulation protocol is used. For example this will
+		improve the performance for tunnel users because the
+		packets will be distributed according to the encapsulated
+		flows.
+
+	The default value is layer2.  This option was added in bonding
+	version 2.6.3.  In earlier versions of bonding, this parameter
+	does not exist, and the layer2 policy is the only policy.  The
+	layer2+3 value was added for bonding version 3.2.2.
+
+resend_igmp
+
+	Specifies the number of IGMP membership reports to be issued after
+	a failover event. One membership report is issued immediately after
+	the failover, subsequent packets are sent in each 200ms interval.
+
+	The valid range is 0 - 255; the default value is 1. A value of 0
+	prevents the IGMP membership report from being issued in response
+	to the failover event.
+
+	This option is useful for bonding modes balance-rr (0), active-backup
+	(1), balance-tlb (5) and balance-alb (6), in which a failover can
+	switch the IGMP traffic from one slave to another.  Therefore a fresh
+	IGMP report must be issued to cause the switch to forward the incoming
+	IGMP traffic over the newly selected slave.
+
+	This option was added for bonding version 3.7.0.
+
+lp_interval
+
+	Specifies the number of seconds between instances where the bonding
+	driver sends learning packets to each slave's peer switch.
+
+	The valid range is 1 - 0x7fffffff; the default value is 1. This option
+	has effect only in balance-tlb and balance-alb modes.
+
+3. Configuring Bonding Devices
+==============================
+
+You can configure bonding using either your distro's network
+initialization scripts, or manually using either iproute2 or the
+sysfs interface.  Distros generally use one of three packages for the
+network initialization scripts: initscripts, sysconfig or interfaces.
+Recent versions of these packages have support for bonding, while older
+versions do not.
+
+We will first describe the options for configuring bonding for
+distros using versions of initscripts, sysconfig and interfaces with full
+or partial support for bonding, then provide information on enabling
+bonding without support from the network initialization scripts (i.e.,
+older versions of initscripts or sysconfig).
+
+If you're unsure whether your distro uses sysconfig,
+initscripts or interfaces, or don't know if it's new enough, have no fear.
+Determining this is fairly straightforward.
+
+First, look for a file called interfaces in /etc/network directory.
+If this file is present in your system, then your system uses interfaces. See
+Configuration with Interfaces Support.
+
+Else, issue the command::
+
+	$ rpm -qf /sbin/ifup
+
+It will respond with a line of text starting with either
+"initscripts" or "sysconfig," followed by some numbers.  This is the
+package that provides your network initialization scripts.
+
+Next, to determine if your installation supports bonding,
+issue the command::
+
+    $ grep ifenslave /sbin/ifup
+
+If this returns any matches, then your initscripts or
+sysconfig has support for bonding.
+
+3.1 Configuration with Sysconfig Support
+----------------------------------------
+
+This section applies to distros using a version of sysconfig
+with bonding support, for example, SuSE Linux Enterprise Server 9.
+
+SuSE SLES 9's networking configuration system does support
+bonding, however, at this writing, the YaST system configuration
+front end does not provide any means to work with bonding devices.
+Bonding devices can be managed by hand, however, as follows.
+
+First, if they have not already been configured, configure the
+slave devices.  On SLES 9, this is most easily done by running the
+yast2 sysconfig configuration utility.  The goal is to create an
+ifcfg-id file for each slave device.  The simplest way to accomplish
+this is to configure the devices for DHCP (this is only to get the
+ifcfg-id file created; see below for some issues with DHCP).  The
+name of the configuration file for each device will be of the form::
+
+    ifcfg-id-xx:xx:xx:xx:xx:xx
+
+Where the "xx" portion will be replaced with the digits from
+the device's permanent MAC address.
+
+Once the set of ifcfg-id-xx:xx:xx:xx:xx:xx files has been
+created, it is necessary to edit the configuration files for the slave
+devices (the MAC addresses correspond to those of the slave devices).
+Before editing, the file will contain multiple lines, and will look
+something like this::
+
+	BOOTPROTO='dhcp'
+	STARTMODE='on'
+	USERCTL='no'
+	UNIQUE='XNzu.WeZGOGF+4wE'
+	_nm_name='bus-pci-0001:61:01.0'
+
+Change the BOOTPROTO and STARTMODE lines to the following::
+
+	BOOTPROTO='none'
+	STARTMODE='off'
+
+Do not alter the UNIQUE or _nm_name lines.  Remove any other
+lines (USERCTL, etc).
+
+Once the ifcfg-id-xx:xx:xx:xx:xx:xx files have been modified,
+it's time to create the configuration file for the bonding device
+itself.  This file is named ifcfg-bondX, where X is the number of the
+bonding device to create, starting at 0.  The first such file is
+ifcfg-bond0, the second is ifcfg-bond1, and so on.  The sysconfig
+network configuration system will correctly start multiple instances
+of bonding.
+
+The contents of the ifcfg-bondX file are as follows::
+
+	BOOTPROTO="static"
+	BROADCAST="10.0.2.255"
+	IPADDR="10.0.2.10"
+	NETMASK="255.255.0.0"
+	NETWORK="10.0.2.0"
+	REMOTE_IPADDR=""
+	STARTMODE="onboot"
+	BONDING_MASTER="yes"
+	BONDING_MODULE_OPTS="mode=active-backup miimon=100"
+	BONDING_SLAVE0="eth0"
+	BONDING_SLAVE1="bus-pci-0000:06:08.1"
+
+Replace the sample BROADCAST, IPADDR, NETMASK and NETWORK
+values with the appropriate values for your network.
+
+The STARTMODE specifies when the device is brought online.
+The possible values are:
+
+	======== ======================================================
+	onboot	 The device is started at boot time.  If you're not
+		 sure, this is probably what you want.
+
+	manual	 The device is started only when ifup is called
+		 manually.  Bonding devices may be configured this
+		 way if you do not wish them to start automatically
+		 at boot for some reason.
+
+	hotplug  The device is started by a hotplug event.  This is not
+		 a valid choice for a bonding device.
+
+	off or   The device configuration is ignored.
+	ignore
+	======== ======================================================
+
+The line BONDING_MASTER='yes' indicates that the device is a
+bonding master device.  The only useful value is "yes."
+
+The contents of BONDING_MODULE_OPTS are supplied to the
+instance of the bonding module for this device.  Specify the options
+for the bonding mode, link monitoring, and so on here.  Do not include
+the max_bonds bonding parameter; this will confuse the configuration
+system if you have multiple bonding devices.
+
+Finally, supply one BONDING_SLAVEn="slave device" for each
+slave, where "n" is an increasing value, one for each slave.  The
+"slave device" is either an interface name, e.g., "eth0", or a device
+specifier for the network device.  The interface name is easier to
+find, but the ethN names are subject to change at boot time if, e.g.,
+a device early in the sequence has failed.  The device specifiers
+(bus-pci-0000:06:08.1 in the example above) specify the physical
+network device, and will not change unless the device's bus location
+changes (for example, it is moved from one PCI slot to another).  The
+example above uses one of each type for demonstration purposes; most
+configurations will choose one or the other for all slave devices.
+
+When all configuration files have been modified or created,
+networking must be restarted for the configuration changes to take
+effect.  This can be accomplished via the following::
+
+	# /etc/init.d/network restart
+
+Note that the network control script (/sbin/ifdown) will
+remove the bonding module as part of the network shutdown processing,
+so it is not necessary to remove the module by hand if, e.g., the
+module parameters have changed.
+
+Also, at this writing, YaST/YaST2 will not manage bonding
+devices (they do not show bonding interfaces on its list of network
+devices).  It is necessary to edit the configuration file by hand to
+change the bonding configuration.
+
+Additional general options and details of the ifcfg file
+format can be found in an example ifcfg template file::
+
+	/etc/sysconfig/network/ifcfg.template
+
+Note that the template does not document the various ``BONDING_*``
+settings described above, but does describe many of the other options.
+
+3.1.1 Using DHCP with Sysconfig
+-------------------------------
+
+Under sysconfig, configuring a device with BOOTPROTO='dhcp'
+will cause it to query DHCP for its IP address information.  At this
+writing, this does not function for bonding devices; the scripts
+attempt to obtain the device address from DHCP prior to adding any of
+the slave devices.  Without active slaves, the DHCP requests are not
+sent to the network.
+
+3.1.2 Configuring Multiple Bonds with Sysconfig
+-----------------------------------------------
+
+The sysconfig network initialization system is capable of
+handling multiple bonding devices.  All that is necessary is for each
+bonding instance to have an appropriately configured ifcfg-bondX file
+(as described above).  Do not specify the "max_bonds" parameter to any
+instance of bonding, as this will confuse sysconfig.  If you require
+multiple bonding devices with identical parameters, create multiple
+ifcfg-bondX files.
+
+Because the sysconfig scripts supply the bonding module
+options in the ifcfg-bondX file, it is not necessary to add them to
+the system ``/etc/modules.d/*.conf`` configuration files.
+
+3.2 Configuration with Initscripts Support
+------------------------------------------
+
+This section applies to distros using a recent version of
+initscripts with bonding support, for example, Red Hat Enterprise Linux
+version 3 or later, Fedora, etc.  On these systems, the network
+initialization scripts have knowledge of bonding, and can be configured to
+control bonding devices.  Note that older versions of the initscripts
+package have lower levels of support for bonding; this will be noted where
+applicable.
+
+These distros will not automatically load the network adapter
+driver unless the ethX device is configured with an IP address.
+Because of this constraint, users must manually configure a
+network-script file for all physical adapters that will be members of
+a bondX link.  Network script files are located in the directory:
+
+/etc/sysconfig/network-scripts
+
+The file name must be prefixed with "ifcfg-eth" and suffixed
+with the adapter's physical adapter number.  For example, the script
+for eth0 would be named /etc/sysconfig/network-scripts/ifcfg-eth0.
+Place the following text in the file::
+
+	DEVICE=eth0
+	USERCTL=no
+	ONBOOT=yes
+	MASTER=bond0
+	SLAVE=yes
+	BOOTPROTO=none
+
+The DEVICE= line will be different for every ethX device and
+must correspond with the name of the file, i.e., ifcfg-eth1 must have
+a device line of DEVICE=eth1.  The setting of the MASTER= line will
+also depend on the final bonding interface name chosen for your bond.
+As with other network devices, these typically start at 0, and go up
+one for each device, i.e., the first bonding instance is bond0, the
+second is bond1, and so on.
+
+Next, create a bond network script.  The file name for this
+script will be /etc/sysconfig/network-scripts/ifcfg-bondX where X is
+the number of the bond.  For bond0 the file is named "ifcfg-bond0",
+for bond1 it is named "ifcfg-bond1", and so on.  Within that file,
+place the following text::
+
+	DEVICE=bond0
+	IPADDR=192.168.1.1
+	NETMASK=255.255.255.0
+	NETWORK=192.168.1.0
+	BROADCAST=192.168.1.255
+	ONBOOT=yes
+	BOOTPROTO=none
+	USERCTL=no
+
+Be sure to change the networking specific lines (IPADDR,
+NETMASK, NETWORK and BROADCAST) to match your network configuration.
+
+For later versions of initscripts, such as that found with Fedora
+7 (or later) and Red Hat Enterprise Linux version 5 (or later), it is possible,
+and, indeed, preferable, to specify the bonding options in the ifcfg-bond0
+file, e.g. a line of the format::
+
+  BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=192.168.1.254"
+
+will configure the bond with the specified options.  The options
+specified in BONDING_OPTS are identical to the bonding module parameters
+except for the arp_ip_target field when using versions of initscripts older
+than 8.57 (Fedora 8) and 8.45.19 (Red Hat Enterprise Linux 5.2).  When
+using older versions each target should be included as a separate option and
+should be preceded by a '+' to indicate it should be added to the list of
+queried targets, e.g.,::
+
+    arp_ip_target=+192.168.1.1 arp_ip_target=+192.168.1.2
+
+is the proper syntax to specify multiple targets.  When specifying
+options via BONDING_OPTS, it is not necessary to edit
+``/etc/modprobe.d/*.conf``.
+
+For even older versions of initscripts that do not support
+BONDING_OPTS, it is necessary to edit ``/etc/modprobe.d/*.conf`` (depending
+upon your distro) to load the bonding module with your desired options when
+the bond0 interface is brought up.  The following lines in
+``/etc/modprobe.d/*.conf`` will load the bonding module, and select its
+options::
+
+	alias bond0 bonding
+	options bond0 mode=balance-alb miimon=100
+
+Replace the sample parameters with the appropriate set of
+options for your configuration.
+
+Finally run "/etc/rc.d/init.d/network restart" as root.  This
+will restart the networking subsystem and your bond link should be now
+up and running.
+
+3.2.1 Using DHCP with Initscripts
+---------------------------------
+
+Recent versions of initscripts (the versions supplied with Fedora
+Core 3 and Red Hat Enterprise Linux 4, or later versions, are reported to
+work) have support for assigning IP information to bonding devices via
+DHCP.
+
+To configure bonding for DHCP, configure it as described
+above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
+and add a line consisting of "TYPE=Bonding".  Note that the TYPE value
+is case sensitive.
+
+3.2.2 Configuring Multiple Bonds with Initscripts
+-------------------------------------------------
+
+Initscripts packages that are included with Fedora 7 and Red Hat
+Enterprise Linux 5 support multiple bonding interfaces by simply
+specifying the appropriate BONDING_OPTS= in ifcfg-bondX where X is the
+number of the bond.  This support requires sysfs support in the kernel,
+and a bonding driver of version 3.0.0 or later.  Other configurations may
+not support this method for specifying multiple bonding interfaces; for
+those instances, see the "Configuring Multiple Bonds Manually" section,
+below.
+
+3.3 Configuring Bonding Manually with iproute2
+-----------------------------------------------
+
+This section applies to distros whose network initialization
+scripts (the sysconfig or initscripts package) do not have specific
+knowledge of bonding.  One such distro is SuSE Linux Enterprise Server
+version 8.
+
+The general method for these systems is to place the bonding
+module parameters into a config file in /etc/modprobe.d/ (as
+appropriate for the installed distro), then add modprobe and/or
+`ip link` commands to the system's global init script.  The name of
+the global init script differs; for sysconfig, it is
+/etc/init.d/boot.local and for initscripts it is /etc/rc.d/rc.local.
+
+For example, if you wanted to make a simple bond of two e100
+devices (presumed to be eth0 and eth1), and have it persist across
+reboots, edit the appropriate file (/etc/init.d/boot.local or
+/etc/rc.d/rc.local), and add the following::
+
+	modprobe bonding mode=balance-alb miimon=100
+	modprobe e100
+	ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
+	ip link set eth0 master bond0
+	ip link set eth1 master bond0
+
+Replace the example bonding module parameters and bond0
+network configuration (IP address, netmask, etc) with the appropriate
+values for your configuration.
+
+Unfortunately, this method will not provide support for the
+ifup and ifdown scripts on the bond devices.  To reload the bonding
+configuration, it is necessary to run the initialization script, e.g.,::
+
+	# /etc/init.d/boot.local
+
+or::
+
+	# /etc/rc.d/rc.local
+
+It may be desirable in such a case to create a separate script
+which only initializes the bonding configuration, then call that
+separate script from within boot.local.  This allows for bonding to be
+enabled without re-running the entire global init script.
+
+To shut down the bonding devices, it is necessary to first
+mark the bonding device itself as being down, then remove the
+appropriate device driver modules.  For our example above, you can do
+the following::
+
+	# ifconfig bond0 down
+	# rmmod bonding
+	# rmmod e100
+
+Again, for convenience, it may be desirable to create a script
+with these commands.
+
+
+3.3.1 Configuring Multiple Bonds Manually
+-----------------------------------------
+
+This section contains information on configuring multiple
+bonding devices with differing options for those systems whose network
+initialization scripts lack support for configuring multiple bonds.
+
+If you require multiple bonding devices, but all with the same
+options, you may wish to use the "max_bonds" module parameter,
+documented above.
+
+To create multiple bonding devices with differing options, it is
+preferable to use bonding parameters exported by sysfs, documented in the
+section below.
+
+For versions of bonding without sysfs support, the only means to
+provide multiple instances of bonding with differing options is to load
+the bonding driver multiple times.  Note that current versions of the
+sysconfig network initialization scripts handle this automatically; if
+your distro uses these scripts, no special action is needed.  See the
+section Configuring Bonding Devices, above, if you're not sure about your
+network initialization scripts.
+
+To load multiple instances of the module, it is necessary to
+specify a different name for each instance (the module loading system
+requires that every loaded module, even multiple instances of the same
+module, have a unique name).  This is accomplished by supplying multiple
+sets of bonding options in ``/etc/modprobe.d/*.conf``, for example::
+
+	alias bond0 bonding
+	options bond0 -o bond0 mode=balance-rr miimon=100
+
+	alias bond1 bonding
+	options bond1 -o bond1 mode=balance-alb miimon=50
+
+will load the bonding module two times.  The first instance is
+named "bond0" and creates the bond0 device in balance-rr mode with an
+miimon of 100.  The second instance is named "bond1" and creates the
+bond1 device in balance-alb mode with an miimon of 50.
+
+In some circumstances (typically with older distributions),
+the above does not work, and the second bonding instance never sees
+its options.  In that case, the second options line can be substituted
+as follows::
+
+	install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
+				     mode=balance-alb miimon=50
+
+This may be repeated any number of times, specifying a new and
+unique name in place of bond1 for each subsequent instance.
+
+It has been observed that some Red Hat supplied kernels are unable
+to rename modules at load time (the "-o bond1" part).  Attempts to pass
+that option to modprobe will produce an "Operation not permitted" error.
+This has been reported on some Fedora Core kernels, and has been seen on
+RHEL 4 as well.  On kernels exhibiting this problem, it will be impossible
+to configure multiple bonds with differing parameters (as they are older
+kernels, and also lack sysfs support).
+
+3.4 Configuring Bonding Manually via Sysfs
+------------------------------------------
+
+Starting with version 3.0.0, Channel Bonding may be configured
+via the sysfs interface.  This interface allows dynamic configuration
+of all bonds in the system without unloading the module.  It also
+allows for adding and removing bonds at runtime.  Ifenslave is no
+longer required, though it is still supported.
+
+Use of the sysfs interface allows you to use multiple bonds
+with different configurations without having to reload the module.
+It also allows you to use multiple, differently configured bonds when
+bonding is compiled into the kernel.
+
+You must have the sysfs filesystem mounted to configure
+bonding this way.  The examples in this document assume that you
+are using the standard mount point for sysfs, e.g. /sys.  If your
+sysfs filesystem is mounted elsewhere, you will need to adjust the
+example paths accordingly.
+
+Creating and Destroying Bonds
+-----------------------------
+To add a new bond foo::
+
+	# echo +foo > /sys/class/net/bonding_masters
+
+To remove an existing bond bar::
+
+	# echo -bar > /sys/class/net/bonding_masters
+
+To show all existing bonds::
+
+	# cat /sys/class/net/bonding_masters
+
+.. note::
+
+   due to 4K size limitation of sysfs files, this list may be
+   truncated if you have more than a few hundred bonds.  This is unlikely
+   to occur under normal operating conditions.
+
+Adding and Removing Slaves
+--------------------------
+Interfaces may be enslaved to a bond using the file
+/sys/class/net/<bond>/bonding/slaves.  The semantics for this file
+are the same as for the bonding_masters file.
+
+To enslave interface eth0 to bond bond0::
+
+	# ifconfig bond0 up
+	# echo +eth0 > /sys/class/net/bond0/bonding/slaves
+
+To free slave eth0 from bond bond0::
+
+	# echo -eth0 > /sys/class/net/bond0/bonding/slaves
+
+When an interface is enslaved to a bond, symlinks between the
+two are created in the sysfs filesystem.  In this case, you would get
+/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
+/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
+
+This means that you can tell quickly whether or not an
+interface is enslaved by looking for the master symlink.  Thus::
+
+	# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
+
+will free eth0 from whatever bond it is enslaved to, regardless of
+the name of the bond interface.
+
+Changing a Bond's Configuration
+-------------------------------
+Each bond may be configured individually by manipulating the
+files located in /sys/class/net/<bond name>/bonding
+
+The names of these files correspond directly with the command-
+line parameters described elsewhere in this file, and, with the
+exception of arp_ip_target, they accept the same values.  To see the
+current setting, simply cat the appropriate file.
+
+A few examples will be given here; for specific usage
+guidelines for each parameter, see the appropriate section in this
+document.
+
+To configure bond0 for balance-alb mode::
+
+	# ifconfig bond0 down
+	# echo 6 > /sys/class/net/bond0/bonding/mode
+	- or -
+	# echo balance-alb > /sys/class/net/bond0/bonding/mode
+
+.. note::
+
+   The bond interface must be down before the mode can be changed.
+
+To enable MII monitoring on bond0 with a 1 second interval::
+
+	# echo 1000 > /sys/class/net/bond0/bonding/miimon
+
+.. note::
+
+   If ARP monitoring is enabled, it will be disabled when MII
+   monitoring is enabled, and vice-versa.
+
+To add ARP targets::
+
+	# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+	# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
+
+.. note::
+
+   up to 16 target addresses may be specified.
+
+To remove an ARP target::
+
+	# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
+
+To configure the interval between learning packet transmits::
+
+	# echo 12 > /sys/class/net/bond0/bonding/lp_interval
+
+.. note::
+
+   the lp_interval is the number of seconds between instances where
+   the bonding driver sends learning packets to each slave's peer switch.  The
+   default interval is 1 second.
+
+Example Configuration
+---------------------
+We begin with the same example that is shown in section 3.3,
+executed with sysfs, and without using ifenslave.
+
+To make a simple bond of two e100 devices (presumed to be eth0
+and eth1), and have it persist across reboots, edit the appropriate
+file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
+following::
+
+	modprobe bonding
+	modprobe e100
+	echo balance-alb > /sys/class/net/bond0/bonding/mode
+	ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
+	echo 100 > /sys/class/net/bond0/bonding/miimon
+	echo +eth0 > /sys/class/net/bond0/bonding/slaves
+	echo +eth1 > /sys/class/net/bond0/bonding/slaves
+
+To add a second bond, with two e1000 interfaces in
+active-backup mode, using ARP monitoring, add the following lines to
+your init script::
+
+	modprobe e1000
+	echo +bond1 > /sys/class/net/bonding_masters
+	echo active-backup > /sys/class/net/bond1/bonding/mode
+	ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
+	echo +192.168.2.100 > /sys/class/net/bond1/bonding/arp_ip_target
+	echo 2000 > /sys/class/net/bond1/bonding/arp_interval
+	echo +eth2 > /sys/class/net/bond1/bonding/slaves
+	echo +eth3 > /sys/class/net/bond1/bonding/slaves
+
+3.5 Configuration with Interfaces Support
+-----------------------------------------
+
+This section applies to distros which use /etc/network/interfaces file
+to describe network interface configuration, most notably Debian and its
+derivatives.
+
+The ifup and ifdown commands on Debian don't support bonding out of
+the box. The ifenslave-2.6 package should be installed to provide bonding
+support.  Once installed, this package will provide ``bond-*`` options
+to be used in /etc/network/interfaces.
+
+Note that ifenslave-2.6 package will load the bonding module and use
+the ifenslave command when appropriate.
+
+Example Configurations
+----------------------
+
+In /etc/network/interfaces, the following stanza will configure bond0, in
+active-backup mode, with eth0 and eth1 as slaves::
+
+	auto bond0
+	iface bond0 inet dhcp
+		bond-slaves eth0 eth1
+		bond-mode active-backup
+		bond-miimon 100
+		bond-primary eth0 eth1
+
+If the above configuration doesn't work, you might have a system using
+upstart for system startup. This is most notably true for recent
+Ubuntu versions. The following stanza in /etc/network/interfaces will
+produce the same result on those systems::
+
+	auto bond0
+	iface bond0 inet dhcp
+		bond-slaves none
+		bond-mode active-backup
+		bond-miimon 100
+
+	auto eth0
+	iface eth0 inet manual
+		bond-master bond0
+		bond-primary eth0 eth1
+
+	auto eth1
+	iface eth1 inet manual
+		bond-master bond0
+		bond-primary eth0 eth1
+
+For a full list of ``bond-*`` supported options in /etc/network/interfaces and
+some more advanced examples tailored to your particular distro, see the files in
+/usr/share/doc/ifenslave-2.6.
+
+3.6 Overriding Configuration for Special Cases
+----------------------------------------------
+
+When using the bonding driver, the physical port which transmits a frame is
+typically selected by the bonding driver, and is not relevant to the user or
+system administrator.  The output port is simply selected using the policies of
+the selected bonding mode.  On occasion however, it is helpful to direct certain
+classes of traffic to certain physical interfaces on output to implement
+slightly more complex policies.  For example, to reach a web server over a
+bonded interface in which eth0 connects to a private network, while eth1
+connects via a public network, it may be desirable to bias the bond to send said
+traffic over eth0 first, using eth1 only as a fall back, while all other traffic
+can safely be sent over either interface.  Such configurations may be achieved
+using the traffic control utilities inherent in linux.
+
+By default the bonding driver is multiqueue aware and 16 queues are created
+when the driver initializes (see Documentation/networking/multiqueue.txt
+for details).  If more or fewer queues are desired the module parameter
+tx_queues can be used to change this value.  There is no sysfs parameter
+available as the allocation is done at module init time.
+
+The output of the file /proc/net/bonding/bondX has changed so the output Queue
+ID is now printed for each slave::
+
+	Bonding Mode: fault-tolerance (active-backup)
+	Primary Slave: None
+	Currently Active Slave: eth0
+	MII Status: up
+	MII Polling Interval (ms): 0
+	Up Delay (ms): 0
+	Down Delay (ms): 0
+
+	Slave Interface: eth0
+	MII Status: up
+	Link Failure Count: 0
+	Permanent HW addr: 00:1a:a0:12:8f:cb
+	Slave queue ID: 0
+
+	Slave Interface: eth1
+	MII Status: up
+	Link Failure Count: 0
+	Permanent HW addr: 00:1a:a0:12:8f:cc
+	Slave queue ID: 2
+
+The queue_id for a slave can be set using the command::
+
+	# echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id
+
+Any interface that needs a queue_id set should set it with multiple calls
+like the one above until proper priorities are set for all interfaces.  On
+distributions that allow configuration via initscripts, multiple 'queue_id'
+arguments can be added to BONDING_OPTS to set all needed slave queues.
+
+These queue ids can be used in conjunction with the tc utility to configure
+a multiqueue qdisc and filters to bias certain traffic to transmit on certain
+slave devices.  For instance, say we wanted, in the above configuration to
+force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output
+device. The following commands would accomplish this::
+
+	# tc qdisc add dev bond0 handle 1 root multiq
+
+	# tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip \
+		dst 192.168.1.100 action skbedit queue_mapping 2
+
+These commands tell the kernel to attach a multiqueue queue discipline to the
+bond0 interface and filter traffic enqueued to it, such that packets with a dst
+ip of 192.168.1.100 have their output queue mapping value overwritten to 2.
+This value is then passed into the driver, causing the normal output path
+selection policy to be overridden, selecting instead qid 2, which maps to eth1.
+
+Note that qid values begin at 1.  Qid 0 is reserved to indicate to the driver
+that normal output policy selection should take place.  One benefit to simply
+leaving the qid for a slave to 0 is the multiqueue awareness in the bonding
+driver that is now present.  This awareness allows tc filters to be placed on
+slave devices as well as bond devices and the bonding driver will simply act as
+a pass-through for selecting output queues on the slave device rather than
+output port selection.
+
+This feature first appeared in bonding driver version 3.7.0 and support for
+output slave selection was limited to round-robin and active-backup modes.
+
+3.7 Configuring LACP for 802.3ad mode in a more secure way
+----------------------------------------------------------
+
+When using 802.3ad bonding mode, the Actor (host) and Partner (switch)
+exchange LACPDUs.  These LACPDUs cannot be sniffed, because they are
+destined to link local mac addresses (which switches/bridges are not
+supposed to forward).  However, most of the values are easily predictable
+or are simply the machine's MAC address (which is trivially known to all
+other hosts in the same L2).  This implies that other machines in the L2
+domain can spoof LACPDU packets from other hosts to the switch and potentially
+cause mayhem by joining (from the point of view of the switch) another
+machine's aggregate, thus receiving a portion of that host's incoming
+traffic and / or spoofing traffic from that machine themselves (potentially
+even successfully terminating some portion of flows). Though this is not
+a likely scenario, one could avoid this possibility by simply configuring
+a few bonding parameters:
+
+   (a) ad_actor_system : You can set a random mac-address that can be used for
+       these LACPDU exchanges. The value can not be either NULL or Multicast.
+       Also it's preferable to set the local-admin bit. Following shell code
+       generates a random mac-address as described above::
+
+	      # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \
+				       $(( (RANDOM & 0xFE) | 0x02 )) \
+				       $(( RANDOM & 0xFF )) \
+				       $(( RANDOM & 0xFF )) \
+				       $(( RANDOM & 0xFF )) \
+				       $(( RANDOM & 0xFF )) \
+				       $(( RANDOM & 0xFF )))
+	      # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system
+
+   (b) ad_actor_sys_prio : Randomize the system priority. The default value
+       is 65535, but the system can take any value from 1 - 65535. Following shell
+       code generates random priority and sets it::
+
+	    # sys_prio=$(( 1 + RANDOM + RANDOM ))
+	    # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio
+
+   (c) ad_user_port_key : Use the user portion of the port-key. The default
+       keeps this empty. These are the upper 10 bits of the port-key and value
+       ranges from 0 - 1023. Following shell code generates these 10 bits and
+       sets it::
+
+	    # usr_port_key=$(( RANDOM & 0x3FF ))
+	    # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key
+
+
+4. Querying Bonding Configuration
+=================================
+
+4.1 Bonding Configuration
+-------------------------
+
+Each bonding device has a read-only file residing in the
+/proc/net/bonding directory.  The file contents include information
+about the bonding configuration, options and state of each slave.
+
+For example, the contents of /proc/net/bonding/bond0 after the
+driver is loaded with parameters of mode=0 and miimon=1000 is
+generally as follows::
+
+	Ethernet Channel Bonding Driver: 2.6.1 (October 29, 2004)
+	Bonding Mode: load balancing (round-robin)
+	Currently Active Slave: eth0
+	MII Status: up
+	MII Polling Interval (ms): 1000
+	Up Delay (ms): 0
+	Down Delay (ms): 0
+
+	Slave Interface: eth1
+	MII Status: up
+	Link Failure Count: 1
+
+	Slave Interface: eth0
+	MII Status: up
+	Link Failure Count: 1
+
+The precise format and contents will change depending upon the
+bonding configuration, state, and version of the bonding driver.
+
+4.2 Network configuration
+-------------------------
+
+The network configuration can be inspected using the ifconfig
+command.  Bonding devices will have the MASTER flag set; Bonding slave
+devices will have the SLAVE flag set.  The ifconfig output does not
+contain information on which slaves are associated with which masters.
+
+In the example below, the bond0 interface is the master
+(MASTER) while eth0 and eth1 are slaves (SLAVE). Notice all slaves of
+bond0 have the same MAC address (HWaddr) as bond0 for all modes except
+TLB and ALB that require a unique MAC address for each slave::
+
+  # /sbin/ifconfig
+  bond0     Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
+	    inet addr:XXX.XXX.XXX.YYY  Bcast:XXX.XXX.XXX.255  Mask:255.255.252.0
+	    UP BROADCAST RUNNING MASTER MULTICAST  MTU:1500  Metric:1
+	    RX packets:7224794 errors:0 dropped:0 overruns:0 frame:0
+	    TX packets:3286647 errors:1 dropped:0 overruns:1 carrier:0
+	    collisions:0 txqueuelen:0
+
+  eth0      Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
+	    UP BROADCAST RUNNING SLAVE MULTICAST  MTU:1500  Metric:1
+	    RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0
+	    TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0
+	    collisions:0 txqueuelen:100
+	    Interrupt:10 Base address:0x1080
+
+  eth1      Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
+	    UP BROADCAST RUNNING SLAVE MULTICAST  MTU:1500  Metric:1
+	    RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0
+	    TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0
+	    collisions:0 txqueuelen:100
+	    Interrupt:9 Base address:0x1400
+
+5. Switch Configuration
+=======================
+
+For this section, "switch" refers to whatever system the
+bonded devices are directly connected to (i.e., where the other end of
+the cable plugs into).  This may be an actual dedicated switch device,
+or it may be another regular system (e.g., another computer running
+Linux).
+
+The active-backup, balance-tlb and balance-alb modes do not
+require any specific configuration of the switch.
+
+The 802.3ad mode requires that the switch have the appropriate
+ports configured as an 802.3ad aggregation.  The precise method used
+to configure this varies from switch to switch, but, for example, a
+Cisco 3550 series switch requires that the appropriate ports first be
+grouped together in a single etherchannel instance, then that
+etherchannel is set to mode "lacp" to enable 802.3ad (instead of
+standard EtherChannel).
+
+The balance-rr, balance-xor and broadcast modes generally
+require that the switch have the appropriate ports grouped together.
+The nomenclature for such a group differs between switches, it may be
+called an "etherchannel" (as in the Cisco example, above), a "trunk
+group" or some other similar variation.  For these modes, each switch
+will also have its own configuration options for the switch's transmit
+policy to the bond.  Typical choices include XOR of either the MAC or
+IP addresses.  The transmit policy of the two peers does not need to
+match.  For these three modes, the bonding mode really selects a
+transmit policy for an EtherChannel group; all three will interoperate
+with another EtherChannel group.
+
+
+6. 802.1q VLAN Support
+======================
+
+It is possible to configure VLAN devices over a bond interface
+using the 8021q driver.  However, only packets coming from the 8021q
+driver and passing through bonding will be tagged by default.  Self
+generated packets, for example, bonding's learning packets or ARP
+packets generated by either ALB mode or the ARP monitor mechanism, are
+tagged internally by bonding itself.  As a result, bonding must
+"learn" the VLAN IDs configured above it, and use those IDs to tag
+self generated packets.
+
+For reasons of simplicity, and to support the use of adapters
+that can do VLAN hardware acceleration offloading, the bonding
+interface declares itself as fully hardware offloading capable, it gets
+the add_vid/kill_vid notifications to gather the necessary
+information, and it propagates those actions to the slaves.  In case
+of mixed adapter types, hardware accelerated tagged packets that
+should go through an adapter that is not offloading capable are
+"un-accelerated" by the bonding driver so the VLAN tag sits in the
+regular location.
+
+VLAN interfaces *must* be added on top of a bonding interface
+only after enslaving at least one slave.  The bonding interface has a
+hardware address of 00:00:00:00:00:00 until the first slave is added.
+If the VLAN interface is created prior to the first enslavement, it
+would pick up the all-zeroes hardware address.  Once the first slave
+is attached to the bond, the bond device itself will pick up the
+slave's hardware address, which is then available for the VLAN device.
+
+Also, be aware that a similar problem can occur if all slaves
+are released from a bond that still has one or more VLAN interfaces on
+top of it.  When a new slave is added, the bonding interface will
+obtain its hardware address from the first slave, which might not
+match the hardware address of the VLAN interfaces (which was
+ultimately copied from an earlier slave).
+
+There are two methods to insure that the VLAN device operates
+with the correct hardware address if all slaves are removed from a
+bond interface:
+
+1. Remove all VLAN interfaces then recreate them
+
+2. Set the bonding interface's hardware address so that it
+matches the hardware address of the VLAN interfaces.
+
+Note that changing a VLAN interface's HW address would set the
+underlying device -- i.e. the bonding interface -- to promiscuous
+mode, which might not be what you want.
+
+
+7. Link Monitoring
+==================
+
+The bonding driver at present supports two schemes for
+monitoring a slave device's link state: the ARP monitor and the MII
+monitor.
+
+At the present time, due to implementation restrictions in the
+bonding driver itself, it is not possible to enable both ARP and MII
+monitoring simultaneously.
+
+7.1 ARP Monitor Operation
+-------------------------
+
+The ARP monitor operates as its name suggests: it sends ARP
+queries to one or more designated peer systems on the network, and
+uses the response as an indication that the link is operating.  This
+gives some assurance that traffic is actually flowing to and from one
+or more peers on the local network.
+
+The ARP monitor relies on the device driver itself to verify
+that traffic is flowing.  In particular, the driver must keep up to
+date the last receive time, dev->last_rx.  Drivers that use NETIF_F_LLTX
+flag must also update netdev_queue->trans_start.  If they do not, then the
+ARP monitor will immediately fail any slaves using that driver, and
+those slaves will stay down.  If network monitoring (tcpdump, etc)
+shows the ARP requests and replies on the network, then it may be that
+your device driver is not updating last_rx and trans_start.
+
+7.2 Configuring Multiple ARP Targets
+------------------------------------
+
+While ARP monitoring can be done with just one target, it can
+be useful in a High Availability setup to have several targets to
+monitor.  In the case of just one target, the target itself may go
+down or have a problem making it unresponsive to ARP requests.  Having
+an additional target (or several) increases the reliability of the ARP
+monitoring.
+
+Multiple ARP targets must be separated by commas as follows::
+
+ # example options for ARP monitoring with three targets
+ alias bond0 bonding
+ options bond0 arp_interval=60 arp_ip_target=192.168.0.1,192.168.0.3,192.168.0.9
+
+For just a single target the options would resemble::
+
+    # example options for ARP monitoring with one target
+    alias bond0 bonding
+    options bond0 arp_interval=60 arp_ip_target=192.168.0.100
+
+
+7.3 MII Monitor Operation
+-------------------------
+
+The MII monitor monitors only the carrier state of the local
+network interface.  It accomplishes this in one of three ways: by
+depending upon the device driver to maintain its carrier state, by
+querying the device's MII registers, or by making an ethtool query to
+the device.
+
+If the use_carrier module parameter is 1 (the default value),
+then the MII monitor will rely on the driver for carrier state
+information (via the netif_carrier subsystem).  As explained in the
+use_carrier parameter information, above, if the MII monitor fails to
+detect carrier loss on the device (e.g., when the cable is physically
+disconnected), it may be that the driver does not support
+netif_carrier.
+
+If use_carrier is 0, then the MII monitor will first query the
+device's (via ioctl) MII registers and check the link state.  If that
+request fails (not just that it returns carrier down), then the MII
+monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain
+the same information.  If both methods fail (i.e., the driver either
+does not support or had some error in processing both the MII register
+and ethtool requests), then the MII monitor will assume the link is
+up.
+
+8. Potential Sources of Trouble
+===============================
+
+8.1 Adventures in Routing
+-------------------------
+
+When bonding is configured, it is important that the slave
+devices not have routes that supersede routes of the master (or,
+generally, not have routes at all).  For example, suppose the bonding
+device bond0 has two slaves, eth0 and eth1, and the routing table is
+as follows::
+
+  Kernel IP routing table
+  Destination     Gateway         Genmask         Flags   MSS Window  irtt Iface
+  10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth0
+  10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth1
+  10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 bond0
+  127.0.0.0       0.0.0.0         255.0.0.0       U        40 0          0 lo
+
+This routing configuration will likely still update the
+receive/transmit times in the driver (needed by the ARP monitor), but
+may bypass the bonding driver (because outgoing traffic to, in this
+case, another host on network 10 would use eth0 or eth1 before bond0).
+
+The ARP monitor (and ARP itself) may become confused by this
+configuration, because ARP requests (generated by the ARP monitor)
+will be sent on one interface (bond0), but the corresponding reply
+will arrive on a different interface (eth0).  This reply looks to ARP
+as an unsolicited ARP reply (because ARP matches replies on an
+interface basis), and is discarded.  The MII monitor is not affected
+by the state of the routing table.
+
+The solution here is simply to insure that slaves do not have
+routes of their own, and if for some reason they must, those routes do
+not supersede routes of their master.  This should generally be the
+case, but unusual configurations or errant manual or automatic static
+route additions may cause trouble.
+
+8.2 Ethernet Device Renaming
+----------------------------
+
+On systems with network configuration scripts that do not
+associate physical devices directly with network interface names (so
+that the same physical device always has the same "ethX" name), it may
+be necessary to add some special logic to config files in
+/etc/modprobe.d/.
+
+For example, given a modules.conf containing the following::
+
+	alias bond0 bonding
+	options bond0 mode=some-mode miimon=50
+	alias eth0 tg3
+	alias eth1 tg3
+	alias eth2 e1000
+	alias eth3 e1000
+
+If neither eth0 nor eth1 is a slave to bond0, then when the
+bond0 interface comes up, the devices may end up reordered.  This
+happens because bonding is loaded first, then its slave device's
+drivers are loaded next.  Since no other drivers have been loaded,
+when the e1000 driver loads, it will receive eth0 and eth1 for its
+devices, but the bonding configuration tries to enslave eth2 and eth3
+(which may later be assigned to the tg3 devices).
+
+Adding the following::
+
+	add above bonding e1000 tg3
+
+causes modprobe to load e1000 then tg3, in that order, when
+bonding is loaded.  This command is fully documented in the
+modules.conf manual page.
+
+On systems utilizing modprobe an equivalent problem can occur.
+In this case, the following can be added to config files in
+/etc/modprobe.d/ as::
+
+	softdep bonding pre: tg3 e1000
+
+This will load tg3 and e1000 modules before loading the bonding one.
+Full documentation on this can be found in the modprobe.d and modprobe
+manual pages.
+
+8.3 Painfully Slow Or No Failed Link Detection By Miimon
+---------------------------------------------------------
+
+By default, bonding enables the use_carrier option, which
+instructs bonding to trust the driver to maintain carrier state.
+
+As discussed in the options section, above, some drivers do
+not support the netif_carrier_on/_off link state tracking system.
+With use_carrier enabled, bonding will always see these links as up,
+regardless of their actual state.
+
+Additionally, other drivers do support netif_carrier, but do
+not maintain it in real time, e.g., only polling the link state at
+some fixed interval.  In this case, miimon will detect failures, but
+only after some long period of time has expired.  If it appears that
+miimon is very slow in detecting link failures, try specifying
+use_carrier=0 to see if that improves the failure detection time.  If
+it does, then it may be that the driver checks the carrier state at a
+fixed interval, but does not cache the MII register values (so the
+use_carrier=0 method of querying the registers directly works).  If
+use_carrier=0 does not improve the failover, then the driver may cache
+the registers, or the problem may be elsewhere.
+
+Also, remember that miimon only checks for the device's
+carrier state.  It has no way to determine the state of devices on or
+beyond other ports of a switch, or if a switch is refusing to pass
+traffic while still maintaining carrier on.
+
+9. SNMP agents
+===============
+
+If running SNMP agents, the bonding driver should be loaded
+before any network drivers participating in a bond.  This requirement
+is due to the interface index (ipAdEntIfIndex) being associated to
+the first interface found with a given IP address.  That is, there is
+only one ipAdEntIfIndex for each IP address.  For example, if eth0 and
+eth1 are slaves of bond0 and the driver for eth0 is loaded before the
+bonding driver, the interface for the IP address will be associated
+with the eth0 interface.  This configuration is shown below, the IP
+address 192.168.1.1 has an interface index of 2 which indexes to eth0
+in the ifDescr table (ifDescr.2).
+
+::
+
+     interfaces.ifTable.ifEntry.ifDescr.1 = lo
+     interfaces.ifTable.ifEntry.ifDescr.2 = eth0
+     interfaces.ifTable.ifEntry.ifDescr.3 = eth1
+     interfaces.ifTable.ifEntry.ifDescr.4 = eth2
+     interfaces.ifTable.ifEntry.ifDescr.5 = eth3
+     interfaces.ifTable.ifEntry.ifDescr.6 = bond0
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 5
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 4
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
+
+This problem is avoided by loading the bonding driver before
+any network drivers participating in a bond.  Below is an example of
+loading the bonding driver first, the IP address 192.168.1.1 is
+correctly associated with ifDescr.2::
+
+     interfaces.ifTable.ifEntry.ifDescr.1 = lo
+     interfaces.ifTable.ifEntry.ifDescr.2 = bond0
+     interfaces.ifTable.ifEntry.ifDescr.3 = eth0
+     interfaces.ifTable.ifEntry.ifDescr.4 = eth1
+     interfaces.ifTable.ifEntry.ifDescr.5 = eth2
+     interfaces.ifTable.ifEntry.ifDescr.6 = eth3
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 6
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 5
+     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
+
+While some distributions may not report the interface name in
+ifDescr, the association between the IP address and IfIndex remains
+and SNMP functions such as Interface_Scan_Next will report that
+association.
+
+10. Promiscuous mode
+====================
+
+When running network monitoring tools, e.g., tcpdump, it is
+common to enable promiscuous mode on the device, so that all traffic
+is seen (instead of seeing only traffic destined for the local host).
+The bonding driver handles promiscuous mode changes to the bonding
+master device (e.g., bond0), and propagates the setting to the slave
+devices.
+
+For the balance-rr, balance-xor, broadcast, and 802.3ad modes,
+the promiscuous mode setting is propagated to all slaves.
+
+For the active-backup, balance-tlb and balance-alb modes, the
+promiscuous mode setting is propagated only to the active slave.
+
+For balance-tlb mode, the active slave is the slave currently
+receiving inbound traffic.
+
+For balance-alb mode, the active slave is the slave used as a
+"primary."  This slave is used for mode-specific control traffic, for
+sending to peers that are unassigned or if the load is unbalanced.
+
+For the active-backup, balance-tlb and balance-alb modes, when
+the active slave changes (e.g., due to a link failure), the
+promiscuous setting will be propagated to the new active slave.
+
+11. Configuring Bonding for High Availability
+=============================================
+
+High Availability refers to configurations that provide
+maximum network availability by having redundant or backup devices,
+links or switches between the host and the rest of the world.  The
+goal is to provide the maximum availability of network connectivity
+(i.e., the network always works), even though other configurations
+could provide higher throughput.
+
+11.1 High Availability in a Single Switch Topology
+--------------------------------------------------
+
+If two hosts (or a host and a single switch) are directly
+connected via multiple physical links, then there is no availability
+penalty to optimizing for maximum bandwidth.  In this case, there is
+only one switch (or peer), so if it fails, there is no alternative
+access to fail over to.  Additionally, the bonding load balance modes
+support link monitoring of their members, so if individual links fail,
+the load will be rebalanced across the remaining devices.
+
+See Section 12, "Configuring Bonding for Maximum Throughput"
+for information on configuring bonding with one peer device.
+
+11.2 High Availability in a Multiple Switch Topology
+----------------------------------------------------
+
+With multiple switches, the configuration of bonding and the
+network changes dramatically.  In multiple switch topologies, there is
+a trade off between network availability and usable bandwidth.
+
+Below is a sample network, configured to maximize the
+availability of the network::
+
+		|                                     |
+		|port3                           port3|
+	  +-----+----+                          +-----+----+
+	  |          |port2       ISL      port2|          |
+	  | switch A +--------------------------+ switch B |
+	  |          |                          |          |
+	  +-----+----+                          +-----+----+
+		|port1                           port1|
+		|             +-------+               |
+		+-------------+ host1 +---------------+
+			 eth0 +-------+ eth1
+
+In this configuration, there is a link between the two
+switches (ISL, or inter switch link), and multiple ports connecting to
+the outside world ("port3" on each switch).  There is no technical
+reason that this could not be extended to a third switch.
+
+11.2.1 HA Bonding Mode Selection for Multiple Switch Topology
+-------------------------------------------------------------
+
+In a topology such as the example above, the active-backup and
+broadcast modes are the only useful bonding modes when optimizing for
+availability; the other modes require all links to terminate on the
+same peer for them to behave rationally.
+
+active-backup:
+	This is generally the preferred mode, particularly if
+	the switches have an ISL and play together well.  If the
+	network configuration is such that one switch is specifically
+	a backup switch (e.g., has lower capacity, higher cost, etc),
+	then the primary option can be used to insure that the
+	preferred link is always used when it is available.
+
+broadcast:
+	This mode is really a special purpose mode, and is suitable
+	only for very specific needs.  For example, if the two
+	switches are not connected (no ISL), and the networks beyond
+	them are totally independent.  In this case, if it is
+	necessary for some specific one-way traffic to reach both
+	independent networks, then the broadcast mode may be suitable.
+
+11.2.2 HA Link Monitoring Selection for Multiple Switch Topology
+----------------------------------------------------------------
+
+The choice of link monitoring ultimately depends upon your
+switch.  If the switch can reliably fail ports in response to other
+failures, then either the MII or ARP monitors should work.  For
+example, in the above example, if the "port3" link fails at the remote
+end, the MII monitor has no direct means to detect this.  The ARP
+monitor could be configured with a target at the remote end of port3,
+thus detecting that failure without switch support.
+
+In general, however, in a multiple switch topology, the ARP
+monitor can provide a higher level of reliability in detecting end to
+end connectivity failures (which may be caused by the failure of any
+individual component to pass traffic for any reason).  Additionally,
+the ARP monitor should be configured with multiple targets (at least
+one for each switch in the network).  This will insure that,
+regardless of which switch is active, the ARP monitor has a suitable
+target to query.
+
+Note, also, that of late many switches now support a functionality
+generally referred to as "trunk failover."  This is a feature of the
+switch that causes the link state of a particular switch port to be set
+down (or up) when the state of another switch port goes down (or up).
+Its purpose is to propagate link failures from logically "exterior" ports
+to the logically "interior" ports that bonding is able to monitor via
+miimon.  Availability and configuration for trunk failover varies by
+switch, but this can be a viable alternative to the ARP monitor when using
+suitable switches.
+
+12. Configuring Bonding for Maximum Throughput
+==============================================
+
+12.1 Maximizing Throughput in a Single Switch Topology
+------------------------------------------------------
+
+In a single switch configuration, the best method to maximize
+throughput depends upon the application and network environment.  The
+various load balancing modes each have strengths and weaknesses in
+different environments, as detailed below.
+
+For this discussion, we will break down the topologies into
+two categories.  Depending upon the destination of most traffic, we
+categorize them into either "gatewayed" or "local" configurations.
+
+In a gatewayed configuration, the "switch" is acting primarily
+as a router, and the majority of traffic passes through this router to
+other networks.  An example would be the following::
+
+
+     +----------+                     +----------+
+     |          |eth0            port1|          | to other networks
+     | Host A   +---------------------+ router   +------------------->
+     |          +---------------------+          | Hosts B and C are out
+     |          |eth1            port2|          | here somewhere
+     +----------+                     +----------+
+
+The router may be a dedicated router device, or another host
+acting as a gateway.  For our discussion, the important point is that
+the majority of traffic from Host A will pass through the router to
+some other network before reaching its final destination.
+
+In a gatewayed network configuration, although Host A may
+communicate with many other systems, all of its traffic will be sent
+and received via one other peer on the local network, the router.
+
+Note that the case of two systems connected directly via
+multiple physical links is, for purposes of configuring bonding, the
+same as a gatewayed configuration.  In that case, it happens that all
+traffic is destined for the "gateway" itself, not some other network
+beyond the gateway.
+
+In a local configuration, the "switch" is acting primarily as
+a switch, and the majority of traffic passes through this switch to
+reach other stations on the same network.  An example would be the
+following::
+
+    +----------+            +----------+       +--------+
+    |          |eth0   port1|          +-------+ Host B |
+    |  Host A  +------------+  switch  |port3  +--------+
+    |          +------------+          |                  +--------+
+    |          |eth1   port2|          +------------------+ Host C |
+    +----------+            +----------+port4             +--------+
+
+
+Again, the switch may be a dedicated switch device, or another
+host acting as a gateway.  For our discussion, the important point is
+that the majority of traffic from Host A is destined for other hosts
+on the same local network (Hosts B and C in the above example).
+
+In summary, in a gatewayed configuration, traffic to and from
+the bonded device will be to the same MAC level peer on the network
+(the gateway itself, i.e., the router), regardless of its final
+destination.  In a local configuration, traffic flows directly to and
+from the final destinations, thus, each destination (Host B, Host C)
+will be addressed directly by their individual MAC addresses.
+
+This distinction between a gatewayed and a local network
+configuration is important because many of the load balancing modes
+available use the MAC addresses of the local network source and
+destination to make load balancing decisions.  The behavior of each
+mode is described below.
+
+
+12.1.1 MT Bonding Mode Selection for Single Switch Topology
+-----------------------------------------------------------
+
+This configuration is the easiest to set up and to understand,
+although you will have to decide which bonding mode best suits your
+needs.  The trade offs for each mode are detailed below:
+
+balance-rr:
+	This mode is the only mode that will permit a single
+	TCP/IP connection to stripe traffic across multiple
+	interfaces. It is therefore the only mode that will allow a
+	single TCP/IP stream to utilize more than one interface's
+	worth of throughput.  This comes at a cost, however: the
+	striping generally results in peer systems receiving packets out
+	of order, causing TCP/IP's congestion control system to kick
+	in, often by retransmitting segments.
+
+	It is possible to adjust TCP/IP's congestion limits by
+	altering the net.ipv4.tcp_reordering sysctl parameter.  The
+	usual default value is 3. But keep in mind that the TCP stack is
+	able to automatically increase this when it detects reordering.
+
+	Note that the fraction of packets that will be delivered out of
+	order is highly variable, and is unlikely to be zero.  The level
+	of reordering depends upon a variety of factors, including the
+	networking interfaces, the switch, and the topology of the
+	configuration.  Speaking in general terms, higher speed network
+	cards produce more reordering (due to factors such as packet
+	coalescing), and a "many to many" topology will reorder at a
+	higher rate than a "many slow to one fast" configuration.
+
+	Many switches do not support any modes that stripe traffic
+	(instead choosing a port based upon IP or MAC level addresses);
+	for those devices, traffic for a particular connection flowing
+	through the switch to a balance-rr bond will not utilize greater
+	than one interface's worth of bandwidth.
+
+	If you are utilizing protocols other than TCP/IP, UDP for
+	example, and your application can tolerate out of order
+	delivery, then this mode can allow for single stream datagram
+	performance that scales near linearly as interfaces are added
+	to the bond.
+
+	This mode requires the switch to have the appropriate ports
+	configured for "etherchannel" or "trunking."
+
+active-backup:
+	There is not much advantage in this network topology to
+	the active-backup mode, as the inactive backup devices are all
+	connected to the same peer as the primary.  In this case, a
+	load balancing mode (with link monitoring) will provide the
+	same level of network availability, but with increased
+	available bandwidth.  On the plus side, active-backup mode
+	does not require any configuration of the switch, so it may
+	have value if the hardware available does not support any of
+	the load balance modes.
+
+balance-xor:
+	This mode will limit traffic such that packets destined
+	for specific peers will always be sent over the same
+	interface.  Since the destination is determined by the MAC
+	addresses involved, this mode works best in a "local" network
+	configuration (as described above), with destinations all on
+	the same local network.  This mode is likely to be suboptimal
+	if all your traffic is passed through a single router (i.e., a
+	"gatewayed" network configuration, as described above).
+
+	As with balance-rr, the switch ports need to be configured for
+	"etherchannel" or "trunking."
+
+broadcast:
+	Like active-backup, there is not much advantage to this
+	mode in this type of network topology.
+
+802.3ad:
+	This mode can be a good choice for this type of network
+	topology.  The 802.3ad mode is an IEEE standard, so all peers
+	that implement 802.3ad should interoperate well.  The 802.3ad
+	protocol includes automatic configuration of the aggregates,
+	so minimal manual configuration of the switch is needed
+	(typically only to designate that some set of devices is
+	available for 802.3ad).  The 802.3ad standard also mandates
+	that frames be delivered in order (within certain limits), so
+	in general single connections will not see misordering of
+	packets.  The 802.3ad mode does have some drawbacks: the
+	standard mandates that all devices in the aggregate operate at
+	the same speed and duplex.  Also, as with all bonding load
+	balance modes other than balance-rr, no single connection will
+	be able to utilize more than a single interface's worth of
+	bandwidth.
+
+	Additionally, the linux bonding 802.3ad implementation
+	distributes traffic by peer (using an XOR of MAC addresses
+	and packet type ID), so in a "gatewayed" configuration, all
+	outgoing traffic will generally use the same device.  Incoming
+	traffic may also end up on a single device, but that is
+	dependent upon the balancing policy of the peer's 802.3ad
+	implementation.  In a "local" configuration, traffic will be
+	distributed across the devices in the bond.
+
+	Finally, the 802.3ad mode mandates the use of the MII monitor;
+	therefore, the ARP monitor is not available in this mode.
+
+balance-tlb:
+	The balance-tlb mode balances outgoing traffic by peer.
+	Since the balancing is done according to MAC address, in a
+	"gatewayed" configuration (as described above), this mode will
+	send all traffic across a single device.  However, in a
+	"local" network configuration, this mode balances multiple
+	local network peers across devices in a vaguely intelligent
+	manner (not a simple XOR as in balance-xor or 802.3ad mode),
+	so that mathematically unlucky MAC addresses (i.e., ones that
+	XOR to the same value) will not all "bunch up" on a single
+	interface.
+
+	Unlike 802.3ad, interfaces may be of differing speeds, and no
+	special switch configuration is required.  On the down side,
+	in this mode all incoming traffic arrives over a single
+	interface, this mode requires certain ethtool support in the
+	network device driver of the slave interfaces, and the ARP
+	monitor is not available.
+
+balance-alb:
+	This mode is everything that balance-tlb is, and more.
+	It has all of the features (and restrictions) of balance-tlb,
+	and will also balance incoming traffic from local network
+	peers (as described in the Bonding Module Options section,
+	above).
+
+	The only additional down side to this mode is that the network
+	device driver must support changing the hardware address while
+	the device is open.
+
+12.1.2 MT Link Monitoring for Single Switch Topology
+----------------------------------------------------
+
+The choice of link monitoring may largely depend upon which
+mode you choose to use.  The more advanced load balancing modes do not
+support the use of the ARP monitor, and are thus restricted to using
+the MII monitor (which does not provide as high a level of end to end
+assurance as the ARP monitor).
+
+12.2 Maximum Throughput in a Multiple Switch Topology
+-----------------------------------------------------
+
+Multiple switches may be utilized to optimize for throughput
+when they are configured in parallel as part of an isolated network
+between two or more systems, for example::
+
+		       +-----------+
+		       |  Host A   |
+		       +-+---+---+-+
+			 |   |   |
+		+--------+   |   +---------+
+		|            |             |
+	 +------+---+  +-----+----+  +-----+----+
+	 | Switch A |  | Switch B |  | Switch C |
+	 +------+---+  +-----+----+  +-----+----+
+		|            |             |
+		+--------+   |   +---------+
+			 |   |   |
+		       +-+---+---+-+
+		       |  Host B   |
+		       +-----------+
+
+In this configuration, the switches are isolated from one
+another.  One reason to employ a topology such as this is that, for an
+isolated network with many hosts (a cluster configured for high
+performance, for example), using multiple smaller switches can be more
+cost effective than a single larger switch; e.g., on a network with 24
+hosts, three 24 port switches can be significantly less expensive than
+a single 72 port switch.
+
+If access beyond the network is required, an individual host
+can be equipped with an additional network device connected to an
+external network; this host then additionally acts as a gateway.
+
+12.2.1 MT Bonding Mode Selection for Multiple Switch Topology
+-------------------------------------------------------------
+
+In actual practice, the bonding mode typically employed in
+configurations of this type is balance-rr.  Historically, in this
+network configuration, the usual caveats about out of order packet
+delivery are mitigated by the use of network adapters that do not do
+any kind of packet coalescing (via the use of NAPI, or because the
+device itself does not generate interrupts until some number of
+packets has arrived).  When employed in this fashion, the balance-rr
+mode allows individual connections between two hosts to effectively
+utilize greater than one interface's bandwidth.
+
+12.2.2 MT Link Monitoring for Multiple Switch Topology
+------------------------------------------------------
+
+Again, in actual practice, the MII monitor is most often used
+in this configuration, as performance is given preference over
+availability.  The ARP monitor will function in this topology, but its
+advantages over the MII monitor are mitigated by the volume of probes
+needed as the number of systems involved grows (remember that each
+host in the network is configured with bonding).
+
+13. Switch Behavior Issues
+==========================
+
+13.1 Link Establishment and Failover Delays
+-------------------------------------------
+
+Some switches exhibit undesirable behavior with regard to the
+timing of link up and down reporting by the switch.
+
+First, when a link comes up, some switches may indicate that
+the link is up (carrier available), but not pass traffic over the
+interface for some period of time.  This delay is typically due to
+some type of autonegotiation or routing protocol, but may also occur
+during switch initialization (e.g., during recovery after a switch
+failure).  If you find this to be a problem, specify an appropriate
+value to the updelay bonding module option to delay the use of the
+relevant interface(s).
+
+Second, some switches may "bounce" the link state one or more
+times while a link is changing state.  This occurs most commonly while
+the switch is initializing.  Again, an appropriate updelay value may
+help.
+
+Note that when a bonding interface has no active links, the
+driver will immediately reuse the first link that goes up, even if the
+updelay parameter has been specified (the updelay is ignored in this
+case).  If there are slave interfaces waiting for the updelay timeout
+to expire, the interface that first went into that state will be
+immediately reused.  This reduces down time of the network if the
+value of updelay has been overestimated, and since this occurs only in
+cases with no connectivity, there is no additional penalty for
+ignoring the updelay.
+
+In addition to the concerns about switch timings, if your
+switches take a long time to go into backup mode, it may be desirable
+to not activate a backup interface immediately after a link goes down.
+Failover may be delayed via the downdelay bonding module option.
+
+13.2 Duplicated Incoming Packets
+--------------------------------
+
+NOTE: Starting with version 3.0.2, the bonding driver has logic to
+suppress duplicate packets, which should largely eliminate this problem.
+The following description is kept for reference.
+
+It is not uncommon to observe a short burst of duplicated
+traffic when the bonding device is first used, or after it has been
+idle for some period of time.  This is most easily observed by issuing
+a "ping" to some other host on the network, and noticing that the
+output from ping flags duplicates (typically one per slave).
+
+For example, on a bond in active-backup mode with five slaves
+all connected to one switch, the output may appear as follows::
+
+	# ping -n 10.0.4.2
+	PING 10.0.4.2 (10.0.4.2) from 10.0.3.10 : 56(84) bytes of data.
+	64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.7 ms
+	64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+	64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+	64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+	64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
+	64 bytes from 10.0.4.2: icmp_seq=2 ttl=64 time=0.216 ms
+	64 bytes from 10.0.4.2: icmp_seq=3 ttl=64 time=0.267 ms
+	64 bytes from 10.0.4.2: icmp_seq=4 ttl=64 time=0.222 ms
+
+This is not due to an error in the bonding driver; rather, it
+is a side effect of how many switches update their MAC forwarding
+tables.  Initially, the switch does not associate the MAC address in
+the packet with a particular switch port, and so it may send the
+traffic to all ports until its MAC forwarding table is updated.  Since
+the interfaces attached to the bond may occupy multiple ports on a
+single switch, when the switch (temporarily) floods the traffic to all
+ports, the bond device receives multiple copies of the same packet
+(one per slave device).
+
+The duplicated packet behavior is switch dependent; some
+switches exhibit this, and some do not.  On switches that display this
+behavior, it can be induced by clearing the MAC forwarding table (on
+most Cisco switches, the privileged command "clear mac address-table
+dynamic" will accomplish this).
+
+14. Hardware Specific Considerations
+====================================
+
+This section contains additional information for configuring
+bonding on specific hardware platforms, or for interfacing bonding
+with particular switches or other devices.
+
+14.1 IBM BladeCenter
+--------------------
+
+This applies to the JS20 and similar systems.
+
+On the JS20 blades, the bonding driver supports only
+balance-rr, active-backup, balance-tlb and balance-alb modes.  This is
+largely due to the network topology inside the BladeCenter, detailed
+below.
+
+JS20 network adapter information
+--------------------------------
+
+All JS20s come with two Broadcom Gigabit Ethernet ports
+integrated on the planar (that's "motherboard" in IBM-speak).  In the
+BladeCenter chassis, the eth0 port of all JS20 blades is hard wired to
+I/O Module #1; similarly, all eth1 ports are wired to I/O Module #2.
+An add-on Broadcom daughter card can be installed on a JS20 to provide
+two more Gigabit Ethernet ports.  These ports, eth2 and eth3, are
+wired to I/O Modules 3 and 4, respectively.
+
+Each I/O Module may contain either a switch or a passthrough
+module (which allows ports to be directly connected to an external
+switch).  Some bonding modes require a specific BladeCenter internal
+network topology in order to function; these are detailed below.
+
+Additional BladeCenter-specific networking information can be
+found in two IBM Redbooks (www.ibm.com/redbooks):
+
+- "IBM eServer BladeCenter Networking Options"
+- "IBM eServer BladeCenter Layer 2-7 Network Switching"
+
+BladeCenter networking configuration
+------------------------------------
+
+Because a BladeCenter can be configured in a very large number
+of ways, this discussion will be confined to describing basic
+configurations.
+
+Normally, Ethernet Switch Modules (ESMs) are used in I/O
+modules 1 and 2.  In this configuration, the eth0 and eth1 ports of a
+JS20 will be connected to different internal switches (in the
+respective I/O modules).
+
+A passthrough module (OPM or CPM, optical or copper,
+passthrough module) connects the I/O module directly to an external
+switch.  By using PMs in I/O module #1 and #2, the eth0 and eth1
+interfaces of a JS20 can be redirected to the outside world and
+connected to a common external switch.
+
+Depending upon the mix of ESMs and PMs, the network will
+appear to bonding as either a single switch topology (all PMs) or as a
+multiple switch topology (one or more ESMs, zero or more PMs).  It is
+also possible to connect ESMs together, resulting in a configuration
+much like the example in "High Availability in a Multiple Switch
+Topology," above.
+
+Requirements for specific modes
+-------------------------------
+
+The balance-rr mode requires the use of passthrough modules
+for devices in the bond, all connected to a common external switch.
+That switch must be configured for "etherchannel" or "trunking" on the
+appropriate ports, as is usual for balance-rr.
+
+The balance-alb and balance-tlb modes will function with
+either switch modules or passthrough modules (or a mix).  The only
+specific requirement for these modes is that all network interfaces
+must be able to reach all destinations for traffic sent over the
+bonding device (i.e., the network must converge at some point outside
+the BladeCenter).
+
+The active-backup mode has no additional requirements.
+
+Link monitoring issues
+----------------------
+
+When an Ethernet Switch Module is in place, only the ARP
+monitor will reliably detect link loss to an external switch.  This is
+nothing unusual, but examination of the BladeCenter cabinet would
+suggest that the "external" network ports are the ethernet ports for
+the system, when in fact there is a switch between these "external"
+ports and the devices on the JS20 system itself.  The MII monitor is
+only able to detect link failures between the ESM and the JS20 system.
+
+When a passthrough module is in place, the MII monitor does
+detect failures to the "external" port, which is then directly
+connected to the JS20 system.
+
+Other concerns
+--------------
+
+The Serial Over LAN (SoL) link is established over the primary
+ethernet (eth0) only; therefore, any loss of link to eth0 will result
+in losing your SoL connection.  It will not fail over with other
+network traffic, as the SoL system is beyond the control of the
+bonding driver.
+
+It may be desirable to disable spanning tree on the switch
+(either the internal Ethernet Switch Module, or an external switch) to
+avoid fail-over delay issues when using bonding.
+
+
+15. Frequently Asked Questions
+==============================
+
+1.  Is it SMP safe?
+-------------------
+
+Yes. The old 2.0.xx channel bonding patch was not SMP safe.
+The new driver was designed to be SMP safe from the start.
+
+2.  What type of cards will work with it?
+-----------------------------------------
+
+Any Ethernet type cards (you can even mix cards - an Intel
+EtherExpress PRO/100 and a 3com 3c905b, for example).  For most modes,
+devices need not be of the same speed.
+
+Starting with version 3.2.1, bonding also supports Infiniband
+slaves in active-backup mode.
+
+3.  How many bonding devices can I have?
+----------------------------------------
+
+There is no limit.
+
+4.  How many slaves can a bonding device have?
+----------------------------------------------
+
+This is limited only by the number of network interfaces Linux
+supports and/or the number of network cards you can place in your
+system.
+
+5.  What happens when a slave link dies?
+----------------------------------------
+
+If link monitoring is enabled, then the failing device will be
+disabled.  The active-backup mode will fail over to a backup link, and
+other modes will ignore the failed link.  The link will continue to be
+monitored, and should it recover, it will rejoin the bond (in whatever
+manner is appropriate for the mode). See the sections on High
+Availability and the documentation for each mode for additional
+information.
+
+Link monitoring can be enabled via either the miimon or
+arp_interval parameters (described in the module parameters section,
+above).  In general, miimon monitors the carrier state as sensed by
+the underlying network device, and the arp monitor (arp_interval)
+monitors connectivity to another host on the local network.
+
+If no link monitoring is configured, the bonding driver will
+be unable to detect link failures, and will assume that all links are
+always available.  This will likely result in lost packets, and a
+resulting degradation of performance.  The precise performance loss
+depends upon the bonding mode and network configuration.
+
+6.  Can bonding be used for High Availability?
+----------------------------------------------
+
+Yes.  See the section on High Availability for details.
+
+7.  Which switches/systems does it work with?
+---------------------------------------------
+
+The full answer to this depends upon the desired mode.
+
+In the basic balance modes (balance-rr and balance-xor), it
+works with any system that supports etherchannel (also called
+trunking).  Most managed switches currently available have such
+support, and many unmanaged switches as well.
+
+The advanced balance modes (balance-tlb and balance-alb) do
+not have special switch requirements, but do need device drivers that
+support specific features (described in the appropriate section under
+module parameters, above).
+
+In 802.3ad mode, it works with systems that support IEEE
+802.3ad Dynamic Link Aggregation.  Most managed and many unmanaged
+switches currently available support 802.3ad.
+
+The active-backup mode should work with any Layer-II switch.
+
+8.  Where does a bonding device get its MAC address from?
+---------------------------------------------------------
+
+When using slave devices that have fixed MAC addresses, or when
+the fail_over_mac option is enabled, the bonding device's MAC address is
+the MAC address of the active slave.
+
+For other configurations, if not explicitly configured (with
+ifconfig or ip link), the MAC address of the bonding device is taken from
+its first slave device.  This MAC address is then passed to all following
+slaves and remains persistent (even if the first slave is removed) until
+the bonding device is brought down or reconfigured.
+
+If you wish to change the MAC address, you can set it with
+ifconfig or ip link::
+
+	# ifconfig bond0 hw ether 00:11:22:33:44:55
+
+	# ip link set bond0 address 66:77:88:99:aa:bb
+
+The MAC address can also be changed by bringing down/up the
+device and then changing its slaves (or their order)::
+
+	# ifconfig bond0 down ; modprobe -r bonding
+	# ifconfig bond0 .... up
+	# ifenslave bond0 eth...
+
+This method will automatically take the address from the next
+slave that is added.
+
+To restore your slaves' MAC addresses, you need to detach them
+from the bond (``ifenslave -d bond0 eth0``). The bonding driver will
+then restore the MAC addresses that the slaves had before they were
+enslaved.
+
+16. Resources and Links
+=======================
+
+The latest version of the bonding driver can be found in the latest
+version of the linux kernel, found on http://kernel.org
+
+The latest version of this document can be found in the latest kernel
+source (named Documentation/networking/bonding.rst).
+
+Discussions regarding the usage of the bonding driver take place on the
+bonding-devel mailing list, hosted at sourceforge.net. If you have questions or
+problems, post them to the list.  The list address is:
+
+bonding-devel@lists.sourceforge.net
+
+The administrative interface (to subscribe or unsubscribe) can
+be found at:
+
+https://lists.sourceforge.net/lists/listinfo/bonding-devel
+
+Discussions regarding the development of the bonding driver take place
+on the main Linux network mailing list, hosted at vger.kernel.org. The list
+address is:
+
+netdev@vger.kernel.org
+
+The administrative interface (to subscribe or unsubscribe) can
+be found at:
+
+http://vger.kernel.org/vger-lists.html#netdev
+
+Donald Becker's Ethernet Drivers and diag programs may be found at :
+
+ - http://web.archive.org/web/%2E/http://www.scyld.com/network/
+
+You will also find a lot of information regarding Ethernet, NWay, MII,
+etc. at www.scyld.com.
diff --git a/Documentation/networking/bonding.txt b/Documentation/networking/bonding.txt
deleted file mode 100644
index e3abfbd32f71..000000000000
--- a/Documentation/networking/bonding.txt
+++ /dev/null
@@ -1,2837 +0,0 @@
-
-		Linux Ethernet Bonding Driver HOWTO
-
-		Latest update: 27 April 2011
-
-Initial release : Thomas Davis <tadavis at lbl.gov>
-Corrections, HA extensions : 2000/10/03-15 :
-  - Willy Tarreau <willy at meta-x.org>
-  - Constantine Gavrilov <const-g at xpert.com>
-  - Chad N. Tindel <ctindel at ieee dot org>
-  - Janice Girouard <girouard at us dot ibm dot com>
-  - Jay Vosburgh <fubar at us dot ibm dot com>
-
-Reorganized and updated Feb 2005 by Jay Vosburgh
-Added Sysfs information: 2006/04/24
-  - Mitch Williams <mitch.a.williams at intel.com>
-
-Introduction
-============
-
-	The Linux bonding driver provides a method for aggregating
-multiple network interfaces into a single logical "bonded" interface.
-The behavior of the bonded interfaces depends upon the mode; generally
-speaking, modes provide either hot standby or load balancing services.
-Additionally, link integrity monitoring may be performed.
-	
-	The bonding driver originally came from Donald Becker's
-beowulf patches for kernel 2.0. It has changed quite a bit since, and
-the original tools from extreme-linux and beowulf sites will not work
-with this version of the driver.
-
-	For new versions of the driver, updated userspace tools, and
-who to ask for help, please follow the links at the end of this file.
-
-Table of Contents
-=================
-
-1. Bonding Driver Installation
-
-2. Bonding Driver Options
-
-3. Configuring Bonding Devices
-3.1	Configuration with Sysconfig Support
-3.1.1		Using DHCP with Sysconfig
-3.1.2		Configuring Multiple Bonds with Sysconfig
-3.2	Configuration with Initscripts Support
-3.2.1		Using DHCP with Initscripts
-3.2.2		Configuring Multiple Bonds with Initscripts
-3.3	Configuring Bonding Manually with Ifenslave
-3.3.1		Configuring Multiple Bonds Manually
-3.4	Configuring Bonding Manually via Sysfs
-3.5	Configuration with Interfaces Support
-3.6	Overriding Configuration for Special Cases
-3.7 Configuring LACP for 802.3ad mode in a more secure way
-
-4. Querying Bonding Configuration
-4.1	Bonding Configuration
-4.2	Network Configuration
-
-5. Switch Configuration
-
-6. 802.1q VLAN Support
-
-7. Link Monitoring
-7.1	ARP Monitor Operation
-7.2	Configuring Multiple ARP Targets
-7.3	MII Monitor Operation
-
-8. Potential Trouble Sources
-8.1	Adventures in Routing
-8.2	Ethernet Device Renaming
-8.3	Painfully Slow Or No Failed Link Detection By Miimon
-
-9. SNMP agents
-
-10. Promiscuous mode
-
-11. Configuring Bonding for High Availability
-11.1	High Availability in a Single Switch Topology
-11.2	High Availability in a Multiple Switch Topology
-11.2.1		HA Bonding Mode Selection for Multiple Switch Topology
-11.2.2		HA Link Monitoring for Multiple Switch Topology
-
-12. Configuring Bonding for Maximum Throughput
-12.1	Maximum Throughput in a Single Switch Topology
-12.1.1		MT Bonding Mode Selection for Single Switch Topology
-12.1.2		MT Link Monitoring for Single Switch Topology
-12.2	Maximum Throughput in a Multiple Switch Topology
-12.2.1		MT Bonding Mode Selection for Multiple Switch Topology
-12.2.2		MT Link Monitoring for Multiple Switch Topology
-
-13. Switch Behavior Issues
-13.1	Link Establishment and Failover Delays
-13.2	Duplicated Incoming Packets
-
-14. Hardware Specific Considerations
-14.1	IBM BladeCenter
-
-15. Frequently Asked Questions
-
-16. Resources and Links
-
-
-1. Bonding Driver Installation
-==============================
-
-	Most popular distro kernels ship with the bonding driver
-already available as a module. If your distro does not, or you
-have need to compile bonding from source (e.g., configuring and
-installing a mainline kernel from kernel.org), you'll need to perform
-the following steps:
-
-1.1 Configure and build the kernel with bonding
------------------------------------------------
-
-	The current version of the bonding driver is available in the
-drivers/net/bonding subdirectory of the most recent kernel source
-(which is available on http://kernel.org).  Most users "rolling their
-own" will want to use the most recent kernel from kernel.org.
-
-	Configure kernel with "make menuconfig" (or "make xconfig" or
-"make config"), then select "Bonding driver support" in the "Network
-device support" section.  It is recommended that you configure the
-driver as module since it is currently the only way to pass parameters
-to the driver or configure more than one bonding device.
-
-	Build and install the new kernel and modules.
-
-1.2 Bonding Control Utility
--------------------------------------
-
-	 It is recommended to configure bonding via iproute2 (netlink)
-or sysfs, the old ifenslave control utility is obsolete.
-
-2. Bonding Driver Options
-=========================
-
-	Options for the bonding driver are supplied as parameters to the
-bonding module at load time, or are specified via sysfs.
-
-	Module options may be given as command line arguments to the
-insmod or modprobe command, but are usually specified in either the
-/etc/modprobe.d/*.conf configuration files, or in a distro-specific
-configuration file (some of which are detailed in the next section).
-
-	Details on bonding support for sysfs is provided in the
-"Configuring Bonding Manually via Sysfs" section, below.
-
-	The available bonding driver parameters are listed below. If a
-parameter is not specified the default value is used.  When initially
-configuring a bond, it is recommended "tail -f /var/log/messages" be
-run in a separate window to watch for bonding driver error messages.
-
-	It is critical that either the miimon or arp_interval and
-arp_ip_target parameters be specified, otherwise serious network
-degradation will occur during link failures.  Very few devices do not
-support at least miimon, so there is really no reason not to use it.
-
-	Options with textual values will accept either the text name
-or, for backwards compatibility, the option value.  E.g.,
-"mode=802.3ad" and "mode=4" set the same mode.
-
-	The parameters are as follows:
-
-active_slave
-
-	Specifies the new active slave for modes that support it
-	(active-backup, balance-alb and balance-tlb).  Possible values
-	are the name of any currently enslaved interface, or an empty
-	string.  If a name is given, the slave and its link must be up in order
-	to be selected as the new active slave.  If an empty string is
-	specified, the current active slave is cleared, and a new active
-	slave is selected automatically.
-
-	Note that this is only available through the sysfs interface. No module
-	parameter by this name exists.
-
-	The normal value of this option is the name of the currently
-	active slave, or the empty string if there is no active slave or
-	the current mode does not use an active slave.
-
-ad_actor_sys_prio
-
-	In an AD system, this specifies the system priority. The allowed range
-	is 1 - 65535. If the value is not specified, it takes 65535 as the
-	default value.
-
-	This parameter has effect only in 802.3ad mode and is available through
-	SysFs interface.
-
-ad_actor_system
-
-	In an AD system, this specifies the mac-address for the actor in
-	protocol packet exchanges (LACPDUs). The value cannot be NULL or
-	multicast. It is preferred to have the local-admin bit set for this
-	mac but driver does not enforce it. If the value is not given then
-	system defaults to using the masters' mac address as actors' system
-	address.
-
-	This parameter has effect only in 802.3ad mode and is available through
-	SysFs interface.
-
-ad_select
-
-	Specifies the 802.3ad aggregation selection logic to use.  The
-	possible values and their effects are:
-
-	stable or 0
-
-		The active aggregator is chosen by largest aggregate
-		bandwidth.
-
-		Reselection of the active aggregator occurs only when all
-		slaves of the active aggregator are down or the active
-		aggregator has no slaves.
-
-		This is the default value.
-
-	bandwidth or 1
-
-		The active aggregator is chosen by largest aggregate
-		bandwidth.  Reselection occurs if:
-
-		- A slave is added to or removed from the bond
-
-		- Any slave's link state changes
-
-		- Any slave's 802.3ad association state changes
-
-		- The bond's administrative state changes to up
-
-	count or 2
-
-		The active aggregator is chosen by the largest number of
-		ports (slaves).  Reselection occurs as described under the
-		"bandwidth" setting, above.
-
-	The bandwidth and count selection policies permit failover of
-	802.3ad aggregations when partial failure of the active aggregator
-	occurs.  This keeps the aggregator with the highest availability
-	(either in bandwidth or in number of ports) active at all times.
-
-	This option was added in bonding version 3.4.0.
-
-ad_user_port_key
-
-	In an AD system, the port-key has three parts as shown below -
-
-	   Bits   Use
-	   00     Duplex
-	   01-05  Speed
-	   06-15  User-defined
-
-	This defines the upper 10 bits of the port key. The values can be
-	from 0 - 1023. If not given, the system defaults to 0.
-
-	This parameter has effect only in 802.3ad mode and is available through
-	SysFs interface.
-
-all_slaves_active
-
-	Specifies that duplicate frames (received on inactive ports) should be
-	dropped (0) or delivered (1).
-
-	Normally, bonding will drop duplicate frames (received on inactive
-	ports), which is desirable for most users. But there are some times
-	it is nice to allow duplicate frames to be delivered.
-
-	The default value is 0 (drop duplicate frames received on inactive
-	ports).
-
-arp_interval
-
-	Specifies the ARP link monitoring frequency in milliseconds.
-
-	The ARP monitor works by periodically checking the slave
-	devices to determine whether they have sent or received
-	traffic recently (the precise criteria depends upon the
-	bonding mode, and the state of the slave).  Regular traffic is
-	generated via ARP probes issued for the addresses specified by
-	the arp_ip_target option.
-
-	This behavior can be modified by the arp_validate option,
-	below.
-
-	If ARP monitoring is used in an etherchannel compatible mode
-	(modes 0 and 2), the switch should be configured in a mode
-	that evenly distributes packets across all links. If the
-	switch is configured to distribute the packets in an XOR
-	fashion, all replies from the ARP targets will be received on
-	the same link which could cause the other team members to
-	fail.  ARP monitoring should not be used in conjunction with
-	miimon.  A value of 0 disables ARP monitoring.  The default
-	value is 0.
-
-arp_ip_target
-
-	Specifies the IP addresses to use as ARP monitoring peers when
-	arp_interval is > 0.  These are the targets of the ARP request
-	sent to determine the health of the link to the targets.
-	Specify these values in ddd.ddd.ddd.ddd format.  Multiple IP
-	addresses must be separated by a comma.  At least one IP
-	address must be given for ARP monitoring to function.  The
-	maximum number of targets that can be specified is 16.  The
-	default value is no IP addresses.
-
-arp_validate
-
-	Specifies whether or not ARP probes and replies should be
-	validated in any mode that supports arp monitoring, or whether
-	non-ARP traffic should be filtered (disregarded) for link
-	monitoring purposes.
-
-	Possible values are:
-
-	none or 0
-
-		No validation or filtering is performed.
-
-	active or 1
-
-		Validation is performed only for the active slave.
-
-	backup or 2
-
-		Validation is performed only for backup slaves.
-
-	all or 3
-
-		Validation is performed for all slaves.
-
-	filter or 4
-
-		Filtering is applied to all slaves. No validation is
-		performed.
-
-	filter_active or 5
-
-		Filtering is applied to all slaves, validation is performed
-		only for the active slave.
-
-	filter_backup or 6
-
-		Filtering is applied to all slaves, validation is performed
-		only for backup slaves.
-
-	Validation:
-
-	Enabling validation causes the ARP monitor to examine the incoming
-	ARP requests and replies, and only consider a slave to be up if it
-	is receiving the appropriate ARP traffic.
-
-	For an active slave, the validation checks ARP replies to confirm
-	that they were generated by an arp_ip_target.  Since backup slaves
-	do not typically receive these replies, the validation performed
-	for backup slaves is on the broadcast ARP request sent out via the
-	active slave.  It is possible that some switch or network
-	configurations may result in situations wherein the backup slaves
-	do not receive the ARP requests; in such a situation, validation
-	of backup slaves must be disabled.
-
-	The validation of ARP requests on backup slaves is mainly helping
-	bonding to decide which slaves are more likely to work in case of
-	the active slave failure, it doesn't really guarantee that the
-	backup slave will work if it's selected as the next active slave.
-
-	Validation is useful in network configurations in which multiple
-	bonding hosts are concurrently issuing ARPs to one or more targets
-	beyond a common switch.  Should the link between the switch and
-	target fail (but not the switch itself), the probe traffic
-	generated by the multiple bonding instances will fool the standard
-	ARP monitor into considering the links as still up.  Use of
-	validation can resolve this, as the ARP monitor will only consider
-	ARP requests and replies associated with its own instance of
-	bonding.
-
-	Filtering:
-
-	Enabling filtering causes the ARP monitor to only use incoming ARP
-	packets for link availability purposes.  Arriving packets that are
-	not ARPs are delivered normally, but do not count when determining
-	if a slave is available.
-
-	Filtering operates by only considering the reception of ARP
-	packets (any ARP packet, regardless of source or destination) when
-	determining if a slave has received traffic for link availability
-	purposes.
-
-	Filtering is useful in network configurations in which significant
-	levels of third party broadcast traffic would fool the standard
-	ARP monitor into considering the links as still up.  Use of
-	filtering can resolve this, as only ARP traffic is considered for
-	link availability purposes.
-
-	This option was added in bonding version 3.1.0.
-
-arp_all_targets
-
-	Specifies the quantity of arp_ip_targets that must be reachable
-	in order for the ARP monitor to consider a slave as being up.
-	This option affects only active-backup mode for slaves with
-	arp_validation enabled.
-
-	Possible values are:
-
-	any or 0
-
-		consider the slave up only when any of the arp_ip_targets
-		is reachable
-
-	all or 1
-
-		consider the slave up only when all of the arp_ip_targets
-		are reachable
-
-downdelay
-
-	Specifies the time, in milliseconds, to wait before disabling
-	a slave after a link failure has been detected.  This option
-	is only valid for the miimon link monitor.  The downdelay
-	value should be a multiple of the miimon value; if not, it
-	will be rounded down to the nearest multiple.  The default
-	value is 0.
-
-fail_over_mac
-
-	Specifies whether active-backup mode should set all slaves to
-	the same MAC address at enslavement (the traditional
-	behavior), or, when enabled, perform special handling of the
-	bond's MAC address in accordance with the selected policy.
-
-	Possible values are:
-
-	none or 0
-
-		This setting disables fail_over_mac, and causes
-		bonding to set all slaves of an active-backup bond to
-		the same MAC address at enslavement time.  This is the
-		default.
-
-	active or 1
-
-		The "active" fail_over_mac policy indicates that the
-		MAC address of the bond should always be the MAC
-		address of the currently active slave.  The MAC
-		address of the slaves is not changed; instead, the MAC
-		address of the bond changes during a failover.
-
-		This policy is useful for devices that cannot ever
-		alter their MAC address, or for devices that refuse
-		incoming broadcasts with their own source MAC (which
-		interferes with the ARP monitor).
-
-		The down side of this policy is that every device on
-		the network must be updated via gratuitous ARP,
-		vs. just updating a switch or set of switches (which
-		often takes place for any traffic, not just ARP
-		traffic, if the switch snoops incoming traffic to
-		update its tables) for the traditional method.  If the
-		gratuitous ARP is lost, communication may be
-		disrupted.
-
-		When this policy is used in conjunction with the mii
-		monitor, devices which assert link up prior to being
-		able to actually transmit and receive are particularly
-		susceptible to loss of the gratuitous ARP, and an
-		appropriate updelay setting may be required.
-
-	follow or 2
-
-		The "follow" fail_over_mac policy causes the MAC
-		address of the bond to be selected normally (normally
-		the MAC address of the first slave added to the bond).
-		However, the second and subsequent slaves are not set
-		to this MAC address while they are in a backup role; a
-		slave is programmed with the bond's MAC address at
-		failover time (and the formerly active slave receives
-		the newly active slave's MAC address).
-
-		This policy is useful for multiport devices that
-		either become confused or incur a performance penalty
-		when multiple ports are programmed with the same MAC
-		address.
-
-
-	The default policy is none, unless the first slave cannot
-	change its MAC address, in which case the active policy is
-	selected by default.
-
-	This option may be modified via sysfs only when no slaves are
-	present in the bond.
-
-	This option was added in bonding version 3.2.0.  The "follow"
-	policy was added in bonding version 3.3.0.
-
-lacp_rate
-
-	Option specifying the rate in which we'll ask our link partner
-	to transmit LACPDU packets in 802.3ad mode.  Possible values
-	are:
-
-	slow or 0
-		Request partner to transmit LACPDUs every 30 seconds
-
-	fast or 1
-		Request partner to transmit LACPDUs every 1 second
-
-	The default is slow.
-
-max_bonds
-
-	Specifies the number of bonding devices to create for this
-	instance of the bonding driver.  E.g., if max_bonds is 3, and
-	the bonding driver is not already loaded, then bond0, bond1
-	and bond2 will be created.  The default value is 1.  Specifying
-	a value of 0 will load bonding, but will not create any devices.
-
-miimon
-
-	Specifies the MII link monitoring frequency in milliseconds.
-	This determines how often the link state of each slave is
-	inspected for link failures.  A value of zero disables MII
-	link monitoring.  A value of 100 is a good starting point.
-	The use_carrier option, below, affects how the link state is
-	determined.  See the High Availability section for additional
-	information.  The default value is 0.
-
-min_links
-
-	Specifies the minimum number of links that must be active before
-	asserting carrier. It is similar to the Cisco EtherChannel min-links
-	feature. This allows setting the minimum number of member ports that
-	must be up (link-up state) before marking the bond device as up
-	(carrier on). This is useful for situations where higher level services
-	such as clustering want to ensure a minimum number of low bandwidth
-	links are active before switchover. This option only affect 802.3ad
-	mode.
-
-	The default value is 0. This will cause carrier to be asserted (for
-	802.3ad mode) whenever there is an active aggregator, regardless of the
-	number of available links in that aggregator. Note that, because an
-	aggregator cannot be active without at least one available link,
-	setting this option to 0 or to 1 has the exact same effect.
-
-mode
-
-	Specifies one of the bonding policies. The default is
-	balance-rr (round robin).  Possible values are:
-
-	balance-rr or 0
-
-		Round-robin policy: Transmit packets in sequential
-		order from the first available slave through the
-		last.  This mode provides load balancing and fault
-		tolerance.
-
-	active-backup or 1
-
-		Active-backup policy: Only one slave in the bond is
-		active.  A different slave becomes active if, and only
-		if, the active slave fails.  The bond's MAC address is
-		externally visible on only one port (network adapter)
-		to avoid confusing the switch.
-
-		In bonding version 2.6.2 or later, when a failover
-		occurs in active-backup mode, bonding will issue one
-		or more gratuitous ARPs on the newly active slave.
-		One gratuitous ARP is issued for the bonding master
-		interface and each VLAN interfaces configured above
-		it, provided that the interface has at least one IP
-		address configured.  Gratuitous ARPs issued for VLAN
-		interfaces are tagged with the appropriate VLAN id.
-
-		This mode provides fault tolerance.  The primary
-		option, documented below, affects the behavior of this
-		mode.
-
-	balance-xor or 2
-
-		XOR policy: Transmit based on the selected transmit
-		hash policy.  The default policy is a simple [(source
-		MAC address XOR'd with destination MAC address XOR
-		packet type ID) modulo slave count].  Alternate transmit
-		policies may be	selected via the xmit_hash_policy option,
-		described below.
-
-		This mode provides load balancing and fault tolerance.
-
-	broadcast or 3
-
-		Broadcast policy: transmits everything on all slave
-		interfaces.  This mode provides fault tolerance.
-
-	802.3ad or 4
-
-		IEEE 802.3ad Dynamic link aggregation.  Creates
-		aggregation groups that share the same speed and
-		duplex settings.  Utilizes all slaves in the active
-		aggregator according to the 802.3ad specification.
-
-		Slave selection for outgoing traffic is done according
-		to the transmit hash policy, which may be changed from
-		the default simple XOR policy via the xmit_hash_policy
-		option, documented below.  Note that not all transmit
-		policies may be 802.3ad compliant, particularly in
-		regards to the packet mis-ordering requirements of
-		section 43.2.4 of the 802.3ad standard.  Differing
-		peer implementations will have varying tolerances for
-		noncompliance.
-
-		Prerequisites:
-
-		1. Ethtool support in the base drivers for retrieving
-		the speed and duplex of each slave.
-
-		2. A switch that supports IEEE 802.3ad Dynamic link
-		aggregation.
-
-		Most switches will require some type of configuration
-		to enable 802.3ad mode.
-
-	balance-tlb or 5
-
-		Adaptive transmit load balancing: channel bonding that
-		does not require any special switch support.
-
-		In tlb_dynamic_lb=1 mode; the outgoing traffic is
-		distributed according to the current load (computed
-		relative to the speed) on each slave.
-
-		In tlb_dynamic_lb=0 mode; the load balancing based on
-		current load is disabled and the load is distributed
-		only using the hash distribution.
-
-		Incoming traffic is received by the current slave.
-		If the receiving slave fails, another slave takes over
-		the MAC address of the failed receiving slave.
-
-		Prerequisite:
-
-		Ethtool support in the base drivers for retrieving the
-		speed of each slave.
-
-	balance-alb or 6
-
-		Adaptive load balancing: includes balance-tlb plus
-		receive load balancing (rlb) for IPV4 traffic, and
-		does not require any special switch support.  The
-		receive load balancing is achieved by ARP negotiation.
-		The bonding driver intercepts the ARP Replies sent by
-		the local system on their way out and overwrites the
-		source hardware address with the unique hardware
-		address of one of the slaves in the bond such that
-		different peers use different hardware addresses for
-		the server.
-
-		Receive traffic from connections created by the server
-		is also balanced.  When the local system sends an ARP
-		Request the bonding driver copies and saves the peer's
-		IP information from the ARP packet.  When the ARP
-		Reply arrives from the peer, its hardware address is
-		retrieved and the bonding driver initiates an ARP
-		reply to this peer assigning it to one of the slaves
-		in the bond.  A problematic outcome of using ARP
-		negotiation for balancing is that each time that an
-		ARP request is broadcast it uses the hardware address
-		of the bond.  Hence, peers learn the hardware address
-		of the bond and the balancing of receive traffic
-		collapses to the current slave.  This is handled by
-		sending updates (ARP Replies) to all the peers with
-		their individually assigned hardware address such that
-		the traffic is redistributed.  Receive traffic is also
-		redistributed when a new slave is added to the bond
-		and when an inactive slave is re-activated.  The
-		receive load is distributed sequentially (round robin)
-		among the group of highest speed slaves in the bond.
-
-		When a link is reconnected or a new slave joins the
-		bond the receive traffic is redistributed among all
-		active slaves in the bond by initiating ARP Replies
-		with the selected MAC address to each of the
-		clients. The updelay parameter (detailed below) must
-		be set to a value equal or greater than the switch's
-		forwarding delay so that the ARP Replies sent to the
-		peers will not be blocked by the switch.
-
-		Prerequisites:
-
-		1. Ethtool support in the base drivers for retrieving
-		the speed of each slave.
-
-		2. Base driver support for setting the hardware
-		address of a device while it is open.  This is
-		required so that there will always be one slave in the
-		team using the bond hardware address (the
-		curr_active_slave) while having a unique hardware
-		address for each slave in the bond.  If the
-		curr_active_slave fails its hardware address is
-		swapped with the new curr_active_slave that was
-		chosen.
-
-num_grat_arp
-num_unsol_na
-
-	Specify the number of peer notifications (gratuitous ARPs and
-	unsolicited IPv6 Neighbor Advertisements) to be issued after a
-	failover event.  As soon as the link is up on the new slave
-	(possibly immediately) a peer notification is sent on the
-	bonding device and each VLAN sub-device. This is repeated at
-	the rate specified by peer_notif_delay if the number is
-	greater than 1.
-
-	The valid range is 0 - 255; the default value is 1.  These options
-	affect only the active-backup mode.  These options were added for
-	bonding versions 3.3.0 and 3.4.0 respectively.
-
-	From Linux 3.0 and bonding version 3.7.1, these notifications
-	are generated by the ipv4 and ipv6 code and the numbers of
-	repetitions cannot be set independently.
-
-packets_per_slave
-
-	Specify the number of packets to transmit through a slave before
-	moving to the next one. When set to 0 then a slave is chosen at
-	random.
-
-	The valid range is 0 - 65535; the default value is 1. This option
-	has effect only in balance-rr mode.
-
-peer_notif_delay
-
-        Specify the delay, in milliseconds, between each peer
-        notification (gratuitous ARP and unsolicited IPv6 Neighbor
-        Advertisement) when they are issued after a failover event.
-        This delay should be a multiple of the link monitor interval
-        (arp_interval or miimon, whichever is active). The default
-        value is 0 which means to match the value of the link monitor
-        interval.
-
-primary
-
-	A string (eth0, eth2, etc) specifying which slave is the
-	primary device.  The specified device will always be the
-	active slave while it is available.  Only when the primary is
-	off-line will alternate devices be used.  This is useful when
-	one slave is preferred over another, e.g., when one slave has
-	higher throughput than another.
-
-	The primary option is only valid for active-backup(1),
-	balance-tlb (5) and balance-alb (6) mode.
-
-primary_reselect
-
-	Specifies the reselection policy for the primary slave.  This
-	affects how the primary slave is chosen to become the active slave
-	when failure of the active slave or recovery of the primary slave
-	occurs.  This option is designed to prevent flip-flopping between
-	the primary slave and other slaves.  Possible values are:
-
-	always or 0 (default)
-
-		The primary slave becomes the active slave whenever it
-		comes back up.
-
-	better or 1
-
-		The primary slave becomes the active slave when it comes
-		back up, if the speed and duplex of the primary slave is
-		better than the speed and duplex of the current active
-		slave.
-
-	failure or 2
-
-		The primary slave becomes the active slave only if the
-		current active slave fails and the primary slave is up.
-
-	The primary_reselect setting is ignored in two cases:
-
-		If no slaves are active, the first slave to recover is
-		made the active slave.
-
-		When initially enslaved, the primary slave is always made
-		the active slave.
-
-	Changing the primary_reselect policy via sysfs will cause an
-	immediate selection of the best active slave according to the new
-	policy.  This may or may not result in a change of the active
-	slave, depending upon the circumstances.
-
-	This option was added for bonding version 3.6.0.
-
-tlb_dynamic_lb
-
-	Specifies if dynamic shuffling of flows is enabled in tlb
-	mode. The value has no effect on any other modes.
-
-	The default behavior of tlb mode is to shuffle active flows across
-	slaves based on the load in that interval. This gives nice lb
-	characteristics but can cause packet reordering. If re-ordering is
-	a concern use this variable to disable flow shuffling and rely on
-	load balancing provided solely by the hash distribution.
-	xmit-hash-policy can be used to select the appropriate hashing for
-	the setup.
-
-	The sysfs entry can be used to change the setting per bond device
-	and the initial value is derived from the module parameter. The
-	sysfs entry is allowed to be changed only if the bond device is
-	down.
-
-	The default value is "1" that enables flow shuffling while value "0"
-	disables it. This option was added in bonding driver 3.7.1
-
-
-updelay
-
-	Specifies the time, in milliseconds, to wait before enabling a
-	slave after a link recovery has been detected.  This option is
-	only valid for the miimon link monitor.  The updelay value
-	should be a multiple of the miimon value; if not, it will be
-	rounded down to the nearest multiple.  The default value is 0.
-
-use_carrier
-
-	Specifies whether or not miimon should use MII or ETHTOOL
-	ioctls vs. netif_carrier_ok() to determine the link
-	status. The MII or ETHTOOL ioctls are less efficient and
-	utilize a deprecated calling sequence within the kernel.  The
-	netif_carrier_ok() relies on the device driver to maintain its
-	state with netif_carrier_on/off; at this writing, most, but
-	not all, device drivers support this facility.
-
-	If bonding insists that the link is up when it should not be,
-	it may be that your network device driver does not support
-	netif_carrier_on/off.  The default state for netif_carrier is
-	"carrier on," so if a driver does not support netif_carrier,
-	it will appear as if the link is always up.  In this case,
-	setting use_carrier to 0 will cause bonding to revert to the
-	MII / ETHTOOL ioctl method to determine the link state.
-
-	A value of 1 enables the use of netif_carrier_ok(), a value of
-	0 will use the deprecated MII / ETHTOOL ioctls.  The default
-	value is 1.
-
-xmit_hash_policy
-
-	Selects the transmit hash policy to use for slave selection in
-	balance-xor, 802.3ad, and tlb modes.  Possible values are:
-
-	layer2
-
-		Uses XOR of hardware MAC addresses and packet type ID
-		field to generate the hash. The formula is
-
-		hash = source MAC XOR destination MAC XOR packet type ID
-		slave number = hash modulo slave count
-
-		This algorithm will place all traffic to a particular
-		network peer on the same slave.
-
-		This algorithm is 802.3ad compliant.
-
-	layer2+3
-
-		This policy uses a combination of layer2 and layer3
-		protocol information to generate the hash.
-
-		Uses XOR of hardware MAC addresses and IP addresses to
-		generate the hash.  The formula is
-
-		hash = source MAC XOR destination MAC XOR packet type ID
-		hash = hash XOR source IP XOR destination IP
-		hash = hash XOR (hash RSHIFT 16)
-		hash = hash XOR (hash RSHIFT 8)
-		And then hash is reduced modulo slave count.
-
-		If the protocol is IPv6 then the source and destination
-		addresses are first hashed using ipv6_addr_hash.
-
-		This algorithm will place all traffic to a particular
-		network peer on the same slave.  For non-IP traffic,
-		the formula is the same as for the layer2 transmit
-		hash policy.
-
-		This policy is intended to provide a more balanced
-		distribution of traffic than layer2 alone, especially
-		in environments where a layer3 gateway device is
-		required to reach most destinations.
-
-		This algorithm is 802.3ad compliant.
-
-	layer3+4
-
-		This policy uses upper layer protocol information,
-		when available, to generate the hash.  This allows for
-		traffic to a particular network peer to span multiple
-		slaves, although a single connection will not span
-		multiple slaves.
-
-		The formula for unfragmented TCP and UDP packets is
-
-		hash = source port, destination port (as in the header)
-		hash = hash XOR source IP XOR destination IP
-		hash = hash XOR (hash RSHIFT 16)
-		hash = hash XOR (hash RSHIFT 8)
-		And then hash is reduced modulo slave count.
-
-		If the protocol is IPv6 then the source and destination
-		addresses are first hashed using ipv6_addr_hash.
-
-		For fragmented TCP or UDP packets and all other IPv4 and
-		IPv6 protocol traffic, the source and destination port
-		information is omitted.  For non-IP traffic, the
-		formula is the same as for the layer2 transmit hash
-		policy.
-
-		This algorithm is not fully 802.3ad compliant.  A
-		single TCP or UDP conversation containing both
-		fragmented and unfragmented packets will see packets
-		striped across two interfaces.  This may result in out
-		of order delivery.  Most traffic types will not meet
-		this criteria, as TCP rarely fragments traffic, and
-		most UDP traffic is not involved in extended
-		conversations.  Other implementations of 802.3ad may
-		or may not tolerate this noncompliance.
-
-	encap2+3
-
-		This policy uses the same formula as layer2+3 but it
-		relies on skb_flow_dissect to obtain the header fields
-		which might result in the use of inner headers if an
-		encapsulation protocol is used. For example this will
-		improve the performance for tunnel users because the
-		packets will be distributed according to the encapsulated
-		flows.
-
-	encap3+4
-
-		This policy uses the same formula as layer3+4 but it
-		relies on skb_flow_dissect to obtain the header fields
-		which might result in the use of inner headers if an
-		encapsulation protocol is used. For example this will
-		improve the performance for tunnel users because the
-		packets will be distributed according to the encapsulated
-		flows.
-
-	The default value is layer2.  This option was added in bonding
-	version 2.6.3.  In earlier versions of bonding, this parameter
-	does not exist, and the layer2 policy is the only policy.  The
-	layer2+3 value was added for bonding version 3.2.2.
-
-resend_igmp
-
-	Specifies the number of IGMP membership reports to be issued after
-	a failover event. One membership report is issued immediately after
-	the failover, subsequent packets are sent in each 200ms interval.
-
-	The valid range is 0 - 255; the default value is 1. A value of 0
-	prevents the IGMP membership report from being issued in response
-	to the failover event.
-
-	This option is useful for bonding modes balance-rr (0), active-backup
-	(1), balance-tlb (5) and balance-alb (6), in which a failover can
-	switch the IGMP traffic from one slave to another.  Therefore a fresh
-	IGMP report must be issued to cause the switch to forward the incoming
-	IGMP traffic over the newly selected slave.
-
-	This option was added for bonding version 3.7.0.
-
-lp_interval
-
-	Specifies the number of seconds between instances where the bonding
-	driver sends learning packets to each slave's peer switch.
-
-	The valid range is 1 - 0x7fffffff; the default value is 1. This option
-	has effect only in balance-tlb and balance-alb modes.
-
-3. Configuring Bonding Devices
-==============================
-
-	You can configure bonding using either your distro's network
-initialization scripts, or manually using either iproute2 or the
-sysfs interface.  Distros generally use one of three packages for the
-network initialization scripts: initscripts, sysconfig or interfaces.
-Recent versions of these packages have support for bonding, while older
-versions do not.
-
-	We will first describe the options for configuring bonding for
-distros using versions of initscripts, sysconfig and interfaces with full
-or partial support for bonding, then provide information on enabling
-bonding without support from the network initialization scripts (i.e.,
-older versions of initscripts or sysconfig).
-
-	If you're unsure whether your distro uses sysconfig,
-initscripts or interfaces, or don't know if it's new enough, have no fear.
-Determining this is fairly straightforward.
-
-	First, look for a file called interfaces in /etc/network directory.
-If this file is present in your system, then your system uses interfaces. See
-Configuration with Interfaces Support.
-
-	Else, issue the command:
-
-$ rpm -qf /sbin/ifup
-
-	It will respond with a line of text starting with either
-"initscripts" or "sysconfig," followed by some numbers.  This is the
-package that provides your network initialization scripts.
-
-	Next, to determine if your installation supports bonding,
-issue the command:
-
-$ grep ifenslave /sbin/ifup
-
-	If this returns any matches, then your initscripts or
-sysconfig has support for bonding.
-
-3.1 Configuration with Sysconfig Support
-----------------------------------------
-
-	This section applies to distros using a version of sysconfig
-with bonding support, for example, SuSE Linux Enterprise Server 9.
-
-	SuSE SLES 9's networking configuration system does support
-bonding, however, at this writing, the YaST system configuration
-front end does not provide any means to work with bonding devices.
-Bonding devices can be managed by hand, however, as follows.
-
-	First, if they have not already been configured, configure the
-slave devices.  On SLES 9, this is most easily done by running the
-yast2 sysconfig configuration utility.  The goal is to create an
-ifcfg-id file for each slave device.  The simplest way to accomplish
-this is to configure the devices for DHCP (this is only to get the
-ifcfg-id file created; see below for some issues with DHCP).  The
-name of the configuration file for each device will be of the form:
-
-ifcfg-id-xx:xx:xx:xx:xx:xx
-
-	Where the "xx" portion will be replaced with the digits from
-the device's permanent MAC address.
-
-	Once the set of ifcfg-id-xx:xx:xx:xx:xx:xx files has been
-created, it is necessary to edit the configuration files for the slave
-devices (the MAC addresses correspond to those of the slave devices).
-Before editing, the file will contain multiple lines, and will look
-something like this:
-
-BOOTPROTO='dhcp'
-STARTMODE='on'
-USERCTL='no'
-UNIQUE='XNzu.WeZGOGF+4wE'
-_nm_name='bus-pci-0001:61:01.0'
-
-	Change the BOOTPROTO and STARTMODE lines to the following:
-
-BOOTPROTO='none'
-STARTMODE='off'
-
-	Do not alter the UNIQUE or _nm_name lines.  Remove any other
-lines (USERCTL, etc).
-
-	Once the ifcfg-id-xx:xx:xx:xx:xx:xx files have been modified,
-it's time to create the configuration file for the bonding device
-itself.  This file is named ifcfg-bondX, where X is the number of the
-bonding device to create, starting at 0.  The first such file is
-ifcfg-bond0, the second is ifcfg-bond1, and so on.  The sysconfig
-network configuration system will correctly start multiple instances
-of bonding.
-
-	The contents of the ifcfg-bondX file are as follows:
-
-BOOTPROTO="static"
-BROADCAST="10.0.2.255"
-IPADDR="10.0.2.10"
-NETMASK="255.255.0.0"
-NETWORK="10.0.2.0"
-REMOTE_IPADDR=""
-STARTMODE="onboot"
-BONDING_MASTER="yes"
-BONDING_MODULE_OPTS="mode=active-backup miimon=100"
-BONDING_SLAVE0="eth0"
-BONDING_SLAVE1="bus-pci-0000:06:08.1"
-
-	Replace the sample BROADCAST, IPADDR, NETMASK and NETWORK
-values with the appropriate values for your network.
-
-	The STARTMODE specifies when the device is brought online.
-The possible values are:
-
-	onboot:	 The device is started at boot time.  If you're not
-		 sure, this is probably what you want.
-
-	manual:	 The device is started only when ifup is called
-		 manually.  Bonding devices may be configured this
-		 way if you do not wish them to start automatically
-		 at boot for some reason.
-
-	hotplug: The device is started by a hotplug event.  This is not
-		 a valid choice for a bonding device.
-
-	off or ignore: The device configuration is ignored.
-
-	The line BONDING_MASTER='yes' indicates that the device is a
-bonding master device.  The only useful value is "yes."
-
-	The contents of BONDING_MODULE_OPTS are supplied to the
-instance of the bonding module for this device.  Specify the options
-for the bonding mode, link monitoring, and so on here.  Do not include
-the max_bonds bonding parameter; this will confuse the configuration
-system if you have multiple bonding devices.
-
-	Finally, supply one BONDING_SLAVEn="slave device" for each
-slave, where "n" is an increasing value, one for each slave.  The
-"slave device" is either an interface name, e.g., "eth0", or a device
-specifier for the network device.  The interface name is easier to
-find, but the ethN names are subject to change at boot time if, e.g.,
-a device early in the sequence has failed.  The device specifiers
-(bus-pci-0000:06:08.1 in the example above) specify the physical
-network device, and will not change unless the device's bus location
-changes (for example, it is moved from one PCI slot to another).  The
-example above uses one of each type for demonstration purposes; most
-configurations will choose one or the other for all slave devices.
-
-	When all configuration files have been modified or created,
-networking must be restarted for the configuration changes to take
-effect.  This can be accomplished via the following:
-
-# /etc/init.d/network restart
-
-	Note that the network control script (/sbin/ifdown) will
-remove the bonding module as part of the network shutdown processing,
-so it is not necessary to remove the module by hand if, e.g., the
-module parameters have changed.
-
-	Also, at this writing, YaST/YaST2 will not manage bonding
-devices (they do not show bonding interfaces on its list of network
-devices).  It is necessary to edit the configuration file by hand to
-change the bonding configuration.
-
-	Additional general options and details of the ifcfg file
-format can be found in an example ifcfg template file:
-
-/etc/sysconfig/network/ifcfg.template
-
-	Note that the template does not document the various BONDING_
-settings described above, but does describe many of the other options.
-
-3.1.1 Using DHCP with Sysconfig
--------------------------------
-
-	Under sysconfig, configuring a device with BOOTPROTO='dhcp'
-will cause it to query DHCP for its IP address information.  At this
-writing, this does not function for bonding devices; the scripts
-attempt to obtain the device address from DHCP prior to adding any of
-the slave devices.  Without active slaves, the DHCP requests are not
-sent to the network.
-
-3.1.2 Configuring Multiple Bonds with Sysconfig
------------------------------------------------
-
-	The sysconfig network initialization system is capable of
-handling multiple bonding devices.  All that is necessary is for each
-bonding instance to have an appropriately configured ifcfg-bondX file
-(as described above).  Do not specify the "max_bonds" parameter to any
-instance of bonding, as this will confuse sysconfig.  If you require
-multiple bonding devices with identical parameters, create multiple
-ifcfg-bondX files.
-
-	Because the sysconfig scripts supply the bonding module
-options in the ifcfg-bondX file, it is not necessary to add them to
-the system /etc/modules.d/*.conf configuration files.
-
-3.2 Configuration with Initscripts Support
-------------------------------------------
-
-	This section applies to distros using a recent version of
-initscripts with bonding support, for example, Red Hat Enterprise Linux
-version 3 or later, Fedora, etc.  On these systems, the network
-initialization scripts have knowledge of bonding, and can be configured to
-control bonding devices.  Note that older versions of the initscripts
-package have lower levels of support for bonding; this will be noted where
-applicable.
-
-	These distros will not automatically load the network adapter
-driver unless the ethX device is configured with an IP address.
-Because of this constraint, users must manually configure a
-network-script file for all physical adapters that will be members of
-a bondX link.  Network script files are located in the directory:
-
-/etc/sysconfig/network-scripts
-
-	The file name must be prefixed with "ifcfg-eth" and suffixed
-with the adapter's physical adapter number.  For example, the script
-for eth0 would be named /etc/sysconfig/network-scripts/ifcfg-eth0.
-Place the following text in the file:
-
-DEVICE=eth0
-USERCTL=no
-ONBOOT=yes
-MASTER=bond0
-SLAVE=yes
-BOOTPROTO=none
-
-	The DEVICE= line will be different for every ethX device and
-must correspond with the name of the file, i.e., ifcfg-eth1 must have
-a device line of DEVICE=eth1.  The setting of the MASTER= line will
-also depend on the final bonding interface name chosen for your bond.
-As with other network devices, these typically start at 0, and go up
-one for each device, i.e., the first bonding instance is bond0, the
-second is bond1, and so on.
-
-	Next, create a bond network script.  The file name for this
-script will be /etc/sysconfig/network-scripts/ifcfg-bondX where X is
-the number of the bond.  For bond0 the file is named "ifcfg-bond0",
-for bond1 it is named "ifcfg-bond1", and so on.  Within that file,
-place the following text:
-
-DEVICE=bond0
-IPADDR=192.168.1.1
-NETMASK=255.255.255.0
-NETWORK=192.168.1.0
-BROADCAST=192.168.1.255
-ONBOOT=yes
-BOOTPROTO=none
-USERCTL=no
-
-	Be sure to change the networking specific lines (IPADDR,
-NETMASK, NETWORK and BROADCAST) to match your network configuration.
-
-	For later versions of initscripts, such as that found with Fedora
-7 (or later) and Red Hat Enterprise Linux version 5 (or later), it is possible,
-and, indeed, preferable, to specify the bonding options in the ifcfg-bond0
-file, e.g. a line of the format:
-
-BONDING_OPTS="mode=active-backup arp_interval=60 arp_ip_target=192.168.1.254"
-
-	will configure the bond with the specified options.  The options
-specified in BONDING_OPTS are identical to the bonding module parameters
-except for the arp_ip_target field when using versions of initscripts older
-than 8.57 (Fedora 8) and 8.45.19 (Red Hat Enterprise Linux 5.2).  When
-using older versions each target should be included as a separate option and
-should be preceded by a '+' to indicate it should be added to the list of
-queried targets, e.g.,
-
-	arp_ip_target=+192.168.1.1 arp_ip_target=+192.168.1.2
-
-	is the proper syntax to specify multiple targets.  When specifying
-options via BONDING_OPTS, it is not necessary to edit /etc/modprobe.d/*.conf.
-
-	For even older versions of initscripts that do not support
-BONDING_OPTS, it is necessary to edit /etc/modprobe.d/*.conf (depending upon
-your distro) to load the bonding module with your desired options when the
-bond0 interface is brought up.  The following lines in /etc/modprobe.d/*.conf
-will load the bonding module, and select its options:
-
-alias bond0 bonding
-options bond0 mode=balance-alb miimon=100
-
-	Replace the sample parameters with the appropriate set of
-options for your configuration.
-
-	Finally run "/etc/rc.d/init.d/network restart" as root.  This
-will restart the networking subsystem and your bond link should be now
-up and running.
-
-3.2.1 Using DHCP with Initscripts
----------------------------------
-
-	Recent versions of initscripts (the versions supplied with Fedora
-Core 3 and Red Hat Enterprise Linux 4, or later versions, are reported to
-work) have support for assigning IP information to bonding devices via
-DHCP.
-
-	To configure bonding for DHCP, configure it as described
-above, except replace the line "BOOTPROTO=none" with "BOOTPROTO=dhcp"
-and add a line consisting of "TYPE=Bonding".  Note that the TYPE value
-is case sensitive.
-
-3.2.2 Configuring Multiple Bonds with Initscripts
--------------------------------------------------
-
-	Initscripts packages that are included with Fedora 7 and Red Hat
-Enterprise Linux 5 support multiple bonding interfaces by simply
-specifying the appropriate BONDING_OPTS= in ifcfg-bondX where X is the
-number of the bond.  This support requires sysfs support in the kernel,
-and a bonding driver of version 3.0.0 or later.  Other configurations may
-not support this method for specifying multiple bonding interfaces; for
-those instances, see the "Configuring Multiple Bonds Manually" section,
-below.
-
-3.3 Configuring Bonding Manually with iproute2
------------------------------------------------
-
-	This section applies to distros whose network initialization
-scripts (the sysconfig or initscripts package) do not have specific
-knowledge of bonding.  One such distro is SuSE Linux Enterprise Server
-version 8.
-
-	The general method for these systems is to place the bonding
-module parameters into a config file in /etc/modprobe.d/ (as
-appropriate for the installed distro), then add modprobe and/or
-`ip link` commands to the system's global init script.  The name of
-the global init script differs; for sysconfig, it is
-/etc/init.d/boot.local and for initscripts it is /etc/rc.d/rc.local.
-
-	For example, if you wanted to make a simple bond of two e100
-devices (presumed to be eth0 and eth1), and have it persist across
-reboots, edit the appropriate file (/etc/init.d/boot.local or
-/etc/rc.d/rc.local), and add the following:
-
-modprobe bonding mode=balance-alb miimon=100
-modprobe e100
-ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
-ip link set eth0 master bond0
-ip link set eth1 master bond0
-
-	Replace the example bonding module parameters and bond0
-network configuration (IP address, netmask, etc) with the appropriate
-values for your configuration.
-
-	Unfortunately, this method will not provide support for the
-ifup and ifdown scripts on the bond devices.  To reload the bonding
-configuration, it is necessary to run the initialization script, e.g.,
-
-# /etc/init.d/boot.local
-
-	or
-
-# /etc/rc.d/rc.local
-
-	It may be desirable in such a case to create a separate script
-which only initializes the bonding configuration, then call that
-separate script from within boot.local.  This allows for bonding to be
-enabled without re-running the entire global init script.
-
-	To shut down the bonding devices, it is necessary to first
-mark the bonding device itself as being down, then remove the
-appropriate device driver modules.  For our example above, you can do
-the following:
-
-# ifconfig bond0 down
-# rmmod bonding
-# rmmod e100
-
-	Again, for convenience, it may be desirable to create a script
-with these commands.
-
-
-3.3.1 Configuring Multiple Bonds Manually
------------------------------------------
-
-	This section contains information on configuring multiple
-bonding devices with differing options for those systems whose network
-initialization scripts lack support for configuring multiple bonds.
-
-	If you require multiple bonding devices, but all with the same
-options, you may wish to use the "max_bonds" module parameter,
-documented above.
-
-	To create multiple bonding devices with differing options, it is
-preferable to use bonding parameters exported by sysfs, documented in the
-section below.
-
-	For versions of bonding without sysfs support, the only means to
-provide multiple instances of bonding with differing options is to load
-the bonding driver multiple times.  Note that current versions of the
-sysconfig network initialization scripts handle this automatically; if
-your distro uses these scripts, no special action is needed.  See the
-section Configuring Bonding Devices, above, if you're not sure about your
-network initialization scripts.
-
-	To load multiple instances of the module, it is necessary to
-specify a different name for each instance (the module loading system
-requires that every loaded module, even multiple instances of the same
-module, have a unique name).  This is accomplished by supplying multiple
-sets of bonding options in /etc/modprobe.d/*.conf, for example:
-
-alias bond0 bonding
-options bond0 -o bond0 mode=balance-rr miimon=100
-
-alias bond1 bonding
-options bond1 -o bond1 mode=balance-alb miimon=50
-
-	will load the bonding module two times.  The first instance is
-named "bond0" and creates the bond0 device in balance-rr mode with an
-miimon of 100.  The second instance is named "bond1" and creates the
-bond1 device in balance-alb mode with an miimon of 50.
-
-	In some circumstances (typically with older distributions),
-the above does not work, and the second bonding instance never sees
-its options.  In that case, the second options line can be substituted
-as follows:
-
-install bond1 /sbin/modprobe --ignore-install bonding -o bond1 \
-	mode=balance-alb miimon=50
-
-	This may be repeated any number of times, specifying a new and
-unique name in place of bond1 for each subsequent instance.
-
-	It has been observed that some Red Hat supplied kernels are unable
-to rename modules at load time (the "-o bond1" part).  Attempts to pass
-that option to modprobe will produce an "Operation not permitted" error.
-This has been reported on some Fedora Core kernels, and has been seen on
-RHEL 4 as well.  On kernels exhibiting this problem, it will be impossible
-to configure multiple bonds with differing parameters (as they are older
-kernels, and also lack sysfs support).
-
-3.4 Configuring Bonding Manually via Sysfs
-------------------------------------------
-
-	Starting with version 3.0.0, Channel Bonding may be configured
-via the sysfs interface.  This interface allows dynamic configuration
-of all bonds in the system without unloading the module.  It also
-allows for adding and removing bonds at runtime.  Ifenslave is no
-longer required, though it is still supported.
-
-	Use of the sysfs interface allows you to use multiple bonds
-with different configurations without having to reload the module.
-It also allows you to use multiple, differently configured bonds when
-bonding is compiled into the kernel.
-
-	You must have the sysfs filesystem mounted to configure
-bonding this way.  The examples in this document assume that you
-are using the standard mount point for sysfs, e.g. /sys.  If your
-sysfs filesystem is mounted elsewhere, you will need to adjust the
-example paths accordingly.
-
-Creating and Destroying Bonds
------------------------------
-To add a new bond foo:
-# echo +foo > /sys/class/net/bonding_masters
-
-To remove an existing bond bar:
-# echo -bar > /sys/class/net/bonding_masters
-
-To show all existing bonds:
-# cat /sys/class/net/bonding_masters
-
-NOTE: due to 4K size limitation of sysfs files, this list may be
-truncated if you have more than a few hundred bonds.  This is unlikely
-to occur under normal operating conditions.
-
-Adding and Removing Slaves
---------------------------
-	Interfaces may be enslaved to a bond using the file
-/sys/class/net/<bond>/bonding/slaves.  The semantics for this file
-are the same as for the bonding_masters file.
-
-To enslave interface eth0 to bond bond0:
-# ifconfig bond0 up
-# echo +eth0 > /sys/class/net/bond0/bonding/slaves
-
-To free slave eth0 from bond bond0:
-# echo -eth0 > /sys/class/net/bond0/bonding/slaves
-
-	When an interface is enslaved to a bond, symlinks between the
-two are created in the sysfs filesystem.  In this case, you would get
-/sys/class/net/bond0/slave_eth0 pointing to /sys/class/net/eth0, and
-/sys/class/net/eth0/master pointing to /sys/class/net/bond0.
-
-	This means that you can tell quickly whether or not an
-interface is enslaved by looking for the master symlink.  Thus:
-# echo -eth0 > /sys/class/net/eth0/master/bonding/slaves
-will free eth0 from whatever bond it is enslaved to, regardless of
-the name of the bond interface.
-
-Changing a Bond's Configuration
--------------------------------
-	Each bond may be configured individually by manipulating the
-files located in /sys/class/net/<bond name>/bonding
-
-	The names of these files correspond directly with the command-
-line parameters described elsewhere in this file, and, with the
-exception of arp_ip_target, they accept the same values.  To see the
-current setting, simply cat the appropriate file.
-
-	A few examples will be given here; for specific usage
-guidelines for each parameter, see the appropriate section in this
-document.
-
-To configure bond0 for balance-alb mode:
-# ifconfig bond0 down
-# echo 6 > /sys/class/net/bond0/bonding/mode
- - or -
-# echo balance-alb > /sys/class/net/bond0/bonding/mode
-	NOTE: The bond interface must be down before the mode can be
-changed.
-
-To enable MII monitoring on bond0 with a 1 second interval:
-# echo 1000 > /sys/class/net/bond0/bonding/miimon
-	NOTE: If ARP monitoring is enabled, it will be disabled when MII
-monitoring is enabled, and vice-versa.
-
-To add ARP targets:
-# echo +192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
-# echo +192.168.0.101 > /sys/class/net/bond0/bonding/arp_ip_target
-	NOTE:  up to 16 target addresses may be specified.
-
-To remove an ARP target:
-# echo -192.168.0.100 > /sys/class/net/bond0/bonding/arp_ip_target
-
-To configure the interval between learning packet transmits:
-# echo 12 > /sys/class/net/bond0/bonding/lp_interval
-	NOTE: the lp_interval is the number of seconds between instances where
-the bonding driver sends learning packets to each slave's peer switch.  The
-default interval is 1 second.
-
-Example Configuration
----------------------
-	We begin with the same example that is shown in section 3.3,
-executed with sysfs, and without using ifenslave.
-
-	To make a simple bond of two e100 devices (presumed to be eth0
-and eth1), and have it persist across reboots, edit the appropriate
-file (/etc/init.d/boot.local or /etc/rc.d/rc.local), and add the
-following:
-
-modprobe bonding
-modprobe e100
-echo balance-alb > /sys/class/net/bond0/bonding/mode
-ifconfig bond0 192.168.1.1 netmask 255.255.255.0 up
-echo 100 > /sys/class/net/bond0/bonding/miimon
-echo +eth0 > /sys/class/net/bond0/bonding/slaves
-echo +eth1 > /sys/class/net/bond0/bonding/slaves
-
-	To add a second bond, with two e1000 interfaces in
-active-backup mode, using ARP monitoring, add the following lines to
-your init script:
-
-modprobe e1000
-echo +bond1 > /sys/class/net/bonding_masters
-echo active-backup > /sys/class/net/bond1/bonding/mode
-ifconfig bond1 192.168.2.1 netmask 255.255.255.0 up
-echo +192.168.2.100 > /sys/class/net/bond1/bonding/arp_ip_target
-echo 2000 > /sys/class/net/bond1/bonding/arp_interval
-echo +eth2 > /sys/class/net/bond1/bonding/slaves
-echo +eth3 > /sys/class/net/bond1/bonding/slaves
-
-3.5 Configuration with Interfaces Support
------------------------------------------
-
-        This section applies to distros which use /etc/network/interfaces file
-to describe network interface configuration, most notably Debian and its
-derivatives.
-
-	The ifup and ifdown commands on Debian don't support bonding out of
-the box. The ifenslave-2.6 package should be installed to provide bonding
-support.  Once installed, this package will provide bond-* options to be used
-in /etc/network/interfaces.
-
-	Note that ifenslave-2.6 package will load the bonding module and use
-the ifenslave command when appropriate.
-
-Example Configurations
-----------------------
-
-In /etc/network/interfaces, the following stanza will configure bond0, in
-active-backup mode, with eth0 and eth1 as slaves.
-
-auto bond0
-iface bond0 inet dhcp
-	bond-slaves eth0 eth1
-	bond-mode active-backup
-	bond-miimon 100
-	bond-primary eth0 eth1
-
-If the above configuration doesn't work, you might have a system using
-upstart for system startup. This is most notably true for recent
-Ubuntu versions. The following stanza in /etc/network/interfaces will
-produce the same result on those systems.
-
-auto bond0
-iface bond0 inet dhcp
-	bond-slaves none
-	bond-mode active-backup
-	bond-miimon 100
-
-auto eth0
-iface eth0 inet manual
-	bond-master bond0
-	bond-primary eth0 eth1
-
-auto eth1
-iface eth1 inet manual
-	bond-master bond0
-	bond-primary eth0 eth1
-
-For a full list of bond-* supported options in /etc/network/interfaces and some
-more advanced examples tailored to your particular distro, see the files in
-/usr/share/doc/ifenslave-2.6.
-
-3.6 Overriding Configuration for Special Cases
-----------------------------------------------
-
-When using the bonding driver, the physical port which transmits a frame is
-typically selected by the bonding driver, and is not relevant to the user or
-system administrator.  The output port is simply selected using the policies of
-the selected bonding mode.  On occasion however, it is helpful to direct certain
-classes of traffic to certain physical interfaces on output to implement
-slightly more complex policies.  For example, to reach a web server over a
-bonded interface in which eth0 connects to a private network, while eth1
-connects via a public network, it may be desirous to bias the bond to send said
-traffic over eth0 first, using eth1 only as a fall back, while all other traffic
-can safely be sent over either interface.  Such configurations may be achieved
-using the traffic control utilities inherent in linux.
-
-By default the bonding driver is multiqueue aware and 16 queues are created
-when the driver initializes (see Documentation/networking/multiqueue.txt
-for details).  If more or less queues are desired the module parameter
-tx_queues can be used to change this value.  There is no sysfs parameter
-available as the allocation is done at module init time.
-
-The output of the file /proc/net/bonding/bondX has changed so the output Queue
-ID is now printed for each slave:
-
-Bonding Mode: fault-tolerance (active-backup)
-Primary Slave: None
-Currently Active Slave: eth0
-MII Status: up
-MII Polling Interval (ms): 0
-Up Delay (ms): 0
-Down Delay (ms): 0
-
-Slave Interface: eth0
-MII Status: up
-Link Failure Count: 0
-Permanent HW addr: 00:1a:a0:12:8f:cb
-Slave queue ID: 0
-
-Slave Interface: eth1
-MII Status: up
-Link Failure Count: 0
-Permanent HW addr: 00:1a:a0:12:8f:cc
-Slave queue ID: 2
-
-The queue_id for a slave can be set using the command:
-
-# echo "eth1:2" > /sys/class/net/bond0/bonding/queue_id
-
-Any interface that needs a queue_id set should set it with multiple calls
-like the one above until proper priorities are set for all interfaces.  On
-distributions that allow configuration via initscripts, multiple 'queue_id'
-arguments can be added to BONDING_OPTS to set all needed slave queues.
-
-These queue id's can be used in conjunction with the tc utility to configure
-a multiqueue qdisc and filters to bias certain traffic to transmit on certain
-slave devices.  For instance, say we wanted, in the above configuration to
-force all traffic bound to 192.168.1.100 to use eth1 in the bond as its output
-device. The following commands would accomplish this:
-
-# tc qdisc add dev bond0 handle 1 root multiq
-
-# tc filter add dev bond0 protocol ip parent 1: prio 1 u32 match ip dst \
-	192.168.1.100 action skbedit queue_mapping 2
-
-These commands tell the kernel to attach a multiqueue queue discipline to the
-bond0 interface and filter traffic enqueued to it, such that packets with a dst
-ip of 192.168.1.100 have their output queue mapping value overwritten to 2.
-This value is then passed into the driver, causing the normal output path
-selection policy to be overridden, selecting instead qid 2, which maps to eth1.
-
-Note that qid values begin at 1.  Qid 0 is reserved to indicate to the driver
-that normal output policy selection should take place.  One benefit to simply
-leaving the qid for a slave to 0 is the multiqueue awareness in the bonding
-driver that is now present.  This awareness allows tc filters to be placed on
-slave devices as well as bond devices and the bonding driver will simply act as
-a pass-through for selecting output queues on the slave device rather than 
-output port selection.
-
-This feature first appeared in bonding driver version 3.7.0 and support for
-output slave selection was limited to round-robin and active-backup modes.
-
-3.7 Configuring LACP for 802.3ad mode in a more secure way
-----------------------------------------------------------
-
-When using 802.3ad bonding mode, the Actor (host) and Partner (switch)
-exchange LACPDUs.  These LACPDUs cannot be sniffed, because they are
-destined to link local mac addresses (which switches/bridges are not
-supposed to forward).  However, most of the values are easily predictable
-or are simply the machine's MAC address (which is trivially known to all
-other hosts in the same L2).  This implies that other machines in the L2
-domain can spoof LACPDU packets from other hosts to the switch and potentially
-cause mayhem by joining (from the point of view of the switch) another
-machine's aggregate, thus receiving a portion of that host's incoming
-traffic and / or spoofing traffic from that machine themselves (potentially
-even successfully terminating some portion of flows). Though this is not
-a likely scenario, one could avoid this possibility by simply configuring
-a few bonding parameters:
-
-   (a) ad_actor_system : You can set a random mac-address that can be used for
-       these LACPDU exchanges. The value can not be either NULL or Multicast.
-       Also it's preferable to set the local-admin bit. Following shell code
-       generates a random mac-address as described above.
-
-       # sys_mac_addr=$(printf '%02x:%02x:%02x:%02x:%02x:%02x' \
-                                $(( (RANDOM & 0xFE) | 0x02 )) \
-                                $(( RANDOM & 0xFF )) \
-                                $(( RANDOM & 0xFF )) \
-                                $(( RANDOM & 0xFF )) \
-                                $(( RANDOM & 0xFF )) \
-                                $(( RANDOM & 0xFF )))
-       # echo $sys_mac_addr > /sys/class/net/bond0/bonding/ad_actor_system
-
-   (b) ad_actor_sys_prio : Randomize the system priority. The default value
-       is 65535, but system can take the value from 1 - 65535. Following shell
-       code generates random priority and sets it.
-
-       # sys_prio=$(( 1 + RANDOM + RANDOM ))
-       # echo $sys_prio > /sys/class/net/bond0/bonding/ad_actor_sys_prio
-
-   (c) ad_user_port_key : Use the user portion of the port-key. The default
-       keeps this empty. These are the upper 10 bits of the port-key and value
-       ranges from 0 - 1023. Following shell code generates these 10 bits and
-       sets it.
-
-       # usr_port_key=$(( RANDOM & 0x3FF ))
-       # echo $usr_port_key > /sys/class/net/bond0/bonding/ad_user_port_key
-
-
-4 Querying Bonding Configuration
-=================================
-
-4.1 Bonding Configuration
--------------------------
-
-	Each bonding device has a read-only file residing in the
-/proc/net/bonding directory.  The file contents include information
-about the bonding configuration, options and state of each slave.
-
-	For example, the contents of /proc/net/bonding/bond0 after the
-driver is loaded with parameters of mode=0 and miimon=1000 is
-generally as follows:
-
-	Ethernet Channel Bonding Driver: 2.6.1 (October 29, 2004)
-        Bonding Mode: load balancing (round-robin)
-        Currently Active Slave: eth0
-        MII Status: up
-        MII Polling Interval (ms): 1000
-        Up Delay (ms): 0
-        Down Delay (ms): 0
-
-        Slave Interface: eth1
-        MII Status: up
-        Link Failure Count: 1
-
-        Slave Interface: eth0
-        MII Status: up
-        Link Failure Count: 1
-
-	The precise format and contents will change depending upon the
-bonding configuration, state, and version of the bonding driver.
-
-4.2 Network configuration
--------------------------
-
-	The network configuration can be inspected using the ifconfig
-command.  Bonding devices will have the MASTER flag set; Bonding slave
-devices will have the SLAVE flag set.  The ifconfig output does not
-contain information on which slaves are associated with which masters.
-
-	In the example below, the bond0 interface is the master
-(MASTER) while eth0 and eth1 are slaves (SLAVE). Notice all slaves of
-bond0 have the same MAC address (HWaddr) as bond0 for all modes except
-TLB and ALB that require a unique MAC address for each slave.
-
-# /sbin/ifconfig
-bond0     Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
-          inet addr:XXX.XXX.XXX.YYY  Bcast:XXX.XXX.XXX.255  Mask:255.255.252.0
-          UP BROADCAST RUNNING MASTER MULTICAST  MTU:1500  Metric:1
-          RX packets:7224794 errors:0 dropped:0 overruns:0 frame:0
-          TX packets:3286647 errors:1 dropped:0 overruns:1 carrier:0
-          collisions:0 txqueuelen:0
-
-eth0      Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
-          UP BROADCAST RUNNING SLAVE MULTICAST  MTU:1500  Metric:1
-          RX packets:3573025 errors:0 dropped:0 overruns:0 frame:0
-          TX packets:1643167 errors:1 dropped:0 overruns:1 carrier:0
-          collisions:0 txqueuelen:100
-          Interrupt:10 Base address:0x1080
-
-eth1      Link encap:Ethernet  HWaddr 00:C0:F0:1F:37:B4
-          UP BROADCAST RUNNING SLAVE MULTICAST  MTU:1500  Metric:1
-          RX packets:3651769 errors:0 dropped:0 overruns:0 frame:0
-          TX packets:1643480 errors:0 dropped:0 overruns:0 carrier:0
-          collisions:0 txqueuelen:100
-          Interrupt:9 Base address:0x1400
-
-5. Switch Configuration
-=======================
-
-	For this section, "switch" refers to whatever system the
-bonded devices are directly connected to (i.e., where the other end of
-the cable plugs into).  This may be an actual dedicated switch device,
-or it may be another regular system (e.g., another computer running
-Linux).
-
-	The active-backup, balance-tlb and balance-alb modes do not
-require any specific configuration of the switch.
-
-	The 802.3ad mode requires that the switch have the appropriate
-ports configured as an 802.3ad aggregation.  The precise method used
-to configure this varies from switch to switch, but, for example, a
-Cisco 3550 series switch requires that the appropriate ports first be
-grouped together in a single etherchannel instance, then that
-etherchannel is set to mode "lacp" to enable 802.3ad (instead of
-standard EtherChannel).
-
-	The balance-rr, balance-xor and broadcast modes generally
-require that the switch have the appropriate ports grouped together.
-The nomenclature for such a group differs between switches, it may be
-called an "etherchannel" (as in the Cisco example, above), a "trunk
-group" or some other similar variation.  For these modes, each switch
-will also have its own configuration options for the switch's transmit
-policy to the bond.  Typical choices include XOR of either the MAC or
-IP addresses.  The transmit policy of the two peers does not need to
-match.  For these three modes, the bonding mode really selects a
-transmit policy for an EtherChannel group; all three will interoperate
-with another EtherChannel group.
-
-
-6. 802.1q VLAN Support
-======================
-
-	It is possible to configure VLAN devices over a bond interface
-using the 8021q driver.  However, only packets coming from the 8021q
-driver and passing through bonding will be tagged by default.  Self
-generated packets, for example, bonding's learning packets or ARP
-packets generated by either ALB mode or the ARP monitor mechanism, are
-tagged internally by bonding itself.  As a result, bonding must
-"learn" the VLAN IDs configured above it, and use those IDs to tag
-self generated packets.
-
-	For reasons of simplicity, and to support the use of adapters
-that can do VLAN hardware acceleration offloading, the bonding
-interface declares itself as fully hardware offloading capable, it gets
-the add_vid/kill_vid notifications to gather the necessary
-information, and it propagates those actions to the slaves.  In case
-of mixed adapter types, hardware accelerated tagged packets that
-should go through an adapter that is not offloading capable are
-"un-accelerated" by the bonding driver so the VLAN tag sits in the
-regular location.
-
-	VLAN interfaces *must* be added on top of a bonding interface
-only after enslaving at least one slave.  The bonding interface has a
-hardware address of 00:00:00:00:00:00 until the first slave is added.
-If the VLAN interface is created prior to the first enslavement, it
-would pick up the all-zeroes hardware address.  Once the first slave
-is attached to the bond, the bond device itself will pick up the
-slave's hardware address, which is then available for the VLAN device.
-
-	Also, be aware that a similar problem can occur if all slaves
-are released from a bond that still has one or more VLAN interfaces on
-top of it.  When a new slave is added, the bonding interface will
-obtain its hardware address from the first slave, which might not
-match the hardware address of the VLAN interfaces (which was
-ultimately copied from an earlier slave).
-
-	There are two methods to insure that the VLAN device operates
-with the correct hardware address if all slaves are removed from a
-bond interface:
-
-	1. Remove all VLAN interfaces then recreate them
-
-	2. Set the bonding interface's hardware address so that it
-matches the hardware address of the VLAN interfaces.
-
-	Note that changing a VLAN interface's HW address would set the
-underlying device -- i.e. the bonding interface -- to promiscuous
-mode, which might not be what you want.
-
-
-7. Link Monitoring
-==================
-
-	The bonding driver at present supports two schemes for
-monitoring a slave device's link state: the ARP monitor and the MII
-monitor.
-
-	At the present time, due to implementation restrictions in the
-bonding driver itself, it is not possible to enable both ARP and MII
-monitoring simultaneously.
-
-7.1 ARP Monitor Operation
--------------------------
-
-	The ARP monitor operates as its name suggests: it sends ARP
-queries to one or more designated peer systems on the network, and
-uses the response as an indication that the link is operating.  This
-gives some assurance that traffic is actually flowing to and from one
-or more peers on the local network.
-
-	The ARP monitor relies on the device driver itself to verify
-that traffic is flowing.  In particular, the driver must keep up to
-date the last receive time, dev->last_rx.  Drivers that use the NETIF_F_LLTX
-flag must also update netdev_queue->trans_start.  If they do not, then the
-ARP monitor will immediately fail any slaves using that driver, and
-those slaves will stay down.  If networking monitoring (tcpdump, etc)
-shows the ARP requests and replies on the network, then it may be that
-your device driver is not updating last_rx and trans_start.
-
-7.2 Configuring Multiple ARP Targets
-------------------------------------
-
-	While ARP monitoring can be done with just one target, it can
-be useful in a High Availability setup to have several targets to
-monitor.  In the case of just one target, the target itself may go
-down or have a problem making it unresponsive to ARP requests.  Having
-an additional target (or several) increases the reliability of the ARP
-monitoring.
-
-	Multiple ARP targets must be separated by commas as follows:
-
-# example options for ARP monitoring with three targets
-alias bond0 bonding
-options bond0 arp_interval=60 arp_ip_target=192.168.0.1,192.168.0.3,192.168.0.9
-
-	For just a single target the options would resemble:
-
-# example options for ARP monitoring with one target
-alias bond0 bonding
-options bond0 arp_interval=60 arp_ip_target=192.168.0.100
-
-
-7.3 MII Monitor Operation
--------------------------
-
-	The MII monitor monitors only the carrier state of the local
-network interface.  It accomplishes this in one of three ways: by
-depending upon the device driver to maintain its carrier state, by
-querying the device's MII registers, or by making an ethtool query to
-the device.
-
-	If the use_carrier module parameter is 1 (the default value),
-then the MII monitor will rely on the driver for carrier state
-information (via the netif_carrier subsystem).  As explained in the
-use_carrier parameter information, above, if the MII monitor fails to
-detect carrier loss on the device (e.g., when the cable is physically
-disconnected), it may be that the driver does not support
-netif_carrier.
-
-	If use_carrier is 0, then the MII monitor will first query the
-device's (via ioctl) MII registers and check the link state.  If that
-request fails (not just that it returns carrier down), then the MII
-monitor will make an ethtool ETHTOOL_GLINK request to attempt to obtain
-the same information.  If both methods fail (i.e., the driver either
-does not support or had some error in processing both the MII register
-and ethtool requests), then the MII monitor will assume the link is
-up.
-
-8. Potential Sources of Trouble
-===============================
-
-8.1 Adventures in Routing
--------------------------
-
-	When bonding is configured, it is important that the slave
-devices not have routes that supersede routes of the master (or,
-generally, not have routes at all).  For example, suppose the bonding
-device bond0 has two slaves, eth0 and eth1, and the routing table is
-as follows:
-
-Kernel IP routing table
-Destination     Gateway         Genmask         Flags   MSS Window  irtt Iface
-10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth0
-10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 eth1
-10.0.0.0        0.0.0.0         255.255.0.0     U        40 0          0 bond0
-127.0.0.0       0.0.0.0         255.0.0.0       U        40 0          0 lo
-
-	This routing configuration will likely still update the
-receive/transmit times in the driver (needed by the ARP monitor), but
-may bypass the bonding driver (because outgoing traffic to, in this
-case, another host on network 10 would use eth0 or eth1 before bond0).
-
-	The ARP monitor (and ARP itself) may become confused by this
-configuration, because ARP requests (generated by the ARP monitor)
-will be sent on one interface (bond0), but the corresponding reply
-will arrive on a different interface (eth0).  This reply looks to ARP
-as an unsolicited ARP reply (because ARP matches replies on an
-interface basis), and is discarded.  The MII monitor is not affected
-by the state of the routing table.
-
-	The solution here is simply to insure that slaves do not have
-routes of their own, and if for some reason they must, those routes do
-not supersede routes of their master.  This should generally be the
-case, but unusual configurations or errant manual or automatic static
-route additions may cause trouble.
-
-8.2 Ethernet Device Renaming
-----------------------------
-
-	On systems with network configuration scripts that do not
-associate physical devices directly with network interface names (so
-that the same physical device always has the same "ethX" name), it may
-be necessary to add some special logic to config files in
-/etc/modprobe.d/.
-
-	For example, given a modules.conf containing the following:
-
-alias bond0 bonding
-options bond0 mode=some-mode miimon=50
-alias eth0 tg3
-alias eth1 tg3
-alias eth2 e1000
-alias eth3 e1000
-
-	If neither eth0 nor eth1 is a slave to bond0, then when the
-bond0 interface comes up, the devices may end up reordered.  This
-happens because bonding is loaded first, then its slave device's
-drivers are loaded next.  Since no other drivers have been loaded,
-when the e1000 driver loads, it will receive eth0 and eth1 for its
-devices, but the bonding configuration tries to enslave eth2 and eth3
-(which may later be assigned to the tg3 devices).
-
-	Adding the following:
-
-add above bonding e1000 tg3
-
-	causes modprobe to load e1000 then tg3, in that order, when
-bonding is loaded.  This command is fully documented in the
-modules.conf manual page.
-
-	On systems utilizing modprobe an equivalent problem can occur.
-In this case, the following can be added to config files in
-/etc/modprobe.d/ as:
-
-softdep bonding pre: tg3 e1000
-
-	This will load tg3 and e1000 modules before loading the bonding one.
-Full documentation on this can be found in the modprobe.d and modprobe
-manual pages.
-
-8.3. Painfully Slow Or No Failed Link Detection By Miimon
----------------------------------------------------------
-
-	By default, bonding enables the use_carrier option, which
-instructs bonding to trust the driver to maintain carrier state.
-
-	As discussed in the options section, above, some drivers do
-not support the netif_carrier_on/_off link state tracking system.
-With use_carrier enabled, bonding will always see these links as up,
-regardless of their actual state.
-
-	Additionally, other drivers do support netif_carrier, but do
-not maintain it in real time, e.g., only polling the link state at
-some fixed interval.  In this case, miimon will detect failures, but
-only after some long period of time has expired.  If it appears that
-miimon is very slow in detecting link failures, try specifying
-use_carrier=0 to see if that improves the failure detection time.  If
-it does, then it may be that the driver checks the carrier state at a
-fixed interval, but does not cache the MII register values (so the
-use_carrier=0 method of querying the registers directly works).  If
-use_carrier=0 does not improve the failover, then the driver may cache
-the registers, or the problem may be elsewhere.
-
-	Also, remember that miimon only checks for the device's
-carrier state.  It has no way to determine the state of devices on or
-beyond other ports of a switch, or if a switch is refusing to pass
-traffic while still maintaining carrier on.
-
-9. SNMP agents
-===============
-
-	If running SNMP agents, the bonding driver should be loaded
-before any network drivers participating in a bond.  This requirement
-is due to the interface index (ipAdEntIfIndex) being associated to
-the first interface found with a given IP address.  That is, there is
-only one ipAdEntIfIndex for each IP address.  For example, if eth0 and
-eth1 are slaves of bond0 and the driver for eth0 is loaded before the
-bonding driver, the interface for the IP address will be associated
-with the eth0 interface.  This configuration is shown below; the IP
-address 192.168.1.1 has an interface index of 2 which indexes to eth0
-in the ifDescr table (ifDescr.2).
-
-     interfaces.ifTable.ifEntry.ifDescr.1 = lo
-     interfaces.ifTable.ifEntry.ifDescr.2 = eth0
-     interfaces.ifTable.ifEntry.ifDescr.3 = eth1
-     interfaces.ifTable.ifEntry.ifDescr.4 = eth2
-     interfaces.ifTable.ifEntry.ifDescr.5 = eth3
-     interfaces.ifTable.ifEntry.ifDescr.6 = bond0
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 5
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 4
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
-
-	This problem is avoided by loading the bonding driver before
-any network drivers participating in a bond.  Below is an example of
-loading the bonding driver first; the IP address 192.168.1.1 is
-correctly associated with ifDescr.2.
-
-     interfaces.ifTable.ifEntry.ifDescr.1 = lo
-     interfaces.ifTable.ifEntry.ifDescr.2 = bond0
-     interfaces.ifTable.ifEntry.ifDescr.3 = eth0
-     interfaces.ifTable.ifEntry.ifDescr.4 = eth1
-     interfaces.ifTable.ifEntry.ifDescr.5 = eth2
-     interfaces.ifTable.ifEntry.ifDescr.6 = eth3
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.10.10.10 = 6
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.192.168.1.1 = 2
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.10.74.20.94 = 5
-     ip.ipAddrTable.ipAddrEntry.ipAdEntIfIndex.127.0.0.1 = 1
-
-	While some distributions may not report the interface name in
-ifDescr, the association between the IP address and IfIndex remains
-and SNMP functions such as Interface_Scan_Next will report that
-association.
-
-10. Promiscuous mode
-====================
-
-	When running network monitoring tools, e.g., tcpdump, it is
-common to enable promiscuous mode on the device, so that all traffic
-is seen (instead of seeing only traffic destined for the local host).
-The bonding driver handles promiscuous mode changes to the bonding
-master device (e.g., bond0), and propagates the setting to the slave
-devices.
-
-	For the balance-rr, balance-xor, broadcast, and 802.3ad modes,
-the promiscuous mode setting is propagated to all slaves.
-
-	For the active-backup, balance-tlb and balance-alb modes, the
-promiscuous mode setting is propagated only to the active slave.
-
-	For balance-tlb mode, the active slave is the slave currently
-receiving inbound traffic.
-
-	For balance-alb mode, the active slave is the slave used as a
-"primary."  This slave is used for mode-specific control traffic, for
-sending to peers that are unassigned or if the load is unbalanced.
-
-	For the active-backup, balance-tlb and balance-alb modes, when
-the active slave changes (e.g., due to a link failure), the
-promiscuous setting will be propagated to the new active slave.
-
-11. Configuring Bonding for High Availability
-=============================================
-
-	High Availability refers to configurations that provide
-maximum network availability by having redundant or backup devices,
-links or switches between the host and the rest of the world.  The
-goal is to provide the maximum availability of network connectivity
-(i.e., the network always works), even though other configurations
-could provide higher throughput.
-
-11.1 High Availability in a Single Switch Topology
---------------------------------------------------
-
-	If two hosts (or a host and a single switch) are directly
-connected via multiple physical links, then there is no availability
-penalty to optimizing for maximum bandwidth.  In this case, there is
-only one switch (or peer), so if it fails, there is no alternative
-access to fail over to.  Additionally, the bonding load balance modes
-support link monitoring of their members, so if individual links fail,
-the load will be rebalanced across the remaining devices.
-
-	See Section 12, "Configuring Bonding for Maximum Throughput"
-for information on configuring bonding with one peer device.
-
-11.2 High Availability in a Multiple Switch Topology
-----------------------------------------------------
-
-	With multiple switches, the configuration of bonding and the
-network changes dramatically.  In multiple switch topologies, there is
-a trade off between network availability and usable bandwidth.
-
-	Below is a sample network, configured to maximize the
-availability of the network:
-
-                |                                     |
-                |port3                           port3|
-          +-----+----+                          +-----+----+
-          |          |port2       ISL      port2|          |
-          | switch A +--------------------------+ switch B |
-          |          |                          |          |
-          +-----+----+                          +-----++---+
-                |port1                           port1|
-                |             +-------+               |
-                +-------------+ host1 +---------------+
-                         eth0 +-------+ eth1
-
-	In this configuration, there is a link between the two
-switches (ISL, or inter switch link), and multiple ports connecting to
-the outside world ("port3" on each switch).  There is no technical
-reason that this could not be extended to a third switch.
-
-11.2.1 HA Bonding Mode Selection for Multiple Switch Topology
--------------------------------------------------------------
-
-	In a topology such as the example above, the active-backup and
-broadcast modes are the only useful bonding modes when optimizing for
-availability; the other modes require all links to terminate on the
-same peer for them to behave rationally.
-
-active-backup: This is generally the preferred mode, particularly if
-	the switches have an ISL and play together well.  If the
-	network configuration is such that one switch is specifically
-	a backup switch (e.g., has lower capacity, higher cost, etc),
-	then the primary option can be used to insure that the
-	preferred link is always used when it is available.
-
-broadcast: This mode is really a special purpose mode, and is suitable
-	only for very specific needs.  For example, if the two
-	switches are not connected (no ISL), and the networks beyond
-	them are totally independent.  In this case, if it is
-	necessary for some specific one-way traffic to reach both
-	independent networks, then the broadcast mode may be suitable.
-
-11.2.2 HA Link Monitoring Selection for Multiple Switch Topology
-----------------------------------------------------------------
-
-	The choice of link monitoring ultimately depends upon your
-switch.  If the switch can reliably fail ports in response to other
-failures, then either the MII or ARP monitors should work.  For
-example, in the above example, if the "port3" link fails at the remote
-end, the MII monitor has no direct means to detect this.  The ARP
-monitor could be configured with a target at the remote end of port3,
-thus detecting that failure without switch support.
-
-	In general, however, in a multiple switch topology, the ARP
-monitor can provide a higher level of reliability in detecting end to
-end connectivity failures (which may be caused by the failure of any
-individual component to pass traffic for any reason).  Additionally,
-the ARP monitor should be configured with multiple targets (at least
-one for each switch in the network).  This will insure that,
-regardless of which switch is active, the ARP monitor has a suitable
-target to query.
-
-	Note, also, that of late many switches now support a functionality
-generally referred to as "trunk failover."  This is a feature of the
-switch that causes the link state of a particular switch port to be set
-down (or up) when the state of another switch port goes down (or up).
-Its purpose is to propagate link failures from logically "exterior" ports
-to the logically "interior" ports that bonding is able to monitor via
-miimon.  Availability and configuration for trunk failover varies by
-switch, but this can be a viable alternative to the ARP monitor when using
-suitable switches.
-
-12. Configuring Bonding for Maximum Throughput
-==============================================
-
-12.1 Maximizing Throughput in a Single Switch Topology
-------------------------------------------------------
-
-	In a single switch configuration, the best method to maximize
-throughput depends upon the application and network environment.  The
-various load balancing modes each have strengths and weaknesses in
-different environments, as detailed below.
-
-	For this discussion, we will break down the topologies into
-two categories.  Depending upon the destination of most traffic, we
-categorize them into either "gatewayed" or "local" configurations.
-
-	In a gatewayed configuration, the "switch" is acting primarily
-as a router, and the majority of traffic passes through this router to
-other networks.  An example would be the following:
-
-
-     +----------+                     +----------+
-     |          |eth0            port1|          | to other networks
-     | Host A   +---------------------+ router   +------------------->
-     |          +---------------------+          | Hosts B and C are out
-     |          |eth1            port2|          | here somewhere
-     +----------+                     +----------+
-
-	The router may be a dedicated router device, or another host
-acting as a gateway.  For our discussion, the important point is that
-the majority of traffic from Host A will pass through the router to
-some other network before reaching its final destination.
-
-	In a gatewayed network configuration, although Host A may
-communicate with many other systems, all of its traffic will be sent
-and received via one other peer on the local network, the router.
-
-	Note that the case of two systems connected directly via
-multiple physical links is, for purposes of configuring bonding, the
-same as a gatewayed configuration.  In that case, it happens that all
-traffic is destined for the "gateway" itself, not some other network
-beyond the gateway.
-
-	In a local configuration, the "switch" is acting primarily as
-a switch, and the majority of traffic passes through this switch to
-reach other stations on the same network.  An example would be the
-following:
-
-    +----------+            +----------+       +--------+
-    |          |eth0   port1|          +-------+ Host B |
-    |  Host A  +------------+  switch  |port3  +--------+
-    |          +------------+          |                  +--------+
-    |          |eth1   port2|          +------------------+ Host C |
-    +----------+            +----------+port4             +--------+
-
-
-	Again, the switch may be a dedicated switch device, or another
-host acting as a gateway.  For our discussion, the important point is
-that the majority of traffic from Host A is destined for other hosts
-on the same local network (Hosts B and C in the above example).
-
-	In summary, in a gatewayed configuration, traffic to and from
-the bonded device will be to the same MAC level peer on the network
-(the gateway itself, i.e., the router), regardless of its final
-destination.  In a local configuration, traffic flows directly to and
-from the final destinations, thus, each destination (Host B, Host C)
-will be addressed directly by their individual MAC addresses.
-
-	This distinction between a gatewayed and a local network
-configuration is important because many of the load balancing modes
-available use the MAC addresses of the local network source and
-destination to make load balancing decisions.  The behavior of each
-mode is described below.
-
-
-12.1.1 MT Bonding Mode Selection for Single Switch Topology
------------------------------------------------------------
-
-	This configuration is the easiest to set up and to understand,
-although you will have to decide which bonding mode best suits your
-needs.  The trade offs for each mode are detailed below:
-
-balance-rr: This mode is the only mode that will permit a single
-	TCP/IP connection to stripe traffic across multiple
-	interfaces. It is therefore the only mode that will allow a
-	single TCP/IP stream to utilize more than one interface's
-	worth of throughput.  This comes at a cost, however: the
-	striping generally results in peer systems receiving packets out
-	of order, causing TCP/IP's congestion control system to kick
-	in, often by retransmitting segments.
-
-	It is possible to adjust TCP/IP's congestion limits by
-	altering the net.ipv4.tcp_reordering sysctl parameter.  The
-	usual default value is 3. But keep in mind that the TCP stack is
-	able to automatically increase this when it detects reorders.
-
-	Note that the fraction of packets that will be delivered out of
-	order is highly variable, and is unlikely to be zero.  The level
-	of reordering depends upon a variety of factors, including the
-	networking interfaces, the switch, and the topology of the
-	configuration.  Speaking in general terms, higher speed network
-	cards produce more reordering (due to factors such as packet
-	coalescing), and a "many to many" topology will reorder at a
-	higher rate than a "many slow to one fast" configuration.
-
-	Many switches do not support any modes that stripe traffic
-	(instead choosing a port based upon IP or MAC level addresses);
-	for those devices, traffic for a particular connection flowing
-	through the switch to a balance-rr bond will not utilize greater
-	than one interface's worth of bandwidth.
-
-	If you are utilizing protocols other than TCP/IP, UDP for
-	example, and your application can tolerate out of order
-	delivery, then this mode can allow for single stream datagram
-	performance that scales near linearly as interfaces are added
-	to the bond.
-
-	This mode requires the switch to have the appropriate ports
-	configured for "etherchannel" or "trunking."
-
-active-backup: There is not much advantage in this network topology to
-	the active-backup mode, as the inactive backup devices are all
-	connected to the same peer as the primary.  In this case, a
-	load balancing mode (with link monitoring) will provide the
-	same level of network availability, but with increased
-	available bandwidth.  On the plus side, active-backup mode
-	does not require any configuration of the switch, so it may
-	have value if the hardware available does not support any of
-	the load balance modes.
-
-balance-xor: This mode will limit traffic such that packets destined
-	for specific peers will always be sent over the same
-	interface.  Since the destination is determined by the MAC
-	addresses involved, this mode works best in a "local" network
-	configuration (as described above), with destinations all on
-	the same local network.  This mode is likely to be suboptimal
-	if all your traffic is passed through a single router (i.e., a
-	"gatewayed" network configuration, as described above).
-
-	As with balance-rr, the switch ports need to be configured for
-	"etherchannel" or "trunking."
-
-broadcast: Like active-backup, there is not much advantage to this
-	mode in this type of network topology.
-
-802.3ad: This mode can be a good choice for this type of network
-	topology.  The 802.3ad mode is an IEEE standard, so all peers
-	that implement 802.3ad should interoperate well.  The 802.3ad
-	protocol includes automatic configuration of the aggregates,
-	so minimal manual configuration of the switch is needed
-	(typically only to designate that some set of devices is
-	available for 802.3ad).  The 802.3ad standard also mandates
-	that frames be delivered in order (within certain limits), so
-	in general single connections will not see misordering of
-	packets.  The 802.3ad mode does have some drawbacks: the
-	standard mandates that all devices in the aggregate operate at
-	the same speed and duplex.  Also, as with all bonding load
-	balance modes other than balance-rr, no single connection will
-	be able to utilize more than a single interface's worth of
-	bandwidth.  
-
-	Additionally, the linux bonding 802.3ad implementation
-	distributes traffic by peer (using an XOR of MAC addresses
-	and packet type ID), so in a "gatewayed" configuration, all
-	outgoing traffic will generally use the same device.  Incoming
-	traffic may also end up on a single device, but that is
-	dependent upon the balancing policy of the peer's 802.3ad
-	implementation.  In a "local" configuration, traffic will be
-	distributed across the devices in the bond.
-
-	Finally, the 802.3ad mode mandates the use of the MII monitor,
-	therefore, the ARP monitor is not available in this mode.
-
-balance-tlb: The balance-tlb mode balances outgoing traffic by peer.
-	Since the balancing is done according to MAC address, in a
-	"gatewayed" configuration (as described above), this mode will
-	send all traffic across a single device.  However, in a
-	"local" network configuration, this mode balances multiple
-	local network peers across devices in a vaguely intelligent
-	manner (not a simple XOR as in balance-xor or 802.3ad mode),
-	so that mathematically unlucky MAC addresses (i.e., ones that
-	XOR to the same value) will not all "bunch up" on a single
-	interface.
-
-	Unlike 802.3ad, interfaces may be of differing speeds, and no
-	special switch configuration is required.  On the down side,
-	in this mode all incoming traffic arrives over a single
-	interface, this mode requires certain ethtool support in the
-	network device driver of the slave interfaces, and the ARP
-	monitor is not available.
-
-balance-alb: This mode is everything that balance-tlb is, and more.
-	It has all of the features (and restrictions) of balance-tlb,
-	and will also balance incoming traffic from local network
-	peers (as described in the Bonding Module Options section,
-	above).
-
-	The only additional down side to this mode is that the network
-	device driver must support changing the hardware address while
-	the device is open.
-
-12.1.2 MT Link Monitoring for Single Switch Topology
-----------------------------------------------------
-
-	The choice of link monitoring may largely depend upon which
-mode you choose to use.  The more advanced load balancing modes do not
-support the use of the ARP monitor, and are thus restricted to using
-the MII monitor (which does not provide as high a level of end to end
-assurance as the ARP monitor).
-
-12.2 Maximum Throughput in a Multiple Switch Topology
------------------------------------------------------
-
-	Multiple switches may be utilized to optimize for throughput
-when they are configured in parallel as part of an isolated network
-between two or more systems, for example:
-
-                       +-----------+
-                       |  Host A   | 
-                       +-+---+---+-+
-                         |   |   |
-                +--------+   |   +---------+
-                |            |             |
-         +------+---+  +-----+----+  +-----+----+
-         | Switch A |  | Switch B |  | Switch C |
-         +------+---+  +-----+----+  +-----+----+
-                |            |             |
-                +--------+   |   +---------+
-                         |   |   |
-                       +-+---+---+-+
-                       |  Host B   | 
-                       +-----------+
-
-	In this configuration, the switches are isolated from one
-another.  One reason to employ a topology such as this is for an
-isolated network with many hosts (a cluster configured for high
-performance, for example), using multiple smaller switches can be more
-cost effective than a single larger switch, e.g., on a network with 24
-hosts, three 24 port switches can be significantly less expensive than
-a single 72 port switch.
-
-	If access beyond the network is required, an individual host
-can be equipped with an additional network device connected to an
-external network; this host then additionally acts as a gateway.
-
-12.2.1 MT Bonding Mode Selection for Multiple Switch Topology
--------------------------------------------------------------
-
-	In actual practice, the bonding mode typically employed in
-configurations of this type is balance-rr.  Historically, in this
-network configuration, the usual caveats about out of order packet
-delivery are mitigated by the use of network adapters that do not do
-any kind of packet coalescing (via the use of NAPI, or because the
-device itself does not generate interrupts until some number of
-packets has arrived).  When employed in this fashion, the balance-rr
-mode allows individual connections between two hosts to effectively
-utilize greater than one interface's bandwidth.
-
-12.2.2 MT Link Monitoring for Multiple Switch Topology
-------------------------------------------------------
-
-	Again, in actual practice, the MII monitor is most often used
-in this configuration, as performance is given preference over
-availability.  The ARP monitor will function in this topology, but its
-advantages over the MII monitor are mitigated by the volume of probes
-needed as the number of systems involved grows (remember that each
-host in the network is configured with bonding).
-
-13. Switch Behavior Issues
-==========================
-
-13.1 Link Establishment and Failover Delays
--------------------------------------------
-
-	Some switches exhibit undesirable behavior with regard to the
-timing of link up and down reporting by the switch.
-
-	First, when a link comes up, some switches may indicate that
-the link is up (carrier available), but not pass traffic over the
-interface for some period of time.  This delay is typically due to
-some type of autonegotiation or routing protocol, but may also occur
-during switch initialization (e.g., during recovery after a switch
-failure).  If you find this to be a problem, specify an appropriate
-value to the updelay bonding module option to delay the use of the
-relevant interface(s).
-
-	Second, some switches may "bounce" the link state one or more
-times while a link is changing state.  This occurs most commonly while
-the switch is initializing.  Again, an appropriate updelay value may
-help.
-
-	Note that when a bonding interface has no active links, the
-driver will immediately reuse the first link that goes up, even if the
-updelay parameter has been specified (the updelay is ignored in this
-case).  If there are slave interfaces waiting for the updelay timeout
-to expire, the interface that first went into that state will be
-immediately reused.  This reduces down time of the network if the
-value of updelay has been overestimated, and since this occurs only in
-cases with no connectivity, there is no additional penalty for
-ignoring the updelay.
-
-	In addition to the concerns about switch timings, if your
-switches take a long time to go into backup mode, it may be desirable
-to not activate a backup interface immediately after a link goes down.
-Failover may be delayed via the downdelay bonding module option.
-
-13.2 Duplicated Incoming Packets
---------------------------------
-
-	NOTE: Starting with version 3.0.2, the bonding driver has logic to
-suppress duplicate packets, which should largely eliminate this problem.
-The following description is kept for reference.
-
-	It is not uncommon to observe a short burst of duplicated
-traffic when the bonding device is first used, or after it has been
-idle for some period of time.  This is most easily observed by issuing
-a "ping" to some other host on the network, and noticing that the
-output from ping flags duplicates (typically one per slave).
-
-	For example, on a bond in active-backup mode with five slaves
-all connected to one switch, the output may appear as follows:
-
-# ping -n 10.0.4.2
-PING 10.0.4.2 (10.0.4.2) from 10.0.3.10 : 56(84) bytes of data.
-64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.7 ms
-64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
-64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
-64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
-64 bytes from 10.0.4.2: icmp_seq=1 ttl=64 time=13.8 ms (DUP!)
-64 bytes from 10.0.4.2: icmp_seq=2 ttl=64 time=0.216 ms
-64 bytes from 10.0.4.2: icmp_seq=3 ttl=64 time=0.267 ms
-64 bytes from 10.0.4.2: icmp_seq=4 ttl=64 time=0.222 ms
-
-	This is not due to an error in the bonding driver, rather, it
-is a side effect of how many switches update their MAC forwarding
-tables.  Initially, the switch does not associate the MAC address in
-the packet with a particular switch port, and so it may send the
-traffic to all ports until its MAC forwarding table is updated.  Since
-the interfaces attached to the bond may occupy multiple ports on a
-single switch, when the switch (temporarily) floods the traffic to all
-ports, the bond device receives multiple copies of the same packet
-(one per slave device).
-
-	The duplicated packet behavior is switch dependent, some
-switches exhibit this, and some do not.  On switches that display this
-behavior, it can be induced by clearing the MAC forwarding table (on
-most Cisco switches, the privileged command "clear mac address-table
-dynamic" will accomplish this).
-
-14. Hardware Specific Considerations
-====================================
-
-	This section contains additional information for configuring
-bonding on specific hardware platforms, or for interfacing bonding
-with particular switches or other devices.
-
-14.1 IBM BladeCenter
---------------------
-
-	This applies to the JS20 and similar systems.
-
-	On the JS20 blades, the bonding driver supports only
-balance-rr, active-backup, balance-tlb and balance-alb modes.  This is
-largely due to the network topology inside the BladeCenter, detailed
-below.
-
-JS20 network adapter information
---------------------------------
-
-	All JS20s come with two Broadcom Gigabit Ethernet ports
-integrated on the planar (that's "motherboard" in IBM-speak).  In the
-BladeCenter chassis, the eth0 port of all JS20 blades is hard wired to
-I/O Module #1; similarly, all eth1 ports are wired to I/O Module #2.
-An add-on Broadcom daughter card can be installed on a JS20 to provide
-two more Gigabit Ethernet ports.  These ports, eth2 and eth3, are
-wired to I/O Modules 3 and 4, respectively.
-
-	Each I/O Module may contain either a switch or a passthrough
-module (which allows ports to be directly connected to an external
-switch).  Some bonding modes require a specific BladeCenter internal
-network topology in order to function; these are detailed below.
-
-	Additional BladeCenter-specific networking information can be
-found in two IBM Redbooks (www.ibm.com/redbooks):
-
-"IBM eServer BladeCenter Networking Options"
-"IBM eServer BladeCenter Layer 2-7 Network Switching"
-
-BladeCenter networking configuration
-------------------------------------
-
-	Because a BladeCenter can be configured in a very large number
-of ways, this discussion will be confined to describing basic
-configurations.
-
-	Normally, Ethernet Switch Modules (ESMs) are used in I/O
-modules 1 and 2.  In this configuration, the eth0 and eth1 ports of a
-JS20 will be connected to different internal switches (in the
-respective I/O modules).
-
-	A passthrough module (OPM or CPM, optical or copper,
-passthrough module) connects the I/O module directly to an external
-switch.  By using PMs in I/O module #1 and #2, the eth0 and eth1
-interfaces of a JS20 can be redirected to the outside world and
-connected to a common external switch.
-
-	Depending upon the mix of ESMs and PMs, the network will
-appear to bonding as either a single switch topology (all PMs) or as a
-multiple switch topology (one or more ESMs, zero or more PMs).  It is
-also possible to connect ESMs together, resulting in a configuration
-much like the example in "High Availability in a Multiple Switch
-Topology," above.
-
-Requirements for specific modes
--------------------------------
-
-	The balance-rr mode requires the use of passthrough modules
-for devices in the bond, all connected to an common external switch.
-That switch must be configured for "etherchannel" or "trunking" on the
-appropriate ports, as is usual for balance-rr.
-
-	The balance-alb and balance-tlb modes will function with
-either switch modules or passthrough modules (or a mix).  The only
-specific requirement for these modes is that all network interfaces
-must be able to reach all destinations for traffic sent over the
-bonding device (i.e., the network must converge at some point outside
-the BladeCenter).
-
-	The active-backup mode has no additional requirements.
-
-Link monitoring issues
-----------------------
-
-	When an Ethernet Switch Module is in place, only the ARP
-monitor will reliably detect link loss to an external switch.  This is
-nothing unusual, but examination of the BladeCenter cabinet would
-suggest that the "external" network ports are the ethernet ports for
-the system, when it fact there is a switch between these "external"
-ports and the devices on the JS20 system itself.  The MII monitor is
-only able to detect link failures between the ESM and the JS20 system.
-
-	When a passthrough module is in place, the MII monitor does
-detect failures to the "external" port, which is then directly
-connected to the JS20 system.
-
-Other concerns
---------------
-
-	The Serial Over LAN (SoL) link is established over the primary
-ethernet (eth0) only, therefore, any loss of link to eth0 will result
-in losing your SoL connection.  It will not fail over with other
-network traffic, as the SoL system is beyond the control of the
-bonding driver.
-
-	It may be desirable to disable spanning tree on the switch
-(either the internal Ethernet Switch Module, or an external switch) to
-avoid fail-over delay issues when using bonding.
-
-	
-15. Frequently Asked Questions
-==============================
-
-1.  Is it SMP safe?
-
-	Yes. The old 2.0.xx channel bonding patch was not SMP safe.
-The new driver was designed to be SMP safe from the start.
-
-2.  What type of cards will work with it?
-
-	Any Ethernet type cards (you can even mix cards - a Intel
-EtherExpress PRO/100 and a 3com 3c905b, for example).  For most modes,
-devices need not be of the same speed.
-
-	Starting with version 3.2.1, bonding also supports Infiniband
-slaves in active-backup mode.
-
-3.  How many bonding devices can I have?
-
-	There is no limit.
-
-4.  How many slaves can a bonding device have?
-
-	This is limited only by the number of network interfaces Linux
-supports and/or the number of network cards you can place in your
-system.
-
-5.  What happens when a slave link dies?
-
-	If link monitoring is enabled, then the failing device will be
-disabled.  The active-backup mode will fail over to a backup link, and
-other modes will ignore the failed link.  The link will continue to be
-monitored, and should it recover, it will rejoin the bond (in whatever
-manner is appropriate for the mode). See the sections on High
-Availability and the documentation for each mode for additional
-information.
-	
-	Link monitoring can be enabled via either the miimon or
-arp_interval parameters (described in the module parameters section,
-above).  In general, miimon monitors the carrier state as sensed by
-the underlying network device, and the arp monitor (arp_interval)
-monitors connectivity to another host on the local network.
-
-	If no link monitoring is configured, the bonding driver will
-be unable to detect link failures, and will assume that all links are
-always available.  This will likely result in lost packets, and a
-resulting degradation of performance.  The precise performance loss
-depends upon the bonding mode and network configuration.
-
-6.  Can bonding be used for High Availability?
-
-	Yes.  See the section on High Availability for details.
-
-7.  Which switches/systems does it work with?
-
-	The full answer to this depends upon the desired mode.
-
-	In the basic balance modes (balance-rr and balance-xor), it
-works with any system that supports etherchannel (also called
-trunking).  Most managed switches currently available have such
-support, and many unmanaged switches as well.
-
-	The advanced balance modes (balance-tlb and balance-alb) do
-not have special switch requirements, but do need device drivers that
-support specific features (described in the appropriate section under
-module parameters, above).
-
-	In 802.3ad mode, it works with systems that support IEEE
-802.3ad Dynamic Link Aggregation.  Most managed and many unmanaged
-switches currently available support 802.3ad.
-
-        The active-backup mode should work with any Layer-II switch.
-
-8.  Where does a bonding device get its MAC address from?
-
-	When using slave devices that have fixed MAC addresses, or when
-the fail_over_mac option is enabled, the bonding device's MAC address is
-the MAC address of the active slave.
-
-	For other configurations, if not explicitly configured (with
-ifconfig or ip link), the MAC address of the bonding device is taken from
-its first slave device.  This MAC address is then passed to all following
-slaves and remains persistent (even if the first slave is removed) until
-the bonding device is brought down or reconfigured.
-
-	If you wish to change the MAC address, you can set it with
-ifconfig or ip link:
-
-# ifconfig bond0 hw ether 00:11:22:33:44:55
-
-# ip link set bond0 address 66:77:88:99:aa:bb
-
-	The MAC address can be also changed by bringing down/up the
-device and then changing its slaves (or their order):
-
-# ifconfig bond0 down ; modprobe -r bonding
-# ifconfig bond0 .... up
-# ifenslave bond0 eth...
-
-	This method will automatically take the address from the next
-slave that is added.
-
-	To restore your slaves' MAC addresses, you need to detach them
-from the bond (`ifenslave -d bond0 eth0'). The bonding driver will
-then restore the MAC addresses that the slaves had before they were
-enslaved.
-
-16. Resources and Links
-=======================
-
-	The latest version of the bonding driver can be found in the latest
-version of the linux kernel, found on http://kernel.org
-
-	The latest version of this document can be found in the latest kernel
-source (named Documentation/networking/bonding.txt).
-
-	Discussions regarding the usage of the bonding driver take place on the
-bonding-devel mailing list, hosted at sourceforge.net. If you have questions or
-problems, post them to the list.  The list address is:
-
-bonding-devel@lists.sourceforge.net
-
-	The administrative interface (to subscribe or unsubscribe) can
-be found at:
-
-https://lists.sourceforge.net/lists/listinfo/bonding-devel
-
-	Discussions regarding the development of the bonding driver take place
-on the main Linux network mailing list, hosted at vger.kernel.org. The list
-address is:
-
-netdev@vger.kernel.org
-
-	The administrative interface (to subscribe or unsubscribe) can
-be found at:
-
-http://vger.kernel.org/vger-lists.html#netdev
-
-Donald Becker's Ethernet Drivers and diag programs may be found at :
- - http://web.archive.org/web/*/http://www.scyld.com/network/ 
-
-You will also find a lot of information regarding Ethernet, NWay, MII,
-etc. at www.scyld.com.
-
--- END --
diff --git a/Documentation/networking/device_drivers/intel/e100.rst b/Documentation/networking/device_drivers/intel/e100.rst
index caf023cc88de..3ac21e7119a7 100644
--- a/Documentation/networking/device_drivers/intel/e100.rst
+++ b/Documentation/networking/device_drivers/intel/e100.rst
@@ -33,7 +33,7 @@ The following features are now available in supported kernels:
  - SNMP
 
 Channel Bonding documentation can be found in the Linux kernel source:
-/Documentation/networking/bonding.txt
+/Documentation/networking/bonding.rst
 
 
 Identifying Your Adapter
diff --git a/Documentation/networking/device_drivers/intel/ixgb.rst b/Documentation/networking/device_drivers/intel/ixgb.rst
index 945018207a92..ab624f1a44a8 100644
--- a/Documentation/networking/device_drivers/intel/ixgb.rst
+++ b/Documentation/networking/device_drivers/intel/ixgb.rst
@@ -37,7 +37,7 @@ The following features are available in this kernel:
  - SNMP
 
 Channel Bonding documentation can be found in the Linux kernel source:
-/Documentation/networking/bonding.txt
+/Documentation/networking/bonding.rst
 
 The driver information previously displayed in the /proc filesystem is not
 supported in this release.  Alternatively, you can use ethtool (version 1.6
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index fbf845fbaff7..22b872834ef0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -44,6 +44,7 @@ Contents:
    atm
    ax25
    baycom
+   bonding
 
 .. only::  subproject and html
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index b103fbdd0f68..4ab6d343fd86 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -50,7 +50,7 @@ config BONDING
 	  The driver supports multiple bonding modes to allow for both high
 	  performance and high availability operation.
 
-	  Refer to <file:Documentation/networking/bonding.txt> for more
+	  Refer to <file:Documentation/networking/bonding.rst> for more
 	  information.
 
 	  To compile this driver as a module, choose M here: the module
-- 
cgit v1.2.3-59-g8ed1b


From 92f06f4226fd9bdd6fbbd2e8b84601fc14b5855e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:25 +0200
Subject: docs: networking: convert cdc_mbim.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- use :field: markup;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/cdc_mbim.rst | 355 ++++++++++++++++++++++++++++++++++
 Documentation/networking/cdc_mbim.txt | 339 --------------------------------
 Documentation/networking/index.rst    |   1 +
 3 files changed, 356 insertions(+), 339 deletions(-)
 create mode 100644 Documentation/networking/cdc_mbim.rst
 delete mode 100644 Documentation/networking/cdc_mbim.txt

diff --git a/Documentation/networking/cdc_mbim.rst b/Documentation/networking/cdc_mbim.rst
new file mode 100644
index 000000000000..0048409c06b4
--- /dev/null
+++ b/Documentation/networking/cdc_mbim.rst
@@ -0,0 +1,355 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+cdc_mbim - Driver for CDC MBIM Mobile Broadband modems
+======================================================
+
+The cdc_mbim driver supports USB devices conforming to the "Universal
+Serial Bus Communications Class Subclass Specification for Mobile
+Broadband Interface Model" [1], which is a further development of
+"Universal Serial Bus Communications Class Subclass Specifications for
+Network Control Model Devices" [2] optimized for Mobile Broadband
+devices, aka "3G/LTE modems".
+
+
+Command Line Parameters
+=======================
+
+The cdc_mbim driver has no parameters of its own.  But the probing
+behaviour for NCM 1.0 backwards compatible MBIM functions (an
+"NCM/MBIM function" as defined in section 3.2 of [1]) is affected
+by a cdc_ncm driver parameter:
+
+prefer_mbim
+-----------
+:Type:          Boolean
+:Valid Range:   N/Y (0-1)
+:Default Value: Y (MBIM is preferred)
+
+This parameter sets the system policy for NCM/MBIM functions.  Such
+functions will be handled by either the cdc_ncm driver or the cdc_mbim
+driver depending on the prefer_mbim setting.  Setting prefer_mbim=N
+makes the cdc_mbim driver ignore these functions and lets the cdc_ncm
+driver handle them instead.
+
+The parameter is writable, and can be changed at any time. A manual
+unbind/bind is required to make the change effective for NCM/MBIM
+functions bound to the "wrong" driver.
+
+
+Basic usage
+===========
+
+MBIM functions are inactive when unmanaged. The cdc_mbim driver only
+provides a userspace interface to the MBIM control channel, and will
+not participate in the management of the function. This implies that a
+userspace MBIM management application always is required to enable a
+MBIM function.
+
+Such userspace applications include, but are not limited to:
+
+ - mbimcli (included with the libmbim [3] library), and
+ - ModemManager [4]
+
+Establishing a MBIM IP session requires at least these actions by the
+management application:
+
+ - open the control channel
+ - configure network connection settings
+ - connect to network
+ - configure IP interface
+
+Management application development
+----------------------------------
+The driver <-> userspace interfaces are described below.  The MBIM
+control channel protocol is described in [1].
+
+
+MBIM control channel userspace ABI
+==================================
+
+/dev/cdc-wdmX character device
+------------------------------
+The driver creates a two-way pipe to the MBIM function control channel
+using the cdc-wdm driver as a subdriver.  The userspace end of the
+control channel pipe is a /dev/cdc-wdmX character device.
+
+The cdc_mbim driver does not process or police messages on the control
+channel.  The channel is fully delegated to the userspace management
+application.  It is therefore up to this application to ensure that it
+complies with all the control channel requirements in [1].
+
+The cdc-wdmX device is created as a child of the MBIM control
+interface USB device.  The character device associated with a specific
+MBIM function can be looked up using sysfs.  For example::
+
+ bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc
+ cdc-wdm0
+
+ bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev
+ 180:0
+
+
+USB configuration descriptors
+-----------------------------
+The wMaxControlMessage field of the CDC MBIM functional descriptor
+limits the maximum control message size. The management application is
+responsible for negotiating a control message size complying with the
+requirements in section 9.3.1 of [1], taking this descriptor field
+into consideration.
+
+The userspace application can access the CDC MBIM functional
+descriptor of a MBIM function using either of the two USB
+configuration descriptor kernel interfaces described in [6] or [7].
+
+See also the ioctl documentation below.
+
+
+Fragmentation
+-------------
+The userspace application is responsible for all control message
+fragmentation and defragmentation, as described in section 9.5 of [1].
+
+
+/dev/cdc-wdmX write()
+---------------------
+The MBIM control messages from the management application *must not*
+exceed the negotiated control message size.
+
+
+/dev/cdc-wdmX read()
+--------------------
+The management application *must* accept control messages of up to the
+negotiated control message size.
+
+
+/dev/cdc-wdmX ioctl()
+---------------------
+IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size
+This ioctl returns the wMaxControlMessage field of the CDC MBIM
+functional descriptor for MBIM devices.  This is intended as a
+convenience, eliminating the need to parse the USB descriptors from
+userspace.
+
+::
+
+	#include <stdio.h>
+	#include <fcntl.h>
+	#include <sys/ioctl.h>
+	#include <linux/types.h>
+	#include <linux/usb/cdc-wdm.h>
+	int main()
+	{
+		__u16 max;
+		int fd = open("/dev/cdc-wdm0", O_RDWR);
+		if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max))
+			printf("wMaxControlMessage is %d\n", max);
+	}
+
+
+Custom device services
+----------------------
+The MBIM specification allows vendors to freely define additional
+services.  This is fully supported by the cdc_mbim driver.
+
+Support for new MBIM services, including vendor specified services, is
+implemented entirely in userspace, like the rest of the MBIM control
+protocol.
+
+New services should be registered in the MBIM Registry [5].
+
+
+
+MBIM data channel userspace ABI
+===============================
+
+wwanY network device
+--------------------
+The cdc_mbim driver represents the MBIM data channel as a single
+network device of the "wwan" type. This network device is initially
+mapped to MBIM IP session 0.
+
+
+Multiplexed IP sessions (IPS)
+-----------------------------
+MBIM allows multiplexing up to 256 IP sessions over a single USB data
+channel.  The cdc_mbim driver models such IP sessions as 802.1q VLAN
+subdevices of the master wwanY device, mapping MBIM IP session Z to
+VLAN ID Z for all values of Z greater than 0.
+
+The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure
+described in section 10.5.1 of [1].
+
+The userspace management application is responsible for adding new
+VLAN links prior to establishing MBIM IP sessions where the SessionId
+is greater than 0. These links can be added by using the normal VLAN
+kernel interfaces, either ioctl or netlink.
+
+For example, adding a link for a MBIM IP session with SessionId 3::
+
+  ip link add link wwan0 name wwan0.3 type vlan id 3
+
+The driver will automatically map the "wwan0.3" network device to MBIM
+IP session 3.
+
+
+Device Service Streams (DSS)
+----------------------------
+MBIM also allows up to 256 non-IP data streams to be multiplexed over
+the same shared USB data channel.  The cdc_mbim driver models these
+sessions as another set of 802.1q VLAN subdevices of the master wwanY
+device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values
+of A.
+
+The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO
+structure described in section 10.5.29 of [1].
+
+The DSS VLAN subdevices are used as a practical interface between the
+shared MBIM data channel and a MBIM DSS aware userspace application.
+It is not intended to be presented as-is to an end user. The
+assumption is that a userspace application initiating a DSS session
+also takes care of the necessary framing of the DSS data, presenting
+the stream to the end user in an appropriate way for the stream type.
+
+The network device ABI requires a dummy ethernet header for every DSS
+data frame being transported.  The contents of this header are
+arbitrary, with the following exceptions:
+
+ - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped
+ - RX frames will have the protocol field set to ETH_P_802_3 (but will
+   not be properly formatted 802.3 frames)
+ - RX frames will have the destination address set to the hardware
+   address of the master device
+
+The DSS supporting userspace management application is responsible for
+adding the dummy ethernet header on TX and stripping it on RX.
+
+This is a simple example using tools commonly available, exporting
+DssSessionId 5 as a pty character device pointed to by a /dev/nmea
+symlink::
+
+  ip link add link wwan0 name wwan0.dss5 type vlan id 261
+  ip link set dev wwan0.dss5 up
+  socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea
+
+This is only an example, most suitable for testing out a DSS
+service. Userspace applications supporting specific MBIM DSS services
+are expected to use the tools and programming interfaces required by
+that service.
+
+Note that adding VLAN links for DSS sessions is entirely optional.  A
+management application may instead choose to bind a packet socket
+directly to the master network device, using the received VLAN tags to
+map frames to the correct DSS session and adding 18 byte VLAN ethernet
+headers with the appropriate tag on TX.  In this case using a socket
+filter is recommended, matching only the DSS VLAN subset. This avoids
+unnecessary copying of unrelated IP session data to userspace.  For
+example::
+
+  static struct sock_filter dssfilter[] = {
+	/* use special negative offsets to get VLAN tag */
+	BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
+	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */
+
+	/* verify DSS VLAN range */
+	BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
+	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4),	/* 256 is first DSS VLAN */
+	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0),	/* 511 is last DSS VLAN */
+
+	/* verify ethertype */
+	BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN),
+	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1),
+
+	BPF_STMT(BPF_RET|BPF_K, (u_int)-1),	/* accept */
+	BPF_STMT(BPF_RET|BPF_K, 0),		/* ignore */
+  };
+
+
+
+Tagged IP session 0 VLAN
+------------------------
+As described above, MBIM IP session 0 is treated as special by the
+driver.  It is initially mapped to untagged frames on the wwanY
+network device.
+
+This mapping implies a few restrictions on multiplexed IPS and DSS
+sessions, which may not always be practical:
+
+ - no IPS or DSS session can use a frame size greater than the MTU on
+   IP session 0
+ - no IPS or DSS session can be in the up state unless the network
+   device representing IP session 0 also is up
+
+These problems can be avoided by optionally making the driver map IP
+session 0 to a VLAN subdevice, similar to all other IP sessions.  This
+behaviour is triggered by adding a VLAN link for the magic VLAN ID
+4094.  The driver will then immediately start mapping MBIM IP session
+0 to this VLAN, and will drop untagged frames on the master wwanY
+device.
+
+Tip: It might be less confusing to the end user to name this VLAN
+subdevice after the MBIM SessionID instead of the VLAN ID.  For
+example::
+
+  ip link add link wwan0 name wwan0.0 type vlan id 4094
+
+
+VLAN mapping
+------------
+
+Summarizing the cdc_mbim driver mapping described above, we have this
+relationship between VLAN tags on the wwanY network device and MBIM
+sessions on the shared USB data channel::
+
+  VLAN ID       MBIM type   MBIM SessionID           Notes
+  ---------------------------------------------------------
+  untagged      IPS         0                        a)
+  1 - 255       IPS         1 - 255 <VLANID>
+  256 - 511     DSS         0 - 255 <VLANID - 256>
+  512 - 4093                                         b)
+  4094          IPS         0                        c)
+
+    a) if no VLAN ID 4094 link exists, else dropped
+    b) unsupported VLAN range, unconditionally dropped
+    c) if a VLAN ID 4094 link exists, else dropped
+
+
+
+
+References
+==========
+
+ 1) USB Implementers Forum, Inc. - "Universal Serial Bus
+    Communications Class Subclass Specification for Mobile Broadband
+    Interface Model", Revision 1.0 (Errata 1), May 1, 2013
+
+      - http://www.usb.org/developers/docs/devclass_docs/
+
+ 2) USB Implementers Forum, Inc. - "Universal Serial Bus
+    Communications Class Subclass Specifications for Network Control
+    Model Devices", Revision 1.0 (Errata 1), November 24, 2010
+
+      - http://www.usb.org/developers/docs/devclass_docs/
+
+ 3) libmbim - "a glib-based library for talking to WWAN modems and
+    devices which speak the Mobile Interface Broadband Model (MBIM)
+    protocol"
+
+      - http://www.freedesktop.org/wiki/Software/libmbim/
+
+ 4) ModemManager - "a DBus-activated daemon which controls mobile
+    broadband (2G/3G/4G) devices and connections"
+
+      - http://www.freedesktop.org/wiki/Software/ModemManager/
+
+ 5) "MBIM (Mobile Broadband Interface Model) Registry"
+
+       - http://compliance.usb.org/mbim/
+
+ 6) "/sys/kernel/debug/usb/devices output format"
+
+       - Documentation/driver-api/usb/usb.rst
+
+ 7) "/sys/bus/usb/devices/.../descriptors"
+
+       - Documentation/ABI/stable/sysfs-bus-usb
diff --git a/Documentation/networking/cdc_mbim.txt b/Documentation/networking/cdc_mbim.txt
deleted file mode 100644
index 4e68f0bc5dba..000000000000
--- a/Documentation/networking/cdc_mbim.txt
+++ /dev/null
@@ -1,339 +0,0 @@
-     cdc_mbim - Driver for CDC MBIM Mobile Broadband modems
-    ========================================================
-
-The cdc_mbim driver supports USB devices conforming to the "Universal
-Serial Bus Communications Class Subclass Specification for Mobile
-Broadband Interface Model" [1], which is a further development of
-"Universal Serial Bus Communications Class Subclass Specifications for
-Network Control Model Devices" [2] optimized for Mobile Broadband
-devices, aka "3G/LTE modems".
-
-
-Command Line Parameters
-=======================
-
-The cdc_mbim driver has no parameters of its own.  But the probing
-behaviour for NCM 1.0 backwards compatible MBIM functions (an
-"NCM/MBIM function" as defined in section 3.2 of [1]) is affected
-by a cdc_ncm driver parameter:
-
-prefer_mbim
------------
-Type:          Boolean
-Valid Range:   N/Y (0-1)
-Default Value: Y (MBIM is preferred)
-
-This parameter sets the system policy for NCM/MBIM functions.  Such
-functions will be handled by either the cdc_ncm driver or the cdc_mbim
-driver depending on the prefer_mbim setting.  Setting prefer_mbim=N
-makes the cdc_mbim driver ignore these functions and lets the cdc_ncm
-driver handle them instead.
-
-The parameter is writable, and can be changed at any time. A manual
-unbind/bind is required to make the change effective for NCM/MBIM
-functions bound to the "wrong" driver
-
-
-Basic usage
-===========
-
-MBIM functions are inactive when unmanaged. The cdc_mbim driver only
-provides a userspace interface to the MBIM control channel, and will
-not participate in the management of the function. This implies that a
-userspace MBIM management application always is required to enable a
-MBIM function.
-
-Such userspace applications includes, but are not limited to:
- - mbimcli (included with the libmbim [3] library), and
- - ModemManager [4]
-
-Establishing a MBIM IP session reequires at least these actions by the
-management application:
- - open the control channel
- - configure network connection settings
- - connect to network
- - configure IP interface
-
-Management application development
-----------------------------------
-The driver <-> userspace interfaces are described below.  The MBIM
-control channel protocol is described in [1].
-
-
-MBIM control channel userspace ABI
-==================================
-
-/dev/cdc-wdmX character device
-------------------------------
-The driver creates a two-way pipe to the MBIM function control channel
-using the cdc-wdm driver as a subdriver.  The userspace end of the
-control channel pipe is a /dev/cdc-wdmX character device.
-
-The cdc_mbim driver does not process or police messages on the control
-channel.  The channel is fully delegated to the userspace management
-application.  It is therefore up to this application to ensure that it
-complies with all the control channel requirements in [1].
-
-The cdc-wdmX device is created as a child of the MBIM control
-interface USB device.  The character device associated with a specific
-MBIM function can be looked up using sysfs.  For example:
-
- bjorn@nemi:~$ ls /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc
- cdc-wdm0
-
- bjorn@nemi:~$ grep . /sys/bus/usb/drivers/cdc_mbim/2-4:2.12/usbmisc/cdc-wdm0/dev
- 180:0
-
-
-USB configuration descriptors
------------------------------
-The wMaxControlMessage field of the CDC MBIM functional descriptor
-limits the maximum control message size. The managament application is
-responsible for negotiating a control message size complying with the
-requirements in section 9.3.1 of [1], taking this descriptor field
-into consideration.
-
-The userspace application can access the CDC MBIM functional
-descriptor of a MBIM function using either of the two USB
-configuration descriptor kernel interfaces described in [6] or [7].
-
-See also the ioctl documentation below.
-
-
-Fragmentation
--------------
-The userspace application is responsible for all control message
-fragmentation and defragmentaion, as described in section 9.5 of [1].
-
-
-/dev/cdc-wdmX write()
----------------------
-The MBIM control messages from the management application *must not*
-exceed the negotiated control message size.
-
-
-/dev/cdc-wdmX read()
---------------------
-The management application *must* accept control messages of up the
-negotiated control message size.
-
-
-/dev/cdc-wdmX ioctl()
---------------------
-IOCTL_WDM_MAX_COMMAND: Get Maximum Command Size
-This ioctl returns the wMaxControlMessage field of the CDC MBIM
-functional descriptor for MBIM devices.  This is intended as a
-convenience, eliminating the need to parse the USB descriptors from
-userspace.
-
-	#include <stdio.h>
-	#include <fcntl.h>
-	#include <sys/ioctl.h>
-	#include <linux/types.h>
-	#include <linux/usb/cdc-wdm.h>
-	int main()
-	{
-		__u16 max;
-		int fd = open("/dev/cdc-wdm0", O_RDWR);
-		if (!ioctl(fd, IOCTL_WDM_MAX_COMMAND, &max))
-			printf("wMaxControlMessage is %d\n", max);
-	}
-
-
-Custom device services
-----------------------
-The MBIM specification allows vendors to freely define additional
-services.  This is fully supported by the cdc_mbim driver.
-
-Support for new MBIM services, including vendor specified services, is
-implemented entirely in userspace, like the rest of the MBIM control
-protocol
-
-New services should be registered in the MBIM Registry [5].
-
-
-
-MBIM data channel userspace ABI
-===============================
-
-wwanY network device
---------------------
-The cdc_mbim driver represents the MBIM data channel as a single
-network device of the "wwan" type. This network device is initially
-mapped to MBIM IP session 0.
-
-
-Multiplexed IP sessions (IPS)
------------------------------
-MBIM allows multiplexing up to 256 IP sessions over a single USB data
-channel.  The cdc_mbim driver models such IP sessions as 802.1q VLAN
-subdevices of the master wwanY device, mapping MBIM IP session Z to
-VLAN ID Z for all values of Z greater than 0.
-
-The device maximum Z is given in the MBIM_DEVICE_CAPS_INFO structure
-described in section 10.5.1 of [1].
-
-The userspace management application is responsible for adding new
-VLAN links prior to establishing MBIM IP sessions where the SessionId
-is greater than 0. These links can be added by using the normal VLAN
-kernel interfaces, either ioctl or netlink.
-
-For example, adding a link for a MBIM IP session with SessionId 3:
-
-  ip link add link wwan0 name wwan0.3 type vlan id 3
-
-The driver will automatically map the "wwan0.3" network device to MBIM
-IP session 3.
-
-
-Device Service Streams (DSS)
-----------------------------
-MBIM also allows up to 256 non-IP data streams to be multiplexed over
-the same shared USB data channel.  The cdc_mbim driver models these
-sessions as another set of 802.1q VLAN subdevices of the master wwanY
-device, mapping MBIM DSS session A to VLAN ID (256 + A) for all values
-of A.
-
-The device maximum A is given in the MBIM_DEVICE_SERVICES_INFO
-structure described in section 10.5.29 of [1].
-
-The DSS VLAN subdevices are used as a practical interface between the
-shared MBIM data channel and a MBIM DSS aware userspace application.
-It is not intended to be presented as-is to an end user. The
-assumption is that a userspace application initiating a DSS session
-also takes care of the necessary framing of the DSS data, presenting
-the stream to the end user in an appropriate way for the stream type.
-
-The network device ABI requires a dummy ethernet header for every DSS
-data frame being transported.  The contents of this header is
-arbitrary, with the following exceptions:
- - TX frames using an IP protocol (0x0800 or 0x86dd) will be dropped
- - RX frames will have the protocol field set to ETH_P_802_3 (but will
-   not be properly formatted 802.3 frames)
- - RX frames will have the destination address set to the hardware
-   address of the master device
-
-The DSS supporting userspace management application is responsible for
-adding the dummy ethernet header on TX and stripping it on RX.
-
-This is a simple example using tools commonly available, exporting
-DssSessionId 5 as a pty character device pointed to by a /dev/nmea
-symlink:
-
-  ip link add link wwan0 name wwan0.dss5 type vlan id 261
-  ip link set dev wwan0.dss5 up
-  socat INTERFACE:wwan0.dss5,type=2 PTY:,echo=0,link=/dev/nmea
-
-This is only an example, most suitable for testing out a DSS
-service. Userspace applications supporting specific MBIM DSS services
-are expected to use the tools and programming interfaces required by
-that service.
-
-Note that adding VLAN links for DSS sessions is entirely optional.  A
-management application may instead choose to bind a packet socket
-directly to the master network device, using the received VLAN tags to
-map frames to the correct DSS session and adding 18 byte VLAN ethernet
-headers with the appropriate tag on TX.  In this case using a socket
-filter is recommended, matching only the DSS VLAN subset. This avoid
-unnecessary copying of unrelated IP session data to userspace.  For
-example:
-
-  static struct sock_filter dssfilter[] = {
-	/* use special negative offsets to get VLAN tag */
-	BPF_STMT(BPF_LD|BPF_B|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG_PRESENT),
-	BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, 1, 0, 6), /* true */
-
-	/* verify DSS VLAN range */
-	BPF_STMT(BPF_LD|BPF_H|BPF_ABS, SKF_AD_OFF + SKF_AD_VLAN_TAG),
-	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 256, 0, 4),	/* 256 is first DSS VLAN */
-	BPF_JUMP(BPF_JMP|BPF_JGE|BPF_K, 512, 3, 0),	/* 511 is last DSS VLAN */
-
-	/* verify ethertype */
-        BPF_STMT(BPF_LD|BPF_H|BPF_ABS, 2 * ETH_ALEN),
-        BPF_JUMP(BPF_JMP|BPF_JEQ|BPF_K, ETH_P_802_3, 0, 1),
-
-        BPF_STMT(BPF_RET|BPF_K, (u_int)-1),	/* accept */
-        BPF_STMT(BPF_RET|BPF_K, 0),		/* ignore */
-  };
-
-
-
-Tagged IP session 0 VLAN
-------------------------
-As described above, MBIM IP session 0 is treated as special by the
-driver.  It is initially mapped to untagged frames on the wwanY
-network device.
-
-This mapping implies a few restrictions on multiplexed IPS and DSS
-sessions, which may not always be practical:
- - no IPS or DSS session can use a frame size greater than the MTU on
-   IP session 0
- - no IPS or DSS session can be in the up state unless the network
-   device representing IP session 0 also is up
-
-These problems can be avoided by optionally making the driver map IP
-session 0 to a VLAN subdevice, similar to all other IP sessions.  This
-behaviour is triggered by adding a VLAN link for the magic VLAN ID
-4094.  The driver will then immediately start mapping MBIM IP session
-0 to this VLAN, and will drop untagged frames on the master wwanY
-device.
-
-Tip: It might be less confusing to the end user to name this VLAN
-subdevice after the MBIM SessionID instead of the VLAN ID.  For
-example:
-
-  ip link add link wwan0 name wwan0.0 type vlan id 4094
-
-
-VLAN mapping
-------------
-
-Summarizing the cdc_mbim driver mapping described above, we have this
-relationship between VLAN tags on the wwanY network device and MBIM
-sessions on the shared USB data channel:
-
-  VLAN ID       MBIM type   MBIM SessionID           Notes
-  ---------------------------------------------------------
-  untagged      IPS         0                        a)
-  1 - 255       IPS         1 - 255 <VLANID>
-  256 - 511     DSS         0 - 255 <VLANID - 256>
-  512 - 4093                                         b)
-  4094          IPS         0                        c)
-
-    a) if no VLAN ID 4094 link exists, else dropped
-    b) unsupported VLAN range, unconditionally dropped
-    c) if a VLAN ID 4094 link exists, else dropped
-
-
-
-
-References
-==========
-
-[1] USB Implementers Forum, Inc. - "Universal Serial Bus
-      Communications Class Subclass Specification for Mobile Broadband
-      Interface Model", Revision 1.0 (Errata 1), May 1, 2013
-      - http://www.usb.org/developers/docs/devclass_docs/
-
-[2] USB Implementers Forum, Inc. - "Universal Serial Bus
-      Communications Class Subclass Specifications for Network Control
-      Model Devices", Revision 1.0 (Errata 1), November 24, 2010
-      - http://www.usb.org/developers/docs/devclass_docs/
-
-[3] libmbim - "a glib-based library for talking to WWAN modems and
-      devices which speak the Mobile Interface Broadband Model (MBIM)
-      protocol"
-      - http://www.freedesktop.org/wiki/Software/libmbim/
-
-[4] ModemManager - "a DBus-activated daemon which controls mobile
-      broadband (2G/3G/4G) devices and connections"
-      - http://www.freedesktop.org/wiki/Software/ModemManager/
-
-[5] "MBIM (Mobile Broadband Interface Model) Registry"
-       - http://compliance.usb.org/mbim/
-
-[6] "/sys/kernel/debug/usb/devices output format"
-       - Documentation/driver-api/usb/usb.rst
-
-[7] "/sys/bus/usb/devices/.../descriptors"
-       - Documentation/ABI/stable/sysfs-bus-usb
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 22b872834ef0..55802abd65a0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -45,6 +45,7 @@ Contents:
    ax25
    baycom
    bonding
+   cdc_mbim
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 99b0e82dc5e36edb625f519121d4398628f05e95 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:26 +0200
Subject: docs: networking: convert cops.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/cops.rst  | 80 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/cops.txt  | 63 ------------------------------
 Documentation/networking/index.rst |  1 +
 drivers/net/appletalk/Kconfig      |  2 +-
 4 files changed, 82 insertions(+), 64 deletions(-)
 create mode 100644 Documentation/networking/cops.rst
 delete mode 100644 Documentation/networking/cops.txt

diff --git a/Documentation/networking/cops.rst b/Documentation/networking/cops.rst
new file mode 100644
index 000000000000..964ba80599a9
--- /dev/null
+++ b/Documentation/networking/cops.rst
@@ -0,0 +1,80 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+The COPS LocalTalk Linux driver (cops.c)
+========================================
+
+By Jay Schulist <jschlst@samba.org>
+
+This driver has two modes and they are: Dayna mode and Tangent mode.
+Each mode corresponds with the type of card. It has been found
+that there are 2 main types of cards and all other cards are
+the same and just have different names or only have minor differences
+such as more IO ports. As this driver is tested it will
+become more clear exactly what cards are supported.
+
+Right now these cards are known to work with the COPS driver. The
+LT-200 cards work in a somewhat more limited capacity than the
+DL200 cards, which work very well and are in use by many people.
+
+TANGENT driver mode:
+	- Tangent ATB-II, Novell NL-1000, Daystar Digital LT-200
+
+DAYNA driver mode:
+	- Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95,
+	- Farallon PhoneNET PC III, Farallon PhoneNET PC II
+
+Other cards possibly supported, mode unknown though:
+	- Dayna DL2000 (Full length)
+
+The COPS driver defaults to using Dayna mode. To change the driver's
+mode if you built a driver with dual support use board_type=1 or
+board_type=2 for Dayna or Tangent with insmod.
+
+Operation/loading of the driver
+===============================
+
+Use modprobe like this:	/sbin/modprobe cops.o (IO #) (IRQ #)
+If you do not specify any options the driver will try and use the IO = 0x240,
+IRQ = 5. As of right now I would only use IRQ 5 for the card, if autoprobing.
+
+To load multiple COPS driver Localtalk cards you can do one of the following::
+
+	insmod cops io=0x240 irq=5
+	insmod -o cops2 cops io=0x260 irq=3
+
+Or in lilo.conf put something like this::
+
+	append="ether=5,0x240,lt0 ether=3,0x260,lt1"
+
+Then bring up the interface with ifconfig. It will look something like this::
+
+  lt0       Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-F7-00-00-00-00-00-00-00-00
+	    inet addr:192.168.1.2  Bcast:192.168.1.255  Mask:255.255.255.0
+	    UP BROADCAST RUNNING NOARP MULTICAST  MTU:600  Metric:1
+	    RX packets:0 errors:0 dropped:0 overruns:0 frame:0
+	    TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 coll:0
+
+Netatalk Configuration
+======================
+
+You will need to configure atalkd with something like the following to make
+it work with the cops.c driver.
+
+* For single LTalk card use::
+
+    dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033"
+    lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
+
+* For multiple cards, Ethernet and LocalTalk::
+
+    eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033"
+    lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
+
+* For multiple LocalTalk cards, and an Ethernet card.
+
+* Order seems to matter here, Ethernet last::
+
+    lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1"
+    lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2"
+    eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk"
diff --git a/Documentation/networking/cops.txt b/Documentation/networking/cops.txt
deleted file mode 100644
index 3e344b448e07..000000000000
--- a/Documentation/networking/cops.txt
+++ /dev/null
@@ -1,63 +0,0 @@
-Text File for the COPS LocalTalk Linux driver (cops.c).
-	By Jay Schulist <jschlst@samba.org>
-
-This driver has two modes and they are: Dayna mode and Tangent mode.
-Each mode corresponds with the type of card. It has been found
-that there are 2 main types of cards and all other cards are
-the same and just have different names or only have minor differences
-such as more IO ports. As this driver is tested it will
-become more clear exactly what cards are supported. 
-
-Right now these cards are known to work with the COPS driver. The
-LT-200 cards work in a somewhat more limited capacity than the
-DL200 cards, which work very well and are in use by many people.
-
-TANGENT driver mode:
-	Tangent ATB-II, Novell NL-1000, Daystar Digital LT-200
-DAYNA driver mode:
-	Dayna DL2000/DaynaTalk PC (Half Length), COPS LT-95,
-	Farallon PhoneNET PC III, Farallon PhoneNET PC II
-Other cards possibly supported mode unknown though:
-	Dayna DL2000 (Full length)
-
-The COPS driver defaults to using Dayna mode. To change the driver's 
-mode if you built a driver with dual support use board_type=1 or
-board_type=2 for Dayna or Tangent with insmod.
-
-** Operation/loading of the driver.
-Use modprobe like this:	/sbin/modprobe cops.o (IO #) (IRQ #)
-If you do not specify any options the driver will try and use the IO = 0x240,
-IRQ = 5. As of right now I would only use IRQ 5 for the card, if autoprobing.
-
-To load multiple COPS driver Localtalk cards you can do one of the following.
-
-insmod cops io=0x240 irq=5
-insmod -o cops2 cops io=0x260 irq=3
-
-Or in lilo.conf put something like this:
-	append="ether=5,0x240,lt0 ether=3,0x260,lt1"
-
-Then bring up the interface with ifconfig. It will look something like this:
-lt0       Link encap:UNSPEC  HWaddr 00-00-00-00-00-00-00-F7-00-00-00-00-00-00-00-00
-          inet addr:192.168.1.2  Bcast:192.168.1.255  Mask:255.255.255.0
-          UP BROADCAST RUNNING NOARP MULTICAST  MTU:600  Metric:1
-          RX packets:0 errors:0 dropped:0 overruns:0 frame:0
-          TX packets:0 errors:0 dropped:0 overruns:0 carrier:0 coll:0
-
-** Netatalk Configuration
-You will need to configure atalkd with something like the following to make
-it work with the cops.c driver.
-
-* For single LTalk card use.
-dummy -seed -phase 2 -net 2000 -addr 2000.10 -zone "1033"
-lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
-
-* For multiple cards, Ethernet and LocalTalk.
-eth0 -seed -phase 2 -net 3000 -addr 3000.20 -zone "1033"
-lt0 -seed -phase 1 -net 1000 -addr 1000.50 -zone "1033"
-
-* For multiple LocalTalk cards, and an Ethernet card.
-* Order seems to matter here, Ethernet last.
-lt0 -seed -phase 1 -net 1000 -addr 1000.10 -zone "LocalTalk1"
-lt1 -seed -phase 1 -net 2000 -addr 2000.20 -zone "LocalTalk2"
-eth0 -seed -phase 2 -net 3000 -addr 3000.30 -zone "EtherTalk"
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 55802abd65a0..7b596810d479 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -46,6 +46,7 @@ Contents:
    baycom
    bonding
    cdc_mbim
+   cops
 
 .. only::  subproject and html
 
diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig
index af509b05ac5c..d4e51c048f62 100644
--- a/drivers/net/appletalk/Kconfig
+++ b/drivers/net/appletalk/Kconfig
@@ -59,7 +59,7 @@ config COPS
 	  package. This driver is experimental, which means that it may not
 	  work. This driver will only work if you choose "AppleTalk DDP"
 	  networking support, above.
-	  Please read the file <file:Documentation/networking/cops.txt>.
+	  Please read the file <file:Documentation/networking/cops.rst>.
 
 config COPS_DAYNA
 	bool "Dayna firmware support"
-- 
cgit v1.2.3-59-g8ed1b


From 9a9891fbdf935c270388fca856c117ad71c02458 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:27 +0200
Subject: docs: networking: convert cxacru.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/cxacru.rst | 120 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/cxacru.txt | 100 ------------------------------
 Documentation/networking/index.rst  |   1 +
 3 files changed, 121 insertions(+), 100 deletions(-)
 create mode 100644 Documentation/networking/cxacru.rst
 delete mode 100644 Documentation/networking/cxacru.txt

diff --git a/Documentation/networking/cxacru.rst b/Documentation/networking/cxacru.rst
new file mode 100644
index 000000000000..6088af2ffeda
--- /dev/null
+++ b/Documentation/networking/cxacru.rst
@@ -0,0 +1,120 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================
+ATM cxacru device driver
+========================
+
+Firmware is required for this device: http://accessrunner.sourceforge.net/
+
+While it is capable of managing/maintaining the ADSL connection without the
+module loaded, the device will sometimes stop responding after unloading the
+driver and it is necessary to unplug/remove power to the device to fix this.
+
+Note: support for cxacru-cf.bin has been removed. It was not loaded correctly
+so it had no effect on the device configuration. Fixing it could have stopped
+existing devices working when an invalid configuration is supplied.
+
+There is a script cxacru-cf.py to convert an existing file to the sysfs form.
+
+Detected devices will appear as ATM devices named "cxacru". In /sys/class/atm/
+these are directories named cxacruN where N is the device number. A symlink
+named device points to the USB interface device's directory which contains
+several sysfs attribute files for retrieving device statistics:
+
+* adsl_controller_version
+
+* adsl_headend
+* adsl_headend_environment
+
+	- Information about the remote headend.
+
+* adsl_config
+
+	- Configuration writing interface.
+	- Write parameters in hexadecimal format <index>=<value>,
+	  separated by whitespace, e.g.:
+
+		"1=0 a=5"
+
+	- Up to 7 parameters at a time will be sent and the modem will restart
+	  the ADSL connection when any value is set. These are logged for future
+	  reference.
+
+* downstream_attenuation (dB)
+* downstream_bits_per_frame
+* downstream_rate (kbps)
+* downstream_snr_margin (dB)
+
+	- Downstream stats.
+
+* upstream_attenuation (dB)
+* upstream_bits_per_frame
+* upstream_rate (kbps)
+* upstream_snr_margin (dB)
+* transmitter_power (dBm/Hz)
+
+	- Upstream stats.
+
+* downstream_crc_errors
+* downstream_fec_errors
+* downstream_hec_errors
+* upstream_crc_errors
+* upstream_fec_errors
+* upstream_hec_errors
+
+	- Error counts.
+
+* line_startable
+
+	- Indicates that ADSL support on the device
+	  is/can be enabled, see adsl_start.
+
+* line_status
+
+	 - "initialising"
+	 - "down"
+	 - "attempting to activate"
+	 - "training"
+	 - "channel analysis"
+	 - "exchange"
+	 - "waiting"
+	 - "up"
+
+	Changes between "down" and "attempting to activate"
+	if there is no signal.
+
+* link_status
+
+	 - "not connected"
+	 - "connected"
+	 - "lost"
+
+* mac_address
+
+* modulation
+
+	 - "" (when not connected)
+	 - "ANSI T1.413"
+	 - "ITU-T G.992.1 (G.DMT)"
+	 - "ITU-T G.992.2 (G.LITE)"
+
+* startup_attempts
+
+	- Count of total attempts to initialise ADSL.
+
+To enable/disable ADSL, the following can be written to the adsl_state file:
+
+	 - "start"
+	 - "stop"
+	 - "restart" (stops, waits 1.5s, then starts)
+	 - "poll" (used to resume status polling if it was disabled due to failure)
+
+Changes in adsl/line state are reported via kernel log messages::
+
+	[4942145.150704] ATM dev 0: ADSL state: running
+	[4942243.663766] ATM dev 0: ADSL line: down
+	[4942249.665075] ATM dev 0: ADSL line: attempting to activate
+	[4942253.654954] ATM dev 0: ADSL line: training
+	[4942255.666387] ATM dev 0: ADSL line: channel analysis
+	[4942259.656262] ATM dev 0: ADSL line: exchange
+	[2635357.696901] ATM dev 0: ADSL line: up (8128 kb/s down | 832 kb/s up)
diff --git a/Documentation/networking/cxacru.txt b/Documentation/networking/cxacru.txt
deleted file mode 100644
index 2cce04457b4d..000000000000
--- a/Documentation/networking/cxacru.txt
+++ /dev/null
@@ -1,100 +0,0 @@
-Firmware is required for this device: http://accessrunner.sourceforge.net/
-
-While it is capable of managing/maintaining the ADSL connection without the
-module loaded, the device will sometimes stop responding after unloading the
-driver and it is necessary to unplug/remove power to the device to fix this.
-
-Note: support for cxacru-cf.bin has been removed. It was not loaded correctly
-so it had no effect on the device configuration. Fixing it could have stopped
-existing devices working when an invalid configuration is supplied.
-
-There is a script cxacru-cf.py to convert an existing file to the sysfs form.
-
-Detected devices will appear as ATM devices named "cxacru". In /sys/class/atm/
-these are directories named cxacruN where N is the device number. A symlink
-named device points to the USB interface device's directory which contains
-several sysfs attribute files for retrieving device statistics:
-
-* adsl_controller_version
-
-* adsl_headend
-* adsl_headend_environment
-	Information about the remote headend.
-
-* adsl_config
-	Configuration writing interface.
-	Write parameters in hexadecimal format <index>=<value>,
-	separated by whitespace, e.g.:
-		"1=0 a=5"
-	Up to 7 parameters at a time will be sent and the modem will restart
-	the ADSL connection when any value is set. These are logged for future
-	reference.
-
-* downstream_attenuation (dB)
-* downstream_bits_per_frame
-* downstream_rate (kbps)
-* downstream_snr_margin (dB)
-	Downstream stats.
-
-* upstream_attenuation (dB)
-* upstream_bits_per_frame
-* upstream_rate (kbps)
-* upstream_snr_margin (dB)
-* transmitter_power (dBm/Hz)
-	Upstream stats.
-
-* downstream_crc_errors
-* downstream_fec_errors
-* downstream_hec_errors
-* upstream_crc_errors
-* upstream_fec_errors
-* upstream_hec_errors
-	Error counts.
-
-* line_startable
-	Indicates that ADSL support on the device
-	is/can be enabled, see adsl_start.
-
-* line_status
-	"initialising"
-	"down"
-	"attempting to activate"
-	"training"
-	"channel analysis"
-	"exchange"
-	"waiting"
-	"up"
-
-	Changes between "down" and "attempting to activate"
-	if there is no signal.
-
-* link_status
-	"not connected"
-	"connected"
-	"lost"
-
-* mac_address
-
-* modulation
-	"" (when not connected)
-	"ANSI T1.413"
-	"ITU-T G.992.1 (G.DMT)"
-	"ITU-T G.992.2 (G.LITE)"
-
-* startup_attempts
-	Count of total attempts to initialise ADSL.
-
-To enable/disable ADSL, the following can be written to the adsl_state file:
-	"start"
-	"stop
-	"restart" (stops, waits 1.5s, then starts)
-	"poll" (used to resume status polling if it was disabled due to failure)
-
-Changes in adsl/line state are reported via kernel log messages:
-	[4942145.150704] ATM dev 0: ADSL state: running
-	[4942243.663766] ATM dev 0: ADSL line: down
-	[4942249.665075] ATM dev 0: ADSL line: attempting to activate
-	[4942253.654954] ATM dev 0: ADSL line: training
-	[4942255.666387] ATM dev 0: ADSL line: channel analysis
-	[4942259.656262] ATM dev 0: ADSL line: exchange
-	[2635357.696901] ATM dev 0: ADSL line: up (8128 kb/s down | 832 kb/s up)
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 7b596810d479..4c8e896490e0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -47,6 +47,7 @@ Contents:
    bonding
    cdc_mbim
    cops
+   cxacru
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 33155bac6519f545137d9c46d2e59e5f8332dd50 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:28 +0200
Subject: docs: networking: convert dccp.txt to ReST

- add SPDX header;
- adjust title markup;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dccp.rst  | 216 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/dccp.txt  | 207 -----------------------------------
 Documentation/networking/index.rst |   1 +
 3 files changed, 217 insertions(+), 207 deletions(-)
 create mode 100644 Documentation/networking/dccp.rst
 delete mode 100644 Documentation/networking/dccp.txt

diff --git a/Documentation/networking/dccp.rst b/Documentation/networking/dccp.rst
new file mode 100644
index 000000000000..dde16be04456
--- /dev/null
+++ b/Documentation/networking/dccp.rst
@@ -0,0 +1,216 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============
+DCCP protocol
+=============
+
+
+.. Contents
+   - Introduction
+   - Missing features
+   - Socket options
+   - Sysctl variables
+   - IOCTLs
+   - Other tunables
+   - Notes
+
+
+Introduction
+============
+Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
+oriented protocol designed to solve issues present in UDP and TCP, particularly
+for real-time and multimedia (streaming) traffic.
+It divides into a base protocol (RFC 4340) and pluggable congestion control
+modules called CCIDs. Like pluggable TCP congestion control, at least one CCID
+needs to be enabled in order for the protocol to function properly. In the Linux
+implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as
+the TCP-friendly CCID3 (RFC 4342), are optional.
+For a brief introduction to CCIDs and suggestions for choosing a CCID to match
+given applications, see section 10 of RFC 4340.
+
+It has a base protocol and pluggable congestion control IDs (CCIDs).
+
+DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
+is at http://www.ietf.org/html.charters/dccp-charter.html
+
+
+Missing features
+================
+The Linux DCCP implementation does not currently support all the features that are
+specified in RFCs 4340...42.
+
+The known bugs are at:
+
+	http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP
+
+For more up-to-date versions of the DCCP implementation, please consider using
+the experimental DCCP test tree; instructions for checking this out are on:
+http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree
+
+
+Socket options
+==============
+DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
+a policy ID as argument and can only be set before the connection (i.e. changes
+during an established connection are not supported). Currently, two policies are
+defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
+and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows passing a
+u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
+a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
+be formatted using a cmsg(3) message header filled in as follows::
+
+	cmsg->cmsg_level = SOL_DCCP;
+	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
+	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
+
+DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
+value is always interpreted as unbounded queue length. If different from zero,
+the interpretation of this parameter depends on the current dequeuing policy
+(see above): the "simple" policy will enforce a fixed queue size by returning
+EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
+lowest-priority packet first. The default value for this parameter is
+initialised from /proc/sys/net/dccp/default/tx_qlen.
+
+DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
+service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
+the socket will fall back to 0 (which means that no meaningful service code
+is present). On active sockets this is set before connect(); specifying more
+than one code has no effect (all subsequent service codes are ignored). The
+case is different for passive sockets, where multiple service codes (up to 32)
+can be set before calling bind().
+
+DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
+size (application payload size) in bytes, see RFC 4340, section 14.
+
+DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
+supported by the endpoint. The option value is an array of type uint8_t whose
+size is passed as option length. The minimum array size is 4 elements, the
+value returned in the optlen argument always reflects the true number of
+built-in CCIDs.
+
+DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
+time, combining the operation of the next two socket options. This option is
+preferable over the latter two, since often applications will use the same
+type of CCID for both directions; and mixed use of CCIDs is not currently well
+understood. This socket option takes as argument at least one uint8_t value, or
+an array of uint8_t values, which must match available CCIDs (see above). CCIDs
+must be registered on the socket before calling connect() or listen().
+
+DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
+the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
+Please note that the getsockopt argument type here is ``int``, not uint8_t.
+
+DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
+
+DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
+timewait state when closing the connection (RFC 4340, 8.3). The usual case is
+that the closing server sends a CloseReq, whereupon the client holds timewait
+state. When this boolean socket option is on, the server sends a Close instead
+and will enter TIMEWAIT. This option must be set after accept() returns.
+
+DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
+partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
+always cover the entire packet and that only fully covered application data is
+accepted by the receiver. Hence, when using this feature on the sender, it must
+be enabled at the receiver too, with a suitable choice of CsCov.
+
+DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
+	range 0..15 are acceptable. The default setting is 0 (full coverage),
+	values between 1..15 indicate partial coverage.
+
+DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
+	sets a threshold, where again values 0..15 are acceptable. The default
+	of 0 means that all packets with a partial coverage will be discarded.
+	Values in the range 1..15 indicate that packets with minimally such a
+	coverage value are also acceptable. The higher the number, the more
+	restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
+	settings are inherited to the child socket after accept().
+
+The following two options apply to CCID 3 exclusively and are getsockopt()-only.
+In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
+
+DCCP_SOCKOPT_CCID_RX_INFO
+	Returns a ``struct tfrc_rx_info`` in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_rx_info).
+
+DCCP_SOCKOPT_CCID_TX_INFO
+	Returns a ``struct tfrc_tx_info`` in optval; the buffer for optval and
+	optlen must be set to at least sizeof(struct tfrc_tx_info).
+
+On unidirectional connections it is useful to close the unused half-connection
+via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
+
+
+Sysctl variables
+================
+Several DCCP default parameters can be managed by the following sysctls
+(sysctl net.dccp.default or /proc/sys/net/dccp/default):
+
+request_retries
+	The number of active connection initiation retries (the number of
+	Requests minus one) before timing out. In addition, it also governs
+	the behaviour of the other, passive side: this variable also sets
+	the number of times DCCP repeats sending a Response when the initial
+	handshake does not progress from RESPOND to OPEN (i.e. when no Ack
+	is received after the initial Request).  This value should be greater
+	than 0, suggested is less than 10. Analogue of tcp_syn_retries.
+
+retries1
+	How often a DCCP Response is retransmitted until the listening DCCP
+	side considers its connecting peer dead. Analogue of tcp_retries1.
+
+retries2
+	The number of times a general DCCP packet is retransmitted. This has
+	importance for retransmitted acknowledgments and feature negotiation,
+	data packets are never retransmitted. Analogue of tcp_retries2.
+
+tx_ccid = 2
+	Default CCID for the sender-receiver half-connection. Depending on the
+	choice of CCID, the Send Ack Vector feature is enabled automatically.
+
+rx_ccid = 2
+	Default CCID for the receiver-sender half-connection; see tx_ccid.
+
+seq_window = 100
+	The initial sequence window (sec. 7.5.2) of the sender. This influences
+	the local ackno validity and the remote seqno validity windows (7.5.1).
+	Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
+
+tx_qlen = 5
+	The size of the transmit buffer in packets. A value of 0 corresponds
+	to an unbounded transmit buffer.
+
+sync_ratelimit = 125 ms
+	The timeout between subsequent DCCP-Sync packets sent in response to
+	sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
+	of this parameter is milliseconds; a value of 0 disables rate-limiting.
+
+
+IOCTLS
+======
+FIONREAD
+	Works as in udp(7): returns in the ``int`` argument pointer the size of
+	the next pending datagram in bytes, or 0 when no datagram is pending.
+
+
+Other tunables
+==============
+Per-route rto_min support
+	CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
+	of the RTO timer. This setting can be modified via the 'rto_min' option
+	of iproute2; for example::
+
+		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
+		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
+		> ip route show dev wlan0
+
+	CCID-3 also supports the rto_min setting: it is used to define the lower
+	bound for the expiry of the nofeedback timer. This can be useful on LANs
+	with very low RTTs (e.g., loopback, Gbit ethernet).
+
+
+Notes
+=====
+DCCP does not travel through NAT successfully at present on many boxes. This is
+because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
+support for DCCP has been added.
diff --git a/Documentation/networking/dccp.txt b/Documentation/networking/dccp.txt
deleted file mode 100644
index 55c575fcaf17..000000000000
--- a/Documentation/networking/dccp.txt
+++ /dev/null
@@ -1,207 +0,0 @@
-DCCP protocol
-=============
-
-
-Contents
-========
-- Introduction
-- Missing features
-- Socket options
-- Sysctl variables
-- IOCTLs
-- Other tunables
-- Notes
-
-
-Introduction
-============
-Datagram Congestion Control Protocol (DCCP) is an unreliable, connection
-oriented protocol designed to solve issues present in UDP and TCP, particularly
-for real-time and multimedia (streaming) traffic.
-It divides into a base protocol (RFC 4340) and pluggable congestion control
-modules called CCIDs. Like pluggable TCP congestion control, at least one CCID
-needs to be enabled in order for the protocol to function properly. In the Linux
-implementation, this is the TCP-like CCID2 (RFC 4341). Additional CCIDs, such as
-the TCP-friendly CCID3 (RFC 4342), are optional.
-For a brief introduction to CCIDs and suggestions for choosing a CCID to match
-given applications, see section 10 of RFC 4340.
-
-It has a base protocol and pluggable congestion control IDs (CCIDs).
-
-DCCP is a Proposed Standard (RFC 2026), and the homepage for DCCP as a protocol
-is at http://www.ietf.org/html.charters/dccp-charter.html
-
-
-Missing features
-================
-The Linux DCCP implementation does not currently support all the features that are
-specified in RFCs 4340...42.
-
-The known bugs are at:
-	http://www.linuxfoundation.org/collaborate/workgroups/networking/todo#DCCP
-
-For more up-to-date versions of the DCCP implementation, please consider using
-the experimental DCCP test tree; instructions for checking this out are on:
-http://www.linuxfoundation.org/collaborate/workgroups/networking/dccp_testing#Experimental_DCCP_source_tree
-
-
-Socket options
-==============
-DCCP_SOCKOPT_QPOLICY_ID sets the dequeuing policy for outgoing packets. It takes
-a policy ID as argument and can only be set before the connection (i.e. changes
-during an established connection are not supported). Currently, two policies are
-defined: the "simple" policy (DCCPQ_POLICY_SIMPLE), which does nothing special,
-and a priority-based variant (DCCPQ_POLICY_PRIO). The latter allows to pass an
-u32 priority value as ancillary data to sendmsg(), where higher numbers indicate
-a higher packet priority (similar to SO_PRIORITY). This ancillary data needs to
-be formatted using a cmsg(3) message header filled in as follows:
-	cmsg->cmsg_level = SOL_DCCP;
-	cmsg->cmsg_type	 = DCCP_SCM_PRIORITY;
-	cmsg->cmsg_len	 = CMSG_LEN(sizeof(uint32_t));	/* or CMSG_LEN(4) */
-
-DCCP_SOCKOPT_QPOLICY_TXQLEN sets the maximum length of the output queue. A zero
-value is always interpreted as unbounded queue length. If different from zero,
-the interpretation of this parameter depends on the current dequeuing policy
-(see above): the "simple" policy will enforce a fixed queue size by returning
-EAGAIN, whereas the "prio" policy enforces a fixed queue length by dropping the
-lowest-priority packet first. The default value for this parameter is
-initialised from /proc/sys/net/dccp/default/tx_qlen.
-
-DCCP_SOCKOPT_SERVICE sets the service. The specification mandates use of
-service codes (RFC 4340, sec. 8.1.2); if this socket option is not set,
-the socket will fall back to 0 (which means that no meaningful service code
-is present). On active sockets this is set before connect(); specifying more
-than one code has no effect (all subsequent service codes are ignored). The
-case is different for passive sockets, where multiple service codes (up to 32)
-can be set before calling bind().
-
-DCCP_SOCKOPT_GET_CUR_MPS is read-only and retrieves the current maximum packet
-size (application payload size) in bytes, see RFC 4340, section 14.
-
-DCCP_SOCKOPT_AVAILABLE_CCIDS is also read-only and returns the list of CCIDs
-supported by the endpoint. The option value is an array of type uint8_t whose
-size is passed as option length. The minimum array size is 4 elements, the
-value returned in the optlen argument always reflects the true number of
-built-in CCIDs.
-
-DCCP_SOCKOPT_CCID is write-only and sets both the TX and RX CCIDs at the same
-time, combining the operation of the next two socket options. This option is
-preferable over the latter two, since often applications will use the same
-type of CCID for both directions; and mixed use of CCIDs is not currently well
-understood. This socket option takes as argument at least one uint8_t value, or
-an array of uint8_t values, which must match available CCIDS (see above). CCIDs
-must be registered on the socket before calling connect() or listen().
-
-DCCP_SOCKOPT_TX_CCID is read/write. It returns the current CCID (if set) or sets
-the preference list for the TX CCID, using the same format as DCCP_SOCKOPT_CCID.
-Please note that the getsockopt argument type here is `int', not uint8_t.
-
-DCCP_SOCKOPT_RX_CCID is analogous to DCCP_SOCKOPT_TX_CCID, but for the RX CCID.
-
-DCCP_SOCKOPT_SERVER_TIMEWAIT enables the server (listening socket) to hold
-timewait state when closing the connection (RFC 4340, 8.3). The usual case is
-that the closing server sends a CloseReq, whereupon the client holds timewait
-state. When this boolean socket option is on, the server sends a Close instead
-and will enter TIMEWAIT. This option must be set after accept() returns.
-
-DCCP_SOCKOPT_SEND_CSCOV and DCCP_SOCKOPT_RECV_CSCOV are used for setting the
-partial checksum coverage (RFC 4340, sec. 9.2). The default is that checksums
-always cover the entire packet and that only fully covered application data is
-accepted by the receiver. Hence, when using this feature on the sender, it must
-be enabled at the receiver, too with suitable choice of CsCov.
-
-DCCP_SOCKOPT_SEND_CSCOV sets the sender checksum coverage. Values in the
-	range 0..15 are acceptable. The default setting is 0 (full coverage),
-	values between 1..15 indicate partial coverage.
-DCCP_SOCKOPT_RECV_CSCOV is for the receiver and has a different meaning: it
-	sets a threshold, where again values 0..15 are acceptable. The default
-	of 0 means that all packets with a partial coverage will be discarded.
-	Values in the range 1..15 indicate that packets with minimally such a
-	coverage value are also acceptable. The higher the number, the more
-	restrictive this setting (see [RFC 4340, sec. 9.2.1]). Partial coverage
-	settings are inherited to the child socket after accept().
-
-The following two options apply to CCID 3 exclusively and are getsockopt()-only.
-In either case, a TFRC info struct (defined in <linux/tfrc.h>) is returned.
-DCCP_SOCKOPT_CCID_RX_INFO
-	Returns a `struct tfrc_rx_info' in optval; the buffer for optval and
-	optlen must be set to at least sizeof(struct tfrc_rx_info).
-DCCP_SOCKOPT_CCID_TX_INFO
-	Returns a `struct tfrc_tx_info' in optval; the buffer for optval and
-	optlen must be set to at least sizeof(struct tfrc_tx_info).
-
-On unidirectional connections it is useful to close the unused half-connection
-via shutdown (SHUT_WR or SHUT_RD): this will reduce per-packet processing costs.
-
-
-Sysctl variables
-================
-Several DCCP default parameters can be managed by the following sysctls
-(sysctl net.dccp.default or /proc/sys/net/dccp/default):
-
-request_retries
-	The number of active connection initiation retries (the number of
-	Requests minus one) before timing out. In addition, it also governs
-	the behaviour of the other, passive side: this variable also sets
-	the number of times DCCP repeats sending a Response when the initial
-	handshake does not progress from RESPOND to OPEN (i.e. when no Ack
-	is received after the initial Request).  This value should be greater
-	than 0, suggested is less than 10. Analogue of tcp_syn_retries.
-
-retries1
-	How often a DCCP Response is retransmitted until the listening DCCP
-	side considers its connecting peer dead. Analogue of tcp_retries1.
-
-retries2
-	The number of times a general DCCP packet is retransmitted. This has
-	importance for retransmitted acknowledgments and feature negotiation,
-	data packets are never retransmitted. Analogue of tcp_retries2.
-
-tx_ccid = 2
-	Default CCID for the sender-receiver half-connection. Depending on the
-	choice of CCID, the Send Ack Vector feature is enabled automatically.
-
-rx_ccid = 2
-	Default CCID for the receiver-sender half-connection; see tx_ccid.
-
-seq_window = 100
-	The initial sequence window (sec. 7.5.2) of the sender. This influences
-	the local ackno validity and the remote seqno validity windows (7.5.1).
-	Values in the range Wmin = 32 (RFC 4340, 7.5.2) up to 2^32-1 can be set.
-
-tx_qlen = 5
-	The size of the transmit buffer in packets. A value of 0 corresponds
-	to an unbounded transmit buffer.
-
-sync_ratelimit = 125 ms
-	The timeout between subsequent DCCP-Sync packets sent in response to
-	sequence-invalid packets on the same socket (RFC 4340, 7.5.4). The unit
-	of this parameter is milliseconds; a value of 0 disables rate-limiting.
-
-
-IOCTLS
-======
-FIONREAD
-	Works as in udp(7): returns in the `int' argument pointer the size of
-	the next pending datagram in bytes, or 0 when no datagram is pending.
-
-
-Other tunables
-==============
-Per-route rto_min support
-	CCID-2 supports the RTAX_RTO_MIN per-route setting for the minimum value
-	of the RTO timer. This setting can be modified via the 'rto_min' option
-	of iproute2; for example:
-		> ip route change 10.0.0.0/24   rto_min 250j dev wlan0
-		> ip route add    10.0.0.254/32 rto_min 800j dev wlan0
-		> ip route show dev wlan0
-	CCID-3 also supports the rto_min setting: it is used to define the lower
-	bound for the expiry of the nofeedback timer. This can be useful on LANs
-	with very low RTTs (e.g., loopback, Gbit ethernet).
-
-
-Notes
-=====
-DCCP does not travel through NAT successfully at present on many boxes. This is
-because the checksum covers the pseudo-header as per TCP and UDP. Linux NAT
-support for DCCP has been added.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 4c8e896490e0..3894043332de 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -48,6 +48,7 @@ Contents:
    cdc_mbim
    cops
    cxacru
+   dccp
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 8447bb44ef7c452cbc94c04fc38d4946a3ef9165 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:29 +0200
Subject: docs: networking: convert dctcp.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dctcp.rst | 52 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/dctcp.txt | 44 --------------------------------
 Documentation/networking/index.rst |  1 +
 3 files changed, 53 insertions(+), 44 deletions(-)
 create mode 100644 Documentation/networking/dctcp.rst
 delete mode 100644 Documentation/networking/dctcp.txt

diff --git a/Documentation/networking/dctcp.rst b/Documentation/networking/dctcp.rst
new file mode 100644
index 000000000000..4cc8bb2dad50
--- /dev/null
+++ b/Documentation/networking/dctcp.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+DCTCP (DataCenter TCP)
+======================
+
+DCTCP is an enhancement to the TCP congestion control algorithm for data
+center networks and leverages Explicit Congestion Notification (ECN) in
+the data center network to provide multi-bit feedback to the end hosts.
+
+To enable it on end hosts::
+
+  sysctl -w net.ipv4.tcp_congestion_control=dctcp
+  sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional)
+
+All switches in the data center network running DCTCP must support ECN
+marking and be configured for marking when reaching defined switch buffer
+thresholds. The default ECN marking threshold heuristic for DCTCP on
+switches is 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps,
+but might need further careful tweaking.
+
+For more details, see the documents below:
+
+Paper:
+
+The algorithm is further described in detail in the following two
+SIGCOMM/SIGMETRICS papers:
+
+ i) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
+    Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
+
+      "Data Center TCP (DCTCP)", Data Center Networks session
+
+      Proc. ACM SIGCOMM, New Delhi, 2010.
+
+    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
+    http://www.sigcomm.org/ccr/papers/2010/October/1851275.1851192
+
+ii) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
+
+      "Analysis of DCTCP: Stability, Convergence, and Fairness"
+      Proc. ACM SIGMETRICS, San Jose, 2011.
+
+    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
+
+IETF informational draft:
+
+  http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00
+
+DCTCP site:
+
+  http://simula.stanford.edu/~alizade/Site/DCTCP.html
diff --git a/Documentation/networking/dctcp.txt b/Documentation/networking/dctcp.txt
deleted file mode 100644
index 13a857753208..000000000000
--- a/Documentation/networking/dctcp.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-DCTCP (DataCenter TCP)
-----------------------
-
-DCTCP is an enhancement to the TCP congestion control algorithm for data
-center networks and leverages Explicit Congestion Notification (ECN) in
-the data center network to provide multi-bit feedback to the end hosts.
-
-To enable it on end hosts:
-
-  sysctl -w net.ipv4.tcp_congestion_control=dctcp
-  sysctl -w net.ipv4.tcp_ecn_fallback=0 (optional)
-
-All switches in the data center network running DCTCP must support ECN
-marking and be configured for marking when reaching defined switch buffer
-thresholds. The default ECN marking threshold heuristic for DCTCP on
-switches is 20 packets (30KB) at 1Gbps, and 65 packets (~100KB) at 10Gbps,
-but might need further careful tweaking.
-
-For more details, see below documents:
-
-Paper:
-
-The algorithm is further described in detail in the following two
-SIGCOMM/SIGMETRICS papers:
-
- i) Mohammad Alizadeh, Albert Greenberg, David A. Maltz, Jitendra Padhye,
-    Parveen Patel, Balaji Prabhakar, Sudipta Sengupta, and Murari Sridharan:
-      "Data Center TCP (DCTCP)", Data Center Networks session
-      Proc. ACM SIGCOMM, New Delhi, 2010.
-    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp-final.pdf
-    http://www.sigcomm.org/ccr/papers/2010/October/1851275.1851192
-
-ii) Mohammad Alizadeh, Adel Javanmard, and Balaji Prabhakar:
-      "Analysis of DCTCP: Stability, Convergence, and Fairness"
-      Proc. ACM SIGMETRICS, San Jose, 2011.
-    http://simula.stanford.edu/~alizade/Site/DCTCP_files/dctcp_analysis-full.pdf
-
-IETF informational draft:
-
-  http://tools.ietf.org/html/draft-bensley-tcpm-dctcp-00
-
-DCTCP site:
-
-  http://simula.stanford.edu/~alizade/Site/DCTCP.html
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 3894043332de..9e83d3bda4e0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -49,6 +49,7 @@ Contents:
    cops
    cxacru
    dccp
+   dctcp
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 9a69fb9c21c4bf4107becb877729544759bdd059 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:30 +0200
Subject: docs: networking: convert decnet.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark lists as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/kernel-parameters.txt |   2 +-
 Documentation/networking/decnet.rst             | 243 ++++++++++++++++++++++++
 Documentation/networking/decnet.txt             | 230 ----------------------
 Documentation/networking/index.rst              |   1 +
 MAINTAINERS                                     |   2 +-
 net/decnet/Kconfig                              |   4 +-
 6 files changed, 248 insertions(+), 234 deletions(-)
 create mode 100644 Documentation/networking/decnet.rst
 delete mode 100644 Documentation/networking/decnet.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index f2a93c8679e8..b23ab11587a6 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -831,7 +831,7 @@
 
 	decnet.addr=	[HW,NET]
 			Format: <area>[,<node>]
-			See also Documentation/networking/decnet.txt.
+			See also Documentation/networking/decnet.rst.
 
 	default_hugepagesz=
 			[same as hugepagesz=] The size of the default
diff --git a/Documentation/networking/decnet.rst b/Documentation/networking/decnet.rst
new file mode 100644
index 000000000000..b8bc11ff8370
--- /dev/null
+++ b/Documentation/networking/decnet.rst
@@ -0,0 +1,243 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+Linux DECnet Networking Layer Information
+=========================================
+
+1. Other documentation....
+==========================
+
+   - Project Home Pages
+     - http://www.chygwyn.com/				   - Kernel info
+     - http://linux-decnet.sourceforge.net/                - Userland tools
+     - http://www.sourceforge.net/projects/linux-decnet/   - Status page
+
+2. Configuring the kernel
+=========================
+
+Be sure to turn on the following options:
+
+    - CONFIG_DECNET (obviously)
+    - CONFIG_PROC_FS (to see what's going on)
+    - CONFIG_SYSCTL (for easy configuration)
+
+if you want to try out router support (not properly debugged yet)
+you'll need the following options as well...
+
+    - CONFIG_DECNET_ROUTER (to be able to add/delete routes)
+    - CONFIG_NETFILTER (will be required for the DECnet routing daemon)
+
+Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
+that you need it, in general you won't and it can cause ifconfig to
+malfunction.
+
+Run time configuration has changed slightly from the 2.4 system. If you
+want to configure an endnode, then the simplified procedure is as follows:
+
+ - Set the MAC address on your ethernet card before starting _any_ other
+   network protocols.
+
+As soon as your network card is brought into the UP state, DECnet should
+start working. If you need something more complicated or are unsure how
+to set the MAC address, see the next section. Also all configurations which
+worked with 2.4 will work under 2.5 with no change.
+
+3. Command line options
+=======================
+
+You can set a DECnet address on the kernel command line for compatibility
+with the 2.4 configuration procedure, but in general it's not needed any more.
+If you do set a DECnet address on the command line, it has only one purpose
+which is that it's added to the addresses on the loopback device.
+
+With 2.4 kernels, DECnet would only recognise addresses as local if they
+were added to the loopback device. In 2.5, any local interface address
+can be used to loop back to the local machine. Of course this does not
+prevent you adding further addresses to the loopback device if you
+want to.
+
+N.B. Since the address list of an interface determines the addresses for
+which "hello" messages are sent, if you don't set an address on the loopback
+interface then you won't see any entries in /proc/net/neigh for the local
+host until such time as you start a connection. This doesn't affect the
+operation of the local communications in any other way though.
+
+The kernel command line takes options looking like the following::
+
+    decnet.addr=1,2
+
+the two numbers are the node address 1,2 = 1.2. For 2.2.xx kernels
+and early 2.3.xx kernels, you must use a comma when specifying the
+DECnet address like this. For more recent 2.3.xx kernels, you may
+use almost any character except space, although a `.` would be the most
+obvious choice :-)
+
+There used to be a third number specifying the node type. This option
+has gone away in favour of a per interface node type. This is now set
+using /proc/sys/net/decnet/conf/<dev>/forwarding. This file can be
+set with a single digit, 0=EndNode, 1=L1 Router and  2=L2 Router.
+
+There are also equivalent options for modules. The node address can
+also be set through the /proc/sys/net/decnet/ files, as can other system
+parameters.
+
+Currently the only supported devices are ethernet and ip_gre. The
+ethernet address of your ethernet card has to be set according to the DECnet
+address of the node in order for it to be autoconfigured (and then appear in
+/proc/net/decnet_dev). There is a utility available at the above
+FTP sites called dn2ethaddr which can compute the correct ethernet
+address to use. The address can be set by ifconfig either before or
+at the time the device is brought up. If you are using RedHat you can
+add the line::
+
+    MACADDR=AA:00:04:00:03:04
+
+or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or
+wherever your network card's configuration lives. Setting the MAC address
+of your ethernet card to an address starting with "hi-ord" will cause a
+DECnet address which matches to be added to the interface (which you can
+verify with iproute2).
+
+The default device for routing can be set through the /proc filesystem
+by setting /proc/sys/net/decnet/default_device to the
+device you want DECnet to route packets out of when no specific route
+is available. Usually this will be eth0, for example::
+
+    echo -n "eth0" >/proc/sys/net/decnet/default_device
+
+If you don't set the default device, then it will default to the first
+ethernet card which has been autoconfigured as described above. You can
+confirm that by looking in the default_device file of course.
+
+There is a list of what the other files under /proc/sys/net/decnet/ do
+on the kernel patch web site (shown above).
+
+4. Run time kernel configuration
+================================
+
+
+This is either done through the sysctl/proc interface (see the kernel web
+pages for details on what the various options do) or through the iproute2
+package in the same way as IPv4/6 configuration is performed.
+
+Documentation for iproute2 is included with the package, although there is
+as yet no specific section on DECnet, most of the features apply to both
+IP and DECnet, albeit with DECnet addresses instead of IP addresses and
+a reduced functionality.
+
+If you want to configure a DECnet router you'll need the iproute2 package
+since it's the _only_ way to add and delete routes currently. Eventually
+there will be a routing daemon to send and receive routing messages for
+each interface and update the kernel routing tables accordingly. The
+routing daemon will use netfilter to listen to routing packets, and
+rtnetlink to update the kernel's routing tables.
+
+The DECnet raw socket layer has been removed since it was there purely
+for use by the routing daemon which will now use netfilter (a much cleaner
+and more generic solution) instead.
+
+5. How can I tell if it's working?
+==================================
+
+Here is a quick guide of what to look for in order to know if your DECnet
+kernel subsystem is working.
+
+   - Is the node address set (see /proc/sys/net/decnet/node_address)
+   - Is the node of the correct type
+     (see /proc/sys/net/decnet/conf/<dev>/forwarding)
+   - Is the Ethernet MAC address of each Ethernet card set to match
+     the DECnet address. If in doubt use the dn2ethaddr utility available
+     at the ftp archive.
+   - If the previous two steps are satisfied, and the Ethernet card is up,
+     you should find that it is listed in /proc/net/decnet_dev and also
+     that it appears as a directory in /proc/sys/net/decnet/conf/. The
+     loopback device (lo) should also appear and is required to communicate
+     within a node.
+   - If you have any DECnet routers on your network, they should appear
+     in /proc/net/decnet_neigh, otherwise this file will only contain the
+     entry for the node itself (if it doesn't check to see if lo is up).
+   - If you want to send to any node which is not listed in the
+     /proc/net/decnet_neigh file, you'll need to set the default device
+     to point to an Ethernet card with connection to a router. This is
+     again done with the /proc/sys/net/decnet/default_device file.
+   - Try starting a simple server and client, like the dnping/dnmirror
+     over the loopback interface. With luck they should communicate.
+     For this step and those after, you'll need the DECnet library
+     which can be obtained from the above ftp sites as well as the
+     actual utilities themselves.
+   - If this seems to work, then try talking to a node on your local
+     network, and see if you can obtain the same results.
+   - At this point you are on your own... :-)
+
+6. How to send a bug report
+===========================
+
+If you've found a bug and want to report it, then there are several things
+you can do to help me work out exactly what it is that is wrong. Useful
+information (_most_ of which _is_ _essential_) includes:
+
+ - What kernel version are you running ?
+ - What version of the patch are you running ?
+ - How far through the above set of tests can you get ?
+ - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ?
+ - Which services are you running ?
+ - Which client caused the problem ?
+ - How much data was being transferred ?
+ - Was the network congested ?
+ - How can the problem be reproduced ?
+ - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of
+   tcpdump don't understand how to dump DECnet properly, so including
+   the hex listing of the packet contents is _essential_, usually the -x flag.
+   You may also need to increase the length grabbed with the -s flag. The
+   -e flag also provides very useful information (ethernet MAC addresses))
+
+7. MAC FAQ
+==========
+
+A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet
+interact and how to get the best performance from your hardware.
+
+Ethernet cards are designed to normally only pass received network frames
+to a host computer when they are addressed to it, or to the broadcast address.
+
+Linux has an interface which allows the setting of extra addresses for
+an ethernet card to listen to. If the ethernet card supports it, the
+filtering operation will be done in hardware, if not the extra unwanted packets
+received will be discarded by the host computer. In the latter case,
+significant processor time and bus bandwidth can be used up on a busy
+network (see the NAPI documentation for a longer explanation of these
+effects).
+
+DECnet makes use of this interface to allow running DECnet on an ethernet
+card which has already been configured using TCP/IP (presumably using the
+built in MAC address of the card, as usual) and/or to allow multiple DECnet
+addresses on each physical interface. If you do this, be aware that if your
+ethernet card doesn't support perfect hashing in its MAC address filter
+then your computer will be doing more work than required. Some cards
+will simply set themselves into promiscuous mode in order to receive
+packets from the DECnet specified addresses. So if you have one of these
+cards it's better to set the MAC address of the card as described above
+to gain the best efficiency. Better still is to use a card which supports
+NAPI as well.
+
+
+8. Mailing list
+===============
+
+If you are keen to get involved in development, or want to ask questions
+about configuration, or even just report bugs, then there is a mailing
+list that you can join, details are at:
+
+http://sourceforge.net/mail/?group_id=4993
+
+9. Legal Info
+=============
+
+The Linux DECnet project team have placed their code under the GPL. The
+software is provided "as is" and without warranty express or implied.
+DECnet is a trademark of Compaq. This software is not a product of
+Compaq. We acknowledge the help of people at Compaq in providing extra
+documentation above and beyond what was previously publicly available.
+
+Steve Whitehouse <SteveW@ACM.org>
+
diff --git a/Documentation/networking/decnet.txt b/Documentation/networking/decnet.txt
deleted file mode 100644
index d192f8b9948b..000000000000
--- a/Documentation/networking/decnet.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-                    Linux DECnet Networking Layer Information
-                   ===========================================
-
-1) Other documentation....
-
-   o Project Home Pages
-       http://www.chygwyn.com/                      	    - Kernel info
-       http://linux-decnet.sourceforge.net/                - Userland tools
-       http://www.sourceforge.net/projects/linux-decnet/   - Status page
-
-2) Configuring the kernel
-
-Be sure to turn on the following options:
-
-    CONFIG_DECNET (obviously)
-    CONFIG_PROC_FS (to see what's going on)
-    CONFIG_SYSCTL (for easy configuration)
-
-if you want to try out router support (not properly debugged yet)
-you'll need the following options as well...
-
-    CONFIG_DECNET_ROUTER (to be able to add/delete routes)
-    CONFIG_NETFILTER (will be required for the DECnet routing daemon)
-
-Don't turn on SIOCGIFCONF support for DECnet unless you are really sure
-that you need it, in general you won't and it can cause ifconfig to
-malfunction.
-
-Run time configuration has changed slightly from the 2.4 system. If you
-want to configure an endnode, then the simplified procedure is as follows:
-
- o Set the MAC address on your ethernet card before starting _any_ other
-   network protocols.
-
-As soon as your network card is brought into the UP state, DECnet should
-start working. If you need something more complicated or are unsure how
-to set the MAC address, see the next section. Also all configurations which
-worked with 2.4 will work under 2.5 with no change.
-
-3) Command line options
-
-You can set a DECnet address on the kernel command line for compatibility
-with the 2.4 configuration procedure, but in general it's not needed any more.
-If you do st a DECnet address on the command line, it has only one purpose
-which is that its added to the addresses on the loopback device.
-
-With 2.4 kernels, DECnet would only recognise addresses as local if they
-were added to the loopback device. In 2.5, any local interface address
-can be used to loop back to the local machine. Of course this does not
-prevent you adding further addresses to the loopback device if you
-want to.
-
-N.B. Since the address list of an interface determines the addresses for
-which "hello" messages are sent, if you don't set an address on the loopback
-interface then you won't see any entries in /proc/net/neigh for the local
-host until such time as you start a connection. This doesn't affect the
-operation of the local communications in any other way though.
-
-The kernel command line takes options looking like the following:
-
-    decnet.addr=1,2
-
-the two numbers are the node address 1,2 = 1.2 For 2.2.xx kernels
-and early 2.3.xx kernels, you must use a comma when specifying the
-DECnet address like this. For more recent 2.3.xx kernels, you may
-use almost any character except space, although a `.` would be the most
-obvious choice :-)
-
-There used to be a third number specifying the node type. This option
-has gone away in favour of a per interface node type. This is now set
-using /proc/sys/net/decnet/conf/<dev>/forwarding. This file can be
-set with a single digit, 0=EndNode, 1=L1 Router and  2=L2 Router.
-
-There are also equivalent options for modules. The node address can
-also be set through the /proc/sys/net/decnet/ files, as can other system
-parameters.
-
-Currently the only supported devices are ethernet and ip_gre. The
-ethernet address of your ethernet card has to be set according to the DECnet
-address of the node in order for it to be autoconfigured (and then appear in
-/proc/net/decnet_dev). There is a utility available at the above
-FTP sites called dn2ethaddr which can compute the correct ethernet
-address to use. The address can be set by ifconfig either before or
-at the time the device is brought up. If you are using RedHat you can
-add the line:
-
-    MACADDR=AA:00:04:00:03:04
-
-or something similar, to /etc/sysconfig/network-scripts/ifcfg-eth0 or
-wherever your network card's configuration lives. Setting the MAC address
-of your ethernet card to an address starting with "hi-ord" will cause a
-DECnet address which matches to be added to the interface (which you can
-verify with iproute2).
-
-The default device for routing can be set through the /proc filesystem
-by setting /proc/sys/net/decnet/default_device to the
-device you want DECnet to route packets out of when no specific route
-is available. Usually this will be eth0, for example:
-
-    echo -n "eth0" >/proc/sys/net/decnet/default_device
-
-If you don't set the default device, then it will default to the first
-ethernet card which has been autoconfigured as described above. You can
-confirm that by looking in the default_device file of course.
-
-There is a list of what the other files under /proc/sys/net/decnet/ do
-on the kernel patch web site (shown above).
-
-4) Run time kernel configuration
-
-This is either done through the sysctl/proc interface (see the kernel web
-pages for details on what the various options do) or through the iproute2
-package in the same way as IPv4/6 configuration is performed.
-
-Documentation for iproute2 is included with the package, although there is
-as yet no specific section on DECnet, most of the features apply to both
-IP and DECnet, albeit with DECnet addresses instead of IP addresses and
-a reduced functionality.
-
-If you want to configure a DECnet router you'll need the iproute2 package
-since its the _only_ way to add and delete routes currently. Eventually
-there will be a routing daemon to send and receive routing messages for
-each interface and update the kernel routing tables accordingly. The
-routing daemon will use netfilter to listen to routing packets, and
-rtnetlink to update the kernels routing tables. 
-
-The DECnet raw socket layer has been removed since it was there purely
-for use by the routing daemon which will now use netfilter (a much cleaner
-and more generic solution) instead.
-
-5) How can I tell if its working ?
-
-Here is a quick guide of what to look for in order to know if your DECnet
-kernel subsystem is working.
-
-   - Is the node address set (see /proc/sys/net/decnet/node_address)
-   - Is the node of the correct type 
-                             (see /proc/sys/net/decnet/conf/<dev>/forwarding)
-   - Is the Ethernet MAC address of each Ethernet card set to match
-     the DECnet address. If in doubt use the dn2ethaddr utility available
-     at the ftp archive.
-   - If the previous two steps are satisfied, and the Ethernet card is up,
-     you should find that it is listed in /proc/net/decnet_dev and also
-     that it appears as a directory in /proc/sys/net/decnet/conf/. The
-     loopback device (lo) should also appear and is required to communicate
-     within a node.
-   - If you have any DECnet routers on your network, they should appear
-     in /proc/net/decnet_neigh, otherwise this file will only contain the
-     entry for the node itself (if it doesn't check to see if lo is up).
-   - If you want to send to any node which is not listed in the
-     /proc/net/decnet_neigh file, you'll need to set the default device
-     to point to an Ethernet card with connection to a router. This is
-     again done with the /proc/sys/net/decnet/default_device file.
-   - Try starting a simple server and client, like the dnping/dnmirror
-     over the loopback interface. With luck they should communicate.
-     For this step and those after, you'll need the DECnet library
-     which can be obtained from the above ftp sites as well as the
-     actual utilities themselves.
-   - If this seems to work, then try talking to a node on your local
-     network, and see if you can obtain the same results.
-   - At this point you are on your own... :-)
-
-6) How to send a bug report
-
-If you've found a bug and want to report it, then there are several things
-you can do to help me work out exactly what it is that is wrong. Useful
-information (_most_ of which _is_ _essential_) includes:
-
- - What kernel version are you running ?
- - What version of the patch are you running ?
- - How far though the above set of tests can you get ?
- - What is in the /proc/decnet* files and /proc/sys/net/decnet/* files ?
- - Which services are you running ?
- - Which client caused the problem ?
- - How much data was being transferred ?
- - Was the network congested ?
- - How can the problem be reproduced ?
- - Can you use tcpdump to get a trace ? (N.B. Most (all?) versions of 
-   tcpdump don't understand how to dump DECnet properly, so including
-   the hex listing of the packet contents is _essential_, usually the -x flag.
-   You may also need to increase the length grabbed with the -s flag. The
-   -e flag also provides very useful information (ethernet MAC addresses))
-
-7) MAC FAQ
-
-A quick FAQ on ethernet MAC addresses to explain how Linux and DECnet
-interact and how to get the best performance from your hardware. 
-
-Ethernet cards are designed to normally only pass received network frames 
-to a host computer when they are addressed to it, or to the broadcast address.
-
-Linux has an interface which allows the setting of extra addresses for
-an ethernet card to listen to. If the ethernet card supports it, the
-filtering operation will be done in hardware, if not the extra unwanted packets
-received will be discarded by the host computer. In the latter case,
-significant processor time and bus bandwidth can be used up on a busy
-network (see the NAPI documentation for a longer explanation of these
-effects).
-
-DECnet makes use of this interface to allow running DECnet on an ethernet 
-card which has already been configured using TCP/IP (presumably using the 
-built in MAC address of the card, as usual) and/or to allow multiple DECnet
-addresses on each physical interface. If you do this, be aware that if your
-ethernet card doesn't support perfect hashing in its MAC address filter
-then your computer will be doing more work than required. Some cards
-will simply set themselves into promiscuous mode in order to receive
-packets from the DECnet specified addresses. So if you have one of these
-cards its better to set the MAC address of the card as described above
-to gain the best efficiency. Better still is to use a card which supports
-NAPI as well.
-
-
-8) Mailing list
-
-If you are keen to get involved in development, or want to ask questions
-about configuration, or even just report bugs, then there is a mailing
-list that you can join, details are at:
-
-http://sourceforge.net/mail/?group_id=4993
-
-9) Legal Info
-
-The Linux DECnet project team have placed their code under the GPL. The
-software is provided "as is" and without warranty express or implied.
-DECnet is a trademark of Compaq. This software is not a product of
-Compaq. We acknowledge the help of people at Compaq in providing extra
-documentation above and beyond what was previously publicly available.
-
-Steve Whitehouse <SteveW@ACM.org>
-
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 9e83d3bda4e0..e17432492745 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -50,6 +50,7 @@ Contents:
    cxacru
    dccp
    dctcp
+   decnet
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 453fe0713e68..7323bfc1720f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4728,7 +4728,7 @@ DECnet NETWORK LAYER
 L:	linux-decnet-user@lists.sourceforge.net
 S:	Orphan
 W:	http://linux-decnet.sourceforge.net
-F:	Documentation/networking/decnet.txt
+F:	Documentation/networking/decnet.rst
 F:	net/decnet/
 
 DECSTATION PLATFORM SUPPORT
diff --git a/net/decnet/Kconfig b/net/decnet/Kconfig
index 0935453ccfd5..8f98fb2f2ec9 100644
--- a/net/decnet/Kconfig
+++ b/net/decnet/Kconfig
@@ -15,7 +15,7 @@ config DECNET
 	  <http://linux-decnet.sourceforge.net/>.
 
 	  More detailed documentation is available in
-	  <file:Documentation/networking/decnet.txt>.
+	  <file:Documentation/networking/decnet.rst>.
 
 	  Be sure to say Y to "/proc file system support" and "Sysctl support"
 	  below when using DECnet, since you will need sysctl support to aid
@@ -40,4 +40,4 @@ config DECNET_ROUTER
 	  filtering" option will be required for the forthcoming routing daemon
 	  to work.
 
-	  See <file:Documentation/networking/decnet.txt> for more information.
+	  See <file:Documentation/networking/decnet.rst> for more information.
-- 
cgit v1.2.3-59-g8ed1b


From 5f32c920c23b75654a839aa87c344b2bcaf350e2 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:31 +0200
Subject: docs: networking: convert defza.txt to ReST

Not much to be done here:

- add SPDX header;
- add a document title;
- use :field: markup for the version number;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/defza.rst | 63 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/defza.txt | 57 ----------------------------------
 Documentation/networking/index.rst |  1 +
 3 files changed, 64 insertions(+), 57 deletions(-)
 create mode 100644 Documentation/networking/defza.rst
 delete mode 100644 Documentation/networking/defza.txt

diff --git a/Documentation/networking/defza.rst b/Documentation/networking/defza.rst
new file mode 100644
index 000000000000..73c2f793ea26
--- /dev/null
+++ b/Documentation/networking/defza.rst
@@ -0,0 +1,63 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+Notes on the DEC FDDIcontroller 700 (DEFZA-xx) driver
+=====================================================
+
+:Version: v.1.1.4
+
+
+DEC FDDIcontroller 700 is DEC's first-generation TURBOchannel FDDI
+network card, designed in 1990 specifically for the DECstation 5000
+model 200 workstation.  The board is a single attachment station and
+it was manufactured in two variations, both of which are supported.
+
+First is the SAS MMF DEFZA-AA option, the original design implementing
+the standard MMF-PMD, however with a pair of ST connectors rather than
+the usual MIC connector.  The other one is the SAS ThinWire/STP DEFZA-CA
+option, denoted 700-C, with the network medium selectable by a switch
+between the DEC proprietary ThinWire-PMD using a BNC connector and the
+standard STP-PMD using a DE-9F connector.  This option can interface to
+a DECconcentrator 500 device and, in the case of the STP-PMD, also other
+FDDI equipment and was designed to make it easier to transition from
+existing IEEE 802.3 10BASE2 Ethernet and IEEE 802.5 Token Ring networks
+by providing means to reuse existing cabling.
+
+This driver handles any number of cards installed in a single system.
+They get fddi0, fddi1, etc. interface names assigned in the order of
+increasing TURBOchannel slot numbers.
+
+The board only supports DMA on the receive side.  Transmission involves
+the use of PIO.  As a result under a heavy transmission load there will
+be a significant impact on system performance.
+
+The board supports a 64-entry CAM for matching destination addresses.
+Two entries are preoccupied by the Directed Beacon and Ring Purger
+multicast addresses and the rest is used as a multicast filter.  An
+all-multi mode is also supported for LLC frames and it is used if
+requested explicitly or if the CAM overflows.  The promiscuous mode
+supports separate enables for LLC and SMT frames, but this driver
+doesn't support changing them individually.
+
+
+Known problems:
+
+None.
+
+
+To do:
+
+5. MAC address change.  The card does not support changing the Media
+   Access Controller's address registers but a similar effect can be
+   achieved by adding an alias to the CAM.  There is no way to disable
+   matching against the original address though.
+
+7. Queueing incoming/outgoing SMT frames in the driver if the SMT
+   receive/RMC transmit ring is full. (?)
+
+8. Retrieving/reporting FDDI/SNMP stats.
+
+
+Both success and failure reports are welcome.
+
+Maciej W. Rozycki  <macro@linux-mips.org>
diff --git a/Documentation/networking/defza.txt b/Documentation/networking/defza.txt
deleted file mode 100644
index 663e4a906751..000000000000
--- a/Documentation/networking/defza.txt
+++ /dev/null
@@ -1,57 +0,0 @@
-Notes on the DEC FDDIcontroller 700 (DEFZA-xx) driver v.1.1.4.
-
-
-DEC FDDIcontroller 700 is DEC's first-generation TURBOchannel FDDI
-network card, designed in 1990 specifically for the DECstation 5000
-model 200 workstation.  The board is a single attachment station and
-it was manufactured in two variations, both of which are supported.
-
-First is the SAS MMF DEFZA-AA option, the original design implementing
-the standard MMF-PMD, however with a pair of ST connectors rather than
-the usual MIC connector.  The other one is the SAS ThinWire/STP DEFZA-CA
-option, denoted 700-C, with the network medium selectable by a switch
-between the DEC proprietary ThinWire-PMD using a BNC connector and the
-standard STP-PMD using a DE-9F connector.  This option can interface to
-a DECconcentrator 500 device and, in the case of the STP-PMD, also other
-FDDI equipment and was designed to make it easier to transition from
-existing IEEE 802.3 10BASE2 Ethernet and IEEE 802.5 Token Ring networks
-by providing means to reuse existing cabling.
-
-This driver handles any number of cards installed in a single system.
-They get fddi0, fddi1, etc. interface names assigned in the order of
-increasing TURBOchannel slot numbers.
-
-The board only supports DMA on the receive side.  Transmission involves
-the use of PIO.  As a result under a heavy transmission load there will
-be a significant impact on system performance.
-
-The board supports a 64-entry CAM for matching destination addresses.
-Two entries are preoccupied by the Directed Beacon and Ring Purger
-multicast addresses and the rest is used as a multicast filter.  An
-all-multi mode is also supported for LLC frames and it is used if
-requested explicitly or if the CAM overflows.  The promiscuous mode
-supports separate enables for LLC and SMT frames, but this driver
-doesn't support changing them individually.
-
-
-Known problems:
-
-None.
-
-
-To do:
-
-5. MAC address change.  The card does not support changing the Media
-   Access Controller's address registers but a similar effect can be
-   achieved by adding an alias to the CAM.  There is no way to disable
-   matching against the original address though.
-
-7. Queueing incoming/outgoing SMT frames in the driver if the SMT
-   receive/RMC transmit ring is full. (?)
-
-8. Retrieving/reporting FDDI/SNMP stats.
-
-
-Both success and failure reports are welcome.
-
-Maciej W. Rozycki  <macro@linux-mips.org>
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e17432492745..c893127004b9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -51,6 +51,7 @@ Contents:
    dccp
    dctcp
    decnet
+   defza
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 9dfe1361261be48c92fd7cb26909cbcd5d496220 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:32 +0200
Subject: docs: networking: convert dns_resolver.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;

- mark code blocks and literals as such;

- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dns_resolver.rst | 155 +++++++++++++++++++++++++++++
 Documentation/networking/dns_resolver.txt | 157 ------------------------------
 Documentation/networking/index.rst        |   1 +
 net/ceph/Kconfig                          |   2 +-
 net/dns_resolver/Kconfig                  |   2 +-
 net/dns_resolver/dns_key.c                |   2 +-
 net/dns_resolver/dns_query.c              |   2 +-
 7 files changed, 160 insertions(+), 161 deletions(-)
 create mode 100644 Documentation/networking/dns_resolver.rst
 delete mode 100644 Documentation/networking/dns_resolver.txt

diff --git a/Documentation/networking/dns_resolver.rst b/Documentation/networking/dns_resolver.rst
new file mode 100644
index 000000000000..add4d59a99a5
--- /dev/null
+++ b/Documentation/networking/dns_resolver.rst
@@ -0,0 +1,155 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+DNS Resolver Module
+===================
+
+.. Contents:
+
+ - Overview.
+ - Compilation.
+ - Setting up.
+ - Usage.
+ - Mechanism.
+ - Debugging.
+
+
+Overview
+========
+
+The DNS resolver module provides a way for kernel services to make DNS queries
+by way of requesting a key of key type dns_resolver.  These queries are
+upcalled to userspace through /sbin/request-key.
+
+These routines must be supported by userspace tools dns.upcall, cifs.upcall and
+request-key.  It is under development and does not yet provide the full feature
+set.  The features it does support include:
+
+ (*) Implements the dns_resolver key_type to contact userspace.
+
+It does not yet support the following AFS features:
+
+ (*) DNS query support for AFSDB resource record.
+
+This code is extracted from the CIFS filesystem.
+
+
+Compilation
+===========
+
+The module should be enabled by turning on the kernel configuration options::
+
+	CONFIG_DNS_RESOLVER	- tristate "DNS Resolver support"
+
+
+Setting up
+==========
+
+To set up this facility, the /etc/request-key.conf file must be altered so that
+/sbin/request-key can appropriately direct the upcalls.  For example, to handle
+basic dname to IPv4/IPv6 address resolution, the following line should be
+added::
+
+
+	#OP	TYPE		DESC	CO-INFO	PROGRAM ARG1 ARG2 ARG3 ...
+	#======	============	=======	=======	==========================
+	create	dns_resolver  	*	*	/usr/sbin/cifs.upcall %k
+
+To direct a query for query type 'foo', a line of the following should be added
+before the more general line given above as the first match is the one taken::
+
+	create	dns_resolver  	foo:*	*	/usr/sbin/dns.foo %k
+
+
+Usage
+=====
+
+To make use of this facility, one of the following functions that are
+implemented in the module can be called after doing::
+
+	#include <linux/dns_resolver.h>
+
+     ::
+
+	int dns_query(const char *type, const char *name, size_t namelen,
+		     const char *options, char **_result, time_t *_expiry);
+
+     This is the basic access function.  It looks for a cached DNS query and if
+     it doesn't find it, it upcalls to userspace to make a new DNS query, which
+     may then be cached.  The key description is constructed as a string of the
+     form::
+
+		[<type>:]<name>
+
+     where <type> optionally specifies the particular upcall program to invoke,
+     and thus the type of query to do, and <name> specifies the string to be
+     looked up.  The default query type is a straight hostname to IP address
+     set lookup.
+
+     The name parameter is not required to be a NUL-terminated string, and its
+     length should be given by the namelen argument.
+
+     The options parameter may be NULL or it may be a set of options
+     appropriate to the query type.
+
+     The return value is a string appropriate to the query type.  For instance,
+     for the default query type it is just a list of comma-separated IPv4 and
+     IPv6 addresses.  The caller must free the result.
+
+     The length of the result string is returned on success, and a negative
+     error code is returned otherwise.  -EKEYREJECTED will be returned if the
+     DNS lookup failed.
+
+     If _expiry is non-NULL, the expiry time (TTL) of the result will be
+     returned also.
+
+The kernel maintains an internal keyring in which it caches looked up keys.
+This can be cleared by any process that has the CAP_SYS_ADMIN capability by
+the use of KEYCTL_KEYRING_CLEAR on the keyring ID.
+
+
+Reading DNS Keys from Userspace
+===============================
+
+Keys of dns_resolver type can be read from userspace using keyctl_read() or
+"keyctl read/print/pipe".
+
+
+Mechanism
+=========
+
+The dnsresolver module registers a key type called "dns_resolver".  Keys of
+this type are used to transport and cache DNS lookup results from userspace.
+
+When dns_query() is invoked, it calls request_key() to search the local
+keyrings for a cached DNS result.  If that fails to find one, it upcalls to
+userspace to get a new result.
+
+Upcalls to userspace are made through the request_key() upcall vector, and are
+directed by means of configuration lines in /etc/request-key.conf that tell
+/sbin/request-key what program to run to instantiate the key.
+
+The upcall handler program is responsible for querying the DNS, processing the
+result into a form suitable for passing to the keyctl_instantiate_key()
+routine.  This then passes the data to dns_resolver_instantiate() which strips
+off and processes any options included in the data, and then attaches the
+remainder of the string to the key as its payload.
+
+The upcall handler program should set the expiry time on the key to that of the
+lowest TTL of all the records it has extracted a result from.  This means that
+the key will be discarded and recreated when the data it holds has expired.
+
+dns_query() returns a copy of the value attached to the key, or an error if
+that is indicated instead.
+
+See <file:Documentation/security/keys/request-key.rst> for further
+information about request-key function.
+
+
+Debugging
+=========
+
+Debugging messages can be turned on dynamically by writing a 1 into the
+following file::
+
+	/sys/module/dnsresolver/parameters/debug
diff --git a/Documentation/networking/dns_resolver.txt b/Documentation/networking/dns_resolver.txt
deleted file mode 100644
index eaa8f9a6fd5d..000000000000
--- a/Documentation/networking/dns_resolver.txt
+++ /dev/null
@@ -1,157 +0,0 @@
-			     ===================
-			     DNS Resolver Module
-			     ===================
-
-Contents:
-
- - Overview.
- - Compilation.
- - Setting up.
- - Usage.
- - Mechanism.
- - Debugging.
-
-
-========
-OVERVIEW
-========
-
-The DNS resolver module provides a way for kernel services to make DNS queries
-by way of requesting a key of key type dns_resolver.  These queries are
-upcalled to userspace through /sbin/request-key.
-
-These routines must be supported by userspace tools dns.upcall, cifs.upcall and
-request-key.  It is under development and does not yet provide the full feature
-set.  The features it does support include:
-
- (*) Implements the dns_resolver key_type to contact userspace.
-
-It does not yet support the following AFS features:
-
- (*) Dns query support for AFSDB resource record.
-
-This code is extracted from the CIFS filesystem.
-
-
-===========
-COMPILATION
-===========
-
-The module should be enabled by turning on the kernel configuration options:
-
-	CONFIG_DNS_RESOLVER	- tristate "DNS Resolver support"
-
-
-==========
-SETTING UP
-==========
-
-To set up this facility, the /etc/request-key.conf file must be altered so that
-/sbin/request-key can appropriately direct the upcalls.  For example, to handle
-basic dname to IPv4/IPv6 address resolution, the following line should be
-added:
-
-	#OP	TYPE		DESC	CO-INFO	PROGRAM ARG1 ARG2 ARG3 ...
-	#======	============	=======	=======	==========================
-	create	dns_resolver  	*	*	/usr/sbin/cifs.upcall %k
-
-To direct a query for query type 'foo', a line of the following should be added
-before the more general line given above as the first match is the one taken.
-
-	create	dns_resolver  	foo:*	*	/usr/sbin/dns.foo %k
-
-
-=====
-USAGE
-=====
-
-To make use of this facility, one of the following functions that are
-implemented in the module can be called after doing:
-
-	#include <linux/dns_resolver.h>
-
- (1) int dns_query(const char *type, const char *name, size_t namelen,
-		   const char *options, char **_result, time_t *_expiry);
-
-     This is the basic access function.  It looks for a cached DNS query and if
-     it doesn't find it, it upcalls to userspace to make a new DNS query, which
-     may then be cached.  The key description is constructed as a string of the
-     form:
-
-		[<type>:]<name>
-
-     where <type> optionally specifies the particular upcall program to invoke,
-     and thus the type of query to do, and <name> specifies the string to be
-     looked up.  The default query type is a straight hostname to IP address
-     set lookup.
-
-     The name parameter is not required to be a NUL-terminated string, and its
-     length should be given by the namelen argument.
-
-     The options parameter may be NULL or it may be a set of options
-     appropriate to the query type.
-
-     The return value is a string appropriate to the query type.  For instance,
-     for the default query type it is just a list of comma-separated IPv4 and
-     IPv6 addresses.  The caller must free the result.
-
-     The length of the result string is returned on success, and a negative
-     error code is returned otherwise.  -EKEYREJECTED will be returned if the
-     DNS lookup failed.
-
-     If _expiry is non-NULL, the expiry time (TTL) of the result will be
-     returned also.
-
-The kernel maintains an internal keyring in which it caches looked up keys.
-This can be cleared by any process that has the CAP_SYS_ADMIN capability by
-the use of KEYCTL_KEYRING_CLEAR on the keyring ID.
-
-
-===============================
-READING DNS KEYS FROM USERSPACE
-===============================
-
-Keys of dns_resolver type can be read from userspace using keyctl_read() or
-"keyctl read/print/pipe".
-
-
-=========
-MECHANISM
-=========
-
-The dnsresolver module registers a key type called "dns_resolver".  Keys of
-this type are used to transport and cache DNS lookup results from userspace.
-
-When dns_query() is invoked, it calls request_key() to search the local
-keyrings for a cached DNS result.  If that fails to find one, it upcalls to
-userspace to get a new result.
-
-Upcalls to userspace are made through the request_key() upcall vector, and are
-directed by means of configuration lines in /etc/request-key.conf that tell
-/sbin/request-key what program to run to instantiate the key.
-
-The upcall handler program is responsible for querying the DNS, processing the
-result into a form suitable for passing to the keyctl_instantiate_key()
-routine.  This then passes the data to dns_resolver_instantiate() which strips
-off and processes any options included in the data, and then attaches the
-remainder of the string to the key as its payload.
-
-The upcall handler program should set the expiry time on the key to that of the
-lowest TTL of all the records it has extracted a result from.  This means that
-the key will be discarded and recreated when the data it holds has expired.
-
-dns_query() returns a copy of the value attached to the key, or an error if
-that is indicated instead.
-
-See <file:Documentation/security/keys/request-key.rst> for further
-information about request-key function.
-
-
-=========
-DEBUGGING
-=========
-
-Debugging messages can be turned on dynamically by writing a 1 into the
-following file:
-
-        /sys/module/dnsresolver/parameters/debug
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c893127004b9..55746038a7e9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -52,6 +52,7 @@ Contents:
    dctcp
    decnet
    defza
+   dns_resolver
 
 .. only::  subproject and html
 
diff --git a/net/ceph/Kconfig b/net/ceph/Kconfig
index 2e8e6f904920..d7bec7adc267 100644
--- a/net/ceph/Kconfig
+++ b/net/ceph/Kconfig
@@ -39,6 +39,6 @@ config CEPH_LIB_USE_DNS_RESOLVER
 	  be resolved using the CONFIG_DNS_RESOLVER facility.
 
 	  For information on how to use CONFIG_DNS_RESOLVER consult
-	  Documentation/networking/dns_resolver.txt
+	  Documentation/networking/dns_resolver.rst
 
 	  If unsure, say N.
diff --git a/net/dns_resolver/Kconfig b/net/dns_resolver/Kconfig
index 0a1c2238b4bd..255df9b6e9e8 100644
--- a/net/dns_resolver/Kconfig
+++ b/net/dns_resolver/Kconfig
@@ -19,7 +19,7 @@ config DNS_RESOLVER
 	  SMB2 later.  DNS Resolver is supported by the userspace upcall
 	  helper "/sbin/dns.resolver" via /etc/request-key.conf.
 
-	  See <file:Documentation/networking/dns_resolver.txt> for further
+	  See <file:Documentation/networking/dns_resolver.rst> for further
 	  information.
 
 	  To compile this as a module, choose M here: the module will be called
diff --git a/net/dns_resolver/dns_key.c b/net/dns_resolver/dns_key.c
index ad53eb31d40f..3aced951d5ab 100644
--- a/net/dns_resolver/dns_key.c
+++ b/net/dns_resolver/dns_key.c
@@ -1,6 +1,6 @@
 /* Key type used to cache DNS lookups made by the kernel
  *
- * See Documentation/networking/dns_resolver.txt
+ * See Documentation/networking/dns_resolver.rst
  *
  *   Copyright (c) 2007 Igor Mammedov
  *   Author(s): Igor Mammedov (niallain@gmail.com)
diff --git a/net/dns_resolver/dns_query.c b/net/dns_resolver/dns_query.c
index cab4e0df924f..82b084cc1cc6 100644
--- a/net/dns_resolver/dns_query.c
+++ b/net/dns_resolver/dns_query.c
@@ -1,7 +1,7 @@
 /* Upcall routine, designed to work as a key type and working through
  * /sbin/request-key to contact userspace when handling DNS queries.
  *
- * See Documentation/networking/dns_resolver.txt
+ * See Documentation/networking/dns_resolver.rst
  *
  *   Copyright (c) 2007 Igor Mammedov
  *   Author(s): Igor Mammedov (niallain@gmail.com)
-- 
cgit v1.2.3-59-g8ed1b


From 28d23311ff35ac97ff20608f47c84c95d6389c33 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:33 +0200
Subject: docs: networking: convert driver.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/driver.rst | 97 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/driver.txt | 93 -----------------------------------
 Documentation/networking/index.rst  |  1 +
 3 files changed, 98 insertions(+), 93 deletions(-)
 create mode 100644 Documentation/networking/driver.rst
 delete mode 100644 Documentation/networking/driver.txt

diff --git a/Documentation/networking/driver.rst b/Documentation/networking/driver.rst
new file mode 100644
index 000000000000..c8f59dbda46f
--- /dev/null
+++ b/Documentation/networking/driver.rst
@@ -0,0 +1,97 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Softnet Driver Issues
+=====================
+
+Transmit path guidelines:
+
+1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
+   any normal circumstances.  It is considered a hard error unless
+   there is no way your device can tell ahead of time when its
+   transmit function will become busy.
+
+   Instead it must maintain the queue properly.  For example,
+   for a driver implementing scatter-gather this means::
+
+	static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
+					       struct net_device *dev)
+	{
+		struct drv *dp = netdev_priv(dev);
+
+		lock_tx(dp);
+		...
+		/* This is a hard error, log it. */
+		if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
+			netif_stop_queue(dev);
+			unlock_tx(dp);
+			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
+			       dev->name);
+			return NETDEV_TX_BUSY;
+		}
+
+		... queue packet to card ...
+		... update tx consumer index ...
+
+		if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
+			netif_stop_queue(dev);
+
+		...
+		unlock_tx(dp);
+		...
+		return NETDEV_TX_OK;
+	}
+
+   And then at the end of your TX reclamation event handling::
+
+	if (netif_queue_stopped(dp->dev) &&
+	    TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
+		netif_wake_queue(dp->dev);
+
+   For a non-scatter-gather supporting card, the three tests simply become::
+
+		/* This is a hard error, log it. */
+		if (TX_BUFFS_AVAIL(dp) <= 0)
+
+   and::
+
+		if (TX_BUFFS_AVAIL(dp) == 0)
+
+   and::
+
+	if (netif_queue_stopped(dp->dev) &&
+	    TX_BUFFS_AVAIL(dp) > 0)
+		netif_wake_queue(dp->dev);
+
+2) An ndo_start_xmit method must not modify the shared parts of a
+   cloned SKB.
+
+3) Do not forget that once you return NETDEV_TX_OK from your
+   ndo_start_xmit method, it is your driver's responsibility to free
+   up the SKB, and to do so in some finite amount of time.
+
+   For example, this means that it is not allowed for your TX
+   mitigation scheme to let TX packets "hang out" in the TX
+   ring unreclaimed forever if no new TX packets are sent.
+   This error can deadlock sockets waiting for send buffer room
+   to be freed up.
+
+   If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
+   must not keep any reference to that SKB and you must not attempt
+   to free it up.
+
+Probing guidelines:
+
+1) Any hardware layer address you obtain for your device should
+   be verified.  For example, for ethernet check it with
+   linux/etherdevice.h:is_valid_ether_addr()
+
+Close/stop guidelines:
+
+1) After the ndo_stop routine has been called, the hardware must
+   not receive or transmit any data.  All in flight packets must
+   be aborted. If necessary, poll or wait for completion of
+   any reset commands.
+
+2) The ndo_stop routine will be called by unregister_netdevice
+   if device is still UP.
diff --git a/Documentation/networking/driver.txt b/Documentation/networking/driver.txt
deleted file mode 100644
index da59e2884130..000000000000
--- a/Documentation/networking/driver.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-Document about softnet driver issues
-
-Transmit path guidelines:
-
-1) The ndo_start_xmit method must not return NETDEV_TX_BUSY under
-   any normal circumstances.  It is considered a hard error unless
-   there is no way your device can tell ahead of time when it's
-   transmit function will become busy.
-
-   Instead it must maintain the queue properly.  For example,
-   for a driver implementing scatter-gather this means:
-
-	static netdev_tx_t drv_hard_start_xmit(struct sk_buff *skb,
-					       struct net_device *dev)
-	{
-		struct drv *dp = netdev_priv(dev);
-
-		lock_tx(dp);
-		...
-		/* This is a hard error log it. */
-		if (TX_BUFFS_AVAIL(dp) <= (skb_shinfo(skb)->nr_frags + 1)) {
-			netif_stop_queue(dev);
-			unlock_tx(dp);
-			printk(KERN_ERR PFX "%s: BUG! Tx Ring full when queue awake!\n",
-			       dev->name);
-			return NETDEV_TX_BUSY;
-		}
-
-		... queue packet to card ...
-		... update tx consumer index ...
-
-		if (TX_BUFFS_AVAIL(dp) <= (MAX_SKB_FRAGS + 1))
-			netif_stop_queue(dev);
-
-		...
-		unlock_tx(dp);
-		...
-		return NETDEV_TX_OK;
-	}
-
-   And then at the end of your TX reclamation event handling:
-
-	if (netif_queue_stopped(dp->dev) &&
-            TX_BUFFS_AVAIL(dp) > (MAX_SKB_FRAGS + 1))
-		netif_wake_queue(dp->dev);
-
-   For a non-scatter-gather supporting card, the three tests simply become:
-
-		/* This is a hard error log it. */
-		if (TX_BUFFS_AVAIL(dp) <= 0)
-
-   and:
-
-		if (TX_BUFFS_AVAIL(dp) == 0)
-
-   and:
-
-	if (netif_queue_stopped(dp->dev) &&
-            TX_BUFFS_AVAIL(dp) > 0)
-		netif_wake_queue(dp->dev);
-
-2) An ndo_start_xmit method must not modify the shared parts of a
-   cloned SKB.
-
-3) Do not forget that once you return NETDEV_TX_OK from your
-   ndo_start_xmit method, it is your driver's responsibility to free
-   up the SKB and in some finite amount of time.
-
-   For example, this means that it is not allowed for your TX
-   mitigation scheme to let TX packets "hang out" in the TX
-   ring unreclaimed forever if no new TX packets are sent.
-   This error can deadlock sockets waiting for send buffer room
-   to be freed up.
-
-   If you return NETDEV_TX_BUSY from the ndo_start_xmit method, you
-   must not keep any reference to that SKB and you must not attempt
-   to free it up.
-
-Probing guidelines:
-
-1) Any hardware layer address you obtain for your device should
-   be verified.  For example, for ethernet check it with
-   linux/etherdevice.h:is_valid_ether_addr()
-
-Close/stop guidelines:
-
-1) After the ndo_stop routine has been called, the hardware must
-   not receive or transmit any data.  All in flight packets must
-   be aborted. If necessary, poll or wait for completion of 
-   any reset commands.
-
-2) The ndo_stop routine will be called by unregister_netdevice
-   if device is still UP.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 55746038a7e9..313f66900bce 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -53,6 +53,7 @@ Contents:
    decnet
    defza
    dns_resolver
+   driver
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 06df65723b69a3d4691737503654400c35f9ca5a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:34 +0200
Subject: docs: networking: convert eql.txt to ReST

- add SPDX header;
- add a document title;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/eql.rst   | 373 ++++++++++++++++++++++++++
 Documentation/networking/eql.txt   | 528 -------------------------------------
 Documentation/networking/index.rst |   1 +
 drivers/net/Kconfig                |   2 +-
 4 files changed, 375 insertions(+), 529 deletions(-)
 create mode 100644 Documentation/networking/eql.rst
 delete mode 100644 Documentation/networking/eql.txt

diff --git a/Documentation/networking/eql.rst b/Documentation/networking/eql.rst
new file mode 100644
index 000000000000..a628c4c81166
--- /dev/null
+++ b/Documentation/networking/eql.rst
@@ -0,0 +1,373 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================================
+EQL Driver: Serial IP Load Balancing HOWTO
+==========================================
+
+  Simon "Guru Aleph-Null" Janes, simon@ncm.com
+
+  v1.1, February 27, 1995
+
+  This is the manual for the EQL device driver. EQL is a software device
+  that lets you load-balance IP serial links (SLIP or uncompressed PPP)
+  to increase your bandwidth. It will not reduce your latency (i.e. ping
+  times) except in the case where you already have lots of traffic on
+  your link, in which it will help them out. This driver has been tested
+  with the 1.1.75 kernel, and is known to have patched cleanly with
+  1.1.86.  Some testing with 1.1.92 has been done with the v1.1 patch
+  which was only created to patch cleanly in the very latest kernel
+  source trees. (Yes, it worked fine.)
+
+1. Introduction
+===============
+
+  Which is worse? A huge fee for a 56K leased line or two phone lines?
+  It's probably the former.  If you find yourself craving more bandwidth,
+  and have an ISP that is flexible, it is now possible to bind modems
+  together to work as one point-to-point link to increase your
+  bandwidth.  All without having to have a special black box on either
+  side.
+
+
+  The eql driver has only been tested with the Livingston PortMaster-2e
+  terminal server. I do not know if other terminal servers support load-
+  balancing, but I do know that the PortMaster does it, and does it
+  almost as well as the eql driver seems to do it (-- Unfortunately, in
+  my testing so far, the Livingston PortMaster 2e's load-balancing is a
+  good 1 to 2 KB/s slower than the test machine working with a 28.8 Kbps
+  and 14.4 Kbps connection.  However, I am not sure that it really is
+  the PortMaster, or if it's Linux's TCP drivers. I'm told that Linux's
+  TCP implementation is pretty fast though.--)
+
+
+  I suggest to ISPs out there that it would probably be fair to charge
+  a load-balancing client 75% of the cost of the second line and 50% of
+  the cost of the third line etc...
+
+
+  Hey, we can all dream you know...
+
+
+2. Kernel Configuration
+=======================
+
+  Here I describe the general steps of getting a kernel up and working
+  with the eql driver.	From patching, building, to installing.
+
+
+2.1. Patching The Kernel
+------------------------
+
+  If you do not have or cannot get a copy of the kernel with the eql
+  driver folded into it, get your copy of the driver from
+  ftp://slaughter.ncm.com/pub/Linux/LOAD_BALANCING/eql-1.1.tar.gz.
+  Unpack this archive someplace obvious like /usr/local/src/.  It will
+  create the following files::
+
+       -rw-r--r-- guru/ncm	198 Jan 19 18:53 1995 eql-1.1/NO-WARRANTY
+       -rw-r--r-- guru/ncm	30620 Feb 27 21:40 1995 eql-1.1/eql-1.1.patch
+       -rwxr-xr-x guru/ncm	16111 Jan 12 22:29 1995 eql-1.1/eql_enslave
+       -rw-r--r-- guru/ncm	2195 Jan 10 21:48 1995 eql-1.1/eql_enslave.c
+
+  Unpack a recent kernel (something after 1.1.92) someplace convenient
+  like say /usr/src/linux-1.1.92.eql. Use symbolic links to point
+  /usr/src/linux to this development directory.
+
+
+  Apply the patch by running the commands::
+
+       cd /usr/src
+       patch </usr/local/src/eql-1.1/eql-1.1.patch
+
+
+2.2. Building The Kernel
+------------------------
+
+  After patching the kernel, run make config and configure the kernel
+  for your hardware.
+
+
+  After configuration, make and install according to your habit.
+
+
+3. Network Configuration
+========================
+
+  So far, I have only used the eql device with the DSLIP SLIP connection
+  manager by Matt Dillon (-- "The man who sold his soul to code so much
+  so quickly."--) .  How you configure it for other "connection"
+  managers is up to you.  Most other connection managers that I've seen
+  don't do a very good job when it comes to handling more than one
+  connection.
+
+
+3.1. /etc/rc.d/rc.inet1
+-----------------------
+
+  In rc.inet1, ifconfig the eql device to the IP address you usually use
+  for your machine, and the MTU you prefer for your SLIP lines.	One
+  could argue that MTU should be roughly half the usual size for two
+  modems, one-third for three, one-fourth for four, etc...  But going
+  too far below 296 is probably overkill. Here is an example ifconfig
+  command that sets up the eql device::
+
+       ifconfig eql 198.67.33.239 mtu 1006
+
+  Once the eql device is up and running, add a static default route to
+  it in the routing table using the cool new route syntax that makes
+  life so much easier::
+
+       route add default eql
+
+
+3.2. Enslaving Devices By Hand
+------------------------------
+
+  Enslaving devices by hand requires two utility programs: eql_enslave
+  and eql_emancipate (-- eql_emancipate hasn't been written because when
+  an enslaved device "dies", it is automatically taken out of the queue.
+  I haven't found a good reason to write it yet... other than for
+  completeness, but that isn't a good motivator is it?--)
+
+
+  The syntax for enslaving a device is "eql_enslave <master-name>
+  <slave-name> <estimated-bps>".  Here are some example enslavings::
+
+       eql_enslave eql sl0 28800
+       eql_enslave eql ppp0 14400
+       eql_enslave eql sl1 57600
+
+  When you want to free a device from its life of slavery, you can
+  either down the device with ifconfig (eql will automatically bury the
+  dead slave and remove it from its queue) or use eql_emancipate to free
+  it. (-- Or just ifconfig it down, and the eql driver will take it out
+  for you.--)::
+
+       eql_emancipate eql sl0
+       eql_emancipate eql ppp0
+       eql_emancipate eql sl1
+
+
+3.3. DSLIP Configuration for the eql Device
+-------------------------------------------
+
+  The general idea is to bring up and keep up as many SLIP connections
+  as you need, automatically.
+
+
+3.3.1.  /etc/slip/runslip.conf
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+  Here is an example runslip.conf::
+
+	  name		sl-line-1
+	  enabled
+	  baud		38400
+	  mtu		576
+	  ducmd		-e /etc/slip/dialout/cua2-288.xp -t 9
+	  command	 eql_enslave eql $interface 28800
+	  address	 198.67.33.239
+	  line		/dev/cua2
+
+	  name		sl-line-2
+	  enabled
+	  baud		38400
+	  mtu		576
+	  ducmd		-e /etc/slip/dialout/cua3-288.xp -t 9
+	  command	 eql_enslave eql $interface 28800
+	  address	 198.67.33.239
+	  line		/dev/cua3
+
+
+3.4. Using PPP and the eql Device
+---------------------------------
+
+  I have not yet done any load-balancing testing for PPP devices, mainly
+  because I don't have a PPP-connection manager like SLIP has with
+  DSLIP. I did find a good tip from LinuxNET:Billy for PPP performance:
+  make sure you have asyncmap set to something so that control
+  characters are not escaped.
+
+
+  I tried to fix up a PPP script/system for redialing lost PPP
+  connections for use with the eql driver the weekend of Feb 25-26 '95
+  (Hereafter known as the 8-hour PPP Hate Festival).  Perhaps later this
+  year.
+
+
+4. About the Slave Scheduler Algorithm
+======================================
+
+  The slave scheduler probably could be replaced with a dozen other
+  things and push traffic much faster.	The formula in the current set
+  up of the driver was tuned to handle slaves with wildly different
+  bits-per-second "priorities".
+
+
+  All testing I have done was with two 28.8 V.FC modems, one connecting
+  at 28800 bps or slower, and the other connecting at 14400 bps all the
+  time.
+
+
+  One version of the scheduler was able to push 5.3 K/s through the
+  28800 and 14400 connections, but when the priorities on the links were
+  very wide apart (57600 vs. 14400) the "faster" modem received all
+  traffic and the "slower" modem starved.
+
+
+5. Testers' Reports
+===================
+
+  Some people have experimented with the eql device with newer
+  kernels (than 1.1.75).  I have since updated the driver to patch
+  cleanly in newer kernels because of the removal of the old "slave-
+  balancing" driver config option.
+
+
+  -  icee from LinuxNET patched 1.1.86 without any rejects and was able
+     to boot the kernel and enslave a couple of ISDN PPP links.
+
+5.1. Randolph Bentson's Test Report
+-----------------------------------
+
+  ::
+
+    From bentson@grieg.seaslug.org Wed Feb  8 19:08:09 1995
+    Date: Tue, 7 Feb 95 22:57 PST
+    From: Randolph Bentson <bentson@grieg.seaslug.org>
+    To: guru@ncm.com
+    Subject: EQL driver tests
+
+
+    I have been checking out your eql driver.  (Nice work, that!)
+    Although you may already done this performance testing, here
+    are some data I've discovered.
+
+    Randolph Bentson
+    bentson@grieg.seaslug.org
+
+------------------------------------------------------------------
+
+
+  A pseudo-device driver, EQL, written by Simon Janes, can be used
+  to bundle multiple SLIP connections into what appears to be a
+  single connection.  This allows one to improve dial-up network
+  connectivity gradually, without having to buy expensive DSU/CSU
+  hardware and services.
+
+  I have done some testing of this software, with two goals in
+  mind: first, to ensure it actually works as described and
+  second, as a method of exercising my device driver.
+
+  The following performance measurements were derived from a set
+  of SLIP connections run between two Linux systems (1.1.84) using
+  a 486DX2/66 with a Cyclom-8Ys and a 486SLC/40 with a Cyclom-16Y.
+  (Ports 0,1,2,3 were used.  A later configuration will distribute
+  port selection across the different Cirrus chips on the boards.)
+  Once a link was established, I timed a binary ftp transfer of
+  289284 bytes of data.	If there were no overhead (packet headers,
+  inter-character and inter-packet delays, etc.) the transfers
+  would take the following times::
+
+      bits/sec	seconds
+      345600	8.3
+      234600	12.3
+      172800	16.7
+      153600	18.8
+      76800	37.6
+      57600	50.2
+      38400	75.3
+      28800	100.4
+      19200	150.6
+      9600	301.3
+
+  A single line running at the lower speeds and with large packets
+  comes to within 2% of this.  Performance is limited for the higher
+  speeds (as predicted by the Cirrus databook) to an aggregate of
+  about 160 kbits/sec.	The next round of testing will distribute
+  the load across two or more Cirrus chips.
+
+  The good news is that one gets nearly the full advantage of the
+  second, third, and fourth line's bandwidth.  (The bad news is
+  that the connection establishment seemed fragile for the higher
+  speeds.  Once established, the connection seemed robust enough.)
+
+  ======  ========	===  ========   ======= ======= ===
+  #lines  speed		mtu  seconds	theory  actual  %of
+	  kbit/sec	     duration	speed	speed	max
+  ======  ========	===  ========   ======= ======= ===
+  3	  115200	900	_	345600
+  3	  115200	400	18.1	345600  159825  46
+  2	  115200	900	_	230400
+  2	  115200	600	18.1	230400  159825  69
+  2	  115200	400	19.3	230400  149888  65
+  4	  57600		900	_	234600
+  4	  57600		600	_	234600
+  4	  57600		400	_	234600
+  3	  57600		600	20.9	172800  138413  80
+  3	  57600		900	21.2	172800  136455  78
+  3	  115200	600	21.7	345600  133311  38
+  3	  57600		400	22.5	172800  128571  74
+  4	  38400		900	25.2	153600  114795  74
+  4	  38400		600	26.4	153600  109577  71
+  4	  38400		400	27.3	153600  105965  68
+  2	  57600		900	29.1	115200  99410.3 86
+  1	  115200	900	30.7	115200  94229.3 81
+  2	  57600		600	30.2	115200  95789.4 83
+  3	  38400		900	30.3	115200  95473.3 82
+  3	  38400		600	31.2	115200  92719.2 80
+  1	  115200	600	31.3	115200  92423	80
+  2	  57600		400	32.3	115200  89561.6 77
+  1	  115200	400	32.8	115200  88196.3 76
+  3	  38400		400	33.5	115200  86353.4 74
+  2	  38400		900	43.7	76800	66197.7 86
+  2	  38400		600	44	76800	65746.4 85
+  2	  38400		400	47.2	76800	61289	79
+  4	  19200		900	50.8	76800	56945.7 74
+  4	  19200		400	53.2	76800	54376.7 70
+  4	  19200		600	53.7	76800	53870.4 70
+  1	  57600		900	54.6	57600	52982.4 91
+  1	  57600		600	56.2	57600	51474	89
+  3	  19200		900	60.5	57600	47815.5 83
+  1	  57600		400	60.2	57600	48053.8 83
+  3	  19200		600	62	57600	46658.7 81
+  3	  19200		400	64.7	57600	44711.6 77
+  1	  38400		900	79.4	38400	36433.8 94
+  1	  38400		600	82.4	38400	35107.3 91
+  2	  19200		900	84.4	38400	34275.4 89
+  1	  38400		400	86.8	38400	33327.6 86
+  2	  19200		600	87.6	38400	33023.3 85
+  2	  19200		400	91.2	38400	31719.7 82
+  4	  9600		900	94.7	38400	30547.4 79
+  4	  9600		400	106	38400	27290.9 71
+  4	  9600		600	110	38400	26298.5 68
+  3	  9600		900	118	28800	24515.6 85
+  3	  9600		600	120	28800	24107	83
+  3	  9600		400	131	28800	22082.7 76
+  1	  19200		900	155	19200	18663.5 97
+  1	  19200		600	161	19200	17968	93
+  1	  19200		400	170	19200	17016.7 88
+  2	  9600		600	176	19200	16436.6 85
+  2	  9600		900	180	19200	16071.3 83
+  2	  9600		400	181	19200	15982.5 83
+  1	  9600		900	305	9600	9484.72 98
+  1	  9600		600	314	9600	9212.87 95
+  1	  9600		400	332	9600	8713.37 90
+  ======  ========	===  ========   ======= ======= ===
+
+5.2. Antony Healey's Report
+---------------------------
+
+  ::
+
+    Date: Mon, 13 Feb 1995 16:17:29 +1100 (EST)
+    From: Antony Healey <ahealey@st.nepean.uws.edu.au>
+    To: Simon Janes <guru@ncm.com>
+    Subject: Re: Load Balancing
+
+    Hi Simon,
+	  I've installed your patch and it works great. I have trialed
+	  it over twin SL/IP lines, just over null modems, but I was
+	  able to data at over 48Kb/s [ISDN link -Simon]. I managed a
+	  transfer of up to 7.5 Kbyte/s on one go, but averaged around
+	  6.4 Kbyte/s, which I think is pretty cool.  :)
diff --git a/Documentation/networking/eql.txt b/Documentation/networking/eql.txt
deleted file mode 100644
index 0f1550150f05..000000000000
--- a/Documentation/networking/eql.txt
+++ /dev/null
@@ -1,528 +0,0 @@
-  EQL Driver: Serial IP Load Balancing HOWTO
-  Simon "Guru Aleph-Null" Janes, simon@ncm.com
-  v1.1, February 27, 1995
-
-  This is the manual for the EQL device driver. EQL is a software device
-  that lets you load-balance IP serial links (SLIP or uncompressed PPP)
-  to increase your bandwidth. It will not reduce your latency (i.e. ping
-  times) except in the case where you already have lots of traffic on
-  your link, in which it will help them out. This driver has been tested
-  with the 1.1.75 kernel, and is known to have patched cleanly with
-  1.1.86.  Some testing with 1.1.92 has been done with the v1.1 patch
-  which was only created to patch cleanly in the very latest kernel
-  source trees. (Yes, it worked fine.)
-
-  1.  Introduction
-
-  Which is worse? A huge fee for a 56K leased line or two phone lines?
-  It's probably the former.  If you find yourself craving more bandwidth,
-  and have a ISP that is flexible, it is now possible to bind modems
-  together to work as one point-to-point link to increase your
-  bandwidth.  All without having to have a special black box on either
-  side.
-
-
-  The eql driver has only been tested with the Livingston PortMaster-2e
-  terminal server. I do not know if other terminal servers support load-
-  balancing, but I do know that the PortMaster does it, and does it
-  almost as well as the eql driver seems to do it (-- Unfortunately, in
-  my testing so far, the Livingston PortMaster 2e's load-balancing is a
-  good 1 to 2 KB/s slower than the test machine working with a 28.8 Kbps
-  and 14.4 Kbps connection.  However, I am not sure that it really is
-  the PortMaster, or if it's Linux's TCP drivers. I'm told that Linux's
-  TCP implementation is pretty fast though.--)
-
-
-  I suggest to ISPs out there that it would probably be fair to charge
-  a load-balancing client 75% of the cost of the second line and 50% of
-  the cost of the third line etc...
-
-
-  Hey, we can all dream you know...
-
-
-  2.  Kernel Configuration
-
-  Here I describe the general steps of getting a kernel up and working
-  with the eql driver.	From patching, building, to installing.
-
-
-  2.1.	Patching The Kernel
-
-  If you do not have or cannot get a copy of the kernel with the eql
-  driver folded into it, get your copy of the driver from
-  ftp://slaughter.ncm.com/pub/Linux/LOAD_BALANCING/eql-1.1.tar.gz.
-  Unpack this archive someplace obvious like /usr/local/src/.  It will
-  create the following files:
-
-
-
-       ______________________________________________________________________
-       -rw-r--r-- guru/ncm	198 Jan 19 18:53 1995 eql-1.1/NO-WARRANTY
-       -rw-r--r-- guru/ncm	30620 Feb 27 21:40 1995 eql-1.1/eql-1.1.patch
-       -rwxr-xr-x guru/ncm	16111 Jan 12 22:29 1995 eql-1.1/eql_enslave
-       -rw-r--r-- guru/ncm	2195 Jan 10 21:48 1995 eql-1.1/eql_enslave.c
-       ______________________________________________________________________
-
-  Unpack a recent kernel (something after 1.1.92) someplace convenient
-  like say /usr/src/linux-1.1.92.eql. Use symbolic links to point
-  /usr/src/linux to this development directory.
-
-
-  Apply the patch by running the commands:
-
-
-       ______________________________________________________________________
-       cd /usr/src
-       patch </usr/local/src/eql-1.1/eql-1.1.patch
-       ______________________________________________________________________
-
-
-
-
-
-  2.2.	Building The Kernel
-
-  After patching the kernel, run make config and configure the kernel
-  for your hardware.
-
-
-  After configuration, make and install according to your habit.
-
-
-  3.  Network Configuration
-
-  So far, I have only used the eql device with the DSLIP SLIP connection
-  manager by Matt Dillon (-- "The man who sold his soul to code so much
-  so quickly."--) .  How you configure it for other "connection"
-  managers is up to you.  Most other connection managers that I've seen
-  don't do a very good job when it comes to handling more than one
-  connection.
-
-
-  3.1.	/etc/rc.d/rc.inet1
-
-  In rc.inet1, ifconfig the eql device to the IP address you usually use
-  for your machine, and the MTU you prefer for your SLIP lines.	One
-  could argue that MTU should be roughly half the usual size for two
-  modems, one-third for three, one-fourth for four, etc...  But going
-  too far below 296 is probably overkill. Here is an example ifconfig
-  command that sets up the eql device:
-
-
-
-       ______________________________________________________________________
-       ifconfig eql 198.67.33.239 mtu 1006
-       ______________________________________________________________________
-
-
-
-
-
-  Once the eql device is up and running, add a static default route to
-  it in the routing table using the cool new route syntax that makes
-  life so much easier:
-
-
-
-       ______________________________________________________________________
-       route add default eql
-       ______________________________________________________________________
-
-
-  3.2.	Enslaving Devices By Hand
-
-  Enslaving devices by hand requires two utility programs: eql_enslave
-  and eql_emancipate (-- eql_emancipate hasn't been written because when
-  an enslaved device "dies", it is automatically taken out of the queue.
-  I haven't found a good reason to write it yet... other than for
-  completeness, but that isn't a good motivator is it?--)
-
-
-  The syntax for enslaving a device is "eql_enslave <master-name>
-  <slave-name> <estimated-bps>".  Here are some example enslavings:
-
-
-
-       ______________________________________________________________________
-       eql_enslave eql sl0 28800
-       eql_enslave eql ppp0 14400
-       eql_enslave eql sl1 57600
-       ______________________________________________________________________
-
-
-
-
-
-  When you want to free a device from its life of slavery, you can
-  either down the device with ifconfig (eql will automatically bury the
-  dead slave and remove it from its queue) or use eql_emancipate to free
-  it. (-- Or just ifconfig it down, and the eql driver will take it out
-  for you.--)
-
-
-
-       ______________________________________________________________________
-       eql_emancipate eql sl0
-       eql_emancipate eql ppp0
-       eql_emancipate eql sl1
-       ______________________________________________________________________
-
-
-
-
-
-  3.3.	DSLIP Configuration for the eql Device
-
-  The general idea is to bring up and keep up as many SLIP connections
-  as you need, automatically.
-
-
-  3.3.1.  /etc/slip/runslip.conf
-
-  Here is an example runslip.conf:
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  ______________________________________________________________________
-  name		sl-line-1
-  enabled
-  baud		38400
-  mtu		576
-  ducmd		-e /etc/slip/dialout/cua2-288.xp -t 9
-  command	 eql_enslave eql $interface 28800
-  address	 198.67.33.239
-  line		/dev/cua2
-
-  name		sl-line-2
-  enabled
-  baud		38400
-  mtu		576
-  ducmd		-e /etc/slip/dialout/cua3-288.xp -t 9
-  command	 eql_enslave eql $interface 28800
-  address	 198.67.33.239
-  line		/dev/cua3
-  ______________________________________________________________________
-
-
-
-
-
-  3.4.	Using PPP and the eql Device
-
-  I have not yet done any load-balancing testing for PPP devices, mainly
-  because I don't have a PPP-connection manager like SLIP has with
-  DSLIP. I did find a good tip from LinuxNET:Billy for PPP performance:
-  make sure you have asyncmap set to something so that control
-  characters are not escaped.
-
-
-  I tried to fix up a PPP script/system for redialing lost PPP
-  connections for use with the eql driver the weekend of Feb 25-26 '95
-  (Hereafter known as the 8-hour PPP Hate Festival).  Perhaps later this
-  year.
-
-
-  4.  About the Slave Scheduler Algorithm
-
-  The slave scheduler probably could be replaced with a dozen other
-  things and push traffic much faster.	The formula in the current set
-  up of the driver was tuned to handle slaves with wildly different
-  bits-per-second "priorities".
-
-
-  All testing I have done was with two 28.8 V.FC modems, one connecting
-  at 28800 bps or slower, and the other connecting at 14400 bps all the
-  time.
-
-
-  One version of the scheduler was able to push 5.3 K/s through the
-  28800 and 14400 connections, but when the priorities on the links were
-  very wide apart (57600 vs. 14400) the "faster" modem received all
-  traffic and the "slower" modem starved.
-
-
-  5.  Testers' Reports
-
-  Some people have experimented with the eql device with newer
-  kernels (than 1.1.75).  I have since updated the driver to patch
-  cleanly in newer kernels because of the removal of the old "slave-
-  balancing" driver config option.
-
-
-  o  icee from LinuxNET patched 1.1.86 without any rejects and was able
-     to boot the kernel and enslave a couple of ISDN PPP links.
-
-  5.1.	Randolph Bentson's Test Report
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-  From bentson@grieg.seaslug.org Wed Feb  8 19:08:09 1995
-  Date: Tue, 7 Feb 95 22:57 PST
-  From: Randolph Bentson <bentson@grieg.seaslug.org>
-  To: guru@ncm.com
-  Subject: EQL driver tests
-
-
-  I have been checking out your eql driver.  (Nice work, that!)
-  Although you may already done this performance testing, here
-  are some data I've discovered.
-
-  Randolph Bentson
-  bentson@grieg.seaslug.org
-
-  ---------------------------------------------------------
-
-
-  A pseudo-device driver, EQL, written by Simon Janes, can be used
-  to bundle multiple SLIP connections into what appears to be a
-  single connection.  This allows one to improve dial-up network
-  connectivity gradually, without having to buy expensive DSU/CSU
-  hardware and services.
-
-  I have done some testing of this software, with two goals in
-  mind: first, to ensure it actually works as described and
-  second, as a method of exercising my device driver.
-
-  The following performance measurements were derived from a set
-  of SLIP connections run between two Linux systems (1.1.84) using
-  a 486DX2/66 with a Cyclom-8Ys and a 486SLC/40 with a Cyclom-16Y.
-  (Ports 0,1,2,3 were used.  A later configuration will distribute
-  port selection across the different Cirrus chips on the boards.)
-  Once a link was established, I timed a binary ftp transfer of
-  289284 bytes of data.	If there were no overhead (packet headers,
-  inter-character and inter-packet delays, etc.) the transfers
-  would take the following times:
-
-      bits/sec	seconds
-      345600	8.3
-      234600	12.3
-      172800	16.7
-      153600	18.8
-      76800	37.6
-      57600	50.2
-      38400	75.3
-      28800	100.4
-      19200	150.6
-      9600	301.3
-
-  A single line running at the lower speeds and with large packets
-  comes to within 2% of this.  Performance is limited for the higher
-  speeds (as predicted by the Cirrus databook) to an aggregate of
-  about 160 kbits/sec.	The next round of testing will distribute
-  the load across two or more Cirrus chips.
-
-  The good news is that one gets nearly the full advantage of the
-  second, third, and fourth line's bandwidth.  (The bad news is
-  that the connection establishment seemed fragile for the higher
-  speeds.  Once established, the connection seemed robust enough.)
-
-  #lines  speed	mtu  seconds	theory  actual  %of
-	 kbit/sec      duration	speed	speed	max
-  3	115200  900	_	345600
-  3	115200  400	18.1	345600  159825  46
-  2	115200  900	_	230400
-  2	115200  600	18.1	230400  159825  69
-  2	115200  400	19.3	230400  149888  65
-  4	57600	900	_	234600
-  4	57600	600	_	234600
-  4	57600	400	_	234600
-  3	57600	600	20.9	172800  138413  80
-  3	57600	900	21.2	172800  136455  78
-  3	115200  600	21.7	345600  133311  38
-  3	57600	400	22.5	172800  128571  74
-  4	38400	900	25.2	153600  114795  74
-  4	38400	600	26.4	153600  109577  71
-  4	38400	400	27.3	153600  105965  68
-  2	57600	900	29.1	115200  99410.3 86
-  1	115200  900	30.7	115200  94229.3 81
-  2	57600	600	30.2	115200  95789.4 83
-  3	38400	900	30.3	115200  95473.3 82
-  3	38400	600	31.2	115200  92719.2 80
-  1	115200  600	31.3	115200  92423	80
-  2	57600	400	32.3	115200  89561.6 77
-  1	115200  400	32.8	115200  88196.3 76
-  3	38400	400	33.5	115200  86353.4 74
-  2	38400	900	43.7	76800	66197.7 86
-  2	38400	600	44	76800	65746.4 85
-  2	38400	400	47.2	76800	61289	79
-  4	19200	900	50.8	76800	56945.7 74
-  4	19200	400	53.2	76800	54376.7 70
-  4	19200	600	53.7	76800	53870.4 70
-  1	57600	900	54.6	57600	52982.4 91
-  1	57600	600	56.2	57600	51474	89
-  3	19200	900	60.5	57600	47815.5 83
-  1	57600	400	60.2	57600	48053.8 83
-  3	19200	600	62	57600	46658.7 81
-  3	19200	400	64.7	57600	44711.6 77
-  1	38400	900	79.4	38400	36433.8 94
-  1	38400	600	82.4	38400	35107.3 91
-  2	19200	900	84.4	38400	34275.4 89
-  1	38400	400	86.8	38400	33327.6 86
-  2	19200	600	87.6	38400	33023.3 85
-  2	19200	400	91.2	38400	31719.7 82
-  4	9600	900	94.7	38400	30547.4 79
-  4	9600	400	106	38400	27290.9 71
-  4	9600	600	110	38400	26298.5 68
-  3	9600	900	118	28800	24515.6 85
-  3	9600	600	120	28800	24107	83
-  3	9600	400	131	28800	22082.7 76
-  1	19200	900	155	19200	18663.5 97
-  1	19200	600	161	19200	17968	93
-  1	19200	400	170	19200	17016.7 88
-  2	9600	600	176	19200	16436.6 85
-  2	9600	900	180	19200	16071.3 83
-  2	9600	400	181	19200	15982.5 83
-  1	9600	900	305	9600	9484.72 98
-  1	9600	600	314	9600	9212.87 95
-  1	9600	400	332	9600	8713.37 90
-
-
-
-
-
-  5.2.	Anthony Healy's Report
-
-
-
-
-
-
-
-  Date: Mon, 13 Feb 1995 16:17:29 +1100 (EST)
-  From: Antony Healey <ahealey@st.nepean.uws.edu.au>
-  To: Simon Janes <guru@ncm.com>
-  Subject: Re: Load Balancing
-
-  Hi Simon,
-	  I've installed your patch and it works great. I have trialed
-	  it over twin SL/IP lines, just over null modems, but I was
-	  able to data at over 48Kb/s [ISDN link -Simon]. I managed a
-	  transfer of up to 7.5 Kbyte/s on one go, but averaged around
-	  6.4 Kbyte/s, which I think is pretty cool.  :)
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 313f66900bce..9ef6ef42bdc5 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -54,6 +54,7 @@ Contents:
    defza
    dns_resolver
    driver
+   eql
 
 .. only::  subproject and html
 
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 4ab6d343fd86..c822f4a6d166 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -126,7 +126,7 @@ config EQUALIZER
 	  Linux driver or with a Livingston Portmaster 2e.
 
 	  Say Y if you want this and read
-	  <file:Documentation/networking/eql.txt>.  You may also want to read
+	  <file:Documentation/networking/eql.rst>.  You may also want to read
 	  section 6.2 of the NET-3-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>.
 
-- 
cgit v1.2.3-59-g8ed1b


From aee113427c5d205730b2c1a023661799f41aca23 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:35 +0200
Subject: docs: networking: convert fib_trie.txt to ReST

- add SPDX header;
- adjust title markup;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/fib_trie.rst | 149 ++++++++++++++++++++++++++++++++++
 Documentation/networking/fib_trie.txt | 145 ---------------------------------
 Documentation/networking/index.rst    |   1 +
 3 files changed, 150 insertions(+), 145 deletions(-)
 create mode 100644 Documentation/networking/fib_trie.rst
 delete mode 100644 Documentation/networking/fib_trie.txt

diff --git a/Documentation/networking/fib_trie.rst b/Documentation/networking/fib_trie.rst
new file mode 100644
index 000000000000..f1435b7fcdb7
--- /dev/null
+++ b/Documentation/networking/fib_trie.rst
@@ -0,0 +1,149 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+LC-trie implementation notes
+============================
+
+Node types
+----------
+leaf
+	An end node with data. This has a copy of the relevant key, along
+	with 'hlist' with routing table entries sorted by prefix length.
+	See struct leaf and struct leaf_info.
+
+trie node or tnode
+	An internal node, holding an array of child (leaf or tnode) pointers,
+	indexed	through a subset of the key. See Level Compression.
+
+A few concepts explained
+------------------------
+Bits (tnode)
+	The number of bits in the key segment used for indexing into the
+	child array - the "child index". See Level Compression.
+
+Pos (tnode)
+	The position (in the key) of the key segment used for indexing into
+	the child array. See Path Compression.
+
+Path Compression / skipped bits
+	Any given tnode is linked to from the child array of its parent, using
+	a segment of the key specified by the parent's "pos" and "bits".
+	In certain cases, this tnode's own "pos" will not be immediately
+	adjacent to the parent (pos+bits), but there will be some bits
+	in the key skipped over because they represent a single path with no
+	deviations. These "skipped bits" constitute Path Compression.
+	Note that the search algorithm will simply skip over these bits when
+	searching, making it necessary to save the keys in the leaves to
+	verify that they actually do match the key we are searching for.
+
+Level Compression / child arrays
+	the trie is kept level balanced moving, under certain conditions, the
+	children of a full child (see "full_children") up one level, so that
+	instead of a pure binary tree, each internal node ("tnode") may
+	contain an arbitrarily large array of links to several children.
+	Conversely, a tnode with a mostly empty	child array (see empty_children)
+	may be "halved", having some of its children moved downwards one level,
+	in order to avoid ever-increasing child arrays.
+
+empty_children
+	the number of positions in the child array of a given tnode that are
+	NULL.
+
+full_children
+	the number of children of a given tnode that aren't path compressed.
+	(in other words, they aren't NULL or leaves and their "pos" is equal
+	to this	tnode's "pos"+"bits").
+
+	(The word "full" here is used more in the sense of "complete" than
+	as the opposite of "empty", which might be a tad confusing.)
+
+Comments
+---------
+
+We have tried to keep the structure of the code as close to fib_hash as
+possible to allow verification and help with reviewing.
+
+fib_find_node()
+	A good start for understanding this code. This function implements a
+	straightforward trie lookup.
+
+fib_insert_node()
+	Inserts a new leaf node in the trie. This is a bit more complicated than
+	fib_find_node(). Inserting a new node means we might have to run the
+	level compression algorithm on part of the trie.
+
+trie_leaf_remove()
+	Looks up a key, deletes it and runs the level compression algorithm.
+
+trie_rebalance()
+	The key function for the dynamic trie. After any change in the trie
+	it is run to optimize and reorganize. It will walk the trie upwards
+	towards the root from a given tnode, doing a resize() at each step
+	to implement level compression.
+
+resize()
+	Analyzes a tnode and optimizes the child array size by either inflating
+	or shrinking it repeatedly until it fulfills the criteria for optimal
+	level compression. This part follows the original paper pretty closely
+	and there may be some room for experimentation here.
+
+inflate()
+	Doubles the size of the child array within a tnode. Used by resize().
+
+halve()
+	Halves the size of the child array within a tnode - the inverse of
+	inflate(). Used by resize().
+
+fn_trie_insert(), fn_trie_delete(), fn_trie_select_default()
+	The route manipulation functions. Should conform pretty closely to the
+	corresponding functions in fib_hash.
+
+fn_trie_flush()
+	This walks the full trie (using nextleaf()) and searches for empty
+	leaves which have to be removed.
+
+fn_trie_dump()
+	Dumps the routing table ordered by prefix length. This is somewhat
+	slower than the corresponding fib_hash function, as we have to walk the
+	entire trie for each prefix length. In comparison, fib_hash is organized
+	as one "zone"/hash per prefix length.
+
+Locking
+-------
+
+fib_lock is used for an RW-lock in the same way that this is done in fib_hash.
+However, the functions are somewhat separated for other possible locking
+scenarios. It might conceivably be possible to run trie_rebalance via RCU
+to avoid read_lock in the fn_trie_lookup() function.
+
+Main lookup mechanism
+---------------------
+fn_trie_lookup() is the main lookup function.
+
+The lookup is in its simplest form just like fib_find_node(). We descend the
+trie, key segment by key segment, until we find a leaf. check_leaf() does
+the fib_semantic_match in the leaf's sorted prefix hlist.
+
+If we find a match, we are done.
+
+If we don't find a match, we enter prefix matching mode. The prefix length,
+starting out at the same as the key length, is reduced one step at a time,
+and we backtrack upwards through the trie trying to find a longest matching
+prefix. The goal is always to reach a leaf and get a positive result from the
+fib_semantic_match mechanism.
+
+Inside each tnode, the search for longest matching prefix consists of searching
+through the child array, chopping off (zeroing) the least significant "1" of
+the child index until we find a match or the child index consists of nothing but
+zeros.
+
+At this point we backtrack (t->stats.backtrack++) up the trie, continuing to
+chop off part of the key in order to find the longest matching prefix.
+
+At this point we will repeatedly descend subtries to look for a match, and there
+are some optimizations available that can provide us with "shortcuts" to avoid
+descending into dead ends. Look for "HL_OPTIMIZE" sections in the code.
+
+To alleviate any doubts about the correctness of the route selection process,
+a new netlink operation has been added. Look for NETLINK_FIB_LOOKUP, which
+gives userland access to fib_lookup().
diff --git a/Documentation/networking/fib_trie.txt b/Documentation/networking/fib_trie.txt
deleted file mode 100644
index fe719388518b..000000000000
--- a/Documentation/networking/fib_trie.txt
+++ /dev/null
@@ -1,145 +0,0 @@
-			LC-trie implementation notes.
-
-Node types
-----------
-leaf 
-	An end node with data. This has a copy of the relevant key, along
-	with 'hlist' with routing table entries sorted by prefix length.
-	See struct leaf and struct leaf_info.
-
-trie node or tnode
-	An internal node, holding an array of child (leaf or tnode) pointers,
-	indexed	through a subset of the key. See Level Compression.
-
-A few concepts explained
-------------------------
-Bits (tnode) 
-	The number of bits in the key segment used for indexing into the
-	child array - the "child index". See Level Compression.
-
-Pos (tnode)
-	The position (in the key) of the key segment used for indexing into
-	the child array. See Path Compression.
-
-Path Compression / skipped bits
-	Any given tnode is linked to from the child array of its parent, using
-	a segment of the key specified by the parent's "pos" and "bits" 
-	In certain cases, this tnode's own "pos" will not be immediately
-	adjacent to the parent (pos+bits), but there will be some bits
-	in the key skipped over because they represent a single path with no
-	deviations. These "skipped bits" constitute Path Compression.
-	Note that the search algorithm will simply skip over these bits when
-	searching, making it necessary to save the keys in the leaves to
-	verify that they actually do match the key we are searching for.
-
-Level Compression / child arrays
-	the trie is kept level balanced moving, under certain conditions, the
-	children of a full child (see "full_children") up one level, so that
-	instead of a pure binary tree, each internal node ("tnode") may
-	contain an arbitrarily large array of links to several children.
-	Conversely, a tnode with a mostly empty	child array (see empty_children)
-	may be "halved", having some of its children moved downwards one level,
-	in order to avoid ever-increasing child arrays.
-
-empty_children
-	the number of positions in the child array of a given tnode that are
-	NULL.
-
-full_children
-	the number of children of a given tnode that aren't path compressed.
-	(in other words, they aren't NULL or leaves and their "pos" is equal
-	to this	tnode's "pos"+"bits").
-
-	(The word "full" here is used more in the sense of "complete" than
-	as the opposite of "empty", which might be a tad confusing.)
-
-Comments
----------
-
-We have tried to keep the structure of the code as close to fib_hash as 
-possible to allow verification and help up reviewing. 
-
-fib_find_node()
-	A good start for understanding this code. This function implements a
-	straightforward trie lookup.
-
-fib_insert_node()
-	Inserts a new leaf node in the trie. This is bit more complicated than
-	fib_find_node(). Inserting a new node means we might have to run the
-	level compression algorithm on part of the trie.
-
-trie_leaf_remove()
-	Looks up a key, deletes it and runs the level compression algorithm.
-
-trie_rebalance()
-	The key function for the dynamic trie after any change in the trie
-	it is run to optimize and reorganize. It will walk the trie upwards
-	towards the root from a given tnode, doing a resize() at each step
-	to implement level compression.
-
-resize()
-	Analyzes a tnode and optimizes the child array size by either inflating
-	or shrinking it repeatedly until it fulfills the criteria for optimal
-	level compression. This part follows the original paper pretty closely
-	and there may be some room for experimentation here.
-
-inflate()
-	Doubles the size of the child array within a tnode. Used by resize().
-
-halve()
-	Halves the size of the child array within a tnode - the inverse of
-	inflate(). Used by resize();
-
-fn_trie_insert(), fn_trie_delete(), fn_trie_select_default()
-	The route manipulation functions. Should conform pretty closely to the
-	corresponding functions in fib_hash.
-
-fn_trie_flush()
-	This walks the full trie (using nextleaf()) and searches for empty
-	leaves which have to be removed.
-
-fn_trie_dump()
-	Dumps the routing table ordered by prefix length. This is somewhat
-	slower than the corresponding fib_hash function, as we have to walk the
-	entire trie for each prefix length. In comparison, fib_hash is organized
-	as one "zone"/hash per prefix length.
-
-Locking
--------
-
-fib_lock is used for an RW-lock in the same way that this is done in fib_hash.
-However, the functions are somewhat separated for other possible locking
-scenarios. It might conceivably be possible to run trie_rebalance via RCU
-to avoid read_lock in the fn_trie_lookup() function.
-
-Main lookup mechanism
----------------------
-fn_trie_lookup() is the main lookup function.
-
-The lookup is in its simplest form just like fib_find_node(). We descend the
-trie, key segment by key segment, until we find a leaf. check_leaf() does
-the fib_semantic_match in the leaf's sorted prefix hlist.
-
-If we find a match, we are done.
-
-If we don't find a match, we enter prefix matching mode. The prefix length,
-starting out at the same as the key length, is reduced one step at a time,
-and we backtrack upwards through the trie trying to find a longest matching
-prefix. The goal is always to reach a leaf and get a positive result from the
-fib_semantic_match mechanism.
-
-Inside each tnode, the search for longest matching prefix consists of searching
-through the child array, chopping off (zeroing) the least significant "1" of
-the child index until we find a match or the child index consists of nothing but
-zeros.
-
-At this point we backtrack (t->stats.backtrack++) up the trie, continuing to
-chop off part of the key in order to find the longest matching prefix.
-
-At this point we will repeatedly descend subtries to look for a match, and there
-are some optimizations available that can provide us with "shortcuts" to avoid
-descending into dead ends. Look for "HL_OPTIMIZE" sections in the code.
-
-To alleviate any doubts about the correctness of the route selection process,
-a new netlink operation has been added. Look for NETLINK_FIB_LOOKUP, which
-gives userland access to fib_lookup().
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 9ef6ef42bdc5..807abe25ae4b 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -55,6 +55,7 @@ Contents:
    dns_resolver
    driver
    eql
+   fib_trie
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From cb3f0d56e153398a035eb22769d2cb2837f29747 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:36 +0200
Subject: docs: networking: convert filter.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- use footnote markup;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/bpf/index.rst              |    4 +-
 Documentation/networking/filter.rst      | 1651 ++++++++++++++++++++++++++++++
 Documentation/networking/filter.txt      | 1545 ----------------------------
 Documentation/networking/index.rst       |    1 +
 Documentation/networking/packet_mmap.txt |    2 +-
 MAINTAINERS                              |    2 +-
 tools/bpf/bpf_asm.c                      |    2 +-
 tools/bpf/bpf_dbg.c                      |    2 +-
 8 files changed, 1658 insertions(+), 1551 deletions(-)
 create mode 100644 Documentation/networking/filter.rst
 delete mode 100644 Documentation/networking/filter.txt

diff --git a/Documentation/bpf/index.rst b/Documentation/bpf/index.rst
index f99677f3572f..38b4db8be7a2 100644
--- a/Documentation/bpf/index.rst
+++ b/Documentation/bpf/index.rst
@@ -7,7 +7,7 @@ Filter) facility, with a focus on the extended BPF version (eBPF).
 
 This kernel side documentation is still work in progress.  The main
 textual documentation is (for historical reasons) described in
-`Documentation/networking/filter.txt`_, which describe both classical
+`Documentation/networking/filter.rst`_, which describe both classical
 and extended BPF instruction-set.
 The Cilium project also maintains a `BPF and XDP Reference Guide`_
 that goes into great technical depth about the BPF Architecture.
@@ -59,7 +59,7 @@ Testing and debugging BPF
 
 
 .. Links:
-.. _Documentation/networking/filter.txt: ../networking/filter.txt
+.. _Documentation/networking/filter.rst: ../networking/filter.rst
 .. _man-pages: https://www.kernel.org/doc/man-pages/
 .. _bpf(2): http://man7.org/linux/man-pages/man2/bpf.2.html
 .. _BPF and XDP Reference Guide: http://cilium.readthedocs.io/en/latest/bpf/
diff --git a/Documentation/networking/filter.rst b/Documentation/networking/filter.rst
new file mode 100644
index 000000000000..a1d3e192b9fa
--- /dev/null
+++ b/Documentation/networking/filter.rst
@@ -0,0 +1,1651 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================================
+Linux Socket Filtering aka Berkeley Packet Filter (BPF)
+=======================================================
+
+Introduction
+------------
+
+Linux Socket Filtering (LSF) is derived from the Berkeley Packet Filter.
+Though there are some distinct differences between the BSD and Linux
+Kernel filtering, when we speak of BPF or LSF in Linux context, we
+mean the very same mechanism of filtering in the Linux kernel.
+
+BPF allows a user-space program to attach a filter onto any socket and
+allow or disallow certain types of data to come through the socket. LSF
+follows exactly the same filter code structure as BSD's BPF, so referring
+to the BSD bpf.4 manpage is very helpful in creating filters.
+
+On Linux, BPF is much simpler than on BSD. One does not have to worry
+about devices or anything like that. You simply create your filter code,
+send it to the kernel via the SO_ATTACH_FILTER option and if your filter
+code passes the kernel check on it, you then immediately begin filtering
+data on that socket.
+
+You can also detach filters from your socket via the SO_DETACH_FILTER
+option. This will probably not be used much since when you close a socket
+that has a filter on it the filter is automagically removed. The other
+less common case may be adding a different filter on the same socket where
+you had another filter that is still running: the kernel takes care of
+removing the old one and placing your new one in its place, assuming your
+filter has passed the checks, otherwise if it fails the old filter will
+remain on that socket.
+
+The SO_LOCK_FILTER option allows locking the filter attached to a socket. Once
+set, a filter cannot be removed or changed. This allows one process to
+setup a socket, attach a filter, lock it then drop privileges and be
+assured that the filter will be kept until the socket is closed.
+
+The biggest user of this construct might be libpcap. Issuing a high-level
+filter command like `tcpdump -i em1 port 22` passes through the libpcap
+internal compiler that generates a structure that can eventually be loaded
+via SO_ATTACH_FILTER to the kernel. `tcpdump -i em1 port 22 -ddd`
+displays what is being placed into this structure.
+
+Although we were only speaking about sockets here, BPF in Linux is used
+in many more places. There's xt_bpf for netfilter, cls_bpf in the kernel
+qdisc layer, SECCOMP-BPF (SECure COMPuting [1]_), and lots of other places
+such as team driver, PTP code, etc where BPF is being used.
+
+.. [1] Documentation/userspace-api/seccomp_filter.rst
+
+Original BPF paper:
+
+Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
+architecture for user-level packet capture. In Proceedings of the
+USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
+Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
+CA, USA, 2-2. [http://www.tcpdump.org/papers/bpf-usenix93.pdf]
+
+Structure
+---------
+
+User space applications include <linux/filter.h> which contains the
+following relevant structures::
+
+	struct sock_filter {	/* Filter block */
+		__u16	code;   /* Actual filter code */
+		__u8	jt;	/* Jump true */
+		__u8	jf;	/* Jump false */
+		__u32	k;      /* Generic multiuse field */
+	};
+
+Such a structure is assembled as an array of 4-tuples, that contains
+a code, jt, jf and k value. jt and jf are jump offsets and k a generic
+value to be used for a provided code::
+
+	struct sock_fprog {			/* Required for SO_ATTACH_FILTER. */
+		unsigned short		   len;	/* Number of filter blocks */
+		struct sock_filter __user *filter;
+	};
+
+For socket filtering, a pointer to this structure (as shown in
+follow-up example) is being passed to the kernel through setsockopt(2).
+
+Example
+-------
+
+::
+
+    #include <sys/socket.h>
+    #include <sys/types.h>
+    #include <arpa/inet.h>
+    #include <linux/if_ether.h>
+    /* ... */
+
+    /* From the example above: tcpdump -i em1 port 22 -dd */
+    struct sock_filter code[] = {
+	    { 0x28,  0,  0, 0x0000000c },
+	    { 0x15,  0,  8, 0x000086dd },
+	    { 0x30,  0,  0, 0x00000014 },
+	    { 0x15,  2,  0, 0x00000084 },
+	    { 0x15,  1,  0, 0x00000006 },
+	    { 0x15,  0, 17, 0x00000011 },
+	    { 0x28,  0,  0, 0x00000036 },
+	    { 0x15, 14,  0, 0x00000016 },
+	    { 0x28,  0,  0, 0x00000038 },
+	    { 0x15, 12, 13, 0x00000016 },
+	    { 0x15,  0, 12, 0x00000800 },
+	    { 0x30,  0,  0, 0x00000017 },
+	    { 0x15,  2,  0, 0x00000084 },
+	    { 0x15,  1,  0, 0x00000006 },
+	    { 0x15,  0,  8, 0x00000011 },
+	    { 0x28,  0,  0, 0x00000014 },
+	    { 0x45,  6,  0, 0x00001fff },
+	    { 0xb1,  0,  0, 0x0000000e },
+	    { 0x48,  0,  0, 0x0000000e },
+	    { 0x15,  2,  0, 0x00000016 },
+	    { 0x48,  0,  0, 0x00000010 },
+	    { 0x15,  0,  1, 0x00000016 },
+	    { 0x06,  0,  0, 0x0000ffff },
+	    { 0x06,  0,  0, 0x00000000 },
+    };
+
+    struct sock_fprog bpf = {
+	    .len = ARRAY_SIZE(code),
+	    .filter = code,
+    };
+
+    sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+    if (sock < 0)
+	    /* ... bail out ... */
+
+    ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
+    if (ret < 0)
+	    /* ... bail out ... */
+
+    /* ... */
+    close(sock);
+
+The above example code attaches a socket filter for a PF_PACKET socket
+in order to let all IPv4/IPv6 packets with port 22 pass. The rest will
+be dropped for this socket.
+
+The setsockopt(2) call to SO_DETACH_FILTER doesn't need any arguments
+and SO_LOCK_FILTER, for preventing the filter from being detached, takes an
+integer value with 0 or 1.
+
+Note that socket filters are not restricted to PF_PACKET sockets only,
+but can also be used on other socket families.
+
+Summary of system calls:
+
+ * setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &val, sizeof(val));
+ * setsockopt(sockfd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val));
+ * setsockopt(sockfd, SOL_SOCKET, SO_LOCK_FILTER,   &val, sizeof(val));
+
+Normally, most use cases for socket filtering on packet sockets will be
+covered by libpcap in high-level syntax, so as an application developer
+you should stick to that. libpcap wraps its own layer around all that.
+
+Unless i) using/linking to libpcap is not an option, ii) the required BPF
+filters use Linux extensions that are not supported by libpcap's compiler,
+iii) a filter might be more complex and not cleanly implementable with
+libpcap's compiler, or iv) particular filter codes should be optimized
+differently than libpcap's internal compiler does; then in such cases
+writing such a filter "by hand" can be an alternative. For example,
+xt_bpf and cls_bpf users might have requirements that could result in
+more complex filter code, or one that cannot be expressed with libpcap
+(e.g. different return codes for various code paths). Moreover, BPF JIT
+implementors may wish to manually write test cases and thus need low-level
+access to BPF code as well.
+
+BPF engine and instruction set
+------------------------------
+
+Under tools/bpf/ there's a small helper tool called bpf_asm which can
+be used to write low-level filters for example scenarios mentioned in the
+previous section. Asm-like syntax mentioned here has been implemented in
+bpf_asm and will be used for further explanations (instead of dealing with
+less readable opcodes directly, principles are the same). The syntax is
+closely modelled after Steven McCanne's and Van Jacobson's BPF paper.
+
+The BPF architecture consists of the following basic elements:
+
+  =======          ====================================================
+  Element          Description
+  =======          ====================================================
+  A                32 bit wide accumulator
+  X                32 bit wide X register
+  M[]              16 x 32 bit wide misc registers aka "scratch memory
+		   store", addressable from 0 to 15
+  =======          ====================================================
+
+A program, that is translated by bpf_asm into "opcodes" is an array that
+consists of the following elements (as already mentioned)::
+
+  op:16, jt:8, jf:8, k:32
+
+The element op is a 16 bit wide opcode that has a particular instruction
+encoded. jt and jf are two 8 bit wide jump targets, one for condition
+"jump if true", the other one "jump if false". Eventually, element k
+contains a miscellaneous argument that can be interpreted in different
+ways depending on the given instruction in op.
+
+The instruction set consists of load, store, branch, alu, miscellaneous
+and return instructions that are also represented in bpf_asm syntax. This
+table lists all bpf_asm instructions available resp. what their underlying
+opcodes as defined in linux/filter.h stand for:
+
+  ===========      ===================  =====================
+  Instruction      Addressing mode      Description
+  ===========      ===================  =====================
+  ld               1, 2, 3, 4, 12       Load word into A
+  ldi              4                    Load word into A
+  ldh              1, 2                 Load half-word into A
+  ldb              1, 2                 Load byte into A
+  ldx              3, 4, 5, 12          Load word into X
+  ldxi             4                    Load word into X
+  ldxb             5                    Load byte into X
+
+  st               3                    Store A into M[]
+  stx              3                    Store X into M[]
+
+  jmp              6                    Jump to label
+  ja               6                    Jump to label
+  jeq              7, 8, 9, 10          Jump on A == <x>
+  jneq             9, 10                Jump on A != <x>
+  jne              9, 10                Jump on A != <x>
+  jlt              9, 10                Jump on A <  <x>
+  jle              9, 10                Jump on A <= <x>
+  jgt              7, 8, 9, 10          Jump on A >  <x>
+  jge              7, 8, 9, 10          Jump on A >= <x>
+  jset             7, 8, 9, 10          Jump on A &  <x>
+
+  add              0, 4                 A + <x>
+  sub              0, 4                 A - <x>
+  mul              0, 4                 A * <x>
+  div              0, 4                 A / <x>
+  mod              0, 4                 A % <x>
+  neg                                   !A
+  and              0, 4                 A & <x>
+  or               0, 4                 A | <x>
+  xor              0, 4                 A ^ <x>
+  lsh              0, 4                 A << <x>
+  rsh              0, 4                 A >> <x>
+
+  tax                                   Copy A into X
+  txa                                   Copy X into A
+
+  ret              4, 11                Return
+  ===========      ===================  =====================
+
+The next table shows addressing formats from the 2nd column:
+
+  ===============  ===================  ===============================================
+  Addressing mode  Syntax               Description
+  ===============  ===================  ===============================================
+   0               x/%x                 Register X
+   1               [k]                  BHW at byte offset k in the packet
+   2               [x + k]              BHW at the offset X + k in the packet
+   3               M[k]                 Word at offset k in M[]
+   4               #k                   Literal value stored in k
+   5               4*([k]&0xf)          Lower nibble * 4 at byte offset k in the packet
+   6               L                    Jump label L
+   7               #k,Lt,Lf             Jump to Lt if true, otherwise jump to Lf
+   8               x/%x,Lt,Lf           Jump to Lt if true, otherwise jump to Lf
+   9               #k,Lt                Jump to Lt if predicate is true
+  10               x/%x,Lt              Jump to Lt if predicate is true
+  11               a/%a                 Accumulator A
+  12               extension            BPF extension
+  ===============  ===================  ===============================================
+
+The Linux kernel also has a couple of BPF extensions that are used along
+with the class of load instructions by "overloading" the k argument with
+a negative offset + a particular extension offset. The result of such BPF
+extensions are loaded into A.
+
+Possible BPF extensions are shown in the following table:
+
+  ===================================   =================================================
+  Extension                             Description
+  ===================================   =================================================
+  len                                   skb->len
+  proto                                 skb->protocol
+  type                                  skb->pkt_type
+  poff                                  Payload start offset
+  ifidx                                 skb->dev->ifindex
+  nla                                   Netlink attribute of type X with offset A
+  nlan                                  Nested Netlink attribute of type X with offset A
+  mark                                  skb->mark
+  queue                                 skb->queue_mapping
+  hatype                                skb->dev->type
+  rxhash                                skb->hash
+  cpu                                   raw_smp_processor_id()
+  vlan_tci                              skb_vlan_tag_get(skb)
+  vlan_avail                            skb_vlan_tag_present(skb)
+  vlan_tpid                             skb->vlan_proto
+  rand                                  prandom_u32()
+  ===================================   =================================================
+
+These extensions can also be prefixed with '#'.
+Examples for low-level BPF:
+
+**ARP packets**::
+
+  ldh [12]
+  jne #0x806, drop
+  ret #-1
+  drop: ret #0
+
+**IPv4 TCP packets**::
+
+  ldh [12]
+  jne #0x800, drop
+  ldb [23]
+  jneq #6, drop
+  ret #-1
+  drop: ret #0
+
+**(Accelerated) VLAN w/ id 10**::
+
+  ld vlan_tci
+  jneq #10, drop
+  ret #-1
+  drop: ret #0
+
+**icmp random packet sampling, 1 in 4**::
+
+  ldh [12]
+  jne #0x800, drop
+  ldb [23]
+  jneq #1, drop
+  # get a random uint32 number
+  ld rand
+  mod #4
+  jneq #1, drop
+  ret #-1
+  drop: ret #0
+
+**SECCOMP filter example**::
+
+  ld [4]                  /* offsetof(struct seccomp_data, arch) */
+  jne #0xc000003e, bad    /* AUDIT_ARCH_X86_64 */
+  ld [0]                  /* offsetof(struct seccomp_data, nr) */
+  jeq #15, good           /* __NR_rt_sigreturn */
+  jeq #231, good          /* __NR_exit_group */
+  jeq #60, good           /* __NR_exit */
+  jeq #0, good            /* __NR_read */
+  jeq #1, good            /* __NR_write */
+  jeq #5, good            /* __NR_fstat */
+  jeq #9, good            /* __NR_mmap */
+  jeq #14, good           /* __NR_rt_sigprocmask */
+  jeq #13, good           /* __NR_rt_sigaction */
+  jeq #35, good           /* __NR_nanosleep */
+  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
+  good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
+
+The above example code can be placed into a file (here called "foo"), and
+then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
+and cls_bpf understands and can directly be loaded with. Example with above
+ARP code::
+
+    $ ./bpf_asm foo
+    4,40 0 0 12,21 0 1 2054,6 0 0 4294967295,6 0 0 0,
+
+In copy and paste C-like output::
+
+    $ ./bpf_asm -c foo
+    { 0x28,  0,  0, 0x0000000c },
+    { 0x15,  0,  1, 0x00000806 },
+    { 0x06,  0,  0, 0xffffffff },
+    { 0x06,  0,  0, 0000000000 },
+
+In particular, as usage with xt_bpf or cls_bpf can result in more complex BPF
+filters that might not be obvious at first, it's good to test filters before
+attaching to a live system. For that purpose, there's a small tool called
+bpf_dbg under tools/bpf/ in the kernel source directory. This debugger allows
+for testing BPF filters against given pcap files, single stepping through the
+BPF code on the pcap's packets and to do BPF machine register dumps.
+
+Starting bpf_dbg is trivial and just requires issuing::
+
+    # ./bpf_dbg
+
+In case input and output do not equal stdin/stdout, bpf_dbg takes an
+alternative stdin source as a first argument, and an alternative stdout
+sink as a second one, e.g. `./bpf_dbg test_in.txt test_out.txt`.
+
+Other than that, a particular libreadline configuration can be set via
+file "~/.bpf_dbg_init" and the command history is stored in the file
+"~/.bpf_dbg_history".
+
+Interaction in bpf_dbg happens through a shell that also has auto-completion
+support (follow-up example commands starting with '>' denote bpf_dbg shell).
+The usual workflow would be to ...
+
+* load bpf 6,40 0 0 12,21 0 3 2048,48 0 0 23,21 0 1 1,6 0 0 65535,6 0 0 0
+
+  Loads a BPF filter from standard output of bpf_asm, or transformed via
+  e.g. ``tcpdump -iem1 -ddd port 22 | tr '\n' ','``. Note that for JIT
+  debugging (next section), this command creates a temporary socket and
+  loads the BPF code into the kernel. Thus, this will also be useful for
+  JIT developers.
+
+* load pcap foo.pcap
+
+  Loads standard tcpdump pcap file.
+
+* run [<n>]
+
+  bpf passes:1 fails:9
+  Runs through all packets from a pcap to account how many passes and fails
+  the filter will generate. A limit of packets to traverse can be given.
+
+* disassemble::
+
+	l0:	ldh [12]
+	l1:	jeq #0x800, l2, l5
+	l2:	ldb [23]
+	l3:	jeq #0x1, l4, l5
+	l4:	ret #0xffff
+	l5:	ret #0
+
+  Prints out BPF code disassembly.
+
+* dump::
+
+	/* { op, jt, jf, k }, */
+	{ 0x28,  0,  0, 0x0000000c },
+	{ 0x15,  0,  3, 0x00000800 },
+	{ 0x30,  0,  0, 0x00000017 },
+	{ 0x15,  0,  1, 0x00000001 },
+	{ 0x06,  0,  0, 0x0000ffff },
+	{ 0x06,  0,  0, 0000000000 },
+
+  Prints out C-style BPF code dump.
+
+* breakpoint 0::
+
+	breakpoint at: l0:	ldh [12]
+
+* breakpoint 1::
+
+	breakpoint at: l1:	jeq #0x800, l2, l5
+
+  ...
+
+  Sets breakpoints at particular BPF instructions. Issuing a `run` command
+  will walk through the pcap file continuing from the current packet and
+  break when a breakpoint is being hit (another `run` will continue from
+  the currently active breakpoint executing next instructions):
+
+  * run::
+
+	-- register dump --
+	pc:       [0]                       <-- program counter
+	code:     [40] jt[0] jf[0] k[12]    <-- plain BPF code of current instruction
+	curr:     l0:	ldh [12]              <-- disassembly of current instruction
+	A:        [00000000][0]             <-- content of A (hex, decimal)
+	X:        [00000000][0]             <-- content of X (hex, decimal)
+	M[0,15]:  [00000000][0]             <-- folded content of M (hex, decimal)
+	-- packet dump --                   <-- Current packet from pcap (hex)
+	len: 42
+	  0: 00 19 cb 55 55 a4 00 14 a4 43 78 69 08 06 00 01
+	 16: 08 00 06 04 00 01 00 14 a4 43 78 69 0a 3b 01 26
+	 32: 00 00 00 00 00 00 0a 3b 01 01
+	(breakpoint)
+	>
+
+  * breakpoint::
+
+	breakpoints: 0 1
+
+    Prints currently set breakpoints.
+
+* step [-<n>, +<n>]
+
+  Performs single stepping through the BPF program from the current pc
+  offset. Thus, on each step invocation, above register dump is issued.
+  This can go forwards and backwards in time, a plain `step` will break
+  on the next BPF instruction, thus +1. (No `run` needs to be issued here.)
+
+* select <n>
+
+  Selects a given packet from the pcap file to continue from. Thus, on
+  the next `run` or `step`, the BPF program is being evaluated against
+  the user pre-selected packet. Numbering starts just as in Wireshark
+  with index 1.
+
+* quit
+
+  Exits bpf_dbg.
+
+JIT compiler
+------------
+
+The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC,
+PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through
+CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each
+attached filter from user space or for internal kernel users if it has
+been previously enabled by root::
+
+  echo 1 > /proc/sys/net/core/bpf_jit_enable
+
+For JIT developers, doing audits etc, each compile run can output the generated
+opcode image into the kernel log via::
+
+  echo 2 > /proc/sys/net/core/bpf_jit_enable
+
+Example output from dmesg::
+
+    [ 3389.935842] flen=6 proglen=70 pass=3 image=ffffffffa0069c8f
+    [ 3389.935847] JIT code: 00000000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 68
+    [ 3389.935849] JIT code: 00000010: 44 2b 4f 6c 4c 8b 87 d8 00 00 00 be 0c 00 00 00
+    [ 3389.935850] JIT code: 00000020: e8 1d 94 ff e0 3d 00 08 00 00 75 16 be 17 00 00
+    [ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
+    [ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
+
+When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and
+setting any other value than that will result in failure. This is even the case for
+setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log
+is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the
+generally recommended approach instead.
+
+In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
+generating disassembly out of the kernel log's hexdump::
+
+	# ./bpf_jit_disasm
+	70 bytes emitted from JIT compiler (pass:3, flen:6)
+	ffffffffa0069c8f + <x>:
+	0:	push   %rbp
+	1:	mov    %rsp,%rbp
+	4:	sub    $0x60,%rsp
+	8:	mov    %rbx,-0x8(%rbp)
+	c:	mov    0x68(%rdi),%r9d
+	10:	sub    0x6c(%rdi),%r9d
+	14:	mov    0xd8(%rdi),%r8
+	1b:	mov    $0xc,%esi
+	20:	callq  0xffffffffe0ff9442
+	25:	cmp    $0x800,%eax
+	2a:	jne    0x0000000000000042
+	2c:	mov    $0x17,%esi
+	31:	callq  0xffffffffe0ff945e
+	36:	cmp    $0x1,%eax
+	39:	jne    0x0000000000000042
+	3b:	mov    $0xffff,%eax
+	40:	jmp    0x0000000000000044
+	42:	xor    %eax,%eax
+	44:	leaveq
+	45:	retq
+
+Issuing option `-o` will "annotate" opcodes to resulting assembler
+instructions, which can be very useful for JIT developers::
+
+	# ./bpf_jit_disasm -o
+	70 bytes emitted from JIT compiler (pass:3, flen:6)
+	ffffffffa0069c8f + <x>:
+	0:	push   %rbp
+		55
+	1:	mov    %rsp,%rbp
+		48 89 e5
+	4:	sub    $0x60,%rsp
+		48 83 ec 60
+	8:	mov    %rbx,-0x8(%rbp)
+		48 89 5d f8
+	c:	mov    0x68(%rdi),%r9d
+		44 8b 4f 68
+	10:	sub    0x6c(%rdi),%r9d
+		44 2b 4f 6c
+	14:	mov    0xd8(%rdi),%r8
+		4c 8b 87 d8 00 00 00
+	1b:	mov    $0xc,%esi
+		be 0c 00 00 00
+	20:	callq  0xffffffffe0ff9442
+		e8 1d 94 ff e0
+	25:	cmp    $0x800,%eax
+		3d 00 08 00 00
+	2a:	jne    0x0000000000000042
+		75 16
+	2c:	mov    $0x17,%esi
+		be 17 00 00 00
+	31:	callq  0xffffffffe0ff945e
+		e8 28 94 ff e0
+	36:	cmp    $0x1,%eax
+		83 f8 01
+	39:	jne    0x0000000000000042
+		75 07
+	3b:	mov    $0xffff,%eax
+		b8 ff ff 00 00
+	40:	jmp    0x0000000000000044
+		eb 02
+	42:	xor    %eax,%eax
+		31 c0
+	44:	leaveq
+		c9
+	45:	retq
+		c3
+
+For BPF JIT developers, bpf_jit_disasm, bpf_asm and bpf_dbg provide a useful
+toolchain for developing and testing the kernel's JIT compiler.
+
+BPF kernel internals
+--------------------
+Internally, for the kernel interpreter, a different instruction set
+format with similar underlying principles from BPF described in previous
+paragraphs is being used. However, the instruction set format is modelled
+closer to the underlying architecture to mimic native instruction sets, so
+that a better performance can be achieved (more details later). This new
+ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which
+originates from [e]xtended BPF is not the same as BPF extensions! While
+eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading'
+of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.)
+
+It is designed to be JITed with one to one mapping, which can also open up
+the possibility for GCC/LLVM compilers to generate optimized eBPF code through
+an eBPF backend that performs almost as fast as natively compiled code.
+
+The new instruction set was originally designed with the possible goal in
+mind to write programs in "restricted C" and compile into eBPF with an optional
+GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with
+minimal performance overhead over two steps, that is, C -> eBPF -> native code.
+
+Currently, the new format is being used for running user BPF programs, which
+includes seccomp BPF, classic socket filters, cls_bpf traffic classifier,
+team driver's classifier for its load-balancing mode, netfilter's xt_bpf
+extension, PTP dissector/classifier, and much more. They are all internally
+converted by the kernel into the new instruction set representation and run
+in the eBPF interpreter. For in-kernel handlers, this all works transparently
+by using bpf_prog_create() for setting up the filter, resp.
+bpf_prog_destroy() for destroying it. The macro
+BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
+code to run the filter. 'filter' is a pointer to struct bpf_prog that we
+got from bpf_prog_create(), and 'ctx' the given context (e.g.
+skb pointer). All constraints and restrictions from bpf_check_classic() apply
+before a conversion to the new layout is being done behind the scenes!
+
+Currently, the classic BPF format is being used for JITing on most
+32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
+sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
+instruction set.
+
+Some core changes of the new internal format:
+
+- Number of registers increase from 2 to 10:
+
+  The old format had two registers A and X, and a hidden frame pointer. The
+  new layout extends this to be 10 internal registers and a read-only frame
+  pointer. Since 64-bit CPUs are passing arguments to functions via registers
+  the number of args from eBPF program to in-kernel function is restricted
+  to 5 and one register is used to accept return value from an in-kernel
+  function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
+  sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
+  registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
+
+  Therefore, eBPF calling convention is defined as:
+
+    * R0	- return value from in-kernel function, and exit value for eBPF program
+    * R1 - R5	- arguments from eBPF program to in-kernel function
+    * R6 - R9	- callee saved registers that in-kernel function will preserve
+    * R10	- read-only frame pointer to access stack
+
+  Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
+  etc, and eBPF calling convention maps directly to ABIs used by the kernel on
+  64-bit architectures.
+
+  On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
+  and may let more complex programs to be interpreted.
+
+  R0 - R5 are scratch registers and eBPF program needs to spill/fill them if
+  necessary across calls. Note that there is only one eBPF program (== one
+  eBPF main routine) and it cannot call other eBPF functions, it can only
+  call predefined in-kernel functions, though.
+
+- Register width increases from 32-bit to 64-bit:
+
+  Still, the semantics of the original 32-bit ALU operations are preserved
+  via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
+  subregisters that zero-extend into 64-bit if they are being written to.
+  That behavior maps directly to x86_64 and arm64 subregister definition, but
+  makes other JITs more difficult.
+
+  32-bit architectures run 64-bit internal BPF programs via interpreter.
+  Their JITs may convert BPF programs that only use 32-bit subregisters into
+  native instruction set and let the rest being interpreted.
+
+  Operation is 64-bit, because on 64-bit architectures, pointers are also
+  64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
+  so 32-bit eBPF registers would otherwise require to define register-pair
+  ABI, thus, it won't be possible to use a direct eBPF register to HW register
+  mapping and JIT would need to do combine/split/move operations for every
+  register in and out of the function, which is complex, bug prone and slow.
+  Another reason is the use of atomic 64-bit counters.
+
+- Conditional jt/jf targets replaced with jt/fall-through:
+
+  While the original design has constructs such as ``if (cond) jump_true;
+  else jump_false;``, they are being replaced into alternative constructs like
+  ``if (cond) jump_true; /* else fall-through */``.
+
+- Introduces bpf_call insn and register passing convention for zero overhead
+  calls from/to other kernel functions:
+
+  Before an in-kernel function call, the internal BPF program needs to
+  place function arguments into R1 to R5 registers to satisfy calling
+  convention, then the interpreter will take them from registers and pass
+  to in-kernel function. If R1 - R5 registers are mapped to CPU registers
+  that are used for argument passing on given architecture, the JIT compiler
+  doesn't need to emit extra moves. Function arguments will be in the correct
+  registers and BPF_CALL instruction will be JITed as single 'call' HW
+  instruction. This calling convention was picked to cover common call
+  situations without performance penalty.
+
+  After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
+  a return value of the function. Since R6 - R9 are callee saved, their state
+  is preserved across the call.
+
+  For example, consider three C functions::
+
+    u64 f1() { return (*_f2)(1); }
+    u64 f2(u64 a) { return f3(a + 1, a); }
+    u64 f3(u64 a, u64 b) { return a - b; }
+
+  GCC can compile f1, f3 into x86_64::
+
+    f1:
+	movl $1, %edi
+	movq _f2(%rip), %rax
+	jmp  *%rax
+    f3:
+	movq %rdi, %rax
+	subq %rsi, %rax
+	ret
+
+  Function f2 in eBPF may look like::
+
+    f2:
+	bpf_mov R2, R1
+	bpf_add R1, 1
+	bpf_call f3
+	bpf_exit
+
+  If f2 is JITed and the pointer is stored to ``_f2``, the calls f1 -> f2 -> f3
+  and returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs
+  to be used to call into f2.
+
+  For practical reasons all eBPF programs have only one argument 'ctx' which is
+  already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs
+  can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
+  are currently not supported, but these restrictions can be lifted if necessary
+  in the future.
+
+  On 64-bit architectures all registers map to HW registers one to one. For
+  example, x86_64 JIT compiler can map them as ...
+
+  ::
+
+    R0 - rax
+    R1 - rdi
+    R2 - rsi
+    R3 - rdx
+    R4 - rcx
+    R5 - r8
+    R6 - rbx
+    R7 - r13
+    R8 - r14
+    R9 - r15
+    R10 - rbp
+
+  ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
+  and rbx, r12 - r15 are callee saved.
+
+  Then the following internal BPF pseudo-program::
+
+    bpf_mov R6, R1 /* save ctx */
+    bpf_mov R2, 2
+    bpf_mov R3, 3
+    bpf_mov R4, 4
+    bpf_mov R5, 5
+    bpf_call foo
+    bpf_mov R7, R0 /* save foo() return value */
+    bpf_mov R1, R6 /* restore ctx for next call */
+    bpf_mov R2, 6
+    bpf_mov R3, 7
+    bpf_mov R4, 8
+    bpf_mov R5, 9
+    bpf_call bar
+    bpf_add R0, R7
+    bpf_exit
+
+  After JIT to x86_64 may look like::
+
+    push %rbp
+    mov %rsp,%rbp
+    sub $0x228,%rsp
+    mov %rbx,-0x228(%rbp)
+    mov %r13,-0x220(%rbp)
+    mov %rdi,%rbx
+    mov $0x2,%esi
+    mov $0x3,%edx
+    mov $0x4,%ecx
+    mov $0x5,%r8d
+    callq foo
+    mov %rax,%r13
+    mov %rbx,%rdi
+    mov $0x6,%esi
+    mov $0x7,%edx
+    mov $0x8,%ecx
+    mov $0x9,%r8d
+    callq bar
+    add %r13,%rax
+    mov -0x228(%rbp),%rbx
+    mov -0x220(%rbp),%r13
+    leaveq
+    retq
+
+  Which is in this example equivalent in C to::
+
+    u64 bpf_filter(u64 ctx)
+    {
+	return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
+    }
+
+  In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
+  arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
+  registers and place their return value into ``%rax`` which is R0 in eBPF.
+  Prologue and epilogue are emitted by JIT and are implicit in the
+  interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
+  them across the calls as defined by calling convention.
+
+  For example the following program is invalid::
+
+    bpf_mov R1, 1
+    bpf_call foo
+    bpf_mov R0, R1
+    bpf_exit
+
+  After the call the registers R1-R5 contain junk values and cannot be read.
+  An in-kernel eBPF verifier is used to validate internal BPF programs.
+
+Also in the new design, eBPF is limited to 4096 insns, which means that any
+program will terminate quickly and will only call a fixed number of kernel
+functions. Original BPF and the new format are two operand instructions,
+which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
+
+The input context pointer for invoking the interpreter function is generic,
+its content is defined by a specific use case. For seccomp register R1 points
+to seccomp_data, for converted BPF filters R1 points to a skb.
+
+A program that is translated internally consists of the following elements::
+
+  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
+
+So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
+has room for new instructions. Some of them may use 16/24/32 byte encoding. New
+instructions must be multiple of 8 bytes to preserve backward compatibility.
+
+Internal BPF is a general purpose RISC instruction set. Not every register and
+every instruction are used during translation from original BPF to new format.
+For example, socket filters are not using ``exclusive add`` instruction, but
+tracing filters may do to maintain counters of events, for example. Register R9
+is not used by socket filters either, but more complex filters may be running
+out of registers and would have to resort to spill/fill to stack.
+
+Internal BPF can be used as a generic assembler for last step performance
+optimizations, socket filters and seccomp are using it as assembler. Tracing
+filters may use it as assembler to generate code from kernel. In kernel usage
+may not be bounded by security considerations, since generated internal BPF code
+may be optimizing internal code path and not being exposed to the user space.
+Safety of internal BPF can come from a verifier (TBD). In such use cases as
+described, it may be used as safe instruction set.
+
+Just like the original BPF, the new format runs within a controlled environment,
+is deterministic and the kernel can easily prove that. The safety of the program
+can be determined in two steps: first step does depth-first-search to disallow
+loops and other CFG validation; second step starts from the first insn and
+descends all possible paths. It simulates execution of every insn and observes
+the state change of registers and stack.
+
+eBPF opcode encoding
+--------------------
+
+eBPF is reusing most of the opcode encoding from classic to simplify conversion
+of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
+field is divided into three parts::
+
+  +----------------+--------+--------------------+
+  |   4 bits       |  1 bit |   3 bits           |
+  | operation code | source | instruction class  |
+  +----------------+--------+--------------------+
+  (MSB)                                      (LSB)
+
+Three LSB bits store instruction class which is one of:
+
+  ===================     ===============
+  Classic BPF classes     eBPF classes
+  ===================     ===============
+  BPF_LD    0x00          BPF_LD    0x00
+  BPF_LDX   0x01          BPF_LDX   0x01
+  BPF_ST    0x02          BPF_ST    0x02
+  BPF_STX   0x03          BPF_STX   0x03
+  BPF_ALU   0x04          BPF_ALU   0x04
+  BPF_JMP   0x05          BPF_JMP   0x05
+  BPF_RET   0x06          BPF_JMP32 0x06
+  BPF_MISC  0x07          BPF_ALU64 0x07
+  ===================     ===============
+
+When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
+
+    ::
+
+	BPF_K     0x00
+	BPF_X     0x08
+
+ * in classic BPF, this means::
+
+	BPF_SRC(code) == BPF_X - use register X as source operand
+	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+ * in eBPF, this means::
+
+	BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
+	BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
+
+... and four MSB bits store operation code.
+
+If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of::
+
+  BPF_ADD   0x00
+  BPF_SUB   0x10
+  BPF_MUL   0x20
+  BPF_DIV   0x30
+  BPF_OR    0x40
+  BPF_AND   0x50
+  BPF_LSH   0x60
+  BPF_RSH   0x70
+  BPF_NEG   0x80
+  BPF_MOD   0x90
+  BPF_XOR   0xa0
+  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
+  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
+  BPF_END   0xd0  /* eBPF only: endianness conversion */
+
+If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of::
+
+  BPF_JA    0x00  /* BPF_JMP only */
+  BPF_JEQ   0x10
+  BPF_JGT   0x20
+  BPF_JGE   0x30
+  BPF_JSET  0x40
+  BPF_JNE   0x50  /* eBPF only: jump != */
+  BPF_JSGT  0x60  /* eBPF only: signed '>' */
+  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
+  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
+  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
+  BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
+  BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
+  BPF_JSLT  0xc0  /* eBPF only: signed '<' */
+  BPF_JSLE  0xd0  /* eBPF only: signed '<=' */
+
+So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
+and eBPF. There are only two registers in classic BPF, so it means A += X.
+In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
+BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
+dst_reg = (u32) dst_reg ^ (u32) imm32 in eBPF.
+
+Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
+eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
+BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
+exactly the same operations as BPF_ALU, but with 64-bit wide operands
+instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
+dst_reg = dst_reg + src_reg
+
+Classic BPF wastes the whole BPF_RET class to represent a single ``ret``
+operation. Classic BPF_RET | BPF_K means copy imm32 into return register
+and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
+in eBPF means function exit only. The eBPF program needs to store return
+value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
+BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
+operands for the comparisons instead.
+
+For load and store instructions the 8-bit 'code' field is divided as::
+
+  +--------+--------+-------------------+
+  | 3 bits | 2 bits |   3 bits          |
+  |  mode  |  size  | instruction class |
+  +--------+--------+-------------------+
+  (MSB)                             (LSB)
+
+Size modifier is one of ...
+
+::
+
+  BPF_W   0x00    /* word */
+  BPF_H   0x08    /* half word */
+  BPF_B   0x10    /* byte */
+  BPF_DW  0x18    /* eBPF only, double word */
+
+... which encodes size of load/store operation::
+
+ B  - 1 byte
+ H  - 2 byte
+ W  - 4 byte
+ DW - 8 byte (eBPF only)
+
+Mode modifier is one of::
+
+  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
+  BPF_ABS  0x20
+  BPF_IND  0x40
+  BPF_MEM  0x60
+  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
+  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
+  BPF_XADD 0xc0  /* eBPF only, exclusive add */
+
+eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
+(BPF_IND | <size> | BPF_LD) which are used to access packet data.
+
+They had to be carried over from classic to have strong performance of
+socket filters running in eBPF interpreter. These instructions can only
+be used when interpreter context is a pointer to ``struct sk_buff`` and
+have seven implicit operands. Register R6 is an implicit input that must
+contain pointer to sk_buff. Register R0 is an implicit output which contains
+the data fetched from the packet. Registers R1-R5 are scratch registers
+and must not be used to store the data across BPF_ABS | BPF_LD or
+BPF_IND | BPF_LD instructions.
+
+These instructions have implicit program exit condition as well. When
+eBPF program is trying to access the data beyond the packet boundary,
+the interpreter will abort the execution of the program. JIT compilers
+therefore must preserve this property. src_reg and imm32 fields are
+explicit inputs to these instructions.
+
+For example::
+
+  BPF_IND | BPF_W | BPF_LD means:
+
+    R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
+    and R1 - R5 were scratched.
+
+Unlike classic BPF instruction set, eBPF has generic load/store operations::
+
+    BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
+    BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
+    BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
+    BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
+    BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
+
+Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
+2 byte atomic increments are not supported.
+
+eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
+of two consecutive ``struct bpf_insn`` 8-byte blocks and interpreted as single
+instruction that loads 64-bit immediate value into a dst_reg.
+Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
+32-bit immediate value into a register.
+
+eBPF verifier
+-------------
+The safety of the eBPF program is determined in two steps.
+
+First step does DAG check to disallow loops and other CFG validation.
+In particular it will detect programs that have unreachable instructions.
+(though classic BPF checker allows them)
+
+Second step starts from the first insn and descends all possible paths.
+It simulates execution of every insn and observes the state change of
+registers and stack.
+
+At the start of the program the register R1 contains a pointer to context
+and has type PTR_TO_CTX.
+If verifier sees an insn that does R2=R1, then R2 has now type
+PTR_TO_CTX as well and can be used on the right hand side of expression.
+If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=SCALAR_VALUE,
+since addition of two valid pointers makes invalid pointer.
+(In 'secure' mode verifier will reject any type of pointer arithmetic to make
+sure that kernel addresses don't leak to unprivileged users)
+
+If register was never written to, it's not readable::
+
+  bpf_mov R0 = R2
+  bpf_exit
+
+will be rejected, since R2 is unreadable at the start of the program.
+
+After kernel function call, R1-R5 are reset to unreadable and
+R0 has a return type of the function.
+
+Since R6-R9 are callee saved, their state is preserved across the call.
+
+::
+
+  bpf_mov R6 = 1
+  bpf_call foo
+  bpf_mov R0 = R6
+  bpf_exit
+
+is a correct program. If there was R1 instead of R6, it would have
+been rejected.
+
+load/store instructions are allowed only with registers of valid types, which
+are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment checked.
+For example::
+
+ bpf_mov R1 = 1
+ bpf_mov R2 = 2
+ bpf_xadd *(u32 *)(R1 + 3) += R2
+ bpf_exit
+
+will be rejected, since R1 doesn't have a valid pointer type at the time of
+execution of instruction bpf_xadd.
+
+At the start R1 type is PTR_TO_CTX (a pointer to generic ``struct bpf_context``)
+A callback is used to customize verifier to restrict eBPF program access to only
+certain fields within ctx structure with specified size and alignment.
+
+For example, the following insn::
+
+  bpf_ld R0 = *(u32 *)(R6 + 8)
+
+intends to load a word from address R6 + 8 and store it into R0.
+If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
+that offset 8 of size 4 bytes can be accessed for reading, otherwise
+the verifier will reject the program.
+If R6=PTR_TO_STACK, then access should be aligned and be within
+stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
+so it will fail verification, since it's out of bounds.
+
+The verifier will allow eBPF program to read data from stack only after
+it wrote into it.
+
+Classic BPF verifier does similar check with M[0-15] memory slots.
+For example::
+
+  bpf_ld R0 = *(u32 *)(R10 - 4)
+  bpf_exit
+
+is invalid program.
+Though R10 is correct read-only register and has type PTR_TO_STACK
+and R10 - 4 is within stack bounds, there were no stores into that location.
+
+Pointer register spill/fill is tracked as well, since four (R6-R9)
+callee saved registers may not be enough for some programs.
+
+Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
+The eBPF verifier will check that registers match argument constraints.
+After the call register R0 will be set to return type of the function.
+
+Function calls are the main mechanism to extend functionality of eBPF programs.
+Socket filters may let programs call one set of functions, whereas tracing
+filters may allow a completely different set.
+
+If a function is made accessible to eBPF programs, it needs to be thought
+through from a safety point of view. The verifier will guarantee that the
+function is called with valid arguments.
+
+seccomp vs socket filters have different security restrictions for classic BPF.
+Seccomp solves this by two stage verifier: classic BPF verifier is followed
+by seccomp verifier. In case of eBPF one configurable verifier is shared for
+all use cases.
+
+See details of eBPF verifier in kernel/bpf/verifier.c
+
+Register value tracking
+-----------------------
+In order to determine the safety of an eBPF program, the verifier must track
+the range of possible values in each register and also in each stack slot.
+This is done with ``struct bpf_reg_state``, defined in include/linux/
+bpf_verifier.h, which unifies tracking of scalar and pointer values.  Each
+register state has a type, which is either NOT_INIT (the register has not been
+written to), SCALAR_VALUE (some value which is not usable as a pointer), or a
+pointer type.  The types of pointers describe their base, as follows:
+
+
+    PTR_TO_CTX
+			Pointer to bpf_context.
+    CONST_PTR_TO_MAP
+			Pointer to struct bpf_map.  "Const" because arithmetic
+			on these pointers is forbidden.
+    PTR_TO_MAP_VALUE
+			Pointer to the value stored in a map element.
+    PTR_TO_MAP_VALUE_OR_NULL
+			Either a pointer to a map value, or NULL; map accesses
+			(see section 'eBPF maps', below) return this type,
+			which becomes a PTR_TO_MAP_VALUE when checked != NULL.
+			Arithmetic on these pointers is forbidden.
+    PTR_TO_STACK
+			Frame pointer.
+    PTR_TO_PACKET
+			skb->data.
+    PTR_TO_PACKET_END
+			skb->data + headlen; arithmetic forbidden.
+    PTR_TO_SOCKET
+			Pointer to struct bpf_sock_ops, implicitly refcounted.
+    PTR_TO_SOCKET_OR_NULL
+			Either a pointer to a socket, or NULL; socket lookup
+			returns this type, which becomes a PTR_TO_SOCKET when
+			checked != NULL. PTR_TO_SOCKET is reference-counted,
+			so programs must release the reference through the
+			socket release function before the end of the program.
+			Arithmetic on these pointers is forbidden.
+
+However, a pointer may be offset from this base (as a result of pointer
+arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
+offset'.  The former is used when an exactly-known value (e.g. an immediate
+operand) is added to a pointer, while the latter is used for values which are
+not exactly known.  The variable offset is also used in SCALAR_VALUEs, to track
+the range of possible values in the register.
+
+The verifier's knowledge about the variable offset consists of:
+
+* minimum and maximum values as unsigned
+* minimum and maximum values as signed
+
+* knowledge of the values of individual bits, in the form of a 'tnum': a u64
+  'mask' and a u64 'value'.  1s in the mask represent bits whose value is unknown;
+  1s in the value represent bits known to be 1.  Bits known to be 0 have 0 in both
+  mask and value; no bit should ever be 1 in both.  For example, if a byte is read
+  into a register from memory, the register's top 56 bits are known zero, while
+  the low 8 are unknown - which is represented as the tnum (0x0; 0xff).  If we
+  then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
+  0x1ff), because of potential carries.
+
+Besides arithmetic, the register state can also be updated by conditional
+branches.  For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
+it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false'
+branch it will have a umax_value of 8.  A signed compare (with BPF_JSGT or
+BPF_JSGE) would instead update the signed minimum/maximum values.  Information
+from the signed and unsigned bounds can be combined; for instance if a value is
+first tested < 8 and then tested s> 4, the verifier will conclude that the value
+is also > 4 and s< 8, since the bounds prevent crossing the sign boundary.
+
+PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all
+pointers sharing that same variable offset.  This is important for packet range
+checks: after adding a variable to a packet pointer register A, if you then copy
+it to another register B and then add a constant 4 to A, both registers will
+share the same 'id' but A will have a fixed offset of +4.  Then if A is
+bounds-checked and found to be less than a PTR_TO_PACKET_END, the register B is
+now known to have a safe range of at least 4 bytes.  See 'Direct packet access',
+below, for more on PTR_TO_PACKET ranges.
+
+The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of
+the pointer returned from a map lookup.  This means that when one copy is
+checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs.
+As well as range-checking, the tracked information is also used for enforcing
+alignment of pointer accesses.  For instance, on most systems the packet pointer
+is 2 bytes after a 4-byte alignment.  If a program adds 14 bytes to that to jump
+over the Ethernet header, then reads IHL and adds (IHL * 4), the resulting
+pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
+bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
+that pointer are safe.
+The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
+to all copies of the pointer returned from a socket lookup. This has similar
+behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
+it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
+represents a reference to the corresponding ``struct sock``. To ensure that the
+reference is not leaked, it is imperative to NULL-check the reference and, in
+the non-NULL case, pass the valid reference to the socket release function.
+
+Direct packet access
+--------------------
+In cls_bpf and act_bpf programs the verifier allows direct access to the packet
+data via skb->data and skb->data_end pointers.
+Ex::
+
+    1:  r4 = *(u32 *)(r1 +80)  /* load skb->data_end */
+    2:  r3 = *(u32 *)(r1 +76)  /* load skb->data */
+    3:  r5 = r3
+    4:  r5 += 14
+    5:  if r5 > r4 goto pc+16
+    R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+    6:  r0 = *(u16 *)(r3 +12) /* access 12 and 13 bytes of the packet */
+
+this 2byte load from the packet is safe to do, since the program author
+did check ``if (skb->data + 14 > skb->data_end) goto err`` at insn #5 which
+means that in the fall-through case the register R3 (which points to skb->data)
+has at least 14 directly accessible bytes. The verifier marks it
+as R3=pkt(id=0,off=0,r=14).
+id=0 means that no additional variables were added to the register.
+off=0 means that no additional constants were added.
+r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
+Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
+to the packet data, but constant 14 was added to the register, so
+it now points to ``skb->data + 14`` and accessible range is [R5, R5 + 14 - 14)
+which is zero bytes.
+
+More complex packet access may look like::
+
+
+    R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
+    6:  r0 = *(u8 *)(r3 +7) /* load 7th byte from the packet */
+    7:  r4 = *(u8 *)(r3 +12)
+    8:  r4 *= 14
+    9:  r3 = *(u32 *)(r1 +76) /* load skb->data */
+    10:  r3 += r4
+    11:  r2 = r1
+    12:  r2 <<= 48
+    13:  r2 >>= 48
+    14:  r3 += r2
+    15:  r2 = r3
+    16:  r2 += 8
+    17:  r1 = *(u32 *)(r1 +80) /* load skb->data_end */
+    18:  if r2 > r1 goto pc+2
+    R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
+    19:  r1 = *(u8 *)(r3 +4)
+
+The state of the register R3 is R3=pkt(id=2,off=0,r=8)
+id=2 means that two ``r3 += rX`` instructions were seen, so r3 points to some
+offset within a packet and since the program author did
+``if (r3 + 8 > r1) goto err`` at insn #18, the safe range is [R3, R3 + 8).
+The verifier only allows 'add'/'sub' operations on packet registers. Any other
+operation will set the register state to 'SCALAR_VALUE' and it won't be
+available for direct packet access.
+
+Operation ``r3 += rX`` may overflow and become less than original skb->data,
+therefore the verifier has to prevent that.  So when it sees ``r3 += rX``
+instruction and rX is more than 16-bit value, any subsequent bounds-check of r3
+against skb->data_end will not give us 'range' information, so attempts to read
+through the pointer will give "invalid access to packet" error.
+
+Ex. after insn ``r4 = *(u8 *)(r3 +12)`` (insn #7 above) the state of r4 is
+R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that upper 56 bits
+of the register are guaranteed to be zero, and nothing is known about the lower
+8 bits. After insn ``r4 *= 14`` the state becomes
+R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an 8-bit
+value by constant 14 will keep upper 52 bits as zero, also the least significant
+bit will be zero as 14 is even.  Similarly ``r2 >>= 48`` will make
+R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)), since the shift is not sign
+extending.  This logic is implemented in adjust_reg_min_max_vals() function,
+which calls adjust_ptr_min_max_vals() for adding pointer to scalar (or vice
+versa) and adjust_scalar_min_max_vals() for operations on two scalars.
+
+The end result is that bpf program author can access packet directly
+using normal C code as::
+
+  void *data = (void *)(long)skb->data;
+  void *data_end = (void *)(long)skb->data_end;
+  struct eth_hdr *eth = data;
+  struct iphdr *iph = data + sizeof(*eth);
+  struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
+
+  if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
+	  return 0;
+  if (eth->h_proto != htons(ETH_P_IP))
+	  return 0;
+  if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
+	  return 0;
+  if (udp->dest == 53 || udp->source == 9)
+	  ...;
+
+which makes such programs easier to write compared to LD_ABS insn
+and significantly faster.
+
+eBPF maps
+---------
+'maps' is a generic storage of different types for sharing data between kernel
+and userspace.
+
+The maps are accessed from user space via BPF syscall, which has commands:
+
+- create a map with given type and attributes
+  ``map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)``
+  using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
+  returns process-local file descriptor or negative error
+
+- lookup key in a given map
+  ``err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)``
+  using attr->map_fd, attr->key, attr->value
+  returns zero and stores found elem into value or negative error
+
+- create or update key/value pair in a given map
+  ``err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)``
+  using attr->map_fd, attr->key, attr->value
+  returns zero or negative error
+
+- find and delete element by key in a given map
+  ``err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)``
+  using attr->map_fd, attr->key
+
+- to delete map: close(fd)
+  Exiting process will delete maps automatically
+
+userspace programs use this syscall to create/access maps that eBPF programs
+are concurrently updating.
+
+maps can have different types: hash, array, bloom filter, radix-tree, etc.
+
+The map is defined by:
+
+  - type
+  - max number of elements
+  - key size in bytes
+  - value size in bytes
+
+Pruning
+-------
+The verifier does not actually walk all possible paths through the program.  For
+each new branch to analyse, the verifier looks at all the states it's previously
+been in when at this instruction.  If any of them contain the current state as a
+subset, the branch is 'pruned' - that is, the fact that the previous state was
+accepted implies the current state would be as well.  For instance, if in the
+previous state, r1 held a packet-pointer, and in the current state, r1 holds a
+packet-pointer with a range as long or longer and at least as strict an
+alignment, then r1 is safe.  Similarly, if r2 was NOT_INIT before then it can't
+have been used by any path from that point, so any value in r2 (including
+another NOT_INIT) is safe.  The implementation is in the function regsafe().
+Pruning considers not only the registers but also the stack (and any spilled
+registers it may hold).  They must all be safe for the branch to be pruned.
+This is implemented in states_equal().
+
+Understanding eBPF verifier messages
+------------------------------------
+
+The following are a few examples of invalid eBPF programs and verifier error
+messages as seen in the log:
+
+Program with unreachable instructions::
+
+  static struct bpf_insn prog[] = {
+  BPF_EXIT_INSN(),
+  BPF_EXIT_INSN(),
+  };
+
+Error::
+
+  unreachable insn 1
+
+Program that reads uninitialized register::
+
+  BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (bf) r0 = r2
+  R2 !read_ok
+
+Program that doesn't initialize R0 before exiting::
+
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (bf) r2 = r1
+  1: (95) exit
+  R0 !read_ok
+
+Program that accesses stack out of bounds::
+
+    BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
+    BPF_EXIT_INSN(),
+
+Error::
+
+    0: (7a) *(u64 *)(r10 +8) = 0
+    invalid stack off=8 size=8
+
+Program that doesn't initialize stack before passing its address into function::
+
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (bf) r2 = r10
+  1: (07) r2 += -8
+  2: (b7) r1 = 0x0
+  3: (85) call 1
+  invalid indirect read from stack off -8+0 size 8
+
+Program that uses invalid map_fd=0 while calling to map_lookup_elem() function::
+
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 0x0
+  4: (85) call 1
+  fd 0 is not pointing to valid bpf_map
+
+Program that doesn't check return value of map_lookup_elem() before accessing
+map element::
+
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 0x0
+  4: (85) call 1
+  5: (7a) *(u64 *)(r0 +0) = 0
+  R0 invalid mem access 'map_value_or_null'
+
+Program that correctly checks map_lookup_elem() returned value for NULL, but
+accesses the memory with incorrect alignment::
+
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 1
+  4: (85) call 1
+  5: (15) if r0 == 0x0 goto pc+1
+   R0=map_ptr R10=fp
+  6: (7a) *(u64 *)(r0 +4) = 0
+  misaligned access off 4 size 8
+
+Program that correctly checks map_lookup_elem() returned value for NULL and
+accesses memory with correct alignment in one side of 'if' branch, but fails
+to do so in the other side of 'if' branch::
+
+  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_LD_MAP_FD(BPF_REG_1, 0),
+  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
+  BPF_EXIT_INSN(),
+  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (7a) *(u64 *)(r10 -8) = 0
+  1: (bf) r2 = r10
+  2: (07) r2 += -8
+  3: (b7) r1 = 1
+  4: (85) call 1
+  5: (15) if r0 == 0x0 goto pc+2
+   R0=map_ptr R10=fp
+  6: (7a) *(u64 *)(r0 +0) = 0
+  7: (95) exit
+
+  from 5 to 8: R0=imm0 R10=fp
+  8: (7a) *(u64 *)(r0 +0) = 1
+  R0 invalid mem access 'imm'
+
+Program that performs a socket lookup then sets the pointer to NULL without
+checking it::
+
+  BPF_MOV64_IMM(BPF_REG_2, 0),
+  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_MOV64_IMM(BPF_REG_3, 4),
+  BPF_MOV64_IMM(BPF_REG_4, 0),
+  BPF_MOV64_IMM(BPF_REG_5, 0),
+  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+  BPF_MOV64_IMM(BPF_REG_0, 0),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (b7) r2 = 0
+  1: (63) *(u32 *)(r10 -8) = r2
+  2: (bf) r2 = r10
+  3: (07) r2 += -8
+  4: (b7) r3 = 4
+  5: (b7) r4 = 0
+  6: (b7) r5 = 0
+  7: (85) call bpf_sk_lookup_tcp#65
+  8: (b7) r0 = 0
+  9: (95) exit
+  Unreleased reference id=1, alloc_insn=7
+
+Program that performs a socket lookup but does not NULL-check the returned
+value::
+
+  BPF_MOV64_IMM(BPF_REG_2, 0),
+  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
+  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+  BPF_MOV64_IMM(BPF_REG_3, 4),
+  BPF_MOV64_IMM(BPF_REG_4, 0),
+  BPF_MOV64_IMM(BPF_REG_5, 0),
+  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
+  BPF_EXIT_INSN(),
+
+Error::
+
+  0: (b7) r2 = 0
+  1: (63) *(u32 *)(r10 -8) = r2
+  2: (bf) r2 = r10
+  3: (07) r2 += -8
+  4: (b7) r3 = 4
+  5: (b7) r4 = 0
+  6: (b7) r5 = 0
+  7: (85) call bpf_sk_lookup_tcp#65
+  8: (95) exit
+  Unreleased reference id=1, alloc_insn=7
+
+Testing
+-------
+
+Next to the BPF toolchain, the kernel also ships a test module that contains
+various test cases for classic and internal BPF that can be executed against
+the BPF interpreter and JIT compiler. It can be found in lib/test_bpf.c and
+enabled via Kconfig::
+
+  CONFIG_TEST_BPF=m
+
+After the module has been built and installed, the test suite can be executed
+via insmod or modprobe against 'test_bpf' module. Results of the test cases
+including timings in nsec can be found in the kernel log (dmesg).
+
+Misc
+----
+
+Also trinity, the Linux syscall fuzzer, has built-in support for BPF and
+SECCOMP-BPF kernel fuzzing.
+
+Written by
+----------
+
+The document was written in the hope that it is found useful and in order
+to give potential BPF hackers or security auditors a better overview of
+the underlying architecture.
+
+- Jay Schulist <jschlst@samba.org>
+- Daniel Borkmann <daniel@iogearbox.net>
+- Alexei Starovoitov <ast@kernel.org>
diff --git a/Documentation/networking/filter.txt b/Documentation/networking/filter.txt
deleted file mode 100644
index 2f0f8b17dade..000000000000
--- a/Documentation/networking/filter.txt
+++ /dev/null
@@ -1,1545 +0,0 @@
-Linux Socket Filtering aka Berkeley Packet Filter (BPF)
-=======================================================
-
-Introduction
-------------
-
-Linux Socket Filtering (LSF) is derived from the Berkeley Packet Filter.
-Though there are some distinct differences between the BSD and Linux
-Kernel filtering, but when we speak of BPF or LSF in Linux context, we
-mean the very same mechanism of filtering in the Linux kernel.
-
-BPF allows a user-space program to attach a filter onto any socket and
-allow or disallow certain types of data to come through the socket. LSF
-follows exactly the same filter code structure as BSD's BPF, so referring
-to the BSD bpf.4 manpage is very helpful in creating filters.
-
-On Linux, BPF is much simpler than on BSD. One does not have to worry
-about devices or anything like that. You simply create your filter code,
-send it to the kernel via the SO_ATTACH_FILTER option and if your filter
-code passes the kernel check on it, you then immediately begin filtering
-data on that socket.
-
-You can also detach filters from your socket via the SO_DETACH_FILTER
-option. This will probably not be used much since when you close a socket
-that has a filter on it the filter is automagically removed. The other
-less common case may be adding a different filter on the same socket where
-you had another filter that is still running: the kernel takes care of
-removing the old one and placing your new one in its place, assuming your
-filter has passed the checks, otherwise if it fails the old filter will
-remain on that socket.
-
-SO_LOCK_FILTER option allows to lock the filter attached to a socket. Once
-set, a filter cannot be removed or changed. This allows one process to
-setup a socket, attach a filter, lock it then drop privileges and be
-assured that the filter will be kept until the socket is closed.
-
-The biggest user of this construct might be libpcap. Issuing a high-level
-filter command like `tcpdump -i em1 port 22` passes through the libpcap
-internal compiler that generates a structure that can eventually be loaded
-via SO_ATTACH_FILTER to the kernel. `tcpdump -i em1 port 22 -ddd`
-displays what is being placed into this structure.
-
-Although we were only speaking about sockets here, BPF in Linux is used
-in many more places. There's xt_bpf for netfilter, cls_bpf in the kernel
-qdisc layer, SECCOMP-BPF (SECure COMPuting [1]), and lots of other places
-such as team driver, PTP code, etc where BPF is being used.
-
- [1] Documentation/userspace-api/seccomp_filter.rst
-
-Original BPF paper:
-
-Steven McCanne and Van Jacobson. 1993. The BSD packet filter: a new
-architecture for user-level packet capture. In Proceedings of the
-USENIX Winter 1993 Conference Proceedings on USENIX Winter 1993
-Conference Proceedings (USENIX'93). USENIX Association, Berkeley,
-CA, USA, 2-2. [http://www.tcpdump.org/papers/bpf-usenix93.pdf]
-
-Structure
----------
-
-User space applications include <linux/filter.h> which contains the
-following relevant structures:
-
-struct sock_filter {	/* Filter block */
-	__u16	code;   /* Actual filter code */
-	__u8	jt;	/* Jump true */
-	__u8	jf;	/* Jump false */
-	__u32	k;      /* Generic multiuse field */
-};
-
-Such a structure is assembled as an array of 4-tuples, that contains
-a code, jt, jf and k value. jt and jf are jump offsets and k a generic
-value to be used for a provided code.
-
-struct sock_fprog {			/* Required for SO_ATTACH_FILTER. */
-	unsigned short		   len;	/* Number of filter blocks */
-	struct sock_filter __user *filter;
-};
-
-For socket filtering, a pointer to this structure (as shown in
-follow-up example) is being passed to the kernel through setsockopt(2).
-
-Example
--------
-
-#include <sys/socket.h>
-#include <sys/types.h>
-#include <arpa/inet.h>
-#include <linux/if_ether.h>
-/* ... */
-
-/* From the example above: tcpdump -i em1 port 22 -dd */
-struct sock_filter code[] = {
-	{ 0x28,  0,  0, 0x0000000c },
-	{ 0x15,  0,  8, 0x000086dd },
-	{ 0x30,  0,  0, 0x00000014 },
-	{ 0x15,  2,  0, 0x00000084 },
-	{ 0x15,  1,  0, 0x00000006 },
-	{ 0x15,  0, 17, 0x00000011 },
-	{ 0x28,  0,  0, 0x00000036 },
-	{ 0x15, 14,  0, 0x00000016 },
-	{ 0x28,  0,  0, 0x00000038 },
-	{ 0x15, 12, 13, 0x00000016 },
-	{ 0x15,  0, 12, 0x00000800 },
-	{ 0x30,  0,  0, 0x00000017 },
-	{ 0x15,  2,  0, 0x00000084 },
-	{ 0x15,  1,  0, 0x00000006 },
-	{ 0x15,  0,  8, 0x00000011 },
-	{ 0x28,  0,  0, 0x00000014 },
-	{ 0x45,  6,  0, 0x00001fff },
-	{ 0xb1,  0,  0, 0x0000000e },
-	{ 0x48,  0,  0, 0x0000000e },
-	{ 0x15,  2,  0, 0x00000016 },
-	{ 0x48,  0,  0, 0x00000010 },
-	{ 0x15,  0,  1, 0x00000016 },
-	{ 0x06,  0,  0, 0x0000ffff },
-	{ 0x06,  0,  0, 0x00000000 },
-};
-
-struct sock_fprog bpf = {
-	.len = ARRAY_SIZE(code),
-	.filter = code,
-};
-
-sock = socket(PF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
-if (sock < 0)
-	/* ... bail out ... */
-
-ret = setsockopt(sock, SOL_SOCKET, SO_ATTACH_FILTER, &bpf, sizeof(bpf));
-if (ret < 0)
-	/* ... bail out ... */
-
-/* ... */
-close(sock);
-
-The above example code attaches a socket filter for a PF_PACKET socket
-in order to let all IPv4/IPv6 packets with port 22 pass. The rest will
-be dropped for this socket.
-
-The setsockopt(2) call to SO_DETACH_FILTER doesn't need any arguments
-and SO_LOCK_FILTER for preventing the filter to be detached, takes an
-integer value with 0 or 1.
-
-Note that socket filters are not restricted to PF_PACKET sockets only,
-but can also be used on other socket families.
-
-Summary of system calls:
-
- * setsockopt(sockfd, SOL_SOCKET, SO_ATTACH_FILTER, &val, sizeof(val));
- * setsockopt(sockfd, SOL_SOCKET, SO_DETACH_FILTER, &val, sizeof(val));
- * setsockopt(sockfd, SOL_SOCKET, SO_LOCK_FILTER,   &val, sizeof(val));
-
-Normally, most use cases for socket filtering on packet sockets will be
-covered by libpcap in high-level syntax, so as an application developer
-you should stick to that. libpcap wraps its own layer around all that.
-
-Unless i) using/linking to libpcap is not an option, ii) the required BPF
-filters use Linux extensions that are not supported by libpcap's compiler,
-iii) a filter might be more complex and not cleanly implementable with
-libpcap's compiler, or iv) particular filter codes should be optimized
-differently than libpcap's internal compiler does; then in such cases
-writing such a filter "by hand" can be of an alternative. For example,
-xt_bpf and cls_bpf users might have requirements that could result in
-more complex filter code, or one that cannot be expressed with libpcap
-(e.g. different return codes for various code paths). Moreover, BPF JIT
-implementors may wish to manually write test cases and thus need low-level
-access to BPF code as well.
-
-BPF engine and instruction set
-------------------------------
-
-Under tools/bpf/ there's a small helper tool called bpf_asm which can
-be used to write low-level filters for example scenarios mentioned in the
-previous section. Asm-like syntax mentioned here has been implemented in
-bpf_asm and will be used for further explanations (instead of dealing with
-less readable opcodes directly, principles are the same). The syntax is
-closely modelled after Steven McCanne's and Van Jacobson's BPF paper.
-
-The BPF architecture consists of the following basic elements:
-
-  Element          Description
-
-  A                32 bit wide accumulator
-  X                32 bit wide X register
-  M[]              16 x 32 bit wide misc registers aka "scratch memory
-                   store", addressable from 0 to 15
-
-A program, that is translated by bpf_asm into "opcodes" is an array that
-consists of the following elements (as already mentioned):
-
-  op:16, jt:8, jf:8, k:32
-
-The element op is a 16 bit wide opcode that has a particular instruction
-encoded. jt and jf are two 8 bit wide jump targets, one for condition
-"jump if true", the other one "jump if false". Eventually, element k
-contains a miscellaneous argument that can be interpreted in different
-ways depending on the given instruction in op.
-
-The instruction set consists of load, store, branch, alu, miscellaneous
-and return instructions that are also represented in bpf_asm syntax. This
-table lists all bpf_asm instructions available resp. what their underlying
-opcodes as defined in linux/filter.h stand for:
-
-  Instruction      Addressing mode      Description
-
-  ld               1, 2, 3, 4, 12       Load word into A
-  ldi              4                    Load word into A
-  ldh              1, 2                 Load half-word into A
-  ldb              1, 2                 Load byte into A
-  ldx              3, 4, 5, 12          Load word into X
-  ldxi             4                    Load word into X
-  ldxb             5                    Load byte into X
-
-  st               3                    Store A into M[]
-  stx              3                    Store X into M[]
-
-  jmp              6                    Jump to label
-  ja               6                    Jump to label
-  jeq              7, 8, 9, 10          Jump on A == <x>
-  jneq             9, 10                Jump on A != <x>
-  jne              9, 10                Jump on A != <x>
-  jlt              9, 10                Jump on A <  <x>
-  jle              9, 10                Jump on A <= <x>
-  jgt              7, 8, 9, 10          Jump on A >  <x>
-  jge              7, 8, 9, 10          Jump on A >= <x>
-  jset             7, 8, 9, 10          Jump on A &  <x>
-
-  add              0, 4                 A + <x>
-  sub              0, 4                 A - <x>
-  mul              0, 4                 A * <x>
-  div              0, 4                 A / <x>
-  mod              0, 4                 A % <x>
-  neg                                   !A
-  and              0, 4                 A & <x>
-  or               0, 4                 A | <x>
-  xor              0, 4                 A ^ <x>
-  lsh              0, 4                 A << <x>
-  rsh              0, 4                 A >> <x>
-
-  tax                                   Copy A into X
-  txa                                   Copy X into A
-
-  ret              4, 11                Return
-
-The next table shows addressing formats from the 2nd column:
-
-  Addressing mode  Syntax               Description
-
-   0               x/%x                 Register X
-   1               [k]                  BHW at byte offset k in the packet
-   2               [x + k]              BHW at the offset X + k in the packet
-   3               M[k]                 Word at offset k in M[]
-   4               #k                   Literal value stored in k
-   5               4*([k]&0xf)          Lower nibble * 4 at byte offset k in the packet
-   6               L                    Jump label L
-   7               #k,Lt,Lf             Jump to Lt if true, otherwise jump to Lf
-   8               x/%x,Lt,Lf           Jump to Lt if true, otherwise jump to Lf
-   9               #k,Lt                Jump to Lt if predicate is true
-  10               x/%x,Lt              Jump to Lt if predicate is true
-  11               a/%a                 Accumulator A
-  12               extension            BPF extension
-
-The Linux kernel also has a couple of BPF extensions that are used along
-with the class of load instructions by "overloading" the k argument with
-a negative offset + a particular extension offset. The result of such BPF
-extensions are loaded into A.
-
-Possible BPF extensions are shown in the following table:
-
-  Extension                             Description
-
-  len                                   skb->len
-  proto                                 skb->protocol
-  type                                  skb->pkt_type
-  poff                                  Payload start offset
-  ifidx                                 skb->dev->ifindex
-  nla                                   Netlink attribute of type X with offset A
-  nlan                                  Nested Netlink attribute of type X with offset A
-  mark                                  skb->mark
-  queue                                 skb->queue_mapping
-  hatype                                skb->dev->type
-  rxhash                                skb->hash
-  cpu                                   raw_smp_processor_id()
-  vlan_tci                              skb_vlan_tag_get(skb)
-  vlan_avail                            skb_vlan_tag_present(skb)
-  vlan_tpid                             skb->vlan_proto
-  rand                                  prandom_u32()
-
-These extensions can also be prefixed with '#'.
-Examples for low-level BPF:
-
-** ARP packets:
-
-  ldh [12]
-  jne #0x806, drop
-  ret #-1
-  drop: ret #0
-
-** IPv4 TCP packets:
-
-  ldh [12]
-  jne #0x800, drop
-  ldb [23]
-  jneq #6, drop
-  ret #-1
-  drop: ret #0
-
-** (Accelerated) VLAN w/ id 10:
-
-  ld vlan_tci
-  jneq #10, drop
-  ret #-1
-  drop: ret #0
-
-** icmp random packet sampling, 1 in 4
-  ldh [12]
-  jne #0x800, drop
-  ldb [23]
-  jneq #1, drop
-  # get a random uint32 number
-  ld rand
-  mod #4
-  jneq #1, drop
-  ret #-1
-  drop: ret #0
-
-** SECCOMP filter example:
-
-  ld [4]                  /* offsetof(struct seccomp_data, arch) */
-  jne #0xc000003e, bad    /* AUDIT_ARCH_X86_64 */
-  ld [0]                  /* offsetof(struct seccomp_data, nr) */
-  jeq #15, good           /* __NR_rt_sigreturn */
-  jeq #231, good          /* __NR_exit_group */
-  jeq #60, good           /* __NR_exit */
-  jeq #0, good            /* __NR_read */
-  jeq #1, good            /* __NR_write */
-  jeq #5, good            /* __NR_fstat */
-  jeq #9, good            /* __NR_mmap */
-  jeq #14, good           /* __NR_rt_sigprocmask */
-  jeq #13, good           /* __NR_rt_sigaction */
-  jeq #35, good           /* __NR_nanosleep */
-  bad: ret #0             /* SECCOMP_RET_KILL_THREAD */
-  good: ret #0x7fff0000   /* SECCOMP_RET_ALLOW */
-
-The above example code can be placed into a file (here called "foo"), and
-then be passed to the bpf_asm tool for generating opcodes, output that xt_bpf
-and cls_bpf understands and can directly be loaded with. Example with above
-ARP code:
-
-$ ./bpf_asm foo
-4,40 0 0 12,21 0 1 2054,6 0 0 4294967295,6 0 0 0,
-
-In copy and paste C-like output:
-
-$ ./bpf_asm -c foo
-{ 0x28,  0,  0, 0x0000000c },
-{ 0x15,  0,  1, 0x00000806 },
-{ 0x06,  0,  0, 0xffffffff },
-{ 0x06,  0,  0, 0000000000 },
-
-In particular, as usage with xt_bpf or cls_bpf can result in more complex BPF
-filters that might not be obvious at first, it's good to test filters before
-attaching to a live system. For that purpose, there's a small tool called
-bpf_dbg under tools/bpf/ in the kernel source directory. This debugger allows
-for testing BPF filters against given pcap files, single stepping through the
-BPF code on the pcap's packets and to do BPF machine register dumps.
-
-Starting bpf_dbg is trivial and just requires issuing:
-
-# ./bpf_dbg
-
-In case input and output do not equal stdin/stdout, bpf_dbg takes an
-alternative stdin source as a first argument, and an alternative stdout
-sink as a second one, e.g. `./bpf_dbg test_in.txt test_out.txt`.
-
-Other than that, a particular libreadline configuration can be set via
-file "~/.bpf_dbg_init" and the command history is stored in the file
-"~/.bpf_dbg_history".
-
-Interaction in bpf_dbg happens through a shell that also has auto-completion
-support (follow-up example commands starting with '>' denote bpf_dbg shell).
-The usual workflow would be to ...
-
-> load bpf 6,40 0 0 12,21 0 3 2048,48 0 0 23,21 0 1 1,6 0 0 65535,6 0 0 0
-  Loads a BPF filter from standard output of bpf_asm, or transformed via
-  e.g. `tcpdump -iem1 -ddd port 22 | tr '\n' ','`. Note that for JIT
-  debugging (next section), this command creates a temporary socket and
-  loads the BPF code into the kernel. Thus, this will also be useful for
-  JIT developers.
-
-> load pcap foo.pcap
-  Loads standard tcpdump pcap file.
-
-> run [<n>]
-bpf passes:1 fails:9
-  Runs through all packets from a pcap to account how many passes and fails
-  the filter will generate. A limit of packets to traverse can be given.
-
-> disassemble
-l0:	ldh [12]
-l1:	jeq #0x800, l2, l5
-l2:	ldb [23]
-l3:	jeq #0x1, l4, l5
-l4:	ret #0xffff
-l5:	ret #0
-  Prints out BPF code disassembly.
-
-> dump
-/* { op, jt, jf, k }, */
-{ 0x28,  0,  0, 0x0000000c },
-{ 0x15,  0,  3, 0x00000800 },
-{ 0x30,  0,  0, 0x00000017 },
-{ 0x15,  0,  1, 0x00000001 },
-{ 0x06,  0,  0, 0x0000ffff },
-{ 0x06,  0,  0, 0000000000 },
-  Prints out C-style BPF code dump.
-
-> breakpoint 0
-breakpoint at: l0:	ldh [12]
-> breakpoint 1
-breakpoint at: l1:	jeq #0x800, l2, l5
-  ...
-  Sets breakpoints at particular BPF instructions. Issuing a `run` command
-  will walk through the pcap file continuing from the current packet and
-  break when a breakpoint is being hit (another `run` will continue from
-  the currently active breakpoint executing next instructions):
-
-  > run
-  -- register dump --
-  pc:       [0]                       <-- program counter
-  code:     [40] jt[0] jf[0] k[12]    <-- plain BPF code of current instruction
-  curr:     l0:	ldh [12]              <-- disassembly of current instruction
-  A:        [00000000][0]             <-- content of A (hex, decimal)
-  X:        [00000000][0]             <-- content of X (hex, decimal)
-  M[0,15]:  [00000000][0]             <-- folded content of M (hex, decimal)
-  -- packet dump --                   <-- Current packet from pcap (hex)
-  len: 42
-    0: 00 19 cb 55 55 a4 00 14 a4 43 78 69 08 06 00 01
-   16: 08 00 06 04 00 01 00 14 a4 43 78 69 0a 3b 01 26
-   32: 00 00 00 00 00 00 0a 3b 01 01
-  (breakpoint)
-  >
-
-> breakpoint
-breakpoints: 0 1
-  Prints currently set breakpoints.
-
-> step [-<n>, +<n>]
-  Performs single stepping through the BPF program from the current pc
-  offset. Thus, on each step invocation, above register dump is issued.
-  This can go forwards and backwards in time, a plain `step` will break
-  on the next BPF instruction, thus +1. (No `run` needs to be issued here.)
-
-> select <n>
-  Selects a given packet from the pcap file to continue from. Thus, on
-  the next `run` or `step`, the BPF program is being evaluated against
-  the user pre-selected packet. Numbering starts just as in Wireshark
-  with index 1.
-
-> quit
-#
-  Exits bpf_dbg.
-
-JIT compiler
-------------
-
-The Linux kernel has a built-in BPF JIT compiler for x86_64, SPARC,
-PowerPC, ARM, ARM64, MIPS, RISC-V and s390 and can be enabled through
-CONFIG_BPF_JIT. The JIT compiler is transparently invoked for each
-attached filter from user space or for internal kernel users if it has
-been previously enabled by root:
-
-  echo 1 > /proc/sys/net/core/bpf_jit_enable
-
-For JIT developers, doing audits etc, each compile run can output the generated
-opcode image into the kernel log via:
-
-  echo 2 > /proc/sys/net/core/bpf_jit_enable
-
-Example output from dmesg:
-
-[ 3389.935842] flen=6 proglen=70 pass=3 image=ffffffffa0069c8f
-[ 3389.935847] JIT code: 00000000: 55 48 89 e5 48 83 ec 60 48 89 5d f8 44 8b 4f 68
-[ 3389.935849] JIT code: 00000010: 44 2b 4f 6c 4c 8b 87 d8 00 00 00 be 0c 00 00 00
-[ 3389.935850] JIT code: 00000020: e8 1d 94 ff e0 3d 00 08 00 00 75 16 be 17 00 00
-[ 3389.935851] JIT code: 00000030: 00 e8 28 94 ff e0 83 f8 01 75 07 b8 ff ff 00 00
-[ 3389.935852] JIT code: 00000040: eb 02 31 c0 c9 c3
-
-When CONFIG_BPF_JIT_ALWAYS_ON is enabled, bpf_jit_enable is permanently set to 1 and
-setting any other value than that will return in failure. This is even the case for
-setting bpf_jit_enable to 2, since dumping the final JIT image into the kernel log
-is discouraged and introspection through bpftool (under tools/bpf/bpftool/) is the
-generally recommended approach instead.
-
-In the kernel source tree under tools/bpf/, there's bpf_jit_disasm for
-generating disassembly out of the kernel log's hexdump:
-
-# ./bpf_jit_disasm
-70 bytes emitted from JIT compiler (pass:3, flen:6)
-ffffffffa0069c8f + <x>:
-   0:	push   %rbp
-   1:	mov    %rsp,%rbp
-   4:	sub    $0x60,%rsp
-   8:	mov    %rbx,-0x8(%rbp)
-   c:	mov    0x68(%rdi),%r9d
-  10:	sub    0x6c(%rdi),%r9d
-  14:	mov    0xd8(%rdi),%r8
-  1b:	mov    $0xc,%esi
-  20:	callq  0xffffffffe0ff9442
-  25:	cmp    $0x800,%eax
-  2a:	jne    0x0000000000000042
-  2c:	mov    $0x17,%esi
-  31:	callq  0xffffffffe0ff945e
-  36:	cmp    $0x1,%eax
-  39:	jne    0x0000000000000042
-  3b:	mov    $0xffff,%eax
-  40:	jmp    0x0000000000000044
-  42:	xor    %eax,%eax
-  44:	leaveq
-  45:	retq
-
-Issuing option `-o` will "annotate" opcodes to resulting assembler
-instructions, which can be very useful for JIT developers:
-
-# ./bpf_jit_disasm -o
-70 bytes emitted from JIT compiler (pass:3, flen:6)
-ffffffffa0069c8f + <x>:
-   0:	push   %rbp
-	55
-   1:	mov    %rsp,%rbp
-	48 89 e5
-   4:	sub    $0x60,%rsp
-	48 83 ec 60
-   8:	mov    %rbx,-0x8(%rbp)
-	48 89 5d f8
-   c:	mov    0x68(%rdi),%r9d
-	44 8b 4f 68
-  10:	sub    0x6c(%rdi),%r9d
-	44 2b 4f 6c
-  14:	mov    0xd8(%rdi),%r8
-	4c 8b 87 d8 00 00 00
-  1b:	mov    $0xc,%esi
-	be 0c 00 00 00
-  20:	callq  0xffffffffe0ff9442
-	e8 1d 94 ff e0
-  25:	cmp    $0x800,%eax
-	3d 00 08 00 00
-  2a:	jne    0x0000000000000042
-	75 16
-  2c:	mov    $0x17,%esi
-	be 17 00 00 00
-  31:	callq  0xffffffffe0ff945e
-	e8 28 94 ff e0
-  36:	cmp    $0x1,%eax
-	83 f8 01
-  39:	jne    0x0000000000000042
-	75 07
-  3b:	mov    $0xffff,%eax
-	b8 ff ff 00 00
-  40:	jmp    0x0000000000000044
-	eb 02
-  42:	xor    %eax,%eax
-	31 c0
-  44:	leaveq
-	c9
-  45:	retq
-	c3
-
-For BPF JIT developers, bpf_jit_disasm, bpf_asm and bpf_dbg provides a useful
-toolchain for developing and testing the kernel's JIT compiler.
-
-BPF kernel internals
---------------------
-Internally, for the kernel interpreter, a different instruction set
-format with similar underlying principles from BPF described in previous
-paragraphs is being used. However, the instruction set format is modelled
-closer to the underlying architecture to mimic native instruction sets, so
-that a better performance can be achieved (more details later). This new
-ISA is called 'eBPF' or 'internal BPF' interchangeably. (Note: eBPF which
-originates from [e]xtended BPF is not the same as BPF extensions! While
-eBPF is an ISA, BPF extensions date back to classic BPF's 'overloading'
-of BPF_LD | BPF_{B,H,W} | BPF_ABS instruction.)
-
-It is designed to be JITed with one to one mapping, which can also open up
-the possibility for GCC/LLVM compilers to generate optimized eBPF code through
-an eBPF backend that performs almost as fast as natively compiled code.
-
-The new instruction set was originally designed with the possible goal in
-mind to write programs in "restricted C" and compile into eBPF with a optional
-GCC/LLVM backend, so that it can just-in-time map to modern 64-bit CPUs with
-minimal performance overhead over two steps, that is, C -> eBPF -> native code.
-
-Currently, the new format is being used for running user BPF programs, which
-includes seccomp BPF, classic socket filters, cls_bpf traffic classifier,
-team driver's classifier for its load-balancing mode, netfilter's xt_bpf
-extension, PTP dissector/classifier, and much more. They are all internally
-converted by the kernel into the new instruction set representation and run
-in the eBPF interpreter. For in-kernel handlers, this all works transparently
-by using bpf_prog_create() for setting up the filter, resp.
-bpf_prog_destroy() for destroying it. The macro
-BPF_PROG_RUN(filter, ctx) transparently invokes eBPF interpreter or JITed
-code to run the filter. 'filter' is a pointer to struct bpf_prog that we
-got from bpf_prog_create(), and 'ctx' the given context (e.g.
-skb pointer). All constraints and restrictions from bpf_check_classic() apply
-before a conversion to the new layout is being done behind the scenes!
-
-Currently, the classic BPF format is being used for JITing on most
-32-bit architectures, whereas x86-64, aarch64, s390x, powerpc64,
-sparc64, arm32, riscv64, riscv32 perform JIT compilation from eBPF
-instruction set.
-
-Some core changes of the new internal format:
-
-- Number of registers increase from 2 to 10:
-
-  The old format had two registers A and X, and a hidden frame pointer. The
-  new layout extends this to be 10 internal registers and a read-only frame
-  pointer. Since 64-bit CPUs are passing arguments to functions via registers
-  the number of args from eBPF program to in-kernel function is restricted
-  to 5 and one register is used to accept return value from an in-kernel
-  function. Natively, x86_64 passes first 6 arguments in registers, aarch64/
-  sparcv9/mips64 have 7 - 8 registers for arguments; x86_64 has 6 callee saved
-  registers, and aarch64/sparcv9/mips64 have 11 or more callee saved registers.
-
-  Therefore, eBPF calling convention is defined as:
-
-    * R0	- return value from in-kernel function, and exit value for eBPF program
-    * R1 - R5	- arguments from eBPF program to in-kernel function
-    * R6 - R9	- callee saved registers that in-kernel function will preserve
-    * R10	- read-only frame pointer to access stack
-
-  Thus, all eBPF registers map one to one to HW registers on x86_64, aarch64,
-  etc, and eBPF calling convention maps directly to ABIs used by the kernel on
-  64-bit architectures.
-
-  On 32-bit architectures JIT may map programs that use only 32-bit arithmetic
-  and may let more complex programs to be interpreted.
-
-  R0 - R5 are scratch registers and eBPF program needs spill/fill them if
-  necessary across calls. Note that there is only one eBPF program (== one
-  eBPF main routine) and it cannot call other eBPF functions, it can only
-  call predefined in-kernel functions, though.
-
-- Register width increases from 32-bit to 64-bit:
-
-  Still, the semantics of the original 32-bit ALU operations are preserved
-  via 32-bit subregisters. All eBPF registers are 64-bit with 32-bit lower
-  subregisters that zero-extend into 64-bit if they are being written to.
-  That behavior maps directly to x86_64 and arm64 subregister definition, but
-  makes other JITs more difficult.
-
-  32-bit architectures run 64-bit internal BPF programs via interpreter.
-  Their JITs may convert BPF programs that only use 32-bit subregisters into
-  native instruction set and let the rest being interpreted.
-
-  Operation is 64-bit, because on 64-bit architectures, pointers are also
-  64-bit wide, and we want to pass 64-bit values in/out of kernel functions,
-  so 32-bit eBPF registers would otherwise require to define register-pair
-  ABI, thus, there won't be able to use a direct eBPF register to HW register
-  mapping and JIT would need to do combine/split/move operations for every
-  register in and out of the function, which is complex, bug prone and slow.
-  Another reason is the use of atomic 64-bit counters.
-
-- Conditional jt/jf targets replaced with jt/fall-through:
-
-  While the original design has constructs such as "if (cond) jump_true;
-  else jump_false;", they are being replaced into alternative constructs like
-  "if (cond) jump_true; /* else fall-through */".
-
-- Introduces bpf_call insn and register passing convention for zero overhead
-  calls from/to other kernel functions:
-
-  Before an in-kernel function call, the internal BPF program needs to
-  place function arguments into R1 to R5 registers to satisfy calling
-  convention, then the interpreter will take them from registers and pass
-  to in-kernel function. If R1 - R5 registers are mapped to CPU registers
-  that are used for argument passing on given architecture, the JIT compiler
-  doesn't need to emit extra moves. Function arguments will be in the correct
-  registers and BPF_CALL instruction will be JITed as single 'call' HW
-  instruction. This calling convention was picked to cover common call
-  situations without performance penalty.
-
-  After an in-kernel function call, R1 - R5 are reset to unreadable and R0 has
-  a return value of the function. Since R6 - R9 are callee saved, their state
-  is preserved across the call.
-
-  For example, consider three C functions:
-
-  u64 f1() { return (*_f2)(1); }
-  u64 f2(u64 a) { return f3(a + 1, a); }
-  u64 f3(u64 a, u64 b) { return a - b; }
-
-  GCC can compile f1, f3 into x86_64:
-
-  f1:
-    movl $1, %edi
-    movq _f2(%rip), %rax
-    jmp  *%rax
-  f3:
-    movq %rdi, %rax
-    subq %rsi, %rax
-    ret
-
-  Function f2 in eBPF may look like:
-
-  f2:
-    bpf_mov R2, R1
-    bpf_add R1, 1
-    bpf_call f3
-    bpf_exit
-
-  If f2 is JITed and the pointer stored to '_f2'. The calls f1 -> f2 -> f3 and
-  returns will be seamless. Without JIT, __bpf_prog_run() interpreter needs to
-  be used to call into f2.
-
-  For practical reasons all eBPF programs have only one argument 'ctx' which is
-  already placed into R1 (e.g. on __bpf_prog_run() startup) and the programs
-  can call kernel functions with up to 5 arguments. Calls with 6 or more arguments
-  are currently not supported, but these restrictions can be lifted if necessary
-  in the future.
-
-  On 64-bit architectures all register map to HW registers one to one. For
-  example, x86_64 JIT compiler can map them as ...
-
-    R0 - rax
-    R1 - rdi
-    R2 - rsi
-    R3 - rdx
-    R4 - rcx
-    R5 - r8
-    R6 - rbx
-    R7 - r13
-    R8 - r14
-    R9 - r15
-    R10 - rbp
-
-  ... since x86_64 ABI mandates rdi, rsi, rdx, rcx, r8, r9 for argument passing
-  and rbx, r12 - r15 are callee saved.
-
-  Then the following internal BPF pseudo-program:
-
-    bpf_mov R6, R1 /* save ctx */
-    bpf_mov R2, 2
-    bpf_mov R3, 3
-    bpf_mov R4, 4
-    bpf_mov R5, 5
-    bpf_call foo
-    bpf_mov R7, R0 /* save foo() return value */
-    bpf_mov R1, R6 /* restore ctx for next call */
-    bpf_mov R2, 6
-    bpf_mov R3, 7
-    bpf_mov R4, 8
-    bpf_mov R5, 9
-    bpf_call bar
-    bpf_add R0, R7
-    bpf_exit
-
-  After JIT to x86_64 may look like:
-
-    push %rbp
-    mov %rsp,%rbp
-    sub $0x228,%rsp
-    mov %rbx,-0x228(%rbp)
-    mov %r13,-0x220(%rbp)
-    mov %rdi,%rbx
-    mov $0x2,%esi
-    mov $0x3,%edx
-    mov $0x4,%ecx
-    mov $0x5,%r8d
-    callq foo
-    mov %rax,%r13
-    mov %rbx,%rdi
-    mov $0x6,%esi
-    mov $0x7,%edx
-    mov $0x8,%ecx
-    mov $0x9,%r8d
-    callq bar
-    add %r13,%rax
-    mov -0x228(%rbp),%rbx
-    mov -0x220(%rbp),%r13
-    leaveq
-    retq
-
-  Which is in this example equivalent in C to:
-
-    u64 bpf_filter(u64 ctx)
-    {
-        return foo(ctx, 2, 3, 4, 5) + bar(ctx, 6, 7, 8, 9);
-    }
-
-  In-kernel functions foo() and bar() with prototype: u64 (*)(u64 arg1, u64
-  arg2, u64 arg3, u64 arg4, u64 arg5); will receive arguments in proper
-  registers and place their return value into '%rax' which is R0 in eBPF.
-  Prologue and epilogue are emitted by JIT and are implicit in the
-  interpreter. R0-R5 are scratch registers, so eBPF program needs to preserve
-  them across the calls as defined by calling convention.
-
-  For example the following program is invalid:
-
-    bpf_mov R1, 1
-    bpf_call foo
-    bpf_mov R0, R1
-    bpf_exit
-
-  After the call the registers R1-R5 contain junk values and cannot be read.
-  An in-kernel eBPF verifier is used to validate internal BPF programs.
-
-Also in the new design, eBPF is limited to 4096 insns, which means that any
-program will terminate quickly and will only call a fixed number of kernel
-functions. Original BPF and the new format are two operand instructions,
-which helps to do one-to-one mapping between eBPF insn and x86 insn during JIT.
-
-The input context pointer for invoking the interpreter function is generic,
-its content is defined by a specific use case. For seccomp register R1 points
-to seccomp_data, for converted BPF filters R1 points to a skb.
-
-A program, that is translated internally consists of the following elements:
-
-  op:16, jt:8, jf:8, k:32    ==>    op:8, dst_reg:4, src_reg:4, off:16, imm:32
-
-So far 87 internal BPF instructions were implemented. 8-bit 'op' opcode field
-has room for new instructions. Some of them may use 16/24/32 byte encoding. New
-instructions must be multiple of 8 bytes to preserve backward compatibility.
-
-Internal BPF is a general purpose RISC instruction set. Not every register and
-every instruction are used during translation from original BPF to new format.
-For example, socket filters are not using 'exclusive add' instruction, but
-tracing filters may do to maintain counters of events, for example. Register R9
-is not used by socket filters either, but more complex filters may be running
-out of registers and would have to resort to spill/fill to stack.
-
-Internal BPF can be used as a generic assembler for last step performance
-optimizations, socket filters and seccomp are using it as assembler. Tracing
-filters may use it as assembler to generate code from kernel. In kernel usage
-may not be bounded by security considerations, since generated internal BPF code
-may be optimizing internal code path and not being exposed to the user space.
-Safety of internal BPF can come from a verifier (TBD). In such use cases as
-described, it may be used as safe instruction set.
-
-Just like the original BPF, the new format runs within a controlled environment,
-is deterministic and the kernel can easily prove that. The safety of the program
-can be determined in two steps: first step does depth-first-search to disallow
-loops and other CFG validation; second step starts from the first insn and
-descends all possible paths. It simulates execution of every insn and observes
-the state change of registers and stack.
-
-eBPF opcode encoding
---------------------
-
-eBPF is reusing most of the opcode encoding from classic to simplify conversion
-of classic BPF to eBPF. For arithmetic and jump instructions the 8-bit 'code'
-field is divided into three parts:
-
-  +----------------+--------+--------------------+
-  |   4 bits       |  1 bit |   3 bits           |
-  | operation code | source | instruction class  |
-  +----------------+--------+--------------------+
-  (MSB)                                      (LSB)
-
-Three LSB bits store instruction class which is one of:
-
-  Classic BPF classes:    eBPF classes:
-
-  BPF_LD    0x00          BPF_LD    0x00
-  BPF_LDX   0x01          BPF_LDX   0x01
-  BPF_ST    0x02          BPF_ST    0x02
-  BPF_STX   0x03          BPF_STX   0x03
-  BPF_ALU   0x04          BPF_ALU   0x04
-  BPF_JMP   0x05          BPF_JMP   0x05
-  BPF_RET   0x06          BPF_JMP32 0x06
-  BPF_MISC  0x07          BPF_ALU64 0x07
-
-When BPF_CLASS(code) == BPF_ALU or BPF_JMP, 4th bit encodes source operand ...
-
-  BPF_K     0x00
-  BPF_X     0x08
-
- * in classic BPF, this means:
-
-  BPF_SRC(code) == BPF_X - use register X as source operand
-  BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
-
- * in eBPF, this means:
-
-  BPF_SRC(code) == BPF_X - use 'src_reg' register as source operand
-  BPF_SRC(code) == BPF_K - use 32-bit immediate as source operand
-
-... and four MSB bits store operation code.
-
-If BPF_CLASS(code) == BPF_ALU or BPF_ALU64 [ in eBPF ], BPF_OP(code) is one of:
-
-  BPF_ADD   0x00
-  BPF_SUB   0x10
-  BPF_MUL   0x20
-  BPF_DIV   0x30
-  BPF_OR    0x40
-  BPF_AND   0x50
-  BPF_LSH   0x60
-  BPF_RSH   0x70
-  BPF_NEG   0x80
-  BPF_MOD   0x90
-  BPF_XOR   0xa0
-  BPF_MOV   0xb0  /* eBPF only: mov reg to reg */
-  BPF_ARSH  0xc0  /* eBPF only: sign extending shift right */
-  BPF_END   0xd0  /* eBPF only: endianness conversion */
-
-If BPF_CLASS(code) == BPF_JMP or BPF_JMP32 [ in eBPF ], BPF_OP(code) is one of:
-
-  BPF_JA    0x00  /* BPF_JMP only */
-  BPF_JEQ   0x10
-  BPF_JGT   0x20
-  BPF_JGE   0x30
-  BPF_JSET  0x40
-  BPF_JNE   0x50  /* eBPF only: jump != */
-  BPF_JSGT  0x60  /* eBPF only: signed '>' */
-  BPF_JSGE  0x70  /* eBPF only: signed '>=' */
-  BPF_CALL  0x80  /* eBPF BPF_JMP only: function call */
-  BPF_EXIT  0x90  /* eBPF BPF_JMP only: function return */
-  BPF_JLT   0xa0  /* eBPF only: unsigned '<' */
-  BPF_JLE   0xb0  /* eBPF only: unsigned '<=' */
-  BPF_JSLT  0xc0  /* eBPF only: signed '<' */
-  BPF_JSLE  0xd0  /* eBPF only: signed '<=' */
-
-So BPF_ADD | BPF_X | BPF_ALU means 32-bit addition in both classic BPF
-and eBPF. There are only two registers in classic BPF, so it means A += X.
-In eBPF it means dst_reg = (u32) dst_reg + (u32) src_reg; similarly,
-BPF_XOR | BPF_K | BPF_ALU means A ^= imm32 in classic BPF and analogous
-dst_reg = (u32) dst_reg ^ (u32) imm32 in eBPF.
-
-Classic BPF is using BPF_MISC class to represent A = X and X = A moves.
-eBPF is using BPF_MOV | BPF_X | BPF_ALU code instead. Since there are no
-BPF_MISC operations in eBPF, the class 7 is used as BPF_ALU64 to mean
-exactly the same operations as BPF_ALU, but with 64-bit wide operands
-instead. So BPF_ADD | BPF_X | BPF_ALU64 means 64-bit addition, i.e.:
-dst_reg = dst_reg + src_reg
-
-Classic BPF wastes the whole BPF_RET class to represent a single 'ret'
-operation. Classic BPF_RET | BPF_K means copy imm32 into return register
-and perform function exit. eBPF is modeled to match CPU, so BPF_JMP | BPF_EXIT
-in eBPF means function exit only. The eBPF program needs to store return
-value into register R0 before doing a BPF_EXIT. Class 6 in eBPF is used as
-BPF_JMP32 to mean exactly the same operations as BPF_JMP, but with 32-bit wide
-operands for the comparisons instead.
-
-For load and store instructions the 8-bit 'code' field is divided as:
-
-  +--------+--------+-------------------+
-  | 3 bits | 2 bits |   3 bits          |
-  |  mode  |  size  | instruction class |
-  +--------+--------+-------------------+
-  (MSB)                             (LSB)
-
-Size modifier is one of ...
-
-  BPF_W   0x00    /* word */
-  BPF_H   0x08    /* half word */
-  BPF_B   0x10    /* byte */
-  BPF_DW  0x18    /* eBPF only, double word */
-
-... which encodes size of load/store operation:
-
- B  - 1 byte
- H  - 2 byte
- W  - 4 byte
- DW - 8 byte (eBPF only)
-
-Mode modifier is one of:
-
-  BPF_IMM  0x00  /* used for 32-bit mov in classic BPF and 64-bit in eBPF */
-  BPF_ABS  0x20
-  BPF_IND  0x40
-  BPF_MEM  0x60
-  BPF_LEN  0x80  /* classic BPF only, reserved in eBPF */
-  BPF_MSH  0xa0  /* classic BPF only, reserved in eBPF */
-  BPF_XADD 0xc0  /* eBPF only, exclusive add */
-
-eBPF has two non-generic instructions: (BPF_ABS | <size> | BPF_LD) and
-(BPF_IND | <size> | BPF_LD) which are used to access packet data.
-
-They had to be carried over from classic to have strong performance of
-socket filters running in eBPF interpreter. These instructions can only
-be used when interpreter context is a pointer to 'struct sk_buff' and
-have seven implicit operands. Register R6 is an implicit input that must
-contain pointer to sk_buff. Register R0 is an implicit output which contains
-the data fetched from the packet. Registers R1-R5 are scratch registers
-and must not be used to store the data across BPF_ABS | BPF_LD or
-BPF_IND | BPF_LD instructions.
-
-These instructions have implicit program exit condition as well. When
-eBPF program is trying to access the data beyond the packet boundary,
-the interpreter will abort the execution of the program. JIT compilers
-therefore must preserve this property. src_reg and imm32 fields are
-explicit inputs to these instructions.
-
-For example:
-
-  BPF_IND | BPF_W | BPF_LD means:
-
-    R0 = ntohl(*(u32 *) (((struct sk_buff *) R6)->data + src_reg + imm32))
-    and R1 - R5 were scratched.
-
-Unlike classic BPF instruction set, eBPF has generic load/store operations:
-
-BPF_MEM | <size> | BPF_STX:  *(size *) (dst_reg + off) = src_reg
-BPF_MEM | <size> | BPF_ST:   *(size *) (dst_reg + off) = imm32
-BPF_MEM | <size> | BPF_LDX:  dst_reg = *(size *) (src_reg + off)
-BPF_XADD | BPF_W  | BPF_STX: lock xadd *(u32 *)(dst_reg + off16) += src_reg
-BPF_XADD | BPF_DW | BPF_STX: lock xadd *(u64 *)(dst_reg + off16) += src_reg
-
-Where size is one of: BPF_B or BPF_H or BPF_W or BPF_DW. Note that 1 and
-2 byte atomic increments are not supported.
-
-eBPF has one 16-byte instruction: BPF_LD | BPF_DW | BPF_IMM which consists
-of two consecutive 'struct bpf_insn' 8-byte blocks and interpreted as single
-instruction that loads 64-bit immediate value into a dst_reg.
-Classic BPF has similar instruction: BPF_LD | BPF_W | BPF_IMM which loads
-32-bit immediate value into a register.
-
-eBPF verifier
--------------
-The safety of the eBPF program is determined in two steps.
-
-First step does DAG check to disallow loops and other CFG validation.
-In particular it will detect programs that have unreachable instructions.
-(though classic BPF checker allows them)
-
-Second step starts from the first insn and descends all possible paths.
-It simulates execution of every insn and observes the state change of
-registers and stack.
-
-At the start of the program the register R1 contains a pointer to context
-and has type PTR_TO_CTX.
-If verifier sees an insn that does R2=R1, then R2 has now type
-PTR_TO_CTX as well and can be used on the right hand side of expression.
-If R1=PTR_TO_CTX and insn is R2=R1+R1, then R2=SCALAR_VALUE,
-since addition of two valid pointers makes invalid pointer.
-(In 'secure' mode verifier will reject any type of pointer arithmetic to make
-sure that kernel addresses don't leak to unprivileged users)
-
-If register was never written to, it's not readable:
-  bpf_mov R0 = R2
-  bpf_exit
-will be rejected, since R2 is unreadable at the start of the program.
-
-After kernel function call, R1-R5 are reset to unreadable and
-R0 has a return type of the function.
-
-Since R6-R9 are callee saved, their state is preserved across the call.
-  bpf_mov R6 = 1
-  bpf_call foo
-  bpf_mov R0 = R6
-  bpf_exit
-is a correct program. If there was R1 instead of R6, it would have
-been rejected.
-
-load/store instructions are allowed only with registers of valid types, which
-are PTR_TO_CTX, PTR_TO_MAP, PTR_TO_STACK. They are bounds and alignment checked.
-For example:
- bpf_mov R1 = 1
- bpf_mov R2 = 2
- bpf_xadd *(u32 *)(R1 + 3) += R2
- bpf_exit
-will be rejected, since R1 doesn't have a valid pointer type at the time of
-execution of instruction bpf_xadd.
-
-At the start R1 type is PTR_TO_CTX (a pointer to generic 'struct bpf_context')
-A callback is used to customize verifier to restrict eBPF program access to only
-certain fields within ctx structure with specified size and alignment.
-
-For example, the following insn:
-  bpf_ld R0 = *(u32 *)(R6 + 8)
-intends to load a word from address R6 + 8 and store it into R0
-If R6=PTR_TO_CTX, via is_valid_access() callback the verifier will know
-that offset 8 of size 4 bytes can be accessed for reading, otherwise
-the verifier will reject the program.
-If R6=PTR_TO_STACK, then access should be aligned and be within
-stack bounds, which are [-MAX_BPF_STACK, 0). In this example offset is 8,
-so it will fail verification, since it's out of bounds.
-
-The verifier will allow eBPF program to read data from stack only after
-it wrote into it.
-Classic BPF verifier does similar check with M[0-15] memory slots.
-For example:
-  bpf_ld R0 = *(u32 *)(R10 - 4)
-  bpf_exit
-is invalid program.
-Though R10 is correct read-only register and has type PTR_TO_STACK
-and R10 - 4 is within stack bounds, there were no stores into that location.
-
-Pointer register spill/fill is tracked as well, since four (R6-R9)
-callee saved registers may not be enough for some programs.
-
-Allowed function calls are customized with bpf_verifier_ops->get_func_proto()
-The eBPF verifier will check that registers match argument constraints.
-After the call register R0 will be set to return type of the function.
-
-Function calls are a main mechanism to extend functionality of eBPF programs.
-Socket filters may let programs to call one set of functions, whereas tracing
-filters may allow completely different set.
-
-If a function is made accessible to an eBPF program, it needs to be thought through
-from safety point of view. The verifier will guarantee that the function is
-called with valid arguments.
-
-seccomp vs socket filters have different security restrictions for classic BPF.
-Seccomp solves this by two stage verifier: classic BPF verifier is followed
-by seccomp verifier. In case of eBPF one configurable verifier is shared for
-all use cases.
-
-See details of eBPF verifier in kernel/bpf/verifier.c
-
-Register value tracking
------------------------
-In order to determine the safety of an eBPF program, the verifier must track
-the range of possible values in each register and also in each stack slot.
-This is done with 'struct bpf_reg_state', defined in include/linux/
-bpf_verifier.h, which unifies tracking of scalar and pointer values.  Each
-register state has a type, which is either NOT_INIT (the register has not been
-written to), SCALAR_VALUE (some value which is not usable as a pointer), or a
-pointer type.  The types of pointers describe their base, as follows:
-    PTR_TO_CTX          Pointer to bpf_context.
-    CONST_PTR_TO_MAP    Pointer to struct bpf_map.  "Const" because arithmetic
-                        on these pointers is forbidden.
-    PTR_TO_MAP_VALUE    Pointer to the value stored in a map element.
-    PTR_TO_MAP_VALUE_OR_NULL
-                        Either a pointer to a map value, or NULL; map accesses
-                        (see section 'eBPF maps', below) return this type,
-                        which becomes a PTR_TO_MAP_VALUE when checked != NULL.
-                        Arithmetic on these pointers is forbidden.
-    PTR_TO_STACK        Frame pointer.
-    PTR_TO_PACKET       skb->data.
-    PTR_TO_PACKET_END   skb->data + headlen; arithmetic forbidden.
-    PTR_TO_SOCKET       Pointer to struct bpf_sock_ops, implicitly refcounted.
-    PTR_TO_SOCKET_OR_NULL
-                        Either a pointer to a socket, or NULL; socket lookup
-                        returns this type, which becomes a PTR_TO_SOCKET when
-                        checked != NULL. PTR_TO_SOCKET is reference-counted,
-                        so programs must release the reference through the
-                        socket release function before the end of the program.
-                        Arithmetic on these pointers is forbidden.
-However, a pointer may be offset from this base (as a result of pointer
-arithmetic), and this is tracked in two parts: the 'fixed offset' and 'variable
-offset'.  The former is used when an exactly-known value (e.g. an immediate
-operand) is added to a pointer, while the latter is used for values which are
-not exactly known.  The variable offset is also used in SCALAR_VALUEs, to track
-the range of possible values in the register.
-The verifier's knowledge about the variable offset consists of:
-* minimum and maximum values as unsigned
-* minimum and maximum values as signed
-* knowledge of the values of individual bits, in the form of a 'tnum': a u64
-'mask' and a u64 'value'.  1s in the mask represent bits whose value is unknown;
-1s in the value represent bits known to be 1.  Bits known to be 0 have 0 in both
-mask and value; no bit should ever be 1 in both.  For example, if a byte is read
-into a register from memory, the register's top 56 bits are known zero, while
-the low 8 are unknown - which is represented as the tnum (0x0; 0xff).  If we
-then OR this with 0x40, we get (0x40; 0xbf), then if we add 1 we get (0x0;
-0x1ff), because of potential carries.
-
-Besides arithmetic, the register state can also be updated by conditional
-branches.  For instance, if a SCALAR_VALUE is compared > 8, in the 'true' branch
-it will have a umin_value (unsigned minimum value) of 9, whereas in the 'false'
-branch it will have a umax_value of 8.  A signed compare (with BPF_JSGT or
-BPF_JSGE) would instead update the signed minimum/maximum values.  Information
-from the signed and unsigned bounds can be combined; for instance if a value is
-first tested < 8 and then tested s> 4, the verifier will conclude that the value
-is also > 4 and s< 8, since the bounds prevent crossing the sign boundary.
-
-PTR_TO_PACKETs with a variable offset part have an 'id', which is common to all
-pointers sharing that same variable offset.  This is important for packet range
-checks: after adding a variable to a packet pointer register A, if you then copy
-it to another register B and then add a constant 4 to A, both registers will
-share the same 'id' but the A will have a fixed offset of +4.  Then if A is
-bounds-checked and found to be less than a PTR_TO_PACKET_END, the register B is
-now known to have a safe range of at least 4 bytes.  See 'Direct packet access',
-below, for more on PTR_TO_PACKET ranges.
-
-The 'id' field is also used on PTR_TO_MAP_VALUE_OR_NULL, common to all copies of
-the pointer returned from a map lookup.  This means that when one copy is
-checked and found to be non-NULL, all copies can become PTR_TO_MAP_VALUEs.
-As well as range-checking, the tracked information is also used for enforcing
-alignment of pointer accesses.  For instance, on most systems the packet pointer
-is 2 bytes after a 4-byte alignment.  If a program adds 14 bytes to that to jump
-over the Ethernet header, then reads IHL and adds (IHL * 4), the resulting
-pointer will have a variable offset known to be 4n+2 for some n, so adding the 2
-bytes (NET_IP_ALIGN) gives a 4-byte alignment and so word-sized accesses through
-that pointer are safe.
-The 'id' field is also used on PTR_TO_SOCKET and PTR_TO_SOCKET_OR_NULL, common
-to all copies of the pointer returned from a socket lookup. This has similar
-behaviour to the handling for PTR_TO_MAP_VALUE_OR_NULL->PTR_TO_MAP_VALUE, but
-it also handles reference tracking for the pointer. PTR_TO_SOCKET implicitly
-represents a reference to the corresponding 'struct sock'. To ensure that the
-reference is not leaked, it is imperative to NULL-check the reference and, in
-the non-NULL case, pass the valid reference to the socket release function.
-
-Direct packet access
---------------------
-In cls_bpf and act_bpf programs the verifier allows direct access to the packet
-data via skb->data and skb->data_end pointers.
-Ex:
-1:  r4 = *(u32 *)(r1 +80)  /* load skb->data_end */
-2:  r3 = *(u32 *)(r1 +76)  /* load skb->data */
-3:  r5 = r3
-4:  r5 += 14
-5:  if r5 > r4 goto pc+16
-R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
-6:  r0 = *(u16 *)(r3 +12) /* access 12 and 13 bytes of the packet */
-
-this 2byte load from the packet is safe to do, since the program author
-did check 'if (skb->data + 14 > skb->data_end) goto err' at insn #5 which
-means that in the fall-through case the register R3 (which points to skb->data)
-has at least 14 directly accessible bytes. The verifier marks it
-as R3=pkt(id=0,off=0,r=14).
-id=0 means that no additional variables were added to the register.
-off=0 means that no additional constants were added.
-r=14 is the range of safe access which means that bytes [R3, R3 + 14) are ok.
-Note that R5 is marked as R5=pkt(id=0,off=14,r=14). It also points
-to the packet data, but constant 14 was added to the register, so
-it now points to 'skb->data + 14' and accessible range is [R5, R5 + 14 - 14)
-which is zero bytes.
-
-More complex packet access may look like:
- R0=inv1 R1=ctx R3=pkt(id=0,off=0,r=14) R4=pkt_end R5=pkt(id=0,off=14,r=14) R10=fp
- 6:  r0 = *(u8 *)(r3 +7) /* load 7th byte from the packet */
- 7:  r4 = *(u8 *)(r3 +12)
- 8:  r4 *= 14
- 9:  r3 = *(u32 *)(r1 +76) /* load skb->data */
-10:  r3 += r4
-11:  r2 = r1
-12:  r2 <<= 48
-13:  r2 >>= 48
-14:  r3 += r2
-15:  r2 = r3
-16:  r2 += 8
-17:  r1 = *(u32 *)(r1 +80) /* load skb->data_end */
-18:  if r2 > r1 goto pc+2
- R0=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) R1=pkt_end R2=pkt(id=2,off=8,r=8) R3=pkt(id=2,off=0,r=8) R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)) R5=pkt(id=0,off=14,r=14) R10=fp
-19:  r1 = *(u8 *)(r3 +4)
-The state of the register R3 is R3=pkt(id=2,off=0,r=8)
-id=2 means that two 'r3 += rX' instructions were seen, so r3 points to some
-offset within a packet and since the program author did
-'if (r3 + 8 > r1) goto err' at insn #18, the safe range is [R3, R3 + 8).
-The verifier only allows 'add'/'sub' operations on packet registers. Any other
-operation will set the register state to 'SCALAR_VALUE' and it won't be
-available for direct packet access.
-Operation 'r3 += rX' may overflow and become less than original skb->data,
-therefore the verifier has to prevent that.  So when it sees 'r3 += rX'
-instruction and rX is more than 16-bit value, any subsequent bounds-check of r3
-against skb->data_end will not give us 'range' information, so attempts to read
-through the pointer will give "invalid access to packet" error.
-Ex. after insn 'r4 = *(u8 *)(r3 +12)' (insn #7 above) the state of r4 is
-R4=inv(id=0,umax_value=255,var_off=(0x0; 0xff)) which means that upper 56 bits
-of the register are guaranteed to be zero, and nothing is known about the lower
-8 bits. After insn 'r4 *= 14' the state becomes
-R4=inv(id=0,umax_value=3570,var_off=(0x0; 0xfffe)), since multiplying an 8-bit
-value by constant 14 will keep upper 52 bits as zero, also the least significant
-bit will be zero as 14 is even.  Similarly 'r2 >>= 48' will make
-R2=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff)), since the shift is not sign
-extending.  This logic is implemented in adjust_reg_min_max_vals() function,
-which calls adjust_ptr_min_max_vals() for adding pointer to scalar (or vice
-versa) and adjust_scalar_min_max_vals() for operations on two scalars.
-
-The end result is that bpf program author can access packet directly
-using normal C code as:
-  void *data = (void *)(long)skb->data;
-  void *data_end = (void *)(long)skb->data_end;
-  struct eth_hdr *eth = data;
-  struct iphdr *iph = data + sizeof(*eth);
-  struct udphdr *udp = data + sizeof(*eth) + sizeof(*iph);
-
-  if (data + sizeof(*eth) + sizeof(*iph) + sizeof(*udp) > data_end)
-          return 0;
-  if (eth->h_proto != htons(ETH_P_IP))
-          return 0;
-  if (iph->protocol != IPPROTO_UDP || iph->ihl != 5)
-          return 0;
-  if (udp->dest == 53 || udp->source == 9)
-          ...;
-which makes such programs easier to write compared to LD_ABS insn
-and significantly faster.
-
-eBPF maps
----------
-'maps' is a generic storage of different types for sharing data between kernel
-and userspace.
-
-The maps are accessed from user space via BPF syscall, which has commands:
-- create a map with given type and attributes
-  map_fd = bpf(BPF_MAP_CREATE, union bpf_attr *attr, u32 size)
-  using attr->map_type, attr->key_size, attr->value_size, attr->max_entries
-  returns process-local file descriptor or negative error
-
-- lookup key in a given map
-  err = bpf(BPF_MAP_LOOKUP_ELEM, union bpf_attr *attr, u32 size)
-  using attr->map_fd, attr->key, attr->value
-  returns zero and stores found elem into value or negative error
-
-- create or update key/value pair in a given map
-  err = bpf(BPF_MAP_UPDATE_ELEM, union bpf_attr *attr, u32 size)
-  using attr->map_fd, attr->key, attr->value
-  returns zero or negative error
-
-- find and delete element by key in a given map
-  err = bpf(BPF_MAP_DELETE_ELEM, union bpf_attr *attr, u32 size)
-  using attr->map_fd, attr->key
-
-- to delete map: close(fd)
-  Exiting process will delete maps automatically
-
-userspace programs use this syscall to create/access maps that eBPF programs
-are concurrently updating.
-
-maps can have different types: hash, array, bloom filter, radix-tree, etc.
-
-The map is defined by:
-  . type
-  . max number of elements
-  . key size in bytes
-  . value size in bytes
-
-Pruning
--------
-The verifier does not actually walk all possible paths through the program.  For
-each new branch to analyse, the verifier looks at all the states it's previously
-been in when at this instruction.  If any of them contain the current state as a
-subset, the branch is 'pruned' - that is, the fact that the previous state was
-accepted implies the current state would be as well.  For instance, if in the
-previous state, r1 held a packet-pointer, and in the current state, r1 holds a
-packet-pointer with a range as long or longer and at least as strict an
-alignment, then r1 is safe.  Similarly, if r2 was NOT_INIT before then it can't
-have been used by any path from that point, so any value in r2 (including
-another NOT_INIT) is safe.  The implementation is in the function regsafe().
-Pruning considers not only the registers but also the stack (and any spilled
-registers it may hold).  They must all be safe for the branch to be pruned.
-This is implemented in states_equal().
-
-Understanding eBPF verifier messages
-------------------------------------
-
-The following are a few examples of invalid eBPF programs and verifier error
-messages as seen in the log:
-
-Program with unreachable instructions:
-static struct bpf_insn prog[] = {
-  BPF_EXIT_INSN(),
-  BPF_EXIT_INSN(),
-};
-Error:
-  unreachable insn 1
-
-Program that reads uninitialized register:
-  BPF_MOV64_REG(BPF_REG_0, BPF_REG_2),
-  BPF_EXIT_INSN(),
-Error:
-  0: (bf) r0 = r2
-  R2 !read_ok
-
-Program that doesn't initialize R0 before exiting:
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_1),
-  BPF_EXIT_INSN(),
-Error:
-  0: (bf) r2 = r1
-  1: (95) exit
-  R0 !read_ok
-
-Program that accesses stack out of bounds:
-  BPF_ST_MEM(BPF_DW, BPF_REG_10, 8, 0),
-  BPF_EXIT_INSN(),
-Error:
-  0: (7a) *(u64 *)(r10 +8) = 0
-  invalid stack off=8 size=8
-
-Program that doesn't initialize stack before passing its address into function:
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_LD_MAP_FD(BPF_REG_1, 0),
-  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-  BPF_EXIT_INSN(),
-Error:
-  0: (bf) r2 = r10
-  1: (07) r2 += -8
-  2: (b7) r1 = 0x0
-  3: (85) call 1
-  invalid indirect read from stack off -8+0 size 8
-
-Program that uses invalid map_fd=0 while calling to map_lookup_elem() function:
-  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_LD_MAP_FD(BPF_REG_1, 0),
-  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-  BPF_EXIT_INSN(),
-Error:
-  0: (7a) *(u64 *)(r10 -8) = 0
-  1: (bf) r2 = r10
-  2: (07) r2 += -8
-  3: (b7) r1 = 0x0
-  4: (85) call 1
-  fd 0 is not pointing to valid bpf_map
-
-Program that doesn't check return value of map_lookup_elem() before accessing
-map element:
-  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_LD_MAP_FD(BPF_REG_1, 0),
-  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
-  BPF_EXIT_INSN(),
-Error:
-  0: (7a) *(u64 *)(r10 -8) = 0
-  1: (bf) r2 = r10
-  2: (07) r2 += -8
-  3: (b7) r1 = 0x0
-  4: (85) call 1
-  5: (7a) *(u64 *)(r0 +0) = 0
-  R0 invalid mem access 'map_value_or_null'
-
-Program that correctly checks map_lookup_elem() returned value for NULL, but
-accesses the memory with incorrect alignment:
-  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_LD_MAP_FD(BPF_REG_1, 0),
-  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 1),
-  BPF_ST_MEM(BPF_DW, BPF_REG_0, 4, 0),
-  BPF_EXIT_INSN(),
-Error:
-  0: (7a) *(u64 *)(r10 -8) = 0
-  1: (bf) r2 = r10
-  2: (07) r2 += -8
-  3: (b7) r1 = 1
-  4: (85) call 1
-  5: (15) if r0 == 0x0 goto pc+1
-   R0=map_ptr R10=fp
-  6: (7a) *(u64 *)(r0 +4) = 0
-  misaligned access off 4 size 8
-
-Program that correctly checks map_lookup_elem() returned value for NULL and
-accesses memory with correct alignment in one side of 'if' branch, but fails
-to do so in the other side of 'if' branch:
-  BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_LD_MAP_FD(BPF_REG_1, 0),
-  BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-  BPF_JMP_IMM(BPF_JEQ, BPF_REG_0, 0, 2),
-  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 0),
-  BPF_EXIT_INSN(),
-  BPF_ST_MEM(BPF_DW, BPF_REG_0, 0, 1),
-  BPF_EXIT_INSN(),
-Error:
-  0: (7a) *(u64 *)(r10 -8) = 0
-  1: (bf) r2 = r10
-  2: (07) r2 += -8
-  3: (b7) r1 = 1
-  4: (85) call 1
-  5: (15) if r0 == 0x0 goto pc+2
-   R0=map_ptr R10=fp
-  6: (7a) *(u64 *)(r0 +0) = 0
-  7: (95) exit
-
-  from 5 to 8: R0=imm0 R10=fp
-  8: (7a) *(u64 *)(r0 +0) = 1
-  R0 invalid mem access 'imm'
-
-Program that performs a socket lookup then sets the pointer to NULL without
-checking it:
-value:
-  BPF_MOV64_IMM(BPF_REG_2, 0),
-  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_MOV64_IMM(BPF_REG_3, 4),
-  BPF_MOV64_IMM(BPF_REG_4, 0),
-  BPF_MOV64_IMM(BPF_REG_5, 0),
-  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
-  BPF_MOV64_IMM(BPF_REG_0, 0),
-  BPF_EXIT_INSN(),
-Error:
-  0: (b7) r2 = 0
-  1: (63) *(u32 *)(r10 -8) = r2
-  2: (bf) r2 = r10
-  3: (07) r2 += -8
-  4: (b7) r3 = 4
-  5: (b7) r4 = 0
-  6: (b7) r5 = 0
-  7: (85) call bpf_sk_lookup_tcp#65
-  8: (b7) r0 = 0
-  9: (95) exit
-  Unreleased reference id=1, alloc_insn=7
-
-Program that performs a socket lookup but does not NULL-check the returned
-value:
-  BPF_MOV64_IMM(BPF_REG_2, 0),
-  BPF_STX_MEM(BPF_W, BPF_REG_10, BPF_REG_2, -8),
-  BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-  BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-  BPF_MOV64_IMM(BPF_REG_3, 4),
-  BPF_MOV64_IMM(BPF_REG_4, 0),
-  BPF_MOV64_IMM(BPF_REG_5, 0),
-  BPF_EMIT_CALL(BPF_FUNC_sk_lookup_tcp),
-  BPF_EXIT_INSN(),
-Error:
-  0: (b7) r2 = 0
-  1: (63) *(u32 *)(r10 -8) = r2
-  2: (bf) r2 = r10
-  3: (07) r2 += -8
-  4: (b7) r3 = 4
-  5: (b7) r4 = 0
-  6: (b7) r5 = 0
-  7: (85) call bpf_sk_lookup_tcp#65
-  8: (95) exit
-  Unreleased reference id=1, alloc_insn=7
-
-Testing
--------
-
-Next to the BPF toolchain, the kernel also ships a test module that contains
-various test cases for classic and internal BPF that can be executed against
-the BPF interpreter and JIT compiler. It can be found in lib/test_bpf.c and
-enabled via Kconfig:
-
-  CONFIG_TEST_BPF=m
-
-After the module has been built and installed, the test suite can be executed
-via insmod or modprobe against 'test_bpf' module. Results of the test cases
-including timings in nsec can be found in the kernel log (dmesg).
-
-Misc
-----
-
-Also trinity, the Linux syscall fuzzer, has built-in support for BPF and
-SECCOMP-BPF kernel fuzzing.
-
-Written by
-----------
-
-The document was written in the hope that it is found useful and in order
-to give potential BPF hackers or security auditors a better overview of
-the underlying architecture.
-
-Jay Schulist <jschlst@samba.org>
-Daniel Borkmann <daniel@iogearbox.net>
-Alexei Starovoitov <ast@kernel.org>
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 807abe25ae4b..144ed838c1a9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -56,6 +56,7 @@ Contents:
    driver
    eql
    fib_trie
+   filter
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
index 999eb41da81d..494614573c67 100644
--- a/Documentation/networking/packet_mmap.txt
+++ b/Documentation/networking/packet_mmap.txt
@@ -1051,7 +1051,7 @@ for more information on hardware timestamps.
 -------------------------------------------------------------------------------
 
 - Packet sockets work well together with Linux socket filters, thus you also
-  might want to have a look at Documentation/networking/filter.txt
+  might want to have a look at Documentation/networking/filter.rst
 
 --------------------------------------------------------------------------------
 + THANKS
diff --git a/MAINTAINERS b/MAINTAINERS
index 7323bfc1720f..4ec6d2741d36 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -3192,7 +3192,7 @@ Q:	https://patchwork.ozlabs.org/project/netdev/list/?delegate=77147
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/bpf/bpf-next.git
 F:	Documentation/bpf/
-F:	Documentation/networking/filter.txt
+F:	Documentation/networking/filter.rst
 F:	arch/*/net/*
 F:	include/linux/bpf*
 F:	include/linux/filter.h
diff --git a/tools/bpf/bpf_asm.c b/tools/bpf/bpf_asm.c
index e5f95e3eede3..0063c3c029e7 100644
--- a/tools/bpf/bpf_asm.c
+++ b/tools/bpf/bpf_asm.c
@@ -11,7 +11,7 @@
  *
  * How to get into it:
  *
- * 1) read Documentation/networking/filter.txt
+ * 1) read Documentation/networking/filter.rst
  * 2) Run `bpf_asm [-c] <filter-prog file>` to translate into binary
  *    blob that is loadable with xt_bpf, cls_bpf et al. Note: -c will
  *    pretty print a C-like construct.
diff --git a/tools/bpf/bpf_dbg.c b/tools/bpf/bpf_dbg.c
index 9d3766e653a9..a0ebcdf59c31 100644
--- a/tools/bpf/bpf_dbg.c
+++ b/tools/bpf/bpf_dbg.c
@@ -13,7 +13,7 @@
  * for making a verdict when multiple simple BPF programs are combined
  * into one in order to prevent parsing same headers multiple times.
  *
- * More on how to debug BPF opcodes see Documentation/networking/filter.txt
+ * More on how to debug BPF opcodes see Documentation/networking/filter.rst
  * which is the main document on BPF. Mini howto for getting started:
  *
  *  1) `./bpf_dbg` to enter the shell (shell cmds denoted with '>'):
-- 
cgit v1.2.3-59-g8ed1b


From 62502dff2c5012c19727bd992b0101a816095f1e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:37 +0200
Subject: docs: networking: convert fore200e.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/fore200e.rst | 66 +++++++++++++++++++++++++++++++++++
 Documentation/networking/fore200e.txt | 64 ---------------------------------
 Documentation/networking/index.rst    |  1 +
 drivers/atm/Kconfig                   |  2 +-
 4 files changed, 68 insertions(+), 65 deletions(-)
 create mode 100644 Documentation/networking/fore200e.rst
 delete mode 100644 Documentation/networking/fore200e.txt

diff --git a/Documentation/networking/fore200e.rst b/Documentation/networking/fore200e.rst
new file mode 100644
index 000000000000..55df9ec09ac8
--- /dev/null
+++ b/Documentation/networking/fore200e.rst
@@ -0,0 +1,66 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================
+FORE Systems PCA-200E/SBA-200E ATM NIC driver
+=============================================
+
+This driver adds support for the FORE Systems 200E-series ATM adapters
+to the Linux operating system. It is based on the earlier PCA-200E driver
+written by Uwe Dannowski.
+
+The driver simultaneously supports PCA-200E and SBA-200E adapters on
+i386, alpha (untested), powerpc, sparc and sparc64 archs.
+
+The intent is to enable the use of different models of FORE adapters at the
+same time, by hosts that have several bus interfaces (such as PCI+SBUS,
+or PCI+EISA).
+
+Only PCI and SBUS devices are currently supported by the driver, but support
+for other bus interfaces such as EISA should not be too hard to add.
+
+
+Firmware Copyright Notice
+-------------------------
+
+Please read the fore200e_firmware_copyright file present
+in the linux/drivers/atm directory for details and restrictions.
+
+
+Firmware Updates
+----------------
+
+The FORE Systems 200E-series driver is shipped with firmware data being
+uploaded to the ATM adapters at system boot time or at module loading time.
+The supplied firmware images should work with all adapters.
+
+However, if you encounter problems (the firmware doesn't start or the driver
+is unable to read the PROM data), you may consider trying another firmware
+version. Alternative binary firmware images can be found somewhere on the
+ForeThought CD-ROM supplied with your adapter by FORE Systems.
+
+You can also get the latest firmware images from FORE Systems at
+https://en.wikipedia.org/wiki/FORE_Systems. Register TACTics Online and go to
+the 'software updates' pages. The firmware binaries are part of
+the various ForeThought software distributions.
+
+Notice that different versions of the PCA-200E firmware exist, depending
+on the endianness of the host architecture. The driver is shipped with
+both little and big endian PCA firmware images.
+
+Name and location of the new firmware images can be set at kernel
+configuration time:
+
+1. Copy the new firmware binary files (with .bin, .bin1 or .bin2 suffix)
+   to some directory, such as linux/drivers/atm.
+
+2. Reconfigure your kernel to set the new firmware name and location.
+   Expected pathnames are absolute or relative to the drivers/atm directory.
+
+3. Rebuild and re-install your kernel or your module.
+
+
+Feedback
+--------
+
+Feedback is welcome. Please send success stories/bug reports/
+patches/improvement/comments/flames to <lizzi@cnam.fr>.
diff --git a/Documentation/networking/fore200e.txt b/Documentation/networking/fore200e.txt
deleted file mode 100644
index 1f98f62b4370..000000000000
--- a/Documentation/networking/fore200e.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-
-FORE Systems PCA-200E/SBA-200E ATM NIC driver
----------------------------------------------
-
-This driver adds support for the FORE Systems 200E-series ATM adapters
-to the Linux operating system. It is based on the earlier PCA-200E driver
-written by Uwe Dannowski.
-
-The driver simultaneously supports PCA-200E and SBA-200E adapters on
-i386, alpha (untested), powerpc, sparc and sparc64 archs.
-
-The intent is to enable the use of different models of FORE adapters at the
-same time, by hosts that have several bus interfaces (such as PCI+SBUS,
-or PCI+EISA).
-
-Only PCI and SBUS devices are currently supported by the driver, but support
-for other bus interfaces such as EISA should not be too hard to add.
-
-
-Firmware Copyright Notice
--------------------------
-
-Please read the fore200e_firmware_copyright file present
-in the linux/drivers/atm directory for details and restrictions.
-
-
-Firmware Updates
-----------------
-
-The FORE Systems 200E-series driver is shipped with firmware data being 
-uploaded to the ATM adapters at system boot time or at module loading time. 
-The supplied firmware images should work with all adapters.
-
-However, if you encounter problems (the firmware doesn't start or the driver
-is unable to read the PROM data), you may consider trying another firmware
-version. Alternative binary firmware images can be found somewhere on the
-ForeThought CD-ROM supplied with your adapter by FORE Systems.
-
-You can also get the latest firmware images from FORE Systems at
-https://en.wikipedia.org/wiki/FORE_Systems. Register TACTics Online and go to
-the 'software updates' pages. The firmware binaries are part of
-the various ForeThought software distributions.
-
-Notice that different versions of the PCA-200E firmware exist, depending
-on the endianness of the host architecture. The driver is shipped with
-both little and big endian PCA firmware images.
-
-Name and location of the new firmware images can be set at kernel
-configuration time:
-
-1. Copy the new firmware binary files (with .bin, .bin1 or .bin2 suffix)
-   to some directory, such as linux/drivers/atm.
-
-2. Reconfigure your kernel to set the new firmware name and location.
-   Expected pathnames are absolute or relative to the drivers/atm directory.
-
-3. Rebuild and re-install your kernel or your module.
-
-
-Feedback
---------
-
-Feedback is welcome. Please send success stories/bug reports/
-patches/improvement/comments/flames to <lizzi@cnam.fr>.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 144ed838c1a9..b2fb8b907d68 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -57,6 +57,7 @@ Contents:
    eql
    fib_trie
    filter
+   fore200e
 
 .. only::  subproject and html
 
diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig
index 8c37294f1d1e..4af7cbdcc349 100644
--- a/drivers/atm/Kconfig
+++ b/drivers/atm/Kconfig
@@ -336,7 +336,7 @@ config ATM_FORE200E
 	  on PCI and SBUS hosts. Say Y (or M to compile as a module
 	  named fore_200e) here if you have one of these ATM adapters.
 
-	  See the file <file:Documentation/networking/fore200e.txt> for
+	  See the file <file:Documentation/networking/fore200e.rst> for
 	  further details.
 
 config ATM_FORE200E_USE_TASKLET
-- 
cgit v1.2.3-59-g8ed1b


From 5b0d74b54c7f1cb9c65955df78dffe112e1959c1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:38 +0200
Subject: docs: networking: convert framerelay.txt to ReST

- add SPDX header;
- add a document title;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/framerelay.rst | 44 +++++++++++++++++++++++++++++++++
 Documentation/networking/framerelay.txt | 39 -----------------------------
 Documentation/networking/index.rst      |  1 +
 drivers/net/wan/Kconfig                 |  4 +--
 4 files changed, 47 insertions(+), 41 deletions(-)
 create mode 100644 Documentation/networking/framerelay.rst
 delete mode 100644 Documentation/networking/framerelay.txt

diff --git a/Documentation/networking/framerelay.rst b/Documentation/networking/framerelay.rst
new file mode 100644
index 000000000000..6d904399ec6d
--- /dev/null
+++ b/Documentation/networking/framerelay.rst
@@ -0,0 +1,44 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+Frame Relay (FR)
+================
+
+Frame Relay (FR) support for linux is built into a two tiered system of device
+drivers.  The upper layer implements RFC1490 FR specification, and uses the
+Data Link Connection Identifier (DLCI) as its hardware address.  Usually these
+are assigned by your network supplier, they give you the number/numbers of
+the Virtual Connections (VC) assigned to you.
+
+Each DLCI is a point-to-point link between your machine and a remote one.
+As such, a separate device is needed to accommodate the routing.  Within the
+net-tools archives is 'dlcicfg'.  This program will communicate with the
+base "DLCI" device, and create new net devices named 'dlci00', 'dlci01'...
+The configuration script will ask you how many DLCIs you need, as well as
+how many DLCIs you want to assign to each Frame Relay Access Device (FRAD).
+
+The DLCI uses a number of function calls to communicate with the FRAD, all
+of which are stored in the FRAD's private data area.  assoc/deassoc,
+activate/deactivate and dlci_config.  The DLCI supplies a receive function
+to the FRAD to accept incoming packets.
+
+With this initial offering, only 1 FRAD driver is available.  With many thanks
+to Sangoma Technologies, David Mandelstam & Gene Kozin, the S502A, S502E &
+S508 are supported.  This driver is currently set up for only FR, but as
+Sangoma makes more firmware modules available, it can be updated to provide
+them as well.
+
+Configuration of the FRAD makes use of another net-tools program, 'fradcfg'.
+This program makes use of a configuration file (which dlcicfg can also read)
+to specify the types of boards to be configured as FRADs, as well as perform
+any board specific configuration.  The Sangoma module of fradcfg loads the
+FR firmware into the card, sets the irq/port/memory information, and provides
+an initial configuration.
+
+Additional FRAD device drivers can be added as hardware is available.
+
+At this time, the dlcicfg and fradcfg programs have not been incorporated into
+the net-tools distribution.  They can be found at ftp.invlogic.com, in
+/pub/linux.  Note that with OS/2 FTPD, you end up in /pub by default, so just
+use 'cd linux'.  v0.10 is for use on pre-2.0.3 and earlier, v0.15 is for
+pre-2.0.4 and later.
diff --git a/Documentation/networking/framerelay.txt b/Documentation/networking/framerelay.txt
deleted file mode 100644
index 1a0b720440dd..000000000000
--- a/Documentation/networking/framerelay.txt
+++ /dev/null
@@ -1,39 +0,0 @@
-Frame Relay (FR) support for linux is built into a two tiered system of device 
-drivers.  The upper layer implements RFC1490 FR specification, and uses the
-Data Link Connection Identifier (DLCI) as its hardware address.  Usually these
-are assigned by your network supplier, they give you the number/numbers of
-the Virtual Connections (VC) assigned to you.
-
-Each DLCI is a point-to-point link between your machine and a remote one.
-As such, a separate device is needed to accommodate the routing.  Within the
-net-tools archives is 'dlcicfg'.  This program will communicate with the
-base "DLCI" device, and create new net devices named 'dlci00', 'dlci01'... 
-The configuration script will ask you how many DLCIs you need, as well as
-how many DLCIs you want to assign to each Frame Relay Access Device (FRAD).
-
-The DLCI uses a number of function calls to communicate with the FRAD, all
-of which are stored in the FRAD's private data area.  assoc/deassoc, 
-activate/deactivate and dlci_config.  The DLCI supplies a receive function
-to the FRAD to accept incoming packets.
-
-With this initial offering, only 1 FRAD driver is available.  With many thanks
-to Sangoma Technologies, David Mandelstam & Gene Kozin, the S502A, S502E & 
-S508 are supported.  This driver is currently set up for only FR, but as 
-Sangoma makes more firmware modules available, it can be updated to provide
-them as well.
-
-Configuration of the FRAD makes use of another net-tools program, 'fradcfg'.
-This program makes use of a configuration file (which dlcicfg can also read)
-to specify the types of boards to be configured as FRADs, as well as perform
-any board specific configuration.  The Sangoma module of fradcfg loads the
-FR firmware into the card, sets the irq/port/memory information, and provides
-an initial configuration.
-
-Additional FRAD device drivers can be added as hardware is available.
-
-At this time, the dlcicfg and fradcfg programs have not been incorporated into
-the net-tools distribution.  They can be found at ftp.invlogic.com, in 
-/pub/linux.  Note that with OS/2 FTPD, you end up in /pub by default, so just
-use 'cd linux'.  v0.10 is for use on pre-2.0.3 and earlier, v0.15 is for 
-pre-2.0.4 and later.
-
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b2fb8b907d68..4e225f1f7039 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -58,6 +58,7 @@ Contents:
    fib_trie
    filter
    fore200e
+   framerelay
 
 .. only::  subproject and html
 
diff --git a/drivers/net/wan/Kconfig b/drivers/net/wan/Kconfig
index dbc0e3f7a3e2..3e21726c36e8 100644
--- a/drivers/net/wan/Kconfig
+++ b/drivers/net/wan/Kconfig
@@ -336,7 +336,7 @@ config DLCI
 
 	  To use frame relay, you need supporting hardware (called FRAD) and
 	  certain programs from the net-tools package as explained in
-	  <file:Documentation/networking/framerelay.txt>.
+	  <file:Documentation/networking/framerelay.rst>.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called dlci.
@@ -361,7 +361,7 @@ config SDLA
 
 	  These are multi-protocol cards, but only Frame Relay is supported
 	  by the driver at this time. Please read
-	  <file:Documentation/networking/framerelay.txt>.
+	  <file:Documentation/networking/framerelay.rst>.
 
 	  To compile this driver as a module, choose M here: the
 	  module will be called sdla.
-- 
cgit v1.2.3-59-g8ed1b


From 16128ad8f927850a1121b7645c6381341d9c0b63 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:39 +0200
Subject: docs: networking: convert generic-hdlc.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/generic-hdlc.rst | 170 ++++++++++++++++++++++++++++++
 Documentation/networking/generic-hdlc.txt | 132 -----------------------
 Documentation/networking/index.rst        |   1 +
 3 files changed, 171 insertions(+), 132 deletions(-)
 create mode 100644 Documentation/networking/generic-hdlc.rst
 delete mode 100644 Documentation/networking/generic-hdlc.txt

diff --git a/Documentation/networking/generic-hdlc.rst b/Documentation/networking/generic-hdlc.rst
new file mode 100644
index 000000000000..1c3bb5cb98d4
--- /dev/null
+++ b/Documentation/networking/generic-hdlc.rst
@@ -0,0 +1,170 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Generic HDLC layer
+==================
+
+Krzysztof Halasa <khc@pm.waw.pl>
+
+
+Generic HDLC layer currently supports:
+
+1. Frame Relay (ANSI, CCITT, Cisco and no LMI)
+
+   - Normal (routed) and Ethernet-bridged (Ethernet device emulation)
+     interfaces can share a single PVC.
+   - ARP support (no InARP support in the kernel - there is an
+     experimental InARP user-space daemon available on:
+     http://www.kernel.org/pub/linux/utils/net/hdlc/).
+
+2. raw HDLC - either IP (IPv4) interface or Ethernet device emulation
+3. Cisco HDLC
+4. PPP
+5. X.25 (uses X.25 routines).
+
+Generic HDLC is a protocol driver only - it needs a low-level driver
+for your particular hardware.
+
+Ethernet device emulation (using HDLC or Frame-Relay PVC) is compatible
+with IEEE 802.1Q (VLANs) and 802.1D (Ethernet bridging).
+
+
+Make sure the hdlc.o and the hardware driver are loaded. It should
+create a number of "hdlc" (hdlc0 etc) network devices, one for each
+WAN port. You'll need the "sethdlc" utility, get it from:
+
+	http://www.kernel.org/pub/linux/utils/net/hdlc/
+
+Compile sethdlc.c utility::
+
+	gcc -O2 -Wall -o sethdlc sethdlc.c
+
+Make sure you're using a correct version of sethdlc for your kernel.
+
+Use sethdlc to set physical interface, clock rate, HDLC mode used,
+and add any required PVCs if using Frame Relay.
+Usually you want something like::
+
+	sethdlc hdlc0 clock int rate 128000
+	sethdlc hdlc0 cisco interval 10 timeout 25
+
+or::
+
+	sethdlc hdlc0 rs232 clock ext
+	sethdlc hdlc0 fr lmi ansi
+	sethdlc hdlc0 create 99
+	ifconfig hdlc0 up
+	ifconfig pvc0 localIP pointopoint remoteIP
+
+In Frame Relay mode, ifconfig master hdlc device up (without assigning
+any IP address to it) before using pvc devices.
+
+
+Setting interface:
+
+* v35 | rs232 | x21 | t1 | e1
+    - sets physical interface for a given port
+      if the card has software-selectable interfaces
+  loopback
+    - activate hardware loopback (for testing only)
+* clock ext
+    - both RX clock and TX clock external
+* clock int
+    - both RX clock and TX clock internal
+* clock txint
+    - RX clock external, TX clock internal
+* clock txfromrx
+    - RX clock external, TX clock derived from RX clock
+* rate
+    - sets clock rate in bps (for "int" or "txint" clock only)
+
+
+Setting protocol:
+
+* hdlc - sets raw HDLC (IP-only) mode
+
+  nrz / nrzi / fm-mark / fm-space / manchester - sets transmission code
+
+  no-parity / crc16 / crc16-pr0 (CRC16 with preset zeros) / crc32-itu
+
+  crc16-itu (CRC16 with ITU-T polynomial) / crc16-itu-pr0 - sets parity
+
+* hdlc-eth - Ethernet device emulation using HDLC. Parity and encoding
+  as above.
+
+* cisco - sets Cisco HDLC mode (IP, IPv6 and IPX supported)
+
+  interval - time in seconds between keepalive packets
+
+  timeout - time in seconds after last received keepalive packet before
+	    we assume the link is down
+
+* ppp - sets synchronous PPP mode
+
+* x25 - sets X.25 mode
+
+* fr - Frame Relay mode
+
+  lmi ansi / ccitt / cisco / none - LMI (link management) type
+
+  dce - Frame Relay DCE (network) side LMI instead of default DTE (user).
+
+  It has nothing to do with clocks!
+
+  - t391 - link integrity verification polling timer (in seconds) - user
+  - t392 - polling verification timer (in seconds) - network
+  - n391 - full status polling counter - user
+  - n392 - error threshold - both user and network
+  - n393 - monitored events count - both user and network
+
+Frame-Relay only:
+
+* create n | delete n - adds / deletes PVC interface with DLCI #n.
+  Newly created interface will be named pvc0, pvc1 etc.
+
+* create ether n | delete ether n - adds a device for Ethernet-bridged
+  frames. The device will be named pvceth0, pvceth1 etc.
+
+
+
+
+Board-specific issues
+---------------------
+
+n2.o and c101.o need parameters to work::
+
+	insmod n2 hw=io,irq,ram,ports[:io,irq,...]
+
+example::
+
+	insmod n2 hw=0x300,10,0xD0000,01
+
+or::
+
+	insmod c101 hw=irq,ram[:irq,...]
+
+example::
+
+	insmod c101 hw=9,0xdc000
+
+If built into the kernel, these drivers need kernel (command line) parameters::
+
+	n2.hw=io,irq,ram,ports:...
+
+or::
+
+	c101.hw=irq,ram:...
+
+
+
+If you have a problem with N2, C101 or PLX200SYN card, you can issue the
+"private" command to see port's packet descriptor rings (in kernel logs)::
+
+	sethdlc hdlc0 private
+
+The hardware driver has to be built with #define DEBUG_RINGS.
+Attaching this info to bug reports would be helpful. Anyway, let me know
+if you have problems using this.
+
+For patches and other info look at:
+<http://www.kernel.org/pub/linux/utils/net/hdlc/>.
diff --git a/Documentation/networking/generic-hdlc.txt b/Documentation/networking/generic-hdlc.txt
deleted file mode 100644
index 4eb3cc40b702..000000000000
--- a/Documentation/networking/generic-hdlc.txt
+++ /dev/null
@@ -1,132 +0,0 @@
-Generic HDLC layer
-Krzysztof Halasa <khc@pm.waw.pl>
-
-
-Generic HDLC layer currently supports:
-1. Frame Relay (ANSI, CCITT, Cisco and no LMI)
-   - Normal (routed) and Ethernet-bridged (Ethernet device emulation)
-     interfaces can share a single PVC.
-   - ARP support (no InARP support in the kernel - there is an
-     experimental InARP user-space daemon available on:
-     http://www.kernel.org/pub/linux/utils/net/hdlc/).
-2. raw HDLC - either IP (IPv4) interface or Ethernet device emulation
-3. Cisco HDLC
-4. PPP
-5. X.25 (uses X.25 routines).
-
-Generic HDLC is a protocol driver only - it needs a low-level driver
-for your particular hardware.
-
-Ethernet device emulation (using HDLC or Frame-Relay PVC) is compatible
-with IEEE 802.1Q (VLANs) and 802.1D (Ethernet bridging).
-
-
-Make sure the hdlc.o and the hardware driver are loaded. It should
-create a number of "hdlc" (hdlc0 etc) network devices, one for each
-WAN port. You'll need the "sethdlc" utility, get it from:
-	http://www.kernel.org/pub/linux/utils/net/hdlc/
-
-Compile sethdlc.c utility:
-	gcc -O2 -Wall -o sethdlc sethdlc.c
-Make sure you're using a correct version of sethdlc for your kernel.
-
-Use sethdlc to set physical interface, clock rate, HDLC mode used,
-and add any required PVCs if using Frame Relay.
-Usually you want something like:
-
-	sethdlc hdlc0 clock int rate 128000
-	sethdlc hdlc0 cisco interval 10 timeout 25
-or
-	sethdlc hdlc0 rs232 clock ext
-	sethdlc hdlc0 fr lmi ansi
-	sethdlc hdlc0 create 99
-	ifconfig hdlc0 up
-	ifconfig pvc0 localIP pointopoint remoteIP
-
-In Frame Relay mode, ifconfig master hdlc device up (without assigning
-any IP address to it) before using pvc devices.
-
-
-Setting interface:
-
-* v35 | rs232 | x21 | t1 | e1 - sets physical interface for a given port
-                                if the card has software-selectable interfaces
-  loopback - activate hardware loopback (for testing only)
-* clock ext - both RX clock and TX clock external
-* clock int - both RX clock and TX clock internal
-* clock txint - RX clock external, TX clock internal
-* clock txfromrx - RX clock external, TX clock derived from RX clock
-* rate - sets clock rate in bps (for "int" or "txint" clock only)
-
-
-Setting protocol:
-
-* hdlc - sets raw HDLC (IP-only) mode
-  nrz / nrzi / fm-mark / fm-space / manchester - sets transmission code
-  no-parity / crc16 / crc16-pr0 (CRC16 with preset zeros) / crc32-itu
-  crc16-itu (CRC16 with ITU-T polynomial) / crc16-itu-pr0 - sets parity
-
-* hdlc-eth - Ethernet device emulation using HDLC. Parity and encoding
-  as above.
-
-* cisco - sets Cisco HDLC mode (IP, IPv6 and IPX supported)
-  interval - time in seconds between keepalive packets
-  timeout - time in seconds after last received keepalive packet before
-            we assume the link is down
-
-* ppp - sets synchronous PPP mode
-
-* x25 - sets X.25 mode
-
-* fr - Frame Relay mode
-  lmi ansi / ccitt / cisco / none - LMI (link management) type
-  dce - Frame Relay DCE (network) side LMI instead of default DTE (user).
-  It has nothing to do with clocks!
-  t391 - link integrity verification polling timer (in seconds) - user
-  t392 - polling verification timer (in seconds) - network
-  n391 - full status polling counter - user
-  n392 - error threshold - both user and network
-  n393 - monitored events count - both user and network
-
-Frame-Relay only:
-* create n | delete n - adds / deletes PVC interface with DLCI #n.
-  Newly created interface will be named pvc0, pvc1 etc.
-
-* create ether n | delete ether n - adds a device for Ethernet-bridged
-  frames. The device will be named pvceth0, pvceth1 etc.
-
-
-
-
-Board-specific issues
----------------------
-
-n2.o and c101.o need parameters to work:
-
-	insmod n2 hw=io,irq,ram,ports[:io,irq,...]
-example:
-	insmod n2 hw=0x300,10,0xD0000,01
-
-or
-	insmod c101 hw=irq,ram[:irq,...]
-example:
-	insmod c101 hw=9,0xdc000
-
-If built into the kernel, these drivers need kernel (command line) parameters:
-	n2.hw=io,irq,ram,ports:...
-or
-	c101.hw=irq,ram:...
-
-
-
-If you have a problem with N2, C101 or PLX200SYN card, you can issue the
-"private" command to see port's packet descriptor rings (in kernel logs):
-
-	sethdlc hdlc0 private
-
-The hardware driver has to be build with #define DEBUG_RINGS.
-Attaching this info to bug reports would be helpful. Anyway, let me know
-if you have problems using this.
-
-For patches and other info look at:
-<http://www.kernel.org/pub/linux/utils/net/hdlc/>.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 4e225f1f7039..d34824b27264 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -59,6 +59,7 @@ Contents:
    filter
    fore200e
    framerelay
+   generic-hdlc
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 110662503de20f21ab22cf409753124d0977a339 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:40 +0200
Subject: docs: networking: convert generic_netlink.txt to ReST

Not much to be done here:
- add SPDX header;
- add a document title;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/generic_netlink.rst | 9 +++++++++
 Documentation/networking/generic_netlink.txt | 3 ---
 Documentation/networking/index.rst           | 1 +
 3 files changed, 10 insertions(+), 3 deletions(-)
 create mode 100644 Documentation/networking/generic_netlink.rst
 delete mode 100644 Documentation/networking/generic_netlink.txt

diff --git a/Documentation/networking/generic_netlink.rst b/Documentation/networking/generic_netlink.rst
new file mode 100644
index 000000000000..59e04ccf80c1
--- /dev/null
+++ b/Documentation/networking/generic_netlink.rst
@@ -0,0 +1,9 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+Generic Netlink
+===============
+
+A wiki document on how to use Generic Netlink can be found here:
+
+ * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto
diff --git a/Documentation/networking/generic_netlink.txt b/Documentation/networking/generic_netlink.txt
deleted file mode 100644
index 3e071115ca90..000000000000
--- a/Documentation/networking/generic_netlink.txt
+++ /dev/null
@@ -1,3 +0,0 @@
-A wiki document on how to use Generic Netlink can be found here:
-
- * http://www.linuxfoundation.org/collaborate/workgroups/networking/generic_netlink_howto
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index d34824b27264..42e556509e22 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -60,6 +60,7 @@ Contents:
    fore200e
    framerelay
    generic-hdlc
+   generic_netlink
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 8c498935585680284e5f3e5294d3c901b7c89d57 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:41 +0200
Subject: docs: networking: convert gen_stats.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- mark tables as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/gen_stats.rst | 129 +++++++++++++++++++++++++++++++++
 Documentation/networking/gen_stats.txt | 119 ------------------------------
 Documentation/networking/index.rst     |   1 +
 net/core/gen_stats.c                   |   2 +-
 4 files changed, 131 insertions(+), 120 deletions(-)
 create mode 100644 Documentation/networking/gen_stats.rst
 delete mode 100644 Documentation/networking/gen_stats.txt

diff --git a/Documentation/networking/gen_stats.rst b/Documentation/networking/gen_stats.rst
new file mode 100644
index 000000000000..595a83b9a61b
--- /dev/null
+++ b/Documentation/networking/gen_stats.rst
@@ -0,0 +1,129 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+Generic networking statistics for netlink users
+===============================================
+
+Statistic counters are grouped into structs:
+
+==================== ===================== =====================
+Struct               TLV type              Description
+==================== ===================== =====================
+gnet_stats_basic     TCA_STATS_BASIC       Basic statistics
+gnet_stats_rate_est  TCA_STATS_RATE_EST    Rate estimator
+gnet_stats_queue     TCA_STATS_QUEUE       Queue statistics
+none                 TCA_STATS_APP         Application specific
+==================== ===================== =====================
+
+
+Collecting:
+-----------
+
+Declare the statistic structs you need::
+
+	struct mystruct {
+		struct gnet_stats_basic	bstats;
+		struct gnet_stats_queue	qstats;
+		...
+	};
+
+Update statistics, in dequeue() methods only, (while owning qdisc->running)::
+
+	mystruct->bstats.packet++;
+	mystruct->qstats.backlog += skb->pkt_len;
+
+
+Export to userspace (Dump):
+---------------------------
+
+::
+
+    my_dumping_routine(struct sk_buff *skb, ...)
+    {
+	    struct gnet_dump dump;
+
+	    if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump,
+				    TCA_PAD) < 0)
+		    goto rtattr_failure;
+
+	    if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
+		gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 ||
+		    gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0)
+		    goto rtattr_failure;
+
+	    if (gnet_stats_finish_copy(&dump) < 0)
+		    goto rtattr_failure;
+	    ...
+    }
+
+TCA_STATS/TCA_XSTATS backward compatibility:
+--------------------------------------------
+
+Prior users of struct tc_stats and xstats can maintain backward
+compatibility by calling the compat wrappers to keep providing the
+existing TLV types::
+
+    my_dumping_routine(struct sk_buff *skb, ...)
+    {
+	if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
+					TCA_XSTATS, &mystruct->lock, &dump,
+					TCA_PAD) < 0)
+		    goto rtattr_failure;
+	    ...
+    }
+
+A struct tc_stats will be filled out during gnet_stats_copy_* calls
+and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app
+was called.
+
+
+Locking:
+--------
+
+Locks are taken before writing and released once all statistics have
+been written. Locks are always released in case of an error. You
+are responsible for making sure that the lock is initialized.
+
+
+Rate Estimator:
+---------------
+
+0) Prepare an estimator attribute. Most likely this would be in user
+   space. The value of this TLV should contain a tc_estimator structure.
+   As usual, such a TLV needs to be 32 bit aligned and therefore the
+   length needs to be appropriately set, etc. The estimator interval
+   and ewma log need to be converted to the appropriate values.
+   tc_estimator.c::tc_setup_estimator() is advisable to be used as the
+   conversion routine. It does a few clever things. It takes a time
+   interval in microsecs, a time constant also in microsecs and a struct
+   tc_estimator to  be populated. The returned tc_estimator can be
+   transported to the kernel.  Transfer such a structure in a TLV of type
+   TCA_RATE to your code in the kernel.
+
+In the kernel when setting up:
+
+1) make sure you have basic stats and rate stats setup first.
+2) make sure you have initialized stats lock that is used to setup such
+   stats.
+3) Now initialize a new estimator::
+
+    int ret = gen_new_estimator(my_basicstats,my_rate_est_stats,
+	mystats_lock, attr_with_tcestimator_struct);
+
+    if ret == 0
+	success
+    else
+	failed
+
+From now on, every time you dump my_rate_est_stats it will contain
+up-to-date info.
+
+Once you are done, call gen_kill_estimator(my_basicstats,
+my_rate_est_stats). Make sure that my_basicstats and my_rate_est_stats
+are still valid (i.e. still exist) at the time of making this call.
+
+
+Authors:
+--------
+- Thomas Graf <tgraf@suug.ch>
+- Jamal Hadi Salim <hadi@cyberus.ca>
diff --git a/Documentation/networking/gen_stats.txt b/Documentation/networking/gen_stats.txt
deleted file mode 100644
index 179b18ce45ff..000000000000
--- a/Documentation/networking/gen_stats.txt
+++ /dev/null
@@ -1,119 +0,0 @@
-Generic networking statistics for netlink users
-======================================================================
-
-Statistic counters are grouped into structs:
-
-Struct               TLV type              Description
-----------------------------------------------------------------------
-gnet_stats_basic     TCA_STATS_BASIC       Basic statistics
-gnet_stats_rate_est  TCA_STATS_RATE_EST    Rate estimator
-gnet_stats_queue     TCA_STATS_QUEUE       Queue statistics
-none                 TCA_STATS_APP         Application specific
-
-
-Collecting:
------------
-
-Declare the statistic structs you need:
-struct mystruct {
-	struct gnet_stats_basic	bstats;
-	struct gnet_stats_queue	qstats;
-	...
-};
-
-Update statistics, in dequeue() methods only, (while owning qdisc->running)
-mystruct->tstats.packet++;
-mystruct->qstats.backlog += skb->pkt_len;
-
-
-Export to userspace (Dump):
----------------------------
-
-my_dumping_routine(struct sk_buff *skb, ...)
-{
-	struct gnet_dump dump;
-
-	if (gnet_stats_start_copy(skb, TCA_STATS2, &mystruct->lock, &dump,
-				  TCA_PAD) < 0)
-		goto rtattr_failure;
-
-	if (gnet_stats_copy_basic(&dump, &mystruct->bstats) < 0 ||
-	    gnet_stats_copy_queue(&dump, &mystruct->qstats) < 0 ||
-		gnet_stats_copy_app(&dump, &xstats, sizeof(xstats)) < 0)
-		goto rtattr_failure;
-
-	if (gnet_stats_finish_copy(&dump) < 0)
-		goto rtattr_failure;
-	...
-}
-
-TCA_STATS/TCA_XSTATS backward compatibility:
---------------------------------------------
-
-Prior users of struct tc_stats and xstats can maintain backward
-compatibility by calling the compat wrappers to keep providing the
-existing TLV types.
-
-my_dumping_routine(struct sk_buff *skb, ...)
-{
-    if (gnet_stats_start_copy_compat(skb, TCA_STATS2, TCA_STATS,
-				     TCA_XSTATS, &mystruct->lock, &dump,
-				     TCA_PAD) < 0)
-		goto rtattr_failure;
-	...
-}
-
-A struct tc_stats will be filled out during gnet_stats_copy_* calls
-and appended to the skb. TCA_XSTATS is provided if gnet_stats_copy_app
-was called.
-
-
-Locking:
---------
-
-Locks are taken before writing and released once all statistics have
-been written. Locks are always released in case of an error. You
-are responsible for making sure that the lock is initialized.
-
-
-Rate Estimator:
---------------
-
-0) Prepare an estimator attribute. Most likely this would be in user
-   space. The value of this TLV should contain a tc_estimator structure.
-   As usual, such a TLV needs to be 32 bit aligned and therefore the
-   length needs to be appropriately set, etc. The estimator interval
-   and ewma log need to be converted to the appropriate values.
-   tc_estimator.c::tc_setup_estimator() is advisable to be used as the
-   conversion routine. It does a few clever things. It takes a time
-   interval in microsecs, a time constant also in microsecs and a struct
-   tc_estimator to  be populated. The returned tc_estimator can be
-   transported to the kernel.  Transfer such a structure in a TLV of type
-   TCA_RATE to your code in the kernel.
-
-In the kernel when setting up:
-1) make sure you have basic stats and rate stats setup first.
-2) make sure you have initialized stats lock that is used to setup such
-   stats.
-3) Now initialize a new estimator:
-
-   int ret = gen_new_estimator(my_basicstats,my_rate_est_stats,
-       mystats_lock, attr_with_tcestimator_struct);
-
-   if ret == 0
-       success
-   else
-       failed
-
-From now on, every time you dump my_rate_est_stats it will contain
-up-to-date info.
-
-Once you are done, call gen_kill_estimator(my_basicstats,
-my_rate_est_stats) Make sure that my_basicstats and my_rate_est_stats
-are still valid (i.e still exist) at the time of making this call.
-
-
-Authors:
---------
-Thomas Graf <tgraf@suug.ch>
-Jamal Hadi Salim <hadi@cyberus.ca>
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 42e556509e22..33afbb67f3fa 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -61,6 +61,7 @@ Contents:
    framerelay
    generic-hdlc
    generic_netlink
+   gen_stats
 
 .. only::  subproject and html
 
diff --git a/net/core/gen_stats.c b/net/core/gen_stats.c
index 1d653fbfcf52..e491b083b348 100644
--- a/net/core/gen_stats.c
+++ b/net/core/gen_stats.c
@@ -6,7 +6,7 @@
  *           Jamal Hadi Salim
  *           Alexey Kuznetsov, <kuznet@ms2.inr.ac.ru>
  *
- * See Documentation/networking/gen_stats.txt
+ * See Documentation/networking/gen_stats.rst
  */
 
 #include <linux/types.h>
-- 
cgit v1.2.3-59-g8ed1b


From 81baecb6f6dc507f1b565e711b5193cdbb3fa939 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:42 +0200
Subject: docs: networking: convert gtp.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- add notes markups;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/gtp.rst   | 251 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/gtp.txt   | 230 ---------------------------------
 Documentation/networking/index.rst |   1 +
 3 files changed, 252 insertions(+), 230 deletions(-)
 create mode 100644 Documentation/networking/gtp.rst
 delete mode 100644 Documentation/networking/gtp.txt

diff --git a/Documentation/networking/gtp.rst b/Documentation/networking/gtp.rst
new file mode 100644
index 000000000000..1563fb94b289
--- /dev/null
+++ b/Documentation/networking/gtp.rst
@@ -0,0 +1,251 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+The Linux kernel GTP tunneling module
+=====================================
+
+Documentation by
+		 Harald Welte <laforge@gnumonks.org> and
+		 Andreas Schultz <aschultz@tpip.net>
+
+In 'drivers/net/gtp.c' you will find a kernel-level implementation
+of a GTP tunnel endpoint.
+
+What is GTP
+===========
+
+GTP is the Generic Tunnel Protocol, which is a 3GPP protocol used for
+tunneling User-IP payload between a mobile station (phone, modem)
+and the interconnection between an external packet data network (such
+as the internet).
+
+So when you start a 'data connection' from your mobile phone, the
+phone will use the control plane to signal for the establishment of
+such a tunnel between that external data network and the phone.  The
+tunnel endpoints thus reside on the phone and in the gateway.  All
+intermediate nodes just transport the encapsulated packet.
+
+The phone itself does not implement GTP but uses some other
+technology-dependent protocol stack for transmitting the user IP
+payload, such as LLC/SNDCP/RLC/MAC.
+
+At some network element inside the cellular operator infrastructure
+(SGSN in case of GPRS/EGPRS or classic UMTS, hNodeB in case of a 3G
+femtocell, eNodeB in case of 4G/LTE), the cellular protocol stacking
+is translated into GTP *without breaking the end-to-end tunnel*.  So
+intermediate nodes just perform some specific relay function.
+
+At some point the GTP packet ends up on the so-called GGSN (GSM/UMTS)
+or P-GW (LTE), which terminates the tunnel, decapsulates the packet
+and forwards it onto an external packet data network.  This can be
+public internet, but can also be any private IP network (or even
+theoretically some non-IP network like X.25).
+
+You can find the protocol specification in 3GPP TS 29.060, available
+publicly via the 3GPP website at http://www.3gpp.org/DynaReport/29060.htm
+
+A direct PDF link to v13.6.0 is provided for convenience below:
+http://www.etsi.org/deliver/etsi_ts/129000_129099/129060/13.06.00_60/ts_129060v130600p.pdf
+
+The Linux GTP tunnelling module
+===============================
+
+The module implements the function of a tunnel endpoint, i.e. it is
+able to decapsulate tunneled IP packets in the uplink originated by
+the phone, and encapsulate raw IP packets received from the external
+packet network in downlink towards the phone.
+
+It *only* implements the so-called 'user plane', carrying the User-IP
+payload, called GTP-U.  It does not implement the 'control plane',
+which is a signaling protocol used for establishment and teardown of
+GTP tunnels (GTP-C).
+
+So in order to have a working GGSN/P-GW setup, you will need a
+userspace program that implements the GTP-C protocol and which then
+uses the netlink interface provided by the GTP-U module in the kernel
+to configure the kernel module.
+
+This split architecture follows the tunneling modules of other
+protocols, e.g. PPPoE or L2TP, where you also run a userspace daemon
+to handle the tunnel establishment, authentication etc. and only the
+data plane is accelerated inside the kernel.
+
+Don't be confused by terminology:  The GTP User Plane goes through
+kernel accelerated path, while the GTP Control Plane goes to
+Userspace :)
+
+The official homepage of the module is at
+https://osmocom.org/projects/linux-kernel-gtp-u/wiki
+
+Userspace Programs with Linux Kernel GTP-U support
+==================================================
+
+At the time of this writing, there are at least two Free Software
+implementations that implement GTP-C and can use the netlink interface
+to make use of the Linux kernel GTP-U support:
+
+* OpenGGSN (classic 2G/3G GGSN in C):
+  https://osmocom.org/projects/openggsn/wiki/OpenGGSN
+
+* ergw (GGSN + P-GW in Erlang):
+  https://github.com/travelping/ergw
+
+Userspace Library / Command Line Utilities
+==========================================
+
+There is a userspace library called 'libgtpnl' which is based on
+libmnl and which implements a C-language API towards the netlink
+interface provided by the Kernel GTP module:
+
+http://git.osmocom.org/libgtpnl/
+
+Protocol Versions
+=================
+
+There are two different versions of GTP-U: v0 [GSM TS 09.60] and v1
+[3GPP TS 29.281].  Both are implemented in the Kernel GTP module.
+Version 0 is a legacy version, and deprecated from recent 3GPP
+specifications.
+
+GTP-U uses UDP for transporting PDUs.  The receiving UDP port is 2152
+for GTPv1-U and 3386 for GTPv0-U.
+
+There are three versions of GTP-C: v0, v1, and v2.  As the kernel
+doesn't implement GTP-C, we don't have to worry about this.  It's the
+responsibility of the control plane implementation in userspace to
+implement that.
+
+IPv6
+====
+
+The 3GPP specifications indicate either IPv4 or IPv6 can be used both
+on the inner (user) IP layer, or on the outer (transport) layer.
+
+Unfortunately, the Kernel module currently supports IPv6 neither for
+the User IP payload, nor for the outer IP layer.  Patches or other
+Contributions to fix this are most welcome!
+
+Mailing List
+============
+
+If you have questions regarding how to use the Kernel GTP module from
+your own software, or want to contribute to the code, please use the
+osmocom-net-gprs mailing list for related discussion. The list can be
+reached at osmocom-net-gprs@lists.osmocom.org and the mailman
+interface for managing your subscription is at
+https://lists.osmocom.org/mailman/listinfo/osmocom-net-gprs
+
+Issue Tracker
+=============
+
+The Osmocom project maintains an issue tracker for the Kernel GTP-U
+module at
+https://osmocom.org/projects/linux-kernel-gtp-u/issues
+
+History / Acknowledgements
+==========================
+
+The Module was originally created in 2012 by Harald Welte, but never
+completed.  Pablo came in to finish the mess Harald left behind.  But
+due to a lack of user interest, it never got merged.
+
+In 2015, Andreas Schultz came to the rescue and fixed lots more bugs,
+extended it with new features and finally pushed all of us to get it
+mainline, where it was merged in 4.7.0.
+
+Architectural Details
+=====================
+
+Local GTP-U entity and tunnel identification
+--------------------------------------------
+
+GTP-U uses UDP for transporting PDU's. The receiving UDP port is 2152
+for GTPv1-U and 3386 for GTPv0-U.
+
+There is only one GTP-U entity (and therefore SGSN/GGSN/S-GW/PDN-GW
+instance) per IP address. Tunnel Endpoint Identifiers (TEIDs) are unique
+per GTP-U entity.
+
+A specific tunnel is only defined by the destination entity. Since the
+destination port is constant, only the destination IP and TEID define
+a tunnel. The source IP and Port have no meaning for the tunnel.
+
+Therefore:
+
+  * when sending, the remote entity is defined by the remote IP and
+    the tunnel endpoint id. The source IP and port have no meaning and
+    can be changed at any time.
+
+  * when receiving, the local entity is defined by the local
+    destination IP and the tunnel endpoint id. The source IP and port
+    have no meaning and can change at any time.
+
+[3GPP TS 29.281] Section 4.3.0 defines this so::
+
+  The TEID in the GTP-U header is used to de-multiplex traffic
+  incoming from remote tunnel endpoints so that it is delivered to the
+  User plane entities in a way that allows multiplexing of different
+  users, different packet protocols and different QoS levels.
+  Therefore no two remote GTP-U endpoints shall send traffic to a
+  GTP-U protocol entity using the same TEID value except
+  for data forwarding as part of mobility procedures.
+
+The definition above only defines that two remote GTP-U endpoints
+*should not* send to the same TEID, it *does not* forbid or exclude
+such a scenario. In fact, the mentioned mobility procedures make it
+necessary that the GTP-U entity accepts traffic for TEIDs from
+multiple or unknown peers.
+
+Therefore, the receiving side identifies tunnels exclusively based on
+TEIDs, not based on the source IP!
+
+APN vs. Network Device
+======================
+
+The GTP-U driver creates a Linux network device for each Gi/SGi
+interface.
+
+[3GPP TS 29.281] calls the Gi/SGi reference point an interface. This
+may lead to the impression that the GGSN/P-GW can have only one such
+interface.
+
+Correct is that the Gi/SGi reference point defines the interworking
+between the 3GPP packet domain (PDN) based on GTP-U tunnel and IP
+based networks.
+
+There is no provision in any of the 3GPP documents that limits the
+number of Gi/SGi interfaces implemented by a GGSN/P-GW.
+
+[3GPP TS 29.061] Section 11.3 makes it clear that the selection of a
+specific Gi/SGi interfaces is made through the Access Point Name
+(APN)::
+
+  2. each private network manages its own addressing. In general this
+     will result in different private networks having overlapping
+     address ranges. A logically separate connection (e.g. an IP in IP
+     tunnel or layer 2 virtual circuit) is used between the GGSN/P-GW
+     and each private network.
+
+     In this case the IP address alone is not necessarily unique.  The
+     pair of values, Access Point Name (APN) and IPv4 address and/or
+     IPv6 prefixes, is unique.
+
+In order to support the overlapping address range use case, each APN
+is mapped to a separate Gi/SGi interface (network device).
+
+.. note::
+
+   The Access Point Name is purely a control plane (GTP-C) concept.
+   At the GTP-U level, only Tunnel Endpoint Identifiers are present in
+   GTP-U packets and network devices are known.
+
+Therefore for a given UE the mapping in IP to PDN network is:
+
+  * network device + MS IP -> Peer IP + Peer TEID,
+
+and from PDN to IP network:
+
+  * local GTP-U IP + TEID  -> network device
+
+Furthermore, before a received T-PDU is injected into the network
+device the MS IP is checked against the IP recorded in PDP context.
diff --git a/Documentation/networking/gtp.txt b/Documentation/networking/gtp.txt
deleted file mode 100644
index 6966bbec1ecb..000000000000
--- a/Documentation/networking/gtp.txt
+++ /dev/null
@@ -1,230 +0,0 @@
-The Linux kernel GTP tunneling module
-======================================================================
-Documentation by Harald Welte <laforge@gnumonks.org> and
-                 Andreas Schultz <aschultz@tpip.net>
-
-In 'drivers/net/gtp.c' you are finding a kernel-level implementation
-of a GTP tunnel endpoint.
-
-== What is GTP ==
-
-GTP is the Generic Tunnel Protocol, which is a 3GPP protocol used for
-tunneling User-IP payload between a mobile station (phone, modem)
-and the interconnection between an external packet data network (such
-as the internet).
-
-So when you start a 'data connection' from your mobile phone, the
-phone will use the control plane to signal for the establishment of
-such a tunnel between that external data network and the phone.  The
-tunnel endpoints thus reside on the phone and in the gateway.  All
-intermediate nodes just transport the encapsulated packet.
-
-The phone itself does not implement GTP but uses some other
-technology-dependent protocol stack for transmitting the user IP
-payload, such as LLC/SNDCP/RLC/MAC.
-
-At some network element inside the cellular operator infrastructure
-(SGSN in case of GPRS/EGPRS or classic UMTS, hNodeB in case of a 3G
-femtocell, eNodeB in case of 4G/LTE), the cellular protocol stacking
-is translated into GTP *without breaking the end-to-end tunnel*.  So
-intermediate nodes just perform some specific relay function.
-
-At some point the GTP packet ends up on the so-called GGSN (GSM/UMTS)
-or P-GW (LTE), which terminates the tunnel, decapsulates the packet
-and forwards it onto an external packet data network.  This can be
-public internet, but can also be any private IP network (or even
-theoretically some non-IP network like X.25).
-
-You can find the protocol specification in 3GPP TS 29.060, available
-publicly via the 3GPP website at http://www.3gpp.org/DynaReport/29060.htm
-
-A direct PDF link to v13.6.0 is provided for convenience below:
-http://www.etsi.org/deliver/etsi_ts/129000_129099/129060/13.06.00_60/ts_129060v130600p.pdf
-
-== The Linux GTP tunnelling module ==
-
-The module implements the function of a tunnel endpoint, i.e. it is
-able to decapsulate tunneled IP packets in the uplink originated by
-the phone, and encapsulate raw IP packets received from the external
-packet network in downlink towards the phone.
-
-It *only* implements the so-called 'user plane', carrying the User-IP
-payload, called GTP-U.  It does not implement the 'control plane',
-which is a signaling protocol used for establishment and teardown of
-GTP tunnels (GTP-C).
-
-So in order to have a working GGSN/P-GW setup, you will need a
-userspace program that implements the GTP-C protocol and which then
-uses the netlink interface provided by the GTP-U module in the kernel
-to configure the kernel module.
-
-This split architecture follows the tunneling modules of other
-protocols, e.g. PPPoE or L2TP, where you also run a userspace daemon
-to handle the tunnel establishment, authentication etc. and only the
-data plane is accelerated inside the kernel.
-
-Don't be confused by terminology:  The GTP User Plane goes through
-kernel accelerated path, while the GTP Control Plane goes to
-Userspace :)
-
-The official homepage of the module is at
-https://osmocom.org/projects/linux-kernel-gtp-u/wiki
-
-== Userspace Programs with Linux Kernel GTP-U support ==
-
-At the time of this writing, there are at least two Free Software
-implementations that implement GTP-C and can use the netlink interface
-to make use of the Linux kernel GTP-U support:
-
-* OpenGGSN (classic 2G/3G GGSN in C):
-  https://osmocom.org/projects/openggsn/wiki/OpenGGSN
-
-* ergw (GGSN + P-GW in Erlang):
-  https://github.com/travelping/ergw
-
-== Userspace Library / Command Line Utilities ==
-
-There is a userspace library called 'libgtpnl' which is based on
-libmnl and which implements a C-language API towards the netlink
-interface provided by the Kernel GTP module:
-
-http://git.osmocom.org/libgtpnl/
-
-== Protocol Versions ==
-
-There are two different versions of GTP-U: v0 [GSM TS 09.60] and v1
-[3GPP TS 29.281].  Both are implemented in the Kernel GTP module.
-Version 0 is a legacy version, and deprecated from recent 3GPP
-specifications.
-
-GTP-U uses UDP for transporting PDUs.  The receiving UDP port is 2151
-for GTPv1-U and 3386 for GTPv0-U.
-
-There are three versions of GTP-C: v0, v1, and v2.  As the kernel
-doesn't implement GTP-C, we don't have to worry about this.  It's the
-responsibility of the control plane implementation in userspace to
-implement that.
-
-== IPv6 ==
-
-The 3GPP specifications indicate either IPv4 or IPv6 can be used both
-on the inner (user) IP layer, or on the outer (transport) layer.
-
-Unfortunately, the Kernel module currently supports IPv6 neither for
-the User IP payload, nor for the outer IP layer.  Patches or other
-Contributions to fix this are most welcome!
-
-== Mailing List ==
-
-If yo have questions regarding how to use the Kernel GTP module from
-your own software, or want to contribute to the code, please use the
-osmocom-net-grps mailing list for related discussion. The list can be
-reached at osmocom-net-gprs@lists.osmocom.org and the mailman
-interface for managing your subscription is at
-https://lists.osmocom.org/mailman/listinfo/osmocom-net-gprs
-
-== Issue Tracker ==
-
-The Osmocom project maintains an issue tracker for the Kernel GTP-U
-module at
-https://osmocom.org/projects/linux-kernel-gtp-u/issues
-
-== History / Acknowledgements ==
-
-The Module was originally created in 2012 by Harald Welte, but never
-completed.  Pablo came in to finish the mess Harald left behind.  But
-doe to a lack of user interest, it never got merged.
-
-In 2015, Andreas Schultz came to the rescue and fixed lots more bugs,
-extended it with new features and finally pushed all of us to get it
-mainline, where it was merged in 4.7.0.
-
-== Architectural Details ==
-
-=== Local GTP-U entity and tunnel identification ===
-
-GTP-U uses UDP for transporting PDU's. The receiving UDP port is 2152
-for GTPv1-U and 3386 for GTPv0-U.
-
-There is only one GTP-U entity (and therefor SGSN/GGSN/S-GW/PDN-GW
-instance) per IP address. Tunnel Endpoint Identifier (TEID) are unique
-per GTP-U entity.
-
-A specific tunnel is only defined by the destination entity. Since the
-destination port is constant, only the destination IP and TEID define
-a tunnel. The source IP and Port have no meaning for the tunnel.
-
-Therefore:
-
-  * when sending, the remote entity is defined by the remote IP and
-    the tunnel endpoint id. The source IP and port have no meaning and
-    can be changed at any time.
-
-  * when receiving the local entity is defined by the local
-    destination IP and the tunnel endpoint id. The source IP and port
-    have no meaning and can change at any time.
-
-[3GPP TS 29.281] Section 4.3.0 defines this so:
-
-> The TEID in the GTP-U header is used to de-multiplex traffic
-> incoming from remote tunnel endpoints so that it is delivered to the
-> User plane entities in a way that allows multiplexing of different
-> users, different packet protocols and different QoS levels.
-> Therefore no two remote GTP-U endpoints shall send traffic to a
-> GTP-U protocol entity using the same TEID value except
-> for data forwarding as part of mobility procedures.
-
-The definition above only defines that two remote GTP-U endpoints
-*should not* send to the same TEID, it *does not* forbid or exclude
-such a scenario. In fact, the mentioned mobility procedures make it
-necessary that the GTP-U entity accepts traffic for TEIDs from
-multiple or unknown peers.
-
-Therefore, the receiving side identifies tunnels exclusively based on
-TEIDs, not based on the source IP!
-
-== APN vs. Network Device ==
-
-The GTP-U driver creates a Linux network device for each Gi/SGi
-interface.
-
-[3GPP TS 29.281] calls the Gi/SGi reference point an interface. This
-may lead to the impression that the GGSN/P-GW can have only one such
-interface.
-
-Correct is that the Gi/SGi reference point defines the interworking
-between +the 3GPP packet domain (PDN) based on GTP-U tunnel and IP
-based networks.
-
-There is no provision in any of the 3GPP documents that limits the
-number of Gi/SGi interfaces implemented by a GGSN/P-GW.
-
-[3GPP TS 29.061] Section 11.3 makes it clear that the selection of a
-specific Gi/SGi interfaces is made through the Access Point Name
-(APN):
-
-> 2. each private network manages its own addressing. In general this
->    will result in different private networks having overlapping
->    address ranges. A logically separate connection (e.g. an IP in IP
->    tunnel or layer 2 virtual circuit) is used between the GGSN/P-GW
->    and each private network.
->
->    In this case the IP address alone is not necessarily unique.  The
->    pair of values, Access Point Name (APN) and IPv4 address and/or
->    IPv6 prefixes, is unique.
-
-In order to support the overlapping address range use case, each APN
-is mapped to a separate Gi/SGi interface (network device).
-
-NOTE: The Access Point Name is purely a control plane (GTP-C) concept.
-At the GTP-U level, only Tunnel Endpoint Identifiers are present in
-GTP-U packets and network devices are known
-
-Therefore for a given UE the mapping in IP to PDN network is:
-  * network device + MS IP -> Peer IP + Peer TEID,
-
-and from PDN to IP network:
-  * local GTP-U IP + TEID  -> network device
-
-Furthermore, before a received T-PDU is injected into the network
-device the MS IP is checked against the IP recorded in PDP context.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 33afbb67f3fa..b29a08d1f941 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -62,6 +62,7 @@ Contents:
    generic-hdlc
    generic_netlink
    gen_stats
+   gtp
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 3c3a2fde4d88bb3d6c0592b4b7754f26dab9f697 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:43 +0200
Subject: docs: networking: convert hinic.txt to ReST

Not much to be done here:

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/hinic.rst | 128 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/hinic.txt | 125 ------------------------------------
 Documentation/networking/index.rst |   1 +
 MAINTAINERS                        |   2 +-
 4 files changed, 130 insertions(+), 126 deletions(-)
 create mode 100644 Documentation/networking/hinic.rst
 delete mode 100644 Documentation/networking/hinic.txt

diff --git a/Documentation/networking/hinic.rst b/Documentation/networking/hinic.rst
new file mode 100644
index 000000000000..867ac8f4e04a
--- /dev/null
+++ b/Documentation/networking/hinic.rst
@@ -0,0 +1,128 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Linux Kernel Driver for Huawei Intelligent NIC(HiNIC) family
+============================================================
+
+Overview:
+=========
+HiNIC is a network interface card for the Data Center Area.
+
+The driver supports a range of link-speed devices (10GbE, 25GbE, 40GbE, etc.).
+The driver supports also a negotiated and extendable feature set.
+
+Some HiNIC devices support SR-IOV. This driver is used for Physical Function
+(PF).
+
+HiNIC devices support MSI-X interrupt vector for each Tx/Rx queue and
+adaptive interrupt moderation.
+
+HiNIC devices support also various offload features such as checksum offload,
+TCP Transmit Segmentation Offload(TSO), Receive-Side Scaling(RSS) and
+LRO(Large Receive Offload).
+
+
+Supported PCI vendor ID/device IDs:
+===================================
+
+19e5:1822 - HiNIC PF
+
+
+Driver Architecture and Source Code:
+====================================
+
+hinic_dev - Implement a Logical Network device that is independent from
+specific HW details about HW data structure formats.
+
+hinic_hwdev - Implement the HW details of the device and include the components
+for accessing the PCI NIC.
+
+hinic_hwdev contains the following components:
+===============================================
+
+HW Interface:
+=============
+
+The interface for accessing the pci device (DMA memory and PCI BARs).
+(hinic_hw_if.c, hinic_hw_if.h)
+
+Configuration Status Registers Area that describes the HW Registers on the
+configuration and status BAR0. (hinic_hw_csr.h)
+
+MGMT components:
+================
+
+Asynchronous Event Queues(AEQs) - The event queues for receiving messages from
+the MGMT modules on the cards. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Application Programmable Interface commands(API CMD) - Interface for sending
+MGMT commands to the card. (hinic_hw_api_cmd.c, hinic_hw_api_cmd.h)
+
+Management (MGMT) - the PF to MGMT channel that uses API CMD for sending MGMT
+commands to the card and receives notifications from the MGMT modules on the
+card by AEQs. Also set the addresses of the IO CMDQs in HW.
+(hinic_hw_mgmt.c, hinic_hw_mgmt.h)
+
+IO components:
+==============
+
+Completion Event Queues(CEQs) - The completion Event Queues that describe IO
+tasks that are finished. (hinic_hw_eqs.c, hinic_hw_eqs.h)
+
+Work Queues(WQ) - Contain the memory and operations for use by CMD queues and
+the Queue Pairs. The WQ is a Memory Block in a Page. The Block contains
+pointers to Memory Areas that are the Memory for the Work Queue Elements(WQEs).
+(hinic_hw_wq.c, hinic_hw_wq.h)
+
+Command Queues(CMDQ) - The queues for sending commands for IO management and is
+used to set the QPs addresses in HW. The commands completion events are
+accumulated on the CEQ that is configured to receive the CMDQ completion events.
+(hinic_hw_cmdq.c, hinic_hw_cmdq.h)
+
+Queue Pairs(QPs) - The HW Receive and Send queues for Receiving and Transmitting
+Data. (hinic_hw_qp.c, hinic_hw_qp.h, hinic_hw_qp_ctxt.h)
+
+IO - de/constructs all the IO components. (hinic_hw_io.c, hinic_hw_io.h)
+
+HW device:
+==========
+
+HW device - de/constructs the HW Interface, the MGMT components on the
+initialization of the driver and the IO components on the case of Interface
+UP/DOWN Events. (hinic_hw_dev.c, hinic_hw_dev.h)
+
+
+hinic_dev contains the following components:
+===============================================
+
+PCI ID table - Contains the supported PCI Vendor/Device IDs.
+(hinic_pci_tbl.h)
+
+Port Commands - Send commands to the HW device for port management
+(MAC, Vlan, MTU, ...). (hinic_port.c, hinic_port.h)
+
+Tx Queues - Logical Tx Queues that use the HW Send Queues for transmit.
+The Logical Tx queue is not dependent on the format of the HW Send Queue.
+(hinic_tx.c, hinic_tx.h)
+
+Rx Queues - Logical Rx Queues that use the HW Receive Queues for receive.
+The Logical Rx queue is not dependent on the format of the HW Receive Queue.
+(hinic_rx.c, hinic_rx.h)
+
+hinic_dev - de/constructs the Logical Tx and Rx Queues.
+(hinic_main.c, hinic_dev.h)
+
+
+Miscellaneous
+=============
+
+Common functions that are used by HW and Logical Device.
+(hinic_common.c, hinic_common.h)
+
+
+Support
+=======
+
+If an issue is identified with the released source code on the supported kernel
+with a supported adapter, email the specific information related to the issue to
+aviad.krawczyk@huawei.com.
diff --git a/Documentation/networking/hinic.txt b/Documentation/networking/hinic.txt
deleted file mode 100644
index 989366a4039c..000000000000
--- a/Documentation/networking/hinic.txt
+++ /dev/null
@@ -1,125 +0,0 @@
-Linux Kernel Driver for Huawei Intelligent NIC(HiNIC) family
-============================================================
-
-Overview:
-=========
-HiNIC is a network interface card for the Data Center Area.
-
-The driver supports a range of link-speed devices (10GbE, 25GbE, 40GbE, etc.).
-The driver supports also a negotiated and extendable feature set.
-
-Some HiNIC devices support SR-IOV. This driver is used for Physical Function
-(PF).
-
-HiNIC devices support MSI-X interrupt vector for each Tx/Rx queue and
-adaptive interrupt moderation.
-
-HiNIC devices support also various offload features such as checksum offload,
-TCP Transmit Segmentation Offload(TSO), Receive-Side Scaling(RSS) and
-LRO(Large Receive Offload).
-
-
-Supported PCI vendor ID/device IDs:
-===================================
-
-19e5:1822 - HiNIC PF
-
-
-Driver Architecture and Source Code:
-====================================
-
-hinic_dev - Implement a Logical Network device that is independent from
-specific HW details about HW data structure formats.
-
-hinic_hwdev - Implement the HW details of the device and include the components
-for accessing the PCI NIC.
-
-hinic_hwdev contains the following components:
-===============================================
-
-HW Interface:
-=============
-
-The interface for accessing the pci device (DMA memory and PCI BARs).
-(hinic_hw_if.c, hinic_hw_if.h)
-
-Configuration Status Registers Area that describes the HW Registers on the
-configuration and status BAR0. (hinic_hw_csr.h)
-
-MGMT components:
-================
-
-Asynchronous Event Queues(AEQs) - The event queues for receiving messages from
-the MGMT modules on the cards. (hinic_hw_eqs.c, hinic_hw_eqs.h)
-
-Application Programmable Interface commands(API CMD) - Interface for sending
-MGMT commands to the card. (hinic_hw_api_cmd.c, hinic_hw_api_cmd.h)
-
-Management (MGMT) - the PF to MGMT channel that uses API CMD for sending MGMT
-commands to the card and receives notifications from the MGMT modules on the
-card by AEQs. Also set the addresses of the IO CMDQs in HW.
-(hinic_hw_mgmt.c, hinic_hw_mgmt.h)
-
-IO components:
-==============
-
-Completion Event Queues(CEQs) - The completion Event Queues that describe IO
-tasks that are finished. (hinic_hw_eqs.c, hinic_hw_eqs.h)
-
-Work Queues(WQ) - Contain the memory and operations for use by CMD queues and
-the Queue Pairs. The WQ is a Memory Block in a Page. The Block contains
-pointers to Memory Areas that are the Memory for the Work Queue Elements(WQEs).
-(hinic_hw_wq.c, hinic_hw_wq.h)
-
-Command Queues(CMDQ) - The queues for sending commands for IO management and is
-used to set the QPs addresses in HW. The commands completion events are
-accumulated on the CEQ that is configured to receive the CMDQ completion events.
-(hinic_hw_cmdq.c, hinic_hw_cmdq.h)
-
-Queue Pairs(QPs) - The HW Receive and Send queues for Receiving and Transmitting
-Data. (hinic_hw_qp.c, hinic_hw_qp.h, hinic_hw_qp_ctxt.h)
-
-IO - de/constructs all the IO components. (hinic_hw_io.c, hinic_hw_io.h)
-
-HW device:
-==========
-
-HW device - de/constructs the HW Interface, the MGMT components on the
-initialization of the driver and the IO components on the case of Interface
-UP/DOWN Events. (hinic_hw_dev.c, hinic_hw_dev.h)
-
-
-hinic_dev contains the following components:
-===============================================
-
-PCI ID table - Contains the supported PCI Vendor/Device IDs.
-(hinic_pci_tbl.h)
-
-Port Commands - Send commands to the HW device for port management
-(MAC, Vlan, MTU, ...). (hinic_port.c, hinic_port.h)
-
-Tx Queues - Logical Tx Queues that use the HW Send Queues for transmit.
-The Logical Tx queue is not dependent on the format of the HW Send Queue.
-(hinic_tx.c, hinic_tx.h)
-
-Rx Queues - Logical Rx Queues that use the HW Receive Queues for receive.
-The Logical Rx queue is not dependent on the format of the HW Receive Queue.
-(hinic_rx.c, hinic_rx.h)
-
-hinic_dev - de/constructs the Logical Tx and Rx Queues.
-(hinic_main.c, hinic_dev.h)
-
-
-Miscellaneous:
-=============
-
-Common functions that are used by HW and Logical Device.
-(hinic_common.c, hinic_common.h)
-
-
-Support
-=======
-
-If an issue is identified with the released source code on the supported kernel
-with a supported adapter, email the specific information related to the issue to
-aviad.krawczyk@huawei.com.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b29a08d1f941..5a7889df1375 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -63,6 +63,7 @@ Contents:
    generic_netlink
    gen_stats
    gtp
+   hinic
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 4ec6d2741d36..df5e4ccc1ccb 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7815,7 +7815,7 @@ HUAWEI ETHERNET DRIVER
 M:	Aviad Krawczyk <aviad.krawczyk@huawei.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	Documentation/networking/hinic.txt
+F:	Documentation/networking/hinic.rst
 F:	drivers/net/ethernet/huawei/hinic/
 
 HUGETLB FILESYSTEM
-- 
cgit v1.2.3-59-g8ed1b


From 1d2698fa05f57ba2900e1ff50ac33ec85d2087d3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:44 +0200
Subject: docs: networking: convert ila.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ila.rst   | 296 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/ila.txt   | 285 -----------------------------------
 Documentation/networking/index.rst |   1 +
 3 files changed, 297 insertions(+), 285 deletions(-)
 create mode 100644 Documentation/networking/ila.rst
 delete mode 100644 Documentation/networking/ila.txt

diff --git a/Documentation/networking/ila.rst b/Documentation/networking/ila.rst
new file mode 100644
index 000000000000..5ac0a6270b17
--- /dev/null
+++ b/Documentation/networking/ila.rst
@@ -0,0 +1,296 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+Identifier Locator Addressing (ILA)
+===================================
+
+
+Introduction
+============
+
+Identifier-locator addressing (ILA) is a technique used with IPv6 that
+differentiates between location and identity of a network node. Part of an
+address expresses the immutable identity of the node, and another part
+indicates the location of the node which can be dynamic. Identifier-locator
+addressing can be used to efficiently implement overlay networks for
+network virtualization as well as solutions for use cases in mobility.
+
+ILA can be thought of as means to implement an overlay network without
+encapsulation. This is accomplished by performing network address
+translation on destination addresses as a packet traverses a network. To
+the network, an ILA translated packet appears to be no different than any
+other IPv6 packet. For instance, if the transport protocol is TCP then an
+ILA translated packet looks like just another TCP/IPv6 packet. The
+advantage of this is that ILA is transparent to the network so that
+optimizations in the network, such as ECMP, RSS, GRO, GSO, etc., just work.
+
+The ILA protocol is described in Internet-Draft draft-herbert-intarea-ila.
+
+
+ILA terminology
+===============
+
+  - Identifier
+		A number that identifies an addressable node in the network
+		independent of its location. ILA identifiers are sixty-four
+		bit values.
+
+  - Locator
+		A network prefix that routes to a physical host. Locators
+		provide the topological location of an addressed node. ILA
+		locators are sixty-four bit prefixes.
+
+  - ILA mapping
+		A mapping of an ILA identifier to a locator (or to a
+		locator and meta data). An ILA domain maintains a database
+		that contains mappings for all destinations in the domain.
+
+  - SIR address
+		An IPv6 address composed of a SIR prefix (upper sixty-
+		four bits) and an identifier (lower sixty-four bits).
+		SIR addresses are visible to applications and provide a
+		means for them to address nodes independent of their
+		location.
+
+  - ILA address
+		An IPv6 address composed of a locator (upper sixty-four
+		bits) and an identifier (low order sixty-four bits). ILA
+		addresses are never visible to an application.
+
+  - ILA host
+		An end host that is capable of performing ILA translations
+		on transmit or receive.
+
+  - ILA router
+		A network node that performs ILA translation and forwarding
+		of translated packets.
+
+  - ILA forwarding cache
+		A type of ILA router that only maintains a working set
+		cache of mappings.
+
+  - ILA node
+		A network node capable of performing ILA translations. This
+		can be an ILA router, ILA forwarding cache, or ILA host.
+
+
+Operation
+=========
+
+There are two fundamental operations with ILA:
+
+  - Translate a SIR address to an ILA address. This is performed on ingress
+    to an ILA overlay.
+
+  - Translate an ILA address to a SIR address. This is performed on egress
+    from the ILA overlay.
+
+ILA can be deployed either on end hosts or intermediate devices in the
+network; these are provided by "ILA hosts" and "ILA routers" respectively.
+Configuration and datapath for these two points of deployment is somewhat
+different.
+
+The diagram below illustrates the flow of packets through ILA as well
+as showing ILA hosts and routers::
+
+    +--------+                                                +--------+
+    | Host A +-+                                         +--->| Host B |
+    |        | |              (2) ILA                   (')   |        |
+    +--------+ |            ...addressed....           (   )  +--------+
+	       V  +---+--+  .  packet      .  +---+--+  (_)
+   (1) SIR     |  | ILA  |----->-------->---->| ILA  |   |   (3) SIR
+    addressed  +->|router|  .              .  |router|->-+    addressed
+    packet        +---+--+  .     IPv6     .  +---+--+        packet
+		   /        .    Network   .
+		  /         .              .   +--+-++--------+
+    +--------+   /          .              .   |ILA ||  Host  |
+    |  Host  +--+           .              .- -|host||        |
+    |        |              .              .   +--+-++--------+
+    +--------+              ................
+
+
+Transport checksum handling
+===========================
+
+When an address is translated by ILA, an encapsulated transport checksum
+that includes the translated address in a pseudo header may be rendered
+incorrect on the wire. This is a problem for intermediate devices,
+including checksum offload in NICs, that process the checksum. There are
+three options to deal with this:
+
+- no action	Allow the checksum to be incorrect on the wire. Before
+		a receiver verifies a checksum the ILA to SIR address
+		translation must be done.
+
+- adjust transport checksum
+		When ILA translation is performed the packet is parsed
+		and if a transport layer checksum is found then it is
+		adjusted to reflect the correct checksum per the
+		translated address.
+
+- checksum neutral mapping
+		When an address is translated the difference can be offset
+		elsewhere in a part of the packet that is covered by
+		the checksum. The low order sixteen bits of the identifier
+		are used. This method is preferred since it doesn't require
+		parsing a packet beyond the IP header and in most cases the
+		adjustment can be precomputed and saved with the mapping.
+
+Note that the checksum neutral adjustment affects the low order sixteen
+bits of the identifier. When ILA to SIR address translation is done on
+egress the low order bits are restored to the original value which
+restores the identifier as it was originally sent.
+
+
+Identifier types
+================
+
+ILA defines different types of identifiers for different use cases.
+
+The defined types are:
+
+      0: interface identifier
+
+      1: locally unique identifier
+
+      2: virtual networking identifier for IPv4 address
+
+      3: virtual networking identifier for IPv6 unicast address
+
+      4: virtual networking identifier for IPv6 multicast address
+
+      5: non-local address identifier
+
+In the current implementation of kernel ILA only locally unique identifiers
+(LUID) are supported. LUID allows for a generic, unformatted 64 bit
+identifier.
+
+
+Identifier formats
+==================
+
+Kernel ILA supports two optional fields in an identifier for formatting:
+"C-bit" and "identifier type". The presence of these fields is determined
+by configuration as demonstrated below.
+
+If the identifier type is present it occupies the three highest order
+bits of an identifier. The possible values are given in the above list.
+
+If the C-bit is present,  this is used as an indication that checksum
+neutral mapping has been done. The C-bit can only be set in an
+ILA address, never a SIR address.
+
+In the simplest format the identifier types, C-bit, and checksum
+adjustment value are not present so an identifier is considered an
+unstructured sixty-four bit value::
+
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                            Identifier                         |
+     +                                                               +
+     |                                                               |
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+The checksum neutral adjustment may be configured to always be
+present using neutral-map-auto. In this case there is no C-bit, but the
+checksum adjustment is in the low order 16 bits. The identifier is
+still sixty-four bits::
+
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                            Identifier                         |
+     |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                               |  Checksum-neutral adjustment  |
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+The C-bit may be used to explicitly indicate that checksum neutral
+mapping has been applied to an ILA address. The format is::
+
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |     |C|                    Identifier                         |
+     |     +-+                       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                               |  Checksum-neutral adjustment  |
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+The identifier type field may be present to indicate the identifier
+type. If it is not present then the type is inferred based on mapping
+configuration. The checksum neutral adjustment may automatically be
+used with the identifier type as illustrated below::
+
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     | Type|                      Identifier                         |
+     +-+-+-+                         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                               |  Checksum-neutral adjustment  |
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+The identifier type and the C-bit can be present simultaneously, so
+the identifier format would be::
+
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     | Type|C|                    Identifier                         |
+     +-+-+-+-+                       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+     |                               |  Checksum-neutral adjustment  |
+     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+
+
+Configuration
+=============
+
+There are two methods to configure ILA mappings. One is by using LWT routes
+and the other is ila_xlat (called from NFHOOK PREROUTING hook). ila_xlat
+is intended to be used in the receive path for ILA hosts.
+
+An ILA router has also been implemented in XDP. Description of that is
+outside the scope of this document.
+
+The usage for ILA LWT routes is:
+
+ip route add DEST/128 encap ila LOC csum-mode MODE ident-type TYPE via ADDR
+
+Destination (DEST) can either be a SIR address (for an ILA host or ingress
+ILA router) or an ILA address (egress ILA router). LOC is the sixty-four
+bit locator (with format W:X:Y:Z) that overwrites the upper sixty-four
+bits of the destination address.  Checksum MODE is one of "no-action",
+"adj-transport", "neutral-map", and "neutral-map-auto". If neutral-map is
+set then the C-bit will be present. Identifier TYPE is one of "luid" or
+"use-format." In the case of use-format, the identifier type field is
+present and the effective type is taken from that.
+
+The usage of ila_xlat is:
+
+ip ila add loc_match MATCH loc LOC csum-mode MODE ident-type TYPE
+
+MATCH indicates the incoming locator that must be matched to apply
+the translation. LOC is the locator that overwrites the upper
+sixty-four bits of the destination address. MODE and TYPE have the
+same meanings as described above.
+
+
+Some examples
+=============
+
+::
+
+     # Configure an ILA route that uses checksum neutral mapping as well
+     # as type field. Note that the type field is set in the SIR address
+     # (the 2000 implies type is 1 which is LUID).
+     ip route add 3333:0:0:1:2000:0:1:87/128 encap ila 2001:0:87:0 \
+	  csum-mode neutral-map ident-type use-format
+
+     # Configure an ILA LWT route that uses auto checksum neutral mapping
+     # (no C-bit) and configure identifier type to be LUID so that the
+     # identifier type field will not be present.
+     ip route add 3333:0:0:1:2000:0:2:87/128 encap ila 2001:0:87:1 \
+	  csum-mode neutral-map-auto ident-type luid
+
+     ila_xlat configuration
+
+     # Configure an ILA to SIR mapping that matches a locator and overwrites
+     # it with a SIR address (3333:0:0:1 in this example). The C-bit and
+     # identifier field are used.
+     ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \
+	 csum-mode neutral-map-auto ident-type use-format
+
+     # Configure an ILA to SIR mapping where checksum neutral is automatically
+     # set without the C-bit and the identifier type is configured to be LUID
+     # so that the identifier type field is not present.
+     ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \
+	 csum-mode neutral-map-auto ident-type luid
diff --git a/Documentation/networking/ila.txt b/Documentation/networking/ila.txt
deleted file mode 100644
index a17dac9dc915..000000000000
--- a/Documentation/networking/ila.txt
+++ /dev/null
@@ -1,285 +0,0 @@
-Identifier Locator Addressing (ILA)
-
-
-Introduction
-============
-
-Identifier-locator addressing (ILA) is a technique used with IPv6 that
-differentiates between location and identity of a network node. Part of an
-address expresses the immutable identity of the node, and another part
-indicates the location of the node which can be dynamic. Identifier-locator
-addressing can be used to efficiently implement overlay networks for
-network virtualization as well as solutions for use cases in mobility.
-
-ILA can be thought of as means to implement an overlay network without
-encapsulation. This is accomplished by performing network address
-translation on destination addresses as a packet traverses a network. To
-the network, an ILA translated packet appears to be no different than any
-other IPv6 packet. For instance, if the transport protocol is TCP then an
-ILA translated packet looks like just another TCP/IPv6 packet. The
-advantage of this is that ILA is transparent to the network so that
-optimizations in the network, such as ECMP, RSS, GRO, GSO, etc., just work.
-
-The ILA protocol is described in Internet-Draft draft-herbert-intarea-ila.
-
-
-ILA terminology
-===============
-
-  - Identifier	A number that identifies an addressable node in the network
-		independent of its location. ILA identifiers are sixty-four
-		bit values.
-
-  - Locator	A network prefix that routes to a physical host. Locators
-		provide the topological location of an addressed node. ILA
-		locators are sixty-four bit prefixes.
-
-  - ILA mapping
-		A mapping of an ILA identifier to a locator (or to a
-		locator and meta data). An ILA domain maintains a database
-		that contains mappings for all destinations in the domain.
-
-  - SIR address
-		An IPv6 address composed of a SIR prefix (upper sixty-
-		four bits) and an identifier (lower sixty-four bits).
-		SIR addresses are visible to applications and provide a
-		means for them to address nodes independent of their
-		location.
-
-  - ILA address
-		An IPv6 address composed of a locator (upper sixty-four
-		bits) and an identifier (low order sixty-four bits). ILA
-		addresses are never visible to an application.
-
-  - ILA host	An end host that is capable of performing ILA translations
-		on transmit or receive.
-
-  - ILA router	A network node that performs ILA translation and forwarding
-		of translated packets.
-
-  - ILA forwarding cache
-		A type of ILA router that only maintains a working set
-		cache of mappings.
-
-  - ILA node	A network node capable of performing ILA translations. This
-		can be an ILA router, ILA forwarding cache, or ILA host.
-
-
-Operation
-=========
-
-There are two fundamental operations with ILA:
-
-  - Translate a SIR address to an ILA address. This is performed on ingress
-    to an ILA overlay.
-
-  - Translate an ILA address to a SIR address. This is performed on egress
-    from the ILA overlay.
-
-ILA can be deployed either on end hosts or intermediate devices in the
-network; these are provided by "ILA hosts" and "ILA routers" respectively.
-Configuration and datapath for these two points of deployment is somewhat
-different.
-
-The diagram below illustrates the flow of packets through ILA as well
-as showing ILA hosts and routers.
-
-    +--------+                                                +--------+
-    | Host A +-+                                         +--->| Host B |
-    |        | |              (2) ILA                   (')   |        |
-    +--------+ |            ...addressed....           (   )  +--------+
-               V  +---+--+  .  packet      .  +---+--+  (_)
-   (1) SIR     |  | ILA  |----->-------->---->| ILA  |   |   (3) SIR
-    addressed  +->|router|  .              .  |router|->-+    addressed
-    packet        +---+--+  .     IPv6     .  +---+--+        packet
-                   /        .    Network   .
-                  /         .              .   +--+-++--------+
-    +--------+   /          .              .   |ILA ||  Host  |
-    |  Host  +--+           .              .- -|host||        |
-    |        |              .              .   +--+-++--------+
-    +--------+              ................
-
-
-Transport checksum handling
-===========================
-
-When an address is translated by ILA, an encapsulated transport checksum
-that includes the translated address in a pseudo header may be rendered
-incorrect on the wire. This is a problem for intermediate devices,
-including checksum offload in NICs, that process the checksum. There are
-three options to deal with this:
-
-- no action	Allow the checksum to be incorrect on the wire. Before
-		a receiver verifies a checksum the ILA to SIR address
-		translation must be done.
-
-- adjust transport checksum
-		When ILA translation is performed the packet is parsed
-		and if a transport layer checksum is found then it is
-		adjusted to reflect the correct checksum per the
-		translated address.
-
-- checksum neutral mapping
-		When an address is translated the difference can be offset
-		elsewhere in a part of the packet that is covered by
-		the checksum. The low order sixteen bits of the identifier
-		are used. This method is preferred since it doesn't require
-		parsing a packet beyond the IP header and in most cases the
-		adjustment can be precomputed and saved with the mapping.
-
-Note that the checksum neutral adjustment affects the low order sixteen
-bits of the identifier. When ILA to SIR address translation is done on
-egress the low order bits are restored to the original value which
-restores the identifier as it was originally sent.
-
-
-Identifier types
-================
-
-ILA defines different types of identifiers for different use cases.
-
-The defined types are:
-
-      0: interface identifier
-
-      1: locally unique identifier
-
-      2: virtual networking identifier for IPv4 address
-
-      3: virtual networking identifier for IPv6 unicast address
-
-      4: virtual networking identifier for IPv6 multicast address
-
-      5: non-local address identifier
-
-In the current implementation of kernel ILA only locally unique identifiers
-(LUID) are supported. LUID allows for a generic, unformatted 64 bit
-identifier.
-
-
-Identifier formats
-==================
-
-Kernel ILA supports two optional fields in an identifier for formatting:
-"C-bit" and "identifier type". The presence of these fields is determined
-by configuration as demonstrated below.
-
-If the identifier type is present it occupies the three highest order
-bits of an identifier. The possible values are given in the above list.
-
-If the C-bit is present,  this is used as an indication that checksum
-neutral mapping has been done. The C-bit can only be set in an
-ILA address, never a SIR address.
-
-In the simplest format the identifier types, C-bit, and checksum
-adjustment value are not present so an identifier is considered an
-unstructured sixty-four bit value.
-
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                            Identifier                         |
-     +                                                               +
-     |                                                               |
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-The checksum neutral adjustment may be configured to always be
-present using neutral-map-auto. In this case there is no C-bit, but the
-checksum adjustment is in the low order 16 bits. The identifier is
-still sixty-four bits.
-
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                            Identifier                         |
-     |                               +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                               |  Checksum-neutral adjustment  |
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-The C-bit may used to explicitly indicate that checksum neutral
-mapping has been applied to an ILA address. The format is:
-
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |     |C|                    Identifier                         |
-     |     +-+                       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                               |  Checksum-neutral adjustment  |
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-The identifier type field may be present to indicate the identifier
-type. If it is not present then the type is inferred based on mapping
-configuration. The checksum neutral adjustment may automatically
-used with the identifier type as illustrated below.
-
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     | Type|                      Identifier                         |
-     +-+-+-+                         +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                               |  Checksum-neutral adjustment  |
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-If the identifier type and the C-bit can be present simultaneously so
-the identifier format would be:
-
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     | Type|C|                    Identifier                         |
-     +-+-+-+-+                       +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-     |                               |  Checksum-neutral adjustment  |
-     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
-
-
-Configuration
-=============
-
-There are two methods to configure ILA mappings. One is by using LWT routes
-and the other is ila_xlat (called from NFHOOK PREROUTING hook). ila_xlat
-is intended to be used in the receive path for ILA hosts .
-
-An ILA router has also been implemented in XDP. Description of that is
-outside the scope of this document.
-
-The usage of for ILA LWT routes is:
-
-ip route add DEST/128 encap ila LOC csum-mode MODE ident-type TYPE via ADDR
-
-Destination (DEST) can either be a SIR address (for an ILA host or ingress
-ILA router) or an ILA address (egress ILA router). LOC is the sixty-four
-bit locator (with format W:X:Y:Z) that overwrites the upper sixty-four
-bits of the destination address.  Checksum MODE is one of "no-action",
-"adj-transport", "neutral-map", and "neutral-map-auto". If neutral-map is
-set then the C-bit will be present. Identifier TYPE one of "luid" or
-"use-format." In the case of use-format, the identifier type field is
-present and the effective type is taken from that.
-
-The usage of ila_xlat is:
-
-ip ila add loc_match MATCH loc LOC csum-mode MODE ident-type TYPE
-
-MATCH indicates the incoming locator that must be matched to apply
-a the translaiton. LOC is the locator that overwrites the upper
-sixty-four bits of the destination address. MODE and TYPE have the
-same meanings as described above.
-
-
-Some examples
-=============
-
-# Configure an ILA route that uses checksum neutral mapping as well
-# as type field. Note that the type field is set in the SIR address
-# (the 2000 implies type is 1 which is LUID).
-ip route add 3333:0:0:1:2000:0:1:87/128 encap ila 2001:0:87:0 \
-     csum-mode neutral-map ident-type use-format
-
-# Configure an ILA LWT route that uses auto checksum neutral mapping
-# (no C-bit) and configure identifier type to be LUID so that the
-# identifier type field will not be present.
-ip route add 3333:0:0:1:2000:0:2:87/128 encap ila 2001:0:87:1 \
-     csum-mode neutral-map-auto ident-type luid
-
-ila_xlat configuration
-
-# Configure an ILA to SIR mapping that matches a locator and overwrites
-# it with a SIR address (3333:0:0:1 in this example). The C-bit and
-# identifier field are used.
-ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \
-    csum-mode neutral-map-auto ident-type use-format
-
-# Configure an ILA to SIR mapping where checksum neutral is automatically
-# set without the C-bit and the identifier type is configured to be LUID
-# so that the identifier type field is not present.
-ip ila add loc_match 2001:0:119:0 loc 3333:0:0:1 \
-    csum-mode neutral-map-auto ident-type use-format
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5a7889df1375..488971f6b650 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -64,6 +64,7 @@ Contents:
    gen_stats
    gtp
    hinic
+   ila
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 7cdb25400f7e8624414260d1b0fa70da280b2303 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:45 +0200
Subject: docs: networking: convert ipddp.txt to ReST

Not much to be done here:

- add SPDX header;
- use a document title from existing text;
- adjust a chapter markup;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/ipddp.rst | 78 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/ipddp.txt | 73 -----------------------------------
 Documentation/networking/ltpc.txt  |  2 +-
 drivers/net/appletalk/Kconfig      |  4 +-
 5 files changed, 82 insertions(+), 76 deletions(-)
 create mode 100644 Documentation/networking/ipddp.rst
 delete mode 100644 Documentation/networking/ipddp.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 488971f6b650..cf85d0a73144 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -65,6 +65,7 @@ Contents:
    gtp
    hinic
    ila
+   ipddp
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ipddp.rst b/Documentation/networking/ipddp.rst
new file mode 100644
index 000000000000..be7091b77927
--- /dev/null
+++ b/Documentation/networking/ipddp.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================================
+AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
+=========================================================
+
+Documentation ipddp.c
+
+This file is written by Jay Schulist <jschlst@samba.org>
+
+Introduction
+------------
+
+AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk
+networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams
+inside AppleTalk packets.
+
+Through this driver you can either allow your Linux box to communicate
+IP over an AppleTalk network or you can provide IP gatewaying functions
+for your AppleTalk users.
+
+You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk,
+EtherTalk and PPPTalk. The only limit on the protocol is that of what
+kernel AppleTalk layer and drivers are available.
+
+Each mode requires its own user space software.
+
+Compiling AppleTalk-IP Decapsulation/Encapsulation
+==================================================
+
+AppleTalk-IP decapsulation needs to be compiled into your kernel. You
+will need to turn on AppleTalk-IP driver support. Then you will need to
+select ONE of the two options; IP to AppleTalk-IP encapsulation support or
+AppleTalk-IP to IP decapsulation support. If you compile the driver
+statically you will only be able to use the driver for the function you have
+enabled in the kernel. If you compile the driver as a module you can
+select what mode you want it to run in via a module loading param.
+ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for
+AppleTalk-IP to IP decapsulation.
+
+Basic instructions for user space tools
+=======================================
+
+I will briefly describe the operation of the tools, but you will
+need to consult the supporting documentation for each set of tools.
+
+Decapsulation - You will need to download a software package called
+MacGate. In this distribution there will be a tool called MacRoute
+which enables you to add routes to the kernel for your Macs by hand.
+Also the tool MacRegGateWay is included to register the
+proper IP Gateway and IP addresses for your machine. Included in this
+distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from
+ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional
+but it allows automatic adding and deleting of routes for Macs. (Handy
+for locations with large Mac installations)
+
+Encapsulation - You will need to download a software daemon called ipddpd.
+This software expects there to be an AppleTalk-IP gateway on the network.
+You will also need to add the proper routes to route your Linux box's IP
+traffic out the ipddp interface.
+
+Common Uses of ipddp.c
+----------------------
+Of course AppleTalk-IP decapsulation and encapsulation, but specifically
+decapsulation is being used most for connecting LocalTalk networks to
+IP networks. Although it has been used on EtherTalk networks to allow
+Macs that are only able to tunnel IP over EtherTalk.
+
+Encapsulation has been used to allow a Linux box stuck on a LocalTalk
+network to use IP. It should work equally well if you are stuck on an
+EtherTalk only network.
+
+Further Assistance
+-------------------
+You can contact me (Jay Schulist <jschlst@samba.org>) with any
+questions regarding decapsulation or encapsulation. Bradford W. Johnson
+<johns393@maroon.tc.umn.edu> originally wrote the ipddp.c driver for IP
+encapsulation in AppleTalk.
diff --git a/Documentation/networking/ipddp.txt b/Documentation/networking/ipddp.txt
deleted file mode 100644
index ba5c217fffe0..000000000000
--- a/Documentation/networking/ipddp.txt
+++ /dev/null
@@ -1,73 +0,0 @@
-Text file for ipddp.c:
-	AppleTalk-IP Decapsulation and AppleTalk-IP Encapsulation
-
-This text file is written by Jay Schulist <jschlst@samba.org>
-
-Introduction
-------------
-
-AppleTalk-IP (IPDDP) is the method computers connected to AppleTalk
-networks can use to communicate via IP. AppleTalk-IP is simply IP datagrams
-inside AppleTalk packets.
-
-Through this driver you can either allow your Linux box to communicate
-IP over an AppleTalk network or you can provide IP gatewaying functions
-for your AppleTalk users.
-
-You can currently encapsulate or decapsulate AppleTalk-IP on LocalTalk,
-EtherTalk and PPPTalk. The only limit on the protocol is that of what
-kernel AppleTalk layer and drivers are available.
-
-Each mode requires its own user space software.
-
-Compiling AppleTalk-IP Decapsulation/Encapsulation
-=================================================
-
-AppleTalk-IP decapsulation needs to be compiled into your kernel. You
-will need to turn on AppleTalk-IP driver support. Then you will need to
-select ONE of the two options; IP to AppleTalk-IP encapsulation support or
-AppleTalk-IP to IP decapsulation support. If you compile the driver
-statically you will only be able to use the driver for the function you have
-enabled in the kernel. If you compile the driver as a module you can
-select what mode you want it to run in via a module loading param.
-ipddp_mode=1 for AppleTalk-IP encapsulation and ipddp_mode=2 for
-AppleTalk-IP to IP decapsulation.
-
-Basic instructions for user space tools
-=======================================
-
-I will briefly describe the operation of the tools, but you will
-need to consult the supporting documentation for each set of tools.
-
-Decapsulation - You will need to download a software package called
-MacGate. In this distribution there will be a tool called MacRoute
-which enables you to add routes to the kernel for your Macs by hand.
-Also the tool MacRegGateWay is included to register the
-proper IP Gateway and IP addresses for your machine. Included in this
-distribution is a patch to netatalk-1.4b2+asun2.0a17.2 (available from
-ftp.u.washington.edu/pub/user-supported/asun/) this patch is optional
-but it allows automatic adding and deleting of routes for Macs. (Handy
-for locations with large Mac installations)
-
-Encapsulation - You will need to download a software daemon called ipddpd.
-This software expects there to be an AppleTalk-IP gateway on the network.
-You will also need to add the proper routes to route your Linux box's IP
-traffic out the ipddp interface.
-
-Common Uses of ipddp.c
-----------------------
-Of course AppleTalk-IP decapsulation and encapsulation, but specifically
-decapsulation is being used most for connecting LocalTalk networks to
-IP networks. Although it has been used on EtherTalk networks to allow
-Macs that are only able to tunnel IP over EtherTalk.
-
-Encapsulation has been used to allow a Linux box stuck on a LocalTalk
-network to use IP. It should work equally well if you are stuck on an
-EtherTalk only network.
-
-Further Assistance
--------------------
-You can contact me (Jay Schulist <jschlst@samba.org>) with any
-questions regarding decapsulation or encapsulation. Bradford W. Johnson
-<johns393@maroon.tc.umn.edu> originally wrote the ipddp.c driver for IP
-encapsulation in AppleTalk.
diff --git a/Documentation/networking/ltpc.txt b/Documentation/networking/ltpc.txt
index 0bf3220c715b..a005a73b76d0 100644
--- a/Documentation/networking/ltpc.txt
+++ b/Documentation/networking/ltpc.txt
@@ -99,7 +99,7 @@ treat the LocalTalk device like an ordinary Ethernet device, even if
 that's what it looks like to Netatalk.
 
 Instead, you follow the same procedure as for doing IP in EtherTalk.
-See Documentation/networking/ipddp.txt for more information about the
+See Documentation/networking/ipddp.rst for more information about the
 kernel driver and userspace tools needed.
 
 --------------------------------------
diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig
index d4e51c048f62..ccde6479050c 100644
--- a/drivers/net/appletalk/Kconfig
+++ b/drivers/net/appletalk/Kconfig
@@ -86,7 +86,7 @@ config IPDDP
 	  box is stuck on an AppleTalk only network) or decapsulate (e.g. if
 	  you want your Linux box to act as an Internet gateway for a zoo of
 	  AppleTalk connected Macs). Please see the file
-	  <file:Documentation/networking/ipddp.txt> for more information.
+	  <file:Documentation/networking/ipddp.rst> for more information.
 
 	  If you say Y here, the AppleTalk-IP support will be compiled into
 	  the kernel. In this case, you can either use encapsulation or
@@ -107,4 +107,4 @@ config IPDDP_ENCAP
 	  IP packets inside AppleTalk frames; this is useful if your Linux box
 	  is stuck on an AppleTalk network (which hopefully contains a
 	  decapsulator somewhere). Please see
-	  <file:Documentation/networking/ipddp.txt> for more information.
+	  <file:Documentation/networking/ipddp.rst> for more information.
-- 
cgit v1.2.3-59-g8ed1b


From 9de1fcdf36e7e00693a260865a5f2a58af1c7040 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:46 +0200
Subject: docs: networking: convert ip_dynaddr.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst      |  1 +
 Documentation/networking/ip_dynaddr.rst | 40 +++++++++++++++++++++++++++++++++
 Documentation/networking/ip_dynaddr.txt | 29 ------------------------
 3 files changed, 41 insertions(+), 29 deletions(-)
 create mode 100644 Documentation/networking/ip_dynaddr.rst
 delete mode 100644 Documentation/networking/ip_dynaddr.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index cf85d0a73144..f81aeb87aa28 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -66,6 +66,7 @@ Contents:
    hinic
    ila
    ipddp
+   ip_dynaddr
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ip_dynaddr.rst b/Documentation/networking/ip_dynaddr.rst
new file mode 100644
index 000000000000..eacc0c780c7f
--- /dev/null
+++ b/Documentation/networking/ip_dynaddr.rst
@@ -0,0 +1,40 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+IP dynamic address hack-port v0.03
+==================================
+
+This stuff allows diald ONESHOT connections to get established by
+dynamically changing packet source address (and socket's if local procs).
+It is implemented for TCP diald-box connections(1) and IP_MASQuerading(2).
+
+If enabled\ [#]_ and forwarding interface has changed:
+
+  1)  Socket (and packet) source address is rewritten ON RETRANSMISSIONS
+      while in SYN_SENT state (diald-box processes).
+  2)  Out-bounded MASQueraded source address changes ON OUTPUT (when
+      internal host does retransmission) until a packet from outside is
+      received by the tunnel.
+
+This is especially helpful for auto dialup links (diald), where the
+``actual`` outgoing address is unknown at the moment the link is
+going up. So, the *same* (local AND masqueraded) connections requests that
+bring the link up will be able to get established.
+
+.. [#] At boot, by default no address rewriting is attempted.
+
+  To enable::
+
+     # echo 1 > /proc/sys/net/ipv4/ip_dynaddr
+
+  To enable verbose mode::
+
+    # echo 2 > /proc/sys/net/ipv4/ip_dynaddr
+
+  To disable (default)::
+
+     # echo 0 > /proc/sys/net/ipv4/ip_dynaddr
+
+Enjoy!
+
+Juanjo  <jjciarla@raiz.uncu.edu.ar>
diff --git a/Documentation/networking/ip_dynaddr.txt b/Documentation/networking/ip_dynaddr.txt
deleted file mode 100644
index 45f3c1268e86..000000000000
--- a/Documentation/networking/ip_dynaddr.txt
+++ /dev/null
@@ -1,29 +0,0 @@
-IP dynamic address hack-port v0.03
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-This stuff allows diald ONESHOT connections to get established by
-dynamically changing packet source address (and socket's if local procs).
-It is implemented for TCP diald-box connections(1) and IP_MASQuerading(2).
-
-If enabled[*] and forwarding interface has changed:
-  1)  Socket (and packet) source address is rewritten ON RETRANSMISSIONS
-      while in SYN_SENT state (diald-box processes).
-  2)  Out-bounded MASQueraded source address changes ON OUTPUT (when
-      internal host does retransmission) until a packet from outside is
-      received by the tunnel.
-
-This is specially helpful for auto dialup links (diald), where the
-``actual'' outgoing address is unknown at the moment the link is
-going up. So, the *same* (local AND masqueraded) connections requests that
-bring the link up will be able to get established.
-
-[*] At boot, by default no address rewriting is attempted. 
-  To enable:
-     # echo 1 > /proc/sys/net/ipv4/ip_dynaddr
-  To enable verbose mode:
-     # echo 2 > /proc/sys/net/ipv4/ip_dynaddr
-  To disable (default)
-     # echo 0 > /proc/sys/net/ipv4/ip_dynaddr
-
-Enjoy!
-
--- Juanjo  <jjciarla@raiz.uncu.edu.ar>
-- 
cgit v1.2.3-59-g8ed1b


From aac86c887ed66ac4f467821ebf75373124a148d7 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:47 +0200
Subject: docs: networking: convert iphase.txt to ReST

- add SPDX header;
- adjust title using the proper markup;
- mark code blocks and literals as such;
- mark tables as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/iphase.rst | 193 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/iphase.txt | 158 -----------------------------
 drivers/atm/Kconfig                 |   2 +-
 4 files changed, 195 insertions(+), 159 deletions(-)
 create mode 100644 Documentation/networking/iphase.rst
 delete mode 100644 Documentation/networking/iphase.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index f81aeb87aa28..505eaa41ca2b 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -67,6 +67,7 @@ Contents:
    ila
    ipddp
    ip_dynaddr
+   iphase
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/iphase.rst b/Documentation/networking/iphase.rst
new file mode 100644
index 000000000000..92d9b757d75a
--- /dev/null
+++ b/Documentation/networking/iphase.rst
@@ -0,0 +1,193 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+ATM (i)Chip IA Linux Driver Source
+==================================
+
+			      READ ME FIRST
+
+--------------------------------------------------------------------------------
+
+		     Read This Before You Begin!
+
+--------------------------------------------------------------------------------
+
+Description
+===========
+
+This is the README file for the Interphase PCI ATM (i)Chip IA Linux driver
+source release.
+
+The features and limitations of this driver are as follows:
+
+    - A single VPI (VPI value of 0) is supported.
+    - Supports 4K VCs for the server board (with 512K control memory) and 1K
+      VCs for the client board (with 128K control memory).
+    - UBR, ABR and CBR service categories are supported.
+    - Only AAL5 is supported.
+    - Supports setting of PCR on the VCs.
+    - Multiple adapters in a system are supported.
+    - All variants of Interphase ATM PCI (i)Chip adapter cards are supported,
+      including x575 (OC3, control memory 128K , 512K and packet memory 128K,
+      512K and 1M), x525 (UTP25) and x531 (DS3 and E3). See
+      http://www.iphase.com/
+      for details.
+    - Only x86 platforms are supported.
+    - SMP is supported.
+
+
+Before You Start
+================
+
+
+Installation
+------------
+
+1. Installing the adapters in the system
+
+   To install the ATM adapters in the system, follow the steps below.
+
+       a. Login as root.
+       b. Shut down the system and power off the system.
+       c. Install one or more ATM adapters in the system.
+       d. Connect each adapter to a port on an ATM switch. The green 'Link'
+	  LED on the front panel of the adapter will be on if the adapter is
+	  connected to the switch properly when the system is powered up.
+       e. Power on and boot the system.
+
+2. [ Removed ]
+
+3. Rebuild kernel with ABR support
+
+   [ a. and b. removed ]
+
+    c. Reconfigure the kernel, choose the Interphase ia driver through "make
+       menuconfig" or "make xconfig".
+    d. Rebuild the kernel, loadable modules and the atm tools.
+    e. Install the new built kernel and modules and reboot.
+
+4. Load the adapter hardware driver (ia driver) if it is built as a module
+
+       a. Login as root.
+       b. Change directory to /lib/modules/<kernel-version>/atm.
+       c. Run "insmod suni.o;insmod iphase.o"
+	  The yellow 'status' LED on the front panel of the adapter will blink
+	  while the driver is loaded in the system.
+       d. To verify that the 'ia' driver is loaded successfully, run the
+	  following command::
+
+	      cat /proc/atm/devices
+
+	  If the driver is loaded successfully, the output of the command will
+	  be similar to the following lines::
+
+	      Itf Type    ESI/"MAC"addr AAL(TX,err,RX,err,drop) ...
+	      0   ia      xxxxxxxxx  0 ( 0 0 0 0 0 )  5 ( 0 0 0 0 0 )
+
+	  You can also check the system log file /var/log/messages for messages
+	  related to the ATM driver.
+
+5. Ia Driver Configuration
+
+5.1 Configuration of adapter buffers
+    The (i)Chip boards have 3 different packet RAM size variants: 128K, 512K and
+    1M. The RAM size decides the number of buffers and buffer size. The default
+    size and number of buffers are set as follows:
+
+	=========  =======  ======   ======   ======   ======   ======
+	 Total     Rx RAM   Tx RAM   Rx Buf   Tx Buf   Rx buf   Tx buf
+	 RAM size  size     size     size     size     cnt      cnt
+	=========  =======  ======   ======   ======   ======   ======
+	   128K      64K      64K      10K      10K       6        6
+	   512K     256K     256K      10K      10K      25       25
+	     1M     512K     512K      10K      10K      51       51
+	=========  =======  ======   ======   ======   ======   ======
+
+       These settings should work well in most environments, but can be
+       changed by typing the following command::
+
+	   insmod <IA_DIR>/ia.o IA_RX_BUF=<RX_CNT> IA_RX_BUF_SZ=<RX_SIZE> \
+		   IA_TX_BUF=<TX_CNT> IA_TX_BUF_SZ=<TX_SIZE>
+
+       Where:
+
+	    - RX_CNT = number of receive buffers in the range (1-128)
+	    - RX_SIZE = size of receive buffers in the range (48-64K)
+	    - TX_CNT = number of transmit buffers in the range (1-128)
+	    - TX_SIZE = size of transmit buffers in the range (48-64K)
+
+	    1. Transmit and receive buffer size must be a multiple of 4.
+	    2. Care should be taken so that the memory required for the
+	       transmit and receive buffers is less than or equal to the
+	       total adapter packet memory.
+
+5.2 Turn on ia debug trace
+
+    When the ia driver is built with the CONFIG_ATM_IA_DEBUG flag, the driver
+    can provide more debug trace if needed. There is a bit mask variable,
+    IADebugFlag, which controls the output of the traces. You can find the bit
+    map of the IADebugFlag in iphase.h.
+    The debug trace can be turned on through the insmod command line option, for
+    example, "insmod iphase.o IADebugFlag=0xffffffff" can turn on all the debug
+    traces together with loading the driver.
+
+6. Ia Driver Test Using ttcp_atm and PVC
+
+   For the PVC setup, the test machines can either be connected back-to-back or
+   through a switch. If connected through the switch, the switch must be
+   configured for the PVC(s).
+
+   a. For UBR test:
+
+      At the test machine intended to receive data, type::
+
+	 ttcp_atm -r -a -s 0.100
+
+      At the other test machine, type::
+
+	 ttcp_atm -t -a -s 0.100 -n 10000
+
+      Run "ttcp_atm -h" to display more options of the ttcp_atm tool.
+   b. For ABR test:
+
+      It is the same as the UBR testing, but with an extra command option::
+
+	 -Pabr:max_pcr=<xxx>
+
+      where:
+
+	     xxx = the maximum peak cell rate, from 170 - 353207.
+
+      This option must be set on both the machines.
+
+   c. For CBR test:
+
+      It is the same as the UBR testing, but with an extra command option::
+
+	 -Pcbr:max_pcr=<xxx>
+
+      where:
+
+	     xxx = the maximum peak cell rate, from 170 - 353207.
+
+      This option may only be set on the transmit machine.
+
+
+Outstanding Issues
+==================
+
+
+
+Contact Information
+-------------------
+
+::
+
+     Customer Support:
+	 United States:	Telephone:	(214) 654-5555
+			Fax:		(214) 654-5500
+			E-Mail:		intouch@iphase.com
+	 Europe:	Telephone:	33 (0)1 41 15 44 00
+			Fax:		33 (0)1 41 15 12 13
+     World Wide Web:	http://www.iphase.com
+     Anonymous FTP:	ftp.iphase.com
diff --git a/Documentation/networking/iphase.txt b/Documentation/networking/iphase.txt
deleted file mode 100644
index 670b72f16585..000000000000
--- a/Documentation/networking/iphase.txt
+++ /dev/null
@@ -1,158 +0,0 @@
-
-                              READ ME FISRT
-		  ATM (i)Chip IA Linux Driver Source
---------------------------------------------------------------------------------
-                     Read This Before You Begin!
---------------------------------------------------------------------------------
-
-Description
------------
-
-This is the README file for the Interphase PCI ATM (i)Chip IA Linux driver 
-source release.
-
-The features and limitations of this driver are as follows:
-    - A single VPI (VPI value of 0) is supported.
-    - Supports 4K VCs for the server board (with 512K control memory) and 1K 
-      VCs for the client board (with 128K control memory).
-    - UBR, ABR and CBR service categories are supported.
-    - Only AAL5 is supported. 
-    - Supports setting of PCR on the VCs. 
-    - Multiple adapters in a system are supported.
-    - All variants of Interphase ATM PCI (i)Chip adapter cards are supported, 
-      including x575 (OC3, control memory 128K , 512K and packet memory 128K, 
-      512K and 1M), x525 (UTP25) and x531 (DS3 and E3). See 
-      http://www.iphase.com/
-      for details.
-    - Only x86 platforms are supported.
-    - SMP is supported.
-
-
-Before You Start
----------------- 
-
-
-Installation
-------------
-
-1. Installing the adapters in the system
-   To install the ATM adapters in the system, follow the steps below.
-       a. Login as root.
-       b. Shut down the system and power off the system.
-       c. Install one or more ATM adapters in the system.
-       d. Connect each adapter to a port on an ATM switch. The green 'Link' 
-          LED on the front panel of the adapter will be on if the adapter is 
-          connected to the switch properly when the system is powered up.
-       e. Power on and boot the system.
-
-2. [ Removed ]
-
-3. Rebuild kernel with ABR support
-   [ a. and b. removed ]
-    c. Reconfigure the kernel, choose the Interphase ia driver through "make 
-       menuconfig" or "make xconfig".
-    d. Rebuild the kernel, loadable modules and the atm tools. 
-    e. Install the new built kernel and modules and reboot.
-
-4. Load the adapter hardware driver (ia driver) if it is built as a module
-       a. Login as root.
-       b. Change directory to /lib/modules/<kernel-version>/atm.
-       c. Run "insmod suni.o;insmod iphase.o"
-	  The yellow 'status' LED on the front panel of the adapter will blink 
-          while the driver is loaded in the system.
-       d. To verify that the 'ia' driver is loaded successfully, run the 
-          following command:
-
-              cat /proc/atm/devices
-
-          If the driver is loaded successfully, the output of the command will 
-          be similar to the following lines:
-
-              Itf Type    ESI/"MAC"addr AAL(TX,err,RX,err,drop) ...
-              0   ia      xxxxxxxxx  0 ( 0 0 0 0 0 )  5 ( 0 0 0 0 0 )
-
-          You can also check the system log file /var/log/messages for messages
-          related to the ATM driver.
-
-5. Ia Driver Configuration 
-
-5.1 Configuration of adapter buffers
-    The (i)Chip boards have 3 different packet RAM size variants: 128K, 512K and
-    1M. The RAM size decides the number of buffers and buffer size. The default 
-    size and number of buffers are set as following: 
-
-          Total    Rx RAM   Tx RAM   Rx Buf   Tx Buf   Rx buf   Tx buf
-         RAM size   size     size     size     size      cnt      cnt
-         --------  ------   ------   ------   ------   ------   ------
-           128K      64K      64K      10K      10K       6        6
-           512K     256K     256K      10K      10K      25       25
-             1M     512K     512K      10K      10K      51       51
-
-       These setting should work well in most environments, but can be
-       changed by typing the following command: 
- 
-           insmod <IA_DIR>/ia.o IA_RX_BUF=<RX_CNT> IA_RX_BUF_SZ=<RX_SIZE> \
-                   IA_TX_BUF=<TX_CNT> IA_TX_BUF_SZ=<TX_SIZE> 
-       Where:
-            RX_CNT = number of receive buffers in the range (1-128)
-            RX_SIZE = size of receive buffers in the range (48-64K)
-            TX_CNT = number of transmit buffers in the range (1-128)
-            TX_SIZE = size of transmit buffers in the range (48-64K)
-
-            1. Transmit and receive buffer size must be a multiple of 4.
-            2. Care should be taken so that the memory required for the
-               transmit and receive buffers is less than or equal to the
-               total adapter packet memory.   
-
-5.2 Turn on ia debug trace
-
-    When the ia driver is built with the CONFIG_ATM_IA_DEBUG flag, the driver 
-    can provide more debug trace if needed. There is a bit mask variable, 
-    IADebugFlag, which controls the output of the traces. You can find the bit 
-    map of the IADebugFlag in iphase.h. 
-    The debug trace can be turn on through the insmod command line option, for 
-    example, "insmod iphase.o IADebugFlag=0xffffffff" can turn on all the debug 
-    traces together with loading the driver.
-
-6. Ia Driver Test Using ttcp_atm and PVC
-
-   For the PVC setup, the test machines can either be connected back-to-back or 
-   through a switch. If connected through the switch, the switch must be 
-   configured for the PVC(s).
-
-   a. For UBR test:
-      At the test machine intended to receive data, type:
-         ttcp_atm -r -a -s 0.100 
-      At the other test machine, type:
-         ttcp_atm -t -a -s 0.100 -n 10000
-      Run "ttcp_atm -h" to display more options of the ttcp_atm tool.
-   b. For ABR test:
-      It is the same as the UBR testing, but with an extra command option:
-         -Pabr:max_pcr=<xxx>
-         where:
-             xxx = the maximum peak cell rate, from 170 - 353207.
-         This option must be set on both the machines.
-   c. For CBR test:
-      It is the same as the UBR testing, but with an extra command option:
-         -Pcbr:max_pcr=<xxx>
-         where:
-             xxx = the maximum peak cell rate, from 170 - 353207.
-         This option may only be set on the transmit machine.
-
-
-OUTSTANDING ISSUES
-------------------
-
-
-
-Contact Information
--------------------
-
-     Customer Support:
-         United States:	Telephone:	(214) 654-5555
-     			Fax:		(214) 654-5500
-			E-Mail:		intouch@iphase.com
-	 Europe:	Telephone:	33 (0)1 41 15 44 00
-			Fax:		33 (0)1 41 15 12 13
-     World Wide Web:	http://www.iphase.com
-     Anonymous FTP:	ftp.iphase.com
diff --git a/drivers/atm/Kconfig b/drivers/atm/Kconfig
index 4af7cbdcc349..cfb0d16b60ad 100644
--- a/drivers/atm/Kconfig
+++ b/drivers/atm/Kconfig
@@ -306,7 +306,7 @@ config ATM_IA
 	  for more info about the cards. Say Y (or M to compile as a module
 	  named iphase) here if you have one of these cards.
 
-	  See the file <file:Documentation/networking/iphase.txt> for further
+	  See the file <file:Documentation/networking/iphase.rst> for further
 	  details.
 
 config ATM_IA_DEBUG
-- 
cgit v1.2.3-59-g8ed1b


From 355e656e017c3b42deb57d125d86c4cbd277d6db Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:48 +0200
Subject: docs: networking: convert ipsec.txt to ReST

Not much to be done here:

- add SPDX header;
- add a document title;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/ipsec.rst | 46 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/ipsec.txt | 38 -------------------------------
 3 files changed, 47 insertions(+), 38 deletions(-)
 create mode 100644 Documentation/networking/ipsec.rst
 delete mode 100644 Documentation/networking/ipsec.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 505eaa41ca2b..3efb4608649a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -68,6 +68,7 @@ Contents:
    ipddp
    ip_dynaddr
    iphase
+   ipsec
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ipsec.rst b/Documentation/networking/ipsec.rst
new file mode 100644
index 000000000000..afe9d7b48be3
--- /dev/null
+++ b/Documentation/networking/ipsec.rst
@@ -0,0 +1,46 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====
+IPsec
+=====
+
+
+This document lists known IPsec corner cases which need to be kept in mind when
+deploying various IPsec configurations in real-world production environments.
+
+1. IPcomp:
+	   Small IP packets won't get compressed at the sender, and fail the
+	   policy check on the receiver.
+
+Quote from RFC3173::
+
+  2.2. Non-Expansion Policy
+
+   If the total size of a compressed payload and the IPComp header, as
+   defined in section 3, is not smaller than the size of the original
+   payload, the IP datagram MUST be sent in the original non-compressed
+   form.  To clarify: If an IP datagram is sent non-compressed, no
+
+   IPComp header is added to the datagram.  This policy ensures saving
+   the decompression processing cycles and avoiding incurring IP
+   datagram fragmentation when the expanded datagram is larger than the
+   MTU.
+
+   Small IP datagrams are likely to expand as a result of compression.
+   Therefore, a numeric threshold should be applied before compression,
+   where IP datagrams of size smaller than the threshold are sent in the
+   original form without attempting compression.  The numeric threshold
+   is implementation dependent.
+
+The current IPComp implementation is indeed by the book, whereas in practice,
+when sending a non-compressed packet to the peer (whether the packet len
+is smaller than the threshold or the compressed len is larger than the original
+packet len), the packet is dropped when checking the policy, as this packet
+matches the selector but does not come from any XFRM layer, i.e., it has no
+security path. Such a naked packet will never make it to the upper layer.
+The result is even weirder to the user when pinging the peer with different
+payload lengths.
+
+One workaround is to try setting "level use" for each policy if the user
+observes the above scenario. The consequence of doing so is that small
+(uncompressed) packets will skip policy checking on the receiver side.
diff --git a/Documentation/networking/ipsec.txt b/Documentation/networking/ipsec.txt
deleted file mode 100644
index ba794b7e51be..000000000000
--- a/Documentation/networking/ipsec.txt
+++ /dev/null
@@ -1,38 +0,0 @@
-
-Here documents known IPsec corner cases which need to be keep in mind when
-deploy various IPsec configuration in real world production environment.
-
-1. IPcomp: Small IP packet won't get compressed at sender, and failed on
-	   policy check on receiver.
-
-Quote from RFC3173:
-2.2. Non-Expansion Policy
-
-   If the total size of a compressed payload and the IPComp header, as
-   defined in section 3, is not smaller than the size of the original
-   payload, the IP datagram MUST be sent in the original non-compressed
-   form.  To clarify: If an IP datagram is sent non-compressed, no
-
-   IPComp header is added to the datagram.  This policy ensures saving
-   the decompression processing cycles and avoiding incurring IP
-   datagram fragmentation when the expanded datagram is larger than the
-   MTU.
-
-   Small IP datagrams are likely to expand as a result of compression.
-   Therefore, a numeric threshold should be applied before compression,
-   where IP datagrams of size smaller than the threshold are sent in the
-   original form without attempting compression.  The numeric threshold
-   is implementation dependent.
-
-Current IPComp implementation is indeed by the book, while as in practice
-when sending non-compressed packet to the peer (whether or not packet len
-is smaller than the threshold or the compressed len is larger than original
-packet len), the packet is dropped when checking the policy as this packet
-matches the selector but not coming from any XFRM layer, i.e., with no
-security path. Such naked packet will not eventually make it to upper layer.
-The result is much more wired to the user when ping peer with different
-payload length.
-
-One workaround is try to set "level use" for each policy if user observed
-above scenario. The consequence of doing so is small packet(uncompressed)
-will skip policy checking on receiver side.
-- 
cgit v1.2.3-59-g8ed1b


From 1cec2cacaaec5d53adc04dd3ecfdb687b26c0e89 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:49 +0200
Subject: docs: networking: convert ip-sysctl.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- mark lists as such;
- mark tables as such;
- use footnote markup;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/kernel-parameters.txt |    2 +-
 Documentation/admin-guide/sysctl/net.rst        |    2 +-
 Documentation/networking/index.rst              |    1 +
 Documentation/networking/ip-sysctl.rst          | 2649 +++++++++++++++++++++++
 Documentation/networking/ip-sysctl.txt          | 2374 --------------------
 Documentation/networking/snmp_counter.rst       |    2 +-
 net/Kconfig                                     |    2 +-
 net/ipv4/Kconfig                                |    2 +-
 net/ipv4/icmp.c                                 |    2 +-
 9 files changed, 2656 insertions(+), 2380 deletions(-)
 create mode 100644 Documentation/networking/ip-sysctl.rst
 delete mode 100644 Documentation/networking/ip-sysctl.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index b23ab11587a6..e37db6f1be64 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -4910,7 +4910,7 @@
 			Set the number of tcp_metrics_hash slots.
 			Default value is 8192 or 16384 depending on total
 			ram pages. This is used to specify the TCP metrics
-			cache size. See Documentation/networking/ip-sysctl.txt
+			cache size. See Documentation/networking/ip-sysctl.rst
 			"tcp_no_metrics_save" section for more details.
 
 	tdfx=		[HW,DRM]
diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index e043c9213388..84e3348a9543 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -353,7 +353,7 @@ socket's buffer. It will not take effect unless PF_UNIX flag is specified.
 
 3. /proc/sys/net/ipv4 - IPV4 settings
 -------------------------------------
-Please see: Documentation/networking/ip-sysctl.txt and ipvs-sysctl.txt for
+Please see: Documentation/networking/ip-sysctl.rst and ipvs-sysctl.txt for
 descriptions of these entries.
 
 
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 3efb4608649a..7d133d8dbe2a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -69,6 +69,7 @@ Contents:
    ip_dynaddr
    iphase
    ipsec
+   ip-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
new file mode 100644
index 000000000000..38f811d4b2f0
--- /dev/null
+++ b/Documentation/networking/ip-sysctl.rst
@@ -0,0 +1,2649 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========
+IP Sysctl
+=========
+
+/proc/sys/net/ipv4/* Variables
+==============================
+
+ip_forward - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	Forward Packets between interfaces.
+
+	This variable is special, its change resets all configuration
+	parameters to their default state (RFC1122 for hosts, RFC1812
+	for routers)
+
+ip_default_ttl - INTEGER
+	Default value of TTL field (Time To Live) for outgoing (but not
+	forwarded) IP packets. Should be between 1 and 255 inclusive.
+	Default: 64 (as recommended by RFC1700)
+
+ip_no_pmtu_disc - INTEGER
+	Disable Path MTU Discovery. If enabled in mode 1 and a
+	fragmentation-required ICMP is received, the PMTU to this
+	destination will be set to min_pmtu (see below). You will need
+	to raise min_pmtu to the smallest interface MTU on your system
+	manually if you want to avoid locally generated fragments.
+
+	In mode 2 incoming Path MTU Discovery messages will be
+	discarded. Outgoing frames are handled the same as in mode 1,
+	implicitly setting IP_PMTUDISC_DONT on every created socket.
+
+	Mode 3 is a hardened pmtu discover mode. The kernel will only
+	accept fragmentation-needed errors if the underlying protocol
+	can verify them besides a plain socket lookup. Current
+	protocols for which pmtu events will be honored are TCP, SCTP
+	and DCCP as they verify e.g. the sequence number or the
+	association. This mode should not be enabled globally but is
+	only intended to secure e.g. name servers in namespaces where
+	TCP path mtu must still work but path MTU information of other
+	protocols should be discarded. If enabled globally this mode
+	could break other protocols.
+
+	Possible values: 0-3
+
+	Default: FALSE
+
+min_pmtu - INTEGER
+	default 552 - minimum discovered Path MTU
+
+ip_forward_use_pmtu - BOOLEAN
+	By default we don't trust protocol path MTUs while forwarding
+	because they could be easily forged and can lead to unwanted
+	fragmentation by the router.
+	You only need to enable this if you have user-space software
+	which tries to discover path mtus by itself and depends on the
+	kernel honoring this information. This is normally not the
+	case.
+
+	Default: 0 (disabled)
+
+	Possible values:
+
+	- 0 - disabled
+	- 1 - enabled
+
+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv4 reply packets that are not
+	associated with a socket (for example, TCP RSTs or ICMP echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to.
+
+	Default: 0
+
+fib_multipath_use_neigh - BOOLEAN
+	Use status of existing neighbor entry when determining nexthop for
+	multipath routes. If disabled, neighbor information is not used and
+	packets could be directed to a failed nexthop. Only valid for kernels
+	built with CONFIG_IP_ROUTE_MULTIPATH enabled.
+
+	Default: 0 (disabled)
+
+	Possible values:
+
+	- 0 - disabled
+	- 1 - enabled
+
+fib_multipath_hash_policy - INTEGER
+	Controls which hash policy to use for multipath routes. Only valid
+	for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled.
+
+	Default: 0 (Layer 3)
+
+	Possible values:
+
+	- 0 - Layer 3
+	- 1 - Layer 4
+	- 2 - Layer 3 or inner Layer 3 if present
+
+fib_sync_mem - UNSIGNED INTEGER
+	Amount of dirty memory from fib entries that can be backlogged before
+	synchronize_rcu is forced.
+
+	Default: 512kB   Minimum: 64kB   Maximum: 64MB
+
+ip_forward_update_priority - INTEGER
+	Whether to update SKB priority from "TOS" field in IPv4 header after it
+	is forwarded. The new SKB priority is mapped from TOS field value
+	according to an rt_tos2priority table (see e.g. man tc-prio).
+
+	Default: 1 (Update priority.)
+
+	Possible values:
+
+	- 0 - Do not update priority.
+	- 1 - Update priority.
+
+route/max_size - INTEGER
+	Maximum number of routes allowed in the kernel.  Increase
+	this when using large numbers of interfaces and/or routes.
+
+	From linux kernel 3.6 onwards, this is deprecated for ipv4
+	as route cache is no longer used.
+
+neigh/default/gc_thresh1 - INTEGER
+	Minimum number of entries to keep.  Garbage collector will not
+	purge entries if there are fewer than this number.
+
+	Default: 128
+
+neigh/default/gc_thresh2 - INTEGER
+	Threshold when garbage collector becomes more aggressive about
+	purging entries. Entries older than 5 seconds will be cleared
+	when over this number.
+
+	Default: 512
+
+neigh/default/gc_thresh3 - INTEGER
+	Maximum number of non-PERMANENT neighbor entries allowed.  Increase
+	this when using large numbers of interfaces and when communicating
+	with large numbers of directly-connected peers.
+
+	Default: 1024
+
+neigh/default/unres_qlen_bytes - INTEGER
+	The maximum number of bytes which may be used by packets
+	queued for each unresolved address by other network layers.
+	(added in linux 3.3)
+
+	Setting negative value is meaningless and will return error.
+
+	Default: SK_WMEM_MAX, (same as net.core.wmem_default).
+
+		Exact value depends on architecture and kernel options,
+		but should be enough to allow queuing 256 packets
+		of medium size.
+
+neigh/default/unres_qlen - INTEGER
+	The maximum number of packets which may be queued for each
+	unresolved address by other network layers.
+
+	(deprecated in linux 3.3) : use unres_qlen_bytes instead.
+
+	Prior to linux 3.3, the default value is 3 which may cause
+	unexpected packet loss. The current default value is calculated
+	according to default value of unres_qlen_bytes and true size of
+	packet.
+
+	Default: 101
+
+mtu_expires - INTEGER
+	Time, in seconds, that cached PMTU information is kept.
+
+min_adv_mss - INTEGER
+	The advertised MSS depends on the first hop route MTU, but will
+	never be lower than this setting.
+
+IP Fragmentation:
+
+ipfrag_high_thresh - LONG INTEGER
+	Maximum memory used to reassemble IP fragments.
+
+ipfrag_low_thresh - LONG INTEGER
+	(Obsolete since linux-4.17)
+	Maximum memory used to reassemble IP fragments before the kernel
+	begins to remove incomplete fragment queues to free up resources.
+	The kernel still accepts new fragments for defragmentation.
+
+ipfrag_time - INTEGER
+	Time in seconds to keep an IP fragment in memory.
+
+ipfrag_max_dist - INTEGER
+	ipfrag_max_dist is a non-negative integer value which defines the
+	maximum "disorder" which is allowed among fragments which share a
+	common IP source address. Note that reordering of packets is
+	not unusual, but if a large number of fragments arrive from a source
+	IP address while a particular fragment queue remains incomplete, it
+	probably indicates that one or more fragments belonging to that queue
+	have been lost. When ipfrag_max_dist is positive, an additional check
+	is done on fragments before they are added to a reassembly queue - if
+	ipfrag_max_dist (or more) fragments have arrived from a particular IP
+	address between additions to any IP fragment queue using that source
+	address, it's presumed that one or more fragments in the queue are
+	lost. The existing fragment queue will be dropped, and a new one
+	started. An ipfrag_max_dist value of zero disables this check.
+
+	Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
+	result in unnecessarily dropping fragment queues when normal
+	reordering of packets occurs, which could lead to poor application
+	performance. Using a very large value, e.g. 50000, increases the
+	likelihood of incorrectly reassembling IP fragments that originate
+	from different IP datagrams, which could result in data corruption.
+	Default: 64
+
+INET peer storage
+=================
+
+inet_peer_threshold - INTEGER
+	The approximate size of the storage.  Starting from this threshold
+	entries will be thrown aggressively.  This threshold also determines
+	entries' time-to-live and time intervals between garbage collection
+	passes.  More entries, less time-to-live, less GC interval.
+
+inet_peer_minttl - INTEGER
+	Minimum time-to-live of entries.  Should be enough to cover fragment
+	time-to-live on the reassembling side.  This minimum time-to-live  is
+	guaranteed if the pool size is less than inet_peer_threshold.
+	Measured in seconds.
+
+inet_peer_maxttl - INTEGER
+	Maximum time-to-live of entries.  Unused entries will expire after
+	this period of time if there is no memory pressure on the pool (i.e.
+	when the number of entries in the pool is very small).
+	Measured in seconds.
+
+TCP variables
+=============
+
+somaxconn - INTEGER
+	Limit of socket listen() backlog, known in userspace as SOMAXCONN.
+	Defaults to 4096. (Was 128 before linux-5.4)
+	See also tcp_max_syn_backlog for additional tuning for TCP sockets.
+
+tcp_abort_on_overflow - BOOLEAN
+	If listening service is too slow to accept new connections,
+	reset them. Default state is FALSE. It means that if overflow
+	occurred due to a burst, connection will recover. Enable this
+	option _only_ if you are really sure that listening daemon
+	cannot be tuned to accept connections faster. Enabling this
+	option can harm clients of your server.
+
+tcp_adv_win_scale - INTEGER
+	Count buffering overhead as bytes/2^tcp_adv_win_scale
+	(if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
+	if it is <= 0.
+
+	Possible values are [-31, 31], inclusive.
+
+	Default: 1
+
+tcp_allowed_congestion_control - STRING
+	Show/set the congestion control choices available to non-privileged
+	processes. The list is a subset of those listed in
+	tcp_available_congestion_control.
+
+	Default is "reno" and the default setting (tcp_congestion_control).
+
+tcp_app_win - INTEGER
+	Reserve max(window/2^tcp_app_win, mss) of window for application
+	buffer. Value 0 is special, it means that nothing is reserved.
+
+	Default: 31
+
+tcp_autocorking - BOOLEAN
+	Enable TCP auto corking :
+	When applications do consecutive small write()/sendmsg() system calls,
+	we try to coalesce these small writes as much as possible, to lower
+	total amount of sent packets. This is done if at least one prior
+	packet for the flow is waiting in Qdisc queues or device transmit
+	queue. Applications can still use TCP_CORK for optimal behavior
+	when they know how/when to uncork their sockets.
+
+	Default : 1
+
+tcp_available_congestion_control - STRING
+	Shows the available congestion control choices that are registered.
+	More congestion control algorithms may be available as modules,
+	but not loaded.
+
+tcp_base_mss - INTEGER
+	The initial value of search_low to be used by the packetization layer
+	Path MTU discovery (MTU probing).  If MTU probing is enabled,
+	this is the initial MSS used by the connection.
+
+tcp_mtu_probe_floor - INTEGER
+	If MTU probing is enabled this caps the minimum MSS used for search_low
+	for the connection.
+
+	Default : 48
+
+tcp_min_snd_mss - INTEGER
+	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
+	as described in RFC 1122 and RFC 6691.
+
+	If this ADVMSS option is smaller than tcp_min_snd_mss,
+	it is silently capped to tcp_min_snd_mss.
+
+	Default : 48 (at least 8 bytes of payload per segment)
+
+tcp_congestion_control - STRING
+	Set the congestion control algorithm to be used for new
+	connections. The algorithm "reno" is always available, but
+	additional choices may be available based on kernel configuration.
+	Default is set as part of kernel configuration.
+	For passive connections, the listener congestion control choice
+	is inherited.
+
+	[see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ]
+
+tcp_dsack - BOOLEAN
+	Allows TCP to send "duplicate" SACKs.
+
+tcp_early_retrans - INTEGER
+	Tail loss probe (TLP) converts RTOs occurring due to tail
+	losses into fast recovery (draft-ietf-tcpm-rack). Note that
+	TLP requires RACK to function properly (see tcp_recovery below)
+
+	Possible values:
+
+		- 0 disables TLP
+		- 3 or 4 enables TLP
+
+	Default: 3
+
+tcp_ecn - INTEGER
+	Control use of Explicit Congestion Notification (ECN) by TCP.
+	ECN is used only when both ends of the TCP connection indicate
+	support for it.  This feature is useful in avoiding losses due
+	to congestion by allowing supporting routers to signal
+	congestion before having to drop packets.
+
+	Possible values are:
+
+		=  =====================================================
+		0  Disable ECN.  Neither initiate nor accept ECN.
+		1  Enable ECN when requested by incoming connections and
+		   also request ECN on outgoing connection attempts.
+		2  Enable ECN when requested by incoming connections
+		   but do not request ECN on outgoing connections.
+		=  =====================================================
+
+	Default: 2
+
+tcp_ecn_fallback - BOOLEAN
+	If the kernel detects that ECN connection misbehaves, enable fall
+	back to non-ECN. Currently, this knob implements the fallback
+	from RFC3168, section 6.1.1.1., but we reserve that in future,
+	additional detection mechanisms could be implemented under this
+	knob. The value is not used, if tcp_ecn or per route (or congestion
+	control) ECN settings are disabled.
+
+	Default: 1 (fallback enabled)
+
+tcp_fack - BOOLEAN
+	This is a legacy option, it has no effect anymore.
+
+tcp_fin_timeout - INTEGER
+	The length of time an orphaned (no longer referenced by any
+	application) connection will remain in the FIN_WAIT_2 state
+	before it is aborted at the local end.  While a perfectly
+	valid "receive only" state for an un-orphaned connection, an
+	orphaned connection in FIN_WAIT_2 state could otherwise wait
+	forever for the remote to close its end of the connection.
+
+	Cf. tcp_max_orphans
+
+	Default: 60 seconds
+
+tcp_frto - INTEGER
+	Enables Forward RTO-Recovery (F-RTO) defined in RFC5682.
+	F-RTO is an enhanced recovery algorithm for TCP retransmission
+	timeouts.  It is particularly beneficial in networks where the
+	RTT fluctuates (e.g., wireless). F-RTO is sender-side only
+	modification. It does not require any support from the peer.
+
+	By default it's enabled with a non-zero value. 0 disables F-RTO.
+
+tcp_fwmark_accept - BOOLEAN
+	If set, incoming connections to listening sockets that do not have a
+	socket mark will set the mark of the accepting socket to the fwmark of
+	the incoming SYN packet. This will cause all packets on that connection
+	(starting from the first SYNACK) to be sent with that fwmark. The
+	listening socket's mark is unchanged. Listening sockets that already
+	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
+	unaffected.
+
+	Default: 0
+
+tcp_invalid_ratelimit - INTEGER
+	Limit the maximal rate for sending duplicate acknowledgments
+	in response to incoming TCP packets that are for an existing
+	connection but that are invalid due to any of these reasons:
+
+	  (a) out-of-window sequence number,
+	  (b) out-of-window acknowledgment number, or
+	  (c) PAWS (Protection Against Wrapped Sequence numbers) check failure
+
+	This can help mitigate simple "ack loop" DoS attacks, wherein
+	a buggy or malicious middlebox or man-in-the-middle can
+	rewrite TCP header fields in a manner that causes each endpoint
+	to think that the other is sending invalid TCP segments, thus
+	causing each side to send an unterminating stream of duplicate
+	acknowledgments for invalid segments.
+
+	Using 0 disables rate-limiting of dupacks in response to
+	invalid segments; otherwise this value specifies the minimal
+	space between sending such dupacks, in milliseconds.
+
+	Default: 500 (milliseconds).
+
+tcp_keepalive_time - INTEGER
+	How often TCP sends out keepalive messages when keepalive is enabled.
+	Default: 2 hours.
+
+tcp_keepalive_probes - INTEGER
+	How many keepalive probes TCP sends out, until it decides that the
+	connection is broken. Default value: 9.
+
+tcp_keepalive_intvl - INTEGER
+	How frequently the probes are sent out. Multiplied by
+	tcp_keepalive_probes it is the time to kill a non-responding connection,
+	after probes started. Default value: 75sec i.e. connection
+	will be aborted after ~11 minutes of retries.
+
+tcp_l3mdev_accept - BOOLEAN
+	Enables child sockets to inherit the L3 master device index.
+	Enabling this option allows a "global" listen socket to work
+	across L3 master domains (e.g., VRFs) with connected sockets
+	derived from the listen socket to be bound to the L3 domain in
+	which the packets originated. Only valid when the kernel was
+	compiled with CONFIG_NET_L3_MASTER_DEV.
+
+	Default: 0 (disabled)
+
+tcp_low_latency - BOOLEAN
+	This is a legacy option, it has no effect anymore.
+
+tcp_max_orphans - INTEGER
+	Maximal number of TCP sockets not attached to any user file handle,
+	held by system. If this number is exceeded orphaned connections are
+	reset immediately and warning is printed. This limit exists
+	only to prevent simple DoS attacks, you _must_ not rely on this
+	or lower the limit artificially, but rather increase it
+	(probably, after increasing installed memory),
+	if network conditions require more than default value,
+	and tune network services to linger and kill such states
+	more aggressively. Let me remind you again: each orphan eats
+	up to ~64K of unswappable memory.
+
+tcp_max_syn_backlog - INTEGER
+	Maximal number of remembered connection requests (SYN_RECV),
+	which have not received an acknowledgment from connecting client.
+
+	This is a per-listener limit.
+
+	The minimal value is 128 for low memory machines, and it will
+	increase in proportion to the memory of machine.
+
+	If server suffers from overload, try increasing this number.
+
+	Remember to also check /proc/sys/net/core/somaxconn
+	A SYN_RECV request socket consumes about 304 bytes of memory.
+
+tcp_max_tw_buckets - INTEGER
+	Maximal number of timewait sockets held by system simultaneously.
+	If this number is exceeded time-wait socket is immediately destroyed
+	and warning is printed. This limit exists only to prevent
+	simple DoS attacks, you _must_ not lower the limit artificially,
+	but rather increase it (probably, after increasing installed memory),
+	if network conditions require more than default value.
+
+tcp_mem - vector of 3 INTEGERs: min, pressure, max
+	min: below this number of pages TCP is not bothered about its
+	memory appetite.
+
+	pressure: when amount of memory allocated by TCP exceeds this number
+	of pages, TCP moderates its memory consumption and enters memory
+	pressure mode, which is exited when memory consumption falls
+	under "min".
+
+	max: number of pages allowed for queueing by all TCP sockets.
+
+	Defaults are calculated at boot time from amount of available
+	memory.
+
+tcp_min_rtt_wlen - INTEGER
+	The window length of the windowed min filter to track the minimum RTT.
+	A shorter window lets a flow more quickly pick up new (higher)
+	minimum RTT when it is moved to a longer path (e.g., due to traffic
+	engineering). A longer window makes the filter more resistant to RTT
+	inflations such as transient congestion. The unit is seconds.
+
+	Possible values: 0 - 86400 (1 day)
+
+	Default: 300
+
+tcp_moderate_rcvbuf - BOOLEAN
+	If set, TCP performs receive buffer auto-tuning, attempting to
+	automatically size the buffer (no greater than tcp_rmem[2]) to
+	match the size required by the path for full throughput.  Enabled by
+	default.
+
+tcp_mtu_probing - INTEGER
+	Controls TCP Packetization-Layer Path MTU Discovery.  Takes three
+	values:
+
+	- 0 - Disabled
+	- 1 - Disabled by default, enabled when an ICMP black hole detected
+	- 2 - Always enabled, use initial MSS of tcp_base_mss.
+
+tcp_probe_interval - UNSIGNED INTEGER
+	Controls how often to start TCP Packetization-Layer Path MTU
+	Discovery reprobe. The default is reprobing every 10 minutes as
+	per RFC4821.
+
+tcp_probe_threshold - INTEGER
+	Controls when TCP Packetization-Layer Path MTU Discovery probing
+	will stop in respect to the width of search range in bytes. Default
+	is 8 bytes.
+
+tcp_no_metrics_save - BOOLEAN
+	By default, TCP saves various connection metrics in the route cache
+	when the connection closes, so that connections established in the
+	near future can use these to set initial conditions.  Usually, this
+	increases overall performance, but may sometimes cause performance
+	degradation.  If set, TCP will not cache metrics on closing
+	connections.
+
+tcp_no_ssthresh_metrics_save - BOOLEAN
+	Controls whether TCP saves ssthresh metrics in the route cache.
+
+	Default is 1, which disables ssthresh metrics.
+
+tcp_orphan_retries - INTEGER
+	This value influences the timeout of a locally closed TCP connection,
+	when RTO retransmissions remain unacknowledged.
+	See tcp_retries2 for more details.
+
+	The default value is 8.
+
+	If your machine is a loaded WEB server,
+	you should think about lowering this value, such sockets
+	may consume significant resources. Cf. tcp_max_orphans.
+
+tcp_recovery - INTEGER
+	This value is a bitmap to enable various experimental loss recovery
+	features.
+
+	=========   =============================================================
+	RACK: 0x1   enables the RACK loss detection for fast detection of lost
+		    retransmissions and tail drops. It also subsumes and disables
+		    RFC6675 recovery for SACK connections.
+
+	RACK: 0x2   makes RACK's reordering window static (min_rtt/4).
+
+	RACK: 0x4   disables RACK's DUPACK threshold heuristic
+	=========   =============================================================
+
+	Default: 0x1
+
+tcp_reordering - INTEGER
+	Initial reordering level of packets in a TCP stream.
+	TCP stack can then dynamically adjust flow reordering level
+	between this initial value and tcp_max_reordering
+
+	Default: 3
+
+tcp_max_reordering - INTEGER
+	Maximal reordering level of packets in a TCP stream.
+	300 is a fairly conservative value, but you might increase it
+	if paths are using per packet load balancing (like bonding rr mode)
+
+	Default: 300
+
+tcp_retrans_collapse - BOOLEAN
+	Bug-to-bug compatibility with some broken printers.
+	On retransmit try to send bigger packets to work around bugs in
+	certain TCP stacks.
+
+tcp_retries1 - INTEGER
+	This value influences the time, after which TCP decides, that
+	something is wrong due to unacknowledged RTO retransmissions,
+	and reports this suspicion to the network layer.
+	See tcp_retries2 for more details.
+
+	RFC 1122 recommends at least 3 retransmissions, which is the
+	default.
+
+tcp_retries2 - INTEGER
+	This value influences the timeout of an alive TCP connection,
+	when RTO retransmissions remain unacknowledged.
+	Given a value of N, a hypothetical TCP connection following
+	exponential backoff with an initial RTO of TCP_RTO_MIN would
+	retransmit N times before killing the connection at the (N+1)th RTO.
+
+	The default value of 15 yields a hypothetical timeout of 924.6
+	seconds and is a lower bound for the effective timeout.
+	TCP will effectively time out at the first RTO which exceeds the
+	hypothetical timeout.
+
+	RFC 1122 recommends at least 100 seconds for the timeout,
+	which corresponds to a value of at least 8.
+
+tcp_rfc1337 - BOOLEAN
+	If set, the TCP stack behaves conforming to RFC1337. If unset,
+	we are not conforming to RFC, but prevent TCP TIME_WAIT
+	assassination.
+
+	Default: 0
+
+tcp_rmem - vector of 3 INTEGERs: min, default, max
+	min: Minimal size of receive buffer used by TCP sockets.
+	It is guaranteed to each TCP socket, even under moderate memory
+	pressure.
+
+	Default: 4K
+
+	default: initial size of receive buffer used by TCP sockets.
+	This value overrides net.core.rmem_default used by other protocols.
+	Default: 87380 bytes. This value results in window of 65535 with
+	default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
+	less for default tcp_app_win. See below about these variables.
+
+	max: maximal size of receive buffer allowed for automatically
+	selected receiver buffers for TCP socket. This value does not override
+	net.core.rmem_max.  Calling setsockopt() with SO_RCVBUF disables
+	automatic tuning of that socket's receive buffer size, in which
+	case this value is ignored.
+	Default: between 87380B and 6MB, depending on RAM size.
+
+tcp_sack - BOOLEAN
+	Enable selective acknowledgments (SACKs).
+
+tcp_comp_sack_delay_ns - LONG INTEGER
+	TCP tries to reduce number of SACK sent, using a timer
+	based on 5% of SRTT, capped by this sysctl, in nanoseconds.
+	The default is 1ms, based on TSO autosizing period.
+
+	Default : 1,000,000 ns (1 ms)
+
+tcp_comp_sack_nr - INTEGER
+	Max number of SACK that can be compressed.
+	Using 0 disables SACK compression.
+
+	Default : 44
+
+tcp_slow_start_after_idle - BOOLEAN
+	If set, provide RFC2861 behavior and time out the congestion
+	window after an idle period.  An idle period is defined as
+	the current RTO.  If unset, the congestion window will not
+	be timed out after an idle period.
+
+	Default: 1
+
+tcp_stdurg - BOOLEAN
+	Use the Host requirements interpretation of the TCP urgent pointer field.
+	Most hosts use the older BSD interpretation, so if you turn this on
+	Linux might not communicate correctly with them.
+
+	Default: FALSE
+
+tcp_synack_retries - INTEGER
+	Number of times SYNACKs for a passive TCP connection attempt will
+	be retransmitted. Should not be higher than 255. Default value
+	is 5, which corresponds to 31 seconds till the last retransmission
+	with the current initial RTO of 1 second. With this the final timeout
+	for a passive TCP connection will happen after 63 seconds.
+
+tcp_syncookies - INTEGER
+	Only valid when the kernel was compiled with CONFIG_SYN_COOKIES.
+	Send out syncookies when the syn backlog queue of a socket
+	overflows. This is to protect against the common 'SYN flood attack'.
+	Default: 1
+
+	Note that syncookies are a fallback facility.
+	They MUST NOT be used to help highly loaded servers to stand
+	against a legitimate connection rate. If you see SYN flood warnings
+	in your logs, but investigation shows that they occur
+	because of overload with legitimate connections, you should tune
+	other parameters until this warning disappears.
+	See: tcp_max_syn_backlog, tcp_synack_retries, tcp_abort_on_overflow.
+
+	Syncookies seriously violate the TCP protocol, do not allow
+	the use of TCP extensions, and can result in serious degradation
+	of some services (e.g. SMTP relaying), visible not to you
+	but to the clients and relays contacting you. If you see
+	SYN flood warnings in logs while not really being flooded, your server
+	is seriously misconfigured.
+
+	If you want to test which effects syncookies have on your
+	network connections you can set this knob to 2 to enable
+	unconditional generation of syncookies.
+
+tcp_fastopen - INTEGER
+	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
+	SYN packet.
+
+	The client support is enabled by flag 0x1 (on by default). The client
+	then must use sendmsg() or sendto() with the MSG_FASTOPEN flag,
+	rather than connect() to send data in SYN.
+
+	The server support is enabled by flag 0x2 (off by default). Then
+	either enable for all listeners with another flag (0x400) or
+	enable individual listeners via TCP_FASTOPEN socket option with
+	the option value being the length of the syn-data backlog.
+
+	The values (bitmap) are
+
+	=====  ======== ======================================================
+	  0x1  (client) enables sending data in the opening SYN on the client.
+	  0x2  (server) enables the server support, i.e., allowing data in
+			a SYN packet to be accepted and passed to the
+			application before 3-way handshake finishes.
+	  0x4  (client) send data in the opening SYN regardless of cookie
+			availability and without a cookie option.
+	0x200  (server) accept data-in-SYN w/o any cookie option present.
+	0x400  (server) enable all listeners to support Fast Open by
+			default without explicit TCP_FASTOPEN socket option.
+	=====  ======== ======================================================
+
+	Default: 0x1
+
+	Note that additional client or server features are only
+	effective if the basic support (0x1 and 0x2) is enabled respectively.
+
+tcp_fastopen_blackhole_timeout_sec - INTEGER
+	Initial time period in seconds to disable Fastopen on active TCP sockets
+	when a TFO firewall blackhole issue happens.
+	This time period will grow exponentially when more blackhole issues
+	get detected right after Fastopen is re-enabled and will reset to
+	initial value when the blackhole issue goes away.
+	0 to disable the blackhole detection.
+
+	By default, it is set to 1hr.
+
+tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
+	The list consists of a primary key and an optional backup key. The
+	primary key is used for both creating and validating cookies, while the
+	optional backup key is only used for validating cookies. The purpose of
+	the backup key is to maximize TFO validation when keys are rotated.
+
+	A randomly chosen primary key may be configured by the kernel if
+	the tcp_fastopen sysctl is set to 0x400 (see above), or if the
+	TCP_FASTOPEN setsockopt() optname is set and a key has not been
+	previously configured via sysctl. If keys are configured via
+	setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
+	per-socket keys will be used instead of any keys that are specified via
+	sysctl.
+
+	A key is specified as 4 8-digit hexadecimal integers which are separated
+	by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
+	omitted. A primary and a backup key may be specified by separating them
+	by a comma. If only one key is specified, it becomes the primary key and
+	any previously configured backup keys are removed.
+
+tcp_syn_retries - INTEGER
+	Number of times initial SYNs for an active TCP connection attempt
+	will be retransmitted. Should not be higher than 127. Default value
+	is 6, which corresponds to 63 seconds till the last retransmission
+	with the current initial RTO of 1 second. With this the final timeout
+	for an active TCP connection attempt will happen after 127 seconds.
+
+tcp_timestamps - INTEGER
+	Enable timestamps as defined in RFC1323.
+
+	- 0: Disabled.
+	- 1: Enable timestamps as defined in RFC1323 and use random offset for
+	  each connection rather than only using the current time.
+	- 2: Like 1, but without random offsets.
+
+	Default: 1
+
+tcp_min_tso_segs - INTEGER
+	Minimal number of segments per TSO frame.
+
+	Since linux-3.12, TCP does an automatic sizing of TSO frames,
+	depending on flow rate, instead of filling 64Kbytes packets.
+	For specific usages, it's possible to force TCP to build big
+	TSO frames. Note that TCP stack might split too big TSO packets
+	if available window is too small.
+
+	Default: 2
+
+tcp_pacing_ss_ratio - INTEGER
+	sk->sk_pacing_rate is set by TCP stack using a ratio applied
+	to current rate. (current_rate = cwnd * mss / srtt)
+	If TCP is in slow start, tcp_pacing_ss_ratio is applied
+	to let TCP probe for bigger speeds, assuming cwnd can be
+	doubled every other RTT.
+
+	Default: 200
+
+tcp_pacing_ca_ratio - INTEGER
+	sk->sk_pacing_rate is set by TCP stack using a ratio applied
+	to current rate. (current_rate = cwnd * mss / srtt)
+	If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio
+	is applied to conservatively probe for bigger throughput.
+
+	Default: 120
+
+tcp_tso_win_divisor - INTEGER
+	This allows control over what percentage of the congestion window
+	can be consumed by a single TSO frame.
+	The setting of this parameter is a choice between burstiness and
+	building larger TSO frames.
+
+	Default: 3
+
+tcp_tw_reuse - INTEGER
+	Enable reuse of TIME-WAIT sockets for new connections when it is
+	safe from protocol viewpoint.
+
+	- 0 - disable
+	- 1 - global enable
+	- 2 - enable for loopback traffic only
+
+	It should not be changed without advice/request of technical
+	experts.
+
+	Default: 2
+
+tcp_window_scaling - BOOLEAN
+	Enable window scaling as defined in RFC1323.
+
+tcp_wmem - vector of 3 INTEGERs: min, default, max
+	min: Amount of memory reserved for send buffers for TCP sockets.
+	Each TCP socket is entitled to use it from the moment it is created.
+
+	Default: 4K
+
+	default: initial size of send buffer used by TCP sockets.  This
+	value overrides net.core.wmem_default used by other protocols.
+
+	It is usually lower than net.core.wmem_default.
+
+	Default: 16K
+
+	max: Maximal amount of memory allowed for automatically tuned
+	send buffers for TCP sockets. This value does not override
+	net.core.wmem_max.  Calling setsockopt() with SO_SNDBUF disables
+	automatic tuning of that socket's send buffer size, in which case
+	this value is ignored.
+
+	Default: between 64K and 4MB, depending on RAM size.
+
+tcp_notsent_lowat - UNSIGNED INTEGER
+	A TCP socket can control the amount of unsent bytes in its write queue,
+	thanks to TCP_NOTSENT_LOWAT socket option. poll()/select()/epoll()
+	reports POLLOUT events if the amount of unsent bytes is below a per
+	socket value, and if the write queue is not full. sendmsg() will
+	also not add new buffers if the limit is hit.
+
+	This global variable controls the amount of unsent data for
+	sockets not using TCP_NOTSENT_LOWAT. For these sockets, a change
+	to the global variable has immediate effect.
+
+	Default: UINT_MAX (0xFFFFFFFF)
+
+tcp_workaround_signed_windows - BOOLEAN
+	If set, assume no receipt of a window scaling option means the
+	remote TCP is broken and treats the window as a signed quantity.
+	If unset, assume the remote TCP is not broken even if we do
+	not receive a window scaling option from them.
+
+	Default: 0
+
+tcp_thin_linear_timeouts - BOOLEAN
+	Enable dynamic triggering of linear timeouts for thin streams.
+	If set, a check is performed upon retransmission by timeout to
+	determine if the stream is thin (less than 4 packets in flight).
+	As long as the stream is found to be thin, up to 6 linear
+	timeouts may be performed before exponential backoff mode is
+	initiated. This improves retransmission latency for
+	non-aggressive thin streams, often found to be time-dependent.
+	For more information on thin streams, see
+	Documentation/networking/tcp-thin.txt
+
+	Default: 0
+
+tcp_limit_output_bytes - INTEGER
+	Controls TCP Small Queue limit per tcp socket.
+	TCP bulk sender tends to increase packets in flight until it
+	gets loss notifications. With SNDBUF autotuning, this can
+	result in a large amount of packets queued on the local machine
+	(e.g.: qdiscs, CPU backlog, or device) hurting latency of other
+	flows, for typical pfifo_fast qdiscs.  tcp_limit_output_bytes
+	limits the number of bytes on qdisc or device to reduce artificial
+	RTT/cwnd and reduce bufferbloat.
+
+	Default: 1048576 (16 * 65536)
+
+tcp_challenge_ack_limit - INTEGER
+	Limits number of Challenge ACK sent per second, as recommended
+	in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
+	Default: 1000
+
+tcp_rx_skb_cache - BOOLEAN
+	Controls a per TCP socket cache of one skb, that might help
+	performance of some workloads. This might be dangerous
+	on systems with a lot of TCP sockets, since it increases
+	memory usage.
+
+	Default: 0 (disabled)
+
+UDP variables
+=============
+
+udp_l3mdev_accept - BOOLEAN
+	Enabling this option allows a "global" bound socket to work
+	across L3 master domains (e.g., VRFs) with packets capable of
+	being received regardless of the L3 domain in which they
+	originated. Only valid when the kernel was compiled with
+	CONFIG_NET_L3_MASTER_DEV.
+
+	Default: 0 (disabled)
+
+udp_mem - vector of 3 INTEGERs: min, pressure, max
+	Number of pages allowed for queueing by all UDP sockets.
+
+	min: Below this number of pages UDP is not bothered about its
+	memory appetite. When amount of memory allocated by UDP exceeds
+	this number, UDP starts to moderate memory usage.
+
+	pressure: This value was introduced to follow format of tcp_mem.
+
+	max: Number of pages allowed for queueing by all UDP sockets.
+
+	Default is calculated at boot time from amount of available memory.
+
+udp_rmem_min - INTEGER
+	Minimal size of receive buffer used by UDP sockets in moderation.
+	Each UDP socket is able to use the size for receiving data, even if
+	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
+
+	Default: 4K
+
+udp_wmem_min - INTEGER
+	Minimal size of send buffer used by UDP sockets in moderation.
+	Each UDP socket is able to use the size for sending data, even if
+	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
+
+	Default: 4K
+
+RAW variables
+=============
+
+raw_l3mdev_accept - BOOLEAN
+	Enabling this option allows a "global" bound socket to work
+	across L3 master domains (e.g., VRFs) with packets capable of
+	being received regardless of the L3 domain in which they
+	originated. Only valid when the kernel was compiled with
+	CONFIG_NET_L3_MASTER_DEV.
+
+	Default: 1 (enabled)
+
+CIPSOv4 Variables
+=================
+
+cipso_cache_enable - BOOLEAN
+	If set, enable additions to and lookups from the CIPSO label mapping
+	cache.  If unset, additions are ignored and lookups always result in a
+	miss.  However, regardless of the setting the cache is still
+	invalidated when required, which means you can safely toggle this on and
+	off and the cache will always be "safe".
+
+	Default: 1
+
+cipso_cache_bucket_size - INTEGER
+	The CIPSO label cache consists of a fixed size hash table with each
+	hash bucket containing a number of cache entries.  This variable limits
+	the number of entries in each hash bucket; the larger the value the
+	more CIPSO label mappings that can be cached.  When the number of
+	entries in a given hash bucket reaches this limit adding new entries
+	causes the oldest entry in the bucket to be removed to make room.
+
+	Default: 10
+
+cipso_rbm_optfmt - BOOLEAN
+	Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
+	the CIPSO draft specification (see Documentation/netlabel for details).
+	This means that when set the CIPSO tag will be padded with empty
+	categories in order to make the packet data 32-bit aligned.
+
+	Default: 0
+
+cipso_rbm_structvalid - BOOLEAN
+	If set, do a very strict check of the CIPSO option when
+	ip_options_compile() is called.  If unset, relax the checks done during
+	ip_options_compile().  Either way is "safe" as errors are caught
+	elsewhere in the CIPSO processing code but setting this to 0 (False) should
+	result in less work (i.e. it should be faster) but could cause problems
+	with other implementations that require strict checking.
+
+	Default: 0
+
+IP Variables
+============
+
+ip_local_port_range - 2 INTEGERS
+	Defines the local port range that is used by TCP and UDP to
+	choose the local port. The first number is the first, the
+	second the last local port number.
+	If possible, it is better these numbers have different parity
+	(one even and one odd value).
+	Must be greater than or equal to ip_unprivileged_port_start.
+	The default values are 32768 and 60999 respectively.
+
+ip_local_reserved_ports - list of comma separated ranges
+	Specify the ports which are reserved for known third-party
+	applications. These ports will not be used by automatic port
+	assignments (e.g. when calling connect() or bind() with port
+	number 0). Explicit port allocation behavior is unchanged.
+
+	The format used for both input and output is a comma separated
+	list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
+	10). Writing to the file will clear all previously reserved
+	ports and update the current list with the one given in the
+	input.
+
+	Note that ip_local_port_range and ip_local_reserved_ports
+	settings are independent and both are considered by the kernel
+	when determining which ports are available for automatic port
+	assignments.
+
+	You can reserve ports which are not in the current
+	ip_local_port_range, e.g.::
+
+	    $ cat /proc/sys/net/ipv4/ip_local_port_range
+	    32000	60999
+	    $ cat /proc/sys/net/ipv4/ip_local_reserved_ports
+	    8080,9148
+
+	although this is redundant. However such a setting is useful
+	if later the port range is changed to a value that will
+	include the reserved ports.
+
+	Default: Empty
+
+ip_unprivileged_port_start - INTEGER
+	This is a per-namespace sysctl.  It defines the first
+	unprivileged port in the network namespace.  Privileged ports
+	require root or CAP_NET_BIND_SERVICE in order to bind to them.
+	To disable all privileged ports, set this to 0.  They must not
+	overlap with the ip_local_port_range.
+
+	Default: 1024
+
+ip_nonlocal_bind - BOOLEAN
+	If set, allows processes to bind() to non-local IP addresses,
+	which can be quite useful - but may break some applications.
+
+	Default: 0
+
+ip_autobind_reuse - BOOLEAN
+	By default, bind() does not select the ports automatically even if
+	the new socket and all sockets bound to the port have SO_REUSEADDR.
+	ip_autobind_reuse allows bind() to reuse the port and this is useful
+	when you use bind()+connect(), but may break some applications.
+	The preferred solution is to use IP_BIND_ADDRESS_NO_PORT and this
+	option should only be set by experts.
+	Default: 0
+
+ip_dynaddr - BOOLEAN
+	If set non-zero, enables support for dynamic addresses.
+	If set to a non-zero value larger than 1, a kernel log
+	message will be printed when dynamic address rewriting
+	occurs.
+
+	Default: 0
+
+ip_early_demux - BOOLEAN
+	Optimize input packet processing down to one demux for
+	certain kinds of local sockets.  Currently we only do this
+	for established TCP and connected UDP sockets.
+
+	It may add an additional cost for pure routing workloads that
+	reduces overall throughput, in such case you should disable it.
+
+	Default: 1
+
+ping_group_range - 2 INTEGERS
+	Restrict ICMP_PROTO datagram sockets to users in the group range.
+	The default is "1 0", meaning, that nobody (not even root) may
+	create ping sockets.  Setting it to "100 100" would grant permissions
+	to the single group. "0 4294967295" would enable it for the world, "100
+	4294967295" would enable it for the users, but not daemons.
+
+tcp_early_demux - BOOLEAN
+	Enable early demux for established TCP sockets.
+
+	Default: 1
+
+udp_early_demux - BOOLEAN
+	Enable early demux for connected UDP sockets. Disable this if
+	your system could experience more unconnected load.
+
+	Default: 1
+
+icmp_echo_ignore_all - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it.
+
+	Default: 0
+
+icmp_echo_ignore_broadcasts - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO and
+	TIMESTAMP requests sent to it via broadcast/multicast.
+
+	Default: 1
+
+icmp_ratelimit - INTEGER
+	Limit the maximal rates for sending ICMP packets whose type matches
+	icmp_ratemask (see below) to specific targets.
+	0 to disable any limiting,
+	otherwise the minimal space between responses in milliseconds.
+	Note that another sysctl, icmp_msgs_per_sec limits the number
+	of ICMP packets sent to all targets.
+
+	Default: 1000
+
+icmp_msgs_per_sec - INTEGER
+	Limit maximal number of ICMP packets sent per second from this host.
+	Only messages whose type matches icmp_ratemask (see below) are
+	controlled by this limit.
+
+	Default: 1000
+
+icmp_msgs_burst - INTEGER
+	icmp_msgs_per_sec controls number of ICMP packets sent per second,
+	while icmp_msgs_burst controls the burst size of these packets.
+
+	Default: 50
+
+icmp_ratemask - INTEGER
+	Mask made of ICMP types for which rates are being limited.
+
+	Significant bits: IHGFEDCBA9876543210
+
+	Default mask:     0000001100000011000 (6168)
+
+	Bit definitions (see include/linux/icmp.h):
+
+		= =========================
+		0 Echo Reply
+		3 Destination Unreachable [1]_
+		4 Source Quench [1]_
+		5 Redirect
+		8 Echo Request
+		B Time Exceeded [1]_
+		C Parameter Problem [1]_
+		D Timestamp Request
+		E Timestamp Reply
+		F Info Request
+		G Info Reply
+		H Address Mask Request
+		I Address Mask Reply
+		= =========================
+
+	.. [1] These are rate limited by default (see default mask above)
+
+icmp_ignore_bogus_error_responses - BOOLEAN
+	Some routers violate RFC1122 by sending bogus responses to broadcast
+	frames.  Such violations are normally logged via a kernel warning.
+	If this is set to TRUE, the kernel will not give such warnings, which
+	will avoid log file clutter.
+
+	Default: 1
+
+icmp_errors_use_inbound_ifaddr - BOOLEAN
+
+	If zero, icmp error messages are sent with the primary address of
+	the exiting interface.
+
+	If non-zero, the message will be sent with the primary address of
+	the interface that received the packet that caused the icmp error.
+	This is the behaviour many network administrators will expect from
+	a router. And it can make debugging complicated network layouts
+	much easier.
+
+	Note that if no primary address exists for the interface selected,
+	then the primary address of the first non-loopback interface that
+	has one will be used regardless of this setting.
+
+	Default: 0
+
+igmp_max_memberships - INTEGER
+	Change the maximum number of multicast groups we can subscribe to.
+	Default: 20
+
+	Theoretical maximum value is bounded by having to send a membership
+	report in a single datagram (i.e. the report can't span multiple
+	datagrams, or risk confusing the switch and leaving groups you don't
+	intend to).
+
+	The number of supported groups 'M' is bounded by the number of group
+	report entries you can fit into a single datagram of 65535 bytes.
+
+	M = (65536 - sizeof(ip header)) / sizeof(Group record)
+
+	Group records are variable length, with a minimum of 12 bytes.
+	So net.ipv4.igmp_max_memberships should not be set higher than:
+
+	(65536-24) / 12 = 5459
+
+	The value 5459 assumes no IP header options, so in practice
+	this number may be lower.
+
+igmp_max_msf - INTEGER
+	Maximum number of addresses allowed in the source filter list for a
+	multicast group.
+
+	Default: 10
+
+igmp_qrv - INTEGER
+	Controls the IGMP query robustness variable (see RFC2236 8.1).
+
+	Default: 2 (as specified by RFC2236 8.1)
+
+	Minimum: 1 (as specified by RFC6636 4.5)
+
+force_igmp_version - INTEGER
+	- 0 - (default) No enforcement of an IGMP version, IGMPv1/v2 fallback
+	  allowed. Will go back to IGMPv3 mode again once all IGMPv1/v2 Querier
+	  Present timers expire.
+	- 1 - Enforce use of IGMP version 1. Will also reply with an IGMPv1
+	  report if an IGMPv2/v3 query is received.
+	- 2 - Enforce use of IGMP version 2. Will fall back to IGMPv1 if an
+	  IGMPv1 query message is received. Will reply with a report if an
+	  IGMPv3 query is received.
+	- 3 - Enforce use of IGMP version 3. Reacts the same as the default 0.
+
+	.. note::
+
+	   this is not the same as force_mld_version because the IGMPv3 RFC3376
+	   Security Considerations do not clearly state that we may
+	   ignore other version messages completely, as the MLDv2 RFC3810 does.
+	   So keeping this value at the default 0 is recommended.
+
+``conf/interface/*``
+	changes special settings per interface (where
+	"interface" is the name of your network interface)
+
+``conf/all/*``
+	  is special, changes the settings for all interfaces
+
+log_martians - BOOLEAN
+	Log packets with impossible addresses to kernel log.
+	log_martians for the interface will be enabled if at least one of
+	conf/{all,interface}/log_martians is set to TRUE,
+	it will be disabled otherwise
+
+accept_redirects - BOOLEAN
+	Accept ICMP redirect messages.
+	accept_redirects for the interface will be enabled if:
+
+	- both conf/{all,interface}/accept_redirects are TRUE in the case
+	  forwarding for the interface is enabled
+
+	or
+
+	- at least one of conf/{all,interface}/accept_redirects is TRUE in the
+	  case forwarding for the interface is disabled
+
+	accept_redirects for the interface will be disabled otherwise
+
+	default:
+
+		- TRUE (host)
+		- FALSE (router)
+
+forwarding - BOOLEAN
+	Enable IP forwarding on this interface.  This controls whether packets
+	received _on_ this interface can be forwarded.
+
+mc_forwarding - BOOLEAN
+	Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
+	and a multicast routing daemon is required.
+	conf/all/mc_forwarding must also be set to TRUE to enable multicast
+	routing for the interface
+
+medium_id - INTEGER
+	Integer value used to differentiate the devices by the medium they
+	are attached to. Two devices can have different id values when
+	the broadcast packets are received only on one of them.
+	The default value 0 means that the device is the only interface
+	to its medium, value of -1 means that medium is not known.
+
+	Currently, it is used to change the proxy_arp behavior:
+	the proxy_arp feature is enabled for packets forwarded between
+	two devices attached to different media.
+
+proxy_arp - BOOLEAN
+	Do proxy arp.
+
+	proxy_arp for the interface will be enabled if at least one of
+	conf/{all,interface}/proxy_arp is set to TRUE,
+	it will be disabled otherwise
+
+proxy_arp_pvlan - BOOLEAN
+	Private VLAN proxy arp.
+
+	Basically allow proxy arp replies back to the same interface
+	(from which the ARP request/solicitation was received).
+
+	This is done to support (ethernet) switch features, like RFC
+	3069, where the individual ports are NOT allowed to
+	communicate with each other, but they are allowed to talk to
+	the upstream router.  As described in RFC 3069, it is possible
+	to allow these hosts to communicate through the upstream
+	router by proxy_arp'ing. Doesn't need to be used together with
+	proxy_arp.
+
+	This technology is known by different names:
+
+	  In RFC 3069 it is called VLAN Aggregation.
+	  Cisco and Allied Telesyn call it Private VLAN.
+	  Hewlett-Packard call it Source-Port filtering or port-isolation.
+	  Ericsson call it MAC-Forced Forwarding (RFC Draft).
+
+shared_media - BOOLEAN
+	Send(router) or accept(host) RFC1620 shared media redirects.
+	Overrides secure_redirects.
+
+	shared_media for the interface will be enabled if at least one of
+	conf/{all,interface}/shared_media is set to TRUE,
+	it will be disabled otherwise
+
+	default TRUE
+
+secure_redirects - BOOLEAN
+	Accept ICMP redirect messages only to gateways listed in the
+	interface's current gateway list. Even if disabled, RFC1122 redirect
+	rules still apply.
+
+	Overridden by shared_media.
+
+	secure_redirects for the interface will be enabled if at least one of
+	conf/{all,interface}/secure_redirects is set to TRUE,
+	it will be disabled otherwise
+
+	default TRUE
+
+send_redirects - BOOLEAN
+	Send redirects, if router.
+
+	send_redirects for the interface will be enabled if at least one of
+	conf/{all,interface}/send_redirects is set to TRUE,
+	it will be disabled otherwise
+
+	Default: TRUE
+
+bootp_relay - BOOLEAN
+	Accept packets with source address 0.b.c.d destined
+	not to this host as local ones. It is supposed, that
+	BOOTP relay daemon will catch and forward such packets.
+	conf/all/bootp_relay must also be set to TRUE to enable BOOTP relay
+	for the interface
+
+	default FALSE
+
+	Not Implemented Yet.
+
+accept_source_route - BOOLEAN
+	Accept packets with SRR option.
+	conf/all/accept_source_route must also be set to TRUE to accept packets
+	with SRR option on the interface
+
+	default
+
+		- TRUE (router)
+		- FALSE (host)
+
+accept_local - BOOLEAN
+	Accept packets with local source addresses. In combination with
+	suitable routing, this can be used to direct packets between two
+	local interfaces over the wire and have them accepted properly.
+	default FALSE
+
+route_localnet - BOOLEAN
+	Do not consider loopback addresses as martian source or destination
+	while routing. This enables the use of 127/8 for local routing purposes.
+
+	default FALSE
+
+rp_filter - INTEGER
+	- 0 - No source validation.
+	- 1 - Strict mode as defined in RFC3704 Strict Reverse Path
+	  Each incoming packet is tested against the FIB and if the interface
+	  is not the best reverse path the packet check will fail.
+	  By default failed packets are discarded.
+	- 2 - Loose mode as defined in RFC3704 Loose Reverse Path
+	  Each incoming packet's source address is also tested against the FIB
+	  and if the source address is not reachable via any interface
+	  the packet check will fail.
+
+	Current recommended practice in RFC3704 is to enable strict mode
+	to prevent IP spoofing from DDoS attacks. If using asymmetric routing
+	or other complicated routing, then loose mode is recommended.
+
+	The max value from conf/{all,interface}/rp_filter is used
+	when doing source validation on the {interface}.
+
+	Default value is 0. Note that some distributions enable it
+	in startup scripts.
+
+arp_filter - BOOLEAN
+	- 1 - Allows you to have multiple network interfaces on the same
+	  subnet, and have the ARPs for each interface be answered
+	  based on whether or not the kernel would route a packet from
+	  the ARP'd IP out that interface (therefore you must use source
+	  based routing for this to work). In other words it allows control
+	  of which cards (usually 1) will respond to an arp request.
+
+	- 0 - (default) The kernel can respond to arp requests with addresses
+	  from other interfaces. This may seem wrong but it usually makes
+	  sense, because it increases the chance of successful communication.
+	  IP addresses are owned by the complete host on Linux, not by
+	  particular interfaces. Only for more complex setups like load-
+	  balancing, does this behaviour cause problems.
+
+	arp_filter for the interface will be enabled if at least one of
+	conf/{all,interface}/arp_filter is set to TRUE,
+	it will be disabled otherwise
+
+arp_announce - INTEGER
+	Define different restriction levels for announcing the local
+	source IP address from IP packets in ARP requests sent on
+	interface:
+
+	- 0 - (default) Use any local address, configured on any interface
+	- 1 - Try to avoid local addresses that are not in the target's
+	  subnet for this interface. This mode is useful when target
+	  hosts reachable via this interface require the source IP
+	  address in ARP requests to be part of their logical network
+	  configured on the receiving interface. When we generate the
+	  request we will check all our subnets that include the
+	  target IP and will preserve the source address if it is from
+	  such subnet. If there is no such subnet we select source
+	  address according to the rules for level 2.
+	- 2 - Always use the best local address for this target.
+	  In this mode we ignore the source address in the IP packet
+	  and try to select local address that we prefer for talks with
+	  the target host. Such local address is selected by looking
+	  for primary IP addresses on all our subnets on the outgoing
+	  interface that include the target IP address. If no suitable
+	  local address is found we select the first local address
+	  we have on the outgoing interface or on all other interfaces,
+	  with the hope we will receive reply for our request and
+	  even sometimes no matter the source IP address we announce.
+
+	The max value from conf/{all,interface}/arp_announce is used.
+
+	Increasing the restriction level gives more chance for
+	receiving answer from the resolved target while decreasing
+	the level announces more valid sender's information.
+
+arp_ignore - INTEGER
+	Define different modes for sending replies in response to
+	received ARP requests that resolve local target IP addresses:
+
+	- 0 - (default): reply for any local target IP address, configured
+	  on any interface
+	- 1 - reply only if the target IP address is local address
+	  configured on the incoming interface
+	- 2 - reply only if the target IP address is a local address
+	  configured on the incoming interface and both it and the
+	  sender's IP address are part of the same subnet on this interface
+	- 3 - do not reply for local addresses configured with scope host,
+	  only resolutions for global and link addresses are replied
+	- 4-7 - reserved
+	- 8 - do not reply for all local addresses
+
+	The max value from conf/{all,interface}/arp_ignore is used
+	when ARP request is received on the {interface}
+
+arp_notify - BOOLEAN
+	Define mode for notification of address and device changes.
+
+	 ==  ==========================================================
+	  0  (default): do nothing
+	  1  Generate gratuitous arp requests when device is brought up
+	     or hardware address changes.
+	 ==  ==========================================================
+
+arp_accept - BOOLEAN
+	Define behavior for gratuitous ARP frames whose IP is not
+	already present in the ARP table:
+
+	- 0 - don't create new entries in the ARP table
+	- 1 - create new entries in the ARP table
+
+	Both reply-type and request-type gratuitous ARP frames will trigger the
+	ARP table to be updated, if this setting is on.
+
+	If the ARP table already contains the IP address of the
+	gratuitous arp frame, the arp table will be updated regardless
+	if this setting is on or off.
+
+mcast_solicit - INTEGER
+	The maximum number of multicast probes in INCOMPLETE state,
+	when the associated hardware address is unknown.  Defaults
+	to 3.
+
+ucast_solicit - INTEGER
+	The maximum number of unicast probes in PROBE state, when
+	the hardware address is being reconfirmed.  Defaults to 3.
+
+app_solicit - INTEGER
+	The maximum number of probes to send to the user space ARP daemon
+	via netlink before dropping back to multicast probes (see
+	mcast_resolicit).  Defaults to 0.
+
+mcast_resolicit - INTEGER
+	The maximum number of multicast probes after unicast and
+	app probes in PROBE state.  Defaults to 0.
+
+disable_policy - BOOLEAN
+	Disable IPSEC policy (SPD) for this interface
+
+disable_xfrm - BOOLEAN
+	Disable IPSEC encryption on this interface, whatever the policy
+
+igmpv2_unsolicited_report_interval - INTEGER
+	The interval in milliseconds in which the next unsolicited
+	IGMPv1 or IGMPv2 report retransmit will take place.
+
+	Default: 10000 (10 seconds)
+
+igmpv3_unsolicited_report_interval - INTEGER
+	The interval in milliseconds in which the next unsolicited
+	IGMPv3 report retransmit will take place.
+
+	Default: 1000 (1 second)
+
+promote_secondaries - BOOLEAN
+	When a primary IP address is removed from this interface
+	promote a corresponding secondary IP address instead of
+	removing all the corresponding secondary IP addresses.
+
+drop_unicast_in_l2_multicast - BOOLEAN
+	Drop any unicast IP packets that are received in link-layer
+	multicast (or broadcast) frames.
+
+	This behavior (for multicast) is actually a SHOULD in RFC
+	1122, but is disabled by default for compatibility reasons.
+
+	Default: off (0)
+
+drop_gratuitous_arp - BOOLEAN
+	Drop all gratuitous ARP frames, for example if there's a known
+	good ARP proxy on the network and such frames need not be used
+	(or in the case of 802.11, must not be used to prevent attacks.)
+
+	Default: off (0)
+
+
+tag - INTEGER
+	Allows you to write a number, which can be used as required.
+
+	Default value is 0.
+
+xfrm4_gc_thresh - INTEGER
+	(Obsolete since linux-4.14)
+	The threshold at which we will start garbage collecting for IPv4
+	destination cache entries.  At twice this value the system will
+	refuse new allocations.
+
+igmp_link_local_mcast_reports - BOOLEAN
+	Enable IGMP reports for link local multicast groups in the
+	224.0.0.X range.
+
+	Default TRUE
+
+Alexey Kuznetsov.
+kuznet@ms2.inr.ac.ru
+
+Updated by:
+
+- Andi Kleen
+  ak@muc.de
+- Nicolas Delon
+  delon.nicolas@wanadoo.fr
+
+
+
+
+/proc/sys/net/ipv6/* Variables
+==============================
+
+IPv6 has no global variables such as tcp_*.  tcp_* settings under ipv4/ also
+apply to IPv6 [XXX?].
+
+bindv6only - BOOLEAN
+	Default value for IPV6_V6ONLY socket option,
+	which restricts use of the IPv6 socket to IPv6 communication
+	only.
+
+		- TRUE: disable IPv4-mapped address feature
+		- FALSE: enable IPv4-mapped address feature
+
+	Default: FALSE (as specified in RFC3493)
+
+flowlabel_consistency - BOOLEAN
+	Protect the consistency (and unicity) of flow label.
+	You have to disable it to use IPV6_FL_F_REFLECT flag on the
+	flow label manager.
+
+	- TRUE: enabled
+	- FALSE: disabled
+
+	Default: TRUE
+
+auto_flowlabels - INTEGER
+	Automatically generate flow labels based on a flow hash of the
+	packet. This allows intermediate devices, such as routers, to
+	identify packet flows for mechanisms like Equal Cost Multipath
+	Routing (see RFC 6438).
+
+	=  ===========================================================
+	0  automatic flow labels are completely disabled
+	1  automatic flow labels are enabled by default, they can be
+	   disabled on a per socket basis using the IPV6_AUTOFLOWLABEL
+	   socket option
+	2  automatic flow labels are allowed, they may be enabled on a
+	   per socket basis using the IPV6_AUTOFLOWLABEL socket option
+	3  automatic flow labels are enabled and enforced, they cannot
+	   be disabled by the socket option
+	=  ===========================================================
+
+	Default: 1
+
+flowlabel_state_ranges - BOOLEAN
+	Split the flow label number space into two ranges. 0-0x7FFFF is
+	reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
+	is reserved for stateless flow labels as described in RFC6437.
+
+	- TRUE: enabled
+	- FALSE: disabled
+
+	Default: true
+
+flowlabel_reflect - INTEGER
+	Control flow label reflection. Needed for Path MTU
+	Discovery to work with Equal Cost Multipath Routing in anycast
+	environments. See RFC 7690 and:
+	https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
+
+	This is a bitmask.
+
+	- 1: enabled for established flows
+
+	  Note that this prevents automatic flowlabel changes, as done
+	  in "tcp: change IPv6 flow-label upon receiving spurious retransmission"
+	  and "tcp: Change txhash on every SYN and RTO retransmit"
+
+	- 2: enabled for TCP RESET packets (no active listener)
+	  If set, a RST packet sent in response to a SYN packet on a closed
+	  port will reflect the incoming flow label.
+
+	- 4: enabled for ICMPv6 echo reply messages.
+
+	Default: 0
+
+fib_multipath_hash_policy - INTEGER
+	Controls which hash policy to use for multipath routes.
+
+	Default: 0 (Layer 3)
+
+	Possible values:
+
+	- 0 - Layer 3 (source and destination addresses plus flow label)
+	- 1 - Layer 4 (standard 5-tuple)
+	- 2 - Layer 3 or inner Layer 3 if present
+
+anycast_src_echo_reply - BOOLEAN
+	Controls the use of anycast addresses as source addresses for ICMPv6
+	echo reply
+
+	- TRUE:  enabled
+	- FALSE: disabled
+
+	Default: FALSE
+
+idgen_delay - INTEGER
+	Controls the delay in seconds after which time to retry
+	privacy stable address generation if a DAD conflict is
+	detected.
+
+	Default: 1 (as specified in RFC7217)
+
+idgen_retries - INTEGER
+	Controls the number of retries to generate a stable privacy
+	address if a DAD conflict is detected.
+
+	Default: 3 (as specified in RFC7217)
+
+mld_qrv - INTEGER
+	Controls the MLD query robustness variable (see RFC3810 9.1).
+
+	Default: 2 (as specified by RFC3810 9.1)
+
+	Minimum: 1 (as specified by RFC6636 4.5)
+
+max_dst_opts_number - INTEGER
+	Maximum number of non-padding TLVs allowed in a Destination
+	options extension header. If this value is less than zero
+	then unknown options are disallowed and the number of known
+	TLVs allowed is the absolute value of this number.
+
+	Default: 8
+
+max_hbh_opts_number - INTEGER
+	Maximum number of non-padding TLVs allowed in a Hop-by-Hop
+	options extension header. If this value is less than zero
+	then unknown options are disallowed and the number of known
+	TLVs allowed is the absolute value of this number.
+
+	Default: 8
+
+max_dst_opts_length - INTEGER
+	Maximum length allowed for a Destination options extension
+	header.
+
+	Default: INT_MAX (unlimited)
+
+max_hbh_length - INTEGER
+	Maximum length allowed for a Hop-by-Hop options extension
+	header.
+
+	Default: INT_MAX (unlimited)
+
+skip_notify_on_dev_down - BOOLEAN
+	Controls whether an RTM_DELROUTE message is generated for routes
+	removed when a device is taken down or deleted. IPv4 does not
+	generate this message; IPv6 does by default. Setting this sysctl
+	to true skips the message, making IPv4 and IPv6 on par in relying
+	on userspace caches to track link events and evict routes.
+
+	Default: false (generate message)
+
+nexthop_compat_mode - BOOLEAN
+	New nexthop API provides a means for managing nexthops independent of
+	prefixes. Backwards compatibility with old route format is enabled by
+	default which means route dumps and notifications contain the new
+	nexthop attribute but also the full, expanded nexthop definition.
+	Further, updates or deletes of a nexthop configuration generate route
+	notifications for each fib entry using the nexthop. Once a system
+	understands the new API, this sysctl can be disabled to achieve full
+	performance benefits of the new API by disabling the nexthop expansion
+	and extraneous notifications.
+	Default: true (backward compat mode)
+
+IPv6 Fragmentation:
+
+ip6frag_high_thresh - INTEGER
+	Maximum memory used to reassemble IPv6 fragments. When
+	ip6frag_high_thresh bytes of memory is allocated for this purpose,
+	the fragment handler will toss packets until ip6frag_low_thresh
+	is reached.
+
+ip6frag_low_thresh - INTEGER
+	See ip6frag_high_thresh
+
+ip6frag_time - INTEGER
+	Time in seconds to keep an IPv6 fragment in memory.
+
+IPv6 Segment Routing:
+
+seg6_flowlabel - INTEGER
+	Controls the behaviour of computing the flowlabel of outer
+	IPv6 header in case of SR T.encaps
+
+	 == =======================================================
+	 -1  set flowlabel to zero.
+	  0  copy flowlabel from Inner packet in case of Inner IPv6
+	     (Set flowlabel to 0 in case IPv4/L2)
+	  1  Compute the flowlabel using seg6_make_flowlabel()
+	 == =======================================================
+
+	Default is 0.
+
+``conf/default/*``:
+	Change the interface-specific default settings.
+
+
+``conf/all/*``:
+	Change all the interface-specific settings.
+
+	[XXX:  Other special features than forwarding?]
+
+conf/all/forwarding - BOOLEAN
+	Enable global IPv6 forwarding between all interfaces.
+
+	IPv4 and IPv6 work differently here; e.g. netfilter must be used
+	to control which interfaces may forward packets and which not.
+
+	This also sets all interfaces' Host/Router setting
+	'forwarding' to the specified value.  See below for details.
+
+	This is referred to as global forwarding.
+
+proxy_ndp - BOOLEAN
+	Do proxy ndp.
+
+fwmark_reflect - BOOLEAN
+	Controls the fwmark of kernel-generated IPv6 reply packets that are not
+	associated with a socket (for example, TCP RSTs or ICMPv6 echo replies).
+	If unset, these packets have a fwmark of zero. If set, they have the
+	fwmark of the packet they are replying to.
+
+	Default: 0
+
+``conf/interface/*``:
+	Change special settings per interface.
+
+	The functional behaviour for certain settings is different
+	depending on whether local forwarding is enabled or not.
+
+accept_ra - INTEGER
+	Accept Router Advertisements; autoconfigure using them.
+
+	It also determines whether or not to transmit Router
+	Solicitations. If and only if the functional setting is to
+	accept Router Advertisements, Router Solicitations will be
+	transmitted.
+
+	Possible values are:
+
+		==  ===========================================================
+		 0  Do not accept Router Advertisements.
+		 1  Accept Router Advertisements if forwarding is disabled.
+		 2  Overrule forwarding behaviour. Accept Router Advertisements
+		    even if forwarding is enabled.
+		==  ===========================================================
+
+	Functional default:
+
+		- enabled if local forwarding is disabled.
+		- disabled if local forwarding is enabled.
+
+accept_ra_defrtr - BOOLEAN
+	Learn default router in Router Advertisement.
+
+	Functional default:
+
+		- enabled if accept_ra is enabled.
+		- disabled if accept_ra is disabled.
+
+accept_ra_from_local - BOOLEAN
+	Accept RA with source-address that is found on local machine
+	if the RA is otherwise proper and able to be accepted.
+
+	Default is to NOT accept these as it may be an un-intended
+	network loop.
+
+	Functional default:
+
+	   - enabled if accept_ra_from_local is enabled
+	     on a specific interface.
+	   - disabled if accept_ra_from_local is disabled
+	     on a specific interface.
+
+accept_ra_min_hop_limit - INTEGER
+	Minimum hop limit Information in Router Advertisement.
+
+	Hop limit Information in Router Advertisement less than this
+	variable shall be ignored.
+
+	Default: 1
+
+accept_ra_pinfo - BOOLEAN
+	Learn Prefix Information in Router Advertisement.
+
+	Functional default:
+
+		- enabled if accept_ra is enabled.
+		- disabled if accept_ra is disabled.
+
+accept_ra_rt_info_min_plen - INTEGER
+	Minimum prefix length of Route Information in RA.
+
+	Route Information w/ prefix smaller than this variable shall
+	be ignored.
+
+	Functional default:
+
+		* 0 if accept_ra_rtr_pref is enabled.
+		* -1 if accept_ra_rtr_pref is disabled.
+
+accept_ra_rt_info_max_plen - INTEGER
+	Maximum prefix length of Route Information in RA.
+
+	Route Information w/ prefix larger than this variable shall
+	be ignored.
+
+	Functional default:
+
+		* 0 if accept_ra_rtr_pref is enabled.
+		* -1 if accept_ra_rtr_pref is disabled.
+
+accept_ra_rtr_pref - BOOLEAN
+	Accept Router Preference in RA.
+
+	Functional default:
+
+		- enabled if accept_ra is enabled.
+		- disabled if accept_ra is disabled.
+
+accept_ra_mtu - BOOLEAN
+	Apply the MTU value specified in RA option 5 (RFC4861). If
+	disabled, the MTU specified in the RA will be ignored.
+
+	Functional default:
+
+		- enabled if accept_ra is enabled.
+		- disabled if accept_ra is disabled.
+
+accept_redirects - BOOLEAN
+	Accept Redirects.
+
+	Functional default:
+
+		- enabled if local forwarding is disabled.
+		- disabled if local forwarding is enabled.
+
+accept_source_route - INTEGER
+	Accept source routing (routing extension header).
+
+	- >= 0: Accept only routing header type 2.
+	- < 0: Do not accept routing header.
+
+	Default: 0
+
+autoconf - BOOLEAN
+	Autoconfigure addresses using Prefix Information in Router
+	Advertisements.
+
+	Functional default:
+
+		- enabled if accept_ra_pinfo is enabled.
+		- disabled if accept_ra_pinfo is disabled.
+
+dad_transmits - INTEGER
+	The amount of Duplicate Address Detection probes to send.
+
+	Default: 1
+
+forwarding - INTEGER
+	Configure interface-specific Host/Router behaviour.
+
+	.. note::
+
+	   It is recommended to have the same setting on all
+	   interfaces; mixed router/host scenarios are rather uncommon.
+
+	Possible values are:
+
+		- 0 Forwarding disabled
+		- 1 Forwarding enabled
+
+	**FALSE (0)**:
+
+	By default, Host behaviour is assumed.  This means:
+
+	1. IsRouter flag is not set in Neighbour Advertisements.
+	2. If accept_ra is TRUE (default), transmit Router
+	   Solicitations.
+	3. If accept_ra is TRUE (default), accept Router
+	   Advertisements (and do autoconfiguration).
+	4. If accept_redirects is TRUE (default), accept Redirects.
+
+	**TRUE (1)**:
+
+	If local forwarding is enabled, Router behaviour is assumed.
+	This means exactly the reverse from the above:
+
+	1. IsRouter flag is set in Neighbour Advertisements.
+	2. Router Solicitations are not sent unless accept_ra is 2.
+	3. Router Advertisements are ignored unless accept_ra is 2.
+	4. Redirects are ignored.
+
+	Default: 0 (disabled) if global forwarding is disabled (default),
+	otherwise 1 (enabled).
+
+hop_limit - INTEGER
+	Default Hop Limit to set.
+
+	Default: 64
+
+mtu - INTEGER
+	Default Maximum Transfer Unit
+
+	Default: 1280 (IPv6 required minimum)
+
+ip_nonlocal_bind - BOOLEAN
+	If set, allows processes to bind() to non-local IPv6 addresses,
+	which can be quite useful - but may break some applications.
+
+	Default: 0
+
+router_probe_interval - INTEGER
+	Minimum interval (in seconds) between Router Probing described
+	in RFC4191.
+
+	Default: 60
+
+router_solicitation_delay - INTEGER
+	Number of seconds to wait after interface is brought up
+	before sending Router Solicitations.
+
+	Default: 1
+
+router_solicitation_interval - INTEGER
+	Number of seconds to wait between Router Solicitations.
+
+	Default: 4
+
+router_solicitations - INTEGER
+	Number of Router Solicitations to send until assuming no
+	routers are present.
+
+	Default: 3
+
+use_oif_addrs_only - BOOLEAN
+	When enabled, the candidate source addresses for destinations
+	routed via this interface are restricted to the set of addresses
+	configured on this interface (vis. RFC 6724, section 4).
+
+	Default: false
+
+use_tempaddr - INTEGER
+	Preference for Privacy Extensions (RFC3041).
+
+	  * <= 0 : disable Privacy Extensions
+	  * == 1 : enable Privacy Extensions, but prefer public
+	    addresses over temporary addresses.
+	  * >  1 : enable Privacy Extensions and prefer temporary
+	    addresses over public addresses.
+
+	Default:
+
+		* 0 (for most devices)
+		* -1 (for point-to-point devices and loopback devices)
+
+temp_valid_lft - INTEGER
+	valid lifetime (in seconds) for temporary addresses.
+
+	Default: 604800 (7 days)
+
+temp_prefered_lft - INTEGER
+	Preferred lifetime (in seconds) for temporary addresses.
+
+	Default: 86400 (1 day)
+
+keep_addr_on_down - INTEGER
+	Keep all IPv6 addresses on an interface down event. If set static
+	global addresses with no expiration time are not flushed.
+
+	*   >0 : enabled
+	*    0 : system default
+	*   <0 : disabled
+
+	Default: 0 (addresses are removed)
+
+max_desync_factor - INTEGER
+	Maximum value for DESYNC_FACTOR, which is a random value
+	that ensures that clients don't synchronize with each
+	other and generate new addresses at exactly the same time.
+	value is in seconds.
+
+	Default: 600
+
+regen_max_retry - INTEGER
+	Number of attempts before giving up attempting to generate
+	valid temporary addresses.
+
+	Default: 5
+
+max_addresses - INTEGER
+	Maximum number of autoconfigured addresses per interface.  Setting
+	to zero disables the limitation.  It is not recommended to set this
+	value too large (or to zero) because it would be an easy way to
+	crash the kernel by allowing too many addresses to be created.
+
+	Default: 16
+
+disable_ipv6 - BOOLEAN
+	Disable IPv6 operation.  If accept_dad is set to 2, this value
+	will be dynamically set to TRUE if DAD fails for the link-local
+	address.
+
+	Default: FALSE (enable IPv6 operation)
+
+	When this value is changed from 1 to 0 (IPv6 is being enabled),
+	it will dynamically create a link-local address on the given
+	interface and start Duplicate Address Detection, if necessary.
+
+	When this value is changed from 0 to 1 (IPv6 is being disabled),
+	it will dynamically delete all addresses and routes on the given
+	interface. From now on it will not be possible to add addresses/routes
+	to the selected interface.
+
+accept_dad - INTEGER
+	Whether to accept DAD (Duplicate Address Detection).
+
+	 == ==============================================================
+	  0  Disable DAD
+	  1  Enable DAD (default)
+	  2  Enable DAD, and disable IPv6 operation if MAC-based duplicate
+	     link-local address has been found.
+	 == ==============================================================
+
+	DAD operation and mode on a given interface will be selected according
+	to the maximum value of conf/{all,interface}/accept_dad.
+
+force_tllao - BOOLEAN
+	Enable sending the target link-layer address option even when
+	responding to a unicast neighbor solicitation.
+
+	Default: FALSE
+
+	Quoting from RFC 2461, section 4.4, Target link-layer address:
+
+	"The option MUST be included for multicast solicitations in order to
+	avoid infinite Neighbor Solicitation "recursion" when the peer node
+	does not have a cache entry to return a Neighbor Advertisements
+	message.  When responding to unicast solicitations, the option can be
+	omitted since the sender of the solicitation has the correct link-
+	layer address; otherwise it would not have be able to send the unicast
+	solicitation in the first place. However, including the link-layer
+	address in this case adds little overhead and eliminates a potential
+	race condition where the sender deletes the cached link-layer address
+	prior to receiving a response to a previous solicitation."
+
+ndisc_notify - BOOLEAN
+	Define mode for notification of address and device changes.
+
+	* 0 - (default): do nothing
+	* 1 - Generate unsolicited neighbour advertisements when device is brought
+	  up or hardware address changes.
+
+ndisc_tclass - INTEGER
+	The IPv6 Traffic Class to use by default when sending IPv6 Neighbor
+	Discovery (Router Solicitation, Router Advertisement, Neighbor
+	Solicitation, Neighbor Advertisement, Redirect) messages.
+	These 8 bits can be interpreted as 6 high order bits holding the DSCP
+	value and 2 low order bits representing ECN (which you probably want
+	to leave cleared).
+
+	* 0 - (default)
+
+mldv1_unsolicited_report_interval - INTEGER
+	The interval in milliseconds in which the next unsolicited
+	MLDv1 report retransmit will take place.
+
+	Default: 10000 (10 seconds)
+
+mldv2_unsolicited_report_interval - INTEGER
+	The interval in milliseconds in which the next unsolicited
+	MLDv2 report retransmit will take place.
+
+	Default: 1000 (1 second)
+
+force_mld_version - INTEGER
+	* 0 - (default) No enforcement of a MLD version, MLDv1 fallback allowed
+	* 1 - Enforce to use MLD version 1
+	* 2 - Enforce to use MLD version 2
+
+suppress_frag_ndisc - INTEGER
+	Control RFC 6980 (Security Implications of IPv6 Fragmentation
+	with IPv6 Neighbor Discovery) behavior:
+
+	* 1 - (default) discard fragmented neighbor discovery packets
+	* 0 - allow fragmented neighbor discovery packets
+
+optimistic_dad - BOOLEAN
+	Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
+
+	* 0: disabled (default)
+	* 1: enabled
+
+	Optimistic Duplicate Address Detection for the interface will be enabled
+	if at least one of conf/{all,interface}/optimistic_dad is set to 1,
+	it will be disabled otherwise.
+
+use_optimistic - BOOLEAN
+	If enabled, do not classify optimistic addresses as deprecated during
+	source address selection.  Preferred addresses will still be chosen
+	before optimistic addresses, subject to other ranking in the source
+	address selection algorithm.
+
+	* 0: disabled (default)
+	* 1: enabled
+
+	This will be enabled if at least one of
+	conf/{all,interface}/use_optimistic is set to 1, disabled otherwise.
+
+stable_secret - IPv6 address
+	This IPv6 address will be used as a secret to generate IPv6
+	addresses for link-local addresses and autoconfigured
+	ones. All addresses generated after setting this secret will
+	be stable privacy ones by default. This can be changed via the
+	addrgenmode ip-link. conf/default/stable_secret is used as the
+	secret for the namespace, the interface specific ones can
+	overwrite that. Writes to conf/all/stable_secret are refused.
+
+	It is recommended to generate this secret during installation
+	of a system and keep it stable after that.
+
+	By default the stable secret is unset.
+
+addr_gen_mode - INTEGER
+	Defines how link-local and autoconf addresses are generated.
+
+	=  =================================================================
+	0  generate address based on EUI64 (default)
+	1  do not generate a link-local address, use EUI64 for addresses
+	   generated from autoconf
+	2  generate stable privacy addresses, using the secret from
+	   stable_secret (RFC7217)
+	3  generate stable privacy addresses, using a random secret if unset
+	=  =================================================================
+
+drop_unicast_in_l2_multicast - BOOLEAN
+	Drop any unicast IPv6 packets that are received in link-layer
+	multicast (or broadcast) frames.
+
+	By default this is turned off.
+
+drop_unsolicited_na - BOOLEAN
+	Drop all unsolicited neighbor advertisements, for example if there's
+	a known good NA proxy on the network and such frames need not be used
+	(or in the case of 802.11, must not be used to prevent attacks.)
+
+	By default this is turned off.
+
+enhanced_dad - BOOLEAN
+	Include a nonce option in the IPv6 neighbor solicitation messages used for
+	duplicate address detection per RFC7527. A received DAD NS will only signal
+	a duplicate address if the nonce is different. This avoids any false
+	detection of duplicates due to loopback of the NS messages that we send.
+	The nonce option will be sent on an interface unless both of
+	conf/{all,interface}/enhanced_dad are set to FALSE.
+
+	Default: TRUE
+
+``icmp/*``:
+===========
+
+ratelimit - INTEGER
+	Limit the maximal rates for sending ICMPv6 messages.
+
+	0 to disable any limiting,
+	otherwise the minimal space between responses in milliseconds.
+
+	Default: 1000
+
+ratemask - list of comma separated ranges
+	For ICMPv6 message types matching the ranges in the ratemask, limit
+	the sending of the message according to ratelimit parameter.
+
+	The format used for both input and output is a comma separated
+	list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and
+	129). Writing to the file will clear all previous ranges of ICMPv6
+	message types and update the current list with the input.
+
+	Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml
+	for numerical values of ICMPv6 message types, e.g. echo request is 128
+	and echo reply is 129.
+
+	Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
+
+echo_ignore_all - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol.
+
+	Default: 0
+
+echo_ignore_multicast - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol via multicast.
+
+	Default: 0
+
+echo_ignore_anycast - BOOLEAN
+	If set non-zero, then the kernel will ignore all ICMP ECHO
+	requests sent to it over the IPv6 protocol destined to anycast address.
+
+	Default: 0
+
+xfrm6_gc_thresh - INTEGER
+	(Obsolete since linux-4.14)
+	The threshold at which we will start garbage collecting for IPv6
+	destination cache entries.  At twice this value the system will
+	refuse new allocations.
+
+
+IPv6 Update by:
+Pekka Savola <pekkas@netcore.fi>
+YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
+
+
+/proc/sys/net/bridge/* Variables:
+=================================
+
+bridge-nf-call-arptables - BOOLEAN
+	- 1 : pass bridged ARP traffic to arptables' FORWARD chain.
+	- 0 : disable this.
+
+	Default: 1
+
+bridge-nf-call-iptables - BOOLEAN
+	- 1 : pass bridged IPv4 traffic to iptables' chains.
+	- 0 : disable this.
+
+	Default: 1
+
+bridge-nf-call-ip6tables - BOOLEAN
+	- 1 : pass bridged IPv6 traffic to ip6tables' chains.
+	- 0 : disable this.
+
+	Default: 1
+
+bridge-nf-filter-vlan-tagged - BOOLEAN
+	- 1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
+	- 0 : disable this.
+
+	Default: 0
+
+bridge-nf-filter-pppoe-tagged - BOOLEAN
+	- 1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
+	- 0 : disable this.
+
+	Default: 0
+
+bridge-nf-pass-vlan-input-dev - BOOLEAN
+	- 1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
+	  interface on the bridge and set the netfilter input device to the
+	  vlan. This allows use of e.g. "iptables -i br0.1" and makes the
+	  REDIRECT target work with vlan-on-top-of-bridge interfaces.  When no
+	  matching vlan interface is found, or this switch is off, the input
+	  device is set to the bridge interface.
+
+	- 0: disable bridge netfilter vlan interface lookup.
+
+	Default: 0
+
+``proc/sys/net/sctp/*`` Variables:
+==================================
+
+addip_enable - BOOLEAN
+	Enable or disable extension of  Dynamic Address Reconfiguration
+	(ADD-IP) functionality specified in RFC5061.  This extension provides
+	the ability to dynamically add and remove new addresses for the SCTP
+	associations.
+
+	1: Enable extension.
+
+	0: Disable extension.
+
+	Default: 0
+
+pf_enable - INTEGER
+	Enable or disable pf (pf is short for potentially failed) state. A value
+	of pf_retrans > path_max_retrans also disables pf state. That is, one of
+	both pf_enable and pf_retrans > path_max_retrans can disable pf state.
+	Since pf_retrans and path_max_retrans can be changed by userspace
+	application, sometimes user expects to disable pf state by the value of
+	pf_retrans > path_max_retrans, but occasionally the value of pf_retrans
+	or path_max_retrans is changed by the user application, this pf state is
+	enabled. As such, it is necessary to add this to dynamically enable
+	and disable pf state. See:
+	https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for
+	details.
+
+	1: Enable pf.
+
+	0: Disable pf.
+
+	Default: 1
+
+pf_expose - INTEGER
+	Unset or enable/disable pf (pf is short for potentially failed) state
+	exposure.  Applications can control the exposure of the PF path state
+	in the SCTP_PEER_ADDR_CHANGE event and the SCTP_GET_PEER_ADDR_INFO
+	sockopt.   When it's unset, no SCTP_PEER_ADDR_CHANGE event with
+	SCTP_ADDR_PF state will be sent and a SCTP_PF-state transport info
+	can be got via SCTP_GET_PEER_ADDR_INFO sockopt;  When it's enabled,
+	a SCTP_PEER_ADDR_CHANGE event will be sent for a transport becoming
+	SCTP_PF state and a SCTP_PF-state transport info can be got via
+	SCTP_GET_PEER_ADDR_INFO sockopt;  When it's disabled, no
+	SCTP_PEER_ADDR_CHANGE event will be sent and it returns -EACCES when
+	trying to get a SCTP_PF-state transport info via SCTP_GET_PEER_ADDR_INFO
+	sockopt.
+
+	0: Unset pf state exposure, Compatible with old applications.
+
+	1: Disable pf state exposure.
+
+	2: Enable pf state exposure.
+
+	Default: 0
+
+addip_noauth_enable - BOOLEAN
+	Dynamic Address Reconfiguration (ADD-IP) requires the use of
+	authentication to protect the operations of adding or removing new
+	addresses.  This requirement is mandated so that unauthorized hosts
+	would not be able to hijack associations.  However, older
+	implementations may not have implemented this requirement while
+	allowing the ADD-IP extension.  For reasons of interoperability,
+	we provide this variable to control the enforcement of the
+	authentication requirement.
+
+	== ===============================================================
+	1  Allow ADD-IP extension to be used without authentication.  This
+	   should only be set in a closed environment for interoperability
+	   with older implementations.
+
+	0  Enforce the authentication requirement
+	== ===============================================================
+
+	Default: 0
+
+auth_enable - BOOLEAN
+	Enable or disable Authenticated Chunks extension.  This extension
+	provides the ability to send and receive authenticated chunks and is
+	required for secure operation of Dynamic Address Reconfiguration
+	(ADD-IP) extension.
+
+	- 1: Enable this extension.
+	- 0: Disable this extension.
+
+	Default: 0
+
+prsctp_enable - BOOLEAN
+	Enable or disable the Partial Reliability extension (RFC3758) which
+	is used to notify peers that a given DATA should no longer be expected.
+
+	- 1: Enable extension
+	- 0: Disable
+
+	Default: 1
+
+max_burst - INTEGER
+	The limit of the number of new packets that can be initially sent.  It
+	controls how bursty the generated traffic can be.
+
+	Default: 4
+
+association_max_retrans - INTEGER
+	Set the maximum number for retransmissions that an association can
+	attempt before deciding that the remote end is unreachable.  If this value
+	is exceeded, the association is terminated.
+
+	Default: 10
+
+max_init_retransmits - INTEGER
+	The maximum number of retransmissions of INIT and COOKIE-ECHO chunks
+	that an association will attempt before declaring the destination
+	unreachable and terminating.
+
+	Default: 8
+
+path_max_retrans - INTEGER
+	The maximum number of retransmissions that will be attempted on a given
+	path.  Once this threshold is exceeded, the path is considered
+	unreachable, and new traffic will use a different path when the
+	association is multihomed.
+
+	Default: 5
+
+pf_retrans - INTEGER
+	The number of retransmissions that will be attempted on a given path
+	before traffic is redirected to an alternate transport (should one
+	exist).  Note this is distinct from path_max_retrans, as a path that
+	passes the pf_retrans threshold can still be used.  It's only
+	deprioritized when a transmission path is selected by the stack.  This
+	setting is primarily used to enable fast failover mechanisms without
+	having to reduce path_max_retrans to a very low value.  See:
+	http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
+	for details.  Note also that a value of pf_retrans > path_max_retrans
+	disables this feature. Since both pf_retrans and path_max_retrans can
+	be changed by userspace application, a variable pf_enable is used to
+	disable pf state.
+
+	Default: 0
+
+ps_retrans - INTEGER
+	Primary.Switchover.Max.Retrans (PSMR), it's a tunable parameter coming
+	from section-5 "Primary Path Switchover" in rfc7829.  The primary path
+	will be changed to another active path when the path error counter on
+	the old primary path exceeds PSMR, so that "the SCTP sender is allowed
+	to continue data transmission on a new working path even when the old
+	primary destination address becomes active again".   Note this feature
+	is disabled by initializing 'ps_retrans' per netns as 0xffff by default,
+	and its value can't be less than 'pf_retrans' when changing by sysctl.
+
+	Default: 0xffff
+
+rto_initial - INTEGER
+	The initial round trip timeout value in milliseconds that will be used
+	in calculating round trip times.  This is the initial time interval
+	for retransmissions.
+
+	Default: 3000
+
+rto_max - INTEGER
+	The maximum value (in milliseconds) of the round trip timeout.  This
+	is the largest time interval that can elapse between retransmissions.
+
+	Default: 60000
+
+rto_min - INTEGER
+	The minimum value (in milliseconds) of the round trip timeout.  This
+	is the smallest time interval that can elapse between retransmissions.
+
+	Default: 1000
+
+hb_interval - INTEGER
+	The interval (in milliseconds) between HEARTBEAT chunks.  These chunks
+	are sent at the specified interval on idle paths to probe the state of
+	a given path between 2 associations.
+
+	Default: 30000
+
+sack_timeout - INTEGER
+	The amount of time (in milliseconds) that the implementation will wait
+	to send a SACK.
+
+	Default: 200
+
+valid_cookie_life - INTEGER
+	The default lifetime of the SCTP cookie (in milliseconds).  The cookie
+	is used during association establishment.
+
+	Default: 60000
+
+cookie_preserve_enable - BOOLEAN
+	Enable or disable the ability to extend the lifetime of the SCTP cookie
+	that is used during the establishment phase of an SCTP association.
+
+	- 1: Enable cookie lifetime extension.
+	- 0: Disable
+
+	Default: 1
+
+cookie_hmac_alg - STRING
+	Select the hmac algorithm used when generating the cookie value sent by
+	a listening sctp socket to a connecting client in the INIT-ACK chunk.
+	Valid values are:
+
+	* md5
+	* sha1
+	* none
+
+	Ability to assign md5 or sha1 as the selected alg is predicated on the
+	configuration of those algorithms at build time (CONFIG_CRYPTO_MD5 and
+	CONFIG_CRYPTO_SHA1).
+
+	Default: Dependent on configuration.  MD5 if available, else SHA1 if
+	available, else none.
+
+rcvbuf_policy - INTEGER
+	Determines if the receive buffer is attributed to the socket or to
+	association.   SCTP supports the capability to create multiple
+	associations on a single socket.  When using this capability, it is
+	possible that a single stalled association that's buffering a lot
+	of data may block other associations from delivering their data by
+	consuming all of the receive buffer space.  To work around this,
+	the rcvbuf_policy could be set to attribute the receiver buffer space
+	to each association instead of the socket.  This prevents the described
+	blocking.
+
+	- 1: rcvbuf space is per association
+	- 0: rcvbuf space is per socket
+
+	Default: 0
+
+sndbuf_policy - INTEGER
+	Similar to rcvbuf_policy above, this applies to send buffer space.
+
+	- 1: Send buffer is tracked per association
+	- 0: Send buffer is tracked per socket.
+
+	Default: 0
+
+sctp_mem - vector of 3 INTEGERs: min, pressure, max
+	Number of pages allowed for queueing by all SCTP sockets.
+
+	min: Below this number of pages SCTP is not bothered about its
+	memory appetite. When amount of memory allocated by SCTP exceeds
+	this number, SCTP starts to moderate memory usage.
+
+	pressure: This value was introduced to follow format of tcp_mem.
+
+	max: Number of pages allowed for queueing by all SCTP sockets.
+
+	Default is calculated at boot time from amount of available memory.
+
+sctp_rmem - vector of 3 INTEGERs: min, default, max
+	Only the first value ("min") is used, "default" and "max" are
+	ignored.
+
+	min: Minimal size of receive buffer used by SCTP socket.
+	It is guaranteed to each SCTP socket (but not association) even
+	under moderate memory pressure.
+
+	Default: 4K
+
+sctp_wmem - vector of 3 INTEGERs: min, default, max
+	Currently this tunable has no effect.
+
+addr_scope_policy - INTEGER
+	Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
+
+	- 0   - Disable IPv4 address scoping
+	- 1   - Enable IPv4 address scoping
+	- 2   - Follow draft but allow IPv4 private addresses
+	- 3   - Follow draft but allow IPv4 link local addresses
+
+	Default: 1
+
+
+``/proc/sys/net/core/*``
+========================
+
+	Please see: Documentation/admin-guide/sysctl/net.rst for descriptions of these entries.
+
+
+``/proc/sys/net/unix/*``
+========================
+
+max_dgram_qlen - INTEGER
+	The maximum length of dgram socket receive queue
+
+	Default: 10
+
diff --git a/Documentation/networking/ip-sysctl.txt b/Documentation/networking/ip-sysctl.txt
deleted file mode 100644
index 5cdc37c34830..000000000000
--- a/Documentation/networking/ip-sysctl.txt
+++ /dev/null
@@ -1,2374 +0,0 @@
-/proc/sys/net/ipv4/* Variables:
-
-ip_forward - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	Forward Packets between interfaces.
-
-	This variable is special, its change resets all configuration
-	parameters to their default state (RFC1122 for hosts, RFC1812
-	for routers)
-
-ip_default_ttl - INTEGER
-	Default value of TTL field (Time To Live) for outgoing (but not
-	forwarded) IP packets. Should be between 1 and 255 inclusive.
-	Default: 64 (as recommended by RFC1700)
-
-ip_no_pmtu_disc - INTEGER
-	Disable Path MTU Discovery. If enabled in mode 1 and a
-	fragmentation-required ICMP is received, the PMTU to this
-	destination will be set to min_pmtu (see below). You will need
-	to raise min_pmtu to the smallest interface MTU on your system
-	manually if you want to avoid locally generated fragments.
-
-	In mode 2 incoming Path MTU Discovery messages will be
-	discarded. Outgoing frames are handled the same as in mode 1,
-	implicitly setting IP_PMTUDISC_DONT on every created socket.
-
-	Mode 3 is a hardened pmtu discover mode. The kernel will only
-	accept fragmentation-needed errors if the underlying protocol
-	can verify them besides a plain socket lookup. Current
-	protocols for which pmtu events will be honored are TCP, SCTP
-	and DCCP as they verify e.g. the sequence number or the
-	association. This mode should not be enabled globally but is
-	only intended to secure e.g. name servers in namespaces where
-	TCP path mtu must still work but path MTU information of other
-	protocols should be discarded. If enabled globally this mode
-	could break other protocols.
-
-	Possible values: 0-3
-	Default: FALSE
-
-min_pmtu - INTEGER
-	default 552 - minimum discovered Path MTU
-
-ip_forward_use_pmtu - BOOLEAN
-	By default we don't trust protocol path MTUs while forwarding
-	because they could be easily forged and can lead to unwanted
-	fragmentation by the router.
-	You only need to enable this if you have user-space software
-	which tries to discover path mtus by itself and depends on the
-	kernel honoring this information. This is normally not the
-	case.
-	Default: 0 (disabled)
-	Possible values:
-	0 - disabled
-	1 - enabled
-
-fwmark_reflect - BOOLEAN
-	Controls the fwmark of kernel-generated IPv4 reply packets that are not
-	associated with a socket for example, TCP RSTs or ICMP echo replies).
-	If unset, these packets have a fwmark of zero. If set, they have the
-	fwmark of the packet they are replying to.
-	Default: 0
-
-fib_multipath_use_neigh - BOOLEAN
-	Use status of existing neighbor entry when determining nexthop for
-	multipath routes. If disabled, neighbor information is not used and
-	packets could be directed to a failed nexthop. Only valid for kernels
-	built with CONFIG_IP_ROUTE_MULTIPATH enabled.
-	Default: 0 (disabled)
-	Possible values:
-	0 - disabled
-	1 - enabled
-
-fib_multipath_hash_policy - INTEGER
-	Controls which hash policy to use for multipath routes. Only valid
-	for kernels built with CONFIG_IP_ROUTE_MULTIPATH enabled.
-	Default: 0 (Layer 3)
-	Possible values:
-	0 - Layer 3
-	1 - Layer 4
-	2 - Layer 3 or inner Layer 3 if present
-
-fib_sync_mem - UNSIGNED INTEGER
-	Amount of dirty memory from fib entries that can be backlogged before
-	synchronize_rcu is forced.
-	  Default: 512kB   Minimum: 64kB   Maximum: 64MB
-
-ip_forward_update_priority - INTEGER
-	Whether to update SKB priority from "TOS" field in IPv4 header after it
-	is forwarded. The new SKB priority is mapped from TOS field value
-	according to an rt_tos2priority table (see e.g. man tc-prio).
-	Default: 1 (Update priority.)
-	Possible values:
-	0 - Do not update priority.
-	1 - Update priority.
-
-route/max_size - INTEGER
-	Maximum number of routes allowed in the kernel.  Increase
-	this when using large numbers of interfaces and/or routes.
-	From linux kernel 3.6 onwards, this is deprecated for ipv4
-	as route cache is no longer used.
-
-neigh/default/gc_thresh1 - INTEGER
-	Minimum number of entries to keep.  Garbage collector will not
-	purge entries if there are fewer than this number.
-	Default: 128
-
-neigh/default/gc_thresh2 - INTEGER
-	Threshold when garbage collector becomes more aggressive about
-	purging entries. Entries older than 5 seconds will be cleared
-	when over this number.
-	Default: 512
-
-neigh/default/gc_thresh3 - INTEGER
-	Maximum number of non-PERMANENT neighbor entries allowed.  Increase
-	this when using large numbers of interfaces and when communicating
-	with large numbers of directly-connected peers.
-	Default: 1024
-
-neigh/default/unres_qlen_bytes - INTEGER
-	The maximum number of bytes which may be used by packets
-	queued for each	unresolved address by other network layers.
-	(added in linux 3.3)
-	Setting negative value is meaningless and will return error.
-	Default: SK_WMEM_MAX, (same as net.core.wmem_default).
-		Exact value depends on architecture and kernel options,
-		but should be enough to allow queuing 256 packets
-		of medium size.
-
-neigh/default/unres_qlen - INTEGER
-	The maximum number of packets which may be queued for each
-	unresolved address by other network layers.
-	(deprecated in linux 3.3) : use unres_qlen_bytes instead.
-	Prior to linux 3.3, the default value is 3 which may cause
-	unexpected packet loss. The current default value is calculated
-	according to default value of unres_qlen_bytes and true size of
-	packet.
-	Default: 101
-
-mtu_expires - INTEGER
-	Time, in seconds, that cached PMTU information is kept.
-
-min_adv_mss - INTEGER
-	The advertised MSS depends on the first hop route MTU, but will
-	never be lower than this setting.
-
-IP Fragmentation:
-
-ipfrag_high_thresh - LONG INTEGER
-	Maximum memory used to reassemble IP fragments.
-
-ipfrag_low_thresh - LONG INTEGER
-	(Obsolete since linux-4.17)
-	Maximum memory used to reassemble IP fragments before the kernel
-	begins to remove incomplete fragment queues to free up resources.
-	The kernel still accepts new fragments for defragmentation.
-
-ipfrag_time - INTEGER
-	Time in seconds to keep an IP fragment in memory.
-
-ipfrag_max_dist - INTEGER
-	ipfrag_max_dist is a non-negative integer value which defines the
-	maximum "disorder" which is allowed among fragments which share a
-	common IP source address. Note that reordering of packets is
-	not unusual, but if a large number of fragments arrive from a source
-	IP address while a particular fragment queue remains incomplete, it
-	probably indicates that one or more fragments belonging to that queue
-	have been lost. When ipfrag_max_dist is positive, an additional check
-	is done on fragments before they are added to a reassembly queue - if
-	ipfrag_max_dist (or more) fragments have arrived from a particular IP
-	address between additions to any IP fragment queue using that source
-	address, it's presumed that one or more fragments in the queue are
-	lost. The existing fragment queue will be dropped, and a new one
-	started. An ipfrag_max_dist value of zero disables this check.
-
-	Using a very small value, e.g. 1 or 2, for ipfrag_max_dist can
-	result in unnecessarily dropping fragment queues when normal
-	reordering of packets occurs, which could lead to poor application
-	performance. Using a very large value, e.g. 50000, increases the
-	likelihood of incorrectly reassembling IP fragments that originate
-	from different IP datagrams, which could result in data corruption.
-	Default: 64
-
-INET peer storage:
-
-inet_peer_threshold - INTEGER
-	The approximate size of the storage.  Starting from this threshold
-	entries will be thrown aggressively.  This threshold also determines
-	entries' time-to-live and time intervals between garbage collection
-	passes.  More entries, less time-to-live, less GC interval.
-
-inet_peer_minttl - INTEGER
-	Minimum time-to-live of entries.  Should be enough to cover fragment
-	time-to-live on the reassembling side.  This minimum time-to-live  is
-	guaranteed if the pool size is less than inet_peer_threshold.
-	Measured in seconds.
-
-inet_peer_maxttl - INTEGER
-	Maximum time-to-live of entries.  Unused entries will expire after
-	this period of time if there is no memory pressure on the pool (i.e.
-	when the number of entries in the pool is very small).
-	Measured in seconds.
-
-TCP variables:
-
-somaxconn - INTEGER
-	Limit of socket listen() backlog, known in userspace as SOMAXCONN.
-	Defaults to 4096. (Was 128 before linux-5.4)
-	See also tcp_max_syn_backlog for additional tuning for TCP sockets.
-
-tcp_abort_on_overflow - BOOLEAN
-	If listening service is too slow to accept new connections,
-	reset them. Default state is FALSE. It means that if overflow
-	occurred due to a burst, connection will recover. Enable this
-	option _only_ if you are really sure that listening daemon
-	cannot be tuned to accept connections faster. Enabling this
-	option can harm clients of your server.
-
-tcp_adv_win_scale - INTEGER
-	Count buffering overhead as bytes/2^tcp_adv_win_scale
-	(if tcp_adv_win_scale > 0) or bytes-bytes/2^(-tcp_adv_win_scale),
-	if it is <= 0.
-	Possible values are [-31, 31], inclusive.
-	Default: 1
-
-tcp_allowed_congestion_control - STRING
-	Show/set the congestion control choices available to non-privileged
-	processes. The list is a subset of those listed in
-	tcp_available_congestion_control.
-	Default is "reno" and the default setting (tcp_congestion_control).
-
-tcp_app_win - INTEGER
-	Reserve max(window/2^tcp_app_win, mss) of window for application
-	buffer. Value 0 is special, it means that nothing is reserved.
-	Default: 31
-
-tcp_autocorking - BOOLEAN
-	Enable TCP auto corking :
-	When applications do consecutive small write()/sendmsg() system calls,
-	we try to coalesce these small writes as much as possible, to lower
-	total amount of sent packets. This is done if at least one prior
-	packet for the flow is waiting in Qdisc queues or device transmit
-	queue. Applications can still use TCP_CORK for optimal behavior
-	when they know how/when to uncork their sockets.
-	Default : 1
-
-tcp_available_congestion_control - STRING
-	Shows the available congestion control choices that are registered.
-	More congestion control algorithms may be available as modules,
-	but not loaded.
-
-tcp_base_mss - INTEGER
-	The initial value of search_low to be used by the packetization layer
-	Path MTU discovery (MTU probing).  If MTU probing is enabled,
-	this is the initial MSS used by the connection.
-
-tcp_mtu_probe_floor - INTEGER
-	If MTU probing is enabled this caps the minimum MSS used for search_low
-	for the connection.
-
-	Default : 48
-
-tcp_min_snd_mss - INTEGER
-	TCP SYN and SYNACK messages usually advertise an ADVMSS option,
-	as described in RFC 1122 and RFC 6691.
-	If this ADVMSS option is smaller than tcp_min_snd_mss,
-	it is silently capped to tcp_min_snd_mss.
-
-	Default : 48 (at least 8 bytes of payload per segment)
-
-tcp_congestion_control - STRING
-	Set the congestion control algorithm to be used for new
-	connections. The algorithm "reno" is always available, but
-	additional choices may be available based on kernel configuration.
-	Default is set as part of kernel configuration.
-	For passive connections, the listener congestion control choice
-	is inherited.
-	[see setsockopt(listenfd, SOL_TCP, TCP_CONGESTION, "name" ...) ]
-
-tcp_dsack - BOOLEAN
-	Allows TCP to send "duplicate" SACKs.
-
-tcp_early_retrans - INTEGER
-	Tail loss probe (TLP) converts RTOs occurring due to tail
-	losses into fast recovery (draft-ietf-tcpm-rack). Note that
-	TLP requires RACK to function properly (see tcp_recovery below)
-	Possible values:
-		0 disables TLP
-		3 or 4 enables TLP
-	Default: 3
-
-tcp_ecn - INTEGER
-	Control use of Explicit Congestion Notification (ECN) by TCP.
-	ECN is used only when both ends of the TCP connection indicate
-	support for it.  This feature is useful in avoiding losses due
-	to congestion by allowing supporting routers to signal
-	congestion before having to drop packets.
-	Possible values are:
-		0 Disable ECN.  Neither initiate nor accept ECN.
-		1 Enable ECN when requested by incoming connections and
-		  also request ECN on outgoing connection attempts.
-		2 Enable ECN when requested by incoming connections
-		  but do not request ECN on outgoing connections.
-	Default: 2
-
-tcp_ecn_fallback - BOOLEAN
-	If the kernel detects that ECN connection misbehaves, enable fall
-	back to non-ECN. Currently, this knob implements the fallback
-	from RFC3168, section 6.1.1.1., but we reserve that in future,
-	additional detection mechanisms could be implemented under this
-	knob. The value	is not used, if tcp_ecn or per route (or congestion
-	control) ECN settings are disabled.
-	Default: 1 (fallback enabled)
-
-tcp_fack - BOOLEAN
-	This is a legacy option, it has no effect anymore.
-
-tcp_fin_timeout - INTEGER
-	The length of time an orphaned (no longer referenced by any
-	application) connection will remain in the FIN_WAIT_2 state
-	before it is aborted at the local end.  While a perfectly
-	valid "receive only" state for an un-orphaned connection, an
-	orphaned connection in FIN_WAIT_2 state could otherwise wait
-	forever for the remote to close its end of the connection.
-	Cf. tcp_max_orphans
-	Default: 60 seconds
-
-tcp_frto - INTEGER
-	Enables Forward RTO-Recovery (F-RTO) defined in RFC5682.
-	F-RTO is an enhanced recovery algorithm for TCP retransmission
-	timeouts.  It is particularly beneficial in networks where the
-	RTT fluctuates (e.g., wireless). F-RTO is sender-side only
-	modification. It does not require any support from the peer.
-
-	By default it's enabled with a non-zero value. 0 disables F-RTO.
-
-tcp_fwmark_accept - BOOLEAN
-	If set, incoming connections to listening sockets that do not have a
-	socket mark will set the mark of the accepting socket to the fwmark of
-	the incoming SYN packet. This will cause all packets on that connection
-	(starting from the first SYNACK) to be sent with that fwmark. The
-	listening socket's mark is unchanged. Listening sockets that already
-	have a fwmark set via setsockopt(SOL_SOCKET, SO_MARK, ...) are
-	unaffected.
-
-	Default: 0
-
-tcp_invalid_ratelimit - INTEGER
-	Limit the maximal rate for sending duplicate acknowledgments
-	in response to incoming TCP packets that are for an existing
-	connection but that are invalid due to any of these reasons:
-
-	  (a) out-of-window sequence number,
-	  (b) out-of-window acknowledgment number, or
-	  (c) PAWS (Protection Against Wrapped Sequence numbers) check failure
-
-	This can help mitigate simple "ack loop" DoS attacks, wherein
-	a buggy or malicious middlebox or man-in-the-middle can
-	rewrite TCP header fields in manner that causes each endpoint
-	to think that the other is sending invalid TCP segments, thus
-	causing each side to send an unterminating stream of duplicate
-	acknowledgments for invalid segments.
-
-	Using 0 disables rate-limiting of dupacks in response to
-	invalid segments; otherwise this value specifies the minimal
-	space between sending such dupacks, in milliseconds.
-
-	Default: 500 (milliseconds).
-
-tcp_keepalive_time - INTEGER
-	How often TCP sends out keepalive messages when keepalive is enabled.
-	Default: 2hours.
-
-tcp_keepalive_probes - INTEGER
-	How many keepalive probes TCP sends out, until it decides that the
-	connection is broken. Default value: 9.
-
-tcp_keepalive_intvl - INTEGER
-	How frequently the probes are send out. Multiplied by
-	tcp_keepalive_probes it is time to kill not responding connection,
-	after probes started. Default value: 75sec i.e. connection
-	will be aborted after ~11 minutes of retries.
-
-tcp_l3mdev_accept - BOOLEAN
-	Enables child sockets to inherit the L3 master device index.
-	Enabling this option allows a "global" listen socket to work
-	across L3 master domains (e.g., VRFs) with connected sockets
-	derived from the listen socket to be bound to the L3 domain in
-	which the packets originated. Only valid when the kernel was
-	compiled with CONFIG_NET_L3_MASTER_DEV.
-        Default: 0 (disabled)
-
-tcp_low_latency - BOOLEAN
-	This is a legacy option, it has no effect anymore.
-
-tcp_max_orphans - INTEGER
-	Maximal number of TCP sockets not attached to any user file handle,
-	held by system.	If this number is exceeded orphaned connections are
-	reset immediately and warning is printed. This limit exists
-	only to prevent simple DoS attacks, you _must_ not rely on this
-	or lower the limit artificially, but rather increase it
-	(probably, after increasing installed memory),
-	if network conditions require more than default value,
-	and tune network services to linger and kill such states
-	more aggressively. Let me to remind again: each orphan eats
-	up to ~64K of unswappable memory.
-
-tcp_max_syn_backlog - INTEGER
-	Maximal number of remembered connection requests (SYN_RECV),
-	which have not received an acknowledgment from connecting client.
-	This is a per-listener limit.
-	The minimal value is 128 for low memory machines, and it will
-	increase in proportion to the memory of machine.
-	If server suffers from overload, try increasing this number.
-	Remember to also check /proc/sys/net/core/somaxconn
-	A SYN_RECV request socket consumes about 304 bytes of memory.
-
-tcp_max_tw_buckets - INTEGER
-	Maximal number of timewait sockets held by system simultaneously.
-	If this number is exceeded time-wait socket is immediately destroyed
-	and warning is printed. This limit exists only to prevent
-	simple DoS attacks, you _must_ not lower the limit artificially,
-	but rather increase it (probably, after increasing installed memory),
-	if network conditions require more than default value.
-
-tcp_mem - vector of 3 INTEGERs: min, pressure, max
-	min: below this number of pages TCP is not bothered about its
-	memory appetite.
-
-	pressure: when amount of memory allocated by TCP exceeds this number
-	of pages, TCP moderates its memory consumption and enters memory
-	pressure mode, which is exited when memory consumption falls
-	under "min".
-
-	max: number of pages allowed for queueing by all TCP sockets.
-
-	Defaults are calculated at boot time from amount of available
-	memory.
-
-tcp_min_rtt_wlen - INTEGER
-	The window length of the windowed min filter to track the minimum RTT.
-	A shorter window lets a flow more quickly pick up new (higher)
-	minimum RTT when it is moved to a longer path (e.g., due to traffic
-	engineering). A longer window makes the filter more resistant to RTT
-	inflations such as transient congestion. The unit is seconds.
-	Possible values: 0 - 86400 (1 day)
-	Default: 300
-
-tcp_moderate_rcvbuf - BOOLEAN
-	If set, TCP performs receive buffer auto-tuning, attempting to
-	automatically size the buffer (no greater than tcp_rmem[2]) to
-	match the size required by the path for full throughput.  Enabled by
-	default.
-
-tcp_mtu_probing - INTEGER
-	Controls TCP Packetization-Layer Path MTU Discovery.  Takes three
-	values:
-	  0 - Disabled
-	  1 - Disabled by default, enabled when an ICMP black hole detected
-	  2 - Always enabled, use initial MSS of tcp_base_mss.
-
-tcp_probe_interval - UNSIGNED INTEGER
-	Controls how often to start TCP Packetization-Layer Path MTU
-	Discovery reprobe. The default is reprobing every 10 minutes as
-	per RFC4821.
-
-tcp_probe_threshold - INTEGER
-	Controls when TCP Packetization-Layer Path MTU Discovery probing
-	will stop in respect to the width of search range in bytes. Default
-	is 8 bytes.
-
-tcp_no_metrics_save - BOOLEAN
-	By default, TCP saves various connection metrics in the route cache
-	when the connection closes, so that connections established in the
-	near future can use these to set initial conditions.  Usually, this
-	increases overall performance, but may sometimes cause performance
-	degradation.  If set, TCP will not cache metrics on closing
-	connections.
-
-tcp_no_ssthresh_metrics_save - BOOLEAN
-	Controls whether TCP saves ssthresh metrics in the route cache.
-	Default is 1, which disables ssthresh metrics.
-
-tcp_orphan_retries - INTEGER
-	This value influences the timeout of a locally closed TCP connection,
-	when RTO retransmissions remain unacknowledged.
-	See tcp_retries2 for more details.
-
-	The default value is 8.
-	If your machine is a loaded WEB server,
-	you should think about lowering this value, such sockets
-	may consume significant resources. Cf. tcp_max_orphans.
-
-tcp_recovery - INTEGER
-	This value is a bitmap to enable various experimental loss recovery
-	features.
-
-	RACK: 0x1 enables the RACK loss detection for fast detection of lost
-	      retransmissions and tail drops. It also subsumes and disables
-	      RFC6675 recovery for SACK connections.
-	RACK: 0x2 makes RACK's reordering window static (min_rtt/4).
-	RACK: 0x4 disables RACK's DUPACK threshold heuristic
-
-	Default: 0x1
-
-tcp_reordering - INTEGER
-	Initial reordering level of packets in a TCP stream.
-	TCP stack can then dynamically adjust flow reordering level
-	between this initial value and tcp_max_reordering
-	Default: 3
-
-tcp_max_reordering - INTEGER
-	Maximal reordering level of packets in a TCP stream.
-	300 is a fairly conservative value, but you might increase it
-	if paths are using per packet load balancing (like bonding rr mode)
-	Default: 300
-
-tcp_retrans_collapse - BOOLEAN
-	Bug-to-bug compatibility with some broken printers.
-	On retransmit try to send bigger packets to work around bugs in
-	certain TCP stacks.
-
-tcp_retries1 - INTEGER
-	This value influences the time, after which TCP decides, that
-	something is wrong due to unacknowledged RTO retransmissions,
-	and reports this suspicion to the network layer.
-	See tcp_retries2 for more details.
-
-	RFC 1122 recommends at least 3 retransmissions, which is the
-	default.
-
-tcp_retries2 - INTEGER
-	This value influences the timeout of an alive TCP connection,
-	when RTO retransmissions remain unacknowledged.
-	Given a value of N, a hypothetical TCP connection following
-	exponential backoff with an initial RTO of TCP_RTO_MIN would
-	retransmit N times before killing the connection at the (N+1)th RTO.
-
-	The default value of 15 yields a hypothetical timeout of 924.6
-	seconds and is a lower bound for the effective timeout.
-	TCP will effectively time out at the first RTO which exceeds the
-	hypothetical timeout.
-
-	RFC 1122 recommends at least 100 seconds for the timeout,
-	which corresponds to a value of at least 8.
-
-tcp_rfc1337 - BOOLEAN
-	If set, the TCP stack behaves conforming to RFC1337. If unset,
-	we are not conforming to RFC, but prevent TCP TIME_WAIT
-	assassination.
-	Default: 0
-
-tcp_rmem - vector of 3 INTEGERs: min, default, max
-	min: Minimal size of receive buffer used by TCP sockets.
-	It is guaranteed to each TCP socket, even under moderate memory
-	pressure.
-	Default: 4K
-
-	default: initial size of receive buffer used by TCP sockets.
-	This value overrides net.core.rmem_default used by other protocols.
-	Default: 87380 bytes. This value results in window of 65535 with
-	default setting of tcp_adv_win_scale and tcp_app_win:0 and a bit
-	less for default tcp_app_win. See below about these variables.
-
-	max: maximal size of receive buffer allowed for automatically
-	selected receiver buffers for TCP socket. This value does not override
-	net.core.rmem_max.  Calling setsockopt() with SO_RCVBUF disables
-	automatic tuning of that socket's receive buffer size, in which
-	case this value is ignored.
-	Default: between 87380B and 6MB, depending on RAM size.
-
-tcp_sack - BOOLEAN
-	Enable select acknowledgments (SACKS).
-
-tcp_comp_sack_delay_ns - LONG INTEGER
-	TCP tries to reduce number of SACK sent, using a timer
-	based on 5% of SRTT, capped by this sysctl, in nano seconds.
-	The default is 1ms, based on TSO autosizing period.
-
-	Default : 1,000,000 ns (1 ms)
-
-tcp_comp_sack_nr - INTEGER
-	Max number of SACK that can be compressed.
-	Using 0 disables SACK compression.
-
-	Default : 44
-
-tcp_slow_start_after_idle - BOOLEAN
-	If set, provide RFC2861 behavior and time out the congestion
-	window after an idle period.  An idle period is defined at
-	the current RTO.  If unset, the congestion window will not
-	be timed out after an idle period.
-	Default: 1
-
-tcp_stdurg - BOOLEAN
-	Use the Host requirements interpretation of the TCP urgent pointer field.
-	Most hosts use the older BSD interpretation, so if you turn this on
-	Linux might not communicate correctly with them.
-	Default: FALSE
-
-tcp_synack_retries - INTEGER
-	Number of times SYNACKs for a passive TCP connection attempt will
-	be retransmitted. Should not be higher than 255. Default value
-	is 5, which corresponds to 31seconds till the last retransmission
-	with the current initial RTO of 1second. With this the final timeout
-	for a passive TCP connection will happen after 63seconds.
-
-tcp_syncookies - INTEGER
-	Only valid when the kernel was compiled with CONFIG_SYN_COOKIES
-	Send out syncookies when the syn backlog queue of a socket
-	overflows. This is to prevent against the common 'SYN flood attack'
-	Default: 1
-
-	Note, that syncookies is fallback facility.
-	It MUST NOT be used to help highly loaded servers to stand
-	against legal connection rate. If you see SYN flood warnings
-	in your logs, but investigation	shows that they occur
-	because of overload with legal connections, you should tune
-	another parameters until this warning disappear.
-	See: tcp_max_syn_backlog, tcp_synack_retries, tcp_abort_on_overflow.
-
-	syncookies seriously violate TCP protocol, do not allow
-	to use TCP extensions, can result in serious degradation
-	of some services (f.e. SMTP relaying), visible not by you,
-	but your clients and relays, contacting you. While you see
-	SYN flood warnings in logs not being really flooded, your server
-	is seriously misconfigured.
-
-	If you want to test which effects syncookies have to your
-	network connections you can set this knob to 2 to enable
-	unconditionally generation of syncookies.
-
-tcp_fastopen - INTEGER
-	Enable TCP Fast Open (RFC7413) to send and accept data in the opening
-	SYN packet.
-
-	The client support is enabled by flag 0x1 (on by default). The client
-	then must use sendmsg() or sendto() with the MSG_FASTOPEN flag,
-	rather than connect() to send data in SYN.
-
-	The server support is enabled by flag 0x2 (off by default). Then
-	either enable for all listeners with another flag (0x400) or
-	enable individual listeners via TCP_FASTOPEN socket option with
-	the option value being the length of the syn-data backlog.
-
-	The values (bitmap) are
-	  0x1: (client) enables sending data in the opening SYN on the client.
-	  0x2: (server) enables the server support, i.e., allowing data in
-			a SYN packet to be accepted and passed to the
-			application before 3-way handshake finishes.
-	  0x4: (client) send data in the opening SYN regardless of cookie
-			availability and without a cookie option.
-	0x200: (server) accept data-in-SYN w/o any cookie option present.
-	0x400: (server) enable all listeners to support Fast Open by
-			default without explicit TCP_FASTOPEN socket option.
-
-	Default: 0x1
-
-	Note that that additional client or server features are only
-	effective if the basic support (0x1 and 0x2) are enabled respectively.
-
-tcp_fastopen_blackhole_timeout_sec - INTEGER
-	Initial time period in second to disable Fastopen on active TCP sockets
-	when a TFO firewall blackhole issue happens.
-	This time period will grow exponentially when more blackhole issues
-	get detected right after Fastopen is re-enabled and will reset to
-	initial value when the blackhole issue goes away.
-	0 to disable the blackhole detection.
-	By default, it is set to 1hr.
-
-tcp_fastopen_key - list of comma separated 32-digit hexadecimal INTEGERs
-	The list consists of a primary key and an optional backup key. The
-	primary key is used for both creating and validating cookies, while the
-	optional backup key is only used for validating cookies. The purpose of
-	the backup key is to maximize TFO validation when keys are rotated.
-
-	A randomly chosen primary key may be configured by the kernel if
-	the tcp_fastopen sysctl is set to 0x400 (see above), or if the
-	TCP_FASTOPEN setsockopt() optname is set and a key has not been
-	previously configured via sysctl. If keys are configured via
-	setsockopt() by using the TCP_FASTOPEN_KEY optname, then those
-	per-socket keys will be used instead of any keys that are specified via
-	sysctl.
-
-	A key is specified as 4 8-digit hexadecimal integers which are separated
-	by a '-' as: xxxxxxxx-xxxxxxxx-xxxxxxxx-xxxxxxxx. Leading zeros may be
-	omitted. A primary and a backup key may be specified by separating them
-	by a comma. If only one key is specified, it becomes the primary key and
-	any previously configured backup keys are removed.
-
-tcp_syn_retries - INTEGER
-	Number of times initial SYNs for an active TCP connection attempt
-	will be retransmitted. Should not be higher than 127. Default value
-	is 6, which corresponds to 63 seconds till the last retransmission
-	with the current initial RTO of 1 second. With this the final timeout
-	for an active TCP connection attempt will happen after 127 seconds.
-
-tcp_timestamps - INTEGER
-	Enable timestamps as defined in RFC1323.
-	0: Disabled.
-	1: Enable timestamps as defined in RFC1323 and use random offset for
-	each connection rather than only using the current time.
-	2: Like 1, but without random offsets.
-	Default: 1
-
-tcp_min_tso_segs - INTEGER
-	Minimal number of segments per TSO frame.
-	Since linux-3.12, TCP does an automatic sizing of TSO frames,
-	depending on flow rate, instead of filling 64Kbytes packets.
-	For specific usages, it's possible to force TCP to build big
-	TSO frames. Note that TCP stack might split too big TSO packets
-	if available window is too small.
-	Default: 2
-
-tcp_pacing_ss_ratio - INTEGER
-	sk->sk_pacing_rate is set by TCP stack using a ratio applied
-	to current rate. (current_rate = cwnd * mss / srtt)
-	If TCP is in slow start, tcp_pacing_ss_ratio is applied
-	to let TCP probe for bigger speeds, assuming cwnd can be
-	doubled every other RTT.
-	Default: 200
-
-tcp_pacing_ca_ratio - INTEGER
-	sk->sk_pacing_rate is set by TCP stack using a ratio applied
-	to current rate. (current_rate = cwnd * mss / srtt)
-	If TCP is in congestion avoidance phase, tcp_pacing_ca_ratio
-	is applied to conservatively probe for bigger throughput.
-	Default: 120
-
-tcp_tso_win_divisor - INTEGER
-	This allows control over what percentage of the congestion window
-	can be consumed by a single TSO frame.
-	The setting of this parameter is a choice between burstiness and
-	building larger TSO frames.
-	Default: 3
-
-tcp_tw_reuse - INTEGER
-	Enable reuse of TIME-WAIT sockets for new connections when it is
-	safe from protocol viewpoint.
-	0 - disable
-	1 - global enable
-	2 - enable for loopback traffic only
-	It should not be changed without advice/request of technical
-	experts.
-	Default: 2
-
-tcp_window_scaling - BOOLEAN
-	Enable window scaling as defined in RFC1323.
-
-tcp_wmem - vector of 3 INTEGERs: min, default, max
-	min: Amount of memory reserved for send buffers for TCP sockets.
-	Each TCP socket has rights to use it due to fact of its birth.
-	Default: 4K
-
-	default: initial size of send buffer used by TCP sockets.  This
-	value overrides net.core.wmem_default used by other protocols.
-	It is usually lower than net.core.wmem_default.
-	Default: 16K
-
-	max: Maximal amount of memory allowed for automatically tuned
-	send buffers for TCP sockets. This value does not override
-	net.core.wmem_max.  Calling setsockopt() with SO_SNDBUF disables
-	automatic tuning of that socket's send buffer size, in which case
-	this value is ignored.
-	Default: between 64K and 4MB, depending on RAM size.
-
-tcp_notsent_lowat - UNSIGNED INTEGER
-	A TCP socket can control the amount of unsent bytes in its write queue,
-	thanks to TCP_NOTSENT_LOWAT socket option. poll()/select()/epoll()
-	reports POLLOUT events if the amount of unsent bytes is below a per
-	socket value, and if the write queue is not full. sendmsg() will
-	also not add new buffers if the limit is hit.
-
-	This global variable controls the amount of unsent data for
-	sockets not using TCP_NOTSENT_LOWAT. For these sockets, a change
-	to the global variable has immediate effect.
-
-	Default: UINT_MAX (0xFFFFFFFF)
-
-tcp_workaround_signed_windows - BOOLEAN
-	If set, assume no receipt of a window scaling option means the
-	remote TCP is broken and treats the window as a signed quantity.
-	If unset, assume the remote TCP is not broken even if we do
-	not receive a window scaling option from them.
-	Default: 0
-
-tcp_thin_linear_timeouts - BOOLEAN
-	Enable dynamic triggering of linear timeouts for thin streams.
-	If set, a check is performed upon retransmission by timeout to
-	determine if the stream is thin (less than 4 packets in flight).
-	As long as the stream is found to be thin, up to 6 linear
-	timeouts may be performed before exponential backoff mode is
-	initiated. This improves retransmission latency for
-	non-aggressive thin streams, often found to be time-dependent.
-	For more information on thin streams, see
-	Documentation/networking/tcp-thin.txt
-	Default: 0
-
-tcp_limit_output_bytes - INTEGER
-	Controls TCP Small Queue limit per tcp socket.
-	TCP bulk sender tends to increase packets in flight until it
-	gets losses notifications. With SNDBUF autotuning, this can
-	result in a large amount of packets queued on the local machine
-	(e.g.: qdiscs, CPU backlog, or device) hurting latency of other
-	flows, for typical pfifo_fast qdiscs.  tcp_limit_output_bytes
-	limits the number of bytes on qdisc or device to reduce artificial
-	RTT/cwnd and reduce bufferbloat.
-	Default: 1048576 (16 * 65536)
-
-tcp_challenge_ack_limit - INTEGER
-	Limits number of Challenge ACK sent per second, as recommended
-	in RFC 5961 (Improving TCP's Robustness to Blind In-Window Attacks)
-	Default: 1000
-
-tcp_rx_skb_cache - BOOLEAN
-	Controls a per TCP socket cache of one skb, that might help
-	performance of some workloads. This might be dangerous
-	on systems with a lot of TCP sockets, since it increases
-	memory usage.
-
-	Default: 0 (disabled)
-
-UDP variables:
-
-udp_l3mdev_accept - BOOLEAN
-	Enabling this option allows a "global" bound socket to work
-	across L3 master domains (e.g., VRFs) with packets capable of
-	being received regardless of the L3 domain in which they
-	originated. Only valid when the kernel was compiled with
-	CONFIG_NET_L3_MASTER_DEV.
-	Default: 0 (disabled)
-
-udp_mem - vector of 3 INTEGERs: min, pressure, max
-	Number of pages allowed for queueing by all UDP sockets.
-
-	min: Below this number of pages UDP is not bothered about its
-	memory appetite. When amount of memory allocated by UDP exceeds
-	this number, UDP starts to moderate memory usage.
-
-	pressure: This value was introduced to follow format of tcp_mem.
-
-	max: Number of pages allowed for queueing by all UDP sockets.
-
-	Default is calculated at boot time from amount of available memory.
-
-udp_rmem_min - INTEGER
-	Minimal size of receive buffer used by UDP sockets in moderation.
-	Each UDP socket is able to use the size for receiving data, even if
-	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 4K
-
-udp_wmem_min - INTEGER
-	Minimal size of send buffer used by UDP sockets in moderation.
-	Each UDP socket is able to use the size for sending data, even if
-	total pages of UDP sockets exceed udp_mem pressure. The unit is byte.
-	Default: 4K
-
-RAW variables:
-
-raw_l3mdev_accept - BOOLEAN
-	Enabling this option allows a "global" bound socket to work
-	across L3 master domains (e.g., VRFs) with packets capable of
-	being received regardless of the L3 domain in which they
-	originated. Only valid when the kernel was compiled with
-	CONFIG_NET_L3_MASTER_DEV.
-	Default: 1 (enabled)
-
-CIPSOv4 Variables:
-
-cipso_cache_enable - BOOLEAN
-	If set, enable additions to and lookups from the CIPSO label mapping
-	cache.  If unset, additions are ignored and lookups always result in a
-	miss.  However, regardless of the setting the cache is still
-	invalidated when required which means you can safely toggle this on and
-	off and the cache will always be "safe".
-	Default: 1
-
-cipso_cache_bucket_size - INTEGER
-	The CIPSO label cache consists of a fixed size hash table with each
-	hash bucket containing a number of cache entries.  This variable limits
-	the number of entries in each hash bucket; the larger the value the
-	more CIPSO label mappings that can be cached.  When the number of
-	entries in a given hash bucket reaches this limit adding new entries
-	causes the oldest entry in the bucket to be removed to make room.
-	Default: 10
-
-cipso_rbm_optfmt - BOOLEAN
-	Enable the "Optimized Tag 1 Format" as defined in section 3.4.2.6 of
-	the CIPSO draft specification (see Documentation/netlabel for details).
-	This means that when set the CIPSO tag will be padded with empty
-	categories in order to make the packet data 32-bit aligned.
-	Default: 0
-
-cipso_rbm_structvalid - BOOLEAN
-	If set, do a very strict check of the CIPSO option when
-	ip_options_compile() is called.  If unset, relax the checks done during
-	ip_options_compile().  Either way is "safe" as errors are caught
-	elsewhere in the CIPSO processing code but setting this to 0 (False) should
-	result in less work (i.e. it should be faster) but could cause problems
-	with other implementations that require strict checking.
-	Default: 0
-
-IP Variables:
-
-ip_local_port_range - 2 INTEGERS
-	Defines the local port range that is used by TCP and UDP to
-	choose the local port. The first number is the first, the
-	second the last local port number.
-	If possible, it is better these numbers have different parity
-	(one even and one odd value).
-	Must be greater than or equal to ip_unprivileged_port_start.
-	The default values are 32768 and 60999 respectively.
-
-ip_local_reserved_ports - list of comma separated ranges
-	Specify the ports which are reserved for known third-party
-	applications. These ports will not be used by automatic port
-	assignments (e.g. when calling connect() or bind() with port
-	number 0). Explicit port allocation behavior is unchanged.
-
-	The format used for both input and output is a comma separated
-	list of ranges (e.g. "1,2-4,10-10" for ports 1, 2, 3, 4 and
-	10). Writing to the file will clear all previously reserved
-	ports and update the current list with the one given in the
-	input.
-
-	Note that ip_local_port_range and ip_local_reserved_ports
-	settings are independent and both are considered by the kernel
-	when determining which ports are available for automatic port
-	assignments.
-
-	You can reserve ports which are not in the current
-	ip_local_port_range, e.g.:
-
-	$ cat /proc/sys/net/ipv4/ip_local_port_range
-	32000	60999
-	$ cat /proc/sys/net/ipv4/ip_local_reserved_ports
-	8080,9148
-
-	although this is redundant. However such a setting is useful
-	if later the port range is changed to a value that will
-	include the reserved ports.
-
-	Default: Empty
-
-ip_unprivileged_port_start - INTEGER
-	This is a per-namespace sysctl.  It defines the first
-	unprivileged port in the network namespace.  Privileged ports
-	require root or CAP_NET_BIND_SERVICE in order to bind to them.
-	To disable all privileged ports, set this to 0.  They must not
-	overlap with the ip_local_port_range.
-
-	Default: 1024
-
-ip_nonlocal_bind - BOOLEAN
-	If set, allows processes to bind() to non-local IP addresses,
-	which can be quite useful - but may break some applications.
-	Default: 0
-
-ip_autobind_reuse - BOOLEAN
-	By default, bind() does not select the ports automatically even if
-	the new socket and all sockets bound to the port have SO_REUSEADDR.
-	ip_autobind_reuse allows bind() to reuse the port and this is useful
-	when you use bind()+connect(), but may break some applications.
-	The preferred solution is to use IP_BIND_ADDRESS_NO_PORT and this
-	option should only be set by experts.
-	Default: 0
-
-ip_dynaddr - BOOLEAN
-	If set non-zero, enables support for dynamic addresses.
-	If set to a non-zero value larger than 1, a kernel log
-	message will be printed when dynamic address rewriting
-	occurs.
-	Default: 0
-
-ip_early_demux - BOOLEAN
-	Optimize input packet processing down to one demux for
-	certain kinds of local sockets.  Currently we only do this
-	for established TCP and connected UDP sockets.
-
-	It may add an additional cost for pure routing workloads that
-	reduces overall throughput, in such case you should disable it.
-	Default: 1
-
-ping_group_range - 2 INTEGERS
-	Restrict ICMP_PROTO datagram sockets to users in the group range.
-	The default is "1 0", meaning, that nobody (not even root) may
-	create ping sockets.  Setting it to "100 100" would grant permissions
-	to the single group. "0 4294967295" would enable it for the world, "100
-	4294967295" would enable it for the users, but not daemons.
-
-tcp_early_demux - BOOLEAN
-	Enable early demux for established TCP sockets.
-	Default: 1
-
-udp_early_demux - BOOLEAN
-	Enable early demux for connected UDP sockets. Disable this if
-	your system could experience more unconnected load.
-	Default: 1
-
-icmp_echo_ignore_all - BOOLEAN
-	If set non-zero, then the kernel will ignore all ICMP ECHO
-	requests sent to it.
-	Default: 0
-
-icmp_echo_ignore_broadcasts - BOOLEAN
-	If set non-zero, then the kernel will ignore all ICMP ECHO and
-	TIMESTAMP requests sent to it via broadcast/multicast.
-	Default: 1
-
-icmp_ratelimit - INTEGER
-	Limit the maximal rates for sending ICMP packets whose type matches
-	icmp_ratemask (see below) to specific targets.
-	0 to disable any limiting,
-	otherwise the minimal space between responses in milliseconds.
-	Note that another sysctl, icmp_msgs_per_sec limits the number
-	of ICMP packets sent on all targets.
-	Default: 1000
-
-icmp_msgs_per_sec - INTEGER
-	Limit maximal number of ICMP packets sent per second from this host.
-	Only messages whose type matches icmp_ratemask (see below) are
-	controlled by this limit.
-	Default: 1000
-
-icmp_msgs_burst - INTEGER
-	icmp_msgs_per_sec controls number of ICMP packets sent per second,
-	while icmp_msgs_burst controls the burst size of these packets.
-	Default: 50
-
-icmp_ratemask - INTEGER
-	Mask made of ICMP types for which rates are being limited.
-	Significant bits: IHGFEDCBA9876543210
-	Default mask:     0000001100000011000 (6168)
-
-	Bit definitions (see include/linux/icmp.h):
-		0 Echo Reply
-		3 Destination Unreachable *
-		4 Source Quench *
-		5 Redirect
-		8 Echo Request
-		B Time Exceeded *
-		C Parameter Problem *
-		D Timestamp Request
-		E Timestamp Reply
-		F Info Request
-		G Info Reply
-		H Address Mask Request
-		I Address Mask Reply
-
-	* These are rate limited by default (see default mask above)
-
-icmp_ignore_bogus_error_responses - BOOLEAN
-	Some routers violate RFC1122 by sending bogus responses to broadcast
-	frames.  Such violations are normally logged via a kernel warning.
-	If this is set to TRUE, the kernel will not give such warnings, which
-	will avoid log file clutter.
-	Default: 1
-
-icmp_errors_use_inbound_ifaddr - BOOLEAN
-
-	If zero, icmp error messages are sent with the primary address of
-	the exiting interface.
-
-	If non-zero, the message will be sent with the primary address of
-	the interface that received the packet that caused the icmp error.
-	This is the behaviour many network administrators will expect from
-	a router. And it can make debugging complicated network layouts
-	much easier.
-
-	Note that if no primary address exists for the interface selected,
-	then the primary address of the first non-loopback interface that
-	has one will be used regardless of this setting.
-
-	Default: 0
-
-igmp_max_memberships - INTEGER
-	Change the maximum number of multicast groups we can subscribe to.
-	Default: 20
-
-	Theoretical maximum value is bounded by having to send a membership
-	report in a single datagram (i.e. the report can't span multiple
-	datagrams, or risk confusing the switch and leaving groups you don't
-	intend to).
-
-	The number of supported groups 'M' is bounded by the number of group
-	report entries you can fit into a single datagram of 65535 bytes.
-
-	M = (65536 - sizeof(ip header)) / sizeof(Group record)
-
-	Group records are variable length, with a minimum of 12 bytes.
-	So net.ipv4.igmp_max_memberships should not be set higher than:
-
-	(65536-24) / 12 = 5459
-
-	The value 5459 assumes no IP header options, so in practice
-	this number may be lower.
-
-igmp_max_msf - INTEGER
-	Maximum number of addresses allowed in the source filter list for a
-	multicast group.
-	Default: 10
-
-igmp_qrv - INTEGER
-	Controls the IGMP query robustness variable (see RFC2236 8.1).
-	Default: 2 (as specified by RFC2236 8.1)
-	Minimum: 1 (as specified by RFC6636 4.5)
-
-force_igmp_version - INTEGER
-	0 - (default) No enforcement of an IGMP version, IGMPv1/v2 fallback
-	    allowed. Will go back to IGMPv3 mode again once all IGMPv1/v2
-	    Querier Present timers expire.
-	1 - Enforce use of IGMP version 1. Will also reply with an IGMPv1
-	    report when an IGMPv2/v3 query is received.
-	2 - Enforce use of IGMP version 2. Will fall back to IGMPv1 if an
-	    IGMPv1 query message is received. Will reply with a report if an
-	    IGMPv3 query is received.
-	3 - Enforce use of IGMP version 3. Behaves the same as the default 0.
-
-	Note: this is not the same as force_mld_version because the IGMPv3
-	RFC3376 Security Considerations do not clearly state that messages of
-	other versions may be ignored completely, as MLDv2 RFC3810 does. So
-	keeping this value at the default 0 is recommended.
-
-conf/interface/*  changes special settings per interface (where
-"interface" is the name of your network interface)
-
-conf/all/*	  is special, changes the settings for all interfaces
-
-log_martians - BOOLEAN
-	Log packets with impossible addresses to kernel log.
-	log_martians for the interface will be enabled if at least one of
-	conf/{all,interface}/log_martians is set to TRUE,
-	it will be disabled otherwise
-
-accept_redirects - BOOLEAN
-	Accept ICMP redirect messages.
-	accept_redirects for the interface will be enabled if:
-	- both conf/{all,interface}/accept_redirects are TRUE in the case
-	  forwarding for the interface is enabled
-	or
-	- at least one of conf/{all,interface}/accept_redirects is TRUE in the
-	  case forwarding for the interface is disabled
-	accept_redirects for the interface will be disabled otherwise
-	default TRUE (host)
-		FALSE (router)
-
-forwarding - BOOLEAN
-	Enable IP forwarding on this interface.  This controls whether packets
-	received _on_ this interface can be forwarded.
-
-mc_forwarding - BOOLEAN
-	Do multicast routing. The kernel needs to be compiled with CONFIG_MROUTE
-	and a multicast routing daemon is required.
-	conf/all/mc_forwarding must also be set to TRUE to enable multicast
-	routing for the interface
-
-medium_id - INTEGER
-	Integer value used to differentiate the devices by the medium they
-	are attached to. Two devices can have different id values when
-	the broadcast packets are received only on one of them.
-	The default value 0 means that the device is the only interface
-	to its medium, value of -1 means that medium is not known.
-
-	Currently, it is used to change the proxy_arp behavior:
-	the proxy_arp feature is enabled for packets forwarded between
-	two devices attached to different media.
-
-proxy_arp - BOOLEAN
-	Do proxy arp.
-	proxy_arp for the interface will be enabled if at least one of
-	conf/{all,interface}/proxy_arp is set to TRUE,
-	it will be disabled otherwise
-
-proxy_arp_pvlan - BOOLEAN
-	Private VLAN proxy arp.
-	Basically allow proxy arp replies back to the same interface
-	(from which the ARP request/solicitation was received).
-
-	This is done to support (ethernet) switch features, like RFC
-	3069, where the individual ports are NOT allowed to
-	communicate with each other, but they are allowed to talk to
-	the upstream router.  As described in RFC 3069, it is possible
-	to allow these hosts to communicate through the upstream
-	router by proxy_arp'ing. Does not need to be used together with
-	proxy_arp.
-
-	This technology is known by different names:
-	  In RFC 3069 it is called VLAN Aggregation.
-	  Cisco and Allied Telesyn call it Private VLAN.
-	  Hewlett-Packard call it Source-Port filtering or port-isolation.
-	  Ericsson call it MAC-Forced Forwarding (RFC Draft).
-
-shared_media - BOOLEAN
-	Send(router) or accept(host) RFC1620 shared media redirects.
-	Overrides secure_redirects.
-	shared_media for the interface will be enabled if at least one of
-	conf/{all,interface}/shared_media is set to TRUE,
-	it will be disabled otherwise
-	default TRUE
-
-secure_redirects - BOOLEAN
-	Accept ICMP redirect messages only to gateways listed in the
-	interface's current gateway list. Even if disabled, RFC1122 redirect
-	rules still apply.
-	Overridden by shared_media.
-	secure_redirects for the interface will be enabled if at least one of
-	conf/{all,interface}/secure_redirects is set to TRUE,
-	it will be disabled otherwise
-	default TRUE
-
-send_redirects - BOOLEAN
-	Send redirects, if router.
-	send_redirects for the interface will be enabled if at least one of
-	conf/{all,interface}/send_redirects is set to TRUE,
-	it will be disabled otherwise
-	Default: TRUE
-
-bootp_relay - BOOLEAN
-	Accept packets with source address 0.b.c.d destined
-	not to this host as local ones. It is supposed, that
-	BOOTP relay daemon will catch and forward such packets.
-	conf/all/bootp_relay must also be set to TRUE to enable BOOTP relay
-	for the interface
-	default FALSE
-	Not Implemented Yet.
-
-accept_source_route - BOOLEAN
-	Accept packets with SRR option.
-	conf/all/accept_source_route must also be set to TRUE to accept packets
-	with SRR option on the interface
-	default TRUE (router)
-		FALSE (host)
-
-accept_local - BOOLEAN
-	Accept packets with local source addresses. In combination with
-	suitable routing, this can be used to direct packets between two
-	local interfaces over the wire and have them accepted properly.
-	default FALSE
-
-route_localnet - BOOLEAN
-	Do not consider loopback addresses as martian source or destination
-	while routing. This enables the use of 127/8 for local routing purposes.
-	default FALSE
-
-rp_filter - INTEGER
-	0 - No source validation.
-	1 - Strict mode as defined in RFC3704 Strict Reverse Path
-	    Each incoming packet is tested against the FIB and if the interface
-	    is not the best reverse path the packet check will fail.
-	    By default failed packets are discarded.
-	2 - Loose mode as defined in RFC3704 Loose Reverse Path
-	    Each incoming packet's source address is also tested against the FIB
-	    and if the source address is not reachable via any interface
-	    the packet check will fail.
-
-	Current recommended practice in RFC3704 is to enable strict mode
-	to prevent IP spoofing from DDoS attacks. If using asymmetric routing
-	or other complicated routing, then loose mode is recommended.
-
-	The max value from conf/{all,interface}/rp_filter is used
-	when doing source validation on the {interface}.
-
-	Default value is 0. Note that some distributions enable it
-	in startup scripts.
-
-arp_filter - BOOLEAN
-	1 - Allows you to have multiple network interfaces on the same
-	subnet, and have the ARPs for each interface be answered
-	based on whether or not the kernel would route a packet from
-	the ARP'd IP out that interface (therefore you must use source
-	based routing for this to work). In other words it allows control
-	of which cards (usually 1) will respond to an arp request.
-
-	0 - (default) The kernel can respond to arp requests with addresses
-	from other interfaces. This may seem wrong but it usually makes
-	sense, because it increases the chance of successful communication.
-	IP addresses are owned by the complete host on Linux, not by
-	particular interfaces. Only for more complex setups like load-
-	balancing, does this behaviour cause problems.
-
-	arp_filter for the interface will be enabled if at least one of
-	conf/{all,interface}/arp_filter is set to TRUE,
-	it will be disabled otherwise
-
-arp_announce - INTEGER
-	Define different restriction levels for announcing the local
-	source IP address from IP packets in ARP requests sent on
-	interface:
-	0 - (default) Use any local address, configured on any interface
-	1 - Try to avoid local addresses that are not in the target's
-	subnet for this interface. This mode is useful when target
-	hosts reachable via this interface require the source IP
-	address in ARP requests to be part of their logical network
-	configured on the receiving interface. When we generate the
-	request we will check all our subnets that include the
-	target IP and will preserve the source address if it is from
-	such subnet. If there is no such subnet we select source
-	address according to the rules for level 2.
-	2 - Always use the best local address for this target.
-	In this mode we ignore the source address in the IP packet
-	and try to select local address that we prefer for talks with
-	the target host. Such local address is selected by looking
-	for primary IP addresses on all our subnets on the outgoing
-	interface that include the target IP address. If no suitable
-	local address is found we select the first local address
-	we have on the outgoing interface or on all other interfaces,
-	with the hope we will receive reply for our request and
-	even sometimes no matter the source IP address we announce.
-
-	The max value from conf/{all,interface}/arp_announce is used.
-
-	Increasing the restriction level gives more chance for
-	receiving answer from the resolved target while decreasing
-	the level announces more valid sender's information.
-
-arp_ignore - INTEGER
-	Define different modes for sending replies in response to
-	received ARP requests that resolve local target IP addresses:
-	0 - (default): reply for any local target IP address, configured
-	on any interface
-	1 - reply only if the target IP address is local address
-	configured on the incoming interface
-	2 - reply only if the target IP address is local address
-	configured on the incoming interface and both it and the
-	sender's IP address are part of the same subnet on this interface
-	3 - do not reply for local addresses configured with scope host,
-	only resolutions for global and link addresses are replied
-	4-7 - reserved
-	8 - do not reply for all local addresses
-
-	The max value from conf/{all,interface}/arp_ignore is used
-	when ARP request is received on the {interface}
-
-arp_notify - BOOLEAN
-	Define mode for notification of address and device changes.
-	0 - (default): do nothing
-	1 - Generate gratuitous arp requests when device is brought up
-	    or hardware address changes.
-
-arp_accept - BOOLEAN
-	Define behavior for gratuitous ARP frames whose IP is not
-	already present in the ARP table:
-	0 - don't create new entries in the ARP table
-	1 - create new entries in the ARP table
-
-	Both replies and requests type gratuitous arp will trigger the
-	ARP table to be updated, if this setting is on.
-
-	If the ARP table already contains the IP address of the
-	gratuitous arp frame, the arp table will be updated regardless
-	if this setting is on or off.
-
-mcast_solicit - INTEGER
-	The maximum number of multicast probes in INCOMPLETE state,
-	when the associated hardware address is unknown.  Defaults
-	to 3.
-
-ucast_solicit - INTEGER
-	The maximum number of unicast probes in PROBE state, when
-	the hardware address is being reconfirmed.  Defaults to 3.
-
-app_solicit - INTEGER
-	The maximum number of probes to send to the user space ARP daemon
-	via netlink before dropping back to multicast probes (see
-	mcast_resolicit).  Defaults to 0.
-
-mcast_resolicit - INTEGER
-	The maximum number of multicast probes after unicast and
-	app probes in PROBE state.  Defaults to 0.
-
-disable_policy - BOOLEAN
-	Disable IPSEC policy (SPD) for this interface
-
-disable_xfrm - BOOLEAN
-	Disable IPSEC encryption on this interface, whatever the policy
-
-igmpv2_unsolicited_report_interval - INTEGER
-	The interval in milliseconds in which the next unsolicited
-	IGMPv1 or IGMPv2 report retransmit will take place.
-	Default: 10000 (10 seconds)
-
-igmpv3_unsolicited_report_interval - INTEGER
-	The interval in milliseconds in which the next unsolicited
-	IGMPv3 report retransmit will take place.
-	Default: 1000 (1 second)
-
-promote_secondaries - BOOLEAN
-	When a primary IP address is removed from this interface
-	promote a corresponding secondary IP address instead of
-	removing all the corresponding secondary IP addresses.
-
-drop_unicast_in_l2_multicast - BOOLEAN
-	Drop any unicast IP packets that are received in link-layer
-	multicast (or broadcast) frames.
-	This behavior (for multicast) is actually a SHOULD in RFC
-	1122, but is disabled by default for compatibility reasons.
-	Default: off (0)
-
-drop_gratuitous_arp - BOOLEAN
-	Drop all gratuitous ARP frames, for example if there's a known
-	good ARP proxy on the network and such frames need not be used
-	(or in the case of 802.11, must not be used to prevent attacks.)
-	Default: off (0)
-
-
-tag - INTEGER
-	Allows you to write a number, which can be used as required.
-	Default value is 0.
-
-xfrm4_gc_thresh - INTEGER
-	(Obsolete since linux-4.14)
-	The threshold at which we will start garbage collecting for IPv4
-	destination cache entries.  At twice this value the system will
-	refuse new allocations.
-
-igmp_link_local_mcast_reports - BOOLEAN
-	Enable IGMP reports for link local multicast groups in the
-	224.0.0.X range.
-	Default TRUE
-
-Alexey Kuznetsov.
-kuznet@ms2.inr.ac.ru
-
-Updated by:
-Andi Kleen
-ak@muc.de
-Nicolas Delon
-delon.nicolas@wanadoo.fr
-
-
-
-
-/proc/sys/net/ipv6/* Variables:
-
-IPv6 has no global variables such as tcp_*.  tcp_* settings under ipv4/ also
-apply to IPv6 [XXX?].
-
-bindv6only - BOOLEAN
-	Default value for IPV6_V6ONLY socket option,
-	which restricts use of the IPv6 socket to IPv6 communication
-	only.
-		TRUE: disable IPv4-mapped address feature
-		FALSE: enable IPv4-mapped address feature
-
-	Default: FALSE (as specified in RFC3493)
-
-flowlabel_consistency - BOOLEAN
-	Protect the consistency (and unicity) of flow label.
-	You have to disable it to use IPV6_FL_F_REFLECT flag on the
-	flow label manager.
-	TRUE: enabled
-	FALSE: disabled
-	Default: TRUE
-
-auto_flowlabels - INTEGER
-	Automatically generate flow labels based on a flow hash of the
-	packet. This allows intermediate devices, such as routers, to
-	identify packet flows for mechanisms like Equal Cost Multipath
-	Routing (see RFC 6438).
-	0: automatic flow labels are completely disabled
-	1: automatic flow labels are enabled by default, they can be
-	   disabled on a per socket basis using the IPV6_AUTOFLOWLABEL
-	   socket option
-	2: automatic flow labels are allowed, they may be enabled on a
-	   per socket basis using the IPV6_AUTOFLOWLABEL socket option
-	3: automatic flow labels are enabled and enforced, they cannot
-	   be disabled by the socket option
-	Default: 1
-
-flowlabel_state_ranges - BOOLEAN
-	Split the flow label number space into two ranges. 0-0x7FFFF is
-	reserved for the IPv6 flow manager facility, 0x80000-0xFFFFF
-	is reserved for stateless flow labels as described in RFC6437.
-	TRUE: enabled
-	FALSE: disabled
-	Default: true
-
-flowlabel_reflect - INTEGER
-	Control flow label reflection. Needed for Path MTU
-	Discovery to work with Equal Cost Multipath Routing in anycast
-	environments. See RFC 7690 and:
-	https://tools.ietf.org/html/draft-wang-6man-flow-label-reflection-01
-
-	This is a bitmask.
-	1: enabled for established flows
-
-	Note that this prevents automatic flowlabel changes, as done
-	in "tcp: change IPv6 flow-label upon receiving spurious retransmission"
-	and "tcp: Change txhash on every SYN and RTO retransmit"
-
-	2: enabled for TCP RESET packets (no active listener)
-	If set, a RST packet sent in response to a SYN packet on a closed
-	port will reflect the incoming flow label.
-
-	4: enabled for ICMPv6 echo reply messages.
-
-	Default: 0
-
-fib_multipath_hash_policy - INTEGER
-	Controls which hash policy to use for multipath routes.
-	Default: 0 (Layer 3)
-	Possible values:
-	0 - Layer 3 (source and destination addresses plus flow label)
-	1 - Layer 4 (standard 5-tuple)
-	2 - Layer 3 or inner Layer 3 if present
-
-anycast_src_echo_reply - BOOLEAN
-	Controls the use of anycast addresses as source addresses for ICMPv6
-	echo reply
-	TRUE:  enabled
-	FALSE: disabled
-	Default: FALSE
-
-idgen_delay - INTEGER
-	Controls the delay in seconds after which time to retry
-	privacy stable address generation if a DAD conflict is
-	detected.
-	Default: 1 (as specified in RFC7217)
-
-idgen_retries - INTEGER
-	Controls the number of retries to generate a stable privacy
-	address if a DAD conflict is detected.
-	Default: 3 (as specified in RFC7217)
-
-mld_qrv - INTEGER
-	Controls the MLD query robustness variable (see RFC3810 9.1).
-	Default: 2 (as specified by RFC3810 9.1)
-	Minimum: 1 (as specified by RFC6636 4.5)
-
-max_dst_opts_number - INTEGER
-	Maximum number of non-padding TLVs allowed in a Destination
-	options extension header. If this value is less than zero
-	then unknown options are disallowed and the number of known
-	TLVs allowed is the absolute value of this number.
-	Default: 8
-
-max_hbh_opts_number - INTEGER
-	Maximum number of non-padding TLVs allowed in a Hop-by-Hop
-	options extension header. If this value is less than zero
-	then unknown options are disallowed and the number of known
-	TLVs allowed is the absolute value of this number.
-	Default: 8
-
-max_dst_opts_length - INTEGER
-	Maximum length allowed for a Destination options extension
-	header.
-	Default: INT_MAX (unlimited)
-
-max_hbh_length - INTEGER
-	Maximum length allowed for a Hop-by-Hop options extension
-	header.
-	Default: INT_MAX (unlimited)
-
-skip_notify_on_dev_down - BOOLEAN
-	Controls whether an RTM_DELROUTE message is generated for routes
-	removed when a device is taken down or deleted. IPv4 does not
-	generate this message; IPv6 does by default. Setting this sysctl
-	to true skips the message, making IPv4 and IPv6 on par in relying
-	on userspace caches to track link events and evict routes.
-	Default: false (generate message)
-
-nexthop_compat_mode - BOOLEAN
-	New nexthop API provides a means for managing nexthops independent of
-	prefixes. Backwards compatibilty with old route format is enabled by
-	default which means route dumps and notifications contain the new
-	nexthop attribute but also the full, expanded nexthop definition.
-	Further, updates or deletes of a nexthop configuration generate route
-	notifications for each fib entry using the nexthop. Once a system
-	understands the new API, this sysctl can be disabled to achieve full
-	performance benefits of the new API by disabling the nexthop expansion
-	and extraneous notifications.
-	Default: true (backward compat mode)
-
-IPv6 Fragmentation:
-
-ip6frag_high_thresh - INTEGER
-	Maximum memory used to reassemble IPv6 fragments. When
-	ip6frag_high_thresh bytes of memory is allocated for this purpose,
-	the fragment handler will toss packets until ip6frag_low_thresh
-	is reached.
-
-ip6frag_low_thresh - INTEGER
-	See ip6frag_high_thresh
-
-ip6frag_time - INTEGER
-	Time in seconds to keep an IPv6 fragment in memory.
-
-IPv6 Segment Routing:
-
-seg6_flowlabel - INTEGER
-	Controls the behaviour of computing the flowlabel of outer
-	IPv6 header in case of SR T.encaps
-
-	-1 set flowlabel to zero.
-	0 copy flowlabel from Inner packet in case of Inner IPv6
-		(Set flowlabel to 0 in case IPv4/L2)
-	1 Compute the flowlabel using seg6_make_flowlabel()
-
-	Default is 0.
-
-conf/default/*:
-	Change the interface-specific default settings.
-
-
-conf/all/*:
-	Change all the interface-specific settings.
-
-	[XXX:  Other special features than forwarding?]
-
-conf/all/forwarding - BOOLEAN
-	Enable global IPv6 forwarding between all interfaces.
-
-	IPv4 and IPv6 work differently here; e.g. netfilter must be used
-	to control which interfaces may forward packets and which not.
-
-	This also sets all interfaces' Host/Router setting
-	'forwarding' to the specified value.  See below for details.
-
-	This referred to as global forwarding.
-
-proxy_ndp - BOOLEAN
-	Do proxy ndp.
-
-fwmark_reflect - BOOLEAN
-	Controls the fwmark of kernel-generated IPv6 reply packets that are not
-	associated with a socket for example, TCP RSTs or ICMPv6 echo replies).
-	If unset, these packets have a fwmark of zero. If set, they have the
-	fwmark of the packet they are replying to.
-	Default: 0
-
-conf/interface/*:
-	Change special settings per interface.
-
-	The functional behaviour for certain settings is different
-	depending on whether local forwarding is enabled or not.
-
-accept_ra - INTEGER
-	Accept Router Advertisements; autoconfigure using them.
-
-	It also determines whether or not to transmit Router
-	Solicitations. If and only if the functional setting is to
-	accept Router Advertisements, Router Solicitations will be
-	transmitted.
-
-	Possible values are:
-		0 Do not accept Router Advertisements.
-		1 Accept Router Advertisements if forwarding is disabled.
-		2 Overrule forwarding behaviour. Accept Router Advertisements
-		  even if forwarding is enabled.
-
-	Functional default: enabled if local forwarding is disabled.
-			    disabled if local forwarding is enabled.
-
-accept_ra_defrtr - BOOLEAN
-	Learn default router in Router Advertisement.
-
-	Functional default: enabled if accept_ra is enabled.
-			    disabled if accept_ra is disabled.
-
-accept_ra_from_local - BOOLEAN
-	Accept RA with source-address that is found on local machine
-        if the RA is otherwise proper and able to be accepted.
-        Default is to NOT accept these as it may be an un-intended
-        network loop.
-
-	Functional default:
-           enabled if accept_ra_from_local is enabled
-               on a specific interface.
-	   disabled if accept_ra_from_local is disabled
-               on a specific interface.
-
-accept_ra_min_hop_limit - INTEGER
-	Minimum hop limit Information in Router Advertisement.
-
-	Hop limit Information in Router Advertisement less than this
-	variable shall be ignored.
-
-	Default: 1
-
-accept_ra_pinfo - BOOLEAN
-	Learn Prefix Information in Router Advertisement.
-
-	Functional default: enabled if accept_ra is enabled.
-			    disabled if accept_ra is disabled.
-
-accept_ra_rt_info_min_plen - INTEGER
-	Minimum prefix length of Route Information in RA.
-
-	Route Information w/ prefix smaller than this variable shall
-	be ignored.
-
-	Functional default: 0 if accept_ra_rtr_pref is enabled.
-			    -1 if accept_ra_rtr_pref is disabled.
-
-accept_ra_rt_info_max_plen - INTEGER
-	Maximum prefix length of Route Information in RA.
-
-	Route Information w/ prefix larger than this variable shall
-	be ignored.
-
-	Functional default: 0 if accept_ra_rtr_pref is enabled.
-			    -1 if accept_ra_rtr_pref is disabled.
-
-accept_ra_rtr_pref - BOOLEAN
-	Accept Router Preference in RA.
-
-	Functional default: enabled if accept_ra is enabled.
-			    disabled if accept_ra is disabled.
-
-accept_ra_mtu - BOOLEAN
-	Apply the MTU value specified in RA option 5 (RFC4861). If
-	disabled, the MTU specified in the RA will be ignored.
-
-	Functional default: enabled if accept_ra is enabled.
-			    disabled if accept_ra is disabled.
-
-accept_redirects - BOOLEAN
-	Accept Redirects.
-
-	Functional default: enabled if local forwarding is disabled.
-			    disabled if local forwarding is enabled.
-
-accept_source_route - INTEGER
-	Accept source routing (routing extension header).
-
-	>= 0: Accept only routing header type 2.
-	< 0: Do not accept routing header.
-
-	Default: 0
-
-autoconf - BOOLEAN
-	Autoconfigure addresses using Prefix Information in Router
-	Advertisements.
-
-	Functional default: enabled if accept_ra_pinfo is enabled.
-			    disabled if accept_ra_pinfo is disabled.
-
-dad_transmits - INTEGER
-	The amount of Duplicate Address Detection probes to send.
-	Default: 1
-
-forwarding - INTEGER
-	Configure interface-specific Host/Router behaviour.
-
-	Note: It is recommended to have the same setting on all
-	interfaces; mixed router/host scenarios are rather uncommon.
-
-	Possible values are:
-		0 Forwarding disabled
-		1 Forwarding enabled
-
-	FALSE (0):
-
-	By default, Host behaviour is assumed.  This means:
-
-	1. IsRouter flag is not set in Neighbour Advertisements.
-	2. If accept_ra is TRUE (default), transmit Router
-	   Solicitations.
-	3. If accept_ra is TRUE (default), accept Router
-	   Advertisements (and do autoconfiguration).
-	4. If accept_redirects is TRUE (default), accept Redirects.
-
-	TRUE (1):
-
-	If local forwarding is enabled, Router behaviour is assumed.
-	This means exactly the reverse from the above:
-
-	1. IsRouter flag is set in Neighbour Advertisements.
-	2. Router Solicitations are not sent unless accept_ra is 2.
-	3. Router Advertisements are ignored unless accept_ra is 2.
-	4. Redirects are ignored.
-
-	Default: 0 (disabled) if global forwarding is disabled (default),
-		 otherwise 1 (enabled).
-
-hop_limit - INTEGER
-	Default Hop Limit to set.
-	Default: 64
-
-mtu - INTEGER
-	Default Maximum Transfer Unit
-	Default: 1280 (IPv6 required minimum)
-
-ip_nonlocal_bind - BOOLEAN
-	If set, allows processes to bind() to non-local IPv6 addresses,
-	which can be quite useful - but may break some applications.
-	Default: 0
-
-router_probe_interval - INTEGER
-	Minimum interval (in seconds) between Router Probing described
-	in RFC4191.
-
-	Default: 60
-
-router_solicitation_delay - INTEGER
-	Number of seconds to wait after interface is brought up
-	before sending Router Solicitations.
-	Default: 1
-
-router_solicitation_interval - INTEGER
-	Number of seconds to wait between Router Solicitations.
-	Default: 4
-
-router_solicitations - INTEGER
-	Number of Router Solicitations to send until assuming no
-	routers are present.
-	Default: 3
-
-use_oif_addrs_only - BOOLEAN
-	When enabled, the candidate source addresses for destinations
-	routed via this interface are restricted to the set of addresses
-	configured on this interface (vis. RFC 6724, section 4).
-
-	Default: false
-
-use_tempaddr - INTEGER
-	Preference for Privacy Extensions (RFC3041).
-	  <= 0 : disable Privacy Extensions
-	  == 1 : enable Privacy Extensions, but prefer public
-	         addresses over temporary addresses.
-	  >  1 : enable Privacy Extensions and prefer temporary
-	         addresses over public addresses.
-	Default:  0 (for most devices)
-		 -1 (for point-to-point devices and loopback devices)
-
-temp_valid_lft - INTEGER
-	valid lifetime (in seconds) for temporary addresses.
-	Default: 604800 (7 days)
-
-temp_prefered_lft - INTEGER
-	Preferred lifetime (in seconds) for temporary addresses.
-	Default: 86400 (1 day)
-
-keep_addr_on_down - INTEGER
-	Keep all IPv6 addresses on an interface down event. If set static
-	global addresses with no expiration time are not flushed.
-	  >0 : enabled
-	   0 : system default
-	  <0 : disabled
-
-	Default: 0 (addresses are removed)
-
-max_desync_factor - INTEGER
-	Maximum value for DESYNC_FACTOR, which is a random value
-	that ensures that clients don't synchronize with each
-	other and generate new addresses at exactly the same time.
-	value is in seconds.
-	Default: 600
-
-regen_max_retry - INTEGER
-	Number of attempts before give up attempting to generate
-	valid temporary addresses.
-	Default: 5
-
-max_addresses - INTEGER
-	Maximum number of autoconfigured addresses per interface.  Setting
-	to zero disables the limitation.  It is not recommended to set this
-	value too large (or to zero) because it would be an easy way to
-	crash the kernel by allowing too many addresses to be created.
-	Default: 16
-
-disable_ipv6 - BOOLEAN
-	Disable IPv6 operation.  If accept_dad is set to 2, this value
-	will be dynamically set to TRUE if DAD fails for the link-local
-	address.
-	Default: FALSE (enable IPv6 operation)
-
-	When this value is changed from 1 to 0 (IPv6 is being enabled),
-	it will dynamically create a link-local address on the given
-	interface and start Duplicate Address Detection, if necessary.
-
-	When this value is changed from 0 to 1 (IPv6 is being disabled),
-	it will dynamically delete all addresses and routes on the given
-	interface. From now on it will not possible to add addresses/routes
-	to the selected interface.
-
-accept_dad - INTEGER
-	Whether to accept DAD (Duplicate Address Detection).
-	0: Disable DAD
-	1: Enable DAD (default)
-	2: Enable DAD, and disable IPv6 operation if MAC-based duplicate
-	   link-local address has been found.
-
-	DAD operation and mode on a given interface will be selected according
-	to the maximum value of conf/{all,interface}/accept_dad.
-
-force_tllao - BOOLEAN
-	Enable sending the target link-layer address option even when
-	responding to a unicast neighbor solicitation.
-	Default: FALSE
-
-	Quoting from RFC 2461, section 4.4, Target link-layer address:
-
-	"The option MUST be included for multicast solicitations in order to
-	avoid infinite Neighbor Solicitation "recursion" when the peer node
-	does not have a cache entry to return a Neighbor Advertisements
-	message.  When responding to unicast solicitations, the option can be
-	omitted since the sender of the solicitation has the correct link-
-	layer address; otherwise it would not have be able to send the unicast
-	solicitation in the first place. However, including the link-layer
-	address in this case adds little overhead and eliminates a potential
-	race condition where the sender deletes the cached link-layer address
-	prior to receiving a response to a previous solicitation."
-
-ndisc_notify - BOOLEAN
-	Define mode for notification of address and device changes.
-	0 - (default): do nothing
-	1 - Generate unsolicited neighbour advertisements when device is brought
-	    up or hardware address changes.
-
-ndisc_tclass - INTEGER
-	The IPv6 Traffic Class to use by default when sending IPv6 Neighbor
-	Discovery (Router Solicitation, Router Advertisement, Neighbor
-	Solicitation, Neighbor Advertisement, Redirect) messages.
-	These 8 bits can be interpreted as 6 high order bits holding the DSCP
-	value and 2 low order bits representing ECN (which you probably want
-	to leave cleared).
-	0 - (default)
-
-mldv1_unsolicited_report_interval - INTEGER
-	The interval in milliseconds in which the next unsolicited
-	MLDv1 report retransmit will take place.
-	Default: 10000 (10 seconds)
-
-mldv2_unsolicited_report_interval - INTEGER
-	The interval in milliseconds in which the next unsolicited
-	MLDv2 report retransmit will take place.
-	Default: 1000 (1 second)
-
-force_mld_version - INTEGER
-	0 - (default) No enforcement of a MLD version, MLDv1 fallback allowed
-	1 - Enforce to use MLD version 1
-	2 - Enforce to use MLD version 2
-
-suppress_frag_ndisc - INTEGER
-	Control RFC 6980 (Security Implications of IPv6 Fragmentation
-	with IPv6 Neighbor Discovery) behavior:
-	1 - (default) discard fragmented neighbor discovery packets
-	0 - allow fragmented neighbor discovery packets
-
-optimistic_dad - BOOLEAN
-	Whether to perform Optimistic Duplicate Address Detection (RFC 4429).
-	0: disabled (default)
-	1: enabled
-
-	Optimistic Duplicate Address Detection for the interface will be enabled
-	if at least one of conf/{all,interface}/optimistic_dad is set to 1,
-	it will be disabled otherwise.
-
-use_optimistic - BOOLEAN
-	If enabled, do not classify optimistic addresses as deprecated during
-	source address selection.  Preferred addresses will still be chosen
-	before optimistic addresses, subject to other ranking in the source
-	address selection algorithm.
-	0: disabled (default)
-	1: enabled
-
-	This will be enabled if at least one of
-	conf/{all,interface}/use_optimistic is set to 1, disabled otherwise.
-
-stable_secret - IPv6 address
-	This IPv6 address will be used as a secret to generate IPv6
-	addresses for link-local addresses and autoconfigured
-	ones. All addresses generated after setting this secret will
-	be stable privacy ones by default. This can be changed via the
-	addrgenmode ip-link. conf/default/stable_secret is used as the
-	secret for the namespace, the interface specific ones can
-	overwrite that. Writes to conf/all/stable_secret are refused.
-
-	It is recommended to generate this secret during installation
-	of a system and keep it stable after that.
-
-	By default the stable secret is unset.
-
-addr_gen_mode - INTEGER
-	Defines how link-local and autoconf addresses are generated.
-
-	0: generate address based on EUI64 (default)
-	1: do no generate a link-local address, use EUI64 for addresses generated
-	   from autoconf
-	2: generate stable privacy addresses, using the secret from
-	   stable_secret (RFC7217)
-	3: generate stable privacy addresses, using a random secret if unset
-
-drop_unicast_in_l2_multicast - BOOLEAN
-	Drop any unicast IPv6 packets that are received in link-layer
-	multicast (or broadcast) frames.
-
-	By default this is turned off.
-
-drop_unsolicited_na - BOOLEAN
-	Drop all unsolicited neighbor advertisements, for example if there's
-	a known good NA proxy on the network and such frames need not be used
-	(or in the case of 802.11, must not be used to prevent attacks.)
-
-	By default this is turned off.
-
-enhanced_dad - BOOLEAN
-	Include a nonce option in the IPv6 neighbor solicitation messages used for
-	duplicate address detection per RFC7527. A received DAD NS will only signal
-	a duplicate address if the nonce is different. This avoids any false
-	detection of duplicates due to loopback of the NS messages that we send.
-	The nonce option will be sent on an interface unless both of
-	conf/{all,interface}/enhanced_dad are set to FALSE.
-	Default: TRUE
-
-icmp/*:
-ratelimit - INTEGER
-	Limit the maximal rates for sending ICMPv6 messages.
-	0 to disable any limiting,
-	otherwise the minimal space between responses in milliseconds.
-	Default: 1000
-
-ratemask - list of comma separated ranges
-	For ICMPv6 message types matching the ranges in the ratemask, limit
-	the sending of the message according to ratelimit parameter.
-
-	The format used for both input and output is a comma separated
-	list of ranges (e.g. "0-127,129" for ICMPv6 message type 0 to 127 and
-	129). Writing to the file will clear all previous ranges of ICMPv6
-	message types and update the current list with the input.
-
-	Refer to: https://www.iana.org/assignments/icmpv6-parameters/icmpv6-parameters.xhtml
-	for numerical values of ICMPv6 message types, e.g. echo request is 128
-	and echo reply is 129.
-
-	Default: 0-1,3-127 (rate limit ICMPv6 errors except Packet Too Big)
-
-echo_ignore_all - BOOLEAN
-	If set non-zero, then the kernel will ignore all ICMP ECHO
-	requests sent to it over the IPv6 protocol.
-	Default: 0
-
-echo_ignore_multicast - BOOLEAN
-	If set non-zero, then the kernel will ignore all ICMP ECHO
-	requests sent to it over the IPv6 protocol via multicast.
-	Default: 0
-
-echo_ignore_anycast - BOOLEAN
-	If set non-zero, then the kernel will ignore all ICMP ECHO
-	requests sent to it over the IPv6 protocol destined to anycast address.
-	Default: 0
-
-xfrm6_gc_thresh - INTEGER
-	(Obsolete since linux-4.14)
-	The threshold at which we will start garbage collecting for IPv6
-	destination cache entries.  At twice this value the system will
-	refuse new allocations.
-
-
-IPv6 Update by:
-Pekka Savola <pekkas@netcore.fi>
-YOSHIFUJI Hideaki / USAGI Project <yoshfuji@linux-ipv6.org>
-
-
-/proc/sys/net/bridge/* Variables:
-
-bridge-nf-call-arptables - BOOLEAN
-	1 : pass bridged ARP traffic to arptables' FORWARD chain.
-	0 : disable this.
-	Default: 1
-
-bridge-nf-call-iptables - BOOLEAN
-	1 : pass bridged IPv4 traffic to iptables' chains.
-	0 : disable this.
-	Default: 1
-
-bridge-nf-call-ip6tables - BOOLEAN
-	1 : pass bridged IPv6 traffic to ip6tables' chains.
-	0 : disable this.
-	Default: 1
-
-bridge-nf-filter-vlan-tagged - BOOLEAN
-	1 : pass bridged vlan-tagged ARP/IP/IPv6 traffic to {arp,ip,ip6}tables.
-	0 : disable this.
-	Default: 0
-
-bridge-nf-filter-pppoe-tagged - BOOLEAN
-	1 : pass bridged pppoe-tagged IP/IPv6 traffic to {ip,ip6}tables.
-	0 : disable this.
-	Default: 0
-
-bridge-nf-pass-vlan-input-dev - BOOLEAN
-	1: if bridge-nf-filter-vlan-tagged is enabled, try to find a vlan
-	interface on the bridge and set the netfilter input device to the vlan.
-	This allows use of e.g. "iptables -i br0.1" and makes the REDIRECT
-	target work with vlan-on-top-of-bridge interfaces.  When no matching
-	vlan interface is found, or this switch is off, the input device is
-	set to the bridge interface.
-	0: disable bridge netfilter vlan interface lookup.
-	Default: 0
-
-proc/sys/net/sctp/* Variables:
-
-addip_enable - BOOLEAN
-	Enable or disable extension of  Dynamic Address Reconfiguration
-	(ADD-IP) functionality specified in RFC5061.  This extension provides
-	the ability to dynamically add and remove new addresses for the SCTP
-	associations.
-
-	1: Enable extension.
-
-	0: Disable extension.
-
-	Default: 0
-
-pf_enable - INTEGER
-	Enable or disable pf (pf is short for potentially failed) state. A value
-	of pf_retrans > path_max_retrans also disables pf state. That is, one of
-	both pf_enable and pf_retrans > path_max_retrans can disable pf state.
-	Since pf_retrans and path_max_retrans can be changed by userspace
-	application, sometimes user expects to disable pf state by the value of
-	pf_retrans > path_max_retrans, but occasionally the value of pf_retrans
-	or path_max_retrans is changed by the user application, this pf state is
-	enabled. As such, it is necessary to add this to dynamically enable
-	and disable pf state. See:
-	https://datatracker.ietf.org/doc/draft-ietf-tsvwg-sctp-failover for
-	details.
-
-	1: Enable pf.
-
-	0: Disable pf.
-
-	Default: 1
-
-pf_expose - INTEGER
-	Unset or enable/disable pf (pf is short for potentially failed) state
-	exposure.  Applications can control the exposure of the PF path state
-	in the SCTP_PEER_ADDR_CHANGE event and the SCTP_GET_PEER_ADDR_INFO
-	sockopt.   When it's unset, no SCTP_PEER_ADDR_CHANGE event with
-	SCTP_ADDR_PF state will be sent and a SCTP_PF-state transport info
-	can be got via SCTP_GET_PEER_ADDR_INFO sockopt;  When it's enabled,
-	a SCTP_PEER_ADDR_CHANGE event will be sent for a transport becoming
-	SCTP_PF state and a SCTP_PF-state transport info can be got via
-	SCTP_GET_PEER_ADDR_INFO sockopt;  When it's diabled, no
-	SCTP_PEER_ADDR_CHANGE event will be sent and it returns -EACCES when
-	trying to get a SCTP_PF-state transport info via SCTP_GET_PEER_ADDR_INFO
-	sockopt.
-
-	0: Unset pf state exposure, Compatible with old applications.
-
-	1: Disable pf state exposure.
-
-	2: Enable pf state exposure.
-
-	Default: 0
-
-addip_noauth_enable - BOOLEAN
-	Dynamic Address Reconfiguration (ADD-IP) requires the use of
-	authentication to protect the operations of adding or removing new
-	addresses.  This requirement is mandated so that unauthorized hosts
-	would not be able to hijack associations.  However, older
-	implementations may not have implemented this requirement while
-	allowing the ADD-IP extension.  For reasons of interoperability,
-	we provide this variable to control the enforcement of the
-	authentication requirement.
-
-	1: Allow ADD-IP extension to be used without authentication.  This
-	   should only be set in a closed environment for interoperability
-	   with older implementations.
-
-	0: Enforce the authentication requirement
-
-	Default: 0
-
-auth_enable - BOOLEAN
-	Enable or disable Authenticated Chunks extension.  This extension
-	provides the ability to send and receive authenticated chunks and is
-	required for secure operation of Dynamic Address Reconfiguration
-	(ADD-IP) extension.
-
-	1: Enable this extension.
-	0: Disable this extension.
-
-	Default: 0
-
-prsctp_enable - BOOLEAN
-	Enable or disable the Partial Reliability extension (RFC3758) which
-	is used to notify peers that a given DATA should no longer be expected.
-
-	1: Enable extension
-	0: Disable
-
-	Default: 1
-
-max_burst - INTEGER
-	The limit of the number of new packets that can be initially sent.  It
-	controls how bursty the generated traffic can be.
-
-	Default: 4
-
-association_max_retrans - INTEGER
-	Set the maximum number for retransmissions that an association can
-	attempt deciding that the remote end is unreachable.  If this value
-	is exceeded, the association is terminated.
-
-	Default: 10
-
-max_init_retransmits - INTEGER
-	The maximum number of retransmissions of INIT and COOKIE-ECHO chunks
-	that an association will attempt before declaring the destination
-	unreachable and terminating.
-
-	Default: 8
-
-path_max_retrans - INTEGER
-	The maximum number of retransmissions that will be attempted on a given
-	path.  Once this threshold is exceeded, the path is considered
-	unreachable, and new traffic will use a different path when the
-	association is multihomed.
-
-	Default: 5
-
-pf_retrans - INTEGER
-	The number of retransmissions that will be attempted on a given path
-	before traffic is redirected to an alternate transport (should one
-	exist).  Note this is distinct from path_max_retrans, as a path that
-	passes the pf_retrans threshold can still be used.  Its only
-	deprioritized when a transmission path is selected by the stack.  This
-	setting is primarily used to enable fast failover mechanisms without
-	having to reduce path_max_retrans to a very low value.  See:
-	http://www.ietf.org/id/draft-nishida-tsvwg-sctp-failover-05.txt
-	for details.  Note also that a value of pf_retrans > path_max_retrans
-	disables this feature. Since both pf_retrans and path_max_retrans can
-	be changed by userspace application, a variable pf_enable is used to
-	disable pf state.
-
-	Default: 0
-
-ps_retrans - INTEGER
-	Primary.Switchover.Max.Retrans (PSMR), it's a tunable parameter coming
-	from section-5 "Primary Path Switchover" in rfc7829.  The primary path
-	will be changed to another active path when the path error counter on
-	the old primary path exceeds PSMR, so that "the SCTP sender is allowed
-	to continue data transmission on a new working path even when the old
-	primary destination address becomes active again".   Note this feature
-	is disabled by initializing 'ps_retrans' per netns as 0xffff by default,
-	and its value can't be less than 'pf_retrans' when changing by sysctl.
-
-	Default: 0xffff
-
-rto_initial - INTEGER
-	The initial round trip timeout value in milliseconds that will be used
-	in calculating round trip times.  This is the initial time interval
-	for retransmissions.
-
-	Default: 3000
-
-rto_max - INTEGER
-	The maximum value (in milliseconds) of the round trip timeout.  This
-	is the largest time interval that can elapse between retransmissions.
-
-	Default: 60000
-
-rto_min - INTEGER
-	The minimum value (in milliseconds) of the round trip timeout.  This
-	is the smallest time interval the can elapse between retransmissions.
-
-	Default: 1000
-
-hb_interval - INTEGER
-	The interval (in milliseconds) between HEARTBEAT chunks.  These chunks
-	are sent at the specified interval on idle paths to probe the state of
-	a given path between 2 associations.
-
-	Default: 30000
-
-sack_timeout - INTEGER
-	The amount of time (in milliseconds) that the implementation will wait
-	to send a SACK.
-
-	Default: 200
-
-valid_cookie_life - INTEGER
-	The default lifetime of the SCTP cookie (in milliseconds).  The cookie
-	is used during association establishment.
-
-	Default: 60000
-
-cookie_preserve_enable - BOOLEAN
-	Enable or disable the ability to extend the lifetime of the SCTP cookie
-	that is used during the establishment phase of SCTP association
-
-	1: Enable cookie lifetime extension.
-	0: Disable
-
-	Default: 1
-
-cookie_hmac_alg - STRING
-	Select the hmac algorithm used when generating the cookie value sent by
-	a listening sctp socket to a connecting client in the INIT-ACK chunk.
-	Valid values are:
-	* md5
-	* sha1
-	* none
-	Ability to assign md5 or sha1 as the selected alg is predicated on the
-	configuration of those algorithms at build time (CONFIG_CRYPTO_MD5 and
-	CONFIG_CRYPTO_SHA1).
-
-	Default: Dependent on configuration.  MD5 if available, else SHA1 if
-	available, else none.
-
-rcvbuf_policy - INTEGER
-	Determines if the receive buffer is attributed to the socket or to
-	association.   SCTP supports the capability to create multiple
-	associations on a single socket.  When using this capability, it is
-	possible that a single stalled association that's buffering a lot
-	of data may block other associations from delivering their data by
-	consuming all of the receive buffer space.  To work around this,
-	the rcvbuf_policy could be set to attribute the receiver buffer space
-	to each association instead of the socket.  This prevents the described
-	blocking.
-
-	1: rcvbuf space is per association
-	0: rcvbuf space is per socket
-
-	Default: 0
-
-sndbuf_policy - INTEGER
-	Similar to rcvbuf_policy above, this applies to send buffer space.
-
-	1: Send buffer is tracked per association
-	0: Send buffer is tracked per socket.
-
-	Default: 0
-
-sctp_mem - vector of 3 INTEGERs: min, pressure, max
-	Number of pages allowed for queueing by all SCTP sockets.
-
-	min: Below this number of pages SCTP is not bothered about its
-	memory appetite. When amount of memory allocated by SCTP exceeds
-	this number, SCTP starts to moderate memory usage.
-
-	pressure: This value was introduced to follow format of tcp_mem.
-
-	max: Number of pages allowed for queueing by all SCTP sockets.
-
-	Default is calculated at boot time from amount of available memory.
-
-sctp_rmem - vector of 3 INTEGERs: min, default, max
-	Only the first value ("min") is used, "default" and "max" are
-	ignored.
-
-	min: Minimal size of receive buffer used by SCTP socket.
-	It is guaranteed to each SCTP socket (but not association) even
-	under moderate memory pressure.
-
-	Default: 4K
-
-sctp_wmem  - vector of 3 INTEGERs: min, default, max
-	Currently this tunable has no effect.
-
-addr_scope_policy - INTEGER
-	Control IPv4 address scoping - draft-stewart-tsvwg-sctp-ipv4-00
-
-	0   - Disable IPv4 address scoping
-	1   - Enable IPv4 address scoping
-	2   - Follow draft but allow IPv4 private addresses
-	3   - Follow draft but allow IPv4 link local addresses
-
-	Default: 1
-
-
-/proc/sys/net/core/*
-	Please see: Documentation/admin-guide/sysctl/net.rst for descriptions of these entries.
-
-
-/proc/sys/net/unix/*
-max_dgram_qlen - INTEGER
-	The maximum length of dgram socket receive queue
-
-	Default: 10
-
diff --git a/Documentation/networking/snmp_counter.rst b/Documentation/networking/snmp_counter.rst
index 10e11099e74a..4edd0d38779e 100644
--- a/Documentation/networking/snmp_counter.rst
+++ b/Documentation/networking/snmp_counter.rst
@@ -792,7 +792,7 @@ counters to indicate the ACK is skipped in which scenario. The ACK
 would only be skipped if the received packet is either a SYN packet or
 it has no data.
 
-.. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.txt
+.. _sysctl document: https://www.kernel.org/doc/Documentation/networking/ip-sysctl.rst
 
 * TcpExtTCPACKSkippedSynRecv
 
diff --git a/net/Kconfig b/net/Kconfig
index df8d8c9bd021..8b1f85820a6b 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -86,7 +86,7 @@ config INET
 	  "Sysctl support" below, you can change various aspects of the
 	  behavior of the TCP/IP code by writing to the (virtual) files in
 	  /proc/sys/net/ipv4/*; the options are explained in the file
-	  <file:Documentation/networking/ip-sysctl.txt>.
+	  <file:Documentation/networking/ip-sysctl.rst>.
 
 	  Short answer: say Y.
 
diff --git a/net/ipv4/Kconfig b/net/ipv4/Kconfig
index 25a8888826b8..5da4733067fb 100644
--- a/net/ipv4/Kconfig
+++ b/net/ipv4/Kconfig
@@ -49,7 +49,7 @@ config IP_ADVANCED_ROUTER
 
 	  Note that some distributions enable it in startup scripts.
 	  For details about rp_filter strict and loose mode read
-	  <file:Documentation/networking/ip-sysctl.txt>.
+	  <file:Documentation/networking/ip-sysctl.rst>.
 
 	  If unsure, say N here.
 
diff --git a/net/ipv4/icmp.c b/net/ipv4/icmp.c
index fc61f51d87a3..956a806649f7 100644
--- a/net/ipv4/icmp.c
+++ b/net/ipv4/icmp.c
@@ -853,7 +853,7 @@ static bool icmp_unreach(struct sk_buff *skb)
 		case ICMP_FRAG_NEEDED:
 			/* for documentation of the ip_no_pmtu_disc
 			 * values please see
-			 * Documentation/networking/ip-sysctl.txt
+			 * Documentation/networking/ip-sysctl.rst
 			 */
 			switch (net->ipv4.sysctl_ip_no_pmtu_disc) {
 			default:
-- 
cgit v1.2.3-59-g8ed1b


From 19093313cb0486d568232934bb80dd422d891623 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:50 +0200
Subject: docs: networking: convert ipv6.txt to ReST

Not much to be done here:

- add SPDX header;
- add a document title;
- mark a literal as such, in order to avoid a warning;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/kernel-parameters.txt |  6 +-
 Documentation/networking/index.rst              |  1 +
 Documentation/networking/ipv6.rst               | 78 +++++++++++++++++++++++++
 Documentation/networking/ipv6.txt               | 72 -----------------------
 net/ipv6/Kconfig                                |  2 +-
 5 files changed, 83 insertions(+), 76 deletions(-)
 create mode 100644 Documentation/networking/ipv6.rst
 delete mode 100644 Documentation/networking/ipv6.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e37db6f1be64..e43f2e1f2958 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -356,7 +356,7 @@
 			      shot down by NMI
 
 	autoconf=	[IPV6]
-			See Documentation/networking/ipv6.txt.
+			See Documentation/networking/ipv6.rst.
 
 	show_lapic=	[APIC,X86] Advanced Programmable Interrupt Controller
 			Limit apic dumping. The parameter defines the maximal
@@ -872,7 +872,7 @@
 			miss to occur.
 
 	disable=	[IPV6]
-			See Documentation/networking/ipv6.txt.
+			See Documentation/networking/ipv6.rst.
 
 	hardened_usercopy=
                         [KNL] Under CONFIG_HARDENED_USERCOPY, whether
@@ -912,7 +912,7 @@
 			to workaround buggy firmware.
 
 	disable_ipv6=	[IPV6]
-			See Documentation/networking/ipv6.txt.
+			See Documentation/networking/ipv6.rst.
 
 	disable_mtrr_cleanup [X86]
 			The kernel tries to adjust MTRR layout from continuous
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 7d133d8dbe2a..709675464e51 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -70,6 +70,7 @@ Contents:
    iphase
    ipsec
    ip-sysctl
+   ipv6
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ipv6.rst b/Documentation/networking/ipv6.rst
new file mode 100644
index 000000000000..ba09c2f2dcc7
--- /dev/null
+++ b/Documentation/networking/ipv6.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+IPv6
+====
+
+
+Options for the ipv6 module are supplied as parameters at load time.
+
+Module options may be given as command line arguments to the insmod
+or modprobe command, but are usually specified in either
+``/etc/modules.d/*.conf`` configuration files, or in a distro-specific
+configuration file.
+
+The available ipv6 module parameters are listed below.  If a parameter
+is not specified the default value is used.
+
+The parameters are as follows:
+
+disable
+
+	Specifies whether to load the IPv6 module, but disable all
+	its functionality.  This might be used when another module
+	has a dependency on the IPv6 module being loaded, but no
+	IPv6 addresses or operations are desired.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 is enabled.
+
+		This is the default value.
+
+	1
+		IPv6 is disabled.
+
+		No IPv6 addresses will be added to interfaces, and
+		it will not be possible to open an IPv6 socket.
+
+		A reboot is required to enable IPv6.
+
+autoconf
+
+	Specifies whether to enable IPv6 address autoconfiguration
+	on all interfaces.  This might be used when one does not wish
+	for addresses to be automatically generated from prefixes
+	received in Router Advertisements.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 address autoconfiguration is disabled on all interfaces.
+
+		Only the IPv6 loopback address (::1) and link-local addresses
+		will be added to interfaces.
+
+	1
+		IPv6 address autoconfiguration is enabled on all interfaces.
+
+		This is the default value.
+
+disable_ipv6
+
+	Specifies whether to disable IPv6 on all interfaces.
+	This might be used when no IPv6 addresses are desired.
+
+	The possible values and their effects are:
+
+	0
+		IPv6 is enabled on all interfaces.
+
+		This is the default value.
+
+	1
+		IPv6 is disabled on all interfaces.
+
+		No IPv6 addresses will be added to interfaces.
+
diff --git a/Documentation/networking/ipv6.txt b/Documentation/networking/ipv6.txt
deleted file mode 100644
index 6cd74fa55358..000000000000
--- a/Documentation/networking/ipv6.txt
+++ /dev/null
@@ -1,72 +0,0 @@
-
-Options for the ipv6 module are supplied as parameters at load time.
-
-Module options may be given as command line arguments to the insmod
-or modprobe command, but are usually specified in either
-/etc/modules.d/*.conf configuration files, or in a distro-specific
-configuration file.
-
-The available ipv6 module parameters are listed below.  If a parameter
-is not specified the default value is used.
-
-The parameters are as follows:
-
-disable
-
-	Specifies whether to load the IPv6 module, but disable all
-	its functionality.  This might be used when another module
-	has a dependency on the IPv6 module being loaded, but no
-	IPv6 addresses or operations are desired.
-
-	The possible values and their effects are:
-
-	0
-		IPv6 is enabled.
-
-		This is the default value.
-
-	1
-		IPv6 is disabled.
-
-		No IPv6 addresses will be added to interfaces, and
-		it will not be possible to open an IPv6 socket.
-
-		A reboot is required to enable IPv6.
-
-autoconf
-
-	Specifies whether to enable IPv6 address autoconfiguration
-	on all interfaces.  This might be used when one does not wish
-	for addresses to be automatically generated from prefixes
-	received in Router Advertisements.
-
-	The possible values and their effects are:
-
-	0
-		IPv6 address autoconfiguration is disabled on all interfaces.
-
-		Only the IPv6 loopback address (::1) and link-local addresses
-		will be added to interfaces.
-
-	1
-		IPv6 address autoconfiguration is enabled on all interfaces.
-
-		This is the default value.
-
-disable_ipv6
-
-	Specifies whether to disable IPv6 on all interfaces.
-	This might be used when no IPv6 addresses are desired.
-
-	The possible values and their effects are:
-
-	0
-		IPv6 is enabled on all interfaces.
-
-		This is the default value.
-
-	1
-		IPv6 is disabled on all interfaces.
-
-		No IPv6 addresses will be added to interfaces.
-
diff --git a/net/ipv6/Kconfig b/net/ipv6/Kconfig
index 2ccaee98fddb..5a6111da26c4 100644
--- a/net/ipv6/Kconfig
+++ b/net/ipv6/Kconfig
@@ -13,7 +13,7 @@ menuconfig IPV6
 	  For general information about IPv6, see
 	  <https://en.wikipedia.org/wiki/IPv6>.
 	  For specific information about IPv6 under Linux, see
-	  Documentation/networking/ipv6.txt and read the HOWTO at
+	  Documentation/networking/ipv6.rst and read the HOWTO at
 	  <http://www.tldp.org/HOWTO/Linux+IPv6-HOWTO/>
 
 	  To compile this protocol support as a module, choose M here: the
-- 
cgit v1.2.3-59-g8ed1b


From 1dc2a785954bf4e562d0c85bea435ee56f705db5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:51 +0200
Subject: docs: networking: convert ipvlan.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/ipvlan.rst | 189 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/ipvlan.txt | 146 ----------------------------
 3 files changed, 190 insertions(+), 146 deletions(-)
 create mode 100644 Documentation/networking/ipvlan.rst
 delete mode 100644 Documentation/networking/ipvlan.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 709675464e51..54dee1575b54 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -71,6 +71,7 @@ Contents:
    ipsec
    ip-sysctl
    ipv6
+   ipvlan
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ipvlan.rst b/Documentation/networking/ipvlan.rst
new file mode 100644
index 000000000000..694adcba36b0
--- /dev/null
+++ b/Documentation/networking/ipvlan.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================
+IPVLAN Driver HOWTO
+===================
+
+Initial Release:
+	Mahesh Bandewar <maheshb AT google.com>
+
+1. Introduction:
+================
+This is conceptually very similar to the macvlan driver with one major
+exception of using L3 for mux-ing /demux-ing among slaves. This property makes
+the master device share the L2 with its slave devices. I have developed this
+driver in conjunction with network namespaces and not sure if there is use case
+outside of it.
+
+
+2. Building and Installation:
+=============================
+
+In order to build the driver, please select the config item CONFIG_IPVLAN.
+The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
+(CONFIG_IPVLAN=m).
+
+
+3. Configuration:
+=================
+
+There are no module parameters for this driver and it can be configured
+using IProute2/ip utility.
+::
+
+    ip link add link <master> name <slave> type ipvlan [ mode MODE ] [ FLAGS ]
+       where
+	 MODE: l3 (default) | l3s | l2
+	 FLAGS: bridge (default) | private | vepa
+
+e.g.
+
+    (a) Following will create IPvlan link with eth0 as master in
+	L3 bridge mode::
+
+	  bash# ip link add link eth0 name ipvl0 type ipvlan
+    (b) This command will create IPvlan link in L2 bridge mode::
+
+	  bash# ip link add link eth0 name ipvl0 type ipvlan mode l2 bridge
+
+    (c) This command will create an IPvlan device in L2 private mode::
+
+	  bash# ip link add link eth0 name ipvlan type ipvlan mode l2 private
+
+    (d) This command will create an IPvlan device in L2 vepa mode::
+
+	  bash# ip link add link eth0 name ipvlan type ipvlan mode l2 vepa
+
+
+4. Operating modes:
+===================
+
+IPvlan has two modes of operation - L2 and L3. For a given master device,
+you can select one of these two modes and all slaves on that master will
+operate in the same (selected) mode. The RX mode is almost identical except
+that in L3 mode the slaves won't receive any multicast / broadcast traffic.
+L3 mode is more restrictive since routing is controlled from the other (mostly)
+default namespace.
+
+4.1 L2 mode:
+------------
+
+In this mode TX processing happens on the stack instance attached to the
+slave device and packets are switched and queued to the master device to send
+out. In this mode the slaves will RX/TX multicast and broadcast (if applicable)
+as well.
+
+4.2 L3 mode:
+------------
+
+In this mode TX processing up to L3 happens on the stack instance attached
+to the slave device and packets are switched to the stack instance of the
+master device for the L2 processing and routing from that instance will be
+used before packets are queued on the outbound device. In this mode the slaves
+will not receive nor can send multicast / broadcast traffic.
+
+4.3 L3S mode:
+-------------
+
+This is very similar to the L3 mode except that iptables (conn-tracking)
+works in this mode and hence it is L3-symmetric (L3s). This will have slightly less
+performance but that shouldn't matter since you are choosing this mode over plain-L3
+mode to make conn-tracking work.
+
+5. Mode flags:
+==============
+
+At this time the following mode flags are available:
+
+5.1 bridge:
+-----------
+This is the default option. To configure the IPvlan port in this mode,
+user can choose to either add this option on the command-line or don't specify
+anything. This is the traditional mode where slaves can cross-talk among
+themselves apart from talking through the master device.
+
+5.2 private:
+------------
+If this option is added to the command-line, the port is set in private
+mode. i.e. port won't allow cross communication between slaves.
+
+5.3 vepa:
+---------
+If this is added to the command-line, the port is set in VEPA mode.
+i.e. port will offload switching functionality to the external entity as
+described in 802.1Qbg
+Note: VEPA mode in IPvlan has limitations. IPvlan uses the mac-address of the
+master-device, so the packets which are emitted in this mode for the adjacent
+neighbor will have source and destination mac same. This will make the switch /
+router send the redirect message.
+
+6. What to choose (macvlan vs. ipvlan)?
+=======================================
+
+These two devices are very similar in many regards and the specific use
+case could very well define which device to choose. If one of the following
+situations defines your use case then you can choose to use ipvlan:
+
+
+(a) The Linux host that is connected to the external switch / router has
+    policy configured that allows only one mac per port.
+(b) No of virtual devices created on a master exceed the mac capacity and
+    puts the NIC in promiscuous mode and degraded performance is a concern.
+(c) If the slave device is to be put into the hostile / untrusted network
+    namespace where L2 on the slave could be changed / misused.
+
+
+7. Example configuration:
+=========================
+
+::
+
+  +=============================================================+
+  |  Host: host1                                                |
+  |                                                             |
+  |   +----------------------+      +----------------------+    |
+  |   |   NS:ns0             |      |  NS:ns1              |    |
+  |   |                      |      |                      |    |
+  |   |                      |      |                      |    |
+  |   |        ipvl0         |      |         ipvl1        |    |
+  |   +----------#-----------+      +-----------#----------+    |
+  |              #                              #               |
+  |              ################################               |
+  |                              # eth0                         |
+  +==============================#==============================+
+
+
+(a) Create two network namespaces - ns0, ns1::
+
+	ip netns add ns0
+	ip netns add ns1
+
+(b) Create two ipvlan slaves on eth0 (master device)::
+
+	ip link add link eth0 ipvl0 type ipvlan mode l2
+	ip link add link eth0 ipvl1 type ipvlan mode l2
+
+(c) Assign slaves to the respective network namespaces::
+
+	ip link set dev ipvl0 netns ns0
+	ip link set dev ipvl1 netns ns1
+
+(d) Now switch to the namespace (ns0 or ns1) to configure the slave devices
+
+	- For ns0::
+
+		(1) ip netns exec ns0 bash
+		(2) ip link set dev ipvl0 up
+		(3) ip link set dev lo up
+		(4) ip -4 addr add 127.0.0.1 dev lo
+		(5) ip -4 addr add $IPADDR dev ipvl0
+		(6) ip -4 route add default via $ROUTER dev ipvl0
+
+	- For ns1::
+
+		(1) ip netns exec ns1 bash
+		(2) ip link set dev ipvl1 up
+		(3) ip link set dev lo up
+		(4) ip -4 addr add 127.0.0.1 dev lo
+		(5) ip -4 addr add $IPADDR dev ipvl1
+		(6) ip -4 route add default via $ROUTER dev ipvl1
diff --git a/Documentation/networking/ipvlan.txt b/Documentation/networking/ipvlan.txt
deleted file mode 100644
index 27a38e50c287..000000000000
--- a/Documentation/networking/ipvlan.txt
+++ /dev/null
@@ -1,146 +0,0 @@
-
-                            IPVLAN Driver HOWTO
-
-Initial Release:
-	Mahesh Bandewar <maheshb AT google.com>
-
-1. Introduction:
-	This is conceptually very similar to the macvlan driver with one major
-exception of using L3 for mux-ing /demux-ing among slaves. This property makes
-the master device share the L2 with it's slave devices. I have developed this
-driver in conjunction with network namespaces and not sure if there is use case
-outside of it.
-
-
-2. Building and Installation:
-	In order to build the driver, please select the config item CONFIG_IPVLAN.
-The driver can be built into the kernel (CONFIG_IPVLAN=y) or as a module
-(CONFIG_IPVLAN=m).
-
-
-3. Configuration:
-	There are no module parameters for this driver and it can be configured
-using IProute2/ip utility.
-
-    ip link add link <master> name <slave> type ipvlan [ mode MODE ] [ FLAGS ]
-       where
-         MODE: l3 (default) | l3s | l2
-         FLAGS: bridge (default) | private | vepa
-
-    e.g.
-    (a) Following will create IPvlan link with eth0 as master in
-        L3 bridge mode
-          bash# ip link add link eth0 name ipvl0 type ipvlan
-    (b) This command will create IPvlan link in L2 bridge mode.
-          bash# ip link add link eth0 name ipvl0 type ipvlan mode l2 bridge
-    (c) This command will create an IPvlan device in L2 private mode.
-          bash# ip link add link eth0 name ipvlan type ipvlan mode l2 private
-    (d) This command will create an IPvlan device in L2 vepa mode.
-          bash# ip link add link eth0 name ipvlan type ipvlan mode l2 vepa
-
-
-4. Operating modes:
-	IPvlan has two modes of operation - L2 and L3. For a given master device,
-you can select one of these two modes and all slaves on that master will
-operate in the same (selected) mode. The RX mode is almost identical except
-that in L3 mode the slaves wont receive any multicast / broadcast traffic.
-L3 mode is more restrictive since routing is controlled from the other (mostly)
-default namespace.
-
-4.1 L2 mode:
-	In this mode TX processing happens on the stack instance attached to the
-slave device and packets are switched and queued to the master device to send
-out. In this mode the slaves will RX/TX multicast and broadcast (if applicable)
-as well.
-
-4.2 L3 mode:
-	In this mode TX processing up to L3 happens on the stack instance attached
-to the slave device and packets are switched to the stack instance of the
-master device for the L2 processing and routing from that instance will be
-used before packets are queued on the outbound device. In this mode the slaves
-will not receive nor can send multicast / broadcast traffic.
-
-4.3 L3S mode:
-	This is very similar to the L3 mode except that iptables (conn-tracking)
-works in this mode and hence it is L3-symmetric (L3s). This will have slightly less
-performance but that shouldn't matter since you are choosing this mode over plain-L3
-mode to make conn-tracking work.
-
-5. Mode flags:
-	At this time following mode flags are available
-
-5.1 bridge:
-	This is the default option. To configure the IPvlan port in this mode,
-user can choose to either add this option on the command-line or don't specify
-anything. This is the traditional mode where slaves can cross-talk among
-themselves apart from talking through the master device.
-
-5.2 private:
-	If this option is added to the command-line, the port is set in private
-mode. i.e. port won't allow cross communication between slaves.
-
-5.3 vepa:
-	If this is added to the command-line, the port is set in VEPA mode.
-i.e. port will offload switching functionality to the external entity as
-described in 802.1Qbg
-Note: VEPA mode in IPvlan has limitations. IPvlan uses the mac-address of the
-master-device, so the packets which are emitted in this mode for the adjacent
-neighbor will have source and destination mac same. This will make the switch /
-router send the redirect message.
-
-6. What to choose (macvlan vs. ipvlan)?
-	These two devices are very similar in many regards and the specific use
-case could very well define which device to choose. if one of the following
-situations defines your use case then you can choose to use ipvlan -
-	(a) The Linux host that is connected to the external switch / router has
-policy configured that allows only one mac per port.
-	(b) No of virtual devices created on a master exceed the mac capacity and
-puts the NIC in promiscuous mode and degraded performance is a concern.
-	(c) If the slave device is to be put into the hostile / untrusted network
-namespace where L2 on the slave could be changed / misused.
-
-
-6. Example configuration:
-
-  +=============================================================+
-  |  Host: host1                                                |
-  |                                                             |
-  |   +----------------------+      +----------------------+    |
-  |   |   NS:ns0             |      |  NS:ns1              |    |
-  |   |                      |      |                      |    |
-  |   |                      |      |                      |    |
-  |   |        ipvl0         |      |         ipvl1        |    |
-  |   +----------#-----------+      +-----------#----------+    |
-  |              #                              #               |
-  |              ################################               |
-  |                              # eth0                         |
-  +==============================#==============================+
-
-
-	(a) Create two network namespaces - ns0, ns1
-		ip netns add ns0
-		ip netns add ns1
-
-	(b) Create two ipvlan slaves on eth0 (master device)
-		ip link add link eth0 ipvl0 type ipvlan mode l2
-		ip link add link eth0 ipvl1 type ipvlan mode l2
-
-	(c) Assign slaves to the respective network namespaces
-		ip link set dev ipvl0 netns ns0
-		ip link set dev ipvl1 netns ns1
-
-	(d) Now switch to the namespace (ns0 or ns1) to configure the slave devices
-		- For ns0
-			(1) ip netns exec ns0 bash
-			(2) ip link set dev ipvl0 up
-			(3) ip link set dev lo up
-			(4) ip -4 addr add 127.0.0.1 dev lo
-			(5) ip -4 addr add $IPADDR dev ipvl0
-			(6) ip -4 route add default via $ROUTER dev ipvl0
-		- For ns1
-			(1) ip netns exec ns1 bash
-			(2) ip link set dev ipvl1 up
-			(3) ip link set dev lo up
-			(4) ip -4 addr add 127.0.0.1 dev lo
-			(5) ip -4 addr add $IPADDR dev ipvl1
-			(6) ip -4 route add default via $ROUTER dev ipvl1
-- 
cgit v1.2.3-59-g8ed1b


From 82a07bf33d7d0c3a194f62178e0fea2d68227b89 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:52 +0200
Subject: docs: networking: convert ipvs-sysctl.txt to ReST

- add SPDX header;
- add a document title;
- mark lists as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Simon Horman <horms@verge.net.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/sysctl/net.rst |   4 +-
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/ipvs-sysctl.rst | 302 +++++++++++++++++++++++++++++++
 Documentation/networking/ipvs-sysctl.txt | 294 ------------------------------
 MAINTAINERS                              |   2 +-
 5 files changed, 306 insertions(+), 297 deletions(-)
 create mode 100644 Documentation/networking/ipvs-sysctl.rst
 delete mode 100644 Documentation/networking/ipvs-sysctl.txt

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 84e3348a9543..2ad1b77a7182 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -353,8 +353,8 @@ socket's buffer. It will not take effect unless PF_UNIX flag is specified.
 
 3. /proc/sys/net/ipv4 - IPV4 settings
 -------------------------------------
-Please see: Documentation/networking/ip-sysctl.rst and ipvs-sysctl.txt for
-descriptions of these entries.
+Please see: Documentation/networking/ip-sysctl.rst and
+Documentation/networking/ipvs-sysctl.rst for descriptions of these entries.
 
 
 4. Appletalk
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 54dee1575b54..bbd4e0041457 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -72,6 +72,7 @@ Contents:
    ip-sysctl
    ipv6
    ipvlan
+   ipvs-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ipvs-sysctl.rst b/Documentation/networking/ipvs-sysctl.rst
new file mode 100644
index 000000000000..be36c4600e8f
--- /dev/null
+++ b/Documentation/networking/ipvs-sysctl.rst
@@ -0,0 +1,302 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+IPvs-sysctl
+===========
+
+/proc/sys/net/ipv4/vs/* Variables:
+==================================
+
+am_droprate - INTEGER
+	default 10
+
+	It sets the always mode drop rate, which is used in the mode 3
+	of the drop_rate defense.
+
+amemthresh - INTEGER
+	default 1024
+
+	It sets the available memory threshold (in pages), which is
+	used in the automatic modes of defense. When there is not
+	enough available memory, the respective strategy will be
+	enabled and the variable is automatically set to 2, otherwise
+	the strategy is disabled and the variable is set to 1.
+
+backup_only - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	If set, disable the director function while the server is
+	in backup mode to avoid packet loops for DR/TUN methods.
+
+conn_reuse_mode - INTEGER
+	1 - default
+
+	Controls how ipvs will deal with connections for which port reuse
+	is detected. It is a bitmap, with the values being:
+
+	0: disable any special handling on port reuse. The new
+	connection will be delivered to the same real server that was
+	servicing the previous connection. This will effectively
+	disable expire_nodest_conn.
+
+	bit 1: enable rescheduling of new connections when it is safe.
+	That is, whenever expire_nodest_conn and for TCP sockets, when
+	the connection is in TIME_WAIT state (which is only possible if
+	you use NAT mode).
+
+	bit 2: it is bit 1 plus, for TCP connections, when connections
+	are in FIN_WAIT state, as this is the last state seen by load
+	balancer in Direct Routing mode. This bit helps on adding new
+	real servers to a very busy cluster.
+
+conntrack - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	If set, maintain connection tracking entries for
+	connections handled by IPVS.
+
+	This should be enabled if connections handled by IPVS are to be
+	also handled by stateful firewall rules. That is, iptables rules
+	that make use of connection tracking.  It is a performance
+	optimisation to disable this setting otherwise.
+
+	Connections handled by the IPVS FTP application module
+	will have connection tracking entries regardless of this setting.
+
+	Only available when IPVS is compiled with CONFIG_IP_VS_NFCT enabled.
+
+cache_bypass - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	If it is enabled, forward packets to the original destination
+	directly when no cache server is available and destination
+	address is not local (iph->daddr is RTN_UNICAST). It is mostly
+	used in transparent web cache cluster.
+
+debug_level - INTEGER
+	- 0          - transmission error messages (default)
+	- 1          - non-fatal error messages
+	- 2          - configuration
+	- 3          - destination trash
+	- 4          - drop entry
+	- 5          - service lookup
+	- 6          - scheduling
+	- 7          - connection new/expire, lookup and synchronization
+	- 8          - state transition
+	- 9          - binding destination, template checks and applications
+	- 10         - IPVS packet transmission
+	- 11         - IPVS packet handling (ip_vs_in/ip_vs_out)
+	- 12 or more - packet traversal
+
+	Only available when IPVS is compiled with CONFIG_IP_VS_DEBUG enabled.
+
+	Higher debugging levels include the messages for lower debugging
+	levels, so setting debug level 2, includes level 0, 1 and 2
+	messages. Thus, logging becomes more and more verbose the higher
+	the level.
+
+drop_entry - INTEGER
+	- 0  - disabled (default)
+
+	The drop_entry defense is to randomly drop entries in the
+	connection hash table, just in order to collect back some
+	memory for new connections. In the current code, the
+	drop_entry procedure can be activated every second, then it
+	randomly scans 1/32 of the whole and drops entries that are in
+	the SYN-RECV/SYNACK state, which should be effective against
+	syn-flooding attack.
+
+	The valid values of drop_entry are from 0 to 3, where 0 means
+	that this strategy is always disabled, 1 and 2 mean automatic
+	modes (when there is not enough available memory, the strategy
+	is enabled and the variable is automatically set to 2,
+	otherwise the strategy is disabled and the variable is set to
+	1), and 3 means that the strategy is always enabled.
+
+drop_packet - INTEGER
+	- 0  - disabled (default)
+
+	The drop_packet defense is designed to drop 1/rate packets
+	before forwarding them to real servers. If the rate is 1, then
+	drop all the incoming packets.
+
+	The value definition is the same as that of the drop_entry. In
+	the automatic mode, the rate is determined by the following
+	formula: rate = amemthresh / (amemthresh - available_memory)
+	when available memory is less than the available memory
+	threshold. When the mode 3 is set, the always mode drop rate
+	is controlled by the /proc/sys/net/ipv4/vs/am_droprate.
+
+expire_nodest_conn - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	The default value is 0, the load balancer will silently drop
+	packets when its destination server is not available. It may
+	be useful, when user-space monitoring program deletes the
+	destination server (because of server overload or wrong
+	detection) and adds back the server later, and the connections
+	to the server can continue.
+
+	If this feature is enabled, the load balancer will expire the
+	connection immediately when a packet arrives and its
+	destination server is not available, then the client program
+	will be notified that the connection is closed. This is
+	equivalent to the feature some people require to flush
+	connections when their destination is not available.
+
+expire_quiescent_template - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	When set to a non-zero value, the load balancer will expire
+	persistent templates when the destination server is quiescent.
+	This may be useful, when a user makes a destination server
+	quiescent by setting its weight to 0 and it is desired that
+	subsequent otherwise persistent connections are sent to a
+	different destination server.  By default new persistent
+	connections are allowed to quiescent destination servers.
+
+	If this feature is enabled, the load balancer will expire the
+	persistence template if it is to be used to schedule a new
+	connection and the destination server is quiescent.
+
+ignore_tunneled - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	If set, ipvs will set the ipvs_property on all packets which are of
+	unrecognized protocols.  This prevents us from routing tunneled
+	protocols like ipip, which is useful to prevent rescheduling
+	packets that have been tunneled to the ipvs host (i.e. to prevent
+	ipvs routing loops when ipvs is also acting as a real server).
+
+nat_icmp_send - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	It controls sending icmp error messages (ICMP_DEST_UNREACH)
+	for VS/NAT when the load balancer receives packets from real
+	servers but the connection entries don't exist.
+
+pmtu_disc - BOOLEAN
+	- 0 - disabled
+	- not 0 - enabled (default)
+
+	By default, reject with FRAG_NEEDED all DF packets that exceed
+	the PMTU, irrespective of the forwarding method. For TUN method
+	the flag can be disabled to fragment such packets.
+
+secure_tcp - INTEGER
+	- 0  - disabled (default)
+
+	The secure_tcp defense is to use a more complicated TCP state
+	transition table. For VS/NAT, it also delays entering the
+	TCP ESTABLISHED state until the three way handshake is completed.
+
+	The value definition is the same as that of drop_entry and
+	drop_packet.
+
+sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period
+	default 3 50
+
+	It sets synchronization threshold, which is the minimum number
+	of incoming packets that a connection needs to receive before
+	the connection will be synchronized. A connection will be
+	synchronized, every time the number of its incoming packets
+	modulus sync_period equals the threshold. The range of the
+	threshold is from 0 to sync_period.
+
+	When sync_period and sync_refresh_period are 0, send sync only
+	for state changes or only once when pkts matches sync_threshold.
+
+sync_refresh_period - UNSIGNED INTEGER
+	default 0
+
+	In seconds, difference in reported connection timer that triggers
+	new sync message. It can be used to avoid sync messages for the
+	specified period (or half of the connection timeout if it is lower)
+	if connection state is not changed since last sync.
+
+	This is useful for normal connections with high traffic to reduce
+	sync rate. Additionally, retry sync_retries times with period of
+	sync_refresh_period/8.
+
+sync_retries - INTEGER
+	default 0
+
+	Defines sync retries with period of sync_refresh_period/8. Useful
+	to protect against loss of sync messages. The range of the
+	sync_retries is from 0 to 3.
+
+sync_qlen_max - UNSIGNED LONG
+
+	Hard limit for queued sync messages that are not sent yet. It
+	defaults to 1/32 of the memory pages but actually represents
+	number of messages. It will protect us from allocating large
+	parts of memory when the sending rate is lower than the queuing
+	rate.
+
+sync_sock_size - INTEGER
+	default 0
+
+	Configuration of SNDBUF (master) or RCVBUF (slave) socket limit.
+	Default value is 0 (preserve system defaults).
+
+sync_ports - INTEGER
+	default 1
+
+	The number of threads that master and backup servers can use for
+	sync traffic. Every thread will use single UDP port, thread 0 will
+	use the default port 8848 while last thread will use port
+	8848+sync_ports-1.
+
+snat_reroute - BOOLEAN
+	- 0 - disabled
+	- not 0 - enabled (default)
+
+	If enabled, recalculate the route of SNATed packets from
+	realservers so that they are routed as if they originate from the
+	director. Otherwise they are routed as if they are forwarded by the
+	director.
+
+	If policy routing is in effect then it is possible that the route
+	of a packet originating from a director is routed differently to a
+	packet being forwarded by the director.
+
+	If policy routing is not in effect then the recalculated route will
+	always be the same as the original route so it is an optimisation
+	to disable snat_reroute and avoid the recalculation.
+
+sync_persist_mode - INTEGER
+	default 0
+
+	Controls the synchronisation of connections when using persistence
+
+	0: All types of connections are synchronised
+
+	1: Attempt to reduce the synchronisation traffic depending on
+	the connection type. For persistent services avoid synchronisation
+	for normal connections, do it only for persistence templates.
+	In such case, for TCP and SCTP it may need enabling sloppy_tcp and
+	sloppy_sctp flags on backup servers. For non-persistent services
+	such optimization is not applied, mode 0 is assumed.
+
+sync_version - INTEGER
+	default 1
+
+	The version of the synchronisation protocol used when sending
+	synchronisation messages.
+
+	0 selects the original synchronisation protocol (version 0). This
+	should be used when sending synchronisation messages to a legacy
+	system that only understands the original synchronisation protocol.
+
+	1 selects the current synchronisation protocol (version 1). This
+	should be used where possible.
+
+	Kernels with this sync_version entry are able to receive messages
+	of both version 0 and version 1 of the synchronisation protocol.
diff --git a/Documentation/networking/ipvs-sysctl.txt b/Documentation/networking/ipvs-sysctl.txt
deleted file mode 100644
index 056898685d40..000000000000
--- a/Documentation/networking/ipvs-sysctl.txt
+++ /dev/null
@@ -1,294 +0,0 @@
-/proc/sys/net/ipv4/vs/* Variables:
-
-am_droprate - INTEGER
-        default 10
-
-        It sets the always mode drop rate, which is used in the mode 3
-        of the drop_rate defense.
-
-amemthresh - INTEGER
-        default 1024
-
-        It sets the available memory threshold (in pages), which is
-        used in the automatic modes of defense. When there is no
-        enough available memory, the respective strategy will be
-        enabled and the variable is automatically set to 2, otherwise
-        the strategy is disabled and the variable is  set  to 1.
-
-backup_only - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	If set, disable the director function while the server is
-	in backup mode to avoid packet loops for DR/TUN methods.
-
-conn_reuse_mode - INTEGER
-	1 - default
-
-	Controls how ipvs will deal with connections that are detected
-	port reuse. It is a bitmap, with the values being:
-
-	0: disable any special handling on port reuse. The new
-	connection will be delivered to the same real server that was
-	servicing the previous connection. This will effectively
-	disable expire_nodest_conn.
-
-	bit 1: enable rescheduling of new connections when it is safe.
-	That is, whenever expire_nodest_conn and for TCP sockets, when
-	the connection is in TIME_WAIT state (which is only possible if
-	you use NAT mode).
-
-	bit 2: it is bit 1 plus, for TCP connections, when connections
-	are in FIN_WAIT state, as this is the last state seen by load
-	balancer in Direct Routing mode. This bit helps on adding new
-	real servers to a very busy cluster.
-
-conntrack - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	If set, maintain connection tracking entries for
-	connections handled by IPVS.
-
-	This should be enabled if connections handled by IPVS are to be
-	also handled by stateful firewall rules. That is, iptables rules
-	that make use of connection tracking.  It is a performance
-	optimisation to disable this setting otherwise.
-
-	Connections handled by the IPVS FTP application module
-	will have connection tracking entries regardless of this setting.
-
-	Only available when IPVS is compiled with CONFIG_IP_VS_NFCT enabled.
-
-cache_bypass - BOOLEAN
-        0 - disabled (default)
-        not 0 - enabled
-
-        If it is enabled, forward packets to the original destination
-        directly when no cache server is available and destination
-        address is not local (iph->daddr is RTN_UNICAST). It is mostly
-        used in transparent web cache cluster.
-
-debug_level - INTEGER
-	0          - transmission error messages (default)
-	1          - non-fatal error messages
-	2          - configuration
-	3          - destination trash
-	4          - drop entry
-	5          - service lookup
-	6          - scheduling
-	7          - connection new/expire, lookup and synchronization
-	8          - state transition
-	9          - binding destination, template checks and applications
-	10         - IPVS packet transmission
-	11         - IPVS packet handling (ip_vs_in/ip_vs_out)
-	12 or more - packet traversal
-
-	Only available when IPVS is compiled with CONFIG_IP_VS_DEBUG enabled.
-
-	Higher debugging levels include the messages for lower debugging
-	levels, so setting debug level 2, includes level 0, 1 and 2
-	messages. Thus, logging becomes more and more verbose the higher
-	the level.
-
-drop_entry - INTEGER
-        0  - disabled (default)
-
-        The drop_entry defense is to randomly drop entries in the
-        connection hash table, just in order to collect back some
-        memory for new connections. In the current code, the
-        drop_entry procedure can be activated every second, then it
-        randomly scans 1/32 of the whole and drops entries that are in
-        the SYN-RECV/SYNACK state, which should be effective against
-        syn-flooding attack.
-
-        The valid values of drop_entry are from 0 to 3, where 0 means
-        that this strategy is always disabled, 1 and 2 mean automatic
-        modes (when there is no enough available memory, the strategy
-        is enabled and the variable is automatically set to 2,
-        otherwise the strategy is disabled and the variable is set to
-        1), and 3 means that that the strategy is always enabled.
-
-drop_packet - INTEGER
-        0  - disabled (default)
-
-        The drop_packet defense is designed to drop 1/rate packets
-        before forwarding them to real servers. If the rate is 1, then
-        drop all the incoming packets.
-
-        The value definition is the same as that of the drop_entry. In
-        the automatic mode, the rate is determined by the follow
-        formula: rate = amemthresh / (amemthresh - available_memory)
-        when available memory is less than the available memory
-        threshold. When the mode 3 is set, the always mode drop rate
-        is controlled by the /proc/sys/net/ipv4/vs/am_droprate.
-
-expire_nodest_conn - BOOLEAN
-        0 - disabled (default)
-        not 0 - enabled
-
-        The default value is 0, the load balancer will silently drop
-        packets when its destination server is not available. It may
-        be useful, when user-space monitoring program deletes the
-        destination server (because of server overload or wrong
-        detection) and add back the server later, and the connections
-        to the server can continue.
-
-        If this feature is enabled, the load balancer will expire the
-        connection immediately when a packet arrives and its
-        destination server is not available, then the client program
-        will be notified that the connection is closed. This is
-        equivalent to the feature some people requires to flush
-        connections when its destination is not available.
-
-expire_quiescent_template - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	When set to a non-zero value, the load balancer will expire
-	persistent templates when the destination server is quiescent.
-	This may be useful, when a user makes a destination server
-	quiescent by setting its weight to 0 and it is desired that
-	subsequent otherwise persistent connections are sent to a
-	different destination server.  By default new persistent
-	connections are allowed to quiescent destination servers.
-
-	If this feature is enabled, the load balancer will expire the
-	persistence template if it is to be used to schedule a new
-	connection and the destination server is quiescent.
-
-ignore_tunneled - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	If set, ipvs will set the ipvs_property on all packets which are of
-	unrecognized protocols.  This prevents us from routing tunneled
-	protocols like ipip, which is useful to prevent rescheduling
-	packets that have been tunneled to the ipvs host (i.e. to prevent
-	ipvs routing loops when ipvs is also acting as a real server).
-
-nat_icmp_send - BOOLEAN
-        0 - disabled (default)
-        not 0 - enabled
-
-        It controls sending icmp error messages (ICMP_DEST_UNREACH)
-        for VS/NAT when the load balancer receives packets from real
-        servers but the connection entries don't exist.
-
-pmtu_disc - BOOLEAN
-	0 - disabled
-	not 0 - enabled (default)
-
-	By default, reject with FRAG_NEEDED all DF packets that exceed
-	the PMTU, irrespective of the forwarding method. For TUN method
-	the flag can be disabled to fragment such packets.
-
-secure_tcp - INTEGER
-        0  - disabled (default)
-
-	The secure_tcp defense is to use a more complicated TCP state
-	transition table. For VS/NAT, it also delays entering the
-	TCP ESTABLISHED state until the three way handshake is completed.
-
-        The value definition is the same as that of drop_entry and
-        drop_packet.
-
-sync_threshold - vector of 2 INTEGERs: sync_threshold, sync_period
-	default 3 50
-
-	It sets synchronization threshold, which is the minimum number
-	of incoming packets that a connection needs to receive before
-	the connection will be synchronized. A connection will be
-	synchronized, every time the number of its incoming packets
-	modulus sync_period equals the threshold. The range of the
-	threshold is from 0 to sync_period.
-
-	When sync_period and sync_refresh_period are 0, send sync only
-	for state changes or only once when pkts matches sync_threshold
-
-sync_refresh_period - UNSIGNED INTEGER
-	default 0
-
-	In seconds, difference in reported connection timer that triggers
-	new sync message. It can be used to avoid sync messages for the
-	specified period (or half of the connection timeout if it is lower)
-	if connection state is not changed since last sync.
-
-	This is useful for normal connections with high traffic to reduce
-	sync rate. Additionally, retry sync_retries times with period of
-	sync_refresh_period/8.
-
-sync_retries - INTEGER
-	default 0
-
-	Defines sync retries with period of sync_refresh_period/8. Useful
-	to protect against loss of sync messages. The range of the
-	sync_retries is from 0 to 3.
-
-sync_qlen_max - UNSIGNED LONG
-
-	Hard limit for queued sync messages that are not sent yet. It
-	defaults to 1/32 of the memory pages but actually represents
-	number of messages. It will protect us from allocating large
-	parts of memory when the sending rate is lower than the queuing
-	rate.
-
-sync_sock_size - INTEGER
-	default 0
-
-	Configuration of SNDBUF (master) or RCVBUF (slave) socket limit.
-	Default value is 0 (preserve system defaults).
-
-sync_ports - INTEGER
-	default 1
-
-	The number of threads that master and backup servers can use for
-	sync traffic. Every thread will use single UDP port, thread 0 will
-	use the default port 8848 while last thread will use port
-	8848+sync_ports-1.
-
-snat_reroute - BOOLEAN
-	0 - disabled
-	not 0 - enabled (default)
-
-	If enabled, recalculate the route of SNATed packets from
-	realservers so that they are routed as if they originate from the
-	director. Otherwise they are routed as if they are forwarded by the
-	director.
-
-	If policy routing is in effect then it is possible that the route
-	of a packet originating from a director is routed differently to a
-	packet being forwarded by the director.
-
-	If policy routing is not in effect then the recalculated route will
-	always be the same as the original route so it is an optimisation
-	to disable snat_reroute and avoid the recalculation.
-
-sync_persist_mode - INTEGER
-	default 0
-
-	Controls the synchronisation of connections when using persistence
-
-	0: All types of connections are synchronised
-	1: Attempt to reduce the synchronisation traffic depending on
-	the connection type. For persistent services avoid synchronisation
-	for normal connections, do it only for persistence templates.
-	In such case, for TCP and SCTP it may need enabling sloppy_tcp and
-	sloppy_sctp flags on backup servers. For non-persistent services
-	such optimization is not applied, mode 0 is assumed.
-
-sync_version - INTEGER
-	default 1
-
-	The version of the synchronisation protocol used when sending
-	synchronisation messages.
-
-	0 selects the original synchronisation protocol (version 0). This
-	should be used when sending synchronisation messages to a legacy
-	system that only understands the original synchronisation protocol.
-
-	1 selects the current synchronisation protocol (version 1). This
-	should be used where possible.
-
-	Kernels with this sync_version entry are able to receive messages
-	of both version 1 and version 2 of the synchronisation protocol.
diff --git a/MAINTAINERS b/MAINTAINERS
index df5e4ccc1ccb..3a5f52a3c055 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8934,7 +8934,7 @@ L:	lvs-devel@vger.kernel.org
 S:	Maintained
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs-next.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/horms/ipvs.git
-F:	Documentation/networking/ipvs-sysctl.txt
+F:	Documentation/networking/ipvs-sysctl.rst
 F:	include/net/ip_vs.h
 F:	include/uapi/linux/ip_vs.h
 F:	net/netfilter/ipvs/
-- 
cgit v1.2.3-59-g8ed1b


From b9dd2bea2245dd8ba4f68e801af93e4b38bfe6b0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Tue, 28 Apr 2020 00:01:53 +0200
Subject: docs: networking: convert kcm.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/kcm.rst   | 290 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/kcm.txt   | 285 ------------------------------------
 3 files changed, 291 insertions(+), 285 deletions(-)
 create mode 100644 Documentation/networking/kcm.rst
 delete mode 100644 Documentation/networking/kcm.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index bbd4e0041457..e1ff08b94d90 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -73,6 +73,7 @@ Contents:
    ipv6
    ipvlan
    ipvs-sysctl
+   kcm
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/kcm.rst b/Documentation/networking/kcm.rst
new file mode 100644
index 000000000000..db0f5560ac1c
--- /dev/null
+++ b/Documentation/networking/kcm.rst
@@ -0,0 +1,290 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================
+Kernel Connection Multiplexor
+=============================
+
+Kernel Connection Multiplexor (KCM) is a mechanism that provides a message based
+interface over TCP for generic application protocols. With KCM an application
+can efficiently send and receive application protocol messages over TCP using
+datagram sockets.
+
+KCM implements an NxM multiplexor in the kernel as diagrammed below::
+
+    +------------+   +------------+   +------------+   +------------+
+    | KCM socket |   | KCM socket |   | KCM socket |   | KCM socket |
+    +------------+   +------------+   +------------+   +------------+
+	|                 |               |                |
+	+-----------+     |               |     +----------+
+		    |     |               |     |
+		+----------------------------------+
+		|           Multiplexor            |
+		+----------------------------------+
+		    |   |           |           |  |
+	+---------+   |           |           |  ------------+
+	|             |           |           |              |
+    +----------+  +----------+  +----------+  +----------+ +----------+
+    |  Psock   |  |  Psock   |  |  Psock   |  |  Psock   | |  Psock   |
+    +----------+  +----------+  +----------+  +----------+ +----------+
+	|              |           |            |             |
+    +----------+  +----------+  +----------+  +----------+ +----------+
+    | TCP sock |  | TCP sock |  | TCP sock |  | TCP sock | | TCP sock |
+    +----------+  +----------+  +----------+  +----------+ +----------+
+
+KCM sockets
+===========
+
+The KCM sockets provide the user interface to the multiplexor. All the KCM sockets
+bound to a multiplexor are considered to have equivalent function, and I/O
+operations in different sockets may be done in parallel without the need for
+synchronization between threads in userspace.
+
+Multiplexor
+===========
+
+The multiplexor provides the message steering. In the transmit path, messages
+written on a KCM socket are sent atomically on an appropriate TCP socket.
+Similarly, in the receive path, messages are constructed on each TCP socket
+(Psock) and complete messages are steered to a KCM socket.
+
+TCP sockets & Psocks
+====================
+
+TCP sockets may be bound to a KCM multiplexor. A Psock structure is allocated
+for each bound TCP socket, this structure holds the state for constructing
+messages on receive as well as other connection specific information for KCM.
+
+Connected mode semantics
+========================
+
+Each multiplexor assumes that all attached TCP connections are to the same
+destination and can use the different connections for load balancing when
+transmitting. The normal send and recv calls (including sendmmsg and recvmmsg)
+can be used to send and receive messages from the KCM socket.
+
+Socket types
+============
+
+KCM supports SOCK_DGRAM and SOCK_SEQPACKET socket types.
+
+Message delineation
+-------------------
+
+Messages are sent over a TCP stream with some application protocol message
+format that typically includes a header which frames the messages. The length
+of a received message can be deduced from the application protocol header
+(often just a simple length field).
+
+A TCP stream must be parsed to determine message boundaries. Berkeley Packet
+Filter (BPF) is used for this. When attaching a TCP socket to a multiplexor a
+BPF program must be specified. The program is called at the start of receiving
+a new message and is given an skbuff that contains the bytes received so far.
+It parses the message header and returns the length of the message. Given this
+information, KCM will construct the message of the stated length and deliver it
+to a KCM socket.
+
+TCP socket management
+---------------------
+
+When a TCP socket is attached to a KCM multiplexor data ready (POLLIN) and
+write space available (POLLOUT) events are handled by the multiplexor. If there
+is a state change (disconnection) or other error on a TCP socket, an error is
+posted on the TCP socket so that a POLLERR event happens and KCM discontinues
+using the socket. When the application gets the error notification for a
+TCP socket, it should unattach the socket from KCM and then handle the error
+condition (the typical response is to close the socket and create a new
+connection if necessary).
+
+KCM limits the maximum receive message size to be the size of the receive
+socket buffer on the attached TCP socket (the socket buffer size can be set by
+SO_RCVBUF). If the length of a new message reported by the BPF program is
+greater than this limit a corresponding error (EMSGSIZE) is posted on the TCP
+socket. The BPF program may also enforce a maximum message size and report an
+error when it is exceeded.
+
+A timeout may be set for assembling messages on a receive socket. The timeout
+value is taken from the receive timeout of the attached TCP socket (this is set
+by SO_RCVTIMEO). If the timer expires before assembly is complete an error
+(ETIMEDOUT) is posted on the socket.
+
+User interface
+==============
+
+Creating a multiplexor
+----------------------
+
+A new multiplexor and initial KCM socket is created by a socket call::
+
+  socket(AF_KCM, type, protocol)
+
+- type is either SOCK_DGRAM or SOCK_SEQPACKET
+- protocol is KCMPROTO_CONNECTED
+
+Cloning KCM sockets
+-------------------
+
+After the first KCM socket is created using the socket call as described
+above, additional sockets for the multiplexor can be created by cloning
+a KCM socket. This is accomplished by an ioctl on a KCM socket::
+
+  /* From linux/kcm.h */
+  struct kcm_clone {
+	int fd;
+  };
+
+  struct kcm_clone info;
+
+  memset(&info, 0, sizeof(info));
+
+  err = ioctl(kcmfd, SIOCKCMCLONE, &info);
+
+  if (!err)
+    newkcmfd = info.fd;
+
+Attach transport sockets
+------------------------
+
+Attaching of transport sockets to a multiplexor is performed by calling an
+ioctl on a KCM socket for the multiplexor. e.g.::
+
+  /* From linux/kcm.h */
+  struct kcm_attach {
+	int fd;
+	int bpf_fd;
+  };
+
+  struct kcm_attach info;
+
+  memset(&info, 0, sizeof(info));
+
+  info.fd = tcpfd;
+  info.bpf_fd = bpf_prog_fd;
+
+  ioctl(kcmfd, SIOCKCMATTACH, &info);
+
+The kcm_attach structure contains:
+
+  - fd: file descriptor for TCP socket being attached
+  - bpf_fd: file descriptor for compiled BPF program downloaded
+
+Unattach transport sockets
+--------------------------
+
+Unattaching a transport socket from a multiplexor is straightforward. An
+"unattach" ioctl is done with the kcm_unattach structure as the argument::
+
+  /* From linux/kcm.h */
+  struct kcm_unattach {
+	int fd;
+  };
+
+  struct kcm_unattach info;
+
+  memset(&info, 0, sizeof(info));
+
+  info.fd = cfd;
+
+  ioctl(fd, SIOCKCMUNATTACH, &info);
+
+Disabling receive on KCM socket
+-------------------------------
+
+A setsockopt is used to disable or enable receiving on a KCM socket.
+When receive is disabled, any pending messages in the socket's
+receive buffer are moved to other sockets. This feature is useful
+if an application thread knows that it will be doing a lot of
+work on a request and won't be able to service new messages for a
+while. Example use::
+
+  int val = 1;
+
+  setsockopt(kcmfd, SOL_KCM, KCM_RECV_DISABLE, &val, sizeof(val))
+
+BPF programs for message delineation
+------------------------------------
+
+BPF programs can be compiled using the BPF LLVM backend. For example,
+the BPF program for parsing Thrift is::
+
+  #include "bpf.h" /* for __sk_buff */
+  #include "bpf_helpers.h" /* for load_word intrinsic */
+
+  SEC("socket_kcm")
+  int bpf_prog1(struct __sk_buff *skb)
+  {
+       return load_word(skb, 0) + 4;
+  }
+
+  char _license[] SEC("license") = "GPL";
+
+Use in applications
+===================
+
+KCM accelerates application layer protocols. Specifically, it allows
+applications to use a message based interface for sending and receiving
+messages. The kernel provides necessary assurances that messages are sent
+and received atomically. This relieves much of the burden applications have
+in mapping a message based protocol onto the TCP stream. KCM also makes
+application layer messages a unit of work in the kernel for the purposes of
+steering and scheduling, which in turn allows a simpler networking model in
+multithreaded applications.
+
+Configurations
+--------------
+
+In an Nx1 configuration, KCM logically provides multiple socket handles
+to the same TCP connection. This allows parallelism between I/O
+operations on the TCP socket (for instance copyin and copyout of data is
+parallelized). In an application, a KCM socket can be opened for each
+processing thread and inserted into the epoll (similar to how SO_REUSEPORT
+is used to allow multiple listener sockets on the same port).
+
+In an MxN configuration, multiple connections are established to the
+same destination. These are used for simple load balancing.
+
+Message batching
+----------------
+
+The primary purpose of KCM is load balancing between KCM sockets and hence
+threads in a nominal use case. Perfect load balancing, that is steering
+each received message to a different KCM socket or steering each sent
+message to a different TCP socket, can negatively impact performance
+since this doesn't allow for affinities to be established. Balancing
+based on groups, or batches of messages, can be beneficial for performance.
+
+On transmit, there are three ways an application can batch (pipeline)
+messages on a KCM socket.
+
+  1) Send multiple messages in a single sendmmsg.
+  2) Send a group of messages each with a sendmsg call, where all messages
+     except the last have MSG_BATCH in the flags of sendmsg call.
+  3) Create "super message" composed of multiple messages and send this
+     with a single sendmsg.
+
+On receive, the KCM module attempts to queue messages received on the
+same KCM socket during each TCP ready callback. The targeted KCM socket
+changes at each receive ready callback on the KCM socket. The application
+does not need to configure this.
+
+Error handling
+--------------
+
+An application should include a thread to monitor errors raised on
+the TCP connection. Normally, this will be done by placing each
+TCP socket attached to a KCM multiplexor in epoll set for POLLERR
+event. If an error occurs on an attached TCP socket, KCM sets an EPIPE
+on the socket thus waking up the application thread. When the application
+sees the error (which may just be a disconnect) it should unattach the
+socket from KCM and then close it. It is assumed that once an error is
+posted on the TCP socket the data stream is unrecoverable (i.e. an error
+may have occurred in the middle of receiving a message).
+
+TCP connection monitoring
+-------------------------
+
+In KCM there is no means to correlate a message to the TCP socket that
+was used to send or receive the message (except in the case there is
+only one attached TCP socket). However, the application does retain
+an open file descriptor to the socket so it will be able to get statistics
+from the socket which can be used in detecting issues (such as high
+retransmissions on the socket).
diff --git a/Documentation/networking/kcm.txt b/Documentation/networking/kcm.txt
deleted file mode 100644
index b773a5278ac4..000000000000
--- a/Documentation/networking/kcm.txt
+++ /dev/null
@@ -1,285 +0,0 @@
-Kernel Connection Multiplexor
------------------------------
-
-Kernel Connection Multiplexor (KCM) is a mechanism that provides a message based
-interface over TCP for generic application protocols. With KCM an application
-can efficiently send and receive application protocol messages over TCP using
-datagram sockets.
-
-KCM implements an NxM multiplexor in the kernel as diagrammed below:
-
-+------------+   +------------+   +------------+   +------------+
-| KCM socket |   | KCM socket |   | KCM socket |   | KCM socket |
-+------------+   +------------+   +------------+   +------------+
-      |                 |               |                |
-      +-----------+     |               |     +----------+
-                  |     |               |     |
-               +----------------------------------+
-               |           Multiplexor            |
-               +----------------------------------+
-                 |   |           |           |  |
-       +---------+   |           |           |  ------------+
-       |             |           |           |              |
-+----------+  +----------+  +----------+  +----------+ +----------+
-|  Psock   |  |  Psock   |  |  Psock   |  |  Psock   | |  Psock   |
-+----------+  +----------+  +----------+  +----------+ +----------+
-      |              |           |            |             |
-+----------+  +----------+  +----------+  +----------+ +----------+
-| TCP sock |  | TCP sock |  | TCP sock |  | TCP sock | | TCP sock |
-+----------+  +----------+  +----------+  +----------+ +----------+
-
-KCM sockets
------------
-
-The KCM sockets provide the user interface to the multiplexor. All the KCM sockets
-bound to a multiplexor are considered to have equivalent function, and I/O
-operations in different sockets may be done in parallel without the need for
-synchronization between threads in userspace.
-
-Multiplexor
------------
-
-The multiplexor provides the message steering. In the transmit path, messages
-written on a KCM socket are sent atomically on an appropriate TCP socket.
-Similarly, in the receive path, messages are constructed on each TCP socket
-(Psock) and complete messages are steered to a KCM socket.
-
-TCP sockets & Psocks
---------------------
-
-TCP sockets may be bound to a KCM multiplexor. A Psock structure is allocated
-for each bound TCP socket, this structure holds the state for constructing
-messages on receive as well as other connection specific information for KCM.
-
-Connected mode semantics
-------------------------
-
-Each multiplexor assumes that all attached TCP connections are to the same
-destination and can use the different connections for load balancing when
-transmitting. The normal send and recv calls (include sendmmsg and recvmmsg)
-can be used to send and receive messages from the KCM socket.
-
-Socket types
-------------
-
-KCM supports SOCK_DGRAM and SOCK_SEQPACKET socket types.
-
-Message delineation
--------------------
-
-Messages are sent over a TCP stream with some application protocol message
-format that typically includes a header which frames the messages. The length
-of a received message can be deduced from the application protocol header
-(often just a simple length field).
-
-A TCP stream must be parsed to determine message boundaries. Berkeley Packet
-Filter (BPF) is used for this. When attaching a TCP socket to a multiplexor a
-BPF program must be specified. The program is called at the start of receiving
-a new message and is given an skbuff that contains the bytes received so far.
-It parses the message header and returns the length of the message. Given this
-information, KCM will construct the message of the stated length and deliver it
-to a KCM socket.
-
-TCP socket management
----------------------
-
-When a TCP socket is attached to a KCM multiplexor data ready (POLLIN) and
-write space available (POLLOUT) events are handled by the multiplexor. If there
-is a state change (disconnection) or other error on a TCP socket, an error is
-posted on the TCP socket so that a POLLERR event happens and KCM discontinues
-using the socket. When the application gets the error notification for a
-TCP socket, it should unattach the socket from KCM and then handle the error
-condition (the typical response is to close the socket and create a new
-connection if necessary).
-
-KCM limits the maximum receive message size to be the size of the receive
-socket buffer on the attached TCP socket (the socket buffer size can be set by
-SO_RCVBUF). If the length of a new message reported by the BPF program is
-greater than this limit a corresponding error (EMSGSIZE) is posted on the TCP
-socket. The BPF program may also enforce a maximum messages size and report an
-error when it is exceeded.
-
-A timeout may be set for assembling messages on a receive socket. The timeout
-value is taken from the receive timeout of the attached TCP socket (this is set
-by SO_RCVTIMEO). If the timer expires before assembly is complete an error
-(ETIMEDOUT) is posted on the socket.
-
-User interface
-==============
-
-Creating a multiplexor
-----------------------
-
-A new multiplexor and initial KCM socket is created by a socket call:
-
-  socket(AF_KCM, type, protocol)
-
-  - type is either SOCK_DGRAM or SOCK_SEQPACKET
-  - protocol is KCMPROTO_CONNECTED
-
-Cloning KCM sockets
--------------------
-
-After the first KCM socket is created using the socket call as described
-above, additional sockets for the multiplexor can be created by cloning
-a KCM socket. This is accomplished by an ioctl on a KCM socket:
-
-  /* From linux/kcm.h */
-  struct kcm_clone {
-        int fd;
-  };
-
-  struct kcm_clone info;
-
-  memset(&info, 0, sizeof(info));
-
-  err = ioctl(kcmfd, SIOCKCMCLONE, &info);
-
-  if (!err)
-    newkcmfd = info.fd;
-
-Attach transport sockets
-------------------------
-
-Attaching of transport sockets to a multiplexor is performed by calling an
-ioctl on a KCM socket for the multiplexor. e.g.:
-
-  /* From linux/kcm.h */
-  struct kcm_attach {
-        int fd;
-	int bpf_fd;
-  };
-
-  struct kcm_attach info;
-
-  memset(&info, 0, sizeof(info));
-
-  info.fd = tcpfd;
-  info.bpf_fd = bpf_prog_fd;
-
-  ioctl(kcmfd, SIOCKCMATTACH, &info);
-
-The kcm_attach structure contains:
-  fd: file descriptor for TCP socket being attached
-  bpf_prog_fd: file descriptor for compiled BPF program downloaded
-
-Unattach transport sockets
---------------------------
-
-Unattaching a transport socket from a multiplexor is straightforward. An
-"unattach" ioctl is done with the kcm_unattach structure as the argument:
-
-  /* From linux/kcm.h */
-  struct kcm_unattach {
-        int fd;
-  };
-
-  struct kcm_unattach info;
-
-  memset(&info, 0, sizeof(info));
-
-  info.fd = cfd;
-
-  ioctl(fd, SIOCKCMUNATTACH, &info);
-
-Disabling receive on KCM socket
--------------------------------
-
-A setsockopt is used to disable or enable receiving on a KCM socket.
-When receive is disabled, any pending messages in the socket's
-receive buffer are moved to other sockets. This feature is useful
-if an application thread knows that it will be doing a lot of
-work on a request and won't be able to service new messages for a
-while. Example use:
-
-  int val = 1;
-
-  setsockopt(kcmfd, SOL_KCM, KCM_RECV_DISABLE, &val, sizeof(val))
-
-BFP programs for message delineation
-------------------------------------
-
-BPF programs can be compiled using the BPF LLVM backend. For example,
-the BPF program for parsing Thrift is:
-
-  #include "bpf.h" /* for __sk_buff */
-  #include "bpf_helpers.h" /* for load_word intrinsic */
-
-  SEC("socket_kcm")
-  int bpf_prog1(struct __sk_buff *skb)
-  {
-       return load_word(skb, 0) + 4;
-  }
-
-  char _license[] SEC("license") = "GPL";
-
-Use in applications
-===================
-
-KCM accelerates application layer protocols. Specifically, it allows
-applications to use a message based interface for sending and receiving
-messages. The kernel provides necessary assurances that messages are sent
-and received atomically. This relieves much of the burden applications have
-in mapping a message based protocol onto the TCP stream. KCM also make
-application layer messages a unit of work in the kernel for the purposes of
-steering and scheduling, which in turn allows a simpler networking model in
-multithreaded applications.
-
-Configurations
---------------
-
-In an Nx1 configuration, KCM logically provides multiple socket handles
-to the same TCP connection. This allows parallelism between in I/O
-operations on the TCP socket (for instance copyin and copyout of data is
-parallelized). In an application, a KCM socket can be opened for each
-processing thread and inserted into the epoll (similar to how SO_REUSEPORT
-is used to allow multiple listener sockets on the same port).
-
-In a MxN configuration, multiple connections are established to the
-same destination. These are used for simple load balancing.
-
-Message batching
-----------------
-
-The primary purpose of KCM is load balancing between KCM sockets and hence
-threads in a nominal use case. Perfect load balancing, that is steering
-each received message to a different KCM socket or steering each sent
-message to a different TCP socket, can negatively impact performance
-since this doesn't allow for affinities to be established. Balancing
-based on groups, or batches of messages, can be beneficial for performance.
-
-On transmit, there are three ways an application can batch (pipeline)
-messages on a KCM socket.
-  1) Send multiple messages in a single sendmmsg.
-  2) Send a group of messages each with a sendmsg call, where all messages
-     except the last have MSG_BATCH in the flags of sendmsg call.
-  3) Create "super message" composed of multiple messages and send this
-     with a single sendmsg.
-
-On receive, the KCM module attempts to queue messages received on the
-same KCM socket during each TCP ready callback. The targeted KCM socket
-changes at each receive ready callback on the KCM socket. The application
-does not need to configure this.
-
-Error handling
---------------
-
-An application should include a thread to monitor errors raised on
-the TCP connection. Normally, this will be done by placing each
-TCP socket attached to a KCM multiplexor in epoll set for POLLERR
-event. If an error occurs on an attached TCP socket, KCM sets an EPIPE
-on the socket thus waking up the application thread. When the application
-sees the error (which may just be a disconnect) it should unattach the
-socket from KCM and then close it. It is assumed that once an error is
-posted on the TCP socket the data stream is unrecoverable (i.e. an error
-may have occurred in the middle of receiving a message).
-
-TCP connection monitoring
--------------------------
-
-In KCM there is no means to correlate a message to the TCP socket that
-was used to send or receive the message (except in the case there is
-only one attached TCP socket). However, the application does retain
-an open file descriptor to the socket so it will be able to get statistics
-from the socket which can be used in detecting issues (such as high
-retransmissions on the socket).
-- 
cgit v1.2.3-59-g8ed1b


From 9b329d0dbe413bf46eb5010edd06b3076960a60a Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 28 Apr 2020 15:30:48 -0700
Subject: selftests/bpf: fix test_sysctl_prog with alu32

Similar to commit b7a0d65d80a0 ("bpf, testing: Workaround a verifier failure for test_progs")
fix test_sysctl_prog.c as well.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/progs/test_sysctl_prog.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
index 2d0b0b82a78a..50525235380e 100644
--- a/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
+++ b/tools/testing/selftests/bpf/progs/test_sysctl_prog.c
@@ -45,7 +45,7 @@ int sysctl_tcp_mem(struct bpf_sysctl *ctx)
 	unsigned long tcp_mem[3] = {0, 0, 0};
 	char value[MAX_VALUE_STR_LEN];
 	unsigned char i, off = 0;
-	int ret;
+	volatile int ret;
 
 	if (ctx->write)
 		return 0;
-- 
cgit v1.2.3-59-g8ed1b


From f9d041271cf44ca02eed0cc82e1a6d8c814c53ed Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:05 -0700
Subject: bpf: Refactor bpf_link update handling

Make bpf_link update support more generic by making it another
bpf_link_ops method. This allows generic syscall handling code to be agnostic
to various conditionally compiled features (e.g., the case of
CONFIG_CGROUP_BPF). This also allows link type-specific code to remain
static within its respective code base. Refactor existing bpf_cgroup_link code
and take advantage of this.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-2-andriin@fb.com
---
 include/linux/bpf-cgroup.h | 12 ------------
 include/linux/bpf.h        |  3 ++-
 kernel/bpf/cgroup.c        | 30 ++++++++++++++++++++++++++++--
 kernel/bpf/syscall.c       | 11 ++++-------
 kernel/cgroup/cgroup.c     | 27 ---------------------------
 5 files changed, 34 insertions(+), 49 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 0b41fd5fc96b..a9cb9a5bf8e9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -100,8 +100,6 @@ int __cgroup_bpf_attach(struct cgroup *cgrp,
 int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 			struct bpf_cgroup_link *link,
 			enum bpf_attach_type type);
-int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
-			 struct bpf_prog *new_prog);
 int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		       union bpf_attr __user *uattr);
 
@@ -112,8 +110,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
 		      u32 flags);
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		      enum bpf_attach_type type);
-int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
-		       struct bpf_prog *new_prog);
 int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr,
 		     union bpf_attr __user *uattr);
 
@@ -353,7 +349,6 @@ int cgroup_bpf_prog_query(const union bpf_attr *attr,
 #else
 
 struct bpf_prog;
-struct bpf_link;
 struct cgroup_bpf {};
 static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; }
 static inline void cgroup_bpf_offline(struct cgroup *cgrp) {}
@@ -377,13 +372,6 @@ static inline int cgroup_bpf_link_attach(const union bpf_attr *attr,
 	return -EINVAL;
 }
 
-static inline int cgroup_bpf_replace(struct bpf_link *link,
-				     struct bpf_prog *old_prog,
-				     struct bpf_prog *new_prog)
-{
-	return -EINVAL;
-}
-
 static inline int cgroup_bpf_prog_query(const union bpf_attr *attr,
 					union bpf_attr __user *uattr)
 {
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 10960cfabea4..81c8620cb4c4 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1093,7 +1093,8 @@ struct bpf_link {
 struct bpf_link_ops {
 	void (*release)(struct bpf_link *link);
 	void (*dealloc)(struct bpf_link *link);
-
+	int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
+			   struct bpf_prog *old_prog);
 };
 
 void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index bf634959885c..da6e48e802b2 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -557,8 +557,9 @@ found:
  *
  * Must be called with cgroup_mutex held.
  */
-int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
-			 struct bpf_prog *new_prog)
+static int __cgroup_bpf_replace(struct cgroup *cgrp,
+				struct bpf_cgroup_link *link,
+				struct bpf_prog *new_prog)
 {
 	struct list_head *progs = &cgrp->bpf.progs[link->type];
 	struct bpf_prog *old_prog;
@@ -583,6 +584,30 @@ int __cgroup_bpf_replace(struct cgroup *cgrp, struct bpf_cgroup_link *link,
 	return 0;
 }
 
+static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
+			      struct bpf_prog *old_prog)
+{
+	struct bpf_cgroup_link *cg_link;
+	int ret;
+
+	cg_link = container_of(link, struct bpf_cgroup_link, link);
+
+	mutex_lock(&cgroup_mutex);
+	/* link might have been auto-released by dying cgroup, so fail */
+	if (!cg_link->cgroup) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+	if (old_prog && link->prog != old_prog) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
+out_unlock:
+	mutex_unlock(&cgroup_mutex);
+	return ret;
+}
+
 static struct bpf_prog_list *find_detach_entry(struct list_head *progs,
 					       struct bpf_prog *prog,
 					       struct bpf_cgroup_link *link,
@@ -811,6 +836,7 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link)
 const struct bpf_link_ops bpf_cgroup_link_lops = {
 	.release = bpf_cgroup_link_release,
 	.dealloc = bpf_cgroup_link_dealloc,
+	.update_prog = cgroup_bpf_replace,
 };
 
 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 7626b8024471..f5358e1462eb 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3645,13 +3645,10 @@ static int link_update(union bpf_attr *attr)
 		goto out_put_progs;
 	}
 
-#ifdef CONFIG_CGROUP_BPF
-	if (link->ops == &bpf_cgroup_link_lops) {
-		ret = cgroup_bpf_replace(link, old_prog, new_prog);
-		goto out_put_progs;
-	}
-#endif
-	ret = -EINVAL;
+	if (link->ops->update_prog)
+		ret = link->ops->update_prog(link, new_prog, old_prog);
+	else
+		ret = -EINVAL;
 
 out_put_progs:
 	if (old_prog)
diff --git a/kernel/cgroup/cgroup.c b/kernel/cgroup/cgroup.c
index 06b5ea9d899d..557a9b9d2244 100644
--- a/kernel/cgroup/cgroup.c
+++ b/kernel/cgroup/cgroup.c
@@ -6508,33 +6508,6 @@ int cgroup_bpf_attach(struct cgroup *cgrp,
 	return ret;
 }
 
-int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *old_prog,
-		       struct bpf_prog *new_prog)
-{
-	struct bpf_cgroup_link *cg_link;
-	int ret;
-
-	if (link->ops != &bpf_cgroup_link_lops)
-		return -EINVAL;
-
-	cg_link = container_of(link, struct bpf_cgroup_link, link);
-
-	mutex_lock(&cgroup_mutex);
-	/* link might have been auto-released by dying cgroup, so fail */
-	if (!cg_link->cgroup) {
-		ret = -EINVAL;
-		goto out_unlock;
-	}
-	if (old_prog && link->prog != old_prog) {
-		ret = -EPERM;
-		goto out_unlock;
-	}
-	ret = __cgroup_bpf_replace(cg_link->cgroup, cg_link, new_prog);
-out_unlock:
-	mutex_unlock(&cgroup_mutex);
-	return ret;
-}
-
 int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog,
 		      enum bpf_attach_type type)
 {
-- 
cgit v1.2.3-59-g8ed1b


From a3b80e1078943dc12553166fb08e258463dec013 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:06 -0700
Subject: bpf: Allocate ID for bpf_link

Generate ID for each bpf_link using IDR, similarly to bpf_map and bpf_prog.
bpf_link creation, initialization, attachment, and exposing to user-space
through FD and ID is a complicated multi-step process, abstract it away
through bpf_link_primer and bpf_link_prime(), bpf_link_settle(), and
bpf_link_cleanup() internal API. They guarantee that until bpf_link is
properly attached, user-space won't be able to access partially-initialized
bpf_link either from FD or ID. All this allows to simplify bpf_link attachment
and error handling code.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-3-andriin@fb.com
---
 include/linux/bpf.h      |  17 ++++--
 include/uapi/linux/bpf.h |   1 +
 kernel/bpf/cgroup.c      |  14 ++---
 kernel/bpf/syscall.c     | 143 ++++++++++++++++++++++++++++++++---------------
 4 files changed, 118 insertions(+), 57 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 81c8620cb4c4..875d1f0af803 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1085,11 +1085,19 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
 
 struct bpf_link {
 	atomic64_t refcnt;
+	u32 id;
 	const struct bpf_link_ops *ops;
 	struct bpf_prog *prog;
 	struct work_struct work;
 };
 
+struct bpf_link_primer {
+	struct bpf_link *link;
+	struct file *file;
+	int fd;
+	u32 id;
+};
+
 struct bpf_link_ops {
 	void (*release)(struct bpf_link *link);
 	void (*dealloc)(struct bpf_link *link);
@@ -1097,10 +1105,11 @@ struct bpf_link_ops {
 			   struct bpf_prog *old_prog);
 };
 
-void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
-		   struct bpf_prog *prog);
-void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
-		      int link_fd);
+void bpf_link_init(struct bpf_link *link,
+		   const struct bpf_link_ops *ops, struct bpf_prog *prog);
+int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
+int bpf_link_settle(struct bpf_link_primer *primer);
+void bpf_link_cleanup(struct bpf_link_primer *primer);
 void bpf_link_inc(struct bpf_link *link);
 void bpf_link_put(struct bpf_link *link);
 int bpf_link_new_fd(struct bpf_link *link);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 4a6c47f3febe..6121aa487465 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -523,6 +523,7 @@ union bpf_attr {
 			__u32		prog_id;
 			__u32		map_id;
 			__u32		btf_id;
+			__u32		link_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index da6e48e802b2..1bdf37fca879 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -841,10 +841,10 @@ const struct bpf_link_ops bpf_cgroup_link_lops = {
 
 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	struct bpf_link_primer link_primer;
 	struct bpf_cgroup_link *link;
-	struct file *link_file;
 	struct cgroup *cgrp;
-	int err, link_fd;
+	int err;
 
 	if (attr->link_create.flags)
 		return -EINVAL;
@@ -862,22 +862,20 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 	link->cgroup = cgrp;
 	link->type = attr->link_create.attach_type;
 
-	link_file = bpf_link_new_file(&link->link, &link_fd);
-	if (IS_ERR(link_file)) {
+	err  = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
 		kfree(link);
-		err = PTR_ERR(link_file);
 		goto out_put_cgroup;
 	}
 
 	err = cgroup_bpf_attach(cgrp, NULL, NULL, link, link->type,
 				BPF_F_ALLOW_MULTI);
 	if (err) {
-		bpf_link_cleanup(&link->link, link_file, link_fd);
+		bpf_link_cleanup(&link_primer);
 		goto out_put_cgroup;
 	}
 
-	fd_install(link_fd, link_file);
-	return link_fd;
+	return bpf_link_settle(&link_primer);
 
 out_put_cgroup:
 	cgroup_put(cgrp);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index f5358e1462eb..5439e05e3d25 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -42,6 +42,8 @@ static DEFINE_IDR(prog_idr);
 static DEFINE_SPINLOCK(prog_idr_lock);
 static DEFINE_IDR(map_idr);
 static DEFINE_SPINLOCK(map_idr_lock);
+static DEFINE_IDR(link_idr);
+static DEFINE_SPINLOCK(link_idr_lock);
 
 int sysctl_unprivileged_bpf_disabled __read_mostly;
 
@@ -2181,25 +2183,38 @@ static int bpf_obj_get(const union bpf_attr *attr)
 				attr->file_flags);
 }
 
-void bpf_link_init(struct bpf_link *link, const struct bpf_link_ops *ops,
-		   struct bpf_prog *prog)
+void bpf_link_init(struct bpf_link *link,
+		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
 {
 	atomic64_set(&link->refcnt, 1);
+	link->id = 0;
 	link->ops = ops;
 	link->prog = prog;
 }
 
+static void bpf_link_free_id(int id)
+{
+	if (!id)
+		return;
+
+	spin_lock_bh(&link_idr_lock);
+	idr_remove(&link_idr, id);
+	spin_unlock_bh(&link_idr_lock);
+}
+
 /* Clean up bpf_link and corresponding anon_inode file and FD. After
  * anon_inode is created, bpf_link can't be just kfree()'d due to deferred
- * anon_inode's release() call. This helper manages marking bpf_link as
- * defunct, releases anon_inode file and puts reserved FD.
+ * anon_inode's release() call. This helper marks bpf_link as
+ * defunct, releases anon_inode file and puts reserved FD. bpf_prog's refcnt
+ * is not decremented, it's the responsibility of a calling code that failed
+ * to complete bpf_link initialization.
  */
-void bpf_link_cleanup(struct bpf_link *link, struct file *link_file,
-		      int link_fd)
+void bpf_link_cleanup(struct bpf_link_primer *primer)
 {
-	link->prog = NULL;
-	fput(link_file);
-	put_unused_fd(link_fd);
+	primer->link->prog = NULL;
+	bpf_link_free_id(primer->id);
+	fput(primer->file);
+	put_unused_fd(primer->fd);
 }
 
 void bpf_link_inc(struct bpf_link *link)
@@ -2210,6 +2225,7 @@ void bpf_link_inc(struct bpf_link *link)
 /* bpf_link_free is guaranteed to be called from process context */
 static void bpf_link_free(struct bpf_link *link)
 {
+	bpf_link_free_id(link->id);
 	if (link->prog) {
 		/* detach BPF program, clean up used resources */
 		link->ops->release(link);
@@ -2275,9 +2291,11 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
 		   "link_type:\t%s\n"
+		   "link_id:\t%u\n"
 		   "prog_tag:\t%s\n"
 		   "prog_id:\t%u\n",
 		   link_type,
+		   link->id,
 		   prog_tag,
 		   prog->aux->id);
 }
@@ -2292,36 +2310,76 @@ static const struct file_operations bpf_link_fops = {
 	.write		= bpf_dummy_write,
 };
 
-int bpf_link_new_fd(struct bpf_link *link)
+static int bpf_link_alloc_id(struct bpf_link *link)
 {
-	return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
-}
+	int id;
+
+	idr_preload(GFP_KERNEL);
+	spin_lock_bh(&link_idr_lock);
+	id = idr_alloc_cyclic(&link_idr, link, 1, INT_MAX, GFP_ATOMIC);
+	spin_unlock_bh(&link_idr_lock);
+	idr_preload_end();
 
-/* Similar to bpf_link_new_fd, create anon_inode for given bpf_link, but
- * instead of immediately installing fd in fdtable, just reserve it and
- * return. Caller then need to either install it with fd_install(fd, file) or
- * release with put_unused_fd(fd).
- * This is useful for cases when bpf_link attachment/detachment are
- * complicated and expensive operations and should be delayed until all the fd
- * reservation and anon_inode creation succeeds.
+	return id;
+}
+
+/* Prepare bpf_link to be exposed to user-space by reserving unused FD,
+ * allocating ID from link_idr and allocating anon_inode file. This is to be
+ * paired with bpf_link_settle() to install FD and ID and expose bpf_link to
+ * user-space, if bpf_link is successfully attached. If not, bpf_link and
+ * pre-allocated resources are to be freed with bpf_link_cleanup() call. All
+ * the transient state is passed around in struct bpf_link_primer.
+ * The file is allocated last: once it exists, dropping it triggers deferred
+ * release() of bpf_link, so the caller could no longer kfree() it on error.
+ * This is the preferred way to create and initialize bpf_link when there are
+ * complicated and expensive operations in between creating and attaching it:
+ * kernel code doesn't have to perform expensive (and potentially failing)
+ * roll back operations in a rare case that file, FD, or ID can't be allocated.
  */
-struct file *bpf_link_new_file(struct bpf_link *link, int *reserved_fd)
+int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
 {
 	struct file *file;
-	int fd;
+	int fd, id;
 
 	fd = get_unused_fd_flags(O_CLOEXEC);
 	if (fd < 0)
-		return ERR_PTR(fd);
+		return fd;
+
+	id = bpf_link_alloc_id(link);
+	if (id < 0) {
+		put_unused_fd(fd);
+		return id;
+	}
 
 	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
 	if (IS_ERR(file)) {
+		bpf_link_free_id(id);
 		put_unused_fd(fd);
-		return file;
+		return PTR_ERR(file);
 	}
 
-	*reserved_fd = fd;
-	return file;
+	primer->link = link;
+	primer->file = file;
+	primer->fd = fd;
+	primer->id = id;
+	return 0;
+}
+
+int bpf_link_settle(struct bpf_link_primer *primer)
+{
+	/* make bpf_link fetchable by ID */
+	spin_lock_bh(&link_idr_lock);
+	primer->link->id = primer->id;
+	spin_unlock_bh(&link_idr_lock);
+	/* make bpf_link fetchable by FD */
+	fd_install(primer->fd, primer->file);
+	/* pass through installed FD */
+	return primer->fd;
+}
+
+int bpf_link_new_fd(struct bpf_link *link)
+{
+	return anon_inode_getfd("bpf-link", &bpf_link_fops, link, O_CLOEXEC);
 }
 
 struct bpf_link *bpf_link_get_from_fd(u32 ufd)
@@ -2367,9 +2425,9 @@ static const struct bpf_link_ops bpf_tracing_link_lops = {
 
 static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 {
+	struct bpf_link_primer link_primer;
 	struct bpf_tracing_link *link;
-	struct file *link_file;
-	int link_fd, err;
+	int err;
 
 	switch (prog->type) {
 	case BPF_PROG_TYPE_TRACING:
@@ -2404,22 +2462,19 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 	}
 	bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
 
-	link_file = bpf_link_new_file(&link->link, &link_fd);
-	if (IS_ERR(link_file)) {
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
 		kfree(link);
-		err = PTR_ERR(link_file);
 		goto out_put_prog;
 	}
 
 	err = bpf_trampoline_link_prog(prog);
 	if (err) {
-		bpf_link_cleanup(&link->link, link_file, link_fd);
+		bpf_link_cleanup(&link_primer);
 		goto out_put_prog;
 	}
 
-	fd_install(link_fd, link_file);
-	return link_fd;
-
+	return bpf_link_settle(&link_primer);
 out_put_prog:
 	bpf_prog_put(prog);
 	return err;
@@ -2447,7 +2502,7 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
 	kfree(raw_tp);
 }
 
-static const struct bpf_link_ops bpf_raw_tp_lops = {
+static const struct bpf_link_ops bpf_raw_tp_link_lops = {
 	.release = bpf_raw_tp_link_release,
 	.dealloc = bpf_raw_tp_link_dealloc,
 };
@@ -2456,13 +2511,13 @@ static const struct bpf_link_ops bpf_raw_tp_lops = {
 
 static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 {
+	struct bpf_link_primer link_primer;
 	struct bpf_raw_tp_link *link;
 	struct bpf_raw_event_map *btp;
-	struct file *link_file;
 	struct bpf_prog *prog;
 	const char *tp_name;
 	char buf[128];
-	int link_fd, err;
+	int err;
 
 	if (CHECK_ATTR(BPF_RAW_TRACEPOINT_OPEN))
 		return -EINVAL;
@@ -2515,24 +2570,22 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 		err = -ENOMEM;
 		goto out_put_btp;
 	}
-	bpf_link_init(&link->link, &bpf_raw_tp_lops, prog);
+	bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog);
 	link->btp = btp;
 
-	link_file = bpf_link_new_file(&link->link, &link_fd);
-	if (IS_ERR(link_file)) {
+	err = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
 		kfree(link);
-		err = PTR_ERR(link_file);
 		goto out_put_btp;
 	}
 
 	err = bpf_probe_register(link->btp, prog);
 	if (err) {
-		bpf_link_cleanup(&link->link, link_file, link_fd);
+		bpf_link_cleanup(&link_primer);
 		goto out_put_btp;
 	}
 
-	fd_install(link_fd, link_file);
-	return link_fd;
+	return bpf_link_settle(&link_primer);
 
 out_put_btp:
 	bpf_put_raw_tracepoint(btp);
@@ -3464,7 +3517,7 @@ static int bpf_task_fd_query(const union bpf_attr *attr,
 	if (file->f_op == &bpf_link_fops) {
 		struct bpf_link *link = file->private_data;
 
-		if (link->ops == &bpf_raw_tp_lops) {
+		if (link->ops == &bpf_raw_tp_link_lops) {
 			struct bpf_raw_tp_link *raw_tp =
 				container_of(link, struct bpf_raw_tp_link, link);
 			struct bpf_raw_event_map *btp = raw_tp->btp;
-- 
cgit v1.2.3-59-g8ed1b


From 2d602c8cf40d65d4a7ac34fe18648d8778e6e594 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:07 -0700
Subject: bpf: Support GET_FD_BY_ID and GET_NEXT_ID for bpf_link

Add support to look up bpf_link by ID and iterate over all existing bpf_links
in the system. GET_FD_BY_ID code handles not-yet-ready bpf_link by checking
that its ID hasn't been set to non-zero value yet. Setting bpf_link's ID is
done as the very last step in finalizing bpf_link, together with installing
FD. This approach allows users of bpf_link in kernel code to not worry about
races between user-space and kernel code that hasn't finished attaching and
initializing bpf_link.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-4-andriin@fb.com
---
 include/uapi/linux/bpf.h |  2 ++
 kernel/bpf/syscall.c     | 49 ++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 51 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6121aa487465..7e6541fceade 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -113,6 +113,8 @@ enum bpf_cmd {
 	BPF_MAP_DELETE_BATCH,
 	BPF_LINK_CREATE,
 	BPF_LINK_UPDATE,
+	BPF_LINK_GET_FD_BY_ID,
+	BPF_LINK_GET_NEXT_ID,
 };
 
 enum bpf_map_type {
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 5439e05e3d25..1c213a730502 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3713,6 +3713,48 @@ out_put_link:
 	return ret;
 }
 
+static int bpf_link_inc_not_zero(struct bpf_link *link)
+{
+	return atomic64_fetch_add_unless(&link->refcnt, 1, 0) ? 0 : -ENOENT;
+}
+
+#define BPF_LINK_GET_FD_BY_ID_LAST_FIELD link_id
+
+static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
+{
+	struct bpf_link *link;
+	u32 id = attr->link_id;
+	int fd, err;
+
+	if (CHECK_ATTR(BPF_LINK_GET_FD_BY_ID))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	spin_lock_bh(&link_idr_lock);
+	link = idr_find(&link_idr, id);
+	/* before link is "settled", ID is 0, pretend it doesn't exist yet */
+	if (link) {
+		if (link->id)
+			err = bpf_link_inc_not_zero(link);
+		else
+			err = -EAGAIN;
+	} else {
+		err = -ENOENT;
+	}
+	spin_unlock_bh(&link_idr_lock);
+
+	if (err)
+		return err;
+
+	fd = bpf_link_new_fd(link);
+	if (fd < 0)
+		bpf_link_put(link);
+
+	return fd;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -3830,6 +3872,13 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_LINK_UPDATE:
 		err = link_update(&attr);
 		break;
+	case BPF_LINK_GET_FD_BY_ID:
+		err = bpf_link_get_fd_by_id(&attr);
+		break;
+	case BPF_LINK_GET_NEXT_ID:
+		err = bpf_obj_get_next_id(&attr, uattr,
+					  &link_idr, &link_idr_lock);
+		break;
 	default:
 		err = -EINVAL;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From f2e10bff16a0fdd41ba278c84da9813700e356af Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:08 -0700
Subject: bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link

Add ability to fetch bpf_link details through BPF_OBJ_GET_INFO_BY_FD command.
Also enhance show_fdinfo to potentially include bpf_link type-specific
information (similarly to obj_info).

Also introduce enum bpf_link_type stored in bpf_link itself and expose it in
UAPI. bpf_tracing_link now also stores and returns bpf_attach_type.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-5-andriin@fb.com
---
 include/linux/bpf-cgroup.h     |   2 -
 include/linux/bpf.h            |   8 ++-
 include/linux/bpf_types.h      |   6 ++
 include/uapi/linux/bpf.h       |  28 ++++++++
 kernel/bpf/btf.c               |   2 +
 kernel/bpf/cgroup.c            |  43 +++++++++++-
 kernel/bpf/syscall.c           | 155 ++++++++++++++++++++++++++++++++++++-----
 kernel/bpf/verifier.c          |   2 +
 tools/include/uapi/linux/bpf.h |  31 +++++++++
 9 files changed, 253 insertions(+), 24 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index a9cb9a5bf8e9..272626cc3fc9 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -57,8 +57,6 @@ struct bpf_cgroup_link {
 	enum bpf_attach_type type;
 };
 
-extern const struct bpf_link_ops bpf_cgroup_link_lops;
-
 struct bpf_prog_list {
 	struct list_head node;
 	struct bpf_prog *prog;
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 875d1f0af803..c07b1d2f3824 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1026,9 +1026,11 @@ extern const struct file_operations bpf_prog_fops;
 	extern const struct bpf_verifier_ops _name ## _verifier_ops;
 #define BPF_MAP_TYPE(_id, _ops) \
 	extern const struct bpf_map_ops _ops;
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 
 extern const struct bpf_prog_ops bpf_offload_prog_ops;
 extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops;
@@ -1086,6 +1088,7 @@ int bpf_prog_new_fd(struct bpf_prog *prog);
 struct bpf_link {
 	atomic64_t refcnt;
 	u32 id;
+	enum bpf_link_type type;
 	const struct bpf_link_ops *ops;
 	struct bpf_prog *prog;
 	struct work_struct work;
@@ -1103,9 +1106,12 @@ struct bpf_link_ops {
 	void (*dealloc)(struct bpf_link *link);
 	int (*update_prog)(struct bpf_link *link, struct bpf_prog *new_prog,
 			   struct bpf_prog *old_prog);
+	void (*show_fdinfo)(const struct bpf_link *link, struct seq_file *seq);
+	int (*fill_link_info)(const struct bpf_link *link,
+			      struct bpf_link_info *info);
 };
 
-void bpf_link_init(struct bpf_link *link,
+void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 		   const struct bpf_link_ops *ops, struct bpf_prog *prog);
 int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer);
 int bpf_link_settle(struct bpf_link_primer *primer);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index ba0c2d56f8a3..8345cdf553b8 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -118,3 +118,9 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
 #if defined(CONFIG_BPF_JIT)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 #endif
+
+BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
+BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
+#ifdef CONFIG_CGROUP_BPF
+BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
+#endif
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 7e6541fceade..0eccafae55bb 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -222,6 +222,15 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
+enum bpf_link_type {
+	BPF_LINK_TYPE_UNSPEC = 0,
+	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
+	BPF_LINK_TYPE_TRACING = 2,
+	BPF_LINK_TYPE_CGROUP = 3,
+
+	MAX_BPF_LINK_TYPE,
+};
+
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -3612,6 +3621,25 @@ struct bpf_btf_info {
 	__u32 id;
 } __attribute__((aligned(8)));
 
+struct bpf_link_info {
+	__u32 type;
+	__u32 id;
+	__u32 prog_id;
+	union {
+		struct {
+			__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
+			__u32 tp_name_len;     /* in/out: tp_name buffer len */
+		} raw_tracepoint;
+		struct {
+			__u32 attach_type;
+		} tracing;
+		struct {
+			__u64 cgroup_id;
+			__u32 attach_type;
+		} cgroup;
+	};
+} __attribute__((aligned(8)));
+
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
  * attach attach type).
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index d65c6912bdaf..a2cfba89a8e1 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3482,6 +3482,7 @@ extern char __weak __stop_BTF[];
 extern struct btf *btf_vmlinux;
 
 #define BPF_MAP_TYPE(_id, _ops)
+#define BPF_LINK_TYPE(_id, _name)
 static union {
 	struct bpf_ctx_convert {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
@@ -3508,6 +3509,7 @@ static u8 bpf_ctx_convert_map[] = {
 	0, /* avoid empty array */
 };
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 
 static const struct btf_member *
 btf_get_prog_ctx_type(struct bpf_verifier_log *log, struct btf *btf,
diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 1bdf37fca879..5c0e964105ac 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -833,10 +833,48 @@ static void bpf_cgroup_link_dealloc(struct bpf_link *link)
 	kfree(cg_link);
 }
 
-const struct bpf_link_ops bpf_cgroup_link_lops = {
+static void bpf_cgroup_link_show_fdinfo(const struct bpf_link *link,
+					struct seq_file *seq)
+{
+	struct bpf_cgroup_link *cg_link =
+		container_of(link, struct bpf_cgroup_link, link);
+	u64 cg_id = 0;
+
+	mutex_lock(&cgroup_mutex);
+	if (cg_link->cgroup)
+		cg_id = cgroup_id(cg_link->cgroup);
+	mutex_unlock(&cgroup_mutex);
+
+	seq_printf(seq,
+		   "cgroup_id:\t%llu\n"
+		   "attach_type:\t%d\n",
+		   cg_id,
+		   cg_link->type);
+}
+
+static int bpf_cgroup_link_fill_link_info(const struct bpf_link *link,
+					  struct bpf_link_info *info)
+{
+	struct bpf_cgroup_link *cg_link =
+		container_of(link, struct bpf_cgroup_link, link);
+	u64 cg_id = 0;
+
+	mutex_lock(&cgroup_mutex);
+	if (cg_link->cgroup)
+		cg_id = cgroup_id(cg_link->cgroup);
+	mutex_unlock(&cgroup_mutex);
+
+	info->cgroup.cgroup_id = cg_id;
+	info->cgroup.attach_type = cg_link->type;
+	return 0;
+}
+
+static const struct bpf_link_ops bpf_cgroup_link_lops = {
 	.release = bpf_cgroup_link_release,
 	.dealloc = bpf_cgroup_link_dealloc,
 	.update_prog = cgroup_bpf_replace,
+	.show_fdinfo = bpf_cgroup_link_show_fdinfo,
+	.fill_link_info = bpf_cgroup_link_fill_link_info,
 };
 
 int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
@@ -858,7 +896,8 @@ int cgroup_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		err = -ENOMEM;
 		goto out_put_cgroup;
 	}
-	bpf_link_init(&link->link, &bpf_cgroup_link_lops, prog);
+	bpf_link_init(&link->link, BPF_LINK_TYPE_CGROUP, &bpf_cgroup_link_lops,
+		      prog);
 	link->cgroup = cgrp;
 	link->type = attr->link_create.attach_type;
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 1c213a730502..d23c04cbe14f 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -51,9 +51,11 @@ static const struct bpf_map_ops * const bpf_map_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
 #define BPF_MAP_TYPE(_id, _ops) \
 	[_id] = &_ops,
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 };
 
 /*
@@ -1548,9 +1550,11 @@ static const struct bpf_prog_ops * const bpf_prog_types[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	[_id] = & _name ## _prog_ops,
 #define BPF_MAP_TYPE(_id, _ops)
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 };
 
 static int find_prog_type(enum bpf_prog_type type, struct bpf_prog *prog)
@@ -2183,10 +2187,11 @@ static int bpf_obj_get(const union bpf_attr *attr)
 				attr->file_flags);
 }
 
-void bpf_link_init(struct bpf_link *link,
+void bpf_link_init(struct bpf_link *link, enum bpf_link_type type,
 		   const struct bpf_link_ops *ops, struct bpf_prog *prog)
 {
 	atomic64_set(&link->refcnt, 1);
+	link->type = type;
 	link->id = 0;
 	link->ops = ops;
 	link->prog = prog;
@@ -2266,27 +2271,23 @@ static int bpf_link_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
-#ifdef CONFIG_PROC_FS
-static const struct bpf_link_ops bpf_raw_tp_lops;
-static const struct bpf_link_ops bpf_tracing_link_lops;
+#define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
+#define BPF_MAP_TYPE(_id, _ops)
+#define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
+static const char *bpf_link_type_strs[] = {
+	[BPF_LINK_TYPE_UNSPEC] = "<invalid>",
+#include <linux/bpf_types.h>
+};
+#undef BPF_PROG_TYPE
+#undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 
+#ifdef CONFIG_PROC_FS
 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_link *link = filp->private_data;
 	const struct bpf_prog *prog = link->prog;
 	char prog_tag[sizeof(prog->tag) * 2 + 1] = { };
-	const char *link_type;
-
-	if (link->ops == &bpf_raw_tp_lops)
-		link_type = "raw_tracepoint";
-	else if (link->ops == &bpf_tracing_link_lops)
-		link_type = "tracing";
-#ifdef CONFIG_CGROUP_BPF
-	else if (link->ops == &bpf_cgroup_link_lops)
-		link_type = "cgroup";
-#endif
-	else
-		link_type = "unknown";
 
 	bin2hex(prog_tag, prog->tag, sizeof(prog->tag));
 	seq_printf(m,
@@ -2294,10 +2295,12 @@ static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 		   "link_id:\t%u\n"
 		   "prog_tag:\t%s\n"
 		   "prog_id:\t%u\n",
-		   link_type,
+		   bpf_link_type_strs[link->type],
 		   link->id,
 		   prog_tag,
 		   prog->aux->id);
+	if (link->ops->show_fdinfo)
+		link->ops->show_fdinfo(link, m);
 }
 #endif
 
@@ -2403,6 +2406,7 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd)
 
 struct bpf_tracing_link {
 	struct bpf_link link;
+	enum bpf_attach_type attach_type;
 };
 
 static void bpf_tracing_link_release(struct bpf_link *link)
@@ -2418,9 +2422,33 @@ static void bpf_tracing_link_dealloc(struct bpf_link *link)
 	kfree(tr_link);
 }
 
+static void bpf_tracing_link_show_fdinfo(const struct bpf_link *link,
+					 struct seq_file *seq)
+{
+	struct bpf_tracing_link *tr_link =
+		container_of(link, struct bpf_tracing_link, link);
+
+	seq_printf(seq,
+		   "attach_type:\t%d\n",
+		   tr_link->attach_type);
+}
+
+static int bpf_tracing_link_fill_link_info(const struct bpf_link *link,
+					   struct bpf_link_info *info)
+{
+	struct bpf_tracing_link *tr_link =
+		container_of(link, struct bpf_tracing_link, link);
+
+	info->tracing.attach_type = tr_link->attach_type;
+
+	return 0;
+}
+
 static const struct bpf_link_ops bpf_tracing_link_lops = {
 	.release = bpf_tracing_link_release,
 	.dealloc = bpf_tracing_link_dealloc,
+	.show_fdinfo = bpf_tracing_link_show_fdinfo,
+	.fill_link_info = bpf_tracing_link_fill_link_info,
 };
 
 static int bpf_tracing_prog_attach(struct bpf_prog *prog)
@@ -2460,7 +2488,9 @@ static int bpf_tracing_prog_attach(struct bpf_prog *prog)
 		err = -ENOMEM;
 		goto out_put_prog;
 	}
-	bpf_link_init(&link->link, &bpf_tracing_link_lops, prog);
+	bpf_link_init(&link->link, BPF_LINK_TYPE_TRACING,
+		      &bpf_tracing_link_lops, prog);
+	link->attach_type = prog->expected_attach_type;
 
 	err = bpf_link_prime(&link->link, &link_primer);
 	if (err) {
@@ -2502,9 +2532,56 @@ static void bpf_raw_tp_link_dealloc(struct bpf_link *link)
 	kfree(raw_tp);
 }
 
+static void bpf_raw_tp_link_show_fdinfo(const struct bpf_link *link,
+					struct seq_file *seq)
+{
+	struct bpf_raw_tp_link *raw_tp_link =
+		container_of(link, struct bpf_raw_tp_link, link);
+
+	seq_printf(seq,
+		   "tp_name:\t%s\n",
+		   raw_tp_link->btp->tp->name);
+}
+
+/* Report the tracepoint name: store the size needed for it in @info and copy
+ * it to the user buffer, truncated and NUL-terminated (-ENOSPC) if too small.
+ */
+static int bpf_raw_tp_link_fill_link_info(const struct bpf_link *link,
+					  struct bpf_link_info *info)
+{
+	struct bpf_raw_tp_link *raw_tp_link =
+		container_of(link, struct bpf_raw_tp_link, link);
+	char __user *ubuf = u64_to_user_ptr(info->raw_tracepoint.tp_name);
+	const char *tp_name = raw_tp_link->btp->tp->name;
+	u32 ulen = info->raw_tracepoint.tp_name_len;
+	size_t tp_len = strlen(tp_name);
+
+	/* buffer and length must be both set or both unset: a zero length
+	 * with a non-NULL buffer would make "ulen - 1" below wrap around
+	 */
+	if (!ulen ^ !ubuf)
+		return -EINVAL;
+
+	info->raw_tracepoint.tp_name_len = tp_len + 1;
+	if (!ubuf)
+		return 0;
+
+	if (ulen >= tp_len + 1)
+		return copy_to_user(ubuf, tp_name, tp_len + 1) ? -EFAULT : 0;
+
+	/* too small: copy ulen - 1 bytes of the name, then terminate it */
+	if (copy_to_user(ubuf, tp_name, ulen - 1) ||
+	    put_user('\0', ubuf + ulen - 1))
+		return -EFAULT;
+
+	return -ENOSPC;
+}
+
 static const struct bpf_link_ops bpf_raw_tp_link_lops = {
 	.release = bpf_raw_tp_link_release,
 	.dealloc = bpf_raw_tp_link_dealloc,
+	.show_fdinfo = bpf_raw_tp_link_show_fdinfo,
+	.fill_link_info = bpf_raw_tp_link_fill_link_info,
 };
 
 #define BPF_RAW_TRACEPOINT_OPEN_LAST_FIELD raw_tracepoint.prog_fd
@@ -2570,7 +2647,8 @@ static int bpf_raw_tracepoint_open(const union bpf_attr *attr)
 		err = -ENOMEM;
 		goto out_put_btp;
 	}
-	bpf_link_init(&link->link, &bpf_raw_tp_link_lops, prog);
+	bpf_link_init(&link->link, BPF_LINK_TYPE_RAW_TRACEPOINT,
+		      &bpf_raw_tp_link_lops, prog);
 	link->btp = btp;
 
 	err = bpf_link_prime(&link->link, &link_primer);
@@ -3366,6 +3444,42 @@ static int bpf_btf_get_info_by_fd(struct btf *btf,
 	return btf_get_info_by_fd(btf, attr, uattr);
 }
 
+static int bpf_link_get_info_by_fd(struct bpf_link *link,
+				  const union bpf_attr *attr,
+				  union bpf_attr __user *uattr)
+{
+	struct bpf_link_info __user *uinfo = u64_to_user_ptr(attr->info.info);
+	struct bpf_link_info info;
+	u32 info_len = attr->info.info_len;
+	int err;
+
+	err = bpf_check_uarg_tail_zero(uinfo, sizeof(info), info_len);
+	if (err)
+		return err;
+	info_len = min_t(u32, sizeof(info), info_len);
+
+	memset(&info, 0, sizeof(info));
+	if (copy_from_user(&info, uinfo, info_len))
+		return -EFAULT;
+
+	info.type = link->type;
+	info.id = link->id;
+	info.prog_id = link->prog->aux->id;
+
+	if (link->ops->fill_link_info) {
+		err = link->ops->fill_link_info(link, &info);
+		if (err)
+			return err;
+	}
+
+	if (copy_to_user(uinfo, &info, info_len) ||
+	    put_user(info_len, &uattr->info.info_len))
+		return -EFAULT;
+
+	return 0;
+}
+
+
 #define BPF_OBJ_GET_INFO_BY_FD_LAST_FIELD info.info
 
 static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
@@ -3390,6 +3504,9 @@ static int bpf_obj_get_info_by_fd(const union bpf_attr *attr,
 					     uattr);
 	else if (f.file->f_op == &btf_fops)
 		err = bpf_btf_get_info_by_fd(f.file->private_data, attr, uattr);
+	else if (f.file->f_op == &bpf_link_fops)
+		err = bpf_link_get_info_by_fd(f.file->private_data,
+					      attr, uattr);
 	else
 		err = -EINVAL;
 
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 91728e0f27eb..2b337e32aa94 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -28,9 +28,11 @@ static const struct bpf_verifier_ops * const bpf_verifier_ops[] = {
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	[_id] = & _name ## _verifier_ops,
 #define BPF_MAP_TYPE(_id, _ops)
+#define BPF_LINK_TYPE(_id, _name)
 #include <linux/bpf_types.h>
 #undef BPF_PROG_TYPE
 #undef BPF_MAP_TYPE
+#undef BPF_LINK_TYPE
 };
 
 /* bpf_check() is a static code analyzer that walks eBPF program
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 4a6c47f3febe..0eccafae55bb 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -113,6 +113,8 @@ enum bpf_cmd {
 	BPF_MAP_DELETE_BATCH,
 	BPF_LINK_CREATE,
 	BPF_LINK_UPDATE,
+	BPF_LINK_GET_FD_BY_ID,
+	BPF_LINK_GET_NEXT_ID,
 };
 
 enum bpf_map_type {
@@ -220,6 +222,15 @@ enum bpf_attach_type {
 
 #define MAX_BPF_ATTACH_TYPE __MAX_BPF_ATTACH_TYPE
 
+enum bpf_link_type {
+	BPF_LINK_TYPE_UNSPEC = 0,
+	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
+	BPF_LINK_TYPE_TRACING = 2,
+	BPF_LINK_TYPE_CGROUP = 3,
+
+	MAX_BPF_LINK_TYPE,
+};
+
 /* cgroup-bpf attach flags used in BPF_PROG_ATTACH command
  *
  * NONE(default): No further bpf programs allowed in the subtree.
@@ -523,6 +534,7 @@ union bpf_attr {
 			__u32		prog_id;
 			__u32		map_id;
 			__u32		btf_id;
+			__u32		link_id;
 		};
 		__u32		next_id;
 		__u32		open_flags;
@@ -3609,6 +3621,25 @@ struct bpf_btf_info {
 	__u32 id;
 } __attribute__((aligned(8)));
 
+struct bpf_link_info {
+	__u32 type;
+	__u32 id;
+	__u32 prog_id;
+	union {
+		struct {
+			__aligned_u64 tp_name; /* in/out: tp_name buffer ptr */
+			__u32 tp_name_len;     /* in/out: tp_name buffer len */
+		} raw_tracepoint;
+		struct {
+			__u32 attach_type;
+		} tracing;
+		struct {
+			__u64 cgroup_id;
+			__u32 attach_type;
+		} cgroup;
+	};
+} __attribute__((aligned(8)));
+
 /* User bpf_sock_addr struct to access socket fields and sockaddr struct passed
  * by user and intended to be used by socket (e.g. to bind to, depends on
  * attach attach type).
-- 
cgit v1.2.3-59-g8ed1b


From 0dbc866832a0fbf9f2b98d412da44c5cfd1b7756 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:09 -0700
Subject: libbpf: Add low-level APIs for new bpf_link commands

Add low-level API calls for bpf_link_get_next_id() and
bpf_link_get_fd_by_id().

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-6-andriin@fb.com
---
 tools/lib/bpf/bpf.c      | 19 +++++++++++++++++--
 tools/lib/bpf/bpf.h      |  4 +++-
 tools/lib/bpf/libbpf.map |  6 ++++++
 3 files changed, 26 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 5cc1b0785d18..8f2f0958d446 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -721,6 +721,11 @@ int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id)
 	return bpf_obj_get_next_id(start_id, next_id, BPF_BTF_GET_NEXT_ID);
 }
 
+int bpf_link_get_next_id(__u32 start_id, __u32 *next_id)
+{
+	return bpf_obj_get_next_id(start_id, next_id, BPF_LINK_GET_NEXT_ID);
+}
+
 int bpf_prog_get_fd_by_id(__u32 id)
 {
 	union bpf_attr attr;
@@ -751,13 +756,23 @@ int bpf_btf_get_fd_by_id(__u32 id)
 	return sys_bpf(BPF_BTF_GET_FD_BY_ID, &attr, sizeof(attr));
 }
 
-int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len)
+int bpf_link_get_fd_by_id(__u32 id)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.link_id = id;
+
+	return sys_bpf(BPF_LINK_GET_FD_BY_ID, &attr, sizeof(attr));
+}
+
+int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len)
 {
 	union bpf_attr attr;
 	int err;
 
 	memset(&attr, 0, sizeof(attr));
-	attr.info.bpf_fd = prog_fd;
+	attr.info.bpf_fd = bpf_fd;
 	attr.info.info_len = *info_len;
 	attr.info.info = ptr_to_u64(info);
 
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 46d47afdd887..335b457b3a25 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -216,10 +216,12 @@ LIBBPF_API int bpf_prog_test_run(int prog_fd, int repeat, void *data,
 LIBBPF_API int bpf_prog_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_map_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_btf_get_next_id(__u32 start_id, __u32 *next_id);
+LIBBPF_API int bpf_link_get_next_id(__u32 start_id, __u32 *next_id);
 LIBBPF_API int bpf_prog_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_map_get_fd_by_id(__u32 id);
 LIBBPF_API int bpf_btf_get_fd_by_id(__u32 id);
-LIBBPF_API int bpf_obj_get_info_by_fd(int prog_fd, void *info, __u32 *info_len);
+LIBBPF_API int bpf_link_get_fd_by_id(__u32 id);
+LIBBPF_API int bpf_obj_get_info_by_fd(int bpf_fd, void *info, __u32 *info_len);
 LIBBPF_API int bpf_prog_query(int target_fd, enum bpf_attach_type type,
 			      __u32 query_flags, __u32 *attach_flags,
 			      __u32 *prog_ids, __u32 *prog_cnt);
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index bb8831605b25..7cd49aa38005 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -254,3 +254,9 @@ LIBBPF_0.0.8 {
 		bpf_program__set_lsm;
 		bpf_set_link_xdp_fd_opts;
 } LIBBPF_0.0.7;
+
+LIBBPF_0.0.9 {
+	global:
+		bpf_link_get_fd_by_id;
+		bpf_link_get_next_id;
+} LIBBPF_0.0.8;
-- 
cgit v1.2.3-59-g8ed1b


From 2c2837b09e9ab4874353186599609fa2e1ccabce Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:10 -0700
Subject: selftests/bpf: Test bpf_link's get_next_id, get_fd_by_id, and
 get_obj_info

Extend bpf_obj_id selftest to verify bpf_link's observability APIs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429001614.1544-7-andriin@fb.com
---
 .../testing/selftests/bpf/prog_tests/bpf_obj_id.c  | 110 +++++++++++++++++++--
 tools/testing/selftests/bpf/progs/test_obj_id.c    |  14 +--
 2 files changed, 104 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
index f10029821e16..7afa4160416f 100644
--- a/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_obj_id.c
@@ -1,26 +1,30 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 
+#define nr_iters 2
+
 void test_bpf_obj_id(void)
 {
 	const __u64 array_magic_value = 0xfaceb00c;
 	const __u32 array_key = 0;
-	const int nr_iters = 2;
 	const char *file = "./test_obj_id.o";
 	const char *expected_prog_name = "test_obj_id";
 	const char *expected_map_name = "test_map_id";
 	const __u64 nsec_per_sec = 1000000000;
 
-	struct bpf_object *objs[nr_iters];
+	struct bpf_object *objs[nr_iters] = {};
+	struct bpf_link *links[nr_iters] = {};
+	struct bpf_program *prog;
 	int prog_fds[nr_iters], map_fds[nr_iters];
 	/* +1 to test for the info_len returned by kernel */
 	struct bpf_prog_info prog_infos[nr_iters + 1];
 	struct bpf_map_info map_infos[nr_iters + 1];
+	struct bpf_link_info link_infos[nr_iters + 1];
 	/* Each prog only uses one map. +1 to test nr_map_ids
 	 * returned by kernel.
 	 */
 	__u32 map_ids[nr_iters + 1];
-	char jited_insns[128], xlated_insns[128], zeros[128];
+	char jited_insns[128], xlated_insns[128], zeros[128], tp_name[128];
 	__u32 i, next_id, info_len, nr_id_found, duration = 0;
 	struct timespec real_time_ts, boot_time_ts;
 	int err = 0;
@@ -36,14 +40,15 @@ void test_bpf_obj_id(void)
 	CHECK(err >= 0 || errno != ENOENT,
 	      "get-fd-by-notexist-map-id", "err %d errno %d\n", err, errno);
 
-	for (i = 0; i < nr_iters; i++)
-		objs[i] = NULL;
+	err = bpf_link_get_fd_by_id(0);
+	CHECK(err >= 0 || errno != ENOENT,
+	      "get-fd-by-notexist-link-id", "err %d errno %d\n", err, errno);
 
 	/* Check bpf_obj_get_info_by_fd() */
 	bzero(zeros, sizeof(zeros));
 	for (i = 0; i < nr_iters; i++) {
 		now = time(NULL);
-		err = bpf_prog_load(file, BPF_PROG_TYPE_SOCKET_FILTER,
+		err = bpf_prog_load(file, BPF_PROG_TYPE_RAW_TRACEPOINT,
 				    &objs[i], &prog_fds[i]);
 		/* test_obj_id.o is a dumb prog. It should never fail
 		 * to load.
@@ -60,6 +65,17 @@ void test_bpf_obj_id(void)
 		if (CHECK_FAIL(err))
 			goto done;
 
+		prog = bpf_object__find_program_by_title(objs[i],
+							 "raw_tp/sys_enter");
+		if (CHECK_FAIL(!prog))
+			goto done;
+		links[i] = bpf_program__attach(prog);
+		err = libbpf_get_error(links[i]);
+		if (CHECK(err, "prog_attach", "prog #%d, err %d\n", i, err)) {
+			links[i] = NULL;
+			goto done;
+		}
+
 		/* Check getting map info */
 		info_len = sizeof(struct bpf_map_info) * 2;
 		bzero(&map_infos[i], info_len);
@@ -107,7 +123,7 @@ void test_bpf_obj_id(void)
 		load_time = (real_time_ts.tv_sec - boot_time_ts.tv_sec)
 			+ (prog_infos[i].load_time / nsec_per_sec);
 		if (CHECK(err ||
-			  prog_infos[i].type != BPF_PROG_TYPE_SOCKET_FILTER ||
+			  prog_infos[i].type != BPF_PROG_TYPE_RAW_TRACEPOINT ||
 			  info_len != sizeof(struct bpf_prog_info) ||
 			  (env.jit_enabled && !prog_infos[i].jited_prog_len) ||
 			  (env.jit_enabled &&
@@ -120,7 +136,11 @@ void test_bpf_obj_id(void)
 			  *(int *)(long)prog_infos[i].map_ids != map_infos[i].id ||
 			  strcmp((char *)prog_infos[i].name, expected_prog_name),
 			  "get-prog-info(fd)",
-			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) jit_enabled %d jited_prog_len %u xlated_prog_len %u jited_prog %d xlated_prog %d load_time %lu(%lu) uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) name %s(%s)\n",
+			  "err %d errno %d i %d type %d(%d) info_len %u(%zu) "
+			  "jit_enabled %d jited_prog_len %u xlated_prog_len %u "
+			  "jited_prog %d xlated_prog %d load_time %lu(%lu) "
+			  "uid %u(%u) nr_map_ids %u(%u) map_id %u(%u) "
+			  "name %s(%s)\n",
 			  err, errno, i,
 			  prog_infos[i].type, BPF_PROG_TYPE_SOCKET_FILTER,
 			  info_len, sizeof(struct bpf_prog_info),
@@ -135,6 +155,33 @@ void test_bpf_obj_id(void)
 			  *(int *)(long)prog_infos[i].map_ids, map_infos[i].id,
 			  prog_infos[i].name, expected_prog_name))
 			goto done;
+
+		/* Check getting link info */
+		info_len = sizeof(struct bpf_link_info) * 2;
+		bzero(&link_infos[i], info_len);
+		link_infos[i].raw_tracepoint.tp_name = (__u64)&tp_name;
+		link_infos[i].raw_tracepoint.tp_name_len = sizeof(tp_name);
+		err = bpf_obj_get_info_by_fd(bpf_link__fd(links[i]),
+					     &link_infos[i], &info_len);
+		if (CHECK(err ||
+			  link_infos[i].type != BPF_LINK_TYPE_RAW_TRACEPOINT ||
+			  link_infos[i].prog_id != prog_infos[i].id ||
+			  link_infos[i].raw_tracepoint.tp_name != (__u64)&tp_name ||
+			  strcmp((char *)link_infos[i].raw_tracepoint.tp_name,
+				 "sys_enter") ||
+			  info_len != sizeof(struct bpf_link_info),
+			  "get-link-info(fd)",
+			  "err %d errno %d info_len %u(%zu) type %d(%d) id %d "
+			  "prog_id %d (%d) tp_name %s(%s)\n",
+			  err, errno,
+			  info_len, sizeof(struct bpf_link_info),
+			  link_infos[i].type, BPF_LINK_TYPE_RAW_TRACEPOINT,
+			  link_infos[i].id,
+			  link_infos[i].prog_id, prog_infos[i].id,
+			  (char *)link_infos[i].raw_tracepoint.tp_name,
+			  "sys_enter"))
+			goto done;
+
 	}
 
 	/* Check bpf_prog_get_next_id() */
@@ -247,7 +294,52 @@ void test_bpf_obj_id(void)
 	      "nr_id_found %u(%u)\n",
 	      nr_id_found, nr_iters);
 
+	/* Check bpf_link_get_next_id() */
+	nr_id_found = 0;
+	next_id = 0;
+	while (!bpf_link_get_next_id(next_id, &next_id)) {
+		struct bpf_link_info link_info;
+		int link_fd, cmp_res;
+
+		info_len = sizeof(link_info);
+		memset(&link_info, 0, info_len);
+
+		link_fd = bpf_link_get_fd_by_id(next_id);
+		if (link_fd < 0 && errno == ENOENT)
+			/* The bpf_link is in the dead row */
+			continue;
+		if (CHECK(link_fd < 0, "get-link-fd(next_id)",
+			  "link_fd %d next_id %u errno %d\n",
+			  link_fd, next_id, errno))
+			break;
+
+		for (i = 0; i < nr_iters; i++)
+			if (link_infos[i].id == next_id)
+				break;
+
+		if (i == nr_iters)
+			continue;
+
+		nr_id_found++;
+
+		err = bpf_obj_get_info_by_fd(link_fd, &link_info, &info_len);
+		cmp_res = memcmp(&link_info, &link_infos[i],
+				offsetof(struct bpf_link_info, raw_tracepoint));
+		CHECK(err || info_len != sizeof(link_info) || cmp_res,
+		      "check get-link-info(next_id->fd)",
+		      "err %d errno %d info_len %u(%zu) memcmp %d\n",
+		      err, errno, info_len, sizeof(struct bpf_link_info),
+		      cmp_res);
+
+		close(link_fd);
+	}
+	CHECK(nr_id_found != nr_iters,
+	      "check total link id found by get_next_id",
+	      "nr_id_found %u(%u)\n", nr_id_found, nr_iters);
+
 done:
-	for (i = 0; i < nr_iters; i++)
+	for (i = 0; i < nr_iters; i++) {
+		bpf_link__destroy(links[i]);
 		bpf_object__close(objs[i]);
+	}
 }
diff --git a/tools/testing/selftests/bpf/progs/test_obj_id.c b/tools/testing/selftests/bpf/progs/test_obj_id.c
index 98b9de2fafd0..ded71b3ff6b4 100644
--- a/tools/testing/selftests/bpf/progs/test_obj_id.c
+++ b/tools/testing/selftests/bpf/progs/test_obj_id.c
@@ -3,16 +3,8 @@
  */
 #include <stddef.h>
 #include <linux/bpf.h>
-#include <linux/pkt_cls.h>
 #include <bpf/bpf_helpers.h>
 
-/* It is a dumb bpf program such that it must have no
- * issue to be loaded since testing the verifier is
- * not the focus here.
- */
-
-int _version SEC("version") = 1;
-
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
 	__uint(max_entries, 1);
@@ -20,13 +12,13 @@ struct {
 	__type(value, __u64);
 } test_map_id SEC(".maps");
 
-SEC("test_obj_id_dummy")
-int test_obj_id(struct __sk_buff *skb)
+SEC("raw_tp/sys_enter")
+int test_obj_id(void *ctx)
 {
 	__u32 key = 0;
 	__u64 *value;
 
 	value = bpf_map_lookup_elem(&test_map_id, &key);
 
-	return TC_ACT_OK;
+	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 50325b1761e31ad17d252e795af72a9af8c5a7d7 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:11 -0700
Subject: bpftool: Expose attach_type-to-string array to non-cgroup code

Move attach_type_strings into main.h for access in non-cgroup code.
bpf_attach_type is used for non-cgroup attach types quite widely now. So also
complete missing string translations for non-cgroup attach types.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200429001614.1544-8-andriin@fb.com
---
 tools/bpf/bpftool/cgroup.c | 48 +++++++++++++++-------------------------------
 tools/bpf/bpftool/main.h   | 32 +++++++++++++++++++++++++++++++
 2 files changed, 47 insertions(+), 33 deletions(-)

diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 62c6a1d7cd18..1693c802bb20 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -31,42 +31,20 @@
 
 static unsigned int query_flags;
 
-static const char * const attach_type_strings[] = {
-	[BPF_CGROUP_INET_INGRESS] = "ingress",
-	[BPF_CGROUP_INET_EGRESS] = "egress",
-	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
-	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
-	[BPF_CGROUP_DEVICE] = "device",
-	[BPF_CGROUP_INET4_BIND] = "bind4",
-	[BPF_CGROUP_INET6_BIND] = "bind6",
-	[BPF_CGROUP_INET4_CONNECT] = "connect4",
-	[BPF_CGROUP_INET6_CONNECT] = "connect6",
-	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
-	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
-	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
-	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
-	[BPF_CGROUP_SYSCTL] = "sysctl",
-	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
-	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
-	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
-	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
-	[__MAX_BPF_ATTACH_TYPE] = NULL,
-};
-
 static enum bpf_attach_type parse_attach_type(const char *str)
 {
 	enum bpf_attach_type type;
 
 	for (type = 0; type < __MAX_BPF_ATTACH_TYPE; type++) {
-		if (attach_type_strings[type] &&
-		    is_prefix(str, attach_type_strings[type]))
+		if (attach_type_name[type] &&
+		    is_prefix(str, attach_type_name[type]))
 			return type;
 	}
 
 	return __MAX_BPF_ATTACH_TYPE;
 }
 
-static int show_bpf_prog(int id, const char *attach_type_str,
+static int show_bpf_prog(int id, enum bpf_attach_type attach_type,
 			 const char *attach_flags_str,
 			 int level)
 {
@@ -86,18 +64,22 @@ static int show_bpf_prog(int id, const char *attach_type_str,
 	if (json_output) {
 		jsonw_start_object(json_wtr);
 		jsonw_uint_field(json_wtr, "id", info.id);
-		jsonw_string_field(json_wtr, "attach_type",
-				   attach_type_str);
+		if (attach_type < ARRAY_SIZE(attach_type_name))
+			jsonw_string_field(json_wtr, "attach_type",
+					   attach_type_name[attach_type]);
+		else
+			jsonw_uint_field(json_wtr, "attach_type", attach_type);
 		jsonw_string_field(json_wtr, "attach_flags",
 				   attach_flags_str);
 		jsonw_string_field(json_wtr, "name", info.name);
 		jsonw_end_object(json_wtr);
 	} else {
-		printf("%s%-8u %-15s %-15s %-15s\n", level ? "    " : "",
-		       info.id,
-		       attach_type_str,
-		       attach_flags_str,
-		       info.name);
+		printf("%s%-8u ", level ? "    " : "", info.id);
+		if (attach_type < ARRAY_SIZE(attach_type_name))
+			printf("%-15s", attach_type_name[attach_type]);
+		else
+			printf("type %-10u", attach_type);
+		printf(" %-15s %-15s\n", attach_flags_str, info.name);
 	}
 
 	close(prog_fd);
@@ -171,7 +153,7 @@ static int show_attached_bpf_progs(int cgroup_fd, enum bpf_attach_type type,
 	}
 
 	for (iter = 0; iter < prog_cnt; iter++)
-		show_bpf_prog(prog_ids[iter], attach_type_strings[type],
+		show_bpf_prog(prog_ids[iter], type,
 			      attach_flags_str, level);
 
 	return 0;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 86f14ce26fd7..99d84bd1d5b2 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -83,6 +83,38 @@ static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_EXT]			= "ext",
 };
 
+static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
+	[BPF_CGROUP_INET_INGRESS] = "ingress",
+	[BPF_CGROUP_INET_EGRESS] = "egress",
+	[BPF_CGROUP_INET_SOCK_CREATE] = "sock_create",
+	[BPF_CGROUP_SOCK_OPS] = "sock_ops",
+	[BPF_CGROUP_DEVICE] = "device",
+	[BPF_CGROUP_INET4_BIND] = "bind4",
+	[BPF_CGROUP_INET6_BIND] = "bind6",
+	[BPF_CGROUP_INET4_CONNECT] = "connect4",
+	[BPF_CGROUP_INET6_CONNECT] = "connect6",
+	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
+	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
+	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
+	[BPF_CGROUP_SYSCTL] = "sysctl",
+	[BPF_CGROUP_UDP4_RECVMSG] = "recvmsg4",
+	[BPF_CGROUP_UDP6_RECVMSG] = "recvmsg6",
+	[BPF_CGROUP_GETSOCKOPT] = "getsockopt",
+	[BPF_CGROUP_SETSOCKOPT] = "setsockopt",
+
+	[BPF_SK_SKB_STREAM_PARSER] = "sk_skb_stream_parser",
+	[BPF_SK_SKB_STREAM_VERDICT] = "sk_skb_stream_verdict",
+	[BPF_SK_MSG_VERDICT] = "sk_msg_verdict",
+	[BPF_LIRC_MODE2] = "lirc_mode2",
+	[BPF_FLOW_DISSECTOR] = "flow_dissector",
+	[BPF_TRACE_RAW_TP] = "raw_tp",
+	[BPF_TRACE_FENTRY] = "fentry",
+	[BPF_TRACE_FEXIT] = "fexit",
+	[BPF_MODIFY_RETURN] = "mod_ret",
+	[BPF_LSM_MAC] = "lsm_mac",
+};
+
 extern const char * const map_type_name[];
 extern const size_t map_type_name_size;
 
-- 
cgit v1.2.3-59-g8ed1b


From c5481f9a954f27b8730c1dfeebbc9b3b5b2b2481 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:12 -0700
Subject: bpftool: Add bpf_link show and pin support

Add `bpftool link show` and `bpftool link pin` commands.

Example plain output for `link show` (with showing pinned paths):

[vmuser@archvm bpf]$ sudo ~/local/linux/tools/bpf/bpftool/bpftool -f link
1: tracing  prog 12
        prog_type tracing  attach_type fentry
        pinned /sys/fs/bpf/my_test_link
        pinned /sys/fs/bpf/my_test_link2
2: tracing  prog 13
        prog_type tracing  attach_type fentry
3: tracing  prog 14
        prog_type tracing  attach_type fentry
4: tracing  prog 15
        prog_type tracing  attach_type fentry
5: tracing  prog 16
        prog_type tracing  attach_type fentry
6: tracing  prog 17
        prog_type tracing  attach_type fentry
7: raw_tracepoint  prog 21
        tp 'sys_enter'
8: cgroup  prog 25
        cgroup_id 584  attach_type egress
9: cgroup  prog 25
        cgroup_id 599  attach_type egress
10: cgroup  prog 25
        cgroup_id 614  attach_type egress
11: cgroup  prog 25
        cgroup_id 629  attach_type egress

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200429001614.1544-9-andriin@fb.com
---
 tools/bpf/bpftool/common.c |   2 +
 tools/bpf/bpftool/link.c   | 333 +++++++++++++++++++++++++++++++++++++++++++++
 tools/bpf/bpftool/main.c   |   6 +-
 tools/bpf/bpftool/main.h   |   5 +
 4 files changed, 345 insertions(+), 1 deletion(-)
 create mode 100644 tools/bpf/bpftool/link.c

diff --git a/tools/bpf/bpftool/common.c b/tools/bpf/bpftool/common.c
index f2223dbdfb0a..c47bdc65de8e 100644
--- a/tools/bpf/bpftool/common.c
+++ b/tools/bpf/bpftool/common.c
@@ -262,6 +262,8 @@ int get_fd_type(int fd)
 		return BPF_OBJ_MAP;
 	else if (strstr(buf, "bpf-prog"))
 		return BPF_OBJ_PROG;
+	else if (strstr(buf, "bpf-link"))
+		return BPF_OBJ_LINK;
 
 	return BPF_OBJ_UNKNOWN;
 }
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
new file mode 100644
index 000000000000..adc7dc431ed8
--- /dev/null
+++ b/tools/bpf/bpftool/link.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook */
+
+#include <errno.h>
+#include <net/if.h>
+#include <stdio.h>
+#include <unistd.h>
+
+#include <bpf/bpf.h>
+
+#include "json_writer.h"
+#include "main.h"
+
+static const char * const link_type_name[] = {
+	[BPF_LINK_TYPE_UNSPEC]			= "unspec",
+	[BPF_LINK_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
+	[BPF_LINK_TYPE_TRACING]			= "tracing",
+	[BPF_LINK_TYPE_CGROUP]			= "cgroup",
+};
+
+static int link_parse_fd(int *argc, char ***argv)
+{
+	if (is_prefix(**argv, "id")) {
+		unsigned int id;
+		char *endptr;
+
+		NEXT_ARGP();
+
+		id = strtoul(**argv, &endptr, 0);
+		if (*endptr) {
+			p_err("can't parse %s as ID", **argv);
+			return -1;
+		}
+		NEXT_ARGP();
+
+		return bpf_link_get_fd_by_id(id);
+	} else if (is_prefix(**argv, "pinned")) {
+		char *path;
+
+		NEXT_ARGP();
+
+		path = **argv;
+		NEXT_ARGP();
+
+		return open_obj_pinned_any(path, BPF_OBJ_LINK);
+	}
+
+	p_err("expected 'id' or 'pinned', got: '%s'?", **argv);
+	return -1;
+}
+
+static void
+show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
+{
+	jsonw_uint_field(wtr, "id", info->id);
+	if (info->type < ARRAY_SIZE(link_type_name))
+		jsonw_string_field(wtr, "type", link_type_name[info->type]);
+	else
+		jsonw_uint_field(wtr, "type", info->type);
+
+	jsonw_uint_field(wtr, "prog_id", info->prog_id);
+}
+
+static int get_prog_info(int prog_id, struct bpf_prog_info *info)
+{
+	__u32 len = sizeof(*info);
+	int err, prog_fd;
+
+	prog_fd = bpf_prog_get_fd_by_id(prog_id);
+	if (prog_fd < 0)
+		return prog_fd;
+
+	memset(info, 0, sizeof(*info));
+	err = bpf_obj_get_info_by_fd(prog_fd, info, &len);
+	if (err)
+		p_err("can't get prog info: %s", strerror(errno));
+	close(prog_fd);
+	return err;
+}
+
+static int show_link_close_json(int fd, struct bpf_link_info *info)
+{
+	struct bpf_prog_info prog_info;
+	int err;
+
+	jsonw_start_object(json_wtr);
+
+	show_link_header_json(info, json_wtr);
+
+	switch (info->type) {
+	case BPF_LINK_TYPE_RAW_TRACEPOINT:
+		jsonw_string_field(json_wtr, "tp_name",
+				   (const char *)info->raw_tracepoint.tp_name);
+		break;
+	case BPF_LINK_TYPE_TRACING:
+		err = get_prog_info(info->prog_id, &prog_info);
+		if (err)
+			return err;
+
+		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+			jsonw_string_field(json_wtr, "prog_type",
+					   prog_type_name[prog_info.type]);
+		else
+			jsonw_uint_field(json_wtr, "prog_type",
+					 prog_info.type);
+
+		if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
+			jsonw_string_field(json_wtr, "attach_type",
+			       attach_type_name[info->tracing.attach_type]);
+		else
+			jsonw_uint_field(json_wtr, "attach_type",
+					 info->tracing.attach_type);
+		break;
+	case BPF_LINK_TYPE_CGROUP:
+		jsonw_lluint_field(json_wtr, "cgroup_id",
+				   info->cgroup.cgroup_id);
+		if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
+			jsonw_string_field(json_wtr, "attach_type",
+			       attach_type_name[info->cgroup.attach_type]);
+		else
+			jsonw_uint_field(json_wtr, "attach_type",
+					 info->cgroup.attach_type);
+		break;
+	default:
+		break;
+	}
+
+	if (!hash_empty(link_table.table)) {
+		struct pinned_obj *obj;
+
+		jsonw_name(json_wtr, "pinned");
+		jsonw_start_array(json_wtr);
+		hash_for_each_possible(link_table.table, obj, hash, info->id) {
+			if (obj->id == info->id)
+				jsonw_string(json_wtr, obj->path);
+		}
+		jsonw_end_array(json_wtr);
+	}
+	jsonw_end_object(json_wtr);
+
+	return 0;
+}
+
+static void show_link_header_plain(struct bpf_link_info *info)
+{
+	printf("%u: ", info->id);
+	if (info->type < ARRAY_SIZE(link_type_name))
+		printf("%s  ", link_type_name[info->type]);
+	else
+		printf("type %u  ", info->type);
+
+	printf("prog %u  ", info->prog_id);
+}
+
+static int show_link_close_plain(int fd, struct bpf_link_info *info)
+{
+	struct bpf_prog_info prog_info;
+	int err;
+
+	show_link_header_plain(info);
+
+	switch (info->type) {
+	case BPF_LINK_TYPE_RAW_TRACEPOINT:
+		printf("\n\ttp '%s'  ",
+		       (const char *)info->raw_tracepoint.tp_name);
+		break;
+	case BPF_LINK_TYPE_TRACING:
+		err = get_prog_info(info->prog_id, &prog_info);
+		if (err)
+			return err;
+
+		if (prog_info.type < ARRAY_SIZE(prog_type_name))
+			printf("\n\tprog_type %s  ",
+			       prog_type_name[prog_info.type]);
+		else
+			printf("\n\tprog_type %u  ", prog_info.type);
+
+		if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
+			printf("attach_type %s  ",
+			       attach_type_name[info->tracing.attach_type]);
+		else
+			printf("attach_type %u  ", info->tracing.attach_type);
+		break;
+	case BPF_LINK_TYPE_CGROUP:
+		printf("\n\tcgroup_id %llu  ", (unsigned long long)info->cgroup.cgroup_id);
+		if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
+			printf("attach_type %s  ",
+			       attach_type_name[info->cgroup.attach_type]);
+		else
+			printf("attach_type %u  ", info->cgroup.attach_type);
+		break;
+	default:
+		break;
+	}
+
+	if (!hash_empty(link_table.table)) {
+		struct pinned_obj *obj;
+
+		hash_for_each_possible(link_table.table, obj, hash, info->id) {
+			if (obj->id == info->id)
+				printf("\n\tpinned %s", obj->path);
+		}
+	}
+
+	printf("\n");
+
+	return 0;
+}
+
+static int do_show_link(int fd)
+{
+	struct bpf_link_info info;
+	__u32 len = sizeof(info);
+	char raw_tp_name[256];
+	int err;
+
+	memset(&info, 0, sizeof(info));
+again:
+	err = bpf_obj_get_info_by_fd(fd, &info, &len);
+	if (err) {
+		p_err("can't get link info: %s",
+		      strerror(errno));
+		close(fd);
+		return err;
+	}
+	if (info.type == BPF_LINK_TYPE_RAW_TRACEPOINT &&
+	    !info.raw_tracepoint.tp_name) {
+		info.raw_tracepoint.tp_name = (unsigned long)&raw_tp_name;
+		info.raw_tracepoint.tp_name_len = sizeof(raw_tp_name);
+		goto again;
+	}
+
+	if (json_output)
+		show_link_close_json(fd, &info);
+	else
+		show_link_close_plain(fd, &info);
+
+	close(fd);
+	return 0;
+}
+
+static int do_show(int argc, char **argv)
+{
+	__u32 id = 0;
+	int err, fd;
+
+	if (show_pinned)
+		build_pinned_obj_table(&link_table, BPF_OBJ_LINK);
+
+	if (argc == 2) {
+		fd = link_parse_fd(&argc, &argv);
+		if (fd < 0)
+			return fd;
+		return do_show_link(fd);
+	}
+
+	if (argc)
+		return BAD_ARG();
+
+	if (json_output)
+		jsonw_start_array(json_wtr);
+	while (true) {
+		err = bpf_link_get_next_id(id, &id);
+		if (err) {
+			if (errno == ENOENT)
+				break;
+			p_err("can't get next link: %s%s", strerror(errno),
+			      errno == EINVAL ? " -- kernel too old?" : "");
+			break;
+		}
+
+		fd = bpf_link_get_fd_by_id(id);
+		if (fd < 0) {
+			if (errno == ENOENT)
+				continue;
+			p_err("can't get link by id (%u): %s",
+			      id, strerror(errno));
+			break;
+		}
+
+		err = do_show_link(fd);
+		if (err)
+			break;
+	}
+	if (json_output)
+		jsonw_end_array(json_wtr);
+
+	return errno == ENOENT ? 0 : -1;
+}
+
+static int do_pin(int argc, char **argv)
+{
+	int err;
+
+	err = do_pin_any(argc, argv, link_parse_fd);
+	if (!err && json_output)
+		jsonw_null(json_wtr);
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	if (json_output) {
+		jsonw_null(json_wtr);
+		return 0;
+	}
+
+	fprintf(stderr,
+		"Usage: %1$s %2$s { show | list }   [LINK]\n"
+		"       %1$s %2$s pin        LINK  FILE\n"
+		"       %1$s %2$s help\n"
+		"\n"
+		"       " HELP_SPEC_LINK "\n"
+		"       " HELP_SPEC_PROGRAM "\n"
+		"       " HELP_SPEC_OPTIONS "\n"
+		"",
+		bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "show",	do_show },
+	{ "list",	do_show },
+	{ "help",	do_help },
+	{ "pin",	do_pin },
+	{ 0 }
+};
+
+int do_link(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 466c269eabdd..1413a154806e 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -30,6 +30,7 @@ bool verifier_logs;
 bool relaxed_maps;
 struct pinned_obj_table prog_table;
 struct pinned_obj_table map_table;
+struct pinned_obj_table link_table;
 
 static void __noreturn clean_and_exit(int i)
 {
@@ -58,7 +59,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
+		"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -215,6 +216,7 @@ static const struct cmd cmds[] = {
 	{ "batch",	do_batch },
 	{ "prog",	do_prog },
 	{ "map",	do_map },
+	{ "link",	do_link },
 	{ "cgroup",	do_cgroup },
 	{ "perf",	do_perf },
 	{ "net",	do_net },
@@ -364,6 +366,7 @@ int main(int argc, char **argv)
 
 	hash_init(prog_table.table);
 	hash_init(map_table.table);
+	hash_init(link_table.table);
 
 	opterr = 0;
 	while ((opt = getopt_long(argc, argv, "Vhpjfmnd",
@@ -422,6 +425,7 @@ int main(int argc, char **argv)
 	if (show_pinned) {
 		delete_pinned_obj_table(&prog_table);
 		delete_pinned_obj_table(&map_table);
+		delete_pinned_obj_table(&link_table);
 	}
 
 	return ret;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 99d84bd1d5b2..9b1fb81a8331 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -50,6 +50,8 @@
 	"\t            {-m|--mapcompat} | {-n|--nomount} }"
 #define HELP_SPEC_MAP							\
 	"MAP := { id MAP_ID | pinned FILE | name MAP_NAME }"
+#define HELP_SPEC_LINK							\
+	"LINK := { id LINK_ID | pinned FILE }"
 
 static const char * const prog_type_name[] = {
 	[BPF_PROG_TYPE_UNSPEC]			= "unspec",
@@ -122,6 +124,7 @@ enum bpf_obj_type {
 	BPF_OBJ_UNKNOWN,
 	BPF_OBJ_PROG,
 	BPF_OBJ_MAP,
+	BPF_OBJ_LINK,
 };
 
 extern const char *bin_name;
@@ -134,6 +137,7 @@ extern bool verifier_logs;
 extern bool relaxed_maps;
 extern struct pinned_obj_table prog_table;
 extern struct pinned_obj_table map_table;
+extern struct pinned_obj_table link_table;
 
 void __printf(1, 2) p_err(const char *fmt, ...);
 void __printf(1, 2) p_info(const char *fmt, ...);
@@ -185,6 +189,7 @@ int do_pin_fd(int fd, const char *name);
 
 int do_prog(int argc, char **arg);
 int do_map(int argc, char **arg);
+int do_link(int argc, char **arg);
 int do_event_pipe(int argc, char **argv);
 int do_cgroup(int argc, char **arg);
 int do_perf(int argc, char **arg);
-- 
cgit v1.2.3-59-g8ed1b


From 7464d013ccd4db8544df5eddb05ddd509b9c46e5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:13 -0700
Subject: bpftool: Add bpftool-link manpage

Add bpftool-link manpage with information and examples of link-related
commands.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200429001614.1544-10-andriin@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-link.rst | 118 +++++++++++++++++++++++
 1 file changed, 118 insertions(+)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-link.rst

diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
new file mode 100644
index 000000000000..ee6500d6e6e4
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -0,0 +1,118 @@
+================
+bpftool-link
+================
+-------------------------------------------------------------------------------
+tool for inspection and simple manipulation of eBPF links
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **link** *COMMAND*
+
+	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] | { **-f** | **--bpffs** } }
+
+	*COMMANDS* := { **show** | **list** | **pin** | **help** }
+
+LINK COMMANDS
+=============
+
+|	**bpftool** **link { show | list }** [*LINK*]
+|	**bpftool** **link pin** *LINK* *FILE*
+|	**bpftool** **link help**
+|
+|	*LINK* := { **id** *LINK_ID* | **pinned** *FILE* }
+
+
+DESCRIPTION
+===========
+	**bpftool link { show | list }** [*LINK*]
+		  Show information about active links. If *LINK* is
+		  specified show information only about given link,
+		  otherwise list all links currently active on the system.
+
+		  Output will start with link ID followed by link type and
+		  zero or more named attributes, some of which depend on type
+		  of link.
+
+	**bpftool link pin** *LINK* *FILE*
+		  Pin link *LINK* as *FILE*.
+
+		  Note: *FILE* must be located in *bpffs* mount. It must not
+		  contain a dot character ('.'), which is reserved for future
+		  extensions of *bpffs*.
+
+	**bpftool link help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-V, --version
+		  Print version number (similar to **bpftool version**).
+
+	-j, --json
+		  Generate JSON output. For commands that cannot produce JSON, this
+		  option has no effect.
+
+	-p, --pretty
+		  Generate human-readable JSON output. Implies **-j**.
+
+	-f, --bpffs
+		  When showing BPF links, show file names of pinned
+		  links.
+
+	-n, --nomount
+		  Do not automatically attempt to mount any virtual file system
+		  (such as tracefs or BPF virtual file system) when necessary.
+
+	-d, --debug
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf.
+
+EXAMPLES
+========
+**# bpftool link show**
+
+::
+
+    10: cgroup  prog 25
+            cgroup_id 614  attach_type egress
+
+**# bpftool --json --pretty link show**
+
+::
+
+    [{
+            "type": "cgroup",
+            "prog_id": 25,
+            "cgroup_id": 614,
+            "attach_type": "egress"
+        }
+    ]
+
+|
+| **# bpftool link pin id 10 /sys/fs/bpf/link**
+| **# ls -l /sys/fs/bpf/**
+
+::
+
+    -rw------- 1 root root 0 Apr 23 21:39 link
+
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-feature**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8)
-- 
cgit v1.2.3-59-g8ed1b


From 5d085ad2e68cceec8332b23ea8f630a28b506366 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:16:14 -0700
Subject: bpftool: Add link bash completions

Extend bpftool's bash-completion script to handle new link command and its
sub-commands.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Link: https://lore.kernel.org/bpf/20200429001614.1544-11-andriin@fb.com
---
 tools/bpf/bpftool/bash-completion/bpftool | 39 +++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 45ee99b159e2..c033c3329f73 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -98,6 +98,12 @@ _bpftool_get_btf_ids()
         command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
 }
 
+_bpftool_get_link_ids()
+{
+    COMPREPLY+=( $( compgen -W "$( bpftool -jp link 2>&1 | \
+        command sed -n 's/.*"id": \(.*\),$/\1/p' )" -- "$cur" ) )
+}
+
 _bpftool_get_obj_map_names()
 {
     local obj
@@ -1082,6 +1088,39 @@ _bpftool()
                     ;;
             esac
             ;;
+        link)
+            case $command in
+                show|list|pin)
+                    case $prev in
+                        id)
+                            _bpftool_get_link_ids
+                            return 0
+                            ;;
+                    esac
+                    ;;
+            esac
+
+            local LINK_TYPE='id pinned'
+            case $command in
+                show|list)
+                    [[ $prev != "$command" ]] && return 0
+                    COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+                    return 0
+                    ;;
+                pin)
+                    if [[ $prev == "$command" ]]; then
+                        COMPREPLY=( $( compgen -W "$LINK_TYPE" -- "$cur" ) )
+                    else
+                        _filedir
+                    fi
+                    return 0
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'help pin show list' -- "$cur" ) )
+                    ;;
+            esac
+            ;;
     esac
 } &&
 complete -F _bpftool bpftool
-- 
cgit v1.2.3-59-g8ed1b


From 41017e56af6cf99122c86655f60fe4e1b75ecf48 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:27:37 -0700
Subject: libbpf: Refactor BTF-defined map definition parsing logic

Factor out BTF map definition logic into stand-alone routine for easier reuse
for map-in-map case.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429002739.48006-2-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 195 ++++++++++++++++++++++++++-----------------------
 1 file changed, 103 insertions(+), 92 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 8e1dc6980fac..7d10436d7b58 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -1914,109 +1914,54 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
 	return 0;
 }
 
-static int bpf_object__init_user_btf_map(struct bpf_object *obj,
-					 const struct btf_type *sec,
-					 int var_idx, int sec_idx,
-					 const Elf_Data *data, bool strict,
-					 const char *pin_root_path)
+
+static int parse_btf_map_def(struct bpf_object *obj,
+			     struct bpf_map *map,
+			     const struct btf_type *def,
+			     bool strict,
+			     const char *pin_root_path)
 {
-	const struct btf_type *var, *def, *t;
-	const struct btf_var_secinfo *vi;
-	const struct btf_var *var_extra;
+	const struct btf_type *t;
 	const struct btf_member *m;
-	const char *map_name;
-	struct bpf_map *map;
 	int vlen, i;
 
-	vi = btf_var_secinfos(sec) + var_idx;
-	var = btf__type_by_id(obj->btf, vi->type);
-	var_extra = btf_var(var);
-	map_name = btf__name_by_offset(obj->btf, var->name_off);
-	vlen = btf_vlen(var);
-
-	if (map_name == NULL || map_name[0] == '\0') {
-		pr_warn("map #%d: empty name.\n", var_idx);
-		return -EINVAL;
-	}
-	if ((__u64)vi->offset + vi->size > data->d_size) {
-		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
-		return -EINVAL;
-	}
-	if (!btf_is_var(var)) {
-		pr_warn("map '%s': unexpected var kind %u.\n",
-			map_name, btf_kind(var));
-		return -EINVAL;
-	}
-	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
-	    var_extra->linkage != BTF_VAR_STATIC) {
-		pr_warn("map '%s': unsupported var linkage %u.\n",
-			map_name, var_extra->linkage);
-		return -EOPNOTSUPP;
-	}
-
-	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
-	if (!btf_is_struct(def)) {
-		pr_warn("map '%s': unexpected def kind %u.\n",
-			map_name, btf_kind(var));
-		return -EINVAL;
-	}
-	if (def->size > vi->size) {
-		pr_warn("map '%s': invalid def size.\n", map_name);
-		return -EINVAL;
-	}
-
-	map = bpf_object__add_map(obj);
-	if (IS_ERR(map))
-		return PTR_ERR(map);
-	map->name = strdup(map_name);
-	if (!map->name) {
-		pr_warn("map '%s': failed to alloc map name.\n", map_name);
-		return -ENOMEM;
-	}
-	map->libbpf_type = LIBBPF_MAP_UNSPEC;
-	map->def.type = BPF_MAP_TYPE_UNSPEC;
-	map->sec_idx = sec_idx;
-	map->sec_offset = vi->offset;
-	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
-		 map_name, map->sec_idx, map->sec_offset);
-
 	vlen = btf_vlen(def);
 	m = btf_members(def);
 	for (i = 0; i < vlen; i++, m++) {
 		const char *name = btf__name_by_offset(obj->btf, m->name_off);
 
 		if (!name) {
-			pr_warn("map '%s': invalid field #%d.\n", map_name, i);
+			pr_warn("map '%s': invalid field #%d.\n", map->name, i);
 			return -EINVAL;
 		}
 		if (strcmp(name, "type") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, m,
+			if (!get_map_field_int(map->name, obj->btf, m,
 					       &map->def.type))
 				return -EINVAL;
 			pr_debug("map '%s': found type = %u.\n",
-				 map_name, map->def.type);
+				 map->name, map->def.type);
 		} else if (strcmp(name, "max_entries") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, m,
+			if (!get_map_field_int(map->name, obj->btf, m,
 					       &map->def.max_entries))
 				return -EINVAL;
 			pr_debug("map '%s': found max_entries = %u.\n",
-				 map_name, map->def.max_entries);
+				 map->name, map->def.max_entries);
 		} else if (strcmp(name, "map_flags") == 0) {
-			if (!get_map_field_int(map_name, obj->btf, m,
+			if (!get_map_field_int(map->name, obj->btf, m,
 					       &map->def.map_flags))
 				return -EINVAL;
 			pr_debug("map '%s': found map_flags = %u.\n",
-				 map_name, map->def.map_flags);
+				 map->name, map->def.map_flags);
 		} else if (strcmp(name, "key_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map_name, obj->btf, m, &sz))
+			if (!get_map_field_int(map->name, obj->btf, m, &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found key_size = %u.\n",
-				 map_name, sz);
+				 map->name, sz);
 			if (map->def.key_size && map->def.key_size != sz) {
 				pr_warn("map '%s': conflicting key size %u != %u.\n",
-					map_name, map->def.key_size, sz);
+					map->name, map->def.key_size, sz);
 				return -EINVAL;
 			}
 			map->def.key_size = sz;
@@ -2026,25 +1971,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			t = btf__type_by_id(obj->btf, m->type);
 			if (!t) {
 				pr_warn("map '%s': key type [%d] not found.\n",
-					map_name, m->type);
+					map->name, m->type);
 				return -EINVAL;
 			}
 			if (!btf_is_ptr(t)) {
 				pr_warn("map '%s': key spec is not PTR: %u.\n",
-					map_name, btf_kind(t));
+					map->name, btf_kind(t));
 				return -EINVAL;
 			}
 			sz = btf__resolve_size(obj->btf, t->type);
 			if (sz < 0) {
 				pr_warn("map '%s': can't determine key size for type [%u]: %zd.\n",
-					map_name, t->type, (ssize_t)sz);
+					map->name, t->type, (ssize_t)sz);
 				return sz;
 			}
 			pr_debug("map '%s': found key [%u], sz = %zd.\n",
-				 map_name, t->type, (ssize_t)sz);
+				 map->name, t->type, (ssize_t)sz);
 			if (map->def.key_size && map->def.key_size != sz) {
 				pr_warn("map '%s': conflicting key size %u != %zd.\n",
-					map_name, map->def.key_size, (ssize_t)sz);
+					map->name, map->def.key_size, (ssize_t)sz);
 				return -EINVAL;
 			}
 			map->def.key_size = sz;
@@ -2052,13 +1997,13 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 		} else if (strcmp(name, "value_size") == 0) {
 			__u32 sz;
 
-			if (!get_map_field_int(map_name, obj->btf, m, &sz))
+			if (!get_map_field_int(map->name, obj->btf, m, &sz))
 				return -EINVAL;
 			pr_debug("map '%s': found value_size = %u.\n",
-				 map_name, sz);
+				 map->name, sz);
 			if (map->def.value_size && map->def.value_size != sz) {
 				pr_warn("map '%s': conflicting value size %u != %u.\n",
-					map_name, map->def.value_size, sz);
+					map->name, map->def.value_size, sz);
 				return -EINVAL;
 			}
 			map->def.value_size = sz;
@@ -2068,25 +2013,25 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			t = btf__type_by_id(obj->btf, m->type);
 			if (!t) {
 				pr_warn("map '%s': value type [%d] not found.\n",
-					map_name, m->type);
+					map->name, m->type);
 				return -EINVAL;
 			}
 			if (!btf_is_ptr(t)) {
 				pr_warn("map '%s': value spec is not PTR: %u.\n",
-					map_name, btf_kind(t));
+					map->name, btf_kind(t));
 				return -EINVAL;
 			}
 			sz = btf__resolve_size(obj->btf, t->type);
 			if (sz < 0) {
 				pr_warn("map '%s': can't determine value size for type [%u]: %zd.\n",
-					map_name, t->type, (ssize_t)sz);
+					map->name, t->type, (ssize_t)sz);
 				return sz;
 			}
 			pr_debug("map '%s': found value [%u], sz = %zd.\n",
-				 map_name, t->type, (ssize_t)sz);
+				 map->name, t->type, (ssize_t)sz);
 			if (map->def.value_size && map->def.value_size != sz) {
 				pr_warn("map '%s': conflicting value size %u != %zd.\n",
-					map_name, map->def.value_size, (ssize_t)sz);
+					map->name, map->def.value_size, (ssize_t)sz);
 				return -EINVAL;
 			}
 			map->def.value_size = sz;
@@ -2095,44 +2040,110 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 			__u32 val;
 			int err;
 
-			if (!get_map_field_int(map_name, obj->btf, m, &val))
+			if (!get_map_field_int(map->name, obj->btf, m, &val))
 				return -EINVAL;
 			pr_debug("map '%s': found pinning = %u.\n",
-				 map_name, val);
+				 map->name, val);
 
 			if (val != LIBBPF_PIN_NONE &&
 			    val != LIBBPF_PIN_BY_NAME) {
 				pr_warn("map '%s': invalid pinning value %u.\n",
-					map_name, val);
+					map->name, val);
 				return -EINVAL;
 			}
 			if (val == LIBBPF_PIN_BY_NAME) {
 				err = build_map_pin_path(map, pin_root_path);
 				if (err) {
 					pr_warn("map '%s': couldn't build pin path.\n",
-						map_name);
+						map->name);
 					return err;
 				}
 			}
 		} else {
 			if (strict) {
 				pr_warn("map '%s': unknown field '%s'.\n",
-					map_name, name);
+					map->name, name);
 				return -ENOTSUP;
 			}
 			pr_debug("map '%s': ignoring unknown field '%s'.\n",
-				 map_name, name);
+				 map->name, name);
 		}
 	}
 
 	if (map->def.type == BPF_MAP_TYPE_UNSPEC) {
-		pr_warn("map '%s': map type isn't specified.\n", map_name);
+		pr_warn("map '%s': map type isn't specified.\n", map->name);
 		return -EINVAL;
 	}
 
 	return 0;
 }
 
+static int bpf_object__init_user_btf_map(struct bpf_object *obj,
+					 const struct btf_type *sec,
+					 int var_idx, int sec_idx,
+					 const Elf_Data *data, bool strict,
+					 const char *pin_root_path)
+{
+	const struct btf_type *var, *def;
+	const struct btf_var_secinfo *vi;
+	const struct btf_var *var_extra;
+	const char *map_name;
+	struct bpf_map *map;
+
+	vi = btf_var_secinfos(sec) + var_idx;
+	var = btf__type_by_id(obj->btf, vi->type);
+	var_extra = btf_var(var);
+	map_name = btf__name_by_offset(obj->btf, var->name_off);
+
+	if (map_name == NULL || map_name[0] == '\0') {
+		pr_warn("map #%d: empty name.\n", var_idx);
+		return -EINVAL;
+	}
+	if ((__u64)vi->offset + vi->size > data->d_size) {
+		pr_warn("map '%s' BTF data is corrupted.\n", map_name);
+		return -EINVAL;
+	}
+	if (!btf_is_var(var)) {
+		pr_warn("map '%s': unexpected var kind %u.\n",
+			map_name, btf_kind(var));
+		return -EINVAL;
+	}
+	if (var_extra->linkage != BTF_VAR_GLOBAL_ALLOCATED &&
+	    var_extra->linkage != BTF_VAR_STATIC) {
+		pr_warn("map '%s': unsupported var linkage %u.\n",
+			map_name, var_extra->linkage);
+		return -EOPNOTSUPP;
+	}
+
+	def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+	if (!btf_is_struct(def)) {
+		pr_warn("map '%s': unexpected def kind %u.\n",
+			map_name, btf_kind(var));
+		return -EINVAL;
+	}
+	if (def->size > vi->size) {
+		pr_warn("map '%s': invalid def size.\n", map_name);
+		return -EINVAL;
+	}
+
+	map = bpf_object__add_map(obj);
+	if (IS_ERR(map))
+		return PTR_ERR(map);
+	map->name = strdup(map_name);
+	if (!map->name) {
+		pr_warn("map '%s': failed to alloc map name.\n", map_name);
+		return -ENOMEM;
+	}
+	map->libbpf_type = LIBBPF_MAP_UNSPEC;
+	map->def.type = BPF_MAP_TYPE_UNSPEC;
+	map->sec_idx = sec_idx;
+	map->sec_offset = vi->offset;
+	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
+		 map_name, map->sec_idx, map->sec_offset);
+
+	return parse_btf_map_def(obj, map, def, strict, pin_root_path);
+}
+
 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 					  const char *pin_root_path)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 2d39d7c56f115148b05d1d8c6b8698a5730c8b53 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:27:38 -0700
Subject: libbpf: Refactor map creation logic and fix cleanup leak
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Factor out map creation and destruction logic to simplify code and especially
error handling. Also fix map FD leak in case of partially successful map
creation during bpf_object load operation.

Fixes: 57a00f41644f ("libbpf: Add auto-pinning of maps when loading BPF objects")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200429002739.48006-3-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 226 ++++++++++++++++++++++++++-----------------------
 1 file changed, 121 insertions(+), 105 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 7d10436d7b58..9c845cf4cfcf 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3493,107 +3493,111 @@ bpf_object__populate_internal_map(struct bpf_object *obj, struct bpf_map *map)
 	return 0;
 }
 
+static void bpf_map__destroy(struct bpf_map *map);
+
+static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
+{
+	struct bpf_create_map_attr create_attr;
+	struct bpf_map_def *def = &map->def;
+
+	memset(&create_attr, 0, sizeof(create_attr));
+
+	if (obj->caps.name)
+		create_attr.name = map->name;
+	create_attr.map_ifindex = map->map_ifindex;
+	create_attr.map_type = def->type;
+	create_attr.map_flags = def->map_flags;
+	create_attr.key_size = def->key_size;
+	create_attr.value_size = def->value_size;
+
+	if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY && !def->max_entries) {
+		int nr_cpus;
+
+		nr_cpus = libbpf_num_possible_cpus();
+		if (nr_cpus < 0) {
+			pr_warn("map '%s': failed to determine number of system CPUs: %d\n",
+				map->name, nr_cpus);
+			return nr_cpus;
+		}
+		pr_debug("map '%s': setting size to %d\n", map->name, nr_cpus);
+		create_attr.max_entries = nr_cpus;
+	} else {
+		create_attr.max_entries = def->max_entries;
+	}
+
+	if (bpf_map__is_struct_ops(map))
+		create_attr.btf_vmlinux_value_type_id =
+			map->btf_vmlinux_value_type_id;
+
+	create_attr.btf_fd = 0;
+	create_attr.btf_key_type_id = 0;
+	create_attr.btf_value_type_id = 0;
+	if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
+		create_attr.btf_fd = btf__fd(obj->btf);
+		create_attr.btf_key_type_id = map->btf_key_type_id;
+		create_attr.btf_value_type_id = map->btf_value_type_id;
+	}
+
+	map->fd = bpf_create_map_xattr(&create_attr);
+	if (map->fd < 0 && (create_attr.btf_key_type_id ||
+			    create_attr.btf_value_type_id)) {
+		char *cp, errmsg[STRERR_BUFSIZE];
+		int err = -errno;
+
+		cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+		pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
+			map->name, cp, err);
+		create_attr.btf_fd = 0;
+		create_attr.btf_key_type_id = 0;
+		create_attr.btf_value_type_id = 0;
+		map->btf_key_type_id = 0;
+		map->btf_value_type_id = 0;
+		map->fd = bpf_create_map_xattr(&create_attr);
+	}
+
+	if (map->fd < 0)
+		return -errno;
+
+	return 0;
+}
+
 static int
 bpf_object__create_maps(struct bpf_object *obj)
 {
-	struct bpf_create_map_attr create_attr = {};
-	int nr_cpus = 0;
-	unsigned int i;
+	struct bpf_map *map;
+	char *cp, errmsg[STRERR_BUFSIZE];
+	unsigned int i, j;
 	int err;
 
 	for (i = 0; i < obj->nr_maps; i++) {
-		struct bpf_map *map = &obj->maps[i];
-		struct bpf_map_def *def = &map->def;
-		char *cp, errmsg[STRERR_BUFSIZE];
-		int *pfd = &map->fd;
+		map = &obj->maps[i];
 
 		if (map->pin_path) {
 			err = bpf_object__reuse_map(map);
 			if (err) {
-				pr_warn("error reusing pinned map %s\n",
+				pr_warn("map '%s': error reusing pinned map\n",
 					map->name);
-				return err;
+				goto err_out;
 			}
 		}
 
 		if (map->fd >= 0) {
-			pr_debug("skip map create (preset) %s: fd=%d\n",
+			pr_debug("map '%s': skipping creation (preset fd=%d)\n",
 				 map->name, map->fd);
 			continue;
 		}
 
-		if (obj->caps.name)
-			create_attr.name = map->name;
-		create_attr.map_ifindex = map->map_ifindex;
-		create_attr.map_type = def->type;
-		create_attr.map_flags = def->map_flags;
-		create_attr.key_size = def->key_size;
-		create_attr.value_size = def->value_size;
-		if (def->type == BPF_MAP_TYPE_PERF_EVENT_ARRAY &&
-		    !def->max_entries) {
-			if (!nr_cpus)
-				nr_cpus = libbpf_num_possible_cpus();
-			if (nr_cpus < 0) {
-				pr_warn("failed to determine number of system CPUs: %d\n",
-					nr_cpus);
-				err = nr_cpus;
-				goto err_out;
-			}
-			pr_debug("map '%s': setting size to %d\n",
-				 map->name, nr_cpus);
-			create_attr.max_entries = nr_cpus;
-		} else {
-			create_attr.max_entries = def->max_entries;
-		}
-		create_attr.btf_fd = 0;
-		create_attr.btf_key_type_id = 0;
-		create_attr.btf_value_type_id = 0;
-		if (bpf_map_type__is_map_in_map(def->type) &&
-		    map->inner_map_fd >= 0)
-			create_attr.inner_map_fd = map->inner_map_fd;
-		if (bpf_map__is_struct_ops(map))
-			create_attr.btf_vmlinux_value_type_id =
-				map->btf_vmlinux_value_type_id;
-
-		if (obj->btf && !bpf_map_find_btf_info(obj, map)) {
-			create_attr.btf_fd = btf__fd(obj->btf);
-			create_attr.btf_key_type_id = map->btf_key_type_id;
-			create_attr.btf_value_type_id = map->btf_value_type_id;
-		}
-
-		*pfd = bpf_create_map_xattr(&create_attr);
-		if (*pfd < 0 && (create_attr.btf_key_type_id ||
-				 create_attr.btf_value_type_id)) {
-			err = -errno;
-			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-			pr_warn("Error in bpf_create_map_xattr(%s):%s(%d). Retrying without BTF.\n",
-				map->name, cp, err);
-			create_attr.btf_fd = 0;
-			create_attr.btf_key_type_id = 0;
-			create_attr.btf_value_type_id = 0;
-			map->btf_key_type_id = 0;
-			map->btf_value_type_id = 0;
-			*pfd = bpf_create_map_xattr(&create_attr);
-		}
-
-		if (*pfd < 0) {
-			size_t j;
+		err = bpf_object__create_map(obj, map);
+		if (err)
+			goto err_out;
 
-			err = -errno;
-err_out:
-			cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
-			pr_warn("failed to create map (name: '%s'): %s(%d)\n",
-				map->name, cp, err);
-			pr_perm_msg(err);
-			for (j = 0; j < i; j++)
-				zclose(obj->maps[j].fd);
-			return err;
-		}
+		pr_debug("map '%s': created successfully, fd=%d\n", map->name,
+			 map->fd);
 
 		if (bpf_map__is_internal(map)) {
 			err = bpf_object__populate_internal_map(obj, map);
 			if (err < 0) {
-				zclose(*pfd);
+				zclose(map->fd);
 				goto err_out;
 			}
 		}
@@ -3601,16 +3605,23 @@ err_out:
 		if (map->pin_path && !map->pinned) {
 			err = bpf_map__pin(map, NULL);
 			if (err) {
-				pr_warn("failed to auto-pin map name '%s' at '%s'\n",
-					map->name, map->pin_path);
-				return err;
+				pr_warn("map '%s': failed to auto-pin at '%s': %d\n",
+					map->name, map->pin_path, err);
+				zclose(map->fd);
+				goto err_out;
 			}
 		}
-
-		pr_debug("created map %s: fd=%d\n", map->name, *pfd);
 	}
 
 	return 0;
+
+err_out:
+	cp = libbpf_strerror_r(err, errmsg, sizeof(errmsg));
+	pr_warn("map '%s': failed to create: %s(%d)\n", map->name, cp, err);
+	pr_perm_msg(err);
+	for (j = 0; j < i; j++)
+		zclose(obj->maps[j].fd);
+	return err;
 }
 
 static int
@@ -5966,6 +5977,32 @@ int bpf_object__pin(struct bpf_object *obj, const char *path)
 	return 0;
 }
 
+static void bpf_map__destroy(struct bpf_map *map)
+{
+	if (map->clear_priv)
+		map->clear_priv(map, map->priv);
+	map->priv = NULL;
+	map->clear_priv = NULL;
+
+	if (map->mmaped) {
+		munmap(map->mmaped, bpf_map_mmap_sz(map));
+		map->mmaped = NULL;
+	}
+
+	if (map->st_ops) {
+		zfree(&map->st_ops->data);
+		zfree(&map->st_ops->progs);
+		zfree(&map->st_ops->kern_func_off);
+		zfree(&map->st_ops);
+	}
+
+	zfree(&map->name);
+	zfree(&map->pin_path);
+
+	if (map->fd >= 0)
+		zclose(map->fd);
+}
+
 void bpf_object__close(struct bpf_object *obj)
 {
 	size_t i;
@@ -5981,29 +6018,8 @@ void bpf_object__close(struct bpf_object *obj)
 	btf__free(obj->btf);
 	btf_ext__free(obj->btf_ext);
 
-	for (i = 0; i < obj->nr_maps; i++) {
-		struct bpf_map *map = &obj->maps[i];
-
-		if (map->clear_priv)
-			map->clear_priv(map, map->priv);
-		map->priv = NULL;
-		map->clear_priv = NULL;
-
-		if (map->mmaped) {
-			munmap(map->mmaped, bpf_map_mmap_sz(map));
-			map->mmaped = NULL;
-		}
-
-		if (map->st_ops) {
-			zfree(&map->st_ops->data);
-			zfree(&map->st_ops->progs);
-			zfree(&map->st_ops->kern_func_off);
-			zfree(&map->st_ops);
-		}
-
-		zfree(&map->name);
-		zfree(&map->pin_path);
-	}
+	for (i = 0; i < obj->nr_maps; i++)
+		bpf_map__destroy(&obj->maps[i]);
 
 	zfree(&obj->kconfig);
 	zfree(&obj->externs);
-- 
cgit v1.2.3-59-g8ed1b


From 646f02ffdd49c466cb81642c2b013beb80092d01 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 17:27:39 -0700
Subject: libbpf: Add BTF-defined map-in-map support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As discussed at LPC 2019 ([0]), this patch brings (quite belated) support
for declarative BTF-defined map-in-map definitions to libbpf. It allows
defining ARRAY_OF_MAPS and HASH_OF_MAPS BPF maps without any user-space
initialization code involved.

Additionally, it allows initializing an outer map's slots with references to
the respective inner maps at load time, also completely declaratively.

Despite C's weak type system, the way BTF-defined map-in-map definitions
work makes it quite hard to accidentally initialize an outer map with
incompatible inner maps. This being C, of course, it's still possible, but
even that would be caught at load time, with an error returned and a helpful
debug log pointing exactly to the slot that failed to be initialized.

As an example, here's a rather advanced HASH_OF_MAPS declaration and
initialization example, filling slots #0 and #4 with two inner maps:

  #include <bpf/bpf_helpers.h>

  struct inner_map {
          __uint(type, BPF_MAP_TYPE_ARRAY);
          __uint(max_entries, 1);
          __type(key, int);
          __type(value, int);
  } inner_map1 SEC(".maps"),
    inner_map2 SEC(".maps");

  struct outer_hash {
          __uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
          __uint(max_entries, 5);
          __uint(key_size, sizeof(int));
          __array(values, struct inner_map);
  } outer_hash SEC(".maps") = {
          .values = {
                  [0] = &inner_map2,
                  [4] = &inner_map1,
          },
  };

Here's the relevant part of the libbpf debug log, showing pretty clearly
what's going on with map-in-map initialization:

  libbpf: .maps relo #0: for 6 value 0 rel.r_offset 96 name 260 ('inner_map1')
  libbpf: .maps relo #0: map 'outer_arr' slot [0] points to map 'inner_map1'
  libbpf: .maps relo #1: for 7 value 32 rel.r_offset 112 name 249 ('inner_map2')
  libbpf: .maps relo #1: map 'outer_arr' slot [2] points to map 'inner_map2'
  libbpf: .maps relo #2: for 7 value 32 rel.r_offset 144 name 249 ('inner_map2')
  libbpf: .maps relo #2: map 'outer_hash' slot [0] points to map 'inner_map2'
  libbpf: .maps relo #3: for 6 value 0 rel.r_offset 176 name 260 ('inner_map1')
  libbpf: .maps relo #3: map 'outer_hash' slot [4] points to map 'inner_map1'
  libbpf: map 'inner_map1': created successfully, fd=4
  libbpf: map 'inner_map2': created successfully, fd=5
  libbpf: map 'outer_hash': created successfully, fd=7
  libbpf: map 'outer_hash': slot [0] set to map 'inner_map2' fd=5
  libbpf: map 'outer_hash': slot [4] set to map 'inner_map1' fd=4

Notice from the log above that fd=6 (not logged explicitly) is used for inner
"prototype" map, necessary for creation of outer map. It is destroyed
immediately after outer map is created.

See also the included selftest, whose comments explain further details of
usage. Additionally, similar initialization syntax and libbpf functionality
can be used to initialize BPF_PROG_ARRAY with references to BPF
sub-programs. This can be done in follow-up patches if there is demand
for it.

  [0] https://linuxplumbersconf.org/event/4/contributions/448/

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200429002739.48006-4-andriin@fb.com
---
 tools/lib/bpf/bpf_helpers.h                        |   1 +
 tools/lib/bpf/libbpf.c                             | 281 +++++++++++++++++++--
 .../selftests/bpf/prog_tests/btf_map_in_map.c      |  49 ++++
 .../selftests/bpf/progs/test_btf_map_in_map.c      |  76 ++++++
 4 files changed, 384 insertions(+), 23 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_btf_map_in_map.c

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index 60aad054eea1..da00b87aa199 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -12,6 +12,7 @@
 
 #define __uint(name, val) int (*name)[val]
 #define __type(name, val) typeof(val) *name
+#define __array(name, val) typeof(val) *name[]
 
 /* Helper macro to print out debug messages */
 #define bpf_printk(fmt, ...)				\
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 9c845cf4cfcf..445ee903f9cd 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -310,6 +310,7 @@ struct bpf_map {
 	int map_ifindex;
 	int inner_map_fd;
 	struct bpf_map_def def;
+	__u32 btf_var_idx;
 	__u32 btf_key_type_id;
 	__u32 btf_value_type_id;
 	__u32 btf_vmlinux_value_type_id;
@@ -318,6 +319,9 @@ struct bpf_map {
 	enum libbpf_map_type libbpf_type;
 	void *mmaped;
 	struct bpf_struct_ops *st_ops;
+	struct bpf_map *inner_map;
+	void **init_slots;
+	int init_slots_sz;
 	char *pin_path;
 	bool pinned;
 	bool reused;
@@ -389,6 +393,7 @@ struct bpf_object {
 		int nr_reloc_sects;
 		int maps_shndx;
 		int btf_maps_shndx;
+		__u32 btf_maps_sec_btf_id;
 		int text_shndx;
 		int symbols_shndx;
 		int data_shndx;
@@ -1918,7 +1923,7 @@ static int build_map_pin_path(struct bpf_map *map, const char *path)
 static int parse_btf_map_def(struct bpf_object *obj,
 			     struct bpf_map *map,
 			     const struct btf_type *def,
-			     bool strict,
+			     bool strict, bool is_inner,
 			     const char *pin_root_path)
 {
 	const struct btf_type *t;
@@ -2036,10 +2041,79 @@ static int parse_btf_map_def(struct bpf_object *obj,
 			}
 			map->def.value_size = sz;
 			map->btf_value_type_id = t->type;
+		}
+		else if (strcmp(name, "values") == 0) {
+			int err;
+
+			if (is_inner) {
+				pr_warn("map '%s': multi-level inner maps not supported.\n",
+					map->name);
+				return -ENOTSUP;
+			}
+			if (i != vlen - 1) {
+				pr_warn("map '%s': '%s' member should be last.\n",
+					map->name, name);
+				return -EINVAL;
+			}
+			if (!bpf_map_type__is_map_in_map(map->def.type)) {
+				pr_warn("map '%s': should be map-in-map.\n",
+					map->name);
+				return -ENOTSUP;
+			}
+			if (map->def.value_size && map->def.value_size != 4) {
+				pr_warn("map '%s': conflicting value size %u != 4.\n",
+					map->name, map->def.value_size);
+				return -EINVAL;
+			}
+			map->def.value_size = 4;
+			t = btf__type_by_id(obj->btf, m->type);
+			if (!t) {
+				pr_warn("map '%s': map-in-map inner type [%d] not found.\n",
+					map->name, m->type);
+				return -EINVAL;
+			}
+			if (!btf_is_array(t) || btf_array(t)->nelems) {
+				pr_warn("map '%s': map-in-map inner spec is not a zero-sized array.\n",
+					map->name);
+				return -EINVAL;
+			}
+			t = skip_mods_and_typedefs(obj->btf, btf_array(t)->type,
+						   NULL);
+			if (!btf_is_ptr(t)) {
+				pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
+					map->name, btf_kind(t));
+				return -EINVAL;
+			}
+			t = skip_mods_and_typedefs(obj->btf, t->type, NULL);
+			if (!btf_is_struct(t)) {
+				pr_warn("map '%s': map-in-map inner def is of unexpected kind %u.\n",
+					map->name, btf_kind(t));
+				return -EINVAL;
+			}
+
+			map->inner_map = calloc(1, sizeof(*map->inner_map));
+			if (!map->inner_map)
+				return -ENOMEM;
+			map->inner_map->sec_idx = obj->efile.btf_maps_shndx;
+			map->inner_map->name = malloc(strlen(map->name) +
+						      sizeof(".inner") + 1);
+			if (!map->inner_map->name)
+				return -ENOMEM;
+			sprintf(map->inner_map->name, "%s.inner", map->name);
+
+			err = parse_btf_map_def(obj, map->inner_map, t, strict,
+						true /* is_inner */, NULL);
+			if (err)
+				return err;
 		} else if (strcmp(name, "pinning") == 0) {
 			__u32 val;
 			int err;
 
+			if (is_inner) {
+				pr_debug("map '%s': inner def can't be pinned.\n",
+					 map->name);
+				return -EINVAL;
+			}
 			if (!get_map_field_int(map->name, obj->btf, m, &val))
 				return -EINVAL;
 			pr_debug("map '%s': found pinning = %u.\n",
@@ -2138,10 +2212,11 @@ static int bpf_object__init_user_btf_map(struct bpf_object *obj,
 	map->def.type = BPF_MAP_TYPE_UNSPEC;
 	map->sec_idx = sec_idx;
 	map->sec_offset = vi->offset;
+	map->btf_var_idx = var_idx;
 	pr_debug("map '%s': at sec_idx %d, offset %zu.\n",
 		 map_name, map->sec_idx, map->sec_offset);
 
-	return parse_btf_map_def(obj, map, def, strict, pin_root_path);
+	return parse_btf_map_def(obj, map, def, strict, false, pin_root_path);
 }
 
 static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
@@ -2174,6 +2249,7 @@ static int bpf_object__init_user_btf_maps(struct bpf_object *obj, bool strict,
 		name = btf__name_by_offset(obj->btf, t->name_off);
 		if (strcmp(name, MAPS_ELF_SEC) == 0) {
 			sec = t;
+			obj->efile.btf_maps_sec_btf_id = i;
 			break;
 		}
 	}
@@ -2560,7 +2636,8 @@ static int bpf_object__elf_collect(struct bpf_object *obj)
 
 			/* Only do relo for section with exec instructions */
 			if (!section_have_execinstr(obj, sec) &&
-			    strcmp(name, ".rel" STRUCT_OPS_SEC)) {
+			    strcmp(name, ".rel" STRUCT_OPS_SEC) &&
+			    strcmp(name, ".rel" MAPS_ELF_SEC)) {
 				pr_debug("skip relo %s(%d) for section(%d)\n",
 					 name, idx, sec);
 				continue;
@@ -3538,6 +3615,22 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 		create_attr.btf_value_type_id = map->btf_value_type_id;
 	}
 
+	if (bpf_map_type__is_map_in_map(def->type)) {
+		if (map->inner_map) {
+			int err;
+
+			err = bpf_object__create_map(obj, map->inner_map);
+			if (err) {
+				pr_warn("map '%s': failed to create inner map: %d\n",
+					map->name, err);
+				return err;
+			}
+			map->inner_map_fd = bpf_map__fd(map->inner_map);
+		}
+		if (map->inner_map_fd >= 0)
+			create_attr.inner_map_fd = map->inner_map_fd;
+	}
+
 	map->fd = bpf_create_map_xattr(&create_attr);
 	if (map->fd < 0 && (create_attr.btf_key_type_id ||
 			    create_attr.btf_value_type_id)) {
@@ -3558,6 +3651,11 @@ static int bpf_object__create_map(struct bpf_object *obj, struct bpf_map *map)
 	if (map->fd < 0)
 		return -errno;
 
+	if (bpf_map_type__is_map_in_map(def->type) && map->inner_map) {
+		bpf_map__destroy(map->inner_map);
+		zfree(&map->inner_map);
+	}
+
 	return 0;
 }
 
@@ -3602,6 +3700,31 @@ bpf_object__create_maps(struct bpf_object *obj)
 			}
 		}
 
+		if (map->init_slots_sz) {
+			for (j = 0; j < map->init_slots_sz; j++) {
+				const struct bpf_map *targ_map;
+				int fd;
+
+				if (!map->init_slots[j])
+					continue;
+
+				targ_map = map->init_slots[j];
+				fd = bpf_map__fd(targ_map);
+				err = bpf_map_update_elem(map->fd, &j, &fd, 0);
+				if (err) {
+					err = -errno;
+					pr_warn("map '%s': failed to initialize slot [%d] to map '%s' fd=%d: %d\n",
+						map->name, j, targ_map->name,
+						fd, err);
+					goto err_out;
+				}
+				pr_debug("map '%s': slot [%d] set to map '%s' fd=%d\n",
+					 map->name, j, targ_map->name, fd);
+			}
+			zfree(&map->init_slots);
+			map->init_slots_sz = 0;
+		}
+
 		if (map->pin_path && !map->pinned) {
 			err = bpf_map__pin(map, NULL);
 			if (err) {
@@ -4873,9 +4996,118 @@ bpf_object__relocate(struct bpf_object *obj, const char *targ_btf_path)
 	return 0;
 }
 
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
-						    GElf_Shdr *shdr,
-						    Elf_Data *data);
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+					    GElf_Shdr *shdr, Elf_Data *data);
+
+static int bpf_object__collect_map_relos(struct bpf_object *obj,
+					 GElf_Shdr *shdr, Elf_Data *data)
+{
+	int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+	const struct btf_type *sec, *var, *def;
+	const struct btf_var_secinfo *vi;
+	const struct btf_member *member;
+	struct bpf_map *map, *targ_map;
+	const char *name, *mname;
+	Elf_Data *symbols;
+	unsigned int moff;
+	GElf_Sym sym;
+	GElf_Rel rel;
+	void *tmp;
+
+	if (!obj->efile.btf_maps_sec_btf_id || !obj->btf)
+		return -EINVAL;
+	sec = btf__type_by_id(obj->btf, obj->efile.btf_maps_sec_btf_id);
+	if (!sec)
+		return -EINVAL;
+
+	symbols = obj->efile.symbols;
+	nrels = shdr->sh_size / shdr->sh_entsize;
+	for (i = 0; i < nrels; i++) {
+		if (!gelf_getrel(data, i, &rel)) {
+			pr_warn(".maps relo #%d: failed to get ELF relo\n", i);
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		if (!gelf_getsym(symbols, GELF_R_SYM(rel.r_info), &sym)) {
+			pr_warn(".maps relo #%d: symbol %zx not found\n",
+				i, (size_t)GELF_R_SYM(rel.r_info));
+			return -LIBBPF_ERRNO__FORMAT;
+		}
+		name = elf_strptr(obj->efile.elf, obj->efile.strtabidx,
+				  sym.st_name) ? : "<?>";
+		if (sym.st_shndx != obj->efile.btf_maps_shndx) {
+			pr_warn(".maps relo #%d: '%s' isn't a BTF-defined map\n",
+				i, name);
+			return -LIBBPF_ERRNO__RELOC;
+		}
+
+		pr_debug(".maps relo #%d: for %zd value %zd rel.r_offset %zu name %d ('%s')\n",
+			 i, (ssize_t)(rel.r_info >> 32), (size_t)sym.st_value,
+			 (size_t)rel.r_offset, sym.st_name, name);
+
+		for (j = 0; j < obj->nr_maps; j++) {
+			map = &obj->maps[j];
+			if (map->sec_idx != obj->efile.btf_maps_shndx)
+				continue;
+
+			vi = btf_var_secinfos(sec) + map->btf_var_idx;
+			if (vi->offset <= rel.r_offset &&
+			    rel.r_offset + sizeof(void *) <= vi->offset + vi->size)
+				break;
+		}
+		if (j == obj->nr_maps) {
+			pr_warn(".maps relo #%d: cannot find map '%s' at rel.r_offset %zu\n",
+				i, name, (size_t)rel.r_offset);
+			return -EINVAL;
+		}
+
+		if (!bpf_map_type__is_map_in_map(map->def.type))
+			return -EINVAL;
+		if (map->def.type == BPF_MAP_TYPE_HASH_OF_MAPS &&
+		    map->def.key_size != sizeof(int)) {
+			pr_warn(".maps relo #%d: hash-of-maps '%s' should have key size %zu.\n",
+				i, map->name, sizeof(int));
+			return -EINVAL;
+		}
+
+		targ_map = bpf_object__find_map_by_name(obj, name);
+		if (!targ_map)
+			return -ESRCH;
+
+		var = btf__type_by_id(obj->btf, vi->type);
+		def = skip_mods_and_typedefs(obj->btf, var->type, NULL);
+		if (btf_vlen(def) == 0)
+			return -EINVAL;
+		member = btf_members(def) + btf_vlen(def) - 1;
+		mname = btf__name_by_offset(obj->btf, member->name_off);
+		if (strcmp(mname, "values"))
+			return -EINVAL;
+
+		moff = btf_member_bit_offset(def, btf_vlen(def) - 1) / 8;
+		if (rel.r_offset - vi->offset < moff)
+			return -EINVAL;
+
+		moff = rel.r_offset - vi->offset - moff;
+		if (moff % ptr_sz)
+			return -EINVAL;
+		moff /= ptr_sz;
+		if (moff >= map->init_slots_sz) {
+			new_sz = moff + 1;
+			tmp = realloc(map->init_slots, new_sz * ptr_sz);
+			if (!tmp)
+				return -ENOMEM;
+			map->init_slots = tmp;
+			memset(map->init_slots + map->init_slots_sz, 0,
+			       (new_sz - map->init_slots_sz) * ptr_sz);
+			map->init_slots_sz = new_sz;
+		}
+		map->init_slots[moff] = targ_map;
+
+		pr_debug(".maps relo #%d: map '%s' slot [%d] points to map '%s'\n",
+			 i, map->name, moff, name);
+	}
+
+	return 0;
+}
 
 static int bpf_object__collect_reloc(struct bpf_object *obj)
 {
@@ -4898,21 +5130,17 @@ static int bpf_object__collect_reloc(struct bpf_object *obj)
 		}
 
 		if (idx == obj->efile.st_ops_shndx) {
-			err = bpf_object__collect_struct_ops_map_reloc(obj,
-								       shdr,
-								       data);
-			if (err)
-				return err;
-			continue;
-		}
-
-		prog = bpf_object__find_prog_by_idx(obj, idx);
-		if (!prog) {
-			pr_warn("relocation failed: no section(%d)\n", idx);
-			return -LIBBPF_ERRNO__RELOC;
+			err = bpf_object__collect_st_ops_relos(obj, shdr, data);
+		} else if (idx == obj->efile.btf_maps_shndx) {
+			err = bpf_object__collect_map_relos(obj, shdr, data);
+		} else {
+			prog = bpf_object__find_prog_by_idx(obj, idx);
+			if (!prog) {
+				pr_warn("relocation failed: no prog in section(%d)\n", idx);
+				return -LIBBPF_ERRNO__RELOC;
+			}
+			err = bpf_program__collect_reloc(prog, shdr, data, obj);
 		}
-
-		err = bpf_program__collect_reloc(prog, shdr, data, obj);
 		if (err)
 			return err;
 	}
@@ -5984,6 +6212,14 @@ static void bpf_map__destroy(struct bpf_map *map)
 	map->priv = NULL;
 	map->clear_priv = NULL;
 
+	if (map->inner_map) {
+		bpf_map__destroy(map->inner_map);
+		zfree(&map->inner_map);
+	}
+
+	zfree(&map->init_slots);
+	map->init_slots_sz = 0;
+
 	if (map->mmaped) {
 		munmap(map->mmaped, bpf_map_mmap_sz(map));
 		map->mmaped = NULL;
@@ -6543,9 +6779,8 @@ static struct bpf_map *find_struct_ops_map_by_offset(struct bpf_object *obj,
 }
 
 /* Collect the reloc from ELF and populate the st_ops->progs[] */
-static int bpf_object__collect_struct_ops_map_reloc(struct bpf_object *obj,
-						    GElf_Shdr *shdr,
-						    Elf_Data *data)
+static int bpf_object__collect_st_ops_relos(struct bpf_object *obj,
+					    GElf_Shdr *shdr, Elf_Data *data)
 {
 	const struct btf_member *member;
 	struct bpf_struct_ops *st_ops;
diff --git a/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
new file mode 100644
index 000000000000..f7ee8fa377ad
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/btf_map_in_map.c
@@ -0,0 +1,49 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+
+#include <test_progs.h>
+
+#include "test_btf_map_in_map.skel.h"
+
+void test_btf_map_in_map(void)
+{
+	int duration = 0, err, key = 0, val;
+	struct test_btf_map_in_map* skel;
+
+	skel = test_btf_map_in_map__open_and_load();
+	if (CHECK(!skel, "skel_open", "failed to open&load skeleton\n"))
+		return;
+
+	err = test_btf_map_in_map__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attach failed: %d\n", err))
+		goto cleanup;
+
+	/* inner1 = input, inner2 = input + 1 */
+	val = bpf_map__fd(skel->maps.inner_map1);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+	val = bpf_map__fd(skel->maps.inner_map2);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+	skel->bss->input = 1;
+	usleep(1);
+
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+	CHECK(val != 1, "inner1", "got %d != exp %d\n", val, 1);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+	CHECK(val != 2, "inner2", "got %d != exp %d\n", val, 2);
+
+	/* inner1 = input + 1, inner2 = input */
+	val = bpf_map__fd(skel->maps.inner_map2);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_arr), &key, &val, 0);
+	val = bpf_map__fd(skel->maps.inner_map1);
+	bpf_map_update_elem(bpf_map__fd(skel->maps.outer_hash), &key, &val, 0);
+	skel->bss->input = 3;
+	usleep(1);
+
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map1), &key, &val);
+	CHECK(val != 4, "inner1", "got %d != exp %d\n", val, 4);
+	bpf_map_lookup_elem(bpf_map__fd(skel->maps.inner_map2), &key, &val);
+	CHECK(val != 3, "inner2", "got %d != exp %d\n", val, 3);
+
+cleanup:
+	test_btf_map_in_map__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
new file mode 100644
index 000000000000..e5093796be97
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_btf_map_in_map.c
@@ -0,0 +1,76 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+struct inner_map {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} inner_map1 SEC(".maps"),
+  inner_map2 SEC(".maps");
+
+struct outer_arr {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 3);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+	/* it's possible to use anonymous struct as inner map definition here */
+	__array(values, struct {
+		__uint(type, BPF_MAP_TYPE_ARRAY);
+		/* changing max_entries to 2 will fail during load
+		 * due to incompatibility with inner_map definition */
+		__uint(max_entries, 1);
+		__type(key, int);
+		__type(value, int);
+	});
+} outer_arr SEC(".maps") = {
+	/* (void *) cast is necessary because we didn't use `struct inner_map`
+	 * in __array(values, ...) above; slot 1 is intentionally left empty.
+	 * Actually, a conscious effort is required to screw up initialization
+	 * of inner map slots, which is a great thing!
+	 */
+	.values = { (void *)&inner_map1, 0, (void *)&inner_map2 },
+};
+
+struct outer_hash {
+	__uint(type, BPF_MAP_TYPE_HASH_OF_MAPS);
+	__uint(max_entries, 5);
+	__uint(key_size, sizeof(int));
+	/* Here everything works flawlessly due to reuse of struct inner_map
+	 * and compiler will complain at the attempt to use non-inner_map
+	 * references below. This is great experience.
+	 */
+	__array(values, struct inner_map);
+} outer_hash SEC(".maps") = {
+	.values = {
+		[0] = &inner_map2,
+		[4] = &inner_map1,
+	},
+};
+
+int input = 0;
+
+SEC("raw_tp/sys_enter")
+int handle__sys_enter(void *ctx)
+{
+	struct inner_map *inner_map;
+	int key = 0, val;
+
+	inner_map = bpf_map_lookup_elem(&outer_arr, &key);
+	if (!inner_map)
+		return 1;	/* nothing stored under key 0 */
+	val = input;		/* map behind outer_arr[0] receives input */
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
+	inner_map = bpf_map_lookup_elem(&outer_hash, &key);
+	if (!inner_map)
+		return 1;	/* nothing stored under key 0 */
+	val = input + 1;	/* map behind outer_hash[0] receives input + 1 */
+	bpf_map_update_elem(inner_map, &key, &val, 0);
+
+	return 0;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From 76148faa161e7cfb2d7719f35b37d7db4f3f8596 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:01 -0700
Subject: selftests/bpf: Ensure test flavors use correct skeletons

Ensure that test runner flavors include their own skeletons from <flavor>/
directory. Previously, skeletons generated for no-flavor test_progs were used.
Apart from fixing correctness, this also makes it possible to compile
flavors individually:

  $ make clean && make test_progs-no_alu32
  ... now succeeds ...

Fixes: 74b5a5968fe8 ("selftests/bpf: Replace test_progs and test_maps w/ general rule")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-2-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 4e654d41c7af..01c95f8278c7 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -324,7 +324,7 @@ $(TRUNNER_TEST_OBJS): $(TRUNNER_OUTPUT)/%.test.o:			\
 		      $(TRUNNER_BPF_SKELS)				\
 		      $$(BPFOBJ) | $(TRUNNER_OUTPUT)
 	$$(call msg,TEST-OBJ,$(TRUNNER_BINARY),$$@)
-	cd $$(@D) && $$(CC) $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
+	cd $$(@D) && $$(CC) -I. $$(CFLAGS) -c $(CURDIR)/$$< $$(LDLIBS) -o $$(@F)
 
 $(TRUNNER_EXTRA_OBJS): $(TRUNNER_OUTPUT)/%.o:				\
 		       %.c						\
-- 
cgit v1.2.3-59-g8ed1b


From 02995dd4bb02a5359a08e44abb3c18c2f456bd19 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:02 -0700
Subject: selftests/bpf: Add SAN_CFLAGS param to selftests build to allow
 sanitizers

Add ability to specify extra compiler flags with SAN_CFLAGS for compilation of
all user-space C files.  This allows building all selftest programs with,
e.g., custom sanitizer flags, without requiring support for such sanitizers
from anyone compiling selftest/bpf.

As an example, to compile everything with AddressSanitizer, one would do:

  $ make clean && make SAN_CFLAGS="-fsanitize=address"

For AddressSanitizer to work, one needs appropriate libasan shared library
installed in the system, with version of libasan matching what GCC links
against. E.g., GCC8 needs libasan5, while GCC7 uses libasan4.

For CentOS 7, to build everything successfully one would need to:
  $ sudo yum install devtoolset-8-gcc devtoolset-libasan-devel
  $ scl enable devtoolset-8 bash # set up environment

For Arch Linux to run selftests, one would need to install gcc-libs package to
get libasan.so.5:
  $ sudo pacman -S gcc-libs

N.B. EXTRA_CFLAGS name wasn't used, because it's also used by libbpf's
Makefile and this causes a few issues:
1. default "-g -Wall" flags are overridden;
2. compiling shared library with AddressSanitizer generates a bunch of symbols
   like: "_GLOBAL__sub_D_00099_0_btf_dump.c", "_GLOBAL__sub_D_00099_0_bpf.c",
   etc, which screws up versioned symbols check.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Julia Kartseva <hex@fb.com>
Link: https://lore.kernel.org/bpf/20200429012111.277390-3-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 01c95f8278c7..887f06a514ee 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -20,9 +20,10 @@ CLANG		?= clang
 LLC		?= llc
 LLVM_OBJCOPY	?= llvm-objcopy
 BPF_GCC		?= $(shell command -v bpf-gcc;)
-CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) -I$(CURDIR)		\
-	  -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR) -I$(TOOLSINCDIR)	\
-	  -I$(APIDIR)							\
+SAN_CFLAGS	?=
+CFLAGS += -g -rdynamic -Wall -O2 $(GENFLAGS) $(SAN_CFLAGS)		\
+	  -I$(CURDIR) -I$(INCLUDE_DIR) -I$(GENDIR) -I$(LIBDIR)		\
+	  -I$(TOOLSINCDIR) -I$(APIDIR)					\
 	  -Dbpf_prog_load=bpf_prog_test_load				\
 	  -Dbpf_load_program=bpf_test_load_program
 LDLIBS += -lcap -lelf -lz -lrt -lpthread
-- 
cgit v1.2.3-59-g8ed1b


From 42fce2cfb405e613f0355c4f92429d651bf0a5b3 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:03 -0700
Subject: selftests/bpf: Convert test_hashmap into test_progs test

Fold stand-alone test_hashmap test into test_progs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-4-andriin@fb.com
---
 tools/testing/selftests/bpf/.gitignore           |   2 -
 tools/testing/selftests/bpf/Makefile             |   2 +-
 tools/testing/selftests/bpf/prog_tests/hashmap.c | 380 ++++++++++++++++++++++
 tools/testing/selftests/bpf/test_hashmap.c       | 382 -----------------------
 4 files changed, 381 insertions(+), 385 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/hashmap.c
 delete mode 100644 tools/testing/selftests/bpf/test_hashmap.c

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index c30079c86998..16b9774d8b68 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -30,8 +30,6 @@ test_tcpnotify_user
 test_libbpf
 test_tcp_check_syncookie_user
 test_sysctl
-test_hashmap
-test_btf_dump
 test_current_pid_tgid_new_ns
 xdping
 test_cpp
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 887f06a514ee..10f12a5aac20 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -33,7 +33,7 @@ TEST_GEN_PROGS = test_verifier test_tag test_maps test_lru_map test_lpm_map test
 	test_align test_verifier_log test_dev_cgroup test_tcpbpf_user \
 	test_sock test_btf test_sockmap get_cgroup_id_user test_socket_cookie \
 	test_cgroup_storage \
-	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl test_hashmap \
+	test_netcnt test_tcpnotify_user test_sock_fields test_sysctl \
 	test_progs-no_alu32 \
 	test_current_pid_tgid_new_ns
 
diff --git a/tools/testing/selftests/bpf/prog_tests/hashmap.c b/tools/testing/selftests/bpf/prog_tests/hashmap.c
new file mode 100644
index 000000000000..428d488830c6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/hashmap.c
@@ -0,0 +1,380 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+
+/*
+ * Tests for libbpf's hashmap.
+ *
+ * Copyright (c) 2019 Facebook
+ */
+#include "test_progs.h"
+#include "bpf/hashmap.h"
+
+static int duration = 0;
+
+static size_t hash_fn(const void *k, void *ctx)
+{
+	return (long)k;
+}
+
+static bool equal_fn(const void *a, const void *b, void *ctx)
+{
+	return (long)a == (long)b;
+}
+
+static inline size_t next_pow_2(size_t n)
+{
+	size_t r = 1;
+
+	while (r < n)
+		r <<= 1;
+	return r;
+}
+
+static inline size_t exp_cap(size_t sz)
+{
+	size_t r = next_pow_2(sz);
+
+	if (sz * 4 / 3 > r)
+		r <<= 1;
+	return r;
+}
+
+#define ELEM_CNT 62
+
+static void test_hashmap_generic(void)
+{
+	struct hashmap_entry *entry, *tmp;
+	int err, bkt, found_cnt, i;
+	long long found_msk;
+	struct hashmap *map;
+
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		return;
+
+	for (i = 0; i < ELEM_CNT; i++) { /* phase 1: insert key i -> 1024 + i */
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(1024 + i);
+
+		err = hashmap__update(map, k, v, &oldk, &oldv);
+		if (CHECK(err != -ENOENT, "hashmap__update",
+			  "unexpected result: %d\n", err))
+			goto cleanup;
+
+		if (i % 2) {
+			err = hashmap__add(map, k, v);
+		} else {
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+			if (CHECK(oldk != NULL || oldv != NULL, "check_kv",
+				  "unexpected k/v: %p=%p\n", oldk, oldv))
+				goto cleanup;
+		}
+
+		if (CHECK(err, "elem_add", "failed to add k/v %ld = %ld: %d\n",
+			       (long)k, (long)v, err))
+			goto cleanup;
+
+		if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
+			  "failed to find key %ld\n", (long)k))
+			goto cleanup;
+		if (CHECK(oldv != v, "elem_val",
+			  "found value is wrong: %ld\n", (long)oldv))
+			goto cleanup;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		goto cleanup;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap_cap",
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		goto cleanup;
+
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 1024, "check_kv",
+			  "invalid k/v pair: %ld = %ld\n", k, v))
+			goto cleanup;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
+		  "not all keys iterated: %llx\n", found_msk))
+		goto cleanup;
+
+	for (i = 0; i < ELEM_CNT; i++) { /* phase 2: overwrite with 256 + i */
+		const void *oldk, *k = (const void *)(long)i;
+		void *oldv, *v = (void *)(long)(256 + i);
+
+		err = hashmap__add(map, k, v);
+		if (CHECK(err != -EEXIST, "hashmap__add",
+			  "unexpected add result: %d\n", err))
+			goto cleanup;
+
+		if (i % 2)
+			err = hashmap__update(map, k, v, &oldk, &oldv);
+		else
+			err = hashmap__set(map, k, v, &oldk, &oldv);
+
+		if (CHECK(err, "elem_upd",
+			  "failed to update k/v %ld = %ld: %d\n",
+			  (long)k, (long)v, err))
+			goto cleanup;
+		if (CHECK(!hashmap__find(map, k, &oldv), "elem_find",
+			  "failed to find key %ld\n", (long)k))
+			goto cleanup;
+		if (CHECK(oldv != v, "elem_val",
+			  "found value is wrong: %ld\n", (long)oldv))
+			goto cleanup;
+	}
+
+	if (CHECK(hashmap__size(map) != ELEM_CNT, "hashmap__size",
+		  "invalid updated map size: %zu\n", hashmap__size(map)))
+		goto cleanup;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap__capacity",
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		goto cleanup;
+
+	found_msk = 0;
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		long k = (long)entry->key;
+		long v = (long)entry->value;
+
+		found_msk |= 1ULL << k;
+		if (CHECK(v - k != 256, "elem_check",
+			  "invalid updated k/v pair: %ld = %ld\n", k, v))
+			goto cleanup;
+	}
+	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1, "elem_cnt",
+		  "not all keys iterated after update: %llx\n", found_msk))
+		goto cleanup;
+
+	found_cnt = 0;
+	hashmap__for_each_key_entry(map, entry, (void *)0) {
+		found_cnt++;
+	}
+	if (CHECK(!found_cnt, "found_cnt",
+		  "didn't find any entries for key 0\n"))
+		goto cleanup;
+
+	found_msk = 0;
+	found_cnt = 0;
+	hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			goto cleanup;
+		if (CHECK(oldk != k || oldv != v, "check_old",
+			  "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			goto cleanup;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)oldk, (long)oldv))
+			goto cleanup;
+	}
+
+	if (CHECK(!found_cnt || !found_msk, "found_entries",
+		  "didn't delete any key entries\n"))
+		goto cleanup;
+	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt, "elem_cnt",
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		goto cleanup;
+	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
+		  "hashmap__capacity",
+		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
+		goto cleanup;
+
+	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
+		const void *oldk, *k;
+		void *oldv, *v;
+
+		k = entry->key;
+		v = entry->value;
+
+		found_cnt++;
+		found_msk |= 1ULL << (long)k;
+
+		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv), "elem_del",
+			  "failed to delete k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			goto cleanup;
+		if (CHECK(oldk != k || oldv != v, "elem_check",
+			  "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
+			  (long)k, (long)v, (long)oldk, (long)oldv))
+			goto cleanup;
+		if (CHECK(hashmap__delete(map, k, &oldk, &oldv), "elem_del",
+			  "unexpectedly deleted k/v %ld = %ld\n",
+			  (long)k, (long)v))
+			goto cleanup;
+	}
+
+	if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
+		  "found_cnt",
+		  "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
+		  found_cnt, found_msk))
+		goto cleanup;
+	if (CHECK(hashmap__size(map) != 0, "hashmap__size",
+		  "invalid updated map size (already deleted: %d): %zu\n",
+		  found_cnt, hashmap__size(map)))
+		goto cleanup;
+
+	/* every entry was deleted above: iteration must visit nothing */
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "elem_exists",
+		      "unexpected map entries left: %ld = %ld\n",
+		      (long)entry->key, (long)entry->value);
+		goto cleanup;
+	}
+
+	hashmap__clear(map);	/* the map is already empty at this point */
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "elem_exists",
+		      "unexpected map entries left: %ld = %ld\n",
+		      (long)entry->key, (long)entry->value);
+		goto cleanup;
+	}
+
+cleanup:
+	hashmap__free(map);
+}
+
+static size_t collision_hash_fn(const void *k, void *ctx)
+{
+	return 0;
+}
+
+static void test_hashmap_multimap(void)
+{
+	void *k1 = (void *)0, *k2 = (void *)1;
+	struct hashmap_entry *entry;
+	struct hashmap *map;
+	long found_msk;
+	int err, bkt;
+
+	/* force collisions */
+	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		return;
+
+	/* set up multimap:
+	 * [0] -> 1, 2, 4;
+	 * [1] -> 8, 16, 32;
+	 */
+	err = hashmap__append(map, k1, (void *)1);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+	err = hashmap__append(map, k1, (void *)2);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+	err = hashmap__append(map, k1, (void *)4);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+
+	err = hashmap__append(map, k2, (void *)8);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+	err = hashmap__append(map, k2, (void *)16);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+	err = hashmap__append(map, k2, (void *)32);
+	if (CHECK(err, "elem_add", "failed to add k/v: %d\n", err))
+		goto cleanup;
+
+	if (CHECK(hashmap__size(map) != 6, "hashmap_size",
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		goto cleanup;
+
+	/* verify global iteration still works and sees all six values;
+	 * each value is a distinct bit, so OR-ing them yields (1 << 6) - 1 */
+	found_msk = 0;
+	hashmap__for_each_entry(map, entry, bkt) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 << 6) - 1, "found_msk",
+		  "not all keys iterated: %lx\n", found_msk))
+		goto cleanup;
+	/* iterate values for k1 (key 0) */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k1) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (1 | 2 | 4), "found_msk",
+		  "invalid k1 values: %lx\n", found_msk))
+		goto cleanup;
+
+	/* iterate values for k2 (key 1) */
+	found_msk = 0;
+	hashmap__for_each_key_entry(map, entry, k2) {
+		found_msk |= (long)entry->value;
+	}
+	if (CHECK(found_msk != (8 | 16 | 32), "found_msk",
+		  "invalid k2 values: %lx\n", found_msk))
+		goto cleanup;
+
+cleanup:
+	hashmap__free(map);
+}
+
+static void test_hashmap_empty(void)
+{
+	struct hashmap_entry *entry;
+	int bkt;
+	struct hashmap *map;
+	void *k = (void *)0;
+
+	/* nothing is ever inserted: every query below must see an empty map */
+	map = hashmap__new(hash_fn, equal_fn, NULL);
+	if (CHECK(IS_ERR(map), "hashmap__new",
+		  "failed to create map: %ld\n", PTR_ERR(map)))
+		return;	/* map is an error pointer here: nothing to free */
+
+	if (CHECK(hashmap__size(map) != 0, "hashmap__size",
+		  "invalid map size: %zu\n", hashmap__size(map)))
+		goto cleanup;
+	if (CHECK(hashmap__capacity(map) != 0, "hashmap__capacity",
+		  "invalid map capacity: %zu\n", hashmap__capacity(map)))
+		goto cleanup;
+	if (CHECK(hashmap__find(map, k, NULL), "elem_find",
+		  "unexpected find\n"))
+		goto cleanup;
+	if (CHECK(hashmap__delete(map, k, NULL, NULL), "elem_del",
+		  "unexpected delete\n"))
+		goto cleanup;
+
+	hashmap__for_each_entry(map, entry, bkt) {
+		CHECK(false, "elem_found", "unexpected iterated entry\n");
+		goto cleanup;
+	}
+	hashmap__for_each_key_entry(map, entry, k) {
+		CHECK(false, "key_found", "unexpected key entry\n");
+		goto cleanup;
+	}
+
+cleanup:
+	hashmap__free(map);
+}
+
+void test_hashmap(void) /* prog_tests entry point: one subtest per scenario */
+{
+	if (test__start_subtest("generic"))
+		test_hashmap_generic();
+	if (test__start_subtest("multimap"))
+		test_hashmap_multimap();
+	if (test__start_subtest("empty"))
+		test_hashmap_empty();
+}
diff --git a/tools/testing/selftests/bpf/test_hashmap.c b/tools/testing/selftests/bpf/test_hashmap.c
deleted file mode 100644
index c490e012c23f..000000000000
--- a/tools/testing/selftests/bpf/test_hashmap.c
+++ /dev/null
@@ -1,382 +0,0 @@
-// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
-
-/*
- * Tests for libbpf's hashmap.
- *
- * Copyright (c) 2019 Facebook
- */
-#include <stdio.h>
-#include <errno.h>
-#include <linux/err.h>
-#include "bpf/hashmap.h"
-
-#define CHECK(condition, format...) ({					\
-	int __ret = !!(condition);					\
-	if (__ret) {							\
-		fprintf(stderr, "%s:%d:FAIL ", __func__, __LINE__);	\
-		fprintf(stderr, format);				\
-	}								\
-	__ret;								\
-})
-
-size_t hash_fn(const void *k, void *ctx)
-{
-	return (long)k;
-}
-
-bool equal_fn(const void *a, const void *b, void *ctx)
-{
-	return (long)a == (long)b;
-}
-
-static inline size_t next_pow_2(size_t n)
-{
-	size_t r = 1;
-
-	while (r < n)
-		r <<= 1;
-	return r;
-}
-
-static inline size_t exp_cap(size_t sz)
-{
-	size_t r = next_pow_2(sz);
-
-	if (sz * 4 / 3 > r)
-		r <<= 1;
-	return r;
-}
-
-#define ELEM_CNT 62
-
-int test_hashmap_generic(void)
-{
-	struct hashmap_entry *entry, *tmp;
-	int err, bkt, found_cnt, i;
-	long long found_msk;
-	struct hashmap *map;
-
-	fprintf(stderr, "%s: ", __func__);
-
-	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
-
-	for (i = 0; i < ELEM_CNT; i++) {
-		const void *oldk, *k = (const void *)(long)i;
-		void *oldv, *v = (void *)(long)(1024 + i);
-
-		err = hashmap__update(map, k, v, &oldk, &oldv);
-		if (CHECK(err != -ENOENT, "unexpected result: %d\n", err))
-			return 1;
-
-		if (i % 2) {
-			err = hashmap__add(map, k, v);
-		} else {
-			err = hashmap__set(map, k, v, &oldk, &oldv);
-			if (CHECK(oldk != NULL || oldv != NULL,
-				  "unexpected k/v: %p=%p\n", oldk, oldv))
-				return 1;
-		}
-
-		if (CHECK(err, "failed to add k/v %ld = %ld: %d\n",
-			       (long)k, (long)v, err))
-			return 1;
-
-		if (CHECK(!hashmap__find(map, k, &oldv),
-			  "failed to find key %ld\n", (long)k))
-			return 1;
-		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
-			return 1;
-	}
-
-	if (CHECK(hashmap__size(map) != ELEM_CNT,
-		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
-	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
-		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
-
-	found_msk = 0;
-	hashmap__for_each_entry(map, entry, bkt) {
-		long k = (long)entry->key;
-		long v = (long)entry->value;
-
-		found_msk |= 1ULL << k;
-		if (CHECK(v - k != 1024, "invalid k/v pair: %ld = %ld\n", k, v))
-			return 1;
-	}
-	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
-		  "not all keys iterated: %llx\n", found_msk))
-		return 1;
-
-	for (i = 0; i < ELEM_CNT; i++) {
-		const void *oldk, *k = (const void *)(long)i;
-		void *oldv, *v = (void *)(long)(256 + i);
-
-		err = hashmap__add(map, k, v);
-		if (CHECK(err != -EEXIST, "unexpected add result: %d\n", err))
-			return 1;
-
-		if (i % 2)
-			err = hashmap__update(map, k, v, &oldk, &oldv);
-		else
-			err = hashmap__set(map, k, v, &oldk, &oldv);
-
-		if (CHECK(err, "failed to update k/v %ld = %ld: %d\n",
-			       (long)k, (long)v, err))
-			return 1;
-		if (CHECK(!hashmap__find(map, k, &oldv),
-			  "failed to find key %ld\n", (long)k))
-			return 1;
-		if (CHECK(oldv != v, "found value is wrong: %ld\n", (long)oldv))
-			return 1;
-	}
-
-	if (CHECK(hashmap__size(map) != ELEM_CNT,
-		  "invalid updated map size: %zu\n", hashmap__size(map)))
-		return 1;
-	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
-		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
-
-	found_msk = 0;
-	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
-		long k = (long)entry->key;
-		long v = (long)entry->value;
-
-		found_msk |= 1ULL << k;
-		if (CHECK(v - k != 256,
-			  "invalid updated k/v pair: %ld = %ld\n", k, v))
-			return 1;
-	}
-	if (CHECK(found_msk != (1ULL << ELEM_CNT) - 1,
-		  "not all keys iterated after update: %llx\n", found_msk))
-		return 1;
-
-	found_cnt = 0;
-	hashmap__for_each_key_entry(map, entry, (void *)0) {
-		found_cnt++;
-	}
-	if (CHECK(!found_cnt, "didn't find any entries for key 0\n"))
-		return 1;
-
-	found_msk = 0;
-	found_cnt = 0;
-	hashmap__for_each_key_entry_safe(map, entry, tmp, (void *)0) {
-		const void *oldk, *k;
-		void *oldv, *v;
-
-		k = entry->key;
-		v = entry->value;
-
-		found_cnt++;
-		found_msk |= 1ULL << (long)k;
-
-		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
-			  "failed to delete k/v %ld = %ld\n",
-			  (long)k, (long)v))
-			return 1;
-		if (CHECK(oldk != k || oldv != v,
-			  "invalid deleted k/v: expected %ld = %ld, got %ld = %ld\n",
-			  (long)k, (long)v, (long)oldk, (long)oldv))
-			return 1;
-		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
-			  "unexpectedly deleted k/v %ld = %ld\n",
-			  (long)oldk, (long)oldv))
-			return 1;
-	}
-
-	if (CHECK(!found_cnt || !found_msk,
-		  "didn't delete any key entries\n"))
-		return 1;
-	if (CHECK(hashmap__size(map) != ELEM_CNT - found_cnt,
-		  "invalid updated map size (already deleted: %d): %zu\n",
-		  found_cnt, hashmap__size(map)))
-		return 1;
-	if (CHECK(hashmap__capacity(map) != exp_cap(hashmap__size(map)),
-		  "unexpected map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
-
-	hashmap__for_each_entry_safe(map, entry, tmp, bkt) {
-		const void *oldk, *k;
-		void *oldv, *v;
-
-		k = entry->key;
-		v = entry->value;
-
-		found_cnt++;
-		found_msk |= 1ULL << (long)k;
-
-		if (CHECK(!hashmap__delete(map, k, &oldk, &oldv),
-			  "failed to delete k/v %ld = %ld\n",
-			  (long)k, (long)v))
-			return 1;
-		if (CHECK(oldk != k || oldv != v,
-			  "invalid old k/v: expect %ld = %ld, got %ld = %ld\n",
-			  (long)k, (long)v, (long)oldk, (long)oldv))
-			return 1;
-		if (CHECK(hashmap__delete(map, k, &oldk, &oldv),
-			  "unexpectedly deleted k/v %ld = %ld\n",
-			  (long)k, (long)v))
-			return 1;
-	}
-
-	if (CHECK(found_cnt != ELEM_CNT || found_msk != (1ULL << ELEM_CNT) - 1,
-		  "not all keys were deleted: found_cnt:%d, found_msk:%llx\n",
-		  found_cnt, found_msk))
-		return 1;
-	if (CHECK(hashmap__size(map) != 0,
-		  "invalid updated map size (already deleted: %d): %zu\n",
-		  found_cnt, hashmap__size(map)))
-		return 1;
-
-	found_cnt = 0;
-	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected map entries left: %ld = %ld\n",
-			     (long)entry->key, (long)entry->value);
-		return 1;
-	}
-
-	hashmap__free(map);
-	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected map entries left: %ld = %ld\n",
-			     (long)entry->key, (long)entry->value);
-		return 1;
-	}
-
-	fprintf(stderr, "OK\n");
-	return 0;
-}
-
-size_t collision_hash_fn(const void *k, void *ctx)
-{
-	return 0;
-}
-
-int test_hashmap_multimap(void)
-{
-	void *k1 = (void *)0, *k2 = (void *)1;
-	struct hashmap_entry *entry;
-	struct hashmap *map;
-	long found_msk;
-	int err, bkt;
-
-	fprintf(stderr, "%s: ", __func__);
-
-	/* force collisions */
-	map = hashmap__new(collision_hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
-
-
-	/* set up multimap:
-	 * [0] -> 1, 2, 4;
-	 * [1] -> 8, 16, 32;
-	 */
-	err = hashmap__append(map, k1, (void *)1);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-	err = hashmap__append(map, k1, (void *)2);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-	err = hashmap__append(map, k1, (void *)4);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-
-	err = hashmap__append(map, k2, (void *)8);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-	err = hashmap__append(map, k2, (void *)16);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-	err = hashmap__append(map, k2, (void *)32);
-	if (CHECK(err, "failed to add k/v: %d\n", err))
-		return 1;
-
-	if (CHECK(hashmap__size(map) != 6,
-		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
-
-	/* verify global iteration still works and sees all values */
-	found_msk = 0;
-	hashmap__for_each_entry(map, entry, bkt) {
-		found_msk |= (long)entry->value;
-	}
-	if (CHECK(found_msk != (1 << 6) - 1,
-		  "not all keys iterated: %lx\n", found_msk))
-		return 1;
-
-	/* iterate values for key 1 */
-	found_msk = 0;
-	hashmap__for_each_key_entry(map, entry, k1) {
-		found_msk |= (long)entry->value;
-	}
-	if (CHECK(found_msk != (1 | 2 | 4),
-		  "invalid k1 values: %lx\n", found_msk))
-		return 1;
-
-	/* iterate values for key 2 */
-	found_msk = 0;
-	hashmap__for_each_key_entry(map, entry, k2) {
-		found_msk |= (long)entry->value;
-	}
-	if (CHECK(found_msk != (8 | 16 | 32),
-		  "invalid k2 values: %lx\n", found_msk))
-		return 1;
-
-	fprintf(stderr, "OK\n");
-	return 0;
-}
-
-int test_hashmap_empty()
-{
-	struct hashmap_entry *entry;
-	int bkt;
-	struct hashmap *map;
-	void *k = (void *)0;
-
-	fprintf(stderr, "%s: ", __func__);
-
-	/* force collisions */
-	map = hashmap__new(hash_fn, equal_fn, NULL);
-	if (CHECK(IS_ERR(map), "failed to create map: %ld\n", PTR_ERR(map)))
-		return 1;
-
-	if (CHECK(hashmap__size(map) != 0,
-		  "invalid map size: %zu\n", hashmap__size(map)))
-		return 1;
-	if (CHECK(hashmap__capacity(map) != 0,
-		  "invalid map capacity: %zu\n", hashmap__capacity(map)))
-		return 1;
-	if (CHECK(hashmap__find(map, k, NULL), "unexpected find\n"))
-		return 1;
-	if (CHECK(hashmap__delete(map, k, NULL, NULL), "unexpected delete\n"))
-		return 1;
-
-	hashmap__for_each_entry(map, entry, bkt) {
-		CHECK(false, "unexpected iterated entry\n");
-		return 1;
-	}
-	hashmap__for_each_key_entry(map, entry, k) {
-		CHECK(false, "unexpected key entry\n");
-		return 1;
-	}
-
-	fprintf(stderr, "OK\n");
-	return 0;
-}
-
-int main(int argc, char **argv)
-{
-	bool failed = false;
-
-	if (test_hashmap_generic())
-		failed = true;
-	if (test_hashmap_multimap())
-		failed = true;
-	if (test_hashmap_empty())
-		failed = true;
-
-	return failed;
-}
-- 
cgit v1.2.3-59-g8ed1b


From 229bf8bf4d910510bc1a2fd0b89bd467cd71050d Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:04 -0700
Subject: libbpf: Fix memory leak and possible double-free in hashmap__clear

Fix memory leak in hashmap__clear() not freeing hashmap_entry structs for each
of the remaining entries. Also NULL-out bucket list to prevent possible
double-free between hashmap__clear() and hashmap__free().

Running test_progs-asan flavor clearly showed this problem.

Reported-by: Alston Tang <alston64@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-5-andriin@fb.com
---
 tools/lib/bpf/hashmap.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index 54c30c802070..cffb96202e0d 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -59,7 +59,14 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
 
 void hashmap__clear(struct hashmap *map)
 {
+	struct hashmap_entry *cur, *tmp;
+	int bkt;
+
+	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
+		free(cur);	/* entry structs were leaked before this fix */
+	}
 	free(map->buckets);
+	map->buckets = NULL;	/* prevent a double free in hashmap__free() */
 	map->cap = map->cap_bits = map->sz = 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f25d5416d64c796aa639136eb0b076c8bd579b54 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:05 -0700
Subject: selftests/bpf: Fix memory leak in test selector

Free test selector substrings, which were strdup()'ed.

Fixes: b65053cd94f4 ("selftests/bpf: Add whitelist/blacklist of test names to test_progs")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-6-andriin@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index b521e0a512b6..86d0020c9eec 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -420,6 +420,18 @@ static int libbpf_print_fn(enum libbpf_print_level level,
 	return 0;
 }
 
+/* Free each string held in @set, then the string array; NULL is a no-op. */
+static void free_str_set(const struct str_set *set)
+{
+	int idx;
+
+	if (set) {
+		for (idx = 0; idx < set->cnt; idx++)
+			free((void *)set->strs[idx]);
+		free(set->strs);
+	}
+}
+
 static int parse_str_list(const char *s, struct str_set *set)
 {
 	char *input, *state = NULL, *next, **tmp, **strs = NULL;
@@ -756,11 +768,11 @@ int main(int argc, char **argv)
 	fprintf(stdout, "Summary: %d/%d PASSED, %d SKIPPED, %d FAILED\n",
 		env.succ_cnt, env.sub_succ_cnt, env.skip_cnt, env.fail_cnt);
 
-	free(env.test_selector.blacklist.strs);
-	free(env.test_selector.whitelist.strs);
+	free_str_set(&env.test_selector.blacklist);
+	free_str_set(&env.test_selector.whitelist);
 	free(env.test_selector.num_set);
-	free(env.subtest_selector.blacklist.strs);
-	free(env.subtest_selector.whitelist.strs);
+	free_str_set(&env.subtest_selector.blacklist);
+	free_str_set(&env.subtest_selector.whitelist);
 	free(env.subtest_selector.num_set);
 
 	return env.fail_cnt ? EXIT_FAILURE : EXIT_SUCCESS;
-- 
cgit v1.2.3-59-g8ed1b


From 9f56bb531a809ecaa7f0ddca61d2cf3adc1cb81a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:06 -0700
Subject: selftests/bpf: Fix memory leak in extract_build_id()

getline() allocates a string, which has to be freed.

Fixes: 81f77fd0deeb ("bpf: add selftest for stackmap with BPF_F_STACK_BUILD_ID")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200429012111.277390-7-andriin@fb.com
---
 tools/testing/selftests/bpf/test_progs.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 86d0020c9eec..93970ec1c9e9 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -351,6 +351,7 @@ int extract_build_id(char *build_id, size_t size)
 		len = size;
 	memcpy(build_id, line, len);
 	build_id[len] = '\0';
+	free(line);
 	return 0;
 err:
 	fclose(fp);
-- 
cgit v1.2.3-59-g8ed1b


From 13c908495e5d51718a6da84ae925fa2aac056380 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:07 -0700
Subject: selftests/bpf: Fix invalid memory reads in core_relo selftest

Another one found by AddressSanitizer. input_len is bigger than the size
of the data that is actually initialized.

Fixes: c7566a69695c ("selftests/bpf: Add field existence CO-RE relocs tests")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-8-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/core_reloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/core_reloc.c b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
index 31e177adbdf1..084ed26a7d78 100644
--- a/tools/testing/selftests/bpf/prog_tests/core_reloc.c
+++ b/tools/testing/selftests/bpf/prog_tests/core_reloc.c
@@ -392,7 +392,7 @@ static struct core_reloc_test_case test_cases[] = {
 		.input = STRUCT_TO_CHAR_PTR(core_reloc_existence___minimal) {
 			.a = 42,
 		},
-		.input_len = sizeof(struct core_reloc_existence),
+		.input_len = sizeof(struct core_reloc_existence___minimal),
 		.output = STRUCT_TO_CHAR_PTR(core_reloc_existence_output) {
 			.a_exists = 1,
 			.b_exists = 0,
-- 
cgit v1.2.3-59-g8ed1b


From 3521ffa2ee9a48c3236c93f54ae11c074490ebce Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:08 -0700
Subject: libbpf: Fix huge memory leak in libbpf_find_vmlinux_btf_id()

BTF object wasn't freed.

Fixes: a6ed02cac690 ("libbpf: Load btf_vmlinux only once per object.")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200429012111.277390-9-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 445ee903f9cd..d86ff8214b96 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6934,6 +6934,7 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 			       enum bpf_attach_type attach_type)
 {
 	struct btf *btf;
+	int err;
 
 	btf = libbpf_find_kernel_btf();
 	if (IS_ERR(btf)) {
@@ -6941,7 +6942,9 @@ int libbpf_find_vmlinux_btf_id(const char *name,
 		return -EINVAL;
 	}
 
-	return __find_vmlinux_btf_id(btf, name, attach_type);
+	err = __find_vmlinux_btf_id(btf, name, attach_type);
+	btf__free(btf);
+	return err;
 }
 
 static int libbpf_find_prog_btf_id(const char *name, __u32 attach_prog_fd)
-- 
cgit v1.2.3-59-g8ed1b


From 36d0b6159f6a6f51f600bf1777702f7036fb9839 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:09 -0700
Subject: selftests/bpf: Disable ASAN instrumentation for mmap()'ed memory read

AddressSanitizer assumes that all memory dereferences are done against
memory allocated by the sanitizer's malloc()/free() code and not touched by
anyone else. It seems that this doesn't hold for perf buffer memory. Disable
instrumentation on the perf buffer callback function.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200429012111.277390-10-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/perf_buffer.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
index 1450ea2dd4cc..a122ce3b360e 100644
--- a/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
+++ b/tools/testing/selftests/bpf/prog_tests/perf_buffer.c
@@ -6,6 +6,11 @@
 #include <test_progs.h>
 #include "bpf/libbpf_internal.h"
 
+/* AddressSanitizer sometimes crashes due to data dereference below, due to
+ * this being mmap()'ed memory. Disable instrumentation with
+ * no_sanitize_address attribute
+ */
+__attribute__((no_sanitize_address))
 static void on_sample(void *ctx, int cpu, void *data, __u32 size)
 {
 	int cpu_data = *(int *)data, duration = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 8d30e80a049ad699264e4a12911e349f93c7279a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:10 -0700
Subject: selftests/bpf: Fix bpf_link leak in ns_current_pid_tgid selftest

The if condition is inverted, but the check is also simply not necessary.

Fixes: 1c1052e0140a ("tools/testing/selftests/bpf: Add self-tests for new helper bpf_get_ns_current_pid_tgid.")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Carlos Neira <cneirabustos@gmail.com>
Link: https://lore.kernel.org/bpf/20200429012111.277390-11-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
index 542240e16564..e74dc501b27f 100644
--- a/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
+++ b/tools/testing/selftests/bpf/prog_tests/ns_current_pid_tgid.c
@@ -80,9 +80,6 @@ void test_ns_current_pid_tgid(void)
 		  "User pid/tgid %llu BPF pid/tgid %llu\n", id, bss.pid_tgid))
 		goto cleanup;
 cleanup:
-	if (!link) {
-		bpf_link__destroy(link);
-		link = NULL;
-	}
+	bpf_link__destroy(link);
 	bpf_object__close(obj);
 }
-- 
cgit v1.2.3-59-g8ed1b


From e4e8f4d047fdcf7ac7d944e266e85d8041f16cd6 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 28 Apr 2020 18:21:11 -0700
Subject: selftests/bpf: Add runqslower binary to .gitignore

With recent changes, runqslower is being copied into selftests/bpf root
directory. So add it into .gitignore.

Fixes: b26d1e2b6028 ("selftests/bpf: Copy runqslower to OUTPUT directory")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Veronika Kabatova <vkabatov@redhat.com>
Link: https://lore.kernel.org/bpf/20200429012111.277390-12-andriin@fb.com
---
 tools/testing/selftests/bpf/.gitignore | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 16b9774d8b68..3ff031972975 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -37,4 +37,4 @@ test_cpp
 /no_alu32
 /bpf_gcc
 /tools
-
+/runqslower
-- 
cgit v1.2.3-59-g8ed1b


From 2e410da6a098a9ff25d22a56ecb724b3c36fd528 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 28 Apr 2020 10:14:02 +0200
Subject: staging: rtl8723bs: remove mgmt_frame_register method

This was changed in cfg80211, so having it broke things, but
there's no need to adjust since it's an empty implementation.
Just remove it.

Fixes: 6cd536fe62ef ("cfg80211: change internal management frame registration API")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Link: https://lore.kernel.org/r/20200428101400.ae19d651ec38.Ieb15844bb5ab93b3d7931d6561f42e3316ef8251@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c | 24 -----------------------
 1 file changed, 24 deletions(-)

diff --git a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
index 1ba85a43f05a..cd31ad2b8a7b 100644
--- a/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
+++ b/drivers/staging/rtl8723bs/os_dep/ioctl_cfg80211.c
@@ -3163,29 +3163,6 @@ exit:
 	return ret;
 }
 
-static void cfg80211_rtw_mgmt_frame_register(struct wiphy *wiphy,
-	struct wireless_dev *wdev,
-	u16 frame_type, bool reg)
-{
-	struct net_device *ndev = wdev_to_ndev(wdev);
-	struct adapter *adapter;
-
-	if (ndev == NULL)
-		goto exit;
-
-	adapter = (struct adapter *)rtw_netdev_priv(ndev);
-
-#ifdef DEBUG_CFG80211
-	DBG_871X(FUNC_ADPT_FMT" frame_type:%x, reg:%d\n", FUNC_ADPT_ARG(adapter),
-		frame_type, reg);
-#endif
-
-	if (frame_type != (IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ))
-		return;
-exit:
-	return;
-}
-
 #if defined(CONFIG_PNO_SUPPORT)
 static int cfg80211_rtw_sched_scan_start(struct wiphy *wiphy,
 		struct net_device *dev,
@@ -3397,7 +3374,6 @@ static struct cfg80211_ops rtw_cfg80211_ops = {
 	.change_bss = cfg80211_rtw_change_bss,
 
 	.mgmt_tx = cfg80211_rtw_mgmt_tx,
-	.mgmt_frame_register = cfg80211_rtw_mgmt_frame_register,
 
 #if defined(CONFIG_PNO_SUPPORT)
 	.sched_scan_start = cfg80211_rtw_sched_scan_start,
-- 
cgit v1.2.3-59-g8ed1b


From d530b9864073a714a6b6dcddee77c9b24074071f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 28 Apr 2020 10:14:03 +0200
Subject: staging: wilc1000: adjust for management frame register API changes

Adjust to the API changes in cfg80211 for management frame registration.

Fixes: 6cd536fe62ef ("cfg80211: change internal management frame registration API")
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Acked-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Ajay Singh <ajay.kathat@microchip.com>
Link: https://lore.kernel.org/r/20200428101400.bac7e94c2bf8.I6a2287b9f68f35aff5f6de409c5ffa388de760e2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/staging/wilc1000/cfg80211.c | 36 +++++++++++++++++-------------------
 drivers/staging/wilc1000/cfg80211.h |  5 +++--
 drivers/staging/wilc1000/netdev.c   | 21 +++++++++------------
 drivers/staging/wilc1000/netdev.h   |  9 +--------
 4 files changed, 30 insertions(+), 41 deletions(-)

diff --git a/drivers/staging/wilc1000/cfg80211.c b/drivers/staging/wilc1000/cfg80211.c
index 4bdcbc5fd2fd..b6065a0d660f 100644
--- a/drivers/staging/wilc1000/cfg80211.c
+++ b/drivers/staging/wilc1000/cfg80211.c
@@ -1217,33 +1217,31 @@ static int mgmt_tx_cancel_wait(struct wiphy *wiphy,
 	return 0;
 }
 
-void wilc_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev,
-			      u16 frame_type, bool reg)
+void wilc_update_mgmt_frame_registrations(struct wiphy *wiphy,
+					  struct wireless_dev *wdev,
+					  struct mgmt_frame_regs *upd)
 {
 	struct wilc *wl = wiphy_priv(wiphy);
 	struct wilc_vif *vif = netdev_priv(wdev->netdev);
+	u32 presp_bit = BIT(IEEE80211_STYPE_PROBE_REQ >> 4);
+	u32 action_bit = BIT(IEEE80211_STYPE_ACTION >> 4);
 
-	if (!frame_type)
-		return;
+	if (wl->initialized) {
+		bool prev = vif->mgmt_reg_stypes & presp_bit;
+		bool now = upd->interface_stypes & presp_bit;
 
-	switch (frame_type) {
-	case IEEE80211_STYPE_PROBE_REQ:
-		vif->frame_reg[0].type = frame_type;
-		vif->frame_reg[0].reg = reg;
-		break;
+		if (now != prev)
+			wilc_frame_register(vif, IEEE80211_STYPE_PROBE_REQ, now);
 
-	case IEEE80211_STYPE_ACTION:
-		vif->frame_reg[1].type = frame_type;
-		vif->frame_reg[1].reg = reg;
-		break;
+		prev = vif->mgmt_reg_stypes & action_bit;
+		now = upd->interface_stypes & action_bit;
 
-	default:
-		break;
+		if (now != prev)
+			wilc_frame_register(vif, IEEE80211_STYPE_ACTION, now);
 	}
 
-	if (!wl->initialized)
-		return;
-	wilc_frame_register(vif, frame_type, reg);
+	vif->mgmt_reg_stypes =
+		upd->interface_stypes & (presp_bit | action_bit);
 }
 
 static int set_cqm_rssi_config(struct wiphy *wiphy, struct net_device *dev,
@@ -1665,7 +1663,7 @@ static const struct cfg80211_ops wilc_cfg80211_ops = {
 	.cancel_remain_on_channel = cancel_remain_on_channel,
 	.mgmt_tx_cancel_wait = mgmt_tx_cancel_wait,
 	.mgmt_tx = mgmt_tx,
-	.mgmt_frame_register = wilc_mgmt_frame_register,
+	.update_mgmt_frame_registrations = wilc_update_mgmt_frame_registrations,
 	.set_power_mgmt = set_power_mgmt,
 	.set_cqm_rssi_config = set_cqm_rssi_config,
 
diff --git a/drivers/staging/wilc1000/cfg80211.h b/drivers/staging/wilc1000/cfg80211.h
index 5e5d63f70df2..37b294cb3b37 100644
--- a/drivers/staging/wilc1000/cfg80211.h
+++ b/drivers/staging/wilc1000/cfg80211.h
@@ -21,8 +21,9 @@ void wilc_wfi_deinit_mon_interface(struct wilc *wl, bool rtnl_locked);
 struct net_device *wilc_wfi_init_mon_interface(struct wilc *wl,
 					       const char *name,
 					       struct net_device *real_dev);
-void wilc_mgmt_frame_register(struct wiphy *wiphy, struct wireless_dev *wdev,
-			      u16 frame_type, bool reg);
+void wilc_update_mgmt_frame_registrations(struct wiphy *wiphy,
+					  struct wireless_dev *wdev,
+					  struct mgmt_frame_regs *upd);
 struct wilc_vif *wilc_get_interface(struct wilc *wl);
 struct wilc_vif *wilc_get_wl_to_vif(struct wilc *wl);
 void wlan_deinit_locks(struct wilc *wilc);
diff --git a/drivers/staging/wilc1000/netdev.c b/drivers/staging/wilc1000/netdev.c
index f94a17babd12..fda0ab97b02c 100644
--- a/drivers/staging/wilc1000/netdev.c
+++ b/drivers/staging/wilc1000/netdev.c
@@ -571,6 +571,7 @@ static int wilc_mac_open(struct net_device *ndev)
 	struct wilc *wl = vif->wilc;
 	unsigned char mac_add[ETH_ALEN] = {0};
 	int ret = 0;
+	struct mgmt_frame_regs mgmt_regs = {};
 
 	if (!wl || !wl->dev) {
 		netdev_err(ndev, "device not ready\n");
@@ -602,14 +603,12 @@ static int wilc_mac_open(struct net_device *ndev)
 		return -EINVAL;
 	}
 
-	wilc_mgmt_frame_register(vif->ndev->ieee80211_ptr->wiphy,
-				 vif->ndev->ieee80211_ptr,
-				 vif->frame_reg[0].type,
-				 vif->frame_reg[0].reg);
-	wilc_mgmt_frame_register(vif->ndev->ieee80211_ptr->wiphy,
-				 vif->ndev->ieee80211_ptr,
-				 vif->frame_reg[1].type,
-				 vif->frame_reg[1].reg);
+	mgmt_regs.interface_stypes = vif->mgmt_reg_stypes;
+	/* so we detect a change */
+	vif->mgmt_reg_stypes = 0;
+	wilc_update_mgmt_frame_registrations(vif->ndev->ieee80211_ptr->wiphy,
+					     vif->ndev->ieee80211_ptr,
+					     &mgmt_regs);
 	netif_wake_queue(ndev);
 	wl->open_ifcs++;
 	vif->mac_opened = 1;
@@ -792,12 +791,10 @@ void wilc_wfi_mgmt_rx(struct wilc *wilc, u8 *buff, u32 size)
 	srcu_idx = srcu_read_lock(&wilc->srcu);
 	list_for_each_entry_rcu(vif, &wilc->vif_list, list) {
 		u16 type = le16_to_cpup((__le16 *)buff);
+		u32 type_bit = BIT(type >> 4);
 
 		if (vif->priv.p2p_listen_state &&
-		    ((type == vif->frame_reg[0].type &&
-		      vif->frame_reg[0].reg) ||
-		     (type == vif->frame_reg[1].type &&
-		      vif->frame_reg[1].reg)))
+		    vif->mgmt_reg_stypes & type_bit)
 			wilc_wfi_p2p_rx(vif, buff, size);
 
 		if (vif->monitor_flag)
diff --git a/drivers/staging/wilc1000/netdev.h b/drivers/staging/wilc1000/netdev.h
index 61cbec674a62..d0a006b68d08 100644
--- a/drivers/staging/wilc1000/netdev.h
+++ b/drivers/staging/wilc1000/netdev.h
@@ -24,8 +24,6 @@
 #define PMKID_FOUND				1
 #define NUM_STA_ASSOCIATED			8
 
-#define NUM_REG_FRAME				2
-
 #define TCP_ACK_FILTER_LINK_SPEED_THRESH	54
 #define DEFAULT_LINK_SPEED			72
 
@@ -151,11 +149,6 @@ struct wilc_priv {
 	u64 inc_roc_cookie;
 };
 
-struct frame_reg {
-	u16 type;
-	bool reg;
-};
-
 #define MAX_TCP_SESSION                25
 #define MAX_PENDING_ACKS               256
 
@@ -187,7 +180,7 @@ struct wilc_vif {
 	u8 iftype;
 	int monitor_flag;
 	int mac_opened;
-	struct frame_reg frame_reg[NUM_REG_FRAME];
+	u32 mgmt_reg_stypes;
 	struct net_device_stats netstats;
 	struct wilc *wilc;
 	u8 bssid[ETH_ALEN];
-- 
cgit v1.2.3-59-g8ed1b


From bedd7904e86c02ae80513c212ea25789d8bf4fb4 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Sat, 25 Apr 2020 18:57:11 +0300
Subject: mac80211_hwsim: Advertise support for multicast RX registration

While mac80211_hwsim does not need this to configure RX filters, it is
convenient to have this enabled for testing purposes.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200425155713.25687-3-jouni@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index 05e8203aa6d9..bd1f4c249d11 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -1827,6 +1827,8 @@ static void mac80211_hwsim_configure_filter(struct ieee80211_hw *hw,
 	data->rx_filter = 0;
 	if (*total_flags & FIF_ALLMULTI)
 		data->rx_filter |= FIF_ALLMULTI;
+	if (*total_flags & FIF_MCAST_ACTION)
+		data->rx_filter |= FIF_MCAST_ACTION;
 
 	*total_flags = data->rx_filter;
 }
@@ -3060,6 +3062,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 			       NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION);
+	wiphy_ext_feature_set(hw->wiphy,
+			      NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS);
 
 	hw->wiphy->interface_modes = param->iftypes;
 
-- 
cgit v1.2.3-59-g8ed1b


From 08fad438bed0ada1a3308987862327286fcbb5f5 Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Sat, 25 Apr 2020 18:57:12 +0300
Subject: mac80211: TX legacy rate control for Beacon frames

Use the Beacon frame specific legacy rate configuration, if specified
for AP or mesh, instead of the generic rate mask when selecting the TX
rate for Beacon frames.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200425155713.25687-4-jouni@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c         | 26 +++++++++++++++++++++++++-
 net/mac80211/ieee80211_i.h |  4 ++++
 net/mac80211/mesh.c        |  1 +
 net/mac80211/tx.c          |  5 ++++-
 4 files changed, 34 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index ae3e06375a28..548a384b0509 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -994,7 +994,7 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 		      BSS_CHANGED_TWT |
 		      BSS_CHANGED_HE_OBSS_PD |
 		      BSS_CHANGED_HE_BSS_COLOR;
-	int err;
+	int i, err;
 	int prev_beacon_int;
 
 	old = sdata_dereference(sdata->u.ap.beacon, sdata);
@@ -1085,6 +1085,17 @@ static int ieee80211_start_ap(struct wiphy *wiphy, struct net_device *dev,
 		sdata->vif.bss_conf.p2p_noa_attr.oppps_ctwindow |=
 					IEEE80211_P2P_OPPPS_ENABLE_BIT;
 
+	sdata->beacon_rate_set = false;
+	if (wiphy_ext_feature_isset(local->hw.wiphy,
+				    NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) {
+		for (i = 0; i < NUM_NL80211_BANDS; i++) {
+			sdata->beacon_rateidx_mask[i] =
+				params->beacon_rate.control[i].legacy;
+			if (sdata->beacon_rateidx_mask[i])
+				sdata->beacon_rate_set = true;
+		}
+	}
+
 	err = ieee80211_assign_beacon(sdata, &params->beacon, NULL);
 	if (err < 0) {
 		ieee80211_vif_release_channel(sdata);
@@ -1189,6 +1200,7 @@ static int ieee80211_stop_ap(struct wiphy *wiphy, struct net_device *dev)
 	ieee80211_free_keys(sdata, true);
 
 	sdata->vif.bss_conf.enable_beacon = false;
+	sdata->beacon_rate_set = false;
 	sdata->vif.bss_conf.ssid_len = 0;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
@@ -1949,6 +1961,7 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
 	const u8 *old_ie;
 	struct ieee80211_sub_if_data *sdata = container_of(ifmsh,
 					struct ieee80211_sub_if_data, u.mesh);
+	int i;
 
 	/* allocate information elements */
 	new_ie = NULL;
@@ -1987,6 +2000,17 @@ static int copy_mesh_setup(struct ieee80211_if_mesh *ifmsh,
 	sdata->vif.bss_conf.beacon_int = setup->beacon_interval;
 	sdata->vif.bss_conf.dtim_period = setup->dtim_period;
 
+	sdata->beacon_rate_set = false;
+	if (wiphy_ext_feature_isset(sdata->local->hw.wiphy,
+				    NL80211_EXT_FEATURE_BEACON_RATE_LEGACY)) {
+		for (i = 0; i < NUM_NL80211_BANDS; i++) {
+			sdata->beacon_rateidx_mask[i] =
+				setup->beacon_rate.control[i].legacy;
+			if (sdata->beacon_rateidx_mask[i])
+				sdata->beacon_rate_set = true;
+		}
+	}
+
 	return 0;
 }
 
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9407cf44305c..8cbae66b5cdb 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -962,6 +962,10 @@ struct ieee80211_sub_if_data {
 	bool rc_has_vht_mcs_mask[NUM_NL80211_BANDS];
 	u16 rc_rateidx_vht_mcs_mask[NUM_NL80211_BANDS][NL80211_VHT_NSS_MAX];
 
+	/* Beacon frame (non-MCS) rate (as a bitmap) */
+	u32 beacon_rateidx_mask[NUM_NL80211_BANDS];
+	bool beacon_rate_set;
+
 	union {
 		struct ieee80211_if_ap ap;
 		struct ieee80211_if_wds wds;
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 36978a0e5000..5930d07b1e43 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -994,6 +994,7 @@ void ieee80211_stop_mesh(struct ieee80211_sub_if_data *sdata)
 	/* stop the beacon */
 	ifmsh->mesh_id_len = 0;
 	sdata->vif.bss_conf.enable_beacon = false;
+	sdata->beacon_rate_set = false;
 	clear_bit(SDATA_STATE_OFFCHANNEL_BEACON_STOPPED, &sdata->state);
 	ieee80211_bss_info_change_notify(sdata, BSS_CHANGED_BEACON_ENABLED);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 3dc1990e15c5..6dad67eb60b2 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -4883,7 +4883,10 @@ __ieee80211_beacon_get(struct ieee80211_hw *hw,
 	txrc.bss_conf = &sdata->vif.bss_conf;
 	txrc.skb = skb;
 	txrc.reported_rate.idx = -1;
-	txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
+	if (sdata->beacon_rate_set && sdata->beacon_rateidx_mask[band])
+		txrc.rate_idx_mask = sdata->beacon_rateidx_mask[band];
+	else
+		txrc.rate_idx_mask = sdata->rc_rateidx_mask[band];
 	txrc.bss = true;
 	rate_control_get_rate(sdata, NULL, &txrc);
 
-- 
cgit v1.2.3-59-g8ed1b


From 1512bc076e5ba2c4e8a189a4dbc883d59b4c37ef Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Sat, 25 Apr 2020 18:57:13 +0300
Subject: mac80211_hwsim: Claim support for setting Beacon frame TX legacy rate

mac80211 takes care of rate control for the Beacon frames, so all
mac80211_hwsim needs to do here is advertise support for this.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Link: https://lore.kernel.org/r/20200425155713.25687-5-jouni@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index bd1f4c249d11..29084096044e 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3064,6 +3064,8 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_BEACON_PROTECTION);
 	wiphy_ext_feature_set(hw->wiphy,
 			      NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS);
+	wiphy_ext_feature_set(hw->wiphy,
+			      NL80211_EXT_FEATURE_BEACON_RATE_LEGACY);
 
 	hw->wiphy->interface_modes = param->iftypes;
 
-- 
cgit v1.2.3-59-g8ed1b


From 60689de46c7f6a0028c8b37b6f03db68cbfad8ed Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Fri, 24 Apr 2020 15:41:39 -0700
Subject: mac80211: fix memory overlap due to variable length param

As of now, the HE operation element in bss_conf includes a variable-length
optional field followed by other HE variables. Though the optional
field is never used, it actually refers to the next member of the
bss_conf structure, which is not correct. Fix it by declaring the needed
HE operation fields within bss_conf itself.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1587768108-25248-2-git-send-email-rmanohar@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/ath/ath11k/mac.c |  3 +--
 include/net/mac80211.h                |  7 +++++--
 net/mac80211/he.c                     | 13 +++++--------
 3 files changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 9f8bc19cc5ae..06d063274eea 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -1168,8 +1168,7 @@ static void ath11k_peer_assoc_h_he(struct ath11k *ar,
 	       sizeof(arg->peer_he_cap_macinfo));
 	memcpy(&arg->peer_he_cap_phyinfo, he_cap->he_cap_elem.phy_cap_info,
 	       sizeof(arg->peer_he_cap_phyinfo));
-	memcpy(&arg->peer_he_ops, &vif->bss_conf.he_operation,
-	       sizeof(arg->peer_he_ops));
+	arg->peer_he_ops = vif->bss_conf.he_oper.params;
 
 	/* the top most byte is used to indicate BSS color info */
 	arg->peer_he_ops &= 0xffffff;
diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index ecb219e3ec4f..78f7ce586287 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -604,7 +604,7 @@ struct ieee80211_ftm_responder_params {
  *	nontransmitted BSSIDs
  * @profile_periodicity: the least number of beacon frames need to be received
  *	in order to discover all the nontransmitted BSSIDs in the set.
- * @he_operation: HE operation information of the AP we are connected to
+ * @he_oper: HE operation information of the AP we are connected to
  * @he_obss_pd: OBSS Packet Detection parameters.
  * @he_bss_color: BSS coloring settings, if BSS supports HE
  */
@@ -668,7 +668,10 @@ struct ieee80211_bss_conf {
 	u8 bssid_indicator;
 	bool ema_ap;
 	u8 profile_periodicity;
-	struct ieee80211_he_operation he_operation;
+	struct {
+		u32 params;
+		u16 nss_set;
+	} he_oper;
 	struct ieee80211_he_obss_pd he_obss_pd;
 	struct cfg80211_he_bss_color he_bss_color;
 };
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index 1087f715338b..f520552b22be 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -57,17 +57,14 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 
 void
 ieee80211_he_op_ie_to_bss_conf(struct ieee80211_vif *vif,
-			const struct ieee80211_he_operation *he_op_ie_elem)
+			const struct ieee80211_he_operation *he_op_ie)
 {
-	struct ieee80211_he_operation *he_operation =
-					&vif->bss_conf.he_operation;
-
-	if (!he_op_ie_elem) {
-		memset(he_operation, 0, sizeof(*he_operation));
+	memset(&vif->bss_conf.he_oper, 0, sizeof(vif->bss_conf.he_oper));
+	if (!he_op_ie)
 		return;
-	}
 
-	vif->bss_conf.he_operation = *he_op_ie_elem;
+	vif->bss_conf.he_oper.params = __le32_to_cpu(he_op_ie->he_oper_params);
+	vif->bss_conf.he_oper.nss_set = __le16_to_cpu(he_op_ie->he_mcs_nss_set);
 }
 
 void
-- 
cgit v1.2.3-59-g8ed1b


From cb10228d234c49e2035bfce7bdb42c29e1049c5c Mon Sep 17 00:00:00 2001
From: Yonglong Liu <liuyonglong@huawei.com>
Date: Wed, 29 Apr 2020 11:46:24 +0800
Subject: net: hns3: adds support for reading module eeprom info

This patch adds support for reading the optical module eeprom
info via "ethtool -m".

Signed-off-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h        |   4 +
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c |  75 +++++++++++++++
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h |  15 +++
 .../ethernet/hisilicon/hns3/hns3pf/hclge_main.c    | 102 +++++++++++++++++++++
 4 files changed, 196 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 6291aa9f06b0..5602bf226687 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -374,6 +374,8 @@ struct hnae3_ae_dev {
  *   Set the max tx rate of specified vf.
  * set_vf_mac
  *   Configure the default MAC for specified VF
+ * get_module_eeprom
+ *   Get the optical module eeprom info.
  */
 struct hnae3_ae_ops {
 	int (*init_ae_dev)(struct hnae3_ae_dev *ae_dev);
@@ -548,6 +550,8 @@ struct hnae3_ae_ops {
 	int (*set_vf_rate)(struct hnae3_handle *handle, int vf,
 			   int min_tx_rate, int max_tx_rate, bool force);
 	int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p);
+	int (*get_module_eeprom)(struct hnae3_handle *handle, u32 offset,
+				 u32 len, u8 *data);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 4d9c85f049dc..1a105f2f87a4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -4,6 +4,7 @@
 #include <linux/etherdevice.h>
 #include <linux/string.h>
 #include <linux/phy.h>
+#include <linux/sfp.h>
 
 #include "hns3_enet.h"
 
@@ -12,6 +13,11 @@ struct hns3_stats {
 	int stats_offset;
 };
 
+struct hns3_sfp_type {
+	u8 type;
+	u8 ext_type;
+};
+
 /* tqp related stats */
 #define HNS3_TQP_STAT(_string, _member)	{			\
 	.stats_string = _string,				\
@@ -1386,6 +1392,73 @@ static int hns3_set_fecparam(struct net_device *netdev,
 	return ops->set_fec(handle, fec_mode);
 }
 
+static int hns3_get_module_info(struct net_device *netdev,
+				struct ethtool_modinfo *modinfo)
+{
+#define HNS3_SFF_8636_V1_3 0x03
+
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+	struct hns3_sfp_type sfp_type;
+	int ret;
+
+	if (handle->pdev->revision == 0x20 || !ops->get_module_eeprom)
+		return -EOPNOTSUPP;
+
+	memset(&sfp_type, 0, sizeof(sfp_type));
+	ret = ops->get_module_eeprom(handle, 0, sizeof(sfp_type) / sizeof(u8),
+				     (u8 *)&sfp_type);
+	if (ret)
+		return ret;
+
+	switch (sfp_type.type) {
+	case SFF8024_ID_SFP:
+		modinfo->type = ETH_MODULE_SFF_8472;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8472_LEN;
+		break;
+	case SFF8024_ID_QSFP_8438:
+		modinfo->type = ETH_MODULE_SFF_8436;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+		break;
+	case SFF8024_ID_QSFP_8436_8636:
+		if (sfp_type.ext_type < HNS3_SFF_8636_V1_3) {
+			modinfo->type = ETH_MODULE_SFF_8436;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8436_MAX_LEN;
+		} else {
+			modinfo->type = ETH_MODULE_SFF_8636;
+			modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
+		}
+		break;
+	case SFF8024_ID_QSFP28_8636:
+		modinfo->type = ETH_MODULE_SFF_8636;
+		modinfo->eeprom_len = ETH_MODULE_SFF_8636_MAX_LEN;
+		break;
+	default:
+		netdev_err(netdev, "Optical module unknown: %#x\n",
+			   sfp_type.type);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hns3_get_module_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *ee, u8 *data)
+{
+	struct hnae3_handle *handle = hns3_get_handle(netdev);
+	const struct hnae3_ae_ops *ops = handle->ae_algo->ops;
+
+	if (handle->pdev->revision == 0x20 || !ops->get_module_eeprom)
+		return -EOPNOTSUPP;
+
+	if (!ee->len)
+		return -EINVAL;
+
+	memset(data, 0, ee->len);
+
+	return ops->get_module_eeprom(handle, ee->offset, ee->len, data);
+}
+
 #define HNS3_ETHTOOL_COALESCE	(ETHTOOL_COALESCE_USECS |		\
 				 ETHTOOL_COALESCE_USE_ADAPTIVE |	\
 				 ETHTOOL_COALESCE_RX_USECS_HIGH |	\
@@ -1449,6 +1522,8 @@ static const struct ethtool_ops hns3_ethtool_ops = {
 	.set_msglevel = hns3_set_msglevel,
 	.get_fecparam = hns3_get_fecparam,
 	.set_fecparam = hns3_set_fecparam,
+	.get_module_info = hns3_get_module_info,
+	.get_module_eeprom = hns3_get_module_eeprom,
 };
 
 void hns3_ethtool_set_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 90e422efe590..9a9d752aedc5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -270,6 +270,8 @@ enum hclge_opcode_type {
 	HCLGE_OPC_M7_COMPAT_CFG		= 0x701A,
 
 	/* SFP command */
+	HCLGE_OPC_GET_SFP_EEPROM	= 0x7100,
+	HCLGE_OPC_GET_SFP_EXIST		= 0x7101,
 	HCLGE_OPC_GET_SFP_INFO		= 0x7104,
 
 	/* Error INT commands */
@@ -1054,6 +1056,19 @@ struct hclge_firmware_compat_cmd {
 	u8 rsv[20];
 };
 
+#define HCLGE_SFP_INFO_CMD_NUM	6	/* bds per eeprom read command */
+#define HCLGE_SFP_INFO_BD0_LEN	20	/* eeprom bytes carried in bd0 */
+#define HCLGE_SFP_INFO_BDX_LEN	24	/* eeprom bytes in each later bd */
+#define HCLGE_SFP_INFO_MAX_LEN \
+	(HCLGE_SFP_INFO_BD0_LEN + \
+	(HCLGE_SFP_INFO_CMD_NUM - 1) * HCLGE_SFP_INFO_BDX_LEN)
+/* layout of bd0's data area: request header followed by eeprom bytes */
+struct hclge_sfp_info_bd0_cmd {
+	__le16 offset;		/* eeprom offset to read from */
+	__le16 read_len;	/* bytes requested for the whole command */
+	u8 data[HCLGE_SFP_INFO_BD0_LEN];
+};
+
 int hclge_cmd_init(struct hclge_dev *hdev);
 static inline void hclge_write_reg(void __iomem *base, u32 reg, u32 value)
 {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e2fec832fdf0..71a54ddb51f5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -11119,6 +11119,107 @@ static void hclge_sync_promisc_mode(struct hclge_dev *hdev)
 	}
 }
 
+static bool hclge_module_existed(struct hclge_dev *hdev)
+{
+	struct hclge_desc desc;
+	int ret;
+
+	/* one-descriptor query; a send failure is logged and reported as
+	 * "no module"
+	 */
+	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GET_SFP_EXIST, true);
+	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get SFP exist state, ret = %d\n", ret);
+		return false;
+	}
+
+	return le32_to_cpu(desc.data[0]) != 0;
+}
+
+/* Read module eeprom at @offset with one 6-bd command (at most 140 bytes);
+ * return the number of bytes actually read, 0 means read failed.
+ */
+static u16 hclge_get_sfp_eeprom_info(struct hclge_dev *hdev, u32 offset,
+				     u32 len, u8 *data)
+{
+	struct hclge_desc desc[HCLGE_SFP_INFO_CMD_NUM];
+	struct hclge_sfp_info_bd0_cmd *sfp_info_bd0;
+	u16 read_len, copy_len;
+	int ret, i;
+
+	/* setup all 6 bds to read module eeprom info. */
+	for (i = 0; i < HCLGE_SFP_INFO_CMD_NUM; i++) {
+		hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_GET_SFP_EEPROM,
+					   true);
+
+		/* bd0~bd4 need next flag */
+		if (i < HCLGE_SFP_INFO_CMD_NUM - 1)
+			desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	}
+
+	/* setup bd0, this bd contains offset and read length. Clamp in u32:
+	 * min_t(u16, ...) would truncate len before comparing it.
+	 */
+	sfp_info_bd0 = (struct hclge_sfp_info_bd0_cmd *)desc[0].data;
+	sfp_info_bd0->offset = cpu_to_le16((u16)offset);
+	read_len = min_t(u32, len, HCLGE_SFP_INFO_MAX_LEN);
+	sfp_info_bd0->read_len = cpu_to_le16(read_len);
+
+	ret = hclge_cmd_send(&hdev->hw, desc, HCLGE_SFP_INFO_CMD_NUM);
+	if (ret) {
+		dev_err(&hdev->pdev->dev,
+			"failed to get SFP eeprom info, ret = %d\n", ret);
+		return 0;
+	}
+
+	/* copy sfp info from bd0 to out buffer. */
+	copy_len = min_t(u32, len, HCLGE_SFP_INFO_BD0_LEN);
+	memcpy(data, sfp_info_bd0->data, copy_len);
+	read_len = copy_len;
+
+	/* copy sfp info from bd1~bd5 to out buffer if needed. */
+	for (i = 1; i < HCLGE_SFP_INFO_CMD_NUM; i++) {
+		if (read_len >= len)
+			return read_len;
+
+		copy_len = min_t(u32, len - read_len, HCLGE_SFP_INFO_BDX_LEN);
+		memcpy(data + read_len, desc[i].data, copy_len);
+		read_len += copy_len;
+	}
+
+	return read_len;
+}
+
+static int hclge_get_module_eeprom(struct hnae3_handle *handle, u32 offset,
+				   u32 len, u8 *data)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+	u32 done;
+	u16 chunk;
+
+	if (hdev->hw.mac.media_type != HNAE3_MEDIA_TYPE_FIBER)
+		return -EOPNOTSUPP;
+
+	if (!hclge_module_existed(hdev))
+		return -ENXIO;
+
+	/* each call returns at most one command's worth of bytes, so keep
+	 * issuing reads at the advancing offset until len bytes are in;
+	 * a zero-byte read means the command failed.
+	 */
+	for (done = 0; done < len; done += chunk) {
+		chunk = hclge_get_sfp_eeprom_info(hdev, offset + done,
+						  len - done, data + done);
+		if (!chunk)
+			return -EIO;
+	}
+
+	return 0;
+}
+
 static const struct hnae3_ae_ops hclge_ops = {
 	.init_ae_dev = hclge_init_ae_dev,
 	.uninit_ae_dev = hclge_uninit_ae_dev,
@@ -11211,6 +11312,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.set_vf_trust = hclge_set_vf_trust,
 	.set_vf_rate = hclge_set_vf_rate,
 	.set_vf_mac = hclge_set_vf_mac,
+	.get_module_eeprom = hclge_get_module_eeprom,
 };
 
 static struct hnae3_ae_algo ae_algo = {
-- 
cgit v1.2.3-59-g8ed1b


From 00b5aac59966ec3d608dc150aba552121e7de4f0 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 29 Apr 2020 07:58:20 +0000
Subject: ptp: ptp_ines: convert to devm_platform_ioremap_resource

Use the helper function that wraps the calls to platform_get_resource()
and devm_ioremap_resource() together.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_ines.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/ptp/ptp_ines.c b/drivers/ptp/ptp_ines.c
index 52d77db39829..7711651ff19e 100644
--- a/drivers/ptp/ptp_ines.c
+++ b/drivers/ptp/ptp_ines.c
@@ -783,16 +783,10 @@ static struct mii_timestamping_ctrl ines_ctrl = {
 static int ines_ptp_ctrl_probe(struct platform_device *pld)
 {
 	struct ines_clock *clock;
-	struct resource *res;
 	void __iomem *addr;
 	int err = 0;
 
-	res = platform_get_resource(pld, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pld->dev, "missing memory resource\n");
-		return -EINVAL;
-	}
-	addr = devm_ioremap_resource(&pld->dev, res);
+	addr = devm_platform_ioremap_resource(pld, 0);
 	if (IS_ERR(addr)) {
 		err = PTR_ERR(addr);
 		goto out;
-- 
cgit v1.2.3-59-g8ed1b


From a54776f2c4939bdee084c9ecd00a4a5a25b7c429 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 29 Apr 2020 18:20:58 +0800
Subject: netpoll: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/netpoll.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 849380a622ef..15b366a1a958 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -69,10 +69,11 @@ module_param(carrier_timeout, uint, 0644);
 #define np_notice(np, fmt, ...)				\
 	pr_notice("%s: " fmt, np->name, ##__VA_ARGS__)
 
-static int netpoll_start_xmit(struct sk_buff *skb, struct net_device *dev,
-			      struct netdev_queue *txq)
+static netdev_tx_t netpoll_start_xmit(struct sk_buff *skb,
+				      struct net_device *dev,
+				      struct netdev_queue *txq)
 {
-	int status = NETDEV_TX_OK;
+	netdev_tx_t status = NETDEV_TX_OK;
 	netdev_features_t features;
 
 	features = netif_skb_features(skb);
@@ -307,7 +308,7 @@ static int netpoll_owner_active(struct net_device *dev)
 void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 			     struct net_device *dev)
 {
-	int status = NETDEV_TX_BUSY;
+	netdev_tx_t status = NETDEV_TX_BUSY;
 	unsigned long tries;
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;
-- 
cgit v1.2.3-59-g8ed1b


From ad56623119fda623ade9bc570294a1c2b203d374 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 29 Apr 2020 21:24:30 +0800
Subject: net: hsr: remove unused inline functions

There's no callers in-tree anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_main.h | 19 -------------------
 1 file changed, 19 deletions(-)

diff --git a/net/hsr/hsr_main.h b/net/hsr/hsr_main.h
index 7321cf8d6d2c..f74193465bf5 100644
--- a/net/hsr/hsr_main.h
+++ b/net/hsr/hsr_main.h
@@ -62,15 +62,6 @@ struct hsr_tag {
  * with the path field in-between, which seems strange. I'm guessing the MAC
  * address definition is in error.
  */
-static inline u16 get_hsr_tag_path(struct hsr_tag *ht)
-{
-	return ntohs(ht->path_and_LSDU_size) >> 12;
-}
-
-static inline u16 get_hsr_tag_LSDU_size(struct hsr_tag *ht)
-{
-	return ntohs(ht->path_and_LSDU_size) & 0x0FFF;
-}
 
 static inline void set_hsr_tag_path(struct hsr_tag *ht, u16 path)
 {
@@ -103,16 +94,6 @@ struct hsr_sup_payload {
 	unsigned char	macaddress_A[ETH_ALEN];
 } __packed;
 
-static inline u16 get_hsr_stag_path(struct hsr_sup_tag *hst)
-{
-	return get_hsr_tag_path((struct hsr_tag *)hst);
-}
-
-static inline u16 get_hsr_stag_HSR_ver(struct hsr_sup_tag *hst)
-{
-	return get_hsr_tag_LSDU_size((struct hsr_tag *)hst);
-}
-
 static inline void set_hsr_stag_path(struct hsr_sup_tag *hst, u16 path)
 {
 	set_hsr_tag_path((struct hsr_tag *)hst, path);
-- 
cgit v1.2.3-59-g8ed1b


From 0477e032a9ea34780180d9d5289f9da06714de43 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Wed, 29 Apr 2020 21:25:30 +0800
Subject: ila: remove unused inline function ila_addr_is_ila

There's no callers in-tree anymore since commit 84287bb32856 ("ila: add
checksum neutral map auto").

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ila/ila.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/ipv6/ila/ila.h b/net/ipv6/ila/ila.h
index bb6fc0d54dae..ad5f6f6ba333 100644
--- a/net/ipv6/ila/ila.h
+++ b/net/ipv6/ila/ila.h
@@ -68,11 +68,6 @@ static inline struct ila_addr *ila_a2i(struct in6_addr *addr)
 	return (struct ila_addr *)addr;
 }
 
-static inline bool ila_addr_is_ila(struct ila_addr *iaddr)
-{
-	return (iaddr->ident.type != ILA_ATYPE_IID);
-}
-
 struct ila_params {
 	struct ila_locator locator;
 	struct ila_locator locator_match;
-- 
cgit v1.2.3-59-g8ed1b


From 21615efa6a69891fa287bade979d56dd68b09878 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Wed, 29 Apr 2020 12:15:42 -0700
Subject: Revert "net: ethernet: fec: Prevent MII event after MII_SPEED write"

This reverts commit 790ab249b55d75fdb427b92f81964cd7cb525eec.

This change needs more work.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index aa5e744ec098..1ae075a246a3 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -996,9 +996,6 @@ fec_restart(struct net_device *ndev)
 		writel(0x0, fep->hwp + FEC_X_CNTRL);
 	}
 
-	/* Prevent an MII event being report when changing speed */
-	writel(0, fep->hwp + FEC_MII_DATA);
-
 	/* Set MII speed */
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
@@ -1185,10 +1182,6 @@ fec_stop(struct net_device *ndev)
 		writel(val, fep->hwp + FEC_ECNTRL);
 		fec_enet_stop_mode(fep, true);
 	}
-
-	/* Prevent an MII event being report when changing speed */
-	writel(0, fep->hwp + FEC_MII_DATA);
-
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 	/* We have to keep ENET enabled to have MII interrupt stay working */
@@ -2149,16 +2142,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	if (suppress_preamble)
 		fep->phy_speed |= BIT(7);
 
-	/* Clear MMFR to avoid to generate MII event by writing MSCR.
-	 * MII event generation condition:
-	 * - writing MSCR:
-	 *	- mmfr[31:0]_not_zero & mscr[7:0]_is_zero &
-	 *	  mscr_reg_data_in[7:0] != 0
-	 * - writing MMFR:
-	 *	- mscr[7:0]_not_zero
-	 */
-	writel(0, fep->hwp + FEC_MII_DATA);
-
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
 	/* Clear any pending transaction complete indication */
-- 
cgit v1.2.3-59-g8ed1b


From fdff704dc60418e9a1bac78ae09c857d05c65aa3 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:37 +0200
Subject: net/smc: rework pnet table to support SMC-R failover

The pnet table stored pnet ids in the smc device structures. When a
device is going down its smc device structure is freed, and when the
device is brought online again it no longer has a pnet id set.
Rework the pnet table implementation to store the device name with their
assigned pnet id and apply the pnet id to devices when they are
registered.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_ib.c   |   5 +-
 net/smc/smc_ism.c  |   3 +-
 net/smc/smc_pnet.c | 539 +++++++++++++++++++++++++++++++----------------------
 net/smc/smc_pnet.h |   2 +
 4 files changed, 319 insertions(+), 230 deletions(-)

diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 04b6fefb8bce..440f9e319a38 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -579,8 +579,9 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 	     i++) {
 		set_bit(i, &smcibdev->port_event_mask);
 		/* determine pnetids of the port */
-		smc_pnetid_by_dev_port(ibdev->dev.parent, i,
-				       smcibdev->pnetid[i]);
+		if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
+					   smcibdev->pnetid[i]))
+			smc_pnetid_by_table_ib(smcibdev, i + 1);
 	}
 	schedule_work(&smcibdev->port_event_work);
 }
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 5c4727d5066e..32be2da2cb85 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -296,7 +296,8 @@ struct smcd_dev *smcd_alloc_dev(struct device *parent, const char *name,
 	device_initialize(&smcd->dev);
 	dev_set_name(&smcd->dev, name);
 	smcd->ops = ops;
-	smc_pnetid_by_dev_port(parent, 0, smcd->pnetid);
+	if (smc_pnetid_by_dev_port(parent, 0, smcd->pnetid))
+		smc_pnetid_by_table_smcd(smcd);
 
 	spin_lock_init(&smcd->lock);
 	spin_lock_init(&smcd->lgr_lock);
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 2a5ed47c3e08..bd01c71b827a 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -50,29 +50,26 @@ static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 
 static struct genl_family smc_pnet_nl_family;
 
-/**
- * struct smc_user_pnetentry - pnet identifier name entry for/from user
- * @list: List node.
- * @pnet_name: Pnet identifier name
- * @ndev: pointer to network device.
- * @smcibdev: Pointer to IB device.
- * @ib_port: Port of IB device.
- * @smcd_dev: Pointer to smcd device.
- */
-struct smc_user_pnetentry {
-	struct list_head list;
-	char pnet_name[SMC_MAX_PNETID_LEN + 1];
-	struct net_device *ndev;
-	struct smc_ib_device *smcibdev;
-	u8 ib_port;
-	struct smcd_dev *smcd_dev;
+enum smc_pnet_nametype {
+	SMC_PNET_ETH	= 1,
+	SMC_PNET_IB	= 2,
 };
 
 /* pnet entry stored in pnet table */
 struct smc_pnetentry {
 	struct list_head list;
 	char pnet_name[SMC_MAX_PNETID_LEN + 1];
-	struct net_device *ndev;
+	enum smc_pnet_nametype type;
+	union {
+		struct {
+			char eth_name[IFNAMSIZ + 1];
+			struct net_device *ndev;
+		};
+		struct {
+			char ib_name[IB_DEVICE_NAME_MAX + 1];
+			u8 ib_port;
+		};
+	};
 };
 
 /* Check if two given pnetids match */
@@ -106,14 +103,15 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 	sn = net_generic(net, smc_net_id);
 	pnettable = &sn->pnettable;
 
-	/* remove netdevices */
+	/* remove table entry */
 	write_lock(&pnettable->lock);
 	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist,
 				 list) {
 		if (!pnet_name ||
 		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
 			list_del(&pnetelem->list);
-			dev_put(pnetelem->ndev);
+			if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev)
+				dev_put(pnetelem->ndev);
 			kfree(pnetelem);
 			rc = 0;
 		}
@@ -155,9 +153,9 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 	return rc;
 }
 
-/* Remove a pnet entry mentioning a given network device from the pnet table.
+/* Add the reference to a given network device to the pnet table.
  */
-static int smc_pnet_remove_by_ndev(struct net_device *ndev)
+static int smc_pnet_add_by_ndev(struct net_device *ndev)
 {
 	struct smc_pnetentry *pnetelem, *tmp_pe;
 	struct smc_pnettable *pnettable;
@@ -171,10 +169,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 
 	write_lock(&pnettable->lock);
 	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
-		if (pnetelem->ndev == ndev) {
-			list_del(&pnetelem->list);
-			dev_put(pnetelem->ndev);
-			kfree(pnetelem);
+		if (pnetelem->type == SMC_PNET_ETH && !pnetelem->ndev &&
+		    !strncmp(pnetelem->eth_name, ndev->name, IFNAMSIZ)) {
+			dev_hold(ndev);
+			pnetelem->ndev = ndev;
 			rc = 0;
 			break;
 		}
@@ -183,80 +181,67 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 	return rc;
 }
 
-/* Append a pnetid to the end of the pnet table if not already on this list.
+/* Remove the reference to a given network device from the pnet table.
  */
-static int smc_pnet_enter(struct smc_pnettable *pnettable,
-			  struct smc_user_pnetentry *new_pnetelem)
+static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 {
-	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
-	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
-	struct smc_pnetentry *tmp_pnetelem;
-	struct smc_pnetentry *pnetelem;
-	bool new_smcddev = false;
-	struct net_device *ndev;
-	bool new_netdev = true;
-	bool new_ibdev = false;
-
-	if (new_pnetelem->smcibdev) {
-		struct smc_ib_device *ib_dev = new_pnetelem->smcibdev;
-		int ib_port = new_pnetelem->ib_port;
+	struct smc_pnetentry *pnetelem, *tmp_pe;
+	struct smc_pnettable *pnettable;
+	struct net *net = dev_net(ndev);
+	struct smc_net *sn;
+	int rc = -ENOENT;
 
-		spin_lock(&smc_ib_devices.lock);
-		if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
-			memcpy(ib_dev->pnetid[ib_port - 1],
-			       new_pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
-			ib_dev->pnetid_by_user[ib_port - 1] = true;
-			new_ibdev = true;
-		}
-		spin_unlock(&smc_ib_devices.lock);
-	}
-	if (new_pnetelem->smcd_dev) {
-		struct smcd_dev *smcd_dev = new_pnetelem->smcd_dev;
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
 
-		spin_lock(&smcd_dev_list.lock);
-		if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
-			memcpy(smcd_dev->pnetid, new_pnetelem->pnet_name,
-			       SMC_MAX_PNETID_LEN);
-			smcd_dev->pnetid_by_user = true;
-			new_smcddev = true;
+	write_lock(&pnettable->lock);
+	list_for_each_entry_safe(pnetelem, tmp_pe, &pnettable->pnetlist, list) {
+		if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev == ndev) {
+			dev_put(pnetelem->ndev);
+			pnetelem->ndev = NULL;
+			rc = 0;
+			break;
 		}
-		spin_unlock(&smcd_dev_list.lock);
 	}
+	write_unlock(&pnettable->lock);
+	return rc;
+}
 
-	if (!new_pnetelem->ndev)
-		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+/* Apply pnetid to ib device when no pnetid is set.
+ */
+static bool smc_pnet_apply_ib(struct smc_ib_device *ib_dev, u8 ib_port,
+			      char *pnet_name)
+{
+	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+	bool applied = false;
 
-	/* check if (base) netdev already has a pnetid. If there is one, we do
-	 * not want to add a pnet table entry
-	 */
-	ndev = pnet_find_base_ndev(new_pnetelem->ndev);
-	if (!smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
-				    ndev_pnetid))
-		return (new_ibdev || new_smcddev) ? 0 : -EEXIST;
+	spin_lock(&smc_ib_devices.lock);
+	if (smc_pnet_match(ib_dev->pnetid[ib_port - 1], pnet_null)) {
+		memcpy(ib_dev->pnetid[ib_port - 1], pnet_name,
+		       SMC_MAX_PNETID_LEN);
+		ib_dev->pnetid_by_user[ib_port - 1] = true;
+		applied = true;
+	}
+	spin_unlock(&smc_ib_devices.lock);
+	return applied;
+}
 
-	/* add a new netdev entry to the pnet table if there isn't one */
-	tmp_pnetelem = kzalloc(sizeof(*pnetelem), GFP_KERNEL);
-	if (!tmp_pnetelem)
-		return -ENOMEM;
-	memcpy(tmp_pnetelem->pnet_name, new_pnetelem->pnet_name,
-	       SMC_MAX_PNETID_LEN);
-	tmp_pnetelem->ndev = new_pnetelem->ndev;
+/* Apply pnetid to smcd device when no pnetid is set.
+ */
+static bool smc_pnet_apply_smcd(struct smcd_dev *smcd_dev, char *pnet_name)
+{
+	u8 pnet_null[SMC_MAX_PNETID_LEN] = {0};
+	bool applied = false;
 
-	write_lock(&pnettable->lock);
-	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
-		if (pnetelem->ndev == new_pnetelem->ndev)
-			new_netdev = false;
-	}
-	if (new_netdev) {
-		dev_hold(tmp_pnetelem->ndev);
-		list_add_tail(&tmp_pnetelem->list, &pnettable->pnetlist);
-		write_unlock(&pnettable->lock);
-	} else {
-		write_unlock(&pnettable->lock);
-		kfree(tmp_pnetelem);
+	spin_lock(&smcd_dev_list.lock);
+	if (smc_pnet_match(smcd_dev->pnetid, pnet_null)) {
+		memcpy(smcd_dev->pnetid, pnet_name, SMC_MAX_PNETID_LEN);
+		smcd_dev->pnetid_by_user = true;
+		applied = true;
 	}
-
-	return (new_netdev || new_ibdev || new_smcddev) ? 0 : -EEXIST;
+	spin_unlock(&smcd_dev_list.lock);
+	return applied;
 }
 
 /* The limit for pnetid is 16 characters.
@@ -323,57 +308,167 @@ out:
 	return smcd_dev;
 }
 
-/* Parse the supplied netlink attributes and fill a pnetentry structure.
- * For ethernet and infiniband device names verify that the devices exist.
+static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
+			    char *eth_name, char *pnet_name)
+{
+	struct smc_pnetentry *tmp_pe, *new_pe;
+	struct net_device *ndev, *base_ndev;
+	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
+	bool new_netdev;
+	int rc;
+
+	/* The netdev need not exist. If it does and its (base) netdev already
+	 * has a pnetid, we do not want to add a pnet table entry (-EEXIST).
+	 */
+	rc = -EEXIST;
+	ndev = dev_get_by_name(net, eth_name);	/* dev_hold() */
+	if (ndev) {
+		base_ndev = pnet_find_base_ndev(ndev);
+		if (!smc_pnetid_by_dev_port(base_ndev->dev.parent,
+					    base_ndev->dev_port, ndev_pnetid))
+			goto out_put;
+	}
+
+	/* add a new netdev entry to the pnet table if there isn't one */
+	rc = -ENOMEM;
+	new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+	if (!new_pe)
+		goto out_put;
+	new_pe->type = SMC_PNET_ETH;
+	memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+	strncpy(new_pe->eth_name, eth_name, IFNAMSIZ);
+	new_pe->ndev = ndev;	/* may be NULL */
+
+	rc = -EEXIST;	/* returned if eth_name is already in the table */
+	new_netdev = true;
+	write_lock(&pnettable->lock);
+	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+		if (tmp_pe->type == SMC_PNET_ETH &&
+		    !strncmp(tmp_pe->eth_name, eth_name, IFNAMSIZ)) {
+			new_netdev = false;
+			break;
+		}
+	}
+	if (new_netdev) {
+		list_add_tail(&new_pe->list, &pnettable->pnetlist);
+		write_unlock(&pnettable->lock);
+	} else {
+		write_unlock(&pnettable->lock);
+		kfree(new_pe);
+		goto out_put;
+	}
+	return 0;	/* entry keeps the ndev reference, if any */
+
+out_put:
+	if (ndev)
+		dev_put(ndev);
+	return rc;
+}
+
+static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
+			   u8 ib_port, char *pnet_name)
+{
+	struct smc_pnetentry *tmp_pe, *new_pe;
+	struct smc_ib_device *ib_dev;
+	bool smcddev_applied = true;	/* stays true if no smcd device found */
+	bool ibdev_applied = true;	/* stays true if no ib device found */
+	struct smcd_dev *smcd_dev;
+	bool new_ibdev;
+
+	/* try to apply the pnetid to active devices (ib and smcd, by name) */
+	ib_dev = smc_pnet_find_ib(ib_name);
+	if (ib_dev)
+		ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
+	smcd_dev = smc_pnet_find_smcd(ib_name);
+	if (smcd_dev)
+		smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
+	/* Apply fails when a device has a hardware-defined pnetid set, do not
+	 * add a pnet table entry in that case.
+	 */
+	if (!ibdev_applied || !smcddev_applied)
+		return -EEXIST;
+
+	/* add a new ib entry unless this ib_name is already listed (any port) */
+	new_pe = kzalloc(sizeof(*new_pe), GFP_KERNEL);
+	if (!new_pe)
+		return -ENOMEM;
+	new_pe->type = SMC_PNET_IB;
+	memcpy(new_pe->pnet_name, pnet_name, SMC_MAX_PNETID_LEN);
+	strncpy(new_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX);
+	new_pe->ib_port = ib_port;
+
+	new_ibdev = true;
+	write_lock(&pnettable->lock);
+	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+		if (tmp_pe->type == SMC_PNET_IB &&
+		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+			new_ibdev = false;
+			break;
+		}
+	}
+	if (new_ibdev) {
+		list_add_tail(&new_pe->list, &pnettable->pnetlist);
+		write_unlock(&pnettable->lock);
+	} else {
+		write_unlock(&pnettable->lock);
+		kfree(new_pe);
+	}
+	return (new_ibdev) ? 0 : -EEXIST;
+}
+
+/* Append a pnetid to the end of the pnet table if not already on this list.
  */
-static int smc_pnet_fill_entry(struct net *net,
-			       struct smc_user_pnetentry *pnetelem,
-			       struct nlattr *tb[])
+static int smc_pnet_enter(struct net *net, struct nlattr *tb[])
 {
-	char *string, *ibname;
+	char pnet_name[SMC_MAX_PNETID_LEN + 1];
+	struct smc_pnettable *pnettable;
+	bool new_netdev = false;
+	bool new_ibdev = false;
+	struct smc_net *sn;
+	u8 ibport = 1;
+	char *string;
 	int rc;
 
-	memset(pnetelem, 0, sizeof(*pnetelem));
-	INIT_LIST_HEAD(&pnetelem->list);
+	/* get pnettable for namespace */
+	sn = net_generic(net, smc_net_id);
+	pnettable = &sn->pnettable;
 
 	rc = -EINVAL;
 	if (!tb[SMC_PNETID_NAME])
 		goto error;
 	string = (char *)nla_data(tb[SMC_PNETID_NAME]);
-	if (!smc_pnetid_valid(string, pnetelem->pnet_name))
+	if (!smc_pnetid_valid(string, pnet_name))
 		goto error;
 
-	rc = -EINVAL;
 	if (tb[SMC_PNETID_ETHNAME]) {
 		string = (char *)nla_data(tb[SMC_PNETID_ETHNAME]);
-		pnetelem->ndev = dev_get_by_name(net, string);
-		if (!pnetelem->ndev)
+		rc = smc_pnet_add_eth(pnettable, net, string, pnet_name);
+		if (!rc)
+			new_netdev = true;
+		else if (rc != -EEXIST)
 			goto error;
 	}
 
 	/* if this is not the initial namespace, stop here */
 	if (net != &init_net)
-		return 0;
+		return new_netdev ? 0 : -EEXIST;
 
 	rc = -EINVAL;
 	if (tb[SMC_PNETID_IBNAME]) {
-		ibname = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
-		ibname = strim(ibname);
-		pnetelem->smcibdev = smc_pnet_find_ib(ibname);
-		pnetelem->smcd_dev = smc_pnet_find_smcd(ibname);
-		if (!pnetelem->smcibdev && !pnetelem->smcd_dev)
-			goto error;
-		if (pnetelem->smcibdev) {
-			if (!tb[SMC_PNETID_IBPORT])
-				goto error;
-			pnetelem->ib_port = nla_get_u8(tb[SMC_PNETID_IBPORT]);
-			if (pnetelem->ib_port < 1 ||
-			    pnetelem->ib_port > SMC_MAX_PORTS)
+		string = (char *)nla_data(tb[SMC_PNETID_IBNAME]);
+		string = strim(string);
+		if (tb[SMC_PNETID_IBPORT]) {
+			ibport = nla_get_u8(tb[SMC_PNETID_IBPORT]);
+			if (ibport < 1 || ibport > SMC_MAX_PORTS)
 				goto error;
 		}
+		rc = smc_pnet_add_ib(pnettable, string, ibport, pnet_name);
+		if (!rc)
+			new_ibdev = true;
+		else if (rc != -EEXIST)
+			goto error;
 	}
-
-	return 0;
+	return (new_netdev || new_ibdev) ? 0 : -EEXIST;
 
 error:
 	return rc;
@@ -381,28 +476,22 @@ error:
 
 /* Convert an smc_pnetentry to a netlink attribute sequence */
 static int smc_pnet_set_nla(struct sk_buff *msg,
-			    struct smc_user_pnetentry *pnetelem)
+			    struct smc_pnetentry *pnetelem)
 {
 	if (nla_put_string(msg, SMC_PNETID_NAME, pnetelem->pnet_name))
 		return -1;
-	if (pnetelem->ndev) {
+	if (pnetelem->type == SMC_PNET_ETH) {
 		if (nla_put_string(msg, SMC_PNETID_ETHNAME,
-				   pnetelem->ndev->name))
+				   pnetelem->eth_name))
 			return -1;
 	} else {
 		if (nla_put_string(msg, SMC_PNETID_ETHNAME, "n/a"))
 			return -1;
 	}
-	if (pnetelem->smcibdev) {
-		if (nla_put_string(msg, SMC_PNETID_IBNAME,
-			dev_name(pnetelem->smcibdev->ibdev->dev.parent)) ||
+	if (pnetelem->type == SMC_PNET_IB) {
+		if (nla_put_string(msg, SMC_PNETID_IBNAME, pnetelem->ib_name) ||
 		    nla_put_u8(msg, SMC_PNETID_IBPORT, pnetelem->ib_port))
 			return -1;
-	} else if (pnetelem->smcd_dev) {
-		if (nla_put_string(msg, SMC_PNETID_IBNAME,
-				   dev_name(&pnetelem->smcd_dev->dev)) ||
-		    nla_put_u8(msg, SMC_PNETID_IBPORT, 1))
-			return -1;
 	} else {
 		if (nla_put_string(msg, SMC_PNETID_IBNAME, "n/a") ||
 		    nla_put_u8(msg, SMC_PNETID_IBPORT, 0xff))
@@ -415,21 +504,8 @@ static int smc_pnet_set_nla(struct sk_buff *msg,
 static int smc_pnet_add(struct sk_buff *skb, struct genl_info *info)
 {
 	struct net *net = genl_info_net(info);
-	struct smc_user_pnetentry pnetelem;
-	struct smc_pnettable *pnettable;
-	struct smc_net *sn;
-	int rc;
-
-	/* get pnettable for namespace */
-	sn = net_generic(net, smc_net_id);
-	pnettable = &sn->pnettable;
 
-	rc = smc_pnet_fill_entry(net, &pnetelem, info->attrs);
-	if (!rc)
-		rc = smc_pnet_enter(pnettable, &pnetelem);
-	if (pnetelem.ndev)
-		dev_put(pnetelem.ndev);
-	return rc;
+	return smc_pnet_enter(net, info->attrs);
 }
 
 static int smc_pnet_del(struct sk_buff *skb, struct genl_info *info)
@@ -450,7 +526,7 @@ static int smc_pnet_dump_start(struct netlink_callback *cb)
 
 static int smc_pnet_dumpinfo(struct sk_buff *skb,
 			     u32 portid, u32 seq, u32 flags,
-			     struct smc_user_pnetentry *pnetelem)
+			     struct smc_pnetentry *pnetelem)
 {
 	void *hdr;
 
@@ -469,91 +545,32 @@ static int smc_pnet_dumpinfo(struct sk_buff *skb,
 static int _smc_pnet_dump(struct net *net, struct sk_buff *skb, u32 portid,
 			  u32 seq, u8 *pnetid, int start_idx)
 {
-	struct smc_user_pnetentry tmp_entry;
 	struct smc_pnettable *pnettable;
 	struct smc_pnetentry *pnetelem;
-	struct smc_ib_device *ibdev;
-	struct smcd_dev *smcd_dev;
 	struct smc_net *sn;
 	int idx = 0;
-	int ibport;
 
 	/* get pnettable for namespace */
 	sn = net_generic(net, smc_net_id);
 	pnettable = &sn->pnettable;
 
-	/* dump netdevices */
+	/* dump pnettable entries */
 	read_lock(&pnettable->lock);
 	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
 		if (pnetid && !smc_pnet_match(pnetelem->pnet_name, pnetid))
 			continue;
 		if (idx++ < start_idx)
 			continue;
-		memset(&tmp_entry, 0, sizeof(tmp_entry));
-		memcpy(&tmp_entry.pnet_name, pnetelem->pnet_name,
-		       SMC_MAX_PNETID_LEN);
-		tmp_entry.ndev = pnetelem->ndev;
+		/* if this is not the initial namespace, dump only netdev */
+		if (net != &init_net && pnetelem->type != SMC_PNET_ETH)
+			continue;
 		if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
-				      &tmp_entry)) {
+				      pnetelem)) {
 			--idx;
 			break;
 		}
 	}
 	read_unlock(&pnettable->lock);
-
-	/* if this is not the initial namespace, stop here */
-	if (net != &init_net)
-		return idx;
-
-	/* dump ib devices */
-	spin_lock(&smc_ib_devices.lock);
-	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
-		for (ibport = 0; ibport < SMC_MAX_PORTS; ibport++) {
-			if (ibdev->pnetid_by_user[ibport]) {
-				if (pnetid &&
-				    !smc_pnet_match(ibdev->pnetid[ibport],
-						    pnetid))
-					continue;
-				if (idx++ < start_idx)
-					continue;
-				memset(&tmp_entry, 0, sizeof(tmp_entry));
-				memcpy(&tmp_entry.pnet_name,
-				       ibdev->pnetid[ibport],
-				       SMC_MAX_PNETID_LEN);
-				tmp_entry.smcibdev = ibdev;
-				tmp_entry.ib_port = ibport + 1;
-				if (smc_pnet_dumpinfo(skb, portid, seq,
-						      NLM_F_MULTI,
-						      &tmp_entry)) {
-					--idx;
-					break;
-				}
-			}
-		}
-	}
-	spin_unlock(&smc_ib_devices.lock);
-
-	/* dump smcd devices */
-	spin_lock(&smcd_dev_list.lock);
-	list_for_each_entry(smcd_dev, &smcd_dev_list.list, list) {
-		if (smcd_dev->pnetid_by_user) {
-			if (pnetid && !smc_pnet_match(smcd_dev->pnetid, pnetid))
-				continue;
-			if (idx++ < start_idx)
-				continue;
-			memset(&tmp_entry, 0, sizeof(tmp_entry));
-			memcpy(&tmp_entry.pnet_name, smcd_dev->pnetid,
-			       SMC_MAX_PNETID_LEN);
-			tmp_entry.smcd_dev = smcd_dev;
-			if (smc_pnet_dumpinfo(skb, portid, seq, NLM_F_MULTI,
-					      &tmp_entry)) {
-				--idx;
-				break;
-			}
-		}
-	}
-	spin_unlock(&smcd_dev_list.lock);
-
 	return idx;
 }
 
@@ -659,6 +676,9 @@ static int smc_pnet_netdev_event(struct notifier_block *this,
 	case NETDEV_UNREGISTER:
 		smc_pnet_remove_by_ndev(event_dev);
 		return NOTIFY_OK;
+	case NETDEV_REGISTER:
+		smc_pnet_add_by_ndev(event_dev);
+		return NOTIFY_OK;
 	default:
 		return NOTIFY_DONE;
 	}
@@ -744,7 +764,7 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
 
 	read_lock(&pnettable->lock);
 	list_for_each_entry(pnetelem, &pnettable->pnetlist, list) {
-		if (ndev == pnetelem->ndev) {
+		if (pnetelem->type == SMC_PNET_ETH && ndev == pnetelem->ndev) {
 			/* get pnetid of netdev device */
 			memcpy(pnetid, pnetelem->pnet_name, SMC_MAX_PNETID_LEN);
 			rc = 0;
@@ -755,6 +775,34 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
 	return rc;
 }
 
+/* find the first usable roce device port for the given pnetid (into ini) */
+static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
+					  struct smc_init_info *ini)
+{
+	struct smc_ib_device *ibdev;
+	int i;
+
+	ini->ib_dev = NULL;	/* stays NULL if no port qualifies */
+	spin_lock(&smc_ib_devices.lock);
+	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		for (i = 1; i <= SMC_MAX_PORTS; i++) {	/* ports are 1-based */
+			if (!rdma_is_port_valid(ibdev->ibdev, i))
+				continue;
+			if (smc_pnet_match(ibdev->pnetid[i - 1], pnet_id) &&
+			    smc_ib_port_active(ibdev, i) &&
+			    !test_bit(i - 1, ibdev->ports_going_away) &&
+			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
+						  ini->ib_gid, NULL)) {
+				ini->ib_dev = ibdev;
+				ini->ib_port = i;
+				goto out;	/* first match wins */
+			}
+		}
+	}
+out:
+	spin_unlock(&smc_ib_devices.lock);
+}
+
 /* if handshake network device belongs to a roce device, return its
  * IB device and port
  */
@@ -801,8 +849,6 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 					 struct smc_init_info *ini)
 {
 	u8 ndev_pnetid[SMC_MAX_PNETID_LEN];
-	struct smc_ib_device *ibdev;
-	int i;
 
 	ndev = pnet_find_base_ndev(ndev);
 	if (smc_pnetid_by_dev_port(ndev->dev.parent, ndev->dev_port,
@@ -811,25 +857,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 		smc_pnet_find_rdma_dev(ndev, ini);
 		return; /* pnetid could not be determined */
 	}
-
-	spin_lock(&smc_ib_devices.lock);
-	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
-		for (i = 1; i <= SMC_MAX_PORTS; i++) {
-			if (!rdma_is_port_valid(ibdev->ibdev, i))
-				continue;
-			if (smc_pnet_match(ibdev->pnetid[i - 1], ndev_pnetid) &&
-			    smc_ib_port_active(ibdev, i) &&
-			    !test_bit(i - 1, ibdev->ports_going_away) &&
-			    !smc_ib_determine_gid(ibdev, i, ini->vlan_id,
-						  ini->ib_gid, NULL)) {
-				ini->ib_dev = ibdev;
-				ini->ib_port = i;
-				goto out;
-			}
-		}
-	}
-out:
-	spin_unlock(&smc_ib_devices.lock);
+	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini);
 }
 
 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
@@ -895,3 +923,60 @@ out_rel:
 out:
 	return;
 }
+
+/* Lookup and apply the pnet table entry matching the given ib device and port.
+ */
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port)
+{
+	char *ib_name = smcibdev->ibdev->name;
+	struct smc_pnettable *pnettable;
+	struct smc_pnetentry *tmp_pe;
+	struct smc_net *sn;
+	int rc = -ENOENT;	/* returned if no entry matches */
+
+	/* get pnettable for init namespace */
+	sn = net_generic(&init_net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	read_lock(&pnettable->lock);
+	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+		if (tmp_pe->type == SMC_PNET_IB &&
+		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX) &&
+		    tmp_pe->ib_port == ib_port) {
+			smc_pnet_apply_ib(smcibdev, ib_port, tmp_pe->pnet_name);
+			rc = 0;
+			break;	/* first matching entry wins */
+		}
+	}
+	read_unlock(&pnettable->lock);
+
+	return rc;
+}
+
+/* Lookup and apply the pnet table entry matching the given smcd device name.
+ */
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcddev)
+{
+	const char *ib_name = dev_name(&smcddev->dev);	/* smcd device name */
+	struct smc_pnettable *pnettable;
+	struct smc_pnetentry *tmp_pe;
+	struct smc_net *sn;
+	int rc = -ENOENT;	/* returned if no entry matches */
+
+	/* get pnettable for init namespace */
+	sn = net_generic(&init_net, smc_net_id);
+	pnettable = &sn->pnettable;
+
+	read_lock(&pnettable->lock);
+	list_for_each_entry(tmp_pe, &pnettable->pnetlist, list) {
+		if (tmp_pe->type == SMC_PNET_IB &&	/* also used for smcd */
+		    !strncmp(tmp_pe->ib_name, ib_name, IB_DEVICE_NAME_MAX)) {
+			smc_pnet_apply_smcd(smcddev, tmp_pe->pnet_name);
+			rc = 0;
+			break;	/* first matching entry wins */
+		}
+	}
+	read_unlock(&pnettable->lock);
+
+	return rc;
+}
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index 4564e4d69c2e..ea207f8fc6f7 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -46,5 +46,7 @@ void smc_pnet_exit(void);
 void smc_pnet_net_exit(struct net *net);
 void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
+int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
+int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From f3c1deddb21c19fb0eec3c61e80567ef4a79b58b Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:38 +0200
Subject: net/smc: separate function for link initialization

Move the initialization of a new link into its own function, separate
from smc_lgr_create, to allow more than one link per link group.
Do an extra check if the IB device initialization was successful, and
reset the link state if any error occurs during smcr_link_init().
And rename two existing functions to use the prefix smcr_ to indicate
that they belong to the SMC-R code path.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 114 +++++++++++++++++++++++++++++++----------------------
 1 file changed, 66 insertions(+), 48 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 824c5211b027..3bb45c33db22 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -179,7 +179,7 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
  * of the DELETE LINK sequence from server; or as server to
  * initiate the delete processing. See smc_llc_rx_delete_link().
  */
-static int smc_link_send_delete(struct smc_link *lnk, bool orderly)
+static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
 {
 	if (lnk->state == SMC_LNK_ACTIVE &&
 	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
@@ -219,7 +219,7 @@ static void smc_lgr_free_work(struct work_struct *work)
 	if (!lgr->is_smcd && !lgr->terminating)	{
 		/* try to send del link msg, on error free lgr immediately */
 		if (lnk->state == SMC_LNK_ACTIVE &&
-		    !smc_link_send_delete(lnk, true)) {
+		    !smcr_link_send_delete(lnk, true)) {
 			/* reschedule in case we never receive a response */
 			smc_lgr_schedule_free_work(lgr);
 			spin_unlock_bh(lgr_lock);
@@ -245,6 +245,64 @@ static void smc_lgr_terminate_work(struct work_struct *work)
 	__smc_lgr_terminate(lgr, true);
 }
 
+static int smcr_link_init(struct smc_link *lnk, u8 link_id,
+			  struct smc_init_info *ini)
+{
+	u8 rndvec[3];
+	int rc;
+
+	get_device(&ini->ib_dev->ibdev->dev);
+	atomic_inc(&ini->ib_dev->lnk_cnt);
+	lnk->state = SMC_LNK_ACTIVATING;
+	lnk->link_id = link_id;
+	lnk->smcibdev = ini->ib_dev;
+	lnk->ibport = ini->ib_port;
+	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+	if (!ini->ib_dev->initialized) {
+		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
+		if (rc)
+			goto out;
+	}
+	get_random_bytes(rndvec, sizeof(rndvec));	/* 24 random bits */
+	lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
+		(rndvec[2] << 16);
+	rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
+				  ini->vlan_id, lnk->gid, &lnk->sgid_index);
+	if (rc)
+		goto out;
+	rc = smc_llc_link_init(lnk);
+	if (rc)
+		goto out;
+	rc = smc_wr_alloc_link_mem(lnk);
+	if (rc)
+		goto clear_llc_lnk;
+	rc = smc_ib_create_protection_domain(lnk);
+	if (rc)
+		goto free_link_mem;
+	rc = smc_ib_create_queue_pair(lnk);
+	if (rc)
+		goto dealloc_pd;
+	rc = smc_wr_create_link(lnk);
+	if (rc)
+		goto destroy_qp;
+	return 0;	/* success: device ref and lnk_cnt are kept */
+
+destroy_qp:
+	smc_ib_destroy_queue_pair(lnk);
+dealloc_pd:
+	smc_ib_dealloc_protection_domain(lnk);
+free_link_mem:
+	smc_wr_free_link_mem(lnk);
+clear_llc_lnk:
+	smc_llc_link_clear(lnk);
+out:	/* undo get_device() and the lnk_cnt increment from above */
+	put_device(&ini->ib_dev->ibdev->dev);
+	memset(lnk, 0, sizeof(struct smc_link));	/* reset whole link */
+	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
+		wake_up(&ini->ib_dev->lnks_deleted);
+	return rc;
+}
+
 /* create a new SMC link group */
 static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 {
@@ -252,7 +310,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	struct list_head *lgr_list;
 	struct smc_link *lnk;
 	spinlock_t *lgr_lock;
-	u8 rndvec[3];
 	int rc = 0;
 	int i;
 
@@ -297,48 +354,17 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		atomic_inc(&ini->ism_dev->lgr_cnt);
 	} else {
 		/* SMC-R specific settings */
-		get_device(&ini->ib_dev->ibdev->dev);
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
 
 		lnk = &lgr->lnk[SMC_SINGLE_LINK];
-		/* initialize link */
-		lnk->state = SMC_LNK_ACTIVATING;
-		lnk->link_id = SMC_SINGLE_LINK;
-		lnk->smcibdev = ini->ib_dev;
-		lnk->ibport = ini->ib_port;
-		lgr_list = &smc_lgr_list.list;
-		lgr_lock = &smc_lgr_list.lock;
-		lnk->path_mtu =
-			ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
-		if (!ini->ib_dev->initialized)
-			smc_ib_setup_per_ibdev(ini->ib_dev);
-		get_random_bytes(rndvec, sizeof(rndvec));
-		lnk->psn_initial = rndvec[0] + (rndvec[1] << 8) +
-			(rndvec[2] << 16);
-		rc = smc_ib_determine_gid(lnk->smcibdev, lnk->ibport,
-					  ini->vlan_id, lnk->gid,
-					  &lnk->sgid_index);
-		if (rc)
-			goto free_lgr;
-		rc = smc_llc_link_init(lnk);
+		rc = smcr_link_init(lnk, SMC_SINGLE_LINK, ini);
 		if (rc)
 			goto free_lgr;
-		rc = smc_wr_alloc_link_mem(lnk);
-		if (rc)
-			goto clear_llc_lnk;
-		rc = smc_ib_create_protection_domain(lnk);
-		if (rc)
-			goto free_link_mem;
-		rc = smc_ib_create_queue_pair(lnk);
-		if (rc)
-			goto dealloc_pd;
-		rc = smc_wr_create_link(lnk);
-		if (rc)
-			goto destroy_qp;
+		lgr_list = &smc_lgr_list.list;
+		lgr_lock = &smc_lgr_list.lock;
 		atomic_inc(&lgr_cnt);
-		atomic_inc(&ini->ib_dev->lnk_cnt);
 	}
 	smc->conn.lgr = lgr;
 	spin_lock_bh(lgr_lock);
@@ -346,14 +372,6 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	spin_unlock_bh(lgr_lock);
 	return 0;
 
-destroy_qp:
-	smc_ib_destroy_queue_pair(lnk);
-dealloc_pd:
-	smc_ib_dealloc_protection_domain(lnk);
-free_link_mem:
-	smc_wr_free_link_mem(lnk);
-clear_llc_lnk:
-	smc_llc_link_clear(lnk);
 free_lgr:
 	kfree(lgr);
 ism_put_vlan:
@@ -417,7 +435,7 @@ void smc_conn_free(struct smc_connection *conn)
 		smc_lgr_schedule_free_work(lgr);
 }
 
-static void smc_link_clear(struct smc_link *lnk)
+static void smcr_link_clear(struct smc_link *lnk)
 {
 	lnk->peer_qpn = 0;
 	smc_llc_link_clear(lnk);
@@ -426,6 +444,7 @@ static void smc_link_clear(struct smc_link *lnk)
 	smc_ib_destroy_queue_pair(lnk);
 	smc_ib_dealloc_protection_domain(lnk);
 	smc_wr_free_link_mem(lnk);
+	put_device(&lnk->smcibdev->ibdev->dev);
 	if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
 		wake_up(&lnk->smcibdev->lnks_deleted);
 }
@@ -512,8 +531,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
 			wake_up(&lgr->smcd->lgrs_deleted);
 	} else {
-		smc_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
-		put_device(&lgr->lnk[SMC_SINGLE_LINK].smcibdev->ibdev->dev);
+		smcr_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 026c381fb4778d0d44af57b7ff674f31f04af221 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:39 +0200
Subject: net/smc: introduce link_idx for link group array

The link_id is the index of the link in the array of the link group.
When a link in the array is reused for a new link, a different unique
link_id should be used, otherwise the index in the array could collide
with the previous link at this array position.
Use a new variable link_idx as array index, and make link_id an
increasing unique id value.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 34 +++++++++++++++++++++++++++++-----
 net/smc/smc_core.h |  2 ++
 2 files changed, 31 insertions(+), 5 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3bb45c33db22..d233112ced2a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -245,8 +245,28 @@ static void smc_lgr_terminate_work(struct work_struct *work)
 	__smc_lgr_terminate(lgr, true);
 }
 
-static int smcr_link_init(struct smc_link *lnk, u8 link_id,
-			  struct smc_init_info *ini)
+/* return next unique link id for the lgr */
+static u8 smcr_next_link_id(struct smc_link_group *lgr)
+{
+	u8 link_id;
+	int i;
+
+	do {
+		link_id = ++lgr->next_link_id;
+		if (!link_id)	/* skip zero as link_id */
+			link_id = ++lgr->next_link_id;
+		/* an id still used by a non-inactive link must be skipped */
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			if (lgr->lnk[i].state != SMC_LNK_INACTIVE &&
+			    lgr->lnk[i].link_id == link_id)
+				break;
+		}
+	} while (i < SMC_LINKS_PER_LGR_MAX);	/* collision: try next id */
+	return link_id;
+}
+
+static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+			  u8 link_idx, struct smc_init_info *ini)
 {
 	u8 rndvec[3];
 	int rc;
@@ -254,7 +274,8 @@ static int smcr_link_init(struct smc_link *lnk, u8 link_id,
 	get_device(&ini->ib_dev->ibdev->dev);
 	atomic_inc(&ini->ib_dev->lnk_cnt);
 	lnk->state = SMC_LNK_ACTIVATING;
-	lnk->link_id = link_id;
+	lnk->link_id = smcr_next_link_id(lgr);
+	lnk->link_idx = link_idx;
 	lnk->smcibdev = ini->ib_dev;
 	lnk->ibport = ini->ib_port;
 	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
@@ -310,6 +331,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	struct list_head *lgr_list;
 	struct smc_link *lnk;
 	spinlock_t *lgr_lock;
+	u8 link_idx;
 	int rc = 0;
 	int i;
 
@@ -338,6 +360,7 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
 		INIT_LIST_HEAD(&lgr->rmbs[i]);
 	}
+	lgr->next_link_id = 0;
 	smc_lgr_list.num += SMC_LGR_NUM_INCR;
 	memcpy(&lgr->id, (u8 *)&smc_lgr_list.num, SMC_LGR_ID_SIZE);
 	INIT_DELAYED_WORK(&lgr->free_work, smc_lgr_free_work);
@@ -358,8 +381,9 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
 
-		lnk = &lgr->lnk[SMC_SINGLE_LINK];
-		rc = smcr_link_init(lnk, SMC_SINGLE_LINK, ini);
+		link_idx = SMC_SINGLE_LINK;
+		lnk = &lgr->lnk[link_idx];
+		rc = smcr_link_init(lgr, lnk, link_idx, ini);
 		if (rc)
 			goto free_lgr;
 		lgr_list = &smc_lgr_list.list;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 8041db20c753..c459b0639bf3 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -115,6 +115,7 @@ struct smc_link {
 	u8			peer_mac[ETH_ALEN];	/* = gid[8:10||13:15] */
 	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
+	u8			link_idx;	/* index in lgr link array */
 
 	enum smc_link_state	state;		/* state of link */
 	struct workqueue_struct *llc_wq;	/* single thread work queue */
@@ -222,6 +223,7 @@ struct smc_link_group {
 						/* remote addr/key pairs */
 			DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
 						/* used rtoken elements */
+			u8			next_link_id;
 		};
 		struct { /* SMC-D */
 			u64			peer_gid;
-- 
cgit v1.2.3-59-g8ed1b


From 387707fdf48697c667dd5e9715ac4feb41602d15 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:40 +0200
Subject: net/smc: convert static link ID to dynamic references

As a preparation for the support of multiple links remove the usage of
a static link id (SMC_SINGLE_LINK) and allow dynamic link ids.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 17 +++++---------
 net/smc/smc.h      |  1 +
 net/smc/smc_cdc.c  |  8 +++----
 net/smc/smc_clc.c  | 12 +++++-----
 net/smc/smc_core.c | 66 +++++++++++++++++++++++++++---------------------------
 net/smc/smc_core.h |  7 +++---
 net/smc/smc_ib.c   | 58 +++++++++++++++++++++++------------------------
 net/smc/smc_ib.h   | 10 ++++-----
 net/smc/smc_llc.c  | 10 ++++-----
 net/smc/smc_tx.c   | 13 ++++++-----
 10 files changed, 99 insertions(+), 103 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6fd44bdb0fc3..6e4bad8c64a8 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -343,7 +343,7 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
 {
 	if (!rmb_desc->wr_reg) {
 		/* register memory region for new rmb */
-		if (smc_wr_reg_send(link, rmb_desc->mr_rx[SMC_SINGLE_LINK])) {
+		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
 			rmb_desc->regerr = 1;
 			return -EFAULT;
 		}
@@ -362,12 +362,10 @@ static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
 static int smc_clnt_conf_first_link(struct smc_sock *smc)
 {
 	struct net *net = sock_net(smc->clcsock->sk);
-	struct smc_link_group *lgr = smc->conn.lgr;
-	struct smc_link *link;
+	struct smc_link *link = smc->conn.lnk;
 	int rest;
 	int rc;
 
-	link = &lgr->lnk[SMC_SINGLE_LINK];
 	/* receive CONFIRM LINK request from server over RoCE fabric */
 	rest = wait_for_completion_interruptible_timeout(
 		&link->llc_confirm,
@@ -610,7 +608,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 		mutex_unlock(&smc_client_lgr_pending);
 		return reason_code;
 	}
-	link = &smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+	link = smc->conn.lnk;
 
 	smc_conn_save_peer_info(smc, aclc);
 
@@ -1002,13 +1000,10 @@ void smc_close_non_accepted(struct sock *sk)
 static int smc_serv_conf_first_link(struct smc_sock *smc)
 {
 	struct net *net = sock_net(smc->clcsock->sk);
-	struct smc_link_group *lgr = smc->conn.lgr;
-	struct smc_link *link;
+	struct smc_link *link = smc->conn.lnk;
 	int rest;
 	int rc;
 
-	link = &lgr->lnk[SMC_SINGLE_LINK];
-
 	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
@@ -1194,7 +1189,7 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
-	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_link *link = new_smc->conn.lnk;
 
 	if (local_contact != SMC_FIRST_CONTACT) {
 		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
@@ -1210,7 +1205,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
 				  struct smc_clc_msg_accept_confirm *cclc,
 				  int local_contact)
 {
-	struct smc_link *link = &new_smc->conn.lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_link *link = new_smc->conn.lnk;
 	int reason_code = 0;
 
 	if (local_contact == SMC_FIRST_CONTACT)
diff --git a/net/smc/smc.h b/net/smc/smc.h
index be11ba41190f..1a084afa7372 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -121,6 +121,7 @@ enum smc_urg_state {
 struct smc_connection {
 	struct rb_node		alert_node;
 	struct smc_link_group	*lgr;		/* link group of connection */
+	struct smc_link		*lnk;		/* assigned SMC-R link */
 	u32			alert_token_local; /* unique conn. id */
 	u8			peer_rmbe_idx;	/* from tcp handshake */
 	int			peer_rmbe_size;	/* size of peer rx buffer */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 164f1584861b..f64589d823aa 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -57,7 +57,7 @@ int smc_cdc_get_free_slot(struct smc_connection *conn,
 			  struct smc_rdma_wr **wr_rdma_buf,
 			  struct smc_cdc_tx_pend **pend)
 {
-	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_link *link = conn->lnk;
 	int rc;
 
 	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -91,12 +91,10 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 		     struct smc_wr_buf *wr_buf,
 		     struct smc_cdc_tx_pend *pend)
 {
+	struct smc_link *link = conn->lnk;
 	union smc_host_cursor cfed;
-	struct smc_link *link;
 	int rc;
 
-	link = &conn->lgr->lnk[SMC_SINGLE_LINK];
-
 	smc_cdc_add_pending_send(conn, pend);
 
 	conn->tx_cdc_seq++;
@@ -165,7 +163,7 @@ static void smc_cdc_tx_dismisser(struct smc_wr_tx_pend_priv *tx_pend)
 
 void smc_cdc_tx_dismiss_slots(struct smc_connection *conn)
 {
-	struct smc_link *link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_link *link = conn->lnk;
 
 	smc_wr_tx_dismiss_slots(link, SMC_CDC_MSG_TYPE,
 				smc_cdc_tx_filter, smc_cdc_tx_dismisser,
diff --git a/net/smc/smc_clc.c b/net/smc/smc_clc.c
index ea0068f0173c..d5627df24215 100644
--- a/net/smc/smc_clc.c
+++ b/net/smc/smc_clc.c
@@ -496,7 +496,7 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 		       sizeof(SMCD_EYECATCHER));
 	} else {
 		/* SMC-R specific settings */
-		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+		link = conn->lnk;
 		memcpy(cclc.hdr.eyecatcher, SMC_EYECATCHER,
 		       sizeof(SMC_EYECATCHER));
 		cclc.hdr.path = SMC_TYPE_R;
@@ -508,13 +508,13 @@ int smc_clc_send_confirm(struct smc_sock *smc)
 		       ETH_ALEN);
 		hton24(cclc.qpn, link->roce_qp->qp_num);
 		cclc.rmb_rkey =
-			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+			htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
 		cclc.rmbe_idx = 1; /* for now: 1 RMB = 1 RMBE */
 		cclc.rmbe_alert_token = htonl(conn->alert_token_local);
 		cclc.qp_mtu = min(link->path_mtu, link->peer_mtu);
 		cclc.rmbe_size = conn->rmbe_size_short;
 		cclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
-				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+				(conn->rmb_desc->sgt[link->link_idx].sgl));
 		hton24(cclc.psn, link->psn_initial);
 		memcpy(cclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
 		       sizeof(SMC_EYECATCHER));
@@ -572,7 +572,7 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 		memcpy(aclc.hdr.eyecatcher, SMC_EYECATCHER,
 		       sizeof(SMC_EYECATCHER));
 		aclc.hdr.path = SMC_TYPE_R;
-		link = &conn->lgr->lnk[SMC_SINGLE_LINK];
+		link = conn->lnk;
 		memcpy(aclc.lcl.id_for_peer, local_systemid,
 		       sizeof(local_systemid));
 		memcpy(&aclc.lcl.gid, link->gid, SMC_GID_SIZE);
@@ -580,13 +580,13 @@ int smc_clc_send_accept(struct smc_sock *new_smc, int srv_first_contact)
 		       ETH_ALEN);
 		hton24(aclc.qpn, link->roce_qp->qp_num);
 		aclc.rmb_rkey =
-			htonl(conn->rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+			htonl(conn->rmb_desc->mr_rx[link->link_idx]->rkey);
 		aclc.rmbe_idx = 1;		/* as long as 1 RMB = 1 RMBE */
 		aclc.rmbe_alert_token = htonl(conn->alert_token_local);
 		aclc.qp_mtu = link->path_mtu;
 		aclc.rmbe_size = conn->rmbe_size_short,
 		aclc.rmb_dma_addr = cpu_to_be64((u64)sg_dma_address
-				(conn->rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+				(conn->rmb_desc->sgt[link->link_idx].sgl));
 		hton24(aclc.psn, link->psn_initial);
 		memcpy(aclc.smcr_trl.eyecatcher, SMC_EYECATCHER,
 		       sizeof(SMC_EYECATCHER));
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d233112ced2a..1d695093f205 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -131,6 +131,11 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
 			conn->alert_token_local = 0;
 	}
 	smc_lgr_add_alert_token(conn);
+
+	/* assign the new connection to a link */
+	if (!conn->lgr->is_smcd)
+		conn->lnk = &conn->lgr->lnk[SMC_SINGLE_LINK];
+
 	conn->lgr->conns_num++;
 }
 
@@ -275,6 +280,7 @@ static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 	atomic_inc(&ini->ib_dev->lnk_cnt);
 	lnk->state = SMC_LNK_ACTIVATING;
 	lnk->link_id = smcr_next_link_id(lgr);
+	lnk->lgr = lgr;
 	lnk->link_idx = link_idx;
 	lnk->smcibdev = ini->ib_dev;
 	lnk->ibport = ini->ib_port;
@@ -421,7 +427,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
 			if (!lgr->is_smcd && !list_empty(&lgr->list)) {
 				/* unregister rmb with peer */
 				smc_llc_do_delete_rkey(
-						&lgr->lnk[SMC_SINGLE_LINK],
+						conn->lnk,
 						conn->rmb_desc);
 			}
 			conn->rmb_desc->used = 0;
@@ -479,16 +485,15 @@ static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
 	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
 
 	if (is_rmb) {
-		if (buf_desc->mr_rx[SMC_SINGLE_LINK])
+		if (buf_desc->mr_rx[lnk->link_idx])
 			smc_ib_put_memory_region(
-					buf_desc->mr_rx[SMC_SINGLE_LINK]);
-		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
-				    DMA_FROM_DEVICE);
+					buf_desc->mr_rx[lnk->link_idx]);
+		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
 	} else {
-		smc_ib_buf_unmap_sg(lnk->smcibdev, buf_desc,
-				    DMA_TO_DEVICE);
+		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
 	}
-	sg_free_table(&buf_desc->sgt[SMC_SINGLE_LINK]);
+	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+
 	if (buf_desc->pages)
 		__free_pages(buf_desc->pages, buf_desc->order);
 	kfree(buf_desc);
@@ -1026,17 +1031,16 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 
 	/* build the sg table from the pages */
 	lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	rc = sg_alloc_table(&buf_desc->sgt[SMC_SINGLE_LINK], 1,
-			    GFP_KERNEL);
+	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
 	if (rc) {
 		smc_buf_free(lgr, is_rmb, buf_desc);
 		return ERR_PTR(rc);
 	}
-	sg_set_buf(buf_desc->sgt[SMC_SINGLE_LINK].sgl,
+	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
 		   buf_desc->cpu_addr, bufsize);
 
 	/* map sg table to DMA address */
-	rc = smc_ib_buf_map_sg(lnk->smcibdev, buf_desc,
+	rc = smc_ib_buf_map_sg(lnk, buf_desc,
 			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
 	/* SMC protocol depends on mapping to one DMA address only */
 	if (rc != 1)  {
@@ -1049,7 +1053,7 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 		rc = smc_ib_get_memory_region(lnk->roce_pd,
 					      IB_ACCESS_REMOTE_WRITE |
 					      IB_ACCESS_LOCAL_WRITE,
-					      buf_desc);
+					      buf_desc, lnk->link_idx);
 		if (rc) {
 			smc_buf_free(lgr, is_rmb, buf_desc);
 			return ERR_PTR(rc);
@@ -1174,22 +1178,16 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
-	struct smc_link_group *lgr = conn->lgr;
-
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-			       conn->sndbuf_desc, DMA_TO_DEVICE);
+	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
-	struct smc_link_group *lgr = conn->lgr;
-
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
-				  conn->sndbuf_desc, DMA_TO_DEVICE);
+	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
@@ -1198,7 +1196,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_cpu(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+	smc_ib_sync_sg_for_cpu(&lgr->lnk[SMC_SINGLE_LINK],
 			       conn->rmb_desc, DMA_FROM_DEVICE);
 }
 
@@ -1208,7 +1206,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_device(lgr->lnk[SMC_SINGLE_LINK].smcibdev,
+	smc_ib_sync_sg_for_device(&lgr->lnk[SMC_SINGLE_LINK],
 				  conn->rmb_desc, DMA_FROM_DEVICE);
 }
 
@@ -1245,15 +1243,16 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 }
 
 /* add a new rtoken from peer */
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
 {
+	struct smc_link_group *lgr = smc_get_lgr(lnk);
 	u64 dma_addr = be64_to_cpu(nw_vaddr);
 	u32 rkey = ntohl(nw_rkey);
 	int i;
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
-		if ((lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey) &&
-		    (lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr == dma_addr) &&
+		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
+		    lgr->rtokens[i][lnk->link_idx].dma_addr == dma_addr &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
 			/* already in list */
 			return i;
@@ -1262,22 +1261,23 @@ int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey)
 	i = smc_rmb_reserve_rtoken_idx(lgr);
 	if (i < 0)
 		return i;
-	lgr->rtokens[i][SMC_SINGLE_LINK].rkey = rkey;
-	lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = dma_addr;
+	lgr->rtokens[i][lnk->link_idx].rkey = rkey;
+	lgr->rtokens[i][lnk->link_idx].dma_addr = dma_addr;
 	return i;
 }
 
 /* delete an rtoken */
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
 {
+	struct smc_link_group *lgr = smc_get_lgr(lnk);
 	u32 rkey = ntohl(nw_rkey);
 	int i;
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
-		if (lgr->rtokens[i][SMC_SINGLE_LINK].rkey == rkey &&
+		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
-			lgr->rtokens[i][SMC_SINGLE_LINK].rkey = 0;
-			lgr->rtokens[i][SMC_SINGLE_LINK].dma_addr = 0;
+			lgr->rtokens[i][lnk->link_idx].rkey = 0;
+			lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
 
 			clear_bit(i, lgr->rtokens_used_mask);
 			return 0;
@@ -1290,7 +1290,7 @@ int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey)
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc)
 {
-	conn->rtoken_idx = smc_rtoken_add(conn->lgr, clc->rmb_dma_addr,
+	conn->rtoken_idx = smc_rtoken_add(conn->lnk, clc->rmb_dma_addr,
 					  clc->rmb_rkey);
 	if (conn->rtoken_idx < 0)
 		return conn->rtoken_idx;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c459b0639bf3..c71c35a3596c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -116,6 +116,7 @@ struct smc_link {
 	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
 	u8			link_idx;	/* index in lgr link array */
+	struct smc_link_group	*lgr;		/* parent link group */
 
 	enum smc_link_state	state;		/* state of link */
 	struct workqueue_struct *llc_wq;	/* single thread work queue */
@@ -303,8 +304,8 @@ int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
 			    struct smc_clc_msg_accept_confirm *clc);
-int smc_rtoken_add(struct smc_link_group *lgr, __be64 nw_vaddr, __be32 nw_rkey);
-int smc_rtoken_delete(struct smc_link_group *lgr, __be32 nw_rkey);
+int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
+int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
@@ -319,6 +320,6 @@ void smc_core_exit(void);
 
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
-	return container_of(link, struct smc_link_group, lnk[SMC_SINGLE_LINK]);
+	return link->lgr;
 }
 #endif
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 440f9e319a38..c090678a3e5a 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -389,15 +389,15 @@ void smc_ib_put_memory_region(struct ib_mr *mr)
 	ib_dereg_mr(mr);
 }
 
-static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
+static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot, u8 link_idx)
 {
 	unsigned int offset = 0;
 	int sg_num;
 
 	/* map the largest prefix of a dma mapped SG list */
-	sg_num = ib_map_mr_sg(buf_slot->mr_rx[SMC_SINGLE_LINK],
-			      buf_slot->sgt[SMC_SINGLE_LINK].sgl,
-			      buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+	sg_num = ib_map_mr_sg(buf_slot->mr_rx[link_idx],
+			      buf_slot->sgt[link_idx].sgl,
+			      buf_slot->sgt[link_idx].orig_nents,
 			      &offset, PAGE_SIZE);
 
 	return sg_num;
@@ -405,29 +405,29 @@ static int smc_ib_map_mr_sg(struct smc_buf_desc *buf_slot)
 
 /* Allocate a memory region and map the dma mapped SG list of buf_slot */
 int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct smc_buf_desc *buf_slot)
+			     struct smc_buf_desc *buf_slot, u8 link_idx)
 {
-	if (buf_slot->mr_rx[SMC_SINGLE_LINK])
+	if (buf_slot->mr_rx[link_idx])
 		return 0; /* already done */
 
-	buf_slot->mr_rx[SMC_SINGLE_LINK] =
+	buf_slot->mr_rx[link_idx] =
 		ib_alloc_mr(pd, IB_MR_TYPE_MEM_REG, 1 << buf_slot->order);
-	if (IS_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK])) {
+	if (IS_ERR(buf_slot->mr_rx[link_idx])) {
 		int rc;
 
-		rc = PTR_ERR(buf_slot->mr_rx[SMC_SINGLE_LINK]);
-		buf_slot->mr_rx[SMC_SINGLE_LINK] = NULL;
+		rc = PTR_ERR(buf_slot->mr_rx[link_idx]);
+		buf_slot->mr_rx[link_idx] = NULL;
 		return rc;
 	}
 
-	if (smc_ib_map_mr_sg(buf_slot) != 1)
+	if (smc_ib_map_mr_sg(buf_slot, link_idx) != 1)
 		return -EINVAL;
 
 	return 0;
 }
 
 /* synchronize buffer usage for cpu access */
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
 			    struct smc_buf_desc *buf_slot,
 			    enum dma_data_direction data_direction)
 {
@@ -435,11 +435,11 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
 	unsigned int i;
 
 	/* for now there is just one DMA address */
-	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
-		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+	for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+		    buf_slot->sgt[lnk->link_idx].nents, i) {
 		if (!sg_dma_len(sg))
 			break;
-		ib_dma_sync_single_for_cpu(smcibdev->ibdev,
+		ib_dma_sync_single_for_cpu(lnk->smcibdev->ibdev,
 					   sg_dma_address(sg),
 					   sg_dma_len(sg),
 					   data_direction);
@@ -447,7 +447,7 @@ void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
 }
 
 /* synchronize buffer usage for device access */
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
 			       struct smc_buf_desc *buf_slot,
 			       enum dma_data_direction data_direction)
 {
@@ -455,11 +455,11 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
 	unsigned int i;
 
 	/* for now there is just one DMA address */
-	for_each_sg(buf_slot->sgt[SMC_SINGLE_LINK].sgl, sg,
-		    buf_slot->sgt[SMC_SINGLE_LINK].nents, i) {
+	for_each_sg(buf_slot->sgt[lnk->link_idx].sgl, sg,
+		    buf_slot->sgt[lnk->link_idx].nents, i) {
 		if (!sg_dma_len(sg))
 			break;
-		ib_dma_sync_single_for_device(smcibdev->ibdev,
+		ib_dma_sync_single_for_device(lnk->smcibdev->ibdev,
 					      sg_dma_address(sg),
 					      sg_dma_len(sg),
 					      data_direction);
@@ -467,15 +467,15 @@ void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
 }
 
 /* Map a new TX or RX buffer SG-table to DMA */
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction)
 {
 	int mapped_nents;
 
-	mapped_nents = ib_dma_map_sg(smcibdev->ibdev,
-				     buf_slot->sgt[SMC_SINGLE_LINK].sgl,
-				     buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+	mapped_nents = ib_dma_map_sg(lnk->smcibdev->ibdev,
+				     buf_slot->sgt[lnk->link_idx].sgl,
+				     buf_slot->sgt[lnk->link_idx].orig_nents,
 				     data_direction);
 	if (!mapped_nents)
 		return -ENOMEM;
@@ -483,18 +483,18 @@ int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
 	return mapped_nents;
 }
 
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
 			 struct smc_buf_desc *buf_slot,
 			 enum dma_data_direction data_direction)
 {
-	if (!buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address)
+	if (!buf_slot->sgt[lnk->link_idx].sgl->dma_address)
 		return; /* already unmapped */
 
-	ib_dma_unmap_sg(smcibdev->ibdev,
-			buf_slot->sgt[SMC_SINGLE_LINK].sgl,
-			buf_slot->sgt[SMC_SINGLE_LINK].orig_nents,
+	ib_dma_unmap_sg(lnk->smcibdev->ibdev,
+			buf_slot->sgt[lnk->link_idx].sgl,
+			buf_slot->sgt[lnk->link_idx].orig_nents,
 			data_direction);
-	buf_slot->sgt[SMC_SINGLE_LINK].sgl->dma_address = 0;
+	buf_slot->sgt[lnk->link_idx].sgl->dma_address = 0;
 }
 
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev)
diff --git a/net/smc/smc_ib.h b/net/smc/smc_ib.h
index 5c2b115d36da..e6a696ae15f3 100644
--- a/net/smc/smc_ib.h
+++ b/net/smc/smc_ib.h
@@ -59,10 +59,10 @@ struct smc_link;
 int smc_ib_register_client(void) __init;
 void smc_ib_unregister_client(void);
 bool smc_ib_port_active(struct smc_ib_device *smcibdev, u8 ibport);
-int smc_ib_buf_map_sg(struct smc_ib_device *smcibdev,
+int smc_ib_buf_map_sg(struct smc_link *lnk,
 		      struct smc_buf_desc *buf_slot,
 		      enum dma_data_direction data_direction);
-void smc_ib_buf_unmap_sg(struct smc_ib_device *smcibdev,
+void smc_ib_buf_unmap_sg(struct smc_link *lnk,
 			 struct smc_buf_desc *buf_slot,
 			 enum dma_data_direction data_direction);
 void smc_ib_dealloc_protection_domain(struct smc_link *lnk);
@@ -74,12 +74,12 @@ int smc_ib_modify_qp_rts(struct smc_link *lnk);
 int smc_ib_modify_qp_reset(struct smc_link *lnk);
 long smc_ib_setup_per_ibdev(struct smc_ib_device *smcibdev);
 int smc_ib_get_memory_region(struct ib_pd *pd, int access_flags,
-			     struct smc_buf_desc *buf_slot);
+			     struct smc_buf_desc *buf_slot, u8 link_idx);
 void smc_ib_put_memory_region(struct ib_mr *mr);
-void smc_ib_sync_sg_for_cpu(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_cpu(struct smc_link *lnk,
 			    struct smc_buf_desc *buf_slot,
 			    enum dma_data_direction data_direction);
-void smc_ib_sync_sg_for_device(struct smc_ib_device *smcibdev,
+void smc_ib_sync_sg_for_device(struct smc_link *lnk,
 			       struct smc_buf_desc *buf_slot,
 			       enum dma_data_direction data_direction);
 int smc_ib_determine_gid(struct smc_ib_device *smcibdev, u8 ibport,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 0e52aab53d97..34d0752ba6af 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -231,9 +231,9 @@ static int smc_llc_send_confirm_rkey(struct smc_link *link,
 	rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
 	rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
 	rkeyllc->rtoken[0].rmb_key =
-		htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+		htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
 	rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
-		(u64)sg_dma_address(rmb_desc->sgt[SMC_SINGLE_LINK].sgl));
+		(u64)sg_dma_address(rmb_desc->sgt[link->link_idx].sgl));
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -256,7 +256,7 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
 	rkeyllc->hd.common.type = SMC_LLC_DELETE_RKEY;
 	rkeyllc->hd.length = sizeof(struct smc_llc_msg_delete_rkey);
 	rkeyllc->num_rkeys = 1;
-	rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[SMC_SINGLE_LINK]->rkey);
+	rkeyllc->rkey[0] = htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -501,7 +501,7 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
 					    SMC_LLC_FLAG_RKEY_NEG;
 		complete(&link->llc_confirm_rkey);
 	} else {
-		rc = smc_rtoken_add(smc_get_lgr(link),
+		rc = smc_rtoken_add(link,
 				    llc->rtoken[0].rmb_vaddr,
 				    llc->rtoken[0].rmb_key);
 
@@ -539,7 +539,7 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
 	} else {
 		max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
 		for (i = 0; i < max; i++) {
-			if (smc_rtoken_delete(smc_get_lgr(link), llc->rkey[i]))
+			if (smc_rtoken_delete(link, llc->rkey[i]))
 				err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
 		}
 
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 9f1ade86d70e..d74bfe6a90f1 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -269,19 +269,18 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 			     int num_sges, struct ib_rdma_wr *rdma_wr)
 {
 	struct smc_link_group *lgr = conn->lgr;
-	struct smc_link *link;
+	struct smc_link *link = conn->lnk;
 	int rc;
 
-	link = &lgr->lnk[SMC_SINGLE_LINK];
 	rdma_wr->wr.wr_id = smc_wr_tx_get_next_wr_id(link);
 	rdma_wr->wr.num_sge = num_sges;
 	rdma_wr->remote_addr =
-		lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].dma_addr +
+		lgr->rtokens[conn->rtoken_idx][link->link_idx].dma_addr +
 		/* RMBE within RMB */
 		conn->tx_off +
 		/* offset within RMBE */
 		peer_rmbe_offset;
-	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][SMC_SINGLE_LINK].rkey;
+	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
 	rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
 	if (rc)
 		smc_lgr_terminate_sched(lgr);
@@ -310,8 +309,10 @@ static int smcr_tx_rdma_writes(struct smc_connection *conn, size_t len,
 			       size_t dst_off, size_t dst_len,
 			       struct smc_rdma_wr *wr_rdma_buf)
 {
+	struct smc_link *link = conn->lnk;
+
 	dma_addr_t dma_addr =
-		sg_dma_address(conn->sndbuf_desc->sgt[SMC_SINGLE_LINK].sgl);
+		sg_dma_address(conn->sndbuf_desc->sgt[link->link_idx].sgl);
 	int src_len_sum = src_len, dst_len_sum = dst_len;
 	int sent_count = src_off;
 	int srcchunk, dstchunk;
@@ -507,7 +508,7 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 	if (!pflags->urg_data_present) {
 		rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
 		if (rc) {
-			smc_wr_tx_put_slot(&conn->lgr->lnk[SMC_SINGLE_LINK],
+			smc_wr_tx_put_slot(conn->lnk,
 					   (struct smc_wr_tx_pend_priv *)pend);
 			goto out_unlock;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b9247544c1bccfe1b74ddf1dade719a69946cbb1 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:41 +0200
Subject: net/smc: convert static link ID instances to support multiple links

As preparation for the support of multiple links, remove the usage of
a static link id (SMC_SINGLE_LINK) and allow dynamic link ids.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  54 ++++++---
 net/smc/smc_clc.h  |   1 +
 net/smc/smc_core.c | 332 ++++++++++++++++++++++++++++++++++++-----------------
 net/smc/smc_core.h |  37 +++---
 net/smc/smc_llc.c  |   2 +
 5 files changed, 291 insertions(+), 135 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6e4bad8c64a8..890dc6422f8c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -338,28 +338,48 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 }
 
 /* register a new rmb, send confirm_rkey msg to register with peer */
-static int smc_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc,
-		       bool conf_rkey)
+static int smcr_link_reg_rmb(struct smc_link *link,
+			     struct smc_buf_desc *rmb_desc, bool conf_rkey)
 {
-	if (!rmb_desc->wr_reg) {
+	if (!rmb_desc->is_reg_mr[link->link_idx]) {
 		/* register memory region for new rmb */
 		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
-			rmb_desc->regerr = 1;
+			rmb_desc->is_reg_err = true;
 			return -EFAULT;
 		}
-		rmb_desc->wr_reg = 1;
+		rmb_desc->is_reg_mr[link->link_idx] = true;
 	}
 	if (!conf_rkey)
 		return 0;
+
 	/* exchange confirm_rkey msg with peer */
-	if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
-		rmb_desc->regerr = 1;
-		return -EFAULT;
+	if (!rmb_desc->is_conf_rkey) {
+		if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
+			rmb_desc->is_reg_err = true;
+			return -EFAULT;
+		}
+		rmb_desc->is_conf_rkey = true;
+	}
+	return 0;
+}
+
+/* register the new rmb on all links */
+static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
+			     struct smc_buf_desc *rmb_desc)
+{
+	int i, rc;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+			continue;
+		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
+		if (rc)
+			return rc;
 	}
 	return 0;
 }
 
-static int smc_clnt_conf_first_link(struct smc_sock *smc)
+static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 {
 	struct net *net = sock_net(smc->clcsock->sk);
 	struct smc_link *link = smc->conn.lnk;
@@ -387,7 +407,7 @@ static int smc_clnt_conf_first_link(struct smc_sock *smc)
 
 	smc_wr_remember_qp_attr(link);
 
-	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK response over RoCE fabric */
@@ -632,7 +652,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 						 ini->cln_first_contact);
 	} else {
-		if (smc_reg_rmb(link, smc->conn.rmb_desc, true))
+		if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 						 ini->cln_first_contact);
 	}
@@ -647,7 +667,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
 	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 		/* QP confirmation over RoCE fabric */
-		reason_code = smc_clnt_conf_first_link(smc);
+		reason_code = smcr_clnt_conf_first_link(smc);
 		if (reason_code)
 			return smc_connect_abort(smc, reason_code,
 						 ini->cln_first_contact);
@@ -997,14 +1017,14 @@ void smc_close_non_accepted(struct sock *sk)
 	sock_put(sk); /* final sock_put */
 }
 
-static int smc_serv_conf_first_link(struct smc_sock *smc)
+static int smcr_serv_conf_first_link(struct smc_sock *smc)
 {
 	struct net *net = sock_net(smc->clcsock->sk);
 	struct smc_link *link = smc->conn.lnk;
 	int rest;
 	int rc;
 
-	if (smc_reg_rmb(link, smc->conn.rmb_desc, false))
+	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -1189,10 +1209,10 @@ static int smc_listen_ism_init(struct smc_sock *new_smc,
 /* listen worker: register buffers */
 static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 {
-	struct smc_link *link = new_smc->conn.lnk;
+	struct smc_connection *conn = &new_smc->conn;
 
 	if (local_contact != SMC_FIRST_CONTACT) {
-		if (smc_reg_rmb(link, new_smc->conn.rmb_desc, true))
+		if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
 			return SMC_CLC_DECL_ERR_REGRMB;
 	}
 	smc_rmb_sync_sg_for_device(&new_smc->conn);
@@ -1222,7 +1242,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
 			goto decline;
 		}
 		/* QP confirmation over RoCE fabric */
-		reason_code = smc_serv_conf_first_link(new_smc);
+		reason_code = smcr_serv_conf_first_link(new_smc);
 		if (reason_code)
 			goto decline;
 	}
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index ca209272e5fa..4f2e150a2be1 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -44,6 +44,7 @@
 #define SMC_CLC_DECL_DIFFPREFIX	0x03070000  /* IP prefix / subnet mismatch    */
 #define SMC_CLC_DECL_GETVLANERR	0x03080000  /* err to get vlan id of ip device*/
 #define SMC_CLC_DECL_ISMVLANERR	0x03090000  /* err to reg vlan id on ism dev  */
+#define SMC_CLC_DECL_NOACTLINK	0x030a0000  /* no active smc-r link in lgr    */
 #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
 #define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
 #define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 1d695093f205..5df3f8f41d19 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -116,7 +116,7 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
  * Requires @conns_lock
  * Note that '0' is a reserved value and not assigned.
  */
-static void smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn)
 {
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 	static atomic_t nexttoken = ATOMIC_INIT(0);
@@ -133,10 +133,22 @@ static void smc_lgr_register_conn(struct smc_connection *conn)
 	smc_lgr_add_alert_token(conn);
 
 	/* assign the new connection to a link */
-	if (!conn->lgr->is_smcd)
-		conn->lnk = &conn->lgr->lnk[SMC_SINGLE_LINK];
+	if (!conn->lgr->is_smcd) {
+		struct smc_link *lnk;
+		int i;
 
+		/* tbd - link balancing */
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			lnk = &conn->lgr->lnk[i];
+			if (lnk->state == SMC_LNK_ACTIVATING ||
+			    lnk->state == SMC_LNK_ACTIVE)
+				conn->lnk = lnk;
+		}
+		if (!conn->lnk)
+			return SMC_CLC_DECL_NOACTLINK;
+	}
 	conn->lgr->conns_num++;
+	return 0;
 }
 
 /* Unregister connection and reset the alert token of the given connection<
@@ -202,8 +214,8 @@ static void smc_lgr_free_work(struct work_struct *work)
 						  struct smc_link_group,
 						  free_work);
 	spinlock_t *lgr_lock;
-	struct smc_link *lnk;
 	bool conns;
+	int i;
 
 	smc_lgr_list_head(lgr, &lgr_lock);
 	spin_lock_bh(lgr_lock);
@@ -220,25 +232,38 @@ static void smc_lgr_free_work(struct work_struct *work)
 	}
 	list_del_init(&lgr->list); /* remove from smc_lgr_list */
 
-	lnk = &lgr->lnk[SMC_SINGLE_LINK];
 	if (!lgr->is_smcd && !lgr->terminating)	{
-		/* try to send del link msg, on error free lgr immediately */
-		if (lnk->state == SMC_LNK_ACTIVE &&
-		    !smcr_link_send_delete(lnk, true)) {
-			/* reschedule in case we never receive a response */
-			smc_lgr_schedule_free_work(lgr);
+		bool do_wait = false;
+
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			struct smc_link *lnk = &lgr->lnk[i];
+			/* try to send del link msg, on err free immediately */
+			if (lnk->state == SMC_LNK_ACTIVE &&
+			    !smcr_link_send_delete(lnk, true)) {
+				/* reschedule in case we never receive a resp */
+				smc_lgr_schedule_free_work(lgr);
+				do_wait = true;
+			}
+		}
+		if (do_wait) {
 			spin_unlock_bh(lgr_lock);
-			return;
+			return; /* wait for resp, see smc_llc_rx_delete_link */
 		}
 	}
 	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
 	spin_unlock_bh(lgr_lock);
 	cancel_delayed_work(&lgr->free_work);
 
-	if (!lgr->is_smcd && lnk->state != SMC_LNK_INACTIVE)
-		smc_llc_link_inactive(lnk);
 	if (lgr->is_smcd && !lgr->terminating)
 		smc_ism_signal_shutdown(lgr);
+	if (!lgr->is_smcd) {
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			struct smc_link *lnk = &lgr->lnk[i];
+
+			if (lnk->state != SMC_LNK_INACTIVE)
+				smc_llc_link_inactive(lnk);
+		}
+	}
 	smc_lgr_free(lgr);
 }
 
@@ -417,29 +442,37 @@ out:
 	return rc;
 }
 
+static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
+			   struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+
+	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
+		/* unregister rmb with peer */
+		smc_llc_do_delete_rkey(lnk, rmb_desc);
+		rmb_desc->is_conf_rkey = false;
+	}
+	if (rmb_desc->is_reg_err) {
+		/* buf registration failed, reuse not possible */
+		write_lock_bh(&lgr->rmbs_lock);
+		list_del(&rmb_desc->list);
+		write_unlock_bh(&lgr->rmbs_lock);
+
+		smc_buf_free(lgr, true, rmb_desc);
+	} else {
+		rmb_desc->used = 0;
+	}
+}
+
 static void smc_buf_unuse(struct smc_connection *conn,
 			  struct smc_link_group *lgr)
 {
 	if (conn->sndbuf_desc)
 		conn->sndbuf_desc->used = 0;
-	if (conn->rmb_desc) {
-		if (!conn->rmb_desc->regerr) {
-			if (!lgr->is_smcd && !list_empty(&lgr->list)) {
-				/* unregister rmb with peer */
-				smc_llc_do_delete_rkey(
-						conn->lnk,
-						conn->rmb_desc);
-			}
-			conn->rmb_desc->used = 0;
-		} else {
-			/* buf registration failed, reuse not possible */
-			write_lock_bh(&lgr->rmbs_lock);
-			list_del(&conn->rmb_desc->list);
-			write_unlock_bh(&lgr->rmbs_lock);
-
-			smc_buf_free(lgr, true, conn->rmb_desc);
-		}
-	}
+	if (conn->rmb_desc && lgr->is_smcd)
+		conn->rmb_desc->used = 0;
+	else if (conn->rmb_desc)
+		smcr_buf_unuse(conn->rmb_desc, conn->lnk);
 }
 
 /* remove a finished connection from its link group */
@@ -467,6 +500,8 @@ void smc_conn_free(struct smc_connection *conn)
 
 static void smcr_link_clear(struct smc_link *lnk)
 {
+	if (lnk->peer_qpn == 0)
+		return;
 	lnk->peer_qpn = 0;
 	smc_llc_link_clear(lnk);
 	smc_ib_modify_qp_reset(lnk);
@@ -482,17 +517,23 @@ static void smcr_link_clear(struct smc_link *lnk)
 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
 			  struct smc_buf_desc *buf_desc)
 {
-	struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+	struct smc_link *lnk;
+	int i;
 
-	if (is_rmb) {
-		if (buf_desc->mr_rx[lnk->link_idx])
-			smc_ib_put_memory_region(
-					buf_desc->mr_rx[lnk->link_idx]);
-		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
-	} else {
-		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		lnk = &lgr->lnk[i];
+		if (!buf_desc->is_map_ib[lnk->link_idx])
+			continue;
+		if (is_rmb) {
+			if (buf_desc->mr_rx[lnk->link_idx])
+				smc_ib_put_memory_region(
+						buf_desc->mr_rx[lnk->link_idx]);
+			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+		} else {
+			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+		}
+		sg_free_table(&buf_desc->sgt[lnk->link_idx]);
 	}
-	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
 
 	if (buf_desc->pages)
 		__free_pages(buf_desc->pages, buf_desc->order);
@@ -551,6 +592,8 @@ static void smc_lgr_free_bufs(struct smc_link_group *lgr)
 /* remove a link group */
 static void smc_lgr_free(struct smc_link_group *lgr)
 {
+	int i;
+
 	smc_lgr_free_bufs(lgr);
 	if (lgr->is_smcd) {
 		if (!lgr->terminating) {
@@ -560,7 +603,11 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
 			wake_up(&lgr->smcd->lgrs_deleted);
 	} else {
-		smcr_link_clear(&lgr->lnk[SMC_SINGLE_LINK]);
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			if (lgr->lnk[i].state == SMC_LNK_INACTIVE)
+				continue;
+			smcr_link_clear(&lgr->lnk[i]);
+		}
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
 	}
@@ -628,16 +675,20 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
 
 static void smc_lgr_cleanup(struct smc_link_group *lgr)
 {
+	int i;
+
 	if (lgr->is_smcd) {
 		smc_ism_signal_shutdown(lgr);
 		smcd_unregister_all_dmbs(lgr);
 		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
 		put_device(&lgr->smcd->dev);
 	} else {
-		struct smc_link *lnk = &lgr->lnk[SMC_SINGLE_LINK];
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			struct smc_link *lnk = &lgr->lnk[i];
 
-		if (lnk->state != SMC_LNK_INACTIVE)
-			smc_llc_link_inactive(lnk);
+			if (lnk->state != SMC_LNK_INACTIVE)
+				smc_llc_link_inactive(lnk);
+		}
 	}
 }
 
@@ -650,6 +701,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 	struct smc_connection *conn;
 	struct smc_sock *smc;
 	struct rb_node *node;
+	int i;
 
 	if (lgr->terminating)
 		return;	/* lgr already terminating */
@@ -657,7 +709,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 		cancel_delayed_work_sync(&lgr->free_work);
 	lgr->terminating = 1;
 	if (!lgr->is_smcd)
-		smc_llc_link_inactive(&lgr->lnk[SMC_SINGLE_LINK]);
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+			smc_llc_link_inactive(&lgr->lnk[i]);
 
 	/* kill remaining link group connections */
 	read_lock_bh(&lgr->conns_lock);
@@ -703,14 +756,22 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 {
 	struct smc_link_group *lgr, *l;
 	LIST_HEAD(lgr_free_list);
+	int i;
 
 	spin_lock_bh(&smc_lgr_list.lock);
 	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-		if (!lgr->is_smcd &&
-		    lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev &&
-		    lgr->lnk[SMC_SINGLE_LINK].ibport == ibport) {
-			list_move(&lgr->list, &lgr_free_list);
-			lgr->freeing = 1;
+		if (lgr->is_smcd)
+			continue;
+		/* tbd - terminate only when no more links are active */
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			if (lgr->lnk[i].state == SMC_LNK_INACTIVE ||
+			    lgr->lnk[i].state == SMC_LNK_DELETING)
+				continue;
+			if (lgr->lnk[i].smcibdev == smcibdev &&
+			    lgr->lnk[i].ibport == ibport) {
+				list_move(&lgr->list, &lgr_free_list);
+				lgr->freeing = 1;
+			}
 		}
 	}
 	spin_unlock_bh(&smc_lgr_list.lock);
@@ -775,6 +836,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 {
 	struct smc_link_group *lgr, *lg;
 	LIST_HEAD(lgr_free_list);
+	int i;
 
 	spin_lock_bh(&smc_lgr_list.lock);
 	if (!smcibdev) {
@@ -783,9 +845,12 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 			lgr->freeing = 1;
 	} else {
 		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
-			if (lgr->lnk[SMC_SINGLE_LINK].smcibdev == smcibdev) {
-				list_move(&lgr->list, &lgr_free_list);
-				lgr->freeing = 1;
+			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+				if (lgr->lnk[i].smcibdev == smcibdev) {
+					list_move(&lgr->list, &lgr_free_list);
+					lgr->freeing = 1;
+					break;
+				}
 			}
 		}
 	}
@@ -857,15 +922,21 @@ static bool smcr_lgr_match(struct smc_link_group *lgr,
 			   struct smc_clc_msg_local *lcl,
 			   enum smc_lgr_role role, u32 clcqpn)
 {
-	return !memcmp(lgr->peer_systemid, lcl->id_for_peer,
-		       SMC_SYSTEMID_LEN) &&
-		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_gid, &lcl->gid,
-			SMC_GID_SIZE) &&
-		!memcmp(lgr->lnk[SMC_SINGLE_LINK].peer_mac, lcl->mac,
-			sizeof(lcl->mac)) &&
-		lgr->role == role &&
-		(lgr->role == SMC_SERV ||
-		 lgr->lnk[SMC_SINGLE_LINK].peer_qpn == clcqpn);
+	int i;
+
+	if (memcmp(lgr->peer_systemid, lcl->id_for_peer, SMC_SYSTEMID_LEN) ||
+	    lgr->role != role)
+		return false;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
+			continue;
+		if ((lgr->role == SMC_SERV || lgr->lnk[i].peer_qpn == clcqpn) &&
+		    !memcmp(lgr->lnk[i].peer_gid, &lcl->gid, SMC_GID_SIZE) &&
+		    !memcmp(lgr->lnk[i].peer_mac, lcl->mac, sizeof(lcl->mac)))
+			return true;
+	}
+	return false;
 }
 
 static bool smcd_lgr_match(struct smc_link_group *lgr,
@@ -906,15 +977,17 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 			/* link group found */
 			ini->cln_first_contact = SMC_REUSE_CONTACT;
 			conn->lgr = lgr;
-			smc_lgr_register_conn(conn); /* add smc conn to lgr */
-			if (delayed_work_pending(&lgr->free_work))
-				cancel_delayed_work(&lgr->free_work);
+			rc = smc_lgr_register_conn(conn); /* add conn to lgr */
 			write_unlock_bh(&lgr->conns_lock);
+			if (!rc && delayed_work_pending(&lgr->free_work))
+				cancel_delayed_work(&lgr->free_work);
 			break;
 		}
 		write_unlock_bh(&lgr->conns_lock);
 	}
 	spin_unlock_bh(lgr_lock);
+	if (rc)
+		return rc;
 
 	if (role == SMC_CLNT && !ini->srv_first_contact &&
 	    ini->cln_first_contact == SMC_FIRST_CONTACT) {
@@ -932,8 +1005,10 @@ create:
 			goto out;
 		lgr = conn->lgr;
 		write_lock_bh(&lgr->conns_lock);
-		smc_lgr_register_conn(conn); /* add smc conn to lgr */
+		rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
 		write_unlock_bh(&lgr->conns_lock);
+		if (rc)
+			goto out;
 	}
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
@@ -1006,12 +1081,55 @@ static inline int smc_rmb_wnd_update_limit(int rmbe_size)
 	return min_t(int, rmbe_size / 10, SOCK_MIN_SNDBUF / 2);
 }
 
+/* map an rmb buf to a link */
+static int smcr_buf_map_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+			     struct smc_link *lnk)
+{
+	int rc;
+
+	if (buf_desc->is_map_ib[lnk->link_idx])
+		return 0;
+
+	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
+	if (rc)
+		return rc;
+	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
+		   buf_desc->cpu_addr, buf_desc->len);
+
+	/* map sg table to DMA address */
+	rc = smc_ib_buf_map_sg(lnk, buf_desc,
+			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+	/* SMC protocol depends on mapping to one DMA address only */
+	if (rc != 1) {
+		rc = -EAGAIN;
+		goto free_table;
+	}
+
+	/* create a new memory region for the RMB */
+	if (is_rmb) {
+		rc = smc_ib_get_memory_region(lnk->roce_pd,
+					      IB_ACCESS_REMOTE_WRITE |
+					      IB_ACCESS_LOCAL_WRITE,
+					      buf_desc, lnk->link_idx);
+		if (rc)
+			goto buf_unmap;
+		smc_ib_sync_sg_for_device(lnk, buf_desc, DMA_FROM_DEVICE);
+	}
+	buf_desc->is_map_ib[lnk->link_idx] = true;
+	return 0;
+
+buf_unmap:
+	smc_ib_buf_unmap_sg(lnk, buf_desc,
+			    is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
+free_table:
+	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+	return rc;
+}
+
 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 						bool is_rmb, int bufsize)
 {
 	struct smc_buf_desc *buf_desc;
-	struct smc_link *lnk;
-	int rc;
 
 	/* try to alloc a new buffer */
 	buf_desc = kzalloc(sizeof(*buf_desc), GFP_KERNEL);
@@ -1028,40 +1146,32 @@ static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 		return ERR_PTR(-EAGAIN);
 	}
 	buf_desc->cpu_addr = (void *)page_address(buf_desc->pages);
+	buf_desc->len = bufsize;
+	return buf_desc;
+}
 
-	/* build the sg table from the pages */
-	lnk = &lgr->lnk[SMC_SINGLE_LINK];
-	rc = sg_alloc_table(&buf_desc->sgt[lnk->link_idx], 1, GFP_KERNEL);
-	if (rc) {
-		smc_buf_free(lgr, is_rmb, buf_desc);
-		return ERR_PTR(rc);
-	}
-	sg_set_buf(buf_desc->sgt[lnk->link_idx].sgl,
-		   buf_desc->cpu_addr, bufsize);
+/* map buf_desc on all usable links,
+ * unused buffers stay mapped as long as the link is up
+ */
+static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
+				     struct smc_buf_desc *buf_desc, bool is_rmb)
+{
+	int i, rc = 0;
 
-	/* map sg table to DMA address */
-	rc = smc_ib_buf_map_sg(lnk, buf_desc,
-			       is_rmb ? DMA_FROM_DEVICE : DMA_TO_DEVICE);
-	/* SMC protocol depends on mapping to one DMA address only */
-	if (rc != 1)  {
-		smc_buf_free(lgr, is_rmb, buf_desc);
-		return ERR_PTR(-EAGAIN);
-	}
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		struct smc_link *lnk = &lgr->lnk[i];
 
-	/* create a new memory region for the RMB */
-	if (is_rmb) {
-		rc = smc_ib_get_memory_region(lnk->roce_pd,
-					      IB_ACCESS_REMOTE_WRITE |
-					      IB_ACCESS_LOCAL_WRITE,
-					      buf_desc, lnk->link_idx);
-		if (rc) {
-			smc_buf_free(lgr, is_rmb, buf_desc);
-			return ERR_PTR(rc);
+		if (lnk->state != SMC_LNK_ACTIVE &&
+		    lnk->state != SMC_LNK_ACTIVATING)
+			continue;
+		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
+			smcr_buf_unuse(buf_desc, lnk);
+			rc = -ENOMEM;
+			goto out;
 		}
 	}
-
-	buf_desc->len = bufsize;
-	return buf_desc;
+out:
+	return rc;
 }
 
 #define SMCD_DMBE_SIZES		7 /* 0 -> 16KB, 1 -> 32KB, .. 6 -> 1MB */
@@ -1159,6 +1269,12 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 	if (IS_ERR(buf_desc))
 		return -ENOMEM;
 
+	if (!is_smcd) {
+		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+			return -ENOMEM;
+		}
+	}
+
 	if (is_rmb) {
 		conn->rmb_desc = buf_desc;
 		conn->rmbe_size_short = bufsize_short;
@@ -1192,22 +1308,32 @@ void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 {
-	struct smc_link_group *lgr = conn->lgr;
+	int i;
 
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_cpu(&lgr->lnk[SMC_SINGLE_LINK],
-			       conn->rmb_desc, DMA_FROM_DEVICE);
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
+		    conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
+			continue;
+		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
+				       DMA_FROM_DEVICE);
+	}
 }
 
 void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 {
-	struct smc_link_group *lgr = conn->lgr;
+	int i;
 
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
-	smc_ib_sync_sg_for_device(&lgr->lnk[SMC_SINGLE_LINK],
-				  conn->rmb_desc, DMA_FROM_DEVICE);
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
+		    conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
+			continue;
+		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
+					  DMA_FROM_DEVICE);
+	}
 }
 
 /* create the send and receive buffer for an SMC socket;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index c71c35a3596c..66753ba23bc6 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -152,25 +152,32 @@ struct smc_buf_desc {
 	struct page		*pages;
 	int			len;		/* length of buffer */
 	u32			used;		/* currently used / unused */
-	u8			wr_reg	: 1;	/* mem region registered */
-	u8			regerr	: 1;	/* err during registration */
 	union {
 		struct { /* SMC-R */
-			struct sg_table		sgt[SMC_LINKS_PER_LGR_MAX];
-						/* virtual buffer */
-			struct ib_mr		*mr_rx[SMC_LINKS_PER_LGR_MAX];
-						/* for rmb only: memory region
-						 * incl. rkey provided to peer
-						 */
-			u32			order;	/* allocation order */
+			struct sg_table	sgt[SMC_LINKS_PER_LGR_MAX];
+					/* virtual buffer */
+			struct ib_mr	*mr_rx[SMC_LINKS_PER_LGR_MAX];
+					/* for rmb only: memory region
+					 * incl. rkey provided to peer
+					 */
+			u32		order;	/* allocation order */
+
+			u8		is_conf_rkey;
+					/* confirm_rkey done */
+			u8		is_reg_mr[SMC_LINKS_PER_LGR_MAX];
+					/* mem region registered */
+			u8		is_map_ib[SMC_LINKS_PER_LGR_MAX];
+					/* mem region mapped to lnk */
+			u8		is_reg_err;
+					/* buffer registration err */
 		};
 		struct { /* SMC-D */
-			unsigned short		sba_idx;
-						/* SBA index number */
-			u64			token;
-						/* DMB token number */
-			dma_addr_t		dma_addr;
-						/* DMA address */
+			unsigned short	sba_idx;
+					/* SBA index number */
+			u64		token;
+					/* DMB token number */
+			dma_addr_t	dma_addr;
+					/* DMA address */
 		};
 	};
 };
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 34d0752ba6af..903ae068da3a 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -662,6 +662,8 @@ void smc_llc_link_deleting(struct smc_link *link)
 /* called in tasklet context */
 void smc_llc_link_inactive(struct smc_link *link)
 {
+	if (link->state == SMC_LNK_INACTIVE)
+		return;
 	link->state = SMC_LNK_INACTIVE;
 	cancel_delayed_work(&link->llc_testlink_wrk);
 	smc_wr_wakeup_reg_wait(link);
-- 
cgit v1.2.3-59-g8ed1b


From e07d31dc16b0d77ff6b3f71cafe3a825fb80bed4 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:42 +0200
Subject: net/smc: multi-link support for smc_rmb_rtoken_handling()

Extend smc_rmb_rtoken_handling() and smc_rtoken_delete() to support
multiple links.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  4 ++--
 net/smc/smc_core.c | 14 ++++++++------
 net/smc/smc_core.h |  2 +-
 3 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 890dc6422f8c..e39f6aedd3bd 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -640,7 +640,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 	if (ini->cln_first_contact == SMC_FIRST_CONTACT)
 		smc_link_save_peer_info(link, aclc);
 
-	if (smc_rmb_rtoken_handling(&smc->conn, aclc))
+	if (smc_rmb_rtoken_handling(&smc->conn, link, aclc))
 		return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RTOK,
 					 ini->cln_first_contact);
 
@@ -1231,7 +1231,7 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
 	if (local_contact == SMC_FIRST_CONTACT)
 		smc_link_save_peer_info(link, cclc);
 
-	if (smc_rmb_rtoken_handling(&new_smc->conn, cclc)) {
+	if (smc_rmb_rtoken_handling(&new_smc->conn, link, cclc)) {
 		reason_code = SMC_CLC_DECL_ERR_RTOK;
 		goto decline;
 	}
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 5df3f8f41d19..e8897d60b27f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1392,19 +1392,20 @@ int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
 	return i;
 }
 
-/* delete an rtoken */
+/* delete an rtoken from all links */
 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
 {
 	struct smc_link_group *lgr = smc_get_lgr(lnk);
 	u32 rkey = ntohl(nw_rkey);
-	int i;
+	int i, j;
 
 	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
 		if (lgr->rtokens[i][lnk->link_idx].rkey == rkey &&
 		    test_bit(i, lgr->rtokens_used_mask)) {
-			lgr->rtokens[i][lnk->link_idx].rkey = 0;
-			lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
-
+			for (j = 0; j < SMC_LINKS_PER_LGR_MAX; j++) {
+				lgr->rtokens[i][j].rkey = 0;
+				lgr->rtokens[i][j].dma_addr = 0;
+			}
 			clear_bit(i, lgr->rtokens_used_mask);
 			return 0;
 		}
@@ -1414,9 +1415,10 @@ int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey)
 
 /* save rkey and dma_addr received from peer during clc handshake */
 int smc_rmb_rtoken_handling(struct smc_connection *conn,
+			    struct smc_link *lnk,
 			    struct smc_clc_msg_accept_confirm *clc)
 {
-	conn->rtoken_idx = smc_rtoken_add(conn->lnk, clc->rmb_dma_addr,
+	conn->rtoken_idx = smc_rtoken_add(lnk, clc->rmb_dma_addr,
 					  clc->rmb_rkey);
 	if (conn->rtoken_idx < 0)
 		return conn->rtoken_idx;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 66753ba23bc6..f68ba187ecf8 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -309,7 +309,7 @@ void smc_smcd_terminate_all(struct smcd_dev *dev);
 void smc_smcr_terminate_all(struct smc_ib_device *smcibdev);
 int smc_buf_create(struct smc_sock *smc, bool is_smcd);
 int smc_uncompress_bufsize(u8 compressed);
-int smc_rmb_rtoken_handling(struct smc_connection *conn,
+int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
 			    struct smc_clc_msg_accept_confirm *clc);
 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
-- 
cgit v1.2.3-59-g8ed1b


From d854fcbfaeda9748c85de296fbe07b7763a1939c Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:43 +0200
Subject: net/smc: add new link state and related helpers

Before a link can be reused it must have been cleared. The lowest link
state so far is INACTIVE, which does not mean that the link is already
cleared.
Add a new state UNUSED that is set when the link is cleared and can be
reused.
Add helper smc_llc_usable_link() to find a usable link in a link group,
and smc_link_usable() to determine if a link is usable.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 36 +++++++++++++++++++-----------------
 net/smc/smc_core.h |  9 +++++++++
 net/smc/smc_llc.c  |  4 ++--
 net/smc/smc_llc.h  | 11 +++++++++++
 net/smc/smc_wr.c   |  2 +-
 5 files changed, 42 insertions(+), 20 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index e8897d60b27f..57890cbd4e8a 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -260,7 +260,7 @@ static void smc_lgr_free_work(struct work_struct *work)
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			struct smc_link *lnk = &lgr->lnk[i];
 
-			if (lnk->state != SMC_LNK_INACTIVE)
+			if (smc_link_usable(lnk))
 				smc_llc_link_inactive(lnk);
 		}
 	}
@@ -286,7 +286,7 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr)
 		if (!link_id)	/* skip zero as link_id */
 			link_id = ++lgr->next_link_id;
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].state != SMC_LNK_INACTIVE &&
+			if (smc_link_usable(&lgr->lnk[i]) &&
 			    lgr->lnk[i].link_id == link_id)
 				continue;
 		}
@@ -350,6 +350,7 @@ clear_llc_lnk:
 out:
 	put_device(&ini->ib_dev->ibdev->dev);
 	memset(lnk, 0, sizeof(struct smc_link));
+	lnk->state = SMC_LNK_UNUSED;
 	if (!atomic_dec_return(&ini->ib_dev->lnk_cnt))
 		wake_up(&ini->ib_dev->lnks_deleted);
 	return rc;
@@ -500,6 +501,8 @@ void smc_conn_free(struct smc_connection *conn)
 
 static void smcr_link_clear(struct smc_link *lnk)
 {
+	struct smc_ib_device *smcibdev;
+
 	if (lnk->peer_qpn == 0)
 		return;
 	lnk->peer_qpn = 0;
@@ -510,8 +513,11 @@ static void smcr_link_clear(struct smc_link *lnk)
 	smc_ib_dealloc_protection_domain(lnk);
 	smc_wr_free_link_mem(lnk);
 	put_device(&lnk->smcibdev->ibdev->dev);
-	if (!atomic_dec_return(&lnk->smcibdev->lnk_cnt))
-		wake_up(&lnk->smcibdev->lnks_deleted);
+	smcibdev = lnk->smcibdev;
+	memset(lnk, 0, sizeof(struct smc_link));
+	lnk->state = SMC_LNK_UNUSED;
+	if (!atomic_dec_return(&smcibdev->lnk_cnt))
+		wake_up(&smcibdev->lnks_deleted);
 }
 
 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
@@ -604,9 +610,8 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 			wake_up(&lgr->smcd->lgrs_deleted);
 	} else {
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].state == SMC_LNK_INACTIVE)
-				continue;
-			smcr_link_clear(&lgr->lnk[i]);
+			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+				smcr_link_clear(&lgr->lnk[i]);
 		}
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
@@ -686,7 +691,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			struct smc_link *lnk = &lgr->lnk[i];
 
-			if (lnk->state != SMC_LNK_INACTIVE)
+			if (smc_link_usable(lnk))
 				smc_llc_link_inactive(lnk);
 		}
 	}
@@ -764,7 +769,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 			continue;
 		/* tbd - terminate only when no more links are active */
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].state == SMC_LNK_INACTIVE ||
+			if (!smc_link_usable(&lgr->lnk[i]) ||
 			    lgr->lnk[i].state == SMC_LNK_DELETING)
 				continue;
 			if (lgr->lnk[i].smcibdev == smcibdev &&
@@ -1161,8 +1166,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		struct smc_link *lnk = &lgr->lnk[i];
 
-		if (lnk->state != SMC_LNK_ACTIVE &&
-		    lnk->state != SMC_LNK_ACTIVATING)
+		if (!smc_link_usable(lnk))
 			continue;
 		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
 			smcr_buf_unuse(buf_desc, lnk);
@@ -1294,14 +1298,14 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn)
 {
-	if (!conn->lgr || conn->lgr->is_smcd)
+	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
 		return;
 	smc_ib_sync_sg_for_cpu(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
 
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn)
 {
-	if (!conn->lgr || conn->lgr->is_smcd)
+	if (!conn->lgr || conn->lgr->is_smcd || !smc_link_usable(conn->lnk))
 		return;
 	smc_ib_sync_sg_for_device(conn->lnk, conn->sndbuf_desc, DMA_TO_DEVICE);
 }
@@ -1313,8 +1317,7 @@ void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn)
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-		if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
-		    conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
+		if (!smc_link_usable(&conn->lgr->lnk[i]))
 			continue;
 		smc_ib_sync_sg_for_cpu(&conn->lgr->lnk[i], conn->rmb_desc,
 				       DMA_FROM_DEVICE);
@@ -1328,8 +1331,7 @@ void smc_rmb_sync_sg_for_device(struct smc_connection *conn)
 	if (!conn->lgr || conn->lgr->is_smcd)
 		return;
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-		if (conn->lgr->lnk[i].state != SMC_LNK_ACTIVE &&
-		    conn->lgr->lnk[i].state != SMC_LNK_ACTIVATING)
+		if (!smc_link_usable(&conn->lgr->lnk[i]))
 			continue;
 		smc_ib_sync_sg_for_device(&conn->lgr->lnk[i], conn->rmb_desc,
 					  DMA_FROM_DEVICE);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index f68ba187ecf8..2b1960c8c8ce 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -32,6 +32,7 @@ enum smc_lgr_role {		/* possible roles of a link group */
 };
 
 enum smc_link_state {			/* possible states of a link */
+	SMC_LNK_UNUSED,		/* link is unused */
 	SMC_LNK_INACTIVE,	/* link is inactive */
 	SMC_LNK_ACTIVATING,	/* link is being activated */
 	SMC_LNK_ACTIVE,		/* link is active */
@@ -295,6 +296,14 @@ static inline struct smc_connection *smc_lgr_find_conn(
 	return res;
 }
 
+/* returns true if the specified link is usable */
+static inline bool smc_link_usable(struct smc_link *lnk)
+{
+	if (lnk->state == SMC_LNK_UNUSED || lnk->state == SMC_LNK_INACTIVE)
+		return false;
+	return true;
+}
+
 struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 903ae068da3a..c267f5006faa 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -372,7 +372,7 @@ static void smc_llc_send_message_work(struct work_struct *work)
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	if (llcwrk->link->state == SMC_LNK_INACTIVE)
+	if (!smc_link_usable(llcwrk->link))
 		goto out;
 	rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
 	if (rc)
@@ -562,7 +562,7 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 		return; /* short message */
 	if (llc->raw.hdr.length != sizeof(*llc))
 		return; /* invalid message */
-	if (link->state == SMC_LNK_INACTIVE)
+	if (!smc_link_usable(link))
 		return; /* link not active, drop msg */
 
 	switch (llc->raw.hdr.common.type) {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 461c0c3ef76e..08171131110c 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -35,6 +35,17 @@ enum smc_llc_msg_type {
 	SMC_LLC_DELETE_RKEY		= 0x09,
 };
 
+/* returns a usable link of the link group, or NULL */
+static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
+{
+	int i;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+		if (smc_link_usable(&lgr->lnk[i]))
+			return &lgr->lnk[i];
+	return NULL;
+}
+
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk,
 			      enum smc_llc_reqresp reqresp);
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 337ee52ad3d3..93223628c002 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -207,7 +207,7 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
 	} else {
 		rc = wait_event_interruptible_timeout(
 			link->wr_tx_wait,
-			link->state == SMC_LNK_INACTIVE ||
+			!smc_link_usable(link) ||
 			lgr->terminating ||
 			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
 			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
-- 
cgit v1.2.3-59-g8ed1b


From 1020e1ef53ceef715f2bc144eebbfe01e88effcf Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:44 +0200
Subject: net/smc: move testlink work to system work queue

The testlink work waits for a response to the testlink request and
blocks the single threaded llc_wq. This type of work does not have to be
serialized and can be moved to the system work queue.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index c267f5006faa..69cc0d65b437 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -613,14 +613,15 @@ static void smc_llc_testlink_work(struct work_struct *work)
 	/* receive TEST LINK response over RoCE fabric */
 	rc = wait_for_completion_interruptible_timeout(&link->llc_testlink_resp,
 						       SMC_LLC_WAIT_TIME);
+	if (link->state != SMC_LNK_ACTIVE)
+		return;		/* link state changed */
 	if (rc <= 0) {
 		smc_lgr_terminate_sched(smc_get_lgr(link));
 		return;
 	}
 	next_interval = link->llc_testlink_time;
 out:
-	queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
-			   next_interval);
+	schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
 }
 
 int smc_llc_link_init(struct smc_link *link)
@@ -648,8 +649,8 @@ void smc_llc_link_active(struct smc_link *link, int testlink_time)
 	link->state = SMC_LNK_ACTIVE;
 	if (testlink_time) {
 		link->llc_testlink_time = testlink_time * HZ;
-		queue_delayed_work(link->llc_wq, &link->llc_testlink_wrk,
-				   link->llc_testlink_time);
+		schedule_delayed_work(&link->llc_testlink_wrk,
+				      link->llc_testlink_time);
 	}
 }
 
@@ -665,7 +666,7 @@ void smc_llc_link_inactive(struct smc_link *link)
 	if (link->state == SMC_LNK_INACTIVE)
 		return;
 	link->state = SMC_LNK_INACTIVE;
-	cancel_delayed_work(&link->llc_testlink_wrk);
+	cancel_delayed_work_sync(&link->llc_testlink_wrk);
 	smc_wr_wakeup_reg_wait(link);
 	smc_wr_wakeup_tx_wait(link);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2140ac26f8f501d3cc8f1575e6419f1a50779496 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:45 +0200
Subject: net/smc: simplify link deactivation

Cancel the testlink worker during link clear processing and remove the
extra function smc_llc_link_inactive().

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  8 ++------
 net/smc/smc_llc.c  | 15 ++++-----------
 net/smc/smc_llc.h  |  1 -
 3 files changed, 6 insertions(+), 18 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 57890cbd4e8a..78ccfbf6e4af 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -261,7 +261,7 @@ static void smc_lgr_free_work(struct work_struct *work)
 			struct smc_link *lnk = &lgr->lnk[i];
 
 			if (smc_link_usable(lnk))
-				smc_llc_link_inactive(lnk);
+				lnk->state = SMC_LNK_INACTIVE;
 		}
 	}
 	smc_lgr_free(lgr);
@@ -692,7 +692,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 			struct smc_link *lnk = &lgr->lnk[i];
 
 			if (smc_link_usable(lnk))
-				smc_llc_link_inactive(lnk);
+				lnk->state = SMC_LNK_INACTIVE;
 		}
 	}
 }
@@ -706,16 +706,12 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 	struct smc_connection *conn;
 	struct smc_sock *smc;
 	struct rb_node *node;
-	int i;
 
 	if (lgr->terminating)
 		return;	/* lgr already terminating */
 	if (!soft)
 		cancel_delayed_work_sync(&lgr->free_work);
 	lgr->terminating = 1;
-	if (!lgr->is_smcd)
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
-			smc_llc_link_inactive(&lgr->lnk[i]);
 
 	/* kill remaining link group connections */
 	read_lock_bh(&lgr->conns_lock);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 69cc0d65b437..2f03131c85fd 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -660,22 +660,15 @@ void smc_llc_link_deleting(struct smc_link *link)
 	smc_wr_wakeup_tx_wait(link);
 }
 
-/* called in tasklet context */
-void smc_llc_link_inactive(struct smc_link *link)
-{
-	if (link->state == SMC_LNK_INACTIVE)
-		return;
-	link->state = SMC_LNK_INACTIVE;
-	cancel_delayed_work_sync(&link->llc_testlink_wrk);
-	smc_wr_wakeup_reg_wait(link);
-	smc_wr_wakeup_tx_wait(link);
-}
-
 /* called in worker context */
 void smc_llc_link_clear(struct smc_link *link)
 {
 	flush_workqueue(link->llc_wq);
 	destroy_workqueue(link->llc_wq);
+	complete(&link->llc_testlink_resp);
+	cancel_delayed_work_sync(&link->llc_testlink_wrk);
+	smc_wr_wakeup_reg_wait(link);
+	smc_wr_wakeup_tx_wait(link);
 }
 
 /* register a new rtoken at the remote peer */
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 08171131110c..c2c9d48d079f 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -56,7 +56,6 @@ int smc_llc_send_delete_link(struct smc_link *link,
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link, int testlink_time);
 void smc_llc_link_deleting(struct smc_link *link);
-void smc_llc_link_inactive(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *link,
 			    struct smc_buf_desc *rmb_desc);
-- 
cgit v1.2.3-59-g8ed1b


From 6c8968c421e0e6bea8a78ee4fdd043d850cd5b26 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:46 +0200
Subject: net/smc: use worker to process incoming llc messages

Incoming llc messages are processed in irq tasklet context, and
a worker is used to send outgoing messages. The worker is needed
because getting a send buffer could result in a wait for a free buffer.

To make sure all incoming llc messages are processed in a serialized way,
introduce an event queue and create a new queue entry for each message,
which is queued to this event queue. A new worker processes the event
queue entries in order.
Also remove the use of a separate worker to send outgoing llc messages,
because the messages are processed in worker context already.
With this event queue the serialized llc_wq work queue is obsolete, so
remove it.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |   4 +-
 net/smc/smc_core.h |   7 ++-
 net/smc/smc_llc.c  | 142 ++++++++++++++++++++++++++++++++---------------------
 net/smc/smc_llc.h  |   1 +
 4 files changed, 96 insertions(+), 58 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 78ccfbf6e4af..a1463da14614 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -412,7 +412,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
-
+		INIT_LIST_HEAD(&lgr->llc_event_q);
+		spin_lock_init(&lgr->llc_event_q_lock);
 		link_idx = SMC_SINGLE_LINK;
 		lnk = &lgr->lnk[link_idx];
 		rc = smcr_link_init(lgr, lnk, link_idx, ini);
@@ -613,6 +614,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
 				smcr_link_clear(&lgr->lnk[i]);
 		}
+		smc_llc_event_flush(lgr);
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 2b1960c8c8ce..6548e9a06f73 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -120,7 +120,6 @@ struct smc_link {
 	struct smc_link_group	*lgr;		/* parent link group */
 
 	enum smc_link_state	state;		/* state of link */
-	struct workqueue_struct *llc_wq;	/* single thread work queue */
 	struct completion	llc_confirm;	/* wait for rx of conf link */
 	struct completion	llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
 	int			llc_confirm_rc; /* rc from confirm link msg */
@@ -233,6 +232,12 @@ struct smc_link_group {
 			DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
 						/* used rtoken elements */
 			u8			next_link_id;
+			struct list_head	llc_event_q;
+						/* queue for llc events */
+			spinlock_t		llc_event_q_lock;
+						/* protects llc_event_q */
+			struct work_struct	llc_event_work;
+						/* llc event worker */
 		};
 		struct { /* SMC-D */
 			u64			peer_gid;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 2f03131c85fd..be74876a36ae 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -134,6 +134,12 @@ union smc_llc_msg {
 
 #define SMC_LLC_FLAG_RESP		0x80
 
+struct smc_llc_qentry {
+	struct list_head list;
+	struct smc_link *link;
+	union smc_llc_msg msg;
+};
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
@@ -356,46 +362,20 @@ static int smc_llc_send_test_link(struct smc_link *link, u8 user_data[16])
 	return rc;
 }
 
-struct smc_llc_send_work {
-	struct work_struct work;
-	struct smc_link *link;
-	int llclen;
-	union smc_llc_msg llcbuf;
-};
-
-/* worker that sends a prepared message */
-static void smc_llc_send_message_work(struct work_struct *work)
+/* schedule an llc send on link, may wait for buffers */
+static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 {
-	struct smc_llc_send_work *llcwrk = container_of(work,
-						struct smc_llc_send_work, work);
 	struct smc_wr_tx_pend_priv *pend;
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	if (!smc_link_usable(llcwrk->link))
-		goto out;
-	rc = smc_llc_add_pending_send(llcwrk->link, &wr_buf, &pend);
+	if (!smc_link_usable(link))
+		return -ENOLINK;
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
 	if (rc)
-		goto out;
-	memcpy(wr_buf, &llcwrk->llcbuf, llcwrk->llclen);
-	smc_wr_tx_send(llcwrk->link, pend);
-out:
-	kfree(llcwrk);
-}
-
-/* copy llcbuf and schedule an llc send on link */
-static int smc_llc_send_message(struct smc_link *link, void *llcbuf, int llclen)
-{
-	struct smc_llc_send_work *wrk = kmalloc(sizeof(*wrk), GFP_ATOMIC);
-
-	if (!wrk)
-		return -ENOMEM;
-	INIT_WORK(&wrk->work, smc_llc_send_message_work);
-	wrk->link = link;
-	wrk->llclen = llclen;
-	memcpy(&wrk->llcbuf, llcbuf, llclen);
-	queue_work(link->llc_wq, &wrk->work);
-	return 0;
+		return rc;
+	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+	return smc_wr_tx_send(link, pend);
 }
 
 /********************************* receive ***********************************/
@@ -452,7 +432,7 @@ static void smc_llc_rx_add_link(struct smc_link *link,
 					link->smcibdev->mac[link->ibport - 1],
 					link->gid, SMC_LLC_RESP);
 		}
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 	}
 }
 
@@ -474,7 +454,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 			/* server requests to delete this link, send response */
 			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
 		}
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 		smc_lgr_terminate_sched(lgr);
 	}
 }
@@ -487,7 +467,7 @@ static void smc_llc_rx_test_link(struct smc_link *link,
 			complete(&link->llc_testlink_resp);
 	} else {
 		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 	}
 }
 
@@ -510,7 +490,7 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
 		llc->hd.flags |= SMC_LLC_FLAG_RESP;
 		if (rc < 0)
 			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 	}
 }
 
@@ -522,7 +502,7 @@ static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
 	} else {
 		/* ignore rtokens for other links, we have only one link */
 		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 	}
 }
 
@@ -549,21 +529,30 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
 		}
 
 		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc, sizeof(*llc));
+		smc_llc_send_message(link, llc);
 	}
 }
 
-static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+/* flush the llc event queue */
+void smc_llc_event_flush(struct smc_link_group *lgr)
 {
-	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
-	union smc_llc_msg *llc = buf;
+	struct smc_llc_qentry *qentry, *q;
+
+	spin_lock_bh(&lgr->llc_event_q_lock);
+	list_for_each_entry_safe(qentry, q, &lgr->llc_event_q, list) {
+		list_del_init(&qentry->list);
+		kfree(qentry);
+	}
+	spin_unlock_bh(&lgr->llc_event_q_lock);
+}
+
+static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
+{
+	union smc_llc_msg *llc = &qentry->msg;
+	struct smc_link *link = qentry->link;
 
-	if (wc->byte_len < sizeof(*llc))
-		return; /* short message */
-	if (llc->raw.hdr.length != sizeof(*llc))
-		return; /* invalid message */
 	if (!smc_link_usable(link))
-		return; /* link not active, drop msg */
+		goto out;
 
 	switch (llc->raw.hdr.common.type) {
 	case SMC_LLC_TEST_LINK:
@@ -588,6 +577,54 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 		smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
 		break;
 	}
+out:
+	kfree(qentry);
+}
+
+/* worker to process llc messages on the event queue */
+static void smc_llc_event_work(struct work_struct *work)
+{
+	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+						  llc_event_work);
+	struct smc_llc_qentry *qentry;
+
+again:
+	spin_lock_bh(&lgr->llc_event_q_lock);
+	if (!list_empty(&lgr->llc_event_q)) {
+		qentry = list_first_entry(&lgr->llc_event_q,
+					  struct smc_llc_qentry, list);
+		list_del_init(&qentry->list);
+		spin_unlock_bh(&lgr->llc_event_q_lock);
+		smc_llc_event_handler(qentry);
+		goto again;
+	}
+	spin_unlock_bh(&lgr->llc_event_q_lock);
+}
+
+/* copy received msg and add it to the event queue */
+static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+{
+	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+	struct smc_link_group *lgr = link->lgr;
+	struct smc_llc_qentry *qentry;
+	union smc_llc_msg *llc = buf;
+	unsigned long flags;
+
+	if (wc->byte_len < sizeof(*llc))
+		return; /* short message */
+	if (llc->raw.hdr.length != sizeof(*llc))
+		return; /* invalid message */
+
+	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
+	if (!qentry)
+		return;
+	qentry->link = link;
+	INIT_LIST_HEAD(&qentry->list);
+	memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
+	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
+	list_add_tail(&qentry->list, &lgr->llc_event_q);
+	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
+	schedule_work(&link->lgr->llc_event_work);
 }
 
 /***************************** worker, utils *********************************/
@@ -626,12 +663,6 @@ out:
 
 int smc_llc_link_init(struct smc_link *link)
 {
-	struct smc_link_group *lgr = smc_get_lgr(link);
-	link->llc_wq = alloc_ordered_workqueue("llc_wq-%x:%x)", WQ_MEM_RECLAIM,
-					       *((u32 *)lgr->id),
-					       link->link_id);
-	if (!link->llc_wq)
-		return -ENOMEM;
 	init_completion(&link->llc_confirm);
 	init_completion(&link->llc_confirm_resp);
 	init_completion(&link->llc_add);
@@ -640,6 +671,7 @@ int smc_llc_link_init(struct smc_link *link)
 	init_completion(&link->llc_delete_rkey);
 	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
+	INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work);
 	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
 	return 0;
 }
@@ -663,8 +695,6 @@ void smc_llc_link_deleting(struct smc_link *link)
 /* called in worker context */
 void smc_llc_link_clear(struct smc_link *link)
 {
-	flush_workqueue(link->llc_wq);
-	destroy_workqueue(link->llc_wq);
 	complete(&link->llc_testlink_resp);
 	cancel_delayed_work_sync(&link->llc_testlink_wrk);
 	smc_wr_wakeup_reg_wait(link);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index c2c9d48d079f..9de83495ad14 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -61,6 +61,7 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
 			    struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link *link,
 			   struct smc_buf_desc *rmb_desc);
+void smc_llc_event_flush(struct smc_link_group *lgr);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From ef79d439cd124d9fb7258bb35d44c71aec11b829 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:47 +0200
Subject: net/smc: process llc responses in tasklet context

When an llc response is received, possible waiters for this response
need to be notified. This can be done in tasklet context, without using
a work item in the llc work queue. Move all code that handles llc
responses into smc_llc_rx_response().

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h |   8 +-
 net/smc/smc_llc.c  | 216 +++++++++++++++++++++++++++--------------------------
 2 files changed, 116 insertions(+), 108 deletions(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6548e9a06f73..d785656b3489 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -129,10 +129,10 @@ struct smc_link {
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
 	struct completion	llc_testlink_resp; /* wait for rx of testlink */
 	int			llc_testlink_time; /* testlink interval */
-	struct completion	llc_confirm_rkey; /* wait 4 rx of cnf rkey */
-	int			llc_confirm_rkey_rc; /* rc from cnf rkey msg */
-	struct completion	llc_delete_rkey; /* wait 4 rx of del rkey */
-	int			llc_delete_rkey_rc; /* rc from del rkey msg */
+	struct completion	llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
+	int			llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
+	struct completion	llc_delete_rkey_resp; /* w4 rx of del rkey */
+	int			llc_delete_rkey_resp_rc; /* rc from del rkey */
 	struct mutex		llc_delete_rkey_mutex; /* serialize usage */
 };
 
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index be74876a36ae..265889c8b03b 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -384,27 +384,17 @@ static void smc_llc_rx_confirm_link(struct smc_link *link,
 				    struct smc_llc_msg_confirm_link *llc)
 {
 	struct smc_link_group *lgr = smc_get_lgr(link);
-	int conf_rc;
+	int conf_rc = 0;
 
 	/* RMBE eyecatchers are not supported */
-	if (llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC)
-		conf_rc = 0;
-	else
+	if (!(llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
 		conf_rc = ENOTSUPP;
 
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (lgr->role == SMC_SERV &&
-		    link->state == SMC_LNK_ACTIVATING) {
-			link->llc_confirm_resp_rc = conf_rc;
-			complete(&link->llc_confirm_resp);
-		}
-	} else {
-		if (lgr->role == SMC_CLNT &&
-		    link->state == SMC_LNK_ACTIVATING) {
-			link->llc_confirm_rc = conf_rc;
-			link->link_id = llc->link_num;
-			complete(&link->llc_confirm);
-		}
+	if (lgr->role == SMC_CLNT &&
+	    link->state == SMC_LNK_ACTIVATING) {
+		link->llc_confirm_rc = conf_rc;
+		link->link_id = llc->link_num;
+		complete(&link->llc_confirm);
 	}
 }
 
@@ -413,27 +403,22 @@ static void smc_llc_rx_add_link(struct smc_link *link,
 {
 	struct smc_link_group *lgr = smc_get_lgr(link);
 
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (link->state == SMC_LNK_ACTIVATING)
-			complete(&link->llc_add_resp);
-	} else {
-		if (link->state == SMC_LNK_ACTIVATING) {
-			complete(&link->llc_add);
-			return;
-		}
+	if (link->state == SMC_LNK_ACTIVATING) {
+		complete(&link->llc_add);
+		return;
+	}
 
-		if (lgr->role == SMC_SERV) {
-			smc_llc_prep_add_link(llc, link,
-					link->smcibdev->mac[link->ibport - 1],
-					link->gid, SMC_LLC_REQ);
+	if (lgr->role == SMC_SERV) {
+		smc_llc_prep_add_link(llc, link,
+				link->smcibdev->mac[link->ibport - 1],
+				link->gid, SMC_LLC_REQ);
 
-		} else {
-			smc_llc_prep_add_link(llc, link,
-					link->smcibdev->mac[link->ibport - 1],
-					link->gid, SMC_LLC_RESP);
-		}
-		smc_llc_send_message(link, llc);
+	} else {
+		smc_llc_prep_add_link(llc, link,
+				link->smcibdev->mac[link->ibport - 1],
+				link->gid, SMC_LLC_RESP);
 	}
+	smc_llc_send_message(link, llc);
 }
 
 static void smc_llc_rx_delete_link(struct smc_link *link,
@@ -441,34 +426,24 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 {
 	struct smc_link_group *lgr = smc_get_lgr(link);
 
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (lgr->role == SMC_SERV)
-			smc_lgr_schedule_free_work_fast(lgr);
+	smc_lgr_forget(lgr);
+	smc_llc_link_deleting(link);
+	if (lgr->role == SMC_SERV) {
+		/* client asks to delete this link, send request */
+		smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
 	} else {
-		smc_lgr_forget(lgr);
-		smc_llc_link_deleting(link);
-		if (lgr->role == SMC_SERV) {
-			/* client asks to delete this link, send request */
-			smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
-		} else {
-			/* server requests to delete this link, send response */
-			smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
-		}
-		smc_llc_send_message(link, llc);
-		smc_lgr_terminate_sched(lgr);
+		/* server requests to delete this link, send response */
+		smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
 	}
+	smc_llc_send_message(link, llc);
+	smc_lgr_terminate_sched(lgr);
 }
 
 static void smc_llc_rx_test_link(struct smc_link *link,
 				 struct smc_llc_msg_test_link *llc)
 {
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		if (link->state == SMC_LNK_ACTIVE)
-			complete(&link->llc_testlink_resp);
-	} else {
-		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc);
-	}
+	llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	smc_llc_send_message(link, llc);
 }
 
 static void smc_llc_rx_confirm_rkey(struct smc_link *link,
@@ -476,34 +451,24 @@ static void smc_llc_rx_confirm_rkey(struct smc_link *link,
 {
 	int rc;
 
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		link->llc_confirm_rkey_rc = llc->hd.flags &
-					    SMC_LLC_FLAG_RKEY_NEG;
-		complete(&link->llc_confirm_rkey);
-	} else {
-		rc = smc_rtoken_add(link,
-				    llc->rtoken[0].rmb_vaddr,
-				    llc->rtoken[0].rmb_key);
+	rc = smc_rtoken_add(link,
+			    llc->rtoken[0].rmb_vaddr,
+			    llc->rtoken[0].rmb_key);
 
-		/* ignore rtokens for other links, we have only one link */
+	/* ignore rtokens for other links, we have only one link */
 
-		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		if (rc < 0)
-			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-		smc_llc_send_message(link, llc);
-	}
+	llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	if (rc < 0)
+		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+	smc_llc_send_message(link, llc);
 }
 
 static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
 				      struct smc_llc_msg_confirm_rkey_cont *llc)
 {
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		/* unused as long as we don't send this type of msg */
-	} else {
-		/* ignore rtokens for other links, we have only one link */
-		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc);
-	}
+	/* ignore rtokens for other links, we have only one link */
+	llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	smc_llc_send_message(link, llc);
 }
 
 static void smc_llc_rx_delete_rkey(struct smc_link *link,
@@ -512,25 +477,19 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
 	u8 err_mask = 0;
 	int i, max;
 
-	if (llc->hd.flags & SMC_LLC_FLAG_RESP) {
-		link->llc_delete_rkey_rc = llc->hd.flags &
-					    SMC_LLC_FLAG_RKEY_NEG;
-		complete(&link->llc_delete_rkey);
-	} else {
-		max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
-		for (i = 0; i < max; i++) {
-			if (smc_rtoken_delete(link, llc->rkey[i]))
-				err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
-		}
-
-		if (err_mask) {
-			llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-			llc->err_mask = err_mask;
-		}
+	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
+	for (i = 0; i < max; i++) {
+		if (smc_rtoken_delete(link, llc->rkey[i]))
+			err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
+	}
 
-		llc->hd.flags |= SMC_LLC_FLAG_RESP;
-		smc_llc_send_message(link, llc);
+	if (err_mask) {
+		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+		llc->err_mask = err_mask;
 	}
+
+	llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	smc_llc_send_message(link, llc);
 }
 
 /* flush the llc event queue */
@@ -601,6 +560,49 @@ again:
 	spin_unlock_bh(&lgr->llc_event_q_lock);
 }
 
+/* process llc responses in tasklet context */
+static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc)
+{
+	int rc = 0;
+
+	switch (llc->raw.hdr.common.type) {
+	case SMC_LLC_TEST_LINK:
+		if (link->state == SMC_LNK_ACTIVE)
+			complete(&link->llc_testlink_resp);
+		break;
+	case SMC_LLC_CONFIRM_LINK:
+		if (!(llc->raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
+			rc = ENOTSUPP;
+		if (link->lgr->role == SMC_SERV &&
+		    link->state == SMC_LNK_ACTIVATING) {
+			link->llc_confirm_resp_rc = rc;
+			complete(&link->llc_confirm_resp);
+		}
+		break;
+	case SMC_LLC_ADD_LINK:
+		if (link->state == SMC_LNK_ACTIVATING)
+			complete(&link->llc_add_resp);
+		break;
+	case SMC_LLC_DELETE_LINK:
+		if (link->lgr->role == SMC_SERV)
+			smc_lgr_schedule_free_work_fast(link->lgr);
+		break;
+	case SMC_LLC_CONFIRM_RKEY:
+		link->llc_confirm_rkey_resp_rc = llc->raw.hdr.flags &
+						 SMC_LLC_FLAG_RKEY_NEG;
+		complete(&link->llc_confirm_rkey_resp);
+		break;
+	case SMC_LLC_CONFIRM_RKEY_CONT:
+		/* unused as long as we don't send this type of msg */
+		break;
+	case SMC_LLC_DELETE_RKEY:
+		link->llc_delete_rkey_resp_rc = llc->raw.hdr.flags &
+						SMC_LLC_FLAG_RKEY_NEG;
+		complete(&link->llc_delete_rkey_resp);
+		break;
+	}
+}
+
 /* copy received msg and add it to the event queue */
 static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 {
@@ -615,6 +617,12 @@ static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
 	if (llc->raw.hdr.length != sizeof(*llc))
 		return; /* invalid message */
 
+	/* process responses immediately */
+	if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+		smc_llc_rx_response(link, llc);
+		return;
+	}
+
 	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
 	if (!qentry)
 		return;
@@ -667,8 +675,8 @@ int smc_llc_link_init(struct smc_link *link)
 	init_completion(&link->llc_confirm_resp);
 	init_completion(&link->llc_add);
 	init_completion(&link->llc_add_resp);
-	init_completion(&link->llc_confirm_rkey);
-	init_completion(&link->llc_delete_rkey);
+	init_completion(&link->llc_confirm_rkey_resp);
+	init_completion(&link->llc_delete_rkey_resp);
 	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
 	INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work);
@@ -708,14 +716,14 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
 	int rc;
 
 	/* protected by mutex smc_create_lgr_pending */
-	reinit_completion(&link->llc_confirm_rkey);
+	reinit_completion(&link->llc_confirm_rkey_resp);
 	rc = smc_llc_send_confirm_rkey(link, rmb_desc);
 	if (rc)
 		return rc;
 	/* receive CONFIRM RKEY response from server over RoCE fabric */
-	rc = wait_for_completion_interruptible_timeout(&link->llc_confirm_rkey,
-						       SMC_LLC_WAIT_TIME);
-	if (rc <= 0 || link->llc_confirm_rkey_rc)
+	rc = wait_for_completion_interruptible_timeout(
+			&link->llc_confirm_rkey_resp, SMC_LLC_WAIT_TIME);
+	if (rc <= 0 || link->llc_confirm_rkey_resp_rc)
 		return -EFAULT;
 	return 0;
 }
@@ -729,14 +737,14 @@ int smc_llc_do_delete_rkey(struct smc_link *link,
 	mutex_lock(&link->llc_delete_rkey_mutex);
 	if (link->state != SMC_LNK_ACTIVE)
 		goto out;
-	reinit_completion(&link->llc_delete_rkey);
+	reinit_completion(&link->llc_delete_rkey_resp);
 	rc = smc_llc_send_delete_rkey(link, rmb_desc);
 	if (rc)
 		goto out;
 	/* receive DELETE RKEY response from server over RoCE fabric */
-	rc = wait_for_completion_interruptible_timeout(&link->llc_delete_rkey,
-						       SMC_LLC_WAIT_TIME);
-	if (rc <= 0 || link->llc_delete_rkey_rc)
+	rc = wait_for_completion_interruptible_timeout(
+			&link->llc_delete_rkey_resp, SMC_LLC_WAIT_TIME);
+	if (rc <= 0 || link->llc_delete_rkey_resp_rc)
 		rc = -EFAULT;
 	else
 		rc = 0;
-- 
cgit v1.2.3-59-g8ed1b


From faca536008375bece23783e7382b5d0356c13ba5 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:48 +0200
Subject: net/smc: use mutex instead of rwlock_t to protect buffers

The locks for sndbufs and rmbs are never used from atomic context. Using
a mutex for these locks allows nesting them with other mutexes.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 22 +++++++++++-----------
 net/smc/smc_core.h |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a1463da14614..8a43d2948493 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -385,8 +385,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 	lgr->freefast = 0;
 	lgr->freeing = 0;
 	lgr->vlan_id = ini->vlan_id;
-	rwlock_init(&lgr->sndbufs_lock);
-	rwlock_init(&lgr->rmbs_lock);
+	mutex_init(&lgr->sndbufs_lock);
+	mutex_init(&lgr->rmbs_lock);
 	rwlock_init(&lgr->conns_lock);
 	for (i = 0; i < SMC_RMBE_SIZES; i++) {
 		INIT_LIST_HEAD(&lgr->sndbufs[i]);
@@ -456,9 +456,9 @@ static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
 	}
 	if (rmb_desc->is_reg_err) {
 		/* buf registration failed, reuse not possible */
-		write_lock_bh(&lgr->rmbs_lock);
+		mutex_lock(&lgr->rmbs_lock);
 		list_del(&rmb_desc->list);
-		write_unlock_bh(&lgr->rmbs_lock);
+		mutex_unlock(&lgr->rmbs_lock);
 
 		smc_buf_free(lgr, true, rmb_desc);
 	} else {
@@ -1059,19 +1059,19 @@ int smc_uncompress_bufsize(u8 compressed)
  * buffer size; if not available, return NULL
  */
 static struct smc_buf_desc *smc_buf_get_slot(int compressed_bufsize,
-					     rwlock_t *lock,
+					     struct mutex *lock,
 					     struct list_head *buf_list)
 {
 	struct smc_buf_desc *buf_slot;
 
-	read_lock_bh(lock);
+	mutex_lock(lock);
 	list_for_each_entry(buf_slot, buf_list, list) {
 		if (cmpxchg(&buf_slot->used, 0, 1) == 0) {
-			read_unlock_bh(lock);
+			mutex_unlock(lock);
 			return buf_slot;
 		}
 	}
-	read_unlock_bh(lock);
+	mutex_unlock(lock);
 	return NULL;
 }
 
@@ -1220,8 +1220,8 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 	struct smc_link_group *lgr = conn->lgr;
 	struct list_head *buf_list;
 	int bufsize, bufsize_short;
+	struct mutex *lock;	/* lock buffer list */
 	int sk_buf_size;
-	rwlock_t *lock;
 
 	if (is_rmb)
 		/* use socket recv buffer size (w/o overhead) as start value */
@@ -1262,9 +1262,9 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 			continue;
 
 		buf_desc->used = 1;
-		write_lock_bh(lock);
+		mutex_lock(lock);
 		list_add(&buf_desc->list, buf_list);
-		write_unlock_bh(lock);
+		mutex_unlock(lock);
 		break; /* found */
 	}
 
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index d785656b3489..379ced490c49 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -205,9 +205,9 @@ struct smc_link_group {
 	unsigned short		vlan_id;	/* vlan id of link group */
 
 	struct list_head	sndbufs[SMC_RMBE_SIZES];/* tx buffers */
-	rwlock_t		sndbufs_lock;	/* protects tx buffers */
+	struct mutex		sndbufs_lock;	/* protects tx buffers */
 	struct list_head	rmbs[SMC_RMBE_SIZES];	/* rx buffers */
-	rwlock_t		rmbs_lock;	/* protects rx buffers */
+	struct mutex		rmbs_lock;	/* protects rx buffers */
 
 	u8			id[SMC_LGR_ID_SIZE];	/* unique lgr id */
 	struct delayed_work	free_work;	/* delayed freeing of an lgr */
-- 
cgit v1.2.3-59-g8ed1b


From 00a049cfde95931c6832edad19d9a4be441cacf5 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Wed, 29 Apr 2020 17:10:49 +0200
Subject: net/smc: move llc layer related init and clear into smc_llc.c

Introduce smc_llc_lgr_init() and smc_llc_lgr_clear() to implement all
llc layer specific initialization and cleanup in module smc_llc.c.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  6 ++----
 net/smc/smc_core.c |  6 +++---
 net/smc/smc_core.h |  2 ++
 net/smc/smc_llc.c  | 26 +++++++++++++++++++++-----
 net/smc/smc_llc.h  |  5 +++--
 5 files changed, 31 insertions(+), 14 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e39f6aedd3bd..e859e3f420d9 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -381,7 +381,6 @@ static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
 
 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 {
-	struct net *net = sock_net(smc->clcsock->sk);
 	struct smc_link *link = smc->conn.lnk;
 	int rest;
 	int rc;
@@ -433,7 +432,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 	if (rc < 0)
 		return SMC_CLC_DECL_TIMEOUT_AL;
 
-	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
+	smc_llc_link_active(link);
 
 	return 0;
 }
@@ -1019,7 +1018,6 @@ void smc_close_non_accepted(struct sock *sk)
 
 static int smcr_serv_conf_first_link(struct smc_sock *smc)
 {
-	struct net *net = sock_net(smc->clcsock->sk);
 	struct smc_link *link = smc->conn.lnk;
 	int rest;
 	int rc;
@@ -1065,7 +1063,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
 	}
 
-	smc_llc_link_active(link, net->ipv4.sysctl_tcp_keepalive_time);
+	smc_llc_link_active(link);
 
 	return 0;
 }
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8a43d2948493..db49f8cd5c95 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -412,8 +412,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
-		INIT_LIST_HEAD(&lgr->llc_event_q);
-		spin_lock_init(&lgr->llc_event_q_lock);
+		smc_llc_lgr_init(lgr, smc);
+
 		link_idx = SMC_SINGLE_LINK;
 		lnk = &lgr->lnk[link_idx];
 		rc = smcr_link_init(lgr, lnk, link_idx, ini);
@@ -614,7 +614,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
 				smcr_link_clear(&lgr->lnk[i]);
 		}
-		smc_llc_event_flush(lgr);
+		smc_llc_lgr_clear(lgr);
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 379ced490c49..b5781511063d 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -238,6 +238,8 @@ struct smc_link_group {
 						/* protects llc_event_q */
 			struct work_struct	llc_event_work;
 						/* llc event worker */
+			int			llc_testlink_time;
+						/* link keep alive time */
 		};
 		struct { /* SMC-D */
 			u64			peer_gid;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 265889c8b03b..e715dd6735ee 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -493,7 +493,7 @@ static void smc_llc_rx_delete_rkey(struct smc_link *link,
 }
 
 /* flush the llc event queue */
-void smc_llc_event_flush(struct smc_link_group *lgr)
+static void smc_llc_event_flush(struct smc_link_group *lgr)
 {
 	struct smc_llc_qentry *qentry, *q;
 
@@ -669,6 +669,23 @@ out:
 	schedule_delayed_work(&link->llc_testlink_wrk, next_interval);
 }
 
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
+{
+	struct net *net = sock_net(smc->clcsock->sk);
+
+	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
+	INIT_LIST_HEAD(&lgr->llc_event_q);
+	spin_lock_init(&lgr->llc_event_q_lock);
+	lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
+}
+
+/* called after lgr was removed from lgr_list */
+void smc_llc_lgr_clear(struct smc_link_group *lgr)
+{
+	smc_llc_event_flush(lgr);
+	cancel_work_sync(&lgr->llc_event_work);
+}
+
 int smc_llc_link_init(struct smc_link *link)
 {
 	init_completion(&link->llc_confirm);
@@ -679,16 +696,15 @@ int smc_llc_link_init(struct smc_link *link)
 	init_completion(&link->llc_delete_rkey_resp);
 	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
-	INIT_WORK(&link->lgr->llc_event_work, smc_llc_event_work);
 	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
 	return 0;
 }
 
-void smc_llc_link_active(struct smc_link *link, int testlink_time)
+void smc_llc_link_active(struct smc_link *link)
 {
 	link->state = SMC_LNK_ACTIVE;
-	if (testlink_time) {
-		link->llc_testlink_time = testlink_time * HZ;
+	if (link->lgr->llc_testlink_time) {
+		link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
 		schedule_delayed_work(&link->llc_testlink_wrk,
 				      link->llc_testlink_time);
 	}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 9de83495ad14..66063f22166b 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -53,15 +53,16 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 			  enum smc_llc_reqresp reqresp);
 int smc_llc_send_delete_link(struct smc_link *link,
 			     enum smc_llc_reqresp reqresp, bool orderly);
+void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
+void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
-void smc_llc_link_active(struct smc_link *link, int testlink_time);
+void smc_llc_link_active(struct smc_link *link);
 void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *link,
 			    struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link *link,
 			   struct smc_buf_desc *rmb_desc);
-void smc_llc_event_flush(struct smc_link_group *lgr);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From e3450b79dfe47632ffa65042c6d5a6b48263da4e Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 29 Apr 2020 15:45:04 +0100
Subject: tools: bpftool: For "feature probe" define "full_mode" bool as global

The "full_mode" variable used for switching between full or partial
feature probing (i.e. with or without probing helpers that will log
warnings in kernel logs) was piped from the main do_probe() function
down to probe_helpers_for_progtype(), where it is needed.

Define it as a global variable: the calls will be more readable, and if
other similar flags were to be used in the future, we could use global
variables as well instead of extending again the list of arguments with
new flags.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429144506.8999-2-quentin@isovalent.com
---
 tools/bpf/bpftool/feature.c | 15 +++++++--------
 1 file changed, 7 insertions(+), 8 deletions(-)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 88718ee6a438..59e4cb44efbc 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -35,6 +35,8 @@ static const char * const helper_name[] = {
 
 #undef BPF_HELPER_MAKE_ENTRY
 
+static bool full_mode;
+
 /* Miscellaneous utility functions */
 
 static bool check_procfs(void)
@@ -540,8 +542,7 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 
 static void
 probe_helpers_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
-			   const char *define_prefix, bool full_mode,
-			   __u32 ifindex)
+			   const char *define_prefix, __u32 ifindex)
 {
 	const char *ptype_name = prog_type_name[prog_type];
 	char feat_name[128];
@@ -678,8 +679,7 @@ static void section_map_types(const char *define_prefix, __u32 ifindex)
 }
 
 static void
-section_helpers(bool *supported_types, const char *define_prefix,
-		bool full_mode, __u32 ifindex)
+section_helpers(bool *supported_types, const char *define_prefix, __u32 ifindex)
 {
 	unsigned int i;
 
@@ -704,8 +704,8 @@ section_helpers(bool *supported_types, const char *define_prefix,
 		       define_prefix, define_prefix, define_prefix,
 		       define_prefix);
 	for (i = BPF_PROG_TYPE_UNSPEC + 1; i < ARRAY_SIZE(prog_type_name); i++)
-		probe_helpers_for_progtype(i, supported_types[i],
-					   define_prefix, full_mode, ifindex);
+		probe_helpers_for_progtype(i, supported_types[i], define_prefix,
+					   ifindex);
 
 	print_end_section();
 }
@@ -725,7 +725,6 @@ static int do_probe(int argc, char **argv)
 	enum probe_component target = COMPONENT_UNSPEC;
 	const char *define_prefix = NULL;
 	bool supported_types[128] = {};
-	bool full_mode = false;
 	__u32 ifindex = 0;
 	char *ifname;
 
@@ -803,7 +802,7 @@ static int do_probe(int argc, char **argv)
 		goto exit_close_json;
 	section_program_types(supported_types, define_prefix, ifindex);
 	section_map_types(define_prefix, ifindex);
-	section_helpers(supported_types, define_prefix, full_mode, ifindex);
+	section_helpers(supported_types, define_prefix, ifindex);
 	section_misc(define_prefix, ifindex);
 
 exit_close_json:
-- 
cgit v1.2.3-59-g8ed1b


From cf9bf714523dbbc97953be6de6ca14d57d4f8a21 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 29 Apr 2020 15:45:05 +0100
Subject: tools: bpftool: Allow unprivileged users to probe features

There is demand for a way to identify what BPF helper functions are
available to unprivileged users. To do so, allow unprivileged users to
run "bpftool feature probe" to list BPF-related features. This will only
show features accessible to those users, and may not reflect the full
list of features available (to administrators) on the system.

To avoid the case where bpftool is inadvertently run as non-root and
would list only a subset of the features supported by the system when it
would be expected to list all of them, running as unprivileged is gated
behind the "unprivileged" keyword passed to the command line. When used
by a privileged user, this keyword makes bpftool drop CAP_SYS_ADMIN and
list the features available to unprivileged users. Note that this
adds a dependency on libcap for compiling bpftool.

Note that there is no particular reason why the probes were restricted
to root, other than the fact I did not need them for unprivileged and
did not bother with the additional checks at the time probes were added.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429144506.8999-3-quentin@isovalent.com
---
 .../bpf/bpftool/Documentation/bpftool-feature.rst  |  10 +-
 tools/bpf/bpftool/Makefile                         |   2 +-
 tools/bpf/bpftool/bash-completion/bpftool          |   2 +-
 tools/bpf/bpftool/feature.c                        | 102 ++++++++++++++++++---
 4 files changed, 100 insertions(+), 16 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index b04156cfd7a3..ca085944e4cf 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -19,7 +19,7 @@ SYNOPSIS
 FEATURE COMMANDS
 ================
 
-|	**bpftool** **feature probe** [*COMPONENT*] [**full**] [**macros** [**prefix** *PREFIX*]]
+|	**bpftool** **feature probe** [*COMPONENT*] [**full**] [**unprivileged**] [**macros** [**prefix** *PREFIX*]]
 |	**bpftool** **feature help**
 |
 |	*COMPONENT* := { **kernel** | **dev** *NAME* }
@@ -49,6 +49,14 @@ DESCRIPTION
 		  Keyword **kernel** can be omitted. If no probe target is
 		  specified, probing the kernel is the default behaviour.
 
+		  When the **unprivileged** keyword is used, bpftool will dump
+		  only the features available to a user who does not have the
+		  **CAP_SYS_ADMIN** capability set. The features available in
+		  that case usually represent a small subset of the parameters
+		  supported by the system. Unprivileged users MUST use the
+		  **unprivileged** keyword: This is to avoid misdetection if
+		  bpftool is inadvertently run as non-root, for example.
+
 	**bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
 		  Probe network device for supported eBPF features and dump
 		  results to the console.
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index f584d1fdfc64..89d7962a4a44 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -55,7 +55,7 @@ ifneq ($(EXTRA_LDFLAGS),)
 LDFLAGS += $(EXTRA_LDFLAGS)
 endif
 
-LIBS = $(LIBBPF) -lelf -lz
+LIBS = $(LIBBPF) -lelf -lz -lcap
 
 INSTALL ?= install
 RM ?= rm -f
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index c033c3329f73..fc989ead7313 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -1079,7 +1079,7 @@ _bpftool()
                         COMPREPLY+=( $( compgen -W 'macros' -- "$cur" ) )
                     fi
                     _bpftool_one_of_list 'kernel dev'
-                    _bpftool_once_attr 'full'
+                    _bpftool_once_attr 'full unprivileged'
                     return 0
                     ;;
                 *)
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 59e4cb44efbc..952f4b1987c0 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -6,6 +6,7 @@
 #include <string.h>
 #include <unistd.h>
 #include <net/if.h>
+#include <sys/capability.h>
 #include <sys/utsname.h>
 #include <sys/vfs.h>
 
@@ -36,6 +37,7 @@ static const char * const helper_name[] = {
 #undef BPF_HELPER_MAKE_ENTRY
 
 static bool full_mode;
+static bool run_as_unprivileged;
 
 /* Miscellaneous utility functions */
 
@@ -473,6 +475,11 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
 		}
 
 	res = bpf_probe_prog_type(prog_type, ifindex);
+	/* Probe may succeed even if program load fails, for unprivileged users
+	 * check that we did not fail because of insufficient permissions
+	 */
+	if (run_as_unprivileged && errno == EPERM)
+		res = false;
 
 	supported_types[prog_type] |= res;
 
@@ -501,6 +508,10 @@ probe_map_type(enum bpf_map_type map_type, const char *define_prefix,
 
 	res = bpf_probe_map_type(map_type, ifindex);
 
+	/* Probe result depends on the success of map creation, no additional
+	 * check required for unprivileged users
+	 */
+
 	maxlen = sizeof(plain_desc) - strlen(plain_comment) - 1;
 	if (strlen(map_type_name[map_type]) > maxlen) {
 		p_info("map type name too long");
@@ -520,12 +531,17 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 			  const char *define_prefix, unsigned int id,
 			  const char *ptype_name, __u32 ifindex)
 {
-	bool res;
+	bool res = false;
 
-	if (!supported_type)
-		res = false;
-	else
+	if (supported_type) {
 		res = bpf_probe_helper(id, prog_type, ifindex);
+		/* Probe may succeed even if program load fails, for
+		 * unprivileged users check that we did not fail because of
+		 * insufficient permissions
+		 */
+		if (run_as_unprivileged && errno == EPERM)
+			res = false;
+	}
 
 	if (json_output) {
 		if (res)
@@ -720,6 +736,65 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
 	print_end_section();
 }
 
+static int handle_perms(void)
+{
+	cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
+	bool has_sys_admin_cap = false;
+	cap_flag_value_t val;
+	int res = -1;
+	cap_t caps;
+
+	caps = cap_get_proc();
+	if (!caps) {
+		p_err("failed to get capabilities for process: %s",
+		      strerror(errno));
+		return -1;
+	}
+
+	if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &val)) {
+		p_err("bug: failed to retrieve CAP_SYS_ADMIN status");
+		goto exit_free;
+	}
+	if (val == CAP_SET)
+		has_sys_admin_cap = true;
+
+	if (!run_as_unprivileged && !has_sys_admin_cap) {
+		p_err("full feature probing requires CAP_SYS_ADMIN, run as root or use 'unprivileged'");
+		goto exit_free;
+	}
+
+	if ((run_as_unprivileged && !has_sys_admin_cap) ||
+	    (!run_as_unprivileged && has_sys_admin_cap)) {
+		/* We are all good, exit now */
+		res = 0;
+		goto exit_free;
+	}
+
+	/* if (run_as_unprivileged && has_sys_admin_cap), drop CAP_SYS_ADMIN */
+
+	if (cap_set_flag(caps, CAP_EFFECTIVE, ARRAY_SIZE(cap_list), cap_list,
+			 CAP_CLEAR)) {
+		p_err("bug: failed to clear CAP_SYS_ADMIN from capabilities");
+		goto exit_free;
+	}
+
+	if (cap_set_proc(caps)) {
+		p_err("failed to drop CAP_SYS_ADMIN: %s", strerror(errno));
+		goto exit_free;
+	}
+
+	res = 0;
+
+exit_free:
+	if (cap_free(caps) && !res) {
+		p_err("failed to clear storage object for capabilities: %s",
+		      strerror(errno));
+		res = -1;
+	}
+
+	return res;
+}
+
 static int do_probe(int argc, char **argv)
 {
 	enum probe_component target = COMPONENT_UNSPEC;
@@ -728,14 +803,6 @@ static int do_probe(int argc, char **argv)
 	__u32 ifindex = 0;
 	char *ifname;
 
-	/* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
-	 * Let's approximate, and restrict usage to root user only.
-	 */
-	if (geteuid()) {
-		p_err("please run this command as root user");
-		return -1;
-	}
-
 	set_max_rlimit();
 
 	while (argc) {
@@ -784,6 +851,9 @@ static int do_probe(int argc, char **argv)
 			if (!REQ_ARGS(1))
 				return -1;
 			define_prefix = GET_ARG();
+		} else if (is_prefix(*argv, "unprivileged")) {
+			run_as_unprivileged = true;
+			NEXT_ARG();
 		} else {
 			p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?",
 			      *argv);
@@ -791,6 +861,12 @@ static int do_probe(int argc, char **argv)
 		}
 	}
 
+	/* Full feature detection requires CAP_SYS_ADMIN privilege.
+	 * Let's approximate, and warn if user is not root.
+	 */
+	if (handle_perms())
+		return -1;
+
 	if (json_output) {
 		define_prefix = NULL;
 		jsonw_start_object(json_wtr);
@@ -821,7 +897,7 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s probe [COMPONENT] [full] [macros [prefix PREFIX]]\n"
+		"Usage: %s %s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
 		"       %s %s help\n"
 		"\n"
 		"       COMPONENT := { kernel | dev NAME }\n"
-- 
cgit v1.2.3-59-g8ed1b


From 0b3b9ca3d154486baa08a41cbc62fde67ba8c6c3 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Wed, 29 Apr 2020 15:45:06 +0100
Subject: tools: bpftool: Make libcap dependency optional

The new libcap dependency is not used for an essential feature of
bpftool, and we could imagine building the tool without checks on
CAP_SYS_ADMIN by disabling probing features as an unprivileged user.

Make it so, in order to avoid a hard dependency on libcap, and to ease
packaging/embedding of bpftool.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429144506.8999-4-quentin@isovalent.com
---
 .../bpf/bpftool/Documentation/bpftool-feature.rst  |  4 +++-
 tools/bpf/bpftool/Makefile                         | 13 +++++++----
 tools/bpf/bpftool/feature.c                        | 26 ++++++++++++++++++++++
 3 files changed, 38 insertions(+), 5 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index ca085944e4cf..1fa755f55e0c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -55,7 +55,9 @@ DESCRIPTION
 		  that case usually represent a small subset of the parameters
 		  supported by the system. Unprivileged users MUST use the
 		  **unprivileged** keyword: This is to avoid misdetection if
-		  bpftool is inadvertently run as non-root, for example.
+		  bpftool is inadvertently run as non-root, for example. This
+		  keyword is unavailable if bpftool was compiled without
+		  libcap.
 
 	**bpftool feature probe dev** *NAME* [**full**] [**macros** [**prefix** *PREFIX*]]
 		  Probe network device for supported eBPF features and dump
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 89d7962a4a44..2759f9cc3289 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -55,16 +55,15 @@ ifneq ($(EXTRA_LDFLAGS),)
 LDFLAGS += $(EXTRA_LDFLAGS)
 endif
 
-LIBS = $(LIBBPF) -lelf -lz -lcap
-
 INSTALL ?= install
 RM ?= rm -f
 CLANG ?= clang
 
 FEATURE_USER = .bpftool
-FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib \
+FEATURE_TESTS = libbfd disassembler-four-args reallocarray zlib libcap \
+	clang-bpf-global-var
+FEATURE_DISPLAY = libbfd disassembler-four-args zlib libcap \
 	clang-bpf-global-var
-FEATURE_DISPLAY = libbfd disassembler-four-args zlib clang-bpf-global-var
 
 check_feat := 1
 NON_CHECK_FEAT_TARGETS := clean uninstall doc doc-clean doc-install doc-uninstall
@@ -90,6 +89,12 @@ ifeq ($(feature-reallocarray), 0)
 CFLAGS += -DCOMPAT_NEED_REALLOCARRAY
 endif
 
+LIBS = $(LIBBPF) -lelf -lz
+ifeq ($(feature-libcap), 1)
+CFLAGS += -DUSE_LIBCAP
+LIBS += -lcap
+endif
+
 include $(wildcard $(OUTPUT)*.d)
 
 all: $(OUTPUT)bpftool
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 952f4b1987c0..f54347f55ee0 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -6,7 +6,9 @@
 #include <string.h>
 #include <unistd.h>
 #include <net/if.h>
+#ifdef USE_LIBCAP
 #include <sys/capability.h>
+#endif
 #include <sys/utsname.h>
 #include <sys/vfs.h>
 
@@ -37,7 +39,9 @@ static const char * const helper_name[] = {
 #undef BPF_HELPER_MAKE_ENTRY
 
 static bool full_mode;
+#ifdef USE_LIBCAP
 static bool run_as_unprivileged;
+#endif
 
 /* Miscellaneous utility functions */
 
@@ -475,11 +479,13 @@ probe_prog_type(enum bpf_prog_type prog_type, bool *supported_types,
 		}
 
 	res = bpf_probe_prog_type(prog_type, ifindex);
+#ifdef USE_LIBCAP
 	/* Probe may succeed even if program load fails, for unprivileged users
 	 * check that we did not fail because of insufficient permissions
 	 */
 	if (run_as_unprivileged && errno == EPERM)
 		res = false;
+#endif
 
 	supported_types[prog_type] |= res;
 
@@ -535,12 +541,14 @@ probe_helper_for_progtype(enum bpf_prog_type prog_type, bool supported_type,
 
 	if (supported_type) {
 		res = bpf_probe_helper(id, prog_type, ifindex);
+#ifdef USE_LIBCAP
 		/* Probe may succeed even if program load fails, for
 		 * unprivileged users check that we did not fail because of
 		 * insufficient permissions
 		 */
 		if (run_as_unprivileged && errno == EPERM)
 			res = false;
+#endif
 	}
 
 	if (json_output) {
@@ -738,6 +746,7 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
 
 static int handle_perms(void)
 {
+#ifdef USE_LIBCAP
 	cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
 	bool has_sys_admin_cap = false;
 	cap_flag_value_t val;
@@ -793,6 +802,18 @@ exit_free:
 	}
 
 	return res;
+#else
+	/* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
+	 * We do not use libcap so let's approximate, and restrict usage to
+	 * root user only.
+	 */
+	if (geteuid()) {
+		p_err("full feature probing requires root privileges");
+		return -1;
+	}
+
+	return 0;
+#endif /* USE_LIBCAP */
 }
 
 static int do_probe(int argc, char **argv)
@@ -852,8 +873,13 @@ static int do_probe(int argc, char **argv)
 				return -1;
 			define_prefix = GET_ARG();
 		} else if (is_prefix(*argv, "unprivileged")) {
+#ifdef USE_LIBCAP
 			run_as_unprivileged = true;
 			NEXT_ARG();
+#else
+			p_err("unprivileged run not supported, recompile bpftool with libcap");
+			return -1;
+#endif
 		} else {
 			p_err("expected no more arguments, 'kernel', 'dev', 'macros' or 'prefix', got: '%s'?",
 			      *argv);
-- 
cgit v1.2.3-59-g8ed1b


From 64d85290d79c0677edb5a8ee2295b36c022fa5df Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 29 Apr 2020 20:11:52 +0200
Subject: bpf: Allow bpf_map_lookup_elem for SOCKMAP and SOCKHASH

White-list map lookup for SOCKMAP/SOCKHASH from BPF. Lookup returns a
pointer to a full socket and acquires a reference if necessary.

To support it we need to extend the verifier to know that:

 (1) register storing the lookup result holds a pointer to socket, if
     lookup was done on SOCKMAP/SOCKHASH, and that

 (2) map lookup on SOCKMAP/SOCKHASH is a reference acquiring operation,
     which needs a corresponding reference release with bpf_sk_release.

On sock_map side, lookup handlers exposed via bpf_map_ops now bump
sk_refcnt if socket is reference counted. In turn, bpf_sk_select_reuseport,
the only in-kernel user of SOCKMAP/SOCKHASH ops->map_lookup_elem, was
updated to release the reference.

Sockets fetched from a map can be used in the same way as ones returned by
BPF socket lookup helpers, such as bpf_sk_lookup_tcp. In particular, they
can be used with bpf_sk_assign to direct packets toward a socket on TC
ingress path.

Suggested-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429181154.479310-2-jakub@cloudflare.com
---
 kernel/bpf/verifier.c | 45 +++++++++++++++++++++++++++++++++++----------
 net/core/filter.c     |  4 ++++
 net/core/sock_map.c   | 18 ++++++++++++++++--
 3 files changed, 55 insertions(+), 12 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2b337e32aa94..70ad009577f8 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -429,11 +429,30 @@ static bool is_release_function(enum bpf_func_id func_id)
 	return func_id == BPF_FUNC_sk_release;
 }
 
-static bool is_acquire_function(enum bpf_func_id func_id)
+static bool may_be_acquire_function(enum bpf_func_id func_id)
 {
 	return func_id == BPF_FUNC_sk_lookup_tcp ||
 		func_id == BPF_FUNC_sk_lookup_udp ||
-		func_id == BPF_FUNC_skc_lookup_tcp;
+		func_id == BPF_FUNC_skc_lookup_tcp ||
+		func_id == BPF_FUNC_map_lookup_elem;
+}
+
+static bool is_acquire_function(enum bpf_func_id func_id,
+				const struct bpf_map *map)
+{
+	enum bpf_map_type map_type = map ? map->map_type : BPF_MAP_TYPE_UNSPEC;
+
+	if (func_id == BPF_FUNC_sk_lookup_tcp ||
+	    func_id == BPF_FUNC_sk_lookup_udp ||
+	    func_id == BPF_FUNC_skc_lookup_tcp)
+		return true;
+
+	if (func_id == BPF_FUNC_map_lookup_elem &&
+	    (map_type == BPF_MAP_TYPE_SOCKMAP ||
+	     map_type == BPF_MAP_TYPE_SOCKHASH))
+		return true;
+
+	return false;
 }
 
 static bool is_ptr_cast_function(enum bpf_func_id func_id)
@@ -3934,7 +3953,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_sock_map_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
 		    func_id != BPF_FUNC_msg_redirect_map &&
-		    func_id != BPF_FUNC_sk_select_reuseport)
+		    func_id != BPF_FUNC_sk_select_reuseport &&
+		    func_id != BPF_FUNC_map_lookup_elem)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_SOCKHASH:
@@ -3942,7 +3962,8 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_sock_hash_update &&
 		    func_id != BPF_FUNC_map_delete_elem &&
 		    func_id != BPF_FUNC_msg_redirect_hash &&
-		    func_id != BPF_FUNC_sk_select_reuseport)
+		    func_id != BPF_FUNC_sk_select_reuseport &&
+		    func_id != BPF_FUNC_map_lookup_elem)
 			goto error;
 		break;
 	case BPF_MAP_TYPE_REUSEPORT_SOCKARRAY:
@@ -4112,7 +4133,7 @@ static bool check_refcount_ok(const struct bpf_func_proto *fn, int func_id)
 	/* A reference acquiring function cannot acquire
 	 * another refcounted ptr.
 	 */
-	if (is_acquire_function(func_id) && count)
+	if (may_be_acquire_function(func_id) && count)
 		return false;
 
 	/* We only support one arg being unreferenced at the moment,
@@ -4623,7 +4644,7 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 	if (is_ptr_cast_function(func_id)) {
 		/* For release_reference() */
 		regs[BPF_REG_0].ref_obj_id = meta.ref_obj_id;
-	} else if (is_acquire_function(func_id)) {
+	} else if (is_acquire_function(func_id, meta.map_ptr)) {
 		int id = acquire_reference_state(env, insn_idx);
 
 		if (id < 0)
@@ -6532,12 +6553,16 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 		if (is_null) {
 			reg->type = SCALAR_VALUE;
 		} else if (reg->type == PTR_TO_MAP_VALUE_OR_NULL) {
-			if (reg->map_ptr->inner_map_meta) {
+			const struct bpf_map *map = reg->map_ptr;
+
+			if (map->inner_map_meta) {
 				reg->type = CONST_PTR_TO_MAP;
-				reg->map_ptr = reg->map_ptr->inner_map_meta;
-			} else if (reg->map_ptr->map_type ==
-				   BPF_MAP_TYPE_XSKMAP) {
+				reg->map_ptr = map->inner_map_meta;
+			} else if (map->map_type == BPF_MAP_TYPE_XSKMAP) {
 				reg->type = PTR_TO_XDP_SOCK;
+			} else if (map->map_type == BPF_MAP_TYPE_SOCKMAP ||
+				   map->map_type == BPF_MAP_TYPE_SOCKHASH) {
+				reg->type = PTR_TO_SOCKET;
 			} else {
 				reg->type = PTR_TO_MAP_VALUE;
 			}
diff --git a/net/core/filter.c b/net/core/filter.c
index da3b7a72c37c..70b32723e6be 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -8712,6 +8712,10 @@ BPF_CALL_4(sk_select_reuseport, struct sk_reuseport_kern *, reuse_kern,
 
 	reuse = rcu_dereference(selected_sk->sk_reuseport_cb);
 	if (!reuse) {
+		/* Lookup in sock_map can return TCP ESTABLISHED sockets. */
+		if (sk_is_refcounted(selected_sk))
+			sock_put(selected_sk);
+
 		/* reuseport_array has only sk with non NULL sk_reuseport_cb.
 		 * The only (!reuse) case here is - the sk has already been
 		 * unhashed (e.g. by close()), so treat it as -ENOENT.
diff --git a/net/core/sock_map.c b/net/core/sock_map.c
index b08dfae10f88..00a26cf2cfe9 100644
--- a/net/core/sock_map.c
+++ b/net/core/sock_map.c
@@ -343,7 +343,14 @@ static struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key)
 
 static void *sock_map_lookup(struct bpf_map *map, void *key)
 {
-	return __sock_map_lookup_elem(map, *(u32 *)key);
+	struct sock *sk;
+
+	sk = __sock_map_lookup_elem(map, *(u32 *)key);
+	if (!sk || !sk_fullsock(sk))
+		return NULL;
+	if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
+		return NULL;
+	return sk;
 }
 
 static void *sock_map_lookup_sys(struct bpf_map *map, void *key)
@@ -1051,7 +1058,14 @@ static void *sock_hash_lookup_sys(struct bpf_map *map, void *key)
 
 static void *sock_hash_lookup(struct bpf_map *map, void *key)
 {
-	return __sock_hash_lookup_elem(map, key);
+	struct sock *sk;
+
+	sk = __sock_hash_lookup_elem(map, key);
+	if (!sk || !sk_fullsock(sk))
+		return NULL;
+	if (sk_is_refcounted(sk) && !refcount_inc_not_zero(&sk->sk_refcnt))
+		return NULL;
+	return sk;
 }
 
 static void sock_hash_release_progs(struct bpf_map *map)
-- 
cgit v1.2.3-59-g8ed1b


From 34a2cc6eee809f974111979f4c2b3c62aaaad457 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 29 Apr 2020 20:11:53 +0200
Subject: selftests/bpf: Test that lookup on SOCKMAP/SOCKHASH is allowed

Now that bpf_map_lookup_elem() is white-listed for SOCKMAP/SOCKHASH,
replace the tests which check that verifier prevents lookup on these map
types with ones that ensure that lookup operation is permitted, but only
with a release of acquired socket reference.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429181154.479310-3-jakub@cloudflare.com
---
 .../selftests/bpf/verifier/prevent_map_lookup.c    | 30 ----------
 tools/testing/selftests/bpf/verifier/sock.c        | 70 ++++++++++++++++++++++
 2 files changed, 70 insertions(+), 30 deletions(-)

diff --git a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
index da7a4b37cb98..fc4e301260f6 100644
--- a/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
+++ b/tools/testing/selftests/bpf/verifier/prevent_map_lookup.c
@@ -1,33 +1,3 @@
-{
-	"prevent map lookup in sockmap",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_sockmap = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 15 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
-{
-	"prevent map lookup in sockhash",
-	.insns = {
-	BPF_ST_MEM(BPF_DW, BPF_REG_10, -8, 0),
-	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
-	BPF_LD_MAP_FD(BPF_REG_1, 0),
-	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
-	BPF_EXIT_INSN(),
-	},
-	.fixup_map_sockhash = { 3 },
-	.result = REJECT,
-	.errstr = "cannot pass map_type 18 into func bpf_map_lookup_elem",
-	.prog_type = BPF_PROG_TYPE_SOCK_OPS,
-},
 {
 	"prevent map lookup in stack trace",
 	.insns = {
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 9ed192e14f5f..f87ad69dbc62 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -516,3 +516,73 @@
 	.prog_type = BPF_PROG_TYPE_XDP,
 	.result = ACCEPT,
 },
+{
+	"bpf_map_lookup_elem(sockmap, &key)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = REJECT,
+	.errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+	"bpf_map_lookup_elem(sockhash, &key)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_IMM(BPF_REG_0, 0),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockhash = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = REJECT,
+	.errstr = "Unreleased reference id=2 alloc_insn=5",
+},
+{
+	"bpf_map_lookup_elem(sockmap, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = ACCEPT,
+},
+{
+	"bpf_map_lookup_elem(sockhash, &key); sk->type [fullsock field]; bpf_sk_release(sk)",
+	.insns = {
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -4),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_EMIT_CALL(BPF_FUNC_map_lookup_elem),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 1),
+	BPF_EXIT_INSN(),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetof(struct bpf_sock, type)),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockhash = { 3 },
+	.prog_type = BPF_PROG_TYPE_SK_SKB,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From 0b9ad56b1ea66382a3dcc8e3e7c54967bf8c6d94 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Wed, 29 Apr 2020 20:11:54 +0200
Subject: selftests/bpf: Use SOCKMAP for server sockets in bpf_sk_assign test

Update bpf_sk_assign test to fetch the server socket from SOCKMAP, now that
map lookup from BPF in SOCKMAP is enabled. This way the test TC BPF program
doesn't need to know what address server socket is bound to.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200429181154.479310-4-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/Makefile               |  2 +-
 tools/testing/selftests/bpf/prog_tests/sk_assign.c | 21 +++++-
 tools/testing/selftests/bpf/progs/test_sk_assign.c | 82 +++++++++-------------
 3 files changed, 53 insertions(+), 52 deletions(-)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 10f12a5aac20..3d942be23d09 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -243,7 +243,7 @@ define GCC_BPF_BUILD_RULE
 	$(BPF_GCC) $3 $4 -O2 -c $1 -o $2
 endef
 
-SKEL_BLACKLIST := btf__% test_pinning_invalid.c
+SKEL_BLACKLIST := btf__% test_pinning_invalid.c test_sk_assign.c
 
 # Set up extra TRUNNER_XXX "temporary" variables in the environment (relies on
 # $eval()) and pass control to DEFINE_TEST_RUNNER_RULES.
diff --git a/tools/testing/selftests/bpf/prog_tests/sk_assign.c b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
index d572e1a2c297..47fa04adc147 100644
--- a/tools/testing/selftests/bpf/prog_tests/sk_assign.c
+++ b/tools/testing/selftests/bpf/prog_tests/sk_assign.c
@@ -20,6 +20,7 @@
 #define CONNECT_PORT 4321
 #define TEST_DADDR (0xC0A80203)
 #define NS_SELF "/proc/self/ns/net"
+#define SERVER_MAP_PATH "/sys/fs/bpf/tc/globals/server_map"
 
 static const struct timeval timeo_sec = { .tv_sec = 3 };
 static const size_t timeo_optlen = sizeof(timeo_sec);
@@ -265,6 +266,7 @@ void test_sk_assign(void)
 		TEST("ipv6 udp addr redir", AF_INET6, SOCK_DGRAM, true),
 	};
 	int server = -1;
+	int server_map;
 	int self_net;
 
 	self_net = open(NS_SELF, O_RDONLY);
@@ -278,9 +280,17 @@ void test_sk_assign(void)
 		goto cleanup;
 	}
 
+	server_map = bpf_obj_get(SERVER_MAP_PATH);
+	if (CHECK_FAIL(server_map < 0)) {
+		perror("Unable to open " SERVER_MAP_PATH);
+		goto cleanup;
+	}
+
 	for (int i = 0; i < ARRAY_SIZE(tests) && !READ_ONCE(stop); i++) {
 		struct test_sk_cfg *test = &tests[i];
 		const struct sockaddr *addr;
+		const int zero = 0;
+		int err;
 
 		if (!test__start_subtest(test->name))
 			continue;
@@ -288,7 +298,13 @@ void test_sk_assign(void)
 		addr = (const struct sockaddr *)test->addr;
 		server = start_server(addr, test->len, test->type);
 		if (server == -1)
-			goto cleanup;
+			goto close;
+
+		err = bpf_map_update_elem(server_map, &zero, &server, BPF_ANY);
+		if (CHECK_FAIL(err)) {
+			perror("Unable to update server_map");
+			goto close;
+		}
 
 		/* connect to unbound ports */
 		prepare_addr(test->addr, test->family, CONNECT_PORT,
@@ -302,7 +318,10 @@ void test_sk_assign(void)
 
 close:
 	close(server);
+	close(server_map);
 cleanup:
+	if (CHECK_FAIL(unlink(SERVER_MAP_PATH)))
+		perror("Unable to unlink " SERVER_MAP_PATH);
 	if (CHECK_FAIL(setns(self_net, CLONE_NEWNET)))
 		perror("Failed to setns("NS_SELF")");
 	close(self_net);
diff --git a/tools/testing/selftests/bpf/progs/test_sk_assign.c b/tools/testing/selftests/bpf/progs/test_sk_assign.c
index 8f530843b4da..1ecd987005d2 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_assign.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_assign.c
@@ -16,6 +16,26 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
 
+/* Pin map under /sys/fs/bpf/tc/globals/<map name> */
+#define PIN_GLOBAL_NS 2
+
+/* Must match struct bpf_elf_map layout from iproute2 */
+struct {
+	__u32 type;
+	__u32 size_key;
+	__u32 size_value;
+	__u32 max_elem;
+	__u32 flags;
+	__u32 id;
+	__u32 pinning;
+} server_map SEC("maps") = {
+	.type = BPF_MAP_TYPE_SOCKMAP,
+	.size_key = sizeof(int),
+	.size_value  = sizeof(__u64),
+	.max_elem = 1,
+	.pinning = PIN_GLOBAL_NS,
+};
+
 int _version SEC("version") = 1;
 char _license[] SEC("license") = "GPL";
 
@@ -72,7 +92,9 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
 	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
+	const int zero = 0;
 	size_t tuple_len;
+	__be16 dport;
 	int ret;
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -83,32 +105,11 @@ handle_udp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 	if (sk)
 		goto assign;
 
-	if (ipv4) {
-		if (tuple->ipv4.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		ln.ipv4.daddr = bpf_htonl(0x7f000001);
-		ln.ipv4.dport = bpf_htons(1234);
-
-		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv4),
-					BPF_F_CURRENT_NETNS, 0);
-	} else {
-		if (tuple->ipv6.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		/* Upper parts of daddr are already zero. */
-		ln.ipv6.daddr[3] = bpf_htonl(0x1);
-		ln.ipv6.dport = bpf_htons(1234);
-
-		sk = bpf_sk_lookup_udp(skb, &ln, sizeof(ln.ipv6),
-					BPF_F_CURRENT_NETNS, 0);
-	}
+	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+	if (dport != bpf_htons(4321))
+		return TC_ACT_OK;
 
-	/* workaround: We can't do a single socket lookup here, because then
-	 * the compiler will likely spill tuple_len to the stack. This makes it
-	 * lose all bounds information in the verifier, which then rejects the
-	 * call as unsafe.
-	 */
+	sk = bpf_map_lookup_elem(&server_map, &zero);
 	if (!sk)
 		return TC_ACT_SHOT;
 
@@ -123,7 +124,9 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 {
 	struct bpf_sock_tuple ln = {0};
 	struct bpf_sock *sk;
+	const int zero = 0;
 	size_t tuple_len;
+	__be16 dport;
 	int ret;
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
@@ -137,32 +140,11 @@ handle_tcp(struct __sk_buff *skb, struct bpf_sock_tuple *tuple, bool ipv4)
 		bpf_sk_release(sk);
 	}
 
-	if (ipv4) {
-		if (tuple->ipv4.dport != bpf_htons(4321))
-			return TC_ACT_OK;
+	dport = ipv4 ? tuple->ipv4.dport : tuple->ipv6.dport;
+	if (dport != bpf_htons(4321))
+		return TC_ACT_OK;
 
-		ln.ipv4.daddr = bpf_htonl(0x7f000001);
-		ln.ipv4.dport = bpf_htons(1234);
-
-		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv4),
-					BPF_F_CURRENT_NETNS, 0);
-	} else {
-		if (tuple->ipv6.dport != bpf_htons(4321))
-			return TC_ACT_OK;
-
-		/* Upper parts of daddr are already zero. */
-		ln.ipv6.daddr[3] = bpf_htonl(0x1);
-		ln.ipv6.dport = bpf_htons(1234);
-
-		sk = bpf_skc_lookup_tcp(skb, &ln, sizeof(ln.ipv6),
-					BPF_F_CURRENT_NETNS, 0);
-	}
-
-	/* workaround: We can't do a single socket lookup here, because then
-	 * the compiler will likely spill tuple_len to the stack. This makes it
-	 * lose all bounds information in the verifier, which then rejects the
-	 * call as unsafe.
-	 */
+	sk = bpf_map_lookup_elem(&server_map, &zero);
 	if (!sk)
 		return TC_ACT_SHOT;
 
-- 
cgit v1.2.3-59-g8ed1b


From 449e14bfdb83bf772200840a7ac4dcc1d7cacf54 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 29 Apr 2020 15:21:58 +0200
Subject: bpf: Fix unused variable warning

Hiding the only use of bpf_link_type_strs[] in an #ifdef causes
an unused-variable warning:

kernel/bpf/syscall.c:2280:20: error: 'bpf_link_type_strs' defined but not used [-Werror=unused-variable]
 2280 | static const char *bpf_link_type_strs[] = {

Move the definition into the same #ifdef.

Fixes: f2e10bff16a0 ("bpf: Add support for BPF_OBJ_GET_INFO_BY_FD for bpf_link")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200429132217.1294289-1-arnd@arndb.de
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d23c04cbe14f..c75b2dd2459c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2271,6 +2271,7 @@ static int bpf_link_release(struct inode *inode, struct file *filp)
 	return 0;
 }
 
+#ifdef CONFIG_PROC_FS
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type)
 #define BPF_MAP_TYPE(_id, _ops)
 #define BPF_LINK_TYPE(_id, _name) [_id] = #_name,
@@ -2282,7 +2283,6 @@ static const char *bpf_link_type_strs[] = {
 #undef BPF_MAP_TYPE
 #undef BPF_LINK_TYPE
 
-#ifdef CONFIG_PROC_FS
 static void bpf_link_show_fdinfo(struct seq_file *m, struct file *filp)
 {
 	const struct bpf_link *link = filp->private_data;
-- 
cgit v1.2.3-59-g8ed1b


From 72f96347628e73dbb61b307f18dd19293cc6792a Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:00 -0700
Subject: net: bcmgenet: set Rx mode before starting netif

This commit explicitly calls the bcmgenet_set_rx_mode() function when
the network interface is started. This function is normally called by
ndo_set_rx_mode when the flags are changed, but apparently not when
the driver is suspended and resumed.

This change ensures that address filtering or promiscuous mode are
properly restored by the driver after the MAC may have been reset.

Fixes: b6e978e50444 ("net: bcmgenet: add suspend/resume callbacks")
Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 351d0282f199..eb0dd4d4800c 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -65,6 +65,9 @@
 #define GENET_RDMA_REG_OFF	(priv->hw_params->rdma_offset + \
 				TOTAL_DESC * DMA_DESC_SIZE)
 
+/* Forward declarations */
+static void bcmgenet_set_rx_mode(struct net_device *dev);
+
 static inline void bcmgenet_writel(u32 value, void __iomem *offset)
 {
 	/* MIPS chips strapped for BE will automagically configure the
@@ -2793,6 +2796,7 @@ static void bcmgenet_netif_start(struct net_device *dev)
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
 	/* Start the network engine */
+	bcmgenet_set_rx_mode(dev);
 	bcmgenet_enable_rx_napi(priv);
 
 	umac_enable_set(priv, CMD_TX_EN | CMD_RX_EN, true);
-- 
cgit v1.2.3-59-g8ed1b


From 6f7689057a0f10a6c967b9f2759d7a3dc948b930 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:01 -0700
Subject: net: bcmgenet: Fix WoL with password after deep sleep

Broadcom STB chips support a deep sleep mode where all register contents
are lost. Because we were stashing the MagicPacket password in some of
these registers, suspending into that deep sleep and then resuming left
the device unable to wake up from a MagicPacket with password again.

Fix this by keeping a software copy of the password and program it
during suspend.

Fixes: c51de7f3976b ("net: bcmgenet: add Wake-on-LAN support code")
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.h     |  2 ++
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 39 ++++++++++------------
 2 files changed, 20 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index daf8fb2c39b6..c3bfe97f2e5c 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -14,6 +14,7 @@
 #include <linux/if_vlan.h>
 #include <linux/phy.h>
 #include <linux/dim.h>
+#include <linux/ethtool.h>
 
 /* total number of Buffer Descriptors, same for Rx/Tx */
 #define TOTAL_DESC				256
@@ -676,6 +677,7 @@ struct bcmgenet_priv {
 	/* WOL */
 	struct clk *clk_wol;
 	u32 wolopts;
+	u8 sopass[SOPASS_MAX];
 
 	struct bcmgenet_mib_counters mib;
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index c9a43695b182..597c0498689a 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -41,18 +41,13 @@
 void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	u32 reg;
 
 	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE;
 	wol->wolopts = priv->wolopts;
 	memset(wol->sopass, 0, sizeof(wol->sopass));
 
-	if (wol->wolopts & WAKE_MAGICSECURE) {
-		reg = bcmgenet_umac_readl(priv, UMAC_MPD_PW_MS);
-		put_unaligned_be16(reg, &wol->sopass[0]);
-		reg = bcmgenet_umac_readl(priv, UMAC_MPD_PW_LS);
-		put_unaligned_be32(reg, &wol->sopass[2]);
-	}
+	if (wol->wolopts & WAKE_MAGICSECURE)
+		memcpy(wol->sopass, priv->sopass, sizeof(priv->sopass));
 }
 
 /* ethtool function - set WOL (Wake on LAN) settings.
@@ -62,7 +57,6 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	struct device *kdev = &priv->pdev->dev;
-	u32 reg;
 
 	if (!device_can_wakeup(kdev))
 		return -ENOTSUPP;
@@ -70,17 +64,8 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	if (wol->wolopts & ~(WAKE_MAGIC | WAKE_MAGICSECURE))
 		return -EINVAL;
 
-	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
-	if (wol->wolopts & WAKE_MAGICSECURE) {
-		bcmgenet_umac_writel(priv, get_unaligned_be16(&wol->sopass[0]),
-				     UMAC_MPD_PW_MS);
-		bcmgenet_umac_writel(priv, get_unaligned_be32(&wol->sopass[2]),
-				     UMAC_MPD_PW_LS);
-		reg |= MPD_PW_EN;
-	} else {
-		reg &= ~MPD_PW_EN;
-	}
-	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
+	if (wol->wolopts & WAKE_MAGICSECURE)
+		memcpy(priv->sopass, wol->sopass, sizeof(priv->sopass));
 
 	/* Flag the device and relevant IRQ as wakeup capable */
 	if (wol->wolopts) {
@@ -120,6 +105,14 @@ static int bcmgenet_poll_wol_status(struct bcmgenet_priv *priv)
 	return retries;
 }
 
+static void bcmgenet_set_mpd_password(struct bcmgenet_priv *priv)
+{
+	bcmgenet_umac_writel(priv, get_unaligned_be16(&priv->sopass[0]),
+			     UMAC_MPD_PW_MS);
+	bcmgenet_umac_writel(priv, get_unaligned_be32(&priv->sopass[2]),
+			     UMAC_MPD_PW_LS);
+}
+
 int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 				enum bcmgenet_power_mode mode)
 {
@@ -144,13 +137,17 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 
 	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
 	reg |= MPD_EN;
+	if (priv->wolopts & WAKE_MAGICSECURE) {
+		bcmgenet_set_mpd_password(priv);
+		reg |= MPD_PW_EN;
+	}
 	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
 	/* Do not leave UniMAC in MPD mode only */
 	retries = bcmgenet_poll_wol_status(priv);
 	if (retries < 0) {
 		reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
-		reg &= ~MPD_EN;
+		reg &= ~(MPD_EN | MPD_PW_EN);
 		bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 		return retries;
 	}
@@ -189,7 +186,7 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
 	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
 	if (!(reg & MPD_EN))
 		return;	/* already powered up so skip the rest */
-	reg &= ~MPD_EN;
+	reg &= ~(MPD_EN | MPD_PW_EN);
 	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
 	/* Disable CRC Forward */
-- 
cgit v1.2.3-59-g8ed1b


From 1a1d5106c1e37321f3fe394b786d1aece56d0df5 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:02 -0700
Subject: net: bcmgenet: move clk_wol management to bcmgenet_wol

The GENET_POWER_WOL_MAGIC power up and power down code configures
the device for WoL when suspending and disables the WoL logic when
resuming. It makes sense that this code should also manage the WoL
clocking.

This commit consolidates the logic and moves it earlier in the
resume sequence.

Since the clock is now only enabled if WoL is successfully entered,
the wol_active flag is introduced to track that state to keep the
clock enables and disables balanced in case a suspend is aborted.
The MPD_EN hardware bit can't be used because it can be cleared
when the MAC is reset by a deep sleep.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c     | 19 +++++++------------
 drivers/net/ethernet/broadcom/genet/bcmgenet.h     |  3 ++-
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 14 +++++++++++---
 3 files changed, 20 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index eb0dd4d4800c..57b8608feae1 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2,7 +2,7 @@
 /*
  * Broadcom GENET (Gigabit Ethernet) controller driver
  *
- * Copyright (c) 2014-2019 Broadcom
+ * Copyright (c) 2014-2020 Broadcom
  */
 
 #define pr_fmt(fmt)				"bcmgenet: " fmt
@@ -3619,6 +3619,10 @@ static int bcmgenet_resume(struct device *d)
 	if (ret)
 		return ret;
 
+	/* From WOL-enabled suspend, switch to regular clock */
+	if (device_may_wakeup(d) && priv->wolopts)
+		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
+
 	/* If this is an internal GPHY, power it back on now, before UniMAC is
 	 * brought out of reset as absolutely no UniMAC activity is allowed
 	 */
@@ -3629,10 +3633,6 @@ static int bcmgenet_resume(struct device *d)
 
 	init_umac(priv);
 
-	/* From WOL-enabled suspend, switch to regular clock */
-	if (priv->wolopts)
-		clk_disable_unprepare(priv->clk_wol);
-
 	phy_init_hw(dev->phydev);
 
 	/* Speed settings must be restored */
@@ -3650,9 +3650,6 @@ static int bcmgenet_resume(struct device *d)
 		bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
 	}
 
-	if (priv->wolopts)
-		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
-
 	/* Disable RX/TX DMA and flush TX queues */
 	dma_ctrl = bcmgenet_dma_disable(priv);
 
@@ -3702,12 +3699,10 @@ static int bcmgenet_suspend(struct device *d)
 		phy_suspend(dev->phydev);
 
 	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
-	if (device_may_wakeup(d) && priv->wolopts) {
+	if (device_may_wakeup(d) && priv->wolopts)
 		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
-		clk_prepare_enable(priv->clk_wol);
-	} else if (priv->internal_phy) {
+	else if (priv->internal_phy)
 		ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
-	}
 
 	/* Turn off the clocks */
 	clk_disable_unprepare(priv->clk);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index c3bfe97f2e5c..a858b7305832 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -1,6 +1,6 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /*
- * Copyright (c) 2014-2017 Broadcom
+ * Copyright (c) 2014-2020 Broadcom
  */
 
 #ifndef __BCMGENET_H__
@@ -678,6 +678,7 @@ struct bcmgenet_priv {
 	struct clk *clk_wol;
 	u32 wolopts;
 	u8 sopass[SOPASS_MAX];
+	bool wol_active;
 
 	struct bcmgenet_mib_counters mib;
 
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 597c0498689a..da45a4645b94 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -2,7 +2,7 @@
 /*
  * Broadcom GENET (Gigabit Ethernet) Wake-on-LAN support
  *
- * Copyright (c) 2014-2017 Broadcom
+ * Copyright (c) 2014-2020 Broadcom
  */
 
 #define pr_fmt(fmt)				"bcmgenet_wol: " fmt
@@ -155,6 +155,9 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 	netif_dbg(priv, wol, dev, "MPD WOL-ready status set after %d msec\n",
 		  retries);
 
+	clk_prepare_enable(priv->clk_wol);
+	priv->wol_active = 1;
+
 	/* Enable CRC forward */
 	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
 	priv->crc_fwd_en = 1;
@@ -183,9 +186,14 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
 		return;
 	}
 
+	if (!priv->wol_active)
+		return;	/* failed to suspend so skip the rest */
+
+	priv->wol_active = 0;
+	clk_disable_unprepare(priv->clk_wol);
+
+	/* Disable Magic Packet Detection */
 	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
-	if (!(reg & MPD_EN))
-		return;	/* already powered up so skip the rest */
 	reg &= ~(MPD_EN | MPD_PW_EN);
 	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
-- 
cgit v1.2.3-59-g8ed1b


From 14da1510fedc2d72ca81344a0f62939e0a1bc648 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:03 -0700
Subject: Revert "net: bcmgenet: remove unused function in bcmgenet.c"

This reverts commit e2072600a24161b7ddcfb26814f69f5fbc8ef85a.

This commit restores the previous implementation of Hardware Filter
Block functions to the file for use in subsequent commits.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 122 +++++++++++++++++++++++++
 1 file changed, 122 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 57b8608feae1..b37ef05c5083 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -2759,6 +2759,128 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl)
 	bcmgenet_tdma_writel(priv, reg, DMA_CTRL);
 }
 
+static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv,
+					   u32 f_index)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	return !!(reg & (1 << (f_index % 32)));
+}
+
+static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	reg |= (1 << (f_index % 32));
+	bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
+						     u32 f_index, u32 rx_queue)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = f_index / 8;
+	reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset);
+	reg &= ~(0xF << (4 * (f_index % 8)));
+	reg |= ((rx_queue & 0xF) << (4 * (f_index % 8)));
+	bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset);
+}
+
+static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv,
+					   u32 f_index, u32 f_length)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_LEN_V3PLUS +
+		 ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) *
+		 sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	reg &= ~(0xFF << (8 * (f_index % 4)));
+	reg |= ((f_length & 0xFF) << (8 * (f_index % 4)));
+	bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
+{
+	u32 f_index;
+
+	for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
+		if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
+			return f_index;
+
+	return -ENOMEM;
+}
+
+/* bcmgenet_hfb_add_filter
+ *
+ * Add new filter to Hardware Filter Block to match and direct Rx traffic to
+ * desired Rx queue.
+ *
+ * f_data is an array of unsigned 32-bit integers where each 32-bit integer
+ * provides filter data for 2 bytes (4 nibbles) of Rx frame:
+ *
+ * bits 31:20 - unused
+ * bit  19    - nibble 0 match enable
+ * bit  18    - nibble 1 match enable
+ * bit  17    - nibble 2 match enable
+ * bit  16    - nibble 3 match enable
+ * bits 15:12 - nibble 0 data
+ * bits 11:8  - nibble 1 data
+ * bits 7:4   - nibble 2 data
+ * bits 3:0   - nibble 3 data
+ *
+ * Example:
+ * In order to match:
+ * - Ethernet frame type = 0x0800 (IP)
+ * - IP version field = 4
+ * - IP protocol field = 0x11 (UDP)
+ *
+ * The following filter is needed:
+ * u32 hfb_filter_ipv4_udp[] = {
+ *   Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ *   Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000,
+ *   Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011,
+ * };
+ *
+ * To add the filter to HFB and direct the traffic to Rx queue 0, call:
+ * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
+ *                         ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
+ */
+int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
+			    u32 f_length, u32 rx_queue)
+{
+	int f_index;
+	u32 i;
+
+	f_index = bcmgenet_hfb_find_unused_filter(priv);
+	if (f_index < 0)
+		return -ENOMEM;
+
+	if (f_length > priv->hw_params->hfb_filter_size)
+		return -EINVAL;
+
+	for (i = 0; i < f_length; i++)
+		bcmgenet_hfb_writel(priv, f_data[i],
+			(f_index * priv->hw_params->hfb_filter_size + i) *
+			sizeof(u32));
+
+	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
+	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
+	bcmgenet_hfb_enable_filter(priv, f_index);
+	bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
+
+	return 0;
+}
+
 /* bcmgenet_hfb_clear
  *
  * Clear Hardware Filter Block and disable all filtering.
-- 
cgit v1.2.3-59-g8ed1b


From 854295d03ca04461f275d723289a5886e6827498 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:04 -0700
Subject: net: bcmgenet: code movement

The Hardware Filter Block code will be used by ethtool functions
when defining flow types, so this commit moves the functions earlier
in the file to avoid the need for prototype declarations.

This is broken out to facilitate review.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 308 ++++++++++++-------------
 1 file changed, 154 insertions(+), 154 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index b37ef05c5083..ad41944d2cc0 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -459,6 +459,160 @@ static inline void bcmgenet_rdma_ring_writel(struct bcmgenet_priv *priv,
 			genet_dma_ring_regs[r]);
 }
 
+static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv,
+					   u32 f_index)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	return !!(reg & (1 << (f_index % 32)));
+}
+
+static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	reg |= (1 << (f_index % 32));
+	bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
+						     u32 f_index, u32 rx_queue)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = f_index / 8;
+	reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset);
+	reg &= ~(0xF << (4 * (f_index % 8)));
+	reg |= ((rx_queue & 0xF) << (4 * (f_index % 8)));
+	bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset);
+}
+
+static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv,
+					   u32 f_index, u32 f_length)
+{
+	u32 offset;
+	u32 reg;
+
+	offset = HFB_FLT_LEN_V3PLUS +
+		 ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) *
+		 sizeof(u32);
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	reg &= ~(0xFF << (8 * (f_index % 4)));
+	reg |= ((f_length & 0xFF) << (8 * (f_index % 4)));
+	bcmgenet_hfb_reg_writel(priv, reg, offset);
+}
+
+static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
+{
+	u32 f_index;
+
+	for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
+		if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
+			return f_index;
+
+	return -ENOMEM;
+}
+
+/* bcmgenet_hfb_add_filter
+ *
+ * Add new filter to Hardware Filter Block to match and direct Rx traffic to
+ * desired Rx queue.
+ *
+ * f_data is an array of unsigned 32-bit integers where each 32-bit integer
+ * provides filter data for 2 bytes (4 nibbles) of Rx frame:
+ *
+ * bits 31:20 - unused
+ * bit  19    - nibble 0 match enable
+ * bit  18    - nibble 1 match enable
+ * bit  17    - nibble 2 match enable
+ * bit  16    - nibble 3 match enable
+ * bits 15:12 - nibble 0 data
+ * bits 11:8  - nibble 1 data
+ * bits 7:4   - nibble 2 data
+ * bits 3:0   - nibble 3 data
+ *
+ * Example:
+ * In order to match:
+ * - Ethernet frame type = 0x0800 (IP)
+ * - IP version field = 4
+ * - IP protocol field = 0x11 (UDP)
+ *
+ * The following filter is needed:
+ * u32 hfb_filter_ipv4_udp[] = {
+ *   Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000,
+ *   Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000,
+ *   Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011,
+ * };
+ *
+ * To add the filter to HFB and direct the traffic to Rx queue 0, call:
+ * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
+ *                         ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
+ */
+int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
+			    u32 f_length, u32 rx_queue)
+{
+	int f_index;
+	u32 i;
+
+	f_index = bcmgenet_hfb_find_unused_filter(priv);
+	if (f_index < 0)
+		return -ENOMEM;
+
+	if (f_length > priv->hw_params->hfb_filter_size)
+		return -EINVAL;
+
+	for (i = 0; i < f_length; i++)
+		bcmgenet_hfb_writel(priv, f_data[i],
+			(f_index * priv->hw_params->hfb_filter_size + i) *
+			sizeof(u32));
+
+	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
+	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
+	bcmgenet_hfb_enable_filter(priv, f_index);
+	bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
+
+	return 0;
+}
+
+/* bcmgenet_hfb_clear
+ *
+ * Clear Hardware Filter Block and disable all filtering.
+ */
+static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv)
+{
+	u32 i;
+
+	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_CTRL);
+	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS);
+	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS + 4);
+
+	for (i = DMA_INDEX2RING_0; i <= DMA_INDEX2RING_7; i++)
+		bcmgenet_rdma_writel(priv, 0x0, i);
+
+	for (i = 0; i < (priv->hw_params->hfb_filter_cnt / 4); i++)
+		bcmgenet_hfb_reg_writel(priv, 0x0,
+					HFB_FLT_LEN_V3PLUS + i * sizeof(u32));
+
+	for (i = 0; i < priv->hw_params->hfb_filter_cnt *
+			priv->hw_params->hfb_filter_size; i++)
+		bcmgenet_hfb_writel(priv, 0x0, i * sizeof(u32));
+}
+
+static void bcmgenet_hfb_init(struct bcmgenet_priv *priv)
+{
+	if (GENET_IS_V1(priv) || GENET_IS_V2(priv))
+		return;
+
+	bcmgenet_hfb_clear(priv);
+}
+
 static int bcmgenet_begin(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
@@ -2759,160 +2913,6 @@ static void bcmgenet_enable_dma(struct bcmgenet_priv *priv, u32 dma_ctrl)
 	bcmgenet_tdma_writel(priv, reg, DMA_CTRL);
 }
 
-static bool bcmgenet_hfb_is_filter_enabled(struct bcmgenet_priv *priv,
-					   u32 f_index)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	return !!(reg & (1 << (f_index % 32)));
-}
-
-static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_ENABLE_V3PLUS + (f_index < 32) * sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	reg |= (1 << (f_index % 32));
-	bcmgenet_hfb_reg_writel(priv, reg, offset);
-}
-
-static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
-						     u32 f_index, u32 rx_queue)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = f_index / 8;
-	reg = bcmgenet_rdma_readl(priv, DMA_INDEX2RING_0 + offset);
-	reg &= ~(0xF << (4 * (f_index % 8)));
-	reg |= ((rx_queue & 0xF) << (4 * (f_index % 8)));
-	bcmgenet_rdma_writel(priv, reg, DMA_INDEX2RING_0 + offset);
-}
-
-static void bcmgenet_hfb_set_filter_length(struct bcmgenet_priv *priv,
-					   u32 f_index, u32 f_length)
-{
-	u32 offset;
-	u32 reg;
-
-	offset = HFB_FLT_LEN_V3PLUS +
-		 ((priv->hw_params->hfb_filter_cnt - 1 - f_index) / 4) *
-		 sizeof(u32);
-	reg = bcmgenet_hfb_reg_readl(priv, offset);
-	reg &= ~(0xFF << (8 * (f_index % 4)));
-	reg |= ((f_length & 0xFF) << (8 * (f_index % 4)));
-	bcmgenet_hfb_reg_writel(priv, reg, offset);
-}
-
-static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
-{
-	u32 f_index;
-
-	for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
-		if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
-			return f_index;
-
-	return -ENOMEM;
-}
-
-/* bcmgenet_hfb_add_filter
- *
- * Add new filter to Hardware Filter Block to match and direct Rx traffic to
- * desired Rx queue.
- *
- * f_data is an array of unsigned 32-bit integers where each 32-bit integer
- * provides filter data for 2 bytes (4 nibbles) of Rx frame:
- *
- * bits 31:20 - unused
- * bit  19    - nibble 0 match enable
- * bit  18    - nibble 1 match enable
- * bit  17    - nibble 2 match enable
- * bit  16    - nibble 3 match enable
- * bits 15:12 - nibble 0 data
- * bits 11:8  - nibble 1 data
- * bits 7:4   - nibble 2 data
- * bits 3:0   - nibble 3 data
- *
- * Example:
- * In order to match:
- * - Ethernet frame type = 0x0800 (IP)
- * - IP version field = 4
- * - IP protocol field = 0x11 (UDP)
- *
- * The following filter is needed:
- * u32 hfb_filter_ipv4_udp[] = {
- *   Rx frame offset 0x00: 0x00000000, 0x00000000, 0x00000000, 0x00000000,
- *   Rx frame offset 0x08: 0x00000000, 0x00000000, 0x000F0800, 0x00084000,
- *   Rx frame offset 0x10: 0x00000000, 0x00000000, 0x00000000, 0x00030011,
- * };
- *
- * To add the filter to HFB and direct the traffic to Rx queue 0, call:
- * bcmgenet_hfb_add_filter(priv, hfb_filter_ipv4_udp,
- *                         ARRAY_SIZE(hfb_filter_ipv4_udp), 0);
- */
-int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
-			    u32 f_length, u32 rx_queue)
-{
-	int f_index;
-	u32 i;
-
-	f_index = bcmgenet_hfb_find_unused_filter(priv);
-	if (f_index < 0)
-		return -ENOMEM;
-
-	if (f_length > priv->hw_params->hfb_filter_size)
-		return -EINVAL;
-
-	for (i = 0; i < f_length; i++)
-		bcmgenet_hfb_writel(priv, f_data[i],
-			(f_index * priv->hw_params->hfb_filter_size + i) *
-			sizeof(u32));
-
-	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
-	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
-	bcmgenet_hfb_enable_filter(priv, f_index);
-	bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
-
-	return 0;
-}
-
-/* bcmgenet_hfb_clear
- *
- * Clear Hardware Filter Block and disable all filtering.
- */
-static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv)
-{
-	u32 i;
-
-	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_CTRL);
-	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS);
-	bcmgenet_hfb_reg_writel(priv, 0x0, HFB_FLT_ENABLE_V3PLUS + 4);
-
-	for (i = DMA_INDEX2RING_0; i <= DMA_INDEX2RING_7; i++)
-		bcmgenet_rdma_writel(priv, 0x0, i);
-
-	for (i = 0; i < (priv->hw_params->hfb_filter_cnt / 4); i++)
-		bcmgenet_hfb_reg_writel(priv, 0x0,
-					HFB_FLT_LEN_V3PLUS + i * sizeof(u32));
-
-	for (i = 0; i < priv->hw_params->hfb_filter_cnt *
-			priv->hw_params->hfb_filter_size; i++)
-		bcmgenet_hfb_writel(priv, 0x0, i * sizeof(u32));
-}
-
-static void bcmgenet_hfb_init(struct bcmgenet_priv *priv)
-{
-	if (GENET_IS_V1(priv) || GENET_IS_V2(priv))
-		return;
-
-	bcmgenet_hfb_clear(priv);
-}
-
 static void bcmgenet_netif_start(struct net_device *dev)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-- 
cgit v1.2.3-59-g8ed1b


From 3e370952287c55e5fd240cb8bb41ef8acff8829d Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:05 -0700
Subject: net: bcmgenet: add support for ethtool rxnfc flows

This commit enables driver support for ethtool commands of this form:
ethtool -N|-U|--config-nfc|--config-ntuple devname
    flow-type ether|ip4
    [src xx:yy:zz:aa:bb:cc [m xx:yy:zz:aa:bb:cc]]
    [dst xx:yy:zz:aa:bb:cc [m xx:yy:zz:aa:bb:cc]] [proto N [m N]]
    [src-ip x.x.x.x [m x.x.x.x]] [dst-ip x.x.x.x [m x.x.x.x]] [tos N [m N]]
    [l4proto N [m N]] [src-port N [m N]] [dst-port N [m N]] [spi N [m N]]
    [l4data N [m N]] [vlan-etype N [m N]] [vlan N [m N]]
    [dst-mac xx:yy:zz:aa:bb:cc [m xx:yy:zz:aa:bb:cc]] [action 0] [loc N] |
    delete N

Since there is only one Rx Ring in this implementation, action 0 behaves
no differently from not specifying a rule.

The rules can be seen with ethtool commands of this form:
ethtool -n|-u|--show-nfc|--show-ntuple devname [rule N]

Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c | 483 ++++++++++++++++++++++++-
 drivers/net/ethernet/broadcom/genet/bcmgenet.h |  16 +
 2 files changed, 488 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ad41944d2cc0..5ef1ea7e5312 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -479,6 +479,30 @@ static void bcmgenet_hfb_enable_filter(struct bcmgenet_priv *priv, u32 f_index)
 	reg = bcmgenet_hfb_reg_readl(priv, offset);
 	reg |= (1 << (f_index % 32));
 	bcmgenet_hfb_reg_writel(priv, reg, offset);
+	reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL);
+	reg |= RBUF_HFB_EN;
+	bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL);
+}
+
+static void bcmgenet_hfb_disable_filter(struct bcmgenet_priv *priv, u32 f_index)
+{
+	u32 offset, reg, reg1;
+
+	offset = HFB_FLT_ENABLE_V3PLUS;
+	reg = bcmgenet_hfb_reg_readl(priv, offset);
+	reg1 = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32));
+	if  (f_index < 32) {
+		reg1 &= ~(1 << (f_index % 32));
+		bcmgenet_hfb_reg_writel(priv, reg1, offset + sizeof(u32));
+	} else {
+		reg &= ~(1 << (f_index % 32));
+		bcmgenet_hfb_reg_writel(priv, reg, offset);
+	}
+	if (!reg && !reg1) {
+		reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL);
+		reg &= ~RBUF_HFB_EN;
+		bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL);
+	}
 }
 
 static void bcmgenet_hfb_set_filter_rx_queue_mapping(struct bcmgenet_priv *priv,
@@ -513,13 +537,213 @@ static int bcmgenet_hfb_find_unused_filter(struct bcmgenet_priv *priv)
 {
 	u32 f_index;
 
-	for (f_index = 0; f_index < priv->hw_params->hfb_filter_cnt; f_index++)
+	/* First MAX_NUM_OF_FS_RULES are reserved for Rx NFC filters */
+	for (f_index = MAX_NUM_OF_FS_RULES;
+	     f_index < priv->hw_params->hfb_filter_cnt; f_index++)
 		if (!bcmgenet_hfb_is_filter_enabled(priv, f_index))
 			return f_index;
 
 	return -ENOMEM;
 }
 
+static int bcmgenet_hfb_validate_mask(void *mask, size_t size)
+{
+	while (size) {
+		switch (*(unsigned char *)mask++) {
+		case 0x00:
+		case 0x0f:
+		case 0xf0:
+		case 0xff:
+			size--;
+			continue;
+		default:
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+#define VALIDATE_MASK(x) \
+	bcmgenet_hfb_validate_mask(&(x), sizeof(x))
+
+static int bcmgenet_hfb_insert_data(u32 *f, int offset,
+				    void *val, void *mask, size_t size)
+{
+	int index;
+	u32 tmp;
+
+	index = offset / 2;
+	tmp = f[index];
+
+	while (size--) {
+		if (offset++ & 1) {
+			tmp &= ~0x300FF;
+			tmp |= (*(unsigned char *)val++);
+			switch ((*(unsigned char *)mask++)) {
+			case 0xFF:
+				tmp |= 0x30000;
+				break;
+			case 0xF0:
+				tmp |= 0x20000;
+				break;
+			case 0x0F:
+				tmp |= 0x10000;
+				break;
+			}
+			f[index++] = tmp;
+			if (size)
+				tmp = f[index];
+		} else {
+			tmp &= ~0xCFF00;
+			tmp |= (*(unsigned char *)val++) << 8;
+			switch ((*(unsigned char *)mask++)) {
+			case 0xFF:
+				tmp |= 0xC0000;
+				break;
+			case 0xF0:
+				tmp |= 0x80000;
+				break;
+			case 0x0F:
+				tmp |= 0x40000;
+				break;
+			}
+			if (!size)
+				f[index] = tmp;
+		}
+	}
+
+	return 0;
+}
+
+static void bcmgenet_hfb_set_filter(struct bcmgenet_priv *priv, u32 *f_data,
+				    u32 f_length, u32 rx_queue, int f_index)
+{
+	u32 base = f_index * priv->hw_params->hfb_filter_size;
+	int i;
+
+	for (i = 0; i < f_length; i++)
+		bcmgenet_hfb_writel(priv, f_data[i], (base + i) * sizeof(u32));
+
+	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
+	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
+}
+
+static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv,
+					    struct bcmgenet_rxnfc_rule *rule)
+{
+	struct ethtool_rx_flow_spec *fs = &rule->fs;
+	int err = 0, offset = 0, f_length = 0;
+	u16 val_16, mask_16;
+	u8 val_8, mask_8;
+	size_t size;
+	u32 *f_data;
+
+	f_data = kcalloc(priv->hw_params->hfb_filter_size, sizeof(u32),
+			 GFP_KERNEL);
+	if (!f_data)
+		return -ENOMEM;
+
+	if (fs->flow_type & FLOW_MAC_EXT) {
+		bcmgenet_hfb_insert_data(f_data, 0,
+					 &fs->h_ext.h_dest, &fs->m_ext.h_dest,
+					 sizeof(fs->h_ext.h_dest));
+	}
+
+	if (fs->flow_type & FLOW_EXT) {
+		if (fs->m_ext.vlan_etype ||
+		    fs->m_ext.vlan_tci) {
+			bcmgenet_hfb_insert_data(f_data, 12,
+						 &fs->h_ext.vlan_etype,
+						 &fs->m_ext.vlan_etype,
+						 sizeof(fs->h_ext.vlan_etype));
+			bcmgenet_hfb_insert_data(f_data, 14,
+						 &fs->h_ext.vlan_tci,
+						 &fs->m_ext.vlan_tci,
+						 sizeof(fs->h_ext.vlan_tci));
+			offset += VLAN_HLEN;
+			f_length += DIV_ROUND_UP(VLAN_HLEN, 2);
+		}
+	}
+
+	switch (fs->flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case ETHER_FLOW:
+		f_length += DIV_ROUND_UP(ETH_HLEN, 2);
+		bcmgenet_hfb_insert_data(f_data, 0,
+					 &fs->h_u.ether_spec.h_dest,
+					 &fs->m_u.ether_spec.h_dest,
+					 sizeof(fs->h_u.ether_spec.h_dest));
+		bcmgenet_hfb_insert_data(f_data, ETH_ALEN,
+					 &fs->h_u.ether_spec.h_source,
+					 &fs->m_u.ether_spec.h_source,
+					 sizeof(fs->h_u.ether_spec.h_source));
+		bcmgenet_hfb_insert_data(f_data, (2 * ETH_ALEN) + offset,
+					 &fs->h_u.ether_spec.h_proto,
+					 &fs->m_u.ether_spec.h_proto,
+					 sizeof(fs->h_u.ether_spec.h_proto));
+		break;
+	case IP_USER_FLOW:
+		f_length += DIV_ROUND_UP(ETH_HLEN + 20, 2);
+		/* Specify IP Ether Type */
+		val_16 = htons(ETH_P_IP);
+		mask_16 = 0xFFFF;
+		bcmgenet_hfb_insert_data(f_data, (2 * ETH_ALEN) + offset,
+					 &val_16, &mask_16, sizeof(val_16));
+		bcmgenet_hfb_insert_data(f_data, 15 + offset,
+					 &fs->h_u.usr_ip4_spec.tos,
+					 &fs->m_u.usr_ip4_spec.tos,
+					 sizeof(fs->h_u.usr_ip4_spec.tos));
+		bcmgenet_hfb_insert_data(f_data, 23 + offset,
+					 &fs->h_u.usr_ip4_spec.proto,
+					 &fs->m_u.usr_ip4_spec.proto,
+					 sizeof(fs->h_u.usr_ip4_spec.proto));
+		bcmgenet_hfb_insert_data(f_data, 26 + offset,
+					 &fs->h_u.usr_ip4_spec.ip4src,
+					 &fs->m_u.usr_ip4_spec.ip4src,
+					 sizeof(fs->h_u.usr_ip4_spec.ip4src));
+		bcmgenet_hfb_insert_data(f_data, 30 + offset,
+					 &fs->h_u.usr_ip4_spec.ip4dst,
+					 &fs->m_u.usr_ip4_spec.ip4dst,
+					 sizeof(fs->h_u.usr_ip4_spec.ip4dst));
+		if (!fs->m_u.usr_ip4_spec.l4_4_bytes)
+			break;
+
+		/* Only supports 20 byte IPv4 header */
+		val_8 = 0x45;
+		mask_8 = 0xFF;
+		bcmgenet_hfb_insert_data(f_data, ETH_HLEN + offset,
+					 &val_8, &mask_8,
+					 sizeof(val_8));
+		size = sizeof(fs->h_u.usr_ip4_spec.l4_4_bytes);
+		bcmgenet_hfb_insert_data(f_data,
+					 ETH_HLEN + 20 + offset,
+					 &fs->h_u.usr_ip4_spec.l4_4_bytes,
+					 &fs->m_u.usr_ip4_spec.l4_4_bytes,
+					 size);
+		f_length += DIV_ROUND_UP(size, 2);
+		break;
+	}
+
+	if (!fs->ring_cookie) {
+		/* Ring 0 flows can be handled by the default Descriptor Ring
+		 * We'll map them to ring 0, but don't enable the filter
+		 */
+		bcmgenet_hfb_set_filter(priv, f_data, f_length,	0,
+					fs->location);
+		rule->state = BCMGENET_RXNFC_STATE_DISABLED;
+	} else {
+		/* Other Rx rings are direct mapped here */
+		bcmgenet_hfb_set_filter(priv, f_data, f_length,
+					fs->ring_cookie, fs->location);
+		bcmgenet_hfb_enable_filter(priv, fs->location);
+		rule->state = BCMGENET_RXNFC_STATE_ENABLED;
+	}
+
+	kfree(f_data);
+
+	return err;
+}
+
 /* bcmgenet_hfb_add_filter
  *
  * Add new filter to Hardware Filter Block to match and direct Rx traffic to
@@ -559,7 +783,6 @@ int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
 			    u32 f_length, u32 rx_queue)
 {
 	int f_index;
-	u32 i;
 
 	f_index = bcmgenet_hfb_find_unused_filter(priv);
 	if (f_index < 0)
@@ -568,15 +791,8 @@ int bcmgenet_hfb_add_filter(struct bcmgenet_priv *priv, u32 *f_data,
 	if (f_length > priv->hw_params->hfb_filter_size)
 		return -EINVAL;
 
-	for (i = 0; i < f_length; i++)
-		bcmgenet_hfb_writel(priv, f_data[i],
-			(f_index * priv->hw_params->hfb_filter_size + i) *
-			sizeof(u32));
-
-	bcmgenet_hfb_set_filter_length(priv, f_index, 2 * f_length);
-	bcmgenet_hfb_set_filter_rx_queue_mapping(priv, f_index, rx_queue);
+	bcmgenet_hfb_set_filter(priv, f_data, f_length, rx_queue, f_index);
 	bcmgenet_hfb_enable_filter(priv, f_index);
-	bcmgenet_hfb_reg_writel(priv, 0x1, HFB_CTRL);
 
 	return 0;
 }
@@ -607,9 +823,17 @@ static void bcmgenet_hfb_clear(struct bcmgenet_priv *priv)
 
 static void bcmgenet_hfb_init(struct bcmgenet_priv *priv)
 {
+	int i;
+
 	if (GENET_IS_V1(priv) || GENET_IS_V2(priv))
 		return;
 
+	INIT_LIST_HEAD(&priv->rxnfc_list);
+	for (i = 0; i < MAX_NUM_OF_FS_RULES; i++) {
+		INIT_LIST_HEAD(&priv->rxnfc_rules[i].list);
+		priv->rxnfc_rules[i].state = BCMGENET_RXNFC_STATE_UNUSED;
+	}
+
 	bcmgenet_hfb_clear(priv);
 }
 
@@ -1197,6 +1421,228 @@ static int bcmgenet_set_eee(struct net_device *dev, struct ethtool_eee *e)
 	return phy_ethtool_set_eee(dev->phydev, e);
 }
 
+static int bcmgenet_validate_flow(struct net_device *dev,
+				  struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_usrip4_spec *l4_mask;
+	struct ethhdr *eth_mask;
+
+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES) {
+		netdev_err(dev, "rxnfc: Invalid location (%d)\n",
+			   cmd->fs.location);
+		return -EINVAL;
+	}
+
+	switch (cmd->fs.flow_type & ~(FLOW_EXT | FLOW_MAC_EXT)) {
+	case IP_USER_FLOW:
+		l4_mask = &cmd->fs.m_u.usr_ip4_spec;
+		/* don't allow mask which isn't valid */
+		if (VALIDATE_MASK(l4_mask->ip4src) ||
+		    VALIDATE_MASK(l4_mask->ip4dst) ||
+		    VALIDATE_MASK(l4_mask->l4_4_bytes) ||
+		    VALIDATE_MASK(l4_mask->proto) ||
+		    VALIDATE_MASK(l4_mask->ip_ver) ||
+		    VALIDATE_MASK(l4_mask->tos)) {
+			netdev_err(dev, "rxnfc: Unsupported mask\n");
+			return -EINVAL;
+		}
+		break;
+	case ETHER_FLOW:
+		eth_mask = &cmd->fs.m_u.ether_spec;
+		/* don't allow mask which isn't valid */
+		if (VALIDATE_MASK(eth_mask->h_dest) ||
+		    VALIDATE_MASK(eth_mask->h_source) ||
+		    VALIDATE_MASK(eth_mask->h_proto)) {
+			netdev_err(dev, "rxnfc: Unsupported mask\n");
+			return -EINVAL;
+		}
+		break;
+	default:
+		netdev_err(dev, "rxnfc: Unsupported flow type (0x%x)\n",
+			   cmd->fs.flow_type);
+		return -EINVAL;
+	}
+
+	if ((cmd->fs.flow_type & FLOW_EXT)) {
+		/* don't allow mask which isn't valid */
+		if (VALIDATE_MASK(cmd->fs.m_ext.vlan_etype) ||
+		    VALIDATE_MASK(cmd->fs.m_ext.vlan_tci)) {
+			netdev_err(dev, "rxnfc: Unsupported mask\n");
+			return -EINVAL;
+		}
+		if (cmd->fs.m_ext.data[0] || cmd->fs.m_ext.data[1]) {
+			netdev_err(dev, "rxnfc: user-def not supported\n");
+			return -EINVAL;
+		}
+	}
+
+	if ((cmd->fs.flow_type & FLOW_MAC_EXT)) {
+		/* don't allow mask which isn't valid */
+		if (VALIDATE_MASK(cmd->fs.m_ext.h_dest)) {
+			netdev_err(dev, "rxnfc: Unsupported mask\n");
+			return -EINVAL;
+		}
+	}
+
+	return 0;
+}
+
+static int bcmgenet_insert_flow(struct net_device *dev,
+				struct ethtool_rxnfc *cmd)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rxnfc_rule *loc_rule;
+	int err;
+
+	if (priv->hw_params->hfb_filter_size < 128) {
+		netdev_err(dev, "rxnfc: Not supported by this device\n");
+		return -EINVAL;
+	}
+
+	if (cmd->fs.ring_cookie > priv->hw_params->rx_queues) {
+		netdev_err(dev, "rxnfc: Unsupported action (%llu)\n",
+			   cmd->fs.ring_cookie);
+		return -EINVAL;
+	}
+
+	err = bcmgenet_validate_flow(dev, cmd);
+	if (err)
+		return err;
+
+	loc_rule = &priv->rxnfc_rules[cmd->fs.location];
+	if (loc_rule->state == BCMGENET_RXNFC_STATE_ENABLED)
+		bcmgenet_hfb_disable_filter(priv, cmd->fs.location);
+	if (loc_rule->state != BCMGENET_RXNFC_STATE_UNUSED)
+		list_del(&loc_rule->list);
+	loc_rule->state = BCMGENET_RXNFC_STATE_UNUSED;
+	memcpy(&loc_rule->fs, &cmd->fs,
+	       sizeof(struct ethtool_rx_flow_spec));
+
+	err = bcmgenet_hfb_create_rxnfc_filter(priv, loc_rule);
+	if (err) {
+		netdev_err(dev, "rxnfc: Could not install rule (%d)\n",
+			   err);
+		return err;
+	}
+
+	list_add_tail(&loc_rule->list, &priv->rxnfc_list);
+
+	return 0;
+}
+
+static int bcmgenet_delete_flow(struct net_device *dev,
+				struct ethtool_rxnfc *cmd)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rxnfc_rule *rule;
+	int err = 0;
+
+	if (cmd->fs.location >= MAX_NUM_OF_FS_RULES)
+		return -EINVAL;
+
+	rule = &priv->rxnfc_rules[cmd->fs.location];
+	if (rule->state == BCMGENET_RXNFC_STATE_UNUSED) {
+		err =  -ENOENT;
+		goto out;
+	}
+
+	if (rule->state == BCMGENET_RXNFC_STATE_ENABLED)
+		bcmgenet_hfb_disable_filter(priv, cmd->fs.location);
+	if (rule->state != BCMGENET_RXNFC_STATE_UNUSED)
+		list_del(&rule->list);
+	rule->state = BCMGENET_RXNFC_STATE_UNUSED;
+	memset(&rule->fs, 0, sizeof(struct ethtool_rx_flow_spec));
+
+out:
+	return err;
+}
+
+static int bcmgenet_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	int err = 0;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		err = bcmgenet_insert_flow(dev, cmd);
+		break;
+	case ETHTOOL_SRXCLSRLDEL:
+		err = bcmgenet_delete_flow(dev, cmd);
+		break;
+	default:
+		netdev_warn(priv->dev, "Unsupported ethtool command. (%d)\n",
+			    cmd->cmd);
+		return -EINVAL;
+	}
+
+	return err;
+}
+
+static int bcmgenet_get_flow(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			     int loc)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rxnfc_rule *rule;
+	int err = 0;
+
+	if (loc < 0 || loc >= MAX_NUM_OF_FS_RULES)
+		return -EINVAL;
+
+	rule = &priv->rxnfc_rules[loc];
+	if (rule->state == BCMGENET_RXNFC_STATE_UNUSED)
+		err = -ENOENT;
+	else
+		memcpy(&cmd->fs, &rule->fs,
+		       sizeof(struct ethtool_rx_flow_spec));
+
+	return err;
+}
+
+static int bcmgenet_get_num_flows(struct bcmgenet_priv *priv)
+{
+	struct list_head *pos;
+	int res = 0;
+
+	list_for_each(pos, &priv->rxnfc_list)
+		res++;
+
+	return res;
+}
+
+static int bcmgenet_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
+			      u32 *rule_locs)
+{
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	struct bcmgenet_rxnfc_rule *rule;
+	int err = 0;
+	int i = 0;
+
+	switch (cmd->cmd) {
+	case ETHTOOL_GRXRINGS:
+		cmd->data = priv->hw_params->rx_queues ?: 1;
+		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = bcmgenet_get_num_flows(priv);
+		cmd->data = MAX_NUM_OF_FS_RULES;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		err = bcmgenet_get_flow(dev, cmd, cmd->fs.location);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		list_for_each_entry(rule, &priv->rxnfc_list, list)
+			if (i < cmd->rule_cnt)
+				rule_locs[i++] = rule->fs.location;
+		cmd->rule_cnt = i;
+		cmd->data = MAX_NUM_OF_FS_RULES;
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
 /* standard ethtool support functions. */
 static const struct ethtool_ops bcmgenet_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_RX_USECS |
@@ -1221,6 +1667,8 @@ static const struct ethtool_ops bcmgenet_ethtool_ops = {
 	.get_link_ksettings	= bcmgenet_get_link_ksettings,
 	.set_link_ksettings	= bcmgenet_set_link_ksettings,
 	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_rxnfc		= bcmgenet_get_rxnfc,
+	.set_rxnfc		= bcmgenet_set_rxnfc,
 };
 
 /* Power down the unimac, based on mode. */
@@ -3730,8 +4178,8 @@ static int bcmgenet_resume(struct device *d)
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	unsigned long dma_ctrl;
+	u32 offset, reg;
 	int ret;
-	u32 reg;
 
 	if (!netif_running(dev))
 		return 0;
@@ -3766,6 +4214,11 @@ static int bcmgenet_resume(struct device *d)
 
 	bcmgenet_set_hw_addr(priv, dev->dev_addr);
 
+	offset = HFB_FLT_ENABLE_V3PLUS;
+	bcmgenet_hfb_reg_writel(priv, priv->hfb_en[1], offset);
+	bcmgenet_hfb_reg_writel(priv, priv->hfb_en[2], offset + sizeof(u32));
+	bcmgenet_hfb_reg_writel(priv, priv->hfb_en[0], HFB_CTRL);
+
 	if (priv->internal_phy) {
 		reg = bcmgenet_ext_readl(priv, EXT_EXT_PWR_MGMT);
 		reg |= EXT_ENERGY_DET_MASK;
@@ -3809,6 +4262,7 @@ static int bcmgenet_suspend(struct device *d)
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 	int ret = 0;
+	u32 offset;
 
 	if (!netif_running(dev))
 		return 0;
@@ -3820,6 +4274,13 @@ static int bcmgenet_suspend(struct device *d)
 	if (!device_may_wakeup(d))
 		phy_suspend(dev->phydev);
 
+	/* Preserve filter state and disable filtering */
+	priv->hfb_en[0] = bcmgenet_hfb_reg_readl(priv, HFB_CTRL);
+	offset = HFB_FLT_ENABLE_V3PLUS;
+	priv->hfb_en[1] = bcmgenet_hfb_reg_readl(priv, offset);
+	priv->hfb_en[2] = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32));
+	bcmgenet_hfb_reg_writel(priv, 0, HFB_CTRL);
+
 	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
 	if (device_may_wakeup(d) && priv->wolopts)
 		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index a858b7305832..031d91f45067 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -32,6 +32,7 @@
 #define DMA_MAX_BURST_LENGTH    0x10
 
 /* misc. configuration */
+#define MAX_NUM_OF_FS_RULES		16
 #define CLEAR_ALL_HFB			0xFF
 #define DMA_FC_THRESH_HI		(TOTAL_DESC >> 4)
 #define DMA_FC_THRESH_LO		5
@@ -609,6 +610,18 @@ struct bcmgenet_rx_ring {
 	struct bcmgenet_priv *priv;
 };
 
+enum bcmgenet_rxnfc_state {
+	BCMGENET_RXNFC_STATE_UNUSED = 0,
+	BCMGENET_RXNFC_STATE_DISABLED,
+	BCMGENET_RXNFC_STATE_ENABLED
+};
+
+struct bcmgenet_rxnfc_rule {
+	struct	list_head list;
+	struct ethtool_rx_flow_spec	fs;
+	enum bcmgenet_rxnfc_state state;
+};
+
 /* device context */
 struct bcmgenet_priv {
 	void __iomem *base;
@@ -627,6 +640,8 @@ struct bcmgenet_priv {
 	struct enet_cb *rx_cbs;
 	unsigned int num_rx_bds;
 	unsigned int rx_buf_len;
+	struct bcmgenet_rxnfc_rule rxnfc_rules[MAX_NUM_OF_FS_RULES];
+	struct list_head rxnfc_list;
 
 	struct bcmgenet_rx_ring rx_rings[DESC_INDEX + 1];
 
@@ -679,6 +694,7 @@ struct bcmgenet_priv {
 	u32 wolopts;
 	u8 sopass[SOPASS_MAX];
 	bool wol_active;
+	u32 hfb_en[3];
 
 	struct bcmgenet_mib_counters mib;
 
-- 
cgit v1.2.3-59-g8ed1b


From f50932cca632fb87ab4de678ecc7c3b41116140b Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Wed, 29 Apr 2020 13:02:06 -0700
Subject: net: bcmgenet: add WAKE_FILTER support

This commit enables support for the WAKE_FILTER method of Wake on
LAN for the GENET driver. The method can be enabled by adding 'f'
to the interface 'wol' setting specified by ethtool.

Rx network flow rules can be specified using ethtool. Rules that
define a flow-type with the RX_CLS_FLOW_WAKE action (i.e. -2) can
wake the system from the 'standby' power state when the WAKE_FILTER
WoL method is enabled.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c     |  5 ++-
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c | 43 +++++++++++++++++-----
 2 files changed, 37 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index 5ef1ea7e5312..ad614d7201bd 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -724,7 +724,7 @@ static int bcmgenet_hfb_create_rxnfc_filter(struct bcmgenet_priv *priv,
 		break;
 	}
 
-	if (!fs->ring_cookie) {
+	if (!fs->ring_cookie || fs->ring_cookie == RX_CLS_FLOW_WAKE) {
 		/* Ring 0 flows can be handled by the default Descriptor Ring
 		 * We'll map them to ring 0, but don't enable the filter
 		 */
@@ -1499,7 +1499,8 @@ static int bcmgenet_insert_flow(struct net_device *dev,
 		return -EINVAL;
 	}
 
-	if (cmd->fs.ring_cookie > priv->hw_params->rx_queues) {
+	if (cmd->fs.ring_cookie > priv->hw_params->rx_queues &&
+	    cmd->fs.ring_cookie != RX_CLS_FLOW_WAKE) {
 		netdev_err(dev, "rxnfc: Unsupported action (%llu)\n",
 			   cmd->fs.ring_cookie);
 		return -EINVAL;
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index da45a4645b94..4b9d65f392c2 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -42,7 +42,7 @@ void bcmgenet_get_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 {
 	struct bcmgenet_priv *priv = netdev_priv(dev);
 
-	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE;
+	wol->supported = WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER;
 	wol->wolopts = priv->wolopts;
 	memset(wol->sopass, 0, sizeof(wol->sopass));
 
@@ -61,7 +61,7 @@ int bcmgenet_set_wol(struct net_device *dev, struct ethtool_wolinfo *wol)
 	if (!device_can_wakeup(kdev))
 		return -ENOTSUPP;
 
-	if (wol->wolopts & ~(WAKE_MAGIC | WAKE_MAGICSECURE))
+	if (wol->wolopts & ~(WAKE_MAGIC | WAKE_MAGICSECURE | WAKE_FILTER))
 		return -EINVAL;
 
 	if (wol->wolopts & WAKE_MAGICSECURE)
@@ -117,8 +117,9 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 				enum bcmgenet_power_mode mode)
 {
 	struct net_device *dev = priv->dev;
+	struct bcmgenet_rxnfc_rule *rule;
+	u32 reg, hfb_ctrl_reg, hfb_enable = 0;
 	int retries = 0;
-	u32 reg;
 
 	if (mode != GENET_POWER_WOL_MAGIC) {
 		netif_err(priv, wol, dev, "unsupported mode: %d\n", mode);
@@ -135,13 +136,24 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 	bcmgenet_umac_writel(priv, reg, UMAC_CMD);
 	mdelay(10);
 
-	reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
-	reg |= MPD_EN;
-	if (priv->wolopts & WAKE_MAGICSECURE) {
-		bcmgenet_set_mpd_password(priv);
-		reg |= MPD_PW_EN;
+	if (priv->wolopts & (WAKE_MAGIC | WAKE_MAGICSECURE)) {
+		reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
+		reg |= MPD_EN;
+		if (priv->wolopts & WAKE_MAGICSECURE) {
+			bcmgenet_set_mpd_password(priv);
+			reg |= MPD_PW_EN;
+		}
+		bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
+	}
+
+	hfb_ctrl_reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL);
+	if (priv->wolopts & WAKE_FILTER) {
+		list_for_each_entry(rule, &priv->rxnfc_list, list)
+			if (rule->fs.ring_cookie == RX_CLS_FLOW_WAKE)
+				hfb_enable |= (1 << rule->fs.location);
+		reg = (hfb_ctrl_reg & ~RBUF_HFB_EN) | RBUF_ACPI_EN;
+		bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL);
 	}
-	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
 	/* Do not leave UniMAC in MPD mode only */
 	retries = bcmgenet_poll_wol_status(priv);
@@ -149,6 +161,7 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 		reg = bcmgenet_umac_readl(priv, UMAC_MPD_CTRL);
 		reg &= ~(MPD_EN | MPD_PW_EN);
 		bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
+		bcmgenet_hfb_reg_writel(priv, hfb_ctrl_reg, HFB_CTRL);
 		return retries;
 	}
 
@@ -158,6 +171,13 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 	clk_prepare_enable(priv->clk_wol);
 	priv->wol_active = 1;
 
+	if (hfb_enable) {
+		bcmgenet_hfb_reg_writel(priv, hfb_enable,
+					HFB_FLT_ENABLE_V3PLUS + 4);
+		hfb_ctrl_reg = RBUF_HFB_EN | RBUF_ACPI_EN;
+		bcmgenet_hfb_reg_writel(priv, hfb_ctrl_reg, HFB_CTRL);
+	}
+
 	/* Enable CRC forward */
 	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
 	priv->crc_fwd_en = 1;
@@ -197,6 +217,11 @@ void bcmgenet_wol_power_up_cfg(struct bcmgenet_priv *priv,
 	reg &= ~(MPD_EN | MPD_PW_EN);
 	bcmgenet_umac_writel(priv, reg, UMAC_MPD_CTRL);
 
+	/* Disable WAKE_FILTER Detection */
+	reg = bcmgenet_hfb_reg_readl(priv, HFB_CTRL);
+	reg &= ~(RBUF_HFB_EN | RBUF_ACPI_EN);
+	bcmgenet_hfb_reg_writel(priv, reg, HFB_CTRL);
+
 	/* Disable CRC Forward */
 	reg = bcmgenet_umac_readl(priv, UMAC_CMD);
 	reg &= ~CMD_CRC_FWD;
-- 
cgit v1.2.3-59-g8ed1b


From 91f658587a962378a410cc7dc90e122a4ccd7cf3 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Wed, 29 Apr 2020 17:51:27 -0700
Subject: bpf, riscv: Fix stack layout of JITed code on RV32

This patch fixes issues with stackframe unwinding and alignment in the
current stack layout for BPF programs on RV32.

In the current layout, RV32 fp points to the JIT scratch registers, rather
than to the callee-saved registers. This breaks stackframe unwinding,
which expects fp to point just above the saved ra and fp registers.

This patch fixes the issue by moving the callee-saved registers to be
stored on the top of the stack, pointed to by fp. This satisfies the
assumptions of stackframe unwinding.

This patch also fixes an issue with the old layout that the stack was
not aligned to 16 bytes.

Stacktrace from JITed code using the old stack layout:

  [   12.196249 ] [<c0402200>] walk_stackframe+0x0/0x96

Stacktrace using the new stack layout:

  [   13.062888 ] [<c0402200>] walk_stackframe+0x0/0x96
  [   13.063028 ] [<c04023c6>] show_stack+0x28/0x32
  [   13.063253 ] [<a403e778>] bpf_prog_82b916b2dfa00464+0x80/0x908
  [   13.063417 ] [<c09270b2>] bpf_test_run+0x124/0x39a
  [   13.063553 ] [<c09276c0>] bpf_prog_test_run_skb+0x234/0x448
  [   13.063704 ] [<c048510e>] __do_sys_bpf+0x766/0x13b4
  [   13.063840 ] [<c0485d82>] sys_bpf+0xc/0x14
  [   13.063961 ] [<c04010f0>] ret_from_syscall+0x0/0x2

The new code is also simpler to understand and includes an ASCII diagram
of the stack layout.

Tested on riscv32 QEMU virt machine.

Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Xi Wang <xi.wang@gmail.com>
Link: https://lore.kernel.org/bpf/20200430005127.2205-1-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp32.c | 98 +++++++++++++++++++++++++++--------------
 1 file changed, 65 insertions(+), 33 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp32.c b/arch/riscv/net/bpf_jit_comp32.c
index 11083d4d5f2d..b198eaa74456 100644
--- a/arch/riscv/net/bpf_jit_comp32.c
+++ b/arch/riscv/net/bpf_jit_comp32.c
@@ -13,8 +13,35 @@
 #include <linux/filter.h>
 #include "bpf_jit.h"
 
+/*
+ * Stack layout during BPF program execution:
+ *
+ *                     high
+ *     RV32 fp =>  +----------+
+ *                 | saved ra |
+ *                 | saved fp | RV32 callee-saved registers
+ *                 |   ...    |
+ *                 +----------+ <= (fp - 4 * NR_SAVED_REGISTERS)
+ *                 |  hi(R6)  |
+ *                 |  lo(R6)  |
+ *                 |  hi(R7)  | JIT scratch space for BPF registers
+ *                 |  lo(R7)  |
+ *                 |   ...    |
+ *  BPF_REG_FP =>  +----------+ <= (fp - 4 * NR_SAVED_REGISTERS
+ *                 |          |        - 4 * BPF_JIT_SCRATCH_REGS)
+ *                 |          |
+ *                 |   ...    | BPF program stack
+ *                 |          |
+ *     RV32 sp =>  +----------+
+ *                 |          |
+ *                 |   ...    | Function call stack
+ *                 |          |
+ *                 +----------+
+ *                     low
+ */
+
 enum {
-	/* Stack layout - these are offsets from (top of stack - 4). */
+	/* Stack layout - these are offsets from top of JIT scratch space. */
 	BPF_R6_HI,
 	BPF_R6_LO,
 	BPF_R7_HI,
@@ -29,7 +56,11 @@ enum {
 	BPF_JIT_SCRATCH_REGS,
 };
 
-#define STACK_OFFSET(k) (-4 - ((k) * 4))
+/* Number of callee-saved registers stored to stack: ra, fp, s1--s7. */
+#define NR_SAVED_REGISTERS	9
+
+/* Offset from fp for BPF registers stored on stack. */
+#define STACK_OFFSET(k)	(-4 - (4 * NR_SAVED_REGISTERS) - (4 * (k)))
 
 #define TMP_REG_1	(MAX_BPF_JIT_REG + 0)
 #define TMP_REG_2	(MAX_BPF_JIT_REG + 1)
@@ -111,11 +142,9 @@ static void emit_imm64(const s8 *rd, s32 imm_hi, s32 imm_lo,
 
 static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 {
-	int stack_adjust = ctx->stack_size, store_offset = stack_adjust - 4;
+	int stack_adjust = ctx->stack_size;
 	const s8 *r0 = bpf2rv32[BPF_REG_0];
 
-	store_offset -= 4 * BPF_JIT_SCRATCH_REGS;
-
 	/* Set return value if not tail call. */
 	if (!is_tail_call) {
 		emit(rv_addi(RV_REG_A0, lo(r0), 0), ctx);
@@ -123,15 +152,15 @@ static void __build_epilogue(bool is_tail_call, struct rv_jit_context *ctx)
 	}
 
 	/* Restore callee-saved registers. */
-	emit(rv_lw(RV_REG_RA, store_offset - 0, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_FP, store_offset - 4, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S1, store_offset - 8, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S2, store_offset - 12, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S3, store_offset - 16, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S4, store_offset - 20, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S5, store_offset - 24, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S6, store_offset - 28, RV_REG_SP), ctx);
-	emit(rv_lw(RV_REG_S7, store_offset - 32, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_RA, stack_adjust - 4, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_FP, stack_adjust - 8, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S1, stack_adjust - 12, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S2, stack_adjust - 16, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S3, stack_adjust - 20, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S4, stack_adjust - 24, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S5, stack_adjust - 28, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S6, stack_adjust - 32, RV_REG_SP), ctx);
+	emit(rv_lw(RV_REG_S7, stack_adjust - 36, RV_REG_SP), ctx);
 
 	emit(rv_addi(RV_REG_SP, RV_REG_SP, stack_adjust), ctx);
 
@@ -1260,17 +1289,20 @@ notsupported:
 
 void bpf_jit_build_prologue(struct rv_jit_context *ctx)
 {
-	/* Make space to save 9 registers: ra, fp, s1--s7. */
-	int stack_adjust = 9 * sizeof(u32), store_offset, bpf_stack_adjust;
 	const s8 *fp = bpf2rv32[BPF_REG_FP];
 	const s8 *r1 = bpf2rv32[BPF_REG_1];
-
-	bpf_stack_adjust = round_up(ctx->prog->aux->stack_depth, 16);
+	int stack_adjust = 0;
+	int bpf_stack_adjust =
+		round_up(ctx->prog->aux->stack_depth, STACK_ALIGN);
+
+	/* Make space for callee-saved registers. */
+	stack_adjust += NR_SAVED_REGISTERS * sizeof(u32);
+	/* Make space for BPF registers on stack. */
+	stack_adjust += BPF_JIT_SCRATCH_REGS * sizeof(u32);
+	/* Make space for BPF stack. */
 	stack_adjust += bpf_stack_adjust;
-
-	store_offset = stack_adjust - 4;
-
-	stack_adjust += 4 * BPF_JIT_SCRATCH_REGS;
+	/* Round up for stack alignment. */
+	stack_adjust = round_up(stack_adjust, STACK_ALIGN);
 
 	/*
 	 * The first instruction sets the tail-call-counter (TCC) register.
@@ -1281,24 +1313,24 @@ void bpf_jit_build_prologue(struct rv_jit_context *ctx)
 	emit(rv_addi(RV_REG_SP, RV_REG_SP, -stack_adjust), ctx);
 
 	/* Save callee-save registers. */
-	emit(rv_sw(RV_REG_SP, store_offset - 0, RV_REG_RA), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 4, RV_REG_FP), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 8, RV_REG_S1), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 12, RV_REG_S2), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 16, RV_REG_S3), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 20, RV_REG_S4), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 24, RV_REG_S5), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 28, RV_REG_S6), ctx);
-	emit(rv_sw(RV_REG_SP, store_offset - 32, RV_REG_S7), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 4, RV_REG_RA), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 8, RV_REG_FP), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 12, RV_REG_S1), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 16, RV_REG_S2), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 20, RV_REG_S3), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 24, RV_REG_S4), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 28, RV_REG_S5), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 32, RV_REG_S6), ctx);
+	emit(rv_sw(RV_REG_SP, stack_adjust - 36, RV_REG_S7), ctx);
 
 	/* Set fp: used as the base address for stacked BPF registers. */
 	emit(rv_addi(RV_REG_FP, RV_REG_SP, stack_adjust), ctx);
 
-	/* Set up BPF stack pointer. */
+	/* Set up BPF frame pointer. */
 	emit(rv_addi(lo(fp), RV_REG_SP, bpf_stack_adjust), ctx);
 	emit(rv_addi(hi(fp), RV_REG_ZERO, 0), ctx);
 
-	/* Set up context pointer. */
+	/* Set up BPF context pointer. */
 	emit(rv_addi(lo(r1), RV_REG_A0, 0), ctx);
 	emit(rv_addi(hi(r1), RV_REG_ZERO, 0), ctx);
 
-- 
cgit v1.2.3-59-g8ed1b


From 063e688133914505ddb396cc33231f22f12e0685 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 29 Apr 2020 19:14:36 -0700
Subject: libbpf: Fix false uninitialized variable warning

Some versions of GCC falsely detect that vi might not be initialized. That's
not true, but let's silence it with NULL initialization.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200430021436.1522502-1-andriin@fb.com
---
 tools/lib/bpf/libbpf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index d86ff8214b96..977add1b73e2 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -5003,8 +5003,8 @@ static int bpf_object__collect_map_relos(struct bpf_object *obj,
 					 GElf_Shdr *shdr, Elf_Data *data)
 {
 	int i, j, nrels, new_sz, ptr_sz = sizeof(void *);
+	const struct btf_var_secinfo *vi = NULL;
 	const struct btf_type *sec, *var, *def;
-	const struct btf_var_secinfo *vi;
 	const struct btf_member *member;
 	struct bpf_map *map, *targ_map;
 	const char *name, *mname;
-- 
cgit v1.2.3-59-g8ed1b


From c321022244708aec4675de4f032ef1ba9ff0c640 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Thu, 30 Apr 2020 12:47:38 +0200
Subject: selftests/bpf: Test allowed maps for bpf_sk_select_reuseport

Check that verifier allows passing a map of type:

 BPF_MAP_TYPE_REUSEPORT_SOCKARRAY, or
 BPF_MAP_TYPE_SOCKMAP, or
 BPF_MAP_TYPE_SOCKHASH

... to bpf_sk_select_reuseport helper.

Suggested-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200430104738.494180-1-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/test_verifier.c | 12 +++++++-
 tools/testing/selftests/bpf/verifier/sock.c | 45 +++++++++++++++++++++++++++++
 2 files changed, 56 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index ad6939c67c5e..21a1ce219c1c 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -50,7 +50,7 @@
 #define MAX_INSNS	BPF_MAXINSNS
 #define MAX_TEST_INSNS	1000000
 #define MAX_FIXUPS	8
-#define MAX_NR_MAPS	19
+#define MAX_NR_MAPS	20
 #define MAX_TEST_RUNS	8
 #define POINTER_VALUE	0xcafe4all
 #define TEST_DATA_LEN	64
@@ -86,6 +86,7 @@ struct bpf_test {
 	int fixup_map_array_small[MAX_FIXUPS];
 	int fixup_sk_storage_map[MAX_FIXUPS];
 	int fixup_map_event_output[MAX_FIXUPS];
+	int fixup_map_reuseport_array[MAX_FIXUPS];
 	const char *errstr;
 	const char *errstr_unpriv;
 	uint32_t insn_processed;
@@ -637,6 +638,7 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	int *fixup_map_array_small = test->fixup_map_array_small;
 	int *fixup_sk_storage_map = test->fixup_sk_storage_map;
 	int *fixup_map_event_output = test->fixup_map_event_output;
+	int *fixup_map_reuseport_array = test->fixup_map_reuseport_array;
 
 	if (test->fill_helper) {
 		test->fill_insns = calloc(MAX_TEST_INSNS, sizeof(struct bpf_insn));
@@ -806,6 +808,14 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 			fixup_map_event_output++;
 		} while (*fixup_map_event_output);
 	}
+	if (*fixup_map_reuseport_array) {
+		map_fds[19] = __create_map(BPF_MAP_TYPE_REUSEPORT_SOCKARRAY,
+					   sizeof(u32), sizeof(u64), 1, 0);
+		do {
+			prog[*fixup_map_reuseport_array].imm = map_fds[19];
+			fixup_map_reuseport_array++;
+		} while (*fixup_map_reuseport_array);
+	}
 }
 
 static int set_admin(bool admin)
diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index f87ad69dbc62..0bc51ad9e0fb 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -586,3 +586,48 @@
 	.prog_type = BPF_PROG_TYPE_SK_SKB,
 	.result = ACCEPT,
 },
+{
+	"bpf_sk_select_reuseport(ctx, reuseport_array, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_reuseport_array = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
+{
+	"bpf_sk_select_reuseport(ctx, sockmap, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockmap = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
+{
+	"bpf_sk_select_reuseport(ctx, sockhash, &key, flags)",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_4, 0),
+	BPF_ST_MEM(BPF_W, BPF_REG_10, -4, 0),
+	BPF_MOV64_REG(BPF_REG_3, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, -4),
+	BPF_LD_MAP_FD(BPF_REG_2, 0),
+	BPF_EMIT_CALL(BPF_FUNC_sk_select_reuseport),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_sockhash = { 4 },
+	.prog_type = BPF_PROG_TYPE_SK_REUSEPORT,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From 72d3fef16158b9c1852855a3846757ec165c16e1 Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Wed, 22 Apr 2020 09:19:08 +0300
Subject: net/mlx5: IPsec, Fix coverity issue

The cited commit introduced the following coverity issue at functions
mlx5_fpga_is_ipsec_device() and mlx5_fpga_ipsec_release_sa_ctx():
- bit_and_with_zero:
  accel_xfrm->attrs.action & MLX5_ACCEL_ESP_ACTION_DECRYPT is always 0.

MLX5_ACCEL_ESP_ACTION_DECRYPT is not a bitwise flag and its value is zero,
so using it with a bitwise AND was wrong: the above expression always
evaluates to zero.

Fix by using "==" comparison operator instead.

Fixes: 7dfee4b1d79e ("net/mlx5: IPsec, Refactor SA handle creation and destruction")
Signed-off-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
index 0604216eb94f..b463787d6ca1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/ipsec.c
@@ -708,7 +708,7 @@ void *mlx5_fpga_ipsec_create_sa_ctx(struct mlx5_core_dev *mdev,
 		goto exists;
 	}
 
-	if (accel_xfrm->attrs.action & MLX5_ACCEL_ESP_ACTION_DECRYPT) {
+	if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT) {
 		err = ida_simple_get(&fipsec->halloc, 1, 0, GFP_KERNEL);
 		if (err < 0) {
 			context = ERR_PTR(err);
@@ -759,7 +759,7 @@ delete_hash:
 				       rhash_sa));
 unlock_hash:
 	mutex_unlock(&fipsec->sa_hash_lock);
-	if (accel_xfrm->attrs.action & MLX5_ACCEL_ESP_ACTION_DECRYPT)
+	if (accel_xfrm->attrs.action == MLX5_ACCEL_ESP_ACTION_DECRYPT)
 		ida_simple_remove(&fipsec->halloc, sa_ctx->sa_handle);
 exists:
 	mutex_unlock(&fpga_xfrm->lock);
-- 
cgit v1.2.3-59-g8ed1b


From 9c8e7434e0349b26df82ed25522e812e4feeb873 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 3 Apr 2020 05:49:02 -0500
Subject: net/mlx5e: Use helper API to get devlink port index for all port
 flavours

Use existing helper API to get unique devlink port index for all
devlink port flavours.

Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c | 12 ++++--------
 1 file changed, 4 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 55457f268495..2de54d865dc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -2056,26 +2056,22 @@ static int register_devlink_port(struct mlx5_core_dev *dev,
 		return 0;
 
 	mlx5e_rep_get_port_parent_id(rpriv->netdev, &ppid);
+	dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
 	pfnum = PCI_FUNC(dev->pdev->devfn);
 
-	if (rep->vport == MLX5_VPORT_UPLINK) {
+	if (rep->vport == MLX5_VPORT_UPLINK)
 		devlink_port_attrs_set(&rpriv->dl_port,
 				       DEVLINK_PORT_FLAVOUR_PHYSICAL,
 				       pfnum, false, 0,
 				       &ppid.id[0], ppid.id_len);
-		dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
-	} else if (rep->vport == MLX5_VPORT_PF) {
+	else if (rep->vport == MLX5_VPORT_PF)
 		devlink_port_attrs_pci_pf_set(&rpriv->dl_port,
 					      &ppid.id[0], ppid.id_len,
 					      pfnum);
-		dl_port_index = rep->vport;
-	} else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch,
-					    rpriv->rep->vport)) {
+	else if (mlx5_eswitch_is_vf_vport(dev->priv.eswitch, rpriv->rep->vport))
 		devlink_port_attrs_pci_vf_set(&rpriv->dl_port,
 					      &ppid.id[0], ppid.id_len,
 					      pfnum, rep->vport - 1);
-		dl_port_index = vport_to_devlink_port_index(dev, rep->vport);
-	}
 
 	return devlink_port_register(devlink, &rpriv->dl_port, dl_port_index);
 }
-- 
cgit v1.2.3-59-g8ed1b


From e59b254cbecc10088b691a2abdaeb6ded872b7a1 Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Fri, 24 Apr 2020 16:43:57 +0800
Subject: net/mlx5e: Remove unneeded semicolon

Fixes coccicheck warning:

drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c:690:2-3: Unneeded semicolon

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 1079558d292a..77b3f372e831 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -687,7 +687,7 @@ mlx5_tc_ct_block_flow_offload(enum tc_setup_type type, void *type_data,
 		return mlx5_tc_ct_block_flow_offload_stats(ft, f);
 	default:
 		break;
-	};
+	}
 
 	return -EOPNOTSUPP;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 70a5698a5683cd504b03c6030ee622b1bec3f702 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Sun, 26 Apr 2020 09:52:02 +0300
Subject: net/mlx5e: CT: Avoid false warning about rule may be used
 uninitialized

Avoid the gcc warning by presetting rule to an invalid pointer.

Fixes: 4c3844d9e97e ("net/mlx5e: CT: Introduce connection tracking")
Signed-off-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 77b3f372e831..44f806e79e8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -1131,7 +1131,7 @@ mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
 {
 	bool clear_action = attr->ct_attr.ct_action & TCA_CT_ACT_CLEAR;
 	struct mlx5_tc_ct_priv *ct_priv = mlx5_tc_ct_get_ct_priv(priv);
-	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_handle *rule = ERR_PTR(-EINVAL);
 	int err;
 
 	if (!ct_priv)
-- 
cgit v1.2.3-59-g8ed1b


From d2658b4a1d06e8458f2c88ee600afa1b1acd2627 Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Tue, 14 Apr 2020 11:30:39 +0300
Subject: net/mlx5: CT: Remove unused variables

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Oz Shlomo <ozsh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 44f806e79e8d..5568ded97e0b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -73,9 +73,7 @@ struct mlx5_ct_ft {
 struct mlx5_ct_entry {
 	u16 zone;
 	struct rhash_head node;
-	struct flow_rule *flow_rule;
 	struct mlx5_fc *counter;
-	unsigned long lastuse;
 	unsigned long cookie;
 	unsigned long restore_cookie;
 	struct mlx5_ct_zone_rule zone_rules[2];
@@ -603,7 +601,6 @@ mlx5_tc_ct_block_flow_offload_add(struct mlx5_ct_ft *ft,
 		return -ENOMEM;
 
 	entry->zone = ft->zone;
-	entry->flow_rule = flow_rule;
 	entry->cookie = flow->cookie;
 	entry->restore_cookie = meta_action->ct_metadata.cookie;
 
-- 
cgit v1.2.3-59-g8ed1b


From 51dde00b8fb3cf07e577be6aa4d98ee1f34b84be Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 15 Apr 2020 19:34:03 +0300
Subject: net/mlx5: Remove unused field in EQ

The size field in EQ is not in use.
Remove it.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Tal Gilboa <talgi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
index 4be4d2d36218..4aaca7400fb2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/eq.h
@@ -27,7 +27,6 @@ struct mlx5_eq {
 	__be32 __iomem	        *doorbell;
 	u32                     cons_index;
 	struct mlx5_frag_buf    buf;
-	int                     size;
 	unsigned int            vecidx;
 	unsigned int            irqn;
 	u8                      eqn;
-- 
cgit v1.2.3-59-g8ed1b


From c655c1f46957eb4f30221c52580e38f85058e167 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Mon, 30 Mar 2020 14:27:08 +0300
Subject: net/mlx5: Add helper function to release fw page

Factor out the fwp address release page to a helper function; it will be
used in the downstream patch.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 30 ++++++++++++----------
 1 file changed, 17 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 3d6f617abb7d..c39907c641a0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -181,25 +181,17 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
 
 #define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
 
-static void free_4k(struct mlx5_core_dev *dev, u64 addr)
+static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp)
 {
-	struct fw_page *fwp;
-	int n;
+	int n = (fwp->addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
 
-	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
-	if (!fwp) {
-		mlx5_core_warn(dev, "page not found\n");
-		return;
-	}
-
-	n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
 	fwp->free_count++;
 	set_bit(n, &fwp->bitmask);
 	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
 		rb_erase(&fwp->rb_node, &dev->priv.page_root);
 		if (fwp->free_count != 1)
 			list_del(&fwp->list);
-		dma_unmap_page(dev->device, addr & MLX5_U64_4K_PAGE_MASK,
+		dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
 			       PAGE_SIZE, DMA_BIDIRECTIONAL);
 		__free_page(fwp->page);
 		kfree(fwp);
@@ -208,6 +200,18 @@ static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 	}
 }
 
+static void free_addr(struct mlx5_core_dev *dev, u64 addr)
+{
+	struct fw_page *fwp;
+
+	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
+	if (!fwp) {
+		mlx5_core_warn(dev, "page not found\n");
+		return;
+	}
+	free_fwp(dev, fwp);
+}
+
 static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
 {
 	struct device *device = dev->device;
@@ -329,7 +333,7 @@ retry:
 
 out_4k:
 	for (i--; i >= 0; i--)
-		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+		free_addr(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
 out_free:
 	kvfree(in);
 	if (notify_fail)
@@ -408,7 +412,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	}
 
 	for (i = 0; i < num_claimed; i++)
-		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+		free_addr(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
 
 	if (nclaimed)
 		*nclaimed = num_claimed;
-- 
cgit v1.2.3-59-g8ed1b


From c7636942d278db7f502c626a47d2ce1111602716 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 19 Apr 2020 10:20:40 +0300
Subject: net/mlx5: Rate limit page not found error messages

Thousands of pages are released with free_addr() function. In case of
buggy sync between FW and driver on released address, the log will be
flooded with error messages. Use mlx5_core_warn_rl() to limit it.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index c39907c641a0..c790d6e3d204 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -206,7 +206,7 @@ static void free_addr(struct mlx5_core_dev *dev, u64 addr)
 
 	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
 	if (!fwp) {
-		mlx5_core_warn(dev, "page not found\n");
+		mlx5_core_warn_rl(dev, "page not found\n");
 		return;
 	}
 	free_fwp(dev, fwp);
-- 
cgit v1.2.3-59-g8ed1b


From c6168161f693e6d26cdcce891f99399f1432ac80 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Wed, 1 Apr 2020 10:30:32 +0300
Subject: net/mlx5: Add support for release all pages event

If FW sets release_all_pages bit in MLX5_EVENT_TYPE_PAGE_REQUEST,
driver shall release all pages of a given function id, with no further
pages reclaim negotiation with FW nor MANAGE_PAGES commands from driver
towards FW.

Upon receiving this bit as part of pages reclaim event, driver will
initiate release all flow, in which it will iterate and release all
function's pages.

As part of driver <-> FW capabilities handshake, FW will report
release_all_pages max HCA cap bit, and driver will set the
release_all_pages bit in HCA cap.

NIC: ConnectX-4 Lx
CPU: Intel(R) Xeon(R) CPU E5-2650 v2 @ 2.60GHz
Test case: Simultaneously FLR 4 VFs, and measure FW release pages by
driver.
Before: 3.18 Sec
After:  0.31 Sec

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c     |  3 ++
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 41 ++++++++++++++++++++--
 2 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index fbbf51026b52..742ba012c234 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -549,6 +549,9 @@ static int handle_hca_cap(struct mlx5_core_dev *dev, void *set_ctx)
 			 num_vhca_ports,
 			 MLX5_CAP_GEN_MAX(dev, num_vhca_ports));
 
+	if (MLX5_CAP_GEN_MAX(dev, release_all_pages))
+		MLX5_SET(cmd_hca_cap, set_hca_cap, release_all_pages, 1);
+
 	return set_caps(dev, set_ctx, MLX5_SET_HCA_CAP_OP_MOD_GENERAL_DEVICE);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index c790d6e3d204..8ce78f42dfc0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -50,6 +50,7 @@ struct mlx5_pages_req {
 	u8	ec_function;
 	s32	npages;
 	struct work_struct work;
+	u8	release_all;
 };
 
 struct fw_page {
@@ -341,6 +342,33 @@ out_free:
 	return err;
 }
 
+static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
+			      bool ec_function)
+{
+	struct rb_node *p;
+	int npages = 0;
+
+	p = rb_first(&dev->priv.page_root);
+	while (p) {
+		struct fw_page *fwp = rb_entry(p, struct fw_page, rb_node);
+
+		p = rb_next(p);
+		if (fwp->func_id != func_id)
+			continue;
+		free_fwp(dev, fwp);
+		npages++;
+	}
+
+	dev->priv.fw_pages -= npages;
+	if (func_id)
+		dev->priv.vfs_pages -= npages;
+	else if (mlx5_core_is_ecpf(dev) && !ec_function)
+		dev->priv.peer_pf_pages -= npages;
+
+	mlx5_core_dbg(dev, "npages %d, ec_function %d, func_id 0x%x\n",
+		      npages, ec_function, func_id);
+}
+
 static int reclaim_pages_cmd(struct mlx5_core_dev *dev,
 			     u32 *in, int in_size, u32 *out, int out_size)
 {
@@ -434,7 +462,9 @@ static void pages_work_handler(struct work_struct *work)
 	struct mlx5_core_dev *dev = req->dev;
 	int err = 0;
 
-	if (req->npages < 0)
+	if (req->release_all)
+		release_all_pages(dev, req->func_id, req->ec_function);
+	else if (req->npages < 0)
 		err = reclaim_pages(dev, req->func_id, -1 * req->npages, NULL,
 				    req->ec_function);
 	else if (req->npages > 0)
@@ -449,6 +479,7 @@ static void pages_work_handler(struct work_struct *work)
 
 enum {
 	EC_FUNCTION_MASK = 0x8000,
+	RELEASE_ALL_PAGES_MASK = 0x4000,
 };
 
 static int req_pages_handler(struct notifier_block *nb,
@@ -459,6 +490,7 @@ static int req_pages_handler(struct notifier_block *nb,
 	struct mlx5_priv *priv;
 	struct mlx5_eqe *eqe;
 	bool ec_function;
+	bool release_all;
 	u16 func_id;
 	s32 npages;
 
@@ -469,8 +501,10 @@ static int req_pages_handler(struct notifier_block *nb,
 	func_id = be16_to_cpu(eqe->data.req_pages.func_id);
 	npages  = be32_to_cpu(eqe->data.req_pages.num_pages);
 	ec_function = be16_to_cpu(eqe->data.req_pages.ec_function) & EC_FUNCTION_MASK;
-	mlx5_core_dbg(dev, "page request for func 0x%x, npages %d\n",
-		      func_id, npages);
+	release_all = be16_to_cpu(eqe->data.req_pages.ec_function) &
+		      RELEASE_ALL_PAGES_MASK;
+	mlx5_core_dbg(dev, "page request for func 0x%x, npages %d, release_all %d\n",
+		      func_id, npages, release_all);
 	req = kzalloc(sizeof(*req), GFP_ATOMIC);
 	if (!req) {
 		mlx5_core_warn(dev, "failed to allocate pages request\n");
@@ -481,6 +515,7 @@ static int req_pages_handler(struct notifier_block *nb,
 	req->func_id = func_id;
 	req->npages = npages;
 	req->ec_function = ec_function;
+	req->release_all = release_all;
 	INIT_WORK(&req->work, pages_work_handler);
 	queue_work(dev->priv.pg_wq, &req->work);
 	return NOTIFY_OK;
-- 
cgit v1.2.3-59-g8ed1b


From e658664c77c11c5ba77dfb231138505ecac71c80 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 8 Aug 2019 12:32:17 +0300
Subject: net/mlx5e: Use proper name field for the UMR key

Even though some of the WQE control segment's fields share
the same memory bits (a union of fields), prefer having the
right field name for every different usage.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 10c933e5da9a..bf3fdbea1074 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -233,7 +233,7 @@ static inline void mlx5e_build_umr_wqe(struct mlx5e_rq *rq,
 	cseg->qpn_ds    = cpu_to_be32((sq->sqn << MLX5_WQE_CTRL_QPN_SHIFT) |
 				      ds_cnt);
 	cseg->fm_ce_se  = MLX5_WQE_CTRL_CQ_UPDATE;
-	cseg->imm       = rq->mkey_be;
+	cseg->umr_mkey  = rq->mkey_be;
 
 	ucseg->flags = MLX5_UMR_TRANSLATION_OFFSET_EN | MLX5_UMR_INLINE;
 	ucseg->xlt_octowords =
-- 
cgit v1.2.3-59-g8ed1b


From f1b95753eeedc00f1223e8033d96dae9f996ca6d Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 9 Feb 2020 17:06:49 +0200
Subject: net/mlx5e: TX, Generalise code and usage of error CQE dump

Error CQE was dumped only for TXQ SQs.
Generalise the function, and add usage for error completions
on ICO SQs and XDP SQs.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 16 ++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c  | 13 ++++++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c   | 18 +-----------------
 4 files changed, 27 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 9f6967d76053..c0249fc77eaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -189,6 +189,22 @@ static inline void mlx5e_rqwq_reset(struct mlx5e_rq *rq)
 	}
 }
 
+static inline void mlx5e_dump_error_cqe(struct mlx5e_cq *cq, u32 sqn,
+					struct mlx5_err_cqe *err_cqe)
+{
+	struct mlx5_cqwq *wq = &cq->wq;
+	u32 ci;
+
+	ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
+
+	netdev_err(cq->channel->netdev,
+		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
+		   cq->mcq.cqn, ci, sqn,
+		   get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
+		   err_cqe->syndrome, err_cqe->vendor_err_synd);
+	mlx5_dump_err_cqe(cq->mdev, err_cqe);
+}
+
 /* SW parser related functions */
 
 struct mlx5e_swp_spec {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f049e0ac308a..f9dad2639061 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -415,11 +415,6 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
-		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ))
-			netdev_WARN_ONCE(sq->channel->netdev,
-					 "Bad OP in XDPSQ CQE: 0x%x\n",
-					 get_cqe_opcode(cqe));
-
 		do {
 			struct mlx5e_xdp_wqe_info *wi;
 			u16 ci;
@@ -432,6 +427,14 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 			mlx5e_free_xdpsq_desc(sq, wi, &xsk_frames, true);
 		} while (!last_wqe);
+
+		if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
+			netdev_WARN_ONCE(sq->channel->netdev,
+					 "Bad OP in XDPSQ CQE: 0x%x\n",
+					 get_cqe_opcode(cqe));
+			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+					     (struct mlx5_err_cqe *)cqe);
+		}
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
 	if (xsk_frames)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index e2beb89c1832..4db1c92f0019 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -631,6 +631,8 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 				netdev_WARN_ONCE(cq->channel->netdev,
 						 "Bad OP in ICOSQ CQE: 0x%x\n",
 						 get_cqe_opcode(cqe));
+				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
+						     (struct mlx5_err_cqe *)cqe);
 				if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING, &sq->state))
 					queue_work(cq->channel->priv->wq, &sq->recover_work);
 				break;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index fd6b2a1898c5..1679557f34c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -399,22 +399,6 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
 }
 
-static void mlx5e_dump_error_cqe(struct mlx5e_txqsq *sq,
-				 struct mlx5_err_cqe *err_cqe)
-{
-	struct mlx5_cqwq *wq = &sq->cq.wq;
-	u32 ci;
-
-	ci = mlx5_cqwq_ctr2ix(wq, wq->cc - 1);
-
-	netdev_err(sq->channel->netdev,
-		   "Error cqe on cqn 0x%x, ci 0x%x, sqn 0x%x, opcode 0x%x, syndrome 0x%x, vendor syndrome 0x%x\n",
-		   sq->cq.mcq.cqn, ci, sq->sqn,
-		   get_cqe_opcode((struct mlx5_cqe64 *)err_cqe),
-		   err_cqe->syndrome, err_cqe->vendor_err_synd);
-	mlx5_dump_err_cqe(sq->cq.mdev, err_cqe);
-}
-
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 {
 	struct mlx5e_sq_stats *stats;
@@ -501,7 +485,7 @@ bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
 		if (unlikely(get_cqe_opcode(cqe) == MLX5_CQE_REQ_ERR)) {
 			if (!test_and_set_bit(MLX5E_SQ_STATE_RECOVERING,
 					      &sq->state)) {
-				mlx5e_dump_error_cqe(sq,
+				mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
 						     (struct mlx5_err_cqe *)cqe);
 				mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 				queue_work(cq->channel->priv->wq,
-- 
cgit v1.2.3-59-g8ed1b


From e2e11dbf36936d9cfea99c5b7386acea67b21634 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Sun, 9 Feb 2020 17:13:23 +0200
Subject: net/mlx5e: XDP, Print the offending TX descriptor on error completion

Upon an error completion on an XDP SQ, print the offending WQE
to ease the debug process.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index f9dad2639061..6f32a697a4bf 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -408,7 +408,8 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 
 	i = 0;
 	do {
-		u16 wqe_counter;
+		struct mlx5e_xdp_wqe_info *wi;
+		u16 wqe_counter, ci;
 		bool last_wqe;
 
 		mlx5_cqwq_pop(&cq->wq);
@@ -416,9 +417,6 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
 		do {
-			struct mlx5e_xdp_wqe_info *wi;
-			u16 ci;
-
 			last_wqe = (sqcc == wqe_counter);
 			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
 			wi = &sq->db.wqe_info[ci];
@@ -434,6 +432,7 @@ bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq)
 					 get_cqe_opcode(cqe));
 			mlx5e_dump_error_cqe(&sq->cq, sq->sqn,
 					     (struct mlx5_err_cqe *)cqe);
+			mlx5_wq_cyc_wqe_dump(&sq->wq, ci, wi->num_wqebbs);
 		}
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
-- 
cgit v1.2.3-59-g8ed1b


From fed0c6cfcd58f29ff60f47559b88a6289b6b680a Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Fri, 15 Nov 2019 13:48:38 +0200
Subject: net/mlx5e: Fetch WQE: reuse code and enforce typing

There are multiple functions mlx5{e,i}_*_fetch_wqe that contain the same
code, that is repeated, because they operate on different SQ struct
types. mlx5e_sq_fetch_wqe also returns void *, instead of the concrete
WQE type.

This commit generalizes the fetch WQE operation by putting this code
into a single function. To simplify calls of the generic function in
concrete use cases, macros are provided that substitute the right WQE
size and cast the return type.

Before this patch, fetch_wqe used to calculate pi itself, but the value
was often known to the caller. This calculation is moved outside to
eliminate this unnecessary step and prepare for the fill_frag_edge
refactoring in the next patch.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h         | 12 ++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c          |  6 ++++--
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h          | 13 -------------
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h   |  8 ++++++++
 .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c    | 15 ++++++++++-----
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c   |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c           | 10 ++++++----
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h     | 11 ++---------
 8 files changed, 38 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index c0249fc77eaa..8682d9148ab9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -33,19 +33,19 @@ mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
 	return (mlx5_wq_cyc_ctr2ix(wq, cc - pc) >= n) || (cc == pc);
 }
 
-static inline void *
-mlx5e_sq_fetch_wqe(struct mlx5e_txqsq *sq, size_t size, u16 *pi)
+static inline void *mlx5e_fetch_wqe(struct mlx5_wq_cyc *wq, u16 pi, size_t wqe_size)
 {
-	struct mlx5_wq_cyc *wq = &sq->wq;
 	void *wqe;
 
-	*pi  = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
-	memset(wqe, 0, size);
+	wqe = mlx5_wq_cyc_get_wqe(wq, pi);
+	memset(wqe, 0, wqe_size);
 
 	return wqe;
 }
 
+#define MLX5E_TX_FETCH_WQE(sq, pi) \
+	((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5e_tx_wqe)))
+
 static inline struct mlx5e_tx_wqe *
 mlx5e_post_nop(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 6f32a697a4bf..cf089520c031 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -188,10 +188,12 @@ static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
 
-	if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS))
+	if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) {
 		mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	}
 
-	session->wqe = mlx5e_xdpsq_fetch_wqe(sq, &pi);
+	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
 	prefetchw(session->wqe->data);
 	session->ds_count  = MLX5E_XDP_TX_EMPTY_DS_COUNT;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index d7587f40ecae..4fd0ff47bdc3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -186,19 +186,6 @@ mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
 	session->ds_count++;
 }
 
-static inline struct mlx5e_tx_wqe *
-mlx5e_xdpsq_fetch_wqe(struct mlx5e_xdpsq *sq, u16 *pi)
-{
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	struct mlx5e_tx_wqe *wqe;
-
-	*pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	wqe = mlx5_wq_cyc_get_wqe(wq, *pi);
-	memset(wqe, 0, sizeof(*wqe));
-
-	return wqe;
-}
-
 static inline void
 mlx5e_xdpi_fifo_push(struct mlx5e_xdp_info_fifo *fifo,
 		     struct mlx5e_xdp_info *xi)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 63116be6b1d6..9daaec244385 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -27,6 +27,14 @@ struct mlx5e_dump_wqe {
 	struct mlx5_wqe_data_seg data;
 };
 
+#define MLX5E_TLS_FETCH_UMR_WQE(sq, pi) \
+	((struct mlx5e_umr_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, MLX5E_KTLS_STATIC_UMR_WQE_SZ))
+#define MLX5E_TLS_FETCH_PROGRESS_WQE(sq, pi) \
+	((struct mlx5e_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, MLX5E_KTLS_PROGRESS_WQE_SZ))
+#define MLX5E_TLS_FETCH_DUMP_WQE(sq, pi) \
+	((struct mlx5e_dump_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, \
+						  sizeof(struct mlx5e_dump_wqe)))
+
 #define MLX5E_KTLS_DUMP_WQEBBS \
 	(DIV_ROUND_UP(sizeof(struct mlx5e_dump_wqe), MLX5_SEND_WQE_BB))
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 52a56622034a..717d36b45aa9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -137,7 +137,8 @@ post_static_params(struct mlx5e_txqsq *sq,
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 pi;
 
-	umr_wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_STATIC_UMR_WQE_SZ, &pi);
+	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	umr_wqe = MLX5E_TLS_FETCH_UMR_WQE(sq, pi);
 	build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
 	tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
 	sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
@@ -151,7 +152,8 @@ post_progress_params(struct mlx5e_txqsq *sq,
 	struct mlx5e_tx_wqe *wqe;
 	u16 pi;
 
-	wqe = mlx5e_sq_fetch_wqe(sq, MLX5E_KTLS_PROGRESS_WQE_SZ, &pi);
+	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	wqe = MLX5E_TLS_FETCH_PROGRESS_WQE(sq, pi);
 	build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
 	tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
 	sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
@@ -278,7 +280,8 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
 	int fsz;
 	u16 pi;
 
-	wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi);
 
 	ds_cnt = sizeof(*wqe) / MLX5_SEND_WQE_DS;
 
@@ -449,7 +452,8 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
 
 	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
 		mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
-		*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+		*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+		*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
 		stats->tls_ctx++;
 	}
 
@@ -460,7 +464,8 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
 
 		switch (ret) {
 		case MLX5E_KTLS_SYNC_DONE:
-			*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+			*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+			*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
 			break;
 		case MLX5E_KTLS_SYNC_SKIP_NO_DATA:
 			if (likely(!skb->decrypted))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index ef1ed15a53b4..1d7ddeb7a46b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -248,7 +248,8 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
 	mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
 				    cpu_to_be64(info.rcd_sn));
 	mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
-	*wqe = mlx5e_sq_fetch_wqe(sq, sizeof(**wqe), pi);
+	*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
 	return skb;
 
 err_out:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 1679557f34c0..ec1429596cb7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -324,7 +324,8 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		struct mlx5_wqe_ctrl_seg cur_ctrl = wqe->ctrl;
 #endif
 		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+		wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 #ifdef CONFIG_MLX5_EN_IPSEC
 		wqe->eth = cur_eth;
 #endif
@@ -389,7 +390,8 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	u16 pi;
 
 	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
-	wqe = mlx5e_sq_fetch_wqe(sq, sizeof(*wqe), &pi);
+	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
 	/* might send skbs and update wqe and pi */
 	skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
@@ -622,10 +624,10 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
 	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
 		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+		pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
 	}
 
-	mlx5i_sq_fetch_wqe(sq, &wqe, pi);
+	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
 
 	/* fill wqe */
 	wi       = &sq->db.wqe_info[pi];
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 3483ba642cfe..7844ab5d0ce7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -110,15 +110,8 @@ struct mlx5i_tx_wqe {
 	struct mlx5_wqe_data_seg     data[];
 };
 
-static inline void mlx5i_sq_fetch_wqe(struct mlx5e_txqsq *sq,
-				      struct mlx5i_tx_wqe **wqe,
-				      u16 pi)
-{
-	struct mlx5_wq_cyc *wq = &sq->wq;
-
-	*wqe = mlx5_wq_cyc_get_wqe(wq, pi);
-	memset(*wqe, 0, sizeof(**wqe));
-}
+#define MLX5I_SQ_FETCH_WQE(sq, pi) \
+	((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe)))
 
 netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
-- 
cgit v1.2.3-59-g8ed1b


From 7d42c8e9ab50091e4dda29066975fbb7aa0f1585 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 16 Apr 2020 11:32:42 +0300
Subject: net/mlx5e: Rename ICOSQ WQE info struct and field

Structs mlx5e_txqsq and mlx5e_xdpsq contain wqe_info arrays to store
supplementary information corresponding to WQEs in the queue. Struct
mlx5e_icosq also has such an array, but it's called differently -
ico_wqe. This patch renames it to unify with the other SQs.

In addition, rename the struct to emphasize its specific usage.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 10 +++++-----
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 12 ++++++------
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c |  4 ++--
 4 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index e8508c74eaa8..0864b76ca2c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -370,7 +370,7 @@ enum {
 	MLX5E_SQ_STATE_PENDING_XSK_TX,
 };
 
-struct mlx5e_sq_wqe_info {
+struct mlx5e_icosq_wqe_info {
 	u8  opcode;
 	u8 num_wqebbs;
 
@@ -552,7 +552,7 @@ struct mlx5e_icosq {
 
 	/* write@xmit, read@completion */
 	struct {
-		struct mlx5e_sq_wqe_info *ico_wqe;
+		struct mlx5e_icosq_wqe_info *wqe_info;
 	} db;
 
 	/* read only */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index bf3fdbea1074..048a4f8601a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1027,17 +1027,17 @@ static void mlx5e_free_xdpsq(struct mlx5e_xdpsq *sq)
 
 static void mlx5e_free_icosq_db(struct mlx5e_icosq *sq)
 {
-	kvfree(sq->db.ico_wqe);
+	kvfree(sq->db.wqe_info);
 }
 
 static int mlx5e_alloc_icosq_db(struct mlx5e_icosq *sq, int numa)
 {
 	int wq_sz = mlx5_wq_cyc_get_size(&sq->wq);
+	size_t size;
 
-	sq->db.ico_wqe = kvzalloc_node(array_size(wq_sz,
-						  sizeof(*sq->db.ico_wqe)),
-				       GFP_KERNEL, numa);
-	if (!sq->db.ico_wqe)
+	size = array_size(wq_sz, sizeof(*sq->db.wqe_info));
+	sq->db.wqe_info = kvzalloc_node(size, GFP_KERNEL, numa);
+	if (!sq->db.wqe_info)
 		return -ENOMEM;
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 4db1c92f0019..9f33a0e7dd9a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -472,7 +472,7 @@ static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
 					      struct mlx5_wq_cyc *wq,
 					      u16 pi, u16 nnops)
 {
-	struct mlx5e_sq_wqe_info *edge_wi, *wi = &sq->db.ico_wqe[pi];
+	struct mlx5e_icosq_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
 
 	edge_wi = wi + nnops;
 
@@ -527,9 +527,9 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 			    MLX5_OPCODE_UMR);
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
 
-	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_UMR;
-	sq->db.ico_wqe[pi].num_wqebbs = MLX5E_UMR_WQEBBS;
-	sq->db.ico_wqe[pi].umr.rq = rq;
+	sq->db.wqe_info[pi].opcode = MLX5_OPCODE_UMR;
+	sq->db.wqe_info[pi].num_wqebbs = MLX5E_UMR_WQEBBS;
+	sq->db.wqe_info[pi].umr.rq = rq;
 	sq->pc += MLX5E_UMR_WQEBBS;
 
 	sq->doorbell_cseg = &umr_wqe->ctrl;
@@ -618,13 +618,13 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 		wqe_counter = be16_to_cpu(cqe->wqe_counter);
 
 		do {
-			struct mlx5e_sq_wqe_info *wi;
+			struct mlx5e_icosq_wqe_info *wi;
 			u16 ci;
 
 			last_wqe = (sqcc == wqe_counter);
 
 			ci = mlx5_wq_cyc_ctr2ix(&sq->wq, sqcc);
-			wi = &sq->db.ico_wqe[ci];
+			wi = &sq->db.wqe_info[ci];
 			sqcc += wi->num_wqebbs;
 
 			if (last_wqe && unlikely(get_cqe_opcode(cqe) != MLX5_CQE_REQ)) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index acb20215a33b..869fd58a6775 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -78,8 +78,8 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
 	struct mlx5e_tx_wqe *nopwqe;
 	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 
-	sq->db.ico_wqe[pi].opcode = MLX5_OPCODE_NOP;
-	sq->db.ico_wqe[pi].num_wqebbs = 1;
+	sq->db.wqe_info[pi].opcode = MLX5_OPCODE_NOP;
+	sq->db.wqe_info[pi].num_wqebbs = 1;
 	nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
 	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
 }
-- 
cgit v1.2.3-59-g8ed1b


From ec9cdca0663a543ede2072ff091beec1787e3374 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 16 Apr 2020 11:29:49 +0300
Subject: net/mlx5e: Unify reserving space for WQEs

In our fast-path design, a WQE (Work Queue Element) must not cross the
page boundary. To enforce that, for WQEs consisting of more than one BB
(Basic Block), the driver checks the available contiguous space in the
WQ in advance, and if it's not enough, it pads it with NOPs.

This patch modifies the code that calculates the position of next WQE,
considering the padding, and prepares the WQE. This code is common for
all SQ types. In this patch it's reorganized in a way that makes the
usage pattern unified for all SQ types, and makes the implementations
self-contained and look almost the same, preparing the repeated code for
further attempts to deduplicate it.

One place is left as is: mlx5e_sq_xmit and the mlx5e_fill_sq_frag_edge
call inside it, because it is special in that it may also copy the WQE's
cseg and eseg when reserving space. This will be eliminated in one of the
following patches, and this place will be converted to the new approach,
too.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h  | 56 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c   | 31 ++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h   | 17 -------
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 16 +------
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    | 26 +---------
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 11 +----
 6 files changed, 88 insertions(+), 69 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 8682d9148ab9..89fe65593c16 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -81,6 +81,62 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
 	return wqe;
 }
 
+static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_tx_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_tx_wqe_info) {
+				.num_wqebbs = 1,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+		sq->stats->nop += contig_wqebbs;
+
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	}
+
+	return pi;
+}
+
+static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
+{
+	struct mlx5_wq_cyc *wq = &sq->wq;
+	u16 pi, contig_wqebbs;
+
+	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_icosq_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_icosq_wqe_info) {
+				.opcode = MLX5_OPCODE_NOP,
+				.num_wqebbs = 1,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+
+		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
+	}
+
+	return pi;
+}
+
 static inline void
 mlx5e_fill_sq_frag_edge(struct mlx5e_txqsq *sq, struct mlx5_wq_cyc *wq,
 			u16 pi, u16 nnops)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index cf089520c031..c4a7fb4ecd14 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -178,21 +178,42 @@ xdp_abort:
 	}
 }
 
-static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+static u16 mlx5e_xdpsq_get_next_pi(struct mlx5e_xdpsq *sq, u16 size)
 {
-	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
-	struct mlx5e_xdpsq_stats *stats = sq->stats;
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	u16 pi, contig_wqebbs;
 
 	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	contig_wqebbs = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
+	if (unlikely(contig_wqebbs < size)) {
+		struct mlx5e_xdp_wqe_info *wi, *edge_wi;
+
+		wi = &sq->db.wqe_info[pi];
+		edge_wi = wi + contig_wqebbs;
+
+		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
+		for (; wi < edge_wi; wi++) {
+			*wi = (struct mlx5e_xdp_wqe_info) {
+				.num_wqebbs = 1,
+				.num_pkts = 0,
+			};
+			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
+		}
+		sq->stats->nops += contig_wqebbs;
 
-	if (unlikely(contig_wqebbs < MLX5_SEND_WQE_MAX_WQEBBS)) {
-		mlx5e_fill_xdpsq_frag_edge(sq, wq, pi, contig_wqebbs);
 		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 	}
 
+	return pi;
+}
+
+static void mlx5e_xdp_mpwqe_session_start(struct mlx5e_xdpsq *sq)
+{
+	struct mlx5e_xdp_mpwqe *session = &sq->mpwqe;
+	struct mlx5e_xdpsq_stats *stats = sq->stats;
+	u16 pi;
+
+	pi = mlx5e_xdpsq_get_next_pi(sq, MLX5_SEND_WQE_MAX_WQEBBS);
 	session->wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
 	prefetchw(session->wqe->data);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index 4fd0ff47bdc3..ed6f045febeb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -137,23 +137,6 @@ mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
 	       session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
 }
 
-static inline void
-mlx5e_fill_xdpsq_frag_edge(struct mlx5e_xdpsq *sq, struct mlx5_wq_cyc *wq,
-			   u16 pi, u16 nnops)
-{
-	struct mlx5e_xdp_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-	edge_wi = wi + nnops;
-	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
-	for (; wi < edge_wi; wi++) {
-		wi->num_wqebbs = 1;
-		wi->num_pkts   = 0;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-
-	sq->stats->nops += nnops;
-}
-
 static inline void
 mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
 			 struct mlx5e_xdp_xmit_data *xdptxd,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 717d36b45aa9..ba973937f0b5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -165,14 +165,8 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
 			      bool skip_static_post, bool fence_first_post)
 {
 	bool progress_fence = skip_static_post || !fence_first_post;
-	struct mlx5_wq_cyc *wq = &sq->wq;
-	u16 contig_wqebbs_room, pi;
 
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room <
-		     MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS))
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+	mlx5e_txqsq_get_next_pi(sq, MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS);
 
 	if (!skip_static_post)
 		post_static_params(sq, priv_tx, fence_first_post);
@@ -346,10 +340,8 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 			 u32 seq)
 {
 	struct mlx5e_sq_stats *stats = sq->stats;
-	struct mlx5_wq_cyc *wq = &sq->wq;
 	enum mlx5e_ktls_sync_retval ret;
 	struct tx_sync_info info = {};
-	u16 contig_wqebbs_room, pi;
 	u8 num_wqebbs;
 	int i = 0;
 
@@ -380,11 +372,7 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 	}
 
 	num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-
-	if (unlikely(contig_wqebbs_room < num_wqebbs))
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
+	mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 
 	for (; i < info.nr_frags; i++) {
 		unsigned int orig_fsz, frag_offset = 0, n = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 9f33a0e7dd9a..d9a5a669b84d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -468,22 +468,6 @@ static void mlx5e_post_rx_mpwqe(struct mlx5e_rq *rq, u8 n)
 	mlx5_wq_ll_update_db_record(wq);
 }
 
-static inline void mlx5e_fill_icosq_frag_edge(struct mlx5e_icosq *sq,
-					      struct mlx5_wq_cyc *wq,
-					      u16 pi, u16 nnops)
-{
-	struct mlx5e_icosq_wqe_info *edge_wi, *wi = &sq->db.wqe_info[pi];
-
-	edge_wi = wi + nnops;
-
-	/* fill sq frag edge with nops to avoid wqe wrapping two pages */
-	for (; wi < edge_wi; wi++) {
-		wi->opcode = MLX5_OPCODE_NOP;
-		wi->num_wqebbs = 1;
-		mlx5e_post_nop(wq, sq->sqn, &sq->pc);
-	}
-}
-
 static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 {
 	struct mlx5e_mpw_info *wi = &rq->mpwqe.info[ix];
@@ -492,7 +476,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5e_umr_wqe *umr_wqe;
 	u16 xlt_offset = ix << (MLX5E_LOG_ALIGNED_MPWQE_PPW - 1);
-	u16 pi, contig_wqebbs_room;
+	u16 pi;
 	int err;
 	int i;
 
@@ -502,13 +486,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 		goto err;
 	}
 
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room < MLX5E_UMR_WQEBBS)) {
-		mlx5e_fill_icosq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	}
-
+	pi = mlx5e_icosq_get_next_pi(sq, MLX5E_UMR_WQEBBS);
 	umr_wqe = mlx5_wq_cyc_get_wqe(wq, pi);
 	memcpy(umr_wqe, &rq->mpwqe.umr_wqe, offsetof(struct mlx5e_umr_wqe, inline_mtts));
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index ec1429596cb7..583e1b201b75 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -572,7 +572,6 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
 			  bool xmit_more)
 {
-	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5i_tx_wqe *wqe;
 
 	struct mlx5_wqe_datagram_seg *datagram;
@@ -582,9 +581,9 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	struct mlx5e_tx_wqe_info *wi;
 
 	struct mlx5e_sq_stats *stats = sq->stats;
-	u16 headlen, ihs, pi, contig_wqebbs_room;
 	u16 ds_cnt, ds_cnt_inl = 0;
 	u8 num_wqebbs, opcode;
+	u16 headlen, ihs, pi;
 	u32 num_bytes;
 	int num_dma;
 	__be16 mss;
@@ -620,13 +619,7 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	}
 
 	num_wqebbs = DIV_ROUND_UP(ds_cnt, MLX5_SEND_WQEBB_NUM_DS);
-	pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-	contig_wqebbs_room = mlx5_wq_cyc_get_contig_wqebbs(wq, pi);
-	if (unlikely(contig_wqebbs_room < num_wqebbs)) {
-		mlx5e_fill_sq_frag_edge(sq, wq, pi, contig_wqebbs_room);
-		pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-	}
-
+	pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 	wqe = MLX5I_SQ_FETCH_WQE(sq, pi);
 
 	/* fill wqe */
-- 
cgit v1.2.3-59-g8ed1b


From ddc2118ef064ca130f06716829e11e02b3915fd0 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 30 Apr 2020 09:32:45 +0800
Subject: hinic: make a bunch of functions static

These functions are used only in hinic_sriov.c,
so make them static to fix sparse warnings.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 91 +++++++++++++------------
 1 file changed, 48 insertions(+), 43 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index b24788e9733c..0d44af9dce2a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -23,8 +23,8 @@ MODULE_PARM_DESC(set_vf_link_state, "Set vf link state, 0 represents link auto,
 #define HINIC_VLAN_PRIORITY_SHIFT 13
 #define HINIC_ADD_VLAN_IN_MAC 0x8000
 
-int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr, u16 vlan_id,
-		  u16 func_id)
+static int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr,
+			 u16 vlan_id, u16 func_id)
 {
 	struct hinic_port_mac_cmd mac_info = {0};
 	u16 out_size = sizeof(mac_info);
@@ -84,7 +84,7 @@ void hinic_notify_all_vfs_link_changed(struct hinic_hwdev *hwdev,
 	}
 }
 
-u16 hinic_vf_info_vlanprio(struct hinic_hwdev *hwdev, int vf_id)
+static u16 hinic_vf_info_vlanprio(struct hinic_hwdev *hwdev, int vf_id)
 {
 	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
 	u16 pf_vlan, vlanprio;
@@ -97,8 +97,8 @@ u16 hinic_vf_info_vlanprio(struct hinic_hwdev *hwdev, int vf_id)
 	return vlanprio;
 }
 
-int hinic_set_vf_vlan(struct hinic_hwdev *hwdev, bool add, u16 vid,
-		      u8 qos, int vf_id)
+static int hinic_set_vf_vlan(struct hinic_hwdev *hwdev, bool add, u16 vid,
+			     u8 qos, int vf_id)
 {
 	struct hinic_vf_vlan_config vf_vlan = {0};
 	u16 out_size = sizeof(vf_vlan);
@@ -163,9 +163,9 @@ static int hinic_init_vf_config(struct hinic_hwdev *hwdev, u16 vf_id)
 	return 0;
 }
 
-int hinic_register_vf_msg_handler(void *hwdev, u16 vf_id,
-				  void *buf_in, u16 in_size,
-				  void *buf_out, u16 *out_size)
+static int hinic_register_vf_msg_handler(void *hwdev, u16 vf_id,
+					 void *buf_in, u16 in_size,
+					 void *buf_out, u16 *out_size)
 {
 	struct hinic_register_vf *register_info = buf_out;
 	struct hinic_hwdev *hw_dev = hwdev;
@@ -192,9 +192,9 @@ int hinic_register_vf_msg_handler(void *hwdev, u16 vf_id,
 	return 0;
 }
 
-int hinic_unregister_vf_msg_handler(void *hwdev, u16 vf_id,
-				    void *buf_in, u16 in_size,
-				    void *buf_out, u16 *out_size)
+static int hinic_unregister_vf_msg_handler(void *hwdev, u16 vf_id,
+					   void *buf_in, u16 in_size,
+					   void *buf_out, u16 *out_size)
 {
 	struct hinic_hwdev *hw_dev = hwdev;
 	struct hinic_func_to_io *nic_io;
@@ -209,9 +209,9 @@ int hinic_unregister_vf_msg_handler(void *hwdev, u16 vf_id,
 	return 0;
 }
 
-int hinic_change_vf_mtu_msg_handler(void *hwdev, u16 vf_id,
-				    void *buf_in, u16 in_size,
-				    void *buf_out, u16 *out_size)
+static int hinic_change_vf_mtu_msg_handler(void *hwdev, u16 vf_id,
+					   void *buf_in, u16 in_size,
+					   void *buf_out, u16 *out_size)
 {
 	struct hinic_hwdev *hw_dev = hwdev;
 	int err;
@@ -227,9 +227,9 @@ int hinic_change_vf_mtu_msg_handler(void *hwdev, u16 vf_id,
 	return 0;
 }
 
-int hinic_get_vf_mac_msg_handler(void *hwdev, u16 vf_id,
-				 void *buf_in, u16 in_size,
-				 void *buf_out, u16 *out_size)
+static int hinic_get_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+					void *buf_in, u16 in_size,
+					void *buf_out, u16 *out_size)
 {
 	struct hinic_port_mac_cmd *mac_info = buf_out;
 	struct hinic_hwdev *dev = hwdev;
@@ -246,9 +246,9 @@ int hinic_get_vf_mac_msg_handler(void *hwdev, u16 vf_id,
 	return 0;
 }
 
-int hinic_set_vf_mac_msg_handler(void *hwdev, u16 vf_id,
-				 void *buf_in, u16 in_size,
-				 void *buf_out, u16 *out_size)
+static int hinic_set_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+					void *buf_in, u16 in_size,
+					void *buf_out, u16 *out_size)
 {
 	struct hinic_port_mac_cmd *mac_out = buf_out;
 	struct hinic_port_mac_cmd *mac_in = buf_in;
@@ -280,9 +280,9 @@ int hinic_set_vf_mac_msg_handler(void *hwdev, u16 vf_id,
 	return err;
 }
 
-int hinic_del_vf_mac_msg_handler(void *hwdev, u16 vf_id,
-				 void *buf_in, u16 in_size,
-				 void *buf_out, u16 *out_size)
+static int hinic_del_vf_mac_msg_handler(void *hwdev, u16 vf_id,
+					void *buf_in, u16 in_size,
+					void *buf_out, u16 *out_size)
 {
 	struct hinic_port_mac_cmd *mac_out = buf_out;
 	struct hinic_port_mac_cmd *mac_in = buf_in;
@@ -312,9 +312,9 @@ int hinic_del_vf_mac_msg_handler(void *hwdev, u16 vf_id,
 	return err;
 }
 
-int hinic_get_vf_link_status_msg_handler(void *hwdev, u16 vf_id,
-					 void *buf_in, u16 in_size,
-					 void *buf_out, u16 *out_size)
+static int hinic_get_vf_link_status_msg_handler(void *hwdev, u16 vf_id,
+						void *buf_in, u16 in_size,
+						void *buf_out, u16 *out_size)
 {
 	struct hinic_port_link_cmd *get_link = buf_out;
 	struct hinic_hwdev *hw_dev = hwdev;
@@ -339,7 +339,7 @@ int hinic_get_vf_link_status_msg_handler(void *hwdev, u16 vf_id,
 	return 0;
 }
 
-struct vf_cmd_msg_handle nic_vf_cmd_msg_handler[] = {
+static struct vf_cmd_msg_handle nic_vf_cmd_msg_handler[] = {
 	{HINIC_PORT_CMD_VF_REGISTER, hinic_register_vf_msg_handler},
 	{HINIC_PORT_CMD_VF_UNREGISTER, hinic_unregister_vf_msg_handler},
 	{HINIC_PORT_CMD_CHANGE_MTU, hinic_change_vf_mtu_msg_handler},
@@ -351,6 +351,7 @@ struct vf_cmd_msg_handle nic_vf_cmd_msg_handler[] = {
 
 #define CHECK_IPSU_15BIT	0X8000
 
+static
 struct hinic_sriov_info *hinic_get_sriov_info_by_pcidev(struct pci_dev *pdev)
 {
 	struct net_device *netdev = pci_get_drvdata(pdev);
@@ -372,8 +373,8 @@ static int hinic_check_mac_info(u8 status, u16 vlan_id)
 
 #define HINIC_VLAN_ID_MASK	0x7FFF
 
-int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac, u8 *new_mac,
-		     u16 vlan_id, u16 func_id)
+static int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac,
+			    u8 *new_mac, u16 vlan_id, u16 func_id)
 {
 	struct hinic_port_mac_update mac_info = {0};
 	u16 out_size = sizeof(mac_info);
@@ -416,8 +417,8 @@ int hinic_update_mac(struct hinic_hwdev *hwdev, u8 *old_mac, u8 *new_mac,
 	return 0;
 }
 
-void hinic_get_vf_config(struct hinic_hwdev *hwdev, u16 vf_id,
-			 struct ifla_vf_info *ivi)
+static void hinic_get_vf_config(struct hinic_hwdev *hwdev, u16 vf_id,
+				struct ifla_vf_info *ivi)
 {
 	struct vf_data_storage *vfinfo;
 
@@ -455,7 +456,8 @@ int hinic_ndo_get_vf_config(struct net_device *netdev,
 	return 0;
 }
 
-int hinic_set_vf_mac(struct hinic_hwdev *hwdev, int vf, unsigned char *mac_addr)
+static int hinic_set_vf_mac(struct hinic_hwdev *hwdev, int vf,
+			    unsigned char *mac_addr)
 {
 	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
 	struct vf_data_storage *vf_info;
@@ -504,7 +506,8 @@ int hinic_ndo_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	return 0;
 }
 
-int hinic_add_vf_vlan(struct hinic_hwdev *hwdev, int vf_id, u16 vlan, u8 qos)
+static int hinic_add_vf_vlan(struct hinic_hwdev *hwdev, int vf_id,
+			     u16 vlan, u8 qos)
 {
 	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
 	int err;
@@ -521,7 +524,7 @@ int hinic_add_vf_vlan(struct hinic_hwdev *hwdev, int vf_id, u16 vlan, u8 qos)
 	return 0;
 }
 
-int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
+static int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
 {
 	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
 	int err;
@@ -543,8 +546,8 @@ int hinic_kill_vf_vlan(struct hinic_hwdev *hwdev, int vf_id)
 	return 0;
 }
 
-int hinic_update_mac_vlan(struct hinic_dev *nic_dev, u16 old_vlan, u16 new_vlan,
-			  int vf_id)
+static int hinic_update_mac_vlan(struct hinic_dev *nic_dev, u16 old_vlan,
+				 u16 new_vlan, int vf_id)
 {
 	struct vf_data_storage *vf_info;
 	u16 vlan_id;
@@ -651,7 +654,8 @@ int hinic_ndo_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan, u8 qos,
 	return set_hw_vf_vlan(nic_dev, cur_vlanprio, vf, vlan, qos);
 }
 
-int hinic_set_vf_trust(struct hinic_hwdev *hwdev, u16 vf_id, bool trust)
+static int hinic_set_vf_trust(struct hinic_hwdev *hwdev, u16 vf_id,
+			      bool trust)
 {
 	struct vf_data_storage *vf_infos;
 	struct hinic_func_to_io *nic_io;
@@ -697,8 +701,8 @@ int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
 }
 
 /* pf receive message from vf */
-int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
-			u16 in_size, void *buf_out, u16 *out_size)
+static int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
+			       u16 in_size, void *buf_out, u16 *out_size)
 {
 	struct vf_cmd_msg_handle *vf_msg_handle;
 	struct hinic_hwdev *dev = hwdev;
@@ -786,7 +790,7 @@ static int hinic_init_vf_infos(struct hinic_func_to_io *nic_io, u16 vf_id)
 	return 0;
 }
 
-void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
+static void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
 {
 	struct vf_data_storage *vf_infos;
 	u16 func_id;
@@ -807,8 +811,8 @@ void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
 	hinic_init_vf_infos(&nic_dev->hwdev->func_to_io, HW_VF_ID_TO_OS(vf_id));
 }
 
-int hinic_deinit_vf_hw(struct hinic_sriov_info *sriov_info, u16 start_vf_id,
-		       u16 end_vf_id)
+static int hinic_deinit_vf_hw(struct hinic_sriov_info *sriov_info,
+			      u16 start_vf_id, u16 end_vf_id)
 {
 	struct hinic_dev *nic_dev;
 	u16 func_idx, idx;
@@ -908,7 +912,8 @@ void hinic_vf_func_free(struct hinic_hwdev *hwdev)
 	}
 }
 
-int hinic_init_vf_hw(struct hinic_hwdev *hwdev, u16 start_vf_id, u16 end_vf_id)
+static int hinic_init_vf_hw(struct hinic_hwdev *hwdev, u16 start_vf_id,
+			    u16 end_vf_id)
 {
 	u16 i, func_idx;
 	int err;
-- 
cgit v1.2.3-59-g8ed1b


From 40cf7fbe8a43b1d8c3c0139423abbeb9625a909a Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Thu, 30 Apr 2020 09:51:31 +0800
Subject: hinic: Use ARRAY_SIZE for nic_vf_cmd_msg_handler

fix coccinelle warning, use ARRAY_SIZE

drivers/net/ethernet/huawei/hinic/hinic_sriov.c:713:43-44: WARNING: Use ARRAY_SIZE

v1-->v2:
   remove cmd_number

v2-->v3:
   preserve the reverse christmas tree ordering of local variables

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index 0d44af9dce2a..fd4aaf43874a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -708,17 +708,15 @@ static int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
 	struct hinic_hwdev *dev = hwdev;
 	struct hinic_func_to_io *nic_io;
 	struct hinic_pfhwdev *pfhwdev;
-	u32 i, cmd_number;
 	int err = 0;
+	u32 i;
 
 	if (!hwdev)
 		return -EFAULT;
 
-	cmd_number = sizeof(nic_vf_cmd_msg_handler) /
-			    sizeof(struct vf_cmd_msg_handle);
 	pfhwdev = container_of(dev, struct hinic_pfhwdev, hwdev);
 	nic_io = &dev->func_to_io;
-	for (i = 0; i < cmd_number; i++) {
+	for (i = 0; i < ARRAY_SIZE(nic_vf_cmd_msg_handler); i++) {
 		vf_msg_handle = &nic_vf_cmd_msg_handler[i];
 		if (cmd == vf_msg_handle->cmd &&
 		    vf_msg_handle->cmd_msg_handler) {
@@ -729,7 +727,7 @@ static int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
 			break;
 		}
 	}
-	if (i == cmd_number)
+	if (i == ARRAY_SIZE(nic_vf_cmd_msg_handler))
 		err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_L2NIC,
 					cmd, buf_in, in_size, buf_out,
 					out_size, HINIC_MGMT_MSG_SYNC);
-- 
cgit v1.2.3-59-g8ed1b


From 466ed24fb22342f3ae1c10758a6a0c6a8c081b2d Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 30 Apr 2020 11:07:05 +0200
Subject: net: phy: mdio: add IPQ4019 MDIO driver

This patch adds the driver for the MDIO interface
inside of Qualcomm IPQ40xx series SoC-s.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: Luka Perkov <luka.perkov@sartura.hr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig        |   7 ++
 drivers/net/phy/Makefile       |   1 +
 drivers/net/phy/mdio-ipq4019.c | 160 +++++++++++++++++++++++++++++++++++++++++
 3 files changed, 168 insertions(+)
 create mode 100644 drivers/net/phy/mdio-ipq4019.c

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index bacfee41b564..2a32f26ead0b 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -157,6 +157,13 @@ config MDIO_I2C
 
 	  This is library mode.
 
+config MDIO_IPQ4019
+	tristate "Qualcomm IPQ4019 MDIO interface support"
+	depends on HAS_IOMEM && OF_MDIO
+	help
+	  This driver supports the MDIO interface found in Qualcomm
+	  IPQ40xx series Soc-s.
+
 config MDIO_IPQ8064
 	tristate "Qualcomm IPQ8064 MDIO interface support"
 	depends on HAS_IOMEM && OF_MDIO
diff --git a/drivers/net/phy/Makefile b/drivers/net/phy/Makefile
index cd345b75d127..dc9e53b511d6 100644
--- a/drivers/net/phy/Makefile
+++ b/drivers/net/phy/Makefile
@@ -37,6 +37,7 @@ obj-$(CONFIG_MDIO_CAVIUM)	+= mdio-cavium.o
 obj-$(CONFIG_MDIO_GPIO)		+= mdio-gpio.o
 obj-$(CONFIG_MDIO_HISI_FEMAC)	+= mdio-hisi-femac.o
 obj-$(CONFIG_MDIO_I2C)		+= mdio-i2c.o
+obj-$(CONFIG_MDIO_IPQ4019)	+= mdio-ipq4019.o
 obj-$(CONFIG_MDIO_IPQ8064)	+= mdio-ipq8064.o
 obj-$(CONFIG_MDIO_MOXART)	+= mdio-moxart.o
 obj-$(CONFIG_MDIO_MSCC_MIIM)	+= mdio-mscc-miim.o
diff --git a/drivers/net/phy/mdio-ipq4019.c b/drivers/net/phy/mdio-ipq4019.c
new file mode 100644
index 000000000000..1ce81ff2f41d
--- /dev/null
+++ b/drivers/net/phy/mdio-ipq4019.c
@@ -0,0 +1,160 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright (c) 2015, The Linux Foundation. All rights reserved. */
+/* Copyright (c) 2020 Sartura Ltd. */
+
+#include <linux/delay.h>
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/io.h>
+#include <linux/iopoll.h>
+#include <linux/of_address.h>
+#include <linux/of_mdio.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+
+#define MDIO_ADDR_REG				0x44
+#define MDIO_DATA_WRITE_REG			0x48
+#define MDIO_DATA_READ_REG			0x4c
+#define MDIO_CMD_REG				0x50
+#define MDIO_CMD_ACCESS_BUSY		BIT(16)
+#define MDIO_CMD_ACCESS_START		BIT(8)
+#define MDIO_CMD_ACCESS_CODE_READ	0
+#define MDIO_CMD_ACCESS_CODE_WRITE	1
+
+#define ipq4019_MDIO_TIMEOUT	10000
+#define ipq4019_MDIO_SLEEP		10
+
+struct ipq4019_mdio_data {
+	void __iomem	*membase;
+};
+
+static int ipq4019_mdio_wait_busy(struct mii_bus *bus)
+{
+	struct ipq4019_mdio_data *priv = bus->priv;
+	unsigned int busy;
+
+	return readl_poll_timeout(priv->membase + MDIO_CMD_REG, busy,
+				  (busy & MDIO_CMD_ACCESS_BUSY) == 0,
+				  ipq4019_MDIO_SLEEP, ipq4019_MDIO_TIMEOUT);
+}
+
+static int ipq4019_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
+{
+	struct ipq4019_mdio_data *priv = bus->priv;
+	unsigned int cmd;
+
+	/* Reject clause 45 */
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	if (ipq4019_mdio_wait_busy(bus))
+		return -ETIMEDOUT;
+
+	/* issue the phy address and reg */
+	writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+
+	cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_READ;
+
+	/* issue read command */
+	writel(cmd, priv->membase + MDIO_CMD_REG);
+
+	/* Wait read complete */
+	if (ipq4019_mdio_wait_busy(bus))
+		return -ETIMEDOUT;
+
+	/* Read and return data */
+	return readl(priv->membase + MDIO_DATA_READ_REG);
+}
+
+static int ipq4019_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
+							 u16 value)
+{
+	struct ipq4019_mdio_data *priv = bus->priv;
+	unsigned int cmd;
+
+	/* Reject clause 45 */
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	if (ipq4019_mdio_wait_busy(bus))
+		return -ETIMEDOUT;
+
+	/* issue the phy address and reg */
+	writel((mii_id << 8) | regnum, priv->membase + MDIO_ADDR_REG);
+
+	/* issue write data */
+	writel(value, priv->membase + MDIO_DATA_WRITE_REG);
+
+	cmd = MDIO_CMD_ACCESS_START | MDIO_CMD_ACCESS_CODE_WRITE;
+	/* issue write command */
+	writel(cmd, priv->membase + MDIO_CMD_REG);
+
+	/* Wait write complete */
+	if (ipq4019_mdio_wait_busy(bus))
+		return -ETIMEDOUT;
+
+	return 0;
+}
+
+static int ipq4019_mdio_probe(struct platform_device *pdev)
+{
+	struct ipq4019_mdio_data *priv;
+	struct mii_bus *bus;
+	int ret;
+
+	bus = devm_mdiobus_alloc_size(&pdev->dev, sizeof(*priv));
+	if (!bus)
+		return -ENOMEM;
+
+	priv = bus->priv;
+
+	priv->membase = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(priv->membase))
+		return PTR_ERR(priv->membase);
+
+	bus->name = "ipq4019_mdio";
+	bus->read = ipq4019_mdio_read;
+	bus->write = ipq4019_mdio_write;
+	bus->parent = &pdev->dev;
+	snprintf(bus->id, MII_BUS_ID_SIZE, "%s%d", pdev->name, pdev->id);
+
+	ret = of_mdiobus_register(bus, pdev->dev.of_node);
+	if (ret) {
+		dev_err(&pdev->dev, "Cannot register MDIO bus!\n");
+		return ret;
+	}
+
+	platform_set_drvdata(pdev, bus);
+
+	return 0;
+}
+
+static int ipq4019_mdio_remove(struct platform_device *pdev)
+{
+	struct mii_bus *bus = platform_get_drvdata(pdev);
+
+	mdiobus_unregister(bus);
+
+	return 0;
+}
+
+static const struct of_device_id ipq4019_mdio_dt_ids[] = {
+	{ .compatible = "qcom,ipq4019-mdio" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ipq4019_mdio_dt_ids);
+
+static struct platform_driver ipq4019_mdio_driver = {
+	.probe = ipq4019_mdio_probe,
+	.remove = ipq4019_mdio_remove,
+	.driver = {
+		.name = "ipq4019-mdio",
+		.of_match_table = ipq4019_mdio_dt_ids,
+	},
+};
+
+module_platform_driver(ipq4019_mdio_driver);
+
+MODULE_DESCRIPTION("ipq4019 MDIO interface driver");
+MODULE_AUTHOR("Qualcomm Atheros");
+MODULE_LICENSE("Dual BSD/GPL");
-- 
cgit v1.2.3-59-g8ed1b


From 4972ecee06612e167523f528317920fbeba5f12d Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 30 Apr 2020 11:07:06 +0200
Subject: dt-bindings: add Qualcomm IPQ4019 MDIO bindings

This patch adds the binding document for the IPQ40xx MDIO driver.

Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: Luka Perkov <luka.perkov@sartura.hr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/qcom,ipq4019-mdio.yaml | 61 ++++++++++++++++++++++
 1 file changed, 61 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml

diff --git a/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
new file mode 100644
index 000000000000..13555a89975f
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/qcom,ipq4019-mdio.yaml
@@ -0,0 +1,61 @@
+# SPDX-License-Identifier: GPL-2.0-only OR BSD-2-Clause
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/qcom,ipq4019-mdio.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Qualcomm IPQ40xx MDIO Controller Device Tree Bindings
+
+maintainers:
+  - Robert Marko <robert.marko@sartura.hr>
+
+allOf:
+  - $ref: "mdio.yaml#"
+
+properties:
+  compatible:
+    const: qcom,ipq4019-mdio
+
+  "#address-cells":
+    const: 1
+
+  "#size-cells":
+    const: 0
+
+  reg:
+    maxItems: 1
+
+required:
+  - compatible
+  - reg
+  - "#address-cells"
+  - "#size-cells"
+
+examples:
+  - |
+    mdio@90000 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      compatible = "qcom,ipq4019-mdio";
+      reg = <0x90000 0x64>;
+
+      ethphy0: ethernet-phy@0 {
+        reg = <0>;
+      };
+
+      ethphy1: ethernet-phy@1 {
+        reg = <1>;
+      };
+
+      ethphy2: ethernet-phy@2 {
+        reg = <2>;
+      };
+
+      ethphy3: ethernet-phy@3 {
+        reg = <3>;
+      };
+
+      ethphy4: ethernet-phy@4 {
+        reg = <4>;
+      };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From 9c8c0f70ec6fdac2398632c723c48277be09b7c0 Mon Sep 17 00:00:00 2001
From: Robert Marko <robert.marko@sartura.hr>
Date: Thu, 30 Apr 2020 11:07:07 +0200
Subject: ARM: dts: qcom: ipq4019: add MDIO node

This patch adds the necessary MDIO interface node
to the Qualcomm IPQ4019 DTSI.

The built-in QCA8337N switch is managed through it,
and since we have a driver for it, let's add it.

Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Robert Marko <robert.marko@sartura.hr>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Cc: Luka Perkov <luka.perkov@sartura.hr>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/qcom-ipq4019.dtsi | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/arch/arm/boot/dts/qcom-ipq4019.dtsi b/arch/arm/boot/dts/qcom-ipq4019.dtsi
index bfa9ce4c6e69..b9839f86e703 100644
--- a/arch/arm/boot/dts/qcom-ipq4019.dtsi
+++ b/arch/arm/boot/dts/qcom-ipq4019.dtsi
@@ -576,5 +576,33 @@
 					  "legacy";
 			status = "disabled";
 		};
+
+		mdio: mdio@90000 {
+			#address-cells = <1>;
+			#size-cells = <0>;
+			compatible = "qcom,ipq4019-mdio";
+			reg = <0x90000 0x64>;
+			status = "disabled";
+
+			ethphy0: ethernet-phy@0 {
+				reg = <0>;
+			};
+
+			ethphy1: ethernet-phy@1 {
+				reg = <1>;
+			};
+
+			ethphy2: ethernet-phy@2 {
+				reg = <2>;
+			};
+
+			ethphy3: ethernet-phy@3 {
+				reg = <3>;
+			};
+
+			ethphy4: ethernet-phy@4 {
+				reg = <4>;
+			};
+		};
 	};
 };
-- 
cgit v1.2.3-59-g8ed1b


From 99b2292ba21b47bbd97d33ab20892347ad2ac351 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Thu, 30 Apr 2020 18:16:16 +0800
Subject: net: caif: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/caif/chnl_net.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/caif/chnl_net.c b/net/caif/chnl_net.c
index a56628962852..79b6a04d8eb6 100644
--- a/net/caif/chnl_net.c
+++ b/net/caif/chnl_net.c
@@ -211,7 +211,8 @@ static void chnl_flowctrl_cb(struct cflayer *layr, enum caif_ctrlcmd flow,
 	}
 }
 
-static int chnl_net_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t chnl_net_start_xmit(struct sk_buff *skb,
+				       struct net_device *dev)
 {
 	struct chnl_net *priv;
 	struct cfpkt *pkt = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 51070a3627a943b13c7e0a32bbfa96cb8dca4493 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Thu, 30 Apr 2020 19:26:40 +0800
Subject: rionet: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/rionet.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/rionet.c b/drivers/net/rionet.c
index 8eeb38c6199e..2056d6ad04b5 100644
--- a/drivers/net/rionet.c
+++ b/drivers/net/rionet.c
@@ -166,7 +166,8 @@ static int rionet_queue_tx_msg(struct sk_buff *skb, struct net_device *ndev,
 	return 0;
 }
 
-static int rionet_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t rionet_start_xmit(struct sk_buff *skb,
+				     struct net_device *ndev)
 {
 	int i;
 	struct rionet_private *rnet = netdev_priv(ndev);
-- 
cgit v1.2.3-59-g8ed1b


From 1569a3c44303834680e93df2ecf60f356be9b322 Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Thu, 30 Apr 2020 20:15:31 +0800
Subject: net/faraday: Fix unnecessary check in ftmac100_probe()

The function ftmac100_probe() is only called with an Open Firmware
platform device. Therefore there is no need to check whether the
passed-in device is NULL.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/faraday/ftmac100.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/faraday/ftmac100.c b/drivers/net/ethernet/faraday/ftmac100.c
index 32cf54f0e35b..473b337b2e3b 100644
--- a/drivers/net/ethernet/faraday/ftmac100.c
+++ b/drivers/net/ethernet/faraday/ftmac100.c
@@ -1057,9 +1057,6 @@ static int ftmac100_probe(struct platform_device *pdev)
 	struct ftmac100 *priv;
 	int err;
 
-	if (!pdev)
-		return -ENODEV;
-
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	if (!res)
 		return -ENXIO;
-- 
cgit v1.2.3-59-g8ed1b


From 555da9af827d95134656fa459c8f3ece04dd867a Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:38 +0200
Subject: net/smc: add event-based llc_flow framework

The new framework allows starting specific types of LLC control flows,
protects active flows, and makes it possible to wait for flows to finish
before starting a new flow.
This mechanism is used by the LLC control layer to model flows like
'add link' or 'delete link', which need to send/receive several LLC
messages and must not be interrupted by the wrong type of
messages.
'Add link' or 'Delete link' messages arriving in the middle of a flow
are delayed and processed once the current flow has finished.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |   2 +
 net/smc/smc_core.h |  24 ++++++++
 net/smc/smc_llc.c  | 165 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_llc.h  |   8 +++
 4 files changed, 199 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index db49f8cd5c95..4867ddcfe0c6 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -263,6 +263,7 @@ static void smc_lgr_free_work(struct work_struct *work)
 			if (smc_link_usable(lnk))
 				lnk->state = SMC_LNK_INACTIVE;
 		}
+		wake_up_interruptible_all(&lgr->llc_waiter);
 	}
 	smc_lgr_free(lgr);
 }
@@ -696,6 +697,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 			if (smc_link_usable(lnk))
 				lnk->state = SMC_LNK_INACTIVE;
 		}
+		wake_up_interruptible_all(&lgr->llc_waiter);
 	}
 }
 
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index b5781511063d..70399217ad6f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -197,6 +197,20 @@ struct smc_rtoken {				/* address/key of remote RMB */
 
 struct smcd_dev;
 
+enum smc_llc_flowtype {
+	SMC_LLC_FLOW_NONE	= 0,
+	SMC_LLC_FLOW_ADD_LINK	= 2,
+	SMC_LLC_FLOW_DEL_LINK	= 4,
+	SMC_LLC_FLOW_RKEY	= 6,
+};
+
+struct smc_llc_qentry;
+
+struct smc_llc_flow {
+	enum smc_llc_flowtype type;
+	struct smc_llc_qentry *qentry;
+};
+
 struct smc_link_group {
 	struct list_head	list;
 	struct rb_root		conns_all;	/* connection tree */
@@ -238,6 +252,16 @@ struct smc_link_group {
 						/* protects llc_event_q */
 			struct work_struct	llc_event_work;
 						/* llc event worker */
+			wait_queue_head_t	llc_waiter;
+						/* w4 next llc event */
+			struct smc_llc_flow	llc_flow_lcl;
+						/* llc local control field */
+			struct smc_llc_flow	llc_flow_rmt;
+						/* llc remote control field */
+			struct smc_llc_qentry	*delayed_event;
+						/* arrived when flow active */
+			spinlock_t		llc_flow_lock;
+						/* protects llc flow */
 			int			llc_testlink_time;
 						/* link keep alive time */
 		};
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e715dd6735ee..647cf1a2dfa5 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -140,6 +140,154 @@ struct smc_llc_qentry {
 	union smc_llc_msg msg;
 };
 
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
+{
+	struct smc_llc_qentry *qentry = flow->qentry;
+
+	flow->qentry = NULL;
+	return qentry;
+}
+
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow)
+{
+	struct smc_llc_qentry *qentry;
+
+	if (flow->qentry) {
+		qentry = flow->qentry;
+		flow->qentry = NULL;
+		kfree(qentry);
+	}
+}
+
+static inline void smc_llc_flow_qentry_set(struct smc_llc_flow *flow,
+					   struct smc_llc_qentry *qentry)
+{
+	flow->qentry = qentry;
+}
+
+/* try to start a new llc flow, initiated by an incoming llc msg */
+static bool smc_llc_flow_start(struct smc_llc_flow *flow,
+			       struct smc_llc_qentry *qentry)
+{
+	struct smc_link_group *lgr = qentry->link->lgr;
+
+	spin_lock_bh(&lgr->llc_flow_lock);
+	if (flow->type) {
+		/* a flow is already active */
+		if ((qentry->msg.raw.hdr.common.type == SMC_LLC_ADD_LINK ||
+		     qentry->msg.raw.hdr.common.type == SMC_LLC_DELETE_LINK) &&
+		    !lgr->delayed_event) {
+			lgr->delayed_event = qentry;
+		} else {
+			/* forget this llc request */
+			kfree(qentry);
+		}
+		spin_unlock_bh(&lgr->llc_flow_lock);
+		return false;
+	}
+	switch (qentry->msg.raw.hdr.common.type) {
+	case SMC_LLC_ADD_LINK:
+		flow->type = SMC_LLC_FLOW_ADD_LINK;
+		break;
+	case SMC_LLC_DELETE_LINK:
+		flow->type = SMC_LLC_FLOW_DEL_LINK;
+		break;
+	case SMC_LLC_CONFIRM_RKEY:
+	case SMC_LLC_DELETE_RKEY:
+		flow->type = SMC_LLC_FLOW_RKEY;
+		break;
+	default:
+		flow->type = SMC_LLC_FLOW_NONE;
+	}
+	if (qentry == lgr->delayed_event)
+		lgr->delayed_event = NULL;
+	spin_unlock_bh(&lgr->llc_flow_lock);
+	smc_llc_flow_qentry_set(flow, qentry);
+	return true;
+}
+
+/* start a new local llc flow, wait till current flow finished */
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+			  enum smc_llc_flowtype type)
+{
+	enum smc_llc_flowtype allowed_remote = SMC_LLC_FLOW_NONE;
+	int rc;
+
+	/* all flows except confirm_rkey and delete_rkey are exclusive,
+	 * confirm/delete rkey flows can run concurrently (local and remote)
+	 */
+	if (type == SMC_LLC_FLOW_RKEY)
+		allowed_remote = SMC_LLC_FLOW_RKEY;
+again:
+	if (list_empty(&lgr->list))
+		return -ENODEV;
+	spin_lock_bh(&lgr->llc_flow_lock);
+	if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+	    (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+	     lgr->llc_flow_rmt.type == allowed_remote)) {
+		lgr->llc_flow_lcl.type = type;
+		spin_unlock_bh(&lgr->llc_flow_lock);
+		return 0;
+	}
+	spin_unlock_bh(&lgr->llc_flow_lock);
+	rc = wait_event_interruptible_timeout(lgr->llc_waiter,
+			(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE &&
+			 (lgr->llc_flow_rmt.type == SMC_LLC_FLOW_NONE ||
+			  lgr->llc_flow_rmt.type == allowed_remote)),
+			SMC_LLC_WAIT_TIME);
+	if (!rc)
+		return -ETIMEDOUT;
+	goto again;
+}
+
+/* finish the current llc flow */
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow)
+{
+	spin_lock_bh(&lgr->llc_flow_lock);
+	memset(flow, 0, sizeof(*flow));
+	flow->type = SMC_LLC_FLOW_NONE;
+	spin_unlock_bh(&lgr->llc_flow_lock);
+	if (!list_empty(&lgr->list) && lgr->delayed_event &&
+	    flow == &lgr->llc_flow_lcl)
+		schedule_work(&lgr->llc_event_work);
+	else
+		wake_up_interruptible(&lgr->llc_waiter);
+}
+
+/* lnk is optional and used for early wakeup when link goes down, useful in
+ * cases where we wait for a response on the link after we sent a request
+ */
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+				    struct smc_link *lnk,
+				    int time_out, u8 exp_msg)
+{
+	struct smc_llc_flow *flow = &lgr->llc_flow_lcl;
+
+	wait_event_interruptible_timeout(lgr->llc_waiter,
+					 (flow->qentry ||
+					  (lnk && !smc_link_usable(lnk)) ||
+					  list_empty(&lgr->list)),
+					 time_out);
+	if (!flow->qentry ||
+	    (lnk && !smc_link_usable(lnk)) || list_empty(&lgr->list)) {
+		smc_llc_flow_qentry_del(flow);
+		goto out;
+	}
+	if (exp_msg && flow->qentry->msg.raw.hdr.common.type != exp_msg) {
+		if (exp_msg == SMC_LLC_ADD_LINK &&
+		    flow->qentry->msg.raw.hdr.common.type ==
+		    SMC_LLC_DELETE_LINK) {
+			/* flow_start will delay the unexpected msg */
+			smc_llc_flow_start(&lgr->llc_flow_lcl,
+					   smc_llc_flow_qentry_clr(flow));
+			return NULL;
+		}
+		smc_llc_flow_qentry_del(flow);
+	}
+out:
+	return flow->qentry;
+}
+
 /********************************** send *************************************/
 
 struct smc_llc_tx_pend {
@@ -547,6 +695,16 @@ static void smc_llc_event_work(struct work_struct *work)
 						  llc_event_work);
 	struct smc_llc_qentry *qentry;
 
+	if (!lgr->llc_flow_lcl.type && lgr->delayed_event) {
+		if (smc_link_usable(lgr->delayed_event->link)) {
+			smc_llc_event_handler(lgr->delayed_event);
+		} else {
+			qentry = lgr->delayed_event;
+			lgr->delayed_event = NULL;
+			kfree(qentry);
+		}
+	}
+
 again:
 	spin_lock_bh(&lgr->llc_event_q_lock);
 	if (!list_empty(&lgr->llc_event_q)) {
@@ -676,6 +834,8 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
 	INIT_LIST_HEAD(&lgr->llc_event_q);
 	spin_lock_init(&lgr->llc_event_q_lock);
+	spin_lock_init(&lgr->llc_flow_lock);
+	init_waitqueue_head(&lgr->llc_waiter);
 	lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
 }
 
@@ -683,7 +843,12 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 void smc_llc_lgr_clear(struct smc_link_group *lgr)
 {
 	smc_llc_event_flush(lgr);
+	wake_up_interruptible_all(&lgr->llc_waiter);
 	cancel_work_sync(&lgr->llc_event_work);
+	if (lgr->delayed_event) {
+		kfree(lgr->delayed_event);
+		lgr->delayed_event = NULL;
+	}
 }
 
 int smc_llc_link_init(struct smc_link *link)
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 66063f22166b..49e99ff00ee7 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -63,6 +63,14 @@ int smc_llc_do_confirm_rkey(struct smc_link *link,
 			    struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link *link,
 			   struct smc_buf_desc *rmb_desc);
+int smc_llc_flow_initiate(struct smc_link_group *lgr,
+			  enum smc_llc_flowtype type);
+void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
+struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
+				    struct smc_link *lnk,
+				    int time_out, u8 exp_msg);
+struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
+void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From a6688d919b220bd714948e03bb3caa8a66895005 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:39 +0200
Subject: net/smc: enqueue all received LLC messages

Introduce smc_llc_enqueue() to enqueue LLC messages, and adapt
smc_llc_rx_handler() to enqueue all received LLC messages.
smc_llc_enqueue() also makes it possible to enqueue LLC messages from
local code.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 46 +++++++++++++++++++++++++++++-----------------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 647cf1a2dfa5..a146b3b43580 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -719,11 +719,14 @@ again:
 }
 
 /* process llc responses in tasklet context */
-static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc)
+static void smc_llc_rx_response(struct smc_link *link,
+				struct smc_llc_qentry *qentry)
 {
+	u8 llc_type = qentry->msg.raw.hdr.common.type;
+	union smc_llc_msg *llc = &qentry->msg;
 	int rc = 0;
 
-	switch (llc->raw.hdr.common.type) {
+	switch (llc_type) {
 	case SMC_LLC_TEST_LINK:
 		if (link->state == SMC_LNK_ACTIVE)
 			complete(&link->llc_testlink_resp);
@@ -759,40 +762,49 @@ static void smc_llc_rx_response(struct smc_link *link, union smc_llc_msg *llc)
 		complete(&link->llc_delete_rkey_resp);
 		break;
 	}
+	kfree(qentry);
 }
 
-/* copy received msg and add it to the event queue */
-static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc)
 {
-	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
 	struct smc_link_group *lgr = link->lgr;
 	struct smc_llc_qentry *qentry;
-	union smc_llc_msg *llc = buf;
 	unsigned long flags;
 
-	if (wc->byte_len < sizeof(*llc))
-		return; /* short message */
-	if (llc->raw.hdr.length != sizeof(*llc))
-		return; /* invalid message */
-
-	/* process responses immediately */
-	if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
-		smc_llc_rx_response(link, llc);
-		return;
-	}
-
 	qentry = kmalloc(sizeof(*qentry), GFP_ATOMIC);
 	if (!qentry)
 		return;
 	qentry->link = link;
 	INIT_LIST_HEAD(&qentry->list);
 	memcpy(&qentry->msg, llc, sizeof(union smc_llc_msg));
+
+	/* process responses immediately */
+	if (llc->raw.hdr.flags & SMC_LLC_FLAG_RESP) {
+		smc_llc_rx_response(link, qentry);
+		return;
+	}
+
+	/* add requests to event queue */
 	spin_lock_irqsave(&lgr->llc_event_q_lock, flags);
 	list_add_tail(&qentry->list, &lgr->llc_event_q);
 	spin_unlock_irqrestore(&lgr->llc_event_q_lock, flags);
 	schedule_work(&link->lgr->llc_event_work);
 }
 
+/* copy received msg and add it to the event queue */
+static void smc_llc_rx_handler(struct ib_wc *wc, void *buf)
+{
+	struct smc_link *link = (struct smc_link *)wc->qp->qp_context;
+	union smc_llc_msg *llc = buf;
+
+	if (wc->byte_len < sizeof(*llc))
+		return; /* short message */
+	if (llc->raw.hdr.length != sizeof(*llc))
+		return; /* invalid message */
+
+	smc_llc_enqueue(link, llc);
+}
+
 /***************************** worker, utils *********************************/
 
 static void smc_llc_testlink_work(struct work_struct *work)
-- 
cgit v1.2.3-59-g8ed1b


From 81e6e5e70df46bb5b205e53f2b7885e49a9d4974 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:40 +0200
Subject: net/smc: introduce link group type

Add a type field to the link group which reflects the current link group
redundancy state.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 70399217ad6f..51366a9f4980 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -197,6 +197,14 @@ struct smc_rtoken {				/* address/key of remote RMB */
 
 struct smcd_dev;
 
+enum smc_lgr_type {				/* redundancy state of lgr */
+	SMC_LGR_NONE,			/* no active links, lgr to be deleted */
+	SMC_LGR_SINGLE,			/* 1 active RNIC on each peer */
+	SMC_LGR_SYMMETRIC,		/* 2 active RNICs on each peer */
+	SMC_LGR_ASYMMETRIC_PEER,	/* local has 2, peer 1 active RNICs */
+	SMC_LGR_ASYMMETRIC_LOCAL,	/* local has 1, peer 2 active RNICs */
+};
+
 enum smc_llc_flowtype {
 	SMC_LLC_FLOW_NONE	= 0,
 	SMC_LLC_FLOW_ADD_LINK	= 2,
@@ -246,6 +254,8 @@ struct smc_link_group {
 			DECLARE_BITMAP(rtokens_used_mask, SMC_RMBS_PER_LGR_MAX);
 						/* used rtoken elements */
 			u8			next_link_id;
+			enum smc_lgr_type	type;
+						/* redundancy state */
 			struct list_head	llc_event_q;
 						/* queue for llc events */
 			spinlock_t		llc_event_q_lock;
-- 
cgit v1.2.3-59-g8ed1b


From 92334cfcb3a2a102dc1b23513bbe2fca4347e2d6 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:41 +0200
Subject: net/smc: add logic to evaluate CONFIRM_LINK messages to LLC layer

Introduce smc_llc_eval_conf_link() to evaluate the CONFIRM_LINK message
contents. This implements this logic at the LLC layer. The function will
be used by af_smc.c to process the received LLC layer messages.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 11 +++++++++++
 net/smc/smc_llc.h |  2 ++
 2 files changed, 13 insertions(+)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index a146b3b43580..9248b90fe37e 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -946,6 +946,17 @@ out:
 	return rc;
 }
 
+/* evaluate confirm link request or response */
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+			   enum smc_llc_reqresp type)
+{
+	if (type == SMC_LLC_REQ)	/* SMC server assigns link_id */
+		qentry->link->link_id = qentry->msg.confirm_link.link_num;
+	if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
+		return -ENOTSUPP;
+	return 0;
+}
+
 /***************************** init, exit, misc ******************************/
 
 static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 49e99ff00ee7..637acf91ffb7 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -66,6 +66,8 @@ int smc_llc_do_delete_rkey(struct smc_link *link,
 int smc_llc_flow_initiate(struct smc_link_group *lgr,
 			  enum smc_llc_flowtype type);
 void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
+int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
+			   enum smc_llc_reqresp type);
 struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				    struct smc_link *lnk,
 				    int time_out, u8 exp_msg);
-- 
cgit v1.2.3-59-g8ed1b


From 4667bb4aaabf87d6b97be1b4671b9db340a58cdc Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:42 +0200
Subject: net/smc: adapt SMC server code to use the LLC flow

Change the code that processes the SMC server part of connection
establishment to use the LLC flow framework (CONFIRM_LINK response
messages).

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 39 +++++++++++++++------------------------
 net/smc/smc_core.h |  3 ---
 net/smc/smc_llc.c  | 20 +++++---------------
 3 files changed, 20 insertions(+), 42 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index e859e3f420d9..ab3aef1ddfa4 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1019,9 +1019,11 @@ void smc_close_non_accepted(struct sock *sk)
 static int smcr_serv_conf_first_link(struct smc_sock *smc)
 {
 	struct smc_link *link = smc->conn.lnk;
-	int rest;
+	struct smc_llc_qentry *qentry;
 	int rc;
 
+	link->lgr->type = SMC_LGR_SINGLE;
+
 	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
@@ -1031,40 +1033,27 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	/* receive CONFIRM LINK response from client over the RoCE fabric */
-	rest = wait_for_completion_interruptible_timeout(
-		&link->llc_confirm_resp,
-		SMC_LLC_WAIT_FIRST_TIME);
-	if (rest <= 0) {
+	qentry = smc_llc_wait(link->lgr, link, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_CONFIRM_LINK);
+	if (!qentry) {
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
-
-	if (link->llc_confirm_resp_rc)
+	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
+	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+	if (rc)
 		return SMC_CLC_DECL_RMBE_EC;
 
-	/* send ADD LINK request to client over the RoCE fabric */
-	rc = smc_llc_send_add_link(link,
-				   link->smcibdev->mac[link->ibport - 1],
-				   link->gid, SMC_LLC_REQ);
-	if (rc < 0)
-		return SMC_CLC_DECL_TIMEOUT_AL;
-
-	/* receive ADD LINK response from client over the RoCE fabric */
-	rest = wait_for_completion_interruptible_timeout(&link->llc_add_resp,
-							 SMC_LLC_WAIT_TIME);
-	if (rest <= 0) {
-		struct smc_clc_msg_decline dclc;
-
-		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
-				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
-		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
-	}
+	/* confirm_rkey is implicit on 1st contact */
+	smc->conn.rmb_desc->is_conf_rkey = true;
 
 	smc_llc_link_active(link);
 
+	/* initial contact - try to establish second link */
+	/* tbd: call smc_llc_srv_add_link(link); */
 	return 0;
 }
 
@@ -1240,7 +1229,9 @@ static int smc_listen_rdma_finish(struct smc_sock *new_smc,
 			goto decline;
 		}
 		/* QP confirmation over RoCE fabric */
+		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
 		reason_code = smcr_serv_conf_first_link(new_smc);
+		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
 		if (reason_code)
 			goto decline;
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 51366a9f4980..01a9cb885ef2 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -121,11 +121,8 @@ struct smc_link {
 
 	enum smc_link_state	state;		/* state of link */
 	struct completion	llc_confirm;	/* wait for rx of conf link */
-	struct completion	llc_confirm_resp; /* wait 4 rx of cnf lnk rsp */
 	int			llc_confirm_rc; /* rc from confirm link msg */
-	int			llc_confirm_resp_rc; /* rc from conf_resp msg */
 	struct completion	llc_add;	/* wait for rx of add link */
-	struct completion	llc_add_resp;	/* wait for rx of add link rsp*/
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
 	struct completion	llc_testlink_resp; /* wait for rx of testlink */
 	int			llc_testlink_time; /* testlink interval */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 9248b90fe37e..5381b16fd482 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -724,26 +724,18 @@ static void smc_llc_rx_response(struct smc_link *link,
 {
 	u8 llc_type = qentry->msg.raw.hdr.common.type;
 	union smc_llc_msg *llc = &qentry->msg;
-	int rc = 0;
 
 	switch (llc_type) {
 	case SMC_LLC_TEST_LINK:
 		if (link->state == SMC_LNK_ACTIVE)
 			complete(&link->llc_testlink_resp);
 		break;
-	case SMC_LLC_CONFIRM_LINK:
-		if (!(llc->raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
-			rc = ENOTSUPP;
-		if (link->lgr->role == SMC_SERV &&
-		    link->state == SMC_LNK_ACTIVATING) {
-			link->llc_confirm_resp_rc = rc;
-			complete(&link->llc_confirm_resp);
-		}
-		break;
 	case SMC_LLC_ADD_LINK:
-		if (link->state == SMC_LNK_ACTIVATING)
-			complete(&link->llc_add_resp);
-		break;
+	case SMC_LLC_CONFIRM_LINK:
+		/* assign responses to the local flow, we requested them */
+		smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
+		wake_up_interruptible(&link->lgr->llc_waiter);
+		return;
 	case SMC_LLC_DELETE_LINK:
 		if (link->lgr->role == SMC_SERV)
 			smc_lgr_schedule_free_work_fast(link->lgr);
@@ -866,9 +858,7 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 int smc_llc_link_init(struct smc_link *link)
 {
 	init_completion(&link->llc_confirm);
-	init_completion(&link->llc_confirm_resp);
 	init_completion(&link->llc_add);
-	init_completion(&link->llc_add_resp);
 	init_completion(&link->llc_confirm_rkey_resp);
 	init_completion(&link->llc_delete_rkey_resp);
 	mutex_init(&link->llc_delete_rkey_mutex);
-- 
cgit v1.2.3-59-g8ed1b


From 0fb0b02bd6fd26cba38002be4a6bbcae2228fd44 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:43 +0200
Subject: net/smc: adapt SMC client code to use the LLC flow

Change the code that processes the SMC client part of connection
establishment to use the LLC flow framework (CONFIRM_LINK request
messages).

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 69 +++++++++++++++++++++++++++++++++------------------
 net/smc/smc_clc.h  |  1 +
 net/smc/smc_core.h |  3 ---
 net/smc/smc_llc.c  | 72 +++++++++++++++++++-----------------------------------
 4 files changed, 71 insertions(+), 74 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index ab3aef1ddfa4..bd9662d06896 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -382,22 +382,24 @@ static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 {
 	struct smc_link *link = smc->conn.lnk;
-	int rest;
+	struct smc_llc_qentry *qentry;
 	int rc;
 
+	link->lgr->type = SMC_LGR_SINGLE;
+
 	/* receive CONFIRM LINK request from server over RoCE fabric */
-	rest = wait_for_completion_interruptible_timeout(
-		&link->llc_confirm,
-		SMC_LLC_WAIT_FIRST_TIME);
-	if (rest <= 0) {
+	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_CONFIRM_LINK);
+	if (!qentry) {
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
-
-	if (link->llc_confirm_rc)
+	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
+	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
+	if (rc)
 		return SMC_CLC_DECL_RMBE_EC;
 
 	rc = smc_ib_modify_qp_rts(link);
@@ -409,31 +411,30 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
+	/* confirm_rkey is implicit on 1st contact */
+	smc->conn.rmb_desc->is_conf_rkey = true;
+
 	/* send CONFIRM LINK response over RoCE fabric */
 	rc = smc_llc_send_confirm_link(link, SMC_LLC_RESP);
 	if (rc < 0)
 		return SMC_CLC_DECL_TIMEOUT_CL;
 
-	/* receive ADD LINK request from server over RoCE fabric */
-	rest = wait_for_completion_interruptible_timeout(&link->llc_add,
-							 SMC_LLC_WAIT_TIME);
-	if (rest <= 0) {
+	smc_llc_link_active(link);
+
+	/* optional 2nd link, receive ADD LINK request from server */
+	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_ADD_LINK);
+	if (!qentry) {
 		struct smc_clc_msg_decline dclc;
 
 		rc = smc_clc_wait_msg(smc, &dclc, sizeof(dclc),
 				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
-		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_AL : rc;
+		if (rc == -EAGAIN)
+			rc = 0; /* no DECLINE received, go with one link */
+		return rc;
 	}
-
-	/* send add link reject message, only one link supported for now */
-	rc = smc_llc_send_add_link(link,
-				   link->smcibdev->mac[link->ibport - 1],
-				   link->gid, SMC_LLC_RESP);
-	if (rc < 0)
-		return SMC_CLC_DECL_TIMEOUT_AL;
-
-	smc_llc_link_active(link);
-
+	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
+	/* tbd: call smc_llc_cli_add_link(link, qentry); */
 	return 0;
 }
 
@@ -613,8 +614,8 @@ static int smc_connect_rdma(struct smc_sock *smc,
 			    struct smc_clc_msg_accept_confirm *aclc,
 			    struct smc_init_info *ini)
 {
+	int i, reason_code = 0;
 	struct smc_link *link;
-	int reason_code = 0;
 
 	ini->is_smcd = false;
 	ini->ib_lcl = &aclc->lcl;
@@ -627,10 +628,28 @@ static int smc_connect_rdma(struct smc_sock *smc,
 		mutex_unlock(&smc_client_lgr_pending);
 		return reason_code;
 	}
-	link = smc->conn.lnk;
 
 	smc_conn_save_peer_info(smc, aclc);
 
+	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
+		link = smc->conn.lnk;
+	} else {
+		/* set link that was assigned by server */
+		link = NULL;
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			struct smc_link *l = &smc->conn.lgr->lnk[i];
+
+			if (l->peer_qpn == ntoh24(aclc->qpn)) {
+				link = l;
+				break;
+			}
+		}
+		if (!link)
+			return smc_connect_abort(smc, SMC_CLC_DECL_NOSRVLINK,
+						 ini->cln_first_contact);
+		smc->conn.lnk = link;
+	}
+
 	/* create send buffer and rmb */
 	if (smc_buf_create(smc, false))
 		return smc_connect_abort(smc, SMC_CLC_DECL_MEM,
@@ -666,7 +685,9 @@ static int smc_connect_rdma(struct smc_sock *smc,
 
 	if (ini->cln_first_contact == SMC_FIRST_CONTACT) {
 		/* QP confirmation over RoCE fabric */
+		smc_llc_flow_initiate(link->lgr, SMC_LLC_FLOW_ADD_LINK);
 		reason_code = smcr_clnt_conf_first_link(smc);
+		smc_llc_flow_stop(link->lgr, &link->lgr->llc_flow_lcl);
 		if (reason_code)
 			return smc_connect_abort(smc, reason_code,
 						 ini->cln_first_contact);
diff --git a/net/smc/smc_clc.h b/net/smc/smc_clc.h
index 4f2e150a2be1..465876701b75 100644
--- a/net/smc/smc_clc.h
+++ b/net/smc/smc_clc.h
@@ -45,6 +45,7 @@
 #define SMC_CLC_DECL_GETVLANERR	0x03080000  /* err to get vlan id of ip device*/
 #define SMC_CLC_DECL_ISMVLANERR	0x03090000  /* err to reg vlan id on ism dev  */
 #define SMC_CLC_DECL_NOACTLINK	0x030a0000  /* no active smc-r link in lgr    */
+#define SMC_CLC_DECL_NOSRVLINK	0x030b0000  /* SMC-R link from srv not found  */
 #define SMC_CLC_DECL_SYNCERR	0x04000000  /* synchronization error          */
 #define SMC_CLC_DECL_PEERDECL	0x05000000  /* peer declined during handshake */
 #define SMC_CLC_DECL_INTERR	0x09990000  /* internal error		      */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 01a9cb885ef2..31237c4c0d93 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -120,9 +120,6 @@ struct smc_link {
 	struct smc_link_group	*lgr;		/* parent link group */
 
 	enum smc_link_state	state;		/* state of link */
-	struct completion	llc_confirm;	/* wait for rx of conf link */
-	int			llc_confirm_rc; /* rc from confirm link msg */
-	struct completion	llc_add;	/* wait for rx of add link */
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
 	struct completion	llc_testlink_resp; /* wait for rx of testlink */
 	int			llc_testlink_time; /* testlink interval */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 5381b16fd482..644e9ab0dec5 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -528,47 +528,6 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 
 /********************************* receive ***********************************/
 
-static void smc_llc_rx_confirm_link(struct smc_link *link,
-				    struct smc_llc_msg_confirm_link *llc)
-{
-	struct smc_link_group *lgr = smc_get_lgr(link);
-	int conf_rc = 0;
-
-	/* RMBE eyecatchers are not supported */
-	if (!(llc->hd.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
-		conf_rc = ENOTSUPP;
-
-	if (lgr->role == SMC_CLNT &&
-	    link->state == SMC_LNK_ACTIVATING) {
-		link->llc_confirm_rc = conf_rc;
-		link->link_id = llc->link_num;
-		complete(&link->llc_confirm);
-	}
-}
-
-static void smc_llc_rx_add_link(struct smc_link *link,
-				struct smc_llc_msg_add_link *llc)
-{
-	struct smc_link_group *lgr = smc_get_lgr(link);
-
-	if (link->state == SMC_LNK_ACTIVATING) {
-		complete(&link->llc_add);
-		return;
-	}
-
-	if (lgr->role == SMC_SERV) {
-		smc_llc_prep_add_link(llc, link,
-				link->smcibdev->mac[link->ibport - 1],
-				link->gid, SMC_LLC_REQ);
-
-	} else {
-		smc_llc_prep_add_link(llc, link,
-				link->smcibdev->mac[link->ibport - 1],
-				link->gid, SMC_LLC_RESP);
-	}
-	smc_llc_send_message(link, llc);
-}
-
 static void smc_llc_rx_delete_link(struct smc_link *link,
 				   struct smc_llc_msg_del_link *llc)
 {
@@ -657,6 +616,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 {
 	union smc_llc_msg *llc = &qentry->msg;
 	struct smc_link *link = qentry->link;
+	struct smc_link_group *lgr = link->lgr;
 
 	if (!smc_link_usable(link))
 		goto out;
@@ -665,11 +625,31 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 	case SMC_LLC_TEST_LINK:
 		smc_llc_rx_test_link(link, &llc->test_link);
 		break;
-	case SMC_LLC_CONFIRM_LINK:
-		smc_llc_rx_confirm_link(link, &llc->confirm_link);
-		break;
 	case SMC_LLC_ADD_LINK:
-		smc_llc_rx_add_link(link, &llc->add_link);
+		if (list_empty(&lgr->list))
+			goto out;	/* lgr is terminating */
+		if (lgr->role == SMC_CLNT) {
+			if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK) {
+				/* a flow is waiting for this message */
+				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+							qentry);
+				wake_up_interruptible(&lgr->llc_waiter);
+			} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+						      qentry)) {
+				/* tbd: schedule_work(&lgr->llc_add_link_work); */
+			}
+		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
+			/* as smc server, handle client suggestion */
+			/* tbd: schedule_work(&lgr->llc_add_link_work); */
+		}
+		return;
+	case SMC_LLC_CONFIRM_LINK:
+		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+			/* a flow is waiting for this message */
+			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
+			wake_up_interruptible(&lgr->llc_waiter);
+			return;
+		}
 		break;
 	case SMC_LLC_DELETE_LINK:
 		smc_llc_rx_delete_link(link, &llc->delete_link);
@@ -857,8 +837,6 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 
 int smc_llc_link_init(struct smc_link *link)
 {
-	init_completion(&link->llc_confirm);
-	init_completion(&link->llc_add);
 	init_completion(&link->llc_confirm_rkey_resp);
 	init_completion(&link->llc_delete_rkey_resp);
 	mutex_init(&link->llc_delete_rkey_mutex);
-- 
cgit v1.2.3-59-g8ed1b


From 3d88a21b0cb6a2661a567e57a431e5aa12ecb203 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:44 +0200
Subject: net/smc: multiple link support and LLC flow for
 smc_llc_do_confirm_rkey

Adapt smc_llc_do_confirm_rkey() to use the LLC flow and support the
rkeys of multiple links when the CONFIRM_RKEY LLC message is built.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h |  2 --
 net/smc/smc_llc.c  | 65 +++++++++++++++++++++++++++++++++++-------------------
 net/smc/smc_llc.h  |  2 +-
 3 files changed, 43 insertions(+), 26 deletions(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 31237c4c0d93..4e0dfb1d5804 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -123,8 +123,6 @@ struct smc_link {
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
 	struct completion	llc_testlink_resp; /* wait for rx of testlink */
 	int			llc_testlink_time; /* testlink interval */
-	struct completion	llc_confirm_rkey_resp; /* w4 rx of cnf rkey */
-	int			llc_confirm_rkey_resp_rc; /* rc from cnf rkey */
 	struct completion	llc_delete_rkey_resp; /* w4 rx of del rkey */
 	int			llc_delete_rkey_resp_rc; /* rc from del rkey */
 	struct mutex		llc_delete_rkey_mutex; /* serialize usage */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 644e9ab0dec5..5db11f54b4cd 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -369,27 +369,44 @@ int smc_llc_send_confirm_link(struct smc_link *link,
 }
 
 /* send LLC confirm rkey request */
-static int smc_llc_send_confirm_rkey(struct smc_link *link,
+static int smc_llc_send_confirm_rkey(struct smc_link *send_link,
 				     struct smc_buf_desc *rmb_desc)
 {
 	struct smc_llc_msg_confirm_rkey *rkeyllc;
 	struct smc_wr_tx_pend_priv *pend;
 	struct smc_wr_buf *wr_buf;
-	int rc;
+	struct smc_link *link;
+	int i, rc, rtok_ix;
 
-	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	rc = smc_llc_add_pending_send(send_link, &wr_buf, &pend);
 	if (rc)
 		return rc;
 	rkeyllc = (struct smc_llc_msg_confirm_rkey *)wr_buf;
 	memset(rkeyllc, 0, sizeof(*rkeyllc));
 	rkeyllc->hd.common.type = SMC_LLC_CONFIRM_RKEY;
 	rkeyllc->hd.length = sizeof(struct smc_llc_msg_confirm_rkey);
+
+	rtok_ix = 1;
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		link = &send_link->lgr->lnk[i];
+		if (link->state == SMC_LNK_ACTIVE && link != send_link) {
+			rkeyllc->rtoken[rtok_ix].link_id = link->link_id;
+			rkeyllc->rtoken[rtok_ix].rmb_key =
+				htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+			rkeyllc->rtoken[rtok_ix].rmb_vaddr = cpu_to_be64(
+				(u64)sg_dma_address(
+					rmb_desc->sgt[link->link_idx].sgl));
+			rtok_ix++;
+		}
+	}
+	/* rkey of send_link is in rtoken[0] */
+	rkeyllc->rtoken[0].num_rkeys = rtok_ix - 1;
 	rkeyllc->rtoken[0].rmb_key =
-		htonl(rmb_desc->mr_rx[link->link_idx]->rkey);
+		htonl(rmb_desc->mr_rx[send_link->link_idx]->rkey);
 	rkeyllc->rtoken[0].rmb_vaddr = cpu_to_be64(
-		(u64)sg_dma_address(rmb_desc->sgt[link->link_idx].sgl));
+		(u64)sg_dma_address(rmb_desc->sgt[send_link->link_idx].sgl));
 	/* send llc message */
-	rc = smc_wr_tx_send(link, pend);
+	rc = smc_wr_tx_send(send_link, pend);
 	return rc;
 }
 
@@ -712,6 +729,7 @@ static void smc_llc_rx_response(struct smc_link *link,
 		break;
 	case SMC_LLC_ADD_LINK:
 	case SMC_LLC_CONFIRM_LINK:
+	case SMC_LLC_CONFIRM_RKEY:
 		/* assign responses to the local flow, we requested them */
 		smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
 		wake_up_interruptible(&link->lgr->llc_waiter);
@@ -720,11 +738,6 @@ static void smc_llc_rx_response(struct smc_link *link,
 		if (link->lgr->role == SMC_SERV)
 			smc_lgr_schedule_free_work_fast(link->lgr);
 		break;
-	case SMC_LLC_CONFIRM_RKEY:
-		link->llc_confirm_rkey_resp_rc = llc->raw.hdr.flags &
-						 SMC_LLC_FLAG_RKEY_NEG;
-		complete(&link->llc_confirm_rkey_resp);
-		break;
 	case SMC_LLC_CONFIRM_RKEY_CONT:
 		/* unused as long as we don't send this type of msg */
 		break;
@@ -837,7 +850,6 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 
 int smc_llc_link_init(struct smc_link *link)
 {
-	init_completion(&link->llc_confirm_rkey_resp);
 	init_completion(&link->llc_delete_rkey_resp);
 	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
@@ -870,23 +882,30 @@ void smc_llc_link_clear(struct smc_link *link)
 	smc_wr_wakeup_tx_wait(link);
 }
 
-/* register a new rtoken at the remote peer */
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+/* register a new rtoken at the remote peer (for all links) */
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 			    struct smc_buf_desc *rmb_desc)
 {
-	int rc;
+	struct smc_link_group *lgr = send_link->lgr;
+	struct smc_llc_qentry *qentry = NULL;
+	int rc = 0;
 
-	/* protected by mutex smc_create_lgr_pending */
-	reinit_completion(&link->llc_confirm_rkey_resp);
-	rc = smc_llc_send_confirm_rkey(link, rmb_desc);
+	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
 	if (rc)
 		return rc;
+	rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
+	if (rc)
+		goto out;
 	/* receive CONFIRM RKEY response from server over RoCE fabric */
-	rc = wait_for_completion_interruptible_timeout(
-			&link->llc_confirm_rkey_resp, SMC_LLC_WAIT_TIME);
-	if (rc <= 0 || link->llc_confirm_rkey_resp_rc)
-		return -EFAULT;
-	return 0;
+	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_CONFIRM_RKEY);
+	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
+		rc = -EFAULT;
+out:
+	if (qentry)
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+	return rc;
 }
 
 /* unregister an rtoken at the remote peer */
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 637acf91ffb7..d82d8346b61e 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -59,7 +59,7 @@ int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link);
 void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
-int smc_llc_do_confirm_rkey(struct smc_link *link,
+int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 			    struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link *link,
 			   struct smc_buf_desc *rmb_desc);
-- 
cgit v1.2.3-59-g8ed1b


From 6d74c3a8a3e7a488a7d9d8c4a59091ccae72fc4c Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:45 +0200
Subject: net/smc: multiple link support and LLC flow for
 smc_llc_do_delete_rkey

Adapt smc_llc_do_delete_rkey() to use the LLC flow and support multiple
links when deleting the rkeys for rmb buffers at the peer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 10 ++++------
 net/smc/smc_core.h |  3 ---
 net/smc/smc_llc.c  | 39 +++++++++++++++++++--------------------
 net/smc/smc_llc.h  |  2 +-
 4 files changed, 24 insertions(+), 30 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4867ddcfe0c6..f71a366ed6ac 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -446,13 +446,11 @@ out:
 }
 
 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
-			   struct smc_link *lnk)
+			   struct smc_link_group *lgr)
 {
-	struct smc_link_group *lgr = lnk->lgr;
-
 	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
 		/* unregister rmb with peer */
-		smc_llc_do_delete_rkey(lnk, rmb_desc);
+		smc_llc_do_delete_rkey(lgr, rmb_desc);
 		rmb_desc->is_conf_rkey = false;
 	}
 	if (rmb_desc->is_reg_err) {
@@ -475,7 +473,7 @@ static void smc_buf_unuse(struct smc_connection *conn,
 	if (conn->rmb_desc && lgr->is_smcd)
 		conn->rmb_desc->used = 0;
 	else if (conn->rmb_desc)
-		smcr_buf_unuse(conn->rmb_desc, conn->lnk);
+		smcr_buf_unuse(conn->rmb_desc, lgr);
 }
 
 /* remove a finished connection from its link group */
@@ -1169,7 +1167,6 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 		if (!smc_link_usable(lnk))
 			continue;
 		if (smcr_buf_map_link(buf_desc, is_rmb, lnk)) {
-			smcr_buf_unuse(buf_desc, lnk);
 			rc = -ENOMEM;
 			goto out;
 		}
@@ -1275,6 +1272,7 @@ static int __smc_buf_create(struct smc_sock *smc, bool is_smcd, bool is_rmb)
 
 	if (!is_smcd) {
 		if (smcr_buf_map_usable_links(lgr, buf_desc, is_rmb)) {
+			smcr_buf_unuse(buf_desc, lgr);
 			return -ENOMEM;
 		}
 	}
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 4e0dfb1d5804..364a54e28d61 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -123,9 +123,6 @@ struct smc_link {
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
 	struct completion	llc_testlink_resp; /* wait for rx of testlink */
 	int			llc_testlink_time; /* testlink interval */
-	struct completion	llc_delete_rkey_resp; /* w4 rx of del rkey */
-	int			llc_delete_rkey_resp_rc; /* rc from del rkey */
-	struct mutex		llc_delete_rkey_mutex; /* serialize usage */
 };
 
 /* For now we just allow one parallel link per link group. The SMC protocol
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 5db11f54b4cd..f9ec270818fa 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -720,7 +720,6 @@ static void smc_llc_rx_response(struct smc_link *link,
 				struct smc_llc_qentry *qentry)
 {
 	u8 llc_type = qentry->msg.raw.hdr.common.type;
-	union smc_llc_msg *llc = &qentry->msg;
 
 	switch (llc_type) {
 	case SMC_LLC_TEST_LINK:
@@ -730,6 +729,7 @@ static void smc_llc_rx_response(struct smc_link *link,
 	case SMC_LLC_ADD_LINK:
 	case SMC_LLC_CONFIRM_LINK:
 	case SMC_LLC_CONFIRM_RKEY:
+	case SMC_LLC_DELETE_RKEY:
 		/* assign responses to the local flow, we requested them */
 		smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
 		wake_up_interruptible(&link->lgr->llc_waiter);
@@ -741,11 +741,6 @@ static void smc_llc_rx_response(struct smc_link *link,
 	case SMC_LLC_CONFIRM_RKEY_CONT:
 		/* unused as long as we don't send this type of msg */
 		break;
-	case SMC_LLC_DELETE_RKEY:
-		link->llc_delete_rkey_resp_rc = llc->raw.hdr.flags &
-						SMC_LLC_FLAG_RKEY_NEG;
-		complete(&link->llc_delete_rkey_resp);
-		break;
 	}
 	kfree(qentry);
 }
@@ -850,8 +845,6 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 
 int smc_llc_link_init(struct smc_link *link)
 {
-	init_completion(&link->llc_delete_rkey_resp);
-	mutex_init(&link->llc_delete_rkey_mutex);
 	init_completion(&link->llc_testlink_resp);
 	INIT_DELAYED_WORK(&link->llc_testlink_wrk, smc_llc_testlink_work);
 	return 0;
@@ -909,27 +902,33 @@ out:
 }
 
 /* unregister an rtoken at the remote peer */
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
 			   struct smc_buf_desc *rmb_desc)
 {
+	struct smc_llc_qentry *qentry = NULL;
+	struct smc_link *send_link;
 	int rc = 0;
 
-	mutex_lock(&link->llc_delete_rkey_mutex);
-	if (link->state != SMC_LNK_ACTIVE)
-		goto out;
-	reinit_completion(&link->llc_delete_rkey_resp);
-	rc = smc_llc_send_delete_rkey(link, rmb_desc);
+	send_link = smc_llc_usable_link(lgr);
+	if (!send_link)
+		return -ENOLINK;
+
+	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+	if (rc)
+		return rc;
+	/* protected by llc_flow control */
+	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
 	if (rc)
 		goto out;
 	/* receive DELETE RKEY response from server over RoCE fabric */
-	rc = wait_for_completion_interruptible_timeout(
-			&link->llc_delete_rkey_resp, SMC_LLC_WAIT_TIME);
-	if (rc <= 0 || link->llc_delete_rkey_resp_rc)
+	qentry = smc_llc_wait(lgr, send_link, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_DELETE_RKEY);
+	if (!qentry || (qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_RKEY_NEG))
 		rc = -EFAULT;
-	else
-		rc = 0;
 out:
-	mutex_unlock(&link->llc_delete_rkey_mutex);
+	if (qentry)
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 	return rc;
 }
 
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index d82d8346b61e..e9f23affece6 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -61,7 +61,7 @@ void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 			    struct smc_buf_desc *rmb_desc);
-int smc_llc_do_delete_rkey(struct smc_link *link,
+int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
 			   struct smc_buf_desc *rmb_desc);
 int smc_llc_flow_initiate(struct smc_link_group *lgr,
 			  enum smc_llc_flowtype type);
-- 
cgit v1.2.3-59-g8ed1b


From 56e8091c7a098ef2257f85f16665d79cf3049da9 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:46 +0200
Subject: net/smc: move the TEST_LINK response processing into event handler

Get rid of the extra function and move the two-liner for the TEST_LINK
response processing into the event handler function.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 10 ++--------
 1 file changed, 2 insertions(+), 8 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index f9ec270818fa..4945abbad111 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -563,13 +563,6 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 	smc_lgr_terminate_sched(lgr);
 }
 
-static void smc_llc_rx_test_link(struct smc_link *link,
-				 struct smc_llc_msg_test_link *llc)
-{
-	llc->hd.flags |= SMC_LLC_FLAG_RESP;
-	smc_llc_send_message(link, llc);
-}
-
 static void smc_llc_rx_confirm_rkey(struct smc_link *link,
 				    struct smc_llc_msg_confirm_rkey *llc)
 {
@@ -640,7 +633,8 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 
 	switch (llc->raw.hdr.common.type) {
 	case SMC_LLC_TEST_LINK:
-		smc_llc_rx_test_link(link, &llc->test_link);
+		llc->test_link.hd.flags |= SMC_LLC_FLAG_RESP;
+		smc_llc_send_message(link, llc);
 		break;
 	case SMC_LLC_ADD_LINK:
 		if (list_empty(&lgr->list))
-- 
cgit v1.2.3-59-g8ed1b


From ba21abd22f9ffa023921923a6c01d28b59731ff8 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:47 +0200
Subject: net/smc: new smc_rtoken_set functions for multiple link support

Introduce smc_rtoken_set() to set the rtoken for a new link to an
existing rmb whose rtoken is given, and smc_rtoken_set2() to set an
rtoken for a new link whose link_id is given.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 47 +++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  4 ++++
 2 files changed, 51 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index f71a366ed6ac..096dce92ee2b 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1368,6 +1368,53 @@ static inline int smc_rmb_reserve_rtoken_idx(struct smc_link_group *lgr)
 	return -ENOSPC;
 }
 
+static int smc_rtoken_find_by_link(struct smc_link_group *lgr, int lnk_idx,
+				   u32 rkey)
+{
+	int i;
+
+	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+		if (test_bit(i, lgr->rtokens_used_mask) &&
+		    lgr->rtokens[i][lnk_idx].rkey == rkey)
+			return i;
+	}
+	return -ENOENT;
+}
+
+/* set rtoken for a new link to an existing rmb */
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey)
+{
+	int rtok_idx;
+
+	rtok_idx = smc_rtoken_find_by_link(lgr, link_idx, ntohl(nw_rkey_known));
+	if (rtok_idx == -ENOENT)
+		return;
+	lgr->rtokens[rtok_idx][link_idx_new].rkey = ntohl(nw_rkey);
+	lgr->rtokens[rtok_idx][link_idx_new].dma_addr = be64_to_cpu(nw_vaddr);
+}
+
+/* set rtoken for a new link whose link_id is given */
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+		     __be64 nw_vaddr, __be32 nw_rkey)
+{
+	u64 dma_addr = be64_to_cpu(nw_vaddr);
+	u32 rkey = ntohl(nw_rkey);
+	bool found = false;
+	int link_idx;
+
+	for (link_idx = 0; link_idx < SMC_LINKS_PER_LGR_MAX; link_idx++) {
+		if (lgr->lnk[link_idx].link_id == link_id) {
+			found = true;
+			break;
+		}
+	}
+	if (!found)
+		return;
+	lgr->rtokens[rtok_idx][link_idx].rkey = rkey;
+	lgr->rtokens[rtok_idx][link_idx].dma_addr = dma_addr;
+}
+
 /* add a new rtoken from peer */
 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey)
 {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 364a54e28d61..0be26386057f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -352,6 +352,10 @@ int smc_rmb_rtoken_handling(struct smc_connection *conn, struct smc_link *link,
 			    struct smc_clc_msg_accept_confirm *clc);
 int smc_rtoken_add(struct smc_link *lnk, __be64 nw_vaddr, __be32 nw_rkey);
 int smc_rtoken_delete(struct smc_link *lnk, __be32 nw_rkey);
+void smc_rtoken_set(struct smc_link_group *lgr, int link_idx, int link_idx_new,
+		    __be32 nw_rkey_known, __be64 nw_vaddr, __be32 nw_rkey);
+void smc_rtoken_set2(struct smc_link_group *lgr, int rtok_idx, int link_id,
+		     __be64 nw_vaddr, __be32 nw_rkey);
 void smc_sndbuf_sync_sg_for_cpu(struct smc_connection *conn);
 void smc_sndbuf_sync_sg_for_device(struct smc_connection *conn);
 void smc_rmb_sync_sg_for_cpu(struct smc_connection *conn);
-- 
cgit v1.2.3-59-g8ed1b


From 3bc67e098c3e215f6e09ba3c0e1f569e7ae020d0 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:48 +0200
Subject: net/smc: adapt SMC remote CONFIRM_RKEY processing to use the LLC flow

Use the LLC flow framework for the processing of CONFIRM_RKEY messages
that were received from the peer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 56 ++++++++++++++++++++++++++++++++++++++++---------------
 1 file changed, 41 insertions(+), 15 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4945abbad111..b7b5cc01b78e 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -105,6 +105,7 @@ struct smc_llc_msg_confirm_rkey_cont {	/* type 0x08 */
 };
 
 #define SMC_LLC_DEL_RKEY_MAX	8
+#define SMC_LLC_FLAG_RKEY_RETRY	0x10
 #define SMC_LLC_FLAG_RKEY_NEG	0x20
 
 struct smc_llc_msg_delete_rkey {	/* type 0x09 */
@@ -563,21 +564,41 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 	smc_lgr_terminate_sched(lgr);
 }
 
-static void smc_llc_rx_confirm_rkey(struct smc_link *link,
-				    struct smc_llc_msg_confirm_rkey *llc)
+/* process a confirm_rkey request from peer, remote flow */
+static void smc_llc_rmt_conf_rkey(struct smc_link_group *lgr)
 {
-	int rc;
-
-	rc = smc_rtoken_add(link,
-			    llc->rtoken[0].rmb_vaddr,
-			    llc->rtoken[0].rmb_key);
-
-	/* ignore rtokens for other links, we have only one link */
-
+	struct smc_llc_msg_confirm_rkey *llc;
+	struct smc_llc_qentry *qentry;
+	struct smc_link *link;
+	int num_entries;
+	int rk_idx;
+	int i;
+
+	qentry = lgr->llc_flow_rmt.qentry;
+	llc = &qentry->msg.confirm_rkey;
+	link = qentry->link;
+
+	num_entries = llc->rtoken[0].num_rkeys;
+	/* first rkey entry is for receiving link */
+	rk_idx = smc_rtoken_add(link,
+				llc->rtoken[0].rmb_vaddr,
+				llc->rtoken[0].rmb_key);
+	if (rk_idx < 0)
+		goto out_err;
+
+	for (i = 1; i <= min_t(u8, num_entries, SMC_LLC_RKEYS_PER_MSG - 1); i++)
+		smc_rtoken_set2(lgr, rk_idx, llc->rtoken[i].link_id,
+				llc->rtoken[i].rmb_vaddr,
+				llc->rtoken[i].rmb_key);
+	/* max links is 3 so there is no need to support conf_rkey_cont msgs */
+	goto out;
+out_err:
+	llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
+	llc->hd.flags |= SMC_LLC_FLAG_RKEY_RETRY;
+out:
 	llc->hd.flags |= SMC_LLC_FLAG_RESP;
-	if (rc < 0)
-		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
-	smc_llc_send_message(link, llc);
+	smc_llc_send_message(link, &qentry->msg);
+	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
 }
 
 static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
@@ -666,8 +687,13 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 		smc_llc_rx_delete_link(link, &llc->delete_link);
 		break;
 	case SMC_LLC_CONFIRM_RKEY:
-		smc_llc_rx_confirm_rkey(link, &llc->confirm_rkey);
-		break;
+		/* new request from remote, assign to remote flow */
+		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+			/* process here, does not wait for more llc msgs */
+			smc_llc_rmt_conf_rkey(lgr);
+			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+		}
+		return;
 	case SMC_LLC_CONFIRM_RKEY_CONT:
 		smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 218b24fe381238941a06496eaf221a22c5935267 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:49 +0200
Subject: net/smc: adapt SMC remote DELETE_RKEY processing to use the LLC flow

Use the LLC flow framework for the processing of DELETE_RKEY messages
that were received from the peer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 37 ++++++++++++++++++++++++-------------
 1 file changed, 24 insertions(+), 13 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index b7b5cc01b78e..e458207bde9e 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -601,31 +601,37 @@ out:
 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
 }
 
-static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
-				      struct smc_llc_msg_confirm_rkey_cont *llc)
-{
-	/* ignore rtokens for other links, we have only one link */
-	llc->hd.flags |= SMC_LLC_FLAG_RESP;
-	smc_llc_send_message(link, llc);
-}
-
-static void smc_llc_rx_delete_rkey(struct smc_link *link,
-				   struct smc_llc_msg_delete_rkey *llc)
+/* process a delete_rkey request from peer, remote flow */
+static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
 {
+	struct smc_llc_msg_delete_rkey *llc;
+	struct smc_llc_qentry *qentry;
+	struct smc_link *link;
 	u8 err_mask = 0;
 	int i, max;
 
+	qentry = lgr->llc_flow_rmt.qentry;
+	llc = &qentry->msg.delete_rkey;
+	link = qentry->link;
+
 	max = min_t(u8, llc->num_rkeys, SMC_LLC_DEL_RKEY_MAX);
 	for (i = 0; i < max; i++) {
 		if (smc_rtoken_delete(link, llc->rkey[i]))
 			err_mask |= 1 << (SMC_LLC_DEL_RKEY_MAX - 1 - i);
 	}
-
 	if (err_mask) {
 		llc->hd.flags |= SMC_LLC_FLAG_RKEY_NEG;
 		llc->err_mask = err_mask;
 	}
+	llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	smc_llc_send_message(link, &qentry->msg);
+	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
+}
 
+static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
+				      struct smc_llc_msg_confirm_rkey_cont *llc)
+{
+	/* ignore rtokens for other links, we have only one link */
 	llc->hd.flags |= SMC_LLC_FLAG_RESP;
 	smc_llc_send_message(link, llc);
 }
@@ -698,8 +704,13 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 		smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
 		break;
 	case SMC_LLC_DELETE_RKEY:
-		smc_llc_rx_delete_rkey(link, &llc->delete_rkey);
-		break;
+		/* new request from remote, assign to remote flow */
+		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
+			/* process here, does not wait for more llc msgs */
+			smc_llc_rmt_delete_rkey(lgr);
+			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
+		}
+		return;
 	}
 out:
 	kfree(qentry);
-- 
cgit v1.2.3-59-g8ed1b


From 42d18acce9e29b61f5dbfc5118d7c72093e703a1 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:50 +0200
Subject: net/smc: remove handling of CONFIRM_RKEY_CONTINUE

The new SMC-R multiple link support will support a maximum of 3 links,
and one CONFIRM_RKEY LLC message can transport 3 rkeys of an rmb buffer.
There is no need for the LLC message type CONFIRM_RKEY_CONTINUE, which is
needed when more than 3 rkeys per rmb buffer need to be exchanged.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 21 ++++-----------------
 1 file changed, 4 insertions(+), 17 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e458207bde9e..92c9a8a8aaf9 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -98,12 +98,6 @@ struct smc_llc_msg_confirm_rkey {	/* type 0x06 */
 	u8 reserved;
 };
 
-struct smc_llc_msg_confirm_rkey_cont {	/* type 0x08 */
-	struct smc_llc_hdr hd;
-	u8 num_rkeys;
-	struct smc_rmb_rtoken rtoken[SMC_LLC_RKEYS_PER_MSG];
-};
-
 #define SMC_LLC_DEL_RKEY_MAX	8
 #define SMC_LLC_FLAG_RKEY_RETRY	0x10
 #define SMC_LLC_FLAG_RKEY_NEG	0x20
@@ -123,7 +117,6 @@ union smc_llc_msg {
 	struct smc_llc_msg_del_link delete_link;
 
 	struct smc_llc_msg_confirm_rkey confirm_rkey;
-	struct smc_llc_msg_confirm_rkey_cont confirm_rkey_cont;
 	struct smc_llc_msg_delete_rkey delete_rkey;
 
 	struct smc_llc_msg_test_link test_link;
@@ -628,14 +621,6 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
 }
 
-static void smc_llc_rx_confirm_rkey_cont(struct smc_link *link,
-				      struct smc_llc_msg_confirm_rkey_cont *llc)
-{
-	/* ignore rtokens for other links, we have only one link */
-	llc->hd.flags |= SMC_LLC_FLAG_RESP;
-	smc_llc_send_message(link, llc);
-}
-
 /* flush the llc event queue */
 static void smc_llc_event_flush(struct smc_link_group *lgr)
 {
@@ -701,7 +686,9 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 		}
 		return;
 	case SMC_LLC_CONFIRM_RKEY_CONT:
-		smc_llc_rx_confirm_rkey_cont(link, &llc->confirm_rkey_cont);
+		/* not used because max links is 3, and 3 rkeys fit into
+		 * one CONFIRM_RKEY message
+		 */
 		break;
 	case SMC_LLC_DELETE_RKEY:
 		/* new request from remote, assign to remote flow */
@@ -770,7 +757,7 @@ static void smc_llc_rx_response(struct smc_link *link,
 			smc_lgr_schedule_free_work_fast(link->lgr);
 		break;
 	case SMC_LLC_CONFIRM_RKEY_CONT:
-		/* unused as long as we don't send this type of msg */
+		/* not used because max links is 3 */
 		break;
 	}
 	kfree(qentry);
-- 
cgit v1.2.3-59-g8ed1b


From 41a211d862242439c9cdb2481946bb0928760541 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Thu, 30 Apr 2020 15:55:51 +0200
Subject: net/smc: remove obsolete link state DELETING

The connection layer in af_smc.c is now using the new LLC flow
framework, which made the link state DELETING obsolete. Remove the state
and the respective helpers.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 4 +---
 net/smc/smc_core.h | 1 -
 net/smc/smc_llc.c  | 7 -------
 net/smc/smc_llc.h  | 1 -
 4 files changed, 1 insertion(+), 12 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 096dce92ee2b..3539ceef9a97 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -200,7 +200,6 @@ static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
 {
 	if (lnk->state == SMC_LNK_ACTIVE &&
 	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
-		smc_llc_link_deleting(lnk);
 		return 0;
 	}
 	return -ENOTCONN;
@@ -767,8 +766,7 @@ void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
 			continue;
 		/* tbd - terminate only when no more links are active */
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (!smc_link_usable(&lgr->lnk[i]) ||
-			    lgr->lnk[i].state == SMC_LNK_DELETING)
+			if (!smc_link_usable(&lgr->lnk[i]))
 				continue;
 			if (lgr->lnk[i].smcibdev == smcibdev &&
 			    lgr->lnk[i].ibport == ibport) {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 0be26386057f..f12474cc666c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -36,7 +36,6 @@ enum smc_link_state {			/* possible states of a link */
 	SMC_LNK_INACTIVE,	/* link is inactive */
 	SMC_LNK_ACTIVATING,	/* link is being activated */
 	SMC_LNK_ACTIVE,		/* link is active */
-	SMC_LNK_DELETING,	/* link is being deleted */
 };
 
 #define SMC_WR_BUF_SIZE		48	/* size of work request buffer */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 92c9a8a8aaf9..327cf30b98cc 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -545,7 +545,6 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 	struct smc_link_group *lgr = smc_get_lgr(link);
 
 	smc_lgr_forget(lgr);
-	smc_llc_link_deleting(link);
 	if (lgr->role == SMC_SERV) {
 		/* client asks to delete this link, send request */
 		smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
@@ -878,12 +877,6 @@ void smc_llc_link_active(struct smc_link *link)
 	}
 }
 
-void smc_llc_link_deleting(struct smc_link *link)
-{
-	link->state = SMC_LNK_DELETING;
-	smc_wr_wakeup_tx_wait(link);
-}
-
 /* called in worker context */
 void smc_llc_link_clear(struct smc_link *link)
 {
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index e9f23affece6..48029a5e14c3 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -57,7 +57,6 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
 void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link);
-void smc_llc_link_deleting(struct smc_link *link);
 void smc_llc_link_clear(struct smc_link *link);
 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 			    struct smc_buf_desc *rmb_desc);
-- 
cgit v1.2.3-59-g8ed1b


From 801eb0501824da196c7b1c18c453528457308c5a Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:48 +0300
Subject: stmmac: intel: Fix kernel crash due to wrong error path

Unfortunately, ->probe() may sometimes fail. The commit b9663b7ca6ff
("net: stmmac: Enable SERDES power up/down sequence")
messed up the error handling, and thus:

[   12.811311] ------------[ cut here ]------------
[   12.811993] kernel BUG at net/core/dev.c:9937!

Fix this with a properly crafted error path.

Fixes: b9663b7ca6ff ("net: stmmac: Enable SERDES power up/down sequence")
Cc: Voon Weifeng <weifeng.voon@intel.com>
Cc: Ong Boon Leong <boon.leong.ong@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index 565da6498c84..ff22f274aa43 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -4991,7 +4991,7 @@ int stmmac_dvr_probe(struct device *device,
 						 priv->plat->bsp_priv);
 
 		if (ret < 0)
-			return ret;
+			goto error_serdes_powerup;
 	}
 
 #ifdef CONFIG_DEBUG_FS
@@ -5000,6 +5000,8 @@ int stmmac_dvr_probe(struct device *device,
 
 	return ret;
 
+error_serdes_powerup:
+	unregister_netdev(ndev);
 error_netdev_register:
 	phylink_destroy(priv->phylink);
 error_phy_setup:
-- 
cgit v1.2.3-59-g8ed1b


From 09f012e64e4b8126ed6f02d0a85a57c3a0465cf9 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:49 +0300
Subject: stmmac: intel: Fix clock handling on error and remove paths

clk_prepare_enable() might fail, so we have to check its return value.
Besides that, we have to call clk_disable_unprepare() on the error and
remove paths. Do the above in the dwmac-intel driver.

While at it, remove the leftover in stmmac_pci and remove the unneeded
condition for the NULL-aware clk_unregister_fixed_rate() call.

Fixes: 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to contain all Intel platform")
Cc: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 20 ++++++++++++++++----
 drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c  |  5 -----
 2 files changed, 16 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index 2e4aaedb93f5..d163c4b43da0 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -252,6 +252,7 @@ static void common_default_data(struct plat_stmmacenet_data *plat)
 static int intel_mgbe_common_data(struct pci_dev *pdev,
 				  struct plat_stmmacenet_data *plat)
 {
+	int ret;
 	int i;
 
 	plat->clk_csr = 5;
@@ -324,7 +325,12 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 		dev_warn(&pdev->dev, "Fail to register stmmac-clk\n");
 		plat->stmmac_clk = NULL;
 	}
-	clk_prepare_enable(plat->stmmac_clk);
+
+	ret = clk_prepare_enable(plat->stmmac_clk);
+	if (ret) {
+		clk_unregister_fixed_rate(plat->stmmac_clk);
+		return ret;
+	}
 
 	/* Set default value for multicast hash bins */
 	plat->multicast_filter_bins = HASH_TABLE_SIZE;
@@ -657,7 +663,13 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	res.wol_irq = pdev->irq;
 	res.irq = pdev->irq;
 
-	return stmmac_dvr_probe(&pdev->dev, plat, &res);
+	ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
+	if (ret) {
+		clk_disable_unprepare(plat->stmmac_clk);
+		clk_unregister_fixed_rate(plat->stmmac_clk);
+	}
+
+	return ret;
 }
 
 /**
@@ -675,8 +687,8 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
 
 	stmmac_dvr_remove(&pdev->dev);
 
-	if (priv->plat->stmmac_clk)
-		clk_unregister_fixed_rate(priv->plat->stmmac_clk);
+	clk_disable_unprepare(priv->plat->stmmac_clk);
+	clk_unregister_fixed_rate(priv->plat->stmmac_clk);
 
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (pci_resource_len(pdev, i) == 0)
diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
index 3fb21f7ac9fb..272cb47af9f2 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_pci.c
@@ -217,15 +217,10 @@ static int stmmac_pci_probe(struct pci_dev *pdev,
  */
 static void stmmac_pci_remove(struct pci_dev *pdev)
 {
-	struct net_device *ndev = dev_get_drvdata(&pdev->dev);
-	struct stmmac_priv *priv = netdev_priv(ndev);
 	int i;
 
 	stmmac_dvr_remove(&pdev->dev);
 
-	if (priv->plat->stmmac_clk)
-		clk_unregister_fixed_rate(priv->plat->stmmac_clk);
-
 	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
 		if (pci_resource_len(pdev, i) == 0)
 			continue;
-- 
cgit v1.2.3-59-g8ed1b


From e578f043ffcf8b4f0999ab9e9194f156e90c7fd3 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:50 +0300
Subject: stmmac: intel: Remove unnecessary loop for PCI BARs

Copy'n'paste without thinking is not a good idea, and in this case it brought
an unnecessary loop over PCI BAR resources, which was needed to work around one
of the STMicro RVP boards. Remove the unnecessary loops from the Intel driver.

Fixes: 58da0cfa6cf1 ("net: stmmac: create dwmac-intel.c to contain all Intel platform")
Cc: Voon Weifeng <weifeng.voon@intel.com>
Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 23 +++++------------------
 1 file changed, 5 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index d163c4b43da0..e9f948559499 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -606,7 +606,6 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	struct intel_priv_data *intel_priv;
 	struct plat_stmmacenet_data *plat;
 	struct stmmac_resources res;
-	int i;
 	int ret;
 
 	intel_priv = devm_kzalloc(&pdev->dev, sizeof(*intel_priv),
@@ -637,15 +636,9 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 		return ret;
 	}
 
-	/* Get the base address of device */
-	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		if (pci_resource_len(pdev, i) == 0)
-			continue;
-		ret = pcim_iomap_regions(pdev, BIT(i), pci_name(pdev));
-		if (ret)
-			return ret;
-		break;
-	}
+	ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev));
+	if (ret)
+		return ret;
 
 	pci_set_master(pdev);
 
@@ -659,7 +652,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	pci_enable_msi(pdev);
 
 	memset(&res, 0, sizeof(res));
-	res.addr = pcim_iomap_table(pdev)[i];
+	res.addr = pcim_iomap_table(pdev)[0];
 	res.wol_irq = pdev->irq;
 	res.irq = pdev->irq;
 
@@ -683,19 +676,13 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
 {
 	struct net_device *ndev = dev_get_drvdata(&pdev->dev);
 	struct stmmac_priv *priv = netdev_priv(ndev);
-	int i;
 
 	stmmac_dvr_remove(&pdev->dev);
 
 	clk_disable_unprepare(priv->plat->stmmac_clk);
 	clk_unregister_fixed_rate(priv->plat->stmmac_clk);
 
-	for (i = 0; i < PCI_STD_NUM_BARS; i++) {
-		if (pci_resource_len(pdev, i) == 0)
-			continue;
-		pcim_iounmap_regions(pdev, BIT(i));
-		break;
-	}
+	pcim_iounmap_regions(pdev, BIT(0));
 
 	pci_disable_device(pdev);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 52c1f794845411c39ee6deedb443894ca141b4a7 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:51 +0300
Subject: stmmac: intel: Convert to use pci_alloc_irq_vectors() API

pci_enable_msi() is a deprecated API; thus, switch to the modern
pci_alloc_irq_vectors().

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index e9f948559499..bb8bf31c1259 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -649,15 +649,18 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
-	pci_enable_msi(pdev);
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		return ret;
 
 	memset(&res, 0, sizeof(res));
 	res.addr = pcim_iomap_table(pdev)[0];
-	res.wol_irq = pdev->irq;
-	res.irq = pdev->irq;
+	res.wol_irq = pci_irq_vector(pdev, 0);
+	res.irq = pci_irq_vector(pdev, 0);
 
 	ret = stmmac_dvr_probe(&pdev->dev, plat, &res);
 	if (ret) {
+		pci_free_irq_vectors(pdev);
 		clk_disable_unprepare(plat->stmmac_clk);
 		clk_unregister_fixed_rate(plat->stmmac_clk);
 	}
@@ -679,6 +682,8 @@ static void intel_eth_pci_remove(struct pci_dev *pdev)
 
 	stmmac_dvr_remove(&pdev->dev);
 
+	pci_free_irq_vectors(pdev);
+
 	clk_disable_unprepare(priv->plat->stmmac_clk);
 	clk_unregister_fixed_rate(priv->plat->stmmac_clk);
 
-- 
cgit v1.2.3-59-g8ed1b


From d5383b0376643245d82230d8a974edd193ec900c Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:52 +0300
Subject: stmmac: intel: Eliminate useless conditions and variables

There are useless conditions like

	func()
	{
		...
		int ret;
		...
		ret = foo();
		if (ret)
			return ret;

		return 0;
	}

which may be replaced with a direct return statement, which is what we have done here.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 14 ++------------
 1 file changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index bb8bf31c1259..b0d735e4c13c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -347,16 +347,11 @@ static int intel_mgbe_common_data(struct pci_dev *pdev,
 static int ehl_common_data(struct pci_dev *pdev,
 			   struct plat_stmmacenet_data *plat)
 {
-	int ret;
-
 	plat->rx_queues_to_use = 8;
 	plat->tx_queues_to_use = 8;
 	plat->clk_ptp_rate = 200000000;
-	ret = intel_mgbe_common_data(pdev, plat);
-	if (ret)
-		return ret;
 
-	return 0;
+	return intel_mgbe_common_data(pdev, plat);
 }
 
 static int ehl_sgmii_data(struct pci_dev *pdev,
@@ -457,16 +452,11 @@ static struct stmmac_pci_info ehl_pse1_sgmii1g_pci_info = {
 static int tgl_common_data(struct pci_dev *pdev,
 			   struct plat_stmmacenet_data *plat)
 {
-	int ret;
-
 	plat->rx_queues_to_use = 6;
 	plat->tx_queues_to_use = 4;
 	plat->clk_ptp_rate = 200000000;
-	ret = intel_mgbe_common_data(pdev, plat);
-	if (ret)
-		return ret;
 
-	return 0;
+	return intel_mgbe_common_data(pdev, plat);
 }
 
 static int tgl_sgmii_data(struct pci_dev *pdev,
-- 
cgit v1.2.3-59-g8ed1b


From ccacb703b0f8141757fba2931a2a7202b54194d0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:53 +0300
Subject: stmmac: intel: Fix indentation to put on one line affected code

There is no competition to get more LOCs into the kernel, and the driver can
look better and have improved readability without those additional line breaks.

While at it, shorten the names of the info structures: they are all PCI, since
in the end it's a PCI driver for Intel hardware.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c | 92 ++++++++---------------
 1 file changed, 32 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
index b0d735e4c13c..2ac9dfb3462c 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-intel.c
@@ -83,13 +83,9 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data)
 	serdes_phy_addr = intel_priv->mdio_adhoc_addr;
 
 	/* assert clk_req */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
-
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 	data |= SERDES_PLL_CLK;
-
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* check for clk_ack assertion */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -103,13 +99,9 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data)
 	}
 
 	/* assert lane reset */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
-
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 	data |= SERDES_RST;
-
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* check for assert lane reset reflection */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -123,14 +115,12 @@ static int intel_serdes_powerup(struct net_device *ndev, void *priv_data)
 	}
 
 	/*  move power state to P0 */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 
 	data &= ~SERDES_PWR_ST_MASK;
 	data |= SERDES_PWR_ST_P0 << SERDES_PWR_ST_SHIFT;
 
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* Check for P0 state */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -159,14 +149,12 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	serdes_phy_addr = intel_priv->mdio_adhoc_addr;
 
 	/*  move power state to P3 */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 
 	data &= ~SERDES_PWR_ST_MASK;
 	data |= SERDES_PWR_ST_P3 << SERDES_PWR_ST_SHIFT;
 
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* Check for P3 state */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -180,13 +168,9 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	}
 
 	/* de-assert clk_req */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
-
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 	data &= ~SERDES_PLL_CLK;
-
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* check for clk_ack de-assert */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -200,13 +184,9 @@ static void intel_serdes_powerdown(struct net_device *ndev, void *intel_data)
 	}
 
 	/* de-assert lane reset */
-	data = mdiobus_read(priv->mii, serdes_phy_addr,
-			    SERDES_GCR0);
-
+	data = mdiobus_read(priv->mii, serdes_phy_addr, SERDES_GCR0);
 	data &= ~SERDES_RST;
-
-	mdiobus_write(priv->mii, serdes_phy_addr,
-		      SERDES_GCR0, data);
+	mdiobus_write(priv->mii, serdes_phy_addr, SERDES_GCR0, data);
 
 	/* check for de-assert lane reset reflection */
 	data = serdes_status_poll(priv, serdes_phy_addr,
@@ -367,7 +347,7 @@ static int ehl_sgmii_data(struct pci_dev *pdev,
 	return ehl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_sgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_sgmii1g_info = {
 	.setup = ehl_sgmii_data,
 };
 
@@ -381,7 +361,7 @@ static int ehl_rgmii_data(struct pci_dev *pdev,
 	return ehl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_rgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_rgmii1g_info = {
 	.setup = ehl_rgmii_data,
 };
 
@@ -400,7 +380,7 @@ static int ehl_pse0_rgmii1g_data(struct pci_dev *pdev,
 	return ehl_pse0_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_pse0_rgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_pse0_rgmii1g_info = {
 	.setup = ehl_pse0_rgmii1g_data,
 };
 
@@ -413,7 +393,7 @@ static int ehl_pse0_sgmii1g_data(struct pci_dev *pdev,
 	return ehl_pse0_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_pse0_sgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_pse0_sgmii1g_info = {
 	.setup = ehl_pse0_sgmii1g_data,
 };
 
@@ -432,7 +412,7 @@ static int ehl_pse1_rgmii1g_data(struct pci_dev *pdev,
 	return ehl_pse1_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_pse1_rgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_pse1_rgmii1g_info = {
 	.setup = ehl_pse1_rgmii1g_data,
 };
 
@@ -445,7 +425,7 @@ static int ehl_pse1_sgmii1g_data(struct pci_dev *pdev,
 	return ehl_pse1_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info ehl_pse1_sgmii1g_pci_info = {
+static struct stmmac_pci_info ehl_pse1_sgmii1g_info = {
 	.setup = ehl_pse1_sgmii1g_data,
 };
 
@@ -470,7 +450,7 @@ static int tgl_sgmii_data(struct pci_dev *pdev,
 	return tgl_common_data(pdev, plat);
 }
 
-static struct stmmac_pci_info tgl_sgmii1g_pci_info = {
+static struct stmmac_pci_info tgl_sgmii1g_info = {
 	.setup = tgl_sgmii_data,
 };
 
@@ -573,7 +553,7 @@ static int quark_default_data(struct pci_dev *pdev,
 	return 0;
 }
 
-static const struct stmmac_pci_info quark_pci_info = {
+static const struct stmmac_pci_info quark_info = {
 	.setup = quark_default_data,
 };
 
@@ -598,8 +578,7 @@ static int intel_eth_pci_probe(struct pci_dev *pdev,
 	struct stmmac_resources res;
 	int ret;
 
-	intel_priv = devm_kzalloc(&pdev->dev, sizeof(*intel_priv),
-				  GFP_KERNEL);
+	intel_priv = devm_kzalloc(&pdev->dev, sizeof(*intel_priv), GFP_KERNEL);
 	if (!intel_priv)
 		return -ENOMEM;
 
@@ -736,26 +715,19 @@ static SIMPLE_DEV_PM_OPS(intel_eth_pm_ops, intel_eth_pci_suspend,
 #define PCI_DEVICE_ID_INTEL_TGL_SGMII1G_ID		0xa0ac
 
 static const struct pci_device_id intel_eth_pci_id_table[] = {
-	{ PCI_DEVICE_DATA(INTEL, QUARK_ID, &quark_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_RGMII1G_ID, &ehl_rgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII1G_ID, &ehl_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5_ID, &ehl_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G_ID,
-			  &ehl_pse0_rgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G_ID,
-			  &ehl_pse0_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5_ID,
-			  &ehl_pse0_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID,
-			  &ehl_pse1_rgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID,
-			  &ehl_pse1_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID,
-			  &ehl_pse1_sgmii1g_pci_info) },
-	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_pci_info) },
+	{ PCI_DEVICE_DATA(INTEL, QUARK_ID, &quark_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_RGMII1G_ID, &ehl_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII1G_ID, &ehl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_SGMII2G5_ID, &ehl_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_RGMII1G_ID, &ehl_pse0_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII1G_ID, &ehl_pse0_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE0_SGMII2G5_ID, &ehl_pse0_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_RGMII1G_ID, &ehl_pse1_rgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII1G_ID, &ehl_pse1_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, EHL_PSE1_SGMII2G5_ID, &ehl_pse1_sgmii1g_info) },
+	{ PCI_DEVICE_DATA(INTEL, TGL_SGMII1G_ID, &tgl_sgmii1g_info) },
 	{}
 };
-
 MODULE_DEVICE_TABLE(pci, intel_eth_pci_id_table);
 
 static struct pci_driver intel_eth_pci_driver = {
-- 
cgit v1.2.3-59-g8ed1b


From 29e0c2f39f983f3b83f6004f9fd05f8f4ce225c6 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Thu, 30 Apr 2020 18:02:54 +0300
Subject: stmmac: intel: Place object in the Makefile according to the order

Follow the order used for the platform drivers, i.e. generic objects go first.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index 5a6f265bc540..f9d024d6b69b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -30,6 +30,6 @@ obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
 stmmac-platform-objs:= stmmac_platform.o
 dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
 
-obj-$(CONFIG_DWMAC_INTEL) += dwmac-intel.o
-obj-$(CONFIG_STMMAC_PCI) += stmmac-pci.o
+obj-$(CONFIG_STMMAC_PCI)	+= stmmac-pci.o
+obj-$(CONFIG_DWMAC_INTEL)	+= dwmac-intel.o
 stmmac-pci-objs:= stmmac_pci.o
-- 
cgit v1.2.3-59-g8ed1b


From 6e3a401fc8af01828bcdc92d713195d318b36e7e Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Thu, 30 Apr 2020 18:51:14 +0300
Subject: inet_diag: add cgroup id attribute

This patch adds the cgroup v2 ID to the common inet diag message attributes.
The cgroup v2 ID is the kernfs ID (ino or ino+gen). This attribute allows
filtering inet diag output by a cgroup ID obtained via the name_to_handle_at()
syscall. When the net_cls or net_prio cgroup is activated, this ID is equal
to 1 (root cgroup ID) for newly created sockets.

Some notes about this ID:

1) gets initialized in socket() syscall
2) incoming socket gets ID from listening socket
   (not during accept() syscall)
3) not changed when the process gets moved to another cgroup
4) can point to deleted cgroup (refcounting)

v2:
  - use CONFIG_SOCK_CGROUP_DATA instead of CONFIG_CGROUPS

v3:
  - fix attr size by using nla_total_size_64bit() (Eric Dumazet)
  - more detailed commit message (Konstantin Khlebnikov)

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Acked-By: Tejun Heo <tj@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/inet_diag.h      | 6 +++++-
 include/uapi/linux/inet_diag.h | 1 +
 net/ipv4/inet_diag.c           | 7 +++++++
 3 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/include/linux/inet_diag.h b/include/linux/inet_diag.h
index ce9ed1c0602f..0ef2d800fda7 100644
--- a/include/linux/inet_diag.h
+++ b/include/linux/inet_diag.h
@@ -71,7 +71,11 @@ static inline size_t inet_diag_msg_attrs_size(void)
 		+ nla_total_size(1)  /* INET_DIAG_SKV6ONLY */
 #endif
 		+ nla_total_size(4)  /* INET_DIAG_MARK */
-		+ nla_total_size(4); /* INET_DIAG_CLASS_ID */
+		+ nla_total_size(4)  /* INET_DIAG_CLASS_ID */
+#ifdef CONFIG_SOCK_CGROUP_DATA
+		+ nla_total_size_64bit(sizeof(u64))  /* INET_DIAG_CGROUP_ID */
+#endif
+		;
 }
 int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			     struct inet_diag_msg *r, int ext,
diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index 57cc429a9177..c9b1e551792c 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -157,6 +157,7 @@ enum {
 	INET_DIAG_MD5SIG,
 	INET_DIAG_ULP_INFO,
 	INET_DIAG_SK_BPF_STORAGES,
+	INET_DIAG_CGROUP_ID,
 	__INET_DIAG_MAX,
 };
 
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 5d50aad3cdbf..9c4c315cbc10 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -162,6 +162,13 @@ int inet_diag_msg_attrs_fill(struct sock *sk, struct sk_buff *skb,
 			goto errout;
 	}
 
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	if (nla_put_u64_64bit(skb, INET_DIAG_CGROUP_ID,
+			      cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)),
+			      INET_DIAG_PAD))
+		goto errout;
+#endif
+
 	r->idiag_uid = from_kuid_munged(user_ns, sock_i_uid(sk));
 	r->idiag_inode = sock_i_ino(sk);
 
-- 
cgit v1.2.3-59-g8ed1b


From b1f3e43dbfacfcd95296b0f80f84b186add9ef54 Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Thu, 30 Apr 2020 18:51:15 +0300
Subject: inet_diag: add support for cgroup filter

This patch adds the ability to filter sockets based on cgroup v2 ID.
Such filter is helpful in ss utility for filtering sockets by
cgroup pathname.

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Reviewed-by: Konstantin Khlebnikov <khlebnikov@yandex-team.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/inet_diag.h |  1 +
 net/ipv4/inet_diag.c           | 31 +++++++++++++++++++++++++++++++
 2 files changed, 32 insertions(+)

diff --git a/include/uapi/linux/inet_diag.h b/include/uapi/linux/inet_diag.h
index c9b1e551792c..e6f183ee8417 100644
--- a/include/uapi/linux/inet_diag.h
+++ b/include/uapi/linux/inet_diag.h
@@ -96,6 +96,7 @@ enum {
 	INET_DIAG_BC_MARK_COND,
 	INET_DIAG_BC_S_EQ,
 	INET_DIAG_BC_D_EQ,
+	INET_DIAG_BC_CGROUP_COND,   /* u64 cgroup v2 ID */
 };
 
 struct inet_diag_hostcond {
diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 9c4c315cbc10..0034092358c3 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -43,6 +43,9 @@ struct inet_diag_entry {
 	u16 userlocks;
 	u32 ifindex;
 	u32 mark;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	u64 cgroup_id;
+#endif
 };
 
 static DEFINE_MUTEX(inet_diag_table_mutex);
@@ -682,6 +685,16 @@ static int inet_diag_bc_run(const struct nlattr *_bc,
 				yes = 0;
 			break;
 		}
+#ifdef CONFIG_SOCK_CGROUP_DATA
+		case INET_DIAG_BC_CGROUP_COND: {
+			u64 cgroup_id;
+
+			cgroup_id = get_unaligned((const u64 *)(op + 1));
+			if (cgroup_id != entry->cgroup_id)
+				yes = 0;
+			break;
+		}
+#endif
 		}
 
 		if (yes) {
@@ -732,6 +745,9 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 		entry.mark = inet_rsk(inet_reqsk(sk))->ir_mark;
 	else
 		entry.mark = 0;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+	entry.cgroup_id = cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data));
+#endif
 
 	return inet_diag_bc_run(bc, &entry);
 }
@@ -821,6 +837,15 @@ static bool valid_markcond(const struct inet_diag_bc_op *op, int len,
 	return len >= *min_len;
 }
 
+#ifdef CONFIG_SOCK_CGROUP_DATA
+static bool valid_cgroupcond(const struct inet_diag_bc_op *op, int len,
+			     int *min_len)
+{
+	*min_len += sizeof(u64);
+	return len >= *min_len;
+}
+#endif
+
 static int inet_diag_bc_audit(const struct nlattr *attr,
 			      const struct sk_buff *skb)
 {
@@ -863,6 +888,12 @@ static int inet_diag_bc_audit(const struct nlattr *attr,
 			if (!valid_markcond(bc, len, &min_len))
 				return -EINVAL;
 			break;
+#ifdef CONFIG_SOCK_CGROUP_DATA
+		case INET_DIAG_BC_CGROUP_COND:
+			if (!valid_cgroupcond(bc, len, &min_len))
+				return -EINVAL;
+			break;
+#endif
 		case INET_DIAG_BC_AUTO:
 		case INET_DIAG_BC_JMP:
 		case INET_DIAG_BC_NOP:
-- 
cgit v1.2.3-59-g8ed1b


From 10ebb22137acd97083cb52d8664cdff269e8a629 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:03:56 +0200
Subject: docs: networking: convert l2tp.txt to ReST

- add SPDX header;
- add a document title;
- mark tables as such;
- adjust indentation, whitespace and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/l2tp.rst  | 358 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/l2tp.txt  | 345 -----------------------------------
 3 files changed, 359 insertions(+), 345 deletions(-)
 create mode 100644 Documentation/networking/l2tp.rst
 delete mode 100644 Documentation/networking/l2tp.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e1ff08b94d90..0c5d7a037983 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -74,6 +74,7 @@ Contents:
    ipvlan
    ipvs-sysctl
    kcm
+   l2tp
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/l2tp.rst b/Documentation/networking/l2tp.rst
new file mode 100644
index 000000000000..a48238a2ec09
--- /dev/null
+++ b/Documentation/networking/l2tp.rst
@@ -0,0 +1,358 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+L2TP
+====
+
+This document describes how to use the kernel's L2TP drivers to
+provide L2TP functionality. L2TP is a protocol that tunnels one or
+more sessions over an IP tunnel. It is commonly used for VPNs
+(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
+network infrastructure. With L2TPv3, it is also useful as a Layer-2
+tunneling infrastructure.
+
+Features
+========
+
+- L2TPv2 (PPP over L2TP (UDP tunnels)).
+- L2TPv3 ethernet pseudowires.
+- L2TPv3 PPP pseudowires.
+- L2TPv3 IP encapsulation.
+- Netlink sockets for L2TPv3 configuration management.
+
+History
+=======
+
+The original pppol2tp driver was introduced in 2.6.23 and provided
+L2TPv2 functionality (rfc2661). L2TPv2 is used to tunnel one or more PPP
+sessions over a UDP tunnel.
+
+L2TPv3 (rfc3931) changes the protocol to allow different frame types
+to be passed over an L2TP tunnel by moving the PPP-specific parts of
+the protocol out of the core L2TP packet headers. Each frame type is
+known as a pseudowire type. Ethernet, PPP, HDLC, Frame Relay and ATM
+pseudowires for L2TP are defined in separate RFC standards. Another
+change for L2TPv3 is that it can be carried directly over IP with no
+UDP header (UDP is optional). It is also possible to create static
+unmanaged L2TPv3 tunnels manually without a control protocol
+(userspace daemon) to manage them.
+
+To support L2TPv3, the original pppol2tp driver was split up to
+separate the L2TP and PPP functionality. Existing L2TPv2 userspace
+apps should be unaffected as the original pppol2tp sockets API is
+retained. L2TPv3, however, uses netlink to manage L2TPv3 tunnels and
+sessions.
+
+Design
+======
+
+The L2TP protocol separates control and data frames.  The L2TP kernel
+drivers handle only L2TP data frames; control frames are always
+handled by userspace. L2TP control frames carry messages between L2TP
+clients/servers and are used to setup / teardown tunnels and
+sessions. An L2TP client or server is implemented in userspace.
+
+Each L2TP tunnel is implemented using a UDP or L2TPIP socket; L2TPIP
+provides L2TPv3 IP encapsulation (no UDP) and is implemented using a
+new l2tpip socket family. The tunnel socket is typically created by
+userspace, though for unmanaged L2TPv3 tunnels, the socket can also be
+created by the kernel. Each L2TP session (pseudowire) gets a network
+interface instance. In the case of PPP, these interfaces are created
+indirectly by pppd using a pppol2tp socket. In the case of ethernet,
+the netdevice is created upon a netlink request to create an L2TPv3
+ethernet pseudowire.
+
+For PPP, the PPPoL2TP driver, net/l2tp/l2tp_ppp.c, provides a
+mechanism by which PPP frames carried through an L2TP session are
+passed through the kernel's PPP subsystem. The standard PPP daemon,
+pppd, handles all PPP interaction with the peer. PPP network
+interfaces are created for each local PPP endpoint. The kernel's PPP
+subsystem arranges for PPP control frames to be delivered to pppd,
+while data frames are forwarded as usual.
+
+For ethernet, the L2TPETH driver, net/l2tp/l2tp_eth.c, implements a
+netdevice driver, managing virtual ethernet devices, one per
+pseudowire. These interfaces can be managed using standard Linux tools
+such as "ip" and "ifconfig". If only IP frames are passed over the
+tunnel, the interface can be given the IP addresses of itself and its
+peer. If non-IP frames are to be passed over the tunnel, the interface
+can be added to a bridge using brctl. All L2TP datapath protocol
+functions are handled by the L2TP core driver.
+
+Each tunnel and session within a tunnel is assigned a unique tunnel_id
+and session_id. These ids are carried in the L2TP header of every
+control and data packet. (Actually, in L2TPv3, the tunnel_id isn't
+present in data frames - it is inferred from the IP connection on
+which the packet was received.) The L2TP driver uses the ids to look up
+internal tunnel and/or session contexts to determine how to handle the
+packet. Zero tunnel / session ids are treated specially - zero ids are
+never assigned to tunnels or sessions in the network. In the driver,
+the tunnel context keeps a reference to the tunnel UDP or L2TPIP
+socket. The session context holds data that lets the driver interface
+to the kernel's network frame type subsystems, i.e. PPP, ethernet.
+
+Userspace Programming
+=====================
+
+For L2TPv2, there are a number of requirements on the userspace L2TP
+daemon in order to use the pppol2tp driver.
+
+1. Use a UDP socket per tunnel.
+
+2. Create a single PPPoL2TP socket per tunnel bound to a special null
+   session id. This is used only for communicating with the driver but
+   must remain open while the tunnel is active. Opening this tunnel
+   management socket causes the driver to mark the tunnel socket as an
+   L2TP UDP encapsulation socket and flags it for use by the
+   referenced tunnel id. This hooks up the UDP receive path via
+   udp_encap_rcv() in net/ipv4/udp.c. PPP data frames are never passed
+   in this special PPPoX socket.
+
+3. Create a PPPoL2TP socket per L2TP session. This is typically done
+   by starting pppd with the pppol2tp plugin and appropriate
+   arguments. A PPPoL2TP tunnel management socket (Step 2) must be
+   created before the first PPPoL2TP session socket is created.
+
+When creating PPPoL2TP sockets, the application provides information
+to the driver about the socket in a socket connect() call. Source and
+destination tunnel and session ids are provided, as well as the file
+descriptor of a UDP socket. See struct pppol2tp_addr in
+include/linux/if_pppol2tp.h. Note that zero tunnel / session ids are
+treated specially. When creating the per-tunnel PPPoL2TP management
+socket in Step 2 above, zero source and destination session ids are
+specified, which tells the driver to prepare the supplied UDP file
+descriptor for use as an L2TP tunnel socket.
+
+Userspace may control behavior of the tunnel or session using
+setsockopt and ioctl on the PPPoX socket. The following socket
+options are supported:-
+
+=========   ===========================================================
+DEBUG       bitmask of debug message categories. See below.
+SENDSEQ     - 0 => don't send packets with sequence numbers
+	    - 1 => send packets with sequence numbers
+RECVSEQ     - 0 => receive packet sequence numbers are optional
+	    - 1 => drop receive packets without sequence numbers
+LNSMODE     - 0 => act as LAC.
+	    - 1 => act as LNS.
+REORDERTO   reorder timeout (in millisecs). If 0, don't try to reorder.
+=========   ===========================================================
+
+Only the DEBUG option is supported by the special tunnel management
+PPPoX socket.
+
+In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
+to retrieve tunnel and session statistics from the kernel using the
+PPPoX socket of the appropriate tunnel or session.
+
+For L2TPv3, userspace must use the netlink API defined in
+include/linux/l2tp.h to manage tunnel and session contexts. The
+general procedure to create a new L2TP tunnel with one session is:-
+
+1. Open a GENL socket using L2TP_GENL_NAME for configuring the kernel
+   using netlink.
+
+2. Create a UDP or L2TPIP socket for the tunnel.
+
+3. Create a new L2TP tunnel using a L2TP_CMD_TUNNEL_CREATE
+   request. Set attributes according to desired tunnel parameters,
+   referencing the UDP or L2TPIP socket created in the previous step.
+
+4. Create a new L2TP session in the tunnel using a
+   L2TP_CMD_SESSION_CREATE request.
+
+The tunnel and all of its sessions are closed when the tunnel socket
+is closed. The netlink API may also be used to delete sessions and
+tunnels. Configuration and status info may be set or read using netlink.
+
+The L2TP driver also supports static (unmanaged) L2TPv3 tunnels. These
+are where there is no L2TP control message exchange with the peer to
+set up the tunnel; the tunnel is configured manually at each end of the
+tunnel. There is no need for an L2TP userspace application in this
+case -- the tunnel socket is created by the kernel and configured
+using parameters sent in the L2TP_CMD_TUNNEL_CREATE netlink
+request. The "ip" utility of iproute2 has commands for managing static
+L2TPv3 tunnels; do "ip l2tp help" for more information.
+
+Debugging
+=========
+
+The driver supports a flexible debug scheme where kernel trace
+messages may be optionally enabled per tunnel and per session. Care is
+needed when debugging a live system since the messages are not
+rate-limited and a busy system could be swamped. Userspace uses
+setsockopt on the PPPoX socket to set a debug mask.
+
+The following debug mask bits are available:
+
+================  ==============================
+L2TP_MSG_DEBUG    verbose debug (if compiled in)
+L2TP_MSG_CONTROL  userspace - kernel interface
+L2TP_MSG_SEQ      sequence numbers handling
+L2TP_MSG_DATA     data packets
+================  ==============================
+
+If enabled, files under a l2tp debugfs directory can be used to dump
+kernel state about L2TP tunnels and sessions. To access it, the
+debugfs filesystem must first be mounted::
+
+	# mount -t debugfs debugfs /debug
+
+Files under the l2tp directory can then be accessed::
+
+	# cat /debug/l2tp/tunnels
+
+The debugfs files should not be used by applications to obtain L2TP
+state information because the file format is subject to change. It is
+implemented to provide extra debug information to help diagnose
+problems. Users should use the netlink API.
+
+/proc/net/pppol2tp is also provided for backwards compatibility with
+the original pppol2tp driver. It lists information about L2TPv2
+tunnels and sessions only. Its use is discouraged.
+
+Unmanaged L2TPv3 Tunnels
+========================
+
+Some commercial L2TP products support unmanaged L2TPv3 ethernet
+tunnels, where there is no L2TP control protocol; tunnels are
+configured at each side manually. New commands are available in
+iproute2's ip utility to support this.
+
+To create an L2TPv3 ethernet pseudowire between local host 192.168.1.1
+and peer 192.168.1.2, using IP addresses 10.5.1.1 and 10.5.1.2 for the
+tunnel endpoints::
+
+	# ip l2tp add tunnel tunnel_id 1 peer_tunnel_id 1 udp_sport 5000 \
+	  udp_dport 5000 encap udp local 192.168.1.1 remote 192.168.1.2
+	# ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
+	# ip -s -d show dev l2tpeth0
+	# ip addr add 10.5.1.2/32 peer 10.5.1.1/32 dev l2tpeth0
+	# ip li set dev l2tpeth0 up
+
+Choose IP addresses to be the address of a local IP interface and that
+of the remote system. The IP addresses of the l2tpeth0 interface can be
+anything suitable.
+
+Repeat the above at the peer, with ports, tunnel/session ids and IP
+addresses reversed.  The tunnel and session IDs can be any non-zero
+32-bit number, but the values must be reversed at the peer.
+
+========================       ===================
+Host 1                         Host 2
+========================       ===================
+udp_sport=5000                 udp_sport=5001
+udp_dport=5001                 udp_dport=5000
+tunnel_id=42                   tunnel_id=45
+peer_tunnel_id=45              peer_tunnel_id=42
+session_id=128                 session_id=5196755
+peer_session_id=5196755        peer_session_id=128
+========================       ===================
+
+When done at both ends of the tunnel, it should be possible to send
+data over the network. e.g.::
+
+	# ping 10.5.1.1
+
+
+Sample Userspace Code
+=====================
+
+1. Create tunnel management PPPoX socket::
+
+	kernel_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
+	if (kernel_fd >= 0) {
+		struct sockaddr_pppol2tp sax;
+		struct sockaddr_in const *peer_addr;
+
+		peer_addr = l2tp_tunnel_get_peer_addr(tunnel);
+		memset(&sax, 0, sizeof(sax));
+		sax.sa_family = AF_PPPOX;
+		sax.sa_protocol = PX_PROTO_OL2TP;
+		sax.pppol2tp.fd = udp_fd;       /* fd of tunnel UDP socket */
+		sax.pppol2tp.addr.sin_addr.s_addr = peer_addr->sin_addr.s_addr;
+		sax.pppol2tp.addr.sin_port = peer_addr->sin_port;
+		sax.pppol2tp.addr.sin_family = AF_INET;
+		sax.pppol2tp.s_tunnel = tunnel_id;
+		sax.pppol2tp.s_session = 0;     /* special case: mgmt socket */
+		sax.pppol2tp.d_tunnel = 0;
+		sax.pppol2tp.d_session = 0;     /* special case: mgmt socket */
+
+		if(connect(kernel_fd, (struct sockaddr *)&sax, sizeof(sax) ) < 0 ) {
+			perror("connect failed");
+			result = -errno;
+			goto err;
+		}
+	}
+
+2. Create session PPPoX data socket::
+
+	struct sockaddr_pppol2tp sax;
+	int fd;
+
+	/* Note, the target socket must be bound already, else it will not be ready */
+	sax.sa_family = AF_PPPOX;
+	sax.sa_protocol = PX_PROTO_OL2TP;
+	sax.pppol2tp.fd = tunnel_fd;
+	sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
+	sax.pppol2tp.addr.sin_port = addr->sin_port;
+	sax.pppol2tp.addr.sin_family = AF_INET;
+	sax.pppol2tp.s_tunnel  = tunnel_id;
+	sax.pppol2tp.s_session = session_id;
+	sax.pppol2tp.d_tunnel  = peer_tunnel_id;
+	sax.pppol2tp.d_session = peer_session_id;
+
+	/* session_fd is the fd of the session's PPPoL2TP socket.
+	 * tunnel_fd is the fd of the tunnel UDP socket.
+	 */
+	fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
+	if (fd < 0 )    {
+		return -errno;
+	}
+	return 0;
+
+Internal Implementation
+=======================
+
+The driver keeps a struct l2tp_tunnel context per L2TP tunnel and a
+struct l2tp_session context for each session. The l2tp_tunnel is
+always associated with a UDP or L2TP/IP socket and keeps a list of
+sessions in the tunnel. The l2tp_session context keeps kernel state
+about the session. It has private data which is used for data specific
+to the session type. With L2TPv2, the session always carried PPP
+traffic. With L2TPv3, the session can also carry ethernet frames
+(ethernet pseudowire) or other data types such as ATM, HDLC or Frame
+Relay.
+
+When a tunnel is first opened, the reference count on the socket is
+increased using sock_hold(). This ensures that the kernel socket
+cannot be removed while L2TP's data structures reference it.
+
+Some L2TP sessions also have a socket (PPP pseudowires) while others
+do not (ethernet pseudowires). We can't use the socket reference count
+as the reference count for session contexts. The L2TP implementation
+therefore has its own internal reference counts on the session
+contexts.
+
+To Do
+=====
+
+Add L2TP tunnel switching support. This would route tunneled traffic
+from one L2TP tunnel into another. Specified in
+http://tools.ietf.org/html/draft-ietf-l2tpext-tunnel-switching-08
+
+Add L2TPv3 VLAN pseudowire support.
+
+Add L2TPv3 IP pseudowire support.
+
+Add L2TPv3 ATM pseudowire support.
+
+Miscellaneous
+=============
+
+The L2TP drivers were developed as part of the OpenL2TP project by
+Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
+designed from the ground up to have the L2TP datapath in the
+kernel. The project also implemented the pppol2tp plugin for pppd
+which allows pppd to use the kernel driver. Details can be found at
+http://www.openl2tp.org.
diff --git a/Documentation/networking/l2tp.txt b/Documentation/networking/l2tp.txt
deleted file mode 100644
index 9bc271cdc9a8..000000000000
--- a/Documentation/networking/l2tp.txt
+++ /dev/null
@@ -1,345 +0,0 @@
-This document describes how to use the kernel's L2TP drivers to
-provide L2TP functionality. L2TP is a protocol that tunnels one or
-more sessions over an IP tunnel. It is commonly used for VPNs
-(L2TP/IPSec) and by ISPs to tunnel subscriber PPP sessions over an IP
-network infrastructure. With L2TPv3, it is also useful as a Layer-2
-tunneling infrastructure.
-
-Features
-========
-
-L2TPv2 (PPP over L2TP (UDP tunnels)).
-L2TPv3 ethernet pseudowires.
-L2TPv3 PPP pseudowires.
-L2TPv3 IP encapsulation.
-Netlink sockets for L2TPv3 configuration management.
-
-History
-=======
-
-The original pppol2tp driver was introduced in 2.6.23 and provided
-L2TPv2 functionality (rfc2661). L2TPv2 is used to tunnel one or more PPP
-sessions over a UDP tunnel.
-
-L2TPv3 (rfc3931) changes the protocol to allow different frame types
-to be passed over an L2TP tunnel by moving the PPP-specific parts of
-the protocol out of the core L2TP packet headers. Each frame type is
-known as a pseudowire type. Ethernet, PPP, HDLC, Frame Relay and ATM
-pseudowires for L2TP are defined in separate RFC standards. Another
-change for L2TPv3 is that it can be carried directly over IP with no
-UDP header (UDP is optional). It is also possible to create static
-unmanaged L2TPv3 tunnels manually without a control protocol
-(userspace daemon) to manage them.
-
-To support L2TPv3, the original pppol2tp driver was split up to
-separate the L2TP and PPP functionality. Existing L2TPv2 userspace
-apps should be unaffected as the original pppol2tp sockets API is
-retained. L2TPv3, however, uses netlink to manage L2TPv3 tunnels and
-sessions.
-
-Design
-======
-
-The L2TP protocol separates control and data frames.  The L2TP kernel
-drivers handle only L2TP data frames; control frames are always
-handled by userspace. L2TP control frames carry messages between L2TP
-clients/servers and are used to setup / teardown tunnels and
-sessions. An L2TP client or server is implemented in userspace.
-
-Each L2TP tunnel is implemented using a UDP or L2TPIP socket; L2TPIP
-provides L2TPv3 IP encapsulation (no UDP) and is implemented using a
-new l2tpip socket family. The tunnel socket is typically created by
-userspace, though for unmanaged L2TPv3 tunnels, the socket can also be
-created by the kernel. Each L2TP session (pseudowire) gets a network
-interface instance. In the case of PPP, these interfaces are created
-indirectly by pppd using a pppol2tp socket. In the case of ethernet,
-the netdevice is created upon a netlink request to create an L2TPv3
-ethernet pseudowire.
-
-For PPP, the PPPoL2TP driver, net/l2tp/l2tp_ppp.c, provides a
-mechanism by which PPP frames carried through an L2TP session are
-passed through the kernel's PPP subsystem. The standard PPP daemon,
-pppd, handles all PPP interaction with the peer. PPP network
-interfaces are created for each local PPP endpoint. The kernel's PPP
-subsystem arranges for PPP control frames to be delivered to pppd,
-while data frames are forwarded as usual.
-
-For ethernet, the L2TPETH driver, net/l2tp/l2tp_eth.c, implements a
-netdevice driver, managing virtual ethernet devices, one per
-pseudowire. These interfaces can be managed using standard Linux tools
-such as "ip" and "ifconfig". If only IP frames are passed over the
-tunnel, the interface can be given an IP addresses of itself and its
-peer. If non-IP frames are to be passed over the tunnel, the interface
-can be added to a bridge using brctl. All L2TP datapath protocol
-functions are handled by the L2TP core driver.
-
-Each tunnel and session within a tunnel is assigned a unique tunnel_id
-and session_id. These ids are carried in the L2TP header of every
-control and data packet. (Actually, in L2TPv3, the tunnel_id isn't
-present in data frames - it is inferred from the IP connection on
-which the packet was received.) The L2TP driver uses the ids to lookup
-internal tunnel and/or session contexts to determine how to handle the
-packet. Zero tunnel / session ids are treated specially - zero ids are
-never assigned to tunnels or sessions in the network. In the driver,
-the tunnel context keeps a reference to the tunnel UDP or L2TPIP
-socket. The session context holds data that lets the driver interface
-to the kernel's network frame type subsystems, i.e. PPP, ethernet.
-
-Userspace Programming
-=====================
-
-For L2TPv2, there are a number of requirements on the userspace L2TP
-daemon in order to use the pppol2tp driver.
-
-1. Use a UDP socket per tunnel.
-
-2. Create a single PPPoL2TP socket per tunnel bound to a special null
-   session id. This is used only for communicating with the driver but
-   must remain open while the tunnel is active. Opening this tunnel
-   management socket causes the driver to mark the tunnel socket as an
-   L2TP UDP encapsulation socket and flags it for use by the
-   referenced tunnel id. This hooks up the UDP receive path via
-   udp_encap_rcv() in net/ipv4/udp.c. PPP data frames are never passed
-   in this special PPPoX socket.
-
-3. Create a PPPoL2TP socket per L2TP session. This is typically done
-   by starting pppd with the pppol2tp plugin and appropriate
-   arguments. A PPPoL2TP tunnel management socket (Step 2) must be
-   created before the first PPPoL2TP session socket is created.
-
-When creating PPPoL2TP sockets, the application provides information
-to the driver about the socket in a socket connect() call. Source and
-destination tunnel and session ids are provided, as well as the file
-descriptor of a UDP socket. See struct pppol2tp_addr in
-include/linux/if_pppol2tp.h. Note that zero tunnel / session ids are
-treated specially. When creating the per-tunnel PPPoL2TP management
-socket in Step 2 above, zero source and destination session ids are
-specified, which tells the driver to prepare the supplied UDP file
-descriptor for use as an L2TP tunnel socket.
-
-Userspace may control behavior of the tunnel or session using
-setsockopt and ioctl on the PPPoX socket. The following socket
-options are supported:-
-
-DEBUG     - bitmask of debug message categories. See below.
-SENDSEQ   - 0 => don't send packets with sequence numbers
-            1 => send packets with sequence numbers
-RECVSEQ   - 0 => receive packet sequence numbers are optional
-            1 => drop receive packets without sequence numbers
-LNSMODE   - 0 => act as LAC.
-            1 => act as LNS.
-REORDERTO - reorder timeout (in millisecs). If 0, don't try to reorder.
-
-Only the DEBUG option is supported by the special tunnel management
-PPPoX socket.
-
-In addition to the standard PPP ioctls, a PPPIOCGL2TPSTATS is provided
-to retrieve tunnel and session statistics from the kernel using the
-PPPoX socket of the appropriate tunnel or session.
-
-For L2TPv3, userspace must use the netlink API defined in
-include/linux/l2tp.h to manage tunnel and session contexts. The
-general procedure to create a new L2TP tunnel with one session is:-
-
-1. Open a GENL socket using L2TP_GENL_NAME for configuring the kernel
-   using netlink.
-
-2. Create a UDP or L2TPIP socket for the tunnel.
-
-3. Create a new L2TP tunnel using a L2TP_CMD_TUNNEL_CREATE
-   request. Set attributes according to desired tunnel parameters,
-   referencing the UDP or L2TPIP socket created in the previous step.
-
-4. Create a new L2TP session in the tunnel using a
-   L2TP_CMD_SESSION_CREATE request.
-
-The tunnel and all of its sessions are closed when the tunnel socket
-is closed. The netlink API may also be used to delete sessions and
-tunnels. Configuration and status info may be set or read using netlink.
-
-The L2TP driver also supports static (unmanaged) L2TPv3 tunnels. These
-are where there is no L2TP control message exchange with the peer to
-setup the tunnel; the tunnel is configured manually at each end of the
-tunnel. There is no need for an L2TP userspace application in this
-case -- the tunnel socket is created by the kernel and configured
-using parameters sent in the L2TP_CMD_TUNNEL_CREATE netlink
-request. The "ip" utility of iproute2 has commands for managing static
-L2TPv3 tunnels; do "ip l2tp help" for more information.
-
-Debugging
-=========
-
-The driver supports a flexible debug scheme where kernel trace
-messages may be optionally enabled per tunnel and per session. Care is
-needed when debugging a live system since the messages are not
-rate-limited and a busy system could be swamped. Userspace uses
-setsockopt on the PPPoX socket to set a debug mask.
-
-The following debug mask bits are available:
-
-L2TP_MSG_DEBUG    verbose debug (if compiled in)
-L2TP_MSG_CONTROL  userspace - kernel interface
-L2TP_MSG_SEQ      sequence numbers handling
-L2TP_MSG_DATA     data packets
-
-If enabled, files under a l2tp debugfs directory can be used to dump
-kernel state about L2TP tunnels and sessions. To access it, the
-debugfs filesystem must first be mounted.
-
-# mount -t debugfs debugfs /debug
-
-Files under the l2tp directory can then be accessed.
-
-# cat /debug/l2tp/tunnels
-
-The debugfs files should not be used by applications to obtain L2TP
-state information because the file format is subject to change. It is
-implemented to provide extra debug information to help diagnose
-problems.) Users should use the netlink API.
-
-/proc/net/pppol2tp is also provided for backwards compatibility with
-the original pppol2tp driver. It lists information about L2TPv2
-tunnels and sessions only. Its use is discouraged.
-
-Unmanaged L2TPv3 Tunnels
-========================
-
-Some commercial L2TP products support unmanaged L2TPv3 ethernet
-tunnels, where there is no L2TP control protocol; tunnels are
-configured at each side manually. New commands are available in
-iproute2's ip utility to support this.
-
-To create an L2TPv3 ethernet pseudowire between local host 192.168.1.1
-and peer 192.168.1.2, using IP addresses 10.5.1.1 and 10.5.1.2 for the
-tunnel endpoints:-
-
-# ip l2tp add tunnel tunnel_id 1 peer_tunnel_id 1 udp_sport 5000 \
-  udp_dport 5000 encap udp local 192.168.1.1 remote 192.168.1.2
-# ip l2tp add session tunnel_id 1 session_id 1 peer_session_id 1
-# ip -s -d show dev l2tpeth0
-# ip addr add 10.5.1.2/32 peer 10.5.1.1/32 dev l2tpeth0
-# ip li set dev l2tpeth0 up
-
-Choose IP addresses to be the address of a local IP interface and that
-of the remote system. The IP addresses of the l2tpeth0 interface can be
-anything suitable.
-
-Repeat the above at the peer, with ports, tunnel/session ids and IP
-addresses reversed.  The tunnel and session IDs can be any non-zero
-32-bit number, but the values must be reversed at the peer.
-
-Host 1                         Host2
-udp_sport=5000                 udp_sport=5001
-udp_dport=5001                 udp_dport=5000
-tunnel_id=42                   tunnel_id=45
-peer_tunnel_id=45              peer_tunnel_id=42
-session_id=128                 session_id=5196755
-peer_session_id=5196755        peer_session_id=128
-
-When done at both ends of the tunnel, it should be possible to send
-data over the network. e.g.
-
-# ping 10.5.1.1
-
-
-Sample Userspace Code
-=====================
-
-1. Create tunnel management PPPoX socket
-
-        kernel_fd = socket(AF_PPPOX, SOCK_DGRAM, PX_PROTO_OL2TP);
-        if (kernel_fd >= 0) {
-                struct sockaddr_pppol2tp sax;
-                struct sockaddr_in const *peer_addr;
-
-                peer_addr = l2tp_tunnel_get_peer_addr(tunnel);
-                memset(&sax, 0, sizeof(sax));
-                sax.sa_family = AF_PPPOX;
-                sax.sa_protocol = PX_PROTO_OL2TP;
-                sax.pppol2tp.fd = udp_fd;       /* fd of tunnel UDP socket */
-                sax.pppol2tp.addr.sin_addr.s_addr = peer_addr->sin_addr.s_addr;
-                sax.pppol2tp.addr.sin_port = peer_addr->sin_port;
-                sax.pppol2tp.addr.sin_family = AF_INET;
-                sax.pppol2tp.s_tunnel = tunnel_id;
-                sax.pppol2tp.s_session = 0;     /* special case: mgmt socket */
-                sax.pppol2tp.d_tunnel = 0;
-                sax.pppol2tp.d_session = 0;     /* special case: mgmt socket */
-
-                if(connect(kernel_fd, (struct sockaddr *)&sax, sizeof(sax) ) < 0 ) {
-                        perror("connect failed");
-                        result = -errno;
-                        goto err;
-                }
-        }
-
-2. Create session PPPoX data socket
-
-        struct sockaddr_pppol2tp sax;
-        int fd;
-
-        /* Note, the target socket must be bound already, else it will not be ready */
-        sax.sa_family = AF_PPPOX;
-        sax.sa_protocol = PX_PROTO_OL2TP;
-        sax.pppol2tp.fd = tunnel_fd;
-        sax.pppol2tp.addr.sin_addr.s_addr = addr->sin_addr.s_addr;
-        sax.pppol2tp.addr.sin_port = addr->sin_port;
-        sax.pppol2tp.addr.sin_family = AF_INET;
-        sax.pppol2tp.s_tunnel  = tunnel_id;
-        sax.pppol2tp.s_session = session_id;
-        sax.pppol2tp.d_tunnel  = peer_tunnel_id;
-        sax.pppol2tp.d_session = peer_session_id;
-
-        /* session_fd is the fd of the session's PPPoL2TP socket.
-         * tunnel_fd is the fd of the tunnel UDP socket.
-         */
-        fd = connect(session_fd, (struct sockaddr *)&sax, sizeof(sax));
-        if (fd < 0 )    {
-                return -errno;
-        }
-        return 0;
-
-Internal Implementation
-=======================
-
-The driver keeps a struct l2tp_tunnel context per L2TP tunnel and a
-struct l2tp_session context for each session. The l2tp_tunnel is
-always associated with a UDP or L2TP/IP socket and keeps a list of
-sessions in the tunnel. The l2tp_session context keeps kernel state
-about the session. It has private data which is used for data specific
-to the session type. With L2TPv2, the session always carried PPP
-traffic. With L2TPv3, the session can also carry ethernet frames
-(ethernet pseudowire) or other data types such as ATM, HDLC or Frame
-Relay.
-
-When a tunnel is first opened, the reference count on the socket is
-increased using sock_hold(). This ensures that the kernel socket
-cannot be removed while L2TP's data structures reference it.
-
-Some L2TP sessions also have a socket (PPP pseudowires) while others
-do not (ethernet pseudowires). We can't use the socket reference count
-as the reference count for session contexts. The L2TP implementation
-therefore has its own internal reference counts on the session
-contexts.
-
-To Do
-=====
-
-Add L2TP tunnel switching support. This would route tunneled traffic
-from one L2TP tunnel into another. Specified in
-http://tools.ietf.org/html/draft-ietf-l2tpext-tunnel-switching-08
-
-Add L2TPv3 VLAN pseudowire support.
-
-Add L2TPv3 IP pseudowire support.
-
-Add L2TPv3 ATM pseudowire support.
-
-Miscellaneous
-=============
-
-The L2TP drivers were developed as part of the OpenL2TP project by
-Katalix Systems Ltd. OpenL2TP is a full-featured L2TP client / server,
-designed from the ground up to have the L2TP datapath in the
-kernel. The project also implemented the pppol2tp plugin for pppd
-which allows pppd to use the kernel driver. Details can be found at
-http://www.openl2tp.org.
-- 
cgit v1.2.3-59-g8ed1b


From 40e79150c1686263e6a031d7702aec63aff31332 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:03:57 +0200
Subject: docs: networking: convert lapb-module.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/lapb-module.rst | 305 +++++++++++++++++++++++++++++++
 Documentation/networking/lapb-module.txt | 263 --------------------------
 MAINTAINERS                              |   2 +-
 net/lapb/Kconfig                         |   2 +-
 5 files changed, 308 insertions(+), 265 deletions(-)
 create mode 100644 Documentation/networking/lapb-module.rst
 delete mode 100644 Documentation/networking/lapb-module.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 0c5d7a037983..acd2567cf0d4 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -75,6 +75,7 @@ Contents:
    ipvs-sysctl
    kcm
    l2tp
+   lapb-module
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/lapb-module.rst b/Documentation/networking/lapb-module.rst
new file mode 100644
index 000000000000..ff586bc9f005
--- /dev/null
+++ b/Documentation/networking/lapb-module.rst
@@ -0,0 +1,305 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================
+The Linux LAPB Module Interface
+===============================
+
+Version 1.3
+
+Jonathan Naylor 29.12.96
+
+Changed (Henner Eisen, 2000-10-29): int return value for data_indication()
+
+The LAPB module will be a separately compiled module for use by any parts of
+the Linux operating system that require a LAPB service. This document
+defines the interfaces to, and the services provided by this module. The
+term module in this context does not imply that the LAPB module is a
+separately loadable module, although it may be. The term module is used in
+its more standard meaning.
+
+The interface to the LAPB module consists of functions to the module,
+callbacks from the module to indicate important state changes, and
+structures for getting and setting information about the module.
+
+Structures
+----------
+
+Probably the most important structure is the skbuff structure for holding
+received and transmitted data, however it is beyond the scope of this
+document.
+
+The two LAPB specific structures are the LAPB initialisation structure and
+the LAPB parameter structure. These will be defined in a standard header
+file, <linux/lapb.h>. The header file <net/lapb.h> is internal to the LAPB
+module and is not for use.
+
+LAPB Initialisation Structure
+-----------------------------
+
+This structure is used only once, in the call to lapb_register (see below).
+It contains information about the device driver that requires the services
+of the LAPB module::
+
+	struct lapb_register_struct {
+		void (*connect_confirmation)(void *token, int reason);
+		void (*connect_indication)(void *token, int reason);
+		void (*disconnect_confirmation)(void *token, int reason);
+		void (*disconnect_indication)(void *token, int reason);
+		int  (*data_indication)(void *token, struct sk_buff *skb);
+		void (*data_transmit)(void *token, struct sk_buff *skb);
+	};
+
+Each member of this structure corresponds to a function in the device driver
+that is called when a particular event in the LAPB module occurs. These will
+be described in detail below. If a callback is not required (!!) then a NULL
+may be substituted.
+
+
+LAPB Parameter Structure
+------------------------
+
+This structure is used with the lapb_getparms and lapb_setparms functions
+(see below). They are used to allow the device driver to get and set the
+operational parameters of the LAPB implementation for a given connection::
+
+	struct lapb_parms_struct {
+		unsigned int t1;
+		unsigned int t1timer;
+		unsigned int t2;
+		unsigned int t2timer;
+		unsigned int n2;
+		unsigned int n2count;
+		unsigned int window;
+		unsigned int state;
+		unsigned int mode;
+	};
+
+T1 and T2 are protocol timing parameters and are given in units of 100ms. N2
+is the maximum number of tries on the link before it is declared a failure.
+The window size is the maximum number of outstanding data packets allowed to
+be unacknowledged by the remote end, the value of the window is between 1
+and 7 for a standard LAPB link, and between 1 and 127 for an extended LAPB
+link.
+
+The mode variable is a bit field used for setting (at present) three values.
+The bit fields have the following meanings:
+
+======  =================================================
+Bit	Meaning
+======  =================================================
+0	LAPB operation (0=LAPB_STANDARD 1=LAPB_EXTENDED).
+1	[SM]LP operation (0=LAPB_SLP 1=LAPB_MLP).
+2	DTE/DCE operation (0=LAPB_DTE 1=LAPB_DCE).
+3-31	Reserved, must be 0.
+======  =================================================
+
+Extended LAPB operation indicates the use of extended sequence numbers and
+consequently larger window sizes, the default is standard LAPB operation.
+MLP operation is the same as SLP operation except that the addresses used by
+LAPB are different to indicate the mode of operation, the default is Single
+Link Procedure. The difference between DCE and DTE operation is (i) the
+addresses used for commands and responses, and (ii) when the DCE is not
+connected, it sends DM without polls set, every T1. The upper case constant
+names will be defined in the public LAPB header file.
+
+
+Functions
+---------
+
+The LAPB module provides a number of function entry points.
+
+::
+
+    int lapb_register(void *token, struct lapb_register_struct);
+
+This must be called before the LAPB module may be used. If the call is
+successful then LAPB_OK is returned. The token must be a unique identifier
+generated by the device driver to allow for the unique identification of the
+instance of the LAPB link. It is returned by the LAPB module in all of the
+callbacks, and is used by the device driver in all calls to the LAPB module.
+For multiple LAPB links in a single device driver, multiple calls to
+lapb_register must be made. The format of the lapb_register_struct is given
+above. The return values are:
+
+=============		=============================
+LAPB_OK			LAPB registered successfully.
+LAPB_BADTOKEN		Token is already registered.
+LAPB_NOMEM		Out of memory
+=============		=============================
+
+::
+
+    int lapb_unregister(void *token);
+
+This releases all the resources associated with a LAPB link. Any current
+LAPB link will be abandoned without further messages being passed. After
+this call, the value of token is no longer valid for any calls to the LAPB
+function. The valid return values are:
+
+=============		===============================
+LAPB_OK			LAPB unregistered successfully.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+=============		===============================
+
+::
+
+    int lapb_getparms(void *token, struct lapb_parms_struct *parms);
+
+This allows the device driver to get the values of the current LAPB
+variables, the lapb_parms_struct is described above. The valid return values
+are:
+
+=============		=============================
+LAPB_OK			LAPB getparms was successful.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+=============		=============================
+
+::
+
+    int lapb_setparms(void *token, struct lapb_parms_struct *parms);
+
+This allows the device driver to set the values of the current LAPB
+variables, the lapb_parms_struct is described above. The values of t1timer,
+t2timer and n2count are ignored, likewise changing the mode bits when
+connected will be ignored. An error implies that none of the values have
+been changed. The valid return values are:
+
+=============		=================================================
+LAPB_OK			LAPB setparms was successful.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+LAPB_INVALUE		One of the values was out of its allowable range.
+=============		=================================================
+
+::
+
+    int lapb_connect_request(void *token);
+
+Initiate a connect using the current parameter settings. The valid return
+values are:
+
+==============		=================================
+LAPB_OK			LAPB is starting to connect.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+LAPB_CONNECTED		LAPB module is already connected.
+==============		=================================
+
+::
+
+    int lapb_disconnect_request(void *token);
+
+Initiate a disconnect. The valid return values are:
+
+=================	===============================
+LAPB_OK			LAPB is starting to disconnect.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+LAPB_NOTCONNECTED	LAPB module is not connected.
+=================	===============================
+
+::
+
+    int lapb_data_request(void *token, struct sk_buff *skb);
+
+Queue data with the LAPB module for transmitting over the link. If the call
+is successful then the skbuff is owned by the LAPB module and may not be
+used by the device driver again. The valid return values are:
+
+=================	=============================
+LAPB_OK			LAPB has accepted the data.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+LAPB_NOTCONNECTED	LAPB module is not connected.
+=================	=============================
+
+::
+
+    int lapb_data_received(void *token, struct sk_buff *skb);
+
+Queue data with the LAPB module which has been received from the device. It
+is expected that the data passed to the LAPB module has skb->data pointing
+to the beginning of the LAPB data. If the call is successful then the skbuff
+is owned by the LAPB module and may not be used by the device driver again.
+The valid return values are:
+
+=============		===========================
+LAPB_OK			LAPB has accepted the data.
+LAPB_BADTOKEN		Invalid/unknown LAPB token.
+=============		===========================
+
+Callbacks
+---------
+
+These callbacks are functions provided by the device driver for the LAPB
+module to call when an event occurs. They are registered with the LAPB
+module with lapb_register (see above) in the structure lapb_register_struct
+(see above).
+
+::
+
+    void (*connect_confirmation)(void *token, int reason);
+
+This is called by the LAPB module when a connection is established after
+being requested by a call to lapb_connect_request (see above). The reason is
+always LAPB_OK.
+
+::
+
+    void (*connect_indication)(void *token, int reason);
+
+This is called by the LAPB module when the link is established by the remote
+system. The value of reason is always LAPB_OK.
+
+::
+
+    void (*disconnect_confirmation)(void *token, int reason);
+
+This is called by the LAPB module when an event occurs after the device
+driver has called lapb_disconnect_request (see above). The reason indicates
+what has happened. In all cases the LAPB link can be regarded as being
+terminated. The values for reason are:
+
+=================	====================================================
+LAPB_OK			The LAPB link was terminated normally.
+LAPB_NOTCONNECTED	The remote system was not connected.
+LAPB_TIMEDOUT		No response was received in N2 tries from the remote
+			system.
+=================	====================================================
+
+::
+
+    void (*disconnect_indication)(void *token, int reason);
+
+This is called by the LAPB module when the link is terminated by the remote
+system or another event has occurred to terminate the link. This may be
+returned in response to a lapb_connect_request (see above) if the remote
+system refused the request. The values for reason are:
+
+=================	====================================================
+LAPB_OK			The LAPB link was terminated normally by the remote
+			system.
+LAPB_REFUSED		The remote system refused the connect request.
+LAPB_NOTCONNECTED	The remote system was not connected.
+LAPB_TIMEDOUT		No response was received in N2 tries from the remote
+			system.
+=================	====================================================
+
+::
+
+    int (*data_indication)(void *token, struct sk_buff *skb);
+
+This is called by the LAPB module when data has been received from the
+remote system that should be passed onto the next layer in the protocol
+stack. The skbuff becomes the property of the device driver and the LAPB
+module will not perform any more actions on it. The skb->data pointer will
+be pointing to the first byte of data after the LAPB header.
+
+This method should return NET_RX_DROP (as defined in the header
+file include/linux/netdevice.h) if and only if the frame was dropped
+before it could be delivered to the upper layer.
+
+::
+
+    void (*data_transmit)(void *token, struct sk_buff *skb);
+
+This is called by the LAPB module when data is to be transmitted to the
+remote system by the device driver. The skbuff becomes the property of the
+device driver and the LAPB module will not perform any more actions on it.
+The skb->data pointer will be pointing to the first byte of the LAPB header.
diff --git a/Documentation/networking/lapb-module.txt b/Documentation/networking/lapb-module.txt
deleted file mode 100644
index d4fc8f221559..000000000000
--- a/Documentation/networking/lapb-module.txt
+++ /dev/null
@@ -1,263 +0,0 @@
-		The Linux LAPB Module Interface 1.3
-
-		      Jonathan Naylor 29.12.96
-
-Changed (Henner Eisen, 2000-10-29): int return value for data_indication() 
-
-The LAPB module will be a separately compiled module for use by any parts of
-the Linux operating system that require a LAPB service. This document
-defines the interfaces to, and the services provided by this module. The
-term module in this context does not imply that the LAPB module is a
-separately loadable module, although it may be. The term module is used in
-its more standard meaning.
-
-The interface to the LAPB module consists of functions to the module,
-callbacks from the module to indicate important state changes, and
-structures for getting and setting information about the module.
-
-Structures
-----------
-
-Probably the most important structure is the skbuff structure for holding
-received and transmitted data, however it is beyond the scope of this
-document.
-
-The two LAPB specific structures are the LAPB initialisation structure and
-the LAPB parameter structure. These will be defined in a standard header
-file, <linux/lapb.h>. The header file <net/lapb.h> is internal to the LAPB
-module and is not for use.
-
-LAPB Initialisation Structure
------------------------------
-
-This structure is used only once, in the call to lapb_register (see below).
-It contains information about the device driver that requires the services
-of the LAPB module.
-
-struct lapb_register_struct {
-	void (*connect_confirmation)(int token, int reason);
-	void (*connect_indication)(int token, int reason);
-	void (*disconnect_confirmation)(int token, int reason);
-	void (*disconnect_indication)(int token, int reason);
-	int  (*data_indication)(int token, struct sk_buff *skb);
-	void (*data_transmit)(int token, struct sk_buff *skb);
-};
-
-Each member of this structure corresponds to a function in the device driver
-that is called when a particular event in the LAPB module occurs. These will
-be described in detail below. If a callback is not required (!!) then a NULL
-may be substituted.
-
-
-LAPB Parameter Structure
-------------------------
-
-This structure is used with the lapb_getparms and lapb_setparms functions
-(see below). They are used to allow the device driver to get and set the
-operational parameters of the LAPB implementation for a given connection.
-
-struct lapb_parms_struct {
-	unsigned int t1;
-	unsigned int t1timer;
-	unsigned int t2;
-	unsigned int t2timer;
-	unsigned int n2;
-	unsigned int n2count;
-	unsigned int window;
-	unsigned int state;
-	unsigned int mode;
-};
-
-T1 and T2 are protocol timing parameters and are given in units of 100ms. N2
-is the maximum number of tries on the link before it is declared a failure.
-The window size is the maximum number of outstanding data packets allowed to
-be unacknowledged by the remote end, the value of the window is between 1
-and 7 for a standard LAPB link, and between 1 and 127 for an extended LAPB
-link.
-
-The mode variable is a bit field used for setting (at present) three values.
-The bit fields have the following meanings:
-
-Bit	Meaning
-0	LAPB operation (0=LAPB_STANDARD 1=LAPB_EXTENDED).
-1	[SM]LP operation (0=LAPB_SLP 1=LAPB=MLP).
-2	DTE/DCE operation (0=LAPB_DTE 1=LAPB_DCE)
-3-31	Reserved, must be 0.
-
-Extended LAPB operation indicates the use of extended sequence numbers and
-consequently larger window sizes, the default is standard LAPB operation.
-MLP operation is the same as SLP operation except that the addresses used by
-LAPB are different to indicate the mode of operation, the default is Single
-Link Procedure. The difference between DCE and DTE operation is (i) the
-addresses used for commands and responses, and (ii) when the DCE is not
-connected, it sends DM without polls set, every T1. The upper case constant
-names will be defined in the public LAPB header file.
-
-
-Functions
----------
-
-The LAPB module provides a number of function entry points.
-
-
-int lapb_register(void *token, struct lapb_register_struct);
-
-This must be called before the LAPB module may be used. If the call is
-successful then LAPB_OK is returned. The token must be a unique identifier
-generated by the device driver to allow for the unique identification of the
-instance of the LAPB link. It is returned by the LAPB module in all of the
-callbacks, and is used by the device driver in all calls to the LAPB module.
-For multiple LAPB links in a single device driver, multiple calls to
-lapb_register must be made. The format of the lapb_register_struct is given
-above. The return values are:
-
-LAPB_OK			LAPB registered successfully.
-LAPB_BADTOKEN		Token is already registered.
-LAPB_NOMEM		Out of memory
-
-
-int lapb_unregister(void *token);
-
-This releases all the resources associated with a LAPB link. Any current
-LAPB link will be abandoned without further messages being passed. After
-this call, the value of token is no longer valid for any calls to the LAPB
-function. The valid return values are:
-
-LAPB_OK			LAPB unregistered successfully.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-
-
-int lapb_getparms(void *token, struct lapb_parms_struct *parms);
-
-This allows the device driver to get the values of the current LAPB
-variables, the lapb_parms_struct is described above. The valid return values
-are:
-
-LAPB_OK			LAPB getparms was successful.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-
-
-int lapb_setparms(void *token, struct lapb_parms_struct *parms);
-
-This allows the device driver to set the values of the current LAPB
-variables, the lapb_parms_struct is described above. The values of t1timer,
-t2timer and n2count are ignored, likewise changing the mode bits when
-connected will be ignored. An error implies that none of the values have
-been changed. The valid return values are:
-
-LAPB_OK			LAPB getparms was successful.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-LAPB_INVALUE		One of the values was out of its allowable range.
-
-
-int lapb_connect_request(void *token);
-
-Initiate a connect using the current parameter settings. The valid return
-values are:
-
-LAPB_OK			LAPB is starting to connect.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-LAPB_CONNECTED		LAPB module is already connected.
-
-
-int lapb_disconnect_request(void *token);
-
-Initiate a disconnect. The valid return values are:
-
-LAPB_OK			LAPB is starting to disconnect.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-LAPB_NOTCONNECTED	LAPB module is not connected.
-
-
-int lapb_data_request(void *token, struct sk_buff *skb);
-
-Queue data with the LAPB module for transmitting over the link. If the call
-is successful then the skbuff is owned by the LAPB module and may not be
-used by the device driver again. The valid return values are:
-
-LAPB_OK			LAPB has accepted the data.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-LAPB_NOTCONNECTED	LAPB module is not connected.
-
-
-int lapb_data_received(void *token, struct sk_buff *skb);
-
-Queue data with the LAPB module which has been received from the device. It
-is expected that the data passed to the LAPB module has skb->data pointing
-to the beginning of the LAPB data. If the call is successful then the skbuff
-is owned by the LAPB module and may not be used by the device driver again.
-The valid return values are:
-
-LAPB_OK			LAPB has accepted the data.
-LAPB_BADTOKEN		Invalid/unknown LAPB token.
-
-
-Callbacks
----------
-
-These callbacks are functions provided by the device driver for the LAPB
-module to call when an event occurs. They are registered with the LAPB
-module with lapb_register (see above) in the structure lapb_register_struct
-(see above).
-
-
-void (*connect_confirmation)(void *token, int reason);
-
-This is called by the LAPB module when a connection is established after
-being requested by a call to lapb_connect_request (see above). The reason is
-always LAPB_OK.
-
-
-void (*connect_indication)(void *token, int reason);
-
-This is called by the LAPB module when the link is established by the remote
-system. The value of reason is always LAPB_OK.
-
-
-void (*disconnect_confirmation)(void *token, int reason);
-
-This is called by the LAPB module when an event occurs after the device
-driver has called lapb_disconnect_request (see above). The reason indicates
-what has happened. In all cases the LAPB link can be regarded as being
-terminated. The values for reason are:
-
-LAPB_OK			The LAPB link was terminated normally.
-LAPB_NOTCONNECTED	The remote system was not connected.
-LAPB_TIMEDOUT		No response was received in N2 tries from the remote
-			system.
-
-
-void (*disconnect_indication)(void *token, int reason);
-
-This is called by the LAPB module when the link is terminated by the remote
-system or another event has occurred to terminate the link. This may be
-returned in response to a lapb_connect_request (see above) if the remote
-system refused the request. The values for reason are:
-
-LAPB_OK			The LAPB link was terminated normally by the remote
-			system.
-LAPB_REFUSED		The remote system refused the connect request.
-LAPB_NOTCONNECTED	The remote system was not connected.
-LAPB_TIMEDOUT		No response was received in N2 tries from the remote
-			system.
-
-
-int (*data_indication)(void *token, struct sk_buff *skb);
-
-This is called by the LAPB module when data has been received from the
-remote system that should be passed onto the next layer in the protocol
-stack. The skbuff becomes the property of the device driver and the LAPB
-module will not perform any more actions on it. The skb->data pointer will
-be pointing to the first byte of data after the LAPB header.
-
-This method should return NET_RX_DROP (as defined in the header
-file include/linux/netdevice.h) if and only if the frame was dropped
-before it could be delivered to the upper layer.
-
-
-void (*data_transmit)(void *token, struct sk_buff *skb);
-
-This is called by the LAPB module when data is to be transmitted to the
-remote system by the device driver. The skbuff becomes the property of the
-device driver and the LAPB module will not perform any more actions on it.
-The skb->data pointer will be pointing to the first byte of the LAPB header.
diff --git a/MAINTAINERS b/MAINTAINERS
index 3a5f52a3c055..956999d2d979 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -9515,7 +9515,7 @@ F:	drivers/soc/lantiq
 LAPB module
 L:	linux-x25@vger.kernel.org
 S:	Orphan
-F:	Documentation/networking/lapb-module.txt
+F:	Documentation/networking/lapb-module.rst
 F:	include/*/lapb.h
 F:	net/lapb/
 
diff --git a/net/lapb/Kconfig b/net/lapb/Kconfig
index 6acfc999c952..5b50e8d64f26 100644
--- a/net/lapb/Kconfig
+++ b/net/lapb/Kconfig
@@ -15,7 +15,7 @@ config LAPB
 	  currently supports LAPB only over Ethernet connections. If you want
 	  to use LAPB connections over Ethernet, say Y here and to "LAPB over
 	  Ethernet driver" below. Read
-	  <file:Documentation/networking/lapb-module.txt> for technical
+	  <file:Documentation/networking/lapb-module.rst> for technical
 	  details.
 
 	  To compile this driver as a module, choose M here: the
-- 
cgit v1.2.3-59-g8ed1b


From a6b93e6555a6ecd0d08b0383ea4d93d09a168187 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:03:58 +0200
Subject: docs: networking: convert ltpc.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/ltpc.rst  | 144 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/ltpc.txt  | 131 ---------------------------------
 drivers/net/appletalk/Kconfig      |   2 +-
 4 files changed, 146 insertions(+), 132 deletions(-)
 create mode 100644 Documentation/networking/ltpc.rst
 delete mode 100644 Documentation/networking/ltpc.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index acd2567cf0d4..b3608b177a8b 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -76,6 +76,7 @@ Contents:
    kcm
    l2tp
    lapb-module
+   ltpc
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ltpc.rst b/Documentation/networking/ltpc.rst
new file mode 100644
index 000000000000..0ad197fd17ce
--- /dev/null
+++ b/Documentation/networking/ltpc.rst
@@ -0,0 +1,144 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+LTPC Driver
+===========
+
+This is the ALPHA version of the ltpc driver.
+
+In order to use it, you will need at least version 1.3.3 of the
+netatalk package, and the Apple or Farallon LocalTalk PC card.
+There are a number of different LocalTalk cards for the PC; this
+driver applies only to the one with the 65c02 processor chip on it.
+
+To include it in the kernel, select the CONFIG_LTPC switch in the
+configuration dialog.  You can also compile it as a module.
+
+While the driver will attempt to autoprobe the I/O port address, IRQ
+line, and DMA channel of the card, this does not always work.  For
+this reason, you should be prepared to supply these parameters
+yourself.  (see "Card Configuration" below for how to determine or
+change the settings on your card)
+
+When the driver is compiled into the kernel, you can add a line such
+as the following to your /etc/lilo.conf::
+
+ append="ltpc=0x240,9,1"
+
+where the parameters (in order) are the port address, IRQ, and DMA
+channel.  The second and third values can be omitted, in which case
+the driver will try to determine them itself.
+
+If you load the driver as a module, you can pass the parameters "io=",
+"irq=", and "dma=" on the command line with insmod or modprobe, or add
+them as options in a configuration file in /etc/modprobe.d/ directory::
+
+ alias lt0 ltpc # autoload the module when the interface is configured
+ options ltpc io=0x240 irq=9 dma=1
+
+Before starting up the netatalk daemons (perhaps in rc.local), you
+need to add a line such as::
+
+ /sbin/ifconfig lt0 127.0.0.42
+
+The address is unimportant - however, the card needs to be configured
+with ifconfig so that Netatalk can find it.
+
+The appropriate netatalk configuration depends on whether you are
+attached to a network that includes AppleTalk routers or not.  If,
+like me, you are simply connecting to your home Macintoshes and
+printers, you need to set up netatalk to "seed".  The way I do this
+is to have the lines::
+
+ dummy -seed -phase 2 -net 2000 -addr 2000.26 -zone "1033"
+ lt0 -seed -phase 1 -net 1033 -addr 1033.27 -zone "1033"
+
+in my atalkd.conf.  What is going on here is that I need to fool
+netatalk into thinking that there are two AppleTalk interfaces
+present; otherwise, it refuses to seed.  This is a hack, and a more
+permanent solution would be to alter the netatalk code.  Also, make
+sure you have the correct name for the dummy interface - If it's
+compiled as a module, you will need to refer to it as "dummy0" or some
+such.
+
+If you are attached to an extended AppleTalk network, with routers on
+it, then you don't need to fool around with this -- the appropriate
+line in atalkd.conf is::
+
+ lt0 -phase 1
+
+
+Card Configuration
+==================
+
+The interrupts and so forth are configured via the dipswitch on the
+board.  Set the switches so as not to conflict with other hardware.
+
+       Interrupts -- set at most one.  If none are set, the driver uses
+       polled mode.  Because the card was developed in the XT era, the
+       original documentation refers to IRQ2.  Since you'll be running
+       this on an AT (or later) class machine, that really means IRQ9.
+
+       ===     ===========================================================
+       SW1     IRQ 4
+       SW2     IRQ 3
+       SW3     IRQ 9 (2 in original card documentation only applies to XT)
+       ===     ===========================================================
+
+
+       DMA -- choose DMA 1 or 3, and set both corresponding switches.
+
+       ===     =====
+       SW4     DMA 3
+       SW5     DMA 1
+       SW6     DMA 3
+       SW7     DMA 1
+       ===     =====
+
+
+       I/O address -- choose one.
+
+       ===     =========
+       SW8     220 / 240
+       ===     =========
+
+
+IP
+==
+
+Yes, it is possible to do IP over LocalTalk.  However, you can't just
+treat the LocalTalk device like an ordinary Ethernet device, even if
+that's what it looks like to Netatalk.
+
+Instead, you follow the same procedure as for doing IP in EtherTalk.
+See Documentation/networking/ipddp.rst for more information about the
+kernel driver and userspace tools needed.
+
+
+Bugs
+====
+
+IRQ autoprobing often doesn't work on a cold boot.  To get around
+this, either compile the driver as a module, or pass the parameters
+for the card to the kernel as described above.
+
+Also, as usual, autoprobing is not recommended when you use the driver
+as a module. (though it usually works at boot time, at least)
+
+Polled mode is *really* slow sometimes, but this seems to depend on
+the configuration of the network.
+
+It may theoretically be possible to use two LTPC cards in the same
+machine, but this is unsupported, so if you really want to do this,
+you'll probably have to hack the initialization code a bit.
+
+
+Thanks
+======
+
+Thanks to Alan Cox for helpful discussions early on in this
+work, and to Denis Hainsworth for doing the bleeding-edge testing.
+
+Bradford Johnson <bradford@math.umn.edu>
+
+Updated 11/09/1998 by David Huggins-Daines <dhd@debian.org>
diff --git a/Documentation/networking/ltpc.txt b/Documentation/networking/ltpc.txt
deleted file mode 100644
index a005a73b76d0..000000000000
--- a/Documentation/networking/ltpc.txt
+++ /dev/null
@@ -1,131 +0,0 @@
-This is the ALPHA version of the ltpc driver.
-
-In order to use it, you will need at least version 1.3.3 of the
-netatalk package, and the Apple or Farallon LocalTalk PC card.
-There are a number of different LocalTalk cards for the PC; this
-driver applies only to the one with the 65c02 processor chip on it.
-
-To include it in the kernel, select the CONFIG_LTPC switch in the
-configuration dialog.  You can also compile it as a module.
-
-While the driver will attempt to autoprobe the I/O port address, IRQ
-line, and DMA channel of the card, this does not always work.  For
-this reason, you should be prepared to supply these parameters
-yourself.  (see "Card Configuration" below for how to determine or
-change the settings on your card)
-
-When the driver is compiled into the kernel, you can add a line such
-as the following to your /etc/lilo.conf:
-
- append="ltpc=0x240,9,1"
-
-where the parameters (in order) are the port address, IRQ, and DMA
-channel.  The second and third values can be omitted, in which case
-the driver will try to determine them itself.
-
-If you load the driver as a module, you can pass the parameters "io=",
-"irq=", and "dma=" on the command line with insmod or modprobe, or add
-them as options in a configuration file in /etc/modprobe.d/ directory:
-
- alias lt0 ltpc # autoload the module when the interface is configured
- options ltpc io=0x240 irq=9 dma=1
-
-Before starting up the netatalk demons (perhaps in rc.local), you
-need to add a line such as:
-
- /sbin/ifconfig lt0 127.0.0.42
-
-The address is unimportant - however, the card needs to be configured
-with ifconfig so that Netatalk can find it.
-
-The appropriate netatalk configuration depends on whether you are
-attached to a network that includes AppleTalk routers or not.  If,
-like me, you are simply connecting to your home Macintoshes and
-printers, you need to set up netatalk to "seed".  The way I do this
-is to have the lines
-
- dummy -seed -phase 2 -net 2000 -addr 2000.26 -zone "1033"
- lt0 -seed -phase 1 -net 1033 -addr 1033.27 -zone "1033"
-
-in my atalkd.conf.  What is going on here is that I need to fool
-netatalk into thinking that there are two AppleTalk interfaces
-present; otherwise, it refuses to seed.  This is a hack, and a more
-permanent solution would be to alter the netatalk code.  Also, make
-sure you have the correct name for the dummy interface - If it's
-compiled as a module, you will need to refer to it as "dummy0" or some
-such.
-
-If you are attached to an extended AppleTalk network, with routers on
-it, then you don't need to fool around with this -- the appropriate
-line in atalkd.conf is
-
- lt0 -phase 1
-
---------------------------------------
-
-Card Configuration:
-
-The interrupts and so forth are configured via the dipswitch on the
-board.  Set the switches so as not to conflict with other hardware.
-
-       Interrupts -- set at most one.  If none are set, the driver uses
-       polled mode.  Because the card was developed in the XT era, the
-       original documentation refers to IRQ2.  Since you'll be running
-       this on an AT (or later) class machine, that really means IRQ9.
-
-       SW1     IRQ 4
-       SW2     IRQ 3
-       SW3     IRQ 9 (2 in original card documentation only applies to XT)
-
-
-       DMA -- choose DMA 1 or 3, and set both corresponding switches.
-
-       SW4     DMA 3
-       SW5     DMA 1
-       SW6     DMA 3
-       SW7     DMA 1
-
-
-       I/O address -- choose one.
-
-       SW8     220 / 240
-
---------------------------------------
-
-IP:
-
-Yes, it is possible to do IP over LocalTalk.  However, you can't just
-treat the LocalTalk device like an ordinary Ethernet device, even if
-that's what it looks like to Netatalk.
-
-Instead, you follow the same procedure as for doing IP in EtherTalk.
-See Documentation/networking/ipddp.rst for more information about the
-kernel driver and userspace tools needed.
-
---------------------------------------
-
-BUGS:
-
-IRQ autoprobing often doesn't work on a cold boot.  To get around
-this, either compile the driver as a module, or pass the parameters
-for the card to the kernel as described above.
-
-Also, as usual, autoprobing is not recommended when you use the driver
-as a module. (though it usually works at boot time, at least)
-
-Polled mode is *really* slow sometimes, but this seems to depend on
-the configuration of the network.
-
-It may theoretically be possible to use two LTPC cards in the same
-machine, but this is unsupported, so if you really want to do this,
-you'll probably have to hack the initialization code a bit.
-
-______________________________________
-
-THANKS:
-	Thanks to Alan Cox for helpful discussions early on in this
-work, and to Denis Hainsworth for doing the bleeding-edge testing.
-
--- Bradford Johnson <bradford@math.umn.edu>
-
--- Updated 11/09/1998 by David Huggins-Daines <dhd@debian.org>
diff --git a/drivers/net/appletalk/Kconfig b/drivers/net/appletalk/Kconfig
index ccde6479050c..10589a82263b 100644
--- a/drivers/net/appletalk/Kconfig
+++ b/drivers/net/appletalk/Kconfig
@@ -48,7 +48,7 @@ config LTPC
 	  If you are in doubt, this card is the one with the 65C02 chip on it.
 	  You also need version 1.3.3 or later of the netatalk package.
 	  This driver is experimental, which means that it may not work.
-	  See the file <file:Documentation/networking/ltpc.txt>.
+	  See the file <file:Documentation/networking/ltpc.rst>.
 
 config COPS
 	tristate "COPS LocalTalk PC support"
-- 
cgit v1.2.3-59-g8ed1b


From 429ff87bcac75b929d9ffec8d4d24be2616f8052 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:03:59 +0200
Subject: docs: networking: convert mac80211-injection.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespace and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst              |   1 +
 Documentation/networking/mac80211-injection.rst | 106 ++++++++++++++++++++++++
 Documentation/networking/mac80211-injection.txt |  97 ----------------------
 MAINTAINERS                                     |   2 +-
 net/mac80211/tx.c                               |   2 +-
 5 files changed, 109 insertions(+), 99 deletions(-)
 create mode 100644 Documentation/networking/mac80211-injection.rst
 delete mode 100644 Documentation/networking/mac80211-injection.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b3608b177a8b..81c1834bfb57 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -77,6 +77,7 @@ Contents:
    l2tp
    lapb-module
    ltpc
+   mac80211-injection
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/mac80211-injection.rst b/Documentation/networking/mac80211-injection.rst
new file mode 100644
index 000000000000..75d4edcae852
--- /dev/null
+++ b/Documentation/networking/mac80211-injection.rst
@@ -0,0 +1,106 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================
+How to use packet injection with mac80211
+=========================================
+
+mac80211 now allows arbitrary packets to be injected down any Monitor Mode
+interface from userland.  The packet you inject needs to be composed in the
+following format::
+
+ [ radiotap header  ]
+ [ ieee80211 header ]
+ [ payload ]
+
+The radiotap format is discussed in
+./Documentation/networking/radiotap-headers.txt.
+
+Despite many radiotap parameters being currently defined, most only make sense
+to appear on received packets.  The following information is parsed from the
+radiotap headers and used to control injection:
+
+ * IEEE80211_RADIOTAP_FLAGS
+
+   =========================  ===========================================
+   IEEE80211_RADIOTAP_F_FCS   FCS will be removed and recalculated
+   IEEE80211_RADIOTAP_F_WEP   frame will be encrypted if key available
+   IEEE80211_RADIOTAP_F_FRAG  frame will be fragmented if longer than the
+			      current fragmentation threshold.
+   =========================  ===========================================
+
+ * IEEE80211_RADIOTAP_TX_FLAGS
+
+   =============================  ========================================
+   IEEE80211_RADIOTAP_F_TX_NOACK  frame should be sent without waiting for
+				  an ACK even if it is a unicast frame
+   =============================  ========================================
+
+ * IEEE80211_RADIOTAP_RATE
+
+   legacy rate for the transmission (only for devices without own rate control)
+
+ * IEEE80211_RADIOTAP_MCS
+
+   HT rate for the transmission (only for devices without own rate control).
+   Also some flags are parsed
+
+   ============================  ========================
+   IEEE80211_RADIOTAP_MCS_SGI    use short guard interval
+   IEEE80211_RADIOTAP_MCS_BW_40  send in HT40 mode
+   ============================  ========================
+
+ * IEEE80211_RADIOTAP_DATA_RETRIES
+
+   number of retries when either IEEE80211_RADIOTAP_RATE or
+   IEEE80211_RADIOTAP_MCS was used
+
+ * IEEE80211_RADIOTAP_VHT
+
+   VHT mcs and number of streams used in the transmission (only for devices
+   without own rate control). Also other fields are parsed
+
+   flags field
+	IEEE80211_RADIOTAP_VHT_FLAG_SGI: use short guard interval
+
+   bandwidth field
+	* 1: send using 40MHz channel width
+	* 4: send using 80MHz channel width
+	* 11: send using 160MHz channel width
+
+The injection code can also skip all other currently defined radiotap fields
+facilitating replay of captured radiotap headers directly.
+
+Here is an example valid radiotap header defining some parameters::
+
+	0x00, 0x00, // <-- radiotap version
+	0x0b, 0x00, // <- radiotap header length
+	0x04, 0x0c, 0x00, 0x00, // <-- bitmap
+	0x6c, // <-- rate
+	0x0c, //<-- tx power
+	0x01 //<-- antenna
+
+The ieee80211 header follows immediately afterwards, looking for example like
+this::
+
+	0x08, 0x01, 0x00, 0x00,
+	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+	0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
+	0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
+	0x10, 0x86
+
+Then lastly there is the payload.
+
+After composing the packet contents, it is sent by send()-ing it to a logical
+mac80211 interface that is in Monitor mode.  Libpcap can also be used,
+(which is easier than doing the work to bind the socket to the right
+interface), along the following lines::
+
+	ppcap = pcap_open_live(szInterfaceName, 800, 1, 20, szErrbuf);
+	...
+	r = pcap_inject(ppcap, u8aSendBuffer, nLength);
+
+You can also find a link to a complete inject application here:
+
+http://wireless.kernel.org/en/users/Documentation/packetspammer
+
+Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/mac80211-injection.txt b/Documentation/networking/mac80211-injection.txt
deleted file mode 100644
index d58d78df9ca2..000000000000
--- a/Documentation/networking/mac80211-injection.txt
+++ /dev/null
@@ -1,97 +0,0 @@
-How to use packet injection with mac80211
-=========================================
-
-mac80211 now allows arbitrary packets to be injected down any Monitor Mode
-interface from userland.  The packet you inject needs to be composed in the
-following format:
-
- [ radiotap header  ]
- [ ieee80211 header ]
- [ payload ]
-
-The radiotap format is discussed in
-./Documentation/networking/radiotap-headers.txt.
-
-Despite many radiotap parameters being currently defined, most only make sense
-to appear on received packets.  The following information is parsed from the
-radiotap headers and used to control injection:
-
- * IEEE80211_RADIOTAP_FLAGS
-
-   IEEE80211_RADIOTAP_F_FCS: FCS will be removed and recalculated
-   IEEE80211_RADIOTAP_F_WEP: frame will be encrypted if key available
-   IEEE80211_RADIOTAP_F_FRAG: frame will be fragmented if longer than the
-			      current fragmentation threshold.
-
- * IEEE80211_RADIOTAP_TX_FLAGS
-
-   IEEE80211_RADIOTAP_F_TX_NOACK: frame should be sent without waiting for
-				  an ACK even if it is a unicast frame
-
- * IEEE80211_RADIOTAP_RATE
-
-   legacy rate for the transmission (only for devices without own rate control)
-
- * IEEE80211_RADIOTAP_MCS
-
-   HT rate for the transmission (only for devices without own rate control).
-   Also some flags are parsed
-
-   IEEE80211_RADIOTAP_MCS_SGI: use short guard interval
-   IEEE80211_RADIOTAP_MCS_BW_40: send in HT40 mode
-
- * IEEE80211_RADIOTAP_DATA_RETRIES
-
-   number of retries when either IEEE80211_RADIOTAP_RATE or
-   IEEE80211_RADIOTAP_MCS was used
-
- * IEEE80211_RADIOTAP_VHT
-
-   VHT mcs and number of streams used in the transmission (only for devices
-   without own rate control). Also other fields are parsed
-
-   flags field
-   IEEE80211_RADIOTAP_VHT_FLAG_SGI: use short guard interval
-
-   bandwidth field
-   1: send using 40MHz channel width
-   4: send using 80MHz channel width
-   11: send using 160MHz channel width
-
-The injection code can also skip all other currently defined radiotap fields
-facilitating replay of captured radiotap headers directly.
-
-Here is an example valid radiotap header defining some parameters
-
-	0x00, 0x00, // <-- radiotap version
-	0x0b, 0x00, // <- radiotap header length
-	0x04, 0x0c, 0x00, 0x00, // <-- bitmap
-	0x6c, // <-- rate
-	0x0c, //<-- tx power
-	0x01 //<-- antenna
-
-The ieee80211 header follows immediately afterwards, looking for example like
-this:
-
-	0x08, 0x01, 0x00, 0x00,
-	0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
-	0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
-	0x13, 0x22, 0x33, 0x44, 0x55, 0x66,
-	0x10, 0x86
-
-Then lastly there is the payload.
-
-After composing the packet contents, it is sent by send()-ing it to a logical
-mac80211 interface that is in Monitor mode.  Libpcap can also be used,
-(which is easier than doing the work to bind the socket to the right
-interface), along the following lines:
-
-	ppcap = pcap_open_live(szInterfaceName, 800, 1, 20, szErrbuf);
-...
-	r = pcap_inject(ppcap, u8aSendBuffer, nLength);
-
-You can also find a link to a complete inject application here:
-
-http://wireless.kernel.org/en/users/Documentation/packetspammer
-
-Andy Green <andy@warmcat.com>
diff --git a/MAINTAINERS b/MAINTAINERS
index 956999d2d979..33bfc9e4aead 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -10079,7 +10079,7 @@ S:	Maintained
 W:	https://wireless.wiki.kernel.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
-F:	Documentation/networking/mac80211-injection.txt
+F:	Documentation/networking/mac80211-injection.rst
 F:	Documentation/networking/mac80211_hwsim/mac80211_hwsim.rst
 F:	drivers/net/wireless/mac80211_hwsim.[ch]
 F:	include/net/mac80211.h
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 82846aca86d9..9849c14694db 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2144,7 +2144,7 @@ static bool ieee80211_parse_tx_radiotap(struct ieee80211_local *local,
 
 		/*
 		 * Please update the file
-		 * Documentation/networking/mac80211-injection.txt
+		 * Documentation/networking/mac80211-injection.rst
 		 * when parsing new fields here.
 		 */
 
-- 
cgit v1.2.3-59-g8ed1b


From e14fd64dcda5668c5dd5e59421fc5c61ce6d5951 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:00 +0200
Subject: docs: networking: convert mpls-sysctl.txt to ReST

- add SPDX header;
- add a document title;
- mark lists as such;
- adjust indentation, whitespace and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |  1 +
 Documentation/networking/mpls-sysctl.rst | 57 ++++++++++++++++++++++++++++++++
 Documentation/networking/mpls-sysctl.txt | 48 ---------------------------
 3 files changed, 58 insertions(+), 48 deletions(-)
 create mode 100644 Documentation/networking/mpls-sysctl.rst
 delete mode 100644 Documentation/networking/mpls-sysctl.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 81c1834bfb57..a751cda83c3d 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -78,6 +78,7 @@ Contents:
    lapb-module
    ltpc
    mac80211-injection
+   mpls-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/mpls-sysctl.rst b/Documentation/networking/mpls-sysctl.rst
new file mode 100644
index 000000000000..0a2ac88404d7
--- /dev/null
+++ b/Documentation/networking/mpls-sysctl.rst
@@ -0,0 +1,57 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+MPLS Sysctl variables
+=====================
+
+/proc/sys/net/mpls/* Variables:
+===============================
+
+platform_labels - INTEGER
+	Number of entries in the platform label table.  It is not
+	possible to configure forwarding for label values equal to or
+	greater than the number of platform labels.
+
+	A dense utilization of the entries in the platform label table
+	is possible and expected as the platform labels are locally
+	allocated.
+
+	If the number of platform label table entries is set to 0 no
+	label will be recognized by the kernel and mpls forwarding
+	will be disabled.
+
+	Reducing this value will remove all label routing entries that
+	no longer fit in the table.
+
+	Possible values: 0 - 1048575
+
+	Default: 0
+
+ip_ttl_propagate - BOOL
+	Control whether TTL is propagated from the IPv4/IPv6 header to
+	the MPLS header on imposing labels and propagated from the
+	MPLS header to the IPv4/IPv6 header on popping the last label.
+
+	If disabled, the MPLS transport network will appear as a
+	single hop to transit traffic.
+
+	* 0 - disabled / RFC 3443 [Short] Pipe Model
+	* 1 - enabled / RFC 3443 Uniform Model (default)
+
+default_ttl - INTEGER
+	Default TTL value to use for MPLS packets where it cannot be
+	propagated from an IP header, either because one isn't present
+	or ip_ttl_propagate has been disabled.
+
+	Possible values: 1 - 255
+
+	Default: 255
+
+conf/<interface>/input - BOOL
+	Control whether packets can be input on this interface.
+
+	If disabled, packets will be discarded without further
+	processing.
+
+	* 0 - disabled (default)
+	* not 0 - enabled
diff --git a/Documentation/networking/mpls-sysctl.txt b/Documentation/networking/mpls-sysctl.txt
deleted file mode 100644
index 025cc9b96992..000000000000
--- a/Documentation/networking/mpls-sysctl.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-/proc/sys/net/mpls/* Variables:
-
-platform_labels - INTEGER
-	Number of entries in the platform label table.  It is not
-	possible to configure forwarding for label values equal to or
-	greater than the number of platform labels.
-
-	A dense utilization of the entries in the platform label table
-	is possible and expected as the platform labels are locally
-	allocated.
-
-	If the number of platform label table entries is set to 0 no
-	label will be recognized by the kernel and mpls forwarding
-	will be disabled.
-
-	Reducing this value will remove all label routing entries that
-	no longer fit in the table.
-
-	Possible values: 0 - 1048575
-	Default: 0
-
-ip_ttl_propagate - BOOL
-	Control whether TTL is propagated from the IPv4/IPv6 header to
-	the MPLS header on imposing labels and propagated from the
-	MPLS header to the IPv4/IPv6 header on popping the last label.
-
-	If disabled, the MPLS transport network will appear as a
-	single hop to transit traffic.
-
-	0 - disabled / RFC 3443 [Short] Pipe Model
-	1 - enabled / RFC 3443 Uniform Model (default)
-
-default_ttl - INTEGER
-	Default TTL value to use for MPLS packets where it cannot be
-	propagated from an IP header, either because one isn't present
-	or ip_ttl_propagate has been disabled.
-
-	Possible values: 1 - 255
-	Default: 255
-
-conf/<interface>/input - BOOL
-	Control whether packets can be input on this interface.
-
-	If disabled, packets will be discarded without further
-	processing.
-
-	0 - disabled (default)
-	not 0 - enabled
-- 
cgit v1.2.3-59-g8ed1b


From e98aa68223e48164c559803d25484533dfd495ba Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:01 +0200
Subject: docs: networking: convert multiqueue.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- use :field: markup;
- adjust indentation, whitespace and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/bonding.rst    |  2 +-
 Documentation/networking/index.rst      |  1 +
 Documentation/networking/multiqueue.rst | 78 ++++++++++++++++++++++++++++++++
 Documentation/networking/multiqueue.txt | 79 ---------------------------------
 4 files changed, 80 insertions(+), 80 deletions(-)
 create mode 100644 Documentation/networking/multiqueue.rst
 delete mode 100644 Documentation/networking/multiqueue.txt

diff --git a/Documentation/networking/bonding.rst b/Documentation/networking/bonding.rst
index dd49f95d28d3..24168b0d16bd 100644
--- a/Documentation/networking/bonding.rst
+++ b/Documentation/networking/bonding.rst
@@ -1639,7 +1639,7 @@ can safely be sent over either interface.  Such configurations may be achieved
 using the traffic control utilities inherent in linux.
 
 By default the bonding driver is multiqueue aware and 16 queues are created
-when the driver initializes (see Documentation/networking/multiqueue.txt
+when the driver initializes (see Documentation/networking/multiqueue.rst
 for details).  If more or less queues are desired the module parameter
 tx_queues can be used to change this value.  There is no sysfs parameter
 available as the allocation is done at module init time.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index a751cda83c3d..492658bf7c0d 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -79,6 +79,7 @@ Contents:
    ltpc
    mac80211-injection
    mpls-sysctl
+   multiqueue
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/multiqueue.rst b/Documentation/networking/multiqueue.rst
new file mode 100644
index 000000000000..0a576166e9dd
--- /dev/null
+++ b/Documentation/networking/multiqueue.rst
@@ -0,0 +1,78 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================================
+HOWTO for multiqueue network device support
+===========================================
+
+Section 1: Base driver requirements for implementing multiqueue support
+=======================================================================
+
+Intro: Kernel support for multiqueue devices
+---------------------------------------------------------
+
+Kernel support for multiqueue devices is always present.
+
+Base drivers are required to use the new alloc_etherdev_mq() or
+alloc_netdev_mq() functions to allocate the subqueues for the device.  The
+underlying kernel API will take care of the allocation and deallocation of
+the subqueue memory, as well as netdev configuration of where the queues
+exist in memory.
+
+The base driver will also need to manage the queues as it does the global
+netdev->queue_lock today.  Therefore base drivers should use the
+netif_{start|stop|wake}_subqueue() functions to manage each queue while the
+device is still operational.  netdev->queue_lock is still used when the device
+comes online or when it's completely shut down (unregister_netdev(), etc.).
+
+
+Section 2: Qdisc support for multiqueue devices
+===============================================
+
+Currently two qdiscs are optimized for multiqueue devices.  The first is the
+default pfifo_fast qdisc.  This qdisc supports one qdisc per hardware queue.
+A new round-robin qdisc, sch_multiq also supports multiple hardware queues. The
+qdisc is responsible for classifying the skb's and then directing the skb's to
+bands and queues based on the value in skb->queue_mapping.  Use this field in
+the base driver to determine which queue to send the skb to.
+
+sch_multiq has been added for hardware that wishes to avoid head-of-line
+blocking.  It will cycle through the bands and verify that the hardware queue
+associated with the band is not stopped prior to dequeuing a packet.
+
+On qdisc load, the number of bands is based on the number of queues on the
+hardware.  Once the association is made, any skb with skb->queue_mapping set,
+will be queued to the band associated with the hardware queue.
+
+
+Section 3: Brief howto using MULTIQ for multiqueue devices
+==========================================================
+
+The userspace command 'tc,' part of the iproute2 package, is used to configure
+qdiscs.  To add the MULTIQ qdisc to your network device, assuming the device
+is called eth0, run the following command::
+
+    # tc qdisc add dev eth0 root handle 1: multiq
+
+The qdisc will allocate the number of bands to equal the number of queues that
+the device reports, and bring the qdisc online.  Assuming eth0 has 4 Tx
+queues, the band mapping would look like::
+
+    band 0 => queue 0
+    band 1 => queue 1
+    band 2 => queue 2
+    band 3 => queue 3
+
+Traffic will begin flowing through each queue based on either the simple_tx_hash
+function or based on netdev->select_queue() if you have it defined.
+
+The behavior of tc filters remains the same.  However a new tc action,
+skbedit, has been added.  Assuming you wanted to route all traffic to a
+specific host, for example 192.168.0.3, through a specific queue you could use
+this action and establish a filter such as::
+
+    tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
+	    match ip dst 192.168.0.3 \
+	    action skbedit queue_mapping 3
+
+:Author: Alexander Duyck <alexander.h.duyck@intel.com>
+:Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
diff --git a/Documentation/networking/multiqueue.txt b/Documentation/networking/multiqueue.txt
deleted file mode 100644
index 4caa0e314cc2..000000000000
--- a/Documentation/networking/multiqueue.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-
-		HOWTO for multiqueue network device support
-		===========================================
-
-Section 1: Base driver requirements for implementing multiqueue support
-
-Intro: Kernel support for multiqueue devices
----------------------------------------------------------
-
-Kernel support for multiqueue devices is always present.
-
-Section 1: Base driver requirements for implementing multiqueue support
------------------------------------------------------------------------
-
-Base drivers are required to use the new alloc_etherdev_mq() or
-alloc_netdev_mq() functions to allocate the subqueues for the device.  The
-underlying kernel API will take care of the allocation and deallocation of
-the subqueue memory, as well as netdev configuration of where the queues
-exist in memory.
-
-The base driver will also need to manage the queues as it does the global
-netdev->queue_lock today.  Therefore base drivers should use the
-netif_{start|stop|wake}_subqueue() functions to manage each queue while the
-device is still operational.  netdev->queue_lock is still used when the device
-comes online or when it's completely shut down (unregister_netdev(), etc.).
-
-
-Section 2: Qdisc support for multiqueue devices
-
------------------------------------------------
-
-Currently two qdiscs are optimized for multiqueue devices.  The first is the
-default pfifo_fast qdisc.  This qdisc supports one qdisc per hardware queue.
-A new round-robin qdisc, sch_multiq also supports multiple hardware queues. The
-qdisc is responsible for classifying the skb's and then directing the skb's to
-bands and queues based on the value in skb->queue_mapping.  Use this field in
-the base driver to determine which queue to send the skb to.
-
-sch_multiq has been added for hardware that wishes to avoid head-of-line
-blocking.  It will cycle though the bands and verify that the hardware queue
-associated with the band is not stopped prior to dequeuing a packet.
-
-On qdisc load, the number of bands is based on the number of queues on the
-hardware.  Once the association is made, any skb with skb->queue_mapping set,
-will be queued to the band associated with the hardware queue.
-
-
-Section 3: Brief howto using MULTIQ for multiqueue devices
----------------------------------------------------------------
-
-The userspace command 'tc,' part of the iproute2 package, is used to configure
-qdiscs.  To add the MULTIQ qdisc to your network device, assuming the device
-is called eth0, run the following command:
-
-# tc qdisc add dev eth0 root handle 1: multiq
-
-The qdisc will allocate the number of bands to equal the number of queues that
-the device reports, and bring the qdisc online.  Assuming eth0 has 4 Tx
-queues, the band mapping would look like:
-
-band 0 => queue 0
-band 1 => queue 1
-band 2 => queue 2
-band 3 => queue 3
-
-Traffic will begin flowing through each queue based on either the simple_tx_hash
-function or based on netdev->select_queue() if you have it defined.
-
-The behavior of tc filters remains the same.  However a new tc action,
-skbedit, has been added.  Assuming you wanted to route all traffic to a
-specific host, for example 192.168.0.3, through a specific queue you could use
-this action and establish a filter such as:
-
-tc filter add dev eth0 parent 1: protocol ip prio 1 u32 \
-	match ip dst 192.168.0.3 \
-	action skbedit queue_mapping 3
-
-Author: Alexander Duyck <alexander.h.duyck@intel.com>
-Original Author: Peter P. Waskiewicz Jr. <peter.p.waskiewicz.jr@intel.com>
-- 
cgit v1.2.3-59-g8ed1b


From d9d6ef25ecab3e10966981bc58d014576da74272 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:02 +0200
Subject: docs: networking: convert netconsole.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- add notes markups;
- adjust indentation, whitespace and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/kernel-parameters.txt |   2 +-
 Documentation/admin-guide/serial-console.rst    |   2 +-
 Documentation/networking/index.rst              |   1 +
 Documentation/networking/netconsole.rst         | 239 ++++++++++++++++++++++++
 Documentation/networking/netconsole.txt         | 210 ---------------------
 drivers/net/Kconfig                             |   4 +-
 drivers/net/ethernet/toshiba/ps3_gelic_net.c    |   2 +-
 drivers/net/ethernet/toshiba/spider_net.c       |   2 +-
 8 files changed, 246 insertions(+), 216 deletions(-)
 create mode 100644 Documentation/networking/netconsole.rst
 delete mode 100644 Documentation/networking/netconsole.txt

diff --git a/Documentation/admin-guide/kernel-parameters.txt b/Documentation/admin-guide/kernel-parameters.txt
index e43f2e1f2958..398c34804bb8 100644
--- a/Documentation/admin-guide/kernel-parameters.txt
+++ b/Documentation/admin-guide/kernel-parameters.txt
@@ -638,7 +638,7 @@
 
 			See Documentation/admin-guide/serial-console.rst for more
 			information.  See
-			Documentation/networking/netconsole.txt for an
+			Documentation/networking/netconsole.rst for an
 			alternative.
 
 		uart[8250],io,<addr>[,options]
diff --git a/Documentation/admin-guide/serial-console.rst b/Documentation/admin-guide/serial-console.rst
index a8d1e36b627a..58b32832e50a 100644
--- a/Documentation/admin-guide/serial-console.rst
+++ b/Documentation/admin-guide/serial-console.rst
@@ -54,7 +54,7 @@ You will need to create a new device to use ``/dev/console``. The official
 ``/dev/console`` is now character device 5,1.
 
 (You can also use a network device as a console.  See
-``Documentation/networking/netconsole.txt`` for information on that.)
+``Documentation/networking/netconsole.rst`` for information on that.)
 
 Here's an example that will use ``/dev/ttyS1`` (COM2) as the console.
 Replace the sample values as needed.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 492658bf7c0d..e58f872d401d 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -80,6 +80,7 @@ Contents:
    mac80211-injection
    mpls-sysctl
    multiqueue
+   netconsole
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/netconsole.rst b/Documentation/networking/netconsole.rst
new file mode 100644
index 000000000000..1f5c4a04027c
--- /dev/null
+++ b/Documentation/networking/netconsole.rst
@@ -0,0 +1,239 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========
+Netconsole
+==========
+
+
+started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
+
+2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
+
+IPv6 support by Cong Wang <xiyou.wangcong@gmail.com>, Jan 1 2013
+
+Extended console support by Tejun Heo <tj@kernel.org>, May 1 2015
+
+Please send bug reports to Matt Mackall <mpm@selenic.com>
+Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
+
+Introduction:
+=============
+
+This module logs kernel printk messages over UDP allowing debugging of
+problems where disk logging fails and serial consoles are impractical.
+
+It can be used either built-in or as a module. As a built-in,
+netconsole initializes immediately after NIC cards and will bring up
+the specified interface as soon as possible. While this doesn't allow
+capture of early kernel panics, it does capture most of the boot
+process.
+
+Sender and receiver configuration:
+==================================
+
+It takes a string configuration parameter "netconsole" in the
+following format::
+
+ netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
+
+   where
+	+             if present, enable extended console support
+	src-port      source for UDP packets (defaults to 6665)
+	src-ip        source IP to use (interface address)
+	dev           network interface (eth0)
+	tgt-port      port for logging agent (6666)
+	tgt-ip        IP address for logging agent
+	tgt-macaddr   ethernet MAC address for logging agent (broadcast)
+
+Examples::
+
+ linux netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
+
+or::
+
+ insmod netconsole netconsole=@/,@10.0.0.2/
+
+or using IPv6::
+
+ insmod netconsole netconsole=@/,@fd00:1:2:3::1/
+
+It also supports logging to multiple remote agents by specifying
+parameters for the multiple agents separated by semicolons and the
+complete string enclosed in "quotes", thusly::
+
+ modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
+
+Built-in netconsole starts immediately after the TCP stack is
+initialized and attempts to bring up the supplied dev at the supplied
+address.
+
+The remote host has several options to receive the kernel messages,
+for example:
+
+1) syslogd
+
+2) netcat
+
+   On distributions using a BSD-based netcat version (e.g. Fedora,
+   openSUSE and Ubuntu) the listening port must be specified without
+   the -p switch::
+
+	'nc -u -l -p <port>' / 'nc -u -l <port>'
+
+   or::
+
+	'netcat -u -l -p <port>' / 'netcat -u -l <port>'
+
+3) socat
+
+::
+
+   socat udp-recv:<port> -
+
+Dynamic reconfiguration:
+========================
+
+Dynamic reconfigurability is a useful addition to netconsole that enables
+remote logging targets to be dynamically added, removed, or have their
+parameters reconfigured at runtime from a configfs-based userspace interface.
+[ Note that the parameters of netconsole targets that were specified/created
+from the boot/module option are not exposed via this interface, and hence
+cannot be modified dynamically. ]
+
+To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
+netconsole module (or kernel, if netconsole is built-in).
+
+Some examples follow (where configfs is mounted at the /sys/kernel/config
+mountpoint).
+
+To add a remote logging target (target names can be arbitrary)::
+
+ cd /sys/kernel/config/netconsole/
+ mkdir target1
+
+Note that newly created targets have default parameter values (as mentioned
+above) and are disabled by default -- they must first be enabled by writing
+"1" to the "enabled" attribute (usually after setting parameters accordingly)
+as described below.
+
+To remove a target::
+
+ rmdir /sys/kernel/config/netconsole/othertarget/
+
+The interface exposes these parameters of a netconsole target to userspace:
+
+	==============  =================================       ============
+	enabled		Is this target currently enabled?	(read-write)
+	extended	Extended mode enabled			(read-write)
+	dev_name	Local network interface name		(read-write)
+	local_port	Source UDP port to use			(read-write)
+	remote_port	Remote agent's UDP port			(read-write)
+	local_ip	Source IP address to use		(read-write)
+	remote_ip	Remote agent's IP address		(read-write)
+	local_mac	Local interface's MAC address		(read-only)
+	remote_mac	Remote agent's MAC address		(read-write)
+	==============  =================================       ============
+
+The "enabled" attribute is also used to control whether the parameters of
+a target can be updated or not -- you can modify the parameters of only
+disabled targets (i.e. if "enabled" is 0).
+
+To update a target's parameters::
+
+ cat enabled				# check if enabled is 1
+ echo 0 > enabled			# disable the target (if required)
+ echo eth2 > dev_name			# set local interface
+ echo 10.0.0.4 > remote_ip		# update some parameter
+ echo cb:a9:87:65:43:21 > remote_mac	# update more parameters
+ echo 1 > enabled			# enable target again
+
+You can also update the local interface dynamically. This is especially
+useful if you want to use interfaces that have newly come up (and may not
+have existed when netconsole was loaded / initialized).
+
+Extended console:
+=================
+
+If '+' is prefixed to the configuration line or "extended" config file
+is set to 1, extended console support is enabled. An example boot
+param follows::
+
+ linux netconsole=+4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
+
+Log messages are transmitted with extended metadata header in the
+following format which is the same as /dev/kmsg::
+
+ <level>,<sequnum>,<timestamp>,<contflag>;<message text>
+
+Non-printable characters in <message text> are escaped using "\xff"
+notation. If the message contains an optional dictionary, verbatim
+newline is used as the delimiter.
+
+If a message doesn't fit in a certain number of bytes (currently 1000),
+the message is split into multiple fragments by netconsole. These
+fragments are transmitted with "ncfrag" header field added::
+
+ ncfrag=<byte-offset>/<total-bytes>
+
+For example, assuming a lot smaller chunk size, a message "the first
+chunk, the 2nd chunk." may be split as follows::
+
+ 6,416,1758426,-,ncfrag=0/31;the first chunk,
+ 6,416,1758426,-,ncfrag=16/31; the 2nd chunk.
+
+Miscellaneous notes:
+====================
+
+.. Warning::
+
+   the default target ethernet setting uses the broadcast
+   ethernet address to send packets, which can cause increased load on
+   other systems on the same ethernet segment.
+
+.. Tip::
+
+   some LAN switches may be configured to suppress ethernet broadcasts
+   so it is advised to explicitly specify the remote agents' MAC addresses
+   from the config parameters passed to netconsole.
+
+.. Tip::
+
+   to find out the MAC address of, say, 10.0.0.2, you may try using::
+
+	ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
+
+.. Tip::
+
+   in case the remote logging agent is on a separate LAN subnet than
+   the sender, it is suggested to try specifying the MAC address of the
+   default gateway (you may use /sbin/route -n to find it out) as the
+   remote MAC address instead.
+
+.. note::
+
+   the network device (eth1 in the above case) can run any kind
+   of other network traffic, netconsole is not intrusive. Netconsole
+   might cause slight delays in other traffic if the volume of kernel
+   messages is high, but should have no other impact.
+
+.. note::
+
+   if you find that the remote logging agent is not receiving or
+   printing all messages from the sender, it is likely that you have set
+   the "console_loglevel" parameter (on the sender) to only send high
+   priority messages to the console. You can change this at runtime using::
+
+	dmesg -n 8
+
+   or by specifying "debug" on the kernel command line at boot, to send
+   all kernel messages to the console. A specific value for this parameter
+   can also be set using the "loglevel" kernel boot option. See the
+   dmesg(8) man page and Documentation/admin-guide/kernel-parameters.rst
+   for details.
+
+Netconsole was designed to be as instantaneous as possible, to
+enable the logging of even the most critical kernel bugs. It works
+from IRQ contexts as well, and does not enable interrupts while
+sending packets. Due to these unique needs, configuration cannot
+be more automatic, and some fundamental limitations will remain:
+only IP networks, UDP packets and ethernet devices are supported.
diff --git a/Documentation/networking/netconsole.txt b/Documentation/networking/netconsole.txt
deleted file mode 100644
index 296ea00fd3eb..000000000000
--- a/Documentation/networking/netconsole.txt
+++ /dev/null
@@ -1,210 +0,0 @@
-
-started by Ingo Molnar <mingo@redhat.com>, 2001.09.17
-2.6 port and netpoll api by Matt Mackall <mpm@selenic.com>, Sep 9 2003
-IPv6 support by Cong Wang <xiyou.wangcong@gmail.com>, Jan 1 2013
-Extended console support by Tejun Heo <tj@kernel.org>, May 1 2015
-
-Please send bug reports to Matt Mackall <mpm@selenic.com>
-Satyam Sharma <satyam.sharma@gmail.com>, and Cong Wang <xiyou.wangcong@gmail.com>
-
-Introduction:
-=============
-
-This module logs kernel printk messages over UDP allowing debugging of
-problem where disk logging fails and serial consoles are impractical.
-
-It can be used either built-in or as a module. As a built-in,
-netconsole initializes immediately after NIC cards and will bring up
-the specified interface as soon as possible. While this doesn't allow
-capture of early kernel panics, it does capture most of the boot
-process.
-
-Sender and receiver configuration:
-==================================
-
-It takes a string configuration parameter "netconsole" in the
-following format:
-
- netconsole=[+][src-port]@[src-ip]/[<dev>],[tgt-port]@<tgt-ip>/[tgt-macaddr]
-
-   where
-        +             if present, enable extended console support
-        src-port      source for UDP packets (defaults to 6665)
-        src-ip        source IP to use (interface address)
-        dev           network interface (eth0)
-        tgt-port      port for logging agent (6666)
-        tgt-ip        IP address for logging agent
-        tgt-macaddr   ethernet MAC address for logging agent (broadcast)
-
-Examples:
-
- linux netconsole=4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
-
-  or
-
- insmod netconsole netconsole=@/,@10.0.0.2/
-
-  or using IPv6
-
- insmod netconsole netconsole=@/,@fd00:1:2:3::1/
-
-It also supports logging to multiple remote agents by specifying
-parameters for the multiple agents separated by semicolons and the
-complete string enclosed in "quotes", thusly:
-
- modprobe netconsole netconsole="@/,@10.0.0.2/;@/eth1,6892@10.0.0.3/"
-
-Built-in netconsole starts immediately after the TCP stack is
-initialized and attempts to bring up the supplied dev at the supplied
-address.
-
-The remote host has several options to receive the kernel messages,
-for example:
-
-1) syslogd
-
-2) netcat
-
-   On distributions using a BSD-based netcat version (e.g. Fedora,
-   openSUSE and Ubuntu) the listening port must be specified without
-   the -p switch:
-
-   'nc -u -l -p <port>' / 'nc -u -l <port>' or
-   'netcat -u -l -p <port>' / 'netcat -u -l <port>'
-
-3) socat
-
-   'socat udp-recv:<port> -'
-
-Dynamic reconfiguration:
-========================
-
-Dynamic reconfigurability is a useful addition to netconsole that enables
-remote logging targets to be dynamically added, removed, or have their
-parameters reconfigured at runtime from a configfs-based userspace interface.
-[ Note that the parameters of netconsole targets that were specified/created
-from the boot/module option are not exposed via this interface, and hence
-cannot be modified dynamically. ]
-
-To include this feature, select CONFIG_NETCONSOLE_DYNAMIC when building the
-netconsole module (or kernel, if netconsole is built-in).
-
-Some examples follow (where configfs is mounted at the /sys/kernel/config
-mountpoint).
-
-To add a remote logging target (target names can be arbitrary):
-
- cd /sys/kernel/config/netconsole/
- mkdir target1
-
-Note that newly created targets have default parameter values (as mentioned
-above) and are disabled by default -- they must first be enabled by writing
-"1" to the "enabled" attribute (usually after setting parameters accordingly)
-as described below.
-
-To remove a target:
-
- rmdir /sys/kernel/config/netconsole/othertarget/
-
-The interface exposes these parameters of a netconsole target to userspace:
-
-	enabled		Is this target currently enabled?	(read-write)
-	extended	Extended mode enabled			(read-write)
-	dev_name	Local network interface name		(read-write)
-	local_port	Source UDP port to use			(read-write)
-	remote_port	Remote agent's UDP port			(read-write)
-	local_ip	Source IP address to use		(read-write)
-	remote_ip	Remote agent's IP address		(read-write)
-	local_mac	Local interface's MAC address		(read-only)
-	remote_mac	Remote agent's MAC address		(read-write)
-
-The "enabled" attribute is also used to control whether the parameters of
-a target can be updated or not -- you can modify the parameters of only
-disabled targets (i.e. if "enabled" is 0).
-
-To update a target's parameters:
-
- cat enabled				# check if enabled is 1
- echo 0 > enabled			# disable the target (if required)
- echo eth2 > dev_name			# set local interface
- echo 10.0.0.4 > remote_ip		# update some parameter
- echo cb:a9:87:65:43:21 > remote_mac	# update more parameters
- echo 1 > enabled			# enable target again
-
-You can also update the local interface dynamically. This is especially
-useful if you want to use interfaces that have newly come up (and may not
-have existed when netconsole was loaded / initialized).
-
-Extended console:
-=================
-
-If '+' is prefixed to the configuration line or "extended" config file
-is set to 1, extended console support is enabled. An example boot
-param follows.
-
- linux netconsole=+4444@10.0.0.1/eth1,9353@10.0.0.2/12:34:56:78:9a:bc
-
-Log messages are transmitted with extended metadata header in the
-following format which is the same as /dev/kmsg.
-
- <level>,<sequnum>,<timestamp>,<contflag>;<message text>
-
-Non printable characters in <message text> are escaped using "\xff"
-notation. If the message contains optional dictionary, verbatim
-newline is used as the delimeter.
-
-If a message doesn't fit in certain number of bytes (currently 1000),
-the message is split into multiple fragments by netconsole. These
-fragments are transmitted with "ncfrag" header field added.
-
- ncfrag=<byte-offset>/<total-bytes>
-
-For example, assuming a lot smaller chunk size, a message "the first
-chunk, the 2nd chunk." may be split as follows.
-
- 6,416,1758426,-,ncfrag=0/31;the first chunk,
- 6,416,1758426,-,ncfrag=16/31; the 2nd chunk.
-
-Miscellaneous notes:
-====================
-
-WARNING: the default target ethernet setting uses the broadcast
-ethernet address to send packets, which can cause increased load on
-other systems on the same ethernet segment.
-
-TIP: some LAN switches may be configured to suppress ethernet broadcasts
-so it is advised to explicitly specify the remote agents' MAC addresses
-from the config parameters passed to netconsole.
-
-TIP: to find out the MAC address of, say, 10.0.0.2, you may try using:
-
- ping -c 1 10.0.0.2 ; /sbin/arp -n | grep 10.0.0.2
-
-TIP: in case the remote logging agent is on a separate LAN subnet than
-the sender, it is suggested to try specifying the MAC address of the
-default gateway (you may use /sbin/route -n to find it out) as the
-remote MAC address instead.
-
-NOTE: the network device (eth1 in the above case) can run any kind
-of other network traffic, netconsole is not intrusive. Netconsole
-might cause slight delays in other traffic if the volume of kernel
-messages is high, but should have no other impact.
-
-NOTE: if you find that the remote logging agent is not receiving or
-printing all messages from the sender, it is likely that you have set
-the "console_loglevel" parameter (on the sender) to only send high
-priority messages to the console. You can change this at runtime using:
-
- dmesg -n 8
-
-or by specifying "debug" on the kernel command line at boot, to send
-all kernel messages to the console. A specific value for this parameter
-can also be set using the "loglevel" kernel boot option. See the
-dmesg(8) man page and Documentation/admin-guide/kernel-parameters.rst for details.
-
-Netconsole was designed to be as instantaneous as possible, to
-enable the logging of even the most critical kernel bugs. It works
-from IRQ contexts as well, and does not enable interrupts while
-sending packets. Due to these unique needs, configuration cannot
-be more automatic, and some fundamental limitations will remain:
-only IP networks, UDP packets and ethernet devices are supported.
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index c822f4a6d166..ad64be98330f 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -302,7 +302,7 @@ config NETCONSOLE
 	tristate "Network console logging support"
 	---help---
 	  If you want to log kernel messages over the network, enable this.
-	  See <file:Documentation/networking/netconsole.txt> for details.
+	  See <file:Documentation/networking/netconsole.rst> for details.
 
 config NETCONSOLE_DYNAMIC
 	bool "Dynamic reconfiguration of logging targets"
@@ -312,7 +312,7 @@ config NETCONSOLE_DYNAMIC
 	  This option enables the ability to dynamically reconfigure target
 	  parameters (interface, IP addresses, port numbers, MAC addresses)
 	  at runtime through a userspace interface exported using configfs.
-	  See <file:Documentation/networking/netconsole.txt> for details.
+	  See <file:Documentation/networking/netconsole.rst> for details.
 
 config NETPOLL
 	def_bool NETCONSOLE
diff --git a/drivers/net/ethernet/toshiba/ps3_gelic_net.c b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
index 070dd6fa9401..310e6839c6e5 100644
--- a/drivers/net/ethernet/toshiba/ps3_gelic_net.c
+++ b/drivers/net/ethernet/toshiba/ps3_gelic_net.c
@@ -1150,7 +1150,7 @@ static irqreturn_t gelic_card_interrupt(int irq, void *ptr)
  * gelic_net_poll_controller - artificial interrupt for netconsole etc.
  * @netdev: interface device structure
  *
- * see Documentation/networking/netconsole.txt
+ * see Documentation/networking/netconsole.rst
  */
 void gelic_net_poll_controller(struct net_device *netdev)
 {
diff --git a/drivers/net/ethernet/toshiba/spider_net.c b/drivers/net/ethernet/toshiba/spider_net.c
index 6576271642c1..3902b3aeb0c2 100644
--- a/drivers/net/ethernet/toshiba/spider_net.c
+++ b/drivers/net/ethernet/toshiba/spider_net.c
@@ -1615,7 +1615,7 @@ spider_net_interrupt(int irq, void *ptr)
  * spider_net_poll_controller - artificial interrupt for netconsole etc.
  * @netdev: interface device structure
  *
- * see Documentation/networking/netconsole.txt
+ * see Documentation/networking/netconsole.rst
  */
 static void
 spider_net_poll_controller(struct net_device *netdev)
-- 
cgit v1.2.3-59-g8ed1b


From ea5bacaa2cec6967ed337f4d0ad6034123ca737b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:03 +0200
Subject: docs: networking: convert netdev-features.txt to ReST

Not much to be done here:

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/checksum-offloads.rst |   2 +-
 Documentation/networking/index.rst             |   1 +
 Documentation/networking/netdev-features.rst   | 184 +++++++++++++++++++++++++
 Documentation/networking/netdev-features.txt   | 181 ------------------------
 include/linux/netdev_features.h                |   2 +-
 5 files changed, 187 insertions(+), 183 deletions(-)
 create mode 100644 Documentation/networking/netdev-features.rst
 delete mode 100644 Documentation/networking/netdev-features.txt

diff --git a/Documentation/networking/checksum-offloads.rst b/Documentation/networking/checksum-offloads.rst
index 905c8a84b103..69b23cf6879e 100644
--- a/Documentation/networking/checksum-offloads.rst
+++ b/Documentation/networking/checksum-offloads.rst
@@ -59,7 +59,7 @@ recomputed for each resulting segment.  See the skbuff.h comment (section 'E')
 for more details.
 
 A driver declares its offload capabilities in netdev->hw_features; see
-Documentation/networking/netdev-features.txt for more.  Note that a device
+Documentation/networking/netdev-features.rst for more.  Note that a device
 which only advertises NETIF_F_IP[V6]_CSUM must still obey the csum_start and
 csum_offset given in the SKB; if it tries to deduce these itself in hardware
 (as some NICs do) the driver should check that the values in the SKB match
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e58f872d401d..4c6aa3db97d4 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -81,6 +81,7 @@ Contents:
    mpls-sysctl
    multiqueue
    netconsole
+   netdev-features
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/netdev-features.rst b/Documentation/networking/netdev-features.rst
new file mode 100644
index 000000000000..a2d7d7160e39
--- /dev/null
+++ b/Documentation/networking/netdev-features.rst
@@ -0,0 +1,184 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================================
+Netdev features mess and how to get out from it alive
+=====================================================
+
+Author:
+	Michał Mirosław <mirq-linux@rere.qmqm.pl>
+
+
+
+Part I: Feature sets
+====================
+
+Long gone are the days when a network card would just take and give packets
+verbatim.  Today's devices add multiple features and bugs (read: offloads)
+that relieve an OS of various tasks like generating and checking checksums,
+splitting packets, classifying them.  Those capabilities and their state
+are commonly referred to as netdev features in Linux kernel world.
+
+There are currently three sets of features relevant to the driver, and
+one used internally by network core:
+
+ 1. netdev->hw_features set contains features whose state may possibly
+    be changed (enabled or disabled) for a particular device by user's
+    request.  This set should be initialized in ndo_init callback and not
+    changed later.
+
+ 2. netdev->features set contains features which are currently enabled
+    for a device.  This should be changed only by network core or in
+    error paths of ndo_set_features callback.
+
+ 3. netdev->vlan_features set contains features whose state is inherited
+    by child VLAN devices (limits netdev->features set).  This is currently
+    used for all VLAN devices whether tags are stripped or inserted in
+    hardware or software.
+
+ 4. netdev->wanted_features set contains feature set requested by user.
+    This set is filtered by ndo_fix_features callback whenever it or
+    some device-specific conditions change. This set is internal to
+    networking core and should not be referenced in drivers.
+
+
+
+Part II: Controlling enabled features
+=====================================
+
+When current feature set (netdev->features) is to be changed, new set
+is calculated and filtered by calling ndo_fix_features callback
+and netdev_fix_features(). If the resulting set differs from current
+set, it is passed to ndo_set_features callback and (if the callback
+returns success) replaces value stored in netdev->features.
+NETDEV_FEAT_CHANGE notification is issued after that whenever current
+set might have changed.
+
+The following events trigger recalculation:
+ 1. device's registration, after ndo_init returned success
+ 2. user requested changes in features state
+ 3. netdev_update_features() is called
+
+ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
+are treated as always returning success.
+
+A driver that wants to trigger recalculation must do so by calling
+netdev_update_features() while holding rtnl_lock. This should not be done
+from ndo_*_features callbacks. netdev->features should not be modified by
+driver except by means of ndo_fix_features callback.
+
+
+
+Part III: Implementation hints
+==============================
+
+ * ndo_fix_features:
+
+All dependencies between features should be resolved here. The resulting
+set can be reduced further by networking core imposed limitations (as coded
+in netdev_fix_features()). For this reason it is safer to disable a feature
+when its dependencies are not met instead of forcing the dependency on.
+
+This callback should not modify hardware nor driver state (should be
+stateless).  It can be called multiple times between successive
+ndo_set_features calls.
+
+Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
+NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
+care must be taken as the change won't affect already configured VLANs.
+
+ * ndo_set_features:
+
+Hardware should be reconfigured to match passed feature set. The set
+should not be altered unless some error condition happens that can't
+be reliably detected in ndo_fix_features. In this case, the callback
+should update netdev->features to match resulting hardware state.
+Errors returned are not (and cannot be) propagated anywhere except dmesg.
+(Note: successful return is zero, >0 means silent error.)
+
+
+
+Part IV: Features
+=================
+
+For current list of features, see include/linux/netdev_features.h.
+This section describes semantics of some of them.
+
+ * Transmit checksumming
+
+For complete description, see comments near the top of include/linux/skbuff.h.
+
+Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
+It means that device can fill TCP/UDP-like checksum anywhere in the packets
+whatever headers there might be.
+
+ * Transmit TCP segmentation offload
+
+NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
+set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
+
+ * Transmit UDP segmentation offload
+
+NETIF_F_GSO_UDP_L4 accepts a single UDP header with a payload that exceeds
+gso_size. On segmentation, it segments the payload on gso_size boundaries and
+replicates the network and UDP headers (fixing up the last one if less than
+gso_size).
+
+ * Transmit DMA from high memory
+
+On platforms where this is relevant, NETIF_F_HIGHDMA signals that
+ndo_start_xmit can handle skbs with frags in high memory.
+
+ * Transmit scatter-gather
+
+Those features say that ndo_start_xmit can handle fragmented skbs:
+NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
+chained skbs (skb->next/prev list).
+
+ * Software features
+
+Features contained in NETIF_F_SOFT_FEATURES are features of networking
+stack. Driver should not change behaviour based on them.
+
+ * LLTX driver (deprecated for hardware drivers)
+
+NETIF_F_LLTX is meant to be used by drivers that don't need locking at all,
+e.g. software tunnels.
+
+This is also used in a few legacy drivers that implement their
+own locking, don't use it for new (hardware) drivers.
+
+ * netns-local device
+
+NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
+network namespaces (e.g. loopback).
+
+Don't use it in drivers.
+
+ * VLAN challenged
+
+NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
+headers. Some drivers set this because the cards can't handle the bigger MTU.
+[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
+VLANs. This may be not useful, though.]
+
+*  rx-fcs
+
+This requests that the NIC append the Ethernet Frame Checksum (FCS)
+to the end of the skb data.  This allows sniffers and other tools to
+read the CRC recorded by the NIC on receipt of the packet.
+
+*  rx-all
+
+This requests that the NIC receive all possible frames, including errored
+frames (such as bad FCS, etc).  This can be helpful when sniffing a link with
+bad packets on it.  Some NICs may receive more packets if also put into normal
+PROMISC mode.
+
+*  rx-gro-hw
+
+This requests that the NIC enables Hardware GRO (generic receive offload).
+Hardware GRO is basically the exact reverse of TSO, and is generally
+stricter than Hardware LRO.  A packet stream merged by Hardware GRO must
+be re-segmentable by GSO or TSO back to the exact original packet stream.
+Hardware GRO is dependent on RXCSUM since every packet successfully merged
+by hardware must also have the checksum verified by hardware.
diff --git a/Documentation/networking/netdev-features.txt b/Documentation/networking/netdev-features.txt
deleted file mode 100644
index 58dd1c1e3c65..000000000000
--- a/Documentation/networking/netdev-features.txt
+++ /dev/null
@@ -1,181 +0,0 @@
-Netdev features mess and how to get out from it alive
-=====================================================
-
-Author:
-	Michał Mirosław <mirq-linux@rere.qmqm.pl>
-
-
-
- Part I: Feature sets
-======================
-
-Long gone are the days when a network card would just take and give packets
-verbatim.  Today's devices add multiple features and bugs (read: offloads)
-that relieve an OS of various tasks like generating and checking checksums,
-splitting packets, classifying them.  Those capabilities and their state
-are commonly referred to as netdev features in Linux kernel world.
-
-There are currently three sets of features relevant to the driver, and
-one used internally by network core:
-
- 1. netdev->hw_features set contains features whose state may possibly
-    be changed (enabled or disabled) for a particular device by user's
-    request.  This set should be initialized in ndo_init callback and not
-    changed later.
-
- 2. netdev->features set contains features which are currently enabled
-    for a device.  This should be changed only by network core or in
-    error paths of ndo_set_features callback.
-
- 3. netdev->vlan_features set contains features whose state is inherited
-    by child VLAN devices (limits netdev->features set).  This is currently
-    used for all VLAN devices whether tags are stripped or inserted in
-    hardware or software.
-
- 4. netdev->wanted_features set contains feature set requested by user.
-    This set is filtered by ndo_fix_features callback whenever it or
-    some device-specific conditions change. This set is internal to
-    networking core and should not be referenced in drivers.
-
-
-
- Part II: Controlling enabled features
-=======================================
-
-When current feature set (netdev->features) is to be changed, new set
-is calculated and filtered by calling ndo_fix_features callback
-and netdev_fix_features(). If the resulting set differs from current
-set, it is passed to ndo_set_features callback and (if the callback
-returns success) replaces value stored in netdev->features.
-NETDEV_FEAT_CHANGE notification is issued after that whenever current
-set might have changed.
-
-The following events trigger recalculation:
- 1. device's registration, after ndo_init returned success
- 2. user requested changes in features state
- 3. netdev_update_features() is called
-
-ndo_*_features callbacks are called with rtnl_lock held. Missing callbacks
-are treated as always returning success.
-
-A driver that wants to trigger recalculation must do so by calling
-netdev_update_features() while holding rtnl_lock. This should not be done
-from ndo_*_features callbacks. netdev->features should not be modified by
-driver except by means of ndo_fix_features callback.
-
-
-
- Part III: Implementation hints
-================================
-
- * ndo_fix_features:
-
-All dependencies between features should be resolved here. The resulting
-set can be reduced further by networking core imposed limitations (as coded
-in netdev_fix_features()). For this reason it is safer to disable a feature
-when its dependencies are not met instead of forcing the dependency on.
-
-This callback should not modify hardware nor driver state (should be
-stateless).  It can be called multiple times between successive
-ndo_set_features calls.
-
-Callback must not alter features contained in NETIF_F_SOFT_FEATURES or
-NETIF_F_NEVER_CHANGE sets. The exception is NETIF_F_VLAN_CHALLENGED but
-care must be taken as the change won't affect already configured VLANs.
-
- * ndo_set_features:
-
-Hardware should be reconfigured to match passed feature set. The set
-should not be altered unless some error condition happens that can't
-be reliably detected in ndo_fix_features. In this case, the callback
-should update netdev->features to match resulting hardware state.
-Errors returned are not (and cannot be) propagated anywhere except dmesg.
-(Note: successful return is zero, >0 means silent error.)
-
-
-
- Part IV: Features
-===================
-
-For current list of features, see include/linux/netdev_features.h.
-This section describes semantics of some of them.
-
- * Transmit checksumming
-
-For complete description, see comments near the top of include/linux/skbuff.h.
-
-Note: NETIF_F_HW_CSUM is a superset of NETIF_F_IP_CSUM + NETIF_F_IPV6_CSUM.
-It means that device can fill TCP/UDP-like checksum anywhere in the packets
-whatever headers there might be.
-
- * Transmit TCP segmentation offload
-
-NETIF_F_TSO_ECN means that hardware can properly split packets with CWR bit
-set, be it TCPv4 (when NETIF_F_TSO is enabled) or TCPv6 (NETIF_F_TSO6).
-
- * Transmit UDP segmentation offload
-
-NETIF_F_GSO_UDP_L4 accepts a single UDP header with a payload that exceeds
-gso_size. On segmentation, it segments the payload on gso_size boundaries and
-replicates the network and UDP headers (fixing up the last one if less than
-gso_size).
-
- * Transmit DMA from high memory
-
-On platforms where this is relevant, NETIF_F_HIGHDMA signals that
-ndo_start_xmit can handle skbs with frags in high memory.
-
- * Transmit scatter-gather
-
-Those features say that ndo_start_xmit can handle fragmented skbs:
-NETIF_F_SG --- paged skbs (skb_shinfo()->frags), NETIF_F_FRAGLIST ---
-chained skbs (skb->next/prev list).
-
- * Software features
-
-Features contained in NETIF_F_SOFT_FEATURES are features of networking
-stack. Driver should not change behaviour based on them.
-
- * LLTX driver (deprecated for hardware drivers)
-
-NETIF_F_LLTX is meant to be used by drivers that don't need locking at all,
-e.g. software tunnels.
-
-This is also used in a few legacy drivers that implement their
-own locking, don't use it for new (hardware) drivers.
-
- * netns-local device
-
-NETIF_F_NETNS_LOCAL is set for devices that are not allowed to move between
-network namespaces (e.g. loopback).
-
-Don't use it in drivers.
-
- * VLAN challenged
-
-NETIF_F_VLAN_CHALLENGED should be set for devices which can't cope with VLAN
-headers. Some drivers set this because the cards can't handle the bigger MTU.
-[FIXME: Those cases could be fixed in VLAN code by allowing only reduced-MTU
-VLANs. This may be not useful, though.]
-
-*  rx-fcs
-
-This requests that the NIC append the Ethernet Frame Checksum (FCS)
-to the end of the skb data.  This allows sniffers and other tools to
-read the CRC recorded by the NIC on receipt of the packet.
-
-*  rx-all
-
-This requests that the NIC receive all possible frames, including errored
-frames (such as bad FCS, etc).  This can be helpful when sniffing a link with
-bad packets on it.  Some NICs may receive more packets if also put into normal
-PROMISC mode.
-
-*  rx-gro-hw
-
-This requests that the NIC enables Hardware GRO (generic receive offload).
-Hardware GRO is basically the exact reverse of TSO, and is generally
-stricter than Hardware LRO.  A packet stream merged by Hardware GRO must
-be re-segmentable by GSO or TSO back to the exact original packet stream.
-Hardware GRO is dependent on RXCSUM since every packet successfully merged
-by hardware must also have the checksum verified by hardware.
diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h
index 9d53c5ad272c..2cc3cf80b49a 100644
--- a/include/linux/netdev_features.h
+++ b/include/linux/netdev_features.h
@@ -89,7 +89,7 @@ enum {
 	 * Add your fresh new feature above and remember to update
 	 * netdev_features_strings[] in net/core/ethtool.c and maybe
 	 * some feature mask #defines below. Please also describe it
-	 * in Documentation/networking/netdev-features.txt.
+	 * in Documentation/networking/netdev-features.rst.
 	 */
 
 	/**/NETDEV_FEATURE_COUNT
-- 
cgit v1.2.3-59-g8ed1b


From 482a4360c56a1ecb5ea54c00db647b0012d787cf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:04 +0200
Subject: docs: networking: convert netdevices.txt to ReST

- add SPDX header;
- adjust title markup;
- mark lists as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/can.rst        |   2 +-
 Documentation/networking/index.rst      |   1 +
 Documentation/networking/netdevices.rst | 111 ++++++++++++++++++++++++++++++++
 Documentation/networking/netdevices.txt | 104 ------------------------------
 4 files changed, 113 insertions(+), 105 deletions(-)
 create mode 100644 Documentation/networking/netdevices.rst
 delete mode 100644 Documentation/networking/netdevices.txt

diff --git a/Documentation/networking/can.rst b/Documentation/networking/can.rst
index 2fd0b51a8c52..ff05cbd05e0d 100644
--- a/Documentation/networking/can.rst
+++ b/Documentation/networking/can.rst
@@ -1058,7 +1058,7 @@ drivers you mainly have to deal with:
 - TX: Put the CAN frame from the socket buffer to the CAN controller.
 - RX: Put the CAN frame from the CAN controller to the socket buffer.
 
-See e.g. at Documentation/networking/netdevices.txt . The differences
+See e.g. at Documentation/networking/netdevices.rst . The differences
 for writing CAN network device driver are described below:
 
 
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 4c6aa3db97d4..5a320553ffba 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -82,6 +82,7 @@ Contents:
    multiqueue
    netconsole
    netdev-features
+   netdevices
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/netdevices.rst b/Documentation/networking/netdevices.rst
new file mode 100644
index 000000000000..5a85fcc80c76
--- /dev/null
+++ b/Documentation/networking/netdevices.rst
@@ -0,0 +1,111 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================================
+Network Devices, the Kernel, and You!
+=====================================
+
+
+Introduction
+============
+The following is a random collection of documentation regarding
+network devices.
+
+struct net_device allocation rules
+==================================
+Network device structures need to persist even after module is unloaded and
+must be allocated with alloc_netdev_mqs() and friends.
+If device has registered successfully, it will be freed on last use
+by free_netdev(). This is required to handle the pathologic case cleanly
+(example: rmmod mydriver </sys/class/net/myeth/mtu )
+
+alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
+private data which gets freed when the network device is freed. If
+separately allocated data is attached to the network device
+(netdev_priv(dev)) then it is up to the module exit handler to free that.
+
+MTU
+===
+Each network device has a Maximum Transfer Unit. The MTU does not
+include any link layer protocol overhead. Upper layer protocols must
+not pass a socket buffer (skb) to a device to transmit with more data
+than the mtu. The MTU does not include link layer header overhead, so
+for example on Ethernet if the standard MTU of 1500 bytes is used, the
+actual skb will contain up to 1514 bytes because of the Ethernet
+header. Devices should allow for the 4 byte VLAN header as well.
+
+Segmentation Offload (GSO, TSO) is an exception to this rule.  The
+upper layer protocol may pass a large socket buffer to the device
+transmit routine, and the device will break that up into separate
+packets based on the current MTU.
+
+MTU is symmetrical and applies both to receive and transmit. A device
+must be able to receive at least the maximum size packet allowed by
+the MTU. A network device may use the MTU as mechanism to size receive
+buffers, but the device should allow packets with VLAN header. With
+standard Ethernet mtu of 1500 bytes, the device should allow up to
+1518 byte packets (1500 + 14 header + 4 tag).  The device may either:
+drop, truncate, or pass up oversize packets, but dropping oversize
+packets is preferred.
+
+
+struct net_device synchronization rules
+=======================================
+ndo_open:
+	Synchronization: rtnl_lock() semaphore.
+	Context: process
+
+ndo_stop:
+	Synchronization: rtnl_lock() semaphore.
+	Context: process
+	Note: netif_running() is guaranteed false
+
+ndo_do_ioctl:
+	Synchronization: rtnl_lock() semaphore.
+	Context: process
+
+ndo_get_stats:
+	Synchronization: dev_base_lock rwlock.
+	Context: nominally process, but don't sleep inside an rwlock
+
+ndo_start_xmit:
+	Synchronization: __netif_tx_lock spinlock.
+
+	When the driver sets NETIF_F_LLTX in dev->features this will be
+	called without holding netif_tx_lock. In this case the driver
+	has to lock by itself when needed.
+	The locking there should also properly protect against
+	set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated.
+	Don't use it for new drivers.
+
+	Context: Process with BHs disabled or BH (timer),
+		 will be called with interrupts disabled by netconsole.
+
+	Return codes:
+
+	* NETDEV_TX_OK everything ok.
+	* NETDEV_TX_BUSY Cannot transmit packet, try later
+	  Usually a bug, means queue start/stop flow control is broken in
+	  the driver. Note: the driver must NOT put the skb in its DMA ring.
+
+ndo_tx_timeout:
+	Synchronization: netif_tx_lock spinlock; all TX queues frozen.
+	Context: BHs disabled
+	Notes: netif_queue_stopped() is guaranteed true
+
+ndo_set_rx_mode:
+	Synchronization: netif_addr_lock spinlock.
+	Context: BHs disabled
+
+struct napi_struct synchronization rules
+========================================
+napi->poll:
+	Synchronization:
+		NAPI_STATE_SCHED bit in napi->state.  Device
+		driver's ndo_stop method will invoke napi_disable() on
+		all NAPI instances which will do a sleeping poll on the
+		NAPI_STATE_SCHED napi->state bit, waiting for all pending
+		NAPI activity to cease.
+
+	Context:
+		 softirq
+		 will be called with interrupts disabled by netconsole.
diff --git a/Documentation/networking/netdevices.txt b/Documentation/networking/netdevices.txt
deleted file mode 100644
index 7fec2061a334..000000000000
--- a/Documentation/networking/netdevices.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-
-Network Devices, the Kernel, and You!
-
-
-Introduction
-============
-The following is a random collection of documentation regarding
-network devices.
-
-struct net_device allocation rules
-==================================
-Network device structures need to persist even after module is unloaded and
-must be allocated with alloc_netdev_mqs() and friends.
-If device has registered successfully, it will be freed on last use
-by free_netdev(). This is required to handle the pathologic case cleanly
-(example: rmmod mydriver </sys/class/net/myeth/mtu )
-
-alloc_netdev_mqs()/alloc_netdev() reserve extra space for driver
-private data which gets freed when the network device is freed. If
-separately allocated data is attached to the network device
-(netdev_priv(dev)) then it is up to the module exit handler to free that.
-
-MTU
-===
-Each network device has a Maximum Transfer Unit. The MTU does not
-include any link layer protocol overhead. Upper layer protocols must
-not pass a socket buffer (skb) to a device to transmit with more data
-than the mtu. The MTU does not include link layer header overhead, so
-for example on Ethernet if the standard MTU is 1500 bytes used, the
-actual skb will contain up to 1514 bytes because of the Ethernet
-header. Devices should allow for the 4 byte VLAN header as well.
-
-Segmentation Offload (GSO, TSO) is an exception to this rule.  The
-upper layer protocol may pass a large socket buffer to the device
-transmit routine, and the device will break that up into separate
-packets based on the current MTU.
-
-MTU is symmetrical and applies both to receive and transmit. A device
-must be able to receive at least the maximum size packet allowed by
-the MTU. A network device may use the MTU as mechanism to size receive
-buffers, but the device should allow packets with VLAN header. With
-standard Ethernet mtu of 1500 bytes, the device should allow up to
-1518 byte packets (1500 + 14 header + 4 tag).  The device may either:
-drop, truncate, or pass up oversize packets, but dropping oversize
-packets is preferred.
-
-
-struct net_device synchronization rules
-=======================================
-ndo_open:
-	Synchronization: rtnl_lock() semaphore.
-	Context: process
-
-ndo_stop:
-	Synchronization: rtnl_lock() semaphore.
-	Context: process
-	Note: netif_running() is guaranteed false
-
-ndo_do_ioctl:
-	Synchronization: rtnl_lock() semaphore.
-	Context: process
-
-ndo_get_stats:
-	Synchronization: dev_base_lock rwlock.
-	Context: nominally process, but don't sleep inside an rwlock
-
-ndo_start_xmit:
-	Synchronization: __netif_tx_lock spinlock.
-
-	When the driver sets NETIF_F_LLTX in dev->features this will be
-	called without holding netif_tx_lock. In this case the driver
-	has to lock by itself when needed.
-	The locking there should also properly protect against
-	set_rx_mode. WARNING: use of NETIF_F_LLTX is deprecated.
-	Don't use it for new drivers.
-
-	Context: Process with BHs disabled or BH (timer),
-	         will be called with interrupts disabled by netconsole.
-
-	Return codes: 
-	o NETDEV_TX_OK everything ok. 
-	o NETDEV_TX_BUSY Cannot transmit packet, try later 
-	  Usually a bug, means queue start/stop flow control is broken in
-	  the driver. Note: the driver must NOT put the skb in its DMA ring.
-
-ndo_tx_timeout:
-	Synchronization: netif_tx_lock spinlock; all TX queues frozen.
-	Context: BHs disabled
-	Notes: netif_queue_stopped() is guaranteed true
-
-ndo_set_rx_mode:
-	Synchronization: netif_addr_lock spinlock.
-	Context: BHs disabled
-
-struct napi_struct synchronization rules
-========================================
-napi->poll:
-	Synchronization: NAPI_STATE_SCHED bit in napi->state.  Device
-		driver's ndo_stop method will invoke napi_disable() on
-		all NAPI instances which will do a sleeping poll on the
-		NAPI_STATE_SCHED napi->state bit, waiting for all pending
-		NAPI activity to cease.
-	Context: softirq
-	         will be called with interrupts disabled by netconsole.
-- 
cgit v1.2.3-59-g8ed1b


From 0191533087a3cfbe02a0bde9939fa978cb9761fd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:05 +0200
Subject: docs: networking: convert netfilter-sysctl.txt to ReST

Not much to be done here:

- add SPDX header;
- add a document title;
- add a chapter markup;
- mark tables as such;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst            |  1 +
 Documentation/networking/netfilter-sysctl.rst | 17 +++++++++++++++++
 Documentation/networking/netfilter-sysctl.txt | 10 ----------
 3 files changed, 18 insertions(+), 10 deletions(-)
 create mode 100644 Documentation/networking/netfilter-sysctl.rst
 delete mode 100644 Documentation/networking/netfilter-sysctl.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5a320553ffba..1ae0cbef8c04 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -83,6 +83,7 @@ Contents:
    netconsole
    netdev-features
    netdevices
+   netfilter-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/netfilter-sysctl.rst b/Documentation/networking/netfilter-sysctl.rst
new file mode 100644
index 000000000000..beb6d7b275d4
--- /dev/null
+++ b/Documentation/networking/netfilter-sysctl.rst
@@ -0,0 +1,17 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==========================
+Netfilter Sysctl variables
+==========================
+
+/proc/sys/net/netfilter/* Variables:
+====================================
+
+nf_log_all_netns - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	By default, only init_net namespace can log packets into kernel log
+	with LOG target; this aims to prevent containers from flooding host
+	kernel log. If enabled, this target also works in other network
+	namespaces. This variable is only accessible from init_net.
diff --git a/Documentation/networking/netfilter-sysctl.txt b/Documentation/networking/netfilter-sysctl.txt
deleted file mode 100644
index 55791e50e169..000000000000
--- a/Documentation/networking/netfilter-sysctl.txt
+++ /dev/null
@@ -1,10 +0,0 @@
-/proc/sys/net/netfilter/* Variables:
-
-nf_log_all_netns - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	By default, only init_net namespace can log packets into kernel log
-	with LOG target; this aims to prevent containers from flooding host
-	kernel log. If enabled, this target also works in other network
-	namespaces. This variable is only accessible from init_net.
-- 
cgit v1.2.3-59-g8ed1b


From c4d5dff60f0a142937b52626653314f3d0a18420 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:06 +0200
Subject: docs: networking: convert netif-msg.txt to ReST

- add SPDX header;
- adjust title and chapter markups;
- mark lists as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |  1 +
 Documentation/networking/netif-msg.rst | 95 ++++++++++++++++++++++++++++++++++
 Documentation/networking/netif-msg.txt | 79 ----------------------------
 3 files changed, 96 insertions(+), 79 deletions(-)
 create mode 100644 Documentation/networking/netif-msg.rst
 delete mode 100644 Documentation/networking/netif-msg.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 1ae0cbef8c04..d98509f15363 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -84,6 +84,7 @@ Contents:
    netdev-features
    netdevices
    netfilter-sysctl
+   netif-msg
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/netif-msg.rst b/Documentation/networking/netif-msg.rst
new file mode 100644
index 000000000000..b20d265a734d
--- /dev/null
+++ b/Documentation/networking/netif-msg.rst
@@ -0,0 +1,95 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============
+NETIF Msg Level
+===============
+
+The design of the network interface message level setting.
+
+History
+-------
+
+ The design of the debugging message interface was guided and
+ constrained by backwards compatibility with previous practice.  It is useful
+ to understand the history and evolution in order to understand current
+ practice and relate it to older driver source code.
+
+ From the beginning of Linux, each network device driver has had a local
+ integer variable that controls the debug message level.  The message
+ level ranged from 0 to 7, and monotonically increased in verbosity.
+
+ The message levels were not precisely defined past level 3, but were
+ always implemented within +-1 of the specified level.  Drivers tended
+ to shed the more verbose level messages as they matured.
+
+   - 0  Minimal messages, only essential information on fatal errors.
+   - 1  Standard messages, initialization status.  No run-time messages
+   - 2  Special media selection messages, generally timer-driven.
+   - 3  Interface starts and stops, including normal status messages
+   - 4  Tx and Rx frame error messages, and abnormal driver operation
+   - 5  Tx packet queue information, interrupt events.
+   - 6  Status on each completed Tx packet and received Rx packets
+   - 7  Initial contents of Tx and Rx packets
+
+ Initially this message level variable was uniquely named in each driver
+ e.g. "lance_debug", so that a kernel symbolic debugger could locate and
+ modify the setting.  When kernel modules became common, the variables
+ were consistently renamed to "debug" and allowed to be set as a module
+ parameter.
+
+ This approach worked well.  However there is always a demand for
+ additional features.  Over the years the following emerged as
+ reasonable and easily implemented enhancements
+
+   - Using an ioctl() call to modify the level.
+   - Per-interface rather than per-driver message level setting.
+   - More selective control over the type of messages emitted.
+
+ The netif_msg recommendation adds these features with only a minor
+ complexity and code size increase.
+
+ The recommendation is the following points
+
+  - Retaining the per-driver integer variable "debug" as a module
+    parameter with a default level of '1'.
+
+  - Adding a per-interface private variable named "msg_enable".  The
+    variable is a bit map rather than a level, and is initialized as::
+
+       1 << debug
+
+    Or more precisely::
+
+	debug < 0 ? 0 : 1 << min(sizeof(int)-1, debug)
+
+    Messages should change from::
+
+      if (debug > 1)
+	   printk(MSG_DEBUG "%s: ...
+
+    to::
+
+      if (np->msg_enable & NETIF_MSG_LINK)
+	   printk(MSG_DEBUG "%s: ...
+
+
+The set of message levels is named
+
+
+  =========   ===================	============
+  Old level   Name			Bit position
+  =========   ===================	============
+    0         NETIF_MSG_DRV		0x0001
+    1         NETIF_MSG_PROBE		0x0002
+    2         NETIF_MSG_LINK		0x0004
+    2         NETIF_MSG_TIMER		0x0004
+    3         NETIF_MSG_IFDOWN		0x0008
+    3         NETIF_MSG_IFUP		0x0008
+    4         NETIF_MSG_RX_ERR		0x0010
+    4         NETIF_MSG_TX_ERR		0x0010
+    5         NETIF_MSG_TX_QUEUED	0x0020
+    5         NETIF_MSG_INTR		0x0020
+    6         NETIF_MSG_TX_DONE		0x0040
+    6         NETIF_MSG_RX_STATUS	0x0040
+    7         NETIF_MSG_PKTDATA		0x0080
+  =========   ===================	============
diff --git a/Documentation/networking/netif-msg.txt b/Documentation/networking/netif-msg.txt
deleted file mode 100644
index c967ddb90d0b..000000000000
--- a/Documentation/networking/netif-msg.txt
+++ /dev/null
@@ -1,79 +0,0 @@
-
-________________
-NETIF Msg Level
-
-The design of the network interface message level setting.
-
-History
-
- The design of the debugging message interface was guided and
- constrained by backwards compatibility previous practice.  It is useful
- to understand the history and evolution in order to understand current
- practice and relate it to older driver source code.
-
- From the beginning of Linux, each network device driver has had a local
- integer variable that controls the debug message level.  The message
- level ranged from 0 to 7, and monotonically increased in verbosity.
-
- The message level was not precisely defined past level 3, but were
- always implemented within +-1 of the specified level.  Drivers tended
- to shed the more verbose level messages as they matured.
-    0  Minimal messages, only essential information on fatal errors.
-    1  Standard messages, initialization status.  No run-time messages
-    2  Special media selection messages, generally timer-driver.
-    3  Interface starts and stops, including normal status messages
-    4  Tx and Rx frame error messages, and abnormal driver operation
-    5  Tx packet queue information, interrupt events.
-    6  Status on each completed Tx packet and received Rx packets
-    7  Initial contents of Tx and Rx packets
-
- Initially this message level variable was uniquely named in each driver
- e.g. "lance_debug", so that a kernel symbolic debugger could locate and
- modify the setting.  When kernel modules became common, the variables
- were consistently renamed to "debug" and allowed to be set as a module
- parameter.
-
- This approach worked well.  However there is always a demand for
- additional features.  Over the years the following emerged as
- reasonable and easily implemented enhancements
-   Using an ioctl() call to modify the level.
-   Per-interface rather than per-driver message level setting.
-   More selective control over the type of messages emitted.
-
- The netif_msg recommendation adds these features with only a minor
- complexity and code size increase.
-
- The recommendation is the following points
-    Retaining the per-driver integer variable "debug" as a module
-    parameter with a default level of '1'.
-
-    Adding a per-interface private variable named "msg_enable".  The
-    variable is a bit map rather than a level, and is initialized as
-       1 << debug
-    Or more precisely
-        debug < 0 ? 0 : 1 << min(sizeof(int)-1, debug)
-
-    Messages should changes from
-      if (debug > 1)
-           printk(MSG_DEBUG "%s: ...
-    to
-      if (np->msg_enable & NETIF_MSG_LINK)
-           printk(MSG_DEBUG "%s: ...
-
-
-The set of message levels is named
-  Old level   Name   Bit position
-    0    NETIF_MSG_DRV		0x0001
-    1    NETIF_MSG_PROBE	0x0002
-    2    NETIF_MSG_LINK		0x0004
-    2    NETIF_MSG_TIMER	0x0004
-    3    NETIF_MSG_IFDOWN	0x0008
-    3    NETIF_MSG_IFUP		0x0008
-    4    NETIF_MSG_RX_ERR	0x0010
-    4    NETIF_MSG_TX_ERR	0x0010
-    5    NETIF_MSG_TX_QUEUED	0x0020
-    5    NETIF_MSG_INTR		0x0020
-    6    NETIF_MSG_TX_DONE	0x0040
-    6    NETIF_MSG_RX_STATUS	0x0040
-    7    NETIF_MSG_PKTDATA	0x0080
-
-- 
cgit v1.2.3-59-g8ed1b


From 13df433f8c1333cd0f60e5e1878380a37576118a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:07 +0200
Subject: docs: networking: convert nf_conntrack-sysctl.txt to ReST

- add SPDX header;
- add a document title;
- mark lists as such;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst               |   1 +
 Documentation/networking/nf_conntrack-sysctl.rst | 179 +++++++++++++++++++++++
 Documentation/networking/nf_conntrack-sysctl.txt | 172 ----------------------
 3 files changed, 180 insertions(+), 172 deletions(-)
 create mode 100644 Documentation/networking/nf_conntrack-sysctl.rst
 delete mode 100644 Documentation/networking/nf_conntrack-sysctl.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index d98509f15363..e5128bb7e7df 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -85,6 +85,7 @@ Contents:
    netdevices
    netfilter-sysctl
    netif-msg
+   nf_conntrack-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/nf_conntrack-sysctl.rst b/Documentation/networking/nf_conntrack-sysctl.rst
new file mode 100644
index 000000000000..11a9b76786cb
--- /dev/null
+++ b/Documentation/networking/nf_conntrack-sysctl.rst
@@ -0,0 +1,179 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+Netfilter Conntrack Sysctl variables
+====================================
+
+/proc/sys/net/netfilter/nf_conntrack_* Variables:
+=================================================
+
+nf_conntrack_acct - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	Enable connection tracking flow accounting. 64-bit byte and packet
+	counters per flow are added.
+
+nf_conntrack_buckets - INTEGER
+	Size of hash table. If not specified as parameter during module
+	loading, the default size is calculated by dividing total memory
+	by 16384 to determine the number of buckets but the hash table will
+	never have fewer than 32 and limited to 16384 buckets. For systems
+	with more than 4GB of memory it will be 65536 buckets.
+	This sysctl is only writeable in the initial net namespace.
+
+nf_conntrack_checksum - BOOLEAN
+	- 0 - disabled
+	- not 0 - enabled (default)
+
+	Verify checksum of incoming packets. Packets with bad checksums are
+	in INVALID state. If this is enabled, such packets will not be
+	considered for connection tracking.
+
+nf_conntrack_count - INTEGER (read-only)
+	Number of currently allocated flow entries.
+
+nf_conntrack_events - BOOLEAN
+	- 0 - disabled
+	- not 0 - enabled (default)
+
+	If this option is enabled, the connection tracking code will
+	provide userspace with connection tracking events via ctnetlink.
+
+nf_conntrack_expect_max - INTEGER
+	Maximum size of expectation table.  Default value is
+	nf_conntrack_buckets / 256. Minimum is 1.
+
+nf_conntrack_frag6_high_thresh - INTEGER
+	default 262144
+
+	Maximum memory used to reassemble IPv6 fragments.  When
+	nf_conntrack_frag6_high_thresh bytes of memory is allocated for this
+	purpose, the fragment handler will toss packets until
+	nf_conntrack_frag6_low_thresh is reached.
+
+nf_conntrack_frag6_low_thresh - INTEGER
+	default 196608
+
+	See nf_conntrack_frag6_high_thresh
+
+nf_conntrack_frag6_timeout - INTEGER (seconds)
+	default 60
+
+	Time to keep an IPv6 fragment in memory.
+
+nf_conntrack_generic_timeout - INTEGER (seconds)
+	default 600
+
+	Default for generic timeout.  This refers to layer 4 unknown/unsupported
+	protocols.
+
+nf_conntrack_helper - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	Enable automatic conntrack helper assignment.
+	If disabled it is required to set up iptables rules to assign
+	helpers to connections.  See the CT target description in the
+	iptables-extensions(8) man page for further information.
+
+nf_conntrack_icmp_timeout - INTEGER (seconds)
+	default 30
+
+	Default for ICMP timeout.
+
+nf_conntrack_icmpv6_timeout - INTEGER (seconds)
+	default 30
+
+	Default for ICMP6 timeout.
+
+nf_conntrack_log_invalid - INTEGER
+	- 0   - disable (default)
+	- 1   - log ICMP packets
+	- 6   - log TCP packets
+	- 17  - log UDP packets
+	- 33  - log DCCP packets
+	- 41  - log ICMPv6 packets
+	- 136 - log UDPLITE packets
+	- 255 - log packets of any protocol
+
+	Log invalid packets of a type specified by value.
+
+nf_conntrack_max - INTEGER
+	Size of connection tracking table.  Default value is
+	nf_conntrack_buckets value * 4.
+
+nf_conntrack_tcp_be_liberal - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	Be conservative in what you do, be liberal in what you accept from others.
+	If it's non-zero, we mark only out of window RST segments as INVALID.
+
+nf_conntrack_tcp_loose - BOOLEAN
+	- 0 - disabled
+	- not 0 - enabled (default)
+
+	If it is set to zero, we disable picking up already established
+	connections.
+
+nf_conntrack_tcp_max_retrans - INTEGER
+	default 3
+
+	Maximum number of packets that can be retransmitted without
+	receiving an (acceptable) ACK from the destination. If this number
+	is reached, a shorter timer will be started.
+
+nf_conntrack_tcp_timeout_close - INTEGER (seconds)
+	default 10
+
+nf_conntrack_tcp_timeout_close_wait - INTEGER (seconds)
+	default 60
+
+nf_conntrack_tcp_timeout_established - INTEGER (seconds)
+	default 432000 (5 days)
+
+nf_conntrack_tcp_timeout_fin_wait - INTEGER (seconds)
+	default 120
+
+nf_conntrack_tcp_timeout_last_ack - INTEGER (seconds)
+	default 30
+
+nf_conntrack_tcp_timeout_max_retrans - INTEGER (seconds)
+	default 300
+
+nf_conntrack_tcp_timeout_syn_recv - INTEGER (seconds)
+	default 60
+
+nf_conntrack_tcp_timeout_syn_sent - INTEGER (seconds)
+	default 120
+
+nf_conntrack_tcp_timeout_time_wait - INTEGER (seconds)
+	default 120
+
+nf_conntrack_tcp_timeout_unacknowledged - INTEGER (seconds)
+	default 300
+
+nf_conntrack_timestamp - BOOLEAN
+	- 0 - disabled (default)
+	- not 0 - enabled
+
+	Enable connection tracking flow timestamping.
+
+nf_conntrack_udp_timeout - INTEGER (seconds)
+	default 30
+
+nf_conntrack_udp_timeout_stream - INTEGER (seconds)
+	default 120
+
+	This extended timeout will be used in case there is a UDP stream
+	detected.
+
+nf_conntrack_gre_timeout - INTEGER (seconds)
+	default 30
+
+nf_conntrack_gre_timeout_stream - INTEGER (seconds)
+	default 180
+
+	This extended timeout will be used in case there is a GRE stream
+	detected.
diff --git a/Documentation/networking/nf_conntrack-sysctl.txt b/Documentation/networking/nf_conntrack-sysctl.txt
deleted file mode 100644
index f75c2ce6e136..000000000000
--- a/Documentation/networking/nf_conntrack-sysctl.txt
+++ /dev/null
@@ -1,172 +0,0 @@
-/proc/sys/net/netfilter/nf_conntrack_* Variables:
-
-nf_conntrack_acct - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	Enable connection tracking flow accounting. 64-bit byte and packet
-	counters per flow are added.
-
-nf_conntrack_buckets - INTEGER
-	Size of hash table. If not specified as parameter during module
-	loading, the default size is calculated by dividing total memory
-	by 16384 to determine the number of buckets but the hash table will
-	never have fewer than 32 and limited to 16384 buckets. For systems
-	with more than 4GB of memory it will be 65536 buckets.
-	This sysctl is only writeable in the initial net namespace.
-
-nf_conntrack_checksum - BOOLEAN
-	0 - disabled
-	not 0 - enabled (default)
-
-	Verify checksum of incoming packets. Packets with bad checksums are
-	in INVALID state. If this is enabled, such packets will not be
-	considered for connection tracking.
-
-nf_conntrack_count - INTEGER (read-only)
-	Number of currently allocated flow entries.
-
-nf_conntrack_events - BOOLEAN
-	0 - disabled
-	not 0 - enabled (default)
-
-	If this option is enabled, the connection tracking code will
-	provide userspace with connection tracking events via ctnetlink.
-
-nf_conntrack_expect_max - INTEGER
-	Maximum size of expectation table.  Default value is
-	nf_conntrack_buckets / 256. Minimum is 1.
-
-nf_conntrack_frag6_high_thresh - INTEGER
-	default 262144
-
-	Maximum memory used to reassemble IPv6 fragments.  When
-	nf_conntrack_frag6_high_thresh bytes of memory is allocated for this
-	purpose, the fragment handler will toss packets until
-	nf_conntrack_frag6_low_thresh is reached.
-
-nf_conntrack_frag6_low_thresh - INTEGER
-	default 196608
-
-	See nf_conntrack_frag6_low_thresh
-
-nf_conntrack_frag6_timeout - INTEGER (seconds)
-	default 60
-
-	Time to keep an IPv6 fragment in memory.
-
-nf_conntrack_generic_timeout - INTEGER (seconds)
-	default 600
-
-	Default for generic timeout.  This refers to layer 4 unknown/unsupported
-	protocols.
-
-nf_conntrack_helper - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	Enable automatic conntrack helper assignment.
-	If disabled it is required to set up iptables rules to assign
-	helpers to connections.  See the CT target description in the
-	iptables-extensions(8) man page for further information.
-
-nf_conntrack_icmp_timeout - INTEGER (seconds)
-	default 30
-
-	Default for ICMP timeout.
-
-nf_conntrack_icmpv6_timeout - INTEGER (seconds)
-	default 30
-
-	Default for ICMP6 timeout.
-
-nf_conntrack_log_invalid - INTEGER
-	0   - disable (default)
-	1   - log ICMP packets
-	6   - log TCP packets
-	17  - log UDP packets
-	33  - log DCCP packets
-	41  - log ICMPv6 packets
-	136 - log UDPLITE packets
-	255 - log packets of any protocol
-
-	Log invalid packets of a type specified by value.
-
-nf_conntrack_max - INTEGER
-	Size of connection tracking table.  Default value is
-	nf_conntrack_buckets value * 4.
-
-nf_conntrack_tcp_be_liberal - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	Be conservative in what you do, be liberal in what you accept from others.
-	If it's non-zero, we mark only out of window RST segments as INVALID.
-
-nf_conntrack_tcp_loose - BOOLEAN
-	0 - disabled
-	not 0 - enabled (default)
-
-	If it is set to zero, we disable picking up already established
-	connections.
-
-nf_conntrack_tcp_max_retrans - INTEGER
-	default 3
-
-	Maximum number of packets that can be retransmitted without
-	received an (acceptable) ACK from the destination. If this number
-	is reached, a shorter timer will be started.
-
-nf_conntrack_tcp_timeout_close - INTEGER (seconds)
-	default 10
-
-nf_conntrack_tcp_timeout_close_wait - INTEGER (seconds)
-	default 60
-
-nf_conntrack_tcp_timeout_established - INTEGER (seconds)
-	default 432000 (5 days)
-
-nf_conntrack_tcp_timeout_fin_wait - INTEGER (seconds)
-	default 120
-
-nf_conntrack_tcp_timeout_last_ack - INTEGER (seconds)
-	default 30
-
-nf_conntrack_tcp_timeout_max_retrans - INTEGER (seconds)
-	default 300
-
-nf_conntrack_tcp_timeout_syn_recv - INTEGER (seconds)
-	default 60
-
-nf_conntrack_tcp_timeout_syn_sent - INTEGER (seconds)
-	default 120
-
-nf_conntrack_tcp_timeout_time_wait - INTEGER (seconds)
-	default 120
-
-nf_conntrack_tcp_timeout_unacknowledged - INTEGER (seconds)
-	default 300
-
-nf_conntrack_timestamp - BOOLEAN
-	0 - disabled (default)
-	not 0 - enabled
-
-	Enable connection tracking flow timestamping.
-
-nf_conntrack_udp_timeout - INTEGER (seconds)
-	default 30
-
-nf_conntrack_udp_timeout_stream - INTEGER (seconds)
-	default 120
-
-	This extended timeout will be used in case there is an UDP stream
-	detected.
-
-nf_conntrack_gre_timeout - INTEGER (seconds)
-	default 30
-
-nf_conntrack_gre_timeout_stream - INTEGER (seconds)
-	default 180
-
-	This extended timeout will be used in case there is an GRE stream
-	detected.
-- 
cgit v1.2.3-59-g8ed1b


From aa3764276a4bc1a927ab8feaad5a255234763d9d Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:08 +0200
Subject: docs: networking: convert nf_flowtable.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- add notes markups;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst        |   1 +
 Documentation/networking/nf_flowtable.rst | 117 ++++++++++++++++++++++++++++++
 Documentation/networking/nf_flowtable.txt | 112 ----------------------------
 3 files changed, 118 insertions(+), 112 deletions(-)
 create mode 100644 Documentation/networking/nf_flowtable.rst
 delete mode 100644 Documentation/networking/nf_flowtable.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e5128bb7e7df..c4e8a43741be 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -86,6 +86,7 @@ Contents:
    netfilter-sysctl
    netif-msg
    nf_conntrack-sysctl
+   nf_flowtable
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/nf_flowtable.rst b/Documentation/networking/nf_flowtable.rst
new file mode 100644
index 000000000000..b6e1fa141aae
--- /dev/null
+++ b/Documentation/networking/nf_flowtable.rst
@@ -0,0 +1,117 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+Netfilter's flowtable infrastructure
+====================================
+
+This documentation describes the software flowtable infrastructure available in
+Netfilter since Linux kernel 4.16.
+
+Overview
+--------
+
+Initial packets follow the classic forwarding path, once the flow enters the
+established state according to the conntrack semantics (ie. we have seen traffic
+in both directions), then you can decide to offload the flow to the flowtable
+from the forward chain via the 'flow offload' action available in nftables.
+
+Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
+output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
+path (the visible effect is that you do not see these packets from any of the
+netfilter hooks coming after the ingress). In case of flowtable miss, the packet
+follows the classic forward path.
+
+The flowtable uses a resizable hashtable, lookups are based on the following
+7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
+and destination ports and the input interface (useful in case there are several
+conntrack zones in place).
+
+Flowtables are populated via the 'flow offload' nftables action, so the user can
+selectively specify what flows are placed into the flow table. Hence, packets
+follow the classic forwarding path unless the user explicitly instructs packets
+to use this new alternative forwarding path via nftables policy.
+
+This is represented in Fig.1, which describes the classic forwarding path
+including the Netfilter hooks and the flowtable fastpath bypass.
+
+::
+
+					 userspace process
+					  ^              |
+					  |              |
+				     _____|____     ____\/___
+				    /          \   /         \
+				    |   input   |  |  output  |
+				    \__________/   \_________/
+					 ^               |
+					 |               |
+      _________      __________      ---------     _____\/_____
+     /         \    /          \     |Routing |   /            \
+  -->  ingress  ---> prerouting ---> |decision|   | postrouting |--> neigh_xmit
+     \_________/    \__________/     ----------   \____________/          ^
+       |      ^                          |               ^                |
+   flowtable  |                     ____\/___            |                |
+       |      |                    /         \           |                |
+    __\/___   |                    | forward |------------                |
+    |-----|   |                    \_________/                            |
+    |-----|   |                 'flow offload' rule                       |
+    |-----|   |                   adds entry to                           |
+    |_____|   |                     flowtable                             |
+       |      |                                                           |
+      / \     |                                                           |
+     /hit\_no_|                                                           |
+     \ ? /                                                                |
+      \ /                                                                 |
+       |__yes_________________fastpath bypass ____________________________|
+
+	       Fig.1 Netfilter hooks and flowtable interactions
+
+The flowtable entry also stores the NAT configuration, so all packets are
+mangled according to the NAT policy that matches the initial packets that went
+through the classic forwarding path. The TTL is decremented before calling
+neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
+path given that the transport selectors are missing, therefore flowtable lookup
+is not possible.
+
+Example configuration
+---------------------
+
+Enabling the flowtable bypass is relatively easy, you only need to create a
+flowtable and add one rule to your forward chain::
+
+	table inet x {
+		flowtable f {
+			hook ingress priority 0; devices = { eth0, eth1 };
+		}
+		chain y {
+			type filter hook forward priority 0; policy accept;
+			ip protocol tcp flow offload @f
+			counter packets 0 bytes 0
+		}
+	}
+
+This example adds the flowtable 'f' to the ingress hook of the eth0 and eth1
+netdevices. You can create as many flowtables as you want in case you need to
+perform resource partitioning. The flowtable priority defines the order in which
+hooks are run in the pipeline, this is convenient in case you already have a
+nftables ingress chain (make sure the flowtable priority is smaller than the
+nftables ingress chain hence the flowtable runs before in the pipeline).
+
+The 'flow offload' action from the forward chain 'y' adds an entry to the
+flowtable for the TCP syn-ack packet coming in the reply direction. Once the
+flow is offloaded, you will observe that the counter rule in the example above
+does not get updated for the packets that are being forwarded through the
+forwarding bypass.
+
+More reading
+------------
+
+This documentation is based on the LWN.net articles [1]_\ [2]_. Rafal Milecki
+also made a very complete and comprehensive summary called "A state of network
+acceleration" that describes how things were before this infrastructure was
+mainlined [3]_ and it also makes a rough summary of this work [4]_.
+
+.. [1] https://lwn.net/Articles/738214/
+.. [2] https://lwn.net/Articles/742164/
+.. [3] http://lists.infradead.org/pipermail/lede-dev/2018-January/010830.html
+.. [4] http://lists.infradead.org/pipermail/lede-dev/2018-January/010829.html
diff --git a/Documentation/networking/nf_flowtable.txt b/Documentation/networking/nf_flowtable.txt
deleted file mode 100644
index 0bf32d1121be..000000000000
--- a/Documentation/networking/nf_flowtable.txt
+++ /dev/null
@@ -1,112 +0,0 @@
-Netfilter's flowtable infrastructure
-====================================
-
-This documentation describes the software flowtable infrastructure available in
-Netfilter since Linux kernel 4.16.
-
-Overview
---------
-
-Initial packets follow the classic forwarding path, once the flow enters the
-established state according to the conntrack semantics (ie. we have seen traffic
-in both directions), then you can decide to offload the flow to the flowtable
-from the forward chain via the 'flow offload' action available in nftables.
-
-Packets that find an entry in the flowtable (ie. flowtable hit) are sent to the
-output netdevice via neigh_xmit(), hence, they bypass the classic forwarding
-path (the visible effect is that you do not see these packets from any of the
-netfilter hooks coming after the ingress). In case of flowtable miss, the packet
-follows the classic forward path.
-
-The flowtable uses a resizable hashtable, lookups are based on the following
-7-tuple selectors: source, destination, layer 3 and layer 4 protocols, source
-and destination ports and the input interface (useful in case there are several
-conntrack zones in place).
-
-Flowtables are populated via the 'flow offload' nftables action, so the user can
-selectively specify what flows are placed into the flow table. Hence, packets
-follow the classic forwarding path unless the user explicitly instruct packets
-to use this new alternative forwarding path via nftables policy.
-
-This is represented in Fig.1, which describes the classic forwarding path
-including the Netfilter hooks and the flowtable fastpath bypass.
-
-                                         userspace process
-                                          ^              |
-                                          |              |
-                                     _____|____     ____\/___
-                                    /          \   /         \
-                                    |   input   |  |  output  |
-                                    \__________/   \_________/
-                                         ^               |
-                                         |               |
-      _________      __________      ---------     _____\/_____
-     /         \    /          \     |Routing |   /            \
-  -->  ingress  ---> prerouting ---> |decision|   | postrouting |--> neigh_xmit
-     \_________/    \__________/     ----------   \____________/          ^
-       |      ^                          |               ^                |
-   flowtable  |                     ____\/___            |                |
-       |      |                    /         \           |                |
-    __\/___   |                    | forward |------------                |
-    |-----|   |                    \_________/                            |
-    |-----|   |                 'flow offload' rule                       |
-    |-----|   |                   adds entry to                           |
-    |_____|   |                     flowtable                             |
-       |      |                                                           |
-      / \     |                                                           |
-     /hit\_no_|                                                           |
-     \ ? /                                                                |
-      \ /                                                                 |
-       |__yes_________________fastpath bypass ____________________________|
-
-               Fig.1 Netfilter hooks and flowtable interactions
-
-The flowtable entry also stores the NAT configuration, so all packets are
-mangled according to the NAT policy that matches the initial packets that went
-through the classic forwarding path. The TTL is decremented before calling
-neigh_xmit(). Fragmented traffic is passed up to follow the classic forwarding
-path given that the transport selectors are missing, therefore flowtable lookup
-is not possible.
-
-Example configuration
----------------------
-
-Enabling the flowtable bypass is relatively easy, you only need to create a
-flowtable and add one rule to your forward chain.
-
-        table inet x {
-		flowtable f {
-			hook ingress priority 0; devices = { eth0, eth1 };
-		}
-                chain y {
-                        type filter hook forward priority 0; policy accept;
-                        ip protocol tcp flow offload @f
-                        counter packets 0 bytes 0
-                }
-        }
-
-This example adds the flowtable 'f' to the ingress hook of the eth0 and eth1
-netdevices. You can create as many flowtables as you want in case you need to
-perform resource partitioning. The flowtable priority defines the order in which
-hooks are run in the pipeline, this is convenient in case you already have a
-nftables ingress chain (make sure the flowtable priority is smaller than the
-nftables ingress chain hence the flowtable runs before in the pipeline).
-
-The 'flow offload' action from the forward chain 'y' adds an entry to the
-flowtable for the TCP syn-ack packet coming in the reply direction. Once the
-flow is offloaded, you will observe that the counter rule in the example above
-does not get updated for the packets that are being forwarded through the
-forwarding bypass.
-
-More reading
-------------
-
-This documentation is based on the LWN.net articles [1][2]. Rafal Milecki also
-made a very complete and comprehensive summary called "A state of network
-acceleration" that describes how things were before this infrastructure was
-mailined [3] and it also makes a rough summary of this work [4].
-
-[1] https://lwn.net/Articles/738214/
-[2] https://lwn.net/Articles/742164/
-[3] http://lists.infradead.org/pipermail/lede-dev/2018-January/010830.html
-[4] http://lists.infradead.org/pipermail/lede-dev/2018-January/010829.html
-- 
cgit v1.2.3-59-g8ed1b


From 63893472d753e97204331e568a5e70290054cb38 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:09 +0200
Subject: docs: networking: convert openvswitch.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/openvswitch.rst | 251 +++++++++++++++++++++++++++++++
 Documentation/networking/openvswitch.txt | 248 ------------------------------
 3 files changed, 252 insertions(+), 248 deletions(-)
 create mode 100644 Documentation/networking/openvswitch.rst
 delete mode 100644 Documentation/networking/openvswitch.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index c4e8a43741be..b7f558480aca 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -87,6 +87,7 @@ Contents:
    netif-msg
    nf_conntrack-sysctl
    nf_flowtable
+   openvswitch
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/openvswitch.rst b/Documentation/networking/openvswitch.rst
new file mode 100644
index 000000000000..1a8353dbf1b6
--- /dev/null
+++ b/Documentation/networking/openvswitch.rst
@@ -0,0 +1,251 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================
+Open vSwitch datapath developer documentation
+=============================================
+
+The Open vSwitch kernel module allows flexible userspace control over
+flow-level packet processing on selected network devices.  It can be
+used to implement a plain Ethernet switch, network device bonding,
+VLAN processing, network access control, flow-based network control,
+and so on.
+
+The kernel module implements multiple "datapaths" (analogous to
+bridges), each of which can have multiple "vports" (analogous to ports
+within a bridge).  Each datapath also has associated with it a "flow
+table" that userspace populates with "flows" that map from keys based
+on packet headers and metadata to sets of actions.  The most common
+action forwards the packet to another vport; other actions are also
+implemented.
+
+When a packet arrives on a vport, the kernel module processes it by
+extracting its flow key and looking it up in the flow table.  If there
+is a matching flow, it executes the associated actions.  If there is
+no match, it queues the packet to userspace for processing (as part of
+its processing, userspace will likely set up a flow to handle further
+packets of the same type entirely in-kernel).
+
+
+Flow key compatibility
+----------------------
+
+Network protocols evolve over time.  New protocols become important
+and existing protocols lose their prominence.  For the Open vSwitch
+kernel module to remain relevant, it must be possible for newer
+versions to parse additional protocols as part of the flow key.  It
+might even be desirable, someday, to drop support for parsing
+protocols that have become obsolete.  Therefore, the Netlink interface
+to Open vSwitch is designed to allow carefully written userspace
+applications to work with any version of the flow key, past or future.
+
+To support this forward and backward compatibility, whenever the
+kernel module passes a packet to userspace, it also passes along the
+flow key that it parsed from the packet.  Userspace then extracts its
+own notion of a flow key from the packet and compares it against the
+kernel-provided version:
+
+    - If userspace's notion of the flow key for the packet matches the
+      kernel's, then nothing special is necessary.
+
+    - If the kernel's flow key includes more fields than the userspace
+      version of the flow key, for example if the kernel decoded IPv6
+      headers but userspace stopped at the Ethernet type (because it
+      does not understand IPv6), then again nothing special is
+      necessary.  Userspace can still set up a flow in the usual way,
+      as long as it uses the kernel-provided flow key to do it.
+
+    - If the userspace flow key includes more fields than the
+      kernel's, for example if userspace decoded an IPv6 header but
+      the kernel stopped at the Ethernet type, then userspace can
+      forward the packet manually, without setting up a flow in the
+      kernel.  This case is bad for performance because every packet
+      that the kernel considers part of the flow must go to userspace,
+      but the forwarding behavior is correct.  (If userspace can
+      determine that the values of the extra fields would not affect
+      forwarding behavior, then it could set up a flow anyway.)
+
+How flow keys evolve over time is important to making this work, so
+the following sections go into detail.
+
+
+Flow key format
+---------------
+
+A flow key is passed over a Netlink socket as a sequence of Netlink
+attributes.  Some attributes represent packet metadata, defined as any
+information about a packet that cannot be extracted from the packet
+itself, e.g. the vport on which the packet was received.  Most
+attributes, however, are extracted from headers within the packet,
+e.g. source and destination addresses from Ethernet, IP, or TCP
+headers.
+
+The <linux/openvswitch.h> header file defines the exact format of the
+flow key attributes.  For informal explanatory purposes here, we write
+them as comma-separated strings, with parentheses indicating arguments
+and nesting.  For example, the following could represent a flow key
+corresponding to a TCP packet that arrived on vport 1::
+
+    in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
+    eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=6, tos=0,
+    frag=no), tcp(src=49163, dst=80)
+
+Often we ellipsize arguments not important to the discussion, e.g.::
+
+    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
+
+
+Wildcarded flow key format
+--------------------------
+
+A wildcarded flow is described with two sequences of Netlink attributes
+passed over the Netlink socket: a flow key, exactly as described above, and an
+optional corresponding flow mask.
+
+A wildcarded flow can represent a group of exact match flows. Each '1' bit
+in the mask specifies an exact match with the corresponding bit in the flow key.
+A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
+of an incoming packet. Using wildcarded flows can improve the flow setup rate
+by reducing the number of new flows that the user space program must process.
+
+Support for the mask Netlink attribute is optional for both the kernel and user
+space program. The kernel can ignore the mask attribute, installing an exact
+match flow, or reduce the number of don't care bits in the kernel to less than
+what was specified by the user space program. In this case, variations in bits
+that the kernel does not implement will simply result in additional flow setups.
+The kernel module will also work with user space programs that neither support
+nor supply flow mask attributes.
+
+Since the kernel may ignore or modify wildcard bits, it can be difficult for
+the userspace program to know exactly what matches are installed. There are
+two possible approaches: reactively install flows as they miss the kernel
+flow table (and therefore not attempt to determine wildcard changes at all)
+or use the kernel's response messages to determine the installed wildcards.
+
+When interacting with userspace, the kernel should maintain the match portion
+of the key exactly as originally installed. This provides a handle to
+identify the flow for all future operations. However, when reporting the
+mask of an installed flow, the mask should include any restrictions imposed
+by the kernel.
+
+The behavior when using overlapping wildcarded flows is undefined. It is the
+responsibility of the user space program to ensure that any incoming packet
+can match at most one flow, wildcarded or not. The current implementation
+performs best-effort detection of overlapping wildcarded flows and may reject
+some but not all of them. However, this behavior may change in future versions.
+
+
+Unique flow identifiers
+-----------------------
+
+An alternative to using the original match portion of a key as the handle for
+flow identification is a unique flow identifier, or "UFID". UFIDs are optional
+for both the kernel and user space program.
+
+User space programs that support UFID are expected to provide it during flow
+setup in addition to the flow, then refer to the flow using the UFID for all
+future operations. The kernel is not required to index flows by the original
+flow key if a UFID is specified.
+
+
+Basic rule for evolving flow keys
+---------------------------------
+
+Some care is needed to really maintain forward and backward
+compatibility for applications that follow the rules listed under
+"Flow key compatibility" above.
+
+The basic rule is obvious::
+
+    ==================================================================
+    New network protocol support must only supplement existing flow
+    key attributes.  It must not change the meaning of already defined
+    flow key attributes.
+    ==================================================================
+
+This rule does have less-obvious consequences so it is worth working
+through a few examples.  Suppose, for example, that the kernel module
+did not already implement VLAN parsing.  Instead, it just interpreted
+the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
+packet.  The flow key for any packet with an 802.1Q header would look
+essentially like this, ignoring metadata::
+
+    eth(...), eth_type(0x8100)
+
+Naively, to add VLAN support, it makes sense to add a new "vlan" flow
+key attribute to contain the VLAN tag, then continue to decode the
+encapsulated headers beyond the VLAN tag using the existing field
+definitions.  With this change, a TCP packet in VLAN 10 would have a
+flow key much like this::
+
+    eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
+
+But this change would negatively affect a userspace application that
+has not been updated to understand the new "vlan" flow key attribute.
+The application could, following the flow compatibility rules above,
+ignore the "vlan" attribute that it does not understand and therefore
+assume that the flow contained IP packets.  This is a bad assumption
+(the flow only contains IP packets if one parses and skips over the
+802.1Q header) and it could cause the application's behavior to change
+across kernel versions even though it follows the compatibility rules.
+
+The solution is to use a set of nested attributes.  This is, for
+example, why 802.1Q support uses nested attributes.  A TCP packet in
+VLAN 10 is actually expressed as::
+
+    eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
+    ip(proto=6, ...), tcp(...)))
+
+Notice how the "eth_type", "ip", and "tcp" flow key attributes are
+nested inside the "encap" attribute.  Thus, an application that does
+not understand the "vlan" key will not see any of those attributes
+and therefore will not misinterpret them.  (Also, the outer eth_type
+is still 0x8100, not changed to 0x0800.)
+
+Handling malformed packets
+--------------------------
+
+Don't drop packets in the kernel for malformed protocol headers, bad
+checksums, etc.  This would prevent userspace from implementing a
+simple Ethernet switch that forwards every packet.
+
+Instead, in such a case, include an attribute with "empty" content.
+It doesn't matter if the empty content could be valid protocol values,
+as long as those values are rarely seen in practice, because userspace
+can always forward all packets with those values to userspace and
+handle them individually.
+
+For example, consider a packet that contains an IP header that
+indicates protocol 6 for TCP, but which is truncated just after the IP
+header, so that the TCP header is missing.  The flow key for this
+packet would include a tcp attribute with all-zero src and dst, like
+this::
+
+    eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
+
+As another example, consider a packet with an Ethernet type of 0x8100,
+indicating that a VLAN TCI should follow, but which is truncated just
+after the Ethernet type.  The flow key for this packet would include
+an all-zero-bits vlan and an empty encap attribute, like this::
+
+    eth(...), eth_type(0x8100), vlan(0), encap()
+
+Unlike a TCP packet with source and destination ports 0, an
+all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
+VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
+attribute expressly to allow this situation to be distinguished.
+Thus, the flow key in this second example unambiguously indicates a
+missing or malformed VLAN TCI.
+
+Other rules
+-----------
+
+The other rules for flow keys are much less subtle:
+
+    - Duplicate attributes are not allowed at a given nesting level.
+
+    - Ordering of attributes is not significant.
+
+    - When the kernel sends a given flow key to userspace, it always
+      composes it the same way.  This allows userspace to hash and
+      compare entire flow keys that it may not be able to fully
+      interpret.
diff --git a/Documentation/networking/openvswitch.txt b/Documentation/networking/openvswitch.txt
deleted file mode 100644
index b3b9ac61d29d..000000000000
--- a/Documentation/networking/openvswitch.txt
+++ /dev/null
@@ -1,248 +0,0 @@
-Open vSwitch datapath developer documentation
-=============================================
-
-The Open vSwitch kernel module allows flexible userspace control over
-flow-level packet processing on selected network devices.  It can be
-used to implement a plain Ethernet switch, network device bonding,
-VLAN processing, network access control, flow-based network control,
-and so on.
-
-The kernel module implements multiple "datapaths" (analogous to
-bridges), each of which can have multiple "vports" (analogous to ports
-within a bridge).  Each datapath also has associated with it a "flow
-table" that userspace populates with "flows" that map from keys based
-on packet headers and metadata to sets of actions.  The most common
-action forwards the packet to another vport; other actions are also
-implemented.
-
-When a packet arrives on a vport, the kernel module processes it by
-extracting its flow key and looking it up in the flow table.  If there
-is a matching flow, it executes the associated actions.  If there is
-no match, it queues the packet to userspace for processing (as part of
-its processing, userspace will likely set up a flow to handle further
-packets of the same type entirely in-kernel).
-
-
-Flow key compatibility
-----------------------
-
-Network protocols evolve over time.  New protocols become important
-and existing protocols lose their prominence.  For the Open vSwitch
-kernel module to remain relevant, it must be possible for newer
-versions to parse additional protocols as part of the flow key.  It
-might even be desirable, someday, to drop support for parsing
-protocols that have become obsolete.  Therefore, the Netlink interface
-to Open vSwitch is designed to allow carefully written userspace
-applications to work with any version of the flow key, past or future.
-
-To support this forward and backward compatibility, whenever the
-kernel module passes a packet to userspace, it also passes along the
-flow key that it parsed from the packet.  Userspace then extracts its
-own notion of a flow key from the packet and compares it against the
-kernel-provided version:
-
-    - If userspace's notion of the flow key for the packet matches the
-      kernel's, then nothing special is necessary.
-
-    - If the kernel's flow key includes more fields than the userspace
-      version of the flow key, for example if the kernel decoded IPv6
-      headers but userspace stopped at the Ethernet type (because it
-      does not understand IPv6), then again nothing special is
-      necessary.  Userspace can still set up a flow in the usual way,
-      as long as it uses the kernel-provided flow key to do it.
-
-    - If the userspace flow key includes more fields than the
-      kernel's, for example if userspace decoded an IPv6 header but
-      the kernel stopped at the Ethernet type, then userspace can
-      forward the packet manually, without setting up a flow in the
-      kernel.  This case is bad for performance because every packet
-      that the kernel considers part of the flow must go to userspace,
-      but the forwarding behavior is correct.  (If userspace can
-      determine that the values of the extra fields would not affect
-      forwarding behavior, then it could set up a flow anyway.)
-
-How flow keys evolve over time is important to making this work, so
-the following sections go into detail.
-
-
-Flow key format
----------------
-
-A flow key is passed over a Netlink socket as a sequence of Netlink
-attributes.  Some attributes represent packet metadata, defined as any
-information about a packet that cannot be extracted from the packet
-itself, e.g. the vport on which the packet was received.  Most
-attributes, however, are extracted from headers within the packet,
-e.g. source and destination addresses from Ethernet, IP, or TCP
-headers.
-
-The <linux/openvswitch.h> header file defines the exact format of the
-flow key attributes.  For informal explanatory purposes here, we write
-them as comma-separated strings, with parentheses indicating arguments
-and nesting.  For example, the following could represent a flow key
-corresponding to a TCP packet that arrived on vport 1:
-
-    in_port(1), eth(src=e0:91:f5:21:d0:b2, dst=00:02:e3:0f:80:a4),
-    eth_type(0x0800), ipv4(src=172.16.0.20, dst=172.18.0.52, proto=17, tos=0,
-    frag=no), tcp(src=49163, dst=80)
-
-Often we ellipsize arguments not important to the discussion, e.g.:
-
-    in_port(1), eth(...), eth_type(0x0800), ipv4(...), tcp(...)
-
-
-Wildcarded flow key format
---------------------------
-
-A wildcarded flow is described with two sequences of Netlink attributes
-passed over the Netlink socket. A flow key, exactly as described above, and an
-optional corresponding flow mask.
-
-A wildcarded flow can represent a group of exact match flows. Each '1' bit
-in the mask specifies a exact match with the corresponding bit in the flow key.
-A '0' bit specifies a don't care bit, which will match either a '1' or '0' bit
-of a incoming packet. Using wildcarded flow can improve the flow set up rate
-by reduce the number of new flows need to be processed by the user space program.
-
-Support for the mask Netlink attribute is optional for both the kernel and user
-space program. The kernel can ignore the mask attribute, installing an exact
-match flow, or reduce the number of don't care bits in the kernel to less than
-what was specified by the user space program. In this case, variations in bits
-that the kernel does not implement will simply result in additional flow setups.
-The kernel module will also work with user space programs that neither support
-nor supply flow mask attributes.
-
-Since the kernel may ignore or modify wildcard bits, it can be difficult for
-the userspace program to know exactly what matches are installed. There are
-two possible approaches: reactively install flows as they miss the kernel
-flow table (and therefore not attempt to determine wildcard changes at all)
-or use the kernel's response messages to determine the installed wildcards.
-
-When interacting with userspace, the kernel should maintain the match portion
-of the key exactly as originally installed. This will provides a handle to
-identify the flow for all future operations. However, when reporting the
-mask of an installed flow, the mask should include any restrictions imposed
-by the kernel.
-
-The behavior when using overlapping wildcarded flows is undefined. It is the
-responsibility of the user space program to ensure that any incoming packet
-can match at most one flow, wildcarded or not. The current implementation
-performs best-effort detection of overlapping wildcarded flows and may reject
-some but not all of them. However, this behavior may change in future versions.
-
-
-Unique flow identifiers
------------------------
-
-An alternative to using the original match portion of a key as the handle for
-flow identification is a unique flow identifier, or "UFID". UFIDs are optional
-for both the kernel and user space program.
-
-User space programs that support UFID are expected to provide it during flow
-setup in addition to the flow, then refer to the flow using the UFID for all
-future operations. The kernel is not required to index flows by the original
-flow key if a UFID is specified.
-
-
-Basic rule for evolving flow keys
----------------------------------
-
-Some care is needed to really maintain forward and backward
-compatibility for applications that follow the rules listed under
-"Flow key compatibility" above.
-
-The basic rule is obvious:
-
-    ------------------------------------------------------------------
-    New network protocol support must only supplement existing flow
-    key attributes.  It must not change the meaning of already defined
-    flow key attributes.
-    ------------------------------------------------------------------
-
-This rule does have less-obvious consequences so it is worth working
-through a few examples.  Suppose, for example, that the kernel module
-did not already implement VLAN parsing.  Instead, it just interpreted
-the 802.1Q TPID (0x8100) as the Ethertype then stopped parsing the
-packet.  The flow key for any packet with an 802.1Q header would look
-essentially like this, ignoring metadata:
-
-    eth(...), eth_type(0x8100)
-
-Naively, to add VLAN support, it makes sense to add a new "vlan" flow
-key attribute to contain the VLAN tag, then continue to decode the
-encapsulated headers beyond the VLAN tag using the existing field
-definitions.  With this change, a TCP packet in VLAN 10 would have a
-flow key much like this:
-
-    eth(...), vlan(vid=10, pcp=0), eth_type(0x0800), ip(proto=6, ...), tcp(...)
-
-But this change would negatively affect a userspace application that
-has not been updated to understand the new "vlan" flow key attribute.
-The application could, following the flow compatibility rules above,
-ignore the "vlan" attribute that it does not understand and therefore
-assume that the flow contained IP packets.  This is a bad assumption
-(the flow only contains IP packets if one parses and skips over the
-802.1Q header) and it could cause the application's behavior to change
-across kernel versions even though it follows the compatibility rules.
-
-The solution is to use a set of nested attributes.  This is, for
-example, why 802.1Q support uses nested attributes.  A TCP packet in
-VLAN 10 is actually expressed as:
-
-    eth(...), eth_type(0x8100), vlan(vid=10, pcp=0), encap(eth_type(0x0800),
-    ip(proto=6, ...), tcp(...)))
-
-Notice how the "eth_type", "ip", and "tcp" flow key attributes are
-nested inside the "encap" attribute.  Thus, an application that does
-not understand the "vlan" key will not see either of those attributes
-and therefore will not misinterpret them.  (Also, the outer eth_type
-is still 0x8100, not changed to 0x0800.)
-
-Handling malformed packets
---------------------------
-
-Don't drop packets in the kernel for malformed protocol headers, bad
-checksums, etc.  This would prevent userspace from implementing a
-simple Ethernet switch that forwards every packet.
-
-Instead, in such a case, include an attribute with "empty" content.
-It doesn't matter if the empty content could be valid protocol values,
-as long as those values are rarely seen in practice, because userspace
-can always forward all packets with those values to userspace and
-handle them individually.
-
-For example, consider a packet that contains an IP header that
-indicates protocol 6 for TCP, but which is truncated just after the IP
-header, so that the TCP header is missing.  The flow key for this
-packet would include a tcp attribute with all-zero src and dst, like
-this:
-
-    eth(...), eth_type(0x0800), ip(proto=6, ...), tcp(src=0, dst=0)
-
-As another example, consider a packet with an Ethernet type of 0x8100,
-indicating that a VLAN TCI should follow, but which is truncated just
-after the Ethernet type.  The flow key for this packet would include
-an all-zero-bits vlan and an empty encap attribute, like this:
-
-    eth(...), eth_type(0x8100), vlan(0), encap()
-
-Unlike a TCP packet with source and destination ports 0, an
-all-zero-bits VLAN TCI is not that rare, so the CFI bit (aka
-VLAN_TAG_PRESENT inside the kernel) is ordinarily set in a vlan
-attribute expressly to allow this situation to be distinguished.
-Thus, the flow key in this second example unambiguously indicates a
-missing or malformed VLAN TCI.
-
-Other rules
------------
-
-The other rules for flow keys are much less subtle:
-
-    - Duplicate attributes are not allowed at a given nesting level.
-
-    - Ordering of attributes is not significant.
-
-    - When the kernel sends a given flow key to userspace, it always
-      composes it the same way.  This allows userspace to hash and
-      compare entire flow keys that it may not be able to fully
-      interpret.
-- 
cgit v1.2.3-59-g8ed1b


From f5c39ef3299f4efaab4d1bb410406de5909c1687 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:10 +0200
Subject: docs: networking: convert operstates.txt to ReST

- add SPDX header;
- add a document title;
- adjust chapters, adding proper markups;
- mark lists as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst      |   1 +
 Documentation/networking/operstates.rst | 185 ++++++++++++++++++++++++++++++++
 Documentation/networking/operstates.txt | 164 ----------------------------
 3 files changed, 186 insertions(+), 164 deletions(-)
 create mode 100644 Documentation/networking/operstates.rst
 delete mode 100644 Documentation/networking/operstates.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b7f558480aca..028a36821b9a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -88,6 +88,7 @@ Contents:
    nf_conntrack-sysctl
    nf_flowtable
    openvswitch
+   operstates
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/operstates.rst b/Documentation/networking/operstates.rst
new file mode 100644
index 000000000000..9c918f7cb0e8
--- /dev/null
+++ b/Documentation/networking/operstates.rst
@@ -0,0 +1,185 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Operational States
+==================
+
+
+1. Introduction
+===============
+
+Linux distinguishes between administrative and operational state of an
+interface. Administrative state is the result of "ip link set dev
+<dev> up or down" and reflects whether the administrator wants to use
+the device for traffic.
+
+However, an interface is not usable just because the admin enabled it
+- ethernet requires to be plugged into the switch and, depending on
+a site's networking policy and configuration, an 802.1X authentication
+to be performed before user data can be transferred. Operational state
+shows the ability of an interface to transmit this user data.
+
+Thanks to 802.1X, userspace must be granted the possibility to
+influence operational state. To accommodate this, operational state is
+split into two parts: Two flags that can be set by the driver only, and
+an RFC2863-compatible state that is derived from these flags, a policy,
+and changeable from userspace under certain rules.
+
+
+2. Querying from userspace
+==========================
+
+Both admin and operational state can be queried via the netlink
+operation RTM_GETLINK. It is also possible to subscribe to RTNLGRP_LINK
+to be notified of updates while the interface is admin up. This is
+important for setting from userspace.
+
+These values contain interface state:
+
+ifinfomsg::if_flags & IFF_UP:
+ Interface is admin up
+
+ifinfomsg::if_flags & IFF_RUNNING:
+ Interface is in RFC2863 operational state UP or UNKNOWN. This is for
+ backward compatibility, routing daemons, dhcp clients can use this
+ flag to determine whether they should use the interface.
+
+ifinfomsg::if_flags & IFF_LOWER_UP:
+ Driver has signaled netif_carrier_on()
+
+ifinfomsg::if_flags & IFF_DORMANT:
+ Driver has signaled netif_dormant_on()
+
+TLV IFLA_OPERSTATE
+------------------
+
+contains RFC2863 state of the interface in numeric representation:
+
+IF_OPER_UNKNOWN (0):
+ Interface is in unknown state, neither driver nor userspace has set
+ operational state. Interface must be considered for user data as
+ setting operational state has not been implemented in every driver.
+
+IF_OPER_NOTPRESENT (1):
+ Unused in current kernel (notpresent interfaces normally disappear),
+ just a numerical placeholder.
+
+IF_OPER_DOWN (2):
+ Interface is unable to transfer data on L1, f.e. ethernet is not
+ plugged or interface is ADMIN down.
+
+IF_OPER_LOWERLAYERDOWN (3):
+ Interfaces stacked on an interface that is IF_OPER_DOWN show this
+ state (f.e. VLAN).
+
+IF_OPER_TESTING (4):
+ Unused in current kernel.
+
+IF_OPER_DORMANT (5):
+ Interface is L1 up, but waiting for an external event, f.e. for a
+ protocol to establish. (802.1X)
+
+IF_OPER_UP (6):
+ Interface is operational up and can be used.
+
+This TLV can also be queried via sysfs.
+
+TLV IFLA_LINKMODE
+-----------------
+
+contains link policy. This is needed for userspace interaction
+described below.
+
+This TLV can also be queried via sysfs.
+
+
+3. Kernel driver API
+====================
+
+Kernel drivers have access to two flags that map to IFF_LOWER_UP and
+IFF_DORMANT. These flags can be set from everywhere, even from
+interrupts. It is guaranteed that only the driver has write access,
+however, if different layers of the driver manipulate the same flag,
+the driver has to provide the synchronisation needed.
+
+__LINK_STATE_NOCARRIER, maps to !IFF_LOWER_UP:
+
+The driver uses netif_carrier_on() to clear and netif_carrier_off() to
+set this flag. On netif_carrier_off(), the scheduler stops sending
+packets. The name 'carrier' and the inversion are historical, think of
+it as lower layer.
+
+Note that for certain kinds of soft-devices, which are not managing any
+real hardware, it is possible to set this bit from userspace.  One
+should use TLV IFLA_CARRIER to do so.
+
+netif_carrier_ok() can be used to query that bit.
+
+__LINK_STATE_DORMANT, maps to IFF_DORMANT:
+
+Set by the driver to express that the device cannot yet be used
+because some driver controlled protocol establishment has to
+complete. Corresponding functions are netif_dormant_on() to set the
+flag, netif_dormant_off() to clear it and netif_dormant() to query.
+
+On device allocation, both flags __LINK_STATE_NOCARRIER and
+__LINK_STATE_DORMANT are cleared, so the effective state is equivalent
+to netif_carrier_ok() and !netif_dormant().
+
+
+Whenever the driver CHANGES one of these flags, a workqueue event is
+scheduled to translate the flag combination to IFLA_OPERSTATE as
+follows:
+
+!netif_carrier_ok():
+ IF_OPER_LOWERLAYERDOWN if the interface is stacked, IF_OPER_DOWN
+ otherwise. Kernel can recognise stacked interfaces because their
+ ifindex != iflink.
+
+netif_carrier_ok() && netif_dormant():
+ IF_OPER_DORMANT
+
+netif_carrier_ok() && !netif_dormant():
+ IF_OPER_UP if userspace interaction is disabled. Otherwise
+ IF_OPER_DORMANT with the possibility for userspace to initiate the
+ IF_OPER_UP transition afterwards.
+
+
+4. Setting from userspace
+=========================
+
+Applications have to use the netlink interface to influence the
+RFC2863 operational state of an interface. Setting IFLA_LINKMODE to 1
+via RTM_SETLINK instructs the kernel that an interface should go to
+IF_OPER_DORMANT instead of IF_OPER_UP when the combination
+netif_carrier_ok() && !netif_dormant() is set by the
+driver. Afterwards, the userspace application can set IFLA_OPERSTATE
+to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
+netif_carrier_off() or netif_dormant_on(). Changes made by userspace
+are multicasted on the netlink group RTNLGRP_LINK.
+
+So basically an 802.1X supplicant interacts with the kernel like this:
+
+- subscribe to RTNLGRP_LINK
+- set IFLA_LINKMODE to 1 via RTM_SETLINK
+- query RTM_GETLINK once to get initial state
+- if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until
+  netlink multicast signals this state
+- do 802.1X, eventually abort if flags go down again
+- send RTM_SETLINK to set operstate to IF_OPER_UP if authentication
+  succeeds, IF_OPER_DORMANT otherwise
+- see how operstate and IFF_RUNNING are echoed via netlink multicast
+- set interface back to IF_OPER_DORMANT if 802.1X reauthentication
+  fails
+- restart if kernel changes IFF_LOWER_UP or IFF_DORMANT flag
+
+If the supplicant goes down, bring back IFLA_LINKMODE to 0 and
+IFLA_OPERSTATE to a sane value.
+
+A routing daemon or dhcp client just needs to care for IFF_RUNNING or
+waiting for operstate to go IF_OPER_UP/IF_OPER_UNKNOWN before
+considering the interface / querying a DHCP address.
+
+
+For technical questions and/or comments please e-mail to Stefan Rompf
+(stefan at loplof.de).
diff --git a/Documentation/networking/operstates.txt b/Documentation/networking/operstates.txt
deleted file mode 100644
index b203d1334822..000000000000
--- a/Documentation/networking/operstates.txt
+++ /dev/null
@@ -1,164 +0,0 @@
-
-1. Introduction
-
-Linux distinguishes between administrative and operational state of an
-interface. Administrative state is the result of "ip link set dev
-<dev> up or down" and reflects whether the administrator wants to use
-the device for traffic.
-
-However, an interface is not usable just because the admin enabled it
-- ethernet requires to be plugged into the switch and, depending on
-a site's networking policy and configuration, an 802.1X authentication
-to be performed before user data can be transferred. Operational state
-shows the ability of an interface to transmit this user data.
-
-Thanks to 802.1X, userspace must be granted the possibility to
-influence operational state. To accommodate this, operational state is
-split into two parts: Two flags that can be set by the driver only, and
-a RFC2863 compatible state that is derived from these flags, a policy,
-and changeable from userspace under certain rules.
-
-
-2. Querying from userspace
-
-Both admin and operational state can be queried via the netlink
-operation RTM_GETLINK. It is also possible to subscribe to RTNLGRP_LINK
-to be notified of updates while the interface is admin up. This is
-important for setting from userspace.
-
-These values contain interface state:
-
-ifinfomsg::if_flags & IFF_UP:
- Interface is admin up
-ifinfomsg::if_flags & IFF_RUNNING:
- Interface is in RFC2863 operational state UP or UNKNOWN. This is for
- backward compatibility, routing daemons, dhcp clients can use this
- flag to determine whether they should use the interface.
-ifinfomsg::if_flags & IFF_LOWER_UP:
- Driver has signaled netif_carrier_on()
-ifinfomsg::if_flags & IFF_DORMANT:
- Driver has signaled netif_dormant_on()
-
-TLV IFLA_OPERSTATE
-
-contains RFC2863 state of the interface in numeric representation:
-
-IF_OPER_UNKNOWN (0):
- Interface is in unknown state, neither driver nor userspace has set
- operational state. Interface must be considered for user data as
- setting operational state has not been implemented in every driver.
-IF_OPER_NOTPRESENT (1):
- Unused in current kernel (notpresent interfaces normally disappear),
- just a numerical placeholder.
-IF_OPER_DOWN (2):
- Interface is unable to transfer data on L1, f.e. ethernet is not
- plugged or interface is ADMIN down.
-IF_OPER_LOWERLAYERDOWN (3):
- Interfaces stacked on an interface that is IF_OPER_DOWN show this
- state (f.e. VLAN).
-IF_OPER_TESTING (4):
- Unused in current kernel.
-IF_OPER_DORMANT (5):
- Interface is L1 up, but waiting for an external event, f.e. for a
- protocol to establish. (802.1X)
-IF_OPER_UP (6):
- Interface is operational up and can be used.
-
-This TLV can also be queried via sysfs.
-
-TLV IFLA_LINKMODE
-
-contains link policy. This is needed for userspace interaction
-described below.
-
-This TLV can also be queried via sysfs.
-
-
-3. Kernel driver API
-
-Kernel drivers have access to two flags that map to IFF_LOWER_UP and
-IFF_DORMANT. These flags can be set from everywhere, even from
-interrupts. It is guaranteed that only the driver has write access,
-however, if different layers of the driver manipulate the same flag,
-the driver has to provide the synchronisation needed.
-
-__LINK_STATE_NOCARRIER, maps to !IFF_LOWER_UP:
-
-The driver uses netif_carrier_on() to clear and netif_carrier_off() to
-set this flag. On netif_carrier_off(), the scheduler stops sending
-packets. The name 'carrier' and the inversion are historical, think of
-it as lower layer.
-
-Note that for certain kind of soft-devices, which are not managing any
-real hardware, it is possible to set this bit from userspace.  One
-should use TVL IFLA_CARRIER to do so.
-
-netif_carrier_ok() can be used to query that bit.
-
-__LINK_STATE_DORMANT, maps to IFF_DORMANT:
-
-Set by the driver to express that the device cannot yet be used
-because some driver controlled protocol establishment has to
-complete. Corresponding functions are netif_dormant_on() to set the
-flag, netif_dormant_off() to clear it and netif_dormant() to query.
-
-On device allocation, both flags __LINK_STATE_NOCARRIER and
-__LINK_STATE_DORMANT are cleared, so the effective state is equivalent
-to netif_carrier_ok() and !netif_dormant().
-
-
-Whenever the driver CHANGES one of these flags, a workqueue event is
-scheduled to translate the flag combination to IFLA_OPERSTATE as
-follows:
-
-!netif_carrier_ok():
- IF_OPER_LOWERLAYERDOWN if the interface is stacked, IF_OPER_DOWN
- otherwise. Kernel can recognise stacked interfaces because their
- ifindex != iflink.
-
-netif_carrier_ok() && netif_dormant():
- IF_OPER_DORMANT
-
-netif_carrier_ok() && !netif_dormant():
- IF_OPER_UP if userspace interaction is disabled. Otherwise
- IF_OPER_DORMANT with the possibility for userspace to initiate the
- IF_OPER_UP transition afterwards.
-
-
-4. Setting from userspace
-
-Applications have to use the netlink interface to influence the
-RFC2863 operational state of an interface. Setting IFLA_LINKMODE to 1
-via RTM_SETLINK instructs the kernel that an interface should go to
-IF_OPER_DORMANT instead of IF_OPER_UP when the combination
-netif_carrier_ok() && !netif_dormant() is set by the
-driver. Afterwards, the userspace application can set IFLA_OPERSTATE
-to IF_OPER_DORMANT or IF_OPER_UP as long as the driver does not set
-netif_carrier_off() or netif_dormant_on(). Changes made by userspace
-are multicasted on the netlink group RTNLGRP_LINK.
-
-So basically a 802.1X supplicant interacts with the kernel like this:
-
--subscribe to RTNLGRP_LINK
--set IFLA_LINKMODE to 1 via RTM_SETLINK
--query RTM_GETLINK once to get initial state
--if initial flags are not (IFF_LOWER_UP && !IFF_DORMANT), wait until
- netlink multicast signals this state
--do 802.1X, eventually abort if flags go down again
--send RTM_SETLINK to set operstate to IF_OPER_UP if authentication
- succeeds, IF_OPER_DORMANT otherwise
--see how operstate and IFF_RUNNING is echoed via netlink multicast
--set interface back to IF_OPER_DORMANT if 802.1X reauthentication
- fails
--restart if kernel changes IFF_LOWER_UP or IFF_DORMANT flag
-
-if supplicant goes down, bring back IFLA_LINKMODE to 0 and
-IFLA_OPERSTATE to a sane value.
-
-A routing daemon or dhcp client just needs to care for IFF_RUNNING or
-waiting for operstate to go IF_OPER_UP/IF_OPER_UNKNOWN before
-considering the interface / querying a DHCP address.
-
-
-For technical questions and/or comments please e-mail to Stefan Rompf
-(stefan at loplof.de).
-- 
cgit v1.2.3-59-g8ed1b


From 4ba7bc9f2de6b230da2e38e6317de4b5ed91f46b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:11 +0200
Subject: docs: networking: convert packet_mmap.txt to ReST

This patch has a big diff, but most are due to whitespaces.

Yet, the conversion is similar to other files under networking:

- add SPDX header;
- add a document title;
- adjust titles and chapters, adding proper markups;
- mark lists as such;
- mark tables as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |    1 +
 Documentation/networking/packet_mmap.rst | 1084 ++++++++++++++++++++++++++++++
 Documentation/networking/packet_mmap.txt | 1061 -----------------------------
 3 files changed, 1085 insertions(+), 1061 deletions(-)
 create mode 100644 Documentation/networking/packet_mmap.rst
 delete mode 100644 Documentation/networking/packet_mmap.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 028a36821b9a..8262b535a83e 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -89,6 +89,7 @@ Contents:
    nf_flowtable
    openvswitch
    operstates
+   packet_mmap
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
new file mode 100644
index 000000000000..5f213d17652f
--- /dev/null
+++ b/Documentation/networking/packet_mmap.rst
@@ -0,0 +1,1084 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========
+Packet MMAP
+===========
+
+Abstract
+========
+
+This file documents the mmap() facility available with the PACKET
+socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
+
+i) capture network traffic with utilities like tcpdump,
+ii) transmit network traffic, or any other that needs raw
+    access to network interface.
+
+Howto can be found at:
+
+    https://sites.google.com/site/packetmmap/
+
+Please send your comments to
+    - Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
+    - Johann Baudy
+
+Why use PACKET_MMAP
+===================
+
+In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very
+inefficient. It uses very limited buffers and requires one system call to
+capture each packet, it requires two if you want to get packet's timestamp
+(like libpcap always does).
+
+On the other hand, PACKET_MMAP is very efficient. PACKET_MMAP provides a size
+configurable circular buffer mapped in user space that can be used to either
+send or receive packets. This way reading packets just needs to wait for them,
+most of the time there is no need to issue a single system call. Concerning
+transmission, multiple packets can be sent through one system call to get the
+highest bandwidth. Using a shared buffer between the kernel and the user
+also has the benefit of minimizing packet copies.
+
+It's fine to use PACKET_MMAP to improve the performance of the capture and
+transmission process, but it isn't everything. At least, if you are capturing
+at high speeds (this is relative to the cpu speed), you should check if the
+device driver of your network interface card supports some sort of interrupt
+load mitigation or (even better) if it supports NAPI, also make sure it is
+enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
+supported by devices of your network. CPU IRQ pinning of your network interface
+card can also be an advantage.
+
+How to use mmap() to improve capture process
+============================================
+
+From the user standpoint, you should use the higher level libpcap library, which
+is a de facto standard, portable across nearly all operating systems
+including Win32.
+
+Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
+TPACKET_V3 support was added in version 1.5.0
+
+How to use mmap() directly to improve capture process
+=====================================================
+
+From the system calls stand point, the use of PACKET_MMAP involves
+the following process::
+
+
+    [setup]     socket() -------> creation of the capture socket
+		setsockopt() ---> allocation of the circular buffer (ring)
+				  option: PACKET_RX_RING
+		mmap() ---------> mapping of the allocated buffer to the
+				  user process
+
+    [capture]   poll() ---------> to wait for incoming packets
+
+    [shutdown]  close() --------> destruction of the capture socket and
+				  deallocation of all associated
+				  resources.
+
+
+Socket creation and destruction is straightforward, and is done
+the same way with or without PACKET_MMAP::
+
+ int fd = socket(PF_PACKET, mode, htons(ETH_P_ALL));
+
+where mode is SOCK_RAW for the raw interface where link level
+information can be captured or SOCK_DGRAM for the cooked
+interface where link level information capture is not
+supported and a link level pseudo-header is provided
+by the kernel.
+
+The destruction of the socket and all associated resources
+is done by a simple call to close(fd).
+
+Similarly as without PACKET_MMAP, it is possible to use one socket
+for capture and transmission. This can be done by mapping the
+allocated RX and TX buffer ring with a single mmap() call.
+See "Mapping and use of the circular buffer (ring)".
+
+Next I will describe PACKET_MMAP settings and its constraints,
+also the mapping of the circular buffer in the user process and
+the use of this buffer.
+
+How to use mmap() directly to improve transmission process
+==========================================================
+Transmission process is similar to capture as shown below::
+
+    [setup]         socket() -------> creation of the transmission socket
+		    setsockopt() ---> allocation of the circular buffer (ring)
+				      option: PACKET_TX_RING
+		    bind() ---------> bind transmission socket with a network interface
+		    mmap() ---------> mapping of the allocated buffer to the
+				      user process
+
+    [transmission]  poll() ---------> wait for free packets (optional)
+		    send() ---------> send all packets that are set as ready in
+				      the ring
+				      The flag MSG_DONTWAIT can be used to return
+				      before end of transfer.
+
+    [shutdown]      close() --------> destruction of the transmission socket and
+				      deallocation of all associated resources.
+
+Socket creation and destruction is also straight forward, and is done
+the same way as in capturing described in the previous paragraph::
+
+ int fd = socket(PF_PACKET, mode, 0);
+
+The protocol can optionally be 0 in case we only want to transmit
+via this socket, which avoids an expensive call to packet_rcv().
+In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
+set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
+
+Binding the socket to your network interface is mandatory (with zero copy) to
+know the header size of frames used in the circular buffer.
+
+As with capture, each frame contains two parts::
+
+    --------------------
+    | struct tpacket_hdr | Header. It contains the status of
+    |                    | this frame
+    |--------------------|
+    | data buffer        |
+    .                    .  Data that will be sent over the network interface.
+    .                    .
+    --------------------
+
+ bind() associates the socket to your network interface thanks to
+ sll_ifindex parameter of struct sockaddr_ll.
+
+ Initialization example::
+
+    struct sockaddr_ll my_addr;
+    struct ifreq s_ifr;
+    ...
+
+    strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
+
+    /* get interface index of eth0 */
+    ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
+
+    /* fill sockaddr_ll struct to prepare binding */
+    my_addr.sll_family = AF_PACKET;
+    my_addr.sll_protocol = htons(ETH_P_ALL);
+    my_addr.sll_ifindex =  s_ifr.ifr_ifindex;
+
+    /* bind socket to eth0 */
+    bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
+
+ A complete tutorial is available at: https://sites.google.com/site/packetmmap/
+
+By default, the user should put data at::
+
+ frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
+
+So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW),
+the beginning of the user data will be at::
+
+ frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
+
+If you wish to put user data at a custom offset from the beginning of
+the frame (for payload alignment with SOCK_RAW mode for instance) you
+can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order
+to make this work it must be enabled previously with setsockopt()
+and the PACKET_TX_HAS_OFF option.
+
+PACKET_MMAP settings
+====================
+
+Setting up PACKET_MMAP from user level code is done with a call like
+
+ - Capture process::
+
+     setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
+
+ - Transmission process::
+
+     setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
+
+The most significant argument in the previous call is the req parameter,
+this parameter must have the following structure::
+
+    struct tpacket_req
+    {
+	unsigned int    tp_block_size;  /* Minimal size of contiguous block */
+	unsigned int    tp_block_nr;    /* Number of blocks */
+	unsigned int    tp_frame_size;  /* Size of frame */
+	unsigned int    tp_frame_nr;    /* Total number of frames */
+    };
+
+This structure is defined in /usr/include/linux/if_packet.h and establishes a
+circular buffer (ring) of unswappable memory.
+Being mapped in the capture process allows reading the captured frames and
+related meta-information like timestamps without requiring a system call.
+
+Frames are grouped in blocks. Each block is a physically contiguous
+region of memory and holds tp_block_size/tp_frame_size frames. The total number
+of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because::
+
+    frames_per_block = tp_block_size/tp_frame_size
+
+indeed, packet_set_ring checks that the following condition is true::
+
+    frames_per_block * tp_block_nr == tp_frame_nr
+
+Let's see an example, with the following values::
+
+     tp_block_size= 4096
+     tp_frame_size= 2048
+     tp_block_nr  = 4
+     tp_frame_nr  = 8
+
+we will get the following buffer structure::
+
+	    block #1                 block #2
+    +---------+---------+    +---------+---------+
+    | frame 1 | frame 2 |    | frame 3 | frame 4 |
+    +---------+---------+    +---------+---------+
+
+	    block #3                 block #4
+    +---------+---------+    +---------+---------+
+    | frame 5 | frame 6 |    | frame 7 | frame 8 |
+    +---------+---------+    +---------+---------+
+
+A frame can be of any size with the only condition it can fit in a block. A block
+can only hold an integer number of frames, or in other words, a frame cannot
+be spread across two blocks, so there are some details you have to take into
+account when choosing the frame_size. See "Mapping and use of the circular
+buffer (ring)".
+
+PACKET_MMAP setting constraints
+===============================
+
+In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch),
+the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or
+16384 in a 64 bit architecture. For information on these kernel versions
+see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt
+
+Block size limit
+----------------
+
+As stated earlier, each block is a contiguous physical region of memory. These
+memory regions are allocated with calls to the __get_free_pages() function. As
+the name indicates, this function allocates pages of memory, and the second
+argument is "order" or a power of two number of pages, that is
+(for PAGE_SIZE == 4096) order=0 ==> 4096 bytes, order=1 ==> 8192 bytes,
+order=2 ==> 16384 bytes, etc. The maximum size of a
+region allocated by __get_free_pages is determined by the MAX_ORDER macro. More
+precisely the limit can be calculated as::
+
+   PAGE_SIZE << MAX_ORDER
+
+   In a i386 architecture PAGE_SIZE is 4096 bytes
+   In a 2.4/i386 kernel MAX_ORDER is 10
+   In a 2.6/i386 kernel MAX_ORDER is 11
+
+So get_free_pages can allocate as much as 4MB or 8MB in a 2.4/2.6 kernel
+respectively, with an i386 architecture.
+
+User space programs can include /usr/include/sys/user.h and
+/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_ORDER declarations.
+
+The pagesize can also be determined dynamically with the getpagesize (2)
+system call.
+
+Block number limit
+------------------
+
+To understand the constraints of PACKET_MMAP, we have to see the structure
+used to hold the pointers to each block.
+
+Currently, this structure is a dynamically allocated vector with kmalloc
+called pg_vec, its size limits the number of blocks that can be allocated::
+
+    +---+---+---+---+
+    | x | x | x | x |
+    +---+---+---+---+
+      |   |   |   |
+      |   |   |   v
+      |   |   v  block #4
+      |   v  block #3
+      v  block #2
+     block #1
+
+kmalloc allocates any number of bytes of physically contiguous memory from
+a pool of pre-determined sizes. This pool of memory is maintained by the slab
+allocator, which is ultimately responsible for doing the allocation and
+hence which imposes the maximum memory that kmalloc can allocate.
+
+In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. The
+predetermined sizes that kmalloc uses can be checked in the "size-<bytes>"
+entries of /proc/slabinfo
+
+In a 32 bit architecture, pointers are 4 bytes long, so the total number of
+pointers to blocks is::
+
+     131072/4 = 32768 blocks
+
+PACKET_MMAP buffer size calculator
+==================================
+
+Definitions:
+
+==============  ================================================================
+<size-max>      is the maximum size allocable with kmalloc
+		(see /proc/slabinfo)
+<pointer size>  depends on the architecture -- ``sizeof(void *)``
+<page size>     depends on the architecture -- PAGE_SIZE or getpagesize (2)
+<max-order>     is the value defined with MAX_ORDER
+<frame size>    it's an upper bound of frame's capture size (more on this later)
+==============  ================================================================
+
+from these definitions we will derive::
+
+	<block number> = <size-max>/<pointer size>
+	<block size> = <pagesize> << <max-order>
+
+so, the max buffer size is::
+
+	<block number> * <block size>
+
+and the number of frames will be::
+
+	<block number> * <block size> / <frame size>
+
+Suppose the following parameters, which apply for 2.6 kernel and an
+i386 architecture::
+
+	<size-max> = 131072 bytes
+	<pointer size> = 4 bytes
+	<pagesize> = 4096 bytes
+	<max-order> = 11
+
+and a value for <frame size> of 2048 bytes. These parameters will yield::
+
+	<block number> = 131072/4 = 32768 blocks
+	<block size> = 4096 << 11 = 8 MiB.
+
+and hence the buffer will have a 262144 MiB size. So it can hold
+262144 MiB / 2048 bytes = 134217728 frames
+
+Actually, this buffer size is not possible with an i386 architecture.
+Remember that the memory is allocated in kernel space, in the case of
+an i386 kernel's memory size is limited to 1GiB.
+
+All memory allocations are not freed until the socket is closed. The memory
+allocations are done with GFP_KERNEL priority, this basically means that
+the allocation can wait and swap other process' memory in order to allocate
+the necessary memory, so normally limits can be reached.
+
+Other constraints
+-----------------
+
+If you check the source code you will see that what I draw here as a frame
+is not only the link level frame. At the beginning of each frame there is a
+header called struct tpacket_hdr used in PACKET_MMAP to hold link level's frame
+meta information like timestamp. So what we draw here as a frame is really
+the following (from include/linux/if_packet.h)::
+
+ /*
+   Frame structure:
+
+   - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
+   - struct tpacket_hdr
+   - pad to TPACKET_ALIGNMENT=16
+   - struct sockaddr_ll
+   - Gap, chosen so that packet data (Start+tp_net) aligns to
+     TPACKET_ALIGNMENT=16
+   - Start+tp_mac: [ Optional MAC header ]
+   - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
+   - Pad to align to TPACKET_ALIGNMENT=16
+ */
+
+The following are conditions that are checked in packet_set_ring
+
+   - tp_block_size must be a multiple of PAGE_SIZE (1)
+   - tp_frame_size must be greater than TPACKET_HDRLEN (obvious)
+   - tp_frame_size must be a multiple of TPACKET_ALIGNMENT
+   - tp_frame_nr   must be exactly frames_per_block*tp_block_nr
+
+Note that tp_block_size should be chosen to be a power of two or there will
+be a waste of memory.
+
+Mapping and use of the circular buffer (ring)
+---------------------------------------------
+
+The mapping of the buffer in the user process is done with the conventional
+mmap function. Even though the circular buffer is composed of several physically
+discontiguous blocks of memory, they are contiguous to the user space, hence
+just one call to mmap is needed::
+
+    mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+
+If tp_frame_size is a divisor of tp_block_size frames will be
+contiguously spaced by tp_frame_size bytes. If not, each
+tp_block_size/tp_frame_size frames there will be a gap between
+the frames. This is because a frame cannot be spread across two
+blocks.
+
+To use one socket for capture and transmission, the mapping of both the
+RX and TX buffer ring has to be done with one call to mmap::
+
+    ...
+    setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &foo, sizeof(foo));
+    setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &bar, sizeof(bar));
+    ...
+    rx_ring = mmap(0, size * 2, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
+    tx_ring = rx_ring + size;
+
+RX must be the first as the kernel maps the TX ring memory right
+after the RX one.
+
+At the beginning of each frame there is a status field (see
+struct tpacket_hdr). If this field is 0, it means that the frame is ready
+to be used by the kernel. If not, there is a frame the user can read
+and the following flags apply:
+
+Capture process
+^^^^^^^^^^^^^^^
+
+From include/linux/if_packet.h::
+
+     #define TP_STATUS_COPY          (1 << 1)
+     #define TP_STATUS_LOSING        (1 << 2)
+     #define TP_STATUS_CSUMNOTREADY  (1 << 3)
+     #define TP_STATUS_CSUM_VALID    (1 << 7)
+
+======================  =======================================================
+TP_STATUS_COPY		This flag indicates that the frame (and associated
+			meta information) has been truncated because it's
+			larger than tp_frame_size. This packet can be
+			read entirely with recvfrom().
+
+			In order to make this work it must be
+			enabled previously with setsockopt() and
+			the PACKET_COPY_THRESH option.
+
+			The number of frames that can be buffered to
+			be read with recvfrom is limited like a normal socket.
+			See the SO_RCVBUF option in the socket (7) man page.
+
+TP_STATUS_LOSING	indicates there were packet drops from last time
+			statistics were checked with getsockopt() and
+			the PACKET_STATISTICS option.
+
+TP_STATUS_CSUMNOTREADY	currently it's used for outgoing IP packets whose
+			checksum will be done in hardware. So while
+			reading the packet we should not try to check the
+			checksum.
+
+TP_STATUS_CSUM_VALID	This flag indicates that at least the transport
+			header checksum of the packet has been already
+			validated on the kernel side. If the flag is not set
+			then we are free to check the checksum by ourselves
+			provided that TP_STATUS_CSUMNOTREADY is also not set.
+======================  =======================================================
+
+for convenience there are also the following defines::
+
+     #define TP_STATUS_KERNEL        0
+     #define TP_STATUS_USER          1
+
+The kernel initializes all frames to TP_STATUS_KERNEL, when the kernel
+receives a packet it puts in the buffer and updates the status with
+at least the TP_STATUS_USER flag. Then the user can read the packet,
+once the packet is read the user must zero the status field, so the kernel
+can use again that frame buffer.
+
+The user can use poll (any other variant should apply too) to check if new
+packets are in the ring::
+
+    struct pollfd pfd;
+
+    pfd.fd = fd;
+    pfd.revents = 0;
+    pfd.events = POLLIN|POLLRDNORM|POLLERR;
+
+    if (status == TP_STATUS_KERNEL)
+	retval = poll(&pfd, 1, timeout);
+
+It doesn't incur a race condition to first check the status value and
+then poll for frames.
+
+Transmission process
+^^^^^^^^^^^^^^^^^^^^
+
+Those defines are also used for transmission::
+
+     #define TP_STATUS_AVAILABLE        0 // Frame is available
+     #define TP_STATUS_SEND_REQUEST     1 // Frame will be sent on next send()
+     #define TP_STATUS_SENDING          2 // Frame is currently in transmission
+     #define TP_STATUS_WRONG_FORMAT     4 // Frame format is not correct
+
+First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
+packet, the user fills a data buffer of an available frame, sets tp_len to
+current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
+This can be done on multiple frames. Once the user is ready to transmit, it
+calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
+forwarded to the network device. The kernel updates each status of sent
+frames with TP_STATUS_SENDING until the end of transfer.
+
+At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
+
+::
+
+    header->tp_len = in_i_size;
+    header->tp_status = TP_STATUS_SEND_REQUEST;
+    retval = send(this->socket, NULL, 0, 0);
+
+The user can also use poll() to check if a buffer is available:
+
+(status == TP_STATUS_SENDING)
+
+::
+
+    struct pollfd pfd;
+    pfd.fd = fd;
+    pfd.revents = 0;
+    pfd.events = POLLOUT;
+    retval = poll(&pfd, 1, timeout);
+
+What TPACKET versions are available and when to use them?
+=========================================================
+
+::
+
+ int val = tpacket_version;
+ setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
+ getsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
+
+where 'tpacket_version' can be TPACKET_V1 (default), TPACKET_V2, TPACKET_V3.
+
+TPACKET_V1:
+	- Default if not otherwise specified by setsockopt(2)
+	- RX_RING, TX_RING available
+
+TPACKET_V1 --> TPACKET_V2:
+	- Made 64 bit clean due to unsigned long usage in TPACKET_V1
+	  structures, thus this also works on 64 bit kernel with 32 bit
+	  userspace and the like
+	- Timestamp resolution in nanoseconds instead of microseconds
+	- RX_RING, TX_RING available
+	- VLAN metadata information available for packets
+	  (TP_STATUS_VLAN_VALID, TP_STATUS_VLAN_TPID_VALID),
+	  in the tpacket2_hdr structure:
+
+		- TP_STATUS_VLAN_VALID bit being set into the tp_status field indicates
+		  that the tp_vlan_tci field has valid VLAN TCI value
+		- TP_STATUS_VLAN_TPID_VALID bit being set into the tp_status field
+		  indicates that the tp_vlan_tpid field has valid VLAN TPID value
+
+	- How to switch to TPACKET_V2:
+
+		1. Replace struct tpacket_hdr by struct tpacket2_hdr
+		2. Query header len and save
+		3. Set protocol version to 2, set up ring as usual
+		4. For getting the sockaddr_ll,
+		   use ``(void *)hdr + TPACKET_ALIGN(hdrlen)`` instead of
+		   ``(void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))``
+
+TPACKET_V2 --> TPACKET_V3:
+	- Flexible buffer implementation for RX_RING:
+		1. Blocks can be configured with non-static frame-size
+		2. Read/poll is at a block-level (as opposed to packet-level)
+		3. Added poll timeout to avoid indefinite user-space wait
+		   on idle links
+		4. Added user-configurable knobs:
+
+			4.1 block::timeout
+			4.2 tpkt_hdr::sk_rxhash
+
+	- RX Hash data available in user space
+	- TX_RING semantics are conceptually similar to TPACKET_V2;
+	  use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
+	  instead of TPACKET2_HDRLEN. In the current implementation,
+	  the tp_next_offset field in the tpacket3_hdr MUST be set to
+	  zero, indicating that the ring does not hold variable sized frames.
+	  Packets with non-zero values of tp_next_offset will be dropped.
+
+AF_PACKET fanout mode
+=====================
+
+In the AF_PACKET fanout mode, packet reception can be load balanced among
+processes. This also works in combination with mmap(2) on packet sockets.
+
+Currently implemented fanout policies are:
+
+  - PACKET_FANOUT_HASH: schedule to socket by skb's packet hash
+  - PACKET_FANOUT_LB: schedule to socket by round-robin
+  - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on
+  - PACKET_FANOUT_RND: schedule to socket by random selection
+  - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another
+  - PACKET_FANOUT_QM: schedule to socket by skbs recorded queue_mapping
+
+Minimal example code by David S. Miller (try things like "./test eth0 hash",
+"./test eth0 lb", etc.)::
+
+    #include <stddef.h>
+    #include <stdlib.h>
+    #include <stdio.h>
+    #include <string.h>
+
+    #include <sys/types.h>
+    #include <sys/wait.h>
+    #include <sys/socket.h>
+    #include <sys/ioctl.h>
+
+    #include <unistd.h>
+
+    #include <linux/if_ether.h>
+    #include <linux/if_packet.h>
+
+    #include <net/if.h>
+
+    static const char *device_name;
+    static int fanout_type;
+    static int fanout_id;
+
+    #ifndef PACKET_FANOUT
+    # define PACKET_FANOUT			18
+    # define PACKET_FANOUT_HASH		0
+    # define PACKET_FANOUT_LB		1
+    #endif
+
+    static int setup_socket(void)
+    {
+	    int err, fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP));
+	    struct sockaddr_ll ll;
+	    struct ifreq ifr;
+	    int fanout_arg;
+
+	    if (fd < 0) {
+		    perror("socket");
+		    return EXIT_FAILURE;
+	    }
+
+	    memset(&ifr, 0, sizeof(ifr));
+	    strcpy(ifr.ifr_name, device_name);
+	    err = ioctl(fd, SIOCGIFINDEX, &ifr);
+	    if (err < 0) {
+		    perror("SIOCGIFINDEX");
+		    return EXIT_FAILURE;
+	    }
+
+	    memset(&ll, 0, sizeof(ll));
+	    ll.sll_family = AF_PACKET;
+	    ll.sll_ifindex = ifr.ifr_ifindex;
+	    err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
+	    if (err < 0) {
+		    perror("bind");
+		    return EXIT_FAILURE;
+	    }
+
+	    fanout_arg = (fanout_id | (fanout_type << 16));
+	    err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
+			    &fanout_arg, sizeof(fanout_arg));
+	    if (err) {
+		    perror("setsockopt");
+		    return EXIT_FAILURE;
+	    }
+
+	    return fd;
+    }
+
+    static void fanout_thread(void)
+    {
+	    int fd = setup_socket();
+	    int limit = 10000;
+
+	    if (fd < 0)
+		    exit(fd);
+
+	    while (limit-- > 0) {
+		    char buf[1600];
+		    int err;
+
+		    err = read(fd, buf, sizeof(buf));
+		    if (err < 0) {
+			    perror("read");
+			    exit(EXIT_FAILURE);
+		    }
+		    if ((limit % 10) == 0)
+			    fprintf(stdout, "(%d) \n", getpid());
+	    }
+
+	    fprintf(stdout, "%d: Received 10000 packets\n", getpid());
+
+	    close(fd);
+	    exit(0);
+    }
+
+    int main(int argc, char **argp)
+    {
+	    int fd, err;
+	    int i;
+
+	    if (argc != 3) {
+		    fprintf(stderr, "Usage: %s INTERFACE {hash|lb}\n", argp[0]);
+		    return EXIT_FAILURE;
+	    }
+
+	    if (!strcmp(argp[2], "hash"))
+		    fanout_type = PACKET_FANOUT_HASH;
+	    else if (!strcmp(argp[2], "lb"))
+		    fanout_type = PACKET_FANOUT_LB;
+	    else {
+		    fprintf(stderr, "Unknown fanout type [%s]\n", argp[2]);
+		    exit(EXIT_FAILURE);
+	    }
+
+	    device_name = argp[1];
+	    fanout_id = getpid() & 0xffff;
+
+	    for (i = 0; i < 4; i++) {
+		    pid_t pid = fork();
+
+		    switch (pid) {
+		    case 0:
+			    fanout_thread();
+
+		    case -1:
+			    perror("fork");
+			    exit(EXIT_FAILURE);
+		    }
+	    }
+
+	    for (i = 0; i < 4; i++) {
+		    int status;
+
+		    wait(&status);
+	    }
+
+	    return 0;
+    }
+
+AF_PACKET TPACKET_V3 example
+============================
+
+AF_PACKET's TPACKET_V3 ring buffer can be configured to use non-static frame
+sizes by doing its own memory management. It is based on blocks where polling
+works on a per block basis instead of per ring as in TPACKET_V2 and predecessor.
+
+It is said that TPACKET_V3 brings the following benefits:
+
+ * ~15% - 20% reduction in CPU-usage
+ * ~20% increase in packet capture rate
+ * ~2x increase in packet density
+ * Port aggregation analysis
+ * Non static frame size to capture entire packet payload
+
+So it seems to be a good candidate to be used with packet fanout.
+
+Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile
+it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.)::
+
+    /* Written from scratch, but kernel-to-user space API usage
+    * dissected from lolpcap:
+    *  Copyright 2011, Chetan Loke <loke.chetan@gmail.com>
+    *  License: GPL, version 2.0
+    */
+
+    #include <stdio.h>
+    #include <stdlib.h>
+    #include <stdint.h>
+    #include <string.h>
+    #include <assert.h>
+    #include <net/if.h>
+    #include <arpa/inet.h>
+    #include <netdb.h>
+    #include <poll.h>
+    #include <unistd.h>
+    #include <signal.h>
+    #include <inttypes.h>
+    #include <sys/socket.h>
+    #include <sys/mman.h>
+    #include <linux/if_packet.h>
+    #include <linux/if_ether.h>
+    #include <linux/ip.h>
+
+    #ifndef likely
+    # define likely(x)		__builtin_expect(!!(x), 1)
+    #endif
+    #ifndef unlikely
+    # define unlikely(x)		__builtin_expect(!!(x), 0)
+    #endif
+
+    struct block_desc {
+	    uint32_t version;
+	    uint32_t offset_to_priv;
+	    struct tpacket_hdr_v1 h1;
+    };
+
+    struct ring {
+	    struct iovec *rd;
+	    uint8_t *map;
+	    struct tpacket_req3 req;
+    };
+
+    static unsigned long packets_total = 0, bytes_total = 0;
+    static sig_atomic_t sigint = 0;
+
+    static void sighandler(int num)
+    {
+	    sigint = 1;
+    }
+
+    static int setup_socket(struct ring *ring, char *netdev)
+    {
+	    int err, i, fd, v = TPACKET_V3;
+	    struct sockaddr_ll ll;
+	    unsigned int blocksiz = 1 << 22, framesiz = 1 << 11;
+	    unsigned int blocknum = 64;
+
+	    fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
+	    if (fd < 0) {
+		    perror("socket");
+		    exit(1);
+	    }
+
+	    err = setsockopt(fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v));
+	    if (err < 0) {
+		    perror("setsockopt");
+		    exit(1);
+	    }
+
+	    memset(&ring->req, 0, sizeof(ring->req));
+	    ring->req.tp_block_size = blocksiz;
+	    ring->req.tp_frame_size = framesiz;
+	    ring->req.tp_block_nr = blocknum;
+	    ring->req.tp_frame_nr = (blocksiz * blocknum) / framesiz;
+	    ring->req.tp_retire_blk_tov = 60;
+	    ring->req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
+
+	    err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req,
+			    sizeof(ring->req));
+	    if (err < 0) {
+		    perror("setsockopt");
+		    exit(1);
+	    }
+
+	    ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr,
+			    PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0);
+	    if (ring->map == MAP_FAILED) {
+		    perror("mmap");
+		    exit(1);
+	    }
+
+	    ring->rd = malloc(ring->req.tp_block_nr * sizeof(*ring->rd));
+	    assert(ring->rd);
+	    for (i = 0; i < ring->req.tp_block_nr; ++i) {
+		    ring->rd[i].iov_base = ring->map + (i * ring->req.tp_block_size);
+		    ring->rd[i].iov_len = ring->req.tp_block_size;
+	    }
+
+	    memset(&ll, 0, sizeof(ll));
+	    ll.sll_family = PF_PACKET;
+	    ll.sll_protocol = htons(ETH_P_ALL);
+	    ll.sll_ifindex = if_nametoindex(netdev);
+	    ll.sll_hatype = 0;
+	    ll.sll_pkttype = 0;
+	    ll.sll_halen = 0;
+
+	    err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
+	    if (err < 0) {
+		    perror("bind");
+		    exit(1);
+	    }
+
+	    return fd;
+    }
+
+    static void display(struct tpacket3_hdr *ppd)
+    {
+	    struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac);
+	    struct iphdr *ip = (struct iphdr *) ((uint8_t *) eth + ETH_HLEN);
+
+	    if (eth->h_proto == htons(ETH_P_IP)) {
+		    struct sockaddr_in ss, sd;
+		    char sbuff[NI_MAXHOST], dbuff[NI_MAXHOST];
+
+		    memset(&ss, 0, sizeof(ss));
+		    ss.sin_family = PF_INET;
+		    ss.sin_addr.s_addr = ip->saddr;
+		    getnameinfo((struct sockaddr *) &ss, sizeof(ss),
+				sbuff, sizeof(sbuff), NULL, 0, NI_NUMERICHOST);
+
+		    memset(&sd, 0, sizeof(sd));
+		    sd.sin_family = PF_INET;
+		    sd.sin_addr.s_addr = ip->daddr;
+		    getnameinfo((struct sockaddr *) &sd, sizeof(sd),
+				dbuff, sizeof(dbuff), NULL, 0, NI_NUMERICHOST);
+
+		    printf("%s -> %s, ", sbuff, dbuff);
+	    }
+
+	    printf("rxhash: 0x%x\n", ppd->hv1.tp_rxhash);
+    }
+
+    static void walk_block(struct block_desc *pbd, const int block_num)
+    {
+	    int num_pkts = pbd->h1.num_pkts, i;
+	    unsigned long bytes = 0;
+	    struct tpacket3_hdr *ppd;
+
+	    ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
+					pbd->h1.offset_to_first_pkt);
+	    for (i = 0; i < num_pkts; ++i) {
+		    bytes += ppd->tp_snaplen;
+		    display(ppd);
+
+		    ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd +
+						ppd->tp_next_offset);
+	    }
+
+	    packets_total += num_pkts;
+	    bytes_total += bytes;
+    }
+
+    static void flush_block(struct block_desc *pbd)
+    {
+	    pbd->h1.block_status = TP_STATUS_KERNEL;
+    }
+
+    static void teardown_socket(struct ring *ring, int fd)
+    {
+	    munmap(ring->map, ring->req.tp_block_size * ring->req.tp_block_nr);
+	    free(ring->rd);
+	    close(fd);
+    }
+
+    int main(int argc, char **argp)
+    {
+	    int fd, err;
+	    socklen_t len;
+	    struct ring ring;
+	    struct pollfd pfd;
+	    unsigned int block_num = 0, blocks = 64;
+	    struct block_desc *pbd;
+	    struct tpacket_stats_v3 stats;
+
+	    if (argc != 2) {
+		    fprintf(stderr, "Usage: %s INTERFACE\n", argp[0]);
+		    return EXIT_FAILURE;
+	    }
+
+	    signal(SIGINT, sighandler);
+
+	    memset(&ring, 0, sizeof(ring));
+	    fd = setup_socket(&ring, argp[argc - 1]);
+	    assert(fd > 0);
+
+	    memset(&pfd, 0, sizeof(pfd));
+	    pfd.fd = fd;
+	    pfd.events = POLLIN | POLLERR;
+	    pfd.revents = 0;
+
+	    while (likely(!sigint)) {
+		    pbd = (struct block_desc *) ring.rd[block_num].iov_base;
+
+		    if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
+			    poll(&pfd, 1, -1);
+			    continue;
+		    }
+
+		    walk_block(pbd, block_num);
+		    flush_block(pbd);
+		    block_num = (block_num + 1) % blocks;
+	    }
+
+	    len = sizeof(stats);
+	    err = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len);
+	    if (err < 0) {
+		    perror("getsockopt");
+		    exit(1);
+	    }
+
+	    fflush(stdout);
+	    printf("\nReceived %u packets, %lu bytes, %u dropped, freeze_q_cnt: %u\n",
+		stats.tp_packets, bytes_total, stats.tp_drops,
+		stats.tp_freeze_q_cnt);
+
+	    teardown_socket(&ring, fd);
+	    return 0;
+    }
+
+PACKET_QDISC_BYPASS
+===================
+
+If there is a requirement to load the network with many packets in a similar
+fashion as pktgen does, you might set the following option after socket
+creation::
+
+    int one = 1;
+    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));
+
+This has the side effect that packets sent through PF_PACKET will bypass the
+kernel's qdisc layer and are forcibly pushed to the driver directly. This means
+packets are not buffered, tc disciplines are ignored, increased loss can occur
+and such packets are also not visible to other PF_PACKET sockets anymore. So,
+you have been warned; generally, this can be useful for stress testing various
+components of a system.
+
+By default, PACKET_QDISC_BYPASS is disabled and needs to be explicitly enabled
+on PF_PACKET sockets.
+
+PACKET_TIMESTAMP
+================
+
+The PACKET_TIMESTAMP setting determines the source of the timestamp in
+the packet meta information for mmap(2)ed RX_RING and TX_RINGs.  If your
+NIC is capable of timestamping packets in hardware, you can request those
+hardware timestamps to be used. Note: you may need to enable the generation
+of hardware timestamps with SIOCSHWTSTAMP (see related information from
+Documentation/networking/timestamping.txt).
+
+PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING::
+
+    int req = SOF_TIMESTAMPING_RAW_HARDWARE;
+    setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req));
+
+For the mmap(2)ed ring buffers, such timestamps are stored in the
+``tpacket{,2,3}_hdr`` structure's tp_sec and ``tp_{n,u}sec`` members.
+To determine what kind of timestamp has been reported, the tp_status field
+is binary or'ed with the following possible bits ...
+
+::
+
+    TP_STATUS_TS_RAW_HARDWARE
+    TP_STATUS_TS_SOFTWARE
+
+... that are equivalent to its ``SOF_TIMESTAMPING_*`` counterparts. For the
+RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a
+software fallback was invoked *within* PF_PACKET's processing code (less
+precise).
+
+Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
+ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
+frames to be updated resp. the frame handed over to the application, iv) walk
+through the frames to pick up the individual hw/sw timestamps.
+
+Only (!) if transmit timestamping is enabled, then these bits are combined
+with binary | with TP_STATUS_AVAILABLE, so you must check for that in your
+application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
+in a first step to see if the frame belongs to the application, and then
+one can extract the type of timestamp in a second step from tp_status)!
+
+If you don't care about them, thus having it disabled, checking for
+TP_STATUS_AVAILABLE resp. TP_STATUS_WRONG_FORMAT is sufficient. If in the
+TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
+members do not contain a valid value. For TX_RINGs, by default no timestamp
+is generated!
+
+See include/linux/net_tstamp.h and Documentation/networking/timestamping.txt
+for more information on hardware timestamps.
+
+Miscellaneous bits
+==================
+
+- Packet sockets work well together with Linux socket filters, thus you also
+  might want to have a look at Documentation/networking/filter.txt
+
+THANKS
+======
+
+   Jesse Brandeburg, for fixing my grammatical/spelling errors
diff --git a/Documentation/networking/packet_mmap.txt b/Documentation/networking/packet_mmap.txt
deleted file mode 100644
index 494614573c67..000000000000
--- a/Documentation/networking/packet_mmap.txt
+++ /dev/null
@@ -1,1061 +0,0 @@
---------------------------------------------------------------------------------
-+ ABSTRACT
---------------------------------------------------------------------------------
-
-This file documents the mmap() facility available with the PACKET
-socket interface on 2.4/2.6/3.x kernels. This type of sockets is used for
-i) capture network traffic with utilities like tcpdump, ii) transmit network
-traffic, or any other that needs raw access to network interface.
-
-Howto can be found at:
-    https://sites.google.com/site/packetmmap/
-
-Please send your comments to
-    Ulisses Alonso Camaró <uaca@i.hate.spam.alumni.uv.es>
-    Johann Baudy
-
--------------------------------------------------------------------------------
-+ Why use PACKET_MMAP
---------------------------------------------------------------------------------
-
-In Linux 2.4/2.6/3.x if PACKET_MMAP is not enabled, the capture process is very
-inefficient. It uses very limited buffers and requires one system call to
-capture each packet, it requires two if you want to get packet's timestamp
-(like libpcap always does).
-
-In the other hand PACKET_MMAP is very efficient. PACKET_MMAP provides a size 
-configurable circular buffer mapped in user space that can be used to either
-send or receive packets. This way reading packets just needs to wait for them,
-most of the time there is no need to issue a single system call. Concerning
-transmission, multiple packets can be sent through one system call to get the
-highest bandwidth. By using a shared buffer between the kernel and the user
-also has the benefit of minimizing packet copies.
-
-It's fine to use PACKET_MMAP to improve the performance of the capture and
-transmission process, but it isn't everything. At least, if you are capturing
-at high speeds (this is relative to the cpu speed), you should check if the
-device driver of your network interface card supports some sort of interrupt
-load mitigation or (even better) if it supports NAPI, also make sure it is
-enabled. For transmission, check the MTU (Maximum Transmission Unit) used and
-supported by devices of your network. CPU IRQ pinning of your network interface
-card can also be an advantage.
-
---------------------------------------------------------------------------------
-+ How to use mmap() to improve capture process
---------------------------------------------------------------------------------
-
-From the user standpoint, you should use the higher level libpcap library, which
-is a de facto standard, portable across nearly all operating systems
-including Win32. 
-
-Packet MMAP support was integrated into libpcap around the time of version 1.3.0;
-TPACKET_V3 support was added in version 1.5.0
-
---------------------------------------------------------------------------------
-+ How to use mmap() directly to improve capture process
---------------------------------------------------------------------------------
-
-From the system calls stand point, the use of PACKET_MMAP involves
-the following process:
-
-
-[setup]     socket() -------> creation of the capture socket
-            setsockopt() ---> allocation of the circular buffer (ring)
-                              option: PACKET_RX_RING
-            mmap() ---------> mapping of the allocated buffer to the
-                              user process
-
-[capture]   poll() ---------> to wait for incoming packets
-
-[shutdown]  close() --------> destruction of the capture socket and
-                              deallocation of all associated 
-                              resources.
-
-
-socket creation and destruction is straight forward, and is done 
-the same way with or without PACKET_MMAP:
-
- int fd = socket(PF_PACKET, mode, htons(ETH_P_ALL));
-
-where mode is SOCK_RAW for the raw interface were link level
-information can be captured or SOCK_DGRAM for the cooked
-interface where link level information capture is not 
-supported and a link level pseudo-header is provided 
-by the kernel.
-
-The destruction of the socket and all associated resources
-is done by a simple call to close(fd).
-
-Similarly as without PACKET_MMAP, it is possible to use one socket
-for capture and transmission. This can be done by mapping the
-allocated RX and TX buffer ring with a single mmap() call.
-See "Mapping and use of the circular buffer (ring)".
-
-Next I will describe PACKET_MMAP settings and its constraints,
-also the mapping of the circular buffer in the user process and 
-the use of this buffer.
-
---------------------------------------------------------------------------------
-+ How to use mmap() directly to improve transmission process
---------------------------------------------------------------------------------
-Transmission process is similar to capture as shown below.
-
-[setup]          socket() -------> creation of the transmission socket
-                 setsockopt() ---> allocation of the circular buffer (ring)
-                                   option: PACKET_TX_RING
-                 bind() ---------> bind transmission socket with a network interface
-                 mmap() ---------> mapping of the allocated buffer to the
-                                   user process
-
-[transmission]   poll() ---------> wait for free packets (optional)
-                 send() ---------> send all packets that are set as ready in
-                                   the ring
-                                   The flag MSG_DONTWAIT can be used to return
-                                   before end of transfer.
-
-[shutdown]  close() --------> destruction of the transmission socket and
-                              deallocation of all associated resources.
-
-Socket creation and destruction is also straight forward, and is done
-the same way as in capturing described in the previous paragraph:
-
- int fd = socket(PF_PACKET, mode, 0);
-
-The protocol can optionally be 0 in case we only want to transmit
-via this socket, which avoids an expensive call to packet_rcv().
-In this case, you also need to bind(2) the TX_RING with sll_protocol = 0
-set. Otherwise, htons(ETH_P_ALL) or any other protocol, for example.
-
-Binding the socket to your network interface is mandatory (with zero copy) to
-know the header size of frames used in the circular buffer.
-
-As capture, each frame contains two parts:
-
- --------------------
-| struct tpacket_hdr | Header. It contains the status of
-|                    | of this frame
-|--------------------|
-| data buffer        |
-.                    .  Data that will be sent over the network interface.
-.                    .
- --------------------
-
- bind() associates the socket to your network interface thanks to
- sll_ifindex parameter of struct sockaddr_ll.
-
- Initialization example:
-
- struct sockaddr_ll my_addr;
- struct ifreq s_ifr;
- ...
-
- strncpy (s_ifr.ifr_name, "eth0", sizeof(s_ifr.ifr_name));
-
- /* get interface index of eth0 */
- ioctl(this->socket, SIOCGIFINDEX, &s_ifr);
-
- /* fill sockaddr_ll struct to prepare binding */
- my_addr.sll_family = AF_PACKET;
- my_addr.sll_protocol = htons(ETH_P_ALL);
- my_addr.sll_ifindex =  s_ifr.ifr_ifindex;
-
- /* bind socket to eth0 */
- bind(this->socket, (struct sockaddr *)&my_addr, sizeof(struct sockaddr_ll));
-
- A complete tutorial is available at: https://sites.google.com/site/packetmmap/
-
-By default, the user should put data at :
- frame base + TPACKET_HDRLEN - sizeof(struct sockaddr_ll)
-
-So, whatever you choose for the socket mode (SOCK_DGRAM or SOCK_RAW),
-the beginning of the user data will be at :
- frame base + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
-
-If you wish to put user data at a custom offset from the beginning of
-the frame (for payload alignment with SOCK_RAW mode for instance) you
-can set tp_net (with SOCK_DGRAM) or tp_mac (with SOCK_RAW). In order
-to make this work it must be enabled previously with setsockopt()
-and the PACKET_TX_HAS_OFF option.
-
---------------------------------------------------------------------------------
-+ PACKET_MMAP settings
---------------------------------------------------------------------------------
-
-To setup PACKET_MMAP from user level code is done with a call like
-
- - Capture process
-     setsockopt(fd, SOL_PACKET, PACKET_RX_RING, (void *) &req, sizeof(req))
- - Transmission process
-     setsockopt(fd, SOL_PACKET, PACKET_TX_RING, (void *) &req, sizeof(req))
-
-The most significant argument in the previous call is the req parameter, 
-this parameter must to have the following structure:
-
-    struct tpacket_req
-    {
-        unsigned int    tp_block_size;  /* Minimal size of contiguous block */
-        unsigned int    tp_block_nr;    /* Number of blocks */
-        unsigned int    tp_frame_size;  /* Size of frame */
-        unsigned int    tp_frame_nr;    /* Total number of frames */
-    };
-
-This structure is defined in /usr/include/linux/if_packet.h and establishes a 
-circular buffer (ring) of unswappable memory.
-Being mapped in the capture process allows reading the captured frames and 
-related meta-information like timestamps without requiring a system call.
-
-Frames are grouped in blocks. Each block is a physically contiguous
-region of memory and holds tp_block_size/tp_frame_size frames. The total number 
-of blocks is tp_block_nr. Note that tp_frame_nr is a redundant parameter because
-
-    frames_per_block = tp_block_size/tp_frame_size
-
-indeed, packet_set_ring checks that the following condition is true
-
-    frames_per_block * tp_block_nr == tp_frame_nr
-
-Lets see an example, with the following values:
-
-     tp_block_size= 4096
-     tp_frame_size= 2048
-     tp_block_nr  = 4
-     tp_frame_nr  = 8
-
-we will get the following buffer structure:
-
-        block #1                 block #2         
-+---------+---------+    +---------+---------+    
-| frame 1 | frame 2 |    | frame 3 | frame 4 |    
-+---------+---------+    +---------+---------+    
-
-        block #3                 block #4
-+---------+---------+    +---------+---------+
-| frame 5 | frame 6 |    | frame 7 | frame 8 |
-+---------+---------+    +---------+---------+
-
-A frame can be of any size with the only condition it can fit in a block. A block
-can only hold an integer number of frames, or in other words, a frame cannot 
-be spawned across two blocks, so there are some details you have to take into 
-account when choosing the frame_size. See "Mapping and use of the circular 
-buffer (ring)".
-
---------------------------------------------------------------------------------
-+ PACKET_MMAP setting constraints
---------------------------------------------------------------------------------
-
-In kernel versions prior to 2.4.26 (for the 2.4 branch) and 2.6.5 (2.6 branch),
-the PACKET_MMAP buffer could hold only 32768 frames in a 32 bit architecture or
-16384 in a 64 bit architecture. For information on these kernel versions
-see http://pusa.uv.es/~ulisses/packet_mmap/packet_mmap.pre-2.4.26_2.6.5.txt
-
- Block size limit
-------------------
-
-As stated earlier, each block is a contiguous physical region of memory. These 
-memory regions are allocated with calls to the __get_free_pages() function. As 
-the name indicates, this function allocates pages of memory, and the second
-argument is "order" or a power of two number of pages, that is 
-(for PAGE_SIZE == 4096) order=0 ==> 4096 bytes, order=1 ==> 8192 bytes, 
-order=2 ==> 16384 bytes, etc. The maximum size of a 
-region allocated by __get_free_pages is determined by the MAX_ORDER macro. More 
-precisely the limit can be calculated as:
-
-   PAGE_SIZE << MAX_ORDER
-
-   In a i386 architecture PAGE_SIZE is 4096 bytes 
-   In a 2.4/i386 kernel MAX_ORDER is 10
-   In a 2.6/i386 kernel MAX_ORDER is 11
-
-So get_free_pages can allocate as much as 4MB or 8MB in a 2.4/2.6 kernel 
-respectively, with an i386 architecture.
-
-User space programs can include /usr/include/sys/user.h and 
-/usr/include/linux/mmzone.h to get PAGE_SIZE MAX_ORDER declarations.
-
-The pagesize can also be determined dynamically with the getpagesize (2) 
-system call. 
-
- Block number limit
---------------------
-
-To understand the constraints of PACKET_MMAP, we have to see the structure 
-used to hold the pointers to each block.
-
-Currently, this structure is a dynamically allocated vector with kmalloc 
-called pg_vec, its size limits the number of blocks that can be allocated.
-
-    +---+---+---+---+
-    | x | x | x | x |
-    +---+---+---+---+
-      |   |   |   |
-      |   |   |   v
-      |   |   v  block #4
-      |   v  block #3
-      v  block #2
-     block #1
-
-kmalloc allocates any number of bytes of physically contiguous memory from 
-a pool of pre-determined sizes. This pool of memory is maintained by the slab 
-allocator which is at the end the responsible for doing the allocation and 
-hence which imposes the maximum memory that kmalloc can allocate. 
-
-In a 2.4/2.6 kernel and the i386 architecture, the limit is 131072 bytes. The 
-predetermined sizes that kmalloc uses can be checked in the "size-<bytes>" 
-entries of /proc/slabinfo
-
-In a 32 bit architecture, pointers are 4 bytes long, so the total number of 
-pointers to blocks is
-
-     131072/4 = 32768 blocks
-
- PACKET_MMAP buffer size calculator
-------------------------------------
-
-Definitions:
-
-<size-max>    : is the maximum size of allocable with kmalloc (see /proc/slabinfo)
-<pointer size>: depends on the architecture -- sizeof(void *)
-<page size>   : depends on the architecture -- PAGE_SIZE or getpagesize (2)
-<max-order>   : is the value defined with MAX_ORDER
-<frame size>  : it's an upper bound of frame's capture size (more on this later)
-
-from these definitions we will derive 
-
-	<block number> = <size-max>/<pointer size>
-	<block size> = <pagesize> << <max-order>
-
-so, the max buffer size is
-
-	<block number> * <block size>
-
-and, the number of frames be
-
-	<block number> * <block size> / <frame size>
-
-Suppose the following parameters, which apply for 2.6 kernel and an
-i386 architecture:
-
-	<size-max> = 131072 bytes
-	<pointer size> = 4 bytes
-	<pagesize> = 4096 bytes
-	<max-order> = 11
-
-and a value for <frame size> of 2048 bytes. These parameters will yield
-
-	<block number> = 131072/4 = 32768 blocks
-	<block size> = 4096 << 11 = 8 MiB.
-
-and hence the buffer will have a 262144 MiB size. So it can hold 
-262144 MiB / 2048 bytes = 134217728 frames
-
-Actually, this buffer size is not possible with an i386 architecture. 
-Remember that the memory is allocated in kernel space, in the case of 
-an i386 kernel's memory size is limited to 1GiB.
-
-All memory allocations are not freed until the socket is closed. The memory 
-allocations are done with GFP_KERNEL priority, this basically means that 
-the allocation can wait and swap other process' memory in order to allocate 
-the necessary memory, so normally limits can be reached.
-
- Other constraints
--------------------
-
-If you check the source code you will see that what I draw here as a frame
-is not only the link level frame. At the beginning of each frame there is a 
-header called struct tpacket_hdr used in PACKET_MMAP to hold link level's frame
-meta information like timestamp. So what we draw here a frame it's really 
-the following (from include/linux/if_packet.h):
-
-/*
-   Frame structure:
-
-   - Start. Frame must be aligned to TPACKET_ALIGNMENT=16
-   - struct tpacket_hdr
-   - pad to TPACKET_ALIGNMENT=16
-   - struct sockaddr_ll
-   - Gap, chosen so that packet data (Start+tp_net) aligns to 
-     TPACKET_ALIGNMENT=16
-   - Start+tp_mac: [ Optional MAC header ]
-   - Start+tp_net: Packet data, aligned to TPACKET_ALIGNMENT=16.
-   - Pad to align to TPACKET_ALIGNMENT=16
- */
- 
- The following are conditions that are checked in packet_set_ring
-
-   tp_block_size must be a multiple of PAGE_SIZE (1)
-   tp_frame_size must be greater than TPACKET_HDRLEN (obvious)
-   tp_frame_size must be a multiple of TPACKET_ALIGNMENT
-   tp_frame_nr   must be exactly frames_per_block*tp_block_nr
-
-Note that tp_block_size should be chosen to be a power of two or there will
-be a waste of memory.
-
---------------------------------------------------------------------------------
-+ Mapping and use of the circular buffer (ring)
---------------------------------------------------------------------------------
-
-The mapping of the buffer in the user process is done with the conventional 
-mmap function. Even the circular buffer is compound of several physically
-discontiguous blocks of memory, they are contiguous to the user space, hence
-just one call to mmap is needed:
-
-    mmap(0, size, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-
-If tp_frame_size is a divisor of tp_block_size frames will be 
-contiguously spaced by tp_frame_size bytes. If not, each
-tp_block_size/tp_frame_size frames there will be a gap between 
-the frames. This is because a frame cannot be spawn across two
-blocks. 
-
-To use one socket for capture and transmission, the mapping of both the
-RX and TX buffer ring has to be done with one call to mmap:
-
-    ...
-    setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &foo, sizeof(foo));
-    setsockopt(fd, SOL_PACKET, PACKET_TX_RING, &bar, sizeof(bar));
-    ...
-    rx_ring = mmap(0, size * 2, PROT_READ|PROT_WRITE, MAP_SHARED, fd, 0);
-    tx_ring = rx_ring + size;
-
-RX must be the first as the kernel maps the TX ring memory right
-after the RX one.
-
-At the beginning of each frame there is an status field (see 
-struct tpacket_hdr). If this field is 0 means that the frame is ready
-to be used for the kernel, If not, there is a frame the user can read 
-and the following flags apply:
-
-+++ Capture process:
-     from include/linux/if_packet.h
-
-     #define TP_STATUS_COPY          (1 << 1)
-     #define TP_STATUS_LOSING        (1 << 2)
-     #define TP_STATUS_CSUMNOTREADY  (1 << 3)
-     #define TP_STATUS_CSUM_VALID    (1 << 7)
-
-TP_STATUS_COPY        : This flag indicates that the frame (and associated
-                        meta information) has been truncated because it's 
-                        larger than tp_frame_size. This packet can be 
-                        read entirely with recvfrom().
-                        
-                        In order to make this work it must to be
-                        enabled previously with setsockopt() and 
-                        the PACKET_COPY_THRESH option. 
-
-                        The number of frames that can be buffered to
-                        be read with recvfrom is limited like a normal socket.
-                        See the SO_RCVBUF option in the socket (7) man page.
-
-TP_STATUS_LOSING      : indicates there were packet drops from last time 
-                        statistics where checked with getsockopt() and
-                        the PACKET_STATISTICS option.
-
-TP_STATUS_CSUMNOTREADY: currently it's used for outgoing IP packets which 
-                        its checksum will be done in hardware. So while
-                        reading the packet we should not try to check the 
-                        checksum. 
-
-TP_STATUS_CSUM_VALID  : This flag indicates that at least the transport
-                        header checksum of the packet has been already
-                        validated on the kernel side. If the flag is not set
-                        then we are free to check the checksum by ourselves
-                        provided that TP_STATUS_CSUMNOTREADY is also not set.
-
-for convenience there are also the following defines:
-
-     #define TP_STATUS_KERNEL        0
-     #define TP_STATUS_USER          1
-
-The kernel initializes all frames to TP_STATUS_KERNEL, when the kernel
-receives a packet it puts in the buffer and updates the status with
-at least the TP_STATUS_USER flag. Then the user can read the packet,
-once the packet is read the user must zero the status field, so the kernel 
-can use again that frame buffer.
-
-The user can use poll (any other variant should apply too) to check if new
-packets are in the ring:
-
-    struct pollfd pfd;
-
-    pfd.fd = fd;
-    pfd.revents = 0;
-    pfd.events = POLLIN|POLLRDNORM|POLLERR;
-
-    if (status == TP_STATUS_KERNEL)
-        retval = poll(&pfd, 1, timeout);
-
-It doesn't incur in a race condition to first check the status value and 
-then poll for frames.
-
-++ Transmission process
-Those defines are also used for transmission:
-
-     #define TP_STATUS_AVAILABLE        0 // Frame is available
-     #define TP_STATUS_SEND_REQUEST     1 // Frame will be sent on next send()
-     #define TP_STATUS_SENDING          2 // Frame is currently in transmission
-     #define TP_STATUS_WRONG_FORMAT     4 // Frame format is not correct
-
-First, the kernel initializes all frames to TP_STATUS_AVAILABLE. To send a
-packet, the user fills a data buffer of an available frame, sets tp_len to
-current data buffer size and sets its status field to TP_STATUS_SEND_REQUEST.
-This can be done on multiple frames. Once the user is ready to transmit, it
-calls send(). Then all buffers with status equal to TP_STATUS_SEND_REQUEST are
-forwarded to the network device. The kernel updates each status of sent
-frames with TP_STATUS_SENDING until the end of transfer.
-At the end of each transfer, buffer status returns to TP_STATUS_AVAILABLE.
-
-    header->tp_len = in_i_size;
-    header->tp_status = TP_STATUS_SEND_REQUEST;
-    retval = send(this->socket, NULL, 0, 0);
-
-The user can also use poll() to check if a buffer is available:
-(status == TP_STATUS_SENDING)
-
-    struct pollfd pfd;
-    pfd.fd = fd;
-    pfd.revents = 0;
-    pfd.events = POLLOUT;
-    retval = poll(&pfd, 1, timeout);
-
--------------------------------------------------------------------------------
-+ What TPACKET versions are available and when to use them?
--------------------------------------------------------------------------------
-
- int val = tpacket_version;
- setsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
- getsockopt(fd, SOL_PACKET, PACKET_VERSION, &val, sizeof(val));
-
-where 'tpacket_version' can be TPACKET_V1 (default), TPACKET_V2, TPACKET_V3.
-
-TPACKET_V1:
-	- Default if not otherwise specified by setsockopt(2)
-	- RX_RING, TX_RING available
-
-TPACKET_V1 --> TPACKET_V2:
-	- Made 64 bit clean due to unsigned long usage in TPACKET_V1
-	  structures, thus this also works on 64 bit kernel with 32 bit
-	  userspace and the like
-	- Timestamp resolution in nanoseconds instead of microseconds
-	- RX_RING, TX_RING available
-	- VLAN metadata information available for packets
-	  (TP_STATUS_VLAN_VALID, TP_STATUS_VLAN_TPID_VALID),
-	  in the tpacket2_hdr structure:
-		- TP_STATUS_VLAN_VALID bit being set into the tp_status field indicates
-		  that the tp_vlan_tci field has valid VLAN TCI value
-		- TP_STATUS_VLAN_TPID_VALID bit being set into the tp_status field
-		  indicates that the tp_vlan_tpid field has valid VLAN TPID value
-	- How to switch to TPACKET_V2:
-		1. Replace struct tpacket_hdr by struct tpacket2_hdr
-		2. Query header len and save
-		3. Set protocol version to 2, set up ring as usual
-		4. For getting the sockaddr_ll,
-		   use (void *)hdr + TPACKET_ALIGN(hdrlen) instead of
-		   (void *)hdr + TPACKET_ALIGN(sizeof(struct tpacket_hdr))
-
-TPACKET_V2 --> TPACKET_V3:
-	- Flexible buffer implementation for RX_RING:
-		1. Blocks can be configured with non-static frame-size
-		2. Read/poll is at a block-level (as opposed to packet-level)
-		3. Added poll timeout to avoid indefinite user-space wait
-		   on idle links
-		4. Added user-configurable knobs:
-			4.1 block::timeout
-			4.2 tpkt_hdr::sk_rxhash
-	- RX Hash data available in user space
-	- TX_RING semantics are conceptually similar to TPACKET_V2;
-	  use tpacket3_hdr instead of tpacket2_hdr, and TPACKET3_HDRLEN
-	  instead of TPACKET2_HDRLEN. In the current implementation,
-	  the tp_next_offset field in the tpacket3_hdr MUST be set to
-	  zero, indicating that the ring does not hold variable sized frames.
-	  Packets with non-zero values of tp_next_offset will be dropped.
-
--------------------------------------------------------------------------------
-+ AF_PACKET fanout mode
--------------------------------------------------------------------------------
-
-In the AF_PACKET fanout mode, packet reception can be load balanced among
-processes. This also works in combination with mmap(2) on packet sockets.
-
-Currently implemented fanout policies are:
-
-  - PACKET_FANOUT_HASH: schedule to socket by skb's packet hash
-  - PACKET_FANOUT_LB: schedule to socket by round-robin
-  - PACKET_FANOUT_CPU: schedule to socket by CPU packet arrives on
-  - PACKET_FANOUT_RND: schedule to socket by random selection
-  - PACKET_FANOUT_ROLLOVER: if one socket is full, rollover to another
-  - PACKET_FANOUT_QM: schedule to socket by skbs recorded queue_mapping
-
-Minimal example code by David S. Miller (try things like "./test eth0 hash",
-"./test eth0 lb", etc.):
-
-#include <stddef.h>
-#include <stdlib.h>
-#include <stdio.h>
-#include <string.h>
-
-#include <sys/types.h>
-#include <sys/wait.h>
-#include <sys/socket.h>
-#include <sys/ioctl.h>
-
-#include <unistd.h>
-
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-
-#include <net/if.h>
-
-static const char *device_name;
-static int fanout_type;
-static int fanout_id;
-
-#ifndef PACKET_FANOUT
-# define PACKET_FANOUT			18
-# define PACKET_FANOUT_HASH		0
-# define PACKET_FANOUT_LB		1
-#endif
-
-static int setup_socket(void)
-{
-	int err, fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_IP));
-	struct sockaddr_ll ll;
-	struct ifreq ifr;
-	int fanout_arg;
-
-	if (fd < 0) {
-		perror("socket");
-		return EXIT_FAILURE;
-	}
-
-	memset(&ifr, 0, sizeof(ifr));
-	strcpy(ifr.ifr_name, device_name);
-	err = ioctl(fd, SIOCGIFINDEX, &ifr);
-	if (err < 0) {
-		perror("SIOCGIFINDEX");
-		return EXIT_FAILURE;
-	}
-
-	memset(&ll, 0, sizeof(ll));
-	ll.sll_family = AF_PACKET;
-	ll.sll_ifindex = ifr.ifr_ifindex;
-	err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
-	if (err < 0) {
-		perror("bind");
-		return EXIT_FAILURE;
-	}
-
-	fanout_arg = (fanout_id | (fanout_type << 16));
-	err = setsockopt(fd, SOL_PACKET, PACKET_FANOUT,
-			 &fanout_arg, sizeof(fanout_arg));
-	if (err) {
-		perror("setsockopt");
-		return EXIT_FAILURE;
-	}
-
-	return fd;
-}
-
-static void fanout_thread(void)
-{
-	int fd = setup_socket();
-	int limit = 10000;
-
-	if (fd < 0)
-		exit(fd);
-
-	while (limit-- > 0) {
-		char buf[1600];
-		int err;
-
-		err = read(fd, buf, sizeof(buf));
-		if (err < 0) {
-			perror("read");
-			exit(EXIT_FAILURE);
-		}
-		if ((limit % 10) == 0)
-			fprintf(stdout, "(%d) \n", getpid());
-	}
-
-	fprintf(stdout, "%d: Received 10000 packets\n", getpid());
-
-	close(fd);
-	exit(0);
-}
-
-int main(int argc, char **argp)
-{
-	int fd, err;
-	int i;
-
-	if (argc != 3) {
-		fprintf(stderr, "Usage: %s INTERFACE {hash|lb}\n", argp[0]);
-		return EXIT_FAILURE;
-	}
-
-	if (!strcmp(argp[2], "hash"))
-		fanout_type = PACKET_FANOUT_HASH;
-	else if (!strcmp(argp[2], "lb"))
-		fanout_type = PACKET_FANOUT_LB;
-	else {
-		fprintf(stderr, "Unknown fanout type [%s]\n", argp[2]);
-		exit(EXIT_FAILURE);
-	}
-
-	device_name = argp[1];
-	fanout_id = getpid() & 0xffff;
-
-	for (i = 0; i < 4; i++) {
-		pid_t pid = fork();
-
-		switch (pid) {
-		case 0:
-			fanout_thread();
-
-		case -1:
-			perror("fork");
-			exit(EXIT_FAILURE);
-		}
-	}
-
-	for (i = 0; i < 4; i++) {
-		int status;
-
-		wait(&status);
-	}
-
-	return 0;
-}
-
--------------------------------------------------------------------------------
-+ AF_PACKET TPACKET_V3 example
--------------------------------------------------------------------------------
-
-AF_PACKET's TPACKET_V3 ring buffer can be configured to use non-static frame
-sizes by doing it's own memory management. It is based on blocks where polling
-works on a per block basis instead of per ring as in TPACKET_V2 and predecessor.
-
-It is said that TPACKET_V3 brings the following benefits:
- *) ~15 - 20% reduction in CPU-usage
- *) ~20% increase in packet capture rate
- *) ~2x increase in packet density
- *) Port aggregation analysis
- *) Non static frame size to capture entire packet payload
-
-So it seems to be a good candidate to be used with packet fanout.
-
-Minimal example code by Daniel Borkmann based on Chetan Loke's lolpcap (compile
-it with gcc -Wall -O2 blob.c, and try things like "./a.out eth0", etc.):
-
-/* Written from scratch, but kernel-to-user space API usage
- * dissected from lolpcap:
- *  Copyright 2011, Chetan Loke <loke.chetan@gmail.com>
- *  License: GPL, version 2.0
- */
-
-#include <stdio.h>
-#include <stdlib.h>
-#include <stdint.h>
-#include <string.h>
-#include <assert.h>
-#include <net/if.h>
-#include <arpa/inet.h>
-#include <netdb.h>
-#include <poll.h>
-#include <unistd.h>
-#include <signal.h>
-#include <inttypes.h>
-#include <sys/socket.h>
-#include <sys/mman.h>
-#include <linux/if_packet.h>
-#include <linux/if_ether.h>
-#include <linux/ip.h>
-
-#ifndef likely
-# define likely(x)		__builtin_expect(!!(x), 1)
-#endif
-#ifndef unlikely
-# define unlikely(x)		__builtin_expect(!!(x), 0)
-#endif
-
-struct block_desc {
-	uint32_t version;
-	uint32_t offset_to_priv;
-	struct tpacket_hdr_v1 h1;
-};
-
-struct ring {
-	struct iovec *rd;
-	uint8_t *map;
-	struct tpacket_req3 req;
-};
-
-static unsigned long packets_total = 0, bytes_total = 0;
-static sig_atomic_t sigint = 0;
-
-static void sighandler(int num)
-{
-	sigint = 1;
-}
-
-static int setup_socket(struct ring *ring, char *netdev)
-{
-	int err, i, fd, v = TPACKET_V3;
-	struct sockaddr_ll ll;
-	unsigned int blocksiz = 1 << 22, framesiz = 1 << 11;
-	unsigned int blocknum = 64;
-
-	fd = socket(AF_PACKET, SOCK_RAW, htons(ETH_P_ALL));
-	if (fd < 0) {
-		perror("socket");
-		exit(1);
-	}
-
-	err = setsockopt(fd, SOL_PACKET, PACKET_VERSION, &v, sizeof(v));
-	if (err < 0) {
-		perror("setsockopt");
-		exit(1);
-	}
-
-	memset(&ring->req, 0, sizeof(ring->req));
-	ring->req.tp_block_size = blocksiz;
-	ring->req.tp_frame_size = framesiz;
-	ring->req.tp_block_nr = blocknum;
-	ring->req.tp_frame_nr = (blocksiz * blocknum) / framesiz;
-	ring->req.tp_retire_blk_tov = 60;
-	ring->req.tp_feature_req_word = TP_FT_REQ_FILL_RXHASH;
-
-	err = setsockopt(fd, SOL_PACKET, PACKET_RX_RING, &ring->req,
-			 sizeof(ring->req));
-	if (err < 0) {
-		perror("setsockopt");
-		exit(1);
-	}
-
-	ring->map = mmap(NULL, ring->req.tp_block_size * ring->req.tp_block_nr,
-			 PROT_READ | PROT_WRITE, MAP_SHARED | MAP_LOCKED, fd, 0);
-	if (ring->map == MAP_FAILED) {
-		perror("mmap");
-		exit(1);
-	}
-
-	ring->rd = malloc(ring->req.tp_block_nr * sizeof(*ring->rd));
-	assert(ring->rd);
-	for (i = 0; i < ring->req.tp_block_nr; ++i) {
-		ring->rd[i].iov_base = ring->map + (i * ring->req.tp_block_size);
-		ring->rd[i].iov_len = ring->req.tp_block_size;
-	}
-
-	memset(&ll, 0, sizeof(ll));
-	ll.sll_family = PF_PACKET;
-	ll.sll_protocol = htons(ETH_P_ALL);
-	ll.sll_ifindex = if_nametoindex(netdev);
-	ll.sll_hatype = 0;
-	ll.sll_pkttype = 0;
-	ll.sll_halen = 0;
-
-	err = bind(fd, (struct sockaddr *) &ll, sizeof(ll));
-	if (err < 0) {
-		perror("bind");
-		exit(1);
-	}
-
-	return fd;
-}
-
-static void display(struct tpacket3_hdr *ppd)
-{
-	struct ethhdr *eth = (struct ethhdr *) ((uint8_t *) ppd + ppd->tp_mac);
-	struct iphdr *ip = (struct iphdr *) ((uint8_t *) eth + ETH_HLEN);
-
-	if (eth->h_proto == htons(ETH_P_IP)) {
-		struct sockaddr_in ss, sd;
-		char sbuff[NI_MAXHOST], dbuff[NI_MAXHOST];
-
-		memset(&ss, 0, sizeof(ss));
-		ss.sin_family = PF_INET;
-		ss.sin_addr.s_addr = ip->saddr;
-		getnameinfo((struct sockaddr *) &ss, sizeof(ss),
-			    sbuff, sizeof(sbuff), NULL, 0, NI_NUMERICHOST);
-
-		memset(&sd, 0, sizeof(sd));
-		sd.sin_family = PF_INET;
-		sd.sin_addr.s_addr = ip->daddr;
-		getnameinfo((struct sockaddr *) &sd, sizeof(sd),
-			    dbuff, sizeof(dbuff), NULL, 0, NI_NUMERICHOST);
-
-		printf("%s -> %s, ", sbuff, dbuff);
-	}
-
-	printf("rxhash: 0x%x\n", ppd->hv1.tp_rxhash);
-}
-
-static void walk_block(struct block_desc *pbd, const int block_num)
-{
-	int num_pkts = pbd->h1.num_pkts, i;
-	unsigned long bytes = 0;
-	struct tpacket3_hdr *ppd;
-
-	ppd = (struct tpacket3_hdr *) ((uint8_t *) pbd +
-				       pbd->h1.offset_to_first_pkt);
-	for (i = 0; i < num_pkts; ++i) {
-		bytes += ppd->tp_snaplen;
-		display(ppd);
-
-		ppd = (struct tpacket3_hdr *) ((uint8_t *) ppd +
-					       ppd->tp_next_offset);
-	}
-
-	packets_total += num_pkts;
-	bytes_total += bytes;
-}
-
-static void flush_block(struct block_desc *pbd)
-{
-	pbd->h1.block_status = TP_STATUS_KERNEL;
-}
-
-static void teardown_socket(struct ring *ring, int fd)
-{
-	munmap(ring->map, ring->req.tp_block_size * ring->req.tp_block_nr);
-	free(ring->rd);
-	close(fd);
-}
-
-int main(int argc, char **argp)
-{
-	int fd, err;
-	socklen_t len;
-	struct ring ring;
-	struct pollfd pfd;
-	unsigned int block_num = 0, blocks = 64;
-	struct block_desc *pbd;
-	struct tpacket_stats_v3 stats;
-
-	if (argc != 2) {
-		fprintf(stderr, "Usage: %s INTERFACE\n", argp[0]);
-		return EXIT_FAILURE;
-	}
-
-	signal(SIGINT, sighandler);
-
-	memset(&ring, 0, sizeof(ring));
-	fd = setup_socket(&ring, argp[argc - 1]);
-	assert(fd > 0);
-
-	memset(&pfd, 0, sizeof(pfd));
-	pfd.fd = fd;
-	pfd.events = POLLIN | POLLERR;
-	pfd.revents = 0;
-
-	while (likely(!sigint)) {
-		pbd = (struct block_desc *) ring.rd[block_num].iov_base;
-
-		if ((pbd->h1.block_status & TP_STATUS_USER) == 0) {
-			poll(&pfd, 1, -1);
-			continue;
-		}
-
-		walk_block(pbd, block_num);
-		flush_block(pbd);
-		block_num = (block_num + 1) % blocks;
-	}
-
-	len = sizeof(stats);
-	err = getsockopt(fd, SOL_PACKET, PACKET_STATISTICS, &stats, &len);
-	if (err < 0) {
-		perror("getsockopt");
-		exit(1);
-	}
-
-	fflush(stdout);
-	printf("\nReceived %u packets, %lu bytes, %u dropped, freeze_q_cnt: %u\n",
-	       stats.tp_packets, bytes_total, stats.tp_drops,
-	       stats.tp_freeze_q_cnt);
-
-	teardown_socket(&ring, fd);
-	return 0;
-}
-
--------------------------------------------------------------------------------
-+ PACKET_QDISC_BYPASS
--------------------------------------------------------------------------------
-
-If there is a requirement to load the network with many packets in a similar
-fashion as pktgen does, you might set the following option after socket
-creation:
-
-    int one = 1;
-    setsockopt(fd, SOL_PACKET, PACKET_QDISC_BYPASS, &one, sizeof(one));
-
-This has the side-effect, that packets sent through PF_PACKET will bypass the
-kernel's qdisc layer and are forcedly pushed to the driver directly. Meaning,
-packet are not buffered, tc disciplines are ignored, increased loss can occur
-and such packets are also not visible to other PF_PACKET sockets anymore. So,
-you have been warned; generally, this can be useful for stress testing various
-components of a system.
-
-On default, PACKET_QDISC_BYPASS is disabled and needs to be explicitly enabled
-on PF_PACKET sockets.
-
--------------------------------------------------------------------------------
-+ PACKET_TIMESTAMP
--------------------------------------------------------------------------------
-
-The PACKET_TIMESTAMP setting determines the source of the timestamp in
-the packet meta information for mmap(2)ed RX_RING and TX_RINGs.  If your
-NIC is capable of timestamping packets in hardware, you can request those
-hardware timestamps to be used. Note: you may need to enable the generation
-of hardware timestamps with SIOCSHWTSTAMP (see related information from
-Documentation/networking/timestamping.txt).
-
-PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING:
-
-    int req = SOF_TIMESTAMPING_RAW_HARDWARE;
-    setsockopt(fd, SOL_PACKET, PACKET_TIMESTAMP, (void *) &req, sizeof(req))
-
-For the mmap(2)ed ring buffers, such timestamps are stored in the
-tpacket{,2,3}_hdr structure's tp_sec and tp_{n,u}sec members. To determine
-what kind of timestamp has been reported, the tp_status field is binary |'ed
-with the following possible bits ...
-
-    TP_STATUS_TS_RAW_HARDWARE
-    TP_STATUS_TS_SOFTWARE
-
-... that are equivalent to its SOF_TIMESTAMPING_* counterparts. For the
-RX_RING, if neither is set (i.e. PACKET_TIMESTAMP is not set), then a
-software fallback was invoked *within* PF_PACKET's processing code (less
-precise).
-
-Getting timestamps for the TX_RING works as follows: i) fill the ring frames,
-ii) call sendto() e.g. in blocking mode, iii) wait for status of relevant
-frames to be updated resp. the frame handed over to the application, iv) walk
-through the frames to pick up the individual hw/sw timestamps.
-
-Only (!) if transmit timestamping is enabled, then these bits are combined
-with binary | with TP_STATUS_AVAILABLE, so you must check for that in your
-application (e.g. !(tp_status & (TP_STATUS_SEND_REQUEST | TP_STATUS_SENDING))
-in a first step to see if the frame belongs to the application, and then
-one can extract the type of timestamp in a second step from tp_status)!
-
-If you don't care about them, thus having it disabled, checking for
-TP_STATUS_AVAILABLE resp. TP_STATUS_WRONG_FORMAT is sufficient. If in the
-TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
-members do not contain a valid value. For TX_RINGs, by default no timestamp
-is generated!
-
-See include/linux/net_tstamp.h and Documentation/networking/timestamping.txt
-for more information on hardware timestamps.
-
--------------------------------------------------------------------------------
-+ Miscellaneous bits
--------------------------------------------------------------------------------
-
-- Packet sockets work well together with Linux socket filters, thus you also
-  might want to have a look at Documentation/networking/filter.rst
-
---------------------------------------------------------------------------------
-+ THANKS
---------------------------------------------------------------------------------
-   
-   Jesse Brandeburg, for fixing my grammathical/spelling errors
-
-- 
cgit v1.2.3-59-g8ed1b


From 6e94eaaa400d66f13e25e071926047ef2e3d21e3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:12 +0200
Subject: docs: networking: convert phonet.txt to ReST
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- add SPDX header;
- adjust title markup;
- use copyright symbol;
- add notes markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Rémi Denis-Courmont <courmisch@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/packet_mmap.rst |   2 +-
 Documentation/networking/phonet.rst      | 230 +++++++++++++++++++++++++++++++
 Documentation/networking/phonet.txt      | 214 ----------------------------
 MAINTAINERS                              |   2 +-
 5 files changed, 233 insertions(+), 216 deletions(-)
 create mode 100644 Documentation/networking/phonet.rst
 delete mode 100644 Documentation/networking/phonet.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 8262b535a83e..e460026331c6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -90,6 +90,7 @@ Contents:
    openvswitch
    operstates
    packet_mmap
+   phonet
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
index 5f213d17652f..884c7222b9e9 100644
--- a/Documentation/networking/packet_mmap.rst
+++ b/Documentation/networking/packet_mmap.rst
@@ -1076,7 +1076,7 @@ Miscellaneous bits
 ==================
 
 - Packet sockets work well together with Linux socket filters, thus you also
-  might want to have a look at Documentation/networking/filter.txt
+  might want to have a look at Documentation/networking/filter.rst
 
 THANKS
 ======
diff --git a/Documentation/networking/phonet.rst b/Documentation/networking/phonet.rst
new file mode 100644
index 000000000000..8668dcbc5e6a
--- /dev/null
+++ b/Documentation/networking/phonet.rst
@@ -0,0 +1,230 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+============================
+Linux Phonet protocol family
+============================
+
+Introduction
+------------
+
+Phonet is a packet protocol used by Nokia cellular modems for both IPC
+and RPC. With the Linux Phonet socket family, Linux host processes can
+receive and send messages from/to the modem, or any other external
+device attached to the modem. The modem takes care of routing.
+
+Phonet packets can be exchanged through various hardware connections
+depending on the device, such as:
+
+  - USB with the CDC Phonet interface,
+  - infrared,
+  - Bluetooth,
+  - an RS232 serial port (with a dedicated "FBUS" line discipline),
+  - the SSI bus with some TI OMAP processors.
+
+
+Packets format
+--------------
+
+Phonet packets have a common header as follows::
+
+  struct phonethdr {
+    uint8_t  pn_media;  /* Media type (link-layer identifier) */
+    uint8_t  pn_rdev;   /* Receiver device ID */
+    uint8_t  pn_sdev;   /* Sender device ID */
+    uint8_t  pn_res;    /* Resource ID or function */
+    uint16_t pn_length; /* Big-endian message byte length (minus 6) */
+    uint8_t  pn_robj;   /* Receiver object ID */
+    uint8_t  pn_sobj;   /* Sender object ID */
+  };
+
+On Linux, the link-layer header includes the pn_media byte (see below).
+The next 7 bytes are part of the network-layer header.
+
+The device ID is split: the 6 higher-order bits constitute the device
+address, while the 2 lower-order bits are used for multiplexing, as are
+the 8-bit object identifiers. As such, Phonet can be considered as a
+network layer with 6 bits of address space and 10 bits for transport
+protocol (much like port numbers in IP world).
+
+The modem always has address number zero. All other devices have their
+own 6-bit address.
+
+
+Link layer
+----------
+
+Phonet links are always point-to-point links. The link layer header
+consists of a single Phonet media type byte. It uniquely identifies the
+link through which the packet is transmitted, from the modem's
+perspective. Each Phonet network device shall prepend and set the media
+type byte as appropriate. For convenience, a common phonet_header_ops
+link-layer header operations structure is provided. It sets the
+media type according to the network device hardware address.
+
+Linux Phonet network interfaces support a dedicated link layer packets
+type (ETH_P_PHONET) which is out of the Ethernet type range. They can
+only send and receive Phonet packets.
+
+The virtual TUN tunnel device driver can also be used for Phonet. This
+requires IFF_TUN mode, _without_ the IFF_NO_PI flag. In this case,
+there is no link-layer header, so there is no Phonet media type byte.
+
+Note that Phonet interfaces are not allowed to re-order packets, so
+only the (default) Linux FIFO qdisc should be used with them.
+
+
+Network layer
+-------------
+
+The Phonet socket address family maps the Phonet packet header::
+
+  struct sockaddr_pn {
+    sa_family_t spn_family;    /* AF_PHONET */
+    uint8_t     spn_obj;       /* Object ID */
+    uint8_t     spn_dev;       /* Device ID */
+    uint8_t     spn_resource;  /* Resource or function */
+    uint8_t     spn_zero[...]; /* Padding */
+  };
+
+The resource field is only used when sending and receiving;
+It is ignored by bind() and getsockname().
+
+
+Low-level datagram protocol
+---------------------------
+
+Applications can send Phonet messages using the Phonet datagram socket
+protocol from the PF_PHONET family. Each socket is bound to one of the
+2^10 object IDs available, and can send and receive packets with any
+other peer.
+
+::
+
+  struct sockaddr_pn addr = { .spn_family = AF_PHONET, };
+  ssize_t len;
+  socklen_t addrlen = sizeof(addr);
+  int fd;
+
+  fd = socket(PF_PHONET, SOCK_DGRAM, 0);
+  bind(fd, (struct sockaddr *)&addr, sizeof(addr));
+  /* ... */
+
+  sendto(fd, msg, msglen, 0, (struct sockaddr *)&addr, sizeof(addr));
+  len = recvfrom(fd, buf, sizeof(buf), 0,
+		 (struct sockaddr *)&addr, &addrlen);
+
+This protocol follows the SOCK_DGRAM connection-less semantics.
+However, connect() and getpeername() are not supported, as they did
+not seem useful with Phonet usages (could be added easily).
+
+
+Resource subscription
+---------------------
+
+A Phonet datagram socket can be subscribed to any number of 8-bit
+Phonet resources, as follows::
+
+  uint32_t res = 0xXX;
+  ioctl(fd, SIOCPNADDRESOURCE, &res);
+
+Subscription is similarly cancelled using the SIOCPNDELRESOURCE I/O
+control request, or when the socket is closed.
+
+Note that no more than one socket can be subscribed to any given
+resource at a time. If not, ioctl() will return EBUSY.
+
+
+Phonet Pipe protocol
+--------------------
+
+The Phonet Pipe protocol is a simple sequenced packets protocol
+with end-to-end congestion control. It uses the passive listening
+socket paradigm. The listening socket is bound to a unique free object
+ID. Each listening socket can handle up to 255 simultaneous
+connections, one per accept()'d socket.
+
+::
+
+  int lfd, cfd;
+
+  lfd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
+  listen (lfd, INT_MAX);
+
+  /* ... */
+  cfd = accept(lfd, NULL, NULL);
+  for (;;)
+  {
+    char buf[...];
+    ssize_t len = read(cfd, buf, sizeof(buf));
+
+    /* ... */
+
+    write(cfd, msg, msglen);
+  }
+
+Connections are traditionally established between two endpoints by a
+"third party" application. This means that both endpoints are passive.
+
+
+As of Linux kernel version 2.6.39, it is also possible to connect
+two endpoints directly, using connect() on the active side. This is
+intended to support the newer Nokia Wireless Modem API, as found in
+e.g. the Nokia Slim Modem in the ST-Ericsson U8500 platform::
+
+  struct sockaddr_spn spn;
+  int fd;
+
+  fd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
+  memset(&spn, 0, sizeof(spn));
+  spn.spn_family = AF_PHONET;
+  spn.spn_obj = ...;
+  spn.spn_dev = ...;
+  spn.spn_resource = 0xD9;
+  connect(fd, (struct sockaddr *)&spn, sizeof(spn));
+  /* normal I/O here ... */
+  close(fd);
+
+
+.. Warning::
+
+   When polling a connected pipe socket for writability, there is an
+   intrinsic race condition whereby writability might be lost between the
+   polling and the writing system calls. In this case, the socket will
+   block until write becomes possible again, unless non-blocking mode
+   is enabled.
+
+
+The pipe protocol provides the following socket options at the SOL_PNPIPE level:
+
+  PNPIPE_ENCAP accepts one integer value (int) of:
+
+    PNPIPE_ENCAP_NONE:
+      The socket operates normally (default).
+
+    PNPIPE_ENCAP_IP:
+      The socket is used as a backend for a virtual IP
+      interface. This requires CAP_NET_ADMIN capability. GPRS data
+      support on Nokia modems can use this. Note that the socket cannot
+      be reliably poll()'d or read() from while in this mode.
+
+  PNPIPE_IFINDEX
+      is a read-only integer value. It contains the
+      interface index of the network interface created by PNPIPE_ENCAP,
+      or zero if encapsulation is off.
+
+  PNPIPE_HANDLE
+      is a read-only integer value. It contains the underlying
+      identifier ("pipe handle") of the pipe. This is only defined for
+      socket descriptors that are already connected or being connected.
+
+
+Authors
+-------
+
+Linux Phonet was initially written by Sakari Ailus.
+
+Other contributors include Mikä Liljeberg, Andras Domokos,
+Carlos Chinea and Rémi Denis-Courmont.
+
+Copyright |copy| 2008 Nokia Corporation.
diff --git a/Documentation/networking/phonet.txt b/Documentation/networking/phonet.txt
deleted file mode 100644
index 81003581f47a..000000000000
--- a/Documentation/networking/phonet.txt
+++ /dev/null
@@ -1,214 +0,0 @@
-Linux Phonet protocol family
-============================
-
-Introduction
-------------
-
-Phonet is a packet protocol used by Nokia cellular modems for both IPC
-and RPC. With the Linux Phonet socket family, Linux host processes can
-receive and send messages from/to the modem, or any other external
-device attached to the modem. The modem takes care of routing.
-
-Phonet packets can be exchanged through various hardware connections
-depending on the device, such as:
-  - USB with the CDC Phonet interface,
-  - infrared,
-  - Bluetooth,
-  - an RS232 serial port (with a dedicated "FBUS" line discipline),
-  - the SSI bus with some TI OMAP processors.
-
-
-Packets format
---------------
-
-Phonet packets have a common header as follows:
-
-  struct phonethdr {
-    uint8_t  pn_media;  /* Media type (link-layer identifier) */
-    uint8_t  pn_rdev;   /* Receiver device ID */
-    uint8_t  pn_sdev;   /* Sender device ID */
-    uint8_t  pn_res;    /* Resource ID or function */
-    uint16_t pn_length; /* Big-endian message byte length (minus 6) */
-    uint8_t  pn_robj;   /* Receiver object ID */
-    uint8_t  pn_sobj;   /* Sender object ID */
-  };
-
-On Linux, the link-layer header includes the pn_media byte (see below).
-The next 7 bytes are part of the network-layer header.
-
-The device ID is split: the 6 higher-order bits constitute the device
-address, while the 2 lower-order bits are used for multiplexing, as are
-the 8-bit object identifiers. As such, Phonet can be considered as a
-network layer with 6 bits of address space and 10 bits for transport
-protocol (much like port numbers in IP world).
-
-The modem always has address number zero. All other device have a their
-own 6-bit address.
-
-
-Link layer
-----------
-
-Phonet links are always point-to-point links. The link layer header
-consists of a single Phonet media type byte. It uniquely identifies the
-link through which the packet is transmitted, from the modem's
-perspective. Each Phonet network device shall prepend and set the media
-type byte as appropriate. For convenience, a common phonet_header_ops
-link-layer header operations structure is provided. It sets the
-media type according to the network device hardware address.
-
-Linux Phonet network interfaces support a dedicated link layer packets
-type (ETH_P_PHONET) which is out of the Ethernet type range. They can
-only send and receive Phonet packets.
-
-The virtual TUN tunnel device driver can also be used for Phonet. This
-requires IFF_TUN mode, _without_ the IFF_NO_PI flag. In this case,
-there is no link-layer header, so there is no Phonet media type byte.
-
-Note that Phonet interfaces are not allowed to re-order packets, so
-only the (default) Linux FIFO qdisc should be used with them.
-
-
-Network layer
--------------
-
-The Phonet socket address family maps the Phonet packet header:
-
-  struct sockaddr_pn {
-    sa_family_t spn_family;    /* AF_PHONET */
-    uint8_t     spn_obj;       /* Object ID */
-    uint8_t     spn_dev;       /* Device ID */
-    uint8_t     spn_resource;  /* Resource or function */
-    uint8_t     spn_zero[...]; /* Padding */
-  };
-
-The resource field is only used when sending and receiving;
-It is ignored by bind() and getsockname().
-
-
-Low-level datagram protocol
----------------------------
-
-Applications can send Phonet messages using the Phonet datagram socket
-protocol from the PF_PHONET family. Each socket is bound to one of the
-2^10 object IDs available, and can send and receive packets with any
-other peer.
-
-  struct sockaddr_pn addr = { .spn_family = AF_PHONET, };
-  ssize_t len;
-  socklen_t addrlen = sizeof(addr);
-  int fd;
-
-  fd = socket(PF_PHONET, SOCK_DGRAM, 0);
-  bind(fd, (struct sockaddr *)&addr, sizeof(addr));
-  /* ... */
-
-  sendto(fd, msg, msglen, 0, (struct sockaddr *)&addr, sizeof(addr));
-  len = recvfrom(fd, buf, sizeof(buf), 0,
-                 (struct sockaddr *)&addr, &addrlen);
-
-This protocol follows the SOCK_DGRAM connection-less semantics.
-However, connect() and getpeername() are not supported, as they did
-not seem useful with Phonet usages (could be added easily).
-
-
-Resource subscription
----------------------
-
-A Phonet datagram socket can be subscribed to any number of 8-bits
-Phonet resources, as follow:
-
-  uint32_t res = 0xXX;
-  ioctl(fd, SIOCPNADDRESOURCE, &res);
-
-Subscription is similarly cancelled using the SIOCPNDELRESOURCE I/O
-control request, or when the socket is closed.
-
-Note that no more than one socket can be subcribed to any given
-resource at a time. If not, ioctl() will return EBUSY.
-
-
-Phonet Pipe protocol
---------------------
-
-The Phonet Pipe protocol is a simple sequenced packets protocol
-with end-to-end congestion control. It uses the passive listening
-socket paradigm. The listening socket is bound to an unique free object
-ID. Each listening socket can handle up to 255 simultaneous
-connections, one per accept()'d socket.
-
-  int lfd, cfd;
-
-  lfd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
-  listen (lfd, INT_MAX);
-
-  /* ... */
-  cfd = accept(lfd, NULL, NULL);
-  for (;;)
-  {
-    char buf[...];
-    ssize_t len = read(cfd, buf, sizeof(buf));
-
-    /* ... */
-
-    write(cfd, msg, msglen);
-  }
-
-Connections are traditionally established between two endpoints by a
-"third party" application. This means that both endpoints are passive.
-
-
-As of Linux kernel version 2.6.39, it is also possible to connect
-two endpoints directly, using connect() on the active side. This is
-intended to support the newer Nokia Wireless Modem API, as found in
-e.g. the Nokia Slim Modem in the ST-Ericsson U8500 platform:
-
-  struct sockaddr_spn spn;
-  int fd;
-
-  fd = socket(PF_PHONET, SOCK_SEQPACKET, PN_PROTO_PIPE);
-  memset(&spn, 0, sizeof(spn));
-  spn.spn_family = AF_PHONET;
-  spn.spn_obj = ...;
-  spn.spn_dev = ...;
-  spn.spn_resource = 0xD9;
-  connect(fd, (struct sockaddr *)&spn, sizeof(spn));
-  /* normal I/O here ... */
-  close(fd);
-
-
-WARNING:
-When polling a connected pipe socket for writability, there is an
-intrinsic race condition whereby writability might be lost between the
-polling and the writing system calls. In this case, the socket will
-block until write becomes possible again, unless non-blocking mode
-is enabled.
-
-
-The pipe protocol provides two socket options at the SOL_PNPIPE level:
-
-  PNPIPE_ENCAP accepts one integer value (int) of:
-
-    PNPIPE_ENCAP_NONE: The socket operates normally (default).
-
-    PNPIPE_ENCAP_IP: The socket is used as a backend for a virtual IP
-      interface. This requires CAP_NET_ADMIN capability. GPRS data
-      support on Nokia modems can use this. Note that the socket cannot
-      be reliably poll()'d or read() from while in this mode.
-
-  PNPIPE_IFINDEX is a read-only integer value. It contains the
-    interface index of the network interface created by PNPIPE_ENCAP,
-    or zero if encapsulation is off.
-
-  PNPIPE_HANDLE is a read-only integer value. It contains the underlying
-    identifier ("pipe handle") of the pipe. This is only defined for
-    socket descriptors that are already connected or being connected.
-
-
-Authors
--------
-
-Linux Phonet was initially written by Sakari Ailus.
-Other contributors include Mikä Liljeberg, Andras Domokos,
-Carlos Chinea and Rémi Denis-Courmont.
-Copyright (C) 2008 Nokia Corporation.
diff --git a/MAINTAINERS b/MAINTAINERS
index 33bfc9e4aead..785f56e5f210 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -13262,7 +13262,7 @@ F:	drivers/input/joystick/pxrc.c
 PHONET PROTOCOL
 M:	Remi Denis-Courmont <courmisch@gmail.com>
 S:	Supported
-F:	Documentation/networking/phonet.txt
+F:	Documentation/networking/phonet.rst
 F:	include/linux/phonet.h
 F:	include/net/phonet/
 F:	include/uapi/linux/phonet.h
-- 
cgit v1.2.3-59-g8ed1b


From c1e4535f24bcfeef55a7ed409a5f50548e284426 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:13 +0200
Subject: docs: networking: convert pktgen.txt to ReST

- add SPDX header;
- adjust title markup;
- use bold markups on a few places;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/pktgen.rst | 412 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/pktgen.txt | 400 ----------------------------------
 net/Kconfig                         |   2 +-
 net/core/pktgen.c                   |   2 +-
 samples/pktgen/README.rst           |   2 +-
 6 files changed, 416 insertions(+), 403 deletions(-)
 create mode 100644 Documentation/networking/pktgen.rst
 delete mode 100644 Documentation/networking/pktgen.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e460026331c6..696181a96e3c 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -91,6 +91,7 @@ Contents:
    operstates
    packet_mmap
    phonet
+   pktgen
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/pktgen.rst b/Documentation/networking/pktgen.rst
new file mode 100644
index 000000000000..7afa1c9f1183
--- /dev/null
+++ b/Documentation/networking/pktgen.rst
@@ -0,0 +1,412 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+HOWTO for the linux packet generator
+====================================
+
+Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel
+or as a module.  A module is preferred; modprobe pktgen if needed.  Once
+running, pktgen creates a thread for each CPU with affinity to that CPU.
+Monitoring and controlling is done via /proc.  It is easiest to select a
+suitable sample script and configure that.
+
+On a dual CPU::
+
+    ps aux | grep pkt
+    root       129  0.3  0.0     0    0 ?        SW    2003 523:20 [kpktgend_0]
+    root       130  0.3  0.0     0    0 ?        SW    2003 509:50 [kpktgend_1]
+
+
+For monitoring and control pktgen creates::
+
+	/proc/net/pktgen/pgctrl
+	/proc/net/pktgen/kpktgend_X
+	/proc/net/pktgen/ethX
+
+
+Tuning NIC for max performance
+==============================
+
+The default NIC settings are (likely) not tuned for pktgen's artificial
+overload type of benchmarking, as this could hurt the normal use-case.
+
+Specifically increasing the TX ring buffer in the NIC::
+
+ # ethtool -G ethX tx 1024
+
+A larger TX ring can improve pktgen's performance, while it can hurt
+in the general case, 1) because the TX ring buffer might get larger
+than the CPU's L1/L2 cache, 2) because it allows more queueing in the
+NIC HW layer (which is bad for bufferbloat).
+
+One should hesitate to conclude that packets/descriptors in the HW
+TX ring cause delay.  Drivers usually delay cleaning up the
+ring-buffers for various performance reasons, and packets stalling
+the TX ring might just be waiting for cleanup.
+
+This cleanup issue is specifically the case for the driver ixgbe
+(Intel 82599 chip).  This driver (ixgbe) combines TX+RX ring cleanups,
+and the cleanup interval is affected by the ethtool --coalesce setting
+of parameter "rx-usecs".
+
+For ixgbe use e.g. "30" resulting in approx 33K interrupts/sec (1/30*10^6)::
+
+ # ethtool -C ethX rx-usecs 30
+
+
+Kernel threads
+==============
+Pktgen creates a thread for each CPU with affinity to that CPU.
+Each thread is controlled through the procfile /proc/net/pktgen/kpktgend_X.
+
+Example: /proc/net/pktgen/kpktgend_0::
+
+ Running:
+ Stopped: eth4@0
+ Result: OK: add_device=eth4@0
+
+Most important are the devices assigned to the thread.
+
+The two basic thread commands are:
+
+ * add_device DEVICE@NAME -- adds a single device
+ * rem_device_all         -- remove all associated devices
+
+When adding a device to a thread, a corresponding procfile is created
+which is used for configuring this device. Thus, device names need to
+be unique.
+
+To support adding the same device to multiple threads, which is useful
+with multi queue NICs, the device naming scheme is extended with "@":
+device@something
+
+The part after "@" can be anything, but it is customary to use the thread
+number.
+
+Viewing devices
+===============
+
+The Params section holds configured information.  The Current section
+holds running statistics.  The Result is printed after a run or after
+interruption.  Example::
+
+    /proc/net/pktgen/eth4@0
+
+    Params: count 100000  min_pkt_size: 60  max_pkt_size: 60
+	frags: 0  delay: 0  clone_skb: 64  ifname: eth4@0
+	flows: 0 flowlen: 0
+	queue_map_min: 0  queue_map_max: 0
+	dst_min: 192.168.81.2  dst_max:
+	src_min:   src_max:
+	src_mac: 90:e2:ba:0a:56:b4 dst_mac: 00:1b:21:3c:9d:f8
+	udp_src_min: 9  udp_src_max: 109  udp_dst_min: 9  udp_dst_max: 9
+	src_mac_count: 0  dst_mac_count: 0
+	Flags: UDPSRC_RND  NO_TIMESTAMP  QUEUE_MAP_CPU
+    Current:
+	pkts-sofar: 100000  errors: 0
+	started: 623913381008us  stopped: 623913396439us idle: 25us
+	seq_num: 100001  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
+	cur_saddr: 192.168.8.3  cur_daddr: 192.168.81.2
+	cur_udp_dst: 9  cur_udp_src: 42
+	cur_queue_map: 0
+	flows: 0
+    Result: OK: 15430(c15405+d25) usec, 100000 (60byte,0frags)
+    6480562pps 3110Mb/sec (3110669760bps) errors: 0
+
+
+Configuring devices
+===================
+This is done via the /proc interface, and most easily done via pgset
+as defined in the sample scripts.
+You need to specify PGDEV environment variable to use functions from sample
+scripts, i.e.::
+
+    export PGDEV=/proc/net/pktgen/eth4@0
+    source samples/pktgen/functions.sh
+
+Examples::
+
+ pg_ctrl start           starts injection.
+ pg_ctrl stop            aborts injection. Also, ^C aborts generator.
+
+ pgset "clone_skb 1"     sets the number of copies of the same packet
+ pgset "clone_skb 0"     use single SKB for all transmits
+ pgset "burst 8"         uses xmit_more API to queue 8 copies of the same
+			 packet and update HW tx queue tail pointer once.
+			 "burst 1" is the default
+ pgset "pkt_size 9014"   sets packet size to 9014
+ pgset "frags 5"         packet will consist of 5 fragments
+ pgset "count 200000"    sets number of packets to send, set to zero
+			 for continuous sends until explicitly stopped.
+
+ pgset "delay 5000"      adds delay to hard_start_xmit(). nanoseconds
+
+ pgset "dst 10.0.0.1"    sets IP destination address
+			 (BEWARE! This generator is very aggressive!)
+
+ pgset "dst_min 10.0.0.1"            Same as dst
+ pgset "dst_max 10.0.0.254"          Set the maximum destination IP.
+ pgset "src_min 10.0.0.1"            Set the minimum (or only) source IP.
+ pgset "src_max 10.0.0.254"          Set the maximum source IP.
+ pgset "dst6 fec0::1"     IPV6 destination address
+ pgset "src6 fec0::2"     IPV6 source address
+ pgset "dstmac 00:00:00:00:00:00"    sets MAC destination address
+ pgset "srcmac 00:00:00:00:00:00"    sets MAC source address
+
+ pgset "queue_map_min 0" Sets the min value of tx queue interval
+ pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
+			 To select queue 1 of a given device,
+			 use queue_map_min=1 and queue_map_max=1
+
+ pgset "src_mac_count 1" Sets the number of MACs we'll range through.
+			 The 'minimum' MAC is what you set with srcmac.
+
+ pgset "dst_mac_count 1" Sets the number of MACs we'll range through.
+			 The 'minimum' MAC is what you set with dstmac.
+
+ pgset "flag [name]"     Set a flag to determine behaviour.  Current flags
+			 are: IPSRC_RND # IP source is random (between min/max)
+			      IPDST_RND # IP destination is random
+			      UDPSRC_RND, UDPDST_RND,
+			      MACSRC_RND, MACDST_RND
+			      TXSIZE_RND, IPV6,
+			      MPLS_RND, VID_RND, SVID_RND
+			      FLOW_SEQ,
+			      QUEUE_MAP_RND # queue map random
+			      QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
+			      UDPCSUM,
+			      IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
+			      NODE_ALLOC # node specific memory allocation
+			      NO_TIMESTAMP # disable timestamping
+ pgset 'flag ![name]'    Clear a flag to determine behaviour.
+			 Note that you might need to use single quote in
+			 interactive mode, so that your shell wouldn't expand
+			 the specified flag as a history command.
+
+ pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
+
+ pgset "udp_src_min 9"   set UDP source port min, If < udp_src_max, then
+			 cycle through the port range.
+
+ pgset "udp_src_max 9"   set UDP source port max.
+ pgset "udp_dst_min 9"   set UDP destination port min, If < udp_dst_max, then
+			 cycle through the port range.
+ pgset "udp_dst_max 9"   set UDP destination port max.
+
+ pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example
+					 outer label=16,middle label=32,
+					 inner label=0 (IPv4 NULL)) Note that
+					 there must be no spaces between the
+					 arguments. Leading zeros are required.
+					 Do not set the bottom of stack bit,
+					 that's done automatically. If you do
+					 set the bottom of stack bit, that
+					 indicates that you want to randomly
+					 generate that address and the flag
+					 MPLS_RND will be turned on. You
+					 can have any mix of random and fixed
+					 labels in the label stack.
+
+ pgset "mpls 0"		  turn off mpls (or any invalid argument works too!)
+
+ pgset "vlan_id 77"       set VLAN ID 0-4095
+ pgset "vlan_p 3"         set priority bit 0-7 (default 0)
+ pgset "vlan_cfi 0"       set canonical format identifier 0-1 (default 0)
+
+ pgset "svlan_id 22"      set SVLAN ID 0-4095
+ pgset "svlan_p 3"        set priority bit 0-7 (default 0)
+ pgset "svlan_cfi 0"      set canonical format identifier 0-1 (default 0)
+
+ pgset "vlan_id 9999"     > 4095 remove vlan and svlan tags
+ pgset "svlan 9999"       > 4095 remove svlan tag
+
+
+ pgset "tos XX"           set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
+ pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
+
+ pgset "rate 300M"        set rate to 300 Mb/s
+ pgset "ratep 1000000"    set rate to 1Mpps
+
+ pgset "xmit_mode netif_receive"  RX inject into stack netif_receive_skb()
+				  Works with "burst" but not with "clone_skb".
+				  Default xmit_mode is "start_xmit".
+
+Sample scripts
+==============
+
+A collection of tutorial scripts and helpers for pktgen is in the
+samples/pktgen directory. The helper parameters.sh file supports easy
+and consistent parameter parsing across the sample scripts.
+
+Usage example and help::
+
+ ./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2
+
+Usage::
+
+  ./pktgen_sample01_simple.sh [-vx] -i ethX
+
+  -i : ($DEV)       output interface/device (required)
+  -s : ($PKT_SIZE)  packet size
+  -d : ($DEST_IP)   destination IP
+  -m : ($DST_MAC)   destination MAC-addr
+  -t : ($THREADS)   threads to start
+  -c : ($SKB_CLONE) SKB clones send before alloc new SKB
+  -b : ($BURST)     HW level bursting of SKBs
+  -v : ($VERBOSE)   verbose
+  -x : ($DEBUG)     debug
+
+The global variables being set are also listed.  E.g. the required
+interface/device parameter "-i" sets variable $DEV.  Copy the
+pktgen_sampleXX scripts and modify them to fit your own needs.
+
+The old scripts::
+
+    pktgen.conf-1-2                  # 1 CPU 2 dev
+    pktgen.conf-1-1-rdos             # 1 CPU 1 dev w. route DoS
+    pktgen.conf-1-1-ip6              # 1 CPU 1 dev ipv6
+    pktgen.conf-1-1-ip6-rdos         # 1 CPU 1 dev ipv6  w. route DoS
+    pktgen.conf-1-1-flows            # 1 CPU 1 dev multiple flows.
+
+
+Interrupt affinity
+===================
+Note that when adding devices to a specific CPU it is a good idea to
+also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound
+to the same CPU.  This reduces cache bouncing when freeing skbs.
+
+Plus using the device flag QUEUE_MAP_CPU, which maps the SKB's TX queue
+to the running thread's CPU (directly from smp_processor_id()).
+
+Enable IPsec
+============
+Default IPsec transformation with ESP encapsulation plus transport mode
+can be enabled by simply setting::
+
+    pgset "flag IPSEC"
+    pgset "flows 1"
+
+To avoid breaking existing testbed scripts for using AH type and tunnel mode,
+you can use "pgset spi SPI_VALUE" to specify which transformation mode
+to employ.
+
+
+Current commands and configuration options
+==========================================
+
+**Pgcontrol commands**::
+
+    start
+    stop
+    reset
+
+**Thread commands**::
+
+    add_device
+    rem_device_all
+
+
+**Device commands**::
+
+    count
+    clone_skb
+    burst
+    debug
+
+    frags
+    delay
+
+    src_mac_count
+    dst_mac_count
+
+    pkt_size
+    min_pkt_size
+    max_pkt_size
+
+    queue_map_min
+    queue_map_max
+    skb_priority
+
+    tos           (ipv4)
+    traffic_class (ipv6)
+
+    mpls
+
+    udp_src_min
+    udp_src_max
+
+    udp_dst_min
+    udp_dst_max
+
+    node
+
+    flag
+    IPSRC_RND
+    IPDST_RND
+    UDPSRC_RND
+    UDPDST_RND
+    MACSRC_RND
+    MACDST_RND
+    TXSIZE_RND
+    IPV6
+    MPLS_RND
+    VID_RND
+    SVID_RND
+    FLOW_SEQ
+    QUEUE_MAP_RND
+    QUEUE_MAP_CPU
+    UDPCSUM
+    IPSEC
+    NODE_ALLOC
+    NO_TIMESTAMP
+
+    spi (ipsec)
+
+    dst_min
+    dst_max
+
+    src_min
+    src_max
+
+    dst_mac
+    src_mac
+
+    clear_counters
+
+    src6
+    dst6
+    dst6_max
+    dst6_min
+
+    flows
+    flowlen
+
+    rate
+    ratep
+
+    xmit_mode <start_xmit|netif_receive>
+
+    vlan_cfi
+    vlan_id
+    vlan_p
+
+    svlan_cfi
+    svlan_id
+    svlan_p
+
+
+References:
+
+- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
+- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
+
+Paper from Linux-Kongress in Erlangen 2004.
+- ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
+
+Thanks to:
+
+Grant Grundler for testing on IA-64 and parisc, Harald Welte,  Lennert Buytenhek
+Stephen Hemminger, Andi Kleen, Dave Miller and many others.
+
+
+Good luck with the linux net-development.
diff --git a/Documentation/networking/pktgen.txt b/Documentation/networking/pktgen.txt
deleted file mode 100644
index d2fd78f85aa4..000000000000
--- a/Documentation/networking/pktgen.txt
+++ /dev/null
@@ -1,400 +0,0 @@
-
-
-                  HOWTO for the linux packet generator
-                  ------------------------------------
-
-Enable CONFIG_NET_PKTGEN to compile and build pktgen either in-kernel
-or as a module.  A module is preferred; modprobe pktgen if needed.  Once
-running, pktgen creates a thread for each CPU with affinity to that CPU.
-Monitoring and controlling is done via /proc.  It is easiest to select a
-suitable sample script and configure that.
-
-On a dual CPU:
-
-ps aux | grep pkt
-root       129  0.3  0.0     0    0 ?        SW    2003 523:20 [kpktgend_0]
-root       130  0.3  0.0     0    0 ?        SW    2003 509:50 [kpktgend_1]
-
-
-For monitoring and control pktgen creates:
-	/proc/net/pktgen/pgctrl
-	/proc/net/pktgen/kpktgend_X
-        /proc/net/pktgen/ethX
-
-
-Tuning NIC for max performance
-==============================
-
-The default NIC settings are (likely) not tuned for pktgen's artificial
-overload type of benchmarking, as this could hurt the normal use-case.
-
-Specifically increasing the TX ring buffer in the NIC:
- # ethtool -G ethX tx 1024
-
-A larger TX ring can improve pktgen's performance, while it can hurt
-in the general case, 1) because the TX ring buffer might get larger
-than the CPU's L1/L2 cache, 2) because it allows more queueing in the
-NIC HW layer (which is bad for bufferbloat).
-
-One should hesitate to conclude that packets/descriptors in the HW
-TX ring cause delay.  Drivers usually delay cleaning up the
-ring-buffers for various performance reasons, and packets stalling
-the TX ring might just be waiting for cleanup.
-
-This cleanup issue is specifically the case for the driver ixgbe
-(Intel 82599 chip).  This driver (ixgbe) combines TX+RX ring cleanups,
-and the cleanup interval is affected by the ethtool --coalesce setting
-of parameter "rx-usecs".
-
-For ixgbe use e.g. "30" resulting in approx 33K interrupts/sec (1/30*10^6):
- # ethtool -C ethX rx-usecs 30
-
-
-Kernel threads
-==============
-Pktgen creates a thread for each CPU with affinity to that CPU.
-Which is controlled through procfile /proc/net/pktgen/kpktgend_X.
-
-Example: /proc/net/pktgen/kpktgend_0
-
- Running:
- Stopped: eth4@0
- Result: OK: add_device=eth4@0
-
-Most important are the devices assigned to the thread.
-
-The two basic thread commands are:
- * add_device DEVICE@NAME -- adds a single device
- * rem_device_all         -- remove all associated devices
-
-When adding a device to a thread, a corresponding procfile is created
-which is used for configuring this device. Thus, device names need to
-be unique.
-
-To support adding the same device to multiple threads, which is useful
-with multi queue NICs, the device naming scheme is extended with "@":
- device@something
-
-The part after "@" can be anything, but it is custom to use the thread
-number.
-
-Viewing devices
-===============
-
-The Params section holds configured information.  The Current section
-holds running statistics.  The Result is printed after a run or after
-interruption.  Example:
-
-/proc/net/pktgen/eth4@0
-
- Params: count 100000  min_pkt_size: 60  max_pkt_size: 60
-     frags: 0  delay: 0  clone_skb: 64  ifname: eth4@0
-     flows: 0 flowlen: 0
-     queue_map_min: 0  queue_map_max: 0
-     dst_min: 192.168.81.2  dst_max:
-     src_min:   src_max:
-     src_mac: 90:e2:ba:0a:56:b4 dst_mac: 00:1b:21:3c:9d:f8
-     udp_src_min: 9  udp_src_max: 109  udp_dst_min: 9  udp_dst_max: 9
-     src_mac_count: 0  dst_mac_count: 0
-     Flags: UDPSRC_RND  NO_TIMESTAMP  QUEUE_MAP_CPU
- Current:
-     pkts-sofar: 100000  errors: 0
-     started: 623913381008us  stopped: 623913396439us idle: 25us
-     seq_num: 100001  cur_dst_mac_offset: 0  cur_src_mac_offset: 0
-     cur_saddr: 192.168.8.3  cur_daddr: 192.168.81.2
-     cur_udp_dst: 9  cur_udp_src: 42
-     cur_queue_map: 0
-     flows: 0
- Result: OK: 15430(c15405+d25) usec, 100000 (60byte,0frags)
-  6480562pps 3110Mb/sec (3110669760bps) errors: 0
-
-
-Configuring devices
-===================
-This is done via the /proc interface, and most easily done via pgset
-as defined in the sample scripts.
-You need to specify PGDEV environment variable to use functions from sample
-scripts, i.e.:
-export PGDEV=/proc/net/pktgen/eth4@0
-source samples/pktgen/functions.sh
-
-Examples:
-
- pg_ctrl start           starts injection.
- pg_ctrl stop            aborts injection. Also, ^C aborts generator.
-
- pgset "clone_skb 1"     sets the number of copies of the same packet
- pgset "clone_skb 0"     use single SKB for all transmits
- pgset "burst 8"         uses xmit_more API to queue 8 copies of the same
-                         packet and update HW tx queue tail pointer once.
-                         "burst 1" is the default
- pgset "pkt_size 9014"   sets packet size to 9014
- pgset "frags 5"         packet will consist of 5 fragments
- pgset "count 200000"    sets number of packets to send, set to zero
-                         for continuous sends until explicitly stopped.
-
- pgset "delay 5000"      adds delay to hard_start_xmit(). nanoseconds
-
- pgset "dst 10.0.0.1"    sets IP destination address
-                         (BEWARE! This generator is very aggressive!)
-
- pgset "dst_min 10.0.0.1"            Same as dst
- pgset "dst_max 10.0.0.254"          Set the maximum destination IP.
- pgset "src_min 10.0.0.1"            Set the minimum (or only) source IP.
- pgset "src_max 10.0.0.254"          Set the maximum source IP.
- pgset "dst6 fec0::1"     IPV6 destination address
- pgset "src6 fec0::2"     IPV6 source address
- pgset "dstmac 00:00:00:00:00:00"    sets MAC destination address
- pgset "srcmac 00:00:00:00:00:00"    sets MAC source address
-
- pgset "queue_map_min 0" Sets the min value of tx queue interval
- pgset "queue_map_max 7" Sets the max value of tx queue interval, for multiqueue devices
-                         To select queue 1 of a given device,
-                         use queue_map_min=1 and queue_map_max=1
-
- pgset "src_mac_count 1" Sets the number of MACs we'll range through.
-                         The 'minimum' MAC is what you set with srcmac.
-
- pgset "dst_mac_count 1" Sets the number of MACs we'll range through.
-                         The 'minimum' MAC is what you set with dstmac.
-
- pgset "flag [name]"     Set a flag to determine behaviour.  Current flags
-                         are: IPSRC_RND # IP source is random (between min/max)
-                              IPDST_RND # IP destination is random
-                              UDPSRC_RND, UDPDST_RND,
-                              MACSRC_RND, MACDST_RND
-                              TXSIZE_RND, IPV6,
-                              MPLS_RND, VID_RND, SVID_RND
-                              FLOW_SEQ,
-                              QUEUE_MAP_RND # queue map random
-                              QUEUE_MAP_CPU # queue map mirrors smp_processor_id()
-                              UDPCSUM,
-                              IPSEC # IPsec encapsulation (needs CONFIG_XFRM)
-                              NODE_ALLOC # node specific memory allocation
-                              NO_TIMESTAMP # disable timestamping
- pgset 'flag ![name]'    Clear a flag to determine behaviour.
-                         Note that you might need to use single quote in
-                         interactive mode, so that your shell wouldn't expand
-                         the specified flag as a history command.
-
- pgset "spi [SPI_VALUE]" Set specific SA used to transform packet.
-
- pgset "udp_src_min 9"   set UDP source port min, If < udp_src_max, then
-                         cycle through the port range.
-
- pgset "udp_src_max 9"   set UDP source port max.
- pgset "udp_dst_min 9"   set UDP destination port min, If < udp_dst_max, then
-                         cycle through the port range.
- pgset "udp_dst_max 9"   set UDP destination port max.
-
- pgset "mpls 0001000a,0002000a,0000000a" set MPLS labels (in this example
-                                         outer label=16,middle label=32,
-					 inner label=0 (IPv4 NULL)) Note that
-					 there must be no spaces between the
-					 arguments. Leading zeros are required.
-					 Do not set the bottom of stack bit,
-					 that's done automatically. If you do
-					 set the bottom of stack bit, that
-					 indicates that you want to randomly
-					 generate that address and the flag
-					 MPLS_RND will be turned on. You
-					 can have any mix of random and fixed
-					 labels in the label stack.
-
- pgset "mpls 0"		  turn off mpls (or any invalid argument works too!)
-
- pgset "vlan_id 77"       set VLAN ID 0-4095
- pgset "vlan_p 3"         set priority bit 0-7 (default 0)
- pgset "vlan_cfi 0"       set canonical format identifier 0-1 (default 0)
-
- pgset "svlan_id 22"      set SVLAN ID 0-4095
- pgset "svlan_p 3"        set priority bit 0-7 (default 0)
- pgset "svlan_cfi 0"      set canonical format identifier 0-1 (default 0)
-
- pgset "vlan_id 9999"     > 4095 remove vlan and svlan tags
- pgset "svlan 9999"       > 4095 remove svlan tag
-
-
- pgset "tos XX"           set former IPv4 TOS field (e.g. "tos 28" for AF11 no ECN, default 00)
- pgset "traffic_class XX" set former IPv6 TRAFFIC CLASS (e.g. "traffic_class B8" for EF no ECN, default 00)
-
- pgset "rate 300M"        set rate to 300 Mb/s
- pgset "ratep 1000000"    set rate to 1Mpps
-
- pgset "xmit_mode netif_receive"  RX inject into stack netif_receive_skb()
-				  Works with "burst" but not with "clone_skb".
-				  Default xmit_mode is "start_xmit".
-
-Sample scripts
-==============
-
-A collection of tutorial scripts and helpers for pktgen is in the
-samples/pktgen directory. The helper parameters.sh file support easy
-and consistent parameter parsing across the sample scripts.
-
-Usage example and help:
- ./pktgen_sample01_simple.sh -i eth4 -m 00:1B:21:3C:9D:F8 -d 192.168.8.2
-
-Usage: ./pktgen_sample01_simple.sh [-vx] -i ethX
-  -i : ($DEV)       output interface/device (required)
-  -s : ($PKT_SIZE)  packet size
-  -d : ($DEST_IP)   destination IP
-  -m : ($DST_MAC)   destination MAC-addr
-  -t : ($THREADS)   threads to start
-  -c : ($SKB_CLONE) SKB clones send before alloc new SKB
-  -b : ($BURST)     HW level bursting of SKBs
-  -v : ($VERBOSE)   verbose
-  -x : ($DEBUG)     debug
-
-The global variables being set are also listed.  E.g. the required
-interface/device parameter "-i" sets variable $DEV.  Copy the
-pktgen_sampleXX scripts and modify them to fit your own needs.
-
-The old scripts:
-
-pktgen.conf-1-2                  # 1 CPU 2 dev
-pktgen.conf-1-1-rdos             # 1 CPU 1 dev w. route DoS 
-pktgen.conf-1-1-ip6              # 1 CPU 1 dev ipv6
-pktgen.conf-1-1-ip6-rdos         # 1 CPU 1 dev ipv6  w. route DoS
-pktgen.conf-1-1-flows            # 1 CPU 1 dev multiple flows.
-
-
-Interrupt affinity
-===================
-Note that when adding devices to a specific CPU it is a good idea to
-also assign /proc/irq/XX/smp_affinity so that the TX interrupts are bound
-to the same CPU.  This reduces cache bouncing when freeing skbs.
-
-Plus using the device flag QUEUE_MAP_CPU, which maps the SKBs TX queue
-to the running threads CPU (directly from smp_processor_id()).
-
-Enable IPsec
-============
-Default IPsec transformation with ESP encapsulation plus transport mode
-can be enabled by simply setting:
-
-pgset "flag IPSEC"
-pgset "flows 1"
-
-To avoid breaking existing testbed scripts for using AH type and tunnel mode,
-you can use "pgset spi SPI_VALUE" to specify which transformation mode
-to employ.
-
-
-Current commands and configuration options
-==========================================
-
-** Pgcontrol commands:
-
-start
-stop
-reset
-
-** Thread commands:
-
-add_device
-rem_device_all
-
-
-** Device commands:
-
-count
-clone_skb
-burst
-debug
-
-frags
-delay
-
-src_mac_count
-dst_mac_count
-
-pkt_size
-min_pkt_size
-max_pkt_size
-
-queue_map_min
-queue_map_max
-skb_priority
-
-tos           (ipv4)
-traffic_class (ipv6)
-
-mpls
-
-udp_src_min
-udp_src_max
-
-udp_dst_min
-udp_dst_max
-
-node
-
-flag
-  IPSRC_RND
-  IPDST_RND
-  UDPSRC_RND
-  UDPDST_RND
-  MACSRC_RND
-  MACDST_RND
-  TXSIZE_RND
-  IPV6
-  MPLS_RND
-  VID_RND
-  SVID_RND
-  FLOW_SEQ
-  QUEUE_MAP_RND
-  QUEUE_MAP_CPU
-  UDPCSUM
-  IPSEC
-  NODE_ALLOC
-  NO_TIMESTAMP
-
-spi (ipsec)
-
-dst_min
-dst_max
-
-src_min
-src_max
-
-dst_mac
-src_mac
-
-clear_counters
-
-src6
-dst6
-dst6_max
-dst6_min
-
-flows
-flowlen
-
-rate
-ratep
-
-xmit_mode <start_xmit|netif_receive>
-
-vlan_cfi
-vlan_id
-vlan_p
-
-svlan_cfi
-svlan_id
-svlan_p
-
-
-References:
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/examples/
-
-Paper from Linux-Kongress in Erlangen 2004.
-ftp://robur.slu.se/pub/Linux/net-development/pktgen-testing/pktgen_paper.pdf
-
-Thanks to:
-Grant Grundler for testing on IA-64 and parisc, Harald Welte,  Lennert Buytenhek
-Stephen Hemminger, Andi Kleen, Dave Miller and many others.
-
-
-Good luck with the linux net-development.
diff --git a/net/Kconfig b/net/Kconfig
index 8b1f85820a6b..c5ba2d180c43 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -344,7 +344,7 @@ config NET_PKTGEN
 	  what was just said, you don't need it: say N.
 
 	  Documentation on how to use the packet generator can be found
-	  at <file:Documentation/networking/pktgen.txt>.
+	  at <file:Documentation/networking/pktgen.rst>.
 
 	  To compile this code as a module, choose M here: the
 	  module will be called pktgen.
diff --git a/net/core/pktgen.c b/net/core/pktgen.c
index 08e2811b5274..b53b6d38c4df 100644
--- a/net/core/pktgen.c
+++ b/net/core/pktgen.c
@@ -56,7 +56,7 @@
  * Integrated to 2.5.x 021029 --Lucio Maciel (luciomaciel@zipmail.com.br)
  *
  * 021124 Finished major redesign and rewrite for new functionality.
- * See Documentation/networking/pktgen.txt for how to use this.
+ * See Documentation/networking/pktgen.rst for how to use this.
  *
  * The new operation:
  * For each CPU one thread/process is created at start. This process checks
diff --git a/samples/pktgen/README.rst b/samples/pktgen/README.rst
index 3f6483e8b2df..f9c53ca5cf93 100644
--- a/samples/pktgen/README.rst
+++ b/samples/pktgen/README.rst
@@ -3,7 +3,7 @@ Sample and benchmark scripts for pktgen (packet generator)
 This directory contains some pktgen sample and benchmark scripts, that
 can easily be copied and adjusted for your own use-case.
 
-General doc is located in kernel: Documentation/networking/pktgen.txt
+General doc is located in kernel: Documentation/networking/pktgen.rst
 
 Helper include files
 ====================
-- 
cgit v1.2.3-59-g8ed1b


From 32c01266c0aab060550f592b3fe59405be8ab022 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:14 +0200
Subject: docs: networking: convert PLIP.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/PLIP.txt  | 215 -----------------------------------
 Documentation/networking/index.rst |   1 +
 Documentation/networking/plip.rst  | 222 +++++++++++++++++++++++++++++++++++++
 drivers/net/plip/Kconfig           |   2 +-
 4 files changed, 224 insertions(+), 216 deletions(-)
 delete mode 100644 Documentation/networking/PLIP.txt
 create mode 100644 Documentation/networking/plip.rst

diff --git a/Documentation/networking/PLIP.txt b/Documentation/networking/PLIP.txt
deleted file mode 100644
index ad7e3f7c3bbf..000000000000
--- a/Documentation/networking/PLIP.txt
+++ /dev/null
@@ -1,215 +0,0 @@
-PLIP: The Parallel Line Internet Protocol Device
-
-Donald Becker (becker@super.org)
-I.D.A. Supercomputing Research Center, Bowie MD 20715
-
-At some point T. Thorn will probably contribute text,
-Tommy Thorn (tthorn@daimi.aau.dk)
-
-PLIP Introduction
------------------
-
-This document describes the parallel port packet pusher for Net/LGX.
-This device interface allows a point-to-point connection between two
-parallel ports to appear as a IP network interface.
-
-What is PLIP?
-=============
-
-PLIP is Parallel Line IP, that is, the transportation of IP packages
-over a parallel port. In the case of a PC, the obvious choice is the
-printer port.  PLIP is a non-standard, but [can use] uses the standard
-LapLink null-printer cable [can also work in turbo mode, with a PLIP
-cable]. [The protocol used to pack IP packages, is a simple one
-initiated by Crynwr.]
-
-Advantages of PLIP
-==================
-
-It's cheap, it's available everywhere, and it's easy.
-
-The PLIP cable is all that's needed to connect two Linux boxes, and it
-can be built for very few bucks.
-
-Connecting two Linux boxes takes only a second's decision and a few
-minutes' work, no need to search for a [supported] netcard. This might
-even be especially important in the case of notebooks, where netcards
-are not easily available.
-
-Not requiring a netcard also means that apart from connecting the
-cables, everything else is software configuration [which in principle
-could be made very easy.]
-
-Disadvantages of PLIP
-=====================
-
-Doesn't work over a modem, like SLIP and PPP. Limited range, 15 m.
-Can only be used to connect three (?) Linux boxes. Doesn't connect to
-an existing Ethernet. Isn't standard (not even de facto standard, like
-SLIP).
-
-Performance
-===========
-
-PLIP easily outperforms Ethernet cards....(ups, I was dreaming, but
-it *is* getting late. EOB)
-
-PLIP driver details
--------------------
-
-The Linux PLIP driver is an implementation of the original Crynwr protocol,
-that uses the parallel port subsystem of the kernel in order to properly
-share parallel ports between PLIP and other services.
-
-IRQs and trigger timeouts
-=========================
-
-When a parallel port used for a PLIP driver has an IRQ configured to it, the
-PLIP driver is signaled whenever data is sent to it via the cable, such that
-when no data is available, the driver isn't being used.
-
-However, on some machines it is hard, if not impossible, to configure an IRQ
-to a certain parallel port, mainly because it is used by some other device.
-On these machines, the PLIP driver can be used in IRQ-less mode, where
-the PLIP driver would constantly poll the parallel port for data waiting,
-and if such data is available, process it. This mode is less efficient than
-the IRQ mode, because the driver has to check the parallel port many times
-per second, even when no data at all is sent. Some rough measurements
-indicate that there isn't a noticeable performance drop when using IRQ-less
-mode as compared to IRQ mode as far as the data transfer speed is involved.
-There is a performance drop on the machine hosting the driver.
-
-When the PLIP driver is used in IRQ mode, the timeout used for triggering a
-data transfer (the maximal time the PLIP driver would allow the other side
-before announcing a timeout, when trying to handshake a transfer of some
-data) is, by default, 500usec. As IRQ delivery is more or less immediate,
-this timeout is quite sufficient. 
-
-When in IRQ-less mode, the PLIP driver polls the parallel port HZ times
-per second (where HZ is typically 100 on most platforms, and 1024 on an
-Alpha, as of this writing). Between two such polls, there are 10^6/HZ usecs.
-On an i386, for example, 10^6/100 = 10000usec. It is easy to see that it is
-quite possible for the trigger timeout to expire between two such polls, as
-the timeout is only 500usec long. As a result, it is required to change the
-trigger timeout on the *other* side of a PLIP connection, to about
-10^6/HZ usecs. If both sides of a PLIP connection are used in IRQ-less mode,
-this timeout is required on both sides.
-
-It appears that in practice, the trigger timeout can be shorter than in the
-above calculation. It isn't an important issue, unless the wire is faulty,
-in which case a long timeout would stall the machine when, for whatever
-reason, bits are dropped.
-
-A utility that can perform this change in Linux is plipconfig, which is part
-of the net-tools package (its location can be found in the
-Documentation/Changes file). An example command would be
-'plipconfig plipX trigger 10000', where plipX is the appropriate
-PLIP device.
-
-PLIP hardware interconnection
------------------------------
-
-PLIP uses several different data transfer methods.  The first (and the
-only one implemented in the early version of the code) uses a standard
-printer "null" cable to transfer data four bits at a time using
-data bit outputs connected to status bit inputs.
-
-The second data transfer method relies on both machines having
-bi-directional parallel ports, rather than output-only ``printer''
-ports.  This allows byte-wide transfers and avoids reconstructing
-nibbles into bytes, leading to much faster transfers.
-
-Parallel Transfer Mode 0 Cable
-==============================
-
-The cable for the first transfer mode is a standard
-printer "null" cable which transfers data four bits at a time using
-data bit outputs of the first port (machine T) connected to the
-status bit inputs of the second port (machine R).  There are five
-status inputs, and they are used as four data inputs and a clock (data
-strobe) input, arranged so that the data input bits appear as contiguous
-bits with standard status register implementation.
-
-A cable that implements this protocol is available commercially as a
-"Null Printer" or "Turbo Laplink" cable.  It can be constructed with
-two DB-25 male connectors symmetrically connected as follows:
-
-    STROBE output	1*
-    D0->ERROR	2 - 15		15 - 2
-    D1->SLCT	3 - 13		13 - 3
-    D2->PAPOUT	4 - 12		12 - 4
-    D3->ACK	5 - 10		10 - 5
-    D4->BUSY	6 - 11		11 - 6
-    D5,D6,D7 are   7*, 8*, 9*
-    AUTOFD output 14*
-    INIT   output 16*
-    SLCTIN	17 - 17
-    extra grounds are 18*,19*,20*,21*,22*,23*,24*
-    GROUND	25 - 25
-* Do not connect these pins on either end
-
-If the cable you are using has a metallic shield it should be
-connected to the metallic DB-25 shell at one end only.
-
-Parallel Transfer Mode 1
-========================
-
-The second data transfer method relies on both machines having
-bi-directional parallel ports, rather than output-only ``printer''
-ports.  This allows byte-wide transfers, and avoids reconstructing
-nibbles into bytes.  This cable should not be used on unidirectional
-``printer'' (as opposed to ``parallel'') ports or when the machine
-isn't configured for PLIP, as it will result in output driver
-conflicts and the (unlikely) possibility of damage.
-
-The cable for this transfer mode should be constructed as follows:
-
-    STROBE->BUSY 1 - 11
-    D0->D0	2 - 2
-    D1->D1	3 - 3
-    D2->D2	4 - 4
-    D3->D3	5 - 5
-    D4->D4	6 - 6
-    D5->D5	7 - 7
-    D6->D6	8 - 8
-    D7->D7	9 - 9
-    INIT -> ACK  16 - 10
-    AUTOFD->PAPOUT 14 - 12
-    SLCT->SLCTIN 13 - 17
-    GND->ERROR	18 - 15
-    extra grounds are 19*,20*,21*,22*,23*,24*
-    GROUND	25 - 25
-* Do not connect these pins on either end
-
-Once again, if the cable you are using has a metallic shield it should
-be connected to the metallic DB-25 shell at one end only.
-
-PLIP Mode 0 transfer protocol
-=============================
-
-The PLIP driver is compatible with the "Crynwr" parallel port transfer
-standard in Mode 0.  That standard specifies the following protocol:
-
-   send header nibble '0x8'
-   count-low octet
-   count-high octet
-   ... data octets
-   checksum octet
-
-Each octet is sent as
-	<wait for rx. '0x1?'>	<send 0x10+(octet&0x0F)>
-	<wait for rx. '0x0?'>	<send 0x00+((octet>>4)&0x0F)>
-
-To start a transfer the transmitting machine outputs a nibble 0x08.
-That raises the ACK line, triggering an interrupt in the receiving
-machine.  The receiving machine disables interrupts and raises its own ACK
-line. 
-
-Restated:
-
-(OUT is bit 0-4, OUT.j is bit j from OUT. IN likewise)
-Send_Byte:
-   OUT := low nibble, OUT.4 := 1
-   WAIT FOR IN.4 = 1
-   OUT := high nibble, OUT.4 := 0
-   WAIT FOR IN.4 = 0
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 696181a96e3c..18bb10239cad 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -92,6 +92,7 @@ Contents:
    packet_mmap
    phonet
    pktgen
+   plip
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/plip.rst b/Documentation/networking/plip.rst
new file mode 100644
index 000000000000..0eda745050ff
--- /dev/null
+++ b/Documentation/networking/plip.rst
@@ -0,0 +1,222 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================
+PLIP: The Parallel Line Internet Protocol Device
+================================================
+
+Donald Becker (becker@super.org)
+I.D.A. Supercomputing Research Center, Bowie MD 20715
+
+At some point T. Thorn will probably contribute text,
+Tommy Thorn (tthorn@daimi.aau.dk)
+
+PLIP Introduction
+-----------------
+
+This document describes the parallel port packet pusher for Net/LGX.
+This device interface allows a point-to-point connection between two
+parallel ports to appear as an IP network interface.
+
+What is PLIP?
+=============
+
+PLIP is Parallel Line IP, that is, the transportation of IP packages
+over a parallel port. In the case of a PC, the obvious choice is the
+printer port.  PLIP is a non-standard, but [can use] uses the standard
+LapLink null-printer cable [can also work in turbo mode, with a PLIP
+cable]. [The protocol used to pack IP packages, is a simple one
+initiated by Crynwr.]
+
+Advantages of PLIP
+==================
+
+It's cheap, it's available everywhere, and it's easy.
+
+The PLIP cable is all that's needed to connect two Linux boxes, and it
+can be built for very few bucks.
+
+Connecting two Linux boxes takes only a second's decision and a few
+minutes' work, no need to search for a [supported] netcard. This might
+even be especially important in the case of notebooks, where netcards
+are not easily available.
+
+Not requiring a netcard also means that apart from connecting the
+cables, everything else is software configuration [which in principle
+could be made very easy.]
+
+Disadvantages of PLIP
+=====================
+
+Doesn't work over a modem, like SLIP and PPP. Limited range, 15 m.
+Can only be used to connect three (?) Linux boxes. Doesn't connect to
+an existing Ethernet. Isn't standard (not even de facto standard, like
+SLIP).
+
+Performance
+===========
+
+PLIP easily outperforms Ethernet cards....(ups, I was dreaming, but
+it *is* getting late. EOB)
+
+PLIP driver details
+-------------------
+
+The Linux PLIP driver is an implementation of the original Crynwr protocol,
+that uses the parallel port subsystem of the kernel in order to properly
+share parallel ports between PLIP and other services.
+
+IRQs and trigger timeouts
+=========================
+
+When a parallel port used for a PLIP driver has an IRQ configured to it, the
+PLIP driver is signaled whenever data is sent to it via the cable, such that
+when no data is available, the driver isn't being used.
+
+However, on some machines it is hard, if not impossible, to configure an IRQ
+to a certain parallel port, mainly because it is used by some other device.
+On these machines, the PLIP driver can be used in IRQ-less mode, where
+the PLIP driver would constantly poll the parallel port for data waiting,
+and if such data is available, process it. This mode is less efficient than
+the IRQ mode, because the driver has to check the parallel port many times
+per second, even when no data at all is sent. Some rough measurements
+indicate that there isn't a noticeable performance drop when using IRQ-less
+mode as compared to IRQ mode as far as the data transfer speed is involved.
+There is a performance drop on the machine hosting the driver.
+
+When the PLIP driver is used in IRQ mode, the timeout used for triggering a
+data transfer (the maximal time the PLIP driver would allow the other side
+before announcing a timeout, when trying to handshake a transfer of some
+data) is, by default, 500usec. As IRQ delivery is more or less immediate,
+this timeout is quite sufficient.
+
+When in IRQ-less mode, the PLIP driver polls the parallel port HZ times
+per second (where HZ is typically 100 on most platforms, and 1024 on an
+Alpha, as of this writing). Between two such polls, there are 10^6/HZ usecs.
+On an i386, for example, 10^6/100 = 10000usec. It is easy to see that it is
+quite possible for the trigger timeout to expire between two such polls, as
+the timeout is only 500usec long. As a result, it is required to change the
+trigger timeout on the *other* side of a PLIP connection, to about
+10^6/HZ usecs. If both sides of a PLIP connection are used in IRQ-less mode,
+this timeout is required on both sides.
+
+It appears that in practice, the trigger timeout can be shorter than in the
+above calculation. It isn't an important issue, unless the wire is faulty,
+in which case a long timeout would stall the machine when, for whatever
+reason, bits are dropped.
+
+A utility that can perform this change in Linux is plipconfig, which is part
+of the net-tools package (its location can be found in the
+Documentation/Changes file). An example command would be
+'plipconfig plipX trigger 10000', where plipX is the appropriate
+PLIP device.
+
+PLIP hardware interconnection
+-----------------------------
+
+PLIP uses several different data transfer methods.  The first (and the
+only one implemented in the early version of the code) uses a standard
+printer "null" cable to transfer data four bits at a time using
+data bit outputs connected to status bit inputs.
+
+The second data transfer method relies on both machines having
+bi-directional parallel ports, rather than output-only ``printer``
+ports.  This allows byte-wide transfers and avoids reconstructing
+nibbles into bytes, leading to much faster transfers.
+
+Parallel Transfer Mode 0 Cable
+==============================
+
+The cable for the first transfer mode is a standard
+printer "null" cable which transfers data four bits at a time using
+data bit outputs of the first port (machine T) connected to the
+status bit inputs of the second port (machine R).  There are five
+status inputs, and they are used as four data inputs and a clock (data
+strobe) input, arranged so that the data input bits appear as contiguous
+bits with standard status register implementation.
+
+A cable that implements this protocol is available commercially as a
+"Null Printer" or "Turbo Laplink" cable.  It can be constructed with
+two DB-25 male connectors symmetrically connected as follows::
+
+    STROBE output	1*
+    D0->ERROR	2 - 15		15 - 2
+    D1->SLCT	3 - 13		13 - 3
+    D2->PAPOUT	4 - 12		12 - 4
+    D3->ACK	5 - 10		10 - 5
+    D4->BUSY	6 - 11		11 - 6
+    D5,D6,D7 are   7*, 8*, 9*
+    AUTOFD output 14*
+    INIT   output 16*
+    SLCTIN	17 - 17
+    extra grounds are 18*,19*,20*,21*,22*,23*,24*
+    GROUND	25 - 25
+
+    * Do not connect these pins on either end
+
+If the cable you are using has a metallic shield it should be
+connected to the metallic DB-25 shell at one end only.
+
+Parallel Transfer Mode 1
+========================
+
+The second data transfer method relies on both machines having
+bi-directional parallel ports, rather than output-only ``printer``
+ports.  This allows byte-wide transfers, and avoids reconstructing
+nibbles into bytes.  This cable should not be used on unidirectional
+``printer`` (as opposed to ``parallel``) ports or when the machine
+isn't configured for PLIP, as it will result in output driver
+conflicts and the (unlikely) possibility of damage.
+
+The cable for this transfer mode should be constructed as follows::
+
+    STROBE->BUSY 1 - 11
+    D0->D0	2 - 2
+    D1->D1	3 - 3
+    D2->D2	4 - 4
+    D3->D3	5 - 5
+    D4->D4	6 - 6
+    D5->D5	7 - 7
+    D6->D6	8 - 8
+    D7->D7	9 - 9
+    INIT -> ACK  16 - 10
+    AUTOFD->PAPOUT 14 - 12
+    SLCT->SLCTIN 13 - 17
+    GND->ERROR	18 - 15
+    extra grounds are 19*,20*,21*,22*,23*,24*
+    GROUND	25 - 25
+
+    * Do not connect these pins on either end
+
+Once again, if the cable you are using has a metallic shield it should
+be connected to the metallic DB-25 shell at one end only.
+
+PLIP Mode 0 transfer protocol
+=============================
+
+The PLIP driver is compatible with the "Crynwr" parallel port transfer
+standard in Mode 0.  That standard specifies the following protocol::
+
+   send header nibble '0x8'
+   count-low octet
+   count-high octet
+   ... data octets
+   checksum octet
+
+Each octet is sent as::
+
+	<wait for rx. '0x1?'>	<send 0x10+(octet&0x0F)>
+	<wait for rx. '0x0?'>	<send 0x00+((octet>>4)&0x0F)>
+
+To start a transfer the transmitting machine outputs a nibble 0x08.
+That raises the ACK line, triggering an interrupt in the receiving
+machine.  The receiving machine disables interrupts and raises its own ACK
+line.
+
+Restated::
+
+  (OUT is bit 0-4, OUT.j is bit j from OUT. IN likewise)
+  Send_Byte:
+     OUT := low nibble, OUT.4 := 1
+     WAIT FOR IN.4 = 1
+     OUT := high nibble, OUT.4 := 0
+     WAIT FOR IN.4 = 0
diff --git a/drivers/net/plip/Kconfig b/drivers/net/plip/Kconfig
index b41035be2d51..e03556d1d0c2 100644
--- a/drivers/net/plip/Kconfig
+++ b/drivers/net/plip/Kconfig
@@ -21,7 +21,7 @@ config PLIP
 	  bits at a time (mode 0) or with special PLIP cables, to be used on
 	  bidirectional parallel ports only, which can transmit 8 bits at a
 	  time (mode 1); you can find the wiring of these cables in
-	  <file:Documentation/networking/PLIP.txt>.  The cables can be up to
+	  <file:Documentation/networking/plip.rst>.  The cables can be up to
 	  15m long.  Mode 0 works also if one of the machines runs DOS/Windows
 	  and has some PLIP software installed, e.g. the Crynwr PLIP packet
 	  driver (<http://oak.oakland.edu/simtel.net/msdos/pktdrvr-pre.html>)
-- 
cgit v1.2.3-59-g8ed1b


From 71120802ebeda1e645baf673b958978c4000a695 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:15 +0200
Subject: docs: networking: convert ppp_generic.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/ppp_generic.rst | 440 +++++++++++++++++++++++++++++++
 Documentation/networking/ppp_generic.txt | 428 ------------------------------
 3 files changed, 441 insertions(+), 428 deletions(-)
 create mode 100644 Documentation/networking/ppp_generic.rst
 delete mode 100644 Documentation/networking/ppp_generic.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 18bb10239cad..f89535871481 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -93,6 +93,7 @@ Contents:
    phonet
    pktgen
    plip
+   ppp_generic
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ppp_generic.rst b/Documentation/networking/ppp_generic.rst
new file mode 100644
index 000000000000..e60504377900
--- /dev/null
+++ b/Documentation/networking/ppp_generic.rst
@@ -0,0 +1,440 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+========================================
+PPP Generic Driver and Channel Interface
+========================================
+
+			   Paul Mackerras
+			   paulus@samba.org
+
+			      7 Feb 2002
+
+The generic PPP driver in linux-2.4 provides an implementation of the
+functionality which is of use in any PPP implementation, including:
+
+* the network interface unit (ppp0 etc.)
+* the interface to the networking code
+* PPP multilink: splitting datagrams between multiple links, and
+  ordering and combining received fragments
+* the interface to pppd, via a /dev/ppp character device
+* packet compression and decompression
+* TCP/IP header compression and decompression
+* detecting network traffic for demand dialling and for idle timeouts
+* simple packet filtering
+
+For sending and receiving PPP frames, the generic PPP driver calls on
+the services of PPP ``channels``.  A PPP channel encapsulates a
+mechanism for transporting PPP frames from one machine to another.  A
+PPP channel implementation can be arbitrarily complex internally but
+has a very simple interface with the generic PPP code: it merely has
+to be able to send PPP frames, receive PPP frames, and optionally
+handle ioctl requests.  Currently there are PPP channel
+implementations for asynchronous serial ports, synchronous serial
+ports, and for PPP over ethernet.
+
+This architecture makes it possible to implement PPP multilink in a
+natural and straightforward way, by allowing more than one channel to
+be linked to each ppp network interface unit.  The generic layer is
+responsible for splitting datagrams on transmit and recombining them
+on receive.
+
+
+PPP channel API
+---------------
+
+See include/linux/ppp_channel.h for the declaration of the types and
+functions used to communicate between the generic PPP layer and PPP
+channels.
+
+Each channel has to provide two functions to the generic PPP layer,
+via the ppp_channel.ops pointer:
+
+* start_xmit() is called by the generic layer when it has a frame to
+  send.  The channel has the option of rejecting the frame for
+  flow-control reasons.  In this case, start_xmit() should return 0
+  and the channel should call the ppp_output_wakeup() function at a
+  later time when it can accept frames again, and the generic layer
+  will then attempt to retransmit the rejected frame(s).  If the frame
+  is accepted, the start_xmit() function should return 1.
+
+* ioctl() provides an interface which can be used by a user-space
+  program to control aspects of the channel's behaviour.  This
+  procedure will be called when a user-space program does an ioctl
+  system call on an instance of /dev/ppp which is bound to the
+  channel.  (Usually it would only be pppd which would do this.)
+
+The generic PPP layer provides seven functions to channels:
+
+* ppp_register_channel() is called when a channel has been created, to
+  notify the PPP generic layer of its presence.  For example, setting
+  a serial port to the PPPDISC line discipline causes the ppp_async
+  channel code to call this function.
+
+* ppp_unregister_channel() is called when a channel is to be
+  destroyed.  For example, the ppp_async channel code calls this when
+  a hangup is detected on the serial port.
+
+* ppp_output_wakeup() is called by a channel when it has previously
+  rejected a call to its start_xmit function, and can now accept more
+  packets.
+
+* ppp_input() is called by a channel when it has received a complete
+  PPP frame.
+
+* ppp_input_error() is called by a channel when it has detected that a
+  frame has been lost or dropped (for example, because of a FCS (frame
+  check sequence) error).
+
+* ppp_channel_index() returns the channel index assigned by the PPP
+  generic layer to this channel.  The channel should provide some way
+  (e.g. an ioctl) to transmit this back to user-space, as user-space
+  will need it to attach an instance of /dev/ppp to this channel.
+
+* ppp_unit_number() returns the unit number of the ppp network
+  interface to which this channel is connected, or -1 if the channel
+  is not connected.
+
+Connecting a channel to the ppp generic layer is initiated from the
+channel code, rather than from the generic layer.  The channel is
+expected to have some way for a user-level process to control it
+independently of the ppp generic layer.  For example, with the
+ppp_async channel, this is provided by the file descriptor to the
+serial port.
+
+Generally a user-level process will initialize the underlying
+communications medium and prepare it to do PPP.  For example, with an
+async tty, this can involve setting the tty speed and modes, issuing
+modem commands, and then going through some sort of dialog with the
+remote system to invoke PPP service there.  We refer to this process
+as ``discovery``.  Then the user-level process tells the medium to
+become a PPP channel and register itself with the generic PPP layer.
+The channel then has to report the channel number assigned to it back
+to the user-level process.  From that point, the PPP negotiation code
+in the PPP daemon (pppd) can take over and perform the PPP
+negotiation, accessing the channel through the /dev/ppp interface.
+
+At the interface to the PPP generic layer, PPP frames are stored in
+skbuff structures and start with the two-byte PPP protocol number.
+The frame does *not* include the 0xff ``address`` byte or the 0x03
+``control`` byte that are optionally used in async PPP.  Nor is there
+any escaping of control characters, nor are there any FCS or framing
+characters included.  That is all the responsibility of the channel
+code, if it is needed for the particular medium.  That is, the skbuffs
+presented to the start_xmit() function contain only the 2-byte
+protocol number and the data, and the skbuffs presented to ppp_input()
+must be in the same format.
+
+The channel must provide an instance of a ppp_channel struct to
+represent the channel.  The channel is free to use the ``private`` field
+however it wishes.  The channel should initialize the ``mtu`` and
+``hdrlen`` fields before calling ppp_register_channel() and not change
+them until after ppp_unregister_channel() returns.  The ``mtu`` field
+represents the maximum size of the data part of the PPP frames, that
+is, it does not include the 2-byte protocol number.
+
+If the channel needs some headroom in the skbuffs presented to it for
+transmission (i.e., some space free in the skbuff data area before the
+start of the PPP frame), it should set the ``hdrlen`` field of the
+ppp_channel struct to the amount of headroom required.  The generic
+PPP layer will attempt to provide that much headroom but the channel
+should still check if there is sufficient headroom and copy the skbuff
+if there isn't.
+
+On the input side, channels should ideally provide at least 2 bytes of
+headroom in the skbuffs presented to ppp_input().  The generic PPP
+code does not require this but will be more efficient if this is done.
+
+
+Buffering and flow control
+--------------------------
+
+The generic PPP layer has been designed to minimize the amount of data
+that it buffers in the transmit direction.  It maintains a queue of
+transmit packets for the PPP unit (network interface device) plus a
+queue of transmit packets for each attached channel.  Normally the
+transmit queue for the unit will contain at most one packet; the
+exceptions are when pppd sends packets by writing to /dev/ppp, and
+when the core networking code calls the generic layer's start_xmit()
+function with the queue stopped, i.e. when the generic layer has
+called netif_stop_queue(), which only happens on a transmit timeout.
+The start_xmit function always accepts and queues the packet which it
+is asked to transmit.
+
+Transmit packets are dequeued from the PPP unit transmit queue and
+then subjected to TCP/IP header compression and packet compression
+(Deflate or BSD-Compress compression), as appropriate.  After this
+point the packets can no longer be reordered, as the decompression
+algorithms rely on receiving compressed packets in the same order that
+they were generated.
+
+If multilink is not in use, this packet is then passed to the attached
+channel's start_xmit() function.  If the channel refuses to take
+the packet, the generic layer saves it for later transmission.  The
+generic layer will call the channel's start_xmit() function again
+when the channel calls ppp_output_wakeup() or when the core
+networking code calls the generic layer's start_xmit() function
+again.  The generic layer contains no timeout and retransmission
+logic; it relies on the core networking code for that.
+
+If multilink is in use, the generic layer divides the packet into one
+or more fragments and puts a multilink header on each fragment.  It
+decides how many fragments to use based on the length of the packet
+and the number of channels which are potentially able to accept a
+fragment at the moment.  A channel is potentially able to accept a
+fragment if it doesn't have any fragments currently queued up for it
+to transmit.  The channel may still refuse a fragment; in this case
+the fragment is queued up for the channel to transmit later.  This
+scheme has the effect that more fragments are given to higher-
+bandwidth channels.  It also means that under light load, the generic
+layer will tend to fragment large packets across all the channels,
+thus reducing latency, while under heavy load, packets will tend to be
+transmitted as single fragments, thus reducing the overhead of
+fragmentation.
+
+
+SMP safety
+----------
+
+The PPP generic layer has been designed to be SMP-safe.  Locks are
+used around accesses to the internal data structures where necessary
+to ensure their integrity.  As part of this, the generic layer
+requires that the channels adhere to certain requirements and in turn
+provides certain guarantees to the channels.  Essentially the channels
+are required to provide the appropriate locking on the ppp_channel
+structures that form the basis of the communication between the
+channel and the generic layer.  This is because the channel provides
+the storage for the ppp_channel structure, and so the channel is
+required to provide the guarantee that this storage exists and is
+valid at the appropriate times.
+
+The generic layer requires these guarantees from the channel:
+
+* The ppp_channel object must exist from the time that
+  ppp_register_channel() is called until after the call to
+  ppp_unregister_channel() returns.
+
+* No thread may be in a call to any of ppp_input(), ppp_input_error(),
+  ppp_output_wakeup(), ppp_channel_index() or ppp_unit_number() for a
+  channel at the time that ppp_unregister_channel() is called for that
+  channel.
+
+* ppp_register_channel() and ppp_unregister_channel() must be called
+  from process context, not interrupt or softirq/BH context.
+
+* The remaining generic layer functions may be called at softirq/BH
+  level but must not be called from a hardware interrupt handler.
+
+* The generic layer may call the channel start_xmit() function at
+  softirq/BH level but will not call it at interrupt level.  Thus the
+  start_xmit() function may not block.
+
+* The generic layer will only call the channel ioctl() function in
+  process context.
+
+The generic layer provides these guarantees to the channels:
+
+* The generic layer will not call the start_xmit() function for a
+  channel while any thread is already executing in that function for
+  that channel.
+
+* The generic layer will not call the ioctl() function for a channel
+  while any thread is already executing in that function for that
+  channel.
+
+* By the time a call to ppp_unregister_channel() returns, no thread
+  will be executing in a call from the generic layer to that channel's
+  start_xmit() or ioctl() function, and the generic layer will not
+  call either of those functions subsequently.
+
+
+Interface to pppd
+-----------------
+
+The PPP generic layer exports a character device interface called
+/dev/ppp.  This is used by pppd to control PPP interface units and
+channels.  Although there is only one /dev/ppp, each open instance of
+/dev/ppp acts independently and can be attached either to a PPP unit
+or a PPP channel.  This is achieved using the file->private_data field
+to point to a separate object for each open instance of /dev/ppp.  In
+this way an effect similar to Solaris' clone open is obtained,
+allowing us to control an arbitrary number of PPP interfaces and
+channels without having to fill up /dev with hundreds of device names.
+
+When /dev/ppp is opened, a new instance is created which is initially
+unattached.  Using an ioctl call, it can then be attached to an
+existing unit, attached to a newly-created unit, or attached to an
+existing channel.  An instance attached to a unit can be used to send
+and receive PPP control frames, using the read() and write() system
+calls, along with poll() if necessary.  Similarly, an instance
+attached to a channel can be used to send and receive PPP frames on
+that channel.
+
+In multilink terms, the unit represents the bundle, while the channels
+represent the individual physical links.  Thus, a PPP frame sent by a
+write to the unit (i.e., to an instance of /dev/ppp attached to the
+unit) will be subject to bundle-level compression and to fragmentation
+across the individual links (if multilink is in use).  In contrast, a
+PPP frame sent by a write to the channel will be sent as-is on that
+channel, without any multilink header.
+
+A channel is not initially attached to any unit.  In this state it can
+be used for PPP negotiation but not for the transfer of data packets.
+It can then be connected to a PPP unit with an ioctl call, which
+makes it available to send and receive data packets for that unit.
+
+The ioctl calls which are available on an instance of /dev/ppp depend
+on whether it is unattached, attached to a PPP interface, or attached
+to a PPP channel.  The ioctl calls which are available on an
+unattached instance are:
+
+* PPPIOCNEWUNIT creates a new PPP interface and makes this /dev/ppp
+  instance the "owner" of the interface.  The argument should point to
+  an int which is the desired unit number if >= 0, or -1 to assign the
+  lowest unused unit number.  Being the owner of the interface means
+  that the interface will be shut down if this instance of /dev/ppp is
+  closed.
+
+* PPPIOCATTACH attaches this instance to an existing PPP interface.
+  The argument should point to an int containing the unit number.
+  This does not make this instance the owner of the PPP interface.
+
+* PPPIOCATTCHAN attaches this instance to an existing PPP channel.
+  The argument should point to an int containing the channel number.
+
+The ioctl calls available on an instance of /dev/ppp attached to a
+channel are:
+
+* PPPIOCCONNECT connects this channel to a PPP interface.  The
+  argument should point to an int containing the interface unit
+  number.  It will return an EINVAL error if the channel is already
+  connected to an interface, or ENXIO if the requested interface does
+  not exist.
+
+* PPPIOCDISCONN disconnects this channel from the PPP interface that
+  it is connected to.  It will return an EINVAL error if the channel
+  is not connected to an interface.
+
+* All other ioctl commands are passed to the channel ioctl() function.
+
+The ioctl calls that are available on an instance that is attached to
+an interface unit are:
+
+* PPPIOCSMRU sets the MRU (maximum receive unit) for the interface.
+  The argument should point to an int containing the new MRU value.
+
+* PPPIOCSFLAGS sets flags which control the operation of the
+  interface.  The argument should be a pointer to an int containing
+  the new flags value.  The bits in the flags value that can be set
+  are:
+
+	================	========================================
+	SC_COMP_TCP		enable transmit TCP header compression
+	SC_NO_TCP_CCID		disable connection-id compression for
+				TCP header compression
+	SC_REJ_COMP_TCP		disable receive TCP header decompression
+	SC_CCP_OPEN		Compression Control Protocol (CCP) is
+				open, so inspect CCP packets
+	SC_CCP_UP		CCP is up, may (de)compress packets
+	SC_LOOP_TRAFFIC		send IP traffic to pppd
+	SC_MULTILINK		enable PPP multilink fragmentation on
+				transmitted packets
+	SC_MP_SHORTSEQ		expect short multilink sequence
+				numbers on received multilink fragments
+	SC_MP_XSHORTSEQ		transmit short multilink sequence nos.
+	================	========================================
+
+  The values of these flags are defined in <linux/ppp-ioctl.h>.  Note
+  that the values of the SC_MULTILINK, SC_MP_SHORTSEQ and
+  SC_MP_XSHORTSEQ bits are ignored if the CONFIG_PPP_MULTILINK option
+  is not selected.
+
+* PPPIOCGFLAGS returns the value of the status/control flags for the
+  interface unit.  The argument should point to an int where the ioctl
+  will store the flags value.  As well as the values listed above for
+  PPPIOCSFLAGS, the following bits may be set in the returned value:
+
+	================	=========================================
+	SC_COMP_RUN		CCP compressor is running
+	SC_DECOMP_RUN		CCP decompressor is running
+	SC_DC_ERROR		CCP decompressor detected non-fatal error
+	SC_DC_FERROR		CCP decompressor detected fatal error
+	================	=========================================
+
+* PPPIOCSCOMPRESS sets the parameters for packet compression or
+  decompression.  The argument should point to a ppp_option_data
+  structure (defined in <linux/ppp-ioctl.h>), which contains a
+  pointer/length pair which should describe a block of memory
+  containing a CCP option specifying a compression method and its
+  parameters.  The ppp_option_data struct also contains a ``transmit``
+  field.  If this is 0, the ioctl will affect the receive path,
+  otherwise the transmit path.
+
+* PPPIOCGUNIT returns, in the int pointed to by the argument, the unit
+  number of this interface unit.
+
+* PPPIOCSDEBUG sets the debug flags for the interface to the value in
+  the int pointed to by the argument.  Only the least significant bit
+  is used; if this is 1 the generic layer will print some debug
+  messages during its operation.  This is only intended for debugging
+  the generic PPP layer code; it is generally not helpful for working
+  out why a PPP connection is failing.
+
+* PPPIOCGDEBUG returns the debug flags for the interface in the int
+  pointed to by the argument.
+
+* PPPIOCGIDLE returns the time, in seconds, since the last data
+  packets were sent and received.  The argument should point to a
+  ppp_idle structure (defined in <linux/ppp_defs.h>).  If the
+  CONFIG_PPP_FILTER option is enabled, the set of packets which reset
+  the transmit and receive idle timers is restricted to those which
+  pass the ``active`` packet filter.
+  Two versions of this command exist, to deal with user space
+  expecting times as either 32-bit or 64-bit time_t seconds.
+
+* PPPIOCSMAXCID sets the maximum connection-ID parameter (and thus the
+  number of connection slots) for the TCP header compressor and
+  decompressor.  The lower 16 bits of the int pointed to by the
+  argument specify the maximum connection-ID for the compressor.  If
+  the upper 16 bits of that int are non-zero, they specify the maximum
+  connection-ID for the decompressor, otherwise the decompressor's
+  maximum connection-ID is set to 15.
+
+* PPPIOCSNPMODE sets the network-protocol mode for a given network
+  protocol.  The argument should point to an npioctl struct (defined
+  in <linux/ppp-ioctl.h>).  The ``protocol`` field gives the PPP protocol
+  number for the protocol to be affected, and the ``mode`` field
+  specifies what to do with packets for that protocol:
+
+	=============	==============================================
+	NPMODE_PASS	normal operation, transmit and receive packets
+	NPMODE_DROP	silently drop packets for this protocol
+	NPMODE_ERROR	drop packets and return an error on transmit
+	NPMODE_QUEUE	queue up packets for transmit, drop received
+			packets
+	=============	==============================================
+
+  At present NPMODE_ERROR and NPMODE_QUEUE have the same effect as
+  NPMODE_DROP.
+
+* PPPIOCGNPMODE returns the network-protocol mode for a given
+  protocol.  The argument should point to an npioctl struct with the
+  ``protocol`` field set to the PPP protocol number for the protocol of
+  interest.  On return the ``mode`` field will be set to the
+  network-protocol mode for that protocol.
+
+* PPPIOCSPASS and PPPIOCSACTIVE set the ``pass`` and ``active`` packet
+  filters.  These ioctls are only available if the CONFIG_PPP_FILTER
+  option is selected.  The argument should point to a sock_fprog
+  structure (defined in <linux/filter.h>) containing the compiled BPF
+  instructions for the filter.  Packets are dropped if they fail the
+  ``pass`` filter; otherwise, if they fail the ``active`` filter they are
+  passed but they do not reset the transmit or receive idle timer.
+
+* PPPIOCSMRRU enables or disables multilink processing for received
+  packets and sets the multilink MRRU (maximum reconstructed receive
+  unit).  The argument should point to an int containing the new MRRU
+  value.  If the MRRU value is 0, processing of received multilink
+  fragments is disabled.  This ioctl is only available if the
+  CONFIG_PPP_MULTILINK option is selected.
+
+Last modified: 7-feb-2002
diff --git a/Documentation/networking/ppp_generic.txt b/Documentation/networking/ppp_generic.txt
deleted file mode 100644
index fd563aff5fc9..000000000000
--- a/Documentation/networking/ppp_generic.txt
+++ /dev/null
@@ -1,428 +0,0 @@
-		PPP Generic Driver and Channel Interface
-		----------------------------------------
-
-			    Paul Mackerras
-			   paulus@samba.org
-			      7 Feb 2002
-
-The generic PPP driver in linux-2.4 provides an implementation of the
-functionality which is of use in any PPP implementation, including:
-
-* the network interface unit (ppp0 etc.)
-* the interface to the networking code
-* PPP multilink: splitting datagrams between multiple links, and
-  ordering and combining received fragments
-* the interface to pppd, via a /dev/ppp character device
-* packet compression and decompression
-* TCP/IP header compression and decompression
-* detecting network traffic for demand dialling and for idle timeouts
-* simple packet filtering
-
-For sending and receiving PPP frames, the generic PPP driver calls on
-the services of PPP `channels'.  A PPP channel encapsulates a
-mechanism for transporting PPP frames from one machine to another.  A
-PPP channel implementation can be arbitrarily complex internally but
-has a very simple interface with the generic PPP code: it merely has
-to be able to send PPP frames, receive PPP frames, and optionally
-handle ioctl requests.  Currently there are PPP channel
-implementations for asynchronous serial ports, synchronous serial
-ports, and for PPP over ethernet.
-
-This architecture makes it possible to implement PPP multilink in a
-natural and straightforward way, by allowing more than one channel to
-be linked to each ppp network interface unit.  The generic layer is
-responsible for splitting datagrams on transmit and recombining them
-on receive.
-
-
-PPP channel API
----------------
-
-See include/linux/ppp_channel.h for the declaration of the types and
-functions used to communicate between the generic PPP layer and PPP
-channels.
-
-Each channel has to provide two functions to the generic PPP layer,
-via the ppp_channel.ops pointer:
-
-* start_xmit() is called by the generic layer when it has a frame to
-  send.  The channel has the option of rejecting the frame for
-  flow-control reasons.  In this case, start_xmit() should return 0
-  and the channel should call the ppp_output_wakeup() function at a
-  later time when it can accept frames again, and the generic layer
-  will then attempt to retransmit the rejected frame(s).  If the frame
-  is accepted, the start_xmit() function should return 1.
-
-* ioctl() provides an interface which can be used by a user-space
-  program to control aspects of the channel's behaviour.  This
-  procedure will be called when a user-space program does an ioctl
-  system call on an instance of /dev/ppp which is bound to the
-  channel.  (Usually it would only be pppd which would do this.)
-
-The generic PPP layer provides seven functions to channels:
-
-* ppp_register_channel() is called when a channel has been created, to
-  notify the PPP generic layer of its presence.  For example, setting
-  a serial port to the PPPDISC line discipline causes the ppp_async
-  channel code to call this function.
-
-* ppp_unregister_channel() is called when a channel is to be
-  destroyed.  For example, the ppp_async channel code calls this when
-  a hangup is detected on the serial port.
-
-* ppp_output_wakeup() is called by a channel when it has previously
-  rejected a call to its start_xmit function, and can now accept more
-  packets.
-
-* ppp_input() is called by a channel when it has received a complete
-  PPP frame.
-
-* ppp_input_error() is called by a channel when it has detected that a
-  frame has been lost or dropped (for example, because of a FCS (frame
-  check sequence) error).
-
-* ppp_channel_index() returns the channel index assigned by the PPP
-  generic layer to this channel.  The channel should provide some way
-  (e.g. an ioctl) to transmit this back to user-space, as user-space
-  will need it to attach an instance of /dev/ppp to this channel.
-
-* ppp_unit_number() returns the unit number of the ppp network
-  interface to which this channel is connected, or -1 if the channel
-  is not connected.
-
-Connecting a channel to the ppp generic layer is initiated from the
-channel code, rather than from the generic layer.  The channel is
-expected to have some way for a user-level process to control it
-independently of the ppp generic layer.  For example, with the
-ppp_async channel, this is provided by the file descriptor to the
-serial port.
-
-Generally a user-level process will initialize the underlying
-communications medium and prepare it to do PPP.  For example, with an
-async tty, this can involve setting the tty speed and modes, issuing
-modem commands, and then going through some sort of dialog with the
-remote system to invoke PPP service there.  We refer to this process
-as `discovery'.  Then the user-level process tells the medium to
-become a PPP channel and register itself with the generic PPP layer.
-The channel then has to report the channel number assigned to it back
-to the user-level process.  From that point, the PPP negotiation code
-in the PPP daemon (pppd) can take over and perform the PPP
-negotiation, accessing the channel through the /dev/ppp interface.
-
-At the interface to the PPP generic layer, PPP frames are stored in
-skbuff structures and start with the two-byte PPP protocol number.
-The frame does *not* include the 0xff `address' byte or the 0x03
-`control' byte that are optionally used in async PPP.  Nor is there
-any escaping of control characters, nor are there any FCS or framing
-characters included.  That is all the responsibility of the channel
-code, if it is needed for the particular medium.  That is, the skbuffs
-presented to the start_xmit() function contain only the 2-byte
-protocol number and the data, and the skbuffs presented to ppp_input()
-must be in the same format.
-
-The channel must provide an instance of a ppp_channel struct to
-represent the channel.  The channel is free to use the `private' field
-however it wishes.  The channel should initialize the `mtu' and
-`hdrlen' fields before calling ppp_register_channel() and not change
-them until after ppp_unregister_channel() returns.  The `mtu' field
-represents the maximum size of the data part of the PPP frames, that
-is, it does not include the 2-byte protocol number.
-
-If the channel needs some headroom in the skbuffs presented to it for
-transmission (i.e., some space free in the skbuff data area before the
-start of the PPP frame), it should set the `hdrlen' field of the
-ppp_channel struct to the amount of headroom required.  The generic
-PPP layer will attempt to provide that much headroom but the channel
-should still check if there is sufficient headroom and copy the skbuff
-if there isn't.
-
-On the input side, channels should ideally provide at least 2 bytes of
-headroom in the skbuffs presented to ppp_input().  The generic PPP
-code does not require this but will be more efficient if this is done.
-
-
-Buffering and flow control
---------------------------
-
-The generic PPP layer has been designed to minimize the amount of data
-that it buffers in the transmit direction.  It maintains a queue of
-transmit packets for the PPP unit (network interface device) plus a
-queue of transmit packets for each attached channel.  Normally the
-transmit queue for the unit will contain at most one packet; the
-exceptions are when pppd sends packets by writing to /dev/ppp, and
-when the core networking code calls the generic layer's start_xmit()
-function with the queue stopped, i.e. when the generic layer has
-called netif_stop_queue(), which only happens on a transmit timeout.
-The start_xmit function always accepts and queues the packet which it
-is asked to transmit.
-
-Transmit packets are dequeued from the PPP unit transmit queue and
-then subjected to TCP/IP header compression and packet compression
-(Deflate or BSD-Compress compression), as appropriate.  After this
-point the packets can no longer be reordered, as the decompression
-algorithms rely on receiving compressed packets in the same order that
-they were generated.
-
-If multilink is not in use, this packet is then passed to the attached
-channel's start_xmit() function.  If the channel refuses to take
-the packet, the generic layer saves it for later transmission.  The
-generic layer will call the channel's start_xmit() function again
-when the channel calls  ppp_output_wakeup() or when the core
-networking code calls the generic layer's start_xmit() function
-again.  The generic layer contains no timeout and retransmission
-logic; it relies on the core networking code for that.
-
-If multilink is in use, the generic layer divides the packet into one
-or more fragments and puts a multilink header on each fragment.  It
-decides how many fragments to use based on the length of the packet
-and the number of channels which are potentially able to accept a
-fragment at the moment.  A channel is potentially able to accept a
-fragment if it doesn't have any fragments currently queued up for it
-to transmit.  The channel may still refuse a fragment; in this case
-the fragment is queued up for the channel to transmit later.  This
-scheme has the effect that more fragments are given to higher-
-bandwidth channels.  It also means that under light load, the generic
-layer will tend to fragment large packets across all the channels,
-thus reducing latency, while under heavy load, packets will tend to be
-transmitted as single fragments, thus reducing the overhead of
-fragmentation.
-
-
-SMP safety
-----------
-
-The PPP generic layer has been designed to be SMP-safe.  Locks are
-used around accesses to the internal data structures where necessary
-to ensure their integrity.  As part of this, the generic layer
-requires that the channels adhere to certain requirements and in turn
-provides certain guarantees to the channels.  Essentially the channels
-are required to provide the appropriate locking on the ppp_channel
-structures that form the basis of the communication between the
-channel and the generic layer.  This is because the channel provides
-the storage for the ppp_channel structure, and so the channel is
-required to provide the guarantee that this storage exists and is
-valid at the appropriate times.
-
-The generic layer requires these guarantees from the channel:
-
-* The ppp_channel object must exist from the time that
-  ppp_register_channel() is called until after the call to
-  ppp_unregister_channel() returns.
-
-* No thread may be in a call to any of ppp_input(), ppp_input_error(),
-  ppp_output_wakeup(), ppp_channel_index() or ppp_unit_number() for a
-  channel at the time that ppp_unregister_channel() is called for that
-  channel.
-
-* ppp_register_channel() and ppp_unregister_channel() must be called
-  from process context, not interrupt or softirq/BH context.
-
-* The remaining generic layer functions may be called at softirq/BH
-  level but must not be called from a hardware interrupt handler.
-
-* The generic layer may call the channel start_xmit() function at
-  softirq/BH level but will not call it at interrupt level.  Thus the
-  start_xmit() function may not block.
-
-* The generic layer will only call the channel ioctl() function in
-  process context.
-
-The generic layer provides these guarantees to the channels:
-
-* The generic layer will not call the start_xmit() function for a
-  channel while any thread is already executing in that function for
-  that channel.
-
-* The generic layer will not call the ioctl() function for a channel
-  while any thread is already executing in that function for that
-  channel.
-
-* By the time a call to ppp_unregister_channel() returns, no thread
-  will be executing in a call from the generic layer to that channel's
-  start_xmit() or ioctl() function, and the generic layer will not
-  call either of those functions subsequently.
-
-
-Interface to pppd
------------------
-
-The PPP generic layer exports a character device interface called
-/dev/ppp.  This is used by pppd to control PPP interface units and
-channels.  Although there is only one /dev/ppp, each open instance of
-/dev/ppp acts independently and can be attached either to a PPP unit
-or a PPP channel.  This is achieved using the file->private_data field
-to point to a separate object for each open instance of /dev/ppp.  In
-this way an effect similar to Solaris' clone open is obtained,
-allowing us to control an arbitrary number of PPP interfaces and
-channels without having to fill up /dev with hundreds of device names.
-
-When /dev/ppp is opened, a new instance is created which is initially
-unattached.  Using an ioctl call, it can then be attached to an
-existing unit, attached to a newly-created unit, or attached to an
-existing channel.  An instance attached to a unit can be used to send
-and receive PPP control frames, using the read() and write() system
-calls, along with poll() if necessary.  Similarly, an instance
-attached to a channel can be used to send and receive PPP frames on
-that channel.
-
-In multilink terms, the unit represents the bundle, while the channels
-represent the individual physical links.  Thus, a PPP frame sent by a
-write to the unit (i.e., to an instance of /dev/ppp attached to the
-unit) will be subject to bundle-level compression and to fragmentation
-across the individual links (if multilink is in use).  In contrast, a
-PPP frame sent by a write to the channel will be sent as-is on that
-channel, without any multilink header.
-
-A channel is not initially attached to any unit.  In this state it can
-be used for PPP negotiation but not for the transfer of data packets.
-It can then be connected to a PPP unit with an ioctl call, which
-makes it available to send and receive data packets for that unit.
-
-The ioctl calls which are available on an instance of /dev/ppp depend
-on whether it is unattached, attached to a PPP interface, or attached
-to a PPP channel.  The ioctl calls which are available on an
-unattached instance are:
-
-* PPPIOCNEWUNIT creates a new PPP interface and makes this /dev/ppp
-  instance the "owner" of the interface.  The argument should point to
-  an int which is the desired unit number if >= 0, or -1 to assign the
-  lowest unused unit number.  Being the owner of the interface means
-  that the interface will be shut down if this instance of /dev/ppp is
-  closed.
-
-* PPPIOCATTACH attaches this instance to an existing PPP interface.
-  The argument should point to an int containing the unit number.
-  This does not make this instance the owner of the PPP interface.
-
-* PPPIOCATTCHAN attaches this instance to an existing PPP channel.
-  The argument should point to an int containing the channel number.
-
-The ioctl calls available on an instance of /dev/ppp attached to a
-channel are:
-
-* PPPIOCCONNECT connects this channel to a PPP interface.  The
-  argument should point to an int containing the interface unit
-  number.  It will return an EINVAL error if the channel is already
-  connected to an interface, or ENXIO if the requested interface does
-  not exist.
-
-* PPPIOCDISCONN disconnects this channel from the PPP interface that
-  it is connected to.  It will return an EINVAL error if the channel
-  is not connected to an interface.
-
-* All other ioctl commands are passed to the channel ioctl() function.
-
-The ioctl calls that are available on an instance that is attached to
-an interface unit are:
-
-* PPPIOCSMRU sets the MRU (maximum receive unit) for the interface.
-  The argument should point to an int containing the new MRU value.
-
-* PPPIOCSFLAGS sets flags which control the operation of the
-  interface.  The argument should be a pointer to an int containing
-  the new flags value.  The bits in the flags value that can be set
-  are:
-	SC_COMP_TCP		enable transmit TCP header compression
-	SC_NO_TCP_CCID		disable connection-id compression for
-				TCP header compression
-	SC_REJ_COMP_TCP		disable receive TCP header decompression
-	SC_CCP_OPEN		Compression Control Protocol (CCP) is
-				open, so inspect CCP packets
-	SC_CCP_UP		CCP is up, may (de)compress packets
-	SC_LOOP_TRAFFIC		send IP traffic to pppd
-	SC_MULTILINK		enable PPP multilink fragmentation on
-				transmitted packets
-	SC_MP_SHORTSEQ		expect short multilink sequence
-				numbers on received multilink fragments
-	SC_MP_XSHORTSEQ		transmit short multilink sequence nos.
-
-  The values of these flags are defined in <linux/ppp-ioctl.h>.  Note
-  that the values of the SC_MULTILINK, SC_MP_SHORTSEQ and
-  SC_MP_XSHORTSEQ bits are ignored if the CONFIG_PPP_MULTILINK option
-  is not selected.
-
-* PPPIOCGFLAGS returns the value of the status/control flags for the
-  interface unit.  The argument should point to an int where the ioctl
-  will store the flags value.  As well as the values listed above for
-  PPPIOCSFLAGS, the following bits may be set in the returned value:
-	SC_COMP_RUN		CCP compressor is running
-	SC_DECOMP_RUN		CCP decompressor is running
-	SC_DC_ERROR		CCP decompressor detected non-fatal error
-	SC_DC_FERROR		CCP decompressor detected fatal error
-
-* PPPIOCSCOMPRESS sets the parameters for packet compression or
-  decompression.  The argument should point to a ppp_option_data
-  structure (defined in <linux/ppp-ioctl.h>), which contains a
-  pointer/length pair which should describe a block of memory
-  containing a CCP option specifying a compression method and its
-  parameters.  The ppp_option_data struct also contains a `transmit'
-  field.  If this is 0, the ioctl will affect the receive path,
-  otherwise the transmit path.
-
-* PPPIOCGUNIT returns, in the int pointed to by the argument, the unit
-  number of this interface unit.
-
-* PPPIOCSDEBUG sets the debug flags for the interface to the value in
-  the int pointed to by the argument.  Only the least significant bit
-  is used; if this is 1 the generic layer will print some debug
-  messages during its operation.  This is only intended for debugging
-  the generic PPP layer code; it is generally not helpful for working
-  out why a PPP connection is failing.
-
-* PPPIOCGDEBUG returns the debug flags for the interface in the int
-  pointed to by the argument.
-
-* PPPIOCGIDLE returns the time, in seconds, since the last data
-  packets were sent and received.  The argument should point to a
-  ppp_idle structure (defined in <linux/ppp_defs.h>).  If the
-  CONFIG_PPP_FILTER option is enabled, the set of packets which reset
-  the transmit and receive idle timers is restricted to those which
-  pass the `active' packet filter.
-  Two versions of this command exist, to deal with user space
-  expecting times as either 32-bit or 64-bit time_t seconds.
-
-* PPPIOCSMAXCID sets the maximum connection-ID parameter (and thus the
-  number of connection slots) for the TCP header compressor and
-  decompressor.  The lower 16 bits of the int pointed to by the
-  argument specify the maximum connection-ID for the compressor.  If
-  the upper 16 bits of that int are non-zero, they specify the maximum
-  connection-ID for the decompressor, otherwise the decompressor's
-  maximum connection-ID is set to 15.
-
-* PPPIOCSNPMODE sets the network-protocol mode for a given network
-  protocol.  The argument should point to an npioctl struct (defined
-  in <linux/ppp-ioctl.h>).  The `protocol' field gives the PPP protocol
-  number for the protocol to be affected, and the `mode' field
-  specifies what to do with packets for that protocol:
-
-	NPMODE_PASS	normal operation, transmit and receive packets
-	NPMODE_DROP	silently drop packets for this protocol
-	NPMODE_ERROR	drop packets and return an error on transmit
-	NPMODE_QUEUE	queue up packets for transmit, drop received
-			packets
-
-  At present NPMODE_ERROR and NPMODE_QUEUE have the same effect as
-  NPMODE_DROP.
-
-* PPPIOCGNPMODE returns the network-protocol mode for a given
-  protocol.  The argument should point to an npioctl struct with the
-  `protocol' field set to the PPP protocol number for the protocol of
-  interest.  On return the `mode' field will be set to the network-
-  protocol mode for that protocol.
-
-* PPPIOCSPASS and PPPIOCSACTIVE set the `pass' and `active' packet
-  filters.  These ioctls are only available if the CONFIG_PPP_FILTER
-  option is selected.  The argument should point to a sock_fprog
-  structure (defined in <linux/filter.h>) containing the compiled BPF
-  instructions for the filter.  Packets are dropped if they fail the
-  `pass' filter; otherwise, if they fail the `active' filter they are
-  passed but they do not reset the transmit or receive idle timer.
-
-* PPPIOCSMRRU enables or disables multilink processing for received
-  packets and sets the multilink MRRU (maximum reconstructed receive
-  unit).  The argument should point to an int containing the new MRRU
-  value.  If the MRRU value is 0, processing of received multilink
-  fragments is disabled.  This ioctl is only available if the
-  CONFIG_PPP_MULTILINK option is selected.
-
-Last modified: 7-feb-2002
-- 
cgit v1.2.3-59-g8ed1b


From 832619012c972dc1ca0723ad66ffff6e6e4cf5e0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:16 +0200
Subject: docs: networking: convert proc_net_tcp.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst        |  1 +
 Documentation/networking/proc_net_tcp.rst | 57 +++++++++++++++++++++++++++++++
 Documentation/networking/proc_net_tcp.txt | 48 --------------------------
 3 files changed, 58 insertions(+), 48 deletions(-)
 create mode 100644 Documentation/networking/proc_net_tcp.rst
 delete mode 100644 Documentation/networking/proc_net_tcp.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index f89535871481..0da7eb0ec85a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -94,6 +94,7 @@ Contents:
    pktgen
    plip
    ppp_generic
+   proc_net_tcp
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/proc_net_tcp.rst b/Documentation/networking/proc_net_tcp.rst
new file mode 100644
index 000000000000..7d9dfe36af45
--- /dev/null
+++ b/Documentation/networking/proc_net_tcp.rst
@@ -0,0 +1,57 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================
+The proc/net/tcp and proc/net/tcp6 variables
+============================================
+
+This document describes the interfaces /proc/net/tcp and /proc/net/tcp6.
+Note that these interfaces are deprecated in favor of tcp_diag.
+
+These /proc interfaces provide information about currently active TCP
+connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c
+and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively.
+
+It will first list all listening TCP sockets, and next list all established
+TCP connections. A typical entry of /proc/net/tcp would look like this (split
+up into 3 parts because of the length of the line)::
+
+   46: 010310AC:9C4C 030310AC:1770 01
+   |      |      |      |      |   |--> connection state
+   |      |      |      |      |------> remote TCP port number
+   |      |      |      |-------------> remote IPv4 address
+   |      |      |--------------------> local TCP port number
+   |      |---------------------------> local IPv4 address
+   |----------------------------------> number of entry
+
+   00000150:00000000 01:00000019 00000000
+      |        |     |     |       |--> number of unrecovered RTO timeouts
+      |        |     |     |----------> number of jiffies until timer expires
+      |        |     |----------------> timer_active (see below)
+      |        |----------------------> receive-queue
+      |-------------------------------> transmit-queue
+
+   1000        0 54165785 4 cd1e6040 25 4 27 3 -1
+    |          |    |     |    |     |  | |  | |--> slow start size threshold,
+    |          |    |     |    |     |  | |  |      or -1 if the threshold
+    |          |    |     |    |     |  | |  |      is >= 0xFFFF
+    |          |    |     |    |     |  | |  |----> sending congestion window
+    |          |    |     |    |     |  | |-------> (ack.quick<<1)|ack.pingpong
+    |          |    |     |    |     |  |---------> Predicted tick of soft clock
+    |          |    |     |    |     |              (delayed ACK control data)
+    |          |    |     |    |     |------------> retransmit timeout
+    |          |    |     |    |------------------> location of socket in memory
+    |          |    |     |-----------------------> socket reference count
+    |          |    |-----------------------------> inode
+    |          |----------------------------------> unanswered 0-window probes
+    |---------------------------------------------> uid
+
+timer_active:
+
+ ==  ================================================================
+  0  no timer is pending
+  1  retransmit-timer is pending
+  2  another timer (e.g. delayed ack or keepalive) is pending
+  3  this is a socket in TIME_WAIT state. Not all fields will contain
+     data (or even exist)
+  4  zero window probe timer is pending
+ ==  ================================================================
diff --git a/Documentation/networking/proc_net_tcp.txt b/Documentation/networking/proc_net_tcp.txt
deleted file mode 100644
index 4a79209e77a7..000000000000
--- a/Documentation/networking/proc_net_tcp.txt
+++ /dev/null
@@ -1,48 +0,0 @@
-This document describes the interfaces /proc/net/tcp and /proc/net/tcp6.
-Note that these interfaces are deprecated in favor of tcp_diag.
-
-These /proc interfaces provide information about currently active TCP 
-connections, and are implemented by tcp4_seq_show() in net/ipv4/tcp_ipv4.c
-and tcp6_seq_show() in net/ipv6/tcp_ipv6.c, respectively.
-
-It will first list all listening TCP sockets, and next list all established
-TCP connections. A typical entry of /proc/net/tcp would look like this (split 
-up into 3 parts because of the length of the line):
-
-   46: 010310AC:9C4C 030310AC:1770 01 
-   |      |      |      |      |   |--> connection state
-   |      |      |      |      |------> remote TCP port number
-   |      |      |      |-------------> remote IPv4 address
-   |      |      |--------------------> local TCP port number
-   |      |---------------------------> local IPv4 address
-   |----------------------------------> number of entry
-
-   00000150:00000000 01:00000019 00000000  
-      |        |     |     |       |--> number of unrecovered RTO timeouts
-      |        |     |     |----------> number of jiffies until timer expires
-      |        |     |----------------> timer_active (see below)
-      |        |----------------------> receive-queue
-      |-------------------------------> transmit-queue
-
-   1000        0 54165785 4 cd1e6040 25 4 27 3 -1
-    |          |    |     |    |     |  | |  | |--> slow start size threshold, 
-    |          |    |     |    |     |  | |  |      or -1 if the threshold
-    |          |    |     |    |     |  | |  |      is >= 0xFFFF
-    |          |    |     |    |     |  | |  |----> sending congestion window
-    |          |    |     |    |     |  | |-------> (ack.quick<<1)|ack.pingpong
-    |          |    |     |    |     |  |---------> Predicted tick of soft clock
-    |          |    |     |    |     |              (delayed ACK control data)
-    |          |    |     |    |     |------------> retransmit timeout
-    |          |    |     |    |------------------> location of socket in memory
-    |          |    |     |-----------------------> socket reference count
-    |          |    |-----------------------------> inode
-    |          |----------------------------------> unanswered 0-window probes
-    |---------------------------------------------> uid
-
-timer_active:
-  0  no timer is pending
-  1  retransmit-timer is pending
-  2  another timer (e.g. delayed ack or keepalive) is pending
-  3  this is a socket in TIME_WAIT state. Not all fields will contain 
-     data (or even exist)
-  4  zero window probe timer is pending
-- 
cgit v1.2.3-59-g8ed1b


From 66d495d0a5aecd1692f6b5e3190de14f9a31e14b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:17 +0200
Subject: docs: networking: convert radiotap-headers.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst              |   1 +
 Documentation/networking/mac80211-injection.rst |   2 +-
 Documentation/networking/radiotap-headers.rst   | 159 ++++++++++++++++++++++++
 Documentation/networking/radiotap-headers.txt   | 152 ----------------------
 include/net/cfg80211.h                          |   2 +-
 net/wireless/radiotap.c                         |   2 +-
 6 files changed, 163 insertions(+), 155 deletions(-)
 create mode 100644 Documentation/networking/radiotap-headers.rst
 delete mode 100644 Documentation/networking/radiotap-headers.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 0da7eb0ec85a..85bc52d0b3a6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -95,6 +95,7 @@ Contents:
    plip
    ppp_generic
    proc_net_tcp
+   radiotap-headers
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/mac80211-injection.rst b/Documentation/networking/mac80211-injection.rst
index 75d4edcae852..be65f886ff1f 100644
--- a/Documentation/networking/mac80211-injection.rst
+++ b/Documentation/networking/mac80211-injection.rst
@@ -13,7 +13,7 @@ following format::
  [ payload ]
 
 The radiotap format is discussed in
-./Documentation/networking/radiotap-headers.txt.
+./Documentation/networking/radiotap-headers.rst.
 
 Despite many radiotap parameters being currently defined, most only make sense
 to appear on received packets.  The following information is parsed from the
diff --git a/Documentation/networking/radiotap-headers.rst b/Documentation/networking/radiotap-headers.rst
new file mode 100644
index 000000000000..1a1bd1ec0650
--- /dev/null
+++ b/Documentation/networking/radiotap-headers.rst
@@ -0,0 +1,159 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+How to use radiotap headers
+===========================
+
+Pointer to the radiotap include file
+------------------------------------
+
+Radiotap headers are variable-length and extensible; you can get most of the
+information you need to know on them from::
+
+    ./include/net/ieee80211_radiotap.h
+
+This document gives an overview and warns on some corner cases.
+
+
+Structure of the header
+-----------------------
+
+There is a fixed portion at the start which contains a u32 bitmap that defines
+if the possible argument associated with that bit is present or not.  So if b0
+of the it_present member of ieee80211_radiotap_header is set, it means that
+the header for argument index 0 (IEEE80211_RADIOTAP_TSFT) is present in the
+argument area.
+
+::
+
+   < 8-byte ieee80211_radiotap_header >
+   [ <possible argument bitmap extensions ... > ]
+   [ <argument> ... ]
+
+At the moment there are only 13 possible argument indexes defined, but in case
+we run out of space in the u32 it_present member, it is defined that b31 set
+indicates that there is another u32 bitmap following (shown as "possible
+argument bitmap extensions..." above), and the start of the arguments is moved
+forward 4 bytes each time.
+
+Note also that the it_len member __le16 is set to the total number of bytes
+covered by the ieee80211_radiotap_header and any arguments following.
+
+
+Requirements for arguments
+--------------------------
+
+After the fixed part of the header, the arguments follow for each argument
+index whose matching bit is set in the it_present member of
+ieee80211_radiotap_header.
+
+ - the arguments are all stored little-endian!
+
+ - the argument payload for a given argument index has a fixed size.  So
+   IEEE80211_RADIOTAP_TSFT being present always indicates an 8-byte argument is
+   present.  See the comments in ./include/net/ieee80211_radiotap.h for a nice
+   breakdown of all the argument sizes
+
+ - the arguments must be aligned to a boundary of the argument size using
+   padding.  So a u16 argument must start on the next u16 boundary if it isn't
+   already on one, a u32 must start on the next u32 boundary and so on.
+
+ - "alignment" is relative to the start of the ieee80211_radiotap_header, ie,
+   the first byte of the radiotap header.  The absolute alignment of that first
+   byte isn't defined.  So even if the whole radiotap header is starting at, eg,
+   address 0x00000003, still the first byte of the radiotap header is treated as
+   0 for alignment purposes.
+
+ - the above point that there may be no absolute alignment for multibyte
+   entities in the fixed radiotap header or the argument region means that you
+   have to take special evasive action when trying to access these multibyte
+   entities.  Some arches like Blackfin cannot deal with an attempt to
+   dereference, eg, a u16 pointer that is pointing to an odd address.  Instead
+   you have to use a kernel API get_unaligned() to dereference the pointer,
+   which will do it bytewise on the arches that require that.
+
+ - The arguments for a given argument index can be a compound of multiple types
+   together.  For example IEEE80211_RADIOTAP_CHANNEL has an argument payload
+   consisting of two u16s of total length 4.  When this happens, the padding
+   rule is applied dealing with a u16, NOT dealing with a 4-byte single entity.
+
+
+Example valid radiotap header
+-----------------------------
+
+::
+
+	0x00, 0x00, // <-- radiotap version + pad byte
+	0x0b, 0x00, // <- radiotap header length
+	0x04, 0x0c, 0x00, 0x00, // <-- bitmap
+	0x6c, // <-- rate (in 500kHz units)
+	0x0c, //<-- tx power
+	0x01 //<-- antenna
+
+
+Using the Radiotap Parser
+-------------------------
+
+If you are having to parse a radiotap struct, you can radically simplify the
+job by using the radiotap parser that lives in net/wireless/radiotap.c and has
+its prototypes available in include/net/cfg80211.h.  You use it like this::
+
+    #include <net/cfg80211.h>
+
+    /* buf points to the start of the radiotap header part */
+
+    int MyFunction(u8 * buf, int buflen)
+    {
+	    int pkt_rate_100kHz = 0, antenna = 0, pwr = 0;
+	    struct ieee80211_radiotap_iterator iterator;
+	    int ret = ieee80211_radiotap_iterator_init(&iterator, buf, buflen);
+
+	    while (!ret) {
+
+		    ret = ieee80211_radiotap_iterator_next(&iterator);
+
+		    if (ret)
+			    continue;
+
+		    /* see if this argument is something we can use */
+
+		    switch (iterator.this_arg_index) {
+		    /*
+		     * You must take care when dereferencing iterator.this_arg
+		     * for multibyte types... the pointer is not aligned.  Use
+		     * get_unaligned((type *)iterator.this_arg) to dereference
+		     * iterator.this_arg for type "type" safely on all arches.
+		     */
+		    case IEEE80211_RADIOTAP_RATE:
+			    /* radiotap "rate" u8 is in
+			     * 500kbps units, eg, 0x02=1Mbps
+			     */
+			    pkt_rate_100kHz = (*iterator.this_arg) * 5;
+			    break;
+
+		    case IEEE80211_RADIOTAP_ANTENNA:
+			    /* radiotap uses 0 for 1st ant */
+			    antenna = *iterator.this_arg;
+			    break;
+
+		    case IEEE80211_RADIOTAP_DBM_TX_POWER:
+			    pwr = *iterator.this_arg;
+			    break;
+
+		    default:
+			    break;
+		    }
+	    }  /* while more rt headers */
+
+	    if (ret != -ENOENT)
+		    return TXRX_DROP;
+
+	    /* discard the radiotap header part */
+	    buf += iterator.max_length;
+	    buflen -= iterator.max_length;
+
+	    ...
+
+    }
+
+Andy Green <andy@warmcat.com>
diff --git a/Documentation/networking/radiotap-headers.txt b/Documentation/networking/radiotap-headers.txt
deleted file mode 100644
index 953331c7984f..000000000000
--- a/Documentation/networking/radiotap-headers.txt
+++ /dev/null
@@ -1,152 +0,0 @@
-How to use radiotap headers
-===========================
-
-Pointer to the radiotap include file
-------------------------------------
-
-Radiotap headers are variable-length and extensible, you can get most of the
-information you need to know on them from:
-
-./include/net/ieee80211_radiotap.h
-
-This document gives an overview and warns on some corner cases.
-
-
-Structure of the header
------------------------
-
-There is a fixed portion at the start which contains a u32 bitmap that defines
-if the possible argument associated with that bit is present or not.  So if b0
-of the it_present member of ieee80211_radiotap_header is set, it means that
-the header for argument index 0 (IEEE80211_RADIOTAP_TSFT) is present in the
-argument area.
-
-   < 8-byte ieee80211_radiotap_header >
-   [ <possible argument bitmap extensions ... > ]
-   [ <argument> ... ]
-
-At the moment there are only 13 possible argument indexes defined, but in case
-we run out of space in the u32 it_present member, it is defined that b31 set
-indicates that there is another u32 bitmap following (shown as "possible
-argument bitmap extensions..." above), and the start of the arguments is moved
-forward 4 bytes each time.
-
-Note also that the it_len member __le16 is set to the total number of bytes
-covered by the ieee80211_radiotap_header and any arguments following.
-
-
-Requirements for arguments
---------------------------
-
-After the fixed part of the header, the arguments follow for each argument
-index whose matching bit is set in the it_present member of
-ieee80211_radiotap_header.
-
- - the arguments are all stored little-endian!
-
- - the argument payload for a given argument index has a fixed size.  So
-   IEEE80211_RADIOTAP_TSFT being present always indicates an 8-byte argument is
-   present.  See the comments in ./include/net/ieee80211_radiotap.h for a nice
-   breakdown of all the argument sizes
-
- - the arguments must be aligned to a boundary of the argument size using
-   padding.  So a u16 argument must start on the next u16 boundary if it isn't
-   already on one, a u32 must start on the next u32 boundary and so on.
-
- - "alignment" is relative to the start of the ieee80211_radiotap_header, ie,
-   the first byte of the radiotap header.  The absolute alignment of that first
-   byte isn't defined.  So even if the whole radiotap header is starting at, eg,
-   address 0x00000003, still the first byte of the radiotap header is treated as
-   0 for alignment purposes.
-
- - the above point that there may be no absolute alignment for multibyte
-   entities in the fixed radiotap header or the argument region means that you
-   have to take special evasive action when trying to access these multibyte
-   entities.  Some arches like Blackfin cannot deal with an attempt to
-   dereference, eg, a u16 pointer that is pointing to an odd address.  Instead
-   you have to use a kernel API get_unaligned() to dereference the pointer,
-   which will do it bytewise on the arches that require that.
-
- - The arguments for a given argument index can be a compound of multiple types
-   together.  For example IEEE80211_RADIOTAP_CHANNEL has an argument payload
-   consisting of two u16s of total length 4.  When this happens, the padding
-   rule is applied dealing with a u16, NOT dealing with a 4-byte single entity.
-
-
-Example valid radiotap header
------------------------------
-
-	0x00, 0x00, // <-- radiotap version + pad byte
-	0x0b, 0x00, // <- radiotap header length
-	0x04, 0x0c, 0x00, 0x00, // <-- bitmap
-	0x6c, // <-- rate (in 500kHz units)
-	0x0c, //<-- tx power
-	0x01 //<-- antenna
-
-
-Using the Radiotap Parser
--------------------------
-
-If you are having to parse a radiotap struct, you can radically simplify the
-job by using the radiotap parser that lives in net/wireless/radiotap.c and has
-its prototypes available in include/net/cfg80211.h.  You use it like this:
-
-#include <net/cfg80211.h>
-
-/* buf points to the start of the radiotap header part */
-
-int MyFunction(u8 * buf, int buflen)
-{
-	int pkt_rate_100kHz = 0, antenna = 0, pwr = 0;
-	struct ieee80211_radiotap_iterator iterator;
-	int ret = ieee80211_radiotap_iterator_init(&iterator, buf, buflen);
-
-	while (!ret) {
-
-		ret = ieee80211_radiotap_iterator_next(&iterator);
-
-		if (ret)
-			continue;
-
-		/* see if this argument is something we can use */
-
-		switch (iterator.this_arg_index) {
-		/*
-		 * You must take care when dereferencing iterator.this_arg
-		 * for multibyte types... the pointer is not aligned.  Use
-		 * get_unaligned((type *)iterator.this_arg) to dereference
-		 * iterator.this_arg for type "type" safely on all arches.
-		 */
-		case IEEE80211_RADIOTAP_RATE:
-			/* radiotap "rate" u8 is in
-			 * 500kbps units, eg, 0x02=1Mbps
-			 */
-			pkt_rate_100kHz = (*iterator.this_arg) * 5;
-			break;
-
-		case IEEE80211_RADIOTAP_ANTENNA:
-			/* radiotap uses 0 for 1st ant */
-			antenna = *iterator.this_arg);
-			break;
-
-		case IEEE80211_RADIOTAP_DBM_TX_POWER:
-			pwr = *iterator.this_arg;
-			break;
-
-		default:
-			break;
-		}
-	}  /* while more rt headers */
-
-	if (ret != -ENOENT)
-		return TXRX_DROP;
-
-	/* discard the radiotap header part */
-	buf += iterator.max_length;
-	buflen -= iterator.max_length;
-
-	...
-
-}
-
-Andy Green <andy@warmcat.com>
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 70e48f66dac8..46ac80423b28 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5211,7 +5211,7 @@ u32 ieee80211_mandatory_rates(struct ieee80211_supported_band *sband,
  * Radiotap parsing functions -- for controlled injection support
  *
  * Implemented in net/wireless/radiotap.c
- * Documentation in Documentation/networking/radiotap-headers.txt
+ * Documentation in Documentation/networking/radiotap-headers.rst
  */
 
 struct radiotap_align_size {
diff --git a/net/wireless/radiotap.c b/net/wireless/radiotap.c
index 6582d155e2fc..d5e28239e030 100644
--- a/net/wireless/radiotap.c
+++ b/net/wireless/radiotap.c
@@ -90,7 +90,7 @@ static const struct ieee80211_radiotap_namespace radiotap_ns = {
  * iterator.this_arg for type "type" safely on all arches.
  *
  * Example code:
- * See Documentation/networking/radiotap-headers.txt
+ * See Documentation/networking/radiotap-headers.rst
  */
 
 int ieee80211_radiotap_iterator_init(
-- 
cgit v1.2.3-59-g8ed1b


From 8c6e172002987202c17756e06e84c7e4d8916c44 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:18 +0200
Subject: docs: networking: convert ray_cs.txt to ReST

- add SPDX header;
- use copyright symbol;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/ray_cs.rst | 165 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/ray_cs.txt | 150 --------------------------------
 drivers/net/wireless/Kconfig        |   2 +-
 4 files changed, 167 insertions(+), 151 deletions(-)
 create mode 100644 Documentation/networking/ray_cs.rst
 delete mode 100644 Documentation/networking/ray_cs.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 85bc52d0b3a6..b7e35b0d905c 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -96,6 +96,7 @@ Contents:
    ppp_generic
    proc_net_tcp
    radiotap-headers
+   ray_cs
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ray_cs.rst b/Documentation/networking/ray_cs.rst
new file mode 100644
index 000000000000..9a46d1ae8f20
--- /dev/null
+++ b/Documentation/networking/ray_cs.rst
@@ -0,0 +1,165 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+=========================
+Raylink wireless LAN card
+=========================
+
+September 21, 1999
+
+Copyright |copy| 1998  Corey Thomas (corey@world.std.com)
+
+This file is the documentation for the Raylink Wireless LAN card driver for
+Linux.  The Raylink wireless LAN card is a PCMCIA card which provides IEEE
+802.11 compatible wireless network connectivity at 1 and 2 megabits/second.
+See http://www.raytheon.com/micro/raylink/ for more information on the Raylink
+card.  This driver is in early development and does have bugs.  See the known
+bugs and limitations at the end of this document for more information.
+This driver also works with WebGear's Aviator 2.4 and Aviator Pro
+wireless LAN cards.
+
+As of kernel 2.3.18, the ray_cs driver is part of the Linux kernel
+source.  My web page for the development of ray_cs is at
+http://web.ralinktech.com/ralink/Home/Support/Linux.html
+and I can be emailed at corey@world.std.com
+
+The kernel driver is based on ray_cs-1.62.tgz
+
+The driver at my web page is intended to be used as an add on to
+David Hinds pcmcia package.  All the command line parameters are
+available when compiled as a module.  When built into the kernel, only
+the essid= string parameter is available via the kernel command line.
+This will change after the method of sorting out parameters for all
+the PCMCIA drivers is agreed upon.  If you must have a built in driver
+with nondefault parameters, they can be edited in
+/usr/src/linux/drivers/net/pcmcia/ray_cs.c.  Searching for module_param
+will find them all.
+
+Information on card services is available at:
+
+	http://pcmcia-cs.sourceforge.net/
+
+
+Card services user programs are still required for PCMCIA devices.
+pcmcia-cs-3.1.1 or greater is required for the kernel version of
+the driver.
+
+Currently, ray_cs is not part of David Hinds card services package,
+so the following magic is required.
+
+At the end of the /etc/pcmcia/config.opts file, add the line
+``source ./ray_cs.opts``.
+This will make card services read the ray_cs.opts file
+when starting.  Create the file /etc/pcmcia/ray_cs.opts containing the
+following::
+
+  #### start of /etc/pcmcia/ray_cs.opts ###################
+  # Configuration options for Raylink Wireless LAN PCMCIA card
+  device "ray_cs"
+    class "network" module "misc/ray_cs"
+
+  card "RayLink PC Card WLAN Adapter"
+    manfid 0x01a6, 0x0000
+    bind "ray_cs"
+
+  module "misc/ray_cs" opts ""
+  #### end of /etc/pcmcia/ray_cs.opts #####################
+
+
+To join an existing network with
+different parameters, contact the network administrator for the
+configuration information, and edit /etc/pcmcia/ray_cs.opts.
+Add the parameters below between the empty quotes.
+
+Parameters for ray_cs driver which may be specified in ray_cs.opts:
+
+=============== =============== =============================================
+bc              integer         0 = normal mode (802.11 timing),
+				1 = slow down inter frame timing to allow
+				operation with older breezecom access
+				points.
+
+beacon_period	integer         beacon period in Kilo-microseconds
+
+				legal values = must be integer multiple
+				of hop dwell
+
+				default = 256
+
+country         integer         1 = USA (default),
+				2 = Europe,
+				3 = Japan,
+				4 = Korea,
+				5 = Spain,
+				6 = France,
+				7 = Israel,
+				8 = Australia
+
+essid		string		ESS ID - network name to join
+
+				string with maximum length of 32 chars
+				default value = "ADHOC_ESSID"
+
+hop_dwell	integer         hop dwell time in Kilo-microseconds
+
+				legal values = 16,32,64,128(default),256
+
+irq_mask	integer         linux standard 16 bit value 1bit/IRQ
+
+				lsb is IRQ 0, bit 1 is IRQ 1 etc.
+				Used to restrict choice of IRQ's to use.
+				Recommended method for controlling
+				interrupts is in /etc/pcmcia/config.opts
+
+net_type	integer		0 (default) = adhoc network,
+				1 = infrastructure
+
+phy_addr	string          string containing new MAC address in
+				hex, must start with x eg
+				x00008f123456
+
+psm		integer         0 = continuously active,
+				1 = power save mode (not useful yet)
+
+pc_debug	integer		(0-5) larger values for more verbose
+				logging.  Replaces ray_debug.
+
+ray_debug	integer		Replaced with pc_debug
+
+ray_mem_speed   integer         defaults to 500
+
+sniffer         integer         0 = not sniffer (default),
+				1 = sniffer which can be used to record all
+				network traffic using tcpdump or similar,
+				but no normal network use is allowed.
+
+translate	integer		0 = no translation (encapsulate frames),
+				1 = translation    (RFC1042/802.1)
+=============== =============== =============================================
+
+More on sniffer mode:
+
+tcpdump does not understand 802.11 headers, so it can't
+interpret the contents, but it can record to a file.  This is only
+useful for debugging 802.11 lowlevel protocols that are not visible to
+linux.  If you want to watch ftp xfers, or do similar things, you
+don't need to use sniffer mode.  Also, some packet types are never
+sent up by the card, so you will never see them (ack, rts, cts, probe
+etc.)  There is a simple program (showcap) included in the ray_cs
+package which parses the 802.11 headers.
+
+Known Problems and missing features
+
+	Does not work with non x86
+
+	Does not work with SMP
+
+	Support for defragmenting frames is not yet debugged, and in
+	fact is known to not work.  I have never encountered a net set
+	up to fragment, but still, it should be fixed.
+
+	The ioctl support is incomplete.  The hardware address cannot be set
+	using ifconfig yet.  If a different hardware address is needed, it may
+	be set using the phy_addr parameter in ray_cs.opts.  This requires
+	a card insertion to take effect.
diff --git a/Documentation/networking/ray_cs.txt b/Documentation/networking/ray_cs.txt
deleted file mode 100644
index c0c12307ed9d..000000000000
--- a/Documentation/networking/ray_cs.txt
+++ /dev/null
@@ -1,150 +0,0 @@
-September 21, 1999
-
-Copyright (c) 1998  Corey Thomas (corey@world.std.com)
-
-This file is the documentation for the Raylink Wireless LAN card driver for
-Linux.  The Raylink wireless LAN card is a PCMCIA card which provides IEEE
-802.11 compatible wireless network connectivity at 1 and 2 megabits/second.
-See http://www.raytheon.com/micro/raylink/ for more information on the Raylink
-card.  This driver is in early development and does have bugs.  See the known
-bugs and limitations at the end of this document for more information.
-This driver also works with WebGear's Aviator 2.4 and Aviator Pro
-wireless LAN cards.
-
-As of kernel 2.3.18, the ray_cs driver is part of the Linux kernel
-source.  My web page for the development of ray_cs is at
-http://web.ralinktech.com/ralink/Home/Support/Linux.html 
-and I can be emailed at corey@world.std.com
-
-The kernel driver is based on ray_cs-1.62.tgz
-
-The driver at my web page is intended to be used as an add on to
-David Hinds pcmcia package.  All the command line parameters are
-available when compiled as a module.  When built into the kernel, only
-the essid= string parameter is available via the kernel command line.
-This will change after the method of sorting out parameters for all
-the PCMCIA drivers is agreed upon.  If you must have a built in driver
-with nondefault parameters, they can be edited in
-/usr/src/linux/drivers/net/pcmcia/ray_cs.c.  Searching for module_param
-will find them all.
-
-Information on card services is available at:
-	http://pcmcia-cs.sourceforge.net/
-
-
-Card services user programs are still required for PCMCIA devices.
-pcmcia-cs-3.1.1 or greater is required for the kernel version of
-the driver.
-
-Currently, ray_cs is not part of David Hinds card services package,
-so the following magic is required.
-
-At the end of the /etc/pcmcia/config.opts file, add the line: 
-source ./ray_cs.opts 
-This will make card services read the ray_cs.opts file
-when starting.  Create the file /etc/pcmcia/ray_cs.opts containing the
-following:
-
-#### start of /etc/pcmcia/ray_cs.opts ###################
-# Configuration options for Raylink Wireless LAN PCMCIA card
-device "ray_cs"
-  class "network" module "misc/ray_cs"
-
-card "RayLink PC Card WLAN Adapter"
-  manfid 0x01a6, 0x0000
-  bind "ray_cs"
-
-module "misc/ray_cs" opts ""
-#### end of /etc/pcmcia/ray_cs.opts #####################
-
-
-To join an existing network with
-different parameters, contact the network administrator for the 
-configuration information, and edit /etc/pcmcia/ray_cs.opts.
-Add the parameters below between the empty quotes.
-
-Parameters for ray_cs driver which may be specified in ray_cs.opts:
-
-bc              integer         0 = normal mode (802.11 timing)
-                                1 = slow down inter frame timing to allow
-                                    operation with older breezecom access
-                                    points.
-
-beacon_period	integer         beacon period in Kilo-microseconds
-				legal values = must be integer multiple 
-                                               of hop dwell
-                                default = 256
-
-country         integer         1 = USA (default)
-                                2 = Europe
-                                3 = Japan
-                                4 = Korea
-                                5 = Spain
-                                6 = France
-                                7 = Israel
-                                8 = Australia
-
-essid		string		ESS ID - network name to join
-				string with maximum length of 32 chars
-				default value = "ADHOC_ESSID"
-
-hop_dwell	integer         hop dwell time in Kilo-microseconds 
-				legal values = 16,32,64,128(default),256
-
-irq_mask	integer         linux standard 16 bit value 1bit/IRQ
-				lsb is IRQ 0, bit 1 is IRQ 1 etc.
-				Used to restrict choice of IRQ's to use.
-                                Recommended method for controlling
-                                interrupts is in /etc/pcmcia/config.opts
-
-net_type	integer		0 (default) = adhoc network, 
-				1 = infrastructure
-
-phy_addr	string          string containing new MAC address in
-				hex, must start with x eg
-				x00008f123456
-
-psm		integer         0 = continuously active
-				1 = power save mode (not useful yet)
-
-pc_debug	integer		(0-5) larger values for more verbose
-				logging.  Replaces ray_debug.
-
-ray_debug	integer		Replaced with pc_debug
-
-ray_mem_speed   integer         defaults to 500
-
-sniffer         integer         0 = not sniffer (default)
-                                1 = sniffer which can be used to record all
-                                    network traffic using tcpdump or similar, 
-                                    but no normal network use is allowed.
-
-translate	integer		0 = no translation (encapsulate frames)
-				1 = translation    (RFC1042/802.1)
-
-
-More on sniffer mode:
-
-tcpdump does not understand 802.11 headers, so it can't
-interpret the contents, but it can record to a file.  This is only
-useful for debugging 802.11 lowlevel protocols that are not visible to
-linux.  If you want to watch ftp xfers, or do similar things, you
-don't need to use sniffer mode.  Also, some packet types are never
-sent up by the card, so you will never see them (ack, rts, cts, probe
-etc.)  There is a simple program (showcap) included in the ray_cs
-package which parses the 802.11 headers.
-
-Known Problems and missing features
-
-        Does not work with non x86
-
-	Does not work with SMP
-
-	Support for defragmenting frames is not yet debugged, and in
-	fact is known to not work.  I have never encountered a net set
-	up to fragment, but still, it should be fixed.
-
-	The ioctl support is incomplete.  The hardware address cannot be set
-	using ifconfig yet.  If a different hardware address is needed, it may
-	be set using the phy_addr parameter in ray_cs.opts.  This requires
-	a card insertion to take effect.
diff --git a/drivers/net/wireless/Kconfig b/drivers/net/wireless/Kconfig
index 1c98d781ae49..15b0ad171f4c 100644
--- a/drivers/net/wireless/Kconfig
+++ b/drivers/net/wireless/Kconfig
@@ -57,7 +57,7 @@ config PCMCIA_RAYCS
 	---help---
 	  Say Y here if you intend to attach an Aviator/Raytheon PCMCIA
 	  (PC-card) wireless Ethernet networking card to your computer.
-	  Please read the file <file:Documentation/networking/ray_cs.txt> for
+	  Please read the file <file:Documentation/networking/ray_cs.rst> for
 	  details.
 
 	  To compile this driver as a module, choose M here: the module will be
-- 
cgit v1.2.3-59-g8ed1b


From bad5b6e223e8409c860c0574d5239ee4348f06b3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:19 +0200
Subject: docs: networking: convert rds.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/rds.rst   | 448 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/rds.txt   | 423 ----------------------------------
 MAINTAINERS                        |   2 +-
 4 files changed, 450 insertions(+), 424 deletions(-)
 create mode 100644 Documentation/networking/rds.rst
 delete mode 100644 Documentation/networking/rds.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b7e35b0d905c..e63a2cb2e4cb 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -97,6 +97,7 @@ Contents:
    proc_net_tcp
    radiotap-headers
    ray_cs
+   rds
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/rds.rst b/Documentation/networking/rds.rst
new file mode 100644
index 000000000000..44936c27ab3a
--- /dev/null
+++ b/Documentation/networking/rds.rst
@@ -0,0 +1,448 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===
+RDS
+===
+
+Overview
+========
+
+This readme tries to provide some background on the hows and whys of RDS,
+and will hopefully help you find your way around the code.
+
+In addition, please see this email about RDS origins:
+http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
+
+RDS Architecture
+================
+
+RDS provides reliable, ordered datagram delivery by using a single
+reliable connection between any two nodes in the cluster. This allows
+applications to use a single socket to talk to any other process in the
+cluster - so in a cluster with N processes you need N sockets, in contrast
+to N*N if you use a connection-oriented socket transport like TCP.
+
+RDS is not Infiniband-specific; it was designed to support different
+transports.  The current implementation used to support RDS over TCP as well
+as IB.
+
+The high-level semantics of RDS from the application's point of view are
+
+ *	Addressing
+
+	RDS uses IPv4 addresses and 16bit port numbers to identify
+	the end point of a connection. All socket operations that involve
+	passing addresses between kernel and user space generally
+	use a struct sockaddr_in.
+
+	The fact that IPv4 addresses are used does not mean the underlying
+	transport has to be IP-based. In fact, RDS over IB uses a
+	reliable IB connection; the IP address is used exclusively to
+	locate the remote node's GID (by ARPing for the given IP).
+
+	The port space is entirely independent of UDP, TCP or any other
+	protocol.
+
+ *	Socket interface
+
+	RDS sockets work *mostly* as you would expect from a BSD
+	socket. The next section will cover the details. At any rate,
+	all I/O is performed through the standard BSD socket API.
+	Some additions like zerocopy support are implemented through
+	control messages, while other extensions use the getsockopt/
+	setsockopt calls.
+
+	Sockets must be bound before you can send or receive data.
+	This is needed because binding also selects a transport and
+	attaches it to the socket. Once bound, the transport assignment
+	does not change. RDS will tolerate IPs moving around (eg in
+	an active-active HA scenario), but only as long as the address
+	doesn't move to a different transport.
+
+ *	sysctls
+
+	RDS supports a number of sysctls in /proc/sys/net/rds
+
+
+Socket Interface
+================
+
+  AF_RDS, PF_RDS, SOL_RDS
+	AF_RDS and PF_RDS are the domain type to be used with socket(2)
+	to create RDS sockets. SOL_RDS is the socket-level to be used
+	with setsockopt(2) and getsockopt(2) for RDS specific socket
+	options.
+
+  fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
+	This creates a new, unbound RDS socket.
+
+  setsockopt(SOL_SOCKET): send and receive buffer size
+	RDS honors the send and receive buffer size socket options.
+	You are not allowed to queue more than SO_SNDSIZE bytes to
+	a socket. A message is queued when sendmsg is called, and
+	it leaves the queue when the remote system acknowledges
+	its arrival.
+
+	The SO_RCVSIZE option controls the maximum receive queue length.
+	This is a soft limit rather than a hard limit - RDS will
+	continue to accept and queue incoming messages, even if that
+	takes the queue length over the limit. However, it will also
+	mark the port as "congested" and send a congestion update to
+	the source node. The source node is supposed to throttle any
+	processes sending to this congested port.
+
+  bind(fd, &sockaddr_in, ...)
+	This binds the socket to a local IP address and port, and a
+	transport, if one has not already been selected via the
+	SO_RDS_TRANSPORT socket option.
+
+  sendmsg(fd, ...)
+	Sends a message to the indicated recipient. The kernel will
+	transparently establish the underlying reliable connection
+	if it isn't up yet.
+
+	An attempt to send a message that exceeds SO_SNDSIZE will
+	return with -EMSGSIZE.
+
+	An attempt to send a message that would take the total number
+	of queued bytes over the SO_SNDSIZE threshold will return
+	EAGAIN.
+
+	An attempt to send a message to a destination that is marked
+	as "congested" will return ENOBUFS.
+
+  recvmsg(fd, ...)
+	Receives a message that was queued to this socket. The socket's
+	recv queue accounting is adjusted, and if the queue length
+	drops below SO_SNDSIZE, the port is marked uncongested, and
+	a congestion update is sent to all peers.
+
+	Applications can ask the RDS kernel module to receive
+	notifications via control messages (for instance, there is a
+	notification when a congestion update arrived, or when a RDMA
+	operation completes). These notifications are received through
+	the msg.msg_control buffer of struct msghdr. The format of the
+	messages is described in manpages.
+
+  poll(fd)
+	RDS supports the poll interface to allow the application
+	to implement async I/O.
+
+	POLLIN handling is pretty straightforward. When there's an
+	incoming message queued to the socket, or a pending notification,
+	we signal POLLIN.
+
+	POLLOUT is a little harder. Since you can essentially send
+	to any destination, RDS will always signal POLLOUT as long as
+	there's room on the send queue (ie the number of bytes queued
+	is less than the sendbuf size).
+
+	However, the kernel will refuse to accept messages to
+	a destination marked congested - in this case you will loop
+	forever if you rely on poll to tell you what to do.
+	This isn't a trivial problem, but applications can deal with
+	this - by using congestion notifications, and by checking for
+	ENOBUFS errors returned by sendmsg.
+
+  setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
+	This allows the application to discard all messages queued to a
+	specific destination on this particular socket.
+
+	This allows the application to cancel outstanding messages if
+	it detects a timeout. For instance, if it tried to send a message,
+	and the remote host is unreachable, RDS will keep trying forever.
+	The application may decide it's not worth it, and cancel the
+	operation. In this case, it would use RDS_CANCEL_SENT_TO to
+	nuke any pending messages.
+
+  ``setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..), getsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..)``
+	Set or read an integer defining  the underlying
+	encapsulating transport to be used for RDS packets on the
+	socket. When setting the option, integer argument may be
+	one of RDS_TRANS_TCP or RDS_TRANS_IB. When retrieving the
+	value, RDS_TRANS_NONE will be returned on an unbound socket.
+	This socket option may only be set exactly once on the socket,
+	prior to binding it via the bind(2) system call. Attempts to
+	set SO_RDS_TRANSPORT on a socket for which the transport has
+	been previously attached explicitly (by SO_RDS_TRANSPORT) or
+	implicitly (via bind(2)) will return an error of EOPNOTSUPP.
+	An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will
+	always return EINVAL.
+
+RDMA for RDS
+============
+
+  see rds-rdma(7) manpage (available in rds-tools)
+
+
+Congestion Notifications
+========================
+
+  see rds(7) manpage
+
+
+RDS Protocol
+============
+
+  Message header
+
+    The message header is a 'struct rds_header' (see rds.h):
+
+    Fields:
+
+      h_sequence:
+	  per-packet sequence number
+      h_ack:
+	  piggybacked acknowledgment of last packet received
+      h_len:
+	  length of data, not including header
+      h_sport:
+	  source port
+      h_dport:
+	  destination port
+      h_flags:
+	  Can be:
+
+	  =============  ==================================
+	  CONG_BITMAP    this is a congestion update bitmap
+	  ACK_REQUIRED   receiver must ack this packet
+	  RETRANSMITTED  packet has previously been sent
+	  =============  ==================================
+
+      h_credit:
+	  indicate to other end of connection that
+	  it has more credits available (i.e. there is
+	  more send room)
+      h_padding[4]:
+	  unused, for future use
+      h_csum:
+	  header checksum
+      h_exthdr:
+	  optional data can be passed here. This is currently used for
+	  passing RDMA-related information.
+
+  ACK and retransmit handling
+
+      One might think that with reliable IB connections you wouldn't need
+      to ack messages that have been received.  The problem is that IB
+      hardware generates an ack message before it has DMAed the message
+      into memory.  This creates a potential message loss if the HCA is
+      disabled for any reason between when it sends the ack and before
+      the message is DMAed and processed.  This is only a potential issue
+      if another HCA is available for fail-over.
+
+      Sending an ack immediately would allow the sender to free the sent
+      message from their send queue quickly, but could cause excessive
+      traffic to be used for acks. RDS piggybacks acks on sent data
+      packets.  Ack-only packets are reduced by only allowing one to be
+      in flight at a time, and by the sender only asking for acks when
+      its send buffers start to fill up. All retransmissions are also
+      acked.
+
+  Flow Control
+
+      RDS's IB transport uses a credit-based mechanism to verify that
+      there is space in the peer's receive buffers for more data. This
+      eliminates the need for hardware retries on the connection.
+
+  Congestion
+
+      Messages waiting in the receive queue on the receiving socket
+      are accounted against the socket's SO_RCVBUF option value.  Only
+      the payload bytes in the message are accounted for.  If the
+      number of bytes queued equals or exceeds rcvbuf then the socket
+      is congested.  All sends attempted to this socket's address
+      should block or return -EWOULDBLOCK.
+
+      Applications are expected to be reasonably tuned such that this
+      situation very rarely occurs.  An application encountering this
+      "back-pressure" is considered a bug.
+
+      This is implemented by having each node maintain bitmaps which
+      indicate which ports on bound addresses are congested.  As the
+      bitmap changes it is sent through all the connections which
+      terminate in the local address of the bitmap which changed.
+
+      The bitmaps are allocated as connections are brought up.  This
+      avoids allocation in the interrupt handling path which queues
+      messages on sockets.  The dense bitmaps let transports send the
+      entire bitmap on any bitmap change reasonably efficiently.  This
+      is much easier to implement than some finer-grained
+      communication of per-port congestion.  The sender does a very
+      inexpensive bit test to test if the port it's about to send to
+      is congested or not.
+
+
+RDS Transport Layer
+===================
+
+  As mentioned above, RDS is not IB-specific. Its code is divided
+  into a general RDS layer and a transport layer.
+
+  The general layer handles the socket API, congestion handling,
+  loopback, stats, usermem pinning, and the connection state machine.
+
+  The transport layer handles the details of the transport. The IB
+  transport, for example, handles all the queue pairs, work requests,
+  CM event handlers, and other Infiniband details.
+
+
+RDS Kernel Structures
+=====================
+
+  struct rds_message
+    aka possibly "rds_outgoing", the generic RDS layer copies data to
+    be sent and sets header fields as needed, based on the socket API.
+    This is then queued for the individual connection and sent by the
+    connection's transport.
+
+  struct rds_incoming
+    a generic struct referring to incoming data that can be handed from
+    the transport to the general code and queued by the general code
+    while the socket is awoken. It is then passed back to the transport
+    code to handle the actual copy-to-user.
+
+  struct rds_socket
+    per-socket information
+
+  struct rds_connection
+    per-connection information
+
+  struct rds_transport
+    pointers to transport-specific functions
+
+  struct rds_statistics
+    non-transport-specific statistics
+
+  struct rds_cong_map
+    wraps the raw congestion bitmap, contains rbnode, waitq, etc.
+
+Connection management
+=====================
+
+  Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
+  ERROR states.
+
+  The first time an attempt is made by an RDS socket to send data to
+  a node, a connection is allocated and connected. That connection is
+  then maintained forever -- if there are transport errors, the
+  connection will be dropped and re-established.
+
+  Dropping a connection while packets are queued will cause queued or
+  partially-sent datagrams to be retransmitted when the connection is
+  re-established.
+
+
+The send path
+=============
+
+  rds_sendmsg()
+    - struct rds_message built from incoming data
+    - CMSGs parsed (e.g. RDMA ops)
+    - transport connection alloced and connected if not already
+    - rds_message placed on send queue
+    - send worker awoken
+
+  rds_send_worker()
+    - calls rds_send_xmit() until queue is empty
+
+  rds_send_xmit()
+    - transmits congestion map if one is pending
+    - may set ACK_REQUIRED
+    - calls transport to send either non-RDMA or RDMA message
+      (RDMA ops never retransmitted)
+
+  rds_ib_xmit()
+    - allocs work requests from send ring
+    - adds any new send credits available to peer (h_credits)
+    - maps the rds_message's sg list
+    - piggybacks ack
+    - populates work requests
+    - post send to connection's queue pair
+
+The recv path
+=============
+
+  rds_ib_recv_cq_comp_handler()
+    - looks at write completions
+    - unmaps recv buffer from device
+    - no errors, call rds_ib_process_recv()
+    - refill recv ring
+
+  rds_ib_process_recv()
+    - validate header checksum
+    - copy header to rds_ib_incoming struct if start of a new datagram
+    - add to ibinc's fraglist
+    - if completed datagram:
+	 - update cong map if datagram was cong update
+	 - call rds_recv_incoming() otherwise
+	 - note if ack is required
+
+  rds_recv_incoming()
+    - drop duplicate packets
+    - respond to pings
+    - find the sock associated with this datagram
+    - add to sock queue
+    - wake up sock
+    - do some congestion calculations
+  rds_recvmsg
+    - copy data into user iovec
+    - handle CMSGs
+    - return to application
+
+Multipath RDS (mprds)
+=====================
+  Mprds is multipathed-RDS, primarily intended for RDS-over-TCP
+  (though the concept can be extended to other transports). The classical
+  implementation of RDS-over-TCP is implemented by demultiplexing multiple
+  PF_RDS sockets between any 2 endpoints (where endpoint == [IP address,
+  port]) over a single TCP socket between the 2 IP addresses involved. This
+  has the limitation that it ends up funneling multiple RDS flows over a
+  single TCP flow, thus it is
+  (a) upper-bounded to the single-flow bandwidth,
+  (b) suffers from head-of-line blocking for all the RDS sockets.
+
+  Better throughput (for a fixed small packet size, MTU) can be achieved
+  by having multiple TCP/IP flows per rds/tcp connection, i.e., multipathed
+  RDS (mprds).  Each such TCP/IP flow constitutes a path for the rds/tcp
+  connection. RDS sockets will be attached to a path based on some hash
+  (e.g., of local address and RDS port number) and packets for that RDS
+  socket will be sent over the attached path using TCP to segment/reassemble
+  RDS datagrams on that path.
+
+  Multipathed RDS is implemented by splitting the struct rds_connection into
+  a common (to all paths) part, and a per-path struct rds_conn_path. All
+  I/O workqs and reconnect threads are driven from the rds_conn_path.
+  Transports such as TCP that are multipath capable may then set up a
+  TCP socket per rds_conn_path, and this is managed by the transport via
+  the transport private cp_transport_data pointer.
+
+  Transports announce themselves as multipath capable by setting the
+  t_mp_capable bit during registration with the rds core module. When the
+  transport is multipath-capable, rds_sendmsg() hashes outgoing traffic
+  across multiple paths. The outgoing hash is computed based on the
+  local address and port that the PF_RDS socket is bound to.
+
+  Additionally, even if the transport is MP capable, we may be
+  peering with some node that does not support mprds, or supports
+  a different number of paths. As a result, the peering nodes need
+  to agree on the number of paths to be used for the connection.
+  This is done by sending out a control packet exchange before the
+  first data packet. The control packet exchange must have completed
+  prior to outgoing hash completion in rds_sendmsg() when the transport
+  is multipath capable.
+
+  The control packet is an RDS ping packet (i.e., packet to rds dest
+  port 0) with the ping packet having a rds extension header option  of
+  type RDS_EXTHDR_NPATHS, length 2 bytes, and the value is the
+  number of paths supported by the sender. The "probe" ping packet will
+  get sent from some reserved port, RDS_FLAG_PROBE_PORT (in <linux/rds.h>).
+  The receiver of a ping from RDS_FLAG_PROBE_PORT will thus immediately
+  be able to compute the min(sender_paths, rcvr_paths). The pong
+  sent in response to a probe-ping should contain the rcvr's npaths
+  when the rcvr is mprds-capable.
+
+  If the rcvr is not mprds-capable, the exthdr in the ping will be
+  ignored.  In this case the pong will not have any exthdrs, so the sender
+  of the probe-ping can default to single-path mprds.
+
diff --git a/Documentation/networking/rds.txt b/Documentation/networking/rds.txt
deleted file mode 100644
index eec61694e894..000000000000
--- a/Documentation/networking/rds.txt
+++ /dev/null
@@ -1,423 +0,0 @@
-
-Overview
-========
-
-This readme tries to provide some background on the hows and whys of RDS,
-and will hopefully help you find your way around the code.
-
-In addition, please see this email about RDS origins:
-http://oss.oracle.com/pipermail/rds-devel/2007-November/000228.html
-
-RDS Architecture
-================
-
-RDS provides reliable, ordered datagram delivery by using a single
-reliable connection between any two nodes in the cluster. This allows
-applications to use a single socket to talk to any other process in the
-cluster - so in a cluster with N processes you need N sockets, in contrast
-to N*N if you use a connection-oriented socket transport like TCP.
-
-RDS is not Infiniband-specific; it was designed to support different
-transports.  The current implementation used to support RDS over TCP as well
-as IB.
-
-The high-level semantics of RDS from the application's point of view are
-
- *	Addressing
-        RDS uses IPv4 addresses and 16bit port numbers to identify
-        the end point of a connection. All socket operations that involve
-        passing addresses between kernel and user space generally
-        use a struct sockaddr_in.
-
-        The fact that IPv4 addresses are used does not mean the underlying
-        transport has to be IP-based. In fact, RDS over IB uses a
-        reliable IB connection; the IP address is used exclusively to
-        locate the remote node's GID (by ARPing for the given IP).
-
-        The port space is entirely independent of UDP, TCP or any other
-        protocol.
-
- *	Socket interface
-        RDS sockets work *mostly* as you would expect from a BSD
-        socket. The next section will cover the details. At any rate,
-        all I/O is performed through the standard BSD socket API.
-        Some additions like zerocopy support are implemented through
-        control messages, while other extensions use the getsockopt/
-        setsockopt calls.
-
-        Sockets must be bound before you can send or receive data.
-        This is needed because binding also selects a transport and
-        attaches it to the socket. Once bound, the transport assignment
-        does not change. RDS will tolerate IPs moving around (eg in
-        a active-active HA scenario), but only as long as the address
-        doesn't move to a different transport.
-
- *	sysctls
-        RDS supports a number of sysctls in /proc/sys/net/rds
-
-
-Socket Interface
-================
-
-  AF_RDS, PF_RDS, SOL_RDS
-	AF_RDS and PF_RDS are the domain type to be used with socket(2)
-	to create RDS sockets. SOL_RDS is the socket-level to be used
-	with setsockopt(2) and getsockopt(2) for RDS specific socket
-	options.
-
-  fd = socket(PF_RDS, SOCK_SEQPACKET, 0);
-        This creates a new, unbound RDS socket.
-
-  setsockopt(SOL_SOCKET): send and receive buffer size
-        RDS honors the send and receive buffer size socket options.
-        You are not allowed to queue more than SO_SNDSIZE bytes to
-        a socket. A message is queued when sendmsg is called, and
-        it leaves the queue when the remote system acknowledges
-        its arrival.
-
-        The SO_RCVSIZE option controls the maximum receive queue length.
-        This is a soft limit rather than a hard limit - RDS will
-        continue to accept and queue incoming messages, even if that
-        takes the queue length over the limit. However, it will also
-        mark the port as "congested" and send a congestion update to
-        the source node. The source node is supposed to throttle any
-        processes sending to this congested port.
-
-  bind(fd, &sockaddr_in, ...)
-        This binds the socket to a local IP address and port, and a
-        transport, if one has not already been selected via the
-	SO_RDS_TRANSPORT socket option
-
-  sendmsg(fd, ...)
-        Sends a message to the indicated recipient. The kernel will
-        transparently establish the underlying reliable connection
-        if it isn't up yet.
-
-        An attempt to send a message that exceeds SO_SNDSIZE will
-        return with -EMSGSIZE
-
-        An attempt to send a message that would take the total number
-        of queued bytes over the SO_SNDSIZE threshold will return
-        EAGAIN.
-
-        An attempt to send a message to a destination that is marked
-        as "congested" will return ENOBUFS.
-
-  recvmsg(fd, ...)
-        Receives a message that was queued to this socket. The sockets
-        recv queue accounting is adjusted, and if the queue length
-        drops below SO_SNDSIZE, the port is marked uncongested, and
-        a congestion update is sent to all peers.
-
-        Applications can ask the RDS kernel module to receive
-        notifications via control messages (for instance, there is a
-        notification when a congestion update arrived, or when a RDMA
-        operation completes). These notifications are received through
-        the msg.msg_control buffer of struct msghdr. The format of the
-        messages is described in manpages.
-
-  poll(fd)
-        RDS supports the poll interface to allow the application
-        to implement async I/O.
-
-        POLLIN handling is pretty straightforward. When there's an
-        incoming message queued to the socket, or a pending notification,
-        we signal POLLIN.
-
-        POLLOUT is a little harder. Since you can essentially send
-        to any destination, RDS will always signal POLLOUT as long as
-        there's room on the send queue (ie the number of bytes queued
-        is less than the sendbuf size).
-
-        However, the kernel will refuse to accept messages to
-        a destination marked congested - in this case you will loop
-        forever if you rely on poll to tell you what to do.
-        This isn't a trivial problem, but applications can deal with
-        this - by using congestion notifications, and by checking for
-        ENOBUFS errors returned by sendmsg.
-
-  setsockopt(SOL_RDS, RDS_CANCEL_SENT_TO, &sockaddr_in)
-        This allows the application to discard all messages queued to a
-        specific destination on this particular socket.
-
-        This allows the application to cancel outstanding messages if
-        it detects a timeout. For instance, if it tried to send a message,
-        and the remote host is unreachable, RDS will keep trying forever.
-        The application may decide it's not worth it, and cancel the
-        operation. In this case, it would use RDS_CANCEL_SENT_TO to
-        nuke any pending messages.
-
-  setsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..)
-  getsockopt(fd, SOL_RDS, SO_RDS_TRANSPORT, (int *)&transport ..)
-	Set or read an integer defining  the underlying
-	encapsulating transport to be used for RDS packets on the
-	socket. When setting the option, integer argument may be
-	one of RDS_TRANS_TCP or RDS_TRANS_IB. When retrieving the
-	value, RDS_TRANS_NONE will be returned on an unbound socket.
-	This socket option may only be set exactly once on the socket,
-	prior to binding it via the bind(2) system call. Attempts to
-	set SO_RDS_TRANSPORT on a socket for which the transport has
-	been previously attached explicitly (by SO_RDS_TRANSPORT) or
-	implicitly (via bind(2)) will return an error of EOPNOTSUPP.
-	An attempt to set SO_RDS_TRANSPORT to RDS_TRANS_NONE will
-	always return EINVAL.
-
-RDMA for RDS
-============
-
-  see rds-rdma(7) manpage (available in rds-tools)
-
-
-Congestion Notifications
-========================
-
-  see rds(7) manpage
-
-
-RDS Protocol
-============
-
-  Message header
-
-    The message header is a 'struct rds_header' (see rds.h):
-    Fields:
-      h_sequence:
-          per-packet sequence number
-      h_ack:
-          piggybacked acknowledgment of last packet received
-      h_len:
-          length of data, not including header
-      h_sport:
-          source port
-      h_dport:
-          destination port
-      h_flags:
-          CONG_BITMAP - this is a congestion update bitmap
-          ACK_REQUIRED - receiver must ack this packet
-          RETRANSMITTED - packet has previously been sent
-      h_credit:
-          indicate to other end of connection that
-          it has more credits available (i.e. there is
-          more send room)
-      h_padding[4]:
-          unused, for future use
-      h_csum:
-          header checksum
-      h_exthdr:
-          optional data can be passed here. This is currently used for
-          passing RDMA-related information.
-
-  ACK and retransmit handling
-
-      One might think that with reliable IB connections you wouldn't need
-      to ack messages that have been received.  The problem is that IB
-      hardware generates an ack message before it has DMAed the message
-      into memory.  This creates a potential message loss if the HCA is
-      disabled for any reason between when it sends the ack and before
-      the message is DMAed and processed.  This is only a potential issue
-      if another HCA is available for fail-over.
-
-      Sending an ack immediately would allow the sender to free the sent
-      message from their send queue quickly, but could cause excessive
-      traffic to be used for acks. RDS piggybacks acks on sent data
-      packets.  Ack-only packets are reduced by only allowing one to be
-      in flight at a time, and by the sender only asking for acks when
-      its send buffers start to fill up. All retransmissions are also
-      acked.
-
-  Flow Control
-
-      RDS's IB transport uses a credit-based mechanism to verify that
-      there is space in the peer's receive buffers for more data. This
-      eliminates the need for hardware retries on the connection.
-
-  Congestion
-
-      Messages waiting in the receive queue on the receiving socket
-      are accounted against the sockets SO_RCVBUF option value.  Only
-      the payload bytes in the message are accounted for.  If the
-      number of bytes queued equals or exceeds rcvbuf then the socket
-      is congested.  All sends attempted to this socket's address
-      should return block or return -EWOULDBLOCK.
-
-      Applications are expected to be reasonably tuned such that this
-      situation very rarely occurs.  An application encountering this
-      "back-pressure" is considered a bug.
-
-      This is implemented by having each node maintain bitmaps which
-      indicate which ports on bound addresses are congested.  As the
-      bitmap changes it is sent through all the connections which
-      terminate in the local address of the bitmap which changed.
-
-      The bitmaps are allocated as connections are brought up.  This
-      avoids allocation in the interrupt handling path which queues
-      sages on sockets.  The dense bitmaps let transports send the
-      entire bitmap on any bitmap change reasonably efficiently.  This
-      is much easier to implement than some finer-grained
-      communication of per-port congestion.  The sender does a very
-      inexpensive bit test to test if the port it's about to send to
-      is congested or not.
-
-
-RDS Transport Layer
-==================
-
-  As mentioned above, RDS is not IB-specific. Its code is divided
-  into a general RDS layer and a transport layer.
-
-  The general layer handles the socket API, congestion handling,
-  loopback, stats, usermem pinning, and the connection state machine.
-
-  The transport layer handles the details of the transport. The IB
-  transport, for example, handles all the queue pairs, work requests,
-  CM event handlers, and other Infiniband details.
-
-
-RDS Kernel Structures
-=====================
-
-  struct rds_message
-    aka possibly "rds_outgoing", the generic RDS layer copies data to
-    be sent and sets header fields as needed, based on the socket API.
-    This is then queued for the individual connection and sent by the
-    connection's transport.
-  struct rds_incoming
-    a generic struct referring to incoming data that can be handed from
-    the transport to the general code and queued by the general code
-    while the socket is awoken. It is then passed back to the transport
-    code to handle the actual copy-to-user.
-  struct rds_socket
-    per-socket information
-  struct rds_connection
-    per-connection information
-  struct rds_transport
-    pointers to transport-specific functions
-  struct rds_statistics
-    non-transport-specific statistics
-  struct rds_cong_map
-    wraps the raw congestion bitmap, contains rbnode, waitq, etc.
-
-Connection management
-=====================
-
-  Connections may be in UP, DOWN, CONNECTING, DISCONNECTING, and
-  ERROR states.
-
-  The first time an attempt is made by an RDS socket to send data to
-  a node, a connection is allocated and connected. That connection is
-  then maintained forever -- if there are transport errors, the
-  connection will be dropped and re-established.
-
-  Dropping a connection while packets are queued will cause queued or
-  partially-sent datagrams to be retransmitted when the connection is
-  re-established.
-
-
-The send path
-=============
-
-  rds_sendmsg()
-    struct rds_message built from incoming data
-    CMSGs parsed (e.g. RDMA ops)
-    transport connection alloced and connected if not already
-    rds_message placed on send queue
-    send worker awoken
-  rds_send_worker()
-    calls rds_send_xmit() until queue is empty
-  rds_send_xmit()
-    transmits congestion map if one is pending
-    may set ACK_REQUIRED
-    calls transport to send either non-RDMA or RDMA message
-    (RDMA ops never retransmitted)
-  rds_ib_xmit()
-    allocs work requests from send ring
-    adds any new send credits available to peer (h_credits)
-    maps the rds_message's sg list
-    piggybacks ack
-    populates work requests
-    post send to connection's queue pair
-
-The recv path
-=============
-
-  rds_ib_recv_cq_comp_handler()
-    looks at write completions
-    unmaps recv buffer from device
-    no errors, call rds_ib_process_recv()
-    refill recv ring
-  rds_ib_process_recv()
-    validate header checksum
-    copy header to rds_ib_incoming struct if start of a new datagram
-    add to ibinc's fraglist
-    if competed datagram:
-      update cong map if datagram was cong update
-      call rds_recv_incoming() otherwise
-      note if ack is required
-  rds_recv_incoming()
-    drop duplicate packets
-    respond to pings
-    find the sock associated with this datagram
-    add to sock queue
-    wake up sock
-    do some congestion calculations
-  rds_recvmsg
-    copy data into user iovec
-    handle CMSGs
-    return to application
-
-Multipath RDS (mprds)
-=====================
-  Mprds is multipathed-RDS, primarily intended for RDS-over-TCP
-  (though the concept can be extended to other transports). The classical
-  implementation of RDS-over-TCP is implemented by demultiplexing multiple
-  PF_RDS sockets between any 2 endpoints (where endpoint == [IP address,
-  port]) over a single TCP socket between the 2 IP addresses involved. This
-  has the limitation that it ends up funneling multiple RDS flows over a
-  single TCP flow, thus it is
-  (a) upper-bounded to the single-flow bandwidth,
-  (b) suffers from head-of-line blocking for all the RDS sockets.
-
-  Better throughput (for a fixed small packet size, MTU) can be achieved
-  by having multiple TCP/IP flows per rds/tcp connection, i.e., multipathed
-  RDS (mprds).  Each such TCP/IP flow constitutes a path for the rds/tcp
-  connection. RDS sockets will be attached to a path based on some hash
-  (e.g., of local address and RDS port number) and packets for that RDS
-  socket will be sent over the attached path using TCP to segment/reassemble
-  RDS datagrams on that path.
-
-  Multipathed RDS is implemented by splitting the struct rds_connection into
-  a common (to all paths) part, and a per-path struct rds_conn_path. All
-  I/O workqs and reconnect threads are driven from the rds_conn_path.
-  Transports such as TCP that are multipath capable may then set up a
-  TCP socket per rds_conn_path, and this is managed by the transport via
-  the transport privatee cp_transport_data pointer.
-
-  Transports announce themselves as multipath capable by setting the
-  t_mp_capable bit during registration with the rds core module. When the
-  transport is multipath-capable, rds_sendmsg() hashes outgoing traffic
-  across multiple paths. The outgoing hash is computed based on the
-  local address and port that the PF_RDS socket is bound to.
-
-  Additionally, even if the transport is MP capable, we may be
-  peering with some node that does not support mprds, or supports
-  a different number of paths. As a result, the peering nodes need
-  to agree on the number of paths to be used for the connection.
-  This is done by sending out a control packet exchange before the
-  first data packet. The control packet exchange must have completed
-  prior to outgoing hash completion in rds_sendmsg() when the transport
-  is mutlipath capable.
-
-  The control packet is an RDS ping packet (i.e., packet to rds dest
-  port 0) with the ping packet having a rds extension header option  of
-  type RDS_EXTHDR_NPATHS, length 2 bytes, and the value is the
-  number of paths supported by the sender. The "probe" ping packet will
-  get sent from some reserved port, RDS_FLAG_PROBE_PORT (in <linux/rds.h>)
-  The receiver of a ping from RDS_FLAG_PROBE_PORT will thus immediately
-  be able to compute the min(sender_paths, rcvr_paths). The pong
-  sent in response to a probe-ping should contain the rcvr's npaths
-  when the rcvr is mprds-capable.
-
-  If the rcvr is not mprds-capable, the exthdr in the ping will be
-  ignored.  In this case the pong will not have any exthdrs, so the sender
-  of the probe-ping can default to single-path mprds.
-
diff --git a/MAINTAINERS b/MAINTAINERS
index 785f56e5f210..ea5dd3d1df9d 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14219,7 +14219,7 @@ L:	linux-rdma@vger.kernel.org
 L:	rds-devel@oss.oracle.com (moderated for non-subscribers)
 S:	Supported
 W:	https://oss.oracle.com/projects/rds/
-F:	Documentation/networking/rds.txt
+F:	Documentation/networking/rds.rst
 F:	net/rds/
 
 RDT - RESOURCE ALLOCATION
-- 
cgit v1.2.3-59-g8ed1b


From 98661e0c579dbda0e0910185f752fddd95e2d29c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:20 +0200
Subject: docs: networking: convert regulatory.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst      |   1 +
 Documentation/networking/regulatory.rst | 209 ++++++++++++++++++++++++++++++++
 Documentation/networking/regulatory.txt | 204 -------------------------------
 MAINTAINERS                             |   2 +-
 4 files changed, 211 insertions(+), 205 deletions(-)
 create mode 100644 Documentation/networking/regulatory.rst
 delete mode 100644 Documentation/networking/regulatory.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e63a2cb2e4cb..bc3b04a2edde 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -98,6 +98,7 @@ Contents:
    radiotap-headers
    ray_cs
    rds
+   regulatory
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/regulatory.rst b/Documentation/networking/regulatory.rst
new file mode 100644
index 000000000000..8701b91e81ee
--- /dev/null
+++ b/Documentation/networking/regulatory.rst
@@ -0,0 +1,209 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=======================================
+Linux wireless regulatory documentation
+=======================================
+
+This document gives a brief review over how the Linux wireless
+regulatory infrastructure works.
+
+More up to date information can be obtained at the project's web page:
+
+http://wireless.kernel.org/en/developers/Regulatory
+
+Keeping regulatory domains in userspace
+---------------------------------------
+
+Due to the dynamic nature of regulatory domains we keep them
+in userspace and provide a framework for userspace to upload
+to the kernel one regulatory domain to be used as the central
+core regulatory domain all wireless devices should adhere to.
+
+How to get regulatory domains to the kernel
+-------------------------------------------
+
+When the regulatory domain is first set up, the kernel will request a
+database file (regulatory.db) containing all the regulatory rules. It
+will then use that database when it needs to look up the rules for a
+given country.
+
+How to get regulatory domains to the kernel (old CRDA solution)
+---------------------------------------------------------------
+
+Userspace gets a regulatory domain in the kernel by having
+a userspace agent build it and send it via nl80211. Only
+expected regulatory domains will be respected by the kernel.
+
+A currently available userspace agent which can accomplish this
+is CRDA - central regulatory domain agent. It's documented here:
+
+http://wireless.kernel.org/en/developers/Regulatory/CRDA
+
+Essentially the kernel will send a udev event when it knows
+it needs a new regulatory domain. A udev rule can be put in place
+to trigger crda to send the respective regulatory domain for a
+specific ISO/IEC 3166 alpha2.
+
+Below is an example udev rule which can be used::
+
+  # Example file, should be put in /etc/udev/rules.d/regulatory.rules
+  KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda"
+
+The alpha2 is passed as an environment variable under the variable COUNTRY.
+
+Who asks for regulatory domains?
+--------------------------------
+
+* Users
+
+Users can use iw:
+
+http://wireless.kernel.org/en/users/Documentation/iw
+
+An example::
+
+  # set regulatory domain to "Costa Rica"
+  iw reg set CR
+
+This will request the kernel to set the regulatory domain to
+the specified alpha2. The kernel in turn will then ask userspace
+to provide a regulatory domain for the alpha2 specified by the user
+by sending a uevent.
+
+* Wireless subsystems for Country Information elements
+
+The kernel will send a uevent to inform userspace a new
+regulatory domain is required. More on this to be added
+as its integration is added.
+
+* Drivers
+
+If drivers determine they need a specific regulatory domain
+set they can inform the wireless core using regulatory_hint().
+They have two options -- they either provide an alpha2 so that
+crda can provide back a regulatory domain for that country or
+they can build their own regulatory domain based on internal
+custom knowledge so the wireless core can respect it.
+
+*Most* drivers will rely on the first mechanism of providing a
+regulatory hint with an alpha2. For these drivers there is an additional
+check that can be used to ensure compliance based on custom EEPROM
+regulatory data. This additional check can be used by drivers by
+registering on its struct wiphy a reg_notifier() callback. This notifier
+is called when the core's regulatory domain has been changed. The driver
+can use this to review the changes made and also review who made them
+(driver, user, country IE) and determine what to allow based on its
+internal EEPROM data. Device drivers wishing to be capable of world
+roaming should use this callback. More on world roaming will be
+added to this document when its support is enabled.
+
+Device drivers who provide their own built regulatory domain
+do not need a callback as the channels registered by them are
+the only ones that will be allowed and therefore *additional*
+channels cannot be enabled.
+
+Example code - drivers hinting an alpha2:
+------------------------------------------
+
+This example comes from the zd1211rw device driver. You can start
+by having a mapping of your device's EEPROM country/regulatory
+domain value to a specific alpha2 as follows::
+
+  static struct zd_reg_alpha2_map reg_alpha2_map[] = {
+	{ ZD_REGDOMAIN_FCC, "US" },
+	{ ZD_REGDOMAIN_IC, "CA" },
+	{ ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */
+	{ ZD_REGDOMAIN_JAPAN, "JP" },
+	{ ZD_REGDOMAIN_JAPAN_ADD, "JP" },
+	{ ZD_REGDOMAIN_SPAIN, "ES" },
+	{ ZD_REGDOMAIN_FRANCE, "FR" },
+
+Then you can define a routine to map your read EEPROM value to an alpha2,
+as follows::
+
+  static int zd_reg2alpha2(u8 regdomain, char *alpha2)
+  {
+	unsigned int i;
+	struct zd_reg_alpha2_map *reg_map;
+		for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) {
+			reg_map = &reg_alpha2_map[i];
+			if (regdomain == reg_map->reg) {
+			alpha2[0] = reg_map->alpha2[0];
+			alpha2[1] = reg_map->alpha2[1];
+			return 0;
+		}
+	}
+	return 1;
+  }
+
+Lastly, you can then hint to the core of your discovered alpha2, if a match
+was found. You need to do this after you have registered your wiphy. You
+are expected to do this during initialization.
+
+::
+
+	r = zd_reg2alpha2(mac->regdomain, alpha2);
+	if (!r)
+		regulatory_hint(hw->wiphy, alpha2);
+
+Example code - drivers providing a built in regulatory domain:
+--------------------------------------------------------------
+
+[NOTE: This API is not currently available, it can be added when required]
+
+If you have regulatory information you can obtain from your
+driver and you *need* to use this we let you build a regulatory domain
+structure and pass it to the wireless core. To do this you should
+kmalloc() a structure big enough to hold your regulatory domain
+structure and you should then fill it with your data. Finally you simply
+call regulatory_hint() with the regulatory domain structure in it.
+
+Below is a simple example, with a regulatory domain cached using the stack.
+Your implementation may vary (read EEPROM cache instead, for example).
+
+Example cache of some regulatory domain::
+
+  struct ieee80211_regdomain mydriver_jp_regdom = {
+	.n_reg_rules = 3,
+	.alpha2 =  "JP",
+	//.alpha2 =  "99", /* If I have no alpha2 to map it to */
+	.reg_rules = {
+		/* IEEE 802.11b/g, channels 1..14 */
+		REG_RULE(2412-10, 2484+10, 40, 6, 20, 0),
+		/* IEEE 802.11a, channels 34..48 */
+		REG_RULE(5170-10, 5240+10, 40, 6, 20,
+			NL80211_RRF_NO_IR),
+		/* IEEE 802.11a, channels 52..64 */
+		REG_RULE(5260-10, 5320+10, 40, 6, 20,
+			NL80211_RRF_NO_IR|
+			NL80211_RRF_DFS),
+	}
+  };
+
+Then in some part of your code after your wiphy has been registered::
+
+	struct ieee80211_regdomain *rd;
+	int size_of_regd;
+	int num_rules = mydriver_jp_regdom.n_reg_rules;
+	unsigned int i;
+
+	size_of_regd = sizeof(struct ieee80211_regdomain) +
+		(num_rules * sizeof(struct ieee80211_reg_rule));
+
+	rd = kzalloc(size_of_regd, GFP_KERNEL);
+	if (!rd)
+		return -ENOMEM;
+
+	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
+
+	for (i=0; i < num_rules; i++)
+		memcpy(&rd->reg_rules[i],
+		       &mydriver_jp_regdom.reg_rules[i],
+		       sizeof(struct ieee80211_reg_rule));
+	regulatory_struct_hint(rd);
+
+Statically compiled regulatory database
+---------------------------------------
+
+When a database should be fixed into the kernel, it can be provided as a
+firmware file at build time that is then linked into the kernel.
diff --git a/Documentation/networking/regulatory.txt b/Documentation/networking/regulatory.txt
deleted file mode 100644
index 381e5b23d61d..000000000000
--- a/Documentation/networking/regulatory.txt
+++ /dev/null
@@ -1,204 +0,0 @@
-Linux wireless regulatory documentation
----------------------------------------
-
-This document gives a brief review over how the Linux wireless
-regulatory infrastructure works.
-
-More up to date information can be obtained at the project's web page:
-
-http://wireless.kernel.org/en/developers/Regulatory
-
-Keeping regulatory domains in userspace
----------------------------------------
-
-Due to the dynamic nature of regulatory domains we keep them
-in userspace and provide a framework for userspace to upload
-to the kernel one regulatory domain to be used as the central
-core regulatory domain all wireless devices should adhere to.
-
-How to get regulatory domains to the kernel
--------------------------------------------
-
-When the regulatory domain is first set up, the kernel will request a
-database file (regulatory.db) containing all the regulatory rules. It
-will then use that database when it needs to look up the rules for a
-given country.
-
-How to get regulatory domains to the kernel (old CRDA solution)
----------------------------------------------------------------
-
-Userspace gets a regulatory domain in the kernel by having
-a userspace agent build it and send it via nl80211. Only
-expected regulatory domains will be respected by the kernel.
-
-A currently available userspace agent which can accomplish this
-is CRDA - central regulatory domain agent. Its documented here:
-
-http://wireless.kernel.org/en/developers/Regulatory/CRDA
-
-Essentially the kernel will send a udev event when it knows
-it needs a new regulatory domain. A udev rule can be put in place
-to trigger crda to send the respective regulatory domain for a
-specific ISO/IEC 3166 alpha2.
-
-Below is an example udev rule which can be used:
-
-# Example file, should be put in /etc/udev/rules.d/regulatory.rules
-KERNEL=="regulatory*", ACTION=="change", SUBSYSTEM=="platform", RUN+="/sbin/crda"
-
-The alpha2 is passed as an environment variable under the variable COUNTRY.
-
-Who asks for regulatory domains?
---------------------------------
-
-* Users
-
-Users can use iw:
-
-http://wireless.kernel.org/en/users/Documentation/iw
-
-An example:
-
-  # set regulatory domain to "Costa Rica"
-  iw reg set CR
-
-This will request the kernel to set the regulatory domain to
-the specificied alpha2. The kernel in turn will then ask userspace
-to provide a regulatory domain for the alpha2 specified by the user
-by sending a uevent.
-
-* Wireless subsystems for Country Information elements
-
-The kernel will send a uevent to inform userspace a new
-regulatory domain is required. More on this to be added
-as its integration is added.
-
-* Drivers
-
-If drivers determine they need a specific regulatory domain
-set they can inform the wireless core using regulatory_hint().
-They have two options -- they either provide an alpha2 so that
-crda can provide back a regulatory domain for that country or
-they can build their own regulatory domain based on internal
-custom knowledge so the wireless core can respect it.
-
-*Most* drivers will rely on the first mechanism of providing a
-regulatory hint with an alpha2. For these drivers there is an additional
-check that can be used to ensure compliance based on custom EEPROM
-regulatory data. This additional check can be used by drivers by
-registering on its struct wiphy a reg_notifier() callback. This notifier
-is called when the core's regulatory domain has been changed. The driver
-can use this to review the changes made and also review who made them
-(driver, user, country IE) and determine what to allow based on its
-internal EEPROM data. Devices drivers wishing to be capable of world
-roaming should use this callback. More on world roaming will be
-added to this document when its support is enabled.
-
-Device drivers who provide their own built regulatory domain
-do not need a callback as the channels registered by them are
-the only ones that will be allowed and therefore *additional*
-channels cannot be enabled.
-
-Example code - drivers hinting an alpha2:
-------------------------------------------
-
-This example comes from the zd1211rw device driver. You can start
-by having a mapping of your device's EEPROM country/regulatory
-domain value to a specific alpha2 as follows:
-
-static struct zd_reg_alpha2_map reg_alpha2_map[] = {
-	{ ZD_REGDOMAIN_FCC, "US" },
-	{ ZD_REGDOMAIN_IC, "CA" },
-	{ ZD_REGDOMAIN_ETSI, "DE" }, /* Generic ETSI, use most restrictive */
-	{ ZD_REGDOMAIN_JAPAN, "JP" },
-	{ ZD_REGDOMAIN_JAPAN_ADD, "JP" },
-	{ ZD_REGDOMAIN_SPAIN, "ES" },
-	{ ZD_REGDOMAIN_FRANCE, "FR" },
-
-Then you can define a routine to map your read EEPROM value to an alpha2,
-as follows:
-
-static int zd_reg2alpha2(u8 regdomain, char *alpha2)
-{
-	unsigned int i;
-	struct zd_reg_alpha2_map *reg_map;
-		for (i = 0; i < ARRAY_SIZE(reg_alpha2_map); i++) {
-			reg_map = &reg_alpha2_map[i];
-			if (regdomain == reg_map->reg) {
-			alpha2[0] = reg_map->alpha2[0];
-			alpha2[1] = reg_map->alpha2[1];
-			return 0;
-		}
-	}
-	return 1;
-}
-
-Lastly, you can then hint to the core of your discovered alpha2, if a match
-was found. You need to do this after you have registered your wiphy. You
-are expected to do this during initialization.
-
-	r = zd_reg2alpha2(mac->regdomain, alpha2);
-	if (!r)
-		regulatory_hint(hw->wiphy, alpha2);
-
-Example code - drivers providing a built in regulatory domain:
---------------------------------------------------------------
-
-[NOTE: This API is not currently available, it can be added when required]
-
-If you have regulatory information you can obtain from your
-driver and you *need* to use this we let you build a regulatory domain
-structure and pass it to the wireless core. To do this you should
-kmalloc() a structure big enough to hold your regulatory domain
-structure and you should then fill it with your data. Finally you simply
-call regulatory_hint() with the regulatory domain structure in it.
-
-Bellow is a simple example, with a regulatory domain cached using the stack.
-Your implementation may vary (read EEPROM cache instead, for example).
-
-Example cache of some regulatory domain
-
-struct ieee80211_regdomain mydriver_jp_regdom = {
-	.n_reg_rules = 3,
-	.alpha2 =  "JP",
-	//.alpha2 =  "99", /* If I have no alpha2 to map it to */
-	.reg_rules = {
-		/* IEEE 802.11b/g, channels 1..14 */
-		REG_RULE(2412-10, 2484+10, 40, 6, 20, 0),
-		/* IEEE 802.11a, channels 34..48 */
-		REG_RULE(5170-10, 5240+10, 40, 6, 20,
-			NL80211_RRF_NO_IR),
-		/* IEEE 802.11a, channels 52..64 */
-		REG_RULE(5260-10, 5320+10, 40, 6, 20,
-			NL80211_RRF_NO_IR|
-			NL80211_RRF_DFS),
-	}
-};
-
-Then in some part of your code after your wiphy has been registered:
-
-	struct ieee80211_regdomain *rd;
-	int size_of_regd;
-	int num_rules = mydriver_jp_regdom.n_reg_rules;
-	unsigned int i;
-
-	size_of_regd = sizeof(struct ieee80211_regdomain) +
-		(num_rules * sizeof(struct ieee80211_reg_rule));
-
-	rd = kzalloc(size_of_regd, GFP_KERNEL);
-	if (!rd)
-		return -ENOMEM;
-
-	memcpy(rd, &mydriver_jp_regdom, sizeof(struct ieee80211_regdomain));
-
-	for (i=0; i < num_rules; i++)
-		memcpy(&rd->reg_rules[i],
-		       &mydriver_jp_regdom.reg_rules[i],
-		       sizeof(struct ieee80211_reg_rule));
-	regulatory_struct_hint(rd);
-
-Statically compiled regulatory database
----------------------------------------
-
-When a database should be fixed into the kernel, it can be provided as a
-firmware file at build time that is then linked into the kernel.
diff --git a/MAINTAINERS b/MAINTAINERS
index ea5dd3d1df9d..b28823ab48c5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -193,7 +193,7 @@ W:	https://wireless.wiki.kernel.org/
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211.git
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/jberg/mac80211-next.git
 F:	Documentation/driver-api/80211/cfg80211.rst
-F:	Documentation/networking/regulatory.txt
+F:	Documentation/networking/regulatory.rst
 F:	include/linux/ieee80211.h
 F:	include/net/cfg80211.h
 F:	include/net/ieee80211_radiotap.h
-- 
cgit v1.2.3-59-g8ed1b


From 9f72374cb5959556870be8078b128158edde5d3e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:21 +0200
Subject: docs: networking: convert rxrpc.txt to ReST

- add SPDX header;
- adjust title markup;
- use autonumbered list markups;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/filesystems/afs.rst  |    2 +-
 Documentation/networking/index.rst |    1 +
 Documentation/networking/rxrpc.rst | 1169 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/rxrpc.txt | 1155 -----------------------------------
 MAINTAINERS                        |    2 +-
 net/rxrpc/Kconfig                  |    6 +-
 net/rxrpc/sysctl.c                 |    2 +-
 7 files changed, 1176 insertions(+), 1161 deletions(-)
 create mode 100644 Documentation/networking/rxrpc.rst
 delete mode 100644 Documentation/networking/rxrpc.txt

diff --git a/Documentation/filesystems/afs.rst b/Documentation/filesystems/afs.rst
index c4ec39a5966e..cada9464d6bd 100644
--- a/Documentation/filesystems/afs.rst
+++ b/Documentation/filesystems/afs.rst
@@ -70,7 +70,7 @@ list of volume location server IP addresses::
 The first module is the AF_RXRPC network protocol driver.  This provides the
 RxRPC remote operation protocol and may also be accessed from userspace.  See:
 
-	Documentation/networking/rxrpc.txt
+	Documentation/networking/rxrpc.rst
 
 The second module is the kerberos RxRPC security driver, and the third module
 is the actual filesystem driver for the AFS filesystem.
diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index bc3b04a2edde..cd307b9601fa 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -99,6 +99,7 @@ Contents:
    ray_cs
    rds
    regulatory
+   rxrpc
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
new file mode 100644
index 000000000000..5ad35113d0f4
--- /dev/null
+++ b/Documentation/networking/rxrpc.rst
@@ -0,0 +1,1169 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+RxRPC Network Protocol
+======================
+
+The RxRPC protocol driver provides a reliable two-phase transport on top of UDP
+that can be used to perform RxRPC remote operations.  This is done over sockets
+of AF_RXRPC family, using sendmsg() and recvmsg() with control data to send and
+receive data, aborts and errors.
+
+Contents of this document:
+
+ (#) Overview.
+
+ (#) RxRPC protocol summary.
+
+ (#) AF_RXRPC driver model.
+
+ (#) Control messages.
+
+ (#) Socket options.
+
+ (#) Security.
+
+ (#) Example client usage.
+
+ (#) Example server usage.
+
+ (#) AF_RXRPC kernel interface.
+
+ (#) Configurable parameters.
+
+
+Overview
+========
+
+RxRPC is a two-layer protocol.  There is a session layer which provides
+reliable virtual connections using UDP over IPv4 (or IPv6) as the transport
+layer, but implements a real network protocol; and there's the presentation
+layer which renders structured data to binary blobs and back again using XDR
+(as does SunRPC)::
+
+		+-------------+
+		| Application |
+		+-------------+
+		|     XDR     |		Presentation
+		+-------------+
+		|    RxRPC    |		Session
+		+-------------+
+		|     UDP     |		Transport
+		+-------------+
+
+
+AF_RXRPC provides:
+
+ (1) Part of an RxRPC facility for both kernel and userspace applications by
+     making the session part of it a Linux network protocol (AF_RXRPC).
+
+ (2) A two-phase protocol.  The client transmits a blob (the request) and then
+     receives a blob (the reply), and the server receives the request and then
+     transmits the reply.
+
+ (3) Retention of the reusable bits of the transport system set up for one call
+     to speed up subsequent calls.
+
+ (4) A secure protocol, using the Linux kernel's key retention facility to
+     manage security on the client end.  The server end must of necessity be
+     more active in security negotiations.
+
+AF_RXRPC does not provide XDR marshalling/presentation facilities.  That is
+left to the application.  AF_RXRPC only deals in blobs.  Even the operation ID
+is just the first four bytes of the request blob, and as such is beyond the
+kernel's interest.
+
+
+Sockets of AF_RXRPC family are:
+
+ (1) created as type SOCK_DGRAM;
+
+ (2) provided with a protocol of the type of underlying transport they're going
+     to use - currently only PF_INET is supported.
+
+
+The Andrew File System (AFS) is an example of an application that uses this and
+that has both kernel (filesystem) and userspace (utility) components.
+
+
+RxRPC Protocol Summary
+======================
+
+An overview of the RxRPC protocol:
+
+ (#) RxRPC sits on top of another networking protocol (UDP is the only option
+     currently), and uses this to provide network transport.  UDP ports, for
+     example, provide transport endpoints.
+
+ (#) RxRPC supports multiple virtual "connections" from any given transport
+     endpoint, thus allowing the endpoints to be shared, even to the same
+     remote endpoint.
+
+ (#) Each connection goes to a particular "service".  A connection may not go
+     to multiple services.  A service may be considered the RxRPC equivalent of
+     a port number.  AF_RXRPC permits multiple services to share an endpoint.
+
+ (#) Client-originating packets are marked, thus a transport endpoint can be
+     shared between client and server connections (connections have a
+     direction).
+
+ (#) Up to a billion connections may be supported concurrently between one
+     local transport endpoint and one service on one remote endpoint.  An RxRPC
+     connection is described by seven numbers::
+
+	Local address	}
+	Local port	} Transport (UDP) address
+	Remote address	}
+	Remote port	}
+	Direction
+	Connection ID
+	Service ID
+
+ (#) Each RxRPC operation is a "call".  A connection may make up to four
+     billion calls, but only up to four calls may be in progress on a
+     connection at any one time.
+
+ (#) Calls are two-phase and asymmetric: the client sends its request data,
+     which the service receives; then the service transmits the reply data
+     which the client receives.
+
+ (#) The data blobs are of indefinite size; the end of a phase is marked with a
+     flag in the packet.  The number of packets of data making up one blob may
+     not exceed 4 billion, however, as this would cause the sequence number to
+     wrap.
+
+ (#) The first four bytes of the request data are the service operation ID.
+
+ (#) Security is negotiated on a per-connection basis.  The connection is
+     initiated by the first data packet on it arriving.  If security is
+     requested, the server then issues a "challenge" and then the client
+     replies with a "response".  If the response is successful, the security is
+     set for the lifetime of that connection, and all subsequent calls made
+     upon it use that same security.  In the event that the server lets a
+     connection lapse before the client, the security will be renegotiated if
+     the client uses the connection again.
+
+ (#) Calls use ACK packets to handle reliability.  Data packets are also
+     explicitly sequenced per call.
+
+ (#) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs.
+     A hard-ACK indicates to the far side that all the data received to a point
+     has been received and processed; a soft-ACK indicates that the data has
+     been received but may yet be discarded and re-requested.  The sender may
+     not discard any transmittable packets until they've been hard-ACK'd.
+
+ (#) Reception of a reply data packet implicitly hard-ACK's all the data
+     packets that make up the request.
+
+ (#) A call is complete when the request has been sent, the reply has been
+     received and the final hard-ACK on the last packet of the reply has
+     reached the server.
+
+ (#) A call may be aborted by either end at any time up to its completion.
+
+
+AF_RXRPC Driver Model
+=====================
+
+About the AF_RXRPC driver:
+
+ (#) The AF_RXRPC protocol transparently uses internal sockets of the transport
+     protocol to represent transport endpoints.
+
+ (#) AF_RXRPC sockets map onto RxRPC connection bundles.  Actual RxRPC
+     connections are handled transparently.  One client socket may be used to
+     make multiple simultaneous calls to the same service.  One server socket
+     may handle calls from many clients.
+
+ (#) Additional parallel client connections will be initiated to support extra
+     concurrent calls, up to a tunable limit.
+
+ (#) Each connection is retained for a certain amount of time [tunable] after
+     the last call currently using it has completed in case a new call is made
+     that could reuse it.
+
+ (#) Each internal UDP socket is retained [tunable] for a certain amount of
+     time [tunable] after the last connection using it is discarded, in case a
+     new connection is made that could use it.
+
+ (#) A client-side connection is only shared between calls if they have
+     the same key struct describing their security (and assuming the calls
+     would otherwise share the connection).  Non-secured calls would also be
+     able to share connections with each other.
+
+ (#) A server-side connection is shared if the client says it is.
+
+ (#) ACK'ing is handled by the protocol driver automatically, including ping
+     replying.
+
+ (#) SO_KEEPALIVE automatically pings the other side to keep the connection
+     alive [TODO].
+
+ (#) If an ICMP error is received, all calls affected by that error will be
+     aborted with an appropriate network error passed through recvmsg().
+
+
+Interaction with the user of the RxRPC socket:
+
+ (#) A socket is made into a server socket by binding an address with a
+     non-zero service ID.
+
+ (#) In the client, sending a request is achieved with one or more sendmsgs,
+     followed by the reply being received with one or more recvmsgs.
+
+ (#) The first sendmsg for a request to be sent from a client contains a tag to
+     be used in all other sendmsgs or recvmsgs associated with that call.  The
+     tag is carried in the control data.
+
+ (#) connect() is used to supply a default destination address for a client
+     socket.  This may be overridden by supplying an alternate address to the
+     first sendmsg() of a call (struct msghdr::msg_name).
+
+ (#) If connect() is called on an unbound client, a random local port will
+     be bound before the operation takes place.
+
+ (#) A server socket may also be used to make client calls.  To do this, the
+     first sendmsg() of the call must specify the target address.  The server's
+     transport endpoint is used to send the packets.
+
+ (#) Once the application has received the last message associated with a call,
+     the tag is guaranteed not to be seen again, and so it can be used to pin
+     client resources.  A new call can then be initiated with the same tag
+     without fear of interference.
+
+ (#) In the server, a request is received with one or more recvmsgs, then the
+     reply is transmitted with one or more sendmsgs, and then the final ACK
+     is received with a last recvmsg.
+
+ (#) When sending data for a call, sendmsg is given MSG_MORE if there's more
+     data to come on that call.
+
+ (#) When receiving data for a call, recvmsg flags MSG_MORE if there's more
+     data to come for that call.
+
+ (#) When receiving data or messages for a call, MSG_EOR is flagged by recvmsg
+     to indicate the terminal message for that call.
+
+ (#) A call may be aborted by adding an abort control message to the control
+     data.  Issuing an abort terminates the kernel's use of that call's tag.
+     Any messages waiting in the receive queue for that call will be discarded.
+
+ (#) Aborts, busy notifications and challenge packets are delivered by recvmsg,
+     and control data messages will be set to indicate the context.  Receiving
+     an abort or a busy message terminates the kernel's use of that call's tag.
+
+ (#) The control data part of the msghdr struct is used for a number of things:
+
+     (#) The tag of the intended or affected call.
+
+     (#) Sending or receiving errors, aborts and busy notifications.
+
+     (#) Notifications of incoming calls.
+
+     (#) Sending debug requests and receiving debug replies [TODO].
+
+ (#) When the kernel has received and set up an incoming call, it sends a
+     message to server application to let it know there's a new call awaiting
+     its acceptance [recvmsg reports a special control message].  The server
+     application then uses sendmsg to assign a tag to the new call.  Once that
+     is done, the first part of the request data will be delivered by recvmsg.
+
+ (#) The server application has to provide the server socket with a keyring of
+     secret keys corresponding to the security types it permits.  When a secure
+     connection is being set up, the kernel looks up the appropriate secret key
+     in the keyring and then sends a challenge packet to the client and
+     receives a response packet.  The kernel then checks the authorisation of
+     the packet and either aborts the connection or sets up the security.
+
+ (#) The name of the key a client will use to secure its communications is
+     nominated by a socket option.
+
+
+Notes on sendmsg:
+
+ (#) MSG_WAITALL can be set to tell sendmsg to ignore signals if the peer is
+     making progress at accepting packets within a reasonable time such that we
+     manage to queue up all the data for transmission.  This requires the
+     client to accept at least one packet per 2*RTT time period.
+
+     If this isn't set, sendmsg() will return immediately, either returning
+     EINTR/ERESTARTSYS if nothing was consumed or returning the amount of data
+     consumed.
+
+
+Notes on recvmsg:
+
+ (1) If there's a sequence of data messages belonging to a particular call on
+     the receive queue, then recvmsg will keep working through them until:
+
+     (a) it meets the end of that call's received data,
+
+     (b) it meets a non-data message,
+
+     (c) it meets a message belonging to a different call, or
+
+     (d) it fills the user buffer.
+
+     If recvmsg is called in blocking mode, it will keep sleeping, awaiting the
+     reception of further data, until one of the above four conditions is met.
+
+ (2) MSG_PEEK operates similarly, but will return immediately if it has put any
+     data in the buffer rather than sleeping until it can fill the buffer.
+
+ (3) If a data message is only partially consumed in filling a user buffer,
+     then the remainder of that message will be left on the front of the queue
+     for the next taker.  MSG_TRUNC will never be flagged.
+
+ (4) If there is more data to be had on a call (it hasn't copied the last byte
+     of the last data message in that phase yet), then MSG_MORE will be
+     flagged.
+
+
+Control Messages
+================
+
+AF_RXRPC makes use of control messages in sendmsg() and recvmsg() to multiplex
+calls, to invoke certain actions and to report certain conditions.  These are:
+
+	=======================	=== ===========	===============================
+	MESSAGE ID		SRT DATA	MEANING
+	=======================	=== ===========	===============================
+	RXRPC_USER_CALL_ID	sr- User ID	App's call specifier
+	RXRPC_ABORT		srt Abort code	Abort code to issue/received
+	RXRPC_ACK		-rt n/a		Final ACK received
+	RXRPC_NET_ERROR		-rt error num	Network error on call
+	RXRPC_BUSY		-rt n/a		Call rejected (server busy)
+	RXRPC_LOCAL_ERROR	-rt error num	Local error encountered
+	RXRPC_NEW_CALL		-r- n/a		New call received
+	RXRPC_ACCEPT		s-- n/a		Accept new call
+	RXRPC_EXCLUSIVE_CALL	s-- n/a		Make an exclusive client call
+	RXRPC_UPGRADE_SERVICE	s-- n/a		Client call can be upgraded
+	RXRPC_TX_LENGTH		s-- data len	Total length of Tx data
+	=======================	=== ===========	===============================
+
+	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
+
+ (#) RXRPC_USER_CALL_ID
+
+     This is used to indicate the application's call ID.  It's an unsigned long
+     that the app specifies in the client by attaching it to the first data
+     message or in the server by passing it in association with an RXRPC_ACCEPT
+     message.  recvmsg() passes it in conjunction with all messages except
+     those of the RXRPC_NEW_CALL message.
+
+ (#) RXRPC_ABORT
+
+     This can be used by an application to abort a call by passing it to
+     sendmsg, or it can be delivered by recvmsg to indicate a remote abort was
+     received.  Either way, it must be associated with an RXRPC_USER_CALL_ID to
+     specify the call affected.  If an abort is being sent, then error EBADSLT
+     will be returned if there is no call with that user ID.
+
+ (#) RXRPC_ACK
+
+     This is delivered to a server application to indicate that the final ACK
+     of a call was received from the client.  It will be associated with an
+     RXRPC_USER_CALL_ID to indicate the call that's now complete.
+
+ (#) RXRPC_NET_ERROR
+
+     This is delivered to an application to indicate that an ICMP error message
+     was encountered in the process of trying to talk to the peer.  An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (#) RXRPC_BUSY
+
+     This is delivered to a client application to indicate that a call was
+     rejected by the server due to the server being busy.  It will be
+     associated with an RXRPC_USER_CALL_ID to indicate the rejected call.
+
+ (#) RXRPC_LOCAL_ERROR
+
+     This is delivered to an application to indicate that a local error was
+     encountered and that a call has been aborted because of it.  An
+     errno-class integer value will be included in the control message data
+     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
+     affected.
+
+ (#) RXRPC_NEW_CALL
+
+     This is delivered to indicate to a server application that a new call has
+     arrived and is awaiting acceptance.  No user ID is associated with this,
+     as a user ID must subsequently be assigned by doing an RXRPC_ACCEPT.
+
+ (#) RXRPC_ACCEPT
+
+     This is used by a server application to attempt to accept a call and
+     assign it a user ID.  It should be associated with an RXRPC_USER_CALL_ID
+     to indicate the user ID to be assigned.  If there is no call to be
+     accepted (it may have timed out, been aborted, etc.), then sendmsg will
+     return error ENODATA.  If the user ID is already in use by another call,
+     then error EBADSLT will be returned.
+
+ (#) RXRPC_EXCLUSIVE_CALL
+
+     This is used to indicate that a client call should be made on a one-off
+     connection.  The connection is discarded once the call has terminated.
+
+ (#) RXRPC_UPGRADE_SERVICE
+
+     This is used to make a client call to probe if the specified service ID
+     may be upgraded by the server.  The caller must check msg_name returned to
+     recvmsg() for the service ID actually in use.  The operation probed must
+     be one that takes the same arguments in both services.
+
+     Once this has been used to establish the upgrade capability (or lack
+     thereof) of the server, the service ID returned should be used for all
+     future communication to that server and RXRPC_UPGRADE_SERVICE should no
+     longer be set.
+
+ (#) RXRPC_TX_LENGTH
+
+     This is used to inform the kernel of the total amount of data that is
+     going to be transmitted by a call (whether in a client request or a
+     service response).  If given, it allows the kernel to encrypt from the
+     userspace buffer directly to the packet buffers, rather than copying into
+     the buffer and then encrypting in place.  This may only be given with the
+     first sendmsg() providing data for a call.  EMSGSIZE will be generated if
+     the amount of data actually given is different.
+
+     This takes a parameter of __s64 type that indicates how much will be
+     transmitted.  This may not be less than zero.
+
+The symbol RXRPC__SUPPORTED is defined as one more than the highest control
+message type supported.  At run time this can be queried by means of the
+RXRPC_SUPPORTED_CMSG socket option (see below).
+
+
+
+Socket Options
+==============
+
+AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
+
+ (#) RXRPC_SECURITY_KEY
+
+     This is used to specify the description of the key to be used.  The key is
+     extracted from the calling process's keyrings with request_key() and
+     should be of "rxrpc" type.
+
+     The optval pointer points to the description string, and optlen indicates
+     how long the string is, without the NUL terminator.
+
+ (#) RXRPC_SECURITY_KEYRING
+
+     Similar to above but specifies a keyring of server secret keys to use (key
+     type "keyring").  See the "Security" section.
+
+ (#) RXRPC_EXCLUSIVE_CONNECTION
+
+     This is used to request that new connections should be used for each call
+     made subsequently on this socket.  optval should be NULL and optlen 0.
+
+ (#) RXRPC_MIN_SECURITY_LEVEL
+
+     This is used to specify the minimum security level required for calls on
+     this socket.  optval must point to an int containing one of the following
+     values:
+
+     (a) RXRPC_SECURITY_PLAIN
+
+	 Encrypted checksum only.
+
+     (b) RXRPC_SECURITY_AUTH
+
+	 Encrypted checksum plus packet padded and first eight bytes of packet
+	 encrypted - which includes the actual packet length.
+
+     (c) RXRPC_SECURITY_ENCRYPTED
+
+	 Encrypted checksum plus entire packet padded and encrypted, including
+	 actual packet length.
+
+ (#) RXRPC_UPGRADEABLE_SERVICE
+
+     This is used to indicate that a service socket with two bindings may
+     upgrade one bound service to the other if requested by the client.  optval
+     must point to an array of two unsigned short ints.  The first is the
+     service ID to upgrade from and the second the service ID to upgrade to.
+
+ (#) RXRPC_SUPPORTED_CMSG
+
+     This is a read-only option that writes an int into the buffer indicating
+     the highest control message type supported.
+
+
+
+Security
+========
+
+Currently, only the kerberos 4 equivalent protocol has been implemented
+(security index 2 - rxkad).  This requires the rxkad module to be loaded and,
+on the client, tickets of the appropriate type to be obtained from the AFS
+kaserver or the kerberos server and installed as "rxrpc" type keys.  This is
+normally done using the klog program.  An example simple klog program can be
+found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/klog.c
+
+The payload provided to add_key() on the client should be of the following
+form::
+
+	struct rxrpc_key_sec2_v1 {
+		uint16_t	security_index;	/* 2 */
+		uint16_t	ticket_length;	/* length of ticket[] */
+		uint32_t	expiry;		/* time at which expires */
+		uint8_t		kvno;		/* key version number */
+		uint8_t		__pad[3];
+		uint8_t		session_key[8];	/* DES session key */
+		uint8_t		ticket[0];	/* the encrypted ticket */
+	};
+
+Where the ticket blob is just appended to the above structure.
+
+
+For the server, keys of type "rxrpc_s" must be made available to the server.
+They have a description of "<serviceID>:<securityIndex>" (eg: "52:2" for an
+rxkad key for the AFS VL service).  When such a key is created, it should be
+given the server's secret key as the instantiation data (see the example
+below)::
+
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+A keyring is passed to the server socket by naming it in a sockopt.  The server
+socket then looks the server secret keys up in this keyring when secure
+incoming connections are made.  This can be seen in an example program that can
+be found at:
+
+	http://people.redhat.com/~dhowells/rxrpc/listen.c
+
+
+
+Example Client Usage
+====================
+
+A client would issue an operation by:
+
+ (1) An RxRPC socket is set up by::
+
+	client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the protocol family of the transport
+     socket used - usually IPv4 but it can also be IPv6 [TODO].
+
+ (2) A local address can optionally be bound::
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= 0,  /* we're a client */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,  /* all local interfaces */
+	};
+	bind(client, &srx, sizeof(srx));
+
+     This specifies the local UDP port to be used.  If not given, a random
+     non-privileged port will be used.  A UDP port may be shared between
+     several unrelated RxRPC sockets.  Security is handled on a basis of
+     per-RxRPC virtual connection.
+
+ (3) The security is set::
+
+	const char *key = "AFS:cambridge.redhat.com";
+	setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY, key, strlen(key));
+
+     This issues a request_key() to get the key representing the security
+     context.  The minimum security level can be set::
+
+	unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+	setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
+		   &sec, sizeof(sec));
+
+ (4) The server to be contacted can then be specified (alternatively this can
+     be done through sendmsg)::
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID,
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7005), /* AFS volume manager */
+		.transport.sin_address	= ...,
+	};
+	connect(client, &srx, sizeof(srx));
+
+ (5) The request data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control message attached:
+
+	==================	===================================
+	RXRPC_USER_CALL_ID	specifies the user ID for this call
+	==================	===================================
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last part of
+     the request.  Multiple requests may be made simultaneously.
+
+     An RXRPC_TX_LENGTH control message can also be specified on the first
+     sendmsg() call.
+
+     If a call is intended to go to a destination other than the default
+     specified through connect(), then msghdr::msg_name should be set on the
+     first request message of that call.
+
+ (6) The reply data will then be posted to the server socket for recvmsg() to
+     pick up.  MSG_MORE will be flagged by recvmsg() if there's more reply data
+     for a particular call to be read.  MSG_EOR will be set on the terminal
+     read for a call.
+
+     All data will be delivered with the following control message attached:
+
+	RXRPC_USER_CALL_ID	- specifies the user ID for this call
+
+     If an abort or error occurred, this will be returned in the control data
+     buffer instead, and MSG_EOR will be flagged to indicate the end of that
+     call.
+
+A client may ask for a service ID it knows and ask that this be upgraded to a
+better service if one is available by supplying RXRPC_UPGRADE_SERVICE on the
+first sendmsg() of a call.  The client should then check srx_service in the
+msg_name filled in by recvmsg() when collecting the result.  srx_service will
+hold the same value as given to sendmsg() if the upgrade request was ignored by
+the service - otherwise it will be altered to indicate the service ID the
+server upgraded to.  Note that the upgraded service ID is chosen by the server.
+The caller has to wait until it sees the service ID in the reply before sending
+any more calls (further calls to the same destination will be blocked until the
+probe is concluded).
+
+
+Example Server Usage
+====================
+
+A server would be set up to accept operations in the following manner:
+
+ (1) An RxRPC socket is created by::
+
+	server = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
+
+     Where the third parameter indicates the address type of the transport
+     socket used - usually IPv4.
+
+ (2) Security is set up if desired by giving the socket a keyring with server
+     secret keys in it::
+
+	keyring = add_key("keyring", "AFSkeys", NULL, 0,
+			  KEY_SPEC_PROCESS_KEYRING);
+
+	const char secret_key[8] = {
+		0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
+	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
+
+	setsockopt(server, SOL_RXRPC, RXRPC_SECURITY_KEYRING, "AFSkeys", 7);
+
+     The keyring can be manipulated after it has been given to the socket. This
+     permits the server to add more keys, replace keys, etc. while it is live.
+
+ (3) A local address must then be bound::
+
+	struct sockaddr_rxrpc srx = {
+		.srx_family	= AF_RXRPC,
+		.srx_service	= VL_SERVICE_ID, /* RxRPC service ID */
+		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
+		.transport.sin_family	= AF_INET,
+		.transport.sin_port	= htons(7000), /* AFS callback */
+		.transport.sin_address	= 0,  /* all local interfaces */
+	};
+	bind(server, &srx, sizeof(srx));
+
+     More than one service ID may be bound to a socket, provided the transport
+     parameters are the same.  The limit is currently two.  To do this, bind()
+     should be called twice.
+
+ (4) If service upgrading is required, first two service IDs must have been
+     bound and then the following option must be set::
+
+	unsigned short service_ids[2] = { from_ID, to_ID };
+	setsockopt(server, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
+		   service_ids, sizeof(service_ids));
+
+     This will automatically upgrade connections on service from_ID to service
+     to_ID if they request it.  This will be reflected in msg_name obtained
+     through recvmsg() when the request data is delivered to userspace.
+
+ (5) The server is then set to listen out for incoming calls::
+
+	listen(server, 100);
+
+ (6) The kernel notifies the server of pending incoming connections by sending
+     it a message for each.  This is received with recvmsg() on the server
+     socket.  It has no data, and has a single dataless control message
+     attached::
+
+	RXRPC_NEW_CALL
+
+     The address that can be passed back by recvmsg() at this point should be
+     ignored since the call for which the message was posted may have gone by
+     the time it is accepted - in which case the first call still on the queue
+     will be accepted.
+
+ (7) The server then accepts the new call by issuing a sendmsg() with two
+     pieces of control data and no actual data:
+
+	==================	==============================
+	RXRPC_ACCEPT		indicate connection acceptance
+	RXRPC_USER_CALL_ID	specify user ID for this call
+	==================	==============================
+
+ (8) The first request data packet will then be posted to the server socket for
+     recvmsg() to pick up.  At that point, the RxRPC address for the call can
+     be read from the address fields in the msghdr struct.
+
+     Subsequent request data will be posted to the server socket for recvmsg()
+     to collect as it arrives.  All but the last piece of the request data will
+     be delivered with MSG_MORE flagged.
+
+     All data will be delivered with the following control message attached:
+
+
+	==================	===================================
+	RXRPC_USER_CALL_ID	specifies the user ID for this call
+	==================	===================================
+
+ (9) The reply data should then be posted to the server socket using a series
+     of sendmsg() calls, each with the following control messages attached:
+
+	==================	===================================
+	RXRPC_USER_CALL_ID	specifies the user ID for this call
+	==================	===================================
+
+     MSG_MORE should be set in msghdr::msg_flags on all but the last message
+     for a particular call.
+
+(10) The final ACK from the client will be posted for retrieval by recvmsg()
+     when it is received.  It will take the form of a dataless message with two
+     control messages attached:
+
+	==================	===================================
+	RXRPC_USER_CALL_ID	specifies the user ID for this call
+	RXRPC_ACK		indicates final ACK (no data)
+	==================	===================================
+
+     MSG_EOR will be flagged to indicate that this is the final message for
+     this call.
+
+(11) Up to the point the final packet of reply data is sent, the call can be
+     aborted by calling sendmsg() with a dataless message with the following
+     control messages attached:
+
+	==================	===================================
+	RXRPC_USER_CALL_ID	specifies the user ID for this call
+	RXRPC_ABORT		indicates abort code (4 byte data)
+	==================	===================================
+
+     Any packets waiting in the socket's receive queue will be discarded if
+     this is issued.
+
+Note that all the communications for a particular service take place through
+the one server socket, using control messages on sendmsg() and recvmsg() to
+determine the call affected.
+
+
+AF_RXRPC Kernel Interface
+=========================
+
+The AF_RXRPC module also provides an interface for use by in-kernel utilities
+such as the AFS filesystem.  This permits such a utility to:
+
+ (1) Use different keys directly on individual client calls on one socket
+     rather than having to open a whole slew of sockets, one for each key it
+     might want to use.
+
+ (2) Avoid having RxRPC call request_key() at the point of issue of a call or
+     opening of a socket.  Instead the utility is responsible for requesting a
+     key at the appropriate point.  AFS, for instance, would do this during VFS
+     operations such as open() or unlink().  The key is then handed through
+     when the call is initiated.
+
+ (3) Request the use of something other than GFP_KERNEL to allocate memory.
+
+ (4) Avoid the overhead of using the recvmsg() call.  RxRPC messages can be
+     intercepted before they get put into the socket Rx queue and the socket
+     buffers manipulated directly.
+
+To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
+bind an address as appropriate and listen if it's to be a server socket, but
+then it passes this to the kernel interface functions.
+
+The kernel interface functions are as follows:
+
+ (#) Begin a new client call::
+
+	struct rxrpc_call *
+	rxrpc_kernel_begin_call(struct socket *sock,
+				struct sockaddr_rxrpc *srx,
+				struct key *key,
+				unsigned long user_call_ID,
+				s64 tx_total_len,
+				gfp_t gfp,
+				rxrpc_notify_rx_t notify_rx,
+				bool upgrade,
+				bool intr,
+				unsigned int debug_id);
+
+     This allocates the infrastructure to make a new RxRPC call and assigns
+     call and connection numbers.  The call will be made on the UDP port that
+     the socket is bound to.  The call will go to the destination address of a
+     connected client socket unless an alternative is supplied (srx is
+     non-NULL).
+
+     If a key is supplied then this will be used to secure the call instead of
+     the key bound to the socket with the RXRPC_SECURITY_KEY sockopt.  Calls
+     secured in this way will still share connections if at all possible.
+
+     The user_call_ID is equivalent to that supplied to sendmsg() in the
+     control data buffer.  It is entirely feasible to use this to point to a
+     kernel data structure.
+
+     tx_total_len is the amount of data the caller is intending to transmit
+     with this call (or -1 if unknown at this point).  Setting the data size
+     allows the kernel to encrypt directly to the packet buffers, thereby
+     saving a copy.  The value may not be less than -1.
+
+     notify_rx is a pointer to a function to be called when events such as
+     incoming data packets or remote aborts happen.
+
+     upgrade should be set to true if a client operation should request that
+     the server upgrade the service to a better one.  The resultant service ID
+     is returned by rxrpc_kernel_recv_data().
+
+     intr should be set to true if the call should be interruptible.  If this
+     is not set, this function may not return until a channel has been
+     allocated; if it is set, the function may return -ERESTARTSYS.
+
+     debug_id is the call debugging ID to be used for tracing.  This can be
+     obtained by atomically incrementing rxrpc_debug_id.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned.  The caller now holds a reference on this and it must be
+     properly ended.
+
+ (#) End a client call::
+
+	void rxrpc_kernel_end_call(struct socket *sock,
+				   struct rxrpc_call *call);
+
+     This is used to end a previously begun call.  The user_call_ID is expunged
+     from AF_RXRPC's knowledge and will not be seen again in association with
+     the specified call.
+
+ (#) Send data through a call::
+
+	typedef void (*rxrpc_notify_end_tx_t)(struct sock *sk,
+					      unsigned long user_call_ID,
+					      struct sk_buff *skb);
+
+	int rxrpc_kernel_send_data(struct socket *sock,
+				   struct rxrpc_call *call,
+				   struct msghdr *msg,
+				   size_t len,
+				   rxrpc_notify_end_tx_t notify_end_tx);
+
+     This is used to supply either the request part of a client call or the
+     reply part of a server call.  msg.msg_iovlen and msg.msg_iov specify the
+     data buffers to be used.  msg_iov may not be NULL and must point
+     exclusively to in-kernel virtual addresses.  msg.msg_flags may be given
+     MSG_MORE if there will be subsequent data sends for this call.
+
+     The msg must not specify a destination address, control data or any flags
+     other than MSG_MORE.  len is the total amount of data to transmit.
+
+     notify_end_tx can be NULL or it can be used to specify a function to be
+     called when the call changes state to end the Tx phase.  This function is
+     called with the call-state spinlock held to prevent any reply or final ACK
+     from being delivered first.
+
+ (#) Receive data from a call::
+
+	int rxrpc_kernel_recv_data(struct socket *sock,
+				   struct rxrpc_call *call,
+				   void *buf,
+				   size_t size,
+				   size_t *_offset,
+				   bool want_more,
+				   u32 *_abort,
+				   u16 *_service)
+
+      This is used to receive data from either the reply part of a client call
+      or the request part of a service call.  buf and size specify how much
+      data is desired and where to store it.  ``*_offset`` is added on to buf
+      and subtracted from size internally; the amount copied into the buffer is
+      added to ``*_offset`` before returning.
+
+      want_more should be true if further data will be required after this is
+      satisfied and false if this is the last item of the receive phase.
+
+      There are three normal returns: 0 if the buffer was filled and want_more
+      was true; 1 if the buffer was filled, the last DATA packet has been
+      emptied and want_more was false; and -EAGAIN if the function needs to be
+      called again.
+
+      If the last DATA packet is processed but the buffer contains less than
+      the amount requested, -EBADMSG is returned.  If want_more wasn't set, but
+      more data was available, -EMSGSIZE is returned.
+
+      If a remote ABORT is detected, the abort code received will be stored in
+      ``*_abort`` and -ECONNABORTED will be returned.
+
+      The service ID that the call ended up with is returned into ``*_service``.
+      This can be used to see if a call got a service upgrade.
+
+ (#) Abort a call
+
+     ::
+
+	void rxrpc_kernel_abort_call(struct socket *sock,
+				     struct rxrpc_call *call,
+				     u32 abort_code);
+
+     This is used to abort a call if it's still in an abortable state.  The
+     abort code specified will be placed in the ABORT message sent.
+
+ (#) Intercept received RxRPC messages::
+
+	typedef void (*rxrpc_interceptor_t)(struct sock *sk,
+					    unsigned long user_call_ID,
+					    struct sk_buff *skb);
+
+	void
+	rxrpc_kernel_intercept_rx_messages(struct socket *sock,
+					   rxrpc_interceptor_t interceptor);
+
+     This installs an interceptor function on the specified AF_RXRPC socket.
+     All messages that would otherwise wind up in the socket's Rx queue are
+     then diverted to this function.  Note that care must be taken to process
+     the messages in the right order to maintain DATA message sequentiality.
+
+     The interceptor function itself is provided with the address of the socket
+     handling the incoming message, the ID assigned by the kernel utility to the
+     call and the socket buffer containing the message.
+
+     The skb->mark field indicates the type of message:
+
+	===============================	=======================================
+	Mark				Meaning
+	===============================	=======================================
+	RXRPC_SKB_MARK_DATA		Data message
+	RXRPC_SKB_MARK_FINAL_ACK	Final ACK received for an incoming call
+	RXRPC_SKB_MARK_BUSY		Client call rejected as server busy
+	RXRPC_SKB_MARK_REMOTE_ABORT	Call aborted by peer
+	RXRPC_SKB_MARK_NET_ERROR	Network error detected
+	RXRPC_SKB_MARK_LOCAL_ERROR	Local error encountered
+	RXRPC_SKB_MARK_NEW_CALL		New incoming call awaiting acceptance
+	===============================	=======================================
+
+     The remote abort message can be probed with rxrpc_kernel_get_abort_code().
+     The two error messages can be probed with rxrpc_kernel_get_error_number().
+     A new call can be accepted with rxrpc_kernel_accept_call().
+
+     Data messages can have their contents extracted with the usual bunch of
+     socket buffer manipulation functions.  A data message can be determined to
+     be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
+     data message has been used up, rxrpc_kernel_data_consumed() should be
+     called on it.
+
+     Messages should be handed to rxrpc_kernel_free_skb() for disposal.  It
+     is possible to get extra refs on all types of message for later freeing,
+     but this may pin the state of a call until the message is finally freed.
+
+ (#) Accept an incoming call::
+
+	struct rxrpc_call *
+	rxrpc_kernel_accept_call(struct socket *sock,
+				 unsigned long user_call_ID);
+
+     This is used to accept an incoming call and to assign it a call ID.  This
+     function is similar to rxrpc_kernel_begin_call() and calls accepted must
+     be ended in the same way.
+
+     If this function is successful, an opaque reference to the RxRPC call is
+     returned.  The caller now holds a reference on this and it must be
+     properly ended.
+
+ (#) Reject an incoming call::
+
+	int rxrpc_kernel_reject_call(struct socket *sock);
+
+     This is used to reject the first incoming call on the socket's queue with
+     a BUSY message.  -ENODATA is returned if there were no incoming calls.
+     Other errors may be returned if the call had been aborted (-ECONNABORTED)
+     or had timed out (-ETIME).
+
+ (#) Allocate a null key for doing anonymous security::
+
+	struct key *rxrpc_get_null_key(const char *keyname);
+
+     This is used to allocate a null RxRPC key that can be used to indicate
+     anonymous security for a particular domain.
+
+ (#) Get the peer address of a call::
+
+	void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
+				   struct sockaddr_rxrpc *_srx);
+
+     This is used to find the remote peer address of a call.
+
+ (#) Set the total transmit data size on a call::
+
+	void rxrpc_kernel_set_tx_length(struct socket *sock,
+					struct rxrpc_call *call,
+					s64 tx_total_len);
+
+     This sets the amount of data that the caller is intending to transmit on a
+     call.  It's intended to be used for setting the reply size as the request
+     size should be set when the call is begun.  tx_total_len may not be less
+     than zero.
+
+ (#) Get call RTT::
+
+	u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);
+
+     Get the RTT time to the peer in use by a call.  The value returned is in
+     nanoseconds.
+
+ (#) Check call still alive::
+
+	bool rxrpc_kernel_check_life(struct socket *sock,
+				     struct rxrpc_call *call,
+				     u32 *_life);
+	void rxrpc_kernel_probe_life(struct socket *sock,
+				     struct rxrpc_call *call);
+
+     The first function passes back in ``*_life`` a number that is updated when
+     ACKs are received from the peer (notably including PING RESPONSE ACKs
+     which we can elicit by sending PING ACKs to see if the call still exists
+     on the server).  The caller should compare the numbers of two calls to see
+     if the call is still alive after waiting for a suitable interval.  It also
+     returns true as long as the call hasn't yet reached the completed state.
+
+     This allows the caller to work out if the server is still contactable and
+     if the call is still alive on the server while waiting for the server to
+     process a client operation.
+
+     The second function causes a ping ACK to be transmitted to try to provoke
+     the peer into responding, which would then cause the value returned by the
+     first function to change.  Note that this must be called in TASK_RUNNING
+     state.
+
+ (#) Get reply timestamp::
+
+	bool rxrpc_kernel_get_reply_time(struct socket *sock,
+					 struct rxrpc_call *call,
+					 ktime_t *_ts)
+
+     This allows the timestamp on the first DATA packet of the reply of a
+     client call to be queried, provided that it is still in the Rx ring.  If
+     successful, the timestamp will be stored into ``*_ts`` and true will be
+     returned; false will be returned otherwise.
+
+ (#) Get remote client epoch::
+
+	u32 rxrpc_kernel_get_epoch(struct socket *sock,
+				   struct rxrpc_call *call)
+
+     This allows the epoch that's contained in packets of an incoming client
+     call to be queried.  This value is returned.  The function is always
+     successful if the call is still in progress.  It shouldn't be called once
+     the call has expired.  Note that calling this on a local client call only
+     returns the local epoch.
+
+     This value can be used to determine if the remote client has been
+     restarted as it shouldn't change otherwise.
+
+ (#) Set the maximum lifespan on a call::
+
+	void rxrpc_kernel_set_max_life(struct socket *sock,
+				       struct rxrpc_call *call,
+				       unsigned long hard_timeout)
+
+     This sets the maximum lifespan on a call to hard_timeout (which is in
+     jiffies).  In the event of the timeout occurring, the call will be
+     aborted and -ETIME or -ETIMEDOUT will be returned.
+
+
+Configurable Parameters
+=======================
+
+The RxRPC protocol driver has a number of configurable parameters that can be
+adjusted through sysctls in /proc/sys/net/rxrpc/:
+
+ (#) req_ack_delay
+
+     The amount of time in milliseconds after receiving a packet with the
+     request-ack flag set before we honour the flag and actually send the
+     requested ack.
+
+     Usually the other side won't stop sending packets until the advertised
+     reception window is full (to a maximum of 255 packets), so delaying the
+     ACK permits several packets to be ACK'd in one go.
+
+ (#) soft_ack_delay
+
+     The amount of time in milliseconds after receiving a new packet before we
+     generate a soft-ACK to tell the sender that it doesn't need to resend.
+
+ (#) idle_ack_delay
+
+     The amount of time in milliseconds after all the packets currently in the
+     received queue have been consumed before we generate a hard-ACK to tell
+     the sender it can free its buffers, assuming no other reason occurs that
+     we would send an ACK.
+
+ (#) resend_timeout
+
+     The amount of time in milliseconds after transmitting a packet before we
+     transmit it again, assuming no ACK is received from the receiver telling
+     us they got it.
+
+ (#) max_call_lifetime
+
+     The maximum amount of time in seconds that a call may be in progress
+     before we preemptively kill it.
+
+ (#) dead_call_expiry
+
+     The amount of time in seconds before we remove a dead call from the call
+     list.  Dead calls are kept around for a little while for the purpose of
+     repeating ACK and ABORT packets.
+
+ (#) connection_expiry
+
+     The amount of time in seconds after a connection was last used before we
+     remove it from the connection list.  While a connection is in existence,
+     it serves as a placeholder for negotiated security; when it is deleted,
+     the security must be renegotiated.
+
+ (#) transport_expiry
+
+     The amount of time in seconds after a transport was last used before we
+     remove it from the transport list.  While a transport is in existence, it
+     serves to anchor the peer data and keeps the connection ID counter.
+
+ (#) rxrpc_rx_window_size
+
+     The size of the receive window in packets.  This is the maximum number of
+     unconsumed received packets we're willing to hold in memory for any
+     particular call.
+
+ (#) rxrpc_rx_mtu
+
+     The maximum packet MTU size that we're willing to receive in bytes.  This
+     indicates to the peer whether we're willing to accept jumbo packets.
+
+ (#) rxrpc_rx_jumbo_max
+
+     The maximum number of packets that we're willing to accept in a jumbo
+     packet.  Non-terminal packets in a jumbo packet must contain a four byte
+     header plus exactly 1412 bytes of data.  The terminal packet must contain
+     a four byte header plus any amount of data.  In any event, a jumbo packet
+     may not exceed rxrpc_rx_mtu in size.
diff --git a/Documentation/networking/rxrpc.txt b/Documentation/networking/rxrpc.txt
deleted file mode 100644
index 180e07d956a7..000000000000
--- a/Documentation/networking/rxrpc.txt
+++ /dev/null
@@ -1,1155 +0,0 @@
-			    ======================
-			    RxRPC NETWORK PROTOCOL
-			    ======================
-
-The RxRPC protocol driver provides a reliable two-phase transport on top of UDP
-that can be used to perform RxRPC remote operations.  This is done over sockets
-of AF_RXRPC family, using sendmsg() and recvmsg() with control data to send and
-receive data, aborts and errors.
-
-Contents of this document:
-
- (*) Overview.
-
- (*) RxRPC protocol summary.
-
- (*) AF_RXRPC driver model.
-
- (*) Control messages.
-
- (*) Socket options.
-
- (*) Security.
-
- (*) Example client usage.
-
- (*) Example server usage.
-
- (*) AF_RXRPC kernel interface.
-
- (*) Configurable parameters.
-
-
-========
-OVERVIEW
-========
-
-RxRPC is a two-layer protocol.  There is a session layer which provides
-reliable virtual connections using UDP over IPv4 (or IPv6) as the transport
-layer, but implements a real network protocol; and there's the presentation
-layer which renders structured data to binary blobs and back again using XDR
-(as does SunRPC):
-
-		+-------------+
-		| Application |
-		+-------------+
-		|     XDR     |		Presentation
-		+-------------+
-		|    RxRPC    |		Session
-		+-------------+
-		|     UDP     |		Transport
-		+-------------+
-
-
-AF_RXRPC provides:
-
- (1) Part of an RxRPC facility for both kernel and userspace applications by
-     making the session part of it a Linux network protocol (AF_RXRPC).
-
- (2) A two-phase protocol.  The client transmits a blob (the request) and then
-     receives a blob (the reply), and the server receives the request and then
-     transmits the reply.
-
- (3) Retention of the reusable bits of the transport system set up for one call
-     to speed up subsequent calls.
-
- (4) A secure protocol, using the Linux kernel's key retention facility to
-     manage security on the client end.  The server end must of necessity be
-     more active in security negotiations.
-
-AF_RXRPC does not provide XDR marshalling/presentation facilities.  That is
-left to the application.  AF_RXRPC only deals in blobs.  Even the operation ID
-is just the first four bytes of the request blob, and as such is beyond the
-kernel's interest.
-
-
-Sockets of AF_RXRPC family are:
-
- (1) created as type SOCK_DGRAM;
-
- (2) provided with a protocol of the type of underlying transport they're going
-     to use - currently only PF_INET is supported.
-
-
-The Andrew File System (AFS) is an example of an application that uses this and
-that has both kernel (filesystem) and userspace (utility) components.
-
-
-======================
-RXRPC PROTOCOL SUMMARY
-======================
-
-An overview of the RxRPC protocol:
-
- (*) RxRPC sits on top of another networking protocol (UDP is the only option
-     currently), and uses this to provide network transport.  UDP ports, for
-     example, provide transport endpoints.
-
- (*) RxRPC supports multiple virtual "connections" from any given transport
-     endpoint, thus allowing the endpoints to be shared, even to the same
-     remote endpoint.
-
- (*) Each connection goes to a particular "service".  A connection may not go
-     to multiple services.  A service may be considered the RxRPC equivalent of
-     a port number.  AF_RXRPC permits multiple services to share an endpoint.
-
- (*) Client-originating packets are marked, thus a transport endpoint can be
-     shared between client and server connections (connections have a
-     direction).
-
- (*) Up to a billion connections may be supported concurrently between one
-     local transport endpoint and one service on one remote endpoint.  An RxRPC
-     connection is described by seven numbers:
-
-	Local address	}
-	Local port	} Transport (UDP) address
-	Remote address	}
-	Remote port	}
-	Direction
-	Connection ID
-	Service ID
-
- (*) Each RxRPC operation is a "call".  A connection may make up to four
-     billion calls, but only up to four calls may be in progress on a
-     connection at any one time.
-
- (*) Calls are two-phase and asymmetric: the client sends its request data,
-     which the service receives; then the service transmits the reply data
-     which the client receives.
-
- (*) The data blobs are of indefinite size, the end of a phase is marked with a
-     flag in the packet.  The number of packets of data making up one blob may
-     not exceed 4 billion, however, as this would cause the sequence number to
-     wrap.
-
- (*) The first four bytes of the request data are the service operation ID.
-
- (*) Security is negotiated on a per-connection basis.  The connection is
-     initiated by the first data packet on it arriving.  If security is
-     requested, the server then issues a "challenge" and then the client
-     replies with a "response".  If the response is successful, the security is
-     set for the lifetime of that connection, and all subsequent calls made
-     upon it use that same security.  In the event that the server lets a
-     connection lapse before the client, the security will be renegotiated if
-     the client uses the connection again.
-
- (*) Calls use ACK packets to handle reliability.  Data packets are also
-     explicitly sequenced per call.
-
- (*) There are two types of positive acknowledgment: hard-ACKs and soft-ACKs.
-     A hard-ACK indicates to the far side that all the data received to a point
-     has been received and processed; a soft-ACK indicates that the data has
-     been received but may yet be discarded and re-requested.  The sender may
-     not discard any transmittable packets until they've been hard-ACK'd.
-
- (*) Reception of a reply data packet implicitly hard-ACK's all the data
-     packets that make up the request.
-
- (*) An call is complete when the request has been sent, the reply has been
-     received and the final hard-ACK on the last packet of the reply has
-     reached the server.
-
- (*) An call may be aborted by either end at any time up to its completion.
-
-
-=====================
-AF_RXRPC DRIVER MODEL
-=====================
-
-About the AF_RXRPC driver:
-
- (*) The AF_RXRPC protocol transparently uses internal sockets of the transport
-     protocol to represent transport endpoints.
-
- (*) AF_RXRPC sockets map onto RxRPC connection bundles.  Actual RxRPC
-     connections are handled transparently.  One client socket may be used to
-     make multiple simultaneous calls to the same service.  One server socket
-     may handle calls from many clients.
-
- (*) Additional parallel client connections will be initiated to support extra
-     concurrent calls, up to a tunable limit.
-
- (*) Each connection is retained for a certain amount of time [tunable] after
-     the last call currently using it has completed in case a new call is made
-     that could reuse it.
-
- (*) Each internal UDP socket is retained [tunable] for a certain amount of
-     time [tunable] after the last connection using it discarded, in case a new
-     connection is made that could use it.
-
- (*) A client-side connection is only shared between calls if they have have
-     the same key struct describing their security (and assuming the calls
-     would otherwise share the connection).  Non-secured calls would also be
-     able to share connections with each other.
-
- (*) A server-side connection is shared if the client says it is.
-
- (*) ACK'ing is handled by the protocol driver automatically, including ping
-     replying.
-
- (*) SO_KEEPALIVE automatically pings the other side to keep the connection
-     alive [TODO].
-
- (*) If an ICMP error is received, all calls affected by that error will be
-     aborted with an appropriate network error passed through recvmsg().
-
-
-Interaction with the user of the RxRPC socket:
-
- (*) A socket is made into a server socket by binding an address with a
-     non-zero service ID.
-
- (*) In the client, sending a request is achieved with one or more sendmsgs,
-     followed by the reply being received with one or more recvmsgs.
-
- (*) The first sendmsg for a request to be sent from a client contains a tag to
-     be used in all other sendmsgs or recvmsgs associated with that call.  The
-     tag is carried in the control data.
-
- (*) connect() is used to supply a default destination address for a client
-     socket.  This may be overridden by supplying an alternate address to the
-     first sendmsg() of a call (struct msghdr::msg_name).
-
- (*) If connect() is called on an unbound client, a random local port will
-     bound before the operation takes place.
-
- (*) A server socket may also be used to make client calls.  To do this, the
-     first sendmsg() of the call must specify the target address.  The server's
-     transport endpoint is used to send the packets.
-
- (*) Once the application has received the last message associated with a call,
-     the tag is guaranteed not to be seen again, and so it can be used to pin
-     client resources.  A new call can then be initiated with the same tag
-     without fear of interference.
-
- (*) In the server, a request is received with one or more recvmsgs, then the
-     the reply is transmitted with one or more sendmsgs, and then the final ACK
-     is received with a last recvmsg.
-
- (*) When sending data for a call, sendmsg is given MSG_MORE if there's more
-     data to come on that call.
-
- (*) When receiving data for a call, recvmsg flags MSG_MORE if there's more
-     data to come for that call.
-
- (*) When receiving data or messages for a call, MSG_EOR is flagged by recvmsg
-     to indicate the terminal message for that call.
-
- (*) A call may be aborted by adding an abort control message to the control
-     data.  Issuing an abort terminates the kernel's use of that call's tag.
-     Any messages waiting in the receive queue for that call will be discarded.
-
- (*) Aborts, busy notifications and challenge packets are delivered by recvmsg,
-     and control data messages will be set to indicate the context.  Receiving
-     an abort or a busy message terminates the kernel's use of that call's tag.
-
- (*) The control data part of the msghdr struct is used for a number of things:
-
-     (*) The tag of the intended or affected call.
-
-     (*) Sending or receiving errors, aborts and busy notifications.
-
-     (*) Notifications of incoming calls.
-
-     (*) Sending debug requests and receiving debug replies [TODO].
-
- (*) When the kernel has received and set up an incoming call, it sends a
-     message to server application to let it know there's a new call awaiting
-     its acceptance [recvmsg reports a special control message].  The server
-     application then uses sendmsg to assign a tag to the new call.  Once that
-     is done, the first part of the request data will be delivered by recvmsg.
-
- (*) The server application has to provide the server socket with a keyring of
-     secret keys corresponding to the security types it permits.  When a secure
-     connection is being set up, the kernel looks up the appropriate secret key
-     in the keyring and then sends a challenge packet to the client and
-     receives a response packet.  The kernel then checks the authorisation of
-     the packet and either aborts the connection or sets up the security.
-
- (*) The name of the key a client will use to secure its communications is
-     nominated by a socket option.
-
-
-Notes on sendmsg:
-
- (*) MSG_WAITALL can be set to tell sendmsg to ignore signals if the peer is
-     making progress at accepting packets within a reasonable time such that we
-     manage to queue up all the data for transmission.  This requires the
-     client to accept at least one packet per 2*RTT time period.
-
-     If this isn't set, sendmsg() will return immediately, either returning
-     EINTR/ERESTARTSYS if nothing was consumed or returning the amount of data
-     consumed.
-
-
-Notes on recvmsg:
-
- (*) If there's a sequence of data messages belonging to a particular call on
-     the receive queue, then recvmsg will keep working through them until:
-
-     (a) it meets the end of that call's received data,
-
-     (b) it meets a non-data message,
-
-     (c) it meets a message belonging to a different call, or
-
-     (d) it fills the user buffer.
-
-     If recvmsg is called in blocking mode, it will keep sleeping, awaiting the
-     reception of further data, until one of the above four conditions is met.
-
- (2) MSG_PEEK operates similarly, but will return immediately if it has put any
-     data in the buffer rather than sleeping until it can fill the buffer.
-
- (3) If a data message is only partially consumed in filling a user buffer,
-     then the remainder of that message will be left on the front of the queue
-     for the next taker.  MSG_TRUNC will never be flagged.
-
- (4) If there is more data to be had on a call (it hasn't copied the last byte
-     of the last data message in that phase yet), then MSG_MORE will be
-     flagged.
-
-
-================
-CONTROL MESSAGES
-================
-
-AF_RXRPC makes use of control messages in sendmsg() and recvmsg() to multiplex
-calls, to invoke certain actions and to report certain conditions.  These are:
-
-	MESSAGE ID		SRT DATA	MEANING
-	=======================	=== ===========	===============================
-	RXRPC_USER_CALL_ID	sr- User ID	App's call specifier
-	RXRPC_ABORT		srt Abort code	Abort code to issue/received
-	RXRPC_ACK		-rt n/a		Final ACK received
-	RXRPC_NET_ERROR		-rt error num	Network error on call
-	RXRPC_BUSY		-rt n/a		Call rejected (server busy)
-	RXRPC_LOCAL_ERROR	-rt error num	Local error encountered
-	RXRPC_NEW_CALL		-r- n/a		New call received
-	RXRPC_ACCEPT		s-- n/a		Accept new call
-	RXRPC_EXCLUSIVE_CALL	s-- n/a		Make an exclusive client call
-	RXRPC_UPGRADE_SERVICE	s-- n/a		Client call can be upgraded
-	RXRPC_TX_LENGTH		s-- data len	Total length of Tx data
-
-	(SRT = usable in Sendmsg / delivered by Recvmsg / Terminal message)
-
- (*) RXRPC_USER_CALL_ID
-
-     This is used to indicate the application's call ID.  It's an unsigned long
-     that the app specifies in the client by attaching it to the first data
-     message or in the server by passing it in association with an RXRPC_ACCEPT
-     message.  recvmsg() passes it in conjunction with all messages except
-     those of the RXRPC_NEW_CALL message.
-
- (*) RXRPC_ABORT
-
-     This is can be used by an application to abort a call by passing it to
-     sendmsg, or it can be delivered by recvmsg to indicate a remote abort was
-     received.  Either way, it must be associated with an RXRPC_USER_CALL_ID to
-     specify the call affected.  If an abort is being sent, then error EBADSLT
-     will be returned if there is no call with that user ID.
-
- (*) RXRPC_ACK
-
-     This is delivered to a server application to indicate that the final ACK
-     of a call was received from the client.  It will be associated with an
-     RXRPC_USER_CALL_ID to indicate the call that's now complete.
-
- (*) RXRPC_NET_ERROR
-
-     This is delivered to an application to indicate that an ICMP error message
-     was encountered in the process of trying to talk to the peer.  An
-     errno-class integer value will be included in the control message data
-     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
-     affected.
-
- (*) RXRPC_BUSY
-
-     This is delivered to a client application to indicate that a call was
-     rejected by the server due to the server being busy.  It will be
-     associated with an RXRPC_USER_CALL_ID to indicate the rejected call.
-
- (*) RXRPC_LOCAL_ERROR
-
-     This is delivered to an application to indicate that a local error was
-     encountered and that a call has been aborted because of it.  An
-     errno-class integer value will be included in the control message data
-     indicating the problem, and an RXRPC_USER_CALL_ID will indicate the call
-     affected.
-
- (*) RXRPC_NEW_CALL
-
-     This is delivered to indicate to a server application that a new call has
-     arrived and is awaiting acceptance.  No user ID is associated with this,
-     as a user ID must subsequently be assigned by doing an RXRPC_ACCEPT.
-
- (*) RXRPC_ACCEPT
-
-     This is used by a server application to attempt to accept a call and
-     assign it a user ID.  It should be associated with an RXRPC_USER_CALL_ID
-     to indicate the user ID to be assigned.  If there is no call to be
-     accepted (it may have timed out, been aborted, etc.), then sendmsg will
-     return error ENODATA.  If the user ID is already in use by another call,
-     then error EBADSLT will be returned.
-
- (*) RXRPC_EXCLUSIVE_CALL
-
-     This is used to indicate that a client call should be made on a one-off
-     connection.  The connection is discarded once the call has terminated.
-
- (*) RXRPC_UPGRADE_SERVICE
-
-     This is used to make a client call to probe if the specified service ID
-     may be upgraded by the server.  The caller must check msg_name returned to
-     recvmsg() for the service ID actually in use.  The operation probed must
-     be one that takes the same arguments in both services.
-
-     Once this has been used to establish the upgrade capability (or lack
-     thereof) of the server, the service ID returned should be used for all
-     future communication to that server and RXRPC_UPGRADE_SERVICE should no
-     longer be set.
-
- (*) RXRPC_TX_LENGTH
-
-     This is used to inform the kernel of the total amount of data that is
-     going to be transmitted by a call (whether in a client request or a
-     service response).  If given, it allows the kernel to encrypt from the
-     userspace buffer directly to the packet buffers, rather than copying into
-     the buffer and then encrypting in place.  This may only be given with the
-     first sendmsg() providing data for a call.  EMSGSIZE will be generated if
-     the amount of data actually given is different.
-
-     This takes a parameter of __s64 type that indicates how much will be
-     transmitted.  This may not be less than zero.
-
-The symbol RXRPC__SUPPORTED is defined as one more than the highest control
-message type supported.  At run time this can be queried by means of the
-RXRPC_SUPPORTED_CMSG socket option (see below).
-
-
-==============
-SOCKET OPTIONS
-==============
-
-AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
-
- (*) RXRPC_SECURITY_KEY
-
-     This is used to specify the description of the key to be used.  The key is
-     extracted from the calling process's keyrings with request_key() and
-     should be of "rxrpc" type.
-
-     The optval pointer points to the description string, and optlen indicates
-     how long the string is, without the NUL terminator.
-
- (*) RXRPC_SECURITY_KEYRING
-
-     Similar to above but specifies a keyring of server secret keys to use (key
-     type "keyring").  See the "Security" section.
-
- (*) RXRPC_EXCLUSIVE_CONNECTION
-
-     This is used to request that new connections should be used for each call
-     made subsequently on this socket.  optval should be NULL and optlen 0.
-
- (*) RXRPC_MIN_SECURITY_LEVEL
-
-     This is used to specify the minimum security level required for calls on
-     this socket.  optval must point to an int containing one of the following
-     values:
-
-     (a) RXRPC_SECURITY_PLAIN
-
-	 Encrypted checksum only.
-
-     (b) RXRPC_SECURITY_AUTH
-
-	 Encrypted checksum plus packet padded and first eight bytes of packet
-	 encrypted - which includes the actual packet length.
-
-     (c) RXRPC_SECURITY_ENCRYPTED
-
-	 Encrypted checksum plus entire packet padded and encrypted, including
-	 actual packet length.
-
- (*) RXRPC_UPGRADEABLE_SERVICE
-
-     This is used to indicate that a service socket with two bindings may
-     upgrade one bound service to the other if requested by the client.  optval
-     must point to an array of two unsigned short ints.  The first is the
-     service ID to upgrade from and the second the service ID to upgrade to.
-
- (*) RXRPC_SUPPORTED_CMSG
-
-     This is a read-only option that writes an int into the buffer indicating
-     the highest control message type supported.
-
-
-========
-SECURITY
-========
-
-Currently, only the kerberos 4 equivalent protocol has been implemented
-(security index 2 - rxkad).  This requires the rxkad module to be loaded and,
-on the client, tickets of the appropriate type to be obtained from the AFS
-kaserver or the kerberos server and installed as "rxrpc" type keys.  This is
-normally done using the klog program.  An example simple klog program can be
-found at:
-
-	http://people.redhat.com/~dhowells/rxrpc/klog.c
-
-The payload provided to add_key() on the client should be of the following
-form:
-
-	struct rxrpc_key_sec2_v1 {
-		uint16_t	security_index;	/* 2 */
-		uint16_t	ticket_length;	/* length of ticket[] */
-		uint32_t	expiry;		/* time at which expires */
-		uint8_t		kvno;		/* key version number */
-		uint8_t		__pad[3];
-		uint8_t		session_key[8];	/* DES session key */
-		uint8_t		ticket[0];	/* the encrypted ticket */
-	};
-
-Where the ticket blob is just appended to the above structure.
-
-
-For the server, keys of type "rxrpc_s" must be made available to the server.
-They have a description of "<serviceID>:<securityIndex>" (eg: "52:2" for an
-rxkad key for the AFS VL service).  When such a key is created, it should be
-given the server's secret key as the instantiation data (see the example
-below).
-
-	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
-
-A keyring is passed to the server socket by naming it in a sockopt.  The server
-socket then looks the server secret keys up in this keyring when secure
-incoming connections are made.  This can be seen in an example program that can
-be found at:
-
-	http://people.redhat.com/~dhowells/rxrpc/listen.c
-
-
-====================
-EXAMPLE CLIENT USAGE
-====================
-
-A client would issue an operation by:
-
- (1) An RxRPC socket is set up by:
-
-	client = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
-
-     Where the third parameter indicates the protocol family of the transport
-     socket used - usually IPv4 but it can also be IPv6 [TODO].
-
- (2) A local address can optionally be bound:
-
-	struct sockaddr_rxrpc srx = {
-		.srx_family	= AF_RXRPC,
-		.srx_service	= 0,  /* we're a client */
-		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
-		.transport.sin_family	= AF_INET,
-		.transport.sin_port	= htons(7000), /* AFS callback */
-		.transport.sin_address	= 0,  /* all local interfaces */
-	};
-	bind(client, &srx, sizeof(srx));
-
-     This specifies the local UDP port to be used.  If not given, a random
-     non-privileged port will be used.  A UDP port may be shared between
-     several unrelated RxRPC sockets.  Security is handled on a basis of
-     per-RxRPC virtual connection.
-
- (3) The security is set:
-
-	const char *key = "AFS:cambridge.redhat.com";
-	setsockopt(client, SOL_RXRPC, RXRPC_SECURITY_KEY, key, strlen(key));
-
-     This issues a request_key() to get the key representing the security
-     context.  The minimum security level can be set:
-
-	unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
-	setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
-		   &sec, sizeof(sec));
-
- (4) The server to be contacted can then be specified (alternatively this can
-     be done through sendmsg):
-
-	struct sockaddr_rxrpc srx = {
-		.srx_family	= AF_RXRPC,
-		.srx_service	= VL_SERVICE_ID,
-		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
-		.transport.sin_family	= AF_INET,
-		.transport.sin_port	= htons(7005), /* AFS volume manager */
-		.transport.sin_address	= ...,
-	};
-	connect(client, &srx, sizeof(srx));
-
- (5) The request data should then be posted to the server socket using a series
-     of sendmsg() calls, each with the following control message attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-
-     MSG_MORE should be set in msghdr::msg_flags on all but the last part of
-     the request.  Multiple requests may be made simultaneously.
-
-     An RXRPC_TX_LENGTH control message can also be specified on the first
-     sendmsg() call.
-
-     If a call is intended to go to a destination other than the default
-     specified through connect(), then msghdr::msg_name should be set on the
-     first request message of that call.
-
- (6) The reply data will then be posted to the server socket for recvmsg() to
-     pick up.  MSG_MORE will be flagged by recvmsg() if there's more reply data
-     for a particular call to be read.  MSG_EOR will be set on the terminal
-     read for a call.
-
-     All data will be delivered with the following control message attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-
-     If an abort or error occurred, this will be returned in the control data
-     buffer instead, and MSG_EOR will be flagged to indicate the end of that
-     call.
-
-A client may ask for a service ID it knows and ask that this be upgraded to a
-better service if one is available by supplying RXRPC_UPGRADE_SERVICE on the
-first sendmsg() of a call.  The client should then check srx_service in the
-msg_name filled in by recvmsg() when collecting the result.  srx_service will
-hold the same value as given to sendmsg() if the upgrade request was ignored by
-the service - otherwise it will be altered to indicate the service ID the
-server upgraded to.  Note that the upgraded service ID is chosen by the server.
-The caller has to wait until it sees the service ID in the reply before sending
-any more calls (further calls to the same destination will be blocked until the
-probe is concluded).
-
-
-====================
-EXAMPLE SERVER USAGE
-====================
-
-A server would be set up to accept operations in the following manner:
-
- (1) An RxRPC socket is created by:
-
-	server = socket(AF_RXRPC, SOCK_DGRAM, PF_INET);
-
-     Where the third parameter indicates the address type of the transport
-     socket used - usually IPv4.
-
- (2) Security is set up if desired by giving the socket a keyring with server
-     secret keys in it:
-
-	keyring = add_key("keyring", "AFSkeys", NULL, 0,
-			  KEY_SPEC_PROCESS_KEYRING);
-
-	const char secret_key[8] = {
-		0xa7, 0x83, 0x8a, 0xcb, 0xc7, 0x83, 0xec, 0x94 };
-	add_key("rxrpc_s", "52:2", secret_key, 8, keyring);
-
-	setsockopt(server, SOL_RXRPC, RXRPC_SECURITY_KEYRING, "AFSkeys", 7);
-
-     The keyring can be manipulated after it has been given to the socket. This
-     permits the server to add more keys, replace keys, etc. while it is live.
-
- (3) A local address must then be bound:
-
-	struct sockaddr_rxrpc srx = {
-		.srx_family	= AF_RXRPC,
-		.srx_service	= VL_SERVICE_ID, /* RxRPC service ID */
-		.transport_type	= SOCK_DGRAM,	/* type of transport socket */
-		.transport.sin_family	= AF_INET,
-		.transport.sin_port	= htons(7000), /* AFS callback */
-		.transport.sin_address	= 0,  /* all local interfaces */
-	};
-	bind(server, &srx, sizeof(srx));
-
-     More than one service ID may be bound to a socket, provided the transport
-     parameters are the same.  The limit is currently two.  To do this, bind()
-     should be called twice.
-
- (4) If service upgrading is required, first two service IDs must have been
-     bound and then the following option must be set:
-
-	unsigned short service_ids[2] = { from_ID, to_ID };
-	setsockopt(server, SOL_RXRPC, RXRPC_UPGRADEABLE_SERVICE,
-		   service_ids, sizeof(service_ids));
-
-     This will automatically upgrade connections on service from_ID to service
-     to_ID if they request it.  This will be reflected in msg_name obtained
-     through recvmsg() when the request data is delivered to userspace.
-
- (5) The server is then set to listen out for incoming calls:
-
-	listen(server, 100);
-
- (6) The kernel notifies the server of pending incoming connections by sending
-     it a message for each.  This is received with recvmsg() on the server
-     socket.  It has no data, and has a single dataless control message
-     attached:
-
-	RXRPC_NEW_CALL
-
-     The address that can be passed back by recvmsg() at this point should be
-     ignored since the call for which the message was posted may have gone by
-     the time it is accepted - in which case the first call still on the queue
-     will be accepted.
-
- (7) The server then accepts the new call by issuing a sendmsg() with two
-     pieces of control data and no actual data:
-
-	RXRPC_ACCEPT		- indicate connection acceptance
-	RXRPC_USER_CALL_ID	- specify user ID for this call
-
- (8) The first request data packet will then be posted to the server socket for
-     recvmsg() to pick up.  At that point, the RxRPC address for the call can
-     be read from the address fields in the msghdr struct.
-
-     Subsequent request data will be posted to the server socket for recvmsg()
-     to collect as it arrives.  All but the last piece of the request data will
-     be delivered with MSG_MORE flagged.
-
-     All data will be delivered with the following control message attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-
- (9) The reply data should then be posted to the server socket using a series
-     of sendmsg() calls, each with the following control messages attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-
-     MSG_MORE should be set in msghdr::msg_flags on all but the last message
-     for a particular call.
-
-(10) The final ACK from the client will be posted for retrieval by recvmsg()
-     when it is received.  It will take the form of a dataless message with two
-     control messages attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-	RXRPC_ACK		- indicates final ACK (no data)
-
-     MSG_EOR will be flagged to indicate that this is the final message for
-     this call.
-
-(11) Up to the point the final packet of reply data is sent, the call can be
-     aborted by calling sendmsg() with a dataless message with the following
-     control messages attached:
-
-	RXRPC_USER_CALL_ID	- specifies the user ID for this call
-	RXRPC_ABORT		- indicates abort code (4 byte data)
-
-     Any packets waiting in the socket's receive queue will be discarded if
-     this is issued.
-
-Note that all the communications for a particular service take place through
-the one server socket, using control messages on sendmsg() and recvmsg() to
-determine the call affected.
-
-
-=========================
-AF_RXRPC KERNEL INTERFACE
-=========================
-
-The AF_RXRPC module also provides an interface for use by in-kernel utilities
-such as the AFS filesystem.  This permits such a utility to:
-
- (1) Use different keys directly on individual client calls on one socket
-     rather than having to open a whole slew of sockets, one for each key it
-     might want to use.
-
- (2) Avoid having RxRPC call request_key() at the point of issue of a call or
-     opening of a socket.  Instead the utility is responsible for requesting a
-     key at the appropriate point.  AFS, for instance, would do this during VFS
-     operations such as open() or unlink().  The key is then handed through
-     when the call is initiated.
-
- (3) Request the use of something other than GFP_KERNEL to allocate memory.
-
- (4) Avoid the overhead of using the recvmsg() call.  RxRPC messages can be
-     intercepted before they get put into the socket Rx queue and the socket
-     buffers manipulated directly.
-
-To use the RxRPC facility, a kernel utility must still open an AF_RXRPC socket,
-bind an address as appropriate and listen if it's to be a server socket, but
-then it passes this to the kernel interface functions.
-
-The kernel interface functions are as follows:
-
- (*) Begin a new client call.
-
-	struct rxrpc_call *
-	rxrpc_kernel_begin_call(struct socket *sock,
-				struct sockaddr_rxrpc *srx,
-				struct key *key,
-				unsigned long user_call_ID,
-				s64 tx_total_len,
-				gfp_t gfp,
-				rxrpc_notify_rx_t notify_rx,
-				bool upgrade,
-				bool intr,
-				unsigned int debug_id);
-
-     This allocates the infrastructure to make a new RxRPC call and assigns
-     call and connection numbers.  The call will be made on the UDP port that
-     the socket is bound to.  The call will go to the destination address of a
-     connected client socket unless an alternative is supplied (srx is
-     non-NULL).
-
-     If a key is supplied then this will be used to secure the call instead of
-     the key bound to the socket with the RXRPC_SECURITY_KEY sockopt.  Calls
-     secured in this way will still share connections if at all possible.
-
-     The user_call_ID is equivalent to that supplied to sendmsg() in the
-     control data buffer.  It is entirely feasible to use this to point to a
-     kernel data structure.
-
-     tx_total_len is the amount of data the caller is intending to transmit
-     with this call (or -1 if unknown at this point).  Setting the data size
-     allows the kernel to encrypt directly to the packet buffers, thereby
-     saving a copy.  The value may not be less than -1.
-
-     notify_rx is a pointer to a function to be called when events such as
-     incoming data packets or remote aborts happen.
-
-     upgrade should be set to true if a client operation should request that
-     the server upgrade the service to a better one.  The resultant service ID
-     is returned by rxrpc_kernel_recv_data().
-
-     intr should be set to true if the call should be interruptible.  If this
-     is not set, this function may not return until a channel has been
-     allocated; if it is set, the function may return -ERESTARTSYS.
-
-     debug_id is the call debugging ID to be used for tracing.  This can be
-     obtained by atomically incrementing rxrpc_debug_id.
-
-     If this function is successful, an opaque reference to the RxRPC call is
-     returned.  The caller now holds a reference on this and it must be
-     properly ended.
-
- (*) End a client call.
-
-	void rxrpc_kernel_end_call(struct socket *sock,
-				   struct rxrpc_call *call);
-
-     This is used to end a previously begun call.  The user_call_ID is expunged
-     from AF_RXRPC's knowledge and will not be seen again in association with
-     the specified call.
-
- (*) Send data through a call.
-
-	typedef void (*rxrpc_notify_end_tx_t)(struct sock *sk,
-					      unsigned long user_call_ID,
-					      struct sk_buff *skb);
-
-	int rxrpc_kernel_send_data(struct socket *sock,
-				   struct rxrpc_call *call,
-				   struct msghdr *msg,
-				   size_t len,
-				   rxrpc_notify_end_tx_t notify_end_rx);
-
-     This is used to supply either the request part of a client call or the
-     reply part of a server call.  msg.msg_iovlen and msg.msg_iov specify the
-     data buffers to be used.  msg_iov may not be NULL and must point
-     exclusively to in-kernel virtual addresses.  msg.msg_flags may be given
-     MSG_MORE if there will be subsequent data sends for this call.
-
-     The msg must not specify a destination address, control data or any flags
-     other than MSG_MORE.  len is the total amount of data to transmit.
-
-     notify_end_rx can be NULL or it can be used to specify a function to be
-     called when the call changes state to end the Tx phase.  This function is
-     called with the call-state spinlock held to prevent any reply or final ACK
-     from being delivered first.
-
- (*) Receive data from a call.
-
-	int rxrpc_kernel_recv_data(struct socket *sock,
-				   struct rxrpc_call *call,
-				   void *buf,
-				   size_t size,
-				   size_t *_offset,
-				   bool want_more,
-				   u32 *_abort,
-				   u16 *_service)
-
-      This is used to receive data from either the reply part of a client call
-      or the request part of a service call.  buf and size specify how much
-      data is desired and where to store it.  *_offset is added on to buf and
-      subtracted from size internally; the amount copied into the buffer is
-      added to *_offset before returning.
-
-      want_more should be true if further data will be required after this is
-      satisfied and false if this is the last item of the receive phase.
-
-      There are three normal returns: 0 if the buffer was filled and want_more
-      was true; 1 if the buffer was filled, the last DATA packet has been
-      emptied and want_more was false; and -EAGAIN if the function needs to be
-      called again.
-
-      If the last DATA packet is processed but the buffer contains less than
-      the amount requested, EBADMSG is returned.  If want_more wasn't set, but
-      more data was available, EMSGSIZE is returned.
-
-      If a remote ABORT is detected, the abort code received will be stored in
-      *_abort and ECONNABORTED will be returned.
-
-      The service ID that the call ended up with is returned into *_service.
-      This can be used to see if a call got a service upgrade.
-
- (*) Abort a call.
-
-	void rxrpc_kernel_abort_call(struct socket *sock,
-				     struct rxrpc_call *call,
-				     u32 abort_code);
-
-     This is used to abort a call if it's still in an abortable state.  The
-     abort code specified will be placed in the ABORT message sent.
-
- (*) Intercept received RxRPC messages.
-
-	typedef void (*rxrpc_interceptor_t)(struct sock *sk,
-					    unsigned long user_call_ID,
-					    struct sk_buff *skb);
-
-	void
-	rxrpc_kernel_intercept_rx_messages(struct socket *sock,
-					   rxrpc_interceptor_t interceptor);
-
-     This installs an interceptor function on the specified AF_RXRPC socket.
-     All messages that would otherwise wind up in the socket's Rx queue are
-     then diverted to this function.  Note that care must be taken to process
-     the messages in the right order to maintain DATA message sequentiality.
-
-     The interceptor function itself is provided with the address of the socket
-     handling the incoming message, the ID assigned by the kernel utility
-     to the call and the socket buffer containing the message.
-
-     The skb->mark field indicates the type of message:
-
-	MARK				MEANING
-	===============================	=======================================
-	RXRPC_SKB_MARK_DATA		Data message
-	RXRPC_SKB_MARK_FINAL_ACK	Final ACK received for an incoming call
-	RXRPC_SKB_MARK_BUSY		Client call rejected as server busy
-	RXRPC_SKB_MARK_REMOTE_ABORT	Call aborted by peer
-	RXRPC_SKB_MARK_NET_ERROR	Network error detected
-	RXRPC_SKB_MARK_LOCAL_ERROR	Local error encountered
-	RXRPC_SKB_MARK_NEW_CALL		New incoming call awaiting acceptance
-
-     The remote abort message can be probed with rxrpc_kernel_get_abort_code().
-     The two error messages can be probed with rxrpc_kernel_get_error_number().
-     A new call can be accepted with rxrpc_kernel_accept_call().
-
-     Data messages can have their contents extracted with the usual bunch of
-     socket buffer manipulation functions.  A data message can be determined to
-     be the last one in a sequence with rxrpc_kernel_is_data_last().  When a
-     data message has been used up, rxrpc_kernel_data_consumed() should be
-     called on it.
-
-     Messages should be handed to rxrpc_kernel_free_skb() to dispose of.  It
-     is possible to get extra refs on all types of message for later freeing,
-     but this may pin the state of a call until the message is finally freed.
-
- (*) Accept an incoming call.
-
-	struct rxrpc_call *
-	rxrpc_kernel_accept_call(struct socket *sock,
-				 unsigned long user_call_ID);
-
-     This is used to accept an incoming call and to assign it a call ID.  This
-     function is similar to rxrpc_kernel_begin_call() and calls accepted must
-     be ended in the same way.
-
-     If this function is successful, an opaque reference to the RxRPC call is
-     returned.  The caller now holds a reference on this and it must be
-     properly ended.
-
- (*) Reject an incoming call.
-
-	int rxrpc_kernel_reject_call(struct socket *sock);
-
-     This is used to reject the first incoming call on the socket's queue with
-     a BUSY message.  -ENODATA is returned if there were no incoming calls.
-     Other errors may be returned if the call had been aborted (-ECONNABORTED)
-     or had timed out (-ETIME).
-
- (*) Allocate a null key for doing anonymous security.
-
-	struct key *rxrpc_get_null_key(const char *keyname);
-
-     This is used to allocate a null RxRPC key that can be used to indicate
-     anonymous security for a particular domain.
-
- (*) Get the peer address of a call.
-
-	void rxrpc_kernel_get_peer(struct socket *sock, struct rxrpc_call *call,
-				   struct sockaddr_rxrpc *_srx);
-
-     This is used to find the remote peer address of a call.
-
- (*) Set the total transmit data size on a call.
-
-	void rxrpc_kernel_set_tx_length(struct socket *sock,
-					struct rxrpc_call *call,
-					s64 tx_total_len);
-
-     This sets the amount of data that the caller is intending to transmit on a
-     call.  It's intended to be used for setting the reply size as the request
-     size should be set when the call is begun.  tx_total_len may not be less
-     than zero.
-
- (*) Get call RTT.
-
-	u64 rxrpc_kernel_get_rtt(struct socket *sock, struct rxrpc_call *call);
-
-     Get the RTT time to the peer in use by a call.  The value returned is in
-     nanoseconds.
-
- (*) Check call still alive.
-
-	bool rxrpc_kernel_check_life(struct socket *sock,
-				     struct rxrpc_call *call,
-				     u32 *_life);
-	void rxrpc_kernel_probe_life(struct socket *sock,
-				     struct rxrpc_call *call);
-
-     The first function passes back in *_life a number that is updated when
-     ACKs are received from the peer (notably including PING RESPONSE ACKs
-     which we can elicit by sending PING ACKs to see if the call still exists
-     on the server).  The caller should compare the numbers of two calls to see
-     if the call is still alive after waiting for a suitable interval.  It also
-     returns true as long as the call hasn't yet reached the completed state.
-
-     This allows the caller to work out if the server is still contactable and
-     if the call is still alive on the server while waiting for the server to
-     process a client operation.
-
-     The second function causes a ping ACK to be transmitted to try to provoke
-     the peer into responding, which would then cause the value returned by the
-     first function to change.  Note that this must be called in TASK_RUNNING
-     state.
-
- (*) Get reply timestamp.
-
-	bool rxrpc_kernel_get_reply_time(struct socket *sock,
-					 struct rxrpc_call *call,
-					 ktime_t *_ts)
-
-     This allows the timestamp on the first DATA packet of the reply of a
-     client call to be queried, provided that it is still in the Rx ring.  If
-     successful, the timestamp will be stored into *_ts and true will be
-     returned; false will be returned otherwise.
-
- (*) Get remote client epoch.
-
-	u32 rxrpc_kernel_get_epoch(struct socket *sock,
-				   struct rxrpc_call *call)
-
-     This allows the epoch that's contained in packets of an incoming client
-     call to be queried.  This value is returned.  The function is always
-     successful if the call is still in progress.  It shouldn't be called once
-     the call has expired.  Note that calling this on a local client call only
-     returns the local epoch.
-
-     This value can be used to determine if the remote client has been
-     restarted as it shouldn't change otherwise.
-
- (*) Set the maximum lifespan on a call.
-
-	void rxrpc_kernel_set_max_life(struct socket *sock,
-				       struct rxrpc_call *call,
-				       unsigned long hard_timeout)
-
-     This sets the maximum lifespan on a call to hard_timeout (which is in
-     jiffies).  In the event of the timeout occurring, the call will be
-     aborted and -ETIME or -ETIMEDOUT will be returned.
-
-
-=======================
-CONFIGURABLE PARAMETERS
-=======================
-
-The RxRPC protocol driver has a number of configurable parameters that can be
-adjusted through sysctls in /proc/net/rxrpc/:
-
- (*) req_ack_delay
-
-     The amount of time in milliseconds after receiving a packet with the
-     request-ack flag set before we honour the flag and actually send the
-     requested ack.
-
-     Usually the other side won't stop sending packets until the advertised
-     reception window is full (to a maximum of 255 packets), so delaying the
-     ACK permits several packets to be ACK'd in one go.
-
- (*) soft_ack_delay
-
-     The amount of time in milliseconds after receiving a new packet before we
-     generate a soft-ACK to tell the sender that it doesn't need to resend.
-
- (*) idle_ack_delay
-
-     The amount of time in milliseconds after all the packets currently in the
-     received queue have been consumed before we generate a hard-ACK to tell
-     the sender it can free its buffers, assuming no other reason occurs that
-     we would send an ACK.
-
- (*) resend_timeout
-
-     The amount of time in milliseconds after transmitting a packet before we
-     transmit it again, assuming no ACK is received from the receiver telling
-     us they got it.
-
- (*) max_call_lifetime
-
-     The maximum amount of time in seconds that a call may be in progress
-     before we preemptively kill it.
-
- (*) dead_call_expiry
-
-     The amount of time in seconds before we remove a dead call from the call
-     list.  Dead calls are kept around for a little while for the purpose of
-     repeating ACK and ABORT packets.
-
- (*) connection_expiry
-
-     The amount of time in seconds after a connection was last used before we
-     remove it from the connection list.  While a connection is in existence,
-     it serves as a placeholder for negotiated security; when it is deleted,
-     the security must be renegotiated.
-
- (*) transport_expiry
-
-     The amount of time in seconds after a transport was last used before we
-     remove it from the transport list.  While a transport is in existence, it
-     serves to anchor the peer data and keeps the connection ID counter.
-
- (*) rxrpc_rx_window_size
-
-     The size of the receive window in packets.  This is the maximum number of
-     unconsumed received packets we're willing to hold in memory for any
-     particular call.
-
- (*) rxrpc_rx_mtu
-
-     The maximum packet MTU size that we're willing to receive in bytes.  This
-     indicates to the peer whether we're willing to accept jumbo packets.
-
- (*) rxrpc_rx_jumbo_max
-
-     The maximum number of packets that we're willing to accept in a jumbo
-     packet.  Non-terminal packets in a jumbo packet must contain a four byte
-     header plus exactly 1412 bytes of data.  The terminal packet must contain
-     a four byte header plus any amount of data.  In any event, a jumbo packet
-     may not exceed rxrpc_rx_mtu in size.
diff --git a/MAINTAINERS b/MAINTAINERS
index b28823ab48c5..866a0dcd66ef 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14593,7 +14593,7 @@ M:	David Howells <dhowells@redhat.com>
 L:	linux-afs@lists.infradead.org
 S:	Supported
 W:	https://www.infradead.org/~dhowells/kafs/
-F:	Documentation/networking/rxrpc.txt
+F:	Documentation/networking/rxrpc.rst
 F:	include/keys/rxrpc-type.h
 F:	include/net/af_rxrpc.h
 F:	include/trace/events/rxrpc.h
diff --git a/net/rxrpc/Kconfig b/net/rxrpc/Kconfig
index 57ebb29c26ad..d706bb408365 100644
--- a/net/rxrpc/Kconfig
+++ b/net/rxrpc/Kconfig
@@ -18,7 +18,7 @@ config AF_RXRPC
 	  This module at the moment only supports client operations and is
 	  currently incomplete.
 
-	  See Documentation/networking/rxrpc.txt.
+	  See Documentation/networking/rxrpc.rst.
 
 config AF_RXRPC_IPV6
 	bool "IPv6 support for RxRPC"
@@ -41,7 +41,7 @@ config AF_RXRPC_DEBUG
 	help
 	  Say Y here to make runtime controllable debugging messages appear.
 
-	  See Documentation/networking/rxrpc.txt.
+	  See Documentation/networking/rxrpc.rst.
 
 
 config RXKAD
@@ -56,4 +56,4 @@ config RXKAD
 	  Provide kerberos 4 and AFS kaserver security handling for AF_RXRPC
 	  through the use of the key retention service.
 
-	  See Documentation/networking/rxrpc.txt.
+	  See Documentation/networking/rxrpc.rst.
diff --git a/net/rxrpc/sysctl.c b/net/rxrpc/sysctl.c
index 2bbb38161851..174e903e18de 100644
--- a/net/rxrpc/sysctl.c
+++ b/net/rxrpc/sysctl.c
@@ -21,7 +21,7 @@ static const unsigned long max_jiffies = MAX_JIFFY_OFFSET;
 /*
  * RxRPC operating parameters.
  *
- * See Documentation/networking/rxrpc.txt and the variable definitions for more
+ * See Documentation/networking/rxrpc.rst and the variable definitions for more
  * information on the individual parameters.
  */
 static struct ctl_table rxrpc_sysctl_table[] = {
-- 
cgit v1.2.3-59-g8ed1b


From 671d114d8cde3ba4390714b850c86d8b39d31009 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:22 +0200
Subject: docs: networking: convert sctp.txt to ReST

- add SPDX header;
- add a document title;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/sctp.rst  | 42 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/sctp.txt  | 35 -------------------------------
 MAINTAINERS                        |  2 +-
 4 files changed, 44 insertions(+), 36 deletions(-)
 create mode 100644 Documentation/networking/sctp.rst
 delete mode 100644 Documentation/networking/sctp.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index cd307b9601fa..1761eb715061 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -100,6 +100,7 @@ Contents:
    rds
    regulatory
    rxrpc
+   sctp
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/sctp.rst b/Documentation/networking/sctp.rst
new file mode 100644
index 000000000000..9f4d9c8a925b
--- /dev/null
+++ b/Documentation/networking/sctp.rst
@@ -0,0 +1,42 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+Linux Kernel SCTP
+=================
+
+This is the current BETA release of the Linux Kernel SCTP reference
+implementation.
+
+SCTP (Stream Control Transmission Protocol) is an IP-based, message-oriented,
+reliable transport protocol, with congestion control, support for
+transparent multi-homing, and multiple ordered streams of messages.
+RFC2960 defines the core protocol.  The IETF SIGTRAN working group originally
+developed the SCTP protocol and later handed the protocol over to the
+Transport Area (TSVWG) working group for the continued evolvement of SCTP as a
+general purpose transport.
+
+See the IETF website (http://www.ietf.org) for further documents on SCTP.
+See http://www.ietf.org/rfc/rfc2960.txt
+
+The initial project goal is to create a Linux kernel reference implementation
+of SCTP that is RFC 2960 compliant and provides a programming interface
+referred to as the UDP-style API of the Sockets Extensions for SCTP, as
+proposed in IETF Internet-Drafts.
+
+Caveats
+=======
+
+- lksctp can be built statically or as a module.  However, be aware that
+  module removal of lksctp is not yet a safe activity.
+
+- There is tentative support for IPv6, but most work has gone towards
+  implementation and testing lksctp on IPv4.
+
+
+For more information, please visit the lksctp project website:
+
+   http://www.sf.net/projects/lksctp
+
+Or contact the lksctp developers through the mailing list:
+
+   <linux-sctp@vger.kernel.org>
diff --git a/Documentation/networking/sctp.txt b/Documentation/networking/sctp.txt
deleted file mode 100644
index 97b810ca9082..000000000000
--- a/Documentation/networking/sctp.txt
+++ /dev/null
@@ -1,35 +0,0 @@
-Linux Kernel SCTP 
-
-This is the current BETA release of the Linux Kernel SCTP reference
-implementation.  
-
-SCTP (Stream Control Transmission Protocol) is a IP based, message oriented,
-reliable transport protocol, with congestion control, support for
-transparent multi-homing, and multiple ordered streams of messages.
-RFC2960 defines the core protocol.  The IETF SIGTRAN working group originally
-developed the SCTP protocol and later handed the protocol over to the 
-Transport Area (TSVWG) working group for the continued evolvement of SCTP as a 
-general purpose transport.  
-
-See the IETF website (http://www.ietf.org) for further documents on SCTP. 
-See http://www.ietf.org/rfc/rfc2960.txt 
-
-The initial project goal is to create an Linux kernel reference implementation
-of SCTP that is RFC 2960 compliant and provides an programming interface 
-referred to as the  UDP-style API of the Sockets Extensions for SCTP, as 
-proposed in IETF Internet-Drafts.    
-
-Caveats:  
-
--lksctp can be built as statically or as a module.  However, be aware that 
-module removal of lksctp is not yet a safe activity.   
-
--There is tentative support for IPv6, but most work has gone towards 
-implementation and testing lksctp on IPv4.   
-
-
-For more information, please visit the lksctp project website:
-   http://www.sf.net/projects/lksctp
-
-Or contact the lksctp developers through the mailing list:
-   <linux-sctp@vger.kernel.org>
diff --git a/MAINTAINERS b/MAINTAINERS
index 866a0dcd66ef..0ac9cec0bce6 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14999,7 +14999,7 @@ M:	Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
 L:	linux-sctp@vger.kernel.org
 S:	Maintained
 W:	http://lksctp.sourceforge.net
-F:	Documentation/networking/sctp.txt
+F:	Documentation/networking/sctp.rst
 F:	include/linux/sctp.h
 F:	include/net/sctp/
 F:	include/uapi/linux/sctp.h
-- 
cgit v1.2.3-59-g8ed1b


From de1fd4a7b0f2351b775b673f092430dff64b221e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:23 +0200
Subject: docs: networking: convert secid.txt to ReST

Not much to be done here:
- add SPDX header;
- add a document title;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/secid.rst | 20 ++++++++++++++++++++
 Documentation/networking/secid.txt | 14 --------------
 3 files changed, 21 insertions(+), 14 deletions(-)
 create mode 100644 Documentation/networking/secid.rst
 delete mode 100644 Documentation/networking/secid.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 1761eb715061..8b672f252f67 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -101,6 +101,7 @@ Contents:
    regulatory
    rxrpc
    sctp
+   secid
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/secid.rst b/Documentation/networking/secid.rst
new file mode 100644
index 000000000000..b45141a98027
--- /dev/null
+++ b/Documentation/networking/secid.rst
@@ -0,0 +1,20 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=================
+LSM/SELinux secid
+=================
+
+flowi structure:
+
+The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
+the label of the flow. This label of the flow is currently used in selecting
+matching labeled xfrm(s).
+
+If this is an outbound flow, the label is derived from the socket, if any, or
+the incoming packet this flow is being generated as a response to (e.g. tcp
+resets, timewait ack, etc.). It is also conceivable that the label could be
+derived from other sources such as process context, device, etc., in special
+cases, as may be appropriate.
+
+If this is an inbound flow, the label is derived from the IPSec security
+associations, if any, used by the packet.
diff --git a/Documentation/networking/secid.txt b/Documentation/networking/secid.txt
deleted file mode 100644
index 95ea06784333..000000000000
--- a/Documentation/networking/secid.txt
+++ /dev/null
@@ -1,14 +0,0 @@
-flowi structure:
-
-The secid member in the flow structure is used in LSMs (e.g. SELinux) to indicate
-the label of the flow. This label of the flow is currently used in selecting
-matching labeled xfrm(s).
-
-If this is an outbound flow, the label is derived from the socket, if any, or
-the incoming packet this flow is being generated as a response to (e.g. tcp
-resets, timewait ack, etc.). It is also conceivable that the label could be
-derived from other sources such as process context, device, etc., in special
-cases, as may be appropriate.
-
-If this is an inbound flow, the label is derived from the IPSec security
-associations, if any, used by the packet.
-- 
cgit v1.2.3-59-g8ed1b


From d6c48bc6f8da38590946d23a27138d5258aca261 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:24 +0200
Subject: docs: networking: convert seg6-sysctl.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- add a document title;
- adjust chapters, adding proper markups;
- mark lists as such;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |  1 +
 Documentation/networking/seg6-sysctl.rst | 26 ++++++++++++++++++++++++++
 Documentation/networking/seg6-sysctl.txt | 18 ------------------
 3 files changed, 27 insertions(+), 18 deletions(-)
 create mode 100644 Documentation/networking/seg6-sysctl.rst
 delete mode 100644 Documentation/networking/seg6-sysctl.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 8b672f252f67..716744c568b7 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -102,6 +102,7 @@ Contents:
    rxrpc
    sctp
    secid
+   seg6-sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/seg6-sysctl.rst b/Documentation/networking/seg6-sysctl.rst
new file mode 100644
index 000000000000..ec73e1445030
--- /dev/null
+++ b/Documentation/networking/seg6-sysctl.rst
@@ -0,0 +1,26 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+Seg6 Sysctl variables
+=====================
+
+
+/proc/sys/net/conf/<iface>/seg6_* variables:
+============================================
+
+seg6_enabled - BOOL
+	Accept or drop SR-enabled IPv6 packets on this interface.
+
+	Relevant packets are those with SRH present and DA = local.
+
+	* 0 - disabled (default)
+	* not 0 - enabled
+
+seg6_require_hmac - INTEGER
+	Define HMAC policy for ingress SR-enabled packets on this interface.
+
+	* -1 - Ignore HMAC field
+	* 0 - Accept SR packets without HMAC, validate SR packets with HMAC
+	* 1 - Drop SR packets without HMAC, validate SR packets with HMAC
+
+	Default is 0.
diff --git a/Documentation/networking/seg6-sysctl.txt b/Documentation/networking/seg6-sysctl.txt
deleted file mode 100644
index bdbde23b19cb..000000000000
--- a/Documentation/networking/seg6-sysctl.txt
+++ /dev/null
@@ -1,18 +0,0 @@
-/proc/sys/net/conf/<iface>/seg6_* variables:
-
-seg6_enabled - BOOL
-	Accept or drop SR-enabled IPv6 packets on this interface.
-
-	Relevant packets are those with SRH present and DA = local.
-
-	0 - disabled (default)
-	not 0 - enabled
-
-seg6_require_hmac - INTEGER
-	Define HMAC policy for ingress SR-enabled packets on this interface.
-
-	-1 - Ignore HMAC field
-	0 - Accept SR packets without HMAC, validate SR packets with HMAC
-	1 - Drop SR packets without HMAC, validate SR packets with HMAC
-
-	Default is 0.
-- 
cgit v1.2.3-59-g8ed1b


From fe3dfe418cbbd1ee168d9294725d482029097694 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:25 +0200
Subject: docs: networking: convert skfp.txt to ReST

- add SPDX header;
- use copyright symbol;
- add a document title;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/skfp.rst  | 253 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/skfp.txt  | 220 --------------------------------
 drivers/net/fddi/Kconfig           |   2 +-
 4 files changed, 255 insertions(+), 221 deletions(-)
 create mode 100644 Documentation/networking/skfp.rst
 delete mode 100644 Documentation/networking/skfp.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 716744c568b7..d19ddcbe66e5 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -103,6 +103,7 @@ Contents:
    sctp
    secid
    seg6-sysctl
+   skfp
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/skfp.rst b/Documentation/networking/skfp.rst
new file mode 100644
index 000000000000..58f548105c1d
--- /dev/null
+++ b/Documentation/networking/skfp.rst
@@ -0,0 +1,253 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+.. include:: <isonum.txt>
+
+========================
+SysKonnect driver - SKFP
+========================
+
+|copy| Copyright 1998-2000 SysKonnect,
+
+skfp.txt created 11-May-2000
+
+Readme File for skfp.o v2.06
+
+
+.. This file contains
+
+   (1) OVERVIEW
+   (2) SUPPORTED ADAPTERS
+   (3) GENERAL INFORMATION
+   (4) INSTALLATION
+   (5) INCLUSION OF THE ADAPTER IN SYSTEM START
+   (6) TROUBLESHOOTING
+   (7) FUNCTION OF THE ADAPTER LEDS
+   (8) HISTORY
+
+
+1. Overview
+===========
+
+This README explains how to use the driver 'skfp' for Linux with your
+network adapter.
+
+Chapter 2: Contains a list of all network adapters that are supported by
+this driver.
+
+Chapter 3:
+	   Gives some general information.
+
+Chapter 4: Describes common problems and solutions.
+
+Chapter 5: Shows the changed functionality of the adapter LEDs.
+
+Chapter 6: History of development.
+
+
+2. Supported adapters
+=====================
+
+The network driver 'skfp' supports the following network adapters:
+SysKonnect adapters:
+
+  - SK-5521 (SK-NET FDDI-UP)
+  - SK-5522 (SK-NET FDDI-UP DAS)
+  - SK-5541 (SK-NET FDDI-FP)
+  - SK-5543 (SK-NET FDDI-LP)
+  - SK-5544 (SK-NET FDDI-LP DAS)
+  - SK-5821 (SK-NET FDDI-UP64)
+  - SK-5822 (SK-NET FDDI-UP64 DAS)
+  - SK-5841 (SK-NET FDDI-FP64)
+  - SK-5843 (SK-NET FDDI-LP64)
+  - SK-5844 (SK-NET FDDI-LP64 DAS)
+
+Compaq adapters (not tested):
+
+  - Netelligent 100 FDDI DAS Fibre SC
+  - Netelligent 100 FDDI SAS Fibre SC
+  - Netelligent 100 FDDI DAS UTP
+  - Netelligent 100 FDDI SAS UTP
+  - Netelligent 100 FDDI SAS Fibre MIC
+
+
+3. General Information
+======================
+
+From v2.01 on, the driver is integrated in the linux kernel sources.
+Therefore, the installation is the same as for any other adapter
+supported by the kernel.
+
+Refer to the manual of your distribution about the installation
+of network adapters.
+
+Makes my life much easier :-)
+
+4. Troubleshooting
+==================
+
+If you run into problems during installation, check the following items:
+
+Problem:
+	  The FDDI adapter cannot be found by the driver.
+
+Reason:
+	  Look in /proc/pci for the following entry:
+
+	     'FDDI network controller: SysKonnect SK-FDDI-PCI ...'
+
+	  If this entry exists, then the FDDI adapter has been
+	  found by the system and should be able to be used.
+
+	  If this entry does not exist or if the file '/proc/pci'
+	  is not there, then you may have a hardware problem or PCI
+	  support may not be enabled in your kernel.
+
+	  The adapter can be checked using the diagnostic program
+	  which is available from the SysKonnect web site:
+
+	      www.syskonnect.de
+
+	  Some COMPAQ machines have a problem with PCI under
+	  Linux. This is described in the 'PCI howto' document
+	  (included in some distributions or available from the
+	  www, e.g. at 'www.linux.org') and no workaround is available.
+
+Problem:
+	  You want to use your computer as a router between
+	  multiple IP subnetworks (using multiple adapters), but
+	  you cannot reach computers in other subnetworks.
+
+Reason:
+	  Either the router's kernel is not configured for IP
+	  forwarding or there is a problem with the routing table
+	  and gateway configuration in at least one of the
+	  computers.
+
+If your problem is not listed here, please contact our
+technical support for help.
+
+You can send email to: linux@syskonnect.de
+
+When contacting our technical support,
+please ensure that the following information is available:
+
+- System Manufacturer and Model
+- Boards in your system
+- Distribution
+- Kernel version
+
+
+5. Function of the Adapter LEDs
+===============================
+
+	The functionality of the LEDs on the FDDI network adapters was
+	changed in SMT version v2.82. With this new SMT version, the yellow
+	LED works as a ring operational indicator. An active yellow LED
+	indicates that the ring is down. The green LED on the adapter now
+	works as a link indicator where an active GREEN LED indicates that
+	the respective port has a physical connection.
+
+	With versions of SMT prior to v2.82 a ring up was indicated if the
+	yellow LED was off while the green LED(s) showed the connection
+	status of the adapter. During a ring down the green LED was off and
+	the yellow LED was on.
+
+	All implementations indicate that a driver is not loaded if
+	all LEDs are off.
+
+
+6. History
+==========
+
+v2.06 (20000511) (In-Kernel version)
+    New features:
+
+	- 64 bit support
+	- new pci dma interface
+	- in kernel 2.3.99
+
+v2.05 (20000217) (In-Kernel version)
+    New features:
+
+	- Changes for 2.3.45 kernel
+
+v2.04 (20000207) (Standalone version)
+    New features:
+
+	- Added rx/tx byte counter
+
+v2.03 (20000111) (Standalone version)
+    Problems fixed:
+
+	- Fixed printk statements from v2.02
+
+v2.02 (991215) (Standalone version)
+    Problems fixed:
+
+	- Removed unnecessary output
+	- Fixed path for "printver.sh" in makefile
+
+v2.01 (991122) (In-Kernel version)
+    New features:
+
+	- Integration in Linux kernel sources
+	- Support for memory mapped I/O.
+
+v2.00 (991112)
+    New features:
+
+	- Full source released under GPL
+
+v1.05 (991023)
+    Problems fixed:
+
+	- Compilation with kernel version 2.2.13 failed
+
+v1.04 (990427)
+    Changes:
+
+	- New SMT module included, changing LED functionality
+
+    Problems fixed:
+
+	- Synchronization on SMP machines was buggy
+
+v1.03 (990325)
+    Problems fixed:
+
+	- Interrupt routing on SMP machines could be incorrect
+
+v1.02 (990310)
+    New features:
+
+	- Support for kernel versions 2.2.x added
+	- Kernel patch instead of private duplicate of kernel functions
+
+v1.01 (980812)
+    Problems fixed:
+
+	Connection hangup with telnet
+	Slow telnet connection
+
+v1.00 beta 01 (980507)
+    New features:
+
+	None.
+
+    Problems fixed:
+
+	None.
+
+    Known limitations:
+
+	- tar archive instead of standard package format (rpm).
+	- FDDI statistic is empty.
+	- not tested with 2.1.xx kernels
+	- integration in kernel not tested
+	- not tested simultaneously with FDDI adapters from other vendors.
+	- only X86 processors supported.
+	- SBA (Synchronous Bandwidth Allocator) parameters can
+	  not be configured.
+	- does not work on some COMPAQ machines. See the PCI howto
+	  document for details about this problem.
+	- data corruption with kernel versions below 2.0.33.
diff --git a/Documentation/networking/skfp.txt b/Documentation/networking/skfp.txt
deleted file mode 100644
index 203ec66c9fb4..000000000000
--- a/Documentation/networking/skfp.txt
+++ /dev/null
@@ -1,220 +0,0 @@
-(C)Copyright 1998-2000 SysKonnect,
-===========================================================================
-
-skfp.txt created 11-May-2000
-
-Readme File for skfp.o v2.06
-
-
-This file contains
-(1) OVERVIEW
-(2) SUPPORTED ADAPTERS
-(3) GENERAL INFORMATION
-(4) INSTALLATION
-(5) INCLUSION OF THE ADAPTER IN SYSTEM START
-(6) TROUBLESHOOTING
-(7) FUNCTION OF THE ADAPTER LEDS
-(8) HISTORY
-
-===========================================================================
-
-
-
-(1) OVERVIEW
-============
-
-This README explains how to use the driver 'skfp' for Linux with your
-network adapter.
-
-Chapter 2: Contains a list of all network adapters that are supported by
-	   this driver.
-
-Chapter 3: Gives some general information.
-
-Chapter 4: Describes common problems and solutions.
-
-Chapter 5: Shows the changed functionality of the adapter LEDs.
-
-Chapter 6: History of development.
-
-***
-
-
-(2) SUPPORTED ADAPTERS
-======================
-
-The network driver 'skfp' supports the following network adapters:
-SysKonnect adapters:
-  - SK-5521 (SK-NET FDDI-UP)
-  - SK-5522 (SK-NET FDDI-UP DAS)
-  - SK-5541 (SK-NET FDDI-FP)
-  - SK-5543 (SK-NET FDDI-LP)
-  - SK-5544 (SK-NET FDDI-LP DAS)
-  - SK-5821 (SK-NET FDDI-UP64)
-  - SK-5822 (SK-NET FDDI-UP64 DAS)
-  - SK-5841 (SK-NET FDDI-FP64)
-  - SK-5843 (SK-NET FDDI-LP64)
-  - SK-5844 (SK-NET FDDI-LP64 DAS)
-Compaq adapters (not tested):
-  - Netelligent 100 FDDI DAS Fibre SC
-  - Netelligent 100 FDDI SAS Fibre SC
-  - Netelligent 100 FDDI DAS UTP
-  - Netelligent 100 FDDI SAS UTP
-  - Netelligent 100 FDDI SAS Fibre MIC
-***
-
-
-(3) GENERAL INFORMATION
-=======================
-
-From v2.01 on, the driver is integrated in the linux kernel sources.
-Therefore, the installation is the same as for any other adapter
-supported by the kernel.
-Refer to the manual of your distribution about the installation
-of network adapters.
-Makes my life much easier :-)
-***
-
-
-(4) TROUBLESHOOTING
-===================
-
-If you run into problems during installation, check those items:
-
-Problem:  The FDDI adapter cannot be found by the driver.
-Reason:   Look in /proc/pci for the following entry:
-             'FDDI network controller: SysKonnect SK-FDDI-PCI ...'
-	  If this entry exists, then the FDDI adapter has been
-	  found by the system and should be able to be used.
-	  If this entry does not exist or if the file '/proc/pci'
-	  is not there, then you may have a hardware problem or PCI
-	  support may not be enabled in your kernel.
-	  The adapter can be checked using the diagnostic program
-	  which is available from the SysKonnect web site:
-	      www.syskonnect.de
-	  Some COMPAQ machines have a problem with PCI under
-	  Linux. This is described in the 'PCI howto' document
-	  (included in some distributions or available from the
-	  www, e.g. at 'www.linux.org') and no workaround is available.
-
-Problem:  You want to use your computer as a router between
-          multiple IP subnetworks (using multiple adapters), but
-	  you cannot reach computers in other subnetworks.
-Reason:   Either the router's kernel is not configured for IP
-	  forwarding or there is a problem with the routing table
-	  and gateway configuration in at least one of the
-	  computers.
-
-If your problem is not listed here, please contact our
-technical support for help. 
-You can send email to:
-  linux@syskonnect.de
-When contacting our technical support,
-please ensure that the following information is available:
-- System Manufacturer and Model
-- Boards in your system
-- Distribution
-- Kernel version
-
-***
-
-
-(5) FUNCTION OF THE ADAPTER LEDS
-================================
-
-        The functionality of the LED's on the FDDI network adapters was
-        changed in SMT version v2.82. With this new SMT version, the yellow
-        LED works as a ring operational indicator. An active yellow LED
-        indicates that the ring is down. The green LED on the adapter now
-        works as a link indicator where an active GREEN LED indicates that
-        the respective port has a physical connection.
-
-        With versions of SMT prior to v2.82 a ring up was indicated if the
-        yellow LED was off while the green LED(s) showed the connection
-        status of the adapter. During a ring down the green LED was off and
-        the yellow LED was on.
-
-        All implementations indicate that a driver is not loaded if
-        all LEDs are off.
-
-***
-
-
-(6) HISTORY
-===========
-
-v2.06 (20000511) (In-Kernel version)
-    New features:
-	- 64 bit support
-	- new pci dma interface
-	- in kernel 2.3.99
-
-v2.05 (20000217) (In-Kernel version)
-    New features:
-	- Changes for 2.3.45 kernel
-
-v2.04 (20000207) (Standalone version)
-    New features:
-	- Added rx/tx byte counter
-
-v2.03 (20000111) (Standalone version)
-    Problems fixed:
-	- Fixed printk statements from v2.02
-
-v2.02 (991215) (Standalone version)
-    Problems fixed:
-	- Removed unnecessary output
-	- Fixed path for "printver.sh" in makefile
-
-v2.01 (991122) (In-Kernel version)
-    New features:
-	- Integration in Linux kernel sources
-	- Support for memory mapped I/O.
-
-v2.00 (991112)
-    New features:
-	- Full source released under GPL
-
-v1.05 (991023)
-    Problems fixed:
-	- Compilation with kernel version 2.2.13 failed
-
-v1.04 (990427)
-    Changes:
-	- New SMT module included, changing LED functionality
-    Problems fixed:
-	- Synchronization on SMP machines was buggy
-
-v1.03 (990325)
-    Problems fixed:
-	- Interrupt routing on SMP machines could be incorrect
-
-v1.02 (990310)
-    New features:
-	- Support for kernel versions 2.2.x added
-	- Kernel patch instead of private duplicate of kernel functions
-
-v1.01 (980812)
-    Problems fixed:
-	Connection hangup with telnet
-	Slow telnet connection
-
-v1.00 beta 01 (980507)
-    New features:
-	None.
-    Problems fixed:
-	None.
-    Known limitations:
-        - tar archive instead of standard package format (rpm).
-	- FDDI statistic is empty.
-	- not tested with 2.1.xx kernels
-	- integration in kernel not tested
-	- not tested simultaneously with FDDI adapters from other vendors.
-	- only X86 processors supported.
-	- SBA (Synchronous Bandwidth Allocator) parameters can
-	  not be configured.
-	- does not work on some COMPAQ machines. See the PCI howto
-	  document for details about this problem.
-	- data corruption with kernel versions below 2.0.33.
-
-*** End of information file ***
diff --git a/drivers/net/fddi/Kconfig b/drivers/net/fddi/Kconfig
index 3b412a56f2cb..da4f58eed08f 100644
--- a/drivers/net/fddi/Kconfig
+++ b/drivers/net/fddi/Kconfig
@@ -77,7 +77,7 @@ config SKFP
 	  - Netelligent 100 FDDI SAS UTP
 	  - Netelligent 100 FDDI SAS Fibre MIC
 
-	  Read <file:Documentation/networking/skfp.txt> for information about
+	  Read <file:Documentation/networking/skfp.rst> for information about
 	  the driver.
 
 	  Questions concerning this driver can be addressed to:
-- 
cgit v1.2.3-59-g8ed1b


From 060d9d3e1282e3ccf4bbb3cc4ea94bed3ce69310 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:26 +0200
Subject: docs: networking: convert strparser.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |   1 +
 Documentation/networking/strparser.rst | 240 +++++++++++++++++++++++++++++++++
 Documentation/networking/strparser.txt | 207 ----------------------------
 3 files changed, 241 insertions(+), 207 deletions(-)
 create mode 100644 Documentation/networking/strparser.rst
 delete mode 100644 Documentation/networking/strparser.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index d19ddcbe66e5..e5a705024c6a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -104,6 +104,7 @@ Contents:
    secid
    seg6-sysctl
    skfp
+   strparser
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/strparser.rst b/Documentation/networking/strparser.rst
new file mode 100644
index 000000000000..6cab1f74ae05
--- /dev/null
+++ b/Documentation/networking/strparser.rst
@@ -0,0 +1,240 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Stream Parser (strparser)
+=========================
+
+Introduction
+============
+
+The stream parser (strparser) is a utility that parses messages of an
+application layer protocol running over a data stream. The stream
+parser works in conjunction with an upper layer in the kernel to provide
+kernel support for application layer messages. For instance, Kernel
+Connection Multiplexor (KCM) uses the Stream Parser to parse messages
+using a BPF program.
+
+The strparser works in one of two modes: receive callback or general
+mode.
+
+In receive callback mode, the strparser is called from the data_ready
+callback of a TCP socket. Messages are parsed and delivered as they are
+received on the socket.
+
+In general mode, a sequence of skbs are fed to strparser from an
+outside source. Messages are parsed and delivered as the sequence is
+processed. This mode allows strparser to be applied to arbitrary
+streams of data.
+
+Interface
+=========
+
+The API includes a context structure, a set of callbacks, utility
+functions, and a data_ready function for receive callback mode. The
+callbacks include a parse_msg function that is called to perform
+parsing (e.g.  BPF parsing in case of KCM), and a rcv_msg function
+that is called when a full message has been completed.
+
+Functions
+=========
+
+     ::
+
+	strp_init(struct strparser *strp, struct sock *sk,
+		const struct strp_callbacks *cb)
+
+     Called to initialize a stream parser. strp is a struct of type
+     strparser that is allocated by the upper layer. sk is the TCP
+     socket associated with the stream parser for use with receive
+     callback mode; in general mode this is set to NULL. Callbacks
+     are called by the stream parser (the callbacks are listed below).
+
+     ::
+
+	void strp_pause(struct strparser *strp)
+
+     Temporarily pause a stream parser. Message parsing is suspended
+     and no new messages are delivered to the upper layer.
+
+     ::
+
+	void strp_unpause(struct strparser *strp)
+
+     Unpause a paused stream parser.
+
+     ::
+
+	void strp_stop(struct strparser *strp);
+
+     strp_stop is called to completely stop stream parser operations.
+     This is called internally when the stream parser encounters an
+     error, and it is called from the upper layer to stop parsing
+     operations.
+
+     ::
+
+	void strp_done(struct strparser *strp);
+
+     strp_done is called to release any resources held by the stream
+     parser instance. This must be called after the stream processor
+     has been stopped.
+
+     ::
+
+	int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
+			 unsigned int orig_offset, size_t orig_len,
+			 size_t max_msg_size, long timeo)
+
+    strp_process is called in general mode for a stream parser to
+    parse an sk_buff. The number of bytes processed or a negative
+    error number is returned. Note that strp_process does not
+    consume the sk_buff. max_msg_size is the maximum size the stream
+    parser will parse. timeo is the timeout for completing a message.
+
+    ::
+
+	void strp_data_ready(struct strparser *strp);
+
+    The upper layer calls strp_data_ready when data is ready on
+    the lower socket for strparser to process. This should be called
+    from a data_ready callback that is set on the socket. Note that
+    the maximum message size is the limit of the receive socket
+    buffer and the message timeout is the receive timeout for the socket.
+
+    ::
+
+	void strp_check_rcv(struct strparser *strp);
+
+    strp_check_rcv is called to check for new messages on the socket.
+    This is normally called at initialization of a stream parser
+    instance or after strp_unpause.
+
+Callbacks
+=========
+
+There are six callbacks:
+
+    ::
+
+	int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    parse_msg is called to determine the length of the next message
+    in the stream. The upper layer must implement this function. It
+    should parse the sk_buff as containing the headers for the
+    next application layer message in the stream.
+
+    The skb->cb in the input skb is a struct strp_msg. Only
+    the offset field is relevant in parse_msg and gives the offset
+    where the message starts in the skb.
+
+    The return values of this function are:
+
+    =========    ===========================================================
+    >0           indicates length of successfully parsed message
+    0            indicates more data must be received to parse the message
+    -ESTRPIPE    current message should not be processed by the
+		 kernel, return control of the socket to userspace which
+		 can proceed to read the messages itself
+    other < 0    Error in parsing, give control back to userspace
+		 assuming that synchronization is lost and the stream
+		 is unrecoverable (application expected to close TCP socket)
+    =========    ===========================================================
+
+    In the case that an error is returned (return value is less than
+    zero) and the parser is in receive callback mode, then it will set
+    the error on TCP socket and wake it up. If parse_msg returned
+    -ESTRPIPE and the stream parser had previously read some bytes for
+    the current message, then the error set on the attached socket is
+    ENODATA since the stream is unrecoverable in that case.
+
+    ::
+
+	void (*lock)(struct strparser *strp)
+
+    The lock callback is called to lock the strp structure when
+    the strparser is performing an asynchronous operation (such as
+    processing a timeout). In receive callback mode the default
+    function is to lock_sock for the associated socket. In general
+    mode the callback must be set appropriately.
+
+    ::
+
+	void (*unlock)(struct strparser *strp)
+
+    The unlock callback is called to release the lock obtained
+    by the lock callback. In receive callback mode the default
+    function is release_sock for the associated socket. In general
+    mode the callback must be set appropriately.
+
+    ::
+
+	void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
+
+    rcv_msg is called when a full message has been received and
+    is queued. The callee must consume the sk_buff; it can
+    call strp_pause to prevent any further messages from being
+    received in rcv_msg (see strp_pause above). This callback
+    must be set.
+
+    The skb->cb in the input skb is a struct strp_msg. This
+    struct contains two fields: offset and full_len. Offset is
+    where the message starts in the skb, and full_len is
+    the length of the message. skb->len - offset may be greater
+    than full_len since strparser does not trim the skb.
+
+    ::
+
+	int (*read_sock_done)(struct strparser *strp, int err);
+
+     read_sock_done is called when the stream parser is done reading
+     the TCP socket in receive callback mode. The stream parser may
+     read multiple messages in a loop and this function allows cleanup
+     to occur when exiting the loop. If the callback is not set (NULL
+     in strp_init) a default function is used.
+
+     ::
+
+	void (*abort_parser)(struct strparser *strp, int err);
+
+     This function is called when stream parser encounters an error
+     in parsing. The default function stops the stream parser and
+     sets the error in the socket if the parser is in receive callback
+     mode. The default function can be changed by setting the callback
+     to non-NULL in strp_init.
+
+Statistics
+==========
+
+Various counters are kept for each stream parser instance. These are in
+the strp_stats structure. strp_aggr_stats is a convenience structure for
+accumulating statistics for multiple stream parser instances.
+save_strp_stats and aggregate_strp_stats are helper functions to save
+and aggregate statistics.
+
+Message assembly limits
+=======================
+
+The stream parser provides mechanisms to limit the resources consumed by
+message assembly.
+
+A timer is set when assembly starts for a new message. In receive
+callback mode the message timeout is taken from rcvtime for the
+associated TCP socket. In general mode, the timeout is passed as an
+argument in strp_process. If the timer fires before assembly completes
+the stream parser is aborted and the ETIMEDOUT error is set on the TCP
+socket if in receive callback mode.
+
+In receive callback mode, message length is limited to the receive
+buffer size of the associated TCP socket. If the length returned by
+parse_msg is greater than the socket buffer size then the stream parser
+is aborted with EMSGSIZE error set on the TCP socket. Note that this
+makes the maximum size of receive skbuffs for a socket with a stream
+parser to be 2*sk_rcvbuf of the TCP socket.
+
+In general mode the message length limit is passed in as an argument
+to strp_process.
+
+Author
+======
+
+Tom Herbert (tom@quantonium.net)
diff --git a/Documentation/networking/strparser.txt b/Documentation/networking/strparser.txt
deleted file mode 100644
index a7d354ddda7b..000000000000
--- a/Documentation/networking/strparser.txt
+++ /dev/null
@@ -1,207 +0,0 @@
-Stream Parser (strparser)
-
-Introduction
-============
-
-The stream parser (strparser) is a utility that parses messages of an
-application layer protocol running over a data stream. The stream
-parser works in conjunction with an upper layer in the kernel to provide
-kernel support for application layer messages. For instance, Kernel
-Connection Multiplexor (KCM) uses the Stream Parser to parse messages
-using a BPF program.
-
-The strparser works in one of two modes: receive callback or general
-mode.
-
-In receive callback mode, the strparser is called from the data_ready
-callback of a TCP socket. Messages are parsed and delivered as they are
-received on the socket.
-
-In general mode, a sequence of skbs are fed to strparser from an
-outside source. Message are parsed and delivered as the sequence is
-processed. This modes allows strparser to be applied to arbitrary
-streams of data.
-
-Interface
-=========
-
-The API includes a context structure, a set of callbacks, utility
-functions, and a data_ready function for receive callback mode. The
-callbacks include a parse_msg function that is called to perform
-parsing (e.g.  BPF parsing in case of KCM), and a rcv_msg function
-that is called when a full message has been completed.
-
-Functions
-=========
-
-strp_init(struct strparser *strp, struct sock *sk,
-	  const struct strp_callbacks *cb)
-
-     Called to initialize a stream parser. strp is a struct of type
-     strparser that is allocated by the upper layer. sk is the TCP
-     socket associated with the stream parser for use with receive
-     callback mode; in general mode this is set to NULL. Callbacks
-     are called by the stream parser (the callbacks are listed below).
-
-void strp_pause(struct strparser *strp)
-
-     Temporarily pause a stream parser. Message parsing is suspended
-     and no new messages are delivered to the upper layer.
-
-void strp_unpause(struct strparser *strp)
-
-     Unpause a paused stream parser.
-
-void strp_stop(struct strparser *strp);
-
-     strp_stop is called to completely stop stream parser operations.
-     This is called internally when the stream parser encounters an
-     error, and it is called from the upper layer to stop parsing
-     operations.
-
-void strp_done(struct strparser *strp);
-
-     strp_done is called to release any resources held by the stream
-     parser instance. This must be called after the stream processor
-     has been stopped.
-
-int strp_process(struct strparser *strp, struct sk_buff *orig_skb,
-		 unsigned int orig_offset, size_t orig_len,
-		 size_t max_msg_size, long timeo)
-
-    strp_process is called in general mode for a stream parser to
-    parse an sk_buff. The number of bytes processed or a negative
-    error number is returned. Note that strp_process does not
-    consume the sk_buff. max_msg_size is maximum size the stream
-    parser will parse. timeo is timeout for completing a message.
-
-void strp_data_ready(struct strparser *strp);
-
-    The upper layer calls strp_tcp_data_ready when data is ready on
-    the lower socket for strparser to process. This should be called
-    from a data_ready callback that is set on the socket. Note that
-    maximum messages size is the limit of the receive socket
-    buffer and message timeout is the receive timeout for the socket.
-
-void strp_check_rcv(struct strparser *strp);
-
-    strp_check_rcv is called to check for new messages on the socket.
-    This is normally called at initialization of a stream parser
-    instance or after strp_unpause.
-
-Callbacks
-=========
-
-There are six callbacks:
-
-int (*parse_msg)(struct strparser *strp, struct sk_buff *skb);
-
-    parse_msg is called to determine the length of the next message
-    in the stream. The upper layer must implement this function. It
-    should parse the sk_buff as containing the headers for the
-    next application layer message in the stream.
-
-    The skb->cb in the input skb is a struct strp_msg. Only
-    the offset field is relevant in parse_msg and gives the offset
-    where the message starts in the skb.
-
-    The return values of this function are:
-
-    >0 : indicates length of successfully parsed message
-    0  : indicates more data must be received to parse the message
-    -ESTRPIPE : current message should not be processed by the
-          kernel, return control of the socket to userspace which
-          can proceed to read the messages itself
-    other < 0 : Error in parsing, give control back to userspace
-          assuming that synchronization is lost and the stream
-          is unrecoverable (application expected to close TCP socket)
-
-    In the case that an error is returned (return value is less than
-    zero) and the parser is in receive callback mode, then it will set
-    the error on TCP socket and wake it up. If parse_msg returned
-    -ESTRPIPE and the stream parser had previously read some bytes for
-    the current message, then the error set on the attached socket is
-    ENODATA since the stream is unrecoverable in that case.
-
-void (*lock)(struct strparser *strp)
-
-    The lock callback is called to lock the strp structure when
-    the strparser is performing an asynchronous operation (such as
-    processing a timeout). In receive callback mode the default
-    function is to lock_sock for the associated socket. In general
-    mode the callback must be set appropriately.
-
-void (*unlock)(struct strparser *strp)
-
-    The unlock callback is called to release the lock obtained
-    by the lock callback. In receive callback mode the default
-    function is release_sock for the associated socket. In general
-    mode the callback must be set appropriately.
-
-void (*rcv_msg)(struct strparser *strp, struct sk_buff *skb);
-
-    rcv_msg is called when a full message has been received and
-    is queued. The callee must consume the sk_buff; it can
-    call strp_pause to prevent any further messages from being
-    received in rcv_msg (see strp_pause above). This callback
-    must be set.
-
-    The skb->cb in the input skb is a struct strp_msg. This
-    struct contains two fields: offset and full_len. Offset is
-    where the message starts in the skb, and full_len is the
-    the length of the message. skb->len - offset may be greater
-    then full_len since strparser does not trim the skb.
-
-int (*read_sock_done)(struct strparser *strp, int err);
-
-     read_sock_done is called when the stream parser is done reading
-     the TCP socket in receive callback mode. The stream parser may
-     read multiple messages in a loop and this function allows cleanup
-     to occur when exiting the loop. If the callback is not set (NULL
-     in strp_init) a default function is used.
-
-void (*abort_parser)(struct strparser *strp, int err);
-
-     This function is called when stream parser encounters an error
-     in parsing. The default function stops the stream parser and
-     sets the error in the socket if the parser is in receive callback
-     mode. The default function can be changed by setting the callback
-     to non-NULL in strp_init.
-
-Statistics
-==========
-
-Various counters are kept for each stream parser instance. These are in
-the strp_stats structure. strp_aggr_stats is a convenience structure for
-accumulating statistics for multiple stream parser instances.
-save_strp_stats and aggregate_strp_stats are helper functions to save
-and aggregate statistics.
-
-Message assembly limits
-=======================
-
-The stream parser provide mechanisms to limit the resources consumed by
-message assembly.
-
-A timer is set when assembly starts for a new message. In receive
-callback mode the message timeout is taken from rcvtime for the
-associated TCP socket. In general mode, the timeout is passed as an
-argument in strp_process. If the timer fires before assembly completes
-the stream parser is aborted and the ETIMEDOUT error is set on the TCP
-socket if in receive callback mode.
-
-In receive callback mode, message length is limited to the receive
-buffer size of the associated TCP socket. If the length returned by
-parse_msg is greater than the socket buffer size then the stream parser
-is aborted with EMSGSIZE error set on the TCP socket. Note that this
-makes the maximum size of receive skbuffs for a socket with a stream
-parser to be 2*sk_rcvbuf of the TCP socket.
-
-In general mode the message length limit is passed in as an argument
-to strp_process.
-
-Author
-======
-
-Tom Herbert (tom@quantonium.net)
-
-- 
cgit v1.2.3-59-g8ed1b


From 32c0f0bed5bb08625083ed7f5b661c842d63ebd1 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:27 +0200
Subject: docs: networking: convert switchdev.txt to ReST

- add SPDX header;
- use copyright symbol;
- adjust title markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |   1 +
 Documentation/networking/switchdev.rst | 387 +++++++++++++++++++++++++++++++++
 Documentation/networking/switchdev.txt | 373 -------------------------------
 drivers/staging/fsl-dpaa2/ethsw/README |   2 +-
 4 files changed, 389 insertions(+), 374 deletions(-)
 create mode 100644 Documentation/networking/switchdev.rst
 delete mode 100644 Documentation/networking/switchdev.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e5a705024c6a..5e495804f96f 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -105,6 +105,7 @@ Contents:
    seg6-sysctl
    skfp
    strparser
+   switchdev
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/switchdev.rst b/Documentation/networking/switchdev.rst
new file mode 100644
index 000000000000..ddc3f35775dc
--- /dev/null
+++ b/Documentation/networking/switchdev.rst
@@ -0,0 +1,387 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===============================================
+Ethernet switch device driver model (switchdev)
+===============================================
+
+Copyright |copy| 2014 Jiri Pirko <jiri@resnulli.us>
+
+Copyright |copy| 2014-2015 Scott Feldman <sfeldma@gmail.com>
+
+
+The Ethernet switch device driver model (switchdev) is an in-kernel driver
+model for switch devices which offload the forwarding (data) plane from the
+kernel.
+
+Figure 1 is a block diagram showing the components of the switchdev model for
+an example setup using a data-center-class switch ASIC chip.  Other setups
+with SR-IOV or soft switches, such as OVS, are possible.
+
+::
+
+
+			     User-space tools
+
+       user space                   |
+      +-------------------------------------------------------------------+
+       kernel                       | Netlink
+				    |
+		     +--------------+-------------------------------+
+		     |         Network stack                        |
+		     |           (Linux)                            |
+		     |                                              |
+		     +----------------------------------------------+
+
+			   sw1p2     sw1p4     sw1p6
+		      sw1p1  +  sw1p3  +  sw1p5  +          eth1
+			+    |    +    |    +    |            +
+			|    |    |    |    |    |            |
+		     +--+----+----+----+----+----+---+  +-----+-----+
+		     |         Switch driver         |  |    mgmt   |
+		     |        (this document)        |  |   driver  |
+		     |                               |  |           |
+		     +--------------+----------------+  +-----------+
+				    |
+       kernel                       | HW bus (eg PCI)
+      +-------------------------------------------------------------------+
+       hardware                     |
+		     +--------------+----------------+
+		     |         Switch device (sw1)   |
+		     |  +----+                       +--------+
+		     |  |    v offloaded data path   | mgmt port
+		     |  |    |                       |
+		     +--|----|----+----+----+----+---+
+			|    |    |    |    |    |
+			+    +    +    +    +    +
+		       p1   p2   p3   p4   p5   p6
+
+			     front-panel ports
+
+
+				    Fig 1.
+
+
+Include Files
+-------------
+
+::
+
+    #include <linux/netdevice.h>
+    #include <net/switchdev.h>
+
+
+Configuration
+-------------
+
+Use "depends on NET_SWITCHDEV" in driver's Kconfig to ensure switchdev model
+support is built for driver.
+
+
+Switch Ports
+------------
+
+On switchdev driver initialization, the driver will allocate and register a
+struct net_device (using register_netdev()) for each enumerated physical switch
+port, called the port netdev.  A port netdev is the software representation of
+the physical port and provides a conduit for control traffic to/from the
+controller (the kernel) and the network, as well as an anchor point for higher
+level constructs such as bridges, bonds, VLANs, tunnels, and L3 routers.  Using
+standard netdev tools (iproute2, ethtool, etc), the port netdev can also
+provide to the user access to the physical properties of the switch port such
+as PHY link state and I/O statistics.
+
+There is (currently) no higher-level kernel object for the switch beyond the
+port netdevs.  All of the switchdev driver ops are netdev ops or switchdev ops.
+
+A switch management port is outside the scope of the switchdev driver model.
+Typically, the management port is not participating in offloaded data plane and
+is loaded with a different driver, such as a NIC driver, on the management port
+device.
+
+Switch ID
+^^^^^^^^^
+
+The switchdev driver must implement the net_device operation
+ndo_get_port_parent_id for each port netdev, returning the same physical ID for
+each port of a switch. The ID must be unique between switches on the same
+system. The ID does not need to be unique between switches on different
+systems.
+
+The switch ID is used to locate ports on a switch and to know if aggregated
+ports belong to the same switch.
+
+Port Netdev Naming
+^^^^^^^^^^^^^^^^^^
+
+Udev rules should be used for port netdev naming, using some unique attribute
+of the port as a key, for example the port MAC address or the port PHYS name.
+Hard-coding of kernel netdev names within the driver is discouraged; let the
+kernel pick the default netdev name, and let udev set the final name based on a
+port attribute.
+
+Using port PHYS name (ndo_get_phys_port_name) for the key is particularly
+useful for dynamically-named ports where the device names its ports based on
+external configuration.  For example, if a physical 40G port is split logically
+into 4 10G ports, resulting in 4 port netdevs, the device can give a unique
+name for each port using port PHYS name.  The udev rule would be::
+
+    SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="<phys_switch_id>", \
+	    ATTR{phys_port_name}!="", NAME="swX$attr{phys_port_name}"
+
+Suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y
+is the port name or ID, and Z is the sub-port name or ID.  For example, sw1p1s0
+would be sub-port 0 on port 1 on switch 1.
+
+Port Features
+^^^^^^^^^^^^^
+
+NETIF_F_NETNS_LOCAL
+
+If the switchdev driver (and device) only supports offloading of the default
+network namespace (netns), the driver should set this feature flag to prevent
+the port netdev from being moved out of the default netns.  A netns-aware
+driver/device would not set this flag and be responsible for partitioning
+hardware to preserve netns containment.  This means hardware cannot forward
+traffic from a port in one namespace to another port in another namespace.
+
+Port Topology
+^^^^^^^^^^^^^
+
+The port netdevs representing the physical switch ports can be organized into
+higher-level switching constructs.  The default construct is a standalone
+router port, used to offload L3 forwarding.  Two or more ports can be bonded
+together to form a LAG.  Two or more ports (or LAGs) can be bridged to bridge
+L2 networks.  VLANs can be applied to sub-divide L2 networks.  L2-over-L3
+tunnels can be built on ports.  These constructs are built using standard Linux
+tools such as the bridge driver, the bonding/team drivers, and netlink-based
+tools such as iproute2.
+
+The switchdev driver can know a particular port's position in the topology by
+monitoring NETDEV_CHANGEUPPER notifications.  For example, a port moved into a
+bond will see its upper master change.  If that bond is moved into a bridge,
+the bond's upper master will change.  And so on.  The driver will track such
+movements to know what position a port is in in the overall topology by
+registering for netdevice events and acting on NETDEV_CHANGEUPPER.
+
+L2 Forwarding Offload
+---------------------
+
+The idea is to offload the L2 data forwarding (switching) path from the kernel
+to the switchdev device by mirroring bridge FDB entries down to the device.  An
+FDB entry is the {port, MAC, VLAN} tuple forwarding destination.
+
+To offload L2 bridging, the switchdev driver/device should support:
+
+	- Static FDB entries installed on a bridge port
+	- Notification of learned/forgotten src mac/vlans from device
+	- STP state changes on the port
+	- VLAN flooding of multicast/broadcast and unknown unicast packets
+
+Static FDB Entries
+^^^^^^^^^^^^^^^^^^
+
+The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump
+to support static FDB entries installed to the device.  Static bridge FDB
+entries are installed, for example, using iproute2 bridge cmd::
+
+	bridge fdb add ADDR dev DEV [vlan VID] [self]
+
+The driver should use the helper switchdev_port_fdb_xxx ops for ndo_fdb_xxx
+ops, and handle add/delete/dump of SWITCHDEV_OBJ_ID_PORT_FDB object using
+switchdev_port_obj_xxx ops.
+
+XXX: what should be done if offloading this rule to hardware fails (for
+example, due to full capacity in hardware tables) ?
+
+Note: by default, the bridge does not filter on VLAN and only bridges untagged
+traffic.  To enable VLAN support, turn on VLAN filtering::
+
+	echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering
+
+Notification of Learned/Forgotten Source MAC/VLANs
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The switch device will learn/forget source MAC address/VLAN on ingress packets
+and notify the switch driver of the mac/vlan/port tuples.  The switch driver,
+in turn, will notify the bridge driver using the switchdev notifier call::
+
+	err = call_switchdev_notifiers(val, dev, info, extack);
+
+Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when
+forgetting, and info points to a struct switchdev_notifier_fdb_info.  On
+SWITCHDEV_FDB_ADD, the bridge driver will install the FDB entry into the
+bridge's FDB and mark the entry as NTF_EXT_LEARNED.  The iproute2 bridge
+command will label these entries "offload"::
+
+	$ bridge fdb
+	52:54:00:12:35:01 dev sw1p1 master br0 permanent
+	00:02:00:00:02:00 dev sw1p1 master br0 offload
+	00:02:00:00:02:00 dev sw1p1 self
+	52:54:00:12:35:02 dev sw1p2 master br0 permanent
+	00:02:00:00:03:00 dev sw1p2 master br0 offload
+	00:02:00:00:03:00 dev sw1p2 self
+	33:33:00:00:00:01 dev eth0 self permanent
+	01:00:5e:00:00:01 dev eth0 self permanent
+	33:33:ff:00:00:00 dev eth0 self permanent
+	01:80:c2:00:00:0e dev eth0 self permanent
+	33:33:00:00:00:01 dev br0 self permanent
+	01:00:5e:00:00:01 dev br0 self permanent
+	33:33:ff:12:35:01 dev br0 self permanent
+
+Learning on the port should be disabled on the bridge using the bridge command::
+
+	bridge link set dev DEV learning off
+
+Learning on the device port should be enabled, as well as learning_sync::
+
+	bridge link set dev DEV learning on self
+	bridge link set dev DEV learning_sync on self
+
+Learning_sync attribute enables syncing of the learned/forgotten FDB entry to
+the bridge's FDB.  It's possible, but not optimal, to enable learning on the
+device port and on the bridge port, and disable learning_sync.
+
+To support learning, the driver implements switchdev op
+switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_{PRE}_BRIDGE_FLAGS.
+
+FDB Ageing
+^^^^^^^^^^
+
+The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is
+the responsibility of the port driver/device to age out these entries.  If the
+port device supports ageing, when the FDB entry expires, it will notify the
+driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL.  If the
+device does not support ageing, the driver can simulate ageing using a
+garbage collection timer to monitor FDB entries.  Expired entries will be
+notified to the bridge using SWITCHDEV_FDB_DEL.  See rocker driver for
+example of driver running ageing timer.
+
+To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB
+entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
+notification will reset the FDB entry's last-used time to now.  The driver
+should rate limit refresh notifications, for example, no more than once a
+second.  (The last-used time is visible using the bridge -s fdb option).
+
+STP State Change on Port
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+Internally or with a third-party STP protocol implementation (e.g. mstpd), the
+bridge driver maintains the STP state for ports, and will notify the switch
+driver of STP state change on a port using the switchdev op
+switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_STP_UPDATE.
+
+State is one of BR_STATE_*.  The switch driver can use STP state updates to
+update ingress packet filter list for the port.  For example, if port is
+DISABLED, no packets should pass, but if port moves to BLOCKED, then STP BPDUs
+and other IEEE 01:80:c2:xx:xx:xx link-local multicast packets can pass.
+
+Note that STP BPDUs are untagged and STP state applies to all VLANs on the port
+so packet filters should be applied consistently across untagged and tagged
+VLANs on the port.
+
+Flooding L2 domain
+^^^^^^^^^^^^^^^^^^
+
+For a given L2 VLAN domain, the switch device should flood multicast/broadcast
+and unknown unicast packets to all ports in domain, if allowed by port's
+current STP state.  The switch driver, knowing which ports are within which
+vlan L2 domain, can program the switch device for flooding.  The packet may
+be sent to the port netdev for processing by the bridge driver.  The
+bridge should not reflood the packet to the same ports the device flooded,
+otherwise there will be duplicate packets on the wire.
+
+To avoid duplicate packets, the switch driver should mark a packet as already
+forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark
+the skb using the ingress bridge port's mark and prevent it from being forwarded
+through any bridge port with the same mark.
+
+It is possible for the switch device to not handle flooding and push the
+packets up to the bridge driver for flooding.  This is not ideal as the number
+of ports in the L2 domain scales, since the device is much more efficient at
+flooding packets than software.
+
+If supported by the device, flood control can be offloaded to it, preventing
+certain netdevs from flooding unicast traffic for which there is no FDB entry.
+
+IGMP Snooping
+^^^^^^^^^^^^^
+
+In order to support IGMP snooping, the port netdevs should trap to the bridge
+driver all IGMP join and leave messages.
+The bridge multicast module will notify port netdevs on every multicast group
+change, whether it is statically configured or dynamically joined/left.
+The hardware implementation should be forwarding all registered multicast
+traffic groups only to the configured ports.
+
+L3 Routing Offload
+------------------
+
+Offloading L3 routing requires that device be programmed with FIB entries from
+the kernel, with the device doing the FIB lookup and forwarding.  The device
+does a longest prefix match (LPM) on FIB entries matching route prefix and
+forwards the packet to the matching FIB entry's nexthop(s) egress ports.
+
+To program the device, the driver has to register a FIB notifier handler
+using register_fib_notifier. The following events are available:
+
+===================  ===================================================
+FIB_EVENT_ENTRY_ADD  used for both adding a new FIB entry to the device,
+		     or modifying an existing entry on the device.
+FIB_EVENT_ENTRY_DEL  used for removing a FIB entry
+FIB_EVENT_RULE_ADD,
+FIB_EVENT_RULE_DEL   used to propagate FIB rule changes
+===================  ===================================================
+
+FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass::
+
+	struct fib_entry_notifier_info {
+		struct fib_notifier_info info; /* must be first */
+		u32 dst;
+		int dst_len;
+		struct fib_info *fi;
+		u8 tos;
+		u8 type;
+		u32 tb_id;
+		u32 nlflags;
+	};
+
+to add/modify/delete IPv4 dst/dst_len prefix on table tb_id.  The ``*fi``
+structure holds details on the route and route's nexthops.  ``*dev`` is one
+of the port netdevs mentioned in the route's next hop list.
+
+Routes offloaded to the device are labeled with "offload" in the ip route
+listing::
+
+	$ ip route show
+	default via 192.168.0.2 dev eth0
+	11.0.0.0/30 dev sw1p1  proto kernel  scope link  src 11.0.0.2 offload
+	11.0.0.4/30 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
+	11.0.0.8/30 dev sw1p2  proto kernel  scope link  src 11.0.0.10 offload
+	11.0.0.12/30 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
+	12.0.0.2  proto zebra  metric 30 offload
+		nexthop via 11.0.0.1  dev sw1p1 weight 1
+		nexthop via 11.0.0.9  dev sw1p2 weight 1
+	12.0.0.3 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
+	12.0.0.4 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
+	192.168.0.0/24 dev eth0  proto kernel  scope link  src 192.168.0.15
+
+The "offload" flag is set in case at least one device offloads the FIB entry.
+
+XXX: add/mod/del IPv6 FIB API
+
+Nexthop Resolution
+^^^^^^^^^^^^^^^^^^
+
+The FIB entry's nexthop list contains the nexthop tuple (gateway, dev), but for
+the switch device to forward the packet with the correct dst mac address, the
+nexthop gateways must be resolved to the neighbor's mac address.  Neighbor mac
+address discovery comes via the ARP (or ND) process and is available via the
+arp_tbl neighbor table.  To resolve the route's nexthop gateways, the driver
+should trigger the kernel's neighbor resolution process.  See the rocker
+driver's rocker_port_ipv4_resolve() for an example.
+
+The driver can monitor for updates to arp_tbl using the netevent notifier
+NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
+for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
+to know when arp_tbl neighbor entries are purged from the port.
diff --git a/Documentation/networking/switchdev.txt b/Documentation/networking/switchdev.txt
deleted file mode 100644
index 86174ce8cd13..000000000000
--- a/Documentation/networking/switchdev.txt
+++ /dev/null
@@ -1,373 +0,0 @@
-Ethernet switch device driver model (switchdev)
-===============================================
-Copyright (c) 2014 Jiri Pirko <jiri@resnulli.us>
-Copyright (c) 2014-2015 Scott Feldman <sfeldma@gmail.com>
-
-
-The Ethernet switch device driver model (switchdev) is an in-kernel driver
-model for switch devices which offload the forwarding (data) plane from the
-kernel.
-
-Figure 1 is a block diagram showing the components of the switchdev model for
-an example setup using a data-center-class switch ASIC chip.  Other setups
-with SR-IOV or soft switches, such as OVS, are possible.
-
-
-                             User-space tools
-
-       user space                   |
-      +-------------------------------------------------------------------+
-       kernel                       | Netlink
-                                    |
-                     +--------------+-------------------------------+
-                     |         Network stack                        |
-                     |           (Linux)                            |
-                     |                                              |
-                     +----------------------------------------------+
-
-                           sw1p2     sw1p4     sw1p6
-                      sw1p1  +  sw1p3  +  sw1p5  +          eth1
-                        +    |    +    |    +    |            +
-                        |    |    |    |    |    |            |
-                     +--+----+----+----+----+----+---+  +-----+-----+
-                     |         Switch driver         |  |    mgmt   |
-                     |        (this document)        |  |   driver  |
-                     |                               |  |           |
-                     +--------------+----------------+  +-----------+
-                                    |
-       kernel                       | HW bus (eg PCI)
-      +-------------------------------------------------------------------+
-       hardware                     |
-                     +--------------+----------------+
-                     |         Switch device (sw1)   |
-                     |  +----+                       +--------+
-                     |  |    v offloaded data path   | mgmt port
-                     |  |    |                       |
-                     +--|----|----+----+----+----+---+
-                        |    |    |    |    |    |
-                        +    +    +    +    +    +
-                       p1   p2   p3   p4   p5   p6
-
-                             front-panel ports
-
-
-                                    Fig 1.
-
-
-Include Files
--------------
-
-#include <linux/netdevice.h>
-#include <net/switchdev.h>
-
-
-Configuration
--------------
-
-Use "depends NET_SWITCHDEV" in driver's Kconfig to ensure switchdev model
-support is built for driver.
-
-
-Switch Ports
-------------
-
-On switchdev driver initialization, the driver will allocate and register a
-struct net_device (using register_netdev()) for each enumerated physical switch
-port, called the port netdev.  A port netdev is the software representation of
-the physical port and provides a conduit for control traffic to/from the
-controller (the kernel) and the network, as well as an anchor point for higher
-level constructs such as bridges, bonds, VLANs, tunnels, and L3 routers.  Using
-standard netdev tools (iproute2, ethtool, etc), the port netdev can also
-provide to the user access to the physical properties of the switch port such
-as PHY link state and I/O statistics.
-
-There is (currently) no higher-level kernel object for the switch beyond the
-port netdevs.  All of the switchdev driver ops are netdev ops or switchdev ops.
-
-A switch management port is outside the scope of the switchdev driver model.
-Typically, the management port is not participating in offloaded data plane and
-is loaded with a different driver, such as a NIC driver, on the management port
-device.
-
-Switch ID
-^^^^^^^^^
-
-The switchdev driver must implement the net_device operation
-ndo_get_port_parent_id for each port netdev, returning the same physical ID for
-each port of a switch. The ID must be unique between switches on the same
-system. The ID does not need to be unique between switches on different
-systems.
-
-The switch ID is used to locate ports on a switch and to know if aggregated
-ports belong to the same switch.
-
-Port Netdev Naming
-^^^^^^^^^^^^^^^^^^
-
-Udev rules should be used for port netdev naming, using some unique attribute
-of the port as a key, for example the port MAC address or the port PHYS name.
-Hard-coding of kernel netdev names within the driver is discouraged; let the
-kernel pick the default netdev name, and let udev set the final name based on a
-port attribute.
-
-Using port PHYS name (ndo_get_phys_port_name) for the key is particularly
-useful for dynamically-named ports where the device names its ports based on
-external configuration.  For example, if a physical 40G port is split logically
-into 4 10G ports, resulting in 4 port netdevs, the device can give a unique
-name for each port using port PHYS name.  The udev rule would be:
-
-SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}=="<phys_switch_id>", \
-	ATTR{phys_port_name}!="", NAME="swX$attr{phys_port_name}"
-
-Suggested naming convention is "swXpYsZ", where X is the switch name or ID, Y
-is the port name or ID, and Z is the sub-port name or ID.  For example, sw1p1s0
-would be sub-port 0 on port 1 on switch 1.
-
-Port Features
-^^^^^^^^^^^^^
-
-NETIF_F_NETNS_LOCAL
-
-If the switchdev driver (and device) only supports offloading of the default
-network namespace (netns), the driver should set this feature flag to prevent
-the port netdev from being moved out of the default netns.  A netns-aware
-driver/device would not set this flag and be responsible for partitioning
-hardware to preserve netns containment.  This means hardware cannot forward
-traffic from a port in one namespace to another port in another namespace.
-
-Port Topology
-^^^^^^^^^^^^^
-
-The port netdevs representing the physical switch ports can be organized into
-higher-level switching constructs.  The default construct is a standalone
-router port, used to offload L3 forwarding.  Two or more ports can be bonded
-together to form a LAG.  Two or more ports (or LAGs) can be bridged to bridge
-L2 networks.  VLANs can be applied to sub-divide L2 networks.  L2-over-L3
-tunnels can be built on ports.  These constructs are built using standard Linux
-tools such as the bridge driver, the bonding/team drivers, and netlink-based
-tools such as iproute2.
-
-The switchdev driver can know a particular port's position in the topology by
-monitoring NETDEV_CHANGEUPPER notifications.  For example, a port moved into a
-bond will see it's upper master change.  If that bond is moved into a bridge,
-the bond's upper master will change.  And so on.  The driver will track such
-movements to know what position a port is in in the overall topology by
-registering for netdevice events and acting on NETDEV_CHANGEUPPER.
-
-L2 Forwarding Offload
----------------------
-
-The idea is to offload the L2 data forwarding (switching) path from the kernel
-to the switchdev device by mirroring bridge FDB entries down to the device.  An
-FDB entry is the {port, MAC, VLAN} tuple forwarding destination.
-
-To offloading L2 bridging, the switchdev driver/device should support:
-
-	- Static FDB entries installed on a bridge port
-	- Notification of learned/forgotten src mac/vlans from device
-	- STP state changes on the port
-	- VLAN flooding of multicast/broadcast and unknown unicast packets
-
-Static FDB Entries
-^^^^^^^^^^^^^^^^^^
-
-The switchdev driver should implement ndo_fdb_add, ndo_fdb_del and ndo_fdb_dump
-to support static FDB entries installed to the device.  Static bridge FDB
-entries are installed, for example, using iproute2 bridge cmd:
-
-	bridge fdb add ADDR dev DEV [vlan VID] [self]
-
-The driver should use the helper switchdev_port_fdb_xxx ops for ndo_fdb_xxx
-ops, and handle add/delete/dump of SWITCHDEV_OBJ_ID_PORT_FDB object using
-switchdev_port_obj_xxx ops.
-
-XXX: what should be done if offloading this rule to hardware fails (for
-example, due to full capacity in hardware tables) ?
-
-Note: by default, the bridge does not filter on VLAN and only bridges untagged
-traffic.  To enable VLAN support, turn on VLAN filtering:
-
-	echo 1 >/sys/class/net/<bridge>/bridge/vlan_filtering
-
-Notification of Learned/Forgotten Source MAC/VLANs
-^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
-
-The switch device will learn/forget source MAC address/VLAN on ingress packets
-and notify the switch driver of the mac/vlan/port tuples.  The switch driver,
-in turn, will notify the bridge driver using the switchdev notifier call:
-
-	err = call_switchdev_notifiers(val, dev, info, extack);
-
-Where val is SWITCHDEV_FDB_ADD when learning and SWITCHDEV_FDB_DEL when
-forgetting, and info points to a struct switchdev_notifier_fdb_info.  On
-SWITCHDEV_FDB_ADD, the bridge driver will install the FDB entry into the
-bridge's FDB and mark the entry as NTF_EXT_LEARNED.  The iproute2 bridge
-command will label these entries "offload":
-
-	$ bridge fdb
-	52:54:00:12:35:01 dev sw1p1 master br0 permanent
-	00:02:00:00:02:00 dev sw1p1 master br0 offload
-	00:02:00:00:02:00 dev sw1p1 self
-	52:54:00:12:35:02 dev sw1p2 master br0 permanent
-	00:02:00:00:03:00 dev sw1p2 master br0 offload
-	00:02:00:00:03:00 dev sw1p2 self
-	33:33:00:00:00:01 dev eth0 self permanent
-	01:00:5e:00:00:01 dev eth0 self permanent
-	33:33:ff:00:00:00 dev eth0 self permanent
-	01:80:c2:00:00:0e dev eth0 self permanent
-	33:33:00:00:00:01 dev br0 self permanent
-	01:00:5e:00:00:01 dev br0 self permanent
-	33:33:ff:12:35:01 dev br0 self permanent
-
-Learning on the port should be disabled on the bridge using the bridge command:
-
-	bridge link set dev DEV learning off
-
-Learning on the device port should be enabled, as well as learning_sync:
-
-	bridge link set dev DEV learning on self
-	bridge link set dev DEV learning_sync on self
-
-Learning_sync attribute enables syncing of the learned/forgotten FDB entry to
-the bridge's FDB.  It's possible, but not optimal, to enable learning on the
-device port and on the bridge port, and disable learning_sync.
-
-To support learning, the driver implements switchdev op
-switchdev_port_attr_set for SWITCHDEV_ATTR_PORT_ID_{PRE}_BRIDGE_FLAGS.
-
-FDB Ageing
-^^^^^^^^^^
-
-The bridge will skip ageing FDB entries marked with NTF_EXT_LEARNED and it is
-the responsibility of the port driver/device to age out these entries.  If the
-port device supports ageing, when the FDB entry expires, it will notify the
-driver which in turn will notify the bridge with SWITCHDEV_FDB_DEL.  If the
-device does not support ageing, the driver can simulate ageing using a
-garbage collection timer to monitor FDB entries.  Expired entries will be
-notified to the bridge using SWITCHDEV_FDB_DEL.  See rocker driver for
-example of driver running ageing timer.
-
-To keep an NTF_EXT_LEARNED entry "alive", the driver should refresh the FDB
-entry by calling call_switchdev_notifiers(SWITCHDEV_FDB_ADD, ...).  The
-notification will reset the FDB entry's last-used time to now.  The driver
-should rate limit refresh notifications, for example, no more than once a
-second.  (The last-used time is visible using the bridge -s fdb option).
-
-STP State Change on Port
-^^^^^^^^^^^^^^^^^^^^^^^^
-
-Internally or with a third-party STP protocol implementation (e.g. mstpd), the
-bridge driver maintains the STP state for ports, and will notify the switch
-driver of STP state change on a port using the switchdev op
-switchdev_attr_port_set for SWITCHDEV_ATTR_PORT_ID_STP_UPDATE.
-
-State is one of BR_STATE_*.  The switch driver can use STP state updates to
-update ingress packet filter list for the port.  For example, if port is
-DISABLED, no packets should pass, but if port moves to BLOCKED, then STP BPDUs
-and other IEEE 01:80:c2:xx:xx:xx link-local multicast packets can pass.
-
-Note that STP BDPUs are untagged and STP state applies to all VLANs on the port
-so packet filters should be applied consistently across untagged and tagged
-VLANs on the port.
-
-Flooding L2 domain
-^^^^^^^^^^^^^^^^^^
-
-For a given L2 VLAN domain, the switch device should flood multicast/broadcast
-and unknown unicast packets to all ports in domain, if allowed by port's
-current STP state.  The switch driver, knowing which ports are within which
-vlan L2 domain, can program the switch device for flooding.  The packet may
-be sent to the port netdev for processing by the bridge driver.  The
-bridge should not reflood the packet to the same ports the device flooded,
-otherwise there will be duplicate packets on the wire.
-
-To avoid duplicate packets, the switch driver should mark a packet as already
-forwarded by setting the skb->offload_fwd_mark bit. The bridge driver will mark
-the skb using the ingress bridge port's mark and prevent it from being forwarded
-through any bridge port with the same mark.
-
-It is possible for the switch device to not handle flooding and push the
-packets up to the bridge driver for flooding.  This is not ideal as the number
-of ports scale in the L2 domain as the device is much more efficient at
-flooding packets that software.
-
-If supported by the device, flood control can be offloaded to it, preventing
-certain netdevs from flooding unicast traffic for which there is no FDB entry.
-
-IGMP Snooping
-^^^^^^^^^^^^^
-
-In order to support IGMP snooping, the port netdevs should trap to the bridge
-driver all IGMP join and leave messages.
-The bridge multicast module will notify port netdevs on every multicast group
-changed whether it is static configured or dynamically joined/leave.
-The hardware implementation should be forwarding all registered multicast
-traffic groups only to the configured ports.
-
-L3 Routing Offload
-------------------
-
-Offloading L3 routing requires that device be programmed with FIB entries from
-the kernel, with the device doing the FIB lookup and forwarding.  The device
-does a longest prefix match (LPM) on FIB entries matching route prefix and
-forwards the packet to the matching FIB entry's nexthop(s) egress ports.
-
-To program the device, the driver has to register a FIB notifier handler
-using register_fib_notifier. The following events are available:
-FIB_EVENT_ENTRY_ADD: used for both adding a new FIB entry to the device,
-                     or modifying an existing entry on the device.
-FIB_EVENT_ENTRY_DEL: used for removing a FIB entry
-FIB_EVENT_RULE_ADD, FIB_EVENT_RULE_DEL: used to propagate FIB rule changes
-
-FIB_EVENT_ENTRY_ADD and FIB_EVENT_ENTRY_DEL events pass:
-
-	struct fib_entry_notifier_info {
-		struct fib_notifier_info info; /* must be first */
-		u32 dst;
-		int dst_len;
-		struct fib_info *fi;
-		u8 tos;
-		u8 type;
-		u32 tb_id;
-		u32 nlflags;
-	};
-
-to add/modify/delete IPv4 dst/dest_len prefix on table tb_id.  The *fi
-structure holds details on the route and route's nexthops.  *dev is one of the
-port netdevs mentioned in the route's next hop list.
-
-Routes offloaded to the device are labeled with "offload" in the ip route
-listing:
-
-	$ ip route show
-	default via 192.168.0.2 dev eth0
-	11.0.0.0/30 dev sw1p1  proto kernel  scope link  src 11.0.0.2 offload
-	11.0.0.4/30 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
-	11.0.0.8/30 dev sw1p2  proto kernel  scope link  src 11.0.0.10 offload
-	11.0.0.12/30 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
-	12.0.0.2  proto zebra  metric 30 offload
-		nexthop via 11.0.0.1  dev sw1p1 weight 1
-		nexthop via 11.0.0.9  dev sw1p2 weight 1
-	12.0.0.3 via 11.0.0.1 dev sw1p1  proto zebra  metric 20 offload
-	12.0.0.4 via 11.0.0.9 dev sw1p2  proto zebra  metric 20 offload
-	192.168.0.0/24 dev eth0  proto kernel  scope link  src 192.168.0.15
-
-The "offload" flag is set in case at least one device offloads the FIB entry.
-
-XXX: add/mod/del IPv6 FIB API
-
-Nexthop Resolution
-^^^^^^^^^^^^^^^^^^
-
-The FIB entry's nexthop list contains the nexthop tuple (gateway, dev), but for
-the switch device to forward the packet with the correct dst mac address, the
-nexthop gateways must be resolved to the neighbor's mac address.  Neighbor mac
-address discovery comes via the ARP (or ND) process and is available via the
-arp_tbl neighbor table.  To resolve the routes nexthop gateways, the driver
-should trigger the kernel's neighbor resolution process.  See the rocker
-driver's rocker_port_ipv4_resolve() for an example.
-
-The driver can monitor for updates to arp_tbl using the netevent notifier
-NETEVENT_NEIGH_UPDATE.  The device can be programmed with resolved nexthops
-for the routes as arp_tbl updates.  The driver implements ndo_neigh_destroy
-to know when arp_tbl neighbor entries are purged from the port.
diff --git a/drivers/staging/fsl-dpaa2/ethsw/README b/drivers/staging/fsl-dpaa2/ethsw/README
index f6fc07f780d1..b48dcbf7c5fb 100644
--- a/drivers/staging/fsl-dpaa2/ethsw/README
+++ b/drivers/staging/fsl-dpaa2/ethsw/README
@@ -79,7 +79,7 @@ The DPSW can have ports connected to DPNIs or to PHYs via DPMACs.
 
 For a more detailed description of the Ethernet switch device driver model
 see:
-	Documentation/networking/switchdev.txt
+	Documentation/networking/switchdev.rst
 
 Creating an Ethernet Switch
 ===========================
-- 
cgit v1.2.3-59-g8ed1b


From d2461edde7d15121a2cd39d48cd39edbd3fa019a Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:28 +0200
Subject: docs: networking: convert tc-actions-env-rules.txt to ReST

- add SPDX header;
- add a document title;
- use the right numbered list markup;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst                |  1 +
 Documentation/networking/tc-actions-env-rules.rst | 29 +++++++++++++++++++++++
 Documentation/networking/tc-actions-env-rules.txt | 24 -------------------
 3 files changed, 30 insertions(+), 24 deletions(-)
 create mode 100644 Documentation/networking/tc-actions-env-rules.rst
 delete mode 100644 Documentation/networking/tc-actions-env-rules.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 5e495804f96f..f53d89b5679a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -106,6 +106,7 @@ Contents:
    skfp
    strparser
    switchdev
+   tc-actions-env-rules
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/tc-actions-env-rules.rst b/Documentation/networking/tc-actions-env-rules.rst
new file mode 100644
index 000000000000..86884b8fb4e0
--- /dev/null
+++ b/Documentation/networking/tc-actions-env-rules.rst
@@ -0,0 +1,29 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+TC Actions - Environmental Rules
+================================
+
+
+The "environmental" rules for authors of any new tc actions are:
+
+1) If you stealeth or borroweth any packet thou shalt be branching
+   from the righteous path and thou shalt cloneth.
+
+   For example if your action queues a packet to be processed later,
+   or intentionally branches by redirecting a packet, then you need to
+   clone the packet.
+
+2) If you munge any packet thou shalt call pskb_expand_head in the case
+   someone else is referencing the skb. After that you "own" the skb.
+
+3) Dropping packets you don't own is a no-no. You simply return
+   TC_ACT_SHOT to the caller and they will drop it.
+
+The "environmental" rules for callers of actions (qdiscs etc) are:
+
+#) Thou art responsible for freeing anything returned as being
+   TC_ACT_SHOT/STOLEN/QUEUED. If none of TC_ACT_SHOT/STOLEN/QUEUED is
+   returned, then all is great and you don't need to do anything.
+
+Post on netdev if something is unclear.
diff --git a/Documentation/networking/tc-actions-env-rules.txt b/Documentation/networking/tc-actions-env-rules.txt
deleted file mode 100644
index f37814693ad3..000000000000
--- a/Documentation/networking/tc-actions-env-rules.txt
+++ /dev/null
@@ -1,24 +0,0 @@
-
-The "environmental" rules for authors of any new tc actions are:
-
-1) If you stealeth or borroweth any packet thou shalt be branching
-from the righteous path and thou shalt cloneth.
-
-For example if your action queues a packet to be processed later,
-or intentionally branches by redirecting a packet, then you need to
-clone the packet.
-
-2) If you munge any packet thou shalt call pskb_expand_head in the case
-someone else is referencing the skb. After that you "own" the skb.
-
-3) Dropping packets you don't own is a no-no. You simply return
-TC_ACT_SHOT to the caller and they will drop it.
-
-The "environmental" rules for callers of actions (qdiscs etc) are:
-
-*) Thou art responsible for freeing anything returned as being
-TC_ACT_SHOT/STOLEN/QUEUED. If none of TC_ACT_SHOT/STOLEN/QUEUED is
-returned, then all is great and you don't need to do anything.
-
-Post on netdev if something is unclear.
-
-- 
cgit v1.2.3-59-g8ed1b


From ff159f4f1152b7012d378c69ffd253ed53227865 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:29 +0200
Subject: docs: networking: convert tcp-thin.txt to ReST

Not much to be done here:

- add SPDX header;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |  1 +
 Documentation/networking/ip-sysctl.rst |  2 +-
 Documentation/networking/tcp-thin.rst  | 52 ++++++++++++++++++++++++++++++++++
 Documentation/networking/tcp-thin.txt  | 47 ------------------------------
 4 files changed, 54 insertions(+), 48 deletions(-)
 create mode 100644 Documentation/networking/tcp-thin.rst
 delete mode 100644 Documentation/networking/tcp-thin.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index f53d89b5679a..89b02fbfc2eb 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -107,6 +107,7 @@ Contents:
    strparser
    switchdev
    tc-actions-env-rules
+   tcp-thin
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 38f811d4b2f0..3266aee9e052 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -886,7 +886,7 @@ tcp_thin_linear_timeouts - BOOLEAN
 	initiated. This improves retransmission latency for
 	non-aggressive thin streams, often found to be time-dependent.
 	For more information on thin streams, see
-	Documentation/networking/tcp-thin.txt
+	Documentation/networking/tcp-thin.rst
 
 	Default: 0
 
diff --git a/Documentation/networking/tcp-thin.rst b/Documentation/networking/tcp-thin.rst
new file mode 100644
index 000000000000..b06765c96ea1
--- /dev/null
+++ b/Documentation/networking/tcp-thin.rst
@@ -0,0 +1,52 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+Thin-streams and TCP
+====================
+
+A wide range of Internet-based services that use reliable transport
+protocols display what we call thin-stream properties. This means
+that the application sends data with such a low rate that the
+retransmission mechanisms of the transport protocol are not fully
+effective. In time-dependent scenarios (like online games, control
+systems, stock trading etc.) where the user experience depends
+on the data delivery latency, packet loss can be devastating for
+the service quality. Extreme latencies are caused by TCP's
+dependency on the arrival of new data from the application to trigger
+retransmissions effectively through fast retransmit instead of
+waiting for long timeouts.
+
+After analysing a large number of time-dependent interactive
+applications, we have seen that they often produce thin streams
+and also stay with this traffic pattern throughout their entire
+lifespan. The combination of time-dependency and the fact that the
+streams provoke high latencies when using TCP is unfortunate.
+
+In order to reduce application-layer latency when packets are lost,
+a set of mechanisms has been made, which address these latency issues
+for thin streams. In short, if the kernel detects a thin stream,
+the retransmission mechanisms are modified in the following manner:
+
+1) If the stream is thin, fast retransmit on the first dupACK.
+2) If the stream is thin, do not apply exponential backoff.
+
+These enhancements are applied only if the stream is detected as
+thin. This is accomplished by defining a threshold for the number
+of packets in flight. If there are fewer than 4 packets in flight,
+fast retransmissions can not be triggered, and the stream is prone
+to experience high retransmission latencies.
+
+Since these mechanisms are targeted at time-dependent applications,
+they must be specifically activated by the application using the
+TCP_THIN_LINEAR_TIMEOUTS and TCP_THIN_DUPACK IOCTLS or the
+tcp_thin_linear_timeouts and tcp_thin_dupack sysctls. Both
+modifications are turned off by default.
+
+References
+==========
+More information on the modifications, as well as a wide range of
+experimental data can be found here:
+
+"Improving latency for interactive, thin-stream applications over
+reliable transport"
+http://simula.no/research/nd/publications/Simula.nd.477/simula_pdf_file
diff --git a/Documentation/networking/tcp-thin.txt b/Documentation/networking/tcp-thin.txt
deleted file mode 100644
index 151e229980f1..000000000000
--- a/Documentation/networking/tcp-thin.txt
+++ /dev/null
@@ -1,47 +0,0 @@
-Thin-streams and TCP
-====================
-A wide range of Internet-based services that use reliable transport
-protocols display what we call thin-stream properties. This means
-that the application sends data with such a low rate that the
-retransmission mechanisms of the transport protocol are not fully
-effective. In time-dependent scenarios (like online games, control
-systems, stock trading etc.) where the user experience depends
-on the data delivery latency, packet loss can be devastating for
-the service quality. Extreme latencies are caused by TCP's
-dependency on the arrival of new data from the application to trigger
-retransmissions effectively through fast retransmit instead of
-waiting for long timeouts.
-
-After analysing a large number of time-dependent interactive
-applications, we have seen that they often produce thin streams
-and also stay with this traffic pattern throughout its entire
-lifespan. The combination of time-dependency and the fact that the
-streams provoke high latencies when using TCP is unfortunate.
-
-In order to reduce application-layer latency when packets are lost,
-a set of mechanisms has been made, which address these latency issues
-for thin streams. In short, if the kernel detects a thin stream,
-the retransmission mechanisms are modified in the following manner:
-
-1) If the stream is thin, fast retransmit on the first dupACK.
-2) If the stream is thin, do not apply exponential backoff.
-
-These enhancements are applied only if the stream is detected as
-thin. This is accomplished by defining a threshold for the number
-of packets in flight. If there are less than 4 packets in flight,
-fast retransmissions can not be triggered, and the stream is prone
-to experience high retransmission latencies.
-
-Since these mechanisms are targeted at time-dependent applications,
-they must be specifically activated by the application using the
-TCP_THIN_LINEAR_TIMEOUTS and TCP_THIN_DUPACK IOCTLS or the
-tcp_thin_linear_timeouts and tcp_thin_dupack sysctls. Both
-modifications are turned off by default.
-
-References
-==========
-More information on the modifications, as well as a wide range of
-experimental data can be found here:
-"Improving latency for interactive, thin-stream applications over
-reliable transport"
-http://simula.no/research/nd/publications/Simula.nd.477/simula_pdf_file
-- 
cgit v1.2.3-59-g8ed1b


From aa8a6ee3e3fc4001e952de37660fe71826da8189 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:30 +0200
Subject: docs: networking: convert team.txt to ReST

Not much to be done here:
- add SPDX header;
- add a document title;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst | 1 +
 Documentation/networking/team.rst  | 8 ++++++++
 Documentation/networking/team.txt  | 2 --
 3 files changed, 9 insertions(+), 2 deletions(-)
 create mode 100644 Documentation/networking/team.rst
 delete mode 100644 Documentation/networking/team.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 89b02fbfc2eb..be65ee509669 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -108,6 +108,7 @@ Contents:
    switchdev
    tc-actions-env-rules
    tcp-thin
+   team
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/team.rst b/Documentation/networking/team.rst
new file mode 100644
index 000000000000..0a7f3a059586
--- /dev/null
+++ b/Documentation/networking/team.rst
@@ -0,0 +1,8 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+Team
+====
+
+Team devices are driven from userspace via libteam library which is here:
+	https://github.com/jpirko/libteam
diff --git a/Documentation/networking/team.txt b/Documentation/networking/team.txt
deleted file mode 100644
index 5a013686b9ea..000000000000
--- a/Documentation/networking/team.txt
+++ /dev/null
@@ -1,2 +0,0 @@
-Team devices are driven from userspace via libteam library which is here:
-	https://github.com/jpirko/libteam
-- 
cgit v1.2.3-59-g8ed1b


From 06bfa47e72c83550fefc93c62a1ace5fff72e212 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:31 +0200
Subject: docs: networking: convert timestamping.txt to ReST

- add SPDX header;
- add a document title;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst        |   1 +
 Documentation/networking/packet_mmap.rst  |   4 +-
 Documentation/networking/timestamping.rst | 591 ++++++++++++++++++++++++++++++
 Documentation/networking/timestamping.txt | 571 -----------------------------
 include/uapi/linux/errqueue.h             |   2 +-
 5 files changed, 595 insertions(+), 574 deletions(-)
 create mode 100644 Documentation/networking/timestamping.rst
 delete mode 100644 Documentation/networking/timestamping.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index be65ee509669..8f9a84b8e3f2 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -109,6 +109,7 @@ Contents:
    tc-actions-env-rules
    tcp-thin
    team
+   timestamping
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/packet_mmap.rst b/Documentation/networking/packet_mmap.rst
index 884c7222b9e9..6c009ceb1183 100644
--- a/Documentation/networking/packet_mmap.rst
+++ b/Documentation/networking/packet_mmap.rst
@@ -1030,7 +1030,7 @@ the packet meta information for mmap(2)ed RX_RING and TX_RINGs.  If your
 NIC is capable of timestamping packets in hardware, you can request those
 hardware timestamps to be used. Note: you may need to enable the generation
 of hardware timestamps with SIOCSHWTSTAMP (see related information from
-Documentation/networking/timestamping.txt).
+Documentation/networking/timestamping.rst).
 
 PACKET_TIMESTAMP accepts the same integer bit field as SO_TIMESTAMPING::
 
@@ -1069,7 +1069,7 @@ TX_RING part only TP_STATUS_AVAILABLE is set, then the tp_sec and tp_{n,u}sec
 members do not contain a valid value. For TX_RINGs, by default no timestamp
 is generated!
 
-See include/linux/net_tstamp.h and Documentation/networking/timestamping.txt
+See include/linux/net_tstamp.h and Documentation/networking/timestamping.rst
 for more information on hardware timestamps.
 
 Miscellaneous bits
diff --git a/Documentation/networking/timestamping.rst b/Documentation/networking/timestamping.rst
new file mode 100644
index 000000000000..1adead6a4527
--- /dev/null
+++ b/Documentation/networking/timestamping.rst
@@ -0,0 +1,591 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Timestamping
+============
+
+
+1. Control Interfaces
+=====================
+
+The interfaces for receiving network packet timestamps are:
+
+SO_TIMESTAMP
+  Generates a timestamp for each incoming packet in (not necessarily
+  monotonic) system time. Reports the timestamp via recvmsg() in a
+  control message in usec resolution.
+  SO_TIMESTAMP is defined as SO_TIMESTAMP_NEW or SO_TIMESTAMP_OLD
+  based on the architecture type and time_t representation of libc.
+  Control message format is in struct __kernel_old_timeval for
+  SO_TIMESTAMP_OLD and in struct __kernel_sock_timeval for
+  SO_TIMESTAMP_NEW options respectively.
+
+SO_TIMESTAMPNS
+  Same timestamping mechanism as SO_TIMESTAMP, but reports the
+  timestamp as struct timespec in nsec resolution.
+  SO_TIMESTAMPNS is defined as SO_TIMESTAMPNS_NEW or SO_TIMESTAMPNS_OLD
+  based on the architecture type and time_t representation of libc.
+  Control message format is in struct timespec for SO_TIMESTAMPNS_OLD
+  and in struct __kernel_timespec for SO_TIMESTAMPNS_NEW options
+  respectively.
+
+IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
+  Only for multicast: approximate transmit timestamp obtained by
+  reading the looped packet receive timestamp.
+
+SO_TIMESTAMPING
+  Generates timestamps on reception, transmission or both. Supports
+  multiple timestamp sources, including hardware. Supports generating
+  timestamps for stream sockets.
+
+
+1.1 SO_TIMESTAMP (also SO_TIMESTAMP_OLD and SO_TIMESTAMP_NEW)
+-------------------------------------------------------------
+
+This socket option enables timestamping of datagrams on the reception
+path. Because the destination socket, if any, is not known early in
+the network stack, the feature has to be enabled for all packets. The
+same is true for all early receive timestamp options.
+
+For interface details, see `man 7 socket`.
+
+Always use SO_TIMESTAMP_NEW timestamp to always get timestamp in
+struct __kernel_sock_timeval format.
+
+SO_TIMESTAMP_OLD returns incorrect timestamps after the year 2038
+on 32 bit machines.
+
+1.2 SO_TIMESTAMPNS (also SO_TIMESTAMPNS_OLD and SO_TIMESTAMPNS_NEW)
+-------------------------------------------------------------------
+This option is identical to SO_TIMESTAMP except for the returned data type.
+Its struct timespec allows for higher resolution (ns) timestamps than the
+timeval of SO_TIMESTAMP (us).
+
+Always use SO_TIMESTAMPNS_NEW timestamp to always get timestamp in
+struct __kernel_timespec format.
+
+SO_TIMESTAMPNS_OLD returns incorrect timestamps after the year 2038
+on 32 bit machines.
+
+1.3 SO_TIMESTAMPING (also SO_TIMESTAMPING_OLD and SO_TIMESTAMPING_NEW)
+----------------------------------------------------------------------
+
+Supports multiple types of timestamp requests. As a result, this
+socket option takes a bitmap of flags, not a boolean. In::
+
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
+
+val is an integer with any of the following bits set. Setting other
+bits returns EINVAL and does not change the current state.
+
+The socket option configures timestamp generation for individual
+sk_buffs (1.3.1), timestamp reporting to the socket's error
+queue (1.3.2) and options (1.3.3). Timestamp generation can also
+be enabled for individual sendmsg calls using cmsg (1.3.4).
+
+
+1.3.1 Timestamp Generation
+^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Some bits are requests to the stack to try to generate timestamps. Any
+combination of them is valid. Changes to these bits apply to newly
+created packets, not to packets already in the stack. As a result, it
+is possible to selectively request timestamps for a subset of packets
+(e.g., for sampling) by embedding a send() call within two setsockopt
+calls, one to enable timestamp generation and one to disable it.
+Timestamps may also be generated for reasons other than being
+requested by a particular socket, such as when receive timestamping is
+enabled system wide, as explained earlier.
+
+SOF_TIMESTAMPING_RX_HARDWARE:
+  Request rx timestamps generated by the network adapter.
+
+SOF_TIMESTAMPING_RX_SOFTWARE:
+  Request rx timestamps when data enters the kernel. These timestamps
+  are generated just after a device driver hands a packet to the
+  kernel receive stack.
+
+SOF_TIMESTAMPING_TX_HARDWARE:
+  Request tx timestamps generated by the network adapter. This flag
+  can be enabled via both socket options and control messages.
+
+SOF_TIMESTAMPING_TX_SOFTWARE:
+  Request tx timestamps when data leaves the kernel. These timestamps
+  are generated in the device driver as close as possible, but always
+  prior to, passing the packet to the network interface. Hence, they
+  require driver support and may not be available for all devices.
+  This flag can be enabled via both socket options and control messages.
+
+SOF_TIMESTAMPING_TX_SCHED:
+  Request tx timestamps prior to entering the packet scheduler. Kernel
+  transmit latency is, if long, often dominated by queuing delay. The
+  difference between this timestamp and one taken at
+  SOF_TIMESTAMPING_TX_SOFTWARE will expose this latency independent
+  of protocol processing. The latency incurred in protocol
+  processing, if any, can be computed by subtracting a userspace
+  timestamp taken immediately before send() from this timestamp. On
+  machines with virtual devices where a transmitted packet travels
+  through multiple devices and, hence, multiple packet schedulers,
+  a timestamp is generated at each layer. This allows for fine
+  grained measurement of queuing delay. This flag can be enabled
+  via both socket options and control messages.
+
+SOF_TIMESTAMPING_TX_ACK:
+  Request tx timestamps when all data in the send buffer has been
+  acknowledged. This only makes sense for reliable protocols. It is
+  currently only implemented for TCP. For that protocol, it may
+  over-report measurement, because the timestamp is generated when all
+  data up to and including the buffer at send() was acknowledged: the
+  cumulative acknowledgment. The mechanism ignores SACK and FACK.
+  This flag can be enabled via both socket options and control messages.
+
+
+1.3.2 Timestamp Reporting
+^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The other three bits control which timestamps will be reported in a
+generated control message. Changes to the bits take immediate
+effect at the timestamp reporting locations in the stack. Timestamps
+are only reported for packets that also have the relevant timestamp
+generation request set.
+
+SOF_TIMESTAMPING_SOFTWARE:
+  Report any software timestamps when available.
+
+SOF_TIMESTAMPING_SYS_HARDWARE:
+  This option is deprecated and ignored.
+
+SOF_TIMESTAMPING_RAW_HARDWARE:
+  Report hardware timestamps as generated by
+  SOF_TIMESTAMPING_TX_HARDWARE when available.
+
+
+1.3.3 Timestamp Options
+^^^^^^^^^^^^^^^^^^^^^^^
+
+The interface supports the options
+
+SOF_TIMESTAMPING_OPT_ID:
+  Generate a unique identifier along with each packet. A process can
+  have multiple concurrent timestamping requests outstanding. Packets
+  can be reordered in the transmit path, for instance in the packet
+  scheduler. In that case timestamps will be queued onto the error
+  queue out of order from the original send() calls. It is not always
+  possible to uniquely match timestamps to the original send() calls
+  based on timestamp order or payload inspection alone, then.
+
+  This option associates each packet at send() with a unique
+  identifier and returns that along with the timestamp. The identifier
+  is derived from a per-socket u32 counter (that wraps). For datagram
+  sockets, the counter increments with each sent packet. For stream
+  sockets, it increments with every byte.
+
+  The counter starts at zero. It is initialized the first time that
+  the socket option is enabled. It is reset each time the option is
+  enabled after having been disabled. Resetting the counter does not
+  change the identifiers of existing packets in the system.
+
+  This option is implemented only for transmit timestamps. There, the
+  timestamp is always looped along with a struct sock_extended_err.
+  The option modifies field ee_data to pass an id that is unique
+  among all possibly concurrently outstanding timestamp requests for
+  that socket.
+
+
+SOF_TIMESTAMPING_OPT_CMSG:
+  Support recv() cmsg for all timestamped packets. Control messages
+  are already supported unconditionally on all packets with receive
+  timestamps and on IPv6 packets with transmit timestamp. This option
+  extends them to IPv4 packets with transmit timestamp. One use case
+  is to correlate packets with their egress device, by enabling socket
+  option IP_PKTINFO simultaneously.
+
+
+SOF_TIMESTAMPING_OPT_TSONLY:
+  Applies to transmit timestamps only. Makes the kernel return the
+  timestamp as a cmsg alongside an empty packet, as opposed to
+  alongside the original packet. This reduces the amount of memory
+  charged to the socket's receive budget (SO_RCVBUF) and delivers
+  the timestamp even if sysctl net.core.tstamp_allow_data is 0.
+  This option disables SOF_TIMESTAMPING_OPT_CMSG.
+
+SOF_TIMESTAMPING_OPT_STATS:
+  Optional stats that are obtained along with the transmit timestamps.
+  It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the
+  transmit timestamp is available, the stats are available in a
+  separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a
+  list of TLVs (struct nlattr) of types. These stats allow the
+  application to associate various transport layer stats with
+  the transmit timestamps, such as how long a certain block of
+  data was limited by peer's receiver window.
+
+SOF_TIMESTAMPING_OPT_PKTINFO:
+  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
+  packets with hardware timestamps. The message contains struct
+  scm_ts_pktinfo, which supplies the index of the real interface which
+  received the packet and its length at layer 2. A valid (non-zero)
+  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
+  enabled and the driver is using NAPI. The struct contains also two
+  other fields, but they are reserved and undefined.
+
+SOF_TIMESTAMPING_OPT_TX_SWHW:
+  Request both hardware and software timestamps for outgoing packets
+  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
+  are enabled at the same time. If both timestamps are generated,
+  two separate messages will be looped to the socket's error queue,
+  each containing just one timestamp.
+
+New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
+disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
+regardless of the setting of sysctl net.core.tstamp_allow_data.
+
+An exception is when a process needs additional cmsg data, for
+instance SOL_IP/IP_PKTINFO to detect the egress network interface.
+Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
+having access to the contents of the original packet, so cannot be
+combined with SOF_TIMESTAMPING_OPT_TSONLY.
+
+
+1.3.4. Enabling timestamps via control messages
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+In addition to socket options, timestamp generation can be requested
+per write via cmsg, only for SOF_TIMESTAMPING_TX_* (see Section 1.3.1).
+Using this feature, applications can sample timestamps per sendmsg()
+without paying the overhead of enabling and disabling timestamps via
+setsockopt::
+
+  struct msghdr *msg;
+  ...
+  cmsg			       = CMSG_FIRSTHDR(msg);
+  cmsg->cmsg_level	       = SOL_SOCKET;
+  cmsg->cmsg_type	       = SO_TIMESTAMPING;
+  cmsg->cmsg_len	       = CMSG_LEN(sizeof(__u32));
+  *((__u32 *) CMSG_DATA(cmsg)) = SOF_TIMESTAMPING_TX_SCHED |
+				 SOF_TIMESTAMPING_TX_SOFTWARE |
+				 SOF_TIMESTAMPING_TX_ACK;
+  err = sendmsg(fd, msg, 0);
+
+The SOF_TIMESTAMPING_TX_* flags set via cmsg will override
+the SOF_TIMESTAMPING_TX_* flags set via setsockopt.
+
+Moreover, applications must still enable timestamp reporting via
+setsockopt to receive timestamps::
+
+  __u32 val = SOF_TIMESTAMPING_SOFTWARE |
+	      SOF_TIMESTAMPING_OPT_ID /* or any other flag */;
+  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
+
+
+1.4 Bytestream Timestamps
+-------------------------
+
+The SO_TIMESTAMPING interface supports timestamping of bytes in a
+bytestream. Each request is interpreted as a request for when the
+entire contents of the buffer has passed a timestamping point. That
+is, for streams option SOF_TIMESTAMPING_TX_SOFTWARE will record
+when all bytes have reached the device driver, regardless of how
+many packets the data has been converted into.
+
+In general, bytestreams have no natural delimiters and therefore
+correlating a timestamp with data is non-trivial. A range of bytes
+may be split across segments, any segments may be merged (possibly
+coalescing sections of previously segmented buffers associated with
+independent send() calls). Segments can be reordered and the same
+byte range can coexist in multiple segments for protocols that
+implement retransmissions.
+
+It is essential that all timestamps implement the same semantics,
+regardless of these possible transformations, as otherwise they are
+incomparable. Handling "rare" corner cases differently from the
+simple case (a 1:1 mapping from buffer to skb) is insufficient
+because performance debugging often needs to focus on such outliers.
+
+In practice, timestamps can be correlated with segments of a
+bytestream consistently, if both semantics of the timestamp and the
+timing of measurement are chosen correctly. This challenge is no
+different from deciding on a strategy for IP fragmentation. There, the
+definition is that only the first fragment is timestamped. For
+bytestreams, we chose that a timestamp is generated only when all
+bytes have passed a point. SOF_TIMESTAMPING_TX_ACK as defined is easy to
+implement and reason about. An implementation that has to take into
+account SACK would be more complex due to possible transmission holes
+and out of order arrival.
+
+On the host, TCP can also break the simple 1:1 mapping from buffer to
+skbuff as a result of Nagle, cork, autocork, segmentation and GSO. The
+implementation ensures correctness in all cases by tracking the
+individual last byte passed to send(), even if it is no longer the
+last byte after an skbuff extend or merge operation. It stores the
+relevant sequence number in skb_shinfo(skb)->tskey. Because an skbuff
+has only one such field, only one timestamp can be generated.
+
+In rare cases, a timestamp request can be missed if two requests are
+collapsed onto the same skb. A process can detect this situation by
+enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at
+send time with the value returned for each timestamp. It can prevent
+the situation by always flushing the TCP stack in between requests,
+for instance by enabling TCP_NODELAY and disabling TCP_CORK and
+autocork.
+
+These precautions ensure that the timestamp is generated only when all
+bytes have passed a timestamp point, assuming that the network stack
+itself does not reorder the segments. The stack indeed tries to avoid
+reordering. The one exception is under administrator control: it is
+possible to construct a packet scheduler configuration that delays
+segments from the same stream differently. Such a setup would be
+unusual.
+
+
+2. Data Interfaces
+==================
+
+Timestamps are read using the ancillary data feature of recvmsg().
+See `man 3 cmsg` for details of this interface. The socket manual
+page (`man 7 socket`) describes how timestamps generated with
+SO_TIMESTAMP and SO_TIMESTAMPNS records can be retrieved.
+
+
+2.1 SCM_TIMESTAMPING records
+----------------------------
+
+These timestamps are returned in a control message with cmsg_level
+SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type
+
+For SO_TIMESTAMPING_OLD::
+
+	struct scm_timestamping {
+		struct timespec ts[3];
+	};
+
+For SO_TIMESTAMPING_NEW::
+
+	struct scm_timestamping64 {
+		struct __kernel_timespec ts[3];
+	};
+
+Always use SO_TIMESTAMPING_NEW to get the struct scm_timestamping64 format.
+
+SO_TIMESTAMPING_OLD returns incorrect timestamps after the year 2038
+on 32 bit machines.
+
+The structure can return up to three timestamps. This is a legacy
+feature. At least one field is non-zero at any time. Most timestamps
+are passed in ts[0]. Hardware timestamps are passed in ts[2].
+
+ts[1] used to hold hardware timestamps converted to system time.
+Instead, expose the hardware clock device on the NIC directly as
+a HW PTP clock source, to allow time conversion in userspace and
+optionally synchronize system time with a userspace PTP stack such
+as linuxptp. For the PTP clock API, see Documentation/driver-api/ptp.rst.
+
+Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
+together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
+software timestamp will be generated in the recvmsg() call and passed
+in ts[0] when a real software timestamp is missing. This happens also
+on hardware transmit timestamps.
+
+2.1.1 Transmit timestamps with MSG_ERRQUEUE
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For transmit timestamps the outgoing packet is looped back to the
+socket's error queue with the send timestamp(s) attached. A process
+receives the timestamps by calling recvmsg() with flag MSG_ERRQUEUE
+set and with a msg_control buffer sufficiently large to receive the
+relevant metadata structures. The recvmsg call returns the original
+outgoing data packet with two ancillary messages attached.
+
+A message of cmsg_level SOL_IP(V6) and cmsg_type IP(V6)_RECVERR
+embeds a struct sock_extended_err. This defines the error type. For
+timestamps, the ee_errno field is ENOMSG. The other ancillary message
+will have cmsg_level SOL_SOCKET and cmsg_type SCM_TIMESTAMPING. This
+embeds the struct scm_timestamping.
+
+
+2.1.1.2 Timestamp types
+~~~~~~~~~~~~~~~~~~~~~~~
+
+The semantics of the three struct timespec are defined by field
+ee_info in the extended error structure. It contains a value of
+type SCM_TSTAMP_* to define the actual timestamp passed in
+scm_timestamping.
+
+The SCM_TSTAMP_* types are 1:1 matches to the SOF_TIMESTAMPING_*
+control fields discussed previously, with one exception. For legacy
+reasons, SCM_TSTAMP_SND is equal to zero and can be set for both
+SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE. It
+is the first if ts[2] is non-zero, the second otherwise, in which
+case the timestamp is stored in ts[0].
+
+
+2.1.1.3 Fragmentation
+~~~~~~~~~~~~~~~~~~~~~
+
+Fragmentation of outgoing datagrams is rare, but is possible, e.g., by
+explicitly disabling PMTU discovery. If an outgoing packet is fragmented,
+then only the first fragment is timestamped and returned to the sending
+socket.
+
+
+2.1.1.4 Packet Payload
+~~~~~~~~~~~~~~~~~~~~~~
+
+The calling application is often not interested in receiving the whole
+packet payload that it passed to the stack originally: the socket
+error queue mechanism is just a method to piggyback the timestamp on.
+In this case, the application can choose to read datagrams with a
+smaller buffer, possibly even of length 0. The payload is truncated
+accordingly. Until the process calls recvmsg() on the error queue,
+however, the full packet is queued, taking up budget from SO_RCVBUF.
+
+
+2.1.1.5 Blocking Read
+~~~~~~~~~~~~~~~~~~~~~
+
+Reading from the error queue is always a non-blocking operation. To
+block waiting on a timestamp, use poll or select. poll() will return
+POLLERR in pollfd.revents if any data is ready on the error queue.
+There is no need to pass this flag in pollfd.events. This flag is
+ignored on request. See also `man 2 poll`.
+
+
+2.1.2 Receive timestamps
+^^^^^^^^^^^^^^^^^^^^^^^^
+
+On reception, there is no reason to read from the socket error queue.
+The SCM_TIMESTAMPING ancillary data is sent along with the packet data
+on a normal recvmsg(). Since this is not a socket error, it is not
+accompanied by a message SOL_IP(V6)/IP(V6)_RECVERR. In this case,
+the meaning of the three fields in struct scm_timestamping is
+implicitly defined. ts[0] holds a software timestamp if set, ts[1]
+is again deprecated and ts[2] holds a hardware timestamp if set.
+
+
+3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP
+=======================================================================
+
+Hardware time stamping must also be initialized for each device driver
+that is expected to do hardware time stamping. The parameter is defined in
+include/uapi/linux/net_tstamp.h as::
+
+	struct hwtstamp_config {
+		int flags;	/* no flags defined right now, must be zero */
+		int tx_type;	/* HWTSTAMP_TX_* */
+		int rx_filter;	/* HWTSTAMP_FILTER_* */
+	};
+
+Desired behavior is passed into the kernel and to a specific device by
+calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
+ifr_data points to a struct hwtstamp_config. The tx_type and
+rx_filter are hints to the driver what it is expected to do. If
+the requested fine-grained filtering for incoming packets is not
+supported, the driver may time stamp more than just the requested types
+of packets.
+
+Drivers are free to use a more permissive configuration than the requested
+configuration. It is expected that drivers should only implement directly the
+most generic mode that can be supported. For example if the hardware can
+support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale
+HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT
+is more generic (and more useful to applications).
+
+A driver which supports hardware time stamping shall update the struct
+with the actual, possibly more permissive configuration. If the
+requested packets cannot be time stamped, then nothing should be
+changed and ERANGE shall be returned (in contrast to EINVAL, which
+indicates that SIOCSHWTSTAMP is not supported at all).
+
+Only a process with admin rights may change the configuration. User
+space is responsible for ensuring that multiple processes don't interfere
+with each other and that the settings are reset.
+
+Any process can read the actual configuration by passing this
+structure to ioctl(SIOCGHWTSTAMP) in the same way.  However, this has
+not been implemented in all drivers.
+
+::
+
+    /* possible values for hwtstamp_config->tx_type */
+    enum {
+	    /*
+	    * no outgoing packet will need hardware time stamping;
+	    * should a packet arrive which asks for it, no hardware
+	    * time stamping will be done
+	    */
+	    HWTSTAMP_TX_OFF,
+
+	    /*
+	    * enables hardware time stamping for outgoing packets;
+	    * the sender of the packet decides which are to be
+	    * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
+	    * before sending the packet
+	    */
+	    HWTSTAMP_TX_ON,
+    };
+
+    /* possible values for hwtstamp_config->rx_filter */
+    enum {
+	    /* time stamp no incoming packet at all */
+	    HWTSTAMP_FILTER_NONE,
+
+	    /* time stamp any incoming packet */
+	    HWTSTAMP_FILTER_ALL,
+
+	    /* return value: time stamp all packets requested plus some others */
+	    HWTSTAMP_FILTER_SOME,
+
+	    /* PTP v1, UDP, any kind of event packet */
+	    HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
+
+	    /* for the complete list of values, please check
+	    * the include file include/uapi/linux/net_tstamp.h
+	    */
+    };
+
+3.1 Hardware Timestamping Implementation: Device Drivers
+--------------------------------------------------------
+
+A driver which supports hardware time stamping must support the
+SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
+the actual values as described in the section on SIOCSHWTSTAMP.  It
+should also support SIOCGHWTSTAMP.
+
+Time stamps for received packets must be stored in the skb. To get a pointer
+to the shared time stamp structure of the skb call skb_hwtstamps(). Then
+set the time stamps in the structure::
+
+    struct skb_shared_hwtstamps {
+	    /* hardware time stamp transformed into duration
+	    * since arbitrary point in time
+	    */
+	    ktime_t	hwtstamp;
+    };
+
+Time stamps for outgoing packets are to be generated as follows:
+
+- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
+  is set non-zero. If yes, then the driver is expected to do hardware time
+  stamping.
+- If this is possible for the skb and requested, then declare
+  that the driver is doing the time stamping by setting the flag
+  SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags, e.g. with::
+
+      skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+
+  You might want to keep a pointer to the associated skb for the next step
+  and not free the skb. A driver not supporting hardware time stamping doesn't
+  do that. A driver must never touch sk_buff::tstamp! It is used to store
+  software generated time stamps by the network subsystem.
+- Driver should call skb_tx_timestamp() as close to passing sk_buff to hardware
+  as possible. skb_tx_timestamp() provides a software time stamp if requested
+  and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set).
+- As soon as the driver has sent the packet and/or obtained a
+  hardware time stamp for it, it passes the time stamp back by
+  calling skb_hwtstamp_tx() with the original skb, the raw
+  hardware time stamp. skb_hwtstamp_tx() clones the original skb and
+  adds the timestamps, therefore the original skb has to be freed now.
+  If obtaining the hardware time stamp somehow fails, then the driver
+  should not fall back to software time stamping. The rationale is that
+  this would occur at a later time in the processing pipeline than other
+  software time stamping and therefore could lead to unexpected deltas
+  between time stamps.
diff --git a/Documentation/networking/timestamping.txt b/Documentation/networking/timestamping.txt
deleted file mode 100644
index 8dd6333c3270..000000000000
--- a/Documentation/networking/timestamping.txt
+++ /dev/null
@@ -1,571 +0,0 @@
-
-1. Control Interfaces
-
-The interfaces for receiving network packages timestamps are:
-
-* SO_TIMESTAMP
-  Generates a timestamp for each incoming packet in (not necessarily
-  monotonic) system time. Reports the timestamp via recvmsg() in a
-  control message in usec resolution.
-  SO_TIMESTAMP is defined as SO_TIMESTAMP_NEW or SO_TIMESTAMP_OLD
-  based on the architecture type and time_t representation of libc.
-  Control message format is in struct __kernel_old_timeval for
-  SO_TIMESTAMP_OLD and in struct __kernel_sock_timeval for
-  SO_TIMESTAMP_NEW options respectively.
-
-* SO_TIMESTAMPNS
-  Same timestamping mechanism as SO_TIMESTAMP, but reports the
-  timestamp as struct timespec in nsec resolution.
-  SO_TIMESTAMPNS is defined as SO_TIMESTAMPNS_NEW or SO_TIMESTAMPNS_OLD
-  based on the architecture type and time_t representation of libc.
-  Control message format is in struct timespec for SO_TIMESTAMPNS_OLD
-  and in struct __kernel_timespec for SO_TIMESTAMPNS_NEW options
-  respectively.
-
-* IP_MULTICAST_LOOP + SO_TIMESTAMP[NS]
-  Only for multicast:approximate transmit timestamp obtained by
-  reading the looped packet receive timestamp.
-
-* SO_TIMESTAMPING
-  Generates timestamps on reception, transmission or both. Supports
-  multiple timestamp sources, including hardware. Supports generating
-  timestamps for stream sockets.
-
-
-1.1 SO_TIMESTAMP (also SO_TIMESTAMP_OLD and SO_TIMESTAMP_NEW):
-
-This socket option enables timestamping of datagrams on the reception
-path. Because the destination socket, if any, is not known early in
-the network stack, the feature has to be enabled for all packets. The
-same is true for all early receive timestamp options.
-
-For interface details, see `man 7 socket`.
-
-Always use SO_TIMESTAMP_NEW timestamp to always get timestamp in
-struct __kernel_sock_timeval format.
-
-SO_TIMESTAMP_OLD returns incorrect timestamps after the year 2038
-on 32 bit machines.
-
-1.2 SO_TIMESTAMPNS (also SO_TIMESTAMPNS_OLD and SO_TIMESTAMPNS_NEW):
-
-This option is identical to SO_TIMESTAMP except for the returned data type.
-Its struct timespec allows for higher resolution (ns) timestamps than the
-timeval of SO_TIMESTAMP (ms).
-
-Always use SO_TIMESTAMPNS_NEW timestamp to always get timestamp in
-struct __kernel_timespec format.
-
-SO_TIMESTAMPNS_OLD returns incorrect timestamps after the year 2038
-on 32 bit machines.
-
-1.3 SO_TIMESTAMPING (also SO_TIMESTAMPING_OLD and SO_TIMESTAMPING_NEW):
-
-Supports multiple types of timestamp requests. As a result, this
-socket option takes a bitmap of flags, not a boolean. In
-
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
-
-val is an integer with any of the following bits set. Setting other
-bit returns EINVAL and does not change the current state.
-
-The socket option configures timestamp generation for individual
-sk_buffs (1.3.1), timestamp reporting to the socket's error
-queue (1.3.2) and options (1.3.3). Timestamp generation can also
-be enabled for individual sendmsg calls using cmsg (1.3.4).
-
-
-1.3.1 Timestamp Generation
-
-Some bits are requests to the stack to try to generate timestamps. Any
-combination of them is valid. Changes to these bits apply to newly
-created packets, not to packets already in the stack. As a result, it
-is possible to selectively request timestamps for a subset of packets
-(e.g., for sampling) by embedding an send() call within two setsockopt
-calls, one to enable timestamp generation and one to disable it.
-Timestamps may also be generated for reasons other than being
-requested by a particular socket, such as when receive timestamping is
-enabled system wide, as explained earlier.
-
-SOF_TIMESTAMPING_RX_HARDWARE:
-  Request rx timestamps generated by the network adapter.
-
-SOF_TIMESTAMPING_RX_SOFTWARE:
-  Request rx timestamps when data enters the kernel. These timestamps
-  are generated just after a device driver hands a packet to the
-  kernel receive stack.
-
-SOF_TIMESTAMPING_TX_HARDWARE:
-  Request tx timestamps generated by the network adapter. This flag
-  can be enabled via both socket options and control messages.
-
-SOF_TIMESTAMPING_TX_SOFTWARE:
-  Request tx timestamps when data leaves the kernel. These timestamps
-  are generated in the device driver as close as possible, but always
-  prior to, passing the packet to the network interface. Hence, they
-  require driver support and may not be available for all devices.
-  This flag can be enabled via both socket options and control messages.
-
-
-SOF_TIMESTAMPING_TX_SCHED:
-  Request tx timestamps prior to entering the packet scheduler. Kernel
-  transmit latency is, if long, often dominated by queuing delay. The
-  difference between this timestamp and one taken at
-  SOF_TIMESTAMPING_TX_SOFTWARE will expose this latency independent
-  of protocol processing. The latency incurred in protocol
-  processing, if any, can be computed by subtracting a userspace
-  timestamp taken immediately before send() from this timestamp. On
-  machines with virtual devices where a transmitted packet travels
-  through multiple devices and, hence, multiple packet schedulers,
-  a timestamp is generated at each layer. This allows for fine
-  grained measurement of queuing delay. This flag can be enabled
-  via both socket options and control messages.
-
-SOF_TIMESTAMPING_TX_ACK:
-  Request tx timestamps when all data in the send buffer has been
-  acknowledged. This only makes sense for reliable protocols. It is
-  currently only implemented for TCP. For that protocol, it may
-  over-report measurement, because the timestamp is generated when all
-  data up to and including the buffer at send() was acknowledged: the
-  cumulative acknowledgment. The mechanism ignores SACK and FACK.
-  This flag can be enabled via both socket options and control messages.
-
-
-1.3.2 Timestamp Reporting
-
-The other three bits control which timestamps will be reported in a
-generated control message. Changes to the bits take immediate
-effect at the timestamp reporting locations in the stack. Timestamps
-are only reported for packets that also have the relevant timestamp
-generation request set.
-
-SOF_TIMESTAMPING_SOFTWARE:
-  Report any software timestamps when available.
-
-SOF_TIMESTAMPING_SYS_HARDWARE:
-  This option is deprecated and ignored.
-
-SOF_TIMESTAMPING_RAW_HARDWARE:
-  Report hardware timestamps as generated by
-  SOF_TIMESTAMPING_TX_HARDWARE when available.
-
-
-1.3.3 Timestamp Options
-
-The interface supports the options
-
-SOF_TIMESTAMPING_OPT_ID:
-
-  Generate a unique identifier along with each packet. A process can
-  have multiple concurrent timestamping requests outstanding. Packets
-  can be reordered in the transmit path, for instance in the packet
-  scheduler. In that case timestamps will be queued onto the error
-  queue out of order from the original send() calls. It is not always
-  possible to uniquely match timestamps to the original send() calls
-  based on timestamp order or payload inspection alone, then.
-
-  This option associates each packet at send() with a unique
-  identifier and returns that along with the timestamp. The identifier
-  is derived from a per-socket u32 counter (that wraps). For datagram
-  sockets, the counter increments with each sent packet. For stream
-  sockets, it increments with every byte.
-
-  The counter starts at zero. It is initialized the first time that
-  the socket option is enabled. It is reset each time the option is
-  enabled after having been disabled. Resetting the counter does not
-  change the identifiers of existing packets in the system.
-
-  This option is implemented only for transmit timestamps. There, the
-  timestamp is always looped along with a struct sock_extended_err.
-  The option modifies field ee_data to pass an id that is unique
-  among all possibly concurrently outstanding timestamp requests for
-  that socket.
-
-
-SOF_TIMESTAMPING_OPT_CMSG:
-
-  Support recv() cmsg for all timestamped packets. Control messages
-  are already supported unconditionally on all packets with receive
-  timestamps and on IPv6 packets with transmit timestamp. This option
-  extends them to IPv4 packets with transmit timestamp. One use case
-  is to correlate packets with their egress device, by enabling socket
-  option IP_PKTINFO simultaneously.
-
-
-SOF_TIMESTAMPING_OPT_TSONLY:
-
-  Applies to transmit timestamps only. Makes the kernel return the
-  timestamp as a cmsg alongside an empty packet, as opposed to
-  alongside the original packet. This reduces the amount of memory
-  charged to the socket's receive budget (SO_RCVBUF) and delivers
-  the timestamp even if sysctl net.core.tstamp_allow_data is 0.
-  This option disables SOF_TIMESTAMPING_OPT_CMSG.
-
-SOF_TIMESTAMPING_OPT_STATS:
-
-  Optional stats that are obtained along with the transmit timestamps.
-  It must be used together with SOF_TIMESTAMPING_OPT_TSONLY. When the
-  transmit timestamp is available, the stats are available in a
-  separate control message of type SCM_TIMESTAMPING_OPT_STATS, as a
-  list of TLVs (struct nlattr) of types. These stats allow the
-  application to associate various transport layer stats with
-  the transmit timestamps, such as how long a certain block of
-  data was limited by peer's receiver window.
-
-SOF_TIMESTAMPING_OPT_PKTINFO:
-
-  Enable the SCM_TIMESTAMPING_PKTINFO control message for incoming
-  packets with hardware timestamps. The message contains struct
-  scm_ts_pktinfo, which supplies the index of the real interface which
-  received the packet and its length at layer 2. A valid (non-zero)
-  interface index will be returned only if CONFIG_NET_RX_BUSY_POLL is
-  enabled and the driver is using NAPI. The struct contains also two
-  other fields, but they are reserved and undefined.
-
-SOF_TIMESTAMPING_OPT_TX_SWHW:
-
-  Request both hardware and software timestamps for outgoing packets
-  when SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE
-  are enabled at the same time. If both timestamps are generated,
-  two separate messages will be looped to the socket's error queue,
-  each containing just one timestamp.
-
-New applications are encouraged to pass SOF_TIMESTAMPING_OPT_ID to
-disambiguate timestamps and SOF_TIMESTAMPING_OPT_TSONLY to operate
-regardless of the setting of sysctl net.core.tstamp_allow_data.
-
-An exception is when a process needs additional cmsg data, for
-instance SOL_IP/IP_PKTINFO to detect the egress network interface.
-Then pass option SOF_TIMESTAMPING_OPT_CMSG. This option depends on
-having access to the contents of the original packet, so cannot be
-combined with SOF_TIMESTAMPING_OPT_TSONLY.
-
-
-1.3.4. Enabling timestamps via control messages
-
-In addition to socket options, timestamp generation can be requested
-per write via cmsg, only for SOF_TIMESTAMPING_TX_* (see Section 1.3.1).
-Using this feature, applications can sample timestamps per sendmsg()
-without paying the overhead of enabling and disabling timestamps via
-setsockopt:
-
-  struct msghdr *msg;
-  ...
-  cmsg			       = CMSG_FIRSTHDR(msg);
-  cmsg->cmsg_level	       = SOL_SOCKET;
-  cmsg->cmsg_type	       = SO_TIMESTAMPING;
-  cmsg->cmsg_len	       = CMSG_LEN(sizeof(__u32));
-  *((__u32 *) CMSG_DATA(cmsg)) = SOF_TIMESTAMPING_TX_SCHED |
-				 SOF_TIMESTAMPING_TX_SOFTWARE |
-				 SOF_TIMESTAMPING_TX_ACK;
-  err = sendmsg(fd, msg, 0);
-
-The SOF_TIMESTAMPING_TX_* flags set via cmsg will override
-the SOF_TIMESTAMPING_TX_* flags set via setsockopt.
-
-Moreover, applications must still enable timestamp reporting via
-setsockopt to receive timestamps:
-
-  __u32 val = SOF_TIMESTAMPING_SOFTWARE |
-	      SOF_TIMESTAMPING_OPT_ID /* or any other flag */;
-  err = setsockopt(fd, SOL_SOCKET, SO_TIMESTAMPING, &val, sizeof(val));
-
-
-1.4 Bytestream Timestamps
-
-The SO_TIMESTAMPING interface supports timestamping of bytes in a
-bytestream. Each request is interpreted as a request for when the
-entire contents of the buffer has passed a timestamping point. That
-is, for streams option SOF_TIMESTAMPING_TX_SOFTWARE will record
-when all bytes have reached the device driver, regardless of how
-many packets the data has been converted into.
-
-In general, bytestreams have no natural delimiters and therefore
-correlating a timestamp with data is non-trivial. A range of bytes
-may be split across segments, any segments may be merged (possibly
-coalescing sections of previously segmented buffers associated with
-independent send() calls). Segments can be reordered and the same
-byte range can coexist in multiple segments for protocols that
-implement retransmissions.
-
-It is essential that all timestamps implement the same semantics,
-regardless of these possible transformations, as otherwise they are
-incomparable. Handling "rare" corner cases differently from the
-simple case (a 1:1 mapping from buffer to skb) is insufficient
-because performance debugging often needs to focus on such outliers.
-
-In practice, timestamps can be correlated with segments of a
-bytestream consistently, if both semantics of the timestamp and the
-timing of measurement are chosen correctly. This challenge is no
-different from deciding on a strategy for IP fragmentation. There, the
-definition is that only the first fragment is timestamped. For
-bytestreams, we chose that a timestamp is generated only when all
-bytes have passed a point. SOF_TIMESTAMPING_TX_ACK as defined is easy to
-implement and reason about. An implementation that has to take into
-account SACK would be more complex due to possible transmission holes
-and out of order arrival.
-
-On the host, TCP can also break the simple 1:1 mapping from buffer to
-skbuff as a result of Nagle, cork, autocork, segmentation and GSO. The
-implementation ensures correctness in all cases by tracking the
-individual last byte passed to send(), even if it is no longer the
-last byte after an skbuff extend or merge operation. It stores the
-relevant sequence number in skb_shinfo(skb)->tskey. Because an skbuff
-has only one such field, only one timestamp can be generated.
-
-In rare cases, a timestamp request can be missed if two requests are
-collapsed onto the same skb. A process can detect this situation by
-enabling SOF_TIMESTAMPING_OPT_ID and comparing the byte offset at
-send time with the value returned for each timestamp. It can prevent
-the situation by always flushing the TCP stack in between requests,
-for instance by enabling TCP_NODELAY and disabling TCP_CORK and
-autocork.
-
-These precautions ensure that the timestamp is generated only when all
-bytes have passed a timestamp point, assuming that the network stack
-itself does not reorder the segments. The stack indeed tries to avoid
-reordering. The one exception is under administrator control: it is
-possible to construct a packet scheduler configuration that delays
-segments from the same stream differently. Such a setup would be
-unusual.
-
-
-2 Data Interfaces
-
-Timestamps are read using the ancillary data feature of recvmsg().
-See `man 3 cmsg` for details of this interface. The socket manual
-page (`man 7 socket`) describes how timestamps generated with
-SO_TIMESTAMP and SO_TIMESTAMPNS records can be retrieved.
-
-
-2.1 SCM_TIMESTAMPING records
-
-These timestamps are returned in a control message with cmsg_level
-SOL_SOCKET, cmsg_type SCM_TIMESTAMPING, and payload of type
-
-For SO_TIMESTAMPING_OLD:
-
-struct scm_timestamping {
-	struct timespec ts[3];
-};
-
-For SO_TIMESTAMPING_NEW:
-
-struct scm_timestamping64 {
-	struct __kernel_timespec ts[3];
-
-Always use SO_TIMESTAMPING_NEW timestamp to always get timestamp in
-struct scm_timestamping64 format.
-
-SO_TIMESTAMPING_OLD returns incorrect timestamps after the year 2038
-on 32 bit machines.
-
-The structure can return up to three timestamps. This is a legacy
-feature. At least one field is non-zero at any time. Most timestamps
-are passed in ts[0]. Hardware timestamps are passed in ts[2].
-
-ts[1] used to hold hardware timestamps converted to system time.
-Instead, expose the hardware clock device on the NIC directly as
-a HW PTP clock source, to allow time conversion in userspace and
-optionally synchronize system time with a userspace PTP stack such
-as linuxptp. For the PTP clock API, see Documentation/driver-api/ptp.rst.
-
-Note that if the SO_TIMESTAMP or SO_TIMESTAMPNS option is enabled
-together with SO_TIMESTAMPING using SOF_TIMESTAMPING_SOFTWARE, a false
-software timestamp will be generated in the recvmsg() call and passed
-in ts[0] when a real software timestamp is missing. This happens also
-on hardware transmit timestamps.
-
-2.1.1 Transmit timestamps with MSG_ERRQUEUE
-
-For transmit timestamps the outgoing packet is looped back to the
-socket's error queue with the send timestamp(s) attached. A process
-receives the timestamps by calling recvmsg() with flag MSG_ERRQUEUE
-set and with a msg_control buffer sufficiently large to receive the
-relevant metadata structures. The recvmsg call returns the original
-outgoing data packet with two ancillary messages attached.
-
-A message of cm_level SOL_IP(V6) and cm_type IP(V6)_RECVERR
-embeds a struct sock_extended_err. This defines the error type. For
-timestamps, the ee_errno field is ENOMSG. The other ancillary message
-will have cm_level SOL_SOCKET and cm_type SCM_TIMESTAMPING. This
-embeds the struct scm_timestamping.
-
-
-2.1.1.2 Timestamp types
-
-The semantics of the three struct timespec are defined by field
-ee_info in the extended error structure. It contains a value of
-type SCM_TSTAMP_* to define the actual timestamp passed in
-scm_timestamping.
-
-The SCM_TSTAMP_* types are 1:1 matches to the SOF_TIMESTAMPING_*
-control fields discussed previously, with one exception. For legacy
-reasons, SCM_TSTAMP_SND is equal to zero and can be set for both
-SOF_TIMESTAMPING_TX_HARDWARE and SOF_TIMESTAMPING_TX_SOFTWARE. It
-is the first if ts[2] is non-zero, the second otherwise, in which
-case the timestamp is stored in ts[0].
-
-
-2.1.1.3 Fragmentation
-
-Fragmentation of outgoing datagrams is rare, but is possible, e.g., by
-explicitly disabling PMTU discovery. If an outgoing packet is fragmented,
-then only the first fragment is timestamped and returned to the sending
-socket.
-
-
-2.1.1.4 Packet Payload
-
-The calling application is often not interested in receiving the whole
-packet payload that it passed to the stack originally: the socket
-error queue mechanism is just a method to piggyback the timestamp on.
-In this case, the application can choose to read datagrams with a
-smaller buffer, possibly even of length 0. The payload is truncated
-accordingly. Until the process calls recvmsg() on the error queue,
-however, the full packet is queued, taking up budget from SO_RCVBUF.
-
-
-2.1.1.5 Blocking Read
-
-Reading from the error queue is always a non-blocking operation. To
-block waiting on a timestamp, use poll or select. poll() will return
-POLLERR in pollfd.revents if any data is ready on the error queue.
-There is no need to pass this flag in pollfd.events. This flag is
-ignored on request. See also `man 2 poll`.
-
-
-2.1.2 Receive timestamps
-
-On reception, there is no reason to read from the socket error queue.
-The SCM_TIMESTAMPING ancillary data is sent along with the packet data
-on a normal recvmsg(). Since this is not a socket error, it is not
-accompanied by a message SOL_IP(V6)/IP(V6)_RECVERROR. In this case,
-the meaning of the three fields in struct scm_timestamping is
-implicitly defined. ts[0] holds a software timestamp if set, ts[1]
-is again deprecated and ts[2] holds a hardware timestamp if set.
-
-
-3. Hardware Timestamping configuration: SIOCSHWTSTAMP and SIOCGHWTSTAMP
-
-Hardware time stamping must also be initialized for each device driver
-that is expected to do hardware time stamping. The parameter is defined in
-include/uapi/linux/net_tstamp.h as:
-
-struct hwtstamp_config {
-	int flags;	/* no flags defined right now, must be zero */
-	int tx_type;	/* HWTSTAMP_TX_* */
-	int rx_filter;	/* HWTSTAMP_FILTER_* */
-};
-
-Desired behavior is passed into the kernel and to a specific device by
-calling ioctl(SIOCSHWTSTAMP) with a pointer to a struct ifreq whose
-ifr_data points to a struct hwtstamp_config. The tx_type and
-rx_filter are hints to the driver what it is expected to do. If
-the requested fine-grained filtering for incoming packets is not
-supported, the driver may time stamp more than just the requested types
-of packets.
-
-Drivers are free to use a more permissive configuration than the requested
-configuration. It is expected that drivers should only implement directly the
-most generic mode that can be supported. For example if the hardware can
-support HWTSTAMP_FILTER_V2_EVENT, then it should generally always upscale
-HWTSTAMP_FILTER_V2_L2_SYNC_MESSAGE, and so forth, as HWTSTAMP_FILTER_V2_EVENT
-is more generic (and more useful to applications).
-
-A driver which supports hardware time stamping shall update the struct
-with the actual, possibly more permissive configuration. If the
-requested packets cannot be time stamped, then nothing should be
-changed and ERANGE shall be returned (in contrast to EINVAL, which
-indicates that SIOCSHWTSTAMP is not supported at all).
-
-Only a processes with admin rights may change the configuration. User
-space is responsible to ensure that multiple processes don't interfere
-with each other and that the settings are reset.
-
-Any process can read the actual configuration by passing this
-structure to ioctl(SIOCGHWTSTAMP) in the same way.  However, this has
-not been implemented in all drivers.
-
-/* possible values for hwtstamp_config->tx_type */
-enum {
-	/*
-	 * no outgoing packet will need hardware time stamping;
-	 * should a packet arrive which asks for it, no hardware
-	 * time stamping will be done
-	 */
-	HWTSTAMP_TX_OFF,
-
-	/*
-	 * enables hardware time stamping for outgoing packets;
-	 * the sender of the packet decides which are to be
-	 * time stamped by setting SOF_TIMESTAMPING_TX_SOFTWARE
-	 * before sending the packet
-	 */
-	HWTSTAMP_TX_ON,
-};
-
-/* possible values for hwtstamp_config->rx_filter */
-enum {
-	/* time stamp no incoming packet at all */
-	HWTSTAMP_FILTER_NONE,
-
-	/* time stamp any incoming packet */
-	HWTSTAMP_FILTER_ALL,
-
-	/* return value: time stamp all packets requested plus some others */
-	HWTSTAMP_FILTER_SOME,
-
-	/* PTP v1, UDP, any kind of event packet */
-	HWTSTAMP_FILTER_PTP_V1_L4_EVENT,
-
-	/* for the complete list of values, please check
-	 * the include file include/uapi/linux/net_tstamp.h
-	 */
-};
-
-3.1 Hardware Timestamping Implementation: Device Drivers
-
-A driver which supports hardware time stamping must support the
-SIOCSHWTSTAMP ioctl and update the supplied struct hwtstamp_config with
-the actual values as described in the section on SIOCSHWTSTAMP.  It
-should also support SIOCGHWTSTAMP.
-
-Time stamps for received packets must be stored in the skb. To get a pointer
-to the shared time stamp structure of the skb call skb_hwtstamps(). Then
-set the time stamps in the structure:
-
-struct skb_shared_hwtstamps {
-	/* hardware time stamp transformed into duration
-	 * since arbitrary point in time
-	 */
-	ktime_t	hwtstamp;
-};
-
-Time stamps for outgoing packets are to be generated as follows:
-- In hard_start_xmit(), check if (skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP)
-  is set no-zero. If yes, then the driver is expected to do hardware time
-  stamping.
-- If this is possible for the skb and requested, then declare
-  that the driver is doing the time stamping by setting the flag
-  SKBTX_IN_PROGRESS in skb_shinfo(skb)->tx_flags , e.g. with
-
-      skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
-
-  You might want to keep a pointer to the associated skb for the next step
-  and not free the skb. A driver not supporting hardware time stamping doesn't
-  do that. A driver must never touch sk_buff::tstamp! It is used to store
-  software generated time stamps by the network subsystem.
-- Driver should call skb_tx_timestamp() as close to passing sk_buff to hardware
-  as possible. skb_tx_timestamp() provides a software time stamp if requested
-  and hardware timestamping is not possible (SKBTX_IN_PROGRESS not set).
-- As soon as the driver has sent the packet and/or obtained a
-  hardware time stamp for it, it passes the time stamp back by
-  calling skb_hwtstamp_tx() with the original skb, the raw
-  hardware time stamp. skb_hwtstamp_tx() clones the original skb and
-  adds the timestamps, therefore the original skb has to be freed now.
-  If obtaining the hardware time stamp somehow fails, then the driver
-  should not fall back to software time stamping. The rationale is that
-  this would occur at a later time in the processing pipeline than other
-  software time stamping and therefore could lead to unexpected deltas
-  between time stamps.
diff --git a/include/uapi/linux/errqueue.h b/include/uapi/linux/errqueue.h
index 0cca19670fd2..ca5cb3e3c6df 100644
--- a/include/uapi/linux/errqueue.h
+++ b/include/uapi/linux/errqueue.h
@@ -36,7 +36,7 @@ struct sock_extended_err {
  *
  *	The timestamping interfaces SO_TIMESTAMPING, MSG_TSTAMP_*
  *	communicate network timestamps by passing this struct in a cmsg with
- *	recvmsg(). See Documentation/networking/timestamping.txt for details.
+ *	recvmsg(). See Documentation/networking/timestamping.rst for details.
  *	User space sees a timespec definition that matches either
  *	__kernel_timespec or __kernel_old_timespec, in the kernel we
  *	require two structure definitions to provide both.
-- 
cgit v1.2.3-59-g8ed1b


From 4ac0b122ee63d89b5aaf2e3e376092d8ac02a567 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Thu, 30 Apr 2020 18:04:32 +0200
Subject: docs: networking: convert tproxy.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/tproxy.rst | 109 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/tproxy.txt | 104 ----------------------------------
 net/netfilter/Kconfig               |   2 +-
 4 files changed, 111 insertions(+), 105 deletions(-)
 create mode 100644 Documentation/networking/tproxy.rst
 delete mode 100644 Documentation/networking/tproxy.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 8f9a84b8e3f2..b423b2db5f96 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -110,6 +110,7 @@ Contents:
    tcp-thin
    team
    timestamping
+   tproxy
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/tproxy.rst b/Documentation/networking/tproxy.rst
new file mode 100644
index 000000000000..00dc3a1a66b4
--- /dev/null
+++ b/Documentation/networking/tproxy.rst
@@ -0,0 +1,109 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+Transparent proxy support
+=========================
+
+This feature adds Linux 2.2-like transparent proxy support to current kernels.
+To use it, enable the socket match and the TPROXY target in your kernel config.
+You will need policy routing too, so be sure to enable that as well.
+
+From Linux 4.18 transparent proxy support is also available in nf_tables.
+
+1. Making non-local sockets work
+================================
+
+The idea is that you identify packets with destination address matching a local
+socket on your box, set the packet mark to a certain value::
+
+    # iptables -t mangle -N DIVERT
+    # iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
+    # iptables -t mangle -A DIVERT -j MARK --set-mark 1
+    # iptables -t mangle -A DIVERT -j ACCEPT
+
+Alternatively you can do this in nft with the following commands::
+
+    # nft add table filter
+    # nft add chain filter divert "{ type filter hook prerouting priority -150; }"
+    # nft add rule filter divert meta l4proto tcp socket transparent 1 meta mark set 1 accept
+
+And then match on that value using policy routing to have those packets
+delivered locally::
+
+    # ip rule add fwmark 1 lookup 100
+    # ip route add local 0.0.0.0/0 dev lo table 100
+
+Because of certain restrictions in the IPv4 routing output code you'll have to
+modify your application to allow it to send datagrams _from_ non-local IP
+addresses. All you have to do is enable the (SOL_IP, IP_TRANSPARENT) socket
+option before calling bind::
+
+    fd = socket(AF_INET, SOCK_STREAM, 0);
+    /* - 8< -*/
+    int value = 1;
+    setsockopt(fd, SOL_IP, IP_TRANSPARENT, &value, sizeof(value));
+    /* - 8< -*/
+    name.sin_family = AF_INET;
+    name.sin_port = htons(0xCAFE);
+    name.sin_addr.s_addr = htonl(0xDEADBEEF);
+    bind(fd, &name, sizeof(name));
+
+A trivial patch for netcat is available here:
+http://people.netfilter.org/hidden/tproxy/netcat-ip_transparent-support.patch
+
+
+2. Redirecting traffic
+======================
+
+Transparent proxying often involves "intercepting" traffic on a router. This is
+usually done with the iptables REDIRECT target; however, there are serious
+limitations of that method. One of the major issues is that it actually
+modifies the packets to change the destination address -- which might not be
+acceptable in certain situations. (Think of proxying UDP for example: you won't
+be able to find out the original destination address. Even in case of TCP
+getting the original destination address is racy.)
+
+The 'TPROXY' target provides similar functionality without relying on NAT. Simply
+add rules like this to the iptables ruleset above::
+
+    # iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \
+      --tproxy-mark 0x1/0x1 --on-port 50080
+
+Or the following rule to nft::
+
+    # nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept
+
+Note that for this to work you'll have to modify the proxy to enable (SOL_IP,
+IP_TRANSPARENT) for the listening socket.
+
+As an example implementation, tcprdr is available here:
+https://git.breakpoint.cc/cgit/fw/tcprdr.git/
+This tool is written by Florian Westphal and it was used for testing during the
+nf_tables implementation.
+
+3. Iptables and nf_tables extensions
+====================================
+
+To use tproxy you'll need to have the following modules compiled for iptables:
+
+ - NETFILTER_XT_MATCH_SOCKET
+ - NETFILTER_XT_TARGET_TPROXY
+
+Or the following modules for nf_tables:
+
+ - NFT_SOCKET
+ - NFT_TPROXY
+
+4. Application support
+======================
+
+4.1. Squid
+----------
+
+Squid 3.HEAD has support built-in. To use it, pass
+'--enable-linux-netfilter' to configure and set the 'tproxy' option on
+the HTTP listener you redirect traffic to with the TPROXY iptables
+target.
+
+For more information please consult the following page on the Squid
+wiki: http://wiki.squid-cache.org/Features/Tproxy4
diff --git a/Documentation/networking/tproxy.txt b/Documentation/networking/tproxy.txt
deleted file mode 100644
index b9a188823d9f..000000000000
--- a/Documentation/networking/tproxy.txt
+++ /dev/null
@@ -1,104 +0,0 @@
-Transparent proxy support
-=========================
-
-This feature adds Linux 2.2-like transparent proxy support to current kernels.
-To use it, enable the socket match and the TPROXY target in your kernel config.
-You will need policy routing too, so be sure to enable that as well.
-
-From Linux 4.18 transparent proxy support is also available in nf_tables.
-
-1. Making non-local sockets work
-================================
-
-The idea is that you identify packets with destination address matching a local
-socket on your box, set the packet mark to a certain value:
-
-# iptables -t mangle -N DIVERT
-# iptables -t mangle -A PREROUTING -p tcp -m socket -j DIVERT
-# iptables -t mangle -A DIVERT -j MARK --set-mark 1
-# iptables -t mangle -A DIVERT -j ACCEPT
-
-Alternatively you can do this in nft with the following commands:
-
-# nft add table filter
-# nft add chain filter divert "{ type filter hook prerouting priority -150; }"
-# nft add rule filter divert meta l4proto tcp socket transparent 1 meta mark set 1 accept
-
-And then match on that value using policy routing to have those packets
-delivered locally:
-
-# ip rule add fwmark 1 lookup 100
-# ip route add local 0.0.0.0/0 dev lo table 100
-
-Because of certain restrictions in the IPv4 routing output code you'll have to
-modify your application to allow it to send datagrams _from_ non-local IP
-addresses. All you have to do is enable the (SOL_IP, IP_TRANSPARENT) socket
-option before calling bind:
-
-fd = socket(AF_INET, SOCK_STREAM, 0);
-/* - 8< -*/
-int value = 1;
-setsockopt(fd, SOL_IP, IP_TRANSPARENT, &value, sizeof(value));
-/* - 8< -*/
-name.sin_family = AF_INET;
-name.sin_port = htons(0xCAFE);
-name.sin_addr.s_addr = htonl(0xDEADBEEF);
-bind(fd, &name, sizeof(name));
-
-A trivial patch for netcat is available here:
-http://people.netfilter.org/hidden/tproxy/netcat-ip_transparent-support.patch
-
-
-2. Redirecting traffic
-======================
-
-Transparent proxying often involves "intercepting" traffic on a router. This is
-usually done with the iptables REDIRECT target; however, there are serious
-limitations of that method. One of the major issues is that it actually
-modifies the packets to change the destination address -- which might not be
-acceptable in certain situations. (Think of proxying UDP for example: you won't
-be able to find out the original destination address. Even in case of TCP
-getting the original destination address is racy.)
-
-The 'TPROXY' target provides similar functionality without relying on NAT. Simply
-add rules like this to the iptables ruleset above:
-
-# iptables -t mangle -A PREROUTING -p tcp --dport 80 -j TPROXY \
-  --tproxy-mark 0x1/0x1 --on-port 50080
-
-Or the following rule to nft:
-
-# nft add rule filter divert tcp dport 80 tproxy to :50080 meta mark set 1 accept
-
-Note that for this to work you'll have to modify the proxy to enable (SOL_IP,
-IP_TRANSPARENT) for the listening socket.
-
-As an example implementation, tcprdr is available here:
-https://git.breakpoint.cc/cgit/fw/tcprdr.git/
-This tool is written by Florian Westphal and it was used for testing during the
-nf_tables implementation.
-
-3. Iptables and nf_tables extensions
-====================================
-
-To use tproxy you'll need to have the following modules compiled for iptables:
- - NETFILTER_XT_MATCH_SOCKET
- - NETFILTER_XT_TARGET_TPROXY
-
-Or the floowing modules for nf_tables:
- - NFT_SOCKET
- - NFT_TPROXY
-
-4. Application support
-======================
-
-4.1. Squid
-----------
-
-Squid 3.HEAD has support built-in. To use it, pass
-'--enable-linux-netfilter' to configure and set the 'tproxy' option on
-the HTTP listener you redirect traffic to with the TPROXY iptables
-target.
-
-For more information please consult the following page on the Squid
-wiki: http://wiki.squid-cache.org/Features/Tproxy4
diff --git a/net/netfilter/Kconfig b/net/netfilter/Kconfig
index 468fea1aebba..3a3915d2e1ea 100644
--- a/net/netfilter/Kconfig
+++ b/net/netfilter/Kconfig
@@ -1043,7 +1043,7 @@ config NETFILTER_XT_TARGET_TPROXY
 	  on Netfilter connection tracking and NAT, unlike REDIRECT.
 	  For it to work you will have to configure certain iptables rules
 	  and use policy routing. For more information on how to set it up
-	  see Documentation/networking/tproxy.txt.
+	  see Documentation/networking/tproxy.rst.
 
 	  To compile it as a module, choose M here.  If unsure, say N.
 
-- 
cgit v1.2.3-59-g8ed1b


From 466010342e89240c45746f65767c7290b96a4b36 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:08 +0300
Subject: mlxsw: spectrum_span: Add APIs to get / put a SPAN agent

Given a netdev that packets should be mirrored to, create a SPAN agent
and return its identifier to the caller.

The SPAN agent is reference counted, as multiple tc-mirred actions can
point to the same destination netdev.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 43 ++++++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |  4 ++
 2 files changed, 47 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index ae3c8a1e9a43..c4159f4a66e2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -1039,3 +1039,46 @@ void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp)
 		return;
 	mlxsw_core_schedule_work(&mlxsw_sp->span->work);
 }
+
+int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp,
+			    const struct net_device *to_dev, int *p_span_id)
+{
+	const struct mlxsw_sp_span_entry_ops *ops;
+	struct mlxsw_sp_span_entry *span_entry;
+	struct mlxsw_sp_span_parms sparms;
+	int err;
+
+	ASSERT_RTNL();
+
+	ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
+	if (!ops) {
+		dev_err(mlxsw_sp->bus_info->dev, "Cannot mirror to requested destination\n");
+		return -EOPNOTSUPP;
+	}
+
+	memset(&sparms, 0, sizeof(sparms));
+	err = ops->parms_set(to_dev, &sparms);
+	if (err)
+		return err;
+
+	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
+	if (!span_entry)
+		return -ENOBUFS;
+
+	*p_span_id = span_entry->id;
+
+	return 0;
+}
+
+void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id)
+{
+	struct mlxsw_sp_span_entry *span_entry;
+
+	ASSERT_RTNL();
+
+	span_entry = mlxsw_sp_span_entry_find_by_id(mlxsw_sp, span_id);
+	if (WARN_ON_ONCE(!span_entry))
+		return;
+
+	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index d23abdf957fa..b79de9a125bb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -77,4 +77,8 @@ void mlxsw_sp_span_entry_invalidate(struct mlxsw_sp *mlxsw_sp,
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu);
 void mlxsw_sp_span_speed_update_work(struct work_struct *work);
 
+int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp,
+			    const struct net_device *to_dev, int *p_span_id);
+void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id);
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From ed04458d4a900005d96b991fc0eb938875091047 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:09 +0300
Subject: mlxsw: spectrum_span: Add APIs to get / put an analyzed port

An analyzed port is a port whose incoming / outgoing traffic is mirrored
to a SPAN agent and analyzed on a remote server.

A port can be analyzed by multiple tc filters and therefore the
corresponding analyzed port entry needs to be reference counted. This is
significant because ports whose outgoing traffic is analyzed need to
have an egress mirror buffer.

Add APIs to get / put an analyzed port. Allocate an egress mirror buffer
on a port when it is first inspected at egress and free the buffer when
it is no longer inspected at egress.

Protect the list of analyzed ports with a mutex, as a later patch will
traverse it from a context in which RTNL lock is not held.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 136 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |   4 +
 2 files changed, 140 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c4159f4a66e2..5edf9d1bf937 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -3,6 +3,7 @@
 
 #include <linux/if_bridge.h>
 #include <linux/list.h>
+#include <linux/mutex.h>
 #include <linux/refcount.h>
 #include <linux/rtnetlink.h>
 #include <linux/workqueue.h>
@@ -20,11 +21,20 @@
 struct mlxsw_sp_span {
 	struct work_struct work;
 	struct mlxsw_sp *mlxsw_sp;
+	struct list_head analyzed_ports_list;
+	struct mutex analyzed_ports_lock; /* Protects analyzed_ports_list */
 	atomic_t active_entries_count;
 	int entries_count;
 	struct mlxsw_sp_span_entry entries[];
 };
 
+struct mlxsw_sp_span_analyzed_port {
+	struct list_head list; /* Member of analyzed_ports_list */
+	refcount_t ref_count;
+	u8 local_port;
+	bool ingress;
+};
+
 static void mlxsw_sp_span_respin_work(struct work_struct *work);
 
 static u64 mlxsw_sp_span_occ_get(void *priv)
@@ -49,6 +59,8 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 		return -ENOMEM;
 	span->entries_count = entries_count;
 	atomic_set(&span->active_entries_count, 0);
+	mutex_init(&span->analyzed_ports_lock);
+	INIT_LIST_HEAD(&span->analyzed_ports_list);
 	span->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp->span = span;
 
@@ -79,6 +91,8 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 
 		WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
 	}
+	WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->analyzed_ports_list));
+	mutex_destroy(&mlxsw_sp->span->analyzed_ports_lock);
 	kfree(mlxsw_sp->span);
 }
 
@@ -1082,3 +1096,125 @@ void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id)
 
 	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
 }
+
+static struct mlxsw_sp_span_analyzed_port *
+mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u8 local_port,
+				 bool ingress)
+{
+	struct mlxsw_sp_span_analyzed_port *analyzed_port;
+
+	list_for_each_entry(analyzed_port, &span->analyzed_ports_list, list) {
+		if (analyzed_port->local_port == local_port &&
+		    analyzed_port->ingress == ingress)
+			return analyzed_port;
+	}
+
+	return NULL;
+}
+
+static struct mlxsw_sp_span_analyzed_port *
+mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span,
+				   struct mlxsw_sp_port *mlxsw_sp_port,
+				   bool ingress)
+{
+	struct mlxsw_sp_span_analyzed_port *analyzed_port;
+	int err;
+
+	analyzed_port = kzalloc(sizeof(*analyzed_port), GFP_KERNEL);
+	if (!analyzed_port)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&analyzed_port->ref_count, 1);
+	analyzed_port->local_port = mlxsw_sp_port->local_port;
+	analyzed_port->ingress = ingress;
+	list_add_tail(&analyzed_port->list, &span->analyzed_ports_list);
+
+	/* An egress mirror buffer should be allocated on the egress port which
+	 * does the mirroring.
+	 */
+	if (!ingress) {
+		u16 mtu = mlxsw_sp_port->dev->mtu;
+
+		err = mlxsw_sp_span_port_buffsize_update(mlxsw_sp_port, mtu);
+		if (err)
+			goto err_buffsize_update;
+	}
+
+	return analyzed_port;
+
+err_buffsize_update:
+	list_del(&analyzed_port->list);
+	kfree(analyzed_port);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_span *span,
+				    struct mlxsw_sp_span_analyzed_port *
+				    analyzed_port)
+{
+	struct mlxsw_sp *mlxsw_sp = span->mlxsw_sp;
+	char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+	/* Remove egress mirror buffer now that port is no longer analyzed
+	 * at egress.
+	 */
+	if (!analyzed_port->ingress) {
+		mlxsw_reg_sbib_pack(sbib_pl, analyzed_port->local_port, 0);
+		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+	}
+
+	list_del(&analyzed_port->list);
+	kfree(analyzed_port);
+}
+
+int mlxsw_sp_span_analyzed_port_get(struct mlxsw_sp_port *mlxsw_sp_port,
+				    bool ingress)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_span_analyzed_port *analyzed_port;
+	u8 local_port = mlxsw_sp_port->local_port;
+	int err = 0;
+
+	mutex_lock(&mlxsw_sp->span->analyzed_ports_lock);
+
+	analyzed_port = mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span,
+							 local_port, ingress);
+	if (analyzed_port) {
+		refcount_inc(&analyzed_port->ref_count);
+		goto out_unlock;
+	}
+
+	analyzed_port = mlxsw_sp_span_analyzed_port_create(mlxsw_sp->span,
+							   mlxsw_sp_port,
+							   ingress);
+	if (IS_ERR(analyzed_port))
+		err = PTR_ERR(analyzed_port);
+
+out_unlock:
+	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
+	return err;
+}
+
+void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port,
+				     bool ingress)
+{
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_span_analyzed_port *analyzed_port;
+	u8 local_port = mlxsw_sp_port->local_port;
+
+	mutex_lock(&mlxsw_sp->span->analyzed_ports_lock);
+
+	analyzed_port = mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span,
+							 local_port, ingress);
+	if (WARN_ON_ONCE(!analyzed_port))
+		goto out_unlock;
+
+	if (!refcount_dec_and_test(&analyzed_port->ref_count))
+		goto out_unlock;
+
+	mlxsw_sp_span_analyzed_port_destroy(mlxsw_sp->span, analyzed_port);
+
+out_unlock:
+	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index b79de9a125bb..1345eda5cc34 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -80,5 +80,9 @@ void mlxsw_sp_span_speed_update_work(struct work_struct *work);
 int mlxsw_sp_span_agent_get(struct mlxsw_sp *mlxsw_sp,
 			    const struct net_device *to_dev, int *p_span_id);
 void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id);
+int mlxsw_sp_span_analyzed_port_get(struct mlxsw_sp_port *mlxsw_sp_port,
+				    bool ingress);
+void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port,
+				     bool ingress);
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From eb773c3a2d98e1e1cb9076419f14ebb0a2f40951 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:10 +0300
Subject: mlxsw: spectrum_span: Rename function

Next patch will introduce mlxsw_sp_span_port_buffer_disable() function
that disables the egress buffer on an analyzed port. Rename the opposite
function that updates the buffer on an analyzed port accordingly.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Suggested-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 5edf9d1bf937..c52f79a97f36 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -784,7 +784,7 @@ static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
 }
 
 static int
-mlxsw_sp_span_port_buffsize_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
+mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
 	char sbib_pl[MLXSW_REG_SBIB_LEN];
@@ -809,7 +809,7 @@ int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 	 * updated according to the mtu value
 	 */
 	if (mlxsw_sp_span_is_egress_mirror(port))
-		return mlxsw_sp_span_port_buffsize_update(port, mtu);
+		return mlxsw_sp_span_port_buffer_update(port, mtu);
 	return 0;
 }
 
@@ -825,8 +825,8 @@ void mlxsw_sp_span_speed_update_work(struct work_struct *work)
 	 * updated according to the speed value.
 	 */
 	if (mlxsw_sp_span_is_egress_mirror(mlxsw_sp_port))
-		mlxsw_sp_span_port_buffsize_update(mlxsw_sp_port,
-						   mlxsw_sp_port->dev->mtu);
+		mlxsw_sp_span_port_buffer_update(mlxsw_sp_port,
+						 mlxsw_sp_port->dev->mtu);
 }
 
 static struct mlxsw_sp_span_inspected_port *
@@ -888,7 +888,7 @@ mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
 
 	/* if it is an egress SPAN, bind a shared buffer to it */
 	if (type == MLXSW_SP_SPAN_EGRESS) {
-		err = mlxsw_sp_span_port_buffsize_update(port, port->dev->mtu);
+		err = mlxsw_sp_span_port_buffer_update(port, port->dev->mtu);
 		if (err)
 			return err;
 	}
@@ -1135,14 +1135,14 @@ mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span,
 	if (!ingress) {
 		u16 mtu = mlxsw_sp_port->dev->mtu;
 
-		err = mlxsw_sp_span_port_buffsize_update(mlxsw_sp_port, mtu);
+		err = mlxsw_sp_span_port_buffer_update(mlxsw_sp_port, mtu);
 		if (err)
-			goto err_buffsize_update;
+			goto err_buffer_update;
 	}
 
 	return analyzed_port;
 
-err_buffsize_update:
+err_buffer_update:
 	list_del(&analyzed_port->list);
 	kfree(analyzed_port);
 	return ERR_PTR(err);
-- 
cgit v1.2.3-59-g8ed1b


From 14366da6b59203ef5fc3cf21660113b60c2f1421 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:11 +0300
Subject: mlxsw: spectrum_span: Wrap buffer change in a function

The code that adjusts the egress buffer size is not symmetric at the
moment. The update is done via a call to
mlxsw_sp_span_port_buffer_update(), but the disablement is done inline
by invoking the write to SBIB register directly.

Wrap the disablement code in mlxsw_sp_span_port_buffer_disable().

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Suggested-by: Petr Machata <petrm@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index c52f79a97f36..2b9d8ce93b13 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -803,6 +803,15 @@ mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
 }
 
+static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp *mlxsw_sp,
+					      u8 local_port)
+{
+	char sbib_pl[MLXSW_REG_SBIB_LEN];
+
+	mlxsw_reg_sbib_pack(sbib_pl, local_port, 0);
+	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
+}
+
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 {
 	/* If port is egress mirrored, the shared buffer size should be
@@ -1154,15 +1163,13 @@ mlxsw_sp_span_analyzed_port_destroy(struct mlxsw_sp_span *span,
 				    analyzed_port)
 {
 	struct mlxsw_sp *mlxsw_sp = span->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
 
 	/* Remove egress mirror buffer now that port is no longer analyzed
 	 * at egress.
 	 */
-	if (!analyzed_port->ingress) {
-		mlxsw_reg_sbib_pack(sbib_pl, analyzed_port->local_port, 0);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-	}
+	if (!analyzed_port->ingress)
+		mlxsw_sp_span_port_buffer_disable(mlxsw_sp,
+						  analyzed_port->local_port);
 
 	list_del(&analyzed_port->list);
 	kfree(analyzed_port);
-- 
cgit v1.2.3-59-g8ed1b


From c056618c53a771bacf4e077e8be01de4405439ae Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:12 +0300
Subject: mlxsw: spectrum_span: Add APIs to bind / unbind a SPAN agent

Currently, a SPAN agent can only be bound to a per-port trigger where
the trigger is either an incoming packet (INGRESS) or an outgoing packet
(EGRESS) to / from the port.

A follow-up patch set will introduce the concept of global triggers and
per-{port, TC} enablement. With global triggers, the trigger entry is
only keyed by a trigger and not by a port and a trigger. The trigger can
be, for example, a packet that was early dropped.

While the binding between the SPAN agent and the trigger is performed
only once, the trigger entry needs to be reference counted, as the
trigger can be enabled on multiple ports.

Add APIs to bind / unbind a SPAN agent to a trigger and reference count
the trigger entry in preparation for global triggers.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 169 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |  18 +++
 2 files changed, 187 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 2b9d8ce93b13..de9012ab94d5 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -23,6 +23,7 @@ struct mlxsw_sp_span {
 	struct mlxsw_sp *mlxsw_sp;
 	struct list_head analyzed_ports_list;
 	struct mutex analyzed_ports_lock; /* Protects analyzed_ports_list */
+	struct list_head trigger_entries_list;
 	atomic_t active_entries_count;
 	int entries_count;
 	struct mlxsw_sp_span_entry entries[];
@@ -35,6 +36,14 @@ struct mlxsw_sp_span_analyzed_port {
 	bool ingress;
 };
 
+struct mlxsw_sp_span_trigger_entry {
+	struct list_head list; /* Member of trigger_entries_list */
+	refcount_t ref_count;
+	u8 local_port;
+	enum mlxsw_sp_span_trigger trigger;
+	struct mlxsw_sp_span_trigger_parms parms;
+};
+
 static void mlxsw_sp_span_respin_work(struct work_struct *work);
 
 static u64 mlxsw_sp_span_occ_get(void *priv)
@@ -61,6 +70,7 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 	atomic_set(&span->active_entries_count, 0);
 	mutex_init(&span->analyzed_ports_lock);
 	INIT_LIST_HEAD(&span->analyzed_ports_list);
+	INIT_LIST_HEAD(&span->trigger_entries_list);
 	span->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp->span = span;
 
@@ -91,6 +101,7 @@ void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 
 		WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
 	}
+	WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->trigger_entries_list));
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->analyzed_ports_list));
 	mutex_destroy(&mlxsw_sp->span->analyzed_ports_lock);
 	kfree(mlxsw_sp->span);
@@ -1225,3 +1236,161 @@ void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port,
 out_unlock:
 	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
 }
+
+static int
+__mlxsw_sp_span_trigger_entry_bind(struct mlxsw_sp_span *span,
+				   struct mlxsw_sp_span_trigger_entry *
+				   trigger_entry, bool enable)
+{
+	char mpar_pl[MLXSW_REG_MPAR_LEN];
+	enum mlxsw_reg_mpar_i_e i_e;
+
+	switch (trigger_entry->trigger) {
+	case MLXSW_SP_SPAN_TRIGGER_INGRESS:
+		i_e = MLXSW_REG_MPAR_TYPE_INGRESS;
+		break;
+	case MLXSW_SP_SPAN_TRIGGER_EGRESS:
+		i_e = MLXSW_REG_MPAR_TYPE_EGRESS;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EINVAL;
+	}
+
+	mlxsw_reg_mpar_pack(mpar_pl, trigger_entry->local_port, i_e, enable,
+			    trigger_entry->parms.span_id);
+	return mlxsw_reg_write(span->mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
+}
+
+static int
+mlxsw_sp_span_trigger_entry_bind(struct mlxsw_sp_span *span,
+				 struct mlxsw_sp_span_trigger_entry *
+				 trigger_entry)
+{
+	return __mlxsw_sp_span_trigger_entry_bind(span, trigger_entry, true);
+}
+
+static void
+mlxsw_sp_span_trigger_entry_unbind(struct mlxsw_sp_span *span,
+				   struct mlxsw_sp_span_trigger_entry *
+				   trigger_entry)
+{
+	__mlxsw_sp_span_trigger_entry_bind(span, trigger_entry, false);
+}
+
+static struct mlxsw_sp_span_trigger_entry *
+mlxsw_sp_span_trigger_entry_create(struct mlxsw_sp_span *span,
+				   enum mlxsw_sp_span_trigger trigger,
+				   struct mlxsw_sp_port *mlxsw_sp_port,
+				   const struct mlxsw_sp_span_trigger_parms
+				   *parms)
+{
+	struct mlxsw_sp_span_trigger_entry *trigger_entry;
+	int err;
+
+	trigger_entry = kzalloc(sizeof(*trigger_entry), GFP_KERNEL);
+	if (!trigger_entry)
+		return ERR_PTR(-ENOMEM);
+
+	refcount_set(&trigger_entry->ref_count, 1);
+	trigger_entry->local_port = mlxsw_sp_port->local_port;
+	trigger_entry->trigger = trigger;
+	memcpy(&trigger_entry->parms, parms, sizeof(trigger_entry->parms));
+	list_add_tail(&trigger_entry->list, &span->trigger_entries_list);
+
+	err = mlxsw_sp_span_trigger_entry_bind(span, trigger_entry);
+	if (err)
+		goto err_trigger_entry_bind;
+
+	return trigger_entry;
+
+err_trigger_entry_bind:
+	list_del(&trigger_entry->list);
+	kfree(trigger_entry);
+	return ERR_PTR(err);
+}
+
+static void
+mlxsw_sp_span_trigger_entry_destroy(struct mlxsw_sp_span *span,
+				    struct mlxsw_sp_span_trigger_entry *
+				    trigger_entry)
+{
+	mlxsw_sp_span_trigger_entry_unbind(span, trigger_entry);
+	list_del(&trigger_entry->list);
+	kfree(trigger_entry);
+}
+
+static struct mlxsw_sp_span_trigger_entry *
+mlxsw_sp_span_trigger_entry_find(struct mlxsw_sp_span *span,
+				 enum mlxsw_sp_span_trigger trigger,
+				 struct mlxsw_sp_port *mlxsw_sp_port)
+{
+	struct mlxsw_sp_span_trigger_entry *trigger_entry;
+
+	list_for_each_entry(trigger_entry, &span->trigger_entries_list, list) {
+		if (trigger_entry->trigger == trigger &&
+		    trigger_entry->local_port == mlxsw_sp_port->local_port)
+			return trigger_entry;
+	}
+
+	return NULL;
+}
+
+int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp,
+			     enum mlxsw_sp_span_trigger trigger,
+			     struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct mlxsw_sp_span_trigger_parms *parms)
+{
+	struct mlxsw_sp_span_trigger_entry *trigger_entry;
+	int err = 0;
+
+	ASSERT_RTNL();
+
+	if (!mlxsw_sp_span_entry_find_by_id(mlxsw_sp, parms->span_id))
+		return -EINVAL;
+
+	trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span,
+							 trigger,
+							 mlxsw_sp_port);
+	if (trigger_entry) {
+		if (trigger_entry->parms.span_id != parms->span_id)
+			return -EINVAL;
+		refcount_inc(&trigger_entry->ref_count);
+		goto out;
+	}
+
+	trigger_entry = mlxsw_sp_span_trigger_entry_create(mlxsw_sp->span,
+							   trigger,
+							   mlxsw_sp_port,
+							   parms);
+	if (IS_ERR(trigger_entry))
+		err = PTR_ERR(trigger_entry);
+
+out:
+	return err;
+}
+
+void mlxsw_sp_span_agent_unbind(struct mlxsw_sp *mlxsw_sp,
+				enum mlxsw_sp_span_trigger trigger,
+				struct mlxsw_sp_port *mlxsw_sp_port,
+				const struct mlxsw_sp_span_trigger_parms *parms)
+{
+	struct mlxsw_sp_span_trigger_entry *trigger_entry;
+
+	ASSERT_RTNL();
+
+	if (WARN_ON_ONCE(!mlxsw_sp_span_entry_find_by_id(mlxsw_sp,
+							 parms->span_id)))
+		return;
+
+	trigger_entry = mlxsw_sp_span_trigger_entry_find(mlxsw_sp->span,
+							 trigger,
+							 mlxsw_sp_port);
+	if (WARN_ON_ONCE(!trigger_entry))
+		return;
+
+	if (!refcount_dec_and_test(&trigger_entry->ref_count))
+		return;
+
+	mlxsw_sp_span_trigger_entry_destroy(mlxsw_sp->span, trigger_entry);
+}
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 1345eda5cc34..6821eeb3906b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -37,6 +37,15 @@ struct mlxsw_sp_span_parms {
 	u16 vid;
 };
 
+enum mlxsw_sp_span_trigger {
+	MLXSW_SP_SPAN_TRIGGER_INGRESS,
+	MLXSW_SP_SPAN_TRIGGER_EGRESS,
+};
+
+struct mlxsw_sp_span_trigger_parms {
+	int span_id;
+};
+
 struct mlxsw_sp_span_entry_ops;
 
 struct mlxsw_sp_span_entry {
@@ -84,5 +93,14 @@ int mlxsw_sp_span_analyzed_port_get(struct mlxsw_sp_port *mlxsw_sp_port,
 				    bool ingress);
 void mlxsw_sp_span_analyzed_port_put(struct mlxsw_sp_port *mlxsw_sp_port,
 				     bool ingress);
+int mlxsw_sp_span_agent_bind(struct mlxsw_sp *mlxsw_sp,
+			     enum mlxsw_sp_span_trigger trigger,
+			     struct mlxsw_sp_port *mlxsw_sp_port,
+			     const struct mlxsw_sp_span_trigger_parms *parms);
+void
+mlxsw_sp_span_agent_unbind(struct mlxsw_sp *mlxsw_sp,
+			   enum mlxsw_sp_span_trigger trigger,
+			   struct mlxsw_sp_port *mlxsw_sp_port,
+			   const struct mlxsw_sp_span_trigger_parms *parms);
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From c1d7845dfbd3dfeed2cd7bf92ec10ea97ef5d7fd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:13 +0300
Subject: mlxsw: spectrum: Convert matchall-based mirroring to new SPAN API

In matchall-based mirroring, mirroring is not done with ACLs, but a SPAN
agent is bound to the ingress / egress of a port and all incoming /
outgoing traffic is mirrored.

Convert this type of mirroring to use the new API.

First the SPAN agent is resolved, then the port is marked as analyzed
and its egress mirror buffer is potentially allocated. Lastly, the
binding is performed.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 52 ++++++++++++++++------
 1 file changed, 39 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index 889da63072be..da1c05f44cec 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -48,31 +48,57 @@ static int
 mlxsw_sp_mall_port_mirror_add(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_mall_entry *mall_entry)
 {
-	enum mlxsw_sp_span_type span_type;
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_span_trigger_parms parms;
+	enum mlxsw_sp_span_trigger trigger;
+	int err;
 
 	if (!mall_entry->mirror.to_dev) {
 		netdev_err(mlxsw_sp_port->dev, "Could not find requested device\n");
 		return -EINVAL;
 	}
 
-	span_type = mall_entry->ingress ? MLXSW_SP_SPAN_INGRESS :
-					  MLXSW_SP_SPAN_EGRESS;
-	return mlxsw_sp_span_mirror_add(mlxsw_sp_port,
-					mall_entry->mirror.to_dev,
-					span_type, true,
-					&mall_entry->mirror.span_id);
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, mall_entry->mirror.to_dev,
+				      &mall_entry->mirror.span_id);
+	if (err)
+		return err;
+
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port,
+					      mall_entry->ingress);
+	if (err)
+		goto err_analyzed_port_get;
+
+	trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					MLXSW_SP_SPAN_TRIGGER_EGRESS;
+	parms.span_id = mall_entry->mirror.span_id;
+	err = mlxsw_sp_span_agent_bind(mlxsw_sp, trigger, mlxsw_sp_port,
+				       &parms);
+	if (err)
+		goto err_agent_bind;
+
+	return 0;
+
+err_agent_bind:
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->mirror.span_id);
+	return err;
 }
 
 static void
 mlxsw_sp_mall_port_mirror_del(struct mlxsw_sp_port *mlxsw_sp_port,
 			      struct mlxsw_sp_mall_entry *mall_entry)
 {
-	enum mlxsw_sp_span_type span_type;
-
-	span_type = mall_entry->ingress ? MLXSW_SP_SPAN_INGRESS :
-					  MLXSW_SP_SPAN_EGRESS;
-	mlxsw_sp_span_mirror_del(mlxsw_sp_port, mall_entry->mirror.span_id,
-				 span_type, true);
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	struct mlxsw_sp_span_trigger_parms parms;
+	enum mlxsw_sp_span_trigger trigger;
+
+	trigger = mall_entry->ingress ? MLXSW_SP_SPAN_TRIGGER_INGRESS :
+					MLXSW_SP_SPAN_TRIGGER_EGRESS;
+	parms.span_id = mall_entry->mirror.span_id;
+	mlxsw_sp_span_agent_unbind(mlxsw_sp, trigger, mlxsw_sp_port, &parms);
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, mall_entry->ingress);
+	mlxsw_sp_span_agent_put(mlxsw_sp, mall_entry->mirror.span_id);
 }
 
 static int mlxsw_sp_mall_port_sample_set(struct mlxsw_sp_port *mlxsw_sp_port,
-- 
cgit v1.2.3-59-g8ed1b


From 7240db69c332f6625da044b048d06839ab0c2649 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:14 +0300
Subject: mlxsw: spectrum_acl: Convert flower-based mirroring to new SPAN API

In flower-based mirroring, mirroring is done with ACLs and the SPAN
agent is not bound to a port. Instead its identifier is specified in an
ACL action.

Convert this type of mirroring to use the new API.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlxsw/spectrum_acl_flex_actions.c     | 31 +++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
index e47d1d286e93..73d56012654b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_flex_actions.c
@@ -136,28 +136,35 @@ mlxsw_sp_act_mirror_add(void *priv, u8 local_in_port,
 			const struct net_device *out_dev,
 			bool ingress, int *p_span_id)
 {
-	struct mlxsw_sp_port *in_port;
+	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct mlxsw_sp *mlxsw_sp = priv;
-	enum mlxsw_sp_span_type type;
+	int err;
 
-	type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	in_port = mlxsw_sp->ports[local_in_port];
+	err = mlxsw_sp_span_agent_get(mlxsw_sp, out_dev, p_span_id);
+	if (err)
+		return err;
+
+	mlxsw_sp_port = mlxsw_sp->ports[local_in_port];
+	err = mlxsw_sp_span_analyzed_port_get(mlxsw_sp_port, ingress);
+	if (err)
+		goto err_analyzed_port_get;
 
-	return mlxsw_sp_span_mirror_add(in_port, out_dev, type,
-					false, p_span_id);
+	return 0;
+
+err_analyzed_port_get:
+	mlxsw_sp_span_agent_put(mlxsw_sp, *p_span_id);
+	return err;
 }
 
 static void
 mlxsw_sp_act_mirror_del(void *priv, u8 local_in_port, int span_id, bool ingress)
 {
+	struct mlxsw_sp_port *mlxsw_sp_port;
 	struct mlxsw_sp *mlxsw_sp = priv;
-	struct mlxsw_sp_port *in_port;
-	enum mlxsw_sp_span_type type;
-
-	type = ingress ? MLXSW_SP_SPAN_INGRESS : MLXSW_SP_SPAN_EGRESS;
-	in_port = mlxsw_sp->ports[local_in_port];
 
-	mlxsw_sp_span_mirror_del(in_port, span_id, type, false);
+	mlxsw_sp_port = mlxsw_sp->ports[local_in_port];
+	mlxsw_sp_span_analyzed_port_put(mlxsw_sp_port, ingress);
+	mlxsw_sp_span_agent_put(mlxsw_sp, span_id);
 }
 
 const struct mlxsw_afa_ops mlxsw_sp1_act_afa_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From 835d6b8c1a35eafba3faaf4b809f6d84517467f1 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:15 +0300
Subject: mlxsw: spectrum_span: Use new analyzed ports list during speed / MTU
 change

As previously explained, each port whose outgoing traffic is analyzed
needs to have an egress mirror buffer.

The size of the egress mirror buffer is calculated based on various
parameters, two of which are the speed and the MTU of the port.

Therefore, when the MTU or the speed of a port change, the SPAN code is
called to see if the egress mirror buffer of the port needs to be
adjusted.

Currently, this is done by traversing all the SPAN agents and for each
SPAN agent the list of bound ports is traversed.

Instead of the above, traverse the recently added list of analyzed
ports.

This will later allow us to remove the old SPAN API.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 72 +++++++++++-----------
 1 file changed, 35 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index de9012ab94d5..9cb8b509b849 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -776,24 +776,6 @@ static int mlxsw_sp_span_entry_put(struct mlxsw_sp *mlxsw_sp,
 	return 0;
 }
 
-static bool mlxsw_sp_span_is_egress_mirror(struct mlxsw_sp_port *port)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	struct mlxsw_sp_span_inspected_port *p;
-	int i;
-
-	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
-
-		list_for_each_entry(p, &curr->bound_ports_list, list)
-			if (p->local_port == port->local_port &&
-			    p->type == MLXSW_SP_SPAN_EGRESS)
-				return true;
-	}
-
-	return false;
-}
-
 static int
 mlxsw_sp_span_port_buffer_update(struct mlxsw_sp_port *mlxsw_sp_port, u16 mtu)
 {
@@ -823,20 +805,45 @@ static void mlxsw_sp_span_port_buffer_disable(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
 }
 
+static struct mlxsw_sp_span_analyzed_port *
+mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u8 local_port,
+				 bool ingress)
+{
+	struct mlxsw_sp_span_analyzed_port *analyzed_port;
+
+	list_for_each_entry(analyzed_port, &span->analyzed_ports_list, list) {
+		if (analyzed_port->local_port == local_port &&
+		    analyzed_port->ingress == ingress)
+			return analyzed_port;
+	}
+
+	return NULL;
+}
+
 int mlxsw_sp_span_port_mtu_update(struct mlxsw_sp_port *port, u16 mtu)
 {
+	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
+	int err = 0;
+
 	/* If port is egress mirrored, the shared buffer size should be
 	 * updated according to the mtu value
 	 */
-	if (mlxsw_sp_span_is_egress_mirror(port))
-		return mlxsw_sp_span_port_buffer_update(port, mtu);
-	return 0;
+	mutex_lock(&mlxsw_sp->span->analyzed_ports_lock);
+
+	if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span, port->local_port,
+					     false))
+		err = mlxsw_sp_span_port_buffer_update(port, mtu);
+
+	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
+
+	return err;
 }
 
 void mlxsw_sp_span_speed_update_work(struct work_struct *work)
 {
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct mlxsw_sp_port *mlxsw_sp_port;
+	struct mlxsw_sp *mlxsw_sp;
 
 	mlxsw_sp_port = container_of(dwork, struct mlxsw_sp_port,
 				     span.speed_update_dw);
@@ -844,9 +851,15 @@ void mlxsw_sp_span_speed_update_work(struct work_struct *work)
 	/* If port is egress mirrored, the shared buffer size should be
 	 * updated according to the speed value.
 	 */
-	if (mlxsw_sp_span_is_egress_mirror(mlxsw_sp_port))
+	mlxsw_sp = mlxsw_sp_port->mlxsw_sp;
+	mutex_lock(&mlxsw_sp->span->analyzed_ports_lock);
+
+	if (mlxsw_sp_span_analyzed_port_find(mlxsw_sp->span,
+					     mlxsw_sp_port->local_port, false))
 		mlxsw_sp_span_port_buffer_update(mlxsw_sp_port,
 						 mlxsw_sp_port->dev->mtu);
+
+	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
 }
 
 static struct mlxsw_sp_span_inspected_port *
@@ -1117,21 +1130,6 @@ void mlxsw_sp_span_agent_put(struct mlxsw_sp *mlxsw_sp, int span_id)
 	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
 }
 
-static struct mlxsw_sp_span_analyzed_port *
-mlxsw_sp_span_analyzed_port_find(struct mlxsw_sp_span *span, u8 local_port,
-				 bool ingress)
-{
-	struct mlxsw_sp_span_analyzed_port *analyzed_port;
-
-	list_for_each_entry(analyzed_port, &span->analyzed_ports_list, list) {
-		if (analyzed_port->local_port == local_port &&
-		    analyzed_port->ingress == ingress)
-			return analyzed_port;
-	}
-
-	return NULL;
-}
-
 static struct mlxsw_sp_span_analyzed_port *
 mlxsw_sp_span_analyzed_port_create(struct mlxsw_sp_span *span,
 				   struct mlxsw_sp_port *mlxsw_sp_port,
-- 
cgit v1.2.3-59-g8ed1b


From ca0892235ae68b097f42cd39e03b17cfead7c7c7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Thu, 30 Apr 2020 20:01:16 +0300
Subject: mlxsw: spectrum_span: Remove old SPAN API

Remove the old SPAN API now that matchall-based and flower-based
mirroring were converted to use the new API.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_span.c    | 190 +--------------------
 .../net/ethernet/mellanox/mlxsw/spectrum_span.h    |  21 ---
 2 files changed, 2 insertions(+), 209 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
index 9cb8b509b849..304eb8c3d8bd 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.c
@@ -74,12 +74,8 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 	span->mlxsw_sp = mlxsw_sp;
 	mlxsw_sp->span = span;
 
-	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
-
-		INIT_LIST_HEAD(&curr->bound_ports_list);
-		curr->id = i;
-	}
+	for (i = 0; i < mlxsw_sp->span->entries_count; i++)
+		mlxsw_sp->span->entries[i].id = i;
 
 	devlink_resource_occ_get_register(devlink, MLXSW_SP_RESOURCE_SPAN,
 					  mlxsw_sp_span_occ_get, mlxsw_sp);
@@ -91,16 +87,10 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp)
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-	int i;
 
 	cancel_work_sync(&mlxsw_sp->span->work);
 	devlink_resource_occ_get_unregister(devlink, MLXSW_SP_RESOURCE_SPAN);
 
-	for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
-		struct mlxsw_sp_span_entry *curr = &mlxsw_sp->span->entries[i];
-
-		WARN_ON_ONCE(!list_empty(&curr->bound_ports_list));
-	}
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->trigger_entries_list));
 	WARN_ON_ONCE(!list_empty(&mlxsw_sp->span->analyzed_ports_list));
 	mutex_destroy(&mlxsw_sp->span->analyzed_ports_lock);
@@ -862,131 +852,6 @@ void mlxsw_sp_span_speed_update_work(struct work_struct *work)
 	mutex_unlock(&mlxsw_sp->span->analyzed_ports_lock);
 }
 
-static struct mlxsw_sp_span_inspected_port *
-mlxsw_sp_span_entry_bound_port_find(struct mlxsw_sp_span_entry *span_entry,
-				    enum mlxsw_sp_span_type type,
-				    struct mlxsw_sp_port *port,
-				    bool bind)
-{
-	struct mlxsw_sp_span_inspected_port *p;
-
-	list_for_each_entry(p, &span_entry->bound_ports_list, list)
-		if (type == p->type &&
-		    port->local_port == p->local_port &&
-		    bind == p->bound)
-			return p;
-	return NULL;
-}
-
-static int
-mlxsw_sp_span_inspected_port_bind(struct mlxsw_sp_port *port,
-				  struct mlxsw_sp_span_entry *span_entry,
-				  enum mlxsw_sp_span_type type,
-				  bool bind)
-{
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char mpar_pl[MLXSW_REG_MPAR_LEN];
-	int pa_id = span_entry->id;
-
-	/* bind the port to the SPAN entry */
-	mlxsw_reg_mpar_pack(mpar_pl, port->local_port,
-			    (enum mlxsw_reg_mpar_i_e)type, bind, pa_id);
-	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(mpar), mpar_pl);
-}
-
-static int
-mlxsw_sp_span_inspected_port_add(struct mlxsw_sp_port *port,
-				 struct mlxsw_sp_span_entry *span_entry,
-				 enum mlxsw_sp_span_type type,
-				 bool bind)
-{
-	struct mlxsw_sp_span_inspected_port *inspected_port;
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
-	int i;
-	int err;
-
-	/* A given (source port, direction) can only be bound to one analyzer,
-	 * so if a binding is requested, check for conflicts.
-	 */
-	if (bind)
-		for (i = 0; i < mlxsw_sp->span->entries_count; i++) {
-			struct mlxsw_sp_span_entry *curr =
-				&mlxsw_sp->span->entries[i];
-
-			if (mlxsw_sp_span_entry_bound_port_find(curr, type,
-								port, bind))
-				return -EEXIST;
-		}
-
-	/* if it is an egress SPAN, bind a shared buffer to it */
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		err = mlxsw_sp_span_port_buffer_update(port, port->dev->mtu);
-		if (err)
-			return err;
-	}
-
-	if (bind) {
-		err = mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-							true);
-		if (err)
-			goto err_port_bind;
-	}
-
-	inspected_port = kzalloc(sizeof(*inspected_port), GFP_KERNEL);
-	if (!inspected_port) {
-		err = -ENOMEM;
-		goto err_inspected_port_alloc;
-	}
-	inspected_port->local_port = port->local_port;
-	inspected_port->type = type;
-	inspected_port->bound = bind;
-	list_add_tail(&inspected_port->list, &span_entry->bound_ports_list);
-
-	return 0;
-
-err_inspected_port_alloc:
-	if (bind)
-		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-						  false);
-err_port_bind:
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-	}
-	return err;
-}
-
-static void
-mlxsw_sp_span_inspected_port_del(struct mlxsw_sp_port *port,
-				 struct mlxsw_sp_span_entry *span_entry,
-				 enum mlxsw_sp_span_type type,
-				 bool bind)
-{
-	struct mlxsw_sp_span_inspected_port *inspected_port;
-	struct mlxsw_sp *mlxsw_sp = port->mlxsw_sp;
-	char sbib_pl[MLXSW_REG_SBIB_LEN];
-
-	inspected_port = mlxsw_sp_span_entry_bound_port_find(span_entry, type,
-							     port, bind);
-	if (!inspected_port)
-		return;
-
-	if (bind)
-		mlxsw_sp_span_inspected_port_bind(port, span_entry, type,
-						  false);
-	/* remove the SBIB buffer if it was egress SPAN */
-	if (type == MLXSW_SP_SPAN_EGRESS) {
-		mlxsw_reg_sbib_pack(sbib_pl, port->local_port, 0);
-		mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(sbib), sbib_pl);
-	}
-
-	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-
-	list_del(&inspected_port->list);
-	kfree(inspected_port);
-}
-
 static const struct mlxsw_sp_span_entry_ops *
 mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
 			const struct net_device *to_dev)
@@ -1000,57 +865,6 @@ mlxsw_sp_span_entry_ops(struct mlxsw_sp *mlxsw_sp,
 	return NULL;
 }
 
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     const struct net_device *to_dev,
-			     enum mlxsw_sp_span_type type, bool bind,
-			     int *p_span_id)
-{
-	struct mlxsw_sp *mlxsw_sp = from->mlxsw_sp;
-	const struct mlxsw_sp_span_entry_ops *ops;
-	struct mlxsw_sp_span_parms sparms = {NULL};
-	struct mlxsw_sp_span_entry *span_entry;
-	int err;
-
-	ops = mlxsw_sp_span_entry_ops(mlxsw_sp, to_dev);
-	if (!ops) {
-		netdev_err(to_dev, "Cannot mirror to %s", to_dev->name);
-		return -EOPNOTSUPP;
-	}
-
-	err = ops->parms_set(to_dev, &sparms);
-	if (err)
-		return err;
-
-	span_entry = mlxsw_sp_span_entry_get(mlxsw_sp, to_dev, ops, sparms);
-	if (!span_entry)
-		return -ENOBUFS;
-
-	err = mlxsw_sp_span_inspected_port_add(from, span_entry, type, bind);
-	if (err)
-		goto err_port_bind;
-
-	*p_span_id = span_entry->id;
-	return 0;
-
-err_port_bind:
-	mlxsw_sp_span_entry_put(mlxsw_sp, span_entry);
-	return err;
-}
-
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
-			      enum mlxsw_sp_span_type type, bool bind)
-{
-	struct mlxsw_sp_span_entry *span_entry;
-
-	span_entry = mlxsw_sp_span_entry_find_by_id(from->mlxsw_sp, span_id);
-	if (!span_entry) {
-		netdev_err(from->dev, "no span entry found\n");
-		return;
-	}
-
-	mlxsw_sp_span_inspected_port_del(from, span_entry, type, bind);
-}
-
 static void mlxsw_sp_span_respin_work(struct work_struct *work)
 {
 	struct mlxsw_sp_span *span;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
index 6821eeb3906b..9f6dd2d0f4e6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_span.h
@@ -13,20 +13,6 @@
 struct mlxsw_sp;
 struct mlxsw_sp_port;
 
-enum mlxsw_sp_span_type {
-	MLXSW_SP_SPAN_EGRESS,
-	MLXSW_SP_SPAN_INGRESS
-};
-
-struct mlxsw_sp_span_inspected_port {
-	struct list_head list;
-	enum mlxsw_sp_span_type type;
-	u8 local_port;
-
-	/* Whether this is a directly bound mirror (port-to-port) or an ACL. */
-	bool bound;
-};
-
 struct mlxsw_sp_span_parms {
 	struct mlxsw_sp_port *dest_port; /* NULL for unoffloaded SPAN. */
 	unsigned int ttl;
@@ -52,7 +38,6 @@ struct mlxsw_sp_span_entry {
 	const struct net_device *to_dev;
 	const struct mlxsw_sp_span_entry_ops *ops;
 	struct mlxsw_sp_span_parms parms;
-	struct list_head bound_ports_list;
 	refcount_t ref_count;
 	int id;
 };
@@ -70,12 +55,6 @@ int mlxsw_sp_span_init(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_fini(struct mlxsw_sp *mlxsw_sp);
 void mlxsw_sp_span_respin(struct mlxsw_sp *mlxsw_sp);
 
-int mlxsw_sp_span_mirror_add(struct mlxsw_sp_port *from,
-			     const struct net_device *to_dev,
-			     enum mlxsw_sp_span_type type,
-			     bool bind, int *p_span_id);
-void mlxsw_sp_span_mirror_del(struct mlxsw_sp_port *from, int span_id,
-			      enum mlxsw_sp_span_type type, bool bind);
 struct mlxsw_sp_span_entry *
 mlxsw_sp_span_entry_find_by_port(struct mlxsw_sp *mlxsw_sp,
 				 const struct net_device *to_dev);
-- 
cgit v1.2.3-59-g8ed1b


From 2b195850128f5bafde177b12489d9fa27962cc1e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 30 Apr 2020 10:35:41 -0700
Subject: tcp: add tp->dup_ack_counter

In commit 86de5921a3d5 ("tcp: defer SACK compression after DupThresh")
I added a TCP_FASTRETRANS_THRESH bias to tp->compressed_ack in order
to enable sack compression only after 3 dupacks.

Since we plan to relax this rule for flows that involve
stacks not requiring this old rule, this patch adds
a distinct tp->dup_ack_counter.

This means the TCP_FASTRETRANS_THRESH value is now used
in a single location that a future patch can adjust:

	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
		tp->dup_ack_counter++;
		goto send_now;
	}

This patch also introduces the tcp_sack_compress_send_ack()
helper to make the following patch easier to understand.

This patch refines LINUX_MIB_TCPACKCOMPRESSED to not
count the acks that we had to send if the timer expires
or tcp_sack_compress_send_ack() is sending an ack.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp_input.c  | 36 +++++++++++++++++++++++++++---------
 net/ipv4/tcp_output.c |  6 +++---
 net/ipv4/tcp_timer.c  |  8 +++++++-
 4 files changed, 38 insertions(+), 13 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 421c99c12291..2c6f87e9f0cf 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -268,6 +268,7 @@ struct tcp_sock {
 	} rack;
 	u16	advmss;		/* Advertised MSS			*/
 	u8	compressed_ack;
+	u8	dup_ack_counter;
 	u32	chrono_start;	/* Start time in jiffies of a TCP chrono */
 	u32	chrono_stat[3];	/* Time in jiffies for chrono_stat stats */
 	u8	chrono_type:2,	/* current chronograph type */
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index bf4ced9273e8..da777df0a0ba 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4327,6 +4327,27 @@ static void tcp_sack_maybe_coalesce(struct tcp_sock *tp)
 	}
 }
 
+static void tcp_sack_compress_send_ack(struct sock *sk)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (!tp->compressed_ack)
+		return;
+
+	if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
+		__sock_put(sk);
+
+	/* Since we have to send one ack finally,
+	 * substract one from tp->compressed_ack to keep
+	 * LINUX_MIB_TCPACKCOMPRESSED accurate.
+	 */
+	NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
+		      tp->compressed_ack - 1);
+
+	tp->compressed_ack = 0;
+	tcp_send_ack(sk);
+}
+
 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4355,8 +4376,7 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-			tcp_send_ack(sk);
+		tcp_sack_compress_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
 		sp--;
@@ -5275,15 +5295,13 @@ send_now:
 
 	if (tp->compressed_ack_rcv_nxt != tp->rcv_nxt) {
 		tp->compressed_ack_rcv_nxt = tp->rcv_nxt;
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
-			NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-				      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
-		tp->compressed_ack = 0;
+		tp->dup_ack_counter = 0;
 	}
-
-	if (++tp->compressed_ack <= TCP_FASTRETRANS_THRESH)
+	if (tp->dup_ack_counter < TCP_FASTRETRANS_THRESH) {
+		tp->dup_ack_counter++;
 		goto send_now;
-
+	}
+	tp->compressed_ack++;
 	if (hrtimer_is_queued(&tp->compressed_ack_timer))
 		return;
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index ba4482130f08..c414aeb1efa9 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -184,10 +184,10 @@ static inline void tcp_event_ack_sent(struct sock *sk, unsigned int pkts,
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 
-	if (unlikely(tp->compressed_ack > TCP_FASTRETRANS_THRESH)) {
+	if (unlikely(tp->compressed_ack)) {
 		NET_ADD_STATS(sock_net(sk), LINUX_MIB_TCPACKCOMPRESSED,
-			      tp->compressed_ack - TCP_FASTRETRANS_THRESH);
-		tp->compressed_ack = TCP_FASTRETRANS_THRESH;
+			      tp->compressed_ack);
+		tp->compressed_ack = 0;
 		if (hrtimer_try_to_cancel(&tp->compressed_ack_timer) == 1)
 			__sock_put(sk);
 	}
diff --git a/net/ipv4/tcp_timer.c b/net/ipv4/tcp_timer.c
index c3f26dcd6704..ada046f425d2 100644
--- a/net/ipv4/tcp_timer.c
+++ b/net/ipv4/tcp_timer.c
@@ -753,8 +753,14 @@ static enum hrtimer_restart tcp_compressed_ack_kick(struct hrtimer *timer)
 
 	bh_lock_sock(sk);
 	if (!sock_owned_by_user(sk)) {
-		if (tp->compressed_ack > TCP_FASTRETRANS_THRESH)
+		if (tp->compressed_ack) {
+			/* Since we have to send one ack finally,
+			 * substract one from tp->compressed_ack to keep
+			 * LINUX_MIB_TCPACKCOMPRESSED accurate.
+			 */
+			tp->compressed_ack--;
 			tcp_send_ack(sk);
+		}
 	} else {
 		if (!test_and_set_bit(TCP_DELACK_TIMER_DEFERRED,
 				      &sk->sk_tsq_flags))
-- 
cgit v1.2.3-59-g8ed1b


From ccd0628fca440268711560a1dbacc727b4f9e214 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 30 Apr 2020 10:35:42 -0700
Subject: tcp: tcp_sack_new_ofo_skb() should be more conservative

Currently, tcp_sack_new_ofo_skb() sends an ack if prior
acks were 'compressed' and room has to be made in tp->selective_acks[].

But there is no guarantee all four sack ranges can be included
in SACK option. As a matter of fact, when TCP timestamps option
is used, only three SACK ranges can be included.

Let's assume only two ranges can be included, and force the ack:

- When the reordering done after a successful tcp_sack_extend()
  touches more than 2 ranges.

- If we have at least 2 ranges when adding a new one.

This enforces that before a range is in third or fourth
position, at least one ACK packet included it in first/second
position.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index da777df0a0ba..ef921ecba415 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -4348,6 +4348,12 @@ static void tcp_sack_compress_send_ack(struct sock *sk)
 	tcp_send_ack(sk);
 }
 
+/* Reasonable amount of sack blocks included in TCP SACK option
+ * The max is 4, but this becomes 3 if TCP timestamps are there.
+ * Given that SACK packets might be lost, be conservative and use 2.
+ */
+#define TCP_SACK_BLOCKS_EXPECTED 2
+
 static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -4360,6 +4366,8 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 
 	for (this_sack = 0; this_sack < cur_sacks; this_sack++, sp++) {
 		if (tcp_sack_extend(sp, seq, end_seq)) {
+			if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+				tcp_sack_compress_send_ack(sk);
 			/* Rotate this_sack to the first one. */
 			for (; this_sack > 0; this_sack--, sp--)
 				swap(*sp, *(sp - 1));
@@ -4369,6 +4377,9 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 		}
 	}
 
+	if (this_sack >= TCP_SACK_BLOCKS_EXPECTED)
+		tcp_sack_compress_send_ack(sk);
+
 	/* Could not find an adjacent existing SACK, build a new one,
 	 * put it at the front, and shift everyone else down.  We
 	 * always know there is at least one SACK present already here.
@@ -4376,7 +4387,6 @@ static void tcp_sack_new_ofo_skb(struct sock *sk, u32 seq, u32 end_seq)
 	 * If the sack array is full, forget about the last one.
 	 */
 	if (this_sack >= TCP_NUM_SACKS) {
-		tcp_sack_compress_send_ack(sk);
 		this_sack--;
 		tp->rx_opt.num_sacks--;
 		sp--;
-- 
cgit v1.2.3-59-g8ed1b


From a70437cc09a11771870e9f6bfc0ba1237161daa8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 30 Apr 2020 10:35:43 -0700
Subject: tcp: add hrtimer slack to sack compression

Add a sysctl to control hrtimer slack, default of 100 usec.

This gives the opportunity to reduce system overhead,
and help very short RTT flows.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Soheil Hassas Yeganeh <soheil@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst | 8 ++++++++
 include/net/netns/ipv4.h               | 1 +
 net/ipv4/sysctl_net_ipv4.c             | 7 +++++++
 net/ipv4/tcp_input.c                   | 5 +++--
 net/ipv4/tcp_ipv4.c                    | 1 +
 5 files changed, 20 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 3266aee9e052..50b440d29a13 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -651,6 +651,14 @@ tcp_comp_sack_delay_ns - LONG INTEGER
 
 	Default : 1,000,000 ns (1 ms)
 
+tcp_comp_sack_slack_ns - LONG INTEGER
+	This sysctl control the slack used when arming the
+	timer used by SACK compression. This gives extra time
+	for small RTT flows, and reduces system overhead by allowing
+	opportunistic reduction of timer interrupts.
+
+	Default : 100,000 ns (100 us)
+
 tcp_comp_sack_nr - INTEGER
 	Max number of SACK that can be compressed.
 	Using 0 disables SACK compression.
diff --git a/include/net/netns/ipv4.h b/include/net/netns/ipv4.h
index 5acdb4d414c4..9e36738c1fe1 100644
--- a/include/net/netns/ipv4.h
+++ b/include/net/netns/ipv4.h
@@ -173,6 +173,7 @@ struct netns_ipv4 {
 	int sysctl_tcp_rmem[3];
 	int sysctl_tcp_comp_sack_nr;
 	unsigned long sysctl_tcp_comp_sack_delay_ns;
+	unsigned long sysctl_tcp_comp_sack_slack_ns;
 	struct inet_timewait_death_row tcp_death_row;
 	int sysctl_max_syn_backlog;
 	int sysctl_tcp_fastopen;
diff --git a/net/ipv4/sysctl_net_ipv4.c b/net/ipv4/sysctl_net_ipv4.c
index 95ad71e76cc3..3a628423d27b 100644
--- a/net/ipv4/sysctl_net_ipv4.c
+++ b/net/ipv4/sysctl_net_ipv4.c
@@ -1329,6 +1329,13 @@ static struct ctl_table ipv4_net_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_doulongvec_minmax,
 	},
+	{
+		.procname	= "tcp_comp_sack_slack_ns",
+		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_slack_ns,
+		.maxlen		= sizeof(unsigned long),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 	{
 		.procname	= "tcp_comp_sack_nr",
 		.data		= &init_net.ipv4.sysctl_tcp_comp_sack_nr,
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ef921ecba415..d68128a672ab 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -5324,8 +5324,9 @@ send_now:
 	delay = min_t(unsigned long, sock_net(sk)->ipv4.sysctl_tcp_comp_sack_delay_ns,
 		      rtt * (NSEC_PER_USEC >> 3)/20);
 	sock_hold(sk);
-	hrtimer_start(&tp->compressed_ack_timer, ns_to_ktime(delay),
-		      HRTIMER_MODE_REL_PINNED_SOFT);
+	hrtimer_start_range_ns(&tp->compressed_ack_timer, ns_to_ktime(delay),
+			       sock_net(sk)->ipv4.sysctl_tcp_comp_sack_slack_ns,
+			       HRTIMER_MODE_REL_PINNED_SOFT);
 }
 
 static inline void tcp_ack_snd_check(struct sock *sk)
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 83a5d24e13b8..6c05f1ceb538 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -2780,6 +2780,7 @@ static int __net_init tcp_sk_init(struct net *net)
 		       sizeof(init_net.ipv4.sysctl_tcp_wmem));
 	}
 	net->ipv4.sysctl_tcp_comp_sack_delay_ns = NSEC_PER_MSEC;
+	net->ipv4.sysctl_tcp_comp_sack_slack_ns = 100 * NSEC_PER_USEC;
 	net->ipv4.sysctl_tcp_comp_sack_nr = 44;
 	net->ipv4.sysctl_tcp_fastopen = TFO_CLIENT_ENABLE;
 	spin_lock_init(&net->ipv4.tcp_fastopen_ctx_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 34a9c361dd480041d790fff3d6ea58513c8769e8 Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Thu, 30 Apr 2020 17:37:02 +0000
Subject: hsr: remove hsr interface if all slaves are removed

When all hsr slave interfaces are removed, the hsr interface no longer works.
At that point, it's fine to remove the unused hsr interface automatically
to save resources.
This is common behavior for virtual interfaces.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_main.c | 22 ++++++++++++++++++++--
 1 file changed, 20 insertions(+), 2 deletions(-)

diff --git a/net/hsr/hsr_main.c b/net/hsr/hsr_main.c
index 26d6c39f24e1..e2564de67603 100644
--- a/net/hsr/hsr_main.c
+++ b/net/hsr/hsr_main.c
@@ -15,12 +15,23 @@
 #include "hsr_framereg.h"
 #include "hsr_slave.h"
 
+static bool hsr_slave_empty(struct hsr_priv *hsr)
+{
+	struct hsr_port *port;
+
+	hsr_for_each_port(hsr, port)
+		if (port->type != HSR_PT_MASTER)
+			return false;
+	return true;
+}
+
 static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 			     void *ptr)
 {
-	struct net_device *dev;
 	struct hsr_port *port, *master;
+	struct net_device *dev;
 	struct hsr_priv *hsr;
+	LIST_HEAD(list_kill);
 	int mtu_max;
 	int res;
 
@@ -85,8 +96,15 @@ static int hsr_netdev_notify(struct notifier_block *nb, unsigned long event,
 		master->dev->mtu = mtu_max;
 		break;
 	case NETDEV_UNREGISTER:
-		if (!is_hsr_master(dev))
+		if (!is_hsr_master(dev)) {
+			master = hsr_port_get_hsr(port->hsr, HSR_PT_MASTER);
 			hsr_del_port(port);
+			if (hsr_slave_empty(master->hsr)) {
+				unregister_netdevice_queue(master->dev,
+							   &list_kill);
+				unregister_netdevice_many(&list_kill);
+			}
+		}
 		break;
 	case NETDEV_PRE_TYPE_CHANGE:
 		/* HSR works only on Ethernet devices. Refuse slave to change
-- 
cgit v1.2.3-59-g8ed1b


From ef2c0a78aee10113d1299eb81e642470308e32ca Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:55:36 +0200
Subject: r8169: don't pass net_device to irq coalescing sub-functions

The net_device argument is just used to get a struct rtl8169_private
pointer via netdev_priv(). Therefore pass the struct rtl8169_private
pointer directly.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 68d5255568a5..1c3974ad88eb 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1805,9 +1805,9 @@ static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = {
 #undef rxtx_x1822
 
 /* get rx/tx scale vector corresponding to current speed */
-static const struct rtl_coalesce_info *rtl_coalesce_info(struct net_device *dev)
+static const struct rtl_coalesce_info *
+rtl_coalesce_info(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
 	const struct rtl_coalesce_info *ci;
 
 	if (tp->mac_version <= RTL_GIGA_MAC_VER_06)
@@ -1844,7 +1844,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	memset(ec, 0, sizeof(*ec));
 
 	/* get rx/tx scale corresponding to current speed and CPlusCmd[0:1] */
-	ci = rtl_coalesce_info(dev);
+	ci = rtl_coalesce_info(tp);
 	if (IS_ERR(ci))
 		return PTR_ERR(ci);
 
@@ -1874,12 +1874,12 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
 /* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */
 static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
-			struct net_device *dev, u32 nsec, u16 *cp01)
+			struct rtl8169_private *tp, u32 nsec, u16 *cp01)
 {
 	const struct rtl_coalesce_info *ci;
 	u16 i;
 
-	ci = rtl_coalesce_info(dev);
+	ci = rtl_coalesce_info(tp);
 	if (IS_ERR(ci))
 		return ERR_CAST(ci);
 
@@ -1912,7 +1912,7 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	if (rtl_is_8125(tp))
 		return -EOPNOTSUPP;
 
-	scale = rtl_coalesce_choose_scale(dev,
+	scale = rtl_coalesce_choose_scale(tp,
 			max(p[0].usecs, p[1].usecs) * 1000, &cp01);
 	if (IS_ERR(scale))
 		return PTR_ERR(scale);
-- 
cgit v1.2.3-59-g8ed1b


From 2815b30535a0613ee07d477d0c628100f40b6059 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:56:20 +0200
Subject: r8169: merge scale for tx and rx irq coalescing

Rx and tx scale are always the same. Simplify the code by using a single
scale for both rx and tx.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 65 ++++++++++++-------------------
 1 file changed, 25 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1c3974ad88eb..9932b6ffae02 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1768,41 +1768,29 @@ static void rtl8169_get_strings(struct net_device *dev, u32 stringset, u8 *data)
  * 1 1                     160us           81.92us         1.31ms
  */
 
-/* rx/tx scale factors for one particular CPlusCmd[0:1] value */
-struct rtl_coalesce_scale {
-	/* Rx / Tx */
-	u32 nsecs[2];
-};
-
 /* rx/tx scale factors for all CPlusCmd[0:1] cases */
 struct rtl_coalesce_info {
 	u32 speed;
-	struct rtl_coalesce_scale scalev[4];	/* each CPlusCmd[0:1] case */
+	u32 scale_nsecs[4];
 };
 
-/* produce (r,t) pairs with each being in series of *1, *8, *8*2, *8*2*2 */
-#define rxtx_x1822(r, t) {		\
-	{{(r),		(t)}},		\
-	{{(r)*8,	(t)*8}},	\
-	{{(r)*8*2,	(t)*8*2}},	\
-	{{(r)*8*2*2,	(t)*8*2*2}},	\
-}
+/* produce array with base delay *1, *8, *8*2, *8*2*2 */
+#define COALESCE_DELAY(d) { (d), 8 * (d), 16 * (d), 32 * (d) }
+
 static const struct rtl_coalesce_info rtl_coalesce_info_8169[] = {
-	/* speed	delays:     rx00   tx00	*/
-	{ SPEED_10,	rxtx_x1822(40960, 40960)	},
-	{ SPEED_100,	rxtx_x1822( 2560,  2560)	},
-	{ SPEED_1000,	rxtx_x1822(  320,   320)	},
+	{ SPEED_10,	COALESCE_DELAY(40960) },
+	{ SPEED_100,	COALESCE_DELAY(2560) },
+	{ SPEED_1000,	COALESCE_DELAY(320) },
 	{ 0 },
 };
 
 static const struct rtl_coalesce_info rtl_coalesce_info_8168_8136[] = {
-	/* speed	delays:     rx00   tx00	*/
-	{ SPEED_10,	rxtx_x1822(40960, 40960)	},
-	{ SPEED_100,	rxtx_x1822( 2560,  2560)	},
-	{ SPEED_1000,	rxtx_x1822( 5000,  5000)	},
+	{ SPEED_10,	COALESCE_DELAY(40960) },
+	{ SPEED_100,	COALESCE_DELAY(2560) },
+	{ SPEED_1000,	COALESCE_DELAY(5000) },
 	{ 0 },
 };
-#undef rxtx_x1822
+#undef COALESCE_DELAY
 
 /* get rx/tx scale vector corresponding to current speed */
 static const struct rtl_coalesce_info *
@@ -1827,7 +1815,6 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	const struct rtl_coalesce_info *ci;
-	const struct rtl_coalesce_scale *scale;
 	struct {
 		u32 *max_frames;
 		u32 *usecs;
@@ -1835,6 +1822,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 		{ &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs },
 		{ &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs }
 	}, *p = coal_settings;
+	u32 scale;
 	int i;
 	u16 w;
 
@@ -1848,7 +1836,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	if (IS_ERR(ci))
 		return PTR_ERR(ci);
 
-	scale = &ci->scalev[tp->cp_cmd & INTT_MASK];
+	scale = ci->scale_nsecs[tp->cp_cmd & INTT_MASK];
 
 	/* read IntrMitigate and adjust according to scale */
 	for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
@@ -1859,7 +1847,7 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
 	for (i = 0; i < 2; i++) {
 		p = coal_settings + i;
-		*p->usecs = (*p->usecs * scale->nsecs[i]) / 1000;
+		*p->usecs = (*p->usecs * scale) / 1000;
 
 		/*
 		 * ethtool_coalesce says it is illegal to set both usecs and
@@ -1873,32 +1861,29 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 }
 
 /* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */
-static const struct rtl_coalesce_scale *rtl_coalesce_choose_scale(
-			struct rtl8169_private *tp, u32 nsec, u16 *cp01)
+static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 nsec,
+				     u16 *cp01)
 {
 	const struct rtl_coalesce_info *ci;
 	u16 i;
 
 	ci = rtl_coalesce_info(tp);
 	if (IS_ERR(ci))
-		return ERR_CAST(ci);
+		return PTR_ERR(ci);
 
 	for (i = 0; i < 4; i++) {
-		u32 rxtx_maxscale = max(ci->scalev[i].nsecs[0],
-					ci->scalev[i].nsecs[1]);
-		if (nsec <= rxtx_maxscale * RTL_COALESCE_T_MAX) {
+		if (nsec <= ci->scale_nsecs[i] * RTL_COALESCE_T_MAX) {
 			*cp01 = i;
-			return &ci->scalev[i];
+			return ci->scale_nsecs[i];
 		}
 	}
 
-	return ERR_PTR(-EINVAL);
+	return -EINVAL;
 }
 
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	const struct rtl_coalesce_scale *scale;
 	struct {
 		u32 frames;
 		u32 usecs;
@@ -1906,16 +1891,16 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 		{ ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs },
 		{ ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs }
 	}, *p = coal_settings;
-	u16 w = 0, cp01;
-	int i;
+	u16 w = 0, cp01 = 0;
+	int scale, i;
 
 	if (rtl_is_8125(tp))
 		return -EOPNOTSUPP;
 
 	scale = rtl_coalesce_choose_scale(tp,
 			max(p[0].usecs, p[1].usecs) * 1000, &cp01);
-	if (IS_ERR(scale))
-		return PTR_ERR(scale);
+	if (scale < 0)
+		return scale;
 
 	for (i = 0; i < 2; i++, p++) {
 		u32 units;
@@ -1936,7 +1921,7 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 			p->frames = 0;
 		}
 
-		units = p->usecs * 1000 / scale->nsecs[i];
+		units = p->usecs * 1000 / scale;
 		if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4)
 			return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6cf96dd4272537baf4ceab452f6276da1b8d82af Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:56:58 +0200
Subject: r8169: improve rtl_get_coalesce

Use the FIELD_GET() macro to make the code more readable. In addition,
change the logic to round the time limit up, not down. The reason is that
a time limit <1us would currently be rounded to 0, which would be
interpreted as "no time limit set".

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 46 ++++++++++++++-----------------
 1 file changed, 20 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 9932b6ffae02..a36a48f713fa 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -27,6 +27,7 @@
 #include <linux/interrupt.h>
 #include <linux/dma-mapping.h>
 #include <linux/pm_runtime.h>
+#include <linux/bitfield.h>
 #include <linux/prefetch.h>
 #include <linux/ipv6.h>
 #include <net/ip6_checksum.h>
@@ -229,6 +230,11 @@ enum rtl_registers {
 	CPlusCmd	= 0xe0,
 	IntrMitigate	= 0xe2,
 
+#define RTL_COALESCE_TX_USECS	GENMASK(15, 12)
+#define RTL_COALESCE_TX_FRAMES	GENMASK(11, 8)
+#define RTL_COALESCE_RX_USECS	GENMASK(7, 4)
+#define RTL_COALESCE_RX_FRAMES	GENMASK(3, 0)
+
 #define RTL_COALESCE_MASK	0x0f
 #define RTL_COALESCE_SHIFT	4
 #define RTL_COALESCE_T_MAX	(RTL_COALESCE_MASK)
@@ -1815,16 +1821,8 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
 	const struct rtl_coalesce_info *ci;
-	struct {
-		u32 *max_frames;
-		u32 *usecs;
-	} coal_settings [] = {
-		{ &ec->rx_max_coalesced_frames, &ec->rx_coalesce_usecs },
-		{ &ec->tx_max_coalesced_frames, &ec->tx_coalesce_usecs }
-	}, *p = coal_settings;
-	u32 scale;
-	int i;
-	u16 w;
+	u32 scale, c_us, c_fr;
+	u16 intrmit;
 
 	if (rtl_is_8125(tp))
 		return -EOPNOTSUPP;
@@ -1838,24 +1836,20 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
 	scale = ci->scale_nsecs[tp->cp_cmd & INTT_MASK];
 
-	/* read IntrMitigate and adjust according to scale */
-	for (w = RTL_R16(tp, IntrMitigate); w; w >>= RTL_COALESCE_SHIFT, p++) {
-		*p->max_frames = (w & RTL_COALESCE_MASK) << 2;
-		w >>= RTL_COALESCE_SHIFT;
-		*p->usecs = w & RTL_COALESCE_MASK;
-	}
+	intrmit = RTL_R16(tp, IntrMitigate);
 
-	for (i = 0; i < 2; i++) {
-		p = coal_settings + i;
-		*p->usecs = (*p->usecs * scale) / 1000;
+	c_us = FIELD_GET(RTL_COALESCE_TX_USECS, intrmit);
+	ec->tx_coalesce_usecs = DIV_ROUND_UP(c_us * scale, 1000);
 
-		/*
-		 * ethtool_coalesce says it is illegal to set both usecs and
-		 * max_frames to 0.
-		 */
-		if (!*p->usecs && !*p->max_frames)
-			*p->max_frames = 1;
-	}
+	c_fr = FIELD_GET(RTL_COALESCE_TX_FRAMES, intrmit);
+	/* ethtool_coalesce states usecs and max_frames must not both be 0 */
+	ec->tx_max_coalesced_frames = (c_us || c_fr) ? c_fr * 4 : 1;
+
+	c_us = FIELD_GET(RTL_COALESCE_RX_USECS, intrmit);
+	ec->rx_coalesce_usecs = DIV_ROUND_UP(c_us * scale, 1000);
+
+	c_fr = FIELD_GET(RTL_COALESCE_RX_FRAMES, intrmit);
+	ec->rx_max_coalesced_frames = (c_us || c_fr) ? c_fr * 4 : 1;
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From cb9d97de05646de69b997da0137b94e00cba7f99 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:57:32 +0200
Subject: r8169: improve rtl_coalesce_choose_scale

The time limit provided by userspace is multiplied by 1000, which
could result in an overflow. Therefore change the time limit
parameter unit from ns to us, and avoid the problematic operation.
If there's no matching scale because the provided time limit is too big,
return ERANGE instead of EINVAL to give the user a hint about what's
wrong.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a36a48f713fa..6c17c234bc06 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1854,8 +1854,8 @@ static int rtl_get_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	return 0;
 }
 
-/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, nsec) */
-static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 nsec,
+/* choose appropriate scale factor and CPlusCmd[0:1] for (speed, usec) */
+static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 usec,
 				     u16 *cp01)
 {
 	const struct rtl_coalesce_info *ci;
@@ -1866,13 +1866,13 @@ static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 nsec,
 		return PTR_ERR(ci);
 
 	for (i = 0; i < 4; i++) {
-		if (nsec <= ci->scale_nsecs[i] * RTL_COALESCE_T_MAX) {
+		if (usec <= ci->scale_nsecs[i] * RTL_COALESCE_T_MAX / 1000U) {
 			*cp01 = i;
 			return ci->scale_nsecs[i];
 		}
 	}
 
-	return -EINVAL;
+	return -ERANGE;
 }
 
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
@@ -1886,13 +1886,14 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 		{ ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs }
 	}, *p = coal_settings;
 	u16 w = 0, cp01 = 0;
+	u32 coal_usec_max;
 	int scale, i;
 
 	if (rtl_is_8125(tp))
 		return -EOPNOTSUPP;
 
-	scale = rtl_coalesce_choose_scale(tp,
-			max(p[0].usecs, p[1].usecs) * 1000, &cp01);
+	coal_usec_max = max(ec->rx_coalesce_usecs, ec->tx_coalesce_usecs);
+	scale = rtl_coalesce_choose_scale(tp, coal_usec_max, &cp01);
 	if (scale < 0)
 		return scale;
 
-- 
cgit v1.2.3-59-g8ed1b


From bdd2be3adb7d139a598f2277af7fa625fc399af1 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:58:06 +0200
Subject: r8169: improve interrupt coalescing parameter handling

The chip supports only frame limits 0, 4, 8, .. 60 internally.
Returning EINVAL for all val % 4 != 0 seems a little too
unfriendly to the user. Therefore round up the frame limit to the next
supported value. In addition, round up the time limit; otherwise a very
low limit could be rounded down to 0 and interpreted as "ignore value"
by the chip.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 6c17c234bc06..a81d46abe3c2 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1909,21 +1909,21 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 		 * - then user does `ethtool -C eth0 rx-usecs 100`
 		 *
 		 * since ethtool sends to kernel whole ethtool_coalesce
-		 * settings, if we do not handle rx_usecs=!0, rx_frames=1
-		 * we'll reject it below in `frames % 4 != 0`.
+		 * settings, if we want to ignore rx_frames then it has
+		 * to be set to 0.
 		 */
 		if (p->frames == 1) {
 			p->frames = 0;
 		}
 
-		units = p->usecs * 1000 / scale;
-		if (p->frames > RTL_COALESCE_FRAME_MAX || p->frames % 4)
-			return -EINVAL;
+		units = DIV_ROUND_UP(p->usecs * 1000, scale);
+		if (p->frames > RTL_COALESCE_FRAME_MAX)
+			return -ERANGE;
 
 		w <<= RTL_COALESCE_SHIFT;
 		w |= units;
 		w <<= RTL_COALESCE_SHIFT;
-		w |= p->frames >> 2;
+		w |= DIV_ROUND_UP(p->frames, 4);
 	}
 
 	rtl_lock_work(tp);
-- 
cgit v1.2.3-59-g8ed1b


From 2b3e48b66516602d7d63142cda84b554f908eb54 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:58:47 +0200
Subject: r8169: improve rtl_set_coalesce

Use FIELD_PREP() to make the code more readable, and avoid the loop.
No functional change intended.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 70 +++++++++++++------------------
 1 file changed, 30 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a81d46abe3c2..4fe8b1d35b69 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -235,10 +235,8 @@ enum rtl_registers {
 #define RTL_COALESCE_RX_USECS	GENMASK(7, 4)
 #define RTL_COALESCE_RX_FRAMES	GENMASK(3, 0)
 
-#define RTL_COALESCE_MASK	0x0f
-#define RTL_COALESCE_SHIFT	4
-#define RTL_COALESCE_T_MAX	(RTL_COALESCE_MASK)
-#define RTL_COALESCE_FRAME_MAX	(RTL_COALESCE_MASK << 2)
+#define RTL_COALESCE_T_MAX	0x0fU
+#define RTL_COALESCE_FRAME_MAX	(RTL_COALESCE_T_MAX * 4)
 
 	RxDescAddrLow	= 0xe4,
 	RxDescAddrHigh	= 0xe8,
@@ -1878,57 +1876,49 @@ static int rtl_coalesce_choose_scale(struct rtl8169_private *tp, u32 usec,
 static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 {
 	struct rtl8169_private *tp = netdev_priv(dev);
-	struct {
-		u32 frames;
-		u32 usecs;
-	} coal_settings [] = {
-		{ ec->rx_max_coalesced_frames, ec->rx_coalesce_usecs },
-		{ ec->tx_max_coalesced_frames, ec->tx_coalesce_usecs }
-	}, *p = coal_settings;
+	u32 tx_fr = ec->tx_max_coalesced_frames;
+	u32 rx_fr = ec->rx_max_coalesced_frames;
+	u32 coal_usec_max, units;
 	u16 w = 0, cp01 = 0;
-	u32 coal_usec_max;
-	int scale, i;
+	int scale;
 
 	if (rtl_is_8125(tp))
 		return -EOPNOTSUPP;
 
+	if (rx_fr > RTL_COALESCE_FRAME_MAX || tx_fr > RTL_COALESCE_FRAME_MAX)
+		return -ERANGE;
+
 	coal_usec_max = max(ec->rx_coalesce_usecs, ec->tx_coalesce_usecs);
 	scale = rtl_coalesce_choose_scale(tp, coal_usec_max, &cp01);
 	if (scale < 0)
 		return scale;
 
-	for (i = 0; i < 2; i++, p++) {
-		u32 units;
-
-		/*
-		 * accept max_frames=1 we returned in rtl_get_coalesce.
-		 * accept it not only when usecs=0 because of e.g. the following scenario:
-		 *
-		 * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX)
-		 * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1
-		 * - then user does `ethtool -C eth0 rx-usecs 100`
-		 *
-		 * since ethtool sends to kernel whole ethtool_coalesce
-		 * settings, if we want to ignore rx_frames then it has
-		 * to be set to 0.
-		 */
-		if (p->frames == 1) {
-			p->frames = 0;
-		}
+	/* Accept max_frames=1 we returned in rtl_get_coalesce. Accept it
+	 * not only when usecs=0 because of e.g. the following scenario:
+	 *
+	 * - both rx_usecs=0 & rx_frames=0 in hardware (no delay on RX)
+	 * - rtl_get_coalesce returns rx_usecs=0, rx_frames=1
+	 * - then user does `ethtool -C eth0 rx-usecs 100`
+	 *
+	 * Since ethtool sends to kernel whole ethtool_coalesce settings,
+	 * if we want to ignore rx_frames then it has to be set to 0.
+	 */
+	if (rx_fr == 1)
+		rx_fr = 0;
+	if (tx_fr == 1)
+		tx_fr = 0;
 
-		units = DIV_ROUND_UP(p->usecs * 1000, scale);
-		if (p->frames > RTL_COALESCE_FRAME_MAX)
-			return -ERANGE;
+	w |= FIELD_PREP(RTL_COALESCE_TX_FRAMES, DIV_ROUND_UP(tx_fr, 4));
+	w |= FIELD_PREP(RTL_COALESCE_RX_FRAMES, DIV_ROUND_UP(rx_fr, 4));
 
-		w <<= RTL_COALESCE_SHIFT;
-		w |= units;
-		w <<= RTL_COALESCE_SHIFT;
-		w |= DIV_ROUND_UP(p->frames, 4);
-	}
+	units = DIV_ROUND_UP(ec->tx_coalesce_usecs * 1000U, scale);
+	w |= FIELD_PREP(RTL_COALESCE_TX_USECS, units);
+	units = DIV_ROUND_UP(ec->rx_coalesce_usecs * 1000U, scale);
+	w |= FIELD_PREP(RTL_COALESCE_RX_USECS, units);
 
 	rtl_lock_work(tp);
 
-	RTL_W16(tp, IntrMitigate, swab16(w));
+	RTL_W16(tp, IntrMitigate, w);
 
 	tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01;
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
-- 
cgit v1.2.3-59-g8ed1b


From 81496b72e9ba1999d4ed7bb7fa407a1edef020a4 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 30 Apr 2020 21:59:48 +0200
Subject: r8169: add check for invalid parameter combination in
 rtl_set_coalesce

Realtek provided information about a HW constraint that time limit must
not be set to 0 if the frame limit is >0. Add a check for this and
reject invalid parameter combinations.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 4fe8b1d35b69..aa3e63e031da 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1908,6 +1908,11 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 	if (tx_fr == 1)
 		tx_fr = 0;
 
+	/* HW requires time limit to be set if frame limit is set */
+	if ((tx_fr && !ec->tx_coalesce_usecs) ||
+	    (rx_fr && !ec->rx_coalesce_usecs))
+		return -EINVAL;
+
 	w |= FIELD_PREP(RTL_COALESCE_TX_FRAMES, DIV_ROUND_UP(tx_fr, 4));
 	w |= FIELD_PREP(RTL_COALESCE_RX_FRAMES, DIV_ROUND_UP(rx_fr, 4));
 
-- 
cgit v1.2.3-59-g8ed1b


From 673e69a67dd63fc3b40f109d1677a5dc72185fbb Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 30 Apr 2020 11:49:08 -0700
Subject: net: dsa: b53: Rename num_arl_entries to num_arl_bins

The variable currently holds the number of ARL bins per ARL bucket,
which is different from the number of ARL entries, which would be bins
times buckets. We will be adding a num_arl_buckets in a subsequent patch,
so get the variable names straight now.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 52 ++++++++++++++++++++--------------------
 drivers/net/dsa/b53/b53_priv.h   |  2 +-
 2 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index c283593bef17..41b75f41677a 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1495,10 +1495,10 @@ static int b53_arl_read(struct b53_device *dev, u64 mac,
 	if (ret)
 		return ret;
 
-	bitmap_zero(free_bins, dev->num_arl_entries);
+	bitmap_zero(free_bins, dev->num_arl_bins);
 
 	/* Read the bins */
-	for (i = 0; i < dev->num_arl_entries; i++) {
+	for (i = 0; i < dev->num_arl_bins; i++) {
 		u64 mac_vid;
 		u32 fwd_entry;
 
@@ -1521,10 +1521,10 @@ static int b53_arl_read(struct b53_device *dev, u64 mac,
 		return 0;
 	}
 
-	if (bitmap_weight(free_bins, dev->num_arl_entries) == 0)
+	if (bitmap_weight(free_bins, dev->num_arl_bins) == 0)
 		return -ENOSPC;
 
-	*idx = find_first_bit(free_bins, dev->num_arl_entries);
+	*idx = find_first_bit(free_bins, dev->num_arl_bins);
 
 	return -ENOENT;
 }
@@ -1692,7 +1692,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
 		if (ret)
 			return ret;
 
-		if (priv->num_arl_entries > 2) {
+		if (priv->num_arl_bins > 2) {
 			b53_arl_search_rd(priv, 1, &results[1]);
 			ret = b53_fdb_copy(port, &results[1], cb, data);
 			if (ret)
@@ -2185,7 +2185,7 @@ struct b53_chip_data {
 	u16 enabled_ports;
 	u8 cpu_port;
 	u8 vta_regs[3];
-	u8 arl_entries;
+	u8 arl_bins;
 	u8 duplex_reg;
 	u8 jumbo_pm_reg;
 	u8 jumbo_size_reg;
@@ -2204,7 +2204,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5325",
 		.vlans = 16,
 		.enabled_ports = 0x1f,
-		.arl_entries = 2,
+		.arl_bins = 2,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2213,7 +2213,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5365",
 		.vlans = 256,
 		.enabled_ports = 0x1f,
-		.arl_entries = 2,
+		.arl_bins = 2,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2222,7 +2222,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5389",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2234,7 +2234,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5395",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2246,7 +2246,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5397",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2258,7 +2258,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM5398",
 		.vlans = 4096,
 		.enabled_ports = 0x7f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2270,7 +2270,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53115",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.vta_regs = B53_VTA_REGS,
 		.cpu_port = B53_CPU_PORT,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2282,7 +2282,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53125",
 		.vlans = 4096,
 		.enabled_ports = 0xff,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2294,7 +2294,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53128",
 		.vlans = 4096,
 		.enabled_ports = 0x1ff,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2306,7 +2306,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM63xx",
 		.vlans = 4096,
 		.enabled_ports = 0, /* pdata must provide them */
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_63XX,
 		.duplex_reg = B53_DUPLEX_STAT_63XX,
@@ -2318,7 +2318,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53010",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2330,7 +2330,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53011",
 		.vlans = 4096,
 		.enabled_ports = 0x1bf,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2342,7 +2342,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53012",
 		.vlans = 4096,
 		.enabled_ports = 0x1bf,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2354,7 +2354,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53018",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2366,7 +2366,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM53019",
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2378,7 +2378,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM585xx/586xx/88312",
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2390,7 +2390,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM583xx/11360",
 		.vlans = 4096,
 		.enabled_ports = 0x103,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2402,7 +2402,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM7445",
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
-		.arl_entries = 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2414,7 +2414,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.dev_name = "BCM7278",
 		.vlans = 4096,
 		.enabled_ports = 0x1ff,
-		.arl_entries= 4,
+		.arl_bins = 4,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2442,7 +2442,7 @@ static int b53_switch_init(struct b53_device *dev)
 			dev->jumbo_pm_reg = chip->jumbo_pm_reg;
 			dev->cpu_port = chip->cpu_port;
 			dev->num_vlans = chip->vlans;
-			dev->num_arl_entries = chip->arl_entries;
+			dev->num_arl_bins = chip->arl_bins;
 			break;
 		}
 	}
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 3d42318bc3f1..1c5c443d571f 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -117,7 +117,7 @@ struct b53_device {
 	u8 jumbo_pm_reg;
 	u8 jumbo_size_reg;
 	int reset_gpio;
-	u8 num_arl_entries;
+	u8 num_arl_bins;
 	enum dsa_tag_protocol tag_protocol;
 
 	/* used ports mask */
-- 
cgit v1.2.3-59-g8ed1b


From e3da4038f4ca1094596a7604c6edac4a6a4f6ee9 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 30 Apr 2020 11:49:09 -0700
Subject: net: dsa: b53: Provide number of ARL buckets

In preparation for doing proper upper bound checking of FDB/MDB entries
being added to the ARL, provide the number of ARL buckets for each
switch chip we support. All chips have 1024 buckets, except 7278 which
has only 256.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 21 +++++++++++++++++++++
 drivers/net/dsa/b53/b53_priv.h   |  1 +
 2 files changed, 22 insertions(+)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 41b75f41677a..aa0836ac751c 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -2186,6 +2186,7 @@ struct b53_chip_data {
 	u8 cpu_port;
 	u8 vta_regs[3];
 	u8 arl_bins;
+	u16 arl_buckets;
 	u8 duplex_reg;
 	u8 jumbo_pm_reg;
 	u8 jumbo_size_reg;
@@ -2205,6 +2206,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 16,
 		.enabled_ports = 0x1f,
 		.arl_bins = 2,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2214,6 +2216,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 256,
 		.enabled_ports = 0x1f,
 		.arl_bins = 2,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25,
 		.duplex_reg = B53_DUPLEX_STAT_FE,
 	},
@@ -2223,6 +2226,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2235,6 +2239,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2247,6 +2252,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2259,6 +2265,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x7f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_9798,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2271,6 +2278,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.vta_regs = B53_VTA_REGS,
 		.cpu_port = B53_CPU_PORT,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2283,6 +2291,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0xff,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2295,6 +2304,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2307,6 +2317,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0, /* pdata must provide them */
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS_63XX,
 		.duplex_reg = B53_DUPLEX_STAT_63XX,
@@ -2319,6 +2330,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2331,6 +2343,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1bf,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2343,6 +2356,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1bf,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2355,6 +2369,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2367,6 +2382,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1f,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT_25, /* TODO: auto detect */
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2379,6 +2395,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2391,6 +2408,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x103,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2403,6 +2421,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans	= 4096,
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
+		.arl_buckets = 1024,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2415,6 +2434,7 @@ static const struct b53_chip_data b53_switch_chips[] = {
 		.vlans = 4096,
 		.enabled_ports = 0x1ff,
 		.arl_bins = 4,
+		.arl_buckets = 256,
 		.cpu_port = B53_CPU_PORT,
 		.vta_regs = B53_VTA_REGS,
 		.duplex_reg = B53_DUPLEX_STAT_GE,
@@ -2443,6 +2463,7 @@ static int b53_switch_init(struct b53_device *dev)
 			dev->cpu_port = chip->cpu_port;
 			dev->num_vlans = chip->vlans;
 			dev->num_arl_bins = chip->arl_bins;
+			dev->num_arl_buckets = chip->arl_buckets;
 			break;
 		}
 	}
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 1c5c443d571f..694e26cdfd4d 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -118,6 +118,7 @@ struct b53_device {
 	u8 jumbo_size_reg;
 	int reset_gpio;
 	u8 num_arl_bins;
+	u16 num_arl_buckets;
 	enum dsa_tag_protocol tag_protocol;
 
 	/* used ports mask */
-- 
cgit v1.2.3-59-g8ed1b


From cd169d799beeb738fa2d3e891960924cdcaf8414 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 30 Apr 2020 11:49:10 -0700
Subject: net: dsa: b53: Bound check ARL searches

ARL searches are done by reading two ARL entries at a time. Do not cap
the search at 1024, which would limit us to only half of the possible ARL
capacity; use b53_max_arl_entries() instead, which does the right
multiplication between bins and buckets.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 2 +-
 drivers/net/dsa/b53/b53_priv.h   | 5 +++++
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index aa0836ac751c..9550d972f8c5 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1702,7 +1702,7 @@ int b53_fdb_dump(struct dsa_switch *ds, int port,
 				break;
 		}
 
-	} while (count++ < 1024);
+	} while (count++ < b53_max_arl_entries(priv) / 2);
 
 	return 0;
 }
diff --git a/drivers/net/dsa/b53/b53_priv.h b/drivers/net/dsa/b53/b53_priv.h
index 694e26cdfd4d..e942c60e4365 100644
--- a/drivers/net/dsa/b53/b53_priv.h
+++ b/drivers/net/dsa/b53/b53_priv.h
@@ -213,6 +213,11 @@ static inline int is58xx(struct b53_device *dev)
 #define B53_CPU_PORT_25	5
 #define B53_CPU_PORT	8
 
+static inline unsigned int b53_max_arl_entries(struct b53_device *dev)
+{
+	return dev->num_arl_buckets * dev->num_arl_bins;
+}
+
 struct b53_device *b53_switch_alloc(struct device *base,
 				    const struct b53_io_ops *ops,
 				    void *priv);
-- 
cgit v1.2.3-59-g8ed1b


From ef2a0bd99b1549a3a4253355be247d5dff25d720 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Thu, 30 Apr 2020 11:49:11 -0700
Subject: net: dsa: b53: Remove is_valid argument to b53_arl_read()

This argument is not used.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index 9550d972f8c5..ceb8be653182 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1484,8 +1484,7 @@ static int b53_arl_rw_op(struct b53_device *dev, unsigned int op)
 }
 
 static int b53_arl_read(struct b53_device *dev, u64 mac,
-			u16 vid, struct b53_arl_entry *ent, u8 *idx,
-			bool is_valid)
+			u16 vid, struct b53_arl_entry *ent, u8 *idx)
 {
 	DECLARE_BITMAP(free_bins, B53_ARLTBL_MAX_BIN_ENTRIES);
 	unsigned int i;
@@ -1550,7 +1549,8 @@ static int b53_arl_op(struct b53_device *dev, int op, int port,
 	if (ret)
 		return ret;
 
-	ret = b53_arl_read(dev, mac, vid, &ent, &idx, is_valid);
+	ret = b53_arl_read(dev, mac, vid, &ent, &idx);
+
 	/* If this is a read, just finish now */
 	if (op)
 		return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 47a1494b8208461094923400c396ce4b8163c064 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:05 +0200
Subject: netlink: remove type-unsafe validation_data pointer

In the netlink policy, we currently have a void *validation_data
that's pointing to different things:
 * a u32 value for bitfield32,
 * the netlink policy for nested/nested array
 * the string for NLA_REJECT

Remove the pointer and place appropriate type-safe items in the
union instead.

While at it, completely dissolve the pointer for the bitfield32
case and just put the value there directly.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 60 +++++++++++++++++++++++++++------------------------
 lib/nlattr.c          | 20 ++++++++---------
 net/sched/act_api.c   | 13 +++--------
 net/sched/sch_red.c   |  9 ++++----
 4 files changed, 49 insertions(+), 53 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 67c57d6942e3..671b29d170a8 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -217,7 +217,7 @@ enum nla_policy_validation {
  *    NLA_NESTED,
  *    NLA_NESTED_ARRAY     Length verification is done by checking len of
  *                         nested header (or empty); len field is used if
- *                         validation_data is also used, for the max attr
+ *                         nested_policy is also used, for the max attr
  *                         number in the nested policy.
  *    NLA_U8, NLA_U16,
  *    NLA_U32, NLA_U64,
@@ -235,27 +235,25 @@ enum nla_policy_validation {
  *    NLA_MIN_LEN          Minimum length of attribute payload
  *    All other            Minimum length of attribute payload
  *
- * Meaning of `validation_data' field:
+ * Meaning of validation union:
  *    NLA_BITFIELD32       This is a 32-bit bitmap/bitselector attribute and
- *                         validation data must point to a u32 value of valid
- *                         flags
- *    NLA_REJECT           This attribute is always rejected and validation data
+ *                         `bitfield32_valid' is the u32 value of valid flags
+ *    NLA_REJECT           This attribute is always rejected and `reject_message'
  *                         may point to a string to report as the error instead
  *                         of the generic one in extended ACK.
- *    NLA_NESTED           Points to a nested policy to validate, must also set
- *                         `len' to the max attribute number.
+ *    NLA_NESTED           `nested_policy' points to a nested policy to
+ *                         validate, must also set `len' to the max attribute
+ *                         number. Use the provided NLA_POLICY_NESTED() macro.
  *                         Note that nla_parse() will validate, but of course not
  *                         parse, the nested sub-policies.
- *    NLA_NESTED_ARRAY     Points to a nested policy to validate, must also set
- *                         `len' to the max attribute number. The difference to
- *                         NLA_NESTED is the structure - NLA_NESTED has the
- *                         nested attributes directly inside, while an array has
- *                         the nested attributes at another level down and the
- *                         attributes directly in the nesting don't matter.
- *    All other            Unused - but note that it's a union
- *
- * Meaning of `min' and `max' fields, use via NLA_POLICY_MIN, NLA_POLICY_MAX
- * and NLA_POLICY_RANGE:
+ *    NLA_NESTED_ARRAY     `nested_policy' points to a nested policy to validate,
+ *                         must also set `len' to the max attribute number. Use
+ *                         the provided NLA_POLICY_NESTED_ARRAY() macro.
+ *                         The difference to NLA_NESTED is the structure:
+ *                         NLA_NESTED has the nested attributes directly inside
+ *                         while an array has the nested attributes at another
+ *                         level down and the attribute types directly in the
+ *                         nesting don't matter.
  *    NLA_U8,
  *    NLA_U16,
  *    NLA_U32,
@@ -263,29 +261,31 @@ enum nla_policy_validation {
  *    NLA_S8,
  *    NLA_S16,
  *    NLA_S32,
- *    NLA_S64              These are used depending on the validation_type
- *                         field, if that is min/max/range then the minimum,
- *                         maximum and both are used (respectively) to check
+ *    NLA_S64              The `min' and `max' fields are used depending on the
+ *                         validation_type field, if that is min/max/range then
+ *                         the min, max or both are used (respectively) to check
  *                         the value of the integer attribute.
  *                         Note that in the interest of code simplicity and
  *                         struct size both limits are s16, so you cannot
  *                         enforce a range that doesn't fall within the range
  *                         of s16 - do that as usual in the code instead.
+ *                         Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
+ *                         NLA_POLICY_RANGE() macros.
  *    All other            Unused - but note that it's a union
  *
  * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
- *    NLA_BINARY           Validation function called for the attribute,
- *                         not compatible with use of the validation_data
- *                         as in NLA_BITFIELD32, NLA_REJECT, NLA_NESTED and
- *                         NLA_NESTED_ARRAY.
+ *    NLA_BINARY           Validation function called for the attribute.
  *    All other            Unused - but note that it's a union
  *
  * Example:
+ *
+ * static const u32 myvalidflags = 0xff231023;
+ *
  * static const struct nla_policy my_policy[ATTR_MAX+1] = {
  * 	[ATTR_FOO] = { .type = NLA_U16 },
  *	[ATTR_BAR] = { .type = NLA_STRING, .len = BARSIZ },
  *	[ATTR_BAZ] = { .type = NLA_EXACT_LEN, .len = sizeof(struct mystruct) },
- *	[ATTR_GOO] = { .type = NLA_BITFIELD32, .validation_data = &myvalidflags },
+ *	[ATTR_GOO] = NLA_POLICY_BITFIELD32(myvalidflags),
  * };
  */
 struct nla_policy {
@@ -293,7 +293,9 @@ struct nla_policy {
 	u8		validation_type;
 	u16		len;
 	union {
-		const void *validation_data;
+		const u32 bitfield32_valid;
+		const char *reject_message;
+		const struct nla_policy *nested_policy;
 		struct {
 			s16 min, max;
 		};
@@ -329,13 +331,15 @@ struct nla_policy {
 #define NLA_POLICY_ETH_ADDR_COMPAT	NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN)
 
 #define _NLA_POLICY_NESTED(maxattr, policy) \
-	{ .type = NLA_NESTED, .validation_data = policy, .len = maxattr }
+	{ .type = NLA_NESTED, .nested_policy = policy, .len = maxattr }
 #define _NLA_POLICY_NESTED_ARRAY(maxattr, policy) \
-	{ .type = NLA_NESTED_ARRAY, .validation_data = policy, .len = maxattr }
+	{ .type = NLA_NESTED_ARRAY, .nested_policy = policy, .len = maxattr }
 #define NLA_POLICY_NESTED(policy) \
 	_NLA_POLICY_NESTED(ARRAY_SIZE(policy) - 1, policy)
 #define NLA_POLICY_NESTED_ARRAY(policy) \
 	_NLA_POLICY_NESTED_ARRAY(ARRAY_SIZE(policy) - 1, policy)
+#define NLA_POLICY_BITFIELD32(valid) \
+	{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
 #define NLA_ENSURE_INT_TYPE(tp)				\
diff --git a/lib/nlattr.c b/lib/nlattr.c
index cace9b307781..3df05db732ca 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -45,7 +45,7 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 };
 
 static int validate_nla_bitfield32(const struct nlattr *nla,
-				   const u32 *valid_flags_mask)
+				   const u32 valid_flags_mask)
 {
 	const struct nla_bitfield32 *bf = nla_data(nla);
 
@@ -53,11 +53,11 @@ static int validate_nla_bitfield32(const struct nlattr *nla,
 		return -EINVAL;
 
 	/*disallow invalid bit selector */
-	if (bf->selector & ~*valid_flags_mask)
+	if (bf->selector & ~valid_flags_mask)
 		return -EINVAL;
 
 	/*disallow invalid bit values */
-	if (bf->value & ~*valid_flags_mask)
+	if (bf->value & ~valid_flags_mask)
 		return -EINVAL;
 
 	/*disallow valid bit values that are not selected*/
@@ -206,9 +206,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		break;
 
 	case NLA_REJECT:
-		if (extack && pt->validation_data) {
+		if (extack && pt->reject_message) {
 			NL_SET_BAD_ATTR(extack, nla);
-			extack->_msg = pt->validation_data;
+			extack->_msg = pt->reject_message;
 			return -EINVAL;
 		}
 		err = -EINVAL;
@@ -223,7 +223,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		if (attrlen != sizeof(struct nla_bitfield32))
 			goto out_err;
 
-		err = validate_nla_bitfield32(nla, pt->validation_data);
+		err = validate_nla_bitfield32(nla, pt->bitfield32_valid);
 		if (err)
 			goto out_err;
 		break;
@@ -268,9 +268,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			break;
 		if (attrlen < NLA_HDRLEN)
 			goto out_err;
-		if (pt->validation_data) {
+		if (pt->nested_policy) {
 			err = __nla_validate(nla_data(nla), nla_len(nla), pt->len,
-					     pt->validation_data, validate,
+					     pt->nested_policy, validate,
 					     extack);
 			if (err < 0) {
 				/*
@@ -289,11 +289,11 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			break;
 		if (attrlen < NLA_HDRLEN)
 			goto out_err;
-		if (pt->validation_data) {
+		if (pt->nested_policy) {
 			int err;
 
 			err = nla_validate_array(nla_data(nla), nla_len(nla),
-						 pt->len, pt->validation_data,
+						 pt->len, pt->nested_policy,
 						 extack, validate);
 			if (err < 0) {
 				/*
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index df4560909157..fbbec2e562f5 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -876,19 +876,14 @@ static u8 tcf_action_hw_stats_get(struct nlattr *hw_stats_attr)
 	return hw_stats_bf.value;
 }
 
-static const u32 tca_act_flags_allowed = TCA_ACT_FLAGS_NO_PERCPU_STATS;
-static const u32 tca_act_hw_stats_allowed = TCA_ACT_HW_STATS_ANY;
-
 static const struct nla_policy tcf_action_policy[TCA_ACT_MAX + 1] = {
 	[TCA_ACT_KIND]		= { .type = NLA_STRING },
 	[TCA_ACT_INDEX]		= { .type = NLA_U32 },
 	[TCA_ACT_COOKIE]	= { .type = NLA_BINARY,
 				    .len = TC_COOKIE_MAX_SIZE },
 	[TCA_ACT_OPTIONS]	= { .type = NLA_NESTED },
-	[TCA_ACT_FLAGS]		= { .type = NLA_BITFIELD32,
-				    .validation_data = &tca_act_flags_allowed },
-	[TCA_ACT_HW_STATS]	= { .type = NLA_BITFIELD32,
-				    .validation_data = &tca_act_hw_stats_allowed },
+	[TCA_ACT_FLAGS]		= NLA_POLICY_BITFIELD32(TCA_ACT_FLAGS_NO_PERCPU_STATS),
+	[TCA_ACT_HW_STATS]	= NLA_POLICY_BITFIELD32(TCA_ACT_HW_STATS_ANY),
 };
 
 struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
@@ -1454,10 +1449,8 @@ static int tcf_action_add(struct net *net, struct nlattr *nla,
 	return ret;
 }
 
-static u32 tcaa_root_flags_allowed = TCA_FLAG_LARGE_DUMP_ON;
 static const struct nla_policy tcaa_policy[TCA_ROOT_MAX + 1] = {
-	[TCA_ROOT_FLAGS] = { .type = NLA_BITFIELD32,
-			     .validation_data = &tcaa_root_flags_allowed },
+	[TCA_ROOT_FLAGS] = NLA_POLICY_BITFIELD32(TCA_FLAG_LARGE_DUMP_ON),
 	[TCA_ROOT_TIME_DELTA]      = { .type = NLA_U32 },
 };
 
diff --git a/net/sched/sch_red.c b/net/sched/sch_red.c
index c7de47c942e3..555a1b9e467f 100644
--- a/net/sched/sch_red.c
+++ b/net/sched/sch_red.c
@@ -48,7 +48,7 @@ struct red_sched_data {
 	struct Qdisc		*qdisc;
 };
 
-static const u32 red_supported_flags = TC_RED_HISTORIC_FLAGS | TC_RED_NODROP;
+#define TC_RED_SUPPORTED_FLAGS (TC_RED_HISTORIC_FLAGS | TC_RED_NODROP)
 
 static inline int red_use_ecn(struct red_sched_data *q)
 {
@@ -212,8 +212,7 @@ static const struct nla_policy red_policy[TCA_RED_MAX + 1] = {
 	[TCA_RED_PARMS]	= { .len = sizeof(struct tc_red_qopt) },
 	[TCA_RED_STAB]	= { .len = RED_STAB_SIZE },
 	[TCA_RED_MAX_P] = { .type = NLA_U32 },
-	[TCA_RED_FLAGS] = { .type = NLA_BITFIELD32,
-			    .validation_data = &red_supported_flags },
+	[TCA_RED_FLAGS] = NLA_POLICY_BITFIELD32(TC_RED_SUPPORTED_FLAGS),
 };
 
 static int red_change(struct Qdisc *sch, struct nlattr *opt,
@@ -248,7 +247,7 @@ static int red_change(struct Qdisc *sch, struct nlattr *opt,
 		return -EINVAL;
 
 	err = red_get_flags(ctl->flags, TC_RED_HISTORIC_FLAGS,
-			    tb[TCA_RED_FLAGS], red_supported_flags,
+			    tb[TCA_RED_FLAGS], TC_RED_SUPPORTED_FLAGS,
 			    &flags_bf, &userbits, extack);
 	if (err)
 		return err;
@@ -372,7 +371,7 @@ static int red_dump(struct Qdisc *sch, struct sk_buff *skb)
 	if (nla_put(skb, TCA_RED_PARMS, sizeof(opt), &opt) ||
 	    nla_put_u32(skb, TCA_RED_MAX_P, q->parms.max_P) ||
 	    nla_put_bitfield32(skb, TCA_RED_FLAGS,
-			       q->flags, red_supported_flags))
+			       q->flags, TC_RED_SUPPORTED_FLAGS))
 		goto nla_put_failure;
 	return nla_nest_end(skb, opts);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7690aa1cdf7c4565ad6b013b324c28b685505e24 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:06 +0200
Subject: netlink: limit recursion depth in policy validation

Now that we have nested policies, we can theoretically
recurse forever parsing attributes if a (sub-)policy
refers back to a higher level one. This is a situation
that has happened in nl80211, and we've avoided it there
by not linking it.

Add some code to netlink parsing to limit recursion depth.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 lib/nlattr.c | 46 ++++++++++++++++++++++++++++++++++------------
 1 file changed, 34 insertions(+), 12 deletions(-)

diff --git a/lib/nlattr.c b/lib/nlattr.c
index 3df05db732ca..7f7ebd89caa4 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -44,6 +44,20 @@ static const u8 nla_attr_minlen[NLA_TYPE_MAX+1] = {
 	[NLA_S64]	= sizeof(s64),
 };
 
+/*
+ * Nested policies might refer back to the original
+ * policy in some cases, and userspace could try to
+ * abuse that and recurse by nesting in the right
+ * ways. Limit recursion to avoid this problem.
+ */
+#define MAX_POLICY_RECURSION_DEPTH	10
+
+static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
+				const struct nla_policy *policy,
+				unsigned int validate,
+				struct netlink_ext_ack *extack,
+				struct nlattr **tb, unsigned int depth);
+
 static int validate_nla_bitfield32(const struct nlattr *nla,
 				   const u32 valid_flags_mask)
 {
@@ -70,7 +84,7 @@ static int validate_nla_bitfield32(const struct nlattr *nla,
 static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			      const struct nla_policy *policy,
 			      struct netlink_ext_ack *extack,
-			      unsigned int validate)
+			      unsigned int validate, unsigned int depth)
 {
 	const struct nlattr *entry;
 	int rem;
@@ -87,8 +101,9 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 			return -ERANGE;
 		}
 
-		ret = __nla_validate(nla_data(entry), nla_len(entry),
-				     maxtype, policy, validate, extack);
+		ret = __nla_validate_parse(nla_data(entry), nla_len(entry),
+					   maxtype, policy, validate, extack,
+					   NULL, depth + 1);
 		if (ret < 0)
 			return ret;
 	}
@@ -156,7 +171,7 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy, unsigned int validate,
-			struct netlink_ext_ack *extack)
+			struct netlink_ext_ack *extack, unsigned int depth)
 {
 	u16 strict_start_type = policy[0].strict_start_type;
 	const struct nla_policy *pt;
@@ -269,9 +284,10 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 		if (attrlen < NLA_HDRLEN)
 			goto out_err;
 		if (pt->nested_policy) {
-			err = __nla_validate(nla_data(nla), nla_len(nla), pt->len,
-					     pt->nested_policy, validate,
-					     extack);
+			err = __nla_validate_parse(nla_data(nla), nla_len(nla),
+						   pt->len, pt->nested_policy,
+						   validate, extack, NULL,
+						   depth + 1);
 			if (err < 0) {
 				/*
 				 * return directly to preserve the inner
@@ -294,7 +310,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 
 			err = nla_validate_array(nla_data(nla), nla_len(nla),
 						 pt->len, pt->nested_policy,
-						 extack, validate);
+						 extack, validate, depth);
 			if (err < 0) {
 				/*
 				 * return directly to preserve the inner
@@ -358,11 +374,17 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
 				const struct nla_policy *policy,
 				unsigned int validate,
 				struct netlink_ext_ack *extack,
-				struct nlattr **tb)
+				struct nlattr **tb, unsigned int depth)
 {
 	const struct nlattr *nla;
 	int rem;
 
+	if (depth >= MAX_POLICY_RECURSION_DEPTH) {
+		NL_SET_ERR_MSG(extack,
+			       "allowed policy recursion depth exceeded");
+		return -EINVAL;
+	}
+
 	if (tb)
 		memset(tb, 0, sizeof(struct nlattr *) * (maxtype + 1));
 
@@ -379,7 +401,7 @@ static int __nla_validate_parse(const struct nlattr *head, int len, int maxtype,
 		}
 		if (policy) {
 			int err = validate_nla(nla, maxtype, policy,
-					       validate, extack);
+					       validate, extack, depth);
 
 			if (err < 0)
 				return err;
@@ -421,7 +443,7 @@ int __nla_validate(const struct nlattr *head, int len, int maxtype,
 		   struct netlink_ext_ack *extack)
 {
 	return __nla_validate_parse(head, len, maxtype, policy, validate,
-				    extack, NULL);
+				    extack, NULL, 0);
 }
 EXPORT_SYMBOL(__nla_validate);
 
@@ -476,7 +498,7 @@ int __nla_parse(struct nlattr **tb, int maxtype,
 		struct netlink_ext_ack *extack)
 {
 	return __nla_validate_parse(head, len, maxtype, policy, validate,
-				    extack, tb);
+				    extack, tb, 0);
 }
 EXPORT_SYMBOL(__nla_parse);
 
-- 
cgit v1.2.3-59-g8ed1b


From d15da2a2e813679aeac8bff3be38d3adc849c1a6 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:07 +0200
Subject: nl80211: link recursive netlink nested policy

Now that we have limited recursive policy validation to avoid
stack overflows, change nl80211 to actually link the nested
policy (linking back to itself eventually), which allows some
code cleanups.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/wireless/nl80211.c | 10 ++++------
 net/wireless/nl80211.h |  2 --
 net/wireless/pmsr.c    |  3 +--
 3 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 692bcd35f809..57c618b6cb0e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -253,6 +253,8 @@ static int validate_ie_attr(const struct nlattr *attr,
 }
 
 /* policy for the attributes */
+static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
+
 static const struct nla_policy
 nl80211_ftm_responder_policy[NL80211_FTM_RESP_ATTR_MAX + 1] = {
 	[NL80211_FTM_RESP_ATTR_ENABLED] = { .type = NLA_FLAG, },
@@ -296,11 +298,7 @@ nl80211_pmsr_req_attr_policy[NL80211_PMSR_REQ_ATTR_MAX + 1] = {
 static const struct nla_policy
 nl80211_psmr_peer_attr_policy[NL80211_PMSR_PEER_ATTR_MAX + 1] = {
 	[NL80211_PMSR_PEER_ATTR_ADDR] = NLA_POLICY_ETH_ADDR,
-	/*
-	 * we could specify this again to be the top-level policy,
-	 * but that would open us up to recursion problems ...
-	 */
-	[NL80211_PMSR_PEER_ATTR_CHAN] = { .type = NLA_NESTED },
+	[NL80211_PMSR_PEER_ATTR_CHAN] = NLA_POLICY_NESTED(nl80211_policy),
 	[NL80211_PMSR_PEER_ATTR_REQ] =
 		NLA_POLICY_NESTED(nl80211_pmsr_req_attr_policy),
 	[NL80211_PMSR_PEER_ATTR_RESP] = { .type = NLA_REJECT },
@@ -347,7 +345,7 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
 			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
 };
 
-const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
+static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[0] = { .strict_start_type = NL80211_ATTR_HE_OBSS_PD },
 	[NL80211_ATTR_WIPHY] = { .type = NLA_U32 },
 	[NL80211_ATTR_WIPHY_NAME] = { .type = NLA_NUL_STRING,
diff --git a/net/wireless/nl80211.h b/net/wireless/nl80211.h
index a41e94a49a89..d3e8e426c486 100644
--- a/net/wireless/nl80211.h
+++ b/net/wireless/nl80211.h
@@ -11,8 +11,6 @@
 int nl80211_init(void);
 void nl80211_exit(void);
 
-extern const struct nla_policy nl80211_policy[NUM_NL80211_ATTR];
-
 void *nl80211hdr_put(struct sk_buff *skb, u32 portid, u32 seq,
 		     int flags, u8 cmd);
 bool nl80211_put_sta_rate(struct sk_buff *msg, struct rate_info *info,
diff --git a/net/wireless/pmsr.c b/net/wireless/pmsr.c
index 63dc8023447f..a95c79d18349 100644
--- a/net/wireless/pmsr.c
+++ b/net/wireless/pmsr.c
@@ -187,10 +187,9 @@ static int pmsr_parse_peer(struct cfg80211_registered_device *rdev,
 
 	/* reuse info->attrs */
 	memset(info->attrs, 0, sizeof(*info->attrs) * (NL80211_ATTR_MAX + 1));
-	/* need to validate here, we don't want to have validation recursion */
 	err = nla_parse_nested_deprecated(info->attrs, NL80211_ATTR_MAX,
 					  tb[NL80211_PMSR_PEER_ATTR_CHAN],
-					  nl80211_policy, info->extack);
+					  NULL, info->extack);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From d06a09b94c618c96ced584dd4611a888c8856b8d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:08 +0200
Subject: netlink: extend policy range validation

Using a pointer to a struct indicating the min/max values,
extend the ability to do range validation for arbitrary
values. Small values in the s16 range can be kept in the
policy directly.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h |  45 ++++++++++++++++++++
 lib/nlattr.c          | 113 ++++++++++++++++++++++++++++++++++++++++----------
 2 files changed, 137 insertions(+), 21 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 671b29d170a8..94a7df4ab122 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -189,11 +189,20 @@ enum {
 
 #define NLA_TYPE_MAX (__NLA_TYPE_MAX - 1)
 
+struct netlink_range_validation {
+	u64 min, max;
+};
+
+struct netlink_range_validation_signed {
+	s64 min, max;
+};
+
 enum nla_policy_validation {
 	NLA_VALIDATE_NONE,
 	NLA_VALIDATE_RANGE,
 	NLA_VALIDATE_MIN,
 	NLA_VALIDATE_MAX,
+	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
 };
 
@@ -271,6 +280,22 @@ enum nla_policy_validation {
  *                         of s16 - do that as usual in the code instead.
  *                         Use the NLA_POLICY_MIN(), NLA_POLICY_MAX() and
  *                         NLA_POLICY_RANGE() macros.
+ *    NLA_U8,
+ *    NLA_U16,
+ *    NLA_U32,
+ *    NLA_U64              If the validation_type field instead is set to
+ *                         NLA_VALIDATE_RANGE_PTR, `range' must be a pointer
+ *                         to a struct netlink_range_validation that indicates
+ *                         the min/max values.
+ *                         Use NLA_POLICY_FULL_RANGE().
+ *    NLA_S8,
+ *    NLA_S16,
+ *    NLA_S32,
+ *    NLA_S64              If the validation_type field instead is set to
+ *                         NLA_VALIDATE_RANGE_PTR, `range_signed' must be a
+ *                         pointer to a struct netlink_range_validation_signed
+ *                         that indicates the min/max values.
+ *                         Use NLA_POLICY_FULL_RANGE_SIGNED().
  *    All other            Unused - but note that it's a union
  *
  * Meaning of `validate' field, use via NLA_POLICY_VALIDATE_FN:
@@ -296,6 +321,8 @@ struct nla_policy {
 		const u32 bitfield32_valid;
 		const char *reject_message;
 		const struct nla_policy *nested_policy;
+		struct netlink_range_validation *range;
+		struct netlink_range_validation_signed *range_signed;
 		struct {
 			s16 min, max;
 		};
@@ -342,6 +369,12 @@ struct nla_policy {
 	{ .type = NLA_BITFIELD32, .bitfield32_valid = valid }
 
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
+#define NLA_ENSURE_UINT_TYPE(tp)			\
+	(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 ||	\
+		      tp == NLA_U32 || tp == NLA_U64) + tp)
+#define NLA_ENSURE_SINT_TYPE(tp)			\
+	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16  ||	\
+		      tp == NLA_S32 || tp == NLA_S64) + tp)
 #define NLA_ENSURE_INT_TYPE(tp)				\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
@@ -360,6 +393,18 @@ struct nla_policy {
 	.max = _max					\
 }
 
+#define NLA_POLICY_FULL_RANGE(tp, _range) {		\
+	.type = NLA_ENSURE_UINT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_RANGE_PTR,	\
+	.range = _range,				\
+}
+
+#define NLA_POLICY_FULL_RANGE_SIGNED(tp, _range) {	\
+	.type = NLA_ENSURE_SINT_TYPE(tp),		\
+	.validation_type = NLA_VALIDATE_RANGE_PTR,	\
+	.range_signed = _range,				\
+}
+
 #define NLA_POLICY_MIN(tp, _min) {			\
 	.type = NLA_ENSURE_INT_TYPE(tp),		\
 	.validation_type = NLA_VALIDATE_MIN,		\
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 7f7ebd89caa4..a8beb173f558 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -111,17 +111,34 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 	return 0;
 }
 
-static int nla_validate_int_range(const struct nla_policy *pt,
-				  const struct nlattr *nla,
-				  struct netlink_ext_ack *extack)
+static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
+					   const struct nlattr *nla,
+					   struct netlink_ext_ack *extack)
 {
-	bool validate_min, validate_max;
-	s64 value;
+	struct netlink_range_validation _range = {
+		.min = 0,
+		.max = U64_MAX,
+	}, *range = &_range;
+	u64 value;
 
-	validate_min = pt->validation_type == NLA_VALIDATE_RANGE ||
-		       pt->validation_type == NLA_VALIDATE_MIN;
-	validate_max = pt->validation_type == NLA_VALIDATE_RANGE ||
-		       pt->validation_type == NLA_VALIDATE_MAX;
+	WARN_ON_ONCE(pt->validation_type != NLA_VALIDATE_RANGE_PTR &&
+		     (pt->min < 0 || pt->max < 0));
+
+	switch (pt->validation_type) {
+	case NLA_VALIDATE_RANGE:
+		range->min = pt->min;
+		range->max = pt->max;
+		break;
+	case NLA_VALIDATE_RANGE_PTR:
+		range = pt->range;
+		break;
+	case NLA_VALIDATE_MIN:
+		range->min = pt->min;
+		break;
+	case NLA_VALIDATE_MAX:
+		range->max = pt->max;
+		break;
+	}
 
 	switch (pt->type) {
 	case NLA_U8:
@@ -133,6 +150,49 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	case NLA_U32:
 		value = nla_get_u32(nla);
 		break;
+	case NLA_U64:
+		value = nla_get_u64(nla);
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	if (value < range->min || value > range->max) {
+		NL_SET_ERR_MSG_ATTR(extack, nla,
+				    "integer out of range");
+		return -ERANGE;
+	}
+
+	return 0;
+}
+
+static int nla_validate_int_range_signed(const struct nla_policy *pt,
+					 const struct nlattr *nla,
+					 struct netlink_ext_ack *extack)
+{
+	struct netlink_range_validation_signed _range = {
+		.min = S64_MIN,
+		.max = S64_MAX,
+	}, *range = &_range;
+	s64 value;
+
+	switch (pt->validation_type) {
+	case NLA_VALIDATE_RANGE:
+		range->min = pt->min;
+		range->max = pt->max;
+		break;
+	case NLA_VALIDATE_RANGE_PTR:
+		range = pt->range_signed;
+		break;
+	case NLA_VALIDATE_MIN:
+		range->min = pt->min;
+		break;
+	case NLA_VALIDATE_MAX:
+		range->max = pt->max;
+		break;
+	}
+
+	switch (pt->type) {
 	case NLA_S8:
 		value = nla_get_s8(nla);
 		break;
@@ -145,22 +205,11 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	case NLA_S64:
 		value = nla_get_s64(nla);
 		break;
-	case NLA_U64:
-		/* treat this one specially, since it may not fit into s64 */
-		if ((validate_min && nla_get_u64(nla) < pt->min) ||
-		    (validate_max && nla_get_u64(nla) > pt->max)) {
-			NL_SET_ERR_MSG_ATTR(extack, nla,
-					    "integer out of range");
-			return -ERANGE;
-		}
-		return 0;
 	default:
-		WARN_ON(1);
 		return -EINVAL;
 	}
 
-	if ((validate_min && value < pt->min) ||
-	    (validate_max && value > pt->max)) {
+	if (value < range->min || value > range->max) {
 		NL_SET_ERR_MSG_ATTR(extack, nla,
 				    "integer out of range");
 		return -ERANGE;
@@ -169,6 +218,27 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	return 0;
 }
 
+static int nla_validate_int_range(const struct nla_policy *pt,
+				  const struct nlattr *nla,
+				  struct netlink_ext_ack *extack)
+{
+	switch (pt->type) {
+	case NLA_U8:
+	case NLA_U16:
+	case NLA_U32:
+	case NLA_U64:
+		return nla_validate_int_range_unsigned(pt, nla, extack);
+	case NLA_S8:
+	case NLA_S16:
+	case NLA_S32:
+	case NLA_S64:
+		return nla_validate_int_range_signed(pt, nla, extack);
+	default:
+		WARN_ON(1);
+		return -EINVAL;
+	}
+}
+
 static int validate_nla(const struct nlattr *nla, int maxtype,
 			const struct nla_policy *policy, unsigned int validate,
 			struct netlink_ext_ack *extack, unsigned int depth)
@@ -348,6 +418,7 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	case NLA_VALIDATE_NONE:
 		/* nothing to do */
 		break;
+	case NLA_VALIDATE_RANGE_PTR:
 	case NLA_VALIDATE_RANGE:
 	case NLA_VALIDATE_MIN:
 	case NLA_VALIDATE_MAX:
-- 
cgit v1.2.3-59-g8ed1b


From da4063bdfcfa70ec57a6c25f772ac6378b1584ad Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:09 +0200
Subject: netlink: allow NLA_MSECS to have range validation

Since NLA_MSECS is really equivalent to NLA_U64, allow
it to have range validation as well.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h | 6 ++++--
 lib/nlattr.c          | 2 ++
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 94a7df4ab122..4acd7165e900 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -371,7 +371,8 @@ struct nla_policy {
 #define __NLA_ENSURE(condition) BUILD_BUG_ON_ZERO(!(condition))
 #define NLA_ENSURE_UINT_TYPE(tp)			\
 	(__NLA_ENSURE(tp == NLA_U8 || tp == NLA_U16 ||	\
-		      tp == NLA_U32 || tp == NLA_U64) + tp)
+		      tp == NLA_U32 || tp == NLA_U64 ||	\
+		      tp == NLA_MSECS) + tp)
 #define NLA_ENSURE_SINT_TYPE(tp)			\
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_S16  ||	\
 		      tp == NLA_S32 || tp == NLA_S64) + tp)
@@ -379,7 +380,8 @@ struct nla_policy {
 	(__NLA_ENSURE(tp == NLA_S8 || tp == NLA_U8 ||	\
 		      tp == NLA_S16 || tp == NLA_U16 ||	\
 		      tp == NLA_S32 || tp == NLA_U32 ||	\
-		      tp == NLA_S64 || tp == NLA_U64) + tp)
+		      tp == NLA_S64 || tp == NLA_U64 ||	\
+		      tp == NLA_MSECS) + tp)
 #define NLA_ENSURE_NO_VALIDATION_PTR(tp)		\
 	(__NLA_ENSURE(tp != NLA_BITFIELD32 &&		\
 		      tp != NLA_REJECT &&		\
diff --git a/lib/nlattr.c b/lib/nlattr.c
index a8beb173f558..21ef3998b9d9 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -151,6 +151,7 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 		value = nla_get_u32(nla);
 		break;
 	case NLA_U64:
+	case NLA_MSECS:
 		value = nla_get_u64(nla);
 		break;
 	default:
@@ -227,6 +228,7 @@ static int nla_validate_int_range(const struct nla_policy *pt,
 	case NLA_U16:
 	case NLA_U32:
 	case NLA_U64:
+	case NLA_MSECS:
 		return nla_validate_int_range_unsigned(pt, nla, extack);
 	case NLA_S8:
 	case NLA_S16:
-- 
cgit v1.2.3-59-g8ed1b


From c7721c05a6217491810f406ec28df80a9bcf3546 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:10 +0200
Subject: netlink: remove NLA_EXACT_LEN_WARN

Use a validation type instead, so we can later expose
the NLA_* values to userspace for policy descriptions.

Some transformations were done with this spatch:

    @@
    identifier p;
    expression X, L, A;
    @@
    struct nla_policy p[X] = {
    [A] =
    -{ .type = NLA_EXACT_LEN_WARN, .len = L },
    +NLA_POLICY_EXACT_LEN_WARN(L),
    ...
    };

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h  | 15 +++++-----
 lib/nlattr.c           | 16 ++++++----
 net/wireless/nl80211.c | 81 +++++++++++---------------------------------------
 3 files changed, 36 insertions(+), 76 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 4acd7165e900..4d4a733f1e8d 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -182,7 +182,6 @@ enum {
 	NLA_BITFIELD32,
 	NLA_REJECT,
 	NLA_EXACT_LEN,
-	NLA_EXACT_LEN_WARN,
 	NLA_MIN_LEN,
 	__NLA_TYPE_MAX,
 };
@@ -204,6 +203,7 @@ enum nla_policy_validation {
 	NLA_VALIDATE_MAX,
 	NLA_VALIDATE_RANGE_PTR,
 	NLA_VALIDATE_FUNCTION,
+	NLA_VALIDATE_WARN_TOO_LONG,
 };
 
 /**
@@ -237,10 +237,10 @@ enum nla_policy_validation {
  *                         just like "All other"
  *    NLA_BITFIELD32       Unused
  *    NLA_REJECT           Unused
- *    NLA_EXACT_LEN        Attribute must have exactly this length, otherwise
- *                         it is rejected.
- *    NLA_EXACT_LEN_WARN   Attribute should have exactly this length, a warning
- *                         is logged if it is longer, shorter is rejected.
+ *    NLA_EXACT_LEN        Attribute should have exactly this length, otherwise
+ *                         it is rejected or warned about, the latter happening
+ *                         if and only if the `validation_type' is set to
+ *                         NLA_VALIDATE_WARN_TOO_LONG.
  *    NLA_MIN_LEN          Minimum length of attribute payload
  *    All other            Minimum length of attribute payload
  *
@@ -350,8 +350,9 @@ struct nla_policy {
 };
 
 #define NLA_POLICY_EXACT_LEN(_len)	{ .type = NLA_EXACT_LEN, .len = _len }
-#define NLA_POLICY_EXACT_LEN_WARN(_len)	{ .type = NLA_EXACT_LEN_WARN, \
-					  .len = _len }
+#define NLA_POLICY_EXACT_LEN_WARN(_len) \
+	{ .type = NLA_EXACT_LEN, .len = _len, \
+	  .validation_type = NLA_VALIDATE_WARN_TOO_LONG, }
 #define NLA_POLICY_MIN_LEN(_len)	{ .type = NLA_MIN_LEN, .len = _len }
 
 #define NLA_POLICY_ETH_ADDR		NLA_POLICY_EXACT_LEN(ETH_ALEN)
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 21ef3998b9d9..6dcbe1bedd3b 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -261,7 +261,9 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	BUG_ON(pt->type > NLA_TYPE_MAX);
 
 	if ((nla_attr_len[pt->type] && attrlen != nla_attr_len[pt->type]) ||
-	    (pt->type == NLA_EXACT_LEN_WARN && attrlen != pt->len)) {
+	    (pt->type == NLA_EXACT_LEN &&
+	     pt->validation_type == NLA_VALIDATE_WARN_TOO_LONG &&
+	     attrlen != pt->len)) {
 		pr_warn_ratelimited("netlink: '%s': attribute type %d has an invalid length.\n",
 				    current->comm, type);
 		if (validate & NL_VALIDATE_STRICT_ATTRS) {
@@ -287,11 +289,6 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 	}
 
 	switch (pt->type) {
-	case NLA_EXACT_LEN:
-		if (attrlen != pt->len)
-			goto out_err;
-		break;
-
 	case NLA_REJECT:
 		if (extack && pt->reject_message) {
 			NL_SET_BAD_ATTR(extack, nla);
@@ -405,6 +402,13 @@ static int validate_nla(const struct nlattr *nla, int maxtype,
 			goto out_err;
 		break;
 
+	case NLA_EXACT_LEN:
+		if (pt->validation_type != NLA_VALIDATE_WARN_TOO_LONG) {
+			if (attrlen != pt->len)
+				goto out_err;
+			break;
+		}
+		/* fall through */
 	default:
 		if (pt->len)
 			minlen = pt->len;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 57c618b6cb0e..519414468b5d 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -376,11 +376,8 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_IFINDEX] = { .type = NLA_U32 },
 	[NL80211_ATTR_IFNAME] = { .type = NLA_NUL_STRING, .len = IFNAMSIZ-1 },
 
-	[NL80211_ATTR_MAC] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
-	[NL80211_ATTR_PREV_BSSID] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_ATTR_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
+	[NL80211_ATTR_PREV_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 
 	[NL80211_ATTR_KEY] = { .type = NLA_NESTED, },
 	[NL80211_ATTR_KEY_DATA] = { .type = NLA_BINARY,
@@ -432,10 +429,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MESH_CONFIG] = { .type = NLA_NESTED },
 	[NL80211_ATTR_SUPPORT_MESH_AUTH] = { .type = NLA_FLAG },
 
-	[NL80211_ATTR_HT_CAPABILITY] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = NL80211_HT_CAPABILITY_LEN
-	},
+	[NL80211_ATTR_HT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_HT_CAPABILITY_LEN),
 
 	[NL80211_ATTR_MGMT_SUBTYPE] = { .type = NLA_U8 },
 	[NL80211_ATTR_IE] = NLA_POLICY_VALIDATE_FN(NLA_BINARY,
@@ -466,10 +460,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_WPA_VERSIONS] = { .type = NLA_U32 },
 	[NL80211_ATTR_PID] = { .type = NLA_U32 },
 	[NL80211_ATTR_4ADDR] = { .type = NLA_U8 },
-	[NL80211_ATTR_PMKID] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = WLAN_PMKID_LEN
-	},
+	[NL80211_ATTR_PMKID] = NLA_POLICY_EXACT_LEN_WARN(WLAN_PMKID_LEN),
 	[NL80211_ATTR_DURATION] = { .type = NLA_U32 },
 	[NL80211_ATTR_COOKIE] = { .type = NLA_U64 },
 	[NL80211_ATTR_TX_RATES] = { .type = NLA_NESTED },
@@ -533,10 +524,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_WDEV] = { .type = NLA_U64 },
 	[NL80211_ATTR_USER_REG_HINT_TYPE] = { .type = NLA_U32 },
 	[NL80211_ATTR_AUTH_DATA] = { .type = NLA_BINARY, },
-	[NL80211_ATTR_VHT_CAPABILITY] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = NL80211_VHT_CAPABILITY_LEN
-	},
+	[NL80211_ATTR_VHT_CAPABILITY] = NLA_POLICY_EXACT_LEN_WARN(NL80211_VHT_CAPABILITY_LEN),
 	[NL80211_ATTR_SCAN_FLAGS] = { .type = NLA_U32 },
 	[NL80211_ATTR_P2P_CTWINDOW] = NLA_POLICY_MAX(NLA_U8, 127),
 	[NL80211_ATTR_P2P_OPPPS] = NLA_POLICY_MAX(NLA_U8, 1),
@@ -574,10 +562,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_VENDOR_DATA] = { .type = NLA_BINARY },
 	[NL80211_ATTR_QOS_MAP] = { .type = NLA_BINARY,
 				   .len = IEEE80211_QOS_MAP_LEN_MAX },
-	[NL80211_ATTR_MAC_HINT] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_ATTR_MAC_HINT] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_ATTR_WIPHY_FREQ_HINT] = { .type = NLA_U32 },
 	[NL80211_ATTR_TDLS_PEER_CAPABILITY] = { .type = NLA_U32 },
 	[NL80211_ATTR_SOCKET_OWNER] = { .type = NLA_FLAG },
@@ -589,10 +574,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_ADMITTED_TIME] = { .type = NLA_U16 },
 	[NL80211_ATTR_SMPS_MODE] = { .type = NLA_U8 },
 	[NL80211_ATTR_OPER_CLASS] = { .type = NLA_U8 },
-	[NL80211_ATTR_MAC_MASK] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_ATTR_MAC_MASK] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_ATTR_WIPHY_SELF_MANAGED_REG] = { .type = NLA_FLAG },
 	[NL80211_ATTR_NETNS_FD] = { .type = NLA_U32 },
 	[NL80211_ATTR_SCHED_SCAN_DELAY] = { .type = NLA_U32 },
@@ -604,21 +586,15 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_MU_MIMO_GROUP_DATA] = {
 		.len = VHT_MUMIMO_GROUPS_DATA_LEN
 	},
-	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_ATTR_MU_MIMO_FOLLOW_MAC_ADDR] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_ATTR_NAN_MASTER_PREF] = NLA_POLICY_MIN(NLA_U8, 1),
 	[NL80211_ATTR_BANDS] = { .type = NLA_U32 },
 	[NL80211_ATTR_NAN_FUNC] = { .type = NLA_NESTED },
 	[NL80211_ATTR_FILS_KEK] = { .type = NLA_BINARY,
 				    .len = FILS_MAX_KEK_LEN },
-	[NL80211_ATTR_FILS_NONCES] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = 2 * FILS_NONCE_LEN
-	},
+	[NL80211_ATTR_FILS_NONCES] = NLA_POLICY_EXACT_LEN_WARN(2 * FILS_NONCE_LEN),
 	[NL80211_ATTR_MULTICAST_TO_UNICAST_ENABLED] = { .type = NLA_FLAG, },
-	[NL80211_ATTR_BSSID] = { .type = NLA_EXACT_LEN_WARN, .len = ETH_ALEN },
+	[NL80211_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_ATTR_SCHED_SCAN_RELATIVE_RSSI] = { .type = NLA_S8 },
 	[NL80211_ATTR_SCHED_SCAN_RSSI_ADJUST] = {
 		.len = sizeof(struct nl80211_bss_select_rssi_adjust)
@@ -631,7 +607,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_FILS_ERP_NEXT_SEQ_NUM] = { .type = NLA_U16 },
 	[NL80211_ATTR_FILS_ERP_RRK] = { .type = NLA_BINARY,
 					.len = FILS_ERP_MAX_RRK_LEN },
-	[NL80211_ATTR_FILS_CACHE_ID] = { .type = NLA_EXACT_LEN_WARN, .len = 2 },
+	[NL80211_ATTR_FILS_CACHE_ID] = NLA_POLICY_EXACT_LEN_WARN(2),
 	[NL80211_ATTR_PMK] = { .type = NLA_BINARY, .len = PMK_MAX_LEN },
 	[NL80211_ATTR_SCHED_SCAN_MULTI] = { .type = NLA_FLAG },
 	[NL80211_ATTR_EXTERNAL_AUTH_SUPPORT] = { .type = NLA_FLAG },
@@ -701,10 +677,7 @@ static const struct nla_policy
 nl80211_wowlan_tcp_policy[NUM_NL80211_WOWLAN_TCP] = {
 	[NL80211_WOWLAN_TCP_SRC_IPV4] = { .type = NLA_U32 },
 	[NL80211_WOWLAN_TCP_DST_IPV4] = { .type = NLA_U32 },
-	[NL80211_WOWLAN_TCP_DST_MAC] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_WOWLAN_TCP_DST_MAC] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_WOWLAN_TCP_SRC_PORT] = { .type = NLA_U16 },
 	[NL80211_WOWLAN_TCP_DST_PORT] = { .type = NLA_U16 },
 	[NL80211_WOWLAN_TCP_DATA_PAYLOAD] = { .type = NLA_MIN_LEN, .len = 1 },
@@ -734,18 +707,9 @@ nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
 /* policy for GTK rekey offload attributes */
 static const struct nla_policy
 nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
-	[NL80211_REKEY_DATA_KEK] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = NL80211_KEK_LEN,
-	},
-	[NL80211_REKEY_DATA_KCK] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = NL80211_KCK_LEN,
-	},
-	[NL80211_REKEY_DATA_REPLAY_CTR] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = NL80211_REPLAY_CTR_LEN
-	},
+	[NL80211_REKEY_DATA_KEK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KEK_LEN),
+	[NL80211_REKEY_DATA_KCK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KCK_LEN),
+	[NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN),
 };
 
 static const struct nla_policy
@@ -760,10 +724,7 @@ static const struct nla_policy
 nl80211_match_policy[NL80211_SCHED_SCAN_MATCH_ATTR_MAX + 1] = {
 	[NL80211_SCHED_SCAN_MATCH_ATTR_SSID] = { .type = NLA_BINARY,
 						 .len = IEEE80211_MAX_SSID_LEN },
-	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_SCHED_SCAN_MATCH_ATTR_BSSID] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_SCHED_SCAN_MATCH_ATTR_RSSI] = { .type = NLA_U32 },
 	[NL80211_SCHED_SCAN_MATCH_PER_BAND_RSSI] =
 		NLA_POLICY_NESTED(nl80211_match_band_rssi_policy),
@@ -795,10 +756,7 @@ nl80211_nan_func_policy[NL80211_NAN_FUNC_ATTR_MAX + 1] = {
 	[NL80211_NAN_FUNC_SUBSCRIBE_ACTIVE] = { .type = NLA_FLAG },
 	[NL80211_NAN_FUNC_FOLLOW_UP_ID] = { .type = NLA_U8 },
 	[NL80211_NAN_FUNC_FOLLOW_UP_REQ_ID] = { .type = NLA_U8 },
-	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = ETH_ALEN
-	},
+	[NL80211_NAN_FUNC_FOLLOW_UP_DEST] = NLA_POLICY_EXACT_LEN_WARN(ETH_ALEN),
 	[NL80211_NAN_FUNC_CLOSE_RANGE] = { .type = NLA_FLAG },
 	[NL80211_NAN_FUNC_TTL] = { .type = NLA_U32 },
 	[NL80211_NAN_FUNC_SERVICE_INFO] = { .type = NLA_BINARY,
@@ -4404,10 +4362,7 @@ static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
 				    .len = NL80211_MAX_SUPP_RATES },
 	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
 				.len = NL80211_MAX_SUPP_HT_RATES },
-	[NL80211_TXRATE_VHT] = {
-		.type = NLA_EXACT_LEN_WARN,
-		.len = sizeof(struct nl80211_txrate_vht),
-	},
+	[NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
 	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 2c28ae48f24d84fcda31fb8acaf2edca6ec46c49 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:11 +0200
Subject: netlink: factor out policy range helpers

Add helpers to get the policy's signed/unsigned range
validation data.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h |  5 +++
 lib/nlattr.c          | 95 +++++++++++++++++++++++++++++++++++++++------------
 2 files changed, 79 insertions(+), 21 deletions(-)

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 4d4a733f1e8d..557b67f1db99 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1928,4 +1928,9 @@ static inline bool nla_is_last(const struct nlattr *nla, int rem)
 	return nla->nla_len == rem;
 }
 
+void nla_get_range_unsigned(const struct nla_policy *pt,
+			    struct netlink_range_validation *range);
+void nla_get_range_signed(const struct nla_policy *pt,
+			  struct netlink_range_validation_signed *range);
+
 #endif
diff --git a/lib/nlattr.c b/lib/nlattr.c
index 6dcbe1bedd3b..bc5b5cf608c4 100644
--- a/lib/nlattr.c
+++ b/lib/nlattr.c
@@ -111,26 +111,40 @@ static int nla_validate_array(const struct nlattr *head, int len, int maxtype,
 	return 0;
 }
 
-static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
-					   const struct nlattr *nla,
-					   struct netlink_ext_ack *extack)
+void nla_get_range_unsigned(const struct nla_policy *pt,
+			    struct netlink_range_validation *range)
 {
-	struct netlink_range_validation _range = {
-		.min = 0,
-		.max = U64_MAX,
-	}, *range = &_range;
-	u64 value;
-
 	WARN_ON_ONCE(pt->validation_type != NLA_VALIDATE_RANGE_PTR &&
 		     (pt->min < 0 || pt->max < 0));
 
+	range->min = 0;
+
+	switch (pt->type) {
+	case NLA_U8:
+		range->max = U8_MAX;
+		break;
+	case NLA_U16:
+		range->max = U16_MAX;
+		break;
+	case NLA_U32:
+		range->max = U32_MAX;
+		break;
+	case NLA_U64:
+	case NLA_MSECS:
+		range->max = U64_MAX;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
+
 	switch (pt->validation_type) {
 	case NLA_VALIDATE_RANGE:
 		range->min = pt->min;
 		range->max = pt->max;
 		break;
 	case NLA_VALIDATE_RANGE_PTR:
-		range = pt->range;
+		*range = *pt->range;
 		break;
 	case NLA_VALIDATE_MIN:
 		range->min = pt->min;
@@ -138,7 +152,17 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 	case NLA_VALIDATE_MAX:
 		range->max = pt->max;
 		break;
+	default:
+		break;
 	}
+}
+
+static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
+					   const struct nlattr *nla,
+					   struct netlink_ext_ack *extack)
+{
+	struct netlink_range_validation range;
+	u64 value;
 
 	switch (pt->type) {
 	case NLA_U8:
@@ -158,7 +182,9 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 		return -EINVAL;
 	}
 
-	if (value < range->min || value > range->max) {
+	nla_get_range_unsigned(pt, &range);
+
+	if (value < range.min || value > range.max) {
 		NL_SET_ERR_MSG_ATTR(extack, nla,
 				    "integer out of range");
 		return -ERANGE;
@@ -167,15 +193,30 @@ static int nla_validate_int_range_unsigned(const struct nla_policy *pt,
 	return 0;
 }
 
-static int nla_validate_int_range_signed(const struct nla_policy *pt,
-					 const struct nlattr *nla,
-					 struct netlink_ext_ack *extack)
+void nla_get_range_signed(const struct nla_policy *pt,
+			  struct netlink_range_validation_signed *range)
 {
-	struct netlink_range_validation_signed _range = {
-		.min = S64_MIN,
-		.max = S64_MAX,
-	}, *range = &_range;
-	s64 value;
+	switch (pt->type) {
+	case NLA_S8:
+		range->min = S8_MIN;
+		range->max = S8_MAX;
+		break;
+	case NLA_S16:
+		range->min = S16_MIN;
+		range->max = S16_MAX;
+		break;
+	case NLA_S32:
+		range->min = S32_MIN;
+		range->max = S32_MAX;
+		break;
+	case NLA_S64:
+		range->min = S64_MIN;
+		range->max = S64_MAX;
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return;
+	}
 
 	switch (pt->validation_type) {
 	case NLA_VALIDATE_RANGE:
@@ -183,7 +224,7 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 		range->max = pt->max;
 		break;
 	case NLA_VALIDATE_RANGE_PTR:
-		range = pt->range_signed;
+		*range = *pt->range_signed;
 		break;
 	case NLA_VALIDATE_MIN:
 		range->min = pt->min;
@@ -191,7 +232,17 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 	case NLA_VALIDATE_MAX:
 		range->max = pt->max;
 		break;
+	default:
+		break;
 	}
+}
+
+static int nla_validate_int_range_signed(const struct nla_policy *pt,
+					 const struct nlattr *nla,
+					 struct netlink_ext_ack *extack)
+{
+	struct netlink_range_validation_signed range;
+	s64 value;
 
 	switch (pt->type) {
 	case NLA_S8:
@@ -210,7 +261,9 @@ static int nla_validate_int_range_signed(const struct nla_policy *pt,
 		return -EINVAL;
 	}
 
-	if (value < range->min || value > range->max) {
+	nla_get_range_signed(pt, &range);
+
+	if (value < range.min || value > range.max) {
 		NL_SET_ERR_MSG_ATTR(extack, nla,
 				    "integer out of range");
 		return -ERANGE;
-- 
cgit v1.2.3-59-g8ed1b


From d07dcf9aadd6b2842b439e8668ff7ea2873f28d7 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 30 Apr 2020 22:13:12 +0200
Subject: netlink: add infrastructure to expose policies to userspace

Add, and use in generic netlink, helpers to dump out a netlink
policy to userspace, including all the range validation data,
nested policies etc.

This lets userspace discover what the kernel understands.

For families/commands other than generic netlink, the helpers
need to be used directly in an appropriate command, or we can
add some infrastructure (a new netlink family) that those can
register their policies with for introspection. I'm not that
familiar with non-generic netlink, so that's left out for now.

The data exposed to userspace also includes min and max length
for binary/string data. I've done that instead of letting the
userspace tools figure out whether min/max is intended based
on the type, so that we can extend this later in the kernel; we
might want to just use the range data, for example.

Because of this, I opted to not directly expose the NLA_*
values, even if some of them are already exposed via BPF. With
min/max length we don't need to have different types here
for NLA_BINARY/NLA_MIN_LEN/NLA_EXACT_LEN; we just make them
all NL_ATTR_TYPE_BINARY with min/max length optionally set.

Similarly, we don't really need NLA_MSECS, and perhaps can
remove it in the future - but not if we encode it into the
userspace API now. It gets mapped to NL_ATTR_TYPE_U64 here.

Note that the exposing here corresponds to the strict policy
interpretation, and NLA_UNSPEC items are omitted entirely.
To get those, change them to NLA_MIN_LEN which behaves in
exactly the same way, but is exposed.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netlink.h          |   6 +
 include/uapi/linux/genetlink.h |   2 +
 include/uapi/linux/netlink.h   | 103 ++++++++++++++
 net/netlink/Makefile           |   2 +-
 net/netlink/genetlink.c        |  78 +++++++++++
 net/netlink/policy.c           | 308 +++++++++++++++++++++++++++++++++++++++++
 6 files changed, 498 insertions(+), 1 deletion(-)
 create mode 100644 net/netlink/policy.c

diff --git a/include/net/netlink.h b/include/net/netlink.h
index 557b67f1db99..c0411f14fb53 100644
--- a/include/net/netlink.h
+++ b/include/net/netlink.h
@@ -1933,4 +1933,10 @@ void nla_get_range_unsigned(const struct nla_policy *pt,
 void nla_get_range_signed(const struct nla_policy *pt,
 			  struct netlink_range_validation_signed *range);
 
+int netlink_policy_dump_start(const struct nla_policy *policy,
+			      unsigned int maxtype,
+			      unsigned long *state);
+bool netlink_policy_dump_loop(unsigned long *state);
+int netlink_policy_dump_write(struct sk_buff *skb, unsigned long state);
+
 #endif
diff --git a/include/uapi/linux/genetlink.h b/include/uapi/linux/genetlink.h
index 877f7fa95466..9c0636ec2286 100644
--- a/include/uapi/linux/genetlink.h
+++ b/include/uapi/linux/genetlink.h
@@ -48,6 +48,7 @@ enum {
 	CTRL_CMD_NEWMCAST_GRP,
 	CTRL_CMD_DELMCAST_GRP,
 	CTRL_CMD_GETMCAST_GRP, /* unused */
+	CTRL_CMD_GETPOLICY,
 	__CTRL_CMD_MAX,
 };
 
@@ -62,6 +63,7 @@ enum {
 	CTRL_ATTR_MAXATTR,
 	CTRL_ATTR_OPS,
 	CTRL_ATTR_MCAST_GROUPS,
+	CTRL_ATTR_POLICY,
 	__CTRL_ATTR_MAX,
 };
 
diff --git a/include/uapi/linux/netlink.h b/include/uapi/linux/netlink.h
index 0a4d73317759..eac8a6a648ea 100644
--- a/include/uapi/linux/netlink.h
+++ b/include/uapi/linux/netlink.h
@@ -249,4 +249,107 @@ struct nla_bitfield32 {
 	__u32 selector;
 };
 
+/*
+ * policy descriptions - it's specific to each family how this is used
+ * Normally, it should be retrieved via a dump inside another attribute
+ * specifying where it applies.
+ */
+
+/**
+ * enum netlink_attribute_type - type of an attribute
+ * @NL_ATTR_TYPE_INVALID: unused
+ * @NL_ATTR_TYPE_FLAG: flag attribute (present/not present)
+ * @NL_ATTR_TYPE_U8: 8-bit unsigned attribute
+ * @NL_ATTR_TYPE_U16: 16-bit unsigned attribute
+ * @NL_ATTR_TYPE_U32: 32-bit unsigned attribute
+ * @NL_ATTR_TYPE_U64: 64-bit unsigned attribute
+ * @NL_ATTR_TYPE_S8: 8-bit signed attribute
+ * @NL_ATTR_TYPE_S16: 16-bit signed attribute
+ * @NL_ATTR_TYPE_S32: 32-bit signed attribute
+ * @NL_ATTR_TYPE_S64: 64-bit signed attribute
+ * @NL_ATTR_TYPE_BINARY: binary data, min/max length may be specified
+ * @NL_ATTR_TYPE_STRING: string, min/max length may be specified
+ * @NL_ATTR_TYPE_NUL_STRING: NUL-terminated string,
+ *	min/max length may be specified
+ * @NL_ATTR_TYPE_NESTED: nested, i.e. the content of this attribute
+ *	consists of sub-attributes. The nested policy and maxtype
+ *	inside may be specified.
+ * @NL_ATTR_TYPE_NESTED_ARRAY: nested array, i.e. the content of this
+ *	attribute contains sub-attributes whose type is irrelevant
+ *	(just used to separate the array entries) and each such array
+ *	entry has attributes again, the policy for those inner ones
+ *	and the corresponding maxtype may be specified.
+ * @NL_ATTR_TYPE_BITFIELD32: &struct nla_bitfield32 attribute
+ */
+enum netlink_attribute_type {
+	NL_ATTR_TYPE_INVALID,
+
+	NL_ATTR_TYPE_FLAG,
+
+	NL_ATTR_TYPE_U8,
+	NL_ATTR_TYPE_U16,
+	NL_ATTR_TYPE_U32,
+	NL_ATTR_TYPE_U64,
+
+	NL_ATTR_TYPE_S8,
+	NL_ATTR_TYPE_S16,
+	NL_ATTR_TYPE_S32,
+	NL_ATTR_TYPE_S64,
+
+	NL_ATTR_TYPE_BINARY,
+	NL_ATTR_TYPE_STRING,
+	NL_ATTR_TYPE_NUL_STRING,
+
+	NL_ATTR_TYPE_NESTED,
+	NL_ATTR_TYPE_NESTED_ARRAY,
+
+	NL_ATTR_TYPE_BITFIELD32,
+};
+
+/**
+ * enum netlink_policy_type_attr - policy type attributes
+ * @NL_POLICY_TYPE_ATTR_UNSPEC: unused
+ * @NL_POLICY_TYPE_ATTR_TYPE: type of the attribute,
+ *	&enum netlink_attribute_type (U32)
+ * @NL_POLICY_TYPE_ATTR_MIN_VALUE_S: minimum value for signed
+ *	integers (S64)
+ * @NL_POLICY_TYPE_ATTR_MAX_VALUE_S: maximum value for signed
+ *	integers (S64)
+ * @NL_POLICY_TYPE_ATTR_MIN_VALUE_U: minimum value for unsigned
+ *	integers (U64)
+ * @NL_POLICY_TYPE_ATTR_MAX_VALUE_U: maximum value for unsigned
+ *	integers (U64)
+ * @NL_POLICY_TYPE_ATTR_MIN_LENGTH: minimum length for binary
+ *	attributes, no minimum if not given (U32)
+ * @NL_POLICY_TYPE_ATTR_MAX_LENGTH: maximum length for binary
+ *	attributes, no maximum if not given (U32)
+ * @NL_POLICY_TYPE_ATTR_POLICY_IDX: sub policy for nested and
+ *	nested array types (U32)
+ * @NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE: maximum sub policy
+ *	attribute for nested and nested array types, this can
+ *	in theory be < the size of the policy pointed to by
+ *	the index, if limited inside the nesting (U32)
+ * @NL_POLICY_TYPE_ATTR_BITFIELD32_MASK: valid mask for the
+ *	bitfield32 type (U32)
+ * @NL_POLICY_TYPE_ATTR_PAD: pad attribute for 64-bit alignment
+ */
+enum netlink_policy_type_attr {
+	NL_POLICY_TYPE_ATTR_UNSPEC,
+	NL_POLICY_TYPE_ATTR_TYPE,
+	NL_POLICY_TYPE_ATTR_MIN_VALUE_S,
+	NL_POLICY_TYPE_ATTR_MAX_VALUE_S,
+	NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
+	NL_POLICY_TYPE_ATTR_MAX_VALUE_U,
+	NL_POLICY_TYPE_ATTR_MIN_LENGTH,
+	NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+	NL_POLICY_TYPE_ATTR_POLICY_IDX,
+	NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
+	NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
+	NL_POLICY_TYPE_ATTR_PAD,
+
+	/* keep last */
+	__NL_POLICY_TYPE_ATTR_MAX,
+	NL_POLICY_TYPE_ATTR_MAX = __NL_POLICY_TYPE_ATTR_MAX - 1
+};
+
 #endif /* _UAPI__LINUX_NETLINK_H */
diff --git a/net/netlink/Makefile b/net/netlink/Makefile
index de42df7f0068..e05202708c90 100644
--- a/net/netlink/Makefile
+++ b/net/netlink/Makefile
@@ -3,7 +3,7 @@
 # Makefile for the netlink driver.
 #
 
-obj-y  				:= af_netlink.o genetlink.o
+obj-y  				:= af_netlink.o genetlink.o policy.o
 
 obj-$(CONFIG_NETLINK_DIAG)	+= netlink_diag.o
 netlink_diag-y			:= diag.o
diff --git a/net/netlink/genetlink.c b/net/netlink/genetlink.c
index 9f357aa22b94..2f049692e012 100644
--- a/net/netlink/genetlink.c
+++ b/net/netlink/genetlink.c
@@ -1043,6 +1043,80 @@ static int genl_ctrl_event(int event, const struct genl_family *family,
 	return 0;
 }
 
+static int ctrl_dumppolicy(struct sk_buff *skb, struct netlink_callback *cb)
+{
+	const struct genl_family *rt;
+	unsigned int fam_id = cb->args[0];
+	int err;
+
+	if (!fam_id) {
+		struct nlattr *tb[CTRL_ATTR_MAX + 1];
+
+		err = genlmsg_parse(cb->nlh, &genl_ctrl, tb,
+				    genl_ctrl.maxattr,
+				    genl_ctrl.policy, cb->extack);
+		if (err)
+			return err;
+
+		if (!tb[CTRL_ATTR_FAMILY_ID] && !tb[CTRL_ATTR_FAMILY_NAME])
+			return -EINVAL;
+
+		if (tb[CTRL_ATTR_FAMILY_ID]) {
+			fam_id = nla_get_u16(tb[CTRL_ATTR_FAMILY_ID]);
+		} else {
+			rt = genl_family_find_byname(
+				nla_data(tb[CTRL_ATTR_FAMILY_NAME]));
+			if (!rt)
+				return -ENOENT;
+			fam_id = rt->id;
+		}
+	}
+
+	rt = genl_family_find_byid(fam_id);
+	if (!rt)
+		return -ENOENT;
+
+	if (!rt->policy)
+		return -ENODATA;
+
+	err = netlink_policy_dump_start(rt->policy, rt->maxattr, &cb->args[1]);
+	if (err)
+		return err;
+
+	while (netlink_policy_dump_loop(&cb->args[1])) {
+		void *hdr;
+		struct nlattr *nest;
+
+		hdr = genlmsg_put(skb, NETLINK_CB(cb->skb).portid,
+				  cb->nlh->nlmsg_seq, &genl_ctrl,
+				  NLM_F_MULTI, CTRL_CMD_GETPOLICY);
+		if (!hdr)
+			goto nla_put_failure;
+
+		if (nla_put_u16(skb, CTRL_ATTR_FAMILY_ID, rt->id))
+			goto nla_put_failure;
+
+		nest = nla_nest_start(skb, CTRL_ATTR_POLICY);
+		if (!nest)
+			goto nla_put_failure;
+
+		if (netlink_policy_dump_write(skb, cb->args[1]))
+			goto nla_put_failure;
+
+		nla_nest_end(skb, nest);
+
+		genlmsg_end(skb, hdr);
+		continue;
+
+nla_put_failure:
+		genlmsg_cancel(skb, hdr);
+		break;
+	}
+
+	cb->args[0] = fam_id;
+	return skb->len;
+}
+
 static const struct genl_ops genl_ctrl_ops[] = {
 	{
 		.cmd		= CTRL_CMD_GETFAMILY,
@@ -1050,6 +1124,10 @@ static const struct genl_ops genl_ctrl_ops[] = {
 		.doit		= ctrl_getfamily,
 		.dumpit		= ctrl_dumpfamily,
 	},
+	{
+		.cmd		= CTRL_CMD_GETPOLICY,
+		.dumpit		= ctrl_dumppolicy,
+	},
 };
 
 static const struct genl_multicast_group genl_ctrl_groups[] = {
diff --git a/net/netlink/policy.c b/net/netlink/policy.c
new file mode 100644
index 000000000000..f6491853c797
--- /dev/null
+++ b/net/netlink/policy.c
@@ -0,0 +1,308 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * NETLINK      Policy advertisement to userspace
+ *
+ * 		Authors:	Johannes Berg <johannes@sipsolutions.net>
+ *
+ * Copyright 2019 Intel Corporation
+ */
+
+#include <linux/kernel.h>
+#include <linux/errno.h>
+#include <linux/types.h>
+#include <net/netlink.h>
+
+#define INITIAL_POLICIES_ALLOC	10
+
+struct nl_policy_dump {
+	unsigned int policy_idx;
+	unsigned int attr_idx;
+	unsigned int n_alloc;
+	struct {
+		const struct nla_policy *policy;
+		unsigned int maxtype;
+	} policies[];
+};
+
+static int add_policy(struct nl_policy_dump **statep,
+		      const struct nla_policy *policy,
+		      unsigned int maxtype)
+{
+	struct nl_policy_dump *state = *statep;
+	unsigned int n_alloc, i;
+
+	if (!policy || !maxtype)
+		return 0;
+
+	for (i = 0; i < state->n_alloc; i++) {
+		if (state->policies[i].policy == policy)
+			return 0;
+
+		if (!state->policies[i].policy) {
+			state->policies[i].policy = policy;
+			state->policies[i].maxtype = maxtype;
+			return 0;
+		}
+	}
+
+	n_alloc = state->n_alloc + INITIAL_POLICIES_ALLOC;
+	state = krealloc(state, struct_size(state, policies, n_alloc),
+			 GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+
+	state->policies[state->n_alloc].policy = policy;
+	state->policies[state->n_alloc].maxtype = maxtype;
+	state->n_alloc = n_alloc;
+	*statep = state;
+
+	return 0;
+}
+
+static unsigned int get_policy_idx(struct nl_policy_dump *state,
+				   const struct nla_policy *policy)
+{
+	unsigned int i;
+
+	for (i = 0; i < state->n_alloc; i++) {
+		if (state->policies[i].policy == policy)
+			return i;
+	}
+
+	WARN_ON_ONCE(1);
+	return -1;
+}
+
+int netlink_policy_dump_start(const struct nla_policy *policy,
+			      unsigned int maxtype,
+                              unsigned long *_state)
+{
+	struct nl_policy_dump *state;
+	unsigned int policy_idx;
+	int err;
+
+	/* also returns 0 if "*_state" is our ERR_PTR() end marker */
+	if (*_state)
+		return 0;
+
+	/*
+	 * walk the policies and nested ones first, and build
+	 * a linear list of them.
+	 */
+
+	state = kzalloc(struct_size(state, policies, INITIAL_POLICIES_ALLOC),
+			GFP_KERNEL);
+	if (!state)
+		return -ENOMEM;
+	state->n_alloc = INITIAL_POLICIES_ALLOC;
+
+	err = add_policy(&state, policy, maxtype);
+	if (err)
+		return err;
+
+	for (policy_idx = 0;
+	     policy_idx < state->n_alloc && state->policies[policy_idx].policy;
+	     policy_idx++) {
+		const struct nla_policy *policy;
+		unsigned int type;
+
+		policy = state->policies[policy_idx].policy;
+
+		for (type = 0;
+		     type <= state->policies[policy_idx].maxtype;
+		     type++) {
+			switch (policy[type].type) {
+			case NLA_NESTED:
+			case NLA_NESTED_ARRAY:
+				err = add_policy(&state,
+						 policy[type].nested_policy,
+						 policy[type].len);
+				if (err)
+					return err;
+				break;
+			default:
+				break;
+			}
+		}
+	}
+
+	*_state = (unsigned long)state;
+
+	return 0;
+}
+
+static bool netlink_policy_dump_finished(struct nl_policy_dump *state)
+{
+	return state->policy_idx >= state->n_alloc ||
+	       !state->policies[state->policy_idx].policy;
+}
+
+bool netlink_policy_dump_loop(unsigned long *_state)
+{
+	struct nl_policy_dump *state = (void *)*_state;
+
+	if (IS_ERR(state))
+		return false;
+
+	if (netlink_policy_dump_finished(state)) {
+		kfree(state);
+		/* store end marker instead of freed state */
+		*_state = (unsigned long)ERR_PTR(-ENOENT);
+		return false;
+	}
+
+	return true;
+}
+
+int netlink_policy_dump_write(struct sk_buff *skb, unsigned long _state)
+{
+	struct nl_policy_dump *state = (void *)_state;
+	const struct nla_policy *pt;
+	struct nlattr *policy, *attr;
+	enum netlink_attribute_type type;
+	bool again;
+
+send_attribute:
+	again = false;
+
+	pt = &state->policies[state->policy_idx].policy[state->attr_idx];
+
+	policy = nla_nest_start(skb, state->policy_idx);
+	if (!policy)
+		return -ENOBUFS;
+
+	attr = nla_nest_start(skb, state->attr_idx);
+	if (!attr)
+		goto nla_put_failure;
+
+	switch (pt->type) {
+	default:
+	case NLA_UNSPEC:
+	case NLA_REJECT:
+		/* skip - use NLA_MIN_LEN to advertise such */
+		nla_nest_cancel(skb, policy);
+		again = true;
+		goto next;
+	case NLA_NESTED:
+		type = NL_ATTR_TYPE_NESTED;
+		/* fall through */
+	case NLA_NESTED_ARRAY:
+		if (pt->type == NLA_NESTED_ARRAY)
+			type = NL_ATTR_TYPE_NESTED_ARRAY;
+		if (pt->nested_policy && pt->len &&
+		    (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_IDX,
+				 get_policy_idx(state, pt->nested_policy)) ||
+		     nla_put_u32(skb, NL_POLICY_TYPE_ATTR_POLICY_MAXTYPE,
+				 pt->len)))
+			goto nla_put_failure;
+		break;
+	case NLA_U8:
+	case NLA_U16:
+	case NLA_U32:
+	case NLA_U64:
+	case NLA_MSECS: {
+		struct netlink_range_validation range;
+
+		if (pt->type == NLA_U8)
+			type = NL_ATTR_TYPE_U8;
+		else if (pt->type == NLA_U16)
+			type = NL_ATTR_TYPE_U16;
+		else if (pt->type == NLA_U32)
+			type = NL_ATTR_TYPE_U32;
+		else
+			type = NL_ATTR_TYPE_U64;
+
+		nla_get_range_unsigned(pt, &range);
+
+		if (nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_U,
+				      range.min, NL_POLICY_TYPE_ATTR_PAD) ||
+		    nla_put_u64_64bit(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_U,
+				      range.max, NL_POLICY_TYPE_ATTR_PAD))
+			goto nla_put_failure;
+		break;
+	}
+	case NLA_S8:
+	case NLA_S16:
+	case NLA_S32:
+	case NLA_S64: {
+		struct netlink_range_validation_signed range;
+
+		if (pt->type == NLA_S8)
+			type = NL_ATTR_TYPE_S8;
+		else if (pt->type == NLA_S16)
+			type = NL_ATTR_TYPE_S16;
+		else if (pt->type == NLA_S32)
+			type = NL_ATTR_TYPE_S32;
+		else
+			type = NL_ATTR_TYPE_S64;
+
+		nla_get_range_signed(pt, &range);
+
+		if (nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MIN_VALUE_S,
+				range.min, NL_POLICY_TYPE_ATTR_PAD) ||
+		    nla_put_s64(skb, NL_POLICY_TYPE_ATTR_MAX_VALUE_S,
+				range.max, NL_POLICY_TYPE_ATTR_PAD))
+			goto nla_put_failure;
+		break;
+	}
+	case NLA_BITFIELD32:
+		type = NL_ATTR_TYPE_BITFIELD32;
+		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_BITFIELD32_MASK,
+				pt->bitfield32_valid))
+			goto nla_put_failure;
+		break;
+	case NLA_EXACT_LEN:
+		type = NL_ATTR_TYPE_BINARY;
+		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len) ||
+		    nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH, pt->len))
+			goto nla_put_failure;
+		break;
+	case NLA_STRING:
+	case NLA_NUL_STRING:
+	case NLA_BINARY:
+		if (pt->type == NLA_STRING)
+			type = NL_ATTR_TYPE_STRING;
+		else if (pt->type == NLA_NUL_STRING)
+			type = NL_ATTR_TYPE_NUL_STRING;
+		else
+			type = NL_ATTR_TYPE_BINARY;
+		if (pt->len && nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MAX_LENGTH,
+					   pt->len))
+			goto nla_put_failure;
+		break;
+	case NLA_MIN_LEN:
+		type = NL_ATTR_TYPE_BINARY;
+		if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_MIN_LENGTH, pt->len))
+			goto nla_put_failure;
+		break;
+	case NLA_FLAG:
+		type = NL_ATTR_TYPE_FLAG;
+		break;
+	}
+
+	if (nla_put_u32(skb, NL_POLICY_TYPE_ATTR_TYPE, type))
+		goto nla_put_failure;
+
+	/* finish and move state to next attribute */
+	nla_nest_end(skb, attr);
+	nla_nest_end(skb, policy);
+
+next:
+	state->attr_idx += 1;
+	if (state->attr_idx > state->policies[state->policy_idx].maxtype) {
+		state->attr_idx = 0;
+		state->policy_idx++;
+	}
+
+	if (again) {
+		if (netlink_policy_dump_finished(state))
+			return -ENODATA;
+		goto send_attribute;
+	}
+
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, policy);
+	return -ENOBUFS;
+}
-- 
cgit v1.2.3-59-g8ed1b


From f256356f65e6449a9fcf6089ea25882c91768665 Mon Sep 17 00:00:00 2001
From: Yangbo Lu <yangbo.lu@nxp.com>
Date: Mon, 27 Apr 2020 11:39:03 +0800
Subject: ptp_qoriq: output PPS signal on FIPER2 in default

Output the PPS signal on FIPER2 (Fixed Period Interval Pulse) by default,
which is more desirable for users.

Signed-off-by: Yangbo Lu <yangbo.lu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/fsl/ptp_qoriq.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/fsl/ptp_qoriq.h b/include/linux/fsl/ptp_qoriq.h
index 75884563059f..884b8f8ca06d 100644
--- a/include/linux/fsl/ptp_qoriq.h
+++ b/include/linux/fsl/ptp_qoriq.h
@@ -135,7 +135,7 @@ struct ptp_qoriq_registers {
 #define DEFAULT_CKSEL		1
 #define DEFAULT_TMR_PRSC	2
 #define DEFAULT_FIPER1_PERIOD	1000000000
-#define DEFAULT_FIPER2_PERIOD	100000
+#define DEFAULT_FIPER2_PERIOD	1000000000
 
 struct ptp_qoriq {
 	void __iomem *base;
-- 
cgit v1.2.3-59-g8ed1b


From 7ae9a4f483eccfd46f40ede8d2bd28fd2f7a9c5f Mon Sep 17 00:00:00 2001
From: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Date: Mon, 27 Apr 2020 16:02:30 +0530
Subject: dpaa_eth: Fix comparing pointer to 0

Fixes coccicheck warning:
./drivers/net/ethernet/freescale/dpaa/dpaa_eth.c:2110:30-31:
WARNING comparing pointer to 0

Avoid comparing a pointer-typed value to 0.

Signed-off-by: Aishwarya Ramakrishnan <aishwaryarj100@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa/dpaa_eth.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
index 2cd1f8efdfa3..c4416a5f8816 100644
--- a/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
+++ b/drivers/net/ethernet/freescale/dpaa/dpaa_eth.c
@@ -2107,7 +2107,7 @@ workaround:
 
 	/* Workaround for DPAA_A050385 requires data start to be aligned */
 	start = PTR_ALIGN(new_skb->data, DPAA_A050385_ALIGN);
-	if (start - new_skb->data != 0)
+	if (start - new_skb->data)
 		skb_reserve(new_skb, start - new_skb->data);
 
 	skb_put(new_skb, skb->len);
-- 
cgit v1.2.3-59-g8ed1b


From 654cad8b6a17dcb00077070b27bc65873951a568 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 12:11:10 +0000
Subject: octeontx2-pf: Fix error return code in otx2_probe()

Fix to return negative error code -ENOMEM from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: 5a6d7c9daef3 ("octeontx2-pf: Mailbox communication with AF")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
index 411e5ea1031e..64786568af0d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_pf.c
@@ -1856,13 +1856,17 @@ static int otx2_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	num_vec = pci_msix_vec_count(pdev);
 	hw->irq_name = devm_kmalloc_array(&hw->pdev->dev, num_vec, NAME_SIZE,
 					  GFP_KERNEL);
-	if (!hw->irq_name)
+	if (!hw->irq_name) {
+		err = -ENOMEM;
 		goto err_free_netdev;
+	}
 
 	hw->affinity_mask = devm_kcalloc(&hw->pdev->dev, num_vec,
 					 sizeof(cpumask_var_t), GFP_KERNEL);
-	if (!hw->affinity_mask)
+	if (!hw->affinity_mask) {
+		err = -ENOMEM;
 		goto err_free_netdev;
+	}
 
 	/* Map CSRs */
 	pf->reg_base = pcim_iomap(pdev, PCI_CFG_REG_BAR_NUM, 0);
-- 
cgit v1.2.3-59-g8ed1b


From f8d530ac29fe9248f5e58ca5bcf4c368f8393ccf Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 12:12:28 +0000
Subject: ice: Fix error return code in ice_add_prof()

Fix to return an error code from the error handling case
instead of 0, as done elsewhere in this function.

Fixes: 31ad4e4ee1e4 ("ice: Allocate flow profile")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 42bac3ec5526..e7a2671222d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2962,8 +2962,10 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 
 	/* add profile info */
 	prof = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*prof), GFP_KERNEL);
-	if (!prof)
+	if (!prof) {
+		status = ICE_ERR_NO_MEMORY;
 		goto err_ice_add_prof;
+	}
 
 	prof->profile_cookie = id;
 	prof->prof_id = prof_id;
-- 
cgit v1.2.3-59-g8ed1b


From 88ec7cb22ddde725ed4ce15991f0bd9dd817fd85 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 12:15:07 +0000
Subject: net: lpc-enet: fix error return code in lpc_mii_init()

Fix to return a negative error code from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: b7370112f519 ("lpc32xx: Added ethernet driver")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Vladimir Zapolskiy <vz@mleia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/nxp/lpc_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index d20cf03a3ea0..311454d9b0bc 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -823,7 +823,8 @@ static int lpc_mii_init(struct netdata_local *pldat)
 	if (err)
 		goto err_out_unregister_bus;
 
-	if (lpc_mii_probe(pldat->ndev) != 0)
+	err = lpc_mii_probe(pldat->ndev);
+	if (err)
 		goto err_out_unregister_bus;
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 0a699302be5986307b3dcf84ac7a0dd30f9e9305 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Mon, 27 Apr 2020 22:08:04 +0800
Subject: net: ethernet: fec: Revert "net: ethernet: fec: Replace interrupt
 driven MDIO with polled IO"

This reverts commit 29ae6bd1b0d8a57d7c00ab12cbb949fc41986eef.

That commit breaks Ethernet on the i.MX6SX, i.MX7D, i.MX8MM, i.MX8MQ,
and i.MX8QXP platforms: booting a Yocto system with an NFS-mounted
rootfs fails when it is applied.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h      |  4 +-
 drivers/net/ethernet/freescale/fec_main.c | 67 +++++++++++++++----------------
 2 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index a6cdd5b61921..e74dd1f86bba 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -376,7 +376,8 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
+#define FEC_NAPI_IMASK	FEC_ENET_MII
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */
@@ -542,6 +543,7 @@ struct fec_enet_private {
 	int	link;
 	int	full_duplex;
 	int	speed;
+	struct	completion mdio_done;
 	int	irq[FEC_IRQ_NUM];
 	bool	bufdesc_ex;
 	int	pause_flag;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 1ae075a246a3..c7b84bb22f75 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)
 	writel((__force u32)cpu_to_be32(temp_mac[1]),
 	       fep->hwp + FEC_ADDR_HIGH);
 
-	/* Clear any outstanding interrupt, except MDIO. */
-	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);
+	/* Clear any outstanding interrupt. */
+	writel(0xffffffff, fep->hwp + FEC_IEVENT);
 
 	fec_enet_bd_init(ndev);
 
@@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)
 	if (fep->link)
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	else
-		writel(0, fep->hwp + FEC_IMASK);
+		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
 
 	/* Init the interrupt coalescing */
 	fec_enet_itr_coal_init(ndev);
@@ -1652,10 +1652,6 @@ fec_enet_interrupt(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
-
-	/* Don't clear MDIO events, we poll for those */
-	int_events &= ~FEC_ENET_MII;
-
 	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
@@ -1663,12 +1659,16 @@ fec_enet_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
-			/* Disable interrupts */
-			writel(0, fep->hwp + FEC_IMASK);
+			/* Disable the NAPI interrupts */
+			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
 			__napi_schedule(&fep->napi);
 		}
 	}
 
+	if (int_events & FEC_ENET_MII) {
+		ret = IRQ_HANDLED;
+		complete(&fep->mdio_done);
+	}
 	return ret;
 }
 
@@ -1818,24 +1818,11 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 		phy_print_status(phy_dev);
 }
 
-static int fec_enet_mdio_wait(struct fec_enet_private *fep)
-{
-	uint ievent;
-	int ret;
-
-	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,
-					ievent & FEC_ENET_MII, 2, 30000);
-
-	if (!ret)
-		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
-
-	return ret;
-}
-
 static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
+	unsigned long time_left;
 	int ret = 0, frame_start, frame_addr, frame_op;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1843,6 +1830,8 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	if (ret < 0)
 		return ret;
 
+	reinit_completion(&fep->mdio_done);
+
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1854,9 +1843,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		ret = fec_enet_mdio_wait(fep);
-		if (ret) {
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+				usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
 			goto out;
 		}
 
@@ -1875,9 +1866,11 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	ret = fec_enet_mdio_wait(fep);
-	if (ret) {
+	time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+	if (time_left == 0) {
 		netdev_err(fep->netdev, "MDIO read timeout\n");
+		ret = -ETIMEDOUT;
 		goto out;
 	}
 
@@ -1895,6 +1888,7 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
+	unsigned long time_left;
 	int ret, frame_start, frame_addr;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1904,6 +1898,8 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	else
 		ret = 0;
 
+	reinit_completion(&fep->mdio_done);
+
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1915,9 +1911,11 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		ret = fec_enet_mdio_wait(fep);
-		if (ret) {
+		time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+		if (time_left == 0) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
+			ret = -ETIMEDOUT;
 			goto out;
 		}
 	} else {
@@ -1933,9 +1931,12 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	ret = fec_enet_mdio_wait(fep);
-	if (ret)
+	time_left = wait_for_completion_timeout(&fep->mdio_done,
+			usecs_to_jiffies(FEC_MII_TIMEOUT));
+	if (time_left == 0) {
 		netdev_err(fep->netdev, "MDIO write timeout\n");
+		ret  = -ETIMEDOUT;
+	}
 
 out:
 	pm_runtime_mark_last_busy(dev);
@@ -2144,9 +2145,6 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
-	/* Clear any pending transaction complete indication */
-	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
-
 	fep->mii_bus = mdiobus_alloc();
 	if (fep->mii_bus == NULL) {
 		err = -ENOMEM;
@@ -3688,6 +3686,7 @@ fec_probe(struct platform_device *pdev)
 		fep->irq[i] = irq;
 	}
 
+	init_completion(&fep->mdio_done);
 	ret = fec_enet_mii_init(pdev);
 	if (ret)
 		goto failed_mii_init;
-- 
cgit v1.2.3-59-g8ed1b


From c4db9934a33e5f276965a14b3eea7a6d64c85065 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 09:40:52 +0000
Subject: net: ll_temac: Fix return value check in temac_probe()

In case of error, the function devm_ioremap() returns a NULL pointer,
not ERR_PTR(). The IS_ERR() test in the return value check should
be replaced with a NULL test.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Acked-by: Esben Haabendal <esben@geanix.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/xilinx/ll_temac_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/xilinx/ll_temac_main.c b/drivers/net/ethernet/xilinx/ll_temac_main.c
index 3e313e71ae36..929244064abd 100644
--- a/drivers/net/ethernet/xilinx/ll_temac_main.c
+++ b/drivers/net/ethernet/xilinx/ll_temac_main.c
@@ -1410,9 +1410,9 @@ static int temac_probe(struct platform_device *pdev)
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
 	lp->regs = devm_ioremap(&pdev->dev, res->start,
 					resource_size(res));
-	if (IS_ERR(lp->regs)) {
+	if (!lp->regs) {
 		dev_err(&pdev->dev, "could not map TEMAC registers\n");
-		return PTR_ERR(lp->regs);
+		return -ENOMEM;
 	}
 
 	/* Select register access functions with the specified
@@ -1505,10 +1505,10 @@ static int temac_probe(struct platform_device *pdev)
 		res = platform_get_resource(pdev, IORESOURCE_MEM, 1);
 		lp->sdma_regs = devm_ioremap(&pdev->dev, res->start,
 						     resource_size(res));
-		if (IS_ERR(lp->sdma_regs)) {
+		if (!lp->sdma_regs) {
 			dev_err(&pdev->dev,
 				"could not map DMA registers\n");
-			return PTR_ERR(lp->sdma_regs);
+			return -ENOMEM;
 		}
 		if (pdata->dma_little_endian) {
 			lp->dma_in = temac_dma_in32_le;
-- 
cgit v1.2.3-59-g8ed1b


From 97fff7c8de1e54e5326dfeb66085796864bceb64 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 10:43:22 +0000
Subject: dpaa2-eth: fix error return code in setup_dpni()

Fix to return negative error code -ENOMEM from the error handling
case instead of 0, as done elsewhere in this function.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 8ec435ba7d27..11accab81ea1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2702,8 +2702,10 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
 
 	priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) *
 				       dpaa2_eth_fs_count(priv), GFP_KERNEL);
-	if (!priv->cls_rules)
+	if (!priv->cls_rules) {
+		err = -ENOMEM;
 		goto close;
+	}
 
 	return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 78734404ef9c133eac70339415c8028dbe19109a Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 28 Apr 2020 00:01:39 -0700
Subject: net: usb: ax88179_178a: Implement ethtool_ops set_eeprom

Upon failing to read a valid MAC address from EEPROM, the vendor driver
writes the netdev's address back to EEPROM and invokes an EEPROM reload
operation. Based on this we can implement the ethtool_ops set_eeprom
and provide the means to populate the EEPROM from within Linux.

It's worth noting that ax88179_get_eeprom() will return some default
data unless the content of the EEPROM is deemed "complete", so until the
EEPROM is fully populated (e.g. by running ethtool -e | ethtool -E)
data written with ax88179_set_eeprom() will appear not to stick.

The implementation is based on asix_set_eeprom(), from asix_common.c

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/ax88179_178a.c | 77 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 77 insertions(+)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index 93044cf1417a..b05bb11a02cb 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -31,6 +31,7 @@
 #define AX_ACCESS_PHY				0x02
 #define AX_ACCESS_EEPROM			0x04
 #define AX_ACCESS_EFUS				0x05
+#define AX_RELOAD_EEPROM_EFUSE			0x06
 #define AX_PAUSE_WATERLVL_HIGH			0x54
 #define AX_PAUSE_WATERLVL_LOW			0x55
 
@@ -611,6 +612,81 @@ ax88179_get_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
 	return 0;
 }
 
+static int
+ax88179_set_eeprom(struct net_device *net, struct ethtool_eeprom *eeprom,
+		   u8 *data)
+{
+	struct usbnet *dev = netdev_priv(net);
+	u16 *eeprom_buff;
+	int first_word;
+	int last_word;
+	int ret;
+	int i;
+
+	netdev_dbg(net, "write EEPROM len %d, offset %d, magic 0x%x\n",
+		   eeprom->len, eeprom->offset, eeprom->magic);
+
+	if (eeprom->len == 0)
+		return -EINVAL;
+
+	if (eeprom->magic != AX88179_EEPROM_MAGIC)
+		return -EINVAL;
+
+	first_word = eeprom->offset >> 1;
+	last_word = (eeprom->offset + eeprom->len - 1) >> 1;
+
+	eeprom_buff = kmalloc_array(last_word - first_word + 1, sizeof(u16),
+				    GFP_KERNEL);
+	if (!eeprom_buff)
+		return -ENOMEM;
+
+	/* align data to 16 bit boundaries, read the missing data from
+	   the EEPROM */
+	if (eeprom->offset & 1) {
+		ret = ax88179_read_cmd(dev, AX_ACCESS_EEPROM, first_word, 1, 2,
+				       &eeprom_buff[0]);
+		if (ret < 0) {
+			netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", first_word);
+			goto free;
+		}
+	}
+
+	if ((eeprom->offset + eeprom->len) & 1) {
+		ret = ax88179_read_cmd(dev, AX_ACCESS_EEPROM, last_word, 1, 2,
+				       &eeprom_buff[last_word - first_word]);
+		if (ret < 0) {
+			netdev_err(net, "Failed to read EEPROM at offset 0x%02x.\n", last_word);
+			goto free;
+		}
+	}
+
+	memcpy((u8 *)eeprom_buff + (eeprom->offset & 1), data, eeprom->len);
+
+	for (i = first_word; i <= last_word; i++) {
+		netdev_dbg(net, "write to EEPROM at offset 0x%02x, data 0x%04x\n",
+			   i, eeprom_buff[i - first_word]);
+		ret = ax88179_write_cmd(dev, AX_ACCESS_EEPROM, i, 1, 2,
+					&eeprom_buff[i - first_word]);
+		if (ret < 0) {
+			netdev_err(net, "Failed to write EEPROM at offset 0x%02x.\n", i);
+			goto free;
+		}
+		msleep(20);
+	}
+
+	/* reload EEPROM data */
+	ret = ax88179_write_cmd(dev, AX_RELOAD_EEPROM_EFUSE, 0x0000, 0, 0, NULL);
+	if (ret < 0) {
+		netdev_err(net, "Failed to reload EEPROM data\n");
+		goto free;
+	}
+
+	ret = 0;
+free:
+	kfree(eeprom_buff);
+	return ret;
+}
+
 static int ax88179_get_link_ksettings(struct net_device *net,
 				      struct ethtool_link_ksettings *cmd)
 {
@@ -822,6 +898,7 @@ static const struct ethtool_ops ax88179_ethtool_ops = {
 	.set_wol		= ax88179_set_wol,
 	.get_eeprom_len		= ax88179_get_eeprom_len,
 	.get_eeprom		= ax88179_get_eeprom,
+	.set_eeprom		= ax88179_set_eeprom,
 	.get_eee		= ax88179_get_eee,
 	.set_eee		= ax88179_set_eee,
 	.nway_reset		= usbnet_nway_reset,
-- 
cgit v1.2.3-59-g8ed1b


From b410439ca371334cd9e5772e411b5e2b6b2d0c9a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 28 Apr 2020 22:54:12 +0200
Subject: r8169: improve max jumbo packet size definition

Sync definition of max jumbo packet size with vendor driver and reserve
22 bytes for VLAN ethernet header plus checksum.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index aa3e63e031da..82f2ae7b6cf7 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -88,10 +88,10 @@
 #define RTL_R16(tp, reg)		readw(tp->mmio_addr + (reg))
 #define RTL_R32(tp, reg)		readl(tp->mmio_addr + (reg))
 
-#define JUMBO_4K	(4*1024 - ETH_HLEN - 2)
-#define JUMBO_6K	(6*1024 - ETH_HLEN - 2)
-#define JUMBO_7K	(7*1024 - ETH_HLEN - 2)
-#define JUMBO_9K	(9*1024 - ETH_HLEN - 2)
+#define JUMBO_4K	(4 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define JUMBO_6K	(6 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define JUMBO_7K	(7 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN)
+#define JUMBO_9K	(9 * SZ_1K - VLAN_ETH_HLEN - ETH_FCS_LEN)
 
 static const struct {
 	const char *name;
-- 
cgit v1.2.3-59-g8ed1b


From 838974e1e08a9724525fa13ef15d6e021c23c99d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Tue, 28 Apr 2020 22:55:59 +0200
Subject: r8169: configure PME_SIGNAL for RTL8125 too

RTL8125 supports the same PME_SIGNAL handling as all later RTL8168
chip variants.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 82f2ae7b6cf7..0ac3976e3204 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1429,7 +1429,7 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 		break;
 	case RTL_GIGA_MAC_VER_34:
 	case RTL_GIGA_MAC_VER_37:
-	case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_52:
+	case RTL_GIGA_MAC_VER_39 ... RTL_GIGA_MAC_VER_61:
 		options = RTL_R8(tp, Config2) & ~PME_SIGNAL;
 		if (wolopts)
 			options |= PME_SIGNAL;
-- 
cgit v1.2.3-59-g8ed1b


From cde0f4f81d1c11ccc214146e1c550bfe48629fac Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Tue, 28 Apr 2020 23:15:02 +0200
Subject: net: phy: at803x: add downshift support

The AR8031 and AR8035 support link speed downshift. Add driver
support for it. One peculiarity of these PHYs is that they need a
software reset after changing the setting, thus add the .soft_reset()
op and do a phy_init_hw() if necessary.

This was tested on a custom board with the AR8031.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 87 insertions(+)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 31b6edcc1fd1..f4fec5f644e9 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -43,6 +43,9 @@
 #define AT803X_INTR_STATUS			0x13
 
 #define AT803X_SMART_SPEED			0x14
+#define AT803X_SMART_SPEED_ENABLE		BIT(5)
+#define AT803X_SMART_SPEED_RETRY_LIMIT_MASK	GENMASK(4, 2)
+#define AT803X_SMART_SPEED_BYPASS_TIMER		BIT(1)
 #define AT803X_LED_CONTROL			0x18
 
 #define AT803X_DEVICE_ADDR			0x03
@@ -103,6 +106,10 @@
 #define AT803X_CLK_OUT_STRENGTH_HALF		1
 #define AT803X_CLK_OUT_STRENGTH_QUARTER		2
 
+#define AT803X_DEFAULT_DOWNSHIFT 5
+#define AT803X_MIN_DOWNSHIFT 2
+#define AT803X_MAX_DOWNSHIFT 9
+
 #define ATH9331_PHY_ID 0x004dd041
 #define ATH8030_PHY_ID 0x004dd076
 #define ATH8031_PHY_ID 0x004dd074
@@ -713,6 +720,80 @@ static int at803x_read_status(struct phy_device *phydev)
 	return 0;
 }
 
+static int at803x_get_downshift(struct phy_device *phydev, u8 *d)
+{
+	int val;
+
+	val = phy_read(phydev, AT803X_SMART_SPEED);
+	if (val < 0)
+		return val;
+
+	if (val & AT803X_SMART_SPEED_ENABLE)
+		*d = FIELD_GET(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, val) + 2;
+	else
+		*d = DOWNSHIFT_DEV_DISABLE;
+
+	return 0;
+}
+
+static int at803x_set_downshift(struct phy_device *phydev, u8 cnt)
+{
+	u16 mask, set;
+	int ret;
+
+	switch (cnt) {
+	case DOWNSHIFT_DEV_DEFAULT_COUNT:
+		cnt = AT803X_DEFAULT_DOWNSHIFT;
+		fallthrough;
+	case AT803X_MIN_DOWNSHIFT ... AT803X_MAX_DOWNSHIFT:
+		set = AT803X_SMART_SPEED_ENABLE |
+		      AT803X_SMART_SPEED_BYPASS_TIMER |
+		      FIELD_PREP(AT803X_SMART_SPEED_RETRY_LIMIT_MASK, cnt - 2);
+		mask = AT803X_SMART_SPEED_RETRY_LIMIT_MASK;
+		break;
+	case DOWNSHIFT_DEV_DISABLE:
+		set = 0;
+		mask = AT803X_SMART_SPEED_ENABLE |
+		       AT803X_SMART_SPEED_BYPASS_TIMER;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	ret = phy_modify_changed(phydev, AT803X_SMART_SPEED, mask, set);
+
+	/* After changing the smart speed settings, we need to perform a
+	 * software reset. Use phy_init_hw() to make sure we reapply any
+	 * values which might have been lost during the software reset.
+	 */
+	if (ret == 1)
+		ret = phy_init_hw(phydev);
+
+	return ret;
+}
+
+static int at803x_get_tunable(struct phy_device *phydev,
+			      struct ethtool_tunable *tuna, void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return at803x_get_downshift(phydev, data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+static int at803x_set_tunable(struct phy_device *phydev,
+			      struct ethtool_tunable *tuna, const void *data)
+{
+	switch (tuna->id) {
+	case ETHTOOL_PHY_DOWNSHIFT:
+		return at803x_set_downshift(phydev, *(const u8 *)data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 static struct phy_driver at803x_driver[] = {
 {
 	/* Qualcomm Atheros AR8035 */
@@ -722,6 +803,7 @@ static struct phy_driver at803x_driver[] = {
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
 	.config_init		= at803x_config_init,
+	.soft_reset		= genphy_soft_reset,
 	.set_wol		= at803x_set_wol,
 	.get_wol		= at803x_get_wol,
 	.suspend		= at803x_suspend,
@@ -730,6 +812,8 @@ static struct phy_driver at803x_driver[] = {
 	.read_status		= at803x_read_status,
 	.ack_interrupt		= at803x_ack_interrupt,
 	.config_intr		= at803x_config_intr,
+	.get_tunable		= at803x_get_tunable,
+	.set_tunable		= at803x_set_tunable,
 }, {
 	/* Qualcomm Atheros AR8030 */
 	.phy_id			= ATH8030_PHY_ID,
@@ -754,6 +838,7 @@ static struct phy_driver at803x_driver[] = {
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
 	.config_init		= at803x_config_init,
+	.soft_reset		= genphy_soft_reset,
 	.set_wol		= at803x_set_wol,
 	.get_wol		= at803x_get_wol,
 	.suspend		= at803x_suspend,
@@ -763,6 +848,8 @@ static struct phy_driver at803x_driver[] = {
 	.aneg_done		= at803x_aneg_done,
 	.ack_interrupt		= &at803x_ack_interrupt,
 	.config_intr		= &at803x_config_intr,
+	.get_tunable		= at803x_get_tunable,
+	.set_tunable		= at803x_set_tunable,
 }, {
 	/* Qualcomm Atheros AR8032 */
 	PHY_ID_MATCH_EXACT(ATH8032_PHY_ID),
-- 
cgit v1.2.3-59-g8ed1b


From 86570d8a2f768485a515f646d702a37b34b27260 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 29 Apr 2020 01:06:56 +0200
Subject: net: phy: bcm54140: use genphy_soft_reset()

Set the .soft_reset() op to be sure there will be a reset even if there
is no hardware reset line registered.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index c009ac2856a5..18d1e798a4c3 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -862,6 +862,7 @@ static struct phy_driver bcm54140_drivers[] = {
 		.probe		= bcm54140_probe,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
+		.soft_reset	= genphy_soft_reset,
 		.get_tunable	= bcm54140_get_tunable,
 		.set_tunable	= bcm54140_set_tunable,
 	},
-- 
cgit v1.2.3-59-g8ed1b


From afcecca56f0cd287c7895511dc380b95c633b1a2 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 29 Apr 2020 01:06:57 +0200
Subject: net: phy: bcm54140: fix phy_id_mask

Broadcom defines the bits for this PHY as follows:
  { oui[24:3], model[6:0], revision[2:0] }

Thus we have to mask the lower three bits only.

Fixes: 6937602ed3f9 ("net: phy: add Broadcom BCM54140 support")
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 18d1e798a4c3..63acf34663d9 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -852,7 +852,7 @@ static int bcm54140_set_tunable(struct phy_device *phydev,
 static struct phy_driver bcm54140_drivers[] = {
 	{
 		.phy_id         = PHY_ID_BCM54140,
-		.phy_id_mask    = 0xfffffff0,
+		.phy_id_mask    = 0xfffffff8,
 		.name           = "Broadcom BCM54140",
 		.features       = PHY_GBIT_FEATURES,
 		.config_init    = bcm54140_config_init,
@@ -870,7 +870,7 @@ static struct phy_driver bcm54140_drivers[] = {
 module_phy_driver(bcm54140_drivers);
 
 static struct mdio_device_id __maybe_unused bcm54140_tbl[] = {
-	{ PHY_ID_BCM54140, 0xfffffff0 },
+	{ PHY_ID_BCM54140, 0xfffffff8 },
 	{ }
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From e9a66851de722dfa97a41e982266603cdb97ea3b Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 29 Apr 2020 01:06:58 +0200
Subject: net: phy: bcm54140: apply the workaround on b0 chips

The lower three bits of the phy_id specify the chip stepping. The
workaround is specifically for the B0 stepping. Apply it only on these
chips.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 63acf34663d9..d0498ed47878 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -115,6 +115,9 @@
 #define BCM54140_HWMON_IN_ALARM_BIT(ch) ((ch) ? BCM54140_RDB_MON_ISR_3V3 \
 					      : BCM54140_RDB_MON_ISR_1V0)
 
+#define BCM54140_PHY_ID_REV(phy_id)	((phy_id) & 0x7)
+#define BCM54140_REV_B0			1
+
 #define BCM54140_DEFAULT_DOWNSHIFT 5
 #define BCM54140_MAX_DOWNSHIFT 9
 
@@ -632,9 +635,11 @@ static int bcm54140_config_init(struct phy_device *phydev)
 	int ret;
 
 	/* Apply hardware errata */
-	ret = bcm54140_b0_workaround(phydev);
-	if (ret)
-		return ret;
+	if (BCM54140_PHY_ID_REV(phydev->phy_id) == BCM54140_REV_B0) {
+		ret = bcm54140_b0_workaround(phydev);
+		if (ret)
+			return ret;
+	}
 
 	/* Unmask events we are interested in. */
 	reg &= ~(BCM54140_RDB_INT_DUPLEX |
-- 
cgit v1.2.3-59-g8ed1b


From e4e51da66dc812176cca16b0f8a5b87b173deb5d Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 29 Apr 2020 01:06:59 +0200
Subject: net: phy: bcm54140: add second PHY ID

This PHY has two PHY IDs depending on its mode. Adjust the mask so that
it includes both IDs.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 11 +++++++++--
 include/linux/brcmphy.h    |  2 +-
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index d0498ed47878..400d7c3c405a 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -115,6 +115,13 @@
 #define BCM54140_HWMON_IN_ALARM_BIT(ch) ((ch) ? BCM54140_RDB_MON_ISR_3V3 \
 					      : BCM54140_RDB_MON_ISR_1V0)
 
+/* This PHY has two different PHY IDs depending on its MODE_SEL pin. This
+ * pin chooses between 4x SGMII and QSGMII mode:
+ *   AE02_5009 4x SGMII
+ *   AE02_5019 QSGMII
+ */
+#define BCM54140_PHY_ID_MASK	0xffffffe8
+
 #define BCM54140_PHY_ID_REV(phy_id)	((phy_id) & 0x7)
 #define BCM54140_REV_B0			1
 
@@ -857,7 +864,7 @@ static int bcm54140_set_tunable(struct phy_device *phydev,
 static struct phy_driver bcm54140_drivers[] = {
 	{
 		.phy_id         = PHY_ID_BCM54140,
-		.phy_id_mask    = 0xfffffff8,
+		.phy_id_mask    = BCM54140_PHY_ID_MASK,
 		.name           = "Broadcom BCM54140",
 		.features       = PHY_GBIT_FEATURES,
 		.config_init    = bcm54140_config_init,
@@ -875,7 +882,7 @@ static struct phy_driver bcm54140_drivers[] = {
 module_phy_driver(bcm54140_drivers);
 
 static struct mdio_device_id __maybe_unused bcm54140_tbl[] = {
-	{ PHY_ID_BCM54140, 0xfffffff8 },
+	{ PHY_ID_BCM54140, BCM54140_PHY_ID_MASK },
 	{ }
 };
 
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 8be150e69c7c..58d0150acc3e 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -25,7 +25,7 @@
 #define PHY_ID_BCM5461			0x002060c0
 #define PHY_ID_BCM54612E		0x03625e60
 #define PHY_ID_BCM54616S		0x03625d10
-#define PHY_ID_BCM54140			0xae025019
+#define PHY_ID_BCM54140			0xae025009
 #define PHY_ID_BCM57780			0x03625d90
 #define PHY_ID_BCM89610			0x03625cd0
 
-- 
cgit v1.2.3-59-g8ed1b


From 2597912514925f6f06e85807cc9fc0fdfa630d11 Mon Sep 17 00:00:00 2001
From: Zheng Bin <zhengbin13@huawei.com>
Date: Wed, 29 Apr 2020 09:58:24 +0800
Subject: hinic: make symbol 'dump_mox_reg' static

Fix sparse warnings:

drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c:601:6: warning: symbol 'dump_mox_reg' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Bin <zhengbin13@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
index f8626dfd192e..d5cf31529dbf 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
@@ -598,7 +598,7 @@ static void write_mbox_msg_attr(struct hinic_mbox_func_to_func *func_to_func,
 			     HINIC_FUNC_CSR_MAILBOX_CONTROL_OFF, mbox_ctrl);
 }
 
-void dump_mox_reg(struct hinic_hwdev *hwdev)
+static void dump_mox_reg(struct hinic_hwdev *hwdev)
 {
 	u32 val;
 
-- 
cgit v1.2.3-59-g8ed1b


From 37ecb5b8b8cd3156e739fd1c56a8e3842b72ebad Mon Sep 17 00:00:00 2001
From: Zou Wei <zou_wei@huawei.com>
Date: Wed, 29 Apr 2020 11:35:28 +0800
Subject: hinic: Use kmemdup instead of kzalloc and memcpy

Fixes coccicheck warnings:

 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c:452:17-24: WARNING opportunity for kmemdup
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c:458:23-30: WARNING opportunity for kmemdup

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zou Wei <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
index d5cf31529dbf..564fb2294a29 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
@@ -449,18 +449,15 @@ static void recv_mbox_handler(struct hinic_mbox_func_to_func *func_to_func,
 		return;
 	}
 
-	rcv_mbox_temp = kzalloc(sizeof(*rcv_mbox_temp), GFP_KERNEL);
+	rcv_mbox_temp = kmemdup(recv_mbox, sizeof(*rcv_mbox_temp), GFP_KERNEL);
 	if (!rcv_mbox_temp)
 		return;
 
-	memcpy(rcv_mbox_temp, recv_mbox, sizeof(*rcv_mbox_temp));
-
-	rcv_mbox_temp->mbox = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
+	rcv_mbox_temp->mbox = kmemdup(recv_mbox->mbox, MBOX_MAX_BUF_SZ,
+				      GFP_KERNEL);
 	if (!rcv_mbox_temp->mbox)
 		goto err_alloc_rcv_mbox_msg;
 
-	memcpy(rcv_mbox_temp->mbox, recv_mbox->mbox, MBOX_MAX_BUF_SZ);
-
 	rcv_mbox_temp->buf_out = kzalloc(MBOX_MAX_BUF_SZ, GFP_KERNEL);
 	if (!rcv_mbox_temp->buf_out)
 		goto err_alloc_rcv_mbox_buf;
-- 
cgit v1.2.3-59-g8ed1b


From d46edd671a147032e22cfeb271a5734703093649 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 30 Apr 2020 00:15:04 -0700
Subject: bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS

Currently, sysctl kernel.bpf_stats_enabled controls BPF runtime stats.
Typical userspace tools use kernel.bpf_stats_enabled as follows:

  1. Enable kernel.bpf_stats_enabled;
  2. Check program run_time_ns;
  3. Sleep for the monitoring period;
  4. Check program run_time_ns again, calculate the difference;
  5. Disable kernel.bpf_stats_enabled.

The problem with this approach is that only one userspace tool can toggle
this sysctl. If multiple tools toggle the sysctl at the same time, the
measurement may be inaccurate.

To fix this problem while keeping backward compatibility, introduce a new
bpf command BPF_ENABLE_STATS. On success, this command enables stats and
returns a valid fd. BPF_ENABLE_STATS takes argument "type". Currently,
only one type, BPF_STATS_RUN_TIME, is supported. We can extend the
command to support other types of stats in the future.

With BPF_ENABLE_STATS, a user space tool would have the following flow:

  1. Get a fd with BPF_ENABLE_STATS, and make sure it is valid;
  2. Check program run_time_ns;
  3. Sleep for the monitoring period;
  4. Check program run_time_ns again, calculate the difference;
  5. Close the fd.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200430071506.1408910-2-songliubraving@fb.com
---
 include/linux/bpf.h            |  1 +
 include/uapi/linux/bpf.h       | 11 ++++++++
 kernel/bpf/syscall.c           | 57 ++++++++++++++++++++++++++++++++++++++++++
 kernel/sysctl.c                | 36 +++++++++++++++++++++++++-
 tools/include/uapi/linux/bpf.h | 11 ++++++++
 5 files changed, 115 insertions(+), 1 deletion(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c07b1d2f3824..1262ec460ab3 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -987,6 +987,7 @@ _out:							\
 
 #ifdef CONFIG_BPF_SYSCALL
 DECLARE_PER_CPU(int, bpf_prog_active);
+extern struct mutex bpf_stats_enabled_mutex;
 
 /*
  * Block execution of BPF programs attached to instrumentation (perf,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 0eccafae55bb..705e4822f997 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };
 
 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;
 
+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c75b2dd2459c..4f34eecec9ce 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3872,6 +3872,60 @@ static int bpf_link_get_fd_by_id(const union bpf_attr *attr)
 	return fd;
 }
 
+DEFINE_MUTEX(bpf_stats_enabled_mutex);
+
+static int bpf_stats_release(struct inode *inode, struct file *file)
+{
+	mutex_lock(&bpf_stats_enabled_mutex);
+	static_key_slow_dec(&bpf_stats_enabled_key.key);
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return 0;
+}
+
+static const struct file_operations bpf_stats_fops = {
+	.release = bpf_stats_release,
+};
+
+static int bpf_enable_runtime_stats(void)
+{
+	int fd;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+
+	/* Set a very high limit to avoid overflow */
+	if (static_key_count(&bpf_stats_enabled_key.key) > INT_MAX / 2) {
+		mutex_unlock(&bpf_stats_enabled_mutex);
+		return -EBUSY;
+	}
+
+	fd = anon_inode_getfd("bpf-stats", &bpf_stats_fops, NULL, O_CLOEXEC);
+	if (fd >= 0)
+		static_key_slow_inc(&bpf_stats_enabled_key.key);
+
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return fd;
+}
+
+#define BPF_ENABLE_STATS_LAST_FIELD enable_stats.type
+
+static int bpf_enable_stats(union bpf_attr *attr)
+{
+
+	if (CHECK_ATTR(BPF_ENABLE_STATS))
+		return -EINVAL;
+
+	if (!capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	switch (attr->enable_stats.type) {
+	case BPF_STATS_RUN_TIME:
+		return bpf_enable_runtime_stats();
+	default:
+		break;
+	}
+	return -EINVAL;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -3996,6 +4050,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 		err = bpf_obj_get_next_id(&attr, uattr,
 					  &link_idr, &link_idr_lock);
 		break;
+	case BPF_ENABLE_STATS:
+		err = bpf_enable_stats(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index e961286d0e14..7adfe5dbce9d 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -201,6 +201,40 @@ static int max_extfrag_threshold = 1000;
 
 #endif /* CONFIG_SYSCTL */
 
+#ifdef CONFIG_BPF_SYSCALL
+static int bpf_stats_handler(struct ctl_table *table, int write,
+			     void __user *buffer, size_t *lenp,
+			     loff_t *ppos)
+{
+	struct static_key *key = (struct static_key *)table->data;
+	static int saved_val;
+	int val, ret;
+	struct ctl_table tmp = {
+		.data   = &val,
+		.maxlen = sizeof(val),
+		.mode   = table->mode,
+		.extra1 = SYSCTL_ZERO,
+		.extra2 = SYSCTL_ONE,
+	};
+
+	if (write && !capable(CAP_SYS_ADMIN))
+		return -EPERM;
+
+	mutex_lock(&bpf_stats_enabled_mutex);
+	val = saved_val;
+	ret = proc_dointvec_minmax(&tmp, write, buffer, lenp, ppos);
+	if (write && !ret && val != saved_val) {
+		if (val)
+			static_key_slow_inc(key);
+		else
+			static_key_slow_dec(key);
+		saved_val = val;
+	}
+	mutex_unlock(&bpf_stats_enabled_mutex);
+	return ret;
+}
+#endif
+
 /*
  * /proc/sys support
  */
@@ -2549,7 +2583,7 @@ static struct ctl_table kern_table[] = {
 		.data		= &bpf_stats_enabled_key.key,
 		.maxlen		= sizeof(bpf_stats_enabled_key),
 		.mode		= 0644,
-		.proc_handler	= proc_do_static_key,
+		.proc_handler	= bpf_stats_handler,
 	},
 #endif
 #if defined(CONFIG_TREE_RCU)
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 0eccafae55bb..705e4822f997 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -115,6 +115,7 @@ enum bpf_cmd {
 	BPF_LINK_UPDATE,
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
+	BPF_ENABLE_STATS,
 };
 
 enum bpf_map_type {
@@ -390,6 +391,12 @@ enum {
  */
 #define BPF_F_QUERY_EFFECTIVE	(1U << 0)
 
+/* type for BPF_ENABLE_STATS */
+enum bpf_stats_type {
+	/* enabled run_time_ns and run_cnt */
+	BPF_STATS_RUN_TIME = 0,
+};
+
 enum bpf_stack_build_id_status {
 	/* user space need an empty entry to identify end of a trace */
 	BPF_STACK_BUILD_ID_EMPTY = 0,
@@ -601,6 +608,10 @@ union bpf_attr {
 		__u32		old_prog_fd;
 	} link_update;
 
+	struct { /* struct used by BPF_ENABLE_STATS command */
+		__u32		type;
+	} enable_stats;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
-- 
cgit v1.2.3-59-g8ed1b


From 0bee106716cfb2c6da81916b968395db22bd7755 Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 30 Apr 2020 00:15:05 -0700
Subject: libbpf: Add support for command BPF_ENABLE_STATS

bpf_enable_stats() is added to enable given stats.

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200430071506.1408910-3-songliubraving@fb.com
---
 tools/lib/bpf/bpf.c      | 10 ++++++++++
 tools/lib/bpf/bpf.h      |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 12 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 8f2f0958d446..43322f0d6c7f 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -841,3 +841,13 @@ int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf, __u32 *buf_len,
 
 	return err;
 }
+
+int bpf_enable_stats(enum bpf_stats_type type)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.enable_stats.type = type;
+
+	return sys_bpf(BPF_ENABLE_STATS, &attr, sizeof(attr));
+}
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 335b457b3a25..1901b2777854 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -231,6 +231,7 @@ LIBBPF_API int bpf_load_btf(void *btf, __u32 btf_size, char *log_buf,
 LIBBPF_API int bpf_task_fd_query(int pid, int fd, __u32 flags, char *buf,
 				 __u32 *buf_len, __u32 *prog_id, __u32 *fd_type,
 				 __u64 *probe_offset, __u64 *probe_addr);
+LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);
 
 #ifdef __cplusplus
 } /* extern "C" */
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 7cd49aa38005..e03bd4db827e 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -257,6 +257,7 @@ LIBBPF_0.0.8 {
 
 LIBBPF_0.0.9 {
 	global:
+		bpf_enable_stats;
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 } LIBBPF_0.0.8;
-- 
cgit v1.2.3-59-g8ed1b


From 31a9f7fe93378ab587d758d5b2e96a237caa7b8c Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Thu, 30 Apr 2020 00:15:06 -0700
Subject: bpf: Add selftest for BPF_ENABLE_STATS

Add test for BPF_ENABLE_STATS, which should enable run_time_ns stats.

~/selftests/bpf# ./test_progs -t enable_stats  -v
test_enable_stats:PASS:skel_open_and_load 0 nsec
test_enable_stats:PASS:get_stats_fd 0 nsec
test_enable_stats:PASS:attach_raw_tp 0 nsec
test_enable_stats:PASS:get_prog_info 0 nsec
test_enable_stats:PASS:check_stats_enabled 0 nsec
test_enable_stats:PASS:check_run_cnt_valid 0 nsec
Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Song Liu <songliubraving@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200430071506.1408910-4-songliubraving@fb.com
---
 .../selftests/bpf/prog_tests/enable_stats.c        | 45 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_enable_stats.c        | 18 +++++++++
 2 files changed, 63 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/enable_stats.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_enable_stats.c

diff --git a/tools/testing/selftests/bpf/prog_tests/enable_stats.c b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
new file mode 100644
index 000000000000..2cb2085917e7
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/enable_stats.c
@@ -0,0 +1,45 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include "test_enable_stats.skel.h"
+
+void test_enable_stats(void)
+{
+	struct test_enable_stats *skel;
+	int stats_fd, err, prog_fd;
+	struct bpf_prog_info info;
+	__u32 info_len = sizeof(info);
+	int duration = 0;
+
+	skel = test_enable_stats__open_and_load();
+	if (CHECK(!skel, "skel_open_and_load", "skeleton open/load failed\n"))
+		return;
+
+	stats_fd = bpf_enable_stats(BPF_STATS_RUN_TIME);
+	if (CHECK(stats_fd < 0, "get_stats_fd", "failed %d\n", errno)) {
+		test_enable_stats__destroy(skel);
+		return;
+	}
+
+	err = test_enable_stats__attach(skel);
+	if (CHECK(err, "attach_raw_tp", "err %d\n", err))
+		goto cleanup;
+
+	test_enable_stats__detach(skel);
+
+	prog_fd = bpf_program__fd(skel->progs.test_enable_stats);
+	memset(&info, 0, info_len);
+	err = bpf_obj_get_info_by_fd(prog_fd, &info, &info_len);
+	if (CHECK(err, "get_prog_info",
+		  "failed to get bpf_prog_info for fd %d\n", prog_fd))
+		goto cleanup;
+	if (CHECK(info.run_time_ns == 0, "check_stats_enabled",
+		  "failed to enable run_time_ns stats\n"))
+		goto cleanup;
+
+	CHECK(info.run_cnt != skel->bss->count, "check_run_cnt_valid",
+	      "invalid run_cnt stats\n");
+
+cleanup:
+	test_enable_stats__destroy(skel);
+	close(stats_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_enable_stats.c b/tools/testing/selftests/bpf/progs/test_enable_stats.c
new file mode 100644
index 000000000000..01a002ade529
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_enable_stats.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <linux/types.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u64 count = 0;
+
+SEC("raw_tracepoint/sys_enter")
+int test_enable_stats(void *ctx)
+{
+	count += 1;
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From ef891284b130c365a7b60afaf7fe3c92c4260bfb Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 10:11:28 +0200
Subject: r8169: remove not needed parameter in rtl8169_set_magic_reg

Remove the unused mac_version parameter from rtl8169_set_magic_reg.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 0ac3976e3204..0f869a761d8c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2608,7 +2608,7 @@ static void rtl_set_rx_tx_desc_registers(struct rtl8169_private *tp)
 	RTL_W32(tp, RxDescAddrLow, ((u64) tp->RxPhyAddr) & DMA_BIT_MASK(32));
 }
 
-static void rtl8169_set_magic_reg(struct rtl8169_private *tp, unsigned mac_version)
+static void rtl8169_set_magic_reg(struct rtl8169_private *tp)
 {
 	u32 val;
 
@@ -3811,7 +3811,7 @@ static void rtl_hw_start_8169(struct rtl8169_private *tp)
 
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 
-	rtl8169_set_magic_reg(tp, tp->mac_version);
+	rtl8169_set_magic_reg(tp);
 
 	/* disable interrupt coalescing */
 	RTL_W16(tp, IntrMitigate, 0x0000);
-- 
cgit v1.2.3-59-g8ed1b


From cff9f12b18915d957a2130885a00f8ab15cff7e4 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:31 +0300
Subject: net/core: Introduce netdev_get_xmit_slave

Add a new ndo to get the xmit slave of a master device. The reference
counters are not incremented, so the caller must be careful with locks.
The user can ask for the xmit slave under the assumption that all the
slaves can transmit by setting the all_slaves argument to true.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/netdevice.h | 12 ++++++++++++
 net/core/dev.c            | 22 ++++++++++++++++++++++
 2 files changed, 34 insertions(+)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 130a668049ab..26bc0f11b7ad 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1146,6 +1146,12 @@ struct netdev_net_notifier {
  * int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev);
  *	Called to release previously enslaved netdev.
  *
+ * struct net_device *(*ndo_get_xmit_slave)(struct net_device *dev,
+ *					    struct sk_buff *skb,
+ *					    bool all_slaves);
+ *	Get the xmit slave of master device. If all_slaves is true, function
+ *	assumes all the slaves can transmit.
+ *
  *      Feature/offload setting functions.
  * netdev_features_t (*ndo_fix_features)(struct net_device *dev,
  *		netdev_features_t features);
@@ -1389,6 +1395,9 @@ struct net_device_ops {
 						 struct netlink_ext_ack *extack);
 	int			(*ndo_del_slave)(struct net_device *dev,
 						 struct net_device *slave_dev);
+	struct net_device*	(*ndo_get_xmit_slave)(struct net_device *dev,
+						      struct sk_buff *skb,
+						      bool all_slaves);
 	netdev_features_t	(*ndo_fix_features)(struct net_device *dev,
 						    netdev_features_t features);
 	int			(*ndo_set_features)(struct net_device *dev,
@@ -2731,6 +2740,9 @@ void netdev_freemem(struct net_device *dev);
 void synchronize_net(void);
 int init_dummy_netdev(struct net_device *dev);
 
+struct net_device *netdev_get_xmit_slave(struct net_device *dev,
+					 struct sk_buff *skb,
+					 bool all_slaves);
 struct net_device *dev_get_by_index(struct net *net, int ifindex);
 struct net_device *__dev_get_by_index(struct net *net, int ifindex);
 struct net_device *dev_get_by_index_rcu(struct net *net, int ifindex);
diff --git a/net/core/dev.c b/net/core/dev.c
index 9c9e763bfe0e..e6c10980abfd 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -7785,6 +7785,28 @@ void netdev_bonding_info_change(struct net_device *dev,
 }
 EXPORT_SYMBOL(netdev_bonding_info_change);
 
+/**
+ * netdev_get_xmit_slave - Get the xmit slave of master device
+ * @skb: The packet
+ * @all_slaves: assume all the slaves are active
+ *
+ * The reference counters are not incremented so the caller must be
+ * careful with locks. The caller must hold RCU lock.
+ * %NULL is returned if no slave is found.
+ */
+
+struct net_device *netdev_get_xmit_slave(struct net_device *dev,
+					 struct sk_buff *skb,
+					 bool all_slaves)
+{
+	const struct net_device_ops *ops = dev->netdev_ops;
+
+	if (!ops->ndo_get_xmit_slave)
+		return NULL;
+	return ops->ndo_get_xmit_slave(dev, skb, all_slaves);
+}
+EXPORT_SYMBOL(netdev_get_xmit_slave);
+
 static void netdev_adjacent_add_links(struct net_device *dev)
 {
 	struct netdev_adjacent *iter;
-- 
cgit v1.2.3-59-g8ed1b


From 119d48fd4298594beccf4f2ecd00627826ce2646 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:32 +0300
Subject: bonding: Export skip slave logic to function

As preparation for a following change that adds an array of
all slaves, extract the code that skips a slave into a function.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 47 +++++++++++++++++++++++------------------
 1 file changed, 26 insertions(+), 21 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2e70e43c5df5..f7aded014f08 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4087,6 +4087,29 @@ err:
 	bond_slave_arr_work_rearm(bond, 1);
 }
 
+static void bond_skip_slave(struct bond_up_slave *slaves,
+			    struct slave *skipslave)
+{
+	int idx;
+
+	/* Rare situation where caller has asked to skip a specific
+	 * slave but allocation failed (most likely!). BTW this is
+	 * only possible when the call is initiated from
+	 * __bond_release_one(). In this situation; overwrite the
+	 * skipslave entry in the array with the last entry from the
+	 * array to avoid a situation where the xmit path may choose
+	 * this to-be-skipped slave to send a packet out.
+	 */
+	for (idx = 0; slaves && idx < slaves->count; idx++) {
+		if (skipslave == slaves->arr[idx]) {
+			slaves->arr[idx] =
+				slaves->arr[slaves->count - 1];
+			slaves->count--;
+			break;
+		}
+	}
+}
+
 /* Build the usable slaves array in control path for modes that use xmit-hash
  * to determine the slave interface -
  * (a) BOND_MODE_8023AD
@@ -4156,27 +4179,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 	if (old_arr)
 		kfree_rcu(old_arr, rcu);
 out:
-	if (ret != 0 && skipslave) {
-		int idx;
-
-		/* Rare situation where caller has asked to skip a specific
-		 * slave but allocation failed (most likely!). BTW this is
-		 * only possible when the call is initiated from
-		 * __bond_release_one(). In this situation; overwrite the
-		 * skipslave entry in the array with the last entry from the
-		 * array to avoid a situation where the xmit path may choose
-		 * this to-be-skipped slave to send a packet out.
-		 */
-		old_arr = rtnl_dereference(bond->slave_arr);
-		for (idx = 0; old_arr != NULL && idx < old_arr->count; idx++) {
-			if (skipslave == old_arr->arr[idx]) {
-				old_arr->arr[idx] =
-				    old_arr->arr[old_arr->count-1];
-				old_arr->count--;
-				break;
-			}
-		}
-	}
+	if (ret != 0 && skipslave)
+		bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave);
+
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ed7d4f023b1a9b0578f20d66557c66452ab845ec Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:33 +0300
Subject: bonding: Rename slave_arr to usable_slaves

Rename slave_arr to usable_slaves, since we will have two arrays,
one for the usable slaves and the other for all slaves.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_alb.c  |  4 ++--
 drivers/net/bonding/bond_main.c | 40 ++++++++++++++++++++--------------------
 include/net/bonding.h           |  2 +-
 3 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c81698550e5a..7bb49b049dcc 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1360,7 +1360,7 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 				struct bond_up_slave *slaves;
 				unsigned int count;
 
-				slaves = rcu_dereference(bond->slave_arr);
+				slaves = rcu_dereference(bond->usable_slaves);
 				count = slaves ? READ_ONCE(slaves->count) : 0;
 				if (likely(count))
 					tx_slave = slaves->arr[hash_index %
@@ -1494,7 +1494,7 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 			struct bond_up_slave *slaves;
 			unsigned int count;
 
-			slaves = rcu_dereference(bond->slave_arr);
+			slaves = rcu_dereference(bond->usable_slaves);
 			count = slaves ? READ_ONCE(slaves->count) : 0;
 			if (likely(count))
 				tx_slave = slaves->arr[bond_xmit_hash(bond, skb) %
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index f7aded014f08..2cb41d480ae2 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4120,9 +4120,9 @@ static void bond_skip_slave(struct bond_up_slave *slaves,
  */
 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 {
+	struct bond_up_slave *usable_slaves, *old_usable_slaves;
 	struct slave *slave;
 	struct list_head *iter;
-	struct bond_up_slave *new_arr, *old_arr;
 	int agg_id = 0;
 	int ret = 0;
 
@@ -4130,11 +4130,10 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 	WARN_ON(lockdep_is_held(&bond->mode_lock));
 #endif
 
-	new_arr = kzalloc(offsetof(struct bond_up_slave, arr[bond->slave_cnt]),
-			  GFP_KERNEL);
-	if (!new_arr) {
+	usable_slaves = kzalloc(struct_size(usable_slaves, arr,
+					    bond->slave_cnt), GFP_KERNEL);
+	if (!usable_slaves) {
 		ret = -ENOMEM;
-		pr_err("Failed to build slave-array.\n");
 		goto out;
 	}
 	if (BOND_MODE(bond) == BOND_MODE_8023AD) {
@@ -4142,14 +4141,14 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 
 		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
 			pr_debug("bond_3ad_get_active_agg_info failed\n");
-			kfree_rcu(new_arr, rcu);
+			kfree_rcu(usable_slaves, rcu);
 			/* No active aggragator means it's not safe to use
 			 * the previous array.
 			 */
-			old_arr = rtnl_dereference(bond->slave_arr);
-			if (old_arr) {
-				RCU_INIT_POINTER(bond->slave_arr, NULL);
-				kfree_rcu(old_arr, rcu);
+			old_usable_slaves = rtnl_dereference(bond->usable_slaves);
+			if (old_usable_slaves) {
+				RCU_INIT_POINTER(bond->usable_slaves, NULL);
+				kfree_rcu(old_usable_slaves, rcu);
 			}
 			goto out;
 		}
@@ -4169,18 +4168,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 			continue;
 
 		slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n",
-			  new_arr->count);
+			  usable_slaves->count);
 
-		new_arr->arr[new_arr->count++] = slave;
+		usable_slaves->arr[usable_slaves->count++] = slave;
 	}
 
-	old_arr = rtnl_dereference(bond->slave_arr);
-	rcu_assign_pointer(bond->slave_arr, new_arr);
-	if (old_arr)
-		kfree_rcu(old_arr, rcu);
+	old_usable_slaves = rtnl_dereference(bond->usable_slaves);
+	rcu_assign_pointer(bond->usable_slaves, usable_slaves);
+	if (old_usable_slaves)
+		kfree_rcu(old_usable_slaves, rcu);
 out:
 	if (ret != 0 && skipslave)
-		bond_skip_slave(rtnl_dereference(bond->slave_arr), skipslave);
+		bond_skip_slave(rtnl_dereference(bond->usable_slaves),
+				skipslave);
 
 	return ret;
 }
@@ -4197,7 +4197,7 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
 	struct bond_up_slave *slaves;
 	unsigned int count;
 
-	slaves = rcu_dereference(bond->slave_arr);
+	slaves = rcu_dereference(bond->usable_slaves);
 	count = slaves ? READ_ONCE(slaves->count) : 0;
 	if (likely(count)) {
 		slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
@@ -4488,9 +4488,9 @@ static void bond_uninit(struct net_device *bond_dev)
 		__bond_release_one(bond_dev, slave->dev, true, true);
 	netdev_info(bond_dev, "Released all slaves\n");
 
-	arr = rtnl_dereference(bond->slave_arr);
+	arr = rtnl_dereference(bond->usable_slaves);
 	if (arr) {
-		RCU_INIT_POINTER(bond->slave_arr, NULL);
+		RCU_INIT_POINTER(bond->usable_slaves, NULL);
 		kfree_rcu(arr, rcu);
 	}
 
diff --git a/include/net/bonding.h b/include/net/bonding.h
index dc2ce31a1f52..33bdb6d5182d 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -200,7 +200,7 @@ struct bonding {
 	struct   slave __rcu *curr_active_slave;
 	struct   slave __rcu *current_arp_slave;
 	struct   slave __rcu *primary_slave;
-	struct   bond_up_slave __rcu *slave_arr; /* Array of usable slaves */
+	struct   bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
-- 
cgit v1.2.3-59-g8ed1b


From 34b37e204dfc8b20a09bb7b7f4c5e970c87420dd Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:34 +0300
Subject: bonding/alb: Add helper functions to get the xmit slave

Add two helper functions to get the xmit slave of a bond in alb or tlb
mode. Extract the logic for finding the xmit slave from the xmit flow
into a function. The xmit flow will transmit through this slave, and in
the following patches the new .ndo will call the helper function
to return the xmit slave.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_alb.c | 35 ++++++++++++++++++++++++++---------
 include/net/bond_alb.h         |  4 ++++
 2 files changed, 30 insertions(+), 9 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index 7bb49b049dcc..e863c694c309 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1334,11 +1334,11 @@ out:
 	return NETDEV_TX_OK;
 }
 
-netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
+				      struct sk_buff *skb)
 {
-	struct bonding *bond = netdev_priv(bond_dev);
-	struct ethhdr *eth_data;
 	struct slave *tx_slave = NULL;
+	struct ethhdr *eth_data;
 	u32 hash_index;
 
 	skb_reset_mac_header(skb);
@@ -1369,20 +1369,29 @@ netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 			break;
 		}
 	}
-	return bond_do_alb_xmit(skb, bond, tx_slave);
+	return tx_slave;
 }
 
-netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
-	struct ethhdr *eth_data;
+	struct slave *tx_slave;
+
+	tx_slave = bond_xmit_tlb_slave_get(bond, skb);
+	return bond_do_alb_xmit(skb, bond, tx_slave);
+}
+
+struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
+				      struct sk_buff *skb)
+{
 	struct alb_bond_info *bond_info = &(BOND_ALB_INFO(bond));
-	struct slave *tx_slave = NULL;
 	static const __be32 ip_bcast = htonl(0xffffffff);
-	int hash_size = 0;
+	struct slave *tx_slave = NULL;
+	const u8 *hash_start = NULL;
 	bool do_tx_balance = true;
+	struct ethhdr *eth_data;
 	u32 hash_index = 0;
-	const u8 *hash_start = NULL;
+	int hash_size = 0;
 
 	skb_reset_mac_header(skb);
 	eth_data = eth_hdr(skb);
@@ -1501,7 +1510,15 @@ netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
 						       count];
 		}
 	}
+	return tx_slave;
+}
+
+netdev_tx_t bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *tx_slave = NULL;
 
+	tx_slave = bond_xmit_alb_slave_get(bond, skb);
 	return bond_do_alb_xmit(skb, bond, tx_slave);
 }
 
diff --git a/include/net/bond_alb.h b/include/net/bond_alb.h
index b3504fcd773d..f6af76c87a6c 100644
--- a/include/net/bond_alb.h
+++ b/include/net/bond_alb.h
@@ -158,6 +158,10 @@ void bond_alb_handle_link_change(struct bonding *bond, struct slave *slave, char
 void bond_alb_handle_active_change(struct bonding *bond, struct slave *new_slave);
 int bond_alb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
 int bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev);
+struct slave *bond_xmit_alb_slave_get(struct bonding *bond,
+				      struct sk_buff *skb);
+struct slave *bond_xmit_tlb_slave_get(struct bonding *bond,
+				      struct sk_buff *skb);
 void bond_alb_monitor(struct work_struct *);
 int bond_alb_set_mac_address(struct net_device *bond_dev, void *addr);
 void bond_alb_clear_vlan(struct bonding *bond, unsigned short vlan_id);
-- 
cgit v1.2.3-59-g8ed1b


From c071d91d2a89b0dac1354673810b36453aed62c4 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:35 +0300
Subject: bonding: Add helper function to get the xmit slave based on hash

Both xor and 802.3ad modes use bond_xmit_hash to get the xmit slave.
Extract the logic into a helper function so it can be used in the
following patches by the .ndo to get the xmit slave.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 28 +++++++++++++++++++++-------
 1 file changed, 21 insertions(+), 7 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2cb41d480ae2..8e6305955c75 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4185,6 +4185,23 @@ out:
 	return ret;
 }
 
+static struct slave *bond_xmit_3ad_xor_slave_get(struct bonding *bond,
+						 struct sk_buff *skb,
+						 struct bond_up_slave *slaves)
+{
+	struct slave *slave;
+	unsigned int count;
+	u32 hash;
+
+	hash = bond_xmit_hash(bond, skb);
+	count = slaves ? READ_ONCE(slaves->count) : 0;
+	if (unlikely(!count))
+		return NULL;
+
+	slave = slaves->arr[hash % count];
+	return slave;
+}
+
 /* Use this Xmit function for 3AD as well as XOR modes. The current
  * usable slave array is formed in the control path. The xmit function
  * just calculates hash and sends the packet out.
@@ -4193,18 +4210,15 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
 				     struct net_device *dev)
 {
 	struct bonding *bond = netdev_priv(dev);
-	struct slave *slave;
 	struct bond_up_slave *slaves;
-	unsigned int count;
+	struct slave *slave;
 
 	slaves = rcu_dereference(bond->usable_slaves);
-	count = slaves ? READ_ONCE(slaves->count) : 0;
-	if (likely(count)) {
-		slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
+	slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
+	if (likely(slave))
 		bond_dev_queue_xmit(bond, skb, slave->dev);
-	} else {
+	else
 		bond_tx_drop(dev, skb);
-	}
 
 	return NETDEV_TX_OK;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 29d5bbccb3a171eb146c94efeb3d752fad3ddf7d Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:36 +0300
Subject: bonding: Add helper function to get the xmit slave in rr mode

Add a helper function to get the xmit slave when the bond is in round
robin mode. Change bond_xmit_slave_id to bond_get_slave_by_id, so that
the logic for finding the next slave for transmit can be used
both by the xmit flow and by the .ndo to get the xmit slave.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 56 ++++++++++++++++++++++-------------------
 1 file changed, 30 insertions(+), 26 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 8e6305955c75..09c8485e965d 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -3923,16 +3923,15 @@ unwind:
 }
 
 /**
- * bond_xmit_slave_id - transmit skb through slave with slave_id
+ * bond_get_slave_by_id - get xmit slave with slave_id
  * @bond: bonding device that is transmitting
- * @skb: buffer to transmit
  * @slave_id: slave id up to slave_cnt-1 through which to transmit
  *
- * This function tries to transmit through slave with slave_id but in case
+ * This function tries to get slave with slave_id but in case
  * it fails, it tries to find the first available slave for transmission.
- * The skb is consumed in all cases, thus the function is void.
  */
-static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
+static struct slave *bond_get_slave_by_id(struct bonding *bond,
+					  int slave_id)
 {
 	struct list_head *iter;
 	struct slave *slave;
@@ -3941,10 +3940,8 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl
 	/* Here we start from the slave with slave_id */
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0) {
-			if (bond_slave_can_tx(slave)) {
-				bond_dev_queue_xmit(bond, skb, slave->dev);
-				return;
-			}
+			if (bond_slave_can_tx(slave))
+				return slave;
 		}
 	}
 
@@ -3953,13 +3950,11 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0)
 			break;
-		if (bond_slave_can_tx(slave)) {
-			bond_dev_queue_xmit(bond, skb, slave->dev);
-			return;
-		}
+		if (bond_slave_can_tx(slave))
+			return slave;
 	}
-	/* no slave that can tx has been found */
-	bond_tx_drop(bond->dev, skb);
+
+	return NULL;
 }
 
 /**
@@ -3995,10 +3990,9 @@ static u32 bond_rr_gen_slave_id(struct bonding *bond)
 	return slave_id;
 }
 
-static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
-					struct net_device *bond_dev)
+static struct slave *bond_xmit_roundrobin_slave_get(struct bonding *bond,
+						    struct sk_buff *skb)
 {
-	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
 	int slave_cnt;
 	u32 slave_id;
@@ -4020,21 +4014,31 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
 		if (iph->protocol == IPPROTO_IGMP) {
 			slave = rcu_dereference(bond->curr_active_slave);
 			if (slave)
-				bond_dev_queue_xmit(bond, skb, slave->dev);
-			else
-				bond_xmit_slave_id(bond, skb, 0);
-			return NETDEV_TX_OK;
+				return slave;
+			return bond_get_slave_by_id(bond, 0);
 		}
 	}
 
 non_igmp:
 	slave_cnt = READ_ONCE(bond->slave_cnt);
 	if (likely(slave_cnt)) {
-		slave_id = bond_rr_gen_slave_id(bond);
-		bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
-	} else {
-		bond_tx_drop(bond_dev, skb);
+		slave_id = bond_rr_gen_slave_id(bond) % slave_cnt;
+		return bond_get_slave_by_id(bond, slave_id);
 	}
+	return NULL;
+}
+
+static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
+					struct net_device *bond_dev)
+{
+	struct bonding *bond = netdev_priv(bond_dev);
+	struct slave *slave;
+
+	slave = bond_xmit_roundrobin_slave_get(bond, skb);
+	if (slave)
+		bond_dev_queue_xmit(bond, skb, slave->dev);
+	else
+		bond_tx_drop(bond_dev, skb);
 	return NETDEV_TX_OK;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 5a19f1c1a2a0f7d5fb80b130ab4a15fa99e792d7 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:37 +0300
Subject: bonding: Add function to get the xmit slave in active-backup mode

Add a helper function to get the xmit slave in active-backup mode.
It is only a one-line function that returns the curr_active_slave,
but it will be used both in the xmit flow and by the new .ndo to get
the xmit slave.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 09c8485e965d..1b0ae750d732 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4042,6 +4042,12 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
 	return NETDEV_TX_OK;
 }
 
+static struct slave *bond_xmit_activebackup_slave_get(struct bonding *bond,
+						      struct sk_buff *skb)
+{
+	return rcu_dereference(bond->curr_active_slave);
+}
+
 /* In active-backup mode, we know that bond->curr_active_slave is always valid if
  * the bond has a usable interface.
  */
@@ -4051,7 +4057,7 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
 	struct bonding *bond = netdev_priv(bond_dev);
 	struct slave *slave;
 
-	slave = rcu_dereference(bond->curr_active_slave);
+	slave = bond_xmit_activebackup_slave_get(bond, skb);
 	if (slave)
 		bond_dev_queue_xmit(bond, skb, slave->dev);
 	else
-- 
cgit v1.2.3-59-g8ed1b


From 6b447e76ed44cc354cd0a346b86efe393e603e0d Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:38 +0300
Subject: bonding: Add array of all slaves

Keep all slaves in an array so it can be used to get the xmit slave
under the assumption that all the slaves are active.
The logic to add a slave to the array is like that of the usable slaves,
except that we also add slaves that currently can't transmit, i.e. those
that are not up or not active.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 78 +++++++++++++++++++++++++++++++----------
 include/net/bonding.h           |  3 +-
 2 files changed, 61 insertions(+), 20 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 1b0ae750d732..2de693f0262e 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4120,6 +4120,38 @@ static void bond_skip_slave(struct bond_up_slave *slaves,
 	}
 }
 
+static void bond_set_slave_arr(struct bonding *bond,
+			       struct bond_up_slave *usable_slaves,
+			       struct bond_up_slave *all_slaves)
+{
+	struct bond_up_slave *usable, *all;
+
+	usable = rtnl_dereference(bond->usable_slaves);
+	rcu_assign_pointer(bond->usable_slaves, usable_slaves);
+	kfree_rcu(usable, rcu);
+
+	all = rtnl_dereference(bond->all_slaves);
+	rcu_assign_pointer(bond->all_slaves, all_slaves);
+	kfree_rcu(all, rcu);
+}
+
+static void bond_reset_slave_arr(struct bonding *bond)
+{
+	struct bond_up_slave *usable, *all;
+
+	usable = rtnl_dereference(bond->usable_slaves);
+	if (usable) {
+		RCU_INIT_POINTER(bond->usable_slaves, NULL);
+		kfree_rcu(usable, rcu);
+	}
+
+	all = rtnl_dereference(bond->all_slaves);
+	if (all) {
+		RCU_INIT_POINTER(bond->all_slaves, NULL);
+		kfree_rcu(all, rcu);
+	}
+}
+
 /* Build the usable slaves array in control path for modes that use xmit-hash
  * to determine the slave interface -
  * (a) BOND_MODE_8023AD
@@ -4130,7 +4162,7 @@ static void bond_skip_slave(struct bond_up_slave *slaves,
  */
 int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 {
-	struct bond_up_slave *usable_slaves, *old_usable_slaves;
+	struct bond_up_slave *usable_slaves = NULL, *all_slaves = NULL;
 	struct slave *slave;
 	struct list_head *iter;
 	int agg_id = 0;
@@ -4142,7 +4174,9 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 
 	usable_slaves = kzalloc(struct_size(usable_slaves, arr,
 					    bond->slave_cnt), GFP_KERNEL);
-	if (!usable_slaves) {
+	all_slaves = kzalloc(struct_size(all_slaves, arr,
+					 bond->slave_cnt), GFP_KERNEL);
+	if (!usable_slaves || !all_slaves) {
 		ret = -ENOMEM;
 		goto out;
 	}
@@ -4151,20 +4185,19 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 
 		if (bond_3ad_get_active_agg_info(bond, &ad_info)) {
 			pr_debug("bond_3ad_get_active_agg_info failed\n");
-			kfree_rcu(usable_slaves, rcu);
 			/* No active aggragator means it's not safe to use
 			 * the previous array.
 			 */
-			old_usable_slaves = rtnl_dereference(bond->usable_slaves);
-			if (old_usable_slaves) {
-				RCU_INIT_POINTER(bond->usable_slaves, NULL);
-				kfree_rcu(old_usable_slaves, rcu);
-			}
+			bond_reset_slave_arr(bond);
 			goto out;
 		}
 		agg_id = ad_info.aggregator_id;
 	}
 	bond_for_each_slave(bond, slave, iter) {
+		if (skipslave == slave)
+			continue;
+
+		all_slaves->arr[all_slaves->count++] = slave;
 		if (BOND_MODE(bond) == BOND_MODE_8023AD) {
 			struct aggregator *agg;
 
@@ -4174,8 +4207,6 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 		}
 		if (!bond_slave_can_tx(slave))
 			continue;
-		if (skipslave == slave)
-			continue;
 
 		slave_dbg(bond->dev, slave->dev, "Adding slave to tx hash array[%d]\n",
 			  usable_slaves->count);
@@ -4183,14 +4214,17 @@ int bond_update_slave_arr(struct bonding *bond, struct slave *skipslave)
 		usable_slaves->arr[usable_slaves->count++] = slave;
 	}
 
-	old_usable_slaves = rtnl_dereference(bond->usable_slaves);
-	rcu_assign_pointer(bond->usable_slaves, usable_slaves);
-	if (old_usable_slaves)
-		kfree_rcu(old_usable_slaves, rcu);
+	bond_set_slave_arr(bond, usable_slaves, all_slaves);
+	return ret;
 out:
-	if (ret != 0 && skipslave)
+	if (ret != 0 && skipslave) {
+		bond_skip_slave(rtnl_dereference(bond->all_slaves),
+				skipslave);
 		bond_skip_slave(rtnl_dereference(bond->usable_slaves),
 				skipslave);
+	}
+	kfree_rcu(all_slaves, rcu);
+	kfree_rcu(usable_slaves, rcu);
 
 	return ret;
 }
@@ -4501,9 +4535,9 @@ void bond_setup(struct net_device *bond_dev)
 static void bond_uninit(struct net_device *bond_dev)
 {
 	struct bonding *bond = netdev_priv(bond_dev);
+	struct bond_up_slave *usable, *all;
 	struct list_head *iter;
 	struct slave *slave;
-	struct bond_up_slave *arr;
 
 	bond_netpoll_cleanup(bond_dev);
 
@@ -4512,10 +4546,16 @@ static void bond_uninit(struct net_device *bond_dev)
 		__bond_release_one(bond_dev, slave->dev, true, true);
 	netdev_info(bond_dev, "Released all slaves\n");
 
-	arr = rtnl_dereference(bond->usable_slaves);
-	if (arr) {
+	usable = rtnl_dereference(bond->usable_slaves);
+	if (usable) {
 		RCU_INIT_POINTER(bond->usable_slaves, NULL);
-		kfree_rcu(arr, rcu);
+		kfree_rcu(usable, rcu);
+	}
+
+	all = rtnl_dereference(bond->all_slaves);
+	if (all) {
+		RCU_INIT_POINTER(bond->all_slaves, NULL);
+		kfree_rcu(all, rcu);
 	}
 
 	list_del(&bond->bond_list);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 33bdb6d5182d..b5e49bedbc9f 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -200,7 +200,8 @@ struct bonding {
 	struct   slave __rcu *curr_active_slave;
 	struct   slave __rcu *current_arp_slave;
 	struct   slave __rcu *primary_slave;
-	struct   bond_up_slave __rcu *usable_slaves; /* Array of usable slaves */
+	struct   bond_up_slave __rcu *usable_slaves;
+	struct   bond_up_slave __rcu *all_slaves;
 	bool     force_primary;
 	s32      slave_cnt; /* never change this value outside the attach/detach wrappers */
 	int     (*recv_probe)(const struct sk_buff *, struct bonding *,
-- 
cgit v1.2.3-59-g8ed1b


From 33720aaf8c2af5c0ff341a16b5048b9c7ecae569 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:39 +0300
Subject: bonding: Implement ndo_get_xmit_slave

Add an implementation of ndo_get_xmit_slave. Find the slave by using the
helper function according to the bond mode. If the caller sets all_slaves
to true, then it assumes that all slaves are available to transmit.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Jay Vosburgh <jay.vosburgh@canonical.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/bonding/bond_main.c | 43 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2de693f0262e..39b1ad7edbb4 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4347,6 +4347,48 @@ static u16 bond_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return txq;
 }
 
+static struct net_device *bond_xmit_get_slave(struct net_device *master_dev,
+					      struct sk_buff *skb,
+					      bool all_slaves)
+{
+	struct bonding *bond = netdev_priv(master_dev);
+	struct bond_up_slave *slaves;
+	struct slave *slave = NULL;
+
+	switch (BOND_MODE(bond)) {
+	case BOND_MODE_ROUNDROBIN:
+		slave = bond_xmit_roundrobin_slave_get(bond, skb);
+		break;
+	case BOND_MODE_ACTIVEBACKUP:
+		slave = bond_xmit_activebackup_slave_get(bond, skb);
+		break;
+	case BOND_MODE_8023AD:
+	case BOND_MODE_XOR:
+		if (all_slaves)
+			slaves = rcu_dereference(bond->all_slaves);
+		else
+			slaves = rcu_dereference(bond->usable_slaves);
+		slave = bond_xmit_3ad_xor_slave_get(bond, skb, slaves);
+		break;
+	case BOND_MODE_BROADCAST:
+		break;
+	case BOND_MODE_ALB:
+		slave = bond_xmit_alb_slave_get(bond, skb);
+		break;
+	case BOND_MODE_TLB:
+		slave = bond_xmit_tlb_slave_get(bond, skb);
+		break;
+	default:
+		/* Should never happen, mode already checked */
+		WARN_ONCE(true, "Unknown bonding mode");
+		break;
+	}
+
+	if (slave)
+		return slave->dev;
+	return NULL;
+}
+
 static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct bonding *bond = netdev_priv(dev);
@@ -4468,6 +4510,7 @@ static const struct net_device_ops bond_netdev_ops = {
 	.ndo_del_slave		= bond_release,
 	.ndo_fix_features	= bond_fix_features,
 	.ndo_features_check	= passthru_features_check,
+	.ndo_get_xmit_slave	= bond_xmit_get_slave,
 };
 
 static const struct device_type bond_type = {
-- 
cgit v1.2.3-59-g8ed1b


From 64363e61c7bbcfa4c7d6697d96ef2e18fc311cf3 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:40 +0300
Subject: net/mlx5: Change lag mutex lock to spin lock

The lag lock can be a spin lock: the critical section is short
and there is no need for the thread to sleep.
Change the lock that protects the LAG structure from a mutex
to a spin lock. This is required for the next patch, which needs to
access this structure from a context in which we can't sleep.
In addition, there is no need to hold this lock when querying the
congestion counters.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 42 +++++++++++++--------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index c6ad5ca46877..b17b80bcd045 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -42,7 +42,7 @@
  * Beware of lock dependencies (preferably, no locks should be acquired
  * under it).
  */
-static DEFINE_MUTEX(lag_mutex);
+static DEFINE_SPINLOCK(lag_lock);
 
 static int mlx5_cmd_create_lag(struct mlx5_core_dev *dev, u8 remap_port1,
 			       u8 remap_port2)
@@ -274,9 +274,9 @@ static void mlx5_do_bond(struct mlx5_lag *ldev)
 	if (!dev0 || !dev1)
 		return;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	tracker = ldev->tracker;
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	do_bond = tracker.is_bonded && mlx5_lag_check_prereq(ldev);
 
@@ -458,9 +458,9 @@ static int mlx5_lag_netdev_event(struct notifier_block *this,
 		break;
 	}
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev->tracker = tracker;
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	if (changed)
 		mlx5_queue_bond_work(ldev, 0);
@@ -502,7 +502,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
 	if (fn >= MLX5_MAX_PORTS)
 		return;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev->pf[fn].dev    = dev;
 	ldev->pf[fn].netdev = netdev;
 	ldev->tracker.netdev_state[fn].link_up = 0;
@@ -510,7 +510,7 @@ static void mlx5_lag_dev_add_pf(struct mlx5_lag *ldev,
 
 	dev->priv.lag = ldev;
 
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 }
 
 static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
@@ -525,11 +525,11 @@ static void mlx5_lag_dev_remove_pf(struct mlx5_lag *ldev,
 	if (i == MLX5_MAX_PORTS)
 		return;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	memset(&ldev->pf[i], 0, sizeof(*ldev->pf));
 
 	dev->priv.lag = NULL;
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 }
 
 /* Must be called with intf_mutex held */
@@ -607,10 +607,10 @@ bool mlx5_lag_is_roce(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 	bool res;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 	res  = ldev && __mlx5_lag_is_roce(ldev);
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	return res;
 }
@@ -621,10 +621,10 @@ bool mlx5_lag_is_active(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 	bool res;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 	res  = ldev && __mlx5_lag_is_active(ldev);
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	return res;
 }
@@ -635,10 +635,10 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev)
 	struct mlx5_lag *ldev;
 	bool res;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 	res  = ldev && __mlx5_lag_is_sriov(ldev);
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	return res;
 }
@@ -664,7 +664,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 	struct net_device *ndev = NULL;
 	struct mlx5_lag *ldev;
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 
 	if (!(ldev && __mlx5_lag_is_roce(ldev)))
@@ -681,7 +681,7 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev)
 		dev_hold(ndev);
 
 unlock:
-	mutex_unlock(&lag_mutex);
+	spin_unlock(&lag_lock);
 
 	return ndev;
 }
@@ -723,7 +723,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 
 	memset(values, 0, sizeof(*values) * num_counters);
 
-	mutex_lock(&lag_mutex);
+	spin_lock(&lag_lock);
 	ldev = mlx5_lag_dev_get(dev);
 	if (ldev && __mlx5_lag_is_roce(ldev)) {
 		num_ports = MLX5_MAX_PORTS;
@@ -733,6 +733,7 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 		num_ports = 1;
 		mdev[MLX5_LAG_P1] = dev;
 	}
+	spin_unlock(&lag_lock);
 
 	for (i = 0; i < num_ports; ++i) {
 		u32 in[MLX5_ST_SZ_DW(query_cong_statistics_in)] = {};
@@ -742,14 +743,13 @@ int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 		ret = mlx5_cmd_exec_inout(mdev[i], query_cong_statistics, in,
 					  out);
 		if (ret)
-			goto unlock;
+			goto free;
 
 		for (j = 0; j < num_counters; ++j)
 			values[j] += be64_to_cpup((__be64 *)(out + offsets[j]));
 	}
 
-unlock:
-	mutex_unlock(&lag_mutex);
+free:
 	kvfree(out);
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From c6bc6041b10f70b617f2d13894311fe62027d292 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Thu, 30 Apr 2020 22:21:41 +0300
Subject: net/mlx5: Add support to get lag physical port

Add function to get the device physical port of the lag slave.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Acked-by: David S. Miller <davem@davemloft.net>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/lag.c | 24 ++++++++++++++++++++++++
 include/linux/mlx5/driver.h                   |  2 ++
 2 files changed, 26 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lag.c b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
index b17b80bcd045..874c70e8cc54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lag.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lag.c
@@ -687,6 +687,30 @@ unlock:
 }
 EXPORT_SYMBOL(mlx5_lag_get_roce_netdev);
 
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+			   struct net_device *slave)
+{
+	struct mlx5_lag *ldev;
+	u8 port = 0;
+
+	spin_lock(&lag_lock);
+	ldev = mlx5_lag_dev_get(dev);
+	if (!(ldev && __mlx5_lag_is_roce(ldev)))
+		goto unlock;
+
+	if (ldev->pf[MLX5_LAG_P1].netdev == slave)
+		port = MLX5_LAG_P1;
+	else
+		port = MLX5_LAG_P2;
+
+	port = ldev->v2p_map[port];
+
+unlock:
+	spin_unlock(&lag_lock);
+	return port;
+}
+EXPORT_SYMBOL(mlx5_lag_get_slave_port);
+
 bool mlx5_lag_intf_add(struct mlx5_interface *intf, struct mlx5_priv *priv)
 {
 	struct mlx5_core_dev *dev = container_of(priv, struct mlx5_core_dev,
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index d82dbbab8179..267dfcc5493e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -1074,6 +1074,8 @@ bool mlx5_lag_is_sriov(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_multipath(struct mlx5_core_dev *dev);
 bool mlx5_lag_is_active(struct mlx5_core_dev *dev);
 struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev);
+u8 mlx5_lag_get_slave_port(struct mlx5_core_dev *dev,
+			   struct net_device *slave);
 int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev,
 				 u64 *values,
 				 int num_counters,
-- 
cgit v1.2.3-59-g8ed1b


From 973d55e590beeca13fece60596ee3b511d36d9da Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:23 +0200
Subject: docs: networking: convert tuntap.txt to ReST

- add SPDX header;
- use copyright symbol;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst  |   1 +
 Documentation/networking/tuntap.rst | 259 ++++++++++++++++++++++++++++++++++++
 Documentation/networking/tuntap.txt | 227 -------------------------------
 MAINTAINERS                         |   2 +-
 drivers/net/Kconfig                 |   2 +-
 5 files changed, 262 insertions(+), 229 deletions(-)
 create mode 100644 Documentation/networking/tuntap.rst
 delete mode 100644 Documentation/networking/tuntap.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index b423b2db5f96..e7a683f0528d 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -111,6 +111,7 @@ Contents:
    team
    timestamping
    tproxy
+   tuntap
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/tuntap.rst b/Documentation/networking/tuntap.rst
new file mode 100644
index 000000000000..a59d1dd6fdcc
--- /dev/null
+++ b/Documentation/networking/tuntap.rst
@@ -0,0 +1,259 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===============================
+Universal TUN/TAP device driver
+===============================
+
+Copyright |copy| 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+
+  Linux, Solaris drivers
+  Copyright |copy| 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
+
+  FreeBSD TAP driver
+  Copyright |copy| 1999-2000 Maksim Yevmenkin <m_evmenkin@yahoo.com>
+
+  Revision of this document 2002 by Florian Thiel <florian.thiel@gmx.net>
+
+1. Description
+==============
+
+  TUN/TAP provides packet reception and transmission for user space programs.
+  It can be seen as a simple Point-to-Point or Ethernet device, which,
+  instead of receiving packets from physical media, receives them from
+  user space program and instead of sending packets via physical media
+  writes them to the user space program.
+
+  In order to use the driver a program has to open /dev/net/tun and issue a
+  corresponding ioctl() to register a network device with the kernel. A network
+  device will appear as tunXX or tapXX, depending on the options chosen. When
+  the program closes the file descriptor, the network device and all
+  corresponding routes will disappear.
+
+  Depending on the type of device chosen the userspace program has to read/write
+  IP packets (with tun) or ethernet frames (with tap). Which one is being used
+  depends on the flags given with the ioctl().
+
+  The package from http://vtun.sourceforge.net/tun contains two simple examples
+  for how to use tun and tap devices. Both programs work like a bridge between
+  two network interfaces.
+  br_select.c - bridge based on select system call.
+  br_sigio.c  - bridge based on async io and SIGIO signal.
+  However, the best example is VTun http://vtun.sourceforge.net :))
+
+2. Configuration
+================
+
+  Create device node::
+
+     mkdir /dev/net (if it doesn't exist already)
+     mknod /dev/net/tun c 10 200
+
+  Set permissions::
+
+     e.g. chmod 0666 /dev/net/tun
+
+  There's no harm in allowing the device to be accessible by non-root users,
+  since CAP_NET_ADMIN is required for creating network devices or for
+  connecting to network devices which aren't owned by the user in question.
+  If you want to create persistent devices and give ownership of them to
+  unprivileged users, then you need the /dev/net/tun device to be usable by
+  those users.
+
+  Driver module autoloading
+
+     Make sure that "Kernel module loader" - module auto-loading
+     support is enabled in your kernel.  The kernel should load it on
+     first access.
+
+  Manual loading
+
+     insert the module by hand::
+
+	modprobe tun
+
+  If you do it the latter way, you have to load the module every time you
+  need it; if you do it the other way, it will be automatically loaded when
+  /dev/net/tun is being opened.
+
+3. Program interface
+====================
+
+3.1 Network device allocation
+-----------------------------
+
+``char *dev`` should be the name of the device with a format string (e.g.
+"tun%d"), but (as far as I can see) this can be any valid network device name.
+Note that the character pointer becomes overwritten with the real device name
+(e.g. "tun0")::
+
+  #include <linux/if.h>
+  #include <linux/if_tun.h>
+
+  int tun_alloc(char *dev)
+  {
+      struct ifreq ifr;
+      int fd, err;
+
+      if( (fd = open("/dev/net/tun", O_RDWR)) < 0 )
+	 return tun_alloc_old(dev);
+
+      memset(&ifr, 0, sizeof(ifr));
+
+      /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
+       *        IFF_TAP   - TAP device
+       *
+       *        IFF_NO_PI - Do not provide packet information
+       */
+      ifr.ifr_flags = IFF_TUN;
+      if( *dev )
+	 strncpy(ifr.ifr_name, dev, IFNAMSIZ);
+
+      if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
+	 close(fd);
+	 return err;
+      }
+      strcpy(dev, ifr.ifr_name);
+      return fd;
+  }
+
+3.2 Frame format
+----------------
+
+If flag IFF_NO_PI is not set each frame format is::
+
+     Flags [2 bytes]
+     Proto [2 bytes]
+     Raw protocol(IP, IPv6, etc) frame.
+
+3.3 Multiqueue tuntap interface
+-------------------------------
+
+From version 3.8, Linux supports multiqueue tuntap which can use multiple
+file descriptors (queues) to parallelize packet sending or receiving. The
+device allocation is the same as before, and if the user wants to create
+multiple queues, TUNSETIFF with the same device name must be called many
+times with the IFF_MULTI_QUEUE flag.
+
+``char *dev`` should be the name of the device, queues is the number of queues
+to be created, fds is used to store and return the file descriptors (queues)
+created to the caller. Each file descriptor serves as the interface of a
+queue which can be accessed by userspace.
+
+::
+
+  #include <linux/if.h>
+  #include <linux/if_tun.h>
+
+  int tun_alloc_mq(char *dev, int queues, int *fds)
+  {
+      struct ifreq ifr;
+      int fd, err, i;
+
+      if (!dev)
+	  return -1;
+
+      memset(&ifr, 0, sizeof(ifr));
+      /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
+       *        IFF_TAP   - TAP device
+       *
+       *        IFF_NO_PI - Do not provide packet information
+       *        IFF_MULTI_QUEUE - Create a queue of multiqueue device
+       */
+      ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
+      strcpy(ifr.ifr_name, dev);
+
+      for (i = 0; i < queues; i++) {
+	  if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
+	     goto err;
+	  err = ioctl(fd, TUNSETIFF, (void *)&ifr);
+	  if (err) {
+	     close(fd);
+	     goto err;
+	  }
+	  fds[i] = fd;
+      }
+
+      return 0;
+  err:
+      for (--i; i >= 0; i--)
+	  close(fds[i]);
+      return err;
+  }
+
+A new ioctl(TUNSETQUEUE) was introduced to enable or disable a queue. When
+calling it with the IFF_DETACH_QUEUE flag, the queue is disabled, and when
+calling it with the IFF_ATTACH_QUEUE flag, the queue is enabled. The queue is
+enabled by default after it is created through TUNSETIFF.
+
+fd is the file descriptor (queue) that we want to enable or disable, when
+enable is true we enable it, otherwise we disable it::
+
+  #include <linux/if.h>
+  #include <linux/if_tun.h>
+
+  int tun_set_queue(int fd, int enable)
+  {
+      struct ifreq ifr;
+
+      memset(&ifr, 0, sizeof(ifr));
+
+      if (enable)
+	 ifr.ifr_flags = IFF_ATTACH_QUEUE;
+      else
+	 ifr.ifr_flags = IFF_DETACH_QUEUE;
+
+      return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
+  }
+
+Universal TUN/TAP device driver Frequently Asked Questions
+==========================================================
+
+1. What platforms are supported by TUN/TAP driver ?
+
+Currently driver has been written for 3 Unices:
+
+  - Linux kernels 2.2.x, 2.4.x
+  - FreeBSD 3.x, 4.x, 5.x
+  - Solaris 2.6, 7.0, 8.0
+
+2. What is TUN/TAP driver used for?
+
+As mentioned above, main purpose of TUN/TAP driver is tunneling.
+It is used by VTun (http://vtun.sourceforge.net).
+
+Another interesting application using TUN/TAP is pipsecd
+(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec
+implementation that can use complete kernel routing (unlike FreeS/WAN).
+
+3. How does Virtual network device actually work ?
+
+Virtual network device can be viewed as a simple Point-to-Point or
+Ethernet device, which instead of receiving packets from a physical
+media, receives them from user space program and instead of sending
+packets via physical media sends them to the user space program.
+
+Let's say that you configured IPv6 on the tap0, then whenever
+the kernel sends an IPv6 packet to tap0, it is passed to the application
+(VTun for example). The application encrypts, compresses and sends it to
+the other side over TCP or UDP. The application on the other side decompresses
+and decrypts the data received and writes the packet to the TAP device,
+the kernel handles the packet like it came from real physical device.
+
+4. What is the difference between TUN driver and TAP driver?
+
+TUN works with IP frames. TAP works with Ethernet frames.
+
+This means that you have to read/write IP packets when you are using tun and
+ethernet frames when using tap.
+
+5. What is the difference between BPF and TUN/TAP driver?
+
+BPF is an advanced packet filter. It can be attached to existing
+network interface. It does not provide a virtual network interface.
+A TUN/TAP driver does provide a virtual network interface and it is possible
+to attach BPF to this interface.
+
+6. Does TAP driver support kernel Ethernet bridging?
+
+Yes. Linux and FreeBSD drivers support Ethernet bridging.
diff --git a/Documentation/networking/tuntap.txt b/Documentation/networking/tuntap.txt
deleted file mode 100644
index 0104830d5075..000000000000
--- a/Documentation/networking/tuntap.txt
+++ /dev/null
@@ -1,227 +0,0 @@
-Universal TUN/TAP device driver.
-Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
-
-  Linux, Solaris drivers 
-  Copyright (C) 1999-2000 Maxim Krasnyansky <max_mk@yahoo.com>
-
-  FreeBSD TAP driver 
-  Copyright (c) 1999-2000 Maksim Yevmenkin <m_evmenkin@yahoo.com>
-
-  Revision of this document 2002 by Florian Thiel <florian.thiel@gmx.net>
-
-1. Description
-  TUN/TAP provides packet reception and transmission for user space programs. 
-  It can be seen as a simple Point-to-Point or Ethernet device, which,
-  instead of receiving packets from physical media, receives them from 
-  user space program and instead of sending packets via physical media 
-  writes them to the user space program. 
-
-  In order to use the driver a program has to open /dev/net/tun and issue a
-  corresponding ioctl() to register a network device with the kernel. A network
-  device will appear as tunXX or tapXX, depending on the options chosen. When
-  the program closes the file descriptor, the network device and all
-  corresponding routes will disappear.
-
-  Depending on the type of device chosen the userspace program has to read/write
-  IP packets (with tun) or ethernet frames (with tap). Which one is being used
-  depends on the flags given with the ioctl().
-
-  The package from http://vtun.sourceforge.net/tun contains two simple examples
-  for how to use tun and tap devices. Both programs work like a bridge between
-  two network interfaces.
-  br_select.c - bridge based on select system call.
-  br_sigio.c  - bridge based on async io and SIGIO signal.
-  However, the best example is VTun http://vtun.sourceforge.net :))
-
-2. Configuration 
-  Create device node:
-     mkdir /dev/net (if it doesn't exist already)
-     mknod /dev/net/tun c 10 200
-  
-  Set permissions:
-     e.g. chmod 0666 /dev/net/tun
-     There's no harm in allowing the device to be accessible by non-root users,
-     since CAP_NET_ADMIN is required for creating network devices or for 
-     connecting to network devices which aren't owned by the user in question.
-     If you want to create persistent devices and give ownership of them to 
-     unprivileged users, then you need the /dev/net/tun device to be usable by
-     those users.
-
-  Driver module autoloading
-
-     Make sure that "Kernel module loader" - module auto-loading
-     support is enabled in your kernel.  The kernel should load it on
-     first access.
-  
-  Manual loading 
-     insert the module by hand:
-        modprobe tun
-
-  If you do it the latter way, you have to load the module every time you
-  need it, if you do it the other way it will be automatically loaded when
-  /dev/net/tun is being opened.
-
-3. Program interface 
-  3.1 Network device allocation:
-
-  char *dev should be the name of the device with a format string (e.g.
-  "tun%d"), but (as far as I can see) this can be any valid network device name.
-  Note that the character pointer becomes overwritten with the real device name
-  (e.g. "tun0")
-
-  #include <linux/if.h>
-  #include <linux/if_tun.h>
-
-  int tun_alloc(char *dev)
-  {
-      struct ifreq ifr;
-      int fd, err;
-
-      if( (fd = open("/dev/net/tun", O_RDWR)) < 0 )
-         return tun_alloc_old(dev);
-
-      memset(&ifr, 0, sizeof(ifr));
-
-      /* Flags: IFF_TUN   - TUN device (no Ethernet headers) 
-       *        IFF_TAP   - TAP device  
-       *
-       *        IFF_NO_PI - Do not provide packet information  
-       */ 
-      ifr.ifr_flags = IFF_TUN; 
-      if( *dev )
-         strncpy(ifr.ifr_name, dev, IFNAMSIZ);
-
-      if( (err = ioctl(fd, TUNSETIFF, (void *) &ifr)) < 0 ){
-         close(fd);
-         return err;
-      }
-      strcpy(dev, ifr.ifr_name);
-      return fd;
-  }              
- 
-  3.2 Frame format:
-  If flag IFF_NO_PI is not set each frame format is: 
-     Flags [2 bytes]
-     Proto [2 bytes]
-     Raw protocol(IP, IPv6, etc) frame.
-
-  3.3 Multiqueue tuntap interface:
-
-  From version 3.8, Linux supports multiqueue tuntap which can uses multiple
-  file descriptors (queues) to parallelize packets sending or receiving. The
-  device allocation is the same as before, and if user wants to create multiple
-  queues, TUNSETIFF with the same device name must be called many times with
-  IFF_MULTI_QUEUE flag.
-
-  char *dev should be the name of the device, queues is the number of queues to
-  be created, fds is used to store and return the file descriptors (queues)
-  created to the caller. Each file descriptor were served as the interface of a
-  queue which could be accessed by userspace.
-
-  #include <linux/if.h>
-  #include <linux/if_tun.h>
-
-  int tun_alloc_mq(char *dev, int queues, int *fds)
-  {
-      struct ifreq ifr;
-      int fd, err, i;
-
-      if (!dev)
-          return -1;
-
-      memset(&ifr, 0, sizeof(ifr));
-      /* Flags: IFF_TUN   - TUN device (no Ethernet headers)
-       *        IFF_TAP   - TAP device
-       *
-       *        IFF_NO_PI - Do not provide packet information
-       *        IFF_MULTI_QUEUE - Create a queue of multiqueue device
-       */
-      ifr.ifr_flags = IFF_TAP | IFF_NO_PI | IFF_MULTI_QUEUE;
-      strcpy(ifr.ifr_name, dev);
-
-      for (i = 0; i < queues; i++) {
-          if ((fd = open("/dev/net/tun", O_RDWR)) < 0)
-             goto err;
-          err = ioctl(fd, TUNSETIFF, (void *)&ifr);
-          if (err) {
-             close(fd);
-             goto err;
-          }
-          fds[i] = fd;
-      }
-
-      return 0;
-  err:
-      for (--i; i >= 0; i--)
-          close(fds[i]);
-      return err;
-  }
-
-  A new ioctl(TUNSETQUEUE) were introduced to enable or disable a queue. When
-  calling it with IFF_DETACH_QUEUE flag, the queue were disabled. And when
-  calling it with IFF_ATTACH_QUEUE flag, the queue were enabled. The queue were
-  enabled by default after it was created through TUNSETIFF.
-
-  fd is the file descriptor (queue) that we want to enable or disable, when
-  enable is true we enable it, otherwise we disable it
-
-  #include <linux/if.h>
-  #include <linux/if_tun.h>
-
-  int tun_set_queue(int fd, int enable)
-  {
-      struct ifreq ifr;
-
-      memset(&ifr, 0, sizeof(ifr));
-
-      if (enable)
-         ifr.ifr_flags = IFF_ATTACH_QUEUE;
-      else
-         ifr.ifr_flags = IFF_DETACH_QUEUE;
-
-      return ioctl(fd, TUNSETQUEUE, (void *)&ifr);
-  }
-
-Universal TUN/TAP device driver Frequently Asked Question.
-   
-1. What platforms are supported by TUN/TAP driver ?
-Currently driver has been written for 3 Unices:
-   Linux kernels 2.2.x, 2.4.x 
-   FreeBSD 3.x, 4.x, 5.x
-   Solaris 2.6, 7.0, 8.0
-
-2. What is TUN/TAP driver used for?
-As mentioned above, main purpose of TUN/TAP driver is tunneling. 
-It is used by VTun (http://vtun.sourceforge.net).
-
-Another interesting application using TUN/TAP is pipsecd
-(http://perso.enst.fr/~beyssac/pipsec/), a userspace IPSec
-implementation that can use complete kernel routing (unlike FreeS/WAN).
-
-3. How does Virtual network device actually work ? 
-Virtual network device can be viewed as a simple Point-to-Point or
-Ethernet device, which instead of receiving packets from a physical 
-media, receives them from user space program and instead of sending 
-packets via physical media sends them to the user space program. 
-
-Let's say that you configured IPv6 on the tap0, then whenever
-the kernel sends an IPv6 packet to tap0, it is passed to the application
-(VTun for example). The application encrypts, compresses and sends it to 
-the other side over TCP or UDP. The application on the other side decompresses
-and decrypts the data received and writes the packet to the TAP device, 
-the kernel handles the packet like it came from real physical device.
-
-4. What is the difference between TUN driver and TAP driver?
-TUN works with IP frames. TAP works with Ethernet frames.
-
-This means that you have to read/write IP packets when you are using tun and
-ethernet frames when using tap.
-
-5. What is the difference between BPF and TUN/TAP driver?
-BPF is an advanced packet filter. It can be attached to existing
-network interface. It does not provide a virtual network interface.
-A TUN/TAP driver does provide a virtual network interface and it is possible
-to attach BPF to this interface.
-
-6. Does TAP driver support kernel Ethernet bridging?
-Yes. Linux and FreeBSD drivers support Ethernet bridging. 
diff --git a/MAINTAINERS b/MAINTAINERS
index 0ac9cec0bce6..6456c5bb02f1 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -17161,7 +17161,7 @@ TUN/TAP driver
 M:	Maxim Krasnyansky <maxk@qti.qualcomm.com>
 S:	Maintained
 W:	http://vtun.sourceforge.net/tun
-F:	Documentation/networking/tuntap.txt
+F:	Documentation/networking/tuntap.rst
 F:	arch/um/os-Linux/drivers/
 
 TURBOCHANNEL SUBSYSTEM
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index ad64be98330f..3f2c98a7906c 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -355,7 +355,7 @@ config TUN
 	  devices, driver will automatically delete tunXX or tapXX device and
 	  all routes corresponding to it.
 
-	  Please read <file:Documentation/networking/tuntap.txt> for more
+	  Please read <file:Documentation/networking/tuntap.rst> for more
 	  information.
 
 	  To compile this driver as a module, choose M here: the module
-- 
cgit v1.2.3-59-g8ed1b


From 961fb1ff412a2cefaf50f4f56bb60a10ed071df5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:24 +0200
Subject: docs: networking: convert udplite.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark lists as such;
- mark tables as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst   |   1 +
 Documentation/networking/udplite.rst | 291 +++++++++++++++++++++++++++++++++++
 Documentation/networking/udplite.txt | 278 ---------------------------------
 3 files changed, 292 insertions(+), 278 deletions(-)
 create mode 100644 Documentation/networking/udplite.rst
 delete mode 100644 Documentation/networking/udplite.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e7a683f0528d..ca0b0dbfd9ad 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -112,6 +112,7 @@ Contents:
    timestamping
    tproxy
    tuntap
+   udplite
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/udplite.rst b/Documentation/networking/udplite.rst
new file mode 100644
index 000000000000..2c225f28b7b2
--- /dev/null
+++ b/Documentation/networking/udplite.rst
@@ -0,0 +1,291 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================
+The UDP-Lite protocol (RFC 3828)
+================================
+
+
+  UDP-Lite is a Standards-Track IETF transport protocol whose characteristic
+  is a variable-length checksum. This has advantages for transport of multimedia
+  (video, VoIP) over wireless networks, as partly damaged packets can still be
+  fed into the codec instead of being discarded due to a failed checksum test.
+
+  This file briefly describes the existing kernel support and the socket API.
+  For in-depth information, you can consult:
+
+   - The UDP-Lite Homepage:
+     http://web.archive.org/web/%2E/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/
+
+     From here you can also download some example application source code.
+
+   - The UDP-Lite HOWTO on
+     http://web.archive.org/web/%2E/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/files/UDP-Lite-HOWTO.txt
+
+   - The Wireshark UDP-Lite WiKi (with capture files):
+     https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
+
+   - The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
+
+
+1. Applications
+===============
+
+  Several applications have been ported successfully to UDP-Lite. Ethereal
+  (now called wireshark) has UDP-Litev4/v6 support by default.
+
+  Porting applications to UDP-Lite is straightforward: only socket level and
+  IPPROTO need to be changed; senders additionally set the checksum coverage
+  length (default = header length = 8). Details are in the next section.
+
+2. Programming API
+==================
+
+  UDP-Lite provides a connectionless, unreliable datagram service and hence
+  uses the same socket type as UDP. In fact, porting from UDP to UDP-Lite is
+  very easy: simply add ``IPPROTO_UDPLITE`` as the last argument of the
+  socket(2) call so that the statement looks like::
+
+      s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
+
+  or, respectively,
+
+  ::
+
+      s = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE);
+
+  With just the above change you are able to run UDP-Lite services or connect
+  to UDP-Lite servers. The kernel will assume that you are not interested in
+  using partial checksum coverage and so emulate UDP mode (full coverage).
+
+  To make use of the partial checksum coverage facilities requires setting a
+  single socket option, which takes an integer specifying the coverage length:
+
+    * Sender checksum coverage: UDPLITE_SEND_CSCOV
+
+      For example::
+
+	int val = 20;
+	setsockopt(s, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &val, sizeof(int));
+
+      sets the checksum coverage length to 20 bytes (12b data + 8b header).
+      Of each packet only the first 20 bytes (plus the pseudo-header) will be
+      checksummed. This is useful for RTP applications which have a 12-byte
+      base header.
+
+
+    * Receiver checksum coverage: UDPLITE_RECV_CSCOV
+
+      This option is the receiver-side analogue. It is truly optional, i.e. not
+      required to enable traffic with partial checksum coverage. Its function is
+      that of a traffic filter: when enabled, it instructs the kernel to drop
+      all packets which have a coverage _less_ than this value. For example, if
+      RTP and UDP headers are to be protected, a receiver can enforce that only
+      packets with a minimum coverage of 20 are admitted::
+
+	int min = 20;
+	setsockopt(s, SOL_UDPLITE, UDPLITE_RECV_CSCOV, &min, sizeof(int));
+
+  The calls to getsockopt(2) are analogous. Being an extension and not a stand-
+  alone protocol, all socket options known from UDP can be used in exactly the
+  same manner as before, e.g. UDP_CORK or UDP_ENCAP.
+
+  A detailed discussion of UDP-Lite checksum coverage options is in section 4.
+
+3. Header Files
+===============
+
+  The socket API requires support through header files in /usr/include:
+
+    * /usr/include/netinet/in.h
+      to define IPPROTO_UDPLITE
+
+    * /usr/include/netinet/udplite.h
+      for UDP-Lite header fields and protocol constants
+
+  For testing purposes, the following can serve as a ``mini`` header file::
+
+    #define IPPROTO_UDPLITE       136
+    #define SOL_UDPLITE           136
+    #define UDPLITE_SEND_CSCOV     10
+    #define UDPLITE_RECV_CSCOV     11
+
+  Ready-made header files for various distros are in the UDP-Lite tarball.
+
+4. Kernel Behaviour with Regards to the Various Socket Options
+==============================================================
+
+
+  To enable debugging messages, the log level needs to be set to 8, as most
+  messages use the KERN_DEBUG level (7).
+
+  1) Sender Socket Options
+
+  If the sender specifies a value of 0 as coverage length, the module
+  assumes full coverage, transmits a packet with coverage length of 0
+  and according checksum.  If the sender specifies a coverage < 8 and
+  different from 0, the kernel assumes 8 as default value.  Finally,
+  if the specified coverage length exceeds the packet length, the packet
+  length is used instead as coverage length.
+
+  2) Receiver Socket Options
+
+  The receiver specifies the minimum value of the coverage length it
+  is willing to accept.  A value of 0 here indicates that the receiver
+  always wants the whole of the packet covered. In this case, all
+  partially covered packets are dropped and an error is logged.
+
+  It is not possible to specify illegal values (<0 and <8); in these
+  cases the default of 8 is assumed.
+
+  All packets arriving with a coverage value less than the specified
+  threshold are discarded; these events are also logged.
+
+  3) Disabling the Checksum Computation
+
+  On both sender and receiver, checksumming will always be performed
+  and cannot be disabled using SO_NO_CHECK. Thus::
+
+	setsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK,  ... );
+
+  will always be ignored, while the value of::
+
+	getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...);
+
+  is meaningless (as in TCP). Packets with a zero checksum field are
+  illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded.
+
+  4) Fragmentation
+
+  The checksum computation respects both buffersize and MTU. The size
+  of UDP-Lite packets is determined by the size of the send buffer. The
+  minimum size of the send buffer is 2048 (defined as SOCK_MIN_SNDBUF
+  in include/net/sock.h), the default value is configurable as
+  net.core.wmem_default or via setting the SO_SNDBUF socket(7)
+  option. The maximum upper bound for the send buffer is determined
+  by net.core.wmem_max.
+
+  Given a payload size larger than the send buffer size, UDP-Lite will
+  split the payload into several individual packets, filling up the
+  send buffer size in each case.
+
+  The precise value also depends on the interface MTU. The interface MTU,
+  in turn, may trigger IP fragmentation. In this case, the generated
+  UDP-Lite packet is split into several IP packets, of which only the
+  first one contains the L4 header.
+
+  The send buffer size has implications on the checksum coverage length.
+  Consider the following example::
+
+    Payload: 1536 bytes          Send Buffer:     1024 bytes
+    MTU:     1500 bytes          Coverage Length:  856 bytes
+
+  UDP-Lite will ship the 1536 bytes in two separate packets::
+
+    Packet 1: 1024 payload + 8 byte header + 20 byte IP header = 1052 bytes
+    Packet 2:  512 payload + 8 byte header + 20 byte IP header =  540 bytes
+
+  The checksum coverage spans the UDP-Lite header and 848 bytes of the
+  payload in the first packet; the second packet is fully covered. Note
+  that for the second packet, the coverage length exceeds the packet
+  length. The kernel always re-adjusts the coverage length to the packet
+  length in such cases.
+
+  As an example of what happens when one UDP-Lite packet is split into
+  several tiny fragments, consider the following example::
+
+    Payload: 1024 bytes            Send buffer size: 1024 bytes
+    MTU:      300 bytes            Coverage length:   575 bytes
+
+    +-+-----------+--------------+--------------+--------------+
+    |8|    272    |      280     |     280      |     280      |
+    +-+-----------+--------------+--------------+--------------+
+		280            560            840           1032
+					^
+    *****checksum coverage*************
+
+  The UDP-Lite module generates one 1032 byte packet (1024 + 8 byte
+  header). According to the interface MTU, this is split into 4 IP
+  packets (280 byte IP payload + 20 byte IP header). The kernel module
+  sums the contents of the entire first two packets, plus 15 bytes of
+  the third packet before releasing the fragments to the IP module.
+
+  To see the analogous case for IPv6 fragmentation, consider a link
+  MTU of 1280 bytes and a write buffer of 3356 bytes. If the checksum
+  coverage is less than 1232 bytes (MTU minus IPv6/fragment header
+  lengths), only the first fragment needs to be considered. When using
+  larger checksum coverage lengths, each eligible fragment needs to be
+  checksummed. Suppose we have a checksum coverage of 3062. The buffer
+  of 3356 bytes will be split into the following fragments::
+
+    Fragment 1: 1280 bytes carrying  1232 bytes of UDP-Lite data
+    Fragment 2: 1280 bytes carrying  1232 bytes of UDP-Lite data
+    Fragment 3:  948 bytes carrying   900 bytes of UDP-Lite data
+
+  The first two fragments have to be checksummed in full, of the last
+  fragment only 598 (= 3062 - 2*1232) bytes are checksummed.
+
+  While it is important that such cases are dealt with correctly, they
+  are (annoyingly) rare: UDP-Lite is designed for optimising multimedia
+  performance over wireless (or generally noisy) links and thus smaller
+  coverage lengths are likely to be expected.
+
+5. UDP-Lite Runtime Statistics and their Meaning
+================================================
+
+  Exceptional and error conditions are logged to syslog at the KERN_DEBUG
+  level.  Live statistics about UDP-Lite are available in /proc/net/snmp
+  and can (with newer versions of netstat) be viewed using::
+
+			    netstat -svu
+
+  This displays UDP-Lite statistics variables, whose meaning is as follows.
+
+   ============     =====================================================
+   InDatagrams      The total number of datagrams delivered to users.
+
+   NoPorts          Number of packets received to an unknown port.
+		    These cases are counted separately (not as InErrors).
+
+   InErrors         Number of erroneous UDP-Lite packets. Errors include:
+
+		      * internal socket queue receive errors
+		      * packet too short (less than 8 bytes or stated
+			coverage length exceeds received length)
+		      * xfrm4_policy_check() returned with error
+		      * application has specified larger min. coverage
+			length than that of incoming packet
+		      * checksum coverage violated
+		      * bad checksum
+
+   OutDatagrams     Total number of sent datagrams.
+   ============     =====================================================
+
+   These statistics derive from the UDP MIB (RFC 2013).
+
+6. IPtables
+===========
+
+  There is packet match support for UDP-Lite as well as support for the LOG target.
+  If you copy and paste the following line into /etc/protocols::
+
+    udplite 136     UDP-Lite        # UDP-Lite [RFC 3828]
+
+  then::
+
+	      iptables -A INPUT -p udplite -j LOG
+
+  will produce logging output to syslog. Dropping and rejecting packets also works.
+
+7. Maintainer Address
+=====================
+
+  The UDP-Lite patch was developed at
+
+		    University of Aberdeen
+		    Electronics Research Group
+		    Department of Engineering
+		    Fraser Noble Building
+		    Aberdeen AB24 3UE; UK
+
+  The current maintainer is Gerrit Renker, <gerrit@erg.abdn.ac.uk>. Initial
+  code was developed by William  Stanislaus, <william@erg.abdn.ac.uk>.
diff --git a/Documentation/networking/udplite.txt b/Documentation/networking/udplite.txt
deleted file mode 100644
index 53a726855e49..000000000000
--- a/Documentation/networking/udplite.txt
+++ /dev/null
@@ -1,278 +0,0 @@
-  ===========================================================================
-                      The UDP-Lite protocol (RFC 3828)
-  ===========================================================================
-
-
-  UDP-Lite is a Standards-Track IETF transport protocol whose characteristic
-  is a variable-length checksum. This has advantages for transport of multimedia
-  (video, VoIP) over wireless networks, as partly damaged packets can still be
-  fed into the codec instead of being discarded due to a failed checksum test.
-
-  This file briefly describes the existing kernel support and the socket API.
-  For in-depth information, you can consult:
-
-   o The UDP-Lite Homepage:
-	http://web.archive.org/web/*/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/ 
-       From here you can also download some example application source code.
-
-   o The UDP-Lite HOWTO on
-       http://web.archive.org/web/*/http://www.erg.abdn.ac.uk/users/gerrit/udp-lite/
-	files/UDP-Lite-HOWTO.txt
-
-   o The Wireshark UDP-Lite WiKi (with capture files):
-       https://wiki.wireshark.org/Lightweight_User_Datagram_Protocol
-
-   o The Protocol Spec, RFC 3828, http://www.ietf.org/rfc/rfc3828.txt
-
-
-  I) APPLICATIONS
-
-  Several applications have been ported successfully to UDP-Lite. Ethereal
-  (now called wireshark) has UDP-Litev4/v6 support by default. 
-  Porting applications to UDP-Lite is straightforward: only socket level and
-  IPPROTO need to be changed; senders additionally set the checksum coverage
-  length (default = header length = 8). Details are in the next section.
-
-
-  II) PROGRAMMING API
-
-  UDP-Lite provides a connectionless, unreliable datagram service and hence
-  uses the same socket type as UDP. In fact, porting from UDP to UDP-Lite is
-  very easy: simply add `IPPROTO_UDPLITE' as the last argument of the socket(2)
-  call so that the statement looks like:
-
-      s = socket(PF_INET, SOCK_DGRAM, IPPROTO_UDPLITE);
-
-                      or, respectively,
-
-      s = socket(PF_INET6, SOCK_DGRAM, IPPROTO_UDPLITE);
-
-  With just the above change you are able to run UDP-Lite services or connect
-  to UDP-Lite servers. The kernel will assume that you are not interested in
-  using partial checksum coverage and so emulate UDP mode (full coverage).
-
-  To make use of the partial checksum coverage facilities requires setting a
-  single socket option, which takes an integer specifying the coverage length:
-
-    * Sender checksum coverage: UDPLITE_SEND_CSCOV
-
-      For example,
-
-        int val = 20;
-        setsockopt(s, SOL_UDPLITE, UDPLITE_SEND_CSCOV, &val, sizeof(int));
-
-      sets the checksum coverage length to 20 bytes (12b data + 8b header).
-      Of each packet only the first 20 bytes (plus the pseudo-header) will be
-      checksummed. This is useful for RTP applications which have a 12-byte
-      base header.
-
-
-    * Receiver checksum coverage: UDPLITE_RECV_CSCOV
-
-      This option is the receiver-side analogue. It is truly optional, i.e. not
-      required to enable traffic with partial checksum coverage. Its function is
-      that of a traffic filter: when enabled, it instructs the kernel to drop
-      all packets which have a coverage _less_ than this value. For example, if
-      RTP and UDP headers are to be protected, a receiver can enforce that only
-      packets with a minimum coverage of 20 are admitted:
-
-        int min = 20;
-        setsockopt(s, SOL_UDPLITE, UDPLITE_RECV_CSCOV, &min, sizeof(int));
-
-  The calls to getsockopt(2) are analogous. Being an extension and not a stand-
-  alone protocol, all socket options known from UDP can be used in exactly the
-  same manner as before, e.g. UDP_CORK or UDP_ENCAP.
-
-  A detailed discussion of UDP-Lite checksum coverage options is in section IV.
-
-
-  III) HEADER FILES
-
-  The socket API requires support through header files in /usr/include:
-
-    * /usr/include/netinet/in.h
-        to define IPPROTO_UDPLITE
-
-    * /usr/include/netinet/udplite.h
-        for UDP-Lite header fields and protocol constants
-
-  For testing purposes, the following can serve as a `mini' header file:
-
-    #define IPPROTO_UDPLITE       136
-    #define SOL_UDPLITE           136
-    #define UDPLITE_SEND_CSCOV     10
-    #define UDPLITE_RECV_CSCOV     11
-
-  Ready-made header files for various distros are in the UDP-Lite tarball.
-
-
-  IV) KERNEL BEHAVIOUR WITH REGARD TO THE VARIOUS SOCKET OPTIONS
-
-  To enable debugging messages, the log level need to be set to 8, as most
-  messages use the KERN_DEBUG level (7).
-
-  1) Sender Socket Options
-
-  If the sender specifies a value of 0 as coverage length, the module
-  assumes full coverage, transmits a packet with coverage length of 0
-  and according checksum.  If the sender specifies a coverage < 8 and
-  different from 0, the kernel assumes 8 as default value.  Finally,
-  if the specified coverage length exceeds the packet length, the packet
-  length is used instead as coverage length.
-
-  2) Receiver Socket Options
-
-  The receiver specifies the minimum value of the coverage length it
-  is willing to accept.  A value of 0 here indicates that the receiver
-  always wants the whole of the packet covered. In this case, all
-  partially covered packets are dropped and an error is logged.
-
-  It is not possible to specify illegal values (<0 and <8); in these
-  cases the default of 8 is assumed.
-
-  All packets arriving with a coverage value less than the specified
-  threshold are discarded, these events are also logged.
-
-  3) Disabling the Checksum Computation
-
-  On both sender and receiver, checksumming will always be performed
-  and cannot be disabled using SO_NO_CHECK. Thus
-
-        setsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK,  ... );
-
-  will always will be ignored, while the value of
-
-        getsockopt(sockfd, SOL_SOCKET, SO_NO_CHECK, &value, ...);
-
-  is meaningless (as in TCP). Packets with a zero checksum field are
-  illegal (cf. RFC 3828, sec. 3.1) and will be silently discarded.
-
-  4) Fragmentation
-
-  The checksum computation respects both buffersize and MTU. The size
-  of UDP-Lite packets is determined by the size of the send buffer. The
-  minimum size of the send buffer is 2048 (defined as SOCK_MIN_SNDBUF
-  in include/net/sock.h), the default value is configurable as
-  net.core.wmem_default or via setting the SO_SNDBUF socket(7)
-  option. The maximum upper bound for the send buffer is determined
-  by net.core.wmem_max.
-
-  Given a payload size larger than the send buffer size, UDP-Lite will
-  split the payload into several individual packets, filling up the
-  send buffer size in each case.
-
-  The precise value also depends on the interface MTU. The interface MTU,
-  in turn, may trigger IP fragmentation. In this case, the generated
-  UDP-Lite packet is split into several IP packets, of which only the
-  first one contains the L4 header.
-
-  The send buffer size has implications on the checksum coverage length.
-  Consider the following example:
-
-  Payload: 1536 bytes          Send Buffer:     1024 bytes
-  MTU:     1500 bytes          Coverage Length:  856 bytes
-
-  UDP-Lite will ship the 1536 bytes in two separate packets:
-
-  Packet 1: 1024 payload + 8 byte header + 20 byte IP header = 1052 bytes
-  Packet 2:  512 payload + 8 byte header + 20 byte IP header =  540 bytes
-
-  The coverage packet covers the UDP-Lite header and 848 bytes of the
-  payload in the first packet, the second packet is fully covered. Note
-  that for the second packet, the coverage length exceeds the packet
-  length. The kernel always re-adjusts the coverage length to the packet
-  length in such cases.
-
-  As an example of what happens when one UDP-Lite packet is split into
-  several tiny fragments, consider the following example.
-
-  Payload: 1024 bytes            Send buffer size: 1024 bytes
-  MTU:      300 bytes            Coverage length:   575 bytes
-
-  +-+-----------+--------------+--------------+--------------+
-  |8|    272    |      280     |     280      |     280      |
-  +-+-----------+--------------+--------------+--------------+
-               280            560            840           1032
-                                    ^
-  *****checksum coverage*************
-
-  The UDP-Lite module generates one 1032 byte packet (1024 + 8 byte
-  header). According to the interface MTU, these are split into 4 IP
-  packets (280 byte IP payload + 20 byte IP header). The kernel module
-  sums the contents of the entire first two packets, plus 15 bytes of
-  the last packet before releasing the fragments to the IP module.
-
-  To see the analogous case for IPv6 fragmentation, consider a link
-  MTU of 1280 bytes and a write buffer of 3356 bytes. If the checksum
-  coverage is less than 1232 bytes (MTU minus IPv6/fragment header
-  lengths), only the first fragment needs to be considered. When using
-  larger checksum coverage lengths, each eligible fragment needs to be
-  checksummed. Suppose we have a checksum coverage of 3062. The buffer
-  of 3356 bytes will be split into the following fragments:
-
-    Fragment 1: 1280 bytes carrying  1232 bytes of UDP-Lite data
-    Fragment 2: 1280 bytes carrying  1232 bytes of UDP-Lite data
-    Fragment 3:  948 bytes carrying   900 bytes of UDP-Lite data
-
-  The first two fragments have to be checksummed in full, of the last
-  fragment only 598 (= 3062 - 2*1232) bytes are checksummed.
-
-  While it is important that such cases are dealt with correctly, they
-  are (annoyingly) rare: UDP-Lite is designed for optimising multimedia
-  performance over wireless (or generally noisy) links and thus smaller
-  coverage lengths are likely to be expected.
-
-
-  V) UDP-LITE RUNTIME STATISTICS AND THEIR MEANING
-
-  Exceptional and error conditions are logged to syslog at the KERN_DEBUG
-  level.  Live statistics about UDP-Lite are available in /proc/net/snmp
-  and can (with newer versions of netstat) be viewed using
-
-                            netstat -svu
-
-  This displays UDP-Lite statistics variables, whose meaning is as follows.
-
-   InDatagrams:     The total number of datagrams delivered to users.
-
-   NoPorts:         Number of packets received to an unknown port.
-                    These cases are counted separately (not as InErrors).
-
-   InErrors:        Number of erroneous UDP-Lite packets. Errors include:
-                      * internal socket queue receive errors
-                      * packet too short (less than 8 bytes or stated
-                        coverage length exceeds received length)
-                      * xfrm4_policy_check() returned with error
-                      * application has specified larger min. coverage
-                        length than that of incoming packet
-                      * checksum coverage violated
-                      * bad checksum
-
-   OutDatagrams:    Total number of sent datagrams.
-
-   These statistics derive from the UDP MIB (RFC 2013).
-
-
-  VI) IPTABLES
-
-  There is packet match support for UDP-Lite as well as support for the LOG target.
-  If you copy and paste the following line into /etc/protocols,
-
-  udplite 136     UDP-Lite        # UDP-Lite [RFC 3828]
-
-  then
-              iptables -A INPUT -p udplite -j LOG
-
-  will produce logging output to syslog. Dropping and rejecting packets also works.
-
-
-  VII) MAINTAINER ADDRESS
-
-  The UDP-Lite patch was developed at
-                    University of Aberdeen
-                    Electronics Research Group
-                    Department of Engineering
-                    Fraser Noble Building
-                    Aberdeen AB24 3UE; UK
-  The current maintainer is Gerrit Renker, <gerrit@erg.abdn.ac.uk>. Initial
-  code was developed by William  Stanislaus, <william@erg.abdn.ac.uk>.
-- 
cgit v1.2.3-59-g8ed1b


From 58ccb2b2e87d52ec0b4cbd40b94e0b63e90af873 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:25 +0200
Subject: docs: networking: convert vrf.txt to ReST

- add SPDX header;
- adjust title markup;
- Add a subtitle for the first section;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Acked-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |   1 +
 Documentation/networking/vrf.rst   | 451 +++++++++++++++++++++++++++++++++++++
 Documentation/networking/vrf.txt   | 418 ----------------------------------
 MAINTAINERS                        |   2 +-
 4 files changed, 453 insertions(+), 419 deletions(-)
 create mode 100644 Documentation/networking/vrf.rst
 delete mode 100644 Documentation/networking/vrf.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index ca0b0dbfd9ad..2227b9f4509d 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -113,6 +113,7 @@ Contents:
    tproxy
    tuntap
    udplite
+   vrf
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/vrf.rst b/Documentation/networking/vrf.rst
new file mode 100644
index 000000000000..0dde145043bc
--- /dev/null
+++ b/Documentation/networking/vrf.rst
@@ -0,0 +1,451 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================================
+Virtual Routing and Forwarding (VRF)
+====================================
+
+The VRF Device
+==============
+
+The VRF device combined with ip rules provides the ability to create virtual
+routing and forwarding domains (aka VRFs, VRF-lite to be specific) in the
+Linux network stack. One use case is the multi-tenancy problem where each
+tenant has their own unique routing tables and at the very least needs
+different default gateways.
+
+Processes can be "VRF aware" by binding a socket to the VRF device. Packets
+through the socket then use the routing table associated with the VRF
+device. An important feature of the VRF device implementation is that it
+impacts only Layer 3 and above so L2 tools (e.g., LLDP) are not affected
+(i.e., they do not need to be run in each VRF). The design also allows
+the use of higher priority ip rules (Policy Based Routing, PBR) to take
+precedence over the VRF device rules directing specific traffic as desired.
+
+In addition, VRF devices allow VRFs to be nested within namespaces. For
+example network namespaces provide separation of network interfaces at the
+device layer, VLANs on the interfaces within a namespace provide L2 separation
+and then VRF devices provide L3 separation.
+
+Design
+------
+A VRF device is created with an associated route table. Network interfaces
+are then enslaved to a VRF device::
+
+	 +-----------------------------+
+	 |           vrf-blue          |  ===> route table 10
+	 +-----------------------------+
+	    |        |            |
+	 +------+ +------+     +-------------+
+	 | eth1 | | eth2 | ... |    bond1    |
+	 +------+ +------+     +-------------+
+				  |       |
+			      +------+ +------+
+			      | eth8 | | eth9 |
+			      +------+ +------+
+
+Packets received on an enslaved device are switched to the VRF device
+in the IPv4 and IPv6 processing stacks giving the impression that packets
+flow through the VRF device. Similarly, on egress, routing rules are used to
+send packets to the VRF device driver before getting sent out the actual
+interface. This allows tcpdump on a VRF device to capture all packets into
+and out of the VRF as a whole\ [1]_. Similarly, netfilter\ [2]_ and tc rules
+can be applied using the VRF device to specify rules that apply to the VRF
+domain as a whole.
+
+.. [1] Packets in the forwarded state do not flow through the device, so those
+       packets are not seen by tcpdump. Will revisit this limitation in a
+       future release.
+
+.. [2] Iptables on ingress supports PREROUTING with skb->dev set to the real
+       ingress device and both INPUT and PREROUTING rules with skb->dev set to
+       the VRF device. For egress POSTROUTING and OUTPUT rules can be written
+       using either the VRF device or real egress device.
+
+Setup
+-----
+1. VRF device is created with an association to a FIB table.
+   e.g.::
+
+	ip link add vrf-blue type vrf table 10
+	ip link set dev vrf-blue up
+
+2. An l3mdev FIB rule directs lookups to the table associated with the device.
+   A single l3mdev rule is sufficient for all VRFs. The VRF device adds the
+   l3mdev rule for IPv4 and IPv6 when the first device is created with a
+   default preference of 1000. Users may delete the rule if desired and add
+   with a different priority or install per-VRF rules.
+
+   Prior to the v4.8 kernel iif and oif rules are needed for each VRF device::
+
+       ip ru add oif vrf-blue table 10
+       ip ru add iif vrf-blue table 10
+
+3. Set the default route for the table (and hence default route for the VRF)::
+
+       ip route add table 10 unreachable default metric 4278198272
+
+   This high metric value ensures that the default unreachable route can
+   be overridden by a routing protocol suite.  FRRouting interprets
+   kernel metrics as a combined admin distance (upper byte) and priority
+   (lower 3 bytes).  Thus the above metric translates to [255/8192].
+
+4. Enslave L3 interfaces to a VRF device::
+
+       ip link set dev eth1 master vrf-blue
+
+   Local and connected routes for enslaved devices are automatically moved to
+   the table associated with VRF device. Any additional routes depending on
+   the enslaved device are dropped and will need to be reinserted to the VRF
+   FIB table following the enslavement.
+
+   The IPv6 sysctl option keep_addr_on_down can be enabled to keep IPv6 global
+   addresses as VRF enslavement changes::
+
+       sysctl -w net.ipv6.conf.all.keep_addr_on_down=1
+
+5. Additional VRF routes are added to associated table::
+
+       ip route add table 10 ...
+
+
+Applications
+------------
+Applications that are to work within a VRF need to bind their socket to the
+VRF device::
+
+    setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, dev, strlen(dev)+1);
+
+or to specify the output device using cmsg and IP_PKTINFO.
+
+By default the scope of the port bindings for unbound sockets is
+limited to the default VRF. That is, it will not be matched by packets
+arriving on interfaces enslaved to an l3mdev and processes may bind to
+the same port if they bind to an l3mdev.
+
+TCP & UDP services running in the default VRF context (i.e., not bound
+to any VRF device) can work across all VRF domains by enabling the
+tcp_l3mdev_accept and udp_l3mdev_accept sysctl options::
+
+    sysctl -w net.ipv4.tcp_l3mdev_accept=1
+    sysctl -w net.ipv4.udp_l3mdev_accept=1
+
+These options are disabled by default so that a socket in a VRF is only
+selected for packets in that VRF. There is a similar option for RAW
+sockets, which is enabled by default for reasons of backwards compatibility.
+This makes it possible to specify the output device with cmsg and IP_PKTINFO
+while using a socket not bound to the corresponding VRF. This allows e.g. older
+ping implementations to be run by specifying the device, without executing them
+in the VRF. This option can be disabled so that packets received in a VRF
+context are only handled by a raw socket bound to the VRF, and packets in the
+default VRF are only handled by a socket not bound to any VRF::
+
+    sysctl -w net.ipv4.raw_l3mdev_accept=0
+
+netfilter rules on the VRF device can be used to limit access to services
+running in the default VRF context as well.
+
+--------------------------------------------------------------------------------
+
+Using iproute2 for VRFs
+=======================
+iproute2 supports the vrf keyword as of v4.7. For backwards compatibility this
+section lists both commands where appropriate -- with the vrf keyword and the
+older form without it.
+
+1. Create a VRF
+
+   To instantiate a VRF device and associate it with a table::
+
+       $ ip link add dev NAME type vrf table ID
+
+   As of v4.8 the kernel supports the l3mdev FIB rule where a single rule
+   covers all VRFs. The l3mdev rule is created for IPv4 and IPv6 on first
+   device create.
+
+2. List VRFs
+
+   To list VRFs that have been created::
+
+       $ ip [-d] link show type vrf
+	 NOTE: The -d option is needed to show the table id
+
+   For example::
+
+       $ ip -d link show type vrf
+       11: mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+	   link/ether 72:b3:ba:91:e2:24 brd ff:ff:ff:ff:ff:ff promiscuity 0
+	   vrf table 1 addrgenmode eui64
+       12: red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+	   link/ether b6:6f:6e:f6:da:73 brd ff:ff:ff:ff:ff:ff promiscuity 0
+	   vrf table 10 addrgenmode eui64
+       13: blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+	   link/ether 36:62:e8:7d:bb:8c brd ff:ff:ff:ff:ff:ff promiscuity 0
+	   vrf table 66 addrgenmode eui64
+       14: green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
+	   link/ether e6:28:b8:63:70:bb brd ff:ff:ff:ff:ff:ff promiscuity 0
+	   vrf table 81 addrgenmode eui64
+
+
+   Or in brief output::
+
+       $ ip -br link show type vrf
+       mgmt         UP             72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
+       red          UP             b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
+       blue         UP             36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
+       green        UP             e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
+
+
+3. Assign a Network Interface to a VRF
+
+   Network interfaces are assigned to a VRF by enslaving the netdevice to a
+   VRF device::
+
+       $ ip link set dev NAME master NAME
+
+   On enslavement connected and local routes are automatically moved to the
+   table associated with the VRF device.
+
+   For example::
+
+       $ ip link set dev eth0 master mgmt
+
+
+4. Show Devices Assigned to a VRF
+
+   To show devices that have been assigned to a specific VRF add the master
+   option to the ip command::
+
+       $ ip link show vrf NAME
+       $ ip link show master NAME
+
+   For example::
+
+       $ ip link show vrf red
+       3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
+	   link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
+       4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
+	   link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
+       7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN mode DEFAULT group default qlen 1000
+	   link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
+
+
+   Or using the brief output::
+
+       $ ip -br link show vrf red
+       eth1             UP             02:00:00:00:02:02 <BROADCAST,MULTICAST,UP,LOWER_UP>
+       eth2             UP             02:00:00:00:02:03 <BROADCAST,MULTICAST,UP,LOWER_UP>
+       eth5             DOWN           02:00:00:00:02:06 <BROADCAST,MULTICAST>
+
+
+5. Show Neighbor Entries for a VRF
+
+   To list neighbor entries associated with devices enslaved to a VRF device
+   add the master option to the ip command::
+
+       $ ip [-6] neigh show vrf NAME
+       $ ip [-6] neigh show master NAME
+
+   For example::
+
+       $  ip neigh show vrf red
+       10.2.1.254 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
+       10.2.2.254 dev eth2 lladdr 5e:54:01:6a:ee:80 REACHABLE
+
+       $ ip -6 neigh show vrf red
+       2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
+
+
+6. Show Addresses for a VRF
+
+   To show addresses for interfaces associated with a VRF add the master
+   option to the ip command::
+
+       $ ip addr show vrf NAME
+       $ ip addr show master NAME
+
+   For example::
+
+	$ ip addr show vrf red
+	3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
+	    link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
+	    inet 10.2.1.2/24 brd 10.2.1.255 scope global eth1
+	       valid_lft forever preferred_lft forever
+	    inet6 2002:1::2/120 scope global
+	       valid_lft forever preferred_lft forever
+	    inet6 fe80::ff:fe00:202/64 scope link
+	       valid_lft forever preferred_lft forever
+	4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
+	    link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
+	    inet 10.2.2.2/24 brd 10.2.2.255 scope global eth2
+	       valid_lft forever preferred_lft forever
+	    inet6 2002:2::2/120 scope global
+	       valid_lft forever preferred_lft forever
+	    inet6 fe80::ff:fe00:203/64 scope link
+	       valid_lft forever preferred_lft forever
+	7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN group default qlen 1000
+	    link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
+
+   Or in brief format::
+
+	$ ip -br addr show vrf red
+	eth1             UP             10.2.1.2/24 2002:1::2/120 fe80::ff:fe00:202/64
+	eth2             UP             10.2.2.2/24 2002:2::2/120 fe80::ff:fe00:203/64
+	eth5             DOWN
+
+
+7. Show Routes for a VRF
+
+   To show routes for a VRF use the ip command to display the table associated
+   with the VRF device::
+
+       $ ip [-6] route show vrf NAME
+       $ ip [-6] route show table ID
+
+   For example::
+
+	$ ip route show vrf red
+	unreachable default  metric 4278198272
+	broadcast 10.2.1.0 dev eth1  proto kernel  scope link  src 10.2.1.2
+	10.2.1.0/24 dev eth1  proto kernel  scope link  src 10.2.1.2
+	local 10.2.1.2 dev eth1  proto kernel  scope host  src 10.2.1.2
+	broadcast 10.2.1.255 dev eth1  proto kernel  scope link  src 10.2.1.2
+	broadcast 10.2.2.0 dev eth2  proto kernel  scope link  src 10.2.2.2
+	10.2.2.0/24 dev eth2  proto kernel  scope link  src 10.2.2.2
+	local 10.2.2.2 dev eth2  proto kernel  scope host  src 10.2.2.2
+	broadcast 10.2.2.255 dev eth2  proto kernel  scope link  src 10.2.2.2
+
+	$ ip -6 route show vrf red
+	local 2002:1:: dev lo  proto none  metric 0  pref medium
+	local 2002:1::2 dev lo  proto none  metric 0  pref medium
+	2002:1::/120 dev eth1  proto kernel  metric 256  pref medium
+	local 2002:2:: dev lo  proto none  metric 0  pref medium
+	local 2002:2::2 dev lo  proto none  metric 0  pref medium
+	2002:2::/120 dev eth2  proto kernel  metric 256  pref medium
+	local fe80:: dev lo  proto none  metric 0  pref medium
+	local fe80:: dev lo  proto none  metric 0  pref medium
+	local fe80::ff:fe00:202 dev lo  proto none  metric 0  pref medium
+	local fe80::ff:fe00:203 dev lo  proto none  metric 0  pref medium
+	fe80::/64 dev eth1  proto kernel  metric 256  pref medium
+	fe80::/64 dev eth2  proto kernel  metric 256  pref medium
+	ff00::/8 dev red  metric 256  pref medium
+	ff00::/8 dev eth1  metric 256  pref medium
+	ff00::/8 dev eth2  metric 256  pref medium
+	unreachable default dev lo  metric 4278198272  error -101 pref medium
+
+8. Route Lookup for a VRF
+
+   A test route lookup can be done for a VRF::
+
+       $ ip [-6] route get vrf NAME ADDRESS
+       $ ip [-6] route get oif NAME ADDRESS
+
+   For example::
+
+	$ ip route get 10.2.1.40 vrf red
+	10.2.1.40 dev eth1  table red  src 10.2.1.2
+	    cache
+
+	$ ip -6 route get 2002:1::32 vrf red
+	2002:1::32 from :: dev eth1  table red  proto kernel  src 2002:1::2  metric 256  pref medium
+
+
+9. Removing Network Interface from a VRF
+
+   Network interfaces are removed from a VRF by breaking the enslavement to
+   the VRF device::
+
+       $ ip link set dev NAME nomaster
+
+   Connected routes are moved back to the default table and local entries are
+   moved to the local table.
+
+   For example::
+
+    $ ip link set dev eth0 nomaster
+
+--------------------------------------------------------------------------------
+
+Commands used in this example::
+
+     cat >> /etc/iproute2/rt_tables.d/vrf.conf <<EOF
+     1  mgmt
+     10 red
+     66 blue
+     81 green
+     EOF
+
+     function vrf_create
+     {
+	 VRF=$1
+	 TBID=$2
+
+	 # create VRF device
+	 ip link add ${VRF} type vrf table ${TBID}
+
+	 if [ "${VRF}" != "mgmt" ]; then
+	     ip route add table ${TBID} unreachable default metric 4278198272
+	 fi
+	 ip link set dev ${VRF} up
+     }
+
+     vrf_create mgmt 1
+     ip link set dev eth0 master mgmt
+
+     vrf_create red 10
+     ip link set dev eth1 master red
+     ip link set dev eth2 master red
+     ip link set dev eth5 master red
+
+     vrf_create blue 66
+     ip link set dev eth3 master blue
+
+     vrf_create green 81
+     ip link set dev eth4 master green
+
+
+     Interface addresses from /etc/network/interfaces:
+     auto eth0
+     iface eth0 inet static
+	   address 10.0.0.2
+	   netmask 255.255.255.0
+	   gateway 10.0.0.254
+
+     iface eth0 inet6 static
+	   address 2000:1::2
+	   netmask 120
+
+     auto eth1
+     iface eth1 inet static
+	   address 10.2.1.2
+	   netmask 255.255.255.0
+
+     iface eth1 inet6 static
+	   address 2002:1::2
+	   netmask 120
+
+     auto eth2
+     iface eth2 inet static
+	   address 10.2.2.2
+	   netmask 255.255.255.0
+
+     iface eth2 inet6 static
+	   address 2002:2::2
+	   netmask 120
+
+     auto eth3
+     iface eth3 inet static
+	   address 10.2.3.2
+	   netmask 255.255.255.0
+
+     iface eth3 inet6 static
+	   address 2002:3::2
+	   netmask 120
+
+     auto eth4
+     iface eth4 inet static
+	   address 10.2.4.2
+	   netmask 255.255.255.0
+
+     iface eth4 inet6 static
+	   address 2002:4::2
+	   netmask 120
diff --git a/Documentation/networking/vrf.txt b/Documentation/networking/vrf.txt
deleted file mode 100644
index a5f103b083a0..000000000000
--- a/Documentation/networking/vrf.txt
+++ /dev/null
@@ -1,418 +0,0 @@
-Virtual Routing and Forwarding (VRF)
-====================================
-The VRF device combined with ip rules provides the ability to create virtual
-routing and forwarding domains (aka VRFs, VRF-lite to be specific) in the
-Linux network stack. One use case is the multi-tenancy problem where each
-tenant has their own unique routing tables and in the very least need
-different default gateways.
-
-Processes can be "VRF aware" by binding a socket to the VRF device. Packets
-through the socket then use the routing table associated with the VRF
-device. An important feature of the VRF device implementation is that it
-impacts only Layer 3 and above so L2 tools (e.g., LLDP) are not affected
-(ie., they do not need to be run in each VRF). The design also allows
-the use of higher priority ip rules (Policy Based Routing, PBR) to take
-precedence over the VRF device rules directing specific traffic as desired.
-
-In addition, VRF devices allow VRFs to be nested within namespaces. For
-example network namespaces provide separation of network interfaces at the
-device layer, VLANs on the interfaces within a namespace provide L2 separation
-and then VRF devices provide L3 separation.
-
-Design
-------
-A VRF device is created with an associated route table. Network interfaces
-are then enslaved to a VRF device:
-
-         +-----------------------------+
-         |           vrf-blue          |  ===> route table 10
-         +-----------------------------+
-            |        |            |
-         +------+ +------+     +-------------+
-         | eth1 | | eth2 | ... |    bond1    |
-         +------+ +------+     +-------------+
-                                  |       |
-                              +------+ +------+
-                              | eth8 | | eth9 |
-                              +------+ +------+
-
-Packets received on an enslaved device and are switched to the VRF device
-in the IPv4 and IPv6 processing stacks giving the impression that packets
-flow through the VRF device. Similarly on egress routing rules are used to
-send packets to the VRF device driver before getting sent out the actual
-interface. This allows tcpdump on a VRF device to capture all packets into
-and out of the VRF as a whole.[1] Similarly, netfilter[2] and tc rules can be
-applied using the VRF device to specify rules that apply to the VRF domain
-as a whole.
-
-[1] Packets in the forwarded state do not flow through the device, so those
-    packets are not seen by tcpdump. Will revisit this limitation in a
-    future release.
-
-[2] Iptables on ingress supports PREROUTING with skb->dev set to the real
-    ingress device and both INPUT and PREROUTING rules with skb->dev set to
-    the VRF device. For egress POSTROUTING and OUTPUT rules can be written
-    using either the VRF device or real egress device.
-
-Setup
------
-1. VRF device is created with an association to a FIB table.
-   e.g, ip link add vrf-blue type vrf table 10
-        ip link set dev vrf-blue up
-
-2. An l3mdev FIB rule directs lookups to the table associated with the device.
-   A single l3mdev rule is sufficient for all VRFs. The VRF device adds the
-   l3mdev rule for IPv4 and IPv6 when the first device is created with a
-   default preference of 1000. Users may delete the rule if desired and add
-   with a different priority or install per-VRF rules.
-
-   Prior to the v4.8 kernel iif and oif rules are needed for each VRF device:
-       ip ru add oif vrf-blue table 10
-       ip ru add iif vrf-blue table 10
-
-3. Set the default route for the table (and hence default route for the VRF).
-       ip route add table 10 unreachable default metric 4278198272
-
-   This high metric value ensures that the default unreachable route can
-   be overridden by a routing protocol suite.  FRRouting interprets
-   kernel metrics as a combined admin distance (upper byte) and priority
-   (lower 3 bytes).  Thus the above metric translates to [255/8192].
-
-4. Enslave L3 interfaces to a VRF device.
-       ip link set dev eth1 master vrf-blue
-
-   Local and connected routes for enslaved devices are automatically moved to
-   the table associated with VRF device. Any additional routes depending on
-   the enslaved device are dropped and will need to be reinserted to the VRF
-   FIB table following the enslavement.
-
-   The IPv6 sysctl option keep_addr_on_down can be enabled to keep IPv6 global
-   addresses as VRF enslavement changes.
-       sysctl -w net.ipv6.conf.all.keep_addr_on_down=1
-
-5. Additional VRF routes are added to associated table.
-       ip route add table 10 ...
-
-
-Applications
-------------
-Applications that are to work within a VRF need to bind their socket to the
-VRF device:
-
-    setsockopt(sd, SOL_SOCKET, SO_BINDTODEVICE, dev, strlen(dev)+1);
-
-or to specify the output device using cmsg and IP_PKTINFO.
-
-By default the scope of the port bindings for unbound sockets is
-limited to the default VRF. That is, it will not be matched by packets
-arriving on interfaces enslaved to an l3mdev and processes may bind to
-the same port if they bind to an l3mdev.
-
-TCP & UDP services running in the default VRF context (ie., not bound
-to any VRF device) can work across all VRF domains by enabling the
-tcp_l3mdev_accept and udp_l3mdev_accept sysctl options:
-
-    sysctl -w net.ipv4.tcp_l3mdev_accept=1
-    sysctl -w net.ipv4.udp_l3mdev_accept=1
-
-These options are disabled by default so that a socket in a VRF is only
-selected for packets in that VRF. There is a similar option for RAW
-sockets, which is enabled by default for reasons of backwards compatibility.
-This is so as to specify the output device with cmsg and IP_PKTINFO, but
-using a socket not bound to the corresponding VRF. This allows e.g. older ping
-implementations to be run with specifying the device but without executing it
-in the VRF. This option can be disabled so that packets received in a VRF
-context are only handled by a raw socket bound to the VRF, and packets in the
-default VRF are only handled by a socket not bound to any VRF:
-
-    sysctl -w net.ipv4.raw_l3mdev_accept=0
-
-netfilter rules on the VRF device can be used to limit access to services
-running in the default VRF context as well.
-
-################################################################################
-
-Using iproute2 for VRFs
-=======================
-iproute2 supports the vrf keyword as of v4.7. For backwards compatibility this
-section lists both commands where appropriate -- with the vrf keyword and the
-older form without it.
-
-1. Create a VRF
-
-   To instantiate a VRF device and associate it with a table:
-       $ ip link add dev NAME type vrf table ID
-
-   As of v4.8 the kernel supports the l3mdev FIB rule where a single rule
-   covers all VRFs. The l3mdev rule is created for IPv4 and IPv6 on first
-   device create.
-
-2. List VRFs
-
-   To list VRFs that have been created:
-       $ ip [-d] link show type vrf
-         NOTE: The -d option is needed to show the table id
-
-   For example:
-   $ ip -d link show type vrf
-   11: mgmt: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
-       link/ether 72:b3:ba:91:e2:24 brd ff:ff:ff:ff:ff:ff promiscuity 0
-       vrf table 1 addrgenmode eui64
-   12: red: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
-       link/ether b6:6f:6e:f6:da:73 brd ff:ff:ff:ff:ff:ff promiscuity 0
-       vrf table 10 addrgenmode eui64
-   13: blue: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
-       link/ether 36:62:e8:7d:bb:8c brd ff:ff:ff:ff:ff:ff promiscuity 0
-       vrf table 66 addrgenmode eui64
-   14: green: <NOARP,MASTER,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast state UP mode DEFAULT group default qlen 1000
-       link/ether e6:28:b8:63:70:bb brd ff:ff:ff:ff:ff:ff promiscuity 0
-       vrf table 81 addrgenmode eui64
-
-
-   Or in brief output:
-
-   $ ip -br link show type vrf
-   mgmt         UP             72:b3:ba:91:e2:24 <NOARP,MASTER,UP,LOWER_UP>
-   red          UP             b6:6f:6e:f6:da:73 <NOARP,MASTER,UP,LOWER_UP>
-   blue         UP             36:62:e8:7d:bb:8c <NOARP,MASTER,UP,LOWER_UP>
-   green        UP             e6:28:b8:63:70:bb <NOARP,MASTER,UP,LOWER_UP>
-
-
-3. Assign a Network Interface to a VRF
-
-   Network interfaces are assigned to a VRF by enslaving the netdevice to a
-   VRF device:
-       $ ip link set dev NAME master NAME
-
-   On enslavement connected and local routes are automatically moved to the
-   table associated with the VRF device.
-
-   For example:
-   $ ip link set dev eth0 master mgmt
-
-
-4. Show Devices Assigned to a VRF
-
-   To show devices that have been assigned to a specific VRF add the master
-   option to the ip command:
-       $ ip link show vrf NAME
-       $ ip link show master NAME
-
-   For example:
-   $ ip link show vrf red
-   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
-       link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
-   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP mode DEFAULT group default qlen 1000
-       link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
-   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN mode DEFAULT group default qlen 1000
-       link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
-
-
-   Or using the brief output:
-   $ ip -br link show vrf red
-   eth1             UP             02:00:00:00:02:02 <BROADCAST,MULTICAST,UP,LOWER_UP>
-   eth2             UP             02:00:00:00:02:03 <BROADCAST,MULTICAST,UP,LOWER_UP>
-   eth5             DOWN           02:00:00:00:02:06 <BROADCAST,MULTICAST>
-
-
-5. Show Neighbor Entries for a VRF
-
-   To list neighbor entries associated with devices enslaved to a VRF device
-   add the master option to the ip command:
-       $ ip [-6] neigh show vrf NAME
-       $ ip [-6] neigh show master NAME
-
-   For example:
-   $  ip neigh show vrf red
-   10.2.1.254 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
-   10.2.2.254 dev eth2 lladdr 5e:54:01:6a:ee:80 REACHABLE
-
-   $ ip -6 neigh show vrf red
-   2002:1::64 dev eth1 lladdr a6:d9:c7:4f:06:23 REACHABLE
-
-
-6. Show Addresses for a VRF
-
-   To show addresses for interfaces associated with a VRF add the master
-   option to the ip command:
-       $ ip addr show vrf NAME
-       $ ip addr show master NAME
-
-   For example:
-   $ ip addr show vrf red
-   3: eth1: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
-       link/ether 02:00:00:00:02:02 brd ff:ff:ff:ff:ff:ff
-       inet 10.2.1.2/24 brd 10.2.1.255 scope global eth1
-          valid_lft forever preferred_lft forever
-       inet6 2002:1::2/120 scope global
-          valid_lft forever preferred_lft forever
-       inet6 fe80::ff:fe00:202/64 scope link
-          valid_lft forever preferred_lft forever
-   4: eth2: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc pfifo_fast master red state UP group default qlen 1000
-       link/ether 02:00:00:00:02:03 brd ff:ff:ff:ff:ff:ff
-       inet 10.2.2.2/24 brd 10.2.2.255 scope global eth2
-          valid_lft forever preferred_lft forever
-       inet6 2002:2::2/120 scope global
-          valid_lft forever preferred_lft forever
-       inet6 fe80::ff:fe00:203/64 scope link
-          valid_lft forever preferred_lft forever
-   7: eth5: <BROADCAST,MULTICAST> mtu 1500 qdisc noop master red state DOWN group default qlen 1000
-       link/ether 02:00:00:00:02:06 brd ff:ff:ff:ff:ff:ff
-
-   Or in brief format:
-   $ ip -br addr show vrf red
-   eth1             UP             10.2.1.2/24 2002:1::2/120 fe80::ff:fe00:202/64
-   eth2             UP             10.2.2.2/24 2002:2::2/120 fe80::ff:fe00:203/64
-   eth5             DOWN
-
-
-7. Show Routes for a VRF
-
-   To show routes for a VRF use the ip command to display the table associated
-   with the VRF device:
-       $ ip [-6] route show vrf NAME
-       $ ip [-6] route show table ID
-
-   For example:
-   $ ip route show vrf red
-   unreachable default  metric 4278198272
-   broadcast 10.2.1.0 dev eth1  proto kernel  scope link  src 10.2.1.2
-   10.2.1.0/24 dev eth1  proto kernel  scope link  src 10.2.1.2
-   local 10.2.1.2 dev eth1  proto kernel  scope host  src 10.2.1.2
-   broadcast 10.2.1.255 dev eth1  proto kernel  scope link  src 10.2.1.2
-   broadcast 10.2.2.0 dev eth2  proto kernel  scope link  src 10.2.2.2
-   10.2.2.0/24 dev eth2  proto kernel  scope link  src 10.2.2.2
-   local 10.2.2.2 dev eth2  proto kernel  scope host  src 10.2.2.2
-   broadcast 10.2.2.255 dev eth2  proto kernel  scope link  src 10.2.2.2
-
-   $ ip -6 route show vrf red
-   local 2002:1:: dev lo  proto none  metric 0  pref medium
-   local 2002:1::2 dev lo  proto none  metric 0  pref medium
-   2002:1::/120 dev eth1  proto kernel  metric 256  pref medium
-   local 2002:2:: dev lo  proto none  metric 0  pref medium
-   local 2002:2::2 dev lo  proto none  metric 0  pref medium
-   2002:2::/120 dev eth2  proto kernel  metric 256  pref medium
-   local fe80:: dev lo  proto none  metric 0  pref medium
-   local fe80:: dev lo  proto none  metric 0  pref medium
-   local fe80::ff:fe00:202 dev lo  proto none  metric 0  pref medium
-   local fe80::ff:fe00:203 dev lo  proto none  metric 0  pref medium
-   fe80::/64 dev eth1  proto kernel  metric 256  pref medium
-   fe80::/64 dev eth2  proto kernel  metric 256  pref medium
-   ff00::/8 dev red  metric 256  pref medium
-   ff00::/8 dev eth1  metric 256  pref medium
-   ff00::/8 dev eth2  metric 256  pref medium
-   unreachable default dev lo  metric 4278198272  error -101 pref medium
-
-8. Route Lookup for a VRF
-
-   A test route lookup can be done for a VRF:
-       $ ip [-6] route get vrf NAME ADDRESS
-       $ ip [-6] route get oif NAME ADDRESS
-
-   For example:
-   $ ip route get 10.2.1.40 vrf red
-   10.2.1.40 dev eth1  table red  src 10.2.1.2
-       cache
-
-   $ ip -6 route get 2002:1::32 vrf red
-   2002:1::32 from :: dev eth1  table red  proto kernel  src 2002:1::2  metric 256  pref medium
-
-
-9. Removing Network Interface from a VRF
-
-   Network interfaces are removed from a VRF by breaking the enslavement to
-   the VRF device:
-       $ ip link set dev NAME nomaster
-
-   Connected routes are moved back to the default table and local entries are
-   moved to the local table.
-
-   For example:
-   $ ip link set dev eth0 nomaster
-
---------------------------------------------------------------------------------
-
-Commands used in this example:
-
-cat >> /etc/iproute2/rt_tables.d/vrf.conf <<EOF
-1  mgmt
-10 red
-66 blue
-81 green
-EOF
-
-function vrf_create
-{
-    VRF=$1
-    TBID=$2
-
-    # create VRF device
-    ip link add ${VRF} type vrf table ${TBID}
-
-    if [ "${VRF}" != "mgmt" ]; then
-        ip route add table ${TBID} unreachable default metric 4278198272
-    fi
-    ip link set dev ${VRF} up
-}
-
-vrf_create mgmt 1
-ip link set dev eth0 master mgmt
-
-vrf_create red 10
-ip link set dev eth1 master red
-ip link set dev eth2 master red
-ip link set dev eth5 master red
-
-vrf_create blue 66
-ip link set dev eth3 master blue
-
-vrf_create green 81
-ip link set dev eth4 master green
-
-
-Interface addresses from /etc/network/interfaces:
-auto eth0
-iface eth0 inet static
-      address 10.0.0.2
-      netmask 255.255.255.0
-      gateway 10.0.0.254
-
-iface eth0 inet6 static
-      address 2000:1::2
-      netmask 120
-
-auto eth1
-iface eth1 inet static
-      address 10.2.1.2
-      netmask 255.255.255.0
-
-iface eth1 inet6 static
-      address 2002:1::2
-      netmask 120
-
-auto eth2
-iface eth2 inet static
-      address 10.2.2.2
-      netmask 255.255.255.0
-
-iface eth2 inet6 static
-      address 2002:2::2
-      netmask 120
-
-auto eth3
-iface eth3 inet static
-      address 10.2.3.2
-      netmask 255.255.255.0
-
-iface eth3 inet6 static
-      address 2002:3::2
-      netmask 120
-
-auto eth4
-iface eth4 inet static
-      address 10.2.4.2
-      netmask 255.255.255.0
-
-iface eth4 inet6 static
-      address 2002:4::2
-      netmask 120
diff --git a/MAINTAINERS b/MAINTAINERS
index 6456c5bb02f1..d59455c27c42 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18106,7 +18106,7 @@ M:	David Ahern <dsahern@kernel.org>
 M:	Shrijeet Mukherjee <shrijeet@gmail.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/vrf.txt
+F:	Documentation/networking/vrf.rst
 F:	drivers/net/vrf.c
 
 VSPRINTF
-- 
cgit v1.2.3-59-g8ed1b


From d2a85c184ac6e738daa5e42f89b1f353910d6a89 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:26 +0200
Subject: docs: networking: convert vxlan.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/vxlan.rst | 60 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/vxlan.txt | 51 --------------------------------
 3 files changed, 61 insertions(+), 51 deletions(-)
 create mode 100644 Documentation/networking/vxlan.rst
 delete mode 100644 Documentation/networking/vxlan.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 2227b9f4509d..a72fdfb391b6 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -114,6 +114,7 @@ Contents:
    tuntap
    udplite
    vrf
+   vxlan
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/vxlan.rst b/Documentation/networking/vxlan.rst
new file mode 100644
index 000000000000..ce239fa01848
--- /dev/null
+++ b/Documentation/networking/vxlan.rst
@@ -0,0 +1,60 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Virtual eXtensible Local Area Networking documentation
+======================================================
+
+The VXLAN protocol is a tunnelling protocol designed to solve the
+problem of limited VLAN IDs (4096) in IEEE 802.1q.  With VXLAN the
+size of the identifier is expanded to 24 bits (16777216).
+
+VXLAN is described by IETF RFC 7348, and has been implemented by a
+number of vendors.  The protocol runs over UDP using a single
+destination port.  This document describes the Linux kernel tunnel
+device; there is also a separate implementation of VXLAN for
+Openvswitch.
+
+Unlike most tunnels, a VXLAN is a 1 to N network, not just point to
+point. A VXLAN device can learn the IP address of the other endpoint
+either dynamically in a manner similar to a learning bridge, or make
+use of statically-configured forwarding entries.
+
+The management of vxlan is done in a manner similar to its two closest
+neighbors GRE and VLAN. Configuring VXLAN requires the version of
+iproute2 that matches the kernel release where VXLAN was first merged
+upstream.
+
+1. Create vxlan device::
+
+    # ip link add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth1 dstport 4789
+
+This creates a new device named vxlan0.  The device uses the multicast
+group 239.1.1.1 over eth1 to handle traffic for which there is no
+entry in the forwarding table.  The destination port number is set to
+the IANA-assigned value of 4789.  The Linux implementation of VXLAN
+pre-dates the IANA's selection of a standard destination port number
+and uses the Linux-selected value by default to maintain backwards
+compatibility.
+
+2. Delete vxlan device::
+
+    # ip link delete vxlan0
+
+3. Show vxlan info::
+
+    # ip -d link show vxlan0
+
+It is possible to create, destroy and display the vxlan
+forwarding table using the new bridge command.
+
+1. Create forwarding table entry::
+
+    # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0
+
+2. Delete forwarding table entry::
+
+    # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0
+
+3. Show forwarding table::
+
+    # bridge fdb show dev vxlan0
diff --git a/Documentation/networking/vxlan.txt b/Documentation/networking/vxlan.txt
deleted file mode 100644
index c28f4989c3f0..000000000000
--- a/Documentation/networking/vxlan.txt
+++ /dev/null
@@ -1,51 +0,0 @@
-Virtual eXtensible Local Area Networking documentation
-======================================================
-
-The VXLAN protocol is a tunnelling protocol designed to solve the
-problem of limited VLAN IDs (4096) in IEEE 802.1q.  With VXLAN the
-size of the identifier is expanded to 24 bits (16777216).
-
-VXLAN is described by IETF RFC 7348, and has been implemented by a
-number of vendors.  The protocol runs over UDP using a single
-destination port.  This document describes the Linux kernel tunnel
-device, there is also a separate implementation of VXLAN for
-Openvswitch.
-
-Unlike most tunnels, a VXLAN is a 1 to N network, not just point to
-point. A VXLAN device can learn the IP address of the other endpoint
-either dynamically in a manner similar to a learning bridge, or make
-use of statically-configured forwarding entries.
-
-The management of vxlan is done in a manner similar to its two closest
-neighbors GRE and VLAN. Configuring VXLAN requires the version of
-iproute2 that matches the kernel release where VXLAN was first merged
-upstream.
-
-1. Create vxlan device
- # ip link add vxlan0 type vxlan id 42 group 239.1.1.1 dev eth1 dstport 4789
-
-This creates a new device named vxlan0.  The device uses the multicast
-group 239.1.1.1 over eth1 to handle traffic for which there is no
-entry in the forwarding table.  The destination port number is set to
-the IANA-assigned value of 4789.  The Linux implementation of VXLAN
-pre-dates the IANA's selection of a standard destination port number
-and uses the Linux-selected value by default to maintain backwards
-compatibility.
-
-2. Delete vxlan device
-  # ip link delete vxlan0
-
-3. Show vxlan info
-  # ip -d link show vxlan0
-
-It is possible to create, destroy and display the vxlan
-forwarding table using the new bridge command.
-
-1. Create forwarding table entry
-  # bridge fdb add to 00:17:42:8a:b4:05 dst 192.19.0.2 dev vxlan0
-
-2. Delete forwarding table entry
-  # bridge fdb delete 00:17:42:8a:b4:05 dev vxlan0
-
-3. Show forwarding table
-  # bridge fdb show dev vxlan0
-- 
cgit v1.2.3-59-g8ed1b


From 883780af72090daf9ab53779a3085a6ddfc468ca Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:27 +0200
Subject: docs: networking: convert x25-iface.txt to ReST

Not much to be done here:

- add SPDX header;
- adjust title markup;
- remove a trailing whitespace;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |   1 +
 Documentation/networking/x25-iface.rst | 129 +++++++++++++++++++++++++++++++++
 Documentation/networking/x25-iface.txt | 123 -------------------------------
 include/uapi/linux/if_x25.h            |   2 +-
 net/x25/Kconfig                        |   2 +-
 5 files changed, 132 insertions(+), 125 deletions(-)
 create mode 100644 Documentation/networking/x25-iface.rst
 delete mode 100644 Documentation/networking/x25-iface.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index a72fdfb391b6..7a4bdbc111b0 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -115,6 +115,7 @@ Contents:
    udplite
    vrf
    vxlan
+   x25-iface
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/x25-iface.rst b/Documentation/networking/x25-iface.rst
new file mode 100644
index 000000000000..df401891dce6
--- /dev/null
+++ b/Documentation/networking/x25-iface.rst
@@ -0,0 +1,129 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================
+X.25 Device Driver Interface
+============================
+
+Version 1.1
+
+			   Jonathan Naylor 26.12.96
+
+This is a description of the messages to be passed between the X.25 Packet
+Layer and the X.25 device driver. They are designed to allow for the easy
+setting of the LAPB mode from within the Packet Layer.
+
+The X.25 device driver will be coded normally as per the Linux device driver
+standards. Most X.25 device drivers will be moderately similar to the
+already existing Ethernet device drivers. However unlike those drivers, the
+X.25 device driver has a state associated with it, and this information
+needs to be passed to and from the Packet Layer for proper operation.
+
+All messages are held in sk_buff's just like real data to be transmitted
+over the LAPB link. The first byte of the skbuff indicates the meaning of
+the rest of the skbuff, if any more information does exist.
+
+
+Packet Layer to Device Driver
+-----------------------------
+
+First Byte = 0x00 (X25_IFACE_DATA)
+
+This indicates that the rest of the skbuff contains data to be transmitted
+over the LAPB link. The LAPB link should already exist before any data is
+passed down.
+
+First Byte = 0x01 (X25_IFACE_CONNECT)
+
+Establish the LAPB link. If the link is already established then the connect
+confirmation message should be returned as soon as possible.
+
+First Byte = 0x02 (X25_IFACE_DISCONNECT)
+
+Terminate the LAPB link. If it is already disconnected then the disconnect
+confirmation message should be returned as soon as possible.
+
+First Byte = 0x03 (X25_IFACE_PARAMS)
+
+LAPB parameters. To be defined.
+
+
+Device Driver to Packet Layer
+-----------------------------
+
+First Byte = 0x00 (X25_IFACE_DATA)
+
+This indicates that the rest of the skbuff contains data that has been
+received over the LAPB link.
+
+First Byte = 0x01 (X25_IFACE_CONNECT)
+
+LAPB link has been established. The same message is used for both a LAPB
+link connect_confirmation and a connect_indication.
+
+First Byte = 0x02 (X25_IFACE_DISCONNECT)
+
+LAPB link has been terminated. This same message is used for both a LAPB
+link disconnect_confirmation and a disconnect_indication.
+
+First Byte = 0x03 (X25_IFACE_PARAMS)
+
+LAPB parameters. To be defined.
+
+
+
+Possible Problems
+=================
+
+(Henner Eisen, 2000-10-28)
+
+The X.25 packet layer protocol depends on a reliable datalink service.
+The LAPB protocol provides such reliable service. But this reliability
+is not preserved by the Linux network device driver interface:
+
+- With Linux 2.4.x (and above) SMP kernels, packet ordering is not
+  preserved. Even if a device driver calls netif_rx(skb1) and later
+  netif_rx(skb2), skb2 might be delivered to the network layer
+  earlier than skb1.
+- Data passed upstream by means of netif_rx() might be dropped by the
+  kernel if the backlog queue is congested.
+
+The X.25 packet layer protocol will detect this and reset the virtual
+call in question. But many upper layer protocols are not designed to
+handle such N-Reset events gracefully. And frequent N-Reset events
+will always degrade performance.
+
+Thus, driver authors should make netif_rx() as reliable as possible:
+
+SMP re-ordering will not occur if the driver's interrupt handler is
+always executed on the same CPU. Thus,
+
+- Driver authors should use irq affinity for the interrupt handler.
+
+The probability of packet loss due to backlog congestion can be
+reduced by the following measures or a combination thereof:
+
+(1) Drivers for kernel versions 2.4.x and above should always check the
+    return value of netif_rx(). If it returns NET_RX_DROP, the
+    driver's LAPB protocol must not confirm reception of the frame
+    to the peer.
+    This will reliably suppress packet loss. The LAPB protocol will
+    automatically cause the peer to re-transmit the dropped packet
+    later.
+    The lapb module interface was modified to support this. Its
+    data_indication() method should now transparently pass the
+    netif_rx() return value to the (lapb module) caller.
+(2) Drivers for kernel versions 2.2.x should always check the global
+    variable netdev_dropping when a new frame is received. The driver
+    should only call netif_rx() if netdev_dropping is zero. Otherwise
+    the driver should not confirm delivery of the frame and drop it.
+    Alternatively, the driver can queue the frame internally and call
+    netif_rx() later when netdev_dropping is 0 again. In that case, delivery
+    confirmation should also be deferred such that the internal queue
+    cannot grow too much.
+    This will not reliably avoid packet loss, but the probability
+    of packet loss in netif_rx() path will be significantly reduced.
+(3) Additionally, driver authors might consider supporting
+    CONFIG_NET_HW_FLOWCONTROL. This allows the driver to be woken up
+    when a previously congested backlog queue becomes empty again.
+    The driver could use this for flow-controlling the peer by means
+    of the LAPB protocol's flow-control service.
diff --git a/Documentation/networking/x25-iface.txt b/Documentation/networking/x25-iface.txt
deleted file mode 100644
index 7f213b556e85..000000000000
--- a/Documentation/networking/x25-iface.txt
+++ /dev/null
@@ -1,123 +0,0 @@
-			X.25 Device Driver Interface 1.1
-
-			   Jonathan Naylor 26.12.96
-
-This is a description of the messages to be passed between the X.25 Packet
-Layer and the X.25 device driver. They are designed to allow for the easy
-setting of the LAPB mode from within the Packet Layer.
-
-The X.25 device driver will be coded normally as per the Linux device driver
-standards. Most X.25 device drivers will be moderately similar to the
-already existing Ethernet device drivers. However unlike those drivers, the
-X.25 device driver has a state associated with it, and this information
-needs to be passed to and from the Packet Layer for proper operation.
-
-All messages are held in sk_buff's just like real data to be transmitted
-over the LAPB link. The first byte of the skbuff indicates the meaning of
-the rest of the skbuff, if any more information does exist.
-
-
-Packet Layer to Device Driver
------------------------------
-
-First Byte = 0x00 (X25_IFACE_DATA)
-
-This indicates that the rest of the skbuff contains data to be transmitted
-over the LAPB link. The LAPB link should already exist before any data is
-passed down.
-
-First Byte = 0x01 (X25_IFACE_CONNECT)
-
-Establish the LAPB link. If the link is already established then the connect
-confirmation message should be returned as soon as possible.
-
-First Byte = 0x02 (X25_IFACE_DISCONNECT)
-
-Terminate the LAPB link. If it is already disconnected then the disconnect
-confirmation message should be returned as soon as possible.
-
-First Byte = 0x03 (X25_IFACE_PARAMS)
-
-LAPB parameters. To be defined.
-
-
-Device Driver to Packet Layer
------------------------------
-
-First Byte = 0x00 (X25_IFACE_DATA)
-
-This indicates that the rest of the skbuff contains data that has been
-received over the LAPB link.
-
-First Byte = 0x01 (X25_IFACE_CONNECT)
-
-LAPB link has been established. The same message is used for both a LAPB
-link connect_confirmation and a connect_indication.
-
-First Byte = 0x02 (X25_IFACE_DISCONNECT)
-
-LAPB link has been terminated. This same message is used for both a LAPB
-link disconnect_confirmation and a disconnect_indication.
-
-First Byte = 0x03 (X25_IFACE_PARAMS)
-
-LAPB parameters. To be defined.
-
-
-
-Possible Problems
-=================
-
-(Henner Eisen, 2000-10-28)
-
-The X.25 packet layer protocol depends on a reliable datalink service.
-The LAPB protocol provides such reliable service. But this reliability
-is not preserved by the Linux network device driver interface:
-
-- With Linux 2.4.x (and above) SMP kernels, packet ordering is not
-  preserved. Even if a device driver calls netif_rx(skb1) and later
-  netif_rx(skb2), skb2 might be delivered to the network layer
-  earlier that skb1.
-- Data passed upstream by means of netif_rx() might be dropped by the
-  kernel if the backlog queue is congested.
-
-The X.25 packet layer protocol will detect this and reset the virtual
-call in question. But many upper layer protocols are not designed to
-handle such N-Reset events gracefully. And frequent N-Reset events
-will always degrade performance.
-
-Thus, driver authors should make netif_rx() as reliable as possible:
-
-SMP re-ordering will not occur if the driver's interrupt handler is
-always executed on the same CPU. Thus,
-
-- Driver authors should use irq affinity for the interrupt handler.
-
-The probability of packet loss due to backlog congestion can be
-reduced by the following measures or a combination thereof:
-
-(1) Drivers for kernel versions 2.4.x and above should always check the
-    return value of netif_rx(). If it returns NET_RX_DROP, the
-    driver's LAPB protocol must not confirm reception of the frame
-    to the peer. 
-    This will reliably suppress packet loss. The LAPB protocol will
-    automatically cause the peer to re-transmit the dropped packet
-    later.
-    The lapb module interface was modified to support this. Its
-    data_indication() method should now transparently pass the
-    netif_rx() return value to the (lapb module) caller.
-(2) Drivers for kernel versions 2.2.x should always check the global
-    variable netdev_dropping when a new frame is received. The driver
-    should only call netif_rx() if netdev_dropping is zero. Otherwise
-    the driver should not confirm delivery of the frame and drop it.
-    Alternatively, the driver can queue the frame internally and call
-    netif_rx() later when netif_dropping is 0 again. In that case, delivery
-    confirmation should also be deferred such that the internal queue
-    cannot grow to much.
-    This will not reliably avoid packet loss, but the probability
-    of packet loss in netif_rx() path will be significantly reduced.
-(3) Additionally, driver authors might consider to support
-    CONFIG_NET_HW_FLOWCONTROL. This allows the driver to be woken up
-    when a previously congested backlog queue becomes empty again.
-    The driver could uses this for flow-controlling the peer by means
-    of the LAPB protocol's flow-control service.
diff --git a/include/uapi/linux/if_x25.h b/include/uapi/linux/if_x25.h
index 5d962448345f..3a5938e38370 100644
--- a/include/uapi/linux/if_x25.h
+++ b/include/uapi/linux/if_x25.h
@@ -18,7 +18,7 @@
 
 #include <linux/types.h>
 
-/* Documentation/networking/x25-iface.txt */
+/* Documentation/networking/x25-iface.rst */
 #define X25_IFACE_DATA		0x00
 #define X25_IFACE_CONNECT	0x01
 #define X25_IFACE_DISCONNECT	0x02
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index 2ecb2e5e241e..a328f79885d1 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -21,7 +21,7 @@ config X25
 	  <http://docwiki.cisco.com/wiki/X.25>.
 	  Information about X.25 for Linux is contained in the files
 	  <file:Documentation/networking/x25.txt> and
-	  <file:Documentation/networking/x25-iface.txt>.
+	  <file:Documentation/networking/x25-iface.rst>.
 
 	  One connects to an X.25 network either with a dedicated network card
 	  using the X.21 protocol (not yet supported by Linux) or one can do
-- 
cgit v1.2.3-59-g8ed1b


From c4ea03fdfd122b4ff293bff643c2369852e9cc1c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:28 +0200
Subject: docs: networking: convert x25.txt to ReST

Not much to be done here:
- add SPDX header;
- add a document title;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst |  1 +
 Documentation/networking/x25.rst   | 48 ++++++++++++++++++++++++++++++++++++++
 Documentation/networking/x25.txt   | 44 ----------------------------------
 net/x25/Kconfig                    |  2 +-
 4 files changed, 50 insertions(+), 45 deletions(-)
 create mode 100644 Documentation/networking/x25.rst
 delete mode 100644 Documentation/networking/x25.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 7a4bdbc111b0..75521e6c473b 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -116,6 +116,7 @@ Contents:
    vrf
    vxlan
    x25-iface
+   x25
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/x25.rst b/Documentation/networking/x25.rst
new file mode 100644
index 000000000000..00e45d384ba0
--- /dev/null
+++ b/Documentation/networking/x25.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================
+Linux X.25 Project
+==================
+
+As my third year dissertation at University I have taken it upon myself to
+write an X.25 implementation for Linux. My aim is to provide a complete X.25
+Packet Layer and a LAPB module to allow for "normal" X.25 to be run using
+Linux. There are two sorts of X.25 cards available, intelligent ones that
+implement LAPB on the card itself, and unintelligent ones that simply do
+framing, bit-stuffing and checksumming. These both need to be handled by the
+system.
+
+I therefore decided to write the implementation such that as far as the
+Packet Layer is concerned, the link layer was being performed by a lower
+layer of the Linux kernel and therefore it did not concern itself with
+implementation of LAPB. Therefore the LAPB modules would be called by
+unintelligent X.25 card drivers and not by intelligent ones, this would
+provide a uniform device driver interface, and simplify configuration.
+
+To confuse matters a little, an 802.2 LLC implementation for Linux is being
+written which will allow X.25 to be run over an Ethernet (or Token Ring) and
+conform with the JNT "Pink Book", this will have a different interface to
+the Packet Layer but there will be no confusion since the class of device
+being served by the LLC will be completely separate from LAPB. The LLC
+implementation is being done as part of another protocol project (SNA) and
+by a different author.
+
+Just when you thought that it could not become more confusing, another
+option appeared, XOT. This allows X.25 Packet Layer frames to operate over
+the Internet using TCP/IP as a reliable link layer. RFC1613 specifies the
+format and behaviour of the protocol. If time permits this option will also
+be actively considered.
+
+A linux-x25 mailing list has been created at vger.kernel.org to support the
+development and use of Linux X.25. It is early days yet, but interested
+parties are welcome to subscribe to it. Just send a message to
+majordomo@vger.kernel.org with the following in the message body:
+
+subscribe linux-x25
+end
+
+The contents of the Subject line are ignored.
+
+Jonathan
+
+g4klx@g4klx.demon.co.uk
diff --git a/Documentation/networking/x25.txt b/Documentation/networking/x25.txt
deleted file mode 100644
index c91c6d7159ff..000000000000
--- a/Documentation/networking/x25.txt
+++ /dev/null
@@ -1,44 +0,0 @@
-Linux X.25 Project
-
-As my third year dissertation at University I have taken it upon myself to
-write an X.25 implementation for Linux. My aim is to provide a complete X.25
-Packet Layer and a LAPB module to allow for "normal" X.25 to be run using
-Linux. There are two sorts of X.25 cards available, intelligent ones that
-implement LAPB on the card itself, and unintelligent ones that simply do
-framing, bit-stuffing and checksumming. These both need to be handled by the
-system.
-
-I therefore decided to write the implementation such that as far as the
-Packet Layer is concerned, the link layer was being performed by a lower
-layer of the Linux kernel and therefore it did not concern itself with
-implementation of LAPB. Therefore the LAPB modules would be called by
-unintelligent X.25 card drivers and not by intelligent ones, this would
-provide a uniform device driver interface, and simplify configuration.
-
-To confuse matters a little, an 802.2 LLC implementation for Linux is being
-written which will allow X.25 to be run over an Ethernet (or Token Ring) and
-conform with the JNT "Pink Book", this will have a different interface to
-the Packet Layer but there will be no confusion since the class of device
-being served by the LLC will be completely separate from LAPB. The LLC
-implementation is being done as part of another protocol project (SNA) and
-by a different author.
-
-Just when you thought that it could not become more confusing, another
-option appeared, XOT. This allows X.25 Packet Layer frames to operate over
-the Internet using TCP/IP as a reliable link layer. RFC1613 specifies the
-format and behaviour of the protocol. If time permits this option will also
-be actively considered.
-
-A linux-x25 mailing list has been created at vger.kernel.org to support the
-development and use of Linux X.25. It is early days yet, but interested
-parties are welcome to subscribe to it. Just send a message to
-majordomo@vger.kernel.org with the following in the message body:
-
-subscribe linux-x25
-end
-
-The contents of the Subject line are ignored.
-
-Jonathan
-
-g4klx@g4klx.demon.co.uk
diff --git a/net/x25/Kconfig b/net/x25/Kconfig
index a328f79885d1..9f0d58b0b90b 100644
--- a/net/x25/Kconfig
+++ b/net/x25/Kconfig
@@ -20,7 +20,7 @@ config X25
 	  You can read more about X.25 at <http://www.sangoma.com/tutorials/x25/> and
 	  <http://docwiki.cisco.com/wiki/X.25>.
 	  Information about X.25 for Linux is contained in the files
-	  <file:Documentation/networking/x25.txt> and
+	  <file:Documentation/networking/x25.rst> and
 	  <file:Documentation/networking/x25-iface.rst>.
 
 	  One connects to an X.25 network either with a dedicated network card
-- 
cgit v1.2.3-59-g8ed1b


From c4a0eb9350183d1d188793c534e4141bcf2ccea8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:29 +0200
Subject: docs: networking: convert xfrm_device.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |   1 +
 Documentation/networking/xfrm_device.rst | 151 +++++++++++++++++++++++++++++++
 Documentation/networking/xfrm_device.txt | 140 ----------------------------
 3 files changed, 152 insertions(+), 140 deletions(-)
 create mode 100644 Documentation/networking/xfrm_device.rst
 delete mode 100644 Documentation/networking/xfrm_device.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 75521e6c473b..e31f6cb564b4 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -117,6 +117,7 @@ Contents:
    vxlan
    x25-iface
    x25
+   xfrm_device
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/xfrm_device.rst b/Documentation/networking/xfrm_device.rst
new file mode 100644
index 000000000000..da1073acda96
--- /dev/null
+++ b/Documentation/networking/xfrm_device.rst
@@ -0,0 +1,151 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===============================================
+XFRM device - offloading the IPsec computations
+===============================================
+
+Shannon Nelson <shannon.nelson@oracle.com>
+
+
+Overview
+========
+
+IPsec is a useful feature for securing network traffic, but the
+computational cost is high: a 10Gbps link can easily be brought down
+to under 1Gbps, depending on the traffic and link configuration.
+Luckily, there are NICs that offer a hardware based IPsec offload which
+can radically increase throughput and decrease CPU utilization.  The XFRM
+Device interface allows NIC drivers to offer to the stack access to the
+hardware offload.
+
+Userland access to the offload is typically through a system such as
+libreswan or KAME/raccoon, but the iproute2 'ip xfrm' command set can
+be handy when experimenting.  An example command might look something
+like this::
+
+  ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
+     reqid 0x07 replay-window 32 \
+     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
+     sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp \
+     offload dev eth4 dir in
+
+Yes, that's ugly, but that's what shell scripts and/or libreswan are for.
+
+
+
+Callbacks to implement
+======================
+
+::
+
+  /* from include/linux/netdevice.h */
+  struct xfrmdev_ops {
+	int	(*xdo_dev_state_add) (struct xfrm_state *x);
+	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
+	void	(*xdo_dev_state_free) (struct xfrm_state *x);
+	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
+				       struct xfrm_state *x);
+	void    (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
+  };
+
+The NIC driver offering ipsec offload will need to implement these
+callbacks to make the offload available to the network stack's
+XFRM subsystem.  Additionally, the feature bits NETIF_F_HW_ESP and
+NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
+
+
+
+Flow
+====
+
+At probe time and before the call to register_netdev(), the driver should
+set up local data structures and XFRM callbacks, and set the feature bits.
+The XFRM code's listener will finish the setup on NETDEV_REGISTER.
+
+::
+
+		adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
+		adapter->netdev->features |= NETIF_F_HW_ESP;
+		adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
+
+When new SAs are set up with a request for "offload" feature, the
+driver's xdo_dev_state_add() will be given the new SA to be offloaded
+and an indication of whether it is for Rx or Tx.  The driver should
+
+	- verify the algorithm is supported for offloads
+	- store the SA information (key, salt, target-ip, protocol, etc)
+	- enable the HW offload of the SA
+	- return status value:
+
+		===========   ===================================
+		0             success
+		-EOPNOTSUPP   offload not supported, try SW IPsec
+		other         fail the request
+		===========   ===================================
+
+The driver can also set an offload_handle in the SA, an opaque void pointer
+that can be used to convey context into the fast-path offload requests::
+
+		xs->xso.offload_handle = context;
+
+
+When the network stack is preparing an IPsec packet for an SA that has
+been setup for offload, it first calls into xdo_dev_offload_ok() with
+the skb and the intended offload state to ask the driver if the offload
+will be serviceable.  This can check the packet information to be sure the
+offload can be supported (e.g. IPv4 or IPv6, no IPv4 options, etc) and
+return true or false to signify its support.
+
+When ready to send, the driver needs to inspect the Tx packet for the
+offload information, including the opaque context, and set up the packet
+send accordingly::
+
+		xs = xfrm_input_state(skb);
+		context = xs->xso.offload_handle;
+		set up HW for send
+
+The stack has already inserted the appropriate IPsec headers in the
+packet data, the offload just needs to do the encryption and fix up the
+header values.
+
+
+When a packet is received and the HW has indicated that it offloaded a
+decryption, the driver needs to add a reference to the decoded SA into
+the packet's skb.  At this point the data should be decrypted but the
+IPsec headers are still in the packet data; they are removed later up
+the stack in xfrm_input().
+
+	find and hold the SA that was used for the Rx skb::
+
+		get spi, protocol, and destination IP from packet headers
+		xs = find xs from (spi, protocol, dest_IP)
+		xfrm_state_hold(xs);
+
+	store the state information into the skb::
+
+		sp = secpath_set(skb);
+		if (!sp) return;
+		sp->xvec[sp->len++] = xs;
+		sp->olen++;
+
+	indicate the success and/or error status of the offload::
+
+		xo = xfrm_offload(skb);
+		xo->flags = CRYPTO_DONE;
+		xo->status = crypto_status;
+
+	hand the packet to napi_gro_receive() as usual
+
+In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
+Driver will check packet seq number and update HW ESN state machine if needed.
+
+When the SA is removed by the user, the driver's xdo_dev_state_delete()
+is asked to disable the offload.  Later, xdo_dev_state_free() is called
+from a garbage collection routine after all reference counts to the state
+have been removed and any remaining resources can be cleared for the
+offload state.  How these are used by the driver will depend on specific
+hardware needs.
+
+As a netdev is set to DOWN the XFRM stack's netdev listener will call
+xdo_dev_state_delete() and xdo_dev_state_free() on any remaining offloaded
+states.
diff --git a/Documentation/networking/xfrm_device.txt b/Documentation/networking/xfrm_device.txt
deleted file mode 100644
index a1c904dc70dc..000000000000
--- a/Documentation/networking/xfrm_device.txt
+++ /dev/null
@@ -1,140 +0,0 @@
-
-===============================================
-XFRM device - offloading the IPsec computations
-===============================================
-Shannon Nelson <shannon.nelson@oracle.com>
-
-
-Overview
-========
-
-IPsec is a useful feature for securing network traffic, but the
-computational cost is high: a 10Gbps link can easily be brought down
-to under 1Gbps, depending on the traffic and link configuration.
-Luckily, there are NICs that offer a hardware based IPsec offload which
-can radically increase throughput and decrease CPU utilization.  The XFRM
-Device interface allows NIC drivers to offer to the stack access to the
-hardware offload.
-
-Userland access to the offload is typically through a system such as
-libreswan or KAME/raccoon, but the iproute2 'ip xfrm' command set can
-be handy when experimenting.  An example command might look something
-like this:
-
-  ip x s add proto esp dst 14.0.0.70 src 14.0.0.52 spi 0x07 mode transport \
-     reqid 0x07 replay-window 32 \
-     aead 'rfc4106(gcm(aes))' 0x44434241343332312423222114131211f4f3f2f1 128 \
-     sel src 14.0.0.52/24 dst 14.0.0.70/24 proto tcp \
-     offload dev eth4 dir in
-
-Yes, that's ugly, but that's what shell scripts and/or libreswan are for.
-
-
-
-Callbacks to implement
-======================
-
-/* from include/linux/netdevice.h */
-struct xfrmdev_ops {
-	int	(*xdo_dev_state_add) (struct xfrm_state *x);
-	void	(*xdo_dev_state_delete) (struct xfrm_state *x);
-	void	(*xdo_dev_state_free) (struct xfrm_state *x);
-	bool	(*xdo_dev_offload_ok) (struct sk_buff *skb,
-				       struct xfrm_state *x);
-	void    (*xdo_dev_state_advance_esn) (struct xfrm_state *x);
-};
-
-The NIC driver offering ipsec offload will need to implement these
-callbacks to make the offload available to the network stack's
-XFRM subsytem.  Additionally, the feature bits NETIF_F_HW_ESP and
-NETIF_F_HW_ESP_TX_CSUM will signal the availability of the offload.
-
-
-
-Flow
-====
-
-At probe time and before the call to register_netdev(), the driver should
-set up local data structures and XFRM callbacks, and set the feature bits.
-The XFRM code's listener will finish the setup on NETDEV_REGISTER.
-
-		adapter->netdev->xfrmdev_ops = &ixgbe_xfrmdev_ops;
-		adapter->netdev->features |= NETIF_F_HW_ESP;
-		adapter->netdev->hw_enc_features |= NETIF_F_HW_ESP;
-
-When new SAs are set up with a request for "offload" feature, the
-driver's xdo_dev_state_add() will be given the new SA to be offloaded
-and an indication of whether it is for Rx or Tx.  The driver should
-	- verify the algorithm is supported for offloads
-	- store the SA information (key, salt, target-ip, protocol, etc)
-	- enable the HW offload of the SA
-	- return status value:
-		0             success
-		-EOPNETSUPP   offload not supported, try SW IPsec
-		other         fail the request
-
-The driver can also set an offload_handle in the SA, an opaque void pointer
-that can be used to convey context into the fast-path offload requests.
-
-		xs->xso.offload_handle = context;
-
-
-When the network stack is preparing an IPsec packet for an SA that has
-been setup for offload, it first calls into xdo_dev_offload_ok() with
-the skb and the intended offload state to ask the driver if the offload
-will serviceable.  This can check the packet information to be sure the
-offload can be supported (e.g. IPv4 or IPv6, no IPv4 options, etc) and
-return true of false to signify its support.
-
-When ready to send, the driver needs to inspect the Tx packet for the
-offload information, including the opaque context, and set up the packet
-send accordingly.
-
-		xs = xfrm_input_state(skb);
-		context = xs->xso.offload_handle;
-		set up HW for send
-
-The stack has already inserted the appropriate IPsec headers in the
-packet data, the offload just needs to do the encryption and fix up the
-header values.
-
-
-When a packet is received and the HW has indicated that it offloaded a
-decryption, the driver needs to add a reference to the decoded SA into
-the packet's skb.  At this point the data should be decrypted but the
-IPsec headers are still in the packet data; they are removed later up
-the stack in xfrm_input().
-
-	find and hold the SA that was used to the Rx skb
-		get spi, protocol, and destination IP from packet headers
-		xs = find xs from (spi, protocol, dest_IP)
-		xfrm_state_hold(xs);
-
-	store the state information into the skb
-		sp = secpath_set(skb);
-		if (!sp) return;
-		sp->xvec[sp->len++] = xs;
-		sp->olen++;
-
-	indicate the success and/or error status of the offload
-		xo = xfrm_offload(skb);
-		xo->flags = CRYPTO_DONE;
-		xo->status = crypto_status;
-
-	hand the packet to napi_gro_receive() as usual
-
-In ESN mode, xdo_dev_state_advance_esn() is called from xfrm_replay_advance_esn().
-Driver will check packet seq number and update HW ESN state machine if needed.
-
-When the SA is removed by the user, the driver's xdo_dev_state_delete()
-is asked to disable the offload.  Later, xdo_dev_state_free() is called
-from a garbage collection routine after all reference counts to the state
-have been removed and any remaining resources can be cleared for the
-offload state.  How these are used by the driver will depend on specific
-hardware needs.
-
-As a netdev is set to DOWN the XFRM stack's netdev listener will call
-xdo_dev_state_delete() and xdo_dev_state_free() on any remaining offloaded
-states.
-
-
-- 
cgit v1.2.3-59-g8ed1b


From da62baada5cc94037ef91ed0c414a930a3a06520 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:30 +0200
Subject: docs: networking: convert xfrm_proc.txt to ReST

- add SPDX header;
- adjust title markup;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |   1 +
 Documentation/networking/xfrm_proc.rst | 113 +++++++++++++++++++++++++++++++++
 Documentation/networking/xfrm_proc.txt |  82 ------------------------
 3 files changed, 114 insertions(+), 82 deletions(-)
 create mode 100644 Documentation/networking/xfrm_proc.rst
 delete mode 100644 Documentation/networking/xfrm_proc.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index e31f6cb564b4..3fe70efb632e 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -118,6 +118,7 @@ Contents:
    x25-iface
    x25
    xfrm_device
+   xfrm_proc
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/xfrm_proc.rst b/Documentation/networking/xfrm_proc.rst
new file mode 100644
index 000000000000..0a771c5a7399
--- /dev/null
+++ b/Documentation/networking/xfrm_proc.rst
@@ -0,0 +1,113 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==================================
+XFRM proc - /proc/net/xfrm_* files
+==================================
+
+Masahide NAKAMURA <nakam@linux-ipv6.org>
+
+
+Transformation Statistics
+-------------------------
+
+The xfrm_proc code is a set of statistics showing numbers of packets
+dropped by the transformation code and why.  These counters are defined
+as part of the linux private MIB.  These counters can be viewed in
+/proc/net/xfrm_stat.
+
+
+Inbound errors
+~~~~~~~~~~~~~~
+
+XfrmInError:
+	All errors which are not matched by others
+
+XfrmInBufferError:
+	No buffer is left
+
+XfrmInHdrError:
+	Header error
+
+XfrmInNoStates:
+	No state is found
+	i.e. Either inbound SPI, address, or IPsec protocol at SA is wrong
+
+XfrmInStateProtoError:
+	Transformation protocol specific error
+	e.g. SA key is wrong
+
+XfrmInStateModeError:
+	Transformation mode specific error
+
+XfrmInStateSeqError:
+	Sequence error
+	i.e. Sequence number is out of window
+
+XfrmInStateExpired:
+	State is expired
+
+XfrmInStateMismatch:
+	State has a mismatched option
+	e.g. UDP encapsulation type is mismatched
+
+XfrmInStateInvalid:
+	State is invalid
+
+XfrmInTmplMismatch:
+	No matching template for states
+	e.g. Inbound SAs are correct but SP rule is wrong
+
+XfrmInNoPols:
+	No policy is found for states
+	e.g. Inbound SAs are correct but no SP is found
+
+XfrmInPolBlock:
+	Policy discards
+
+XfrmInPolError:
+	Policy error
+
+XfrmAcquireError:
+	State hasn't been fully acquired before use
+
+XfrmFwdHdrError:
+	Forward routing of a packet is not allowed
+
+Outbound errors
+~~~~~~~~~~~~~~~
+XfrmOutError:
+	All errors which are not matched by others
+
+XfrmOutBundleGenError:
+	Bundle generation error
+
+XfrmOutBundleCheckError:
+	Bundle check error
+
+XfrmOutNoStates:
+	No state is found
+
+XfrmOutStateProtoError:
+	Transformation protocol specific error
+
+XfrmOutStateModeError:
+	Transformation mode specific error
+
+XfrmOutStateSeqError:
+	Sequence error
+	i.e. Sequence number overflow
+
+XfrmOutStateExpired:
+	State is expired
+
+XfrmOutPolBlock:
+	Policy discards
+
+XfrmOutPolDead:
+	Policy is dead
+
+XfrmOutPolError:
+	Policy error
+
+XfrmOutStateInvalid:
+	State is invalid, perhaps expired
diff --git a/Documentation/networking/xfrm_proc.txt b/Documentation/networking/xfrm_proc.txt
deleted file mode 100644
index 2eae619ab67b..000000000000
--- a/Documentation/networking/xfrm_proc.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-XFRM proc - /proc/net/xfrm_* files
-==================================
-Masahide NAKAMURA <nakam@linux-ipv6.org>
-
-
-Transformation Statistics
--------------------------
-
-The xfrm_proc code is a set of statistics showing numbers of packets
-dropped by the transformation code and why.  These counters are defined
-as part of the linux private MIB.  These counters can be viewed in
-/proc/net/xfrm_stat.
-
-
-Inbound errors
-~~~~~~~~~~~~~~
-XfrmInError:
-	All errors which is not matched others
-XfrmInBufferError:
-	No buffer is left
-XfrmInHdrError:
-	Header error
-XfrmInNoStates:
-	No state is found
-	i.e. Either inbound SPI, address, or IPsec protocol at SA is wrong
-XfrmInStateProtoError:
-	Transformation protocol specific error
-	e.g. SA key is wrong
-XfrmInStateModeError:
-	Transformation mode specific error
-XfrmInStateSeqError:
-	Sequence error
-	i.e. Sequence number is out of window
-XfrmInStateExpired:
-	State is expired
-XfrmInStateMismatch:
-	State has mismatch option
-	e.g. UDP encapsulation type is mismatch
-XfrmInStateInvalid:
-	State is invalid
-XfrmInTmplMismatch:
-	No matching template for states
-	e.g. Inbound SAs are correct but SP rule is wrong
-XfrmInNoPols:
-	No policy is found for states
-	e.g. Inbound SAs are correct but no SP is found
-XfrmInPolBlock:
-	Policy discards
-XfrmInPolError:
-	Policy error
-XfrmAcquireError:
-	State hasn't been fully acquired before use
-XfrmFwdHdrError:
-	Forward routing of a packet is not allowed
-
-Outbound errors
-~~~~~~~~~~~~~~~
-XfrmOutError:
-	All errors which is not matched others
-XfrmOutBundleGenError:
-	Bundle generation error
-XfrmOutBundleCheckError:
-	Bundle check error
-XfrmOutNoStates:
-	No state is found
-XfrmOutStateProtoError:
-	Transformation protocol specific error
-XfrmOutStateModeError:
-	Transformation mode specific error
-XfrmOutStateSeqError:
-	Sequence error
-	i.e. Sequence number overflow
-XfrmOutStateExpired:
-	State is expired
-XfrmOutPolBlock:
-	Policy discards
-XfrmOutPolDead:
-	Policy is dead
-XfrmOutPolError:
-	Policy error
-XfrmOutStateInvalid:
-	State is invalid, perhaps expired
-- 
cgit v1.2.3-59-g8ed1b


From a5cfea33e5e54854fa541deb08b85b782f21bab5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:31 +0200
Subject: docs: networking: convert xfrm_sync.txt to ReST

- add SPDX header;
- add a document title;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst     |   1 +
 Documentation/networking/xfrm_sync.rst | 189 +++++++++++++++++++++++++++++++++
 Documentation/networking/xfrm_sync.txt | 169 -----------------------------
 3 files changed, 190 insertions(+), 169 deletions(-)
 create mode 100644 Documentation/networking/xfrm_sync.rst
 delete mode 100644 Documentation/networking/xfrm_sync.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 3fe70efb632e..ec83bd95e4e9 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -119,6 +119,7 @@ Contents:
    x25
    xfrm_device
    xfrm_proc
+   xfrm_sync
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/xfrm_sync.rst b/Documentation/networking/xfrm_sync.rst
new file mode 100644
index 000000000000..6246503ceab2
--- /dev/null
+++ b/Documentation/networking/xfrm_sync.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====
+XFRM
+====
+
+The sync patches work is based on initial patches from
+Krisztian <hidden@balabit.hu> and others and additional patches
+from Jamal <hadi@cyberus.ca>.
+
+The end goal for syncing is to be able to insert attributes + generate
+events so that the SA can be safely moved from one machine to another
+for HA purposes.
+The idea is to synchronize the SA so that the takeover machine can do
+the processing of the SA as accurately as possible if it has access to it.
+
+We already have the ability to generate SA add/del/upd events.
+These patches add ability to sync and have accurate lifetime byte (to
+ensure proper decay of SAs) and replay counters to avoid replay attacks
+with minimal loss at failover time.
+This way a backup stays as closely up-to-date as an active member.
+
+Because the above items change for every packet the SA receives,
+it is possible for a lot of the events to be generated.
+For this reason, we also add a nagle-like algorithm to restrict
+the events. i.e. we are going to set thresholds to say "let me
+know if the replay sequence threshold is reached or 10 secs have passed"
+These thresholds are set system-wide via sysctls or can be updated
+per SA.
+
+The identified items that need to be synchronized are:
+- the lifetime byte counter
+note that: lifetime time limit is not important if you assume the failover
+machine is known ahead of time since the decay of the time countdown
+is not driven by packet arrival.
+- the replay sequence for both inbound and outbound
+
+1) Message Structure
+----------------------
+
+nlmsghdr:aevent_id:optional-TLVs.
+
+The netlink message types are:
+
+XFRM_MSG_NEWAE and XFRM_MSG_GETAE.
+
+A XFRM_MSG_GETAE does not have TLVs.
+
+A XFRM_MSG_NEWAE will have at least two TLVs (as is
+discussed further below).
+
+aevent_id structure looks like::
+
+   struct xfrm_aevent_id {
+	     struct xfrm_usersa_id           sa_id;
+	     xfrm_address_t                  saddr;
+	     __u32                           flags;
+	     __u32                           reqid;
+   };
+
+The unique SA is identified by the combination of xfrm_usersa_id,
+reqid and saddr.
+
+flags are used to indicate different things. The possible
+flags are::
+
+	XFRM_AE_RTHR=1, /* replay threshold*/
+	XFRM_AE_RVAL=2, /* replay value */
+	XFRM_AE_LVAL=4, /* lifetime value */
+	XFRM_AE_ETHR=8, /* expiry timer threshold */
+	XFRM_AE_CR=16, /* Event cause is replay update */
+	XFRM_AE_CE=32, /* Event cause is timer expiry */
+	XFRM_AE_CU=64, /* Event cause is policy update */
+
+How these flags are used is dependent on the direction of the
+message (kernel<->user) as well as the cause (config, query or event).
+This is described below in the different messages.
+
+The pid will be set appropriately in netlink to recognize direction
+(0 to the kernel and pid = processid that created the event
+when going from kernel to user space)
+
+A program needs to subscribe to multicast group XFRMNLGRP_AEVENTS
+to get notified of these events.
+
+2) TLVS reflect the different parameters:
+-----------------------------------------
+
+a) byte value (XFRMA_LTIME_VAL)
+
+This TLV carries the running/current counter for byte lifetime since
+last event.
+
+b) replay value (XFRMA_REPLAY_VAL)
+
+This TLV carries the running/current counter for replay sequence since
+last event.
+
+c) replay threshold (XFRMA_REPLAY_THRESH)
+
+This TLV carries the threshold being used by the kernel to trigger events
+when the replay sequence is exceeded.
+
+d) expiry timer (XFRMA_ETIMER_THRESH)
+
+This is a timer value in milliseconds which is used as the nagle
+value to rate limit the events.
+
+3) Default configurations for the parameters:
+---------------------------------------------
+
+By default these events should be turned off unless there is
+at least one listener registered to listen to the multicast
+group XFRMNLGRP_AEVENTS.
+
+Programs installing SAs will need to specify the two thresholds, however,
+in order to not change existing applications such as racoon
+we also provide default threshold values for these different parameters
+in case they are not specified.
+
+the two sysctls/proc entries are:
+
+a) /proc/sys/net/core/sysctl_xfrm_aevent_etime
+used to provide default values for the XFRMA_ETIMER_THRESH in incremental
+units of time of 100ms. The default is 10 (1 second)
+
+b) /proc/sys/net/core/sysctl_xfrm_aevent_rseqth
+used to provide default values for XFRMA_REPLAY_THRESH parameter
+in incremental packet count. The default is two packets.
+
+4) Message types
+----------------
+
+a) XFRM_MSG_GETAE issued by user-->kernel.
+   XFRM_MSG_GETAE does not carry any TLVs.
+
+The response is a XFRM_MSG_NEWAE which is formatted based on what
+XFRM_MSG_GETAE queried for.
+
+The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+* if XFRM_AE_RTHR flag is set, then XFRMA_REPLAY_THRESH is also retrieved
+* if XFRM_AE_ETHR flag is set, then XFRMA_ETIMER_THRESH is also retrieved
+
+b) XFRM_MSG_NEWAE is issued by either user space to configure
+   or kernel to announce events or respond to a XFRM_MSG_GETAE.
+
+i) user --> kernel to configure a specific SA.
+
+any of the values or threshold parameters can be updated by passing the
+appropriate TLV.
+
+A response is issued back to the sender in user space to indicate success
+or failure.
+
+In the case of success, additionally an event with
+XFRM_MSG_NEWAE is also issued to any listeners as described in iii).
+
+ii) kernel->user direction as a response to XFRM_MSG_GETAE
+
+The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+
+The threshold TLVs will be included if explicitly requested in
+the XFRM_MSG_GETAE message.
+
+iii) kernel->user to report as event if someone sets any values or
+     thresholds for an SA using XFRM_MSG_NEWAE (as described in #i above).
+     In such a case XFRM_AE_CU flag is set to inform the user that
+     the change happened as a result of an update.
+     The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+
+iv) kernel->user to report event when replay threshold or a timeout
+    is exceeded.
+
+In such a case either XFRM_AE_CR (replay exceeded) or XFRM_AE_CE (timeout
+happened) is set to inform the user what happened.
+Note the two flags are mutually exclusive.
+The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
+
+Exceptions to threshold settings
+--------------------------------
+
+If you have an SA that is getting hit by traffic in bursts such that
+there is a period where the timer threshold expires with no packets
+seen, then an odd behavior is seen as follows:
+The first packet arrival after a timer expiry will trigger a timeout
+event; i.e. we don't wait for a timeout period or a packet threshold
+to be reached. This is done for simplicity and efficiency reasons.
+
+-JHS
diff --git a/Documentation/networking/xfrm_sync.txt b/Documentation/networking/xfrm_sync.txt
deleted file mode 100644
index 8d88e0f2ec49..000000000000
--- a/Documentation/networking/xfrm_sync.txt
+++ /dev/null
@@ -1,169 +0,0 @@
-
-The sync patches work is based on initial patches from
-Krisztian <hidden@balabit.hu> and others and additional patches
-from Jamal <hadi@cyberus.ca>.
-
-The end goal for syncing is to be able to insert attributes + generate
-events so that the SA can be safely moved from one machine to another
-for HA purposes.
-The idea is to synchronize the SA so that the takeover machine can do
-the processing of the SA as accurate as possible if it has access to it.
-
-We already have the ability to generate SA add/del/upd events.
-These patches add ability to sync and have accurate lifetime byte (to
-ensure proper decay of SAs) and replay counters to avoid replay attacks
-with as minimal loss at failover time.
-This way a backup stays as closely up-to-date as an active member.
-
-Because the above items change for every packet the SA receives,
-it is possible for a lot of the events to be generated.
-For this reason, we also add a nagle-like algorithm to restrict
-the events. i.e we are going to set thresholds to say "let me
-know if the replay sequence threshold is reached or 10 secs have passed"
-These thresholds are set system-wide via sysctls or can be updated
-per SA.
-
-The identified items that need to be synchronized are:
-- the lifetime byte counter
-note that: lifetime time limit is not important if you assume the failover
-machine is known ahead of time since the decay of the time countdown
-is not driven by packet arrival.
-- the replay sequence for both inbound and outbound
-
-1) Message Structure
-----------------------
-
-nlmsghdr:aevent_id:optional-TLVs.
-
-The netlink message types are:
-
-XFRM_MSG_NEWAE and XFRM_MSG_GETAE.
-
-A XFRM_MSG_GETAE does not have TLVs.
-A XFRM_MSG_NEWAE will have at least two TLVs (as is
-discussed further below).
-
-aevent_id structure looks like:
-
-   struct xfrm_aevent_id {
-             struct xfrm_usersa_id           sa_id;
-             xfrm_address_t                  saddr;
-             __u32                           flags;
-             __u32                           reqid;
-   };
-
-The unique SA is identified by the combination of xfrm_usersa_id,
-reqid and saddr.
-
-flags are used to indicate different things. The possible
-flags are:
-        XFRM_AE_RTHR=1, /* replay threshold*/
-        XFRM_AE_RVAL=2, /* replay value */
-        XFRM_AE_LVAL=4, /* lifetime value */
-        XFRM_AE_ETHR=8, /* expiry timer threshold */
-        XFRM_AE_CR=16, /* Event cause is replay update */
-        XFRM_AE_CE=32, /* Event cause is timer expiry */
-        XFRM_AE_CU=64, /* Event cause is policy update */
-
-How these flags are used is dependent on the direction of the
-message (kernel<->user) as well the cause (config, query or event).
-This is described below in the different messages.
-
-The pid will be set appropriately in netlink to recognize direction
-(0 to the kernel and pid = processid that created the event
-when going from kernel to user space)
-
-A program needs to subscribe to multicast group XFRMNLGRP_AEVENTS
-to get notified of these events.
-
-2) TLVS reflect the different parameters:
------------------------------------------
-
-a) byte value (XFRMA_LTIME_VAL)
-This TLV carries the running/current counter for byte lifetime since
-last event.
-
-b)replay value (XFRMA_REPLAY_VAL)
-This TLV carries the running/current counter for replay sequence since
-last event.
-
-c)replay threshold (XFRMA_REPLAY_THRESH)
-This TLV carries the threshold being used by the kernel to trigger events
-when the replay sequence is exceeded.
-
-d) expiry timer (XFRMA_ETIMER_THRESH)
-This is a timer value in milliseconds which is used as the nagle
-value to rate limit the events.
-
-3) Default configurations for the parameters:
-----------------------------------------------
-
-By default these events should be turned off unless there is
-at least one listener registered to listen to the multicast
-group XFRMNLGRP_AEVENTS.
-
-Programs installing SAs will need to specify the two thresholds, however,
-in order to not change existing applications such as racoon
-we also provide default threshold values for these different parameters
-in case they are not specified.
-
-the two sysctls/proc entries are:
-a) /proc/sys/net/core/sysctl_xfrm_aevent_etime
-used to provide default values for the XFRMA_ETIMER_THRESH in incremental
-units of time of 100ms. The default is 10 (1 second)
-
-b) /proc/sys/net/core/sysctl_xfrm_aevent_rseqth
-used to provide default values for XFRMA_REPLAY_THRESH parameter
-in incremental packet count. The default is two packets.
-
-4) Message types
-----------------
-
-a) XFRM_MSG_GETAE issued by user-->kernel.
-XFRM_MSG_GETAE does not carry any TLVs.
-The response is a XFRM_MSG_NEWAE which is formatted based on what
-XFRM_MSG_GETAE queried for.
-The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
-*if XFRM_AE_RTHR flag is set, then XFRMA_REPLAY_THRESH is also retrieved
-*if XFRM_AE_ETHR flag is set, then XFRMA_ETIMER_THRESH is also retrieved
-
-b) XFRM_MSG_NEWAE is issued by either user space to configure
-or kernel to announce events or respond to a XFRM_MSG_GETAE.
-
-i) user --> kernel to configure a specific SA.
-any of the values or threshold parameters can be updated by passing the
-appropriate TLV.
-A response is issued back to the sender in user space to indicate success
-or failure.
-In the case of success, additionally an event with
-XFRM_MSG_NEWAE is also issued to any listeners as described in iii).
-
-ii) kernel->user direction as a response to XFRM_MSG_GETAE
-The response will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
-The threshold TLVs will be included if explicitly requested in
-the XFRM_MSG_GETAE message.
-
-iii) kernel->user to report as event if someone sets any values or
-thresholds for an SA using XFRM_MSG_NEWAE (as described in #i above).
-In such a case XFRM_AE_CU flag is set to inform the user that
-the change happened as a result of an update.
-The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
-
-iv) kernel->user to report event when replay threshold or a timeout
-is exceeded.
-In such a case either XFRM_AE_CR (replay exceeded) or XFRM_AE_CE (timeout
-happened) is set to inform the user what happened.
-Note the two flags are mutually exclusive.
-The message will always have XFRMA_LTIME_VAL and XFRMA_REPLAY_VAL TLVs.
-
-Exceptions to threshold settings
---------------------------------
-
-If you have an SA that is getting hit by traffic in bursts such that
-there is a period where the timer threshold expires with no packets
-seen, then an odd behavior is seen as follows:
-The first packet arrival after a timer expiry will trigger a timeout
-event; i.e we don't wait for a timeout period or a packet threshold
-to be reached. This is done for simplicity and efficiency reasons.
-
--JHS
-- 
cgit v1.2.3-59-g8ed1b


From a6c34b476ca27d0e5c14e58aefdbbdc4c509dd5f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:32 +0200
Subject: docs: networking: convert xfrm_sysctl.txt to ReST

Not much to be done here:

- add SPDX header;
- add a document title;
- add a chapter's markup;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst       |  1 +
 Documentation/networking/xfrm_sysctl.rst | 11 +++++++++++
 Documentation/networking/xfrm_sysctl.txt |  4 ----
 3 files changed, 12 insertions(+), 4 deletions(-)
 create mode 100644 Documentation/networking/xfrm_sysctl.rst
 delete mode 100644 Documentation/networking/xfrm_sysctl.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index ec83bd95e4e9..1630801cec19 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -120,6 +120,7 @@ Contents:
    xfrm_device
    xfrm_proc
    xfrm_sync
+   xfrm_sysctl
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/xfrm_sysctl.rst b/Documentation/networking/xfrm_sysctl.rst
new file mode 100644
index 000000000000..47b9bbdd0179
--- /dev/null
+++ b/Documentation/networking/xfrm_sysctl.rst
@@ -0,0 +1,11 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+XFRM Sysctl
+============
+
+/proc/sys/net/core/xfrm_* Variables:
+====================================
+
+xfrm_acq_expires - INTEGER
+	default 30 - hard timeout in seconds for acquire requests
diff --git a/Documentation/networking/xfrm_sysctl.txt b/Documentation/networking/xfrm_sysctl.txt
deleted file mode 100644
index 5bbd16792fe1..000000000000
--- a/Documentation/networking/xfrm_sysctl.txt
+++ /dev/null
@@ -1,4 +0,0 @@
-/proc/sys/net/core/xfrm_* Variables:
-
-xfrm_acq_expires - INTEGER
-	default 30 - hard timeout in seconds for acquire requests
-- 
cgit v1.2.3-59-g8ed1b


From 0046db09d539523ef1470bcad2f2614cc3ef7ddf Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:33 +0200
Subject: docs: networking: convert z8530drv.txt to ReST

- add SPDX header;
- use copyright symbol;
- adjust titles and chapters, adding proper markups;
- mark tables as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst    |   1 +
 Documentation/networking/z8530drv.rst | 686 ++++++++++++++++++++++++++++++++++
 Documentation/networking/z8530drv.txt | 657 --------------------------------
 MAINTAINERS                           |   2 +-
 drivers/net/hamradio/Kconfig          |   4 +-
 drivers/net/hamradio/scc.c            |   2 +-
 6 files changed, 691 insertions(+), 661 deletions(-)
 create mode 100644 Documentation/networking/z8530drv.rst
 delete mode 100644 Documentation/networking/z8530drv.txt

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index 1630801cec19..f5733ca4fbcb 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -121,6 +121,7 @@ Contents:
    xfrm_proc
    xfrm_sync
    xfrm_sysctl
+   z8530drv
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/z8530drv.rst b/Documentation/networking/z8530drv.rst
new file mode 100644
index 000000000000..d2942760f167
--- /dev/null
+++ b/Documentation/networking/z8530drv.rst
@@ -0,0 +1,686 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=========================================================
+SCC.C - Linux driver for Z8530 based HDLC cards for AX.25
+=========================================================
+
+
+This is a subset of the documentation. To use this driver you MUST have the
+full package from:
+
+Internet:
+
+    1. ftp://ftp.ccac.rwth-aachen.de/pub/jr/z8530drv-utils_3.0-3.tar.gz
+
+    2. ftp://ftp.pspt.fi/pub/ham/linux/ax25/z8530drv-utils_3.0-3.tar.gz
+
+Please note that the information in this document may be hopelessly outdated.
+A new version of the documentation, along with links to other important
+Linux Kernel AX.25 documentation and programs, is available on
+http://yaina.de/jreuter
+
+Copyright |copy| 1993,2000 by Joerg Reuter DL1BKE <jreuter@yaina.de>
+
+portions Copyright |copy| 1993 Guido ten Dolle PE1NNZ
+
+for the complete copyright notice see >> Copying.Z8530DRV <<
+
+1. Initialization of the driver
+===============================
+
+To use the driver, 3 steps must be performed:
+
+     1. if compiled as module: loading the module
+     2. Setup of hardware, MODEM and KISS parameters with sccinit
+     3. Attach each channel to the Linux kernel AX.25 with "ifconfig"
+
+Unlike the versions below 2.4 this driver is a real network device
+driver. If you want to run xNOS instead of our fine kernel AX.25
+use a 2.x version (available from above sites) or read the
+AX.25-HOWTO on how to emulate a KISS TNC on network device drivers.
+
+
+1.1 Loading the module
+======================
+
+(If you're going to compile the driver as a part of the kernel image,
+ skip this chapter and continue with 1.2)
+
+Before you can use a module, you'll have to load it with::
+
+	insmod scc.o
+
+please read 'man insmod' that comes with module-init-tools.
+
+You should include the insmod in one of the /etc/rc.d/rc.* files,
+and don't forget to insert a call of sccinit after that. It
+will read your /etc/z8530drv.conf.
+
+1.2. /etc/z8530drv.conf
+=======================
+
+To setup all parameters you must run /sbin/sccinit from one
+of your rc.*-files. This has to be done BEFORE you can
+"ifconfig" an interface. Sccinit reads the file /etc/z8530drv.conf
+and sets the hardware, MODEM and KISS parameters. A sample file is
+delivered with this package. Change it to your needs.
+
+The file itself consists of two main sections.
+
+1.2.1 configuration of hardware parameters
+==========================================
+
+The hardware setup section defines the following parameters for each
+Z8530::
+
+    chip    1
+    data_a  0x300                   # data port A
+    ctrl_a  0x304                   # control port A
+    data_b  0x301                   # data port B
+    ctrl_b  0x305                   # control port B
+    irq     5                       # IRQ No. 5
+    pclock  4915200                 # clock
+    board   BAYCOM                  # hardware type
+    escc    no                      # enhanced SCC chip? (8580/85180/85280)
+    vector  0                       # latch for interrupt vector
+    special no                      # address of special function register
+    option  0                       # option to set via sfr
+
+
+chip
+	- this is just a delimiter to make sccinit a bit simpler to
+	  program. A parameter has no effect.
+
+data_a
+	- the address of the data port A of this Z8530 (needed)
+ctrl_a
+	- the address of the control port A (needed)
+data_b
+	- the address of the data port B (needed)
+ctrl_b
+	- the address of the control port B (needed)
+
+irq
+	- the used IRQ for this chip. Different chips can use different
+	  IRQs or the same. If they share an interrupt, it needs to be
+	  specified within one chip-definition only.
+
+pclock  - the clock at the PCLK pin of the Z8530 (option, 4915200 is
+	  default), measured in Hertz
+
+board
+	- the "type" of the board:
+
+	   =======================  ========
+	   SCC type                 value
+	   =======================  ========
+	   PA0HZP SCC card          PA0HZP
+	   EAGLE card               EAGLE
+	   PC100 card               PC100
+	   PRIMUS-PC (DG9BL) card   PRIMUS
+	   BayCom (U)SCC card       BAYCOM
+	   =======================  ========
+
+escc
+	- if you want support for ESCC chips (8580, 85180, 85280), set
+	  this to "yes" (option, defaults to "no")
+
+vector
+	- address of the vector latch (aka "intack port") for PA0HZP
+	  cards. There can be only one vector latch for all chips!
+	  (option, defaults to 0)
+
+special
+	- address of the special function register on several cards.
+	  (option, defaults to 0)
+
+option  - The value you write into that register (option, default is 0)
+
+You can specify up to four chips (8 channels). If this is not enough,
+just change::
+
+	#define MAXSCC 4
+
+to a higher value.
+
+Example for the BAYCOM USCC:
+----------------------------
+
+::
+
+	chip    1
+	data_a  0x300                   # data port A
+	ctrl_a  0x304                   # control port A
+	data_b  0x301                   # data port B
+	ctrl_b  0x305                   # control port B
+	irq     5                       # IRQ No. 5 (#)
+	board   BAYCOM                  # hardware type (*)
+	#
+	# SCC chip 2
+	#
+	chip    2
+	data_a  0x302
+	ctrl_a  0x306
+	data_b  0x303
+	ctrl_b  0x307
+	board   BAYCOM
+
+An example for a PA0HZP card:
+-----------------------------
+
+::
+
+	chip 1
+	data_a 0x153
+	data_b 0x151
+	ctrl_a 0x152
+	ctrl_b 0x150
+	irq 9
+	pclock 4915200
+	board PA0HZP
+	vector 0x168
+	escc no
+	#
+	#
+	#
+	chip 2
+	data_a 0x157
+	data_b 0x155
+	ctrl_a 0x156
+	ctrl_b 0x154
+	irq 9
+	pclock 4915200
+	board PA0HZP
+	vector 0x168
+	escc no
+
+A DRSI should probably work with this:
+--------------------------------------------
+(actually: two DRSI cards...)
+
+::
+
+	chip 1
+	data_a 0x303
+	data_b 0x301
+	ctrl_a 0x302
+	ctrl_b 0x300
+	irq 7
+	pclock 4915200
+	board DRSI
+	escc no
+	#
+	#
+	#
+	chip 2
+	data_a 0x313
+	data_b 0x311
+	ctrl_a 0x312
+	ctrl_b 0x310
+	irq 7
+	pclock 4915200
+	board DRSI
+	escc no
+
+Note that you cannot use the on-board baudrate generator of DRSI
+cards. Use "mode dpll" for clock source (see below).
+
+This is based on information provided by Mike Bilow (and verified
+by Paul Helay)
+
+The utility "gencfg"
+--------------------
+
+If you only know the parameters for the PE1CHL driver for DOS,
+run gencfg. It will generate the correct port addresses (I hope).
+Its parameters are exactly the same as the ones you use with
+the "attach scc" command in net, except that the string "init" must
+not appear. Example::
+
+	gencfg 2 0x150 4 2 0 1 0x168 9 4915200
+
+will print a skeleton z8530drv.conf for the OptoSCC to stdout.
+
+::
+
+	gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10
+
+does the same for the BAYCOM USCC card. In my opinion it is much easier
+to edit scc_config.h...
+
+
+1.2.2 channel configuration
+===========================
+
+The channel definition is divided into three sub sections for each
+channel:
+
+An example for scc0::
+
+	# DEVICE
+
+	device scc0	# the device for the following params
+
+	# MODEM / BUFFERS
+
+	speed 1200		# the default baudrate
+	clock dpll		# clock source:
+				# 	dpll     = normal half duplex operation
+				# 	external = MODEM provides own Rx/Tx clock
+				#	divider  = use full duplex divider if
+				#		   installed (1)
+	mode nrzi		# HDLC encoding mode
+				#	nrzi = 1k2 MODEM, G3RUH 9k6 MODEM
+				#	nrz  = DF9IC 9k6 MODEM
+				#
+	bufsize	384		# size of buffers. Note that this must include
+				# the AX.25 header, not only the data field!
+				# (optional, defaults to 384)
+
+	# KISS (Layer 1)
+
+	txdelay 36              # (see chapter 1.4)
+	persist 64
+	slot    8
+	tail    8
+	fulldup 0
+	wait    12
+	min     3
+	maxkey  7
+	idle    3
+	maxdef  120
+	group   0
+	txoff   off
+	softdcd on
+	slip    off
+
+The order WITHIN these sections is unimportant. The order OF these
+sections IS important. The MODEM parameters are set with the first
+recognized KISS parameter...
+
+Please note that you can initialize the board only once after boot
+(or insmod). You can change all parameters but "mode" and "clock"
+later with the Sccparam program or through KISS. Just to avoid
+security holes...
+
+(1) this divider is usually mounted on the SCC-PBC (PA0HZP) or not
+    present at all (BayCom). It feeds back the output of the DPLL
+    (digital pll) as transmit clock. Using this mode without a divider
+    installed will normally result in keying the transceiver until
+    maxkey expires --- of course without sending anything (useful).
+
+2. Attachment of a channel by your AX.25 software
+=================================================
+
+2.1 Kernel AX.25
+================
+
+To set up an AX.25 device you can simply type::
+
+	ifconfig scc0 44.128.1.1 hw ax25 dl0tha-7
+
+This will create a network interface with the IP number 44.128.1.1
+and the callsign "dl0tha". If you do not have any IP number (yet) you
+can use any of the 44.128.0.0 network. Note that you do not need
+axattach. The purpose of axattach (like slattach) is to create a KISS
+network device linked to a TTY. Please read the documentation of the
+ax25-utils and the AX.25-HOWTO to learn how to set the parameters of
+the kernel AX.25.
+
+2.2 NOS, NET and TFKISS
+=======================
+
+Since the TTY driver (aka KISS TNC emulation) is gone you need
+to emulate the old behaviour. The cost of using these programs is
+that you probably need to compile the kernel AX.25, regardless of whether
+you actually use it or not. First setup your /etc/ax25/axports,
+for example::
+
+	9k6	dl0tha-9  9600  255 4 9600 baud port (scc3)
+	axlink	dl0tha-15 38400 255 4 Link to NOS
+
+Now "ifconfig" the scc device::
+
+	ifconfig scc3 44.128.1.1 hw ax25 dl0tha-9
+
+You can now axattach a pseudo-TTY::
+
+	axattach /dev/ptys0 axlink
+
+and start your NOS and attach /dev/ptys0 there. The problem is that
+NOS is reachable only via digipeating through the kernel AX.25
+(disastrous on a DAMA controlled channel). To solve this problem,
+configure "rxecho" to echo the incoming frames from "9k6" to "axlink"
+and outgoing frames from "axlink" to "9k6" and start::
+
+	rxecho
+
+Or simply use "kissbridge" coming with z8530drv-utils::
+
+	ifconfig scc3 hw ax25 dl0tha-9
+	kissbridge scc3 /dev/ptys0
+
+
+3. Adjustment and Display of parameters
+=======================================
+
+3.1 Displaying SCC Parameters:
+==============================
+
+Once a SCC channel has been attached, the parameter settings and
+some statistical information can be shown using the sccstat program::
+
+	dl1bke-u:~$ sccstat scc0
+
+	Parameters:
+
+	speed       : 1200 baud
+	txdelay     : 36
+	persist     : 255
+	slottime    : 0
+	txtail      : 8
+	fulldup     : 1
+	waittime    : 12
+	mintime     : 3 sec
+	maxkeyup    : 7 sec
+	idletime    : 3 sec
+	maxdefer    : 120 sec
+	group       : 0x00
+	txoff       : off
+	softdcd     : on
+	SLIP        : off
+
+	Status:
+
+	HDLC                  Z8530           Interrupts         Buffers
+	-----------------------------------------------------------------------
+	Sent       :     273  RxOver :     0  RxInts :   125074  Size    :  384
+	Received   :    1095  TxUnder:     0  TxInts :     4684  NoSpace :    0
+	RxErrors   :    1591                  ExInts :    11776
+	TxErrors   :       0                  SpInts :     1503
+	Tx State   :    idle
+
+
+The status info shown is:
+
+==============	==============================================================
+Sent		number of frames transmitted
+Received	number of frames received
+RxErrors	number of receive errors (CRC, ABORT)
+TxErrors	number of discarded Tx frames (due to various reasons)
+Tx State	status of the Tx interrupt handler: idle/busy/active/tail (2)
+RxOver		number of receiver overruns
+TxUnder		number of transmitter underruns
+RxInts		number of receiver interrupts
+TxInts		number of transmitter interrupts
+ExInts		number of external/status interrupts
+SpInts		number of receiver special condition interrupts
+Size		maximum size of an AX.25 frame (*with* AX.25 headers!)
+NoSpace		number of times a buffer could not get allocated
+==============	==============================================================
+
+An overrun is abnormal. If lots of these occur, the product of
+baudrate and number of interfaces is too high for the processing
+power of your computer. NoSpace errors are unlikely to be caused by the
+driver or the kernel AX.25.
+
+
+3.2 Setting Parameters
+======================
+
+
+The setting of parameters of the emulated KISS TNC is done in the
+same way in the SCC driver. You can change parameters by using
+the kissparms program from the ax25-utils package or use the program
+"sccparam"::
+
+     sccparam <device> <paramname> <decimal-|hexadecimal value>
+
+You can change the following parameters:
+
+===========   =====
+param	      value
+===========   =====
+speed         1200
+txdelay       36
+persist       255
+slottime      0
+txtail        8
+fulldup       1
+waittime      12
+mintime       3
+maxkeyup      7
+idletime      3
+maxdefer      120
+group         0x00
+txoff         off
+softdcd       on
+SLIP          off
+===========   =====
+
+
+The parameters have the following meaning:
+
+speed:
+     The baudrate on this channel in bits/sec
+
+     Example: sccparam /dev/scc3 speed 9600
+
+txdelay:
+     The delay (in units of 10 ms) after keying of the
+     transmitter, until the first byte is sent. This is usually
+     called "TXDELAY" in a TNC.  When 0 is specified, the driver
+     will just wait until the CTS signal is asserted. This
+     assumes the presence of a timer or other circuitry in the
+     MODEM and/or transmitter, that asserts CTS when the
+     transmitter is ready for data.
+     A normal value of this parameter is 30-36.
+
+     Example: sccparam /dev/scc0 txd 20
+
+persist:
+     This is the probability that the transmitter will be keyed
+     when the channel is found to be free.  It is a value from 0
+     to 255, and the probability is (value+1)/256.  The value
+     should be somewhere near 50-60, and should be lowered when
+     the channel is used more heavily.
+
+     Example: sccparam /dev/scc2 persist 20
+
+slottime:
+     This is the time between samples of the channel. It is
+     expressed in units of 10 ms.  About 200-300 ms (value 20-30)
+     seems to be a good value.
+
+     Example: sccparam /dev/scc0 slot 20
+
+tail:
+     The time the transmitter will remain keyed after the last
+     byte of a packet has been transferred to the SCC. This is
+     necessary because the CRC and a flag still have to leave the
+     SCC before the transmitter is keyed down. The value depends
+     on the baudrate selected.  A few character times should be
+     sufficient, e.g. 40ms at 1200 baud. (value 4)
+     The value of this parameter is in 10 ms units.
+
+     Example: sccparam /dev/scc2 tail 4
+
+full:
+     The full-duplex mode switch. This can be one of the following
+     values:
+
+     0:   The interface will operate in CSMA mode (the normal
+	  half-duplex packet radio operation)
+     1:   Fullduplex mode, i.e. the transmitter will be keyed at
+	  any time, without checking the received carrier.  It
+	  will be unkeyed when there are no packets to be sent.
+     2:   Like 1, but the transmitter will remain keyed, also
+	  when there are no packets to be sent.  Flags will be
+	  sent in that case, until a timeout (parameter 10)
+	  occurs.
+
+     Example: sccparam /dev/scc0 fulldup off
+
+wait:
+     The initial waittime before any transmit attempt, after the
+     frame has been queued for transmit.  This is the length of
+     the first slot in CSMA mode.  In full duplex modes it is
+     set to 0 for maximum performance.
+     The value of this parameter is in 10 ms units.
+
+     Example: sccparam /dev/scc1 wait 4
+
+maxkey:
+     The maximal time the transmitter will be keyed to send
+     packets, in seconds.  This can be useful on busy CSMA
+     channels, to avoid "getting a bad reputation" when you are
+     generating a lot of traffic.  After the specified time has
+     elapsed, no new frame will be started. Instead, the trans-
+     mitter will be switched off for a specified time (parameter
+     min), and then the selected algorithm for keyup will be
+     started again.
+     The value 0 as well as "off" will disable this feature,
+     and allow infinite transmission time.
+
+     Example: sccparam /dev/scc0 maxk 20
+
+min:
+     This is the time the transmitter will be switched off when
+     the maximum transmission time is exceeded.
+
+     Example: sccparam /dev/scc3 min 10
+
+idle:
+     This parameter specifies the maximum idle time in full duplex
+     2 mode, in seconds.  When no frames have been sent for this
+     time, the transmitter will be keyed down.  A value of 0
+     has the same result as fullduplex mode 1. This parameter
+     can be disabled.
+
+     Example: sccparam /dev/scc2 idle off	# transmit forever
+
+maxdefer:
+     This is the maximum time (in seconds) to wait for a free channel
+     to send. When this timer expires the transmitter will be keyed
+     IMMEDIATELY. If you love to get trouble with other users you
+     should set this to a very low value ;-)
+
+     Example: sccparam /dev/scc0 maxdefer 240	# 2 minutes
+
+
+txoff:
+     When this parameter has the value 0, the transmission of packets
+     is enabled. Otherwise it is disabled.
+
+     Example: sccparam /dev/scc2 txoff on
+
+group:
+     It is possible to build special radio equipment to use more than
+     one frequency on the same band, e.g. using several receivers and
+     only one transmitter that can be switched between frequencies.
+     Also, you can connect several radios that are active on the same
+     band.  In these cases, it is not possible, or not a good idea, to
+     transmit on more than one frequency.  The SCC driver provides a
+     method to lock transmitters on different interfaces, using the
+     "param <interface> group <x>" command.  This will only work when
+     you are using CSMA mode (parameter full = 0).
+
+     The number <x> must be 0 if you want no group restrictions, and
+     can be computed as follows to create restricted groups:
+     <x> is the sum of some OCTAL numbers:
+
+
+     ===  =======================================================
+     200  This transmitter will only be keyed when all other
+	  transmitters in the group are off.
+     100  This transmitter will only be keyed when the carrier
+	  detect of all other interfaces in the group is off.
+     0xx  A byte that can be used to define different groups.
+	  Interfaces are in the same group, when the logical AND
+	  between their xx values is nonzero.
+     ===  =======================================================
+
+     Examples:
+
+     When 2 interfaces use group 201, their transmitters will never be
+     keyed at the same time.
+
+     When 2 interfaces use group 101, the transmitters will only key
+     when both channels are clear at the same time.  When group 301,
+     the transmitters will not be keyed at the same time.
+
+     Don't forget to convert the octal numbers into decimal before
+     you set the parameter.
+
+     Example: (to be written)
+
+softdcd:
+     use a software dcd instead of the real one... Useful for a very
+     slow squelch.
+
+     Example: sccparam /dev/scc0 soft on
+
+
+4. Problems
+===========
+
+If you have tx-problems with your BayCom USCC card please check
+the manufacturer of the 8530. SGS chips have a slightly
+different timing. Try Zilog...  A solution is to write to register 8
+instead of to the data port, but this won't work with the ESCC chips.
+*SIGH!*
+
+A very common problem is that the PTT locks until the maxkeyup timer
+expires, although interrupts and clock source are correct. In most
+cases compiling the driver with CONFIG_SCC_DELAY (set with
+make config) solves the problems. For more hints read the (pseudo) FAQ
+and the documentation coming with z8530drv-utils.
+
+I got reports that the driver has problems on some 386-based systems.
+(i.e. Amstrad) Those systems have a bogus AT bus timing which will
+lead to delayed answers on interrupts. You can recognize these
+problems by looking at the output of Sccstat for the suspected
+port. If it shows under- and overruns you own such a system.
+
+Delayed processing of received data: This depends on
+
+- the kernel version
+
+- kernel profiling compiled or not
+
+- a high interrupt load
+
+- a high load of the machine --- running X, Xmorph, XV and Povray,
+  while compiling the kernel... hmm ... even with 32 MB RAM ...  ;-)
+  Or running a named for the whole .ampr.org domain on an 8 MB
+  box...
+
+- using information from rxecho or kissbridge.
+
+Kernel panics: please read /linux/README and find out if it
+really occurred within the scc driver.
+
+If you cannot solve a problem, send me
+
+- a description of the problem,
+- information on your hardware (computer system, scc board, modem)
+- your kernel version
+- the output of cat /proc/net/z8530
+
+5. Thor RLC100
+==============
+
+Mysteriously this board seems not to work with the driver. Anyone
+got it up-and-running?
+
+
+Many thanks to Linus Torvalds and Alan Cox for including the driver
+in the Linux standard distribution and their support.
+
+::
+
+	Joerg Reuter	ampr-net: dl1bke@db0pra.ampr.org
+			AX-25   : DL1BKE @ DB0ABH.#BAY.DEU.EU
+			Internet: jreuter@yaina.de
+			WWW     : http://yaina.de/jreuter
diff --git a/Documentation/networking/z8530drv.txt b/Documentation/networking/z8530drv.txt
deleted file mode 100644
index 2206abbc3e1b..000000000000
--- a/Documentation/networking/z8530drv.txt
+++ /dev/null
@@ -1,657 +0,0 @@
-This is a subset of the documentation. To use this driver you MUST have the
-full package from:
-
-Internet:
-=========
-
-1. ftp://ftp.ccac.rwth-aachen.de/pub/jr/z8530drv-utils_3.0-3.tar.gz
-
-2. ftp://ftp.pspt.fi/pub/ham/linux/ax25/z8530drv-utils_3.0-3.tar.gz
-
-Please note that the information in this document may be hopelessly outdated.
-A new version of the documentation, along with links to other important
-Linux Kernel AX.25 documentation and programs, is available on
-http://yaina.de/jreuter
-
------------------------------------------------------------------------------
-
-
-	 SCC.C - Linux driver for Z8530 based HDLC cards for AX.25      
-
-   ********************************************************************
-
-        (c) 1993,2000 by Joerg Reuter DL1BKE <jreuter@yaina.de>
-
-        portions (c) 1993 Guido ten Dolle PE1NNZ
-
-        for the complete copyright notice see >> Copying.Z8530DRV <<
-
-   ******************************************************************** 
-
-
-1. Initialization of the driver
-===============================
-
-To use the driver, 3 steps must be performed:
-
-     1. if compiled as module: loading the module
-     2. Setup of hardware, MODEM and KISS parameters with sccinit
-     3. Attach each channel to the Linux kernel AX.25 with "ifconfig"
-
-Unlike the versions below 2.4 this driver is a real network device
-driver. If you want to run xNOS instead of our fine kernel AX.25
-use a 2.x version (available from above sites) or read the
-AX.25-HOWTO on how to emulate a KISS TNC on network device drivers.
-
-
-1.1 Loading the module
-======================
-
-(If you're going to compile the driver as a part of the kernel image,
- skip this chapter and continue with 1.2)
-
-Before you can use a module, you'll have to load it with
-
-	insmod scc.o
-
-please read 'man insmod' that comes with module-init-tools.
-
-You should include the insmod in one of the /etc/rc.d/rc.* files,
-and don't forget to insert a call of sccinit after that. It
-will read your /etc/z8530drv.conf.
-
-1.2. /etc/z8530drv.conf
-=======================
-
-To setup all parameters you must run /sbin/sccinit from one
-of your rc.*-files. This has to be done BEFORE you can
-"ifconfig" an interface. Sccinit reads the file /etc/z8530drv.conf
-and sets the hardware, MODEM and KISS parameters. A sample file is
-delivered with this package. Change it to your needs.
-
-The file itself consists of two main sections.
-
-1.2.1 configuration of hardware parameters
-==========================================
-
-The hardware setup section defines the following parameters for each
-Z8530:
-
-chip    1
-data_a  0x300                   # data port A
-ctrl_a  0x304                   # control port A
-data_b  0x301                   # data port B
-ctrl_b  0x305                   # control port B
-irq     5                       # IRQ No. 5
-pclock  4915200                 # clock
-board   BAYCOM                  # hardware type
-escc    no                      # enhanced SCC chip? (8580/85180/85280)
-vector  0                       # latch for interrupt vector
-special no                      # address of special function register
-option  0                       # option to set via sfr
-
-
-chip	- this is just a delimiter to make sccinit a bit simpler to
-	  program. A parameter has no effect.
-
-data_a  - the address of the data port A of this Z8530 (needed)
-ctrl_a  - the address of the control port A (needed)
-data_b  - the address of the data port B (needed)
-ctrl_b  - the address of the control port B (needed)
-
-irq     - the used IRQ for this chip. Different chips can use different
-          IRQs or the same. If they share an interrupt, it needs to be
-	  specified within one chip-definition only.
-
-pclock  - the clock at the PCLK pin of the Z8530 (option, 4915200 is
-          default), measured in Hertz
-
-board   - the "type" of the board:
-
-	   SCC type                 value
-	   ---------------------------------
-	   PA0HZP SCC card          PA0HZP
-	   EAGLE card               EAGLE
-	   PC100 card               PC100
-	   PRIMUS-PC (DG9BL) card   PRIMUS
-	   BayCom (U)SCC card       BAYCOM
-
-escc    - if you want support for ESCC chips (8580, 85180, 85280), set
-          this to "yes" (option, defaults to "no")
-
-vector  - address of the vector latch (aka "intack port") for PA0HZP
-          cards. There can be only one vector latch for all chips!
-	  (option, defaults to 0)
-
-special - address of the special function register on several cards.
-          (option, defaults to 0)
-
-option  - The value you write into that register (option, default is 0)
-
-You can specify up to four chips (8 channels). If this is not enough,
-just change
-
-	#define MAXSCC 4
-
-to a higher value.
-
-Example for the BAYCOM USCC:
-----------------------------
-
-chip    1
-data_a  0x300                   # data port A
-ctrl_a  0x304                   # control port A
-data_b  0x301                   # data port B
-ctrl_b  0x305                   # control port B
-irq     5                       # IRQ No. 5 (#)
-board   BAYCOM                  # hardware type (*)
-#
-# SCC chip 2
-#
-chip    2
-data_a  0x302
-ctrl_a  0x306
-data_b  0x303
-ctrl_b  0x307
-board   BAYCOM
-
-An example for a PA0HZP card:
------------------------------
-
-chip 1
-data_a 0x153
-data_b 0x151
-ctrl_a 0x152
-ctrl_b 0x150
-irq 9
-pclock 4915200
-board PA0HZP
-vector 0x168
-escc no
-#
-#
-#
-chip 2
-data_a 0x157
-data_b 0x155
-ctrl_a 0x156
-ctrl_b 0x154
-irq 9
-pclock 4915200
-board PA0HZP
-vector 0x168
-escc no
-
-A DRSI would should probably work with this:
---------------------------------------------
-(actually: two DRSI cards...)
-
-chip 1
-data_a 0x303
-data_b 0x301
-ctrl_a 0x302
-ctrl_b 0x300
-irq 7
-pclock 4915200
-board DRSI
-escc no
-#
-#
-#
-chip 2
-data_a 0x313
-data_b 0x311
-ctrl_a 0x312
-ctrl_b 0x310
-irq 7
-pclock 4915200
-board DRSI
-escc no
-
-Note that you cannot use the on-board baudrate generator off DRSI
-cards. Use "mode dpll" for clock source (see below).
-
-This is based on information provided by Mike Bilow (and verified
-by Paul Helay)
-
-The utility "gencfg"
---------------------
-
-If you only know the parameters for the PE1CHL driver for DOS,
-run gencfg. It will generate the correct port addresses (I hope).
-Its parameters are exactly the same as the ones you use with
-the "attach scc" command in net, except that the string "init" must 
-not appear. Example:
-
-gencfg 2 0x150 4 2 0 1 0x168 9 4915200 
-
-will print a skeleton z8530drv.conf for the OptoSCC to stdout.
-
-gencfg 2 0x300 2 4 5 -4 0 7 4915200 0x10
-
-does the same for the BAYCOM USCC card. In my opinion it is much easier
-to edit scc_config.h... 
-
-
-1.2.2 channel configuration
-===========================
-
-The channel definition is divided into three sub sections for each
-channel:
-
-An example for scc0:
-
-# DEVICE
-
-device scc0	# the device for the following params
-
-# MODEM / BUFFERS
-
-speed 1200		# the default baudrate
-clock dpll		# clock source: 
-			# 	dpll     = normal half duplex operation
-			# 	external = MODEM provides own Rx/Tx clock
-			#	divider  = use full duplex divider if
-			#		   installed (1)
-mode nrzi		# HDLC encoding mode
-			#	nrzi = 1k2 MODEM, G3RUH 9k6 MODEM
-			#	nrz  = DF9IC 9k6 MODEM
-			#
-bufsize	384		# size of buffers. Note that this must include
-			# the AX.25 header, not only the data field!
-			# (optional, defaults to 384)
-
-# KISS (Layer 1)
-
-txdelay 36              # (see chapter 1.4)
-persist 64
-slot    8
-tail    8
-fulldup 0
-wait    12
-min     3
-maxkey  7
-idle    3
-maxdef  120
-group   0
-txoff   off
-softdcd on                   
-slip    off
-
-The order WITHIN these sections is unimportant. The order OF these
-sections IS important. The MODEM parameters are set with the first
-recognized KISS parameter...
-
-Please note that you can initialize the board only once after boot
-(or insmod). You can change all parameters but "mode" and "clock" 
-later with the Sccparam program or through KISS. Just to avoid 
-security holes... 
-
-(1) this divider is usually mounted on the SCC-PBC (PA0HZP) or not
-    present at all (BayCom). It feeds back the output of the DPLL 
-    (digital pll) as transmit clock. Using this mode without a divider 
-    installed will normally result in keying the transceiver until 
-    maxkey expires --- of course without sending anything (useful).
-
-2. Attachment of a channel by your AX.25 software
-=================================================
-
-2.1 Kernel AX.25
-================
-
-To set up an AX.25 device you can simply type:
-
-	ifconfig scc0 44.128.1.1 hw ax25 dl0tha-7
-
-This will create a network interface with the IP number 44.128.20.107 
-and the callsign "dl0tha". If you do not have any IP number (yet) you 
-can use any of the 44.128.0.0 network. Note that you do not need 
-axattach. The purpose of axattach (like slattach) is to create a KISS 
-network device linked to a TTY. Please read the documentation of the 
-ax25-utils and the AX.25-HOWTO to learn how to set the parameters of
-the kernel AX.25.
-
-2.2 NOS, NET and TFKISS
-=======================
-
-Since the TTY driver (aka KISS TNC emulation) is gone you need
-to emulate the old behaviour. The cost of using these programs is
-that you probably need to compile the kernel AX.25, regardless of whether
-you actually use it or not. First setup your /etc/ax25/axports,
-for example:
-
-	9k6	dl0tha-9  9600  255 4 9600 baud port (scc3)
-	axlink	dl0tha-15 38400 255 4 Link to NOS
-
-Now "ifconfig" the scc device:
-
-	ifconfig scc3 44.128.1.1 hw ax25 dl0tha-9
-
-You can now axattach a pseudo-TTY:
-
-	axattach /dev/ptys0 axlink
-
-and start your NOS and attach /dev/ptys0 there. The problem is that
-NOS is reachable only via digipeating through the kernel AX.25
-(disastrous on a DAMA controlled channel). To solve this problem,
-configure "rxecho" to echo the incoming frames from "9k6" to "axlink"
-and outgoing frames from "axlink" to "9k6" and start:
-
-	rxecho
-
-Or simply use "kissbridge" coming with z8530drv-utils:
-
-	ifconfig scc3 hw ax25 dl0tha-9
-	kissbridge scc3 /dev/ptys0
-
-
-3. Adjustment and Display of parameters
-=======================================
-
-3.1 Displaying SCC Parameters:
-==============================
-
-Once a SCC channel has been attached, the parameter settings and 
-some statistic information can be shown using the param program:
-
-dl1bke-u:~$ sccstat scc0
-
-Parameters:
-
-speed       : 1200 baud
-txdelay     : 36
-persist     : 255
-slottime    : 0
-txtail      : 8
-fulldup     : 1
-waittime    : 12
-mintime     : 3 sec
-maxkeyup    : 7 sec
-idletime    : 3 sec
-maxdefer    : 120 sec
-group       : 0x00
-txoff       : off
-softdcd     : on
-SLIP        : off
-
-Status:
-
-HDLC                  Z8530           Interrupts         Buffers
------------------------------------------------------------------------
-Sent       :     273  RxOver :     0  RxInts :   125074  Size    :  384
-Received   :    1095  TxUnder:     0  TxInts :     4684  NoSpace :    0
-RxErrors   :    1591                  ExInts :    11776
-TxErrors   :       0                  SpInts :     1503
-Tx State   :    idle
-
-
-The status info shown is:
-
-Sent		- number of frames transmitted
-Received	- number of frames received
-RxErrors	- number of receive errors (CRC, ABORT)
-TxErrors	- number of discarded Tx frames (due to various reasons) 
-Tx State	- status of the Tx interrupt handler: idle/busy/active/tail (2)
-RxOver		- number of receiver overruns
-TxUnder		- number of transmitter underruns
-RxInts		- number of receiver interrupts
-TxInts		- number of transmitter interrupts
-EpInts		- number of receiver special condition interrupts
-SpInts		- number of external/status interrupts
-Size		- maximum size of an AX.25 frame (*with* AX.25 headers!)
-NoSpace		- number of times a buffer could not get allocated
-
-An overrun is abnormal. If lots of these occur, the product of
-baudrate and number of interfaces is too high for the processing
-power of your computer. NoSpace errors are unlikely to be caused by the
-driver or the kernel AX.25.
-
-
-3.2 Setting Parameters
-======================
-
-
-The setting of parameters of the emulated KISS TNC is done in the 
-same way in the SCC driver. You can change parameters by using
-the kissparms program from the ax25-utils package or use the program 
-"sccparam":
-
-     sccparam <device> <paramname> <decimal-|hexadecimal value>
-
-You can change the following parameters:
-
-param	    : value
-------------------------
-speed       : 1200
-txdelay     : 36
-persist     : 255
-slottime    : 0
-txtail      : 8
-fulldup     : 1
-waittime    : 12
-mintime     : 3
-maxkeyup    : 7
-idletime    : 3
-maxdefer    : 120
-group       : 0x00
-txoff       : off
-softdcd     : on
-SLIP        : off
-
-
-The parameters have the following meaning:
-
-speed:
-     The baudrate on this channel in bits/sec
-
-     Example: sccparam /dev/scc3 speed 9600
-
-txdelay:
-     The delay (in units of 10 ms) after keying of the 
-     transmitter, until the first byte is sent. This is usually 
-     called "TXDELAY" in a TNC.  When 0 is specified, the driver 
-     will just wait until the CTS signal is asserted. This 
-     assumes the presence of a timer or other circuitry in the 
-     MODEM and/or transmitter, that asserts CTS when the 
-     transmitter is ready for data.
-     A normal value of this parameter is 30-36.
-
-     Example: sccparam /dev/scc0 txd 20
-
-persist:
-     This is the probability that the transmitter will be keyed 
-     when the channel is found to be free.  It is a value from 0 
-     to 255, and the probability is (value+1)/256.  The value 
-     should be somewhere near 50-60, and should be lowered when 
-     the channel is used more heavily.
-
-     Example: sccparam /dev/scc2 persist 20
-
-slottime:
-     This is the time between samples of the channel. It is 
-     expressed in units of 10 ms.  About 200-300 ms (value 20-30) 
-     seems to be a good value.
-
-     Example: sccparam /dev/scc0 slot 20
-
-tail:
-     The time the transmitter will remain keyed after the last 
-     byte of a packet has been transferred to the SCC. This is 
-     necessary because the CRC and a flag still have to leave the 
-     SCC before the transmitter is keyed down. The value depends 
-     on the baudrate selected.  A few character times should be 
-     sufficient, e.g. 40ms at 1200 baud. (value 4)
-     The value of this parameter is in 10 ms units.
-
-     Example: sccparam /dev/scc2 4
-
-full:
-     The full-duplex mode switch. This can be one of the following 
-     values:
-
-     0:   The interface will operate in CSMA mode (the normal 
-          half-duplex packet radio operation)
-     1:   Fullduplex mode, i.e. the transmitter will be keyed at 
-          any time, without checking the received carrier.  It 
-          will be unkeyed when there are no packets to be sent.
-     2:   Like 1, but the transmitter will remain keyed, also 
-          when there are no packets to be sent.  Flags will be 
-          sent in that case, until a timeout (parameter 10) 
-          occurs.
-
-     Example: sccparam /dev/scc0 fulldup off
-
-wait:
-     The initial waittime before any transmit attempt, after the 
-     frame has been queue for transmit.  This is the length of 
-     the first slot in CSMA mode.  In full duplex modes it is
-     set to 0 for maximum performance.
-     The value of this parameter is in 10 ms units. 
-
-     Example: sccparam /dev/scc1 wait 4
-
-maxkey:
-     The maximal time the transmitter will be keyed to send 
-     packets, in seconds.  This can be useful on busy CSMA 
-     channels, to avoid "getting a bad reputation" when you are 
-     generating a lot of traffic.  After the specified time has 
-     elapsed, no new frame will be started. Instead, the trans-
-     mitter will be switched off for a specified time (parameter 
-     min), and then the selected algorithm for keyup will be 
-     started again.
-     The value 0 as well as "off" will disable this feature, 
-     and allow infinite transmission time. 
-
-     Example: sccparam /dev/scc0 maxk 20
-
-min:
-     This is the time the transmitter will be switched off when 
-     the maximum transmission time is exceeded.
-
-     Example: sccparam /dev/scc3 min 10
-
-idle
-     This parameter specifies the maximum idle time in full duplex 
-     2 mode, in seconds.  When no frames have been sent for this 
-     time, the transmitter will be keyed down.  A value of 0 is
-     has same result as the fullduplex mode 1. This parameter
-     can be disabled.
-
-     Example: sccparam /dev/scc2 idle off	# transmit forever
-
-maxdefer
-     This is the maximum time (in seconds) to wait for a free channel
-     to send. When this timer expires the transmitter will be keyed 
-     IMMEDIATELY. If you love to get trouble with other users you
-     should set this to a very low value ;-)
-
-     Example: sccparam /dev/scc0 maxdefer 240	# 2 minutes
-
-
-txoff:
-     When this parameter has the value 0, the transmission of packets
-     is enable. Otherwise it is disabled.
-
-     Example: sccparam /dev/scc2 txoff on
-
-group:
-     It is possible to build special radio equipment to use more than 
-     one frequency on the same band, e.g. using several receivers and 
-     only one transmitter that can be switched between frequencies.
-     Also, you can connect several radios that are active on the same 
-     band.  In these cases, it is not possible, or not a good idea, to 
-     transmit on more than one frequency.  The SCC driver provides a 
-     method to lock transmitters on different interfaces, using the 
-     "param <interface> group <x>" command.  This will only work when 
-     you are using CSMA mode (parameter full = 0).
-     The number <x> must be 0 if you want no group restrictions, and 
-     can be computed as follows to create restricted groups:
-     <x> is the sum of some OCTAL numbers:
-
-     200  This transmitter will only be keyed when all other 
-          transmitters in the group are off.
-     100  This transmitter will only be keyed when the carrier 
-          detect of all other interfaces in the group is off.
-     0xx  A byte that can be used to define different groups.  
-          Interfaces are in the same group, when the logical AND 
-          between their xx values is nonzero.
-
-     Examples:
-     When 2 interfaces use group 201, their transmitters will never be 
-     keyed at the same time.
-     When 2 interfaces use group 101, the transmitters will only key 
-     when both channels are clear at the same time.  When group 301, 
-     the transmitters will not be keyed at the same time.
-
-     Don't forget to convert the octal numbers into decimal before
-     you set the parameter.
-
-     Example: (to be written)
-
-softdcd:
-     use a software dcd instead of the real one... Useful for a very
-     slow squelch.
-
-     Example: sccparam /dev/scc0 soft on
-
-
-4. Problems 
-===========
-
-If you have tx-problems with your BayCom USCC card please check
-the manufacturer of the 8530. SGS chips have a slightly
-different timing. Try Zilog...  A solution is to write to register 8 
-instead to the data port, but this won't work with the ESCC chips. 
-*SIGH!*
-
-A very common problem is that the PTT locks until the maxkeyup timer
-expires, although interrupts and clock source are correct. In most
-cases compiling the driver with CONFIG_SCC_DELAY (set with
-make config) solves the problems. For more hints read the (pseudo) FAQ 
-and the documentation coming with z8530drv-utils.
-
-I got reports that the driver has problems on some 386-based systems.
-(i.e. Amstrad) Those systems have a bogus AT bus timing which will
-lead to delayed answers on interrupts. You can recognize these
-problems by looking at the output of Sccstat for the suspected
-port. If it shows under- and overruns you own such a system.
-
-Delayed processing of received data: This depends on
-
-- the kernel version
-
-- kernel profiling compiled or not
-
-- a high interrupt load
-
-- a high load of the machine --- running X, Xmorph, XV and Povray,
-  while compiling the kernel... hmm ... even with 32 MB RAM ...  ;-)
-  Or running a named for the whole .ampr.org domain on an 8 MB
-  box...
-
-- using information from rxecho or kissbridge.
-
-Kernel panics: please read /linux/README and find out if it
-really occurred within the scc driver.
-
-If you cannot solve a problem, send me
-
-- a description of the problem,
-- information on your hardware (computer system, scc board, modem)
-- your kernel version
-- the output of cat /proc/net/z8530
-
-4. Thor RLC100
-==============
-
-Mysteriously this board seems not to work with the driver. Anyone
-got it up-and-running?
-
-
-Many thanks to Linus Torvalds and Alan Cox for including the driver
-in the Linux standard distribution and their support.
-
-Joerg Reuter	ampr-net: dl1bke@db0pra.ampr.org
-		AX-25   : DL1BKE @ DB0ABH.#BAY.DEU.EU
-		Internet: jreuter@yaina.de
-		WWW     : http://yaina.de/jreuter
diff --git a/MAINTAINERS b/MAINTAINERS
index d59455c27c42..bee65ebdc67e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18644,7 +18644,7 @@ L:	linux-hams@vger.kernel.org
 S:	Maintained
 W:	http://yaina.de/jreuter/
 W:	http://www.qsl.net/dl1bke/
-F:	Documentation/networking/z8530drv.txt
+F:	Documentation/networking/z8530drv.rst
 F:	drivers/net/hamradio/*scc.c
 F:	drivers/net/hamradio/z8530.h
 
diff --git a/drivers/net/hamradio/Kconfig b/drivers/net/hamradio/Kconfig
index fe409819b56d..f4500f04147d 100644
--- a/drivers/net/hamradio/Kconfig
+++ b/drivers/net/hamradio/Kconfig
@@ -84,7 +84,7 @@ config SCC
 	---help---
 	  These cards are used to connect your Linux box to an amateur radio
 	  in order to communicate with other computers. If you want to use
-	  this, read <file:Documentation/networking/z8530drv.txt> and the
+	  this, read <file:Documentation/networking/z8530drv.rst> and the
 	  AX25-HOWTO, available from
 	  <http://www.tldp.org/docs.html#howto>. Also make sure to say Y
 	  to "Amateur Radio AX.25 Level 2" support.
@@ -98,7 +98,7 @@ config SCC_DELAY
 	help
 	  Say Y here if you experience problems with the SCC driver not
 	  working properly; please read
-	  <file:Documentation/networking/z8530drv.txt> for details.
+	  <file:Documentation/networking/z8530drv.rst> for details.
 
 	  If unsure, say N.
 
diff --git a/drivers/net/hamradio/scc.c b/drivers/net/hamradio/scc.c
index 6c03932d8a6b..33fdd55c6122 100644
--- a/drivers/net/hamradio/scc.c
+++ b/drivers/net/hamradio/scc.c
@@ -7,7 +7,7 @@
  *            ------------------
  *
  * You can find a subset of the documentation in 
- * Documentation/networking/z8530drv.txt.
+ * Documentation/networking/z8530drv.rst.
  */
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From c79773e83e66fb2c22627eda3cd768f9e2bc10b5 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:34 +0200
Subject: docs: networking: device drivers: convert 3com/3c509.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- add notes markups;
- mark tables as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/3com/3c509.rst       | 249 +++++++++++++++++++++
 .../networking/device_drivers/3com/3c509.txt       | 213 ------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 3 files changed, 250 insertions(+), 213 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/3com/3c509.rst
 delete mode 100644 Documentation/networking/device_drivers/3com/3c509.txt

diff --git a/Documentation/networking/device_drivers/3com/3c509.rst b/Documentation/networking/device_drivers/3com/3c509.rst
new file mode 100644
index 000000000000..47f706bacdd9
--- /dev/null
+++ b/Documentation/networking/device_drivers/3com/3c509.rst
@@ -0,0 +1,249 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=============================================================================
+Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher)
+=============================================================================
+
+This file contains the instructions and caveats for v1.18c and higher versions
+of the 3c509 driver. You should not use the driver without reading this file.
+
+release 1.0
+
+28 February 2002
+
+Current maintainer (corrections to):
+  David Ruggiero <jdr@farfalle.com>
+
+Introduction
+============
+
+The following are notes and information on using the 3Com EtherLink III series
+ethercards in Linux. These cards are commonly known by the most widely-used
+card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't
+be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905"
+(aka "Vortex" or "Boomerang") series.  Kernel support for the 3c509 family is
+provided by the module 3c509.c, which has code to support all of the following
+models:
+
+ - 3c509 (original ISA card)
+ - 3c509B (later revision of the ISA card; supports full-duplex)
+ - 3c589 (PCMCIA)
+ - 3c589B (later revision of the 3c589; supports full-duplex)
+ - 3c579 (EISA)
+
+Large portions of this documentation were heavily borrowed from the guide
+written by the original author of the 3c509 driver, Donald Becker. The master
+copy of that document, which contains notes on older versions of the driver,
+currently resides on the Scyld web server: http://www.scyld.com/.
+
+
+Special Driver Features
+=======================
+
+Overriding card settings
+
+The driver allows boot- or load-time overriding of the card's detected IOADDR,
+IRQ, and transceiver settings, although this capability shouldn't generally be
+needed except to enable full-duplex mode (see below). An example of the syntax
+for LILO parameters for doing this::
+
+    ether=10,0x310,3,0x3c509,eth0
+
+This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and
+transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts
+with other card types when overriding the I/O address. When the driver is
+loaded as a module, only the IRQ may be overridden. For example,
+setting two cards to IRQ10 and IRQ11 is done by using the irq module
+option::
+
+   options 3c509 irq=10,11
+
+
+Full-duplex mode
+================
+
+The v1.18c driver added support for the 3c509B's full-duplex capabilities.
+In order to enable and successfully use full-duplex mode, three conditions
+must be met:
+
+(a) You must have an EtherLink III card model whose hardware supports full-
+duplex operations. Currently, the only members of the 3c509 family that are
+positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B
+(PCMCIA) cards. Cards without the "B" model designation do *not* support
+full-duplex mode; these include the original 3c509 (no "B"), the original
+3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus).
+
+(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45
+connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces.
+AUI and 10base2 network cabling is physically incapable of full-duplex
+operation.
+
+(c) Most importantly, your 3c509B must be connected to a link partner that is
+itself full-duplex capable. This is almost certainly one of two things: a full-
+duplex-capable  Ethernet switch (*not* a hub), or a full-duplex-capable NIC on
+another system that's connected directly to the 3c509B via a crossover cable.
+
+Full-duplex mode can be enabled using 'ethtool'.
+
+.. warning::
+
+  Extremely important caution concerning full-duplex mode
+
+  Understand that the 3c509B's hardware's full-duplex support is much more
+  limited than that provided by more modern network interface cards. Although
+  at the physical layer of the network it fully supports full-duplex operation,
+  the card was designed before the current Ethernet auto-negotiation (N-way)
+  spec was written. This means that the 3c509B family ***cannot and will not
+  auto-negotiate a full-duplex connection with its link partner under any
+  circumstances, no matter how it is initialized***. If the full-duplex mode
+  of the 3c509B is enabled, its link partner will very likely need to be
+  independently _forced_ into full-duplex mode as well; otherwise various nasty
+  failures will occur - at the very least, you'll see massive numbers of packet
+  collisions. This is one of the very rare circumstances where disabling auto-
+  negotiation and forcing the duplex mode of a network interface card or switch
+  would ever be necessary or desirable.
+
+
+Available Transceiver Types
+===========================
+
+For versions of the driver v1.18c and above, the available transceiver types are:
+
+== =========================================================================
+0  transceiver type from EEPROM config (normally 10baseT); force half-duplex
+1  AUI (thick-net / DB15 connector)
+2  (undefined)
+3  10base2 (thin-net == coax / BNC connector)
+4  10baseT (RJ-45 connector); force half-duplex mode
+8  transceiver type and duplex mode taken from card's EEPROM config settings
+12 10baseT (RJ-45 connector); force full-duplex mode
+== =========================================================================
+
+Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note
+that the new transceiver codes 8 and 12 are the *only* ones that will enable
+full-duplex mode, no matter what the card's detected EEPROM settings might be.
+This ensures that merely upgrading the driver from an earlier version will
+never automatically enable full-duplex mode in an existing installation;
+it must always be explicitly enabled via one of these codes in order to be
+activated.
+
+The transceiver type can be changed using 'ethtool'.
+
+
+Interpretation of error messages and common problems
+----------------------------------------------------
+
+Error Messages
+^^^^^^^^^^^^^^
+
+eth0: Infinite loop in interrupt, status 2011.
+These are "mostly harmless" messages indicating that the driver had too much
+work during that interrupt cycle. With a status of 0x2011 you are receiving
+packets faster than they can be removed from the card. This should be rare
+or impossible in normal operation. Possible causes of this error report are:
+
+   - a "green" mode enabled that slows the processor down when there is no
+     keyboard activity.
+
+   - some other device or device driver hogging the bus or disabling interrupts.
+     Check /proc/interrupts for excessive interrupt counts. The timer tick
+     interrupt should always be incrementing faster than the others.
+
+No received packets
+^^^^^^^^^^^^^^^^^^^
+
+If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never
+receives packets (as reported by /proc/net/dev or 'ifconfig') you likely
+have an interrupt line problem. Check /proc/interrupts to verify that the
+card is actually generating interrupts. If the interrupt count is not
+increasing you likely have a physical conflict with two devices trying to
+use the same ISA IRQ line. The common conflict is with a sound card on IRQ10
+or IRQ5, and the easiest solution is to move the 3c509 to a different
+interrupt line. If the device is receiving packets but 'ping' doesn't work,
+you have a routing problem.
+
+Tx Carrier Errors Reported in /proc/net/dev
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+
+If an EtherLink III appears to transmit packets, but the "Tx carrier errors"
+field in /proc/net/dev increments as quickly as the Tx packet count, you
+likely have an unterminated network or the incorrect media transceiver selected.
+
+3c509B card is not detected on machines with an ISA PnP BIOS.
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+While the updated driver works with most PnP BIOS programs, it does not work
+with all. This can be fixed by disabling PnP support using the 3Com-supplied
+setup program.
+
+3c509 card is not detected on overclocked machines
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+Increase the delay time in id_read_eeprom() from the current value, 500,
+to an absurdly high value, such as 5000.
+
+
+Decoding Status and Error Messages
+----------------------------------
+
+
+The bits in the main status register are:
+
+=====	======================================
+value 	description
+=====	======================================
+0x01 	Interrupt latch
+0x02 	Tx overrun, or Rx underrun
+0x04 	Tx complete
+0x08 	Tx FIFO room available
+0x10 	A complete Rx packet has arrived
+0x20 	A Rx packet has started to arrive
+0x40 	The driver has requested an interrupt
+0x80 	Statistics counter nearly full
+=====	======================================
+
+The bits in the transmit (Tx) status word are:
+
+=====	============================================
+value	description
+=====	============================================
+0x02	Out-of-window collision.
+0x04	Status stack overflow (normally impossible).
+0x08	16 collisions.
+0x10	Tx underrun (not enough PCI bus bandwidth).
+0x20	Tx jabber.
+0x40	Tx interrupt requested.
+0x80	Status is valid (this should always be set).
+=====	============================================
+
+
+When a transmit error occurs the driver produces a status message such as::
+
+   eth0: Transmit error, Tx status register 82
+
+The two values typically seen here are:
+
+0x82
+^^^^
+
+Out of window collision. This typically occurs when some other Ethernet
+host is incorrectly set to full duplex on a half duplex network.
+
+0x88
+^^^^
+
+16 collisions. This typically occurs when the network is exceptionally busy
+or when another host doesn't correctly back off after a collision. If this
+error is mixed with 0x82 errors it is the result of a host incorrectly set
+to full duplex (see above).
+
+Both of these errors are the result of network problems that should be
+corrected. They do not represent driver malfunction.
+
+
+Revision history (this file)
+============================
+
+28Feb02 v1.0  DR   New; major portions based on Becker original 3c509 docs
+
diff --git a/Documentation/networking/device_drivers/3com/3c509.txt b/Documentation/networking/device_drivers/3com/3c509.txt
deleted file mode 100644
index fbf722e15ac3..000000000000
--- a/Documentation/networking/device_drivers/3com/3c509.txt
+++ /dev/null
@@ -1,213 +0,0 @@
-Linux and the 3Com EtherLink III Series Ethercards (driver v1.18c and higher)
-----------------------------------------------------------------------------
-
-This file contains the instructions and caveats for v1.18c and higher versions
-of the 3c509 driver. You should not use the driver without reading this file.
-
-release 1.0
-28 February 2002
-Current maintainer (corrections to):
-  David Ruggiero <jdr@farfalle.com>
-
-----------------------------------------------------------------------------
-
-(0) Introduction
-
-The following are notes and information on using the 3Com EtherLink III series
-ethercards in Linux. These cards are commonly known by the most widely-used
-card's 3Com model number, 3c509. They are all 10mb/s ISA-bus cards and shouldn't
-be (but sometimes are) confused with the similarly-numbered PCI-bus "3c905"
-(aka "Vortex" or "Boomerang") series.  Kernel support for the 3c509 family is
-provided by the module 3c509.c, which has code to support all of the following
-models:
-
-  3c509 (original ISA card)
-  3c509B (later revision of the ISA card; supports full-duplex)
-  3c589 (PCMCIA)
-  3c589B (later revision of the 3c589; supports full-duplex)
-  3c579 (EISA)
-
-Large portions of this documentation were heavily borrowed from the guide
-written the original author of the 3c509 driver, Donald Becker. The master
-copy of that document, which contains notes on older versions of the driver,
-currently resides on Scyld web server: http://www.scyld.com/.
-
-
-(1) Special Driver Features
-
-Overriding card settings
-
-The driver allows boot- or load-time overriding of the card's detected IOADDR,
-IRQ, and transceiver settings, although this capability shouldn't generally be
-needed except to enable full-duplex mode (see below). An example of the syntax
-for LILO parameters for doing this:
-
-    ether=10,0x310,3,0x3c509,eth0 
-
-This configures the first found 3c509 card for IRQ 10, base I/O 0x310, and
-transceiver type 3 (10base2). The flag "0x3c509" must be set to avoid conflicts
-with other card types when overriding the I/O address. When the driver is
-loaded as a module, only the IRQ may be overridden. For example,
-setting two cards to IRQ10 and IRQ11 is done by using the irq module
-option:
-
-   options 3c509 irq=10,11
-
-
-(2) Full-duplex mode
-
-The v1.18c driver added support for the 3c509B's full-duplex capabilities.
-In order to enable and successfully use full-duplex mode, three conditions
-must be met: 
-
-(a) You must have a Etherlink III card model whose hardware supports full-
-duplex operations. Currently, the only members of the 3c509 family that are
-positively known to support full-duplex are the 3c509B (ISA bus) and 3c589B
-(PCMCIA) cards. Cards without the "B" model designation do *not* support
-full-duplex mode; these include the original 3c509 (no "B"), the original
-3c589, the 3c529 (MCA bus), and the 3c579 (EISA bus).
-
-(b) You must be using your card's 10baseT transceiver (i.e., the RJ-45
-connector), not its AUI (thick-net) or 10base2 (thin-net/coax) interfaces.
-AUI and 10base2 network cabling is physically incapable of full-duplex
-operation.
-
-(c) Most importantly, your 3c509B must be connected to a link partner that is
-itself full-duplex capable. This is almost certainly one of two things: a full-
-duplex-capable  Ethernet switch (*not* a hub), or a full-duplex-capable NIC on
-another system that's connected directly to the 3c509B via a crossover cable.
-
-Full-duplex mode can be enabled using 'ethtool'.
- 
-/////Extremely important caution concerning full-duplex mode/////
-Understand that the 3c509B's hardware's full-duplex support is much more
-limited than that provide by more modern network interface cards. Although
-at the physical layer of the network it fully supports full-duplex operation,
-the card was designed before the current Ethernet auto-negotiation (N-way)
-spec was written. This means that the 3c509B family ***cannot and will not
-auto-negotiate a full-duplex connection with its link partner under any
-circumstances, no matter how it is initialized***. If the full-duplex mode
-of the 3c509B is enabled, its link partner will very likely need to be
-independently _forced_ into full-duplex mode as well; otherwise various nasty
-failures will occur - at the very least, you'll see massive numbers of packet
-collisions. This is one of very rare circumstances where disabling auto-
-negotiation and forcing the duplex mode of a network interface card or switch
-would ever be necessary or desirable.
-
-
-(3) Available Transceiver Types
-
-For versions of the driver v1.18c and above, the available transceiver types are:
- 
-0  transceiver type from EEPROM config (normally 10baseT); force half-duplex
-1  AUI (thick-net / DB15 connector)
-2  (undefined)
-3  10base2 (thin-net == coax / BNC connector)
-4  10baseT (RJ-45 connector); force half-duplex mode
-8  transceiver type and duplex mode taken from card's EEPROM config settings
-12 10baseT (RJ-45 connector); force full-duplex mode
-
-Prior to driver version 1.18c, only transceiver codes 0-4 were supported. Note
-that the new transceiver codes 8 and 12 are the *only* ones that will enable
-full-duplex mode, no matter what the card's detected EEPROM settings might be.
-This insured that merely upgrading the driver from an earlier version would
-never automatically enable full-duplex mode in an existing installation;
-it must always be explicitly enabled via one of these code in order to be
-activated.
-
-The transceiver type can be changed using 'ethtool'.
-  
-
-(4a) Interpretation of error messages and common problems
-
-Error Messages
-
-eth0: Infinite loop in interrupt, status 2011. 
-These are "mostly harmless" message indicating that the driver had too much
-work during that interrupt cycle. With a status of 0x2011 you are receiving
-packets faster than they can be removed from the card. This should be rare
-or impossible in normal operation. Possible causes of this error report are:
- 
-   - a "green" mode enabled that slows the processor down when there is no
-     keyboard activity. 
-
-   - some other device or device driver hogging the bus or disabling interrupts.
-     Check /proc/interrupts for excessive interrupt counts. The timer tick
-     interrupt should always be incrementing faster than the others. 
-
-No received packets 
-If a 3c509, 3c562 or 3c589 can successfully transmit packets, but never
-receives packets (as reported by /proc/net/dev or 'ifconfig') you likely
-have an interrupt line problem. Check /proc/interrupts to verify that the
-card is actually generating interrupts. If the interrupt count is not
-increasing you likely have a physical conflict with two devices trying to
-use the same ISA IRQ line. The common conflict is with a sound card on IRQ10
-or IRQ5, and the easiest solution is to move the 3c509 to a different
-interrupt line. If the device is receiving packets but 'ping' doesn't work,
-you have a routing problem.
-
-Tx Carrier Errors Reported in /proc/net/dev 
-If an EtherLink III appears to transmit packets, but the "Tx carrier errors"
-field in /proc/net/dev increments as quickly as the Tx packet count, you
-likely have an unterminated network or the incorrect media transceiver selected. 
-
-3c509B card is not detected on machines with an ISA PnP BIOS. 
-While the updated driver works with most PnP BIOS programs, it does not work
-with all. This can be fixed by disabling PnP support using the 3Com-supplied
-setup program. 
-
-3c509 card is not detected on overclocked machines 
-Increase the delay time in id_read_eeprom() from the current value, 500,
-to an absurdly high value, such as 5000. 
-
-
-(4b) Decoding Status and Error Messages
-
-The bits in the main status register are: 
-
-value 	description
-0x01 	Interrupt latch
-0x02 	Tx overrun, or Rx underrun
-0x04 	Tx complete
-0x08 	Tx FIFO room available
-0x10 	A complete Rx packet has arrived
-0x20 	A Rx packet has started to arrive
-0x40 	The driver has requested an interrupt
-0x80 	Statistics counter nearly full
-
-The bits in the transmit (Tx) status word are: 
-
-value 	description
-0x02 	Out-of-window collision.
-0x04 	Status stack overflow (normally impossible).
-0x08 	16 collisions.
-0x10 	Tx underrun (not enough PCI bus bandwidth).
-0x20 	Tx jabber.
-0x40 	Tx interrupt requested.
-0x80 	Status is valid (this should always be set).
-
-
-When a transmit error occurs the driver produces a status message such as 
-
-   eth0: Transmit error, Tx status register 82
-
-The two values typically seen here are:
-
-0x82 
-Out of window collision. This typically occurs when some other Ethernet
-host is incorrectly set to full duplex on a half duplex network. 
-
-0x88 
-16 collisions. This typically occurs when the network is exceptionally busy
-or when another host doesn't correctly back off after a collision. If this
-error is mixed with 0x82 errors it is the result of a host incorrectly set
-to full duplex (see above).
-
-Both of these errors are the result of network problems that should be
-corrected. They do not represent driver malfunction.
-
-
-(5) Revision history (this file)
-
-28Feb02 v1.0  DR   New; major portions based on Becker original 3c509 docs
-
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index a191faaf97de..402a9188f446 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -27,6 +27,7 @@ Contents:
    netronome/nfp
    pensando/ionic
    stmicro/stmmac
+   3com/3c509
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From 9ea2af8d16f5612168ed52cb0ec6752bac0877a9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:35 +0200
Subject: docs: networking: device drivers: convert 3com/vortex.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/3com/vortex.rst      | 461 +++++++++++++++++++++
 .../networking/device_drivers/3com/vortex.txt      | 448 --------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 MAINTAINERS                                        |   2 +-
 drivers/net/ethernet/3com/3c59x.c                  |   4 +-
 drivers/net/ethernet/3com/Kconfig                  |   2 +-
 6 files changed, 466 insertions(+), 452 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/3com/vortex.rst
 delete mode 100644 Documentation/networking/device_drivers/3com/vortex.txt

diff --git a/Documentation/networking/device_drivers/3com/vortex.rst b/Documentation/networking/device_drivers/3com/vortex.rst
new file mode 100644
index 000000000000..800add5be338
--- /dev/null
+++ b/Documentation/networking/device_drivers/3com/vortex.rst
@@ -0,0 +1,461 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================
+3Com Vortex device driver
+=========================
+
+Documentation/networking/device_drivers/3com/vortex.rst
+
+Andrew Morton
+
+30 April 2000
+
+
+This document describes the usage and errata of the 3Com "Vortex" device
+driver for Linux, 3c59x.c.
+
+The driver was written by Donald Becker <becker@scyld.com>
+
+Don is no longer the prime maintainer of this version of the driver.
+Please report problems to one or more of:
+
+- Andrew Morton
+- Netdev mailing list <netdev@vger.kernel.org>
+- Linux kernel mailing list <linux-kernel@vger.kernel.org>
+
+Please note the 'Reporting and Diagnosing Problems' section at the end
+of this file.
+
+
+Since kernel 2.3.99-pre6, this driver incorporates the support for the
+3c575-series Cardbus cards which used to be handled by 3c575_cb.c.
+
+This driver supports the following hardware:
+
+	- 3c590 Vortex 10Mbps
+	- 3c592 EISA 10Mbps Demon/Vortex
+	- 3c597 EISA Fast Demon/Vortex
+	- 3c595 Vortex 100baseTx
+	- 3c595 Vortex 100baseT4
+	- 3c595 Vortex 100base-MII
+	- 3c900 Boomerang 10baseT
+	- 3c900 Boomerang 10Mbps Combo
+	- 3c900 Cyclone 10Mbps TPO
+	- 3c900 Cyclone 10Mbps Combo
+	- 3c900 Cyclone 10Mbps TPC
+	- 3c900B-FL Cyclone 10base-FL
+	- 3c905 Boomerang 100baseTx
+	- 3c905 Boomerang 100baseT4
+	- 3c905B Cyclone 100baseTx
+	- 3c905B Cyclone 10/100/BNC
+	- 3c905B-FX Cyclone 100baseFx
+	- 3c905C Tornado
+	- 3c920B-EMB-WNM (ATI Radeon 9100 IGP)
+	- 3c980 Cyclone
+	- 3c980C Python-T
+	- 3cSOHO100-TX Hurricane
+	- 3c555 Laptop Hurricane
+	- 3c556 Laptop Tornado
+	- 3c556B Laptop Hurricane
+	- 3c575 [Megahertz] 10/100 LAN  CardBus
+	- 3c575 Boomerang CardBus
+	- 3CCFE575BT Cyclone CardBus
+	- 3CCFE575CT Tornado CardBus
+	- 3CCFE656 Cyclone CardBus
+	- 3CCFEM656B Cyclone+Winmodem CardBus
+	- 3CXFEM656C Tornado+Winmodem CardBus
+	- 3c450 HomePNA Tornado
+	- 3c920 Tornado
+	- 3c982 Hydra Dual Port A
+	- 3c982 Hydra Dual Port B
+	- 3c905B-T4
+	- 3c920B-EMB-WNM Tornado
+
+Module parameters
+=================
+
+There are several parameters which may be provided to the driver when
+its module is loaded.  These are usually placed in ``/etc/modprobe.d/*.conf``
+configuration files.  Example::
+
+    options 3c59x debug=3 rx_copybreak=300
+
+If you are using the PCMCIA tools (cardmgr) then the options may be
+placed in /etc/pcmcia/config.opts::
+
+    module "3c59x" opts "debug=3 rx_copybreak=300"
+
+
+The supported parameters are:
+
+debug=N
+
+  Where N is a number from 0 to 7.  Anything above 3 produces a lot
+  of output in your system logs.  debug=1 is default.
+
+options=N1,N2,N3,...
+
+  Each number in the list provides an option to the corresponding
+  network card.  So if you have two 3c905's and you wish to provide
+  them with option 0x204 you would use::
+
+    options=0x204,0x204
+
+  The individual options are composed of a number of bitfields which
+  have the following meanings:
+
+  Possible media type settings
+
+	==	=================================
+	0	10baseT
+	1	10Mbs AUI
+	2	undefined
+	3	10base2 (BNC)
+	4	100base-TX
+	5	100base-FX
+	6	MII (Media Independent Interface)
+	7	Use default setting from EEPROM
+	8       Autonegotiate
+	9       External MII
+	10      Use default setting from EEPROM
+	==	=================================
+
+  When generating a value for the 'options' setting, the above media
+  selection values may be OR'ed (or added to) the following:
+
+  ======  =============================================
+  0x8000  Set driver debugging level to 7
+  0x4000  Set driver debugging level to 2
+  0x0400  Enable Wake-on-LAN
+  0x0200  Force full duplex mode.
+  0x0010  Bus-master enable bit (Old Vortex cards only)
+  ======  =============================================
+
+  For example::
+
+    insmod 3c59x options=0x204
+
+  will force full-duplex 100base-TX, rather than allowing the usual
+  autonegotiation.
+
+global_options=N
+
+  Sets the ``options`` parameter for all 3c59x NICs in the machine.
+  Entries in the ``options`` array above will override any setting of
+  this.
+
+full_duplex=N1,N2,N3...
+
+  Similar to bit 9 of 'options'.  Forces the corresponding card into
+  full-duplex mode.  Please use this in preference to the ``options``
+  parameter.
+
+  In fact, please don't use this at all! You're better off getting
+  autonegotiation working properly.
+
+global_full_duplex=N1
+
+  Sets full duplex mode for all 3c59x NICs in the machine.  Entries
+  in the ``full_duplex`` array above will override any setting of this.
+
+flow_ctrl=N1,N2,N3...
+
+  Use 802.3x MAC-layer flow control.  The 3com cards only support the
+  PAUSE command, which means that they will stop sending packets for a
+  short period if they receive a PAUSE frame from the link partner.
+
+  The driver only allows flow control on a link which is operating in
+  full duplex mode.
+
+  This feature does not appear to work on the 3c905 - only 3c905B and
+  3c905C have been tested.
+
+  The 3com cards appear to only respond to PAUSE frames which are
+  sent to the reserved destination address of 01:80:c2:00:00:01.  They
+  do not honour PAUSE frames which are sent to the station MAC address.
+
+rx_copybreak=M
+
+  The driver preallocates 32 full-sized (1536 byte) network buffers
+  for receiving.  When a packet arrives, the driver has to decide
+  whether to leave the packet in its full-sized buffer, or to allocate
+  a smaller buffer and copy the packet across into it.
+
+  This is a speed/space tradeoff.
+
+  The value of rx_copybreak is used to decide when to make the copy.
+  If the packet size is less than rx_copybreak, the packet is copied.
+  The default value for rx_copybreak is 200 bytes.
+
+max_interrupt_work=N
+
+  The driver's interrupt service routine can handle many receive and
+  transmit packets in a single invocation.  It does this in a loop.
+  The value of max_interrupt_work governs how many times the interrupt
+  service routine will loop.  The default value is 32 loops.  If this
+  is exceeded the interrupt service routine gives up and generates a
+  warning message "eth0: Too much work in interrupt".
+
+hw_checksums=N1,N2,N3,...
+
+  Recent 3com NICs are able to generate IPv4, TCP and UDP checksums
+  in hardware.  Linux has used the Rx checksumming for a long time.
+  The "zero copy" patch which is planned for the 2.4 kernel series
+  allows you to make use of the NIC's DMA scatter/gather and transmit
+  checksumming as well.
+
+  The driver is set up so that, when the zerocopy patch is applied,
+  all Tornado and Cyclone devices will use S/G and Tx checksums.
+
+  This module parameter has been provided so you can override this
+  decision.  If you think that Tx checksums are causing a problem, you
+  may disable the feature with ``hw_checksums=0``.
+
+  If you think your NIC should be performing Tx checksumming and the
+  driver isn't enabling it, you can force the use of hardware Tx
+  checksumming with ``hw_checksums=1``.
+
+  The driver drops a message in the logfiles to indicate whether or
+  not it is using hardware scatter/gather and hardware Tx checksums.
+
+  Scatter/gather and hardware checksums provide considerable
+  performance improvement for the sendfile() system call, but a small
+  decrease in throughput for send().  There is no effect upon receive
+  efficiency.
+
+compaq_ioaddr=N,
+compaq_irq=N,
+compaq_device_id=N
+
+  "Variables to work-around the Compaq PCI BIOS32 problem"....
+
+watchdog=N
+
+  Sets the time duration (in milliseconds) after which the kernel
+  decides that the transmitter has become stuck and needs to be reset.
+  This is mainly for debugging purposes, although it may be advantageous
+  to increase this value on LANs which have very high collision rates.
+  The default value is 5000 (5.0 seconds).
+
+enable_wol=N1,N2,N3,...
+
+  Enable Wake-on-LAN support for the relevant interface.  Donald
+  Becker's ``ether-wake`` application may be used to wake suspended
+  machines.
+
+  Also enables the NIC's power management support.
+
+global_enable_wol=N
+
+  Sets enable_wol mode for all 3c59x NICs in the machine.  Entries in
+  the ``enable_wol`` array above will override any setting of this.
+
+Media selection
+---------------
+
+A number of the older NICs such as the 3c590 and 3c900 series have
+10base2 and AUI interfaces.
+
+Prior to January, 2001 this driver would autoselect the 10base2 or AUI
+port if it didn't detect activity on the 10baseT port.  It would then
+get stuck on the 10base2 port and a driver reload was necessary to
+switch back to 10baseT.  This behaviour could not be prevented with a
+module option override.
+
+Later (current) versions of the driver _do_ support locking of the
+media type.  So if you load the driver module with::
+
+	modprobe 3c59x options=0
+
+it will permanently select the 10baseT port.  Automatic selection of
+other media types does not occur.
+
+
+Transmit error, Tx status register 82
+-------------------------------------
+
+This is a common error which is almost always caused by another host on
+the same network being in full-duplex mode, while this host is in
+half-duplex mode.  You need to find that other host and make it run in
+half-duplex mode or fix this host to run in full-duplex mode.
+
+As a last resort, you can force the 3c59x driver into full-duplex mode
+with::
+
+	options 3c59x full_duplex=1
+
+but this has to be viewed as a workaround for broken network gear and
+should only really be used for equipment which cannot autonegotiate.
+
+
+Additional resources
+--------------------
+
+Details of the device driver implementation are at the top of the source file.
+
+Additional documentation is available at Don Becker's Linux Drivers site:
+
+     http://www.scyld.com/vortex.html
+
+Donald Becker's driver development site:
+
+     http://www.scyld.com/network.html
+
+Donald's vortex-diag program is useful for inspecting the NIC's state:
+
+     http://www.scyld.com/ethercard_diag.html
+
+Donald's mii-diag program may be used for inspecting and manipulating
+the NIC's Media Independent Interface subsystem:
+
+     http://www.scyld.com/ethercard_diag.html#mii-diag
+
+Donald's wake-on-LAN page:
+
+     http://www.scyld.com/wakeonlan.html
+
+3Com's DOS-based application for setting up the NICs EEPROMs:
+
+	ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe
+
+
+Autonegotiation notes
+---------------------
+
+  The driver uses a one-minute heartbeat for adapting to changes in
+  the external LAN environment if link is up and 5 seconds if link is down.
+  This means that when, for example, a machine is unplugged from a hubbed
+  10baseT LAN and plugged into a switched 100baseT LAN, the throughput
+  will be quite dreadful for up to sixty seconds.  Be patient.
+
+  Cisco interoperability note from Walter Wong <wcw+@CMU.EDU>:
+
+  On a side note, adding HAS_NWAY seems to share a problem with the
+  Cisco 6509 switch.  Specifically, you need to change the spanning
+  tree parameter for the port the machine is plugged into to 'portfast'
+  mode.  Otherwise, the negotiation fails.  This has been an issue
+  we've noticed for a while but haven't had the time to track down.
+
+  Cisco switches    (Jeff Busch <jbusch@deja.com>)
+
+    My "standard config" for ports to which PC's/servers connect directly::
+
+	interface FastEthernet0/N
+	description machinename
+	load-interval 30
+	spanning-tree portfast
+
+    If autonegotiation is a problem, you may need to specify "speed
+    100" and "duplex full" as well (or "speed 10" and "duplex half").
+
+    WARNING: DO NOT hook up hubs/switches/bridges to these
+    specially-configured ports! The switch will become very confused.
+
+
+Reporting and diagnosing problems
+---------------------------------
+
+Maintainers find that accurate and complete problem reports are
+invaluable in resolving driver problems.  We are frequently not able to
+reproduce problems and must rely on your patience and efforts to get to
+the bottom of the problem.
+
+If you believe you have a driver problem here are some of the
+steps you should take:
+
+- Is it really a driver problem?
+
+   Eliminate some variables: try different cards, different
+   computers, different cables, different ports on the switch/hub,
+   different versions of the kernel or of the driver, etc.
+
+- OK, it's a driver problem.
+
+   You need to generate a report.  Typically this is an email to the
+   maintainer and/or netdev@vger.kernel.org.  The maintainer's
+   email address will be in the driver source or in the MAINTAINERS file.
+
+- The contents of your report will vary a lot depending upon the
+  problem.  If it's a kernel crash then you should refer to the
+  admin-guide/reporting-bugs.rst file.
+
+  But for most problems it is useful to provide the following:
+
+   - Kernel version, driver version
+
+   - A copy of the banner message which the driver generates when
+     it is initialised.  For example::
+
+        eth0: 3Com PCI 3c905C Tornado at 0xa400,  00:50:da:6a:88:f0, IRQ 19
+        8K byte-wide RAM 5:3 Rx:Tx split, autoselect/Autonegotiate interface.
+        MII transceiver found at address 24, status 782d.
+        Enabling bus-master transmits and whole-frame receives.
+
+     NOTE: You must provide the ``debug=2`` modprobe option to generate
+     a full detection message.  Please do this::
+
+	modprobe 3c59x debug=2
+
+   - If it is a PCI device, the relevant output from 'lspci -vx', eg::
+
+       00:09.0 Ethernet controller: 3Com Corporation 3c905C-TX [Fast Etherlink] (rev 74)
+	       Subsystem: 3Com Corporation: Unknown device 9200
+	       Flags: bus master, medium devsel, latency 32, IRQ 19
+	       I/O ports at a400 [size=128]
+	       Memory at db000000 (32-bit, non-prefetchable) [size=128]
+	       Expansion ROM at <unassigned> [disabled] [size=128K]
+	       Capabilities: [dc] Power Management version 2
+       00: b7 10 00 92 07 00 10 02 74 00 00 02 08 20 00 00
+       10: 01 a4 00 00 00 00 00 db 00 00 00 00 00 00 00 00
+       20: 00 00 00 00 00 00 00 00 00 00 00 00 b7 10 00 10
+       30: 00 00 00 00 dc 00 00 00 00 00 00 00 05 01 0a 0a
+
+   - A description of the environment: 10baseT? 100baseT?
+     full/half duplex? switched or hubbed?
+
+   - Any additional module parameters which you may be providing to the driver.
+
+   - Any kernel logs which are produced.  The more the merrier.
+     If this is a large file and you are sending your report to a
+     mailing list, mention that you have the logfile, but don't send
+     it.  If you're reporting direct to the maintainer then just send
+     it.
+
+     To ensure that all kernel logs are available, add the
+     following line to /etc/syslog.conf::
+
+	 kern.* /var/log/messages
+
+     Then restart syslogd with::
+
+	 /etc/rc.d/init.d/syslog restart
+
+     (The above may vary, depending upon which Linux distribution you use).
+
+    - If your problem is reproducible then that's great.  Try the
+      following:
+
+      1) Increase the debug level.  Usually this is done via:
+
+	 a) modprobe driver debug=7
+	 b) In /etc/modprobe.d/driver.conf:
+	    options driver debug=7
+
+      2) Recreate the problem with the higher debug level,
+	 send all logs to the maintainer.
+
+      3) Download your card's diagnostic tool from Donald
+	 Becker's website <http://www.scyld.com/ethercard_diag.html>.
+	 Download mii-diag.c as well.  Build these.
+
+	 a) Run 'vortex-diag -aaee' and 'mii-diag -v' when the card is
+	    working correctly.  Save the output.
+
+	 b) Run the above commands when the card is malfunctioning.  Send
+	    both sets of output.
+
+Finally, please be patient and be prepared to do some work.  You may
+end up working on this problem for a week or more as the maintainer
+asks more questions, asks for more tests, asks for patches to be
+applied, etc.  At the end of it all, the problem may even remain
+unresolved.
diff --git a/Documentation/networking/device_drivers/3com/vortex.txt b/Documentation/networking/device_drivers/3com/vortex.txt
deleted file mode 100644
index 587f3fcfbcae..000000000000
--- a/Documentation/networking/device_drivers/3com/vortex.txt
+++ /dev/null
@@ -1,448 +0,0 @@
-Documentation/networking/device_drivers/3com/vortex.txt
-Andrew Morton
-30 April 2000
-
-
-This document describes the usage and errata of the 3Com "Vortex" device
-driver for Linux, 3c59x.c.
-
-The driver was written by Donald Becker <becker@scyld.com>
-
-Don is no longer the prime maintainer of this version of the driver. 
-Please report problems to one or more of:
-
-  Andrew Morton
-  Netdev mailing list <netdev@vger.kernel.org>
-  Linux kernel mailing list <linux-kernel@vger.kernel.org>
-
-Please note the 'Reporting and Diagnosing Problems' section at the end
-of this file.
-
-
-Since kernel 2.3.99-pre6, this driver incorporates the support for the
-3c575-series Cardbus cards which used to be handled by 3c575_cb.c.
-
-This driver supports the following hardware:
-
-	3c590 Vortex 10Mbps
-	3c592 EISA 10Mbps Demon/Vortex
-	3c597 EISA Fast Demon/Vortex
-	3c595 Vortex 100baseTx
-	3c595 Vortex 100baseT4
-	3c595 Vortex 100base-MII
-	3c900 Boomerang 10baseT
-	3c900 Boomerang 10Mbps Combo
-	3c900 Cyclone 10Mbps TPO
-	3c900 Cyclone 10Mbps Combo
-	3c900 Cyclone 10Mbps TPC
-	3c900B-FL Cyclone 10base-FL
-	3c905 Boomerang 100baseTx
-	3c905 Boomerang 100baseT4
-	3c905B Cyclone 100baseTx
-	3c905B Cyclone 10/100/BNC
-	3c905B-FX Cyclone 100baseFx
-	3c905C Tornado
-	3c920B-EMB-WNM (ATI Radeon 9100 IGP)
-	3c980 Cyclone
-	3c980C Python-T
-	3cSOHO100-TX Hurricane
-	3c555 Laptop Hurricane
-	3c556 Laptop Tornado
-	3c556B Laptop Hurricane
-	3c575 [Megahertz] 10/100 LAN  CardBus
-	3c575 Boomerang CardBus
-	3CCFE575BT Cyclone CardBus
-	3CCFE575CT Tornado CardBus
-	3CCFE656 Cyclone CardBus
-	3CCFEM656B Cyclone+Winmodem CardBus
-	3CXFEM656C Tornado+Winmodem CardBus
-	3c450 HomePNA Tornado
-	3c920 Tornado
-	3c982 Hydra Dual Port A
-	3c982 Hydra Dual Port B
-	3c905B-T4
-	3c920B-EMB-WNM Tornado
-
-Module parameters
-=================
-
-There are several parameters which may be provided to the driver when
-its module is loaded.  These are usually placed in /etc/modprobe.d/*.conf
-configuration files.  Example:
-
-options 3c59x debug=3 rx_copybreak=300
-
-If you are using the PCMCIA tools (cardmgr) then the options may be
-placed in /etc/pcmcia/config.opts:
-
-module "3c59x" opts "debug=3 rx_copybreak=300"
-
-
-The supported parameters are:
-
-debug=N
-
-  Where N is a number from 0 to 7.  Anything above 3 produces a lot
-  of output in your system logs.  debug=1 is default.
-
-options=N1,N2,N3,...
-
-  Each number in the list provides an option to the corresponding
-  network card.  So if you have two 3c905's and you wish to provide
-  them with option 0x204 you would use:
-
-    options=0x204,0x204
-
-  The individual options are composed of a number of bitfields which
-  have the following meanings:
-
-  Possible media type settings
-	0	10baseT
-	1	10Mbs AUI
-	2	undefined
-	3	10base2 (BNC)
-	4	100base-TX
-	5	100base-FX
-	6	MII (Media Independent Interface)
-	7	Use default setting from EEPROM
-	8       Autonegotiate
-	9       External MII
-	10      Use default setting from EEPROM
-
-  When generating a value for the 'options' setting, the above media
-  selection values may be OR'ed (or added to) the following:
-
-  0x8000  Set driver debugging level to 7
-  0x4000  Set driver debugging level to 2
-  0x0400  Enable Wake-on-LAN
-  0x0200  Force full duplex mode.
-  0x0010  Bus-master enable bit (Old Vortex cards only)
-
-  For example:
-
-    insmod 3c59x options=0x204
-
-  will force full-duplex 100base-TX, rather than allowing the usual
-  autonegotiation.
-
-global_options=N
-
-  Sets the `options' parameter for all 3c59x NICs in the machine. 
-  Entries in the `options' array above will override any setting of
-  this.
-
-full_duplex=N1,N2,N3...
-
-  Similar to bit 9 of 'options'.  Forces the corresponding card into
-  full-duplex mode.  Please use this in preference to the `options'
-  parameter.
-
-  In fact, please don't use this at all! You're better off getting
-  autonegotiation working properly.
-
-global_full_duplex=N1
-
-  Sets full duplex mode for all 3c59x NICs in the machine.  Entries
-  in the `full_duplex' array above will override any setting of this.
-
-flow_ctrl=N1,N2,N3...
-
-  Use 802.3x MAC-layer flow control.  The 3com cards only support the
-  PAUSE command, which means that they will stop sending packets for a
-  short period if they receive a PAUSE frame from the link partner. 
-
-  The driver only allows flow control on a link which is operating in
-  full duplex mode.
-
-  This feature does not appear to work on the 3c905 - only 3c905B and
-  3c905C have been tested.
-
-  The 3com cards appear to only respond to PAUSE frames which are
-  sent to the reserved destination address of 01:80:c2:00:00:01.  They
-  do not honour PAUSE frames which are sent to the station MAC address.
-
-rx_copybreak=M
-
-  The driver preallocates 32 full-sized (1536 byte) network buffers
-  for receiving.  When a packet arrives, the driver has to decide
-  whether to leave the packet in its full-sized buffer, or to allocate
-  a smaller buffer and copy the packet across into it.
-
-  This is a speed/space tradeoff.
-
-  The value of rx_copybreak is used to decide when to make the copy. 
-  If the packet size is less than rx_copybreak, the packet is copied. 
-  The default value for rx_copybreak is 200 bytes.
-
-max_interrupt_work=N
-
-  The driver's interrupt service routine can handle many receive and
-  transmit packets in a single invocation.  It does this in a loop. 
-  The value of max_interrupt_work governs how many times the interrupt
-  service routine will loop.  The default value is 32 loops.  If this
-  is exceeded the interrupt service routine gives up and generates a
-  warning message "eth0: Too much work in interrupt".
-
-hw_checksums=N1,N2,N3,...
-
-  Recent 3com NICs are able to generate IPv4, TCP and UDP checksums
-  in hardware.  Linux has used the Rx checksumming for a long time. 
-  The "zero copy" patch which is planned for the 2.4 kernel series
-  allows you to make use of the NIC's DMA scatter/gather and transmit
-  checksumming as well.
-
-  The driver is set up so that, when the zerocopy patch is applied,
-  all Tornado and Cyclone devices will use S/G and Tx checksums.
-
-  This module parameter has been provided so you can override this
-  decision.  If you think that Tx checksums are causing a problem, you
-  may disable the feature with `hw_checksums=0'.
-
-  If you think your NIC should be performing Tx checksumming and the
-  driver isn't enabling it, you can force the use of hardware Tx
-  checksumming with `hw_checksums=1'.
-
-  The driver drops a message in the logfiles to indicate whether or
-  not it is using hardware scatter/gather and hardware Tx checksums.
-
-  Scatter/gather and hardware checksums provide considerable
-  performance improvement for the sendfile() system call, but a small
-  decrease in throughput for send().  There is no effect upon receive
-  efficiency.
-
-compaq_ioaddr=N
-compaq_irq=N
-compaq_device_id=N
-
-  "Variables to work-around the Compaq PCI BIOS32 problem"....
-
-watchdog=N
-
-  Sets the time duration (in milliseconds) after which the kernel
-  decides that the transmitter has become stuck and needs to be reset. 
-  This is mainly for debugging purposes, although it may be advantageous
-  to increase this value on LANs which have very high collision rates.
-  The default value is 5000 (5.0 seconds).
-
-enable_wol=N1,N2,N3,...
-
-  Enable Wake-on-LAN support for the relevant interface.  Donald
-  Becker's `ether-wake' application may be used to wake suspended
-  machines.
-
-  Also enables the NIC's power management support.
-
-global_enable_wol=N
-
-  Sets enable_wol mode for all 3c59x NICs in the machine.  Entries in
-  the `enable_wol' array above will override any setting of this.
-
-Media selection
----------------
-
-A number of the older NICs such as the 3c590 and 3c900 series have
-10base2 and AUI interfaces.
-
-Prior to January, 2001 this driver would autoeselect the 10base2 or AUI
-port if it didn't detect activity on the 10baseT port.  It would then
-get stuck on the 10base2 port and a driver reload was necessary to
-switch back to 10baseT.  This behaviour could not be prevented with a
-module option override.
-
-Later (current) versions of the driver _do_ support locking of the
-media type.  So if you load the driver module with
-
-	modprobe 3c59x options=0
-
-it will permanently select the 10baseT port.  Automatic selection of
-other media types does not occur.
-
-
-Transmit error, Tx status register 82
--------------------------------------
-
-This is a common error which is almost always caused by another host on
-the same network being in full-duplex mode, while this host is in
-half-duplex mode.  You need to find that other host and make it run in
-half-duplex mode or fix this host to run in full-duplex mode.
-
-As a last resort, you can force the 3c59x driver into full-duplex mode
-with
-
-	options 3c59x full_duplex=1
-
-but this has to be viewed as a workaround for broken network gear and
-should only really be used for equipment which cannot autonegotiate.
-
-
-Additional resources
---------------------
-
-Details of the device driver implementation are at the top of the source file.
-
-Additional documentation is available at Don Becker's Linux Drivers site:
-
-     http://www.scyld.com/vortex.html
-
-Donald Becker's driver development site:
-
-     http://www.scyld.com/network.html
-
-Donald's vortex-diag program is useful for inspecting the NIC's state:
-
-     http://www.scyld.com/ethercard_diag.html
-
-Donald's mii-diag program may be used for inspecting and manipulating
-the NIC's Media Independent Interface subsystem:
-
-     http://www.scyld.com/ethercard_diag.html#mii-diag
-
-Donald's wake-on-LAN page:
-
-     http://www.scyld.com/wakeonlan.html
-
-3Com's DOS-based application for setting up the NICs EEPROMs:
-
-	ftp://ftp.3com.com/pub/nic/3c90x/3c90xx2.exe
-
-
-Autonegotiation notes
----------------------
-
-  The driver uses a one-minute heartbeat for adapting to changes in
-  the external LAN environment if link is up and 5 seconds if link is down.
-  This means that when, for example, a machine is unplugged from a hubbed
-  10baseT LAN plugged into a  switched 100baseT LAN, the throughput
-  will be quite dreadful for up to sixty seconds.  Be patient.
-
-  Cisco interoperability note from Walter Wong <wcw+@CMU.EDU>:
-
-  On a side note, adding HAS_NWAY seems to share a problem with the
-  Cisco 6509 switch.  Specifically, you need to change the spanning
-  tree parameter for the port the machine is plugged into to 'portfast'
-  mode.  Otherwise, the negotiation fails.  This has been an issue
-  we've noticed for a while but haven't had the time to track down.
-
-  Cisco switches    (Jeff Busch <jbusch@deja.com>)
-
-    My "standard config" for ports to which PC's/servers connect directly:
-
-        interface FastEthernet0/N
-        description machinename
-        load-interval 30
-        spanning-tree portfast
-
-    If autonegotiation is a problem, you may need to specify "speed
-    100" and "duplex full" as well (or "speed 10" and "duplex half").
-
-    WARNING: DO NOT hook up hubs/switches/bridges to these
-    specially-configured ports! The switch will become very confused.
-
-
-Reporting and diagnosing problems
----------------------------------
-
-Maintainers find that accurate and complete problem reports are
-invaluable in resolving driver problems.  We are frequently not able to
-reproduce problems and must rely on your patience and efforts to get to
-the bottom of the problem.
-
-If you believe you have a driver problem here are some of the
-steps you should take:
-
-- Is it really a driver problem?
-
-   Eliminate some variables: try different cards, different
-   computers, different cables, different ports on the switch/hub,
-   different versions of the kernel or of the driver, etc.
-
-- OK, it's a driver problem.
-
-   You need to generate a report.  Typically this is an email to the
-   maintainer and/or netdev@vger.kernel.org.  The maintainer's
-   email address will be in the driver source or in the MAINTAINERS file.
-
-- The contents of your report will vary a lot depending upon the
-  problem.  If it's a kernel crash then you should refer to the
-  admin-guide/reporting-bugs.rst file.
-
-  But for most problems it is useful to provide the following:
-
-   o Kernel version, driver version
-
-   o A copy of the banner message which the driver generates when
-     it is initialised.  For example:
-
-     eth0: 3Com PCI 3c905C Tornado at 0xa400,  00:50:da:6a:88:f0, IRQ 19
-     8K byte-wide RAM 5:3 Rx:Tx split, autoselect/Autonegotiate interface.
-     MII transceiver found at address 24, status 782d.
-     Enabling bus-master transmits and whole-frame receives.
-
-     NOTE: You must provide the `debug=2' modprobe option to generate
-     a full detection message.  Please do this:
-
-	modprobe 3c59x debug=2
-
-   o If it is a PCI device, the relevant output from 'lspci -vx', eg:
-
-     00:09.0 Ethernet controller: 3Com Corporation 3c905C-TX [Fast Etherlink] (rev 74)
-             Subsystem: 3Com Corporation: Unknown device 9200
-             Flags: bus master, medium devsel, latency 32, IRQ 19
-             I/O ports at a400 [size=128]
-             Memory at db000000 (32-bit, non-prefetchable) [size=128]
-             Expansion ROM at <unassigned> [disabled] [size=128K]
-             Capabilities: [dc] Power Management version 2
-     00: b7 10 00 92 07 00 10 02 74 00 00 02 08 20 00 00
-     10: 01 a4 00 00 00 00 00 db 00 00 00 00 00 00 00 00
-     20: 00 00 00 00 00 00 00 00 00 00 00 00 b7 10 00 10
-     30: 00 00 00 00 dc 00 00 00 00 00 00 00 05 01 0a 0a
-
-   o A description of the environment: 10baseT? 100baseT?
-     full/half duplex? switched or hubbed?
-
-   o Any additional module parameters which you may be providing to the driver.
-
-   o Any kernel logs which are produced.  The more the merrier. 
-     If this is a large file and you are sending your report to a
-     mailing list, mention that you have the logfile, but don't send
-     it.  If you're reporting direct to the maintainer then just send
-     it.
-
-     To ensure that all kernel logs are available, add the
-     following line to /etc/syslog.conf:
-
-         kern.* /var/log/messages
-
-     Then restart syslogd with:
-
-         /etc/rc.d/init.d/syslog restart
-
-     (The above may vary, depending upon which Linux distribution you use).
-
-    o If your problem is reproducible then that's great.  Try the
-      following:
-
-      1) Increase the debug level.  Usually this is done via:
-
-         a) modprobe driver debug=7
-         b) In /etc/modprobe.d/driver.conf:
-            options driver debug=7
-
-      2) Recreate the problem with the higher debug level,
-         send all logs to the maintainer.
-
-      3) Download you card's diagnostic tool from Donald
-         Becker's website <http://www.scyld.com/ethercard_diag.html>.
-         Download mii-diag.c as well.  Build these.
-
-         a) Run 'vortex-diag -aaee' and 'mii-diag -v' when the card is
-            working correctly.  Save the output.
-
-         b) Run the above commands when the card is malfunctioning.  Send
-            both sets of output.
-
-Finally, please be patient and be prepared to do some work.  You may
-end up working on this problem for a week or more as the maintainer
-asks more questions, asks for more tests, asks for patches to be
-applied, etc.  At the end of it all, the problem may even remain
-unresolved.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 402a9188f446..aaac502b81ea 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -28,6 +28,7 @@ Contents:
    pensando/ionic
    stmicro/stmmac
    3com/3c509
+   3com/vortex
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index bee65ebdc67e..eaea5f1994c9 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -147,7 +147,7 @@ Maintainers List
 M:	Steffen Klassert <klassert@kernel.org>
 L:	netdev@vger.kernel.org
 S:	Odd Fixes
-F:	Documentation/networking/device_drivers/3com/vortex.txt
+F:	Documentation/networking/device_drivers/3com/vortex.rst
 F:	drivers/net/ethernet/3com/3c59x.c
 
 3CR990 NETWORK DRIVER
diff --git a/drivers/net/ethernet/3com/3c59x.c b/drivers/net/ethernet/3com/3c59x.c
index a2b7f7ab8170..5984b7033999 100644
--- a/drivers/net/ethernet/3com/3c59x.c
+++ b/drivers/net/ethernet/3com/3c59x.c
@@ -1149,7 +1149,7 @@ static int vortex_probe1(struct device *gendev, void __iomem *ioaddr, int irq,
 
 	print_info = (vortex_debug > 1);
 	if (print_info)
-		pr_info("See Documentation/networking/device_drivers/3com/vortex.txt\n");
+		pr_info("See Documentation/networking/device_drivers/3com/vortex.rst\n");
 
 	pr_info("%s: 3Com %s %s at %p.\n",
 	       print_name,
@@ -1954,7 +1954,7 @@ vortex_error(struct net_device *dev, int status)
 				   dev->name, tx_status);
 			if (tx_status == 0x82) {
 				pr_err("Probably a duplex mismatch.  See "
-						"Documentation/networking/device_drivers/3com/vortex.txt\n");
+						"Documentation/networking/device_drivers/3com/vortex.rst\n");
 			}
 			dump_tx_ring(dev);
 		}
diff --git a/drivers/net/ethernet/3com/Kconfig b/drivers/net/ethernet/3com/Kconfig
index 3a6fc99c6f32..7cc259893cb9 100644
--- a/drivers/net/ethernet/3com/Kconfig
+++ b/drivers/net/ethernet/3com/Kconfig
@@ -76,7 +76,7 @@ config VORTEX
 	  "Hurricane" (3c555/3cSOHO)                           PCI
 
 	  If you have such a card, say Y here.  More specific information is in
-	  <file:Documentation/networking/device_drivers/3com/vortex.txt> and
+	  <file:Documentation/networking/device_drivers/3com/vortex.rst> and
 	  in the comments at the beginning of
 	  <file:drivers/net/ethernet/3com/3c59x.c>.
 
-- 
cgit v1.2.3-59-g8ed1b


From 8d299c7e912bd8ebb88b9ac2b8e336c9878783aa Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:36 +0200
Subject: docs: networking: device drivers: convert amazon/ena.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/amazon/ena.rst       | 344 +++++++++++++++++++++
 .../networking/device_drivers/amazon/ena.txt       | 308 ------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 MAINTAINERS                                        |   2 +-
 4 files changed, 346 insertions(+), 309 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/amazon/ena.rst
 delete mode 100644 Documentation/networking/device_drivers/amazon/ena.txt

diff --git a/Documentation/networking/device_drivers/amazon/ena.rst b/Documentation/networking/device_drivers/amazon/ena.rst
new file mode 100644
index 000000000000..11af6388ea87
--- /dev/null
+++ b/Documentation/networking/device_drivers/amazon/ena.rst
@@ -0,0 +1,344 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============================================================
+Linux kernel driver for Elastic Network Adapter (ENA) family
+============================================================
+
+Overview
+========
+
+ENA is a networking interface designed to make good use of modern CPU
+features and system architectures.
+
+The ENA device exposes a lightweight management interface with a
+minimal set of memory mapped registers and extendable command set
+through an Admin Queue.
+
+The driver supports a range of ENA devices, is link-speed independent
+(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
+a negotiated and extendable feature set.
+
+Some ENA devices support SR-IOV. This driver is used for both the
+SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
+
+ENA devices enable high speed and low overhead network traffic
+processing by providing multiple Tx/Rx queue pairs (the maximum number
+is advertised by the device via the Admin Queue), a dedicated MSI-X
+interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
+and CPU cacheline optimized data placement.
+
+The ENA driver supports industry standard TCP/IP offload features such
+as checksum offload and TCP transmit segmentation offload (TSO).
+Receive-side scaling (RSS) is supported for multi-core scaling.
+
+The ENA driver and its corresponding devices implement health
+monitoring mechanisms such as watchdog, enabling the device and driver
+to recover in a manner transparent to the application, as well as
+debug logs.
+
+Some of the ENA devices support a working mode called Low-latency
+Queue (LLQ), which saves several more microseconds.
+
+Supported PCI vendor ID/device IDs
+==================================
+
+=========   =======================
+1d0f:0ec2   ENA PF
+1d0f:1ec2   ENA PF with LLQ support
+1d0f:ec20   ENA VF
+1d0f:ec21   ENA VF with LLQ support
+=========   =======================
+
+ENA Source Code Directory Structure
+===================================
+
+=================   ======================================================
+ena_com.[ch]        Management communication layer. This layer is
+		    responsible for handling all the management
+		    (admin) communication between the device and the
+		    driver.
+ena_eth_com.[ch]    Tx/Rx data path.
+ena_admin_defs.h    Definition of ENA management interface.
+ena_eth_io_defs.h   Definition of ENA data path interface.
+ena_common_defs.h   Common definitions for ena_com layer.
+ena_regs_defs.h     Definition of ENA PCI memory-mapped (MMIO) registers.
+ena_netdev.[ch]     Main Linux kernel driver.
+ena_sysfs.[ch]      Sysfs files.
+ena_ethtool.c       ethtool callbacks.
+ena_pci_id_tbl.h    Supported device IDs.
+=================   ======================================================
+
+Management Interface
+====================
+
+ENA management interface is exposed by means of:
+
+- PCIe Configuration Space
+- Device Registers
+- Admin Queue (AQ) and Admin Completion Queue (ACQ)
+- Asynchronous Event Notification Queue (AENQ)
+
+ENA device MMIO Registers are accessed only during driver
+initialization and are not involved in further normal device
+operation.
+
+AQ is used for submitting management commands, and the
+results/responses are reported asynchronously through ACQ.
+
+ENA introduces a small set of management commands with room for
+vendor-specific extensions. Most of the management operations are
+framed in a generic Get/Set feature command.
+
+The following admin queue commands are supported:
+
+- Create I/O submission queue
+- Create I/O completion queue
+- Destroy I/O submission queue
+- Destroy I/O completion queue
+- Get feature
+- Set feature
+- Configure AENQ
+- Get statistics
+
+Refer to ena_admin_defs.h for the list of supported Get/Set Feature
+properties.
+
+The Asynchronous Event Notification Queue (AENQ) is a uni-directional
+queue used by the ENA device to send to the driver events that cannot
+be reported using ACQ. AENQ events are subdivided into groups. Each
+group may have multiple syndromes, as shown below.
+
+The events are:
+
+	====================	===============
+	Group			Syndrome
+	====================	===============
+	Link state change	**X**
+	Fatal error		**X**
+	Notification		Suspend traffic
+	Notification		Resume traffic
+	Keep-Alive		**X**
+	====================	===============
+
+ACQ and AENQ share the same MSI-X vector.
+
+Keep-Alive is a special mechanism that allows monitoring of the
+device's health. The driver maintains a watchdog (WD) handler which,
+if fired, logs the current state and statistics then resets and
+restarts the ENA device and driver. A Keep-Alive event is delivered by
+the device every second. The driver re-arms the WD upon reception of a
+Keep-Alive event. A missed Keep-Alive event causes the WD handler to
+fire.
+
+Data Path Interface
+===================
+I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
+SQ correspondingly). Each SQ has a completion queue (CQ) associated
+with it.
+
+The SQs and CQs are implemented as descriptor rings in contiguous
+physical memory.
+
+The ENA driver supports two Queue Operation modes for Tx SQs:
+
+- Regular mode
+
+  * In this mode the Tx SQs reside in the host's memory. The ENA
+    device fetches the ENA Tx descriptors and packet data from host
+    memory.
+
+- Low Latency Queue (LLQ) mode or "push-mode".
+
+  * In this mode the driver pushes the transmit descriptors and the
+    first 128 bytes of the packet directly to the ENA device memory
+    space. The rest of the packet payload is fetched by the
+    device. For this operation mode, the driver uses a dedicated PCI
+    device memory BAR, which is mapped with write-combine capability.
+
+The Rx SQs support only the regular mode.
+
+Note: Not all ENA devices support LLQ, and this feature is negotiated
+      with the device upon initialization. If the ENA device does not
+      support LLQ mode, the driver falls back to the regular mode.
+
+The driver supports multi-queue for both Tx and Rx. This has various
+benefits:
+
+- Reduced CPU/thread/process contention on a given Ethernet interface.
+- Cache miss rate on completion is reduced, particularly for data
+  cache lines that hold the sk_buff structures.
+- Increased process-level parallelism when handling received packets.
+- Increased data cache hit rate, by steering kernel processing of
+  packets to the CPU, where the application thread consuming the
+  packet is running.
+- In hardware interrupt re-direction.
+
+Interrupt Modes
+===============
+The driver assigns a single MSI-X vector per queue pair (for both Tx
+and Rx directions). The driver assigns an additional dedicated MSI-X vector
+for management (for ACQ and AENQ).
+
+Management interrupt registration is performed when the Linux kernel
+probes the adapter, and it is de-registered when the adapter is
+removed. I/O queue interrupt registration is performed when the Linux
+interface of the adapter is opened, and it is de-registered when the
+interface is closed.
+
+The management interrupt is named::
+
+   ena-mgmnt@pci:<PCI domain:bus:slot.function>
+
+and for each queue pair, an interrupt is named::
+
+   <interface name>-Tx-Rx-<queue index>
+
+The ENA device operates in auto-mask and auto-clear interrupt
+modes. That is, once MSI-X is delivered to the host, its Cause bit is
+automatically cleared and the interrupt is masked. The interrupt is
+unmasked by the driver after NAPI processing is complete.
+
+Interrupt Moderation
+====================
+ENA driver and device can operate in conventional or adaptive interrupt
+moderation mode.
+
+In conventional mode the driver instructs device to postpone interrupt
+posting according to static interrupt delay value. The interrupt delay
+value can be configured through ethtool(8). The following ethtool
+parameters are supported by the driver: tx-usecs, rx-usecs
+
+In adaptive interrupt moderation mode the interrupt delay value is
+updated by the driver dynamically and adjusted every NAPI cycle
+according to the traffic nature.
+
+By default ENA driver applies adaptive coalescing on Rx traffic and
+conventional coalescing on Tx traffic.
+
+Adaptive coalescing can be switched on/off through ethtool(8)
+adaptive_rx on|off parameter.
+
+The driver chooses interrupt delay value according to the number of
+bytes and packets received between interrupt unmasking and interrupt
+posting. The driver uses interrupt delay table that subdivides the
+range of received bytes/packets into 5 levels and assigns interrupt
+delay value to each level.
+
+The user can enable/disable adaptive moderation, modify the interrupt
+delay table and restore its default values through sysfs.
+
+RX copybreak
+============
+The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
+and can be configured by the ETHTOOL_STUNABLE command of the
+SIOCETHTOOL ioctl.
+
+SKB
+===
+The driver allocates SKBs for frames received from Rx handling using
+NAPI context. The allocation method depends on the size of the packet.
+If the frame length is larger than rx_copybreak, napi_get_frags()
+is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
+content is copied (by CPU) to the SKB, and the buffer is recycled.
+
+Statistics
+==========
+The user can obtain ENA device and driver statistics using ethtool.
+The driver can collect regular or extended statistics (including
+per-queue stats) from the device.
+
+In addition the driver logs the stats to syslog upon device reset.
+
+MTU
+===
+The driver supports an arbitrarily large MTU with a maximum that is
+negotiated with the device. The driver configures MTU using the
+SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
+via ip(8) and similar legacy tools.
+
+Stateless Offloads
+==================
+The ENA driver supports:
+
+- TSO over IPv4/IPv6
+- TSO with ECN
+- IPv4 header checksum offload
+- TCP/UDP over IPv4/IPv6 checksum offloads
+
+RSS
+===
+- The ENA device supports RSS that allows flexible Rx traffic
+  steering.
+- Toeplitz and CRC32 hash functions are supported.
+- Different combinations of L2/L3/L4 fields can be configured as
+  inputs for hash functions.
+- The driver configures RSS settings using the AQ SetFeature command
+  (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and
+  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties).
+- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash
+  function delivered in the Rx CQ descriptor is set in the received
+  SKB.
+- The user can provide a hash key, hash function, and configure the
+  indirection table through ethtool(8).
+
+DATA PATH
+=========
+Tx
+--
+
+ena_start_xmit() is called by the stack. This function does the following:
+
+- Maps data buffers (skb->data and frags).
+- Populates ena_buf for the push buffer (if the driver and device are
+  in push mode.)
+- Prepares ENA bufs for the remaining frags.
+- Allocates a new request ID from the empty req_id ring. The request
+  ID is the index of the packet in the Tx info. This is used for
+  out-of-order TX completions.
+- Adds the packet to the proper place in the Tx ring.
+- Calls ena_com_prepare_tx(), an ENA communication layer that converts
+  the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
+  needed.)
+
+  * This function also copies the ENA descriptors and the push buffer
+    to the Device memory space (if in push mode.)
+
+- Writes doorbell to the ENA device.
+- When the ENA device finishes sending the packet, a completion
+  interrupt is raised.
+- The interrupt handler schedules NAPI.
+- The ena_clean_tx_irq() function is called. This function handles the
+  completion descriptors generated by the ENA, with a single
+  completion descriptor per completed packet.
+
+  * req_id is retrieved from the completion descriptor. The tx_info of
+    the packet is retrieved via the req_id. The data buffers are
+    unmapped and req_id is returned to the empty req_id ring.
+  * The function stops when the completion descriptors are completed or
+    the budget is reached.
+
+Rx
+--
+
+- When a packet is received from the ENA device.
+- The interrupt handler schedules NAPI.
+- The ena_clean_rx_irq() function is called. This function calls
+  ena_com_rx_pkt(), an ENA communication layer function, which returns the
+  number of descriptors used for a new unhandled packet, and zero if
+  no new packet is found.
+- Then it calls the ena_rx_skb() function.
+- ena_rx_skb() checks packet length:
+
+  * If the packet is small (len < rx_copybreak), the driver allocates
+    a SKB for the new packet, and copies the packet payload into the
+    SKB data buffer.
+
+    - In this way the original data buffer is not passed to the stack
+      and is reused for future Rx packets.
+
+  * Otherwise the function unmaps the Rx buffer, then allocates the
+    new SKB structure and hooks the Rx buffer to the SKB frags.
+
+- The new SKB is updated with the necessary information (protocol,
+  checksum hw verify result, etc.), and then passed to the network
+  stack, using the NAPI interface function napi_gro_receive().
diff --git a/Documentation/networking/device_drivers/amazon/ena.txt b/Documentation/networking/device_drivers/amazon/ena.txt
deleted file mode 100644
index 1bb55c7b604c..000000000000
--- a/Documentation/networking/device_drivers/amazon/ena.txt
+++ /dev/null
@@ -1,308 +0,0 @@
-Linux kernel driver for Elastic Network Adapter (ENA) family:
-=============================================================
-
-Overview:
-=========
-ENA is a networking interface designed to make good use of modern CPU
-features and system architectures.
-
-The ENA device exposes a lightweight management interface with a
-minimal set of memory mapped registers and extendable command set
-through an Admin Queue.
-
-The driver supports a range of ENA devices, is link-speed independent
-(i.e., the same driver is used for 10GbE, 25GbE, 40GbE, etc.), and has
-a negotiated and extendable feature set.
-
-Some ENA devices support SR-IOV. This driver is used for both the
-SR-IOV Physical Function (PF) and Virtual Function (VF) devices.
-
-ENA devices enable high speed and low overhead network traffic
-processing by providing multiple Tx/Rx queue pairs (the maximum number
-is advertised by the device via the Admin Queue), a dedicated MSI-X
-interrupt vector per Tx/Rx queue pair, adaptive interrupt moderation,
-and CPU cacheline optimized data placement.
-
-The ENA driver supports industry standard TCP/IP offload features such
-as checksum offload and TCP transmit segmentation offload (TSO).
-Receive-side scaling (RSS) is supported for multi-core scaling.
-
-The ENA driver and its corresponding devices implement health
-monitoring mechanisms such as watchdog, enabling the device and driver
-to recover in a manner transparent to the application, as well as
-debug logs.
-
-Some of the ENA devices support a working mode called Low-latency
-Queue (LLQ), which saves several more microseconds.
-
-Supported PCI vendor ID/device IDs:
-===================================
-1d0f:0ec2 - ENA PF
-1d0f:1ec2 - ENA PF with LLQ support
-1d0f:ec20 - ENA VF
-1d0f:ec21 - ENA VF with LLQ support
-
-ENA Source Code Directory Structure:
-====================================
-ena_com.[ch]      - Management communication layer. This layer is
-                    responsible for the handling all the management
-                    (admin) communication between the device and the
-                    driver.
-ena_eth_com.[ch]  - Tx/Rx data path.
-ena_admin_defs.h  - Definition of ENA management interface.
-ena_eth_io_defs.h - Definition of ENA data path interface.
-ena_common_defs.h - Common definitions for ena_com layer.
-ena_regs_defs.h   - Definition of ENA PCI memory-mapped (MMIO) registers.
-ena_netdev.[ch]   - Main Linux kernel driver.
-ena_syfsfs.[ch]   - Sysfs files.
-ena_ethtool.c     - ethtool callbacks.
-ena_pci_id_tbl.h  - Supported device IDs.
-
-Management Interface:
-=====================
-ENA management interface is exposed by means of:
-- PCIe Configuration Space
-- Device Registers
-- Admin Queue (AQ) and Admin Completion Queue (ACQ)
-- Asynchronous Event Notification Queue (AENQ)
-
-ENA device MMIO Registers are accessed only during driver
-initialization and are not involved in further normal device
-operation.
-
-AQ is used for submitting management commands, and the
-results/responses are reported asynchronously through ACQ.
-
-ENA introduces a small set of management commands with room for
-vendor-specific extensions. Most of the management operations are
-framed in a generic Get/Set feature command.
-
-The following admin queue commands are supported:
-- Create I/O submission queue
-- Create I/O completion queue
-- Destroy I/O submission queue
-- Destroy I/O completion queue
-- Get feature
-- Set feature
-- Configure AENQ
-- Get statistics
-
-Refer to ena_admin_defs.h for the list of supported Get/Set Feature
-properties.
-
-The Asynchronous Event Notification Queue (AENQ) is a uni-directional
-queue used by the ENA device to send to the driver events that cannot
-be reported using ACQ. AENQ events are subdivided into groups. Each
-group may have multiple syndromes, as shown below
-
-The events are:
-	Group			Syndrome
-	Link state change	- X -
-	Fatal error		- X -
-	Notification		Suspend traffic
-	Notification		Resume traffic
-	Keep-Alive		- X -
-
-ACQ and AENQ share the same MSI-X vector.
-
-Keep-Alive is a special mechanism that allows monitoring of the
-device's health. The driver maintains a watchdog (WD) handler which,
-if fired, logs the current state and statistics then resets and
-restarts the ENA device and driver. A Keep-Alive event is delivered by
-the device every second. The driver re-arms the WD upon reception of a
-Keep-Alive event. A missed Keep-Alive event causes the WD handler to
-fire.
-
-Data Path Interface:
-====================
-I/O operations are based on Tx and Rx Submission Queues (Tx SQ and Rx
-SQ correspondingly). Each SQ has a completion queue (CQ) associated
-with it.
-
-The SQs and CQs are implemented as descriptor rings in contiguous
-physical memory.
-
-The ENA driver supports two Queue Operation modes for Tx SQs:
-- Regular mode
-  * In this mode the Tx SQs reside in the host's memory. The ENA
-    device fetches the ENA Tx descriptors and packet data from host
-    memory.
-- Low Latency Queue (LLQ) mode or "push-mode".
-  * In this mode the driver pushes the transmit descriptors and the
-    first 128 bytes of the packet directly to the ENA device memory
-    space. The rest of the packet payload is fetched by the
-    device. For this operation mode, the driver uses a dedicated PCI
-    device memory BAR, which is mapped with write-combine capability.
-
-The Rx SQs support only the regular mode.
-
-Note: Not all ENA devices support LLQ, and this feature is negotiated
-      with the device upon initialization. If the ENA device does not
-      support LLQ mode, the driver falls back to the regular mode.
-
-The driver supports multi-queue for both Tx and Rx. This has various
-benefits:
-- Reduced CPU/thread/process contention on a given Ethernet interface.
-- Cache miss rate on completion is reduced, particularly for data
-  cache lines that hold the sk_buff structures.
-- Increased process-level parallelism when handling received packets.
-- Increased data cache hit rate, by steering kernel processing of
-  packets to the CPU, where the application thread consuming the
-  packet is running.
-- In hardware interrupt re-direction.
-
-Interrupt Modes:
-================
-The driver assigns a single MSI-X vector per queue pair (for both Tx
-and Rx directions). The driver assigns an additional dedicated MSI-X vector
-for management (for ACQ and AENQ).
-
-Management interrupt registration is performed when the Linux kernel
-probes the adapter, and it is de-registered when the adapter is
-removed. I/O queue interrupt registration is performed when the Linux
-interface of the adapter is opened, and it is de-registered when the
-interface is closed.
-
-The management interrupt is named:
-   ena-mgmnt@pci:<PCI domain:bus:slot.function>
-and for each queue pair, an interrupt is named:
-   <interface name>-Tx-Rx-<queue index>
-
-The ENA device operates in auto-mask and auto-clear interrupt
-modes. That is, once MSI-X is delivered to the host, its Cause bit is
-automatically cleared and the interrupt is masked. The interrupt is
-unmasked by the driver after NAPI processing is complete.
-
-Interrupt Moderation:
-=====================
-ENA driver and device can operate in conventional or adaptive interrupt
-moderation mode.
-
-In conventional mode the driver instructs device to postpone interrupt
-posting according to static interrupt delay value. The interrupt delay
-value can be configured through ethtool(8). The following ethtool
-parameters are supported by the driver: tx-usecs, rx-usecs
-
-In adaptive interrupt moderation mode the interrupt delay value is
-updated by the driver dynamically and adjusted every NAPI cycle
-according to the traffic nature.
-
-By default ENA driver applies adaptive coalescing on Rx traffic and
-conventional coalescing on Tx traffic.
-
-Adaptive coalescing can be switched on/off through ethtool(8)
-adaptive_rx on|off parameter.
-
-The driver chooses interrupt delay value according to the number of
-bytes and packets received between interrupt unmasking and interrupt
-posting. The driver uses interrupt delay table that subdivides the
-range of received bytes/packets into 5 levels and assigns interrupt
-delay value to each level.
-
-The user can enable/disable adaptive moderation, modify the interrupt
-delay table and restore its default values through sysfs.
-
-RX copybreak:
-=============
-The rx_copybreak is initialized by default to ENA_DEFAULT_RX_COPYBREAK
-and can be configured by the ETHTOOL_STUNABLE command of the
-SIOCETHTOOL ioctl.
-
-SKB:
-====
-The driver-allocated SKB for frames received from Rx handling using
-NAPI context. The allocation method depends on the size of the packet.
-If the frame length is larger than rx_copybreak, napi_get_frags()
-is used, otherwise netdev_alloc_skb_ip_align() is used, the buffer
-content is copied (by CPU) to the SKB, and the buffer is recycled.
-
-Statistics:
-===========
-The user can obtain ENA device and driver statistics using ethtool.
-The driver can collect regular or extended statistics (including
-per-queue stats) from the device.
-
-In addition the driver logs the stats to syslog upon device reset.
-
-MTU:
-====
-The driver supports an arbitrarily large MTU with a maximum that is
-negotiated with the device. The driver configures MTU using the
-SetFeature command (ENA_ADMIN_MTU property). The user can change MTU
-via ip(8) and similar legacy tools.
-
-Stateless Offloads:
-===================
-The ENA driver supports:
-- TSO over IPv4/IPv6
-- TSO with ECN
-- IPv4 header checksum offload
-- TCP/UDP over IPv4/IPv6 checksum offloads
-
-RSS:
-====
-- The ENA device supports RSS that allows flexible Rx traffic
-  steering.
-- Toeplitz and CRC32 hash functions are supported.
-- Different combinations of L2/L3/L4 fields can be configured as
-  inputs for hash functions.
-- The driver configures RSS settings using the AQ SetFeature command
-  (ENA_ADMIN_RSS_HASH_FUNCTION, ENA_ADMIN_RSS_HASH_INPUT and
-  ENA_ADMIN_RSS_REDIRECTION_TABLE_CONFIG properties).
-- If the NETIF_F_RXHASH flag is set, the 32-bit result of the hash
-  function delivered in the Rx CQ descriptor is set in the received
-  SKB.
-- The user can provide a hash key, hash function, and configure the
-  indirection table through ethtool(8).
-
-DATA PATH:
-==========
-Tx:
----
-end_start_xmit() is called by the stack. This function does the following:
-- Maps data buffers (skb->data and frags).
-- Populates ena_buf for the push buffer (if the driver and device are
-  in push mode.)
-- Prepares ENA bufs for the remaining frags.
-- Allocates a new request ID from the empty req_id ring. The request
-  ID is the index of the packet in the Tx info. This is used for
-  out-of-order TX completions.
-- Adds the packet to the proper place in the Tx ring.
-- Calls ena_com_prepare_tx(), an ENA communication layer that converts
-  the ena_bufs to ENA descriptors (and adds meta ENA descriptors as
-  needed.)
-  * This function also copies the ENA descriptors and the push buffer
-    to the Device memory space (if in push mode.)
-- Writes doorbell to the ENA device.
-- When the ENA device finishes sending the packet, a completion
-  interrupt is raised.
-- The interrupt handler schedules NAPI.
-- The ena_clean_tx_irq() function is called. This function handles the
-  completion descriptors generated by the ENA, with a single
-  completion descriptor per completed packet.
-  * req_id is retrieved from the completion descriptor. The tx_info of
-    the packet is retrieved via the req_id. The data buffers are
-    unmapped and req_id is returned to the empty req_id ring.
-  * The function stops when the completion descriptors are completed or
-    the budget is reached.
-
-Rx:
----
-- When a packet is received from the ENA device.
-- The interrupt handler schedules NAPI.
-- The ena_clean_rx_irq() function is called. This function calls
-  ena_rx_pkt(), an ENA communication layer function, which returns the
-  number of descriptors used for a new unhandled packet, and zero if
-  no new packet is found.
-- Then it calls the ena_clean_rx_irq() function.
-- ena_eth_rx_skb() checks packet length:
-  * If the packet is small (len < rx_copybreak), the driver allocates
-    a SKB for the new packet, and copies the packet payload into the
-    SKB data buffer.
-    - In this way the original data buffer is not passed to the stack
-      and is reused for future Rx packets.
-  * Otherwise the function unmaps the Rx buffer, then allocates the
-    new SKB structure and hooks the Rx buffer to the SKB frags.
-- The new SKB is updated with the necessary information (protocol,
-  checksum hw verify result, etc.), and then passed to the network
-  stack, using the NAPI interface function napi_gro_receive().
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index aaac502b81ea..019a0d2efe67 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -29,6 +29,7 @@ Contents:
    stmicro/stmmac
    3com/3c509
    3com/vortex
+   amazon/ena
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index eaea5f1994c9..7b6c13cc832f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -815,7 +815,7 @@ R:	Saeed Bishara <saeedb@amazon.com>
 R:	Zorik Machulsky <zorik@amazon.com>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	Documentation/networking/device_drivers/amazon/ena.txt
+F:	Documentation/networking/device_drivers/amazon/ena.rst
 F:	drivers/net/ethernet/amazon/
 
 AMAZON RDMA EFA DRIVER
-- 
cgit v1.2.3-59-g8ed1b


From c958119a487ec4578f50b352f45e965a30daa020 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:37 +0200
Subject: docs: networking: device drivers: convert aquantia/atlantic.txt to
 ReST

- add SPDX header;
- use copyright symbol;
- adjust title and its markup;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/aquantia/atlantic.rst           | 556 +++++++++++++++++++++
 .../device_drivers/aquantia/atlantic.txt           | 479 ------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 MAINTAINERS                                        |   2 +-
 4 files changed, 558 insertions(+), 480 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/aquantia/atlantic.rst
 delete mode 100644 Documentation/networking/device_drivers/aquantia/atlantic.txt

diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.rst b/Documentation/networking/device_drivers/aquantia/atlantic.rst
new file mode 100644
index 000000000000..595ddef1c8b3
--- /dev/null
+++ b/Documentation/networking/device_drivers/aquantia/atlantic.rst
@@ -0,0 +1,556 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===============================
+Marvell(Aquantia) AQtion Driver
+===============================
+
+For the aQuantia Multi-Gigabit PCI Express Family of Ethernet Adapters
+
+.. Contents
+
+    - Identifying Your Adapter
+    - Configuration
+    - Supported ethtool options
+    - Command Line Parameters
+    - Config file parameters
+    - Support
+    - License
+
+Identifying Your Adapter
+========================
+
+The driver in this release is compatible with AQC-100, AQC-107, AQC-108
+based ethernet adapters.
+
+
+SFP+ Devices (for AQC-100 based adapters)
+-----------------------------------------
+
+This release was tested with passive Direct Attach Cables (DAC) and SFP+/LC
+Optical Transceiver.
+
+Configuration
+=============
+
+Viewing Link Messages
+---------------------
+  Link messages will not be displayed to the console if the distribution is
+  restricting system messages. In order to see network driver link messages on
+  your console, set dmesg to eight by entering the following::
+
+       dmesg -n 8
+
+  .. note::
+
+     This setting is not saved across reboots.
+
+Jumbo Frames
+------------
+  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
+  enabled by changing the MTU to a value larger than the default of 1500.
+  The maximum value for the MTU is 16000.  Use the `ip` command to
+  increase the MTU size.  For example::
+
+	ip link set mtu 16000 dev enp1s0
+
+ethtool
+-------
+  The driver utilizes the ethtool interface for driver configuration and
+  diagnostics, as well as displaying statistical information. The latest
+  ethtool version is required for this functionality.
+
+NAPI
+----
+  NAPI (Rx polling mode) is supported in the atlantic driver.
+
+Supported ethtool options
+=========================
+
+Viewing adapter settings
+------------------------
+
+ ::
+
+    ethtool <ethX>
+
+ Output example::
+
+  Settings for enp1s0:
+    Supported ports: [ TP ]
+    Supported link modes:   100baseT/Full
+			    1000baseT/Full
+			    10000baseT/Full
+			    2500baseT/Full
+			    5000baseT/Full
+    Supported pause frame use: Symmetric
+    Supports auto-negotiation: Yes
+    Supported FEC modes: Not reported
+    Advertised link modes:  100baseT/Full
+			    1000baseT/Full
+			    10000baseT/Full
+			    2500baseT/Full
+			    5000baseT/Full
+    Advertised pause frame use: Symmetric
+    Advertised auto-negotiation: Yes
+    Advertised FEC modes: Not reported
+    Speed: 10000Mb/s
+    Duplex: Full
+    Port: Twisted Pair
+    PHYAD: 0
+    Transceiver: internal
+    Auto-negotiation: on
+    MDI-X: Unknown
+    Supports Wake-on: g
+    Wake-on: d
+    Link detected: yes
+
+
+ .. note::
+
+    AQrate speeds (2.5/5 Gb/s) will be displayed only with linux kernels > 4.10.
+    But you can still use these speeds::
+
+	ethtool -s eth0 autoneg off speed 2500
+
+Viewing adapter information
+---------------------------
+
+ ::
+
+  ethtool -i <ethX>
+
+ Output example::
+
+  driver: atlantic
+  version: 5.2.0-050200rc5-generic-kern
+  firmware-version: 3.1.78
+  expansion-rom-version:
+  bus-info: 0000:01:00.0
+  supports-statistics: yes
+  supports-test: no
+  supports-eeprom-access: no
+  supports-register-dump: yes
+  supports-priv-flags: no
+
+
+Viewing Ethernet adapter statistics
+-----------------------------------
+
+ ::
+
+    ethtool -S <ethX>
+
+ Output example::
+
+  NIC statistics:
+     InPackets: 13238607
+     InUCast: 13293852
+     InMCast: 52
+     InBCast: 3
+     InErrors: 0
+     OutPackets: 23703019
+     OutUCast: 23704941
+     OutMCast: 67
+     OutBCast: 11
+     InUCastOctects: 213182760
+     OutUCastOctects: 22698443
+     InMCastOctects: 6600
+     OutMCastOctects: 8776
+     InBCastOctects: 192
+     OutBCastOctects: 704
+     InOctects: 2131839552
+     OutOctects: 226938073
+     InPacketsDma: 95532300
+     OutPacketsDma: 59503397
+     InOctetsDma: 1137102462
+     OutOctetsDma: 2394339518
+     InDroppedDma: 0
+     Queue[0] InPackets: 23567131
+     Queue[0] OutPackets: 20070028
+     Queue[0] InJumboPackets: 0
+     Queue[0] InLroPackets: 0
+     Queue[0] InErrors: 0
+     Queue[1] InPackets: 45428967
+     Queue[1] OutPackets: 11306178
+     Queue[1] InJumboPackets: 0
+     Queue[1] InLroPackets: 0
+     Queue[1] InErrors: 0
+     Queue[2] InPackets: 3187011
+     Queue[2] OutPackets: 13080381
+     Queue[2] InJumboPackets: 0
+     Queue[2] InLroPackets: 0
+     Queue[2] InErrors: 0
+     Queue[3] InPackets: 23349136
+     Queue[3] OutPackets: 15046810
+     Queue[3] InJumboPackets: 0
+     Queue[3] InLroPackets: 0
+     Queue[3] InErrors: 0
+
+Interrupt coalescing support
+----------------------------
+
+ ITR mode, TX/RX coalescing timings could be viewed with::
+
+    ethtool -c <ethX>
+
+ and changed with::
+
+    ethtool -C <ethX> tx-usecs <usecs> rx-usecs <usecs>
+
+ To disable coalescing::
+
+    ethtool -C <ethX> tx-usecs 0 rx-usecs 0 tx-max-frames 1 tx-max-frames 1
+
+Wake on LAN support
+-------------------
+
+ WOL support by magic packet::
+
+    ethtool -s <ethX> wol g
+
+ To disable WOL::
+
+    ethtool -s <ethX> wol d
+
+Set and check the driver message level
+--------------------------------------
+
+ Set message level
+
+ ::
+
+    ethtool -s <ethX> msglvl <level>
+
+ Level values:
+
+ ======   =============================
+ 0x0001   general driver status.
+ 0x0002   hardware probing.
+ 0x0004   link state.
+ 0x0008   periodic status check.
+ 0x0010   interface being brought down.
+ 0x0020   interface being brought up.
+ 0x0040   receive error.
+ 0x0080   transmit error.
+ 0x0200   interrupt handling.
+ 0x0400   transmit completion.
+ 0x0800   receive completion.
+ 0x1000   packet contents.
+ 0x2000   hardware status.
+ 0x4000   Wake-on-LAN status.
+ ======   =============================
+
+ By default, the level of debugging messages is set to 0x0001 (general driver status).
+
+ Check message level
+
+ ::
+
+    ethtool <ethX> | grep "Current message level"
+
+ If you want to disable the output of messages::
+
+    ethtool -s <ethX> msglvl 0
+
+RX flow rules (ntuple filters)
+------------------------------
+
+ Separate rule types are supported; they are applied in the following order:
+
+ 1. 16 VLAN ID rules
+ 2. 16 L2 EtherType rules
+ 3. 8 L3/L4 5-Tuple rules
+
+
+ The driver utilizes the ethtool interface for configuring ntuple filters,
+ via ``ethtool -N <device> <filter>``.
+
+ To enable or disable the RX flow rules::
+
+    ethtool -K ethX ntuple <on|off>
+
+ When disabling ntuple filters, all the user-programmed filters are
+ flushed from the driver cache and hardware. All needed filters must
+ be re-added when ntuple is re-enabled.
+
+ Because of the fixed order of the rules, the location of filters is also fixed:
+
+ - Locations 0 - 15 for VLAN ID filters
+ - Locations 16 - 31 for L2 EtherType filters
+ - Locations 32 - 39 for L3/L4 5-tuple filters (locations 32, 36 for IPv6)
+
+ The L3/L4 5-tuple (protocol, source and destination IP address, source and
+ destination TCP/UDP/SCTP port) is compared against 8 filters. For IPv4, up to
+ 8 source and destination addresses can be matched. For IPv6, up to 2 pairs of
+ addresses can be supported. Source and destination ports are only compared for
+ TCP/UDP/SCTP packets.
+
+ To add a filter that directs packet to queue 5, use
+ ``<-N|-U|--config-nfc|--config-ntuple>`` switch::
+
+    ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.1 dst-ip 10.0.0.2 src-port 2000 dst-port 2001 action 5 <loc 32>
+
+ - action is the queue number.
+ - loc is the rule number.
+
+ For ``flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6`` you must set the loc
+ number within 32 - 39.
+ For ``flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6`` you can set 8 rules
+ for traffic IPv4 or you can set 2 rules for traffic IPv6. Loc number traffic
+ IPv6 is 32 and 36.
+ At the moment you can not use IPv4 and IPv6 filters at the same time.
+
+ Example filter for IPv6 filter traffic::
+
+    sudo ethtool -N <ethX> flow-type tcp6 src-ip 2001:db8:0:f101::1 dst-ip 2001:db8:0:f101::2 action 1 loc 32
+    sudo ethtool -N <ethX> flow-type ip6 src-ip 2001:db8:0:f101::2 dst-ip 2001:db8:0:f101::5 action -1 loc 36
+
+ Example filter for IPv4 filter traffic::
+
+    sudo ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.4 dst-ip 10.0.0.7 src-port 2000 dst-port 2001 loc 32
+    sudo ethtool -N <ethX> flow-type tcp4 src-ip 10.0.0.3 dst-ip 10.0.0.9 src-port 2000 dst-port 2001 loc 33
+    sudo ethtool -N <ethX> flow-type ip4 src-ip 10.0.0.6 dst-ip 10.0.0.4 loc 34
+
+ If you set action -1, then all traffic corresponding to the filter will be discarded.
+
+ The maximum value action is 31.
+
+
+ The VLAN filter (VLAN id) is compared against 16 filters.
+ VLAN id must be accompanied by mask 0xF000. That is to distinguish VLAN filter
+ from L2 Ethertype filter with UserPriority since both User Priority and VLAN ID
+ are passed in the same 'vlan' parameter.
+
+ To add a filter that directs packets from VLAN 2001 to queue 1::
+
+    ethtool -N <ethX> flow-type ip4 vlan 2001 m 0xF000 action 1 loc 0
+
+
+ L2 EtherType filters allow filtering packets by the EtherType field or by both
+ the EtherType and User Priority (PCP) fields of 802.1Q.
+ UserPriority (vlan) parameter must be accompanied by mask 0x1FFF. That is to
+ distinguish VLAN filter from L2 Ethertype filter with UserPriority since both
+ User Priority and VLAN ID are passed in the same 'vlan' parameter.
+
+ To add a filter that directs IPv4 packets of priority 3 to queue 3::
+
+    ethtool -N <ethX> flow-type ether proto 0x800 vlan 0x600 m 0x1FFF action 3 loc 16
+
+ To see the list of filters currently present::
+
+    ethtool <-u|-n|--show-nfc|--show-ntuple> <ethX>
+
+ Rules may be deleted from the table itself. This is done using::
+
+    sudo ethtool <-N|-U|--config-nfc|--config-ntuple> <ethX> delete <loc>
+
+ - loc is the rule number to be deleted.
+
+ Rx filters are an interface to load the filter table that funnels all flows
+ into queue 0 unless an alternative queue is specified using "action". In that
+ case, any flow that matches the filter criteria will be directed to the
+ appropriate queue. RX filters are supported on all kernels 2.6.30 and later.
+
+RSS for UDP
+-----------
+
+ Currently, NIC does not support RSS for fragmented IP packets, which leads to
+ incorrect working of RSS for fragmented UDP traffic. To disable RSS for UDP the
+ RX Flow L3/L4 rule may be used.
+
+ Example::
+
+    ethtool -N eth0 flow-type udp4 action 0 loc 32
+
+UDP GSO hardware offload
+------------------------
+
+ UDP GSO allows boosting UDP tx rates by offloading UDP header allocation
+ into hardware. A special userspace socket option is required for this; it
+ can be validated with /kernel/tools/testing/selftests/net/::
+
+    udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100
+
+ This will cause 100-byte UDP packets to be sent, formed from a single
+ 6300-byte user buffer.
+
+ UDP GSO is configured by::
+
+    ethtool -K eth0 tx-udp-segmentation on
+
+Private flags (testing)
+-----------------------
+
+ Atlantic driver supports private flags for hardware custom features::
+
+	$ ethtool --show-priv-flags ethX
+
+	Private flags for ethX:
+	DMASystemLoopback  : off
+	PKTSystemLoopback  : off
+	DMANetworkLoopback : off
+	PHYInternalLoopback: off
+	PHYExternalLoopback: off
+
+ Example::
+
+	$ ethtool --set-priv-flags ethX DMASystemLoopback on
+
+ DMASystemLoopback:   DMA Host loopback.
+ PKTSystemLoopback:   Packet buffer host loopback.
+ DMANetworkLoopback:  Network side loopback on DMA block.
+ PHYInternalLoopback: Internal loopback on Phy.
+ PHYExternalLoopback: External loopback on Phy (with loopback ethernet cable).
+
+
+Command Line Parameters
+=======================
+The following command line parameters are available on atlantic driver:
+
+aq_itr - Interrupt throttling mode
+----------------------------------
+Accepted values: 0, 1, 0xFFFF
+
+Default value: 0xFFFF
+
+======   ==============================================================
+0        Disable interrupt throttling.
+1        Enable interrupt throttling and use specified tx and rx rates.
+0xFFFF   Auto throttling mode. Driver will choose the best RX and TX
+	 interrupt throttling settings based on link speed.
+======   ==============================================================
+
+aq_itr_tx - TX interrupt throttle rate
+--------------------------------------
+
+Accepted values: 0 - 0x1FF
+
+Default value: 0
+
+TX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+aq_itr_rx - RX interrupt throttle rate
+--------------------------------------
+
+Accepted values: 0 - 0x1FF
+
+Default value: 0
+
+RX side throttling in microseconds. Adapter will setup maximum interrupt delay
+to this value. Minimum interrupt delay will be a half of this value
+
+.. note::
+
+   ITR settings can be changed at runtime with ``ethtool -C`` (see above)
+
+Config file parameters
+======================
+
+For some fine tuning and performance optimizations,
+some parameters can be changed in the {source_dir}/aq_cfg.h file.
+
+AQ_CFG_RX_PAGEORDER
+-------------------
+
+Default value: 0
+
+RX page order override. That's a power of 2 number of RX pages allocated for
+each descriptor. Received descriptor size is still limited by
+AQ_CFG_RX_FRAME_MAX.
+
+Increasing pageorder makes page reuse better (actual on iommu enabled systems).
+
+AQ_CFG_RX_REFILL_THRES
+----------------------
+
+Default value: 32
+
+RX refill threshold. RX path will not refill freed descriptors until the
+specified number of free descriptors is observed. Larger values may help
+better page reuse but may lead to packet drops as well.
+
+AQ_CFG_VECS_DEF
+---------------
+
+Number of queues
+
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_MAX)
+
+Default value: 8
+
+Notice this value will be capped by the number of cores available on the system.
+
+AQ_CFG_IS_RSS_DEF
+-----------------
+
+Enable/disable Receive Side Scaling
+
+This feature allows the adapter to distribute receive processing
+across multiple CPU-cores and to prevent from overloading a single CPU core.
+
+Valid values
+
+==  ========
+0   disabled
+1   enabled
+==  ========
+
+Default value: 1
+
+AQ_CFG_NUM_RSS_QUEUES_DEF
+-------------------------
+
+Number of queues for Receive Side Scaling
+
+Valid Range: 0 - 8 (up to AQ_CFG_VECS_DEF)
+
+Default value: AQ_CFG_VECS_DEF
+
+AQ_CFG_IS_LRO_DEF
+-----------------
+
+Enable/disable Large Receive Offload
+
+This offload enables the adapter to coalesce multiple TCP segments and indicate
+them as a single coalesced unit to the OS networking subsystem.
+
+The system consumes less energy but it also introduces more latency in packets
+processing.
+
+Valid values
+
+==  ========
+0   disabled
+1   enabled
+==  ========
+
+Default value: 1
+
+AQ_CFG_TX_CLEAN_BUDGET
+----------------------
+
+Maximum descriptors to cleanup on TX at once.
+
+Default value: 256
+
+After aq_cfg.h is changed, the driver must be rebuilt for the changes to take effect.
+
+Support
+=======
+
+If an issue is identified with the released source code on the supported
+kernel with a supported adapter, email the specific information related
+to the issue to aqn_support@marvell.com
+
+License
+=======
+
+aQuantia Corporation Network Driver
+
+Copyright |copy| 2014 - 2019 aQuantia Corporation.
+
+This program is free software; you can redistribute it and/or modify it
+under the terms and conditions of the GNU General Public License,
+version 2, as published by the Free Software Foundation.
diff --git a/Documentation/networking/device_drivers/aquantia/atlantic.txt b/Documentation/networking/device_drivers/aquantia/atlantic.txt
deleted file mode 100644
index 2013fcedc2da..000000000000
--- a/Documentation/networking/device_drivers/aquantia/atlantic.txt
+++ /dev/null
@@ -1,479 +0,0 @@
-Marvell(Aquantia) AQtion Driver for the aQuantia Multi-Gigabit PCI Express
-Family of Ethernet Adapters
-=============================================================================
-
-Contents
-========
-
-- Identifying Your Adapter
-- Configuration
-- Supported ethtool options
-- Command Line Parameters
-- Config file parameters
-- Support
-- License
-
-Identifying Your Adapter
-========================
-
-The driver in this release is compatible with AQC-100, AQC-107, AQC-108 based ethernet adapters.
-
-
-SFP+ Devices (for AQC-100 based adapters)
-----------------------------------
-
-This release tested with passive Direct Attach Cables (DAC) and SFP+/LC Optical Transceiver.
-
-Configuration
-=========================
-  Viewing Link Messages
-  ---------------------
-  Link messages will not be displayed to the console if the distribution is
-  restricting system messages. In order to see network driver link messages on
-  your console, set dmesg to eight by entering the following:
-
-       dmesg -n 8
-
-  NOTE: This setting is not saved across reboots.
-
-  Jumbo Frames
-  ------------
-  The driver supports Jumbo Frames for all adapters. Jumbo Frames support is
-  enabled by changing the MTU to a value larger than the default of 1500.
-  The maximum value for the MTU is 16000.  Use the `ip` command to
-  increase the MTU size.  For example:
-
-        ip link set mtu 16000 dev enp1s0
-
-  ethtool
-  -------
-  The driver utilizes the ethtool interface for driver configuration and
-  diagnostics, as well as displaying statistical information. The latest
-  ethtool version is required for this functionality.
-
-  NAPI
-  ----
-  NAPI (Rx polling mode) is supported in the atlantic driver.
-
-Supported ethtool options
-============================
- Viewing adapter settings
- ---------------------
- ethtool <ethX>
-
- Output example:
-
-  Settings for enp1s0:
-    Supported ports: [ TP ]
-    Supported link modes:   100baseT/Full
-                            1000baseT/Full
-                            10000baseT/Full
-                            2500baseT/Full
-                            5000baseT/Full
-    Supported pause frame use: Symmetric
-    Supports auto-negotiation: Yes
-    Supported FEC modes: Not reported
-    Advertised link modes:  100baseT/Full
-                            1000baseT/Full
-                            10000baseT/Full
-                            2500baseT/Full
-                            5000baseT/Full
-    Advertised pause frame use: Symmetric
-    Advertised auto-negotiation: Yes
-    Advertised FEC modes: Not reported
-    Speed: 10000Mb/s
-    Duplex: Full
-    Port: Twisted Pair
-    PHYAD: 0
-    Transceiver: internal
-    Auto-negotiation: on
-    MDI-X: Unknown
-    Supports Wake-on: g
-    Wake-on: d
-    Link detected: yes
-
- ---
- Note: AQrate speeds (2.5/5 Gb/s) will be displayed only with linux kernels > 4.10.
-    But you can still use these speeds:
-	ethtool -s eth0 autoneg off speed 2500
-
- Viewing adapter information
- ---------------------
- ethtool -i <ethX>
-
- Output example:
-
-  driver: atlantic
-  version: 5.2.0-050200rc5-generic-kern
-  firmware-version: 3.1.78
-  expansion-rom-version:
-  bus-info: 0000:01:00.0
-  supports-statistics: yes
-  supports-test: no
-  supports-eeprom-access: no
-  supports-register-dump: yes
-  supports-priv-flags: no
-
-
- Viewing Ethernet adapter statistics:
- ---------------------
- ethtool -S <ethX>
-
- Output example:
- NIC statistics:
-     InPackets: 13238607
-     InUCast: 13293852
-     InMCast: 52
-     InBCast: 3
-     InErrors: 0
-     OutPackets: 23703019
-     OutUCast: 23704941
-     OutMCast: 67
-     OutBCast: 11
-     InUCastOctects: 213182760
-     OutUCastOctects: 22698443
-     InMCastOctects: 6600
-     OutMCastOctects: 8776
-     InBCastOctects: 192
-     OutBCastOctects: 704
-     InOctects: 2131839552
-     OutOctects: 226938073
-     InPacketsDma: 95532300
-     OutPacketsDma: 59503397
-     InOctetsDma: 1137102462
-     OutOctetsDma: 2394339518
-     InDroppedDma: 0
-     Queue[0] InPackets: 23567131
-     Queue[0] OutPackets: 20070028
-     Queue[0] InJumboPackets: 0
-     Queue[0] InLroPackets: 0
-     Queue[0] InErrors: 0
-     Queue[1] InPackets: 45428967
-     Queue[1] OutPackets: 11306178
-     Queue[1] InJumboPackets: 0
-     Queue[1] InLroPackets: 0
-     Queue[1] InErrors: 0
-     Queue[2] InPackets: 3187011
-     Queue[2] OutPackets: 13080381
-     Queue[2] InJumboPackets: 0
-     Queue[2] InLroPackets: 0
-     Queue[2] InErrors: 0
-     Queue[3] InPackets: 23349136
-     Queue[3] OutPackets: 15046810
-     Queue[3] InJumboPackets: 0
-     Queue[3] InLroPackets: 0
-     Queue[3] InErrors: 0
-
- Interrupt coalescing support
- ---------------------------------
- ITR mode, TX/RX coalescing timings could be viewed with:
-
- ethtool -c <ethX>
-
- and changed with:
-
- ethtool -C <ethX> tx-usecs <usecs> rx-usecs <usecs>
-
- To disable coalescing:
-
- ethtool -C <ethX> tx-usecs 0 rx-usecs 0 tx-max-frames 1 tx-max-frames 1
-
- Wake on LAN support
- ---------------------------------
-
- WOL support by magic packet:
-
- ethtool -s <ethX> wol g
-
- To disable WOL:
-
- ethtool -s <ethX> wol d
-
- Set and check the driver message level
- ---------------------------------
-
- Set message level
-
- ethtool -s <ethX> msglvl <level>
-
- Level values:
-
- 0x0001 - general driver status.
- 0x0002 - hardware probing.
- 0x0004 - link state.
- 0x0008 - periodic status check.
- 0x0010 - interface being brought down.
- 0x0020 - interface being brought up.
- 0x0040 - receive error.
- 0x0080 - transmit error.
- 0x0200 - interrupt handling.
- 0x0400 - transmit completion.
- 0x0800 - receive completion.
- 0x1000 - packet contents.
- 0x2000 - hardware status.
- 0x4000 - Wake-on-LAN status.
-
- By default, the level of debugging messages is set 0x0001(general driver status).
-
- Check message level
-
- ethtool <ethX> | grep "Current message level"
-
- If you want to disable the output of messages
-
- ethtool -s <ethX> msglvl 0
-
- RX flow rules (ntuple filters)
- ---------------------------------
- There are separate rules supported, that applies in that order:
- 1. 16 VLAN ID rules
- 2. 16 L2 EtherType rules
- 3. 8 L3/L4 5-Tuple rules
-
-
- The driver utilizes the ethtool interface for configuring ntuple filters,
- via "ethtool -N <device> <filter>".
-
- To enable or disable the RX flow rules:
-
- ethtool -K ethX ntuple <on|off>
-
- When disabling ntuple filters, all the user programed filters are
- flushed from the driver cache and hardware. All needed filters must
- be re-added when ntuple is re-enabled.
-
- Because of the fixed order of the rules, the location of filters is also fixed:
- - Locations 0 - 15 for VLAN ID filters
- - Locations 16 - 31 for L2 EtherType filters
- - Locations 32 - 39 for L3/L4 5-tuple filters (locations 32, 36 for IPv6)
-
- The L3/L4 5-tuple (protocol, source and destination IP address, source and
- destination TCP/UDP/SCTP port) is compared against 8 filters. For IPv4, up to
- 8 source and destination addresses can be matched. For IPv6, up to 2 pairs of
- addresses can be supported. Source and destination ports are only compared for
- TCP/UDP/SCTP packets.
-
- To add a filter that directs packet to queue 5, use <-N|-U|--config-nfc|--config-ntuple> switch:
-
- ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.1 dst-ip 10.0.0.2 src-port 2000 dst-port 2001 action 5 <loc 32>
-
- - action is the queue number.
- - loc is the rule number.
-
- For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you must set the loc
- number within 32 - 39.
- For "flow-type ip4|udp4|tcp4|sctp4|ip6|udp6|tcp6|sctp6" you can set 8 rules
- for traffic IPv4 or you can set 2 rules for traffic IPv6. Loc number traffic
- IPv6 is 32 and 36.
- At the moment you can not use IPv4 and IPv6 filters at the same time.
-
- Example filter for IPv6 filter traffic:
-
- sudo ethtool -N <ethX> flow-type tcp6 src-ip 2001:db8:0:f101::1 dst-ip 2001:db8:0:f101::2 action 1 loc 32
- sudo ethtool -N <ethX> flow-type ip6 src-ip 2001:db8:0:f101::2 dst-ip 2001:db8:0:f101::5 action -1 loc 36
-
- Example filter for IPv4 filter traffic:
-
- sudo ethtool -N <ethX> flow-type udp4 src-ip 10.0.0.4 dst-ip 10.0.0.7 src-port 2000 dst-port 2001 loc 32
- sudo ethtool -N <ethX> flow-type tcp4 src-ip 10.0.0.3 dst-ip 10.0.0.9 src-port 2000 dst-port 2001 loc 33
- sudo ethtool -N <ethX> flow-type ip4 src-ip 10.0.0.6 dst-ip 10.0.0.4 loc 34
-
- If you set action -1, then all traffic corresponding to the filter will be discarded.
- The maximum value action is 31.
-
-
- The VLAN filter (VLAN id) is compared against 16 filters.
- VLAN id must be accompanied by mask 0xF000. That is to distinguish VLAN filter
- from L2 Ethertype filter with UserPriority since both User Priority and VLAN ID
- are passed in the same 'vlan' parameter.
-
- To add a filter that directs packets from VLAN 2001 to queue 5:
- ethtool -N <ethX> flow-type ip4 vlan 2001 m 0xF000 action 1 loc 0
-
-
- L2 EtherType filters allows filter packet by EtherType field or both EtherType
- and User Priority (PCP) field of 802.1Q.
- UserPriority (vlan) parameter must be accompanied by mask 0x1FFF. That is to
- distinguish VLAN filter from L2 Ethertype filter with UserPriority since both
- User Priority and VLAN ID are passed in the same 'vlan' parameter.
-
- To add a filter that directs IP4 packess of priority 3 to queue 3:
- ethtool -N <ethX> flow-type ether proto 0x800 vlan 0x600 m 0x1FFF action 3 loc 16
-
-
- To see the list of filters currently present:
-
- ethtool <-u|-n|--show-nfc|--show-ntuple> <ethX>
-
- Rules may be deleted from the table itself. This is done using:
-
- sudo ethtool <-N|-U|--config-nfc|--config-ntuple> <ethX> delete <loc>
-
- - loc is the rule number to be deleted.
-
- Rx filters is an interface to load the filter table that funnels all flow
- into queue 0 unless an alternative queue is specified using "action". In that
- case, any flow that matches the filter criteria will be directed to the
- appropriate queue. RX filters is supported on all kernels 2.6.30 and later.
-
- RSS for UDP
- ---------------------------------
- Currently, NIC does not support RSS for fragmented IP packets, which leads to
- incorrect working of RSS for fragmented UDP traffic. To disable RSS for UDP the
- RX Flow L3/L4 rule may be used.
-
- Example:
- ethtool -N eth0 flow-type udp4 action 0 loc 32
-
- UDP GSO hardware offload
- ---------------------------------
- UDP GSO allows to boost UDP tx rates by offloading UDP headers allocation
- into hardware. A special userspace socket option is required for this,
- could be validated with /kernel/tools/testing/selftests/net/
-
-    udpgso_bench_tx -u -4 -D 10.0.1.1 -s 6300 -S 100
-
- Will cause sending out of 100 byte sized UDP packets formed from single
- 6300 bytes user buffer.
-
- UDP GSO is configured by:
-
-    ethtool -K eth0 tx-udp-segmentation on
-
- Private flags (testing)
- ---------------------------------
-
- Atlantic driver supports private flags for hardware custom features:
-
-	$ ethtool --show-priv-flags ethX
-
-	Private flags for ethX:
-	DMASystemLoopback  : off
-	PKTSystemLoopback  : off
-	DMANetworkLoopback : off
-	PHYInternalLoopback: off
-	PHYExternalLoopback: off
-
- Example:
-
-	$ ethtool --set-priv-flags ethX DMASystemLoopback on
-
- DMASystemLoopback:   DMA Host loopback.
- PKTSystemLoopback:   Packet buffer host loopback.
- DMANetworkLoopback:  Network side loopback on DMA block.
- PHYInternalLoopback: Internal loopback on Phy.
- PHYExternalLoopback: External loopback on Phy (with loopback ethernet cable).
-
-
-Command Line Parameters
-=======================
-The following command line parameters are available on atlantic driver:
-
-aq_itr -Interrupt throttling mode
-----------------------------------------
-Accepted values: 0, 1, 0xFFFF
-Default value: 0xFFFF
-0      - Disable interrupt throttling.
-1      - Enable interrupt throttling and use specified tx and rx rates.
-0xFFFF - Auto throttling mode. Driver will choose the best RX and TX
-         interrupt throtting settings based on link speed.
-
-aq_itr_tx - TX interrupt throttle rate
-----------------------------------------
-Accepted values: 0 - 0x1FF
-Default value: 0
-TX side throttling in microseconds. Adapter will setup maximum interrupt delay
-to this value. Minimum interrupt delay will be a half of this value
-
-aq_itr_rx - RX interrupt throttle rate
-----------------------------------------
-Accepted values: 0 - 0x1FF
-Default value: 0
-RX side throttling in microseconds. Adapter will setup maximum interrupt delay
-to this value. Minimum interrupt delay will be a half of this value
-
-Note: ITR settings could be changed in runtime by ethtool -c means (see below)
-
-Config file parameters
-=======================
-For some fine tuning and performance optimizations,
-some parameters can be changed in the {source_dir}/aq_cfg.h file.
-
-AQ_CFG_RX_PAGEORDER
-----------------------------------------
-Default value: 0
-RX page order override. Thats a power of 2 number of RX pages allocated for
-each descriptor. Received descriptor size is still limited by AQ_CFG_RX_FRAME_MAX.
-Increasing pageorder makes page reuse better (actual on iommu enabled systems).
-
-AQ_CFG_RX_REFILL_THRES
-----------------------------------------
-Default value: 32
-RX refill threshold. RX path will not refill freed descriptors until the
-specified number of free descriptors is observed. Larger values may help
-better page reuse but may lead to packet drops as well.
-
-AQ_CFG_VECS_DEF
-------------------------------------------------------------
-Number of queues
-Valid Range: 0 - 8 (up to AQ_CFG_VECS_MAX)
-Default value: 8
-Notice this value will be capped by the number of cores available on the system.
-
-AQ_CFG_IS_RSS_DEF
-------------------------------------------------------------
-Enable/disable Receive Side Scaling
-
-This feature allows the adapter to distribute receive processing
-across multiple CPU-cores and to prevent from overloading a single CPU core.
-
-Valid values
-0 - disabled
-1 - enabled
-
-Default value: 1
-
-AQ_CFG_NUM_RSS_QUEUES_DEF
-------------------------------------------------------------
-Number of queues for Receive Side Scaling
-Valid Range: 0 - 8 (up to AQ_CFG_VECS_DEF)
-
-Default value: AQ_CFG_VECS_DEF
-
-AQ_CFG_IS_LRO_DEF
-------------------------------------------------------------
-Enable/disable Large Receive Offload
-
-This offload enables the adapter to coalesce multiple TCP segments and indicate
-them as a single coalesced unit to the OS networking subsystem.
-The system consumes less energy but it also introduces more latency in packets processing.
-
-Valid values
-0 - disabled
-1 - enabled
-
-Default value: 1
-
-AQ_CFG_TX_CLEAN_BUDGET
-----------------------------------------
-Maximum descriptors to cleanup on TX at once.
-Default value: 256
-
-After the aq_cfg.h file changed the driver must be rebuilt to take effect.
-
-Support
-=======
-
-If an issue is identified with the released source code on the supported
-kernel with a supported adapter, email the specific information related
-to the issue to aqn_support@marvell.com
-
-License
-=======
-
-aQuantia Corporation Network Driver
-Copyright(c) 2014 - 2019 aQuantia Corporation.
-
-This program is free software; you can redistribute it and/or modify it
-under the terms and conditions of the GNU General Public License,
-version 2, as published by the Free Software Foundation.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 019a0d2efe67..7dde314fc957 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -30,6 +30,7 @@ Contents:
    3com/3c509
    3com/vortex
    amazon/ena
+   aquantia/atlantic
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index 7b6c13cc832f..b5cfee17635e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -1275,7 +1275,7 @@ L:	netdev@vger.kernel.org
 S:	Supported
 W:	https://www.marvell.com/
 Q:	http://patchwork.ozlabs.org/project/netdev/list/
-F:	Documentation/networking/device_drivers/aquantia/atlantic.txt
+F:	Documentation/networking/device_drivers/aquantia/atlantic.rst
 F:	drivers/net/ethernet/aquantia/atlantic/
 
 AQUANTIA ETHERNET DRIVER PTP SUBSYSTEM
-- 
cgit v1.2.3-59-g8ed1b


From c839ce557b35de084d06f91c4e37948bdcef9709 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:38 +0200
Subject: docs: networking: device drivers: convert chelsio/cxgb.txt to ReST

- add SPDX header;
- use copyright symbol;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- add notes markups;
- mark tables as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/chelsio/cxgb.rst     | 393 +++++++++++++++++++++
 .../networking/device_drivers/chelsio/cxgb.txt     | 352 ------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 drivers/net/ethernet/chelsio/Kconfig               |   2 +-
 4 files changed, 395 insertions(+), 353 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/chelsio/cxgb.rst
 delete mode 100644 Documentation/networking/device_drivers/chelsio/cxgb.txt

diff --git a/Documentation/networking/device_drivers/chelsio/cxgb.rst b/Documentation/networking/device_drivers/chelsio/cxgb.rst
new file mode 100644
index 000000000000..435dce5fa2c7
--- /dev/null
+++ b/Documentation/networking/device_drivers/chelsio/cxgb.rst
@@ -0,0 +1,393 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+=============================================
+Chelsio N210 10Gb Ethernet Network Controller
+=============================================
+
+Driver Release Notes for Linux
+
+Version 2.1.1
+
+June 20, 2005
+
+.. Contents
+
+ INTRODUCTION
+ FEATURES
+ PERFORMANCE
+ DRIVER MESSAGES
+ KNOWN ISSUES
+ SUPPORT
+
+
+Introduction
+============
+
+ This document describes the Linux driver for Chelsio 10Gb Ethernet Network
+ Controller. This driver supports the Chelsio N210 NIC and is backward
+ compatible with the Chelsio N110 model 10Gb NICs.
+
+
+Features
+========
+
+Adaptive Interrupts (adaptive-rx)
+---------------------------------
+
+  This feature provides an adaptive algorithm that adjusts the interrupt
+  coalescing parameters, allowing the driver to dynamically adapt the latency
+  settings to achieve the highest performance during various types of network
+  load.
+
+  The interface used to control this feature is ethtool. Please see the
+  ethtool manpage for additional usage information.
+
+  By default, adaptive-rx is disabled.
+  To enable adaptive-rx::
+
+      ethtool -C <interface> adaptive-rx on
+
+  To disable adaptive-rx, use ethtool::
+
+      ethtool -C <interface> adaptive-rx off
+
+  After disabling adaptive-rx, the timer latency value will be set to 50us.
+  You may set the timer latency after disabling adaptive-rx::
+
+      ethtool -C <interface> rx-usecs <microseconds>
+
+  An example to set the timer latency value to 100us on eth0::
+
+      ethtool -C eth0 rx-usecs 100
+
+  You may also provide a timer latency value while disabling adaptive-rx::
+
+      ethtool -C <interface> adaptive-rx off rx-usecs <microseconds>
+
+  If adaptive-rx is disabled and a timer latency value is specified, the timer
+  will be set to the specified value until changed by the user or until
+  adaptive-rx is enabled.
+
+  To view the status of the adaptive-rx and timer latency values::
+
+      ethtool -c <interface>
+
+
+TCP Segmentation Offloading (TSO) Support
+-----------------------------------------
+
+  This feature, also known as "large send", enables a system's protocol stack
+  to offload portions of outbound TCP processing to a network interface card
+  thereby reducing system CPU utilization and enhancing performance.
+
+  The interface used to control this feature is ethtool version 1.8 or higher.
+  Please see the ethtool manpage for additional usage information.
+
+  By default, TSO is enabled.
+  To disable TSO::
+
+      ethtool -K <interface> tso off
+
+  To enable TSO::
+
+      ethtool -K <interface> tso on
+
+  To view the status of TSO::
+
+      ethtool -k <interface>
+
+
+Performance
+===========
+
+ The following information is provided as an example of how to change system
+ parameters for "performance tuning" and what value to use. You may or may not
+ want to change these system parameters, depending on your server/workstation
+ application. Doing so is not warranted in any way by Chelsio Communications,
+ and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss
+ of data or damage to equipment.
+
+ Your distribution may have a different way of doing things, or you may prefer
+ a different method. These commands are shown only to provide an example of
+ what to do and are by no means definitive.
+
+ Making any of the following system changes will only last until you reboot
+ your system. You may want to write a script that runs at boot-up which
+ includes the optimal settings for your system.
+
+  Setting PCI Latency Timer::
+
+      setpci -d 1425:* 0x0c.l=0x0000F800
+
+
+
+  Disabling TCP timestamp::
+
+      sysctl -w net.ipv4.tcp_timestamps=0
+
+  Disabling SACK::
+
+      sysctl -w net.ipv4.tcp_sack=0
+
+  Setting large number of incoming connection requests::
+
+      sysctl -w net.ipv4.tcp_max_syn_backlog=3000
+
+  Setting maximum receive socket buffer size::
+
+      sysctl -w net.core.rmem_max=1024000
+
+  Setting maximum send socket buffer size::
+
+      sysctl -w net.core.wmem_max=1024000
+
+  Set smp_affinity (on a multiprocessor system) to a single CPU::
+
+      echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+  Setting default receive socket buffer size::
+
+      sysctl -w net.core.rmem_default=524287
+
+  Setting default send socket buffer size::
+
+      sysctl -w net.core.wmem_default=524287
+
+  Setting maximum option memory buffers::
+
+      sysctl -w net.core.optmem_max=524287
+
+  Setting maximum backlog (# of unprocessed packets before kernel drops)::
+
+      sysctl -w net.core.netdev_max_backlog=300000
+
+  Setting TCP read buffers (min/default/max)::
+
+      sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
+
+  Setting TCP write buffers (min/pressure/max)::
+
+      sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
+
+  Setting TCP buffer space (min/pressure/max)::
+
+      sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000"
+
+  TCP window size for single connections:
+
+   The receive buffer (RX_WINDOW) size must be at least as large as the
+   Bandwidth-Delay Product of the communication link between the sender and
+   receiver. Due to the variations of RTT, you may want to increase the buffer
+   size up to 2 times the Bandwidth-Delay Product. Reference page 289 of
+   "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens.
+
+   At 10Gb speeds, use the following formula::
+
+       RX_WINDOW >= 1.25MBytes * RTT(in milliseconds)
+       Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000
+
+   RX_WINDOW sizes of 256KB - 512KB should be sufficient.
+
+   Setting the min, max, and default receive buffer (RX_WINDOW) size::
+
+       sysctl -w net.ipv4.tcp_rmem="<min> <default> <max>"
+
+  TCP window size for multiple connections:
+   The receive buffer (RX_WINDOW) size may be calculated the same as single
+   connections, but should be divided by the number of connections. The
+   smaller window prevents congestion and facilitates better pacing,
+   especially if/when MAC level flow control does not work well or when it is
+   not supported on the machine. Experimentation may be necessary to attain
+   the correct value. This method is provided as a starting point for the
+   correct receive buffer size.
+
+   Setting the min, max, and default receive buffer (RX_WINDOW) size is
+   performed in the same manner as single connection.
+
+
+Driver Messages
+===============
+
+ The following messages are the most common messages logged by syslog. These
+ may be found in /var/log/messages.
+
+  Driver up::
+
+     Chelsio Network Driver - version 2.1.1
+
+  NIC detected::
+
+     eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit
+
+  Link up::
+
+     eth#: link is up at 10 Gbps, full duplex
+
+  Link down::
+
+     eth#: link is down
+
+
+Known Issues
+============
+
+ These issues have been identified during testing. The following information
+ is provided as a workaround to the problem. In some cases, this problem is
+ inherent to Linux or to a particular Linux Distribution and/or hardware
+ platform.
+
+  1. Large number of TCP retransmits on a multiprocessor (SMP) system.
+
+      On a system with multiple CPUs, the interrupt (IRQ) for the network
+      controller may be bound to more than one CPU. This will cause TCP
+      retransmits if the packet data were to be split across different CPUs
+      and re-assembled in a different order than expected.
+
+      To eliminate the TCP retransmits, set smp_affinity on the particular
+      interrupt to a single CPU. You can locate the interrupt (IRQ) used on
+      the N110/N210 by using ifconfig::
+
+	  ifconfig <dev_name> | grep Interrupt
+
+      Set the smp_affinity to a single CPU::
+
+	  echo 1 > /proc/irq/<interrupt_number>/smp_affinity
+
+      It is highly suggested that you do not run the irqbalance daemon on your
+      system, as this will change any smp_affinity setting you have applied.
+      The irqbalance daemon runs on a 10 second interval and binds interrupts
+      to the least loaded CPU determined by the daemon. To disable this daemon::
+
+	  chkconfig --level 2345 irqbalance off
+
+      By default, some Linux distributions enable the kernel feature,
+      irqbalance, which performs the same function as the daemon. To disable
+      this feature, add the following line to your bootloader::
+
+	  noirqbalance
+
+      Example using the Grub bootloader::
+
+	      title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp)
+	      root (hd0,0)
+	      kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance
+	      initrd /initrd-2.4.21-27.ELsmp.img
+
+  2. After running insmod, the driver is loaded and the incorrect network
+     interface is brought up without running ifup.
+
+      When using 2.4.x kernels, including RHEL kernels, the Linux kernel
+      invokes a script named "hotplug". This script is primarily used to
+      automatically bring up USB devices when they are plugged in, however,
+      the script also attempts to automatically bring up a network interface
+      after loading the kernel module. The hotplug script does this by scanning
+      the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking
+      for HWADDR=<mac_address>.
+
+      If the hotplug script does not find the HWADDR within any of the
+      ifcfg-eth# files, it will bring up the device with the next available
+      interface name. If this interface is already configured for a different
+      network card, your new interface will have incorrect IP address and
+      network settings.
+
+      To solve this issue, you can add the HWADDR=<mac_address> key to the
+      interface config file of your network controller.
+
+      To disable this "hotplug" feature, you may add the driver (module name)
+      to the "blacklist" file located in /etc/hotplug. It has been noted that
+      this does not work for network devices because the net.agent script
+      does not use the blacklist file. Simply remove, or rename, the net.agent
+      script located in /etc/hotplug to disable this feature.
+
+  3. Transport Protocol (TP) hangs when running heavy multi-connection traffic
+     on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset.
+
+      If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel
+      chipset, you may experience the "133-Mhz Mode Split Completion Data
+      Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the
+      PCI-X bus.
+
+      AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel
+      can provide stale data via split completion cycles to a PCI-X card that
+      is operating at 133 Mhz", causing data corruption.
+
+      AMD provides three workarounds for this problem, however, Chelsio
+      recommends the first option for best performance with this bug:
+
+	For 133Mhz secondary bus operation, limit the transaction length and
+	the number of outstanding transactions, via BIOS configuration
+	programming of the PCI-X card, to the following:
+
+	   Data Length (bytes): 1k
+
+	   Total allowed outstanding transactions: 2
+
+      Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004,
+      section 56, "133-MHz Mode Split Completion Data Corruption" for more
+      details with this bug and workarounds suggested by AMD.
+
+      It may be possible to work outside AMD's recommended PCI-X settings, try
+      increasing the Data Length to 2k bytes for increased performance. If you
+      have issues with these settings, please revert to the "safe" settings
+      and duplicate the problem before submitting a bug or asking for support.
+
+      .. note::
+
+	    The default setting on most systems is 8 outstanding transactions
+	    and 2k bytes data length.
+
+  4. On multiprocessor systems, it has been noted that an application which
+     is handling 10Gb networking can switch between CPUs causing degraded
+     and/or unstable performance.
+
+      If running on an SMP system and taking performance measurements, it
+      is suggested you either run the latest netperf-2.4.0+ or use a binding
+      tool such as Tim Hockin's procstate utilities (runon)
+      <http://www.hockin.org/~thockin/procstate/>.
+
+      Binding netserver and netperf (or other applications) to particular
+      CPUs will have a significant difference in performance measurements.
+      You may need to experiment which CPU to bind the application to in
+      order to achieve the best performance for your system.
+
+      If you are developing an application designed for 10Gb networking,
+      please keep in mind you may want to look at kernel functions
+      sched_setaffinity & sched_getaffinity to bind your application.
+
+      If you are just running user-space applications such as ftp, telnet,
+      etc., you may want to try the runon tool provided by Tim Hockin's
+      procstate utility. You could also try binding the interface to a
+      particular CPU: runon 0 ifup eth0
+
+
+Support
+=======
+
+ If you have problems with the software or hardware, please contact our
+ customer support team via email at support@chelsio.com or check our website
+ at http://www.chelsio.com
+
+-------------------------------------------------------------------------------
+
+::
+
+ Chelsio Communications
+ 370 San Aleso Ave.
+ Suite 100
+ Sunnyvale, CA 94085
+ http://www.chelsio.com
+
+This program is free software; you can redistribute it and/or modify
+it under the terms of the GNU General Public License, version 2, as
+published by the Free Software Foundation.
+
+You should have received a copy of the GNU General Public License along
+with this program; if not, write to the Free Software Foundation, Inc.,
+59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+THIS SOFTWARE IS PROVIDED ``AS IS`` AND WITHOUT ANY EXPRESS OR IMPLIED
+WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
+MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
+
+Copyright |copy| 2003-2005 Chelsio Communications. All rights reserved.
diff --git a/Documentation/networking/device_drivers/chelsio/cxgb.txt b/Documentation/networking/device_drivers/chelsio/cxgb.txt
deleted file mode 100644
index 20a887615c4a..000000000000
--- a/Documentation/networking/device_drivers/chelsio/cxgb.txt
+++ /dev/null
@@ -1,352 +0,0 @@
-                 Chelsio N210 10Gb Ethernet Network Controller
-
-                         Driver Release Notes for Linux
-
-                                 Version 2.1.1
-
-                                 June 20, 2005
-
-CONTENTS
-========
- INTRODUCTION
- FEATURES
- PERFORMANCE
- DRIVER MESSAGES
- KNOWN ISSUES
- SUPPORT
-
-
-INTRODUCTION
-============
-
- This document describes the Linux driver for Chelsio 10Gb Ethernet Network
- Controller. This driver supports the Chelsio N210 NIC and is backward
- compatible with the Chelsio N110 model 10Gb NICs.
-
-
-FEATURES
-========
-
- Adaptive Interrupts (adaptive-rx)
- ---------------------------------
-
-  This feature provides an adaptive algorithm that adjusts the interrupt
-  coalescing parameters, allowing the driver to dynamically adapt the latency
-  settings to achieve the highest performance during various types of network
-  load.
-
-  The interface used to control this feature is ethtool. Please see the
-  ethtool manpage for additional usage information.
-
-  By default, adaptive-rx is disabled.
-  To enable adaptive-rx:
-
-      ethtool -C <interface> adaptive-rx on
-
-  To disable adaptive-rx, use ethtool:
-
-      ethtool -C <interface> adaptive-rx off
-
-  After disabling adaptive-rx, the timer latency value will be set to 50us.
-  You may set the timer latency after disabling adaptive-rx:
-
-      ethtool -C <interface> rx-usecs <microseconds>
-
-  An example to set the timer latency value to 100us on eth0:
-
-      ethtool -C eth0 rx-usecs 100
-
-  You may also provide a timer latency value while disabling adaptive-rx:
-
-      ethtool -C <interface> adaptive-rx off rx-usecs <microseconds>
-
-  If adaptive-rx is disabled and a timer latency value is specified, the timer
-  will be set to the specified value until changed by the user or until
-  adaptive-rx is enabled.
-
-  To view the status of the adaptive-rx and timer latency values:
-
-      ethtool -c <interface>
-
-
- TCP Segmentation Offloading (TSO) Support
- -----------------------------------------
-
-  This feature, also known as "large send", enables a system's protocol stack
-  to offload portions of outbound TCP processing to a network interface card
-  thereby reducing system CPU utilization and enhancing performance.
-
-  The interface used to control this feature is ethtool version 1.8 or higher.
-  Please see the ethtool manpage for additional usage information.
-
-  By default, TSO is enabled.
-  To disable TSO:
-
-      ethtool -K <interface> tso off
-
-  To enable TSO:
-
-      ethtool -K <interface> tso on
-
-  To view the status of TSO:
-
-      ethtool -k <interface>
-
-
-PERFORMANCE
-===========
-
- The following information is provided as an example of how to change system
- parameters for "performance tuning" an what value to use. You may or may not
- want to change these system parameters, depending on your server/workstation
- application. Doing so is not warranted in any way by Chelsio Communications,
- and is done at "YOUR OWN RISK". Chelsio will not be held responsible for loss
- of data or damage to equipment.
-
- Your distribution may have a different way of doing things, or you may prefer
- a different method. These commands are shown only to provide an example of
- what to do and are by no means definitive.
-
- Making any of the following system changes will only last until you reboot
- your system. You may want to write a script that runs at boot-up which
- includes the optimal settings for your system.
-
-  Setting PCI Latency Timer:
-      setpci -d 1425:* 0x0c.l=0x0000F800
-
-  Disabling TCP timestamp:
-      sysctl -w net.ipv4.tcp_timestamps=0
-
-  Disabling SACK:
-      sysctl -w net.ipv4.tcp_sack=0
-
-  Setting large number of incoming connection requests:
-      sysctl -w net.ipv4.tcp_max_syn_backlog=3000
-
-  Setting maximum receive socket buffer size:
-      sysctl -w net.core.rmem_max=1024000
-
-  Setting maximum send socket buffer size:
-      sysctl -w net.core.wmem_max=1024000
-
-  Set smp_affinity (on a multiprocessor system) to a single CPU:
-      echo 1 > /proc/irq/<interrupt_number>/smp_affinity
-
-  Setting default receive socket buffer size:
-      sysctl -w net.core.rmem_default=524287
-
-  Setting default send socket buffer size:
-      sysctl -w net.core.wmem_default=524287
-
-  Setting maximum option memory buffers:
-      sysctl -w net.core.optmem_max=524287
-
-  Setting maximum backlog (# of unprocessed packets before kernel drops):
-      sysctl -w net.core.netdev_max_backlog=300000
-
-  Setting TCP read buffers (min/default/max):
-      sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
-
-  Setting TCP write buffers (min/pressure/max):
-      sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
-
-  Setting TCP buffer space (min/pressure/max):
-      sysctl -w net.ipv4.tcp_mem="10000000 10000000 10000000"
-
-  TCP window size for single connections:
-   The receive buffer (RX_WINDOW) size must be at least as large as the
-   Bandwidth-Delay Product of the communication link between the sender and
-   receiver. Due to the variations of RTT, you may want to increase the buffer
-   size up to 2 times the Bandwidth-Delay Product. Reference page 289 of
-   "TCP/IP Illustrated, Volume 1, The Protocols" by W. Richard Stevens.
-   At 10Gb speeds, use the following formula:
-       RX_WINDOW >= 1.25MBytes * RTT(in milliseconds)
-       Example for RTT with 100us: RX_WINDOW = (1,250,000 * 0.1) = 125,000
-   RX_WINDOW sizes of 256KB - 512KB should be sufficient.
-   Setting the min, max, and default receive buffer (RX_WINDOW) size:
-       sysctl -w net.ipv4.tcp_rmem="<min> <default> <max>"
-
-  TCP window size for multiple connections:
-   The receive buffer (RX_WINDOW) size may be calculated the same as single
-   connections, but should be divided by the number of connections. The
-   smaller window prevents congestion and facilitates better pacing,
-   especially if/when MAC level flow control does not work well or when it is
-   not supported on the machine. Experimentation may be necessary to attain
-   the correct value. This method is provided as a starting point for the
-   correct receive buffer size.
-   Setting the min, max, and default receive buffer (RX_WINDOW) size is
-   performed in the same manner as single connection.
-
-
-DRIVER MESSAGES
-===============
-
- The following messages are the most common messages logged by syslog. These
- may be found in /var/log/messages.
-
-  Driver up:
-     Chelsio Network Driver - version 2.1.1
-
-  NIC detected:
-     eth#: Chelsio N210 1x10GBaseX NIC (rev #), PCIX 133MHz/64-bit
-
-  Link up:
-     eth#: link is up at 10 Gbps, full duplex
-
-  Link down:
-     eth#: link is down
-
-
-KNOWN ISSUES
-============
-
- These issues have been identified during testing. The following information
- is provided as a workaround to the problem. In some cases, this problem is
- inherent to Linux or to a particular Linux Distribution and/or hardware
- platform.
-
-  1. Large number of TCP retransmits on a multiprocessor (SMP) system.
-
-      On a system with multiple CPUs, the interrupt (IRQ) for the network
-      controller may be bound to more than one CPU. This will cause TCP
-      retransmits if the packet data were to be split across different CPUs
-      and re-assembled in a different order than expected.
-
-      To eliminate the TCP retransmits, set smp_affinity on the particular
-      interrupt to a single CPU. You can locate the interrupt (IRQ) used on
-      the N110/N210 by using ifconfig:
-          ifconfig <dev_name> | grep Interrupt
-      Set the smp_affinity to a single CPU:
-          echo 1 > /proc/irq/<interrupt_number>/smp_affinity
-
-      It is highly suggested that you do not run the irqbalance daemon on your
-      system, as this will change any smp_affinity setting you have applied.
-      The irqbalance daemon runs on a 10 second interval and binds interrupts
-      to the least loaded CPU determined by the daemon. To disable this daemon:
-          chkconfig --level 2345 irqbalance off
-
-      By default, some Linux distributions enable the kernel feature,
-      irqbalance, which performs the same function as the daemon. To disable
-      this feature, add the following line to your bootloader:
-          noirqbalance
-
-          Example using the Grub bootloader:
-              title Red Hat Enterprise Linux AS (2.4.21-27.ELsmp)
-              root (hd0,0)
-              kernel /vmlinuz-2.4.21-27.ELsmp ro root=/dev/hda3 noirqbalance
-              initrd /initrd-2.4.21-27.ELsmp.img
-
-  2. After running insmod, the driver is loaded and the incorrect network
-     interface is brought up without running ifup.
-
-      When using 2.4.x kernels, including RHEL kernels, the Linux kernel
-      invokes a script named "hotplug". This script is primarily used to
-      automatically bring up USB devices when they are plugged in, however,
-      the script also attempts to automatically bring up a network interface
-      after loading the kernel module. The hotplug script does this by scanning
-      the ifcfg-eth# config files in /etc/sysconfig/network-scripts, looking
-      for HWADDR=<mac_address>.
-
-      If the hotplug script does not find the HWADDRR within any of the
-      ifcfg-eth# files, it will bring up the device with the next available
-      interface name. If this interface is already configured for a different
-      network card, your new interface will have incorrect IP address and
-      network settings.
-
-      To solve this issue, you can add the HWADDR=<mac_address> key to the
-      interface config file of your network controller.
-
-      To disable this "hotplug" feature, you may add the driver (module name)
-      to the "blacklist" file located in /etc/hotplug. It has been noted that
-      this does not work for network devices because the net.agent script
-      does not use the blacklist file. Simply remove, or rename, the net.agent
-      script located in /etc/hotplug to disable this feature.
-
-  3. Transport Protocol (TP) hangs when running heavy multi-connection traffic
-     on an AMD Opteron system with HyperTransport PCI-X Tunnel chipset.
-
-      If your AMD Opteron system uses the AMD-8131 HyperTransport PCI-X Tunnel
-      chipset, you may experience the "133-Mhz Mode Split Completion Data
-      Corruption" bug identified by AMD while using a 133Mhz PCI-X card on the
-      bus PCI-X bus.
-
-      AMD states, "Under highly specific conditions, the AMD-8131 PCI-X Tunnel
-      can provide stale data via split completion cycles to a PCI-X card that
-      is operating at 133 Mhz", causing data corruption.
-
-      AMD's provides three workarounds for this problem, however, Chelsio
-      recommends the first option for best performance with this bug:
-
-        For 133Mhz secondary bus operation, limit the transaction length and
-        the number of outstanding transactions, via BIOS configuration
-        programming of the PCI-X card, to the following:
-
-           Data Length (bytes): 1k
-           Total allowed outstanding transactions: 2
-
-      Please refer to AMD 8131-HT/PCI-X Errata 26310 Rev 3.08 August 2004,
-      section 56, "133-MHz Mode Split Completion Data Corruption" for more
-      details with this bug and workarounds suggested by AMD.
-
-      It may be possible to work outside AMD's recommended PCI-X settings, try
-      increasing the Data Length to 2k bytes for increased performance. If you
-      have issues with these settings, please revert to the "safe" settings
-      and duplicate the problem before submitting a bug or asking for support.
-
-      NOTE: The default setting on most systems is 8 outstanding transactions
-            and 2k bytes data length.
-
-  4. On multiprocessor systems, it has been noted that an application which
-     is handling 10Gb networking can switch between CPUs causing degraded
-     and/or unstable performance.
-
-      If running on an SMP system and taking performance measurements, it
-      is suggested you either run the latest netperf-2.4.0+ or use a binding
-      tool such as Tim Hockin's procstate utilities (runon)
-      <http://www.hockin.org/~thockin/procstate/>.
-
-      Binding netserver and netperf (or other applications) to particular
-      CPUs will have a significant difference in performance measurements.
-      You may need to experiment which CPU to bind the application to in
-      order to achieve the best performance for your system.
-
-      If you are developing an application designed for 10Gb networking,
-      please keep in mind you may want to look at kernel functions
-      sched_setaffinity & sched_getaffinity to bind your application.
-
-      If you are just running user-space applications such as ftp, telnet,
-      etc., you may want to try the runon tool provided by Tim Hockin's
-      procstate utility. You could also try binding the interface to a
-      particular CPU: runon 0 ifup eth0
-
-
-SUPPORT
-=======
-
- If you have problems with the software or hardware, please contact our
- customer support team via email at support@chelsio.com or check our website
- at http://www.chelsio.com
-
-===============================================================================
-
- Chelsio Communications
- 370 San Aleso Ave.
- Suite 100
- Sunnyvale, CA 94085
- http://www.chelsio.com
-
-This program is free software; you can redistribute it and/or modify
-it under the terms of the GNU General Public License, version 2, as
-published by the Free Software Foundation.
-
-You should have received a copy of the GNU General Public License along
-with this program; if not, write to the Free Software Foundation, Inc.,
-59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-
-THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED
-WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF
-MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
-
- Copyright (c) 2003-2005 Chelsio Communications. All rights reserved.
-
-===============================================================================
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 7dde314fc957..23c4ec9c9125 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -31,6 +31,7 @@ Contents:
    3com/vortex
    amazon/ena
    aquantia/atlantic
+   chelsio/cxgb
 
 .. only::  subproject and html
 
diff --git a/drivers/net/ethernet/chelsio/Kconfig b/drivers/net/ethernet/chelsio/Kconfig
index 9909bfda167e..82cdfa51ce37 100644
--- a/drivers/net/ethernet/chelsio/Kconfig
+++ b/drivers/net/ethernet/chelsio/Kconfig
@@ -26,7 +26,7 @@ config CHELSIO_T1
 	  This driver supports Chelsio gigabit and 10-gigabit
 	  Ethernet cards. More information about adapter features and
 	  performance tuning is in
-	  <file:Documentation/networking/device_drivers/chelsio/cxgb.txt>.
+	  <file:Documentation/networking/device_drivers/chelsio/cxgb.rst>.
 
 	  For general information about Chelsio and our products, visit
 	  our website at <http://www.chelsio.com>.
-- 
cgit v1.2.3-59-g8ed1b


From 714a4da450c03bdc53a0d5fa6a4b3192b30c5cda Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:39 +0200
Subject: docs: networking: device drivers: convert cirrus/cs89x0.txt to ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/cirrus/cs89x0.rst    | 647 +++++++++++++++++++++
 .../networking/device_drivers/cirrus/cs89x0.txt    | 624 --------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 drivers/net/ethernet/cirrus/Kconfig                |   2 +-
 4 files changed, 649 insertions(+), 625 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/cirrus/cs89x0.rst
 delete mode 100644 Documentation/networking/device_drivers/cirrus/cs89x0.txt

diff --git a/Documentation/networking/device_drivers/cirrus/cs89x0.rst b/Documentation/networking/device_drivers/cirrus/cs89x0.rst
new file mode 100644
index 000000000000..e5c283940ac5
--- /dev/null
+++ b/Documentation/networking/device_drivers/cirrus/cs89x0.rst
@@ -0,0 +1,647 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================================================
+Cirrus Logic LAN CS8900/CS8920 Ethernet Adapters
+================================================
+
+.. note::
+
+   This document was contributed by Cirrus Logic for kernel 2.2.5.  This version
+   has been updated for 2.3.48 by Andrew Morton.
+
+   Still, this is too outdated! A major cleanup is needed here.
+
+Cirrus make a copy of this driver available at their website, as
+described below.  In general, you should use the driver version which
+comes with your Linux distribution.
+
+
+Linux Network Interface Driver ver. 2.00 <kernel 2.3.48>
+
+
+.. TABLE OF CONTENTS
+
+   1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
+	1.1 Product Overview
+	1.2 Driver Description
+	    1.2.1 Driver Name
+	    1.2.2 File in the Driver Package
+	1.3 System Requirements
+	1.4 Licensing Information
+
+   2.0 ADAPTER INSTALLATION and CONFIGURATION
+	2.1 CS8900-based Adapter Configuration
+	2.2 CS8920-based Adapter Configuration
+
+   3.0 LOADING THE DRIVER AS A MODULE
+
+   4.0 COMPILING THE DRIVER
+	4.1 Compiling the Driver as a Loadable Module
+	4.2 Compiling the driver to support memory mode
+	4.3 Compiling the driver to support Rx DMA
+
+   5.0 TESTING AND TROUBLESHOOTING
+	5.1 Known Defects and Limitations
+	5.2 Testing the Adapter
+	    5.2.1 Diagnostic Self-Test
+	    5.2.2 Diagnostic Network Test
+	5.3 Using the Adapter's LEDs
+	5.4 Resolving I/O Conflicts
+
+   6.0 TECHNICAL SUPPORT
+	6.1 Contacting Cirrus Logic's Technical Support
+	6.2 Information Required Before Contacting Technical Support
+	6.3 Obtaining the Latest Driver Version
+	6.4 Current maintainer
+	6.5 Kernel boot parameters
+
+
+1. Cirrus Logic LAN CS8900/CS8920 Ethernet Adapters
+===================================================
+
+
+1.1. Product Overview
+---------------------
+
+The CS8900-based ISA Ethernet Adapters from Cirrus Logic follow
+IEEE 802.3 standards and support half or full-duplex operation in ISA bus
+computers on 10 Mbps Ethernet networks.  The adapters are designed for operation
+in 16-bit ISA or EISA bus expansion slots and are available in
+10BaseT-only or 3-media configurations (10BaseT, 10Base2, and AUI for 10Base-5
+or fiber networks).
+
+CS8920-based adapters are similar to the CS8900-based adapter with additional
+features for Plug and Play (PnP) support and Wakeup Frame recognition.  As
+such, the configuration procedures differ somewhat between the two types of
+adapters.  Refer to the "Adapter Configuration" section for details on
+configuring both types of adapters.
+
+
+1.2. Driver Description
+-----------------------
+
+The CS8900/CS8920 Ethernet Adapter driver for Linux supports the Linux
+v2.3.48 or greater kernel.  It can be compiled directly into the kernel
+or loaded at run-time as a device driver module.
+
+1.2.1 Driver Name: cs89x0
+
+1.2.2 Files in the Driver Archive:
+
+The files in the driver at Cirrus' website include:
+
+  ===================  ====================================================
+  readme.txt           this file
+  build                batch file to compile cs89x0.c.
+  cs89x0.c             driver C code
+  cs89x0.h             driver header file
+  cs89x0.o             pre-compiled module (for v2.2.5 kernel)
+  config/Config.in     sample file to include cs89x0 driver in the kernel.
+  config/Makefile      sample file to include cs89x0 driver in the kernel.
+  config/Space.c       sample file to include cs89x0 driver in the kernel.
+  ===================  ====================================================
+
+
+
+1.3. System Requirements
+------------------------
+
+The following hardware is required:
+
+   * Cirrus Logic LAN (CS8900/20-based) Ethernet ISA Adapter
+
+   * IBM or IBM-compatible PC with:
+     * An 80386 or higher processor
+     * 16 bytes of contiguous IO space available between 210h - 370h
+     * One available IRQ (5,10,11,or 12 for the CS8900, 3-7,9-15 for CS8920).
+
+   * Appropriate cable (and connector for AUI, 10BASE-2) for your network
+     topology.
+
+The following software is required:
+
+   * LINUX kernel version 2.3.48 or higher
+
+   * CS8900/20 Setup Utility (DOS-based)
+
+   * LINUX kernel sources for your kernel (if compiling into kernel)
+
+   * GNU Toolkit (gcc and make) v2.6 or above (if compiling into kernel
+     or a module)
+
+
+
+1.4. Licensing Information
+--------------------------
+
+This program is free software; you can redistribute it and/or modify it under
+the terms of the GNU General Public License as published by the Free Software
+Foundation, version 1.
+
+This program is distributed in the hope that it will be useful, but WITHOUT
+ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+more details.
+
+For a full copy of the GNU General Public License, write to the Free Software
+Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
+
+
+
+2. Adapter Installation and Configuration
+=========================================
+
+Both the CS8900 and CS8920-based adapters can be configured using parameters
+stored in an on-board EEPROM. You must use the DOS-based CS8900/20 Setup
+Utility if you want to change the adapter's configuration in EEPROM.
+
+When loading the driver as a module, you can specify many of the adapter's
+configuration parameters on the command-line to override the EEPROM's settings
+or for interface configuration when an EEPROM is not used. (CS8920-based
+adapters must use an EEPROM.) See Section 3.0 LOADING THE DRIVER AS A MODULE.
+
+Since the CS8900/20 Setup Utility is a DOS-based application, you must install
+and configure the adapter in a DOS-based system using the CS8900/20 Setup
+Utility before installation in the target LINUX system.  (Not required if
+installing a CS8900-based adapter and the default configuration is acceptable.)
+
+
+2.1. CS8900-based Adapter Configuration
+---------------------------------------
+
+CS8900-based adapters shipped from Cirrus Logic have been configured
+with the following "default" settings::
+
+  Operation Mode:      Memory Mode
+  IRQ:                 10
+  Base I/O Address:    300
+  Memory Base Address: D0000
+  Optimization:	       DOS Client
+  Transmission Mode:   Half-duplex
+  BootProm:            None
+  Media Type:	       Autodetect (3-media cards) or
+		       10BASE-T (10BASE-T only adapter)
+
+You should only change the default configuration settings if conflicts with
+another adapter exist. To change the adapter's configuration, run the
+CS8900/20 Setup Utility.
+
+
+2.2. CS8920-based Adapter Configuration
+---------------------------------------
+
+CS8920-based adapters are shipped from Cirrus Logic configured as Plug
+and Play (PnP) enabled.  However, since the cs89x0 driver does NOT
+support PnP, you must install the CS8920 adapter in a DOS-based PC and
+run the CS8900/20 Setup Utility to disable PnP and configure the
+adapter before installation in the target Linux system.  Failure to do
+this will leave the adapter inactive and the driver will be unable to
+communicate with the adapter.
+
+::
+
+	****************************************************************
+	*                    CS8920-BASED ADAPTERS:                    *
+	*                                                              *
+	* CS8920-BASED ADAPTERS ARE PLUG and PLAY ENABLED BY DEFAULT.  *
+	* THE CS89X0 DRIVER DOES NOT SUPPORT PnP. THEREFORE, YOU MUST  *
+	* RUN THE CS8900/20 SETUP UTILITY TO DISABLE PnP SUPPORT AND   *
+	* TO ACTIVATE THE ADAPTER.                                     *
+	****************************************************************
+
+
+
+
+3. Loading the Driver as a Module
+=================================
+
+If the driver is compiled as a loadable module, you can load the driver module
+with the 'modprobe' command.  Many of the adapter's configuration parameters can
+be specified as command-line arguments to the load command.  This facility
+provides a means to override the EEPROM's settings or for interface
+configuration when an EEPROM is not used.
+
+Example::
+
+    insmod cs89x0.o io=0x200 irq=0xA media=aui
+
+This example loads the module and configures the adapter to use an IO port base
+address of 200h, interrupt 10, and use the AUI media connection.  The following
+configuration options are available on the command line::
+
+  io=###               - specify IO address (200h-360h)
+  irq=##               - specify interrupt level
+  use_dma=1            - Enable DMA
+  dma=#                - specify dma channel (Driver is compiled to support
+			 Rx DMA only)
+  dmasize=# (16 or 64) - DMA size 16K or 64K.  Default value is set to 16.
+  media=rj45           - specify media type
+   or media=bnc
+   or media=aui
+   or media=auto
+  duplex=full          - specify forced half/full/autonegotiate duplex
+   or duplex=half
+   or duplex=auto
+  debug=#              - debug level (only available if the driver was compiled
+			 for debugging)
+
+**Notes:**
+
+a) If an EEPROM is present, any specified command-line parameter
+   will override the corresponding configuration value stored in
+   EEPROM.
+
+b) The "io" parameter must be specified on the command-line.
+
+c) The driver's hardware probe routine is designed to avoid
+   writing to I/O space until it knows that there is a cs89x0
+   card at the written addresses.  This could cause problems
+   with device probing.  To avoid this behaviour, add one
+   to the ``io=`` module parameter.  This doesn't actually change
+   the I/O address, but it is a flag to tell the driver
+   to partially initialise the hardware before trying to
+   identify the card.  This could be dangerous if you are
+   not sure that there is a cs89x0 card at the provided address.
+
+   For example, to scan for an adapter located at IO base 0x300,
+   specify an IO address of 0x301.
+
+d) The "duplex=auto" parameter is only supported for the CS8920.
+
+e) The minimum command-line configuration required if an EEPROM is
+   not present is:
+
+   io
+   irq
+   media type (no autodetect)
+
+f) The following additional parameters are CS89XX defaults (values
+   used with no EEPROM or command-line argument).
+
+   * DMA Burst = enabled
+   * IOCHRDY Enabled = enabled
+   * UseSA = enabled
+   * CS8900 defaults to half-duplex if not specified on command-line
+   * CS8920 defaults to autoneg if not specified on command-line
+   * Use reset defaults for other config parameters
+   * dma_mode = 0
+
+g) You can use ifconfig to set the adapter's Ethernet address.
+
+h) Many Linux distributions use the 'modprobe' command to load
+   modules.  This program uses the '/etc/conf.modules' file to
+   determine configuration information which is passed to a driver
+   module when it is loaded.  All the configuration options which are
+   described above may be placed within /etc/conf.modules.
+
+   For example::
+
+     > cat /etc/conf.modules
+     ...
+     alias eth0 cs89x0
+     options cs89x0 io=0x0200 dma=5 use_dma=1
+     ...
+
+   In this example we are telling the module system that the
+   ethernet driver for this machine should use the cs89x0 driver.  We
+   are asking 'modprobe' to pass the 'io', 'dma' and 'use_dma'
+   arguments to the driver when it is loaded.
+
+i) Cirrus recommend that the cs89x0 use the ISA DMA channels 5, 6 or
+   7.  You will probably find that other DMA channels will not work.
+
+j) The cs89x0 supports DMA for receiving only.  DMA mode is
+   significantly more efficient.  Flooding a 400 MHz Celeron machine
+   with large ping packets consumes 82% of its CPU capacity in non-DMA
+   mode.  With DMA this is reduced to 45%.
+
+k) If your Linux kernel was compiled with inbuilt plug-and-play
+   support you will be able to find information about the cs89x0 card
+   with the command::
+
+     cat /proc/isapnp
+
+l) If during DMA operation you find erratic behavior or network data
+   corruption you should use your PC's BIOS to slow the EISA bus clock.
+
+m) If the cs89x0 driver is compiled directly into the kernel
+   (non-modular) then its I/O address is automatically determined by
+   ISA bus probing.  The IRQ number, media options, etc are determined
+   from the card's EEPROM.
+
+n) If the cs89x0 driver is compiled directly into the kernel, DMA
+   mode may be selected by providing the kernel with a boot option
+   'cs89x0_dma=N' where 'N' is the desired DMA channel number (5, 6 or 7).
+
+   Kernel boot options may be provided on the LILO command line::
+
+	LILO boot: linux cs89x0_dma=5
+
+   or they may be placed in /etc/lilo.conf::
+
+	image=/boot/bzImage-2.3.48
+	  append="cs89x0_dma=5"
+	  label=linux
+	  root=/dev/hda5
+	  read-only
+
+   The DMA Rx buffer size is hardwired to 16 kbytes in this mode.
+   (64k mode is not available).
+
+
+4. Compiling the Driver
+=======================
+
+The cs89x0 driver can be compiled directly into the kernel or compiled into
+a loadable device driver module.
+
+Just use the standard way to configure the driver and compile the Kernel.
+
+
+4.1. Compiling the Driver to Support Rx DMA
+-------------------------------------------
+
+The compile-time optionality for DMA was removed in the 2.3 kernel
+series.  DMA support is now unconditionally part of the driver.  It is
+enabled by the 'use_dma=1' module option.
+
+
+5. Testing and Troubleshooting
+==============================
+
+5.1. Known Defects and Limitations
+----------------------------------
+
+Refer to the RELEASE.TXT file distributed as part of this archive for a list of
+known defects, driver limitations, and work arounds.
+
+
+5.2. Testing the Adapter
+------------------------
+
+Once the adapter has been installed and configured, the diagnostic option of
+the CS8900/20 Setup Utility can be used to test the functionality of the
+adapter and its network connection.  Use the diagnostics 'Self Test' option to
+test the functionality of the adapter with the hardware configuration you have
+assigned. You can use the diagnostics 'Network Test' to test the ability of the
+adapter to communicate across the Ethernet with another PC equipped with a
+CS8900/20-based adapter card (it must also be running the CS8900/20 Setup
+Utility).
+
+.. note::
+
+	 The Setup Utility's diagnostics are designed to run in a
+	 DOS-only operating system environment.  DO NOT run the diagnostics
+	 from a DOS or command prompt session under Windows 95, Windows NT,
+	 OS/2, or other operating system.
+
+To run the diagnostics tests on the CS8900/20 adapter:
+
+   1.  Boot DOS on the PC and start the CS8900/20 Setup Utility.
+
+   2.  The adapter's current configuration is displayed.  Hit the ENTER key to
+       get to the main menu.
+
+   3.  Select 'Diagnostics' (ALT-G) from the main menu.
+       * Select 'Self-Test' to test the adapter's basic functionality.
+       * Select 'Network Test' to test the network connection and cabling.
+
+
+5.2.1. Diagnostic Self-test
+^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The diagnostic self-test checks the adapter's basic functionality as well as
+its ability to communicate across the ISA bus based on the system resources
+assigned during hardware configuration.  The following tests are performed:
+
+   * IO Register Read/Write Test
+
+     The IO Register Read/Write test insures that the CS8900/20 can be
+     accessed in IO mode, and that the IO base address is correct.
+
+   * Shared Memory Test
+
+     The Shared Memory test ensures the CS8900/20 can be accessed in memory
+     mode and that the range of memory addresses assigned does not conflict
+     with other devices in the system.
+
+   * Interrupt Test
+
+     The Interrupt test ensures there are no conflicts with the assigned IRQ
+     signal.
+
+   * EEPROM Test
+
+     The EEPROM test ensures the EEPROM can be read.
+
+   * Chip RAM Test
+
+     The Chip RAM test ensures the 4K of memory internal to the CS8900/20 is
+     working properly.
+
+   * Internal Loop-back Test
+
+     The Internal Loop Back test ensures the adapter's transmitter and
+     receiver are operating properly.  If this test fails, make sure the
+     adapter's cable is connected to the network (check for LED activity for
+     example).
+
+   * Boot PROM Test
+
+     The Boot PROM test ensures the Boot PROM is present, and can be read.
+     Failure indicates the Boot PROM was not successfully read due to a
+     hardware problem or due to a conflict on the Boot PROM address
+     assignment. (Test only applies if the adapter is configured to use the
+     Boot PROM option.)
+
+Failure of a test item indicates a possible system resource conflict with
+another device on the ISA bus.  In this case, you should use the Manual Setup
+option to reconfigure the adapter by selecting a different value for the system
+resource that failed.
+
+
+5.2.2. Diagnostic Network Test
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+The Diagnostic Network Test verifies a working network connection by
+transferring data between two CS8900/20 adapters installed in different PCs
+on the same network. (Note: the diagnostic network test should not be run
+between two nodes across a router.)
+
+This test requires that each of the two PCs have a CS8900/20-based adapter
+installed and have the CS8900/20 Setup Utility running.  The first PC is
+configured as a Responder and the other PC is configured as an Initiator.
+Once the Initiator is started, it sends data frames to the Responder which
+returns the frames to the Initiator.
+
+The total number of frames received and transmitted are displayed on the
+Initiator's display, along with a count of the number of frames received and
+transmitted OK or in error.  The test can be terminated anytime by the user at
+either PC.
+
+To set up the Diagnostic Network Test:
+
+    1.  Select a PC with a CS8900/20-based adapter and a known working network
+	connection to act as the Responder.  Run the CS8900/20 Setup Utility
+	and select 'Diagnostics -> Network Test -> Responder' from the main
+	menu.  Hit ENTER to start the Responder.
+
+    2.  Return to the PC with the CS8900/20-based adapter you want to test and
+	start the CS8900/20 Setup Utility.
+
+    3.  From the main menu, select 'Diagnostics -> Network Test -> Initiator'.
+	Hit ENTER to start the test.
+
+You may stop the test on the Initiator at any time while allowing the Responder
+to continue running.  In this manner, you can move to additional PCs and test
+them by starting the Initiator on another PC without having to stop/start the
+Responder.
+
+
+
+5.3. Using the Adapter's LEDs
+-----------------------------
+
+The 2 and 3-media adapters have two LEDs visible on the back end of the board
+located near the 10Base-T connector.
+
+Link Integrity LED: A "steady" ON of the green LED indicates a valid 10Base-T
+connection.  (Only applies to 10Base-T.  The green LED has no significance for
+a 10Base-2 or AUI connection.)
+
+TX/RX LED: The yellow LED lights briefly each time the adapter transmits or
+receives data. (The yellow LED will appear to "flicker" on a typical network.)
+
+
+5.4. Resolving I/O Conflicts
+----------------------------
+
+An IO conflict occurs when two or more adapters use the same ISA resource (IO
+address, memory address or IRQ).  You can usually detect an IO conflict in one
+of four ways after installing and/or configuring the CS8900/20-based adapter:
+
+    1.  The system does not boot properly (or at all).
+
+    2.  The driver cannot communicate with the adapter, reporting an "Adapter
+	not found" error message.
+
+    3.  You cannot connect to the network or the driver will not load.
+
+    4.  If you have configured the adapter to run in memory mode but the driver
+	reports it is using IO mode when loading, this is an indication of a
+	memory address conflict.
+
+If an IO conflict occurs, run the CS8900/20 Setup Utility and perform a
+diagnostic self-test.  Normally, the ISA resource in conflict will fail the
+self-test.  If so, reconfigure the adapter selecting another choice for the
+resource in conflict.  Run the diagnostics again to check for further IO
+conflicts.
+
+In some cases, such as when the PC will not boot, it may be necessary to remove
+the adapter and reconfigure it by installing it in another PC to run the
+CS8900/20 Setup Utility.  Once reinstalled in the target system, run the
+diagnostics self-test to ensure the new configuration is free of conflicts
+before loading the driver again.
+
+When manually configuring the adapter, keep in mind the typical ISA system
+resource usage as indicated in the tables below.
+
+::
+
+  I/O Address    	Device                        IRQ      Device
+  -----------    	--------                      ---      --------
+     200-20F       	Game I/O adapter               3       COM2, Bus Mouse
+     230-23F       	Bus Mouse                      4       COM1
+     270-27F       	LPT3: third parallel port      5       LPT2
+     2F0-2FF       	COM2: second serial port       6       Floppy Disk controller
+     320-32F       	Fixed disk controller          7       LPT1
+							 8       Real-time Clock
+						     9       EGA/VGA display adapter
+						    12       Mouse (PS/2)
+  Memory Address  Device                          13       Math Coprocessor
+  --------------  ---------------------           14       Hard Disk controller
+  A000-BFFF	EGA Graphics Adapter
+  A000-C7FF	VGA Graphics Adapter
+  B000-BFFF	Mono Graphics Adapter
+  B800-BFFF	Color Graphics Adapter
+  E000-FFFF	AT BIOS
+
+
+
+
+6. Technical Support
+====================
+
+6.1. Contacting Cirrus Logic's Technical Support
+------------------------------------------------
+
+Cirrus Logic's CS89XX Technical Application Support can be reached at::
+
+  Telephone  :(800) 888-5016 (from inside U.S. and Canada)
+	     :(512) 442-7555 (from outside the U.S. and Canada)
+  Fax        :(512) 912-3871
+  Email      :ethernet@crystal.cirrus.com
+  WWW        :http://www.cirrus.com
+
+
+6.2. Information Required before Contacting Technical Support
+-------------------------------------------------------------
+
+Before contacting Cirrus Logic for technical support, be prepared to provide as
+much of the following information as possible.
+
+1.) Adapter type (CRD8900, CDB8900, CDB8920, etc.)
+
+2.) Adapter configuration
+
+    * IO Base, Memory Base, IO or memory mode enabled, IRQ, DMA channel
+    * Plug and Play enabled/disabled (CS8920-based adapters only)
+    * Configured for media auto-detect or specific media type (which type).
+
+3.) PC System's Configuration
+
+    * Plug and Play system (yes/no)
+    * BIOS (make and version)
+    * System make and model
+    * CPU (type and speed)
+    * System RAM
+    * SCSI Adapter
+
+4.) Software
+
+    * CS89XX driver and version
+    * Your network operating system and version
+    * Your system's OS version
+    * Version of all protocol support files
+
+5.) Any Error Message displayed.
+
+
+
+6.3. Obtaining the Latest Driver Version
+----------------------------------------
+
+You can obtain the latest CS89XX drivers and support software from Cirrus Logic's
+Web site.  You can also contact Cirrus Logic's Technical Support (email:
+ethernet@crystal.cirrus.com) and request that you be registered for automatic
+software-update notification.
+
+Cirrus Logic maintains a web page at http://www.cirrus.com with the
+latest drivers and technical publications.
+
+
+6.4. Current maintainer
+-----------------------
+
+In February 2000 the maintenance of this driver was assumed by Andrew
+Morton.
+
+6.5. Kernel module parameters
+-----------------------------
+
+For use in embedded environments with no cs89x0 EEPROM, the kernel boot
+parameter ``cs89x0_media=`` has been implemented.  Usage is::
+
+	cs89x0_media=rj45    or
+	cs89x0_media=aui     or
+	cs89x0_media=bnc
diff --git a/Documentation/networking/device_drivers/cirrus/cs89x0.txt b/Documentation/networking/device_drivers/cirrus/cs89x0.txt
deleted file mode 100644
index 0e190180eec8..000000000000
--- a/Documentation/networking/device_drivers/cirrus/cs89x0.txt
+++ /dev/null
@@ -1,624 +0,0 @@
-
-NOTE
-----
-
-This document was contributed by Cirrus Logic for kernel 2.2.5.  This version
-has been updated for 2.3.48 by Andrew Morton.
-
-Cirrus make a copy of this driver available at their website, as
-described below.  In general, you should use the driver version which
-comes with your Linux distribution.
-
-
-
-CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
-Linux Network Interface Driver ver. 2.00 <kernel 2.3.48>
-===============================================================================
- 
-
-TABLE OF CONTENTS
-
-1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
-    1.1 Product Overview 
-    1.2 Driver Description
-	1.2.1 Driver Name
-	1.2.2 File in the Driver Package
-    1.3 System Requirements
-    1.4 Licensing Information
-
-2.0 ADAPTER INSTALLATION and CONFIGURATION
-    2.1 CS8900-based Adapter Configuration
-    2.2 CS8920-based Adapter Configuration 
-
-3.0 LOADING THE DRIVER AS A MODULE
-
-4.0 COMPILING THE DRIVER
-    4.1 Compiling the Driver as a Loadable Module
-    4.2 Compiling the driver to support memory mode
-    4.3 Compiling the driver to support Rx DMA 
-
-5.0 TESTING AND TROUBLESHOOTING
-    5.1 Known Defects and Limitations
-    5.2 Testing the Adapter
-        5.2.1 Diagnostic Self-Test
-        5.2.2 Diagnostic Network Test
-    5.3 Using the Adapter's LEDs
-    5.4 Resolving I/O Conflicts
-
-6.0 TECHNICAL SUPPORT
-    6.1 Contacting Cirrus Logic's Technical Support
-    6.2 Information Required Before Contacting Technical Support
-    6.3 Obtaining the Latest Driver Version
-    6.4 Current maintainer
-    6.5 Kernel boot parameters
-
-
-1.0 CIRRUS LOGIC LAN CS8900/CS8920 ETHERNET ADAPTERS
-===============================================================================
-
-
-1.1 PRODUCT OVERVIEW
-
-The CS8900-based ISA Ethernet Adapters from Cirrus Logic follow 
-IEEE 802.3 standards and support half or full-duplex operation in ISA bus 
-computers on 10 Mbps Ethernet networks.  The adapters are designed for operation 
-in 16-bit ISA or EISA bus expansion slots and are available in 
-10BaseT-only or 3-media configurations (10BaseT, 10Base2, and AUI for 10Base-5 
-or fiber networks).  
-
-CS8920-based adapters are similar to the CS8900-based adapter with additional 
-features for Plug and Play (PnP) support and Wakeup Frame recognition.  As 
-such, the configuration procedures differ somewhat between the two types of 
-adapters.  Refer to the "Adapter Configuration" section for details on 
-configuring both types of adapters.
-
-
-1.2 DRIVER DESCRIPTION
-
-The CS8900/CS8920 Ethernet Adapter driver for Linux supports the Linux
-v2.3.48 or greater kernel.  It can be compiled directly into the kernel
-or loaded at run-time as a device driver module.
-
-1.2.1 Driver Name: cs89x0
-
-1.2.2 Files in the Driver Archive:
-
-The files in the driver at Cirrus' website include:
-
-  readme.txt         - this file
-  build              - batch file to compile cs89x0.c.
-  cs89x0.c           - driver C code
-  cs89x0.h           - driver header file
-  cs89x0.o           - pre-compiled module (for v2.2.5 kernel)
-  config/Config.in   - sample file to include cs89x0 driver in the kernel.
-  config/Makefile    - sample file to include cs89x0 driver in the kernel.
-  config/Space.c     - sample file to include cs89x0 driver in the kernel.
-
-
-
-1.3 SYSTEM REQUIREMENTS
-
-The following hardware is required:
-
-   * Cirrus Logic LAN (CS8900/20-based) Ethernet ISA Adapter   
-
-   * IBM or IBM-compatible PC with:
-     * An 80386 or higher processor
-     * 16 bytes of contiguous IO space available between 210h - 370h
-     * One available IRQ (5,10,11,or 12 for the CS8900, 3-7,9-15 for CS8920).
-
-   * Appropriate cable (and connector for AUI, 10BASE-2) for your network
-     topology.
-
-The following software is required:
-
-* LINUX kernel version 2.3.48 or higher
-
-   * CS8900/20 Setup Utility (DOS-based)
-
-   * LINUX kernel sources for your kernel (if compiling into kernel)
-
-   * GNU Toolkit (gcc and make) v2.6 or above (if compiling into kernel 
-     or a module)   
-
-
-
-1.4 LICENSING INFORMATION
-
-This program is free software; you can redistribute it and/or modify it under
-the terms of the GNU General Public License as published by the Free Software
-Foundation, version 1.
-
-This program is distributed in the hope that it will be useful, but WITHOUT
-ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-more details.
-
-For a full copy of the GNU General Public License, write to the Free Software
-Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
-
-
-
-2.0 ADAPTER INSTALLATION and CONFIGURATION
-===============================================================================
-
-Both the CS8900 and CS8920-based adapters can be configured using parameters 
-stored in an on-board EEPROM. You must use the DOS-based CS8900/20 Setup 
-Utility if you want to change the adapter's configuration in EEPROM.  
-
-When loading the driver as a module, you can specify many of the adapter's 
-configuration parameters on the command-line to override the EEPROM's settings 
-or for interface configuration when an EEPROM is not used. (CS8920-based 
-adapters must use an EEPROM.) See Section 3.0 LOADING THE DRIVER AS A MODULE.
-
-Since the CS8900/20 Setup Utility is a DOS-based application, you must install 
-and configure the adapter in a DOS-based system using the CS8900/20 Setup 
-Utility before installation in the target LINUX system.  (Not required if 
-installing a CS8900-based adapter and the default configuration is acceptable.)
-     
-
-2.1 CS8900-BASED ADAPTER CONFIGURATION
-
-CS8900-based adapters shipped from Cirrus Logic have been configured 
-with the following "default" settings:
-
-  Operation Mode:      Memory Mode
-  IRQ:                 10
-  Base I/O Address:    300
-  Memory Base Address: D0000
-  Optimization:	       DOS Client
-  Transmission Mode:   Half-duplex
-  BootProm:            None
-  Media Type:	       Autodetect (3-media cards) or 
-                       10BASE-T (10BASE-T only adapter)
-
-You should only change the default configuration settings if conflicts with 
-another adapter exists. To change the adapter's configuration, run the 
-CS8900/20 Setup Utility. 
-
-
-2.2 CS8920-BASED ADAPTER CONFIGURATION
-
-CS8920-based adapters are shipped from Cirrus Logic configured as Plug
-and Play (PnP) enabled.  However, since the cs89x0 driver does NOT
-support PnP, you must install the CS8920 adapter in a DOS-based PC and
-run the CS8900/20 Setup Utility to disable PnP and configure the
-adapter before installation in the target Linux system.  Failure to do
-this will leave the adapter inactive and the driver will be unable to
-communicate with the adapter.  
-
-
-        **************************************************************** 
-        *                    CS8920-BASED ADAPTERS:                    *
-        *                                                              * 
-        * CS8920-BASED ADAPTERS ARE PLUG and PLAY ENABLED BY DEFAULT.  * 
-        * THE CS89X0 DRIVER DOES NOT SUPPORT PnP. THEREFORE, YOU MUST  *
-        * RUN THE CS8900/20 SETUP UTILITY TO DISABLE PnP SUPPORT AND   *
-        * TO ACTIVATE THE ADAPTER.                                     *
-        ****************************************************************
-
-
-
-
-3.0 LOADING THE DRIVER AS A MODULE
-===============================================================================
-
-If the driver is compiled as a loadable module, you can load the driver module
-with the 'modprobe' command.  Many of the adapter's configuration parameters can 
-be specified as command-line arguments to the load command.  This facility 
-provides a means to override the EEPROM's settings or for interface 
-configuration when an EEPROM is not used.
-
-Example:
-
-    insmod cs89x0.o io=0x200 irq=0xA media=aui
-
-This example loads the module and configures the adapter to use an IO port base
-address of 200h, interrupt 10, and use the AUI media connection.  The following
-configuration options are available on the command line:
-
-* io=###               - specify IO address (200h-360h)
-* irq=##               - specify interrupt level
-* use_dma=1            - Enable DMA
-* dma=#                - specify dma channel (Driver is compiled to support
-                         Rx DMA only)
-* dmasize=# (16 or 64) - DMA size 16K or 64K.  Default value is set to 16.
-* media=rj45           - specify media type
-   or media=bnc
-   or media=aui
-   or media=auto
-* duplex=full          - specify forced half/full/autonegotiate duplex
-   or duplex=half
-   or duplex=auto
-* debug=#              - debug level (only available if the driver was compiled
-                         for debugging)
-
-NOTES:
-
-a) If an EEPROM is present, any specified command-line parameter
-   will override the corresponding configuration value stored in
-   EEPROM.
-
-b) The "io" parameter must be specified on the command-line.  
-
-c) The driver's hardware probe routine is designed to avoid
-   writing to I/O space until it knows that there is a cs89x0
-   card at the written addresses.  This could cause problems
-   with device probing.  To avoid this behaviour, add one
-   to the `io=' module parameter.  This doesn't actually change
-   the I/O address, but it is a flag to tell the driver
-   to partially initialise the hardware before trying to
-   identify the card.  This could be dangerous if you are
-   not sure that there is a cs89x0 card at the provided address.
-
-   For example, to scan for an adapter located at IO base 0x300,
-   specify an IO address of 0x301.  
-
-d) The "duplex=auto" parameter is only supported for the CS8920.
-
-e) The minimum command-line configuration required if an EEPROM is
-   not present is:
-
-   io 
-   irq 
-   media type (no autodetect)
-
-f) The following additional parameters are CS89XX defaults (values
-   used with no EEPROM or command-line argument).
-
-   * DMA Burst = enabled
-   * IOCHRDY Enabled = enabled
-   * UseSA = enabled
-   * CS8900 defaults to half-duplex if not specified on command-line
-   * CS8920 defaults to autoneg if not specified on command-line
-   * Use reset defaults for other config parameters
-   * dma_mode = 0
-
-g) You can use ifconfig to set the adapter's Ethernet address.
-
-h) Many Linux distributions use the 'modprobe' command to load
-   modules.  This program uses the '/etc/conf.modules' file to
-   determine configuration information which is passed to a driver
-   module when it is loaded.  All the configuration options which are
-   described above may be placed within /etc/conf.modules.
-
-   For example:
-
-   > cat /etc/conf.modules
-   ...
-   alias eth0 cs89x0
-   options cs89x0 io=0x0200 dma=5 use_dma=1
-   ...
-
-   In this example we are telling the module system that the
-   ethernet driver for this machine should use the cs89x0 driver.  We
-   are asking 'modprobe' to pass the 'io', 'dma' and 'use_dma'
-   arguments to the driver when it is loaded.
-
-i) Cirrus recommend that the cs89x0 use the ISA DMA channels 5, 6 or
-   7.  You will probably find that other DMA channels will not work.
-
-j) The cs89x0 supports DMA for receiving only.  DMA mode is
-   significantly more efficient.  Flooding a 400 MHz Celeron machine
-   with large ping packets consumes 82% of its CPU capacity in non-DMA
-   mode.  With DMA this is reduced to 45%.
-
-k) If your Linux kernel was compiled with inbuilt plug-and-play
-   support you will be able to find information about the cs89x0 card
-   with the command
-
-   cat /proc/isapnp
-
-l) If during DMA operation you find erratic behavior or network data
-   corruption you should use your PC's BIOS to slow the EISA bus clock.
-
-m) If the cs89x0 driver is compiled directly into the kernel
-   (non-modular) then its I/O address is automatically determined by
-   ISA bus probing.  The IRQ number, media options, etc are determined
-   from the card's EEPROM.
-
-n) If the cs89x0 driver is compiled directly into the kernel, DMA
-   mode may be selected by providing the kernel with a boot option
-   'cs89x0_dma=N' where 'N' is the desired DMA channel number (5, 6 or 7).
-
-   Kernel boot options may be provided on the LILO command line:
-
-	LILO boot: linux cs89x0_dma=5
-
-   or they may be placed in /etc/lilo.conf:
-
-	image=/boot/bzImage-2.3.48
-	  append="cs89x0_dma=5"
-	  label=linux
-	  root=/dev/hda5
-	  read-only
-
-   The DMA Rx buffer size is hardwired to 16 kbytes in this mode.
-   (64k mode is not available).
-
-
-4.0 COMPILING THE DRIVER
-===============================================================================
-
-The cs89x0 driver can be compiled directly into the kernel or compiled into
-a loadable device driver module.
-
-
-4.1 COMPILING THE DRIVER AS A LOADABLE MODULE
-
-To compile the driver into a loadable module, use the following command 
-(single command line, without quotes):
-
-"gcc -D__KERNEL__ -I/usr/src/linux/include -I/usr/src/linux/net/inet -Wall 
--Wstrict-prototypes -O2 -fomit-frame-pointer -DMODULE -DCONFIG_MODVERSIONS 
--c cs89x0.c"
-
-4.2 COMPILING THE DRIVER TO SUPPORT MEMORY MODE
-
-Support for memory mode was not carried over into the 2.3 series kernels.
-
-4.3 COMPILING THE DRIVER TO SUPPORT Rx DMA
-
-The compile-time optionality for DMA was removed in the 2.3 kernel
-series.  DMA support is now unconditionally part of the driver.  It is
-enabled by the 'use_dma=1' module option.
-
-
-5.0 TESTING AND TROUBLESHOOTING
-===============================================================================
-
-5.1 KNOWN DEFECTS and LIMITATIONS
-
-Refer to the RELEASE.TXT file distributed as part of this archive for a list of 
-known defects, driver limitations, and work arounds.
-
-
-5.2 TESTING THE ADAPTER
-
-Once the adapter has been installed and configured, the diagnostic option of 
-the CS8900/20 Setup Utility can be used to test the functionality of the 
-adapter and its network connection.  Use the diagnostics 'Self Test' option to
-test the functionality of the adapter with the hardware configuration you have
-assigned. You can use the diagnostics 'Network Test' to test the ability of the
-adapter to communicate across the Ethernet with another PC equipped with a 
-CS8900/20-based adapter card (it must also be running the CS8900/20 Setup 
-Utility).
-
-         NOTE: The Setup Utility's diagnostics are designed to run in a
-         DOS-only operating system environment.  DO NOT run the diagnostics 
-         from a DOS or command prompt session under Windows 95, Windows NT, 
-         OS/2, or other operating system.
-
-To run the diagnostics tests on the CS8900/20 adapter:
-
-   1.) Boot DOS on the PC and start the CS8900/20 Setup Utility.
-
-   2.) The adapter's current configuration is displayed.  Hit the ENTER key to
-       get to the main menu.
-
-   4.) Select 'Diagnostics' (ALT-G) from the main menu.  
-       * Select 'Self-Test' to test the adapter's basic functionality.
-       * Select 'Network Test' to test the network connection and cabling.
-
-
-5.2.1 DIAGNOSTIC SELF-TEST
-
-The diagnostic self-test checks the adapter's basic functionality as well as 
-its ability to communicate across the ISA bus based on the system resources 
-assigned during hardware configuration.  The following tests are performed:
-
-   * IO Register Read/Write Test
-     The IO Register Read/Write test insures that the CS8900/20 can be 
-     accessed in IO mode, and that the IO base address is correct.
-
-   * Shared Memory Test
-     The Shared Memory test insures the CS8900/20 can be accessed in memory 
-     mode and that the range of memory addresses assigned does not conflict 
-     with other devices in the system.
-
-   * Interrupt Test
-     The Interrupt test insures there are no conflicts with the assigned IRQ
-     signal.
-
-   * EEPROM Test
-     The EEPROM test insures the EEPROM can be read.
-
-   * Chip RAM Test
-     The Chip RAM test insures the 4K of memory internal to the CS8900/20 is
-     working properly.
-
-   * Internal Loop-back Test
-     The Internal Loop Back test insures the adapter's transmitter and 
-     receiver are operating properly.  If this test fails, make sure the 
-     adapter's cable is connected to the network (check for LED activity for 
-     example).
-
-   * Boot PROM Test
-     The Boot PROM  test insures the Boot PROM is present, and can be read.
-     Failure indicates the Boot PROM  was not successfully read due to a
-     hardware problem or due to a conflicts on the Boot PROM address
-     assignment. (Test only applies if the adapter is configured to use the
-     Boot PROM option.)
-
-Failure of a test item indicates a possible system resource conflict with 
-another device on the ISA bus.  In this case, you should use the Manual Setup 
-option to reconfigure the adapter by selecting a different value for the system
-resource that failed.
-
-
-5.2.2 DIAGNOSTIC NETWORK TEST
-
-The Diagnostic Network Test verifies a working network connection by 
-transferring data between two CS8900/20 adapters installed in different PCs 
-on the same network. (Note: the diagnostic network test should not be run 
-between two nodes across a router.) 
-
-This test requires that each of the two PCs have a CS8900/20-based adapter
-installed and have the CS8900/20 Setup Utility running.  The first PC is 
-configured as a Responder and the other PC is configured as an Initiator.  
-Once the Initiator is started, it sends data frames to the Responder which 
-returns the frames to the Initiator.
-
-The total number of frames received and transmitted are displayed on the 
-Initiator's display, along with a count of the number of frames received and 
-transmitted OK or in error.  The test can be terminated anytime by the user at 
-either PC.
-
-To setup the Diagnostic Network Test:
-
-    1.) Select a PC with a CS8900/20-based adapter and a known working network
-        connection to act as the Responder.  Run the CS8900/20 Setup Utility 
-        and select 'Diagnostics -> Network Test -> Responder' from the main 
-        menu.  Hit ENTER to start the Responder.
-
-    2.) Return to the PC with the CS8900/20-based adapter you want to test and
-        start the CS8900/20 Setup Utility. 
-
-    3.) From the main menu, Select 'Diagnostic -> Network Test -> Initiator'.
-        Hit ENTER to start the test.
- 
-You may stop the test on the Initiator at any time while allowing the Responder
-to continue running.  In this manner, you can move to additional PCs and test 
-them by starting the Initiator on another PC without having to stop/start the 
-Responder.
- 
-
-
-5.3 USING THE ADAPTER'S LEDs
-
-The 2 and 3-media adapters have two LEDs visible on the back end of the board 
-located near the 10Base-T connector.  
-
-Link Integrity LED: A "steady" ON of the green LED indicates a valid 10Base-T 
-connection.  (Only applies to 10Base-T.  The green LED has no significance for
-a 10Base-2 or AUI connection.)
-
-TX/RX LED: The yellow LED lights briefly each time the adapter transmits or 
-receives data. (The yellow LED will appear to "flicker" on a typical network.)
-
-
-5.4 RESOLVING I/O CONFLICTS
-
-An IO conflict occurs when two or more adapter use the same ISA resource (IO 
-address, memory address or IRQ).  You can usually detect an IO conflict in one 
-of four ways after installing and or configuring the CS8900/20-based adapter:
-
-    1.) The system does not boot properly (or at all).
-
-    2.) The driver cannot communicate with the adapter, reporting an "Adapter
-        not found" error message.
-
-    3.) You cannot connect to the network or the driver will not load.
-
-    4.) If you have configured the adapter to run in memory mode but the driver
-        reports it is using IO mode when loading, this is an indication of a
-        memory address conflict.
-
-If an IO conflict occurs, run the CS8900/20 Setup Utility and perform a 
-diagnostic self-test.  Normally, the ISA resource in conflict will fail the 
-self-test.  If so, reconfigure the adapter selecting another choice for the 
-resource in conflict.  Run the diagnostics again to check for further IO 
-conflicts.
-
-In some cases, such as when the PC will not boot, it may be necessary to remove
-the adapter and reconfigure it by installing it in another PC to run the 
-CS8900/20 Setup Utility.  Once reinstalled in the target system, run the 
-diagnostics self-test to ensure the new configuration is free of conflicts 
-before loading the driver again.
-
-When manually configuring the adapter, keep in mind the typical ISA system 
-resource usage as indicated in the tables below.
-
-I/O Address    	Device                        IRQ      Device
------------    	--------                      ---      --------
- 200-20F       	Game I/O adapter               3       COM2, Bus Mouse
- 230-23F       	Bus Mouse                      4       COM1
- 270-27F       	LPT3: third parallel port      5       LPT2
- 2F0-2FF       	COM2: second serial port       6       Floppy Disk controller
- 320-32F       	Fixed disk controller          7       LPT1
-                                      	       8       Real-time Clock
-                                                 9       EGA/VGA display adapter    
-                                                12       Mouse (PS/2)                              
-Memory Address  Device                          13       Math Coprocessor
---------------  ---------------------           14       Hard Disk controller
-A000-BFFF	EGA Graphics Adapter
-A000-C7FF	VGA Graphics Adapter
-B000-BFFF	Mono Graphics Adapter
-B800-BFFF	Color Graphics Adapter
-E000-FFFF	AT BIOS
-
-
-
-
-6.0 TECHNICAL SUPPORT
-===============================================================================
-
-6.1 CONTACTING CIRRUS LOGIC'S TECHNICAL SUPPORT
-
-Cirrus Logic's CS89XX Technical Application Support can be reached at:
-
-Telephone  :(800) 888-5016 (from inside U.S. and Canada)
-           :(512) 442-7555 (from outside the U.S. and Canada)
-Fax        :(512) 912-3871
-Email      :ethernet@crystal.cirrus.com
-WWW        :http://www.cirrus.com
-
-
-6.2 INFORMATION REQUIRED BEFORE CONTACTING TECHNICAL SUPPORT
-
-Before contacting Cirrus Logic for technical support, be prepared to provide as 
-Much of the following information as possible. 
-
-1.) Adapter type (CRD8900, CDB8900, CDB8920, etc.)
-
-2.) Adapter configuration
-
-    * IO Base, Memory Base, IO or memory mode enabled, IRQ, DMA channel
-    * Plug and Play enabled/disabled (CS8920-based adapters only)
-    * Configured for media auto-detect or specific media type (which type).    
-
-3.) PC System's Configuration
-
-    * Plug and Play system (yes/no)
-    * BIOS (make and version)
-    * System make and model
-    * CPU (type and speed)
-    * System RAM
-    * SCSI Adapter
-
-4.) Software
-
-    * CS89XX driver and version
-    * Your network operating system and version
-    * Your system's OS version 
-    * Version of all protocol support files
-
-5.) Any Error Message displayed.
-
-
-
-6.3 OBTAINING THE LATEST DRIVER VERSION
-
-You can obtain the latest CS89XX drivers and support software from Cirrus Logic's 
-Web site.  You can also contact Cirrus Logic's Technical Support (email:
-ethernet@crystal.cirrus.com) and request that you be registered for automatic 
-software-update notification.
-
-Cirrus Logic maintains a web page at http://www.cirrus.com with the
-latest drivers and technical publications.
-
-
-6.4 Current maintainer
-
-In February 2000 the maintenance of this driver was assumed by Andrew
-Morton.
-
-6.5 Kernel module parameters
-
-For use in embedded environments with no cs89x0 EEPROM, the kernel boot
-parameter `cs89x0_media=' has been implemented.  Usage is:
-
-	cs89x0_media=rj45    or
-	cs89x0_media=aui     or
-	cs89x0_media=bnc
-
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 23c4ec9c9125..0b39342e2a1f 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -32,6 +32,7 @@ Contents:
    amazon/ena
    aquantia/atlantic
    chelsio/cxgb
+   cirrus/cs89x0
 
 .. only::  subproject and html
 
diff --git a/drivers/net/ethernet/cirrus/Kconfig b/drivers/net/ethernet/cirrus/Kconfig
index 48f3198381bc..8d845f5ee0c5 100644
--- a/drivers/net/ethernet/cirrus/Kconfig
+++ b/drivers/net/ethernet/cirrus/Kconfig
@@ -24,7 +24,7 @@ config CS89x0
 	---help---
 	  Support for CS89x0 chipset based Ethernet cards. If you have a
 	  network (Ethernet) card of this type, say Y and read the file
-	  <file:Documentation/networking/device_drivers/cirrus/cs89x0.txt>.
+	  <file:Documentation/networking/device_drivers/cirrus/cs89x0.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called cs89x0.
-- 
cgit v1.2.3-59-g8ed1b


From e1ddedb5cbd6f8ec2f41874bc06e03023fbd9d99 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:40 +0200
Subject: docs: networking: device drivers: convert davicom/dm9000.txt to ReST

- add SPDX header;
- add a document title;
- mark lists as such;
- mark tables as such;
- mark code blocks and literals as such;
- use the right horizontal tag markup;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/davicom/dm9000.rst   | 171 +++++++++++++++++++++
 .../networking/device_drivers/davicom/dm9000.txt   | 167 --------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 3 files changed, 172 insertions(+), 167 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/davicom/dm9000.rst
 delete mode 100644 Documentation/networking/device_drivers/davicom/dm9000.txt

diff --git a/Documentation/networking/device_drivers/davicom/dm9000.rst b/Documentation/networking/device_drivers/davicom/dm9000.rst
new file mode 100644
index 000000000000..d5458da01083
--- /dev/null
+++ b/Documentation/networking/device_drivers/davicom/dm9000.rst
@@ -0,0 +1,171 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+DM9000 Network driver
+=====================
+
+Copyright 2008 Simtec Electronics,
+
+	  Ben Dooks <ben@simtec.co.uk> <ben-linux@fluff.org>
+
+
+Introduction
+------------
+
+This file describes how to use the DM9000 platform-device based network driver
+that is contained in the files drivers/net/dm9000.c and drivers/net/dm9000.h.
+
+The driver supports three DM9000 variants, the DM9000E which is the first chip
+supported as well as the newer DM9000A and DM9000B devices. It is currently
+maintained and tested by Ben Dooks, who should be CC'd on any patches for this
+driver.
+
+
+Defining the platform device
+----------------------------
+
+The minimum set of resources attached to the platform device is as follows:
+
+    1) The physical address of the address register
+    2) The physical address of the data register
+    3) The IRQ line the device's interrupt pin is connected to.
+
+These resources should be specified in that order, as the ordering of the
+two address regions is important (the driver expects these to be address
+and then data).
+
+An example from arch/arm/mach-s3c2410/mach-bast.c is::
+
+  static struct resource bast_dm9k_resource[] = {
+	[0] = {
+		.start = S3C2410_CS5 + BAST_PA_DM9000,
+		.end   = S3C2410_CS5 + BAST_PA_DM9000 + 3,
+		.flags = IORESOURCE_MEM,
+	},
+	[1] = {
+		.start = S3C2410_CS5 + BAST_PA_DM9000 + 0x40,
+		.end   = S3C2410_CS5 + BAST_PA_DM9000 + 0x40 + 0x3f,
+		.flags = IORESOURCE_MEM,
+	},
+	[2] = {
+		.start = IRQ_DM9000,
+		.end   = IRQ_DM9000,
+		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
+	}
+  };
+
+  static struct platform_device bast_device_dm9k = {
+	.name		= "dm9000",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(bast_dm9k_resource),
+	.resource	= bast_dm9k_resource,
+  };
+
+Note the setting of the IRQ trigger flag in bast_dm9k_resource[2].flags,
+as this will generate a warning if it is not present. The trigger from
+the flags field will be passed to request_irq() when registering the IRQ
+handler to ensure that the IRQ is set up correctly.
+
+This shows a typical platform device, without the optional configuration
+platform data supplied. The next example uses the same resources, but adds
+the optional platform data to pass extra configuration data::
+
+  static struct dm9000_plat_data bast_dm9k_platdata = {
+	.flags		= DM9000_PLATF_16BITONLY,
+  };
+
+  static struct platform_device bast_device_dm9k = {
+	.name		= "dm9000",
+	.id		= 0,
+	.num_resources	= ARRAY_SIZE(bast_dm9k_resource),
+	.resource	= bast_dm9k_resource,
+	.dev		= {
+		.platform_data = &bast_dm9k_platdata,
+	}
+  };
+
+The platform data is defined in include/linux/dm9000.h and described below.
+
+
+Platform data
+-------------
+
+Extra platform data for the DM9000 can describe the IO bus width to the
+device, whether or not an external PHY is attached to the device and
+the availability of an external configuration EEPROM.
+
+The flags for the platform data .flags field are as follows:
+
+DM9000_PLATF_8BITONLY
+
+	The IO should be done with 8bit operations.
+
+DM9000_PLATF_16BITONLY
+
+	The IO should be done with 16bit operations.
+
+DM9000_PLATF_32BITONLY
+
+	The IO should be done with 32bit operations.
+
+DM9000_PLATF_EXT_PHY
+
+	The chip is connected to an external PHY.
+
+DM9000_PLATF_NO_EEPROM
+
+	This can be used to signify that the board does not have an
+	EEPROM, or that the EEPROM should be hidden from the user.
+
+DM9000_PLATF_SIMPLE_PHY
+
+	Switch to using the simpler PHY polling method which does not
+	try and read the MII PHY state regularly. This is only available
+	when using the internal PHY. See the section on link state polling
+	for more information.
+
+	The config symbol DM9000_FORCE_SIMPLE_PHY_POLL, Kconfig entry
+	"Force simple NSR based PHY polling" allows this flag to be
+	forced on at build time.
+
+
+PHY Link state polling
+----------------------
+
+The driver keeps track of the link state and informs the network core
+about link (carrier) availability. This is managed by several methods
+depending on the version of the chip and on which PHY is being used.
+
+For the internal PHY, the original (and currently default) method is
+to read the MII state, either when the status changes if we have the
+necessary interrupt support in the chip or every two seconds via a
+periodic timer.
+
+To reduce the overhead for the internal PHY, there is now the option
+of using the DM9000_FORCE_SIMPLE_PHY_POLL config, or DM9000_PLATF_SIMPLE_PHY
+platform data option to read the summary information without the
+expensive MII accesses. This method is faster, but does not print
+as much information.
+
+When using an external PHY, the driver currently has to poll the MII
+link status as there is no method for getting an interrupt on link change.
+
+
+DM9000A / DM9000B
+-----------------
+
+These chips are functionally similar to the DM9000E and are supported easily
+by the same driver. The features are:
+
+   1) Interrupt on internal PHY state change. This means that the periodic
+      polling of the PHY status may be disabled on these devices when using
+      the internal PHY.
+
+   2) TCP/UDP checksum offloading, which the driver does not currently support.
+
+
+ethtool
+-------
+
+The driver supports the ethtool interface for access to the driver
+state information, the PHY state and the EEPROM.
diff --git a/Documentation/networking/device_drivers/davicom/dm9000.txt b/Documentation/networking/device_drivers/davicom/dm9000.txt
deleted file mode 100644
index 5552e2e575c5..000000000000
--- a/Documentation/networking/device_drivers/davicom/dm9000.txt
+++ /dev/null
@@ -1,167 +0,0 @@
-DM9000 Network driver
-=====================
-
-Copyright 2008 Simtec Electronics,
-	  Ben Dooks <ben@simtec.co.uk> <ben-linux@fluff.org>
-
-
-Introduction
-------------
-
-This file describes how to use the DM9000 platform-device based network driver
-that is contained in the files drivers/net/dm9000.c and drivers/net/dm9000.h.
-
-The driver supports three DM9000 variants, the DM9000E which is the first chip
-supported as well as the newer DM9000A and DM9000B devices. It is currently
-maintained and tested by Ben Dooks, who should be CC: to any patches for this
-driver.
-
-
-Defining the platform device
-----------------------------
-
-The minimum set of resources attached to the platform device are as follows:
-
-    1) The physical address of the address register
-    2) The physical address of the data register
-    3) The IRQ line the device's interrupt pin is connected to.
-
-These resources should be specified in that order, as the ordering of the
-two address regions is important (the driver expects these to be address
-and then data).
-
-An example from arch/arm/mach-s3c2410/mach-bast.c is:
-
-static struct resource bast_dm9k_resource[] = {
-	[0] = {
-		.start = S3C2410_CS5 + BAST_PA_DM9000,
-		.end   = S3C2410_CS5 + BAST_PA_DM9000 + 3,
-		.flags = IORESOURCE_MEM,
-	},
-	[1] = {
-		.start = S3C2410_CS5 + BAST_PA_DM9000 + 0x40,
-		.end   = S3C2410_CS5 + BAST_PA_DM9000 + 0x40 + 0x3f,
-		.flags = IORESOURCE_MEM,
-	},
-	[2] = {
-		.start = IRQ_DM9000,
-		.end   = IRQ_DM9000,
-		.flags = IORESOURCE_IRQ | IORESOURCE_IRQ_HIGHLEVEL,
-	}
-};
-
-static struct platform_device bast_device_dm9k = {
-	.name		= "dm9000",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(bast_dm9k_resource),
-	.resource	= bast_dm9k_resource,
-};
-
-Note the setting of the IRQ trigger flag in bast_dm9k_resource[2].flags,
-as this will generate a warning if it is not present. The trigger from
-the flags field will be passed to request_irq() when registering the IRQ
-handler to ensure that the IRQ is setup correctly.
-
-This shows a typical platform device, without the optional configuration
-platform data supplied. The next example uses the same resources, but adds
-the optional platform data to pass extra configuration data:
-
-static struct dm9000_plat_data bast_dm9k_platdata = {
-	.flags		= DM9000_PLATF_16BITONLY,
-};
-
-static struct platform_device bast_device_dm9k = {
-	.name		= "dm9000",
-	.id		= 0,
-	.num_resources	= ARRAY_SIZE(bast_dm9k_resource),
-	.resource	= bast_dm9k_resource,
-	.dev		= {
-		.platform_data = &bast_dm9k_platdata,
-	}
-};
-
-The platform data is defined in include/linux/dm9000.h and described below.
-
-
-Platform data
--------------
-
-Extra platform data for the DM9000 can describe the IO bus width to the
-device, whether or not an external PHY is attached to the device and
-the availability of an external configuration EEPROM.
-
-The flags for the platform data .flags field are as follows:
-
-DM9000_PLATF_8BITONLY
-
-	The IO should be done with 8bit operations.
-
-DM9000_PLATF_16BITONLY
-
-	The IO should be done with 16bit operations.
-
-DM9000_PLATF_32BITONLY
-
-	The IO should be done with 32bit operations.
-
-DM9000_PLATF_EXT_PHY
-
-	The chip is connected to an external PHY.
-
-DM9000_PLATF_NO_EEPROM
-
-	This can be used to signify that the board does not have an
-	EEPROM, or that the EEPROM should be hidden from the user.
-
-DM9000_PLATF_SIMPLE_PHY
-
-	Switch to using the simpler PHY polling method which does not
-	try and read the MII PHY state regularly. This is only available
-	when using the internal PHY. See the section on link state polling
-	for more information.
-
-	The config symbol DM9000_FORCE_SIMPLE_PHY_POLL, Kconfig entry
-	"Force simple NSR based PHY polling" allows this flag to be
-	forced on at build time.
-
-
-PHY Link state polling
-----------------------
-
-The driver keeps track of the link state and informs the network core
-about link (carrier) availability. This is managed by several methods
-depending on the version of the chip and on which PHY is being used.
-
-For the internal PHY, the original (and currently default) method is
-to read the MII state, either when the status changes if we have the
-necessary interrupt support in the chip or every two seconds via a
-periodic timer.
-
-To reduce the overhead for the internal PHY, there is now the option
-of using the DM9000_FORCE_SIMPLE_PHY_POLL config, or DM9000_PLATF_SIMPLE_PHY
-platform data option to read the summary information without the
-expensive MII accesses. This method is faster, but does not print
-as much information.
-
-When using an external PHY, the driver currently has to poll the MII
-link status as there is no method for getting an interrupt on link change.
-
-
-DM9000A / DM9000B
------------------
-
-These chips are functionally similar to the DM9000E and are supported easily
-by the same driver. The features are:
-
-   1) Interrupt on internal PHY state change. This means that the periodic
-      polling of the PHY status may be disabled on these devices when using
-      the internal PHY.
-
-   2) TCP/UDP checksum offloading, which the driver does not currently support.
-
-
-ethtool
--------
-
-The driver supports the ethtool interface for access to the driver
-state information, the PHY state and the EEPROM.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 0b39342e2a1f..e8db57fef2e9 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -33,6 +33,7 @@ Contents:
    aquantia/atlantic
    chelsio/cxgb
    cirrus/cs89x0
+   davicom/dm9000
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From b6671d71ca811aed02f136a6cd812a542f88c483 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:41 +0200
Subject: docs: networking: device drivers: convert dec/de4x5.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/dec/de4x5.rst        | 189 +++++++++++++++++++++
 .../networking/device_drivers/dec/de4x5.txt        | 178 -------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 drivers/net/ethernet/dec/tulip/Kconfig             |   2 +-
 4 files changed, 191 insertions(+), 179 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/dec/de4x5.rst
 delete mode 100644 Documentation/networking/device_drivers/dec/de4x5.txt

diff --git a/Documentation/networking/device_drivers/dec/de4x5.rst b/Documentation/networking/device_drivers/dec/de4x5.rst
new file mode 100644
index 000000000000..e03e9c631879
--- /dev/null
+++ b/Documentation/networking/device_drivers/dec/de4x5.rst
@@ -0,0 +1,189 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===================================
+DEC EtherWORKS Ethernet De4x5 cards
+===================================
+
+    Originally,   this  driver  was    written  for the  Digital   Equipment
+    Corporation series of EtherWORKS Ethernet cards:
+
+	 - DE425 TP/COAX EISA
+	 - DE434 TP PCI
+	 - DE435 TP/COAX/AUI PCI
+	 - DE450 TP/COAX/AUI PCI
+	 - DE500 10/100 PCI Fasternet
+
+    but it  will  now attempt  to  support all  cards which   conform to the
+    Digital Semiconductor   SROM   Specification.    The  driver   currently
+    recognises the following chips:
+
+	 - DC21040  (no SROM)
+	 - DC21041[A]
+	 - DC21140[A]
+	 - DC21142
+	 - DC21143
+
+    So far the driver is known to work with the following cards:
+
+	 - KINGSTON
+	 - Linksys
+	 - ZNYX342
+	 - SMC8432
+	 - SMC9332 (w/new SROM)
+	 - ZNYX31[45]
+	 - ZNYX346 10/100 4 port (can act as a 10/100 bridge!)
+
+    The driver has been tested on a relatively busy network using the DE425,
+    DE434, DE435 and DE500 cards and benchmarked with 'ttcp': it transferred
+    16M of data to a DECstation 5000/200 as follows::
+
+		  TCP           UDP
+	       TX     RX     TX     RX
+      DE425   1030k  997k   1170k  1128k
+      DE434   1063k  995k   1170k  1125k
+      DE435   1063k  995k   1170k  1125k
+      DE500   1063k  998k   1170k  1125k  in 10Mb/s mode
+
+    All  values are typical (in   kBytes/sec) from a  sample  of 4 for  each
+    measurement. Their error is +/-20k on a quiet (private) network and also
+    depend on what load the CPU has.
+
+----------------------------------------------------------------------------
+
+    The ability to load this  driver as a loadable  module has been included
+    and used extensively  during the driver development  (to save those long
+    reboot sequences).  Loadable module support  under PCI and EISA has been
+    achieved by letting the driver autoprobe as if it were compiled into the
+    kernel. Do make sure  you're not sharing  interrupts with anything  that
+    cannot accommodate  interrupt  sharing!
+
+    To utilise this ability, you have to do 8 things:
+
+    0) have a copy of the loadable modules code installed on your system.
+    1) copy de4x5.c from the  /linux/drivers/net directory to your favourite
+       temporary directory.
+    2) for fixed  autoprobes (not  recommended),  edit the source code  near
+       line 5594 to reflect the I/O address  you're using, or assign these when
+       loading by::
+
+		   insmod de4x5 io=0xghh           where g = bus number
+							hh = device number
+
+       .. note::
+
+	   autoprobing for modules is now supported by default. You may just
+	   use::
+
+		   insmod de4x5
+
+	   to load all available boards. For a specific board, still use
+	   the 'io=?' above.
+    3) compile  de4x5.c, but include -DMODULE in  the command line to ensure
+       that the correct bits are compiled (see end of source code).
+    4) if you are wanting to add a new  card, goto 5. Otherwise, recompile a
+       kernel with the de4x5 configuration turned off and reboot.
+    5) insmod de4x5 [io=0xghh]
+    6) run the net startup bits for your new eth?? interface(s) manually
+       (usually /etc/rc.inet[12] at boot time).
+    7) enjoy!
+
+    To unload a module, turn off the associated interface(s)
+    'ifconfig eth?? down' then 'rmmod de4x5'.
+
+    Automedia detection is included so that in  principle you can disconnect
+    from, e.g.  TP, reconnect  to BNC  and  things will still work  (after a
+    pause while the   driver figures out   where its media went).  My tests
+    using ping showed that it appears to work....
+
+    By  default,  the driver will  now   autodetect any  DECchip based card.
+    Should you have a need to restrict the driver to DIGITAL only cards, you
+    can compile with a  DEC_ONLY define, or if  loading as a module, use the
+    'dec_only=1'  parameter.
+
+    I've changed the timing routines to  use the kernel timer and scheduling
+    functions  so that the  hangs  and other assorted problems that occurred
+    while autosensing the  media  should be gone.  A  bonus  for the DC21040
+    auto  media sense algorithm is  that it can now  use one that is more in
+    line with the  rest (the DC21040  chip doesn't  have a hardware  timer).
+    The downside is the 1 'jiffies' (10ms) resolution.
+
+    IEEE 802.3u MII interface code has  been added in anticipation that some
+    products may use it in the future.
+
+    The SMC9332 card  has a non-compliant SROM  which needs fixing -  I have
+    patched this  driver to detect it  because the SROM format used complies
+    to a previous DEC-STD format.
+
+    I have removed the buffer copies needed for receive on Intels.  I cannot
+    remove them for   Alphas since  the  Tulip hardware   only does longword
+    aligned  DMA transfers  and  the  Alphas get   alignment traps with  non
+    longword aligned data copies (which makes them really slow). No comment.
+
+    I  have added SROM decoding  routines to make this  driver work with any
+    card that  supports the Digital  Semiconductor SROM spec. This will help
+    all  cards running the dc2114x  series chips in particular.  Cards using
+    the dc2104x  chips should run correctly with  the basic  driver.  I'm in
+    debt to <mjacob@feral.com> for the  testing and feedback that helped get
+    this feature working.  So far we have  tested KINGSTON, SMC8432, SMC9332
+    (with the latest SROM complying  with the SROM spec  V3: their first was
+    broken), ZNYX342  and  LinkSys. ZNYX314 (dual  21041  MAC) and  ZNYX 315
+    (quad 21041 MAC)  cards also  appear  to work despite their  incorrectly
+    wired IRQs.
+
+    I have added a temporary fix for interrupt problems when some SCSI cards
+    share the same interrupt as the DECchip based  cards. The problem occurs
+    because  the SCSI card wants to  grab the interrupt  as a fast interrupt
+    (runs the   service routine with interrupts turned   off) vs.  this card
+    which really needs to run the service routine with interrupts turned on.
+    This driver will  now   add the interrupt service   routine  as  a  fast
+    interrupt if it   is bounced from the   slow interrupt.  THIS IS NOT   A
+    RECOMMENDED WAY TO RUN THE DRIVER  and has been done  for a limited time
+    until  people   sort  out their  compatibility    issues and the  kernel
+    interrupt  service code  is  fixed.   YOU  SHOULD SEPARATE OUT  THE FAST
+    INTERRUPT CARDS FROM THE SLOW INTERRUPT CARDS to ensure that they do not
+    run on the same interrupt. PCMCIA/CardBus is another can of worms...
+
+    Finally, I think  I have really  fixed  the module  loading problem with
+    more than one DECchip based  card.  As a  side effect, I don't mess with
+    the  device structure any  more which means that  if more than 1 card in
+    2.0.x is    installed (4  in   2.1.x),  the  user   will have   to  edit
+    linux/drivers/net/Space.c  to make room for  them. Hence, module loading
+    is  the preferred way to use   this driver, since  it  doesn't have this
+    limitation.
+
+    Where SROM media  detection is used and  full duplex is specified in the
+    SROM,  the feature is  ignored unless  lp->params.fdx  is set at compile
+    time  OR during  a   module load  (insmod  de4x5   args='eth??:fdx' [see
+    below]).  This is because there  is no way  to automatically detect full
+    duplex   links  except through   autonegotiation.    When I  include the
+    autonegotiation feature in  the SROM autoconf  code, this detection will
+    occur automatically for that case.
+
+    Command line  arguments are  now allowed, similar to  passing  arguments
+    through LILO. This will allow a per adapter board set  up of full duplex
+    and media. The only lexical constraints are:  the board name (dev->name)
+    appears in  the list before its parameters.  The list of parameters ends
+    either at the end of the parameter list or with another board name.  The
+    following parameters are allowed:
+
+	    =========  ===============================================
+	    fdx        for full duplex
+	    autosense  to set the media/speed; with the following
+		       sub-parameters:
+		       TP, TP_NW, BNC, AUI, BNC_AUI, 100Mb, 10Mb, AUTO
+	    =========  ===============================================
+
+    Case sensitivity is important  for  the sub-parameters. They *must*   be
+    upper case. Examples::
+
+	insmod de4x5 args='eth1:fdx autosense=BNC eth0:autosense=100Mb'.
+
+    For a compiled in driver, in linux/drivers/net/CONFIG, place e.g.::
+
+	DE4X5_OPTS = -DDE4X5_PARM='"eth0:fdx autosense=AUI eth2:autosense=TP"'
+
+    Yes,  I know full duplex  isn't permissible on BNC  or AUI; they're just
+    examples. By default, full duplex is turned  off and AUTO is the default
+    autosense setting. In  reality, I expect only the  full duplex option to
+    be used. Note the use of single quotes in the two examples above and the
+    lack of commas to separate items.
diff --git a/Documentation/networking/device_drivers/dec/de4x5.txt b/Documentation/networking/device_drivers/dec/de4x5.txt
deleted file mode 100644
index 452aac58341d..000000000000
--- a/Documentation/networking/device_drivers/dec/de4x5.txt
+++ /dev/null
@@ -1,178 +0,0 @@
-    Originally,   this  driver  was    written  for the  Digital   Equipment
-    Corporation series of EtherWORKS Ethernet cards:
-
-        DE425 TP/COAX EISA
-	DE434 TP PCI
-	DE435 TP/COAX/AUI PCI
-	DE450 TP/COAX/AUI PCI
-	DE500 10/100 PCI Fasternet
-
-    but it  will  now attempt  to  support all  cards which   conform to the
-    Digital Semiconductor   SROM   Specification.    The  driver   currently
-    recognises the following chips:
-
-        DC21040  (no SROM) 
-	DC21041[A]  
-	DC21140[A] 
-	DC21142 
-	DC21143 
-
-    So far the driver is known to work with the following cards:
-
-        KINGSTON
-	Linksys
-	ZNYX342
-	SMC8432
-	SMC9332 (w/new SROM)
-	ZNYX31[45]
-	ZNYX346 10/100 4 port (can act as a 10/100 bridge!) 
-
-    The driver has been tested on a relatively busy network using the DE425,
-    DE434, DE435 and DE500 cards and benchmarked with 'ttcp': it transferred
-    16M of data to a DECstation 5000/200 as follows:
-
-                TCP           UDP
-             TX     RX     TX     RX
-    DE425   1030k  997k   1170k  1128k
-    DE434   1063k  995k   1170k  1125k
-    DE435   1063k  995k   1170k  1125k
-    DE500   1063k  998k   1170k  1125k  in 10Mb/s mode
-
-    All  values are typical (in   kBytes/sec) from a  sample  of 4 for  each
-    measurement. Their error is +/-20k on a quiet (private) network and also
-    depend on what load the CPU has.
-
-    =========================================================================
-
-    The ability to load this  driver as a loadable  module has been included
-    and used extensively  during the driver development  (to save those long
-    reboot sequences).  Loadable module support  under PCI and EISA has been
-    achieved by letting the driver autoprobe as if it were compiled into the
-    kernel. Do make sure  you're not sharing  interrupts with anything  that
-    cannot accommodate  interrupt  sharing!
-
-    To utilise this ability, you have to do 8 things:
-
-    0) have a copy of the loadable modules code installed on your system.
-    1) copy de4x5.c from the  /linux/drivers/net directory to your favourite
-    temporary directory.
-    2) for fixed  autoprobes (not  recommended),  edit the source code  near
-    line 5594 to reflect the I/O address  you're using, or assign these when
-    loading by:
-
-                   insmod de4x5 io=0xghh           where g = bus number
-		                                        hh = device number   
-
-       NB: autoprobing for modules is now supported by default. You may just
-           use:
-
-                   insmod de4x5
-
-           to load all available boards. For a specific board, still use
-	   the 'io=?' above.
-    3) compile  de4x5.c, but include -DMODULE in  the command line to ensure
-    that the correct bits are compiled (see end of source code).
-    4) if you are wanting to add a new  card, goto 5. Otherwise, recompile a
-    kernel with the de4x5 configuration turned off and reboot.
-    5) insmod de4x5 [io=0xghh]
-    6) run the net startup bits for your new eth?? interface(s) manually 
-    (usually /etc/rc.inet[12] at boot time). 
-    7) enjoy!
-
-    To unload a module, turn off the associated interface(s) 
-    'ifconfig eth?? down' then 'rmmod de4x5'.
-
-    Automedia detection is included so that in  principle you can disconnect
-    from, e.g.  TP, reconnect  to BNC  and  things will still work  (after a
-    pause while the   driver figures out   where its media went).  My tests
-    using ping showed that it appears to work....
-
-    By  default,  the driver will  now   autodetect any  DECchip based card.
-    Should you have a need to restrict the driver to DIGITAL only cards, you
-    can compile with a  DEC_ONLY define, or if  loading as a module, use the
-    'dec_only=1'  parameter. 
-
-    I've changed the timing routines to  use the kernel timer and scheduling
-    functions  so that the  hangs  and other assorted problems that occurred
-    while autosensing the  media  should be gone.  A  bonus  for the DC21040
-    auto  media sense algorithm is  that it can now  use one that is more in
-    line with the  rest (the DC21040  chip doesn't  have a hardware  timer).
-    The downside is the 1 'jiffies' (10ms) resolution.
-
-    IEEE 802.3u MII interface code has  been added in anticipation that some
-    products may use it in the future.
-
-    The SMC9332 card  has a non-compliant SROM  which needs fixing -  I have
-    patched this  driver to detect it  because the SROM format used complies
-    to a previous DEC-STD format.
-
-    I have removed the buffer copies needed for receive on Intels.  I cannot
-    remove them for   Alphas since  the  Tulip hardware   only does longword
-    aligned  DMA transfers  and  the  Alphas get   alignment traps with  non
-    longword aligned data copies (which makes them really slow). No comment.
-
-    I  have added SROM decoding  routines to make this  driver work with any
-    card that  supports the Digital  Semiconductor SROM spec. This will help
-    all  cards running the dc2114x  series chips in particular.  Cards using
-    the dc2104x  chips should run correctly with  the basic  driver.  I'm in
-    debt to <mjacob@feral.com> for the  testing and feedback that helped get
-    this feature working.  So far we have  tested KINGSTON, SMC8432, SMC9332
-    (with the latest SROM complying  with the SROM spec  V3: their first was
-    broken), ZNYX342  and  LinkSys. ZNYX314 (dual  21041  MAC) and  ZNYX 315
-    (quad 21041 MAC)  cards also  appear  to work despite their  incorrectly
-    wired IRQs.
-
-    I have added a temporary fix for interrupt problems when some SCSI cards
-    share the same interrupt as the DECchip based  cards. The problem occurs
-    because  the SCSI card wants to  grab the interrupt  as a fast interrupt
-    (runs the   service routine with interrupts turned   off) vs.  this card
-    which really needs to run the service routine with interrupts turned on.
-    This driver will  now   add the interrupt service   routine  as  a  fast
-    interrupt if it   is bounced from the   slow interrupt.  THIS IS NOT   A
-    RECOMMENDED WAY TO RUN THE DRIVER  and has been done  for a limited time
-    until  people   sort  out their  compatibility    issues and the  kernel
-    interrupt  service code  is  fixed.   YOU  SHOULD SEPARATE OUT  THE FAST
-    INTERRUPT CARDS FROM THE SLOW INTERRUPT CARDS to ensure that they do not
-    run on the same interrupt. PCMCIA/CardBus is another can of worms...
-
-    Finally, I think  I have really  fixed  the module  loading problem with
-    more than one DECchip based  card.  As a  side effect, I don't mess with
-    the  device structure any  more which means that  if more than 1 card in
-    2.0.x is    installed (4  in   2.1.x),  the  user   will have   to  edit
-    linux/drivers/net/Space.c  to make room for  them. Hence, module loading
-    is  the preferred way to use   this driver, since  it  doesn't have this
-    limitation.
-
-    Where SROM media  detection is used and  full duplex is specified in the
-    SROM,  the feature is  ignored unless  lp->params.fdx  is set at compile
-    time  OR during  a   module load  (insmod  de4x5   args='eth??:fdx' [see
-    below]).  This is because there  is no way  to automatically detect full
-    duplex   links  except through   autonegotiation.    When I  include the
-    autonegotiation feature in  the SROM autoconf  code, this detection will
-    occur automatically for that case.
-
-    Command line  arguments are  now allowed, similar to  passing  arguments
-    through LILO. This will allow a per adapter board set  up of full duplex
-    and media. The only lexical constraints are:  the board name (dev->name)
-    appears in  the list before its parameters.  The list of parameters ends
-    either at the end of the parameter list or with another board name.  The
-    following parameters are allowed:
-
-            fdx        for full duplex
-	    autosense  to set the media/speed; with the following 
-	               sub-parameters:
-		       TP, TP_NW, BNC, AUI, BNC_AUI, 100Mb, 10Mb, AUTO
-
-    Case sensitivity is important  for  the sub-parameters. They *must*   be
-    upper case. Examples:
-
-        insmod de4x5 args='eth1:fdx autosense=BNC eth0:autosense=100Mb'.
-
-    For a compiled in driver, in linux/drivers/net/CONFIG, place e.g.
-	DE4X5_OPTS = -DDE4X5_PARM='"eth0:fdx autosense=AUI eth2:autosense=TP"' 
-
-    Yes,  I know full duplex  isn't permissible on BNC  or AUI; they're just
-    examples. By default, full duplex is turned  off and AUTO is the default
-    autosense setting. In  reality, I expect only the  full duplex option to
-    be used. Note the use of single quotes in the two examples above and the
-    lack of commas to separate items.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index e8db57fef2e9..4ad13ffb5800 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -34,6 +34,7 @@ Contents:
    chelsio/cxgb
    cirrus/cs89x0
    davicom/dm9000
+   dec/de4x5
 
 .. only::  subproject and html
 
diff --git a/drivers/net/ethernet/dec/tulip/Kconfig b/drivers/net/ethernet/dec/tulip/Kconfig
index 8ce6888ea722..8c4245d94bb2 100644
--- a/drivers/net/ethernet/dec/tulip/Kconfig
+++ b/drivers/net/ethernet/dec/tulip/Kconfig
@@ -114,7 +114,7 @@ config DE4X5
 	  These include the DE425, DE434, DE435, DE450 and DE500 models.  If
 	  you have a network card of this type, say Y.  More specific
 	  information is contained in
-	  <file:Documentation/networking/device_drivers/dec/de4x5.txt>.
+	  <file:Documentation/networking/device_drivers/dec/de4x5.rst>.
 
 	  To compile this driver as a module, choose M here. The module will
 	  be called de4x5.
-- 
cgit v1.2.3-59-g8ed1b


From c981977d3a5ce55c96b1b77f42d0a9df0a79244e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:42 +0200
Subject: docs: networking: device drivers: convert dec/dmfe.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/dec/dmfe.rst         | 71 ++++++++++++++++++++++
 .../networking/device_drivers/dec/dmfe.txt         | 66 --------------------
 Documentation/networking/device_drivers/index.rst  |  1 +
 MAINTAINERS                                        |  2 +-
 drivers/net/ethernet/dec/tulip/Kconfig             |  2 +-
 5 files changed, 74 insertions(+), 68 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/dec/dmfe.rst
 delete mode 100644 Documentation/networking/device_drivers/dec/dmfe.txt

diff --git a/Documentation/networking/device_drivers/dec/dmfe.rst b/Documentation/networking/device_drivers/dec/dmfe.rst
new file mode 100644
index 000000000000..c4cf809cad84
--- /dev/null
+++ b/Documentation/networking/device_drivers/dec/dmfe.rst
@@ -0,0 +1,71 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================
+Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux
+==============================================================
+
+Note: This driver doesn't have a maintainer.
+
+
+This program is free software; you can redistribute it and/or
+modify it under the terms of the GNU General   Public License
+as published by the Free Software Foundation; either version 2
+of the License, or (at your option) any later version.
+
+This program is distributed in the hope that it will be useful,
+but WITHOUT ANY WARRANTY; without even the implied warranty of
+MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+GNU General Public License for more details.
+
+
+This driver provides kernel support for Davicom DM9102(A)/DM9132/DM9801 ethernet cards (CNET
+10/100 ethernet cards use the Davicom chipset too, so this driver supports CNET cards as well). If you
+didn't compile this driver as a module, it will automatically load itself on boot and print a
+line similar to::
+
+	dmfe: Davicom DM9xxx net driver, version 1.36.4 (2002-01-17)
+
+If you compiled this driver as a module, you have to load it on boot. You can load it with the command::
+
+	insmod dmfe
+
+This way it will autodetect the device mode. This is the suggested way to load the module. Or you can pass
+a mode= setting to the module while loading, like::
+
+	insmod dmfe mode=0 # Force 10M Half Duplex
+	insmod dmfe mode=1 # Force 100M Half Duplex
+	insmod dmfe mode=4 # Force 10M Full Duplex
+	insmod dmfe mode=5 # Force 100M Full Duplex
+
+Next you should configure your network interface with a command similar to::
+
+	ifconfig eth0 172.22.3.18
+		      ^^^^^^^^^^^
+		     Your IP Address
+
+Then you may have to modify the default routing table with command::
+
+	route add default eth0
+
+
+Now your ethernet card should be up and running.
+
+
+TODO:
+
+- Implement pci_driver::suspend() and pci_driver::resume() power management methods.
+- Check on 64 bit boxes.
+- Check and fix on big endian boxes.
+- Test and make sure PCI latency is now correct for all cases.
+
+
+Authors:
+
+Sten Wang <sten_wang@davicom.com.tw>   : Original Author
+
+Contributors:
+
+- Marcelo Tosatti <marcelo@conectiva.com.br>
+- Alan Cox <alan@lxorguk.ukuu.org.uk>
+- Jeff Garzik <jgarzik@pobox.com>
+- Vojtech Pavlik <vojtech@suse.cz>
diff --git a/Documentation/networking/device_drivers/dec/dmfe.txt b/Documentation/networking/device_drivers/dec/dmfe.txt
deleted file mode 100644
index 25320bf19c86..000000000000
--- a/Documentation/networking/device_drivers/dec/dmfe.txt
+++ /dev/null
@@ -1,66 +0,0 @@
-Note: This driver doesn't have a maintainer.
-
-Davicom DM9102(A)/DM9132/DM9801 fast ethernet driver for Linux.
-
-This program is free software; you can redistribute it and/or
-modify it under the terms of the GNU General   Public License
-as published by the Free Software Foundation; either version 2
-of the License, or (at your option) any later version.
-
-This program is distributed in the hope that it will be useful,
-but WITHOUT ANY WARRANTY; without even the implied warranty of
-MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
-GNU General Public License for more details.
-
-
-This driver provides kernel support for Davicom DM9102(A)/DM9132/DM9801 ethernet cards ( CNET
-10/100 ethernet cards uses Davicom chipset too, so this driver supports CNET cards too ).If you
-didn't compile this driver as a module, it will automatically load itself on boot and print a
-line similar to :
-
-	dmfe: Davicom DM9xxx net driver, version 1.36.4 (2002-01-17)
-
-If you compiled this driver as a module, you have to load it on boot.You can load it with command :
-
-	insmod dmfe
-
-This way it will autodetect the device mode.This is the suggested way to load the module.Or you can pass
-a mode= setting to module while loading, like :
-
-	insmod dmfe mode=0 # Force 10M Half Duplex
-	insmod dmfe mode=1 # Force 100M Half Duplex
-	insmod dmfe mode=4 # Force 10M Full Duplex
-	insmod dmfe mode=5 # Force 100M Full Duplex
-
-Next you should configure your network interface with a command similar to :
-
-	ifconfig eth0 172.22.3.18
-                      ^^^^^^^^^^^
-		     Your IP Address
-
-Then you may have to modify the default routing table with command :
-
-	route add default eth0
-
-
-Now your ethernet card should be up and running.
-
-
-TODO:
-
-Implement pci_driver::suspend() and pci_driver::resume() power management methods.
-Check on 64 bit boxes.
-Check and fix on big endian boxes.
-Test and make sure PCI latency is now correct for all cases.
-
-
-Authors:
-
-Sten Wang <sten_wang@davicom.com.tw >   : Original Author
-
-Contributors:
-
-Marcelo Tosatti <marcelo@conectiva.com.br>
-Alan Cox <alan@lxorguk.ukuu.org.uk>
-Jeff Garzik <jgarzik@pobox.com>
-Vojtech Pavlik <vojtech@suse.cz>
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 4ad13ffb5800..09728e964ce1 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -35,6 +35,7 @@ Contents:
    cirrus/cs89x0
    davicom/dm9000
    dec/de4x5
+   dec/dmfe
 
 .. only::  subproject and html
 
diff --git a/MAINTAINERS b/MAINTAINERS
index b5cfee17635e..f0b18c156176 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -4694,7 +4694,7 @@ F:	net/ax25/sysctl_net_ax25.c
 DAVICOM FAST ETHERNET (DMFE) NETWORK DRIVER
 L:	netdev@vger.kernel.org
 S:	Orphan
-F:	Documentation/networking/device_drivers/dec/dmfe.txt
+F:	Documentation/networking/device_drivers/dec/dmfe.rst
 F:	drivers/net/ethernet/dec/tulip/dmfe.c
 
 DC390/AM53C974 SCSI driver
diff --git a/drivers/net/ethernet/dec/tulip/Kconfig b/drivers/net/ethernet/dec/tulip/Kconfig
index 8c4245d94bb2..177f36f4b89d 100644
--- a/drivers/net/ethernet/dec/tulip/Kconfig
+++ b/drivers/net/ethernet/dec/tulip/Kconfig
@@ -138,7 +138,7 @@ config DM9102
 	  This driver is for DM9102(A)/DM9132/DM9801 compatible PCI cards from
 	  Davicom (<http://www.davicom.com.tw/>).  If you have such a network
 	  (Ethernet) card, say Y.  Some information is contained in the file
-	  <file:Documentation/networking/device_drivers/dec/dmfe.txt>.
+	  <file:Documentation/networking/device_drivers/dec/dmfe.rst>.
 
 	  To compile this driver as a module, choose M here. The module will
 	  be called dmfe.
-- 
cgit v1.2.3-59-g8ed1b


From ca705e4793f024afb8e86030e08b1e0f16dcc07c Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:43 +0200
Subject: docs: networking: device drivers: convert dlink/dl2k.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/dlink/dl2k.rst       | 314 +++++++++++++++++++++
 .../networking/device_drivers/dlink/dl2k.txt       | 282 ------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 drivers/net/ethernet/dlink/dl2k.c                  |   2 +-
 4 files changed, 316 insertions(+), 283 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/dlink/dl2k.rst
 delete mode 100644 Documentation/networking/device_drivers/dlink/dl2k.txt

diff --git a/Documentation/networking/device_drivers/dlink/dl2k.rst b/Documentation/networking/device_drivers/dlink/dl2k.rst
new file mode 100644
index 000000000000..ccdb5d0d7460
--- /dev/null
+++ b/Documentation/networking/device_drivers/dlink/dl2k.rst
@@ -0,0 +1,314 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================================
+D-Link DL2000-based Gigabit Ethernet Adapter Installation
+=========================================================
+
+May 23, 2002
+
+.. Contents
+
+ - Compatibility List
+ - Quick Install
+ - Compiling the Driver
+ - Installing the Driver
+ - Option parameter
+ - Configuration Script Sample
+ - Troubleshooting
+
+
+Compatibility List
+==================
+
+Adapter Support:
+
+- D-Link DGE-550T Gigabit Ethernet Adapter.
+- D-Link DGE-550SX Gigabit Ethernet Adapter.
+- D-Link DL2000-based Gigabit Ethernet Adapter.
+
+
+The driver supports Linux kernel 2.4.7 and later. We have tested it
+on the environments below.
+
+ . Red Hat v6.2 (update kernel to 2.4.7)
+ . Red Hat v7.0 (update kernel to 2.4.7)
+ . Red Hat v7.1 (kernel 2.4.7)
+ . Red Hat v7.2 (kernel 2.4.7-10)
+
+
+Quick Install
+=============
+Install linux driver as following command::
+
+    1. make all
+    2. insmod dl2k.ko
+    3. ifconfig eth0 up 10.xxx.xxx.xxx netmask 255.0.0.0
+			^^^^^^^^^^^^^^^\	    ^^^^^^^^\
+					IP		     NETMASK
+
+Now eth0 should be active; you can test it by "ping" or get more information by
+"ifconfig". If tested ok, continue the next step.
+
+4. ``cp dl2k.ko /lib/modules/`uname -r`/kernel/drivers/net``
+5. Add the following line to /etc/modprobe.d/dl2k.conf::
+
+	alias eth0 dl2k
+
+6. Run ``depmod`` to update module indexes.
+7. Run ``netconfig`` or ``netconf`` to create configuration script ifcfg-eth0
+   located at /etc/sysconfig/network-scripts or create it manually.
+
+   [see - Configuration Script Sample]
+8. Driver will automatically load and configure at next boot time.
+
+Compiling the Driver
+====================
+In Linux, NIC drivers are most commonly configured as loadable modules.
+The approach of building a monolithic kernel has become obsolete. The driver
+can be compiled as part of a monolithic kernel, but is strongly discouraged.
+The remainder of this section assumes the driver is built as a loadable module.
+In the Linux environment, it is a good idea to rebuild the driver from the
+source instead of relying on a precompiled version. This approach provides
+better reliability since a precompiled driver might depend on libraries or
+kernel features that are not present in a given Linux installation.
+
+The 3 files necessary to build Linux device driver are dl2k.c, dl2k.h and
+Makefile. To compile, the Linux installation must include the gcc compiler,
+the kernel source, and the kernel headers. The Linux driver supports Linux
+Kernels 2.4.7. Copy the files to a directory and enter the following command
+to compile and link the driver:
+
+CD-ROM drive
+------------
+
+::
+
+    [root@XXX /] mkdir cdrom
+    [root@XXX /] mount -r -t iso9660 -o conv=auto /dev/cdrom /cdrom
+    [root@XXX /] cd root
+    [root@XXX /root] mkdir dl2k
+    [root@XXX /root] cd dl2k
+    [root@XXX dl2k] cp /cdrom/linux/dl2k.tgz /root/dl2k
+    [root@XXX dl2k] tar xfvz dl2k.tgz
+    [root@XXX dl2k] make all
+
+Floppy disc drive
+-----------------
+
+::
+
+    [root@XXX /] cd root
+    [root@XXX /root] mkdir dl2k
+    [root@XXX /root] cd dl2k
+    [root@XXX dl2k] mcopy a:/linux/dl2k.tgz /root/dl2k
+    [root@XXX dl2k] tar xfvz dl2k.tgz
+    [root@XXX dl2k] make all
+
+Installing the Driver
+=====================
+
+Manual Installation
+-------------------
+
+  Once the driver has been compiled, it must be loaded, enabled, and bound
+  to a protocol stack in order to establish network connectivity. To load a
+  module enter the command::
+
+    insmod dl2k.o
+
+  or::
+
+    insmod dl2k.o <optional parameter>	; add parameter
+
+---------------------------------------------------------
+
+  example::
+
+    insmod dl2k.o media=100mbps_hd
+
+   or::
+
+    insmod dl2k.o media=3
+
+   or::
+
+    insmod dl2k.o media=3,2	; for 2 cards
+
+---------------------------------------------------------
+
+  Please reference the list of the command line parameters supported by
+  the Linux device driver below.
+
+  The insmod command only loads the driver and gives it a name of the form
+  eth0, eth1, etc. To bring the NIC into an operational state,
+  it is necessary to issue the following command::
+
+    ifconfig eth0 up
+
+  Finally, to bind the driver to the active protocol (e.g., TCP/IP with
+  Linux), enter the following command::
+
+    ifup eth0
+
+  Note that this is meaningful only if the system can find a configuration
+  script that contains the necessary network information. A sample will be
+  given in the next paragraph.
+
+  The commands to unload a driver are as follows::
+
+    ifdown eth0
+    ifconfig eth0 down
+    rmmod dl2k.o
+
+  The following are the commands to list the currently loaded modules and
+  to see the current network configuration::
+
+    lsmod
+    ifconfig
+
+
+Automated Installation
+----------------------
+  This section describes how to install the driver such that it is
+  automatically loaded and configured at boot time. The following description
+  is based on a Red Hat 6.0/7.0 distribution, but it can easily be ported to
+  other distributions as well.
+
+Red Hat v6.x/v7.x
+-----------------
+  1. Copy dl2k.o to the network modules directory, typically
+     /lib/modules/2.x.x-xx/net or /lib/modules/2.x.x/kernel/drivers/net.
+  2. Locate the boot module configuration file, most commonly in the
+     /etc/modprobe.d/ directory. Add the following lines::
+
+	alias ethx dl2k
+	options dl2k <optional parameters>
+
+     where ethx will be eth0 if the NIC is the only ethernet adapter, eth1 if
+     one other ethernet adapter is installed, etc. Refer to the table in the
+     previous section for the list of optional parameters.
+  3. Locate the network configuration scripts, normally the
+     /etc/sysconfig/network-scripts directory, and create a configuration
+     script named ifcfg-ethx that contains network information.
+  4. Note that for most Linux distributions, Red Hat included, a configuration
+     utility with a graphical user interface is provided to perform steps 2
+     and 3 above.
+
+
+Parameter Description
+=====================
+You can install this driver without any additional parameter. However, if you
+are going to have extensive functions then it is necessary to set extra
+parameter. Below is a list of the command line parameters supported by the
+Linux device
+driver.
+
+
+===============================   ==============================================
+mtu=packet_size			  Specifies the maximum packet size. default
+				  is 1500.
+
+media=media_type		  Specifies the media type the NIC operates at.
+				  autosense	Autosensing active media.
+
+				  ===========	=========================
+				  10mbps_hd	10Mbps half duplex.
+				  10mbps_fd	10Mbps full duplex.
+				  100mbps_hd	100Mbps half duplex.
+				  100mbps_fd	100Mbps full duplex.
+				  1000mbps_fd	1000Mbps full duplex.
+				  1000mbps_hd	1000Mbps half duplex.
+				  0		Autosensing active media.
+				  1		10Mbps half duplex.
+				  2		10Mbps full duplex.
+				  3		100Mbps half duplex.
+				  4		100Mbps full duplex.
+				  5          	1000Mbps half duplex.
+				  6          	1000Mbps full duplex.
+				  ===========	=========================
+
+				  By default, the NIC operates at autosense.
+				  1000mbps_fd and 1000mbps_hd types are only
+				  available for fiber adapter.
+
+vlan=n				  Specifies the VLAN ID. If vlan=0, the
+				  Virtual Local Area Network (VLAN) function is
+				  disabled.
+
+jumbo=[0|1]			  Specifies the jumbo frame support. If jumbo=1,
+				  the NIC accept jumbo frames. By default, this
+				  function is disabled.
+				  Jumbo frame usually improve the performance
+				  in gigabit.
+				  This feature need jumbo frame compatible
+				  remote.
+
+rx_coalesce=m			  Number of rx frame handled each interrupt.
+rx_timeout=n			  Rx DMA wait time for an interrupt.
+				  If set rx_coalesce > 0, hardware only assert
+				  an interrupt for m frames. Hardware won't
+				  assert rx interrupt until m frames received or
+				  reach timeout of n * 640 nano seconds.
+				  Set proper rx_coalesce and rx_timeout can
+				  reduce congestion collapse and overload which
+				  has been a bottleneck for high speed network.
+
+				  For example, rx_coalesce=10 rx_timeout=800.
+				  that is, hardware assert only 1 interrupt
+				  for 10 frames received or timeout of 512 us.
+
+tx_coalesce=n			  Number of tx frame handled each interrupt.
+				  Set n > 1 can reduce the interrupts
+				  congestion usually lower performance of
+				  high speed network card. Default is 16.
+
+tx_flow=[1|0]			  Specifies the Tx flow control. If tx_flow=0,
+				  the Tx flow control is disabled, else the
+				  driver autodetects.
+rx_flow=[1|0]			  Specifies the Rx flow control. If rx_flow=0,
+				  the Rx flow control is disabled, else the
+				  driver autodetects.
+===============================   ==============================================
+
+
+Configuration Script Sample
+===========================
+Here is a sample of a simple configuration script::
+
+    DEVICE=eth0
+    USERCTL=no
+    ONBOOT=yes
+    BOOTPROTO=none
+    BROADCAST=207.200.5.255
+    NETWORK=207.200.5.0
+    NETMASK=255.255.255.0
+    IPADDR=207.200.5.2
+
+
+Troubleshooting
+===============
+Q1. Source files contain ^ M behind every line.
+
+    Make sure all files are in Unix file format (no CR). Try the following
+    shell command to convert files::
+
+	cat dl2k.c | col -b > dl2k.tmp
+	mv dl2k.tmp dl2k.c
+
+    OR::
+
+	cat dl2k.c | tr -d "\r" > dl2k.tmp
+	mv dl2k.tmp dl2k.c
+
+Q2: Could not find header files (``*.h``)?
+
+    To compile the driver, you need kernel header files. After
+    installing the kernel source, the header files are usually located in
+    /usr/src/linux/include, which is the default include directory configured
+    in Makefile. For some distributions, there is a copy of header files in
+    /usr/src/include/linux and /usr/src/include/asm, that you can change the
+    INCLUDEDIR in Makefile to /usr/include without installing kernel source.
+
+    Note that RH 7.0 didn't provide correct header files in /usr/include,
+    including those files will make a wrong version driver.
+
diff --git a/Documentation/networking/device_drivers/dlink/dl2k.txt b/Documentation/networking/device_drivers/dlink/dl2k.txt
deleted file mode 100644
index cba74f7a3abc..000000000000
--- a/Documentation/networking/device_drivers/dlink/dl2k.txt
+++ /dev/null
@@ -1,282 +0,0 @@
-
-    D-Link DL2000-based Gigabit Ethernet Adapter Installation
-    for Linux
-    May 23, 2002
-
-Contents
-========
- - Compatibility List
- - Quick Install
- - Compiling the Driver
- - Installing the Driver
- - Option parameter
- - Configuration Script Sample
- - Troubleshooting
-
-
-Compatibility List
-=================
-Adapter Support:
-
-D-Link DGE-550T Gigabit Ethernet Adapter.
-D-Link DGE-550SX Gigabit Ethernet Adapter.
-D-Link DL2000-based Gigabit Ethernet Adapter.
-
-
-The driver support Linux kernel 2.4.7 later. We had tested it
-on the environments below.
-
- . Red Hat v6.2 (update kernel to 2.4.7)
- . Red Hat v7.0 (update kernel to 2.4.7)
- . Red Hat v7.1 (kernel 2.4.7)
- . Red Hat v7.2 (kernel 2.4.7-10)
-
-
-Quick Install
-=============
-Install linux driver as following command:
-
-1. make all
-2. insmod dl2k.ko
-3. ifconfig eth0 up 10.xxx.xxx.xxx netmask 255.0.0.0
-		    ^^^^^^^^^^^^^^^\	    ^^^^^^^^\
-				    IP		     NETMASK
-Now eth0 should active, you can test it by "ping" or get more information by
-"ifconfig". If tested ok, continue the next step.
-
-4. cp dl2k.ko /lib/modules/`uname -r`/kernel/drivers/net
-5. Add the following line to /etc/modprobe.d/dl2k.conf:
-	alias eth0 dl2k
-6. Run depmod to updated module indexes.
-7. Run "netconfig" or "netconf" to create configuration script ifcfg-eth0
-   located at /etc/sysconfig/network-scripts or create it manually.
-   [see - Configuration Script Sample]
-8. Driver will automatically load and configure at next boot time.
-
-Compiling the Driver
-====================
-  In Linux, NIC drivers are most commonly configured as loadable modules.
-The approach of building a monolithic kernel has become obsolete. The driver
-can be compiled as part of a monolithic kernel, but is strongly discouraged.
-The remainder of this section assumes the driver is built as a loadable module.
-In the Linux environment, it is a good idea to rebuild the driver from the
-source instead of relying on a precompiled version. This approach provides
-better reliability since a precompiled driver might depend on libraries or
-kernel features that are not present in a given Linux installation.
-
-The 3 files necessary to build Linux device driver are dl2k.c, dl2k.h and
-Makefile. To compile, the Linux installation must include the gcc compiler,
-the kernel source, and the kernel headers. The Linux driver supports Linux
-Kernels 2.4.7. Copy the files to a directory and enter the following command
-to compile and link the driver:
-
-CD-ROM drive
-------------
-
-[root@XXX /] mkdir cdrom
-[root@XXX /] mount -r -t iso9660 -o conv=auto /dev/cdrom /cdrom
-[root@XXX /] cd root
-[root@XXX /root] mkdir dl2k
-[root@XXX /root] cd dl2k
-[root@XXX dl2k] cp /cdrom/linux/dl2k.tgz /root/dl2k
-[root@XXX dl2k] tar xfvz dl2k.tgz
-[root@XXX dl2k] make all
-
-Floppy disc drive
------------------
-
-[root@XXX /] cd root
-[root@XXX /root] mkdir dl2k
-[root@XXX /root] cd dl2k
-[root@XXX dl2k] mcopy a:/linux/dl2k.tgz /root/dl2k
-[root@XXX dl2k] tar xfvz dl2k.tgz
-[root@XXX dl2k] make all
-
-Installing the Driver
-=====================
-
-  Manual Installation
-  -------------------
-  Once the driver has been compiled, it must be loaded, enabled, and bound
-  to a protocol stack in order to establish network connectivity. To load a
-  module enter the command:
-
-  insmod dl2k.o
-
-  or
-
-  insmod dl2k.o <optional parameter>	; add parameter
-
-  ===============================================================
-   example: insmod dl2k.o media=100mbps_hd
-   or	    insmod dl2k.o media=3
-   or	    insmod dl2k.o media=3,2	; for 2 cards
-  ===============================================================
-
-  Please reference the list of the command line parameters supported by
-  the Linux device driver below.
-
-  The insmod command only loads the driver and gives it a name of the form
-  eth0, eth1, etc. To bring the NIC into an operational state,
-  it is necessary to issue the following command:
-
-  ifconfig eth0 up
-
-  Finally, to bind the driver to the active protocol (e.g., TCP/IP with
-  Linux), enter the following command:
-
-  ifup eth0
-
-  Note that this is meaningful only if the system can find a configuration
-  script that contains the necessary network information. A sample will be
-  given in the next paragraph.
-
-  The commands to unload a driver are as follows:
-
-  ifdown eth0
-  ifconfig eth0 down
-  rmmod dl2k.o
-
-  The following are the commands to list the currently loaded modules and
-  to see the current network configuration.
-
-  lsmod
-  ifconfig
-
-
-  Automated Installation
-  ----------------------
-  This section describes how to install the driver such that it is
-  automatically loaded and configured at boot time. The following description
-  is based on a Red Hat 6.0/7.0 distribution, but it can easily be ported to
-  other distributions as well.
-
-  Red Hat v6.x/v7.x
-  -----------------
-  1. Copy dl2k.o to the network modules directory, typically
-     /lib/modules/2.x.x-xx/net or /lib/modules/2.x.x/kernel/drivers/net.
-  2. Locate the boot module configuration file, most commonly in the
-     /etc/modprobe.d/ directory. Add the following lines:
-
-     alias ethx dl2k
-     options dl2k <optional parameters>
-
-     where ethx will be eth0 if the NIC is the only ethernet adapter, eth1 if
-     one other ethernet adapter is installed, etc. Refer to the table in the
-     previous section for the list of optional parameters.
-  3. Locate the network configuration scripts, normally the
-     /etc/sysconfig/network-scripts directory, and create a configuration
-     script named ifcfg-ethx that contains network information.
-  4. Note that for most Linux distributions, Red Hat included, a configuration
-     utility with a graphical user interface is provided to perform steps 2
-     and 3 above.
-
-
-Parameter Description
-=====================
-You can install this driver without any additional parameter. However, if you
-are going to have extensive functions then it is necessary to set extra
-parameter. Below is a list of the command line parameters supported by the
-Linux device
-driver.
-
-mtu=packet_size			- Specifies the maximum packet size. default
-				  is 1500.
-
-media=media_type		- Specifies the media type the NIC operates at.
-				  autosense	Autosensing active media.
-				  10mbps_hd	10Mbps half duplex.
-				  10mbps_fd	10Mbps full duplex.
-				  100mbps_hd	100Mbps half duplex.
-				  100mbps_fd	100Mbps full duplex.
-				  1000mbps_fd	1000Mbps full duplex.
-				  1000mbps_hd	1000Mbps half duplex.
-				  0		Autosensing active media.
-				  1		10Mbps half duplex.
-				  2		10Mbps full duplex.
-				  3		100Mbps half duplex.
-				  4		100Mbps full duplex.
-				  5          	1000Mbps half duplex.
-				  6          	1000Mbps full duplex.
-
-				  By default, the NIC operates at autosense.
-				  1000mbps_fd and 1000mbps_hd types are only
-				  available for fiber adapter.
-
-vlan=n				- Specifies the VLAN ID. If vlan=0, the
-				  Virtual Local Area Network (VLAN) function is
-				  disable.
-
-jumbo=[0|1]			- Specifies the jumbo frame support. If jumbo=1,
-				  the NIC accept jumbo frames. By default, this
-				  function is disabled.
-				  Jumbo frame usually improve the performance
-				  int gigabit.
-				  This feature need jumbo frame compatible 
-				  remote.
-				  
-rx_coalesce=m			- Number of rx frame handled each interrupt.
-rx_timeout=n			- Rx DMA wait time for an interrupt. 
-				  If set rx_coalesce > 0, hardware only assert 
-				  an interrupt for m frames. Hardware won't 
-				  assert rx interrupt until m frames received or
-				  reach timeout of n * 640 nano seconds. 
-				  Set proper rx_coalesce and rx_timeout can 
-				  reduce congestion collapse and overload which
-				  has been a bottleneck for high speed network.
-				  
-				  For example, rx_coalesce=10 rx_timeout=800.
-				  that is, hardware assert only 1 interrupt 
-				  for 10 frames received or timeout of 512 us. 
-
-tx_coalesce=n			- Number of tx frame handled each interrupt.
-				  Set n > 1 can reduce the interrupts 
-				  congestion usually lower performance of
-				  high speed network card. Default is 16.
-				  
-tx_flow=[1|0]			- Specifies the Tx flow control. If tx_flow=0, 
-				  the Tx flow control disable else driver
-				  autodetect.
-rx_flow=[1|0]			- Specifies the Rx flow control. If rx_flow=0, 
-				  the Rx flow control enable else driver
-				  autodetect.
-
-
-Configuration Script Sample
-===========================
-Here is a sample of a simple configuration script:
-
-DEVICE=eth0
-USERCTL=no
-ONBOOT=yes
-POOTPROTO=none
-BROADCAST=207.200.5.255
-NETWORK=207.200.5.0
-NETMASK=255.255.255.0
-IPADDR=207.200.5.2
-
-
-Troubleshooting
-===============
-Q1. Source files contain ^ M behind every line.
-	Make sure all files are Unix file format (no LF). Try the following
-    shell command to convert files.
-
-	cat dl2k.c | col -b > dl2k.tmp
-	mv dl2k.tmp dl2k.c
-
-	OR
-
-	cat dl2k.c | tr -d "\r" > dl2k.tmp
-	mv dl2k.tmp dl2k.c
-
-Q2: Could not find header files (*.h) ?
-	To compile the driver, you need kernel header files. After
-    installing the kernel source, the header files are usually located in
-    /usr/src/linux/include, which is the default include directory configured
-    in Makefile. For some distributions, there is a copy of header files in
-    /usr/src/include/linux and /usr/src/include/asm, that you can change the
-    INCLUDEDIR in Makefile to /usr/include without installing kernel source.
-	Note that RH 7.0 didn't provide correct header files in /usr/include,
-    including those files will make a wrong version driver.
-
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 09728e964ce1..e5d1863379cb 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -36,6 +36,7 @@ Contents:
    davicom/dm9000
    dec/de4x5
    dec/dmfe
+   dlink/dl2k
 
 .. only::  subproject and html
 
diff --git a/drivers/net/ethernet/dlink/dl2k.c b/drivers/net/ethernet/dlink/dl2k.c
index 643090555cc7..5143722c4419 100644
--- a/drivers/net/ethernet/dlink/dl2k.c
+++ b/drivers/net/ethernet/dlink/dl2k.c
@@ -1869,7 +1869,7 @@ Compile command:
 
 gcc -D__KERNEL__ -DMODULE -I/usr/src/linux/include -Wall -Wstrict-prototypes -O2 -c dl2k.c
 
-Read Documentation/networking/device_drivers/dlink/dl2k.txt for details.
+Read Documentation/networking/device_drivers/dlink/dl2k.rst for details.
 
 */
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d0d976f59a57e5536ff01825f0bc8a0dbb0fe6b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:44 +0200
Subject: docs: networking: device drivers: convert freescale/dpaa.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- use :field: markup;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/freescale/dpaa.rst   | 269 +++++++++++++++++++++
 .../networking/device_drivers/freescale/dpaa.txt   | 260 --------------------
 Documentation/networking/device_drivers/index.rst  |   1 +
 3 files changed, 270 insertions(+), 260 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/freescale/dpaa.rst
 delete mode 100644 Documentation/networking/device_drivers/freescale/dpaa.txt

diff --git a/Documentation/networking/device_drivers/freescale/dpaa.rst b/Documentation/networking/device_drivers/freescale/dpaa.rst
new file mode 100644
index 000000000000..241c6c6f6e68
--- /dev/null
+++ b/Documentation/networking/device_drivers/freescale/dpaa.rst
@@ -0,0 +1,269 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================
+The QorIQ DPAA Ethernet Driver
+==============================
+
+Authors:
+- Madalin Bucur <madalin.bucur@nxp.com>
+- Camelia Groza <camelia.groza@nxp.com>
+
+.. Contents
+
+	- DPAA Ethernet Overview
+	- DPAA Ethernet Supported SoCs
+	- Configuring DPAA Ethernet in your kernel
+	- DPAA Ethernet Frame Processing
+	- DPAA Ethernet Features
+	- DPAA IRQ Affinity and Receive Side Scaling
+	- Debugging
+
+DPAA Ethernet Overview
+======================
+
+DPAA stands for Data Path Acceleration Architecture and it is a
+set of networking acceleration IPs that are available on several
+generations of SoCs, both on PowerPC and ARM64.
+
+The Freescale DPAA architecture consists of a series of hardware blocks
+that support Ethernet connectivity. The Ethernet driver depends upon the
+following drivers in the Linux kernel:
+
+ - Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
+    drivers/iommu/fsl_*
+ - Frame Manager (FMan)
+    drivers/net/ethernet/freescale/fman
+ - Queue Manager (QMan), Buffer Manager (BMan)
+    drivers/soc/fsl/qbman
+
+A simplified view of the dpaa_eth interfaces mapped to FMan MACs::
+
+  dpaa_eth       /eth0\     ...       /ethN\
+  driver        |      |             |      |
+  -------------   ----   -----------   ----   -------------
+       -Ports  / Tx  Rx \    ...    / Tx  Rx \
+  FMan        |          |         |          |
+       -MACs  |   MAC0   |         |   MACN   |
+	     /   dtsec0   \  ...  /   dtsecN   \ (or tgec)
+	    /              \     /              \(or memac)
+  ---------  --------------  ---  --------------  ---------
+      FMan, FMan Port, FMan SP, FMan MURAM drivers
+  ---------------------------------------------------------
+      FMan HW blocks: MURAM, MACs, Ports, SP
+  ---------------------------------------------------------
+
+The dpaa_eth relation to the QMan, BMan and FMan::
+
+	      ________________________________
+  dpaa_eth   /            eth0                \
+  driver    /                                  \
+  ---------   -^-   -^-   -^-   ---    ---------
+  QMan driver / \   / \   / \  \   /  | BMan    |
+	     |Rx | |Rx | |Tx | |Tx |  | driver  |
+  ---------  |Dfl| |Err| |Cnf| |FQs|  |         |
+  QMan HW    |FQ | |FQ | |FQs| |   |  |         |
+	     /   \ /   \ /   \  \ /   |         |
+  ---------   ---   ---   ---   -v-    ---------
+	    |        FMan QMI         |         |
+	    | FMan HW       FMan BMI  | BMan HW |
+	      -----------------------   --------
+
+where the acronyms used above (and in the code) are:
+
+=============== ===========================================================
+DPAA 		Data Path Acceleration Architecture
+FMan 		DPAA Frame Manager
+QMan 		DPAA Queue Manager
+BMan 		DPAA Buffers Manager
+QMI 		QMan interface in FMan
+BMI 		BMan interface in FMan
+FMan SP 	FMan Storage Profiles
+MURAM 		Multi-user RAM in FMan
+FQ 		QMan Frame Queue
+Rx Dfl FQ 	default reception FQ
+Rx Err FQ 	Rx error frames FQ
+Tx Cnf FQ 	Tx confirmation FQs
+Tx FQs 		transmission frame queues
+dtsec 		datapath three speed Ethernet controller (10/100/1000 Mbps)
+tgec 		ten gigabit Ethernet controller (10 Gbps)
+memac 		multirate Ethernet MAC (10/100/1000/10000)
+=============== ===========================================================
+
+DPAA Ethernet Supported SoCs
+============================
+
+The DPAA drivers enable the Ethernet controllers present on the following SoCs:
+
+PPC
+- P1023
+- P2041
+- P3041
+- P4080
+- P5020
+- P5040
+- T1023
+- T1024
+- T1040
+- T1042
+- T2080
+- T4240
+- B4860
+
+ARM
+- LS1043A
+- LS1046A
+
+Configuring DPAA Ethernet in your kernel
+========================================
+
+To enable the DPAA Ethernet driver, the following Kconfig options are required::
+
+  # common for arch/arm64 and arch/powerpc platforms
+  CONFIG_FSL_DPAA=y
+  CONFIG_FSL_FMAN=y
+  CONFIG_FSL_DPAA_ETH=y
+  CONFIG_FSL_XGMAC_MDIO=y
+
+  # for arch/powerpc only
+  CONFIG_FSL_PAMU=y
+
+  # common options needed for the PHYs used on the RDBs
+  CONFIG_VITESSE_PHY=y
+  CONFIG_REALTEK_PHY=y
+  CONFIG_AQUANTIA_PHY=y
+
+DPAA Ethernet Frame Processing
+==============================
+
+On Rx, buffers for the incoming frames are retrieved from the buffers found
+in the dedicated interface buffer pool. The driver initializes and seeds these
+with one page buffers.
+
+On Tx, all transmitted frames are returned to the driver through Tx
+confirmation frame queues. The driver is then responsible for freeing the
+buffers. In order to do this properly, a backpointer is added to the buffer
+before transmission that points to the skb. When the buffer returns to the
+driver on a confirmation FQ, the skb can be correctly consumed.
+
+DPAA Ethernet Features
+======================
+
+Currently the DPAA Ethernet driver enables the basic features required for
+a Linux Ethernet driver. The support for advanced features will be added
+gradually.
+
+The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
+checksum offload feature is enabled by default and cannot be controlled through
+ethtool. Also, rx-flow-hash and rx-hashing were added. The addition of RSS
+provides a big performance boost for the forwarding scenarios, allowing
+different traffic flows received by one interface to be processed by different
+CPUs in parallel.
+
+The driver has support for multiple prioritized Tx traffic classes. Priorities
+range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
+strict priority levels. Each traffic class contains NR_CPU TX queues. By
+default, only one traffic class is enabled and the lowest priority Tx queues
+are used. Higher priority traffic classes can be enabled with the mqprio
+qdisc. For example, all four traffic classes are enabled on an interface with
+the following command. Furthermore, skb priority levels are mapped to traffic
+classes as follows:
+
+	* priorities 0 to 3 - traffic class 0 (low priority)
+	* priorities 4 to 7 - traffic class 1 (medium-low priority)
+	* priorities 8 to 11 - traffic class 2 (medium-high priority)
+	* priorities 12 to 15 - traffic class 3 (high priority)
+
+::
+
+  tc qdisc add dev <int> root handle 1: \
+	 mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
+
+DPAA IRQ Affinity and Receive Side Scaling
+==========================================
+
+Traffic coming on the DPAA Rx queues or on the DPAA Tx confirmation
+queues is seen by the CPU as ingress traffic on a certain portal.
+The DPAA QMan portal interrupts are affined each to a certain CPU.
+The same portal interrupt services all the QMan portal consumers.
+
+By default the DPAA Ethernet driver enables RSS, making use of the
+DPAA FMan Parser and Keygen blocks to distribute traffic on 128
+hardware frame queues using a hash on IP v4/v6 source and destination
+and L4 source and destination ports, if present in the received frame.
+When RSS is disabled, all traffic received by a certain interface is
+received on the default Rx frame queue. The default DPAA Rx frame
+queues are configured to put the received traffic into a pool channel
+that allows any available CPU portal to dequeue the ingress traffic.
+The default frame queues have the HOLDACTIVE option set, ensuring that
+traffic bursts from a certain queue are serviced by the same CPU.
+This ensures a very low rate of frame reordering. A drawback of this
+is that only one CPU at a time can service the traffic received by a
+certain interface when RSS is not enabled.
+
+To implement RSS, the DPAA Ethernet driver allocates an extra set of
+128 Rx frame queues that are configured to dedicated channels, in a
+round-robin manner. The mapping of the frame queues to CPUs is now
+hardcoded; there is no indirection table to move traffic for a certain
+FQ (hash result) to another CPU. The ingress traffic arriving on one
+of these frame queues will arrive at the same portal and will always
+be processed by the same CPU. This ensures intra-flow order preservation
+and workload distribution for multiple traffic flows.
+
+RSS can be turned off for a certain interface using ethtool, i.e.::
+
+	# ethtool -N fm1-mac9 rx-flow-hash tcp4 ""
+
+To turn it back on, one needs to set rx-flow-hash for tcp4/6 or udp4/6::
+
+	# ethtool -N fm1-mac9 rx-flow-hash udp4 sfdn
+
+There is no independent control for individual protocols, any command
+run for one of tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 is
+going to control the rx-flow-hashing for all protocols on that interface.
+
+Besides using the FMan Keygen computed hash for spreading traffic on the
+128 Rx FQs, the DPAA Ethernet driver also sets the skb hash value when
+the NETIF_F_RXHASH feature is on (active by default). This can be turned
+on or off through ethtool, i.e.::
+
+	# ethtool -K fm1-mac9 rx-hashing off
+	# ethtool -k fm1-mac9 | grep hash
+	receive-hashing: off
+	# ethtool -K fm1-mac9 rx-hashing on
+	Actual changes:
+	receive-hashing: on
+	# ethtool -k fm1-mac9 | grep hash
+	receive-hashing: on
+
+Please note that Rx hashing depends upon the rx-flow-hashing being on
+for that interface - turning off rx-flow-hashing will also disable the
+rx-hashing (without ethtool reporting it as off as that depends on the
+NETIF_F_RXHASH feature flag).
+
+Debugging
+=========
+
+The following statistics are exported for each interface through ethtool:
+
+	- interrupt count per CPU
+	- Rx packets count per CPU
+	- Tx packets count per CPU
+	- Tx confirmed packets count per CPU
+	- Tx S/G frames count per CPU
+	- Tx error count per CPU
+	- Rx error count per CPU
+	- Rx error count per type
+	- congestion related statistics:
+
+		- congestion status
+		- time spent in congestion
+		- number of times the device entered congestion
+		- dropped packets count per cause
+
+The driver also exports the following information in sysfs:
+
+	- the FQ IDs for each FQ type
+	  /sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/fqids
+
+	- the ID of the buffer pool in use
+	  /sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/bpids
diff --git a/Documentation/networking/device_drivers/freescale/dpaa.txt b/Documentation/networking/device_drivers/freescale/dpaa.txt
deleted file mode 100644
index b06601ff9200..000000000000
--- a/Documentation/networking/device_drivers/freescale/dpaa.txt
+++ /dev/null
@@ -1,260 +0,0 @@
-The QorIQ DPAA Ethernet Driver
-==============================
-
-Authors:
-Madalin Bucur <madalin.bucur@nxp.com>
-Camelia Groza <camelia.groza@nxp.com>
-
-Contents
-========
-
-	- DPAA Ethernet Overview
-	- DPAA Ethernet Supported SoCs
-	- Configuring DPAA Ethernet in your kernel
-	- DPAA Ethernet Frame Processing
-	- DPAA Ethernet Features
-	- DPAA IRQ Affinity and Receive Side Scaling
-	- Debugging
-
-DPAA Ethernet Overview
-======================
-
-DPAA stands for Data Path Acceleration Architecture and it is a
-set of networking acceleration IPs that are available on several
-generations of SoCs, both on PowerPC and ARM64.
-
-The Freescale DPAA architecture consists of a series of hardware blocks
-that support Ethernet connectivity. The Ethernet driver depends upon the
-following drivers in the Linux kernel:
-
- - Peripheral Access Memory Unit (PAMU) (* needed only for PPC platforms)
-    drivers/iommu/fsl_*
- - Frame Manager (FMan)
-    drivers/net/ethernet/freescale/fman
- - Queue Manager (QMan), Buffer Manager (BMan)
-    drivers/soc/fsl/qbman
-
-A simplified view of the dpaa_eth interfaces mapped to FMan MACs:
-
-  dpaa_eth       /eth0\     ...       /ethN\
-  driver        |      |             |      |
-  -------------   ----   -----------   ----   -------------
-       -Ports  / Tx  Rx \    ...    / Tx  Rx \
-  FMan        |          |         |          |
-       -MACs  |   MAC0   |         |   MACN   |
-             /   dtsec0   \  ...  /   dtsecN   \ (or tgec)
-            /              \     /              \(or memac)
-  ---------  --------------  ---  --------------  ---------
-      FMan, FMan Port, FMan SP, FMan MURAM drivers
-  ---------------------------------------------------------
-      FMan HW blocks: MURAM, MACs, Ports, SP
-  ---------------------------------------------------------
-
-The dpaa_eth relation to the QMan, BMan and FMan:
-              ________________________________
-  dpaa_eth   /            eth0                \
-  driver    /                                  \
-  ---------   -^-   -^-   -^-   ---    ---------
-  QMan driver / \   / \   / \  \   /  | BMan    |
-             |Rx | |Rx | |Tx | |Tx |  | driver  |
-  ---------  |Dfl| |Err| |Cnf| |FQs|  |         |
-  QMan HW    |FQ | |FQ | |FQs| |   |  |         |
-             /   \ /   \ /   \  \ /   |         |
-  ---------   ---   ---   ---   -v-    ---------
-            |        FMan QMI         |         |
-            | FMan HW       FMan BMI  | BMan HW |
-              -----------------------   --------
-
-where the acronyms used above (and in the code) are:
-DPAA = Data Path Acceleration Architecture
-FMan = DPAA Frame Manager
-QMan = DPAA Queue Manager
-BMan = DPAA Buffers Manager
-QMI = QMan interface in FMan
-BMI = BMan interface in FMan
-FMan SP = FMan Storage Profiles
-MURAM = Multi-user RAM in FMan
-FQ = QMan Frame Queue
-Rx Dfl FQ = default reception FQ
-Rx Err FQ = Rx error frames FQ
-Tx Cnf FQ = Tx confirmation FQs
-Tx FQs = transmission frame queues
-dtsec = datapath three speed Ethernet controller (10/100/1000 Mbps)
-tgec = ten gigabit Ethernet controller (10 Gbps)
-memac = multirate Ethernet MAC (10/100/1000/10000)
-
-DPAA Ethernet Supported SoCs
-============================
-
-The DPAA drivers enable the Ethernet controllers present on the following SoCs:
-
-# PPC
-P1023
-P2041
-P3041
-P4080
-P5020
-P5040
-T1023
-T1024
-T1040
-T1042
-T2080
-T4240
-B4860
-
-# ARM
-LS1043A
-LS1046A
-
-Configuring DPAA Ethernet in your kernel
-========================================
-
-To enable the DPAA Ethernet driver, the following Kconfig options are required:
-
-# common for arch/arm64 and arch/powerpc platforms
-CONFIG_FSL_DPAA=y
-CONFIG_FSL_FMAN=y
-CONFIG_FSL_DPAA_ETH=y
-CONFIG_FSL_XGMAC_MDIO=y
-
-# for arch/powerpc only
-CONFIG_FSL_PAMU=y
-
-# common options needed for the PHYs used on the RDBs
-CONFIG_VITESSE_PHY=y
-CONFIG_REALTEK_PHY=y
-CONFIG_AQUANTIA_PHY=y
-
-DPAA Ethernet Frame Processing
-==============================
-
-On Rx, buffers for the incoming frames are retrieved from the buffers found
-in the dedicated interface buffer pool. The driver initializes and seeds these
-with one page buffers.
-
-On Tx, all transmitted frames are returned to the driver through Tx
-confirmation frame queues. The driver is then responsible for freeing the
-buffers. In order to do this properly, a backpointer is added to the buffer
-before transmission that points to the skb. When the buffer returns to the
-driver on a confirmation FQ, the skb can be correctly consumed.
-
-DPAA Ethernet Features
-======================
-
-Currently the DPAA Ethernet driver enables the basic features required for
-a Linux Ethernet driver. The support for advanced features will be added
-gradually.
-
-The driver has Rx and Tx checksum offloading for UDP and TCP. Currently the Rx
-checksum offload feature is enabled by default and cannot be controlled through
-ethtool. Also, rx-flow-hash and rx-hashing was added. The addition of RSS
-provides a big performance boost for the forwarding scenarios, allowing
-different traffic flows received by one interface to be processed by different
-CPUs in parallel.
-
-The driver has support for multiple prioritized Tx traffic classes. Priorities
-range from 0 (lowest) to 3 (highest). These are mapped to HW workqueues with
-strict priority levels. Each traffic class contains NR_CPU TX queues. By
-default, only one traffic class is enabled and the lowest priority Tx queues
-are used. Higher priority traffic classes can be enabled with the mqprio
-qdisc. For example, all four traffic classes are enabled on an interface with
-the following command. Furthermore, skb priority levels are mapped to traffic
-classes as follows:
-
-	* priorities 0 to 3 - traffic class 0 (low priority)
-	* priorities 4 to 7 - traffic class 1 (medium-low priority)
-	* priorities 8 to 11 - traffic class 2 (medium-high priority)
-	* priorities 12 to 15 - traffic class 3 (high priority)
-
-tc qdisc add dev <int> root handle 1: \
-	 mqprio num_tc 4 map 0 0 0 0 1 1 1 1 2 2 2 2 3 3 3 3 hw 1
-
-DPAA IRQ Affinity and Receive Side Scaling
-==========================================
-
-Traffic coming on the DPAA Rx queues or on the DPAA Tx confirmation
-queues is seen by the CPU as ingress traffic on a certain portal.
-The DPAA QMan portal interrupts are affined each to a certain CPU.
-The same portal interrupt services all the QMan portal consumers.
-
-By default the DPAA Ethernet driver enables RSS, making use of the
-DPAA FMan Parser and Keygen blocks to distribute traffic on 128
-hardware frame queues using a hash on IP v4/v6 source and destination
-and L4 source and destination ports, in present in the received frame.
-When RSS is disabled, all traffic received by a certain interface is
-received on the default Rx frame queue. The default DPAA Rx frame
-queues are configured to put the received traffic into a pool channel
-that allows any available CPU portal to dequeue the ingress traffic.
-The default frame queues have the HOLDACTIVE option set, ensuring that
-traffic bursts from a certain queue are serviced by the same CPU.
-This ensures a very low rate of frame reordering. A drawback of this
-is that only one CPU at a time can service the traffic received by a
-certain interface when RSS is not enabled.
-
-To implement RSS, the DPAA Ethernet driver allocates an extra set of
-128 Rx frame queues that are configured to dedicated channels, in a
-round-robin manner. The mapping of the frame queues to CPUs is now
-hardcoded, there is no indirection table to move traffic for a certain
-FQ (hash result) to another CPU. The ingress traffic arriving on one
-of these frame queues will arrive at the same portal and will always
-be processed by the same CPU. This ensures intra-flow order preservation
-and workload distribution for multiple traffic flows.
-
-RSS can be turned off for a certain interface using ethtool, i.e.
-
-	# ethtool -N fm1-mac9 rx-flow-hash tcp4 ""
-
-To turn it back on, one needs to set rx-flow-hash for tcp4/6 or udp4/6:
-
-	# ethtool -N fm1-mac9 rx-flow-hash udp4 sfdn
-
-There is no independent control for individual protocols, any command
-run for one of tcp4|udp4|ah4|esp4|sctp4|tcp6|udp6|ah6|esp6|sctp6 is
-going to control the rx-flow-hashing for all protocols on that interface.
-
-Besides using the FMan Keygen computed hash for spreading traffic on the
-128 Rx FQs, the DPAA Ethernet driver also sets the skb hash value when
-the NETIF_F_RXHASH feature is on (active by default). This can be turned
-on or off through ethtool, i.e.:
-
-	# ethtool -K fm1-mac9 rx-hashing off
-	# ethtool -k fm1-mac9 | grep hash
-	receive-hashing: off
-	# ethtool -K fm1-mac9 rx-hashing on
-	Actual changes:
-	receive-hashing: on
-	# ethtool -k fm1-mac9 | grep hash
-	receive-hashing: on
-
-Please note that Rx hashing depends upon the rx-flow-hashing being on
-for that interface - turning off rx-flow-hashing will also disable the
-rx-hashing (without ethtool reporting it as off as that depends on the
-NETIF_F_RXHASH feature flag).
-
-Debugging
-=========
-
-The following statistics are exported for each interface through ethtool:
-
-	- interrupt count per CPU
-	- Rx packets count per CPU
-	- Tx packets count per CPU
-	- Tx confirmed packets count per CPU
-	- Tx S/G frames count per CPU
-	- Tx error count per CPU
-	- Rx error count per CPU
-	- Rx error count per type
-	- congestion related statistics:
-		- congestion status
-		- time spent in congestion
-		- number of time the device entered congestion
-		- dropped packets count per cause
-
-The driver also exports the following information in sysfs:
-
-	- the FQ IDs for each FQ type
-	/sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/fqids
-
-	- the ID of the buffer pool in use
-	/sys/devices/platform/soc/<addr>.fman/<addr>.ethernet/dpaa-ethernet.<id>/net/fm<nr>-mac<nr>/bpids
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index e5d1863379cb..7e59ee43c030 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -37,6 +37,7 @@ Contents:
    dec/de4x5
    dec/dmfe
    dlink/dl2k
+   freescale/dpaa
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From dc67e91e7f7b2245a3a341e62217cb5e7163d60b Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:45 +0200
Subject: docs: networking: device drivers: convert freescale/gianfar.txt to
 ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- use :field: markup;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../device_drivers/freescale/gianfar.rst           | 51 ++++++++++++++++++++++
 .../device_drivers/freescale/gianfar.txt           | 42 ------------------
 Documentation/networking/device_drivers/index.rst  |  1 +
 3 files changed, 52 insertions(+), 42 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/freescale/gianfar.rst
 delete mode 100644 Documentation/networking/device_drivers/freescale/gianfar.txt

diff --git a/Documentation/networking/device_drivers/freescale/gianfar.rst b/Documentation/networking/device_drivers/freescale/gianfar.rst
new file mode 100644
index 000000000000..9c4a91d3824b
--- /dev/null
+++ b/Documentation/networking/device_drivers/freescale/gianfar.rst
@@ -0,0 +1,51 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Gianfar Ethernet Driver
+===========================
+
+:Author: Andy Fleming <afleming@freescale.com>
+:Updated: 2005-07-28
+
+
+Checksum Offloading
+===================
+
+The eTSEC controller (first included in parts from late 2005 like
+the 8548) has the ability to perform TCP, UDP, and IP checksums
+in hardware.  The Linux kernel only offloads the TCP and UDP
+checksums (and always performs the pseudo header checksums), so
+the driver only supports checksumming for TCP/IP and UDP/IP
+packets.  Use ethtool to enable or disable this feature for RX
+and TX.
+
+VLAN
+====
+
+In order to use VLAN, please consult Linux documentation on
+configuring VLANs.  The gianfar driver supports hardware insertion and
+extraction of VLAN headers, but not filtering.  Filtering will be
+done by the kernel.
+
+Multicasting
+============
+
+The gianfar driver supports using the group hash table on the
+TSEC (and the extended hash table on the eTSEC) for multicast
+filtering.  On the eTSEC, the exact-match MAC registers are used
+before the hash tables.  See Linux documentation on how to join
+multicast groups.
+
+Padding
+=======
+
+The gianfar driver supports padding received frames with 2 bytes
+to align the IP header to a 16-byte boundary, when supported by
+hardware.
+
+Ethtool
+=======
+
+The gianfar driver supports the use of ethtool for many
+configuration options.  You must run ethtool only on currently
+open interfaces.  See ethtool documentation for details.
diff --git a/Documentation/networking/device_drivers/freescale/gianfar.txt b/Documentation/networking/device_drivers/freescale/gianfar.txt
deleted file mode 100644
index ba1daea7f2e4..000000000000
--- a/Documentation/networking/device_drivers/freescale/gianfar.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-The Gianfar Ethernet Driver
-
-Author: Andy Fleming <afleming@freescale.com>
-Updated: 2005-07-28
-
-
-CHECKSUM OFFLOADING
-
-The eTSEC controller (first included in parts from late 2005 like
-the 8548) has the ability to perform TCP, UDP, and IP checksums
-in hardware.  The Linux kernel only offloads the TCP and UDP
-checksums (and always performs the pseudo header checksums), so
-the driver only supports checksumming for TCP/IP and UDP/IP
-packets.  Use ethtool to enable or disable this feature for RX
-and TX.
-
-VLAN
-
-In order to use VLAN, please consult Linux documentation on
-configuring VLANs.  The gianfar driver supports hardware insertion and
-extraction of VLAN headers, but not filtering.  Filtering will be
-done by the kernel.
-
-MULTICASTING
-
-The gianfar driver supports using the group hash table on the
-TSEC (and the extended hash table on the eTSEC) for multicast
-filtering.  On the eTSEC, the exact-match MAC registers are used
-before the hash tables.  See Linux documentation on how to join
-multicast groups.
-
-PADDING
-
-The gianfar driver supports padding received frames with 2 bytes
-to align the IP header to a 16-byte boundary, when supported by
-hardware.
-
-ETHTOOL
-
-The gianfar driver supports the use of ethtool for many
-configuration options.  You must run ethtool only on currently
-open interfaces.  See ethtool documentation for details.
diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 7e59ee43c030..cec3415ee459 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -38,6 +38,7 @@ Contents:
    dec/dmfe
    dlink/dl2k
    freescale/dpaa
+   freescale/gianfar
 
 .. only::  subproject and html
 
-- 
cgit v1.2.3-59-g8ed1b


From cf7eba49b2b160f98106b33ca12039b05d812140 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:46 +0200
Subject: docs: networking: device drivers: convert intel/ipw2100.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- use copyright symbol;
- use :field: markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/intel/ipw2100.rst    | 323 +++++++++++++++++++++
 .../networking/device_drivers/intel/ipw2100.txt    | 293 -------------------
 MAINTAINERS                                        |   2 +-
 drivers/net/wireless/intel/ipw2x00/Kconfig         |   2 +-
 drivers/net/wireless/intel/ipw2x00/ipw2100.c       |   2 +-
 6 files changed, 327 insertions(+), 296 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/intel/ipw2100.rst
 delete mode 100644 Documentation/networking/device_drivers/intel/ipw2100.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index cec3415ee459..54ed10f3d1a7 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -39,6 +39,7 @@ Contents:
    dlink/dl2k
    freescale/dpaa
    freescale/gianfar
+   intel/ipw2100
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/intel/ipw2100.rst b/Documentation/networking/device_drivers/intel/ipw2100.rst
new file mode 100644
index 000000000000..d54ad522f937
--- /dev/null
+++ b/Documentation/networking/device_drivers/intel/ipw2100.rst
@@ -0,0 +1,323 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+===========================================
+Intel(R) PRO/Wireless 2100 Driver for Linux
+===========================================
+
+Support for:
+
+- Intel(R) PRO/Wireless 2100 Network Connection
+
+Copyright |copy| 2003-2006, Intel Corporation
+
+README.ipw2100
+
+:Version: git-1.1.5
+:Date:    January 25, 2006
+
+.. Index
+
+    0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+    1. Introduction
+    2. Release git-1.1.5 Current Supported Features
+    3. Command Line Parameters
+    4. Sysfs Helper Files
+    5. Radio Kill Switch
+    6. Dynamic Firmware
+    7. Power Management
+    8. Support
+    9. License
+
+
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+=================================================
+
+Important Notice FOR ALL USERS OR DISTRIBUTORS!!!!
+
+Intel wireless LAN adapters are engineered, manufactured, tested, and
+quality checked to ensure that they meet all necessary local and
+governmental regulatory agency requirements for the regions that they
+are designated and/or marked to ship into. Since wireless LANs are
+generally unlicensed devices that share spectrum with radars,
+satellites, and other licensed and unlicensed devices, it is sometimes
+necessary to dynamically detect, avoid, and limit usage to avoid
+interference with these devices. In many instances Intel is required to
+provide test data to prove regional and local compliance to regional and
+governmental regulations before certification or approval to use the
+product is granted. Intel's wireless LAN's EEPROM, firmware, and
+software driver are designed to carefully control parameters that affect
+radio operation and to ensure electromagnetic compliance (EMC). These
+parameters include, without limitation, RF power, spectrum usage,
+channel scanning, and human exposure.
+
+For these reasons Intel cannot permit any manipulation by third parties
+of the software provided in binary format with the wireless WLAN
+adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
+patches, utilities, or code with the Intel wireless LAN adapters that
+have been manipulated by an unauthorized party (i.e., patches,
+utilities, or code (including open source code modifications) which have
+not been validated by Intel), (i) you will be solely responsible for
+ensuring the regulatory compliance of the products, (ii) Intel will bear
+no liability, under any theory of liability for any issues associated
+with the modified products, including without limitation, claims under
+the warranty and/or issues arising from regulatory non-compliance, and
+(iii) Intel will not provide or be required to assist in providing
+support to any third parties for such modified products.
+
+Note: Many regulatory agencies consider Wireless LAN adapters to be
+modules, and accordingly, condition system-level regulatory approval
+upon receipt and review of test data documenting that the antennas and
+system configuration do not cause the EMC and radio operation to be
+non-compliant.
+
+The drivers available for download from SourceForge are provided as a
+part of a development project.  Conformance to local regulatory
+requirements is the responsibility of the individual developer.  As
+such, if you are interested in deploying or shipping a driver as part of
+solution intended to be used for purposes other than development, please
+obtain a tested driver from Intel Customer Support at:
+
+http://www.intel.com/support/wireless/sb/CS-006408.htm
+
+1. Introduction
+===============
+
+This document provides a brief overview of the features supported by the
+IPW2100 driver project.  The main project website, where the latest
+development version of the driver can be found, is:
+
+	http://ipw2100.sourceforge.net
+
+There you can find not only the latest releases, but also information about
+potential fixes and patches, as well as links to the development mailing list
+for the driver project.
+
+
+2. Release git-1.1.5 Current Supported Features
+===============================================
+
+- Managed (BSS) and Ad-Hoc (IBSS)
+- WEP (shared key and open)
+- Wireless Tools support
+- 802.1x (tested with XSupplicant 1.0.1)
+
+Enabled (but not supported) features:
+- Monitor/RFMon mode
+- WPA/WPA2
+
+The distinction between officially supported and enabled is a reflection
+on the amount of validation and interoperability testing that has been
+performed on a given feature.
+
+
+3. Command Line Parameters
+==========================
+
+If the driver is built as a module, the following optional parameters are used
+by entering them on the command line with the modprobe command using this
+syntax::
+
+	modprobe ipw2100 [<option>=<VAL1><,VAL2>...]
+
+For example, to disable the radio on driver loading, enter::
+
+	modprobe ipw2100 disable=1
+
+The ipw2100 driver supports the following module parameters:
+
+=========	==============	============  ==============================
+Name		Value		Example       Meaning
+=========	==============	============  ==============================
+debug		0x0-0xffffffff	debug=1024    Debug level set to 1024
+mode		0,1,2		mode=1        AdHoc
+channel		int		channel=3     Only valid in AdHoc or Monitor
+associate	boolean		associate=0   Do NOT auto associate
+disable		boolean		disable=1     Do not power the HW
+=========	==============	============  ==============================
+
+
+4. Sysfs Helper Files
+=====================
+
+There are several ways to control the behavior of the driver.  Many of the
+general capabilities are exposed through the Wireless Tools (iwconfig).  There
+are a few capabilities that are exposed through entries in the Linux Sysfs.
+
+
+**Driver Level**
+
+For the driver level files, look in /sys/bus/pci/drivers/ipw2100/
+
+  debug_level
+	This controls the same global as the 'debug' module parameter.  For
+	information on the various debugging levels available, run the 'dvals'
+	script found in the driver source directory.
+
+	.. note::
+
+	      'debug_level' is only enabled if CONFIG_IPW2100_DEBUG is turned on.
+
+**Device Level**
+
+For the device level files look in::
+
+	/sys/bus/pci/drivers/ipw2100/{PCI-ID}/
+
+For example::
+
+	/sys/bus/pci/drivers/ipw2100/0000:02:01.0
+
+For the device level files, see /sys/bus/pci/drivers/ipw2100:
+
+  rf_kill
+	read
+
+	==  =========================================
+	0   RF kill not enabled (radio on)
+	1   SW based RF kill active (radio off)
+	2   HW based RF kill active (radio off)
+	3   Both HW and SW RF kill active (radio off)
+	==  =========================================
+
+	write
+
+	==  ==================================================
+	0   If SW based RF kill active, turn the radio back on
+	1   If radio is on, activate SW based RF kill
+	==  ==================================================
+
+	.. note::
+
+	   If you enable the SW based RF kill and then toggle the HW
+	   based RF kill from ON -> OFF -> ON, the radio will NOT come back on.
+
+
+5. Radio Kill Switch
+====================
+
+Most laptops provide the ability for the user to physically disable the radio.
+Some vendors have implemented this as a physical switch that requires no
+software to turn the radio off and on.  On other laptops, however, the switch
+is controlled through a button being pressed and a software driver then making
+calls to turn the radio off and on.  This is referred to as a "software based
+RF kill switch".
+
+See the Sysfs helper file 'rf_kill' for determining the state of the RF switch
+on your system.
+
+
+6. Dynamic Firmware
+===================
+
+As the firmware is licensed under a restricted use license, it can not be
+included within the kernel sources.  To enable the IPW2100 you will need a
+firmware image to load into the wireless NIC's processors.
+
+You can obtain these images from <http://ipw2100.sf.net/firmware.php>.
+
+See INSTALL for instructions on installing the firmware.
+
+
+7. Power Management
+===================
+
+The IPW2100 supports the configuration of the Power Save Protocol
+through a private wireless extension interface.  The IPW2100 supports
+the following different modes:
+
+	===	===========================================================
+	off	No power management.  Radio is always on.
+	on	Automatic power management
+	1-5	Different levels of power management.  The higher the
+		number the greater the power savings, but with an impact to
+		packet latencies.
+	===	===========================================================
+
+Power management works by powering down the radio after a certain
+interval of time has passed where no packets are passed through the
+radio.  Once powered down, the radio remains in that state for a given
+period of time.  For higher power savings, the interval between last
+packet processed to sleep is shorter and the sleep period is longer.
+
+When the radio is asleep, the access point sending data to the station
+must buffer packets at the AP until the station wakes up and requests
+any buffered packets.  If you have an AP that does not correctly support
+the PSP protocol you may experience packet loss or very poor performance
+while power management is enabled.  If this is the case, you will need
+to try and find a firmware update for your AP, or disable power
+management (via ``iwconfig eth1 power off``).
+
+To configure the power level on the IPW2100 you use a combination of
+iwconfig and iwpriv.  iwconfig is used to turn power management on, off,
+and set it to auto.
+
+	=========================  ====================================
+	iwconfig eth1 power off    Disables radio power down
+	iwconfig eth1 power on     Enables radio power management to
+				   last set level (defaults to AUTO)
+	iwpriv eth1 set_power 0    Sets power level to AUTO and enables
+				   power management if not previously
+				   enabled.
+	iwpriv eth1 set_power 1-5  Set the power level as specified,
+				   enabling power management if not
+				   previously enabled.
+	=========================  ====================================
+
+You can view the current power level setting via::
+
+	iwpriv eth1 get_power
+
+It will return the current period or timeout that is configured as a string
+in the form of xxxx/yyyy (z) where xxxx is the timeout interval (amount of
+time after packet processing), yyyy is the period to sleep (amount of time to
+wait before powering the radio and querying the access point for buffered
+packets), and z is the 'power level'.  If power management is turned off the
+xxxx/yyyy will be replaced with 'off' -- the level reported will be the active
+level if ``iwconfig eth1 power on`` is invoked.
+
+
+8. Support
+==========
+
+For general development information and support,
+go to:
+
+    http://ipw2100.sf.net/
+
+The ipw2100 1.1.0 driver and firmware can be downloaded from:
+
+    http://support.intel.com
+
+For installation support on the ipw2100 1.1.0 driver on Linux kernels
+2.6.8 or greater, email support is available from:
+
+    http://supportmail.intel.com
+
+9. License
+==========
+
+  Copyright |copy| 2003 - 2006 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License (version 2) as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc., 59
+  Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+  The full GNU General Public License is included in this distribution in the
+  file called LICENSE.
+
+  License Contact Information:
+
+  James P. Ketrenos <ipw2100-admin@linux.intel.com>
+
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
diff --git a/Documentation/networking/device_drivers/intel/ipw2100.txt b/Documentation/networking/device_drivers/intel/ipw2100.txt
deleted file mode 100644
index 6f85e1d06031..000000000000
--- a/Documentation/networking/device_drivers/intel/ipw2100.txt
+++ /dev/null
@@ -1,293 +0,0 @@
-
-Intel(R) PRO/Wireless 2100 Driver for Linux in support of:
-
-Intel(R) PRO/Wireless 2100 Network Connection
-
-Copyright (C) 2003-2006, Intel Corporation
-
-README.ipw2100
-
-Version: git-1.1.5
-Date   : January 25, 2006
-
-Index
------------------------------------------------
-0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
-1. Introduction
-2. Release git-1.1.5 Current Features
-3. Command Line Parameters
-4. Sysfs Helper Files
-5. Radio Kill Switch
-6. Dynamic Firmware
-7. Power Management
-8. Support
-9. License
-
-
-0.   IMPORTANT INFORMATION BEFORE USING THIS DRIVER
------------------------------------------------
-
-Important Notice FOR ALL USERS OR DISTRIBUTORS!!!!
-
-Intel wireless LAN adapters are engineered, manufactured, tested, and
-quality checked to ensure that they meet all necessary local and
-governmental regulatory agency requirements for the regions that they
-are designated and/or marked to ship into. Since wireless LANs are
-generally unlicensed devices that share spectrum with radars,
-satellites, and other licensed and unlicensed devices, it is sometimes
-necessary to dynamically detect, avoid, and limit usage to avoid
-interference with these devices. In many instances Intel is required to
-provide test data to prove regional and local compliance to regional and
-governmental regulations before certification or approval to use the
-product is granted. Intel's wireless LAN's EEPROM, firmware, and
-software driver are designed to carefully control parameters that affect
-radio operation and to ensure electromagnetic compliance (EMC). These
-parameters include, without limitation, RF power, spectrum usage,
-channel scanning, and human exposure.
-
-For these reasons Intel cannot permit any manipulation by third parties
-of the software provided in binary format with the wireless WLAN
-adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
-patches, utilities, or code with the Intel wireless LAN adapters that
-have been manipulated by an unauthorized party (i.e., patches,
-utilities, or code (including open source code modifications) which have
-not been validated by Intel), (i) you will be solely responsible for
-ensuring the regulatory compliance of the products, (ii) Intel will bear
-no liability, under any theory of liability for any issues associated
-with the modified products, including without limitation, claims under
-the warranty and/or issues arising from regulatory non-compliance, and
-(iii) Intel will not provide or be required to assist in providing
-support to any third parties for such modified products.
-
-Note: Many regulatory agencies consider Wireless LAN adapters to be
-modules, and accordingly, condition system-level regulatory approval
-upon receipt and review of test data documenting that the antennas and
-system configuration do not cause the EMC and radio operation to be
-non-compliant.
-
-The drivers available for download from SourceForge are provided as a
-part of a development project.  Conformance to local regulatory
-requirements is the responsibility of the individual developer.  As
-such, if you are interested in deploying or shipping a driver as part of
-solution intended to be used for purposes other than development, please
-obtain a tested driver from Intel Customer Support at:
-
-http://www.intel.com/support/wireless/sb/CS-006408.htm
-
-1. Introduction
------------------------------------------------
-
-This document provides a brief overview of the features supported by the 
-IPW2100 driver project.  The main project website, where the latest 
-development version of the driver can be found, is:
-
-	http://ipw2100.sourceforge.net
-
-There you can find the not only the latest releases, but also information about
-potential fixes and patches, as well as links to the development mailing list
-for the driver project.
-
-
-2. Release git-1.1.5 Current Supported Features
------------------------------------------------
-- Managed (BSS) and Ad-Hoc (IBSS)
-- WEP (shared key and open)
-- Wireless Tools support 
-- 802.1x (tested with XSupplicant 1.0.1)
-
-Enabled (but not supported) features:
-- Monitor/RFMon mode
-- WPA/WPA2
-
-The distinction between officially supported and enabled is a reflection
-on the amount of validation and interoperability testing that has been
-performed on a given feature.
-
-
-3. Command Line Parameters
------------------------------------------------
-
-If the driver is built as a module, the following optional parameters are used
-by entering them on the command line with the modprobe command using this
-syntax:
-
-	modprobe ipw2100 [<option>=<VAL1><,VAL2>...]
-
-For example, to disable the radio on driver loading, enter:
-
-	modprobe ipw2100 disable=1
-
-The ipw2100 driver supports the following module parameters:
-
-Name		Value		Example:
-debug		0x0-0xffffffff	debug=1024
-mode		0,1,2		mode=1   /* AdHoc */
-channel		int		channel=3 /* Only valid in AdHoc or Monitor */
-associate	boolean		associate=0 /* Do NOT auto associate */
-disable		boolean		disable=1 /* Do not power the HW */
-
-
-4. Sysfs Helper Files
----------------------------     
------------------------------------------------
-
-There are several ways to control the behavior of the driver.  Many of the 
-general capabilities are exposed through the Wireless Tools (iwconfig).  There
-are a few capabilities that are exposed through entries in the Linux Sysfs.
-
-
------ Driver Level ------
-For the driver level files, look in /sys/bus/pci/drivers/ipw2100/
-
-  debug_level  
-	
-	This controls the same global as the 'debug' module parameter.  For 
-        information on the various debugging levels available, run the 'dvals'
-	script found in the driver source directory.
-
-	NOTE:  'debug_level' is only enabled if CONFIG_IPW2100_DEBUG is turn
-	       on.
-
------ Device Level ------
-For the device level files look in
-	
-	/sys/bus/pci/drivers/ipw2100/{PCI-ID}/
-
-For example:
-	/sys/bus/pci/drivers/ipw2100/0000:02:01.0
-
-For the device level files, see /sys/bus/pci/drivers/ipw2100:
-
-  rf_kill
-	read - 
-	0 = RF kill not enabled (radio on)
-	1 = SW based RF kill active (radio off)
-	2 = HW based RF kill active (radio off)
-	3 = Both HW and SW RF kill active (radio off)
-	write -
-	0 = If SW based RF kill active, turn the radio back on
-	1 = If radio is on, activate SW based RF kill
-
-	NOTE: If you enable the SW based RF kill and then toggle the HW
-  	based RF kill from ON -> OFF -> ON, the radio will NOT come back on
-
-
-5. Radio Kill Switch
------------------------------------------------
-Most laptops provide the ability for the user to physically disable the radio.
-Some vendors have implemented this as a physical switch that requires no
-software to turn the radio off and on.  On other laptops, however, the switch
-is controlled through a button being pressed and a software driver then making
-calls to turn the radio off and on.  This is referred to as a "software based
-RF kill switch"
-
-See the Sysfs helper file 'rf_kill' for determining the state of the RF switch
-on your system.
-
-
-6. Dynamic Firmware
------------------------------------------------
-As the firmware is licensed under a restricted use license, it can not be 
-included within the kernel sources.  To enable the IPW2100 you will need a 
-firmware image to load into the wireless NIC's processors.
-
-You can obtain these images from <http://ipw2100.sf.net/firmware.php>.
-
-See INSTALL for instructions on installing the firmware.
-
-
-7. Power Management
------------------------------------------------
-The IPW2100 supports the configuration of the Power Save Protocol 
-through a private wireless extension interface.  The IPW2100 supports 
-the following different modes:
-
-	off	No power management.  Radio is always on.
-	on	Automatic power management
-	1-5	Different levels of power management.  The higher the 
-		number the greater the power savings, but with an impact to 
-		packet latencies. 
-
-Power management works by powering down the radio after a certain 
-interval of time has passed where no packets are passed through the 
-radio.  Once powered down, the radio remains in that state for a given 
-period of time.  For higher power savings, the interval between last 
-packet processed to sleep is shorter and the sleep period is longer.
-
-When the radio is asleep, the access point sending data to the station 
-must buffer packets at the AP until the station wakes up and requests 
-any buffered packets.  If you have an AP that does not correctly support 
-the PSP protocol you may experience packet loss or very poor performance 
-while power management is enabled.  If this is the case, you will need 
-to try and find a firmware update for your AP, or disable power 
-management (via `iwconfig eth1 power off`)
-
-To configure the power level on the IPW2100 you use a combination of 
-iwconfig and iwpriv.  iwconfig is used to turn power management on, off, 
-and set it to auto.
-
-	iwconfig eth1 power off    Disables radio power down
-	iwconfig eth1 power on     Enables radio power management to 
-				   last set level (defaults to AUTO)
-	iwpriv eth1 set_power 0    Sets power level to AUTO and enables 
-				   power management if not previously 
-				   enabled.
-	iwpriv eth1 set_power 1-5  Set the power level as specified, 
-				   enabling power management if not 
-				   previously enabled.
-
-You can view the current power level setting via:
-	
-	iwpriv eth1 get_power
-
-It will return the current period or timeout that is configured as a string
-in the form of xxxx/yyyy (z) where xxxx is the timeout interval (amount of
-time after packet processing), yyyy is the period to sleep (amount of time to 
-wait before powering the radio and querying the access point for buffered
-packets), and z is the 'power level'.  If power management is turned off the
-xxxx/yyyy will be replaced with 'off' -- the level reported will be the active
-level if `iwconfig eth1 power on` is invoked.
-
-
-8. Support
------------------------------------------------
-
-For general development information and support,
-go to:
-	
-    http://ipw2100.sf.net/
-
-The ipw2100 1.1.0 driver and firmware can be downloaded from:  
-
-    http://support.intel.com
-
-For installation support on the ipw2100 1.1.0 driver on Linux kernels 
-2.6.8 or greater, email support is available from:  
-
-    http://supportmail.intel.com
-
-9. License
------------------------------------------------
-
-  Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify it 
-  under the terms of the GNU General Public License (version 2) as 
-  published by the Free Software Foundation.
-  
-  This program is distributed in the hope that it will be useful, but WITHOUT 
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-  more details.
-  
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc., 59 
-  Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-  
-  The full GNU General Public License is included in this distribution in the
-  file called LICENSE.
-  
-  License Contact Information:
-  James P. Ketrenos <ipw2100-admin@linux.intel.com>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
diff --git a/MAINTAINERS b/MAINTAINERS
index f0b18c156176..887c4e7e6102 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8742,7 +8742,7 @@ INTEL PRO/WIRELESS 2100, 2200BG, 2915ABG NETWORK CONNECTION SUPPORT
 M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/device_drivers/intel/ipw2100.txt
+F:	Documentation/networking/device_drivers/intel/ipw2100.rst
 F:	Documentation/networking/device_drivers/intel/ipw2200.txt
 F:	drivers/net/wireless/intel/ipw2x00/
 
diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig
index ab17903ba9f8..b0b3cd6296f3 100644
--- a/drivers/net/wireless/intel/ipw2x00/Kconfig
+++ b/drivers/net/wireless/intel/ipw2x00/Kconfig
@@ -16,7 +16,7 @@ config IPW2100
 	  A driver for the Intel PRO/Wireless 2100 Network
 	  Connection 802.11b wireless network adapter.
 
-	  See <file:Documentation/networking/device_drivers/intel/ipw2100.txt>
+	  See <file:Documentation/networking/device_drivers/intel/ipw2100.rst>
 	  for information on the capabilities currently enabled in this driver
 	  and for tips for debugging issues and problems.
 
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2100.c b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
index 97ea6e2035e6..624fe721e2b5 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2100.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2100.c
@@ -8352,7 +8352,7 @@ static int ipw2100_mod_firmware_load(struct ipw2100_fw *fw)
 	if (IPW2100_FW_MAJOR(h->version) != IPW2100_FW_MAJOR_VERSION) {
 		printk(KERN_WARNING DRV_NAME ": Firmware image not compatible "
 		       "(detected version id of %u). "
-		       "See Documentation/networking/device_drivers/intel/ipw2100.txt\n",
+		       "See Documentation/networking/device_drivers/intel/ipw2100.rst\n",
 		       h->version);
 		return 1;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From c81f195703270a330f04ae41b9890b13c101a63f Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:47 +0200
Subject: docs: networking: device drivers: convert intel/ipw2200.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- use copyright symbol;
- use :field: markup;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/intel/ipw2200.rst    | 526 +++++++++++++++++++++
 .../networking/device_drivers/intel/ipw2200.txt    | 472 ------------------
 MAINTAINERS                                        |   2 +-
 drivers/net/wireless/intel/ipw2x00/Kconfig         |   2 +-
 5 files changed, 529 insertions(+), 474 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/intel/ipw2200.rst
 delete mode 100644 Documentation/networking/device_drivers/intel/ipw2200.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 54ed10f3d1a7..f9ce0089ec7d 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -40,6 +40,7 @@ Contents:
    freescale/dpaa
    freescale/gianfar
    intel/ipw2100
+   intel/ipw2200
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/intel/ipw2200.rst b/Documentation/networking/device_drivers/intel/ipw2200.rst
new file mode 100644
index 000000000000..0cb42d2fd7e5
--- /dev/null
+++ b/Documentation/networking/device_drivers/intel/ipw2200.rst
@@ -0,0 +1,526 @@
+.. SPDX-License-Identifier: GPL-2.0
+.. include:: <isonum.txt>
+
+==============================================
+Intel(R) PRO/Wireless 2915ABG Driver for Linux
+==============================================
+
+
+Support for:
+
+- Intel(R) PRO/Wireless 2200BG Network Connection
+- Intel(R) PRO/Wireless 2915ABG Network Connection
+
+Note: The Intel(R) PRO/Wireless 2915ABG Driver for Linux and Intel(R)
+PRO/Wireless 2200BG Driver for Linux is a unified driver that works on
+both hardware adapters listed above. In this document the Intel(R)
+PRO/Wireless 2915ABG Driver for Linux will be used to reference the
+unified driver.
+
+Copyright |copy| 2004-2006, Intel Corporation
+
+README.ipw2200
+
+:Version: 1.1.2
+:Date: March 30, 2006
+
+
+.. Index
+
+    0.   IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+    1.   Introduction
+    1.1. Overview of features
+    1.2. Module parameters
+    1.3. Wireless Extension Private Methods
+    1.4. Sysfs Helper Files
+    1.5. Supported channels
+    2.   Ad-Hoc Networking
+    3.   Interacting with Wireless Tools
+    3.1. iwconfig mode
+    3.2. iwconfig sens
+    4.   About the Version Numbers
+    5.   Firmware installation
+    6.   Support
+    7.   License
+
+
+0. IMPORTANT INFORMATION BEFORE USING THIS DRIVER
+=================================================
+
+Important Notice FOR ALL USERS OR DISTRIBUTORS!!!!
+
+Intel wireless LAN adapters are engineered, manufactured, tested, and
+quality checked to ensure that they meet all necessary local and
+governmental regulatory agency requirements for the regions that they
+are designated and/or marked to ship into. Since wireless LANs are
+generally unlicensed devices that share spectrum with radars,
+satellites, and other licensed and unlicensed devices, it is sometimes
+necessary to dynamically detect, avoid, and limit usage to avoid
+interference with these devices. In many instances Intel is required to
+provide test data to prove regional and local compliance to regional and
+governmental regulations before certification or approval to use the
+product is granted. Intel's wireless LAN's EEPROM, firmware, and
+software driver are designed to carefully control parameters that affect
+radio operation and to ensure electromagnetic compliance (EMC). These
+parameters include, without limitation, RF power, spectrum usage,
+channel scanning, and human exposure.
+
+For these reasons Intel cannot permit any manipulation by third parties
+of the software provided in binary format with the wireless WLAN
+adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
+patches, utilities, or code with the Intel wireless LAN adapters that
+have been manipulated by an unauthorized party (i.e., patches,
+utilities, or code (including open source code modifications) which have
+not been validated by Intel), (i) you will be solely responsible for
+ensuring the regulatory compliance of the products, (ii) Intel will bear
+no liability, under any theory of liability for any issues associated
+with the modified products, including without limitation, claims under
+the warranty and/or issues arising from regulatory non-compliance, and
+(iii) Intel will not provide or be required to assist in providing
+support to any third parties for such modified products.
+
+Note: Many regulatory agencies consider Wireless LAN adapters to be
+modules, and accordingly, condition system-level regulatory approval
+upon receipt and review of test data documenting that the antennas and
+system configuration do not cause the EMC and radio operation to be
+non-compliant.
+
+The drivers available for download from SourceForge are provided as a
+part of a development project.  Conformance to local regulatory
+requirements is the responsibility of the individual developer.  As
+such, if you are interested in deploying or shipping a driver as part of
+a solution intended to be used for purposes other than development, please
+obtain a tested driver from Intel Customer Support at:
+
+http://support.intel.com
+
+
+1. Introduction
+===============
+
+The following sections attempt to provide a brief introduction to using
+the Intel(R) PRO/Wireless 2915ABG Driver for Linux.
+
+This document is not meant to be a comprehensive manual on
+understanding or using wireless technologies, but should be sufficient
+to get you moving without wires on Linux.
+
+For information on building and installing the driver, see the INSTALL
+file.
+
+
+1.1. Overview of Features
+-------------------------
+The current release (1.1.2) supports the following features:
+
++ BSS mode (Infrastructure, Managed)
++ IBSS mode (Ad-Hoc)
++ WEP (OPEN and SHARED KEY mode)
++ 802.1x EAP via wpa_supplicant and xsupplicant
++ Wireless Extension support
++ Full B and G rate support (2200 and 2915)
++ Full A rate support (2915 only)
++ Transmit power control
++ S state support (ACPI suspend/resume)
+
+The following features are currently enabled, but not officially
+supported:
+
++ WPA
++ long/short preamble support
++ Monitor mode (aka RFMon)
+
+The distinction between officially supported and enabled is a reflection
+on the amount of validation and interoperability testing that has been
+performed on a given feature.
+
+
+
+1.2. Command Line Parameters
+----------------------------
+
+Like many modules used in the Linux kernel, the Intel(R) PRO/Wireless
+2915ABG Driver for Linux allows configuration options to be provided
+as module parameters.  The most common way to specify a module parameter
+is via the command line.
+
+The general form is::
+
+    % modprobe ipw2200 parameter=value
+
+Where the supported parameters are:
+
+  associate
+	Set to 0 to disable the auto scan-and-associate functionality of the
+	driver.  If disabled, the driver will not attempt to scan
+	for and associate to a network until it has been configured with
+	one or more properties for the target network, for example configuring
+	the network SSID.  Default is 0 (do not auto-associate)
+
+	Example: % modprobe ipw2200 associate=0
+
+  auto_create
+	Set to 0 to disable the auto creation of an Ad-Hoc network
+	matching the channel and network name parameters provided.
+	Default is 1.
+
+  channel
+	channel number for association.  The normal method for setting
+	the channel would be to use the standard wireless tools
+	(i.e. `iwconfig eth1 channel 10`), but it is useful sometimes
+	to set this while debugging.  Channel 0 means 'ANY'
+
+  debug
+	If using a debug build, this is used to control the amount of debug
+	info that is logged.  See the 'dvals' and 'load' script for more info on
+	how to use this (the dvals and load scripts are provided as part
+	of the ipw2200 development snapshot releases available from the
+	SourceForge project at http://ipw2200.sf.net)
+
+  led
+	Can be used to turn on experimental LED code.
+	0 = Off, 1 = On.  Default is 1.
+
+  mode
+	Can be used to set the default mode of the adapter.
+	0 = Managed, 1 = Ad-Hoc, 2 = Monitor
+
+
+1.3. Wireless Extension Private Methods
+---------------------------------------
+
+As an interface designed to handle generic hardware, there are certain
+capabilities not exposed through the normal Wireless Tool interface.  As
+such, a provision is provided for a driver to declare custom, or
+private, methods.  The Intel(R) PRO/Wireless 2915ABG Driver for Linux
+defines several of these to configure various settings.
+
+The general form of using the private wireless methods is::
+
+	% iwpriv $IFNAME method parameters
+
+Where $IFNAME is the interface name the device is registered with
+(typically eth1, customized via one of the various network interface
+name managers, such as ifrename)
+
+The supported private methods are:
+
+  get_mode
+	Can be used to report out which IEEE mode the driver is
+	configured to support.  Example::
+
+	    % iwpriv eth1 get_mode
+	    eth1	get_mode:802.11bg (6)
+
+  set_mode
+	Can be used to configure which IEEE mode the driver will
+	support.
+
+	Usage::
+
+	    % iwpriv eth1 set_mode {mode}
+
+	Where {mode} is a number in the range 1-7:
+
+	==	=====================
+	1	802.11a (2915 only)
+	2	802.11b
+	3	802.11ab (2915 only)
+	4	802.11g
+	5	802.11ag (2915 only)
+	6	802.11bg
+	7	802.11abg (2915 only)
+	==	=====================
+
+  get_preamble
+	Can be used to report configuration of preamble length.
+
+  set_preamble
+	Can be used to set the configuration of preamble length:
+
+	Usage::
+
+	    % iwpriv eth1 set_preamble {mode}
+
+	Where {mode} is one of:
+
+	==	========================================
+	1	Long preamble only
+	0	Auto (long or short based on connection)
+	==	========================================
+
+
+1.4. Sysfs Helper Files
+-----------------------
+
+The Linux kernel provides a pseudo file system that can be used to
+access various components of the operating system.  The Intel(R)
+PRO/Wireless 2915ABG Driver for Linux exposes several configuration
+parameters through this mechanism.
+
+An entry in the sysfs can support reading and/or writing.  You can
+typically query the contents of a sysfs entry through the use of cat,
+and can set the contents via echo.  For example::
+
+    % cat /sys/bus/pci/drivers/ipw2200/debug_level
+
+Will report the current debug level of the driver's logging subsystem
+(only available if CONFIG_IPW2200_DEBUG was configured when the driver
+was built).
+
+You can set the debug level via::
+
+    % echo $VALUE > /sys/bus/pci/drivers/ipw2200/debug_level
+
+Where $VALUE would be a number in the case of this sysfs entry.  The
+input to sysfs files does not have to be a number.  For example, the
+firmware loader used by hotplug utilizes sysfs entries for transferring
+the firmware image from user space into the driver.
+
+The Intel(R) PRO/Wireless 2915ABG Driver for Linux exposes sysfs entries
+at two levels -- driver level, which applies to all instances of the driver
+(in the event that there is more than one device installed) and device
+level, which applies only to the single specific instance.
+
+
+1.4.1 Driver Level Sysfs Helper Files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For the driver level files, look in /sys/bus/pci/drivers/ipw2200/
+
+  debug_level
+	This controls the same global as the 'debug' module parameter
+
+
+
+1.4.2 Device Level Sysfs Helper Files
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
+
+For the device level files, look in::
+
+	/sys/bus/pci/drivers/ipw2200/{PCI-ID}/
+
+For example::
+
+	/sys/bus/pci/drivers/ipw2200/0000:02:01.0
+
+For the device level files, see /sys/bus/pci/drivers/ipw2200:
+
+  rf_kill
+	read -
+
+	==  =========================================
+	0   RF kill not enabled (radio on)
+	1   SW based RF kill active (radio off)
+	2   HW based RF kill active (radio off)
+	3   Both HW and SW RF kill active (radio off)
+	==  =========================================
+
+	write -
+
+	==  ==================================================
+	0   If SW based RF kill active, turn the radio back on
+	1   If radio is on, activate SW based RF kill
+	==  ==================================================
+
+	.. note::
+
+	   If you enable the SW based RF kill and then toggle the HW
+	   based RF kill from ON -> OFF -> ON, the radio will NOT come back on
+
+  ucode
+	read-only access to the ucode version number
+
+  led
+	read -
+
+	==  =================
+	0   LED code disabled
+	1   LED code enabled
+	==  =================
+
+	write -
+
+	==  ================
+	0   Disable LED code
+	1   Enable LED code
+	==  ================
+
+
+	.. note::
+
+	   The LED code has been reported to hang some systems when
+	   running ifconfig and is therefore disabled by default.
+
+
+1.5. Supported channels
+-----------------------
+
+Upon loading the Intel(R) PRO/Wireless 2915ABG Driver for Linux, a
+message stating the detected geography code and the number of 802.11
+channels supported by the card will be displayed in the log.
+
+The geography code corresponds to a regulatory domain as shown in the
+table below.
+
+	+------+----------------------------+--------------------+
+	|      |			    | Supported channels |
+	| Code |        Geography	    +----------+---------+
+	|      |			    | 802.11bg | 802.11a |
+	+======+============================+==========+=========+
+	| ---  | Restricted 		    |  11      |   0     |
+	+------+----------------------------+----------+---------+
+	| ZZF  | Custom US/Canada 	    |  11      |   8     |
+	+------+----------------------------+----------+---------+
+	| ZZD  | Rest of World 		    |  13      |   0     |
+	+------+----------------------------+----------+---------+
+	| ZZA  | Custom USA & Europe & High |  11      |  13     |
+	+------+----------------------------+----------+---------+
+	| ZZB  | Custom NA & Europe	    |  11      |  13     |
+	+------+----------------------------+----------+---------+
+	| ZZC  | Custom Japan 		    |  11      |   4     |
+	+------+----------------------------+----------+---------+
+	| ZZM  | Custom  		    |  11      |   0     |
+	+------+----------------------------+----------+---------+
+	| ZZE  | Europe 		    |  13      |  19     |
+	+------+----------------------------+----------+---------+
+	| ZZJ  | Custom Japan 		    |  14      |   4     |
+	+------+----------------------------+----------+---------+
+	| ZZR  | Rest of World		    |  14      |   0     |
+	+------+----------------------------+----------+---------+
+	| ZZH  | High Band		    |  13      |   4     |
+	+------+----------------------------+----------+---------+
+	| ZZG  | Custom Europe		    |  13      |   4     |
+	+------+----------------------------+----------+---------+
+	| ZZK  | Europe 		    |  13      |  24     |
+	+------+----------------------------+----------+---------+
+	| ZZL  | Europe 		    |  11      |  13     |
+	+------+----------------------------+----------+---------+
+
+2.  Ad-Hoc Networking
+=====================
+
+When using a device in an Ad-Hoc network, it is useful to understand the
+sequence and requirements for the driver to be able to create, join, or
+merge networks.
+
+The following attempts to provide enough information so that you can
+have a consistent experience while using the driver as a member of an
+Ad-Hoc network.
+
+2.1. Joining an Ad-Hoc Network
+------------------------------
+
+The easiest way to get onto an Ad-Hoc network is to join one that
+already exists.
+
+2.2. Creating an Ad-Hoc Network
+-------------------------------
+
+An Ad-Hoc network is created using the syntax of the Wireless tool.
+For example::
+
+    iwconfig eth1 mode ad-hoc essid testing channel 2
+
+2.3. Merging Ad-Hoc Networks
+----------------------------
+
+
+3. Interaction with Wireless Tools
+==================================
+
+3.1 iwconfig mode
+-----------------
+
+When configuring the mode of the adapter, all run-time configured parameters
+are reset to the value used when the module was loaded.  This includes
+channels, rates, ESSID, etc.
+
+3.2 iwconfig sens
+-----------------
+
+The 'iwconfig ethX sens XX' command will not set the signal sensitivity
+threshold, as described in iwconfig documentation, but rather the number
+of consecutive missed beacons that will trigger handover, i.e. roaming
+to another access point. At the same time, it will set the disassociation
+threshold to 3 times the given value.
+
+
+4.  About the Version Numbers
+=============================
+
+Due to the nature of open source development projects, there are
+frequently changes being incorporated that have not gone through
+a complete validation process.  These changes are incorporated into
+development snapshot releases.
+
+Releases are numbered with a three level scheme:
+
+	major.minor.development
+
+Any version where the 'development' portion is 0 (for example
+1.0.0, 1.1.0, etc.) indicates a stable version that will be made
+available for kernel inclusion.
+
+Any version where the 'development' portion is not a 0 (for
+example 1.0.1, 1.1.5, etc.) indicates a development version that is
+being made available for testing and cutting edge users.  The stability
+and functionality of the development releases are not known.  We make
+efforts to try and keep all snapshots reasonably stable, but due to the
+frequency of their release, and the desire to get those releases
+available as quickly as possible, unknown anomalies should be expected.
+
+The major version number will be incremented when significant changes
+are made to the driver.  Currently, there are no major changes planned.
+
+5. Firmware installation
+========================
+
+The driver requires a firmware image; download it and extract the
+files under /lib/firmware (or wherever your hotplug's firmware.agent
+will look for firmware files)
+
+The firmware can be downloaded from the following URL:
+
+    http://ipw2200.sf.net/
+
+
+6. Support
+==========
+
+For direct support of the 1.0.0 version, you can contact
+http://supportmail.intel.com, or you can use the open source project
+support.
+
+For general information and support, go to:
+
+    http://ipw2200.sf.net/
+
+
+7. License
+==========
+
+  Copyright |copy| 2003 - 2006 Intel Corporation. All rights reserved.
+
+  This program is free software; you can redistribute it and/or modify it
+  under the terms of the GNU General Public License version 2 as
+  published by the Free Software Foundation.
+
+  This program is distributed in the hope that it will be useful, but WITHOUT
+  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or
+  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for
+  more details.
+
+  You should have received a copy of the GNU General Public License along with
+  this program; if not, write to the Free Software Foundation, Inc., 59
+  Temple Place - Suite 330, Boston, MA  02111-1307, USA.
+
+  The full GNU General Public License is included in this distribution in the
+  file called LICENSE.
+
+  Contact Information:
+
+  James P. Ketrenos <ipw2100-admin@linux.intel.com>
+
+  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
+
diff --git a/Documentation/networking/device_drivers/intel/ipw2200.txt b/Documentation/networking/device_drivers/intel/ipw2200.txt
deleted file mode 100644
index b7658bed4906..000000000000
--- a/Documentation/networking/device_drivers/intel/ipw2200.txt
+++ /dev/null
@@ -1,472 +0,0 @@
-
-Intel(R) PRO/Wireless 2915ABG Driver for Linux in support of:
-
-Intel(R) PRO/Wireless 2200BG Network Connection
-Intel(R) PRO/Wireless 2915ABG Network Connection
-
-Note: The Intel(R) PRO/Wireless 2915ABG Driver for Linux and Intel(R)
-PRO/Wireless 2200BG Driver for Linux is a unified driver that works on
-both hardware adapters listed above. In this document the Intel(R)
-PRO/Wireless 2915ABG Driver for Linux will be used to reference the
-unified driver.
-
-Copyright (C) 2004-2006, Intel Corporation
-
-README.ipw2200
-
-Version: 1.1.2
-Date   : March 30, 2006
-
-
-Index
------------------------------------------------
-0.   IMPORTANT INFORMATION BEFORE USING THIS DRIVER
-1.   Introduction
-1.1. Overview of features
-1.2. Module parameters
-1.3. Wireless Extension Private Methods
-1.4. Sysfs Helper Files
-1.5. Supported channels
-2.   Ad-Hoc Networking
-3.   Interacting with Wireless Tools
-3.1. iwconfig mode
-3.2. iwconfig sens
-4.   About the Version Numbers
-5.   Firmware installation
-6.   Support
-7.   License
-
-
-0.   IMPORTANT INFORMATION BEFORE USING THIS DRIVER
------------------------------------------------
-
-Important Notice FOR ALL USERS OR DISTRIBUTORS!!!! 
-
-Intel wireless LAN adapters are engineered, manufactured, tested, and
-quality checked to ensure that they meet all necessary local and
-governmental regulatory agency requirements for the regions that they
-are designated and/or marked to ship into. Since wireless LANs are
-generally unlicensed devices that share spectrum with radars,
-satellites, and other licensed and unlicensed devices, it is sometimes
-necessary to dynamically detect, avoid, and limit usage to avoid
-interference with these devices. In many instances Intel is required to
-provide test data to prove regional and local compliance to regional and
-governmental regulations before certification or approval to use the
-product is granted. Intel's wireless LAN's EEPROM, firmware, and
-software driver are designed to carefully control parameters that affect
-radio operation and to ensure electromagnetic compliance (EMC). These
-parameters include, without limitation, RF power, spectrum usage,
-channel scanning, and human exposure. 
-
-For these reasons Intel cannot permit any manipulation by third parties
-of the software provided in binary format with the wireless WLAN
-adapters (e.g., the EEPROM and firmware). Furthermore, if you use any
-patches, utilities, or code with the Intel wireless LAN adapters that
-have been manipulated by an unauthorized party (i.e., patches,
-utilities, or code (including open source code modifications) which have
-not been validated by Intel), (i) you will be solely responsible for
-ensuring the regulatory compliance of the products, (ii) Intel will bear
-no liability, under any theory of liability for any issues associated
-with the modified products, including without limitation, claims under
-the warranty and/or issues arising from regulatory non-compliance, and
-(iii) Intel will not provide or be required to assist in providing
-support to any third parties for such modified products.  
-
-Note: Many regulatory agencies consider Wireless LAN adapters to be
-modules, and accordingly, condition system-level regulatory approval
-upon receipt and review of test data documenting that the antennas and
-system configuration do not cause the EMC and radio operation to be
-non-compliant.
-
-The drivers available for download from SourceForge are provided as a 
-part of a development project.  Conformance to local regulatory 
-requirements is the responsibility of the individual developer.  As 
-such, if you are interested in deploying or shipping a driver as part of 
-solution intended to be used for purposes other than development, please 
-obtain a tested driver from Intel Customer Support at:
-
-http://support.intel.com
-
-
-1.   Introduction
------------------------------------------------
-The following sections attempt to provide a brief introduction to using 
-the Intel(R) PRO/Wireless 2915ABG Driver for Linux.
-
-This document is not meant to be a comprehensive manual on 
-understanding or using wireless technologies, but should be sufficient 
-to get you moving without wires on Linux.
-
-For information on building and installing the driver, see the INSTALL
-file.
-
-
-1.1. Overview of Features
------------------------------------------------
-The current release (1.1.2) supports the following features:
-
-+ BSS mode (Infrastructure, Managed)
-+ IBSS mode (Ad-Hoc)
-+ WEP (OPEN and SHARED KEY mode)
-+ 802.1x EAP via wpa_supplicant and xsupplicant
-+ Wireless Extension support 
-+ Full B and G rate support (2200 and 2915)
-+ Full A rate support (2915 only)
-+ Transmit power control
-+ S state support (ACPI suspend/resume)
-
-The following features are currently enabled, but not officially
-supported:
-
-+ WPA
-+ long/short preamble support
-+ Monitor mode (aka RFMon)
-
-The distinction between officially supported and enabled is a reflection 
-on the amount of validation and interoperability testing that has been
-performed on a given feature. 
-
-
-
-1.2. Command Line Parameters
------------------------------------------------
-
-Like many modules used in the Linux kernel, the Intel(R) PRO/Wireless
-2915ABG Driver for Linux allows configuration options to be provided 
-as module parameters.  The most common way to specify a module parameter 
-is via the command line.  
-
-The general form is:
-
-% modprobe ipw2200 parameter=value
-
-Where the supported parameter are:
-
-  associate
-	Set to 0 to disable the auto scan-and-associate functionality of the
-	driver.  If disabled, the driver will not attempt to scan 
-	for and associate to a network until it has been configured with 
-	one or more properties for the target network, for example configuring 
-	the network SSID.  Default is 0 (do not auto-associate)
-	
-	Example: % modprobe ipw2200 associate=0
-
-  auto_create
-	Set to 0 to disable the auto creation of an Ad-Hoc network 
-	matching the channel and network name parameters provided.  
-	Default is 1.
-
-  channel
-	channel number for association.  The normal method for setting
-        the channel would be to use the standard wireless tools
-        (i.e. `iwconfig eth1 channel 10`), but it is useful sometimes
-	to set this while debugging.  Channel 0 means 'ANY'
-
-  debug
-	If using a debug build, this is used to control the amount of debug
-	info is logged.  See the 'dvals' and 'load' script for more info on
-	how to use this (the dvals and load scripts are provided as part 
-	of the ipw2200 development snapshot releases available from the 
-	SourceForge project at http://ipw2200.sf.net)
-  
-  led
-	Can be used to turn on experimental LED code.
-	0 = Off, 1 = On.  Default is 1.
-
-  mode
-	Can be used to set the default mode of the adapter.  
-	0 = Managed, 1 = Ad-Hoc, 2 = Monitor
-
-
-1.3. Wireless Extension Private Methods
------------------------------------------------
-
-As an interface designed to handle generic hardware, there are certain 
-capabilities not exposed through the normal Wireless Tool interface.  As 
-such, a provision is provided for a driver to declare custom, or 
-private, methods.  The Intel(R) PRO/Wireless 2915ABG Driver for Linux 
-defines several of these to configure various settings.
-
-The general form of using the private wireless methods is:
-
-	% iwpriv $IFNAME method parameters
-
-Where $IFNAME is the interface name the device is registered with 
-(typically eth1, customized via one of the various network interface
-name managers, such as ifrename)
-
-The supported private methods are:
-
-  get_mode
-	Can be used to report out which IEEE mode the driver is 
-	configured to support.  Example:
-	
-	% iwpriv eth1 get_mode
-	eth1	get_mode:802.11bg (6)
-
-  set_mode
-	Can be used to configure which IEEE mode the driver will 
-	support.  
-
-	Usage:
-	% iwpriv eth1 set_mode {mode}
-	Where {mode} is a number in the range 1-7:
-	1	802.11a (2915 only)
-	2	802.11b
-	3	802.11ab (2915 only)
-	4	802.11g 
-	5	802.11ag (2915 only)
-	6	802.11bg
-	7	802.11abg (2915 only)
-
-  get_preamble
-	Can be used to report configuration of preamble length.
-
-  set_preamble
-	Can be used to set the configuration of preamble length:
-
-	Usage:
-	% iwpriv eth1 set_preamble {mode}
-	Where {mode} is one of:
-	1	Long preamble only
-	0	Auto (long or short based on connection)
-	
-
-1.4. Sysfs Helper Files:
------------------------------------------------
-
-The Linux kernel provides a pseudo file system that can be used to 
-access various components of the operating system.  The Intel(R)
-PRO/Wireless 2915ABG Driver for Linux exposes several configuration
-parameters through this mechanism.
-
-An entry in the sysfs can support reading and/or writing.  You can 
-typically query the contents of a sysfs entry through the use of cat, 
-and can set the contents via echo.  For example:
-
-% cat /sys/bus/pci/drivers/ipw2200/debug_level
-
-Will report the current debug level of the driver's logging subsystem 
-(only available if CONFIG_IPW2200_DEBUG was configured when the driver
-was built).
-
-You can set the debug level via:
-
-% echo $VALUE > /sys/bus/pci/drivers/ipw2200/debug_level
-
-Where $VALUE would be a number in the case of this sysfs entry.  The 
-input to sysfs files does not have to be a number.  For example, the 
-firmware loader used by hotplug utilizes sysfs entries for transferring 
-the firmware image from user space into the driver.
-
-The Intel(R) PRO/Wireless 2915ABG Driver for Linux exposes sysfs entries 
-at two levels -- driver level, which apply to all instances of the driver 
-(in the event that there are more than one device installed) and device 
-level, which applies only to the single specific instance.
-
-
-1.4.1 Driver Level Sysfs Helper Files
------------------------------------------------
-
-For the driver level files, look in /sys/bus/pci/drivers/ipw2200/
-
-  debug_level  
-	
-	This controls the same global as the 'debug' module parameter
-
-
-
-1.4.2 Device Level Sysfs Helper Files
------------------------------------------------
-
-For the device level files, look in
-	
-	/sys/bus/pci/drivers/ipw2200/{PCI-ID}/
-
-For example:
-	/sys/bus/pci/drivers/ipw2200/0000:02:01.0
-
-For the device level files, see /sys/bus/pci/drivers/ipw2200:
-
-  rf_kill
-	read - 
-	0 = RF kill not enabled (radio on)
-	1 = SW based RF kill active (radio off)
-	2 = HW based RF kill active (radio off)
-	3 = Both HW and SW RF kill active (radio off)
-	write -
-	0 = If SW based RF kill active, turn the radio back on
-	1 = If radio is on, activate SW based RF kill
-
-	NOTE: If you enable the SW based RF kill and then toggle the HW
-  	based RF kill from ON -> OFF -> ON, the radio will NOT come back on
-	
-  ucode 
-	read-only access to the ucode version number
-
-  led
-	read -
-	0 = LED code disabled
-	1 = LED code enabled
-	write -
-	0 = Disable LED code
-	1 = Enable LED code
-
-	NOTE: The LED code has been reported to hang some systems when 
-	running ifconfig and is therefore disabled by default.
-
-
-1.5. Supported channels
------------------------------------------------
-
-Upon loading the Intel(R) PRO/Wireless 2915ABG Driver for Linux, a
-message stating the detected geography code and the number of 802.11
-channels supported by the card will be displayed in the log.
-
-The geography code corresponds to a regulatory domain as shown in the
-table below.
-
-					  Supported channels
-Code	Geography			802.11bg	802.11a
-
----	Restricted			11 	 	 0
-ZZF	Custom US/Canada		11	 	 8
-ZZD	Rest of World			13	 	 0
-ZZA	Custom USA & Europe & High	11		13
-ZZB	Custom NA & Europe    		11		13
-ZZC	Custom Japan			11	 	 4
-ZZM	Custom 				11	 	 0
-ZZE	Europe				13		19
-ZZJ	Custom Japan			14	 	 4
-ZZR	Rest of World			14	 	 0
-ZZH	High Band			13	 	 4
-ZZG	Custom Europe			13	 	 4
-ZZK	Europe 				13		24
-ZZL	Europe				11		13
-
-
-2.   Ad-Hoc Networking
------------------------------------------------
-
-When using a device in an Ad-Hoc network, it is useful to understand the 
-sequence and requirements for the driver to be able to create, join, or 
-merge networks.
-
-The following attempts to provide enough information so that you can 
-have a consistent experience while using the driver as a member of an 
-Ad-Hoc network.
-
-2.1. Joining an Ad-Hoc Network
------------------------------------------------
-
-The easiest way to get onto an Ad-Hoc network is to join one that 
-already exists.
-
-2.2. Creating an Ad-Hoc Network
------------------------------------------------
-
-An Ad-Hoc networks is created using the syntax of the Wireless tool.
-
-For Example:
-iwconfig eth1 mode ad-hoc essid testing channel 2
-
-2.3. Merging Ad-Hoc Networks
------------------------------------------------
-
-
-3.  Interaction with Wireless Tools
------------------------------------------------
-
-3.1 iwconfig mode
------------------------------------------------
-
-When configuring the mode of the adapter, all run-time configured parameters
-are reset to the value used when the module was loaded.  This includes
-channels, rates, ESSID, etc.
-
-3.2 iwconfig sens
------------------------------------------------
-
-The 'iwconfig ethX sens XX' command will not set the signal sensitivity
-threshold, as described in iwconfig documentation, but rather the number
-of consecutive missed beacons that will trigger handover, i.e. roaming
-to another access point. At the same time, it will set the disassociation
-threshold to 3 times the given value.
-
-
-4.   About the Version Numbers
------------------------------------------------
-
-Due to the nature of open source development projects, there are 
-frequently changes being incorporated that have not gone through 
-a complete validation process.  These changes are incorporated into 
-development snapshot releases.
-
-Releases are numbered with a three level scheme: 
-
-	major.minor.development
-
-Any version where the 'development' portion is 0 (for example
-1.0.0, 1.1.0, etc.) indicates a stable version that will be made 
-available for kernel inclusion.
-
-Any version where the 'development' portion is not a 0 (for
-example 1.0.1, 1.1.5, etc.) indicates a development version that is
-being made available for testing and cutting edge users.  The stability 
-and functionality of the development releases are not know.  We make
-efforts to try and keep all snapshots reasonably stable, but due to the
-frequency of their release, and the desire to get those releases 
-available as quickly as possible, unknown anomalies should be expected.
-
-The major version number will be incremented when significant changes
-are made to the driver.  Currently, there are no major changes planned.
-
-5.  Firmware installation
-----------------------------------------------
-
-The driver requires a firmware image, download it and extract the
-files under /lib/firmware (or wherever your hotplug's firmware.agent
-will look for firmware files)
-
-The firmware can be downloaded from the following URL:
-
-    http://ipw2200.sf.net/
-
-
-6.  Support
------------------------------------------------
-
-For direct support of the 1.0.0 version, you can contact 
-http://supportmail.intel.com, or you can use the open source project
-support.
-
-For general information and support, go to:
-	
-    http://ipw2200.sf.net/
-
-
-7.  License
------------------------------------------------
-
-  Copyright(c) 2003 - 2006 Intel Corporation. All rights reserved.
-
-  This program is free software; you can redistribute it and/or modify it 
-  under the terms of the GNU General Public License version 2 as 
-  published by the Free Software Foundation.
-  
-  This program is distributed in the hope that it will be useful, but WITHOUT 
-  ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or 
-  FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General Public License for 
-  more details.
-  
-  You should have received a copy of the GNU General Public License along with
-  this program; if not, write to the Free Software Foundation, Inc., 59 
-  Temple Place - Suite 330, Boston, MA  02111-1307, USA.
-  
-  The full GNU General Public License is included in this distribution in the
-  file called LICENSE.
-  
-  Contact Information:
-  James P. Ketrenos <ipw2100-admin@linux.intel.com>
-  Intel Corporation, 5200 N.E. Elam Young Parkway, Hillsboro, OR 97124-6497
-
diff --git a/MAINTAINERS b/MAINTAINERS
index 887c4e7e6102..107decaf0ac0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -8743,7 +8743,7 @@ M:	Stanislav Yakovlev <stas.yakovlev@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F:	Documentation/networking/device_drivers/intel/ipw2100.rst
-F:	Documentation/networking/device_drivers/intel/ipw2200.txt
+F:	Documentation/networking/device_drivers/intel/ipw2200.rst
 F:	drivers/net/wireless/intel/ipw2x00/
 
 INTEL PSTATE DRIVER
diff --git a/drivers/net/wireless/intel/ipw2x00/Kconfig b/drivers/net/wireless/intel/ipw2x00/Kconfig
index b0b3cd6296f3..f42b3cdce611 100644
--- a/drivers/net/wireless/intel/ipw2x00/Kconfig
+++ b/drivers/net/wireless/intel/ipw2x00/Kconfig
@@ -78,7 +78,7 @@ config IPW2200
 	  A driver for the Intel PRO/Wireless 2200BG and 2915ABG Network
 	  Connection adapters.
 
-	  See <file:Documentation/networking/device_drivers/intel/ipw2200.txt>
+	  See <file:Documentation/networking/device_drivers/intel/ipw2200.rst>
 	  for information on the capabilities currently enabled in this
 	  driver and for tips for debugging issues and problems.
 
-- 
cgit v1.2.3-59-g8ed1b


From 011531f7e525983f0bf2060fb4f048f580606d74 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:48 +0200
Subject: docs: networking: device drivers: convert microsoft/netvsc.txt to
 ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/microsoft/netvsc.rst | 116 +++++++++++++++++++++
 .../networking/device_drivers/microsoft/netvsc.txt | 105 -------------------
 MAINTAINERS                                        |   2 +-
 4 files changed, 118 insertions(+), 106 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/microsoft/netvsc.rst
 delete mode 100644 Documentation/networking/device_drivers/microsoft/netvsc.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index f9ce0089ec7d..575f0043b03e 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -41,6 +41,7 @@ Contents:
    freescale/gianfar
    intel/ipw2100
    intel/ipw2200
+   microsoft/netvsc
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/microsoft/netvsc.rst b/Documentation/networking/device_drivers/microsoft/netvsc.rst
new file mode 100644
index 000000000000..c3f51c672a68
--- /dev/null
+++ b/Documentation/networking/device_drivers/microsoft/netvsc.rst
@@ -0,0 +1,116 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================
+Hyper-V network driver
+======================
+
+Compatibility
+=============
+
+This driver is compatible with Windows Server 2012 R2, 2016 and
+Windows 10.
+
+Features
+========
+
+Checksum offload
+----------------
+  The netvsc driver supports checksum offload as long as the
+  Hyper-V host version does. Windows Server 2016 and Azure
+  support checksum offload for TCP and UDP for both IPv4 and
+  IPv6. Windows Server 2012 only supports checksum offload for TCP.
+
+Receive Side Scaling
+--------------------
+  Hyper-V supports receive side scaling. For TCP & UDP, packets can
+  be distributed among available queues based on IP address and port
+  number.
+
+  For TCP & UDP, we can switch hash level between L3 and L4 by ethtool
+  command. TCP/UDP over IPv4 and v6 can be set differently. The default
+  hash level is L4. We currently only allow switching TX hash level
+  from within the guests.
+
+  On Azure, fragmented UDP packets have high loss rate with L4
+  hashing. Using L3 hashing is recommended in this case.
+
+  For example, for UDP over IPv4 on eth0:
+
+  To include UDP port numbers in hashing::
+
+	ethtool -N eth0 rx-flow-hash udp4 sdfn
+
+  To exclude UDP port numbers in hashing::
+
+	ethtool -N eth0 rx-flow-hash udp4 sd
+
+  To show UDP hash level::
+
+	ethtool -n eth0 rx-flow-hash udp4
+
+Generic Receive Offload, aka GRO
+--------------------------------
+  The driver supports GRO and it is enabled by default. GRO coalesces
+  like packets and significantly reduces CPU usage under heavy Rx
+  load.
+
+Large Receive Offload (LRO), or Receive Side Coalescing (RSC)
+-------------------------------------------------------------
+  The driver supports LRO/RSC in the vSwitch feature. It reduces the per packet
+  processing overhead by coalescing multiple TCP segments when possible. The
+  feature is enabled by default on VMs running on Windows Server 2019 and
+  later. It may be changed by ethtool command::
+
+	ethtool -K eth0 lro on
+	ethtool -K eth0 lro off
+
+SR-IOV support
+--------------
+  Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
+  is enabled in both the vSwitch and the guest configuration, then the
+  Virtual Function (VF) device is passed to the guest as a PCI
+  device. In this case, both a synthetic (netvsc) and VF device are
+  visible in the guest OS and both NICs have the same MAC address.
+
+  The VF is enslaved by netvsc device.  The netvsc driver will transparently
+  switch the data path to the VF when it is available and up.
+  Network state (addresses, firewall, etc) should be applied only to the
+  netvsc device; the slave device should not be accessed directly in
+  most cases.  The exceptions are if some special queue discipline or
+  flow direction is desired; these should be applied directly to the
+  VF slave device.
+
+Receive Buffer
+--------------
+  Packets are received into a receive area which is created when device
+  is probed. The receive area is broken into MTU sized chunks and each may
+  contain one or more packets. The number of receive sections may be changed
+  via ethtool Rx ring parameters.
+
+  There is a similar send buffer which is used to aggregate packets for sending.
+  The send area is broken into chunks of 6144 bytes, and each section may
+  contain one or more packets. The send buffer is an optimization; the driver
+  will use a slower method to handle very large packets or if the send buffer
+  area is exhausted.
+
+XDP support
+-----------
+  XDP (eXpress Data Path) is a feature that runs eBPF bytecode at the early
+  stage when packets arrive at a NIC card. The goal is to increase performance
+  for packet processing, reducing the overhead of SKB allocation and other
+  upper network layers.
+
+  hv_netvsc supports XDP in native mode, and transparently sets the XDP
+  program on the associated VF NIC as well.
+
+  Setting / unsetting XDP program on synthetic NIC (netvsc) propagates to
+  VF NIC automatically. Setting / unsetting XDP program on VF NIC directly
+  is not recommended: it is not propagated to the synthetic NIC, and may be
+  overwritten by the setting on the synthetic NIC.
+
+  XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO
+  before running XDP::
+
+	ethtool -K eth0 lro off
+
+  XDP_REDIRECT action is not yet supported.
diff --git a/Documentation/networking/device_drivers/microsoft/netvsc.txt b/Documentation/networking/device_drivers/microsoft/netvsc.txt
deleted file mode 100644
index cd63556b27a0..000000000000
--- a/Documentation/networking/device_drivers/microsoft/netvsc.txt
+++ /dev/null
@@ -1,105 +0,0 @@
-Hyper-V network driver
-======================
-
-Compatibility
-=============
-
-This driver is compatible with Windows Server 2012 R2, 2016 and
-Windows 10.
-
-Features
-========
-
-  Checksum offload
-  ----------------
-  The netvsc driver supports checksum offload as long as the
-  Hyper-V host version does. Windows Server 2016 and Azure
-  support checksum offload for TCP and UDP for both IPv4 and
-  IPv6. Windows Server 2012 only supports checksum offload for TCP.
-
-  Receive Side Scaling
-  --------------------
-  Hyper-V supports receive side scaling. For TCP & UDP, packets can
-  be distributed among available queues based on IP address and port
-  number.
-
-  For TCP & UDP, we can switch hash level between L3 and L4 by ethtool
-  command. TCP/UDP over IPv4 and v6 can be set differently. The default
-  hash level is L4. We currently only allow switching TX hash level
-  from within the guests.
-
-  On Azure, fragmented UDP packets have high loss rate with L4
-  hashing. Using L3 hashing is recommended in this case.
-
-  For example, for UDP over IPv4 on eth0:
-  To include UDP port numbers in hashing:
-        ethtool -N eth0 rx-flow-hash udp4 sdfn
-  To exclude UDP port numbers in hashing:
-        ethtool -N eth0 rx-flow-hash udp4 sd
-  To show UDP hash level:
-        ethtool -n eth0 rx-flow-hash udp4
-
-  Generic Receive Offload, aka GRO
-  --------------------------------
-  The driver supports GRO and it is enabled by default. GRO coalesces
-  like packets and significantly reduces CPU usage under heavy Rx
-  load.
-
-  Large Receive Offload (LRO), or Receive Side Coalescing (RSC)
-  -------------------------------------------------------------
-  The driver supports LRO/RSC in the vSwitch feature. It reduces the per packet
-  processing overhead by coalescing multiple TCP segments when possible. The
-  feature is enabled by default on VMs running on Windows Server 2019 and
-  later. It may be changed by ethtool command:
-	ethtool -K eth0 lro on
-	ethtool -K eth0 lro off
-
-  SR-IOV support
-  --------------
-  Hyper-V supports SR-IOV as a hardware acceleration option. If SR-IOV
-  is enabled in both the vSwitch and the guest configuration, then the
-  Virtual Function (VF) device is passed to the guest as a PCI
-  device. In this case, both a synthetic (netvsc) and VF device are
-  visible in the guest OS and both NIC's have the same MAC address.
-
-  The VF is enslaved by netvsc device.  The netvsc driver will transparently
-  switch the data path to the VF when it is available and up.
-  Network state (addresses, firewall, etc) should be applied only to the
-  netvsc device; the slave device should not be accessed directly in
-  most cases.  The exceptions are if some special queue discipline or
-  flow direction is desired, these should be applied directly to the
-  VF slave device.
-
-  Receive Buffer
-  --------------
-  Packets are received into a receive area which is created when device
-  is probed. The receive area is broken into MTU sized chunks and each may
-  contain one or more packets. The number of receive sections may be changed
-  via ethtool Rx ring parameters.
-
-  There is a similar send buffer which is used to aggregate packets for sending.
-  The send area is broken into chunks of 6144 bytes, each of section may
-  contain one or more packets. The send buffer is an optimization, the driver
-  will use slower method to handle very large packets or if the send buffer
-  area is exhausted.
-
-  XDP support
-  -----------
-  XDP (eXpress Data Path) is a feature that runs eBPF bytecode at the early
-  stage when packets arrive at a NIC card. The goal is to increase performance
-  for packet processing, reducing the overhead of SKB allocation and other
-  upper network layers.
-
-  hv_netvsc supports XDP in native mode, and transparently sets the XDP
-  program on the associated VF NIC as well.
-
-  Setting / unsetting XDP program on synthetic NIC (netvsc) propagates to
-  VF NIC automatically. Setting / unsetting XDP program on VF NIC directly
-  is not recommended, also not propagated to synthetic NIC, and may be
-  overwritten by setting of synthetic NIC.
-
-  XDP program cannot run with LRO (RSC) enabled, so you need to disable LRO
-  before running XDP:
-	ethtool -K eth0 lro off
-
-  XDP_REDIRECT action is not yet supported.
diff --git a/MAINTAINERS b/MAINTAINERS
index 107decaf0ac0..ba8bb932e3da 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7867,7 +7867,7 @@ S:	Supported
 T:	git git://git.kernel.org/pub/scm/linux/kernel/git/hyperv/linux.git
 F:	Documentation/ABI/stable/sysfs-bus-vmbus
 F:	Documentation/ABI/testing/debugfs-hyperv
-F:	Documentation/networking/device_drivers/microsoft/netvsc.txt
+F:	Documentation/networking/device_drivers/microsoft/netvsc.rst
 F:	arch/x86/hyperv
 F:	arch/x86/include/asm/hyperv-tlfs.h
 F:	arch/x86/include/asm/mshyperv.h
-- 
cgit v1.2.3-59-g8ed1b


From 7762f5c514dce027ad2a2031390c0c19c24547af Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:49 +0200
Subject: docs: networking: device drivers: convert neterion/s2io.txt to ReST

- add SPDX header;
- add a document title;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/neterion/s2io.rst    | 196 +++++++++++++++++++++
 .../networking/device_drivers/neterion/s2io.txt    | 141 ---------------
 MAINTAINERS                                        |   2 +-
 drivers/net/ethernet/neterion/Kconfig              |   2 +-
 5 files changed, 199 insertions(+), 143 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/neterion/s2io.rst
 delete mode 100644 Documentation/networking/device_drivers/neterion/s2io.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 575f0043b03e..da1f8438d4ea 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -42,6 +42,7 @@ Contents:
    intel/ipw2100
    intel/ipw2200
    microsoft/netvsc
+   neterion/s2io
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/neterion/s2io.rst b/Documentation/networking/device_drivers/neterion/s2io.rst
new file mode 100644
index 000000000000..c5673ec4559b
--- /dev/null
+++ b/Documentation/networking/device_drivers/neterion/s2io.rst
@@ -0,0 +1,196 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=========================================================
+Neterion's (Formerly S2io) Xframe I/II PCI-X 10GbE driver
+=========================================================
+
+Release notes for Neterion's (Formerly S2io) Xframe I/II PCI-X 10GbE driver.
+
+.. Contents
+  - 1.  Introduction
+  - 2.  Identifying the adapter/interface
+  - 3.  Features supported
+  - 4.  Command line parameters
+  - 5.  Performance suggestions
+  - 6.  Support
+
+
+1. Introduction
+===============
+This Linux driver supports Neterion's Xframe I PCI-X 1.0 and
+Xframe II PCI-X 2.0 adapters. It supports several features
+such as jumbo frames, MSI/MSI-X, checksum offloads, TSO, UFO and so on.
+See below for complete list of features.
+
+All features are supported for both IPv4 and IPv6.
+
+2. Identifying the adapter/interface
+====================================
+
+a. Insert the adapter(s) in your system.
+b. Build and load driver::
+
+	# insmod s2io.ko
+
+c. View log messages::
+
+	# dmesg | tail -40
+
+You will see messages similar to::
+
+	eth3: Neterion Xframe I 10GbE adapter (rev 3), Version 2.0.9.1, Intr type INTA
+	eth4: Neterion Xframe II 10GbE adapter (rev 2), Version 2.0.9.1, Intr type INTA
+	eth4: Device is on 64 bit 133MHz PCIX(M1) bus
+
+The above messages identify the adapter type(Xframe I/II), adapter revision,
+driver version, interface name(eth3, eth4), Interrupt type(INTA, MSI, MSI-X).
+In case of Xframe II, the PCI/PCI-X bus width and frequency are displayed
+as well.
+
+To associate an interface with a physical adapter use "ethtool -p <ethX>".
+The corresponding adapter's LED will blink multiple times.
+
+3. Features supported
+=====================
+a. Jumbo frames. Xframe I/II supports MTU up to 9600 bytes,
+   modifiable using ip command.
+
+b. Offloads. Supports checksum offload(TCP/UDP/IP) on transmit
+   and receive, TSO.
+
+c. Multi-buffer receive mode. Scattering of packet across multiple
+   buffers. Currently driver supports 2-buffer mode which yields
+   significant performance improvement on certain platforms(SGI Altix,
+   IBM xSeries).
+
+d. MSI/MSI-X. Can be enabled on platforms which support this feature
+   (IA64, Xeon) resulting in noticeable performance improvement(up to 7%
+   on certain platforms).
+
+e. Statistics. Comprehensive MAC-level and software statistics displayed
+   using "ethtool -S" option.
+
+f. Multi-FIFO/Ring. Supports up to 8 transmit queues and receive rings,
+   with multiple steering options.
+
+4. Command line parameters
+==========================
+
+a. tx_fifo_num
+	Number of transmit queues
+
+Valid range: 1-8
+
+Default: 1
+
+b. rx_ring_num
+	Number of receive rings
+
+Valid range: 1-8
+
+Default: 1
+
+c. tx_fifo_len
+	Size of each transmit queue
+
+Valid range: Total length of all queues should not exceed 8192
+
+Default: 4096
+
+d. rx_ring_sz
+	Size of each receive ring(in 4K blocks)
+
+Valid range: Limited by memory on system
+
+Default: 30
+
+e. intr_type
+	Specifies interrupt type. Possible values 0(INTA), 2(MSI-X)
+
+Valid values: 0, 2
+
+Default: 2
+
+5. Performance suggestions
+==========================
+
+General:
+
+a. Set MTU to maximum(9000 for switch setup, 9600 in back-to-back configuration)
+b. Set TCP window size to an optimal value.
+
+For instance, for MTU=1500 a value of 210K has been observed to result in
+good performance::
+
+	# sysctl -w net.ipv4.tcp_rmem="210000 210000 210000"
+	# sysctl -w net.ipv4.tcp_wmem="210000 210000 210000"
+
+For MTU=9000, TCP window size of 10 MB is recommended::
+
+	# sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
+	# sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
+
+Transmit performance:
+
+a. By default, the driver respects BIOS settings for PCI bus parameters.
+   However, you may want to experiment with PCI bus parameters
+   max-split-transactions(MOST) and MMRBC (use setpci command).
+
+   A MOST value of 2 has been found optimal for Opterons and 3 for Itanium.
+
+   It could be different for your hardware.
+
+   Set MMRBC to 4K**.
+
+   For example you can set
+
+   For opteron::
+
+	#setpci -d 17d5:* 62=1d
+
+   For Itanium::
+
+	#setpci -d 17d5:* 62=3d
+
+   For detailed description of the PCI registers, please see Xframe User Guide.
+
+b. Ensure Transmit Checksum offload is enabled. Use ethtool to set/verify this
+   parameter.
+
+c. Turn on TSO(using "ethtool -K")::
+
+	# ethtool -K <ethX> tso on
+
+Receive performance:
+
+a. By default, the driver respects BIOS settings for PCI bus parameters.
+   However, you may want to set PCI latency timer to 248::
+
+	#setpci -d 17d5:* LATENCY_TIMER=f8
+
+   For detailed description of the PCI registers, please see Xframe User Guide.
+
+b. Use 2-buffer mode. This results in large performance boost on
+   certain platforms(eg. SGI Altix, IBM xSeries).
+
+c. Ensure Receive Checksum offload is enabled. Use "ethtool -K ethX" command to
+   set/verify this option.
+
+d. Enable NAPI feature(in kernel configuration Device Drivers ---> Network
+   device support --->  Ethernet (10000 Mbit) ---> S2IO 10Gbe Xframe NIC) to
+   bring down CPU utilization.
+
+.. note::
+
+   For AMD opteron platforms with 8131 chipset, MMRBC=1 and MOST=1 are
+   recommended as safe parameters.
+
+For more information, please review the AMD8131 errata at
+http://vip.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/
+26310_AMD-8131_HyperTransport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf
+
+6. Support
+==========
+
+For further support, please contact your 10GbE Xframe NIC vendor (IBM,
+HP, SGI, etc.).
diff --git a/Documentation/networking/device_drivers/neterion/s2io.txt b/Documentation/networking/device_drivers/neterion/s2io.txt
deleted file mode 100644
index 0362a42f7cf4..000000000000
--- a/Documentation/networking/device_drivers/neterion/s2io.txt
+++ /dev/null
@@ -1,141 +0,0 @@
-Release notes for Neterion's (Formerly S2io) Xframe I/II PCI-X 10GbE driver.
-
-Contents
-=======
-- 1.  Introduction
-- 2.  Identifying the adapter/interface
-- 3.  Features supported
-- 4.  Command line parameters
-- 5.  Performance suggestions
-- 6.  Available Downloads 
-
-
-1.	Introduction:
-This Linux driver supports Neterion's Xframe I PCI-X 1.0 and
-Xframe II PCI-X 2.0 adapters. It supports several features 
-such as jumbo frames, MSI/MSI-X, checksum offloads, TSO, UFO and so on.
-See below for complete list of features.
-All features are supported for both IPv4 and IPv6.
-
-2.	Identifying the adapter/interface:
-a. Insert the adapter(s) in your system.
-b. Build and load driver 
-# insmod s2io.ko
-c. View log messages
-# dmesg | tail -40
-You will see messages similar to:
-eth3: Neterion Xframe I 10GbE adapter (rev 3), Version 2.0.9.1, Intr type INTA
-eth4: Neterion Xframe II 10GbE adapter (rev 2), Version 2.0.9.1, Intr type INTA
-eth4: Device is on 64 bit 133MHz PCIX(M1) bus
-
-The above messages identify the adapter type(Xframe I/II), adapter revision,
-driver version, interface name(eth3, eth4), Interrupt type(INTA, MSI, MSI-X).
-In case of Xframe II, the PCI/PCI-X bus width and frequency are displayed
-as well.
-
-To associate an interface with a physical adapter use "ethtool -p <ethX>".
-The corresponding adapter's LED will blink multiple times.
-
-3.	Features supported:
-a. Jumbo frames. Xframe I/II supports MTU up to 9600 bytes,
-modifiable using ip command.
-
-b. Offloads. Supports checksum offload(TCP/UDP/IP) on transmit
-and receive, TSO.
-
-c. Multi-buffer receive mode. Scattering of packet across multiple
-buffers. Currently driver supports 2-buffer mode which yields
-significant performance improvement on certain platforms(SGI Altix,
-IBM xSeries).
-
-d. MSI/MSI-X. Can be enabled on platforms which support this feature
-(IA64, Xeon) resulting in noticeable performance improvement(up to 7%
-on certain platforms).
-
-e. Statistics. Comprehensive MAC-level and software statistics displayed
-using "ethtool -S" option.
-
-f. Multi-FIFO/Ring. Supports up to 8 transmit queues and receive rings,
-with multiple steering options.
-
-4.  Command line parameters
-a. tx_fifo_num
-Number of transmit queues
-Valid range: 1-8
-Default: 1
-
-b. rx_ring_num
-Number of receive rings
-Valid range: 1-8
-Default: 1
-
-c. tx_fifo_len
-Size of each transmit queue
-Valid range: Total length of all queues should not exceed 8192
-Default: 4096
-
-d. rx_ring_sz 
-Size of each receive ring(in 4K blocks)
-Valid range: Limited by memory on system
-Default: 30 
-
-e. intr_type
-Specifies interrupt type. Possible values 0(INTA), 2(MSI-X)
-Valid values: 0, 2
-Default: 2
-
-5.  Performance suggestions
-General:
-a. Set MTU to maximum(9000 for switch setup, 9600 in back-to-back configuration)
-b. Set TCP windows size to optimal value. 
-For instance, for MTU=1500 a value of 210K has been observed to result in 
-good performance.
-# sysctl -w net.ipv4.tcp_rmem="210000 210000 210000"
-# sysctl -w net.ipv4.tcp_wmem="210000 210000 210000"
-For MTU=9000, TCP window size of 10 MB is recommended.
-# sysctl -w net.ipv4.tcp_rmem="10000000 10000000 10000000"
-# sysctl -w net.ipv4.tcp_wmem="10000000 10000000 10000000"
-
-Transmit performance:
-a. By default, the driver respects BIOS settings for PCI bus parameters. 
-However, you may want to experiment with PCI bus parameters 
-max-split-transactions(MOST) and MMRBC (use setpci command). 
-A MOST value of 2 has been found optimal for Opterons and 3 for Itanium.  
-It could be different for your hardware.  
-Set MMRBC to 4K**.
-
-For example you can set 
-For opteron
-#setpci -d 17d5:* 62=1d 
-For Itanium
-#setpci -d 17d5:* 62=3d 
-
-For detailed description of the PCI registers, please see Xframe User Guide.
-
-b. Ensure Transmit Checksum offload is enabled. Use ethtool to set/verify this 
-parameter.
-c. Turn on TSO(using "ethtool -K")
-# ethtool -K <ethX> tso on
-
-Receive performance:
-a. By default, the driver respects BIOS settings for PCI bus parameters. 
-However, you may want to set PCI latency timer to 248.
-#setpci -d 17d5:* LATENCY_TIMER=f8
-For detailed description of the PCI registers, please see Xframe User Guide.
-b. Use 2-buffer mode. This results in large performance boost on
-certain platforms(eg. SGI Altix, IBM xSeries).
-c. Ensure Receive Checksum offload is enabled. Use "ethtool -K ethX" command to 
-set/verify this option.
-d. Enable NAPI feature(in kernel configuration Device Drivers ---> Network 
-device support --->  Ethernet (10000 Mbit) ---> S2IO 10Gbe Xframe NIC) to 
-bring down CPU utilization.
-
-** For AMD opteron platforms with 8131 chipset, MMRBC=1 and MOST=1 are 
-recommended as safe parameters.
-For more information, please review the AMD8131 errata at
-http://vip.amd.com/us-en/assets/content_type/white_papers_and_tech_docs/
-26310_AMD-8131_HyperTransport_PCI-X_Tunnel_Revision_Guide_rev_3_18.pdf
-
-6. Support
-For further support please contact either your 10GbE Xframe NIC vendor (IBM, 
-HP, SGI etc.)
diff --git a/MAINTAINERS b/MAINTAINERS
index ba8bb932e3da..4e3f96ee0d98 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11656,7 +11656,7 @@ NETERION 10GbE DRIVERS (s2io/vxge)
 M:	Jon Mason <jdmason@kudzu.us>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	Documentation/networking/device_drivers/neterion/s2io.txt
+F:	Documentation/networking/device_drivers/neterion/s2io.rst
 F:	Documentation/networking/device_drivers/neterion/vxge.txt
 F:	drivers/net/ethernet/neterion/
 
diff --git a/drivers/net/ethernet/neterion/Kconfig b/drivers/net/ethernet/neterion/Kconfig
index 5e630f3a0189..c375ee08f6ea 100644
--- a/drivers/net/ethernet/neterion/Kconfig
+++ b/drivers/net/ethernet/neterion/Kconfig
@@ -27,7 +27,7 @@ config S2IO
 	  on its age.
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/device_drivers/neterion/s2io.txt>.
+	  <file:Documentation/networking/device_drivers/neterion/s2io.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called s2io.
-- 
cgit v1.2.3-59-g8ed1b


From f10727d3b68c8e03111436de94c922ffe304e21e Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:50 +0200
Subject: docs: networking: device drivers: convert neterion/vxge.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- comment out text-only TOC from html/pdf output;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/neterion/vxge.rst    | 115 +++++++++++++++++++++
 .../networking/device_drivers/neterion/vxge.txt    |  93 -----------------
 MAINTAINERS                                        |   2 +-
 drivers/net/ethernet/neterion/Kconfig              |   2 +-
 5 files changed, 118 insertions(+), 95 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/neterion/vxge.rst
 delete mode 100644 Documentation/networking/device_drivers/neterion/vxge.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index da1f8438d4ea..55837244eaad 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -43,6 +43,7 @@ Contents:
    intel/ipw2200
    microsoft/netvsc
    neterion/s2io
+   neterion/vxge
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/neterion/vxge.rst b/Documentation/networking/device_drivers/neterion/vxge.rst
new file mode 100644
index 000000000000..589c6b15c63d
--- /dev/null
+++ b/Documentation/networking/device_drivers/neterion/vxge.rst
@@ -0,0 +1,115 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+==============================================================================
+Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
+==============================================================================
+
+.. Contents
+
+  1) Introduction
+  2) Features supported
+  3) Configurable driver parameters
+  4) Troubleshooting
+
+1. Introduction
+===============
+
+This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
+Virtualized Server adapters.
+
+The X3100 series supports four modes of operation, configurable via
+firmware:
+
+	- Single function mode
+	- Multi function mode
+	- SRIOV mode
+	- MRIOV mode
+
+The functions share a 10GbE link and the pci-e bus, but hardly anything else
+inside the ASIC. Features like independent hw reset, statistics, bandwidth/
+priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
+supported independently on each function.
+
+(See below for a complete list of features supported for both IPv4 and IPv6)
+
+2. Features supported
+=====================
+
+i)   Single function mode (up to 17 queues)
+
+ii)  Multi function mode (up to 17 functions)
+
+iii) PCI-SIG's I/O Virtualization
+
+       - Single Root mode: v1.0 (up to 17 functions)
+       - Multi-Root mode: v1.0 (up to 17 functions)
+
+iv)  Jumbo frames
+
+       X3100 Series supports MTU up to 9600 bytes, modifiable using
+       ip command.
+
+v)   Offloads supported: (Enabled by default)
+
+       - Checksum offload (TCP/UDP/IP) on transmit and receive paths
+       - TCP Segmentation Offload (TSO) on transmit path
+       - Generic Receive Offload (GRO) on receive path
+
+vi)  MSI-X: (Enabled by default)
+
+       Resulting in noticeable performance improvement (up to 7% on certain
+       platforms).
+
+vii) NAPI: (Enabled by default)
+
+       For better Rx interrupt moderation.
+
+viii)RTH (Receive Traffic Hash): (Enabled by default)
+
+       Receive side steering for better scaling.
+
+ix)  Statistics
+
+       Comprehensive MAC-level and software statistics displayed using
+       "ethtool -S" option.
+
+x)   Multiple hardware queues: (Enabled by default)
+
+       Up to 17 hardware based transmit and receive data channels, with
+       multiple steering options (transmit multiqueue enabled by default).
+
+3. Configurable driver parameters
+=================================
+
+i)  max_config_dev
+       Specifies maximum device functions to be enabled.
+
+       Valid range: 1-8
+
+ii) max_config_port
+       Specifies number of ports to be enabled.
+
+       Valid range: 1,2
+
+       Default: 1
+
+iii) max_config_vpath
+       Specifies maximum VPATH(s) configured for each device function.
+
+       Valid range: 1-17
+
+iv) vlan_tag_strip
+       Enables/disables vlan tag stripping from all received tagged frames that
+       are not replicated at the internal L2 switch.
+
+       Valid range: 0,1 (disabled, enabled respectively)
+
+       Default: 1
+
+v)  addr_learn_en
+       Enable learning the mac address of the guest OS interface in
+       virtualization environment.
+
+       Valid range: 0,1 (disabled, enabled respectively)
+
+       Default: 0
diff --git a/Documentation/networking/device_drivers/neterion/vxge.txt b/Documentation/networking/device_drivers/neterion/vxge.txt
deleted file mode 100644
index abfec245f97c..000000000000
--- a/Documentation/networking/device_drivers/neterion/vxge.txt
+++ /dev/null
@@ -1,93 +0,0 @@
-Neterion's (Formerly S2io) X3100 Series 10GbE PCIe Server Adapter Linux driver
-==============================================================================
-
-Contents
---------
-
-1) Introduction
-2) Features supported
-3) Configurable driver parameters
-4) Troubleshooting
-
-1) Introduction:
-----------------
-This Linux driver supports all Neterion's X3100 series 10 GbE PCIe I/O
-Virtualized Server adapters.
-The X3100 series supports four modes of operation, configurable via
-firmware -
-	Single function mode
-	Multi function mode
-	SRIOV mode
-	MRIOV mode
-The functions share a 10GbE link and the pci-e bus, but hardly anything else
-inside the ASIC. Features like independent hw reset, statistics, bandwidth/
-priority allocation and guarantees, GRO, TSO, interrupt moderation etc are
-supported independently on each function.
-
-(See below for a complete list of features supported for both IPv4 and IPv6)
-
-2) Features supported:
-----------------------
-
-i)   Single function mode (up to 17 queues)
-
-ii)  Multi function mode (up to 17 functions)
-
-iii) PCI-SIG's I/O Virtualization
-       - Single Root mode: v1.0 (up to 17 functions)
-       - Multi-Root mode: v1.0 (up to 17 functions)
-
-iv)  Jumbo frames
-       X3100 Series supports MTU up to 9600 bytes, modifiable using
-       ip command.
-
-v)   Offloads supported: (Enabled by default)
-       Checksum offload (TCP/UDP/IP) on transmit and receive paths
-       TCP Segmentation Offload (TSO) on transmit path
-       Generic Receive Offload (GRO) on receive path
-
-vi)  MSI-X: (Enabled by default)
-       Resulting in noticeable performance improvement (up to 7% on certain
-       platforms).
-
-vii) NAPI: (Enabled by default)
-       For better Rx interrupt moderation.
-
-viii)RTH (Receive Traffic Hash): (Enabled by default)
-       Receive side steering for better scaling.
-
-ix)  Statistics
-       Comprehensive MAC-level and software statistics displayed using
-       "ethtool -S" option.
-
-x)   Multiple hardware queues: (Enabled by default)
-       Up to 17 hardware based transmit and receive data channels, with
-       multiple steering options (transmit multiqueue enabled by default).
-
-3) Configurable driver parameters:
-----------------------------------
-
-i)  max_config_dev
-       Specifies maximum device functions to be enabled.
-       Valid range: 1-8
-
-ii) max_config_port
-       Specifies number of ports to be enabled.
-       Valid range: 1,2
-       Default: 1
-
-iii)max_config_vpath
-       Specifies maximum VPATH(s) configured for each device function.
-       Valid range: 1-17
-
-iv) vlan_tag_strip
-       Enables/disables vlan tag stripping from all received tagged frames that
-       are not replicated at the internal L2 switch.
-       Valid range: 0,1 (disabled, enabled respectively)
-       Default: 1
-
-v)  addr_learn_en
-       Enable learning the mac address of the guest OS interface in
-       virtualization environment.
-       Valid range: 0,1 (disabled, enabled respectively)
-       Default: 0
diff --git a/MAINTAINERS b/MAINTAINERS
index 4e3f96ee0d98..88e9e8430581 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -11657,7 +11657,7 @@ M:	Jon Mason <jdmason@kudzu.us>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/device_drivers/neterion/s2io.rst
-F:	Documentation/networking/device_drivers/neterion/vxge.txt
+F:	Documentation/networking/device_drivers/neterion/vxge.rst
 F:	drivers/net/ethernet/neterion/
 
 NETFILTER
diff --git a/drivers/net/ethernet/neterion/Kconfig b/drivers/net/ethernet/neterion/Kconfig
index c375ee08f6ea..a82a37094579 100644
--- a/drivers/net/ethernet/neterion/Kconfig
+++ b/drivers/net/ethernet/neterion/Kconfig
@@ -42,7 +42,7 @@ config VXGE
 	  labeled as either one, depending on its age.
 
 	  More specific information on configuring the driver is in
-	  <file:Documentation/networking/device_drivers/neterion/vxge.txt>.
+	  <file:Documentation/networking/device_drivers/neterion/vxge.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called vxge.
-- 
cgit v1.2.3-59-g8ed1b


From acfcf23597d62700f1c8e1975bca34070e0251ef Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:51 +0200
Subject: docs: networking: device drivers: convert qualcomm/rmnet.txt to ReST

- add SPDX header;
- add a document title;
- mark code blocks and literals as such;
- mark tables as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |  1 +
 .../networking/device_drivers/qualcomm/rmnet.rst   | 95 ++++++++++++++++++++++
 .../networking/device_drivers/qualcomm/rmnet.txt   | 82 -------------------
 MAINTAINERS                                        |  2 +-
 4 files changed, 97 insertions(+), 83 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/qualcomm/rmnet.rst
 delete mode 100644 Documentation/networking/device_drivers/qualcomm/rmnet.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 55837244eaad..66ed884548cc 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -44,6 +44,7 @@ Contents:
    microsoft/netvsc
    neterion/s2io
    neterion/vxge
+   qualcomm/rmnet
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/qualcomm/rmnet.rst b/Documentation/networking/device_drivers/qualcomm/rmnet.rst
new file mode 100644
index 000000000000..70643b58de05
--- /dev/null
+++ b/Documentation/networking/device_drivers/qualcomm/rmnet.rst
@@ -0,0 +1,95 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+============
+Rmnet Driver
+============
+
+1. Introduction
+===============
+
+rmnet driver is used for supporting the Multiplexing and aggregation
+Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm
+Technologies, Inc. modems.
+
+This driver can be used to register onto any physical network device in
+IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator.
+
+Multiplexing allows for creation of logical netdevices (rmnet devices) to
+handle multiple private data networks (PDN) like a default internet, tethering,
+multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends
+packets with MAP headers to rmnet. Based on the multiplexer id, rmnet
+routes to the appropriate PDN after removing the MAP header.
+
+Aggregation is required to achieve high data rates. This involves hardware
+sending aggregated bunch of MAP frames. rmnet driver will de-aggregate
+these MAP frames and send them to appropriate PDN's.
+
+2. Packet format
+================
+
+a. MAP packet (data / control)
+
+MAP header has the same endianness as the IP packet.
+
+Packet format::
+
+  Bit             0             1           2-7      8 - 15           16 - 31
+  Function   Command / Data   Reserved     Pad   Multiplexer ID    Payload length
+  Bit            32 - x
+  Function     Raw  Bytes
+
+Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
+or data packet. Control packet is used for transport level flow control. Data
+packets are standard IP packets.
+
+Reserved bits are usually zeroed out and to be ignored by receiver.
+
+Padding is number of bytes to be added for 4 byte alignment if required by
+hardware.
+
+Multiplexer ID is to indicate the PDN on which data has to be sent.
+
+Payload length includes the padding length but does not include MAP header
+length.
+
+b. MAP packet (command specific)::
+
+    Bit             0             1           2-7      8 - 15           16 - 31
+    Function   Command         Reserved     Pad   Multiplexer ID    Payload length
+    Bit          32 - 39        40 - 45    46 - 47       48 - 63
+    Function   Command name    Reserved   Command Type   Reserved
+    Bit          64 - 95
+    Function   Transaction ID
+    Bit          96 - 127
+    Function   Command data
+
+Command 1 indicates disabling flow while 2 is enabling flow
+
+Command types
+
+= ==========================================
+0 for MAP command request
+1 is to acknowledge the receipt of a command
+2 is for unsupported commands
+3 is for error during processing of commands
+= ==========================================
+
+c. Aggregation
+
+Aggregation is multiple MAP packets (can be data or command) delivered to
+rmnet in a single linear skb. rmnet will process the individual
+packets and either ACK the MAP command or deliver the IP packet to the
+network stack as needed
+
+MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding....
+
+MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad...
+
+3. Userspace configuration
+==========================
+
+rmnet userspace configuration is done through netlink library librmnetctl
+and command line utility rmnetcli. Utility is hosted in codeaurora forum git.
+The driver uses rtnl_link_ops for communication.
+
+https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl
diff --git a/Documentation/networking/device_drivers/qualcomm/rmnet.txt b/Documentation/networking/device_drivers/qualcomm/rmnet.txt
deleted file mode 100644
index 6b341eaf2062..000000000000
--- a/Documentation/networking/device_drivers/qualcomm/rmnet.txt
+++ /dev/null
@@ -1,82 +0,0 @@
-1. Introduction
-
-rmnet driver is used for supporting the Multiplexing and aggregation
-Protocol (MAP). This protocol is used by all recent chipsets using Qualcomm
-Technologies, Inc. modems.
-
-This driver can be used to register onto any physical network device in
-IP mode. Physical transports include USB, HSIC, PCIe and IP accelerator.
-
-Multiplexing allows for creation of logical netdevices (rmnet devices) to
-handle multiple private data networks (PDN) like a default internet, tethering,
-multimedia messaging service (MMS) or IP media subsystem (IMS). Hardware sends
-packets with MAP headers to rmnet. Based on the multiplexer id, rmnet
-routes to the appropriate PDN after removing the MAP header.
-
-Aggregation is required to achieve high data rates. This involves hardware
-sending aggregated bunch of MAP frames. rmnet driver will de-aggregate
-these MAP frames and send them to appropriate PDN's.
-
-2. Packet format
-
-a. MAP packet (data / control)
-
-MAP header has the same endianness of the IP packet.
-
-Packet format -
-
-Bit             0             1           2-7      8 - 15           16 - 31
-Function   Command / Data   Reserved     Pad   Multiplexer ID    Payload length
-Bit            32 - x
-Function     Raw  Bytes
-
-Command (1)/ Data (0) bit value is to indicate if the packet is a MAP command
-or data packet. Control packet is used for transport level flow control. Data
-packets are standard IP packets.
-
-Reserved bits are usually zeroed out and to be ignored by receiver.
-
-Padding is number of bytes to be added for 4 byte alignment if required by
-hardware.
-
-Multiplexer ID is to indicate the PDN on which data has to be sent.
-
-Payload length includes the padding length but does not include MAP header
-length.
-
-b. MAP packet (command specific)
-
-Bit             0             1           2-7      8 - 15           16 - 31
-Function   Command         Reserved     Pad   Multiplexer ID    Payload length
-Bit          32 - 39        40 - 45    46 - 47       48 - 63
-Function   Command name    Reserved   Command Type   Reserved
-Bit          64 - 95
-Function   Transaction ID
-Bit          96 - 127
-Function   Command data
-
-Command 1 indicates disabling flow while 2 is enabling flow
-
-Command types -
-0 for MAP command request
-1 is to acknowledge the receipt of a command
-2 is for unsupported commands
-3 is for error during processing of commands
-
-c. Aggregation
-
-Aggregation is multiple MAP packets (can be data or command) delivered to
-rmnet in a single linear skb. rmnet will process the individual
-packets and either ACK the MAP command or deliver the IP packet to the
-network stack as needed
-
-MAP header|IP Packet|Optional padding|MAP header|IP Packet|Optional padding....
-MAP header|IP Packet|Optional padding|MAP header|Command Packet|Optional pad...
-
-3. Userspace configuration
-
-rmnet userspace configuration is done through netlink library librmnetctl
-and command line utility rmnetcli. Utility is hosted in codeaurora forum git.
-The driver uses rtnl_link_ops for communication.
-
-https://source.codeaurora.org/quic/la/platform/vendor/qcom-opensource/dataservices/tree/rmnetctl
diff --git a/MAINTAINERS b/MAINTAINERS
index 88e9e8430581..94afbf577a06 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14031,7 +14031,7 @@ M:	Subash Abhinov Kasiviswanathan <subashab@codeaurora.org>
 M:	Sean Tranchetti <stranche@codeaurora.org>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/networking/device_drivers/qualcomm/rmnet.txt
+F:	Documentation/networking/device_drivers/qualcomm/rmnet.rst
 F:	drivers/net/ethernet/qualcomm/rmnet/
 F:	include/linux/if_rmnet.h
 
-- 
cgit v1.2.3-59-g8ed1b


From 21620b6586f7866aa76fc1a29c572b70fc12b1dc Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:52 +0200
Subject: docs: networking: device drivers: convert sb1000.txt to ReST

- add SPDX header;
- add a document title;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- mark lists as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 Documentation/networking/device_drivers/sb1000.rst | 222 +++++++++++++++++++++
 Documentation/networking/device_drivers/sb1000.txt | 207 -------------------
 drivers/net/Kconfig                                |   2 +-
 4 files changed, 224 insertions(+), 208 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/sb1000.rst
 delete mode 100644 Documentation/networking/device_drivers/sb1000.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 66ed884548cc..77270d59943b 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -45,6 +45,7 @@ Contents:
    neterion/s2io
    neterion/vxge
    qualcomm/rmnet
+   sb1000
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/sb1000.rst b/Documentation/networking/device_drivers/sb1000.rst
new file mode 100644
index 000000000000..c8582ca4034d
--- /dev/null
+++ b/Documentation/networking/device_drivers/sb1000.rst
@@ -0,0 +1,222 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+====================
+SB1000 device driver
+====================
+
+sb1000 is a module network device driver for the General Instrument (also known
+as NextLevel) SURFboard1000 internal cable modem board.  This is an ISA card
+which is used by a number of cable TV companies to provide cable modem access.
+It's a one-way downstream-only cable modem, meaning that your upstream net link
+is provided by your regular phone modem.
+
+This driver was written by Franco Venturi <fventuri@mediaone.net>.  He deserves
+a great deal of thanks for this wonderful piece of code!
+
+Needed tools
+============
+
+Support for this device is now a part of the standard Linux kernel.  The
+driver source code file is drivers/net/sb1000.c.  In addition to this
+you will need:
+
+1. The "cmconfig" program.  This is a utility which supplements "ifconfig"
+   to configure the cable modem and network interface (usually called "cm0");
+
+2. Several PPP scripts which live in /etc/ppp to make connecting via your
+   cable modem easy.
+
+   These utilities can be obtained from:
+
+      http://www.jacksonville.net/~fventuri/
+
+   in Franco's original source code distribution .tar.gz file.  Support for
+   the sb1000 driver can be found at:
+
+      - http://web.archive.org/web/%2A/http://home.adelphia.net/~siglercm/sb1000.html
+      - http://web.archive.org/web/%2A/http://linuxpower.cx/~cable/
+
+   along with these utilities.
+
+3. The standard isapnp tools.  These are necessary to configure your SB1000
+   card at boot time (or afterwards by hand) since it's a PnP card.
+
+   If you don't have these installed as a standard part of your Linux
+   distribution, you can find them at:
+
+      http://www.roestock.demon.co.uk/isapnptools/
+
+   or check your Linux distribution binary CD or their web site.  For help with
+   isapnp, pnpdump, or /etc/isapnp.conf, go to:
+
+      http://www.roestock.demon.co.uk/isapnptools/isapnpfaq.html
+
+Using the driver
+================
+
+To make the SB1000 card work, follow these steps:
+
+1. Run ``make config``, or ``make menuconfig``, or ``make xconfig``, whichever
+   you prefer, in the top kernel tree directory to set up your kernel
+   configuration.  Make sure to say "Y" to "Prompt for development drivers"
+   and to say "M" to the sb1000 driver.  Also say "Y" or "M" to all the standard
+   networking questions to get TCP/IP and PPP networking support.
+
+2. **BEFORE** you build the kernel, edit drivers/net/sb1000.c.  Make sure
+   to redefine the value of READ_DATA_PORT to match the I/O address used
+   by isapnp to access your PnP cards.  This is the value of READPORT in
+   /etc/isapnp.conf or given by the output of pnpdump.
+
+3. Build and install the kernel and modules as usual.
+
+4. Boot your new kernel following the usual procedures.
+
+5. Set up to configure the new SB1000 PnP card by capturing the output
+   of "pnpdump" to a file and editing this file to set the correct I/O ports,
+   IRQ, and DMA settings for all your PnP cards.  Make sure none of the settings
+   conflict with one another.  Then test this configuration by running the
+   "isapnp" command with your new config file as the input.  Check for
+   errors and fix as necessary.  (As an aside, I use I/O ports 0x110 and
+   0x310 and IRQ 11 for my SB1000 card and these work well for me.  YMMV.)
+   Then save the finished config file as /etc/isapnp.conf for proper
+   configuration on subsequent reboots.
+
+6. Download the original file sb1000-1.1.2.tar.gz from Franco's site or one of
+   the others referenced above.  As root, unpack it into a temporary directory
+   and do a ``make cmconfig`` and then ``install -c cmconfig /usr/local/sbin``.
+   Don't do ``make install`` because it expects to find all the utilities built
+   and ready for installation, not just cmconfig.
+
+7. As root, copy all the files under the ppp/ subdirectory in Franco's
+   tar file into /etc/ppp, being careful not to overwrite any files that are
+   already in there.  Then modify ppp@gi-on to set the correct login name,
+   phone number, and frequency for the cable modem.  Also edit pap-secrets
+   to specify your login name and password and any site-specific information
+   you need.
+
+8. Be sure to modify /etc/ppp/firewall to use ipchains instead of
+   the older ipfwadm commands from the 2.0.x kernels.  There's a neat utility to
+   convert ipfwadm commands to ipchains commands:
+
+	http://users.dhp.com/~whisper/ipfwadm2ipchains/
+
+   You may also wish to modify the firewall script to implement a different
+   firewalling scheme.
+
+9. Start the PPP connection via the script /etc/ppp/ppp@gi-on.  You must be
+   root to do this.  It's better to use a utility like sudo to execute
+   frequently used commands like this with root permissions if possible.  If you
+   connect successfully the cable modem interface will come up and you'll see a
+   driver message like this at the console::
+
+	 cm0: sb1000 at (0x110,0x310), csn 1, S/N 0x2a0d16d8, IRQ 11.
+	 sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)
+
+   The "ifconfig" command should show two new interfaces, ppp0 and cm0.
+
+   The command "cmconfig cm0" will give you information about the cable modem
+   interface.
+
+10. Try pinging a site via ``ping -c 5 www.yahoo.com``, for example.  You should
+    see packets received.
+
+11. If you can't get site names (like www.yahoo.com) to resolve into
+    IP addresses (like 204.71.200.67), be sure your /etc/resolv.conf file
+    has no syntax errors and has the right nameserver IP addresses in it.
+    If this doesn't help, try something like ``ping -c 5 204.71.200.67`` to
+    see if the networking is running but the DNS resolution is where the
+    problem lies.
+
+12. If you still have problems, go to the support web sites mentioned above
+    and read the information and documentation there.
+
+Common problems
+===============
+
+1. Packets go out on the ppp0 interface but don't come back on the cm0
+   interface.  It looks like I'm connected but I can't even ping any
+   numerical IP addresses.  (This happens predominantly on Debian systems due
+   to a default boot-time configuration script.)
+
+Solution
+   As root ``echo 0 > /proc/sys/net/ipv4/conf/cm0/rp_filter`` so it
+   can share the same IP address as the ppp0 interface.  Note that this
+   command should probably be added to the /etc/ppp/cablemodem script
+   *right between* the "/sbin/ifconfig" and "/sbin/cmconfig" commands.
+   You may need to do this to /proc/sys/net/ipv4/conf/ppp0/rp_filter as well.
+   If you do this to /proc/sys/net/ipv4/conf/default/rp_filter on each reboot
+   (in rc.local or some such) then any interfaces can share the same IP
+   addresses.
+
+2. I get "unresolved symbol" error messages on executing ``insmod sb1000.o``.
+
+Solution
+   You probably have a non-matching kernel source tree and
+   /usr/include/linux and /usr/include/asm header files.  Make sure you
+   install the correct versions of the header files in these two directories.
+   Then rebuild and reinstall the kernel.
+
+3. When isapnp runs it reports an error, and my SB1000 card isn't working.
+
+Solution
+   There's a problem with later versions of isapnp using the "(CHECK)"
+   option in the lines that allocate the two I/O addresses for the SB1000 card.
+   This first popped up on RH 6.0.  Delete "(CHECK)" for the SB1000 I/O addresses.
+   Make sure they don't conflict with any other pieces of hardware first!  Then
+   rerun isapnp and go from there.
+
+4. I can't execute the /etc/ppp/ppp@gi-on file.
+
+Solution
+   As root do ``chmod ug+x /etc/ppp/ppp@gi-on``.
+
+5. The firewall script isn't working (with 2.2.x and higher kernels).
+
+Solution
+   Use the ipfwadm2ipchains script referenced above to convert the
+   /etc/ppp/firewall script from the deprecated ipfwadm commands to ipchains.
+
+6. I'm getting *tons* of firewall deny messages in the /var/kern.log,
+   /var/messages, and/or /var/syslog files, and they're filling up my /var
+   partition!!!
+
+Solution
+   First, tell your ISP that you're receiving DoS (Denial of Service)
+   and/or portscanning (UDP connection attempts) attacks!  Look over the deny
+   messages to figure out what the attack is and where it's coming from.  Next,
+   edit /etc/ppp/cablemodem and make sure the ",nobroadcast" option is turned on
+   to the "cmconfig" command (uncomment that line).  If you're not receiving these
+   denied packets on your broadcast interface (IP address xxx.yyy.zzz.255
+   typically), then someone is attacking your machine in particular.  Be careful
+   out there....
+
+7. Everything seems to work fine but my computer locks up after a while
+   (and typically during a lengthy download through the cable modem)!
+
+Solution
+   You may need to add a short delay in the driver to 'slow down' the
+   SURFboard because your PC might not be able to keep up with the transfer rate
+   of the SB1000. To do this, it's probably best to download Franco's
+   sb1000-1.1.2.tar.gz archive and build and install sb1000.o manually.  You'll
+   want to edit the 'Makefile' and look for the 'SB1000_DELAY'
+   define.  Uncomment those 'CFLAGS' lines (and comment out the default ones)
+   and try setting the delay to something like 60 microseconds with:
+   '-DSB1000_DELAY=60'.  Then do ``make`` and as root ``make install`` and try
+   it out.  If it still doesn't work or you like playing with the driver, you may
+   try other numbers.  Remember though that the higher the delay, the slower the
+   driver (which slows down the rest of the PC too when it is actively
+   used). Thanks to Ed Daiga for this tip!
+
+Credits
+=======
+
+This README came from Franco Venturi's original README file which is
+still supplied with his driver .tar.gz archive.  I and all other sb1000 users
+owe Franco a tremendous "Thank you!"  Additional thanks goes to Carl Patten
+and Ralph Bonnell who are now managing the Linux SB1000 web site, and to
+the SB1000 users who reported and helped debug the common problems listed
+above.
+
+
+					Clemmitt Sigler
+					csigler@vt.edu
diff --git a/Documentation/networking/device_drivers/sb1000.txt b/Documentation/networking/device_drivers/sb1000.txt
deleted file mode 100644
index f92c2aac56a9..000000000000
--- a/Documentation/networking/device_drivers/sb1000.txt
+++ /dev/null
@@ -1,207 +0,0 @@
-sb1000 is a module network device driver for the General Instrument (also known
-as NextLevel) SURFboard1000 internal cable modem board.  This is an ISA card
-which is used by a number of cable TV companies to provide cable modem access.
-It's a one-way downstream-only cable modem, meaning that your upstream net link
-is provided by your regular phone modem.
-
-This driver was written by Franco Venturi <fventuri@mediaone.net>.  He deserves
-a great deal of thanks for this wonderful piece of code!
-
------------------------------------------------------------------------------
-
-Support for this device is now a part of the standard Linux kernel.  The
-driver source code file is drivers/net/sb1000.c.  In addition to this
-you will need:
-
-1.) The "cmconfig" program.  This is a utility which supplements "ifconfig"
-to configure the cable modem and network interface (usually called "cm0");
-and
-
-2.) Several PPP scripts which live in /etc/ppp to make connecting via your
-cable modem easy.
-
-   These utilities can be obtained from:
-
-      http://www.jacksonville.net/~fventuri/
-
-   in Franco's original source code distribution .tar.gz file.  Support for
-   the sb1000 driver can be found at:
-
-      http://web.archive.org/web/*/http://home.adelphia.net/~siglercm/sb1000.html
-      http://web.archive.org/web/*/http://linuxpower.cx/~cable/
-
-   along with these utilities.
-
-3.) The standard isapnp tools.  These are necessary to configure your SB1000
-card at boot time (or afterwards by hand) since it's a PnP card.
-
-   If you don't have these installed as a standard part of your Linux
-   distribution, you can find them at:
-
-      http://www.roestock.demon.co.uk/isapnptools/
-
-   or check your Linux distribution binary CD or their web site.  For help with
-   isapnp, pnpdump, or /etc/isapnp.conf, go to:
-
-      http://www.roestock.demon.co.uk/isapnptools/isapnpfaq.html
-
------------------------------------------------------------------------------
-
-To make the SB1000 card work, follow these steps:
-
-1.) Run `make config', or `make menuconfig', or `make xconfig', whichever
-you prefer, in the top kernel tree directory to set up your kernel
-configuration.  Make sure to say "Y" to "Prompt for development drivers"
-and to say "M" to the sb1000 driver.  Also say "Y" or "M" to all the standard
-networking questions to get TCP/IP and PPP networking support.
-
-2.) *BEFORE* you build the kernel, edit drivers/net/sb1000.c.  Make sure
-to redefine the value of READ_DATA_PORT to match the I/O address used
-by isapnp to access your PnP cards.  This is the value of READPORT in
-/etc/isapnp.conf or given by the output of pnpdump.
-
-3.) Build and install the kernel and modules as usual.
-
-4.) Boot your new kernel following the usual procedures.
-
-5.) Set up to configure the new SB1000 PnP card by capturing the output
-of "pnpdump" to a file and editing this file to set the correct I/O ports,
-IRQ, and DMA settings for all your PnP cards.  Make sure none of the settings
-conflict with one another.  Then test this configuration by running the
-"isapnp" command with your new config file as the input.  Check for
-errors and fix as necessary.  (As an aside, I use I/O ports 0x110 and
-0x310 and IRQ 11 for my SB1000 card and these work well for me.  YMMV.)
-Then save the finished config file as /etc/isapnp.conf for proper configuration
-on subsequent reboots.
-
-6.) Download the original file sb1000-1.1.2.tar.gz from Franco's site or one of
-the others referenced above.  As root, unpack it into a temporary directory and
-do a `make cmconfig' and then `install -c cmconfig /usr/local/sbin'.  Don't do
-`make install' because it expects to find all the utilities built and ready for
-installation, not just cmconfig.
-
-7.) As root, copy all the files under the ppp/ subdirectory in Franco's
-tar file into /etc/ppp, being careful not to overwrite any files that are
-already in there.  Then modify ppp@gi-on to set the correct login name,
-phone number, and frequency for the cable modem.  Also edit pap-secrets
-to specify your login name and password and any site-specific information
-you need.
-
-8.) Be sure to modify /etc/ppp/firewall to use ipchains instead of
-the older ipfwadm commands from the 2.0.x kernels.  There's a neat utility to
-convert ipfwadm commands to ipchains commands:
-
-   http://users.dhp.com/~whisper/ipfwadm2ipchains/
-
-You may also wish to modify the firewall script to implement a different
-firewalling scheme.
-
-9.) Start the PPP connection via the script /etc/ppp/ppp@gi-on.  You must be
-root to do this.  It's better to use a utility like sudo to execute
-frequently used commands like this with root permissions if possible.  If you
-connect successfully the cable modem interface will come up and you'll see a
-driver message like this at the console:
-
-         cm0: sb1000 at (0x110,0x310), csn 1, S/N 0x2a0d16d8, IRQ 11.
-         sb1000.c:v1.1.2 6/01/98 (fventuri@mediaone.net)
-
-The "ifconfig" command should show two new interfaces, ppp0 and cm0.
-The command "cmconfig cm0" will give you information about the cable modem
-interface.
-
-10.) Try pinging a site via `ping -c 5 www.yahoo.com', for example.  You should
-see packets received.
-
-11.) If you can't get site names (like www.yahoo.com) to resolve into
-IP addresses (like 204.71.200.67), be sure your /etc/resolv.conf file
-has no syntax errors and has the right nameserver IP addresses in it.
-If this doesn't help, try something like `ping -c 5 204.71.200.67' to
-see if the networking is running but the DNS resolution is where the
-problem lies.
-
-12.) If you still have problems, go to the support web sites mentioned above
-and read the information and documentation there.
-
------------------------------------------------------------------------------
-
-Common problems:
-
-1.) Packets go out on the ppp0 interface but don't come back on the cm0
-interface.  It looks like I'm connected but I can't even ping any
-numerical IP addresses.  (This happens predominantly on Debian systems due
-to a default boot-time configuration script.)
-
-Solution -- As root `echo 0 > /proc/sys/net/ipv4/conf/cm0/rp_filter' so it
-can share the same IP address as the ppp0 interface.  Note that this
-command should probably be added to the /etc/ppp/cablemodem script
-*right*between* the "/sbin/ifconfig" and "/sbin/cmconfig" commands.
-You may need to do this to /proc/sys/net/ipv4/conf/ppp0/rp_filter as well.
-If you do this to /proc/sys/net/ipv4/conf/default/rp_filter on each reboot
-(in rc.local or some such) then any interfaces can share the same IP
-addresses.
-
-2.) I get "unresolved symbol" error messages on executing `insmod sb1000.o'.
-
-Solution -- You probably have a non-matching kernel source tree and
-/usr/include/linux and /usr/include/asm header files.  Make sure you
-install the correct versions of the header files in these two directories.
-Then rebuild and reinstall the kernel.
-
-3.) When isapnp runs it reports an error, and my SB1000 card isn't working.
-
-Solution -- There's a problem with later versions of isapnp using the "(CHECK)"
-option in the lines that allocate the two I/O addresses for the SB1000 card.
-This first popped up on RH 6.0.  Delete "(CHECK)" for the SB1000 I/O addresses.
-Make sure they don't conflict with any other pieces of hardware first!  Then
-rerun isapnp and go from there.
-
-4.) I can't execute the /etc/ppp/ppp@gi-on file.
-
-Solution -- As root do `chmod ug+x /etc/ppp/ppp@gi-on'.
-
-5.) The firewall script isn't working (with 2.2.x and higher kernels).
-
-Solution -- Use the ipfwadm2ipchains script referenced above to convert the
-/etc/ppp/firewall script from the deprecated ipfwadm commands to ipchains.
-
-6.) I'm getting *tons* of firewall deny messages in the /var/kern.log,
-/var/messages, and/or /var/syslog files, and they're filling up my /var
-partition!!!
-
-Solution -- First, tell your ISP that you're receiving DoS (Denial of Service)
-and/or portscanning (UDP connection attempts) attacks!  Look over the deny
-messages to figure out what the attack is and where it's coming from.  Next,
-edit /etc/ppp/cablemodem and make sure the ",nobroadcast" option is turned on
-to the "cmconfig" command (uncomment that line).  If you're not receiving these
-denied packets on your broadcast interface (IP address xxx.yyy.zzz.255
-typically), then someone is attacking your machine in particular.  Be careful
-out there....
-
-7.) Everything seems to work fine but my computer locks up after a while
-(and typically during a lengthy download through the cable modem)!
-
-Solution -- You may need to add a short delay in the driver to 'slow down' the
-SURFboard because your PC might not be able to keep up with the transfer rate
-of the SB1000. To do this, it's probably best to download Franco's
-sb1000-1.1.2.tar.gz archive and build and install sb1000.o manually.  You'll
-want to edit the 'Makefile' and look for the 'SB1000_DELAY'
-define.  Uncomment those 'CFLAGS' lines (and comment out the default ones)
-and try setting the delay to something like 60 microseconds with:
-'-DSB1000_DELAY=60'.  Then do `make' and as root `make install' and try
-it out.  If it still doesn't work or you like playing with the driver, you may
-try other numbers.  Remember though that the higher the delay, the slower the
-driver (which slows down the rest of the PC too when it is actively
-used). Thanks to Ed Daiga for this tip!
-
------------------------------------------------------------------------------
-
-Credits:  This README came from Franco Venturi's original README file which is
-still supplied with his driver .tar.gz archive.  I and all other sb1000 users
-owe Franco a tremendous "Thank you!"  Additional thanks goes to Carl Patten
-and Ralph Bonnell who are now managing the Linux SB1000 web site, and to
-the SB1000 users who reported and helped debug the common problems listed
-above.
-
-
-					Clemmitt Sigler
-					csigler@vt.edu
diff --git a/drivers/net/Kconfig b/drivers/net/Kconfig
index 3f2c98a7906c..c7d310ef1c83 100644
--- a/drivers/net/Kconfig
+++ b/drivers/net/Kconfig
@@ -460,7 +460,7 @@ config NET_SB1000
 
 	  At present this driver only compiles as a module, so say M here if
 	  you have this card. The module will be called sb1000. Then read
-	  <file:Documentation/networking/device_drivers/sb1000.txt> for
+	  <file:Documentation/networking/device_drivers/sb1000.rst> for
 	  information on how to use this module, as it needs special ppp
 	  scripts for establishing a connection. Further documentation
 	  and the necessary scripts can be found at:
-- 
cgit v1.2.3-59-g8ed1b


From d6a998732b6d00b365c157a5f5ec321f3ac20cf0 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:53 +0200
Subject: docs: networking: device drivers: convert smsc/smc9.txt to ReST

- add SPDX header;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |  1 +
 .../networking/device_drivers/smsc/smc9.rst        | 48 ++++++++++++++++++++++
 .../networking/device_drivers/smsc/smc9.txt        | 42 -------------------
 drivers/net/ethernet/smsc/Kconfig                  |  4 +-
 4 files changed, 51 insertions(+), 44 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/smsc/smc9.rst
 delete mode 100644 Documentation/networking/device_drivers/smsc/smc9.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 77270d59943b..3479e6f576c3 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -46,6 +46,7 @@ Contents:
    neterion/vxge
    qualcomm/rmnet
    sb1000
+   smsc/smc9
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/smsc/smc9.rst b/Documentation/networking/device_drivers/smsc/smc9.rst
new file mode 100644
index 000000000000..e5eac896a631
--- /dev/null
+++ b/Documentation/networking/device_drivers/smsc/smc9.rst
@@ -0,0 +1,48 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+================
+SMC 9xxxx Driver
+================
+
+Revision 0.12
+
+3/5/96
+
+Copyright 1996  Erik Stahlman
+
+Released under terms of the GNU General Public License.
+
+This file contains the instructions and caveats for my SMC9xxx driver.  You
+should not be using the driver without reading this file.
+
+Things to note about installation:
+
+  1. The driver should work on all kernels from 1.2.13 until 1.3.71.
+     (A kernel patch is supplied for 1.3.71 )
+
+  2. If you include this into the kernel, you might need to change some
+     options, such as for forcing IRQ.
+
+
+  3.  To compile as a module, run 'make'.
+      Make will give you the appropriate options for various kernel support.
+
+  4.  Loading the driver as a module::
+
+	use:   insmod smc9194.o
+	optional parameters:
+		io=xxxx    : your base address
+		irq=xx	   : your irq
+		ifport=x   :	0 for whatever is default
+				1 for twisted pair
+				2 for AUI  ( or BNC on some cards )
+
+How to obtain the latest version?
+
+FTP:
+	ftp://fenris.campus.vt.edu/smc9/smc9-12.tar.gz
+	ftp://sfbox.vt.edu/filebox/F/fenris/smc9/smc9-12.tar.gz
+
+
+Contacting me:
+    erik@mail.vt.edu
diff --git a/Documentation/networking/device_drivers/smsc/smc9.txt b/Documentation/networking/device_drivers/smsc/smc9.txt
deleted file mode 100644
index d1e15074e43d..000000000000
--- a/Documentation/networking/device_drivers/smsc/smc9.txt
+++ /dev/null
@@ -1,42 +0,0 @@
-
-SMC 9xxxx Driver 
-Revision 0.12 
-3/5/96
-Copyright 1996  Erik Stahlman 
-Released under terms of the GNU General Public License. 
-
-This file contains the instructions and caveats for my SMC9xxx driver.  You
-should not be using the driver without reading this file.  
-
-Things to note about installation:
-
-  1. The driver should work on all kernels from 1.2.13 until 1.3.71.
-	(A kernel patch is supplied for 1.3.71 )
-
-  2. If you include this into the kernel, you might need to change some
-	options, such as for forcing IRQ.   
-
- 
-  3.  To compile as a module, run 'make' .   
-	Make will give you the appropriate options for various kernel support.
- 
-  4.  Loading the driver as a module :
-
-	use:   insmod smc9194.o 
-	optional parameters:
-		io=xxxx    : your base address
-		irq=xx	   : your irq 
-		ifport=x   :	0 for whatever is default
-				1 for twisted pair
-				2 for AUI  ( or BNC on some cards )
-
-How to obtain the latest version? 
-	
-FTP:  
-	ftp://fenris.campus.vt.edu/smc9/smc9-12.tar.gz
-	ftp://sfbox.vt.edu/filebox/F/fenris/smc9/smc9-12.tar.gz 
-   
-
-Contacting me:
-    erik@mail.vt.edu
- 
diff --git a/drivers/net/ethernet/smsc/Kconfig b/drivers/net/ethernet/smsc/Kconfig
index 9e1c3752b200..4d2d91ec8b41 100644
--- a/drivers/net/ethernet/smsc/Kconfig
+++ b/drivers/net/ethernet/smsc/Kconfig
@@ -28,7 +28,7 @@ config SMC9194
 	  option if you have a DELL laptop with the docking station, or
 	  another SMC9192/9194 based chipset.  Say Y if you want it compiled
 	  into the kernel, and read the file
-	  <file:Documentation/networking/device_drivers/smsc/smc9.txt>.
+	  <file:Documentation/networking/device_drivers/smsc/smc9.rst>.
 
 	  To compile this driver as a module, choose M here. The module
 	  will be called smc9194.
@@ -44,7 +44,7 @@ config SMC91X
 	  This is a driver for SMC's 91x series of Ethernet chipsets,
 	  including the SMC91C94 and the SMC91C111. Say Y if you want it
 	  compiled into the kernel, and read the file
-	  <file:Documentation/networking/device_drivers/smsc/smc9.txt>.
+	  <file:Documentation/networking/device_drivers/smsc/smc9.rst>.
 
 	  This driver is also available as a module ( = code which can be
 	  inserted in and removed from the running kernel whenever you want).
-- 
cgit v1.2.3-59-g8ed1b


From 8470f18ea651950094b4d3eba513be194e9141f8 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:54 +0200
Subject: docs: networking: device drivers: convert ti/cpsw_switchdev.txt to
 ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- use :field: markup;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../device_drivers/ti/cpsw_switchdev.rst           | 242 +++++++++++++++++++++
 .../device_drivers/ti/cpsw_switchdev.txt           | 209 ------------------
 3 files changed, 243 insertions(+), 209 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/ti/cpsw_switchdev.rst
 delete mode 100644 Documentation/networking/device_drivers/ti/cpsw_switchdev.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 3479e6f576c3..b3c0c473de2b 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -47,6 +47,7 @@ Contents:
    qualcomm/rmnet
    sb1000
    smsc/smc9
+   ti/cpsw_switchdev
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/ti/cpsw_switchdev.rst b/Documentation/networking/device_drivers/ti/cpsw_switchdev.rst
new file mode 100644
index 000000000000..1241ecac73bd
--- /dev/null
+++ b/Documentation/networking/device_drivers/ti/cpsw_switchdev.rst
@@ -0,0 +1,242 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================================
+Texas Instruments CPSW switchdev based ethernet driver
+======================================================
+
+:Version: 2.0
+
+Port renaming
+=============
+
+On older udev versions renaming of ethX to swXpY will not be automatically
+supported
+
+In order to rename via udev::
+
+    ip -d link show dev sw0p1 | grep switchid
+
+    SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}==<switchid>, \
+	    ATTR{phys_port_name}!="", NAME="sw0$attr{phys_port_name}"
+
+
+Dual mac mode
+=============
+
+- The new (cpsw_new.c) driver is operating in dual-emac mode by default, thus
+  working as 2 individual network interfaces. Main differences from legacy CPSW
+  driver are:
+
+ - optimized promiscuous mode: The P0_UNI_FLOOD (both ports) is enabled in
+   addition to ALLMULTI (current port) instead of ALE_BYPASS.
+   So, Ports in promiscuous mode will keep possibility of mcast and vlan
+   filtering, which provides significant benefits when ports are joined
+   to the same bridge, but without enabling "switch" mode, or to different
+   bridges.
+ - learning disabled on ports as it does not make much sense for
+   segregated ports - no forwarding in HW.
+ - enabled basic support for devlink.
+
+   ::
+
+	devlink dev show
+		platform/48484000.switch
+
+	devlink dev param show
+	platform/48484000.switch:
+	name switch_mode type driver-specific
+	values:
+		cmode runtime value false
+	name ale_bypass type driver-specific
+	values:
+		cmode runtime value false
+
+Devlink configuration parameters
+================================
+
+See Documentation/networking/devlink/ti-cpsw-switch.rst
+
+Bridging in dual mac mode
+=========================
+
+The dual_mac mode requires two vids to be reserved for internal purposes,
+which, by default, equal CPSW Port numbers. As result, bridge has to be
+configured in vlan unaware mode or default_pvid has to be adjusted::
+
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge vlan_filtering 0
+	echo 0 > /sys/class/net/br0/bridge/default_pvid
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+
+or::
+
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge vlan_filtering 0
+	echo 100 > /sys/class/net/br0/bridge/default_pvid
+	ip link set dev br0 type bridge vlan_filtering 1
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+
+Enabling "switch"
+=================
+
+The Switch mode can be enabled by configuring devlink driver parameter
+"switch_mode" to 1/true::
+
+	devlink dev param set platform/48484000.switch \
+	name switch_mode value 1 cmode runtime
+
+This can be done regardless of the state of Port's netdev devices - UP/DOWN, but
+Port's netdev devices have to be in UP before joining to the bridge to avoid
+overwriting of bridge configuration as CPSW switch driver completely reloads its
+configuration when first Port changes its state to UP.
+
+When both interfaces have joined the bridge - CPSW switch driver will enable
+marking packets with offload_fwd_mark flag unless "ale_bypass=0"
+
+All configuration is implemented via switchdev API.
+
+Bridge setup
+============
+
+::
+
+	devlink dev param set platform/48484000.switch \
+	name switch_mode value 1 cmode runtime
+
+	ip link add name br0 type bridge
+	ip link set dev br0 type bridge ageing_time 1000
+	ip link set dev sw0p1 up
+	ip link set dev sw0p2 up
+	ip link set dev sw0p1 master br0
+	ip link set dev sw0p2 master br0
+
+	[*] bridge vlan add dev br0 vid 1 pvid untagged self
+
+	[*] if vlan_filtering=1. where default_pvid=1
+
+	Note. Steps [*] are mandatory.
+
+
+On/off STP
+==========
+
+::
+
+	ip link set dev BRDEV type bridge stp_state 1/0
+
+VLAN configuration
+==================
+
+::
+
+  bridge vlan add dev br0 vid 1 pvid untagged self <---- add cpu port to VLAN 1
+
+Note. This step is mandatory for bridge/default_pvid.
+
+Add extra VLANs
+===============
+
+ 1. untagged::
+
+	bridge vlan add dev sw0p1 vid 100 pvid untagged master
+	bridge vlan add dev sw0p2 vid 100 pvid untagged master
+	bridge vlan add dev br0 vid 100 pvid untagged self <---- Add cpu port to VLAN100
+
+ 2. tagged::
+
+	bridge vlan add dev sw0p1 vid 100 master
+	bridge vlan add dev sw0p2 vid 100 master
+	bridge vlan add dev br0 vid 100 pvid tagged self <---- Add cpu port to VLAN100
+
+FDBs
+----
+
+FDBs are automatically added on the appropriate switch port upon detection
+
+Manually adding FDBs::
+
+    bridge fdb add aa:bb:cc:dd:ee:ff dev sw0p1 master vlan 100
+    bridge fdb add aa:bb:cc:dd:ee:fe dev sw0p2 master <---- Add on all VLANs
+
+MDBs
+----
+
+MDBs are automatically added on the appropriate switch port upon detection
+
+Manually adding MDBs::
+
+  bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent vid 100
+  bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent <---- Add on all VLANs
+
+Multicast flooding
+==================
+CPU port mcast_flooding is always on
+
+Turning flooding on/off on switch ports:
+``bridge link set dev sw0p1 mcast_flood on/off``
+
+Access and Trunk port
+=====================
+
+::
+
+ bridge vlan add dev sw0p1 vid 100 pvid untagged master
+ bridge vlan add dev sw0p2 vid 100 master
+
+
+ bridge vlan add dev br0 vid 100 self
+ ip link add link br0 name br0.100 type vlan id 100
+
+Note. Setting PVID on Bridge device itself working only for
+default VLAN (default_pvid).
+
+NFS
+===
+
+The only way for NFS to work is by chrooting to a minimal environment when
+switch configuration that will affect connectivity is needed.
+Assuming you are booting NFS with eth1 interface (the script is hacky and
+it's just there to prove NFS is doable).
+
+setup.sh::
+
+	#!/bin/sh
+	mkdir proc
+	mount -t proc none /proc
+	ifconfig br0  > /dev/null
+	if [ $? -ne 0 ]; then
+		echo "Setting up bridge"
+		ip link add name br0 type bridge
+		ip link set dev br0 type bridge ageing_time 1000
+		ip link set dev br0 type bridge vlan_filtering 1
+
+		ip link set eth1 down
+		ip link set eth1 name sw0p1
+		ip link set dev sw0p1 up
+		ip link set dev sw0p2 up
+		ip link set dev sw0p2 master br0
+		ip link set dev sw0p1 master br0
+		bridge vlan add dev br0 vid 1 pvid untagged self
+		ifconfig sw0p1 0.0.0.0
+		udhcpc -i br0
+	fi
+	umount /proc
+
+run_nfs.sh::
+
+	#!/bin/sh
+	mkdir /tmp/root/bin -p
+	mkdir /tmp/root/lib -p
+
+	cp -r /lib/ /tmp/root/
+	cp -r /bin/ /tmp/root/
+	cp /sbin/ip /tmp/root/bin
+	cp /sbin/bridge /tmp/root/bin
+	cp /sbin/ifconfig /tmp/root/bin
+	cp /sbin/udhcpc /tmp/root/bin
+	cp /path/to/setup.sh /tmp/root/bin
+	chroot /tmp/root/ busybox sh /bin/setup.sh
+
+	run ./run_nfs.sh
diff --git a/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt b/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
deleted file mode 100644
index 12855ab268b8..000000000000
--- a/Documentation/networking/device_drivers/ti/cpsw_switchdev.txt
+++ /dev/null
@@ -1,209 +0,0 @@
-* Texas Instruments CPSW switchdev based ethernet driver 2.0
-
-- Port renaming
-On older udev versions renaming of ethX to swXpY will not be automatically
-supported
-In order to rename via udev:
-ip -d link show dev sw0p1 | grep switchid
-
-SUBSYSTEM=="net", ACTION=="add", ATTR{phys_switch_id}==<switchid>, \
-        ATTR{phys_port_name}!="", NAME="sw0$attr{phys_port_name}"
-
-
-====================
-# Dual mac mode
-====================
-- The new (cpsw_new.c) driver is operating in dual-emac mode by default, thus
-working as 2 individual network interfaces. Main differences from legacy CPSW
-driver are:
- - optimized promiscuous mode: The P0_UNI_FLOOD (both ports) is enabled in
-addition to ALLMULTI (current port) instead of ALE_BYPASS.
-So, Ports in promiscuous mode will keep possibility of mcast and vlan filtering,
-which is provides significant benefits when ports are joined to the same bridge,
-but without enabling "switch" mode, or to different bridges.
- - learning disabled on ports as it make not too much sense for
-   segregated ports - no forwarding in HW.
- - enabled basic support for devlink.
-
-	devlink dev show
-		platform/48484000.switch
-
-	devlink dev param show
-	platform/48484000.switch:
-	name switch_mode type driver-specific
-	values:
-		cmode runtime value false
-	name ale_bypass type driver-specific
-	values:
-		cmode runtime value false
-
-Devlink configuration parameters
-====================
-See Documentation/networking/devlink/ti-cpsw-switch.rst
-
-====================
-# Bridging in dual mac mode
-====================
-The dual_mac mode requires two vids to be reserved for internal purposes,
-which, by default, equal CPSW Port numbers. As result, bridge has to be
-configured in vlan unaware mode or default_pvid has to be adjusted.
-
-	ip link add name br0 type bridge
-	ip link set dev br0 type bridge vlan_filtering 0
-	echo 0 > /sys/class/net/br0/bridge/default_pvid
-	ip link set dev sw0p1 master br0
-	ip link set dev sw0p2 master br0
- - or -
-	ip link add name br0 type bridge
-	ip link set dev br0 type bridge vlan_filtering 0
-	echo 100 > /sys/class/net/br0/bridge/default_pvid
-	ip link set dev br0 type bridge vlan_filtering 1
-	ip link set dev sw0p1 master br0
-	ip link set dev sw0p2 master br0
-
-====================
-# Enabling "switch"
-====================
-The Switch mode can be enabled by configuring devlink driver parameter
-"switch_mode" to 1/true:
-	devlink dev param set platform/48484000.switch \
-	name switch_mode value 1 cmode runtime
-
-This can be done regardless of the state of Port's netdev devices - UP/DOWN, but
-Port's netdev devices have to be in UP before joining to the bridge to avoid
-overwriting of bridge configuration as CPSW switch driver copletly reloads its
-configuration when first Port changes its state to UP.
-
-When the both interfaces joined the bridge - CPSW switch driver will enable
-marking packets with offload_fwd_mark flag unless "ale_bypass=0"
-
-All configuration is implemented via switchdev API.
-
-====================
-# Bridge setup
-====================
-	devlink dev param set platform/48484000.switch \
-	name switch_mode value 1 cmode runtime
-
-	ip link add name br0 type bridge
-	ip link set dev br0 type bridge ageing_time 1000
-	ip link set dev sw0p1 up
-	ip link set dev sw0p2 up
-	ip link set dev sw0p1 master br0
-	ip link set dev sw0p2 master br0
-	[*] bridge vlan add dev br0 vid 1 pvid untagged self
-
-[*] if vlan_filtering=1. where default_pvid=1
-
-=================
-# On/off STP
-=================
-ip link set dev BRDEV type bridge stp_state 1/0
-
-Note. Steps [*] are mandatory.
-
-====================
-# VLAN configuration
-====================
-bridge vlan add dev br0 vid 1 pvid untagged self <---- add cpu port to VLAN 1
-
-Note. This step is mandatory for bridge/default_pvid.
-
-=================
-# Add extra VLANs
-=================
- 1. untagged:
-    bridge vlan add dev sw0p1 vid 100 pvid untagged master
-    bridge vlan add dev sw0p2 vid 100 pvid untagged master
-    bridge vlan add dev br0 vid 100 pvid untagged self <---- Add cpu port to VLAN100
-
- 2. tagged:
-    bridge vlan add dev sw0p1 vid 100 master
-    bridge vlan add dev sw0p2 vid 100 master
-    bridge vlan add dev br0 vid 100 pvid tagged self <---- Add cpu port to VLAN100
-
-====
-FDBs
-====
-FDBs are automatically added on the appropriate switch port upon detection
-
-Manually adding FDBs:
-bridge fdb add aa:bb:cc:dd:ee:ff dev sw0p1 master vlan 100
-bridge fdb add aa:bb:cc:dd:ee:fe dev sw0p2 master <---- Add on all VLANs
-
-====
-MDBs
-====
-MDBs are automatically added on the appropriate switch port upon detection
-
-Manually adding MDBs:
-bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent vid 100
-bridge mdb add dev br0 port sw0p1 grp 239.1.1.1 permanent <---- Add on all VLANs
-
-==================
-Multicast flooding
-==================
-CPU port mcast_flooding is always on
-
-Turning flooding on/off on swithch ports:
-bridge link set dev sw0p1 mcast_flood on/off
-
-==================
-Access and Trunk port
-==================
- bridge vlan add dev sw0p1 vid 100 pvid untagged master
- bridge vlan add dev sw0p2 vid 100 master
-
-
- bridge vlan add dev br0 vid 100 self
- ip link add link br0 name br0.100 type vlan id 100
-
- Note. Setting PVID on Bridge device itself working only for
- default VLAN (default_pvid).
-
-=====================
- NFS
-=====================
-The only way for NFS to work is by chrooting to a minimal environment when
-switch configuration that will affect connectivity is needed.
-Assuming you are booting NFS with eth1 interface(the script is hacky and
-it's just there to prove NFS is doable).
-
-setup.sh:
-#!/bin/sh
-mkdir proc
-mount -t proc none /proc
-ifconfig br0  > /dev/null
-if [ $? -ne 0 ]; then
-        echo "Setting up bridge"
-        ip link add name br0 type bridge
-        ip link set dev br0 type bridge ageing_time 1000
-        ip link set dev br0 type bridge vlan_filtering 1
-
-        ip link set eth1 down
-        ip link set eth1 name sw0p1
-        ip link set dev sw0p1 up
-        ip link set dev sw0p2 up
-        ip link set dev sw0p2 master br0
-        ip link set dev sw0p1 master br0
-        bridge vlan add dev br0 vid 1 pvid untagged self
-        ifconfig sw0p1 0.0.0.0
-        udhchc -i br0
-fi
-umount /proc
-
-run_nfs.sh:
-#!/bin/sh
-mkdir /tmp/root/bin -p
-mkdir /tmp/root/lib -p
-
-cp -r /lib/ /tmp/root/
-cp -r /bin/ /tmp/root/
-cp /sbin/ip /tmp/root/bin
-cp /sbin/bridge /tmp/root/bin
-cp /sbin/ifconfig /tmp/root/bin
-cp /sbin/udhcpc /tmp/root/bin
-cp /path/to/setup.sh /tmp/root/bin
-chroot /tmp/root/ busybox sh /bin/setup.sh
-
-run ./run_nfs.sh
-- 
cgit v1.2.3-59-g8ed1b


From 6f22789da76d1183ed6fbb168ac4f99a5ee540a3 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:55 +0200
Subject: docs: networking: device drivers: convert ti/cpsw.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark code blocks and literals as such;
- adjust indentation, whitespace and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/ti/cpsw.rst          | 587 +++++++++++++++++++++
 .../networking/device_drivers/ti/cpsw.txt          | 541 -------------------
 3 files changed, 588 insertions(+), 541 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/ti/cpsw.rst
 delete mode 100644 Documentation/networking/device_drivers/ti/cpsw.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index b3c0c473de2b..1d3b664e6921 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -48,6 +48,7 @@ Contents:
    sb1000
    smsc/smc9
    ti/cpsw_switchdev
+   ti/cpsw
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/ti/cpsw.rst b/Documentation/networking/device_drivers/ti/cpsw.rst
new file mode 100644
index 000000000000..a88946bd188b
--- /dev/null
+++ b/Documentation/networking/device_drivers/ti/cpsw.rst
@@ -0,0 +1,587 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+======================================
+Texas Instruments CPSW ethernet driver
+======================================
+
+Multiqueue & CBS & MQPRIO
+=========================
+
+
+The cpsw has 3 CBS shapers for each external port. This document
+describes MQPRIO and CBS Qdisc offload configuration for cpsw driver
+based on examples. It potentially can be used in audio video bridging
+(AVB) and time sensitive networking (TSN).
+
+The following examples were tested on AM572x EVM and BBB boards.
+
+Test setup
+==========
+
+Two examples are considered, with the AM572x EVM running the cpsw driver
+in dual_emac mode.
+
+Several prerequisites:
+
+- TX queues must be rated starting from txq0 that has highest priority
+- Traffic classes are used starting from 0, that has highest priority
+- CBS shapers should be used with rated queues
+- The bandwidth for CBS shapers has to be set a little bit more than
+  potential incoming rate, thus, rate of all incoming tx queues has
+  to be a little less
+- Real rates can differ, due to discreteness
+- Mapping skb-priority to txq is not enough, also skb-priority to l2 prio
+  map has to be created with ip or vconfig tool
+- Any l2/socket prio (0 - 7) for classes can be used, but for
+  simplicity default values are used: 3 and 2
+- only 2 classes tested: A and B, but checked and can work with more,
+  maximum allowed 4, but a rate can be set for only 3 of them.
+
+Test setup for examples
+=======================
+
+::
+
+					+-------------------------------+
+					|--+                            |
+					|  |      Workstation0          |
+					|E |  MAC 18:03:73:66:87:42     |
+    +-----------------------------+  +--|t |                            |
+    |                    | 1  | E |  |  |h |./tsn_listener -d \         |
+    |  Target board:     | 0  | t |--+  |0 | 18:03:73:66:87:42 -i eth0 \|
+    |  AM572x EVM        | 0  | h |     |  | -s 1500                    |
+    |                    | 0  | 0 |     |--+                            |
+    |  Only 2 classes:   |Mb  +---|     +-------------------------------+
+    |  class A, class B  |        |
+    |                    |    +---|     +-------------------------------+
+    |                    | 1  | E |     |--+                            |
+    |                    | 0  | t |     |  |      Workstation1          |
+    |                    | 0  | h |--+  |E |  MAC 20:cf:30:85:7d:fd     |
+    |                    |Mb  | 1 |  +--|t |                            |
+    +-----------------------------+     |h |./tsn_listener -d \         |
+					|0 | 20:cf:30:85:7d:fd -i eth0 \|
+					|  | -s 1500                    |
+					|--+                            |
+					+-------------------------------+
+
+
+Example 1: One port tx AVB configuration scheme for target board
+----------------------------------------------------------------
+
+(prints and scheme for AM572x evm, applicable for single port boards)
+
+- tc - traffic class
+- txq - transmit queue
+- p - priority
+- f - fifo (cpsw fifo)
+- S - shaper configured
+
+::
+
+    +------------------------------------------------------------------+ u
+    | +---------------+  +---------------+  +------+ +------+          | s
+    | |               |  |               |  |      | |      |          | e
+    | | App 1         |  | App 2         |  | Apps | | Apps |          | r
+    | | Class A       |  | Class B       |  | Rest | | Rest |          |
+    | | Eth0          |  | Eth0          |  | Eth0 | | Eth1 |          | s
+    | | VLAN100       |  | VLAN100       |  |   |  | |   |  |          | p
+    | | 40 Mb/s       |  | 20 Mb/s       |  |   |  | |   |  |          | a
+    | | SO_PRIORITY=3 |  | SO_PRIORITY=2 |  |   |  | |   |  |          | c
+    | |   |           |  |   |           |  |   |  | |   |  |          | e
+    | +---|-----------+  +---|-----------+  +---|--+ +---|--+          |
+    +-----|------------------|------------------|--------|-------------+
+	+-+     +------------+                  |        |
+	|       |             +-----------------+     +--+
+	|       |             |                       |
+    +---|-------|-------------|-----------------------|----------------+
+    | +----+ +----+ +----+ +----+                   +----+             |
+    | | p3 | | p2 | | p1 | | p0 |                   | p0 |             | k
+    | \    / \    / \    / \    /                   \    /             | e
+    |  \  /   \  /   \  /   \  /                     \  /              | r
+    |   \/     \/     \/     \/                       \/               | n
+    |    |     |             |                        |                | e
+    |    |     |       +-----+                        |                | l
+    |    |     |       |                              |                |
+    | +----+ +----+ +----+                          +----+             | s
+    | |tc0 | |tc1 | |tc2 |                          |tc0 |             | p
+    | \    / \    / \    /                          \    /             | a
+    |  \  /   \  /   \  /                            \  /              | c
+    |   \/     \/     \/                              \/               | e
+    |   |      |       +-----+                        |                |
+    |   |      |       |     |                        |                |
+    |   |      |       |     |                        |                |
+    |   |      |       |     |                        |                |
+    | +----+ +----+ +----+ +----+                   +----+             |
+    | |txq0| |txq1| |txq2| |txq3|                   |txq4|             |
+    | \    / \    / \    / \    /                   \    /             |
+    |  \  /   \  /   \  /   \  /                     \  /              |
+    |   \/     \/     \/     \/                       \/               |
+    | +-|------|------|------|--+                  +--|--------------+ |
+    | | |      |      |      |  | Eth0.100         |  |     Eth1     | |
+    +---|------|------|------|------------------------|----------------+
+	|      |      |      |                        |
+	p      p      p      p                        |
+	3      2      0-1, 4-7  <- L2 priority        |
+	|      |      |      |                        |
+	|      |      |      |                        |
+    +---|------|------|------|------------------------|----------------+
+    |   |      |      |      |             |----------+                |
+    | +----+ +----+ +----+ +----+       +----+                         |
+    | |dma7| |dma6| |dma5| |dma4|       |dma3|                         |
+    | \    / \    / \    / \    /       \    /                         | c
+    |  \S /   \S /   \  /   \  /         \  /                          | p
+    |   \/     \/     \/     \/           \/                           | s
+    |   |      |      | +-----            |                            | w
+    |   |      |      | |                 |                            |
+    |   |      |      | |                 |                            | d
+    | +----+ +----+ +----+p            p+----+                         | r
+    | |    | |    | |    |o            o|    |                         | i
+    | | f3 | | f2 | | f0 |r            r| f0 |                         | v
+    | |tc0 | |tc1 | |tc2 |t            t|tc0 |                         | e
+    | \CBS / \CBS / \CBS /1            2\CBS /                         | r
+    |  \S /   \S /   \  /                \  /                          |
+    |   \/     \/     \/                  \/                           |
+    +------------------------------------------------------------------+
+
+
+1) ::
+
+
+	// Add 4 tx queues, for interface Eth0, and 1 tx queue for Eth1
+	$ ethtool -L eth0 rx 1 tx 5
+	rx unmodified, ignoring
+
+2) ::
+
+	// Check if num of queues is set correctly:
+	$ ethtool -l eth0
+	Channel parameters for eth0:
+	Pre-set maximums:
+	RX:             8
+	TX:             8
+	Other:          0
+	Combined:       0
+	Current hardware settings:
+	RX:             1
+	TX:             5
+	Other:          0
+	Combined:       0
+
+3) ::
+
+	// TX queues must be rated starting from 0, so set bws for tx0 and tx1
+	// Set rates 40 and 20 Mb/s appropriately.
+	// Pay attention, real speed can differ a bit due to discreteness.
+	// Leave last 2 tx queues not rated.
+	$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+	$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+
+4) ::
+
+	// Check maximum rate of tx (cpdma) queues:
+	$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+	40
+	20
+	0
+	0
+	0
+
+5) ::
+
+	// Map skb->priority to traffic class:
+	// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+	// Map traffic class to transmit queue:
+	// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3)
+	$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+	map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
+
+5a) ::
+
+	// As the two interfaces share the same set of tx queues, assign all traffic
+	// coming to interface Eth1 to separate queue in order to not mix it
+	// with traffic from interface Eth0, so use separate txq to send
+	// packets to Eth1, so all prio -> tc0 and tc0 -> txq4
+	// Here hw 0, so here still default configuration for eth1 in hw
+	$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \
+	map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0
+
+6) ::
+
+	// Check classes settings
+	$ tc -g class show dev eth0
+	+---(100:ffe2) mqprio
+	|    +---(100:3) mqprio
+	|    +---(100:4) mqprio
+	|
+	+---(100:ffe1) mqprio
+	|    +---(100:2) mqprio
+	|
+	+---(100:ffe0) mqprio
+	    +---(100:1) mqprio
+
+	$ tc -g class show dev eth1
+	+---(100:ffe0) mqprio
+	    +---(100:5) mqprio
+
+7) ::
+
+	// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc
+	// Set it +1 Mb for reserve (important!)
+	// Here only the idle slope is important, the other args are ignored
+	// Pay attention, real speed can differ a bit due to discreteness
+	$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \
+	hicredit 62 sendslope -959000 idleslope 41000 offload 1
+	net eth0: set FIFO3 bw = 50
+
+8) ::
+
+	// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc:
+	// Set it +1 Mb for reserve (important!)
+	$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \
+	hicredit 65 sendslope -979000 idleslope 21000 offload 1
+	net eth0: set FIFO2 bw = 30
+
+9) ::
+
+	// Create vlan 100 to map sk->priority to vlan qos
+	$ ip link add link eth0 name eth0.100 type vlan id 100
+	8021q: 802.1Q VLAN Support v1.8
+	8021q: adding VLAN 0 to HW filter on device eth0
+	8021q: adding VLAN 0 to HW filter on device eth1
+	net eth0: Adding vlanid 100 to vlan filter
+
+10) ::
+
+	// Map skb->priority to L2 prio, 1 to 1
+	$ ip link set eth0.100 type vlan \
+	egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11) ::
+
+	// Check egress map for vlan 100
+	$ cat /proc/net/vlan/eth0.100
+	[...]
+	INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+	EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12) ::
+
+	// Run your appropriate tools with socket option "SO_PRIORITY"
+	// set to 3 for class A and/or to 2 for class B
+	// (the ones used here are from https://www.spinics.net/lists/netdev/msg460869.html)
+	./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+	./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+
+13) ::
+
+	// Run your listener on the workstation (should be in the same vlan)
+	// (the one used here is from https://www.spinics.net/lists/netdev/msg460869.html)
+	./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39000 kbps
+
+14) ::
+
+	// Restore default configuration if needed
+	$ ip link del eth0.100
+	$ tc qdisc del dev eth1 root
+	$ tc qdisc del dev eth0 root
+	net eth0: Prev FIFO2 is shaped
+	net eth0: set FIFO3 bw = 0
+	net eth0: set FIFO2 bw = 0
+	$ ethtool -L eth0 rx 1 tx 1
+
+Example 2: Two port tx AVB configuration scheme for target board
+----------------------------------------------------------------
+
+(prints and scheme for AM572x evm, for dual emac boards only)
+
+::
+
+    +------------------------------------------------------------------+ u
+    | +----------+  +----------+  +------+  +----------+  +----------+ | s
+    | |          |  |          |  |      |  |          |  |          | | e
+    | | App 1    |  | App 2    |  | Apps |  | App 3    |  | App 4    | | r
+    | | Class A  |  | Class B  |  | Rest |  | Class B  |  | Class A  | |
+    | | Eth0     |  | Eth0     |  |   |  |  | Eth1     |  | Eth1     | | s
+    | | VLAN100  |  | VLAN100  |  |   |  |  | VLAN100  |  | VLAN100  | | p
+    | | 40 Mb/s  |  | 20 Mb/s  |  |   |  |  | 10 Mb/s  |  | 30 Mb/s  | | a
+    | | SO_PRI=3 |  | SO_PRI=2 |  |   |  |  | SO_PRI=2 |  | SO_PRI=3 | | c
+    | |   |      |  |   |      |  |   |  |  |   |      |  |   |      | | e
+    | +---|------+  +---|------+  +---|--+  +---|------+  +---|------+ |
+    +-----|-------------|-------------|---------|-------------|--------+
+	+-+     +-------+             |         +----------+  +----+
+	|       |             +-------+------+             |       |
+	|       |             |              |             |       |
+    +---|-------|-------------|--------------|-------------|-------|---+
+    | +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
+    | | p3 | | p2 | | p1 | | p0 |          | p0 | | p1 | | p2 | | p3 | | k
+    | \    / \    / \    / \    /          \    / \    / \    / \    / | e
+    |  \  /   \  /   \  /   \  /            \  /   \  /   \  /   \  /  | r
+    |   \/     \/     \/     \/              \/     \/     \/     \/   | n
+    |   |      |             |                |             |      |   | e
+    |   |      |        +----+                +----+        |      |   | l
+    |   |      |        |                          |        |      |   |
+    | +----+ +----+ +----+                        +----+ +----+ +----+ | s
+    | |tc0 | |tc1 | |tc2 |                        |tc2 | |tc1 | |tc0 | | p
+    | \    / \    / \    /                        \    / \    / \    / | a
+    |  \  /   \  /   \  /                          \  /   \  /   \  /  | c
+    |   \/     \/     \/                            \/     \/     \/   | e
+    |   |      |       +-----+                +-----+      |       |   |
+    |   |      |       |     |                |     |      |       |   |
+    |   |      |       |     |                |     |      |       |   |
+    |   |      |       |     |    E      E    |     |      |       |   |
+    | +----+ +----+ +----+ +----+ t      t +----+ +----+ +----+ +----+ |
+    | |txq0| |txq1| |txq4| |txq5| h      h |txq6| |txq7| |txq3| |txq2| |
+    | \    / \    / \    / \    / 0      1 \    / \    / \    / \    / |
+    |  \  /   \  /   \  /   \  /  .      .  \  /   \  /   \  /   \  /  |
+    |   \/     \/     \/     \/   1      1   \/     \/     \/     \/   |
+    | +-|------|------|------|--+ 0      0 +-|------|------|------|--+ |
+    | | |      |      |      |  | 0      0 | |      |      |      |  | |
+    +---|------|------|------|---------------|------|------|------|----+
+	|      |      |      |               |      |      |      |
+	p      p      p      p               p      p      p      p
+	3      2      0-1, 4-7   <-L2 pri->  0-1, 4-7      2      3
+	|      |      |      |               |      |      |      |
+	|      |      |      |               |      |      |      |
+    +---|------|------|------|---------------|------|------|------|----+
+    |   |      |      |      |               |      |      |      |    |
+    | +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
+    | |dma7| |dma6| |dma3| |dma2|          |dma1| |dma0| |dma4| |dma5| |
+    | \    / \    / \    / \    /          \    / \    / \    / \    / | c
+    |  \S /   \S /   \  /   \  /            \  /   \  /   \S /   \S /  | p
+    |   \/     \/     \/     \/              \/     \/     \/     \/   | s
+    |   |      |      | +-----                |      |      |      |   | w
+    |   |      |      | |                     +----+ |      |      |   |
+    |   |      |      | |                          | |      |      |   | d
+    | +----+ +----+ +----+p                      p+----+ +----+ +----+ | r
+    | |    | |    | |    |o                      o|    | |    | |    | | i
+    | | f3 | | f2 | | f0 |r        CPSW          r| f3 | | f2 | | f0 | | v
+    | |tc0 | |tc1 | |tc2 |t                      t|tc0 | |tc1 | |tc2 | | e
+    | \CBS / \CBS / \CBS /1                      2\CBS / \CBS / \CBS / | r
+    |  \S /   \S /   \  /                          \S /   \S /   \  /  |
+    |   \/     \/     \/                            \/     \/     \/   |
+    +------------------------------------------------------------------+
+    ========================================Eth==========================>
+
+1) ::
+
+	// Add 8 tx queues for interface Eth0; the queues are common, so they are
+	// accessed by both interfaces, Eth0 and Eth1.
+	$ ethtool -L eth1 rx 1 tx 8
+	rx unmodified, ignoring
+
+2) ::
+
+	// Check if num of queues is set correctly:
+	$ ethtool -l eth0
+	Channel parameters for eth0:
+	Pre-set maximums:
+	RX:             8
+	TX:             8
+	Other:          0
+	Combined:       0
+	Current hardware settings:
+	RX:             1
+	TX:             8
+	Other:          0
+	Combined:       0
+
+3) ::
+
+	// TX queues must be rated starting from 0, so set bws for tx0 and tx1 for Eth0
+	// and for tx2 and tx3 for Eth1. That is, rates 40 and 20 Mb/s respectively
+	// for Eth0 and 30 and 10 Mb/s for Eth1.
+	// Real speed can differ a bit due to discreteness
+	// Leave last 4 tx queues as not rated
+	$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
+	$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
+	$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate
+	$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate
+
+4) ::
+
+	// Check maximum rate of tx (cpdma) queues:
+	$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
+	40
+	20
+	30
+	10
+	0
+	0
+	0
+	0
+
+5) ::
+
+	// Map skb->priority to traffic class for Eth0:
+	// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+	// Map traffic class to transmit queue:
+	// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5)
+	$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
+	map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1
+
+6) ::
+
+	// Check classes settings
+	$ tc -g class show dev eth0
+	+---(100:ffe2) mqprio
+	|    +---(100:5) mqprio
+	|    +---(100:6) mqprio
+	|
+	+---(100:ffe1) mqprio
+	|    +---(100:2) mqprio
+	|
+	+---(100:ffe0) mqprio
+	    +---(100:1) mqprio
+
+7) ::
+
+	// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc for Eth0
+	// Here only the idle slope is important, the other args are ignored
+	// Real speed can differ a bit due to discreteness
+	$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \
+	hicredit 62 sendslope -959000 idleslope 41000 offload 1
+	net eth0: set FIFO3 bw = 50
+
+8) ::
+
+	// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0
+	$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
+	hicredit 65 sendslope -979000 idleslope 21000 offload 1
+	net eth0: set FIFO2 bw = 30
+
+9) ::
+
+	// Create vlan 100 to map sk->priority to vlan qos for Eth0
+	$ ip link add link eth0 name eth0.100 type vlan id 100
+	net eth0: Adding vlanid 100 to vlan filter
+
+10) ::
+
+	// Map skb->priority to L2 prio for Eth0.100, one to one
+	$ ip link set eth0.100 type vlan \
+	egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+11) ::
+
+	// Check egress map for vlan 100
+	$ cat /proc/net/vlan/eth0.100
+	[...]
+	INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+	EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+12) ::
+
+	// Map skb->priority to traffic class for Eth1:
+	// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
+	// Map traffic class to transmit queue:
+	// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7)
+	$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \
+	map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1
+
+13) ::
+
+	// Check classes settings
+	$ tc -g class show dev eth1
+	+---(100:ffe2) mqprio
+	|    +---(100:7) mqprio
+	|    +---(100:8) mqprio
+	|
+	+---(100:ffe1) mqprio
+	|    +---(100:4) mqprio
+	|
+	+---(100:ffe0) mqprio
+	    +---(100:3) mqprio
+
+14) ::
+
+	// Set rate for class A - 31 Mbit (tc0, txq2) using CBS Qdisc for Eth1
+	// Here only the idle slope is important; the other args are ignored, but
+	// are calculated for the interface speed - 100Mb for the eth1 port.
+	// Set it +1 Mb for reserve (important!)
+	$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1035 \
+	hicredit 465 sendslope -69000 idleslope 31000 offload 1
+	net eth1: set FIFO3 bw = 31
+
+15) ::
+
+	// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1
+	// Set it +1 Mb for reserve (important!)
+	$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1335 \
+	hicredit 405 sendslope -89000 idleslope 11000 offload 1
+	net eth1: set FIFO2 bw = 11
+
+16) ::
+
+	// Create vlan 100 to map sk->priority to vlan qos for Eth1
+	$ ip link add link eth1 name eth1.100 type vlan id 100
+	net eth1: Adding vlanid 100 to vlan filter
+
+17) ::
+
+	// Map skb->priority to L2 prio for Eth1.100, one to one
+	$ ip link set eth1.100 type vlan \
+	egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+18) ::
+
+	// Check egress map for vlan 100
+	$ cat /proc/net/vlan/eth1.100
+	[...]
+	INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
+	EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
+
+19) ::
+
+	// Run appropriate tools with socket option "SO_PRIORITY" set to 3
+	// for class A and to 2 for class B, for both interfaces
+	./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
+	./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
+	./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500&
+	./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500&
+
+20) ::
+
+	// Run your listener on the workstation (should be in the same vlan)
+	// (the one used here is from https://www.spinics.net/lists/netdev/msg460869.html)
+	./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39012 kbps
+	Receiving data rate: 39000 kbps
+
+21) ::
+
+	// Restore default configuration if needed
+	$ ip link del eth1.100
+	$ ip link del eth0.100
+	$ tc qdisc del dev eth1 root
+	net eth1: Prev FIFO2 is shaped
+	net eth1: set FIFO3 bw = 0
+	net eth1: set FIFO2 bw = 0
+	$ tc qdisc del dev eth0 root
+	net eth0: Prev FIFO2 is shaped
+	net eth0: set FIFO3 bw = 0
+	net eth0: set FIFO2 bw = 0
+	$ ethtool -L eth0 rx 1 tx 1
diff --git a/Documentation/networking/device_drivers/ti/cpsw.txt b/Documentation/networking/device_drivers/ti/cpsw.txt
deleted file mode 100644
index d4d4c0751a09..000000000000
--- a/Documentation/networking/device_drivers/ti/cpsw.txt
+++ /dev/null
@@ -1,541 +0,0 @@
-* Texas Instruments CPSW ethernet driver
-
-Multiqueue & CBS & MQPRIO
-=====================================================================
-=====================================================================
-
-The cpsw has 3 CBS shapers for each external ports. This document
-describes MQPRIO and CBS Qdisc offload configuration for cpsw driver
-based on examples. It potentially can be used in audio video bridging
-(AVB) and time sensitive networking (TSN).
-
-The following examples were tested on AM572x EVM and BBB boards.
-
-Test setup
-==========
-
-Under consideration two examples with AM572x EVM running cpsw driver
-in dual_emac mode.
-
-Several prerequisites:
-- TX queues must be rated starting from txq0 that has highest priority
-- Traffic classes are used starting from 0, that has highest priority
-- CBS shapers should be used with rated queues
-- The bandwidth for CBS shapers has to be set a little bit more then
-  potential incoming rate, thus, rate of all incoming tx queues has
-  to be a little less
-- Real rates can differ, due to discreetness
-- Map skb-priority to txq is not enough, also skb-priority to l2 prio
-  map has to be created with ip or vconfig tool
-- Any l2/socket prio (0 - 7) for classes can be used, but for
-  simplicity default values are used: 3 and 2
-- only 2 classes tested: A and B, but checked and can work with more,
-  maximum allowed 4, but only for 3 rate can be set.
-
-Test setup for examples
-=======================
-                                    +-------------------------------+
-                                    |--+                            |
-                                    |  |      Workstation0          |
-                                    |E |  MAC 18:03:73:66:87:42     |
-+-----------------------------+  +--|t |                            |
-|                    | 1  | E |  |  |h |./tsn_listener -d \         |
-|  Target board:     | 0  | t |--+  |0 | 18:03:73:66:87:42 -i eth0 \|
-|  AM572x EVM        | 0  | h |     |  | -s 1500                    |
-|                    | 0  | 0 |     |--+                            |
-|  Only 2 classes:   |Mb  +---|     +-------------------------------+
-|  class A, class B  |        |
-|                    |    +---|     +-------------------------------+
-|                    | 1  | E |     |--+                            |
-|                    | 0  | t |     |  |      Workstation1          |
-|                    | 0  | h |--+  |E |  MAC 20:cf:30:85:7d:fd     |
-|                    |Mb  | 1 |  +--|t |                            |
-+-----------------------------+     |h |./tsn_listener -d \         |
-                                    |0 | 20:cf:30:85:7d:fd -i eth0 \|
-                                    |  | -s 1500                    |
-                                    |--+                            |
-                                    +-------------------------------+
-
-*********************************************************************
-*********************************************************************
-*********************************************************************
-Example 1: One port tx AVB configuration scheme for target board
-----------------------------------------------------------------------
-(prints and scheme for AM572x evm, applicable for single port boards)
-
-tc - traffic class
-txq - transmit queue
-p - priority
-f - fifo (cpsw fifo)
-S - shaper configured
-
-+------------------------------------------------------------------+ u
-| +---------------+  +---------------+  +------+ +------+          | s
-| |               |  |               |  |      | |      |          | e
-| | App 1         |  | App 2         |  | Apps | | Apps |          | r
-| | Class A       |  | Class B       |  | Rest | | Rest |          |
-| | Eth0          |  | Eth0          |  | Eth0 | | Eth1 |          | s
-| | VLAN100       |  | VLAN100       |  |   |  | |   |  |          | p
-| | 40 Mb/s       |  | 20 Mb/s       |  |   |  | |   |  |          | a
-| | SO_PRIORITY=3 |  | SO_PRIORITY=2 |  |   |  | |   |  |          | c
-| |   |           |  |   |           |  |   |  | |   |  |          | e
-| +---|-----------+  +---|-----------+  +---|--+ +---|--+          |
-+-----|------------------|------------------|--------|-------------+
-    +-+     +------------+                  |        |
-    |       |             +-----------------+     +--+
-    |       |             |                       |
-+---|-------|-------------|-----------------------|----------------+
-| +----+ +----+ +----+ +----+                   +----+             |
-| | p3 | | p2 | | p1 | | p0 |                   | p0 |             | k
-| \    / \    / \    / \    /                   \    /             | e
-|  \  /   \  /   \  /   \  /                     \  /              | r
-|   \/     \/     \/     \/                       \/               | n
-|    |     |             |                        |                | e
-|    |     |       +-----+                        |                | l
-|    |     |       |                              |                |
-| +----+ +----+ +----+                          +----+             | s
-| |tc0 | |tc1 | |tc2 |                          |tc0 |             | p
-| \    / \    / \    /                          \    /             | a
-|  \  /   \  /   \  /                            \  /              | c
-|   \/     \/     \/                              \/               | e
-|   |      |       +-----+                        |                |
-|   |      |       |     |                        |                |
-|   |      |       |     |                        |                |
-|   |      |       |     |                        |                |
-| +----+ +----+ +----+ +----+                   +----+             |
-| |txq0| |txq1| |txq2| |txq3|                   |txq4|             |
-| \    / \    / \    / \    /                   \    /             |
-|  \  /   \  /   \  /   \  /                     \  /              |
-|   \/     \/     \/     \/                       \/               |
-| +-|------|------|------|--+                  +--|--------------+ |
-| | |      |      |      |  | Eth0.100         |  |     Eth1     | |
-+---|------|------|------|------------------------|----------------+
-    |      |      |      |                        |
-    p      p      p      p                        |
-    3      2      0-1, 4-7  <- L2 priority        |
-    |      |      |      |                        |
-    |      |      |      |                        |
-+---|------|------|------|------------------------|----------------+
-|   |      |      |      |             |----------+                |
-| +----+ +----+ +----+ +----+       +----+                         |
-| |dma7| |dma6| |dma5| |dma4|       |dma3|                         |
-| \    / \    / \    / \    /       \    /                         | c
-|  \S /   \S /   \  /   \  /         \  /                          | p
-|   \/     \/     \/     \/           \/                           | s
-|   |      |      | +-----            |                            | w
-|   |      |      | |                 |                            |
-|   |      |      | |                 |                            | d
-| +----+ +----+ +----+p            p+----+                         | r
-| |    | |    | |    |o            o|    |                         | i
-| | f3 | | f2 | | f0 |r            r| f0 |                         | v
-| |tc0 | |tc1 | |tc2 |t            t|tc0 |                         | e
-| \CBS / \CBS / \CBS /1            2\CBS /                         | r
-|  \S /   \S /   \  /                \  /                          |
-|   \/     \/     \/                  \/                           |
-+------------------------------------------------------------------+
-========================================Eth==========================>
-
-1)
-// Add 4 tx queues, for interface Eth0, and 1 tx queue for Eth1
-$ ethtool -L eth0 rx 1 tx 5
-rx unmodified, ignoring
-
-2)
-// Check if num of queues is set correctly:
-$ ethtool -l eth0
-Channel parameters for eth0:
-Pre-set maximums:
-RX:             8
-TX:             8
-Other:          0
-Combined:       0
-Current hardware settings:
-RX:             1
-TX:             5
-Other:          0
-Combined:       0
-
-3)
-// TX queues must be rated starting from 0, so set bws for tx0 and tx1
-// Set rates 40 and 20 Mb/s appropriately.
-// Pay attention, real speed can differ a bit due to discreetness.
-// Leave last 2 tx queues not rated.
-$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
-$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
-
-4)
-// Check maximum rate of tx (cpdma) queues:
-$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
-40
-20
-0
-0
-0
-
-5)
-// Map skb->priority to traffic class:
-// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
-// Map traffic class to transmit queue:
-// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq2, txq3)
-$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
-map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@2 hw 1
-
-5a)
-// As two interface sharing same set of tx queues, assign all traffic
-// coming to interface Eth1 to separate queue in order to not mix it
-// with traffic from interface Eth0, so use separate txq to send
-// packets to Eth1, so all prio -> tc0 and tc0 -> txq4
-// Here hw 0, so here still default configuration for eth1 in hw
-$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 1 \
-map 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 queues 1@4 hw 0
-
-6)
-// Check classes settings
-$ tc -g class show dev eth0
-+---(100:ffe2) mqprio
-|    +---(100:3) mqprio
-|    +---(100:4) mqprio
-|
-+---(100:ffe1) mqprio
-|    +---(100:2) mqprio
-|
-+---(100:ffe0) mqprio
-     +---(100:1) mqprio
-
-$ tc -g class show dev eth1
-+---(100:ffe0) mqprio
-     +---(100:5) mqprio
-
-7)
-// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc
-// Set it +1 Mb for reserve (important!)
-// here only idle slope is important, others arg are ignored
-// Pay attention, real speed can differ a bit due to discreetness
-$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1438 \
-hicredit 62 sendslope -959000 idleslope 41000 offload 1
-net eth0: set FIFO3 bw = 50
-
-8)
-// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc:
-// Set it +1 Mb for reserve (important!)
-$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1468 \
-hicredit 65 sendslope -979000 idleslope 21000 offload 1
-net eth0: set FIFO2 bw = 30
-
-9)
-// Create vlan 100 to map sk->priority to vlan qos
-$ ip link add link eth0 name eth0.100 type vlan id 100
-8021q: 802.1Q VLAN Support v1.8
-8021q: adding VLAN 0 to HW filter on device eth0
-8021q: adding VLAN 0 to HW filter on device eth1
-net eth0: Adding vlanid 100 to vlan filter
-
-10)
-// Map skb->priority to L2 prio, 1 to 1
-$ ip link set eth0.100 type vlan \
-egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-11)
-// Check egress map for vlan 100
-$ cat /proc/net/vlan/eth0.100
-[...]
-INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
-EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-12)
-// Run your appropriate tools with socket option "SO_PRIORITY"
-// to 3 for class A and/or to 2 for class B
-// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
-./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
-./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
-
-13)
-// run your listener on workstation (should be in same vlan)
-// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
-./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39000 kbps
-
-14)
-// Restore default configuration if needed
-$ ip link del eth0.100
-$ tc qdisc del dev eth1 root
-$ tc qdisc del dev eth0 root
-net eth0: Prev FIFO2 is shaped
-net eth0: set FIFO3 bw = 0
-net eth0: set FIFO2 bw = 0
-$ ethtool -L eth0 rx 1 tx 1
-
-*********************************************************************
-*********************************************************************
-*********************************************************************
-Example 2: Two port tx AVB configuration scheme for target board
-----------------------------------------------------------------------
-(prints and scheme for AM572x evm, for dual emac boards only)
-
-+------------------------------------------------------------------+ u
-| +----------+  +----------+  +------+  +----------+  +----------+ | s
-| |          |  |          |  |      |  |          |  |          | | e
-| | App 1    |  | App 2    |  | Apps |  | App 3    |  | App 4    | | r
-| | Class A  |  | Class B  |  | Rest |  | Class B  |  | Class A  | |
-| | Eth0     |  | Eth0     |  |   |  |  | Eth1     |  | Eth1     | | s
-| | VLAN100  |  | VLAN100  |  |   |  |  | VLAN100  |  | VLAN100  | | p
-| | 40 Mb/s  |  | 20 Mb/s  |  |   |  |  | 10 Mb/s  |  | 30 Mb/s  | | a
-| | SO_PRI=3 |  | SO_PRI=2 |  |   |  |  | SO_PRI=3 |  | SO_PRI=2 | | c
-| |   |      |  |   |      |  |   |  |  |   |      |  |   |      | | e
-| +---|------+  +---|------+  +---|--+  +---|------+  +---|------+ |
-+-----|-------------|-------------|---------|-------------|--------+
-    +-+     +-------+             |         +----------+  +----+
-    |       |             +-------+------+             |       |
-    |       |             |              |             |       |
-+---|-------|-------------|--------------|-------------|-------|---+
-| +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
-| | p3 | | p2 | | p1 | | p0 |          | p0 | | p1 | | p2 | | p3 | | k
-| \    / \    / \    / \    /          \    / \    / \    / \    / | e
-|  \  /   \  /   \  /   \  /            \  /   \  /   \  /   \  /  | r
-|   \/     \/     \/     \/              \/     \/     \/     \/   | n
-|   |      |             |                |             |      |   | e
-|   |      |        +----+                +----+        |      |   | l
-|   |      |        |                          |        |      |   |
-| +----+ +----+ +----+                        +----+ +----+ +----+ | s
-| |tc0 | |tc1 | |tc2 |                        |tc2 | |tc1 | |tc0 | | p
-| \    / \    / \    /                        \    / \    / \    / | a
-|  \  /   \  /   \  /                          \  /   \  /   \  /  | c
-|   \/     \/     \/                            \/     \/     \/   | e
-|   |      |       +-----+                +-----+      |       |   |
-|   |      |       |     |                |     |      |       |   |
-|   |      |       |     |                |     |      |       |   |
-|   |      |       |     |    E      E    |     |      |       |   |
-| +----+ +----+ +----+ +----+ t      t +----+ +----+ +----+ +----+ |
-| |txq0| |txq1| |txq4| |txq5| h      h |txq6| |txq7| |txq3| |txq2| |
-| \    / \    / \    / \    / 0      1 \    / \    / \    / \    / |
-|  \  /   \  /   \  /   \  /  .      .  \  /   \  /   \  /   \  /  |
-|   \/     \/     \/     \/   1      1   \/     \/     \/     \/   |
-| +-|------|------|------|--+ 0      0 +-|------|------|------|--+ |
-| | |      |      |      |  | 0      0 | |      |      |      |  | |
-+---|------|------|------|---------------|------|------|------|----+
-    |      |      |      |               |      |      |      |
-    p      p      p      p               p      p      p      p
-    3      2      0-1, 4-7   <-L2 pri->  0-1, 4-7      2      3
-    |      |      |      |               |      |      |      |
-    |      |      |      |               |      |      |      |
-+---|------|------|------|---------------|------|------|------|----+
-|   |      |      |      |               |      |      |      |    |
-| +----+ +----+ +----+ +----+          +----+ +----+ +----+ +----+ |
-| |dma7| |dma6| |dma3| |dma2|          |dma1| |dma0| |dma4| |dma5| |
-| \    / \    / \    / \    /          \    / \    / \    / \    / | c
-|  \S /   \S /   \  /   \  /            \  /   \  /   \S /   \S /  | p
-|   \/     \/     \/     \/              \/     \/     \/     \/   | s
-|   |      |      | +-----                |      |      |      |   | w
-|   |      |      | |                     +----+ |      |      |   |
-|   |      |      | |                          | |      |      |   | d
-| +----+ +----+ +----+p                      p+----+ +----+ +----+ | r
-| |    | |    | |    |o                      o|    | |    | |    | | i
-| | f3 | | f2 | | f0 |r        CPSW          r| f3 | | f2 | | f0 | | v
-| |tc0 | |tc1 | |tc2 |t                      t|tc0 | |tc1 | |tc2 | | e
-| \CBS / \CBS / \CBS /1                      2\CBS / \CBS / \CBS / | r
-|  \S /   \S /   \  /                          \S /   \S /   \  /  |
-|   \/     \/     \/                            \/     \/     \/   |
-+------------------------------------------------------------------+
-========================================Eth==========================>
-
-1)
-// Add 8 tx queues, for interface Eth0, but they are common, so are accessed
-// by two interfaces Eth0 and Eth1.
-$ ethtool -L eth1 rx 1 tx 8
-rx unmodified, ignoring
-
-2)
-// Check if num of queues is set correctly:
-$ ethtool -l eth0
-Channel parameters for eth0:
-Pre-set maximums:
-RX:             8
-TX:             8
-Other:          0
-Combined:       0
-Current hardware settings:
-RX:             1
-TX:             8
-Other:          0
-Combined:       0
-
-3)
-// TX queues must be rated starting from 0, so set bws for tx0 and tx1 for Eth0
-// and for tx2 and tx3 for Eth1. That is, rates 40 and 20 Mb/s appropriately
-// for Eth0 and 30 and 10 Mb/s for Eth1.
-// Real speed can differ a bit due to discreetness
-// Leave last 4 tx queues as not rated
-$ echo 40 > /sys/class/net/eth0/queues/tx-0/tx_maxrate
-$ echo 20 > /sys/class/net/eth0/queues/tx-1/tx_maxrate
-$ echo 30 > /sys/class/net/eth1/queues/tx-2/tx_maxrate
-$ echo 10 > /sys/class/net/eth1/queues/tx-3/tx_maxrate
-
-4)
-// Check maximum rate of tx (cpdma) queues:
-$ cat /sys/class/net/eth0/queues/tx-*/tx_maxrate
-40
-20
-30
-10
-0
-0
-0
-0
-
-5)
-// Map skb->priority to traffic class for Eth0:
-// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
-// Map traffic class to transmit queue:
-// tc0 -> txq0, tc1 -> txq1, tc2 -> (txq4, txq5)
-$ tc qdisc replace dev eth0 handle 100: parent root mqprio num_tc 3 \
-map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@0 1@1 2@4 hw 1
-
-6)
-// Check classes settings
-$ tc -g class show dev eth0
-+---(100:ffe2) mqprio
-|    +---(100:5) mqprio
-|    +---(100:6) mqprio
-|
-+---(100:ffe1) mqprio
-|    +---(100:2) mqprio
-|
-+---(100:ffe0) mqprio
-     +---(100:1) mqprio
-
-7)
-// Set rate for class A - 41 Mbit (tc0, txq0) using CBS Qdisc for Eth0
-// here only idle slope is important, others ignored
-// Real speed can differ a bit due to discreetness
-$ tc qdisc add dev eth0 parent 100:1 cbs locredit -1470 \
-hicredit 62 sendslope -959000 idleslope 41000 offload 1
-net eth0: set FIFO3 bw = 50
-
-8)
-// Set rate for class B - 21 Mbit (tc1, txq1) using CBS Qdisc for Eth0
-$ tc qdisc add dev eth0 parent 100:2 cbs locredit -1470 \
-hicredit 65 sendslope -979000 idleslope 21000 offload 1
-net eth0: set FIFO2 bw = 30
-
-9)
-// Create vlan 100 to map sk->priority to vlan qos for Eth0
-$ ip link add link eth0 name eth0.100 type vlan id 100
-net eth0: Adding vlanid 100 to vlan filter
-
-10)
-// Map skb->priority to L2 prio for Eth0.100, one to one
-$ ip link set eth0.100 type vlan \
-egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-11)
-// Check egress map for vlan 100
-$ cat /proc/net/vlan/eth0.100
-[...]
-INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
-EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-12)
-// Map skb->priority to traffic class for Eth1:
-// 3pri -> tc0, 2pri -> tc1, (0,1,4-7)pri -> tc2
-// Map traffic class to transmit queue:
-// tc0 -> txq2, tc1 -> txq3, tc2 -> (txq6, txq7)
-$ tc qdisc replace dev eth1 handle 100: parent root mqprio num_tc 3 \
-map 2 2 1 0 2 2 2 2 2 2 2 2 2 2 2 2 queues 1@2 1@3 2@6 hw 1
-
-13)
-// Check classes settings
-$ tc -g class show dev eth1
-+---(100:ffe2) mqprio
-|    +---(100:7) mqprio
-|    +---(100:8) mqprio
-|
-+---(100:ffe1) mqprio
-|    +---(100:4) mqprio
-|
-+---(100:ffe0) mqprio
-     +---(100:3) mqprio
-
-14)
-// Set rate for class A - 31 Mbit (tc0, txq2) using CBS Qdisc for Eth1
-// here only idle slope is important, others ignored, but calculated
-// for interface speed - 100Mb for eth1 port.
-// Set it +1 Mb for reserve (important!)
-$ tc qdisc add dev eth1 parent 100:3 cbs locredit -1035 \
-hicredit 465 sendslope -69000 idleslope 31000 offload 1
-net eth1: set FIFO3 bw = 31
-
-15)
-// Set rate for class B - 11 Mbit (tc1, txq3) using CBS Qdisc for Eth1
-// Set it +1 Mb for reserve (important!)
-$ tc qdisc add dev eth1 parent 100:4 cbs locredit -1335 \
-hicredit 405 sendslope -89000 idleslope 11000 offload 1
-net eth1: set FIFO2 bw = 11
-
-16)
-// Create vlan 100 to map sk->priority to vlan qos for Eth1
-$ ip link add link eth1 name eth1.100 type vlan id 100
-net eth1: Adding vlanid 100 to vlan filter
-
-17)
-// Map skb->priority to L2 prio for Eth1.100, one to one
-$ ip link set eth1.100 type vlan \
-egress 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-18)
-// Check egress map for vlan 100
-$ cat /proc/net/vlan/eth1.100
-[...]
-INGRESS priority mappings: 0:0  1:0  2:0  3:0  4:0  5:0  6:0 7:0
-EGRESS priority mappings: 0:0 1:1 2:2 3:3 4:4 5:5 6:6 7:7
-
-19)
-// Run appropriate tools with socket option "SO_PRIORITY" to 3
-// for class A and to 2 for class B. For both interfaces
-./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p2 -s 1500&
-./tsn_talker -d 18:03:73:66:87:42 -i eth0.100 -p3 -s 1500&
-./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p2 -s 1500&
-./tsn_talker -d 20:cf:30:85:7d:fd -i eth1.100 -p3 -s 1500&
-
-20)
-// run your listener on workstation (should be in same vlan)
-// (I took at https://www.spinics.net/lists/netdev/msg460869.html)
-./tsn_listener -d 18:03:73:66:87:42 -i enp5s0 -s 1500
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39012 kbps
-Receiving data rate: 39000 kbps
-
-21)
-// Restore default configuration if needed
-$ ip link del eth1.100
-$ ip link del eth0.100
-$ tc qdisc del dev eth1 root
-net eth1: Prev FIFO2 is shaped
-net eth1: set FIFO3 bw = 0
-net eth1: set FIFO2 bw = 0
-$ tc qdisc del dev eth0 root
-net eth0: Prev FIFO2 is shaped
-net eth0: set FIFO3 bw = 0
-net eth0: set FIFO2 bw = 0
-$ ethtool -L eth0 rx 1 tx 1
-- 
cgit v1.2.3-59-g8ed1b


From e9a5475e735c9603b870c6ee5189de7cd32bb080 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:56 +0200
Subject: docs: networking: device drivers: convert ti/tlan.txt to ReST

- add SPDX header;
- adjust titles and chapters, adding proper markups;
- mark tables as such;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../networking/device_drivers/ti/tlan.rst          | 140 +++++++++++++++++++++
 .../networking/device_drivers/ti/tlan.txt          | 117 -----------------
 MAINTAINERS                                        |   2 +-
 drivers/net/ethernet/ti/Kconfig                    |   2 +-
 drivers/net/ethernet/ti/tlan.c                     |   2 +-
 6 files changed, 144 insertions(+), 120 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/ti/tlan.rst
 delete mode 100644 Documentation/networking/device_drivers/ti/tlan.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index 1d3b664e6921..adc0bf65fb02 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -49,6 +49,7 @@ Contents:
    smsc/smc9
    ti/cpsw_switchdev
    ti/cpsw
+   ti/tlan
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/ti/tlan.rst b/Documentation/networking/device_drivers/ti/tlan.rst
new file mode 100644
index 000000000000..4fdc0907f4fc
--- /dev/null
+++ b/Documentation/networking/device_drivers/ti/tlan.rst
@@ -0,0 +1,140 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+=====================
+TLAN driver for Linux
+=====================
+
+:Version: 1.14a
+
+(C) 1997-1998 Caldera, Inc.
+
+(C) 1998 James Banks
+
+(C) 1999-2001 Torben Mathiasen <tmm@image.dk, torben.mathiasen@compaq.com>
+
+For driver information/updates visit http://www.compaq.com
+
+
+
+
+
+I. Supported Devices
+====================
+
+    Only PCI devices will work with this driver.
+
+    Supported:
+
+    =========	=========	===========================================
+    Vendor ID	Device ID	Name
+    =========	=========	===========================================
+    0e11	ae32		Compaq Netelligent 10/100 TX PCI UTP
+    0e11	ae34		Compaq Netelligent 10 T PCI UTP
+    0e11	ae35		Compaq Integrated NetFlex 3/P
+    0e11	ae40		Compaq Netelligent Dual 10/100 TX PCI UTP
+    0e11	ae43		Compaq Netelligent Integrated 10/100 TX UTP
+    0e11	b011		Compaq Netelligent 10/100 TX Embedded UTP
+    0e11	b012		Compaq Netelligent 10 T/2 PCI UTP/Coax
+    0e11	b030		Compaq Netelligent 10/100 TX UTP
+    0e11	f130		Compaq NetFlex 3/P
+    0e11	f150		Compaq NetFlex 3/P
+    108d	0012		Olicom OC-2325
+    108d	0013		Olicom OC-2183
+    108d	0014		Olicom OC-2326
+    =========	=========	===========================================
+
+
+    Caveats:
+
+    I am not sure if 100BaseTX daughterboards (for those cards which
+    support such things) will work.  I haven't had any solid evidence
+    either way.
+
+    However, if a card supports 100BaseTx without requiring an add
+    on daughterboard, it should work with 100BaseTx.
+
+    The "Netelligent 10 T/2 PCI UTP/Coax" (b012) device is untested,
+    but I do not expect any problems.
+
+
+II. Driver Options
+==================
+
+	1. You can append debug=x to the end of the insmod line to get
+	   debug messages, where x is a bit field where the bits mean
+	   the following:
+
+	   ====		=====================================
+	   0x01		Turn on general debugging messages.
+	   0x02		Turn on receive debugging messages.
+	   0x04		Turn on transmit debugging messages.
+	   0x08		Turn on list debugging messages.
+	   ====		=====================================
+
+	2. You can append aui=1 to the end of the insmod line to cause
+	   the adapter to use the AUI interface instead of the 10 Base T
+	   interface.  This is also what to do if you want to use the BNC
+	   connector on a TLAN based device.  (Setting this option on a
+	   device that does not have an AUI/BNC connector will probably
+	   cause it to not function correctly.)
+
+	3. You can set duplex=1 to force half duplex, and duplex=2 to
+	   force full duplex.
+
+	4. You can set speed=10 to force 10Mbs operation, and speed=100
+	   to force 100Mbs operation. (I'm not sure what will happen
+	   if a card which only supports 10Mbs is forced into 100Mbs
+	   mode.)
+
+	5. You have to use speed=X duplex=Y together now. If you just
+	   do "insmod tlan.o speed=100" the driver will do Auto-Neg.
+	   To force a 10Mbps Half-Duplex link do "insmod tlan.o speed=10
+	   duplex=1".
+
+	6. If the driver is built into the kernel, you can use the 3rd
+	   and 4th parameters to set aui and debug respectively.  For
+	   example::
+
+		ether=0,0,0x1,0x7,eth0
+
+	   This sets aui to 0x1 and debug to 0x7, assuming eth0 is a
+	   supported TLAN device.
+
+	   The bits in the third byte are assigned as follows:
+
+		====   ===============
+		0x01   aui
+		0x02   use half duplex
+		0x04   use full duplex
+		0x08   use 10BaseT
+		0x10   use 100BaseTx
+		====   ===============
+
+	   You also need to set both speed and duplex settings when forcing
+	   speeds with kernel-parameters.
+	   ether=0,0,0x12,0,eth0 will force link to 100Mbps Half-Duplex.
+
+	7. If you have more than one tlan adapter in your system, you can
+	   use the above options on a per adapter basis. To force a 100Mbit/HD
+	   link with your eth1 adapter use::
+
+		insmod tlan speed=0,100 duplex=0,1
+
+	   Now eth0 will use auto-neg and eth1 will be forced to 100Mbit/HD.
+	   Note that the tlan driver supports a maximum of 8 adapters.
+
+
+III. Things to try if you have problems
+=======================================
+
+	1. Make sure your card's PCI id is among those listed in
+	   section I, above.
+	2. Make sure routing is correct.
+	3. Try forcing different speed/duplex settings
+
+
+There is also a tlan mailing list which you can join by sending "subscribe tlan"
+in the body of an email to majordomo@vuser.vu.union.edu.
+
+There is also a tlan website at http://www.compaq.com
+
diff --git a/Documentation/networking/device_drivers/ti/tlan.txt b/Documentation/networking/device_drivers/ti/tlan.txt
deleted file mode 100644
index 34550dfcef74..000000000000
--- a/Documentation/networking/device_drivers/ti/tlan.txt
+++ /dev/null
@@ -1,117 +0,0 @@
-(C) 1997-1998 Caldera, Inc.
-(C) 1998 James Banks
-(C) 1999-2001 Torben Mathiasen <tmm@image.dk, torben.mathiasen@compaq.com>
-
-For driver information/updates visit http://www.compaq.com
-
-
-TLAN driver for Linux, version 1.14a
-README
-
-
-I.  Supported Devices.
-
-    Only PCI devices will work with this driver.
-
-    Supported:
-    Vendor ID	Device ID	Name
-    0e11	ae32		Compaq Netelligent 10/100 TX PCI UTP
-    0e11	ae34		Compaq Netelligent 10 T PCI UTP
-    0e11	ae35		Compaq Integrated NetFlex 3/P
-    0e11	ae40		Compaq Netelligent Dual 10/100 TX PCI UTP
-    0e11	ae43		Compaq Netelligent Integrated 10/100 TX UTP
-    0e11	b011		Compaq Netelligent 10/100 TX Embedded UTP
-    0e11	b012		Compaq Netelligent 10 T/2 PCI UTP/Coax
-    0e11	b030		Compaq Netelligent 10/100 TX UTP
-    0e11	f130		Compaq NetFlex 3/P
-    0e11	f150		Compaq NetFlex 3/P
-    108d	0012		Olicom OC-2325	
-    108d	0013		Olicom OC-2183
-    108d	0014		Olicom OC-2326	
-
-
-    Caveats:
-    
-    I am not sure if 100BaseTX daughterboards (for those cards which
-    support such things) will work.  I haven't had any solid evidence
-    either way.
-
-    However, if a card supports 100BaseTx without requiring an add
-    on daughterboard, it should work with 100BaseTx.
-
-    The "Netelligent 10 T/2 PCI UTP/Coax" (b012) device is untested,
-    but I do not expect any problems.
-    
-
-II.   Driver Options
-	1. You can append debug=x to the end of the insmod line to get
-           debug messages, where x is a bit field where the bits mean
-	   the following:
-	   
-	   0x01		Turn on general debugging messages.
-	   0x02		Turn on receive debugging messages.
-	   0x04		Turn on transmit debugging messages.
-	   0x08		Turn on list debugging messages.
-
-	2. You can append aui=1 to the end of the insmod line to cause
-           the adapter to use the AUI interface instead of the 10 Base T
-           interface.  This is also what to do if you want to use the BNC
-	   connector on a TLAN based device.  (Setting this option on a
-	   device that does not have an AUI/BNC connector will probably
-	   cause it to not function correctly.)
-
-	3. You can set duplex=1 to force half duplex, and duplex=2 to
-	   force full duplex.
-
-	4. You can set speed=10 to force 10Mbs operation, and speed=100
-	   to force 100Mbs operation. (I'm not sure what will happen
-	   if a card which only supports 10Mbs is forced into 100Mbs
-	   mode.)
-
-	5. You have to use speed=X duplex=Y together now. If you just
-	   do "insmod tlan.o speed=100" the driver will do Auto-Neg.
-	   To force a 10Mbps Half-Duplex link do "insmod tlan.o speed=10 
-	   duplex=1".
-
-	6. If the driver is built into the kernel, you can use the 3rd
-	   and 4th parameters to set aui and debug respectively.  For
-	   example:
-
-	   ether=0,0,0x1,0x7,eth0
-
-	   This sets aui to 0x1 and debug to 0x7, assuming eth0 is a
-	   supported TLAN device.
-
-	   The bits in the third byte are assigned as follows:
-
-		0x01 = aui
-		0x02 = use half duplex
-		0x04 = use full duplex
-		0x08 = use 10BaseT
-		0x10 = use 100BaseTx
-
-	   You also need to set both speed and duplex settings when forcing
-	   speeds with kernel-parameters. 
-	   ether=0,0,0x12,0,eth0 will force link to 100Mbps Half-Duplex.
-
-	7. If you have more than one tlan adapter in your system, you can
-	   use the above options on a per adapter basis. To force a 100Mbit/HD
-	   link with your eth1 adapter use:
-	   
-	   insmod tlan speed=0,100 duplex=0,1
-
-	   Now eth0 will use auto-neg and eth1 will be forced to 100Mbit/HD.
-	   Note that the tlan driver supports a maximum of 8 adapters.
-
-
-III.  Things to try if you have problems.
-	1. Make sure your card's PCI id is among those listed in
-	   section I, above.
-	2. Make sure routing is correct.
-	3. Try forcing different speed/duplex settings
-
-
-There is also a tlan mailing list which you can join by sending "subscribe tlan"
-in the body of an email to majordomo@vuser.vu.union.edu.
-There is also a tlan website at http://www.compaq.com
-
diff --git a/MAINTAINERS b/MAINTAINERS
index 94afbf577a06..38dbfbfccb5e 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -16971,7 +16971,7 @@ M:	Samuel Chessman <chessman@tux.org>
 L:	tlan-devel@lists.sourceforge.net (subscribers-only)
 S:	Maintained
 W:	http://sourceforge.net/projects/tlan/
-F:	Documentation/networking/device_drivers/ti/tlan.txt
+F:	Documentation/networking/device_drivers/ti/tlan.rst
 F:	drivers/net/ethernet/ti/tlan.*
 
 TM6000 VIDEO4LINUX DRIVER
diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 89cec778cf2d..7b0ad777828d 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -138,7 +138,7 @@ config TLAN
 
 	  Devices currently supported by this driver are Compaq Netelligent,
 	  Compaq NetFlex and Olicom cards.  Please read the file
-	  <file:Documentation/networking/device_drivers/ti/tlan.txt>
+	  <file:Documentation/networking/device_drivers/ti/tlan.rst>
 	  for more details.
 
 	  To compile this driver as a module, choose M here. The module
diff --git a/drivers/net/ethernet/ti/tlan.c b/drivers/net/ethernet/ti/tlan.c
index ad465202980a..857709828058 100644
--- a/drivers/net/ethernet/ti/tlan.c
+++ b/drivers/net/ethernet/ti/tlan.c
@@ -70,7 +70,7 @@ MODULE_DESCRIPTION("Driver for TI ThunderLAN based ethernet PCI adapters");
 MODULE_LICENSE("GPL");
 
 /* Turn on debugging.
- * See Documentation/networking/device_drivers/ti/tlan.txt for details
+ * See Documentation/networking/device_drivers/ti/tlan.rst for details
  */
 static  int		debug;
 module_param(debug, int, 0);
-- 
cgit v1.2.3-59-g8ed1b


From 7ac0cbb49142edc22f0b3b4033907da6b3f698d9 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:57 +0200
Subject: docs: networking: device drivers: convert toshiba/spider_net.txt to
 ReST

- add SPDX header;
- adjust title markup;
- mark code blocks and literals as such;
- adjust indentation, whitespaces and blank lines where needed;
- add to networking/index.rst.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/device_drivers/index.rst  |   1 +
 .../device_drivers/toshiba/spider_net.rst          | 202 ++++++++++++++++++++
 .../device_drivers/toshiba/spider_net.txt          | 204 ---------------------
 MAINTAINERS                                        |   2 +-
 4 files changed, 204 insertions(+), 205 deletions(-)
 create mode 100644 Documentation/networking/device_drivers/toshiba/spider_net.rst
 delete mode 100644 Documentation/networking/device_drivers/toshiba/spider_net.txt

diff --git a/Documentation/networking/device_drivers/index.rst b/Documentation/networking/device_drivers/index.rst
index adc0bf65fb02..e18dad11bc72 100644
--- a/Documentation/networking/device_drivers/index.rst
+++ b/Documentation/networking/device_drivers/index.rst
@@ -50,6 +50,7 @@ Contents:
    ti/cpsw_switchdev
    ti/cpsw
    ti/tlan
+   toshiba/spider_net
 
 .. only::  subproject and html
 
diff --git a/Documentation/networking/device_drivers/toshiba/spider_net.rst b/Documentation/networking/device_drivers/toshiba/spider_net.rst
new file mode 100644
index 000000000000..fe5b32be15cd
--- /dev/null
+++ b/Documentation/networking/device_drivers/toshiba/spider_net.rst
@@ -0,0 +1,202 @@
+.. SPDX-License-Identifier: GPL-2.0
+
+===========================
+The Spidernet Device Driver
+===========================
+
+Written by Linas Vepstas <linas@austin.ibm.com>
+
+Version of 7 June 2007
+
+Abstract
+========
+This document sketches the structure of portions of the spidernet
+device driver in the Linux kernel tree. The spidernet is a gigabit
+ethernet device built into the Toshiba southbridge commonly used
+in the SONY Playstation 3 and the IBM QS20 Cell blade.
+
+The Structure of the RX Ring.
+=============================
+The receive (RX) ring is a circular linked list of RX descriptors,
+together with three pointers into the ring that are used to manage its
+contents.
+
+The elements of the ring are called "descriptors" or "descrs"; they
+describe the received data. This includes a pointer to a buffer
+containing the received data, the buffer size, and various status bits.
+
+There are three primary states that a descriptor can be in: "empty",
+"full" and "not-in-use".  An "empty" or "ready" descriptor is ready
+to receive data from the hardware. A "full" descriptor has data in it,
+and is waiting to be emptied and processed by the OS. A "not-in-use"
+descriptor is neither empty nor full; it is simply not ready. It may
+not even have a data buffer in it, or is otherwise unusable.
+
+During normal operation, on device startup, the OS (specifically, the
+spidernet device driver) allocates a set of RX descriptors and RX
+buffers. These are all marked "empty", ready to receive data. This
+ring is handed off to the hardware, which sequentially fills in the
+buffers, and marks them "full". The OS follows up, taking the full
+buffers, processing them, and re-marking them empty.
+
+This filling and emptying is managed by three pointers, the "head"
+and "tail" pointers, managed by the OS, and a hardware current
+descriptor pointer (GDACTDPA). The GDACTDPA points at the descr
+currently being filled. When this descr is filled, the hardware
+marks it full, and advances the GDACTDPA by one.  Thus, when there is
+flowing RX traffic, every descr behind it should be marked "full",
+and everything in front of it should be "empty".  If the hardware
+discovers that the current descr is not empty, it will signal an
+interrupt, and halt processing.
+
+The tail pointer tails or trails the hardware pointer. When the
+hardware is ahead, the tail pointer will be pointing at a "full"
+descr. The OS will process this descr, and then mark it "not-in-use",
+and advance the tail pointer.  Thus, when there is flowing RX traffic,
+all of the descrs in front of the tail pointer should be "full", and
+all of those behind it should be "not-in-use". When RX traffic is not
+flowing, then the tail pointer can catch up to the hardware pointer.
+The OS will then note that the current tail is "empty", and halt
+processing.
+
+The head pointer (somewhat mis-named) follows after the tail pointer.
+When traffic is flowing, then the head pointer will be pointing at
+a "not-in-use" descr. The OS will perform various housekeeping duties
+on this descr. This includes allocating a new data buffer and
+dma-mapping it so as to make it visible to the hardware. The OS will
+then mark the descr as "empty", ready to receive data. Thus, when there
+is flowing RX traffic, everything in front of the head pointer should
+be "not-in-use", and everything behind it should be "empty". If no
+RX traffic is flowing, then the head pointer can catch up to the tail
+pointer, at which point the OS will notice that the head descr is
+"empty", and it will halt processing.
+
+Thus, in an idle system, the GDACTDPA, tail and head pointers will
+all be pointing at the same descr, which should be "empty". All of the
+other descrs in the ring should be "empty" as well.
+
+The show_rx_chain() routine will print out the locations of the
+GDACTDPA, tail and head pointers. It will also summarize the contents
+of the ring, starting at the tail pointer, and listing the status
+of the descrs that follow.
+
+A typical example of the output, for a nearly idle system, might be::
+
+    net eth1: Total number of descrs=256
+    net eth1: Chain tail located at descr=20
+    net eth1: Chain head is at 20
+    net eth1: HW curr desc (GDACTDPA) is at 21
+    net eth1: Have 1 descrs with stat=x40800101
+    net eth1: HW next desc (GDACNEXTDA) is at 22
+    net eth1: Last 255 descrs with stat=xa0800000
+
+In the above, the hardware has filled in one descr, number 20. Both
+head and tail are pointing at 20, because it has not yet been emptied.
+Meanwhile, hw is pointing at 21, which is free.
+
+The "Have nnn descrs" refers to the descr starting at the tail: in this
+case, nnn=1 descr, starting at descr 20. The "Last nnn descrs" refers
+to all of the rest of the descrs, from the last status change. The "nnn"
+is a count of how many descrs have exactly the same status.
+
+The status x4... corresponds to "full" and status xa... corresponds
+to "empty". The actual value printed is RXCOMST_A.
+
+In the device driver source code, a different set of names are
+used for these same concepts, so that::
+
+    "empty" == SPIDER_NET_DESCR_CARDOWNED == 0xa
+    "full"  == SPIDER_NET_DESCR_FRAME_END == 0x4
+    "not in use" == SPIDER_NET_DESCR_NOT_IN_USE == 0xf
+
+
+The RX RAM full bug/feature
+===========================
+
+As long as the OS can empty out the RX buffers at a rate faster than
+the hardware can fill them, there is no problem. If, for some reason,
+the OS fails to empty the RX ring fast enough, the hardware GDACTDPA
+pointer will catch up to the head, notice the not-empty condition,
+and stop. However, RX packets may still continue arriving on the wire.
+The spidernet chip can save some limited number of these in local RAM.
+When this local ram fills up, the spider chip will issue an interrupt
+indicating this (GHIINT0STS will show ERRINT, and the GRMFLLINT bit
+will be set in GHIINT1STS).  When the RX ram full condition occurs,
+a certain bug/feature is triggered that has to be specially handled.
+This section describes the special handling for this condition.
+
+When the OS finally has a chance to run, it will empty out the RX ring.
+In particular, it will clear the descriptor on which the hardware had
+stopped. However, once the hardware has decided that a certain
+descriptor is invalid, it will not restart at that descriptor; instead
+it will restart at the next descr. This potentially will lead to a
+deadlock condition, as the tail pointer will be pointing at this descr,
+which, from the OS point of view, is empty; the OS will be waiting for
+this descr to be filled. However, the hardware has skipped this descr,
+and is filling the next descrs. Since the OS doesn't see this, there
+is a potential deadlock, with the OS waiting for one descr to fill,
+while the hardware is waiting for a different set of descrs to become
+empty.
+
+A call to show_rx_chain() at this point indicates the nature of the
+problem. A typical print when the network is hung shows the following::
+
+    net eth1: Spider RX RAM full, incoming packets might be discarded!
+    net eth1: Total number of descrs=256
+    net eth1: Chain tail located at descr=255
+    net eth1: Chain head is at 255
+    net eth1: HW curr desc (GDACTDPA) is at 0
+    net eth1: Have 1 descrs with stat=xa0800000
+    net eth1: HW next desc (GDACNEXTDA) is at 1
+    net eth1: Have 127 descrs with stat=x40800101
+    net eth1: Have 1 descrs with stat=x40800001
+    net eth1: Have 126 descrs with stat=x40800101
+    net eth1: Last 1 descrs with stat=xa0800000
+
+Both the tail and head pointers are pointing at descr 255, which is
+marked xa... which is "empty". Thus, from the OS point of view, there
+is nothing to be done. In particular, there is the implicit assumption
+that everything in front of the "empty" descr must surely also be empty,
+as explained in the last section. The OS is waiting for descr 255 to
+become non-empty, which, in this case, will never happen.
+
+The HW pointer is at descr 0. This descr is marked 0x4.. or "full".
+Since it's already full, the hardware can do nothing more, and thus has
+halted processing. Notice that descrs 0 through 253 are all marked
+"full", while descr 254 and 255 are empty. (The "Last 1 descrs" is
+descr 254, since tail was at 255.) Thus, the system is deadlocked,
+and there can be no forward progress; the OS thinks there's nothing
+to do, and the hardware has nowhere to put incoming data.
+
+This bug/feature is worked around with the spider_net_resync_head_ptr()
+routine. When the driver receives RX interrupts, but an examination
+of the RX chain seems to show it is empty, then it is probable that
+the hardware has skipped a descr or two (sometimes dozens under heavy
+network conditions). The spider_net_resync_head_ptr() subroutine will
+search the ring for the next full descr, and the driver will resume
+operations there.  Since this will leave "holes" in the ring, there
+is also a spider_net_resync_tail_ptr() that will skip over such holes.
+
+As of this writing, the spider_net_resync() strategy seems to work very
+well, even under heavy network loads.
+
+
+The TX ring
+===========
+The TX ring uses a low-watermark interrupt scheme to make sure that
+the TX queue is appropriately serviced for large packet sizes.
+
+For packet sizes greater than about 1KBytes, the kernel can fill
+the TX ring quicker than the device can drain it. Once the ring
+is full, the netdev is stopped. When there is room in the ring,
+the netdev needs to be reawakened, so that more TX packets are placed
+in the ring. The hardware can empty the ring about four times per jiffy,
+so it's not appropriate to wait for the poll routine to refill, since
+the poll routine runs only once per jiffy.  The low-watermark mechanism
+marks a descr about 1/4th of the way from the bottom of the queue, so
+that an interrupt is generated when the descr is processed. This
+interrupt wakes up the netdev, which can then refill the queue.
+For large packets, this mechanism generates a relatively small number
+of interrupts, about 1K/sec. For smaller packets, this will drop to zero
+interrupts, as the hardware can empty the queue faster than the kernel
+can fill it.
diff --git a/Documentation/networking/device_drivers/toshiba/spider_net.txt b/Documentation/networking/device_drivers/toshiba/spider_net.txt
deleted file mode 100644
index b0b75f8463b3..000000000000
--- a/Documentation/networking/device_drivers/toshiba/spider_net.txt
+++ /dev/null
@@ -1,204 +0,0 @@
-
-            The Spidernet Device Driver
-            ===========================
-
-Written by Linas Vepstas <linas@austin.ibm.com>
-
-Version of 7 June 2007
-
-Abstract
-========
-This document sketches the structure of portions of the spidernet
-device driver in the Linux kernel tree. The spidernet is a gigabit
-ethernet device built into the Toshiba southbridge commonly used
-in the SONY Playstation 3 and the IBM QS20 Cell blade.
-
-The Structure of the RX Ring.
-=============================
-The receive (RX) ring is a circular linked list of RX descriptors,
-together with three pointers into the ring that are used to manage its
-contents.
-
-The elements of the ring are called "descriptors" or "descrs"; they
-describe the received data. This includes a pointer to a buffer
-containing the received data, the buffer size, and various status bits.
-
-There are three primary states that a descriptor can be in: "empty",
-"full" and "not-in-use".  An "empty" or "ready" descriptor is ready
-to receive data from the hardware. A "full" descriptor has data in it,
-and is waiting to be emptied and processed by the OS. A "not-in-use"
-descriptor is neither empty or full; it is simply not ready. It may
-not even have a data buffer in it, or is otherwise unusable.
-
-During normal operation, on device startup, the OS (specifically, the
-spidernet device driver) allocates a set of RX descriptors and RX
-buffers. These are all marked "empty", ready to receive data. This
-ring is handed off to the hardware, which sequentially fills in the
-buffers, and marks them "full". The OS follows up, taking the full
-buffers, processing them, and re-marking them empty.
-
-This filling and emptying is managed by three pointers, the "head"
-and "tail" pointers, managed by the OS, and a hardware current
-descriptor pointer (GDACTDPA). The GDACTDPA points at the descr
-currently being filled. When this descr is filled, the hardware
-marks it full, and advances the GDACTDPA by one.  Thus, when there is
-flowing RX traffic, every descr behind it should be marked "full",
-and everything in front of it should be "empty".  If the hardware
-discovers that the current descr is not empty, it will signal an
-interrupt, and halt processing.
-
-The tail pointer tails or trails the hardware pointer. When the
-hardware is ahead, the tail pointer will be pointing at a "full"
-descr. The OS will process this descr, and then mark it "not-in-use",
-and advance the tail pointer.  Thus, when there is flowing RX traffic,
-all of the descrs in front of the tail pointer should be "full", and
-all of those behind it should be "not-in-use". When RX traffic is not
-flowing, then the tail pointer can catch up to the hardware pointer.
-The OS will then note that the current tail is "empty", and halt
-processing.
-
-The head pointer (somewhat mis-named) follows after the tail pointer.
-When traffic is flowing, then the head pointer will be pointing at
-a "not-in-use" descr. The OS will perform various housekeeping duties
-on this descr. This includes allocating a new data buffer and
-dma-mapping it so as to make it visible to the hardware. The OS will
-then mark the descr as "empty", ready to receive data. Thus, when there
-is flowing RX traffic, everything in front of the head pointer should
-be "not-in-use", and everything behind it should be "empty". If no
-RX traffic is flowing, then the head pointer can catch up to the tail
-pointer, at which point the OS will notice that the head descr is
-"empty", and it will halt processing.
-
-Thus, in an idle system, the GDACTDPA, tail and head pointers will
-all be pointing at the same descr, which should be "empty". All of the
-other descrs in the ring should be "empty" as well.
-
-The show_rx_chain() routine will print out the locations of the
-GDACTDPA, tail and head pointers. It will also summarize the contents
-of the ring, starting at the tail pointer, and listing the status
-of the descrs that follow.
-
-A typical example of the output, for a nearly idle system, might be
-
-net eth1: Total number of descrs=256
-net eth1: Chain tail located at descr=20
-net eth1: Chain head is at 20
-net eth1: HW curr desc (GDACTDPA) is at 21
-net eth1: Have 1 descrs with stat=x40800101
-net eth1: HW next desc (GDACNEXTDA) is at 22
-net eth1: Last 255 descrs with stat=xa0800000
-
-In the above, the hardware has filled in one descr, number 20. Both
-head and tail are pointing at 20, because it has not yet been emptied.
-Meanwhile, hw is pointing at 21, which is free.
-
-The "Have nnn decrs" refers to the descr starting at the tail: in this
-case, nnn=1 descr, starting at descr 20. The "Last nnn descrs" refers
-to all of the rest of the descrs, from the last status change. The "nnn"
-is a count of how many descrs have exactly the same status.
-
-The status x4... corresponds to "full" and status xa... corresponds
-to "empty". The actual value printed is RXCOMST_A.
-
-In the device driver source code, a different set of names are
-used for these same concepts, so that
-
-"empty" == SPIDER_NET_DESCR_CARDOWNED == 0xa
-"full"  == SPIDER_NET_DESCR_FRAME_END == 0x4
-"not in use" == SPIDER_NET_DESCR_NOT_IN_USE == 0xf
-
-
-The RX RAM full bug/feature
-===========================
-
-As long as the OS can empty out the RX buffers at a rate faster than
-the hardware can fill them, there is no problem. If, for some reason,
-the OS fails to empty the RX ring fast enough, the hardware GDACTDPA
-pointer will catch up to the head, notice the not-empty condition,
-ad stop. However, RX packets may still continue arriving on the wire.
-The spidernet chip can save some limited number of these in local RAM.
-When this local ram fills up, the spider chip will issue an interrupt
-indicating this (GHIINT0STS will show ERRINT, and the GRMFLLINT bit
-will be set in GHIINT1STS).  When the RX ram full condition occurs,
-a certain bug/feature is triggered that has to be specially handled.
-This section describes the special handling for this condition.
-
-When the OS finally has a chance to run, it will empty out the RX ring.
-In particular, it will clear the descriptor on which the hardware had
-stopped. However, once the hardware has decided that a certain
-descriptor is invalid, it will not restart at that descriptor; instead
-it will restart at the next descr. This potentially will lead to a
-deadlock condition, as the tail pointer will be pointing at this descr,
-which, from the OS point of view, is empty; the OS will be waiting for
-this descr to be filled. However, the hardware has skipped this descr,
-and is filling the next descrs. Since the OS doesn't see this, there
-is a potential deadlock, with the OS waiting for one descr to fill,
-while the hardware is waiting for a different set of descrs to become
-empty.
-
-A call to show_rx_chain() at this point indicates the nature of the
-problem. A typical print when the network is hung shows the following:
-
-net eth1: Spider RX RAM full, incoming packets might be discarded!
-net eth1: Total number of descrs=256
-net eth1: Chain tail located at descr=255
-net eth1: Chain head is at 255
-net eth1: HW curr desc (GDACTDPA) is at 0
-net eth1: Have 1 descrs with stat=xa0800000
-net eth1: HW next desc (GDACNEXTDA) is at 1
-net eth1: Have 127 descrs with stat=x40800101
-net eth1: Have 1 descrs with stat=x40800001
-net eth1: Have 126 descrs with stat=x40800101
-net eth1: Last 1 descrs with stat=xa0800000
-
-Both the tail and head pointers are pointing at descr 255, which is
-marked xa... which is "empty". Thus, from the OS point of view, there
-is nothing to be done. In particular, there is the implicit assumption
-that everything in front of the "empty" descr must surely also be empty,
-as explained in the last section. The OS is waiting for descr 255 to
-become non-empty, which, in this case, will never happen.
-
-The HW pointer is at descr 0. This descr is marked 0x4.. or "full".
-Since its already full, the hardware can do nothing more, and thus has
-halted processing. Notice that descrs 0 through 254 are all marked
-"full", while descr 254 and 255 are empty. (The "Last 1 descrs" is
-descr 254, since tail was at 255.) Thus, the system is deadlocked,
-and there can be no forward progress; the OS thinks there's nothing
-to do, and the hardware has nowhere to put incoming data.
-
-This bug/feature is worked around with the spider_net_resync_head_ptr()
-routine. When the driver receives RX interrupts, but an examination
-of the RX chain seems to show it is empty, then it is probable that
-the hardware has skipped a descr or two (sometimes dozens under heavy
-network conditions). The spider_net_resync_head_ptr() subroutine will
-search the ring for the next full descr, and the driver will resume
-operations there.  Since this will leave "holes" in the ring, there
-is also a spider_net_resync_tail_ptr() that will skip over such holes.
-
-As of this writing, the spider_net_resync() strategy seems to work very
-well, even under heavy network loads.
-
-
-The TX ring
-===========
-The TX ring uses a low-watermark interrupt scheme to make sure that
-the TX queue is appropriately serviced for large packet sizes.
-
-For packet sizes greater than about 1KBytes, the kernel can fill
-the TX ring quicker than the device can drain it. Once the ring
-is full, the netdev is stopped. When there is room in the ring,
-the netdev needs to be reawakened, so that more TX packets are placed
-in the ring. The hardware can empty the ring about four times per jiffy,
-so its not appropriate to wait for the poll routine to refill, since
-the poll routine runs only once per jiffy.  The low-watermark mechanism
-marks a descr about 1/4th of the way from the bottom of the queue, so
-that an interrupt is generated when the descr is processed. This
-interrupt wakes up the netdev, which can then refill the queue.
-For large packets, this mechanism generates a relatively small number
-of interrupts, about 1K/sec. For smaller packets, this will drop to zero
-interrupts, as the hardware can empty the queue faster than the kernel
-can fill it.
-
-
- ======= END OF DOCUMENT ========
-
diff --git a/MAINTAINERS b/MAINTAINERS
index 38dbfbfccb5e..db7a6d462dff 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15874,7 +15874,7 @@ SPIDERNET NETWORK DRIVER for CELL
 M:	Ishizaki Kou <kou.ishizaki@toshiba.co.jp>
 L:	netdev@vger.kernel.org
 S:	Supported
-F:	Documentation/networking/device_drivers/toshiba/spider_net.txt
+F:	Documentation/networking/device_drivers/toshiba/spider_net.rst
 F:	drivers/net/ethernet/toshiba/spider_net*
 
 SPMI SUBSYSTEM
-- 
cgit v1.2.3-59-g8ed1b


From 2c8e961d20e38c366315f11bec6c0ae4458785c4 Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:58 +0200
Subject: net: docs: add page_pool.rst to index.rst

This file is already in ReST format. Add it to the net
index.rst, in order to make it part of the documentation
body.

Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/index.rst | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/networking/index.rst b/Documentation/networking/index.rst
index f5733ca4fbcb..0186e276690a 100644
--- a/Documentation/networking/index.rst
+++ b/Documentation/networking/index.rst
@@ -25,6 +25,7 @@ Contents:
    failover
    net_dim
    net_failover
+   page_pool
    phy
    sfp-phylink
    alias
-- 
cgit v1.2.3-59-g8ed1b


From 30cbf2ddfc5927fcd85b930c9bd8732fc4fc31cd Mon Sep 17 00:00:00 2001
From: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Date: Fri, 1 May 2020 16:44:59 +0200
Subject: docs: networking: arcnet-hardware.rst: don't duplicate chapter names

Since changeset 58ad30cf91f0 ("docs: fix reference to core-api/namespaces.rst"),
auto-references for chapters are generated. This is a nice feature, but
has a drawback: no chapters can have the same name.

So, we need to change two chapter titles, to avoid warnings when
building the docs.

Fixes: 58ad30cf91f0 ("docs: fix reference to core-api/namespaces.rst")
Signed-off-by: Mauro Carvalho Chehab <mchehab+huawei@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/arcnet-hardware.rst | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/Documentation/networking/arcnet-hardware.rst b/Documentation/networking/arcnet-hardware.rst
index b5a1a020c824..ac249ac8fcf2 100644
--- a/Documentation/networking/arcnet-hardware.rst
+++ b/Documentation/networking/arcnet-hardware.rst
@@ -1296,8 +1296,8 @@ DIP Switches:
 	11111           0xC400 (guessed - crashes tested system)
 	=============   ============================================
 
-CNet Technology Inc.
-====================
+CNet Technology Inc. (8-bit cards)
+==================================
 
 120 Series (8-bit cards)
 ------------------------
@@ -1520,8 +1520,8 @@ The jumpers labeled EXT1 and EXT2 are used to determine the timeout
 parameters. These two jumpers are normally left open.
 
 
-CNet Technology Inc.
-====================
+CNet Technology Inc. (16-bit cards)
+===================================
 
 160 Series (16-bit cards)
 -------------------------
-- 
cgit v1.2.3-59-g8ed1b


From beecf11bc2188067824591612151c4dc6ec383c7 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Thu, 30 Apr 2020 16:31:52 -0700
Subject: bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr

Currently, bpf_getsockopt and bpf_setsockopt helpers operate on the
'struct bpf_sock_ops' context in BPF_PROG_TYPE_SOCK_OPS program.
Let's generalize them and make them available for 'struct bpf_sock_addr'.
That way, in the future, we can allow those helpers in more places.

As an example, let's expose those 'struct bpf_sock_addr' based helpers to
BPF_CGROUP_INET{4,6}_CONNECT hooks. That way we can override CC before the
connection is made.

v3:
* Expose custom helpers for bpf_sock_addr context instead of doing
  generic bpf_sock argument (as suggested by Daniel). Even with
  try_socket_lock that doesn't sleep we have a problem where context sk
  is already locked and socket lock is non-nestable.

v2:
* s/BPF_PROG_TYPE_CGROUP_SOCKOPT/BPF_PROG_TYPE_SOCK_OPS/

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Link: https://lore.kernel.org/bpf/20200430233152.199403-1-sdf@google.com
---
 include/uapi/linux/bpf.h                          |  14 ++-
 net/core/filter.c                                 | 118 +++++++++++++++++-----
 tools/include/uapi/linux/bpf.h                    |  14 ++-
 tools/testing/selftests/bpf/config                |   1 +
 tools/testing/selftests/bpf/progs/connect4_prog.c |  46 +++++++++
 5 files changed, 166 insertions(+), 27 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 705e4822f997..b3643e27e264 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1587,7 +1587,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1595,6 +1595,11 @@ union bpf_attr {
  * 		must be specified, see **setsockopt(2)** for more information.
  * 		The option value of length *optlen* is pointed by *optval*.
  *
+ * 		*bpf_socket* should be one of the following:
+ * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
  *
@@ -1789,7 +1794,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1798,6 +1803,11 @@ union bpf_attr {
  * 		The retrieved value is stored in the structure pointed by
  * 		*opval* and of length *optlen*.
  *
+ * 		*bpf_socket* should be one of the following:
+ * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the following *level*\ s:
  *
diff --git a/net/core/filter.c b/net/core/filter.c
index 70b32723e6be..dfaf5df13722 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4194,16 +4194,19 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
-	   int, level, int, optname, char *, optval, int, optlen)
+#define SOCKOPT_CC_REINIT (1 << 0)
+
+static int _bpf_setsockopt(struct sock *sk, int level, int optname,
+			   char *optval, int optlen, u32 flags)
 {
-	struct sock *sk = bpf_sock->sk;
 	int ret = 0;
 	int val;
 
 	if (!sk_fullsock(sk))
 		return -EINVAL;
 
+	sock_owned_by_me(sk);
+
 	if (level == SOL_SOCKET) {
 		if (optlen != sizeof(int))
 			return -EINVAL;
@@ -4298,7 +4301,7 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 		   sk->sk_prot->setsockopt == tcp_setsockopt) {
 		if (optname == TCP_CONGESTION) {
 			char name[TCP_CA_NAME_MAX];
-			bool reinit = bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN;
+			bool reinit = flags & SOCKOPT_CC_REINIT;
 
 			strncpy(name, optval, min_t(long, optlen,
 						    TCP_CA_NAME_MAX-1));
@@ -4345,24 +4348,14 @@ BPF_CALL_5(bpf_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_setsockopt_proto = {
-	.func		= bpf_setsockopt,
-	.gpl_only	= false,
-	.ret_type	= RET_INTEGER,
-	.arg1_type	= ARG_PTR_TO_CTX,
-	.arg2_type	= ARG_ANYTHING,
-	.arg3_type	= ARG_ANYTHING,
-	.arg4_type	= ARG_PTR_TO_MEM,
-	.arg5_type	= ARG_CONST_SIZE,
-};
-
-BPF_CALL_5(bpf_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
-	   int, level, int, optname, char *, optval, int, optlen)
+static int _bpf_getsockopt(struct sock *sk, int level, int optname,
+			   char *optval, int optlen)
 {
-	struct sock *sk = bpf_sock->sk;
-
 	if (!sk_fullsock(sk))
 		goto err_clear;
+
+	sock_owned_by_me(sk);
+
 #ifdef CONFIG_INET
 	if (level == SOL_TCP && sk->sk_prot->getsockopt == tcp_getsockopt) {
 		struct inet_connection_sock *icsk;
@@ -4428,8 +4421,71 @@ err_clear:
 	return -EINVAL;
 }
 
-static const struct bpf_func_proto bpf_getsockopt_proto = {
-	.func		= bpf_getsockopt,
+BPF_CALL_5(bpf_sock_addr_setsockopt, struct bpf_sock_addr_kern *, ctx,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	u32 flags = 0;
+	return _bpf_setsockopt(ctx->sk, level, optname, optval, optlen,
+			       flags);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_setsockopt_proto = {
+	.func		= bpf_sock_addr_setsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sock_addr_getsockopt, struct bpf_sock_addr_kern *, ctx,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	return _bpf_getsockopt(ctx->sk, level, optname, optval, optlen);
+}
+
+static const struct bpf_func_proto bpf_sock_addr_getsockopt_proto = {
+	.func		= bpf_sock_addr_getsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_UNINIT_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sock_ops_setsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	u32 flags = 0;
+	if (bpf_sock->op > BPF_SOCK_OPS_NEEDS_ECN)
+		flags |= SOCKOPT_CC_REINIT;
+	return _bpf_setsockopt(bpf_sock->sk, level, optname, optval, optlen,
+			       flags);
+}
+
+static const struct bpf_func_proto bpf_sock_ops_setsockopt_proto = {
+	.func		= bpf_sock_ops_setsockopt,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+	.arg3_type	= ARG_ANYTHING,
+	.arg4_type	= ARG_PTR_TO_MEM,
+	.arg5_type	= ARG_CONST_SIZE,
+};
+
+BPF_CALL_5(bpf_sock_ops_getsockopt, struct bpf_sock_ops_kern *, bpf_sock,
+	   int, level, int, optname, char *, optval, int, optlen)
+{
+	return _bpf_getsockopt(bpf_sock->sk, level, optname, optval, optlen);
+}
+
+static const struct bpf_func_proto bpf_sock_ops_getsockopt_proto = {
+	.func		= bpf_sock_ops_getsockopt,
 	.gpl_only	= false,
 	.ret_type	= RET_INTEGER,
 	.arg1_type	= ARG_PTR_TO_CTX,
@@ -6043,6 +6099,22 @@ sock_addr_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_sk_storage_get_proto;
 	case BPF_FUNC_sk_storage_delete:
 		return &bpf_sk_storage_delete_proto;
+	case BPF_FUNC_setsockopt:
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET6_CONNECT:
+			return &bpf_sock_addr_setsockopt_proto;
+		default:
+			return NULL;
+		}
+	case BPF_FUNC_getsockopt:
+		switch (prog->expected_attach_type) {
+		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET6_CONNECT:
+			return &bpf_sock_addr_getsockopt_proto;
+		default:
+			return NULL;
+		}
 	default:
 		return bpf_base_func_proto(func_id);
 	}
@@ -6261,9 +6333,9 @@ sock_ops_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
 	case BPF_FUNC_setsockopt:
-		return &bpf_setsockopt_proto;
+		return &bpf_sock_ops_setsockopt_proto;
 	case BPF_FUNC_getsockopt:
-		return &bpf_getsockopt_proto;
+		return &bpf_sock_ops_getsockopt_proto;
 	case BPF_FUNC_sock_ops_cb_flags_set:
 		return &bpf_sock_ops_cb_flags_set_proto;
 	case BPF_FUNC_sock_map_update:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 705e4822f997..b3643e27e264 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1587,7 +1587,7 @@ union bpf_attr {
  * 	Return
  * 		0
  *
- * int bpf_setsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_setsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **setsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1595,6 +1595,11 @@ union bpf_attr {
  * 		must be specified, see **setsockopt(2)** for more information.
  * 		The option value of length *optlen* is pointed by *optval*.
  *
+ * 		*bpf_socket* should be one of the following:
+ * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *
  * 		This helper actually implements a subset of **setsockopt()**.
  * 		It supports the following *level*\ s:
  *
@@ -1789,7 +1794,7 @@ union bpf_attr {
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
- * int bpf_getsockopt(struct bpf_sock_ops *bpf_socket, int level, int optname, void *optval, int optlen)
+ * int bpf_getsockopt(void *bpf_socket, int level, int optname, void *optval, int optlen)
  * 	Description
  * 		Emulate a call to **getsockopt()** on the socket associated to
  * 		*bpf_socket*, which must be a full socket. The *level* at
@@ -1798,6 +1803,11 @@ union bpf_attr {
  * 		The retrieved value is stored in the structure pointed by
  * 		*opval* and of length *optlen*.
  *
+ * 		*bpf_socket* should be one of the following:
+ * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
+ * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
+ * 		  and **BPF_CGROUP_INET6_CONNECT**.
+ *
  * 		This helper actually implements a subset of **getsockopt()**.
  * 		It supports the following *level*\ s:
  *
diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 60e3ae5d4e48..6e5b94c036ca 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -37,3 +37,4 @@ CONFIG_IPV6_SIT=m
 CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
+CONFIG_TCP_CONG_DCTCP=y
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index ad3c498a8150..972918cd2d7f 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -8,6 +8,7 @@
 #include <linux/in.h>
 #include <linux/in6.h>
 #include <sys/socket.h>
+#include <netinet/tcp.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
@@ -16,6 +17,10 @@
 #define DST_REWRITE_IP4		0x7f000001U
 #define DST_REWRITE_PORT4	4444
 
+#ifndef TCP_CA_NAME_MAX
+#define TCP_CA_NAME_MAX 16
+#endif
+
 int _version SEC("version") = 1;
 
 __attribute__ ((noinline))
@@ -33,6 +38,43 @@ int do_bind(struct bpf_sock_addr *ctx)
 	return 1;
 }
 
+static __inline int verify_cc(struct bpf_sock_addr *ctx,
+			      char expected[TCP_CA_NAME_MAX])
+{
+	char buf[TCP_CA_NAME_MAX];
+	int i;
+
+	if (bpf_getsockopt(ctx, SOL_TCP, TCP_CONGESTION, &buf, sizeof(buf)))
+		return 1;
+
+	for (i = 0; i < TCP_CA_NAME_MAX; i++) {
+		if (buf[i] != expected[i])
+			return 1;
+		if (buf[i] == 0)
+			break;
+	}
+
+	return 0;
+}
+
+static __inline int set_cc(struct bpf_sock_addr *ctx)
+{
+	char dctcp[TCP_CA_NAME_MAX] = "dctcp";
+	char cubic[TCP_CA_NAME_MAX] = "cubic";
+
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &dctcp, sizeof(dctcp)))
+		return 1;
+	if (verify_cc(ctx, dctcp))
+		return 1;
+
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
+		return 1;
+	if (verify_cc(ctx, cubic))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -66,6 +108,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 
 	bpf_sk_release(sk);
 
+	/* Rewrite congestion control. */
+	if (ctx->type == SOCK_STREAM && set_cc(ctx))
+		return 0;
+
 	/* Rewrite destination. */
 	ctx->user_ip4 = bpf_htonl(DST_REWRITE_IP4);
 	ctx->user_port = bpf_htons(DST_REWRITE_PORT4);
-- 
cgit v1.2.3-59-g8ed1b


From f773d5118b6ce63d645d404bf29075252b890c66 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 19:22:29 +0200
Subject: r8169: remove redundant driver message when entering promiscuous mode

Net core -  __dev_set_promiscuity - prints a message already when
promiscuous mode is entered/left, therefore we don't have to do this
in the driver too. Also the driver message would be misleading
(would be because "link" message level is disabled by default)
because it would print "promisc mode enabled" even if it's being
left. Reason is that __dev_change_flags() calls dev_set_rx_mode()
before touching the promisc flag.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 0f869a761d8c..bfa199b36652 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2634,8 +2634,6 @@ static void rtl_set_rx_mode(struct net_device *dev)
 	u32 tmp;
 
 	if (dev->flags & IFF_PROMISC) {
-		/* Unconditionally log net taps. */
-		netif_notice(tp, link, dev, "Promiscuous mode enabled\n");
 		rx_mode |= AcceptAllPhys;
 	} else if (netdev_mc_count(dev) > MC_FILTER_LIMIT ||
 		   dev->flags & IFF_ALLMULTI ||
-- 
cgit v1.2.3-59-g8ed1b


From d56f58cec90d46a60d1acb15954b96d89aa154ee Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 19:23:36 +0200
Subject: r8169: simplify counter handling

The counter handling functions can only fail if rtl8169_do_counters()
times out. In the poll function we emit an error message in case of
timeout, therefore we don't have to propagate the timeout all the
way up just to print another message basically saying the same.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 38 +++++++++++--------------------
 1 file changed, 13 insertions(+), 25 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index bfa199b36652..1c2ea7506784 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1625,7 +1625,7 @@ DECLARE_RTL_COND(rtl_counters_cond)
 	return RTL_R32(tp, CounterAddrLow) & (CounterReset | CounterDump);
 }
 
-static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
+static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 {
 	dma_addr_t paddr = tp->counters_phys_addr;
 	u32 cmd;
@@ -1636,22 +1636,20 @@ static bool rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 	RTL_W32(tp, CounterAddrLow, cmd);
 	RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
-	return rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
+	rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
 
-static bool rtl8169_reset_counters(struct rtl8169_private *tp)
+static void rtl8169_reset_counters(struct rtl8169_private *tp)
 {
 	/*
 	 * Versions prior to RTL_GIGA_MAC_VER_19 don't support resetting the
 	 * tally counters.
 	 */
-	if (tp->mac_version < RTL_GIGA_MAC_VER_19)
-		return true;
-
-	return rtl8169_do_counters(tp, CounterReset);
+	if (tp->mac_version >= RTL_GIGA_MAC_VER_19)
+		rtl8169_do_counters(tp, CounterReset);
 }
 
-static bool rtl8169_update_counters(struct rtl8169_private *tp)
+static void rtl8169_update_counters(struct rtl8169_private *tp)
 {
 	u8 val = RTL_R8(tp, ChipCmd);
 
@@ -1659,16 +1657,13 @@ static bool rtl8169_update_counters(struct rtl8169_private *tp)
 	 * Some chips are unable to dump tally counters when the receiver
 	 * is disabled. If 0xff chip may be in a PCI power-save state.
 	 */
-	if (!(val & CmdRxEnb) || val == 0xff)
-		return true;
-
-	return rtl8169_do_counters(tp, CounterDump);
+	if (val & CmdRxEnb && val != 0xff)
+		rtl8169_do_counters(tp, CounterDump);
 }
 
-static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp)
+static void rtl8169_init_counter_offsets(struct rtl8169_private *tp)
 {
 	struct rtl8169_counters *counters = tp->counters;
-	bool ret = false;
 
 	/*
 	 * rtl8169_init_counter_offsets is called from rtl_open.  On chip
@@ -1686,22 +1681,16 @@ static bool rtl8169_init_counter_offsets(struct rtl8169_private *tp)
 	 */
 
 	if (tp->tc_offset.inited)
-		return true;
-
-	/* If both, reset and update fail, propagate to caller. */
-	if (rtl8169_reset_counters(tp))
-		ret = true;
+		return;
 
-	if (rtl8169_update_counters(tp))
-		ret = true;
+	rtl8169_reset_counters(tp);
+	rtl8169_update_counters(tp);
 
 	tp->tc_offset.tx_errors = counters->tx_errors;
 	tp->tc_offset.tx_multi_collision = counters->tx_multi_collision;
 	tp->tc_offset.tx_aborted = counters->tx_aborted;
 	tp->tc_offset.rx_missed = counters->rx_missed;
 	tp->tc_offset.inited = true;
-
-	return ret;
 }
 
 static void rtl8169_get_ethtool_stats(struct net_device *dev,
@@ -4759,8 +4748,7 @@ static int rtl_open(struct net_device *dev)
 
 	rtl_hw_start(tp);
 
-	if (!rtl8169_init_counter_offsets(tp))
-		netif_warn(tp, hw, dev, "counter reset/update failed\n");
+	rtl8169_init_counter_offsets(tp);
 
 	phy_start(tp->phydev);
 	netif_start_queue(dev);
-- 
cgit v1.2.3-59-g8ed1b


From 3bf6ff3cec5d2b1d8185e76c7fae48ff7021ebd0 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 19:24:47 +0200
Subject: r8169: remove "out of memory" error message from rtl_request_firmware

When preparing an unrelated change, checkpatch complained about this
redundant out-of-memory message. Therefore remove it.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 1c2ea7506784..768721d565ae 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2507,10 +2507,8 @@ static void rtl_request_firmware(struct rtl8169_private *tp)
 		return;
 
 	rtl_fw = kzalloc(sizeof(*rtl_fw), GFP_KERNEL);
-	if (!rtl_fw) {
-		netif_warn(tp, ifup, tp->dev, "Unable to load firmware, out of memory\n");
+	if (!rtl_fw)
 		return;
-	}
 
 	rtl_fw->phy_write = rtl_writephy;
 	rtl_fw->phy_read = rtl_readphy;
-- 
cgit v1.2.3-59-g8ed1b


From 93882c6f210af5e318cdbe40b320053cff0fa033 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 19:26:22 +0200
Subject: r8169: switch from netif_xxx message functions to netdev_xxx

Considering the few messages we have in the driver, there's not really
a benefit in being able to control them on a message type level.
Therefore simplify the code and switch to the netdev_xxx message
functions. In addition add net_ratelimit() to messages that can be
printed from a hot path.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 68 ++++++++++---------------------
 1 file changed, 22 insertions(+), 46 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 768721d565ae..8b665f2ec21f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -59,9 +59,6 @@
 #define FIRMWARE_8107E_2	"rtl_nic/rtl8107e-2.fw"
 #define FIRMWARE_8125A_3	"rtl_nic/rtl8125a-3.fw"
 
-#define R8169_MSG_DEFAULT \
-	(NETIF_MSG_DRV | NETIF_MSG_PROBE | NETIF_MSG_IFUP | NETIF_MSG_IFDOWN)
-
 /* Maximum number of multicast addresses to filter (vs. Rx-all-multicast).
    The RTL chips use a 64 element hash table based on the Ethernet CRC. */
 #define	MC_FILTER_LIMIT	32
@@ -179,10 +176,6 @@ static const struct pci_device_id rtl8169_pci_tbl[] = {
 
 MODULE_DEVICE_TABLE(pci, rtl8169_pci_tbl);
 
-static struct {
-	u32 msg_enable;
-} debug = { -1 };
-
 enum rtl_registers {
 	MAC0		= 0,	/* Ethernet hardware address. */
 	MAC4		= 4,
@@ -604,7 +597,6 @@ struct rtl8169_private {
 	struct net_device *dev;
 	struct phy_device *phydev;
 	struct napi_struct napi;
-	u32 msg_enable;
 	enum mac_version mac_version;
 	u32 cur_rx; /* Index into the Rx descriptor buffer of next Rx pkt. */
 	u32 cur_tx; /* Index into the Tx descriptor buffer of next Rx pkt. */
@@ -646,8 +638,6 @@ typedef void (*rtl_generic_fct)(struct rtl8169_private *tp);
 
 MODULE_AUTHOR("Realtek and the Linux r8169 crew <netdev@vger.kernel.org>");
 MODULE_DESCRIPTION("RealTek RTL-8169 Gigabit Ethernet driver");
-module_param_named(debug, debug.msg_enable, int, 0);
-MODULE_PARM_DESC(debug, "Debug verbosity level (0=none, ..., 16=all)");
 MODULE_SOFTDEP("pre: realtek");
 MODULE_LICENSE("GPL");
 MODULE_FIRMWARE(FIRMWARE_8168D_1);
@@ -751,8 +741,10 @@ static bool rtl_loop_wait(struct rtl8169_private *tp, const struct rtl_cond *c,
 			return true;
 		delay(d);
 	}
-	netif_err(tp, drv, tp->dev, "%s == %d (loop: %d, delay: %d).\n",
-		  c->msg, !high, n, d);
+
+	if (net_ratelimit())
+		netdev_err(tp->dev, "%s == %d (loop: %d, delay: %d).\n",
+			   c->msg, !high, n, d);
 	return false;
 }
 
@@ -797,7 +789,8 @@ static bool name ## _check(struct rtl8169_private *tp)
 static bool rtl_ocp_reg_failure(struct rtl8169_private *tp, u32 reg)
 {
 	if (reg & 0xffff0001) {
-		netif_err(tp, drv, tp->dev, "Invalid ocp reg %x!\n", reg);
+		if (net_ratelimit())
+			netdev_err(tp->dev, "Invalid ocp reg %x!\n", reg);
 		return true;
 	}
 	return false;
@@ -1580,20 +1573,6 @@ static void rtl8169_get_regs(struct net_device *dev, struct ethtool_regs *regs,
 	rtl_unlock_work(tp);
 }
 
-static u32 rtl8169_get_msglevel(struct net_device *dev)
-{
-	struct rtl8169_private *tp = netdev_priv(dev);
-
-	return tp->msg_enable;
-}
-
-static void rtl8169_set_msglevel(struct net_device *dev, u32 value)
-{
-	struct rtl8169_private *tp = netdev_priv(dev);
-
-	tp->msg_enable = value;
-}
-
 static const char rtl8169_gstrings[][ETH_GSTRING_LEN] = {
 	"tx_packets",
 	"rx_packets",
@@ -1985,8 +1964,6 @@ static const struct ethtool_ops rtl8169_ethtool_ops = {
 	.get_link		= ethtool_op_get_link,
 	.get_coalesce		= rtl_get_coalesce,
 	.set_coalesce		= rtl_set_coalesce,
-	.get_msglevel		= rtl8169_get_msglevel,
-	.set_msglevel		= rtl8169_set_msglevel,
 	.get_regs		= rtl8169_get_regs,
 	.get_wol		= rtl8169_get_wol,
 	.set_wol		= rtl8169_set_wol,
@@ -3868,8 +3845,7 @@ static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
 
 	mapping = dma_map_page(d, data, 0, R8169_RX_BUF_SIZE, DMA_FROM_DEVICE);
 	if (unlikely(dma_mapping_error(d, mapping))) {
-		if (net_ratelimit())
-			netif_err(tp, drv, tp->dev, "Failed to map RX DMA!\n");
+		netdev_err(tp->dev, "Failed to map RX DMA!\n");
 		__free_pages(data, get_order(R8169_RX_BUF_SIZE));
 		return NULL;
 	}
@@ -4006,7 +3982,7 @@ static int rtl8169_tx_map(struct rtl8169_private *tp, const u32 *opts, u32 len,
 	ret = dma_mapping_error(d, mapping);
 	if (unlikely(ret)) {
 		if (net_ratelimit())
-			netif_err(tp, drv, tp->dev, "Failed to map TX data!\n");
+			netdev_err(tp->dev, "Failed to map TX data!\n");
 		return ret;
 	}
 
@@ -4172,7 +4148,8 @@ static netdev_tx_t rtl8169_start_xmit(struct sk_buff *skb,
 	txd_first = tp->TxDescArray + entry;
 
 	if (unlikely(!rtl_tx_slots_avail(tp, frags))) {
-		netif_err(tp, drv, dev, "BUG! Tx Ring full when queue awake!\n");
+		if (net_ratelimit())
+			netdev_err(dev, "BUG! Tx Ring full when queue awake!\n");
 		goto err_stop_0;
 	}
 
@@ -4334,9 +4311,9 @@ static void rtl8169_pcierr_interrupt(struct net_device *dev)
 
 	pci_status_errs = pci_status_get_and_clear_errors(pdev);
 
-	netif_err(tp, intr, dev, "PCI error (cmd = 0x%04x, status_errs = 0x%04x)\n",
-		  pci_cmd, pci_status_errs);
-
+	if (net_ratelimit())
+		netdev_err(dev, "PCI error (cmd = 0x%04x, status_errs = 0x%04x)\n",
+			   pci_cmd, pci_status_errs);
 	/*
 	 * The recovery sequence below admits a very elaborated explanation:
 	 * - it seems to work;
@@ -4454,8 +4431,9 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 		dma_rmb();
 
 		if (unlikely(status & RxRES)) {
-			netif_info(tp, rx_err, dev, "Rx ERROR. status = %08x\n",
-				   status);
+			if (net_ratelimit())
+				netdev_warn(dev, "Rx ERROR. status = %08x\n",
+					    status);
 			dev->stats.rx_errors++;
 			if (status & (RxRWT | RxRUNT))
 				dev->stats.rx_length_errors++;
@@ -5326,7 +5304,6 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	tp = netdev_priv(dev);
 	tp->dev = dev;
 	tp->pci_dev = pdev;
-	tp->msg_enable = netif_msg_init(debug.msg_enable, R8169_MSG_DEFAULT);
 	tp->supports_gmii = ent->driver_data == RTL_CFG_NO_GBIT ? 0 : 1;
 	tp->eee_adv = -1;
 	tp->ocp_base = OCP_STD_PHY_BASE;
@@ -5484,15 +5461,14 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		return rc;
 
-	netif_info(tp, probe, dev, "%s, %pM, XID %03x, IRQ %d\n",
-		   rtl_chip_infos[chipset].name, dev->dev_addr, xid,
-		   pci_irq_vector(pdev, 0));
+	netdev_info(dev, "%s, %pM, XID %03x, IRQ %d\n",
+		    rtl_chip_infos[chipset].name, dev->dev_addr, xid,
+		    pci_irq_vector(pdev, 0));
 
 	if (jumbo_max)
-		netif_info(tp, probe, dev,
-			   "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
-			   jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
-			   "ok" : "ko");
+		netdev_info(dev, "jumbo features [frames: %d bytes, tx checksumming: %s]\n",
+			    jumbo_max, tp->mac_version <= RTL_GIGA_MAC_VER_06 ?
+			    "ok" : "ko");
 
 	if (r8168_check_dash(tp))
 		rtl8168_driver_start(tp);
-- 
cgit v1.2.3-59-g8ed1b


From f0628c524fd188c3f9418e12478dfdfadacba815 Mon Sep 17 00:00:00 2001
From: Cambda Zhu <cambda@linux.alibaba.com>
Date: Fri, 24 Apr 2020 16:06:16 +0800
Subject: net: Replace the limit of TCP_LINGER2 with TCP_FIN_TIMEOUT_MAX

This patch changes the behavior of TCP_LINGER2 about its limit. The
sysctl_tcp_fin_timeout used to be the limit of TCP_LINGER2 but now it's
only the default value. A new macro named TCP_FIN_TIMEOUT_MAX is added
as the limit of TCP_LINGER2, which is 2 minutes.

Since TCP_LINGER2 used sysctl_tcp_fin_timeout as both the default value
and the limit in the past, the system administrator could not set a
default value for most sockets while letting some sockets have a greater
timeout. It was probably a mistake to let the sysctl be the limit of
TCP_LINGER2. We could add a new sysctl to set the maximum of
TCP_LINGER2, but the FIN-WAIT-2 timeout usually does not need to be very
long, and 2 minutes is legal considering the TCP specs.

Changes in v3:
- Remove the new socket option and change the TCP_LINGER2 behavior so
  that the timeout can be set to value between sysctl_tcp_fin_timeout
  and 2 minutes.

Changes in v2:
- Add int overflow check for the new socket option.

Changes in v1:
- Add a new socket option to set timeout greater than
  sysctl_tcp_fin_timeout.

Signed-off-by: Cambda Zhu <cambda@linux.alibaba.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h | 1 +
 net/ipv4/tcp.c    | 4 ++--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index dcf9a72eeaa6..1beed50522b1 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -126,6 +126,7 @@ void tcp_time_wait(struct sock *sk, int state, int timeo);
 				  * to combine FIN-WAIT-2 timeout with
 				  * TIME-WAIT timer.
 				  */
+#define TCP_FIN_TIMEOUT_MAX (120 * HZ) /* max TCP_LINGER2 value (two minutes) */
 
 #define TCP_DELACK_MAX	((unsigned)(HZ/5))	/* maximal time to delay before sending an ACK */
 #if HZ >= 100
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 6d87de434377..8c1250103959 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -3035,8 +3035,8 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 	case TCP_LINGER2:
 		if (val < 0)
 			tp->linger2 = -1;
-		else if (val > net->ipv4.sysctl_tcp_fin_timeout / HZ)
-			tp->linger2 = 0;
+		else if (val > TCP_FIN_TIMEOUT_MAX / HZ)
+			tp->linger2 = TCP_FIN_TIMEOUT_MAX;
 		else
 			tp->linger2 = val * HZ;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 138c67677ff5ac0bce7131033c39d52a81e87a60 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 1 May 2020 11:56:22 -0700
Subject: bpf: Fix use-after-free of bpf_link when priming half-fails

If bpf_link_prime() succeeds in allocating a new anon file, but then fails to
allocate an ID for it, link priming is considered to have failed and the user
is supposed to be able to directly kfree() bpf_link, because it was never
exposed to user-space.

But at that point file already keeps a pointer to bpf_link and will eventually
call bpf_link_release(), so if bpf_link was kfree()'d by caller, that would
lead to use-after-free.

Fix this by first allocating ID and only then allocating file. Adding ID to
link_idr is ok, because link at that point still doesn't have its ID set, so
no user-space process can create a new FD for it.

Fixes: a3b80e107894 ("bpf: Allocate ID for bpf_link")
Reported-by: syzbot+39b64425f91b5aab714d@syzkaller.appspotmail.com
Suggested-by: Martin KaFai Lau <kafai@fb.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200501185622.3088964-1-andriin@fb.com
---
 kernel/bpf/syscall.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 4f34eecec9ce..bb1ab7da6103 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2348,19 +2348,20 @@ int bpf_link_prime(struct bpf_link *link, struct bpf_link_primer *primer)
 	if (fd < 0)
 		return fd;
 
-	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
-	if (IS_ERR(file)) {
-		put_unused_fd(fd);
-		return PTR_ERR(file);
-	}
 
 	id = bpf_link_alloc_id(link);
 	if (id < 0) {
 		put_unused_fd(fd);
-		fput(file);
 		return id;
 	}
 
+	file = anon_inode_getfile("bpf_link", &bpf_link_fops, link, O_CLOEXEC);
+	if (IS_ERR(file)) {
+		bpf_link_free_id(id);
+		put_unused_fd(fd);
+		return PTR_ERR(file);
+	}
+
 	primer->link = link;
 	primer->file = file;
 	primer->fd = fd;
-- 
cgit v1.2.3-59-g8ed1b


From 1bb694e208395816fee278ca46d2796727d3f4a9 Mon Sep 17 00:00:00 2001
From: Christophe Roullier <christophe.roullier@st.com>
Date: Mon, 27 Apr 2020 12:00:38 +0200
Subject: net: ethernet: stmmac: simplify phy modes management for stm32

No new feature; this just simplifies the stm32 part to make it easier to use.
All Ethernet clocks are now added to the DT by default, and are activated or
not depending on the PHY mode, the clock frequency, and whether the property
"st,ext-phyclk" is set. Backward compatibility is kept.
--------------------------------------------------------------------------
|PHY_MODE |    Normal | PHY wo crystal|   PHY wo crystal   |  No 125Mhz  |
|         |	      |      25MHz    |        50MHz       |  from PHY   |
--------------------------------------------------------------------------
|  MII    |	 -    |     eth-ck    |       n/a          |	    n/a  |
|         |	      | st,ext-phyclk |                    |             |
--------------------------------------------------------------------------
|  GMII   |	 -    |     eth-ck    |       n/a          |	    n/a  |
|         |	      | st,ext-phyclk |                    |             |
--------------------------------------------------------------------------
| RGMII   |	 -    |     eth-ck    |       n/a          |     eth-ck  |
|         |	     | st,ext-phyclk |                    |st,eth-clk-sel|
|         |	     |               |                    |       or     |
|         |	     |               |                    | st,ext-phyclk|
----------------==--------------------------------------------------------
| RMII    |	 -   |     eth-ck    |      eth-ck        |       n/a    |
|         | 	     | st,ext-phyclk | st,eth-ref-clk-sel |              |
|         | 	     |               | or st,ext-phyclk   |              |
--------------------------------------------------------------------------

Signed-off-by: Christophe Roullier <christophe.roullier@st.com>
Acked-by: Alexandre TORGUE <alexandre.torgue@st.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c | 74 ++++++++++++++---------
 1 file changed, 44 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
index b2dc99289687..5d4df4c5254e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-stm32.c
@@ -29,6 +29,11 @@
 #define SYSCFG_PMCR_ETH_CLK_SEL		BIT(16)
 #define SYSCFG_PMCR_ETH_REF_CLK_SEL	BIT(17)
 
+/* CLOCK feed to PHY*/
+#define ETH_CK_F_25M	25000000
+#define ETH_CK_F_50M	50000000
+#define ETH_CK_F_125M	125000000
+
 /*  Ethernet PHY interface selection in register SYSCFG Configuration
  *------------------------------------------
  * src	 |BIT(23)| BIT(22)| BIT(21)|BIT(20)|
@@ -58,33 +63,20 @@
  *|         |        |      25MHz    |        50MHz       |                  |
  * ---------------------------------------------------------------------------
  *|  MII    |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
- *|         |        |		     |                    |		     |
+ *|         |        | st,ext-phyclk |                    |		     |
  * ---------------------------------------------------------------------------
  *|  GMII   |	 -   |     eth-ck    |	      n/a	  |	  n/a        |
- *|         |        |               |                    |		     |
+ *|         |        | st,ext-phyclk |                    |		     |
  * ---------------------------------------------------------------------------
- *| RGMII   |	 -   |     eth-ck    |	      n/a	  |  eth-ck (no pin) |
- *|         |        |               |                    |  st,eth-clk-sel  |
+ *| RGMII   |	 -   |     eth-ck    |	      n/a	  |      eth-ck      |
+ *|         |        | st,ext-phyclk |                    | st,eth-clk-sel or|
+ *|         |        |               |                    | st,ext-phyclk    |
  * ---------------------------------------------------------------------------
  *| RMII    |	 -   |     eth-ck    |	    eth-ck        |	  n/a        |
- *|         |        |		     | st,eth-ref-clk-sel |		     |
+ *|         |        | st,ext-phyclk | st,eth-ref-clk-sel |		     |
+ *|         |        |               | or st,ext-phyclk   |		     |
  * ---------------------------------------------------------------------------
  *
- * BIT(17) : set this bit in RMII mode when you have PHY without crystal 50MHz
- * BIT(16) : set this bit in GMII/RGMII PHY when you do not want use 125Mhz
- * from PHY
- *-----------------------------------------------------
- * src	 |         BIT(17)       |       BIT(16)      |
- *-----------------------------------------------------
- * MII   |           n/a	 |         n/a        |
- *-----------------------------------------------------
- * GMII  |           n/a         |   st,eth-clk-sel   |
- *-----------------------------------------------------
- * RGMII |           n/a         |   st,eth-clk-sel   |
- *-----------------------------------------------------
- * RMII  |   st,eth-ref-clk-sel	 |         n/a        |
- *-----------------------------------------------------
- *
  */
 
 struct stm32_dwmac {
@@ -93,6 +85,8 @@ struct stm32_dwmac {
 	struct clk *clk_eth_ck;
 	struct clk *clk_ethstp;
 	struct clk *syscfg_clk;
+	int ext_phyclk;
+	int enable_eth_ck;
 	int eth_clk_sel_reg;
 	int eth_ref_clk_sel_reg;
 	int irq_pwr_wakeup;
@@ -155,14 +149,17 @@ static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
 		ret = clk_prepare_enable(dwmac->syscfg_clk);
 		if (ret)
 			return ret;
-		ret = clk_prepare_enable(dwmac->clk_eth_ck);
-		if (ret) {
-			clk_disable_unprepare(dwmac->syscfg_clk);
-			return ret;
+		if (dwmac->enable_eth_ck) {
+			ret = clk_prepare_enable(dwmac->clk_eth_ck);
+			if (ret) {
+				clk_disable_unprepare(dwmac->syscfg_clk);
+				return ret;
+			}
 		}
 	} else {
 		clk_disable_unprepare(dwmac->syscfg_clk);
-		clk_disable_unprepare(dwmac->clk_eth_ck);
+		if (dwmac->enable_eth_ck)
+			clk_disable_unprepare(dwmac->clk_eth_ck);
 	}
 	return ret;
 }
@@ -170,24 +167,34 @@ static int stm32mp1_clk_prepare(struct stm32_dwmac *dwmac, bool prepare)
 static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 {
 	struct stm32_dwmac *dwmac = plat_dat->bsp_priv;
-	u32 reg = dwmac->mode_reg;
+	u32 reg = dwmac->mode_reg, clk_rate;
 	int val;
 
+	clk_rate = clk_get_rate(dwmac->clk_eth_ck);
+	dwmac->enable_eth_ck = false;
 	switch (plat_dat->interface) {
 	case PHY_INTERFACE_MODE_MII:
+		if (clk_rate == ETH_CK_F_25M && dwmac->ext_phyclk)
+			dwmac->enable_eth_ck = true;
 		val = SYSCFG_PMCR_ETH_SEL_MII;
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_MII\n");
 		break;
 	case PHY_INTERFACE_MODE_GMII:
 		val = SYSCFG_PMCR_ETH_SEL_GMII;
-		if (dwmac->eth_clk_sel_reg)
+		if (clk_rate == ETH_CK_F_25M &&
+		    (dwmac->eth_clk_sel_reg || dwmac->ext_phyclk)) {
+			dwmac->enable_eth_ck = true;
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
+		}
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_GMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RMII:
 		val = SYSCFG_PMCR_ETH_SEL_RMII;
-		if (dwmac->eth_ref_clk_sel_reg)
+		if ((clk_rate == ETH_CK_F_25M || clk_rate == ETH_CK_F_50M) &&
+		    (dwmac->eth_ref_clk_sel_reg || dwmac->ext_phyclk)) {
+			dwmac->enable_eth_ck = true;
 			val |= SYSCFG_PMCR_ETH_REF_CLK_SEL;
+		}
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RMII\n");
 		break;
 	case PHY_INTERFACE_MODE_RGMII:
@@ -195,8 +202,11 @@ static int stm32mp1_set_mode(struct plat_stmmacenet_data *plat_dat)
 	case PHY_INTERFACE_MODE_RGMII_RXID:
 	case PHY_INTERFACE_MODE_RGMII_TXID:
 		val = SYSCFG_PMCR_ETH_SEL_RGMII;
-		if (dwmac->eth_clk_sel_reg)
+		if ((clk_rate == ETH_CK_F_25M || clk_rate == ETH_CK_F_125M) &&
+		    (dwmac->eth_clk_sel_reg || dwmac->ext_phyclk)) {
+			dwmac->enable_eth_ck = true;
 			val |= SYSCFG_PMCR_ETH_CLK_SEL;
+		}
 		pr_debug("SYSCFG init : PHY_INTERFACE_MODE_RGMII\n");
 		break;
 	default:
@@ -294,6 +304,9 @@ static int stm32mp1_parse_data(struct stm32_dwmac *dwmac,
 	struct device_node *np = dev->of_node;
 	int err = 0;
 
+	/* Ethernet PHY have no crystal */
+	dwmac->ext_phyclk = of_property_read_bool(np, "st,ext-phyclk");
+
 	/* Gigabit Ethernet 125MHz clock selection. */
 	dwmac->eth_clk_sel_reg = of_property_read_bool(np, "st,eth-clk-sel");
 
@@ -431,7 +444,8 @@ static int stm32mp1_suspend(struct stm32_dwmac *dwmac)
 
 	clk_disable_unprepare(dwmac->clk_tx);
 	clk_disable_unprepare(dwmac->syscfg_clk);
-	clk_disable_unprepare(dwmac->clk_eth_ck);
+	if (dwmac->enable_eth_ck)
+		clk_disable_unprepare(dwmac->clk_eth_ck);
 
 	return ret;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 41a46913bee76d8493681442907ccd989ced2633 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 27 Apr 2020 18:37:43 +0200
Subject: net: fix skb_panic to output real address

In skb_panic() the real pointer values are really needed to diagnose
issues, e.g. data and head are related (to calculate headroom). The
hashed versions of the addresses don't make much sense here. The
patch uses the printk specifier %px to print the actual address.

The printk documentation on %px:
https://www.kernel.org/doc/html/latest/core-api/printk-formats.html#unmodified-addresses

Fixes: ad67b74d2469 ("printk: hash addresses printed with %p")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 7e29590482ce..1bf0c3d278e7 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -102,7 +102,7 @@ EXPORT_SYMBOL(sysctl_max_skb_frags);
 static void skb_panic(struct sk_buff *skb, unsigned int sz, void *addr,
 		      const char msg[])
 {
-	pr_emerg("%s: text:%p len:%d put:%d head:%p data:%p tail:%#lx end:%#lx dev:%s\n",
+	pr_emerg("%s: text:%px len:%d put:%d head:%px data:%px tail:%#lx end:%#lx dev:%s\n",
 		 msg, addr, skb->len, sz, skb->head, skb->data,
 		 (unsigned long)skb->tail, (unsigned long)skb->end,
 		 skb->dev ? skb->dev->name : "<NULL>");
-- 
cgit v1.2.3-59-g8ed1b


From e00edb4efbbc07425441a3be2aa87abaf5800d96 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Wed, 29 Apr 2020 02:52:20 +0000
Subject: drivers: net: davinci_mdio: fix potential NULL dereference in
 davinci_mdio_probe()

platform_get_resource() may fail and return NULL, so we should
check its return value to avoid a NULL pointer dereference,
since devm_ioremap() does not check input parameters for NULL.

This is detected by Coccinelle semantic patch.

@@
expression pdev, res, n, t, e, e1, e2;
@@

res = \(platform_get_resource\|platform_get_resource_byname\)(pdev, t, n);
+ if (!res)
+   return -EINVAL;
... when != res == NULL
e = devm_ioremap(e1, res->start, e2);

Fixes: 03f66f067560 ("net: ethernet: ti: davinci_mdio: use devm_ioremap()")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/davinci_mdio.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/ti/davinci_mdio.c b/drivers/net/ethernet/ti/davinci_mdio.c
index 38b7f6d35759..702fdc393da0 100644
--- a/drivers/net/ethernet/ti/davinci_mdio.c
+++ b/drivers/net/ethernet/ti/davinci_mdio.c
@@ -397,6 +397,8 @@ static int davinci_mdio_probe(struct platform_device *pdev)
 	data->dev = dev;
 
 	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
+	if (!res)
+		return -EINVAL;
 	data->regs = devm_ioremap(dev, res->start, resource_size(res));
 	if (!data->regs)
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From fe677b057e33a0345570064c0d82ef6956b005d6 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:29 +0300
Subject: net: atlantic: update company name in the driver description

Aquantia is now part of Marvell. Thus, update the driver description.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_cfg.h    | 4 ++--
 drivers/net/ethernet/aquantia/atlantic/aq_common.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
index 7560f5506e55..52b9833fda99 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_cfg.h
@@ -80,8 +80,8 @@
 
 #define AQ_CFG_LOCK_TRYS   100U
 
-#define AQ_CFG_DRV_AUTHOR      "aQuantia"
-#define AQ_CFG_DRV_DESC        "aQuantia Corporation(R) Network Driver"
+#define AQ_CFG_DRV_AUTHOR      "Marvell"
+#define AQ_CFG_DRV_DESC        "Marvell (Aquantia) Corporation(R) Network Driver"
 #define AQ_CFG_DRV_NAME        "atlantic"
 
 #endif /* AQ_CFG_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
index c8c402b013bb..d5beb798bab6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -37,7 +37,7 @@
 #define AQ_DEVICE_ID_AQC111S	0x91B1
 #define AQ_DEVICE_ID_AQC112S	0x92B1
 
-#define HW_ATL_NIC_NAME "aQuantia AQtion 10Gbit Network Adapter"
+#define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter"
 
 #define AQ_HWREV_ANY	0
 #define AQ_HWREV_1	1
-- 
cgit v1.2.3-59-g8ed1b


From 98c4353c5add3cb2bbb3c2da1cbd7fd2d09f396b Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:30 +0300
Subject: net: atlantic: add A2 device IDs

Adding device ids for the new generation of atlantic nic.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_common.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
index d5beb798bab6..1261e7c7a01e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -37,6 +37,13 @@
 #define AQ_DEVICE_ID_AQC111S	0x91B1
 #define AQ_DEVICE_ID_AQC112S	0x92B1
 
+#define AQ_DEVICE_ID_AQC113DEV	0x00C0
+#define AQ_DEVICE_ID_AQC113CS	0x94C0
+#define AQ_DEVICE_ID_AQC114CS	0x93C0
+#define AQ_DEVICE_ID_AQC113	0x04C0
+#define AQ_DEVICE_ID_AQC113C	0x14C0
+#define AQ_DEVICE_ID_AQC115C	0x12C0
+
 #define HW_ATL_NIC_NAME "Marvell (aQuantia) AQtion 10Gbit Network Adapter"
 
 #define AQ_HWREV_ANY	0
-- 
cgit v1.2.3-59-g8ed1b


From 3d464aadef75415c55a5a4feb611a9bbf034d7d3 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:31 +0300
Subject: net: atlantic: add defines for 10M and EEE 100M link mode

This patch adds defines for 10M and EEE 100M link modes, which are
supported by A2.

10M support is added in this patch series.
EEE is out of scope, but will be added in a follow-up series.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_common.h | 22 ++++++++++++----------
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    |  3 +++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 12 ++++++++++++
 3 files changed, 27 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
index 1261e7c7a01e..53620ba6d7a6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -50,16 +50,18 @@
 #define AQ_HWREV_1	1
 #define AQ_HWREV_2	2
 
-#define AQ_NIC_RATE_10G        BIT(0)
-#define AQ_NIC_RATE_5G         BIT(1)
-#define AQ_NIC_RATE_5GSR       BIT(2)
-#define AQ_NIC_RATE_2GS        BIT(3)
-#define AQ_NIC_RATE_1G         BIT(4)
-#define AQ_NIC_RATE_100M       BIT(5)
+#define AQ_NIC_RATE_10G		BIT(0)
+#define AQ_NIC_RATE_5G		BIT(1)
+#define AQ_NIC_RATE_5GSR	BIT(2)
+#define AQ_NIC_RATE_2GS		BIT(3)
+#define AQ_NIC_RATE_1G		BIT(4)
+#define AQ_NIC_RATE_100M	BIT(5)
+#define AQ_NIC_RATE_10M		BIT(6)
 
-#define AQ_NIC_RATE_EEE_10G	BIT(6)
-#define AQ_NIC_RATE_EEE_5G	BIT(7)
-#define AQ_NIC_RATE_EEE_2GS	BIT(8)
-#define AQ_NIC_RATE_EEE_1G	BIT(9)
+#define AQ_NIC_RATE_EEE_10G	BIT(7)
+#define AQ_NIC_RATE_EEE_5G	BIT(8)
+#define AQ_NIC_RATE_EEE_2GS	BIT(9)
+#define AQ_NIC_RATE_EEE_1G	BIT(10)
+#define AQ_NIC_RATE_EEE_100M	BIT(11)
 
 #endif /* AQ_COMMON_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 7241cf92b43a..0c9dd8edc062 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -611,6 +611,9 @@ static enum hw_atl_fw2x_rate eee_mask_to_ethtool_mask(u32 speed)
 	if (speed & AQ_NIC_RATE_EEE_1G)
 		rate |= SUPPORTED_1000baseT_Full;
 
+	if (speed & AQ_NIC_RATE_EEE_100M)
+		rate |= SUPPORTED_100baseT_Full;
+
 	return rate;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index a369705a786a..80dd744dcbd1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -885,6 +885,10 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     100baseT_Full);
 
+	if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_10M)
+		ethtool_link_ksettings_add_link_mode(cmd, supported,
+						     10baseT_Full);
+
 	if (self->aq_nic_cfg.aq_hw_caps->flow_control) {
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     Pause);
@@ -924,6 +928,10 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     100baseT_Full);
 
+	if (self->aq_nic_cfg.link_speed_msk  & AQ_NIC_RATE_10M)
+		ethtool_link_ksettings_add_link_mode(cmd, advertising,
+						     10baseT_Full);
+
 	if (self->aq_nic_cfg.fc.cur & AQ_NIC_FC_RX)
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     Pause);
@@ -954,6 +962,10 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self,
 		speed = cmd->base.speed;
 
 		switch (speed) {
+		case SPEED_10:
+			rate = AQ_NIC_RATE_10M;
+			break;
+
 		case SPEED_100:
 			rate = AQ_NIC_RATE_100M;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 099d074e3f0cd3f48e57e3349b9f8a25b0e3feb8 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:32 +0300
Subject: net: atlantic: add hw_soft_reset, hw_prepare to hw_ops

A2 will have a different implementation of these 2 APIs, so
this patch moves them to hw_ops in preparation for A2.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Co-developed-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Signed-off-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h           |  5 +++++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c          | 16 +++++++++++++++-
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c    |  2 ++
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c    |  2 ++
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c |  4 ----
 5 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 7d71bc7dc500..84abce29d590 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -182,6 +182,11 @@ struct aq_hw_ops {
 
 	int (*hw_set_mac_address)(struct aq_hw_s *self, u8 *mac_addr);
 
+	int (*hw_soft_reset)(struct aq_hw_s *self);
+
+	int (*hw_prepare)(struct aq_hw_s *self,
+			  const struct aq_fw_ops **fw_ops);
+
 	int (*hw_reset)(struct aq_hw_s *self);
 
 	int (*hw_init)(struct aq_hw_s *self, u8 *mac_addr);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 80dd744dcbd1..7f4d8abab951 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -257,6 +257,20 @@ static void aq_nic_polling_timer_cb(struct timer_list *t)
 		  AQ_CFG_POLLING_TIMER_INTERVAL);
 }
 
+static int aq_nic_hw_prepare(struct aq_nic_s *self)
+{
+	int err = 0;
+
+	err = self->aq_hw_ops->hw_soft_reset(self->aq_hw);
+	if (err)
+		goto exit;
+
+	err = self->aq_hw_ops->hw_prepare(self->aq_hw, &self->aq_fw_ops);
+
+exit:
+	return err;
+}
+
 int aq_nic_ndev_register(struct aq_nic_s *self)
 {
 	int err = 0;
@@ -266,7 +280,7 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
 		goto err_exit;
 	}
 
-	err = hw_atl_utils_initfw(self->aq_hw, &self->aq_fw_ops);
+	err = aq_nic_hw_prepare(self);
 	if (err)
 		goto err_exit;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 9b1062b8af64..2dba8c277ecb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -886,6 +886,8 @@ static int hw_atl_a0_hw_ring_rx_stop(struct aq_hw_s *self,
 }
 
 const struct aq_hw_ops hw_atl_ops_a0 = {
+	.hw_soft_reset        = hw_atl_utils_soft_reset,
+	.hw_prepare           = hw_atl_utils_initfw,
 	.hw_set_mac_address   = hw_atl_a0_hw_mac_addr_set,
 	.hw_init              = hw_atl_a0_hw_init,
 	.hw_reset             = hw_atl_a0_hw_reset,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index d20d91cdece8..4e2e4eef028d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -1478,6 +1478,8 @@ static int hw_atl_b0_set_loopback(struct aq_hw_s *self, u32 mode, bool enable)
 }
 
 const struct aq_hw_ops hw_atl_ops_b0 = {
+	.hw_soft_reset        = hw_atl_utils_soft_reset,
+	.hw_prepare           = hw_atl_utils_initfw,
 	.hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
 	.hw_init              = hw_atl_b0_hw_init,
 	.hw_reset             = hw_atl_b0_hw_reset,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 354705f9bc49..7259bcb81e9b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -67,10 +67,6 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 {
 	int err = 0;
 
-	err = hw_atl_utils_soft_reset(self);
-	if (err)
-		return err;
-
 	hw_atl_utils_hw_chip_features_init(self,
 					   &self->chip_features);
 
-- 
cgit v1.2.3-59-g8ed1b


From 36e90a5297ea02c67c0f17d8b39eb9ceb93dd6f0 Mon Sep 17 00:00:00 2001
From: Nikita Danilov <ndanilov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:33 +0300
Subject: net: atlantic: simplify hw_get_fw_version() usage

hw_get_fw_version() never fails, so this patch simplifies its
usage by using the return value instead of an output argument.

Signed-off-by: Nikita Danilov <ndanilov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h               | 2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c              | 6 +-----
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c | 8 +++-----
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h | 2 +-
 4 files changed, 6 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 84abce29d590..c0dada1075cf 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -259,7 +259,7 @@ struct aq_hw_ops {
 
 	struct aq_stats_s *(*hw_get_hw_stats)(struct aq_hw_s *self);
 
-	int (*hw_get_fw_version)(struct aq_hw_s *self, u32 *fw_version);
+	u32 (*hw_get_fw_version)(struct aq_hw_s *self);
 
 	int (*hw_set_offload)(struct aq_hw_s *self,
 			      struct aq_nic_cfg_s *aq_nic_cfg);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 7f4d8abab951..57102f35e9f3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1032,11 +1032,7 @@ struct aq_nic_cfg_s *aq_nic_get_cfg(struct aq_nic_s *self)
 
 u32 aq_nic_get_fw_version(struct aq_nic_s *self)
 {
-	u32 fw_version = 0U;
-
-	self->aq_hw_ops->hw_get_fw_version(self->aq_hw, &fw_version);
-
-	return fw_version;
+	return self->aq_hw_ops->hw_get_fw_version(self->aq_hw);
 }
 
 int aq_nic_set_loopback(struct aq_nic_s *self)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 7259bcb81e9b..bd1712ca9ef2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -70,7 +70,7 @@ int hw_atl_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
 	hw_atl_utils_hw_chip_features_init(self,
 					   &self->chip_features);
 
-	hw_atl_utils_get_fw_version(self, &self->fw_ver_actual);
+	self->fw_ver_actual = hw_atl_utils_get_fw_version(self);
 
 	if (hw_atl_utils_ver_match(HW_ATL_FW_VER_1X,
 				   self->fw_ver_actual) == 0) {
@@ -915,11 +915,9 @@ int hw_atl_utils_hw_get_regs(struct aq_hw_s *self,
 	return 0;
 }
 
-int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version)
+u32 hw_atl_utils_get_fw_version(struct aq_hw_s *self)
 {
-	*fw_version = aq_hw_read_reg(self, 0x18U);
-
-	return 0;
+	return aq_hw_read_reg(self, HW_ATL_MPI_FW_VERSION);
 }
 
 static int aq_fw1x_set_wake_magic(struct aq_hw_s *self, bool wol_enabled,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index b15513914636..086627a96746 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -622,7 +622,7 @@ int hw_atl_utils_hw_set_power(struct aq_hw_s *self,
 
 int hw_atl_utils_hw_deinit(struct aq_hw_s *self);
 
-int hw_atl_utils_get_fw_version(struct aq_hw_s *self, u32 *fw_version);
+u32 hw_atl_utils_get_fw_version(struct aq_hw_s *self);
 
 int hw_atl_utils_update_stats(struct aq_hw_s *self);
 
-- 
cgit v1.2.3-59-g8ed1b


From d0f23741c202c685447050713907f3be39a985ee Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:34 +0300
Subject: net: atlantic: make hw_get_regs optional

This patch fixes a potential crash when hw_get_regs is NULL.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 57102f35e9f3..2dbea5cd7684 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -778,6 +778,9 @@ int aq_nic_get_regs(struct aq_nic_s *self, struct ethtool_regs *regs, void *p)
 	u32 *regs_buff = p;
 	int err = 0;
 
+	if (unlikely(!self->aq_hw_ops->hw_get_regs))
+		return -EOPNOTSUPP;
+
 	regs->version = 1;
 
 	err = self->aq_hw_ops->hw_get_regs(self->aq_hw,
@@ -792,6 +795,9 @@ err_exit:
 
 int aq_nic_get_regs_count(struct aq_nic_s *self)
 {
+	if (unlikely(!self->aq_hw_ops->hw_get_regs))
+		return 0;
+
 	return self->aq_nic_cfg.aq_hw_caps->mac_regs_count;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d1ad88fe9fa9f5c3e4ecf509efb579852b44cc79 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:35 +0300
Subject: net: atlantic: move IS_CHIP_FEATURE to aq_hw.h

The IS_CHIP_FEATURE macro will be used to differentiate between A1 and A2
where necessary. Thus, move it to aq_hw.h, rename it to
ATL_HW_IS_CHIP_FEATURE and make it accept the 'hw' pointer.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     | 13 ++++++++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  2 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  6 ++--
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        | 36 ++++++++++++----------
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        | 11 -------
 5 files changed, 36 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index c0dada1075cf..f420ef40b627 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -136,6 +136,19 @@ enum aq_priv_flags {
 				 BIT(AQ_HW_LOOPBACK_PHYINT_SYS) |\
 				 BIT(AQ_HW_LOOPBACK_PHYEXT_SYS))
 
+#define ATL_HW_CHIP_MIPS         0x00000001U
+#define ATL_HW_CHIP_TPO2         0x00000002U
+#define ATL_HW_CHIP_RPF2         0x00000004U
+#define ATL_HW_CHIP_MPI_AQ       0x00000010U
+#define ATL_HW_CHIP_ATLANTIC     0x00800000U
+#define ATL_HW_CHIP_REVISION_A0  0x01000000U
+#define ATL_HW_CHIP_REVISION_B0  0x02000000U
+#define ATL_HW_CHIP_REVISION_B1  0x04000000U
+#define ATL_HW_CHIP_ANTIGUA      0x08000000U
+
+#define ATL_HW_IS_CHIP_FEATURE(_HW_, _F_) (!!(ATL_HW_CHIP_##_F_ & \
+	(_HW_)->chip_features))
+
 struct aq_hw_s {
 	atomic_t flags;
 	u8 rbl_enabled:1;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 2dba8c277ecb..eee265b4415a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -267,7 +267,7 @@ static int hw_atl_a0_hw_init_tx_path(struct aq_hw_s *self)
 	hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 1U);
 
 	/* misc */
-	aq_hw_write_reg(self, 0x00007040U, IS_CHIP_FEATURE(TPO2) ?
+	aq_hw_write_reg(self, 0x00007040U, ATL_HW_IS_CHIP_FEATURE(self, TPO2) ?
 			0x00010000U : 0x00000000U);
 	hw_atl_tdm_tx_dca_en_set(self, 0U);
 	hw_atl_tdm_tx_dca_mode_set(self, 0U);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 4e2e4eef028d..3b42045b9c7d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -324,7 +324,7 @@ static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 	hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 1U);
 
 	/* misc */
-	aq_hw_write_reg(self, 0x00007040U, IS_CHIP_FEATURE(TPO2) ?
+	aq_hw_write_reg(self, 0x00007040U, ATL_HW_IS_CHIP_FEATURE(self, TPO2) ?
 			0x00010000U : 0x00000000U);
 	hw_atl_tdm_tx_dca_en_set(self, 0U);
 	hw_atl_tdm_tx_dca_mode_set(self, 0U);
@@ -372,8 +372,8 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 	hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 1U);
 
 	/* misc */
-	aq_hw_write_reg(self, 0x00005040U,
-			IS_CHIP_FEATURE(RPF2) ? 0x000F0000U : 0x00000000U);
+	aq_hw_write_reg(self, 0x00005040U, ATL_HW_IS_CHIP_FEATURE(self, RPF2) ?
+			0x000F0000U : 0x00000000U);
 
 	hw_atl_rpfl2broadcast_flr_act_set(self, 1U);
 	hw_atl_rpfl2broadcast_count_threshold_set(self, 0xFFFFU & (~0U / 256U));
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index bd1712ca9ef2..20655a2170cc 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -309,7 +309,7 @@ int hw_atl_utils_fw_downld_dwords(struct aq_hw_s *self, u32 a,
 	for (++cnt; --cnt && !err;) {
 		aq_hw_write_reg(self, HW_ATL_MIF_CMD, 0x00008000U);
 
-		if (IS_CHIP_FEATURE(REVISION_B1))
+		if (ATL_HW_IS_CHIP_FEATURE(self, REVISION_B1))
 			err = readx_poll_timeout_atomic(hw_atl_utils_mif_addr_get,
 							self, val, val != a,
 							1U, 1000U);
@@ -405,7 +405,7 @@ static int hw_atl_utils_fw_upload_dwords(struct aq_hw_s *self, u32 addr, u32 *p,
 	if (err < 0)
 		goto err_exit;
 
-	if (IS_CHIP_FEATURE(REVISION_B1))
+	if (ATL_HW_IS_CHIP_FEATURE(self, REVISION_B1))
 		err = hw_atl_utils_write_b1_mbox(self, addr, p, cnt, area);
 	else
 		err = hw_atl_utils_write_b0_mbox(self, addr, p, cnt);
@@ -497,7 +497,7 @@ int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size)
 	struct aq_hw_atl_utils_fw_rpc_tid_s sw;
 	int err = 0;
 
-	if (!IS_CHIP_FEATURE(MIPS)) {
+	if (!ATL_HW_IS_CHIP_FEATURE(self, MIPS)) {
 		err = -1;
 		goto err_exit;
 	}
@@ -603,7 +603,7 @@ void hw_atl_utils_mpi_read_stats(struct aq_hw_s *self,
 	if (err < 0)
 		goto err_exit;
 
-	if (IS_CHIP_FEATURE(REVISION_A0)) {
+	if (ATL_HW_IS_CHIP_FEATURE(self, REVISION_A0)) {
 		unsigned int mtu = self->aq_nic_cfg ?
 					self->aq_nic_cfg->mtu : 1514U;
 		pmbox->stats.ubrc = pmbox->stats.uprc * mtu;
@@ -802,22 +802,24 @@ void hw_atl_utils_hw_chip_features_init(struct aq_hw_s *self, u32 *p)
 	u32 mif_rev = val & 0xFFU;
 	u32 chip_features = 0U;
 
+	chip_features |= ATL_HW_CHIP_ATLANTIC;
+
 	if ((0xFU & mif_rev) == 1U) {
-		chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_A0 |
-			HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
-			HAL_ATLANTIC_UTILS_CHIP_MIPS;
+		chip_features |= ATL_HW_CHIP_REVISION_A0 |
+			ATL_HW_CHIP_MPI_AQ |
+			ATL_HW_CHIP_MIPS;
 	} else if ((0xFU & mif_rev) == 2U) {
-		chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B0 |
-			HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
-			HAL_ATLANTIC_UTILS_CHIP_MIPS |
-			HAL_ATLANTIC_UTILS_CHIP_TPO2 |
-			HAL_ATLANTIC_UTILS_CHIP_RPF2;
+		chip_features |= ATL_HW_CHIP_REVISION_B0 |
+			ATL_HW_CHIP_MPI_AQ |
+			ATL_HW_CHIP_MIPS |
+			ATL_HW_CHIP_TPO2 |
+			ATL_HW_CHIP_RPF2;
 	} else if ((0xFU & mif_rev) == 0xAU) {
-		chip_features |= HAL_ATLANTIC_UTILS_CHIP_REVISION_B1 |
-			HAL_ATLANTIC_UTILS_CHIP_MPI_AQ |
-			HAL_ATLANTIC_UTILS_CHIP_MIPS |
-			HAL_ATLANTIC_UTILS_CHIP_TPO2 |
-			HAL_ATLANTIC_UTILS_CHIP_RPF2;
+		chip_features |= ATL_HW_CHIP_REVISION_B1 |
+			ATL_HW_CHIP_MPI_AQ |
+			ATL_HW_CHIP_MIPS |
+			ATL_HW_CHIP_TPO2 |
+			ATL_HW_CHIP_RPF2;
 	}
 
 	*p = chip_features;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 086627a96746..5513254642b3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -406,17 +406,6 @@ enum hw_atl_rx_ctrl_registers_l3l4 {
 #define HW_ATL_GET_REG_LOCATION_FL3L4(location) \
 	((location) - AQ_RX_FIRST_LOC_FL3L4)
 
-#define HAL_ATLANTIC_UTILS_CHIP_MIPS         0x00000001U
-#define HAL_ATLANTIC_UTILS_CHIP_TPO2         0x00000002U
-#define HAL_ATLANTIC_UTILS_CHIP_RPF2         0x00000004U
-#define HAL_ATLANTIC_UTILS_CHIP_MPI_AQ       0x00000010U
-#define HAL_ATLANTIC_UTILS_CHIP_REVISION_A0  0x01000000U
-#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B0  0x02000000U
-#define HAL_ATLANTIC_UTILS_CHIP_REVISION_B1  0x04000000U
-
-#define IS_CHIP_FEATURE(_F_) (HAL_ATLANTIC_UTILS_CHIP_##_F_ & \
-	self->chip_features)
-
 enum hal_atl_utils_fw_state_e {
 	MPI_DEINIT = 0,
 	MPI_RESET = 1,
-- 
cgit v1.2.3-59-g8ed1b


From f67619611b4ce0e3b2bbc2cd75b9c2ce2035ef1e Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:36 +0300
Subject: net: atlantic: A2 driver-firmware interface

This patch adds the driver<->firmware interface for A2.

Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils.h      | 593 +++++++++++++++++++++
 1 file changed, 593 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
new file mode 100644
index 000000000000..7d4ac65440c9
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
@@ -0,0 +1,593 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef HW_ATL2_UTILS_H
+#define HW_ATL2_UTILS_H
+
+/* F W    A P I */
+
+struct link_options_s {
+	u8 link_up:1;
+	u8 link_renegotiate:1;
+	u8 minimal_link_speed:1;
+	u8 internal_loopback:1;
+	u8 external_loopback:1;
+	u8 rate_10M_hd:1;
+	u8 rate_100M_hd:1;
+	u8 rate_1G_hd:1;
+
+	u8 rate_10M:1;
+	u8 rate_100M:1;
+	u8 rate_1G:1;
+	u8 rate_2P5G:1;
+	u8 rate_N2P5G:1;
+	u8 rate_5G:1;
+	u8 rate_N5G:1;
+	u8 rate_10G:1;
+
+	u8 eee_100M:1;
+	u8 eee_1G:1;
+	u8 eee_2P5G:1;
+	u8 eee_5G:1;
+	u8 eee_10G:1;
+	u8 rsvd3:3;
+
+	u8 pause_rx:1;
+	u8 pause_tx:1;
+	u8 rsvd4:1;
+	u8 downshift:1;
+	u8 downshift_retry:4;
+};
+
+struct link_control_s {
+	u8 mode:4;
+	u8 disable_crc_corruption:1;
+	u8 discard_short_frames:1;
+	u8 flow_control_mode:1;
+	u8 disable_length_check:1;
+
+	u8 discard_errored_frames:1;
+	u8 control_frame_enable:1;
+	u8 enable_tx_padding:1;
+	u8 enable_crc_forwarding:1;
+	u8 enable_frame_padding_removal_rx: 1;
+	u8 promiscuous_mode: 1;
+	u8 rsvd:2;
+
+	u16 rsvd2;
+};
+
+struct thermal_shutdown_s {
+	u8 enable:1;
+	u8 warning_enable:1;
+	u8 rsvd:6;
+
+	u8 shutdown_temperature;
+	u8 cold_temperature;
+	u8 warning_temperature;
+};
+
+struct mac_address_s {
+	u8 mac_address[6];
+};
+
+struct mac_address_aligned_s {
+	struct mac_address_s aligned;
+	u16 rsvd;
+};
+
+struct sleep_proxy_s {
+	struct wake_on_lan_s {
+		u8 wake_on_magic_packet:1;
+		u8 wake_on_pattern:1;
+		u8 wake_on_link_up:1;
+		u8 wake_on_link_down:1;
+		u8 wake_on_ping:1;
+		u8 wake_on_timer:1;
+		u8 rsvd:2;
+
+		u8 rsvd2;
+		u16 rsvd3;
+
+		u32 link_up_timeout;
+		u32 link_down_timeout;
+		u32 timer;
+	} wake_on_lan;
+
+	struct {
+		u32 mask[4];
+		u32 crc32;
+	} wake_up_pattern[8];
+
+	struct __attribute__ ((__packed__)) {
+		u8 arp_responder:1;
+		u8 echo_responder:1;
+		u8 igmp_client:1;
+		u8 echo_truncate:1;
+		u8 address_guard:1;
+		u8 ignore_fragmented:1;
+		u8 rsvd:2;
+
+		u16 echo_max_len;
+		u8 rsvd2;
+	} ipv4_offload;
+
+	u32 ipv4_offload_addr[8];
+	u32 reserved[8];
+
+	struct __attribute__ ((__packed__)) {
+		u8 ns_responder:1;
+		u8 echo_responder:1;
+		u8 mld_client:1;
+		u8 echo_truncate:1;
+		u8 address_guard:1;
+		u8 rsvd:3;
+
+		u16 echo_max_len;
+		u8 rsvd2;
+	} ipv6_offload;
+
+	u32 ipv6_offload_addr[16][4];
+
+	struct {
+		u16 port[16];
+	} tcp_port_offload;
+
+	struct {
+		u16 port[16];
+	} udp_port_offload;
+
+	struct {
+		u32 retry_count;
+		u32 retry_interval;
+	} ka4_offload;
+
+	struct {
+		u32 timeout;
+		u16 local_port;
+		u16 remote_port;
+		u8 remote_mac_addr[6];
+		u16 rsvd;
+		u32 rsvd2;
+		u32 rsvd3;
+		u16 rsvd4;
+		u16 win_size;
+		u32 seq_num;
+		u32 ack_num;
+		u32 local_ip;
+		u32 remote_ip;
+	} ka4_connection[16];
+
+	struct {
+		u32 retry_count;
+		u32 retry_interval;
+	} ka6_offload;
+
+	struct {
+		u32 timeout;
+		u16 local_port;
+		u16 remote_port;
+		u8 remote_mac_addr[6];
+		u16 rsvd;
+		u32 rsvd2;
+		u32 rsvd3;
+		u16 rsvd4;
+		u16 win_size;
+		u32 seq_num;
+		u32 ack_num;
+		u32 local_ip[4];
+		u32 remote_ip[4];
+	} ka6_connection[16];
+
+	struct {
+		u32 rr_count;
+		u32 rr_buf_len;
+		u32 idx_offset;
+		u32 rr__offset;
+	} mdns_offload;
+};
+
+struct pause_quanta_s {
+	u16 quanta_10M;
+	u16 threshold_10M;
+	u16 quanta_100M;
+	u16 threshold_100M;
+	u16 quanta_1G;
+	u16 threshold_1G;
+	u16 quanta_2P5G;
+	u16 threshold_2P5G;
+	u16 quanta_5G;
+	u16 threshold_5G;
+	u16 quanta_10G;
+	u16 threshold_10G;
+};
+
+struct data_buffer_status_s {
+	u32 data_offset;
+	u32 data_length;
+};
+
+struct device_caps_s {
+	u8 finite_flashless:1;
+	u8 cable_diag:1;
+	u8 ncsi:1;
+	u8 avb:1;
+	u8 rsvd:4;
+
+	u8 rsvd2;
+	u16 rsvd3;
+	u32 rsvd4;
+};
+
+struct version_s {
+	struct bundle_version_t {
+		u8 major;
+		u8 minor;
+		u16 build;
+	} bundle;
+	struct mac_version_t {
+		u8 major;
+		u8 minor;
+		u16 build;
+	} mac;
+	struct phy_version_t {
+		u8 major;
+		u8 minor;
+		u16 build;
+	} phy;
+	u32 rsvd;
+};
+
+struct link_status_s {
+	u8 link_state:4;
+	u8 link_rate:4;
+
+	u8 pause_tx:1;
+	u8 pause_rx:1;
+	u8 eee:1;
+	u8 duplex:1;
+	u8 rsvd:4;
+
+	u16 rsvd2;
+};
+
+struct wol_status_s {
+	u8 wake_count;
+	u8 wake_reason;
+
+	u16 wake_up_packet_length :12;
+	u16 wake_up_pattern_number :3;
+	u16 rsvd:1;
+
+	u32 wake_up_packet[379];
+};
+
+struct mac_health_monitor_s {
+	u8 mac_ready:1;
+	u8 mac_fault:1;
+	u8 mac_flashless_finished:1;
+	u8 rsvd:5;
+
+	u8 mac_temperature;
+	u16 mac_heart_beat;
+	u16 mac_fault_code;
+	u16 rsvd2;
+};
+
+struct phy_health_monitor_s {
+	u8 phy_ready:1;
+	u8 phy_fault:1;
+	u8 phy_hot_warning:1;
+	u8 rsvd:5;
+
+	u8 phy_temperature;
+	u16 phy_heart_beat;
+	u16 phy_fault_code;
+	u16 rsvd2;
+};
+
+struct device_link_caps_s {
+	u8 rsvd:3;
+	u8 internal_loopback:1;
+	u8 external_loopback:1;
+	u8 rate_10M_hd:1;
+	u8 rate_100M_hd:1;
+	u8 rate_1G_hd:1;
+
+	u8 rate_10M:1;
+	u8 rate_100M:1;
+	u8 rate_1G:1;
+	u8 rate_2P5G:1;
+	u8 rate_N2P5G:1;
+	u8 rate_5G:1;
+	u8 rate_N5G:1;
+	u8 rate_10G:1;
+
+	u8 rsvd3:1;
+	u8 eee_100M:1;
+	u8 eee_1G:1;
+	u8 eee_2P5G:1;
+	u8 rsvd4:1;
+	u8 eee_5G:1;
+	u8 rsvd5:1;
+	u8 eee_10G:1;
+
+	u8 pause_rx:1;
+	u8 pause_tx:1;
+	u8 pfc:1;
+	u8 downshift:1;
+	u8 downshift_retry:4;
+};
+
+struct sleep_proxy_caps_s {
+	u8 ipv4_offload:1;
+	u8 ipv6_offload:1;
+	u8 tcp_port_offload:1;
+	u8 udp_port_offload:1;
+	u8 ka4_offload:1;
+	u8 ka6_offload:1;
+	u8 mdns_offload:1;
+	u8 wake_on_ping:1;
+
+	u8 wake_on_magic_packet:1;
+	u8 wake_on_pattern:1;
+	u8 wake_on_timer:1;
+	u8 wake_on_link:1;
+	u8 wake_patterns_count:4;
+
+	u8 ipv4_count;
+	u8 ipv6_count;
+
+	u8 tcp_port_offload_count;
+	u8 udp_port_offload_count;
+
+	u8 tcp4_ka_count;
+	u8 tcp6_ka_count;
+
+	u8 igmp_offload:1;
+	u8 mld_offload:1;
+	u8 rsvd:6;
+
+	u8 rsvd2;
+	u16 rsvd3;
+};
+
+struct lkp_link_caps_s {
+	u8 rsvd:5;
+	u8 rate_10M_hd:1;
+	u8 rate_100M_hd:1;
+	u8 rate_1G_hd:1;
+
+	u8 rate_10M:1;
+	u8 rate_100M:1;
+	u8 rate_1G:1;
+	u8 rate_2P5G:1;
+	u8 rate_N2P5G:1;
+	u8 rate_5G:1;
+	u8 rate_N5G:1;
+	u8 rate_10G:1;
+
+	u8 rsvd2:1;
+	u8 eee_100M:1;
+	u8 eee_1G:1;
+	u8 eee_2P5G:1;
+	u8 rsvd3:1;
+	u8 eee_5G:1;
+	u8 rsvd4:1;
+	u8 eee_10G:1;
+
+	u8 pause_rx:1;
+	u8 pause_tx:1;
+	u8 rsvd5:6;
+};
+
+struct core_dump_s {
+	u32 reg0;
+	u32 reg1;
+	u32 reg2;
+
+	u32 hi;
+	u32 lo;
+
+	u32 regs[32];
+};
+
+struct trace_s {
+	u32 sync_counter;
+	u32 mem_buffer[0x1ff];
+};
+
+struct cable_diag_control_s {
+	u8 toggle :1;
+	u8 rsvd:7;
+
+	u8 wait_timeout_sec;
+	u16 rsvd2;
+};
+
+struct cable_diag_lane_data_s {
+	u8 result_code;
+	u8 dist;
+	u8 far_dist;
+	u8 rsvd;
+};
+
+struct cable_diag_status_s {
+	struct cable_diag_lane_data_s lane_data[4];
+	u8 transact_id;
+	u8 status:4;
+	u8 rsvd:4;
+	u16 rsvd2;
+};
+
+struct statistics_s {
+	struct {
+		u32 link_up;
+		u32 link_down;
+	} link;
+
+	struct {
+		u64 tx_unicast_octets;
+		u64 tx_multicast_octets;
+		u64 tx_broadcast_octets;
+		u64 rx_unicast_octets;
+		u64 rx_multicast_octets;
+		u64 rx_broadcast_octets;
+
+		u32 tx_unicast_frames;
+		u32 tx_multicast_frames;
+		u32 tx_broadcast_frames;
+		u32 tx_errors;
+
+		u32 rx_unicast_frames;
+		u32 rx_multicast_frames;
+		u32 rx_broadcast_frames;
+		u32 rx_dropped_frames;
+		u32 rx_error_frames;
+
+		u32 tx_good_frames;
+		u32 rx_good_frames;
+		u32 reserve_fw_gap;
+	} msm;
+	u32 main_loop_cycles;
+	u32 reserve_fw_gap;
+};
+
+struct filter_caps_s {
+	u8 l2_filters_base_index:6;
+	u8 flexible_filter_mask:2;
+	u8 l2_filter_count;
+	u8 ethertype_filter_base_index;
+	u8 ethertype_filter_count;
+
+	u8 vlan_filter_base_index;
+	u8 vlan_filter_count;
+	u8 l3_ip4_filter_base_index:4;
+	u8 l3_ip4_filter_count:4;
+	u8 l3_ip6_filter_base_index:4;
+	u8 l3_ip6_filter_count:4;
+
+	u8 l4_filter_base_index:4;
+	u8 l4_filter_count:4;
+	u8 l4_flex_filter_base_index:4;
+	u8 l4_flex_filter_count:4;
+	u8 rslv_tbl_base_index;
+	u8 rslv_tbl_count;
+};
+
+struct request_policy_s {
+	struct {
+		u8 all:1;
+		u8 mcast:1;
+		u8 rx_queue_tc_index:5;
+		u8 queue_or_tc:1;
+	} promisc;
+
+	struct {
+		u8 accept:1;
+		u8 rsvd:1;
+		u8 rx_queue_tc_index:5;
+		u8 queue_or_tc:1;
+	} bcast;
+
+	struct {
+		u8 accept:1;
+		u8 rsvd:1;
+		u8 rx_queue_tc_index:5;
+		u8 queue_or_tc:1;
+	} mcast;
+
+	u8 rsvd:8;
+};
+
+struct fw_interface_in {
+	u32 mtu;
+	u32 rsvd1;
+	struct mac_address_aligned_s mac_address;
+	struct link_control_s link_control;
+	u32 rsvd2;
+	struct link_options_s link_options;
+	u32 rsvd3;
+	struct thermal_shutdown_s thermal_shutdown;
+	u32 rsvd4;
+	struct sleep_proxy_s sleep_proxy;
+	u32 rsvd5;
+	struct pause_quanta_s pause_quanta[8];
+	struct cable_diag_control_s cable_diag_control;
+	u32 rsvd6;
+	struct data_buffer_status_s data_buffer_status;
+	u32 rsvd7;
+	struct request_policy_s request_policy;
+};
+
+struct transaction_counter_s {
+	u16 transaction_cnt_a;
+	u16 transaction_cnt_b;
+};
+
+struct management_status_s {
+	struct mac_address_s mac_address;
+	u16 vlan;
+
+	struct{
+		u32 enable : 1;
+		u32 rsvd:31;
+	} flags;
+
+	u32 rsvd1;
+	u32 rsvd2;
+	u32 rsvd3;
+	u32 rsvd4;
+	u32 rsvd5;
+};
+
+struct fw_interface_out {
+	struct transaction_counter_s transaction_id;
+	struct version_s version;
+	struct link_status_s link_status;
+	struct wol_status_s wol_status;
+	u32 rsvd;
+	u32 rsvd2;
+	struct mac_health_monitor_s mac_health_monitor;
+	u32 rsvd3;
+	u32 rsvd4;
+	struct phy_health_monitor_s phy_health_monitor;
+	u32 rsvd5;
+	u32 rsvd6;
+	struct cable_diag_status_s cable_diag_status;
+	u32 rsvd7;
+	struct device_link_caps_s device_link_caps;
+	u32 rsvd8;
+	struct sleep_proxy_caps_s sleep_proxy_caps;
+	u32 rsvd9;
+	struct lkp_link_caps_s lkp_link_caps;
+	u32 rsvd10;
+	struct core_dump_s core_dump;
+	u32 rsvd11;
+	struct statistics_s stats;
+	u32 rsvd12;
+	struct filter_caps_s filter_caps;
+	struct device_caps_s device_caps;
+	u32 rsvd13;
+	struct management_status_s management_status;
+	u32 reserve[21];
+	struct trace_s trace;
+};
+
+#define  AQ_A2_FW_LINK_RATE_INVALID 0
+#define  AQ_A2_FW_LINK_RATE_10M     1
+#define  AQ_A2_FW_LINK_RATE_100M    2
+#define  AQ_A2_FW_LINK_RATE_1G      3
+#define  AQ_A2_FW_LINK_RATE_2G5     4
+#define  AQ_A2_FW_LINK_RATE_5G      5
+#define  AQ_A2_FW_LINK_RATE_10G     6
+
+#define  AQ_HOST_MODE_INVALID      0U
+#define  AQ_HOST_MODE_ACTIVE       1U
+#define  AQ_HOST_MODE_SLEEP_PROXY  2U
+#define  AQ_HOST_MODE_LOW_POWER    3U
+#define  AQ_HOST_MODE_SHUTDOWN     4U
+
+#endif /* HW_ATL2_UTILS_H */
-- 
cgit v1.2.3-59-g8ed1b


From 258ff0cf61d607e17f2e273aae3e50c1dd251dec Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:37 +0300
Subject: net: atlantic: minimal A2 HW bindings required for fw_ops

This patch adds the bare minimum of A2 HW bindings required to
get fw_ops working.

Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/Makefile    |  1 +
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  1 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        | 56 ++++++++++++++++++++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        | 31 ++++++++++++
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        | 48 +++++++++++++++++++
 5 files changed, 137 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 8b555665a33a..86824f1868ab 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -25,6 +25,7 @@ atlantic-objs := aq_main.o \
 	hw_atl/hw_atl_utils.o \
 	hw_atl/hw_atl_utils_fw2x.o \
 	hw_atl/hw_atl_llh.o \
+	hw_atl2/hw_atl2_llh.o \
 	macsec/macsec_api.o
 
 atlantic-$(CONFIG_MACSEC) += aq_macsec.o
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index f420ef40b627..e770d91e0876 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -172,6 +172,7 @@ struct aq_hw_s {
 	struct hw_atl_utils_fw_rpc rpc;
 	s64 ptp_clk_offset;
 	u16 phy_id;
+	void *priv;
 };
 
 struct aq_ring_s;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
new file mode 100644
index 000000000000..b6164bc5fffd
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -0,0 +1,56 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include "hw_atl2_llh.h"
+#include "hw_atl2_llh_internal.h"
+#include "aq_hw_utils.h"
+
+void hw_atl2_mif_shared_buf_get(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				int len)
+{
+	int j = 0;
+	int i;
+
+	for (i = offset; i < offset + len; i++, j++)
+		data[j] = aq_hw_read_reg(aq_hw,
+					 HW_ATL2_MIF_SHARED_BUFFER_IN_ADR(i));
+}
+
+void hw_atl2_mif_shared_buf_write(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				  int len)
+{
+	int j = 0;
+	int i;
+
+	for (i = offset; i < offset + len; i++, j++)
+		aq_hw_write_reg(aq_hw, HW_ATL2_MIF_SHARED_BUFFER_IN_ADR(i),
+				data[j]);
+}
+
+void hw_atl2_mif_shared_buf_read(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				 int len)
+{
+	int j = 0;
+	int i;
+
+	for (i = offset; i < offset + len; i++, j++)
+		data[j] = aq_hw_read_reg(aq_hw,
+					 HW_ATL2_MIF_SHARED_BUFFER_OUT_ADR(i));
+}
+
+void hw_atl2_mif_host_finished_write_set(struct aq_hw_s *aq_hw, u32 finish)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_MIF_HOST_FINISHED_WRITE_ADR,
+			    HW_ATL2_MIF_HOST_FINISHED_WRITE_MSK,
+			    HW_ATL2_MIF_HOST_FINISHED_WRITE_SHIFT,
+			    finish);
+}
+
+u32 hw_atl2_mif_mcp_finished_read_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL2_MIF_MCP_FINISHED_READ_ADR,
+				  HW_ATL2_MIF_MCP_FINISHED_READ_MSK,
+				  HW_ATL2_MIF_MCP_FINISHED_READ_SHIFT);
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
new file mode 100644
index 000000000000..8ef8bd6b2534
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -0,0 +1,31 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef HW_ATL2_LLH_H
+#define HW_ATL2_LLH_H
+
+#include <linux/types.h>
+
+struct aq_hw_s;
+
+/* get data from firmware shared input buffer */
+void hw_atl2_mif_shared_buf_get(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				int len);
+
+/* set data into firmware shared input buffer */
+void hw_atl2_mif_shared_buf_write(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				  int len);
+
+/* get data from firmware shared output buffer */
+void hw_atl2_mif_shared_buf_read(struct aq_hw_s *aq_hw, int offset, u32 *data,
+				 int len);
+
+/* set host finished write shared buffer indication */
+void hw_atl2_mif_host_finished_write_set(struct aq_hw_s *aq_hw, u32 finish);
+
+/* get mcp finished read shared buffer indication */
+u32 hw_atl2_mif_mcp_finished_read_get(struct aq_hw_s *aq_hw);
+
+#endif /* HW_ATL2_LLH_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
new file mode 100644
index 000000000000..835deb2d1950
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -0,0 +1,48 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef HW_ATL2_LLH_INTERNAL_H
+#define HW_ATL2_LLH_INTERNAL_H
+
+/* Register address for firmware shared input buffer */
+#define HW_ATL2_MIF_SHARED_BUFFER_IN_ADR(dword) (0x00012000U + (dword) * 0x4U)
+/* Register address for firmware shared output buffer */
+#define HW_ATL2_MIF_SHARED_BUFFER_OUT_ADR(dword) (0x00013000U + (dword) * 0x4U)
+
+/* pif_host_finished_buf_wr_i Bitfield Definitions
+ * Preprocessor definitions for the bitfield "pif_host_finished_buf_wr_i".
+ * PORT="pif_host_finished_buf_wr_i"
+ */
+/* Register address for bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_ADR 0x00000e00u
+/* Bitmask for bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_MSK 0x00000001u
+/* Inverted bitmask for bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_MSKN 0xFFFFFFFEu
+/* Lower bit position of bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_SHIFT 0
+/* Width of bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_WIDTH 1
+/* Default value of bitfield pif_host_finished_buf_wr_i */
+#define HW_ATL2_MIF_HOST_FINISHED_WRITE_DEFAULT 0x0
+
+/* pif_mcp_finished_buf_rd_i Bitfield Definitions
+ * Preprocessor definitions for the bitfield "pif_mcp_finished_buf_rd_i".
+ * PORT="pif_mcp_finished_buf_rd_i"
+ */
+/* Register address for bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_ADR 0x00000e04u
+/* Bitmask for bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_MSK 0x00000001u
+/* Inverted bitmask for bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_MSKN 0xFFFFFFFEu
+/* Lower bit position of bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_SHIFT 0
+/* Width of bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_WIDTH 1
+/* Default value of bitfield pif_mcp_finished_buf_rd_i */
+#define HW_ATL2_MIF_MCP_FINISHED_READ_DEFAULT 0x0
+
+#endif /* HW_ATL2_LLH_INTERNAL_H */
-- 
cgit v1.2.3-59-g8ed1b


From 5cfd54d7dc186a368af92aba0dcb8b4d4bbe8658 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:38 +0300
Subject: net: atlantic: minimal A2 fw_ops

This patch adds the minimum set of FW ops for A2.

Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Co-developed-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/Makefile    |   1 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |  17 ++
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils.h      |   5 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c   | 329 +++++++++++++++++++++
 4 files changed, 352 insertions(+)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c

diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 86824f1868ab..fa845c15d0e1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -25,6 +25,7 @@ atlantic-objs := aq_main.o \
 	hw_atl/hw_atl_utils.o \
 	hw_atl/hw_atl_utils_fw2x.o \
 	hw_atl/hw_atl_llh.o \
+	hw_atl2/hw_atl2_utils_fw.o \
 	hw_atl2/hw_atl2_llh.o \
 	macsec/macsec_api.o
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
new file mode 100644
index 000000000000..233db3222bb8
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef HW_ATL2_INTERNAL_H
+#define HW_ATL2_INTERNAL_H
+
+#include "hw_atl2_utils.h"
+
+#define HW_ATL2_MTU_JUMBO  16352U
+
+struct hw_atl2_priv {
+	struct statistics_s last_stats;
+};
+
+#endif /* HW_ATL2_INTERNAL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
index 7d4ac65440c9..5421fbed3db5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
@@ -590,4 +590,9 @@ struct fw_interface_out {
 #define  AQ_HOST_MODE_LOW_POWER    3U
 #define  AQ_HOST_MODE_SHUTDOWN     4U
 
+int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
+						u8 *base_index, u8 *count);
+
+extern const struct aq_fw_ops aq_a2_fw_ops;
+
 #endif /* HW_ATL2_UTILS_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
new file mode 100644
index 000000000000..c3e0e5575810
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
@@ -0,0 +1,329 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <linux/iopoll.h>
+
+#include "aq_hw.h"
+#include "hw_atl/hw_atl_llh.h"
+#include "hw_atl2_utils.h"
+#include "hw_atl2_llh.h"
+#include "hw_atl2_internal.h"
+
+#define AQ_A2_FW_READ_TRY_MAX 1000
+
+#define hw_atl2_shared_buffer_write(HW, ITEM, VARIABLE) \
+	hw_atl2_mif_shared_buf_write(HW,\
+		(offsetof(struct fw_interface_in, ITEM) / sizeof(u32)),\
+		(u32 *)&(VARIABLE), sizeof(VARIABLE) / sizeof(u32))
+
+#define hw_atl2_shared_buffer_get(HW, ITEM, VARIABLE) \
+	hw_atl2_mif_shared_buf_get(HW, \
+		(offsetof(struct fw_interface_in, ITEM) / sizeof(u32)),\
+		(u32 *)&(VARIABLE), \
+		sizeof(VARIABLE) / sizeof(u32))
+
+/* This should never be used on non atomic fields,
+ * treat any > u32 read as non atomic.
+ */
+#define hw_atl2_shared_buffer_read(HW, ITEM, VARIABLE) \
+{\
+	BUILD_BUG_ON_MSG((offsetof(struct fw_interface_out, ITEM) % \
+			 sizeof(u32)) != 0,\
+			 "Non aligned read " # ITEM);\
+	BUILD_BUG_ON_MSG(sizeof(VARIABLE) > sizeof(u32),\
+			 "Non atomic read " # ITEM);\
+	hw_atl2_mif_shared_buf_read(HW, \
+		(offsetof(struct fw_interface_out, ITEM) / sizeof(u32)),\
+		(u32 *)&(VARIABLE), sizeof(VARIABLE) / sizeof(u32));\
+}
+
+#define hw_atl2_shared_buffer_read_safe(HW, ITEM, DATA) \
+	hw_atl2_shared_buffer_read_block((HW), \
+		(offsetof(struct fw_interface_out, ITEM) / sizeof(u32)),\
+		sizeof(((struct fw_interface_out *)0)->ITEM) / sizeof(u32),\
+		(DATA))
+
+static int hw_atl2_shared_buffer_read_block(struct aq_hw_s *self,
+					    u32 offset, u32 dwords, void *data)
+{
+	struct transaction_counter_s tid1, tid2;
+	int cnt = 0;
+
+	do {
+		do {
+			hw_atl2_shared_buffer_read(self, transaction_id, tid1);
+			cnt++;
+			if (cnt > AQ_A2_FW_READ_TRY_MAX)
+				return -ETIME;
+			if (tid1.transaction_cnt_a != tid1.transaction_cnt_b)
+				udelay(1);
+		} while (tid1.transaction_cnt_a != tid1.transaction_cnt_b);
+
+		hw_atl2_mif_shared_buf_read(self, offset, (u32 *)data, dwords);
+
+		hw_atl2_shared_buffer_read(self, transaction_id, tid2);
+
+		cnt++;
+		if (cnt > AQ_A2_FW_READ_TRY_MAX)
+			return -ETIME;
+	} while (tid2.transaction_cnt_a != tid2.transaction_cnt_b ||
+		 tid1.transaction_cnt_a != tid2.transaction_cnt_a);
+
+	return 0;
+}
+
+static inline int hw_atl2_shared_buffer_finish_ack(struct aq_hw_s *self)
+{
+	u32 val;
+	int err;
+
+	hw_atl2_mif_host_finished_write_set(self, 1U);
+	err = readx_poll_timeout_atomic(hw_atl2_mif_mcp_finished_read_get,
+					self, val, val == 0U,
+					100, 100000U);
+	WARN(err, "hw_atl2_shared_buffer_finish_ack");
+
+	return err;
+}
+
+static int aq_a2_fw_init(struct aq_hw_s *self)
+{
+	struct link_control_s link_control;
+	u32 mtu;
+	u32 val;
+	int err;
+
+	hw_atl2_shared_buffer_get(self, link_control, link_control);
+	link_control.mode = AQ_HOST_MODE_ACTIVE;
+	hw_atl2_shared_buffer_write(self, link_control, link_control);
+
+	hw_atl2_shared_buffer_get(self, mtu, mtu);
+	mtu = HW_ATL2_MTU_JUMBO;
+	hw_atl2_shared_buffer_write(self, mtu, mtu);
+
+	hw_atl2_mif_host_finished_write_set(self, 1U);
+	err = readx_poll_timeout_atomic(hw_atl2_mif_mcp_finished_read_get,
+					self, val, val == 0U,
+					100, 5000000U);
+	WARN(err, "hw_atl2_shared_buffer_finish_ack");
+
+	return err;
+}
+
+static int aq_a2_fw_deinit(struct aq_hw_s *self)
+{
+	struct link_control_s link_control;
+
+	hw_atl2_shared_buffer_get(self, link_control, link_control);
+	link_control.mode = AQ_HOST_MODE_SHUTDOWN;
+	hw_atl2_shared_buffer_write(self, link_control, link_control);
+
+	return hw_atl2_shared_buffer_finish_ack(self);
+}
+
+static void a2_link_speed_mask2fw(u32 speed,
+				  struct link_options_s *link_options)
+{
+	link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G);
+	link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G);
+	link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR);
+	link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2GS);
+	link_options->rate_N2P5G = link_options->rate_2P5G;
+	link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G);
+	link_options->rate_100M = !!(speed & AQ_NIC_RATE_100M);
+	link_options->rate_10M = !!(speed & AQ_NIC_RATE_10M);
+}
+
+static int aq_a2_fw_set_link_speed(struct aq_hw_s *self, u32 speed)
+{
+	struct link_options_s link_options;
+
+	hw_atl2_shared_buffer_get(self, link_options, link_options);
+	link_options.link_up = 1U;
+	a2_link_speed_mask2fw(speed, &link_options);
+	hw_atl2_shared_buffer_write(self, link_options, link_options);
+
+	return hw_atl2_shared_buffer_finish_ack(self);
+}
+
+static int aq_a2_fw_set_state(struct aq_hw_s *self,
+			      enum hal_atl_utils_fw_state_e state)
+{
+	struct link_options_s link_options;
+
+	hw_atl2_shared_buffer_get(self, link_options, link_options);
+
+	switch (state) {
+	case MPI_INIT:
+		link_options.link_up = 1U;
+		break;
+	case MPI_DEINIT:
+		link_options.link_up = 0U;
+		break;
+	case MPI_RESET:
+	case MPI_POWER:
+		/* No actions */
+		break;
+	}
+
+	hw_atl2_shared_buffer_write(self, link_options, link_options);
+
+	return hw_atl2_shared_buffer_finish_ack(self);
+}
+
+static int aq_a2_fw_update_link_status(struct aq_hw_s *self)
+{
+	struct link_status_s link_status;
+
+	hw_atl2_shared_buffer_read(self, link_status, link_status);
+
+	switch (link_status.link_rate) {
+	case AQ_A2_FW_LINK_RATE_10G:
+		self->aq_link_status.mbps = 10000;
+		break;
+	case AQ_A2_FW_LINK_RATE_5G:
+		self->aq_link_status.mbps = 5000;
+		break;
+	case AQ_A2_FW_LINK_RATE_2G5:
+		self->aq_link_status.mbps = 2500;
+		break;
+	case AQ_A2_FW_LINK_RATE_1G:
+		self->aq_link_status.mbps = 1000;
+		break;
+	case AQ_A2_FW_LINK_RATE_100M:
+		self->aq_link_status.mbps = 100;
+		break;
+	case AQ_A2_FW_LINK_RATE_10M:
+		self->aq_link_status.mbps = 10;
+		break;
+	default:
+		self->aq_link_status.mbps = 0;
+	}
+
+	return 0;
+}
+
+static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
+{
+	struct mac_address_aligned_s mac_address;
+
+	hw_atl2_shared_buffer_get(self, mac_address, mac_address);
+	ether_addr_copy(mac, (u8 *)mac_address.aligned.mac_address);
+
+	if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
+		unsigned int rnd = 0;
+		u32 h;
+		u32 l;
+
+		get_random_bytes(&rnd, sizeof(unsigned int));
+
+		l = 0xE3000000U | (0xFFFFU & rnd) | (0x00 << 16);
+		h = 0x8001300EU;
+
+		mac[5] = (u8)(0xFFU & l);
+		l >>= 8;
+		mac[4] = (u8)(0xFFU & l);
+		l >>= 8;
+		mac[3] = (u8)(0xFFU & l);
+		l >>= 8;
+		mac[2] = (u8)(0xFFU & l);
+		mac[1] = (u8)(0xFFU & h);
+		h >>= 8;
+		mac[0] = (u8)(0xFFU & h);
+	}
+
+	return 0;
+}
+
+static int aq_a2_fw_update_stats(struct aq_hw_s *self)
+{
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	struct statistics_s stats;
+
+	hw_atl2_shared_buffer_read_safe(self, stats, &stats);
+
+#define AQ_SDELTA(_N_, _F_) (self->curr_stats._N_ += \
+			stats.msm._F_ - priv->last_stats.msm._F_)
+
+	if (self->aq_link_status.mbps) {
+		AQ_SDELTA(uprc, rx_unicast_frames);
+		AQ_SDELTA(mprc, rx_multicast_frames);
+		AQ_SDELTA(bprc, rx_broadcast_frames);
+		AQ_SDELTA(erpr, rx_error_frames);
+
+		AQ_SDELTA(uptc, tx_unicast_frames);
+		AQ_SDELTA(mptc, tx_multicast_frames);
+		AQ_SDELTA(bptc, tx_broadcast_frames);
+		AQ_SDELTA(erpt, tx_errors);
+
+		AQ_SDELTA(ubrc, rx_unicast_octets);
+		AQ_SDELTA(ubtc, tx_unicast_octets);
+		AQ_SDELTA(mbrc, rx_multicast_octets);
+		AQ_SDELTA(mbtc, tx_multicast_octets);
+		AQ_SDELTA(bbrc, rx_broadcast_octets);
+		AQ_SDELTA(bbtc, tx_broadcast_octets);
+	}
+#undef AQ_SDELTA
+	self->curr_stats.dma_pkt_rc =
+		hw_atl_stats_rx_dma_good_pkt_counter_get(self);
+	self->curr_stats.dma_pkt_tc =
+		hw_atl_stats_tx_dma_good_pkt_counter_get(self);
+	self->curr_stats.dma_oct_rc =
+		hw_atl_stats_rx_dma_good_octet_counter_get(self);
+	self->curr_stats.dma_oct_tc =
+		hw_atl_stats_tx_dma_good_octet_counter_get(self);
+	self->curr_stats.dpc = hw_atl_rpb_rx_dma_drop_pkt_cnt_get(self);
+
+	memcpy(&priv->last_stats, &stats, sizeof(stats));
+
+	return 0;
+}
+
+static int aq_a2_fw_renegotiate(struct aq_hw_s *self)
+{
+	struct link_options_s link_options;
+	int err;
+
+	hw_atl2_shared_buffer_get(self, link_options, link_options);
+	link_options.link_renegotiate = 1U;
+	hw_atl2_shared_buffer_write(self, link_options, link_options);
+
+	err = hw_atl2_shared_buffer_finish_ack(self);
+
+	/* We should put renegotiate status back to zero
+	 * after command completes
+	 */
+	link_options.link_renegotiate = 0U;
+	hw_atl2_shared_buffer_write(self, link_options, link_options);
+
+	return err;
+}
+
+int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
+						u8 *base_index, u8 *count)
+{
+	struct filter_caps_s filter_caps;
+	int err;
+
+	err = hw_atl2_shared_buffer_read_safe(self, filter_caps, &filter_caps);
+	if (err)
+		return err;
+
+	*base_index = filter_caps.rslv_tbl_base_index;
+	*count = filter_caps.rslv_tbl_count;
+	return 0;
+}
+
+const struct aq_fw_ops aq_a2_fw_ops = {
+	.init               = aq_a2_fw_init,
+	.deinit             = aq_a2_fw_deinit,
+	.reset              = NULL,
+	.renegotiate        = aq_a2_fw_renegotiate,
+	.get_mac_permanent  = aq_a2_fw_get_mac_permanent,
+	.set_link_speed     = aq_a2_fw_set_link_speed,
+	.set_state          = aq_a2_fw_set_state,
+	.update_link_status = aq_a2_fw_update_link_status,
+	.update_stats       = aq_a2_fw_update_stats,
+};
-- 
cgit v1.2.3-59-g8ed1b


From b3f0c79cba2060c1af37b32d60eff8598391519e Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:39 +0300
Subject: net: atlantic: A2 hw_ops skeleton

This patch adds basic hw_ops layout for A2.

Actual implementation will be added in the follow-up patches.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/Makefile    |   1 +
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |   1 +
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   |  39 +++-
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 226 +++++++++++++++++++++
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h   |  14 ++
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |  20 ++
 6 files changed, 294 insertions(+), 7 deletions(-)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h

diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index fa845c15d0e1..23f0e5b5fcdb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -25,6 +25,7 @@ atlantic-objs := aq_main.o \
 	hw_atl/hw_atl_utils.o \
 	hw_atl/hw_atl_utils_fw2x.o \
 	hw_atl/hw_atl_llh.o \
+	hw_atl2/hw_atl2.o \
 	hw_atl2/hw_atl2_utils_fw.o \
 	hw_atl2/hw_atl2_llh.o \
 	macsec/macsec_api.o
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index e770d91e0876..03fea9469f01 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -55,6 +55,7 @@ struct aq_hw_caps_s {
 	u8 rx_rings;
 	bool flow_control;
 	bool is_64_dma;
+	u32 priv_data_len;
 };
 
 struct aq_hw_link_status_s {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index 2edf137a7030..ce46cdbc69e6 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -16,6 +16,7 @@
 #include "aq_pci_func.h"
 #include "hw_atl/hw_atl_a0.h"
 #include "hw_atl/hw_atl_b0.h"
+#include "hw_atl2/hw_atl2.h"
 #include "aq_filters.h"
 #include "aq_drvinfo.h"
 #include "aq_macsec.h"
@@ -41,6 +42,13 @@ static const struct pci_device_id aq_pci_tbl[] = {
 	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC111S), },
 	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC112S), },
 
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113DEV), },
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113CS), },
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC114CS), },
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113), },
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC113C), },
+	{ PCI_VDEVICE(AQUANTIA, AQ_DEVICE_ID_AQC115C), },
+
 	{}
 };
 
@@ -70,6 +78,13 @@ static const struct aq_board_revision_s hw_atl_boards[] = {
 	{ AQ_DEVICE_ID_AQC109S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc109s, },
 	{ AQ_DEVICE_ID_AQC111S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc111s, },
 	{ AQ_DEVICE_ID_AQC112S,	AQ_HWREV_ANY,	&hw_atl_ops_b1, &hw_atl_b0_caps_aqc112s, },
+
+	{ AQ_DEVICE_ID_AQC113DEV,	AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
+	{ AQ_DEVICE_ID_AQC113,		AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
+	{ AQ_DEVICE_ID_AQC113CS,	AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
+	{ AQ_DEVICE_ID_AQC114CS,	AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
+	{ AQ_DEVICE_ID_AQC113C,		AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
+	{ AQ_DEVICE_ID_AQC115C,		AQ_HWREV_ANY,	&hw_atl2_ops, &hw_atl2_caps_aqc113, },
 };
 
 MODULE_DEVICE_TABLE(pci, aq_pci_tbl);
@@ -104,10 +119,8 @@ int aq_pci_func_init(struct pci_dev *pdev)
 	int err;
 
 	err = pci_set_dma_mask(pdev, DMA_BIT_MASK(64));
-	if (!err) {
+	if (!err)
 		err = pci_set_consistent_dma_mask(pdev, DMA_BIT_MASK(64));
-
-	}
 	if (err) {
 		err = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 		if (!err)
@@ -237,6 +250,15 @@ static int aq_pci_probe(struct pci_dev *pdev,
 		goto err_ioremap;
 	}
 	self->aq_hw->aq_nic_cfg = aq_nic_get_cfg(self);
+	if (self->aq_hw->aq_nic_cfg->aq_hw_caps->priv_data_len) {
+		int len = self->aq_hw->aq_nic_cfg->aq_hw_caps->priv_data_len;
+
+		self->aq_hw->priv = kzalloc(len, GFP_KERNEL);
+		if (!self->aq_hw->priv) {
+			err = -ENOMEM;
+			goto err_free_aq_hw;
+		}
+	}
 
 	for (bar = 0; bar < 4; ++bar) {
 		if (IORESOURCE_MEM & pci_resource_flags(pdev, bar)) {
@@ -245,19 +267,19 @@ static int aq_pci_probe(struct pci_dev *pdev,
 			mmio_pa = pci_resource_start(pdev, bar);
 			if (mmio_pa == 0U) {
 				err = -EIO;
-				goto err_free_aq_hw;
+				goto err_free_aq_hw_priv;
 			}
 
 			reg_sz = pci_resource_len(pdev, bar);
 			if ((reg_sz <= 24 /*ATL_REGS_SIZE*/)) {
 				err = -EIO;
-				goto err_free_aq_hw;
+				goto err_free_aq_hw_priv;
 			}
 
 			self->aq_hw->mmio = ioremap(mmio_pa, reg_sz);
 			if (!self->aq_hw->mmio) {
 				err = -EIO;
-				goto err_free_aq_hw;
+				goto err_free_aq_hw_priv;
 			}
 			break;
 		}
@@ -265,7 +287,7 @@ static int aq_pci_probe(struct pci_dev *pdev,
 
 	if (bar == 4) {
 		err = -EIO;
-		goto err_free_aq_hw;
+		goto err_free_aq_hw_priv;
 	}
 
 	numvecs = min((u8)AQ_CFG_VECS_DEF,
@@ -305,6 +327,8 @@ err_register:
 	aq_pci_free_irq_vectors(self);
 err_hwinit:
 	iounmap(self->aq_hw->mmio);
+err_free_aq_hw_priv:
+	kfree(self->aq_hw->priv);
 err_free_aq_hw:
 	kfree(self->aq_hw);
 err_ioremap:
@@ -332,6 +356,7 @@ static void aq_pci_remove(struct pci_dev *pdev)
 		aq_nic_free_vectors(self);
 		aq_pci_free_irq_vectors(self);
 		iounmap(self->aq_hw->mmio);
+		kfree(self->aq_hw->priv);
 		kfree(self->aq_hw);
 		pci_release_regions(pdev);
 		free_netdev(self->ndev);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
new file mode 100644
index 000000000000..58c74a73b6cf
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -0,0 +1,226 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include "aq_hw.h"
+#include "hw_atl2_utils.h"
+#include "hw_atl2_internal.h"
+
+#define DEFAULT_BOARD_BASIC_CAPABILITIES \
+	.is_64_dma = true,		  \
+	.msix_irqs = 8U,		  \
+	.irq_mask = ~0U,		  \
+	.vecs = HW_ATL2_RSS_MAX,	  \
+	.tcs = HW_ATL2_TC_MAX,	  \
+	.rxd_alignment = 1U,		  \
+	.rxd_size = HW_ATL2_RXD_SIZE,   \
+	.rxds_max = HW_ATL2_MAX_RXD,    \
+	.rxds_min = HW_ATL2_MIN_RXD,    \
+	.txd_alignment = 1U,		  \
+	.txd_size = HW_ATL2_TXD_SIZE,   \
+	.txds_max = HW_ATL2_MAX_TXD,    \
+	.txds_min = HW_ATL2_MIN_TXD,    \
+	.txhwb_alignment = 4096U,	  \
+	.tx_rings = HW_ATL2_TX_RINGS,   \
+	.rx_rings = HW_ATL2_RX_RINGS,   \
+	.hw_features = NETIF_F_HW_CSUM |  \
+			NETIF_F_RXCSUM |  \
+			NETIF_F_RXHASH |  \
+			NETIF_F_SG |      \
+			NETIF_F_TSO |     \
+			NETIF_F_TSO6 |    \
+			NETIF_F_LRO |     \
+			NETIF_F_NTUPLE |  \
+			NETIF_F_HW_VLAN_CTAG_FILTER | \
+			NETIF_F_HW_VLAN_CTAG_RX |     \
+			NETIF_F_HW_VLAN_CTAG_TX |     \
+			NETIF_F_GSO_UDP_L4      |     \
+			NETIF_F_GSO_PARTIAL,          \
+	.hw_priv_flags = IFF_UNICAST_FLT, \
+	.flow_control = true,		  \
+	.mtu = HW_ATL2_MTU_JUMBO,	  \
+	.mac_regs_count = 72,		  \
+	.hw_alive_check_addr = 0x10U,     \
+	.priv_data_len = sizeof(struct hw_atl2_priv)
+
+const struct aq_hw_caps_s hw_atl2_caps_aqc113 = {
+	DEFAULT_BOARD_BASIC_CAPABILITIES,
+	.media_type = AQ_HW_MEDIA_TYPE_TP,
+	.link_speed_msk = AQ_NIC_RATE_10G |
+			  AQ_NIC_RATE_5G  |
+			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_1G  |
+			  AQ_NIC_RATE_100M      |
+			  AQ_NIC_RATE_10M,
+};
+
+static int hw_atl2_hw_reset(struct aq_hw_s *self)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_rss_hash_set(struct aq_hw_s *self,
+				   struct aq_rss_parameters *rss_params)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
+			      struct aq_rss_parameters *rss_params)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_offload_set(struct aq_hw_s *self,
+				  struct aq_nic_cfg_s *aq_nic_cfg)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
+{
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	u8 base_index, count;
+	int err;
+
+	err = hw_atl2_utils_get_action_resolve_table_caps(self, &base_index,
+							  &count);
+	if (err)
+		return err;
+
+	priv->art_base_index = 8 * base_index;
+
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_tx_start(struct aq_hw_s *self,
+				    struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_rx_start(struct aq_hw_s *self,
+				    struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_start(struct aq_hw_s *self)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_tx_xmit(struct aq_hw_s *self,
+				   struct aq_ring_s *ring,
+				   unsigned int frags)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_rx_init(struct aq_hw_s *self,
+				   struct aq_ring_s *aq_ring,
+				   struct aq_ring_param_s *aq_ring_param)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_tx_init(struct aq_hw_s *self,
+				   struct aq_ring_s *aq_ring,
+				   struct aq_ring_param_s *aq_ring_param)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_rx_fill(struct aq_hw_s *self, struct aq_ring_s *ring,
+				   unsigned int sw_tail_old)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_tx_head_update(struct aq_hw_s *self,
+					  struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_rx_receive(struct aq_hw_s *self,
+				      struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_irq_enable(struct aq_hw_s *self, u64 mask)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_irq_disable(struct aq_hw_s *self, u64 mask)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_irq_read(struct aq_hw_s *self, u64 *mask)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_interrupt_moderation_set(struct aq_hw_s *self)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_stop(struct aq_hw_s *self)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static int hw_atl2_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
+{
+	return -EOPNOTSUPP;
+}
+
+static struct aq_stats_s *hw_atl2_utils_get_hw_stats(struct aq_hw_s *self)
+{
+	return &self->curr_stats;
+}
+
+const struct aq_hw_ops hw_atl2_ops = {
+	.hw_set_mac_address   = hw_atl2_hw_mac_addr_set,
+	.hw_init              = hw_atl2_hw_init,
+	.hw_reset             = hw_atl2_hw_reset,
+	.hw_start             = hw_atl2_hw_start,
+	.hw_ring_tx_start     = hw_atl2_hw_ring_tx_start,
+	.hw_ring_tx_stop      = hw_atl2_hw_ring_tx_stop,
+	.hw_ring_rx_start     = hw_atl2_hw_ring_rx_start,
+	.hw_ring_rx_stop      = hw_atl2_hw_ring_rx_stop,
+	.hw_stop              = hw_atl2_hw_stop,
+
+	.hw_ring_tx_xmit         = hw_atl2_hw_ring_tx_xmit,
+	.hw_ring_tx_head_update  = hw_atl2_hw_ring_tx_head_update,
+
+	.hw_ring_rx_receive      = hw_atl2_hw_ring_rx_receive,
+	.hw_ring_rx_fill         = hw_atl2_hw_ring_rx_fill,
+
+	.hw_irq_enable           = hw_atl2_hw_irq_enable,
+	.hw_irq_disable          = hw_atl2_hw_irq_disable,
+	.hw_irq_read             = hw_atl2_hw_irq_read,
+
+	.hw_ring_rx_init             = hw_atl2_hw_ring_rx_init,
+	.hw_ring_tx_init             = hw_atl2_hw_ring_tx_init,
+	.hw_interrupt_moderation_set = hw_atl2_hw_interrupt_moderation_set,
+	.hw_rss_set                  = hw_atl2_hw_rss_set,
+	.hw_rss_hash_set             = hw_atl2_hw_rss_hash_set,
+	.hw_get_hw_stats             = hw_atl2_utils_get_hw_stats,
+	.hw_set_offload              = hw_atl2_hw_offload_set,
+};
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
new file mode 100644
index 000000000000..de8723f1c28a
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0-only */
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#ifndef HW_ATL2_H
+#define HW_ATL2_H
+
+#include "aq_common.h"
+
+extern const struct aq_hw_caps_s hw_atl2_caps_aqc113;
+extern const struct aq_hw_ops hw_atl2_ops;
+
+#endif /* HW_ATL2_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index 233db3222bb8..f82058484332 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -9,9 +9,29 @@
 #include "hw_atl2_utils.h"
 
 #define HW_ATL2_MTU_JUMBO  16352U
+#define HW_ATL2_MTU        1514U
+
+#define HW_ATL2_TX_RINGS 4U
+#define HW_ATL2_RX_RINGS 4U
+
+#define HW_ATL2_RINGS_MAX 32U
+#define HW_ATL2_TXD_SIZE       (16U)
+#define HW_ATL2_RXD_SIZE       (16U)
+
+#define HW_ATL2_TC_MAX 1U
+#define HW_ATL2_RSS_MAX 8U
+
+#define HW_ATL2_MIN_RXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
+#define HW_ATL2_MIN_TXD \
+	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_TXD_MULTIPLE))
+
+#define HW_ATL2_MAX_RXD 8184U
+#define HW_ATL2_MAX_TXD 8184U
 
 struct hw_atl2_priv {
 	struct statistics_s last_stats;
+	unsigned int art_base_index;
 };
 
 #endif /* HW_ATL2_INTERNAL_H */
-- 
cgit v1.2.3-59-g8ed1b


From 57fe8fd2255cd97d2c2a9b69cb5172c0f15343b8 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:40 +0300
Subject: net: atlantic: HW bindings for A2 RPF

RPF is one of the modules which has been significantly
changed/extended on A2.

This patch adds the necessary A2 register definitions
for RPF, which are used in follow-up patches.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Co-developed-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c |  14 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h |   6 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |  74 ++++++++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |  26 ++++
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        | 164 +++++++++++++++++++++
 5 files changed, 284 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index d1f68fc16291..8dd3232d72c4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -693,6 +693,13 @@ void hw_atl_rpfl2multicast_flr_en_set(struct aq_hw_s *aq_hw,
 			    HW_ATL_RPFL2MC_ENF_SHIFT, l2multicast_flr_en);
 }
 
+u32 hw_atl_rpfl2promiscuous_mode_en_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPFL2PROMIS_MODE_ADR,
+				  HW_ATL_RPFL2PROMIS_MODE_MSK,
+				  HW_ATL_RPFL2PROMIS_MODE_SHIFT);
+}
+
 void hw_atl_rpfl2promiscuous_mode_en_set(struct aq_hw_s *aq_hw,
 					 u32 l2promiscuous_mode_en)
 {
@@ -867,6 +874,13 @@ void hw_atl_rpf_vlan_prom_mode_en_set(struct aq_hw_s *aq_hw,
 			    vlan_prom_mode_en);
 }
 
+u32 hw_atl_rpf_vlan_prom_mode_en_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg_bit(aq_hw, HW_ATL_RPF_VL_PROMIS_MODE_ADR,
+				  HW_ATL_RPF_VL_PROMIS_MODE_MSK,
+				  HW_ATL_RPF_VL_PROMIS_MODE_SHIFT);
+}
+
 void hw_atl_rpf_vlan_accept_untagged_packets_set(struct aq_hw_s *aq_hw,
 						 u32 vlan_acc_untagged_packets)
 {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index 62992b23c0e8..a4699a682973 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -349,6 +349,9 @@ void hw_atl_rpfl2multicast_flr_en_set(struct aq_hw_s *aq_hw,
 				      u32 l2multicast_flr_en,
 				      u32 filter);
 
+/* get l2 promiscuous mode enable */
+u32 hw_atl_rpfl2promiscuous_mode_en_get(struct aq_hw_s *aq_hw);
+
 /* set l2 promiscuous mode enable */
 void hw_atl_rpfl2promiscuous_mode_en_set(struct aq_hw_s *aq_hw,
 					 u32 l2promiscuous_mode_en);
@@ -420,6 +423,9 @@ void hw_atl_rpf_vlan_outer_etht_set(struct aq_hw_s *aq_hw, u32 vlan_outer_etht);
 void hw_atl_rpf_vlan_prom_mode_en_set(struct aq_hw_s *aq_hw,
 				      u32 vlan_prom_mode_en);
 
+/* Get VLAN promiscuous mode enable */
+u32 hw_atl_rpf_vlan_prom_mode_en_get(struct aq_hw_s *aq_hw);
+
 /* Set VLAN untagged action */
 void hw_atl_rpf_vlan_untagged_act_set(struct aq_hw_s *aq_hw,
 				      u32 vlan_untagged_act);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index b6164bc5fffd..67f46a7bdcda 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -7,6 +7,80 @@
 #include "hw_atl2_llh_internal.h"
 #include "aq_hw_utils.h"
 
+void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR,
+			    HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_MSK,
+			    HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_SHIFT,
+			    rss_hash_type);
+}
+
+/* rpf */
+
+void hw_atl2_rpf_new_enable_set(struct aq_hw_s *aq_hw, u32 enable)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_NEW_EN_ADR,
+			    HW_ATL2_RPF_NEW_EN_MSK,
+			    HW_ATL2_RPF_NEW_EN_SHIFT,
+			    enable);
+}
+
+void hw_atl2_rpfl2_uc_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPFL2UC_TAG_ADR(filter),
+			    HW_ATL2_RPFL2UC_TAG_MSK,
+			    HW_ATL2_RPFL2UC_TAG_SHIFT,
+			    tag);
+}
+
+void hw_atl2_rpfl2_bc_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_L2_BC_TAG_ADR,
+			    HW_ATL2_RPF_L2_BC_TAG_MSK,
+			    HW_ATL2_RPF_L2_BC_TAG_SHIFT,
+			    tag);
+}
+
+void hw_atl2_new_rpf_rss_redir_set(struct aq_hw_s *aq_hw, u32 tc, u32 index,
+				   u32 queue)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_RSS_REDIR_ADR(tc, index),
+			    HW_ATL2_RPF_RSS_REDIR_MSK(tc),
+			    HW_ATL2_RPF_RSS_REDIR_SHIFT(tc),
+			    queue);
+}
+
+void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_VL_TAG_ADR(filter),
+			    HW_ATL2_RPF_VL_TAG_MSK,
+			    HW_ATL2_RPF_VL_TAG_SHIFT,
+			    tag);
+}
+
+/* set action resolver record */
+void hw_atl2_rpf_act_rslvr_record_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 tag, u32 mask, u32 action)
+{
+	aq_hw_write_reg(aq_hw,
+			HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_ADR(location),
+			tag);
+	aq_hw_write_reg(aq_hw,
+			HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_ADR(location),
+			mask);
+	aq_hw_write_reg(aq_hw,
+			HW_ATL2_RPF_ACT_RSLVR_ACTN_ADR(location),
+			action);
+}
+
+void hw_atl2_rpf_act_rslvr_section_en_set(struct aq_hw_s *aq_hw, u32 sections)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_REC_TAB_EN_ADR,
+			    HW_ATL2_RPF_REC_TAB_EN_MSK,
+			    HW_ATL2_RPF_REC_TAB_EN_SHIFT,
+			    sections);
+}
+
 void hw_atl2_mif_shared_buf_get(struct aq_hw_s *aq_hw, int offset, u32 *data,
 				int len)
 {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index 8ef8bd6b2534..bd5b0d5a8084 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -10,6 +10,32 @@
 
 struct aq_hw_s;
 
+/** Set RSS HASH type */
+void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type);
+
+/* set new RPF enable */
+void hw_atl2_rpf_new_enable_set(struct aq_hw_s *aq_hw, u32 enable);
+
+/* set l2 unicast filter tag */
+void hw_atl2_rpfl2_uc_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter);
+
+/* set l2 broadcast filter tag */
+void hw_atl2_rpfl2_bc_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag);
+
+/* set new rss redirection table */
+void hw_atl2_new_rpf_rss_redir_set(struct aq_hw_s *aq_hw, u32 tc, u32 index,
+				   u32 queue);
+
+/* Set VLAN filter tag */
+void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter);
+
+/* set action resolver record */
+void hw_atl2_rpf_act_rslvr_record_set(struct aq_hw_s *aq_hw, u8 location,
+				      u32 tag, u32 mask, u32 action);
+
+/* set enable action resolver section */
+void hw_atl2_rpf_act_rslvr_section_en_set(struct aq_hw_s *aq_hw, u32 sections);
+
 /* get data from firmware shared input buffer */
 void hw_atl2_mif_shared_buf_get(struct aq_hw_s *aq_hw, int offset, u32 *data,
 				int len);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index 835deb2d1950..886491b6ab73 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -6,6 +6,170 @@
 #ifndef HW_ATL2_LLH_INTERNAL_H
 #define HW_ATL2_LLH_INTERNAL_H
 
+/* RX pif_rpf_rss_hash_type_i Bitfield Definitions
+ */
+#define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR 0x000054C8
+#define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_MSK 0x000001FF
+#define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_MSKN 0xFFFFFE00
+#define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_SHIFT 0
+#define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_WIDTH 9
+
+/* rx rpf_new_rpf_en bitfield definitions
+ * preprocessor definitions for the bitfield "rpf_new_rpf_en_i".
+ * port="pif_rpf_new_rpf_en_i
+ */
+
+/* register address for bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_ADR 0x00005104
+/* bitmask for bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_MSK 0x00000800
+/* inverted bitmask for bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_MSKN 0xfffff7ff
+/* lower bit position of bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_SHIFT 11
+/* width of bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_WIDTH 1
+/* default value of bitfield rpf_new_rpf_en */
+#define HW_ATL2_RPF_NEW_EN_DEFAULT 0x0
+
+/* rx l2_uc_req_tag0{f}[5:0] bitfield definitions
+ * preprocessor definitions for the bitfield "l2_uc_req_tag0{f}[5:0]".
+ * parameter: filter {f} | stride size 0x8 | range [0, 37]
+ * port="pif_rpf_l2_uc_req_tag0[5:0]"
+ */
+
+/* register address for bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_ADR(filter) (0x00005114 + (filter) * 0x8)
+/* bitmask for bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_MSK 0x0FC00000
+/* inverted bitmask for bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_MSKN 0xF03FFFFF
+/* lower bit position of bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_SHIFT 22
+/* width of bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_WIDTH 6
+/* default value of bitfield l2_uc_req_tag0{f}[5:0] */
+#define HW_ATL2_RPFL2UC_TAG_DEFAULT 0x0
+
+/* rpf_l2_bc_req_tag[5:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rpf_l2_bc_req_tag[5:0]".
+ * port="pifrpf_l2_bc_req_tag_i[5:0]"
+ */
+
+/* register address for bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_ADR 0x000050F0
+/* bitmask for bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_MSK 0x0000003F
+/* inverted bitmask for bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_MSKN 0xffffffc0
+/* lower bit position of bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_SHIFT 0
+/* width of bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_WIDTH 6
+/* default value of bitfield rpf_l2_bc_req_tag */
+#define HW_ATL2_RPF_L2_BC_TAG_DEFAULT 0x0
+
+/* rx rpf_rss_red1_data_[4:0] bitfield definitions
+ * preprocessor definitions for the bitfield "rpf_rss_red1_data[4:0]".
+ * port="pif_rpf_rss_red1_data_i[4:0]"
+ */
+
+/* register address for bitfield rpf_rss_red1_data[4:0] */
+#define HW_ATL2_RPF_RSS_REDIR_ADR(TC, INDEX) (0x00006200 + \
+					(0x100 * !!((TC) > 3)) + (INDEX) * 4)
+/* bitmask for bitfield rpf_rss_red1_data[4:0] */
+#define HW_ATL2_RPF_RSS_REDIR_MSK(TC)  (0x00000001F << (5 * ((TC) % 4)))
+/* lower bit position of bitfield rpf_rss_red1_data[4:0] */
+#define HW_ATL2_RPF_RSS_REDIR_SHIFT(TC) (5 * ((TC) % 4))
+/* width of bitfield rpf_rss_red1_data[4:0] */
+#define HW_ATL2_RPF_RSS_REDIR_WIDTH 5
+/* default value of bitfield rpf_rss_red1_data[4:0] */
+#define HW_ATL2_RPF_RSS_REDIR_DEFAULT 0x0
+
+/* rx vlan_req_tag0{f}[3:0] bitfield definitions
+ * preprocessor definitions for the bitfield "vlan_req_tag0{f}[3:0]".
+ * parameter: filter {f} | stride size 0x4 | range [0, 15]
+ * port="pif_rpf_vlan_req_tag0[3:0]"
+ */
+
+/* register address for bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_ADR(filter) (0x00005290 + (filter) * 0x4)
+/* bitmask for bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_MSK 0x0000F000
+/* inverted bitmask for bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_MSKN 0xFFFF0FFF
+/* lower bit position of bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_SHIFT 12
+/* width of bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_WIDTH 4
+/* default value of bitfield vlan_req_tag0{f}[3:0] */
+#define HW_ATL2_RPF_VL_TAG_DEFAULT 0x0
+
+/* ahb_mem_addr{f}[31:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "ahb_mem_addr{f}[31:0]".
+ * Parameter: filter {f} | stride size 0x10 | range [0, 127]
+ * PORT="ahb_mem_addr{f}[31:0]"
+ */
+
+/* Register address for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_ADR(filter) \
+	(0x00014000u + (filter) * 0x10)
+/* Bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_MSKN 0x00000000u
+/* Lower bit position of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_SHIFT 0
+/* Width of bitfield ahb_mem_addr{f}[31:0] (all 32 bits, matches _MSK) */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_WIDTH 32
+/* Default value of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_REQ_TAG_DEFAULT 0x0
+
+/* Register address for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_ADR(filter) \
+	(0x00014004u + (filter) * 0x10)
+/* Bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_MSK 0xFFFFFFFFu
+/* Inverted bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_MSKN 0x00000000u
+/* Lower bit position of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_SHIFT 0
+/* Width of bitfield ahb_mem_addr{f}[31:0] (all 32 bits, matches _MSK) */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_WIDTH 32
+/* Default value of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_TAG_MASK_DEFAULT 0x0
+
+/* Register address for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_ADR(filter) \
+	(0x00014008u + (filter) * 0x10)
+/* Bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_MSK 0x000007FFu
+/* Inverted bitmask for bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_MSKN 0xFFFFF800u
+/* Lower bit position of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_SHIFT 0
+/* Width of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_WIDTH 10
+/* Default value of bitfield ahb_mem_addr{f}[31:0] */
+#define HW_ATL2_RPF_ACT_RSLVR_ACTN_DEFAULT 0x0
+
+/* rpf_rec_tab_en[15:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "rpf_rec_tab_en[15:0]".
+ * PORT="pif_rpf_rec_tab_en[15:0]"
+ */
+/* Register address for bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_ADR 0x00006ff0u
+/* Bitmask for bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_MSK 0x0000FFFFu
+/* Inverted bitmask for bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_MSKN 0xFFFF0000u
+/* Lower bit position of bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_SHIFT 0
+/* Width of bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_WIDTH 16
+/* Default value of bitfield rpf_rec_tab_en[15:0] */
+#define HW_ATL2_RPF_REC_TAB_EN_DEFAULT 0x0
+
 /* Register address for firmware shared input buffer */
 #define HW_ATL2_MIF_SHARED_BUFFER_IN_ADR(dword) (0x00012000U + (dword) * 0x4U)
 /* Register address for firmware shared output buffer */
-- 
cgit v1.2.3-59-g8ed1b


From 3417368494db497c0426d1dcc46c4c459ff43ca7 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:41 +0300
Subject: net: atlantic: add A2 RPF hw_ops

This patch adds RPF-related hw_ops, which are needed for basic
functionality.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Co-developed-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        |   2 +
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 208 +++++++++++++++++++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |  49 +++++
 3 files changed, 259 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 5513254642b3..5db57ea9a5bd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -360,6 +360,8 @@ struct aq_rx_filter_vlan {
 	u8 queue;
 };
 
+#define HW_ATL_VLAN_MAX_FILTERS         16U
+
 struct aq_rx_filter_l2 {
 	s8 queue;
 	u8 location;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 58c74a73b6cf..7dd5f9a1c505 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -4,9 +4,17 @@
  */
 
 #include "aq_hw.h"
+#include "aq_hw_utils.h"
+#include "aq_nic.h"
+#include "hw_atl/hw_atl_utils.h"
+#include "hw_atl/hw_atl_llh.h"
 #include "hw_atl2_utils.h"
+#include "hw_atl2_llh.h"
 #include "hw_atl2_internal.h"
 
+static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
+				       u32 tag, u32 mask, u32 action);
+
 #define DEFAULT_BOARD_BASIC_CAPABILITIES \
 	.is_64_dma = true,		  \
 	.msix_irqs = 8U,		  \
@@ -55,6 +63,11 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = {
 			  AQ_NIC_RATE_10M,
 };
 
+static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self)
+{
+	return hw_atl_reg_glb_cpu_sem_get(self, HW_ATL2_FW_SM_ACT_RSLVR);
+}
+
 static int hw_atl2_hw_reset(struct aq_hw_s *self)
 {
 	return -EOPNOTSUPP;
@@ -78,6 +91,60 @@ static int hw_atl2_hw_offload_set(struct aq_hw_s *self,
 	return -EOPNOTSUPP;
 }
 
+static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self,
+						  bool promisc)
+{
+	u16 off_action = (!promisc &&
+			  !hw_atl_rpfl2promiscuous_mode_en_get(self)) ?
+				HW_ATL2_ACTION_DROP : HW_ATL2_ACTION_DISABLE;
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	u8 index;
+
+	index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_OFF_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0,
+				    HW_ATL2_RPF_TAG_VLAN_MASK |
+				    HW_ATL2_RPF_TAG_UNTAG_MASK, off_action);
+}
+
+static void hw_atl2_hw_new_rx_filter_promisc(struct aq_hw_s *self, bool promisc)
+{
+	u16 off_action = promisc ? HW_ATL2_ACTION_DISABLE : HW_ATL2_ACTION_DROP;
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	bool vlan_promisc_enable;
+	u8 index;
+
+	index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_OFF_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0,
+				    HW_ATL2_RPF_TAG_UC_MASK |
+				    HW_ATL2_RPF_TAG_ALLMC_MASK,
+				    off_action);
+
+	/* refresh the VLAN promisc record too: on if either mode is enabled */
+	vlan_promisc_enable = hw_atl_rpf_vlan_prom_mode_en_get(self);
+	hw_atl2_hw_new_rx_filter_vlan_promisc(self, promisc |
+					      vlan_promisc_enable);
+}
+
+static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
+				       u32 tag, u32 mask, u32 action)
+{
+	u32 val;
+	int err;
+
+	err = readx_poll_timeout_atomic(hw_atl2_sem_act_rslvr_get,
+					self, val, val == 1,
+					1, 10000U);
+	if (err)
+		return err;
+
+	hw_atl2_rpf_act_rslvr_record_set(self, location, tag, mask,
+					 action);
+
+	hw_atl_reg_glb_cpu_sem_set(self, 1, HW_ATL2_FW_SM_ACT_RSLVR);
+
+	return err;
+}
+
 static int hw_atl2_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 {
 	return -EOPNOTSUPP;
@@ -170,6 +237,88 @@ static int hw_atl2_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 	return -EOPNOTSUPP;
 }
 
+#define IS_FILTER_ENABLED(_F_) ((packet_filter & (_F_)) ? 1U : 0U)
+
+static int hw_atl2_hw_packet_filter_set(struct aq_hw_s *self,
+					unsigned int packet_filter)
+{
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	u32 vlan_promisc;
+	u32 l2_promisc;
+	unsigned int i;
+
+	l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) ||
+		     !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET));
+	vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc;
+
+	hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc);
+
+	hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc);
+
+	hw_atl2_hw_new_rx_filter_promisc(self, IS_FILTER_ENABLED(IFF_PROMISC));
+
+	hw_atl_rpfl2multicast_flr_en_set(self,
+					 IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+					 IS_FILTER_ENABLED(IFF_MULTICAST), 0);
+
+	hw_atl_rpfl2_accept_all_mc_packets_set(self,
+					      IS_FILTER_ENABLED(IFF_ALLMULTI) &&
+					      IS_FILTER_ENABLED(IFF_MULTICAST));
+
+	hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
+
+	for (i = HW_ATL2_MAC_MIN; i < HW_ATL2_MAC_MAX; ++i)
+		hw_atl_rpfl2_uc_flr_en_set(self,
+					   (cfg->is_mc_list_enabled &&
+					    (i <= cfg->mc_list_count)) ?
+				    1U : 0U, i);
+
+	return aq_hw_err_from_flags(self);
+}
+
+#undef IS_FILTER_ENABLED
+
+/* Load @count multicast MACs into the L2 filters from HW_ATL2_MAC_MIN onward.
+ * The top octet is cast to u32 before << 24 to avoid signed-int overflow.
+ */
+static int hw_atl2_hw_multicast_list_set(struct aq_hw_s *self,
+					 u8 ar_mac
+					 [AQ_HW_MULTICAST_ADDRESS_MAX]
+					 [ETH_ALEN],
+					 u32 count)
+{
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	int err = 0;
+
+	if (count > (HW_ATL2_MAC_MAX - HW_ATL2_MAC_MIN)) {
+		err = -EBADRQC;
+		goto err_exit;
+	}
+	for (cfg->mc_list_count = 0U;
+			cfg->mc_list_count < count;
+			++cfg->mc_list_count) {
+		u32 i = cfg->mc_list_count;
+		u32 h = (ar_mac[i][0] << 8) | (ar_mac[i][1]);
+		u32 l = ((u32)ar_mac[i][2] << 24) | (ar_mac[i][3] << 16) |
+					(ar_mac[i][4] << 8) | ar_mac[i][5];
+
+		/* keep the filter disabled while its address and tag change */
+		hw_atl_rpfl2_uc_flr_en_set(self, 0U, HW_ATL2_MAC_MIN + i);
+		hw_atl_rpfl2unicast_dest_addresslsw_set(self, l,
+							HW_ATL2_MAC_MIN + i);
+		hw_atl_rpfl2unicast_dest_addressmsw_set(self, h,
+							HW_ATL2_MAC_MIN + i);
+		hw_atl2_rpfl2_uc_flr_tag_set(self, 1, HW_ATL2_MAC_MIN + i);
+		hw_atl_rpfl2_uc_flr_en_set(self, (cfg->is_mc_list_enabled),
+					   HW_ATL2_MAC_MIN + i);
+	}
+
+	err = aq_hw_err_from_flags(self);
+
+err_exit:
+	return err;
+}
+
 static int hw_atl2_hw_interrupt_moderation_set(struct aq_hw_s *self)
 {
 	return -EOPNOTSUPP;
@@ -195,6 +344,61 @@ static struct aq_stats_s *hw_atl2_utils_get_hw_stats(struct aq_hw_s *self)
 	return &self->curr_stats;
 }
 
+static int hw_atl2_hw_vlan_set(struct aq_hw_s *self,
+			       struct aq_rx_filter_vlan *aq_vlans)
+{
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	u32 queue;
+	u8 index;
+	int i;
+
+	hw_atl_rpf_vlan_prom_mode_en_set(self, 1U);
+
+	for (i = 0; i < HW_ATL_VLAN_MAX_FILTERS; i++) {
+		queue = HW_ATL2_ACTION_ASSIGN_QUEUE(aq_vlans[i].queue);
+		/* disable filter i and its resolver record first */
+		hw_atl_rpf_vlan_flr_en_set(self, 0U, i);
+		hw_atl_rpf_vlan_rxq_en_flr_set(self, 0U, i);
+		index = priv->art_base_index + HW_ATL2_RPF_VLAN_USER_INDEX + i;
+		hw_atl2_act_rslvr_table_set(self, index, 0, 0,
+					    HW_ATL2_ACTION_DISABLE);
+		if (aq_vlans[i].enable) {
+			hw_atl_rpf_vlan_id_flr_set(self,
+						   aq_vlans[i].vlan_id, i);
+			hw_atl_rpf_vlan_flr_act_set(self, 1U, i);
+			hw_atl_rpf_vlan_flr_en_set(self, 1U, i);
+
+			if (aq_vlans[i].queue != 0xFF) {
+				hw_atl_rpf_vlan_rxq_flr_set(self,
+							    aq_vlans[i].queue,
+							    i);
+				hw_atl_rpf_vlan_rxq_en_flr_set(self, 1U, i);
+				/* NOTE(review): 4-bit tag, i + 2 up to 17 */
+				hw_atl2_rpf_vlan_flr_tag_set(self, i + 2, i);
+
+				index = priv->art_base_index +
+					HW_ATL2_RPF_VLAN_USER_INDEX + i;
+				hw_atl2_act_rslvr_table_set(self, index,
+					(i + 2) << HW_ATL2_RPF_TAG_VLAN_OFFSET,
+					HW_ATL2_RPF_TAG_VLAN_MASK, queue);
+			} else {
+				hw_atl2_rpf_vlan_flr_tag_set(self, 1, i);
+			}
+		}
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl2_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
+{
+	/* VLAN promisc mode is on exactly when VLAN filtering is disabled */
+	hw_atl_rpf_vlan_prom_mode_en_set(self, !enable);
+	hw_atl2_hw_new_rx_filter_vlan_promisc(self, !enable);
+
+	return aq_hw_err_from_flags(self);
+}
+
 const struct aq_hw_ops hw_atl2_ops = {
 	.hw_set_mac_address   = hw_atl2_hw_mac_addr_set,
 	.hw_init              = hw_atl2_hw_init,
@@ -218,6 +422,10 @@ const struct aq_hw_ops hw_atl2_ops = {
 
 	.hw_ring_rx_init             = hw_atl2_hw_ring_rx_init,
 	.hw_ring_tx_init             = hw_atl2_hw_ring_tx_init,
+	.hw_packet_filter_set        = hw_atl2_hw_packet_filter_set,
+	.hw_filter_vlan_set          = hw_atl2_hw_vlan_set,
+	.hw_filter_vlan_ctrl         = hw_atl2_hw_vlan_ctrl,
+	.hw_multicast_list_set       = hw_atl2_hw_multicast_list_set,
 	.hw_interrupt_moderation_set = hw_atl2_hw_interrupt_moderation_set,
 	.hw_rss_set                  = hw_atl2_hw_rss_set,
 	.hw_rss_hash_set             = hw_atl2_hw_rss_hash_set,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index f82058484332..dccc89df2223 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -18,6 +18,10 @@
 #define HW_ATL2_TXD_SIZE       (16U)
 #define HW_ATL2_RXD_SIZE       (16U)
 
+#define HW_ATL2_MAC_UC   0U
+#define HW_ATL2_MAC_MIN  1U
+#define HW_ATL2_MAC_MAX  38U
+
 #define HW_ATL2_TC_MAX 1U
 #define HW_ATL2_RSS_MAX 8U
 
@@ -29,6 +33,51 @@
 #define HW_ATL2_MAX_RXD 8184U
 #define HW_ATL2_MAX_TXD 8184U
 
+#define HW_ATL2_FW_SM_ACT_RSLVR  0x3U
+
+#define HW_ATL2_RPF_TAG_UC_OFFSET      0x0
+#define HW_ATL2_RPF_TAG_ALLMC_OFFSET   0x6
+#define HW_ATL2_RPF_TAG_ET_OFFSET      0x7
+#define HW_ATL2_RPF_TAG_VLAN_OFFSET    0xA
+#define HW_ATL2_RPF_TAG_UNTAG_OFFSET   0xE
+#define HW_ATL2_RPF_TAG_L3_V4_OFFSET   0xF
+#define HW_ATL2_RPF_TAG_L3_V6_OFFSET   0x12
+#define HW_ATL2_RPF_TAG_L4_OFFSET      0x15
+#define HW_ATL2_RPF_TAG_L4_FLEX_OFFSET 0x18
+#define HW_ATL2_RPF_TAG_FLEX_OFFSET    0x1B
+#define HW_ATL2_RPF_TAG_PCP_OFFSET     0x1D
+
+#define HW_ATL2_RPF_TAG_UC_MASK    (0x0000003FU << HW_ATL2_RPF_TAG_UC_OFFSET)
+#define HW_ATL2_RPF_TAG_ALLMC_MASK (0x00000001U << HW_ATL2_RPF_TAG_ALLMC_OFFSET)
+#define HW_ATL2_RPF_TAG_UNTAG_MASK (0x00000001U << HW_ATL2_RPF_TAG_UNTAG_OFFSET)
+#define HW_ATL2_RPF_TAG_VLAN_MASK  (0x0000000FU << HW_ATL2_RPF_TAG_VLAN_OFFSET)
+#define HW_ATL2_RPF_TAG_ET_MASK    (0x00000007U << HW_ATL2_RPF_TAG_ET_OFFSET)
+#define HW_ATL2_RPF_TAG_L3_V4_MASK (0x00000007U << HW_ATL2_RPF_TAG_L3_V4_OFFSET)
+#define HW_ATL2_RPF_TAG_L3_V6_MASK (0x00000007U << HW_ATL2_RPF_TAG_L3_V6_OFFSET)
+#define HW_ATL2_RPF_TAG_L4_MASK    (0x00000007U << HW_ATL2_RPF_TAG_L4_OFFSET)
+#define HW_ATL2_RPF_TAG_PCP_MASK   (0x00000007U << HW_ATL2_RPF_TAG_PCP_OFFSET)
+
+enum HW_ATL2_RPF_ART_INDEX {
+	HW_ATL2_RPF_L2_PROMISC_OFF_INDEX,
+	HW_ATL2_RPF_VLAN_PROMISC_OFF_INDEX,
+	HW_ATL2_RPF_L3L4_USER_INDEX	= 8,
+	HW_ATL2_RPF_ET_PCP_USER_INDEX	= HW_ATL2_RPF_L3L4_USER_INDEX + 16,
+	HW_ATL2_RPF_VLAN_USER_INDEX	= HW_ATL2_RPF_ET_PCP_USER_INDEX + 16,
+	HW_ATL2_RPF_PCP_TO_TC_INDEX	= HW_ATL2_RPF_VLAN_USER_INDEX +
+					  HW_ATL_VLAN_MAX_FILTERS,
+};
+
+#define HW_ATL2_ACTION(ACTION, RSS, INDEX, VALID) \
+	((((ACTION) & 0x3U) << 8) | \
+	(((RSS) & 0x1U) << 7) | \
+	(((INDEX) & 0x3FU) << 2) | \
+	(((VALID) & 0x1U) << 0))
+
+#define HW_ATL2_ACTION_DROP HW_ATL2_ACTION(0, 0, 0, 1)
+#define HW_ATL2_ACTION_DISABLE HW_ATL2_ACTION(0, 0, 0, 0)
+#define HW_ATL2_ACTION_ASSIGN_QUEUE(QUEUE) HW_ATL2_ACTION(1, 0, (QUEUE), 1)
+#define HW_ATL2_ACTION_ASSIGN_TC(TC) HW_ATL2_ACTION(1, 1, (TC), 1)
+
 struct hw_atl2_priv {
 	struct statistics_s last_stats;
 	unsigned int art_base_index;
-- 
cgit v1.2.3-59-g8ed1b


From ec7629e0c2217963eedb886026a71040c9d32aa9 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:42 +0300
Subject: net: atlantic: HW bindings for basic A2 init/deinit hw_ops

This patch adds A2 register definitions for basic A2 HW
initialization / deinitialization.

Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Co-developed-by: Egor Pomozov <epomozov@marvell.com>
Signed-off-by: Egor Pomozov <epomozov@marvell.com>
Co-developed-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Co-developed-by: Nikita Danilov <ndanilov@marvell.com>
Signed-off-by: Nikita Danilov <ndanilov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |  70 +++++++++++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |  29 ++++++
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        | 108 +++++++++++++++++++++
 3 files changed, 207 insertions(+)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index 67f46a7bdcda..af176e1e5a18 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -58,6 +58,55 @@ void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter)
 			    tag);
 }
 
+/* TX */
+
+void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_ADR,
+			    HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_MSK,
+			    HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_SHIFT,
+			    clk_gate_en);
+}
+
+void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
+						    u32 max_credit,
+						    u32 tc)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc),
+			    HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK,
+			    HW_ATL2_TPS_DATA_TCTCREDIT_MAX_SHIFT,
+			    max_credit);
+}
+
+void hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
+						u32 tx_pkt_shed_tc_data_weight,
+						u32 tc)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc),
+			    HW_ATL2_TPS_DATA_TCTWEIGHT_MSK,
+			    HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT,
+			    tx_pkt_shed_tc_data_weight);
+}
+
+u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL2_FPGA_VER_ADR);
+}
+
+void hw_atl2_init_launchtime(struct aq_hw_s *aq_hw)
+{
+	u32 hw_ver = hw_atl2_get_hw_version(aq_hw);
+
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_LT_CTRL_ADR,
+			    HW_ATL2_LT_CTRL_CLK_RATIO_MSK,
+			    HW_ATL2_LT_CTRL_CLK_RATIO_SHIFT,
+			    hw_ver  < HW_ATL2_FPGA_VER_U32(1, 0, 0, 0) ?
+			    HW_ATL2_LT_CTRL_CLK_RATIO_FULL_SPEED :
+			    hw_ver >= HW_ATL2_FPGA_VER_U32(1, 0, 85, 2) ?
+			    HW_ATL2_LT_CTRL_CLK_RATIO_HALF_SPEED :
+			    HW_ATL2_LT_CTRL_CLK_RATIO_QUATER_SPEED);
+}
+
 /* set action resolver record */
 void hw_atl2_rpf_act_rslvr_record_set(struct aq_hw_s *aq_hw, u8 location,
 				      u32 tag, u32 mask, u32 action)
@@ -128,3 +177,24 @@ u32 hw_atl2_mif_mcp_finished_read_get(struct aq_hw_s *aq_hw)
 				  HW_ATL2_MIF_MCP_FINISHED_READ_MSK,
 				  HW_ATL2_MIF_MCP_FINISHED_READ_SHIFT);
 }
+
+u32 hw_atl2_mif_mcp_boot_reg_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL2_MIF_BOOT_REG_ADR);
+}
+
+void hw_atl2_mif_mcp_boot_reg_set(struct aq_hw_s *aq_hw, u32 val)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL2_MIF_BOOT_REG_ADR, val);
+}
+
+u32 hw_atl2_mif_host_req_int_get(struct aq_hw_s *aq_hw)
+{
+	return aq_hw_read_reg(aq_hw, HW_ATL2_MCP_HOST_REQ_INT_ADR);
+}
+
+/* write @val to the host interrupt request clear register */
+void hw_atl2_mif_host_req_int_clr(struct aq_hw_s *aq_hw, u32 val)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL2_MCP_HOST_REQ_INT_CLR_ADR, val);
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index bd5b0d5a8084..4acbbceb623f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -29,6 +29,23 @@ void hw_atl2_new_rpf_rss_redir_set(struct aq_hw_s *aq_hw, u32 tc, u32 index,
 /* Set VLAN filter tag */
 void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter);
 
+/* set tx buffer clock gate enable */
+void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en);
+
+/* set tx packet scheduler tc data max credit */
+void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
+						    u32 max_credit,
+						    u32 tc);
+
+/* set tx packet scheduler tc data weight */
+void hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
+						u32 tx_pkt_shed_tc_data_weight,
+						u32 tc);
+
+u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw);
+
+void hw_atl2_init_launchtime(struct aq_hw_s *aq_hw);
+
 /* set action resolver record */
 void hw_atl2_rpf_act_rslvr_record_set(struct aq_hw_s *aq_hw, u8 location,
 				      u32 tag, u32 mask, u32 action);
@@ -54,4 +71,16 @@ void hw_atl2_mif_host_finished_write_set(struct aq_hw_s *aq_hw, u32 finish);
 /* get mcp finished read shared buffer indication */
 u32 hw_atl2_mif_mcp_finished_read_get(struct aq_hw_s *aq_hw);
 
+/* get mcp boot register */
+u32 hw_atl2_mif_mcp_boot_reg_get(struct aq_hw_s *aq_hw);
+
+/* set mcp boot register */
+void hw_atl2_mif_mcp_boot_reg_set(struct aq_hw_s *aq_hw, u32 val);
+
+/* get host interrupt request */
+u32 hw_atl2_mif_host_req_int_get(struct aq_hw_s *aq_hw);
+
+/* clear host interrupt request */
+void hw_atl2_mif_host_req_int_clr(struct aq_hw_s *aq_hw, u32 val);
+
 #endif /* HW_ATL2_LLH_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index 886491b6ab73..14b78e090950 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -105,6 +105,105 @@
 /* default value of bitfield vlan_req_tag0{f}[3:0] */
 #define HW_ATL2_RPF_VL_TAG_DEFAULT 0x0
 
+/* RX rx_q{Q}_tc_map[2:0] Bitfield Definitions
+ * Preprocessor definitions for the bitfield "rx_q{Q}_tc_map[2:0]".
+ * Parameter: Queue {Q} | bit-level stride | range [0, 31]
+ * PORT="pif_rx_q0_tc_map_i[2:0]"
+ */
+
+/* Register address for bitfield rx_q{Q}_tc_map[2:0] */
+#define HW_ATL2_RX_Q_TC_MAP_ADR(queue) \
+	(((queue) < 32) ? 0x00005900 + ((queue) / 8) * 4 : 0)
+/* Lower bit position of bitfield rx_q{Q}_tc_map[2:0] */
+#define HW_ATL2_RX_Q_TC_MAP_SHIFT(queue) \
+	(((queue) < 32) ? ((queue) * 4) % 32 : 0)
+/* Width of bitfield rx_q{Q}_tc_map[2:0] */
+#define HW_ATL2_RX_Q_TC_MAP_WIDTH 3
+/* Default value of bitfield rx_q{Q}_tc_map[2:0] */
+#define HW_ATL2_RX_Q_TC_MAP_DEFAULT 0x0
+
+/* tx tx_buffer_clk_gate_en bitfield definitions
+ * preprocessor definitions for the bitfield "tx_buffer_clk_gate_en".
+ * port="pif_tpb_tx_buffer_clk_gate_en_i"
+ */
+
+/* register address for bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_ADR 0x00007900
+/* bitmask for bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_MSK 0x00000020
+/* inverted bitmask for bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_MSKN 0xffffffdf
+/* lower bit position of bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_SHIFT 5
+/* width of bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_WIDTH 1
+/* default value of bitfield tx_buffer_clk_gate_en */
+#define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_DEFAULT 0x0
+
+/* tx data_tc{t}_credit_max[b:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_credit_max[b:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_data_tc0_credit_max_i[11:0]"
+ */
+
+/* register address for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc) (0x00007110 + (tc) * 0x4)
+/* bitmask for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK 0x0fff0000
+/* inverted bitmask for bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSKN 0xf000ffff
+/* lower bit position of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_SHIFT 16
+/* width of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_WIDTH 12
+/* default value of bitfield data_tc{t}_credit_max[b:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_DEFAULT 0x0
+
+/* tx data_tc{t}_weight[8:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_weight[8:0]".
+ * parameter: tc {t} | stride size 0x4 | range [0, 7]
+ * port="pif_tps_data_tc0_weight_i[8:0]"
+ */
+
+/* register address for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc) (0x00007110 + (tc) * 0x4)
+/* bitmask for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSK 0x000001ff
+/* inverted bitmask for bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSKN 0xfffffe00
+/* lower bit position of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT 0
+/* width of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH 9
+/* default value of bitfield data_tc{t}_weight[8:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_DEFAULT 0x0
+
+/* Launch time control register */
+#define HW_ATL2_LT_CTRL_ADR 0x00007a1c
+
+#define HW_ATL2_LT_CTRL_AVB_LEN_CMP_TRSHLD_MSK 0xFFFF0000
+#define HW_ATL2_LT_CTRL_AVB_LEN_CMP_TRSHLD_SHIFT 16
+
+#define HW_ATL2_LT_CTRL_CLK_RATIO_MSK 0x0000FF00
+#define HW_ATL2_LT_CTRL_CLK_RATIO_SHIFT 8
+#define HW_ATL2_LT_CTRL_CLK_RATIO_QUATER_SPEED 4
+#define HW_ATL2_LT_CTRL_CLK_RATIO_HALF_SPEED 2
+#define HW_ATL2_LT_CTRL_CLK_RATIO_FULL_SPEED 1
+
+#define HW_ATL2_LT_CTRL_25G_MODE_SUPPORT_MSK 0x00000008
+#define HW_ATL2_LT_CTRL_25G_MODE_SUPPORT_SHIFT 3
+
+#define HW_ATL2_LT_CTRL_LINK_SPEED_MSK 0x00000007
+#define HW_ATL2_LT_CTRL_LINK_SPEED_SHIFT 0
+
+/* FPGA VER register */
+#define HW_ATL2_FPGA_VER_ADR 0x000000f4
+#define HW_ATL2_FPGA_VER_U32(mj, mi, bl, rv) \
+	((((mj) & 0xff) << 24) | \
+	 (((mi) & 0xff) << 16) | \
+	 (((bl) & 0xff) << 8) | \
+	 (((rv) & 0xff) << 0))
+
 /* ahb_mem_addr{f}[31:0] Bitfield Definitions
  * Preprocessor definitions for the bitfield "ahb_mem_addr{f}[31:0]".
  * Parameter: filter {f} | stride size 0x10 | range [0, 127]
@@ -209,4 +308,13 @@
 /* Default value of bitfield pif_mcp_finished_buf_rd_i */
 #define HW_ATL2_MIF_MCP_FINISHED_READ_DEFAULT 0x0
 
+/* Register address for bitfield pif_mcp_boot_reg */
+#define HW_ATL2_MIF_BOOT_REG_ADR 0x00003040u
+
+#define HW_ATL2_MCP_HOST_REQ_INT_READY BIT(0)
+
+#define HW_ATL2_MCP_HOST_REQ_INT_ADR 0x00000F00u
+#define HW_ATL2_MCP_HOST_REQ_INT_SET_ADR 0x00000F04u
+#define HW_ATL2_MCP_HOST_REQ_INT_CLR_ADR 0x00000F08u
+
 #endif /* HW_ATL2_LLH_INTERNAL_H */
-- 
cgit v1.2.3-59-g8ed1b


From c1be0bf092bd292ee617622c116f5981a34cce96 Mon Sep 17 00:00:00 2001
From: Dmitry Bogdanov <dbogdanov@marvell.com>
Date: Thu, 30 Apr 2020 11:04:43 +0300
Subject: net: atlantic: common functions needed for basic A2 init/deinit
 hw_ops

This patch adds common functions (mostly FW-related), which are
needed for basic A2 HW initialization / deinitialization.

Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Co-developed-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/Makefile    |   1 +
 .../aquantia/atlantic/hw_atl/hw_atl_utils.c        |   3 +-
 .../aquantia/atlantic/hw_atl/hw_atl_utils.h        |   2 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils.c      | 139 +++++++++++++++++++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils.h      |   8 ++
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c   |  12 ++
 6 files changed, 163 insertions(+), 2 deletions(-)
 create mode 100644 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c

diff --git a/drivers/net/ethernet/aquantia/atlantic/Makefile b/drivers/net/ethernet/aquantia/atlantic/Makefile
index 23f0e5b5fcdb..130a105d03f3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/Makefile
+++ b/drivers/net/ethernet/aquantia/atlantic/Makefile
@@ -26,6 +26,7 @@ atlantic-objs := aq_main.o \
 	hw_atl/hw_atl_utils_fw2x.o \
 	hw_atl/hw_atl_llh.o \
 	hw_atl2/hw_atl2.o \
+	hw_atl2/hw_atl2_utils.o \
 	hw_atl2/hw_atl2_utils_fw.o \
 	hw_atl2/hw_atl2_llh.o \
 	macsec/macsec_api.o
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 20655a2170cc..1100d40a0302 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -53,7 +53,6 @@ enum mcp_area {
 	MCP_AREA_SETTINGS = 0x20000000,
 };
 
-static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
 static int hw_atl_utils_mpi_set_state(struct aq_hw_s *self,
 				      enum hal_atl_utils_fw_state_e state);
 static u32 hw_atl_utils_get_mpi_mbox_tid(struct aq_hw_s *self);
@@ -434,7 +433,7 @@ int hw_atl_write_fwsettings_dwords(struct aq_hw_s *self, u32 offset, u32 *p,
 					     p, cnt, MCP_AREA_SETTINGS);
 }
 
-static int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual)
+int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual)
 {
 	const u32 dw_major_mask = 0xff000000U;
 	const u32 dw_minor_mask = 0x00ffffffU;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 5db57ea9a5bd..99c1b6644ec3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -634,6 +634,8 @@ int hw_atl_utils_fw_rpc_call(struct aq_hw_s *self, unsigned int rpc_size);
 int hw_atl_utils_fw_rpc_wait(struct aq_hw_s *self,
 			     struct hw_atl_utils_fw_rpc **rpc);
 
+int hw_atl_utils_ver_match(u32 ver_expected, u32 ver_actual);
+
 extern const struct aq_fw_ops aq_fw_1x_ops;
 extern const struct aq_fw_ops aq_fw_2x_ops;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c
new file mode 100644
index 000000000000..85ccc9a011a0
--- /dev/null
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c
@@ -0,0 +1,139 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Atlantic Network Driver
+ * Copyright (C) 2020 Marvell International Ltd.
+ */
+
+#include <linux/iopoll.h>
+
+#include "aq_hw_utils.h"
+#include "hw_atl/hw_atl_utils.h"
+#include "hw_atl2_utils.h"
+#include "hw_atl2_llh.h"
+#include "hw_atl2_llh_internal.h"
+
+#define HW_ATL2_FW_VER_1X          0x01000000U
+
+#define AQ_A2_BOOT_STARTED         BIT(0x18)
+#define AQ_A2_CRASH_INIT           BIT(0x1B)
+#define AQ_A2_BOOT_CODE_FAILED     BIT(0x1C)
+#define AQ_A2_FW_INIT_FAILED       BIT(0x1D)
+#define AQ_A2_FW_INIT_COMP_SUCCESS BIT(0x1F)
+
+#define AQ_A2_FW_BOOT_FAILED_MASK (AQ_A2_CRASH_INIT | \
+				   AQ_A2_BOOT_CODE_FAILED | \
+				   AQ_A2_FW_INIT_FAILED)
+#define AQ_A2_FW_BOOT_COMPLETE_MASK (AQ_A2_FW_BOOT_FAILED_MASK | \
+				     AQ_A2_FW_INIT_COMP_SUCCESS)
+
+#define AQ_A2_FW_BOOT_REQ_REBOOT        BIT(0x0)
+#define AQ_A2_FW_BOOT_REQ_HOST_BOOT     BIT(0x8)
+#define AQ_A2_FW_BOOT_REQ_MAC_FAST_BOOT BIT(0xA)
+#define AQ_A2_FW_BOOT_REQ_PHY_FAST_BOOT BIT(0xB)
+
+/* Read the A2 FW version, bind the A2 firmware ops and run their init hook.
+ * A version mismatch is only logged; the same ops are selected either way.
+ */
+int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops)
+{
+	int ret;
+
+	self->fw_ver_actual = hw_atl2_utils_get_fw_version(self);
+	if (hw_atl_utils_ver_match(HW_ATL2_FW_VER_1X, self->fw_ver_actual))
+		aq_pr_err("Bad FW version detected: %x, but continue\n",
+			  self->fw_ver_actual);
+	aq_pr_trace("Detect ATL2FW %x\n", self->fw_ver_actual);
+
+	*fw_ops = &aq_a2_fw_ops;
+	self->aq_fw_ops = *fw_ops;
+	ret = self->aq_fw_ops->init(self);
+
+	/* The ANTIGUA feature bit is set regardless of the init result */
+	self->chip_features |= ATL_HW_CHIP_ANTIGUA;
+
+	return ret;
+}
+
+/* True once the boot register reports a final state (success or failure)
+ * or the MCP raised the host request "ready" interrupt (host boot requested).
+ */
+static bool hw_atl2_mcp_boot_complete(struct aq_hw_s *self)
+{
+	u32 boot_reg = hw_atl2_mif_mcp_boot_reg_get(self);
+
+	/* Short-circuit: the host request register is only read when needed */
+	if (boot_reg & AQ_A2_FW_BOOT_COMPLETE_MASK)
+		return true;
+
+	return (hw_atl2_mif_host_req_int_get(self) &
+		HW_ATL2_MCP_HOST_REQ_INT_READY) != 0;
+}
+
+/* Reboot the MCP through the boot register and wait for the FW to restart.
+ * Returns 0 on success, the poll error on timeout, -EIO if the boot register
+ * reports a failure or host boot is requested, else the fw_ops init result.
+ */
+int hw_atl2_utils_soft_reset(struct aq_hw_s *self)
+{
+	bool rbl_complete = false;
+	u32 rbl_status = 0;
+	u32 rbl_request;
+	int err;
+
+	err = readx_poll_timeout_atomic(hw_atl2_mif_mcp_boot_reg_get, self,
+				rbl_status,
+				((rbl_status & AQ_A2_BOOT_STARTED) &&
+				 (rbl_status != 0xFFFFFFFFu)),
+				10, 500000);
+	if (err)
+		aq_pr_trace("Boot code probably hanged, reboot anyway\n");
+
+	hw_atl2_mif_host_req_int_clr(self, 0x01);
+	rbl_request = AQ_A2_FW_BOOT_REQ_REBOOT;
+#ifdef AQ_CFG_FAST_START
+	rbl_request |= AQ_A2_FW_BOOT_REQ_MAC_FAST_BOOT;
+#endif
+	hw_atl2_mif_mcp_boot_reg_set(self, rbl_request);
+
+	/* Wait for RBL boot */
+	err = readx_poll_timeout_atomic(hw_atl2_mif_mcp_boot_reg_get, self,
+				rbl_status,
+				((rbl_status & AQ_A2_BOOT_STARTED) &&
+				 (rbl_status != 0xFFFFFFFFu)),
+				10, 200000);
+	if (err) {
+		aq_pr_err("Boot code hanged\n");
+		goto err_exit;
+	}
+
+	err = readx_poll_timeout_atomic(hw_atl2_mcp_boot_complete, self,
+					rbl_complete,
+					rbl_complete,
+					10, 2000000);
+	if (err) {
+		aq_pr_err("FW Restart timed out\n");
+		goto err_exit;
+	}
+
+	rbl_status = hw_atl2_mif_mcp_boot_reg_get(self);
+	if (rbl_status & AQ_A2_FW_BOOT_FAILED_MASK) {
+		err = -EIO;
+		aq_pr_err("FW Restart failed\n");
+		goto err_exit;
+	}
+
+	if (hw_atl2_mif_host_req_int_get(self) &
+	    HW_ATL2_MCP_HOST_REQ_INT_READY) {
+		err = -EIO;
+		aq_pr_err("No FW detected. Dynamic FW load not implemented\n");
+		goto err_exit;
+	}
+
+	if (self->aq_fw_ops) {
+		err = self->aq_fw_ops->init(self);
+		if (err)
+			aq_pr_err("FW Init failed\n");
+	}
+
+err_exit:
+	return err;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
index 5421fbed3db5..2317dd8459d0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
@@ -6,6 +6,8 @@
 #ifndef HW_ATL2_UTILS_H
 #define HW_ATL2_UTILS_H
 
+#include "aq_hw.h"
+
 /* F W    A P I */
 
 struct link_options_s {
@@ -590,6 +592,12 @@ struct fw_interface_out {
 #define  AQ_HOST_MODE_LOW_POWER    3U
 #define  AQ_HOST_MODE_SHUTDOWN     4U
 
+int hw_atl2_utils_initfw(struct aq_hw_s *self, const struct aq_fw_ops **fw_ops);
+
+int hw_atl2_utils_soft_reset(struct aq_hw_s *self);
+
+u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self);
+
 int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
 						u8 *base_index, u8 *count);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
index c3e0e5575810..f5fb4b11f51a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
@@ -301,6 +301,18 @@ static int aq_a2_fw_renegotiate(struct aq_hw_s *self)
 	return err;
 }
 
+u32 hw_atl2_utils_get_fw_version(struct aq_hw_s *self)
+{
+	struct version_s version;
+
+	hw_atl2_shared_buffer_read_safe(self, version, &version);
+	/* A2 FW version is stored in reverse order; pack it as
+	 * major[31:24] | minor[23:16] | build, shifting in u32.
+	 */
+	return (u32)version.mac.major << 24 | (u32)version.mac.minor << 16 |
+	       version.mac.build;
+}
+
 int hw_atl2_utils_get_action_resolve_table_caps(struct aq_hw_s *self,
 						u8 *base_index, u8 *count)
 {
-- 
cgit v1.2.3-59-g8ed1b


From e54dcf4bba3e2c36b3eb89cd9063753c2a3ef459 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:44 +0300
Subject: net: atlantic: basic A2 init/deinit hw_ops

This patch adds basic A2 HW initialization / deinitialization.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Co-developed-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |   3 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  24 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |  14 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c |   4 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h |   4 +-
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 344 +++++++++++++++++----
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |  48 +++
 7 files changed, 362 insertions(+), 79 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 2dbea5cd7684..f97b073efd8e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -378,7 +378,8 @@ int aq_nic_init(struct aq_nic_s *self)
 	if (err < 0)
 		goto err_exit;
 
-	if (self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_TP) {
+	if (ATL_HW_IS_CHIP_FEATURE(self->aq_hw, ATLANTIC) &&
+	    self->aq_nic_cfg.aq_hw_caps->media_type == AQ_HW_MEDIA_TYPE_TP) {
 		self->aq_hw->phy_id = HW_ATL_PHY_ID_MAX;
 		err = aq_phy_init(self->aq_hw);
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 3b42045b9c7d..c46199f14ec4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -187,8 +187,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
-				     struct aq_rss_parameters *rss_params)
+int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
+			      struct aq_rss_parameters *rss_params)
 {
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 	unsigned int addr = 0U;
@@ -215,8 +215,8 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
-				struct aq_rss_parameters *rss_params)
+int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
+			 struct aq_rss_parameters *rss_params)
 {
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
 	u8 *indirection_table =	rss_params->indirection_table;
@@ -314,7 +314,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 {
 	/* Tx TC/Queue number config */
-	hw_atl_rpb_tps_tx_tc_mode_set(self, 1U);
+	hw_atl_tpb_tps_tx_tc_mode_set(self, 1U);
 
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
@@ -495,7 +495,7 @@ static int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_start(struct aq_hw_s *self)
+int hw_atl_b0_hw_start(struct aq_hw_s *self)
 {
 	hw_atl_tpb_tx_buff_en_set(self, 1);
 	hw_atl_rpb_rx_buff_en_set(self, 1);
@@ -854,14 +854,14 @@ static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask)
+int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask)
 {
 	hw_atl_itr_irq_msk_setlsw_set(self, LODWORD(mask));
 
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
+int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
 {
 	hw_atl_itr_irq_msk_clearlsw_set(self, LODWORD(mask));
 	hw_atl_itr_irq_status_clearlsw_set(self, LODWORD(mask));
@@ -871,7 +871,7 @@ static int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
+int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 {
 	*mask = hw_atl_itr_irq_statuslsw_get(self);
 
@@ -880,8 +880,8 @@ static int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask)
 
 #define IS_FILTER_ENABLED(_F_) ((packet_filter & (_F_)) ? 1U : 0U)
 
-static int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
-					  unsigned int packet_filter)
+int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
+				   unsigned int packet_filter)
 {
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 	unsigned int i = 0U;
@@ -1089,7 +1089,7 @@ static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
 
 static int hw_atl_b0_tx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
 {
-	*tc_mode = hw_atl_rpb_tps_tx_tc_mode_get(self);
+	*tc_mode = hw_atl_tpb_tps_tx_tc_mode_get(self);
 	return aq_hw_err_from_flags(self);
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index 09af1683034b..ea7136b06b32 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -33,4 +33,18 @@ extern const struct aq_hw_ops hw_atl_ops_b0;
 
 #define hw_atl_ops_b1 hw_atl_ops_b0
 
+int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
+			      struct aq_rss_parameters *rss_params);
+int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
+			 struct aq_rss_parameters *rss_params);
+
+int hw_atl_b0_hw_start(struct aq_hw_s *self);
+
+int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask);
+int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask);
+int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask);
+
+int hw_atl_b0_hw_packet_filter_set(struct aq_hw_s *self,
+				   unsigned int packet_filter);
+
 #endif /* HW_ATL_B0_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 8dd3232d72c4..9e2d01a6aac8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1318,14 +1318,14 @@ void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en)
 			    HW_ATL_TPB_TX_BUF_EN_SHIFT, tx_buff_en);
 }
 
-u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw)
+u32 hw_atl_tpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw)
 {
 	return aq_hw_read_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
 			HW_ATL_TPB_TX_TC_MODE_MSK,
 			HW_ATL_TPB_TX_TC_MODE_SHIFT);
 }
 
-void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+void hw_atl_tpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
 				   u32 tx_traf_class_mode)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPB_TX_TC_MODE_ADDR,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index a4699a682973..b88cb84805d5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -616,11 +616,11 @@ void hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(struct aq_hw_s *aq_hw,
 /* tpb */
 
 /* set TX Traffic Class Mode */
-void hw_atl_rpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
+void hw_atl_tpb_tps_tx_tc_mode_set(struct aq_hw_s *aq_hw,
 				   u32 tx_traf_class_mode);
 
 /* get TX Traffic Class Mode */
-u32 hw_atl_rpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw);
+u32 hw_atl_tpb_tps_tx_tc_mode_get(struct aq_hw_s *aq_hw);
 
 /* set tx buffer enable */
 void hw_atl_tpb_tx_buff_en_set(struct aq_hw_s *aq_hw, u32 tx_buff_en);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 7dd5f9a1c505..ad0b22b3c01f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -6,11 +6,13 @@
 #include "aq_hw.h"
 #include "aq_hw_utils.h"
 #include "aq_nic.h"
+#include "hw_atl/hw_atl_b0.h"
 #include "hw_atl/hw_atl_utils.h"
 #include "hw_atl/hw_atl_llh.h"
 #include "hw_atl2_utils.h"
 #include "hw_atl2_llh.h"
 #include "hw_atl2_internal.h"
+#include "hw_atl2_llh_internal.h"
 
 static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
 				       u32 tag, u32 mask, u32 action);
@@ -70,19 +72,106 @@ static u32 hw_atl2_sem_act_rslvr_get(struct aq_hw_s *self)
 
 static int hw_atl2_hw_reset(struct aq_hw_s *self)
 {
-	return -EOPNOTSUPP;
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	int err;
+
+	err = hw_atl2_utils_soft_reset(self);
+	if (err)
+		return err;
+
+	memset(priv, 0, sizeof(*priv));
+
+	self->aq_fw_ops->set_state(self, MPI_RESET);
+
+	err = aq_hw_err_from_flags(self);
+
+	return err;
 }
 
-static int hw_atl2_hw_rss_hash_set(struct aq_hw_s *self,
-				   struct aq_rss_parameters *rss_params)
+static int hw_atl2_hw_queue_to_tc_map_set(struct aq_hw_s *self)
 {
-	return -EOPNOTSUPP;
+	if (!hw_atl_rpb_rpf_rx_traf_class_mode_get(self)) {
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x11110000);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x33332222);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x55554444);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x77776666);
+	} else {
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x00000000);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x11111111);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x22222222);
+		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x33333333);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
+static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
+{
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	u32 tx_buff_size = HW_ATL2_TXBUF_MAX;
+	u32 rx_buff_size = HW_ATL2_RXBUF_MAX;
+	unsigned int prio = 0U;
+	u32 threshold = 0U;
+	u32 tc = 0U;
+
+	/* TPS Descriptor rate init */
+	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
+	hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
+
+	/* TPS VM init */
+	hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
+
+	/* TPS TC credits init */
+	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
+
+	tc = 0;
+
+	/* TX Packet Scheduler Data TC0 */
+	hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0, tc);
+	hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
+	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
+
+	/* Tx buf size TC0 */
+	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
+
+	threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U;
+	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+
+	threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U;
+	hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+
+	/* QoS Rx buf size per TC */
+	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
+
+	threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U;
+	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+
+	threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U;
+	hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+
+	/* QoS 802.1p priority -> TC mapping */
+	for (prio = 0; prio < 8; ++prio)
+		hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio,
+							cfg->tcs * prio / 8);
+
+	/* ATL2 Apply legacy ring to TC mapping */
+	hw_atl2_hw_queue_to_tc_map_set(self);
+
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 			      struct aq_rss_parameters *rss_params)
 {
-	return -EOPNOTSUPP;
+	u8 *indirection_table =	rss_params->indirection_table;
+	int i;
+
+	for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;)
+		hw_atl2_new_rpf_rss_redir_set(self, 0, i, indirection_table[i]);
+
+	return hw_atl_b0_hw_rss_set(self, rss_params);
 }
 
 static int hw_atl2_hw_offload_set(struct aq_hw_s *self,
@@ -91,6 +180,80 @@ static int hw_atl2_hw_offload_set(struct aq_hw_s *self,
 	return -EOPNOTSUPP;
 }
 
+static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
+{
+	/* Tx TC/RSS number config */
+	hw_atl_tpb_tps_tx_tc_mode_set(self, 1U);
+
+	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
+	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
+	hw_atl_thm_lso_tcp_flag_of_last_pkt_set(self, 0x0F7FU);
+
+	/* Tx interrupts */
+	hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 1U);
+
+	/* misc */
+	hw_atl_tdm_tx_dca_en_set(self, 0U);
+	hw_atl_tdm_tx_dca_mode_set(self, 0U);
+
+	hw_atl_tpb_tx_path_scp_ins_en_set(self, 1U);
+
+	hw_atl2_tpb_tx_buf_clk_gate_en_set(self, 0U);
+
+	return aq_hw_err_from_flags(self);
+}
+
+static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self)
+{
+	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	u8 index;
+
+	hw_atl2_rpf_act_rslvr_section_en_set(self, 0xFFFF);
+	hw_atl2_rpfl2_uc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC,
+				     HW_ATL2_MAC_UC);
+	hw_atl2_rpfl2_bc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC);
+
+	index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_OFF_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0,
+				    HW_ATL2_RPF_TAG_UC_MASK |
+					HW_ATL2_RPF_TAG_ALLMC_MASK,
+				    HW_ATL2_ACTION_DROP);
+
+	index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_OFF_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0,
+				    HW_ATL2_RPF_TAG_VLAN_MASK |
+					HW_ATL2_RPF_TAG_UNTAG_MASK,
+				    HW_ATL2_ACTION_DROP);
+
+	index = priv->art_base_index + HW_ATL2_RPF_VLAN_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_VLAN,
+				    HW_ATL2_RPF_TAG_VLAN_MASK,
+				    HW_ATL2_ACTION_ASSIGN_TC(0));
+
+	index = priv->art_base_index + HW_ATL2_RPF_MAC_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_UC,
+				    HW_ATL2_RPF_TAG_UC_MASK,
+				    HW_ATL2_ACTION_ASSIGN_TC(0));
+
+	index = priv->art_base_index + HW_ATL2_RPF_ALLMC_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_ALLMC,
+				    HW_ATL2_RPF_TAG_ALLMC_MASK,
+				    HW_ATL2_ACTION_ASSIGN_TC(0));
+
+	index = priv->art_base_index + HW_ATL2_RPF_UNTAG_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_UNTAG_MASK,
+				    HW_ATL2_RPF_TAG_UNTAG_MASK,
+				    HW_ATL2_ACTION_ASSIGN_TC(0));
+
+	index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_VLAN_MASK,
+				    HW_ATL2_ACTION_DISABLE);
+
+	index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_ON_INDEX;
+	hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_UC_MASK,
+				    HW_ATL2_ACTION_DISABLE);
+}
+
 static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self,
 						  bool promisc)
 {
@@ -145,6 +308,57 @@ static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
 	return err;
 }
 
+static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self)
+{
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	int i;
+
+	/* Rx TC/RSS number config */
+	hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U);
+
+	/* Rx flow control */
+	hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U);
+
+	hw_atl2_rpf_rss_hash_type_set(self, HW_ATL2_RPF_RSS_HASH_TYPE_ALL);
+
+	/* RSS Ring selection */
+	hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ?
+						HW_ATL_RSS_ENABLED_3INDEX_BITS :
+						HW_ATL_RSS_DISABLED);
+
+	/* Multicast filters */
+	for (i = HW_ATL2_MAC_MAX; i--;) {
+		hw_atl_rpfl2_uc_flr_en_set(self, (i == 0U) ? 1U : 0U, i);
+		hw_atl_rpfl2unicast_flr_act_set(self, 1U, i);
+	}
+
+	hw_atl_reg_rx_flr_mcst_flr_msk_set(self, 0x00000000U);
+	hw_atl_reg_rx_flr_mcst_flr_set(self, HW_ATL_MCAST_FLT_ANY_TO_HOST, 0U);
+
+	/* Vlan filters */
+	hw_atl_rpf_vlan_outer_etht_set(self, ETH_P_8021AD);
+	hw_atl_rpf_vlan_inner_etht_set(self, ETH_P_8021Q);
+
+	hw_atl_rpf_vlan_prom_mode_en_set(self, 1);
+
+	/* Always accept untagged packets */
+	hw_atl_rpf_vlan_accept_untagged_packets_set(self, 1U);
+	hw_atl_rpf_vlan_untagged_act_set(self, 1U);
+
+	hw_atl2_hw_init_new_rx_filters(self);
+
+	/* Rx Interrupts */
+	hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 1U);
+
+	hw_atl_rpfl2broadcast_flr_act_set(self, 1U);
+	hw_atl_rpfl2broadcast_count_threshold_set(self, 0xFFFFU & (~0U / 256U));
+
+	hw_atl_rdm_rx_dca_en_set(self, 0U);
+	hw_atl_rdm_rx_dca_mode_set(self, 0U);
+
+	return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl2_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 {
 	return -EOPNOTSUPP;
@@ -152,7 +366,15 @@ static int hw_atl2_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 
 static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 {
+	static u32 aq_hw_atl2_igcr_table_[4][2] = {
+		[AQ_HW_IRQ_INVALID] = { 0x20000000U, 0x20000000U },
+		[AQ_HW_IRQ_LEGACY]  = { 0x20000080U, 0x20000080U },
+		[AQ_HW_IRQ_MSI]     = { 0x20000021U, 0x20000025U },
+		[AQ_HW_IRQ_MSIX]    = { 0x20000022U, 0x20000026U },
+	};
+
 	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	struct aq_nic_cfg_s *aq_nic_cfg = self->aq_nic_cfg;
 	u8 base_index, count;
 	int err;
 
@@ -163,7 +385,49 @@ static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 
 	priv->art_base_index = 8 * base_index;
 
-	return -EOPNOTSUPP;
+	hw_atl2_init_launchtime(self);
+
+	hw_atl2_hw_init_tx_path(self);
+	hw_atl2_hw_init_rx_path(self);
+
+	hw_atl2_hw_mac_addr_set(self, mac_addr);
+
+	self->aq_fw_ops->set_link_speed(self, aq_nic_cfg->link_speed_msk);
+	self->aq_fw_ops->set_state(self, MPI_INIT);
+
+	hw_atl2_hw_qos_set(self);
+	hw_atl2_hw_rss_set(self, &aq_nic_cfg->aq_rss);
+	hw_atl_b0_hw_rss_hash_set(self, &aq_nic_cfg->aq_rss);
+
+	hw_atl2_rpf_new_enable_set(self, 1);
+
+	/* Reset link status and read out initial hardware counters */
+	self->aq_link_status.mbps = 0;
+	self->aq_fw_ops->update_stats(self);
+
+	err = aq_hw_err_from_flags(self);
+	if (err < 0)
+		goto err_exit;
+
+	/* Interrupts */
+	hw_atl_reg_irq_glb_ctl_set(self,
+				   aq_hw_atl2_igcr_table_[aq_nic_cfg->irq_type]
+						 [(aq_nic_cfg->vecs > 1U) ?
+						  1 : 0]);
+
+	hw_atl_itr_irq_auto_masklsw_set(self, aq_nic_cfg->aq_hw_caps->irq_mask);
+
+	/* Interrupts */
+	hw_atl_reg_gen_irq_map_set(self,
+				   ((HW_ATL2_ERR_INT << 0x18) |
+				    (1U << 0x1F)) |
+				   ((HW_ATL2_ERR_INT << 0x10) |
+				    (1U << 0x17)), 0U);
+
+	hw_atl2_hw_offload_set(self, aq_nic_cfg);
+
+err_exit:
+	return err;
 }
 
 static int hw_atl2_hw_ring_tx_start(struct aq_hw_s *self,
@@ -178,11 +442,6 @@ static int hw_atl2_hw_ring_rx_start(struct aq_hw_s *self,
 	return -EOPNOTSUPP;
 }
 
-static int hw_atl2_hw_start(struct aq_hw_s *self)
-{
-	return -EOPNOTSUPP;
-}
-
 static int hw_atl2_hw_ring_tx_xmit(struct aq_hw_s *self,
 				   struct aq_ring_s *ring,
 				   unsigned int frags)
@@ -222,58 +481,14 @@ static int hw_atl2_hw_ring_rx_receive(struct aq_hw_s *self,
 	return -EOPNOTSUPP;
 }
 
-static int hw_atl2_hw_irq_enable(struct aq_hw_s *self, u64 mask)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_irq_disable(struct aq_hw_s *self, u64 mask)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_irq_read(struct aq_hw_s *self, u64 *mask)
-{
-	return -EOPNOTSUPP;
-}
-
 #define IS_FILTER_ENABLED(_F_) ((packet_filter & (_F_)) ? 1U : 0U)
 
 static int hw_atl2_hw_packet_filter_set(struct aq_hw_s *self,
 					unsigned int packet_filter)
 {
-	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
-	u32 vlan_promisc;
-	u32 l2_promisc;
-	unsigned int i;
-
-	l2_promisc = IS_FILTER_ENABLED(IFF_PROMISC) ||
-		     !!(cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET));
-	vlan_promisc = l2_promisc || cfg->is_vlan_force_promisc;
-
-	hw_atl_rpfl2promiscuous_mode_en_set(self, l2_promisc);
-
-	hw_atl_rpf_vlan_prom_mode_en_set(self, vlan_promisc);
-
 	hw_atl2_hw_new_rx_filter_promisc(self, IS_FILTER_ENABLED(IFF_PROMISC));
 
-	hw_atl_rpfl2multicast_flr_en_set(self,
-					 IS_FILTER_ENABLED(IFF_ALLMULTI) &&
-					 IS_FILTER_ENABLED(IFF_MULTICAST), 0);
-
-	hw_atl_rpfl2_accept_all_mc_packets_set(self,
-					      IS_FILTER_ENABLED(IFF_ALLMULTI) &&
-					      IS_FILTER_ENABLED(IFF_MULTICAST));
-
-	hw_atl_rpfl2broadcast_en_set(self, IS_FILTER_ENABLED(IFF_BROADCAST));
-
-	for (i = HW_ATL2_MAC_MIN; i < HW_ATL2_MAC_MAX; ++i)
-		hw_atl_rpfl2_uc_flr_en_set(self,
-					   (cfg->is_mc_list_enabled &&
-					    (i <= cfg->mc_list_count)) ?
-				    1U : 0U, i);
-
-	return aq_hw_err_from_flags(self);
+	return hw_atl_b0_hw_packet_filter_set(self, packet_filter);
 }
 
 #undef IS_FILTER_ENABLED
@@ -326,7 +541,9 @@ static int hw_atl2_hw_interrupt_moderation_set(struct aq_hw_s *self)
 
 static int hw_atl2_hw_stop(struct aq_hw_s *self)
 {
-	return -EOPNOTSUPP;
+	hw_atl_b0_hw_irq_disable(self, HW_ATL2_INT_MASK);
+
+	return 0;
 }
 
 static int hw_atl2_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
@@ -400,10 +617,12 @@ static int hw_atl2_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
 }
 
 const struct aq_hw_ops hw_atl2_ops = {
+	.hw_soft_reset        = hw_atl2_utils_soft_reset,
+	.hw_prepare           = hw_atl2_utils_initfw,
 	.hw_set_mac_address   = hw_atl2_hw_mac_addr_set,
 	.hw_init              = hw_atl2_hw_init,
 	.hw_reset             = hw_atl2_hw_reset,
-	.hw_start             = hw_atl2_hw_start,
+	.hw_start             = hw_atl_b0_hw_start,
 	.hw_ring_tx_start     = hw_atl2_hw_ring_tx_start,
 	.hw_ring_tx_stop      = hw_atl2_hw_ring_tx_stop,
 	.hw_ring_rx_start     = hw_atl2_hw_ring_rx_start,
@@ -416,9 +635,9 @@ const struct aq_hw_ops hw_atl2_ops = {
 	.hw_ring_rx_receive      = hw_atl2_hw_ring_rx_receive,
 	.hw_ring_rx_fill         = hw_atl2_hw_ring_rx_fill,
 
-	.hw_irq_enable           = hw_atl2_hw_irq_enable,
-	.hw_irq_disable          = hw_atl2_hw_irq_disable,
-	.hw_irq_read             = hw_atl2_hw_irq_read,
+	.hw_irq_enable           = hw_atl_b0_hw_irq_enable,
+	.hw_irq_disable          = hw_atl_b0_hw_irq_disable,
+	.hw_irq_read             = hw_atl_b0_hw_irq_read,
 
 	.hw_ring_rx_init             = hw_atl2_hw_ring_rx_init,
 	.hw_ring_tx_init             = hw_atl2_hw_ring_tx_init,
@@ -428,7 +647,8 @@ const struct aq_hw_ops hw_atl2_ops = {
 	.hw_multicast_list_set       = hw_atl2_hw_multicast_list_set,
 	.hw_interrupt_moderation_set = hw_atl2_hw_interrupt_moderation_set,
 	.hw_rss_set                  = hw_atl2_hw_rss_set,
-	.hw_rss_hash_set             = hw_atl2_hw_rss_hash_set,
+	.hw_rss_hash_set             = hw_atl_b0_hw_rss_hash_set,
 	.hw_get_hw_stats             = hw_atl2_utils_get_hw_stats,
+	.hw_get_fw_version           = hw_atl2_utils_get_fw_version,
 	.hw_set_offload              = hw_atl2_hw_offload_set,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index dccc89df2223..eb1243256730 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -22,6 +22,15 @@
 #define HW_ATL2_MAC_MIN  1U
 #define HW_ATL2_MAC_MAX  38U
 
+/* interrupts */
+#define HW_ATL2_ERR_INT 8U
+#define HW_ATL2_INT_MASK  (0xFFFFFFFFU)
+
+#define HW_ATL2_TXBUF_MAX              128U
+#define HW_ATL2_RXBUF_MAX              192U
+
+#define HW_ATL2_RSS_REDIRECTION_MAX 64U
+
 #define HW_ATL2_TC_MAX 1U
 #define HW_ATL2_RSS_MAX 8U
 
@@ -57,6 +66,11 @@
 #define HW_ATL2_RPF_TAG_L4_MASK    (0x00000007 << HW_ATL2_RPF_TAG_L4_OFFSET)
 #define HW_ATL2_RPF_TAG_PCP_MASK   (0x00000007 << HW_ATL2_RPF_TAG_PCP_OFFSET)
 
+#define HW_ATL2_RPF_TAG_BASE_UC    BIT(HW_ATL2_RPF_TAG_UC_OFFSET)
+#define HW_ATL2_RPF_TAG_BASE_ALLMC BIT(HW_ATL2_RPF_TAG_ALLMC_OFFSET)
+#define HW_ATL2_RPF_TAG_BASE_UNTAG BIT(HW_ATL2_RPF_TAG_UNTAG_OFFSET)
+#define HW_ATL2_RPF_TAG_BASE_VLAN  BIT(HW_ATL2_RPF_TAG_VLAN_OFFSET)
+
 enum HW_ATL2_RPF_ART_INDEX {
 	HW_ATL2_RPF_L2_PROMISC_OFF_INDEX,
 	HW_ATL2_RPF_VLAN_PROMISC_OFF_INDEX,
@@ -65,6 +79,13 @@ enum HW_ATL2_RPF_ART_INDEX {
 	HW_ATL2_RPF_VLAN_USER_INDEX	= HW_ATL2_RPF_ET_PCP_USER_INDEX + 16,
 	HW_ATL2_RPF_PCP_TO_TC_INDEX	= HW_ATL2_RPF_VLAN_USER_INDEX +
 					  HW_ATL_VLAN_MAX_FILTERS,
+	HW_ATL2_RPF_VLAN_INDEX		= HW_ATL2_RPF_PCP_TO_TC_INDEX +
+					  AQ_CFG_TCS_MAX,
+	HW_ATL2_RPF_MAC_INDEX,
+	HW_ATL2_RPF_ALLMC_INDEX,
+	HW_ATL2_RPF_UNTAG_INDEX,
+	HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX,
+	HW_ATL2_RPF_L2_PROMISC_ON_INDEX,
 };
 
 #define HW_ATL2_ACTION(ACTION, RSS, INDEX, VALID) \
@@ -78,6 +99,33 @@ enum HW_ATL2_RPF_ART_INDEX {
 #define HW_ATL2_ACTION_ASSIGN_QUEUE(QUEUE) HW_ATL2_ACTION(1, 0, (QUEUE), 1)
 #define HW_ATL2_ACTION_ASSIGN_TC(TC) HW_ATL2_ACTION(1, 1, (TC), 1)
 
+enum HW_ATL2_RPF_RSS_HASH_TYPE {
+	HW_ATL2_RPF_RSS_HASH_TYPE_NONE = 0,
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV4 = BIT(0),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV4_TCP = BIT(1),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV4_UDP = BIT(2),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6 = BIT(3),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_TCP = BIT(4),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_UDP = BIT(5),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX = BIT(6),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_TCP = BIT(7),
+	HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_UDP = BIT(8),
+	HW_ATL2_RPF_RSS_HASH_TYPE_ALL = HW_ATL2_RPF_RSS_HASH_TYPE_IPV4 |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV4_TCP |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV4_UDP |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6 |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_TCP |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_UDP |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_TCP |
+					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_UDP,
+};
+
+#define HW_ATL_RSS_DISABLED 0x00000000U
+#define HW_ATL_RSS_ENABLED_3INDEX_BITS 0xB3333333U
+
+#define HW_ATL_MCAST_FLT_ANY_TO_HOST 0x00010FFFU
+
 struct hw_atl2_priv {
 	struct statistics_s last_stats;
 	unsigned int art_base_index;
-- 
cgit v1.2.3-59-g8ed1b


From 43c670c8e48a1817ae4b64683d7d65cad5bb0502 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 30 Apr 2020 11:04:45 +0300
Subject: net: atlantic: A2 ingress / egress hw configuration

Chip generations are mostly compatible register-wise, but there are still
some differences. Therefore we've made some of the first generation (A1) code
non-static to re-use it where possible.

Some pieces are A2 specific, in which case we redefine/extend such APIs.

Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  54 +++----
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |  23 +++
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 176 ++++++++++++---------
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |   3 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |   8 +
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |   5 +
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        |   8 +
 7 files changed, 172 insertions(+), 105 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index c46199f14ec4..cbb7a00d61b4 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -251,9 +251,10 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
-				    struct aq_nic_cfg_s *aq_nic_cfg)
+int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
+			     struct aq_nic_cfg_s *aq_nic_cfg)
 {
+	u64 rxcsum = !!(aq_nic_cfg->features & NETIF_F_RXCSUM);
 	unsigned int i;
 
 	/* TX checksums offloads*/
@@ -261,10 +262,8 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 	hw_atl_tpo_tcp_udp_crc_offload_en_set(self, 1);
 
 	/* RX checksums offloads*/
-	hw_atl_rpo_ipv4header_crc_offload_en_set(self, !!(aq_nic_cfg->features &
-						 NETIF_F_RXCSUM));
-	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, !!(aq_nic_cfg->features &
-					      NETIF_F_RXCSUM));
+	hw_atl_rpo_ipv4header_crc_offload_en_set(self, rxcsum);
+	hw_atl_rpo_tcp_udp_crc_offload_en_set(self, rxcsum);
 
 	/* LSO offloads*/
 	hw_atl_tdm_large_send_offload_en_set(self, 0xFFFFFFFFU);
@@ -272,7 +271,7 @@ static int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 	/* Outer VLAN tag offload */
 	hw_atl_rpo_outer_vlan_tag_mode_set(self, 1U);
 
-/* LRO offloads */
+	/* LRO offloads */
 	{
 		unsigned int val = (8U < HW_ATL_B0_LRO_RXD_MAX) ? 0x3U :
 			((4U < HW_ATL_B0_LRO_RXD_MAX) ? 0x2U :
@@ -384,7 +383,7 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
+int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
 {
 	unsigned int h = 0U;
 	unsigned int l = 0U;
@@ -479,16 +478,14 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self,
-				      struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self, struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 1, ring->idx);
 
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self,
-				      struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self, struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 1, ring->idx);
 
@@ -511,9 +508,8 @@ static int hw_atl_b0_hw_tx_ring_tail_update(struct aq_hw_s *self,
 	return 0;
 }
 
-static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
-				     struct aq_ring_s *ring,
-				     unsigned int frags)
+int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, struct aq_ring_s *ring,
+			      unsigned int frags)
 {
 	struct aq_ring_buff_s *buff = NULL;
 	struct hw_atl_txd_s *txd = NULL;
@@ -600,9 +596,8 @@ static int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self,
-				     struct aq_ring_s *aq_ring,
-				     struct aq_ring_param_s *aq_ring_param)
+int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+			      struct aq_ring_param_s *aq_ring_param)
 {
 	u32 dma_desc_addr_msw = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
 	u32 vlan_rx_stripping = self->aq_nic_cfg->is_vlan_rx_strip;
@@ -643,9 +638,8 @@ static int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self,
-				     struct aq_ring_s *aq_ring,
-				     struct aq_ring_param_s *aq_ring_param)
+int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+			      struct aq_ring_param_s *aq_ring_param)
 {
 	u32 dma_desc_msw_addr = (u32)(((u64)aq_ring->dx_ring_pa) >> 32);
 	u32 dma_desc_lsw_addr = (u32)aq_ring->dx_ring_pa;
@@ -673,9 +667,8 @@ static int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_rx_fill(struct aq_hw_s *self,
-				     struct aq_ring_s *ring,
-				     unsigned int sw_tail_old)
+int hw_atl_b0_hw_ring_rx_fill(struct aq_hw_s *self, struct aq_ring_s *ring,
+			      unsigned int sw_tail_old)
 {
 	for (; sw_tail_old != ring->sw_tail;
 		sw_tail_old = aq_ring_next_dx(ring, sw_tail_old)) {
@@ -734,8 +727,8 @@ static int hw_atl_b0_hw_ring_hwts_rx_receive(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
-					    struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
+				     struct aq_ring_s *ring)
 {
 	unsigned int hw_head_;
 	int err = 0;
@@ -753,8 +746,7 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self,
-					struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, struct aq_ring_s *ring)
 {
 	for (; ring->hw_head != ring->sw_tail;
 		ring->hw_head = aq_ring_next_dx(ring, ring->hw_head)) {
@@ -1071,16 +1063,14 @@ err_exit:
 	return err;
 }
 
-static int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self,
-				     struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
 {
 	hw_atl_tdm_tx_desc_en_set(self, 0U, ring->idx);
 
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self,
-				     struct aq_ring_s *ring)
+int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
 {
 	hw_atl_rdm_rx_desc_en_set(self, 0U, ring->idx);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index ea7136b06b32..f5091d79ab43 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -37,6 +37,29 @@ int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
 			      struct aq_rss_parameters *rss_params);
 int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
 			 struct aq_rss_parameters *rss_params);
+int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
+			     struct aq_nic_cfg_s *aq_nic_cfg);
+
+int hw_atl_b0_hw_ring_tx_start(struct aq_hw_s *self, struct aq_ring_s *ring);
+int hw_atl_b0_hw_ring_rx_start(struct aq_hw_s *self, struct aq_ring_s *ring);
+
+int hw_atl_b0_hw_ring_rx_init(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+			      struct aq_ring_param_s *aq_ring_param);
+int hw_atl_b0_hw_ring_rx_fill(struct aq_hw_s *self, struct aq_ring_s *ring,
+			      unsigned int sw_tail_old);
+int hw_atl_b0_hw_ring_rx_receive(struct aq_hw_s *self, struct aq_ring_s *ring);
+
+int hw_atl_b0_hw_ring_tx_init(struct aq_hw_s *self, struct aq_ring_s *aq_ring,
+			      struct aq_ring_param_s *aq_ring_param);
+int hw_atl_b0_hw_ring_tx_xmit(struct aq_hw_s *self, struct aq_ring_s *ring,
+			      unsigned int frags);
+int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
+				     struct aq_ring_s *ring);
+
+int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring);
+int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring);
+
+int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr);
 
 int hw_atl_b0_hw_start(struct aq_hw_s *self);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index ad0b22b3c01f..04d194f754fa 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -5,6 +5,7 @@
 
 #include "aq_hw.h"
 #include "aq_hw_utils.h"
+#include "aq_ring.h"
 #include "aq_nic.h"
 #include "hw_atl/hw_atl_b0.h"
 #include "hw_atl/hw_atl_utils.h"
@@ -174,12 +175,6 @@ static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 	return hw_atl_b0_hw_rss_set(self, rss_params);
 }
 
-static int hw_atl2_hw_offload_set(struct aq_hw_s *self,
-				  struct aq_nic_cfg_s *aq_nic_cfg)
-{
-	return -EOPNOTSUPP;
-}
-
 static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
 {
 	/* Tx TC/RSS number config */
@@ -359,11 +354,6 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl2_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr)
-{
-	return -EOPNOTSUPP;
-}
-
 static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 {
 	static u32 aq_hw_atl2_igcr_table_[4][2] = {
@@ -390,7 +380,7 @@ static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 	hw_atl2_hw_init_tx_path(self);
 	hw_atl2_hw_init_rx_path(self);
 
-	hw_atl2_hw_mac_addr_set(self, mac_addr);
+	hw_atl_b0_hw_mac_addr_set(self, mac_addr);
 
 	self->aq_fw_ops->set_link_speed(self, aq_nic_cfg->link_speed_msk);
 	self->aq_fw_ops->set_state(self, MPI_INIT);
@@ -424,61 +414,24 @@ static int hw_atl2_hw_init(struct aq_hw_s *self, u8 *mac_addr)
 				   ((HW_ATL2_ERR_INT << 0x10) |
 				    (1U << 0x17)), 0U);
 
-	hw_atl2_hw_offload_set(self, aq_nic_cfg);
+	hw_atl_b0_hw_offload_set(self, aq_nic_cfg);
 
 err_exit:
 	return err;
 }
 
-static int hw_atl2_hw_ring_tx_start(struct aq_hw_s *self,
-				    struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_rx_start(struct aq_hw_s *self,
-				    struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_tx_xmit(struct aq_hw_s *self,
-				   struct aq_ring_s *ring,
-				   unsigned int frags)
-{
-	return -EOPNOTSUPP;
-}
-
 static int hw_atl2_hw_ring_rx_init(struct aq_hw_s *self,
 				   struct aq_ring_s *aq_ring,
 				   struct aq_ring_param_s *aq_ring_param)
 {
-	return -EOPNOTSUPP;
+	return hw_atl_b0_hw_ring_rx_init(self, aq_ring, aq_ring_param);
 }
 
 static int hw_atl2_hw_ring_tx_init(struct aq_hw_s *self,
 				   struct aq_ring_s *aq_ring,
 				   struct aq_ring_param_s *aq_ring_param)
 {
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_rx_fill(struct aq_hw_s *self, struct aq_ring_s *ring,
-				   unsigned int sw_tail_old)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_tx_head_update(struct aq_hw_s *self,
-					  struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_rx_receive(struct aq_hw_s *self,
-				      struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
+	return hw_atl_b0_hw_ring_tx_init(self, aq_ring, aq_ring_param);
 }
 
 #define IS_FILTER_ENABLED(_F_) ((packet_filter & (_F_)) ? 1U : 0U)
@@ -536,7 +489,94 @@ err_exit:
 
 static int hw_atl2_hw_interrupt_moderation_set(struct aq_hw_s *self)
 {
-	return -EOPNOTSUPP;
+	unsigned int i = 0U;
+	u32 itr_tx = 2U;
+	u32 itr_rx = 2U;
+
+	switch (self->aq_nic_cfg->itr) {
+	case  AQ_CFG_INTERRUPT_MODERATION_ON:
+	case  AQ_CFG_INTERRUPT_MODERATION_AUTO:
+		hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 0U);
+		hw_atl_tdm_tdm_intr_moder_en_set(self, 1U);
+		hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 0U);
+		hw_atl_rdm_rdm_intr_moder_en_set(self, 1U);
+
+		if (self->aq_nic_cfg->itr == AQ_CFG_INTERRUPT_MODERATION_ON) {
+			/* HW timers are in 2us units */
+			int tx_max_timer = self->aq_nic_cfg->tx_itr / 2;
+			int tx_min_timer = tx_max_timer / 2;
+
+			int rx_max_timer = self->aq_nic_cfg->rx_itr / 2;
+			int rx_min_timer = rx_max_timer / 2;
+
+			tx_max_timer = min(HW_ATL2_INTR_MODER_MAX,
+					   tx_max_timer);
+			tx_min_timer = min(HW_ATL2_INTR_MODER_MIN,
+					   tx_min_timer);
+			rx_max_timer = min(HW_ATL2_INTR_MODER_MAX,
+					   rx_max_timer);
+			rx_min_timer = min(HW_ATL2_INTR_MODER_MIN,
+					   rx_min_timer);
+
+			itr_tx |= tx_min_timer << 0x8U;
+			itr_tx |= tx_max_timer << 0x10U;
+			itr_rx |= rx_min_timer << 0x8U;
+			itr_rx |= rx_max_timer << 0x10U;
+		} else {
+			static unsigned int hw_atl2_timers_table_tx_[][2] = {
+				{0xfU, 0xffU}, /* 10Gbit */
+				{0xfU, 0x1ffU}, /* 5Gbit */
+				{0xfU, 0x1ffU}, /* 5Gbit 5GS */
+				{0xfU, 0x1ffU}, /* 2.5Gbit */
+				{0xfU, 0x1ffU}, /* 1Gbit */
+				{0xfU, 0x1ffU}, /* 100Mbit */
+			};
+			static unsigned int hw_atl2_timers_table_rx_[][2] = {
+				{0x6U, 0x38U},/* 10Gbit */
+				{0xCU, 0x70U},/* 5Gbit */
+				{0xCU, 0x70U},/* 5Gbit 5GS */
+				{0x18U, 0xE0U},/* 2.5Gbit */
+				{0x30U, 0x80U},/* 1Gbit */
+				{0x4U, 0x50U},/* 100Mbit */
+			};
+			unsigned int mbps = self->aq_link_status.mbps;
+			unsigned int speed_index;
+
+			speed_index = hw_atl_utils_mbps_2_speed_index(mbps);
+
+			/* Update user visible ITR settings */
+			self->aq_nic_cfg->tx_itr = hw_atl2_timers_table_tx_
+							[speed_index][1] * 2;
+			self->aq_nic_cfg->rx_itr = hw_atl2_timers_table_rx_
+							[speed_index][1] * 2;
+
+			itr_tx |= hw_atl2_timers_table_tx_
+						[speed_index][0] << 0x8U;
+			itr_tx |= hw_atl2_timers_table_tx_
+						[speed_index][1] << 0x10U;
+
+			itr_rx |= hw_atl2_timers_table_rx_
+						[speed_index][0] << 0x8U;
+			itr_rx |= hw_atl2_timers_table_rx_
+						[speed_index][1] << 0x10U;
+		}
+		break;
+	case AQ_CFG_INTERRUPT_MODERATION_OFF:
+		hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 1U);
+		hw_atl_tdm_tdm_intr_moder_en_set(self, 0U);
+		hw_atl_rdm_rx_desc_wr_wb_irq_en_set(self, 1U);
+		hw_atl_rdm_rdm_intr_moder_en_set(self, 0U);
+		itr_tx = 0U;
+		itr_rx = 0U;
+		break;
+	}
+
+	for (i = HW_ATL2_RINGS_MAX; i--;) {
+		hw_atl2_reg_tx_intr_moder_ctrl_set(self, itr_tx, i);
+		hw_atl_reg_rx_intr_moder_ctrl_set(self, itr_rx, i);
+	}
+
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl2_hw_stop(struct aq_hw_s *self)
@@ -546,16 +586,6 @@ static int hw_atl2_hw_stop(struct aq_hw_s *self)
 	return 0;
 }
 
-static int hw_atl2_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
-}
-
-static int hw_atl2_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
-{
-	return -EOPNOTSUPP;
-}
-
 static struct aq_stats_s *hw_atl2_utils_get_hw_stats(struct aq_hw_s *self)
 {
 	return &self->curr_stats;
@@ -619,21 +649,21 @@ static int hw_atl2_hw_vlan_ctrl(struct aq_hw_s *self, bool enable)
 const struct aq_hw_ops hw_atl2_ops = {
 	.hw_soft_reset        = hw_atl2_utils_soft_reset,
 	.hw_prepare           = hw_atl2_utils_initfw,
-	.hw_set_mac_address   = hw_atl2_hw_mac_addr_set,
+	.hw_set_mac_address   = hw_atl_b0_hw_mac_addr_set,
 	.hw_init              = hw_atl2_hw_init,
 	.hw_reset             = hw_atl2_hw_reset,
 	.hw_start             = hw_atl_b0_hw_start,
-	.hw_ring_tx_start     = hw_atl2_hw_ring_tx_start,
-	.hw_ring_tx_stop      = hw_atl2_hw_ring_tx_stop,
-	.hw_ring_rx_start     = hw_atl2_hw_ring_rx_start,
-	.hw_ring_rx_stop      = hw_atl2_hw_ring_rx_stop,
+	.hw_ring_tx_start     = hw_atl_b0_hw_ring_tx_start,
+	.hw_ring_tx_stop      = hw_atl_b0_hw_ring_tx_stop,
+	.hw_ring_rx_start     = hw_atl_b0_hw_ring_rx_start,
+	.hw_ring_rx_stop      = hw_atl_b0_hw_ring_rx_stop,
 	.hw_stop              = hw_atl2_hw_stop,
 
-	.hw_ring_tx_xmit         = hw_atl2_hw_ring_tx_xmit,
-	.hw_ring_tx_head_update  = hw_atl2_hw_ring_tx_head_update,
+	.hw_ring_tx_xmit         = hw_atl_b0_hw_ring_tx_xmit,
+	.hw_ring_tx_head_update  = hw_atl_b0_hw_ring_tx_head_update,
 
-	.hw_ring_rx_receive      = hw_atl2_hw_ring_rx_receive,
-	.hw_ring_rx_fill         = hw_atl2_hw_ring_rx_fill,
+	.hw_ring_rx_receive      = hw_atl_b0_hw_ring_rx_receive,
+	.hw_ring_rx_fill         = hw_atl_b0_hw_ring_rx_fill,
 
 	.hw_irq_enable           = hw_atl_b0_hw_irq_enable,
 	.hw_irq_disable          = hw_atl_b0_hw_irq_disable,
@@ -650,5 +680,5 @@ const struct aq_hw_ops hw_atl2_ops = {
 	.hw_rss_hash_set             = hw_atl_b0_hw_rss_hash_set,
 	.hw_get_hw_stats             = hw_atl2_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl2_utils_get_fw_version,
-	.hw_set_offload              = hw_atl2_hw_offload_set,
+	.hw_set_offload              = hw_atl_b0_hw_offload_set,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index eb1243256730..e66b3583bfe9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -34,6 +34,9 @@
 #define HW_ATL2_TC_MAX 1U
 #define HW_ATL2_RSS_MAX 8U
 
+#define HW_ATL2_INTR_MODER_MAX  0x1FF
+#define HW_ATL2_INTR_MODER_MIN  0xFF
+
 #define HW_ATL2_MIN_RXD \
 	(ALIGN(AQ_CFG_SKB_FRAGS_MAX + 1U, AQ_HW_RXD_MULTIPLE))
 #define HW_ATL2_MIN_TXD \
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index af176e1e5a18..e779d70fde66 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -68,6 +68,14 @@ void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en)
 			    clk_gate_en);
 }
 
+void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+					u32 tx_intr_moderation_ctl,
+					u32 queue)
+{
+	aq_hw_write_reg(aq_hw, HW_ATL2_TX_INTR_MODERATION_CTL_ADR(queue),
+			tx_intr_moderation_ctl);
+}
+
 void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
 						    u32 max_credit,
 						    u32 tc)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index 4acbbceb623f..8c6d78a64d42 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -10,6 +10,11 @@
 
 struct aq_hw_s;
 
+/* Set TX Interrupt Moderation Control Register */
+void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
+					u32 tx_intr_moderation_ctl,
+					u32 queue);
+
 /** Set RSS HASH type */
 void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index 14b78e090950..cde9e9d2836d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -178,6 +178,14 @@
 /* default value of bitfield data_tc{t}_weight[8:0] */
 #define HW_ATL2_TPS_DATA_TCTWEIGHT_DEFAULT 0x0
 
+/* tx interrupt moderation control register definitions
+ * Preprocessor definitions for TX Interrupt Moderation Control Register
+ * Base Address: 0x00007c28
+ * Parameter: queue {Q} | stride size 0x40 | range [0, 31]
+ */
+
+#define HW_ATL2_TX_INTR_MODERATION_CTL_ADR(queue) (0x00007c28u + (queue) * 0x40)
+
 /* Launch time control register */
 #define HW_ATL2_LT_CTRL_ADR 0x00007a1c
 
-- 
cgit v1.2.3-59-g8ed1b


From bf604bc90b30a7c9d024e3be9b3ad3b0dc3f51c4 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Apr 2020 09:21:34 +0100
Subject: net: dsa: mv88e6xxx: use generic clause 45 definitions

The private MV88E6390_PCS_CONTROL_1 definitions in serdes.h reflect
the IEEE 802.3 standard PCS control register 1 definitions, only
offset by 0x1000 in the PHYXS register space.  Rather than inventing
our own, use those that already exist, and name the register
MV88E6390_10G_CTRL1.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 12 ++++++------
 drivers/net/dsa/mv88e6xxx/serdes.h |  6 +-----
 2 files changed, 7 insertions(+), 11 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 2098f19b534d..33d9923cf7c5 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -534,21 +534,21 @@ static int mv88e6390_serdes_power_10g(struct mv88e6xxx_chip *chip, u8 lane,
 	int err;
 
 	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
-				    MV88E6390_PCS_CONTROL_1, &val);
+				    MV88E6390_10G_CTRL1, &val);
 
 	if (err)
 		return err;
 
 	if (up)
-		new_val = val & ~(MV88E6390_PCS_CONTROL_1_RESET |
-				  MV88E6390_PCS_CONTROL_1_LOOPBACK |
-				  MV88E6390_PCS_CONTROL_1_PDOWN);
+		new_val = val & ~(MDIO_CTRL1_RESET |
+				  MDIO_PCS_CTRL1_LOOPBACK |
+				  MDIO_CTRL1_LPOWER);
 	else
-		new_val = val | MV88E6390_PCS_CONTROL_1_PDOWN;
+		new_val = val | MDIO_CTRL1_LPOWER;
 
 	if (val != new_val)
 		err = mv88e6390_serdes_write(chip, lane, MDIO_MMD_PHYXS,
-					     MV88E6390_PCS_CONTROL_1, new_val);
+					     MV88E6390_10G_CTRL1, new_val);
 
 	return err;
 }
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 7990cadba4c2..71e3c3d0a24e 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -40,11 +40,7 @@
 #define MV88E6390_PORT10_LANE3		0x17
 
 /* 10GBASE-R and 10GBASE-X4/X2 */
-#define MV88E6390_PCS_CONTROL_1		0x1000
-#define MV88E6390_PCS_CONTROL_1_RESET		BIT(15)
-#define MV88E6390_PCS_CONTROL_1_LOOPBACK	BIT(14)
-#define MV88E6390_PCS_CONTROL_1_SPEED		BIT(13)
-#define MV88E6390_PCS_CONTROL_1_PDOWN		BIT(11)
+#define MV88E6390_10G_CTRL1		(0x1000 + MDIO_CTRL1)
 
 /* 1000BASE-X and SGMII */
 #define MV88E6390_SGMII_BMCR		(0x2000 + MII_BMCR)
-- 
cgit v1.2.3-59-g8ed1b


From 7019bba4330750a29d87b6ce70ac6fabc007b3dc Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Thu, 30 Apr 2020 09:21:39 +0100
Subject: net: dsa: mv88e6xxx: 88e6390 10G serdes support

Add support for reading and reporting the 10G link status on the
88e6390 in addition to the 1000BASE-X/2500BASE-X/SGMII status.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mv88e6xxx/serdes.c | 43 ++++++++++++++++++++++++++++++++++++--
 drivers/net/dsa/mv88e6xxx/serdes.h |  1 +
 2 files changed, 42 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/serdes.c b/drivers/net/dsa/mv88e6xxx/serdes.c
index 33d9923cf7c5..9c07b4f3d345 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.c
+++ b/drivers/net/dsa/mv88e6xxx/serdes.c
@@ -748,8 +748,8 @@ int mv88e6390_serdes_pcs_config(struct mv88e6xxx_chip *chip, int port,
 				      MV88E6390_SGMII_BMCR, bmcr);
 }
 
-int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
-				   u8 lane, struct phylink_link_state *state)
+static int mv88e6390_serdes_pcs_get_state_sgmii(struct mv88e6xxx_chip *chip,
+	int port, u8 lane, struct phylink_link_state *state)
 {
 	u16 lpa, status;
 	int err;
@@ -771,6 +771,45 @@ int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
 	return mv88e6xxx_serdes_pcs_get_state(chip, status, lpa, state);
 }
 
+static int mv88e6390_serdes_pcs_get_state_10g(struct mv88e6xxx_chip *chip,
+	int port, u8 lane, struct phylink_link_state *state)
+{
+	u16 status;
+	int err;
+
+	err = mv88e6390_serdes_read(chip, lane, MDIO_MMD_PHYXS,
+				    MV88E6390_10G_STAT1, &status);
+	if (err)
+		return err;
+
+	state->link = !!(status & MDIO_STAT1_LSTATUS);
+	if (state->link) {
+		state->speed = SPEED_10000;
+		state->duplex = DUPLEX_FULL;
+	}
+
+	return 0;
+}
+
+int mv88e6390_serdes_pcs_get_state(struct mv88e6xxx_chip *chip, int port,
+				   u8 lane, struct phylink_link_state *state)
+{
+	switch (state->interface) {
+	case PHY_INTERFACE_MODE_SGMII:
+	case PHY_INTERFACE_MODE_1000BASEX:
+	case PHY_INTERFACE_MODE_2500BASEX:
+		return mv88e6390_serdes_pcs_get_state_sgmii(chip, port, lane,
+							    state);
+	case PHY_INTERFACE_MODE_XAUI:
+	case PHY_INTERFACE_MODE_RXAUI:
+		return mv88e6390_serdes_pcs_get_state_10g(chip, port, lane,
+							  state);
+
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 int mv88e6390_serdes_pcs_an_restart(struct mv88e6xxx_chip *chip, int port,
 				    u8 lane)
 {
diff --git a/drivers/net/dsa/mv88e6xxx/serdes.h b/drivers/net/dsa/mv88e6xxx/serdes.h
index 71e3c3d0a24e..14315f26228a 100644
--- a/drivers/net/dsa/mv88e6xxx/serdes.h
+++ b/drivers/net/dsa/mv88e6xxx/serdes.h
@@ -41,6 +41,7 @@
 
 /* 10GBASE-R and 10GBASE-X4/X2 */
 #define MV88E6390_10G_CTRL1		(0x1000 + MDIO_CTRL1)
+#define MV88E6390_10G_STAT1		(0x1000 + MDIO_STAT1)
 
 /* 1000BASE-X and SGMII */
 #define MV88E6390_SGMII_BMCR		(0x2000 + MII_BMCR)
-- 
cgit v1.2.3-59-g8ed1b


From aba7924fa5a490d6c586b9cd7d0f78f872a40d1d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 30 Apr 2020 17:13:22 -0500
Subject: net: ipa: pass channel pointer to gsi_channel_state()

Pass a channel pointer rather than a GSI pointer and channel ID to
gsi_channel_state().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 845478a19a4f..6946c39b664a 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -416,12 +416,13 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 }
 
 /* Return the hardware's notion of the current state of a channel */
-static enum gsi_channel_state
-gsi_channel_state(struct gsi *gsi, u32 channel_id)
+static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel)
 {
+	u32 channel_id = gsi_channel_id(channel);
+	void *virt = channel->gsi->virt;
 	u32 val;
 
-	val = ioread32(gsi->virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id));
+	val = ioread32(virt + GSI_CH_C_CNTXT_0_OFFSET(channel_id));
 
 	return u32_get_bits(val, CHSTATE_FMASK);
 }
@@ -453,7 +454,7 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 	int ret;
 
 	/* Get initial channel state */
-	channel->state = gsi_channel_state(gsi, channel_id);
+	channel->state = gsi_channel_state(channel);
 
 	if (channel->state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
 		return -EINVAL;
@@ -940,7 +941,7 @@ static void gsi_isr_chan_ctrl(struct gsi *gsi)
 		channel_mask ^= BIT(channel_id);
 
 		channel = &gsi->channel[channel_id];
-		channel->state = gsi_channel_state(gsi, channel_id);
+		channel->state = gsi_channel_state(channel);
 
 		complete(&channel->completion);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From a2003b303875b41542bad1c2e81800fdd4c27c29 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Thu, 30 Apr 2020 17:13:23 -0500
Subject: net: ipa: do not cache channel state

It is possible for a GSI channel's state to be changed as a result
of an action by a different execution environment.  Specifically,
the modem is able to issue a GSI generic command that causes a state
change on a GSI channel associated with the AP.

A channel's state only needs to be known when a channel is allocated
or deallocated, started or stopped, or reset.  So there is little
value in caching the state anyway.

Stop recording a copy of the channel's last known state, and instead
fetch the true state from hardware whenever it's needed.  In such
cases, *do* record the state in a local variable, in case an error
message reports it (so the value reported is the value seen).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 87 ++++++++++++++++++++++++++++++++-------------------
 drivers/net/ipa/gsi.h |  3 +-
 2 files changed, 55 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 6946c39b664a..8184d34124b7 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -415,7 +415,7 @@ static void gsi_evt_ring_de_alloc_command(struct gsi *gsi, u32 evt_ring_id)
 			evt_ring->state);
 }
 
-/* Return the hardware's notion of the current state of a channel */
+/* Fetch the current state of a channel from hardware */
 static enum gsi_channel_state gsi_channel_state(struct gsi_channel *channel)
 {
 	u32 channel_id = gsi_channel_id(channel);
@@ -433,16 +433,18 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode)
 {
 	struct completion *completion = &channel->completion;
 	u32 channel_id = gsi_channel_id(channel);
+	struct gsi *gsi = channel->gsi;
 	u32 val;
 
 	val = u32_encode_bits(channel_id, CH_CHID_FMASK);
 	val |= u32_encode_bits(opcode, CH_OPCODE_FMASK);
 
-	if (gsi_command(channel->gsi, GSI_CH_CMD_OFFSET, val, completion))
+	if (gsi_command(gsi, GSI_CH_CMD_OFFSET, val, completion))
 		return 0;	/* Success! */
 
-	dev_err(channel->gsi->dev, "GSI command %u to channel %u timed out "
-		"(state is %u)\n", opcode, channel_id, channel->state);
+	dev_err(gsi->dev,
+		"GSI command %u to channel %u timed out (state is %u)\n",
+		opcode, channel_id, gsi_channel_state(channel));
 
 	return -ETIMEDOUT;
 }
@@ -451,18 +453,21 @@ gsi_channel_command(struct gsi_channel *channel, enum gsi_ch_cmd_opcode opcode)
 static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
+	enum gsi_channel_state state;
 	int ret;
 
 	/* Get initial channel state */
-	channel->state = gsi_channel_state(channel);
-
-	if (channel->state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
+	state = gsi_channel_state(channel);
+	if (state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
 		return -EINVAL;
 
 	ret = gsi_channel_command(channel, GSI_CH_ALLOCATE);
-	if (!ret && channel->state != GSI_CHANNEL_STATE_ALLOCATED) {
+
+	/* Channel state will normally have been updated */
+	state = gsi_channel_state(channel);
+	if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED) {
 		dev_err(gsi->dev, "bad channel state (%u) after alloc\n",
-			channel->state);
+			state);
 		ret = -EIO;
 	}
 
@@ -472,18 +477,21 @@ static int gsi_channel_alloc_command(struct gsi *gsi, u32 channel_id)
 /* Start an ALLOCATED channel */
 static int gsi_channel_start_command(struct gsi_channel *channel)
 {
-	enum gsi_channel_state state = channel->state;
+	enum gsi_channel_state state;
 	int ret;
 
+	state = gsi_channel_state(channel);
 	if (state != GSI_CHANNEL_STATE_ALLOCATED &&
 	    state != GSI_CHANNEL_STATE_STOPPED)
 		return -EINVAL;
 
 	ret = gsi_channel_command(channel, GSI_CH_START);
-	if (!ret && channel->state != GSI_CHANNEL_STATE_STARTED) {
+
+	/* Channel state will normally have been updated */
+	state = gsi_channel_state(channel);
+	if (!ret && state != GSI_CHANNEL_STATE_STARTED) {
 		dev_err(channel->gsi->dev,
-			"bad channel state (%u) after start\n",
-			channel->state);
+			"bad channel state (%u) after start\n", state);
 		ret = -EIO;
 	}
 
@@ -493,23 +501,27 @@ static int gsi_channel_start_command(struct gsi_channel *channel)
 /* Stop a GSI channel in STARTED state */
 static int gsi_channel_stop_command(struct gsi_channel *channel)
 {
-	enum gsi_channel_state state = channel->state;
+	enum gsi_channel_state state;
 	int ret;
 
+	state = gsi_channel_state(channel);
 	if (state != GSI_CHANNEL_STATE_STARTED &&
 	    state != GSI_CHANNEL_STATE_STOP_IN_PROC)
 		return -EINVAL;
 
 	ret = gsi_channel_command(channel, GSI_CH_STOP);
-	if (ret || channel->state == GSI_CHANNEL_STATE_STOPPED)
+
+	/* Channel state will normally have been updated */
+	state = gsi_channel_state(channel);
+	if (ret || state == GSI_CHANNEL_STATE_STOPPED)
 		return ret;
 
 	/* We may have to try again if stop is in progress */
-	if (channel->state == GSI_CHANNEL_STATE_STOP_IN_PROC)
+	if (state == GSI_CHANNEL_STATE_STOP_IN_PROC)
 		return -EAGAIN;
 
-	dev_err(channel->gsi->dev, "bad channel state (%u) after stop\n",
-		channel->state);
+	dev_err(channel->gsi->dev,
+		"bad channel state (%u) after stop\n", state);
 
 	return -EIO;
 }
@@ -517,41 +529,49 @@ static int gsi_channel_stop_command(struct gsi_channel *channel)
 /* Reset a GSI channel in ALLOCATED or ERROR state. */
 static void gsi_channel_reset_command(struct gsi_channel *channel)
 {
+	enum gsi_channel_state state;
 	int ret;
 
 	msleep(1);	/* A short delay is required before a RESET command */
 
-	if (channel->state != GSI_CHANNEL_STATE_STOPPED &&
-	    channel->state != GSI_CHANNEL_STATE_ERROR) {
+	state = gsi_channel_state(channel);
+	if (state != GSI_CHANNEL_STATE_STOPPED &&
+	    state != GSI_CHANNEL_STATE_ERROR) {
 		dev_err(channel->gsi->dev,
-			"bad channel state (%u) before reset\n",
-			channel->state);
+			"bad channel state (%u) before reset\n", state);
 		return;
 	}
 
 	ret = gsi_channel_command(channel, GSI_CH_RESET);
-	if (!ret && channel->state != GSI_CHANNEL_STATE_ALLOCATED)
+
+	/* Channel state will normally have been updated */
+	state = gsi_channel_state(channel);
+	if (!ret && state != GSI_CHANNEL_STATE_ALLOCATED)
 		dev_err(channel->gsi->dev,
-			"bad channel state (%u) after reset\n",
-			channel->state);
+			"bad channel state (%u) after reset\n", state);
 }
 
 /* Deallocate an ALLOCATED GSI channel */
 static void gsi_channel_de_alloc_command(struct gsi *gsi, u32 channel_id)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
+	enum gsi_channel_state state;
 	int ret;
 
-	if (channel->state != GSI_CHANNEL_STATE_ALLOCATED) {
-		dev_err(gsi->dev, "bad channel state (%u) before dealloc\n",
-			channel->state);
+	state = gsi_channel_state(channel);
+	if (state != GSI_CHANNEL_STATE_ALLOCATED) {
+		dev_err(gsi->dev,
+			"bad channel state (%u) before dealloc\n", state);
 		return;
 	}
 
 	ret = gsi_channel_command(channel, GSI_CH_DE_ALLOC);
-	if (!ret && channel->state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
-		dev_err(gsi->dev, "bad channel state (%u) after dealloc\n",
-			channel->state);
+
+	/* Channel state will normally have been updated */
+	state = gsi_channel_state(channel);
+	if (!ret && state != GSI_CHANNEL_STATE_NOT_ALLOCATED)
+		dev_err(gsi->dev,
+			"bad channel state (%u) after dealloc\n", state);
 }
 
 /* Ring an event ring doorbell, reporting the last entry processed by the AP.
@@ -778,6 +798,7 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
 int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
+	enum gsi_channel_state state;
 	u32 retries;
 	int ret;
 
@@ -787,7 +808,8 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 	 * STOP command timed out.  We won't stop a channel if stopping it
 	 * was successful previously (so we still want the freeze above).
 	 */
-	if (channel->state == GSI_CHANNEL_STATE_STOPPED)
+	state = gsi_channel_state(channel);
+	if (state == GSI_CHANNEL_STATE_STOPPED)
 		return 0;
 
 	/* RX channels might require a little time to enter STOPPED state */
@@ -941,7 +963,6 @@ static void gsi_isr_chan_ctrl(struct gsi *gsi)
 		channel_mask ^= BIT(channel_id);
 
 		channel = &gsi->channel[channel_id];
-		channel->state = gsi_channel_state(channel);
 
 		complete(&channel->completion);
 	}
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index 0698ff1ae7a6..19471017fadf 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -113,8 +113,7 @@ struct gsi_channel {
 	u16 tre_count;
 	u16 event_count;
 
-	struct completion completion;	/* signals channel state changes */
-	enum gsi_channel_state state;
+	struct completion completion;	/* signals channel command completion */
 
 	struct gsi_ring tre_ring;
 	u32 evt_ring_id;
-- 
cgit v1.2.3-59-g8ed1b


From eb236c2994b031f805be691fa9ea39cba4690166 Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Thu, 30 Apr 2020 16:26:51 -0700
Subject: net: bcmgenet: Move wake-up event out of side band ISR

The side band interrupt service routine is not available on chips
like 7211, or rather, it does not permit the signaling of wake-up
events due to the complex interrupt hierarchy.

Move the wake-up event accounting into a .resume_noirq function,
account for possible wake-up events and clear the MPD/HFB interrupts
from there, while leaving the hardware untouched until the resume
function proceeds with doing its usual business.

Because bcmgenet_wol_power_down_cfg() now enables the MPD and HFB
interrupts, it is invoked by a .suspend_noirq function to prevent
the servicing of interrupts after the clocks have been disabled.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/genet/bcmgenet.c     | 72 ++++++++++++++++++----
 drivers/net/ethernet/broadcom/genet/bcmgenet.h     |  2 +
 drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c |  6 ++
 3 files changed, 67 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.c b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
index ad614d7201bd..ff31da0ed846 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.c
@@ -3270,10 +3270,7 @@ static irqreturn_t bcmgenet_isr0(int irq, void *dev_id)
 
 static irqreturn_t bcmgenet_wol_isr(int irq, void *dev_id)
 {
-	struct bcmgenet_priv *priv = dev_id;
-
-	pm_wakeup_event(&priv->pdev->dev, 0);
-
+	/* Acknowledge the interrupt */
 	return IRQ_HANDLED;
 }
 
@@ -4174,13 +4171,12 @@ static void bcmgenet_shutdown(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-static int bcmgenet_resume(struct device *d)
+static int bcmgenet_resume_noirq(struct device *d)
 {
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	unsigned long dma_ctrl;
-	u32 offset, reg;
 	int ret;
+	u32 reg;
 
 	if (!netif_running(dev))
 		return 0;
@@ -4190,6 +4186,34 @@ static int bcmgenet_resume(struct device *d)
 	if (ret)
 		return ret;
 
+	if (device_may_wakeup(d) && priv->wolopts) {
+		/* Account for Wake-on-LAN events and clear those events
+		 * (Some devices need more time between enabling the clocks
+		 *  and the interrupt register reflecting the wake event so
+		 *  read the register twice)
+		 */
+		reg = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT);
+		reg = bcmgenet_intrl2_0_readl(priv, INTRL2_CPU_STAT);
+		if (reg & UMAC_IRQ_WAKE_EVENT)
+			pm_wakeup_event(&priv->pdev->dev, 0);
+	}
+
+	bcmgenet_intrl2_0_writel(priv, UMAC_IRQ_WAKE_EVENT, INTRL2_CPU_CLEAR);
+
+	return 0;
+}
+
+static int bcmgenet_resume(struct device *d)
+{
+	struct net_device *dev = dev_get_drvdata(d);
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	unsigned long dma_ctrl;
+	u32 offset, reg;
+	int ret;
+
+	if (!netif_running(dev))
+		return 0;
+
 	/* From WOL-enabled suspend, switch to regular clock */
 	if (device_may_wakeup(d) && priv->wolopts)
 		bcmgenet_power_up(priv, GENET_POWER_WOL_MAGIC);
@@ -4262,7 +4286,6 @@ static int bcmgenet_suspend(struct device *d)
 {
 	struct net_device *dev = dev_get_drvdata(d);
 	struct bcmgenet_priv *priv = netdev_priv(dev);
-	int ret = 0;
 	u32 offset;
 
 	if (!netif_running(dev))
@@ -4282,23 +4305,46 @@ static int bcmgenet_suspend(struct device *d)
 	priv->hfb_en[2] = bcmgenet_hfb_reg_readl(priv, offset + sizeof(u32));
 	bcmgenet_hfb_reg_writel(priv, 0, HFB_CTRL);
 
+	return 0;
+}
+
+static int bcmgenet_suspend_noirq(struct device *d)
+{
+	struct net_device *dev = dev_get_drvdata(d);
+	struct bcmgenet_priv *priv = netdev_priv(dev);
+	int ret = 0;
+
+	if (!netif_running(dev))
+		return 0;
+
 	/* Prepare the device for Wake-on-LAN and switch to the slow clock */
 	if (device_may_wakeup(d) && priv->wolopts)
 		ret = bcmgenet_power_down(priv, GENET_POWER_WOL_MAGIC);
 	else if (priv->internal_phy)
 		ret = bcmgenet_power_down(priv, GENET_POWER_PASSIVE);
 
+	/* Let the framework handle resumption and leave the clocks on */
+	if (ret)
+		return ret;
+
 	/* Turn off the clocks */
 	clk_disable_unprepare(priv->clk);
 
-	if (ret)
-		bcmgenet_resume(d);
-
-	return ret;
+	return 0;
 }
+#else
+#define bcmgenet_suspend	NULL
+#define bcmgenet_suspend_noirq	NULL
+#define bcmgenet_resume		NULL
+#define bcmgenet_resume_noirq	NULL
 #endif /* CONFIG_PM_SLEEP */
 
-static SIMPLE_DEV_PM_OPS(bcmgenet_pm_ops, bcmgenet_suspend, bcmgenet_resume);
+static const struct dev_pm_ops bcmgenet_pm_ops = {
+	.suspend	= bcmgenet_suspend,
+	.suspend_noirq	= bcmgenet_suspend_noirq,
+	.resume		= bcmgenet_resume,
+	.resume_noirq	= bcmgenet_resume_noirq,
+};
 
 static const struct acpi_device_id genet_acpi_match[] = {
 	{ "BCM6E4E", (kernel_ulong_t)&bcm2711_plat_data },
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet.h b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
index 031d91f45067..a12cb59298f4 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet.h
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet.h
@@ -312,6 +312,8 @@ struct bcmgenet_mib_counters {
 #define UMAC_IRQ_HFB_SM			(1 << 10)
 #define UMAC_IRQ_HFB_MM			(1 << 11)
 #define UMAC_IRQ_MPD_R			(1 << 12)
+#define UMAC_IRQ_WAKE_EVENT		(UMAC_IRQ_HFB_SM | UMAC_IRQ_HFB_MM | \
+					 UMAC_IRQ_MPD_R)
 #define UMAC_IRQ_RXDMA_MBDONE		(1 << 13)
 #define UMAC_IRQ_RXDMA_PDONE		(1 << 14)
 #define UMAC_IRQ_RXDMA_BDONE		(1 << 15)
diff --git a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
index 4b9d65f392c2..4ea6a26b04f7 100644
--- a/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
+++ b/drivers/net/ethernet/broadcom/genet/bcmgenet_wol.c
@@ -193,6 +193,12 @@ int bcmgenet_wol_power_down_cfg(struct bcmgenet_priv *priv,
 		bcmgenet_ext_writel(priv, reg, EXT_EXT_PWR_MGMT);
 	}
 
+	reg = UMAC_IRQ_MPD_R;
+	if (hfb_enable)
+		reg |=  UMAC_IRQ_HFB_SM | UMAC_IRQ_HFB_MM;
+
+	bcmgenet_intrl2_0_writel(priv, reg, INTRL2_CPU_MASK_CLEAR);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a51c328df3106663879645680609eb49b3ff6444 Mon Sep 17 00:00:00 2001
From: Po Liu <Po.Liu@nxp.com>
Date: Fri, 1 May 2020 08:53:15 +0800
Subject: net: qos: introduce a gate control flow action

Introduce an ingress frame gate control flow action.
The tc gate action works like this:
Assume there is a gate that allows specified ingress frames to pass
during certain time slots and drops them during other time slots. The tc
filter selects the ingress frames, and the tc gate action specifies in
which time slots these frames are passed to the device and in which time
slots they are dropped.
The tc gate action provides an entry list that tells how long the gate
stays open and how long it stays closed. The gate action also assigns a
start time that tells when the entry list starts. The driver then
repeats the gate entry list cyclically.
For the software simulation, the gate action requires the user to assign
a clock type.

Below is an example configuration from user space. The tc filter matches
a stream whose source IP address is 192.168.0.20, and the gate action
has two time slots. In the first, lasting 200ms, the gate is open and
frames pass; in the second, lasting 100ms, the gate is closed and frames
are dropped. Once the ingress frames exceed 8000000 bytes in total, the
excess frames are dropped in that 200000000ns time slot.

> tc qdisc add dev eth0 ingress

> tc filter add dev eth0 parent ffff: protocol ip \
	   flower src_ip 192.168.0.20 \
	   action gate index 2 clockid CLOCK_TAI \
	   sched-entry open 200000000 -1 8000000 \
	   sched-entry close 100000000 -1 -1

> tc chain del dev eth0 ingress chain 0

"sched-entry" follow the name taprio style. Gate state is
"open"/"close". Follow with period nanosecond. Then next item is internal
priority value, which selects the ingress queue the frame is put in;
"-1" means wildcard. The optional last value specifies the maximum number
of MSDU octets that are permitted to pass the gate during the specified
time interval.
If base-time is not set it defaults to 0; as a result the start time is
((N + 1) * cycletime), which is the earliest such time in the future.

The example below filters a stream whose destination MAC address is
10:00:80:00:00:00 and whose IP protocol is ICMP, followed by the gate
action. The gate action runs with a single closed time slot, which means
the gate always stays closed. The total cycle time is 200000000ns. The
start time is calculated from the base-time as:

 1357000000000 + (N + 1) * cycletime

The first such value that lies in the future is used as the start time.
The cycletime in this case is 200000000ns.

> tc filter add dev eth0 parent ffff:  protocol ip \
	   flower skip_hw ip_proto icmp dst_mac 10:00:80:00:00:00 \
	   action gate index 12 base-time 1357000000000 \
	   sched-entry close 200000000 -1 -1 \
	   clockid CLOCK_TAI

Signed-off-by: Po Liu <Po.Liu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tc_act/tc_gate.h        |  47 +++
 include/uapi/linux/pkt_cls.h        |   1 +
 include/uapi/linux/tc_act/tc_gate.h |  47 +++
 net/sched/Kconfig                   |  12 +
 net/sched/Makefile                  |   1 +
 net/sched/act_gate.c                | 636 ++++++++++++++++++++++++++++++++++++
 6 files changed, 744 insertions(+)
 create mode 100644 include/net/tc_act/tc_gate.h
 create mode 100644 include/uapi/linux/tc_act/tc_gate.h
 create mode 100644 net/sched/act_gate.c

diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
new file mode 100644
index 000000000000..330ad8b02495
--- /dev/null
+++ b/include/net/tc_act/tc_gate.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0-or-later */
+/* Copyright 2020 NXP */
+
+#ifndef __NET_TC_GATE_H
+#define __NET_TC_GATE_H
+
+#include <net/act_api.h>
+#include <linux/tc_act/tc_gate.h>
+
+struct tcfg_gate_entry {
+	int			index;
+	u8			gate_state;
+	u32			interval;
+	s32			ipv;
+	s32			maxoctets;
+	struct list_head	list;
+};
+
+struct tcf_gate_params {
+	s32			tcfg_priority;
+	u64			tcfg_basetime;
+	u64			tcfg_cycletime;
+	u64			tcfg_cycletime_ext;
+	u32			tcfg_flags;
+	s32			tcfg_clockid;
+	size_t			num_entries;
+	struct list_head	entries;
+};
+
+#define GATE_ACT_GATE_OPEN	BIT(0)
+#define GATE_ACT_PENDING	BIT(1)
+
+struct tcf_gate {
+	struct tc_action	common;
+	struct tcf_gate_params	param;
+	u8			current_gate_status;
+	ktime_t			current_close_time;
+	u32			current_entry_octets;
+	s32			current_max_octets;
+	struct tcfg_gate_entry	*next_entry;
+	struct hrtimer		hitimer;
+	enum tk_offsets		tk_offset;
+};
+
+#define to_gate(a) ((struct tcf_gate *)a)
+
+#endif
diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index 9f06d29cab70..fc672b232437 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -134,6 +134,7 @@ enum tca_id {
 	TCA_ID_CTINFO,
 	TCA_ID_MPLS,
 	TCA_ID_CT,
+	TCA_ID_GATE,
 	/* other actions go here */
 	__TCA_ID_MAX = 255
 };
diff --git a/include/uapi/linux/tc_act/tc_gate.h b/include/uapi/linux/tc_act/tc_gate.h
new file mode 100644
index 000000000000..f214b3a6d44f
--- /dev/null
+++ b/include/uapi/linux/tc_act/tc_gate.h
@@ -0,0 +1,47 @@
+/* SPDX-License-Identifier: GPL-2.0+ WITH Linux-syscall-note */
+/* Copyright 2020 NXP */
+
+#ifndef __LINUX_TC_GATE_H
+#define __LINUX_TC_GATE_H
+
+#include <linux/pkt_cls.h>
+
+struct tc_gate {
+	tc_gen;
+};
+
+enum {
+	TCA_GATE_ENTRY_UNSPEC,
+	TCA_GATE_ENTRY_INDEX,
+	TCA_GATE_ENTRY_GATE,
+	TCA_GATE_ENTRY_INTERVAL,
+	TCA_GATE_ENTRY_IPV,
+	TCA_GATE_ENTRY_MAX_OCTETS,
+	__TCA_GATE_ENTRY_MAX,
+};
+#define TCA_GATE_ENTRY_MAX (__TCA_GATE_ENTRY_MAX - 1)
+
+enum {
+	TCA_GATE_ONE_ENTRY_UNSPEC,
+	TCA_GATE_ONE_ENTRY,
+	__TCA_GATE_ONE_ENTRY_MAX,
+};
+#define TCA_GATE_ONE_ENTRY_MAX (__TCA_GATE_ONE_ENTRY_MAX - 1)
+
+enum {
+	TCA_GATE_UNSPEC,
+	TCA_GATE_TM,
+	TCA_GATE_PARMS,
+	TCA_GATE_PAD,
+	TCA_GATE_PRIORITY,
+	TCA_GATE_ENTRY_LIST,
+	TCA_GATE_BASE_TIME,
+	TCA_GATE_CYCLE_TIME,
+	TCA_GATE_CYCLE_TIME_EXT,
+	TCA_GATE_FLAGS,
+	TCA_GATE_CLOCKID,
+	__TCA_GATE_MAX,
+};
+#define TCA_GATE_MAX (__TCA_GATE_MAX - 1)
+
+#endif
diff --git a/net/sched/Kconfig b/net/sched/Kconfig
index bfbefb7bff9d..2f20073f4f84 100644
--- a/net/sched/Kconfig
+++ b/net/sched/Kconfig
@@ -981,6 +981,18 @@ config NET_ACT_CT
 	  To compile this code as a module, choose M here: the
 	  module will be called act_ct.
 
+config NET_ACT_GATE
+	tristate "Frame gate entry list control tc action"
+	depends on NET_CLS_ACT
+	help
+	  Say Y here to allow to control the ingress flow to be passed at
+	  specific time slot and be dropped at other specific time slot by
+	  the gate entry list.
+
+	  If unsure, say N.
+	  To compile this code as a module, choose M here: the
+	  module will be called act_gate.
+
 config NET_IFE_SKBMARK
 	tristate "Support to encoding decoding skb mark on IFE action"
 	depends on NET_ACT_IFE
diff --git a/net/sched/Makefile b/net/sched/Makefile
index 31c367a6cd09..66bbf9a98f9e 100644
--- a/net/sched/Makefile
+++ b/net/sched/Makefile
@@ -30,6 +30,7 @@ obj-$(CONFIG_NET_IFE_SKBPRIO)	+= act_meta_skbprio.o
 obj-$(CONFIG_NET_IFE_SKBTCINDEX)	+= act_meta_skbtcindex.o
 obj-$(CONFIG_NET_ACT_TUNNEL_KEY)+= act_tunnel_key.o
 obj-$(CONFIG_NET_ACT_CT)	+= act_ct.o
+obj-$(CONFIG_NET_ACT_GATE)	+= act_gate.o
 obj-$(CONFIG_NET_SCH_FIFO)	+= sch_fifo.o
 obj-$(CONFIG_NET_SCH_CBQ)	+= sch_cbq.o
 obj-$(CONFIG_NET_SCH_HTB)	+= sch_htb.o
diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
new file mode 100644
index 000000000000..35fc48795541
--- /dev/null
+++ b/net/sched/act_gate.c
@@ -0,0 +1,636 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/* Copyright 2020 NXP */
+
+#include <linux/module.h>
+#include <linux/types.h>
+#include <linux/kernel.h>
+#include <linux/string.h>
+#include <linux/errno.h>
+#include <linux/skbuff.h>
+#include <linux/rtnetlink.h>
+#include <linux/init.h>
+#include <linux/slab.h>
+#include <net/act_api.h>
+#include <net/netlink.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gate.h>
+
+static unsigned int gate_net_id;
+static struct tc_action_ops act_gate_ops;
+
+static ktime_t gate_get_time(struct tcf_gate *gact)
+{
+	ktime_t mono = ktime_get();
+
+	switch (gact->tk_offset) {
+	case TK_OFFS_MAX:
+		return mono;
+	default:
+		return ktime_mono_to_any(mono, gact->tk_offset);
+	}
+
+	return KTIME_MAX;
+}
+
+static int gate_get_start_time(struct tcf_gate *gact, ktime_t *start)
+{
+	struct tcf_gate_params *param = &gact->param;
+	ktime_t now, base, cycle;
+	u64 n;
+
+	base = ns_to_ktime(param->tcfg_basetime);
+	now = gate_get_time(gact);
+
+	if (ktime_after(base, now)) {
+		*start = base;
+		return 0;
+	}
+
+	cycle = param->tcfg_cycletime;
+
+	/* cycle time should not be zero */
+	if (!cycle)
+		return -EFAULT;
+
+	n = div64_u64(ktime_sub_ns(now, base), cycle);
+	*start = ktime_add_ns(base, (n + 1) * cycle);
+	return 0;
+}
+
+static void gate_start_timer(struct tcf_gate *gact, ktime_t start)
+{
+	ktime_t expires;
+
+	expires = hrtimer_get_expires(&gact->hitimer);
+	if (expires == 0)
+		expires = KTIME_MAX;
+
+	start = min_t(ktime_t, start, expires);
+
+	hrtimer_start(&gact->hitimer, start, HRTIMER_MODE_ABS_SOFT);
+}
+
+static enum hrtimer_restart gate_timer_func(struct hrtimer *timer)
+{
+	struct tcf_gate *gact = container_of(timer, struct tcf_gate,
+					     hitimer);
+	struct tcf_gate_params *p = &gact->param;
+	struct tcfg_gate_entry *next;
+	ktime_t close_time, now;
+
+	spin_lock(&gact->tcf_lock);
+
+	next = gact->next_entry;
+
+	/* cycle start, clear pending bit, clear total octets */
+	gact->current_gate_status = next->gate_state ? GATE_ACT_GATE_OPEN : 0;
+	gact->current_entry_octets = 0;
+	gact->current_max_octets = next->maxoctets;
+
+	gact->current_close_time = ktime_add_ns(gact->current_close_time,
+						next->interval);
+
+	close_time = gact->current_close_time;
+
+	if (list_is_last(&next->list, &p->entries))
+		next = list_first_entry(&p->entries,
+					struct tcfg_gate_entry, list);
+	else
+		next = list_next_entry(next, list);
+
+	now = gate_get_time(gact);
+
+	if (ktime_after(now, close_time)) {
+		ktime_t cycle, base;
+		u64 n;
+
+		cycle = p->tcfg_cycletime;
+		base = ns_to_ktime(p->tcfg_basetime);
+		n = div64_u64(ktime_sub_ns(now, base), cycle);
+		close_time = ktime_add_ns(base, (n + 1) * cycle);
+	}
+
+	gact->next_entry = next;
+
+	hrtimer_set_expires(&gact->hitimer, close_time);
+
+	spin_unlock(&gact->tcf_lock);
+
+	return HRTIMER_RESTART;
+}
+
+static int tcf_gate_act(struct sk_buff *skb, const struct tc_action *a,
+			struct tcf_result *res)
+{
+	struct tcf_gate *gact = to_gate(a);
+
+	spin_lock(&gact->tcf_lock);
+
+	tcf_lastuse_update(&gact->tcf_tm);
+	bstats_update(&gact->tcf_bstats, skb);
+
+	if (unlikely(gact->current_gate_status & GATE_ACT_PENDING)) {
+		spin_unlock(&gact->tcf_lock);
+		return gact->tcf_action;
+	}
+
+	if (!(gact->current_gate_status & GATE_ACT_GATE_OPEN))
+		goto drop;
+
+	if (gact->current_max_octets >= 0) {
+		gact->current_entry_octets += qdisc_pkt_len(skb);
+		if (gact->current_entry_octets > gact->current_max_octets) {
+			gact->tcf_qstats.overlimits++;
+			goto drop;
+		}
+	}
+
+	spin_unlock(&gact->tcf_lock);
+
+	return gact->tcf_action;
+drop:
+	gact->tcf_qstats.drops++;
+	spin_unlock(&gact->tcf_lock);
+
+	return TC_ACT_SHOT;
+}
+
+static const struct nla_policy entry_policy[TCA_GATE_ENTRY_MAX + 1] = {
+	[TCA_GATE_ENTRY_INDEX]		= { .type = NLA_U32 },
+	[TCA_GATE_ENTRY_GATE]		= { .type = NLA_FLAG },
+	[TCA_GATE_ENTRY_INTERVAL]	= { .type = NLA_U32 },
+	[TCA_GATE_ENTRY_IPV]		= { .type = NLA_S32 },
+	[TCA_GATE_ENTRY_MAX_OCTETS]	= { .type = NLA_S32 },
+};
+
+static const struct nla_policy gate_policy[TCA_GATE_MAX + 1] = {
+	[TCA_GATE_PARMS]		= { .len = sizeof(struct tc_gate),
+					    .type = NLA_EXACT_LEN },
+	[TCA_GATE_PRIORITY]		= { .type = NLA_S32 },
+	[TCA_GATE_ENTRY_LIST]		= { .type = NLA_NESTED },
+	[TCA_GATE_BASE_TIME]		= { .type = NLA_U64 },
+	[TCA_GATE_CYCLE_TIME]		= { .type = NLA_U64 },
+	[TCA_GATE_CYCLE_TIME_EXT]	= { .type = NLA_U64 },
+	[TCA_GATE_FLAGS]		= { .type = NLA_U32 },
+	[TCA_GATE_CLOCKID]		= { .type = NLA_S32 },
+};
+
+static int fill_gate_entry(struct nlattr **tb, struct tcfg_gate_entry *entry,
+			   struct netlink_ext_ack *extack)
+{
+	u32 interval = 0;
+
+	entry->gate_state = nla_get_flag(tb[TCA_GATE_ENTRY_GATE]);
+
+	if (tb[TCA_GATE_ENTRY_INTERVAL])
+		interval = nla_get_u32(tb[TCA_GATE_ENTRY_INTERVAL]);
+
+	if (interval == 0) {
+		NL_SET_ERR_MSG(extack, "Invalid interval for schedule entry");
+		return -EINVAL;
+	}
+
+	entry->interval = interval;
+
+	if (tb[TCA_GATE_ENTRY_IPV])
+		entry->ipv = nla_get_s32(tb[TCA_GATE_ENTRY_IPV]);
+	else
+		entry->ipv = -1;
+
+	if (tb[TCA_GATE_ENTRY_MAX_OCTETS])
+		entry->maxoctets = nla_get_s32(tb[TCA_GATE_ENTRY_MAX_OCTETS]);
+	else
+		entry->maxoctets = -1;
+
+	return 0;
+}
+
+static int parse_gate_entry(struct nlattr *n, struct  tcfg_gate_entry *entry,
+			    int index, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_GATE_ENTRY_MAX + 1] = { };
+	int err;
+
+	err = nla_parse_nested(tb, TCA_GATE_ENTRY_MAX, n, entry_policy, extack);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack, "Could not parse nested entry");
+		return -EINVAL;
+	}
+
+	entry->index = index;
+
+	return fill_gate_entry(tb, entry, extack);
+}
+
+static void release_entry_list(struct list_head *entries)
+{
+	struct tcfg_gate_entry *entry, *e;
+
+	list_for_each_entry_safe(entry, e, entries, list) {
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+static int parse_gate_list(struct nlattr *list_attr,
+			   struct tcf_gate_params *sched,
+			   struct netlink_ext_ack *extack)
+{
+	struct tcfg_gate_entry *entry;
+	struct nlattr *n;
+	int err, rem;
+	int i = 0;
+
+	if (!list_attr)
+		return -EINVAL;
+
+	nla_for_each_nested(n, list_attr, rem) {
+		if (nla_type(n) != TCA_GATE_ONE_ENTRY) {
+			NL_SET_ERR_MSG(extack, "Attribute isn't type 'entry'");
+			continue;
+		}
+
+		entry = kzalloc(sizeof(*entry), GFP_ATOMIC);
+		if (!entry) {
+			NL_SET_ERR_MSG(extack, "Not enough memory for entry");
+			err = -ENOMEM;
+			goto release_list;
+		}
+
+		err = parse_gate_entry(n, entry, i, extack);
+		if (err < 0) {
+			kfree(entry);
+			goto release_list;
+		}
+
+		list_add_tail(&entry->list, &sched->entries);
+		i++;
+	}
+
+	sched->num_entries = i;
+
+	return i;
+
+release_list:
+	release_entry_list(&sched->entries);
+
+	return err;
+}
+
+/* tcf_gate_init - create a gate action or replace an existing one's schedule.
+ *
+ * Parses the nested TCA_GATE_* attributes in @nla, looks up or allocates the
+ * action identified by the index in TCA_GATE_PARMS, then fills in the gate
+ * parameters and starts the gate timer while holding the action's tcf_lock.
+ *
+ * Returns ACT_P_CREATED when a new action was created, 0 when an existing
+ * action was updated or only bound, or a negative errno on failure.
+ */
+static int tcf_gate_init(struct net *net, struct nlattr *nla,
+			 struct nlattr *est, struct tc_action **a,
+			 int ovr, int bind, bool rtnl_held,
+			 struct tcf_proto *tp, u32 flags,
+			 struct netlink_ext_ack *extack)
+{
+	struct tc_action_net *tn = net_generic(net, gate_net_id);
+	enum tk_offsets tk_offset = TK_OFFS_TAI;
+	struct nlattr *tb[TCA_GATE_MAX + 1];
+	struct tcf_chain *goto_ch = NULL;
+	struct tcf_gate_params *p;
+	s32 clockid = CLOCK_TAI;
+	struct tcf_gate *gact;
+	struct tc_gate *parm;
+	int ret = 0, err;
+	u64 basetime = 0;
+	u32 gflags = 0;
+	s32 prio = -1;
+	ktime_t start;
+	u32 index;
+
+	if (!nla)
+		return -EINVAL;
+
+	err = nla_parse_nested(tb, TCA_GATE_MAX, nla, gate_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_GATE_PARMS])
+		return -EINVAL;
+
+	/* Validate the clock before the action is looked up or allocated, so
+	 * that an unsupported clockid fails with a real error code and leaves
+	 * nothing to unwind.
+	 */
+	if (tb[TCA_GATE_CLOCKID]) {
+		clockid = nla_get_s32(tb[TCA_GATE_CLOCKID]);
+		switch (clockid) {
+		case CLOCK_REALTIME:
+			tk_offset = TK_OFFS_REAL;
+			break;
+		case CLOCK_MONOTONIC:
+			tk_offset = TK_OFFS_MAX;
+			break;
+		case CLOCK_BOOTTIME:
+			tk_offset = TK_OFFS_BOOT;
+			break;
+		case CLOCK_TAI:
+			tk_offset = TK_OFFS_TAI;
+			break;
+		default:
+			NL_SET_ERR_MSG(extack, "Invalid 'clockid'");
+			return -EINVAL;
+		}
+	}
+
+	parm = nla_data(tb[TCA_GATE_PARMS]);
+	index = parm->index;
+
+	err = tcf_idr_check_alloc(tn, &index, a, bind);
+	if (err < 0)
+		return err;
+
+	if (err && bind)
+		return 0;
+
+	if (!err) {
+		ret = tcf_idr_create(tn, index, est, a,
+				     &act_gate_ops, bind, false, 0);
+		if (ret) {
+			tcf_idr_cleanup(tn, index);
+			return ret;
+		}
+
+		/* tcf_gate_cleanup() cancels the timer and walks the entry
+		 * list unconditionally, and tcf_idr_release() on the error
+		 * paths below may end up calling it. Make both usable now.
+		 */
+		gact = to_gate(*a);
+		INIT_LIST_HEAD(&gact->param.entries);
+		hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+		gact->hitimer.function = gate_timer_func;
+
+		ret = ACT_P_CREATED;
+	} else if (!ovr) {
+		tcf_idr_release(*a, bind);
+		return -EEXIST;
+	}
+
+	if (tb[TCA_GATE_PRIORITY])
+		prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
+
+	if (tb[TCA_GATE_BASE_TIME])
+		basetime = nla_get_u64(tb[TCA_GATE_BASE_TIME]);
+
+	if (tb[TCA_GATE_FLAGS])
+		gflags = nla_get_u32(tb[TCA_GATE_FLAGS]);
+
+	err = tcf_action_check_ctrlact(parm->action, tp, &goto_ch, extack);
+	if (err < 0)
+		goto release_idr;
+
+	gact = to_gate(*a);
+
+	spin_lock_bh(&gact->tcf_lock);
+	p = &gact->param;
+
+	/* A zero cycle time is not an error here: it is derived from the
+	 * entry intervals further down.
+	 */
+	if (tb[TCA_GATE_CYCLE_TIME])
+		p->tcfg_cycletime = nla_get_u64(tb[TCA_GATE_CYCLE_TIME]);
+
+	/* NOTE(review): when an existing action is replaced, this drops the
+	 * previous entry list without freeing it, and the hrtimer_init()
+	 * below re-initialises a timer that may still be armed - confirm and
+	 * handle separately.
+	 */
+	INIT_LIST_HEAD(&p->entries);
+	if (tb[TCA_GATE_ENTRY_LIST]) {
+		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
+		if (err < 0)
+			goto chain_put;
+	}
+
+	if (!p->tcfg_cycletime) {
+		struct tcfg_gate_entry *entry;
+		ktime_t cycle = 0;
+
+		list_for_each_entry(entry, &p->entries, list)
+			cycle = ktime_add_ns(cycle, entry->interval);
+		p->tcfg_cycletime = cycle;
+	}
+
+	if (tb[TCA_GATE_CYCLE_TIME_EXT])
+		p->tcfg_cycletime_ext =
+			nla_get_u64(tb[TCA_GATE_CYCLE_TIME_EXT]);
+
+	p->tcfg_priority = prio;
+	p->tcfg_basetime = basetime;
+	p->tcfg_clockid = clockid;
+	p->tcfg_flags = gflags;
+
+	gact->tk_offset = tk_offset;
+	hrtimer_init(&gact->hitimer, clockid, HRTIMER_MODE_ABS_SOFT);
+	gact->hitimer.function = gate_timer_func;
+
+	err = gate_get_start_time(gact, &start);
+	if (err < 0) {
+		NL_SET_ERR_MSG(extack,
+			       "Internal error: failed get start time");
+		release_entry_list(&p->entries);
+		goto chain_put;
+	}
+
+	gact->current_close_time = start;
+	gact->current_gate_status = GATE_ACT_GATE_OPEN | GATE_ACT_PENDING;
+
+	gact->next_entry = list_first_entry(&p->entries,
+					    struct tcfg_gate_entry, list);
+
+	goto_ch = tcf_action_set_ctrlact(*a, parm->action, goto_ch);
+
+	gate_start_timer(gact, start);
+
+	spin_unlock_bh(&gact->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+
+	if (ret == ACT_P_CREATED)
+		tcf_idr_insert(tn, *a);
+
+	return ret;
+
+chain_put:
+	spin_unlock_bh(&gact->tcf_lock);
+
+	if (goto_ch)
+		tcf_chain_put_by_act(goto_ch);
+release_idr:
+	tcf_idr_release(*a, bind);
+	return err;
+}
+
+/* Action cleanup hook: stop the gate timer, then free the schedule entries. */
+static void tcf_gate_cleanup(struct tc_action *a)
+{
+	struct tcf_gate *gact = to_gate(a);
+
+	hrtimer_cancel(&gact->hitimer);
+	release_entry_list(&gact->param.entries);
+}
+
+/* Emit one schedule entry as a nested TCA_GATE_ONE_ENTRY attribute.
+ *
+ * Returns the result of nla_nest_end() on success, -ENOSPC when the nest
+ * cannot be started, or -1 (after cancelling the nest) when a put fails.
+ */
+static int dumping_entry(struct sk_buff *skb,
+			 struct tcfg_gate_entry *entry)
+{
+	struct nlattr *item;
+
+	item = nla_nest_start_noflag(skb, TCA_GATE_ONE_ENTRY);
+	if (!item)
+		return -ENOSPC;
+
+	if (nla_put_u32(skb, TCA_GATE_ENTRY_INDEX, entry->index))
+		goto nla_put_failure;
+
+	/* The gate flag attribute is only present when gate_state is set. */
+	if (entry->gate_state && nla_put_flag(skb, TCA_GATE_ENTRY_GATE))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_GATE_ENTRY_INTERVAL, entry->interval))
+		goto nla_put_failure;
+
+	if (nla_put_s32(skb, TCA_GATE_ENTRY_MAX_OCTETS, entry->maxoctets))
+		goto nla_put_failure;
+
+	if (nla_put_s32(skb, TCA_GATE_ENTRY_IPV, entry->ipv))
+		goto nla_put_failure;
+
+	return nla_nest_end(skb, item);
+
+nla_put_failure:
+	nla_nest_cancel(skb, item);
+	return -1;
+}
+
+/* Dump the action's configuration into @skb.
+ *
+ * The reference and bind counts reported in TCA_GATE_PARMS are reduced by
+ * @ref and @bind. Returns skb->len on success; on failure the message is
+ * trimmed back to where it ended on entry and -1 is returned.
+ */
+static int tcf_gate_dump(struct sk_buff *skb, struct tc_action *a,
+			 int bind, int ref)
+{
+	/* Remember the current tail so a partial dump can be undone. */
+	unsigned char *b = skb_tail_pointer(skb);
+	struct tcf_gate *gact = to_gate(a);
+	struct tc_gate opt = {
+		.index    = gact->tcf_index,
+		.refcnt   = refcount_read(&gact->tcf_refcnt) - ref,
+		.bindcnt  = atomic_read(&gact->tcf_bindcnt) - bind,
+	};
+	struct tcfg_gate_entry *entry;
+	struct tcf_gate_params *p;
+	struct nlattr *entry_list;
+	struct tcf_t t;
+
+	/* Parameters and the entry list are read under the action lock. */
+	spin_lock_bh(&gact->tcf_lock);
+	opt.action = gact->tcf_action;
+
+	p = &gact->param;
+
+	if (nla_put(skb, TCA_GATE_PARMS, sizeof(opt), &opt))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_GATE_BASE_TIME,
+			      p->tcfg_basetime, TCA_GATE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME,
+			      p->tcfg_cycletime, TCA_GATE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_u64_64bit(skb, TCA_GATE_CYCLE_TIME_EXT,
+			      p->tcfg_cycletime_ext, TCA_GATE_PAD))
+		goto nla_put_failure;
+
+	if (nla_put_s32(skb, TCA_GATE_CLOCKID, p->tcfg_clockid))
+		goto nla_put_failure;
+
+	if (nla_put_u32(skb, TCA_GATE_FLAGS, p->tcfg_flags))
+		goto nla_put_failure;
+
+	if (nla_put_s32(skb, TCA_GATE_PRIORITY, p->tcfg_priority))
+		goto nla_put_failure;
+
+	/* All entries are nested inside a single TCA_GATE_ENTRY_LIST. */
+	entry_list = nla_nest_start_noflag(skb, TCA_GATE_ENTRY_LIST);
+	if (!entry_list)
+		goto nla_put_failure;
+
+	list_for_each_entry(entry, &p->entries, list) {
+		if (dumping_entry(skb, entry) < 0)
+			goto nla_put_failure;
+	}
+
+	nla_nest_end(skb, entry_list);
+
+	tcf_tm_dump(&t, &gact->tcf_tm);
+	if (nla_put_64bit(skb, TCA_GATE_TM, sizeof(t), &t, TCA_GATE_PAD))
+		goto nla_put_failure;
+	spin_unlock_bh(&gact->tcf_lock);
+
+	return skb->len;
+
+nla_put_failure:
+	spin_unlock_bh(&gact->tcf_lock);
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+/* Walk the gate actions of @net through the generic action walker. */
+static int tcf_gate_walker(struct net *net, struct sk_buff *skb,
+			   struct netlink_callback *cb, int type,
+			   const struct tc_action_ops *ops,
+			   struct netlink_ext_ack *extack)
+{
+	return tcf_generic_walker(net_generic(net, gate_net_id), skb, cb,
+				  type, ops, extack);
+}
+
+/* Account @bytes/@packets to the action and advance its last-use time. */
+static void tcf_gate_stats_update(struct tc_action *a, u64 bytes, u32 packets,
+				  u64 lastuse, bool hw)
+{
+	struct tcf_gate *gact = to_gate(a);
+
+	tcf_action_update_stats(a, bytes, packets, false, hw);
+
+	/* lastuse only ever moves forward. */
+	gact->tcf_tm.lastuse = max_t(u64, gact->tcf_tm.lastuse, lastuse);
+}
+
+/* Look up the gate action with @index in the per-netns action table. */
+static int tcf_gate_search(struct net *net, struct tc_action **a, u32 index)
+{
+	return tcf_idr_search(net_generic(net, gate_net_id), a, index);
+}
+
+/* Size estimate for a dump of @act: covers only the TCA_GATE_PARMS payload.
+ *
+ * NOTE(review): tcf_gate_dump() also emits base time, cycle time, clock id,
+ * flags, priority and the entry list, none of which is counted here - confirm
+ * whether callers need those included.
+ */
+static size_t tcf_gate_get_fill_size(const struct tc_action *act)
+{
+	return nla_total_size(sizeof(struct tc_gate));
+}
+
+/* Operations table for the "gate" action kind; registered and unregistered
+ * by the module init/exit functions below.
+ */
+static struct tc_action_ops act_gate_ops = {
+	.kind		=	"gate",
+	.id		=	TCA_ID_GATE,
+	.owner		=	THIS_MODULE,
+	.act		=	tcf_gate_act,
+	.dump		=	tcf_gate_dump,
+	.init		=	tcf_gate_init,
+	.cleanup	=	tcf_gate_cleanup,
+	.walk		=	tcf_gate_walker,
+	.stats_update	=	tcf_gate_stats_update,
+	.get_fill_size	=	tcf_gate_get_fill_size,
+	.lookup		=	tcf_gate_search,
+	.size		=	sizeof(struct tcf_gate),
+};
+
+/* Per-netns init: set up the gate action table for @net. */
+static __net_init int gate_init_net(struct net *net)
+{
+	return tc_action_net_init(net, net_generic(net, gate_net_id),
+				  &act_gate_ops);
+}
+
+/* Batched per-netns exit: tear down gate action state for every namespace
+ * on @net_list.
+ */
+static void __net_exit gate_exit_net(struct list_head *net_list)
+{
+	tc_action_net_exit(net_list, gate_net_id);
+}
+
+/* Per-network-namespace hooks; each namespace gets a struct tc_action_net
+ * reachable through gate_net_id.
+ */
+static struct pernet_operations gate_net_ops = {
+	.init = gate_init_net,
+	.exit_batch = gate_exit_net,
+	.id   = &gate_net_id,
+	.size = sizeof(struct tc_action_net),
+};
+
+/* Module entry point: register the gate action and its pernet operations. */
+static int __init gate_init_module(void)
+{
+	return tcf_register_action(&act_gate_ops, &gate_net_ops);
+}
+
+/* Module exit point: unregister what gate_init_module() registered. */
+static void __exit gate_cleanup_module(void)
+{
+	tcf_unregister_action(&act_gate_ops, &gate_net_ops);
+}
+
+module_init(gate_init_module);
+module_exit(gate_cleanup_module);
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-59-g8ed1b


From d29bdd69ecdd70e8e3c2268fc8e188d6ab55e54a Mon Sep 17 00:00:00 2001
From: Po Liu <Po.Liu@nxp.com>
Date: Fri, 1 May 2020 08:53:16 +0800
Subject: net: schedule: add action gate offloading

Add the gate action to the flow action entries. tc_setup_flow_action()
copies the gate parameters into the flow_action_entry array that is
provided to the driver.

Signed-off-by: Po Liu <Po.Liu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h   | 10 +++++
 include/net/tc_act/tc_gate.h | 99 ++++++++++++++++++++++++++++++++++++++++++++
 net/sched/cls_api.c          | 33 +++++++++++++++
 3 files changed, 142 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 3619c6acf60f..94a30fe02e6d 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -147,6 +147,7 @@ enum flow_action_id {
 	FLOW_ACTION_MPLS_PUSH,
 	FLOW_ACTION_MPLS_POP,
 	FLOW_ACTION_MPLS_MANGLE,
+	FLOW_ACTION_GATE,
 	NUM_FLOW_ACTIONS,
 };
 
@@ -255,6 +256,15 @@ struct flow_action_entry {
 			u8		bos;
 			u8		ttl;
 		} mpls_mangle;
+		struct {
+			u32		index;
+			s32		prio;
+			u64		basetime;
+			u64		cycletime;
+			u64		cycletimeext;
+			u32		num_entries;
+			struct action_gate_entry *entries;
+		} gate;
 	};
 	struct flow_action_cookie *cookie; /* user defined action cookie */
 };
diff --git a/include/net/tc_act/tc_gate.h b/include/net/tc_act/tc_gate.h
index 330ad8b02495..8bc6be81a7ad 100644
--- a/include/net/tc_act/tc_gate.h
+++ b/include/net/tc_act/tc_gate.h
@@ -7,6 +7,13 @@
 #include <net/act_api.h>
 #include <linux/tc_act/tc_gate.h>
 
+/* One gate schedule entry in the flat array built by tcf_gate_get_list();
+ * it mirrors the same-named fields of struct tcfg_gate_entry.
+ */
+struct action_gate_entry {
+	u8			gate_state;
+	u32			interval;
+	s32			ipv;
+	s32			maxoctets;
+};
+
 struct tcfg_gate_entry {
 	int			index;
 	u8			gate_state;
@@ -44,4 +51,96 @@ struct tcf_gate {
 
 #define to_gate(a) ((struct tcf_gate *)a)
 
+/* True when @a is a gate action; always false without CONFIG_NET_CLS_ACT. */
+static inline bool is_tcf_gate(const struct tc_action *a)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	return a->ops && a->ops->id == TCA_ID_GATE;
+#else
+	return false;
+#endif
+}
+
+/* Index of action @a, read from the common tc_action header. */
+static inline u32 tcf_gate_index(const struct tc_action *a)
+{
+	return a->tcfa_index;
+}
+
+/* Priority stored in the gate parameters of @a. */
+static inline s32 tcf_gate_prio(const struct tc_action *a)
+{
+	return to_gate(a)->param.tcfg_priority;
+}
+
+/* Base time stored in the gate parameters of @a. */
+static inline u64 tcf_gate_basetime(const struct tc_action *a)
+{
+	return to_gate(a)->param.tcfg_basetime;
+}
+
+/* Cycle time stored in the gate parameters of @a. */
+static inline u64 tcf_gate_cycletime(const struct tc_action *a)
+{
+	return to_gate(a)->param.tcfg_cycletime;
+}
+
+/* Cycle time extension stored in the gate parameters of @a. */
+static inline u64 tcf_gate_cycletimeext(const struct tc_action *a)
+{
+	return to_gate(a)->param.tcfg_cycletime_ext;
+}
+
+/* Number of schedule entries recorded in the gate parameters of @a. */
+static inline u32 tcf_gate_num_entries(const struct tc_action *a)
+{
+	return to_gate(a)->param.num_entries;
+}
+
+/* Copy the gate schedule of @a into a newly allocated flat array.
+ *
+ * Returns NULL when the length of the entry list disagrees with
+ * param.num_entries or when the kcalloc(GFP_ATOMIC) allocation fails.
+ */
+static inline struct action_gate_entry
+			*tcf_gate_get_list(const struct tc_action *a)
+{
+	struct tcf_gate_params *params = &to_gate(a)->param;
+	u32 expected = params->num_entries;
+	struct action_gate_entry *out, *slot;
+	struct tcfg_gate_entry *pos;
+	u32 seen = 0;
+
+	/* First pass: make sure the list length matches the recorded count. */
+	list_for_each_entry(pos, &params->entries, list)
+		seen++;
+
+	if (seen != expected)
+		return NULL;
+
+	out = kcalloc(expected, sizeof(*out), GFP_ATOMIC);
+	if (!out)
+		return NULL;
+
+	/* Second pass: copy the fields in list order. */
+	slot = out;
+	list_for_each_entry(pos, &params->entries, list) {
+		slot->gate_state = pos->gate_state;
+		slot->interval = pos->interval;
+		slot->ipv = pos->ipv;
+		slot->maxoctets = pos->maxoctets;
+		slot++;
+	}
+
+	return out;
+}
 #endif
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 11b683c45c28..7e85c91d0752 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -39,6 +39,7 @@
 #include <net/tc_act/tc_skbedit.h>
 #include <net/tc_act/tc_ct.h>
 #include <net/tc_act/tc_mpls.h>
+#include <net/tc_act/tc_gate.h>
 #include <net/flow_offload.h>
 
 extern const struct nla_policy rtm_tca_policy[TCA_MAX + 1];
@@ -3526,6 +3527,27 @@ static void tcf_sample_get_group(struct flow_action_entry *entry,
 #endif
 }
 
+/* flow_action_entry destructor: @priv is the gate entry array to free. */
+static void tcf_gate_entry_destructor(void *priv)
+{
+	kfree(priv);
+}
+
+/* Attach a copy of @act's gate schedule to @entry.
+ *
+ * Returns -EINVAL when tcf_gate_get_list() yields no array; otherwise the
+ * array is also registered as destructor_priv so it is freed with @entry.
+ */
+static int tcf_gate_get_entries(struct flow_action_entry *entry,
+				const struct tc_action *act)
+{
+	struct action_gate_entry *list = tcf_gate_get_list(act);
+
+	entry->gate.entries = list;
+	if (!list)
+		return -EINVAL;
+
+	entry->destructor = tcf_gate_entry_destructor;
+	entry->destructor_priv = list;
+
+	return 0;
+}
+
 int tc_setup_flow_action(struct flow_action *flow_action,
 			 const struct tcf_exts *exts)
 {
@@ -3672,6 +3694,17 @@ int tc_setup_flow_action(struct flow_action *flow_action,
 		} else if (is_tcf_skbedit_priority(act)) {
 			entry->id = FLOW_ACTION_PRIORITY;
 			entry->priority = tcf_skbedit_priority(act);
+		} else if (is_tcf_gate(act)) {
+			entry->id = FLOW_ACTION_GATE;
+			entry->gate.index = tcf_gate_index(act);
+			entry->gate.prio = tcf_gate_prio(act);
+			entry->gate.basetime = tcf_gate_basetime(act);
+			entry->gate.cycletime = tcf_gate_cycletime(act);
+			entry->gate.cycletimeext = tcf_gate_cycletimeext(act);
+			entry->gate.num_entries = tcf_gate_num_entries(act);
+			err = tcf_gate_get_entries(entry, act);
+			if (err)
+				goto err_out_locked; /* unwind like the branch below */
 		} else {
 			err = -EOPNOTSUPP;
 			goto err_out_locked;
-- 
cgit v1.2.3-59-g8ed1b


From 79e499829f3ff5b8f70c87baf1b03ebb3401a3e4 Mon Sep 17 00:00:00 2001
From: Po Liu <Po.Liu@nxp.com>
Date: Fri, 1 May 2020 08:53:17 +0800
Subject: net: enetc: add hw tc hw offload features for PSPF capability

This patch lets ethtool enable/disable the tc flower offload feature.
The ENETC hardware has the PSFP feature, which is for per-stream
policing. When the tc hw offload feature is enabled, the driver enables
the IEEE 802.1Qci feature. This only sets the register enable bit for
the feature and reads the capabilities of each function; it does not
enable any stream filter, stream gate or stream identification entry.

Signed-off-by: Po Liu <Po.Liu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c    | 23 ++++++++++++
 drivers/net/ethernet/freescale/enetc/enetc.h    | 48 +++++++++++++++++++++++++
 drivers/net/ethernet/freescale/enetc/enetc_hw.h | 17 +++++++++
 drivers/net/ethernet/freescale/enetc/enetc_pf.c |  8 +++++
 4 files changed, 96 insertions(+)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index ccf2611f4a20..04aac7cbb506 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -756,6 +756,9 @@ void enetc_get_si_caps(struct enetc_si *si)
 
 	if (val & ENETC_SIPCAPR0_QBV)
 		si->hw_features |= ENETC_SI_F_QBV;
+
+	if (val & ENETC_SIPCAPR0_PSFP)
+		si->hw_features |= ENETC_SI_F_PSFP;
 }
 
 static int enetc_dma_alloc_bdr(struct enetc_bdr *r, size_t bd_size)
@@ -1567,6 +1570,23 @@ static int enetc_set_rss(struct net_device *ndev, int en)
 	return 0;
 }
 
+static int enetc_set_psfp(struct net_device *ndev, int en)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+
+	if (en) {
+		priv->active_offloads |= ENETC_F_QCI;
+		enetc_get_max_cap(priv);
+		enetc_psfp_enable(&priv->si->hw);
+	} else {
+		priv->active_offloads &= ~ENETC_F_QCI;
+		memset(&priv->psfp_cap, 0, sizeof(struct psfp_cap));
+		enetc_psfp_disable(&priv->si->hw);
+	}
+
+	return 0;
+}
+
 int enetc_set_features(struct net_device *ndev,
 		       netdev_features_t features)
 {
@@ -1575,6 +1595,9 @@ int enetc_set_features(struct net_device *ndev,
 	if (changed & NETIF_F_RXHASH)
 		enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH));
 
+	if (changed & NETIF_F_HW_TC)
+		enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC));
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 56c43f35b633..2cfe877c3778 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -151,6 +151,7 @@ enum enetc_errata {
 };
 
 #define ENETC_SI_F_QBV BIT(0)
+#define ENETC_SI_F_PSFP BIT(1)
 
 /* PCI IEP device data */
 struct enetc_si {
@@ -203,12 +204,20 @@ struct enetc_cls_rule {
 };
 
 #define ENETC_MAX_BDR_INT	2 /* fixed to max # of available cpus */
+/* PSFP capability limits, filled by enetc_get_max_cap() from the
+ * ENETC_PSIDCAPR, ENETC_PSFCAPR, ENETC_PSGCAPR and ENETC_PFMCAPR port
+ * registers.
+ */
+struct psfp_cap {
+	u32 max_streamid;
+	u32 max_psfp_filter;
+	u32 max_psfp_gate;
+	u32 max_psfp_gatelist;
+	u32 max_psfp_meter;
+};
 
 /* TODO: more hardware offloads */
 enum enetc_active_offloads {
 	ENETC_F_RX_TSTAMP	= BIT(0),
 	ENETC_F_TX_TSTAMP	= BIT(1),
 	ENETC_F_QBV             = BIT(2),
+	ENETC_F_QCI		= BIT(3),
 };
 
 struct enetc_ndev_priv {
@@ -231,6 +240,8 @@ struct enetc_ndev_priv {
 
 	struct enetc_cls_rule *cls_rules;
 
+	struct psfp_cap psfp_cap;
+
 	struct device_node *phy_node;
 	phy_interface_t if_mode;
 };
@@ -289,9 +300,46 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data);
 void enetc_sched_speed_set(struct net_device *ndev);
 int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data);
 int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data);
+
+static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv)
+{
+	u32 reg;
+
+	reg = enetc_port_rd(&priv->si->hw, ENETC_PSIDCAPR);
+	priv->psfp_cap.max_streamid = reg & ENETC_PSIDCAPR_MSK;
+	/* Port stream filter capability */
+	reg = enetc_port_rd(&priv->si->hw, ENETC_PSFCAPR);
+	priv->psfp_cap.max_psfp_filter = reg & ENETC_PSFCAPR_MSK;
+	/* Port stream gate capability */
+	reg = enetc_port_rd(&priv->si->hw, ENETC_PSGCAPR);
+	priv->psfp_cap.max_psfp_gate = (reg & ENETC_PSGCAPR_SGIT_MSK);
+	priv->psfp_cap.max_psfp_gatelist = (reg & ENETC_PSGCAPR_GCL_MSK) >> 16;
+	/* Port flow meter capability */
+	reg = enetc_port_rd(&priv->si->hw, ENETC_PFMCAPR);
+	priv->psfp_cap.max_psfp_meter = reg & ENETC_PFMCAPR_MSK;
+}
+
+static inline void enetc_psfp_enable(struct enetc_hw *hw)
+{
+	enetc_wr(hw, ENETC_PPSFPMR, enetc_rd(hw, ENETC_PPSFPMR) |
+		 ENETC_PPSFPMR_PSFPEN | ENETC_PPSFPMR_VS |
+		 ENETC_PPSFPMR_PVC | ENETC_PPSFPMR_PVZC);
+}
+
+static inline void enetc_psfp_disable(struct enetc_hw *hw)
+{
+	enetc_wr(hw, ENETC_PPSFPMR, enetc_rd(hw, ENETC_PPSFPMR) &
+		 ~ENETC_PPSFPMR_PSFPEN & ~ENETC_PPSFPMR_VS &
+		 ~ENETC_PPSFPMR_PVC & ~ENETC_PPSFPMR_PVZC);
+}
 #else
 #define enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP
 #define enetc_sched_speed_set(ndev) (void)0
 #define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP
 #define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP
+#define enetc_get_max_cap(p)		\
+	memset(&((p)->psfp_cap), 0, sizeof(struct psfp_cap))
+
+#define enetc_psfp_enable(hw) (void)0
+#define enetc_psfp_disable(hw) (void)0
 #endif
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 2a6523136947..587974862f48 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -19,6 +19,7 @@
 #define ENETC_SICTR1	0x1c
 #define ENETC_SIPCAPR0	0x20
 #define ENETC_SIPCAPR0_QBV	BIT(4)
+#define ENETC_SIPCAPR0_PSFP	BIT(9)
 #define ENETC_SIPCAPR0_RSS	BIT(8)
 #define ENETC_SIPCAPR1	0x24
 #define ENETC_SITGTGR	0x30
@@ -228,6 +229,15 @@ enum enetc_bdr_type {TX, RX};
 #define ENETC_PM0_IFM_RLP	(BIT(5) | BIT(11))
 #define ENETC_PM0_IFM_RGAUTO	(BIT(15) | ENETC_PMO_IFM_RG | BIT(1))
 #define ENETC_PM0_IFM_XGMII	BIT(12)
+#define ENETC_PSIDCAPR		0x1b08
+#define ENETC_PSIDCAPR_MSK	GENMASK(15, 0)
+#define ENETC_PSFCAPR		0x1b18
+#define ENETC_PSFCAPR_MSK	GENMASK(15, 0)
+#define ENETC_PSGCAPR		0x1b28
+#define ENETC_PSGCAPR_GCL_MSK	GENMASK(18, 16)
+#define ENETC_PSGCAPR_SGIT_MSK	GENMASK(15, 0)
+#define ENETC_PFMCAPR		0x1b38
+#define ENETC_PFMCAPR_MSK	GENMASK(15, 0)
 
 /* MAC counters */
 #define ENETC_PM0_REOCT		0x8100
@@ -621,3 +631,10 @@ struct enetc_cbd {
 /* Port time specific departure */
 #define ENETC_PTCTSDR(n)	(0x1210 + 4 * (n))
 #define ENETC_TSDE		BIT(31)
+
+/* PSFP setting */
+#define ENETC_PPSFPMR 0x11b00
+#define ENETC_PPSFPMR_PSFPEN BIT(0)
+#define ENETC_PPSFPMR_VS BIT(1)
+#define ENETC_PPSFPMR_PVC BIT(2)
+#define ENETC_PPSFPMR_PVZC BIT(3)
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index de1ad4975074..cef9fbfdb056 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -727,6 +727,14 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
 	if (si->hw_features & ENETC_SI_F_QBV)
 		priv->active_offloads |= ENETC_F_QBV;
 
+	if (si->hw_features & ENETC_SI_F_PSFP) {
+		priv->active_offloads |= ENETC_F_QCI;
+		ndev->features |= NETIF_F_HW_TC;
+		ndev->hw_features |= NETIF_F_HW_TC;
+		enetc_get_max_cap(priv);
+		enetc_psfp_enable(&si->hw);
+	}
+
 	/* pick up primary MAC address from SI */
 	enetc_get_primary_mac_addr(&si->hw, ndev->dev_addr);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 888ae5a3952badf11fdcd210f5da9994462ec2fc Mon Sep 17 00:00:00 2001
From: Po Liu <Po.Liu@nxp.com>
Date: Fri, 1 May 2020 08:53:18 +0800
Subject: net: enetc: add tc flower psfp offload driver

This patch is to add tc flower offload for the enetc IEEE 802.1Qci(PSFP)
function. There are four main feature parts to implement the flow
policing and filtering for ingress flow with IEEE 802.1Qci features.
They are stream identify(this is defined in the P802.1cb exactly but
needed for 802.1Qci), stream filtering, stream gate and flow metering.
Each function block includes many entries by index to assign parameters.
So for one frame would be filtered by stream identify first, then
flow into stream filter block by the same handle between stream identify
and stream filtering. Then flow into stream gate control which assigned
by the stream filtering entry. And then policing by the gate and limited
by the max sdu in the filter block (optional). At last, it is policed by the
flow metering block, whose index is chosen in the filtering block.
So you can see that each entry of block may link to many upper entries
since they can be assigned same index means more streams want to share
the same feature in the stream filtering or stream gate or flow
metering.
To implement such features, each stream filtered by source/destination
mac address, some stream maybe also plus the vlan id value would be
treated as one flow chain. This would be identified by the chain_index
which already in the tc filter concept. Driver would maintain this chain
and also with gate modules. The stream filter entry create by the gate
index and flow meter(optional) entry id and also one priority value.
Offloading only transfer the gate action and flow filtering parameters.
Driver would create (or search same gate id and flow meter id and
 priority) one stream filter entry to set to the hardware. So stream
filtering do not need transfer by the action offloading.
This architecture is same with tc filter and actions relationship. tc
filter maintain the list for each flow feature by keys. And actions
maintain by the action list.

Below showing a example commands by tc:
> tc qdisc add dev eth0 ingress
> ip link set eth0 address 10:00:80:00:00:00
> tc filter add dev eth0 parent ffff: protocol ip chain 11 \
	flower skip_sw dst_mac 10:00:80:00:00:00 \
	action gate index 10 \
	sched-entry open 200000000 1 8000000 \
	sched-entry close 100000000 -1 -1

Command means to set the dst_mac 10:00:80:00:00:00 to index 11 of stream
identify module. Then setting the gate index 10 of stream gate module.
Keep the gate open for 200ms and limit the traffic volume to 8MB in this
sched-entry. Then direct the frames to the ingress queue 1.

Signed-off-by: Po Liu <Po.Liu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc.c     |   25 +-
 drivers/net/ethernet/freescale/enetc/enetc.h     |   46 +-
 drivers/net/ethernet/freescale/enetc/enetc_hw.h  |  142 +++
 drivers/net/ethernet/freescale/enetc/enetc_pf.c  |    4 +-
 drivers/net/ethernet/freescale/enetc/enetc_qos.c | 1098 ++++++++++++++++++++++
 5 files changed, 1300 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc.c b/drivers/net/ethernet/freescale/enetc/enetc.c
index 04aac7cbb506..298c55786fd9 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc.c
@@ -1521,6 +1521,8 @@ int enetc_setup_tc(struct net_device *ndev, enum tc_setup_type type,
 		return enetc_setup_tc_cbs(ndev, type_data);
 	case TC_SETUP_QDISC_ETF:
 		return enetc_setup_tc_txtime(ndev, type_data);
+	case TC_SETUP_BLOCK:
+		return enetc_setup_tc_psfp(ndev, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1573,17 +1575,23 @@ static int enetc_set_rss(struct net_device *ndev, int en)
 static int enetc_set_psfp(struct net_device *ndev, int en)
 {
 	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	int err;
 
 	if (en) {
+		err = enetc_psfp_enable(priv);
+		if (err)
+			return err;
+
 		priv->active_offloads |= ENETC_F_QCI;
-		enetc_get_max_cap(priv);
-		enetc_psfp_enable(&priv->si->hw);
-	} else {
-		priv->active_offloads &= ~ENETC_F_QCI;
-		memset(&priv->psfp_cap, 0, sizeof(struct psfp_cap));
-		enetc_psfp_disable(&priv->si->hw);
+		return 0;
 	}
 
+	err = enetc_psfp_disable(priv);
+	if (err)
+		return err;
+
+	priv->active_offloads &= ~ENETC_F_QCI;
+
 	return 0;
 }
 
@@ -1591,14 +1599,15 @@ int enetc_set_features(struct net_device *ndev,
 		       netdev_features_t features)
 {
 	netdev_features_t changed = ndev->features ^ features;
+	int err = 0;
 
 	if (changed & NETIF_F_RXHASH)
 		enetc_set_rss(ndev, !!(features & NETIF_F_RXHASH));
 
 	if (changed & NETIF_F_HW_TC)
-		enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC));
+		err = enetc_set_psfp(ndev, !!(features & NETIF_F_HW_TC));
 
-	return 0;
+	return err;
 }
 
 #ifdef CONFIG_FSL_ENETC_PTP_CLOCK
diff --git a/drivers/net/ethernet/freescale/enetc/enetc.h b/drivers/net/ethernet/freescale/enetc/enetc.h
index 2cfe877c3778..b705464f6882 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc.h
@@ -300,6 +300,11 @@ int enetc_setup_tc_taprio(struct net_device *ndev, void *type_data);
 void enetc_sched_speed_set(struct net_device *ndev);
 int enetc_setup_tc_cbs(struct net_device *ndev, void *type_data);
 int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data);
+int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv);
+int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data);
+int enetc_psfp_init(struct enetc_ndev_priv *priv);
+int enetc_psfp_clean(struct enetc_ndev_priv *priv);
 
 static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv)
 {
@@ -319,27 +324,60 @@ static inline void enetc_get_max_cap(struct enetc_ndev_priv *priv)
 	priv->psfp_cap.max_psfp_meter = reg & ENETC_PFMCAPR_MSK;
 }
 
-static inline void enetc_psfp_enable(struct enetc_hw *hw)
+static inline int enetc_psfp_enable(struct enetc_ndev_priv *priv)
 {
+	struct enetc_hw *hw = &priv->si->hw;
+	int err;
+
+	enetc_get_max_cap(priv);
+
+	err = enetc_psfp_init(priv);
+	if (err)
+		return err;
+
 	enetc_wr(hw, ENETC_PPSFPMR, enetc_rd(hw, ENETC_PPSFPMR) |
 		 ENETC_PPSFPMR_PSFPEN | ENETC_PPSFPMR_VS |
 		 ENETC_PPSFPMR_PVC | ENETC_PPSFPMR_PVZC);
+
+	return 0;
 }
 
-static inline void enetc_psfp_disable(struct enetc_hw *hw)
+static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv)
 {
+	struct enetc_hw *hw = &priv->si->hw;
+	int err;
+
+	err = enetc_psfp_clean(priv);
+	if (err)
+		return err;
+
 	enetc_wr(hw, ENETC_PPSFPMR, enetc_rd(hw, ENETC_PPSFPMR) &
 		 ~ENETC_PPSFPMR_PSFPEN & ~ENETC_PPSFPMR_VS &
 		 ~ENETC_PPSFPMR_PVC & ~ENETC_PPSFPMR_PVZC);
+
+	memset(&priv->psfp_cap, 0, sizeof(struct psfp_cap));
+
+	return 0;
 }
+
 #else
 #define enetc_setup_tc_taprio(ndev, type_data) -EOPNOTSUPP
 #define enetc_sched_speed_set(ndev) (void)0
 #define enetc_setup_tc_cbs(ndev, type_data) -EOPNOTSUPP
 #define enetc_setup_tc_txtime(ndev, type_data) -EOPNOTSUPP
+#define enetc_setup_tc_psfp(ndev, type_data) -EOPNOTSUPP
+#define enetc_setup_tc_block_cb NULL
+
 #define enetc_get_max_cap(p)		\
 	memset(&((p)->psfp_cap), 0, sizeof(struct psfp_cap))
 
-#define enetc_psfp_enable(hw) (void)0
-#define enetc_psfp_disable(hw) (void)0
+static inline int enetc_psfp_enable(struct enetc_ndev_priv *priv)
+{
+	return 0;
+}
+
+static inline int enetc_psfp_disable(struct enetc_ndev_priv *priv)
+{
+	return 0;
+}
 #endif
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_hw.h b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
index 587974862f48..6314051bc6c1 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_hw.h
+++ b/drivers/net/ethernet/freescale/enetc/enetc_hw.h
@@ -567,6 +567,9 @@ enum bdcr_cmd_class {
 	BDCR_CMD_RFS,
 	BDCR_CMD_PORT_GCL,
 	BDCR_CMD_RECV_CLASSIFIER,
+	BDCR_CMD_STREAM_IDENTIFY,
+	BDCR_CMD_STREAM_FILTER,
+	BDCR_CMD_STREAM_GCL,
 	__BDCR_CMD_MAX_LEN,
 	BDCR_CMD_MAX_LEN = __BDCR_CMD_MAX_LEN - 1,
 };
@@ -598,13 +601,152 @@ struct tgs_gcl_data {
 	struct gce	entry[];
 };
 
+/* class 7, command 0, Stream Identity Entry Configuration */
+struct streamid_conf {
+	__le32	stream_handle;	/* init gate value */
+	__le32	iports;
+		u8	id_type;
+		u8	oui[3];
+		u8	res[3];
+		u8	en;
+};
+
+#define ENETC_CBDR_SID_VID_MASK 0xfff
+#define ENETC_CBDR_SID_VIDM BIT(12)
+#define ENETC_CBDR_SID_TG_MASK 0xc000
+/* streamid_conf address point to this data space */
+struct streamid_data {
+	union {
+		u8 dmac[6];
+		u8 smac[6];
+	};
+	u16     vid_vidm_tg;
+};
+
+#define ENETC_CBDR_SFI_PRI_MASK 0x7
+#define ENETC_CBDR_SFI_PRIM		BIT(3)
+#define ENETC_CBDR_SFI_BLOV		BIT(4)
+#define ENETC_CBDR_SFI_BLEN		BIT(5)
+#define ENETC_CBDR_SFI_MSDUEN	BIT(6)
+#define ENETC_CBDR_SFI_FMITEN	BIT(7)
+#define ENETC_CBDR_SFI_ENABLE	BIT(7)
+/* class 8, command 0, Stream Filter Instance, Short Format */
+struct sfi_conf {
+	__le32	stream_handle;
+		u8	multi;
+		u8	res[2];
+		u8	sthm;
+	/* Max Service Data Unit or Flow Meter Instance Table index.
+	 * Depending on the value of FLT this represents either Max
+	 * Service Data Unit (max frame size) allowed by the filter
+	 * entry or is an index into the Flow Meter Instance table
+	 * index identifying the policer which will be used to police
+	 * it.
+	 */
+	__le16	fm_inst_table_index;
+	__le16	msdu;
+	__le16	sg_inst_table_index;
+		u8	res1[2];
+	__le32	input_ports;
+		u8	res2[3];
+		u8	en;
+};
+
+/* class 8, command 2 stream Filter Instance status query short format
+ * command no need structure define
+ * Stream Filter Instance Query Statistics Response data
+ */
+struct sfi_counter_data {
+	u32 matchl;
+	u32 matchh;
+	u32 msdu_dropl;
+	u32 msdu_droph;
+	u32 stream_gate_dropl;
+	u32 stream_gate_droph;
+	u32 flow_meter_dropl;
+	u32 flow_meter_droph;
+};
+
+#define ENETC_CBDR_SGI_OIPV_MASK 0x7
+#define ENETC_CBDR_SGI_OIPV_EN	BIT(3)
+#define ENETC_CBDR_SGI_CGTST	BIT(6)
+#define ENETC_CBDR_SGI_OGTST	BIT(7)
+#define ENETC_CBDR_SGI_CFG_CHG  BIT(1)
+#define ENETC_CBDR_SGI_CFG_PND  BIT(2)
+#define ENETC_CBDR_SGI_OEX		BIT(4)
+#define ENETC_CBDR_SGI_OEXEN	BIT(5)
+#define ENETC_CBDR_SGI_IRX		BIT(6)
+#define ENETC_CBDR_SGI_IRXEN	BIT(7)
+#define ENETC_CBDR_SGI_ACLLEN_MASK 0x3
+#define ENETC_CBDR_SGI_OCLLEN_MASK 0xc
+#define	ENETC_CBDR_SGI_EN		BIT(7)
+/* class 9, command 0, Stream Gate Instance Table, Short Format
+ * class 9, command 2, Stream Gate Instance Table entry query write back
+ * Short Format
+ */
+struct sgi_table {
+	u8	res[8];
+	u8	oipv;
+	u8	res0[2];
+	u8	ocgtst;
+	u8	res1[7];
+	u8	gset;
+	u8	oacl_len;
+	u8	res2[2];
+	u8	en;
+};
+
+#define ENETC_CBDR_SGI_AIPV_MASK 0x7
+#define ENETC_CBDR_SGI_AIPV_EN	BIT(3)
+#define ENETC_CBDR_SGI_AGTST	BIT(7)
+
+/* class 9, command 1, Stream Gate Control List, Long Format */
+struct sgcl_conf {
+	u8	aipv;
+	u8	res[2];
+	u8	agtst;
+	u8	res1[4];
+	union {
+		struct {
+			u8 res2[4];
+			u8 acl_len;
+			u8 res3[3];
+		};
+		u8 cct[8]; /* Config change time */
+	};
+};
+
+#define ENETC_CBDR_SGL_IOMEN	BIT(0)
+#define ENETC_CBDR_SGL_IPVEN	BIT(3)
+#define ENETC_CBDR_SGL_GTST		BIT(4)
+#define ENETC_CBDR_SGL_IPV_MASK 0xe
+/* Stream Gate Control List Entry */
+struct sgce {
+	u32	interval;
+	u8	msdu[3];
+	u8	multi;
+};
+
+/* stream control list class 9 , cmd 1 data buffer
+ *
+ * The fixed fields are followed by the stream gate control list entries.
+ * sgcl is a C99 flexible array member, like tgs_gcl_data.entry[] in this
+ * header, rather than a zero-length array.
+ */
+struct sgcl_data {
+	u32		btl;
+	u32		bth;
+	u32		ct;
+	u32		cte;
+	struct sgce	sgcl[];
+};
+
 struct enetc_cbd {
 	union{
+		struct sfi_conf sfi_conf;
+		struct sgi_table sgi_table;
 		struct {
 			__le32	addr[2];
 			union {
 				__le32	opt[4];
 				struct tgs_gcl_conf	gcl_conf;
+				struct streamid_conf	sid_set;
+				struct sgcl_conf	sgcl_conf;
 			};
 		};	/* Long format */
 		__le32 data[6];
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_pf.c b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
index cef9fbfdb056..824d211ec00f 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_pf.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_pf.c
@@ -727,12 +727,10 @@ static void enetc_pf_netdev_setup(struct enetc_si *si, struct net_device *ndev,
 	if (si->hw_features & ENETC_SI_F_QBV)
 		priv->active_offloads |= ENETC_F_QBV;
 
-	if (si->hw_features & ENETC_SI_F_PSFP) {
+	if (si->hw_features & ENETC_SI_F_PSFP && !enetc_psfp_enable(priv)) {
 		priv->active_offloads |= ENETC_F_QCI;
 		ndev->features |= NETIF_F_HW_TC;
 		ndev->hw_features |= NETIF_F_HW_TC;
-		enetc_get_max_cap(priv);
-		enetc_psfp_enable(&si->hw);
 	}
 
 	/* pick up primary MAC address from SI */
diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 0c6bf3a55a9a..30fca29b2739 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -5,6 +5,9 @@
 
 #include <net/pkt_sched.h>
 #include <linux/math64.h>
+#include <linux/refcount.h>
+#include <net/pkt_cls.h>
+#include <net/tc_act/tc_gate.h>
 
 static u16 enetc_get_max_gcl_len(struct enetc_hw *hw)
 {
@@ -331,3 +334,1098 @@ int enetc_setup_tc_txtime(struct net_device *ndev, void *type_data)
 
 	return 0;
 }
+
+/* Stream identification types. enetc_streamid_hw_set() copies the value
+ * into streamid_conf.id_type and accepts only NULL and SMAC.
+ */
+enum streamid_type {
+	STREAMID_TYPE_RESERVED = 0,
+	STREAMID_TYPE_NULL,	/* used when matching on destination MAC */
+	STREAMID_TYPE_SMAC,	/* used when matching on source MAC */
+};
+
+/* VLAN tag selector of a stream identity entry. The value is stored in
+ * enetc_streamid.tagged and written as (tagged & 0x3) << 14 into
+ * streamid_data.vid_vidm_tg.
+ */
+enum streamid_vlan_tagged {
+	STREAMID_VLAN_RESERVED = 0,
+	STREAMID_VLAN_TAGGED,
+	STREAMID_VLAN_UNTAGGED,
+	STREAMID_VLAN_ALL,
+};
+
+/* "match anything" marker for enetc_psfp_filter.prio / .meter_id.
+ * Parenthesized so the negative literal is safe inside any expression.
+ */
+#define ENETC_PSFP_WILDCARD (-1)
+/* added to the stream filter index to form the stream handle */
+#define HANDLE_OFFSET 100
+
+/* Which hardware block a supported action set is offloaded to.
+ * enetc_config_clsflower() only accepts entries that include the PSFP bit.
+ */
+enum forward_type {
+	FILTER_ACTION_TYPE_PSFP = BIT(0),
+	FILTER_ACTION_TYPE_ACL = BIT(1),
+	FILTER_ACTION_TYPE_BOTH = GENMASK(1, 0),
+};
+
+/* One row of the supported-offload table: maps a set of flow actions (and
+ * the match keys they need) to the hardware block that handles them.
+ */
+struct actions_fwd {
+	u64 actions;	/* bitmap of BIT(FLOW_ACTION_*), compared for equality */
+	u64 keys;	/* include the must needed keys */
+	enum forward_type output;
+};
+
+/* Per stream filter counters, filled by enetc_streamcounter_hw_get().
+ * passing_sdu_count and passing_frames_count are not read from hardware;
+ * they are derived there by subtracting the drop counters from
+ * matching_frames_count.
+ */
+struct psfp_streamfilter_counters {
+	u64 matching_frames_count;
+	u64 passing_frames_count;
+	u64 not_passing_frames_count;
+	u64 passing_sdu_count;
+	u64 not_passing_sdu_count;
+	u64 red_frames_count;
+};
+
+/* Software description of one stream identity entry, consumed by
+ * enetc_streamid_hw_set().
+ */
+struct enetc_streamid {
+	u32 index;	/* entry index; taken from the tc chain index */
+	/* src_mac and dst_mac share storage; filtertype tells which is valid */
+	union {
+		u8 src_mac[6];
+		u8 dst_mac[6];
+	};
+	u8 filtertype;	/* enum streamid_type */
+	u16 vid;
+	u8 tagged;	/* enum streamid_vlan_tagged */
+	s32 handle;	/* stream handle, shared with the stream filter */
+};
+
+/* Software copy of one stream filter instance; linked on
+ * epsfp.psfp_filter_list and shared between streams through refcount.
+ */
+struct enetc_psfp_filter {
+	u32 index;	/* bit position in epsfp.psfp_sfi_bitmap */
+	s32 handle;	/* index + HANDLE_OFFSET; negative skips handle match */
+	s8 prio;	/* priority to match, negative means wildcard */
+	u32 gate_id;	/* stream gate instance index */
+	s32 meter_id;	/* flow meter index, negative means unused */
+	refcount_t refcount;
+	struct hlist_node node;
+};
+
+/* Software copy of one stream gate instance; linked on
+ * epsfp.psfp_gate_list and shared between streams through refcount.
+ * Allocate with struct_size(gate, entries, num_entries).
+ */
+struct enetc_psfp_gate {
+	u32 index;
+	s8 init_ipv;	/* initial IPV, negative means not set */
+	u64 basetime;
+	u64 cycletime;
+	u64 cycletimext;
+	u32 num_entries;
+	refcount_t refcount;
+	struct hlist_node node;
+	/* flexible array member instead of a deprecated zero-length array */
+	struct action_gate_entry entries[];
+};
+
+/* One offloaded flower rule: the stream identity plus the indexes of the
+ * stream filter and stream gate instances it references. Linked on
+ * epsfp.stream_list.
+ */
+struct enetc_stream_filter {
+	struct enetc_streamid sid;
+	u32 sfi_index;
+	u32 sgi_index;
+	struct flow_stats stats;	/* pkts already reported, lastused */
+	struct hlist_node node;
+};
+
+/* File-wide PSFP bookkeeping; a single instance (epsfp) exists. */
+struct enetc_psfp {
+	unsigned long dev_bitmap;	/* one bit per port with a bound block */
+	unsigned long *psfp_sfi_bitmap;	/* allocated stream filter indexes */
+	struct hlist_head stream_list;
+	struct hlist_head psfp_filter_list;
+	struct hlist_head psfp_gate_list;
+	spinlock_t psfp_lock; /* spinlock for the struct enetc_psfp r/w */
+};
+
+/* Supported action sets, searched by enetc_check_flow_actions() */
+struct actions_fwd enetc_act_fwd[] = {
+	{
+		.actions = BIT(FLOW_ACTION_GATE),
+		.keys = BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS),
+		.output = FILTER_ACTION_TYPE_PSFP,
+	},
+	/* example for ACL actions */
+	{
+		.actions = BIT(FLOW_ACTION_DROP),
+		.keys = 0,
+		.output = FILTER_ACTION_TYPE_ACL,
+	},
+};
+
+/* The single PSFP state instance. A NULL psfp_sfi_bitmap means
+ * enetc_psfp_init() has not allocated it (or it was freed again).
+ */
+static struct enetc_psfp epsfp = {
+	.psfp_sfi_bitmap = NULL,
+};
+
+/* flow block callbacks registered through enetc_setup_tc_psfp() */
+static LIST_HEAD(enetc_block_cb_list);
+
+/* Port number of this interface: the low three bits of the PCI devfn */
+static inline int enetc_get_port(struct enetc_ndev_priv *priv)
+{
+	return priv->si->pdev->devfn & 0x7;
+}
+
+/* Stream Identity Entry Set Descriptor
+ *
+ * Clear the stream identity entry sid->index and, when @enable is set,
+ * program it again from @sid.
+ *
+ * @priv:   interface private data
+ * @sid:    entry to program; index, filtertype, MAC, vid, tagged and handle
+ *          are read
+ * @enable: 0 only clears the entry, non-zero clears and then writes it
+ *
+ * The data buffer comes from dma_alloc_coherent(), so it may be filled at
+ * any time before a command is sent and is released on every exit path.
+ *
+ * Return: 0 on success, -EINVAL for an out-of-range index or a failed clear
+ * command, -EOPNOTSUPP for an unsupported filter type, -ENOMEM when the
+ * buffer cannot be allocated, otherwise the result of the write command.
+ */
+static int enetc_streamid_hw_set(struct enetc_ndev_priv *priv,
+				 struct enetc_streamid *sid,
+				 u8 enable)
+{
+	struct device *dev = &priv->si->pdev->dev;
+	struct enetc_cbd cbd = {.cmd = 0};
+	struct streamid_data *si_data;
+	struct streamid_conf *si_conf;
+	u16 data_size;
+	dma_addr_t dma;
+	int err;
+
+	if (sid->index >= priv->psfp_cap.max_streamid)
+		return -EINVAL;
+
+	if (sid->filtertype != STREAMID_TYPE_NULL &&
+	    sid->filtertype != STREAMID_TYPE_SMAC)
+		return -EOPNOTSUPP;
+
+	data_size = sizeof(struct streamid_data);
+	si_data = dma_alloc_coherent(dev, data_size, &dma, GFP_KERNEL);
+	if (!si_data)
+		return -ENOMEM;
+
+	/* Disable operation before enable */
+	cbd.index = cpu_to_le16((u16)sid->index);
+	cbd.cls = BDCR_CMD_STREAM_IDENTIFY;
+	cbd.status_flags = 0;
+	cbd.length = cpu_to_le16(data_size);
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
+
+	memset(si_data->dmac, 0xff, ETH_ALEN);
+	si_data->vid_vidm_tg =
+		cpu_to_le16(ENETC_CBDR_SID_VID_MASK
+			    + ((0x3 << 14) | ENETC_CBDR_SID_VIDM));
+
+	si_conf = &cbd.sid_set;
+	/* Only one port supported for one entry, set itself */
+	si_conf->iports = 1 << enetc_get_port(priv);
+	si_conf->id_type = 1;
+	si_conf->oui[2] = 0x0;
+	si_conf->oui[1] = 0x80;
+	si_conf->oui[0] = 0xC2;
+
+	err = enetc_send_cmd(priv->si, &cbd);
+	if (err) {
+		err = -EINVAL;
+		goto out;
+	}
+
+	/* Clearing was all that was asked for; err is 0 here */
+	if (!enable)
+		goto out;
+
+	/* Enable the entry overwrite again incase space flushed by hardware.
+	 * si_conf keeps pointing into cbd, so it stays valid after the memset.
+	 */
+	memset(&cbd, 0, sizeof(cbd));
+
+	cbd.index = cpu_to_le16((u16)sid->index);
+	cbd.cmd = 0;
+	cbd.cls = BDCR_CMD_STREAM_IDENTIFY;
+	cbd.status_flags = 0;
+	cbd.length = cpu_to_le16(data_size);
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
+
+	si_conf->en = 0x80;
+	si_conf->stream_handle = cpu_to_le32(sid->handle);
+	si_conf->iports = 1 << enetc_get_port(priv);
+	si_conf->id_type = sid->filtertype;
+	si_conf->oui[2] = 0x0;
+	si_conf->oui[1] = 0x80;
+	si_conf->oui[0] = 0xC2;
+
+	memset(si_data, 0, data_size);
+
+	/* VIDM default to be 1.
+	 * VID Match. If set (b1) then the VID must match, otherwise
+	 * any VID is considered a match. VIDM setting is only used
+	 * when TG is set to b01.
+	 */
+	if (si_conf->id_type == STREAMID_TYPE_NULL)
+		ether_addr_copy(si_data->dmac, sid->dst_mac);
+	else
+		ether_addr_copy(si_data->smac, sid->src_mac);
+
+	si_data->vid_vidm_tg =
+		cpu_to_le16((sid->vid & ENETC_CBDR_SID_VID_MASK) +
+			    ((((u16)(sid->tagged) & 0x3) << 14)
+			     | ENETC_CBDR_SID_VIDM));
+
+	err = enetc_send_cmd(priv->si, &cbd);
+
+out:
+	dma_free_coherent(dev, data_size, si_data, dma);
+
+	return err;
+}
+
+/* Stream Filter Instance Set Descriptor
+ *
+ * Write stream filter instance sfi->index. With @enable clear the command
+ * is sent with an all-zero configuration; otherwise the configuration is
+ * built from @sfi first. Returns the result of enetc_send_cmd().
+ */
+static int enetc_streamfilter_hw_set(struct enetc_ndev_priv *priv,
+				     struct enetc_psfp_filter *sfi,
+				     u8 enable)
+{
+	struct enetc_cbd cbd = {.cmd = 0};
+	struct sfi_conf *cfg = &cbd.sfi_conf;
+
+	cbd.index = cpu_to_le16(sfi->index);
+	cbd.cls = BDCR_CMD_STREAM_FILTER;
+	cbd.status_flags = 0x80;
+	cbd.length = cpu_to_le16(1);
+
+	if (enable) {
+		cfg->en = 0x80;
+
+		/* A negative handle leaves stream handle matching off */
+		if (sfi->handle >= 0) {
+			cfg->stream_handle = cpu_to_le32(sfi->handle);
+			cfg->sthm |= 0x80;
+		}
+
+		cfg->sg_inst_table_index = cpu_to_le16(sfi->gate_id);
+		cfg->input_ports = 1 << enetc_get_port(priv);
+
+		/* The priority value which may be matched against the
+		 * frame's priority value to determine a match for this entry.
+		 */
+		if (sfi->prio >= 0)
+			cfg->multi |= (sfi->prio & 0x7) | 0x8;
+
+		/* Filter Type. Identifies the contents of the
+		 * MSDU/FM_INST_INDEX field as being either an MSDU value or
+		 * an index into the Flow Meter Instance table.
+		 * TODO: no limit max sdu
+		 */
+		if (sfi->meter_id >= 0) {
+			cfg->fm_inst_table_index = cpu_to_le16(sfi->meter_id);
+			cfg->multi |= 0x80;
+		}
+	}
+
+	return enetc_send_cmd(priv->si, &cbd);
+}
+
+/* Read the counters of stream filter instance @index into @cnt.
+ *
+ * The hardware writes a struct sfi_counter_data into a coherent DMA buffer;
+ * every 32-bit half is little-endian and is converted before use.
+ * passing_sdu_count and passing_frames_count are derived by subtraction.
+ *
+ * Return: 0 on success, -ENOMEM when the buffer cannot be allocated,
+ * otherwise the error from enetc_send_cmd(). @cnt is only written on
+ * success.
+ */
+static int enetc_streamcounter_hw_get(struct enetc_ndev_priv *priv,
+				      u32 index,
+				      struct psfp_streamfilter_counters *cnt)
+{
+	struct device *dev = &priv->si->pdev->dev;
+	struct enetc_cbd cbd = { .cmd = 2 };
+	struct sfi_counter_data *data_buf;
+	dma_addr_t dma;
+	u16 data_size;
+	int err;
+
+	cbd.index = cpu_to_le16((u16)index);
+	cbd.cls = BDCR_CMD_STREAM_FILTER;
+	cbd.status_flags = 0;
+
+	data_size = sizeof(struct sfi_counter_data);
+	data_buf = dma_alloc_coherent(dev, data_size, &dma, GFP_KERNEL);
+	if (!data_buf)
+		return -ENOMEM;
+
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
+	cbd.length = cpu_to_le16(data_size);
+
+	err = enetc_send_cmd(priv->si, &cbd);
+	if (err)
+		goto exit;
+
+	cnt->matching_frames_count =
+			((u64)le32_to_cpu(data_buf->matchh) << 32)
+			+ le32_to_cpu(data_buf->matchl);
+
+	cnt->not_passing_sdu_count =
+			((u64)le32_to_cpu(data_buf->msdu_droph) << 32)
+			+ le32_to_cpu(data_buf->msdu_dropl);
+
+	cnt->passing_sdu_count = cnt->matching_frames_count
+				- cnt->not_passing_sdu_count;
+
+	cnt->not_passing_frames_count =
+		((u64)le32_to_cpu(data_buf->stream_gate_droph) << 32)
+		+ le32_to_cpu(data_buf->stream_gate_dropl);
+
+	cnt->passing_frames_count = cnt->matching_frames_count
+				- cnt->not_passing_sdu_count
+				- cnt->not_passing_frames_count;
+
+	cnt->red_frames_count =
+		((u64)le32_to_cpu(data_buf->flow_meter_droph) << 32)
+		+ le32_to_cpu(data_buf->flow_meter_dropl);
+
+exit:
+	dma_free_coherent(dev, data_size, data_buf, dma);
+	return err;
+}
+
+/* Current time as a 64-bit value assembled from the ENETC_SICTR0 (low) and
+ * ENETC_SICTR1 (high) registers; the low word is read first.
+ */
+static u64 get_ptp_now(struct enetc_hw *hw)
+{
+	u64 lo = enetc_rd(hw, ENETC_SICTR0);
+	u64 hi = enetc_rd(hw, ENETC_SICTR1);
+
+	return (hi << 32) | lo;
+}
+
+/* Store in *start the first multiple of @cycle that is strictly greater
+ * than @now. Returns -EFAULT (leaving *start untouched) for a zero cycle,
+ * 0 otherwise.
+ */
+static int get_start_ns(u64 now, u64 cycle, u64 *start)
+{
+	u64 elapsed_cycles;
+
+	if (cycle == 0)
+		return -EFAULT;
+
+	elapsed_cycles = div64_u64(now, cycle);
+	*start = cycle * (elapsed_cycles + 1);
+
+	return 0;
+}
+
+/* Stream Gate Instance Set Descriptor
+ *
+ * Disable stream gate instance sgi->index, or enable it and load its gate
+ * control list.
+ *
+ * @priv:   interface private data
+ * @sgi:    gate instance; index, init_ipv, times and entries are read
+ * @enable: 0 sends a disable command only
+ *
+ * Enabling takes two commands: cmd 0 writes the instance table entry, cmd 1
+ * writes the control list from a coherent DMA buffer that is released on
+ * every exit path.
+ *
+ * Return: 0 on success or when there are no entries to program, -EINVAL for
+ * too many entries, a zero cycle time or a failed cmd 0, -ENOMEM when the
+ * buffer cannot be allocated, otherwise the result of the cmd 1 command.
+ */
+static int enetc_streamgate_hw_set(struct enetc_ndev_priv *priv,
+				   struct enetc_psfp_gate *sgi,
+				   u8 enable)
+{
+	struct device *dev = &priv->si->pdev->dev;
+	struct enetc_cbd cbd = { .cmd = 0 };
+	struct sgi_table *sgi_config;
+	struct sgcl_conf *sgcl_config;
+	struct sgcl_data *sgcl_data;
+	struct sgce *sgce;
+	dma_addr_t dma;
+	u16 data_size;
+	int err;
+	u64 now;
+	u32 i;
+
+	cbd.index = cpu_to_le16(sgi->index);
+	cbd.cmd = 0;
+	cbd.cls = BDCR_CMD_STREAM_GCL;
+	cbd.status_flags = 0x80;
+
+	/* disable */
+	if (!enable)
+		return enetc_send_cmd(priv->si, &cbd);
+
+	if (!sgi->num_entries)
+		return 0;
+
+	if (sgi->num_entries > priv->psfp_cap.max_psfp_gatelist ||
+	    !sgi->cycletime)
+		return -EINVAL;
+
+	/* enable */
+	sgi_config = &cbd.sgi_table;
+
+	/* Keep open before gate list start */
+	sgi_config->ocgtst = 0x80;
+
+	sgi_config->oipv = (sgi->init_ipv < 0) ?
+				0x0 : ((sgi->init_ipv & 0x7) | 0x8);
+
+	sgi_config->en = 0x80;
+
+	/* Basic config */
+	err = enetc_send_cmd(priv->si, &cbd);
+	if (err)
+		return -EINVAL;
+
+	data_size = struct_size(sgcl_data, sgcl, sgi->num_entries);
+	sgcl_data = dma_alloc_coherent(dev, data_size, &dma, GFP_KERNEL);
+	if (!sgcl_data)
+		return -ENOMEM;
+
+	memset(&cbd, 0, sizeof(cbd));
+
+	cbd.index = cpu_to_le16(sgi->index);
+	cbd.cmd = 1;
+	cbd.cls = BDCR_CMD_STREAM_GCL;
+	cbd.status_flags = 0;
+	cbd.length = cpu_to_le16(data_size);
+	cbd.addr[0] = cpu_to_le32(lower_32_bits(dma));
+	cbd.addr[1] = cpu_to_le32(upper_32_bits(dma));
+
+	sgcl_config = &cbd.sgcl_conf;
+	sgcl_config->acl_len = (sgi->num_entries - 1) & 0x3;
+	sgcl_config->agtst = 0x80;
+
+	if (sgi->init_ipv >= 0)
+		sgcl_config->aipv = (sgi->init_ipv & 0x7) | 0x8;
+
+	sgcl_data->ct = cpu_to_le32(sgi->cycletime);
+	sgcl_data->cte = cpu_to_le32(sgi->cycletimext);
+
+	sgce = &sgcl_data->sgcl[0];
+
+	for (i = 0; i < sgi->num_entries; i++) {
+		struct action_gate_entry *from = &sgi->entries[i];
+		struct sgce *to = &sgce[i];
+
+		if (from->gate_state)
+			to->multi |= 0x10;
+
+		if (from->ipv >= 0)
+			to->multi |= ((from->ipv & 0x7) << 5) | 0x08;
+
+		/* maxoctets is stored least significant byte first */
+		if (from->maxoctets >= 0) {
+			to->multi |= 0x01;
+			to->msdu[0] = from->maxoctets & 0xFF;
+			to->msdu[1] = (from->maxoctets >> 8) & 0xFF;
+			to->msdu[2] = (from->maxoctets >> 16) & 0xFF;
+		}
+
+		to->interval = cpu_to_le32(from->interval);
+	}
+
+	/* If basetime is less than now, calculate start time */
+	now = get_ptp_now(&priv->si->hw);
+
+	if (sgi->basetime < now) {
+		u64 start;
+
+		err = get_start_ns(now, sgi->cycletime, &start);
+		if (err)
+			goto exit;
+		sgcl_data->btl = cpu_to_le32(lower_32_bits(start));
+		sgcl_data->bth = cpu_to_le32(upper_32_bits(start));
+	} else {
+		u32 hi, lo;
+
+		hi = upper_32_bits(sgi->basetime);
+		lo = lower_32_bits(sgi->basetime);
+		sgcl_data->bth = cpu_to_le32(hi);
+		sgcl_data->btl = cpu_to_le32(lo);
+	}
+
+	err = enetc_send_cmd(priv->si, &cbd);
+
+exit:
+	dma_free_coherent(dev, data_size, sgcl_data, dma);
+
+	return err;
+}
+
+/* Find the stream on epsfp.stream_list whose sid.index equals @index;
+ * NULL when there is none.
+ */
+static struct enetc_stream_filter *enetc_get_stream_by_index(u32 index)
+{
+	struct enetc_stream_filter *found = NULL;
+	struct enetc_stream_filter *pos;
+
+	hlist_for_each_entry(pos, &epsfp.stream_list, node) {
+		if (pos->sid.index == index) {
+			found = pos;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Find the gate on epsfp.psfp_gate_list with the given @index;
+ * NULL when there is none.
+ */
+static struct enetc_psfp_gate *enetc_get_gate_by_index(u32 index)
+{
+	struct enetc_psfp_gate *found = NULL;
+	struct enetc_psfp_gate *pos;
+
+	hlist_for_each_entry(pos, &epsfp.psfp_gate_list, node) {
+		if (pos->index == index) {
+			found = pos;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Find the stream filter on epsfp.psfp_filter_list with the given @index;
+ * NULL when there is none.
+ */
+static struct enetc_psfp_filter *enetc_get_filter_by_index(u32 index)
+{
+	struct enetc_psfp_filter *found = NULL;
+	struct enetc_psfp_filter *pos;
+
+	hlist_for_each_entry(pos, &epsfp.psfp_filter_list, node) {
+		if (pos->index == index) {
+			found = pos;
+			break;
+		}
+	}
+
+	return found;
+}
+
+/* Look for an already installed stream filter with the same gate_id, prio
+ * and meter_id as @sfi, so it can be shared; NULL when there is none.
+ */
+static struct enetc_psfp_filter
+	*enetc_psfp_check_sfi(struct enetc_psfp_filter *sfi)
+{
+	struct enetc_psfp_filter *pos;
+
+	hlist_for_each_entry(pos, &epsfp.psfp_filter_list, node) {
+		if (pos->gate_id != sfi->gate_id)
+			continue;
+		if (pos->prio != sfi->prio)
+			continue;
+		if (pos->meter_id != sfi->meter_id)
+			continue;
+
+		return pos;
+	}
+
+	return NULL;
+}
+
+/* First clear bit in epsfp.psfp_sfi_bitmap below max_psfp_filter, or -1
+ * when every stream filter index is taken. The bit is not set here.
+ */
+static int enetc_get_free_index(struct enetc_ndev_priv *priv)
+{
+	u32 limit = priv->psfp_cap.max_psfp_filter;
+	unsigned long bit = find_first_zero_bit(epsfp.psfp_sfi_bitmap, limit);
+
+	return bit == limit ? -1 : bit;
+}
+
+/* Drop one reference on stream filter @index. When the last reference goes
+ * away the instance is disabled in hardware, unlinked, its index released
+ * in epsfp.psfp_sfi_bitmap and the memory freed.
+ *
+ * An unknown index triggers a warning and is otherwise ignored.
+ */
+static void stream_filter_unref(struct enetc_ndev_priv *priv, u32 index)
+{
+	struct enetc_psfp_filter *sfi;
+
+	sfi = enetc_get_filter_by_index(index);
+	if (WARN_ON(!sfi))
+		return;
+
+	if (!refcount_dec_and_test(&sfi->refcount))
+		return;
+
+	enetc_streamfilter_hw_set(priv, sfi, false);
+	hlist_del(&sfi->node);
+	/* release the index before freeing: sfi->index must still be valid */
+	clear_bit(sfi->index, epsfp.psfp_sfi_bitmap);
+	kfree(sfi);
+}
+
+/* Drop one reference on stream gate @index. When the last reference goes
+ * away the instance is disabled in hardware, unlinked and freed.
+ *
+ * An unknown index triggers a warning and is otherwise ignored.
+ */
+static void stream_gate_unref(struct enetc_ndev_priv *priv, u32 index)
+{
+	struct enetc_psfp_gate *sgi;
+
+	sgi = enetc_get_gate_by_index(index);
+	if (WARN_ON(!sgi))
+		return;
+
+	if (!refcount_dec_and_test(&sgi->refcount))
+		return;
+
+	enetc_streamgate_hw_set(priv, sgi, false);
+	hlist_del(&sgi->node);
+	kfree(sgi);
+}
+
+/* Tear down one offloaded stream: drop its references on the stream gate
+ * and stream filter instances, then unlink and free @filter itself.
+ * The stream identity entry is not touched here.
+ */
+static void remove_one_chain(struct enetc_ndev_priv *priv,
+			     struct enetc_stream_filter *filter)
+{
+	stream_gate_unref(priv, filter->sgi_index);
+	stream_filter_unref(priv, filter->sfi_index);
+
+	hlist_del(&filter->node);
+	kfree(filter);
+}
+
+/* Program stream identity, stream filter (skipped when @sfi is NULL) and
+ * stream gate, in that order. If a later step fails the earlier ones are
+ * disabled again and the failing step's error is returned.
+ */
+static int enetc_psfp_hw_set(struct enetc_ndev_priv *priv,
+			     struct enetc_streamid *sid,
+			     struct enetc_psfp_filter *sfi,
+			     struct enetc_psfp_gate *sgi)
+{
+	int ret;
+
+	ret = enetc_streamid_hw_set(priv, sid, true);
+	if (ret)
+		return ret;
+
+	if (sfi) {
+		ret = enetc_streamfilter_hw_set(priv, sfi, true);
+		if (ret) {
+			enetc_streamid_hw_set(priv, sid, false);
+			return ret;
+		}
+	}
+
+	ret = enetc_streamgate_hw_set(priv, sgi, true);
+	if (!ret)
+		return 0;
+
+	/* gate failed: undo the filter (if we wrote one) and the identity */
+	if (sfi)
+		enetc_streamfilter_hw_set(priv, sfi, false);
+	enetc_streamid_hw_set(priv, sid, false);
+
+	return ret;
+}
+
+/* Return the enetc_act_fwd[] row whose action bitmap equals @acts and whose
+ * key bitmap shares at least one bit with @inputkeys; NULL when no row
+ * qualifies. A row with keys == 0 can therefore never be returned.
+ */
+struct actions_fwd *enetc_check_flow_actions(u64 acts, unsigned int inputkeys)
+{
+	struct actions_fwd *row = enetc_act_fwd;
+	struct actions_fwd *end = enetc_act_fwd + ARRAY_SIZE(enetc_act_fwd);
+
+	for (; row < end; row++) {
+		if (acts != row->actions)
+			continue;
+		if (!(inputkeys & row->keys))
+			continue;
+
+		return row;
+	}
+
+	return NULL;
+}
+
+/* Translate a flower rule carrying a gate action into PSFP state and
+ * program it.
+ *
+ * The tc chain index selects the stream identity entry. The rule must match
+ * on exactly one of source or destination MAC with a full mask, and may
+ * match on VLAN id/priority with full masks. A stream filter instance with
+ * the same gate, priority and meter is reused when one exists; otherwise a
+ * free index is taken from epsfp.psfp_sfi_bitmap. After the hardware is
+ * programmed, the software lists are updated under epsfp.psfp_lock and any
+ * previous stream on the same chain index is removed.
+ *
+ * Return: 0 on success, -ENOSPC when an index or table is exhausted,
+ * -EINVAL for an unsupported match or a missing gate action, -ENOMEM on
+ * allocation failure, otherwise the error from enetc_psfp_hw_set().
+ */
+static int enetc_psfp_parse_clsflower(struct enetc_ndev_priv *priv,
+				      struct flow_cls_offload *f)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct netlink_ext_ack *extack = f->common.extack;
+	struct enetc_stream_filter *filter, *old_filter;
+	struct enetc_psfp_filter *sfi, *old_sfi;
+	struct enetc_psfp_gate *sgi, *old_sgi;
+	struct flow_action_entry *entry = NULL;
+	struct flow_action_entry *act;
+	struct action_gate_entry *e;
+	u8 sfi_overwrite = 0;
+	int entries_size;
+	int i, err;
+
+	if (f->common.chain_index >= priv->psfp_cap.max_streamid) {
+		NL_SET_ERR_MSG_MOD(extack, "No Stream identify resource!");
+		return -ENOSPC;
+	}
+
+	/* Remember the gate action explicitly: the loop cursor points past
+	 * the last action when nothing matched and must not be dereferenced.
+	 */
+	flow_action_for_each(i, act, &rule->action) {
+		if (act->id == FLOW_ACTION_GATE) {
+			entry = act;
+			break;
+		}
+	}
+
+	if (!entry)
+		return -EINVAL;
+
+	filter = kzalloc(sizeof(*filter), GFP_KERNEL);
+	if (!filter)
+		return -ENOMEM;
+
+	filter->sid.index = f->common.chain_index;
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ETH_ADDRS)) {
+		struct flow_match_eth_addrs match;
+
+		flow_rule_match_eth_addrs(rule, &match);
+
+		if (!is_zero_ether_addr(match.mask->dst) &&
+		    !is_zero_ether_addr(match.mask->src)) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Cannot match on both source and destination MAC");
+			err = -EINVAL;
+			goto free_filter;
+		}
+
+		if (!is_zero_ether_addr(match.mask->dst)) {
+			if (!is_broadcast_ether_addr(match.mask->dst)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Masked matching on destination MAC not supported");
+				err = -EINVAL;
+				goto free_filter;
+			}
+			ether_addr_copy(filter->sid.dst_mac, match.key->dst);
+			filter->sid.filtertype = STREAMID_TYPE_NULL;
+		}
+
+		if (!is_zero_ether_addr(match.mask->src)) {
+			if (!is_broadcast_ether_addr(match.mask->src)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Masked matching on source MAC not supported");
+				err = -EINVAL;
+				goto free_filter;
+			}
+			ether_addr_copy(filter->sid.src_mac, match.key->src);
+			filter->sid.filtertype = STREAMID_TYPE_SMAC;
+		}
+	} else {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported, must include ETH_ADDRS");
+		err = -EINVAL;
+		goto free_filter;
+	}
+
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
+		struct flow_match_vlan match;
+
+		flow_rule_match_vlan(rule, &match);
+		if (match.mask->vlan_priority) {
+			if (match.mask->vlan_priority !=
+			    (VLAN_PRIO_MASK >> VLAN_PRIO_SHIFT)) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN priority");
+				err = -EINVAL;
+				goto free_filter;
+			}
+		}
+
+		if (match.mask->vlan_id) {
+			if (match.mask->vlan_id != VLAN_VID_MASK) {
+				NL_SET_ERR_MSG_MOD(extack, "Only full mask is supported for VLAN id");
+				err = -EINVAL;
+				goto free_filter;
+			}
+
+			filter->sid.vid = match.key->vlan_id;
+			if (!filter->sid.vid)
+				filter->sid.tagged = STREAMID_VLAN_UNTAGGED;
+			else
+				filter->sid.tagged = STREAMID_VLAN_TAGGED;
+		}
+	} else {
+		filter->sid.tagged = STREAMID_VLAN_ALL;
+	}
+
+	/* parsing gate action */
+	if (entry->gate.index >= priv->psfp_cap.max_psfp_gate) {
+		NL_SET_ERR_MSG_MOD(extack, "No Stream Gate resource!");
+		err = -ENOSPC;
+		goto free_filter;
+	}
+
+	if (entry->gate.num_entries >= priv->psfp_cap.max_psfp_gatelist) {
+		NL_SET_ERR_MSG_MOD(extack, "No Stream Gate resource!");
+		err = -ENOSPC;
+		goto free_filter;
+	}
+
+	entries_size = struct_size(sgi, entries, entry->gate.num_entries);
+	sgi = kzalloc(entries_size, GFP_KERNEL);
+	if (!sgi) {
+		err = -ENOMEM;
+		goto free_filter;
+	}
+
+	refcount_set(&sgi->refcount, 1);
+	sgi->index = entry->gate.index;
+	sgi->init_ipv = entry->gate.prio;
+	sgi->basetime = entry->gate.basetime;
+	sgi->cycletime = entry->gate.cycletime;
+	sgi->num_entries = entry->gate.num_entries;
+
+	e = sgi->entries;
+	for (i = 0; i < entry->gate.num_entries; i++) {
+		e[i].gate_state = entry->gate.entries[i].gate_state;
+		e[i].interval = entry->gate.entries[i].interval;
+		e[i].ipv = entry->gate.entries[i].ipv;
+		e[i].maxoctets = entry->gate.entries[i].maxoctets;
+	}
+
+	filter->sgi_index = sgi->index;
+
+	sfi = kzalloc(sizeof(*sfi), GFP_KERNEL);
+	if (!sfi) {
+		err = -ENOMEM;
+		goto free_gate;
+	}
+
+	refcount_set(&sfi->refcount, 1);
+	sfi->gate_id = sgi->index;
+
+	/* flow meter not support yet */
+	sfi->meter_id = ENETC_PSFP_WILDCARD;
+
+	/* prio ref the filter prio */
+	if (f->common.prio && f->common.prio <= BIT(3))
+		sfi->prio = f->common.prio - 1;
+	else
+		sfi->prio = ENETC_PSFP_WILDCARD;
+
+	old_sfi = enetc_psfp_check_sfi(sfi);
+	if (!old_sfi) {
+		int index;
+
+		/* sfi was zero-allocated, so only the returned index can
+		 * tell whether a free stream filter slot exists.
+		 */
+		index = enetc_get_free_index(priv);
+		if (index < 0) {
+			NL_SET_ERR_MSG_MOD(extack, "No Stream Filter resource!");
+			err = -ENOSPC;
+			goto free_sfi;
+		}
+
+		sfi->index = index;
+		sfi->handle = index + HANDLE_OFFSET;
+		/* Update the stream filter handle also */
+		filter->sid.handle = sfi->handle;
+		filter->sfi_index = sfi->index;
+		sfi_overwrite = 0;
+	} else {
+		filter->sfi_index = old_sfi->index;
+		filter->sid.handle = old_sfi->handle;
+		sfi_overwrite = 1;
+	}
+
+	err = enetc_psfp_hw_set(priv, &filter->sid,
+				sfi_overwrite ? NULL : sfi, sgi);
+	if (err)
+		goto free_sfi;
+
+	spin_lock(&epsfp.psfp_lock);
+	/* Remove the old node if exist and update with a new node */
+	old_sgi = enetc_get_gate_by_index(filter->sgi_index);
+	if (old_sgi) {
+		refcount_set(&sgi->refcount,
+			     refcount_read(&old_sgi->refcount) + 1);
+		hlist_del(&old_sgi->node);
+		kfree(old_sgi);
+	}
+
+	hlist_add_head(&sgi->node, &epsfp.psfp_gate_list);
+
+	if (!old_sfi) {
+		hlist_add_head(&sfi->node, &epsfp.psfp_filter_list);
+		set_bit(sfi->index, epsfp.psfp_sfi_bitmap);
+	} else {
+		/* an equivalent instance exists; share it instead */
+		kfree(sfi);
+		refcount_inc(&old_sfi->refcount);
+	}
+
+	old_filter = enetc_get_stream_by_index(filter->sid.index);
+	if (old_filter)
+		remove_one_chain(priv, old_filter);
+
+	filter->stats.lastused = jiffies;
+	hlist_add_head(&filter->node, &epsfp.stream_list);
+
+	spin_unlock(&epsfp.psfp_lock);
+
+	return 0;
+
+free_sfi:
+	kfree(sfi);
+free_gate:
+	kfree(sgi);
+free_filter:
+	kfree(filter);
+
+	return err;
+}
+
+/* Handle FLOW_CLS_REPLACE: collect the rule's actions into a bitmap, look
+ * the combination up in the supported-offload table and hand PSFP rules to
+ * enetc_psfp_parse_clsflower().
+ *
+ * Return: 0 on success, -EINVAL when the rule has no action, -EOPNOTSUPP
+ * for unsupported combinations, otherwise the parser's error.
+ */
+static int enetc_config_clsflower(struct enetc_ndev_priv *priv,
+				  struct flow_cls_offload *cls_flower)
+{
+	struct flow_rule *rule = flow_cls_offload_flow_rule(cls_flower);
+	struct netlink_ext_ack *extack = cls_flower->common.extack;
+	struct flow_dissector *dissector = rule->match.dissector;
+	struct flow_action *action = &rule->action;
+	struct flow_action_entry *entry;
+	struct actions_fwd *fwd;
+	u64 actions = 0;
+	int i, err;
+
+	if (!flow_action_has_entries(action)) {
+		NL_SET_ERR_MSG_MOD(extack, "At least one action is needed");
+		return -EINVAL;
+	}
+
+	flow_action_for_each(i, entry, action)
+		actions |= BIT(entry->id);
+
+	fwd = enetc_check_flow_actions(actions, dissector->used_keys);
+	if (!fwd) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported filter type!");
+		return -EOPNOTSUPP;
+	}
+
+	/* Only the PSFP path is implemented */
+	if (!(fwd->output & FILTER_ACTION_TYPE_PSFP)) {
+		NL_SET_ERR_MSG_MOD(extack, "Unsupported actions");
+		return -EOPNOTSUPP;
+	}
+
+	err = enetc_psfp_parse_clsflower(priv, cls_flower);
+	if (err)
+		NL_SET_ERR_MSG_MOD(extack, "Invalid PSFP inputs");
+
+	return err;
+}
+
+/* Handle FLOW_CLS_DESTROY for a PSFP rule: disable the stream identity
+ * entry selected by the tc chain index, then release the stream's gate and
+ * filter references and free it.
+ *
+ * Return: 0 on success, -ENOSPC for an out-of-range chain index, -EINVAL
+ * when no stream is installed for it, otherwise the hardware error.
+ *
+ * NOTE(review): unlike enetc_psfp_parse_clsflower(), the list lookup and
+ * remove_one_chain() run here without epsfp.psfp_lock held - confirm this
+ * is intended.
+ */
+static int enetc_psfp_destroy_clsflower(struct enetc_ndev_priv *priv,
+					struct flow_cls_offload *f)
+{
+	struct enetc_stream_filter *filter;
+	struct netlink_ext_ack *extack = f->common.extack;
+	int err;
+
+	if (f->common.chain_index >= priv->psfp_cap.max_streamid) {
+		NL_SET_ERR_MSG_MOD(extack, "No Stream identify resource!");
+		return -ENOSPC;
+	}
+
+	filter = enetc_get_stream_by_index(f->common.chain_index);
+	if (!filter)
+		return -EINVAL;
+
+	/* stop identifying the stream before its filter and gate go away */
+	err = enetc_streamid_hw_set(priv, &filter->sid, false);
+	if (err)
+		return err;
+
+	remove_one_chain(priv, filter);
+
+	return 0;
+}
+
+/* Rule removal entry point; PSFP is the only offload type, so this simply
+ * forwards to enetc_psfp_destroy_clsflower().
+ */
+static int enetc_destroy_clsflower(struct enetc_ndev_priv *priv,
+				   struct flow_cls_offload *f)
+{
+	return enetc_psfp_destroy_clsflower(priv, f);
+}
+
+/* Handle FLOW_CLS_STATS: read the hardware match counter of the stream's
+ * filter instance and report the packets seen since the previous query.
+ * Bytes are always reported as 0.
+ *
+ * Return: 0 on success, -EINVAL when the stream is unknown or the counters
+ * cannot be read.
+ */
+static int enetc_psfp_get_stats(struct enetc_ndev_priv *priv,
+				struct flow_cls_offload *f)
+{
+	struct psfp_streamfilter_counters counters = {};
+	struct enetc_stream_filter *filter;
+	u64 new_pkts, lastused;
+
+	filter = enetc_get_stream_by_index(f->common.chain_index);
+	if (!filter)
+		return -EINVAL;
+
+	if (enetc_streamcounter_hw_get(priv, filter->sfi_index, &counters))
+		return -EINVAL;
+
+	/* filter->stats.pkts accumulates what has been reported so far */
+	spin_lock(&epsfp.psfp_lock);
+	new_pkts = counters.matching_frames_count - filter->stats.pkts;
+	lastused = filter->stats.lastused;
+	filter->stats.pkts += new_pkts;
+	spin_unlock(&epsfp.psfp_lock);
+
+	flow_stats_update(&f->stats, 0x0, new_pkts, lastused,
+			  FLOW_ACTION_HW_STATS_DELAYED);
+
+	return 0;
+}
+
+/* Dispatch a flower offload request by command; anything other than
+ * replace, destroy and stats yields -EOPNOTSUPP.
+ */
+static int enetc_setup_tc_cls_flower(struct enetc_ndev_priv *priv,
+				     struct flow_cls_offload *cls_flower)
+{
+	if (cls_flower->command == FLOW_CLS_REPLACE)
+		return enetc_config_clsflower(priv, cls_flower);
+
+	if (cls_flower->command == FLOW_CLS_DESTROY)
+		return enetc_destroy_clsflower(priv, cls_flower);
+
+	if (cls_flower->command == FLOW_CLS_STATS)
+		return enetc_psfp_get_stats(priv, cls_flower);
+
+	return -EOPNOTSUPP;
+}
+
+/* Free the stream filter index bitmap and reset the pointer to NULL, which
+ * is the state enetc_psfp_init() checks for before allocating.
+ */
+static inline void clean_psfp_sfi_bitmap(void)
+{
+	bitmap_free(epsfp.psfp_sfi_bitmap);
+	epsfp.psfp_sfi_bitmap = NULL;
+}
+
+/* Unlink and free every entry on epsfp.stream_list */
+static void clean_stream_list(void)
+{
+	struct hlist_head *head = &epsfp.stream_list;
+
+	while (!hlist_empty(head)) {
+		struct enetc_stream_filter *first;
+
+		first = hlist_entry(head->first, struct enetc_stream_filter,
+				    node);
+		hlist_del(&first->node);
+		kfree(first);
+	}
+}
+
+/* Unlink and free every entry on epsfp.psfp_filter_list */
+static void clean_sfi_list(void)
+{
+	struct hlist_head *head = &epsfp.psfp_filter_list;
+
+	while (!hlist_empty(head)) {
+		struct enetc_psfp_filter *first;
+
+		first = hlist_entry(head->first, struct enetc_psfp_filter,
+				    node);
+		hlist_del(&first->node);
+		kfree(first);
+	}
+}
+
+/* Unlink and free every entry on epsfp.psfp_gate_list */
+static void clean_sgi_list(void)
+{
+	struct hlist_head *head = &epsfp.psfp_gate_list;
+
+	while (!hlist_empty(head)) {
+		struct enetc_psfp_gate *first;
+
+		first = hlist_entry(head->first, struct enetc_psfp_gate,
+				    node);
+		hlist_del(&first->node);
+		kfree(first);
+	}
+}
+
+/* Drop all software PSFP state: the three lists, the port bitmap and the
+ * stream filter index bitmap. Only memory is released here; no command is
+ * sent to the hardware from this function or the helpers it calls.
+ */
+static void clean_psfp_all(void)
+{
+	/* Free all list nodes and their memory */
+	clean_sfi_list();
+	clean_sgi_list();
+	clean_stream_list();
+	epsfp.dev_bitmap = 0;
+	clean_psfp_sfi_bitmap();
+}
+
+/* Flow block callback. @cb_priv is the net_device registered in
+ * enetc_setup_tc_psfp(). Only flower classifiers on a device that can
+ * offload are handled; everything else returns -EOPNOTSUPP.
+ */
+int enetc_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv)
+{
+	struct net_device *ndev = cb_priv;
+
+	if (!tc_can_offload(ndev))
+		return -EOPNOTSUPP;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	return enetc_setup_tc_cls_flower(netdev_priv(ndev), type_data);
+}
+
+/* Allocate the shared stream filter index bitmap (sized from @priv's
+ * max_psfp_filter) and initialise the lock. Does nothing when the bitmap
+ * already exists. Returns 0, or -ENOMEM if the allocation fails.
+ */
+int enetc_psfp_init(struct enetc_ndev_priv *priv)
+{
+	unsigned long *bitmap;
+
+	if (epsfp.psfp_sfi_bitmap)
+		return 0;
+
+	bitmap = bitmap_zalloc(priv->psfp_cap.max_psfp_filter, GFP_KERNEL);
+	if (!bitmap)
+		return -ENOMEM;
+
+	epsfp.psfp_sfi_bitmap = bitmap;
+	spin_lock_init(&epsfp.psfp_lock);
+
+	/* no block bound anywhere: start with an empty port bitmap */
+	if (list_empty(&enetc_block_cb_list))
+		epsfp.dev_bitmap = 0;
+
+	return 0;
+}
+
+/* Release all software PSFP state, unless a flow block is still registered
+ * on enetc_block_cb_list, in which case -EBUSY is returned and nothing is
+ * freed. @priv is not used.
+ */
+int enetc_psfp_clean(struct enetc_ndev_priv *priv)
+{
+	if (!list_empty(&enetc_block_cb_list))
+		return -EBUSY;
+
+	clean_psfp_all();
+
+	return 0;
+}
+
+/* Bind or unbind a tc flow block for @ndev and track, per port, whether a
+ * block is bound. When the last port unbinds, all software PSFP state is
+ * dropped.
+ *
+ * Return: 0 on success, otherwise the error from
+ * flow_block_cb_setup_simple().
+ *
+ * NOTE(review): clean_psfp_all() also frees epsfp.psfp_sfi_bitmap and
+ * nothing in this function re-allocates it on a later BIND - confirm that
+ * enetc_psfp_init() runs again before rules are added.
+ */
+int enetc_setup_tc_psfp(struct net_device *ndev, void *type_data)
+{
+	struct enetc_ndev_priv *priv = netdev_priv(ndev);
+	struct flow_block_offload *f = type_data;
+	int err;
+
+	err = flow_block_cb_setup_simple(f, &enetc_block_cb_list,
+					 enetc_setup_tc_block_cb,
+					 ndev, ndev, true);
+	if (err)
+		return err;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		set_bit(enetc_get_port(priv), &epsfp.dev_bitmap);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		clear_bit(enetc_get_port(priv), &epsfp.dev_bitmap);
+		/* last port gone: drop the shared lists and bitmap */
+		if (!epsfp.dev_bitmap)
+			clean_psfp_all();
+		break;
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 7562a13d5a8ce9bc5020705da5f50221021f5a2c Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:01 +0200
Subject: net/smc: multiple link support for rmb buffer registration

The CONFIRM_RKEY LLC processing handles all links in one LLC message.
Move the call to this processing out of smcr_link_reg_rmb() which does
processing per link, into smcr_lgr_reg_rmbs() which is responsible for
link group level processing. Move smcr_link_reg_rmb() into module
smc_core.c.
>From af_smc.c now call smcr_lgr_reg_rmbs() to register new rmbs on all
available links.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 54 +++++++++++++++++++-----------------------------------
 net/smc/smc_core.c | 16 ++++++++++++++++
 net/smc/smc_core.h |  1 +
 3 files changed, 36 insertions(+), 35 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index bd9662d06896..20d6d3fbb86c 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -337,46 +337,30 @@ static void smc_copy_sock_settings_to_smc(struct smc_sock *smc)
 	smc_copy_sock_settings(&smc->sk, smc->clcsock->sk, SK_FLAGS_CLC_TO_SMC);
 }
 
-/* register a new rmb, send confirm_rkey msg to register with peer */
-static int smcr_link_reg_rmb(struct smc_link *link,
-			     struct smc_buf_desc *rmb_desc, bool conf_rkey)
-{
-	if (!rmb_desc->is_reg_mr[link->link_idx]) {
-		/* register memory region for new rmb */
-		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
-			rmb_desc->is_reg_err = true;
-			return -EFAULT;
-		}
-		rmb_desc->is_reg_mr[link->link_idx] = true;
-	}
-	if (!conf_rkey)
-		return 0;
-
-	/* exchange confirm_rkey msg with peer */
-	if (!rmb_desc->is_conf_rkey) {
-		if (smc_llc_do_confirm_rkey(link, rmb_desc)) {
-			rmb_desc->is_reg_err = true;
-			return -EFAULT;
-		}
-		rmb_desc->is_conf_rkey = true;
-	}
-	return 0;
-}
-
 /* register the new rmb on all links */
-static int smcr_lgr_reg_rmbs(struct smc_link_group *lgr,
+static int smcr_lgr_reg_rmbs(struct smc_link *link,
 			     struct smc_buf_desc *rmb_desc)
 {
-	int i, rc;
+	struct smc_link_group *lgr = link->lgr;
+	int i, rc = 0;
 
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
 			continue;
-		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc, true);
+		rc = smcr_link_reg_rmb(&lgr->lnk[i], rmb_desc);
 		if (rc)
-			return rc;
+			goto out;
 	}
-	return 0;
+
+	/* exchange confirm_rkey msg with peer */
+	rc = smc_llc_do_confirm_rkey(link, rmb_desc);
+	if (rc) {
+		rc = -EFAULT;
+		goto out;
+	}
+	rmb_desc->is_conf_rkey = true;
+out:
+	return rc;
 }
 
 static int smcr_clnt_conf_first_link(struct smc_sock *smc)
@@ -408,7 +392,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 
 	smc_wr_remember_qp_attr(link);
 
-	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
+	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* confirm_rkey is implicit on 1st contact */
@@ -670,7 +654,7 @@ static int smc_connect_rdma(struct smc_sock *smc,
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_RDYLNK,
 						 ini->cln_first_contact);
 	} else {
-		if (smcr_lgr_reg_rmbs(smc->conn.lgr, smc->conn.rmb_desc))
+		if (smcr_lgr_reg_rmbs(link, smc->conn.rmb_desc))
 			return smc_connect_abort(smc, SMC_CLC_DECL_ERR_REGRMB,
 						 ini->cln_first_contact);
 	}
@@ -1045,7 +1029,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 
 	link->lgr->type = SMC_LGR_SINGLE;
 
-	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc, false))
+	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
 	/* send CONFIRM LINK request to client over the RoCE fabric */
@@ -1220,7 +1204,7 @@ static int smc_listen_rdma_reg(struct smc_sock *new_smc, int local_contact)
 	struct smc_connection *conn = &new_smc->conn;
 
 	if (local_contact != SMC_FIRST_CONTACT) {
-		if (smcr_lgr_reg_rmbs(conn->lgr, conn->rmb_desc))
+		if (smcr_lgr_reg_rmbs(conn->lnk, conn->rmb_desc))
 			return SMC_CLC_DECL_ERR_REGRMB;
 	}
 	smc_rmb_sync_sg_for_device(&new_smc->conn);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 3539ceef9a97..de6bc36fe9a7 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1127,6 +1127,22 @@ free_table:
 	return rc;
 }
 
+/* register a new rmb on IB device */
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
+{
+	if (list_empty(&link->lgr->list))
+		return -ENOLINK;
+	if (!rmb_desc->is_reg_mr[link->link_idx]) {
+		/* register memory region for new rmb */
+		if (smc_wr_reg_send(link, rmb_desc->mr_rx[link->link_idx])) {
+			rmb_desc->is_reg_err = true;
+			return -EFAULT;
+		}
+		rmb_desc->is_reg_mr[link->link_idx] = true;
+	}
+	return 0;
+}
+
 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 						bool is_rmb, int bufsize)
 {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index f12474cc666c..fd512188d2c6 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -367,6 +367,7 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 int smc_core_init(void);
 void smc_core_exit(void);
 
+int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
 	return link->lgr;
-- 
cgit v1.2.3-59-g8ed1b


From 4a3641c160873fe6b6bcff00a6ea15e7430d8d42 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:02 +0200
Subject: net/smc: unmapping of buffers to support multiple links

With the support of multiple links that are created and cleared there
is a need to unmap one link from all current buffers. Add unmapping by
link and by rmb. And make smcr_link_clear() available to be called from
the LLC layer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 76 ++++++++++++++++++++++++++++++++++++++++++------------
 net/smc/smc_core.h |  1 +
 2 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index de6bc36fe9a7..d5ecea490b4e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -498,14 +498,69 @@ void smc_conn_free(struct smc_connection *conn)
 		smc_lgr_schedule_free_work(lgr);
 }
 
-static void smcr_link_clear(struct smc_link *lnk)
+/* unregister a link from a buf_desc */
+static void smcr_buf_unmap_link(struct smc_buf_desc *buf_desc, bool is_rmb,
+				struct smc_link *lnk)
+{
+	if (is_rmb)
+		buf_desc->is_reg_mr[lnk->link_idx] = false;
+	if (!buf_desc->is_map_ib[lnk->link_idx])
+		return;
+	if (is_rmb) {
+		if (buf_desc->mr_rx[lnk->link_idx]) {
+			smc_ib_put_memory_region(
+					buf_desc->mr_rx[lnk->link_idx]);
+			buf_desc->mr_rx[lnk->link_idx] = NULL;
+		}
+		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
+	} else {
+		smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
+	}
+	sg_free_table(&buf_desc->sgt[lnk->link_idx]);
+	buf_desc->is_map_ib[lnk->link_idx] = false;
+}
+
+/* unmap all buffers of lgr for a deleted link */
+static void smcr_buf_unmap_lgr(struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+	struct smc_buf_desc *buf_desc, *bf;
+	int i;
+
+	for (i = 0; i < SMC_RMBE_SIZES; i++) {
+		mutex_lock(&lgr->rmbs_lock);
+		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list)
+			smcr_buf_unmap_link(buf_desc, true, lnk);
+		mutex_unlock(&lgr->rmbs_lock);
+		mutex_lock(&lgr->sndbufs_lock);
+		list_for_each_entry_safe(buf_desc, bf, &lgr->sndbufs[i],
+					 list)
+			smcr_buf_unmap_link(buf_desc, false, lnk);
+		mutex_unlock(&lgr->sndbufs_lock);
+	}
+}
+
+static void smcr_rtoken_clear_link(struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+	int i;
+
+	for (i = 0; i < SMC_RMBS_PER_LGR_MAX; i++) {
+		lgr->rtokens[i][lnk->link_idx].rkey = 0;
+		lgr->rtokens[i][lnk->link_idx].dma_addr = 0;
+	}
+}
+
+void smcr_link_clear(struct smc_link *lnk)
 {
 	struct smc_ib_device *smcibdev;
 
-	if (lnk->peer_qpn == 0)
+	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
 		return;
 	lnk->peer_qpn = 0;
 	smc_llc_link_clear(lnk);
+	smcr_buf_unmap_lgr(lnk);
+	smcr_rtoken_clear_link(lnk);
 	smc_ib_modify_qp_reset(lnk);
 	smc_wr_free_link(lnk);
 	smc_ib_destroy_queue_pair(lnk);
@@ -522,23 +577,10 @@ static void smcr_link_clear(struct smc_link *lnk)
 static void smcr_buf_free(struct smc_link_group *lgr, bool is_rmb,
 			  struct smc_buf_desc *buf_desc)
 {
-	struct smc_link *lnk;
 	int i;
 
-	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-		lnk = &lgr->lnk[i];
-		if (!buf_desc->is_map_ib[lnk->link_idx])
-			continue;
-		if (is_rmb) {
-			if (buf_desc->mr_rx[lnk->link_idx])
-				smc_ib_put_memory_region(
-						buf_desc->mr_rx[lnk->link_idx]);
-			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_FROM_DEVICE);
-		} else {
-			smc_ib_buf_unmap_sg(lnk, buf_desc, DMA_TO_DEVICE);
-		}
-		sg_free_table(&buf_desc->sgt[lnk->link_idx]);
-	}
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+		smcr_buf_unmap_link(buf_desc, is_rmb, &lgr->lnk[i]);
 
 	if (buf_desc->pages)
 		__free_pages(buf_desc->pages, buf_desc->order);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fd512188d2c6..fa532a423fd7 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -367,6 +367,7 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 int smc_core_init(void);
 void smc_core_exit(void);
 
+void smcr_link_clear(struct smc_link *lnk);
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
-- 
cgit v1.2.3-59-g8ed1b


From fb33d27727254618aaf6bc2fedcb0fda1d5c0239 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:03 +0200
Subject: net/smc: map and register buffers for a new link

Introduce support to map and register all current buffers for a new
link. smcr_buf_map_lgr() will map used buffers for a new link and
smcr_buf_reg_lgr() can be called to register used buffers on the
IB device of the new link.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 60 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  2 ++
 2 files changed, 62 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d5ecea490b4e..0e87f652caea 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -1185,6 +1185,66 @@ int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
 	return 0;
 }
 
+static int _smcr_buf_map_lgr(struct smc_link *lnk, struct mutex *lock,
+			     struct list_head *lst, bool is_rmb)
+{
+	struct smc_buf_desc *buf_desc, *bf;
+	int rc = 0;
+
+	mutex_lock(lock);
+	list_for_each_entry_safe(buf_desc, bf, lst, list) {
+		if (!buf_desc->used)
+			continue;
+		rc = smcr_buf_map_link(buf_desc, is_rmb, lnk);
+		if (rc)
+			goto out;
+	}
+out:
+	mutex_unlock(lock);
+	return rc;
+}
+
+/* map all used buffers of lgr for a new link */
+int smcr_buf_map_lgr(struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+	int i, rc = 0;
+
+	for (i = 0; i < SMC_RMBE_SIZES; i++) {
+		rc = _smcr_buf_map_lgr(lnk, &lgr->rmbs_lock,
+				       &lgr->rmbs[i], true);
+		if (rc)
+			return rc;
+		rc = _smcr_buf_map_lgr(lnk, &lgr->sndbufs_lock,
+				       &lgr->sndbufs[i], false);
+		if (rc)
+			return rc;
+	}
+	return 0;
+}
+
+/* register all used buffers of lgr for a new link */
+int smcr_buf_reg_lgr(struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+	struct smc_buf_desc *buf_desc, *bf;
+	int i, rc = 0;
+
+	mutex_lock(&lgr->rmbs_lock);
+	for (i = 0; i < SMC_RMBE_SIZES; i++) {
+		list_for_each_entry_safe(buf_desc, bf, &lgr->rmbs[i], list) {
+			if (!buf_desc->used)
+				continue;
+			rc = smcr_link_reg_rmb(lnk, buf_desc);
+			if (rc)
+				goto out;
+		}
+	}
+out:
+	mutex_unlock(&lgr->rmbs_lock);
+	return rc;
+}
+
 static struct smc_buf_desc *smcr_new_buf_create(struct smc_link_group *lgr,
 						bool is_rmb, int bufsize)
 {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index fa532a423fd7..61ddb5264936 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -368,6 +368,8 @@ int smc_core_init(void);
 void smc_core_exit(void);
 
 void smcr_link_clear(struct smc_link *lnk);
+int smcr_buf_map_lgr(struct smc_link *lnk);
+int smcr_buf_reg_lgr(struct smc_link *lnk);
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
-- 
cgit v1.2.3-59-g8ed1b


From fbed3b37c89633eb602f4ec8e30186e601b793e5 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:04 +0200
Subject: net/smc: extend smc_llc_send_add_link() and
 smc_llc_send_delete_link()

All LLC sends are done from worker context only, so remove the prep
functions which were used to build the message before it was sent, and
add the function content into the respective send function
smc_llc_send_add_link() and smc_llc_send_delete_link().
Extend smc_llc_send_add_link() to include the qp_mtu value in the LLC
message, which is needed to establish a link after the initial link was
created. Extend smc_llc_send_delete_link() to contain a link_id and a
reason code for the link deletion in the LLC message, which is needed
when a specific link should be deleted.
And add the list of existing DELETE_LINK reason codes.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  3 +-
 net/smc/smc_llc.c  | 89 ++++++++++++++++++++++++++++--------------------------
 net/smc/smc_llc.h  | 16 ++++++++--
 3 files changed, 62 insertions(+), 46 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 0e87f652caea..c905675017c7 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -199,7 +199,8 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
 static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
 {
 	if (lnk->state == SMC_LNK_ACTIVE &&
-	    !smc_llc_send_delete_link(lnk, SMC_LLC_REQ, orderly)) {
+	    !smc_llc_send_delete_link(lnk, 0, SMC_LLC_REQ, orderly,
+				      SMC_LLC_DEL_PROG_INIT_TERM)) {
 		return 0;
 	}
 	return -ENOTCONN;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 327cf30b98cc..171835926db6 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -58,7 +58,13 @@ struct smc_llc_msg_add_link {		/* type 0x02 */
 	u8 sender_gid[SMC_GID_SIZE];
 	u8 sender_qp_num[3];
 	u8 link_num;
-	u8 flags2;	/* QP mtu */
+#if defined(__BIG_ENDIAN_BITFIELD)
+	u8 reserved3 : 4,
+	   qp_mtu   : 4;
+#elif defined(__LITTLE_ENDIAN_BITFIELD)
+	u8 qp_mtu   : 4,
+	   reserved3 : 4;
+#endif
 	u8 initial_psn[3];
 	u8 reserved[8];
 };
@@ -427,26 +433,9 @@ static int smc_llc_send_delete_rkey(struct smc_link *link,
 	return rc;
 }
 
-/* prepare an add link message */
-static void smc_llc_prep_add_link(struct smc_llc_msg_add_link *addllc,
-				  struct smc_link *link, u8 mac[], u8 gid[],
-				  enum smc_llc_reqresp reqresp)
-{
-	memset(addllc, 0, sizeof(*addllc));
-	addllc->hd.common.type = SMC_LLC_ADD_LINK;
-	addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
-	if (reqresp == SMC_LLC_RESP) {
-		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
-		/* always reject more links for now */
-		addllc->hd.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
-		addllc->hd.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
-	}
-	memcpy(addllc->sender_mac, mac, ETH_ALEN);
-	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
-}
-
 /* send ADD LINK request or response */
 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+			  struct smc_link *link_new,
 			  enum smc_llc_reqresp reqresp)
 {
 	struct smc_llc_msg_add_link *addllc;
@@ -458,32 +447,33 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 	if (rc)
 		return rc;
 	addllc = (struct smc_llc_msg_add_link *)wr_buf;
-	smc_llc_prep_add_link(addllc, link, mac, gid, reqresp);
+
+	memset(addllc, 0, sizeof(*addllc));
+	addllc->hd.common.type = SMC_LLC_ADD_LINK;
+	addllc->hd.length = sizeof(struct smc_llc_msg_add_link);
+	if (reqresp == SMC_LLC_RESP)
+		addllc->hd.flags |= SMC_LLC_FLAG_RESP;
+	memcpy(addllc->sender_mac, mac, ETH_ALEN);
+	memcpy(addllc->sender_gid, gid, SMC_GID_SIZE);
+	if (link_new) {
+		addllc->link_num = link_new->link_id;
+		hton24(addllc->sender_qp_num, link_new->roce_qp->qp_num);
+		hton24(addllc->initial_psn, link_new->psn_initial);
+		if (reqresp == SMC_LLC_REQ)
+			addllc->qp_mtu = link_new->path_mtu;
+		else
+			addllc->qp_mtu = min(link_new->path_mtu,
+					     link_new->peer_mtu);
+	}
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
 }
 
-/* prepare a delete link message */
-static void smc_llc_prep_delete_link(struct smc_llc_msg_del_link *delllc,
-				     struct smc_link *link,
-				     enum smc_llc_reqresp reqresp, bool orderly)
-{
-	memset(delllc, 0, sizeof(*delllc));
-	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
-	delllc->hd.length = sizeof(struct smc_llc_msg_add_link);
-	if (reqresp == SMC_LLC_RESP)
-		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
-	/* DEL_LINK_ALL because only 1 link supported */
-	delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
-	if (orderly)
-		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
-	delllc->link_num = link->link_id;
-}
-
 /* send DELETE LINK request or response */
-int smc_llc_send_delete_link(struct smc_link *link,
-			     enum smc_llc_reqresp reqresp, bool orderly)
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+			     enum smc_llc_reqresp reqresp, bool orderly,
+			     u32 reason)
 {
 	struct smc_llc_msg_del_link *delllc;
 	struct smc_wr_tx_pend_priv *pend;
@@ -494,7 +484,19 @@ int smc_llc_send_delete_link(struct smc_link *link,
 	if (rc)
 		return rc;
 	delllc = (struct smc_llc_msg_del_link *)wr_buf;
-	smc_llc_prep_delete_link(delllc, link, reqresp, orderly);
+
+	memset(delllc, 0, sizeof(*delllc));
+	delllc->hd.common.type = SMC_LLC_DELETE_LINK;
+	delllc->hd.length = sizeof(struct smc_llc_msg_del_link);
+	if (reqresp == SMC_LLC_RESP)
+		delllc->hd.flags |= SMC_LLC_FLAG_RESP;
+	if (orderly)
+		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+	if (link_del_id)
+		delllc->link_num = link_del_id;
+	else
+		delllc->hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+	delllc->reason = htonl(reason);
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -547,12 +549,13 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 	smc_lgr_forget(lgr);
 	if (lgr->role == SMC_SERV) {
 		/* client asks to delete this link, send request */
-		smc_llc_prep_delete_link(llc, link, SMC_LLC_REQ, true);
+		smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true,
+					 SMC_LLC_DEL_PROG_INIT_TERM);
 	} else {
 		/* server requests to delete this link, send response */
-		smc_llc_prep_delete_link(llc, link, SMC_LLC_RESP, true);
+		smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
+					 SMC_LLC_DEL_PROG_INIT_TERM);
 	}
-	smc_llc_send_message(link, llc);
 	smc_lgr_terminate_sched(lgr);
 }
 
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 48029a5e14c3..d2c50d3e43a6 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -35,6 +35,16 @@ enum smc_llc_msg_type {
 	SMC_LLC_DELETE_RKEY		= 0x09,
 };
 
+/* LLC DELETE LINK Request Reason Codes */
+#define SMC_LLC_DEL_LOST_PATH		0x00010000
+#define SMC_LLC_DEL_OP_INIT_TERM	0x00020000
+#define SMC_LLC_DEL_PROG_INIT_TERM	0x00030000
+#define SMC_LLC_DEL_PROT_VIOL		0x00040000
+#define SMC_LLC_DEL_NO_ASYM_NEEDED	0x00050000
+/* LLC DELETE LINK Response Reason Codes */
+#define SMC_LLC_DEL_NOLNK	0x00100000  /* Unknown Link ID (no link) */
+#define SMC_LLC_DEL_NOLGR	0x00200000  /* Unknown Link Group */
+
 /* returns a usable link of the link group, or NULL */
 static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
 {
@@ -50,9 +60,11 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
 int smc_llc_send_confirm_link(struct smc_link *lnk,
 			      enum smc_llc_reqresp reqresp);
 int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
+			  struct smc_link *link_new,
 			  enum smc_llc_reqresp reqresp);
-int smc_llc_send_delete_link(struct smc_link *link,
-			     enum smc_llc_reqresp reqresp, bool orderly);
+int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
+			     enum smc_llc_reqresp reqresp, bool orderly,
+			     u32 reason);
 void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
 void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
-- 
cgit v1.2.3-59-g8ed1b


From d550066776aae3bb31e0240cab24f62e33c47fd3 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:05 +0200
Subject: net/smc: mutex to protect the lgr against parallel reconfigurations

Introduce llc_conf_mutex in the link group which is used to protect the
buffers and lgr states against parallel link reconfiguration.
This ensures that new connections do not start to register buffers with
the links of a link group when link creation or termination is running.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  9 +++++++++
 net/smc/smc_core.c | 26 ++++++++++++++++++++++----
 net/smc/smc_core.h |  2 ++
 net/smc/smc_llc.c  |  9 +--------
 4 files changed, 34 insertions(+), 12 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 20d6d3fbb86c..6663a63be9e4 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -344,6 +344,13 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
 	struct smc_link_group *lgr = link->lgr;
 	int i, rc = 0;
 
+	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+	if (rc)
+		return rc;
+	/* protect against parallel smc_llc_cli_rkey_exchange() and
+	 * parallel smcr_link_reg_rmb()
+	 */
+	mutex_lock(&lgr->llc_conf_mutex);
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		if (lgr->lnk[i].state != SMC_LNK_ACTIVE)
 			continue;
@@ -360,6 +367,8 @@ static int smcr_lgr_reg_rmbs(struct smc_link *link,
 	}
 	rmb_desc->is_conf_rkey = true;
 out:
+	mutex_unlock(&lgr->llc_conf_mutex);
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 	return rc;
 }
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index c905675017c7..4c3af05d76a5 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -448,11 +448,21 @@ out:
 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
 			   struct smc_link_group *lgr)
 {
+	int rc;
+
 	if (rmb_desc->is_conf_rkey && !list_empty(&lgr->list)) {
 		/* unregister rmb with peer */
-		smc_llc_do_delete_rkey(lgr, rmb_desc);
-		rmb_desc->is_conf_rkey = false;
+		rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
+		if (!rc) {
+			/* protect against smc_llc_cli_rkey_exchange() */
+			mutex_lock(&lgr->llc_conf_mutex);
+			smc_llc_do_delete_rkey(lgr, rmb_desc);
+			rmb_desc->is_conf_rkey = false;
+			mutex_unlock(&lgr->llc_conf_mutex);
+			smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+		}
 	}
+
 	if (rmb_desc->is_reg_err) {
 		/* buf registration failed, reuse not possible */
 		mutex_lock(&lgr->rmbs_lock);
@@ -552,6 +562,7 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
 	}
 }
 
+/* must be called under lgr->llc_conf_mutex lock */
 void smcr_link_clear(struct smc_link *lnk)
 {
 	struct smc_ib_device *smcibdev;
@@ -1170,7 +1181,9 @@ free_table:
 	return rc;
 }
 
-/* register a new rmb on IB device */
+/* register a new rmb on IB device,
+ * must be called under lgr->llc_conf_mutex lock
+ */
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc)
 {
 	if (list_empty(&link->lgr->list))
@@ -1224,7 +1237,9 @@ int smcr_buf_map_lgr(struct smc_link *lnk)
 	return 0;
 }
 
-/* register all used buffers of lgr for a new link */
+/* register all used buffers of lgr for a new link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
 int smcr_buf_reg_lgr(struct smc_link *lnk)
 {
 	struct smc_link_group *lgr = lnk->lgr;
@@ -1278,6 +1293,8 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 {
 	int i, rc = 0;
 
+	/* protect against parallel link reconfiguration */
+	mutex_lock(&lgr->llc_conf_mutex);
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		struct smc_link *lnk = &lgr->lnk[i];
 
@@ -1289,6 +1306,7 @@ static int smcr_buf_map_usable_links(struct smc_link_group *lgr,
 		}
 	}
 out:
+	mutex_unlock(&lgr->llc_conf_mutex);
 	return rc;
 }
 
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 61ddb5264936..aa198dd0f0e4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -248,6 +248,8 @@ struct smc_link_group {
 						/* queue for llc events */
 			spinlock_t		llc_event_q_lock;
 						/* protects llc_event_q */
+			struct mutex		llc_conf_mutex;
+						/* protects lgr reconfig. */
 			struct work_struct	llc_event_work;
 						/* llc event worker */
 			wait_queue_head_t	llc_waiter;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 171835926db6..ceed3c89926f 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -848,6 +848,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 	spin_lock_init(&lgr->llc_event_q_lock);
 	spin_lock_init(&lgr->llc_flow_lock);
 	init_waitqueue_head(&lgr->llc_waiter);
+	mutex_init(&lgr->llc_conf_mutex);
 	lgr->llc_testlink_time = net->ipv4.sysctl_tcp_keepalive_time;
 }
 
@@ -897,9 +898,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 	struct smc_llc_qentry *qentry = NULL;
 	int rc = 0;
 
-	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
-	if (rc)
-		return rc;
 	rc = smc_llc_send_confirm_rkey(send_link, rmb_desc);
 	if (rc)
 		goto out;
@@ -911,7 +909,6 @@ int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 out:
 	if (qentry)
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
-	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 	return rc;
 }
 
@@ -927,9 +924,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
 	if (!send_link)
 		return -ENOLINK;
 
-	rc = smc_llc_flow_initiate(lgr, SMC_LLC_FLOW_RKEY);
-	if (rc)
-		return rc;
 	/* protected by llc_flow control */
 	rc = smc_llc_send_delete_rkey(send_link, rmb_desc);
 	if (rc)
@@ -942,7 +936,6 @@ int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
 out:
 	if (qentry)
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
-	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 	return rc;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 35dcf7ec02dcff16504bc52a368822254f889f00 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:06 +0200
Subject: net/smc: remember PNETID of IB device for later device matching

The PNETID is needed to find an alternate link for a link group.
Save the PNETID of the link that is used to create the link group for
later device matching.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 2 ++
 net/smc/smc_core.h | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 4c3af05d76a5..d7ab92fc5b15 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -413,6 +413,8 @@ static int smc_lgr_create(struct smc_sock *smc, struct smc_init_info *ini)
 		lgr->role = smc->listen_smc ? SMC_SERV : SMC_CLNT;
 		memcpy(lgr->peer_systemid, ini->ib_lcl->id_for_peer,
 		       SMC_SYSTEMID_LEN);
+		memcpy(lgr->pnet_id, ini->ib_dev->pnetid[ini->ib_port - 1],
+		       SMC_MAX_PNETID_LEN);
 		smc_llc_lgr_init(lgr, smc);
 
 		link_idx = SMC_SINGLE_LINK;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index aa198dd0f0e4..413eaad50c7f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -244,6 +244,8 @@ struct smc_link_group {
 			u8			next_link_id;
 			enum smc_lgr_type	type;
 						/* redundancy state */
+			u8			pnet_id[SMC_MAX_PNETID_LEN + 1];
+						/* pnet id of this lgr */
 			struct list_head	llc_event_q;
 						/* queue for llc events */
 			spinlock_t		llc_event_q_lock;
-- 
cgit v1.2.3-59-g8ed1b


From 1f90a05d9ff907c70456e7c9d7058372679a88c6 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:07 +0200
Subject: net/smc: add smcr_port_add() and smcr_link_up() processing

Call smcr_port_add() when an IB event reports a new active IB device.
smcr_port_add() will start a work which either triggers the local
ADD_LINK processing, or send an ADD_LINK LLC message to the SMC server
to initiate the processing.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 86 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_core.h |  1 +
 net/smc/smc_ib.c   |  1 +
 3 files changed, 88 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index d7ab92fc5b15..20bc9e46bf52 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -44,10 +44,19 @@ static struct smc_lgr_list smc_lgr_list = {	/* established link groups */
 static atomic_t lgr_cnt = ATOMIC_INIT(0); /* number of existing link groups */
 static DECLARE_WAIT_QUEUE_HEAD(lgrs_deleted);
 
+struct smc_ib_up_work {
+	struct work_struct	work;
+	struct smc_link_group	*lgr;
+	struct smc_ib_device	*smcibdev;
+	u8			ibport;
+};
+
 static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
 			 struct smc_buf_desc *buf_desc);
 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
 
+static void smc_link_up_work(struct work_struct *work);
+
 /* return head of link group list and its lock for a given link group */
 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
 						  spinlock_t **lgr_lock)
@@ -928,6 +937,83 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 	}
 }
 
+/* link is up - establish alternate link if applicable */
+static void smcr_link_up(struct smc_link_group *lgr,
+			 struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct smc_link *link = NULL;
+
+	if (list_empty(&lgr->list) ||
+	    lgr->type == SMC_LGR_SYMMETRIC ||
+	    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+		return;
+
+	if (lgr->role == SMC_SERV) {
+		/* trigger local add link processing */
+		link = smc_llc_usable_link(lgr);
+		if (!link)
+			return;
+		/* tbd: call smc_llc_srv_add_link_local(link); */
+	} else {
+		/* invite server to start add link processing */
+		u8 gid[SMC_GID_SIZE];
+
+		if (smc_ib_determine_gid(smcibdev, ibport, lgr->vlan_id, gid,
+					 NULL))
+			return;
+		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+			/* some other llc task is ongoing */
+			wait_event_interruptible_timeout(lgr->llc_waiter,
+				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+				SMC_LLC_WAIT_TIME);
+		}
+		if (list_empty(&lgr->list) ||
+		    !smc_ib_port_active(smcibdev, ibport))
+			return; /* lgr or device no longer active */
+		link = smc_llc_usable_link(lgr);
+		if (!link)
+			return;
+		smc_llc_send_add_link(link, smcibdev->mac[ibport - 1], gid,
+				      NULL, SMC_LLC_REQ);
+	}
+}
+
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct smc_ib_up_work *ib_work;
+	struct smc_link_group *lgr, *n;
+
+	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+			    SMC_MAX_PNETID_LEN) ||
+		    lgr->type == SMC_LGR_SYMMETRIC ||
+		    lgr->type == SMC_LGR_ASYMMETRIC_PEER)
+			continue;
+		ib_work = kmalloc(sizeof(*ib_work), GFP_KERNEL);
+		if (!ib_work)
+			continue;
+		INIT_WORK(&ib_work->work, smc_link_up_work);
+		ib_work->lgr = lgr;
+		ib_work->smcibdev = smcibdev;
+		ib_work->ibport = ibport;
+		schedule_work(&ib_work->work);
+	}
+}
+
+static void smc_link_up_work(struct work_struct *work)
+{
+	struct smc_ib_up_work *ib_work = container_of(work,
+						      struct smc_ib_up_work,
+						      work);
+	struct smc_link_group *lgr = ib_work->lgr;
+
+	if (list_empty(&lgr->list))
+		goto out;
+	smcr_link_up(lgr, ib_work->smcibdev, ib_work->ibport);
+out:
+	kfree(ib_work);
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 413eaad50c7f..86453ad83491 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -345,6 +345,7 @@ void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_cleanup_early(struct smc_connection *conn);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
 void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
 			unsigned short vlan);
 void smc_smcd_terminate_all(struct smcd_dev *dev);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index c090678a3e5a..545fb0bc3714 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -252,6 +252,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
 			smc_port_terminate(smcibdev, port_idx + 1);
 		} else {
 			clear_bit(port_idx, smcibdev->ports_going_away);
+			smcr_port_add(smcibdev, port_idx + 1);
 		}
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 541afa10c126b6c22c2a805a559c70cc41fd156e Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:08 +0200
Subject: net/smc: add smcr_port_err() and smcr_link_down() processing

Call smcr_port_err() when an IB event reports an inactive IB device.
smcr_port_err() calls smcr_link_down() for all affected links.
smcr_link_down() either triggers the local DELETE_LINK processing, or
sends a DELETE_LINK LLC message to the SMC server to initiate the
processing.
The old handler function smc_port_terminate() is removed.
Add helper smcr_link_down_cond() to take a link down conditionally, and
smcr_link_down_cond_sched() to schedule the link_down processing to a
work.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 119 +++++++++++++++++++++++++++++++++++++++--------------
 net/smc/smc_core.h |   6 ++-
 net/smc/smc_ib.c   |   2 +-
 net/smc/smc_llc.h  |   3 ++
 4 files changed, 98 insertions(+), 32 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 20bc9e46bf52..62108e0cd529 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -56,6 +56,7 @@ static void smc_buf_free(struct smc_link_group *lgr, bool is_rmb,
 static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft);
 
 static void smc_link_up_work(struct work_struct *work);
+static void smc_link_down_work(struct work_struct *work);
 
 /* return head of link group list and its lock for a given link group */
 static inline struct list_head *smc_lgr_list_head(struct smc_link_group *lgr,
@@ -320,6 +321,7 @@ static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 	lnk->smcibdev = ini->ib_dev;
 	lnk->ibport = ini->ib_port;
 	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
 	if (!ini->ib_dev->initialized) {
 		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
 		if (rc)
@@ -818,36 +820,6 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
 	schedule_work(&lgr->terminate_work);
 }
 
-/* Called when IB port is terminated */
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport)
-{
-	struct smc_link_group *lgr, *l;
-	LIST_HEAD(lgr_free_list);
-	int i;
-
-	spin_lock_bh(&smc_lgr_list.lock);
-	list_for_each_entry_safe(lgr, l, &smc_lgr_list.list, list) {
-		if (lgr->is_smcd)
-			continue;
-		/* tbd - terminate only when no more links are active */
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (!smc_link_usable(&lgr->lnk[i]))
-				continue;
-			if (lgr->lnk[i].smcibdev == smcibdev &&
-			    lgr->lnk[i].ibport == ibport) {
-				list_move(&lgr->list, &lgr_free_list);
-				lgr->freeing = 1;
-			}
-		}
-	}
-	spin_unlock_bh(&smc_lgr_list.lock);
-
-	list_for_each_entry_safe(lgr, l, &lgr_free_list, list) {
-		list_del_init(&lgr->list);
-		__smc_lgr_terminate(lgr, false);
-	}
-}
-
 /* Called when peer lgr shutdown (regularly or abnormally) is received */
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
 {
@@ -1000,6 +972,79 @@ void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport)
 	}
 }
 
+/* link is down - switch connections to alternate link,
+ * must be called under lgr->llc_conf_mutex lock
+ */
+static void smcr_link_down(struct smc_link *lnk)
+{
+	struct smc_link_group *lgr = lnk->lgr;
+	struct smc_link *to_lnk;
+	int del_link_id;
+
+	if (!lgr || lnk->state == SMC_LNK_UNUSED || list_empty(&lgr->list))
+		return;
+
+	smc_ib_modify_qp_reset(lnk);
+	to_lnk = NULL;
+	/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
+	if (!to_lnk) { /* no backup link available */
+		smcr_link_clear(lnk);
+		return;
+	}
+	lgr->type = SMC_LGR_SINGLE;
+	del_link_id = lnk->link_id;
+
+	if (lgr->role == SMC_SERV) {
+		/* trigger local delete link processing */
+	} else {
+		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+			/* another llc task is ongoing */
+			mutex_unlock(&lgr->llc_conf_mutex);
+			wait_event_interruptible_timeout(lgr->llc_waiter,
+				(lgr->llc_flow_lcl.type == SMC_LLC_FLOW_NONE),
+				SMC_LLC_WAIT_TIME);
+			mutex_lock(&lgr->llc_conf_mutex);
+		}
+		smc_llc_send_delete_link(to_lnk, del_link_id, SMC_LLC_REQ, true,
+					 SMC_LLC_DEL_LOST_PATH);
+	}
+}
+
+/* must be called under lgr->llc_conf_mutex lock */
+void smcr_link_down_cond(struct smc_link *lnk)
+{
+	if (smc_link_downing(&lnk->state))
+		smcr_link_down(lnk);
+}
+
+/* will get the lgr->llc_conf_mutex lock */
+void smcr_link_down_cond_sched(struct smc_link *lnk)
+{
+	if (smc_link_downing(&lnk->state))
+		schedule_work(&lnk->link_down_wrk);
+}
+
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport)
+{
+	struct smc_link_group *lgr, *n;
+	int i;
+
+	list_for_each_entry_safe(lgr, n, &smc_lgr_list.list, list) {
+		if (strncmp(smcibdev->pnetid[ibport - 1], lgr->pnet_id,
+			    SMC_MAX_PNETID_LEN))
+			continue; /* lgr is not affected */
+		if (list_empty(&lgr->list))
+			continue;
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			struct smc_link *lnk = &lgr->lnk[i];
+
+			if (smc_link_usable(lnk) &&
+			    lnk->smcibdev == smcibdev && lnk->ibport == ibport)
+				smcr_link_down_cond_sched(lnk);
+		}
+	}
+}
+
 static void smc_link_up_work(struct work_struct *work)
 {
 	struct smc_ib_up_work *ib_work = container_of(work,
@@ -1014,6 +1059,20 @@ out:
 	kfree(ib_work);
 }
 
+static void smc_link_down_work(struct work_struct *work)
+{
+	struct smc_link *link = container_of(work, struct smc_link,
+					     link_down_wrk);
+	struct smc_link_group *lgr = link->lgr;
+
+	if (list_empty(&lgr->list))
+		return;
+	wake_up_interruptible_all(&lgr->llc_waiter);
+	mutex_lock(&lgr->llc_conf_mutex);
+	smcr_link_down(link);
+	mutex_unlock(&lgr->llc_conf_mutex);
+}
+
 /* Determine vlan of internal TCP socket.
  * @vlan_id: address to store the determined vlan id into
  */
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 86453ad83491..da3cddbd1651 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -117,6 +117,7 @@ struct smc_link {
 	u8			link_id;	/* unique # within link group */
 	u8			link_idx;	/* index in lgr link array */
 	struct smc_link_group	*lgr;		/* parent link group */
+	struct work_struct	link_down_wrk;	/* wrk to bring link down */
 
 	enum smc_link_state	state;		/* state of link */
 	struct delayed_work	llc_testlink_wrk; /* testlink worker */
@@ -344,8 +345,8 @@ struct smc_clc_msg_local;
 void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_cleanup_early(struct smc_connection *conn);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
-void smc_port_terminate(struct smc_ib_device *smcibdev, u8 ibport);
 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
+void smcr_port_err(struct smc_ib_device *smcibdev, u8 ibport);
 void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid,
 			unsigned short vlan);
 void smc_smcd_terminate_all(struct smcd_dev *dev);
@@ -376,6 +377,9 @@ void smcr_link_clear(struct smc_link *lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+void smcr_link_down_cond(struct smc_link *lnk);
+void smcr_link_down_cond_sched(struct smc_link *lnk);
+
 static inline struct smc_link_group *smc_get_lgr(struct smc_link *link)
 {
 	return link->lgr;
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 545fb0bc3714..2c743caad69a 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -249,7 +249,7 @@ static void smc_ib_port_event_work(struct work_struct *work)
 		clear_bit(port_idx, &smcibdev->port_event_mask);
 		if (!smc_ib_port_active(smcibdev, port_idx + 1)) {
 			set_bit(port_idx, smcibdev->ports_going_away);
-			smc_port_terminate(smcibdev, port_idx + 1);
+			smcr_port_err(smcibdev, port_idx + 1);
 		} else {
 			clear_bit(port_idx, smcibdev->ports_going_away);
 			smcr_port_add(smcibdev, port_idx + 1);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index d2c50d3e43a6..4ed4486e5082 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -35,6 +35,9 @@ enum smc_llc_msg_type {
 	SMC_LLC_DELETE_RKEY		= 0x09,
 };
 
+#define smc_link_downing(state) \
+	(cmpxchg(state, SMC_LNK_ACTIVE, SMC_LNK_INACTIVE) == SMC_LNK_ACTIVE)
+
 /* LLC DELETE LINK Request Reason Codes */
 #define SMC_LLC_DEL_LOST_PATH		0x00010000
 #define SMC_LLC_DEL_OP_INIT_TERM	0x00020000
-- 
cgit v1.2.3-59-g8ed1b


From 87523930a16eb57ebb20318e92b5df4b64fe8b20 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:09 +0200
Subject: net/smc: take link down instead of terminating the link group

Use the introduced link down processing in all places where the link
group is terminated and take down the affected link only.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  7 ++-----
 net/smc/smc_llc.c  |  4 ++--
 net/smc/smc_tx.c   |  2 +-
 net/smc/smc_wr.c   | 19 ++++++++-----------
 4 files changed, 13 insertions(+), 19 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 62108e0cd529..849ae3f9b796 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -884,11 +884,8 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 	} else {
 		list_for_each_entry_safe(lgr, lg, &smc_lgr_list.list, list) {
 			for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-				if (lgr->lnk[i].smcibdev == smcibdev) {
-					list_move(&lgr->list, &lgr_free_list);
-					lgr->freeing = 1;
-					break;
-				}
+				if (lgr->lnk[i].smcibdev == smcibdev)
+					smcr_link_down_cond_sched(&lgr->lnk[i]);
 			}
 		}
 	}
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index ceed3c89926f..e478a4c11877 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -556,7 +556,7 @@ static void smc_llc_rx_delete_link(struct smc_link *link,
 		smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
 					 SMC_LLC_DEL_PROG_INIT_TERM);
 	}
-	smc_lgr_terminate_sched(lgr);
+	smcr_link_down_cond(link);
 }
 
 /* process a confirm_rkey request from peer, remote flow */
@@ -831,7 +831,7 @@ static void smc_llc_testlink_work(struct work_struct *work)
 	if (link->state != SMC_LNK_ACTIVE)
 		return;		/* link state changed */
 	if (rc <= 0) {
-		smc_lgr_terminate_sched(smc_get_lgr(link));
+		smcr_link_down_cond_sched(link);
 		return;
 	}
 	next_interval = link->llc_testlink_time;
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index d74bfe6a90f1..417204572a69 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -283,7 +283,7 @@ static int smc_tx_rdma_write(struct smc_connection *conn, int peer_rmbe_offset,
 	rdma_wr->rkey = lgr->rtokens[conn->rtoken_idx][link->link_idx].rkey;
 	rc = ib_post_send(link->roce_qp, &rdma_wr->wr, NULL);
 	if (rc)
-		smc_lgr_terminate_sched(lgr);
+		smcr_link_down_cond_sched(link);
 	return rc;
 }
 
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 93223628c002..031e6c9561b1 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -120,8 +120,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 			       sizeof(link->wr_tx_bufs[i]));
 			clear_bit(i, link->wr_tx_mask);
 		}
-		/* terminate connections of this link group abnormally */
-		smc_lgr_terminate_sched(smc_get_lgr(link));
+		/* terminate link */
+		smcr_link_down_cond_sched(link);
 	}
 	if (pnd_snd.handler)
 		pnd_snd.handler(&pnd_snd.priv, link, wc->status);
@@ -212,8 +212,8 @@ int smc_wr_tx_get_free_slot(struct smc_link *link,
 			(smc_wr_tx_get_free_slot_index(link, &idx) != -EBUSY),
 			SMC_WR_TX_WAIT_FREE_SLOT_TIME);
 		if (!rc) {
-			/* timeout - terminate connections */
-			smc_lgr_terminate_sched(lgr);
+			/* timeout - terminate link */
+			smcr_link_down_cond_sched(link);
 			return -EPIPE;
 		}
 		if (idx == link->wr_tx_cnt)
@@ -270,7 +270,7 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	rc = ib_post_send(link->roce_qp, &link->wr_tx_ibs[pend->idx], NULL);
 	if (rc) {
 		smc_wr_tx_put_slot(link, priv);
-		smc_lgr_terminate_sched(smc_get_lgr(link));
+		smcr_link_down_cond_sched(link);
 	}
 	return rc;
 }
@@ -294,8 +294,8 @@ int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
 					      (link->wr_reg_state != POSTED),
 					      SMC_WR_REG_MR_WAIT_TIME);
 	if (!rc) {
-		/* timeout - terminate connections */
-		smc_lgr_terminate_sched(smc_get_lgr(link));
+		/* timeout - terminate link */
+		smcr_link_down_cond_sched(link);
 		return -EPIPE;
 	}
 	if (rc == -ERESTARTSYS)
@@ -393,10 +393,7 @@ static inline void smc_wr_rx_process_cqes(struct ib_wc wc[], int num)
 			case IB_WC_RETRY_EXC_ERR:
 			case IB_WC_RNR_RETRY_EXC_ERR:
 			case IB_WC_WR_FLUSH_ERR:
-				/* terminate connections of this link group
-				 * abnormally
-				 */
-				smc_lgr_terminate_sched(smc_get_lgr(link));
+				smcr_link_down_cond_sched(link);
 				break;
 			default:
 				smc_wr_rx_post(link); /* refill WR RX */
-- 
cgit v1.2.3-59-g8ed1b


From 33d203302d1cc744a13349d2576c985feb469220 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:10 +0200
Subject: net/smc: remove DELETE LINK processing from smc_core.c

Support for multiple links makes the former DELETE LINK processing,
which sent one DELETE_LINK LLC message for each single link, obsolete.
Remove this processing from smc_core.c.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 33 ---------------------------------
 1 file changed, 33 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 849ae3f9b796..60c708f6de51 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -202,20 +202,6 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
 	smc_lgr_schedule_free_work_fast(lgr);
 }
 
-/* Send delete link, either as client to request the initiation
- * of the DELETE LINK sequence from server; or as server to
- * initiate the delete processing. See smc_llc_rx_delete_link().
- */
-static int smcr_link_send_delete(struct smc_link *lnk, bool orderly)
-{
-	if (lnk->state == SMC_LNK_ACTIVE &&
-	    !smc_llc_send_delete_link(lnk, 0, SMC_LLC_REQ, orderly,
-				      SMC_LLC_DEL_PROG_INIT_TERM)) {
-		return 0;
-	}
-	return -ENOTCONN;
-}
-
 static void smc_lgr_free(struct smc_link_group *lgr);
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -241,25 +227,6 @@ static void smc_lgr_free_work(struct work_struct *work)
 		return;
 	}
 	list_del_init(&lgr->list); /* remove from smc_lgr_list */
-
-	if (!lgr->is_smcd && !lgr->terminating)	{
-		bool do_wait = false;
-
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			struct smc_link *lnk = &lgr->lnk[i];
-			/* try to send del link msg, on err free immediately */
-			if (lnk->state == SMC_LNK_ACTIVE &&
-			    !smcr_link_send_delete(lnk, true)) {
-				/* reschedule in case we never receive a resp */
-				smc_lgr_schedule_free_work(lgr);
-				do_wait = true;
-			}
-		}
-		if (do_wait) {
-			spin_unlock_bh(lgr_lock);
-			return; /* wait for resp, see smc_llc_rx_delete_link */
-		}
-	}
 	lgr->freeing = 1; /* this instance does the freeing, no new schedule */
 	spin_unlock_bh(lgr_lock);
 	cancel_delayed_work(&lgr->free_work);
-- 
cgit v1.2.3-59-g8ed1b


From 6c868a3edc70ec9819d6a94268625d25e6bc9587 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:11 +0200
Subject: net/smc: introduce smc_pnet_find_alt_roce()

Introduce a new function in smc_pnet.c that searches for an alternate
IB device, using an existing link group and a primary IB device. The
alternate IB device needs to be active and must have the same PNETID
as the link group.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_pnet.c | 15 +++++++++++++--
 net/smc/smc_pnet.h |  5 ++++-
 2 files changed, 17 insertions(+), 3 deletions(-)

diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index bd01c71b827a..50c96e843fab 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -777,7 +777,8 @@ static int smc_pnet_find_ndev_pnetid_by_table(struct net_device *ndev,
 
 /* find a roce device for the given pnetid */
 static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
-					  struct smc_init_info *ini)
+					  struct smc_init_info *ini,
+					  struct smc_ib_device *known_dev)
 {
 	struct smc_ib_device *ibdev;
 	int i;
@@ -785,6 +786,8 @@ static void _smc_pnet_find_roce_by_pnetid(u8 *pnet_id,
 	ini->ib_dev = NULL;
 	spin_lock(&smc_ib_devices.lock);
 	list_for_each_entry(ibdev, &smc_ib_devices.list, list) {
+		if (ibdev == known_dev)
+			continue;
 		for (i = 1; i <= SMC_MAX_PORTS; i++) {
 			if (!rdma_is_port_valid(ibdev->ibdev, i))
 				continue;
@@ -803,6 +806,14 @@ out:
 	spin_unlock(&smc_ib_devices.lock);
 }
 
+/* find alternate roce device with same pnet_id and vlan_id */
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+			    struct smc_init_info *ini,
+			    struct smc_ib_device *known_dev)
+{
+	_smc_pnet_find_roce_by_pnetid(lgr->pnet_id, ini, known_dev);
+}
+
 /* if handshake network device belongs to a roce device, return its
  * IB device and port
  */
@@ -857,7 +868,7 @@ static void smc_pnet_find_roce_by_pnetid(struct net_device *ndev,
 		smc_pnet_find_rdma_dev(ndev, ini);
 		return; /* pnetid could not be determined */
 	}
-	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini);
+	_smc_pnet_find_roce_by_pnetid(ndev_pnetid, ini, NULL);
 }
 
 static void smc_pnet_find_ism_by_pnetid(struct net_device *ndev,
diff --git a/net/smc/smc_pnet.h b/net/smc/smc_pnet.h
index ea207f8fc6f7..811a65986691 100644
--- a/net/smc/smc_pnet.h
+++ b/net/smc/smc_pnet.h
@@ -19,6 +19,7 @@
 struct smc_ib_device;
 struct smcd_dev;
 struct smc_init_info;
+struct smc_link_group;
 
 /**
  * struct smc_pnettable - SMC PNET table anchor
@@ -48,5 +49,7 @@ void smc_pnet_find_roce_resource(struct sock *sk, struct smc_init_info *ini);
 void smc_pnet_find_ism_resource(struct sock *sk, struct smc_init_info *ini);
 int smc_pnetid_by_table_ib(struct smc_ib_device *smcibdev, u8 ib_port);
 int smc_pnetid_by_table_smcd(struct smcd_dev *smcd);
-
+void smc_pnet_find_alt_roce(struct smc_link_group *lgr,
+			    struct smc_init_info *ini,
+			    struct smc_ib_device *known_dev);
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 8574cf4055ab44724ee9a4c30921d3ed853d787c Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:12 +0200
Subject: net/smc: allocate index for a new link

Add smc_llc_alloc_alt_link() to find a free link index for a new link,
depending on the new link group type. And update constants for the
maximum number of links to 3 (2 symmetric and 1 dangling asymmetric link).
These maximum numbers are the same as used by other implementations of the
SMC-R protocol.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h |  2 +-
 net/smc/smc_llc.c  | 24 ++++++++++++++++++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index da3cddbd1651..eb27f2eb7c8c 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -128,7 +128,7 @@ struct smc_link {
 /* For now we just allow one parallel link per link group. The SMC protocol
  * allows more (up to 8).
  */
-#define SMC_LINKS_PER_LGR_MAX	1
+#define SMC_LINKS_PER_LGR_MAX	3
 #define SMC_SINGLE_LINK		0
 
 #define SMC_FIRST_CONTACT	1		/* first contact to a peer */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e478a4c11877..3a25b6ebe3a8 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -541,6 +541,30 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 
 /********************************* receive ***********************************/
 
+static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
+				  enum smc_lgr_type lgr_new_t)
+{
+	int i;
+
+	if (lgr->type == SMC_LGR_SYMMETRIC ||
+	    (lgr->type != SMC_LGR_SINGLE &&
+	     (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	      lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
+		return -EMLINK;
+
+	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
+		for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
+			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+				return i;
+	} else {
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+				return i;
+	}
+	return -EMLINK;
+}
+
 static void smc_llc_rx_delete_link(struct smc_link *link,
 				   struct smc_llc_msg_del_link *llc)
 {
-- 
cgit v1.2.3-59-g8ed1b


From b45e7f98ab7c2d7035d92100ee011584693eccce Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Fri, 1 May 2020 12:48:13 +0200
Subject: net/smc: llc_add_link_work to handle ADD_LINK LLC requests

Introduce a work that is scheduled when a new ADD_LINK LLC request is
received. The work will call either the SMC client or SMC server
ADD_LINK processing.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h |  1 +
 net/smc/smc_llc.c  | 24 ++++++++++++++++++++++--
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index eb27f2eb7c8c..555ada9d2423 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -253,6 +253,7 @@ struct smc_link_group {
 						/* protects llc_event_q */
 			struct mutex		llc_conf_mutex;
 						/* protects lgr reconfig. */
+			struct work_struct	llc_add_link_work;
 			struct work_struct	llc_event_work;
 						/* llc event worker */
 			wait_queue_head_t	llc_waiter;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 3a25b6ebe3a8..50f59746bdf9 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -565,6 +565,24 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
 	return -EMLINK;
 }
 
+/* worker to process an add link message */
+static void smc_llc_add_link_work(struct work_struct *work)
+{
+	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+						  llc_add_link_work);
+
+	if (list_empty(&lgr->list)) {
+		/* link group is terminating */
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+		goto out;
+	}
+
+	/* tbd: call smc_llc_process_cli_add_link(lgr); */
+	/* tbd: call smc_llc_process_srv_add_link(lgr); */
+out:
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
+}
+
 static void smc_llc_rx_delete_link(struct smc_link *link,
 				   struct smc_llc_msg_del_link *llc)
 {
@@ -685,11 +703,11 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 				wake_up_interruptible(&lgr->llc_waiter);
 			} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
 						      qentry)) {
-				/* tbd: schedule_work(&lgr->llc_add_link_work); */
+				schedule_work(&lgr->llc_add_link_work);
 			}
 		} else if (smc_llc_flow_start(&lgr->llc_flow_lcl, qentry)) {
 			/* as smc server, handle client suggestion */
-			/* tbd: schedule_work(&lgr->llc_add_link_work); */
+			schedule_work(&lgr->llc_add_link_work);
 		}
 		return;
 	case SMC_LLC_CONFIRM_LINK:
@@ -868,6 +886,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 	struct net *net = sock_net(smc->clcsock->sk);
 
 	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
+	INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
 	INIT_LIST_HEAD(&lgr->llc_event_q);
 	spin_lock_init(&lgr->llc_event_q_lock);
 	spin_lock_init(&lgr->llc_flow_lock);
@@ -882,6 +901,7 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 	smc_llc_event_flush(lgr);
 	wake_up_interruptible_all(&lgr->llc_waiter);
 	cancel_work_sync(&lgr->llc_event_work);
+	cancel_work_sync(&lgr->llc_add_link_work);
 	if (lgr->delayed_event) {
 		kfree(lgr->delayed_event);
 		lgr->delayed_event = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 57dc6f3b4133f45e73d87895180ca1f3eaf01722 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 1 May 2020 15:43:20 -0700
Subject: selftests/bpf: Use reno instead of dctcp

Andrey pointed out that we can use reno instead of dctcp for CC
tests and drop CONFIG_TCP_CONG_DCTCP=y requirement.

Fixes: beecf11bc218 ("bpf: Bpf_{g,s}etsockopt for struct bpf_sock_addr")
Suggested-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200501224320.28441-1-sdf@google.com
---
 tools/testing/selftests/bpf/config                | 1 -
 tools/testing/selftests/bpf/progs/connect4_prog.c | 6 +++---
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 6e5b94c036ca..60e3ae5d4e48 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -37,4 +37,3 @@ CONFIG_IPV6_SIT=m
 CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
-CONFIG_TCP_CONG_DCTCP=y
diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index 972918cd2d7f..c2c85c31cffd 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -59,12 +59,12 @@ static __inline int verify_cc(struct bpf_sock_addr *ctx,
 
 static __inline int set_cc(struct bpf_sock_addr *ctx)
 {
-	char dctcp[TCP_CA_NAME_MAX] = "dctcp";
+	char reno[TCP_CA_NAME_MAX] = "reno";
 	char cubic[TCP_CA_NAME_MAX] = "cubic";
 
-	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &dctcp, sizeof(dctcp)))
+	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &reno, sizeof(reno)))
 		return 1;
-	if (verify_cc(ctx, dctcp))
+	if (verify_cc(ctx, reno))
 		return 1;
 
 	if (bpf_setsockopt(ctx, SOL_TCP, TCP_CONGESTION, &cubic, sizeof(cubic)))
-- 
cgit v1.2.3-59-g8ed1b


From 184ecc9eb260d5a3bcdddc5bebd18f285ac004e9 Mon Sep 17 00:00:00 2001
From: Vincent Cheng <vincent.cheng.xh@renesas.com>
Date: Fri, 1 May 2020 23:35:36 -0400
Subject: ptp: Add adjphase function to support phase offset control.

Add an adjust phase function to take advantage of a PHC
clock's hardware filtering capability, which uses a phase offset
control word instead of a frequency offset control word.

Signed-off-by: Vincent Cheng <vincent.cheng.xh@renesas.com>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c          | 3 +++
 include/linux/ptp_clock_kernel.h | 6 +++++-
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index acabbe72e55e..fc984a8828fb 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -146,6 +146,9 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 		else
 			err = ops->adjfreq(ops, ppb);
 		ptp->dialed_frequency = tx->freq;
+	} else if (tx->modes & ADJ_OFFSET) {
+		if (ops->adjphase)
+			err = ops->adjphase(ops, tx->offset);
 	} else if (tx->modes == 0) {
 		tx->freq = ptp->dialed_frequency;
 		err = 0;
diff --git a/include/linux/ptp_clock_kernel.h b/include/linux/ptp_clock_kernel.h
index 121a7eda4593..31144d954d89 100644
--- a/include/linux/ptp_clock_kernel.h
+++ b/include/linux/ptp_clock_kernel.h
@@ -36,7 +36,7 @@ struct ptp_system_timestamp {
 };
 
 /**
- * struct ptp_clock_info - decribes a PTP hardware clock
+ * struct ptp_clock_info - describes a PTP hardware clock
  *
  * @owner:     The clock driver should set to THIS_MODULE.
  * @name:      A short "friendly name" to identify the clock and to
@@ -65,6 +65,9 @@ struct ptp_system_timestamp {
  *            parameter delta: Desired frequency offset from nominal frequency
  *            in parts per billion
  *
+ * @adjphase:  Adjusts the phase offset of the hardware clock.
+ *             parameter phase: Desired change in nanoseconds.
+ *
  * @adjtime:  Shifts the time of the hardware clock.
  *            parameter delta: Desired change in nanoseconds.
  *
@@ -128,6 +131,7 @@ struct ptp_clock_info {
 	struct ptp_pin_desc *pin_config;
 	int (*adjfine)(struct ptp_clock_info *ptp, long scaled_ppm);
 	int (*adjfreq)(struct ptp_clock_info *ptp, s32 delta);
+	int (*adjphase)(struct ptp_clock_info *ptp, s32 phase);
 	int (*adjtime)(struct ptp_clock_info *ptp, s64 delta);
 	int (*gettime64)(struct ptp_clock_info *ptp, struct timespec64 *ts);
 	int (*gettimex64)(struct ptp_clock_info *ptp, struct timespec64 *ts,
-- 
cgit v1.2.3-59-g8ed1b


From d3f1cbd29fa63f1bb608603a6cd54ca7af56a68b Mon Sep 17 00:00:00 2001
From: Vincent Cheng <vincent.cheng.xh@renesas.com>
Date: Fri, 1 May 2020 23:35:37 -0400
Subject: ptp: Add adjust_phase to ptp_clock_caps capability.

Add adjust_phase to the ptp_clock_caps capabilities to allow
users to query, via the PTP_CLOCK_GETCAPS ioctl command,
whether a PHC driver supports adjust phase.

Signed-off-by: Vincent Cheng <vincent.cheng.xh@renesas.com>
Reviewed-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_chardev.c             | 1 +
 include/uapi/linux/ptp_clock.h        | 4 +++-
 tools/testing/selftests/ptp/testptp.c | 6 ++++--
 3 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/ptp/ptp_chardev.c b/drivers/ptp/ptp_chardev.c
index 93d574faf1fe..375cd6e4aade 100644
--- a/drivers/ptp/ptp_chardev.c
+++ b/drivers/ptp/ptp_chardev.c
@@ -136,6 +136,7 @@ long ptp_ioctl(struct posix_clock *pc, unsigned int cmd, unsigned long arg)
 		caps.pps = ptp->info->pps;
 		caps.n_pins = ptp->info->n_pins;
 		caps.cross_timestamping = ptp->info->getcrosststamp != NULL;
+		caps.adjust_phase = ptp->info->adjphase != NULL;
 		if (copy_to_user((void __user *)arg, &caps, sizeof(caps)))
 			err = -EFAULT;
 		break;
diff --git a/include/uapi/linux/ptp_clock.h b/include/uapi/linux/ptp_clock.h
index 9dc9d0079e98..ff070aa64278 100644
--- a/include/uapi/linux/ptp_clock.h
+++ b/include/uapi/linux/ptp_clock.h
@@ -89,7 +89,9 @@ struct ptp_clock_caps {
 	int n_pins;    /* Number of input/output pins. */
 	/* Whether the clock supports precise system-device cross timestamps */
 	int cross_timestamping;
-	int rsv[13];   /* Reserved for future use. */
+	/* Whether the clock supports adjust phase */
+	int adjust_phase;
+	int rsv[12];   /* Reserved for future use. */
 };
 
 struct ptp_extts_request {
diff --git a/tools/testing/selftests/ptp/testptp.c b/tools/testing/selftests/ptp/testptp.c
index c0dd10257df5..da7a9dda9490 100644
--- a/tools/testing/selftests/ptp/testptp.c
+++ b/tools/testing/selftests/ptp/testptp.c
@@ -269,14 +269,16 @@ int main(int argc, char *argv[])
 			       "  %d programmable periodic signals\n"
 			       "  %d pulse per second\n"
 			       "  %d programmable pins\n"
-			       "  %d cross timestamping\n",
+			       "  %d cross timestamping\n"
+			       "  %d adjust_phase\n",
 			       caps.max_adj,
 			       caps.n_alarm,
 			       caps.n_ext_ts,
 			       caps.n_per_out,
 			       caps.pps,
 			       caps.n_pins,
-			       caps.cross_timestamping);
+			       caps.cross_timestamping,
+			       caps.adjust_phase);
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 425d2b1c563826cf73e204172919fb40b7c45f1d Mon Sep 17 00:00:00 2001
From: Vincent Cheng <vincent.cheng.xh@renesas.com>
Date: Fri, 1 May 2020 23:35:38 -0400
Subject: ptp: ptp_clockmatrix: Add adjphase() to support PHC write phase mode.

Add idtcm_adjphase() to support PHC write phase mode.

Signed-off-by: Vincent Cheng <vincent.cheng.xh@renesas.com>
Acked-by: Richard Cochran <richardcochran@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clockmatrix.c | 92 +++++++++++++++++++++++++++++++++++++++++++
 drivers/ptp/ptp_clockmatrix.h |  8 +++-
 2 files changed, 98 insertions(+), 2 deletions(-)

diff --git a/drivers/ptp/ptp_clockmatrix.c b/drivers/ptp/ptp_clockmatrix.c
index a3f608832660..ceb6bc58f3b4 100644
--- a/drivers/ptp/ptp_clockmatrix.c
+++ b/drivers/ptp/ptp_clockmatrix.c
@@ -10,6 +10,7 @@
 #include <linux/module.h>
 #include <linux/ptp_clock_kernel.h>
 #include <linux/delay.h>
+#include <linux/jiffies.h>
 #include <linux/kernel.h>
 #include <linux/timekeeping.h>
 
@@ -24,6 +25,16 @@ MODULE_LICENSE("GPL");
 
 #define SETTIME_CORRECTION (0)
 
+static long set_write_phase_ready(struct ptp_clock_info *ptp)
+{
+	struct idtcm_channel *channel =
+		container_of(ptp, struct idtcm_channel, caps);
+
+	channel->write_phase_ready = 1;
+
+	return 0;
+}
+
 static int char_array_to_timespec(u8 *buf,
 				  u8 count,
 				  struct timespec64 *ts)
@@ -871,6 +882,64 @@ static int idtcm_set_pll_mode(struct idtcm_channel *channel,
 
 /* PTP Hardware Clock interface */
 
+/**
+ * @brief Write a phase offset given in nanoseconds to DPLL_WR_PHASE,
+ * clamped to the maximum absolute write phase offset in picoseconds.
+ * Destination signed register is 32-bit register in resolution of 50ps
+ *
+ * 0x7fffffff * 50 =  2147483647 * 50 = 107374182350
+ */
+static int _idtcm_adjphase(struct idtcm_channel *channel, s32 delta_ns)
+{
+	struct idtcm *idtcm = channel->idtcm;
+
+	int err;
+	u8 i;
+	u8 buf[4] = {0};
+	s32 phase_50ps;
+	s64 offset_ps;
+
+	if (channel->pll_mode != PLL_MODE_WRITE_PHASE) {
+
+		err = idtcm_set_pll_mode(channel, PLL_MODE_WRITE_PHASE);
+
+		if (err)
+			return err;
+
+		channel->write_phase_ready = 0;
+
+		ptp_schedule_worker(channel->ptp_clock,
+				    msecs_to_jiffies(WR_PHASE_SETUP_MS));
+	}
+
+	if (!channel->write_phase_ready)
+		delta_ns = 0;
+
+	offset_ps = (s64)delta_ns * 1000;
+
+	/*
+	 * Check for 32-bit signed max * 50:
+	 *
+	 * 0x7fffffff * 50 =  2147483647 * 50 = 107374182350
+	 */
+	if (offset_ps > MAX_ABS_WRITE_PHASE_PICOSECONDS)
+		offset_ps = MAX_ABS_WRITE_PHASE_PICOSECONDS;
+	else if (offset_ps < -MAX_ABS_WRITE_PHASE_PICOSECONDS)
+		offset_ps = -MAX_ABS_WRITE_PHASE_PICOSECONDS;
+
+	phase_50ps = DIV_ROUND_CLOSEST(div64_s64(offset_ps, 50), 1);
+
+	for (i = 0; i < 4; i++) {
+		buf[i] = phase_50ps & 0xff;
+		phase_50ps >>= 8;
+	}
+
+	err = idtcm_write(idtcm, channel->dpll_phase, DPLL_WR_PHASE,
+			  buf, sizeof(buf));
+
+	return err;
+}
+
 static int idtcm_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
 {
 	struct idtcm_channel *channel =
@@ -977,6 +1046,24 @@ static int idtcm_adjtime(struct ptp_clock_info *ptp, s64 delta)
 	return err;
 }
 
+static int idtcm_adjphase(struct ptp_clock_info *ptp, s32 delta)
+{
+	struct idtcm_channel *channel =
+		container_of(ptp, struct idtcm_channel, caps);
+
+	struct idtcm *idtcm = channel->idtcm;
+
+	int err;
+
+	mutex_lock(&idtcm->reg_lock);
+
+	err = _idtcm_adjphase(channel, delta);
+
+	mutex_unlock(&idtcm->reg_lock);
+
+	return err;
+}
+
 static int idtcm_enable(struct ptp_clock_info *ptp,
 			struct ptp_clock_request *rq, int on)
 {
@@ -1055,13 +1142,16 @@ static const struct ptp_clock_info idtcm_caps = {
 	.owner		= THIS_MODULE,
 	.max_adj	= 244000,
 	.n_per_out	= 1,
+	.adjphase	= &idtcm_adjphase,
 	.adjfreq	= &idtcm_adjfreq,
 	.adjtime	= &idtcm_adjtime,
 	.gettime64	= &idtcm_gettime,
 	.settime64	= &idtcm_settime,
 	.enable		= &idtcm_enable,
+	.do_aux_work	= &set_write_phase_ready,
 };
 
+
 static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
 {
 	struct idtcm_channel *channel;
@@ -1146,6 +1236,8 @@ static int idtcm_enable_channel(struct idtcm *idtcm, u32 index)
 	if (!channel->ptp_clock)
 		return -ENOTSUPP;
 
+	channel->write_phase_ready = 0;
+
 	dev_info(&idtcm->client->dev, "PLL%d registered as ptp%d\n",
 		 index, channel->ptp_clock->index);
 
diff --git a/drivers/ptp/ptp_clockmatrix.h b/drivers/ptp/ptp_clockmatrix.h
index 6c1f93ab46f3..3de0eb72889c 100644
--- a/drivers/ptp/ptp_clockmatrix.h
+++ b/drivers/ptp/ptp_clockmatrix.h
@@ -15,6 +15,8 @@
 #define FW_FILENAME	"idtcm.bin"
 #define MAX_PHC_PLL	4
 
+#define MAX_ABS_WRITE_PHASE_PICOSECONDS (107374182350LL)
+
 #define PLL_MASK_ADDR		(0xFFA5)
 #define DEFAULT_PLL_MASK	(0x04)
 
@@ -33,8 +35,9 @@
 
 #define POST_SM_RESET_DELAY_MS		(3000)
 #define PHASE_PULL_IN_THRESHOLD_NS	(150000)
-#define TOD_WRITE_OVERHEAD_COUNT_MAX    (5)
-#define TOD_BYTE_COUNT                  (11)
+#define TOD_WRITE_OVERHEAD_COUNT_MAX	(5)
+#define TOD_BYTE_COUNT			(11)
+#define WR_PHASE_SETUP_MS		(5000)
 
 /* Values of DPLL_N.DPLL_MODE.PLL_MODE */
 enum pll_mode {
@@ -77,6 +80,7 @@ struct idtcm_channel {
 	u16			hw_dpll_n;
 	enum pll_mode		pll_mode;
 	u16			output_mask;
+	int			write_phase_ready;
 };
 
 struct idtcm {
-- 
cgit v1.2.3-59-g8ed1b


From 40b94224c339e44f689e713875c6c27c9c1270a7 Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Sat, 2 May 2020 16:41:06 -0700
Subject: smc: Remove unused function.
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

net/smc/smc_llc.c:544:12: warning: ‘smc_llc_alloc_alt_link’ defined but not used [-Wunused-function]
 static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
            ^~~~~~~~~~~~~~~~~~~~~~

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 24 ------------------------
 1 file changed, 24 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 50f59746bdf9..4e3db4d4b783 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -541,30 +541,6 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 
 /********************************* receive ***********************************/
 
-static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
-				  enum smc_lgr_type lgr_new_t)
-{
-	int i;
-
-	if (lgr->type == SMC_LGR_SYMMETRIC ||
-	    (lgr->type != SMC_LGR_SINGLE &&
-	     (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
-	      lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
-		return -EMLINK;
-
-	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
-	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
-		for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
-			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
-				return i;
-	} else {
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
-			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
-				return i;
-	}
-	return -EMLINK;
-}
-
 /* worker to process an add link message */
 static void smc_llc_add_link_work(struct work_struct *work)
 {
-- 
cgit v1.2.3-59-g8ed1b


From f166f890c8f026a931e1bb80f51561a1d2f41b27 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sat, 2 May 2020 17:25:04 +0200
Subject: net: ethernet: fec: Replace interrupt driven MDIO with polled IO

Measurements of the MDIO bus have shown that driving the MDIO bus
using interrupts is slow. Back to back MDIO transactions take about
90us, with 25us spent performing the transaction, and the remainder of
the time the bus is idle.

Replacing the completion interrupt with polled IO results in back to
back transactions of 40us. The polling loop waiting for the hardware
to complete the transaction takes around 28us. Which suggests
interrupt handling has an overhead of 50us, and polled IO nearly
halves this overhead, and doubles the MDIO performance.

Care has to be taken when setting the MII_SPEED register, or it can
trigger an MII event. That then upsets the polling, due to an
unexpected pending event.

Suggested-by: Chris Heally <cphealy@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec.h      |  4 +-
 drivers/net/ethernet/freescale/fec_main.c | 77 ++++++++++++++++++-------------
 2 files changed, 45 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec.h b/drivers/net/ethernet/freescale/fec.h
index e74dd1f86bba..a6cdd5b61921 100644
--- a/drivers/net/ethernet/freescale/fec.h
+++ b/drivers/net/ethernet/freescale/fec.h
@@ -376,8 +376,7 @@ struct bufdesc_ex {
 #define FEC_ENET_TS_AVAIL       ((uint)0x00010000)
 #define FEC_ENET_TS_TIMER       ((uint)0x00008000)
 
-#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF | FEC_ENET_MII)
-#define FEC_NAPI_IMASK	FEC_ENET_MII
+#define FEC_DEFAULT_IMASK (FEC_ENET_TXF | FEC_ENET_RXF)
 #define FEC_RX_DISABLED_IMASK (FEC_DEFAULT_IMASK & (~FEC_ENET_RXF))
 
 /* ENET interrupt coalescing macro define */
@@ -543,7 +542,6 @@ struct fec_enet_private {
 	int	link;
 	int	full_duplex;
 	int	speed;
-	struct	completion mdio_done;
 	int	irq[FEC_IRQ_NUM];
 	bool	bufdesc_ex;
 	int	pause_flag;
diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index c7b84bb22f75..2e209142f2d1 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -976,8 +976,8 @@ fec_restart(struct net_device *ndev)
 	writel((__force u32)cpu_to_be32(temp_mac[1]),
 	       fep->hwp + FEC_ADDR_HIGH);
 
-	/* Clear any outstanding interrupt. */
-	writel(0xffffffff, fep->hwp + FEC_IEVENT);
+	/* Clear any outstanding interrupt, except MDIO. */
+	writel((0xffffffff & ~FEC_ENET_MII), fep->hwp + FEC_IEVENT);
 
 	fec_enet_bd_init(ndev);
 
@@ -1123,7 +1123,7 @@ fec_restart(struct net_device *ndev)
 	if (fep->link)
 		writel(FEC_DEFAULT_IMASK, fep->hwp + FEC_IMASK);
 	else
-		writel(FEC_ENET_MII, fep->hwp + FEC_IMASK);
+		writel(0, fep->hwp + FEC_IMASK);
 
 	/* Init the interrupt coalescing */
 	fec_enet_itr_coal_init(ndev);
@@ -1652,6 +1652,10 @@ fec_enet_interrupt(int irq, void *dev_id)
 	irqreturn_t ret = IRQ_NONE;
 
 	int_events = readl(fep->hwp + FEC_IEVENT);
+
+	/* Don't clear MDIO events, we poll for those */
+	int_events &= ~FEC_ENET_MII;
+
 	writel(int_events, fep->hwp + FEC_IEVENT);
 	fec_enet_collect_events(fep, int_events);
 
@@ -1659,16 +1663,12 @@ fec_enet_interrupt(int irq, void *dev_id)
 		ret = IRQ_HANDLED;
 
 		if (napi_schedule_prep(&fep->napi)) {
-			/* Disable the NAPI interrupts */
-			writel(FEC_NAPI_IMASK, fep->hwp + FEC_IMASK);
+			/* Disable interrupts */
+			writel(0, fep->hwp + FEC_IMASK);
 			__napi_schedule(&fep->napi);
 		}
 	}
 
-	if (int_events & FEC_ENET_MII) {
-		ret = IRQ_HANDLED;
-		complete(&fep->mdio_done);
-	}
 	return ret;
 }
 
@@ -1818,11 +1818,24 @@ static void fec_enet_adjust_link(struct net_device *ndev)
 		phy_print_status(phy_dev);
 }
 
+static int fec_enet_mdio_wait(struct fec_enet_private *fep)
+{
+	uint ievent;
+	int ret;
+
+	ret = readl_poll_timeout_atomic(fep->hwp + FEC_IEVENT, ievent,
+					ievent & FEC_ENET_MII, 2, 30000);
+
+	if (!ret)
+		writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
+
+	return ret;
+}
+
 static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
-	unsigned long time_left;
 	int ret = 0, frame_start, frame_addr, frame_op;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1830,8 +1843,6 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 	if (ret < 0)
 		return ret;
 
-	reinit_completion(&fep->mdio_done);
-
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1843,11 +1854,9 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		time_left = wait_for_completion_timeout(&fep->mdio_done,
-				usecs_to_jiffies(FEC_MII_TIMEOUT));
-		if (time_left == 0) {
+		ret = fec_enet_mdio_wait(fep);
+		if (ret) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
-			ret = -ETIMEDOUT;
 			goto out;
 		}
 
@@ -1866,11 +1875,9 @@ static int fec_enet_mdio_read(struct mii_bus *bus, int mii_id, int regnum)
 		FEC_MMFR_TA, fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-	if (time_left == 0) {
+	ret = fec_enet_mdio_wait(fep);
+	if (ret) {
 		netdev_err(fep->netdev, "MDIO read timeout\n");
-		ret = -ETIMEDOUT;
 		goto out;
 	}
 
@@ -1888,7 +1895,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 {
 	struct fec_enet_private *fep = bus->priv;
 	struct device *dev = &fep->pdev->dev;
-	unsigned long time_left;
 	int ret, frame_start, frame_addr;
 	bool is_c45 = !!(regnum & MII_ADDR_C45);
 
@@ -1898,8 +1904,6 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 	else
 		ret = 0;
 
-	reinit_completion(&fep->mdio_done);
-
 	if (is_c45) {
 		frame_start = FEC_MMFR_ST_C45;
 
@@ -1911,11 +1915,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		       fep->hwp + FEC_MII_DATA);
 
 		/* wait for end of transfer */
-		time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-		if (time_left == 0) {
+		ret = fec_enet_mdio_wait(fep);
+		if (ret) {
 			netdev_err(fep->netdev, "MDIO address write timeout\n");
-			ret = -ETIMEDOUT;
 			goto out;
 		}
 	} else {
@@ -1931,12 +1933,9 @@ static int fec_enet_mdio_write(struct mii_bus *bus, int mii_id, int regnum,
 		fep->hwp + FEC_MII_DATA);
 
 	/* wait for end of transfer */
-	time_left = wait_for_completion_timeout(&fep->mdio_done,
-			usecs_to_jiffies(FEC_MII_TIMEOUT));
-	if (time_left == 0) {
+	ret = fec_enet_mdio_wait(fep);
+	if (ret)
 		netdev_err(fep->netdev, "MDIO write timeout\n");
-		ret  = -ETIMEDOUT;
-	}
 
 out:
 	pm_runtime_mark_last_busy(dev);
@@ -2143,8 +2142,21 @@ static int fec_enet_mii_init(struct platform_device *pdev)
 	if (suppress_preamble)
 		fep->phy_speed |= BIT(7);
 
+	/* Clear MMFR to avoid to generate MII event by writing MSCR.
+	 * MII event generation condition:
+	 * - writing MSCR:
+	 *	- mmfr[31:0]_not_zero & mscr[7:0]_is_zero &
+	 *	  mscr_reg_data_in[7:0] != 0
+	 * - writing MMFR:
+	 *	- mscr[7:0]_not_zero
+	 */
+	writel(0, fep->hwp + FEC_MII_DATA);
+
 	writel(fep->phy_speed, fep->hwp + FEC_MII_SPEED);
 
+	/* Clear any pending transaction complete indication */
+	writel(FEC_ENET_MII, fep->hwp + FEC_IEVENT);
+
 	fep->mii_bus = mdiobus_alloc();
 	if (fep->mii_bus == NULL) {
 		err = -ENOMEM;
@@ -3686,7 +3698,6 @@ fec_probe(struct platform_device *pdev)
 		fep->irq[i] = irq;
 	}
 
-	init_completion(&fep->mdio_done);
 	ret = fec_enet_mii_init(pdev);
 	if (ret)
 		goto failed_mii_init;
-- 
cgit v1.2.3-59-g8ed1b


From ee1bd483cc062d5050f9537064651dd2e06baee7 Mon Sep 17 00:00:00 2001
From: Dmitry Yakunin <zeil@yandex-team.ru>
Date: Sat, 2 May 2020 18:34:42 +0300
Subject: inet_diag: bc: read cgroup id only for full sockets

Fix bug introduced by commit b1f3e43dbfac ("inet_diag: add support for
cgroup filter").

Signed-off-by: Dmitry Yakunin <zeil@yandex-team.ru>
Reported-by: syzbot+ee80f840d9bf6893223b@syzkaller.appspotmail.com
Reported-by: syzbot+13bef047dbfffa5cd1af@syzkaller.appspotmail.com
Fixes: b1f3e43dbfac ("inet_diag: add support for cgroup filter")
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/inet_diag.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/inet_diag.c b/net/ipv4/inet_diag.c
index 0034092358c3..125f4f8a36b4 100644
--- a/net/ipv4/inet_diag.c
+++ b/net/ipv4/inet_diag.c
@@ -746,7 +746,8 @@ int inet_diag_bc_sk(const struct nlattr *bc, struct sock *sk)
 	else
 		entry.mark = 0;
 #ifdef CONFIG_SOCK_CGROUP_DATA
-	entry.cgroup_id = cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data));
+	entry.cgroup_id = sk_fullsock(sk) ?
+		cgroup_id(sock_cgroup_ptr(&sk->sk_cgrp_data)) : 0;
 #endif
 
 	return inet_diag_bc_run(bc, &entry);
-- 
cgit v1.2.3-59-g8ed1b


From dde0a648fc00e2156a3358600c5fbfb3f53256ac Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 May 2020 19:54:18 -0700
Subject: net_sched: sch_fq: avoid touching f->next from fq_gc()

A significant amount of cpu cycles is spent in fq_gc()

When fq_gc() does its lookup in the rb-tree, it needs the
following fields from struct fq_flow :

f->sk       (lookup key in the rb-tree)
f->fq_node  (anchor in the rb-tree)
f->next     (used to determine if the flow is detached)
f->age      (used to determine if the flow is candidate for gc)

This unfortunately spans two cache lines (assuming 64 bytes cache lines)

We can avoid using f->next, if we use the low order bit of f->{age|tail}

This low order bit is 0, if f->tail points to an sk_buff.
We set the low order bit to 1, if the union contains a jiffies value.

Combined with the following patch, this makes sure we only need
to bring into cpu caches one cache line per flow.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 21 +++++++++++++--------
 1 file changed, 13 insertions(+), 8 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4c060134c736..bc9ca1ba507b 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -70,14 +70,14 @@ struct fq_flow {
 	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
 	union {
 		struct sk_buff *tail;	/* last skb in the list */
-		unsigned long  age;	/* jiffies when flow was emptied, for gc */
+		unsigned long  age;	/* (jiffies | 1UL) when flow was emptied, for gc */
 	};
 	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
 	struct sock	*sk;
 	int		qlen;		/* number of packets in flow queue */
 	int		credit;
 	u32		socket_hash;	/* sk_hash */
-	struct fq_flow *next;		/* next pointer in RR lists, or &detached */
+	struct fq_flow *next;		/* next pointer in RR lists */
 
 	struct rb_node  rate_node;	/* anchor in q->delayed tree */
 	u64		time_next_packet;
@@ -126,20 +126,25 @@ struct fq_sched_data {
 	struct qdisc_watchdog watchdog;
 };
 
-/* special value to mark a detached flow (not on old/new list) */
-static struct fq_flow detached, throttled;
-
+/*
+ * f->tail and f->age share the same location.
+ * We can use the low order bit to differentiate if this location points
+ * to a sk_buff or contains a jiffies value, if we force this value to be odd.
+ * This assumes f->tail low order bit must be 0 since alignof(struct sk_buff) >= 2
+ */
 static void fq_flow_set_detached(struct fq_flow *f)
 {
-	f->next = &detached;
-	f->age = jiffies;
+	f->age = jiffies | 1UL;
 }
 
 static bool fq_flow_is_detached(const struct fq_flow *f)
 {
-	return f->next == &detached;
+	return !!(f->age & 1UL);
 }
 
+/* special value to mark a throttled flow (not on old/new list) */
+static struct fq_flow throttled;
+
 static bool fq_flow_is_throttled(const struct fq_flow *f)
 {
 	return f->next == &throttled;
-- 
cgit v1.2.3-59-g8ed1b


From 7ba0537c2b534149be288f851900b4cf5aacde48 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 May 2020 19:54:19 -0700
Subject: net_sched: sch_fq: change fq_flow size/layout

sizeof(struct fq_flow) is 112 bytes on 64bit arches.

This means that half of them use two cache lines, but 50% use
three cache lines.

This patch adds cache line alignment, and makes sure that only
the first cache line is touched by fq_enqueue(), which is more
expensive than fq_dequeue() in general.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index bc9ca1ba507b..ced1f987d7e4 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -66,6 +66,7 @@ static inline struct fq_skb_cb *fq_skb_cb(struct sk_buff *skb)
  * in linear list (head,tail), otherwise are placed in a rbtree (t_root).
  */
 struct fq_flow {
+/* First cache line : used in fq_gc(), fq_enqueue(), fq_dequeue() */
 	struct rb_root	t_root;
 	struct sk_buff	*head;		/* list of skbs for this flow : first skb */
 	union {
@@ -74,14 +75,18 @@ struct fq_flow {
 	};
 	struct rb_node	fq_node;	/* anchor in fq_root[] trees */
 	struct sock	*sk;
+	u32		socket_hash;	/* sk_hash */
 	int		qlen;		/* number of packets in flow queue */
+
+/* Second cache line, used in fq_dequeue() */
 	int		credit;
-	u32		socket_hash;	/* sk_hash */
+	/* 32bit hole on 64bit arches */
+
 	struct fq_flow *next;		/* next pointer in RR lists */
 
 	struct rb_node  rate_node;	/* anchor in q->delayed tree */
 	u64		time_next_packet;
-};
+} ____cacheline_aligned_in_smp;
 
 struct fq_flow_head {
 	struct fq_flow *first;
-- 
cgit v1.2.3-59-g8ed1b


From 82a0aa53b520edf50e08bad347d87d898de414eb Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 May 2020 19:54:20 -0700
Subject: net_sched: sch_fq: use bulk freeing in fq_gc()

fq_gc() already builds a small array of pointers, so using
kmem_cache_free_bulk() needs very little change.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index ced1f987d7e4..53ec47ff8469 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -214,9 +214,10 @@ static void fq_gc(struct fq_sched_data *q,
 		  struct rb_root *root,
 		  struct sock *sk)
 {
-	struct fq_flow *f, *tofree[FQ_GC_MAX];
 	struct rb_node **p, *parent;
-	int fcnt = 0;
+	void *tofree[FQ_GC_MAX];
+	struct fq_flow *f;
+	int i, fcnt = 0;
 
 	p = &root->rb_node;
 	parent = NULL;
@@ -239,15 +240,18 @@ static void fq_gc(struct fq_sched_data *q,
 			p = &parent->rb_left;
 	}
 
+	if (!fcnt)
+		return;
+
+	for (i = fcnt; i > 0; ) {
+		f = tofree[--i];
+		rb_erase(&f->fq_node, root);
+	}
 	q->flows -= fcnt;
 	q->inactive_flows -= fcnt;
 	q->stat_gc_flows += fcnt;
-	while (fcnt) {
-		struct fq_flow *f = tofree[--fcnt];
 
-		rb_erase(&f->fq_node, root);
-		kmem_cache_free(fq_flow_cachep, f);
-	}
+	kmem_cache_free_bulk(fq_flow_cachep, fcnt, tofree);
 }
 
 static struct fq_flow *fq_classify(struct sk_buff *skb, struct fq_sched_data *q)
-- 
cgit v1.2.3-59-g8ed1b


From c288b0ca86a0a49ad450cf2d76a9a70c2ca9e43f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 May 2020 19:54:21 -0700
Subject: net_sched: sch_fq: do not call fq_peek() twice per packet

This refactors the code to not call fq_peek() from fq_dequeue_head()
since the caller can provide the skb.

Also rename fq_dequeue_head() to fq_dequeue_skb() because 'head' is
a bit vague, given the skb could come from t_root rb-tree.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 34 ++++++++++++++++------------------
 1 file changed, 16 insertions(+), 18 deletions(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 53ec47ff8469..567df8fcaf70 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -384,19 +384,17 @@ static void fq_erase_head(struct Qdisc *sch, struct fq_flow *flow,
 	}
 }
 
-/* remove one skb from head of flow queue */
-static struct sk_buff *fq_dequeue_head(struct Qdisc *sch, struct fq_flow *flow)
+/* Remove one skb from flow queue.
+ * This skb must be the return value of prior fq_peek().
+ */
+static void fq_dequeue_skb(struct Qdisc *sch, struct fq_flow *flow,
+			   struct sk_buff *skb)
 {
-	struct sk_buff *skb = fq_peek(flow);
-
-	if (skb) {
-		fq_erase_head(sch, flow, skb);
-		skb_mark_not_on_list(skb);
-		flow->qlen--;
-		qdisc_qstats_backlog_dec(sch, skb);
-		sch->q.qlen--;
-	}
-	return skb;
+	fq_erase_head(sch, flow, skb);
+	skb_mark_not_on_list(skb);
+	flow->qlen--;
+	qdisc_qstats_backlog_dec(sch, skb);
+	sch->q.qlen--;
 }
 
 static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
@@ -508,9 +506,11 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 	if (!sch->q.qlen)
 		return NULL;
 
-	skb = fq_dequeue_head(sch, &q->internal);
-	if (skb)
+	skb = fq_peek(&q->internal);
+	if (unlikely(skb)) {
+		fq_dequeue_skb(sch, &q->internal, skb);
 		goto out;
+	}
 
 	now = ktime_get_ns();
 	fq_check_throttled(q, now);
@@ -550,10 +550,8 @@ begin:
 			INET_ECN_set_ce(skb);
 			q->stat_ce_mark++;
 		}
-	}
-
-	skb = fq_dequeue_head(sch, f);
-	if (!skb) {
+		fq_dequeue_skb(sch, f, skb);
+	} else {
 		head->first = f->next;
 		/* force a pass through old_flows to prevent starvation */
 		if ((head == &q->new_flows) && q->old_flows.first) {
-- 
cgit v1.2.3-59-g8ed1b


From 348e289b0f23451d1c47b940e759f0f3a0c5756e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 2 May 2020 19:54:22 -0700
Subject: net_sched: sch_fq: perform a prefetch() earlier

The prefetch() done in fq_dequeue() can be done a bit earlier
after the refactoring of the code done in the prior patch.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_fq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 567df8fcaf70..4f0104243cc2 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -546,6 +546,7 @@ begin:
 			fq_flow_set_throttled(q, f);
 			goto begin;
 		}
+		prefetch(&skb->end);
 		if ((s64)(now - time_next_packet - q->ce_threshold) > 0) {
 			INET_ECN_set_ce(skb);
 			q->stat_ce_mark++;
@@ -562,7 +563,6 @@ begin:
 		}
 		goto begin;
 	}
-	prefetch(&skb->end);
 	plen = qdisc_pkt_len(skb);
 	f->credit -= plen;
 
-- 
cgit v1.2.3-59-g8ed1b


From 80f8443fcdaa27871a233d08e9142612e6ade77c Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Sun, 3 May 2020 09:52:10 +0000
Subject: net: ena: avoid unnecessary admin command when RSS function set fails

Currently when ena_set_hash_function() fails the hash function is
restored to the previous value by calling an admin command to get
the hash function from the device.

In this commit we avoid the admin command, by saving the previous
hash function before calling ena_set_hash_function() and using this
previous value to restore the hash function in case of failure of
ena_set_hash_function().

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index a250046b8e18..424ba08955e9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2286,6 +2286,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 	struct ena_admin_get_feat_resp get_resp;
 	struct ena_admin_feature_rss_flow_hash_control *hash_key =
 		rss->hash_key;
+	enum ena_admin_hash_functions old_func;
 	int rc;
 
 	/* Make sure size is a mult of DWs */
@@ -2325,12 +2326,13 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 		return -EINVAL;
 	}
 
+	old_func = rss->hash_func;
 	rss->hash_func = func;
 	rc = ena_com_set_hash_function(ena_dev);
 
 	/* Restore the old function */
 	if (unlikely(rc))
-		ena_com_get_hash_function(ena_dev, NULL, NULL);
+		rss->hash_func = old_func;
 
 	return rc;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e9a1de378dd46375f9abfd8de1e6f59ee114a793 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Sun, 3 May 2020 09:52:11 +0000
Subject: net: ena: fix error returning in ena_com_get_hash_function()

In case the "func" parameter is NULL we now return "-EINVAL".
This shouldn't happen in general, but when it does happen, this is the
proper way to handle it.

We also check func for NULL in the beginning of the function, as there
is no reason to do all the work and realize in the end of the function
it was useless.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 424ba08955e9..66edc86c41c9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2347,6 +2347,9 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
 		rss->hash_key;
 	int rc;
 
+	if (unlikely(!func))
+		return -EINVAL;
+
 	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
 				    ENA_ADMIN_RSS_HASH_FUNCTION,
 				    rss->hash_key_dma_addr,
@@ -2359,8 +2362,7 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
 	if (rss->hash_func)
 		rss->hash_func--;
 
-	if (func)
-		*func = rss->hash_func;
+	*func = rss->hash_func;
 
 	if (key)
 		memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
-- 
cgit v1.2.3-59-g8ed1b


From f66c2ea3b18a8cc8e45ff32c636aaefb2ac06315 Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:12 +0000
Subject: net: ena: allow setting the hash function without changing the key

Current code does not allow setting the hash function without
changing the key. This commit enables it.

To achieve this we separate ena_com_get_hash_function() into 2 functions:
ena_com_get_hash_function() - which gets only the hash function, and
ena_com_get_hash_key() - which gets only the hash key.

Also return 0 instead of rc at the end of ena_get_rxfh() since all
previous operations succeeded.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     | 13 +++++++++----
 drivers/net/ethernet/amazon/ena/ena_com.h     | 21 +++++++++++++++------
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 12 ++++++++----
 3 files changed, 32 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 66edc86c41c9..d428d0606166 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2338,13 +2338,10 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 }
 
 int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
-			      enum ena_admin_hash_functions *func,
-			      u8 *key)
+			      enum ena_admin_hash_functions *func)
 {
 	struct ena_rss *rss = &ena_dev->rss;
 	struct ena_admin_get_feat_resp get_resp;
-	struct ena_admin_feature_rss_flow_hash_control *hash_key =
-		rss->hash_key;
 	int rc;
 
 	if (unlikely(!func))
@@ -2364,6 +2361,14 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
 
 	*func = rss->hash_func;
 
+	return 0;
+}
+
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key)
+{
+	struct ena_admin_feature_rss_flow_hash_control *hash_key =
+		ena_dev->rss.hash_key;
+
 	if (key)
 		memcpy(key, hash_key->key, (size_t)(hash_key->keys_num) << 2);
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 469f298199a7..e2e2fd1dc820 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -695,13 +695,11 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
  */
 int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
 
-/* ena_com_get_hash_function - Retrieve the hash function and the hash key
- * from the device.
+/* ena_com_get_hash_function - Retrieve the hash function from the device.
  * @ena_dev: ENA communication layer struct
  * @func: hash function
- * @key: hash key
  *
- * Retrieve the hash function and the hash key from the device.
+ * Retrieve the hash function from the device.
  *
  * @note: If the caller called ena_com_fill_hash_function but didn't flash
  * it to the device, the new configuration will be lost.
@@ -709,9 +707,20 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
  * @return: 0 on Success and negative value otherwise.
  */
 int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
-			      enum ena_admin_hash_functions *func,
-			      u8 *key);
+			      enum ena_admin_hash_functions *func);
 
+/* ena_com_get_hash_key - Retrieve the hash key
+ * @ena_dev: ENA communication layer struct
+ * @key: hash key
+ *
+ * Retrieve the hash key.
+ *
+ * @note: If the caller called ena_com_fill_hash_key but didn't flash
+ * it to the device, the new configuration will be lost.
+ *
+ * @return: 0 on Success and negative value otherwise.
+ */
+int ena_com_get_hash_key(struct ena_com_dev *ena_dev, u8 *key);
 /* ena_com_fill_hash_ctrl - Fill RSS hash control
  * @ena_dev: ENA communication layer struct.
  * @proto: The protocol to configure.
diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 9cc28b4b2627..0c3a2f14387e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -672,7 +672,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	/* We call this function in order to check if the device
 	 * supports getting/setting the hash function.
 	 */
-	rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func, key);
+	rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
 	if (rc) {
 		if (rc == -EOPNOTSUPP) {
 			key = NULL;
@@ -683,6 +683,10 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 		return rc;
 	}
 
+	rc = ena_com_get_hash_key(adapter->ena_dev, key);
+	if (rc)
+		return rc;
+
 	switch (ena_func) {
 	case ENA_ADMIN_TOEPLITZ:
 		func = ETH_RSS_HASH_TOP;
@@ -699,7 +703,7 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	if (hfunc)
 		*hfunc = func;
 
-	return rc;
+	return 0;
 }
 
 static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
@@ -707,7 +711,7 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
 {
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
-	enum ena_admin_hash_functions func;
+	enum ena_admin_hash_functions func = 0;
 	int rc, i;
 
 	if (indir) {
@@ -746,7 +750,7 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
 		return -EOPNOTSUPP;
 	}
 
-	if (key) {
+	if (key || func) {
 		rc = ena_com_fill_hash_function(ena_dev, func, key,
 						ENA_HASH_KEY_SIZE,
 						0xFFFFFFFF);
-- 
cgit v1.2.3-59-g8ed1b


From c1bd17e51c71dc8214fbccc2b5abea4d31ac65e6 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Sun, 3 May 2020 09:52:13 +0000
Subject: net: ena: change default RSS hash function to Toeplitz

Currently in the driver we are setting the hash function to be CRC32.
Starting with this commit we want to change the default behaviour so that
we set the hash function to be Toeplitz instead.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 2cc765df8da3..6baafc3aebea 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3991,7 +3991,7 @@ static int ena_rss_init_default(struct ena_adapter *adapter)
 		}
 	}
 
-	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_CRC32, NULL,
+	rc = ena_com_fill_hash_function(ena_dev, ENA_ADMIN_TOEPLITZ, NULL,
 					ENA_HASH_KEY_SIZE, 0xFFFFFFFF);
 	if (unlikely(rc && (rc != -EOPNOTSUPP))) {
 		dev_err(dev, "Cannot fill hash function\n");
-- 
cgit v1.2.3-59-g8ed1b


From 0af3c4e2eab8aa5a904287534e442add36023273 Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:14 +0000
Subject: net: ena: changes to RSS hash key allocation

This commit contains 2 cosmetic changes:

1. Use ena_com_check_supported_feature_id() in
   ena_com_hash_key_allocate() instead of rewriting
   its implementation. This also saves us a superfluous admin
   command by using the cached value.

2. Change if conditions in ena_com_rss_init() to be clearer.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 16 +++++-----------
 1 file changed, 5 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index d428d0606166..b51bf62af11b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -1067,16 +1067,10 @@ static void ena_com_hash_key_fill_default_key(struct ena_com_dev *ena_dev)
 static int ena_com_hash_key_allocate(struct ena_com_dev *ena_dev)
 {
 	struct ena_rss *rss = &ena_dev->rss;
-	struct ena_admin_get_feat_resp get_resp;
-	int rc;
 
-	rc = ena_com_get_feature_ex(ena_dev, &get_resp,
-				    ENA_ADMIN_RSS_HASH_FUNCTION,
-				    ena_dev->rss.hash_key_dma_addr,
-				    sizeof(ena_dev->rss.hash_key), 0);
-	if (unlikely(rc)) {
+	if (!ena_com_check_supported_feature_id(ena_dev,
+						ENA_ADMIN_RSS_HASH_FUNCTION))
 		return -EOPNOTSUPP;
-	}
 
 	rss->hash_key =
 		dma_alloc_coherent(ena_dev->dmadev, sizeof(*rss->hash_key),
@@ -2650,10 +2644,10 @@ int ena_com_rss_init(struct ena_com_dev *ena_dev, u16 indr_tbl_log_size)
 	 * ignore this error and have indirection table support only.
 	 */
 	rc = ena_com_hash_key_allocate(ena_dev);
-	if (unlikely(rc) && rc != -EOPNOTSUPP)
-		goto err_hash_key;
-	else if (rc != -EOPNOTSUPP)
+	if (likely(!rc))
 		ena_com_hash_key_fill_default_key(ena_dev);
+	else if (rc != -EOPNOTSUPP)
+		goto err_hash_key;
 
 	rc = ena_com_hash_ctrl_init(ena_dev);
 	if (unlikely(rc))
-- 
cgit v1.2.3-59-g8ed1b


From cac7172f2d33ae75601d1bc928532d3ef7513db9 Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:15 +0000
Subject: net: ena: remove code that does nothing

Both key and func parameters are pointers on the stack.
Setting them to NULL does nothing.
The original intent was to leave the key and func unset in this case,
but for this to happen nothing needs to be done as the calling
function ethtool_get_rxfh() already clears key and func.

This commit removes the above described useless code.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 0c3a2f14387e..c7df25f92dbd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -674,11 +674,8 @@ static int ena_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	 */
 	rc = ena_com_get_hash_function(adapter->ena_dev, &ena_func);
 	if (rc) {
-		if (rc == -EOPNOTSUPP) {
-			key = NULL;
-			hfunc = NULL;
+		if (rc == -EOPNOTSUPP)
 			rc = 0;
-		}
 
 		return rc;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From d4a8b3bb0bb7e63d391b8e442681b72ab1429471 Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:16 +0000
Subject: net: ena: add unmask interrupts statistics to ethtool

Add unmask interrupts statistics to ethtool.

Signed-off-by: Netanel Belgazal <netanel@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 1 +
 drivers/net/ethernet/amazon/ena/ena_netdev.c  | 3 +++
 drivers/net/ethernet/amazon/ena/ena_netdev.h  | 1 +
 3 files changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index c7df25f92dbd..74725d606964 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -83,6 +83,7 @@ static const struct ena_stats ena_stats_tx_strings[] = {
 	ENA_STAT_TX_ENTRY(bad_req_id),
 	ENA_STAT_TX_ENTRY(llq_buffer_copy),
 	ENA_STAT_TX_ENTRY(missed_tx),
+	ENA_STAT_TX_ENTRY(unmask_interrupt),
 };
 
 static const struct ena_stats ena_stats_rx_strings[] = {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 6baafc3aebea..3cea4c9090c2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1762,6 +1762,9 @@ static void ena_unmask_interrupt(struct ena_ring *tx_ring,
 				tx_ring->smoothed_interval,
 				true);
 
+	u64_stats_update_begin(&tx_ring->syncp);
+	tx_ring->tx_stats.unmask_interrupt++;
+	u64_stats_update_end(&tx_ring->syncp);
 	/* It is a shared MSI-X.
 	 * Tx and Rx CQ have pointer to it.
 	 * So we use one of them to reach the intr reg
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 97dfd0c67e84..ebeb911c0efb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -248,6 +248,7 @@ struct ena_stats_tx {
 	u64 bad_req_id;
 	u64 llq_buffer_copy;
 	u64 missed_tx;
+	u64 unmask_interrupt;
 };
 
 struct ena_stats_rx {
-- 
cgit v1.2.3-59-g8ed1b


From 5c665f8c596ded2d8b876bac024409856117f40e Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:17 +0000
Subject: net: ena: add support for reporting of packet drops

1. Add support for getting tx drops from the device and saving them
in the driver.
2. Report tx drops via netdev stats.

Signed-off-by: Igor Chauskin <igorch@amazon.com>
Signed-off-by: Guy Tzalik <gtzalik@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h | 8 ++++++++
 drivers/net/ethernet/amazon/ena/ena_netdev.c     | 6 ++++++
 drivers/net/ethernet/amazon/ena/ena_netdev.h     | 1 +
 3 files changed, 15 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 8baf847e8622..7be3dcbf3d16 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -404,6 +404,10 @@ struct ena_admin_basic_stats {
 	u32 rx_drops_low;
 
 	u32 rx_drops_high;
+
+	u32 tx_drops_low;
+
+	u32 tx_drops_high;
 };
 
 struct ena_admin_acq_get_stats_resp {
@@ -1017,6 +1021,10 @@ struct ena_admin_aenq_keep_alive_desc {
 	u32 rx_drops_low;
 
 	u32 rx_drops_high;
+
+	u32 tx_drops_low;
+
+	u32 tx_drops_high;
 };
 
 struct ena_admin_ena_mmio_req_read_less_resp {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 3cea4c9090c2..517681319a57 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3172,6 +3172,7 @@ static void ena_get_stats64(struct net_device *netdev,
 	struct ena_ring *rx_ring, *tx_ring;
 	unsigned int start;
 	u64 rx_drops;
+	u64 tx_drops;
 	int i;
 
 	if (!test_bit(ENA_FLAG_DEV_UP, &adapter->flags))
@@ -3206,9 +3207,11 @@ static void ena_get_stats64(struct net_device *netdev,
 	do {
 		start = u64_stats_fetch_begin_irq(&adapter->syncp);
 		rx_drops = adapter->dev_stats.rx_drops;
+		tx_drops = adapter->dev_stats.tx_drops;
 	} while (u64_stats_fetch_retry_irq(&adapter->syncp, start));
 
 	stats->rx_dropped = rx_drops;
+	stats->tx_dropped = tx_drops;
 
 	stats->multicast = 0;
 	stats->collisions = 0;
@@ -4517,14 +4520,17 @@ static void ena_keep_alive_wd(void *adapter_data,
 	struct ena_adapter *adapter = (struct ena_adapter *)adapter_data;
 	struct ena_admin_aenq_keep_alive_desc *desc;
 	u64 rx_drops;
+	u64 tx_drops;
 
 	desc = (struct ena_admin_aenq_keep_alive_desc *)aenq_e;
 	adapter->last_keep_alive_jiffies = jiffies;
 
 	rx_drops = ((u64)desc->rx_drops_high << 32) | desc->rx_drops_low;
+	tx_drops = ((u64)desc->tx_drops_high << 32) | desc->tx_drops_low;
 
 	u64_stats_update_begin(&adapter->syncp);
 	adapter->dev_stats.rx_drops = rx_drops;
+	adapter->dev_stats.tx_drops = tx_drops;
 	u64_stats_update_end(&adapter->syncp);
 }
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index ebeb911c0efb..bd278c4721c6 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -334,6 +334,7 @@ struct ena_stats_dev {
 	u64 interface_down;
 	u64 admin_q_pause;
 	u64 rx_drops;
+	u64 tx_drops;
 };
 
 enum ena_flags_t {
-- 
cgit v1.2.3-59-g8ed1b


From f5db045e76fa74bdc95f9736429a565260a5d890 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Sun, 3 May 2020 09:52:18 +0000
Subject: net: ena: drop superfluous prototype

Before this commit there was a function prototype named
ena_com_get_ena_admin_polling_mode() that was never implemented.

This patch simply deletes it.

Signed-off-by: Igor Chauskin <igorch@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.h | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index e2e2fd1dc820..a55379471f98 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -501,18 +501,6 @@ bool ena_com_get_admin_running_state(struct ena_com_dev *ena_dev);
  */
 void ena_com_set_admin_polling_mode(struct ena_com_dev *ena_dev, bool polling);
 
-/* ena_com_set_admin_polling_mode - Get the admin completion queue polling mode
- * @ena_dev: ENA communication layer struct
- *
- * Get the admin completion mode.
- * If polling mode is on, ena_com_execute_admin_command will perform a
- * polling on the admin completion queue for the commands completion,
- * otherwise it will wait on wait event.
- *
- * @return state
- */
-bool ena_com_get_ena_admin_polling_mode(struct ena_com_dev *ena_dev);
-
 /* ena_com_set_admin_auto_polling_mode - Enable autoswitch to polling mode
  * @ena_dev: ENA communication layer struct
  * @polling: Enable/Disable polling mode
-- 
cgit v1.2.3-59-g8ed1b


From c1c0e40b367efc92ad05bb802180817534a32343 Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:19 +0000
Subject: net: ena: use SHUTDOWN as reset reason when closing interface

The 'ENA_REGS_RESET_SHUTDOWN' enum indicates a normal driver
shutdown / removal procedure.

Also, a comment is added to one of the reset reason assignments for
code clarity.

Signed-off-by: Shay Agroskin <shayagr@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 517681319a57..2818965427e9 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3439,6 +3439,7 @@ static void ena_destroy_device(struct ena_adapter *adapter, bool graceful)
 
 	ena_com_mmio_reg_read_request_destroy(ena_dev);
 
+	/* return reset reason to default value */
 	adapter->reset_reason = ENA_REGS_RESET_NORMAL;
 
 	clear_bit(ENA_FLAG_TRIGGER_RESET, &adapter->flags);
@@ -4362,6 +4363,7 @@ static void __ena_shutoff(struct pci_dev *pdev, bool shutdown)
 	cancel_work_sync(&adapter->reset_task);
 
 	rtnl_lock(); /* lock released inside the below if-else block */
+	adapter->reset_reason = ENA_REGS_RESET_SHUTDOWN;
 	ena_destroy_device(adapter, true);
 	if (shutdown) {
 		netif_device_detach(netdev);
-- 
cgit v1.2.3-59-g8ed1b


From dcc2789a11f9bbe27dc79f85f426c22def3e921c Mon Sep 17 00:00:00 2001
From: Sameeh Jubran <sameehj@amazon.com>
Date: Sun, 3 May 2020 09:52:20 +0000
Subject: net: ena: cosmetic: remove unnecessary spaces and tabs in ena_com.h
 macros

The macros in ena_com.h have inconsistent spaces between
the macro name and its value.

This commit sets all the macros to have a single space between
the name and value.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.h | 14 +++++++-------
 1 file changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index a55379471f98..13a1b7812c46 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -54,9 +54,9 @@
 #undef pr_fmt
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
-#define ENA_MAX_NUM_IO_QUEUES		128U
+#define ENA_MAX_NUM_IO_QUEUES 128U
 /* We need to queues for each IO (on for Tx and one for Rx) */
-#define ENA_TOTAL_NUM_QUEUES		(2 * (ENA_MAX_NUM_IO_QUEUES))
+#define ENA_TOTAL_NUM_QUEUES (2 * (ENA_MAX_NUM_IO_QUEUES))
 
 #define ENA_MAX_HANDLERS 256
 
@@ -73,13 +73,13 @@
 /*****************************************************************************/
 /* ENA adaptive interrupt moderation settings */
 
-#define ENA_INTR_INITIAL_TX_INTERVAL_USECS		64
-#define ENA_INTR_INITIAL_RX_INTERVAL_USECS		0
-#define ENA_DEFAULT_INTR_DELAY_RESOLUTION		1
+#define ENA_INTR_INITIAL_TX_INTERVAL_USECS 64
+#define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0
+#define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1
 
-#define ENA_HW_HINTS_NO_TIMEOUT				0xFFFF
+#define ENA_HW_HINTS_NO_TIMEOUT	0xFFFF
 
-#define ENA_FEATURE_MAX_QUEUE_EXT_VER	1
+#define ENA_FEATURE_MAX_QUEUE_EXT_VER 1
 
 struct ena_llq_configurations {
 	enum ena_admin_llq_header_location llq_header_location;
-- 
cgit v1.2.3-59-g8ed1b


From 77a651f5647da15b215e3523acce852dad453b6c Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Sun, 3 May 2020 09:52:21 +0000
Subject: net: ena: cosmetic: extract code to ena_indirection_table_set()

Extract code to ena_indirection_table_set() to make
the code cleaner.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 48 +++++++++++++++++----------
 1 file changed, 30 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 74725d606964..830d3711d6ee 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -636,6 +636,32 @@ static u32 ena_get_rxfh_key_size(struct net_device *netdev)
 	return ENA_HASH_KEY_SIZE;
 }
 
+static int ena_indirection_table_set(struct ena_adapter *adapter,
+				     const u32 *indir)
+{
+	struct ena_com_dev *ena_dev = adapter->ena_dev;
+	int i, rc;
+
+	for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
+		rc = ena_com_indirect_table_fill_entry(ena_dev,
+						       i,
+						       ENA_IO_RXQ_IDX(indir[i]));
+		if (unlikely(rc)) {
+			netif_err(adapter, drv, adapter->netdev,
+				  "Cannot fill indirect table (index is too large)\n");
+			return rc;
+		}
+	}
+
+	rc = ena_com_indirect_table_set(ena_dev);
+	if (rc) {
+		netif_err(adapter, drv, adapter->netdev,
+			  "Cannot set indirect table\n");
+		return rc == -EPERM ? -EOPNOTSUPP : rc;
+	}
+	return rc;
+}
+
 static int ena_indirection_table_get(struct ena_adapter *adapter, u32 *indir)
 {
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
@@ -710,26 +736,12 @@ static int ena_set_rxfh(struct net_device *netdev, const u32 *indir,
 	struct ena_adapter *adapter = netdev_priv(netdev);
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	enum ena_admin_hash_functions func = 0;
-	int rc, i;
+	int rc;
 
 	if (indir) {
-		for (i = 0; i < ENA_RX_RSS_TABLE_SIZE; i++) {
-			rc = ena_com_indirect_table_fill_entry(ena_dev,
-							       i,
-							       ENA_IO_RXQ_IDX(indir[i]));
-			if (unlikely(rc)) {
-				netif_err(adapter, drv, netdev,
-					  "Cannot fill indirect table (index is too large)\n");
-				return rc;
-			}
-		}
-
-		rc = ena_com_indirect_table_set(ena_dev);
-		if (rc) {
-			netif_err(adapter, drv, netdev,
-				  "Cannot set indirect table\n");
-			return rc == -EPERM ? -EOPNOTSUPP : rc;
-		}
+		rc = ena_indirection_table_set(adapter, indir);
+		if (rc)
+			return rc;
 	}
 
 	switch (hfunc) {
-- 
cgit v1.2.3-59-g8ed1b


From 336ba09f2ef71b82f07c1200be0ddf4eb923d69f Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:40 +0200
Subject: net/smc: first part of add link processing as SMC client

First set of functions to process an ADD_LINK LLC request as an SMC
client. Find an alternate IB device, determine the new link group type
and get the index for the new link. Then ready the link, map the buffers
and send an ADD_LINK LLC response. If any error occurs, send a reject
LLC message and terminate the processing.
Add smc_llc_alloc_alt_link() to find a free link index for a new link,
depending on the new link group type.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |   4 +-
 net/smc/smc_core.h |   2 +
 net/smc/smc_llc.c  | 107 +++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 111 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 60c708f6de51..2f8faa9c9e8e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -273,8 +273,8 @@ static u8 smcr_next_link_id(struct smc_link_group *lgr)
 	return link_id;
 }
 
-static int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
-			  u8 link_idx, struct smc_init_info *ini)
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+		   u8 link_idx, struct smc_init_info *ini)
 {
 	u8 rndvec[3];
 	int rc;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 555ada9d2423..4e00819e2db7 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -374,6 +374,8 @@ void smc_lgr_schedule_free_work_fast(struct smc_link_group *lgr);
 int smc_core_init(void);
 void smc_core_exit(void);
 
+int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
+		   u8 link_idx, struct smc_init_info *ini);
 void smcr_link_clear(struct smc_link *lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4e3db4d4b783..8716d8739329 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -17,6 +17,7 @@
 #include "smc_core.h"
 #include "smc_clc.h"
 #include "smc_llc.h"
+#include "smc_pnet.h"
 
 #define SMC_LLC_DATA_LEN		40
 
@@ -541,6 +542,112 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 
 /********************************* receive ***********************************/
 
+static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
+				  enum smc_lgr_type lgr_new_t)
+{
+	int i;
+
+	if (lgr->type == SMC_LGR_SYMMETRIC ||
+	    (lgr->type != SMC_LGR_SINGLE &&
+	     (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	      lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)))
+		return -EMLINK;
+
+	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER) {
+		for (i = SMC_LINKS_PER_LGR_MAX - 1; i >= 0; i--)
+			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+				return i;
+	} else {
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+			if (lgr->lnk[i].state == SMC_LNK_UNUSED)
+				return i;
+	}
+	return -EMLINK;
+}
+
+/* prepare and send an add link reject response */
+static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
+{
+	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+	qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_ADD_LNK_REJ;
+	qentry->msg.raw.hdr.add_link_rej_rsn = SMC_LLC_REJ_RSN_NO_ALT_PATH;
+	return smc_llc_send_message(qentry->link, &qentry->msg);
+}
+
+static void smc_llc_save_add_link_info(struct smc_link *link,
+				       struct smc_llc_msg_add_link *add_llc)
+{
+	link->peer_qpn = ntoh24(add_llc->sender_qp_num);
+	memcpy(link->peer_gid, add_llc->sender_gid, SMC_GID_SIZE);
+	memcpy(link->peer_mac, add_llc->sender_mac, ETH_ALEN);
+	link->peer_psn = ntoh24(add_llc->initial_psn);
+	link->peer_mtu = add_llc->qp_mtu;
+}
+
+/* as an SMC client, process an add link request */
+int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
+{
+	struct smc_llc_msg_add_link *llc = &qentry->msg.add_link;
+	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
+	struct smc_link_group *lgr = smc_get_lgr(link);
+	struct smc_link *lnk_new = NULL;
+	struct smc_init_info ini;
+	int lnk_idx, rc = 0;
+
+	ini.vlan_id = lgr->vlan_id;
+	smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+	if (!memcmp(llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
+	    !memcmp(llc->sender_mac, link->peer_mac, ETH_ALEN)) {
+		if (!ini.ib_dev)
+			goto out_reject;
+		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
+	}
+	if (!ini.ib_dev) {
+		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+		ini.ib_dev = link->smcibdev;
+		ini.ib_port = link->ibport;
+	}
+	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
+	if (lnk_idx < 0)
+		goto out_reject;
+	lnk_new = &lgr->lnk[lnk_idx];
+	rc = smcr_link_init(lgr, lnk_new, lnk_idx, &ini);
+	if (rc)
+		goto out_reject;
+	smc_llc_save_add_link_info(lnk_new, llc);
+	lnk_new->link_id = llc->link_num;
+
+	rc = smc_ib_ready_link(lnk_new);
+	if (rc)
+		goto out_clear_lnk;
+
+	rc = smcr_buf_map_lgr(lnk_new);
+	if (rc)
+		goto out_clear_lnk;
+
+	rc = smc_llc_send_add_link(link,
+				   lnk_new->smcibdev->mac[ini.ib_port - 1],
+				   lnk_new->gid, lnk_new, SMC_LLC_RESP);
+	if (rc)
+		goto out_clear_lnk;
+	/* tbd: rc = smc_llc_cli_rkey_exchange(link, lnk_new); */
+	if (rc) {
+		rc = 0;
+		goto out_clear_lnk;
+	}
+	/* tbd: rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); */
+	if (!rc)
+		goto out;
+out_clear_lnk:
+	smcr_link_clear(lnk_new);
+out_reject:
+	smc_llc_cli_add_link_reject(qentry);
+out:
+	kfree(qentry);
+	return rc;
+}
+
 /* worker to process an add link message */
 static void smc_llc_add_link_work(struct work_struct *work)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 87f88cda2128a72d79d4cc700729488af1081a06 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:41 +0200
Subject: net/smc: rkey processing for a new link as SMC client

Part of the SMC client new link establishment process is the exchange of
rkeys for all used buffers.
Add new LLC message type ADD_LINK_CONTINUE which is used to exchange
rkeys of all current RMB buffers. Add functions to iterate over all
used RMB buffers of the link group, and implement the ADD_LINK_CONTINUE
processing.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 157 +++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_llc.h |   1 +
 2 files changed, 157 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 8716d8739329..a06b618f172e 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -70,6 +70,23 @@ struct smc_llc_msg_add_link {		/* type 0x02 */
 	u8 reserved[8];
 };
 
+struct smc_llc_msg_add_link_cont_rt {
+	__be32 rmb_key;
+	__be32 rmb_key_new;
+	__be64 rmb_vaddr_new;
+};
+
+#define SMC_LLC_RKEYS_PER_CONT_MSG	2
+
+struct smc_llc_msg_add_link_cont {	/* type 0x03 */
+	struct smc_llc_hdr hd;
+	u8 link_num;
+	u8 num_rkeys;
+	u8 reserved2[2];
+	struct smc_llc_msg_add_link_cont_rt rt[SMC_LLC_RKEYS_PER_CONT_MSG];
+	u8 reserved[4];
+} __packed;			/* format defined in RFC7609 */
+
 #define SMC_LLC_FLAG_DEL_LINK_ALL	0x40
 #define SMC_LLC_FLAG_DEL_LINK_ORDERLY	0x20
 
@@ -121,6 +138,7 @@ struct smc_llc_msg_delete_rkey {	/* type 0x09 */
 union smc_llc_msg {
 	struct smc_llc_msg_confirm_link confirm_link;
 	struct smc_llc_msg_add_link add_link;
+	struct smc_llc_msg_add_link_cont add_link_cont;
 	struct smc_llc_msg_del_link delete_link;
 
 	struct smc_llc_msg_confirm_rkey confirm_rkey;
@@ -566,6 +584,137 @@ static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
 	return -EMLINK;
 }
 
+/* return first buffer from any of the next buf lists */
+static struct smc_buf_desc *_smc_llc_get_next_rmb(struct smc_link_group *lgr,
+						  int *buf_lst)
+{
+	struct smc_buf_desc *buf_pos;
+
+	while (*buf_lst < SMC_RMBE_SIZES) {
+		buf_pos = list_first_entry_or_null(&lgr->rmbs[*buf_lst],
+						   struct smc_buf_desc, list);
+		if (buf_pos)
+			return buf_pos;
+		(*buf_lst)++;
+	}
+	return NULL;
+}
+
+/* return next rmb from buffer lists */
+static struct smc_buf_desc *smc_llc_get_next_rmb(struct smc_link_group *lgr,
+						 int *buf_lst,
+						 struct smc_buf_desc *buf_pos)
+{
+	struct smc_buf_desc *buf_next;
+
+	if (!buf_pos || list_is_last(&buf_pos->list, &lgr->rmbs[*buf_lst])) {
+		(*buf_lst)++;
+		return _smc_llc_get_next_rmb(lgr, buf_lst);
+	}
+	buf_next = list_next_entry(buf_pos, list);
+	return buf_next;
+}
+
+static struct smc_buf_desc *smc_llc_get_first_rmb(struct smc_link_group *lgr,
+						  int *buf_lst)
+{
+	*buf_lst = 0;
+	return smc_llc_get_next_rmb(lgr, buf_lst, NULL);
+}
+
+/* send one add_link_continue msg */
+static int smc_llc_add_link_cont(struct smc_link *link,
+				 struct smc_link *link_new, u8 *num_rkeys_todo,
+				 int *buf_lst, struct smc_buf_desc **buf_pos)
+{
+	struct smc_llc_msg_add_link_cont *addc_llc;
+	struct smc_link_group *lgr = link->lgr;
+	int prim_lnk_idx, lnk_idx, i, rc;
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	struct smc_buf_desc *rmb;
+	u8 n;
+
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	addc_llc = (struct smc_llc_msg_add_link_cont *)wr_buf;
+	memset(addc_llc, 0, sizeof(*addc_llc));
+
+	prim_lnk_idx = link->link_idx;
+	lnk_idx = link_new->link_idx;
+	addc_llc->link_num = link_new->link_id;
+	addc_llc->num_rkeys = *num_rkeys_todo;
+	n = *num_rkeys_todo;
+	for (i = 0; i < min_t(u8, n, SMC_LLC_RKEYS_PER_CONT_MSG); i++) {
+		if (!*buf_pos) {
+			addc_llc->num_rkeys = addc_llc->num_rkeys -
+					      *num_rkeys_todo;
+			*num_rkeys_todo = 0;
+			break;
+		}
+		rmb = *buf_pos;
+
+		addc_llc->rt[i].rmb_key = htonl(rmb->mr_rx[prim_lnk_idx]->rkey);
+		addc_llc->rt[i].rmb_key_new = htonl(rmb->mr_rx[lnk_idx]->rkey);
+		addc_llc->rt[i].rmb_vaddr_new =
+			cpu_to_be64((u64)sg_dma_address(rmb->sgt[lnk_idx].sgl));
+
+		(*num_rkeys_todo)--;
+		*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+		while (*buf_pos && !(*buf_pos)->used)
+			*buf_pos = smc_llc_get_next_rmb(lgr, buf_lst, *buf_pos);
+	}
+	addc_llc->hd.common.type = SMC_LLC_ADD_LINK_CONT;
+	addc_llc->hd.length = sizeof(struct smc_llc_msg_add_link_cont);
+	if (lgr->role == SMC_CLNT)
+		addc_llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	return smc_wr_tx_send(link, pend);
+}
+
+static int smc_llc_cli_rkey_exchange(struct smc_link *link,
+				     struct smc_link *link_new)
+{
+	struct smc_llc_msg_add_link_cont *addc_llc;
+	struct smc_link_group *lgr = link->lgr;
+	u8 max, num_rkeys_send, num_rkeys_recv;
+	struct smc_llc_qentry *qentry;
+	struct smc_buf_desc *buf_pos;
+	int buf_lst;
+	int rc = 0;
+	int i;
+
+	mutex_lock(&lgr->rmbs_lock);
+	num_rkeys_send = lgr->conns_num;
+	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+	do {
+		qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_TIME,
+				      SMC_LLC_ADD_LINK_CONT);
+		if (!qentry) {
+			rc = -ETIMEDOUT;
+			break;
+		}
+		addc_llc = &qentry->msg.add_link_cont;
+		num_rkeys_recv = addc_llc->num_rkeys;
+		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
+		for (i = 0; i < max; i++) {
+			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+				       addc_llc->rt[i].rmb_key,
+				       addc_llc->rt[i].rmb_vaddr_new,
+				       addc_llc->rt[i].rmb_key_new);
+			num_rkeys_recv--;
+		}
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+		rc = smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
+					   &buf_lst, &buf_pos);
+		if (rc)
+			break;
+	} while (num_rkeys_send || num_rkeys_recv);
+
+	mutex_unlock(&lgr->rmbs_lock);
+	return rc;
+}
+
 /* prepare and send an add link reject response */
 static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
 {
@@ -631,7 +780,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 				   lnk_new->gid, lnk_new, SMC_LLC_RESP);
 	if (rc)
 		goto out_clear_lnk;
-	/* tbd: rc = smc_llc_cli_rkey_exchange(link, lnk_new); */
+	rc = smc_llc_cli_rkey_exchange(link, lnk_new);
 	if (rc) {
 		rc = 0;
 		goto out_clear_lnk;
@@ -794,6 +943,7 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 		}
 		return;
 	case SMC_LLC_CONFIRM_LINK:
+	case SMC_LLC_ADD_LINK_CONT:
 		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
 			/* a flow is waiting for this message */
 			smc_llc_flow_qentry_set(&lgr->llc_flow_lcl, qentry);
@@ -873,6 +1023,7 @@ static void smc_llc_rx_response(struct smc_link *link,
 		break;
 	case SMC_LLC_ADD_LINK:
 	case SMC_LLC_CONFIRM_LINK:
+	case SMC_LLC_ADD_LINK_CONT:
 	case SMC_LLC_CONFIRM_RKEY:
 	case SMC_LLC_DELETE_RKEY:
 		/* assign responses to the local flow, we requested them */
@@ -1092,6 +1243,10 @@ static struct smc_wr_rx_handler smc_llc_rx_handlers[] = {
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_ADD_LINK
 	},
+	{
+		.handler	= smc_llc_rx_handler,
+		.type		= SMC_LLC_ADD_LINK_CONT
+	},
 	{
 		.handler	= smc_llc_rx_handler,
 		.type		= SMC_LLC_DELETE_LINK
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 4ed4486e5082..97a4f02f5a93 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -28,6 +28,7 @@ enum smc_llc_reqresp {
 enum smc_llc_msg_type {
 	SMC_LLC_CONFIRM_LINK		= 0x01,
 	SMC_LLC_ADD_LINK		= 0x02,
+	SMC_LLC_ADD_LINK_CONT		= 0x03,
 	SMC_LLC_DELETE_LINK		= 0x04,
 	SMC_LLC_CONFIRM_RKEY		= 0x06,
 	SMC_LLC_TEST_LINK		= 0x07,
-- 
cgit v1.2.3-59-g8ed1b


From b1570a87f57e94e9f74b8942840f9bd16bd1aba5 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:42 +0200
Subject: net/smc: final part of add link processing as SMC client

This patch finalizes the ADD_LINK processing of new links. Receive the
CONFIRM_LINK request from peer, complete the link initialization,
register all used buffers with the IB device and finally send the
CONFIRM_LINK response, which completes the ADD_LINK processing.
And activate smc_llc_cli_add_link() in af_smc.c.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  2 +-
 net/smc/smc_llc.c | 73 ++++++++++++++++++++++++++++++++++++++++++++++++++++---
 net/smc/smc_llc.h |  1 +
 3 files changed, 72 insertions(+), 4 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 6663a63be9e4..1afb6e4275f2 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -427,7 +427,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 		return rc;
 	}
 	smc_llc_flow_qentry_clr(&link->lgr->llc_flow_lcl);
-	/* tbd: call smc_llc_cli_add_link(link, qentry); */
+	smc_llc_cli_add_link(link, qentry);
 	return 0;
 }
 
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index a06b618f172e..d56ca60597d4 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -381,7 +381,7 @@ int smc_llc_send_confirm_link(struct smc_link *link,
 	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
 	confllc->link_num = link->link_id;
 	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
-	confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS; /* enforce peer resp. */
+	confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
 	return rc;
@@ -724,6 +724,61 @@ static int smc_llc_cli_add_link_reject(struct smc_llc_qentry *qentry)
 	return smc_llc_send_message(qentry->link, &qentry->msg);
 }
 
+static int smc_llc_cli_conf_link(struct smc_link *link,
+				 struct smc_init_info *ini,
+				 struct smc_link *link_new,
+				 enum smc_lgr_type lgr_new_t)
+{
+	struct smc_link_group *lgr = link->lgr;
+	struct smc_llc_msg_del_link *del_llc;
+	struct smc_llc_qentry *qentry = NULL;
+	int rc = 0;
+
+	/* receive CONFIRM LINK request over RoCE fabric */
+	qentry = smc_llc_wait(lgr, NULL, SMC_LLC_WAIT_FIRST_TIME, 0);
+	if (!qentry) {
+		rc = smc_llc_send_delete_link(link, link_new->link_id,
+					      SMC_LLC_REQ, false,
+					      SMC_LLC_DEL_LOST_PATH);
+		return -ENOLINK;
+	}
+	if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
+		/* received DELETE_LINK instead */
+		del_llc = &qentry->msg.delete_link;
+		qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
+		smc_llc_send_message(link, &qentry->msg);
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+		return -ENOLINK;
+	}
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+
+	rc = smc_ib_modify_qp_rts(link_new);
+	if (rc) {
+		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+					 false, SMC_LLC_DEL_LOST_PATH);
+		return -ENOLINK;
+	}
+	smc_wr_remember_qp_attr(link_new);
+
+	rc = smcr_buf_reg_lgr(link_new);
+	if (rc) {
+		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+					 false, SMC_LLC_DEL_LOST_PATH);
+		return -ENOLINK;
+	}
+
+	/* send CONFIRM LINK response over RoCE fabric */
+	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_RESP);
+	if (rc) {
+		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+					 false, SMC_LLC_DEL_LOST_PATH);
+		return -ENOLINK;
+	}
+	smc_llc_link_active(link_new);
+	lgr->type = lgr_new_t;
+	return 0;
+}
+
 static void smc_llc_save_add_link_info(struct smc_link *link,
 				       struct smc_llc_msg_add_link *add_llc)
 {
@@ -785,7 +840,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 		rc = 0;
 		goto out_clear_lnk;
 	}
-	/* tbd: rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t); */
+	rc = smc_llc_cli_conf_link(link, &ini, lnk_new, lgr_new_t);
 	if (!rc)
 		goto out;
 out_clear_lnk:
@@ -797,6 +852,17 @@ out:
 	return rc;
 }
 
+static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
+{
+	struct smc_llc_qentry *qentry;
+
+	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+
+	mutex_lock(&lgr->llc_conf_mutex);
+	smc_llc_cli_add_link(qentry->link, qentry);
+	mutex_unlock(&lgr->llc_conf_mutex);
+}
+
 /* worker to process an add link message */
 static void smc_llc_add_link_work(struct work_struct *work)
 {
@@ -809,7 +875,8 @@ static void smc_llc_add_link_work(struct work_struct *work)
 		goto out;
 	}
 
-	/* tbd: call smc_llc_process_cli_add_link(lgr); */
+	if (lgr->role == SMC_CLNT)
+		smc_llc_process_cli_add_link(lgr);
 	/* tbd: call smc_llc_process_srv_add_link(lgr); */
 out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 97a4f02f5a93..7c314bbef8c8 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -88,6 +88,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				    int time_out, u8 exp_msg);
 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
+int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From 2d2209f2018943d4152a21eff5b76f1952e0b435 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:43 +0200
Subject: net/smc: first part of add link processing as SMC server

First set of functions to process an ADD_LINK LLC request as an SMC
server. Find an alternate IB device, determine the new link group type
and get the index for the new link. Then initialize the link and send
the ADD_LINK LLC message to the peer. Save the contents of the response,
ready the link, map all used buffers and register the buffers with the
IB device. If any error occurs, stop the processing and clear the link.
And call smc_llc_srv_add_link() in af_smc.c to start second link
establishment after the initial link of a link group was created.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c  |  2 +-
 net/smc/smc_llc.c | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_llc.h |  1 +
 3 files changed, 92 insertions(+), 2 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 1afb6e4275f2..c67272007f41 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -1067,7 +1067,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 	smc_llc_link_active(link);
 
 	/* initial contact - try to establish second link */
-	/* tbd: call smc_llc_srv_add_link(link); */
+	smc_llc_srv_add_link(link);
 	return 0;
 }
 
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index d56ca60597d4..e2f254e21759 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -863,6 +863,94 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
+int smc_llc_srv_add_link(struct smc_link *link)
+{
+	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
+	struct smc_link_group *lgr = link->lgr;
+	struct smc_llc_msg_add_link *add_llc;
+	struct smc_llc_qentry *qentry = NULL;
+	struct smc_link *link_new;
+	struct smc_init_info ini;
+	int lnk_idx, rc = 0;
+
+	/* ignore client add link recommendation, start new flow */
+	ini.vlan_id = lgr->vlan_id;
+	smc_pnet_find_alt_roce(lgr, &ini, link->smcibdev);
+	if (!ini.ib_dev) {
+		lgr_new_t = SMC_LGR_ASYMMETRIC_LOCAL;
+		ini.ib_dev = link->smcibdev;
+		ini.ib_port = link->ibport;
+	}
+	lnk_idx = smc_llc_alloc_alt_link(lgr, lgr_new_t);
+	if (lnk_idx < 0)
+		return 0;
+
+	rc = smcr_link_init(lgr, &lgr->lnk[lnk_idx], lnk_idx, &ini);
+	if (rc)
+		return rc;
+	link_new = &lgr->lnk[lnk_idx];
+	rc = smc_llc_send_add_link(link,
+				   link_new->smcibdev->mac[ini.ib_port - 1],
+				   link_new->gid, link_new, SMC_LLC_REQ);
+	if (rc)
+		goto out_err;
+	/* receive ADD LINK response over the RoCE fabric */
+	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME, SMC_LLC_ADD_LINK);
+	if (!qentry) {
+		rc = -ETIMEDOUT;
+		goto out_err;
+	}
+	add_llc = &qentry->msg.add_link;
+	if (add_llc->hd.flags & SMC_LLC_FLAG_ADD_LNK_REJ) {
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+		rc = -ENOLINK;
+		goto out_err;
+	}
+	if (lgr->type == SMC_LGR_SINGLE &&
+	    (!memcmp(add_llc->sender_gid, link->peer_gid, SMC_GID_SIZE) &&
+	     !memcmp(add_llc->sender_mac, link->peer_mac, ETH_ALEN))) {
+		lgr_new_t = SMC_LGR_ASYMMETRIC_PEER;
+	}
+	smc_llc_save_add_link_info(link_new, add_llc);
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+
+	rc = smc_ib_ready_link(link_new);
+	if (rc)
+		goto out_err;
+	rc = smcr_buf_map_lgr(link_new);
+	if (rc)
+		goto out_err;
+	rc = smcr_buf_reg_lgr(link_new);
+	if (rc)
+		goto out_err;
+	/* tbd: rc = smc_llc_srv_rkey_exchange(link, link_new); */
+	if (rc)
+		goto out_err;
+	/* tbd: rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); */
+	if (rc)
+		goto out_err;
+	return 0;
+out_err:
+	smcr_link_clear(link_new);
+	return rc;
+}
+
+static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
+{
+	struct smc_link *link = lgr->llc_flow_lcl.qentry->link;
+	int rc;
+
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+
+	mutex_lock(&lgr->llc_conf_mutex);
+	rc = smc_llc_srv_add_link(link);
+	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
+		/* delete any asymmetric link */
+		/* tbd: smc_llc_delete_asym_link(lgr); */
+	}
+	mutex_unlock(&lgr->llc_conf_mutex);
+}
+
 /* worker to process an add link message */
 static void smc_llc_add_link_work(struct work_struct *work)
 {
@@ -877,7 +965,8 @@ static void smc_llc_add_link_work(struct work_struct *work)
 
 	if (lgr->role == SMC_CLNT)
 		smc_llc_process_cli_add_link(lgr);
-	/* tbd: call smc_llc_process_srv_add_link(lgr); */
+	else
+		smc_llc_process_srv_add_link(lgr);
 out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 7c314bbef8c8..1a7748d0541f 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -89,6 +89,7 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
 int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
+int smc_llc_srv_add_link(struct smc_link *link);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From 57b499242cb888a32815f8663b60338bcb0b5747 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:44 +0200
Subject: net/smc: rkey processing for a new link as SMC server

Part of SMC server new link establishment is the exchange of rkeys for
used buffers.
Loop over all used RMB buffers and send ADD_LINK_CONTINUE LLC messages
to the peer.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 43 ++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 42 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e2f254e21759..de73432bd72f 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -863,6 +863,47 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
+static int smc_llc_srv_rkey_exchange(struct smc_link *link,
+				     struct smc_link *link_new)
+{
+	struct smc_llc_msg_add_link_cont *addc_llc;
+	struct smc_link_group *lgr = link->lgr;
+	u8 max, num_rkeys_send, num_rkeys_recv;
+	struct smc_llc_qentry *qentry = NULL;
+	struct smc_buf_desc *buf_pos;
+	int buf_lst;
+	int rc = 0;
+	int i;
+
+	mutex_lock(&lgr->rmbs_lock);
+	num_rkeys_send = lgr->conns_num;
+	buf_pos = smc_llc_get_first_rmb(lgr, &buf_lst);
+	do {
+		smc_llc_add_link_cont(link, link_new, &num_rkeys_send,
+				      &buf_lst, &buf_pos);
+		qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_TIME,
+				      SMC_LLC_ADD_LINK_CONT);
+		if (!qentry) {
+			rc = -ETIMEDOUT;
+			goto out;
+		}
+		addc_llc = &qentry->msg.add_link_cont;
+		num_rkeys_recv = addc_llc->num_rkeys;
+		max = min_t(u8, num_rkeys_recv, SMC_LLC_RKEYS_PER_CONT_MSG);
+		for (i = 0; i < max; i++) {
+			smc_rtoken_set(lgr, link->link_idx, link_new->link_idx,
+				       addc_llc->rt[i].rmb_key,
+				       addc_llc->rt[i].rmb_vaddr_new,
+				       addc_llc->rt[i].rmb_key_new);
+			num_rkeys_recv--;
+		}
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+	} while (num_rkeys_send || num_rkeys_recv);
+out:
+	mutex_unlock(&lgr->rmbs_lock);
+	return rc;
+}
+
 int smc_llc_srv_add_link(struct smc_link *link)
 {
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
@@ -923,7 +964,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
 	rc = smcr_buf_reg_lgr(link_new);
 	if (rc)
 		goto out_err;
-	/* tbd: rc = smc_llc_srv_rkey_exchange(link, link_new); */
+	rc = smc_llc_srv_rkey_exchange(link, link_new);
 	if (rc)
 		goto out_err;
 	/* tbd: rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); */
-- 
cgit v1.2.3-59-g8ed1b


From 1551c95b61242b1a20565bae8d711f35a601c4f3 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:45 +0200
Subject: net/smc: final part of add link processing as SMC server

This patch finalizes the ADD_LINK processing of new links. Send the
CONFIRM_LINK request to the peer, receive the response and set link
state to ACTIVE.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 29 ++++++++++++++++++++++++++++-
 1 file changed, 28 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index de73432bd72f..1fefee55e293 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -904,6 +904,33 @@ out:
 	return rc;
 }
 
+static int smc_llc_srv_conf_link(struct smc_link *link,
+				 struct smc_link *link_new,
+				 enum smc_lgr_type lgr_new_t)
+{
+	struct smc_link_group *lgr = link->lgr;
+	struct smc_llc_qentry *qentry = NULL;
+	int rc;
+
+	/* send CONFIRM LINK request over the RoCE fabric */
+	rc = smc_llc_send_confirm_link(link_new, SMC_LLC_REQ);
+	if (rc)
+		return -ENOLINK;
+	/* receive CONFIRM LINK response over the RoCE fabric */
+	qentry = smc_llc_wait(lgr, link, SMC_LLC_WAIT_FIRST_TIME,
+			      SMC_LLC_CONFIRM_LINK);
+	if (!qentry) {
+		/* send DELETE LINK */
+		smc_llc_send_delete_link(link, link_new->link_id, SMC_LLC_REQ,
+					 false, SMC_LLC_DEL_LOST_PATH);
+		return -ENOLINK;
+	}
+	smc_llc_link_active(link_new);
+	lgr->type = lgr_new_t;
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+	return 0;
+}
+
 int smc_llc_srv_add_link(struct smc_link *link)
 {
 	enum smc_lgr_type lgr_new_t = SMC_LGR_SYMMETRIC;
@@ -967,7 +994,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
 	rc = smc_llc_srv_rkey_exchange(link, link_new);
 	if (rc)
 		goto out_err;
-	/* tbd: rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t); */
+	rc = smc_llc_srv_conf_link(link, link_new, lgr_new_t);
 	if (rc)
 		goto out_err;
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From c9a5d243035161f06175a7c6d487c9860e0f179a Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:46 +0200
Subject: net/smc: delete an asymmetric link as SMC server

When a link group has moved from asymmetric to symmetric state, the
dangling asymmetric link can be deleted. Add smc_llc_find_asym_link() to
find the respective link and add smc_llc_delete_asym_link() to delete
it.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 81 ++++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_wr.c  |  2 +-
 net/smc/smc_wr.h  |  1 +
 3 files changed, 82 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 1fefee55e293..9d102c912be9 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -863,6 +863,85 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
+/* find the asymmetric link when 3 links are established  */
+static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
+{
+	int asym_idx = -ENOENT;
+	int i, j, k;
+	bool found;
+
+	/* determine asymmetric link */
+	found = false;
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+			if (!smc_link_usable(&lgr->lnk[i]) ||
+			    !smc_link_usable(&lgr->lnk[j]))
+				continue;
+			if (!memcmp(lgr->lnk[i].gid, lgr->lnk[j].gid,
+				    SMC_GID_SIZE)) {
+				found = true;	/* asym_lnk is i or j */
+				break;
+			}
+		}
+		if (found)
+			break;
+	}
+	if (!found)
+		goto out; /* no asymmetric link */
+	for (k = 0; k < SMC_LINKS_PER_LGR_MAX; k++) {
+		if (!smc_link_usable(&lgr->lnk[k]))
+			continue;
+		if (k != i &&
+		    !memcmp(lgr->lnk[i].peer_gid, lgr->lnk[k].peer_gid,
+			    SMC_GID_SIZE)) {
+			asym_idx = i;
+			break;
+		}
+		if (k != j &&
+		    !memcmp(lgr->lnk[j].peer_gid, lgr->lnk[k].peer_gid,
+			    SMC_GID_SIZE)) {
+			asym_idx = j;
+			break;
+		}
+	}
+out:
+	return (asym_idx < 0) ? NULL : &lgr->lnk[asym_idx];
+}
+
+static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
+{
+	struct smc_link *lnk_new = NULL, *lnk_asym;
+	struct smc_llc_qentry *qentry;
+	int rc;
+
+	lnk_asym = smc_llc_find_asym_link(lgr);
+	if (!lnk_asym)
+		return; /* no asymmetric link */
+	if (!smc_link_downing(&lnk_asym->state))
+		return;
+	/* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */
+	smc_wr_tx_wait_no_pending_sends(lnk_asym);
+	if (!lnk_new)
+		goto out_free;
+	/* change flow type from ADD_LINK into DEL_LINK */
+	lgr->llc_flow_lcl.type = SMC_LLC_FLOW_DEL_LINK;
+	rc = smc_llc_send_delete_link(lnk_new, lnk_asym->link_id, SMC_LLC_REQ,
+				      true, SMC_LLC_DEL_NO_ASYM_NEEDED);
+	if (rc) {
+		smcr_link_down_cond(lnk_new);
+		goto out_free;
+	}
+	qentry = smc_llc_wait(lgr, lnk_new, SMC_LLC_WAIT_TIME,
+			      SMC_LLC_DELETE_LINK);
+	if (!qentry) {
+		smcr_link_down_cond(lnk_new);
+		goto out_free;
+	}
+	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+out_free:
+	smcr_link_clear(lnk_asym);
+}
+
 static int smc_llc_srv_rkey_exchange(struct smc_link *link,
 				     struct smc_link *link_new)
 {
@@ -1014,7 +1093,7 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
 	rc = smc_llc_srv_add_link(link);
 	if (!rc && lgr->type == SMC_LGR_SYMMETRIC) {
 		/* delete any asymmetric link */
-		/* tbd: smc_llc_delete_asym_link(lgr); */
+		smc_llc_delete_asym_link(lgr);
 	}
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 031e6c9561b1..3fd27bea4f7a 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -61,7 +61,7 @@ static inline bool smc_wr_is_tx_pend(struct smc_link *link)
 }
 
 /* wait till all pending tx work requests on the given link are completed */
-static inline int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
+int smc_wr_tx_wait_no_pending_sends(struct smc_link *link)
 {
 	if (wait_event_timeout(link->wr_tx_wait, !smc_wr_is_tx_pend(link),
 			       SMC_WR_TX_WAIT_PENDING_TIME))
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index 3ac99c898418..f7eaeb3391f3 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -106,6 +106,7 @@ void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
 			     smc_wr_tx_filter filter,
 			     smc_wr_tx_dismisser dismisser,
 			     unsigned long data);
+int smc_wr_tx_wait_no_pending_sends(struct smc_link *link);
 
 int smc_wr_rx_register_handler(struct smc_wr_rx_handler *handler);
 int smc_wr_rx_post_init(struct smc_link *link);
-- 
cgit v1.2.3-59-g8ed1b


From 9ec6bf19ec8bb19f4211f6a2bf62c079d46b54ea Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:47 +0200
Subject: net/smc: llc_del_link_work and use the LLC flow for delete link

Introduce a work that is scheduled when a new DELETE_LINK LLC request is
received. The work will call either the SMC client or SMC server
DELETE_LINK processing.
And use the LLC flow framework to process incoming DELETE_LINK LLC
messages, scheduling the llc_del_link_work for those events.
With these changes smc_lgr_forget() is only called by one function and
can be migrated into smc_lgr_cleanup_early().

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 22 ++++++++--------------
 net/smc/smc_core.h |  2 +-
 net/smc/smc_llc.c  | 55 +++++++++++++++++++++++++++++++++++-------------------
 3 files changed, 45 insertions(+), 34 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 2f8faa9c9e8e..a964304283fa 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -193,12 +193,19 @@ static void smc_lgr_unregister_conn(struct smc_connection *conn)
 void smc_lgr_cleanup_early(struct smc_connection *conn)
 {
 	struct smc_link_group *lgr = conn->lgr;
+	struct list_head *lgr_list;
+	spinlock_t *lgr_lock;
 
 	if (!lgr)
 		return;
 
 	smc_conn_free(conn);
-	smc_lgr_forget(lgr);
+	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
+	spin_lock_bh(lgr_lock);
+	/* do not use this link group for new connections */
+	if (!list_empty(lgr_list))
+		list_del_init(lgr_list);
+	spin_unlock_bh(lgr_lock);
 	smc_lgr_schedule_free_work_fast(lgr);
 }
 
@@ -653,19 +660,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 	kfree(lgr);
 }
 
-void smc_lgr_forget(struct smc_link_group *lgr)
-{
-	struct list_head *lgr_list;
-	spinlock_t *lgr_lock;
-
-	lgr_list = smc_lgr_list_head(lgr, &lgr_lock);
-	spin_lock_bh(lgr_lock);
-	/* do not use this link group for new connections */
-	if (!list_empty(lgr_list))
-		list_del_init(lgr_list);
-	spin_unlock_bh(lgr_lock);
-}
-
 static void smcd_unregister_all_dmbs(struct smc_link_group *lgr)
 {
 	int i;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 4e00819e2db7..7fe53feb9dc4 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -254,6 +254,7 @@ struct smc_link_group {
 			struct mutex		llc_conf_mutex;
 						/* protects lgr reconfig. */
 			struct work_struct	llc_add_link_work;
+			struct work_struct	llc_del_link_work;
 			struct work_struct	llc_event_work;
 						/* llc event worker */
 			wait_queue_head_t	llc_waiter;
@@ -343,7 +344,6 @@ struct smc_sock;
 struct smc_clc_msg_accept_confirm;
 struct smc_clc_msg_local;
 
-void smc_lgr_forget(struct smc_link_group *lgr);
 void smc_lgr_cleanup_early(struct smc_connection *conn);
 void smc_lgr_terminate_sched(struct smc_link_group *lgr);
 void smcr_port_add(struct smc_ib_device *smcibdev, u8 ibport);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 9d102c912be9..e4e3910a9624 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1118,22 +1118,18 @@ out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
 
-static void smc_llc_rx_delete_link(struct smc_link *link,
-				   struct smc_llc_msg_del_link *llc)
+static void smc_llc_delete_link_work(struct work_struct *work)
 {
-	struct smc_link_group *lgr = smc_get_lgr(link);
+	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
+						  llc_del_link_work);
 
-	smc_lgr_forget(lgr);
-	if (lgr->role == SMC_SERV) {
-		/* client asks to delete this link, send request */
-		smc_llc_send_delete_link(link, 0, SMC_LLC_REQ, true,
-					 SMC_LLC_DEL_PROG_INIT_TERM);
-	} else {
-		/* server requests to delete this link, send response */
-		smc_llc_send_delete_link(link, 0, SMC_LLC_RESP, true,
-					 SMC_LLC_DEL_PROG_INIT_TERM);
+	if (list_empty(&lgr->list)) {
+		/* link group is terminating */
+		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+		goto out;
 	}
-	smcr_link_down_cond(link);
+out:
+	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
 
 /* process a confirm_rkey request from peer, remote flow */
@@ -1255,8 +1251,30 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 		}
 		break;
 	case SMC_LLC_DELETE_LINK:
-		smc_llc_rx_delete_link(link, &llc->delete_link);
-		break;
+		if (lgr->role == SMC_CLNT) {
+			/* server requests to delete this link, send response */
+			if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
+				/* DEL LINK REQ during ADD LINK SEQ */
+				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+							qentry);
+				wake_up_interruptible(&lgr->llc_waiter);
+			} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+						      qentry)) {
+				schedule_work(&lgr->llc_del_link_work);
+			}
+		} else {
+			if (lgr->llc_flow_lcl.type == SMC_LLC_FLOW_ADD_LINK &&
+			    !lgr->llc_flow_lcl.qentry) {
+				/* DEL LINK REQ during ADD LINK SEQ */
+				smc_llc_flow_qentry_set(&lgr->llc_flow_lcl,
+							qentry);
+				wake_up_interruptible(&lgr->llc_waiter);
+			} else if (smc_llc_flow_start(&lgr->llc_flow_lcl,
+						      qentry)) {
+				schedule_work(&lgr->llc_del_link_work);
+			}
+		}
+		return;
 	case SMC_LLC_CONFIRM_RKEY:
 		/* new request from remote, assign to remote flow */
 		if (smc_llc_flow_start(&lgr->llc_flow_rmt, qentry)) {
@@ -1325,6 +1343,7 @@ static void smc_llc_rx_response(struct smc_link *link,
 			complete(&link->llc_testlink_resp);
 		break;
 	case SMC_LLC_ADD_LINK:
+	case SMC_LLC_DELETE_LINK:
 	case SMC_LLC_CONFIRM_LINK:
 	case SMC_LLC_ADD_LINK_CONT:
 	case SMC_LLC_CONFIRM_RKEY:
@@ -1333,10 +1352,6 @@ static void smc_llc_rx_response(struct smc_link *link,
 		smc_llc_flow_qentry_set(&link->lgr->llc_flow_lcl, qentry);
 		wake_up_interruptible(&link->lgr->llc_waiter);
 		return;
-	case SMC_LLC_DELETE_LINK:
-		if (link->lgr->role == SMC_SERV)
-			smc_lgr_schedule_free_work_fast(link->lgr);
-		break;
 	case SMC_LLC_CONFIRM_RKEY_CONT:
 		/* not used because max links is 3 */
 		break;
@@ -1424,6 +1439,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc)
 
 	INIT_WORK(&lgr->llc_event_work, smc_llc_event_work);
 	INIT_WORK(&lgr->llc_add_link_work, smc_llc_add_link_work);
+	INIT_WORK(&lgr->llc_del_link_work, smc_llc_delete_link_work);
 	INIT_LIST_HEAD(&lgr->llc_event_q);
 	spin_lock_init(&lgr->llc_event_q_lock);
 	spin_lock_init(&lgr->llc_flow_lock);
@@ -1439,6 +1455,7 @@ void smc_llc_lgr_clear(struct smc_link_group *lgr)
 	wake_up_interruptible_all(&lgr->llc_waiter);
 	cancel_work_sync(&lgr->llc_event_work);
 	cancel_work_sync(&lgr->llc_add_link_work);
+	cancel_work_sync(&lgr->llc_del_link_work);
 	if (lgr->delayed_event) {
 		kfree(lgr->delayed_event);
 		lgr->delayed_event = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 9c4168789cc635e1f0d265157b7617259d56bfee Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:48 +0200
Subject: net/smc: delete link processing as SMC client

Add smc_llc_process_cli_delete_link() to process a DELETE_LINK request
as SMC client. When the request is to delete ALL links then terminate
the whole link group. If not, find the link to delete by its link_id,
send the DELETE_LINK response LLC message and then clear the deleted
link. Finally determine and update the link group state.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index e4e3910a9624..cd57b4fb1842 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -863,6 +863,18 @@ static void smc_llc_process_cli_add_link(struct smc_link_group *lgr)
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
+static int smc_llc_active_link_count(struct smc_link_group *lgr)
+{
+	int i, link_count = 0;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (!smc_link_usable(&lgr->lnk[i]))
+			continue;
+		link_count++;
+	}
+	return link_count;
+}
+
 /* find the asymmetric link when 3 links are established  */
 static struct smc_link *smc_llc_find_asym_link(struct smc_link_group *lgr)
 {
@@ -1118,6 +1130,63 @@ out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
 
+static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
+{
+	struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
+	struct smc_llc_msg_del_link *del_llc;
+	struct smc_llc_qentry *qentry;
+	int active_links;
+	int lnk_idx;
+
+	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+	lnk = qentry->link;
+	del_llc = &qentry->msg.delete_link;
+
+	if (del_llc->hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
+		smc_lgr_terminate_sched(lgr);
+		goto out;
+	}
+	mutex_lock(&lgr->llc_conf_mutex);
+	/* delete single link */
+	for (lnk_idx = 0; lnk_idx < SMC_LINKS_PER_LGR_MAX; lnk_idx++) {
+		if (lgr->lnk[lnk_idx].link_id != del_llc->link_num)
+			continue;
+		lnk_del = &lgr->lnk[lnk_idx];
+		break;
+	}
+	del_llc->hd.flags |= SMC_LLC_FLAG_RESP;
+	if (!lnk_del) {
+		/* link was not found */
+		del_llc->reason = htonl(SMC_LLC_DEL_NOLNK);
+		smc_llc_send_message(lnk, &qentry->msg);
+		goto out_unlock;
+	}
+	lnk_asym = smc_llc_find_asym_link(lgr);
+
+	del_llc->reason = 0;
+	smc_llc_send_message(lnk, &qentry->msg); /* response */
+
+	if (smc_link_downing(&lnk_del->state)) {
+		/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+		smc_wr_tx_wait_no_pending_sends(lnk_del);
+	}
+	smcr_link_clear(lnk_del);
+
+	active_links = smc_llc_active_link_count(lgr);
+	if (lnk_del == lnk_asym) {
+		/* expected deletion of asym link, don't change lgr state */
+	} else if (active_links == 1) {
+		lgr->type = SMC_LGR_SINGLE;
+	} else if (!active_links) {
+		lgr->type = SMC_LGR_NONE;
+		smc_lgr_terminate_sched(lgr);
+	}
+out_unlock:
+	mutex_unlock(&lgr->llc_conf_mutex);
+out:
+	kfree(qentry);
+}
+
 static void smc_llc_delete_link_work(struct work_struct *work)
 {
 	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
@@ -1128,6 +1197,9 @@ static void smc_llc_delete_link_work(struct work_struct *work)
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 		goto out;
 	}
+
+	if (lgr->role == SMC_CLNT)
+		smc_llc_process_cli_delete_link(lgr);
 out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 08ae27ddfb6514a8316b17256cd4262bb6931c1f Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:49 +0200
Subject: net/smc: delete link processing as SMC server

Add smc_llc_process_srv_delete_link() to process a DELETE_LINK request
as SMC server. When the request is to delete ALL links then terminate
the whole link group. If not, find the link to delete by its link_id,
send the DELETE_LINK request LLC message and wait for the response.
Regardless of whether a response is received, clear the deleted link and
update the link group state.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 72 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 72 insertions(+)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index cd57b4fb1842..ac065f6d60dc 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1187,6 +1187,76 @@ out:
 	kfree(qentry);
 }
 
+static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
+{
+	struct smc_llc_msg_del_link *del_llc;
+	struct smc_link *lnk, *lnk_del;
+	struct smc_llc_qentry *qentry;
+	int active_links;
+	int i;
+
+	mutex_lock(&lgr->llc_conf_mutex);
+	qentry = smc_llc_flow_qentry_clr(&lgr->llc_flow_lcl);
+	lnk = qentry->link;
+	del_llc = &qentry->msg.delete_link;
+
+	if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
+		/* delete entire lgr */
+		smc_lgr_terminate_sched(lgr);
+		goto out;
+	}
+	/* delete single link */
+	lnk_del = NULL;
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (lgr->lnk[i].link_id == del_llc->link_num) {
+			lnk_del = &lgr->lnk[i];
+			break;
+		}
+	}
+	if (!lnk_del)
+		goto out; /* asymmetric link already deleted */
+
+	if (smc_link_downing(&lnk_del->state)) {
+		/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+		smc_wr_tx_wait_no_pending_sends(lnk_del);
+	}
+	if (!list_empty(&lgr->list)) {
+		/* qentry is either a request from peer (send it back to
+		 * initiate the DELETE_LINK processing), or a locally
+		 * enqueued DELETE_LINK request (forward it)
+		 */
+		if (!smc_llc_send_message(lnk, &qentry->msg)) {
+			struct smc_llc_msg_del_link *del_llc_resp;
+			struct smc_llc_qentry *qentry2;
+
+			qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
+					       SMC_LLC_DELETE_LINK);
+			if (!qentry2) {
+			} else {
+				del_llc_resp = &qentry2->msg.delete_link;
+				smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
+			}
+		}
+	}
+	smcr_link_clear(lnk_del);
+
+	active_links = smc_llc_active_link_count(lgr);
+	if (active_links == 1) {
+		lgr->type = SMC_LGR_SINGLE;
+	} else if (!active_links) {
+		lgr->type = SMC_LGR_NONE;
+		smc_lgr_terminate_sched(lgr);
+	}
+
+	if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
+		/* trigger setup of asymm alt link */
+		/* tbd: call smc_llc_srv_add_link_local(lnk); */
+	}
+out:
+	mutex_unlock(&lgr->llc_conf_mutex);
+	kfree(qentry);
+}
+
 static void smc_llc_delete_link_work(struct work_struct *work)
 {
 	struct smc_link_group *lgr = container_of(work, struct smc_link_group,
@@ -1200,6 +1270,8 @@ static void smc_llc_delete_link_work(struct work_struct *work)
 
 	if (lgr->role == SMC_CLNT)
 		smc_llc_process_cli_delete_link(lgr);
+	else
+		smc_llc_process_srv_delete_link(lgr);
 out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 4dadd151b26589fd0520feb97c93ee981b393a99 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sun, 3 May 2020 14:38:50 +0200
Subject: net/smc: enqueue local LLC messages

As SMC server, when a second link was deleted, trigger the setup of an
asymmetric link. Do this by enqueueing a local ADD_LINK message which
is processed by the LLC layer as if it were received from peer. Do the
same when a new IB port became active and a new link could be created.
smc_llc_srv_add_link_local() enqueues a local ADD_LINK message.
And smc_llc_srv_delete_link_local() is used the same way to enqueue a
local DELETE_LINK message. This is used when an IB port is no longer
active.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  3 ++-
 net/smc/smc_llc.c  | 30 +++++++++++++++++++++++++++++-
 net/smc/smc_llc.h  |  2 ++
 3 files changed, 33 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a964304283fa..32a6cadc5c1f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -883,7 +883,7 @@ static void smcr_link_up(struct smc_link_group *lgr,
 		link = smc_llc_usable_link(lgr);
 		if (!link)
 			return;
-		/* tbd: call smc_llc_srv_add_link_local(link); */
+		smc_llc_srv_add_link_local(link);
 	} else {
 		/* invite server to start add link processing */
 		u8 gid[SMC_GID_SIZE];
@@ -954,6 +954,7 @@ static void smcr_link_down(struct smc_link *lnk)
 
 	if (lgr->role == SMC_SERV) {
 		/* trigger local delete link processing */
+		smc_llc_srv_delete_link_local(to_lnk, del_link_id);
 	} else {
 		if (lgr->llc_flow_lcl.type != SMC_LLC_FLOW_NONE) {
 			/* another llc task is ongoing */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index ac065f6d60dc..7675ccd6f3c3 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -159,6 +159,8 @@ struct smc_llc_qentry {
 	union smc_llc_msg msg;
 };
 
+static void smc_llc_enqueue(struct smc_link *link, union smc_llc_msg *llc);
+
 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow)
 {
 	struct smc_llc_qentry *qentry = flow->qentry;
@@ -1110,6 +1112,17 @@ static void smc_llc_process_srv_add_link(struct smc_link_group *lgr)
 	mutex_unlock(&lgr->llc_conf_mutex);
 }
 
+/* enqueue a local ADD_LINK req to trigger a new add_link flow, SMC_SERV only */
+void smc_llc_srv_add_link_local(struct smc_link *link)
+{
+	struct smc_llc_msg_add_link add_llc = {0};
+
+	add_llc.hd.length = sizeof(add_llc);
+	add_llc.hd.common.type = SMC_LLC_ADD_LINK;
+	/* dev and port are left zeroed, we as server ignore client data anyway */
+	smc_llc_enqueue(link, (union smc_llc_msg *)&add_llc);
+}
+
 /* worker to process an add link message */
 static void smc_llc_add_link_work(struct work_struct *work)
 {
@@ -1130,6 +1143,21 @@ out:
 	smc_llc_flow_stop(lgr, &lgr->llc_flow_lcl);
 }
 
+/* enqueue a local orderly DELETE_LINK msg (reason SMC_LLC_DEL_LOST_PATH) for
+ * link id del_link_id to trigger a new del_link flow, only for role SMC_SERV
+ */
+void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id)
+{
+	struct smc_llc_msg_del_link del_llc = {0};
+
+	del_llc.hd.length = sizeof(del_llc);
+	del_llc.hd.common.type = SMC_LLC_DELETE_LINK;
+	del_llc.link_num = del_link_id;
+	del_llc.reason = htonl(SMC_LLC_DEL_LOST_PATH);
+	del_llc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+	smc_llc_enqueue(link, (union smc_llc_msg *)&del_llc);
+}
+
 static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 {
 	struct smc_link *lnk_del = NULL, *lnk_asym, *lnk;
@@ -1250,7 +1278,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 
 	if (lgr->type == SMC_LGR_SINGLE && !list_empty(&lgr->list)) {
 		/* trigger setup of asymm alt link */
-		/* tbd: call smc_llc_srv_add_link_local(lnk); */
+		smc_llc_srv_add_link_local(lnk);
 	}
 out:
 	mutex_unlock(&lgr->llc_conf_mutex);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 1a7748d0541f..c335fc5f363c 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -69,6 +69,7 @@ int smc_llc_send_add_link(struct smc_link *link, u8 mac[], u8 gid[],
 int smc_llc_send_delete_link(struct smc_link *link, u8 link_del_id,
 			     enum smc_llc_reqresp reqresp, bool orderly,
 			     u32 reason);
+void smc_llc_srv_delete_link_local(struct smc_link *link, u8 del_link_id);
 void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
 void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
@@ -90,6 +91,7 @@ struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
 int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
 int smc_llc_srv_add_link(struct smc_link *link);
+void smc_llc_srv_add_link_local(struct smc_link *link);
 int smc_llc_init(void) __init;
 
 #endif /* SMC_LLC_H */
-- 
cgit v1.2.3-59-g8ed1b


From 2326aa011967f0afbcba7fe1a005d01f8b12900b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 10:43:48 +0000
Subject: ath10k: fix possible memory leak in ath10k_bmi_lz_data_large()

'cmd' is allocated in ath10k_bmi_lz_data_large() and must be freed
before returning from the error handling path, otherwise it is
leaked.

Fixes: d58f466a5dee ("ath10k: add large size for BMI download data for SDIO")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427104348.13570-1-weiyongjun1@huawei.com
---
 drivers/net/wireless/ath/ath10k/bmi.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ath/ath10k/bmi.c b/drivers/net/wireless/ath/ath10k/bmi.c
index ea908107581d..5b6db6e66f65 100644
--- a/drivers/net/wireless/ath/ath10k/bmi.c
+++ b/drivers/net/wireless/ath/ath10k/bmi.c
@@ -380,6 +380,7 @@ static int ath10k_bmi_lz_data_large(struct ath10k *ar, const void *buffer, u32 l
 						  NULL, NULL);
 		if (ret) {
 			ath10k_warn(ar, "unable to write to the device\n");
+			kfree(cmd);
 			return ret;
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From f76f750aeea47fd98b6502eb6d37f84ca33662bf Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Mon, 27 Apr 2020 10:46:21 +0000
Subject: ath11k: fix error return code in ath11k_dp_alloc()

Fix to return negative error code -ENOMEM from the error handling
case instead of 0, as done elsewhere in this function.

Fixes: d0998eb84ed3 ("ath11k: optimise ath11k_dp_tx_completion_handler")
Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200427104621.23752-1-weiyongjun1@huawei.com
---
 drivers/net/wireless/ath/ath11k/dp.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 8d6fb848f8c4..145015d2f49c 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -911,8 +911,10 @@ int ath11k_dp_alloc(struct ath11k_base *ab)
 		dp->tx_ring[i].tx_status_head = 0;
 		dp->tx_ring[i].tx_status_tail = DP_TX_COMP_RING_SIZE - 1;
 		dp->tx_ring[i].tx_status = kmalloc(size, GFP_KERNEL);
-		if (!dp->tx_ring[i].tx_status)
+		if (!dp->tx_ring[i].tx_status) {
+			ret = -ENOMEM;
 			goto fail_cmn_srng_cleanup;
+		}
 	}
 
 	for (i = 0; i < HAL_DSCP_TID_MAP_TBL_NUM_ENTRIES_MAX; i++)
-- 
cgit v1.2.3-59-g8ed1b


From 66307ca040575e6200a5d40656e2dee71d9da91c Mon Sep 17 00:00:00 2001
From: Sathishkumar Muruganandam <murugana@codeaurora.org>
Date: Tue, 28 Apr 2020 10:15:25 +0530
Subject: ath11k: fix mgmt_tx_wmi cmd sent to FW for deleted vdev

In a Multi-AP VAP scenario with frequent interface up-down, there is a
chance that ath11k_mgmt_over_wmi_tx_work() will dequeue an skb
corresponding to a currently deleted/stopped vdev.

FW will assert on receiving a mgmt_tx_wmi cmd for an already deleted
vdev. Hence add validation checks that the arvif is present on the
corresponding ar before sending the mgmt_tx_wmi cmd.

Signed-off-by: Sathishkumar Muruganandam <murugana@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588049126-1490-2-git-send-email-murugana@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 29 ++++++++++++++++++++++-------
 1 file changed, 22 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 065b7d6d4ab2..2b3a63ac216c 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3793,15 +3793,30 @@ static void ath11k_mgmt_over_wmi_tx_work(struct work_struct *work)
 
 	while ((skb = skb_dequeue(&ar->wmi_mgmt_tx_queue)) != NULL) {
 		info = IEEE80211_SKB_CB(skb);
-		arvif = ath11k_vif_to_arvif(info->control.vif);
-
-		ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb);
-		if (ret) {
-			ath11k_warn(ar->ab, "failed to transmit management frame %d\n",
-				    ret);
+		if (!info->control.vif) {
+			ath11k_warn(ar->ab, "no vif found for mgmt frame, flags 0x%x\n",
+				    info->control.flags);
 			ieee80211_free_txskb(ar->hw, skb);
+			continue;
+		}
+
+		arvif = ath11k_vif_to_arvif(info->control.vif);
+		if (ar->allocated_vdev_map & (1LL << arvif->vdev_id) &&
+		    arvif->is_started) {
+			ret = ath11k_mac_mgmt_tx_wmi(ar, arvif, skb);
+			if (ret) {
+				ath11k_warn(ar->ab, "failed to tx mgmt frame, vdev_id %d :%d\n",
+					    arvif->vdev_id, ret);
+				ieee80211_free_txskb(ar->hw, skb);
+			} else {
+				atomic_inc(&ar->num_pending_mgmt_tx);
+			}
 		} else {
-			atomic_inc(&ar->num_pending_mgmt_tx);
+			ath11k_warn(ar->ab,
+				    "dropping mgmt frame for vdev %d, flags 0x%x is_started %d\n",
+				    arvif->vdev_id, info->control.flags,
+				    arvif->is_started);
+			ieee80211_free_txskb(ar->hw, skb);
 		}
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 657680cc86ca4b5d49b5bb313b1320fb8439528c Mon Sep 17 00:00:00 2001
From: Sathishkumar Muruganandam <murugana@codeaurora.org>
Date: Tue, 28 Apr 2020 10:15:26 +0530
Subject: ath11k: add DBG_MAC prints to track vdev events

Added DBG_MAC prints to track vdev create, delete, start and
stop events.

Signed-off-by: Sathishkumar Muruganandam <murugana@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588049126-1490-3-git-send-email-murugana@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 2b3a63ac216c..f33c6d714da8 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -4229,6 +4229,8 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 	}
 
 	ar->num_created_vdevs++;
+	ath11k_dbg(ab, ATH11K_DBG_MAC, "vdev %pM created, vdev_id %d\n",
+		   vif->addr, arvif->vdev_id);
 	ar->allocated_vdev_map |= 1LL << arvif->vdev_id;
 	ab->free_vdev_map &= ~(1LL << arvif->vdev_id);
 
@@ -4399,6 +4401,8 @@ static void ath11k_mac_op_remove_interface(struct ieee80211_hw *hw,
 			    arvif->vdev_id, ret);
 
 	ar->num_created_vdevs--;
+	ath11k_dbg(ab, ATH11K_DBG_MAC, "vdev %pM deleted, vdev_id %d\n",
+		   vif->addr, arvif->vdev_id);
 	ar->allocated_vdev_map &= ~(1LL << arvif->vdev_id);
 	ab->free_vdev_map |= 1LL << (arvif->vdev_id);
 
@@ -4664,6 +4668,8 @@ ath11k_mac_vdev_start_restart(struct ath11k_vif *arvif,
 	}
 
 	ar->num_started_vdevs++;
+	ath11k_dbg(ab, ATH11K_DBG_MAC,  "vdev %pM started, vdev_id %d\n",
+		   arvif->vif->addr, arvif->vdev_id);
 
 	/* Enable CAC Flag in the driver by checking the channel DFS cac time,
 	 * i.e dfs_cac_ms value which will be valid only for radar channels
@@ -4722,6 +4728,8 @@ static int ath11k_mac_vdev_stop(struct ath11k_vif *arvif)
 	WARN_ON(ar->num_started_vdevs == 0);
 
 	ar->num_started_vdevs--;
+	ath11k_dbg(ar->ab, ATH11K_DBG_MAC, "vdev %pM stopped, vdev_id %d\n",
+		   arvif->vif->addr, arvif->vdev_id);
 
 	if (test_bit(ATH11K_CAC_RUNNING, &ar->dev_flags)) {
 		clear_bit(ATH11K_CAC_RUNNING, &ar->dev_flags);
-- 
cgit v1.2.3-59-g8ed1b


From 421ae61c10567c6f347619b50b25cbe9d6e1f407 Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 24 Apr 2020 15:38:12 +0800
Subject: rtw88: 8822c: update phy parameter tables to v50

Update RTL8822C devices' phy tables to v50.

The new parameters introduce a new RFE type 5 for some new modules.
Also add a new regulatory domain, CN, for power limits.

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424073812.26896-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h          |     1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c      |     1 +
 .../net/wireless/realtek/rtw88/rtw8822c_table.c    | 16870 +++++++++++++++++--
 .../net/wireless/realtek/rtw88/rtw8822c_table.h    |     1 +
 4 files changed, 15545 insertions(+), 1328 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 157aca641f6d..cb0dd30e9683 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -339,6 +339,7 @@ enum rtw_regulatory_domains {
 	RTW_REGD_CHILE		= 6,
 	RTW_REGD_UKRAINE	= 7,
 	RTW_REGD_MEXICO		= 8,
+	RTW_REGD_CN		= 9,
 	RTW_REGD_WW,
 
 	RTW_REGD_MAX
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 8dd92136145d..38a096d5af6f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -3899,6 +3899,7 @@ static const struct rtw_rfe_def rtw8822c_rfe_defs[] = {
 	[0] = RTW_DEF_RFE(8822c, 0, 0),
 	[1] = RTW_DEF_RFE(8822c, 0, 0),
 	[2] = RTW_DEF_RFE(8822c, 0, 0),
+	[5] = RTW_DEF_RFE(8822c, 0, 5),
 };
 
 static const struct rtw_hw_reg rtw8822c_dig[] = {
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
index d102a2c27757..08d01a7bb1bf 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.c
@@ -13,6 +13,7 @@ static const u32 rtw8822c_mac[] = {
 RTW_DECL_TABLE_PHY_COND(rtw8822c_mac, rtw_phy_cfg_mac);
 
 static const u32 rtw8822c_agc[] = {
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
 		0x1D90, 0x300001FF,
 		0x1D90, 0x300101FE,
 		0x1D90, 0x300201FD,
@@ -77,51 +78,313 @@ static const u32 rtw8822c_agc[] = {
 		0x1D90, 0x303D0003,
 		0x1D90, 0x303E0002,
 		0x1D90, 0x303F0001,
-		0x1D90, 0x304000FF,
-		0x1D90, 0x304100FF,
-		0x1D90, 0x304200FF,
-		0x1D90, 0x304300FF,
-		0x1D90, 0x304400FE,
-		0x1D90, 0x304500FD,
-		0x1D90, 0x304600FC,
-		0x1D90, 0x304700FB,
-		0x1D90, 0x304800FA,
-		0x1D90, 0x304900F9,
-		0x1D90, 0x304A00F8,
-		0x1D90, 0x304B00F7,
-		0x1D90, 0x304C00F6,
-		0x1D90, 0x304D00F5,
-		0x1D90, 0x304E00F4,
-		0x1D90, 0x304F00F3,
-		0x1D90, 0x305000F2,
-		0x1D90, 0x305100F1,
-		0x1D90, 0x305200F0,
-		0x1D90, 0x305300EF,
-		0x1D90, 0x305400EE,
-		0x1D90, 0x305500ED,
-		0x1D90, 0x305600EC,
-		0x1D90, 0x305700EB,
-		0x1D90, 0x305800EA,
-		0x1D90, 0x305900E9,
-		0x1D90, 0x305A00E8,
-		0x1D90, 0x305B00E7,
-		0x1D90, 0x305C00E6,
-		0x1D90, 0x305D00C7,
-		0x1D90, 0x305E00C6,
-		0x1D90, 0x305F00C5,
-		0x1D90, 0x306000C4,
-		0x1D90, 0x306100C3,
-		0x1D90, 0x306200C2,
-		0x1D90, 0x306300A4,
-		0x1D90, 0x306400A3,
-		0x1D90, 0x306500A2,
-		0x1D90, 0x30660086,
-		0x1D90, 0x30670085,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x300001FF,
+		0x1D90, 0x300101FE,
+		0x1D90, 0x300201FD,
+		0x1D90, 0x300301FC,
+		0x1D90, 0x300401FB,
+		0x1D90, 0x300501FA,
+		0x1D90, 0x300601F9,
+		0x1D90, 0x300701F8,
+		0x1D90, 0x300801F7,
+		0x1D90, 0x300901F6,
+		0x1D90, 0x300A01F5,
+		0x1D90, 0x300B01F4,
+		0x1D90, 0x300C01F3,
+		0x1D90, 0x300D01F2,
+		0x1D90, 0x300E01F1,
+		0x1D90, 0x300F01F0,
+		0x1D90, 0x301001EF,
+		0x1D90, 0x301101EE,
+		0x1D90, 0x301201ED,
+		0x1D90, 0x301301EC,
+		0x1D90, 0x301401EB,
+		0x1D90, 0x301501EA,
+		0x1D90, 0x301601E9,
+		0x1D90, 0x301701E8,
+		0x1D90, 0x301801E7,
+		0x1D90, 0x301901E5,
+		0x1D90, 0x301A01E4,
+		0x1D90, 0x301B01C5,
+		0x1D90, 0x301C01C4,
+		0x1D90, 0x301D01C3,
+		0x1D90, 0x301E01C2,
+		0x1D90, 0x301F0188,
+		0x1D90, 0x30200187,
+		0x1D90, 0x30210186,
+		0x1D90, 0x30220184,
+		0x1D90, 0x30230183,
+		0x1D90, 0x30240182,
+		0x1D90, 0x30250181,
+		0x1D90, 0x30260148,
+		0x1D90, 0x30270147,
+		0x1D90, 0x30280146,
+		0x1D90, 0x30290144,
+		0x1D90, 0x302A0143,
+		0x1D90, 0x302B0142,
+		0x1D90, 0x302C0141,
+		0x1D90, 0x302D00C8,
+		0x1D90, 0x302E00C7,
+		0x1D90, 0x302F00C6,
+		0x1D90, 0x303000C5,
+		0x1D90, 0x303100C4,
+		0x1D90, 0x303200C3,
+		0x1D90, 0x30330048,
+		0x1D90, 0x30340047,
+		0x1D90, 0x30350046,
+		0x1D90, 0x30360045,
+		0x1D90, 0x30370025,
+		0x1D90, 0x30380024,
+		0x1D90, 0x30390023,
+		0x1D90, 0x303A0022,
+		0x1D90, 0x303B0021,
+		0x1D90, 0x303C0020,
+		0x1D90, 0x303D0003,
+		0x1D90, 0x303E0002,
+		0x1D90, 0x303F0001,
+	0xA0000000,	0x00000000,
+		0x1D90, 0x300001FF,
+		0x1D90, 0x300101FE,
+		0x1D90, 0x300201FD,
+		0x1D90, 0x300301FC,
+		0x1D90, 0x300401FB,
+		0x1D90, 0x300501FA,
+		0x1D90, 0x300601F9,
+		0x1D90, 0x300701F8,
+		0x1D90, 0x300801F7,
+		0x1D90, 0x300901F6,
+		0x1D90, 0x300A01F5,
+		0x1D90, 0x300B01F4,
+		0x1D90, 0x300C01F3,
+		0x1D90, 0x300D01F2,
+		0x1D90, 0x300E01F1,
+		0x1D90, 0x300F01F0,
+		0x1D90, 0x301001EF,
+		0x1D90, 0x301101EE,
+		0x1D90, 0x301201ED,
+		0x1D90, 0x301301EC,
+		0x1D90, 0x301401EB,
+		0x1D90, 0x301501EA,
+		0x1D90, 0x301601E9,
+		0x1D90, 0x301701E8,
+		0x1D90, 0x301801E7,
+		0x1D90, 0x301901E5,
+		0x1D90, 0x301A01E4,
+		0x1D90, 0x301B01C5,
+		0x1D90, 0x301C01C4,
+		0x1D90, 0x301D01C3,
+		0x1D90, 0x301E01C2,
+		0x1D90, 0x301F0188,
+		0x1D90, 0x30200187,
+		0x1D90, 0x30210186,
+		0x1D90, 0x30220184,
+		0x1D90, 0x30230183,
+		0x1D90, 0x30240182,
+		0x1D90, 0x30250181,
+		0x1D90, 0x30260148,
+		0x1D90, 0x30270147,
+		0x1D90, 0x30280146,
+		0x1D90, 0x30290144,
+		0x1D90, 0x302A0143,
+		0x1D90, 0x302B0142,
+		0x1D90, 0x302C0141,
+		0x1D90, 0x302D00C8,
+		0x1D90, 0x302E00C7,
+		0x1D90, 0x302F00C6,
+		0x1D90, 0x303000C5,
+		0x1D90, 0x303100C4,
+		0x1D90, 0x303200C3,
+		0x1D90, 0x30330048,
+		0x1D90, 0x30340047,
+		0x1D90, 0x30350046,
+		0x1D90, 0x30360045,
+		0x1D90, 0x30370025,
+		0x1D90, 0x30380024,
+		0x1D90, 0x30390023,
+		0x1D90, 0x303A0022,
+		0x1D90, 0x303B0021,
+		0x1D90, 0x303C0020,
+		0x1D90, 0x303D0003,
+		0x1D90, 0x303E0002,
+		0x1D90, 0x303F0001,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x304001FD,
+		0x1D90, 0x304101FC,
+		0x1D90, 0x304201FB,
+		0x1D90, 0x304301FA,
+		0x1D90, 0x304401F9,
+		0x1D90, 0x304501F8,
+		0x1D90, 0x304601F7,
+		0x1D90, 0x304701F6,
+		0x1D90, 0x304801F5,
+		0x1D90, 0x304901F4,
+		0x1D90, 0x304A01F3,
+		0x1D90, 0x304B01F2,
+		0x1D90, 0x304C01F1,
+		0x1D90, 0x304D01F0,
+		0x1D90, 0x304E01EF,
+		0x1D90, 0x304F00EE,
+		0x1D90, 0x305000ED,
+		0x1D90, 0x305100EC,
+		0x1D90, 0x305200EB,
+		0x1D90, 0x305300EA,
+		0x1D90, 0x305400E9,
+		0x1D90, 0x305500E8,
+		0x1D90, 0x305600E7,
+		0x1D90, 0x305700E6,
+		0x1D90, 0x305800E5,
+		0x1D90, 0x305900E4,
+		0x1D90, 0x305A00E3,
+		0x1D90, 0x305B00C3,
+		0x1D90, 0x305C00C2,
+		0x1D90, 0x305D00A4,
+		0x1D90, 0x305E00A3,
+		0x1D90, 0x305F00A2,
+		0x1D90, 0x306000A1,
+		0x1D90, 0x30610085,
+		0x1D90, 0x30620084,
+		0x1D90, 0x30630083,
+		0x1D90, 0x30640082,
+		0x1D90, 0x30650069,
+		0x1D90, 0x30660068,
+		0x1D90, 0x30670067,
+		0x1D90, 0x30680066,
+		0x1D90, 0x30690065,
+		0x1D90, 0x306A0064,
+		0x1D90, 0x306B0063,
+		0x1D90, 0x306C0043,
+		0x1D90, 0x306D0042,
+		0x1D90, 0x306E0041,
+		0x1D90, 0x306F0025,
+		0x1D90, 0x30700024,
+		0x1D90, 0x30710023,
+		0x1D90, 0x30720022,
+		0x1D90, 0x30730021,
+		0x1D90, 0x30740020,
+		0x1D90, 0x30750004,
+		0x1D90, 0x30760003,
+		0x1D90, 0x30770002,
+		0x1D90, 0x30780001,
+		0x1D90, 0x30790000,
+		0x1D90, 0x307A0000,
+		0x1D90, 0x307B0000,
+		0x1D90, 0x307C0000,
+		0x1D90, 0x307D0000,
+		0x1D90, 0x307E0000,
+		0x1D90, 0x307F0000,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x304001FD,
+		0x1D90, 0x304101FC,
+		0x1D90, 0x304201FB,
+		0x1D90, 0x304301FA,
+		0x1D90, 0x304401F9,
+		0x1D90, 0x304501F8,
+		0x1D90, 0x304601F7,
+		0x1D90, 0x304701F6,
+		0x1D90, 0x304801F5,
+		0x1D90, 0x304901F4,
+		0x1D90, 0x304A01F3,
+		0x1D90, 0x304B01F2,
+		0x1D90, 0x304C01F1,
+		0x1D90, 0x304D01F0,
+		0x1D90, 0x304E01EF,
+		0x1D90, 0x304F00EE,
+		0x1D90, 0x305000ED,
+		0x1D90, 0x305100EC,
+		0x1D90, 0x305200EB,
+		0x1D90, 0x305300EA,
+		0x1D90, 0x305400E9,
+		0x1D90, 0x305500E8,
+		0x1D90, 0x305600E7,
+		0x1D90, 0x305700E6,
+		0x1D90, 0x305800E5,
+		0x1D90, 0x305900E4,
+		0x1D90, 0x305A00E3,
+		0x1D90, 0x305B00C3,
+		0x1D90, 0x305C00C2,
+		0x1D90, 0x305D00A4,
+		0x1D90, 0x305E00A3,
+		0x1D90, 0x305F00A2,
+		0x1D90, 0x306000A1,
+		0x1D90, 0x30610085,
+		0x1D90, 0x30620084,
+		0x1D90, 0x30630083,
+		0x1D90, 0x30640082,
+		0x1D90, 0x30650069,
+		0x1D90, 0x30660068,
+		0x1D90, 0x30670067,
+		0x1D90, 0x30680066,
+		0x1D90, 0x30690065,
+		0x1D90, 0x306A0064,
+		0x1D90, 0x306B0063,
+		0x1D90, 0x306C0043,
+		0x1D90, 0x306D0042,
+		0x1D90, 0x306E0041,
+		0x1D90, 0x306F0025,
+		0x1D90, 0x30700024,
+		0x1D90, 0x30710023,
+		0x1D90, 0x30720022,
+		0x1D90, 0x30730021,
+		0x1D90, 0x30740020,
+		0x1D90, 0x30750004,
+		0x1D90, 0x30760003,
+		0x1D90, 0x30770002,
+		0x1D90, 0x30780001,
+		0x1D90, 0x30790000,
+		0x1D90, 0x307A0000,
+		0x1D90, 0x307B0000,
+		0x1D90, 0x307C0000,
+		0x1D90, 0x307D0000,
+		0x1D90, 0x307E0000,
+		0x1D90, 0x307F0000,
+	0xA0000000,	0x00000000,
+		0x1D90, 0x3040011F,
+		0x1D90, 0x3041011F,
+		0x1D90, 0x3042011F,
+		0x1D90, 0x3043011F,
+		0x1D90, 0x3044011F,
+		0x1D90, 0x3045011F,
+		0x1D90, 0x3046011F,
+		0x1D90, 0x3047011F,
+		0x1D90, 0x3048011F,
+		0x1D90, 0x3049011F,
+		0x1D90, 0x304A011F,
+		0x1D90, 0x304B011F,
+		0x1D90, 0x304C011F,
+		0x1D90, 0x304D011F,
+		0x1D90, 0x304E011F,
+		0x1D90, 0x304F00F4,
+		0x1D90, 0x305000F3,
+		0x1D90, 0x305100F2,
+		0x1D90, 0x305200F1,
+		0x1D90, 0x305300F0,
+		0x1D90, 0x305400EF,
+		0x1D90, 0x305500EE,
+		0x1D90, 0x305600ED,
+		0x1D90, 0x305700EC,
+		0x1D90, 0x305800EB,
+		0x1D90, 0x305900EA,
+		0x1D90, 0x305A00E9,
+		0x1D90, 0x305B00E8,
+		0x1D90, 0x305C00E7,
+		0x1D90, 0x305D00E6,
+		0x1D90, 0x305E00E4,
+		0x1D90, 0x305F00E3,
+		0x1D90, 0x306000E2,
+		0x1D90, 0x306100C4,
+		0x1D90, 0x306200C3,
+		0x1D90, 0x306300C2,
+		0x1D90, 0x306400A4,
+		0x1D90, 0x306500A3,
+		0x1D90, 0x306600A2,
+		0x1D90, 0x306700A1,
 		0x1D90, 0x30680084,
 		0x1D90, 0x30690083,
 		0x1D90, 0x306A0082,
-		0x1D90, 0x306B0069,
-		0x1D90, 0x306C0068,
+		0x1D90, 0x306B0081,
+		0x1D90, 0x306C0080,
 		0x1D90, 0x306D0067,
 		0x1D90, 0x306E0066,
 		0x1D90, 0x306F0065,
@@ -130,131 +393,395 @@ static const u32 rtw8822c_agc[] = {
 		0x1D90, 0x30720044,
 		0x1D90, 0x30730043,
 		0x1D90, 0x30740042,
-		0x1D90, 0x30750025,
+		0x1D90, 0x30750041,
 		0x1D90, 0x30760024,
 		0x1D90, 0x30770023,
 		0x1D90, 0x30780022,
 		0x1D90, 0x30790021,
 		0x1D90, 0x307A0020,
-		0x1D90, 0x307B0003,
-		0x1D90, 0x307C0002,
-		0x1D90, 0x307D0001,
-		0x1D90, 0x307E0000,
+		0x1D90, 0x307B0004,
+		0x1D90, 0x307C0003,
+		0x1D90, 0x307D0002,
+		0x1D90, 0x307E0001,
 		0x1D90, 0x307F0000,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x308000FA,
+		0x1D90, 0x308100F9,
+		0x1D90, 0x308200F8,
+		0x1D90, 0x308300F7,
+		0x1D90, 0x308400F6,
+		0x1D90, 0x308500F5,
+		0x1D90, 0x308600F4,
+		0x1D90, 0x308700F3,
+		0x1D90, 0x308800F2,
+		0x1D90, 0x308900F1,
+		0x1D90, 0x308A00F0,
+		0x1D90, 0x308B00EF,
+		0x1D90, 0x308C00EE,
+		0x1D90, 0x308D00ED,
+		0x1D90, 0x308E00EC,
+		0x1D90, 0x308F00EB,
+		0x1D90, 0x309000EA,
+		0x1D90, 0x309100E8,
+		0x1D90, 0x309200E7,
+		0x1D90, 0x309300E6,
+		0x1D90, 0x309400E5,
+		0x1D90, 0x309500E4,
+		0x1D90, 0x309600C4,
+		0x1D90, 0x309700C3,
+		0x1D90, 0x309800C2,
+		0x1D90, 0x309900C1,
+		0x1D90, 0x309A00A3,
+		0x1D90, 0x309B00A2,
+		0x1D90, 0x309C00A1,
+		0x1D90, 0x309D0085,
+		0x1D90, 0x309E0084,
+		0x1D90, 0x309F0083,
+		0x1D90, 0x30A00082,
+		0x1D90, 0x30A10081,
+		0x1D90, 0x30A20067,
+		0x1D90, 0x30A30066,
+		0x1D90, 0x30A40065,
+		0x1D90, 0x30A50064,
+		0x1D90, 0x30A60063,
+		0x1D90, 0x30A70044,
+		0x1D90, 0x30A80043,
+		0x1D90, 0x30A90042,
+		0x1D90, 0x30AA0026,
+		0x1D90, 0x30AB0025,
+		0x1D90, 0x30AC0024,
+		0x1D90, 0x30AD0023,
+		0x1D90, 0x30AE0022,
+		0x1D90, 0x30AF0021,
+		0x1D90, 0x30B00005,
+		0x1D90, 0x30B10004,
+		0x1D90, 0x30B20003,
+		0x1D90, 0x30B30002,
+		0x1D90, 0x30B40001,
+		0x1D90, 0x30B50000,
+		0x1D90, 0x30B60000,
+		0x1D90, 0x30B70000,
+		0x1D90, 0x30B80000,
+		0x1D90, 0x30B90000,
+		0x1D90, 0x30BA0000,
+		0x1D90, 0x30BB0000,
+		0x1D90, 0x30BC0000,
+		0x1D90, 0x30BD0000,
+		0x1D90, 0x30BE0000,
+		0x1D90, 0x30BF0000,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x308000FA,
+		0x1D90, 0x308100F9,
+		0x1D90, 0x308200F8,
+		0x1D90, 0x308300F7,
+		0x1D90, 0x308400F6,
+		0x1D90, 0x308500F5,
+		0x1D90, 0x308600F4,
+		0x1D90, 0x308700F3,
+		0x1D90, 0x308800F2,
+		0x1D90, 0x308900F1,
+		0x1D90, 0x308A00F0,
+		0x1D90, 0x308B00EF,
+		0x1D90, 0x308C00EE,
+		0x1D90, 0x308D00ED,
+		0x1D90, 0x308E00EC,
+		0x1D90, 0x308F00EB,
+		0x1D90, 0x309000EA,
+		0x1D90, 0x309100E8,
+		0x1D90, 0x309200E7,
+		0x1D90, 0x309300E6,
+		0x1D90, 0x309400E5,
+		0x1D90, 0x309500E4,
+		0x1D90, 0x309600C4,
+		0x1D90, 0x309700C3,
+		0x1D90, 0x309800C2,
+		0x1D90, 0x309900C1,
+		0x1D90, 0x309A00A3,
+		0x1D90, 0x309B00A2,
+		0x1D90, 0x309C00A1,
+		0x1D90, 0x309D0085,
+		0x1D90, 0x309E0084,
+		0x1D90, 0x309F0083,
+		0x1D90, 0x30A00082,
+		0x1D90, 0x30A10081,
+		0x1D90, 0x30A20067,
+		0x1D90, 0x30A30066,
+		0x1D90, 0x30A40065,
+		0x1D90, 0x30A50064,
+		0x1D90, 0x30A60063,
+		0x1D90, 0x30A70044,
+		0x1D90, 0x30A80043,
+		0x1D90, 0x30A90042,
+		0x1D90, 0x30AA0026,
+		0x1D90, 0x30AB0025,
+		0x1D90, 0x30AC0024,
+		0x1D90, 0x30AD0023,
+		0x1D90, 0x30AE0022,
+		0x1D90, 0x30AF0021,
+		0x1D90, 0x30B00005,
+		0x1D90, 0x30B10004,
+		0x1D90, 0x30B20003,
+		0x1D90, 0x30B30002,
+		0x1D90, 0x30B40001,
+		0x1D90, 0x30B50000,
+		0x1D90, 0x30B60000,
+		0x1D90, 0x30B70000,
+		0x1D90, 0x30B80000,
+		0x1D90, 0x30B90000,
+		0x1D90, 0x30BA0000,
+		0x1D90, 0x30BB0000,
+		0x1D90, 0x30BC0000,
+		0x1D90, 0x30BD0000,
+		0x1D90, 0x30BE0000,
+		0x1D90, 0x30BF0000,
+	0xA0000000,	0x00000000,
 		0x1D90, 0x308000FF,
 		0x1D90, 0x308100FF,
 		0x1D90, 0x308200FF,
 		0x1D90, 0x308300FF,
-		0x1D90, 0x308400FE,
-		0x1D90, 0x308500FD,
-		0x1D90, 0x308600FC,
-		0x1D90, 0x308700FB,
-		0x1D90, 0x308800FA,
-		0x1D90, 0x308900F9,
-		0x1D90, 0x308A00F8,
-		0x1D90, 0x308B00F7,
-		0x1D90, 0x308C00F6,
-		0x1D90, 0x308D00F5,
-		0x1D90, 0x308E00F4,
-		0x1D90, 0x308F00F3,
-		0x1D90, 0x309000F2,
-		0x1D90, 0x309100F1,
-		0x1D90, 0x309200F0,
-		0x1D90, 0x309300EF,
-		0x1D90, 0x309400EE,
-		0x1D90, 0x309500ED,
-		0x1D90, 0x309600EC,
-		0x1D90, 0x309700EB,
-		0x1D90, 0x309800EA,
-		0x1D90, 0x309900E9,
-		0x1D90, 0x309A00E8,
-		0x1D90, 0x309B00E7,
-		0x1D90, 0x309C00E6,
-		0x1D90, 0x309D00C7,
-		0x1D90, 0x309E00C6,
-		0x1D90, 0x309F00C5,
+		0x1D90, 0x308400FF,
+		0x1D90, 0x308500FF,
+		0x1D90, 0x308600FE,
+		0x1D90, 0x308700FD,
+		0x1D90, 0x308800FC,
+		0x1D90, 0x308900FB,
+		0x1D90, 0x308A00FA,
+		0x1D90, 0x308B00F9,
+		0x1D90, 0x308C00F8,
+		0x1D90, 0x308D00F7,
+		0x1D90, 0x308E00F6,
+		0x1D90, 0x308F00F5,
+		0x1D90, 0x309000F4,
+		0x1D90, 0x309100F3,
+		0x1D90, 0x309200F2,
+		0x1D90, 0x309300F1,
+		0x1D90, 0x309400F0,
+		0x1D90, 0x309500EF,
+		0x1D90, 0x309600EE,
+		0x1D90, 0x309700ED,
+		0x1D90, 0x309800EC,
+		0x1D90, 0x309900EB,
+		0x1D90, 0x309A00EA,
+		0x1D90, 0x309B00E8,
+		0x1D90, 0x309C00E7,
+		0x1D90, 0x309D00E6,
+		0x1D90, 0x309E00E5,
+		0x1D90, 0x309F00E4,
 		0x1D90, 0x30A000C4,
 		0x1D90, 0x30A100C3,
 		0x1D90, 0x30A200C2,
-		0x1D90, 0x30A300A4,
+		0x1D90, 0x30A300C1,
 		0x1D90, 0x30A400A3,
 		0x1D90, 0x30A500A2,
-		0x1D90, 0x30A60086,
+		0x1D90, 0x30A600A1,
 		0x1D90, 0x30A70085,
 		0x1D90, 0x30A80084,
 		0x1D90, 0x30A90083,
 		0x1D90, 0x30AA0082,
-		0x1D90, 0x30AB0069,
-		0x1D90, 0x30AC0068,
-		0x1D90, 0x30AD0067,
-		0x1D90, 0x30AE0066,
-		0x1D90, 0x30AF0065,
-		0x1D90, 0x30B00064,
-		0x1D90, 0x30B10063,
-		0x1D90, 0x30B20044,
-		0x1D90, 0x30B30043,
-		0x1D90, 0x30B40042,
+		0x1D90, 0x30AB0081,
+		0x1D90, 0x30AC0067,
+		0x1D90, 0x30AD0066,
+		0x1D90, 0x30AE0065,
+		0x1D90, 0x30AF0064,
+		0x1D90, 0x30B00063,
+		0x1D90, 0x30B10044,
+		0x1D90, 0x30B20043,
+		0x1D90, 0x30B30042,
+		0x1D90, 0x30B40026,
 		0x1D90, 0x30B50025,
 		0x1D90, 0x30B60024,
 		0x1D90, 0x30B70023,
 		0x1D90, 0x30B80022,
 		0x1D90, 0x30B90021,
-		0x1D90, 0x30BA0020,
-		0x1D90, 0x30BB0003,
-		0x1D90, 0x30BC0002,
-		0x1D90, 0x30BD0001,
-		0x1D90, 0x30BE0000,
+		0x1D90, 0x30BA0005,
+		0x1D90, 0x30BB0004,
+		0x1D90, 0x30BC0003,
+		0x1D90, 0x30BD0002,
+		0x1D90, 0x30BE0001,
 		0x1D90, 0x30BF0000,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x30C000F8,
+		0x1D90, 0x30C100F7,
+		0x1D90, 0x30C200F6,
+		0x1D90, 0x30C300F5,
+		0x1D90, 0x30C400F4,
+		0x1D90, 0x30C500F3,
+		0x1D90, 0x30C600F2,
+		0x1D90, 0x30C700F1,
+		0x1D90, 0x30C800F0,
+		0x1D90, 0x30C900EF,
+		0x1D90, 0x30CA00EE,
+		0x1D90, 0x30CB00ED,
+		0x1D90, 0x30CC00EC,
+		0x1D90, 0x30CD00EB,
+		0x1D90, 0x30CE00EA,
+		0x1D90, 0x30CF00E8,
+		0x1D90, 0x30D000E7,
+		0x1D90, 0x30D100E6,
+		0x1D90, 0x30D200E5,
+		0x1D90, 0x30D300E4,
+		0x1D90, 0x30D400E3,
+		0x1D90, 0x30D500E2,
+		0x1D90, 0x30D600A6,
+		0x1D90, 0x30D700A5,
+		0x1D90, 0x30D800A4,
+		0x1D90, 0x30D900A3,
+		0x1D90, 0x30DA00A2,
+		0x1D90, 0x30DB0086,
+		0x1D90, 0x30DC0085,
+		0x1D90, 0x30DD0084,
+		0x1D90, 0x30DE0083,
+		0x1D90, 0x30DF0081,
+		0x1D90, 0x30E00068,
+		0x1D90, 0x30E10067,
+		0x1D90, 0x30E20066,
+		0x1D90, 0x30E30065,
+		0x1D90, 0x30E40064,
+		0x1D90, 0x30E50045,
+		0x1D90, 0x30E60044,
+		0x1D90, 0x30E70043,
+		0x1D90, 0x30E80042,
+		0x1D90, 0x30E90025,
+		0x1D90, 0x30EA0024,
+		0x1D90, 0x30EB0023,
+		0x1D90, 0x30EC0022,
+		0x1D90, 0x30ED0021,
+		0x1D90, 0x30EE0005,
+		0x1D90, 0x30EF0004,
+		0x1D90, 0x30F00003,
+		0x1D90, 0x30F10002,
+		0x1D90, 0x30F20001,
+		0x1D90, 0x30F30000,
+		0x1D90, 0x30F40000,
+		0x1D90, 0x30F50000,
+		0x1D90, 0x30F60000,
+		0x1D90, 0x30F70000,
+		0x1D90, 0x30F80000,
+		0x1D90, 0x30F90000,
+		0x1D90, 0x30FA0000,
+		0x1D90, 0x30FB0000,
+		0x1D90, 0x30FC0000,
+		0x1D90, 0x30FD0000,
+		0x1D90, 0x30FE0000,
+		0x1D90, 0x30FF0000,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x30C000F8,
+		0x1D90, 0x30C100F7,
+		0x1D90, 0x30C200F6,
+		0x1D90, 0x30C300F5,
+		0x1D90, 0x30C400F4,
+		0x1D90, 0x30C500F3,
+		0x1D90, 0x30C600F2,
+		0x1D90, 0x30C700F1,
+		0x1D90, 0x30C800F0,
+		0x1D90, 0x30C900EF,
+		0x1D90, 0x30CA00EE,
+		0x1D90, 0x30CB00ED,
+		0x1D90, 0x30CC00EC,
+		0x1D90, 0x30CD00EB,
+		0x1D90, 0x30CE00EA,
+		0x1D90, 0x30CF00E8,
+		0x1D90, 0x30D000E7,
+		0x1D90, 0x30D100E6,
+		0x1D90, 0x30D200E5,
+		0x1D90, 0x30D300E4,
+		0x1D90, 0x30D400E3,
+		0x1D90, 0x30D500E2,
+		0x1D90, 0x30D600A6,
+		0x1D90, 0x30D700A5,
+		0x1D90, 0x30D800A4,
+		0x1D90, 0x30D900A3,
+		0x1D90, 0x30DA00A2,
+		0x1D90, 0x30DB0086,
+		0x1D90, 0x30DC0085,
+		0x1D90, 0x30DD0084,
+		0x1D90, 0x30DE0083,
+		0x1D90, 0x30DF0081,
+		0x1D90, 0x30E00068,
+		0x1D90, 0x30E10067,
+		0x1D90, 0x30E20066,
+		0x1D90, 0x30E30065,
+		0x1D90, 0x30E40064,
+		0x1D90, 0x30E50045,
+		0x1D90, 0x30E60044,
+		0x1D90, 0x30E70043,
+		0x1D90, 0x30E80042,
+		0x1D90, 0x30E90025,
+		0x1D90, 0x30EA0024,
+		0x1D90, 0x30EB0023,
+		0x1D90, 0x30EC0022,
+		0x1D90, 0x30ED0021,
+		0x1D90, 0x30EE0005,
+		0x1D90, 0x30EF0004,
+		0x1D90, 0x30F00003,
+		0x1D90, 0x30F10002,
+		0x1D90, 0x30F20001,
+		0x1D90, 0x30F30000,
+		0x1D90, 0x30F40000,
+		0x1D90, 0x30F50000,
+		0x1D90, 0x30F60000,
+		0x1D90, 0x30F70000,
+		0x1D90, 0x30F80000,
+		0x1D90, 0x30F90000,
+		0x1D90, 0x30FA0000,
+		0x1D90, 0x30FB0000,
+		0x1D90, 0x30FC0000,
+		0x1D90, 0x30FD0000,
+		0x1D90, 0x30FE0000,
+		0x1D90, 0x30FF0000,
+	0xA0000000,	0x00000000,
 		0x1D90, 0x30C000FF,
 		0x1D90, 0x30C100FF,
 		0x1D90, 0x30C200FF,
 		0x1D90, 0x30C300FF,
-		0x1D90, 0x30C400FE,
-		0x1D90, 0x30C500FD,
-		0x1D90, 0x30C600FC,
-		0x1D90, 0x30C700FB,
-		0x1D90, 0x30C800FA,
-		0x1D90, 0x30C900F9,
-		0x1D90, 0x30CA00F8,
-		0x1D90, 0x30CB00F7,
-		0x1D90, 0x30CC00F6,
-		0x1D90, 0x30CD00F5,
-		0x1D90, 0x30CE00F4,
-		0x1D90, 0x30CF00F3,
-		0x1D90, 0x30D000F2,
-		0x1D90, 0x30D100F1,
-		0x1D90, 0x30D200F0,
-		0x1D90, 0x30D300EF,
-		0x1D90, 0x30D400EE,
-		0x1D90, 0x30D500ED,
-		0x1D90, 0x30D600EC,
-		0x1D90, 0x30D700EB,
-		0x1D90, 0x30D800EA,
-		0x1D90, 0x30D900E9,
-		0x1D90, 0x30DA00E8,
-		0x1D90, 0x30DB00E7,
-		0x1D90, 0x30DC00E6,
-		0x1D90, 0x30DD00C7,
-		0x1D90, 0x30DE00C6,
-		0x1D90, 0x30DF00C5,
-		0x1D90, 0x30E000C4,
-		0x1D90, 0x30E100C3,
-		0x1D90, 0x30E200C2,
-		0x1D90, 0x30E300A4,
-		0x1D90, 0x30E400A3,
-		0x1D90, 0x30E500A2,
-		0x1D90, 0x30E60086,
-		0x1D90, 0x30E70085,
-		0x1D90, 0x30E80084,
-		0x1D90, 0x30E90083,
-		0x1D90, 0x30EA0082,
-		0x1D90, 0x30EB0069,
-		0x1D90, 0x30EC0068,
-		0x1D90, 0x30ED0067,
-		0x1D90, 0x30EE0066,
-		0x1D90, 0x30EF0065,
-		0x1D90, 0x30F00064,
-		0x1D90, 0x30F10063,
+		0x1D90, 0x30C400FF,
+		0x1D90, 0x30C500FF,
+		0x1D90, 0x30C600FE,
+		0x1D90, 0x30C700FD,
+		0x1D90, 0x30C800FC,
+		0x1D90, 0x30C900FB,
+		0x1D90, 0x30CA00FA,
+		0x1D90, 0x30CB00F9,
+		0x1D90, 0x30CC00F8,
+		0x1D90, 0x30CD00F7,
+		0x1D90, 0x30CE00F6,
+		0x1D90, 0x30CF00F5,
+		0x1D90, 0x30D000F4,
+		0x1D90, 0x30D100F3,
+		0x1D90, 0x30D200F2,
+		0x1D90, 0x30D300F1,
+		0x1D90, 0x30D400F0,
+		0x1D90, 0x30D500EF,
+		0x1D90, 0x30D600EE,
+		0x1D90, 0x30D700ED,
+		0x1D90, 0x30D800EC,
+		0x1D90, 0x30D900EB,
+		0x1D90, 0x30DA00EA,
+		0x1D90, 0x30DB00E8,
+		0x1D90, 0x30DC00E7,
+		0x1D90, 0x30DD00E6,
+		0x1D90, 0x30DE00E5,
+		0x1D90, 0x30DF00E4,
+		0x1D90, 0x30E000E3,
+		0x1D90, 0x30E100E2,
+		0x1D90, 0x30E200A6,
+		0x1D90, 0x30E300A5,
+		0x1D90, 0x30E400A4,
+		0x1D90, 0x30E500A3,
+		0x1D90, 0x30E600A2,
+		0x1D90, 0x30E70086,
+		0x1D90, 0x30E80085,
+		0x1D90, 0x30E90084,
+		0x1D90, 0x30EA0083,
+		0x1D90, 0x30EB0082,
+		0x1D90, 0x30EC0067,
+		0x1D90, 0x30ED0066,
+		0x1D90, 0x30EE0065,
+		0x1D90, 0x30EF0064,
+		0x1D90, 0x30F00063,
+		0x1D90, 0x30F10045,
 		0x1D90, 0x30F20044,
 		0x1D90, 0x30F30043,
 		0x1D90, 0x30F40042,
@@ -263,12 +790,14 @@ static const u32 rtw8822c_agc[] = {
 		0x1D90, 0x30F70023,
 		0x1D90, 0x30F80022,
 		0x1D90, 0x30F90021,
-		0x1D90, 0x30FA0020,
-		0x1D90, 0x30FB0003,
-		0x1D90, 0x30FC0002,
-		0x1D90, 0x30FD0001,
-		0x1D90, 0x30FE0000,
+		0x1D90, 0x30FA0005,
+		0x1D90, 0x30FB0004,
+		0x1D90, 0x30FC0003,
+		0x1D90, 0x30FD0002,
+		0x1D90, 0x30FE0001,
 		0x1D90, 0x30FF0000,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
 		0x1D90, 0x310001FF,
 		0x1D90, 0x310101FF,
 		0x1D90, 0x310201FF,
@@ -333,111 +862,505 @@ static const u32 rtw8822c_agc[] = {
 		0x1D90, 0x313D0045,
 		0x1D90, 0x313E0044,
 		0x1D90, 0x313F0043,
-		0x1D90, 0x314001FF,
-		0x1D90, 0x314101FF,
-		0x1D90, 0x314201FF,
-		0x1D90, 0x314301FF,
-		0x1D90, 0x314401FF,
-		0x1D90, 0x314501FF,
-		0x1D90, 0x314601FF,
-		0x1D90, 0x314701FE,
-		0x1D90, 0x314801FD,
-		0x1D90, 0x314901FC,
-		0x1D90, 0x314A01FB,
-		0x1D90, 0x314B01FA,
-		0x1D90, 0x314C01F9,
-		0x1D90, 0x314D01F8,
-		0x1D90, 0x314E01F7,
-		0x1D90, 0x314F01F6,
-		0x1D90, 0x315001F5,
-		0x1D90, 0x315101F4,
-		0x1D90, 0x315201F3,
-		0x1D90, 0x315301F2,
-		0x1D90, 0x315401F1,
-		0x1D90, 0x315501F0,
-		0x1D90, 0x315601EF,
-		0x1D90, 0x315701EE,
-		0x1D90, 0x315801ED,
-		0x1D90, 0x315901EC,
-		0x1D90, 0x315A01EB,
-		0x1D90, 0x315B01EA,
-		0x1D90, 0x315C01E9,
-		0x1D90, 0x315D018F,
-		0x1D90, 0x315E018E,
-		0x1D90, 0x315F018D,
-		0x1D90, 0x3160018C,
-		0x1D90, 0x3161018B,
-		0x1D90, 0x3162018A,
-		0x1D90, 0x31630189,
-		0x1D90, 0x31640188,
-		0x1D90, 0x31650187,
-		0x1D90, 0x31660186,
-		0x1D90, 0x31670185,
-		0x1D90, 0x31680184,
-		0x1D90, 0x31690183,
-		0x1D90, 0x316A0182,
-		0x1D90, 0x316B0149,
-		0x1D90, 0x316C0148,
-		0x1D90, 0x316D0147,
-		0x1D90, 0x316E0146,
-		0x1D90, 0x316F0145,
-		0x1D90, 0x31700144,
-		0x1D90, 0x31710143,
-		0x1D90, 0x31720142,
-		0x1D90, 0x31730141,
-		0x1D90, 0x31740140,
-		0x1D90, 0x317500C7,
-		0x1D90, 0x317600C6,
-		0x1D90, 0x317700C5,
-		0x1D90, 0x317800C4,
-		0x1D90, 0x317900C3,
-		0x1D90, 0x317A0088,
-		0x1D90, 0x317B0087,
-		0x1D90, 0x317C0086,
-		0x1D90, 0x317D0045,
-		0x1D90, 0x317E0044,
-		0x1D90, 0x317F0043,
-		0x1D90, 0x318001FE,
-		0x1D90, 0x318101FD,
-		0x1D90, 0x318201FC,
-		0x1D90, 0x318301FB,
-		0x1D90, 0x318401FA,
-		0x1D90, 0x318501F9,
-		0x1D90, 0x318601F8,
-		0x1D90, 0x318701F7,
-		0x1D90, 0x318801F6,
-		0x1D90, 0x318901F5,
-		0x1D90, 0x318A01F4,
-		0x1D90, 0x318B01F3,
-		0x1D90, 0x318C01F2,
-		0x1D90, 0x318D01F1,
-		0x1D90, 0x318E01F0,
-		0x1D90, 0x318F01EF,
-		0x1D90, 0x319001EE,
-		0x1D90, 0x319101ED,
-		0x1D90, 0x319201EC,
-		0x1D90, 0x319301EB,
-		0x1D90, 0x319401EA,
-		0x1D90, 0x319501E9,
-		0x1D90, 0x319601E7,
-		0x1D90, 0x319701E6,
-		0x1D90, 0x319801E5,
-		0x1D90, 0x319901E4,
-		0x1D90, 0x319A01A8,
-		0x1D90, 0x319B01A7,
-		0x1D90, 0x319C01A6,
-		0x1D90, 0x319D01A5,
-		0x1D90, 0x319E0185,
-		0x1D90, 0x319F0184,
-		0x1D90, 0x31A00183,
-		0x1D90, 0x31A10182,
-		0x1D90, 0x31A20149,
-		0x1D90, 0x31A30148,
-		0x1D90, 0x31A40147,
-		0x1D90, 0x31A50145,
-		0x1D90, 0x31A60144,
-		0x1D90, 0x31A70143,
-		0x1D90, 0x31A80142,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x310001FF,
+		0x1D90, 0x310101FF,
+		0x1D90, 0x310201FF,
+		0x1D90, 0x310301FF,
+		0x1D90, 0x310401FF,
+		0x1D90, 0x310501FF,
+		0x1D90, 0x310601FF,
+		0x1D90, 0x310701FF,
+		0x1D90, 0x310801FF,
+		0x1D90, 0x310901FE,
+		0x1D90, 0x310A01FD,
+		0x1D90, 0x310B01FC,
+		0x1D90, 0x310C01FB,
+		0x1D90, 0x310D01FA,
+		0x1D90, 0x310E01F9,
+		0x1D90, 0x310F01F8,
+		0x1D90, 0x311001F7,
+		0x1D90, 0x311101F6,
+		0x1D90, 0x311201F5,
+		0x1D90, 0x311301F4,
+		0x1D90, 0x311401F3,
+		0x1D90, 0x311501F2,
+		0x1D90, 0x311601F1,
+		0x1D90, 0x311701F0,
+		0x1D90, 0x311801EF,
+		0x1D90, 0x311901EE,
+		0x1D90, 0x311A01ED,
+		0x1D90, 0x311B01EC,
+		0x1D90, 0x311C01EB,
+		0x1D90, 0x311D0192,
+		0x1D90, 0x311E0191,
+		0x1D90, 0x311F0190,
+		0x1D90, 0x3120018F,
+		0x1D90, 0x3121018E,
+		0x1D90, 0x3122018D,
+		0x1D90, 0x3123018C,
+		0x1D90, 0x3124018B,
+		0x1D90, 0x3125018A,
+		0x1D90, 0x31260189,
+		0x1D90, 0x31270188,
+		0x1D90, 0x31280187,
+		0x1D90, 0x31290186,
+		0x1D90, 0x312A0185,
+		0x1D90, 0x312B0149,
+		0x1D90, 0x312C0148,
+		0x1D90, 0x312D0147,
+		0x1D90, 0x312E0146,
+		0x1D90, 0x312F0145,
+		0x1D90, 0x31300144,
+		0x1D90, 0x31310143,
+		0x1D90, 0x31320142,
+		0x1D90, 0x31330141,
+		0x1D90, 0x31340140,
+		0x1D90, 0x313500C7,
+		0x1D90, 0x313600C6,
+		0x1D90, 0x313700C5,
+		0x1D90, 0x313800C4,
+		0x1D90, 0x313900C3,
+		0x1D90, 0x313A0088,
+		0x1D90, 0x313B0087,
+		0x1D90, 0x313C0086,
+		0x1D90, 0x313D0045,
+		0x1D90, 0x313E0044,
+		0x1D90, 0x313F0043,
+	0xA0000000,	0x00000000,
+		0x1D90, 0x310001FF,
+		0x1D90, 0x310101FF,
+		0x1D90, 0x310201FF,
+		0x1D90, 0x310301FF,
+		0x1D90, 0x310401FF,
+		0x1D90, 0x310501FF,
+		0x1D90, 0x310601FF,
+		0x1D90, 0x310701FF,
+		0x1D90, 0x310801FF,
+		0x1D90, 0x310901FE,
+		0x1D90, 0x310A01FD,
+		0x1D90, 0x310B01FC,
+		0x1D90, 0x310C01FB,
+		0x1D90, 0x310D01FA,
+		0x1D90, 0x310E01F9,
+		0x1D90, 0x310F01F8,
+		0x1D90, 0x311001F7,
+		0x1D90, 0x311101F6,
+		0x1D90, 0x311201F5,
+		0x1D90, 0x311301F4,
+		0x1D90, 0x311401F3,
+		0x1D90, 0x311501F2,
+		0x1D90, 0x311601F1,
+		0x1D90, 0x311701F0,
+		0x1D90, 0x311801EF,
+		0x1D90, 0x311901EE,
+		0x1D90, 0x311A01ED,
+		0x1D90, 0x311B01EC,
+		0x1D90, 0x311C01EB,
+		0x1D90, 0x311D0192,
+		0x1D90, 0x311E0191,
+		0x1D90, 0x311F0190,
+		0x1D90, 0x3120018F,
+		0x1D90, 0x3121018E,
+		0x1D90, 0x3122018D,
+		0x1D90, 0x3123018C,
+		0x1D90, 0x3124018B,
+		0x1D90, 0x3125018A,
+		0x1D90, 0x31260189,
+		0x1D90, 0x31270188,
+		0x1D90, 0x31280187,
+		0x1D90, 0x31290186,
+		0x1D90, 0x312A0185,
+		0x1D90, 0x312B0149,
+		0x1D90, 0x312C0148,
+		0x1D90, 0x312D0147,
+		0x1D90, 0x312E0146,
+		0x1D90, 0x312F0145,
+		0x1D90, 0x31300144,
+		0x1D90, 0x31310143,
+		0x1D90, 0x31320142,
+		0x1D90, 0x31330141,
+		0x1D90, 0x31340140,
+		0x1D90, 0x313500C7,
+		0x1D90, 0x313600C6,
+		0x1D90, 0x313700C5,
+		0x1D90, 0x313800C4,
+		0x1D90, 0x313900C3,
+		0x1D90, 0x313A0088,
+		0x1D90, 0x313B0087,
+		0x1D90, 0x313C0086,
+		0x1D90, 0x313D0045,
+		0x1D90, 0x313E0044,
+		0x1D90, 0x313F0043,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x314001FF,
+		0x1D90, 0x314101FF,
+		0x1D90, 0x314201FF,
+		0x1D90, 0x314301FF,
+		0x1D90, 0x314401FF,
+		0x1D90, 0x314501FF,
+		0x1D90, 0x314601FF,
+		0x1D90, 0x314701FE,
+		0x1D90, 0x314801FD,
+		0x1D90, 0x314901FC,
+		0x1D90, 0x314A01FB,
+		0x1D90, 0x314B01FA,
+		0x1D90, 0x314C01F9,
+		0x1D90, 0x314D01F8,
+		0x1D90, 0x314E01F7,
+		0x1D90, 0x314F01F6,
+		0x1D90, 0x315001F5,
+		0x1D90, 0x315101F4,
+		0x1D90, 0x315201F3,
+		0x1D90, 0x315301F2,
+		0x1D90, 0x315401F1,
+		0x1D90, 0x315501F0,
+		0x1D90, 0x315601EF,
+		0x1D90, 0x315701EE,
+		0x1D90, 0x315801ED,
+		0x1D90, 0x315901EC,
+		0x1D90, 0x315A01EB,
+		0x1D90, 0x315B01EA,
+		0x1D90, 0x315C01E9,
+		0x1D90, 0x315D018F,
+		0x1D90, 0x315E018E,
+		0x1D90, 0x315F018D,
+		0x1D90, 0x3160018C,
+		0x1D90, 0x3161018B,
+		0x1D90, 0x3162018A,
+		0x1D90, 0x31630189,
+		0x1D90, 0x31640188,
+		0x1D90, 0x31650187,
+		0x1D90, 0x31660186,
+		0x1D90, 0x31670185,
+		0x1D90, 0x31680184,
+		0x1D90, 0x31690183,
+		0x1D90, 0x316A0182,
+		0x1D90, 0x316B0149,
+		0x1D90, 0x316C0148,
+		0x1D90, 0x316D0147,
+		0x1D90, 0x316E0146,
+		0x1D90, 0x316F0145,
+		0x1D90, 0x31700144,
+		0x1D90, 0x31710143,
+		0x1D90, 0x31720142,
+		0x1D90, 0x31730141,
+		0x1D90, 0x31740140,
+		0x1D90, 0x317500C7,
+		0x1D90, 0x317600C6,
+		0x1D90, 0x317700C5,
+		0x1D90, 0x317800C4,
+		0x1D90, 0x317900C3,
+		0x1D90, 0x317A0088,
+		0x1D90, 0x317B0087,
+		0x1D90, 0x317C0086,
+		0x1D90, 0x317D0045,
+		0x1D90, 0x317E0044,
+		0x1D90, 0x317F0043,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x314001FF,
+		0x1D90, 0x314101FF,
+		0x1D90, 0x314201FF,
+		0x1D90, 0x314301FF,
+		0x1D90, 0x314401FF,
+		0x1D90, 0x314501FF,
+		0x1D90, 0x314601FF,
+		0x1D90, 0x314701FE,
+		0x1D90, 0x314801FD,
+		0x1D90, 0x314901FC,
+		0x1D90, 0x314A01FB,
+		0x1D90, 0x314B01FA,
+		0x1D90, 0x314C01F9,
+		0x1D90, 0x314D01F8,
+		0x1D90, 0x314E01F7,
+		0x1D90, 0x314F01F6,
+		0x1D90, 0x315001F5,
+		0x1D90, 0x315101F4,
+		0x1D90, 0x315201F3,
+		0x1D90, 0x315301F2,
+		0x1D90, 0x315401F1,
+		0x1D90, 0x315501F0,
+		0x1D90, 0x315601EF,
+		0x1D90, 0x315701EE,
+		0x1D90, 0x315801ED,
+		0x1D90, 0x315901EC,
+		0x1D90, 0x315A01EB,
+		0x1D90, 0x315B01EA,
+		0x1D90, 0x315C01E9,
+		0x1D90, 0x315D018F,
+		0x1D90, 0x315E018E,
+		0x1D90, 0x315F018D,
+		0x1D90, 0x3160018C,
+		0x1D90, 0x3161018B,
+		0x1D90, 0x3162018A,
+		0x1D90, 0x31630189,
+		0x1D90, 0x31640188,
+		0x1D90, 0x31650187,
+		0x1D90, 0x31660186,
+		0x1D90, 0x31670185,
+		0x1D90, 0x31680184,
+		0x1D90, 0x31690183,
+		0x1D90, 0x316A0182,
+		0x1D90, 0x316B0149,
+		0x1D90, 0x316C0148,
+		0x1D90, 0x316D0147,
+		0x1D90, 0x316E0146,
+		0x1D90, 0x316F0145,
+		0x1D90, 0x31700144,
+		0x1D90, 0x31710143,
+		0x1D90, 0x31720142,
+		0x1D90, 0x31730141,
+		0x1D90, 0x31740140,
+		0x1D90, 0x317500C7,
+		0x1D90, 0x317600C6,
+		0x1D90, 0x317700C5,
+		0x1D90, 0x317800C4,
+		0x1D90, 0x317900C3,
+		0x1D90, 0x317A0088,
+		0x1D90, 0x317B0087,
+		0x1D90, 0x317C0086,
+		0x1D90, 0x317D0045,
+		0x1D90, 0x317E0044,
+		0x1D90, 0x317F0043,
+	0xA0000000,	0x00000000,
+		0x1D90, 0x314001FF,
+		0x1D90, 0x314101FF,
+		0x1D90, 0x314201FF,
+		0x1D90, 0x314301FF,
+		0x1D90, 0x314401FF,
+		0x1D90, 0x314501FF,
+		0x1D90, 0x314601FF,
+		0x1D90, 0x314701FE,
+		0x1D90, 0x314801FD,
+		0x1D90, 0x314901FC,
+		0x1D90, 0x314A01FB,
+		0x1D90, 0x314B01FA,
+		0x1D90, 0x314C01F9,
+		0x1D90, 0x314D01F8,
+		0x1D90, 0x314E01F7,
+		0x1D90, 0x314F01F6,
+		0x1D90, 0x315001F5,
+		0x1D90, 0x315101F4,
+		0x1D90, 0x315201F3,
+		0x1D90, 0x315301F2,
+		0x1D90, 0x315401F1,
+		0x1D90, 0x315501F0,
+		0x1D90, 0x315601EF,
+		0x1D90, 0x315701EE,
+		0x1D90, 0x315801ED,
+		0x1D90, 0x315901EC,
+		0x1D90, 0x315A01EB,
+		0x1D90, 0x315B01EA,
+		0x1D90, 0x315C01E9,
+		0x1D90, 0x315D018F,
+		0x1D90, 0x315E018E,
+		0x1D90, 0x315F018D,
+		0x1D90, 0x3160018C,
+		0x1D90, 0x3161018B,
+		0x1D90, 0x3162018A,
+		0x1D90, 0x31630189,
+		0x1D90, 0x31640188,
+		0x1D90, 0x31650187,
+		0x1D90, 0x31660186,
+		0x1D90, 0x31670185,
+		0x1D90, 0x31680184,
+		0x1D90, 0x31690183,
+		0x1D90, 0x316A0182,
+		0x1D90, 0x316B0149,
+		0x1D90, 0x316C0148,
+		0x1D90, 0x316D0147,
+		0x1D90, 0x316E0146,
+		0x1D90, 0x316F0145,
+		0x1D90, 0x31700144,
+		0x1D90, 0x31710143,
+		0x1D90, 0x31720142,
+		0x1D90, 0x31730141,
+		0x1D90, 0x31740140,
+		0x1D90, 0x317500C7,
+		0x1D90, 0x317600C6,
+		0x1D90, 0x317700C5,
+		0x1D90, 0x317800C4,
+		0x1D90, 0x317900C3,
+		0x1D90, 0x317A0088,
+		0x1D90, 0x317B0087,
+		0x1D90, 0x317C0086,
+		0x1D90, 0x317D0045,
+		0x1D90, 0x317E0044,
+		0x1D90, 0x317F0043,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x318001FE,
+		0x1D90, 0x318101FD,
+		0x1D90, 0x318201FC,
+		0x1D90, 0x318301FB,
+		0x1D90, 0x318401FA,
+		0x1D90, 0x318501F9,
+		0x1D90, 0x318601F8,
+		0x1D90, 0x318701F7,
+		0x1D90, 0x318801F6,
+		0x1D90, 0x318901F5,
+		0x1D90, 0x318A01F4,
+		0x1D90, 0x318B01F3,
+		0x1D90, 0x318C01F2,
+		0x1D90, 0x318D01F1,
+		0x1D90, 0x318E01F0,
+		0x1D90, 0x318F01EF,
+		0x1D90, 0x319001EE,
+		0x1D90, 0x319101ED,
+		0x1D90, 0x319201EC,
+		0x1D90, 0x319301EB,
+		0x1D90, 0x319401EA,
+		0x1D90, 0x319501E9,
+		0x1D90, 0x319601E7,
+		0x1D90, 0x319701E6,
+		0x1D90, 0x319801E5,
+		0x1D90, 0x319901E4,
+		0x1D90, 0x319A01A8,
+		0x1D90, 0x319B01A7,
+		0x1D90, 0x319C01A6,
+		0x1D90, 0x319D01A5,
+		0x1D90, 0x319E0185,
+		0x1D90, 0x319F0184,
+		0x1D90, 0x31A00183,
+		0x1D90, 0x31A10182,
+		0x1D90, 0x31A20149,
+		0x1D90, 0x31A30148,
+		0x1D90, 0x31A40147,
+		0x1D90, 0x31A50145,
+		0x1D90, 0x31A60144,
+		0x1D90, 0x31A70143,
+		0x1D90, 0x31A80142,
+		0x1D90, 0x31A900E6,
+		0x1D90, 0x31AA00E5,
+		0x1D90, 0x31AB00C9,
+		0x1D90, 0x31AC00C8,
+		0x1D90, 0x31AD00C7,
+		0x1D90, 0x31AE00C6,
+		0x1D90, 0x31AF00C5,
+		0x1D90, 0x31B000C4,
+		0x1D90, 0x31B100C3,
+		0x1D90, 0x31B20088,
+		0x1D90, 0x31B30087,
+		0x1D90, 0x31B40086,
+		0x1D90, 0x31B50085,
+		0x1D90, 0x31B60026,
+		0x1D90, 0x31B70025,
+		0x1D90, 0x31B80024,
+		0x1D90, 0x31B90023,
+		0x1D90, 0x31BA0022,
+		0x1D90, 0x31BB0021,
+		0x1D90, 0x31BC0020,
+		0x1D90, 0x31BD0003,
+		0x1D90, 0x31BE0002,
+		0x1D90, 0x31BF0001,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1D90, 0x318001FE,
+		0x1D90, 0x318101FD,
+		0x1D90, 0x318201FC,
+		0x1D90, 0x318301FB,
+		0x1D90, 0x318401FA,
+		0x1D90, 0x318501F9,
+		0x1D90, 0x318601F8,
+		0x1D90, 0x318701F7,
+		0x1D90, 0x318801F6,
+		0x1D90, 0x318901F5,
+		0x1D90, 0x318A01F4,
+		0x1D90, 0x318B01F3,
+		0x1D90, 0x318C01F2,
+		0x1D90, 0x318D01F1,
+		0x1D90, 0x318E01F0,
+		0x1D90, 0x318F01EF,
+		0x1D90, 0x319001EE,
+		0x1D90, 0x319101ED,
+		0x1D90, 0x319201EC,
+		0x1D90, 0x319301EB,
+		0x1D90, 0x319401EA,
+		0x1D90, 0x319501E9,
+		0x1D90, 0x319601E7,
+		0x1D90, 0x319701E6,
+		0x1D90, 0x319801E5,
+		0x1D90, 0x319901E4,
+		0x1D90, 0x319A01A8,
+		0x1D90, 0x319B01A7,
+		0x1D90, 0x319C01A6,
+		0x1D90, 0x319D01A5,
+		0x1D90, 0x319E0185,
+		0x1D90, 0x319F0184,
+		0x1D90, 0x31A00183,
+		0x1D90, 0x31A10182,
+		0x1D90, 0x31A20149,
+		0x1D90, 0x31A30148,
+		0x1D90, 0x31A40147,
+		0x1D90, 0x31A50145,
+		0x1D90, 0x31A60144,
+		0x1D90, 0x31A70143,
+		0x1D90, 0x31A80142,
+		0x1D90, 0x31A900E6,
+		0x1D90, 0x31AA00E5,
+		0x1D90, 0x31AB00C9,
+		0x1D90, 0x31AC00C8,
+		0x1D90, 0x31AD00C7,
+		0x1D90, 0x31AE00C6,
+		0x1D90, 0x31AF00C5,
+		0x1D90, 0x31B000C4,
+		0x1D90, 0x31B100C3,
+		0x1D90, 0x31B20088,
+		0x1D90, 0x31B30087,
+		0x1D90, 0x31B40086,
+		0x1D90, 0x31B50085,
+		0x1D90, 0x31B60026,
+		0x1D90, 0x31B70025,
+		0x1D90, 0x31B80024,
+		0x1D90, 0x31B90023,
+		0x1D90, 0x31BA0022,
+		0x1D90, 0x31BB0021,
+		0x1D90, 0x31BC0020,
+		0x1D90, 0x31BD0003,
+		0x1D90, 0x31BE0002,
+		0x1D90, 0x31BF0001,
+	0xA0000000,	0x00000000,
+		0x1D90, 0x318001FE,
+		0x1D90, 0x318101FD,
+		0x1D90, 0x318201FC,
+		0x1D90, 0x318301FB,
+		0x1D90, 0x318401FA,
+		0x1D90, 0x318501F9,
+		0x1D90, 0x318601F8,
+		0x1D90, 0x318701F7,
+		0x1D90, 0x318801F6,
+		0x1D90, 0x318901F5,
+		0x1D90, 0x318A01F4,
+		0x1D90, 0x318B01F3,
+		0x1D90, 0x318C01F2,
+		0x1D90, 0x318D01F1,
+		0x1D90, 0x318E01F0,
+		0x1D90, 0x318F01EF,
+		0x1D90, 0x319001EE,
+		0x1D90, 0x319101ED,
+		0x1D90, 0x319201EC,
+		0x1D90, 0x319301EB,
+		0x1D90, 0x319401EA,
+		0x1D90, 0x319501E9,
+		0x1D90, 0x319601E7,
+		0x1D90, 0x319701E6,
+		0x1D90, 0x319801E5,
+		0x1D90, 0x319901E4,
+		0x1D90, 0x319A01A8,
+		0x1D90, 0x319B01A7,
+		0x1D90, 0x319C01A6,
+		0x1D90, 0x319D01A5,
+		0x1D90, 0x319E0185,
+		0x1D90, 0x319F0184,
+		0x1D90, 0x31A00183,
+		0x1D90, 0x31A10182,
+		0x1D90, 0x31A20149,
+		0x1D90, 0x31A30148,
+		0x1D90, 0x31A40147,
+		0x1D90, 0x31A50145,
+		0x1D90, 0x31A60144,
+		0x1D90, 0x31A70143,
+		0x1D90, 0x31A80142,
 		0x1D90, 0x31A900E6,
 		0x1D90, 0x31AA00E5,
 		0x1D90, 0x31AB00C9,
@@ -461,8 +1384,17 @@ static const u32 rtw8822c_agc[] = {
 		0x1D90, 0x31BD0003,
 		0x1D90, 0x31BE0002,
 		0x1D90, 0x31BF0001,
+	0xB0000000,	0x00000000,
+	0x80000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1D70, 0x22222222,
+		0x1D70, 0x20202020,
+	0x90000016,	0x00000000,	0x40000000,	0x00000000,
 		0x1D70, 0x22222222,
 		0x1D70, 0x20202020,
+	0xA0000000,	0x00000000,
+		0x1D70, 0x22222222,
+		0x1D70, 0x20202020,
+	0xB0000000,	0x00000000,
 };
 
 RTW_DECL_TABLE_PHY_COND(rtw8822c_agc, rtw_phy_cfg_agc);
@@ -732,7 +1664,7 @@ static const u32 rtw8822c_bb[] = {
 		0xC18, 0x00087672,
 		0xC1C, 0x15260000,
 		0xC20, 0x00000000,
-		0xC24, 0x40600000,
+		0xC24, 0x406000FF,
 		0xC28, 0x06400F76,
 		0xC2C, 0xE30020E1,
 		0xC30, 0x140C9494,
@@ -861,9 +1793,29 @@ static const u32 rtw8822c_bb[] = {
 		0x1828, 0x000004FD,
 		0x182C, 0x00000000,
 		0x1834, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1838, 0x20100000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1838, 0x20100000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1838, 0x20100000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1838, 0x20100000,
+	0xA0000000,	0x00000000,
 		0x1838, 0x20000000,
+	0xB0000000,	0x00000000,
 		0x183C, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1840, 0x00002300,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1840, 0x00002300,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1840, 0x00002300,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1840, 0x00002300,
+	0xA0000000,	0x00000000,
 		0x1840, 0x00000000,
+	0xB0000000,	0x00000000,
 		0x1844, 0x00000000,
 		0x1848, 0x00000000,
 		0x184C, 0x00000000,
@@ -874,13 +1826,33 @@ static const u32 rtw8822c_bb[] = {
 		0x1860, 0xF0040FF8,
 		0x1864, 0x7F000000,
 		0x1868, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x186C, 0x0000FF02,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x186C, 0x0000FF02,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x186C, 0x0000FF02,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x186C, 0x0000FF02,
+	0xA0000000,	0x00000000,
 		0x186C, 0x0000FF00,
+	0xB0000000,	0x00000000,
 		0x1870, 0x00000000,
 		0x1874, 0x00000000,
 		0x1878, 0x00000000,
 		0x187C, 0x00000000,
 		0x1880, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1884, 0x03B00000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1884, 0x03B00000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1884, 0x03B00000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1884, 0x03B00000,
+	0xA0000000,	0x00000000,
 		0x1884, 0x02B00000,
+	0xB0000000,	0x00000000,
 		0x1888, 0x00000000,
 		0x188C, 0x00000000,
 		0x1890, 0x00000000,
@@ -999,7 +1971,7 @@ static const u32 rtw8822c_bb[] = {
 		0x1C58, 0x00000000,
 		0x1C5C, 0xFFFFFFFF,
 		0x1C60, 0x0F030032,
-		0x1C64, 0x360F0000,
+		0x1C64, 0x360F0008,
 		0x1C68, 0x007F0000,
 		0x1C6C, 0x00010000,
 		0x1C70, 0x00037FFE,
@@ -1010,8 +1982,22 @@ static const u32 rtw8822c_bb[] = {
 		0x1C84, 0x245120D4,
 		0x1C88, 0xC8400483,
 		0x1C8C, 0x40005A20,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1C94, 0x00000B0E,
+		0x1C98, 0x00450000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1C94, 0x00000B0E,
+		0x1C98, 0x00450000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x1C94, 0x00000B0E,
+		0x1C98, 0x00450000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x1C94, 0x00000B0E,
+		0x1C98, 0x00450000,
+	0xA0000000,	0x00000000,
 		0x1C94, 0x00000000,
 		0x1C98, 0x00000000,
+	0xB0000000,	0x00000000,
 		0x1C9C, 0x00000000,
 		0x1CA0, 0x00000000,
 		0x1CA4, 0x20000000,
@@ -1125,7 +2111,7 @@ static const u32 rtw8822c_bb[] = {
 		0x1E60, 0x00000000,
 		0x1E64, 0xF3A00001,
 		0x1E68, 0x0028846E,
-		0x1E6C, 0x40374906,
+		0x1E6C, 0x40274906,
 		0x1E70, 0x00001000,
 		0x1E74, 0x00000000,
 		0x1E78, 0x00000000,
@@ -1344,10 +2330,30 @@ static const u32 rtw8822c_bb[] = {
 		0x4128, 0x000004FD,
 		0x412C, 0x00000000,
 		0x4134, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4138, 0x20100000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4138, 0x20100000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4138, 0x20100000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4138, 0x20100000,
+	0xA0000000,	0x00000000,
 		0x4138, 0x20000000,
+	0xB0000000,	0x00000000,
 		0x413C, 0x00000000,
 		0x4140, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4144, 0x00002030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4144, 0x00002030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4144, 0x00002030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4144, 0x00002030,
+	0xA0000000,	0x00000000,
 		0x4144, 0x00000000,
+	0xB0000000,	0x00000000,
 		0x4148, 0x00000000,
 		0x414C, 0x00000000,
 		0x4150, 0x00000000,
@@ -1357,13 +2363,33 @@ static const u32 rtw8822c_bb[] = {
 		0x4160, 0xF0040FF8,
 		0x4164, 0x7F000000,
 		0x4168, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x416C, 0x00008002,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x416C, 0x00008002,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x416C, 0x00008002,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x416C, 0x00008002,
+	0xA0000000,	0x00000000,
 		0x416C, 0x00008000,
+	0xB0000000,	0x00000000,
 		0x4170, 0x00000000,
 		0x4174, 0x00000000,
 		0x4178, 0x00000000,
 		0x417C, 0x00000000,
 		0x4180, 0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4184, 0x03B00000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4184, 0x03B00000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x4184, 0x03B00000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x4184, 0x03B00000,
+	0xA0000000,	0x00000000,
 		0x4184, 0x02B00000,
+	0xB0000000,	0x00000000,
 		0x4188, 0x00000000,
 		0x418C, 0x00000000,
 		0x4190, 0x00000000,
@@ -1483,7 +2509,7 @@ static const u32 rtw8822c_bb[] = {
 		0x1AC4, 0x00000000,
 		0x1AC8, 0x00000807,
 		0x1ACC, 0x00000707,
-		0x1AD0, 0xA33529AD,
+		0x1AD0, 0xA33529CE,
 		0x1AD4, 0x0D8D8452,
 		0x1AD8, 0x08024024,
 		0x1ADC, 0x000D0001,
@@ -1757,56 +2783,55 @@ static const u32 rtw8822c_bb[] = {
 		0x1D94, 0x40FF0000,
 		0xC0C, 0x02F1D8B7,
 		0x1EE8, 0x00000000,
-
 };
 
 RTW_DECL_TABLE_PHY_COND(rtw8822c_bb, rtw_phy_cfg_bb);
 
 static const struct rtw_phy_pg_cfg_pair rtw8822c_bb_pg_type0[] = {
 	{ 0, 0, 0, 0x00000c20, 0xffffffff, 0x484c5054, },
-	{ 0, 0, 0, 0x00000c24, 0xffffffff, 0x54585c60, },
+	{ 0, 0, 0, 0x00000c24, 0xffffffff, 0x54585858, },
 	{ 0, 0, 0, 0x00000c28, 0xffffffff, 0x44484c50, },
-	{ 0, 0, 0, 0x00000c2c, 0xffffffff, 0x5054585c, },
+	{ 0, 0, 0, 0x00000c2c, 0xffffffff, 0x50545858, },
 	{ 0, 0, 0, 0x00000c30, 0xffffffff, 0x4044484c, },
-	{ 0, 0, 1, 0x00000c34, 0xffffffff, 0x5054585c, },
+	{ 0, 0, 1, 0x00000c34, 0xffffffff, 0x50545858, },
 	{ 0, 0, 1, 0x00000c38, 0xffffffff, 0x4044484c, },
-	{ 0, 0, 0, 0x00000c3c, 0xffffffff, 0x5054585c, },
+	{ 0, 0, 0, 0x00000c3c, 0xffffffff, 0x50545858, },
 	{ 0, 0, 0, 0x00000c40, 0xffffffff, 0x4044484c, },
-	{ 0, 0, 0, 0x00000c44, 0xffffffff, 0x585c383c, },
+	{ 0, 0, 0, 0x00000c44, 0xffffffff, 0x5858383c, },
 	{ 0, 0, 1, 0x00000c48, 0xffffffff, 0x484c5054, },
 	{ 0, 0, 1, 0x00000c4c, 0xffffffff, 0x383c4044, },
 	{ 0, 1, 0, 0x00000e20, 0xffffffff, 0x484c5054, },
-	{ 0, 1, 0, 0x00000e24, 0xffffffff, 0x54585c60, },
+	{ 0, 1, 0, 0x00000e24, 0xffffffff, 0x54585858, },
 	{ 0, 1, 0, 0x00000e28, 0xffffffff, 0x44484c50, },
-	{ 0, 1, 0, 0x00000e2c, 0xffffffff, 0x5054585c, },
+	{ 0, 1, 0, 0x00000e2c, 0xffffffff, 0x50545858, },
 	{ 0, 1, 0, 0x00000e30, 0xffffffff, 0x4044484c, },
-	{ 0, 1, 1, 0x00000e34, 0xffffffff, 0x5054585c, },
+	{ 0, 1, 1, 0x00000e34, 0xffffffff, 0x50545858, },
 	{ 0, 1, 1, 0x00000e38, 0xffffffff, 0x4044484c, },
-	{ 0, 1, 0, 0x00000e3c, 0xffffffff, 0x5054585c, },
+	{ 0, 1, 0, 0x00000e3c, 0xffffffff, 0x50545858, },
 	{ 0, 1, 0, 0x00000e40, 0xffffffff, 0x4044484c, },
-	{ 0, 1, 0, 0x00000e44, 0xffffffff, 0x585c383c, },
+	{ 0, 1, 0, 0x00000e44, 0xffffffff, 0x5858383c, },
 	{ 0, 1, 1, 0x00000e48, 0xffffffff, 0x484c5054, },
 	{ 0, 1, 1, 0x00000e4c, 0xffffffff, 0x383c4044, },
-	{ 1, 0, 0, 0x00000c24, 0xffffffff, 0x54585c60, },
+	{ 1, 0, 0, 0x00000c24, 0xffffffff, 0x54585858, },
 	{ 1, 0, 0, 0x00000c28, 0xffffffff, 0x44484c50, },
-	{ 1, 0, 0, 0x00000c2c, 0xffffffff, 0x5054585c, },
+	{ 1, 0, 0, 0x00000c2c, 0xffffffff, 0x50545858, },
 	{ 1, 0, 0, 0x00000c30, 0xffffffff, 0x4044484c, },
-	{ 1, 0, 1, 0x00000c34, 0xffffffff, 0x5054585c, },
+	{ 1, 0, 1, 0x00000c34, 0xffffffff, 0x50545858, },
 	{ 1, 0, 1, 0x00000c38, 0xffffffff, 0x4044484c, },
-	{ 1, 0, 0, 0x00000c3c, 0xffffffff, 0x5054585c, },
+	{ 1, 0, 0, 0x00000c3c, 0xffffffff, 0x50545858, },
 	{ 1, 0, 0, 0x00000c40, 0xffffffff, 0x4044484c, },
-	{ 1, 0, 0, 0x00000c44, 0xffffffff, 0x585c383c, },
+	{ 1, 0, 0, 0x00000c44, 0xffffffff, 0x5858383c, },
 	{ 1, 0, 1, 0x00000c48, 0xffffffff, 0x484c5054, },
 	{ 1, 0, 1, 0x00000c4c, 0xffffffff, 0x383c4044, },
-	{ 1, 1, 0, 0x00000e24, 0xffffffff, 0x54585c60, },
+	{ 1, 1, 0, 0x00000e24, 0xffffffff, 0x54585858, },
 	{ 1, 1, 0, 0x00000e28, 0xffffffff, 0x44484c50, },
-	{ 1, 1, 0, 0x00000e2c, 0xffffffff, 0x5054585c, },
+	{ 1, 1, 0, 0x00000e2c, 0xffffffff, 0x50545858, },
 	{ 1, 1, 0, 0x00000e30, 0xffffffff, 0x4044484c, },
-	{ 1, 1, 1, 0x00000e34, 0xffffffff, 0x5054585c, },
+	{ 1, 1, 1, 0x00000e34, 0xffffffff, 0x50545858, },
 	{ 1, 1, 1, 0x00000e38, 0xffffffff, 0x4044484c, },
-	{ 1, 1, 0, 0x00000e3c, 0xffffffff, 0x5054585c, },
+	{ 1, 1, 0, 0x00000e3c, 0xffffffff, 0x50545858, },
 	{ 1, 1, 0, 0x00000e40, 0xffffffff, 0x4044484c, },
-	{ 1, 1, 0, 0x00000e44, 0xffffffff, 0x585c383c, },
+	{ 1, 1, 0, 0x00000e44, 0xffffffff, 0x5858383c, },
 	{ 1, 1, 1, 0x00000e48, 0xffffffff, 0x484c5054, },
 	{ 1, 1, 1, 0x00000e4c, 0xffffffff, 0x383c4044, },
 };
@@ -1821,1048 +2846,2588 @@ static const u32 rtw8822c_rf_a[] = {
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
 		0x08E, 0x000B9140,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000B9140,
+		0x08E, 0x000B9140,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0xA0000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0xB0000000,	0x00000000,
+		0x081, 0x0000FC01,
+		0x081, 0x0002FC01,
+		0x081, 0x0003FC01,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0xA0000000,	0x00000000,
+		0x085, 0x0006A06C,
+	0xB0000000,	0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0xA0000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000003F,
+		0x033, 0x00000001,
+		0x03F, 0x0000003F,
+		0x033, 0x00000002,
+		0x03F, 0x0000003F,
+		0x0EE, 0x00000000,
+	0xB0000000,	0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0xA0000000,	0x00000000,
-		0x08E, 0x000A5540,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000D,
+		0x03F, 0x00000380,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF380,
+		0x033, 0x0000000B,
+		0x03F, 0x00000300,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
 	0xB0000000,	0x00000000,
-		0x081, 0x0000FC01,
-		0x081, 0x0002FC01,
-		0x081, 0x0003FC01,
+		0x033, 0x00000003,
+		0x03F, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0xA0000000,	0x00000000,
-		0x085, 0x0006A06C,
+		0x033, 0x0000001F,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001D,
+		0x03F, 0x00000380,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF380,
+		0x033, 0x0000001B,
+		0x03F, 0x00000300,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
 	0xB0000000,	0x00000000,
+		0x033, 0x00000013,
+		0x03F, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000001,
-		0x03F, 0x0000002A,
-		0x033, 0x00000002,
-		0x03F, 0x0000002A,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0xA0000000,	0x00000000,
-		0x0EE, 0x00000010,
-		0x033, 0x00000001,
-		0x03F, 0x0000003F,
-		0x033, 0x00000001,
-		0x03F, 0x0000003F,
-		0x033, 0x00000002,
-		0x03F, 0x0000003F,
-		0x0EE, 0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002D,
+		0x03F, 0x00000380,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF380,
+		0x033, 0x0000002B,
+		0x03F, 0x00000300,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
 	0xB0000000,	0x00000000,
+		0x033, 0x00000023,
+		0x03F, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x00000287,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000207,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000000E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000000D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000000B,
+		0x033, 0x0000003B,
 		0x03F, 0x00000287,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000207,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0xA0000000,	0x00000000,
-		0x0EF, 0x00010000,
-		0x033, 0x0000000F,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000000E,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000000D,
-		0x03F, 0x00000380,
-		0x033, 0x0000000C,
-		0x03F, 0x000FF380,
-		0x033, 0x0000000B,
-		0x03F, 0x00000300,
-		0x033, 0x0000000A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000009,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000008,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000007,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000006,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000005,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000004,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0xB0000000,	0x00000000,
-		0x033, 0x00000003,
-		0x03F, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
-		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
-		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
-		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
 		0x03F, 0x00000287,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000207,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000001F,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
 		0x03F, 0x00000287,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000207,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0xA0000000,	0x00000000,
-		0x033, 0x0000001F,
+		0x033, 0x0000003F,
 		0x03F, 0x000773E8,
-		0x033, 0x0000001E,
+		0x033, 0x0000003E,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000001D,
+		0x033, 0x0000003D,
 		0x03F, 0x00000380,
-		0x033, 0x0000001C,
+		0x033, 0x0000003C,
 		0x03F, 0x000FF380,
-		0x033, 0x0000001B,
+		0x033, 0x0000003B,
 		0x03F, 0x00000300,
-		0x033, 0x0000001A,
+		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000019,
+		0x033, 0x00000039,
 		0x03F, 0x00000280,
-		0x033, 0x00000018,
+		0x033, 0x00000038,
 		0x03F, 0x000FF280,
-		0x033, 0x00000017,
+		0x033, 0x00000037,
 		0x03F, 0x00000200,
-		0x033, 0x00000016,
+		0x033, 0x00000036,
 		0x03F, 0x000001C0,
-		0x033, 0x00000015,
+		0x033, 0x00000035,
 		0x03F, 0x00000180,
-		0x033, 0x00000014,
+		0x033, 0x00000034,
 		0x03F, 0x00000040,
 	0xB0000000,	0x00000000,
-		0x033, 0x00000013,
+		0x033, 0x00000033,
 		0x03F, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x00000287,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000207,
-		0x033, 0x00000028,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000027,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000026,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000025,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000024,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000002F,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000002E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000002D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000002C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000002B,
+		0x033, 0x0000004B,
 		0x03F, 0x00000287,
-		0x033, 0x0000002A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000029,
+		0x033, 0x00000049,
 		0x03F, 0x00000207,
-		0x033, 0x00000028,
-		0x03F, 0x000FF280,
-		0x033, 0x00000027,
-		0x03F, 0x00000200,
-		0x033, 0x00000026,
-		0x03F, 0x000001C0,
-		0x033, 0x00000025,
-		0x03F, 0x00000180,
-		0x033, 0x00000024,
-		0x03F, 0x00000040,
-	0xA0000000,	0x00000000,
-		0x033, 0x0000002F,
-		0x03F, 0x000773E8,
-		0x033, 0x0000002E,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000002D,
-		0x03F, 0x00000380,
-		0x033, 0x0000002C,
-		0x03F, 0x000FF380,
-		0x033, 0x0000002B,
-		0x03F, 0x00000300,
-		0x033, 0x0000002A,
-		0x03F, 0x000002A8,
-		0x033, 0x00000029,
-		0x03F, 0x00000280,
-		0x033, 0x00000028,
-		0x03F, 0x000FF280,
-		0x033, 0x00000027,
-		0x03F, 0x00000200,
-		0x033, 0x00000026,
-		0x03F, 0x000001C0,
-		0x033, 0x00000025,
-		0x03F, 0x00000180,
-		0x033, 0x00000024,
-		0x03F, 0x00000040,
-	0xB0000000,	0x00000000,
-		0x033, 0x00000023,
-		0x03F, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
-		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
-		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
-		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
-		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
-		0x03F, 0x000002A8,
-		0x033, 0x00000039,
-		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
-		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
-		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
-		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
+		0x033, 0x0000004B,
 		0x03F, 0x00000287,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000207,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000003E,
+		0x033, 0x0000004E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000003D,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003C,
+		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000003B,
+		0x033, 0x0000004B,
 		0x03F, 0x00000287,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000207,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0xA0000000,	0x00000000,
-		0x033, 0x0000003F,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000003E,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000003D,
-		0x03F, 0x00000380,
-		0x033, 0x0000003C,
-		0x03F, 0x000FF380,
-		0x033, 0x0000003B,
-		0x03F, 0x00000300,
-		0x033, 0x0000003A,
+		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000039,
+		0x033, 0x00000049,
 		0x03F, 0x00000280,
-		0x033, 0x00000038,
+		0x033, 0x00000048,
 		0x03F, 0x000FF280,
-		0x033, 0x00000037,
+		0x033, 0x00000047,
 		0x03F, 0x00000200,
-		0x033, 0x00000036,
+		0x033, 0x00000046,
 		0x03F, 0x000001C0,
-		0x033, 0x00000035,
+		0x033, 0x00000045,
 		0x03F, 0x00000180,
-		0x033, 0x00000034,
+		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0xB0000000,	0x00000000,
-		0x033, 0x00000033,
-		0x03F, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -2872,11 +5437,11 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000004B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000049,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000048,
 		0x03F, 0x000FF280,
 		0x033, 0x00000047,
@@ -2887,7 +5452,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -2897,11 +5462,11 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000004B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000049,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000048,
 		0x03F, 0x000FF280,
 		0x033, 0x00000047,
@@ -2912,17 +5477,17 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0xA0000000,	0x00000000,
 		0x033, 0x0000004F,
-		0x03F, 0x000773C0,
+		0x03F, 0x000773E8,
 		0x033, 0x0000004E,
-		0x03F, 0x000FF3C0,
+		0x03F, 0x000FF3A0,
 		0x033, 0x0000004D,
-		0x03F, 0x000773E8,
+		0x03F, 0x00000380,
 		0x033, 0x0000004C,
-		0x03F, 0x000FF3E8,
+		0x03F, 0x000FF380,
 		0x033, 0x0000004B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000300,
 		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000049,
@@ -2937,160 +5502,235 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
+	0xB0000000,	0x00000000,
+		0x033, 0x00000043,
+		0x03F, 0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000004F,
+		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000004E,
+		0x033, 0x0000005E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000004D,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004C,
+		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000004B,
+		0x033, 0x0000005B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000280,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000004F,
+		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000004E,
+		0x033, 0x0000005E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000004D,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004C,
+		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000004B,
+		0x033, 0x0000005B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000280,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000004F,
+		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000004E,
+		0x033, 0x0000005E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000004D,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004C,
+		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000004B,
+		0x033, 0x0000005B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000280,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000004F,
+		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000004E,
+		0x033, 0x0000005E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000004D,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004C,
+		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000004B,
+		0x033, 0x0000005B,
 		0x03F, 0x00000287,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000207,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x0000004F,
+		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
-		0x033, 0x0000004E,
+		0x033, 0x0000005E,
 		0x03F, 0x000FF3C0,
-		0x033, 0x0000004D,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004C,
+		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
-		0x033, 0x0000004B,
+		0x033, 0x0000005B,
 		0x03F, 0x00000287,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000207,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0xA0000000,	0x00000000,
-		0x033, 0x0000004F,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
 		0x03F, 0x000773E8,
-		0x033, 0x0000004E,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
 		0x03F, 0x000FF3A0,
-		0x033, 0x0000004D,
-		0x03F, 0x00000380,
-		0x033, 0x0000004C,
-		0x03F, 0x000FF380,
-		0x033, 0x0000004B,
-		0x03F, 0x00000300,
-		0x033, 0x0000004A,
+		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
-		0x033, 0x00000049,
+		0x033, 0x00000059,
 		0x03F, 0x00000280,
-		0x033, 0x00000048,
+		0x033, 0x00000058,
 		0x03F, 0x000FF280,
-		0x033, 0x00000047,
+		0x033, 0x00000057,
 		0x03F, 0x00000200,
-		0x033, 0x00000046,
+		0x033, 0x00000056,
 		0x03F, 0x000001C0,
-		0x033, 0x00000045,
+		0x033, 0x00000055,
 		0x03F, 0x00000180,
-		0x033, 0x00000044,
+		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0xB0000000,	0x00000000,
-		0x033, 0x00000043,
-		0x03F, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3100,11 +5740,11 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000005B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000059,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000058,
 		0x03F, 0x000FF280,
 		0x033, 0x00000057,
@@ -3115,7 +5755,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3125,11 +5765,11 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000005B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000059,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000058,
 		0x03F, 0x000FF280,
 		0x033, 0x00000057,
@@ -3140,7 +5780,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3165,7 +5805,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3190,7 +5830,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3200,11 +5840,11 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000005C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000005B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000005A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000059,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000058,
 		0x03F, 0x000FF280,
 		0x033, 0x00000057,
@@ -3215,7 +5855,32 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3240,7 +5905,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3265,7 +5930,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000005E,
@@ -3334,24 +5999,1367 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0EF, 0x00000000,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00000000,
-	0xA0000000,	0x00000000,
-		0x0EF, 0x00000000,
-	0xB0000000,	0x00000000,
-		0x08A, 0x000E7DE3,
-		0x08B, 0x0008FE00,
-		0x0EE, 0x00000008,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0xA0000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0xB0000000,	0x00000000,
+		0x08A, 0x000E7DE3,
+		0x08B, 0x0008FE00,
+		0x0EE, 0x00000008,
+		0x033, 0x00000000,
+		0x03F, 0x00000023,
+		0x033, 0x00000001,
+		0x03F, 0x00000023,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00004000,
+		0x033, 0x00000000,
+		0x03F, 0x0000000F,
+		0x033, 0x00000002,
+		0x03F, 0x00000000,
+		0x0EF, 0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
 		0x033, 0x00000000,
-		0x03F, 0x00000023,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
 		0x033, 0x00000001,
-		0x03F, 0x00000023,
-		0x0EE, 0x00000000,
-		0x0EF, 0x00004000,
-		0x033, 0x00000000,
-		0x03F, 0x0000000F,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
 		0x033, 0x00000002,
-		0x03F, 0x00000000,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -3378,8 +7386,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -3498,7 +7506,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -3525,8 +7533,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -3645,7 +7653,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -3672,8 +7680,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -3792,7 +7800,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -3819,8 +7827,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -3939,7 +7947,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -3966,8 +7974,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -4086,7 +8094,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -4113,8 +8121,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -4233,7 +8241,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -4260,8 +8268,8 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -4380,7 +8388,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -4407,8 +8415,155 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x0EF, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
 		0x03E, 0x00001C86,
 		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -4695,6 +8850,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x063, 0x00000002,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x00000002,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
 	0xA0000000,	0x00000000,
 		0x063, 0x00000C02,
 	0xB0000000,	0x00000000,
@@ -4834,7 +9009,169 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00017238,
 		0x030, 0x00018228,
 		0x030, 0x00019238,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000238,
 		0x030, 0x00001238,
 		0x030, 0x00002238,
@@ -4861,7 +9198,34 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00017238,
 		0x030, 0x00018228,
 		0x030, 0x00019238,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000239,
 		0x030, 0x00001239,
 		0x030, 0x00002239,
@@ -4888,7 +9252,88 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00017239,
 		0x030, 0x00018209,
 		0x030, 0x00019239,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000239,
 		0x030, 0x00001239,
 		0x030, 0x00002239,
@@ -4958,7 +9403,59 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -4971,7 +9468,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -4984,7 +9481,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -4997,7 +9494,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -5010,7 +9507,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -5023,7 +9520,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -5036,7 +9533,85 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000334,
 		0x030, 0x00001334,
 		0x030, 0x00002334,
@@ -5076,6 +9651,99 @@ static const u32 rtw8822c_rf_a[] = {
 		0x030, 0x0000C330,
 		0x0EF, 0x00000000,
 		0x0EE, 0x00010000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000200,
 		0x03F, 0x0000006A,
 		0x033, 0x00000201,
@@ -5098,6 +9766,100 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000CF4,
 		0x033, 0x0000020A,
 		0x03F, 0x00000CF7,
+	0xB0000000,	0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000280,
 		0x03F, 0x0000006A,
 		0x033, 0x00000281,
@@ -5120,6 +9882,104 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000CF4,
 		0x033, 0x0000028A,
 		0x03F, 0x00000CF7,
+	0xB0000000,	0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000300,
 		0x03F, 0x0000006A,
 		0x033, 0x00000301,
@@ -5143,6 +10003,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x0000030A,
 		0x03F, 0x00000CF7,
 		0x0EE, 0x00000000,
+	0xB0000000,	0x00000000,
 		0x051, 0x0003C800,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
 		0x052, 0x000902CA,
@@ -5160,6 +10021,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x052, 0x000902CA,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x052, 0x000902CA,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
 	0xA0000000,	0x00000000,
 		0x052, 0x000942CA,
 	0xB0000000,	0x00000000,
@@ -5185,6 +10066,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5200,11 +10101,31 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00010E46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
@@ -5227,6 +10148,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00030246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00030246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5248,6 +10189,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5269,6 +10230,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5290,6 +10271,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00030246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00030246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5311,6 +10312,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5332,6 +10353,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5353,6 +10394,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00030246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00030246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5374,6 +10435,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5395,6 +10476,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5416,6 +10517,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00030246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00030246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5437,6 +10558,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5458,6 +10599,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00028246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00028246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00028246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5479,6 +10640,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00030246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00030246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00030246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5493,13 +10674,33 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5514,13 +10715,33 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00028246,
+		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5535,13 +10756,33 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x00031E46,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x00031E46,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00030246,
+		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5563,6 +10804,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5584,6 +10845,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5605,6 +10886,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5626,6 +10927,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5647,6 +10968,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5668,6 +11009,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5683,11 +11044,31 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x0000EA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
@@ -5710,6 +11091,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5731,6 +11132,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5752,6 +11173,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5773,6 +11214,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5794,6 +11255,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5815,6 +11296,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5836,6 +11337,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5857,6 +11378,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5878,6 +11419,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5899,6 +11460,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5920,6 +11501,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5941,6 +11542,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5962,6 +11583,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -5983,6 +11624,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -6004,6 +11665,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -6025,6 +11706,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00025E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00025E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00025E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -6046,6 +11747,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00031E46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00031E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00031E46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00002A46,
 	0xB0000000,	0x00000000,
@@ -6058,21 +11779,110 @@ static const u32 rtw8822c_rf_a[] = {
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0000EA46,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x0000EA46,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00021E46,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00021E46,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00021E46,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x00021E46,
-	0xA0000000,	0x00000000,
-		0x03F, 0x00002A46,
-	0xB0000000,	0x00000000,
-		0x0EF, 0x00000000,
-		0x0EE, 0x00010000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0000EA46,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x00021E46,
+	0xA0000000,	0x00000000,
+		0x03F, 0x00002A46,
+	0xB0000000,	0x00000000,
+		0x0EF, 0x00000000,
+		0x0EE, 0x00010000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000468,
+		0x033, 0x00000061,
+		0x03F, 0x00000868,
+		0x033, 0x00000062,
+		0x03F, 0x00000909,
+		0x033, 0x00000063,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000064,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000468,
+		0x033, 0x00000061,
+		0x03F, 0x00000868,
+		0x033, 0x00000062,
+		0x03F, 0x00000909,
+		0x033, 0x00000063,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000064,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000468,
+		0x033, 0x00000061,
+		0x03F, 0x00000868,
+		0x033, 0x00000062,
+		0x03F, 0x00000909,
+		0x033, 0x00000063,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000064,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000468,
 		0x033, 0x00000061,
@@ -6082,9 +11892,239 @@ static const u32 rtw8822c_rf_a[] = {
 		0x033, 0x00000063,
 		0x03F, 0x00000D0A,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -6095,19 +12135,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000061,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000062,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -6118,19 +12158,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000061,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000062,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -6141,19 +12181,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000061,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000062,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -6164,7 +12204,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
 		0x033, 0x00000061,
@@ -6187,19 +12227,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000467,
+		0x03F, 0x00000487,
 		0x033, 0x00000061,
-		0x03F, 0x00000867,
+		0x03F, 0x00000887,
 		0x033, 0x00000062,
-		0x03F, 0x00000908,
+		0x03F, 0x00000947,
 		0x033, 0x00000063,
-		0x03F, 0x00000D09,
+		0x03F, 0x00000D48,
 		0x033, 0x00000064,
-		0x03F, 0x00000D49,
+		0x03F, 0x00000D88,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8A,
+		0x03F, 0x00000DE8,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -6210,89 +12250,250 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
+	0xB0000000,	0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x00000060,
+		0x033, 0x00000020,
 		0x03F, 0x00000467,
-		0x033, 0x00000061,
+		0x033, 0x00000021,
 		0x03F, 0x00000867,
-		0x033, 0x00000062,
+		0x033, 0x00000022,
 		0x03F, 0x00000908,
-		0x033, 0x00000063,
+		0x033, 0x00000023,
 		0x03F, 0x00000D09,
-		0x033, 0x00000064,
+		0x033, 0x00000024,
 		0x03F, 0x00000D49,
-		0x033, 0x00000065,
+		0x033, 0x00000025,
 		0x03F, 0x00000D8A,
-		0x033, 0x00000066,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
-		0x033, 0x00000067,
+		0x033, 0x00000027,
 		0x03F, 0x00000DEE,
-		0x033, 0x00000068,
+		0x033, 0x00000028,
 		0x03F, 0x00000DF1,
-		0x033, 0x00000069,
+		0x033, 0x00000029,
 		0x03F, 0x00000DF4,
-		0x033, 0x0000006A,
+		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x033, 0x00000060,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
 		0x03F, 0x00000467,
-		0x033, 0x00000061,
+		0x033, 0x00000021,
 		0x03F, 0x00000867,
-		0x033, 0x00000062,
+		0x033, 0x00000022,
 		0x03F, 0x00000908,
-		0x033, 0x00000063,
+		0x033, 0x00000023,
 		0x03F, 0x00000D09,
-		0x033, 0x00000064,
+		0x033, 0x00000024,
 		0x03F, 0x00000D49,
-		0x033, 0x00000065,
+		0x033, 0x00000025,
 		0x03F, 0x00000D8A,
-		0x033, 0x00000066,
-		0x03F, 0x00000DEB,
-		0x033, 0x00000067,
-		0x03F, 0x00000DEE,
-		0x033, 0x00000068,
-		0x03F, 0x00000DF1,
-		0x033, 0x00000069,
-		0x03F, 0x00000DF4,
-		0x033, 0x0000006A,
-		0x03F, 0x00000DF7,
-	0xA0000000,	0x00000000,
-		0x033, 0x00000060,
-		0x03F, 0x00000487,
-		0x033, 0x00000061,
-		0x03F, 0x00000887,
-		0x033, 0x00000062,
-		0x03F, 0x00000947,
-		0x033, 0x00000063,
-		0x03F, 0x00000D48,
-		0x033, 0x00000064,
-		0x03F, 0x00000D88,
-		0x033, 0x00000065,
-		0x03F, 0x00000DE8,
-		0x033, 0x00000066,
+		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
-		0x033, 0x00000067,
+		0x033, 0x00000027,
 		0x03F, 0x00000DEE,
-		0x033, 0x00000068,
+		0x033, 0x00000028,
 		0x03F, 0x00000DF1,
-		0x033, 0x00000069,
+		0x033, 0x00000029,
 		0x03F, 0x00000DF4,
-		0x033, 0x0000006A,
+		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0xB0000000,	0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -6303,19 +12504,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -6326,19 +12527,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -6349,19 +12550,19 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -6372,7 +12573,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -6395,7 +12596,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -6418,7 +12619,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -6441,7 +12642,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -6489,7 +12690,7 @@ static const u32 rtw8822c_rf_a[] = {
 		0x03F, 0x00000DF7,
 	0xB0000000,	0x00000000,
 		0x0EE, 0x00000000,
-		0x05C, 0x000FCC00,
+		0x05C, 0x000FC000,
 		0x067, 0x0000A505,
 		0x0D3, 0x00000542,
 		0x043, 0x00005000,
@@ -6513,6 +12714,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0B3, 0x000FC760,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0B3, 0x000FC760,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
 	0xA0000000,	0x00000000,
 		0x0B3, 0x0007C760,
 	0xB0000000,	0x00000000,
@@ -6522,6 +12743,18 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0B6, 0x000387F8,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0B6, 0x000387F8,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B6, 0x000387F8,
 	0xA0000000,	0x00000000,
 		0x0B6, 0x000187F8,
 	0xB0000000,	0x00000000,
@@ -6552,12 +12785,33 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0B3, 0x000FC760,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0B3, 0x000FC760,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
 	0xA0000000,	0x00000000,
 		0x0B3, 0x0007C700,
 	0xB0000000,	0x00000000,
 		0x018, 0x0001B124,
 		0xFFE, 0x00000000,
 		0xFFE, 0x00000000,
+		0xFFE, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
 		0x0B3, 0x0007C760,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
@@ -6574,6 +12828,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0B3, 0x000FC760,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0B3, 0x000FC760,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0B3, 0x000FC760,
 	0xA0000000,	0x00000000,
 		0x0B3, 0x0007C760,
 	0xB0000000,	0x00000000,
@@ -6605,6 +12879,26 @@ static const u32 rtw8822c_rf_a[] = {
 		0x0DD, 0x00000540,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0DD, 0x00000540,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0DD, 0x00000540,
 	0xA0000000,	0x00000000,
 		0x0DD, 0x00000500,
 	0xB0000000,	0x00000000,
@@ -6714,7 +13008,37 @@ static const u32 rtw8822c_rf_b[] = {
 		0x093, 0x0008483F,
 		0x0EF, 0x00080000,
 		0x033, 0x00000001,
+	0x83000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0009123E,
+	0xA0000000,	0x00000000,
 		0x03F, 0x00091230,
+	0xB0000000,	0x00000000,
 		0x0EF, 0x00000000,
 		0x0DE, 0x00000020,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
@@ -6733,6 +13057,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x08E, 0x000A5540,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x08E, 0x000A5540,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x08E, 0x000A5540,
 	0xA0000000,	0x00000000,
 		0x08E, 0x000A5540,
 	0xB0000000,	0x00000000,
@@ -6794,7 +13138,97 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000002,
 		0x03F, 0x0000002A,
 		0x0EE, 0x00000000,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EE, 0x00000010,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000001,
+		0x03F, 0x0000002A,
+		0x033, 0x00000002,
+		0x03F, 0x0000002A,
+		0x0EE, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x0EE, 0x00000010,
 		0x033, 0x00000001,
 		0x03F, 0x0000002A,
@@ -6803,7 +13237,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000002,
 		0x03F, 0x0000002A,
 		0x0EE, 0x00000000,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x0EE, 0x00000010,
 		0x033, 0x00000001,
 		0x03F, 0x0000002A,
@@ -6911,11 +13345,271 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000000C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000000B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000207,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000000A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000009,
+		0x03F, 0x00000280,
+		0x033, 0x00000008,
+		0x03F, 0x000FF280,
+		0x033, 0x00000007,
+		0x03F, 0x00000200,
+		0x033, 0x00000006,
+		0x03F, 0x000001C0,
+		0x033, 0x00000005,
+		0x03F, 0x00000180,
+		0x033, 0x00000004,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00010000,
+		0x033, 0x0000000F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000000E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000000D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000000C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000000B,
+		0x03F, 0x00000287,
 		0x033, 0x0000000A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000009,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000008,
 		0x03F, 0x000FF280,
 		0x033, 0x00000007,
@@ -6926,7 +13620,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000004,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00010000,
 		0x033, 0x0000000F,
 		0x03F, 0x000773C0,
@@ -6937,11 +13631,11 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000000C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000000B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000000A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000009,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000008,
 		0x03F, 0x000FF280,
 		0x033, 0x00000007,
@@ -6952,7 +13646,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000004,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00010000,
 		0x033, 0x0000000F,
 		0x03F, 0x000773C0,
@@ -6978,7 +13672,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000004,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00010000,
 		0x033, 0x0000000F,
 		0x03F, 0x000773C0,
@@ -7004,7 +13698,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000004,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00010000,
 		0x033, 0x0000000F,
 		0x03F, 0x000773C0,
@@ -7144,11 +13838,261 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000001C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000001B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000207,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000001A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000019,
+		0x03F, 0x00000280,
+		0x033, 0x00000018,
+		0x03F, 0x000FF280,
+		0x033, 0x00000017,
+		0x03F, 0x00000200,
+		0x033, 0x00000016,
+		0x03F, 0x000001C0,
+		0x033, 0x00000015,
+		0x03F, 0x00000180,
+		0x033, 0x00000014,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000001F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000001E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000001D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000001C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000001B,
+		0x03F, 0x00000287,
 		0x033, 0x0000001A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000019,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000018,
 		0x03F, 0x000FF280,
 		0x033, 0x00000017,
@@ -7159,7 +14103,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000014,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000001F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000001E,
@@ -7169,11 +14113,11 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000001C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000001B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000001A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000019,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000018,
 		0x03F, 0x000FF280,
 		0x033, 0x00000017,
@@ -7184,7 +14128,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000014,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000001F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000001E,
@@ -7209,7 +14153,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000014,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000001F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000001E,
@@ -7234,7 +14178,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000014,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000001F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000001E,
@@ -7372,11 +14316,261 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000002C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000002B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000207,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000002A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000029,
+		0x03F, 0x00000280,
+		0x033, 0x00000028,
+		0x03F, 0x000FF280,
+		0x033, 0x00000027,
+		0x03F, 0x00000200,
+		0x033, 0x00000026,
+		0x03F, 0x000001C0,
+		0x033, 0x00000025,
+		0x03F, 0x00000180,
+		0x033, 0x00000024,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000002F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000002E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000002D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000002C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000002B,
+		0x03F, 0x00000287,
 		0x033, 0x0000002A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000029,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000028,
 		0x03F, 0x000FF280,
 		0x033, 0x00000027,
@@ -7387,7 +14581,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000024,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000002F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000002E,
@@ -7397,11 +14591,11 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000002C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000002B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000002A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000029,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000028,
 		0x03F, 0x000FF280,
 		0x033, 0x00000027,
@@ -7412,7 +14606,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000024,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000002F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000002E,
@@ -7437,7 +14631,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000024,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000002F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000002E,
@@ -7462,7 +14656,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000024,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000002F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000002E,
@@ -7600,11 +14794,261 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000003B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000207,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000003A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000039,
+		0x03F, 0x00000280,
+		0x033, 0x00000038,
+		0x03F, 0x000FF280,
+		0x033, 0x00000037,
+		0x03F, 0x00000200,
+		0x033, 0x00000036,
+		0x03F, 0x000001C0,
+		0x033, 0x00000035,
+		0x03F, 0x00000180,
+		0x033, 0x00000034,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000003F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000003E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000003D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000003C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000003B,
+		0x03F, 0x00000287,
 		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000039,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000038,
 		0x03F, 0x000FF280,
 		0x033, 0x00000037,
@@ -7615,7 +15059,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000003E,
@@ -7625,11 +15069,11 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000003C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000003B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000003A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000039,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000038,
 		0x03F, 0x000FF280,
 		0x033, 0x00000037,
@@ -7640,7 +15084,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000003E,
@@ -7665,7 +15109,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000003E,
@@ -7690,7 +15134,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000034,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000003F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000003E,
@@ -7843,7 +15287,257 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000280,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000280,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000280,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000280,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000280,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000004F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000004E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000004D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000004C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000004B,
+		0x03F, 0x00000287,
+		0x033, 0x0000004A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000049,
+		0x03F, 0x00000207,
+		0x033, 0x00000048,
+		0x03F, 0x000FF280,
+		0x033, 0x00000047,
+		0x03F, 0x00000200,
+		0x033, 0x00000046,
+		0x03F, 0x000001C0,
+		0x033, 0x00000045,
+		0x03F, 0x00000180,
+		0x033, 0x00000044,
+		0x03F, 0x00000040,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -7853,11 +15547,11 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000004C,
 		0x03F, 0x000FF3E8,
 		0x033, 0x0000004B,
-		0x03F, 0x000FF3A0,
+		0x03F, 0x00000287,
 		0x033, 0x0000004A,
 		0x03F, 0x000002A8,
 		0x033, 0x00000049,
-		0x03F, 0x00000280,
+		0x03F, 0x00000207,
 		0x033, 0x00000048,
 		0x03F, 0x000FF280,
 		0x033, 0x00000047,
@@ -7868,7 +15562,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -7893,7 +15587,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -7918,7 +15612,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000044,
 		0x03F, 0x00000040,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x0000004F,
 		0x03F, 0x000773C0,
 		0x033, 0x0000004E,
@@ -8171,6 +15865,256 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000180,
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x000FF3A0,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000280,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x0000005F,
+		0x03F, 0x000773C0,
+		0x033, 0x0000005E,
+		0x03F, 0x000FF3C0,
+		0x033, 0x0000005D,
+		0x03F, 0x000773E8,
+		0x033, 0x0000005C,
+		0x03F, 0x000FF3E8,
+		0x033, 0x0000005B,
+		0x03F, 0x00000287,
+		0x033, 0x0000005A,
+		0x03F, 0x000002A8,
+		0x033, 0x00000059,
+		0x03F, 0x00000207,
+		0x033, 0x00000058,
+		0x03F, 0x000FF280,
+		0x033, 0x00000057,
+		0x03F, 0x00000200,
+		0x033, 0x00000056,
+		0x03F, 0x000001C0,
+		0x033, 0x00000055,
+		0x03F, 0x00000180,
+		0x033, 0x00000054,
+		0x03F, 0x00000040,
 	0xA0000000,	0x00000000,
 		0x033, 0x0000005F,
 		0x03F, 0x000773E8,
@@ -8197,42 +16141,1532 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000054,
 		0x03F, 0x00000040,
 	0xB0000000,	0x00000000,
-		0x033, 0x00000053,
+		0x033, 0x00000053,
+		0x03F, 0x00000000,
+	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0xA0000000,	0x00000000,
+		0x0EF, 0x00000000,
+	0xB0000000,	0x00000000,
+		0x08A, 0x000E7DE3,
+		0x08B, 0x0008FE00,
+		0x0EE, 0x00000008,
+		0x033, 0x00000000,
+		0x03F, 0x00000023,
+		0x033, 0x00000001,
+		0x03F, 0x00000023,
+		0x0EE, 0x00000000,
+		0x0EF, 0x00004000,
+		0x033, 0x00000000,
+		0x03F, 0x0000000F,
+		0x033, 0x00000002,
 		0x03F, 0x00000000,
+		0x0EF, 0x00000000,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0xA0000000,	0x00000000,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
+		0x033, 0x00000000,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000001,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000002,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0xB0000000,	0x00000000,
-		0x08A, 0x000E7DE3,
-		0x08B, 0x0008FE00,
-		0x0EE, 0x00000008,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x0EF, 0x00020000,
 		0x033, 0x00000000,
-		0x03F, 0x00000023,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
 		0x033, 0x00000001,
-		0x03F, 0x00000023,
-		0x0EE, 0x00000000,
-		0x0EF, 0x00004000,
-		0x033, 0x00000000,
-		0x03F, 0x0000000F,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
 		0x033, 0x00000002,
-		0x03F, 0x00000000,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000003,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000004,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000005,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000006,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000007,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000008,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000009,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000000B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000000C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000000D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000000E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000000F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
+		0x033, 0x00000010,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000011,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000012,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000013,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000014,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000015,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000016,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000017,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000018,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000019,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000001B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000001C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000001D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000001E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000001F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000020,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000021,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x00000022,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x00000023,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x00000024,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x00000025,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x00000026,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x00000027,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
+		0x033, 0x00000028,
+		0x03E, 0x00001C86,
+		0x03F, 0x00020000,
+		0x033, 0x00000029,
+		0x03E, 0x00001C02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002A,
+		0x03E, 0x00000F02,
+		0x03F, 0x00020000,
+		0x033, 0x0000002B,
+		0x03E, 0x00000F00,
+		0x03F, 0x00020000,
+		0x033, 0x0000002C,
+		0x03E, 0x00000086,
+		0x03F, 0x00020000,
+		0x033, 0x0000002D,
+		0x03E, 0x00000002,
+		0x03F, 0x00020000,
+		0x033, 0x0000002E,
+		0x03E, 0x00000000,
+		0x03F, 0x00020000,
+		0x033, 0x0000002F,
+		0x03E, 0x00000000,
+		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x81000001,	0x00000000,	0x40000000,	0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8259,8 +17693,8 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -8379,7 +17813,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x91000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8404,10 +17838,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00020000,
 		0x033, 0x00000007,
 		0x03E, 0x00000000,
-		0x03F, 0x0002F81C,
+		0x03F, 0x0002C010,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -8526,7 +17960,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8551,10 +17985,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00020000,
 		0x033, 0x00000007,
 		0x03E, 0x00000000,
-		0x03F, 0x0002F81C,
+		0x03F, 0x0002C010,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -8673,7 +18107,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8698,10 +18132,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00020000,
 		0x033, 0x00000007,
 		0x03E, 0x00000000,
-		0x03F, 0x0002F81C,
+		0x03F, 0x0002C010,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -8820,7 +18254,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8845,10 +18279,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00020000,
 		0x033, 0x00000007,
 		0x03E, 0x00000000,
-		0x03F, 0x0002F81C,
+		0x03F, 0x0002C010,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -8967,7 +18401,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -8992,10 +18426,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00020000,
 		0x033, 0x00000007,
 		0x03E, 0x00000000,
-		0x03F, 0x0002F81C,
+		0x03F, 0x0002C010,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -9114,7 +18548,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -9141,8 +18575,8 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -9261,7 +18695,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002C010,
 		0x0EF, 0x00000000,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x0EF, 0x00020000,
 		0x033, 0x00000000,
 		0x03E, 0x00001C86,
@@ -9288,8 +18722,8 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000000,
 		0x03F, 0x0002F81C,
 		0x033, 0x00000008,
-		0x03E, 0x00001C86,
-		0x03F, 0x00020000,
+		0x03E, 0x00000000,
+		0x03F, 0x0002F81C,
 		0x033, 0x00000009,
 		0x03E, 0x00001C02,
 		0x03F, 0x00020000,
@@ -9576,6 +19010,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x063, 0x00000002,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x063, 0x00000002,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x063, 0x00000002,
 	0xA0000000,	0x00000000,
 		0x063, 0x00000C02,
 	0xB0000000,	0x00000000,
@@ -9742,7 +19196,277 @@ static const u32 rtw8822c_rf_b[] = {
 		0x030, 0x00017238,
 		0x030, 0x00018228,
 		0x030, 0x00019238,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000239,
+		0x030, 0x00001239,
+		0x030, 0x00002239,
+		0x030, 0x00003239,
+		0x030, 0x00004239,
+		0x030, 0x00005239,
+		0x030, 0x00006239,
+		0x030, 0x00007239,
+		0x030, 0x00008239,
+		0x030, 0x00009239,
+		0x030, 0x0000A239,
+		0x030, 0x0000B239,
+		0x030, 0x0000C239,
+		0x030, 0x0000D239,
+		0x030, 0x0000E209,
+		0x030, 0x0000F239,
+		0x030, 0x00010239,
+		0x030, 0x00011239,
+		0x030, 0x00012209,
+		0x030, 0x00013239,
+		0x030, 0x00014239,
+		0x030, 0x00015239,
+		0x030, 0x00016209,
+		0x030, 0x00017239,
+		0x030, 0x00018209,
+		0x030, 0x00019239,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000238,
+		0x030, 0x00001238,
+		0x030, 0x00002238,
+		0x030, 0x00003238,
+		0x030, 0x00004228,
+		0x030, 0x00005238,
+		0x030, 0x00006238,
+		0x030, 0x00007238,
+		0x030, 0x00008228,
+		0x030, 0x00009238,
+		0x030, 0x0000A238,
+		0x030, 0x0000B238,
+		0x030, 0x0000C238,
+		0x030, 0x0000D238,
+		0x030, 0x0000E228,
+		0x030, 0x0000F238,
+		0x030, 0x00010238,
+		0x030, 0x00011238,
+		0x030, 0x00012228,
+		0x030, 0x00013238,
+		0x030, 0x00014238,
+		0x030, 0x00015238,
+		0x030, 0x00016228,
+		0x030, 0x00017238,
+		0x030, 0x00018228,
+		0x030, 0x00019238,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000239,
 		0x030, 0x00001239,
 		0x030, 0x00002239,
@@ -9769,7 +19493,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x030, 0x00017239,
 		0x030, 0x00018209,
 		0x030, 0x00019239,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x030, 0x00000239,
 		0x030, 0x00001239,
 		0x030, 0x00002239,
@@ -9930,6 +19654,136 @@ static const u32 rtw8822c_rf_b[] = {
 		0x030, 0x00009334,
 		0x030, 0x0000A334,
 		0x030, 0x0000B334,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x030, 0x00000334,
+		0x030, 0x00001334,
+		0x030, 0x00002334,
+		0x030, 0x00003334,
+		0x030, 0x00004334,
+		0x030, 0x00005334,
+		0x030, 0x00006334,
+		0x030, 0x00007334,
+		0x030, 0x00008334,
+		0x030, 0x00009334,
+		0x030, 0x0000A334,
+		0x030, 0x0000B334,
 	0xA0000000,	0x00000000,
 		0x030, 0x00000232,
 		0x030, 0x00001232,
@@ -9957,6 +19811,99 @@ static const u32 rtw8822c_rf_b[] = {
 		0x030, 0x0000C330,
 		0x0EF, 0x00000000,
 		0x0EE, 0x00010000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000200,
+		0x03F, 0x00000005,
+		0x033, 0x00000201,
+		0x03F, 0x00000008,
+		0x033, 0x00000202,
+		0x03F, 0x0000000B,
+		0x033, 0x00000203,
+		0x03F, 0x0000000E,
+		0x033, 0x00000204,
+		0x03F, 0x0000002B,
+		0x033, 0x00000205,
+		0x03F, 0x0000002E,
+		0x033, 0x00000206,
+		0x03F, 0x0000006B,
+		0x033, 0x00000207,
+		0x03F, 0x0000006E,
+		0x033, 0x00000208,
+		0x03F, 0x00000071,
+		0x033, 0x00000209,
+		0x03F, 0x00000074,
+		0x033, 0x0000020A,
+		0x03F, 0x00000077,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000200,
 		0x03F, 0x0000006A,
 		0x033, 0x00000201,
@@ -9979,6 +19926,100 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000CF4,
 		0x033, 0x0000020A,
 		0x03F, 0x00000CF7,
+	0xB0000000,	0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000280,
+		0x03F, 0x00000005,
+		0x033, 0x00000281,
+		0x03F, 0x00000008,
+		0x033, 0x00000282,
+		0x03F, 0x0000000B,
+		0x033, 0x00000283,
+		0x03F, 0x0000000E,
+		0x033, 0x00000284,
+		0x03F, 0x0000002B,
+		0x033, 0x00000285,
+		0x03F, 0x0000002E,
+		0x033, 0x00000286,
+		0x03F, 0x0000006B,
+		0x033, 0x00000287,
+		0x03F, 0x0000006E,
+		0x033, 0x00000288,
+		0x03F, 0x00000071,
+		0x033, 0x00000289,
+		0x03F, 0x00000074,
+		0x033, 0x0000028A,
+		0x03F, 0x00000077,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000280,
 		0x03F, 0x0000006A,
 		0x033, 0x00000281,
@@ -10001,6 +20042,104 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000CF4,
 		0x033, 0x0000028A,
 		0x03F, 0x00000CF7,
+	0xB0000000,	0x00000000,
+	0x83000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000300,
+		0x03F, 0x00000005,
+		0x033, 0x00000301,
+		0x03F, 0x00000008,
+		0x033, 0x00000302,
+		0x03F, 0x0000000B,
+		0x033, 0x00000303,
+		0x03F, 0x0000000E,
+		0x033, 0x00000304,
+		0x03F, 0x0000002B,
+		0x033, 0x00000305,
+		0x03F, 0x0000002E,
+		0x033, 0x00000306,
+		0x03F, 0x00000031,
+		0x033, 0x00000307,
+		0x03F, 0x00000034,
+		0x033, 0x00000308,
+		0x03F, 0x00000053,
+		0x033, 0x00000309,
+		0x03F, 0x00000056,
+		0x033, 0x0000030A,
+		0x03F, 0x000000D1,
+		0x0EE, 0x00000000,
+	0xA0000000,	0x00000000,
 		0x033, 0x00000300,
 		0x03F, 0x0000006A,
 		0x033, 0x00000301,
@@ -10024,6 +20163,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x0000030A,
 		0x03F, 0x00000CF7,
 		0x0EE, 0x00000000,
+	0xB0000000,	0x00000000,
 		0x051, 0x0003C800,
 	0x81000001,	0x00000000,	0x40000000,	0x00000000,
 		0x052, 0x000902CA,
@@ -10041,6 +20181,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x052, 0x000902CA,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x052, 0x000902CA,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x052, 0x000902CA,
 	0xA0000000,	0x00000000,
 		0x052, 0x000942C0,
 	0xB0000000,	0x00000000,
@@ -10057,6 +20217,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10076,6 +20256,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10088,6 +20288,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10107,6 +20327,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10128,6 +20368,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002C246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002C246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10140,6 +20400,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10159,6 +20439,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10171,6 +20471,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10190,6 +20510,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10211,6 +20551,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002C246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002C246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10223,6 +20583,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10242,6 +20622,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10254,6 +20654,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10273,6 +20693,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10294,6 +20734,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002C246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002C246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x0000C246,
 	0xB0000000,	0x00000000,
@@ -10306,6 +20766,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10325,6 +20805,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10337,6 +20837,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10356,6 +20876,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10377,6 +20917,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002C246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002C246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10385,9 +20945,29 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
@@ -10408,6 +20988,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10420,6 +21020,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10439,6 +21059,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000241C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000241C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000241C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10460,6 +21100,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002C246,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002C246,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002C246,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10472,6 +21132,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10484,13 +21164,33 @@ static const u32 rtw8822c_rf_b[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00024246,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10503,6 +21203,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10515,13 +21235,33 @@ static const u32 rtw8822c_rf_b[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00024246,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x000241C6,
+		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10536,13 +21276,33 @@ static const u32 rtw8822c_rf_b[] = {
 	0x92000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x00024246,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x0002C246,
+		0x03F, 0x0002CA46,
 	0x93000002,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x0002C246,
+		0x03F, 0x0002CA46,
 	0x93000003,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x0002C246,
+		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
-		0x03F, 0x0002C246,
+		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10555,6 +21315,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10574,6 +21354,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10586,6 +21386,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10605,6 +21425,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10626,6 +21466,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10638,6 +21498,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10657,6 +21537,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10669,6 +21569,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10688,6 +21608,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10709,6 +21649,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10721,6 +21681,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10740,6 +21720,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10752,6 +21752,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10765,11 +21785,31 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0001CA46,
 	0x93000001,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
@@ -10792,6 +21832,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10804,6 +21864,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10823,6 +21903,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10835,6 +21935,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10854,6 +21974,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10875,6 +22015,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10887,6 +22047,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10906,6 +22086,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10918,6 +22118,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10937,6 +22157,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10958,6 +22198,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -10970,6 +22230,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -10989,6 +22269,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11001,6 +22301,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11020,6 +22340,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11032,6 +22372,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000020,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000020,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000020,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11051,6 +22411,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11063,6 +22443,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11082,6 +22482,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11094,6 +22514,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11113,6 +22553,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11134,6 +22594,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11146,6 +22626,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11165,6 +22665,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11177,6 +22697,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03E, 0x00000030,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03E, 0x00000030,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03E, 0x00000030,
 	0xA0000000,	0x00000000,
 		0x03E, 0x00000020,
 	0xB0000000,	0x00000000,
@@ -11196,6 +22736,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x000209C6,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x000209C6,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x000209C6,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11217,6 +22777,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0002CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0002CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0002CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11238,6 +22818,26 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x0001CA46,
 	0x93000004,	0x00000000,	0x40000000,	0x00000000,
 		0x03F, 0x0001CA46,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
+		0x03F, 0x0001CA46,
 	0xA0000000,	0x00000000,
 		0x03F, 0x00008E46,
 	0xB0000000,	0x00000000,
@@ -11289,19 +22889,249 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000468,
+		0x033, 0x00000061,
+		0x03F, 0x00000868,
+		0x033, 0x00000062,
+		0x03F, 0x00000909,
+		0x033, 0x00000063,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000064,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000468,
+		0x033, 0x00000061,
+		0x03F, 0x00000868,
+		0x033, 0x00000062,
+		0x03F, 0x00000909,
+		0x033, 0x00000063,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000064,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000060,
+		0x03F, 0x00000467,
+		0x033, 0x00000061,
+		0x03F, 0x00000867,
+		0x033, 0x00000062,
+		0x03F, 0x00000908,
+		0x033, 0x00000063,
+		0x03F, 0x00000D09,
+		0x033, 0x00000064,
+		0x03F, 0x00000D49,
+		0x033, 0x00000065,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000066,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000067,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000068,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000069,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000006A,
+		0x03F, 0x00000DF7,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000061,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000062,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -11312,19 +23142,19 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000061,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000062,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000063,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000064,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000065,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000066,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000067,
@@ -11335,7 +23165,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
 		0x033, 0x00000061,
@@ -11358,7 +23188,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
 		0x033, 0x00000061,
@@ -11381,7 +23211,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
 		0x033, 0x00000061,
@@ -11404,7 +23234,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000006A,
 		0x03F, 0x00000DF7,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000060,
 		0x03F, 0x00000467,
 		0x033, 0x00000061,
@@ -11476,17 +23306,247 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF7,
 	0x91000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000468,
+		0x033, 0x00000021,
+		0x03F, 0x00000868,
+		0x033, 0x00000022,
+		0x03F, 0x00000909,
+		0x033, 0x00000023,
+		0x03F, 0x00000D0A,
+		0x033, 0x00000024,
+		0x03F, 0x00000D4A,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8B,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000005,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000015,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x93000016,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
+		0x033, 0x00000021,
+		0x03F, 0x00000867,
+		0x033, 0x00000022,
+		0x03F, 0x00000908,
+		0x033, 0x00000023,
+		0x03F, 0x00000D09,
+		0x033, 0x00000024,
+		0x03F, 0x00000D49,
+		0x033, 0x00000025,
+		0x03F, 0x00000D8A,
+		0x033, 0x00000026,
+		0x03F, 0x00000DEB,
+		0x033, 0x00000027,
+		0x03F, 0x00000DEE,
+		0x033, 0x00000028,
+		0x03F, 0x00000DF1,
+		0x033, 0x00000029,
+		0x03F, 0x00000DF4,
+		0x033, 0x0000002A,
+		0x03F, 0x00000DF7,
+	0x94000001,	0x00000000,	0x40000000,	0x00000000,
+		0x033, 0x00000020,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -11497,19 +23557,19 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x92000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000002,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -11520,19 +23580,19 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x92000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000003,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
-		0x03F, 0x00000468,
+		0x03F, 0x00000467,
 		0x033, 0x00000021,
-		0x03F, 0x00000868,
+		0x03F, 0x00000867,
 		0x033, 0x00000022,
-		0x03F, 0x00000909,
+		0x03F, 0x00000908,
 		0x033, 0x00000023,
-		0x03F, 0x00000D0A,
+		0x03F, 0x00000D09,
 		0x033, 0x00000024,
-		0x03F, 0x00000D4A,
+		0x03F, 0x00000D49,
 		0x033, 0x00000025,
-		0x03F, 0x00000D8B,
+		0x03F, 0x00000D8A,
 		0x033, 0x00000026,
 		0x03F, 0x00000DEB,
 		0x033, 0x00000027,
@@ -11543,7 +23603,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000001,	0x00000000,	0x40000000,	0x00000000,
+	0x94000004,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -11566,7 +23626,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000002,	0x00000000,	0x40000000,	0x00000000,
+	0x94000005,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -11589,7 +23649,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000003,	0x00000000,	0x40000000,	0x00000000,
+	0x94000015,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -11612,7 +23672,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF4,
 		0x033, 0x0000002A,
 		0x03F, 0x00000DF7,
-	0x93000004,	0x00000000,	0x40000000,	0x00000000,
+	0x94000016,	0x00000000,	0x40000000,	0x00000000,
 		0x033, 0x00000020,
 		0x03F, 0x00000467,
 		0x033, 0x00000021,
@@ -11660,7 +23720,7 @@ static const u32 rtw8822c_rf_b[] = {
 		0x03F, 0x00000DF7,
 	0xB0000000,	0x00000000,
 		0x0EE, 0x00000000,
-		0x05C, 0x000FCC00,
+		0x05C, 0x000FC000,
 		0x067, 0x0000A505,
 		0x0D3, 0x00000542,
 		0x043, 0x00005000,
@@ -11710,6 +23770,10 @@ static const u32 rtw8822c_rf_b[] = {
 		0x033, 0x00000007,
 		0x03F, 0x00000002,
 		0x0EF, 0x00000000,
+		0x0EF, 0x00080000,
+		0x033, 0x00000001,
+		0x03F, 0x000916BF,
+		0x0EF, 0x00000000,
 };
 
 RTW_DECL_TABLE_RF_RADIO(rtw8822c_rf_b, B);
@@ -11717,394 +23781,1961 @@ RTW_DECL_TABLE_RF_RADIO(rtw8822c_rf_b, B);
 static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 0, 0, 0, 0, 1, 72, },
 	{ 2, 0, 0, 0, 1, 60, },
+	{ 1, 0, 0, 0, 1, 68, },
+	{ 3, 0, 0, 0, 1, 72, },
+	{ 4, 0, 0, 0, 1, 76, },
+	{ 5, 0, 0, 0, 1, 60, },
+	{ 6, 0, 0, 0, 1, 72, },
+	{ 7, 0, 0, 0, 1, 60, },
+	{ 8, 0, 0, 0, 1, 72, },
+	{ 9, 0, 0, 0, 1, 60, },
 	{ 0, 0, 0, 0, 2, 72, },
 	{ 2, 0, 0, 0, 2, 60, },
+	{ 1, 0, 0, 0, 2, 68, },
+	{ 3, 0, 0, 0, 2, 72, },
+	{ 4, 0, 0, 0, 2, 76, },
+	{ 5, 0, 0, 0, 2, 60, },
+	{ 6, 0, 0, 0, 2, 72, },
+	{ 7, 0, 0, 0, 2, 60, },
+	{ 8, 0, 0, 0, 2, 72, },
+	{ 9, 0, 0, 0, 2, 60, },
 	{ 0, 0, 0, 0, 3, 76, },
 	{ 2, 0, 0, 0, 3, 60, },
+	{ 1, 0, 0, 0, 3, 68, },
+	{ 3, 0, 0, 0, 3, 76, },
+	{ 4, 0, 0, 0, 3, 76, },
+	{ 5, 0, 0, 0, 3, 60, },
+	{ 6, 0, 0, 0, 3, 76, },
+	{ 7, 0, 0, 0, 3, 60, },
+	{ 8, 0, 0, 0, 3, 76, },
+	{ 9, 0, 0, 0, 3, 60, },
 	{ 0, 0, 0, 0, 4, 76, },
 	{ 2, 0, 0, 0, 4, 60, },
+	{ 1, 0, 0, 0, 4, 68, },
+	{ 3, 0, 0, 0, 4, 76, },
+	{ 4, 0, 0, 0, 4, 76, },
+	{ 5, 0, 0, 0, 4, 60, },
+	{ 6, 0, 0, 0, 4, 76, },
+	{ 7, 0, 0, 0, 4, 60, },
+	{ 8, 0, 0, 0, 4, 76, },
+	{ 9, 0, 0, 0, 4, 60, },
 	{ 0, 0, 0, 0, 5, 76, },
 	{ 2, 0, 0, 0, 5, 60, },
+	{ 1, 0, 0, 0, 5, 68, },
+	{ 3, 0, 0, 0, 5, 76, },
+	{ 4, 0, 0, 0, 5, 76, },
+	{ 5, 0, 0, 0, 5, 60, },
+	{ 6, 0, 0, 0, 5, 76, },
+	{ 7, 0, 0, 0, 5, 60, },
+	{ 8, 0, 0, 0, 5, 76, },
+	{ 9, 0, 0, 0, 5, 60, },
 	{ 0, 0, 0, 0, 6, 76, },
 	{ 2, 0, 0, 0, 6, 60, },
+	{ 1, 0, 0, 0, 6, 68, },
+	{ 3, 0, 0, 0, 6, 76, },
+	{ 4, 0, 0, 0, 6, 76, },
+	{ 5, 0, 0, 0, 6, 60, },
+	{ 6, 0, 0, 0, 6, 76, },
+	{ 7, 0, 0, 0, 6, 60, },
+	{ 8, 0, 0, 0, 6, 76, },
+	{ 9, 0, 0, 0, 6, 60, },
 	{ 0, 0, 0, 0, 7, 76, },
 	{ 2, 0, 0, 0, 7, 60, },
+	{ 1, 0, 0, 0, 7, 68, },
+	{ 3, 0, 0, 0, 7, 76, },
+	{ 4, 0, 0, 0, 7, 76, },
+	{ 5, 0, 0, 0, 7, 60, },
+	{ 6, 0, 0, 0, 7, 76, },
+	{ 7, 0, 0, 0, 7, 60, },
+	{ 8, 0, 0, 0, 7, 76, },
+	{ 9, 0, 0, 0, 7, 60, },
 	{ 0, 0, 0, 0, 8, 76, },
 	{ 2, 0, 0, 0, 8, 60, },
+	{ 1, 0, 0, 0, 8, 68, },
+	{ 3, 0, 0, 0, 8, 76, },
+	{ 4, 0, 0, 0, 8, 76, },
+	{ 5, 0, 0, 0, 8, 60, },
+	{ 6, 0, 0, 0, 8, 76, },
+	{ 7, 0, 0, 0, 8, 60, },
+	{ 8, 0, 0, 0, 8, 76, },
+	{ 9, 0, 0, 0, 8, 60, },
 	{ 0, 0, 0, 0, 9, 76, },
 	{ 2, 0, 0, 0, 9, 60, },
+	{ 1, 0, 0, 0, 9, 68, },
+	{ 3, 0, 0, 0, 9, 76, },
+	{ 4, 0, 0, 0, 9, 76, },
+	{ 5, 0, 0, 0, 9, 60, },
+	{ 6, 0, 0, 0, 9, 76, },
+	{ 7, 0, 0, 0, 9, 60, },
+	{ 8, 0, 0, 0, 9, 76, },
+	{ 9, 0, 0, 0, 9, 60, },
 	{ 0, 0, 0, 0, 10, 72, },
 	{ 2, 0, 0, 0, 10, 60, },
+	{ 1, 0, 0, 0, 10, 68, },
+	{ 3, 0, 0, 0, 10, 72, },
+	{ 4, 0, 0, 0, 10, 76, },
+	{ 5, 0, 0, 0, 10, 60, },
+	{ 6, 0, 0, 0, 10, 72, },
+	{ 7, 0, 0, 0, 10, 60, },
+	{ 8, 0, 0, 0, 10, 72, },
+	{ 9, 0, 0, 0, 10, 60, },
 	{ 0, 0, 0, 0, 11, 72, },
 	{ 2, 0, 0, 0, 11, 60, },
+	{ 1, 0, 0, 0, 11, 68, },
+	{ 3, 0, 0, 0, 11, 72, },
+	{ 4, 0, 0, 0, 11, 76, },
+	{ 5, 0, 0, 0, 11, 60, },
+	{ 6, 0, 0, 0, 11, 72, },
+	{ 7, 0, 0, 0, 11, 60, },
+	{ 8, 0, 0, 0, 11, 72, },
+	{ 9, 0, 0, 0, 11, 60, },
 	{ 0, 0, 0, 0, 12, 52, },
 	{ 2, 0, 0, 0, 12, 60, },
+	{ 1, 0, 0, 0, 12, 68, },
+	{ 3, 0, 0, 0, 12, 52, },
+	{ 4, 0, 0, 0, 12, 76, },
+	{ 5, 0, 0, 0, 12, 60, },
+	{ 6, 0, 0, 0, 12, 52, },
+	{ 7, 0, 0, 0, 12, 60, },
+	{ 8, 0, 0, 0, 12, 52, },
+	{ 9, 0, 0, 0, 12, 60, },
 	{ 0, 0, 0, 0, 13, 48, },
 	{ 2, 0, 0, 0, 13, 60, },
+	{ 1, 0, 0, 0, 13, 68, },
+	{ 3, 0, 0, 0, 13, 48, },
+	{ 4, 0, 0, 0, 13, 76, },
+	{ 5, 0, 0, 0, 13, 60, },
+	{ 6, 0, 0, 0, 13, 48, },
+	{ 7, 0, 0, 0, 13, 60, },
+	{ 8, 0, 0, 0, 13, 48, },
+	{ 9, 0, 0, 0, 13, 60, },
 	{ 0, 0, 0, 0, 14, 127, },
 	{ 2, 0, 0, 0, 14, 127, },
+	{ 1, 0, 0, 0, 14, 68, },
+	{ 3, 0, 0, 0, 14, 127, },
+	{ 4, 0, 0, 0, 14, 127, },
+	{ 5, 0, 0, 0, 14, 127, },
+	{ 6, 0, 0, 0, 14, 127, },
+	{ 7, 0, 0, 0, 14, 127, },
+	{ 8, 0, 0, 0, 14, 127, },
+	{ 9, 0, 0, 0, 14, 127, },
 	{ 0, 0, 0, 1, 1, 52, },
 	{ 2, 0, 0, 1, 1, 60, },
+	{ 1, 0, 0, 1, 1, 76, },
+	{ 3, 0, 0, 1, 1, 52, },
+	{ 4, 0, 0, 1, 1, 76, },
+	{ 5, 0, 0, 1, 1, 60, },
+	{ 6, 0, 0, 1, 1, 52, },
+	{ 7, 0, 0, 1, 1, 60, },
+	{ 8, 0, 0, 1, 1, 52, },
+	{ 9, 0, 0, 1, 1, 60, },
 	{ 0, 0, 0, 1, 2, 60, },
 	{ 2, 0, 0, 1, 2, 60, },
+	{ 1, 0, 0, 1, 2, 76, },
+	{ 3, 0, 0, 1, 2, 60, },
+	{ 4, 0, 0, 1, 2, 76, },
+	{ 5, 0, 0, 1, 2, 60, },
+	{ 6, 0, 0, 1, 2, 60, },
+	{ 7, 0, 0, 1, 2, 60, },
+	{ 8, 0, 0, 1, 2, 60, },
+	{ 9, 0, 0, 1, 2, 60, },
 	{ 0, 0, 0, 1, 3, 64, },
 	{ 2, 0, 0, 1, 3, 60, },
+	{ 1, 0, 0, 1, 3, 76, },
+	{ 3, 0, 0, 1, 3, 64, },
+	{ 4, 0, 0, 1, 3, 76, },
+	{ 5, 0, 0, 1, 3, 60, },
+	{ 6, 0, 0, 1, 3, 64, },
+	{ 7, 0, 0, 1, 3, 60, },
+	{ 8, 0, 0, 1, 3, 64, },
+	{ 9, 0, 0, 1, 3, 60, },
 	{ 0, 0, 0, 1, 4, 68, },
 	{ 2, 0, 0, 1, 4, 60, },
+	{ 1, 0, 0, 1, 4, 76, },
+	{ 3, 0, 0, 1, 4, 68, },
+	{ 4, 0, 0, 1, 4, 76, },
+	{ 5, 0, 0, 1, 4, 60, },
+	{ 6, 0, 0, 1, 4, 68, },
+	{ 7, 0, 0, 1, 4, 60, },
+	{ 8, 0, 0, 1, 4, 68, },
+	{ 9, 0, 0, 1, 4, 60, },
 	{ 0, 0, 0, 1, 5, 76, },
 	{ 2, 0, 0, 1, 5, 60, },
+	{ 1, 0, 0, 1, 5, 76, },
+	{ 3, 0, 0, 1, 5, 76, },
+	{ 4, 0, 0, 1, 5, 76, },
+	{ 5, 0, 0, 1, 5, 60, },
+	{ 6, 0, 0, 1, 5, 76, },
+	{ 7, 0, 0, 1, 5, 60, },
+	{ 8, 0, 0, 1, 5, 76, },
+	{ 9, 0, 0, 1, 5, 60, },
 	{ 0, 0, 0, 1, 6, 76, },
 	{ 2, 0, 0, 1, 6, 60, },
+	{ 1, 0, 0, 1, 6, 76, },
+	{ 3, 0, 0, 1, 6, 76, },
+	{ 4, 0, 0, 1, 6, 76, },
+	{ 5, 0, 0, 1, 6, 60, },
+	{ 6, 0, 0, 1, 6, 76, },
+	{ 7, 0, 0, 1, 6, 60, },
+	{ 8, 0, 0, 1, 6, 76, },
+	{ 9, 0, 0, 1, 6, 60, },
 	{ 0, 0, 0, 1, 7, 76, },
 	{ 2, 0, 0, 1, 7, 60, },
+	{ 1, 0, 0, 1, 7, 76, },
+	{ 3, 0, 0, 1, 7, 76, },
+	{ 4, 0, 0, 1, 7, 76, },
+	{ 5, 0, 0, 1, 7, 60, },
+	{ 6, 0, 0, 1, 7, 76, },
+	{ 7, 0, 0, 1, 7, 60, },
+	{ 8, 0, 0, 1, 7, 76, },
+	{ 9, 0, 0, 1, 7, 60, },
 	{ 0, 0, 0, 1, 8, 68, },
 	{ 2, 0, 0, 1, 8, 60, },
+	{ 1, 0, 0, 1, 8, 76, },
+	{ 3, 0, 0, 1, 8, 68, },
+	{ 4, 0, 0, 1, 8, 76, },
+	{ 5, 0, 0, 1, 8, 60, },
+	{ 6, 0, 0, 1, 8, 68, },
+	{ 7, 0, 0, 1, 8, 60, },
+	{ 8, 0, 0, 1, 8, 68, },
+	{ 9, 0, 0, 1, 8, 60, },
 	{ 0, 0, 0, 1, 9, 64, },
 	{ 2, 0, 0, 1, 9, 60, },
+	{ 1, 0, 0, 1, 9, 76, },
+	{ 3, 0, 0, 1, 9, 64, },
+	{ 4, 0, 0, 1, 9, 76, },
+	{ 5, 0, 0, 1, 9, 60, },
+	{ 6, 0, 0, 1, 9, 64, },
+	{ 7, 0, 0, 1, 9, 60, },
+	{ 8, 0, 0, 1, 9, 64, },
+	{ 9, 0, 0, 1, 9, 60, },
 	{ 0, 0, 0, 1, 10, 60, },
 	{ 2, 0, 0, 1, 10, 60, },
+	{ 1, 0, 0, 1, 10, 76, },
+	{ 3, 0, 0, 1, 10, 60, },
+	{ 4, 0, 0, 1, 10, 76, },
+	{ 5, 0, 0, 1, 10, 60, },
+	{ 6, 0, 0, 1, 10, 60, },
+	{ 7, 0, 0, 1, 10, 60, },
+	{ 8, 0, 0, 1, 10, 60, },
+	{ 9, 0, 0, 1, 10, 60, },
 	{ 0, 0, 0, 1, 11, 52, },
 	{ 2, 0, 0, 1, 11, 60, },
+	{ 1, 0, 0, 1, 11, 76, },
+	{ 3, 0, 0, 1, 11, 52, },
+	{ 4, 0, 0, 1, 11, 76, },
+	{ 5, 0, 0, 1, 11, 60, },
+	{ 6, 0, 0, 1, 11, 52, },
+	{ 7, 0, 0, 1, 11, 60, },
+	{ 8, 0, 0, 1, 11, 52, },
+	{ 9, 0, 0, 1, 11, 60, },
 	{ 0, 0, 0, 1, 12, 40, },
 	{ 2, 0, 0, 1, 12, 60, },
+	{ 1, 0, 0, 1, 12, 76, },
+	{ 3, 0, 0, 1, 12, 40, },
+	{ 4, 0, 0, 1, 12, 76, },
+	{ 5, 0, 0, 1, 12, 60, },
+	{ 6, 0, 0, 1, 12, 40, },
+	{ 7, 0, 0, 1, 12, 60, },
+	{ 8, 0, 0, 1, 12, 40, },
+	{ 9, 0, 0, 1, 12, 60, },
 	{ 0, 0, 0, 1, 13, 28, },
 	{ 2, 0, 0, 1, 13, 60, },
+	{ 1, 0, 0, 1, 13, 76, },
+	{ 3, 0, 0, 1, 13, 28, },
+	{ 4, 0, 0, 1, 13, 70, },
+	{ 5, 0, 0, 1, 13, 60, },
+	{ 6, 0, 0, 1, 13, 28, },
+	{ 7, 0, 0, 1, 13, 60, },
+	{ 8, 0, 0, 1, 13, 28, },
+	{ 9, 0, 0, 1, 13, 60, },
 	{ 0, 0, 0, 1, 14, 127, },
 	{ 2, 0, 0, 1, 14, 127, },
+	{ 1, 0, 0, 1, 14, 127, },
+	{ 3, 0, 0, 1, 14, 127, },
+	{ 4, 0, 0, 1, 14, 127, },
+	{ 5, 0, 0, 1, 14, 127, },
+	{ 6, 0, 0, 1, 14, 127, },
+	{ 7, 0, 0, 1, 14, 127, },
+	{ 8, 0, 0, 1, 14, 127, },
+	{ 9, 0, 0, 1, 14, 127, },
 	{ 0, 0, 0, 2, 1, 52, },
 	{ 2, 0, 0, 2, 1, 60, },
+	{ 1, 0, 0, 2, 1, 76, },
+	{ 3, 0, 0, 2, 1, 52, },
+	{ 4, 0, 0, 2, 1, 76, },
+	{ 5, 0, 0, 2, 1, 60, },
+	{ 6, 0, 0, 2, 1, 52, },
+	{ 7, 0, 0, 2, 1, 60, },
+	{ 8, 0, 0, 2, 1, 52, },
+	{ 9, 0, 0, 2, 1, 60, },
 	{ 0, 0, 0, 2, 2, 60, },
 	{ 2, 0, 0, 2, 2, 60, },
+	{ 1, 0, 0, 2, 2, 76, },
+	{ 3, 0, 0, 2, 2, 60, },
+	{ 4, 0, 0, 2, 2, 76, },
+	{ 5, 0, 0, 2, 2, 60, },
+	{ 6, 0, 0, 2, 2, 60, },
+	{ 7, 0, 0, 2, 2, 60, },
+	{ 8, 0, 0, 2, 2, 60, },
+	{ 9, 0, 0, 2, 2, 60, },
 	{ 0, 0, 0, 2, 3, 64, },
 	{ 2, 0, 0, 2, 3, 60, },
+	{ 1, 0, 0, 2, 3, 76, },
+	{ 3, 0, 0, 2, 3, 64, },
+	{ 4, 0, 0, 2, 3, 76, },
+	{ 5, 0, 0, 2, 3, 60, },
+	{ 6, 0, 0, 2, 3, 64, },
+	{ 7, 0, 0, 2, 3, 60, },
+	{ 8, 0, 0, 2, 3, 64, },
+	{ 9, 0, 0, 2, 3, 60, },
 	{ 0, 0, 0, 2, 4, 68, },
 	{ 2, 0, 0, 2, 4, 60, },
+	{ 1, 0, 0, 2, 4, 76, },
+	{ 3, 0, 0, 2, 4, 68, },
+	{ 4, 0, 0, 2, 4, 76, },
+	{ 5, 0, 0, 2, 4, 60, },
+	{ 6, 0, 0, 2, 4, 68, },
+	{ 7, 0, 0, 2, 4, 60, },
+	{ 8, 0, 0, 2, 4, 68, },
+	{ 9, 0, 0, 2, 4, 60, },
 	{ 0, 0, 0, 2, 5, 76, },
 	{ 2, 0, 0, 2, 5, 60, },
+	{ 1, 0, 0, 2, 5, 76, },
+	{ 3, 0, 0, 2, 5, 76, },
+	{ 4, 0, 0, 2, 5, 76, },
+	{ 5, 0, 0, 2, 5, 60, },
+	{ 6, 0, 0, 2, 5, 76, },
+	{ 7, 0, 0, 2, 5, 60, },
+	{ 8, 0, 0, 2, 5, 76, },
+	{ 9, 0, 0, 2, 5, 60, },
 	{ 0, 0, 0, 2, 6, 76, },
 	{ 2, 0, 0, 2, 6, 60, },
+	{ 1, 0, 0, 2, 6, 76, },
+	{ 3, 0, 0, 2, 6, 76, },
+	{ 4, 0, 0, 2, 6, 76, },
+	{ 5, 0, 0, 2, 6, 60, },
+	{ 6, 0, 0, 2, 6, 76, },
+	{ 7, 0, 0, 2, 6, 60, },
+	{ 8, 0, 0, 2, 6, 76, },
+	{ 9, 0, 0, 2, 6, 60, },
 	{ 0, 0, 0, 2, 7, 76, },
 	{ 2, 0, 0, 2, 7, 60, },
+	{ 1, 0, 0, 2, 7, 76, },
+	{ 3, 0, 0, 2, 7, 76, },
+	{ 4, 0, 0, 2, 7, 76, },
+	{ 5, 0, 0, 2, 7, 60, },
+	{ 6, 0, 0, 2, 7, 76, },
+	{ 7, 0, 0, 2, 7, 60, },
+	{ 8, 0, 0, 2, 7, 76, },
+	{ 9, 0, 0, 2, 7, 60, },
 	{ 0, 0, 0, 2, 8, 68, },
 	{ 2, 0, 0, 2, 8, 60, },
+	{ 1, 0, 0, 2, 8, 76, },
+	{ 3, 0, 0, 2, 8, 68, },
+	{ 4, 0, 0, 2, 8, 76, },
+	{ 5, 0, 0, 2, 8, 60, },
+	{ 6, 0, 0, 2, 8, 68, },
+	{ 7, 0, 0, 2, 8, 60, },
+	{ 8, 0, 0, 2, 8, 68, },
+	{ 9, 0, 0, 2, 8, 60, },
 	{ 0, 0, 0, 2, 9, 64, },
 	{ 2, 0, 0, 2, 9, 60, },
+	{ 1, 0, 0, 2, 9, 76, },
+	{ 3, 0, 0, 2, 9, 64, },
+	{ 4, 0, 0, 2, 9, 76, },
+	{ 5, 0, 0, 2, 9, 60, },
+	{ 6, 0, 0, 2, 9, 64, },
+	{ 7, 0, 0, 2, 9, 60, },
+	{ 8, 0, 0, 2, 9, 64, },
+	{ 9, 0, 0, 2, 9, 60, },
 	{ 0, 0, 0, 2, 10, 60, },
 	{ 2, 0, 0, 2, 10, 60, },
+	{ 1, 0, 0, 2, 10, 76, },
+	{ 3, 0, 0, 2, 10, 60, },
+	{ 4, 0, 0, 2, 10, 76, },
+	{ 5, 0, 0, 2, 10, 60, },
+	{ 6, 0, 0, 2, 10, 60, },
+	{ 7, 0, 0, 2, 10, 60, },
+	{ 8, 0, 0, 2, 10, 60, },
+	{ 9, 0, 0, 2, 10, 60, },
 	{ 0, 0, 0, 2, 11, 52, },
 	{ 2, 0, 0, 2, 11, 60, },
+	{ 1, 0, 0, 2, 11, 76, },
+	{ 3, 0, 0, 2, 11, 52, },
+	{ 4, 0, 0, 2, 11, 76, },
+	{ 5, 0, 0, 2, 11, 60, },
+	{ 6, 0, 0, 2, 11, 52, },
+	{ 7, 0, 0, 2, 11, 60, },
+	{ 8, 0, 0, 2, 11, 52, },
+	{ 9, 0, 0, 2, 11, 60, },
 	{ 0, 0, 0, 2, 12, 40, },
 	{ 2, 0, 0, 2, 12, 60, },
+	{ 1, 0, 0, 2, 12, 76, },
+	{ 3, 0, 0, 2, 12, 40, },
+	{ 4, 0, 0, 2, 12, 76, },
+	{ 5, 0, 0, 2, 12, 60, },
+	{ 6, 0, 0, 2, 12, 40, },
+	{ 7, 0, 0, 2, 12, 60, },
+	{ 8, 0, 0, 2, 12, 40, },
+	{ 9, 0, 0, 2, 12, 60, },
 	{ 0, 0, 0, 2, 13, 28, },
 	{ 2, 0, 0, 2, 13, 60, },
+	{ 1, 0, 0, 2, 13, 76, },
+	{ 3, 0, 0, 2, 13, 28, },
+	{ 4, 0, 0, 2, 13, 72, },
+	{ 5, 0, 0, 2, 13, 60, },
+	{ 6, 0, 0, 2, 13, 28, },
+	{ 7, 0, 0, 2, 13, 60, },
+	{ 8, 0, 0, 2, 13, 28, },
+	{ 9, 0, 0, 2, 13, 60, },
 	{ 0, 0, 0, 2, 14, 127, },
 	{ 2, 0, 0, 2, 14, 127, },
+	{ 1, 0, 0, 2, 14, 127, },
+	{ 3, 0, 0, 2, 14, 127, },
+	{ 4, 0, 0, 2, 14, 127, },
+	{ 5, 0, 0, 2, 14, 127, },
+	{ 6, 0, 0, 2, 14, 127, },
+	{ 7, 0, 0, 2, 14, 127, },
+	{ 8, 0, 0, 2, 14, 127, },
+	{ 9, 0, 0, 2, 14, 127, },
 	{ 0, 0, 0, 3, 1, 52, },
 	{ 2, 0, 0, 3, 1, 36, },
+	{ 1, 0, 0, 3, 1, 66, },
+	{ 3, 0, 0, 3, 1, 52, },
+	{ 4, 0, 0, 3, 1, 68, },
+	{ 5, 0, 0, 3, 1, 36, },
+	{ 6, 0, 0, 3, 1, 52, },
+	{ 7, 0, 0, 3, 1, 36, },
+	{ 8, 0, 0, 3, 1, 52, },
+	{ 9, 0, 0, 3, 1, 36, },
 	{ 0, 0, 0, 3, 2, 60, },
 	{ 2, 0, 0, 3, 2, 36, },
+	{ 1, 0, 0, 3, 2, 66, },
+	{ 3, 0, 0, 3, 2, 60, },
+	{ 4, 0, 0, 3, 2, 70, },
+	{ 5, 0, 0, 3, 2, 36, },
+	{ 6, 0, 0, 3, 2, 60, },
+	{ 7, 0, 0, 3, 2, 36, },
+	{ 8, 0, 0, 3, 2, 60, },
+	{ 9, 0, 0, 3, 2, 36, },
 	{ 0, 0, 0, 3, 3, 64, },
 	{ 2, 0, 0, 3, 3, 36, },
+	{ 1, 0, 0, 3, 3, 66, },
+	{ 3, 0, 0, 3, 3, 64, },
+	{ 4, 0, 0, 3, 3, 70, },
+	{ 5, 0, 0, 3, 3, 36, },
+	{ 6, 0, 0, 3, 3, 64, },
+	{ 7, 0, 0, 3, 3, 36, },
+	{ 8, 0, 0, 3, 3, 64, },
+	{ 9, 0, 0, 3, 3, 36, },
 	{ 0, 0, 0, 3, 4, 68, },
 	{ 2, 0, 0, 3, 4, 36, },
+	{ 1, 0, 0, 3, 4, 66, },
+	{ 3, 0, 0, 3, 4, 68, },
+	{ 4, 0, 0, 3, 4, 70, },
+	{ 5, 0, 0, 3, 4, 36, },
+	{ 6, 0, 0, 3, 4, 68, },
+	{ 7, 0, 0, 3, 4, 36, },
+	{ 8, 0, 0, 3, 4, 68, },
+	{ 9, 0, 0, 3, 4, 36, },
 	{ 0, 0, 0, 3, 5, 76, },
 	{ 2, 0, 0, 3, 5, 36, },
+	{ 1, 0, 0, 3, 5, 66, },
+	{ 3, 0, 0, 3, 5, 76, },
+	{ 4, 0, 0, 3, 5, 70, },
+	{ 5, 0, 0, 3, 5, 36, },
+	{ 6, 0, 0, 3, 5, 76, },
+	{ 7, 0, 0, 3, 5, 36, },
+	{ 8, 0, 0, 3, 5, 76, },
+	{ 9, 0, 0, 3, 5, 36, },
 	{ 0, 0, 0, 3, 6, 76, },
 	{ 2, 0, 0, 3, 6, 36, },
+	{ 1, 0, 0, 3, 6, 66, },
+	{ 3, 0, 0, 3, 6, 76, },
+	{ 4, 0, 0, 3, 6, 70, },
+	{ 5, 0, 0, 3, 6, 36, },
+	{ 6, 0, 0, 3, 6, 76, },
+	{ 7, 0, 0, 3, 6, 36, },
+	{ 8, 0, 0, 3, 6, 76, },
+	{ 9, 0, 0, 3, 6, 36, },
 	{ 0, 0, 0, 3, 7, 76, },
 	{ 2, 0, 0, 3, 7, 36, },
+	{ 1, 0, 0, 3, 7, 66, },
+	{ 3, 0, 0, 3, 7, 76, },
+	{ 4, 0, 0, 3, 7, 70, },
+	{ 5, 0, 0, 3, 7, 36, },
+	{ 6, 0, 0, 3, 7, 76, },
+	{ 7, 0, 0, 3, 7, 36, },
+	{ 8, 0, 0, 3, 7, 76, },
+	{ 9, 0, 0, 3, 7, 36, },
 	{ 0, 0, 0, 3, 8, 68, },
 	{ 2, 0, 0, 3, 8, 36, },
+	{ 1, 0, 0, 3, 8, 66, },
+	{ 3, 0, 0, 3, 8, 68, },
+	{ 4, 0, 0, 3, 8, 70, },
+	{ 5, 0, 0, 3, 8, 36, },
+	{ 6, 0, 0, 3, 8, 68, },
+	{ 7, 0, 0, 3, 8, 36, },
+	{ 8, 0, 0, 3, 8, 68, },
+	{ 9, 0, 0, 3, 8, 36, },
 	{ 0, 0, 0, 3, 9, 64, },
 	{ 2, 0, 0, 3, 9, 36, },
+	{ 1, 0, 0, 3, 9, 66, },
+	{ 3, 0, 0, 3, 9, 64, },
+	{ 4, 0, 0, 3, 9, 70, },
+	{ 5, 0, 0, 3, 9, 36, },
+	{ 6, 0, 0, 3, 9, 64, },
+	{ 7, 0, 0, 3, 9, 36, },
+	{ 8, 0, 0, 3, 9, 64, },
+	{ 9, 0, 0, 3, 9, 36, },
 	{ 0, 0, 0, 3, 10, 60, },
 	{ 2, 0, 0, 3, 10, 36, },
+	{ 1, 0, 0, 3, 10, 66, },
+	{ 3, 0, 0, 3, 10, 60, },
+	{ 4, 0, 0, 3, 10, 70, },
+	{ 5, 0, 0, 3, 10, 36, },
+	{ 6, 0, 0, 3, 10, 60, },
+	{ 7, 0, 0, 3, 10, 36, },
+	{ 8, 0, 0, 3, 10, 60, },
+	{ 9, 0, 0, 3, 10, 36, },
 	{ 0, 0, 0, 3, 11, 52, },
 	{ 2, 0, 0, 3, 11, 36, },
+	{ 1, 0, 0, 3, 11, 66, },
+	{ 3, 0, 0, 3, 11, 52, },
+	{ 4, 0, 0, 3, 11, 70, },
+	{ 5, 0, 0, 3, 11, 36, },
+	{ 6, 0, 0, 3, 11, 52, },
+	{ 7, 0, 0, 3, 11, 36, },
+	{ 8, 0, 0, 3, 11, 52, },
+	{ 9, 0, 0, 3, 11, 36, },
 	{ 0, 0, 0, 3, 12, 40, },
 	{ 2, 0, 0, 3, 12, 36, },
+	{ 1, 0, 0, 3, 12, 66, },
+	{ 3, 0, 0, 3, 12, 40, },
+	{ 4, 0, 0, 3, 12, 70, },
+	{ 5, 0, 0, 3, 12, 36, },
+	{ 6, 0, 0, 3, 12, 40, },
+	{ 7, 0, 0, 3, 12, 36, },
+	{ 8, 0, 0, 3, 12, 40, },
+	{ 9, 0, 0, 3, 12, 36, },
 	{ 0, 0, 0, 3, 13, 28, },
 	{ 2, 0, 0, 3, 13, 36, },
+	{ 1, 0, 0, 3, 13, 66, },
+	{ 3, 0, 0, 3, 13, 28, },
+	{ 4, 0, 0, 3, 13, 62, },
+	{ 5, 0, 0, 3, 13, 36, },
+	{ 6, 0, 0, 3, 13, 28, },
+	{ 7, 0, 0, 3, 13, 36, },
+	{ 8, 0, 0, 3, 13, 28, },
+	{ 9, 0, 0, 3, 13, 36, },
 	{ 0, 0, 0, 3, 14, 127, },
 	{ 2, 0, 0, 3, 14, 127, },
+	{ 1, 0, 0, 3, 14, 127, },
+	{ 3, 0, 0, 3, 14, 127, },
+	{ 4, 0, 0, 3, 14, 127, },
+	{ 5, 0, 0, 3, 14, 127, },
+	{ 6, 0, 0, 3, 14, 127, },
+	{ 7, 0, 0, 3, 14, 127, },
+	{ 8, 0, 0, 3, 14, 127, },
+	{ 9, 0, 0, 3, 14, 127, },
 	{ 0, 0, 1, 2, 1, 127, },
 	{ 2, 0, 1, 2, 1, 127, },
+	{ 1, 0, 1, 2, 1, 127, },
+	{ 3, 0, 1, 2, 1, 127, },
+	{ 4, 0, 1, 2, 1, 127, },
+	{ 5, 0, 1, 2, 1, 127, },
+	{ 6, 0, 1, 2, 1, 127, },
+	{ 7, 0, 1, 2, 1, 127, },
+	{ 8, 0, 1, 2, 1, 127, },
+	{ 9, 0, 1, 2, 1, 127, },
 	{ 0, 0, 1, 2, 2, 127, },
 	{ 2, 0, 1, 2, 2, 127, },
+	{ 1, 0, 1, 2, 2, 127, },
+	{ 3, 0, 1, 2, 2, 127, },
+	{ 4, 0, 1, 2, 2, 127, },
+	{ 5, 0, 1, 2, 2, 127, },
+	{ 6, 0, 1, 2, 2, 127, },
+	{ 7, 0, 1, 2, 2, 127, },
+	{ 8, 0, 1, 2, 2, 127, },
+	{ 9, 0, 1, 2, 2, 127, },
 	{ 0, 0, 1, 2, 3, 52, },
 	{ 2, 0, 1, 2, 3, 60, },
+	{ 1, 0, 1, 2, 3, 72, },
+	{ 3, 0, 1, 2, 3, 52, },
+	{ 4, 0, 1, 2, 3, 72, },
+	{ 5, 0, 1, 2, 3, 60, },
+	{ 6, 0, 1, 2, 3, 52, },
+	{ 7, 0, 1, 2, 3, 60, },
+	{ 8, 0, 1, 2, 3, 52, },
+	{ 9, 0, 1, 2, 3, 60, },
 	{ 0, 0, 1, 2, 4, 52, },
 	{ 2, 0, 1, 2, 4, 60, },
+	{ 1, 0, 1, 2, 4, 72, },
+	{ 3, 0, 1, 2, 4, 52, },
+	{ 4, 0, 1, 2, 4, 72, },
+	{ 5, 0, 1, 2, 4, 60, },
+	{ 6, 0, 1, 2, 4, 52, },
+	{ 7, 0, 1, 2, 4, 60, },
+	{ 8, 0, 1, 2, 4, 52, },
+	{ 9, 0, 1, 2, 4, 60, },
 	{ 0, 0, 1, 2, 5, 60, },
 	{ 2, 0, 1, 2, 5, 60, },
+	{ 1, 0, 1, 2, 5, 72, },
+	{ 3, 0, 1, 2, 5, 60, },
+	{ 4, 0, 1, 2, 5, 72, },
+	{ 5, 0, 1, 2, 5, 60, },
+	{ 6, 0, 1, 2, 5, 60, },
+	{ 7, 0, 1, 2, 5, 60, },
+	{ 8, 0, 1, 2, 5, 60, },
+	{ 9, 0, 1, 2, 5, 60, },
 	{ 0, 0, 1, 2, 6, 64, },
 	{ 2, 0, 1, 2, 6, 60, },
+	{ 1, 0, 1, 2, 6, 72, },
+	{ 3, 0, 1, 2, 6, 64, },
+	{ 4, 0, 1, 2, 6, 72, },
+	{ 5, 0, 1, 2, 6, 60, },
+	{ 6, 0, 1, 2, 6, 64, },
+	{ 7, 0, 1, 2, 6, 60, },
+	{ 8, 0, 1, 2, 6, 64, },
+	{ 9, 0, 1, 2, 6, 60, },
 	{ 0, 0, 1, 2, 7, 60, },
 	{ 2, 0, 1, 2, 7, 60, },
+	{ 1, 0, 1, 2, 7, 72, },
+	{ 3, 0, 1, 2, 7, 60, },
+	{ 4, 0, 1, 2, 7, 72, },
+	{ 5, 0, 1, 2, 7, 60, },
+	{ 6, 0, 1, 2, 7, 60, },
+	{ 7, 0, 1, 2, 7, 60, },
+	{ 8, 0, 1, 2, 7, 60, },
+	{ 9, 0, 1, 2, 7, 60, },
 	{ 0, 0, 1, 2, 8, 52, },
 	{ 2, 0, 1, 2, 8, 60, },
+	{ 1, 0, 1, 2, 8, 72, },
+	{ 3, 0, 1, 2, 8, 52, },
+	{ 4, 0, 1, 2, 8, 72, },
+	{ 5, 0, 1, 2, 8, 60, },
+	{ 6, 0, 1, 2, 8, 52, },
+	{ 7, 0, 1, 2, 8, 60, },
+	{ 8, 0, 1, 2, 8, 52, },
+	{ 9, 0, 1, 2, 8, 60, },
 	{ 0, 0, 1, 2, 9, 52, },
 	{ 2, 0, 1, 2, 9, 60, },
+	{ 1, 0, 1, 2, 9, 72, },
+	{ 3, 0, 1, 2, 9, 52, },
+	{ 4, 0, 1, 2, 9, 72, },
+	{ 5, 0, 1, 2, 9, 60, },
+	{ 6, 0, 1, 2, 9, 52, },
+	{ 7, 0, 1, 2, 9, 60, },
+	{ 8, 0, 1, 2, 9, 52, },
+	{ 9, 0, 1, 2, 9, 60, },
 	{ 0, 0, 1, 2, 10, 40, },
 	{ 2, 0, 1, 2, 10, 60, },
+	{ 1, 0, 1, 2, 10, 72, },
+	{ 3, 0, 1, 2, 10, 40, },
+	{ 4, 0, 1, 2, 10, 72, },
+	{ 5, 0, 1, 2, 10, 60, },
+	{ 6, 0, 1, 2, 10, 40, },
+	{ 7, 0, 1, 2, 10, 60, },
+	{ 8, 0, 1, 2, 10, 40, },
+	{ 9, 0, 1, 2, 10, 60, },
 	{ 0, 0, 1, 2, 11, 28, },
 	{ 2, 0, 1, 2, 11, 60, },
+	{ 1, 0, 1, 2, 11, 72, },
+	{ 3, 0, 1, 2, 11, 28, },
+	{ 4, 0, 1, 2, 11, 70, },
+	{ 5, 0, 1, 2, 11, 60, },
+	{ 6, 0, 1, 2, 11, 28, },
+	{ 7, 0, 1, 2, 11, 60, },
+	{ 8, 0, 1, 2, 11, 28, },
+	{ 9, 0, 1, 2, 11, 60, },
 	{ 0, 0, 1, 2, 12, 127, },
 	{ 2, 0, 1, 2, 12, 127, },
+	{ 1, 0, 1, 2, 12, 127, },
+	{ 3, 0, 1, 2, 12, 127, },
+	{ 4, 0, 1, 2, 12, 127, },
+	{ 5, 0, 1, 2, 12, 127, },
+	{ 6, 0, 1, 2, 12, 127, },
+	{ 7, 0, 1, 2, 12, 127, },
+	{ 8, 0, 1, 2, 12, 127, },
+	{ 9, 0, 1, 2, 12, 127, },
 	{ 0, 0, 1, 2, 13, 127, },
 	{ 2, 0, 1, 2, 13, 127, },
+	{ 1, 0, 1, 2, 13, 127, },
+	{ 3, 0, 1, 2, 13, 127, },
+	{ 4, 0, 1, 2, 13, 127, },
+	{ 5, 0, 1, 2, 13, 127, },
+	{ 6, 0, 1, 2, 13, 127, },
+	{ 7, 0, 1, 2, 13, 127, },
+	{ 8, 0, 1, 2, 13, 127, },
+	{ 9, 0, 1, 2, 13, 127, },
 	{ 0, 0, 1, 2, 14, 127, },
 	{ 2, 0, 1, 2, 14, 127, },
+	{ 1, 0, 1, 2, 14, 127, },
+	{ 3, 0, 1, 2, 14, 127, },
+	{ 4, 0, 1, 2, 14, 127, },
+	{ 5, 0, 1, 2, 14, 127, },
+	{ 6, 0, 1, 2, 14, 127, },
+	{ 7, 0, 1, 2, 14, 127, },
+	{ 8, 0, 1, 2, 14, 127, },
+	{ 9, 0, 1, 2, 14, 127, },
 	{ 0, 0, 1, 3, 1, 127, },
 	{ 2, 0, 1, 3, 1, 127, },
+	{ 1, 0, 1, 3, 1, 127, },
+	{ 3, 0, 1, 3, 1, 127, },
+	{ 4, 0, 1, 3, 1, 127, },
+	{ 5, 0, 1, 3, 1, 127, },
+	{ 6, 0, 1, 3, 1, 127, },
+	{ 7, 0, 1, 3, 1, 127, },
+	{ 8, 0, 1, 3, 1, 127, },
+	{ 9, 0, 1, 3, 1, 127, },
 	{ 0, 0, 1, 3, 2, 127, },
 	{ 2, 0, 1, 3, 2, 127, },
+	{ 1, 0, 1, 3, 2, 127, },
+	{ 3, 0, 1, 3, 2, 127, },
+	{ 4, 0, 1, 3, 2, 127, },
+	{ 5, 0, 1, 3, 2, 127, },
+	{ 6, 0, 1, 3, 2, 127, },
+	{ 7, 0, 1, 3, 2, 127, },
+	{ 8, 0, 1, 3, 2, 127, },
+	{ 9, 0, 1, 3, 2, 127, },
 	{ 0, 0, 1, 3, 3, 48, },
 	{ 2, 0, 1, 3, 3, 36, },
+	{ 1, 0, 1, 3, 3, 66, },
+	{ 3, 0, 1, 3, 3, 48, },
+	{ 4, 0, 1, 3, 3, 66, },
+	{ 5, 0, 1, 3, 3, 36, },
+	{ 6, 0, 1, 3, 3, 48, },
+	{ 7, 0, 1, 3, 3, 36, },
+	{ 8, 0, 1, 3, 3, 48, },
+	{ 9, 0, 1, 3, 3, 36, },
 	{ 0, 0, 1, 3, 4, 48, },
 	{ 2, 0, 1, 3, 4, 36, },
+	{ 1, 0, 1, 3, 4, 66, },
+	{ 3, 0, 1, 3, 4, 48, },
+	{ 4, 0, 1, 3, 4, 70, },
+	{ 5, 0, 1, 3, 4, 36, },
+	{ 6, 0, 1, 3, 4, 48, },
+	{ 7, 0, 1, 3, 4, 36, },
+	{ 8, 0, 1, 3, 4, 48, },
+	{ 9, 0, 1, 3, 4, 36, },
 	{ 0, 0, 1, 3, 5, 60, },
 	{ 2, 0, 1, 3, 5, 36, },
+	{ 1, 0, 1, 3, 5, 66, },
+	{ 3, 0, 1, 3, 5, 60, },
+	{ 4, 0, 1, 3, 5, 70, },
+	{ 5, 0, 1, 3, 5, 36, },
+	{ 6, 0, 1, 3, 5, 60, },
+	{ 7, 0, 1, 3, 5, 36, },
+	{ 8, 0, 1, 3, 5, 60, },
+	{ 9, 0, 1, 3, 5, 36, },
 	{ 0, 0, 1, 3, 6, 64, },
 	{ 2, 0, 1, 3, 6, 36, },
+	{ 1, 0, 1, 3, 6, 66, },
+	{ 3, 0, 1, 3, 6, 64, },
+	{ 4, 0, 1, 3, 6, 70, },
+	{ 5, 0, 1, 3, 6, 36, },
+	{ 6, 0, 1, 3, 6, 64, },
+	{ 7, 0, 1, 3, 6, 36, },
+	{ 8, 0, 1, 3, 6, 64, },
+	{ 9, 0, 1, 3, 6, 36, },
 	{ 0, 0, 1, 3, 7, 60, },
 	{ 2, 0, 1, 3, 7, 36, },
+	{ 1, 0, 1, 3, 7, 66, },
+	{ 3, 0, 1, 3, 7, 60, },
+	{ 4, 0, 1, 3, 7, 70, },
+	{ 5, 0, 1, 3, 7, 36, },
+	{ 6, 0, 1, 3, 7, 60, },
+	{ 7, 0, 1, 3, 7, 36, },
+	{ 8, 0, 1, 3, 7, 60, },
+	{ 9, 0, 1, 3, 7, 36, },
 	{ 0, 0, 1, 3, 8, 52, },
 	{ 2, 0, 1, 3, 8, 36, },
+	{ 1, 0, 1, 3, 8, 66, },
+	{ 3, 0, 1, 3, 8, 52, },
+	{ 4, 0, 1, 3, 8, 70, },
+	{ 5, 0, 1, 3, 8, 36, },
+	{ 6, 0, 1, 3, 8, 52, },
+	{ 7, 0, 1, 3, 8, 36, },
+	{ 8, 0, 1, 3, 8, 52, },
+	{ 9, 0, 1, 3, 8, 36, },
 	{ 0, 0, 1, 3, 9, 52, },
 	{ 2, 0, 1, 3, 9, 36, },
+	{ 1, 0, 1, 3, 9, 66, },
+	{ 3, 0, 1, 3, 9, 52, },
+	{ 4, 0, 1, 3, 9, 70, },
+	{ 5, 0, 1, 3, 9, 36, },
+	{ 6, 0, 1, 3, 9, 52, },
+	{ 7, 0, 1, 3, 9, 36, },
+	{ 8, 0, 1, 3, 9, 52, },
+	{ 9, 0, 1, 3, 9, 36, },
 	{ 0, 0, 1, 3, 10, 40, },
 	{ 2, 0, 1, 3, 10, 36, },
+	{ 1, 0, 1, 3, 10, 66, },
+	{ 3, 0, 1, 3, 10, 40, },
+	{ 4, 0, 1, 3, 10, 70, },
+	{ 5, 0, 1, 3, 10, 36, },
+	{ 6, 0, 1, 3, 10, 40, },
+	{ 7, 0, 1, 3, 10, 36, },
+	{ 8, 0, 1, 3, 10, 40, },
+	{ 9, 0, 1, 3, 10, 36, },
 	{ 0, 0, 1, 3, 11, 26, },
 	{ 2, 0, 1, 3, 11, 36, },
+	{ 1, 0, 1, 3, 11, 66, },
+	{ 3, 0, 1, 3, 11, 26, },
+	{ 4, 0, 1, 3, 11, 66, },
+	{ 5, 0, 1, 3, 11, 36, },
+	{ 6, 0, 1, 3, 11, 26, },
+	{ 7, 0, 1, 3, 11, 36, },
+	{ 8, 0, 1, 3, 11, 26, },
+	{ 9, 0, 1, 3, 11, 36, },
 	{ 0, 0, 1, 3, 12, 127, },
 	{ 2, 0, 1, 3, 12, 127, },
+	{ 1, 0, 1, 3, 12, 127, },
+	{ 3, 0, 1, 3, 12, 127, },
+	{ 4, 0, 1, 3, 12, 127, },
+	{ 5, 0, 1, 3, 12, 127, },
+	{ 6, 0, 1, 3, 12, 127, },
+	{ 7, 0, 1, 3, 12, 127, },
+	{ 8, 0, 1, 3, 12, 127, },
+	{ 9, 0, 1, 3, 12, 127, },
 	{ 0, 0, 1, 3, 13, 127, },
 	{ 2, 0, 1, 3, 13, 127, },
+	{ 1, 0, 1, 3, 13, 127, },
+	{ 3, 0, 1, 3, 13, 127, },
+	{ 4, 0, 1, 3, 13, 127, },
+	{ 5, 0, 1, 3, 13, 127, },
+	{ 6, 0, 1, 3, 13, 127, },
+	{ 7, 0, 1, 3, 13, 127, },
+	{ 8, 0, 1, 3, 13, 127, },
+	{ 9, 0, 1, 3, 13, 127, },
 	{ 0, 0, 1, 3, 14, 127, },
 	{ 2, 0, 1, 3, 14, 127, },
+	{ 1, 0, 1, 3, 14, 127, },
+	{ 3, 0, 1, 3, 14, 127, },
+	{ 4, 0, 1, 3, 14, 127, },
+	{ 5, 0, 1, 3, 14, 127, },
+	{ 6, 0, 1, 3, 14, 127, },
+	{ 7, 0, 1, 3, 14, 127, },
+	{ 8, 0, 1, 3, 14, 127, },
+	{ 9, 0, 1, 3, 14, 127, },
 	{ 0, 1, 0, 1, 36, 74, },
 	{ 2, 1, 0, 1, 36, 62, },
+	{ 1, 1, 0, 1, 36, 60, },
+	{ 3, 1, 0, 1, 36, 62, },
+	{ 4, 1, 0, 1, 36, 76, },
+	{ 5, 1, 0, 1, 36, 62, },
+	{ 6, 1, 0, 1, 36, 64, },
+	{ 7, 1, 0, 1, 36, 54, },
+	{ 8, 1, 0, 1, 36, 62, },
+	{ 9, 1, 0, 1, 36, 62, },
 	{ 0, 1, 0, 1, 40, 76, },
 	{ 2, 1, 0, 1, 40, 62, },
+	{ 1, 1, 0, 1, 40, 62, },
+	{ 3, 1, 0, 1, 40, 62, },
+	{ 4, 1, 0, 1, 40, 76, },
+	{ 5, 1, 0, 1, 40, 62, },
+	{ 6, 1, 0, 1, 40, 64, },
+	{ 7, 1, 0, 1, 40, 54, },
+	{ 8, 1, 0, 1, 40, 62, },
+	{ 9, 1, 0, 1, 40, 62, },
 	{ 0, 1, 0, 1, 44, 76, },
 	{ 2, 1, 0, 1, 44, 62, },
+	{ 1, 1, 0, 1, 44, 62, },
+	{ 3, 1, 0, 1, 44, 62, },
+	{ 4, 1, 0, 1, 44, 76, },
+	{ 5, 1, 0, 1, 44, 62, },
+	{ 6, 1, 0, 1, 44, 64, },
+	{ 7, 1, 0, 1, 44, 54, },
+	{ 8, 1, 0, 1, 44, 62, },
+	{ 9, 1, 0, 1, 44, 62, },
 	{ 0, 1, 0, 1, 48, 76, },
 	{ 2, 1, 0, 1, 48, 62, },
+	{ 1, 1, 0, 1, 48, 62, },
+	{ 3, 1, 0, 1, 48, 62, },
+	{ 4, 1, 0, 1, 48, 54, },
+	{ 5, 1, 0, 1, 48, 62, },
+	{ 6, 1, 0, 1, 48, 64, },
+	{ 7, 1, 0, 1, 48, 54, },
+	{ 8, 1, 0, 1, 48, 62, },
+	{ 9, 1, 0, 1, 48, 62, },
 	{ 0, 1, 0, 1, 52, 76, },
 	{ 2, 1, 0, 1, 52, 62, },
+	{ 1, 1, 0, 1, 52, 62, },
+	{ 3, 1, 0, 1, 52, 64, },
+	{ 4, 1, 0, 1, 52, 76, },
+	{ 5, 1, 0, 1, 52, 62, },
+	{ 6, 1, 0, 1, 52, 76, },
+	{ 7, 1, 0, 1, 52, 54, },
+	{ 8, 1, 0, 1, 52, 76, },
+	{ 9, 1, 0, 1, 52, 62, },
 	{ 0, 1, 0, 1, 56, 76, },
 	{ 2, 1, 0, 1, 56, 62, },
+	{ 1, 1, 0, 1, 56, 62, },
+	{ 3, 1, 0, 1, 56, 64, },
+	{ 4, 1, 0, 1, 56, 76, },
+	{ 5, 1, 0, 1, 56, 62, },
+	{ 6, 1, 0, 1, 56, 76, },
+	{ 7, 1, 0, 1, 56, 54, },
+	{ 8, 1, 0, 1, 56, 76, },
+	{ 9, 1, 0, 1, 56, 62, },
 	{ 0, 1, 0, 1, 60, 76, },
 	{ 2, 1, 0, 1, 60, 62, },
+	{ 1, 1, 0, 1, 60, 62, },
+	{ 3, 1, 0, 1, 60, 64, },
+	{ 4, 1, 0, 1, 60, 76, },
+	{ 5, 1, 0, 1, 60, 62, },
+	{ 6, 1, 0, 1, 60, 76, },
+	{ 7, 1, 0, 1, 60, 54, },
+	{ 8, 1, 0, 1, 60, 76, },
+	{ 9, 1, 0, 1, 60, 62, },
 	{ 0, 1, 0, 1, 64, 74, },
 	{ 2, 1, 0, 1, 64, 62, },
+	{ 1, 1, 0, 1, 64, 60, },
+	{ 3, 1, 0, 1, 64, 64, },
+	{ 4, 1, 0, 1, 64, 76, },
+	{ 5, 1, 0, 1, 64, 62, },
+	{ 6, 1, 0, 1, 64, 74, },
+	{ 7, 1, 0, 1, 64, 54, },
+	{ 8, 1, 0, 1, 64, 74, },
+	{ 9, 1, 0, 1, 64, 62, },
 	{ 0, 1, 0, 1, 100, 72, },
 	{ 2, 1, 0, 1, 100, 62, },
+	{ 1, 1, 0, 1, 100, 76, },
+	{ 3, 1, 0, 1, 100, 72, },
+	{ 4, 1, 0, 1, 100, 76, },
+	{ 5, 1, 0, 1, 100, 62, },
+	{ 6, 1, 0, 1, 100, 72, },
+	{ 7, 1, 0, 1, 100, 54, },
+	{ 8, 1, 0, 1, 100, 72, },
+	{ 9, 1, 0, 1, 100, 127, },
 	{ 0, 1, 0, 1, 104, 76, },
 	{ 2, 1, 0, 1, 104, 62, },
+	{ 1, 1, 0, 1, 104, 76, },
+	{ 3, 1, 0, 1, 104, 76, },
+	{ 4, 1, 0, 1, 104, 76, },
+	{ 5, 1, 0, 1, 104, 62, },
+	{ 6, 1, 0, 1, 104, 76, },
+	{ 7, 1, 0, 1, 104, 54, },
+	{ 8, 1, 0, 1, 104, 76, },
+	{ 9, 1, 0, 1, 104, 127, },
 	{ 0, 1, 0, 1, 108, 76, },
 	{ 2, 1, 0, 1, 108, 62, },
+	{ 1, 1, 0, 1, 108, 76, },
+	{ 3, 1, 0, 1, 108, 76, },
+	{ 4, 1, 0, 1, 108, 76, },
+	{ 5, 1, 0, 1, 108, 62, },
+	{ 6, 1, 0, 1, 108, 76, },
+	{ 7, 1, 0, 1, 108, 54, },
+	{ 8, 1, 0, 1, 108, 76, },
+	{ 9, 1, 0, 1, 108, 127, },
 	{ 0, 1, 0, 1, 112, 76, },
 	{ 2, 1, 0, 1, 112, 62, },
+	{ 1, 1, 0, 1, 112, 76, },
+	{ 3, 1, 0, 1, 112, 76, },
+	{ 4, 1, 0, 1, 112, 76, },
+	{ 5, 1, 0, 1, 112, 62, },
+	{ 6, 1, 0, 1, 112, 76, },
+	{ 7, 1, 0, 1, 112, 54, },
+	{ 8, 1, 0, 1, 112, 76, },
+	{ 9, 1, 0, 1, 112, 127, },
 	{ 0, 1, 0, 1, 116, 76, },
 	{ 2, 1, 0, 1, 116, 62, },
+	{ 1, 1, 0, 1, 116, 76, },
+	{ 3, 1, 0, 1, 116, 76, },
+	{ 4, 1, 0, 1, 116, 76, },
+	{ 5, 1, 0, 1, 116, 62, },
+	{ 6, 1, 0, 1, 116, 76, },
+	{ 7, 1, 0, 1, 116, 54, },
+	{ 8, 1, 0, 1, 116, 76, },
+	{ 9, 1, 0, 1, 116, 127, },
 	{ 0, 1, 0, 1, 120, 76, },
 	{ 2, 1, 0, 1, 120, 62, },
+	{ 1, 1, 0, 1, 120, 76, },
+	{ 3, 1, 0, 1, 120, 127, },
+	{ 4, 1, 0, 1, 120, 76, },
+	{ 5, 1, 0, 1, 120, 127, },
+	{ 6, 1, 0, 1, 120, 76, },
+	{ 7, 1, 0, 1, 120, 54, },
+	{ 8, 1, 0, 1, 120, 76, },
+	{ 9, 1, 0, 1, 120, 127, },
 	{ 0, 1, 0, 1, 124, 76, },
 	{ 2, 1, 0, 1, 124, 62, },
+	{ 1, 1, 0, 1, 124, 76, },
+	{ 3, 1, 0, 1, 124, 127, },
+	{ 4, 1, 0, 1, 124, 76, },
+	{ 5, 1, 0, 1, 124, 127, },
+	{ 6, 1, 0, 1, 124, 76, },
+	{ 7, 1, 0, 1, 124, 54, },
+	{ 8, 1, 0, 1, 124, 76, },
+	{ 9, 1, 0, 1, 124, 127, },
 	{ 0, 1, 0, 1, 128, 76, },
 	{ 2, 1, 0, 1, 128, 62, },
+	{ 1, 1, 0, 1, 128, 76, },
+	{ 3, 1, 0, 1, 128, 127, },
+	{ 4, 1, 0, 1, 128, 76, },
+	{ 5, 1, 0, 1, 128, 127, },
+	{ 6, 1, 0, 1, 128, 76, },
+	{ 7, 1, 0, 1, 128, 54, },
+	{ 8, 1, 0, 1, 128, 76, },
+	{ 9, 1, 0, 1, 128, 127, },
 	{ 0, 1, 0, 1, 132, 76, },
 	{ 2, 1, 0, 1, 132, 62, },
+	{ 1, 1, 0, 1, 132, 76, },
+	{ 3, 1, 0, 1, 132, 76, },
+	{ 4, 1, 0, 1, 132, 76, },
+	{ 5, 1, 0, 1, 132, 62, },
+	{ 6, 1, 0, 1, 132, 76, },
+	{ 7, 1, 0, 1, 132, 54, },
+	{ 8, 1, 0, 1, 132, 76, },
+	{ 9, 1, 0, 1, 132, 127, },
 	{ 0, 1, 0, 1, 136, 76, },
 	{ 2, 1, 0, 1, 136, 62, },
+	{ 1, 1, 0, 1, 136, 76, },
+	{ 3, 1, 0, 1, 136, 76, },
+	{ 4, 1, 0, 1, 136, 76, },
+	{ 5, 1, 0, 1, 136, 62, },
+	{ 6, 1, 0, 1, 136, 76, },
+	{ 7, 1, 0, 1, 136, 54, },
+	{ 8, 1, 0, 1, 136, 76, },
+	{ 9, 1, 0, 1, 136, 127, },
 	{ 0, 1, 0, 1, 140, 72, },
 	{ 2, 1, 0, 1, 140, 62, },
+	{ 1, 1, 0, 1, 140, 76, },
+	{ 3, 1, 0, 1, 140, 72, },
+	{ 4, 1, 0, 1, 140, 76, },
+	{ 5, 1, 0, 1, 140, 62, },
+	{ 6, 1, 0, 1, 140, 72, },
+	{ 7, 1, 0, 1, 140, 54, },
+	{ 8, 1, 0, 1, 140, 72, },
+	{ 9, 1, 0, 1, 140, 127, },
 	{ 0, 1, 0, 1, 144, 76, },
 	{ 2, 1, 0, 1, 144, 127, },
+	{ 1, 1, 0, 1, 144, 127, },
+	{ 3, 1, 0, 1, 144, 76, },
+	{ 4, 1, 0, 1, 144, 76, },
+	{ 5, 1, 0, 1, 144, 127, },
+	{ 6, 1, 0, 1, 144, 76, },
+	{ 7, 1, 0, 1, 144, 127, },
+	{ 8, 1, 0, 1, 144, 76, },
+	{ 9, 1, 0, 1, 144, 127, },
 	{ 0, 1, 0, 1, 149, 76, },
 	{ 2, 1, 0, 1, 149, -128, },
+	{ 1, 1, 0, 1, 149, 127, },
+	{ 3, 1, 0, 1, 149, 76, },
+	{ 4, 1, 0, 1, 149, 74, },
+	{ 5, 1, 0, 1, 149, 76, },
+	{ 6, 1, 0, 1, 149, 76, },
+	{ 7, 1, 0, 1, 149, 54, },
+	{ 8, 1, 0, 1, 149, 76, },
+	{ 9, 1, 0, 1, 149, -128, },
 	{ 0, 1, 0, 1, 153, 76, },
 	{ 2, 1, 0, 1, 153, -128, },
+	{ 1, 1, 0, 1, 153, 127, },
+	{ 3, 1, 0, 1, 153, 76, },
+	{ 4, 1, 0, 1, 153, 74, },
+	{ 5, 1, 0, 1, 153, 76, },
+	{ 6, 1, 0, 1, 153, 76, },
+	{ 7, 1, 0, 1, 153, 54, },
+	{ 8, 1, 0, 1, 153, 76, },
+	{ 9, 1, 0, 1, 153, -128, },
 	{ 0, 1, 0, 1, 157, 76, },
 	{ 2, 1, 0, 1, 157, -128, },
+	{ 1, 1, 0, 1, 157, 127, },
+	{ 3, 1, 0, 1, 157, 76, },
+	{ 4, 1, 0, 1, 157, 74, },
+	{ 5, 1, 0, 1, 157, 76, },
+	{ 6, 1, 0, 1, 157, 76, },
+	{ 7, 1, 0, 1, 157, 54, },
+	{ 8, 1, 0, 1, 157, 76, },
+	{ 9, 1, 0, 1, 157, -128, },
 	{ 0, 1, 0, 1, 161, 76, },
 	{ 2, 1, 0, 1, 161, -128, },
+	{ 1, 1, 0, 1, 161, 127, },
+	{ 3, 1, 0, 1, 161, 76, },
+	{ 4, 1, 0, 1, 161, 74, },
+	{ 5, 1, 0, 1, 161, 76, },
+	{ 6, 1, 0, 1, 161, 76, },
+	{ 7, 1, 0, 1, 161, 54, },
+	{ 8, 1, 0, 1, 161, 76, },
+	{ 9, 1, 0, 1, 161, -128, },
 	{ 0, 1, 0, 1, 165, 76, },
 	{ 2, 1, 0, 1, 165, -128, },
+	{ 1, 1, 0, 1, 165, 127, },
+	{ 3, 1, 0, 1, 165, 76, },
+	{ 4, 1, 0, 1, 165, 74, },
+	{ 5, 1, 0, 1, 165, 76, },
+	{ 6, 1, 0, 1, 165, 76, },
+	{ 7, 1, 0, 1, 165, 54, },
+	{ 8, 1, 0, 1, 165, 76, },
+	{ 9, 1, 0, 1, 165, -128, },
 	{ 0, 1, 0, 2, 36, 72, },
 	{ 2, 1, 0, 2, 36, 62, },
+	{ 1, 1, 0, 2, 36, 62, },
+	{ 3, 1, 0, 2, 36, 62, },
+	{ 4, 1, 0, 2, 36, 76, },
+	{ 5, 1, 0, 2, 36, 62, },
+	{ 6, 1, 0, 2, 36, 64, },
+	{ 7, 1, 0, 2, 36, 54, },
+	{ 8, 1, 0, 2, 36, 62, },
+	{ 9, 1, 0, 2, 36, 62, },
 	{ 0, 1, 0, 2, 40, 76, },
 	{ 2, 1, 0, 2, 40, 62, },
+	{ 1, 1, 0, 2, 40, 62, },
+	{ 3, 1, 0, 2, 40, 62, },
+	{ 4, 1, 0, 2, 40, 76, },
+	{ 5, 1, 0, 2, 40, 62, },
+	{ 6, 1, 0, 2, 40, 64, },
+	{ 7, 1, 0, 2, 40, 54, },
+	{ 8, 1, 0, 2, 40, 62, },
+	{ 9, 1, 0, 2, 40, 62, },
 	{ 0, 1, 0, 2, 44, 76, },
 	{ 2, 1, 0, 2, 44, 62, },
+	{ 1, 1, 0, 2, 44, 62, },
+	{ 3, 1, 0, 2, 44, 62, },
+	{ 4, 1, 0, 2, 44, 76, },
+	{ 5, 1, 0, 2, 44, 62, },
+	{ 6, 1, 0, 2, 44, 64, },
+	{ 7, 1, 0, 2, 44, 54, },
+	{ 8, 1, 0, 2, 44, 62, },
+	{ 9, 1, 0, 2, 44, 62, },
 	{ 0, 1, 0, 2, 48, 76, },
 	{ 2, 1, 0, 2, 48, 62, },
+	{ 1, 1, 0, 2, 48, 62, },
+	{ 3, 1, 0, 2, 48, 62, },
+	{ 4, 1, 0, 2, 48, 54, },
+	{ 5, 1, 0, 2, 48, 62, },
+	{ 6, 1, 0, 2, 48, 64, },
+	{ 7, 1, 0, 2, 48, 54, },
+	{ 8, 1, 0, 2, 48, 62, },
+	{ 9, 1, 0, 2, 48, 62, },
 	{ 0, 1, 0, 2, 52, 76, },
 	{ 2, 1, 0, 2, 52, 62, },
+	{ 1, 1, 0, 2, 52, 62, },
+	{ 3, 1, 0, 2, 52, 64, },
+	{ 4, 1, 0, 2, 52, 76, },
+	{ 5, 1, 0, 2, 52, 62, },
+	{ 6, 1, 0, 2, 52, 76, },
+	{ 7, 1, 0, 2, 52, 54, },
+	{ 8, 1, 0, 2, 52, 76, },
+	{ 9, 1, 0, 2, 52, 62, },
 	{ 0, 1, 0, 2, 56, 76, },
 	{ 2, 1, 0, 2, 56, 62, },
+	{ 1, 1, 0, 2, 56, 62, },
+	{ 3, 1, 0, 2, 56, 64, },
+	{ 4, 1, 0, 2, 56, 76, },
+	{ 5, 1, 0, 2, 56, 62, },
+	{ 6, 1, 0, 2, 56, 76, },
+	{ 7, 1, 0, 2, 56, 54, },
+	{ 8, 1, 0, 2, 56, 76, },
+	{ 9, 1, 0, 2, 56, 62, },
 	{ 0, 1, 0, 2, 60, 76, },
 	{ 2, 1, 0, 2, 60, 62, },
+	{ 1, 1, 0, 2, 60, 62, },
+	{ 3, 1, 0, 2, 60, 64, },
+	{ 4, 1, 0, 2, 60, 76, },
+	{ 5, 1, 0, 2, 60, 62, },
+	{ 6, 1, 0, 2, 60, 76, },
+	{ 7, 1, 0, 2, 60, 54, },
+	{ 8, 1, 0, 2, 60, 76, },
+	{ 9, 1, 0, 2, 60, 62, },
 	{ 0, 1, 0, 2, 64, 74, },
 	{ 2, 1, 0, 2, 64, 62, },
+	{ 1, 1, 0, 2, 64, 60, },
+	{ 3, 1, 0, 2, 64, 64, },
+	{ 4, 1, 0, 2, 64, 74, },
+	{ 5, 1, 0, 2, 64, 62, },
+	{ 6, 1, 0, 2, 64, 74, },
+	{ 7, 1, 0, 2, 64, 54, },
+	{ 8, 1, 0, 2, 64, 74, },
+	{ 9, 1, 0, 2, 64, 62, },
 	{ 0, 1, 0, 2, 100, 70, },
 	{ 2, 1, 0, 2, 100, 62, },
+	{ 1, 1, 0, 2, 100, 76, },
+	{ 3, 1, 0, 2, 100, 70, },
+	{ 4, 1, 0, 2, 100, 76, },
+	{ 5, 1, 0, 2, 100, 62, },
+	{ 6, 1, 0, 2, 100, 70, },
+	{ 7, 1, 0, 2, 100, 54, },
+	{ 8, 1, 0, 2, 100, 70, },
+	{ 9, 1, 0, 2, 100, 127, },
 	{ 0, 1, 0, 2, 104, 76, },
 	{ 2, 1, 0, 2, 104, 62, },
+	{ 1, 1, 0, 2, 104, 76, },
+	{ 3, 1, 0, 2, 104, 76, },
+	{ 4, 1, 0, 2, 104, 76, },
+	{ 5, 1, 0, 2, 104, 62, },
+	{ 6, 1, 0, 2, 104, 76, },
+	{ 7, 1, 0, 2, 104, 54, },
+	{ 8, 1, 0, 2, 104, 76, },
+	{ 9, 1, 0, 2, 104, 127, },
 	{ 0, 1, 0, 2, 108, 76, },
 	{ 2, 1, 0, 2, 108, 62, },
+	{ 1, 1, 0, 2, 108, 76, },
+	{ 3, 1, 0, 2, 108, 76, },
+	{ 4, 1, 0, 2, 108, 76, },
+	{ 5, 1, 0, 2, 108, 62, },
+	{ 6, 1, 0, 2, 108, 76, },
+	{ 7, 1, 0, 2, 108, 54, },
+	{ 8, 1, 0, 2, 108, 76, },
+	{ 9, 1, 0, 2, 108, 127, },
 	{ 0, 1, 0, 2, 112, 76, },
 	{ 2, 1, 0, 2, 112, 62, },
+	{ 1, 1, 0, 2, 112, 76, },
+	{ 3, 1, 0, 2, 112, 76, },
+	{ 4, 1, 0, 2, 112, 76, },
+	{ 5, 1, 0, 2, 112, 62, },
+	{ 6, 1, 0, 2, 112, 76, },
+	{ 7, 1, 0, 2, 112, 54, },
+	{ 8, 1, 0, 2, 112, 76, },
+	{ 9, 1, 0, 2, 112, 127, },
 	{ 0, 1, 0, 2, 116, 76, },
 	{ 2, 1, 0, 2, 116, 62, },
+	{ 1, 1, 0, 2, 116, 76, },
+	{ 3, 1, 0, 2, 116, 76, },
+	{ 4, 1, 0, 2, 116, 76, },
+	{ 5, 1, 0, 2, 116, 62, },
+	{ 6, 1, 0, 2, 116, 76, },
+	{ 7, 1, 0, 2, 116, 54, },
+	{ 8, 1, 0, 2, 116, 76, },
+	{ 9, 1, 0, 2, 116, 127, },
 	{ 0, 1, 0, 2, 120, 76, },
 	{ 2, 1, 0, 2, 120, 62, },
+	{ 1, 1, 0, 2, 120, 76, },
+	{ 3, 1, 0, 2, 120, 127, },
+	{ 4, 1, 0, 2, 120, 76, },
+	{ 5, 1, 0, 2, 120, 127, },
+	{ 6, 1, 0, 2, 120, 76, },
+	{ 7, 1, 0, 2, 120, 54, },
+	{ 8, 1, 0, 2, 120, 76, },
+	{ 9, 1, 0, 2, 120, 127, },
 	{ 0, 1, 0, 2, 124, 76, },
 	{ 2, 1, 0, 2, 124, 62, },
+	{ 1, 1, 0, 2, 124, 76, },
+	{ 3, 1, 0, 2, 124, 127, },
+	{ 4, 1, 0, 2, 124, 76, },
+	{ 5, 1, 0, 2, 124, 127, },
+	{ 6, 1, 0, 2, 124, 76, },
+	{ 7, 1, 0, 2, 124, 54, },
+	{ 8, 1, 0, 2, 124, 76, },
+	{ 9, 1, 0, 2, 124, 127, },
 	{ 0, 1, 0, 2, 128, 76, },
 	{ 2, 1, 0, 2, 128, 62, },
+	{ 1, 1, 0, 2, 128, 76, },
+	{ 3, 1, 0, 2, 128, 127, },
+	{ 4, 1, 0, 2, 128, 76, },
+	{ 5, 1, 0, 2, 128, 127, },
+	{ 6, 1, 0, 2, 128, 76, },
+	{ 7, 1, 0, 2, 128, 54, },
+	{ 8, 1, 0, 2, 128, 76, },
+	{ 9, 1, 0, 2, 128, 127, },
 	{ 0, 1, 0, 2, 132, 76, },
 	{ 2, 1, 0, 2, 132, 62, },
+	{ 1, 1, 0, 2, 132, 76, },
+	{ 3, 1, 0, 2, 132, 76, },
+	{ 4, 1, 0, 2, 132, 76, },
+	{ 5, 1, 0, 2, 132, 62, },
+	{ 6, 1, 0, 2, 132, 76, },
+	{ 7, 1, 0, 2, 132, 54, },
+	{ 8, 1, 0, 2, 132, 76, },
+	{ 9, 1, 0, 2, 132, 127, },
 	{ 0, 1, 0, 2, 136, 76, },
 	{ 2, 1, 0, 2, 136, 62, },
+	{ 1, 1, 0, 2, 136, 76, },
+	{ 3, 1, 0, 2, 136, 76, },
+	{ 4, 1, 0, 2, 136, 76, },
+	{ 5, 1, 0, 2, 136, 62, },
+	{ 6, 1, 0, 2, 136, 76, },
+	{ 7, 1, 0, 2, 136, 54, },
+	{ 8, 1, 0, 2, 136, 76, },
+	{ 9, 1, 0, 2, 136, 127, },
 	{ 0, 1, 0, 2, 140, 70, },
 	{ 2, 1, 0, 2, 140, 62, },
+	{ 1, 1, 0, 2, 140, 76, },
+	{ 3, 1, 0, 2, 140, 70, },
+	{ 4, 1, 0, 2, 140, 76, },
+	{ 5, 1, 0, 2, 140, 62, },
+	{ 6, 1, 0, 2, 140, 70, },
+	{ 7, 1, 0, 2, 140, 54, },
+	{ 8, 1, 0, 2, 140, 70, },
+	{ 9, 1, 0, 2, 140, 127, },
 	{ 0, 1, 0, 2, 144, 76, },
 	{ 2, 1, 0, 2, 144, 127, },
+	{ 1, 1, 0, 2, 144, 127, },
+	{ 3, 1, 0, 2, 144, 76, },
+	{ 4, 1, 0, 2, 144, 76, },
+	{ 5, 1, 0, 2, 144, 127, },
+	{ 6, 1, 0, 2, 144, 76, },
+	{ 7, 1, 0, 2, 144, 127, },
+	{ 8, 1, 0, 2, 144, 76, },
+	{ 9, 1, 0, 2, 144, 127, },
 	{ 0, 1, 0, 2, 149, 76, },
 	{ 2, 1, 0, 2, 149, -128, },
+	{ 1, 1, 0, 2, 149, 127, },
+	{ 3, 1, 0, 2, 149, 76, },
+	{ 4, 1, 0, 2, 149, 74, },
+	{ 5, 1, 0, 2, 149, 76, },
+	{ 6, 1, 0, 2, 149, 76, },
+	{ 7, 1, 0, 2, 149, 54, },
+	{ 8, 1, 0, 2, 149, 76, },
+	{ 9, 1, 0, 2, 149, -128, },
 	{ 0, 1, 0, 2, 153, 76, },
 	{ 2, 1, 0, 2, 153, -128, },
+	{ 1, 1, 0, 2, 153, 127, },
+	{ 3, 1, 0, 2, 153, 76, },
+	{ 4, 1, 0, 2, 153, 74, },
+	{ 5, 1, 0, 2, 153, 76, },
+	{ 6, 1, 0, 2, 153, 76, },
+	{ 7, 1, 0, 2, 153, 54, },
+	{ 8, 1, 0, 2, 153, 76, },
+	{ 9, 1, 0, 2, 153, -128, },
 	{ 0, 1, 0, 2, 157, 76, },
 	{ 2, 1, 0, 2, 157, -128, },
+	{ 1, 1, 0, 2, 157, 127, },
+	{ 3, 1, 0, 2, 157, 76, },
+	{ 4, 1, 0, 2, 157, 74, },
+	{ 5, 1, 0, 2, 157, 76, },
+	{ 6, 1, 0, 2, 157, 76, },
+	{ 7, 1, 0, 2, 157, 54, },
+	{ 8, 1, 0, 2, 157, 76, },
+	{ 9, 1, 0, 2, 157, -128, },
 	{ 0, 1, 0, 2, 161, 76, },
 	{ 2, 1, 0, 2, 161, -128, },
+	{ 1, 1, 0, 2, 161, 127, },
+	{ 3, 1, 0, 2, 161, 76, },
+	{ 4, 1, 0, 2, 161, 74, },
+	{ 5, 1, 0, 2, 161, 76, },
+	{ 6, 1, 0, 2, 161, 76, },
+	{ 7, 1, 0, 2, 161, 54, },
+	{ 8, 1, 0, 2, 161, 76, },
+	{ 9, 1, 0, 2, 161, -128, },
 	{ 0, 1, 0, 2, 165, 76, },
 	{ 2, 1, 0, 2, 165, -128, },
+	{ 1, 1, 0, 2, 165, 127, },
+	{ 3, 1, 0, 2, 165, 76, },
+	{ 4, 1, 0, 2, 165, 74, },
+	{ 5, 1, 0, 2, 165, 76, },
+	{ 6, 1, 0, 2, 165, 76, },
+	{ 7, 1, 0, 2, 165, 54, },
+	{ 8, 1, 0, 2, 165, 76, },
+	{ 9, 1, 0, 2, 165, -128, },
 	{ 0, 1, 0, 3, 36, 68, },
 	{ 2, 1, 0, 3, 36, 38, },
+	{ 1, 1, 0, 3, 36, 50, },
+	{ 3, 1, 0, 3, 36, 38, },
+	{ 4, 1, 0, 3, 36, 66, },
+	{ 5, 1, 0, 3, 36, 38, },
+	{ 6, 1, 0, 3, 36, 52, },
+	{ 7, 1, 0, 3, 36, 30, },
+	{ 8, 1, 0, 3, 36, 50, },
+	{ 9, 1, 0, 3, 36, 38, },
 	{ 0, 1, 0, 3, 40, 68, },
 	{ 2, 1, 0, 3, 40, 38, },
+	{ 1, 1, 0, 3, 40, 50, },
+	{ 3, 1, 0, 3, 40, 38, },
+	{ 4, 1, 0, 3, 40, 66, },
+	{ 5, 1, 0, 3, 40, 38, },
+	{ 6, 1, 0, 3, 40, 52, },
+	{ 7, 1, 0, 3, 40, 30, },
+	{ 8, 1, 0, 3, 40, 50, },
+	{ 9, 1, 0, 3, 40, 38, },
 	{ 0, 1, 0, 3, 44, 68, },
 	{ 2, 1, 0, 3, 44, 38, },
+	{ 1, 1, 0, 3, 44, 50, },
+	{ 3, 1, 0, 3, 44, 38, },
+	{ 4, 1, 0, 3, 44, 66, },
+	{ 5, 1, 0, 3, 44, 38, },
+	{ 6, 1, 0, 3, 44, 52, },
+	{ 7, 1, 0, 3, 44, 30, },
+	{ 8, 1, 0, 3, 44, 50, },
+	{ 9, 1, 0, 3, 44, 38, },
 	{ 0, 1, 0, 3, 48, 68, },
 	{ 2, 1, 0, 3, 48, 38, },
+	{ 1, 1, 0, 3, 48, 50, },
+	{ 3, 1, 0, 3, 48, 38, },
+	{ 4, 1, 0, 3, 48, 36, },
+	{ 5, 1, 0, 3, 48, 38, },
+	{ 6, 1, 0, 3, 48, 52, },
+	{ 7, 1, 0, 3, 48, 30, },
+	{ 8, 1, 0, 3, 48, 50, },
+	{ 9, 1, 0, 3, 48, 38, },
 	{ 0, 1, 0, 3, 52, 68, },
 	{ 2, 1, 0, 3, 52, 38, },
+	{ 1, 1, 0, 3, 52, 50, },
+	{ 3, 1, 0, 3, 52, 40, },
+	{ 4, 1, 0, 3, 52, 66, },
+	{ 5, 1, 0, 3, 52, 38, },
+	{ 6, 1, 0, 3, 52, 68, },
+	{ 7, 1, 0, 3, 52, 30, },
+	{ 8, 1, 0, 3, 52, 68, },
+	{ 9, 1, 0, 3, 52, 38, },
 	{ 0, 1, 0, 3, 56, 68, },
 	{ 2, 1, 0, 3, 56, 38, },
+	{ 1, 1, 0, 3, 56, 50, },
+	{ 3, 1, 0, 3, 56, 40, },
+	{ 4, 1, 0, 3, 56, 66, },
+	{ 5, 1, 0, 3, 56, 38, },
+	{ 6, 1, 0, 3, 56, 68, },
+	{ 7, 1, 0, 3, 56, 30, },
+	{ 8, 1, 0, 3, 56, 68, },
+	{ 9, 1, 0, 3, 56, 38, },
 	{ 0, 1, 0, 3, 60, 66, },
 	{ 2, 1, 0, 3, 60, 38, },
+	{ 1, 1, 0, 3, 60, 50, },
+	{ 3, 1, 0, 3, 60, 40, },
+	{ 4, 1, 0, 3, 60, 66, },
+	{ 5, 1, 0, 3, 60, 38, },
+	{ 6, 1, 0, 3, 60, 66, },
+	{ 7, 1, 0, 3, 60, 30, },
+	{ 8, 1, 0, 3, 60, 66, },
+	{ 9, 1, 0, 3, 60, 38, },
 	{ 0, 1, 0, 3, 64, 68, },
 	{ 2, 1, 0, 3, 64, 38, },
+	{ 1, 1, 0, 3, 64, 50, },
+	{ 3, 1, 0, 3, 64, 40, },
+	{ 4, 1, 0, 3, 64, 66, },
+	{ 5, 1, 0, 3, 64, 38, },
+	{ 6, 1, 0, 3, 64, 68, },
+	{ 7, 1, 0, 3, 64, 30, },
+	{ 8, 1, 0, 3, 64, 68, },
+	{ 9, 1, 0, 3, 64, 38, },
 	{ 0, 1, 0, 3, 100, 60, },
 	{ 2, 1, 0, 3, 100, 38, },
+	{ 1, 1, 0, 3, 100, 70, },
+	{ 3, 1, 0, 3, 100, 60, },
+	{ 4, 1, 0, 3, 100, 64, },
+	{ 5, 1, 0, 3, 100, 38, },
+	{ 6, 1, 0, 3, 100, 60, },
+	{ 7, 1, 0, 3, 100, 30, },
+	{ 8, 1, 0, 3, 100, 60, },
+	{ 9, 1, 0, 3, 100, 127, },
 	{ 0, 1, 0, 3, 104, 68, },
 	{ 2, 1, 0, 3, 104, 38, },
+	{ 1, 1, 0, 3, 104, 70, },
+	{ 3, 1, 0, 3, 104, 68, },
+	{ 4, 1, 0, 3, 104, 64, },
+	{ 5, 1, 0, 3, 104, 38, },
+	{ 6, 1, 0, 3, 104, 68, },
+	{ 7, 1, 0, 3, 104, 30, },
+	{ 8, 1, 0, 3, 104, 68, },
+	{ 9, 1, 0, 3, 104, 127, },
 	{ 0, 1, 0, 3, 108, 68, },
 	{ 2, 1, 0, 3, 108, 38, },
+	{ 1, 1, 0, 3, 108, 70, },
+	{ 3, 1, 0, 3, 108, 68, },
+	{ 4, 1, 0, 3, 108, 64, },
+	{ 5, 1, 0, 3, 108, 38, },
+	{ 6, 1, 0, 3, 108, 68, },
+	{ 7, 1, 0, 3, 108, 30, },
+	{ 8, 1, 0, 3, 108, 68, },
+	{ 9, 1, 0, 3, 108, 127, },
 	{ 0, 1, 0, 3, 112, 68, },
 	{ 2, 1, 0, 3, 112, 38, },
+	{ 1, 1, 0, 3, 112, 70, },
+	{ 3, 1, 0, 3, 112, 68, },
+	{ 4, 1, 0, 3, 112, 64, },
+	{ 5, 1, 0, 3, 112, 38, },
+	{ 6, 1, 0, 3, 112, 68, },
+	{ 7, 1, 0, 3, 112, 30, },
+	{ 8, 1, 0, 3, 112, 68, },
+	{ 9, 1, 0, 3, 112, 127, },
 	{ 0, 1, 0, 3, 116, 68, },
 	{ 2, 1, 0, 3, 116, 38, },
+	{ 1, 1, 0, 3, 116, 70, },
+	{ 3, 1, 0, 3, 116, 68, },
+	{ 4, 1, 0, 3, 116, 64, },
+	{ 5, 1, 0, 3, 116, 38, },
+	{ 6, 1, 0, 3, 116, 68, },
+	{ 7, 1, 0, 3, 116, 30, },
+	{ 8, 1, 0, 3, 116, 68, },
+	{ 9, 1, 0, 3, 116, 127, },
 	{ 0, 1, 0, 3, 120, 68, },
 	{ 2, 1, 0, 3, 120, 38, },
+	{ 1, 1, 0, 3, 120, 70, },
+	{ 3, 1, 0, 3, 120, 127, },
+	{ 4, 1, 0, 3, 120, 64, },
+	{ 5, 1, 0, 3, 120, 127, },
+	{ 6, 1, 0, 3, 120, 68, },
+	{ 7, 1, 0, 3, 120, 30, },
+	{ 8, 1, 0, 3, 120, 68, },
+	{ 9, 1, 0, 3, 120, 127, },
 	{ 0, 1, 0, 3, 124, 68, },
 	{ 2, 1, 0, 3, 124, 38, },
+	{ 1, 1, 0, 3, 124, 70, },
+	{ 3, 1, 0, 3, 124, 127, },
+	{ 4, 1, 0, 3, 124, 64, },
+	{ 5, 1, 0, 3, 124, 127, },
+	{ 6, 1, 0, 3, 124, 68, },
+	{ 7, 1, 0, 3, 124, 30, },
+	{ 8, 1, 0, 3, 124, 68, },
+	{ 9, 1, 0, 3, 124, 127, },
 	{ 0, 1, 0, 3, 128, 68, },
 	{ 2, 1, 0, 3, 128, 38, },
+	{ 1, 1, 0, 3, 128, 70, },
+	{ 3, 1, 0, 3, 128, 127, },
+	{ 4, 1, 0, 3, 128, 64, },
+	{ 5, 1, 0, 3, 128, 127, },
+	{ 6, 1, 0, 3, 128, 68, },
+	{ 7, 1, 0, 3, 128, 30, },
+	{ 8, 1, 0, 3, 128, 68, },
+	{ 9, 1, 0, 3, 128, 127, },
 	{ 0, 1, 0, 3, 132, 68, },
 	{ 2, 1, 0, 3, 132, 38, },
+	{ 1, 1, 0, 3, 132, 70, },
+	{ 3, 1, 0, 3, 132, 68, },
+	{ 4, 1, 0, 3, 132, 64, },
+	{ 5, 1, 0, 3, 132, 38, },
+	{ 6, 1, 0, 3, 132, 68, },
+	{ 7, 1, 0, 3, 132, 30, },
+	{ 8, 1, 0, 3, 132, 68, },
+	{ 9, 1, 0, 3, 132, 127, },
 	{ 0, 1, 0, 3, 136, 68, },
 	{ 2, 1, 0, 3, 136, 38, },
+	{ 1, 1, 0, 3, 136, 70, },
+	{ 3, 1, 0, 3, 136, 68, },
+	{ 4, 1, 0, 3, 136, 64, },
+	{ 5, 1, 0, 3, 136, 38, },
+	{ 6, 1, 0, 3, 136, 68, },
+	{ 7, 1, 0, 3, 136, 30, },
+	{ 8, 1, 0, 3, 136, 68, },
+	{ 9, 1, 0, 3, 136, 127, },
 	{ 0, 1, 0, 3, 140, 60, },
 	{ 2, 1, 0, 3, 140, 38, },
+	{ 1, 1, 0, 3, 140, 70, },
+	{ 3, 1, 0, 3, 140, 60, },
+	{ 4, 1, 0, 3, 140, 64, },
+	{ 5, 1, 0, 3, 140, 38, },
+	{ 6, 1, 0, 3, 140, 60, },
+	{ 7, 1, 0, 3, 140, 30, },
+	{ 8, 1, 0, 3, 140, 60, },
+	{ 9, 1, 0, 3, 140, 127, },
 	{ 0, 1, 0, 3, 144, 68, },
 	{ 2, 1, 0, 3, 144, 127, },
+	{ 1, 1, 0, 3, 144, 127, },
+	{ 3, 1, 0, 3, 144, 68, },
+	{ 4, 1, 0, 3, 144, 64, },
+	{ 5, 1, 0, 3, 144, 127, },
+	{ 6, 1, 0, 3, 144, 68, },
+	{ 7, 1, 0, 3, 144, 127, },
+	{ 8, 1, 0, 3, 144, 68, },
+	{ 9, 1, 0, 3, 144, 127, },
 	{ 0, 1, 0, 3, 149, 76, },
 	{ 2, 1, 0, 3, 149, -128, },
+	{ 1, 1, 0, 3, 149, 127, },
+	{ 3, 1, 0, 3, 149, 76, },
+	{ 4, 1, 0, 3, 149, 60, },
+	{ 5, 1, 0, 3, 149, 76, },
+	{ 6, 1, 0, 3, 149, 76, },
+	{ 7, 1, 0, 3, 149, 30, },
+	{ 8, 1, 0, 3, 149, 72, },
+	{ 9, 1, 0, 3, 149, -128, },
 	{ 0, 1, 0, 3, 153, 76, },
 	{ 2, 1, 0, 3, 153, -128, },
+	{ 1, 1, 0, 3, 153, 127, },
+	{ 3, 1, 0, 3, 153, 76, },
+	{ 4, 1, 0, 3, 153, 60, },
+	{ 5, 1, 0, 3, 153, 76, },
+	{ 6, 1, 0, 3, 153, 76, },
+	{ 7, 1, 0, 3, 153, 30, },
+	{ 8, 1, 0, 3, 153, 76, },
+	{ 9, 1, 0, 3, 153, -128, },
 	{ 0, 1, 0, 3, 157, 76, },
 	{ 2, 1, 0, 3, 157, -128, },
+	{ 1, 1, 0, 3, 157, 127, },
+	{ 3, 1, 0, 3, 157, 76, },
+	{ 4, 1, 0, 3, 157, 60, },
+	{ 5, 1, 0, 3, 157, 76, },
+	{ 6, 1, 0, 3, 157, 76, },
+	{ 7, 1, 0, 3, 157, 30, },
+	{ 8, 1, 0, 3, 157, 76, },
+	{ 9, 1, 0, 3, 157, -128, },
 	{ 0, 1, 0, 3, 161, 76, },
 	{ 2, 1, 0, 3, 161, -128, },
+	{ 1, 1, 0, 3, 161, 127, },
+	{ 3, 1, 0, 3, 161, 76, },
+	{ 4, 1, 0, 3, 161, 60, },
+	{ 5, 1, 0, 3, 161, 76, },
+	{ 6, 1, 0, 3, 161, 76, },
+	{ 7, 1, 0, 3, 161, 30, },
+	{ 8, 1, 0, 3, 161, 76, },
+	{ 9, 1, 0, 3, 161, -128, },
 	{ 0, 1, 0, 3, 165, 76, },
 	{ 2, 1, 0, 3, 165, -128, },
+	{ 1, 1, 0, 3, 165, 127, },
+	{ 3, 1, 0, 3, 165, 76, },
+	{ 4, 1, 0, 3, 165, 60, },
+	{ 5, 1, 0, 3, 165, 76, },
+	{ 6, 1, 0, 3, 165, 76, },
+	{ 7, 1, 0, 3, 165, 30, },
+	{ 8, 1, 0, 3, 165, 76, },
+	{ 9, 1, 0, 3, 165, -128, },
 	{ 0, 1, 1, 2, 38, 66, },
 	{ 2, 1, 1, 2, 38, 64, },
+	{ 1, 1, 1, 2, 38, 62, },
+	{ 3, 1, 1, 2, 38, 64, },
+	{ 4, 1, 1, 2, 38, 72, },
+	{ 5, 1, 1, 2, 38, 64, },
+	{ 6, 1, 1, 2, 38, 64, },
+	{ 7, 1, 1, 2, 38, 54, },
+	{ 8, 1, 1, 2, 38, 62, },
+	{ 9, 1, 1, 2, 38, 64, },
 	{ 0, 1, 1, 2, 46, 72, },
 	{ 2, 1, 1, 2, 46, 64, },
+	{ 1, 1, 1, 2, 46, 62, },
+	{ 3, 1, 1, 2, 46, 64, },
+	{ 4, 1, 1, 2, 46, 60, },
+	{ 5, 1, 1, 2, 46, 64, },
+	{ 6, 1, 1, 2, 46, 64, },
+	{ 7, 1, 1, 2, 46, 54, },
+	{ 8, 1, 1, 2, 46, 62, },
+	{ 9, 1, 1, 2, 46, 64, },
 	{ 0, 1, 1, 2, 54, 72, },
 	{ 2, 1, 1, 2, 54, 64, },
+	{ 1, 1, 1, 2, 54, 62, },
+	{ 3, 1, 1, 2, 54, 64, },
+	{ 4, 1, 1, 2, 54, 72, },
+	{ 5, 1, 1, 2, 54, 64, },
+	{ 6, 1, 1, 2, 54, 72, },
+	{ 7, 1, 1, 2, 54, 54, },
+	{ 8, 1, 1, 2, 54, 72, },
+	{ 9, 1, 1, 2, 54, 64, },
 	{ 0, 1, 1, 2, 62, 64, },
 	{ 2, 1, 1, 2, 62, 64, },
+	{ 1, 1, 1, 2, 62, 62, },
+	{ 3, 1, 1, 2, 62, 64, },
+	{ 4, 1, 1, 2, 62, 70, },
+	{ 5, 1, 1, 2, 62, 64, },
+	{ 6, 1, 1, 2, 62, 64, },
+	{ 7, 1, 1, 2, 62, 54, },
+	{ 8, 1, 1, 2, 62, 64, },
+	{ 9, 1, 1, 2, 62, 64, },
 	{ 0, 1, 1, 2, 102, 58, },
 	{ 2, 1, 1, 2, 102, 64, },
+	{ 1, 1, 1, 2, 102, 72, },
+	{ 3, 1, 1, 2, 102, 58, },
+	{ 4, 1, 1, 2, 102, 72, },
+	{ 5, 1, 1, 2, 102, 64, },
+	{ 6, 1, 1, 2, 102, 58, },
+	{ 7, 1, 1, 2, 102, 54, },
+	{ 8, 1, 1, 2, 102, 58, },
+	{ 9, 1, 1, 2, 102, 127, },
 	{ 0, 1, 1, 2, 110, 72, },
 	{ 2, 1, 1, 2, 110, 64, },
+	{ 1, 1, 1, 2, 110, 72, },
+	{ 3, 1, 1, 2, 110, 72, },
+	{ 4, 1, 1, 2, 110, 72, },
+	{ 5, 1, 1, 2, 110, 64, },
+	{ 6, 1, 1, 2, 110, 72, },
+	{ 7, 1, 1, 2, 110, 54, },
+	{ 8, 1, 1, 2, 110, 72, },
+	{ 9, 1, 1, 2, 110, 127, },
 	{ 0, 1, 1, 2, 118, 72, },
 	{ 2, 1, 1, 2, 118, 64, },
+	{ 1, 1, 1, 2, 118, 72, },
+	{ 3, 1, 1, 2, 118, 127, },
+	{ 4, 1, 1, 2, 118, 72, },
+	{ 5, 1, 1, 2, 118, 127, },
+	{ 6, 1, 1, 2, 118, 72, },
+	{ 7, 1, 1, 2, 118, 54, },
+	{ 8, 1, 1, 2, 118, 72, },
+	{ 9, 1, 1, 2, 118, 127, },
 	{ 0, 1, 1, 2, 126, 72, },
 	{ 2, 1, 1, 2, 126, 64, },
+	{ 1, 1, 1, 2, 126, 72, },
+	{ 3, 1, 1, 2, 126, 127, },
+	{ 4, 1, 1, 2, 126, 72, },
+	{ 5, 1, 1, 2, 126, 127, },
+	{ 6, 1, 1, 2, 126, 72, },
+	{ 7, 1, 1, 2, 126, 54, },
+	{ 8, 1, 1, 2, 126, 72, },
+	{ 9, 1, 1, 2, 126, 127, },
 	{ 0, 1, 1, 2, 134, 72, },
 	{ 2, 1, 1, 2, 134, 64, },
+	{ 1, 1, 1, 2, 134, 72, },
+	{ 3, 1, 1, 2, 134, 72, },
+	{ 4, 1, 1, 2, 134, 72, },
+	{ 5, 1, 1, 2, 134, 64, },
+	{ 6, 1, 1, 2, 134, 72, },
+	{ 7, 1, 1, 2, 134, 54, },
+	{ 8, 1, 1, 2, 134, 72, },
+	{ 9, 1, 1, 2, 134, 127, },
 	{ 0, 1, 1, 2, 142, 72, },
 	{ 2, 1, 1, 2, 142, 127, },
+	{ 1, 1, 1, 2, 142, 127, },
+	{ 3, 1, 1, 2, 142, 72, },
+	{ 4, 1, 1, 2, 142, 72, },
+	{ 5, 1, 1, 2, 142, 127, },
+	{ 6, 1, 1, 2, 142, 72, },
+	{ 7, 1, 1, 2, 142, 127, },
+	{ 8, 1, 1, 2, 142, 72, },
+	{ 9, 1, 1, 2, 142, 127, },
 	{ 0, 1, 1, 2, 151, 72, },
 	{ 2, 1, 1, 2, 151, -128, },
+	{ 1, 1, 1, 2, 151, 127, },
+	{ 3, 1, 1, 2, 151, 72, },
+	{ 4, 1, 1, 2, 151, 72, },
+	{ 5, 1, 1, 2, 151, 72, },
+	{ 6, 1, 1, 2, 151, 72, },
+	{ 7, 1, 1, 2, 151, 54, },
+	{ 8, 1, 1, 2, 151, 72, },
+	{ 9, 1, 1, 2, 151, -128, },
 	{ 0, 1, 1, 2, 159, 72, },
 	{ 2, 1, 1, 2, 159, -128, },
+	{ 1, 1, 1, 2, 159, 127, },
+	{ 3, 1, 1, 2, 159, 72, },
+	{ 4, 1, 1, 2, 159, 72, },
+	{ 5, 1, 1, 2, 159, 72, },
+	{ 6, 1, 1, 2, 159, 72, },
+	{ 7, 1, 1, 2, 159, 54, },
+	{ 8, 1, 1, 2, 159, 72, },
+	{ 9, 1, 1, 2, 159, -128, },
 	{ 0, 1, 1, 3, 38, 60, },
 	{ 2, 1, 1, 3, 38, 40, },
+	{ 1, 1, 1, 3, 38, 50, },
+	{ 3, 1, 1, 3, 38, 40, },
+	{ 4, 1, 1, 3, 38, 62, },
+	{ 5, 1, 1, 3, 38, 40, },
+	{ 6, 1, 1, 3, 38, 52, },
+	{ 7, 1, 1, 3, 38, 30, },
+	{ 8, 1, 1, 3, 38, 50, },
+	{ 9, 1, 1, 3, 38, 40, },
 	{ 0, 1, 1, 3, 46, 68, },
 	{ 2, 1, 1, 3, 46, 40, },
+	{ 1, 1, 1, 3, 46, 50, },
+	{ 3, 1, 1, 3, 46, 40, },
+	{ 4, 1, 1, 3, 46, 46, },
+	{ 5, 1, 1, 3, 46, 40, },
+	{ 6, 1, 1, 3, 46, 52, },
+	{ 7, 1, 1, 3, 46, 30, },
+	{ 8, 1, 1, 3, 46, 50, },
+	{ 9, 1, 1, 3, 46, 40, },
 	{ 0, 1, 1, 3, 54, 68, },
 	{ 2, 1, 1, 3, 54, 40, },
+	{ 1, 1, 1, 3, 54, 50, },
+	{ 3, 1, 1, 3, 54, 40, },
+	{ 4, 1, 1, 3, 54, 62, },
+	{ 5, 1, 1, 3, 54, 40, },
+	{ 6, 1, 1, 3, 54, 68, },
+	{ 7, 1, 1, 3, 54, 30, },
+	{ 8, 1, 1, 3, 54, 68, },
+	{ 9, 1, 1, 3, 54, 40, },
 	{ 0, 1, 1, 3, 62, 58, },
 	{ 2, 1, 1, 3, 62, 40, },
+	{ 1, 1, 1, 3, 62, 48, },
+	{ 3, 1, 1, 3, 62, 40, },
+	{ 4, 1, 1, 3, 62, 58, },
+	{ 5, 1, 1, 3, 62, 40, },
+	{ 6, 1, 1, 3, 62, 58, },
+	{ 7, 1, 1, 3, 62, 30, },
+	{ 8, 1, 1, 3, 62, 58, },
+	{ 9, 1, 1, 3, 62, 40, },
 	{ 0, 1, 1, 3, 102, 54, },
 	{ 2, 1, 1, 3, 102, 40, },
+	{ 1, 1, 1, 3, 102, 70, },
+	{ 3, 1, 1, 3, 102, 54, },
+	{ 4, 1, 1, 3, 102, 64, },
+	{ 5, 1, 1, 3, 102, 40, },
+	{ 6, 1, 1, 3, 102, 54, },
+	{ 7, 1, 1, 3, 102, 30, },
+	{ 8, 1, 1, 3, 102, 54, },
+	{ 9, 1, 1, 3, 102, 127, },
 	{ 0, 1, 1, 3, 110, 68, },
 	{ 2, 1, 1, 3, 110, 40, },
+	{ 1, 1, 1, 3, 110, 70, },
+	{ 3, 1, 1, 3, 110, 68, },
+	{ 4, 1, 1, 3, 110, 64, },
+	{ 5, 1, 1, 3, 110, 40, },
+	{ 6, 1, 1, 3, 110, 68, },
+	{ 7, 1, 1, 3, 110, 30, },
+	{ 8, 1, 1, 3, 110, 68, },
+	{ 9, 1, 1, 3, 110, 127, },
 	{ 0, 1, 1, 3, 118, 68, },
 	{ 2, 1, 1, 3, 118, 40, },
+	{ 1, 1, 1, 3, 118, 70, },
+	{ 3, 1, 1, 3, 118, 127, },
+	{ 4, 1, 1, 3, 118, 64, },
+	{ 5, 1, 1, 3, 118, 127, },
+	{ 6, 1, 1, 3, 118, 68, },
+	{ 7, 1, 1, 3, 118, 30, },
+	{ 8, 1, 1, 3, 118, 68, },
+	{ 9, 1, 1, 3, 118, 127, },
 	{ 0, 1, 1, 3, 126, 68, },
 	{ 2, 1, 1, 3, 126, 40, },
+	{ 1, 1, 1, 3, 126, 70, },
+	{ 3, 1, 1, 3, 126, 127, },
+	{ 4, 1, 1, 3, 126, 64, },
+	{ 5, 1, 1, 3, 126, 127, },
+	{ 6, 1, 1, 3, 126, 68, },
+	{ 7, 1, 1, 3, 126, 30, },
+	{ 8, 1, 1, 3, 126, 68, },
+	{ 9, 1, 1, 3, 126, 127, },
 	{ 0, 1, 1, 3, 134, 68, },
 	{ 2, 1, 1, 3, 134, 40, },
+	{ 1, 1, 1, 3, 134, 70, },
+	{ 3, 1, 1, 3, 134, 68, },
+	{ 4, 1, 1, 3, 134, 64, },
+	{ 5, 1, 1, 3, 134, 40, },
+	{ 6, 1, 1, 3, 134, 68, },
+	{ 7, 1, 1, 3, 134, 30, },
+	{ 8, 1, 1, 3, 134, 68, },
+	{ 9, 1, 1, 3, 134, 127, },
 	{ 0, 1, 1, 3, 142, 68, },
 	{ 2, 1, 1, 3, 142, 127, },
+	{ 1, 1, 1, 3, 142, 127, },
+	{ 3, 1, 1, 3, 142, 68, },
+	{ 4, 1, 1, 3, 142, 64, },
+	{ 5, 1, 1, 3, 142, 127, },
+	{ 6, 1, 1, 3, 142, 68, },
+	{ 7, 1, 1, 3, 142, 127, },
+	{ 8, 1, 1, 3, 142, 68, },
+	{ 9, 1, 1, 3, 142, 127, },
 	{ 0, 1, 1, 3, 151, 72, },
 	{ 2, 1, 1, 3, 151, -128, },
+	{ 1, 1, 1, 3, 151, 127, },
+	{ 3, 1, 1, 3, 151, 72, },
+	{ 4, 1, 1, 3, 151, 66, },
+	{ 5, 1, 1, 3, 151, 72, },
+	{ 6, 1, 1, 3, 151, 72, },
+	{ 7, 1, 1, 3, 151, 30, },
+	{ 8, 1, 1, 3, 151, 68, },
+	{ 9, 1, 1, 3, 151, -128, },
 	{ 0, 1, 1, 3, 159, 72, },
 	{ 2, 1, 1, 3, 159, -128, },
+	{ 1, 1, 1, 3, 159, 127, },
+	{ 3, 1, 1, 3, 159, 72, },
+	{ 4, 1, 1, 3, 159, 66, },
+	{ 5, 1, 1, 3, 159, 72, },
+	{ 6, 1, 1, 3, 159, 72, },
+	{ 7, 1, 1, 3, 159, 30, },
+	{ 8, 1, 1, 3, 159, 72, },
+	{ 9, 1, 1, 3, 159, -128, },
 	{ 0, 1, 2, 4, 42, 64, },
 	{ 2, 1, 2, 4, 42, 64, },
+	{ 1, 1, 2, 4, 42, 64, },
+	{ 3, 1, 2, 4, 42, 64, },
+	{ 4, 1, 2, 4, 42, 68, },
+	{ 5, 1, 2, 4, 42, 64, },
+	{ 6, 1, 2, 4, 42, 64, },
+	{ 7, 1, 2, 4, 42, 54, },
+	{ 8, 1, 2, 4, 42, 62, },
+	{ 9, 1, 2, 4, 42, 64, },
 	{ 0, 1, 2, 4, 58, 62, },
 	{ 2, 1, 2, 4, 58, 64, },
+	{ 1, 1, 2, 4, 58, 64, },
+	{ 3, 1, 2, 4, 58, 62, },
+	{ 4, 1, 2, 4, 58, 64, },
+	{ 5, 1, 2, 4, 58, 64, },
+	{ 6, 1, 2, 4, 58, 62, },
+	{ 7, 1, 2, 4, 58, 54, },
+	{ 8, 1, 2, 4, 58, 62, },
+	{ 9, 1, 2, 4, 58, 64, },
 	{ 0, 1, 2, 4, 106, 58, },
 	{ 2, 1, 2, 4, 106, 64, },
+	{ 1, 1, 2, 4, 106, 72, },
+	{ 3, 1, 2, 4, 106, 58, },
+	{ 4, 1, 2, 4, 106, 66, },
+	{ 5, 1, 2, 4, 106, 64, },
+	{ 6, 1, 2, 4, 106, 58, },
+	{ 7, 1, 2, 4, 106, 54, },
+	{ 8, 1, 2, 4, 106, 58, },
+	{ 9, 1, 2, 4, 106, 127, },
 	{ 0, 1, 2, 4, 122, 72, },
 	{ 2, 1, 2, 4, 122, 64, },
+	{ 1, 1, 2, 4, 122, 72, },
+	{ 3, 1, 2, 4, 122, 127, },
+	{ 4, 1, 2, 4, 122, 68, },
+	{ 5, 1, 2, 4, 122, 127, },
+	{ 6, 1, 2, 4, 122, 72, },
+	{ 7, 1, 2, 4, 122, 54, },
+	{ 8, 1, 2, 4, 122, 72, },
+	{ 9, 1, 2, 4, 122, 127, },
 	{ 0, 1, 2, 4, 138, 72, },
 	{ 2, 1, 2, 4, 138, 127, },
+	{ 1, 1, 2, 4, 138, 127, },
+	{ 3, 1, 2, 4, 138, 72, },
+	{ 4, 1, 2, 4, 138, 68, },
+	{ 5, 1, 2, 4, 138, 127, },
+	{ 6, 1, 2, 4, 138, 72, },
+	{ 7, 1, 2, 4, 138, 127, },
+	{ 8, 1, 2, 4, 138, 72, },
+	{ 9, 1, 2, 4, 138, 127, },
 	{ 0, 1, 2, 4, 155, 72, },
 	{ 2, 1, 2, 4, 155, -128, },
+	{ 1, 1, 2, 4, 155, 127, },
+	{ 3, 1, 2, 4, 155, 72, },
+	{ 4, 1, 2, 4, 155, 68, },
+	{ 5, 1, 2, 4, 155, 72, },
+	{ 6, 1, 2, 4, 155, 72, },
+	{ 7, 1, 2, 4, 155, 54, },
+	{ 8, 1, 2, 4, 155, 68, },
+	{ 9, 1, 2, 4, 155, -128, },
 	{ 0, 1, 2, 5, 42, 54, },
 	{ 2, 1, 2, 5, 42, 40, },
+	{ 1, 1, 2, 5, 42, 50, },
+	{ 3, 1, 2, 5, 42, 40, },
+	{ 4, 1, 2, 5, 42, 58, },
+	{ 5, 1, 2, 5, 42, 40, },
+	{ 6, 1, 2, 5, 42, 52, },
+	{ 7, 1, 2, 5, 42, 30, },
+	{ 8, 1, 2, 5, 42, 50, },
+	{ 9, 1, 2, 5, 42, 40, },
 	{ 0, 1, 2, 5, 58, 52, },
 	{ 2, 1, 2, 5, 58, 40, },
+	{ 1, 1, 2, 5, 58, 50, },
+	{ 3, 1, 2, 5, 58, 40, },
+	{ 4, 1, 2, 5, 58, 56, },
+	{ 5, 1, 2, 5, 58, 40, },
+	{ 6, 1, 2, 5, 58, 52, },
+	{ 7, 1, 2, 5, 58, 30, },
+	{ 8, 1, 2, 5, 58, 52, },
+	{ 9, 1, 2, 5, 58, 40, },
 	{ 0, 1, 2, 5, 106, 50, },
 	{ 2, 1, 2, 5, 106, 40, },
+	{ 1, 1, 2, 5, 106, 72, },
+	{ 3, 1, 2, 5, 106, 50, },
+	{ 4, 1, 2, 5, 106, 56, },
+	{ 5, 1, 2, 5, 106, 40, },
+	{ 6, 1, 2, 5, 106, 50, },
+	{ 7, 1, 2, 5, 106, 30, },
+	{ 8, 1, 2, 5, 106, 50, },
+	{ 9, 1, 2, 5, 106, 127, },
 	{ 0, 1, 2, 5, 122, 66, },
 	{ 2, 1, 2, 5, 122, 40, },
+	{ 1, 1, 2, 5, 122, 72, },
+	{ 3, 1, 2, 5, 122, 127, },
+	{ 4, 1, 2, 5, 122, 56, },
+	{ 5, 1, 2, 5, 122, 127, },
+	{ 6, 1, 2, 5, 122, 66, },
+	{ 7, 1, 2, 5, 122, 30, },
+	{ 8, 1, 2, 5, 122, 66, },
+	{ 9, 1, 2, 5, 122, 127, },
 	{ 0, 1, 2, 5, 138, 66, },
 	{ 2, 1, 2, 5, 138, 127, },
+	{ 1, 1, 2, 5, 138, 127, },
+	{ 3, 1, 2, 5, 138, 66, },
+	{ 4, 1, 2, 5, 138, 58, },
+	{ 5, 1, 2, 5, 138, 127, },
+	{ 6, 1, 2, 5, 138, 66, },
+	{ 7, 1, 2, 5, 138, 127, },
+	{ 8, 1, 2, 5, 138, 66, },
+	{ 9, 1, 2, 5, 138, 127, },
 	{ 0, 1, 2, 5, 155, 62, },
 	{ 2, 1, 2, 5, 155, -128, },
+	{ 1, 1, 2, 5, 155, 127, },
+	{ 3, 1, 2, 5, 155, 62, },
+	{ 4, 1, 2, 5, 155, 58, },
+	{ 5, 1, 2, 5, 155, 72, },
+	{ 6, 1, 2, 5, 155, 62, },
+	{ 7, 1, 2, 5, 155, 30, },
+	{ 8, 1, 2, 5, 155, 62, },
+	{ 9, 1, 2, 5, 155, -128, },
+};
+
+RTW_DECL_TABLE_TXPWR_LMT(rtw8822c_txpwr_lmt_type0);
+
+static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type5[] = {
+	{ 0, 0, 0, 0, 1, 72, },
+	{ 2, 0, 0, 0, 1, 60, },
 	{ 1, 0, 0, 0, 1, 68, },
 	{ 3, 0, 0, 0, 1, 72, },
 	{ 4, 0, 0, 0, 1, 76, },
@@ -12112,6 +25743,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 1, 72, },
 	{ 7, 0, 0, 0, 1, 60, },
 	{ 8, 0, 0, 0, 1, 72, },
+	{ 9, 0, 0, 0, 1, 60, },
+	{ 0, 0, 0, 0, 2, 72, },
+	{ 2, 0, 0, 0, 2, 60, },
 	{ 1, 0, 0, 0, 2, 68, },
 	{ 3, 0, 0, 0, 2, 72, },
 	{ 4, 0, 0, 0, 2, 76, },
@@ -12119,6 +25753,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 2, 72, },
 	{ 7, 0, 0, 0, 2, 60, },
 	{ 8, 0, 0, 0, 2, 72, },
+	{ 9, 0, 0, 0, 2, 60, },
+	{ 0, 0, 0, 0, 3, 76, },
+	{ 2, 0, 0, 0, 3, 60, },
 	{ 1, 0, 0, 0, 3, 68, },
 	{ 3, 0, 0, 0, 3, 76, },
 	{ 4, 0, 0, 0, 3, 76, },
@@ -12126,6 +25763,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 3, 76, },
 	{ 7, 0, 0, 0, 3, 60, },
 	{ 8, 0, 0, 0, 3, 76, },
+	{ 9, 0, 0, 0, 3, 60, },
+	{ 0, 0, 0, 0, 4, 76, },
+	{ 2, 0, 0, 0, 4, 60, },
 	{ 1, 0, 0, 0, 4, 68, },
 	{ 3, 0, 0, 0, 4, 76, },
 	{ 4, 0, 0, 0, 4, 76, },
@@ -12133,6 +25773,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 4, 76, },
 	{ 7, 0, 0, 0, 4, 60, },
 	{ 8, 0, 0, 0, 4, 76, },
+	{ 9, 0, 0, 0, 4, 60, },
+	{ 0, 0, 0, 0, 5, 76, },
+	{ 2, 0, 0, 0, 5, 60, },
 	{ 1, 0, 0, 0, 5, 68, },
 	{ 3, 0, 0, 0, 5, 76, },
 	{ 4, 0, 0, 0, 5, 76, },
@@ -12140,6 +25783,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 5, 76, },
 	{ 7, 0, 0, 0, 5, 60, },
 	{ 8, 0, 0, 0, 5, 76, },
+	{ 9, 0, 0, 0, 5, 60, },
+	{ 0, 0, 0, 0, 6, 76, },
+	{ 2, 0, 0, 0, 6, 60, },
 	{ 1, 0, 0, 0, 6, 68, },
 	{ 3, 0, 0, 0, 6, 76, },
 	{ 4, 0, 0, 0, 6, 76, },
@@ -12147,6 +25793,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 6, 76, },
 	{ 7, 0, 0, 0, 6, 60, },
 	{ 8, 0, 0, 0, 6, 76, },
+	{ 9, 0, 0, 0, 6, 60, },
+	{ 0, 0, 0, 0, 7, 76, },
+	{ 2, 0, 0, 0, 7, 60, },
 	{ 1, 0, 0, 0, 7, 68, },
 	{ 3, 0, 0, 0, 7, 76, },
 	{ 4, 0, 0, 0, 7, 76, },
@@ -12154,6 +25803,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 7, 76, },
 	{ 7, 0, 0, 0, 7, 60, },
 	{ 8, 0, 0, 0, 7, 76, },
+	{ 9, 0, 0, 0, 7, 60, },
+	{ 0, 0, 0, 0, 8, 76, },
+	{ 2, 0, 0, 0, 8, 60, },
 	{ 1, 0, 0, 0, 8, 68, },
 	{ 3, 0, 0, 0, 8, 76, },
 	{ 4, 0, 0, 0, 8, 76, },
@@ -12161,6 +25813,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 8, 76, },
 	{ 7, 0, 0, 0, 8, 60, },
 	{ 8, 0, 0, 0, 8, 76, },
+	{ 9, 0, 0, 0, 8, 60, },
+	{ 0, 0, 0, 0, 9, 76, },
+	{ 2, 0, 0, 0, 9, 60, },
 	{ 1, 0, 0, 0, 9, 68, },
 	{ 3, 0, 0, 0, 9, 76, },
 	{ 4, 0, 0, 0, 9, 76, },
@@ -12168,6 +25823,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 9, 76, },
 	{ 7, 0, 0, 0, 9, 60, },
 	{ 8, 0, 0, 0, 9, 76, },
+	{ 9, 0, 0, 0, 9, 60, },
+	{ 0, 0, 0, 0, 10, 72, },
+	{ 2, 0, 0, 0, 10, 60, },
 	{ 1, 0, 0, 0, 10, 68, },
 	{ 3, 0, 0, 0, 10, 72, },
 	{ 4, 0, 0, 0, 10, 76, },
@@ -12175,6 +25833,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 10, 72, },
 	{ 7, 0, 0, 0, 10, 60, },
 	{ 8, 0, 0, 0, 10, 72, },
+	{ 9, 0, 0, 0, 10, 60, },
+	{ 0, 0, 0, 0, 11, 72, },
+	{ 2, 0, 0, 0, 11, 60, },
 	{ 1, 0, 0, 0, 11, 68, },
 	{ 3, 0, 0, 0, 11, 72, },
 	{ 4, 0, 0, 0, 11, 76, },
@@ -12182,6 +25843,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 11, 72, },
 	{ 7, 0, 0, 0, 11, 60, },
 	{ 8, 0, 0, 0, 11, 72, },
+	{ 9, 0, 0, 0, 11, 60, },
+	{ 0, 0, 0, 0, 12, 52, },
+	{ 2, 0, 0, 0, 12, 60, },
 	{ 1, 0, 0, 0, 12, 68, },
 	{ 3, 0, 0, 0, 12, 52, },
 	{ 4, 0, 0, 0, 12, 76, },
@@ -12189,6 +25853,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 12, 52, },
 	{ 7, 0, 0, 0, 12, 60, },
 	{ 8, 0, 0, 0, 12, 52, },
+	{ 9, 0, 0, 0, 12, 60, },
+	{ 0, 0, 0, 0, 13, 48, },
+	{ 2, 0, 0, 0, 13, 60, },
 	{ 1, 0, 0, 0, 13, 68, },
 	{ 3, 0, 0, 0, 13, 48, },
 	{ 4, 0, 0, 0, 13, 76, },
@@ -12196,6 +25863,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 13, 48, },
 	{ 7, 0, 0, 0, 13, 60, },
 	{ 8, 0, 0, 0, 13, 48, },
+	{ 9, 0, 0, 0, 13, 60, },
+	{ 0, 0, 0, 0, 14, 127, },
+	{ 2, 0, 0, 0, 14, 127, },
 	{ 1, 0, 0, 0, 14, 68, },
 	{ 3, 0, 0, 0, 14, 127, },
 	{ 4, 0, 0, 0, 14, 127, },
@@ -12203,6 +25873,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 0, 14, 127, },
 	{ 7, 0, 0, 0, 14, 127, },
 	{ 8, 0, 0, 0, 14, 127, },
+	{ 9, 0, 0, 0, 14, 127, },
+	{ 0, 0, 0, 1, 1, 52, },
+	{ 2, 0, 0, 1, 1, 60, },
 	{ 1, 0, 0, 1, 1, 76, },
 	{ 3, 0, 0, 1, 1, 52, },
 	{ 4, 0, 0, 1, 1, 76, },
@@ -12210,6 +25883,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 1, 52, },
 	{ 7, 0, 0, 1, 1, 60, },
 	{ 8, 0, 0, 1, 1, 52, },
+	{ 9, 0, 0, 1, 1, 60, },
+	{ 0, 0, 0, 1, 2, 60, },
+	{ 2, 0, 0, 1, 2, 60, },
 	{ 1, 0, 0, 1, 2, 76, },
 	{ 3, 0, 0, 1, 2, 60, },
 	{ 4, 0, 0, 1, 2, 76, },
@@ -12217,6 +25893,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 2, 60, },
 	{ 7, 0, 0, 1, 2, 60, },
 	{ 8, 0, 0, 1, 2, 60, },
+	{ 9, 0, 0, 1, 2, 60, },
+	{ 0, 0, 0, 1, 3, 64, },
+	{ 2, 0, 0, 1, 3, 60, },
 	{ 1, 0, 0, 1, 3, 76, },
 	{ 3, 0, 0, 1, 3, 64, },
 	{ 4, 0, 0, 1, 3, 76, },
@@ -12224,6 +25903,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 3, 64, },
 	{ 7, 0, 0, 1, 3, 60, },
 	{ 8, 0, 0, 1, 3, 64, },
+	{ 9, 0, 0, 1, 3, 60, },
+	{ 0, 0, 0, 1, 4, 68, },
+	{ 2, 0, 0, 1, 4, 60, },
 	{ 1, 0, 0, 1, 4, 76, },
 	{ 3, 0, 0, 1, 4, 68, },
 	{ 4, 0, 0, 1, 4, 76, },
@@ -12231,6 +25913,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 4, 68, },
 	{ 7, 0, 0, 1, 4, 60, },
 	{ 8, 0, 0, 1, 4, 68, },
+	{ 9, 0, 0, 1, 4, 60, },
+	{ 0, 0, 0, 1, 5, 76, },
+	{ 2, 0, 0, 1, 5, 60, },
 	{ 1, 0, 0, 1, 5, 76, },
 	{ 3, 0, 0, 1, 5, 76, },
 	{ 4, 0, 0, 1, 5, 76, },
@@ -12238,6 +25923,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 5, 76, },
 	{ 7, 0, 0, 1, 5, 60, },
 	{ 8, 0, 0, 1, 5, 76, },
+	{ 9, 0, 0, 1, 5, 60, },
+	{ 0, 0, 0, 1, 6, 76, },
+	{ 2, 0, 0, 1, 6, 60, },
 	{ 1, 0, 0, 1, 6, 76, },
 	{ 3, 0, 0, 1, 6, 76, },
 	{ 4, 0, 0, 1, 6, 76, },
@@ -12245,6 +25933,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 6, 76, },
 	{ 7, 0, 0, 1, 6, 60, },
 	{ 8, 0, 0, 1, 6, 76, },
+	{ 9, 0, 0, 1, 6, 60, },
+	{ 0, 0, 0, 1, 7, 76, },
+	{ 2, 0, 0, 1, 7, 60, },
 	{ 1, 0, 0, 1, 7, 76, },
 	{ 3, 0, 0, 1, 7, 76, },
 	{ 4, 0, 0, 1, 7, 76, },
@@ -12252,6 +25943,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 7, 76, },
 	{ 7, 0, 0, 1, 7, 60, },
 	{ 8, 0, 0, 1, 7, 76, },
+	{ 9, 0, 0, 1, 7, 60, },
+	{ 0, 0, 0, 1, 8, 68, },
+	{ 2, 0, 0, 1, 8, 60, },
 	{ 1, 0, 0, 1, 8, 76, },
 	{ 3, 0, 0, 1, 8, 68, },
 	{ 4, 0, 0, 1, 8, 76, },
@@ -12259,6 +25953,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 8, 68, },
 	{ 7, 0, 0, 1, 8, 60, },
 	{ 8, 0, 0, 1, 8, 68, },
+	{ 9, 0, 0, 1, 8, 60, },
+	{ 0, 0, 0, 1, 9, 64, },
+	{ 2, 0, 0, 1, 9, 60, },
 	{ 1, 0, 0, 1, 9, 76, },
 	{ 3, 0, 0, 1, 9, 64, },
 	{ 4, 0, 0, 1, 9, 76, },
@@ -12266,6 +25963,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 9, 64, },
 	{ 7, 0, 0, 1, 9, 60, },
 	{ 8, 0, 0, 1, 9, 64, },
+	{ 9, 0, 0, 1, 9, 60, },
+	{ 0, 0, 0, 1, 10, 60, },
+	{ 2, 0, 0, 1, 10, 60, },
 	{ 1, 0, 0, 1, 10, 76, },
 	{ 3, 0, 0, 1, 10, 60, },
 	{ 4, 0, 0, 1, 10, 76, },
@@ -12273,6 +25973,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 10, 60, },
 	{ 7, 0, 0, 1, 10, 60, },
 	{ 8, 0, 0, 1, 10, 60, },
+	{ 9, 0, 0, 1, 10, 60, },
+	{ 0, 0, 0, 1, 11, 52, },
+	{ 2, 0, 0, 1, 11, 60, },
 	{ 1, 0, 0, 1, 11, 76, },
 	{ 3, 0, 0, 1, 11, 52, },
 	{ 4, 0, 0, 1, 11, 76, },
@@ -12280,6 +25983,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 11, 52, },
 	{ 7, 0, 0, 1, 11, 60, },
 	{ 8, 0, 0, 1, 11, 52, },
+	{ 9, 0, 0, 1, 11, 60, },
+	{ 0, 0, 0, 1, 12, 40, },
+	{ 2, 0, 0, 1, 12, 60, },
 	{ 1, 0, 0, 1, 12, 76, },
 	{ 3, 0, 0, 1, 12, 40, },
 	{ 4, 0, 0, 1, 12, 76, },
@@ -12287,6 +25993,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 12, 40, },
 	{ 7, 0, 0, 1, 12, 60, },
 	{ 8, 0, 0, 1, 12, 40, },
+	{ 9, 0, 0, 1, 12, 60, },
+	{ 0, 0, 0, 1, 13, 28, },
+	{ 2, 0, 0, 1, 13, 60, },
 	{ 1, 0, 0, 1, 13, 76, },
 	{ 3, 0, 0, 1, 13, 28, },
 	{ 4, 0, 0, 1, 13, 70, },
@@ -12294,6 +26003,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 13, 28, },
 	{ 7, 0, 0, 1, 13, 60, },
 	{ 8, 0, 0, 1, 13, 28, },
+	{ 9, 0, 0, 1, 13, 60, },
+	{ 0, 0, 0, 1, 14, 127, },
+	{ 2, 0, 0, 1, 14, 127, },
 	{ 1, 0, 0, 1, 14, 127, },
 	{ 3, 0, 0, 1, 14, 127, },
 	{ 4, 0, 0, 1, 14, 127, },
@@ -12301,6 +26013,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 1, 14, 127, },
 	{ 7, 0, 0, 1, 14, 127, },
 	{ 8, 0, 0, 1, 14, 127, },
+	{ 9, 0, 0, 1, 14, 127, },
+	{ 0, 0, 0, 2, 1, 52, },
+	{ 2, 0, 0, 2, 1, 60, },
 	{ 1, 0, 0, 2, 1, 76, },
 	{ 3, 0, 0, 2, 1, 52, },
 	{ 4, 0, 0, 2, 1, 76, },
@@ -12308,6 +26023,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 1, 52, },
 	{ 7, 0, 0, 2, 1, 60, },
 	{ 8, 0, 0, 2, 1, 52, },
+	{ 9, 0, 0, 2, 1, 60, },
+	{ 0, 0, 0, 2, 2, 60, },
+	{ 2, 0, 0, 2, 2, 60, },
 	{ 1, 0, 0, 2, 2, 76, },
 	{ 3, 0, 0, 2, 2, 60, },
 	{ 4, 0, 0, 2, 2, 76, },
@@ -12315,6 +26033,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 2, 60, },
 	{ 7, 0, 0, 2, 2, 60, },
 	{ 8, 0, 0, 2, 2, 60, },
+	{ 9, 0, 0, 2, 2, 60, },
+	{ 0, 0, 0, 2, 3, 64, },
+	{ 2, 0, 0, 2, 3, 60, },
 	{ 1, 0, 0, 2, 3, 76, },
 	{ 3, 0, 0, 2, 3, 64, },
 	{ 4, 0, 0, 2, 3, 76, },
@@ -12322,6 +26043,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 3, 64, },
 	{ 7, 0, 0, 2, 3, 60, },
 	{ 8, 0, 0, 2, 3, 64, },
+	{ 9, 0, 0, 2, 3, 60, },
+	{ 0, 0, 0, 2, 4, 68, },
+	{ 2, 0, 0, 2, 4, 60, },
 	{ 1, 0, 0, 2, 4, 76, },
 	{ 3, 0, 0, 2, 4, 68, },
 	{ 4, 0, 0, 2, 4, 76, },
@@ -12329,6 +26053,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 4, 68, },
 	{ 7, 0, 0, 2, 4, 60, },
 	{ 8, 0, 0, 2, 4, 68, },
+	{ 9, 0, 0, 2, 4, 60, },
+	{ 0, 0, 0, 2, 5, 76, },
+	{ 2, 0, 0, 2, 5, 60, },
 	{ 1, 0, 0, 2, 5, 76, },
 	{ 3, 0, 0, 2, 5, 76, },
 	{ 4, 0, 0, 2, 5, 76, },
@@ -12336,6 +26063,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 5, 76, },
 	{ 7, 0, 0, 2, 5, 60, },
 	{ 8, 0, 0, 2, 5, 76, },
+	{ 9, 0, 0, 2, 5, 60, },
+	{ 0, 0, 0, 2, 6, 76, },
+	{ 2, 0, 0, 2, 6, 60, },
 	{ 1, 0, 0, 2, 6, 76, },
 	{ 3, 0, 0, 2, 6, 76, },
 	{ 4, 0, 0, 2, 6, 76, },
@@ -12343,6 +26073,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 6, 76, },
 	{ 7, 0, 0, 2, 6, 60, },
 	{ 8, 0, 0, 2, 6, 76, },
+	{ 9, 0, 0, 2, 6, 60, },
+	{ 0, 0, 0, 2, 7, 76, },
+	{ 2, 0, 0, 2, 7, 60, },
 	{ 1, 0, 0, 2, 7, 76, },
 	{ 3, 0, 0, 2, 7, 76, },
 	{ 4, 0, 0, 2, 7, 76, },
@@ -12350,6 +26083,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 7, 76, },
 	{ 7, 0, 0, 2, 7, 60, },
 	{ 8, 0, 0, 2, 7, 76, },
+	{ 9, 0, 0, 2, 7, 60, },
+	{ 0, 0, 0, 2, 8, 68, },
+	{ 2, 0, 0, 2, 8, 60, },
 	{ 1, 0, 0, 2, 8, 76, },
 	{ 3, 0, 0, 2, 8, 68, },
 	{ 4, 0, 0, 2, 8, 76, },
@@ -12357,6 +26093,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 8, 68, },
 	{ 7, 0, 0, 2, 8, 60, },
 	{ 8, 0, 0, 2, 8, 68, },
+	{ 9, 0, 0, 2, 8, 60, },
+	{ 0, 0, 0, 2, 9, 64, },
+	{ 2, 0, 0, 2, 9, 60, },
 	{ 1, 0, 0, 2, 9, 76, },
 	{ 3, 0, 0, 2, 9, 64, },
 	{ 4, 0, 0, 2, 9, 76, },
@@ -12364,6 +26103,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 9, 64, },
 	{ 7, 0, 0, 2, 9, 60, },
 	{ 8, 0, 0, 2, 9, 64, },
+	{ 9, 0, 0, 2, 9, 60, },
+	{ 0, 0, 0, 2, 10, 60, },
+	{ 2, 0, 0, 2, 10, 60, },
 	{ 1, 0, 0, 2, 10, 76, },
 	{ 3, 0, 0, 2, 10, 60, },
 	{ 4, 0, 0, 2, 10, 76, },
@@ -12371,6 +26113,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 10, 60, },
 	{ 7, 0, 0, 2, 10, 60, },
 	{ 8, 0, 0, 2, 10, 60, },
+	{ 9, 0, 0, 2, 10, 60, },
+	{ 0, 0, 0, 2, 11, 52, },
+	{ 2, 0, 0, 2, 11, 60, },
 	{ 1, 0, 0, 2, 11, 76, },
 	{ 3, 0, 0, 2, 11, 52, },
 	{ 4, 0, 0, 2, 11, 76, },
@@ -12378,6 +26123,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 11, 52, },
 	{ 7, 0, 0, 2, 11, 60, },
 	{ 8, 0, 0, 2, 11, 52, },
+	{ 9, 0, 0, 2, 11, 60, },
+	{ 0, 0, 0, 2, 12, 40, },
+	{ 2, 0, 0, 2, 12, 60, },
 	{ 1, 0, 0, 2, 12, 76, },
 	{ 3, 0, 0, 2, 12, 40, },
 	{ 4, 0, 0, 2, 12, 76, },
@@ -12385,6 +26133,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 12, 40, },
 	{ 7, 0, 0, 2, 12, 60, },
 	{ 8, 0, 0, 2, 12, 40, },
+	{ 9, 0, 0, 2, 12, 60, },
+	{ 0, 0, 0, 2, 13, 28, },
+	{ 2, 0, 0, 2, 13, 60, },
 	{ 1, 0, 0, 2, 13, 76, },
 	{ 3, 0, 0, 2, 13, 28, },
 	{ 4, 0, 0, 2, 13, 72, },
@@ -12392,6 +26143,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 13, 28, },
 	{ 7, 0, 0, 2, 13, 60, },
 	{ 8, 0, 0, 2, 13, 28, },
+	{ 9, 0, 0, 2, 13, 60, },
+	{ 0, 0, 0, 2, 14, 127, },
+	{ 2, 0, 0, 2, 14, 127, },
 	{ 1, 0, 0, 2, 14, 127, },
 	{ 3, 0, 0, 2, 14, 127, },
 	{ 4, 0, 0, 2, 14, 127, },
@@ -12399,6 +26153,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 2, 14, 127, },
 	{ 7, 0, 0, 2, 14, 127, },
 	{ 8, 0, 0, 2, 14, 127, },
+	{ 9, 0, 0, 2, 14, 127, },
+	{ 0, 0, 0, 3, 1, 52, },
+	{ 2, 0, 0, 3, 1, 36, },
 	{ 1, 0, 0, 3, 1, 66, },
 	{ 3, 0, 0, 3, 1, 52, },
 	{ 4, 0, 0, 3, 1, 68, },
@@ -12406,6 +26163,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 1, 52, },
 	{ 7, 0, 0, 3, 1, 36, },
 	{ 8, 0, 0, 3, 1, 52, },
+	{ 9, 0, 0, 3, 1, 36, },
+	{ 0, 0, 0, 3, 2, 60, },
+	{ 2, 0, 0, 3, 2, 36, },
 	{ 1, 0, 0, 3, 2, 66, },
 	{ 3, 0, 0, 3, 2, 60, },
 	{ 4, 0, 0, 3, 2, 70, },
@@ -12413,6 +26173,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 2, 60, },
 	{ 7, 0, 0, 3, 2, 36, },
 	{ 8, 0, 0, 3, 2, 60, },
+	{ 9, 0, 0, 3, 2, 36, },
+	{ 0, 0, 0, 3, 3, 64, },
+	{ 2, 0, 0, 3, 3, 36, },
 	{ 1, 0, 0, 3, 3, 66, },
 	{ 3, 0, 0, 3, 3, 64, },
 	{ 4, 0, 0, 3, 3, 70, },
@@ -12420,6 +26183,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 3, 64, },
 	{ 7, 0, 0, 3, 3, 36, },
 	{ 8, 0, 0, 3, 3, 64, },
+	{ 9, 0, 0, 3, 3, 36, },
+	{ 0, 0, 0, 3, 4, 68, },
+	{ 2, 0, 0, 3, 4, 36, },
 	{ 1, 0, 0, 3, 4, 66, },
 	{ 3, 0, 0, 3, 4, 68, },
 	{ 4, 0, 0, 3, 4, 70, },
@@ -12427,6 +26193,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 4, 68, },
 	{ 7, 0, 0, 3, 4, 36, },
 	{ 8, 0, 0, 3, 4, 68, },
+	{ 9, 0, 0, 3, 4, 36, },
+	{ 0, 0, 0, 3, 5, 76, },
+	{ 2, 0, 0, 3, 5, 36, },
 	{ 1, 0, 0, 3, 5, 66, },
 	{ 3, 0, 0, 3, 5, 76, },
 	{ 4, 0, 0, 3, 5, 70, },
@@ -12434,6 +26203,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 5, 76, },
 	{ 7, 0, 0, 3, 5, 36, },
 	{ 8, 0, 0, 3, 5, 76, },
+	{ 9, 0, 0, 3, 5, 36, },
+	{ 0, 0, 0, 3, 6, 76, },
+	{ 2, 0, 0, 3, 6, 36, },
 	{ 1, 0, 0, 3, 6, 66, },
 	{ 3, 0, 0, 3, 6, 76, },
 	{ 4, 0, 0, 3, 6, 70, },
@@ -12441,6 +26213,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 6, 76, },
 	{ 7, 0, 0, 3, 6, 36, },
 	{ 8, 0, 0, 3, 6, 76, },
+	{ 9, 0, 0, 3, 6, 36, },
+	{ 0, 0, 0, 3, 7, 76, },
+	{ 2, 0, 0, 3, 7, 36, },
 	{ 1, 0, 0, 3, 7, 66, },
 	{ 3, 0, 0, 3, 7, 76, },
 	{ 4, 0, 0, 3, 7, 70, },
@@ -12448,6 +26223,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 7, 76, },
 	{ 7, 0, 0, 3, 7, 36, },
 	{ 8, 0, 0, 3, 7, 76, },
+	{ 9, 0, 0, 3, 7, 36, },
+	{ 0, 0, 0, 3, 8, 68, },
+	{ 2, 0, 0, 3, 8, 36, },
 	{ 1, 0, 0, 3, 8, 66, },
 	{ 3, 0, 0, 3, 8, 68, },
 	{ 4, 0, 0, 3, 8, 70, },
@@ -12455,6 +26233,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 8, 68, },
 	{ 7, 0, 0, 3, 8, 36, },
 	{ 8, 0, 0, 3, 8, 68, },
+	{ 9, 0, 0, 3, 8, 36, },
+	{ 0, 0, 0, 3, 9, 64, },
+	{ 2, 0, 0, 3, 9, 36, },
 	{ 1, 0, 0, 3, 9, 66, },
 	{ 3, 0, 0, 3, 9, 64, },
 	{ 4, 0, 0, 3, 9, 70, },
@@ -12462,6 +26243,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 9, 64, },
 	{ 7, 0, 0, 3, 9, 36, },
 	{ 8, 0, 0, 3, 9, 64, },
+	{ 9, 0, 0, 3, 9, 36, },
+	{ 0, 0, 0, 3, 10, 60, },
+	{ 2, 0, 0, 3, 10, 36, },
 	{ 1, 0, 0, 3, 10, 66, },
 	{ 3, 0, 0, 3, 10, 60, },
 	{ 4, 0, 0, 3, 10, 70, },
@@ -12469,6 +26253,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 10, 60, },
 	{ 7, 0, 0, 3, 10, 36, },
 	{ 8, 0, 0, 3, 10, 60, },
+	{ 9, 0, 0, 3, 10, 36, },
+	{ 0, 0, 0, 3, 11, 52, },
+	{ 2, 0, 0, 3, 11, 36, },
 	{ 1, 0, 0, 3, 11, 66, },
 	{ 3, 0, 0, 3, 11, 52, },
 	{ 4, 0, 0, 3, 11, 70, },
@@ -12476,6 +26263,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 11, 52, },
 	{ 7, 0, 0, 3, 11, 36, },
 	{ 8, 0, 0, 3, 11, 52, },
+	{ 9, 0, 0, 3, 11, 36, },
+	{ 0, 0, 0, 3, 12, 40, },
+	{ 2, 0, 0, 3, 12, 36, },
 	{ 1, 0, 0, 3, 12, 66, },
 	{ 3, 0, 0, 3, 12, 40, },
 	{ 4, 0, 0, 3, 12, 70, },
@@ -12483,6 +26273,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 12, 40, },
 	{ 7, 0, 0, 3, 12, 36, },
 	{ 8, 0, 0, 3, 12, 40, },
+	{ 9, 0, 0, 3, 12, 36, },
+	{ 0, 0, 0, 3, 13, 28, },
+	{ 2, 0, 0, 3, 13, 36, },
 	{ 1, 0, 0, 3, 13, 66, },
 	{ 3, 0, 0, 3, 13, 28, },
 	{ 4, 0, 0, 3, 13, 62, },
@@ -12490,6 +26283,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 13, 28, },
 	{ 7, 0, 0, 3, 13, 36, },
 	{ 8, 0, 0, 3, 13, 28, },
+	{ 9, 0, 0, 3, 13, 36, },
+	{ 0, 0, 0, 3, 14, 127, },
+	{ 2, 0, 0, 3, 14, 127, },
 	{ 1, 0, 0, 3, 14, 127, },
 	{ 3, 0, 0, 3, 14, 127, },
 	{ 4, 0, 0, 3, 14, 127, },
@@ -12497,6 +26293,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 0, 3, 14, 127, },
 	{ 7, 0, 0, 3, 14, 127, },
 	{ 8, 0, 0, 3, 14, 127, },
+	{ 9, 0, 0, 3, 14, 127, },
+	{ 0, 0, 1, 2, 1, 127, },
+	{ 2, 0, 1, 2, 1, 127, },
 	{ 1, 0, 1, 2, 1, 127, },
 	{ 3, 0, 1, 2, 1, 127, },
 	{ 4, 0, 1, 2, 1, 127, },
@@ -12504,6 +26303,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 1, 127, },
 	{ 7, 0, 1, 2, 1, 127, },
 	{ 8, 0, 1, 2, 1, 127, },
+	{ 9, 0, 1, 2, 1, 127, },
+	{ 0, 0, 1, 2, 2, 127, },
+	{ 2, 0, 1, 2, 2, 127, },
 	{ 1, 0, 1, 2, 2, 127, },
 	{ 3, 0, 1, 2, 2, 127, },
 	{ 4, 0, 1, 2, 2, 127, },
@@ -12511,6 +26313,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 2, 127, },
 	{ 7, 0, 1, 2, 2, 127, },
 	{ 8, 0, 1, 2, 2, 127, },
+	{ 9, 0, 1, 2, 2, 127, },
+	{ 0, 0, 1, 2, 3, 52, },
+	{ 2, 0, 1, 2, 3, 60, },
 	{ 1, 0, 1, 2, 3, 72, },
 	{ 3, 0, 1, 2, 3, 52, },
 	{ 4, 0, 1, 2, 3, 72, },
@@ -12518,6 +26323,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 3, 52, },
 	{ 7, 0, 1, 2, 3, 60, },
 	{ 8, 0, 1, 2, 3, 52, },
+	{ 9, 0, 1, 2, 3, 60, },
+	{ 0, 0, 1, 2, 4, 52, },
+	{ 2, 0, 1, 2, 4, 60, },
 	{ 1, 0, 1, 2, 4, 72, },
 	{ 3, 0, 1, 2, 4, 52, },
 	{ 4, 0, 1, 2, 4, 72, },
@@ -12525,6 +26333,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 4, 52, },
 	{ 7, 0, 1, 2, 4, 60, },
 	{ 8, 0, 1, 2, 4, 52, },
+	{ 9, 0, 1, 2, 4, 60, },
+	{ 0, 0, 1, 2, 5, 60, },
+	{ 2, 0, 1, 2, 5, 60, },
 	{ 1, 0, 1, 2, 5, 72, },
 	{ 3, 0, 1, 2, 5, 60, },
 	{ 4, 0, 1, 2, 5, 72, },
@@ -12532,6 +26343,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 5, 60, },
 	{ 7, 0, 1, 2, 5, 60, },
 	{ 8, 0, 1, 2, 5, 60, },
+	{ 9, 0, 1, 2, 5, 60, },
+	{ 0, 0, 1, 2, 6, 64, },
+	{ 2, 0, 1, 2, 6, 60, },
 	{ 1, 0, 1, 2, 6, 72, },
 	{ 3, 0, 1, 2, 6, 64, },
 	{ 4, 0, 1, 2, 6, 72, },
@@ -12539,6 +26353,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 6, 64, },
 	{ 7, 0, 1, 2, 6, 60, },
 	{ 8, 0, 1, 2, 6, 64, },
+	{ 9, 0, 1, 2, 6, 60, },
+	{ 0, 0, 1, 2, 7, 60, },
+	{ 2, 0, 1, 2, 7, 60, },
 	{ 1, 0, 1, 2, 7, 72, },
 	{ 3, 0, 1, 2, 7, 60, },
 	{ 4, 0, 1, 2, 7, 72, },
@@ -12546,6 +26363,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 7, 60, },
 	{ 7, 0, 1, 2, 7, 60, },
 	{ 8, 0, 1, 2, 7, 60, },
+	{ 9, 0, 1, 2, 7, 60, },
+	{ 0, 0, 1, 2, 8, 52, },
+	{ 2, 0, 1, 2, 8, 60, },
 	{ 1, 0, 1, 2, 8, 72, },
 	{ 3, 0, 1, 2, 8, 52, },
 	{ 4, 0, 1, 2, 8, 72, },
@@ -12553,6 +26373,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 8, 52, },
 	{ 7, 0, 1, 2, 8, 60, },
 	{ 8, 0, 1, 2, 8, 52, },
+	{ 9, 0, 1, 2, 8, 60, },
+	{ 0, 0, 1, 2, 9, 52, },
+	{ 2, 0, 1, 2, 9, 60, },
 	{ 1, 0, 1, 2, 9, 72, },
 	{ 3, 0, 1, 2, 9, 52, },
 	{ 4, 0, 1, 2, 9, 72, },
@@ -12560,6 +26383,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 9, 52, },
 	{ 7, 0, 1, 2, 9, 60, },
 	{ 8, 0, 1, 2, 9, 52, },
+	{ 9, 0, 1, 2, 9, 60, },
+	{ 0, 0, 1, 2, 10, 40, },
+	{ 2, 0, 1, 2, 10, 60, },
 	{ 1, 0, 1, 2, 10, 72, },
 	{ 3, 0, 1, 2, 10, 40, },
 	{ 4, 0, 1, 2, 10, 72, },
@@ -12567,6 +26393,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 10, 40, },
 	{ 7, 0, 1, 2, 10, 60, },
 	{ 8, 0, 1, 2, 10, 40, },
+	{ 9, 0, 1, 2, 10, 60, },
+	{ 0, 0, 1, 2, 11, 28, },
+	{ 2, 0, 1, 2, 11, 60, },
 	{ 1, 0, 1, 2, 11, 72, },
 	{ 3, 0, 1, 2, 11, 28, },
 	{ 4, 0, 1, 2, 11, 70, },
@@ -12574,6 +26403,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 11, 28, },
 	{ 7, 0, 1, 2, 11, 60, },
 	{ 8, 0, 1, 2, 11, 28, },
+	{ 9, 0, 1, 2, 11, 60, },
+	{ 0, 0, 1, 2, 12, 127, },
+	{ 2, 0, 1, 2, 12, 127, },
 	{ 1, 0, 1, 2, 12, 127, },
 	{ 3, 0, 1, 2, 12, 127, },
 	{ 4, 0, 1, 2, 12, 127, },
@@ -12581,6 +26413,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 12, 127, },
 	{ 7, 0, 1, 2, 12, 127, },
 	{ 8, 0, 1, 2, 12, 127, },
+	{ 9, 0, 1, 2, 12, 127, },
+	{ 0, 0, 1, 2, 13, 127, },
+	{ 2, 0, 1, 2, 13, 127, },
 	{ 1, 0, 1, 2, 13, 127, },
 	{ 3, 0, 1, 2, 13, 127, },
 	{ 4, 0, 1, 2, 13, 127, },
@@ -12588,6 +26423,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 13, 127, },
 	{ 7, 0, 1, 2, 13, 127, },
 	{ 8, 0, 1, 2, 13, 127, },
+	{ 9, 0, 1, 2, 13, 127, },
+	{ 0, 0, 1, 2, 14, 127, },
+	{ 2, 0, 1, 2, 14, 127, },
 	{ 1, 0, 1, 2, 14, 127, },
 	{ 3, 0, 1, 2, 14, 127, },
 	{ 4, 0, 1, 2, 14, 127, },
@@ -12595,6 +26433,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 2, 14, 127, },
 	{ 7, 0, 1, 2, 14, 127, },
 	{ 8, 0, 1, 2, 14, 127, },
+	{ 9, 0, 1, 2, 14, 127, },
+	{ 0, 0, 1, 3, 1, 127, },
+	{ 2, 0, 1, 3, 1, 127, },
 	{ 1, 0, 1, 3, 1, 127, },
 	{ 3, 0, 1, 3, 1, 127, },
 	{ 4, 0, 1, 3, 1, 127, },
@@ -12602,6 +26443,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 1, 127, },
 	{ 7, 0, 1, 3, 1, 127, },
 	{ 8, 0, 1, 3, 1, 127, },
+	{ 9, 0, 1, 3, 1, 127, },
+	{ 0, 0, 1, 3, 2, 127, },
+	{ 2, 0, 1, 3, 2, 127, },
 	{ 1, 0, 1, 3, 2, 127, },
 	{ 3, 0, 1, 3, 2, 127, },
 	{ 4, 0, 1, 3, 2, 127, },
@@ -12609,6 +26453,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 2, 127, },
 	{ 7, 0, 1, 3, 2, 127, },
 	{ 8, 0, 1, 3, 2, 127, },
+	{ 9, 0, 1, 3, 2, 127, },
+	{ 0, 0, 1, 3, 3, 48, },
+	{ 2, 0, 1, 3, 3, 36, },
 	{ 1, 0, 1, 3, 3, 66, },
 	{ 3, 0, 1, 3, 3, 48, },
 	{ 4, 0, 1, 3, 3, 66, },
@@ -12616,6 +26463,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 3, 48, },
 	{ 7, 0, 1, 3, 3, 36, },
 	{ 8, 0, 1, 3, 3, 48, },
+	{ 9, 0, 1, 3, 3, 36, },
+	{ 0, 0, 1, 3, 4, 48, },
+	{ 2, 0, 1, 3, 4, 36, },
 	{ 1, 0, 1, 3, 4, 66, },
 	{ 3, 0, 1, 3, 4, 48, },
 	{ 4, 0, 1, 3, 4, 70, },
@@ -12623,6 +26473,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 4, 48, },
 	{ 7, 0, 1, 3, 4, 36, },
 	{ 8, 0, 1, 3, 4, 48, },
+	{ 9, 0, 1, 3, 4, 36, },
+	{ 0, 0, 1, 3, 5, 60, },
+	{ 2, 0, 1, 3, 5, 36, },
 	{ 1, 0, 1, 3, 5, 66, },
 	{ 3, 0, 1, 3, 5, 60, },
 	{ 4, 0, 1, 3, 5, 70, },
@@ -12630,6 +26483,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 5, 60, },
 	{ 7, 0, 1, 3, 5, 36, },
 	{ 8, 0, 1, 3, 5, 60, },
+	{ 9, 0, 1, 3, 5, 36, },
+	{ 0, 0, 1, 3, 6, 64, },
+	{ 2, 0, 1, 3, 6, 36, },
 	{ 1, 0, 1, 3, 6, 66, },
 	{ 3, 0, 1, 3, 6, 64, },
 	{ 4, 0, 1, 3, 6, 70, },
@@ -12637,6 +26493,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 6, 64, },
 	{ 7, 0, 1, 3, 6, 36, },
 	{ 8, 0, 1, 3, 6, 64, },
+	{ 9, 0, 1, 3, 6, 36, },
+	{ 0, 0, 1, 3, 7, 60, },
+	{ 2, 0, 1, 3, 7, 36, },
 	{ 1, 0, 1, 3, 7, 66, },
 	{ 3, 0, 1, 3, 7, 60, },
 	{ 4, 0, 1, 3, 7, 70, },
@@ -12644,6 +26503,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 7, 60, },
 	{ 7, 0, 1, 3, 7, 36, },
 	{ 8, 0, 1, 3, 7, 60, },
+	{ 9, 0, 1, 3, 7, 36, },
+	{ 0, 0, 1, 3, 8, 52, },
+	{ 2, 0, 1, 3, 8, 36, },
 	{ 1, 0, 1, 3, 8, 66, },
 	{ 3, 0, 1, 3, 8, 52, },
 	{ 4, 0, 1, 3, 8, 70, },
@@ -12651,6 +26513,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 8, 52, },
 	{ 7, 0, 1, 3, 8, 36, },
 	{ 8, 0, 1, 3, 8, 52, },
+	{ 9, 0, 1, 3, 8, 36, },
+	{ 0, 0, 1, 3, 9, 52, },
+	{ 2, 0, 1, 3, 9, 36, },
 	{ 1, 0, 1, 3, 9, 66, },
 	{ 3, 0, 1, 3, 9, 52, },
 	{ 4, 0, 1, 3, 9, 70, },
@@ -12658,6 +26523,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 9, 52, },
 	{ 7, 0, 1, 3, 9, 36, },
 	{ 8, 0, 1, 3, 9, 52, },
+	{ 9, 0, 1, 3, 9, 36, },
+	{ 0, 0, 1, 3, 10, 40, },
+	{ 2, 0, 1, 3, 10, 36, },
 	{ 1, 0, 1, 3, 10, 66, },
 	{ 3, 0, 1, 3, 10, 40, },
 	{ 4, 0, 1, 3, 10, 70, },
@@ -12665,6 +26533,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 10, 40, },
 	{ 7, 0, 1, 3, 10, 36, },
 	{ 8, 0, 1, 3, 10, 40, },
+	{ 9, 0, 1, 3, 10, 36, },
+	{ 0, 0, 1, 3, 11, 26, },
+	{ 2, 0, 1, 3, 11, 36, },
 	{ 1, 0, 1, 3, 11, 66, },
 	{ 3, 0, 1, 3, 11, 26, },
 	{ 4, 0, 1, 3, 11, 66, },
@@ -12672,6 +26543,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 11, 26, },
 	{ 7, 0, 1, 3, 11, 36, },
 	{ 8, 0, 1, 3, 11, 26, },
+	{ 9, 0, 1, 3, 11, 36, },
+	{ 0, 0, 1, 3, 12, 127, },
+	{ 2, 0, 1, 3, 12, 127, },
 	{ 1, 0, 1, 3, 12, 127, },
 	{ 3, 0, 1, 3, 12, 127, },
 	{ 4, 0, 1, 3, 12, 127, },
@@ -12679,6 +26553,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 12, 127, },
 	{ 7, 0, 1, 3, 12, 127, },
 	{ 8, 0, 1, 3, 12, 127, },
+	{ 9, 0, 1, 3, 12, 127, },
+	{ 0, 0, 1, 3, 13, 127, },
+	{ 2, 0, 1, 3, 13, 127, },
 	{ 1, 0, 1, 3, 13, 127, },
 	{ 3, 0, 1, 3, 13, 127, },
 	{ 4, 0, 1, 3, 13, 127, },
@@ -12686,6 +26563,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 13, 127, },
 	{ 7, 0, 1, 3, 13, 127, },
 	{ 8, 0, 1, 3, 13, 127, },
+	{ 9, 0, 1, 3, 13, 127, },
+	{ 0, 0, 1, 3, 14, 127, },
+	{ 2, 0, 1, 3, 14, 127, },
 	{ 1, 0, 1, 3, 14, 127, },
 	{ 3, 0, 1, 3, 14, 127, },
 	{ 4, 0, 1, 3, 14, 127, },
@@ -12693,6 +26573,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 0, 1, 3, 14, 127, },
 	{ 7, 0, 1, 3, 14, 127, },
 	{ 8, 0, 1, 3, 14, 127, },
+	{ 9, 0, 1, 3, 14, 127, },
+	{ 0, 1, 0, 1, 36, 74, },
+	{ 2, 1, 0, 1, 36, 62, },
 	{ 1, 1, 0, 1, 36, 60, },
 	{ 3, 1, 0, 1, 36, 62, },
 	{ 4, 1, 0, 1, 36, 76, },
@@ -12700,6 +26583,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 36, 64, },
 	{ 7, 1, 0, 1, 36, 54, },
 	{ 8, 1, 0, 1, 36, 62, },
+	{ 9, 1, 0, 1, 36, 62, },
+	{ 0, 1, 0, 1, 40, 76, },
+	{ 2, 1, 0, 1, 40, 62, },
 	{ 1, 1, 0, 1, 40, 62, },
 	{ 3, 1, 0, 1, 40, 62, },
 	{ 4, 1, 0, 1, 40, 76, },
@@ -12707,6 +26593,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 40, 64, },
 	{ 7, 1, 0, 1, 40, 54, },
 	{ 8, 1, 0, 1, 40, 62, },
+	{ 9, 1, 0, 1, 40, 62, },
+	{ 0, 1, 0, 1, 44, 76, },
+	{ 2, 1, 0, 1, 44, 62, },
 	{ 1, 1, 0, 1, 44, 62, },
 	{ 3, 1, 0, 1, 44, 62, },
 	{ 4, 1, 0, 1, 44, 76, },
@@ -12714,13 +26603,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 44, 64, },
 	{ 7, 1, 0, 1, 44, 54, },
 	{ 8, 1, 0, 1, 44, 62, },
+	{ 9, 1, 0, 1, 44, 62, },
+	{ 0, 1, 0, 1, 48, 76, },
+	{ 2, 1, 0, 1, 48, 62, },
 	{ 1, 1, 0, 1, 48, 62, },
 	{ 3, 1, 0, 1, 48, 62, },
-	{ 4, 1, 0, 1, 48, 76, },
+	{ 4, 1, 0, 1, 48, 54, },
 	{ 5, 1, 0, 1, 48, 62, },
 	{ 6, 1, 0, 1, 48, 64, },
 	{ 7, 1, 0, 1, 48, 54, },
 	{ 8, 1, 0, 1, 48, 62, },
+	{ 9, 1, 0, 1, 48, 62, },
+	{ 0, 1, 0, 1, 52, 76, },
+	{ 2, 1, 0, 1, 52, 62, },
 	{ 1, 1, 0, 1, 52, 62, },
 	{ 3, 1, 0, 1, 52, 64, },
 	{ 4, 1, 0, 1, 52, 76, },
@@ -12728,6 +26623,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 52, 76, },
 	{ 7, 1, 0, 1, 52, 54, },
 	{ 8, 1, 0, 1, 52, 76, },
+	{ 9, 1, 0, 1, 52, 62, },
+	{ 0, 1, 0, 1, 56, 76, },
+	{ 2, 1, 0, 1, 56, 62, },
 	{ 1, 1, 0, 1, 56, 62, },
 	{ 3, 1, 0, 1, 56, 64, },
 	{ 4, 1, 0, 1, 56, 76, },
@@ -12735,6 +26633,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 56, 76, },
 	{ 7, 1, 0, 1, 56, 54, },
 	{ 8, 1, 0, 1, 56, 76, },
+	{ 9, 1, 0, 1, 56, 62, },
+	{ 0, 1, 0, 1, 60, 76, },
+	{ 2, 1, 0, 1, 60, 62, },
 	{ 1, 1, 0, 1, 60, 62, },
 	{ 3, 1, 0, 1, 60, 64, },
 	{ 4, 1, 0, 1, 60, 76, },
@@ -12742,6 +26643,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 60, 76, },
 	{ 7, 1, 0, 1, 60, 54, },
 	{ 8, 1, 0, 1, 60, 76, },
+	{ 9, 1, 0, 1, 60, 62, },
+	{ 0, 1, 0, 1, 64, 74, },
+	{ 2, 1, 0, 1, 64, 62, },
 	{ 1, 1, 0, 1, 64, 60, },
 	{ 3, 1, 0, 1, 64, 64, },
 	{ 4, 1, 0, 1, 64, 76, },
@@ -12749,6 +26653,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 64, 74, },
 	{ 7, 1, 0, 1, 64, 54, },
 	{ 8, 1, 0, 1, 64, 74, },
+	{ 9, 1, 0, 1, 64, 62, },
+	{ 0, 1, 0, 1, 100, 72, },
+	{ 2, 1, 0, 1, 100, 62, },
 	{ 1, 1, 0, 1, 100, 76, },
 	{ 3, 1, 0, 1, 100, 72, },
 	{ 4, 1, 0, 1, 100, 76, },
@@ -12756,6 +26663,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 100, 72, },
 	{ 7, 1, 0, 1, 100, 54, },
 	{ 8, 1, 0, 1, 100, 72, },
+	{ 9, 1, 0, 1, 100, 127, },
+	{ 0, 1, 0, 1, 104, 76, },
+	{ 2, 1, 0, 1, 104, 62, },
 	{ 1, 1, 0, 1, 104, 76, },
 	{ 3, 1, 0, 1, 104, 76, },
 	{ 4, 1, 0, 1, 104, 76, },
@@ -12763,6 +26673,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 104, 76, },
 	{ 7, 1, 0, 1, 104, 54, },
 	{ 8, 1, 0, 1, 104, 76, },
+	{ 9, 1, 0, 1, 104, 127, },
+	{ 0, 1, 0, 1, 108, 76, },
+	{ 2, 1, 0, 1, 108, 62, },
 	{ 1, 1, 0, 1, 108, 76, },
 	{ 3, 1, 0, 1, 108, 76, },
 	{ 4, 1, 0, 1, 108, 76, },
@@ -12770,6 +26683,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 108, 76, },
 	{ 7, 1, 0, 1, 108, 54, },
 	{ 8, 1, 0, 1, 108, 76, },
+	{ 9, 1, 0, 1, 108, 127, },
+	{ 0, 1, 0, 1, 112, 76, },
+	{ 2, 1, 0, 1, 112, 62, },
 	{ 1, 1, 0, 1, 112, 76, },
 	{ 3, 1, 0, 1, 112, 76, },
 	{ 4, 1, 0, 1, 112, 76, },
@@ -12777,6 +26693,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 112, 76, },
 	{ 7, 1, 0, 1, 112, 54, },
 	{ 8, 1, 0, 1, 112, 76, },
+	{ 9, 1, 0, 1, 112, 127, },
+	{ 0, 1, 0, 1, 116, 76, },
+	{ 2, 1, 0, 1, 116, 62, },
 	{ 1, 1, 0, 1, 116, 76, },
 	{ 3, 1, 0, 1, 116, 76, },
 	{ 4, 1, 0, 1, 116, 76, },
@@ -12784,6 +26703,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 116, 76, },
 	{ 7, 1, 0, 1, 116, 54, },
 	{ 8, 1, 0, 1, 116, 76, },
+	{ 9, 1, 0, 1, 116, 127, },
+	{ 0, 1, 0, 1, 120, 76, },
+	{ 2, 1, 0, 1, 120, 62, },
 	{ 1, 1, 0, 1, 120, 76, },
 	{ 3, 1, 0, 1, 120, 127, },
 	{ 4, 1, 0, 1, 120, 76, },
@@ -12791,6 +26713,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 120, 76, },
 	{ 7, 1, 0, 1, 120, 54, },
 	{ 8, 1, 0, 1, 120, 76, },
+	{ 9, 1, 0, 1, 120, 127, },
+	{ 0, 1, 0, 1, 124, 76, },
+	{ 2, 1, 0, 1, 124, 62, },
 	{ 1, 1, 0, 1, 124, 76, },
 	{ 3, 1, 0, 1, 124, 127, },
 	{ 4, 1, 0, 1, 124, 76, },
@@ -12798,6 +26723,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 124, 76, },
 	{ 7, 1, 0, 1, 124, 54, },
 	{ 8, 1, 0, 1, 124, 76, },
+	{ 9, 1, 0, 1, 124, 127, },
+	{ 0, 1, 0, 1, 128, 76, },
+	{ 2, 1, 0, 1, 128, 62, },
 	{ 1, 1, 0, 1, 128, 76, },
 	{ 3, 1, 0, 1, 128, 127, },
 	{ 4, 1, 0, 1, 128, 76, },
@@ -12805,6 +26733,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 128, 76, },
 	{ 7, 1, 0, 1, 128, 54, },
 	{ 8, 1, 0, 1, 128, 76, },
+	{ 9, 1, 0, 1, 128, 127, },
+	{ 0, 1, 0, 1, 132, 76, },
+	{ 2, 1, 0, 1, 132, 62, },
 	{ 1, 1, 0, 1, 132, 76, },
 	{ 3, 1, 0, 1, 132, 76, },
 	{ 4, 1, 0, 1, 132, 76, },
@@ -12812,20 +26743,29 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 132, 76, },
 	{ 7, 1, 0, 1, 132, 54, },
 	{ 8, 1, 0, 1, 132, 76, },
+	{ 9, 1, 0, 1, 132, 127, },
+	{ 0, 1, 0, 1, 136, 76, },
+	{ 2, 1, 0, 1, 136, 62, },
 	{ 1, 1, 0, 1, 136, 76, },
 	{ 3, 1, 0, 1, 136, 76, },
 	{ 4, 1, 0, 1, 136, 76, },
 	{ 5, 1, 0, 1, 136, 62, },
 	{ 6, 1, 0, 1, 136, 76, },
-	{ 7, 1, 0, 1, 136, 127, },
+	{ 7, 1, 0, 1, 136, 54, },
 	{ 8, 1, 0, 1, 136, 76, },
+	{ 9, 1, 0, 1, 136, 127, },
+	{ 0, 1, 0, 1, 140, 72, },
+	{ 2, 1, 0, 1, 140, 62, },
 	{ 1, 1, 0, 1, 140, 76, },
 	{ 3, 1, 0, 1, 140, 72, },
 	{ 4, 1, 0, 1, 140, 76, },
 	{ 5, 1, 0, 1, 140, 62, },
 	{ 6, 1, 0, 1, 140, 72, },
-	{ 7, 1, 0, 1, 140, 127, },
+	{ 7, 1, 0, 1, 140, 54, },
 	{ 8, 1, 0, 1, 140, 72, },
+	{ 9, 1, 0, 1, 140, 127, },
+	{ 0, 1, 0, 1, 144, 76, },
+	{ 2, 1, 0, 1, 144, 127, },
 	{ 1, 1, 0, 1, 144, 127, },
 	{ 3, 1, 0, 1, 144, 76, },
 	{ 4, 1, 0, 1, 144, 76, },
@@ -12833,6 +26773,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 144, 76, },
 	{ 7, 1, 0, 1, 144, 127, },
 	{ 8, 1, 0, 1, 144, 76, },
+	{ 9, 1, 0, 1, 144, 127, },
+	{ 0, 1, 0, 1, 149, 76, },
+	{ 2, 1, 0, 1, 149, -128, },
 	{ 1, 1, 0, 1, 149, 127, },
 	{ 3, 1, 0, 1, 149, 76, },
 	{ 4, 1, 0, 1, 149, 74, },
@@ -12840,6 +26783,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 149, 76, },
 	{ 7, 1, 0, 1, 149, 54, },
 	{ 8, 1, 0, 1, 149, 76, },
+	{ 9, 1, 0, 1, 149, -128, },
+	{ 0, 1, 0, 1, 153, 76, },
+	{ 2, 1, 0, 1, 153, -128, },
 	{ 1, 1, 0, 1, 153, 127, },
 	{ 3, 1, 0, 1, 153, 76, },
 	{ 4, 1, 0, 1, 153, 74, },
@@ -12847,6 +26793,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 153, 76, },
 	{ 7, 1, 0, 1, 153, 54, },
 	{ 8, 1, 0, 1, 153, 76, },
+	{ 9, 1, 0, 1, 153, -128, },
+	{ 0, 1, 0, 1, 157, 76, },
+	{ 2, 1, 0, 1, 157, -128, },
 	{ 1, 1, 0, 1, 157, 127, },
 	{ 3, 1, 0, 1, 157, 76, },
 	{ 4, 1, 0, 1, 157, 74, },
@@ -12854,6 +26803,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 157, 76, },
 	{ 7, 1, 0, 1, 157, 54, },
 	{ 8, 1, 0, 1, 157, 76, },
+	{ 9, 1, 0, 1, 157, -128, },
+	{ 0, 1, 0, 1, 161, 76, },
+	{ 2, 1, 0, 1, 161, -128, },
 	{ 1, 1, 0, 1, 161, 127, },
 	{ 3, 1, 0, 1, 161, 76, },
 	{ 4, 1, 0, 1, 161, 74, },
@@ -12861,6 +26813,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 161, 76, },
 	{ 7, 1, 0, 1, 161, 54, },
 	{ 8, 1, 0, 1, 161, 76, },
+	{ 9, 1, 0, 1, 161, -128, },
+	{ 0, 1, 0, 1, 165, 76, },
+	{ 2, 1, 0, 1, 165, -128, },
 	{ 1, 1, 0, 1, 165, 127, },
 	{ 3, 1, 0, 1, 165, 76, },
 	{ 4, 1, 0, 1, 165, 74, },
@@ -12868,6 +26823,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 1, 165, 76, },
 	{ 7, 1, 0, 1, 165, 54, },
 	{ 8, 1, 0, 1, 165, 76, },
+	{ 9, 1, 0, 1, 165, -128, },
+	{ 0, 1, 0, 2, 36, 72, },
+	{ 2, 1, 0, 2, 36, 62, },
 	{ 1, 1, 0, 2, 36, 62, },
 	{ 3, 1, 0, 2, 36, 62, },
 	{ 4, 1, 0, 2, 36, 76, },
@@ -12875,6 +26833,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 36, 64, },
 	{ 7, 1, 0, 2, 36, 54, },
 	{ 8, 1, 0, 2, 36, 62, },
+	{ 9, 1, 0, 2, 36, 62, },
+	{ 0, 1, 0, 2, 40, 76, },
+	{ 2, 1, 0, 2, 40, 62, },
 	{ 1, 1, 0, 2, 40, 62, },
 	{ 3, 1, 0, 2, 40, 62, },
 	{ 4, 1, 0, 2, 40, 76, },
@@ -12882,6 +26843,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 40, 64, },
 	{ 7, 1, 0, 2, 40, 54, },
 	{ 8, 1, 0, 2, 40, 62, },
+	{ 9, 1, 0, 2, 40, 62, },
+	{ 0, 1, 0, 2, 44, 76, },
+	{ 2, 1, 0, 2, 44, 62, },
 	{ 1, 1, 0, 2, 44, 62, },
 	{ 3, 1, 0, 2, 44, 62, },
 	{ 4, 1, 0, 2, 44, 76, },
@@ -12889,13 +26853,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 44, 64, },
 	{ 7, 1, 0, 2, 44, 54, },
 	{ 8, 1, 0, 2, 44, 62, },
+	{ 9, 1, 0, 2, 44, 62, },
+	{ 0, 1, 0, 2, 48, 76, },
+	{ 2, 1, 0, 2, 48, 62, },
 	{ 1, 1, 0, 2, 48, 62, },
 	{ 3, 1, 0, 2, 48, 62, },
-	{ 4, 1, 0, 2, 48, 76, },
+	{ 4, 1, 0, 2, 48, 54, },
 	{ 5, 1, 0, 2, 48, 62, },
 	{ 6, 1, 0, 2, 48, 64, },
 	{ 7, 1, 0, 2, 48, 54, },
 	{ 8, 1, 0, 2, 48, 62, },
+	{ 9, 1, 0, 2, 48, 62, },
+	{ 0, 1, 0, 2, 52, 76, },
+	{ 2, 1, 0, 2, 52, 62, },
 	{ 1, 1, 0, 2, 52, 62, },
 	{ 3, 1, 0, 2, 52, 64, },
 	{ 4, 1, 0, 2, 52, 76, },
@@ -12903,6 +26873,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 52, 76, },
 	{ 7, 1, 0, 2, 52, 54, },
 	{ 8, 1, 0, 2, 52, 76, },
+	{ 9, 1, 0, 2, 52, 62, },
+	{ 0, 1, 0, 2, 56, 76, },
+	{ 2, 1, 0, 2, 56, 62, },
 	{ 1, 1, 0, 2, 56, 62, },
 	{ 3, 1, 0, 2, 56, 64, },
 	{ 4, 1, 0, 2, 56, 76, },
@@ -12910,6 +26883,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 56, 76, },
 	{ 7, 1, 0, 2, 56, 54, },
 	{ 8, 1, 0, 2, 56, 76, },
+	{ 9, 1, 0, 2, 56, 62, },
+	{ 0, 1, 0, 2, 60, 76, },
+	{ 2, 1, 0, 2, 60, 62, },
 	{ 1, 1, 0, 2, 60, 62, },
 	{ 3, 1, 0, 2, 60, 64, },
 	{ 4, 1, 0, 2, 60, 76, },
@@ -12917,6 +26893,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 60, 76, },
 	{ 7, 1, 0, 2, 60, 54, },
 	{ 8, 1, 0, 2, 60, 76, },
+	{ 9, 1, 0, 2, 60, 62, },
+	{ 0, 1, 0, 2, 64, 74, },
+	{ 2, 1, 0, 2, 64, 62, },
 	{ 1, 1, 0, 2, 64, 60, },
 	{ 3, 1, 0, 2, 64, 64, },
 	{ 4, 1, 0, 2, 64, 74, },
@@ -12924,6 +26903,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 64, 74, },
 	{ 7, 1, 0, 2, 64, 54, },
 	{ 8, 1, 0, 2, 64, 74, },
+	{ 9, 1, 0, 2, 64, 62, },
+	{ 0, 1, 0, 2, 100, 70, },
+	{ 2, 1, 0, 2, 100, 62, },
 	{ 1, 1, 0, 2, 100, 76, },
 	{ 3, 1, 0, 2, 100, 70, },
 	{ 4, 1, 0, 2, 100, 76, },
@@ -12931,6 +26913,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 100, 70, },
 	{ 7, 1, 0, 2, 100, 54, },
 	{ 8, 1, 0, 2, 100, 70, },
+	{ 9, 1, 0, 2, 100, 127, },
+	{ 0, 1, 0, 2, 104, 76, },
+	{ 2, 1, 0, 2, 104, 62, },
 	{ 1, 1, 0, 2, 104, 76, },
 	{ 3, 1, 0, 2, 104, 76, },
 	{ 4, 1, 0, 2, 104, 76, },
@@ -12938,6 +26923,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 104, 76, },
 	{ 7, 1, 0, 2, 104, 54, },
 	{ 8, 1, 0, 2, 104, 76, },
+	{ 9, 1, 0, 2, 104, 127, },
+	{ 0, 1, 0, 2, 108, 76, },
+	{ 2, 1, 0, 2, 108, 62, },
 	{ 1, 1, 0, 2, 108, 76, },
 	{ 3, 1, 0, 2, 108, 76, },
 	{ 4, 1, 0, 2, 108, 76, },
@@ -12945,6 +26933,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 108, 76, },
 	{ 7, 1, 0, 2, 108, 54, },
 	{ 8, 1, 0, 2, 108, 76, },
+	{ 9, 1, 0, 2, 108, 127, },
+	{ 0, 1, 0, 2, 112, 76, },
+	{ 2, 1, 0, 2, 112, 62, },
 	{ 1, 1, 0, 2, 112, 76, },
 	{ 3, 1, 0, 2, 112, 76, },
 	{ 4, 1, 0, 2, 112, 76, },
@@ -12952,6 +26943,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 112, 76, },
 	{ 7, 1, 0, 2, 112, 54, },
 	{ 8, 1, 0, 2, 112, 76, },
+	{ 9, 1, 0, 2, 112, 127, },
+	{ 0, 1, 0, 2, 116, 76, },
+	{ 2, 1, 0, 2, 116, 62, },
 	{ 1, 1, 0, 2, 116, 76, },
 	{ 3, 1, 0, 2, 116, 76, },
 	{ 4, 1, 0, 2, 116, 76, },
@@ -12959,6 +26953,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 116, 76, },
 	{ 7, 1, 0, 2, 116, 54, },
 	{ 8, 1, 0, 2, 116, 76, },
+	{ 9, 1, 0, 2, 116, 127, },
+	{ 0, 1, 0, 2, 120, 76, },
+	{ 2, 1, 0, 2, 120, 62, },
 	{ 1, 1, 0, 2, 120, 76, },
 	{ 3, 1, 0, 2, 120, 127, },
 	{ 4, 1, 0, 2, 120, 76, },
@@ -12966,6 +26963,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 120, 76, },
 	{ 7, 1, 0, 2, 120, 54, },
 	{ 8, 1, 0, 2, 120, 76, },
+	{ 9, 1, 0, 2, 120, 127, },
+	{ 0, 1, 0, 2, 124, 76, },
+	{ 2, 1, 0, 2, 124, 62, },
 	{ 1, 1, 0, 2, 124, 76, },
 	{ 3, 1, 0, 2, 124, 127, },
 	{ 4, 1, 0, 2, 124, 76, },
@@ -12973,6 +26973,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 124, 76, },
 	{ 7, 1, 0, 2, 124, 54, },
 	{ 8, 1, 0, 2, 124, 76, },
+	{ 9, 1, 0, 2, 124, 127, },
+	{ 0, 1, 0, 2, 128, 76, },
+	{ 2, 1, 0, 2, 128, 62, },
 	{ 1, 1, 0, 2, 128, 76, },
 	{ 3, 1, 0, 2, 128, 127, },
 	{ 4, 1, 0, 2, 128, 76, },
@@ -12980,6 +26983,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 128, 76, },
 	{ 7, 1, 0, 2, 128, 54, },
 	{ 8, 1, 0, 2, 128, 76, },
+	{ 9, 1, 0, 2, 128, 127, },
+	{ 0, 1, 0, 2, 132, 76, },
+	{ 2, 1, 0, 2, 132, 62, },
 	{ 1, 1, 0, 2, 132, 76, },
 	{ 3, 1, 0, 2, 132, 76, },
 	{ 4, 1, 0, 2, 132, 76, },
@@ -12987,20 +26993,29 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 132, 76, },
 	{ 7, 1, 0, 2, 132, 54, },
 	{ 8, 1, 0, 2, 132, 76, },
+	{ 9, 1, 0, 2, 132, 127, },
+	{ 0, 1, 0, 2, 136, 76, },
+	{ 2, 1, 0, 2, 136, 62, },
 	{ 1, 1, 0, 2, 136, 76, },
 	{ 3, 1, 0, 2, 136, 76, },
 	{ 4, 1, 0, 2, 136, 76, },
 	{ 5, 1, 0, 2, 136, 62, },
 	{ 6, 1, 0, 2, 136, 76, },
-	{ 7, 1, 0, 2, 136, 127, },
+	{ 7, 1, 0, 2, 136, 54, },
 	{ 8, 1, 0, 2, 136, 76, },
+	{ 9, 1, 0, 2, 136, 127, },
+	{ 0, 1, 0, 2, 140, 70, },
+	{ 2, 1, 0, 2, 140, 62, },
 	{ 1, 1, 0, 2, 140, 76, },
 	{ 3, 1, 0, 2, 140, 70, },
 	{ 4, 1, 0, 2, 140, 76, },
 	{ 5, 1, 0, 2, 140, 62, },
 	{ 6, 1, 0, 2, 140, 70, },
-	{ 7, 1, 0, 2, 140, 127, },
+	{ 7, 1, 0, 2, 140, 54, },
 	{ 8, 1, 0, 2, 140, 70, },
+	{ 9, 1, 0, 2, 140, 127, },
+	{ 0, 1, 0, 2, 144, 76, },
+	{ 2, 1, 0, 2, 144, 127, },
 	{ 1, 1, 0, 2, 144, 127, },
 	{ 3, 1, 0, 2, 144, 76, },
 	{ 4, 1, 0, 2, 144, 76, },
@@ -13008,6 +27023,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 144, 76, },
 	{ 7, 1, 0, 2, 144, 127, },
 	{ 8, 1, 0, 2, 144, 76, },
+	{ 9, 1, 0, 2, 144, 127, },
+	{ 0, 1, 0, 2, 149, 76, },
+	{ 2, 1, 0, 2, 149, -128, },
 	{ 1, 1, 0, 2, 149, 127, },
 	{ 3, 1, 0, 2, 149, 76, },
 	{ 4, 1, 0, 2, 149, 74, },
@@ -13015,6 +27033,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 149, 76, },
 	{ 7, 1, 0, 2, 149, 54, },
 	{ 8, 1, 0, 2, 149, 76, },
+	{ 9, 1, 0, 2, 149, -128, },
+	{ 0, 1, 0, 2, 153, 76, },
+	{ 2, 1, 0, 2, 153, -128, },
 	{ 1, 1, 0, 2, 153, 127, },
 	{ 3, 1, 0, 2, 153, 76, },
 	{ 4, 1, 0, 2, 153, 74, },
@@ -13022,6 +27043,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 153, 76, },
 	{ 7, 1, 0, 2, 153, 54, },
 	{ 8, 1, 0, 2, 153, 76, },
+	{ 9, 1, 0, 2, 153, -128, },
+	{ 0, 1, 0, 2, 157, 76, },
+	{ 2, 1, 0, 2, 157, -128, },
 	{ 1, 1, 0, 2, 157, 127, },
 	{ 3, 1, 0, 2, 157, 76, },
 	{ 4, 1, 0, 2, 157, 74, },
@@ -13029,6 +27053,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 157, 76, },
 	{ 7, 1, 0, 2, 157, 54, },
 	{ 8, 1, 0, 2, 157, 76, },
+	{ 9, 1, 0, 2, 157, -128, },
+	{ 0, 1, 0, 2, 161, 76, },
+	{ 2, 1, 0, 2, 161, -128, },
 	{ 1, 1, 0, 2, 161, 127, },
 	{ 3, 1, 0, 2, 161, 76, },
 	{ 4, 1, 0, 2, 161, 74, },
@@ -13036,6 +27063,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 161, 76, },
 	{ 7, 1, 0, 2, 161, 54, },
 	{ 8, 1, 0, 2, 161, 76, },
+	{ 9, 1, 0, 2, 161, -128, },
+	{ 0, 1, 0, 2, 165, 76, },
+	{ 2, 1, 0, 2, 165, -128, },
 	{ 1, 1, 0, 2, 165, 127, },
 	{ 3, 1, 0, 2, 165, 76, },
 	{ 4, 1, 0, 2, 165, 74, },
@@ -13043,6 +27073,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 2, 165, 76, },
 	{ 7, 1, 0, 2, 165, 54, },
 	{ 8, 1, 0, 2, 165, 76, },
+	{ 9, 1, 0, 2, 165, -128, },
+	{ 0, 1, 0, 3, 36, 68, },
+	{ 2, 1, 0, 3, 36, 38, },
 	{ 1, 1, 0, 3, 36, 50, },
 	{ 3, 1, 0, 3, 36, 38, },
 	{ 4, 1, 0, 3, 36, 66, },
@@ -13050,6 +27083,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 36, 52, },
 	{ 7, 1, 0, 3, 36, 30, },
 	{ 8, 1, 0, 3, 36, 50, },
+	{ 9, 1, 0, 3, 36, 38, },
+	{ 0, 1, 0, 3, 40, 68, },
+	{ 2, 1, 0, 3, 40, 38, },
 	{ 1, 1, 0, 3, 40, 50, },
 	{ 3, 1, 0, 3, 40, 38, },
 	{ 4, 1, 0, 3, 40, 66, },
@@ -13057,6 +27093,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 40, 52, },
 	{ 7, 1, 0, 3, 40, 30, },
 	{ 8, 1, 0, 3, 40, 50, },
+	{ 9, 1, 0, 3, 40, 38, },
+	{ 0, 1, 0, 3, 44, 68, },
+	{ 2, 1, 0, 3, 44, 38, },
 	{ 1, 1, 0, 3, 44, 50, },
 	{ 3, 1, 0, 3, 44, 38, },
 	{ 4, 1, 0, 3, 44, 66, },
@@ -13064,13 +27103,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 44, 52, },
 	{ 7, 1, 0, 3, 44, 30, },
 	{ 8, 1, 0, 3, 44, 50, },
+	{ 9, 1, 0, 3, 44, 38, },
+	{ 0, 1, 0, 3, 48, 68, },
+	{ 2, 1, 0, 3, 48, 38, },
 	{ 1, 1, 0, 3, 48, 50, },
 	{ 3, 1, 0, 3, 48, 38, },
-	{ 4, 1, 0, 3, 48, 66, },
+	{ 4, 1, 0, 3, 48, 36, },
 	{ 5, 1, 0, 3, 48, 38, },
 	{ 6, 1, 0, 3, 48, 52, },
 	{ 7, 1, 0, 3, 48, 30, },
 	{ 8, 1, 0, 3, 48, 50, },
+	{ 9, 1, 0, 3, 48, 38, },
+	{ 0, 1, 0, 3, 52, 68, },
+	{ 2, 1, 0, 3, 52, 38, },
 	{ 1, 1, 0, 3, 52, 50, },
 	{ 3, 1, 0, 3, 52, 40, },
 	{ 4, 1, 0, 3, 52, 66, },
@@ -13078,6 +27123,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 52, 68, },
 	{ 7, 1, 0, 3, 52, 30, },
 	{ 8, 1, 0, 3, 52, 68, },
+	{ 9, 1, 0, 3, 52, 38, },
+	{ 0, 1, 0, 3, 56, 68, },
+	{ 2, 1, 0, 3, 56, 38, },
 	{ 1, 1, 0, 3, 56, 50, },
 	{ 3, 1, 0, 3, 56, 40, },
 	{ 4, 1, 0, 3, 56, 66, },
@@ -13085,6 +27133,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 56, 68, },
 	{ 7, 1, 0, 3, 56, 30, },
 	{ 8, 1, 0, 3, 56, 68, },
+	{ 9, 1, 0, 3, 56, 38, },
+	{ 0, 1, 0, 3, 60, 66, },
+	{ 2, 1, 0, 3, 60, 38, },
 	{ 1, 1, 0, 3, 60, 50, },
 	{ 3, 1, 0, 3, 60, 40, },
 	{ 4, 1, 0, 3, 60, 66, },
@@ -13092,6 +27143,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 60, 66, },
 	{ 7, 1, 0, 3, 60, 30, },
 	{ 8, 1, 0, 3, 60, 66, },
+	{ 9, 1, 0, 3, 60, 38, },
+	{ 0, 1, 0, 3, 64, 68, },
+	{ 2, 1, 0, 3, 64, 38, },
 	{ 1, 1, 0, 3, 64, 50, },
 	{ 3, 1, 0, 3, 64, 40, },
 	{ 4, 1, 0, 3, 64, 66, },
@@ -13099,6 +27153,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 64, 68, },
 	{ 7, 1, 0, 3, 64, 30, },
 	{ 8, 1, 0, 3, 64, 68, },
+	{ 9, 1, 0, 3, 64, 38, },
+	{ 0, 1, 0, 3, 100, 60, },
+	{ 2, 1, 0, 3, 100, 38, },
 	{ 1, 1, 0, 3, 100, 70, },
 	{ 3, 1, 0, 3, 100, 60, },
 	{ 4, 1, 0, 3, 100, 64, },
@@ -13106,6 +27163,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 100, 60, },
 	{ 7, 1, 0, 3, 100, 30, },
 	{ 8, 1, 0, 3, 100, 60, },
+	{ 9, 1, 0, 3, 100, 127, },
+	{ 0, 1, 0, 3, 104, 68, },
+	{ 2, 1, 0, 3, 104, 38, },
 	{ 1, 1, 0, 3, 104, 70, },
 	{ 3, 1, 0, 3, 104, 68, },
 	{ 4, 1, 0, 3, 104, 64, },
@@ -13113,6 +27173,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 104, 68, },
 	{ 7, 1, 0, 3, 104, 30, },
 	{ 8, 1, 0, 3, 104, 68, },
+	{ 9, 1, 0, 3, 104, 127, },
+	{ 0, 1, 0, 3, 108, 68, },
+	{ 2, 1, 0, 3, 108, 38, },
 	{ 1, 1, 0, 3, 108, 70, },
 	{ 3, 1, 0, 3, 108, 68, },
 	{ 4, 1, 0, 3, 108, 64, },
@@ -13120,6 +27183,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 108, 68, },
 	{ 7, 1, 0, 3, 108, 30, },
 	{ 8, 1, 0, 3, 108, 68, },
+	{ 9, 1, 0, 3, 108, 127, },
+	{ 0, 1, 0, 3, 112, 68, },
+	{ 2, 1, 0, 3, 112, 38, },
 	{ 1, 1, 0, 3, 112, 70, },
 	{ 3, 1, 0, 3, 112, 68, },
 	{ 4, 1, 0, 3, 112, 64, },
@@ -13127,6 +27193,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 112, 68, },
 	{ 7, 1, 0, 3, 112, 30, },
 	{ 8, 1, 0, 3, 112, 68, },
+	{ 9, 1, 0, 3, 112, 127, },
+	{ 0, 1, 0, 3, 116, 68, },
+	{ 2, 1, 0, 3, 116, 38, },
 	{ 1, 1, 0, 3, 116, 70, },
 	{ 3, 1, 0, 3, 116, 68, },
 	{ 4, 1, 0, 3, 116, 64, },
@@ -13134,6 +27203,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 116, 68, },
 	{ 7, 1, 0, 3, 116, 30, },
 	{ 8, 1, 0, 3, 116, 68, },
+	{ 9, 1, 0, 3, 116, 127, },
+	{ 0, 1, 0, 3, 120, 68, },
+	{ 2, 1, 0, 3, 120, 38, },
 	{ 1, 1, 0, 3, 120, 70, },
 	{ 3, 1, 0, 3, 120, 127, },
 	{ 4, 1, 0, 3, 120, 64, },
@@ -13141,6 +27213,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 120, 68, },
 	{ 7, 1, 0, 3, 120, 30, },
 	{ 8, 1, 0, 3, 120, 68, },
+	{ 9, 1, 0, 3, 120, 127, },
+	{ 0, 1, 0, 3, 124, 68, },
+	{ 2, 1, 0, 3, 124, 38, },
 	{ 1, 1, 0, 3, 124, 70, },
 	{ 3, 1, 0, 3, 124, 127, },
 	{ 4, 1, 0, 3, 124, 64, },
@@ -13148,6 +27223,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 124, 68, },
 	{ 7, 1, 0, 3, 124, 30, },
 	{ 8, 1, 0, 3, 124, 68, },
+	{ 9, 1, 0, 3, 124, 127, },
+	{ 0, 1, 0, 3, 128, 68, },
+	{ 2, 1, 0, 3, 128, 38, },
 	{ 1, 1, 0, 3, 128, 70, },
 	{ 3, 1, 0, 3, 128, 127, },
 	{ 4, 1, 0, 3, 128, 64, },
@@ -13155,6 +27233,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 128, 68, },
 	{ 7, 1, 0, 3, 128, 30, },
 	{ 8, 1, 0, 3, 128, 68, },
+	{ 9, 1, 0, 3, 128, 127, },
+	{ 0, 1, 0, 3, 132, 68, },
+	{ 2, 1, 0, 3, 132, 38, },
 	{ 1, 1, 0, 3, 132, 70, },
 	{ 3, 1, 0, 3, 132, 68, },
 	{ 4, 1, 0, 3, 132, 64, },
@@ -13162,20 +27243,29 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 132, 68, },
 	{ 7, 1, 0, 3, 132, 30, },
 	{ 8, 1, 0, 3, 132, 68, },
+	{ 9, 1, 0, 3, 132, 127, },
+	{ 0, 1, 0, 3, 136, 68, },
+	{ 2, 1, 0, 3, 136, 38, },
 	{ 1, 1, 0, 3, 136, 70, },
 	{ 3, 1, 0, 3, 136, 68, },
 	{ 4, 1, 0, 3, 136, 64, },
 	{ 5, 1, 0, 3, 136, 38, },
 	{ 6, 1, 0, 3, 136, 68, },
-	{ 7, 1, 0, 3, 136, 127, },
+	{ 7, 1, 0, 3, 136, 30, },
 	{ 8, 1, 0, 3, 136, 68, },
+	{ 9, 1, 0, 3, 136, 127, },
+	{ 0, 1, 0, 3, 140, 60, },
+	{ 2, 1, 0, 3, 140, 38, },
 	{ 1, 1, 0, 3, 140, 70, },
 	{ 3, 1, 0, 3, 140, 60, },
 	{ 4, 1, 0, 3, 140, 64, },
 	{ 5, 1, 0, 3, 140, 38, },
 	{ 6, 1, 0, 3, 140, 60, },
-	{ 7, 1, 0, 3, 140, 127, },
+	{ 7, 1, 0, 3, 140, 30, },
 	{ 8, 1, 0, 3, 140, 60, },
+	{ 9, 1, 0, 3, 140, 127, },
+	{ 0, 1, 0, 3, 144, 68, },
+	{ 2, 1, 0, 3, 144, 127, },
 	{ 1, 1, 0, 3, 144, 127, },
 	{ 3, 1, 0, 3, 144, 68, },
 	{ 4, 1, 0, 3, 144, 64, },
@@ -13183,6 +27273,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 144, 68, },
 	{ 7, 1, 0, 3, 144, 127, },
 	{ 8, 1, 0, 3, 144, 68, },
+	{ 9, 1, 0, 3, 144, 127, },
+	{ 0, 1, 0, 3, 149, 76, },
+	{ 2, 1, 0, 3, 149, -128, },
 	{ 1, 1, 0, 3, 149, 127, },
 	{ 3, 1, 0, 3, 149, 76, },
 	{ 4, 1, 0, 3, 149, 60, },
@@ -13190,6 +27283,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 149, 76, },
 	{ 7, 1, 0, 3, 149, 30, },
 	{ 8, 1, 0, 3, 149, 72, },
+	{ 9, 1, 0, 3, 149, -128, },
+	{ 0, 1, 0, 3, 153, 76, },
+	{ 2, 1, 0, 3, 153, -128, },
 	{ 1, 1, 0, 3, 153, 127, },
 	{ 3, 1, 0, 3, 153, 76, },
 	{ 4, 1, 0, 3, 153, 60, },
@@ -13197,6 +27293,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 153, 76, },
 	{ 7, 1, 0, 3, 153, 30, },
 	{ 8, 1, 0, 3, 153, 76, },
+	{ 9, 1, 0, 3, 153, -128, },
+	{ 0, 1, 0, 3, 157, 76, },
+	{ 2, 1, 0, 3, 157, -128, },
 	{ 1, 1, 0, 3, 157, 127, },
 	{ 3, 1, 0, 3, 157, 76, },
 	{ 4, 1, 0, 3, 157, 60, },
@@ -13204,6 +27303,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 157, 76, },
 	{ 7, 1, 0, 3, 157, 30, },
 	{ 8, 1, 0, 3, 157, 76, },
+	{ 9, 1, 0, 3, 157, -128, },
+	{ 0, 1, 0, 3, 161, 76, },
+	{ 2, 1, 0, 3, 161, -128, },
 	{ 1, 1, 0, 3, 161, 127, },
 	{ 3, 1, 0, 3, 161, 76, },
 	{ 4, 1, 0, 3, 161, 60, },
@@ -13211,6 +27313,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 161, 76, },
 	{ 7, 1, 0, 3, 161, 30, },
 	{ 8, 1, 0, 3, 161, 76, },
+	{ 9, 1, 0, 3, 161, -128, },
+	{ 0, 1, 0, 3, 165, 76, },
+	{ 2, 1, 0, 3, 165, -128, },
 	{ 1, 1, 0, 3, 165, 127, },
 	{ 3, 1, 0, 3, 165, 76, },
 	{ 4, 1, 0, 3, 165, 60, },
@@ -13218,6 +27323,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 0, 3, 165, 76, },
 	{ 7, 1, 0, 3, 165, 30, },
 	{ 8, 1, 0, 3, 165, 76, },
+	{ 9, 1, 0, 3, 165, -128, },
+	{ 0, 1, 1, 2, 38, 66, },
+	{ 2, 1, 1, 2, 38, 64, },
 	{ 1, 1, 1, 2, 38, 62, },
 	{ 3, 1, 1, 2, 38, 64, },
 	{ 4, 1, 1, 2, 38, 72, },
@@ -13225,13 +27333,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 38, 64, },
 	{ 7, 1, 1, 2, 38, 54, },
 	{ 8, 1, 1, 2, 38, 62, },
+	{ 9, 1, 1, 2, 38, 64, },
+	{ 0, 1, 1, 2, 46, 72, },
+	{ 2, 1, 1, 2, 46, 64, },
 	{ 1, 1, 1, 2, 46, 62, },
 	{ 3, 1, 1, 2, 46, 64, },
-	{ 4, 1, 1, 2, 46, 72, },
+	{ 4, 1, 1, 2, 46, 60, },
 	{ 5, 1, 1, 2, 46, 64, },
 	{ 6, 1, 1, 2, 46, 64, },
 	{ 7, 1, 1, 2, 46, 54, },
 	{ 8, 1, 1, 2, 46, 62, },
+	{ 9, 1, 1, 2, 46, 64, },
+	{ 0, 1, 1, 2, 54, 72, },
+	{ 2, 1, 1, 2, 54, 64, },
 	{ 1, 1, 1, 2, 54, 62, },
 	{ 3, 1, 1, 2, 54, 64, },
 	{ 4, 1, 1, 2, 54, 72, },
@@ -13239,6 +27353,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 54, 72, },
 	{ 7, 1, 1, 2, 54, 54, },
 	{ 8, 1, 1, 2, 54, 72, },
+	{ 9, 1, 1, 2, 54, 64, },
+	{ 0, 1, 1, 2, 62, 64, },
+	{ 2, 1, 1, 2, 62, 64, },
 	{ 1, 1, 1, 2, 62, 62, },
 	{ 3, 1, 1, 2, 62, 64, },
 	{ 4, 1, 1, 2, 62, 70, },
@@ -13246,6 +27363,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 62, 64, },
 	{ 7, 1, 1, 2, 62, 54, },
 	{ 8, 1, 1, 2, 62, 64, },
+	{ 9, 1, 1, 2, 62, 64, },
+	{ 0, 1, 1, 2, 102, 58, },
+	{ 2, 1, 1, 2, 102, 64, },
 	{ 1, 1, 1, 2, 102, 72, },
 	{ 3, 1, 1, 2, 102, 58, },
 	{ 4, 1, 1, 2, 102, 72, },
@@ -13253,6 +27373,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 102, 58, },
 	{ 7, 1, 1, 2, 102, 54, },
 	{ 8, 1, 1, 2, 102, 58, },
+	{ 9, 1, 1, 2, 102, 127, },
+	{ 0, 1, 1, 2, 110, 72, },
+	{ 2, 1, 1, 2, 110, 64, },
 	{ 1, 1, 1, 2, 110, 72, },
 	{ 3, 1, 1, 2, 110, 72, },
 	{ 4, 1, 1, 2, 110, 72, },
@@ -13260,6 +27383,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 110, 72, },
 	{ 7, 1, 1, 2, 110, 54, },
 	{ 8, 1, 1, 2, 110, 72, },
+	{ 9, 1, 1, 2, 110, 127, },
+	{ 0, 1, 1, 2, 118, 72, },
+	{ 2, 1, 1, 2, 118, 64, },
 	{ 1, 1, 1, 2, 118, 72, },
 	{ 3, 1, 1, 2, 118, 127, },
 	{ 4, 1, 1, 2, 118, 72, },
@@ -13267,6 +27393,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 118, 72, },
 	{ 7, 1, 1, 2, 118, 54, },
 	{ 8, 1, 1, 2, 118, 72, },
+	{ 9, 1, 1, 2, 118, 127, },
+	{ 0, 1, 1, 2, 126, 72, },
+	{ 2, 1, 1, 2, 126, 64, },
 	{ 1, 1, 1, 2, 126, 72, },
 	{ 3, 1, 1, 2, 126, 127, },
 	{ 4, 1, 1, 2, 126, 72, },
@@ -13274,13 +27403,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 126, 72, },
 	{ 7, 1, 1, 2, 126, 54, },
 	{ 8, 1, 1, 2, 126, 72, },
+	{ 9, 1, 1, 2, 126, 127, },
+	{ 0, 1, 1, 2, 134, 72, },
+	{ 2, 1, 1, 2, 134, 64, },
 	{ 1, 1, 1, 2, 134, 72, },
 	{ 3, 1, 1, 2, 134, 72, },
 	{ 4, 1, 1, 2, 134, 72, },
 	{ 5, 1, 1, 2, 134, 64, },
 	{ 6, 1, 1, 2, 134, 72, },
-	{ 7, 1, 1, 2, 134, 127, },
+	{ 7, 1, 1, 2, 134, 54, },
 	{ 8, 1, 1, 2, 134, 72, },
+	{ 9, 1, 1, 2, 134, 127, },
+	{ 0, 1, 1, 2, 142, 72, },
+	{ 2, 1, 1, 2, 142, 127, },
 	{ 1, 1, 1, 2, 142, 127, },
 	{ 3, 1, 1, 2, 142, 72, },
 	{ 4, 1, 1, 2, 142, 72, },
@@ -13288,6 +27423,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 142, 72, },
 	{ 7, 1, 1, 2, 142, 127, },
 	{ 8, 1, 1, 2, 142, 72, },
+	{ 9, 1, 1, 2, 142, 127, },
+	{ 0, 1, 1, 2, 151, 72, },
+	{ 2, 1, 1, 2, 151, -128, },
 	{ 1, 1, 1, 2, 151, 127, },
 	{ 3, 1, 1, 2, 151, 72, },
 	{ 4, 1, 1, 2, 151, 72, },
@@ -13295,6 +27433,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 151, 72, },
 	{ 7, 1, 1, 2, 151, 54, },
 	{ 8, 1, 1, 2, 151, 72, },
+	{ 9, 1, 1, 2, 151, -128, },
+	{ 0, 1, 1, 2, 159, 72, },
+	{ 2, 1, 1, 2, 159, -128, },
 	{ 1, 1, 1, 2, 159, 127, },
 	{ 3, 1, 1, 2, 159, 72, },
 	{ 4, 1, 1, 2, 159, 72, },
@@ -13302,6 +27443,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 2, 159, 72, },
 	{ 7, 1, 1, 2, 159, 54, },
 	{ 8, 1, 1, 2, 159, 72, },
+	{ 9, 1, 1, 2, 159, -128, },
+	{ 0, 1, 1, 3, 38, 60, },
+	{ 2, 1, 1, 3, 38, 40, },
 	{ 1, 1, 1, 3, 38, 50, },
 	{ 3, 1, 1, 3, 38, 40, },
 	{ 4, 1, 1, 3, 38, 62, },
@@ -13309,13 +27453,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 38, 52, },
 	{ 7, 1, 1, 3, 38, 30, },
 	{ 8, 1, 1, 3, 38, 50, },
+	{ 9, 1, 1, 3, 38, 40, },
+	{ 0, 1, 1, 3, 46, 68, },
+	{ 2, 1, 1, 3, 46, 40, },
 	{ 1, 1, 1, 3, 46, 50, },
 	{ 3, 1, 1, 3, 46, 40, },
-	{ 4, 1, 1, 3, 46, 62, },
+	{ 4, 1, 1, 3, 46, 46, },
 	{ 5, 1, 1, 3, 46, 40, },
 	{ 6, 1, 1, 3, 46, 52, },
 	{ 7, 1, 1, 3, 46, 30, },
 	{ 8, 1, 1, 3, 46, 50, },
+	{ 9, 1, 1, 3, 46, 40, },
+	{ 0, 1, 1, 3, 54, 68, },
+	{ 2, 1, 1, 3, 54, 40, },
 	{ 1, 1, 1, 3, 54, 50, },
 	{ 3, 1, 1, 3, 54, 40, },
 	{ 4, 1, 1, 3, 54, 62, },
@@ -13323,6 +27473,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 54, 68, },
 	{ 7, 1, 1, 3, 54, 30, },
 	{ 8, 1, 1, 3, 54, 68, },
+	{ 9, 1, 1, 3, 54, 40, },
+	{ 0, 1, 1, 3, 62, 58, },
+	{ 2, 1, 1, 3, 62, 40, },
 	{ 1, 1, 1, 3, 62, 48, },
 	{ 3, 1, 1, 3, 62, 40, },
 	{ 4, 1, 1, 3, 62, 58, },
@@ -13330,6 +27483,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 62, 58, },
 	{ 7, 1, 1, 3, 62, 30, },
 	{ 8, 1, 1, 3, 62, 58, },
+	{ 9, 1, 1, 3, 62, 40, },
+	{ 0, 1, 1, 3, 102, 54, },
+	{ 2, 1, 1, 3, 102, 40, },
 	{ 1, 1, 1, 3, 102, 70, },
 	{ 3, 1, 1, 3, 102, 54, },
 	{ 4, 1, 1, 3, 102, 64, },
@@ -13337,6 +27493,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 102, 54, },
 	{ 7, 1, 1, 3, 102, 30, },
 	{ 8, 1, 1, 3, 102, 54, },
+	{ 9, 1, 1, 3, 102, 127, },
+	{ 0, 1, 1, 3, 110, 68, },
+	{ 2, 1, 1, 3, 110, 40, },
 	{ 1, 1, 1, 3, 110, 70, },
 	{ 3, 1, 1, 3, 110, 68, },
 	{ 4, 1, 1, 3, 110, 64, },
@@ -13344,6 +27503,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 110, 68, },
 	{ 7, 1, 1, 3, 110, 30, },
 	{ 8, 1, 1, 3, 110, 68, },
+	{ 9, 1, 1, 3, 110, 127, },
+	{ 0, 1, 1, 3, 118, 68, },
+	{ 2, 1, 1, 3, 118, 40, },
 	{ 1, 1, 1, 3, 118, 70, },
 	{ 3, 1, 1, 3, 118, 127, },
 	{ 4, 1, 1, 3, 118, 64, },
@@ -13351,6 +27513,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 118, 68, },
 	{ 7, 1, 1, 3, 118, 30, },
 	{ 8, 1, 1, 3, 118, 68, },
+	{ 9, 1, 1, 3, 118, 127, },
+	{ 0, 1, 1, 3, 126, 68, },
+	{ 2, 1, 1, 3, 126, 40, },
 	{ 1, 1, 1, 3, 126, 70, },
 	{ 3, 1, 1, 3, 126, 127, },
 	{ 4, 1, 1, 3, 126, 64, },
@@ -13358,13 +27523,19 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 126, 68, },
 	{ 7, 1, 1, 3, 126, 30, },
 	{ 8, 1, 1, 3, 126, 68, },
+	{ 9, 1, 1, 3, 126, 127, },
+	{ 0, 1, 1, 3, 134, 68, },
+	{ 2, 1, 1, 3, 134, 40, },
 	{ 1, 1, 1, 3, 134, 70, },
 	{ 3, 1, 1, 3, 134, 68, },
 	{ 4, 1, 1, 3, 134, 64, },
 	{ 5, 1, 1, 3, 134, 40, },
 	{ 6, 1, 1, 3, 134, 68, },
-	{ 7, 1, 1, 3, 134, 127, },
+	{ 7, 1, 1, 3, 134, 30, },
 	{ 8, 1, 1, 3, 134, 68, },
+	{ 9, 1, 1, 3, 134, 127, },
+	{ 0, 1, 1, 3, 142, 68, },
+	{ 2, 1, 1, 3, 142, 127, },
 	{ 1, 1, 1, 3, 142, 127, },
 	{ 3, 1, 1, 3, 142, 68, },
 	{ 4, 1, 1, 3, 142, 64, },
@@ -13372,6 +27543,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 142, 68, },
 	{ 7, 1, 1, 3, 142, 127, },
 	{ 8, 1, 1, 3, 142, 68, },
+	{ 9, 1, 1, 3, 142, 127, },
+	{ 0, 1, 1, 3, 151, 72, },
+	{ 2, 1, 1, 3, 151, -128, },
 	{ 1, 1, 1, 3, 151, 127, },
 	{ 3, 1, 1, 3, 151, 72, },
 	{ 4, 1, 1, 3, 151, 66, },
@@ -13379,6 +27553,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 151, 72, },
 	{ 7, 1, 1, 3, 151, 30, },
 	{ 8, 1, 1, 3, 151, 68, },
+	{ 9, 1, 1, 3, 151, -128, },
+	{ 0, 1, 1, 3, 159, 72, },
+	{ 2, 1, 1, 3, 159, -128, },
 	{ 1, 1, 1, 3, 159, 127, },
 	{ 3, 1, 1, 3, 159, 72, },
 	{ 4, 1, 1, 3, 159, 66, },
@@ -13386,6 +27563,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 1, 3, 159, 72, },
 	{ 7, 1, 1, 3, 159, 30, },
 	{ 8, 1, 1, 3, 159, 72, },
+	{ 9, 1, 1, 3, 159, -128, },
+	{ 0, 1, 2, 4, 42, 64, },
+	{ 2, 1, 2, 4, 42, 64, },
 	{ 1, 1, 2, 4, 42, 64, },
 	{ 3, 1, 2, 4, 42, 64, },
 	{ 4, 1, 2, 4, 42, 68, },
@@ -13393,6 +27573,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 42, 64, },
 	{ 7, 1, 2, 4, 42, 54, },
 	{ 8, 1, 2, 4, 42, 62, },
+	{ 9, 1, 2, 4, 42, 64, },
+	{ 0, 1, 2, 4, 58, 62, },
+	{ 2, 1, 2, 4, 58, 64, },
 	{ 1, 1, 2, 4, 58, 64, },
 	{ 3, 1, 2, 4, 58, 62, },
 	{ 4, 1, 2, 4, 58, 64, },
@@ -13400,6 +27583,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 58, 62, },
 	{ 7, 1, 2, 4, 58, 54, },
 	{ 8, 1, 2, 4, 58, 62, },
+	{ 9, 1, 2, 4, 58, 64, },
+	{ 0, 1, 2, 4, 106, 58, },
+	{ 2, 1, 2, 4, 106, 64, },
 	{ 1, 1, 2, 4, 106, 72, },
 	{ 3, 1, 2, 4, 106, 58, },
 	{ 4, 1, 2, 4, 106, 66, },
@@ -13407,6 +27593,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 106, 58, },
 	{ 7, 1, 2, 4, 106, 54, },
 	{ 8, 1, 2, 4, 106, 58, },
+	{ 9, 1, 2, 4, 106, 127, },
+	{ 0, 1, 2, 4, 122, 72, },
+	{ 2, 1, 2, 4, 122, 64, },
 	{ 1, 1, 2, 4, 122, 72, },
 	{ 3, 1, 2, 4, 122, 127, },
 	{ 4, 1, 2, 4, 122, 68, },
@@ -13414,6 +27603,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 122, 72, },
 	{ 7, 1, 2, 4, 122, 54, },
 	{ 8, 1, 2, 4, 122, 72, },
+	{ 9, 1, 2, 4, 122, 127, },
+	{ 0, 1, 2, 4, 138, 72, },
+	{ 2, 1, 2, 4, 138, 127, },
 	{ 1, 1, 2, 4, 138, 127, },
 	{ 3, 1, 2, 4, 138, 72, },
 	{ 4, 1, 2, 4, 138, 68, },
@@ -13421,6 +27613,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 138, 72, },
 	{ 7, 1, 2, 4, 138, 127, },
 	{ 8, 1, 2, 4, 138, 72, },
+	{ 9, 1, 2, 4, 138, 127, },
+	{ 0, 1, 2, 4, 155, 72, },
+	{ 2, 1, 2, 4, 155, -128, },
 	{ 1, 1, 2, 4, 155, 127, },
 	{ 3, 1, 2, 4, 155, 72, },
 	{ 4, 1, 2, 4, 155, 68, },
@@ -13428,6 +27623,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 4, 155, 72, },
 	{ 7, 1, 2, 4, 155, 54, },
 	{ 8, 1, 2, 4, 155, 68, },
+	{ 9, 1, 2, 4, 155, -128, },
+	{ 0, 1, 2, 5, 42, 54, },
+	{ 2, 1, 2, 5, 42, 40, },
 	{ 1, 1, 2, 5, 42, 50, },
 	{ 3, 1, 2, 5, 42, 40, },
 	{ 4, 1, 2, 5, 42, 58, },
@@ -13435,6 +27633,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 42, 52, },
 	{ 7, 1, 2, 5, 42, 30, },
 	{ 8, 1, 2, 5, 42, 50, },
+	{ 9, 1, 2, 5, 42, 40, },
+	{ 0, 1, 2, 5, 58, 52, },
+	{ 2, 1, 2, 5, 58, 40, },
 	{ 1, 1, 2, 5, 58, 50, },
 	{ 3, 1, 2, 5, 58, 40, },
 	{ 4, 1, 2, 5, 58, 56, },
@@ -13442,6 +27643,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 58, 52, },
 	{ 7, 1, 2, 5, 58, 30, },
 	{ 8, 1, 2, 5, 58, 52, },
+	{ 9, 1, 2, 5, 58, 40, },
+	{ 0, 1, 2, 5, 106, 50, },
+	{ 2, 1, 2, 5, 106, 40, },
 	{ 1, 1, 2, 5, 106, 72, },
 	{ 3, 1, 2, 5, 106, 50, },
 	{ 4, 1, 2, 5, 106, 56, },
@@ -13449,6 +27653,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 106, 50, },
 	{ 7, 1, 2, 5, 106, 30, },
 	{ 8, 1, 2, 5, 106, 50, },
+	{ 9, 1, 2, 5, 106, 127, },
+	{ 0, 1, 2, 5, 122, 66, },
+	{ 2, 1, 2, 5, 122, 40, },
 	{ 1, 1, 2, 5, 122, 72, },
 	{ 3, 1, 2, 5, 122, 127, },
 	{ 4, 1, 2, 5, 122, 56, },
@@ -13456,6 +27663,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 122, 66, },
 	{ 7, 1, 2, 5, 122, 30, },
 	{ 8, 1, 2, 5, 122, 66, },
+	{ 9, 1, 2, 5, 122, 127, },
+	{ 0, 1, 2, 5, 138, 66, },
+	{ 2, 1, 2, 5, 138, 127, },
 	{ 1, 1, 2, 5, 138, 127, },
 	{ 3, 1, 2, 5, 138, 66, },
 	{ 4, 1, 2, 5, 138, 58, },
@@ -13463,6 +27673,9 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 138, 66, },
 	{ 7, 1, 2, 5, 138, 127, },
 	{ 8, 1, 2, 5, 138, 66, },
+	{ 9, 1, 2, 5, 138, 127, },
+	{ 0, 1, 2, 5, 155, 62, },
+	{ 2, 1, 2, 5, 155, -128, },
 	{ 1, 1, 2, 5, 155, 127, },
 	{ 3, 1, 2, 5, 155, 62, },
 	{ 4, 1, 2, 5, 155, 58, },
@@ -13470,9 +27683,10 @@ static const struct rtw_txpwr_lmt_cfg_pair rtw8822c_txpwr_lmt_type0[] = {
 	{ 6, 1, 2, 5, 155, 62, },
 	{ 7, 1, 2, 5, 155, 30, },
 	{ 8, 1, 2, 5, 155, 62, },
+	{ 9, 1, 2, 5, 155, -128, },
 };
 
-RTW_DECL_TABLE_TXPWR_LMT(rtw8822c_txpwr_lmt_type0);
+RTW_DECL_TABLE_TXPWR_LMT(rtw8822c_txpwr_lmt_type5);
 
 static const u32 rtw8822c_dpk_afe_no_dpk[] = {
 	0x18a4, BIT(7), 0,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.h b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.h
index 80c06c4f8184..2ae2b0aa5699 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c_table.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c_table.h
@@ -12,6 +12,7 @@ extern const struct rtw_table rtw8822c_bb_pg_type0_tbl;
 extern const struct rtw_table rtw8822c_rf_a_tbl;
 extern const struct rtw_table rtw8822c_rf_b_tbl;
 extern const struct rtw_table rtw8822c_txpwr_lmt_type0_tbl;
+extern const struct rtw_table rtw8822c_txpwr_lmt_type5_tbl;
 extern const struct rtw_table rtw8822c_dpk_afe_no_dpk_tbl;
 extern const struct rtw_table rtw8822c_dpk_afe_is_dpk_tbl;
 extern const struct rtw_table rtw8822c_dpk_mac_bb_tbl;
-- 
cgit v1.2.3-59-g8ed1b


From a6336094c3ab70efa8b16546a2e1e11d9afc8000 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 24 Apr 2020 09:47:33 +0100
Subject: rtw88: fix spelling mistake "fimrware" -> "firmware"

There are spelling mistakes in two rtw_err error messages. Fix them.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424084733.7716-1-colin.king@canonical.com
---
 drivers/net/wireless/realtek/rtw88/mac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 6969379ba37e..bd82b48e02f4 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -830,7 +830,7 @@ download_firmware_legacy(struct rtw_dev *rtwdev, const u8 *data, u32 size)
 		write_firmware_page(rtwdev, page, data, last_page_size);
 
 	if (!check_hw_ready(rtwdev, REG_MCUFW_CTRL, BIT_FWDL_CHK_RPT, 1)) {
-		rtw_err(rtwdev, "failed to check download fimrware report\n");
+		rtw_err(rtwdev, "failed to check download firmware report\n");
 		return -EINVAL;
 	}
 
@@ -857,7 +857,7 @@ static int download_firmware_validate_legacy(struct rtw_dev *rtwdev)
 		msleep(20);
 	}
 
-	rtw_err(rtwdev, "failed to validate fimrware\n");
+	rtw_err(rtwdev, "failed to validate firmware\n");
 	return -EINVAL;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 774965f22dc740ff470d8f26b9a44c5c92a7715b Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Mon, 27 Apr 2020 01:59:59 -0500
Subject: brcmfmac: keep apsta enabled when AP starts with MCHAN feature

When starting station mode on wlan0 and AP mode on wlan1, apsta is
disabled, which causes a data stall on wlan0 (station). The apsta feature
with MCHAN (Multi-Channel Concurrent) or RSDB (Real Simultaneous
Dual-Band) can make STA+AP work on two bands concurrently.
Because of that, keep apsta enabled if the firmware supports the MCHAN or
RSDB feature.

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587970803-77700-2-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 2ba165330038..4bef5f0a7539 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4727,7 +4727,8 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 
 		if ((dev_role == NL80211_IFTYPE_AP) &&
 		    ((ifp->ifidx == 0) ||
-		     !brcmf_feat_is_enabled(ifp, BRCMF_FEAT_RSDB))) {
+		     (!brcmf_feat_is_enabled(ifp, BRCMF_FEAT_RSDB) &&
+		      !brcmf_feat_is_enabled(ifp, BRCMF_FEAT_MCHAN)))) {
 			err = brcmf_fil_cmd_int_set(ifp, BRCMF_C_DOWN, 1);
 			if (err < 0) {
 				bphy_err(drvr, "BRCMF_C_DOWN error %d\n",
-- 
cgit v1.2.3-59-g8ed1b


From 19f557a9b8d1e4bf3cb96fe4495f09833be53e92 Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Mon, 27 Apr 2020 02:00:00 -0500
Subject: brcmfmac: remove arp_hostip_clear from brcmf_netdev_stop

The firmware does not respond to ARP requests, which causes ping to fail
after the following steps:

1. Bring up interface
   ifconfig wlan0 up or start wpa_supplicant
2. Set the IP address
   ifconfig wlan0 192.168.100.10
3. Bring down interface
   ifconfig wlan0 down or kill wpa_supplicant
4. Bring up interface again and set the same IP address
5. Connect to the AP (192.168.100.1); pinging the AP fails.

FMAC clears arp_hostip when bringing down the interface, but does not set
it back when the same IP address is set again. The IP address is still
visible in the interface info (ifconfig wlan0), but ping does not work
because the firmware ARP offload does not respond to the ARP request.
Because of that, remove "arp_hostip_clear" from the function
"brcmf_netdev_stop".

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Reviewed-by: Franky Lin <franky.lin@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587970803-77700-3-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
index 23627c953a5e..10584ee9cd52 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/core.c
@@ -579,9 +579,6 @@ static int brcmf_netdev_stop(struct net_device *ndev)
 
 	brcmf_cfg80211_down(ndev);
 
-	if (ifp->drvr->bus_if->state == BRCMF_BUS_UP)
-		brcmf_fil_iovar_data_set(ifp, "arp_hostip_clear", NULL, 0);
-
 	brcmf_net_setcarrier(ifp, false);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From d524d5ce36555bec8b40d4b65f15acd922ac965d Mon Sep 17 00:00:00 2001
From: Madhan Mohan R <MadhanMohan.R@cypress.com>
Date: Mon, 27 Apr 2020 02:00:01 -0500
Subject: brcmfmac: p2p cert 6.1.9-support GOUT handling p2p presence request

Send p2p presence response from the p2p interface address instead
of the p2p device address. This is needed for p2p cert 6.1.9 to pass.

Signed-off-by: Madhan Mohan R <MadhanMohan.R@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587970803-77700-4-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 9 ++++++++-
 1 file changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 1f5deea5a288..c68edb198819 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -1491,6 +1491,7 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p,
 {
 	struct brcmf_pub *drvr = p2p->cfg->pub;
 	struct brcmf_cfg80211_vif *vif;
+	struct brcmf_p2p_action_frame *p2p_af;
 	s32 err = 0;
 	s32 timeout = 0;
 
@@ -1500,7 +1501,13 @@ static s32 brcmf_p2p_tx_action_frame(struct brcmf_p2p_info *p2p,
 	clear_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status);
 	clear_bit(BRCMF_P2P_STATUS_ACTION_TX_NOACK, &p2p->status);
 
-	vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif;
+	/* check if it is a p2p_presence response */
+	p2p_af = (struct brcmf_p2p_action_frame *)af_params->action_frame.data;
+	if (p2p_af->subtype == P2P_AF_PRESENCE_RSP)
+		vif = p2p->bss_idx[P2PAPI_BSSCFG_CONNECTION].vif;
+	else
+		vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif;
+
 	err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe", af_params,
 					sizeof(*af_params));
 	if (err) {
-- 
cgit v1.2.3-59-g8ed1b


From 053ac9e1438a1415a3b6b11d6e504c7a74ebd2ba Mon Sep 17 00:00:00 2001
From: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Date: Mon, 27 Apr 2020 02:00:02 -0500
Subject: brcmfmac: only generate random p2p address when needed

The P2P spec states that the P2P device address should be the globally
administered address with the locally administered bit set. Therefore,
follow this guideline by default.

When the primary interface is set to a locally administered address, the
locally administered bit cannot be set again. Generate a random locally
administered address for this case.

Reviewed-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587970803-77700-5-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index c68edb198819..f8ece9f381a5 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -457,10 +457,21 @@ static int brcmf_p2p_set_firmware(struct brcmf_if *ifp, u8 *p2p_mac)
  */
 static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr)
 {
+	struct brcmf_if *pri_ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp;
 	bool random_addr = false;
+	bool local_admin = false;
 
-	if (!dev_addr || is_zero_ether_addr(dev_addr))
-		random_addr = true;
+	if (!dev_addr || is_zero_ether_addr(dev_addr)) {
+		/* If the primary interface address is already locally
+		 * administered, create a new random address.
+		 */
+		if (pri_ifp->mac_addr[0] & 0x02) {
+			random_addr = true;
+		} else {
+			dev_addr = pri_ifp->mac_addr;
+			local_admin = true;
+		}
+	}
 
 	/* Generate the P2P Device Address obtaining a random ethernet
 	 * address with the locally administered bit set.
@@ -470,6 +481,9 @@ static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr)
 	else
 		memcpy(p2p->dev_addr, dev_addr, ETH_ALEN);
 
+	if (local_admin)
+		p2p->dev_addr[0] |= 0x02;
+
 	/* Generate the P2P Interface Address.  If the discovery and connection
 	 * BSSCFGs need to simultaneously co-exist, then this address must be
 	 * different from the P2P Device Address, but also locally administered.
-- 
cgit v1.2.3-59-g8ed1b


From 2719afcae759e476bf1a7b19aefaec6433bb52ac Mon Sep 17 00:00:00 2001
From: Ryohei Kondo <ryohei.kondo@cypress.com>
Date: Mon, 27 Apr 2020 02:00:03 -0500
Subject: brcmfmac: add vendor ie for association responses

Miracast Certification clause 6.1.2 may fail if there is no WFD IE in
p2p assoc response. This change allows WFD IE to be added to p2p assoc
response.

Related WFA certification:
6.1.2 P-SnUT operating as a Group Owner accepts a WFD Session with a
Reference Source

Reviewed-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Ryohei Kondo <ryohei.kondo@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587970803-77700-6-git-send-email-chi-hsien.lin@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c    | 14 ++++++++++++++
 .../net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h    |  4 ++++
 2 files changed, 18 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 4bef5f0a7539..f2f84af923a9 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4449,6 +4449,11 @@ s32 brcmf_vif_set_mgmt_ie(struct brcmf_cfg80211_vif *vif, s32 pktflag,
 		mgmt_ie_len = &saved_ie->assoc_req_ie_len;
 		mgmt_ie_buf_len = sizeof(saved_ie->assoc_req_ie);
 		break;
+	case BRCMF_VNDR_IE_ASSOCRSP_FLAG:
+		mgmt_ie_buf = saved_ie->assoc_res_ie;
+		mgmt_ie_len = &saved_ie->assoc_res_ie_len;
+		mgmt_ie_buf_len = sizeof(saved_ie->assoc_res_ie);
+		break;
 	default:
 		err = -EPERM;
 		bphy_err(drvr, "not suitable type\n");
@@ -4595,6 +4600,15 @@ brcmf_config_ap_mgmt_ie(struct brcmf_cfg80211_vif *vif,
 	else
 		brcmf_dbg(TRACE, "Applied Vndr IEs for Probe Resp\n");
 
+	/* Set Assoc Response IEs to FW */
+	err = brcmf_vif_set_mgmt_ie(vif, BRCMF_VNDR_IE_ASSOCRSP_FLAG,
+				    beacon->assocresp_ies,
+				    beacon->assocresp_ies_len);
+	if (err)
+		brcmf_err("Set Assoc Resp IE Failed\n");
+	else
+		brcmf_dbg(TRACE, "Applied Vndr IEs for Assoc Resp\n");
+
 	return err;
 }
 
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
index 6ce48f6275a4..3ca8c07d6370 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
@@ -153,19 +153,23 @@ enum brcmf_vif_status {
  * @probe_req_ie: IE info for probe request.
  * @probe_res_ie: IE info for probe response.
  * @beacon_ie: IE info for beacon frame.
+ * @assoc_res_ie: IE info for association response frame.
  * @probe_req_ie_len: IE info length for probe request.
  * @probe_res_ie_len: IE info length for probe response.
  * @beacon_ie_len: IE info length for beacon frame.
+ * @assoc_res_ie_len: IE info length for association response frame.
  */
 struct vif_saved_ie {
 	u8  probe_req_ie[IE_MAX_LEN];
 	u8  probe_res_ie[IE_MAX_LEN];
 	u8  beacon_ie[IE_MAX_LEN];
 	u8  assoc_req_ie[IE_MAX_LEN];
+	u8  assoc_res_ie[IE_MAX_LEN];
 	u32 probe_req_ie_len;
 	u32 probe_res_ie_len;
 	u32 beacon_ie_len;
 	u32 assoc_req_ie_len;
+	u32 assoc_res_ie_len;
 };
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From ff2af09f4515422d6b464faf36b771aeb2819e46 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sun, 26 Apr 2020 17:40:53 +0800
Subject: brcmfmac: remove comparison to bool in brcmf_fws_attach()

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c:2359:6-40:
WARNING: Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426094053.23132-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index 8cc52935fd41..2b7837887c0b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -2356,7 +2356,7 @@ struct brcmf_fws_info *brcmf_fws_attach(struct brcmf_pub *drvr)
 	fws->drvr = drvr;
 	fws->fcmode = drvr->settings->fcmode;
 
-	if ((drvr->bus_if->always_use_fws_queue == false) &&
+	if (!drvr->bus_if->always_use_fws_queue &&
 	    (fws->fcmode == BRCMF_FWS_FCMODE_NONE)) {
 		fws->avoid_queueing = true;
 		brcmf_dbg(INFO, "FWS queueing will be avoided\n");
-- 
cgit v1.2.3-59-g8ed1b


From ea1b3bc6d5ad95fef32811bec7df37f51809f4e1 Mon Sep 17 00:00:00 2001
From: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Date: Wed, 29 Apr 2020 12:15:26 +0200
Subject: brcmfmac: no need to check return value of debugfs_create functions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calling debugfs functions, there is no need to ever check the
return value.  The function can work or not, but the code logic should
never do something different based on this.

In doing this, make brcmf_debugfs_add_entry() return void as no one was
even paying attention to the return value.

Cc: Arend van Spriel <arend.vanspriel@broadcom.com>
Cc: Franky Lin <franky.lin@broadcom.com>
Cc: Hante Meuleman <hante.meuleman@broadcom.com>
Cc: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Cc: Wright Feng <wright.feng@cypress.com>
Cc: Kalle Valo <kvalo@codeaurora.org>
Cc: "David S. Miller" <davem@davemloft.net>
Cc: Pieter-Paul Giesberts <pieter-paul.giesberts@broadcom.com>
Cc: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Cc: "Rafał Miłecki" <rafal@milecki.pl>
Cc: linux-wireless@vger.kernel.org
Cc: brcm80211-dev-list.pdl@broadcom.com
Cc: brcm80211-dev-list@cypress.com
Cc: netdev@vger.kernel.org
Cc: linux-kernel@vger.kernel.org
Signed-off-by: Greg Kroah-Hartman <gregkh@linuxfoundation.org>
Acked-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200429101526.GA2094124@kroah.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c |  9 +++------
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h | 12 +++++-------
 2 files changed, 8 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
index 120515fe8250..eecf8a38d94a 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.c
@@ -47,13 +47,10 @@ struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr)
 	return drvr->wiphy->debugfsdir;
 }
 
-int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
+void brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
 			    int (*read_fn)(struct seq_file *seq, void *data))
 {
-	struct dentry *e;
-
 	WARN(!drvr->wiphy->debugfsdir, "wiphy not (yet) registered\n");
-	e = debugfs_create_devm_seqfile(drvr->bus_if->dev, fn,
-					drvr->wiphy->debugfsdir, read_fn);
-	return PTR_ERR_OR_ZERO(e);
+	debugfs_create_devm_seqfile(drvr->bus_if->dev, fn,
+				    drvr->wiphy->debugfsdir, read_fn);
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
index 9b221b509ade..4146faeed344 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/debug.h
@@ -116,8 +116,8 @@ struct brcmf_bus;
 struct brcmf_pub;
 #ifdef DEBUG
 struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr);
-int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
-			    int (*read_fn)(struct seq_file *seq, void *data));
+void brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
+			     int (*read_fn)(struct seq_file *seq, void *data));
 int brcmf_debug_create_memdump(struct brcmf_bus *bus, const void *data,
 			       size_t len);
 #else
@@ -126,11 +126,9 @@ static inline struct dentry *brcmf_debugfs_get_devdir(struct brcmf_pub *drvr)
 	return ERR_PTR(-ENOENT);
 }
 static inline
-int brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
-			    int (*read_fn)(struct seq_file *seq, void *data))
-{
-	return 0;
-}
+void brcmf_debugfs_add_entry(struct brcmf_pub *drvr, const char *fn,
+			     int (*read_fn)(struct seq_file *seq, void *data))
+{ }
 static inline
 int brcmf_debug_create_memdump(struct brcmf_bus *bus, const void *data,
 			       size_t len)
-- 
cgit v1.2.3-59-g8ed1b


From cf48db69bdfad2930b95fd51d64444e5a7b469ae Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Thu, 30 Apr 2020 19:02:09 -0700
Subject: bpf, arm: Optimize ALU64 ARSH X using orrpl conditional instruction

This patch optimizes the code generated by emit_a32_arsh_r64, which
handles the BPF_ALU64 BPF_ARSH BPF_X instruction.

The original code uses a conditional B followed by an unconditional ORR.
The optimization saves one instruction by removing the B instruction
and using a conditional ORR (with an inverted condition).

Example of the code generated for BPF_ALU64_REG(BPF_ARSH, BPF_REG_0,
BPF_REG_1), before optimization:

  34:  rsb    ip, r2, #32
  38:  subs   r9, r2, #32
  3c:  lsr    lr, r0, r2
  40:  orr    lr, lr, r1, lsl ip
  44:  bmi    0x4c
  48:  orr    lr, lr, r1, asr r9
  4c:  asr    ip, r1, r2
  50:  mov    r0, lr
  54:  mov    r1, ip

and after optimization:

  34:  rsb    ip, r2, #32
  38:  subs   r9, r2, #32
  3c:  lsr    lr, r0, r2
  40:  orr    lr, lr, r1, lsl ip
  44:  orrpl  lr, lr, r1, asr r9
  48:  asr    ip, r1, r2
  4c:  mov    r0, lr
  50:  mov    r1, ip

Tested on QEMU using lib/test_bpf and test_verifier.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-2-luke.r.nels@gmail.com
---
 arch/arm/net/bpf_jit_32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index bf85d6db4931..48b89211ee5c 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -860,8 +860,8 @@ static inline void emit_a32_arsh_r64(const s8 dst[], const s8 src[],
 	emit(ARM_SUBS_I(tmp2[0], rt, 32), ctx);
 	emit(ARM_MOV_SR(ARM_LR, rd[1], SRTYPE_LSR, rt), ctx);
 	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASL, ARM_IP), ctx);
-	_emit(ARM_COND_MI, ARM_B(0), ctx);
-	emit(ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
+	_emit(ARM_COND_PL,
+	      ARM_ORR_SR(ARM_LR, ARM_LR, rd[0], SRTYPE_ASR, tmp2[0]), ctx);
 	emit(ARM_MOV_SR(ARM_IP, rd[0], SRTYPE_ASR, rt), ctx);
 
 	arm_bpf_put_reg32(dst_lo, ARM_LR, ctx);
-- 
cgit v1.2.3-59-g8ed1b


From c648c9c7429e979ca081359f39b6902aed92d490 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Thu, 30 Apr 2020 19:02:10 -0700
Subject: bpf, arm: Optimize ALU ARSH K using asr immediate instruction

This patch adds an optimization that uses the asr immediate instruction
for BPF_ALU BPF_ARSH BPF_K, rather than loading the immediate to
a temporary register. This is similar to existing code for handling
BPF_ALU BPF_{LSH,RSH} BPF_K. This optimization saves two instructions
and is more consistent with LSH and RSH.

Example of the code generated for BPF_ALU32_IMM(BPF_ARSH, BPF_REG_0, 5)
before the optimization:

  2c:  mov    r8, #5
  30:  mov    r9, #0
  34:  asr    r0, r0, r8

and after optimization:

  2c:  asr    r0, r0, #5

Tested on QEMU using lib/test_bpf and test_verifier.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200501020210.32294-3-luke.r.nels@gmail.com
---
 arch/arm/net/bpf_jit_32.c | 10 +++++++---
 arch/arm/net/bpf_jit_32.h |  3 +++
 2 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/arch/arm/net/bpf_jit_32.c b/arch/arm/net/bpf_jit_32.c
index 48b89211ee5c..0207b6ea6e8a 100644
--- a/arch/arm/net/bpf_jit_32.c
+++ b/arch/arm/net/bpf_jit_32.c
@@ -795,6 +795,9 @@ static inline void emit_a32_alu_i(const s8 dst, const u32 val,
 	case BPF_RSH:
 		emit(ARM_LSR_I(rd, rd, val), ctx);
 		break;
+	case BPF_ARSH:
+		emit(ARM_ASR_I(rd, rd, val), ctx);
+		break;
 	case BPF_NEG:
 		emit(ARM_RSB_I(rd, rd, val), ctx);
 		break;
@@ -1408,7 +1411,6 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU | BPF_MUL | BPF_X:
 	case BPF_ALU | BPF_LSH | BPF_X:
 	case BPF_ALU | BPF_RSH | BPF_X:
-	case BPF_ALU | BPF_ARSH | BPF_K:
 	case BPF_ALU | BPF_ARSH | BPF_X:
 	case BPF_ALU64 | BPF_ADD | BPF_K:
 	case BPF_ALU64 | BPF_ADD | BPF_X:
@@ -1465,10 +1467,12 @@ static int build_insn(const struct bpf_insn *insn, struct jit_ctx *ctx)
 	case BPF_ALU64 | BPF_MOD | BPF_K:
 	case BPF_ALU64 | BPF_MOD | BPF_X:
 		goto notyet;
-	/* dst = dst >> imm */
 	/* dst = dst << imm */
-	case BPF_ALU | BPF_RSH | BPF_K:
+	/* dst = dst >> imm */
+	/* dst = dst >> imm (signed) */
 	case BPF_ALU | BPF_LSH | BPF_K:
+	case BPF_ALU | BPF_RSH | BPF_K:
+	case BPF_ALU | BPF_ARSH | BPF_K:
 		if (unlikely(imm > 31))
 			return -EINVAL;
 		if (imm)
diff --git a/arch/arm/net/bpf_jit_32.h b/arch/arm/net/bpf_jit_32.h
index fb67cbc589e0..e0b593a1498d 100644
--- a/arch/arm/net/bpf_jit_32.h
+++ b/arch/arm/net/bpf_jit_32.h
@@ -94,6 +94,9 @@
 #define ARM_INST_LSR_I		0x01a00020
 #define ARM_INST_LSR_R		0x01a00030
 
+#define ARM_INST_ASR_I		0x01a00040
+#define ARM_INST_ASR_R		0x01a00050
+
 #define ARM_INST_MOV_R		0x01a00000
 #define ARM_INST_MOVS_R		0x01b00000
 #define ARM_INST_MOV_I		0x03a00000
-- 
cgit v1.2.3-59-g8ed1b


From 460c2577aaf349f4e49eaf2b9ec3d8c52a619ef5 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:27 -0400
Subject: bnxt_en: Update firmware spec. to 1.10.1.33.

Changes include additional statistics, ECN support, context memory
interface change for better TQM context memory sizing, firmware
health status definitions, etc.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h | 216 ++++++++++++++++++++++----
 1 file changed, 184 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
index 7cf27dffadb5..7e9235c8d21e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_hsi.h
@@ -2,7 +2,7 @@
  *
  * Copyright (c) 2014-2016 Broadcom Corporation
  * Copyright (c) 2014-2018 Broadcom Limited
- * Copyright (c) 2018-2019 Broadcom Inc.
+ * Copyright (c) 2018-2020 Broadcom Inc.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of the GNU General Public License as published by
@@ -207,6 +207,8 @@ struct cmd_nums {
 	#define HWRM_PORT_PHY_MDIO_READ                   0xb6UL
 	#define HWRM_PORT_PHY_MDIO_BUS_ACQUIRE            0xb7UL
 	#define HWRM_PORT_PHY_MDIO_BUS_RELEASE            0xb8UL
+	#define HWRM_PORT_QSTATS_EXT_PFC_WD               0xb9UL
+	#define HWRM_PORT_ECN_QSTATS                      0xbaUL
 	#define HWRM_FW_RESET                             0xc0UL
 	#define HWRM_FW_QSTATUS                           0xc1UL
 	#define HWRM_FW_HEALTH_CHECK                      0xc2UL
@@ -220,6 +222,8 @@ struct cmd_nums {
 	#define HWRM_FW_SET_STRUCTURED_DATA               0xcaUL
 	#define HWRM_FW_GET_STRUCTURED_DATA               0xcbUL
 	#define HWRM_FW_IPC_MAILBOX                       0xccUL
+	#define HWRM_FW_ECN_CFG                           0xcdUL
+	#define HWRM_FW_ECN_QCFG                          0xceUL
 	#define HWRM_EXEC_FWD_RESP                        0xd0UL
 	#define HWRM_REJECT_FWD_RESP                      0xd1UL
 	#define HWRM_FWD_RESP                             0xd2UL
@@ -233,6 +237,7 @@ struct cmd_nums {
 	#define HWRM_TEMP_MONITOR_QUERY                   0xe0UL
 	#define HWRM_REG_POWER_QUERY                      0xe1UL
 	#define HWRM_CORE_FREQUENCY_QUERY                 0xe2UL
+	#define HWRM_REG_POWER_HISTOGRAM                  0xe3UL
 	#define HWRM_WOL_FILTER_ALLOC                     0xf0UL
 	#define HWRM_WOL_FILTER_FREE                      0xf1UL
 	#define HWRM_WOL_FILTER_QCFG                      0xf2UL
@@ -331,6 +336,7 @@ struct cmd_nums {
 	#define HWRM_FUNC_VF_BW_CFG                       0x195UL
 	#define HWRM_FUNC_VF_BW_QCFG                      0x196UL
 	#define HWRM_FUNC_HOST_PF_IDS_QUERY               0x197UL
+	#define HWRM_FUNC_QSTATS_EXT                      0x198UL
 	#define HWRM_SELFTEST_QLIST                       0x200UL
 	#define HWRM_SELFTEST_EXEC                        0x201UL
 	#define HWRM_SELFTEST_IRQ                         0x202UL
@@ -341,6 +347,31 @@ struct cmd_nums {
 	#define HWRM_MFG_OTP_CFG                          0x207UL
 	#define HWRM_MFG_OTP_QCFG                         0x208UL
 	#define HWRM_MFG_HDMA_TEST                        0x209UL
+	#define HWRM_MFG_FRU_EEPROM_WRITE                 0x20aUL
+	#define HWRM_MFG_FRU_EEPROM_READ                  0x20bUL
+	#define HWRM_TF                                   0x2bcUL
+	#define HWRM_TF_VERSION_GET                       0x2bdUL
+	#define HWRM_TF_SESSION_OPEN                      0x2c6UL
+	#define HWRM_TF_SESSION_ATTACH                    0x2c7UL
+	#define HWRM_TF_SESSION_CLOSE                     0x2c8UL
+	#define HWRM_TF_SESSION_QCFG                      0x2c9UL
+	#define HWRM_TF_SESSION_RESC_QCAPS                0x2caUL
+	#define HWRM_TF_SESSION_RESC_ALLOC                0x2cbUL
+	#define HWRM_TF_SESSION_RESC_FREE                 0x2ccUL
+	#define HWRM_TF_SESSION_RESC_FLUSH                0x2cdUL
+	#define HWRM_TF_TBL_TYPE_GET                      0x2d0UL
+	#define HWRM_TF_TBL_TYPE_SET                      0x2d1UL
+	#define HWRM_TF_CTXT_MEM_RGTR                     0x2daUL
+	#define HWRM_TF_CTXT_MEM_UNRGTR                   0x2dbUL
+	#define HWRM_TF_EXT_EM_QCAPS                      0x2dcUL
+	#define HWRM_TF_EXT_EM_OP                         0x2ddUL
+	#define HWRM_TF_EXT_EM_CFG                        0x2deUL
+	#define HWRM_TF_EXT_EM_QCFG                       0x2dfUL
+	#define HWRM_TF_TCAM_SET                          0x2eeUL
+	#define HWRM_TF_TCAM_GET                          0x2efUL
+	#define HWRM_TF_TCAM_MOVE                         0x2f0UL
+	#define HWRM_TF_TCAM_FREE                         0x2f1UL
+	#define HWRM_SV                                   0x400UL
 	#define HWRM_DBG_READ_DIRECT                      0xff10UL
 	#define HWRM_DBG_READ_INDIRECT                    0xff11UL
 	#define HWRM_DBG_WRITE_DIRECT                     0xff12UL
@@ -356,6 +387,10 @@ struct cmd_nums {
 	#define HWRM_DBG_RING_INFO_GET                    0xff1cUL
 	#define HWRM_DBG_CRASHDUMP_HEADER                 0xff1dUL
 	#define HWRM_DBG_CRASHDUMP_ERASE                  0xff1eUL
+	#define HWRM_DBG_DRV_TRACE                        0xff1fUL
+	#define HWRM_DBG_QCAPS                            0xff20UL
+	#define HWRM_DBG_QCFG                             0xff21UL
+	#define HWRM_DBG_CRASHDUMP_MEDIUM_CFG             0xff22UL
 	#define HWRM_NVM_FACTORY_DEFAULTS                 0xffeeUL
 	#define HWRM_NVM_VALIDATE_OPTION                  0xffefUL
 	#define HWRM_NVM_FLUSH                            0xfff0UL
@@ -429,8 +464,8 @@ struct hwrm_err_output {
 #define HWRM_VERSION_MAJOR 1
 #define HWRM_VERSION_MINOR 10
 #define HWRM_VERSION_UPDATE 1
-#define HWRM_VERSION_RSVD 12
-#define HWRM_VERSION_STR "1.10.1.12"
+#define HWRM_VERSION_RSVD 33
+#define HWRM_VERSION_STR "1.10.1.33"
 
 /* hwrm_ver_get_input (size:192b/24B) */
 struct hwrm_ver_get_input {
@@ -482,6 +517,7 @@ struct hwrm_ver_get_output {
 	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_EEM_SUPPORTED                        0x800UL
 	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_ADV_FLOW_MGNT_SUPPORTED              0x1000UL
 	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TFLIB_SUPPORTED                      0x2000UL
+	#define VER_GET_RESP_DEV_CAPS_CFG_CFA_TRUFLOW_SUPPORTED                    0x4000UL
 	u8	roce_fw_maj_8b;
 	u8	roce_fw_min_8b;
 	u8	roce_fw_bld_8b;
@@ -647,6 +683,7 @@ struct hwrm_async_event_cmpl {
 	#define ASYNC_EVENT_CMPL_EVENT_ID_TFLIB_LINK_STATUS_CHANGE   0x3eUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_QUIESCE_DONE               0x3fUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_DEFERRED_RESPONSE          0x40UL
+	#define ASYNC_EVENT_CMPL_EVENT_ID_PFC_WATCHDOG_CFG_CHANGE    0x41UL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_FW_TRACE_MSG               0xfeUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR                 0xffUL
 	#define ASYNC_EVENT_CMPL_EVENT_ID_LAST                      ASYNC_EVENT_CMPL_EVENT_ID_HWRM_ERROR
@@ -1089,7 +1126,7 @@ struct hwrm_func_qcaps_input {
 	u8	unused_0[6];
 };
 
-/* hwrm_func_qcaps_output (size:640b/80B) */
+/* hwrm_func_qcaps_output (size:704b/88B) */
 struct hwrm_func_qcaps_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -1126,6 +1163,10 @@ struct hwrm_func_qcaps_output {
 	#define FUNC_QCAPS_RESP_FLAGS_ERR_RECOVER_RELOAD                    0x2000000UL
 	#define FUNC_QCAPS_RESP_FLAGS_NOTIFY_VF_DEF_VNIC_CHNG_SUPPORTED     0x4000000UL
 	#define FUNC_QCAPS_RESP_FLAGS_VLAN_ACCELERATION_TX_DISABLED         0x8000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_COREDUMP_CMD_SUPPORTED                0x10000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_CRASHDUMP_CMD_SUPPORTED               0x20000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_PFC_WD_STATS_SUPPORTED                0x40000000UL
+	#define FUNC_QCAPS_RESP_FLAGS_DBG_QCAPS_CMD_SUPPORTED               0x80000000UL
 	u8	mac_address[6];
 	__le16	max_rsscos_ctx;
 	__le16	max_cmpl_rings;
@@ -1146,7 +1187,12 @@ struct hwrm_func_qcaps_output {
 	__le32	max_flow_id;
 	__le32	max_hw_ring_grps;
 	__le16	max_sp_tx_rings;
-	u8	unused_0;
+	u8	unused_0[2];
+	__le32	flags_ext;
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_MARK_SUPPORTED         0x1UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_ECN_STATS_SUPPORTED        0x2UL
+	#define FUNC_QCAPS_RESP_FLAGS_EXT_EXT_HW_STATS_SUPPORTED     0x4UL
+	u8	unused_1[3];
 	u8	valid;
 };
 
@@ -1161,7 +1207,7 @@ struct hwrm_func_qcfg_input {
 	u8	unused_0[6];
 };
 
-/* hwrm_func_qcfg_output (size:704b/88B) */
+/* hwrm_func_qcfg_output (size:768b/96B) */
 struct hwrm_func_qcfg_output {
 	__le16	error_code;
 	__le16	req_type;
@@ -1267,7 +1313,11 @@ struct hwrm_func_qcfg_output {
 	u8	always_1;
 	__le32	reset_addr_poll;
 	__le16	legacy_l2_db_size_kb;
-	u8	unused_2[1];
+	__le16	svif_info;
+	#define FUNC_QCFG_RESP_SVIF_INFO_SVIF_MASK      0x7fffUL
+	#define FUNC_QCFG_RESP_SVIF_INFO_SVIF_SFT       0
+	#define FUNC_QCFG_RESP_SVIF_INFO_SVIF_VALID     0x8000UL
+	u8	unused_2[7];
 	u8	valid;
 };
 
@@ -1420,9 +1470,10 @@ struct hwrm_func_qstats_input {
 	__le64	resp_addr;
 	__le16	fid;
 	u8	flags;
-	#define FUNC_QSTATS_REQ_FLAGS_UNUSED    0x0UL
-	#define FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY 0x1UL
-	#define FUNC_QSTATS_REQ_FLAGS_LAST     FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY
+	#define FUNC_QSTATS_REQ_FLAGS_UNUSED       0x0UL
+	#define FUNC_QSTATS_REQ_FLAGS_ROCE_ONLY    0x1UL
+	#define FUNC_QSTATS_REQ_FLAGS_COUNTER_MASK 0x2UL
+	#define FUNC_QSTATS_REQ_FLAGS_LAST        FUNC_QSTATS_REQ_FLAGS_COUNTER_MASK
 	u8	unused_0[5];
 };
 
@@ -1456,6 +1507,53 @@ struct hwrm_func_qstats_output {
 	u8	valid;
 };
 
+/* hwrm_func_qstats_ext_input (size:192b/24B) */
+struct hwrm_func_qstats_ext_input {
+	__le16	req_type;
+	__le16	cmpl_ring;
+	__le16	seq_id;
+	__le16	target_id;
+	__le64	resp_addr;
+	__le16	fid;
+	u8	flags;
+	#define FUNC_QSTATS_EXT_REQ_FLAGS_UNUSED       0x0UL
+	#define FUNC_QSTATS_EXT_REQ_FLAGS_ROCE_ONLY    0x1UL
+	#define FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK 0x2UL
+	#define FUNC_QSTATS_EXT_REQ_FLAGS_LAST        FUNC_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK
+	u8	unused_0[5];
+};
+
+/* hwrm_func_qstats_ext_output (size:1472b/184B) */
+struct hwrm_func_qstats_ext_output {
+	__le16	error_code;
+	__le16	req_type;
+	__le16	seq_id;
+	__le16	resp_len;
+	__le64	rx_ucast_pkts;
+	__le64	rx_mcast_pkts;
+	__le64	rx_bcast_pkts;
+	__le64	rx_discard_pkts;
+	__le64	rx_drop_pkts;
+	__le64	rx_ucast_bytes;
+	__le64	rx_mcast_bytes;
+	__le64	rx_bcast_bytes;
+	__le64	tx_ucast_pkts;
+	__le64	tx_mcast_pkts;
+	__le64	tx_bcast_pkts;
+	__le64	tx_discard_pkts;
+	__le64	tx_drop_pkts;
+	__le64	tx_ucast_bytes;
+	__le64	tx_mcast_bytes;
+	__le64	tx_bcast_bytes;
+	__le64	rx_tpa_eligible_pkt;
+	__le64	rx_tpa_eligible_bytes;
+	__le64	rx_tpa_pkt;
+	__le64	rx_tpa_bytes;
+	__le64	rx_tpa_errors;
+	u8	unused_0[7];
+	u8	valid;
+};
+
 /* hwrm_func_clr_stats_input (size:192b/24B) */
 struct hwrm_func_clr_stats_input {
 	__le16	req_type;
@@ -1808,7 +1906,7 @@ struct hwrm_func_backing_store_qcaps_output {
 	u8	ctx_kind_initializer;
 	__le32	rsvd;
 	__le16	rsvd1;
-	u8	rsvd2;
+	u8	tqm_fp_rings_count;
 	u8	valid;
 };
 
@@ -2231,7 +2329,17 @@ struct hwrm_error_recovery_qcfg_output {
 	#define ERROR_RECOVERY_QCFG_RESP_RESET_REG_ADDR_SFT           2
 	__le32	reset_reg_val[16];
 	u8	delay_after_reset[16];
-	u8	unused_1[7];
+	__le32	err_recovery_cnt_reg;
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_MASK    0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_SFT     0
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_PCIE_CFG  0x0UL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_GRC       0x1UL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR0      0x2UL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR1      0x3UL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_LAST     ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SPACE_BAR1
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_MASK          0xfffffffcUL
+	#define ERROR_RECOVERY_QCFG_RESP_ERR_RECOVERY_CNT_REG_ADDR_SFT           2
+	u8	unused_1[3];
 	u8	valid;
 };
 
@@ -2934,7 +3042,11 @@ struct hwrm_port_qstats_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	port_id;
-	u8	unused_0[6];
+	u8	flags;
+	#define PORT_QSTATS_REQ_FLAGS_UNUSED       0x0UL
+	#define PORT_QSTATS_REQ_FLAGS_COUNTER_MASK 0x1UL
+	#define PORT_QSTATS_REQ_FLAGS_LAST        PORT_QSTATS_REQ_FLAGS_COUNTER_MASK
+	u8	unused_0[5];
 	__le64	tx_stat_host_addr;
 	__le64	rx_stat_host_addr;
 };
@@ -3058,7 +3170,11 @@ struct hwrm_port_qstats_ext_input {
 	__le16	port_id;
 	__le16	tx_stat_size;
 	__le16	rx_stat_size;
-	u8	unused_0[2];
+	u8	flags;
+	#define PORT_QSTATS_EXT_REQ_FLAGS_UNUSED       0x0UL
+	#define PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK 0x1UL
+	#define PORT_QSTATS_EXT_REQ_FLAGS_LAST        PORT_QSTATS_EXT_REQ_FLAGS_COUNTER_MASK
+	u8	unused_0;
 	__le64	tx_stat_host_addr;
 	__le64	rx_stat_host_addr;
 };
@@ -3840,14 +3956,22 @@ struct hwrm_queue_pfcenable_qcfg_output {
 	__le16	seq_id;
 	__le16	resp_len;
 	__le32	flags;
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_ENABLED     0x1UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_ENABLED     0x2UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_ENABLED     0x4UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_ENABLED     0x8UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_ENABLED     0x10UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_ENABLED     0x20UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_ENABLED     0x40UL
-	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_ENABLED     0x80UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_ENABLED              0x1UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_ENABLED              0x2UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_ENABLED              0x4UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_ENABLED              0x8UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_ENABLED              0x10UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_ENABLED              0x20UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_ENABLED              0x40UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_ENABLED              0x80UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI0_PFC_WATCHDOG_ENABLED     0x100UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI1_PFC_WATCHDOG_ENABLED     0x200UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI2_PFC_WATCHDOG_ENABLED     0x400UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI3_PFC_WATCHDOG_ENABLED     0x800UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI4_PFC_WATCHDOG_ENABLED     0x1000UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI5_PFC_WATCHDOG_ENABLED     0x2000UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI6_PFC_WATCHDOG_ENABLED     0x4000UL
+	#define QUEUE_PFCENABLE_QCFG_RESP_FLAGS_PRI7_PFC_WATCHDOG_ENABLED     0x8000UL
 	u8	unused_0[3];
 	u8	valid;
 };
@@ -3860,14 +3984,22 @@ struct hwrm_queue_pfcenable_cfg_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	__le32	flags;
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI0_PFC_ENABLED     0x1UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI1_PFC_ENABLED     0x2UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI2_PFC_ENABLED     0x4UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI3_PFC_ENABLED     0x8UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI4_PFC_ENABLED     0x10UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI5_PFC_ENABLED     0x20UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI6_PFC_ENABLED     0x40UL
-	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI7_PFC_ENABLED     0x80UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI0_PFC_ENABLED              0x1UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI1_PFC_ENABLED              0x2UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI2_PFC_ENABLED              0x4UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI3_PFC_ENABLED              0x8UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI4_PFC_ENABLED              0x10UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI5_PFC_ENABLED              0x20UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI6_PFC_ENABLED              0x40UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI7_PFC_ENABLED              0x80UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI0_PFC_WATCHDOG_ENABLED     0x100UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI1_PFC_WATCHDOG_ENABLED     0x200UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI2_PFC_WATCHDOG_ENABLED     0x400UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI3_PFC_WATCHDOG_ENABLED     0x800UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI4_PFC_WATCHDOG_ENABLED     0x1000UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI5_PFC_WATCHDOG_ENABLED     0x2000UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI6_PFC_WATCHDOG_ENABLED     0x4000UL
+	#define QUEUE_PFCENABLE_CFG_REQ_FLAGS_PRI7_PFC_WATCHDOG_ENABLED     0x8000UL
 	__le16	port_id;
 	u8	unused_0[2];
 };
@@ -5287,7 +5419,11 @@ struct hwrm_ring_cmpl_ring_qaggint_params_input {
 	__le16	target_id;
 	__le64	resp_addr;
 	__le16	ring_id;
-	u8	unused_0[6];
+	__le16	flags;
+	#define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_UNUSED_0_MASK 0x3UL
+	#define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_UNUSED_0_SFT 0
+	#define RING_CMPL_RING_QAGGINT_PARAMS_REQ_FLAGS_IS_NQ        0x4UL
+	u8	unused_0[4];
 };
 
 /* hwrm_ring_cmpl_ring_qaggint_params_output (size:256b/32B) */
@@ -7618,7 +7754,9 @@ struct hwrm_nvm_modify_input {
 	__le64	resp_addr;
 	__le64	host_src_addr;
 	__le16	dir_idx;
-	u8	unused_0[2];
+	__le16	flags;
+	#define NVM_MODIFY_REQ_FLAGS_BATCH_MODE     0x1UL
+	#define NVM_MODIFY_REQ_FLAGS_BATCH_LAST     0x2UL
 	__le32	offset;
 	__le32	len;
 	u8	unused_1[4];
@@ -8027,4 +8165,18 @@ struct hwrm_selftest_irq_output {
 	u8	valid;
 };
 
+/* fw_status_reg (size:32b/4B) */
+struct fw_status_reg {
+	u32	fw_status;
+	#define FW_STATUS_REG_CODE_MASK              0xffffUL
+	#define FW_STATUS_REG_CODE_SFT               0
+	#define FW_STATUS_REG_CODE_READY               0x8000UL
+	#define FW_STATUS_REG_CODE_LAST               FW_STATUS_REG_CODE_READY
+	#define FW_STATUS_REG_IMAGE_DEGRADED         0x10000UL
+	#define FW_STATUS_REG_RECOVERABLE            0x20000UL
+	#define FW_STATUS_REG_CRASHDUMP_ONGOING      0x40000UL
+	#define FW_STATUS_REG_CRASHDUMP_COMPLETE     0x80000UL
+	#define FW_STATUS_REG_SHUTDOWN               0x100000UL
+};
+
 #endif /* _BNXT_HSI_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From ac3158cb01084aa654222f1ad970b6c1af3cef98 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:28 -0400
Subject: bnxt_en: Allocate TQM ring context memory according to fw
 specification.

Newer firmware spec. will specify the number of TQM rings to allocate
context memory for.  Use the firmware specified value and fall back
to the old value derived from bp->max_q if it is not available.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 30 +++++++++++++++++-------------
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index fead64f1ad90..32a208397b68 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6434,23 +6434,13 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
 	if (!rc) {
 		struct bnxt_ctx_pg_info *ctx_pg;
 		struct bnxt_ctx_mem_info *ctx;
-		int i;
+		int i, tqm_rings;
 
 		ctx = kzalloc(sizeof(*ctx), GFP_KERNEL);
 		if (!ctx) {
 			rc = -ENOMEM;
 			goto ctx_err;
 		}
-		ctx_pg = kzalloc(sizeof(*ctx_pg) * (bp->max_q + 1), GFP_KERNEL);
-		if (!ctx_pg) {
-			kfree(ctx);
-			rc = -ENOMEM;
-			goto ctx_err;
-		}
-		for (i = 0; i < bp->max_q + 1; i++, ctx_pg++)
-			ctx->tqm_mem[i] = ctx_pg;
-
-		bp->ctx = ctx;
 		ctx->qp_max_entries = le32_to_cpu(resp->qp_max_entries);
 		ctx->qp_min_qp1_entries = le16_to_cpu(resp->qp_min_qp1_entries);
 		ctx->qp_max_l2_entries = le16_to_cpu(resp->qp_max_l2_entries);
@@ -6483,6 +6473,20 @@ static int bnxt_hwrm_func_backing_store_qcaps(struct bnxt *bp)
 		ctx->tim_entry_size = le16_to_cpu(resp->tim_entry_size);
 		ctx->tim_max_entries = le32_to_cpu(resp->tim_max_entries);
 		ctx->ctx_kind_initializer = resp->ctx_kind_initializer;
+		ctx->tqm_fp_rings_count = resp->tqm_fp_rings_count;
+		if (!ctx->tqm_fp_rings_count)
+			ctx->tqm_fp_rings_count = bp->max_q;
+
+		tqm_rings = ctx->tqm_fp_rings_count + 1;
+		ctx_pg = kcalloc(tqm_rings, sizeof(*ctx_pg), GFP_KERNEL);
+		if (!ctx_pg) {
+			kfree(ctx);
+			rc = -ENOMEM;
+			goto ctx_err;
+		}
+		for (i = 0; i < tqm_rings; i++, ctx_pg++)
+			ctx->tqm_mem[i] = ctx_pg;
+		bp->ctx = ctx;
 	} else {
 		rc = 0;
 	}
@@ -6735,7 +6739,7 @@ static void bnxt_free_ctx_mem(struct bnxt *bp)
 		return;
 
 	if (ctx->tqm_mem[0]) {
-		for (i = 0; i < bp->max_q + 1; i++)
+		for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++)
 			bnxt_free_ctx_pg_tbls(bp, ctx->tqm_mem[i]);
 		kfree(ctx->tqm_mem[0]);
 		ctx->tqm_mem[0] = NULL;
@@ -6849,7 +6853,7 @@ skip_rdma:
 	entries = roundup(entries, ctx->tqm_entries_multiple);
 	entries = clamp_t(u32, entries, ctx->tqm_min_entries_per_ring,
 			  ctx->tqm_max_entries_per_ring);
-	for (i = 0; i < bp->max_q + 1; i++) {
+	for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
 		ctx_pg = ctx->tqm_mem[i];
 		ctx_pg->entries = entries;
 		mem_size = ctx->tqm_entry_size * entries;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index f2caa2756f5b..1dbc3aece7a6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1357,6 +1357,7 @@ struct bnxt_ctx_mem_info {
 	u16	mrav_num_entries_units;
 	u8	tqm_entries_multiple;
 	u8	ctx_kind_initializer;
+	u8	tqm_fp_rings_count;
 
 	u32	flags;
 	#define BNXT_CTX_FLAG_INITED	0x01
-- 
cgit v1.2.3-59-g8ed1b


From c7dd7ab4b204ac0142e0d05e71e05e71ae6cb270 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:29 -0400
Subject: bnxt_en: Improve TQM ring context memory sizing formulas.

The current formulas to calculate the TQM slow path and fast path ring
context memory sizes are not quite correct.  TQM slow path entry is
array index 0 of ctx->tqm_mem[].  The other array entries are for fast
path.  Fix these sizes according to latest firmware spec. for 57500 and
newer chips.

Fixes: 3be8136ce14e ("bnxt_en: Initialize context memory to the value specified by firmware.")
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 32a208397b68..0cf41a167204 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6760,6 +6760,7 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	struct bnxt_ctx_pg_info *ctx_pg;
 	struct bnxt_ctx_mem_info *ctx;
 	u32 mem_size, ena, entries;
+	u32 entries_sp, min;
 	u32 num_mr, num_ah;
 	u32 extra_srqs = 0;
 	u32 extra_qps = 0;
@@ -6849,14 +6850,17 @@ static int bnxt_alloc_ctx_mem(struct bnxt *bp)
 	ena |= FUNC_BACKING_STORE_CFG_REQ_ENABLES_TIM;
 
 skip_rdma:
-	entries = ctx->qp_max_l2_entries + extra_qps;
+	min = ctx->tqm_min_entries_per_ring;
+	entries_sp = ctx->vnic_max_vnic_entries + ctx->qp_max_l2_entries +
+		     2 * (extra_qps + ctx->qp_min_qp1_entries) + min;
+	entries_sp = roundup(entries_sp, ctx->tqm_entries_multiple);
+	entries = ctx->qp_max_l2_entries + extra_qps + ctx->qp_min_qp1_entries;
 	entries = roundup(entries, ctx->tqm_entries_multiple);
-	entries = clamp_t(u32, entries, ctx->tqm_min_entries_per_ring,
-			  ctx->tqm_max_entries_per_ring);
+	entries = clamp_t(u32, entries, min, ctx->tqm_max_entries_per_ring);
 	for (i = 0; i < ctx->tqm_fp_rings_count + 1; i++) {
 		ctx_pg = ctx->tqm_mem[i];
-		ctx_pg->entries = entries;
-		mem_size = ctx->tqm_entry_size * entries;
+		ctx_pg->entries = i ? entries : entries_sp;
+		mem_size = ctx->tqm_entry_size * ctx_pg->entries;
 		rc = bnxt_alloc_ctx_pg_tbls(bp, ctx_pg, mem_size, 1, false);
 		if (rc)
 			return rc;
-- 
cgit v1.2.3-59-g8ed1b


From d0b82c5461c9b9bfcb572fe0b50d8e2662e281f1 Mon Sep 17 00:00:00 2001
From: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Date: Mon, 4 May 2020 04:50:30 -0400
Subject: bnxt_en: Do not include ETH_FCS_LEN in the max packet length sent to
 fw.

The firmware does not expect the CRC to be included in the length
passed from the driver.  The firmware always configures the chip
to strip out the CRC.

Signed-off-by: Vasundhara Volam <vasundhara-v.volam@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c       | 3 +--
 drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c | 2 +-
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 0cf41a167204..5919f729e8a2 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5045,8 +5045,7 @@ int bnxt_hwrm_vnic_cfg(struct bnxt *bp, u16 vnic_id)
 	req.dflt_ring_grp = cpu_to_le16(bp->grp_info[grp_idx].fw_grp_id);
 	req.lb_rule = cpu_to_le16(0xffff);
 vnic_mru:
-	req.mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + ETH_FCS_LEN +
-			      VLAN_HLEN);
+	req.mru = cpu_to_le16(bp->dev->mtu + ETH_HLEN + VLAN_HLEN);
 
 	req.vnic_id = cpu_to_le16(vnic->fw_vnic_id);
 #ifdef CONFIG_BNXT_SRIOV
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
index 6ea3df6da18c..c883e8884faf 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_sriov.c
@@ -651,7 +651,7 @@ static int bnxt_hwrm_func_cfg(struct bnxt *bp, int num_vfs)
 				  FUNC_CFG_REQ_ENABLES_NUM_VNICS |
 				  FUNC_CFG_REQ_ENABLES_NUM_HW_RING_GRPS);
 
-	mtu = bp->dev->mtu + ETH_HLEN + ETH_FCS_LEN + VLAN_HLEN;
+	mtu = bp->dev->mtu + ETH_HLEN + VLAN_HLEN;
 	req.mru = cpu_to_le16(mtu);
 	req.mtu = cpu_to_le16(mtu);
 
-- 
cgit v1.2.3-59-g8ed1b


From 95fec034fd440b4882701df3e84d2b76af4e627d Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Mon, 4 May 2020 04:50:31 -0400
Subject: bnxt_en: prepare to refactor ethtool reset types

Extract bnxt_hwrm_firmware_reset() for performing firmware reset
operations. This new helper function will be used in a subsequent
patch to separate unrelated reset types out of bnxt_firmware_reset().

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 46 ++++++++++++++---------
 1 file changed, 29 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 34046a6286e8..ed6a32299bec 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1749,8 +1749,8 @@ static int bnxt_flash_nvram(struct net_device *dev,
 	return rc;
 }
 
-static int bnxt_firmware_reset(struct net_device *dev,
-			       u16 dir_type)
+static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
+				    u8 self_reset, u8 flags)
 {
 	struct hwrm_fw_reset_input req = {0};
 	struct bnxt *bp = netdev_priv(dev);
@@ -1758,48 +1758,60 @@ static int bnxt_firmware_reset(struct net_device *dev,
 
 	bnxt_hwrm_cmd_hdr_init(bp, &req, HWRM_FW_RESET, -1, -1);
 
+	req.embedded_proc_type = proc_type;
+	req.selfrst_status = self_reset;
+	req.flags = flags;
+
+	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+	if (rc == -EACCES)
+		bnxt_print_admin_err(bp);
+	return rc;
+}
+
+static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type)
+{
+	u8 self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE;
+	struct bnxt *bp = netdev_priv(dev);
+	u8 proc_type, flags = 0;
+
 	/* TODO: Address self-reset of APE/KONG/BONO/TANG or ungraceful reset */
 	/*       (e.g. when firmware isn't already running) */
 	switch (dir_type) {
 	case BNX_DIR_TYPE_CHIMP_PATCH:
 	case BNX_DIR_TYPE_BOOTCODE:
 	case BNX_DIR_TYPE_BOOTCODE_2:
-		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_BOOT;
 		/* Self-reset ChiMP upon next PCIe reset: */
-		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
+		self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
 		break;
 	case BNX_DIR_TYPE_APE_FW:
 	case BNX_DIR_TYPE_APE_PATCH:
-		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_MGMT;
 		/* Self-reset APE upon next PCIe reset: */
-		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
+		self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTPCIERST;
 		break;
 	case BNX_DIR_TYPE_KONG_FW:
 	case BNX_DIR_TYPE_KONG_PATCH:
-		req.embedded_proc_type =
-			FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_NETCTRL;
 		break;
 	case BNX_DIR_TYPE_BONO_FW:
 	case BNX_DIR_TYPE_BONO_PATCH:
-		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE;
 		break;
 	case BNXT_FW_RESET_CHIP:
-		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
-		req.selfrst_status = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
+		self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
 		if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
-			req.flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
+			flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
 		break;
 	case BNXT_FW_RESET_AP:
-		req.embedded_proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP;
+		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP;
 		break;
 	default:
 		return -EINVAL;
 	}
 
-	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc == -EACCES)
-		bnxt_print_admin_err(bp);
-	return rc;
+	return bnxt_hwrm_firmware_reset(dev, proc_type, self_reset, flags);
 }
 
 static int bnxt_flash_firmware(struct net_device *dev,
-- 
cgit v1.2.3-59-g8ed1b


From 94f17e89c956553606d5c7cf4f40ce6012529d48 Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Mon, 4 May 2020 04:50:32 -0400
Subject: bnxt_en: refactor ethtool firmware reset types

The case statement in bnxt_firmware_reset() dangerously mixes types.
This patch separates the application processor and whole chip resets
from the rest such that the selection is performed on a pure type.

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 38 +++++++++++++++--------
 1 file changed, 25 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index ed6a32299bec..d99da82b6eff 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1768,10 +1768,10 @@ static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
 	return rc;
 }
 
-static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type)
+static int bnxt_firmware_reset(struct net_device *dev,
+			       enum bnxt_nvm_directory_type dir_type)
 {
 	u8 self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE;
-	struct bnxt *bp = netdev_priv(dev);
 	u8 proc_type, flags = 0;
 
 	/* TODO: Address self-reset of APE/KONG/BONO/TANG or ungraceful reset */
@@ -1798,15 +1798,6 @@ static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type)
 	case BNX_DIR_TYPE_BONO_PATCH:
 		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_ROCE;
 		break;
-	case BNXT_FW_RESET_CHIP:
-		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP;
-		self_reset = FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP;
-		if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
-			flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
-		break;
-	case BNXT_FW_RESET_AP:
-		proc_type = FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP;
-		break;
 	default:
 		return -EINVAL;
 	}
@@ -1814,6 +1805,27 @@ static int bnxt_firmware_reset(struct net_device *dev, u16 dir_type)
 	return bnxt_hwrm_firmware_reset(dev, proc_type, self_reset, flags);
 }
 
+static int bnxt_firmware_reset_chip(struct net_device *dev)
+{
+	struct bnxt *bp = netdev_priv(dev);
+	u8 flags = 0;
+
+	if (bp->fw_cap & BNXT_FW_CAP_HOT_RESET)
+		flags = FW_RESET_REQ_FLAGS_RESET_GRACEFUL;
+
+	return bnxt_hwrm_firmware_reset(dev,
+					FW_RESET_REQ_EMBEDDED_PROC_TYPE_CHIP,
+					FW_RESET_REQ_SELFRST_STATUS_SELFRSTASAP,
+					flags);
+}
+
+static int bnxt_firmware_reset_ap(struct net_device *dev)
+{
+	return bnxt_hwrm_firmware_reset(dev, FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP,
+					FW_RESET_REQ_SELFRST_STATUS_SELFRSTNONE,
+					0);
+}
+
 static int bnxt_flash_firmware(struct net_device *dev,
 			       u16 dir_type,
 			       const u8 *fw_data,
@@ -3006,7 +3018,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 		if (bp->hwrm_spec_code < 0x10803)
 			return -EOPNOTSUPP;
 
-		rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_CHIP);
+		rc = bnxt_firmware_reset_chip(dev);
 		if (!rc) {
 			netdev_info(dev, "Reset request successful.\n");
 			if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET))
@@ -3018,7 +3030,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 		if (bp->hwrm_spec_code < 0x10803)
 			return -EOPNOTSUPP;
 
-		rc = bnxt_firmware_reset(dev, BNXT_FW_RESET_AP);
+		rc = bnxt_firmware_reset_ap(dev);
 		if (!rc) {
 			netdev_info(dev, "Reset Application Processor request successful.\n");
 			*flags = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 7a13240e371891d90cd51e3ea55ea04f4b2065dc Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Mon, 4 May 2020 04:50:33 -0400
Subject: bnxt_en: fix ethtool_reset_flags ABI violations

The ethtool ABI specifies that the reset operation should only clear
the flags that were actually reset. Setting the flags to zero after
a chip reset violates this because it does not include resetting the
application processor complex. Similarly, components that are not yet
defined are also not necessarily being reset.

The fact that chip reset does not cover the AP also means that it is
inappropriate to treat these two components exclusively of one another.
The ABI provides a mechanism to report a failure to reset independent
components via the returned bitmask, so it is also wrong to fail hard
if one of a set of independent resets is not possible.

It is incorrect to rely on the passed-by-reference flags in bnxt_reset(),
which are being updated as components are reset. The initially requested
value should be used instead so that hard errors do not propagate if any
earlier components could have been reset successfully.

Note, AP and chip resets are global in nature. Dedicated resets are
thus not currently supported.

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 47 ++++++++++++-----------
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h |  8 +++-
 2 files changed, 31 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index d99da82b6eff..9937c216aa48 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2999,7 +2999,10 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
 static int bnxt_reset(struct net_device *dev, u32 *flags)
 {
 	struct bnxt *bp = netdev_priv(dev);
-	int rc = 0;
+	u32 req = *flags;
+
+	if (!req)
+		return -EINVAL;
 
 	if (!BNXT_PF(bp)) {
 		netdev_err(dev, "Reset is not supported from a VF\n");
@@ -3013,33 +3016,33 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 		return -EBUSY;
 	}
 
-	if (*flags == ETH_RESET_ALL) {
+	if ((req & BNXT_FW_RESET_CHIP) == BNXT_FW_RESET_CHIP) {
 		/* This feature is not supported in older firmware versions */
-		if (bp->hwrm_spec_code < 0x10803)
-			return -EOPNOTSUPP;
-
-		rc = bnxt_firmware_reset_chip(dev);
-		if (!rc) {
-			netdev_info(dev, "Reset request successful.\n");
-			if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET))
-				netdev_info(dev, "Reload driver to complete reset\n");
-			*flags = 0;
+		if (bp->hwrm_spec_code >= 0x10803) {
+			if (!bnxt_firmware_reset_chip(dev)) {
+				netdev_info(dev, "Firmware reset request successful.\n");
+				if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET))
+					netdev_info(dev, "Reload driver to complete reset\n");
+				*flags &= ~BNXT_FW_RESET_CHIP;
+			}
+		} else if (req == BNXT_FW_RESET_CHIP) {
+			return -EOPNOTSUPP; /* only request, fail hard */
 		}
-	} else if (*flags == ETH_RESET_AP) {
-		/* This feature is not supported in older firmware versions */
-		if (bp->hwrm_spec_code < 0x10803)
-			return -EOPNOTSUPP;
+	}
 
-		rc = bnxt_firmware_reset_ap(dev);
-		if (!rc) {
-			netdev_info(dev, "Reset Application Processor request successful.\n");
-			*flags = 0;
+	if (req & BNXT_FW_RESET_AP) {
+		/* This feature is not supported in older firmware versions */
+		if (bp->hwrm_spec_code >= 0x10803) {
+			if (!bnxt_firmware_reset_ap(dev)) {
+				netdev_info(dev, "Reset application processor successful.\n");
+				*flags &= ~BNXT_FW_RESET_AP;
+			}
+		} else if (req == BNXT_FW_RESET_AP) {
+			return -EOPNOTSUPP; /* only request, fail hard */
 		}
-	} else {
-		rc = -EINVAL;
 	}
 
-	return rc;
+	return 0;
 }
 
 static int bnxt_hwrm_dbg_dma_data(struct bnxt *bp, void *msg, int msg_len,
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
index 3576d951727b..ce7585ff9e4d 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.h
@@ -77,8 +77,12 @@ struct hwrm_dbg_cmn_output {
 #define BNXT_LED_DFLT_ENABLES(x)			\
 	cpu_to_le32(BNXT_LED_DFLT_ENA << (BNXT_LED_DFLT_ENA_SHIFT * (x)))
 
-#define BNXT_FW_RESET_AP	0xfffe
-#define BNXT_FW_RESET_CHIP	0xffff
+#define BNXT_FW_RESET_AP	(ETH_RESET_AP << ETH_RESET_SHARED_SHIFT)
+#define BNXT_FW_RESET_CHIP	((ETH_RESET_MGMT | ETH_RESET_IRQ |	\
+				  ETH_RESET_DMA | ETH_RESET_FILTER |	\
+				  ETH_RESET_OFFLOAD | ETH_RESET_MAC |	\
+				  ETH_RESET_PHY | ETH_RESET_RAM)	\
+				 << ETH_RESET_SHARED_SHIFT)
 
 extern const struct ethtool_ops bnxt_ethtool_ops;
 
-- 
cgit v1.2.3-59-g8ed1b


From 8cec0940803c255f501d4b9f4764cd47fc206ad4 Mon Sep 17 00:00:00 2001
From: Edwin Peer <edwin.peer@broadcom.com>
Date: Mon, 4 May 2020 04:50:34 -0400
Subject: bnxt_en: Improve kernel log messages related to ethtool reset.

Kernel log messages for failed AP reset commands should be suppressed.
These are expected to fail on devices that do not have an AP.  Add
missing driver reload message after AP reset and log it in a common
way without duplication.

Signed-off-by: Edwin Peer <edwin.peer@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 9937c216aa48..ad68bc393cc7 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -1762,9 +1762,14 @@ static int bnxt_hwrm_firmware_reset(struct net_device *dev, u8 proc_type,
 	req.selfrst_status = self_reset;
 	req.flags = flags;
 
-	rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
-	if (rc == -EACCES)
-		bnxt_print_admin_err(bp);
+	if (proc_type == FW_RESET_REQ_EMBEDDED_PROC_TYPE_AP) {
+		rc = hwrm_send_message_silent(bp, &req, sizeof(req),
+					      HWRM_CMD_TIMEOUT);
+	} else {
+		rc = hwrm_send_message(bp, &req, sizeof(req), HWRM_CMD_TIMEOUT);
+		if (rc == -EACCES)
+			bnxt_print_admin_err(bp);
+	}
 	return rc;
 }
 
@@ -2999,6 +3004,7 @@ static void bnxt_self_test(struct net_device *dev, struct ethtool_test *etest,
 static int bnxt_reset(struct net_device *dev, u32 *flags)
 {
 	struct bnxt *bp = netdev_priv(dev);
+	bool reload = false;
 	u32 req = *flags;
 
 	if (!req)
@@ -3022,7 +3028,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 			if (!bnxt_firmware_reset_chip(dev)) {
 				netdev_info(dev, "Firmware reset request successful.\n");
 				if (!(bp->fw_cap & BNXT_FW_CAP_HOT_RESET))
-					netdev_info(dev, "Reload driver to complete reset\n");
+					reload = true;
 				*flags &= ~BNXT_FW_RESET_CHIP;
 			}
 		} else if (req == BNXT_FW_RESET_CHIP) {
@@ -3035,6 +3041,7 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 		if (bp->hwrm_spec_code >= 0x10803) {
 			if (!bnxt_firmware_reset_ap(dev)) {
 				netdev_info(dev, "Reset application processor successful.\n");
+				reload = true;
 				*flags &= ~BNXT_FW_RESET_AP;
 			}
 		} else if (req == BNXT_FW_RESET_AP) {
@@ -3042,6 +3049,9 @@ static int bnxt_reset(struct net_device *dev, u32 *flags)
 		}
 	}
 
+	if (reload)
+		netdev_info(dev, "Reload driver to complete reset\n");
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ebdf73dc595b6711dbfaf3007d513909bd814940 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:35 -0400
Subject: bnxt_en: Define the doorbell offsets on 57500 chips.

Define the 57500 chip doorbell offsets instead of using the magic
values in the C file.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 4 ++--
 drivers/net/ethernet/broadcom/bnxt/bnxt.h | 3 +++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 5919f729e8a2..2e564026fe96 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -5355,9 +5355,9 @@ static void bnxt_set_db(struct bnxt *bp, struct bnxt_db_info *db, u32 ring_type,
 {
 	if (bp->flags & BNXT_FLAG_CHIP_P5) {
 		if (BNXT_PF(bp))
-			db->doorbell = bp->bar1 + 0x10000;
+			db->doorbell = bp->bar1 + DB_PF_OFFSET_P5;
 		else
-			db->doorbell = bp->bar1 + 0x4000;
+			db->doorbell = bp->bar1 + DB_VF_OFFSET_P5;
 		switch (ring_type) {
 		case HWRM_RING_ALLOC_TX:
 			db->db_key64 = DBR_PATH_L2 | DBR_TYPE_SQ;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 1dbc3aece7a6..a3b80409c25e 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -537,6 +537,9 @@ struct nqe_cn {
 #define DBR_TYPE_NQ_ARM					(0xbULL << 60)
 #define DBR_TYPE_NULL					(0xfULL << 60)
 
+#define DB_PF_OFFSET_P5					0x10000
+#define DB_VF_OFFSET_P5					0x4000
+
 #define INVALID_HW_RING_ID	((u16)-1)
 
 /* The hardware supports certain page sizes.  Use the supported page sizes
-- 
cgit v1.2.3-59-g8ed1b


From e93b30d56fc0670e508456afc59f16d70fe1f83f Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:36 -0400
Subject: bnxt_en: Set the db_offset on 57500 chips for the RDMA MSIX entries.

The driver provides completion ring or NQ doorbell offset for each
MSIX entry requested by the RDMA driver.  The NQ offset on 57500
chips is different from that on legacy chips.  Set it based on the
chip type for correctness.  The RDMA driver currently ignores this field
on 57500 chips, so the incorrect value has not caused any problem.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 4a316c4b3fa8..4b40778ac8dd 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -104,7 +104,13 @@ static void bnxt_fill_msix_vecs(struct bnxt *bp, struct bnxt_msix_entry *ent)
 	for (i = 0; i < num_msix; i++) {
 		ent[i].vector = bp->irq_tbl[idx + i].vector;
 		ent[i].ring_idx = idx + i;
-		ent[i].db_offset = (idx + i) * 0x80;
+		if (bp->flags & BNXT_FLAG_CHIP_P5) {
+			ent[i].db_offset = DB_PF_OFFSET_P5;
+			if (BNXT_VF(bp))
+				ent[i].db_offset = DB_VF_OFFSET_P5;
+		} else {
+			ent[i].db_offset = (idx + i) * 0x80;
+		}
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 8ae2473842bdbb95bfb451b130dad6a650b3ad1b Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:37 -0400
Subject: bnxt_en: Add support for L2 doorbell size.

Read the L2 doorbell size from the firmware and only map the portion
of the doorbell BAR for L2 use.  This will leave the remaining doorbell
BAR available for the RoCE driver to use.  The RoCE driver can map
the remaining portion as write-combining to support the push feature.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c | 43 ++++++++++++++++++++++++++-----
 drivers/net/ethernet/broadcom/bnxt/bnxt.h |  1 +
 2 files changed, 37 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 2e564026fe96..8f11344b8055 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -6364,6 +6364,7 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 {
 	struct hwrm_func_qcfg_input req = {0};
 	struct hwrm_func_qcfg_output *resp = bp->hwrm_cmd_resp_addr;
+	u32 min_db_offset = 0;
 	u16 flags;
 	int rc;
 
@@ -6412,6 +6413,21 @@ static int bnxt_hwrm_func_qcfg(struct bnxt *bp)
 	if (!bp->max_mtu)
 		bp->max_mtu = BNXT_MAX_MTU;
 
+	if (bp->db_size)
+		goto func_qcfg_exit;
+
+	if (bp->flags & BNXT_FLAG_CHIP_P5) {
+		if (BNXT_PF(bp))
+			min_db_offset = DB_PF_OFFSET_P5;
+		else
+			min_db_offset = DB_VF_OFFSET_P5;
+	}
+	bp->db_size = PAGE_ALIGN(le16_to_cpu(resp->l2_doorbell_bar_size_kb) *
+				 1024);
+	if (!bp->db_size || bp->db_size > pci_resource_len(bp->pdev, 2) ||
+	    bp->db_size <= min_db_offset)
+		bp->db_size = pci_resource_len(bp->pdev, 2);
+
 func_qcfg_exit:
 	mutex_unlock(&bp->hwrm_cmd_lock);
 	return rc;
@@ -10898,6 +10914,9 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 	bp->dev = dev;
 	bp->pdev = pdev;
 
+	/* Doorbell BAR bp->bar1 is mapped after bnxt_fw_init_one_p2()
+	 * determines the BAR size.
+	 */
 	bp->bar0 = pci_ioremap_bar(pdev, 0);
 	if (!bp->bar0) {
 		dev_err(&pdev->dev, "Cannot map device registers, aborting\n");
@@ -10905,13 +10924,6 @@ static int bnxt_init_board(struct pci_dev *pdev, struct net_device *dev)
 		goto init_err_release;
 	}
 
-	bp->bar1 = pci_ioremap_bar(pdev, 2);
-	if (!bp->bar1) {
-		dev_err(&pdev->dev, "Cannot map doorbell registers, aborting\n");
-		rc = -ENOMEM;
-		goto init_err_release;
-	}
-
 	bp->bar2 = pci_ioremap_bar(pdev, 4);
 	if (!bp->bar2) {
 		dev_err(&pdev->dev, "Cannot map bar4 registers, aborting\n");
@@ -11833,6 +11845,16 @@ static int bnxt_pcie_dsn_get(struct bnxt *bp, u8 dsn[])
 	return 0;
 }
 
+static int bnxt_map_db_bar(struct bnxt *bp)
+{
+	if (!bp->db_size)
+		return -ENODEV;
+	bp->bar1 = pci_iomap(bp->pdev, 2, bp->db_size);
+	if (!bp->bar1)
+		return -ENOMEM;
+	return 0;
+}
+
 static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *dev;
@@ -11893,6 +11915,13 @@ static int bnxt_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (rc)
 		goto init_err_pci_clean;
 
+	rc = bnxt_map_db_bar(bp);
+	if (rc) {
+		dev_err(&pdev->dev, "Cannot map doorbell BAR rc = %d, aborting\n",
+			rc);
+		goto init_err_pci_clean;
+	}
+
 	dev->hw_features = NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM | NETIF_F_SG |
 			   NETIF_F_TSO | NETIF_F_TSO6 |
 			   NETIF_F_GSO_UDP_TUNNEL | NETIF_F_GSO_GRE |
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index a3b80409c25e..6114b0a576ff 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1820,6 +1820,7 @@ struct bnxt {
 	/* ensure atomic 64-bit doorbell writes on 32-bit systems. */
 	spinlock_t		db_lock;
 #endif
+	int			db_size;
 
 #define BNXT_NTP_FLTR_MAX_FLTR	4096
 #define BNXT_NTP_FLTR_HASH_SIZE	512
-- 
cgit v1.2.3-59-g8ed1b


From 098286ff930ca752e4c9295ea65840dd55f5f290 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:38 -0400
Subject: bnxt_en: Add doorbell information to bnxt_en_dev struct.

The purpose of this is to inform the RDMA driver the size of the doorbell
BAR that the L2 driver has mapped and the portion that is mapped
uncacheable.  The uncacheable portion is shared with the RoCE driver.
Any remaining unmapped doorbell BAR can be used by the RDMA driver for
its own purpose.  Currently, the entire L2 portion is mapped uncacheable.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c | 2 ++
 drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h | 8 ++++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
index 4b40778ac8dd..8c8368c2f335 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.c
@@ -481,6 +481,8 @@ struct bnxt_en_dev *bnxt_ulp_probe(struct net_device *dev)
 			edev->flags |= BNXT_EN_FLAG_ROCEV2_CAP;
 		edev->net = dev;
 		edev->pdev = bp->pdev;
+		edev->l2_db_size = bp->db_size;
+		edev->l2_db_size_nc = bp->db_size;
 		bp->edev = edev;
 	}
 	return bp->edev;
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
index 9895406b9830..6b4d2556a6df 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ulp.h
@@ -67,6 +67,14 @@ struct bnxt_en_dev {
 	#define BNXT_EN_FLAG_ULP_STOPPED	0x8
 	const struct bnxt_en_ops	*en_ops;
 	struct bnxt_ulp			ulp_tbl[BNXT_MAX_ULP];
+	int				l2_db_size;	/* Doorbell BAR size in
+							 * bytes mapped by L2
+							 * driver.
+							 */
+	int				l2_db_size_nc;	/* Doorbell BAR size in
+							 * bytes mapped as non-
+							 * cacheable.
+							 */
 };
 
 struct bnxt_en_ops {
-- 
cgit v1.2.3-59-g8ed1b


From 9d8b5f05529c619b63d68b0dd26a1dfe35a4fab2 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:39 -0400
Subject: bnxt_en: Refactor the software ring counters.

We currently have 3 software ring counters, rx_l4_csum_errors,
rx_buf_errors, and missed_irqs.  The 1st two are RX counters and the
last one is a common counter.  Organize them into 2 structures
bnxt_rx_sw_stats and bnxt_cmn_sw_stats.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.c         |  6 ++---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h         | 19 +++++++++++---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 30 +++++++++++++++++------
 3 files changed, 42 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.c b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
index 8f11344b8055..4bbfea147d98 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.c
@@ -1766,7 +1766,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 
 		rc = -EIO;
 		if (rx_err & RX_CMPL_ERRORS_BUFFER_ERROR_MASK) {
-			bnapi->cp_ring.rx_buf_errors++;
+			bnapi->cp_ring.sw_stats.rx.rx_buf_errors++;
 			if (!(bp->flags & BNXT_FLAG_CHIP_P5)) {
 				netdev_warn(bp->dev, "RX buffer error %x\n",
 					    rx_err);
@@ -1849,7 +1849,7 @@ static int bnxt_rx_pkt(struct bnxt *bp, struct bnxt_cp_ring_info *cpr,
 	} else {
 		if (rxcmp1->rx_cmp_cfa_code_errors_v2 & RX_CMP_L4_CS_ERR_BITS) {
 			if (dev->features & NETIF_F_RXCSUM)
-				bnapi->cp_ring.rx_l4_csum_errors++;
+				bnapi->cp_ring.sw_stats.rx.rx_l4_csum_errors++;
 		}
 	}
 
@@ -10285,7 +10285,7 @@ static void bnxt_chk_missed_irq(struct bnxt *bp)
 			bnxt_dbg_hwrm_ring_info_get(bp,
 				DBG_RING_INFO_GET_REQ_RING_TYPE_L2_CMPL,
 				fw_ring_id, &val[0], &val[1]);
-			cpr->missed_irqs++;
+			cpr->sw_stats.cmn.missed_irqs++;
 		}
 	}
 }
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 6114b0a576ff..c15517ff7ff6 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -910,6 +910,20 @@ struct bnxt_rx_ring_info {
 	struct page_pool	*page_pool;
 };
 
+struct bnxt_rx_sw_stats {
+	u64			rx_l4_csum_errors;
+	u64			rx_buf_errors;
+};
+
+struct bnxt_cmn_sw_stats {
+	u64			missed_irqs;
+};
+
+struct bnxt_sw_stats {
+	struct bnxt_rx_sw_stats rx;
+	struct bnxt_cmn_sw_stats cmn;
+};
+
 struct bnxt_cp_ring_info {
 	struct bnxt_napi	*bnapi;
 	u32			cp_raw_cons;
@@ -937,9 +951,8 @@ struct bnxt_cp_ring_info {
 	struct ctx_hw_stats	*hw_stats;
 	dma_addr_t		hw_stats_map;
 	u32			hw_stats_ctx_id;
-	u64			rx_l4_csum_errors;
-	u64			rx_buf_errors;
-	u64			missed_irqs;
+
+	struct bnxt_sw_stats	sw_stats;
 
 	struct bnxt_ring_struct	cp_ring_struct;
 
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index ad68bc393cc7..b2b43a7a9abb 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -171,9 +171,12 @@ static const char * const bnxt_ring_tpa2_stats_str[] = {
 	"rx_tpa_errors",
 };
 
-static const char * const bnxt_ring_sw_stats_str[] = {
+static const char * const bnxt_rx_sw_stats_str[] = {
 	"rx_l4_csum_errors",
 	"rx_buf_errors",
+};
+
+static const char * const bnxt_cmn_sw_stats_str[] = {
 	"missed_irqs",
 };
 
@@ -485,7 +488,8 @@ static int bnxt_get_num_ring_stats(struct bnxt *bp)
 	int num_stats;
 
 	num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
-		    ARRAY_SIZE(bnxt_ring_sw_stats_str) +
+		    ARRAY_SIZE(bnxt_rx_sw_stats_str) +
+		    ARRAY_SIZE(bnxt_cmn_sw_stats_str) +
 		    bnxt_get_num_tpa_ring_stats(bp);
 	return num_stats * bp->cp_nr_rings;
 }
@@ -548,13 +552,19 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
 		__le64 *hw_stats = (__le64 *)cpr->hw_stats;
+		u64 *sw;
 		int k;
 
 		for (k = 0; k < stat_fields; j++, k++)
 			buf[j] = le64_to_cpu(hw_stats[k]);
-		buf[j++] = cpr->rx_l4_csum_errors;
-		buf[j++] = cpr->rx_buf_errors;
-		buf[j++] = cpr->missed_irqs;
+
+		sw = (u64 *)&cpr->sw_stats.rx;
+		for (k = 0; k < ARRAY_SIZE(bnxt_rx_sw_stats_str); j++, k++)
+			buf[j] = sw[k];
+
+		sw = (u64 *)&cpr->sw_stats.cmn;
+		for (k = 0; k < ARRAY_SIZE(bnxt_cmn_sw_stats_str); j++, k++)
+			buf[j] = sw[k];
 
 		bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
 			le64_to_cpu(cpr->hw_stats->rx_discard_pkts);
@@ -653,10 +663,16 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 				buf += ETH_GSTRING_LEN;
 			}
 skip_tpa_stats:
-			num_str = ARRAY_SIZE(bnxt_ring_sw_stats_str);
+			num_str = ARRAY_SIZE(bnxt_rx_sw_stats_str);
+			for (j = 0; j < num_str; j++) {
+				sprintf(buf, "[%d]: %s", i,
+					bnxt_rx_sw_stats_str[j]);
+				buf += ETH_GSTRING_LEN;
+			}
+			num_str = ARRAY_SIZE(bnxt_cmn_sw_stats_str);
 			for (j = 0; j < num_str; j++) {
 				sprintf(buf, "[%d]: %s", i,
-					bnxt_ring_sw_stats_str[j]);
+					bnxt_cmn_sw_stats_str[j]);
 				buf += ETH_GSTRING_LEN;
 			}
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 3316d50905f0e551d4786767d827589960a8cb83 Mon Sep 17 00:00:00 2001
From: Michael Chan <michael.chan@broadcom.com>
Date: Mon, 4 May 2020 04:50:40 -0400
Subject: bnxt_en: Split HW ring statistics strings into RX and TX parts.

This will allow the RX and TX ring statistics to be separated if needed.
In the next patch, we'll be able to only display RX or TX statistics if
the channel is RX only or TX only.

Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 53 ++++++++++++++---------
 1 file changed, 33 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index b2b43a7a9abb..85080f50bce5 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -137,7 +137,7 @@ reset_coalesce:
 	return rc;
 }
 
-static const char * const bnxt_ring_stats_str[] = {
+static const char * const bnxt_ring_rx_stats_str[] = {
 	"rx_ucast_packets",
 	"rx_mcast_packets",
 	"rx_bcast_packets",
@@ -146,6 +146,9 @@ static const char * const bnxt_ring_stats_str[] = {
 	"rx_ucast_bytes",
 	"rx_mcast_bytes",
 	"rx_bcast_bytes",
+};
+
+static const char * const bnxt_ring_tx_stats_str[] = {
 	"tx_ucast_packets",
 	"tx_mcast_packets",
 	"tx_bcast_packets",
@@ -306,6 +309,11 @@ static struct {
 	{0, "tx_total_discard_pkts"},
 };
 
+#define NUM_RING_RX_SW_STATS		ARRAY_SIZE(bnxt_rx_sw_stats_str)
+#define NUM_RING_CMN_SW_STATS		ARRAY_SIZE(bnxt_cmn_sw_stats_str)
+#define NUM_RING_RX_HW_STATS		ARRAY_SIZE(bnxt_ring_rx_stats_str)
+#define NUM_RING_TX_HW_STATS		ARRAY_SIZE(bnxt_ring_tx_stats_str)
+
 static const struct {
 	long offset;
 	char string[ETH_GSTRING_LEN];
@@ -485,13 +493,13 @@ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
 
 static int bnxt_get_num_ring_stats(struct bnxt *bp)
 {
-	int num_stats;
+	int rx, tx, cmn;
 
-	num_stats = ARRAY_SIZE(bnxt_ring_stats_str) +
-		    ARRAY_SIZE(bnxt_rx_sw_stats_str) +
-		    ARRAY_SIZE(bnxt_cmn_sw_stats_str) +
-		    bnxt_get_num_tpa_ring_stats(bp);
-	return num_stats * bp->cp_nr_rings;
+	rx = NUM_RING_RX_HW_STATS + NUM_RING_RX_SW_STATS +
+	     bnxt_get_num_tpa_ring_stats(bp);
+	tx = NUM_RING_TX_HW_STATS;
+	cmn = NUM_RING_CMN_SW_STATS;
+	return (rx + tx + cmn) * bp->cp_nr_rings;
 }
 
 static int bnxt_get_num_stats(struct bnxt *bp)
@@ -537,7 +545,7 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 {
 	u32 i, j = 0;
 	struct bnxt *bp = netdev_priv(dev);
-	u32 stat_fields = ARRAY_SIZE(bnxt_ring_stats_str) +
+	u32 stat_fields = NUM_RING_RX_HW_STATS + NUM_RING_TX_HW_STATS +
 			  bnxt_get_num_tpa_ring_stats(bp);
 
 	if (!bp->bnapi) {
@@ -559,11 +567,11 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 			buf[j] = le64_to_cpu(hw_stats[k]);
 
 		sw = (u64 *)&cpr->sw_stats.rx;
-		for (k = 0; k < ARRAY_SIZE(bnxt_rx_sw_stats_str); j++, k++)
+		for (k = 0; k < NUM_RING_RX_SW_STATS; j++, k++)
 			buf[j] = sw[k];
 
 		sw = (u64 *)&cpr->sw_stats.cmn;
-		for (k = 0; k < ARRAY_SIZE(bnxt_cmn_sw_stats_str); j++, k++)
+		for (k = 0; k < NUM_RING_CMN_SW_STATS; j++, k++)
 			buf[j] = sw[k];
 
 		bnxt_sw_func_stats[RX_TOTAL_DISCARDS].counter +=
@@ -642,34 +650,39 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < bp->cp_nr_rings; i++) {
-			num_str = ARRAY_SIZE(bnxt_ring_stats_str);
+			num_str = NUM_RING_RX_HW_STATS;
 			for (j = 0; j < num_str; j++) {
 				sprintf(buf, "[%d]: %s", i,
-					bnxt_ring_stats_str[j]);
+					bnxt_ring_rx_stats_str[j]);
 				buf += ETH_GSTRING_LEN;
 			}
-			if (!BNXT_SUPPORTS_TPA(bp))
+			num_str = NUM_RING_TX_HW_STATS;
+			for (j = 0; j < num_str; j++) {
+				sprintf(buf, "[%d]: %s", i,
+					bnxt_ring_tx_stats_str[j]);
+				buf += ETH_GSTRING_LEN;
+			}
+			num_str = bnxt_get_num_tpa_ring_stats(bp);
+			if (!num_str)
 				goto skip_tpa_stats;
 
-			if (bp->max_tpa_v2) {
-				num_str = ARRAY_SIZE(bnxt_ring_tpa2_stats_str);
+			if (bp->max_tpa_v2)
 				str = bnxt_ring_tpa2_stats_str;
-			} else {
-				num_str = ARRAY_SIZE(bnxt_ring_tpa_stats_str);
+			else
 				str = bnxt_ring_tpa_stats_str;
-			}
+
 			for (j = 0; j < num_str; j++) {
 				sprintf(buf, "[%d]: %s", i, str[j]);
 				buf += ETH_GSTRING_LEN;
 			}
 skip_tpa_stats:
-			num_str = ARRAY_SIZE(bnxt_rx_sw_stats_str);
+			num_str = NUM_RING_RX_SW_STATS;
 			for (j = 0; j < num_str; j++) {
 				sprintf(buf, "[%d]: %s", i,
 					bnxt_rx_sw_stats_str[j]);
 				buf += ETH_GSTRING_LEN;
 			}
-			num_str = ARRAY_SIZE(bnxt_cmn_sw_stats_str);
+			num_str = NUM_RING_CMN_SW_STATS;
 			for (j = 0; j < num_str; j++) {
 				sprintf(buf, "[%d]: %s", i,
 					bnxt_cmn_sw_stats_str[j]);
-- 
cgit v1.2.3-59-g8ed1b


From 125592fbf467d1a70312006bbaf29724d0ba5897 Mon Sep 17 00:00:00 2001
From: Rajesh Ravi <rajesh.ravi@broadcom.com>
Date: Mon, 4 May 2020 04:50:41 -0400
Subject: bnxt_en: show only relevant ethtool stats for a TX or RX ring

Currently, ethtool -S shows all TX/RX ring counters whether the
channel is combined, RX, or TX.  The unused counters will always be
zero.  Improve it by showing only the relevant counters if the channel
is RX or TX.  If the channel is combined, the counters will be shown
exactly the same as before.

[ MChan: Lots of cleanups and simplifications on Rajesh's original
code]

Signed-off-by: Rajesh Ravi <rajesh.ravi@broadcom.com>
Signed-off-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 93 +++++++++++++++++------
 1 file changed, 71 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 85080f50bce5..07526868f7be 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -494,12 +494,20 @@ static int bnxt_get_num_tpa_ring_stats(struct bnxt *bp)
 static int bnxt_get_num_ring_stats(struct bnxt *bp)
 {
 	int rx, tx, cmn;
+	bool sh = false;
+
+	if (bp->flags & BNXT_FLAG_SHARED_RINGS)
+		sh = true;
 
 	rx = NUM_RING_RX_HW_STATS + NUM_RING_RX_SW_STATS +
 	     bnxt_get_num_tpa_ring_stats(bp);
 	tx = NUM_RING_TX_HW_STATS;
 	cmn = NUM_RING_CMN_SW_STATS;
-	return (rx + tx + cmn) * bp->cp_nr_rings;
+	if (sh)
+		return (rx + tx + cmn) * bp->cp_nr_rings;
+	else
+		return rx * bp->rx_nr_rings + tx * bp->tx_nr_rings +
+		       cmn * bp->cp_nr_rings;
 }
 
 static int bnxt_get_num_stats(struct bnxt *bp)
@@ -540,13 +548,29 @@ static int bnxt_get_sset_count(struct net_device *dev, int sset)
 	}
 }
 
+static bool is_rx_ring(struct bnxt *bp, int ring_num)
+{
+	return ring_num < bp->rx_nr_rings;
+}
+
+static bool is_tx_ring(struct bnxt *bp, int ring_num)
+{
+	int tx_base = 0;
+
+	if (!(bp->flags & BNXT_FLAG_SHARED_RINGS))
+		tx_base = bp->rx_nr_rings;
+
+	if (ring_num >= tx_base && ring_num < (tx_base + bp->tx_nr_rings))
+		return true;
+	return false;
+}
+
 static void bnxt_get_ethtool_stats(struct net_device *dev,
 				   struct ethtool_stats *stats, u64 *buf)
 {
 	u32 i, j = 0;
 	struct bnxt *bp = netdev_priv(dev);
-	u32 stat_fields = NUM_RING_RX_HW_STATS + NUM_RING_TX_HW_STATS +
-			  bnxt_get_num_tpa_ring_stats(bp);
+	u32 tpa_stats;
 
 	if (!bp->bnapi) {
 		j += bnxt_get_num_ring_stats(bp) + BNXT_NUM_SW_FUNC_STATS;
@@ -556,6 +580,7 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 	for (i = 0; i < BNXT_NUM_SW_FUNC_STATS; i++)
 		bnxt_sw_func_stats[i].counter = 0;
 
+	tpa_stats = bnxt_get_num_tpa_ring_stats(bp);
 	for (i = 0; i < bp->cp_nr_rings; i++) {
 		struct bnxt_napi *bnapi = bp->bnapi[i];
 		struct bnxt_cp_ring_info *cpr = &bnapi->cp_ring;
@@ -563,12 +588,30 @@ static void bnxt_get_ethtool_stats(struct net_device *dev,
 		u64 *sw;
 		int k;
 
-		for (k = 0; k < stat_fields; j++, k++)
+		if (is_rx_ring(bp, i)) {
+			for (k = 0; k < NUM_RING_RX_HW_STATS; j++, k++)
+				buf[j] = le64_to_cpu(hw_stats[k]);
+		}
+		if (is_tx_ring(bp, i)) {
+			k = NUM_RING_RX_HW_STATS;
+			for (; k < NUM_RING_RX_HW_STATS + NUM_RING_TX_HW_STATS;
+			       j++, k++)
+				buf[j] = le64_to_cpu(hw_stats[k]);
+		}
+		if (!tpa_stats || !is_rx_ring(bp, i))
+			goto skip_tpa_ring_stats;
+
+		k = NUM_RING_RX_HW_STATS + NUM_RING_TX_HW_STATS;
+		for (; k < NUM_RING_RX_HW_STATS + NUM_RING_TX_HW_STATS +
+			   tpa_stats; j++, k++)
 			buf[j] = le64_to_cpu(hw_stats[k]);
 
+skip_tpa_ring_stats:
 		sw = (u64 *)&cpr->sw_stats.rx;
-		for (k = 0; k < NUM_RING_RX_SW_STATS; j++, k++)
-			buf[j] = sw[k];
+		if (is_rx_ring(bp, i)) {
+			for (k = 0; k < NUM_RING_RX_SW_STATS; j++, k++)
+				buf[j] = sw[k];
+		}
 
 		sw = (u64 *)&cpr->sw_stats.cmn;
 		for (k = 0; k < NUM_RING_CMN_SW_STATS; j++, k++)
@@ -650,20 +693,24 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 	switch (stringset) {
 	case ETH_SS_STATS:
 		for (i = 0; i < bp->cp_nr_rings; i++) {
-			num_str = NUM_RING_RX_HW_STATS;
-			for (j = 0; j < num_str; j++) {
-				sprintf(buf, "[%d]: %s", i,
-					bnxt_ring_rx_stats_str[j]);
-				buf += ETH_GSTRING_LEN;
+			if (is_rx_ring(bp, i)) {
+				num_str = NUM_RING_RX_HW_STATS;
+				for (j = 0; j < num_str; j++) {
+					sprintf(buf, "[%d]: %s", i,
+						bnxt_ring_rx_stats_str[j]);
+					buf += ETH_GSTRING_LEN;
+				}
 			}
-			num_str = NUM_RING_TX_HW_STATS;
-			for (j = 0; j < num_str; j++) {
-				sprintf(buf, "[%d]: %s", i,
-					bnxt_ring_tx_stats_str[j]);
-				buf += ETH_GSTRING_LEN;
+			if (is_tx_ring(bp, i)) {
+				num_str = NUM_RING_TX_HW_STATS;
+				for (j = 0; j < num_str; j++) {
+					sprintf(buf, "[%d]: %s", i,
+						bnxt_ring_tx_stats_str[j]);
+					buf += ETH_GSTRING_LEN;
+				}
 			}
 			num_str = bnxt_get_num_tpa_ring_stats(bp);
-			if (!num_str)
+			if (!num_str || !is_rx_ring(bp, i))
 				goto skip_tpa_stats;
 
 			if (bp->max_tpa_v2)
@@ -676,11 +723,13 @@ static void bnxt_get_strings(struct net_device *dev, u32 stringset, u8 *buf)
 				buf += ETH_GSTRING_LEN;
 			}
 skip_tpa_stats:
-			num_str = NUM_RING_RX_SW_STATS;
-			for (j = 0; j < num_str; j++) {
-				sprintf(buf, "[%d]: %s", i,
-					bnxt_rx_sw_stats_str[j]);
-				buf += ETH_GSTRING_LEN;
+			if (is_rx_ring(bp, i)) {
+				num_str = NUM_RING_RX_SW_STATS;
+				for (j = 0; j < num_str; j++) {
+					sprintf(buf, "[%d]: %s", i,
+						bnxt_rx_sw_stats_str[j]);
+					buf += ETH_GSTRING_LEN;
+				}
 			}
 			num_str = NUM_RING_CMN_SW_STATS;
 			for (j = 0; j < num_str; j++) {
-- 
cgit v1.2.3-59-g8ed1b


From f0ec4f1d32ad49a23b93156949208dd9348e3590 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:37 +0200
Subject: net/smc: save state of last sent CDC message

When a link goes down and all connections of this link need to be
switched to another link, the producer cursor and the sequence number
of the last successfully sent CDC message must be known. Add the two
fields to the SMC connection and update them in the tx completion
handler. To allow matching of sequence numbers in error cases, reset the
seqno to the old value in smc_cdc_msg_send() when the actual send failed.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc.h     | 4 ++++
 net/smc/smc_cdc.c | 6 ++++++
 2 files changed, 10 insertions(+)

diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1a084afa7372..1e9113771600 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -143,6 +143,9 @@ struct smc_connection {
 						 * .prod cf. TCP snd_nxt
 						 * .cons cf. TCP sends ack
 						 */
+	union smc_host_cursor	local_tx_ctrl_fin;
+						/* prod crsr - confirmed by peer
+						 */
 	union smc_host_cursor	tx_curs_prep;	/* tx - prepared data
 						 * snd_max..wmem_alloc
 						 */
@@ -154,6 +157,7 @@ struct smc_connection {
 						 */
 	atomic_t		sndbuf_space;	/* remaining space in sndbuf */
 	u16			tx_cdc_seq;	/* sequence # for CDC send */
+	u16			tx_cdc_seq_fin;	/* sequence # - tx completed */
 	spinlock_t		send_lock;	/* protect wr_sends */
 	struct delayed_work	tx_work;	/* retry of smc_cdc_msg_send */
 	u32			tx_off;		/* base offset in peer rmb */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index f64589d823aa..c5e33296e55c 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -47,6 +47,9 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 		/* guarantee 0 <= sndbuf_space <= sndbuf_desc->len */
 		smp_mb__after_atomic();
 		smc_curs_copy(&conn->tx_curs_fin, &cdcpend->cursor, conn);
+		smc_curs_copy(&conn->local_tx_ctrl_fin, &cdcpend->p_cursor,
+			      conn);
+		conn->tx_cdc_seq_fin = cdcpend->ctrl_seq;
 	}
 	smc_tx_sndbuf_nonfull(smc);
 	bh_unlock_sock(&smc->sk);
@@ -104,6 +107,9 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 	if (!rc) {
 		smc_curs_copy(&conn->rx_curs_confirmed, &cfed, conn);
 		conn->local_rx_ctrl.prod_flags.cons_curs_upd_req = 0;
+	} else {
+		conn->tx_cdc_seq--;
+		conn->local_tx_ctrl.seqno = conn->tx_cdc_seq;
 	}
 
 	return rc;
-- 
cgit v1.2.3-59-g8ed1b


From c6f02ebeea3a0ff4bddddf0fd82303190ebb3dd1 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:38 +0200
Subject: net/smc: switch connections to alternate link

Add smc_switch_conns() to switch all connections from a link that is
going down. Find another link to switch the connections to, and
switch each connection to the new link. smc_switch_cursor() updates the
cursors of a connection to the state of the last successfully sent CDC
message. When there is no link to switch to, terminate the link group.
Call smc_switch_conns() when a link is going down.
Since the link of a connection can now change, adapt the CDC and TX
functions to detect and handle link switches.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_cdc.c  |  18 +++++++-
 net/smc/smc_cdc.h  |   1 +
 net/smc/smc_core.c | 132 ++++++++++++++++++++++++++++++++++++++++++++++++++++-
 net/smc/smc_core.h |   2 +
 net/smc/smc_llc.c  |   6 +--
 net/smc/smc_tx.c   |  12 ++++-
 6 files changed, 162 insertions(+), 9 deletions(-)

diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index c5e33296e55c..3ca986066f32 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -56,11 +56,11 @@ static void smc_cdc_tx_handler(struct smc_wr_tx_pend_priv *pnd_snd,
 }
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
+			  struct smc_link *link,
 			  struct smc_wr_buf **wr_buf,
 			  struct smc_rdma_wr **wr_rdma_buf,
 			  struct smc_cdc_tx_pend **pend)
 {
-	struct smc_link *link = conn->lnk;
 	int rc;
 
 	rc = smc_wr_tx_get_free_slot(link, smc_cdc_tx_handler, wr_buf,
@@ -119,13 +119,27 @@ static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 {
 	struct smc_cdc_tx_pend *pend;
 	struct smc_wr_buf *wr_buf;
+	struct smc_link *link;
+	bool again = false;
 	int rc;
 
-	rc = smc_cdc_get_free_slot(conn, &wr_buf, NULL, &pend);
+again:
+	link = conn->lnk;
+	rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
 	if (rc)
 		return rc;
 
 	spin_lock_bh(&conn->send_lock);
+	if (link != conn->lnk) {
+		/* link of connection changed, try again one time*/
+		spin_unlock_bh(&conn->send_lock);
+		smc_wr_tx_put_slot(link,
+				   (struct smc_wr_tx_pend_priv *)pend);
+		if (again)
+			return -ENOLINK;
+		again = true;
+		goto again;
+	}
 	rc = smc_cdc_msg_send(conn, wr_buf, pend);
 	spin_unlock_bh(&conn->send_lock);
 	return rc;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 861dc24c588c..42246b4bdcc9 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -304,6 +304,7 @@ struct smc_cdc_tx_pend {
 };
 
 int smc_cdc_get_free_slot(struct smc_connection *conn,
+			  struct smc_link *link,
 			  struct smc_wr_buf **wr_buf,
 			  struct smc_rdma_wr **wr_rdma_buf,
 			  struct smc_cdc_tx_pend **pend);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 32a6cadc5c1f..21bc1ec07e99 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -432,6 +432,135 @@ out:
 	return rc;
 }
 
+static int smc_write_space(struct smc_connection *conn)
+{
+	int buffer_len = conn->peer_rmbe_size;
+	union smc_host_cursor prod;
+	union smc_host_cursor cons;
+	int space;
+
+	smc_curs_copy(&prod, &conn->local_tx_ctrl.prod, conn);
+	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+	/* determine rx_buf space */
+	space = buffer_len - smc_curs_diff(buffer_len, &cons, &prod);
+	return space;
+}
+
+static int smc_switch_cursor(struct smc_sock *smc)
+{
+	struct smc_connection *conn = &smc->conn;
+	union smc_host_cursor cons, fin;
+	int rc = 0;
+	int diff;
+
+	smc_curs_copy(&conn->tx_curs_sent, &conn->tx_curs_fin, conn);
+	smc_curs_copy(&fin, &conn->local_tx_ctrl_fin, conn);
+	/* set prod cursor to old state, enforce tx_rdma_writes() */
+	smc_curs_copy(&conn->local_tx_ctrl.prod, &fin, conn);
+	smc_curs_copy(&cons, &conn->local_rx_ctrl.cons, conn);
+
+	if (smc_curs_comp(conn->peer_rmbe_size, &cons, &fin) < 0) {
+		/* cons cursor advanced more than fin, and prod was set
+		 * fin above, so now prod is smaller than cons. Fix that.
+		 */
+		diff = smc_curs_diff(conn->peer_rmbe_size, &fin, &cons);
+		smc_curs_add(conn->sndbuf_desc->len,
+			     &conn->tx_curs_sent, diff);
+		smc_curs_add(conn->sndbuf_desc->len,
+			     &conn->tx_curs_fin, diff);
+
+		smp_mb__before_atomic();
+		atomic_add(diff, &conn->sndbuf_space);
+		smp_mb__after_atomic();
+
+		smc_curs_add(conn->peer_rmbe_size,
+			     &conn->local_tx_ctrl.prod, diff);
+		smc_curs_add(conn->peer_rmbe_size,
+			     &conn->local_tx_ctrl_fin, diff);
+	}
+	/* recalculate, value is used by tx_rdma_writes() */
+	atomic_set(&smc->conn.peer_rmbe_space, smc_write_space(conn));
+
+	if (smc->sk.sk_state != SMC_INIT &&
+	    smc->sk.sk_state != SMC_CLOSED) {
+		/* tbd: call rc = smc_cdc_get_slot_and_msg_send(conn); */
+		if (!rc) {
+			schedule_delayed_work(&conn->tx_work, 0);
+			smc->sk.sk_data_ready(&smc->sk);
+		}
+	}
+	return rc;
+}
+
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+				  struct smc_link *from_lnk, bool is_dev_err)
+{
+	struct smc_link *to_lnk = NULL;
+	struct smc_connection *conn;
+	struct smc_sock *smc;
+	struct rb_node *node;
+	int i, rc = 0;
+
+	/* link is inactive, wake up tx waiters */
+	smc_wr_wakeup_tx_wait(from_lnk);
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (lgr->lnk[i].state != SMC_LNK_ACTIVE ||
+		    i == from_lnk->link_idx)
+			continue;
+		if (is_dev_err && from_lnk->smcibdev == lgr->lnk[i].smcibdev &&
+		    from_lnk->ibport == lgr->lnk[i].ibport) {
+			continue;
+		}
+		to_lnk = &lgr->lnk[i];
+		break;
+	}
+	if (!to_lnk) {
+		smc_lgr_terminate_sched(lgr);
+		return NULL;
+	}
+again:
+	read_lock_bh(&lgr->conns_lock);
+	for (node = rb_first(&lgr->conns_all); node; node = rb_next(node)) {
+		conn = rb_entry(node, struct smc_connection, alert_node);
+		if (conn->lnk != from_lnk)
+			continue;
+		smc = container_of(conn, struct smc_sock, conn);
+		/* conn->lnk not yet set in SMC_INIT state */
+		if (smc->sk.sk_state == SMC_INIT)
+			continue;
+		if (smc->sk.sk_state == SMC_CLOSED ||
+		    smc->sk.sk_state == SMC_PEERCLOSEWAIT1 ||
+		    smc->sk.sk_state == SMC_PEERCLOSEWAIT2 ||
+		    smc->sk.sk_state == SMC_APPFINCLOSEWAIT ||
+		    smc->sk.sk_state == SMC_APPCLOSEWAIT1 ||
+		    smc->sk.sk_state == SMC_APPCLOSEWAIT2 ||
+		    smc->sk.sk_state == SMC_PEERFINCLOSEWAIT ||
+		    smc->sk.sk_state == SMC_PEERABORTWAIT ||
+		    smc->sk.sk_state == SMC_PROCESSABORT) {
+			spin_lock_bh(&conn->send_lock);
+			conn->lnk = to_lnk;
+			spin_unlock_bh(&conn->send_lock);
+			continue;
+		}
+		sock_hold(&smc->sk);
+		read_unlock_bh(&lgr->conns_lock);
+		/* avoid race with smcr_tx_sndbuf_nonempty() */
+		spin_lock_bh(&conn->send_lock);
+		conn->lnk = to_lnk;
+		rc = smc_switch_cursor(smc);
+		spin_unlock_bh(&conn->send_lock);
+		sock_put(&smc->sk);
+		if (rc) {
+			smcr_link_down_cond_sched(to_lnk);
+			return NULL;
+		}
+		goto again;
+	}
+	read_unlock_bh(&lgr->conns_lock);
+	return to_lnk;
+}
+
 static void smcr_buf_unuse(struct smc_buf_desc *rmb_desc,
 			   struct smc_link_group *lgr)
 {
@@ -943,8 +1072,7 @@ static void smcr_link_down(struct smc_link *lnk)
 		return;
 
 	smc_ib_modify_qp_reset(lnk);
-	to_lnk = NULL;
-	/* tbd: call to_lnk = smc_switch_conns(lgr, lnk, true); */
+	to_lnk = smc_switch_conns(lgr, lnk, true);
 	if (!to_lnk) { /* no backup link available */
 		smcr_link_clear(lnk);
 		return;
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 7fe53feb9dc4..584f11230c4f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -380,6 +380,8 @@ void smcr_link_clear(struct smc_link *lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
+struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
+				  struct smc_link *from_lnk, bool is_dev_err);
 void smcr_link_down_cond(struct smc_link *lnk);
 void smcr_link_down_cond_sched(struct smc_link *lnk);
 
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 7675ccd6f3c3..8d2368accbad 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -933,7 +933,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
 		return; /* no asymmetric link */
 	if (!smc_link_downing(&lnk_asym->state))
 		return;
-	/* tbd: lnk_new = smc_switch_conns(lgr, lnk_asym, false); */
+	lnk_new = smc_switch_conns(lgr, lnk_asym, false);
 	smc_wr_tx_wait_no_pending_sends(lnk_asym);
 	if (!lnk_new)
 		goto out_free;
@@ -1195,7 +1195,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 	smc_llc_send_message(lnk, &qentry->msg); /* response */
 
 	if (smc_link_downing(&lnk_del->state)) {
-		/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+		smc_switch_conns(lgr, lnk_del, false);
 		smc_wr_tx_wait_no_pending_sends(lnk_del);
 	}
 	smcr_link_clear(lnk_del);
@@ -1245,7 +1245,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 		goto out; /* asymmetric link already deleted */
 
 	if (smc_link_downing(&lnk_del->state)) {
-		/* tbd: call smc_switch_conns(lgr, lnk_del, false); */
+		smc_switch_conns(lgr, lnk_del, false);
 		smc_wr_tx_wait_no_pending_sends(lnk_del);
 	}
 	if (!list_empty(&lgr->list)) {
diff --git a/net/smc/smc_tx.c b/net/smc/smc_tx.c
index 417204572a69..54ba0443847e 100644
--- a/net/smc/smc_tx.c
+++ b/net/smc/smc_tx.c
@@ -482,12 +482,13 @@ static int smc_tx_rdma_writes(struct smc_connection *conn,
 static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 {
 	struct smc_cdc_producer_flags *pflags = &conn->local_tx_ctrl.prod_flags;
+	struct smc_link *link = conn->lnk;
 	struct smc_rdma_wr *wr_rdma_buf;
 	struct smc_cdc_tx_pend *pend;
 	struct smc_wr_buf *wr_buf;
 	int rc;
 
-	rc = smc_cdc_get_free_slot(conn, &wr_buf, &wr_rdma_buf, &pend);
+	rc = smc_cdc_get_free_slot(conn, link, &wr_buf, &wr_rdma_buf, &pend);
 	if (rc < 0) {
 		if (rc == -EBUSY) {
 			struct smc_sock *smc =
@@ -505,10 +506,17 @@ static int smcr_tx_sndbuf_nonempty(struct smc_connection *conn)
 	}
 
 	spin_lock_bh(&conn->send_lock);
+	if (link != conn->lnk) {
+		/* link of connection changed, tx_work will restart */
+		smc_wr_tx_put_slot(link,
+				   (struct smc_wr_tx_pend_priv *)pend);
+		rc = -ENOLINK;
+		goto out_unlock;
+	}
 	if (!pflags->urg_data_present) {
 		rc = smc_tx_rdma_writes(conn, wr_rdma_buf);
 		if (rc) {
-			smc_wr_tx_put_slot(conn->lnk,
+			smc_wr_tx_put_slot(link,
 					   (struct smc_wr_tx_pend_priv *)pend);
 			goto out_unlock;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 29bd73dba4f72970895a2459f7190d388f5204f7 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:39 +0200
Subject: net/smc: send failover validation message

When a connection is switched to a new link, a link validation
message must be sent to the peer over the new link, containing the
sequence number of the last CDC message that was sent over the old link.
The peer will validate that this sequence number is the same as or lower
than the number it received, and abort the connection if messages were
lost. Add smcr_cdc_msg_send_validation() to send the validation message
and call it from smc_switch_cursor() when a connection was switched.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_cdc.c  | 25 +++++++++++++++++++++++++
 net/smc/smc_cdc.h  |  1 +
 net/smc/smc_core.c |  2 +-
 3 files changed, 27 insertions(+), 1 deletion(-)

diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index 3ca986066f32..e6b7eef71831 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -115,6 +115,31 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 	return rc;
 }
 
+/* send a validation msg indicating the move of a conn to an other QP link */
+int smcr_cdc_msg_send_validation(struct smc_connection *conn)
+{
+	struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
+	struct smc_link *link = conn->lnk;
+	struct smc_cdc_tx_pend *pend;
+	struct smc_wr_buf *wr_buf;
+	struct smc_cdc_msg *peer;
+	int rc;
+
+	rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
+	if (rc)
+		return rc;
+
+	peer = (struct smc_cdc_msg *)wr_buf;
+	peer->common.type = local->common.type;
+	peer->len = local->len;
+	peer->seqno = htons(conn->tx_cdc_seq_fin); /* seqno last compl. tx */
+	peer->token = htonl(local->token);
+	peer->prod_flags.failover_validation = 1;
+
+	rc = smc_wr_tx_send(link, (struct smc_wr_tx_pend_priv *)pend);
+	return rc;
+}
+
 static int smcr_cdc_get_slot_and_msg_send(struct smc_connection *conn)
 {
 	struct smc_cdc_tx_pend *pend;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 42246b4bdcc9..9cfabc9af120 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -313,6 +313,7 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
 		     struct smc_cdc_tx_pend *pend);
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
 int smcd_cdc_msg_send(struct smc_connection *conn);
+int smcr_cdc_msg_send_validation(struct smc_connection *conn);
 int smc_cdc_init(void) __init;
 void smcd_cdc_rx_init(struct smc_connection *conn);
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 21bc1ec07e99..a558ce0bde97 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -483,7 +483,7 @@ static int smc_switch_cursor(struct smc_sock *smc)
 
 	if (smc->sk.sk_state != SMC_INIT &&
 	    smc->sk.sk_state != SMC_CLOSED) {
-		/* tbd: call rc = smc_cdc_get_slot_and_msg_send(conn); */
+		rc = smcr_cdc_msg_send_validation(conn);
 		if (!rc) {
 			schedule_delayed_work(&conn->tx_work, 0);
 			smc->sk.sk_data_ready(&smc->sk);
-- 
cgit v1.2.3-59-g8ed1b


From b286a0651e4404ab96cdfdcdad8a839a26b3751e Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:40 +0200
Subject: net/smc: handle incoming CDC validation message

Call smc_cdc_msg_validate() when a CDC message with the failover
validation bit enabled was received. Validate that the sequence number
sent with the message is one we already have received. If not, messages
were lost and the connection is terminated using a new abort_work.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc.h      |  2 ++
 net/smc/smc_cdc.c  | 37 +++++++++++++++++++++++++++++++------
 net/smc/smc_core.c | 15 +++++++++++++++
 3 files changed, 48 insertions(+), 6 deletions(-)

diff --git a/net/smc/smc.h b/net/smc/smc.h
index 1e9113771600..6f1c42da7a4c 100644
--- a/net/smc/smc.h
+++ b/net/smc/smc.h
@@ -188,12 +188,14 @@ struct smc_connection {
 	spinlock_t		acurs_lock;	/* protect cursors */
 #endif
 	struct work_struct	close_work;	/* peer sent some closing */
+	struct work_struct	abort_work;	/* abort the connection */
 	struct tasklet_struct	rx_tsklet;	/* Receiver tasklet for SMC-D */
 	u8			rx_off;		/* receive offset:
 						 * 0 for SMC-R, 32 for SMC-D
 						 */
 	u64			peer_token;	/* SMC-D token of peer */
 	u8			killed : 1;	/* abnormal termination */
+	u8			out_of_sync : 1; /* out of sync with peer */
 };
 
 struct smc_sock {				/* smc sock container */
diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index e6b7eef71831..b2b85e1be72c 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -282,6 +282,28 @@ static void smc_cdc_handle_urg_data_arrival(struct smc_sock *smc,
 	sk_send_sigurg(&smc->sk);
 }
 
+static void smc_cdc_msg_validate(struct smc_sock *smc, struct smc_cdc_msg *cdc,
+				 struct smc_link *link)
+{
+	struct smc_connection *conn = &smc->conn;
+	u16 recv_seq = ntohs(cdc->seqno);
+	s16 diff;
+
+	/* check that seqnum was seen before */
+	diff = conn->local_rx_ctrl.seqno - recv_seq;
+	if (diff < 0) { /* diff larger than 0x7fff */
+		/* drop connection */
+		conn->out_of_sync = 1;	/* prevent any further receives */
+		spin_lock_bh(&conn->send_lock);
+		conn->local_tx_ctrl.conn_state_flags.peer_conn_abort = 1;
+		conn->lnk = link;
+		spin_unlock_bh(&conn->send_lock);
+		sock_hold(&smc->sk); /* sock_put in abort_work */
+		if (!schedule_work(&conn->abort_work))
+			sock_put(&smc->sk);
+	}
+}
+
 static void smc_cdc_msg_recv_action(struct smc_sock *smc,
 				    struct smc_cdc_msg *cdc)
 {
@@ -412,16 +434,19 @@ static void smc_cdc_rx_handler(struct ib_wc *wc, void *buf)
 	read_lock_bh(&lgr->conns_lock);
 	conn = smc_lgr_find_conn(ntohl(cdc->token), lgr);
 	read_unlock_bh(&lgr->conns_lock);
-	if (!conn)
+	if (!conn || conn->out_of_sync)
 		return;
 	smc = container_of(conn, struct smc_sock, conn);
 
-	if (!cdc->prod_flags.failover_validation) {
-		if (smc_cdc_before(ntohs(cdc->seqno),
-				   conn->local_rx_ctrl.seqno))
-			/* received seqno is old */
-			return;
+	if (cdc->prod_flags.failover_validation) {
+		smc_cdc_msg_validate(smc, cdc, link);
+		return;
 	}
+	if (smc_cdc_before(ntohs(cdc->seqno),
+			   conn->local_rx_ctrl.seqno))
+		/* received seqno is old */
+		return;
+
 	smc_cdc_msg_recv(smc, cdc);
 }
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index a558ce0bde97..b5633fa19b6d 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -615,6 +615,8 @@ void smc_conn_free(struct smc_connection *conn)
 		tasklet_kill(&conn->rx_tsklet);
 	} else {
 		smc_cdc_tx_dismiss_slots(conn);
+		if (current_work() != &conn->abort_work)
+			cancel_work_sync(&conn->abort_work);
 	}
 	if (!list_empty(&lgr->list)) {
 		smc_lgr_unregister_conn(conn);
@@ -996,6 +998,18 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 	}
 }
 
+/* abort connection, abort_work scheduled from tasklet context */
+static void smc_conn_abort_work(struct work_struct *work)
+{
+	struct smc_connection *conn = container_of(work,
+						   struct smc_connection,
+						   abort_work);
+	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
+
+	smc_conn_kill(conn, true);
+	sock_put(&smc->sk); /* sock_hold done by schedulers of abort_work */
+}
+
 /* link is up - establish alternate link if applicable */
 static void smcr_link_up(struct smc_link_group *lgr,
 			 struct smc_ib_device *smcibdev, u8 ibport)
@@ -1302,6 +1316,7 @@ create:
 	conn->local_tx_ctrl.common.type = SMC_CDC_MSG_TYPE;
 	conn->local_tx_ctrl.len = SMC_WR_TX_SIZE;
 	conn->urg_state = SMC_URG_READ;
+	INIT_WORK(&smc->conn.abort_work, smc_conn_abort_work);
 	if (ini->is_smcd) {
 		conn->rx_off = sizeof(struct smcd_cdc_msg);
 		smcd_cdc_rx_init(conn); /* init tasklet for this conn */
-- 
cgit v1.2.3-59-g8ed1b


From 09c61d24f96dfef7791debfcaf96efe067ab2ba8 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:41 +0200
Subject: net/smc: wait for departure of an IB message

Introduce smc_wr_tx_send_wait() to send an IB message and wait for the
tx completion event of the message. This makes sure that the message is
no longer in-flight when the function returns.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.h |  1 +
 net/smc/smc_wr.c   | 39 +++++++++++++++++++++++++++++++++++++++
 net/smc/smc_wr.h   |  2 ++
 3 files changed, 42 insertions(+)

diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 584f11230c4f..86eebbadc8f6 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -85,6 +85,7 @@ struct smc_link {
 	struct smc_rdma_sges	*wr_tx_rdma_sges;/*RDMA WRITE gather meta data*/
 	struct smc_rdma_wr	*wr_tx_rdmas;	/* WR RDMA WRITE */
 	struct smc_wr_tx_pend	*wr_tx_pends;	/* WR send waiting for CQE */
+	struct completion	*wr_tx_compl;	/* WR send CQE completion */
 	/* above four vectors have wr_tx_cnt elements and use the same index */
 	dma_addr_t		wr_tx_dma_addr;	/* DMA address of wr_tx_bufs */
 	atomic_long_t		wr_tx_id;	/* seq # of last sent WR */
diff --git a/net/smc/smc_wr.c b/net/smc/smc_wr.c
index 3fd27bea4f7a..7239ba9b99dc 100644
--- a/net/smc/smc_wr.c
+++ b/net/smc/smc_wr.c
@@ -44,6 +44,7 @@ struct smc_wr_tx_pend {	/* control data for a pending send request */
 	struct smc_link		*link;
 	u32			idx;
 	struct smc_wr_tx_pend_priv priv;
+	u8			compl_requested;
 };
 
 /******************************** send queue *********************************/
@@ -103,6 +104,8 @@ static inline void smc_wr_tx_process_cqe(struct ib_wc *wc)
 	if (pnd_snd_idx == link->wr_tx_cnt)
 		return;
 	link->wr_tx_pends[pnd_snd_idx].wc_status = wc->status;
+	if (link->wr_tx_pends[pnd_snd_idx].compl_requested)
+		complete(&link->wr_tx_compl[pnd_snd_idx]);
 	memcpy(&pnd_snd, &link->wr_tx_pends[pnd_snd_idx], sizeof(pnd_snd));
 	/* clear the full struct smc_wr_tx_pend including .priv */
 	memset(&link->wr_tx_pends[pnd_snd_idx], 0,
@@ -275,6 +278,33 @@ int smc_wr_tx_send(struct smc_link *link, struct smc_wr_tx_pend_priv *priv)
 	return rc;
 }
 
+/* Send prepared WR slot via ib_post_send and wait for send completion
+ * notification.
+ * @priv: pointer to smc_wr_tx_pend_priv identifying prepared message buffer
+ */
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+			unsigned long timeout)
+{
+	struct smc_wr_tx_pend *pend;
+	int rc;
+
+	pend = container_of(priv, struct smc_wr_tx_pend, priv);
+	pend->compl_requested = 1;
+	init_completion(&link->wr_tx_compl[pend->idx]);
+
+	rc = smc_wr_tx_send(link, priv);
+	if (rc)
+		return rc;
+	/* wait for completion by smc_wr_tx_process_cqe() */
+	rc = wait_for_completion_interruptible_timeout(
+					&link->wr_tx_compl[pend->idx], timeout);
+	if (rc <= 0)
+		rc = -ENODATA;
+	if (rc > 0)
+		rc = 0;
+	return rc;
+}
+
 /* Register a memory region and wait for result. */
 int smc_wr_reg_send(struct smc_link *link, struct ib_mr *mr)
 {
@@ -555,6 +585,8 @@ void smc_wr_free_link(struct smc_link *lnk)
 
 void smc_wr_free_link_mem(struct smc_link *lnk)
 {
+	kfree(lnk->wr_tx_compl);
+	lnk->wr_tx_compl = NULL;
 	kfree(lnk->wr_tx_pends);
 	lnk->wr_tx_pends = NULL;
 	kfree(lnk->wr_tx_mask);
@@ -625,8 +657,15 @@ int smc_wr_alloc_link_mem(struct smc_link *link)
 				    GFP_KERNEL);
 	if (!link->wr_tx_pends)
 		goto no_mem_wr_tx_mask;
+	link->wr_tx_compl = kcalloc(SMC_WR_BUF_CNT,
+				    sizeof(link->wr_tx_compl[0]),
+				    GFP_KERNEL);
+	if (!link->wr_tx_compl)
+		goto no_mem_wr_tx_pends;
 	return 0;
 
+no_mem_wr_tx_pends:
+	kfree(link->wr_tx_pends);
 no_mem_wr_tx_mask:
 	kfree(link->wr_tx_mask);
 no_mem_wr_rx_sges:
diff --git a/net/smc/smc_wr.h b/net/smc/smc_wr.h
index f7eaeb3391f3..423b8709f1c9 100644
--- a/net/smc/smc_wr.h
+++ b/net/smc/smc_wr.h
@@ -101,6 +101,8 @@ int smc_wr_tx_put_slot(struct smc_link *link,
 		       struct smc_wr_tx_pend_priv *wr_pend_priv);
 int smc_wr_tx_send(struct smc_link *link,
 		   struct smc_wr_tx_pend_priv *wr_pend_priv);
+int smc_wr_tx_send_wait(struct smc_link *link, struct smc_wr_tx_pend_priv *priv,
+			unsigned long timeout);
 void smc_wr_tx_cq_handler(struct ib_cq *ib_cq, void *cq_context);
 void smc_wr_tx_dismiss_slots(struct smc_link *lnk, u8 wr_rx_hdr_type,
 			     smc_wr_tx_filter filter,
-- 
cgit v1.2.3-59-g8ed1b


From f3811fd7bc97587b142fed9edf8c726694220cb2 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:42 +0200
Subject: net/smc: send DELETE_LINK, ALL message and wait for send to complete

Add smc_llc_send_message_wait() which uses smc_wr_tx_send_wait() to send
an LLC message and wait for the message send to complete.
smc_llc_send_link_delete_all() calls the new function to send a
DELETE_LINK, ALL LLC message. The RFC states that the sender of this type
of message needs to wait for the completion event of the message
transmission and can terminate the link afterwards.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  5 +++++
 net/smc/smc_llc.c  | 44 ++++++++++++++++++++++++++++++++++++++++++++
 net/smc/smc_llc.h  |  2 ++
 3 files changed, 51 insertions(+)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index b5633fa19b6d..8f630b76c5a4 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -238,6 +238,9 @@ static void smc_lgr_free_work(struct work_struct *work)
 	spin_unlock_bh(lgr_lock);
 	cancel_delayed_work(&lgr->free_work);
 
+	if (!lgr->is_smcd && !lgr->terminating)
+		smc_llc_send_link_delete_all(lgr, true,
+					     SMC_LLC_DEL_PROG_INIT_TERM);
 	if (lgr->is_smcd && !lgr->terminating)
 		smc_ism_signal_shutdown(lgr);
 	if (!lgr->is_smcd) {
@@ -847,6 +850,8 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
 		put_device(&lgr->smcd->dev);
 	} else {
+		smc_llc_send_link_delete_all(lgr, false,
+					     SMC_LLC_DEL_OP_INIT_TERM);
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			struct smc_link *lnk = &lgr->lnk[i];
 
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 8d2368accbad..0ea7ad6188ae 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -560,6 +560,25 @@ static int smc_llc_send_message(struct smc_link *link, void *llcbuf)
 	return smc_wr_tx_send(link, pend);
 }
 
+/* schedule an llc send on link, may wait for buffers,
+ * and wait for send completion notification.
+ * @return 0 on success
+ */
+static int smc_llc_send_message_wait(struct smc_link *link, void *llcbuf)
+{
+	struct smc_wr_tx_pend_priv *pend;
+	struct smc_wr_buf *wr_buf;
+	int rc;
+
+	if (!smc_link_usable(link))
+		return -ENOLINK;
+	rc = smc_llc_add_pending_send(link, &wr_buf, &pend);
+	if (rc)
+		return rc;
+	memcpy(wr_buf, llcbuf, sizeof(union smc_llc_msg));
+	return smc_wr_tx_send_wait(link, pend, SMC_LLC_WAIT_TIME);
+}
+
 /********************************* receive ***********************************/
 
 static int smc_llc_alloc_alt_link(struct smc_link_group *lgr,
@@ -1215,6 +1234,29 @@ out:
 	kfree(qentry);
 }
 
+/* try to send a DELETE LINK ALL request on any active link,
+ * waiting for send completion
+ */
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord, u32 rsn)
+{
+	struct smc_llc_msg_del_link delllc = {0};
+	int i;
+
+	delllc.hd.common.type = SMC_LLC_DELETE_LINK;
+	delllc.hd.length = sizeof(delllc);
+	if (ord)
+		delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ORDERLY;
+	delllc.hd.flags |= SMC_LLC_FLAG_DEL_LINK_ALL;
+	delllc.reason = htonl(rsn);
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		if (!smc_link_usable(&lgr->lnk[i]))
+			continue;
+		if (!smc_llc_send_message_wait(&lgr->lnk[i], &delllc))
+			break;
+	}
+}
+
 static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 {
 	struct smc_llc_msg_del_link *del_llc;
@@ -1230,6 +1272,8 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 
 	if (qentry->msg.delete_link.hd.flags & SMC_LLC_FLAG_DEL_LINK_ALL) {
 		/* delete entire lgr */
+		smc_llc_send_link_delete_all(lgr, true, ntohl(
+					      qentry->msg.delete_link.reason));
 		smc_lgr_terminate_sched(lgr);
 		goto out;
 	}
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index c335fc5f363c..6d2a5d943b83 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -89,6 +89,8 @@ struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				    int time_out, u8 exp_msg);
 struct smc_llc_qentry *smc_llc_flow_qentry_clr(struct smc_llc_flow *flow);
 void smc_llc_flow_qentry_del(struct smc_llc_flow *flow);
+void smc_llc_send_link_delete_all(struct smc_link_group *lgr, bool ord,
+				  u32 rsn);
 int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry);
 int smc_llc_srv_add_link(struct smc_link *link);
 void smc_llc_srv_add_link_local(struct smc_link *link);
-- 
cgit v1.2.3-59-g8ed1b


From 56bc3b2094b428d808dd1704fdb3086c66bcb310 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:43 +0200
Subject: net/smc: assign link to a new connection

For new connections, assign a link from the link group, using some
simple load balancing.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 65 ++++++++++++++++++++++++++++++++++++++----------------
 1 file changed, 46 insertions(+), 19 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 8f630b76c5a4..9c19b9aa3719 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -121,16 +121,59 @@ static void smc_lgr_add_alert_token(struct smc_connection *conn)
 	rb_insert_color(&conn->alert_node, &conn->lgr->conns_all);
 }
 
+/* assign an SMC-R link to the connection */
+static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
+{
+	enum smc_link_state expected = first ? SMC_LNK_ACTIVATING :
+				       SMC_LNK_ACTIVE;
+	int i, j;
+
+	/* do link balancing */
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		struct smc_link *lnk = &conn->lgr->lnk[i];
+
+		if (lnk->state != expected)
+			continue;
+		if (conn->lgr->role == SMC_CLNT) {
+			conn->lnk = lnk; /* temporary, SMC server assigns link*/
+			break;
+		}
+		if (conn->lgr->conns_num % 2) {
+			for (j = i + 1; j < SMC_LINKS_PER_LGR_MAX; j++) {
+				struct smc_link *lnk2;
+
+				lnk2 = &conn->lgr->lnk[j];
+				if (lnk2->state == expected) {
+					conn->lnk = lnk2;
+					break;
+				}
+			}
+		}
+		if (!conn->lnk)
+			conn->lnk = lnk;
+		break;
+	}
+	if (!conn->lnk)
+		return SMC_CLC_DECL_NOACTLINK;
+	return 0;
+}
+
 /* Register connection in link group by assigning an alert token
  * registered in a search tree.
  * Requires @conns_lock
  * Note that '0' is a reserved value and not assigned.
  */
-static int smc_lgr_register_conn(struct smc_connection *conn)
+static int smc_lgr_register_conn(struct smc_connection *conn, bool first)
 {
 	struct smc_sock *smc = container_of(conn, struct smc_sock, conn);
 	static atomic_t nexttoken = ATOMIC_INIT(0);
+	int rc;
 
+	if (!conn->lgr->is_smcd) {
+		rc = smcr_lgr_conn_assign_link(conn, first);
+		if (rc)
+			return rc;
+	}
 	/* find a new alert_token_local value not yet used by some connection
 	 * in this link group
 	 */
@@ -141,22 +184,6 @@ static int smc_lgr_register_conn(struct smc_connection *conn)
 			conn->alert_token_local = 0;
 	}
 	smc_lgr_add_alert_token(conn);
-
-	/* assign the new connection to a link */
-	if (!conn->lgr->is_smcd) {
-		struct smc_link *lnk;
-		int i;
-
-		/* tbd - link balancing */
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			lnk = &conn->lgr->lnk[i];
-			if (lnk->state == SMC_LNK_ACTIVATING ||
-			    lnk->state == SMC_LNK_ACTIVE)
-				conn->lnk = lnk;
-		}
-		if (!conn->lnk)
-			return SMC_CLC_DECL_NOACTLINK;
-	}
 	conn->lgr->conns_num++;
 	return 0;
 }
@@ -1285,7 +1312,7 @@ int smc_conn_create(struct smc_sock *smc, struct smc_init_info *ini)
 			/* link group found */
 			ini->cln_first_contact = SMC_REUSE_CONTACT;
 			conn->lgr = lgr;
-			rc = smc_lgr_register_conn(conn); /* add conn to lgr */
+			rc = smc_lgr_register_conn(conn, false);
 			write_unlock_bh(&lgr->conns_lock);
 			if (!rc && delayed_work_pending(&lgr->free_work))
 				cancel_delayed_work(&lgr->free_work);
@@ -1313,7 +1340,7 @@ create:
 			goto out;
 		lgr = conn->lgr;
 		write_lock_bh(&lgr->conns_lock);
-		rc = smc_lgr_register_conn(conn); /* add smc conn to lgr */
+		rc = smc_lgr_register_conn(conn, true);
 		write_unlock_bh(&lgr->conns_lock);
 		if (rc)
 			goto out;
-- 
cgit v1.2.3-59-g8ed1b


From ad6c111b8ae760114df6765d5a5ed1b09020d45d Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:44 +0200
Subject: net/smc: asymmetric link tagging

New connections must not be assigned to asymmetric links. Add asymmetric
link tagging using new link variable link_is_asym. The new helpers
smcr_lgr_set_type() and smcr_lgr_set_type_asym() are called to set the
state of the link group, and tag all links accordingly.
smcr_lgr_conn_assign_link() respects the link tagging and will not
assign new connections to links tagged as asymmetric.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 26 +++++++++++++++++++++++---
 net/smc/smc_core.h |  4 ++++
 net/smc/smc_llc.c  | 20 ++++++++++++++------
 3 files changed, 41 insertions(+), 9 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 9c19b9aa3719..be15b30a1234 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -132,7 +132,7 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 		struct smc_link *lnk = &conn->lgr->lnk[i];
 
-		if (lnk->state != expected)
+		if (lnk->state != expected || lnk->link_is_asym)
 			continue;
 		if (conn->lgr->role == SMC_CLNT) {
 			conn->lnk = lnk; /* temporary, SMC server assigns link*/
@@ -143,7 +143,8 @@ static int smcr_lgr_conn_assign_link(struct smc_connection *conn, bool first)
 				struct smc_link *lnk2;
 
 				lnk2 = &conn->lgr->lnk[j];
-				if (lnk2->state == expected) {
+				if (lnk2->state == expected &&
+				    !lnk2->link_is_asym) {
 					conn->lnk = lnk2;
 					break;
 				}
@@ -1030,6 +1031,25 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 	}
 }
 
+/* set new lgr type and clear all asymmetric link tagging */
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
+{
+	int i;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
+		if (smc_link_usable(&lgr->lnk[i]))
+			lgr->lnk[i].link_is_asym = false;
+	lgr->type = new_type;
+}
+
+/* set new lgr type and tag a link as asymmetric */
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+			    enum smc_lgr_type new_type, int asym_lnk_idx)
+{
+	smcr_lgr_set_type(lgr, new_type);
+	lgr->lnk[asym_lnk_idx].link_is_asym = true;
+}
+
 /* abort connection, abort_work scheduled from tasklet context */
 static void smc_conn_abort_work(struct work_struct *work)
 {
@@ -1123,7 +1143,7 @@ static void smcr_link_down(struct smc_link *lnk)
 		smcr_link_clear(lnk);
 		return;
 	}
-	lgr->type = SMC_LGR_SINGLE;
+	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
 	del_link_id = lnk->link_id;
 
 	if (lgr->role == SMC_SERV) {
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 86eebbadc8f6..6ed7ab6d89d5 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -117,6 +117,7 @@ struct smc_link {
 	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
 	u8			link_idx;	/* index in lgr link array */
+	u8			link_is_asym;	/* is link asymmetric? */
 	struct smc_link_group	*lgr;		/* parent link group */
 	struct work_struct	link_down_wrk;	/* wrk to bring link down */
 
@@ -380,6 +381,9 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 void smcr_link_clear(struct smc_link *lnk);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
+void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
+void smcr_lgr_set_type_asym(struct smc_link_group *lgr,
+			    enum smc_lgr_type new_type, int asym_lnk_idx);
 int smcr_link_reg_rmb(struct smc_link *link, struct smc_buf_desc *rmb_desc);
 struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 				  struct smc_link *from_lnk, bool is_dev_err);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 0ea7ad6188ae..f65b2aac6b52 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -796,7 +796,11 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 		return -ENOLINK;
 	}
 	smc_llc_link_active(link_new);
-	lgr->type = lgr_new_t;
+	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+	else
+		smcr_lgr_set_type(lgr, lgr_new_t);
 	return 0;
 }
 
@@ -1038,7 +1042,11 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
 		return -ENOLINK;
 	}
 	smc_llc_link_active(link_new);
-	lgr->type = lgr_new_t;
+	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
+	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
+		smcr_lgr_set_type_asym(lgr, lgr_new_t, link_new->link_idx);
+	else
+		smcr_lgr_set_type(lgr, lgr_new_t);
 	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 	return 0;
 }
@@ -1223,9 +1231,9 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 	if (lnk_del == lnk_asym) {
 		/* expected deletion of asym link, don't change lgr state */
 	} else if (active_links == 1) {
-		lgr->type = SMC_LGR_SINGLE;
+		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
 	} else if (!active_links) {
-		lgr->type = SMC_LGR_NONE;
+		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
 		smc_lgr_terminate_sched(lgr);
 	}
 out_unlock:
@@ -1314,9 +1322,9 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 
 	active_links = smc_llc_active_link_count(lgr);
 	if (active_links == 1) {
-		lgr->type = SMC_LGR_SINGLE;
+		smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
 	} else if (!active_links) {
-		lgr->type = SMC_LGR_NONE;
+		smcr_lgr_set_type(lgr, SMC_LGR_NONE);
 		smc_lgr_terminate_sched(lgr);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 3e0c40afce4ea5b08bb7e3f65c55157817116640 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:45 +0200
Subject: net/smc: add termination reason and handle LLC protocol violation

Allow setting the reason code for the link group termination, and set
meaningful values before termination processing is triggered. This
reason code is sent to the peer in the final delete link message.
When the LLC request or response layer receives a message type that was
not handled, drop a warning and terminate the link group.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  8 ++++++--
 net/smc/smc_core.h |  2 ++
 net/smc/smc_llc.c  | 14 ++++++++++++++
 net/smc/smc_llc.h  |  8 ++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index be15b30a1234..b6f93b44f9c7 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -878,8 +878,11 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 		smc_ism_put_vlan(lgr->smcd, lgr->vlan_id);
 		put_device(&lgr->smcd->dev);
 	} else {
-		smc_llc_send_link_delete_all(lgr, false,
-					     SMC_LLC_DEL_OP_INIT_TERM);
+		u32 rsn = lgr->llc_termination_rsn;
+
+		if (!rsn)
+			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
+		smc_llc_send_link_delete_all(lgr, false, rsn);
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			struct smc_link *lnk = &lgr->lnk[i];
 
@@ -1018,6 +1021,7 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 
 	list_for_each_entry_safe(lgr, lg, &lgr_free_list, list) {
 		list_del_init(&lgr->list);
+		smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_OP_INIT_TERM);
 		__smc_lgr_terminate(lgr, false);
 	}
 
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 6ed7ab6d89d5..32bc45af9a1a 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -271,6 +271,8 @@ struct smc_link_group {
 						/* protects llc flow */
 			int			llc_testlink_time;
 						/* link keep alive time */
+			u32			llc_termination_rsn;
+						/* rsn code for termination */
 		};
 		struct { /* SMC-D */
 			u64			peer_gid;
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index f65b2aac6b52..482acf80e26e 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -1420,6 +1420,14 @@ static void smc_llc_rmt_delete_rkey(struct smc_link_group *lgr)
 	smc_llc_flow_qentry_del(&lgr->llc_flow_rmt);
 }
 
+static void smc_llc_protocol_violation(struct smc_link_group *lgr, u8 type)
+{
+	pr_warn_ratelimited("smc: SMC-R lg %*phN LLC protocol violation: "
+			    "llc_type %d\n", SMC_LGR_ID_SIZE, &lgr->id, type);
+	smc_llc_set_termination_rsn(lgr, SMC_LLC_DEL_PROT_VIOL);
+	smc_lgr_terminate_sched(lgr);
+}
+
 /* flush the llc event queue */
 static void smc_llc_event_flush(struct smc_link_group *lgr)
 {
@@ -1520,6 +1528,9 @@ static void smc_llc_event_handler(struct smc_llc_qentry *qentry)
 			smc_llc_flow_stop(lgr, &lgr->llc_flow_rmt);
 		}
 		return;
+	default:
+		smc_llc_protocol_violation(lgr, llc->raw.hdr.common.type);
+		break;
 	}
 out:
 	kfree(qentry);
@@ -1579,6 +1590,9 @@ static void smc_llc_rx_response(struct smc_link *link,
 	case SMC_LLC_CONFIRM_RKEY_CONT:
 		/* not used because max links is 3 */
 		break;
+	default:
+		smc_llc_protocol_violation(link->lgr, llc_type);
+		break;
 	}
 	kfree(qentry);
 }
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 6d2a5d943b83..f5882ebf357b 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -60,6 +60,14 @@ static inline struct smc_link *smc_llc_usable_link(struct smc_link_group *lgr)
 	return NULL;
 }
 
+/* set the termination reason code for the link group */
+static inline void smc_llc_set_termination_rsn(struct smc_link_group *lgr,
+					       u32 rsn)
+{
+	if (!lgr->llc_termination_rsn)
+		lgr->llc_termination_rsn = rsn;
+}
+
 /* transmit */
 int smc_llc_send_confirm_link(struct smc_link *lnk,
 			      enum smc_llc_reqresp reqresp);
-- 
cgit v1.2.3-59-g8ed1b


From a52bcc919b14c9d78f03b2b4ff604e5ca69c7e6d Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:46 +0200
Subject: net/smc: improve termination processing

Add helper smcr_lgr_link_deactivate_all() and eliminate duplicate code.
In smc_lgr_free(), clear the smc-r links before smc_lgr_free_bufs() is
called so buffers are already prepared for free. The usage of the soft
parameter in __smc_lgr_terminate() is no longer needed, smc_lgr_free()
can be called directly. smc_lgr_terminate_sched() and
smc_smcd_terminate() set lgr->freeing to indicate that the link group
will be freed soon to avoid unnecessary schedules of the free worker.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c | 61 +++++++++++++++++++++++++++---------------------------
 1 file changed, 31 insertions(+), 30 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index b6f93b44f9c7..fb391bc6781e 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -237,6 +237,19 @@ void smc_lgr_cleanup_early(struct smc_connection *conn)
 	smc_lgr_schedule_free_work_fast(lgr);
 }
 
+static void smcr_lgr_link_deactivate_all(struct smc_link_group *lgr)
+{
+	int i;
+
+	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+		struct smc_link *lnk = &lgr->lnk[i];
+
+		if (smc_link_usable(lnk))
+			lnk->state = SMC_LNK_INACTIVE;
+	}
+	wake_up_interruptible_all(&lgr->llc_waiter);
+}
+
 static void smc_lgr_free(struct smc_link_group *lgr);
 
 static void smc_lgr_free_work(struct work_struct *work)
@@ -246,7 +259,6 @@ static void smc_lgr_free_work(struct work_struct *work)
 						  free_work);
 	spinlock_t *lgr_lock;
 	bool conns;
-	int i;
 
 	smc_lgr_list_head(lgr, &lgr_lock);
 	spin_lock_bh(lgr_lock);
@@ -271,15 +283,8 @@ static void smc_lgr_free_work(struct work_struct *work)
 					     SMC_LLC_DEL_PROG_INIT_TERM);
 	if (lgr->is_smcd && !lgr->terminating)
 		smc_ism_signal_shutdown(lgr);
-	if (!lgr->is_smcd) {
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			struct smc_link *lnk = &lgr->lnk[i];
-
-			if (smc_link_usable(lnk))
-				lnk->state = SMC_LNK_INACTIVE;
-		}
-		wake_up_interruptible_all(&lgr->llc_waiter);
-	}
+	if (!lgr->is_smcd)
+		smcr_lgr_link_deactivate_all(lgr);
 	smc_lgr_free(lgr);
 }
 
@@ -802,6 +807,16 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 {
 	int i;
 
+	if (!lgr->is_smcd) {
+		mutex_lock(&lgr->llc_conf_mutex);
+		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
+			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
+				smcr_link_clear(&lgr->lnk[i]);
+		}
+		mutex_unlock(&lgr->llc_conf_mutex);
+		smc_llc_lgr_clear(lgr);
+	}
+
 	smc_lgr_free_bufs(lgr);
 	if (lgr->is_smcd) {
 		if (!lgr->terminating) {
@@ -811,11 +826,6 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 		if (!atomic_dec_return(&lgr->smcd->lgr_cnt))
 			wake_up(&lgr->smcd->lgrs_deleted);
 	} else {
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
-				smcr_link_clear(&lgr->lnk[i]);
-		}
-		smc_llc_lgr_clear(lgr);
 		if (!atomic_dec_return(&lgr_cnt))
 			wake_up(&lgrs_deleted);
 	}
@@ -870,8 +880,6 @@ static void smc_conn_kill(struct smc_connection *conn, bool soft)
 
 static void smc_lgr_cleanup(struct smc_link_group *lgr)
 {
-	int i;
-
 	if (lgr->is_smcd) {
 		smc_ism_signal_shutdown(lgr);
 		smcd_unregister_all_dmbs(lgr);
@@ -883,13 +891,7 @@ static void smc_lgr_cleanup(struct smc_link_group *lgr)
 		if (!rsn)
 			rsn = SMC_LLC_DEL_PROG_INIT_TERM;
 		smc_llc_send_link_delete_all(lgr, false, rsn);
-		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
-			struct smc_link *lnk = &lgr->lnk[i];
-
-			if (smc_link_usable(lnk))
-				lnk->state = SMC_LNK_INACTIVE;
-		}
-		wake_up_interruptible_all(&lgr->llc_waiter);
+		smcr_lgr_link_deactivate_all(lgr);
 	}
 }
 
@@ -905,8 +907,8 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 
 	if (lgr->terminating)
 		return;	/* lgr already terminating */
-	if (!soft)
-		cancel_delayed_work_sync(&lgr->free_work);
+	/* cancel free_work sync, will terminate when lgr->freeing is set */
+	cancel_delayed_work_sync(&lgr->free_work);
 	lgr->terminating = 1;
 
 	/* kill remaining link group connections */
@@ -926,10 +928,7 @@ static void __smc_lgr_terminate(struct smc_link_group *lgr, bool soft)
 	}
 	read_unlock_bh(&lgr->conns_lock);
 	smc_lgr_cleanup(lgr);
-	if (soft)
-		smc_lgr_schedule_free_work_fast(lgr);
-	else
-		smc_lgr_free(lgr);
+	smc_lgr_free(lgr);
 }
 
 /* unlink link group and schedule termination */
@@ -944,6 +943,7 @@ void smc_lgr_terminate_sched(struct smc_link_group *lgr)
 		return;	/* lgr already terminating */
 	}
 	list_del_init(&lgr->list);
+	lgr->freeing = 1;
 	spin_unlock_bh(lgr_lock);
 	schedule_work(&lgr->terminate_work);
 }
@@ -962,6 +962,7 @@ void smc_smcd_terminate(struct smcd_dev *dev, u64 peer_gid, unsigned short vlan)
 			if (peer_gid) /* peer triggered termination */
 				lgr->peer_shutdown = 1;
 			list_move(&lgr->list, &lgr_free_list);
+			lgr->freeing = 1;
 		}
 	}
 	spin_unlock_bh(&dev->lgr_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 45fa8da0bf5cb447fcf835d184e2d3b745376e69 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:47 +0200
Subject: net/smc: create improved SMC-R link_uid

The link_uid of an SMC-R link is exchanged between SMC peers and its
value can be used for debugging purposes. Create a unique link_uid
during link initialization and use it in communication with SMC-R peers.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_core.c |  1 +
 net/smc/smc_core.h |  4 +++-
 net/smc/smc_llc.c  | 18 ++++++++++++++----
 net/smc/smc_llc.h  |  1 +
 4 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index fb391bc6781e..fb5f685ff494 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -331,6 +331,7 @@ int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 	lnk->smcibdev = ini->ib_dev;
 	lnk->ibport = ini->ib_port;
 	lnk->path_mtu = ini->ib_dev->pattr[ini->ib_port - 1].active_mtu;
+	smc_llc_link_set_uid(lnk);
 	INIT_WORK(&lnk->link_down_wrk, smc_link_down_work);
 	if (!ini->ib_dev->initialized) {
 		rc = (int)smc_ib_setup_per_ibdev(ini->ib_dev);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 32bc45af9a1a..e2ace20db7fd 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -70,6 +70,8 @@ struct smc_rdma_wr {				/* work requests per message
 	struct ib_rdma_wr	wr_tx_rdma[SMC_MAX_RDMA_WRITES];
 };
 
+#define SMC_LGR_ID_SIZE		4
+
 struct smc_link {
 	struct smc_ib_device	*smcibdev;	/* ib-device */
 	u8			ibport;		/* port - values 1 | 2 */
@@ -116,6 +118,7 @@ struct smc_link {
 	u8			peer_mac[ETH_ALEN];	/* = gid[8:10||13:15] */
 	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
+	u8			link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */
 	u8			link_idx;	/* index in lgr link array */
 	u8			link_is_asym;	/* is link asymmetric? */
 	struct smc_link_group	*lgr;		/* parent link group */
@@ -178,7 +181,6 @@ struct smc_rtoken {				/* address/key of remote RMB */
 	u32			rkey;
 };
 
-#define SMC_LGR_ID_SIZE		4
 #define SMC_BUF_MIN_SIZE	16384	/* minimum size of an RMB */
 #define SMC_RMBE_SIZES		16	/* number of distinct RMBE sizes */
 /* theoretically, the RFC states that largest size would be 512K,
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 482acf80e26e..afb889d60881 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -361,7 +361,6 @@ static int smc_llc_add_pending_send(struct smc_link *link,
 int smc_llc_send_confirm_link(struct smc_link *link,
 			      enum smc_llc_reqresp reqresp)
 {
-	struct smc_link_group *lgr = smc_get_lgr(link);
 	struct smc_llc_msg_confirm_link *confllc;
 	struct smc_wr_tx_pend_priv *pend;
 	struct smc_wr_buf *wr_buf;
@@ -382,7 +381,7 @@ int smc_llc_send_confirm_link(struct smc_link *link,
 	memcpy(confllc->sender_gid, link->gid, SMC_GID_SIZE);
 	hton24(confllc->sender_qp_num, link->roce_qp->qp_num);
 	confllc->link_num = link->link_id;
-	memcpy(confllc->link_uid, lgr->id, SMC_LGR_ID_SIZE);
+	memcpy(confllc->link_uid, link->link_uid, SMC_LGR_ID_SIZE);
 	confllc->max_links = SMC_LLC_ADD_LNK_MAX_LINKS;
 	/* send llc message */
 	rc = smc_wr_tx_send(link, pend);
@@ -845,7 +844,8 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 	if (rc)
 		goto out_reject;
 	smc_llc_save_add_link_info(lnk_new, llc);
-	lnk_new->link_id = llc->link_num;
+	lnk_new->link_id = llc->link_num;	/* SMC server assigns link id */
+	smc_llc_link_set_uid(lnk_new);
 
 	rc = smc_ib_ready_link(lnk_new);
 	if (rc)
@@ -1775,12 +1775,22 @@ out:
 	return rc;
 }
 
+void smc_llc_link_set_uid(struct smc_link *link)
+{
+	__be32 link_uid;
+
+	link_uid = htonl(*((u32 *)link->lgr->id) + link->link_id);
+	memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
+}
+
 /* evaluate confirm link request or response */
 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
 			   enum smc_llc_reqresp type)
 {
-	if (type == SMC_LLC_REQ)	/* SMC server assigns link_id */
+	if (type == SMC_LLC_REQ) {	/* SMC server assigns link_id */
 		qentry->link->link_id = qentry->msg.confirm_link.link_num;
+		smc_llc_link_set_uid(qentry->link);
+	}
 	if (!(qentry->msg.raw.hdr.flags & SMC_LLC_FLAG_NO_RMBE_EYEC))
 		return -ENOTSUPP;
 	return 0;
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index f5882ebf357b..1b68f229cb99 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -92,6 +92,7 @@ int smc_llc_flow_initiate(struct smc_link_group *lgr,
 void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
 			   enum smc_llc_reqresp type);
+void smc_llc_link_set_uid(struct smc_link *link);
 struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				    struct smc_link *lnk,
 				    int time_out, u8 exp_msg);
-- 
cgit v1.2.3-59-g8ed1b


From 649758fff327eeb184713db8b0b0ebfa28693077 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Mon, 4 May 2020 14:18:48 +0200
Subject: net/smc: save SMC-R peer link_uid

During SMC-R link establishment the peers exchange the link_uid that
is used for debugging purposes. Save the peer link_uid in smc_link so it
can be retrieved by the smc_diag netlink interface.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   | 2 ++
 net/smc/smc_core.h | 1 +
 net/smc/smc_llc.c  | 9 +++++++++
 net/smc/smc_llc.h  | 1 +
 4 files changed, 13 insertions(+)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index c67272007f41..4e4421c95ca1 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -390,6 +390,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
+	smc_llc_save_peer_uid(qentry);
 	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_REQ);
 	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
 	if (rc)
@@ -1056,6 +1057,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 				      SMC_CLC_DECLINE, CLC_WAIT_TIME_SHORT);
 		return rc == -EAGAIN ? SMC_CLC_DECL_TIMEOUT_CL : rc;
 	}
+	smc_llc_save_peer_uid(qentry);
 	rc = smc_llc_eval_conf_link(qentry, SMC_LLC_RESP);
 	smc_llc_flow_qentry_del(&link->lgr->llc_flow_lcl);
 	if (rc)
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index e2ace20db7fd..4ae76802214f 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -119,6 +119,7 @@ struct smc_link {
 	u8			peer_gid[SMC_GID_SIZE];	/* gid of peer*/
 	u8			link_id;	/* unique # within link group */
 	u8			link_uid[SMC_LGR_ID_SIZE]; /* unique lnk id */
+	u8			peer_link_uid[SMC_LGR_ID_SIZE]; /* peer uid */
 	u8			link_idx;	/* index in lgr link array */
 	u8			link_is_asym;	/* is link asymmetric? */
 	struct smc_link_group	*lgr;		/* parent link group */
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index afb889d60881..66ddc9cf5e2f 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -770,6 +770,7 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 		return -ENOLINK;
 	}
+	smc_llc_save_peer_uid(qentry);
 	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 
 	rc = smc_ib_modify_qp_rts(link_new);
@@ -1041,6 +1042,7 @@ static int smc_llc_srv_conf_link(struct smc_link *link,
 					 false, SMC_LLC_DEL_LOST_PATH);
 		return -ENOLINK;
 	}
+	smc_llc_save_peer_uid(qentry);
 	smc_llc_link_active(link_new);
 	if (lgr_new_t == SMC_LGR_ASYMMETRIC_LOCAL ||
 	    lgr_new_t == SMC_LGR_ASYMMETRIC_PEER)
@@ -1783,6 +1785,13 @@ void smc_llc_link_set_uid(struct smc_link *link)
 	memcpy(link->link_uid, &link_uid, SMC_LGR_ID_SIZE);
 }
 
+/* save peers link user id, used for debug purposes */
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry)
+{
+	memcpy(qentry->link->peer_link_uid, qentry->msg.confirm_link.link_uid,
+	       SMC_LGR_ID_SIZE);
+}
+
 /* evaluate confirm link request or response */
 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
 			   enum smc_llc_reqresp type)
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 1b68f229cb99..55287376112d 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -93,6 +93,7 @@ void smc_llc_flow_stop(struct smc_link_group *lgr, struct smc_llc_flow *flow);
 int smc_llc_eval_conf_link(struct smc_llc_qentry *qentry,
 			   enum smc_llc_reqresp type);
 void smc_llc_link_set_uid(struct smc_link *link);
+void smc_llc_save_peer_uid(struct smc_llc_qentry *qentry);
 struct smc_llc_qentry *smc_llc_wait(struct smc_link_group *lgr,
 				    struct smc_link *lnk,
 				    int time_out, u8 exp_msg);
-- 
cgit v1.2.3-59-g8ed1b


From e90c9fcedc087c8ba1d34da88381838ed68bfb1c Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Mon, 4 May 2020 18:52:28 +0200
Subject: net: dsa: felix: allow the device to be disabled

If there is no specific configuration of the felix switch in the device
tree, but only the default configuration (i.e. given by the SoC's dtsi
file), the probe fails because no CPU port has been set. On the other
hand you cannot set a default CPU port because that depends on the
actual board using the switch.

[    2.701300] DSA: tree 0 has no CPU port
[    2.705167] mscc_felix 0000:00:00.5: Failed to register DSA switch: -22
[    2.711844] mscc_felix: probe of 0000:00:00.5 failed with error -22

Thus let the device tree disable this device entirely, like it is also
done with the enetc driver of the same SoC.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 8a633ddce6c5..e5b6748f6654 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -773,6 +773,11 @@ static int felix_pci_probe(struct pci_dev *pdev,
 	struct felix *felix;
 	int err;
 
+	if (pdev->dev.of_node && !of_device_is_available(pdev->dev.of_node)) {
+		dev_info(&pdev->dev, "device is disabled, skipping\n");
+		return -ENODEV;
+	}
+
 	err = pci_enable_device(pdev);
 	if (err) {
 		dev_err(&pdev->dev, "device enable failed\n");
-- 
cgit v1.2.3-59-g8ed1b


From b86cd700edd3bfe27f631649727b7796067bb3fd Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 4 May 2020 19:27:00 +0200
Subject: net: add helper eth_hw_addr_crc

Several drivers use the same code as basis for filter hashes. Therefore
let's factor it out to a helper. This way drivers don't have to access
struct netdev_hw_addr internals.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/etherdevice.h | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h
index 8801f1f986e5..2e5debc0373c 100644
--- a/include/linux/etherdevice.h
+++ b/include/linux/etherdevice.h
@@ -20,6 +20,7 @@
 #include <linux/if_ether.h>
 #include <linux/netdevice.h>
 #include <linux/random.h>
+#include <linux/crc32.h>
 #include <asm/unaligned.h>
 #include <asm/bitsperlong.h>
 
@@ -265,6 +266,17 @@ static inline void eth_hw_addr_random(struct net_device *dev)
 	eth_random_addr(dev->dev_addr);
 }
 
+/**
+ * eth_hw_addr_crc - Calculate CRC from netdev_hw_addr
+ * @ha: pointer to hardware address
+ *
+ * Calculate CRC from a hardware address as basis for filter hashes.
+ */
+static inline u32 eth_hw_addr_crc(struct netdev_hw_addr *ha)
+{
+	return ether_crc(ETH_ALEN, ha->addr);
+}
+
 /**
  * ether_addr_copy - Copy an Ethernet address
  * @dst: Pointer to a six-byte array Ethernet address destination
-- 
cgit v1.2.3-59-g8ed1b


From bc54ac3609aa0361dfeb15758b7bacf3637f6d4a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 4 May 2020 19:28:21 +0200
Subject: r8169: use new helper eth_hw_addr_crc

Use new helper eth_hw_addr_crc to simplify the code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 8b665f2ec21f..2f6512ed0a78 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -19,7 +19,6 @@
 #include <linux/ethtool.h>
 #include <linux/phy.h>
 #include <linux/if_vlan.h>
-#include <linux/crc32.h>
 #include <linux/in.h>
 #include <linux/io.h>
 #include <linux/ip.h>
@@ -2610,7 +2609,7 @@ static void rtl_set_rx_mode(struct net_device *dev)
 
 		mc_filter[1] = mc_filter[0] = 0;
 		netdev_for_each_mc_addr(ha, dev) {
-			u32 bit_nr = ether_crc(ETH_ALEN, ha->addr) >> 26;
+			u32 bit_nr = eth_hw_addr_crc(ha) >> 26;
 			mc_filter[bit_nr >> 5] |= BIT(bit_nr & 31);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8456c54408a21cddc5c5a3b35d2e77ddd58d20bc Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 12:58:56 -0500
Subject: dt-bindings: net: add IPA iommus property

The IPA accesses "IMEM" and main system memory through an SMMU, so
its DT node requires an iommus property to define the range of stream IDs
it uses.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/qcom,ipa.yaml | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/qcom,ipa.yaml b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
index 140f15245654..7b749fc04c32 100644
--- a/Documentation/devicetree/bindings/net/qcom,ipa.yaml
+++ b/Documentation/devicetree/bindings/net/qcom,ipa.yaml
@@ -20,7 +20,10 @@ description:
   The GSI is an integral part of the IPA, but it is logically isolated
   and has a distinct interrupt and a separately-defined address space.
 
-  See also soc/qcom/qcom,smp2p.txt and interconnect/interconnect.txt.
+  See also soc/qcom/qcom,smp2p.txt and interconnect/interconnect.txt.  See
+  iommu/iommu.txt and iommu/arm,smmu.yaml for more information about SMMU
+  bindings.
+
 
   - |
     --------             ---------
@@ -54,6 +57,9 @@ properties:
       - const: ipa-shared
       - const: gsi
 
+  iommus:
+    maxItems: 1
+
   clocks:
     maxItems: 1
 
@@ -126,6 +132,7 @@ properties:
 
 required:
   - compatible
+  - iommus
   - reg
   - clocks
   - interrupts
@@ -164,6 +171,7 @@ examples:
                 modem-init;
                 modem-remoteproc = <&mss_pil>;
 
+                iommus = <&apps_smmu 0x720 0x3>;
                 reg = <0 0x1e40000 0 0x7000>,
                         <0 0x1e47000 0 0x2000>,
                         <0 0x1e04000 0 0x2c000>;
-- 
cgit v1.2.3-59-g8ed1b


From 3128aae8c439af18048167e3cd5e31680cd190b9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 12:58:57 -0500
Subject: net: ipa: redefine struct ipa_mem_data

The ipa_mem_data structure type was never actually used.  Instead,
the IPA memory regions were defined using the ipa_mem structure.

Redefine struct ipa_mem_data so it encapsulates the array of IPA-local
memory region descriptors along with the count of entries in that
array.  Pass just an ipa_mem structure pointer to ipa_mem_init().

Rename the ipa_mem_data[] array ipa_mem_local_data[] to emphasize
that the memory regions it defines are IPA-local memory.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sc7180.c | 10 +++++++---
 drivers/net/ipa/ipa_data-sdm845.c | 10 +++++++---
 drivers/net/ipa/ipa_data.h        | 13 +++++--------
 drivers/net/ipa/ipa_main.c        |  2 +-
 drivers/net/ipa/ipa_mem.c         |  9 +++++----
 drivers/net/ipa/ipa_mem.h         |  3 ++-
 6 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index 042b5fc3c135..f97e7e4e61c1 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -193,7 +193,7 @@ static const struct ipa_resource_data ipa_resource_data = {
 };
 
 /* IPA-resident memory region configuration for the SC7180 SoC. */
-static const struct ipa_mem ipa_mem_data[] = {
+static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
 		.offset		= 0x0000,
 		.size		= 0x0080,
@@ -296,12 +296,16 @@ static const struct ipa_mem ipa_mem_data[] = {
 	},
 };
 
+static struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+};
+
 /* Configuration data for the SC7180 SoC. */
 const struct ipa_data ipa_data_sc7180 = {
 	.version	= IPA_VERSION_4_2,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
-	.mem_count	= ARRAY_SIZE(ipa_mem_data),
-	.mem_data	= ipa_mem_data,
+	.mem_data	= &ipa_mem_data,
 };
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index 0d9c36e1e806..c55507e94559 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -235,7 +235,7 @@ static const struct ipa_resource_data ipa_resource_data = {
 };
 
 /* IPA-resident memory region configuration for the SDM845 SoC. */
-static const struct ipa_mem ipa_mem_data[] = {
+static const struct ipa_mem ipa_mem_local_data[] = {
 	[IPA_MEM_UC_SHARED] = {
 		.offset		= 0x0000,
 		.size		= 0x0080,
@@ -318,12 +318,16 @@ static const struct ipa_mem ipa_mem_data[] = {
 	},
 };
 
+static struct ipa_mem_data ipa_mem_data = {
+	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
+	.local		= ipa_mem_local_data,
+};
+
 /* Configuration data for the SDM845 SoC. */
 const struct ipa_data ipa_data_sdm845 = {
 	.version	= IPA_VERSION_3_5_1,
 	.endpoint_count	= ARRAY_SIZE(ipa_gsi_endpoint_data),
 	.endpoint_data	= ipa_gsi_endpoint_data,
 	.resource_data	= &ipa_resource_data,
-	.mem_count	= ARRAY_SIZE(ipa_mem_data),
-	.mem_data	= ipa_mem_data,
+	.mem_data	= &ipa_mem_data,
 };
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 7110de2de817..51d8e5a6f23a 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -246,14 +246,12 @@ struct ipa_resource_data {
 
 /**
  * struct ipa_mem - IPA-local memory region description
- * @offset:		offset in IPA memory space to base of the region
- * @size:		size in bytes base of the region
- * @canary_count:	number of 32-bit "canary" values that precede region
+ * @local_count:	number of regions defined in the local[] array
+ * @local:		array of IPA-local memory region descriptors
  */
 struct ipa_mem_data {
-	u32 offset;
-	u16 size;
-	u16 canary_count;
+	u32 local_count;
+	const struct ipa_mem *local;
 };
 
 /**
@@ -270,8 +268,7 @@ struct ipa_data {
 	u32 endpoint_count;	/* # entries in endpoint_data[] */
 	const struct ipa_gsi_endpoint_data *endpoint_data;
 	const struct ipa_resource_data *resource_data;
-	u32 mem_count;		/* # entries in mem_data[] */
-	const struct ipa_mem *mem_data;
+	const struct ipa_mem_data *mem_data;
 };
 
 extern const struct ipa_data ipa_data_sdm845;
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 28998dcce3d2..9295a9122e8e 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -778,7 +778,7 @@ static int ipa_probe(struct platform_device *pdev)
 	if (ret)
 		goto err_kfree_ipa;
 
-	ret = ipa_mem_init(ipa, data->mem_count, data->mem_data);
+	ret = ipa_mem_init(ipa, data->mem_data);
 	if (ret)
 		goto err_reg_exit;
 
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 42d2c29d9f0c..fb4de2a12796 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -12,6 +12,7 @@
 
 #include "ipa.h"
 #include "ipa_reg.h"
+#include "ipa_data.h"
 #include "ipa_cmd.h"
 #include "ipa_mem.h"
 #include "ipa_data.h"
@@ -266,15 +267,15 @@ int ipa_mem_zero_modem(struct ipa *ipa)
 }
 
 /* Perform memory region-related initialization */
-int ipa_mem_init(struct ipa *ipa, u32 count, const struct ipa_mem *mem)
+int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 {
 	struct device *dev = &ipa->pdev->dev;
 	struct resource *res;
 	int ret;
 
-	if (count > IPA_MEM_COUNT) {
+	if (mem_data->local_count > IPA_MEM_COUNT) {
 		dev_err(dev, "to many memory regions (%u > %u)\n",
-			count, IPA_MEM_COUNT);
+			mem_data->local_count, IPA_MEM_COUNT);
 		return -EINVAL;
 	}
 
@@ -302,7 +303,7 @@ int ipa_mem_init(struct ipa *ipa, u32 count, const struct ipa_mem *mem)
 	ipa->mem_size = resource_size(res);
 
 	/* The ipa->mem[] array is indexed by enum ipa_mem_id values */
-	ipa->mem = mem;
+	ipa->mem = mem_data->local;
 
 	return 0;
 }
diff --git a/drivers/net/ipa/ipa_mem.h b/drivers/net/ipa/ipa_mem.h
index 065cb499ebe5..f99180f84f0d 100644
--- a/drivers/net/ipa/ipa_mem.h
+++ b/drivers/net/ipa/ipa_mem.h
@@ -7,6 +7,7 @@
 #define _IPA_MEM_H_
 
 struct ipa;
+struct ipa_mem_data;
 
 /**
  * DOC: IPA Local Memory
@@ -84,7 +85,7 @@ void ipa_mem_teardown(struct ipa *ipa);
 
 int ipa_mem_zero_modem(struct ipa *ipa);
 
-int ipa_mem_init(struct ipa *ipa, u32 count, const struct ipa_mem *mem);
+int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data);
 void ipa_mem_exit(struct ipa *ipa);
 
 #endif /* _IPA_MEM_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 3e313c3f5a36c7e8e6593ed2f6818795210347eb Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 12:58:58 -0500
Subject: net: ipa: define IMEM memory region for IPA

Define a region of IMEM memory available for use by IPA in the
platform configuration data.  Initialize it from ipa_mem_init().
The memory must be mapped for access through an SMMU.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa.h             |  5 +++
 drivers/net/ipa/ipa_data-sc7180.c |  2 +
 drivers/net/ipa/ipa_data-sdm845.c |  2 +
 drivers/net/ipa/ipa_data.h        |  6 ++-
 drivers/net/ipa/ipa_mem.c         | 84 +++++++++++++++++++++++++++++++++++++++
 5 files changed, 98 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index 23fb29889e5a..32f6dfafdb05 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -47,6 +47,8 @@ struct ipa_interrupt;
  * @mem_offset:		Offset from @mem_virt used for access to IPA memory
  * @mem_size:		Total size (bytes) of memory at @mem_virt
  * @mem:		Array of IPA-local memory region descriptors
+ * @imem_iova:		I/O virtual address of IPA region in IMEM
+ * @imem_size;		Size of IMEM region
  * @zero_addr:		DMA address of preallocated zero-filled memory
  * @zero_virt:		Virtual address of preallocated zero-filled memory
  * @zero_size:		Size (bytes) of preallocated zero-filled memory
@@ -88,6 +90,9 @@ struct ipa {
 	u32 mem_size;
 	const struct ipa_mem *mem;
 
+	unsigned long imem_iova;
+	size_t imem_size;
+
 	dma_addr_t zero_addr;
 	void *zero_virt;
 	size_t zero_size;
diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index f97e7e4e61c1..e9007d151c68 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -299,6 +299,8 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 static struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146a8000,
+	.imem_size	= 0x00002000,
 };
 
 /* Configuration data for the SC7180 SoC. */
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index c55507e94559..c0e207085550 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -321,6 +321,8 @@ static const struct ipa_mem ipa_mem_local_data[] = {
 static struct ipa_mem_data ipa_mem_data = {
 	.local_count	= ARRAY_SIZE(ipa_mem_local_data),
 	.local		= ipa_mem_local_data,
+	.imem_addr	= 0x146bd000,
+	.imem_size	= 0x00002000,
 };
 
 /* Configuration data for the SDM845 SoC. */
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 51d8e5a6f23a..69957af56ccd 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -245,13 +245,17 @@ struct ipa_resource_data {
 };
 
 /**
- * struct ipa_mem - IPA-local memory region description
+ * struct ipa_mem - description of IPA memory regions
  * @local_count:	number of regions defined in the local[] array
  * @local:		array of IPA-local memory region descriptors
+ * @imem_addr:		physical address of IPA region within IMEM
+ * @imem_size:		size in bytes of IPA IMEM region
  */
 struct ipa_mem_data {
 	u32 local_count;
 	const struct ipa_mem *local;
+	u32 imem_addr;
+	u32 imem_size;
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index fb4de2a12796..3c0916597fe1 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -8,6 +8,7 @@
 #include <linux/bitfield.h>
 #include <linux/bug.h>
 #include <linux/dma-mapping.h>
+#include <linux/iommu.h>
 #include <linux/io.h>
 
 #include "ipa.h"
@@ -266,6 +267,79 @@ int ipa_mem_zero_modem(struct ipa *ipa)
 	return 0;
 }
 
+/**
+ * ipa_imem_init() - Initialize IMEM memory used by the IPA
+ * @ipa:	IPA pointer
+ * @addr:	Physical address of the IPA region in IMEM
+ * @size:	Size (bytes) of the IPA region in IMEM
+ *
+ * IMEM is a block of shared memory separate from system DRAM, and
+ * a portion of this memory is available for the IPA to use.  The
+ * modem accesses this memory directly, but the IPA accesses it
+ * via the IOMMU, using the AP's credentials.
+ *
+ * If this region exists (size > 0) we map it for read/write access
+ * through the IOMMU using the IPA device.
+ *
+ * Note: @addr and @size are not guaranteed to be page-aligned.
+ */
+static int ipa_imem_init(struct ipa *ipa, unsigned long addr, size_t size)
+{
+	struct device *dev = &ipa->pdev->dev;
+	struct iommu_domain *domain;
+	unsigned long iova;
+	phys_addr_t phys;
+	int ret;
+
+	if (!size)
+		return 0;	/* IMEM memory not used */
+
+	domain = iommu_get_domain_for_dev(dev);
+	if (!domain) {
+		dev_err(dev, "no IOMMU domain found for IMEM\n");
+		return -EINVAL;
+	}
+
+	/* Align the address down and the size up to page boundaries */
+	phys = addr & PAGE_MASK;
+	size = PAGE_ALIGN(size + addr - phys);
+	iova = phys;	/* We just want a direct mapping */
+
+	ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+	if (ret)
+		return ret;
+
+	ipa->imem_iova = iova;
+	ipa->imem_size = size;
+
+	return 0;
+}
+
+static void ipa_imem_exit(struct ipa *ipa)
+{
+	struct iommu_domain *domain;
+	struct device *dev;
+
+	if (!ipa->imem_size)
+		return;
+
+	dev = &ipa->pdev->dev;
+	domain = iommu_get_domain_for_dev(dev);
+	if (domain) {
+		size_t size;
+
+		size = iommu_unmap(domain, ipa->imem_iova, ipa->imem_size);
+		if (size != ipa->imem_size)
+			dev_warn(dev, "unmapped %zu IMEM bytes, expected %lu\n",
+				 size, ipa->imem_size);
+	} else {
+		dev_err(dev, "couldn't get IPA IOMMU domain for IMEM\n");
+	}
+
+	ipa->imem_size = 0;
+	ipa->imem_iova = 0;
+}
+
 /* Perform memory region-related initialization */
 int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 {
@@ -305,11 +379,21 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	/* The ipa->mem[] array is indexed by enum ipa_mem_id values */
 	ipa->mem = mem_data->local;
 
+	ret = ipa_imem_init(ipa, mem_data->imem_addr, mem_data->imem_size);
+	if (ret)
+		goto err_unmap;
+
 	return 0;
+
+err_unmap:
+	memunmap(ipa->mem_virt);
+
+	return ret;
 }
 
 /* Inverse of ipa_mem_init() */
 void ipa_mem_exit(struct ipa *ipa)
 {
+	ipa_imem_exit(ipa);
 	memunmap(ipa->mem_virt);
 }
-- 
cgit v1.2.3-59-g8ed1b


From a0036bb413d5b28b5b7b3d217f52909511b7c8ae Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 12:58:59 -0500
Subject: net: ipa: define SMEM memory region for IPA

Arrange to use an item from SMEM memory for IPA.  SMEM item number
497 is designated to be used by the IPA.  Specify the item ID and
size of the region in platform configuration data.  Allocate and get
a pointer to this region from ipa_mem_init().  The memory must be
mapped for access through an SMMU.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa.h             |   5 ++
 drivers/net/ipa/ipa_data-sc7180.c |   2 +
 drivers/net/ipa/ipa_data-sdm845.c |   2 +
 drivers/net/ipa/ipa_data.h        |   4 ++
 drivers/net/ipa/ipa_mem.c         | 116 ++++++++++++++++++++++++++++++++++++++
 5 files changed, 129 insertions(+)

diff --git a/drivers/net/ipa/ipa.h b/drivers/net/ipa/ipa.h
index 32f6dfafdb05..b10a85392952 100644
--- a/drivers/net/ipa/ipa.h
+++ b/drivers/net/ipa/ipa.h
@@ -49,6 +49,8 @@ struct ipa_interrupt;
  * @mem:		Array of IPA-local memory region descriptors
  * @imem_iova:		I/O virtual address of IPA region in IMEM
  * @imem_size;		Size of IMEM region
+ * @smem_iova:		I/O virtual address of IPA region in SMEM
+ * @smem_size;		Size of SMEM region
  * @zero_addr:		DMA address of preallocated zero-filled memory
  * @zero_virt:		Virtual address of preallocated zero-filled memory
  * @zero_size:		Size (bytes) of preallocated zero-filled memory
@@ -93,6 +95,9 @@ struct ipa {
 	unsigned long imem_iova;
 	size_t imem_size;
 
+	unsigned long smem_iova;
+	size_t smem_size;
+
 	dma_addr_t zero_addr;
 	void *zero_virt;
 	size_t zero_size;
diff --git a/drivers/net/ipa/ipa_data-sc7180.c b/drivers/net/ipa/ipa_data-sc7180.c
index e9007d151c68..43faa35ae726 100644
--- a/drivers/net/ipa/ipa_data-sc7180.c
+++ b/drivers/net/ipa/ipa_data-sc7180.c
@@ -301,6 +301,8 @@ static struct ipa_mem_data ipa_mem_data = {
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146a8000,
 	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00002000,
 };
 
 /* Configuration data for the SC7180 SoC. */
diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index c0e207085550..f7ba85717edf 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -323,6 +323,8 @@ static struct ipa_mem_data ipa_mem_data = {
 	.local		= ipa_mem_local_data,
 	.imem_addr	= 0x146bd000,
 	.imem_size	= 0x00002000,
+	.smem_id	= 497,
+	.smem_size	= 0x00002000,
 };
 
 /* Configuration data for the SDM845 SoC. */
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 69957af56ccd..16dfd74717b1 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -250,12 +250,16 @@ struct ipa_resource_data {
  * @local:		array of IPA-local memory region descriptors
  * @imem_addr:		physical address of IPA region within IMEM
  * @imem_size:		size in bytes of IPA IMEM region
+ * @smem_id:		item identifier for IPA region within SMEM memory
+ * @imem_size:		size in bytes of the IPA SMEM region
  */
 struct ipa_mem_data {
 	u32 local_count;
 	const struct ipa_mem *local;
 	u32 imem_addr;
 	u32 imem_size;
+	u32 smem_id;
+	u32 smem_size;
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index 3c0916597fe1..aa8f6b0f3d50 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -10,6 +10,7 @@
 #include <linux/dma-mapping.h>
 #include <linux/iommu.h>
 #include <linux/io.h>
+#include <linux/soc/qcom/smem.h>
 
 #include "ipa.h"
 #include "ipa_reg.h"
@@ -23,6 +24,9 @@
 /* "Canary" value placed between memory regions to detect overflow */
 #define IPA_MEM_CANARY_VAL		cpu_to_le32(0xdeadbeef)
 
+/* SMEM host id representing the modem. */
+#define QCOM_SMEM_HOST_MODEM	1
+
 /* Add an immediate command to a transaction that zeroes a memory region */
 static void
 ipa_mem_zero_region_add(struct gsi_trans *trans, const struct ipa_mem *mem)
@@ -340,6 +344,111 @@ static void ipa_imem_exit(struct ipa *ipa)
 	ipa->imem_iova = 0;
 }
 
+/**
+ * ipa_smem_init() - Initialize SMEM memory used by the IPA
+ * @ipa:	IPA pointer
+ * @item:	Item ID of SMEM memory
+ * @size:	Size (bytes) of SMEM memory region
+ *
+ * SMEM is a managed block of shared DRAM, from which numbered "items"
+ * can be allocated.  One item is designated for use by the IPA.
+ *
+ * The modem accesses SMEM memory directly, but the IPA accesses it
+ * via the IOMMU, using the AP's credentials.
+ *
+ * If size provided is non-zero, we allocate it and map it for
+ * access through the IOMMU.
+ *
+ * Note: @size and the item address are is not guaranteed to be page-aligned.
+ */
+static int ipa_smem_init(struct ipa *ipa, u32 item, size_t size)
+{
+	struct device *dev = &ipa->pdev->dev;
+	struct iommu_domain *domain;
+	unsigned long iova;
+	phys_addr_t phys;
+	phys_addr_t addr;
+	size_t actual;
+	void *virt;
+	int ret;
+
+	if (!size)
+		return 0;	/* SMEM memory not used */
+
+	/* SMEM is memory shared between the AP and another system entity
+	 * (in this case, the modem).  An allocation from SMEM is persistent
+	 * until the AP reboots; there is no way to free an allocated SMEM
+	 * region.  Allocation only reserves the space; to use it you need
+	 * to "get" a pointer it (this implies no reference counting).
+	 * The item might have already been allocated, in which case we
+	 * use it unless the size isn't what we expect.
+	 */
+	ret = qcom_smem_alloc(QCOM_SMEM_HOST_MODEM, item, size);
+	if (ret && ret != -EEXIST) {
+		dev_err(dev, "error %d allocating size %zu SMEM item %u\n",
+			ret, size, item);
+		return ret;
+	}
+
+	/* Now get the address of the SMEM memory region */
+	virt = qcom_smem_get(QCOM_SMEM_HOST_MODEM, item, &actual);
+	if (IS_ERR(virt)) {
+		ret = PTR_ERR(virt);
+		dev_err(dev, "error %d getting SMEM item %u\n", ret, item);
+		return ret;
+	}
+
+	/* In case the region was already allocated, verify the size */
+	if (ret && actual != size) {
+		dev_err(dev, "SMEM item %u has size %zu, expected %zu\n",
+			item, actual, size);
+		return -EINVAL;
+	}
+
+	domain = iommu_get_domain_for_dev(dev);
+	if (!domain) {
+		dev_err(dev, "no IOMMU domain found for SMEM\n");
+		return -EINVAL;
+	}
+
+	/* Align the address down and the size up to a page boundary */
+	addr = qcom_smem_virt_to_phys(virt) & PAGE_MASK;
+	phys = addr & PAGE_MASK;
+	size = PAGE_ALIGN(size + addr - phys);
+	iova = phys;	/* We just want a direct mapping */
+
+	ret = iommu_map(domain, iova, phys, size, IOMMU_READ | IOMMU_WRITE);
+	if (ret)
+		return ret;
+
+	ipa->smem_iova = iova;
+	ipa->smem_size = size;
+
+	return 0;
+}
+
+static void ipa_smem_exit(struct ipa *ipa)
+{
+	struct device *dev = &ipa->pdev->dev;
+	struct iommu_domain *domain;
+
+	domain = iommu_get_domain_for_dev(dev);
+	if (domain) {
+		size_t size;
+
+		size = iommu_unmap(domain, ipa->smem_iova, ipa->smem_size);
+		if (size != ipa->smem_size)
+			dev_warn(dev, "unmapped %zu SMEM bytes, expected %lu\n",
+				 size, ipa->smem_size);
+
+	} else {
+		dev_err(dev, "couldn't get IPA IOMMU domain for SMEM\n");
+	}
+
+	ipa->smem_size = 0;
+	ipa->smem_iova = 0;
+}
+
 /* Perform memory region-related initialization */
 int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 {
@@ -383,8 +492,14 @@ int ipa_mem_init(struct ipa *ipa, const struct ipa_mem_data *mem_data)
 	if (ret)
 		goto err_unmap;
 
+	ret = ipa_smem_init(ipa, mem_data->smem_id, mem_data->smem_size);
+	if (ret)
+		goto err_imem_exit;
+
 	return 0;
 
+err_imem_exit:
+	ipa_imem_exit(ipa);
 err_unmap:
 	memunmap(ipa->mem_virt);
 
@@ -394,6 +509,7 @@ err_unmap:
 /* Inverse of ipa_mem_init() */
 void ipa_mem_exit(struct ipa *ipa)
 {
+	ipa_smem_exit(ipa);
 	ipa_imem_exit(ipa);
 	memunmap(ipa->mem_virt);
 }
-- 
cgit v1.2.3-59-g8ed1b


From bf6dba76d278d296b385b436d3ac7de56c190d44 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 30 Apr 2020 13:42:22 +0200
Subject: net: sched: fallback to qdisc noqueue if default qdisc setup fail

Currently if the default qdisc setup/init fails, the device ends up with
qdisc "noop", which causes all TX packets to get dropped.

With the introduction of sysctl net/core/default_qdisc it is possible
to change the default qdisc to be more advanced, which opens up the
possibility that Qdisc_ops->init() can fail.

This patch detects this kind of failure and chooses to fall back to
qdisc "noqueue", which is so simple that its init call will not fail.
This allows the interface to continue functioning.

V2:
As this also captures memory failures, which are transient, the
device is not kept in IFF_NO_QUEUE state.  This allows the net_device
to retry the default qdisc assignment.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 17 ++++++++++++++---
 1 file changed, 14 insertions(+), 3 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 2efd5b61acef..ad24fa1a51e6 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1037,10 +1037,9 @@ static void attach_one_default_qdisc(struct net_device *dev,
 		ops = &pfifo_fast_ops;
 
 	qdisc = qdisc_create_dflt(dev_queue, ops, TC_H_ROOT, NULL);
-	if (!qdisc) {
-		netdev_info(dev, "activation failed\n");
+	if (!qdisc)
 		return;
-	}
+
 	if (!netif_is_multiqueue(dev))
 		qdisc->flags |= TCQ_F_ONETXQUEUE | TCQ_F_NOPARENT;
 	dev_queue->qdisc_sleeping = qdisc;
@@ -1065,6 +1064,18 @@ static void attach_default_qdiscs(struct net_device *dev)
 			qdisc->ops->attach(qdisc);
 		}
 	}
+
+	/* Detect default qdisc setup/init failed and fallback to "noqueue" */
+	if (dev->qdisc == &noop_qdisc) {
+		netdev_warn(dev, "default qdisc (%s) fail, fallback to %s\n",
+			    default_qdisc_ops->id, noqueue_qdisc_ops.id);
+		dev->priv_flags |= IFF_NO_QUEUE;
+		netdev_for_each_tx_queue(dev, attach_one_default_qdisc, NULL);
+		dev->qdisc = txq->qdisc_sleeping;
+		qdisc_refcount_inc(dev->qdisc);
+		dev->priv_flags ^= IFF_NO_QUEUE;
+	}
+
 #ifdef CONFIG_NET_SCHED
 	if (dev->qdisc != &noop_qdisc)
 		qdisc_hash_add(dev->qdisc, false);
-- 
cgit v1.2.3-59-g8ed1b


From 39d010504e6b4485d7ceee167743620dd33f4417 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 1 May 2020 07:07:41 -0700
Subject: net_sched: sch_fq: add horizon attribute

QUIC servers would like to use SO_TXTIME, without having CAP_NET_ADMIN,
to efficiently pace UDP packets.

As far as sch_fq is concerned, we need to add safety checks, so
that a buggy application does not fill the qdisc with packets
having delivery time far in the future.

This patch adds a configurable horizon (default: 10 seconds),
and a configurable policy when a packet is beyond the horizon
at enqueue() time:
- either drop the packet (default policy)
- or cap its delivery time to the horizon.

$ tc -s -d qd sh dev eth0
qdisc fq 8022: root refcnt 257 limit 10000p flow_limit 100p buckets 1024
 orphan_mask 1023 quantum 10Kb initial_quantum 51160b low_rate_threshold 550Kbit
 refill_delay 40.0ms timer_slack 10.000us horizon 10.000s
 Sent 1234215879 bytes 837099 pkt (dropped 21, overlimits 0 requeues 6)
 backlog 0b 0p requeues 6
  flows 1191 (inactive 1177 throttled 0)
  gc 0 highprio 0 throttled 692 latency 11.480us
  pkts_too_long 0 alloc_errors 0 horizon_drops 21 horizon_caps 0

v2: fixed an overflow on 32bit kernels in fq_init(), reported
    by kbuild test robot <lkp@intel.com>

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_sched.h |  6 +++++
 net/sched/sch_fq.c             | 59 ++++++++++++++++++++++++++++++++++++++----
 2 files changed, 60 insertions(+), 5 deletions(-)

diff --git a/include/uapi/linux/pkt_sched.h b/include/uapi/linux/pkt_sched.h
index 0c02737c8f47..a95f3ae7ab37 100644
--- a/include/uapi/linux/pkt_sched.h
+++ b/include/uapi/linux/pkt_sched.h
@@ -913,6 +913,10 @@ enum {
 
 	TCA_FQ_TIMER_SLACK,	/* timer slack */
 
+	TCA_FQ_HORIZON,		/* time horizon in us */
+
+	TCA_FQ_HORIZON_DROP,	/* drop packets beyond horizon, or cap their EDT */
+
 	__TCA_FQ_MAX
 };
 
@@ -932,6 +936,8 @@ struct tc_fq_qd_stats {
 	__u32	throttled_flows;
 	__u32	unthrottle_latency_ns;
 	__u64	ce_mark;		/* packets above ce_threshold */
+	__u64	horizon_drops;
+	__u64	horizon_caps;
 };
 
 /* Heavy-Hitter Filter */
diff --git a/net/sched/sch_fq.c b/net/sched/sch_fq.c
index 4f0104243cc2..8f06a808c59a 100644
--- a/net/sched/sch_fq.c
+++ b/net/sched/sch_fq.c
@@ -100,6 +100,7 @@ struct fq_sched_data {
 
 	struct rb_root	delayed;	/* for rate limited flows */
 	u64		time_next_delayed_flow;
+	u64		ktime_cache;	/* copy of last ktime_get_ns() */
 	unsigned long	unthrottle_latency_ns;
 
 	struct fq_flow	internal;	/* for non classified or high prio packets */
@@ -109,12 +110,13 @@ struct fq_sched_data {
 	u32		flow_plimit;	/* max packets per flow */
 	unsigned long	flow_max_rate;	/* optional max rate per flow */
 	u64		ce_threshold;
+	u64		horizon;	/* horizon in ns */
 	u32		orphan_mask;	/* mask for orphaned skb */
 	u32		low_rate_threshold;
 	struct rb_root	*fq_root;
 	u8		rate_enable;
 	u8		fq_trees_log;
-
+	u8		horizon_drop;
 	u32		flows;
 	u32		inactive_flows;
 	u32		throttled_flows;
@@ -123,6 +125,8 @@ struct fq_sched_data {
 	u64		stat_internal_packets;
 	u64		stat_throttled;
 	u64		stat_ce_mark;
+	u64		stat_horizon_drops;
+	u64		stat_horizon_caps;
 	u64		stat_flows_plimit;
 	u64		stat_pkts_too_long;
 	u64		stat_allocation_errors;
@@ -402,8 +406,6 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
 	struct rb_node **p, *parent;
 	struct sk_buff *head, *aux;
 
-	fq_skb_cb(skb)->time_to_send = skb->tstamp ?: ktime_get_ns();
-
 	head = flow->head;
 	if (!head ||
 	    fq_skb_cb(skb)->time_to_send >= fq_skb_cb(flow->tail)->time_to_send) {
@@ -431,6 +433,12 @@ static void flow_queue_add(struct fq_flow *flow, struct sk_buff *skb)
 	rb_insert_color(&skb->rbnode, &flow->t_root);
 }
 
+static bool fq_packet_beyond_horizon(const struct sk_buff *skb,
+				    const struct fq_sched_data *q)
+{
+	return unlikely((s64)skb->tstamp > (s64)(q->ktime_cache + q->horizon));
+}
+
 static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 		      struct sk_buff **to_free)
 {
@@ -440,6 +448,28 @@ static int fq_enqueue(struct sk_buff *skb, struct Qdisc *sch,
 	if (unlikely(sch->q.qlen >= sch->limit))
 		return qdisc_drop(skb, sch, to_free);
 
+	if (!skb->tstamp) {
+		fq_skb_cb(skb)->time_to_send = q->ktime_cache = ktime_get_ns();
+	} else {
+		/* Check if packet timestamp is too far in the future.
+		 * Try first with our cached value, to avoid ktime_get_ns()
+		 * cost in most cases.
+		 */
+		if (fq_packet_beyond_horizon(skb, q)) {
+			/* Refresh our cache and check another time */
+			q->ktime_cache = ktime_get_ns();
+			if (fq_packet_beyond_horizon(skb, q)) {
+				if (q->horizon_drop) {
+					q->stat_horizon_drops++;
+					return qdisc_drop(skb, sch, to_free);
+				}
+				q->stat_horizon_caps++;
+				skb->tstamp = q->ktime_cache + q->horizon;
+			}
+		}
+		fq_skb_cb(skb)->time_to_send = skb->tstamp;
+	}
+
 	f = fq_classify(skb, q);
 	if (unlikely(f->qlen >= q->flow_plimit && f != &q->internal)) {
 		q->stat_flows_plimit++;
@@ -512,7 +542,7 @@ static struct sk_buff *fq_dequeue(struct Qdisc *sch)
 		goto out;
 	}
 
-	now = ktime_get_ns();
+	q->ktime_cache = now = ktime_get_ns();
 	fq_check_throttled(q, now);
 begin:
 	head = &q->new_flows;
@@ -765,6 +795,8 @@ static const struct nla_policy fq_policy[TCA_FQ_MAX + 1] = {
 	[TCA_FQ_LOW_RATE_THRESHOLD]	= { .type = NLA_U32 },
 	[TCA_FQ_CE_THRESHOLD]		= { .type = NLA_U32 },
 	[TCA_FQ_TIMER_SLACK]		= { .type = NLA_U32 },
+	[TCA_FQ_HORIZON]		= { .type = NLA_U32 },
+	[TCA_FQ_HORIZON_DROP]		= { .type = NLA_U8 },
 };
 
 static int fq_change(struct Qdisc *sch, struct nlattr *opt,
@@ -854,7 +886,15 @@ static int fq_change(struct Qdisc *sch, struct nlattr *opt,
 	if (tb[TCA_FQ_TIMER_SLACK])
 		q->timer_slack = nla_get_u32(tb[TCA_FQ_TIMER_SLACK]);
 
+	if (tb[TCA_FQ_HORIZON])
+		q->horizon = (u64)NSEC_PER_USEC *
+				  nla_get_u32(tb[TCA_FQ_HORIZON]);
+
+	if (tb[TCA_FQ_HORIZON_DROP])
+		q->horizon_drop = nla_get_u8(tb[TCA_FQ_HORIZON_DROP]);
+
 	if (!err) {
+
 		sch_tree_unlock(sch);
 		err = fq_resize(sch, fq_log);
 		sch_tree_lock(sch);
@@ -907,6 +947,9 @@ static int fq_init(struct Qdisc *sch, struct nlattr *opt,
 
 	q->timer_slack = 10 * NSEC_PER_USEC; /* 10 usec of hrtimer slack */
 
+	q->horizon = 10ULL * NSEC_PER_SEC; /* 10 seconds */
+	q->horizon_drop = 1; /* by default, drop packets beyond horizon */
+
 	/* Default ce_threshold of 4294 seconds */
 	q->ce_threshold		= (u64)NSEC_PER_USEC * ~0U;
 
@@ -924,6 +967,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 {
 	struct fq_sched_data *q = qdisc_priv(sch);
 	u64 ce_threshold = q->ce_threshold;
+	u64 horizon = q->horizon;
 	struct nlattr *opts;
 
 	opts = nla_nest_start_noflag(skb, TCA_OPTIONS);
@@ -933,6 +977,7 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 	/* TCA_FQ_FLOW_DEFAULT_RATE is not used anymore */
 
 	do_div(ce_threshold, NSEC_PER_USEC);
+	do_div(horizon, NSEC_PER_USEC);
 
 	if (nla_put_u32(skb, TCA_FQ_PLIMIT, sch->limit) ||
 	    nla_put_u32(skb, TCA_FQ_FLOW_PLIMIT, q->flow_plimit) ||
@@ -948,7 +993,9 @@ static int fq_dump(struct Qdisc *sch, struct sk_buff *skb)
 			q->low_rate_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_CE_THRESHOLD, (u32)ce_threshold) ||
 	    nla_put_u32(skb, TCA_FQ_BUCKETS_LOG, q->fq_trees_log) ||
-	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack))
+	    nla_put_u32(skb, TCA_FQ_TIMER_SLACK, q->timer_slack) ||
+	    nla_put_u32(skb, TCA_FQ_HORIZON, (u32)horizon) ||
+	    nla_put_u8(skb, TCA_FQ_HORIZON_DROP, q->horizon_drop))
 		goto nla_put_failure;
 
 	return nla_nest_end(skb, opts);
@@ -979,6 +1026,8 @@ static int fq_dump_stats(struct Qdisc *sch, struct gnet_dump *d)
 	st.unthrottle_latency_ns  = min_t(unsigned long,
 					  q->unthrottle_latency_ns, ~0U);
 	st.ce_mark		  = q->stat_ce_mark;
+	st.horizon_drops	  = q->stat_horizon_drops;
+	st.horizon_caps		  = q->stat_horizon_caps;
 	sch_tree_unlock(sch);
 
 	return gnet_stats_copy_app(d, &st, sizeof(st));
-- 
cgit v1.2.3-59-g8ed1b


From dd86fec7e06ab792fe470c66a67ff42bf5d72b91 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 1 May 2020 09:40:40 -0700
Subject: devlink: factor out building a snapshot notification

We'll need to send snapshot info back on the socket
which requested a snapshot to be created. Factor out
constructing a snapshot description from the broadcast
notification code.

v3: new patch

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c | 39 ++++++++++++++++++++++++++++-----------
 1 file changed, 28 insertions(+), 11 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 80f97722f31f..2b7c60c18b99 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -3716,24 +3716,26 @@ nla_put_failure:
 	return err;
 }
 
-static void devlink_nl_region_notify(struct devlink_region *region,
-				     struct devlink_snapshot *snapshot,
-				     enum devlink_command cmd)
+static struct sk_buff *
+devlink_nl_region_notify_build(struct devlink_region *region,
+			       struct devlink_snapshot *snapshot,
+			       enum devlink_command cmd, u32 portid, u32 seq)
 {
 	struct devlink *devlink = region->devlink;
 	struct sk_buff *msg;
 	void *hdr;
 	int err;
 
-	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
 
 	msg = nlmsg_new(NLMSG_DEFAULT_SIZE, GFP_KERNEL);
 	if (!msg)
-		return;
+		return ERR_PTR(-ENOMEM);
 
-	hdr = genlmsg_put(msg, 0, 0, &devlink_nl_family, 0, cmd);
-	if (!hdr)
+	hdr = genlmsg_put(msg, portid, seq, &devlink_nl_family, 0, cmd);
+	if (!hdr) {
+		err = -EMSGSIZE;
 		goto out_free_msg;
+	}
 
 	err = devlink_nl_put_handle(msg, devlink);
 	if (err)
@@ -3757,15 +3759,30 @@ static void devlink_nl_region_notify(struct devlink_region *region,
 	}
 	genlmsg_end(msg, hdr);
 
-	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
-				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
-
-	return;
+	return msg;
 
 out_cancel_msg:
 	genlmsg_cancel(msg, hdr);
 out_free_msg:
 	nlmsg_free(msg);
+	return ERR_PTR(err);
+}
+
+static void devlink_nl_region_notify(struct devlink_region *region,
+				     struct devlink_snapshot *snapshot,
+				     enum devlink_command cmd)
+{
+	struct devlink *devlink = region->devlink;
+	struct sk_buff *msg;
+
+	WARN_ON(cmd != DEVLINK_CMD_REGION_NEW && cmd != DEVLINK_CMD_REGION_DEL);
+
+	msg = devlink_nl_region_notify_build(region, snapshot, cmd, 0, 0);
+	if (IS_ERR(msg))
+		return;
+
+	genlmsg_multicast_netns(&devlink_nl_family, devlink_net(devlink),
+				msg, 0, DEVLINK_MCGRP_CONFIG, GFP_KERNEL);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 043b3e22768d5d909cb1474fc21ae2fbaf026c0c Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 1 May 2020 09:40:41 -0700
Subject: devlink: let kernel allocate region snapshot id

Currently users have to choose a free snapshot id before
calling DEVLINK_CMD_REGION_NEW. This is potentially racy
and inconvenient.

Make the DEVLINK_ATTR_REGION_SNAPSHOT_ID optional and try
to allocate id automatically. Send a message back to the
caller with the snapshot info.

Example use:
$ devlink region new netdevsim/netdevsim1/dummy
netdevsim/netdevsim1/dummy: snapshot 1

$ id=$(devlink -j region new netdevsim/netdevsim1/dummy | \
       jq '.[][][][]')
$ devlink region dump netdevsim/netdevsim1/dummy snapshot $id
[...]
$ devlink region del netdevsim/netdevsim1/dummy snapshot $id

v4:
 - inline the notification code
v3:
 - send the notification only once snapshot creation completed.
v2:
 - don't wrap the line containing extack;
 - add a few sentences to the docs.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jacob Keller <jacob.e.keller@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/devlink/devlink-region.rst          |  7 ++-
 net/core/devlink.c                                 | 57 +++++++++++++++++-----
 .../selftests/drivers/net/netdevsim/devlink.sh     | 13 +++++
 3 files changed, 62 insertions(+), 15 deletions(-)

diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index 04e04d1ff627..daf35427fce1 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -23,7 +23,9 @@ states, but see also :doc:`devlink-health`
 Regions may optionally support capturing a snapshot on demand via the
 ``DEVLINK_CMD_REGION_NEW`` netlink message. A driver wishing to allow
 requested snapshots must implement the ``.snapshot`` callback for the region
-in its ``devlink_region_ops`` structure.
+in its ``devlink_region_ops`` structure. If the snapshot id is not set in
+the ``DEVLINK_CMD_REGION_NEW`` request, the kernel will allocate one and
+send the snapshot information to user space.
 
 example usage
 -------------
@@ -45,7 +47,8 @@ example usage
     $ devlink region del pci/0000:00:05.0/cr-space snapshot 1
 
     # Request an immediate snapshot, if supported by the region
-    $ devlink region new pci/0000:00:05.0/cr-space snapshot 5
+    $ devlink region new pci/0000:00:05.0/cr-space
+    pci/0000:00:05.0/cr-space: snapshot 5
 
     # Dump a snapshot:
     $ devlink region dump pci/0000:00:05.0/fw-health snapshot 1
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 2b7c60c18b99..43a9d5be73ca 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4086,6 +4086,8 @@ static int
 devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
 {
 	struct devlink *devlink = info->user_ptr[0];
+	struct devlink_snapshot *snapshot;
+	struct nlattr *snapshot_id_attr;
 	struct devlink_region *region;
 	const char *region_name;
 	u32 snapshot_id;
@@ -4097,11 +4099,6 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	}
 
-	if (!info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]) {
-		NL_SET_ERR_MSG_MOD(info->extack, "No snapshot id provided");
-		return -EINVAL;
-	}
-
 	region_name = nla_data(info->attrs[DEVLINK_ATTR_REGION_NAME]);
 	region = devlink_region_get_by_name(devlink, region_name);
 	if (!region) {
@@ -4119,16 +4116,25 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
 		return -ENOSPC;
 	}
 
-	snapshot_id = nla_get_u32(info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID]);
+	snapshot_id_attr = info->attrs[DEVLINK_ATTR_REGION_SNAPSHOT_ID];
+	if (snapshot_id_attr) {
+		snapshot_id = nla_get_u32(snapshot_id_attr);
 
-	if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
-		NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
-		return -EEXIST;
-	}
+		if (devlink_region_snapshot_get_by_id(region, snapshot_id)) {
+			NL_SET_ERR_MSG_MOD(info->extack, "The requested snapshot id is already in use");
+			return -EEXIST;
+		}
 
-	err = __devlink_snapshot_id_insert(devlink, snapshot_id);
-	if (err)
-		return err;
+		err = __devlink_snapshot_id_insert(devlink, snapshot_id);
+		if (err)
+			return err;
+	} else {
+		err = __devlink_region_snapshot_id_get(devlink, &snapshot_id);
+		if (err) {
+			NL_SET_ERR_MSG_MOD(info->extack, "Failed to allocate a new snapshot id");
+			return err;
+		}
+	}
 
 	err = region->ops->snapshot(devlink, info->extack, &data);
 	if (err)
@@ -4138,6 +4144,27 @@ devlink_nl_cmd_region_new(struct sk_buff *skb, struct genl_info *info)
 	if (err)
 		goto err_snapshot_create;
 
+	if (!snapshot_id_attr) {
+		struct sk_buff *msg;
+
+		snapshot = devlink_region_snapshot_get_by_id(region,
+							     snapshot_id);
+		if (WARN_ON(!snapshot))
+			return -EINVAL;
+
+		msg = devlink_nl_region_notify_build(region, snapshot,
+						     DEVLINK_CMD_REGION_NEW,
+						     info->snd_portid,
+						     info->snd_seq);
+		err = PTR_ERR_OR_ZERO(msg);
+		if (err)
+			goto err_notify;
+
+		err = genlmsg_reply(msg, info);
+		if (err)
+			goto err_notify;
+	}
+
 	return 0;
 
 err_snapshot_create:
@@ -4145,6 +4172,10 @@ err_snapshot_create:
 err_snapshot_capture:
 	__devlink_snapshot_id_decrement(devlink, snapshot_id);
 	return err;
+
+err_notify:
+	devlink_region_snapshot_del(region, snapshot);
+	return err;
 }
 
 static int devlink_nl_cmd_region_read_chunk_fill(struct sk_buff *msg,
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index 9f9741444549..ad539eccddcb 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -151,6 +151,19 @@ regions_test()
 
 	check_region_snapshot_count dummy post-second-delete 2
 
+	sid=$(devlink -j region new $DL_HANDLE/dummy | jq '.[][][][]')
+	check_err $? "Failed to create a new snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 3
+
+	devlink region dump $DL_HANDLE/dummy snapshot $sid >> /dev/null
+	check_err $? "Failed to dump a snapshot with id allocated by the kernel"
+
+	devlink region del $DL_HANDLE/dummy snapshot $sid
+	check_err $? "Failed to delete snapshot with id allocated by the kernel"
+
+	check_region_snapshot_count dummy post-first-request 2
+
 	log_test "regions test"
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From aebbd7dfab2584acfb1c5d9abf911024109bc5ee Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 1 May 2020 09:40:42 -0700
Subject: docs: devlink: clarify the scope of snapshot id

In past discussions Jiri explained snapshot ids are cross-region.
Explain this in the docs.

v3: new patch

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-region.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-region.rst b/Documentation/networking/devlink/devlink-region.rst
index daf35427fce1..3654c3e9658f 100644
--- a/Documentation/networking/devlink/devlink-region.rst
+++ b/Documentation/networking/devlink/devlink-region.rst
@@ -14,6 +14,10 @@ Region snapshots are collected by the driver, and can be accessed via read
 or dump commands. This allows future analysis on the created snapshots.
 Regions may optionally support triggering snapshots on demand.
 
+Snapshot identifiers are scoped to the devlink instance, not a region.
+All snapshots with the same snapshot id within a devlink instance
+correspond to the same event.
+
 The major benefit to creating a region is to provide access to internal
 address regions that are otherwise inaccessible to the user.
 
-- 
cgit v1.2.3-59-g8ed1b


From 6e87ac748e94f4d7c9eaed4550789882ec8792d6 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:05 +0300
Subject: dt-binding: ti: am65x: document common platform time sync cpts module

Document device tree bindings for the TI AM654/J721E SoC Common Platform
Time Sync (CPTS) module. The CPTS module is used to facilitate host control
of time sync operations. Main features of CPTS module are:
  - selection of multiple external clock sources
  - 64-bit timestamp mode in ns with ppm and nudge adjustment.
  - control of time sync events via interrupt or polling
  - hardware timestamp of ext. events (HWx_TS_PUSH)
  - periodic generator function outputs (TS_GENFx)
  - PPS in combination with timesync router
  - Depending on integration it enables compliance with the IEEE 1588-2008
standard for a precision clock synchronization protocol, Ethernet Enhanced
Scheduled Traffic Operations (CPTS_ESTFn) and PCIe Subsystem Precision Time
Measurement (PTM).

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/ti,k3-am654-cpsw-nuss.yaml        |   7 +
 .../devicetree/bindings/net/ti,k3-am654-cpts.yaml  | 152 +++++++++++++++++++++
 2 files changed, 159 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 78bf511e2892..0f3fde45e200 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -144,6 +144,13 @@ patternProperties:
     description:
       CPSW MDIO bus.
 
+  "^cpts$":
+    type: object
+    allOf:
+      - $ref: "ti,am654-cpts.yaml#"
+    description:
+      CPSW Common Platform Time Sync (CPTS) module.
+
 required:
   - compatible
   - reg
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
new file mode 100644
index 000000000000..1b535d41e5c6
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
@@ -0,0 +1,152 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/ti,am654-cpts.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: The TI AM654x/J721E Common Platform Time Sync (CPTS) module Device Tree Bindings
+
+maintainers:
+  - Grygorii Strashko <grygorii.strashko@ti.com>
+  - Sekhar Nori <nsekhar@ti.com>
+
+description: |+
+  The TI AM654x/J721E CPTS module is used to facilitate host control of time
+  sync operations.
+  Main features of CPTS module are
+  - selection of multiple external clock sources
+  - Software control of time sync events via interrupt or polling
+  - 64-bit timestamp mode in ns with PPM and nudge adjustment.
+  - hardware timestamp push inputs (HWx_TS_PUSH)
+  - timestamp counter compare output (TS_COMP)
+  - timestamp counter bit output (TS_SYNC)
+  - periodic Generator function outputs (TS_GENFx)
+  - Ethernet Enhanced Scheduled Traffic Operations (CPTS_ESTFn) (TSN)
+  - external hardware timestamp push inputs (HWx_TS_PUSH) timestamping
+
+   Depending on integration it enables compliance with the IEEE 1588-2008
+   standard for a precision clock synchronization protocol, Ethernet Enhanced
+   Scheduled Traffic Operations (CPTS_ESTFn) and PCIe Subsystem Precision Time
+   Measurement (PTM).
+
+  TI AM654x/J721E SoCs have several similar CPTS modules integrated into the
+  different parts of the system which could be synchronized with each other
+  - Main CPTS
+  - MCU CPSW CPTS with IEEE 1588-2008 support
+  - PCIe subsystem CPTS for PTM support
+
+  Depending on CPTS module integration and when CPTS is an integral part of
+  another module (MCU CPSW for example) "compatible" and "reg" can
+  be omitted - parent module is fully responsible for CPTS enabling and
+  configuration.
+
+properties:
+  $nodename:
+    pattern: "^cpts(@.*|-[0-9a-f])*$"
+
+  compatible:
+    oneOf:
+      - const: ti,am65-cpts
+      - const: ti,j721e-cpts
+
+  reg:
+    maxItems: 1
+    description:
+       The physical base address and size of CPTS IO range
+
+  reg-names:
+    items:
+      - const: cpts
+
+  clocks:
+    description: CPTS reference clock
+
+  clock-names:
+    items:
+      - const: cpts
+
+  interrupts-extended:
+    items:
+      - description: CPTS events interrupt
+
+  interrupt-names:
+    items:
+      - const: "cpts"
+
+  ti,cpts-ext-ts-inputs:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    maximum: 8
+    description:
+        Number of hardware timestamp push inputs (HWx_TS_PUSH)
+
+  ti,cpts-periodic-outputs:
+    allOf:
+      - $ref: /schemas/types.yaml#/definitions/uint32
+    maximum: 8
+    description:
+         Number of timestamp Generator function outputs (TS_GENFx)
+
+  refclk-mux:
+    type: object
+    description: CPTS reference clock multiplexer clock
+    properties:
+      '#clock-cells':
+        const: 0
+
+      clocks:
+        maxItems: 8
+
+      assigned-clocks:
+        maxItems: 1
+
+      assigned-clock-parents:
+        maxItems: 1
+
+    required:
+      - clocks
+
+required:
+  - clocks
+  - clock-names
+  - interrupts-extended
+  - interrupt-names
+
+additionalProperties: false
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+
+    cpts@310d0000 {
+         compatible = "ti,am65-cpts";
+         reg = <0x0 0x310d0000 0x0 0x400>;
+         reg-names = "cpts";
+         clocks = <&main_cpts_mux>;
+         clock-names = "cpts";
+         interrupts-extended = <&k3_irq 163 0 IRQ_TYPE_LEVEL_HIGH>;
+         interrupt-names = "cpts";
+         ti,cpts-periodic-outputs = <6>;
+         ti,cpts-ext-ts-inputs = <8>;
+
+         main_cpts_mux: refclk-mux {
+               #clock-cells = <0>;
+               clocks = <&k3_clks 118 5>, <&k3_clks 118 11>,
+                        <&k3_clks 157 91>, <&k3_clks 157 77>,
+                        <&k3_clks 157 102>, <&k3_clks 157 80>,
+                        <&k3_clks 120 3>, <&k3_clks 121 3>;
+               assigned-clocks = <&main_cpts_mux>;
+               assigned-clock-parents = <&k3_clks 118 11>;
+         };
+    };
+  - |
+
+    cpts {
+             clocks = <&k3_clks 18 2>;
+             clock-names = "cpts";
+             interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
+             interrupt-names = "cpts";
+             ti,cpts-ext-ts-inputs = <4>;
+             ti,cpts-periodic-outputs = <2>;
+    };
-- 
cgit v1.2.3-59-g8ed1b


From f6bd59526ca527e203e3c6c2e62cda5a0fd5c1a2 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:06 +0300
Subject: net: ethernet: ti: introduce am654 common platform time sync driver

The CPTS module is used to facilitate host control of time sync operations.
Main features of CPTS module are:
- selection of multiple external clock sources
- control of time sync events via interrupt or polling
- 64-bit timestamp mode in ns with HW PPM and nudge adjustment.
- hardware timestamp ext. inputs (HWx_TS_PUSH)
- timestamp Generator function outputs (TS_GENFx)
Depending on integration it enables compliance with the IEEE 1588-2008
standard for a precision clock synchronization protocol, Ethernet Enhanced
Scheduled Traffic Operations (CPTS_ESTFn) and PCIe Subsystem Precision Time
Measurement (PTM).

Introduced driver provides Linux PTP hardware clock for each CPTS device
and network packets timestamping where applicable. CPTS PTP hardware clock
supports following operations:
    - Set time
    - Get time
    - Shift the clock by a given offset atomically
    - Adjust clock frequency
    - Time stamp external events
    - Periodic output signals

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig     |   14 +
 drivers/net/ethernet/ti/Makefile    |    1 +
 drivers/net/ethernet/ti/am65-cpts.c | 1041 +++++++++++++++++++++++++++++++++++
 drivers/net/ethernet/ti/am65-cpts.h |   50 ++
 4 files changed, 1106 insertions(+)
 create mode 100644 drivers/net/ethernet/ti/am65-cpts.c
 create mode 100644 drivers/net/ethernet/ti/am65-cpts.h

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 7b0ad777828d..2b7590cece39 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -110,6 +110,20 @@ config TI_K3_AM65_CPSW_NUSS
 	  To compile this driver as a module, choose M here: the module
 	  will be called ti-am65-cpsw-nuss.
 
+config TI_K3_AM65_CPTS
+	tristate "TI K3 AM65x CPTS"
+	depends on ARCH_K3 && OF && PTP_1588_CLOCK
+	depends on PTP_1588_CLOCK
+	select NET_PTP_CLASSIFY
+	help
+	  Say y here to support the TI K3 AM65x CPTS with 1588 features such as
+	  PTP hardware clock for each CPTS device and network packets
+	  timestamping where applicable.
+	  Depending on integration CPTS blocks enable compliance with
+	  the IEEE 1588-2008 standard for a precision clock synchronization
+	  protocol, Ethernet Enhanced Scheduled Traffic Operations (CPTS_ESTFn)
+	  and PCIe Subsystem Precision Time Measurement (PTM).
+
 config TI_KEYSTONE_NETCP
 	tristate "TI Keystone NETCP Core Support"
 	select TI_DAVINCI_MDIO
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index 53792190e9c2..bf86067f9b4c 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -26,3 +26,4 @@ keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.
 
 obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o
 ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o k3-cppi-desc-pool.o
+obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o
diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
new file mode 100644
index 000000000000..370162c86e7f
--- /dev/null
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -0,0 +1,1041 @@
+// SPDX-License-Identifier: GPL-2.0
+/* TI K3 AM65x Common Platform Time Sync
+ *
+ * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/clk-provider.h>
+#include <linux/err.h>
+#include <linux/if_vlan.h>
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/net_tstamp.h>
+#include <linux/of.h>
+#include <linux/of_irq.h>
+#include <linux/platform_device.h>
+#include <linux/pm_runtime.h>
+#include <linux/ptp_classify.h>
+#include <linux/ptp_clock_kernel.h>
+
+#include "am65-cpts.h"
+
+struct am65_genf_regs {
+	u32 comp_lo;	/* Comparison Low Value 0:31 */
+	u32 comp_hi;	/* Comparison High Value 32:63 */
+	u32 control;	/* control */
+	u32 length;	/* Length */
+	u32 ppm_low;	/* PPM Load Low Value 0:31 */
+	u32 ppm_hi;	/* PPM Load High Value 32:63 */
+	u32 ts_nudge;	/* Nudge value */
+} __aligned(32) __packed;
+
+#define AM65_CPTS_GENF_MAX_NUM 9
+#define AM65_CPTS_ESTF_MAX_NUM 8
+
+struct am65_cpts_regs {
+	u32 idver;		/* Identification and version */
+	u32 control;		/* Time sync control */
+	u32 rftclk_sel;		/* Reference Clock Select Register */
+	u32 ts_push;		/* Time stamp event push */
+	u32 ts_load_val_lo;	/* Time Stamp Load Low Value 0:31 */
+	u32 ts_load_en;		/* Time stamp load enable */
+	u32 ts_comp_lo;		/* Time Stamp Comparison Low Value 0:31 */
+	u32 ts_comp_length;	/* Time Stamp Comparison Length */
+	u32 intstat_raw;	/* Time sync interrupt status raw */
+	u32 intstat_masked;	/* Time sync interrupt status masked */
+	u32 int_enable;		/* Time sync interrupt enable */
+	u32 ts_comp_nudge;	/* Time Stamp Comparison Nudge Value */
+	u32 event_pop;		/* Event interrupt pop */
+	u32 event_0;		/* Event Time Stamp lo 0:31 */
+	u32 event_1;		/* Event Type Fields */
+	u32 event_2;		/* Event Type Fields domain */
+	u32 event_3;		/* Event Time Stamp hi 32:63 */
+	u32 ts_load_val_hi;	/* Time Stamp Load High Value 32:63 */
+	u32 ts_comp_hi;		/* Time Stamp Comparison High Value 32:63 */
+	u32 ts_add_val;		/* Time Stamp Add value */
+	u32 ts_ppm_low;		/* Time Stamp PPM Load Low Value 0:31 */
+	u32 ts_ppm_hi;		/* Time Stamp PPM Load High Value 32:63 */
+	u32 ts_nudge;		/* Time Stamp Nudge value */
+	u32 reserv[33];
+	struct am65_genf_regs genf[AM65_CPTS_GENF_MAX_NUM];
+	struct am65_genf_regs estf[AM65_CPTS_ESTF_MAX_NUM];
+};
+
+/* CONTROL_REG */
+#define AM65_CPTS_CONTROL_EN			BIT(0)
+#define AM65_CPTS_CONTROL_INT_TEST		BIT(1)
+#define AM65_CPTS_CONTROL_TS_COMP_POLARITY	BIT(2)
+#define AM65_CPTS_CONTROL_TSTAMP_EN		BIT(3)
+#define AM65_CPTS_CONTROL_SEQUENCE_EN		BIT(4)
+#define AM65_CPTS_CONTROL_64MODE		BIT(5)
+#define AM65_CPTS_CONTROL_TS_COMP_TOG		BIT(6)
+#define AM65_CPTS_CONTROL_TS_PPM_DIR		BIT(7)
+#define AM65_CPTS_CONTROL_HW1_TS_PUSH_EN	BIT(8)
+#define AM65_CPTS_CONTROL_HW2_TS_PUSH_EN	BIT(9)
+#define AM65_CPTS_CONTROL_HW3_TS_PUSH_EN	BIT(10)
+#define AM65_CPTS_CONTROL_HW4_TS_PUSH_EN	BIT(11)
+#define AM65_CPTS_CONTROL_HW5_TS_PUSH_EN	BIT(12)
+#define AM65_CPTS_CONTROL_HW6_TS_PUSH_EN	BIT(13)
+#define AM65_CPTS_CONTROL_HW7_TS_PUSH_EN	BIT(14)
+#define AM65_CPTS_CONTROL_HW8_TS_PUSH_EN	BIT(15)
+#define AM65_CPTS_CONTROL_HW1_TS_PUSH_OFFSET	(8)
+
+#define AM65_CPTS_CONTROL_TS_SYNC_SEL_MASK	(0xF)
+#define AM65_CPTS_CONTROL_TS_SYNC_SEL_SHIFT	(28)
+
+/* RFTCLK_SEL_REG */
+#define AM65_CPTS_RFTCLK_SEL_MASK		(0x1F)
+
+/* TS_PUSH_REG */
+#define AM65_CPTS_TS_PUSH			BIT(0)
+
+/* TS_LOAD_EN_REG */
+#define AM65_CPTS_TS_LOAD_EN			BIT(0)
+
+/* INTSTAT_RAW_REG */
+#define AM65_CPTS_INTSTAT_RAW_TS_PEND		BIT(0)
+
+/* INTSTAT_MASKED_REG */
+#define AM65_CPTS_INTSTAT_MASKED_TS_PEND	BIT(0)
+
+/* INT_ENABLE_REG */
+#define AM65_CPTS_INT_ENABLE_TS_PEND_EN		BIT(0)
+
+/* TS_COMP_NUDGE_REG */
+#define AM65_CPTS_TS_COMP_NUDGE_MASK		(0xFF)
+
+/* EVENT_POP_REG */
+#define AM65_CPTS_EVENT_POP			BIT(0)
+
+/* EVENT_1_REG */
+#define AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK	GENMASK(15, 0)
+
+#define AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK	GENMASK(19, 16)
+#define AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT	(16)
+
+#define AM65_CPTS_EVENT_1_EVENT_TYPE_MASK	GENMASK(23, 20)
+#define AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT	(20)
+
+#define AM65_CPTS_EVENT_1_PORT_NUMBER_MASK	GENMASK(28, 24)
+#define AM65_CPTS_EVENT_1_PORT_NUMBER_SHIFT	(24)
+
+/* EVENT_2_REG */
+#define AM65_CPTS_EVENT_2_REG_DOMAIN_MASK	(0xFF)
+#define AM65_CPTS_EVENT_2_REG_DOMAIN_SHIFT	(0)
+
+enum {
+	AM65_CPTS_EV_PUSH,	/* Time Stamp Push Event */
+	AM65_CPTS_EV_ROLL,	/* Time Stamp Rollover Event */
+	AM65_CPTS_EV_HALF,	/* Time Stamp Half Rollover Event */
+	AM65_CPTS_EV_HW,		/* Hardware Time Stamp Push Event */
+	AM65_CPTS_EV_RX,		/* Ethernet Receive Event */
+	AM65_CPTS_EV_TX,		/* Ethernet Transmit Event */
+	AM65_CPTS_EV_TS_COMP,	/* Time Stamp Compare Event */
+	AM65_CPTS_EV_HOST,	/* Host Transmit Event */
+};
+
+struct am65_cpts_event {
+	struct list_head list;	/* node in the pool or events list */
+	unsigned long tmo;	/* expiry time, in jiffies */
+	u32 event1;		/* raw EVENT_1 register value */
+	u32 event2;		/* raw EVENT_2 register value */
+	u64 timestamp;		/* EVENT_3 (bits 63:32) | EVENT_0 (bits 31:0) */
+};
+
+#define AM65_CPTS_FIFO_DEPTH		(16)
+#define AM65_CPTS_MAX_EVENTS		(32)
+#define AM65_CPTS_EVENT_RX_TX_TIMEOUT	(20) /* ms */
+#define AM65_CPTS_SKB_TX_WORK_TIMEOUT	1 /* jiffies */
+#define AM65_CPTS_MIN_PPM		0x400
+
+struct am65_cpts {
+	struct device *dev;
+	struct am65_cpts_regs __iomem *reg;
+	struct ptp_clock_info ptp_info;
+	struct ptp_clock *ptp_clock;
+	int phc_index;
+	struct clk_hw *clk_mux_hw;
+	struct device_node *clk_mux_np;
+	struct clk *refclk;
+	u32 refclk_freq;
+	struct list_head events; /* queued RX/TX events, each with a timeout */
+	struct list_head pool; /* free event entries */
+	struct am65_cpts_event pool_data[AM65_CPTS_MAX_EVENTS];
+	spinlock_t lock; /* protects the events and pool lists */
+	u32 ext_ts_inputs;
+	u32 genf_num;
+	u32 ts_add_val; /* value written to the ts_add_val register */
+	int irq;
+	struct mutex ptp_clk_lock; /* PHC access sync */
+	u64 timestamp; /* updated by AM65_CPTS_EV_PUSH events */
+	u32 genf_enable;
+	u32 hw_ts_enable;
+	struct sk_buff_head txq;
+};
+
+struct am65_cpts_skb_cb_data {
+	unsigned long tmo;
+	u32 skb_mtype_seqid;
+};
+
+#define am65_cpts_write32(c, v, r) writel(v, &(c)->reg->r)
+#define am65_cpts_read32(c, r) readl(&(c)->reg->r)
+
+static void am65_cpts_settime(struct am65_cpts *cpts, u64 start_tstamp)
+{
+	u32 val;
+
+	val = upper_32_bits(start_tstamp);
+	am65_cpts_write32(cpts, val, ts_load_val_hi);
+	val = lower_32_bits(start_tstamp);
+	am65_cpts_write32(cpts, val, ts_load_val_lo);
+
+	am65_cpts_write32(cpts, AM65_CPTS_TS_LOAD_EN, ts_load_en);
+}
+
+static void am65_cpts_set_add_val(struct am65_cpts *cpts)
+{
+	/* select coefficient according to the rate */
+	cpts->ts_add_val = (NSEC_PER_SEC / cpts->refclk_freq - 1) & 0x7;
+
+	am65_cpts_write32(cpts, cpts->ts_add_val, ts_add_val);
+}
+
+static void am65_cpts_disable(struct am65_cpts *cpts)
+{
+	am65_cpts_write32(cpts, 0, control);
+	am65_cpts_write32(cpts, 0, int_enable);
+}
+
+static int am65_cpts_event_get_port(struct am65_cpts_event *event)
+{
+	return (event->event1 & AM65_CPTS_EVENT_1_PORT_NUMBER_MASK) >>
+		AM65_CPTS_EVENT_1_PORT_NUMBER_SHIFT;
+}
+
+static int am65_cpts_event_get_type(struct am65_cpts_event *event)
+{
+	return (event->event1 & AM65_CPTS_EVENT_1_EVENT_TYPE_MASK) >>
+		AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT;
+}
+
+static int am65_cpts_cpts_purge_events(struct am65_cpts *cpts)
+{
+	struct list_head *this, *next;
+	struct am65_cpts_event *event;
+	int removed = 0;
+
+	list_for_each_safe(this, next, &cpts->events) {
+		event = list_entry(this, struct am65_cpts_event, list);
+		if (time_after(jiffies, event->tmo)) { /* entry has timed out */
+			list_del_init(&event->list); /* move it to the pool */
+			list_add(&event->list, &cpts->pool);
+			++removed;
+		}
+	}
+
+	if (removed)
+		dev_dbg(cpts->dev, "event pool cleaned up %d\n", removed);
+	return removed ? 0 : -1; /* -1: no expired event could be reclaimed */
+}
+
+/* Read one pending event from the hardware into @event and pop it.
+ *
+ * Note the return convention: false means an event was read, true means
+ * no event was pending and @event was left untouched.
+ */
+static bool am65_cpts_fifo_pop_event(struct am65_cpts *cpts,
+				     struct am65_cpts_event *event)
+{
+	u32 status = am65_cpts_read32(cpts, intstat_raw);
+
+	if (!(status & AM65_CPTS_INTSTAT_RAW_TS_PEND))
+		return true;
+
+	/* event_0 carries the low and event_3 the high timestamp word */
+	event->timestamp = am65_cpts_read32(cpts, event_0);
+	event->event1 = am65_cpts_read32(cpts, event_1);
+	event->event2 = am65_cpts_read32(cpts, event_2);
+	event->timestamp |= (u64)am65_cpts_read32(cpts, event_3) << 32;
+	am65_cpts_write32(cpts, AM65_CPTS_EVENT_POP, event_pop);
+
+	return false;
+}
+
+/* Drain up to AM65_CPTS_FIFO_DEPTH pending events under cpts->lock.
+ *
+ * PUSH events update cpts->timestamp, RX/TX events are queued on cpts->events
+ * with a timeout for the PTP aux worker, and HW events are reported to the
+ * PTP core as PTP_CLOCK_EXTTS. Other known event types are only logged.
+ *
+ * Returns 0 on success, -1 if no event descriptor is available or an
+ * unknown event type is read.
+ */
+static int am65_cpts_fifo_read(struct am65_cpts *cpts)
+{
+	struct ptp_clock_event pevent;
+	struct am65_cpts_event *event;
+	bool schedule = false;
+	int i, type, ret = 0;
+	unsigned long flags;
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	for (i = 0; i < AM65_CPTS_FIFO_DEPTH; i++) {
+		/* The first free descriptor is used as scratch space; it only
+		 * leaves the pool if the event turns out to be RX/TX.
+		 */
+		event = list_first_entry_or_null(&cpts->pool,
+						 struct am65_cpts_event, list);
+
+		if (!event) {
+			/* pool exhausted: try to reclaim expired events */
+			if (am65_cpts_cpts_purge_events(cpts)) {
+				dev_err(cpts->dev, "cpts: event pool empty\n");
+				ret = -1;
+				goto out;
+			}
+			continue;
+		}
+
+		/* returns true when nothing is pending */
+		if (am65_cpts_fifo_pop_event(cpts, event))
+			break;
+
+		type = am65_cpts_event_get_type(event);
+		switch (type) {
+		case AM65_CPTS_EV_PUSH:
+			cpts->timestamp = event->timestamp;
+			dev_dbg(cpts->dev, "AM65_CPTS_EV_PUSH t:%llu\n",
+				cpts->timestamp);
+			break;
+		case AM65_CPTS_EV_RX:
+		case AM65_CPTS_EV_TX:
+			event->tmo = jiffies +
+				msecs_to_jiffies(AM65_CPTS_EVENT_RX_TX_TIMEOUT);
+
+			list_del_init(&event->list);
+			list_add_tail(&event->list, &cpts->events);
+
+			dev_dbg(cpts->dev,
+				"AM65_CPTS_EV_TX e1:%08x e2:%08x t:%lld\n",
+				event->event1, event->event2,
+				event->timestamp);
+			schedule = true;
+			break;
+		case AM65_CPTS_EV_HW:
+			pevent.index = am65_cpts_event_get_port(event) - 1;
+			pevent.timestamp = event->timestamp;
+			pevent.type = PTP_CLOCK_EXTTS;
+			dev_dbg(cpts->dev, "AM65_CPTS_EV_HW p:%d t:%llu\n",
+				pevent.index, event->timestamp);
+
+			ptp_clock_event(cpts->ptp_clock, &pevent);
+			break;
+		case AM65_CPTS_EV_HOST:
+			break;
+		case AM65_CPTS_EV_ROLL:
+		case AM65_CPTS_EV_HALF:
+		case AM65_CPTS_EV_TS_COMP:
+			dev_dbg(cpts->dev,
+				"AM65_CPTS_EVT: %d e1:%08x e2:%08x t:%lld\n",
+				type,
+				event->event1, event->event2,
+				event->timestamp);
+			break;
+		default:
+			dev_err(cpts->dev, "cpts: unknown event type\n");
+			ret = -1;
+			goto out;
+		}
+	}
+
+out:
+	spin_unlock_irqrestore(&cpts->lock, flags);
+
+	/* the worker is kicked only after the lock has been dropped */
+	if (schedule)
+		ptp_schedule_worker(cpts->ptp_clock, 0);
+
+	return ret;
+}
+
+/* Read the current CPTS time.
+ *
+ * Requests a timestamp push, drains the event FIFO so the resulting PUSH
+ * event updates cpts->timestamp, and returns that cached value. @sts, if
+ * given, receives the system timestamps taken around the push request.
+ * The callers in this file hold cpts->ptp_clk_lock.
+ */
+static u64 am65_cpts_gettime(struct am65_cpts *cpts,
+			     struct ptp_system_timestamp *sts)
+{
+	unsigned long flags;
+	u64 val = 0;
+
+	/* temporarily disable cpts interrupt to avoid intentional
+	 * doubled read. Interrupt can be in-flight - it's Ok.
+	 */
+	am65_cpts_write32(cpts, 0, int_enable);
+
+	/* use spin_lock_irqsave() here as it has to run very fast */
+	spin_lock_irqsave(&cpts->lock, flags);
+	ptp_read_system_prets(sts);
+	am65_cpts_write32(cpts, AM65_CPTS_TS_PUSH, ts_push);
+	am65_cpts_read32(cpts, ts_push);
+	ptp_read_system_postts(sts);
+	spin_unlock_irqrestore(&cpts->lock, flags);
+
+	am65_cpts_fifo_read(cpts);
+
+	am65_cpts_write32(cpts, AM65_CPTS_INT_ENABLE_TS_PEND_EN, int_enable);
+
+	val = cpts->timestamp;
+
+	return val;
+}
+
+/* IRQ handler: drains the event FIFO. A failure is only logged at debug
+ * level and the interrupt is always reported as handled.
+ */
+static irqreturn_t am65_cpts_interrupt(int irq, void *dev_id)
+{
+	struct am65_cpts *cpts = dev_id;
+
+	if (am65_cpts_fifo_read(cpts))
+		dev_dbg(cpts->dev, "cpts: unable to obtain a time stamp\n");
+
+	return IRQ_HANDLED;
+}
+
+/* PTP clock operations */
+/* PTP .adjfreq callback: program the PPM adjustment period and direction.
+ *
+ * @ptp: clock info embedded in struct am65_cpts
+ * @ppb: requested frequency offset in parts per billion (sign = direction)
+ *
+ * A request of 0 ppb programs an adjustment period of 0 instead of dividing
+ * by zero.
+ * NOTE(review): a zero period is presumed to switch the PPM correction off -
+ * confirm against the CPTS register description.
+ *
+ * Always returns 0.
+ */
+static int am65_cpts_ptp_adjfreq(struct ptp_clock_info *ptp, s32 ppb)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+	int neg_adj = 0;
+	u64 adj_period = 0;
+	u32 val;
+
+	if (ppb < 0) {
+		neg_adj = 1;
+		ppb = -ppb;
+	}
+
+	/* base freq = 1GHz = 1 000 000 000
+	 * ppb_norm = ppb * base_freq / clock_freq;
+	 * ppm_norm = ppb_norm / 1000
+	 * adj_period = 1 000 000 / ppm_norm
+	 * adj_period = 1 000 000 000 / ppb_norm
+	 * adj_period = 1 000 000 000 / (ppb * base_freq / clock_freq)
+	 * adj_period = (1 000 000 000 * clock_freq) / (ppb * base_freq)
+	 * adj_period = clock_freq / ppb
+	 */
+	if (ppb)
+		adj_period = div_u64(cpts->refclk_freq, ppb);
+
+	mutex_lock(&cpts->ptp_clk_lock);
+
+	val = am65_cpts_read32(cpts, control);
+	if (neg_adj)
+		val |= AM65_CPTS_CONTROL_TS_PPM_DIR;
+	else
+		val &= ~AM65_CPTS_CONTROL_TS_PPM_DIR;
+	am65_cpts_write32(cpts, val, control);
+
+	val = upper_32_bits(adj_period) & 0x3FF;
+	am65_cpts_write32(cpts, val, ts_ppm_hi);
+	val = lower_32_bits(adj_period);
+	am65_cpts_write32(cpts, val, ts_ppm_low);
+
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	return 0;
+}
+
+/* PTP .adjtime callback: read the current time, add @delta nanoseconds and
+ * load the result back, all under ptp_clk_lock. Always returns 0.
+ */
+static int am65_cpts_ptp_adjtime(struct ptp_clock_info *ptp, s64 delta)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+	s64 ns;
+
+	mutex_lock(&cpts->ptp_clk_lock);
+	ns = am65_cpts_gettime(cpts, NULL);
+	ns += delta;
+	am65_cpts_settime(cpts, ns);
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	return 0;
+}
+
+/* PTP .gettimex64 callback: read the CPTS time under ptp_clk_lock and return
+ * it in @ts; @sts is passed through to am65_cpts_gettime(). Always returns 0.
+ */
+static int am65_cpts_ptp_gettimex(struct ptp_clock_info *ptp,
+				  struct timespec64 *ts,
+				  struct ptp_system_timestamp *sts)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+	u64 ns;
+
+	mutex_lock(&cpts->ptp_clk_lock);
+	ns = am65_cpts_gettime(cpts, sts);
+	mutex_unlock(&cpts->ptp_clk_lock);
+	*ts = ns_to_timespec64(ns);
+
+	return 0;
+}
+
+/* PTP .settime64 callback: convert @ts to nanoseconds and load it into the
+ * CPTS under ptp_clk_lock. Always returns 0.
+ */
+static int am65_cpts_ptp_settime(struct ptp_clock_info *ptp,
+				 const struct timespec64 *ts)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+	u64 ns;
+
+	ns = timespec64_to_ns(ts);
+	mutex_lock(&cpts->ptp_clk_lock);
+	am65_cpts_settime(cpts, ns);
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	return 0;
+}
+
+/* Set or clear the timestamp push enable bit of external input @index in the
+ * control register and mirror the new state in cpts->hw_ts_enable.
+ */
+static void am65_cpts_extts_enable_hw(struct am65_cpts *cpts, u32 index, int on)
+{
+	u32 push_bit = BIT(AM65_CPTS_CONTROL_HW1_TS_PUSH_OFFSET + index);
+	u32 ctrl = am65_cpts_read32(cpts, control);
+
+	if (on) {
+		ctrl |= push_bit;
+		cpts->hw_ts_enable |= BIT(index);
+	} else {
+		ctrl &= ~push_bit;
+		cpts->hw_ts_enable &= ~BIT(index);
+	}
+
+	am65_cpts_write32(cpts, ctrl, control);
+}
+
+/* Enable or disable external timestamp input @index. Does nothing when the
+ * input is already in the requested state; that check reads
+ * cpts->hw_ts_enable before ptp_clk_lock is taken. Always returns 0.
+ */
+static int am65_cpts_extts_enable(struct am65_cpts *cpts, u32 index, int on)
+{
+	if (!!(cpts->hw_ts_enable & BIT(index)) == !!on)
+		return 0;
+
+	mutex_lock(&cpts->ptp_clk_lock);
+	am65_cpts_extts_enable_hw(cpts, index, on);
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	dev_dbg(cpts->dev, "%s: ExtTS:%u %s\n",
+		__func__, index, on ? "enabled" : "disabled");
+
+	return 0;
+}
+
+/* Program or stop periodic output req->index.
+ *
+ * When enabling, the start time (in ns) is written to the comparator
+ * registers and the period, converted to reference clock cycles, is written
+ * to the length register. When disabling, the length register is written
+ * with 0. cpts->genf_enable tracks the resulting state.
+ */
+static void am65_cpts_perout_enable_hw(struct am65_cpts *cpts,
+				       struct ptp_perout_request *req, int on)
+{
+	u64 ns_period, ns_start, cycles;
+	struct timespec64 ts;
+	u32 val;
+
+	if (on) {
+		ts.tv_sec = req->period.sec;
+		ts.tv_nsec = req->period.nsec;
+		ns_period = timespec64_to_ns(&ts);
+
+		/* period in ns -> period in refclk cycles */
+		cycles = (ns_period * cpts->refclk_freq) / NSEC_PER_SEC;
+
+		ts.tv_sec = req->start.sec;
+		ts.tv_nsec = req->start.nsec;
+		ns_start = timespec64_to_ns(&ts);
+
+		val = upper_32_bits(ns_start);
+		am65_cpts_write32(cpts, val, genf[req->index].comp_hi);
+		val = lower_32_bits(ns_start);
+		am65_cpts_write32(cpts, val, genf[req->index].comp_lo);
+		/* only the low 32 bits of the cycle count are programmed */
+		val = lower_32_bits(cycles);
+		am65_cpts_write32(cpts, val, genf[req->index].length);
+
+		cpts->genf_enable |= BIT(req->index);
+	} else {
+		am65_cpts_write32(cpts, 0, genf[req->index].length);
+
+		cpts->genf_enable &= ~BIT(req->index);
+	}
+}
+
+/* Enable or disable the periodic output described by @req. Does nothing when
+ * the output is already in the requested state; that check reads
+ * cpts->genf_enable before ptp_clk_lock is taken. Always returns 0.
+ */
+static int am65_cpts_perout_enable(struct am65_cpts *cpts,
+				   struct ptp_perout_request *req, int on)
+{
+	if (!!(cpts->genf_enable & BIT(req->index)) == !!on)
+		return 0;
+
+	mutex_lock(&cpts->ptp_clk_lock);
+	am65_cpts_perout_enable_hw(cpts, req, on);
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	dev_dbg(cpts->dev, "%s: GenF:%u %s\n",
+		__func__, req->index, on ? "enabled" : "disabled");
+
+	return 0;
+}
+
+/* PTP .enable callback: dispatch external timestamp and periodic output
+ * requests; every other request type yields -EOPNOTSUPP.
+ */
+static int am65_cpts_ptp_enable(struct ptp_clock_info *ptp,
+				struct ptp_clock_request *rq, int on)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+
+	if (rq->type == PTP_CLK_REQ_EXTTS)
+		return am65_cpts_extts_enable(cpts, rq->extts.index, on);
+
+	if (rq->type == PTP_CLK_REQ_PEROUT)
+		return am65_cpts_perout_enable(cpts, &rq->perout, on);
+
+	return -EOPNOTSUPP;
+}
+
+/* defined further down; needed by the template below */
+static long am65_cpts_ts_work(struct ptp_clock_info *ptp);
+
+/* Template PTP clock description; am65_cpts_create() copies it into each
+ * instance's ptp_info.
+ */
+static struct ptp_clock_info am65_ptp_info = {
+	.owner		= THIS_MODULE,
+	.name		= "CTPS timer",
+	.adjfreq	= am65_cpts_ptp_adjfreq,
+	.adjtime	= am65_cpts_ptp_adjtime,
+	.gettimex64	= am65_cpts_ptp_gettimex,
+	.settime64	= am65_cpts_ptp_settime,
+	.enable		= am65_cpts_ptp_enable,
+	.do_aux_work	= am65_cpts_ts_work,
+};
+
+/* Try to deliver the timestamp of @event to a queued TX skb.
+ *
+ * The pending skbs are moved to a private list, walked, and whatever is left
+ * is spliced back onto cpts->txq. The first skb whose stored key equals the
+ * masked event1 word gets the hardware timestamp and is released. Skbs seen
+ * before a match whose timeout has passed are dropped from the queue.
+ *
+ * Returns true if a matching skb was found.
+ */
+static bool am65_cpts_match_tx_ts(struct am65_cpts *cpts,
+				  struct am65_cpts_event *event)
+{
+	struct sk_buff_head txq_list;
+	struct sk_buff *skb, *tmp;
+	unsigned long flags;
+	bool found = false;
+	u32 mtype_seqid;
+
+	mtype_seqid = event->event1 &
+		      (AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK |
+		       AM65_CPTS_EVENT_1_EVENT_TYPE_MASK |
+		       AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK);
+
+	__skb_queue_head_init(&txq_list);
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
+	skb_queue_splice_init(&cpts->txq, &txq_list);
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
+
+	/* txq_list is local to this function, so it is walked without a lock */
+	skb_queue_walk_safe(&txq_list, skb, tmp) {
+		struct skb_shared_hwtstamps ssh;
+		struct am65_cpts_skb_cb_data *skb_cb =
+					(struct am65_cpts_skb_cb_data *)skb->cb;
+
+		if (mtype_seqid == skb_cb->skb_mtype_seqid) {
+			u64 ns = event->timestamp;
+
+			memset(&ssh, 0, sizeof(ssh));
+			ssh.hwtstamp = ns_to_ktime(ns);
+			skb_tstamp_tx(skb, &ssh);
+			found = true;
+			__skb_unlink(skb, &txq_list);
+			dev_consume_skb_any(skb);
+			dev_dbg(cpts->dev,
+				"match tx timestamp mtype_seqid %08x\n",
+				mtype_seqid);
+			break;
+		}
+
+		if (time_after(jiffies, skb_cb->tmo)) {
+			/* timeout any expired skbs over 100 ms;
+			 * report the id of the skb being dropped, not the
+			 * id of the event it failed to match
+			 */
+			dev_dbg(cpts->dev,
+				"expiring tx timestamp mtype_seqid %08x\n",
+				skb_cb->skb_mtype_seqid);
+			__skb_unlink(skb, &txq_list);
+			dev_consume_skb_any(skb);
+		}
+	}
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
+	skb_queue_splice(&txq_list, &cpts->txq);
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
+
+	return found;
+}
+
+/* Match all queued events against the pending TX skbs.
+ *
+ * cpts->events is moved to a local list under cpts->lock and processed with
+ * the lock dropped. Events that matched an skb or have timed out go back to
+ * the free pool; the others are returned to cpts->events.
+ */
+static void am65_cpts_find_ts(struct am65_cpts *cpts)
+{
+	struct am65_cpts_event *event;
+	struct list_head *this, *next;
+	LIST_HEAD(events_free);
+	unsigned long flags;
+	LIST_HEAD(events);
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	list_splice_init(&cpts->events, &events);
+	spin_unlock_irqrestore(&cpts->lock, flags);
+
+	list_for_each_safe(this, next, &events) {
+		event = list_entry(this, struct am65_cpts_event, list);
+		if (am65_cpts_match_tx_ts(cpts, event) ||
+		    time_after(jiffies, event->tmo)) {
+			list_del_init(&event->list);
+			list_add(&event->list, &events_free);
+		}
+	}
+
+	spin_lock_irqsave(&cpts->lock, flags);
+	list_splice_tail(&events, &cpts->events);
+	list_splice_tail(&events_free, &cpts->pool);
+	spin_unlock_irqrestore(&cpts->lock, flags);
+}
+
+/* PTP .do_aux_work callback: match events to pending TX skbs. Returns
+ * AM65_CPTS_SKB_TX_WORK_TIMEOUT while skbs are still queued, -1 otherwise.
+ */
+static long am65_cpts_ts_work(struct ptp_clock_info *ptp)
+{
+	struct am65_cpts *cpts = container_of(ptp, struct am65_cpts, ptp_info);
+	unsigned long flags;
+	long delay = -1;
+
+	am65_cpts_find_ts(cpts);
+
+	spin_lock_irqsave(&cpts->txq.lock, flags);
+	if (!skb_queue_empty(&cpts->txq))
+		delay = AM65_CPTS_SKB_TX_WORK_TIMEOUT;
+	spin_unlock_irqrestore(&cpts->txq.lock, flags);
+
+	return delay;
+}
+
+/**
+ * am65_cpts_rx_enable - enable or disable rx timestamping
+ * @cpts: cpts handle
+ * @en: true to enable, false to disable
+ *
+ * This function sets or clears AM65_CPTS_CONTROL_TSTAMP_EN in the control
+ * register under ptp_clk_lock. The CPTS can timestamp all rx packets.
+ */
+void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en)
+{
+	u32 val;
+
+	mutex_lock(&cpts->ptp_clk_lock);
+	val = am65_cpts_read32(cpts, control);
+	if (en)
+		val |= AM65_CPTS_CONTROL_TSTAMP_EN;
+	else
+		val &= ~AM65_CPTS_CONTROL_TSTAMP_EN;
+	am65_cpts_write32(cpts, val, control);
+	mutex_unlock(&cpts->ptp_clk_lock);
+}
+EXPORT_SYMBOL_GPL(am65_cpts_rx_enable);
+
+/* Build the message type / sequence id key of a PTP packet.
+ *
+ * The packet is classified with ptp_classify_raw() and the PTP header is
+ * located behind the optional VLAN tag and the L2/IPv4/IPv6 encapsulation.
+ * On success *@mtype_seqid holds the message type and sequence id placed in
+ * the same bit positions as in the event1 word of a CPTS event.
+ *
+ * Returns 1 if the key was produced, 0 if the packet is not a supported PTP
+ * packet or is too short.
+ */
+static int am65_skb_get_mtype_seqid(struct sk_buff *skb, u32 *mtype_seqid)
+{
+	unsigned int ptp_class = ptp_classify_raw(skb);
+	u8 *msgtype, *data = skb->data;
+	unsigned int offset = 0;
+	__be16 *seqid;
+
+	if (ptp_class == PTP_CLASS_NONE)
+		return 0;
+
+	if (ptp_class & PTP_CLASS_VLAN)
+		offset += VLAN_HLEN;
+
+	switch (ptp_class & PTP_CLASS_PMASK) {
+	case PTP_CLASS_IPV4:
+		offset += ETH_HLEN + IPV4_HLEN(data + offset) + UDP_HLEN;
+		break;
+	case PTP_CLASS_IPV6:
+		offset += ETH_HLEN + IP6_HLEN + UDP_HLEN;
+		break;
+	case PTP_CLASS_L2:
+		offset += ETH_HLEN;
+		break;
+	default:
+		return 0;
+	}
+
+	/* make sure the sequence id field lies within the packet */
+	if (skb->len + ETH_HLEN < offset + OFF_PTP_SEQUENCE_ID + sizeof(*seqid))
+		return 0;
+
+	/* PTPv1 keeps the message type in the control field */
+	if (unlikely(ptp_class & PTP_CLASS_V1))
+		msgtype = data + offset + OFF_PTP_CONTROL;
+	else
+		msgtype = data + offset;
+
+	seqid = (__be16 *)(data + offset + OFF_PTP_SEQUENCE_ID);
+	*mtype_seqid = (*msgtype << AM65_CPTS_EVENT_1_MESSAGE_TYPE_SHIFT) &
+			AM65_CPTS_EVENT_1_MESSAGE_TYPE_MASK;
+	*mtype_seqid |= (ntohs(*seqid) & AM65_CPTS_EVENT_1_SEQUENCE_ID_MASK);
+
+	return 1;
+}
+
+/**
+ * am65_cpts_tx_timestamp - save tx packet for timestamping
+ * @cpts: cpts handle
+ * @skb: packet
+ *
+ * This function queues a tx packet for timestamping if the packet was marked
+ * SKBTX_IN_PROGRESS. An extra reference to the skb is taken while it is
+ * queued. Further processing is done from the PTP auxiliary worker.
+ */
+void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb)
+{
+	struct am65_cpts_skb_cb_data *skb_cb = (void *)skb->cb;
+
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_IN_PROGRESS))
+		return;
+
+	/* add frame to queue for processing later.
+	 * The periodic FIFO check will handle this.
+	 */
+	skb_get(skb);
+	/* get the timestamp for timeouts */
+	skb_cb->tmo = jiffies + msecs_to_jiffies(100);
+	skb_queue_tail(&cpts->txq, skb);
+	ptp_schedule_worker(cpts->ptp_clock, 0);
+}
+EXPORT_SYMBOL_GPL(am65_cpts_tx_timestamp);
+
+/**
+ * am65_cpts_prep_tx_timestamp - check and prepare tx packet for timestamping
+ * @cpts: cpts handle
+ * @skb: packet
+ *
+ * This function should be called from .xmit().
+ * It checks if the packet can be timestamped, fills internal cpts data
+ * in skb->cb and marks the packet as SKBTX_IN_PROGRESS. Packets without
+ * SKBTX_HW_TSTAMP, or for which no PTP key can be built, are left untouched.
+ */
+void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb)
+{
+	struct am65_cpts_skb_cb_data *skb_cb = (void *)skb->cb;
+	int ret;
+
+	if (!(skb_shinfo(skb)->tx_flags & SKBTX_HW_TSTAMP))
+		return;
+
+	ret = am65_skb_get_mtype_seqid(skb, &skb_cb->skb_mtype_seqid);
+	if (!ret)
+		return;
+	/* tag the key as a TX event so it compares equal to the event1 bits */
+	skb_cb->skb_mtype_seqid |= (AM65_CPTS_EV_TX <<
+				   AM65_CPTS_EVENT_1_EVENT_TYPE_SHIFT);
+
+	skb_shinfo(skb)->tx_flags |= SKBTX_IN_PROGRESS;
+}
+EXPORT_SYMBOL_GPL(am65_cpts_prep_tx_timestamp);
+
+/* Return the PHC index recorded when the PTP clock was registered. */
+int am65_cpts_phc_index(struct am65_cpts *cpts)
+{
+	return cpts->phc_index;
+}
+EXPORT_SYMBOL_GPL(am65_cpts_phc_index);
+
+/* devm action: remove the clock provider, unregister the mux clock and drop
+ * the reference to the mux device node.
+ */
+static void cpts_free_clk_mux(void *data)
+{
+	struct am65_cpts *cpts = data;
+
+	of_clk_del_provider(cpts->clk_mux_np);
+	clk_hw_unregister_mux(cpts->clk_mux_hw);
+	of_node_put(cpts->clk_mux_np);
+}
+
+/* Register the optional "refclk-mux" child node as a mux clock provider.
+ *
+ * @cpts: cpts handle
+ * @node: CPTS device node that may contain the "refclk-mux" child
+ *
+ * The mux is backed by the rftclk_sel register. Teardown is handed to a devm
+ * action (cpts_free_clk_mux) once everything is registered.
+ *
+ * Returns 0 on success or when there is no "refclk-mux" child, a negative
+ * error code otherwise.
+ */
+static int cpts_of_mux_clk_setup(struct am65_cpts *cpts,
+				 struct device_node *node)
+{
+	unsigned int num_parents;
+	const char **parent_names;
+	char *clk_mux_name;
+	void __iomem *reg;
+	int ret = -EINVAL;
+
+	cpts->clk_mux_np = of_get_child_by_name(node, "refclk-mux");
+	if (!cpts->clk_mux_np)
+		return 0;
+
+	num_parents = of_clk_get_parent_count(cpts->clk_mux_np);
+	if (num_parents < 1) {
+		dev_err(cpts->dev, "mux-clock %pOF must have parents\n",
+			cpts->clk_mux_np);
+		goto mux_fail;
+	}
+
+	/* devm_kcalloc() takes the element count first, then the size */
+	parent_names = devm_kcalloc(cpts->dev, num_parents, sizeof(char *),
+				    GFP_KERNEL);
+	if (!parent_names) {
+		ret = -ENOMEM;
+		goto mux_fail;
+	}
+
+	of_clk_parent_fill(cpts->clk_mux_np, parent_names, num_parents);
+
+	clk_mux_name = devm_kasprintf(cpts->dev, GFP_KERNEL, "%s.%pOFn",
+				      dev_name(cpts->dev), cpts->clk_mux_np);
+	if (!clk_mux_name) {
+		ret = -ENOMEM;
+		goto mux_fail;
+	}
+
+	reg = &cpts->reg->rftclk_sel;
+	/* dev must be NULL to avoid recursive incrementing
+	 * of module refcnt
+	 */
+	cpts->clk_mux_hw = clk_hw_register_mux(NULL, clk_mux_name,
+					       parent_names, num_parents,
+					       0, reg, 0, 5, 0, NULL);
+	if (IS_ERR(cpts->clk_mux_hw)) {
+		ret = PTR_ERR(cpts->clk_mux_hw);
+		goto mux_fail;
+	}
+
+	ret = of_clk_add_hw_provider(cpts->clk_mux_np, of_clk_hw_simple_get,
+				     cpts->clk_mux_hw);
+	if (ret)
+		goto clk_hw_register;
+
+	/* on failure the action has already run, so no unwinding here */
+	ret = devm_add_action_or_reset(cpts->dev, cpts_free_clk_mux, cpts);
+	if (ret)
+		dev_err(cpts->dev, "failed to add clkmux reset action %d\n",
+			ret);
+
+	return ret;
+
+clk_hw_register:
+	clk_hw_unregister_mux(cpts->clk_mux_hw);
+mux_fail:
+	of_node_put(cpts->clk_mux_np);
+	return ret;
+}
+
+/* Read the optional input/output counts from the device tree, then set up
+ * the optional reference clock mux. Returns the mux setup result.
+ */
+static int am65_cpts_of_parse(struct am65_cpts *cpts, struct device_node *node)
+{
+	u32 val;
+
+	if (!of_property_read_u32(node, "ti,cpts-ext-ts-inputs", &val))
+		cpts->ext_ts_inputs = val;
+
+	if (!of_property_read_u32(node, "ti,cpts-periodic-outputs", &val))
+		cpts->genf_num = val;
+
+	return cpts_of_mux_clk_setup(cpts, node);
+}
+
+/* devm action: unregister the PTP clock, disable the CPTS and release the
+ * reference clock.
+ */
+static void am65_cpts_release(void *data)
+{
+	struct am65_cpts *cpts = data;
+
+	ptp_clock_unregister(cpts->ptp_clock);
+	am65_cpts_disable(cpts);
+	clk_disable_unprepare(cpts->refclk);
+}
+
+/**
+ * am65_cpts_create - allocate and start a CPTS instance
+ * @dev: device owning the managed resources
+ * @regs: mapped CPTS register block
+ * @node: CPTS device tree node
+ *
+ * Parses the device tree node, enables the reference clock, starts the CPTS
+ * in 64-bit mode, sets its time from the current system time, registers the
+ * PTP clock and requests the "cpts" interrupt.
+ *
+ * Returns the new handle, or an ERR_PTR() on failure (never NULL).
+ */
+struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
+				   struct device_node *node)
+{
+	struct am65_cpts *cpts;
+	int ret, i;
+
+	cpts = devm_kzalloc(dev, sizeof(*cpts), GFP_KERNEL);
+	if (!cpts)
+		return ERR_PTR(-ENOMEM);
+
+	cpts->dev = dev;
+	cpts->reg = (struct am65_cpts_regs __iomem *)regs;
+
+	cpts->irq = of_irq_get_byname(node, "cpts");
+	if (cpts->irq <= 0) {
+		ret = cpts->irq ?: -ENXIO;
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get IRQ number (err = %d)\n",
+				ret);
+		return ERR_PTR(ret);
+	}
+
+	ret = am65_cpts_of_parse(cpts, node);
+	if (ret)
+		return ERR_PTR(ret);
+
+	mutex_init(&cpts->ptp_clk_lock);
+	INIT_LIST_HEAD(&cpts->events);
+	INIT_LIST_HEAD(&cpts->pool);
+	spin_lock_init(&cpts->lock);
+	skb_queue_head_init(&cpts->txq);
+
+	/* all event descriptors start out in the free pool */
+	for (i = 0; i < AM65_CPTS_MAX_EVENTS; i++)
+		list_add(&cpts->pool_data[i].list, &cpts->pool);
+
+	cpts->refclk = devm_get_clk_from_child(dev, node, "cpts");
+	if (IS_ERR(cpts->refclk)) {
+		ret = PTR_ERR(cpts->refclk);
+		if (ret != -EPROBE_DEFER)
+			dev_err(dev, "Failed to get refclk %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	ret = clk_prepare_enable(cpts->refclk);
+	if (ret) {
+		dev_err(dev, "Failed to enable refclk %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	cpts->refclk_freq = clk_get_rate(cpts->refclk);
+
+	/* max_adj depends on this instance's clock rate, so set it on the
+	 * per-instance copy and leave the shared template untouched
+	 */
+	cpts->ptp_info = am65_ptp_info;
+	cpts->ptp_info.max_adj = cpts->refclk_freq / AM65_CPTS_MIN_PPM;
+
+	if (cpts->ext_ts_inputs)
+		cpts->ptp_info.n_ext_ts = cpts->ext_ts_inputs;
+	if (cpts->genf_num)
+		cpts->ptp_info.n_per_out = cpts->genf_num;
+
+	am65_cpts_set_add_val(cpts);
+
+	am65_cpts_write32(cpts, AM65_CPTS_CONTROL_EN | AM65_CPTS_CONTROL_64MODE,
+			  control);
+	am65_cpts_write32(cpts, AM65_CPTS_INT_ENABLE_TS_PEND_EN, int_enable);
+
+	/* set time to the current system time */
+	am65_cpts_settime(cpts, ktime_to_ns(ktime_get_real()));
+
+	cpts->ptp_clock = ptp_clock_register(&cpts->ptp_info, cpts->dev);
+	if (IS_ERR_OR_NULL(cpts->ptp_clock)) {
+		/* ret still holds 0 from clk_prepare_enable(); without
+		 * setting it here an ERR_PTR() result from
+		 * ptp_clock_register() would be returned as ERR_PTR(0)
+		 */
+		ret = cpts->ptp_clock ? PTR_ERR(cpts->ptp_clock) : -ENODEV;
+		dev_err(dev, "Failed to register ptp clk %d\n", ret);
+		goto refclk_disable;
+	}
+	cpts->phc_index = ptp_clock_index(cpts->ptp_clock);
+
+	/* from here on am65_cpts_release() undoes the clock and PTP setup */
+	ret = devm_add_action_or_reset(dev, am65_cpts_release, cpts);
+	if (ret) {
+		dev_err(dev, "failed to add ptpclk reset action %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	ret = devm_request_threaded_irq(dev, cpts->irq, NULL,
+					am65_cpts_interrupt,
+					IRQF_ONESHOT, dev_name(dev), cpts);
+	if (ret < 0) {
+		dev_err(cpts->dev, "error attaching irq %d\n", ret);
+		return ERR_PTR(ret);
+	}
+
+	dev_info(dev, "CPTS ver 0x%08x, freq:%u, add_val:%u\n",
+		 am65_cpts_read32(cpts, idver),
+		 cpts->refclk_freq, cpts->ts_add_val);
+
+	return cpts;
+
+refclk_disable:
+	clk_disable_unprepare(cpts->refclk);
+	return ERR_PTR(ret);
+}
+EXPORT_SYMBOL_GPL(am65_cpts_create);
+
+/* Platform probe: map the "cpts" memory resource and create the CPTS
+ * instance for this device.
+ */
+static int am65_cpts_probe(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct am65_cpts *cpts;
+	struct resource *res;
+	void __iomem *base;
+
+	res = platform_get_resource_byname(pdev, IORESOURCE_MEM, "cpts");
+	base = devm_ioremap_resource(dev, res);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	cpts = am65_cpts_create(dev, base, dev->of_node);
+
+	return PTR_ERR_OR_ZERO(cpts);
+}
+
+/* Device tree compatible strings handled by this driver. */
+static const struct of_device_id am65_cpts_of_match[] = {
+	{ .compatible = "ti,am65-cpts", },
+	{ .compatible = "ti,j721e-cpts", },
+	{},
+};
+MODULE_DEVICE_TABLE(of, am65_cpts_of_match);
+
+/* Platform driver glue; only a probe callback is provided here. */
+static struct platform_driver am65_cpts_driver = {
+	.probe		= am65_cpts_probe,
+	.driver		= {
+		.name	= "am65-cpts",
+		.of_match_table = am65_cpts_of_match,
+	},
+};
+module_platform_driver(am65_cpts_driver);
+
+MODULE_LICENSE("GPL v2");
+MODULE_AUTHOR("Grygorii Strashko <grygorii.strashko@ti.com>");
+MODULE_DESCRIPTION("TI K3 AM65 CPTS driver");
diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h
new file mode 100644
index 000000000000..0b55dc12ba48
--- /dev/null
+++ b/drivers/net/ethernet/ti/am65-cpts.h
@@ -0,0 +1,50 @@
+/* SPDX-License-Identifier: GPL-2.0+ */
+/* TI K3 AM65 CPTS driver interface
+ *
+ * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com
+ */
+
+#ifndef K3_CPTS_H_
+#define K3_CPTS_H_
+
+#include <linux/device.h>
+#include <linux/err.h>
+#include <linux/of.h>
+
+/* Both types are only used through pointers in this header. */
+struct am65_cpts;
+struct sk_buff;
+
+#if IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)
+struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
+				   struct device_node *node);
+int am65_cpts_phc_index(struct am65_cpts *cpts);
+void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb);
+void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb);
+void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en);
+#else
+/* Stubs used when the CPTS driver is not built: creation reports
+ * -EOPNOTSUPP, the PHC index is -1 and the remaining calls do nothing.
+ */
+static inline struct am65_cpts *am65_cpts_create(struct device *dev,
+						 void __iomem *regs,
+						 struct device_node *node)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static inline int am65_cpts_phc_index(struct am65_cpts *cpts)
+{
+	return -1;
+}
+
+static inline void am65_cpts_tx_timestamp(struct am65_cpts *cpts,
+					  struct sk_buff *skb)
+{
+}
+
+static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts,
+					       struct sk_buff *skb)
+{
+}
+
+static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en)
+{
+}
+#endif
+
+#endif /* K3_CPTS_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From b1f66a5bee07adaab218fb98800928185e5f0f18 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:07 +0300
Subject: net: ethernet: ti: am65-cpsw-nuss: enable packet timestamping support

The MCU CPSW Common Platform Time Sync (CPTS) module makes it possible to
timestamp TX PTP packets and all RX packets.

This enables corresponding support in TI AM65x/J721E MCU CPSW driver.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig             |   1 +
 drivers/net/ethernet/ti/am65-cpsw-ethtool.c |  24 +++-
 drivers/net/ethernet/ti/am65-cpsw-nuss.c    | 172 ++++++++++++++++++++++++++++
 drivers/net/ethernet/ti/am65-cpsw-nuss.h    |   6 +-
 4 files changed, 201 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 2b7590cece39..69b64620a454 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -100,6 +100,7 @@ config TI_K3_AM65_CPSW_NUSS
 	depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
 	select TI_DAVINCI_MDIO
 	imply PHY_TI_GMII_SEL
+	imply TI_AM65_CPTS
 	help
 	  This driver supports TI K3 AM654/J721E CPSW2G Ethernet SubSystem.
 	  The two-port Gigabit Ethernet MAC (MCU_CPSW0) subsystem provides
diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
index c3502aa15ea0..23661a6ed426 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
@@ -12,6 +12,7 @@
 
 #include "am65-cpsw-nuss.h"
 #include "cpsw_ale.h"
+#include "am65-cpts.h"
 
 #define AM65_CPSW_REGDUMP_VER 0x1
 
@@ -694,6 +695,27 @@ static void am65_cpsw_get_ethtool_stats(struct net_device *ndev,
 					hw_stats[i].offset);
 }
 
+/* ethtool .get_ts_info: report hardware and software timestamping support
+ * and the CPTS PHC index. Falls back to ethtool_op_get_ts_info() when the
+ * CPTS driver is not enabled.
+ */
+static int am65_cpsw_get_ethtool_ts_info(struct net_device *ndev,
+					 struct ethtool_ts_info *info)
+{
+	struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+
+	if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS))
+		return ethtool_op_get_ts_info(ndev, info);
+
+	info->so_timestamping =
+		SOF_TIMESTAMPING_TX_HARDWARE |
+		SOF_TIMESTAMPING_TX_SOFTWARE |
+		SOF_TIMESTAMPING_RX_HARDWARE |
+		SOF_TIMESTAMPING_RX_SOFTWARE |
+		SOF_TIMESTAMPING_SOFTWARE |
+		SOF_TIMESTAMPING_RAW_HARDWARE;
+	info->phc_index = am65_cpts_phc_index(common->cpts);
+	info->tx_types = BIT(HWTSTAMP_TX_OFF) | BIT(HWTSTAMP_TX_ON);
+	info->rx_filters = BIT(HWTSTAMP_FILTER_NONE) | BIT(HWTSTAMP_FILTER_ALL);
+	return 0;
+}
+
 static u32 am65_cpsw_get_ethtool_priv_flags(struct net_device *ndev)
 {
 	struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
@@ -730,7 +752,7 @@ const struct ethtool_ops am65_cpsw_ethtool_ops_slave = {
 	.get_sset_count		= am65_cpsw_get_sset_count,
 	.get_strings		= am65_cpsw_get_strings,
 	.get_ethtool_stats	= am65_cpsw_get_ethtool_stats,
-	.get_ts_info		= ethtool_op_get_ts_info,
+	.get_ts_info		= am65_cpsw_get_ethtool_ts_info,
 	.get_priv_flags		= am65_cpsw_get_ethtool_priv_flags,
 	.set_priv_flags		= am65_cpsw_set_ethtool_priv_flags,
 
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 2bf56733ba94..bb391286d89e 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -30,6 +30,7 @@
 #include "cpsw_sl.h"
 #include "am65-cpsw-nuss.h"
 #include "k3-cppi-desc-pool.h"
+#include "am65-cpts.h"
 
 #define AM65_CPSW_SS_BASE	0x0
 #define AM65_CPSW_SGMII_BASE	0x100
@@ -668,6 +669,18 @@ static void am65_cpsw_nuss_rx_cleanup(void *data, dma_addr_t desc_dma)
 	dev_kfree_skb_any(skb);
 }
 
+/* Store the RX hardware timestamp in @skb. The 64-bit nanosecond value is
+ * assembled from @psdata[0] (low word) and @psdata[1] (high word).
+ */
+static void am65_cpsw_nuss_rx_ts(struct sk_buff *skb, u32 *psdata)
+{
+	struct skb_shared_hwtstamps *ssh;
+	u64 ns;
+
+	ns = ((u64)psdata[1] << 32) | psdata[0];
+
+	ssh = skb_hwtstamps(skb);
+	memset(ssh, 0, sizeof(*ssh));
+	ssh->hwtstamp = ns_to_ktime(ns);
+}
+
 /* RX psdata[2] word format - checksum information */
 #define AM65_CPSW_RX_PSD_CSUM_ADD	GENMASK(15, 0)
 #define AM65_CPSW_RX_PSD_CSUM_ERR	BIT(16)
@@ -745,6 +758,9 @@ static int am65_cpsw_nuss_rx_packets(struct am65_cpsw_common *common,
 	skb->dev = ndev;
 
 	psdata = cppi5_hdesc_get_psdata(desc_rx);
+	/* add RX timestamp */
+	if (port->rx_ts_enabled)
+		am65_cpsw_nuss_rx_ts(skb, psdata);
 	csum_info = psdata[2];
 	dev_dbg(dev, "%s rx csum_info:%#x\n", __func__, csum_info);
 
@@ -904,6 +920,8 @@ static int am65_cpsw_nuss_tx_compl_packets(struct am65_cpsw_common *common,
 
 		ndev = skb->dev;
 
+		am65_cpts_tx_timestamp(common->cpts, skb);
+
 		ndev_priv = netdev_priv(ndev);
 		stats = this_cpu_ptr(ndev_priv->stats);
 		u64_stats_update_begin(&stats->syncp);
@@ -995,6 +1013,10 @@ static netdev_tx_t am65_cpsw_nuss_ndo_slave_xmit(struct sk_buff *skb,
 	/* padding enabled in hw */
 	pkt_len = skb_headlen(skb);
 
+	/* SKB TX timestamp */
+	if (port->tx_ts_enabled)
+		am65_cpts_prep_tx_timestamp(common->cpts, skb);
+
 	q_idx = skb_get_queue_mapping(skb);
 	dev_dbg(dev, "%s skb_queue:%d\n", __func__, q_idx);
 
@@ -1158,6 +1180,111 @@ static int am65_cpsw_nuss_ndo_slave_set_mac_address(struct net_device *ndev,
 	return 0;
 }
 
+/* Apply a hardware timestamping configuration supplied by user space.
+ *
+ * TX timestamping can be switched on or off. Any accepted RX filter other
+ * than HWTSTAMP_FILTER_NONE enables timestamping of all RX packets and is
+ * reported back as HWTSTAMP_FILTER_ALL. The port timestamp registers are
+ * rewritten on every call.
+ *
+ * Returns 0 on success, -EOPNOTSUPP without CPTS support, -EFAULT on a user
+ * copy failure, -ERANGE for an unsupported tx_type or rx_filter.
+ */
+static int am65_cpsw_nuss_hwtstamp_set(struct net_device *ndev,
+				       struct ifreq *ifr)
+{
+	struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	u32 ts_ctrl, seq_id, ts_ctrl_ltype2, ts_vlan_ltype;
+	struct hwtstamp_config cfg;
+
+	if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS))
+		return -EOPNOTSUPP;
+
+	if (copy_from_user(&cfg, ifr->ifr_data, sizeof(cfg)))
+		return -EFAULT;
+
+	/* TX HW timestamp */
+	switch (cfg.tx_type) {
+	case HWTSTAMP_TX_OFF:
+	case HWTSTAMP_TX_ON:
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	switch (cfg.rx_filter) {
+	case HWTSTAMP_FILTER_NONE:
+		port->rx_ts_enabled = false;
+		break;
+	case HWTSTAMP_FILTER_ALL:
+	case HWTSTAMP_FILTER_SOME:
+	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V1_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V1_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L4_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L4_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_L2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_L2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_L2_DELAY_REQ:
+	case HWTSTAMP_FILTER_PTP_V2_EVENT:
+	case HWTSTAMP_FILTER_PTP_V2_SYNC:
+	case HWTSTAMP_FILTER_PTP_V2_DELAY_REQ:
+	case HWTSTAMP_FILTER_NTP_ALL:
+		/* every accepted filter is widened to "all packets" */
+		port->rx_ts_enabled = true;
+		cfg.rx_filter = HWTSTAMP_FILTER_ALL;
+		break;
+	default:
+		return -ERANGE;
+	}
+
+	port->tx_ts_enabled = (cfg.tx_type == HWTSTAMP_TX_ON);
+
+	/* cfg TX timestamp */
+	seq_id = (AM65_CPSW_TS_SEQ_ID_OFFSET <<
+		  AM65_CPSW_PN_TS_SEQ_ID_OFFSET_SHIFT) | ETH_P_1588;
+
+	ts_vlan_ltype = ETH_P_8021Q;
+
+	ts_ctrl_ltype2 = ETH_P_1588 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_107 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_129 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_130 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_131 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_132 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_319 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_320 |
+			 AM65_CPSW_PN_TS_CTL_LTYPE2_TS_TTL_NONZERO;
+
+	ts_ctrl = AM65_CPSW_TS_EVENT_MSG_TYPE_BITS <<
+		  AM65_CPSW_PN_TS_CTL_MSG_TYPE_EN_SHIFT;
+
+	if (port->tx_ts_enabled)
+		ts_ctrl |= AM65_CPSW_TS_TX_ANX_ALL_EN |
+			   AM65_CPSW_PN_TS_CTL_TX_VLAN_LT1_EN;
+
+	writel(seq_id, port->port_base + AM65_CPSW_PORTN_REG_TS_SEQ_LTYPE_REG);
+	writel(ts_vlan_ltype, port->port_base +
+	       AM65_CPSW_PORTN_REG_TS_VLAN_LTYPE_REG);
+	writel(ts_ctrl_ltype2, port->port_base +
+	       AM65_CPSW_PORTN_REG_TS_CTL_LTYPE2);
+	writel(ts_ctrl, port->port_base + AM65_CPSW_PORTN_REG_TS_CTL);
+
+	/* en/dis RX timestamp */
+	am65_cpts_rx_enable(common->cpts, port->rx_ts_enabled);
+
+	/* hand the (possibly widened) configuration back to user space */
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
+/* Report the current hardware timestamping configuration of the port to user
+ * space, derived from the port's tx_ts_enabled / rx_ts_enabled flags.
+ *
+ * Returns 0 on success, -EOPNOTSUPP without CPTS support, -EFAULT if the
+ * copy to user space fails.
+ */
+static int am65_cpsw_nuss_hwtstamp_get(struct net_device *ndev,
+				       struct ifreq *ifr)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct hwtstamp_config cfg;
+
+	if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS))
+		return -EOPNOTSUPP;
+
+	cfg.flags = 0;
+	cfg.tx_type = port->tx_ts_enabled ?
+		      HWTSTAMP_TX_ON : HWTSTAMP_TX_OFF;
+	cfg.rx_filter = port->rx_ts_enabled ?
+			HWTSTAMP_FILTER_ALL : HWTSTAMP_FILTER_NONE;
+
+	return copy_to_user(ifr->ifr_data, &cfg, sizeof(cfg)) ? -EFAULT : 0;
+}
+
 static int am65_cpsw_nuss_ndo_slave_ioctl(struct net_device *ndev,
 					  struct ifreq *req, int cmd)
 {
@@ -1166,6 +1293,13 @@ static int am65_cpsw_nuss_ndo_slave_ioctl(struct net_device *ndev,
 	if (!netif_running(ndev))
 		return -EINVAL;
 
+	switch (cmd) {
+	case SIOCSHWTSTAMP:
+		return am65_cpsw_nuss_hwtstamp_set(ndev, req);
+	case SIOCGHWTSTAMP:
+		return am65_cpsw_nuss_hwtstamp_get(ndev, req);
+	}
+
 	if (!port->slave.phy)
 		return -EOPNOTSUPP;
 
@@ -1531,6 +1665,40 @@ static int am65_cpsw_am654_get_efuse_macid(struct device_node *of_node,
 	return 0;
 }
 
+static int am65_cpsw_init_cpts(struct am65_cpsw_common *common)
+{
+	struct device *dev = common->dev;
+	struct device_node *node;
+	struct am65_cpts *cpts;
+	void __iomem *reg_base;
+
+	if (!IS_ENABLED(CONFIG_TI_K3_AM65_CPTS))
+		return 0;
+
+	node = of_get_child_by_name(dev->of_node, "cpts");
+	if (!node) {
+		dev_err(dev, "%s cpts not found\n", __func__);
+		return -ENOENT;
+	}
+
+	reg_base = common->cpsw_base + AM65_CPSW_NU_CPTS_BASE;
+	cpts = am65_cpts_create(dev, reg_base, node);
+	if (IS_ERR(cpts)) {
+		int ret = PTR_ERR(cpts);
+
+		if (ret == -EOPNOTSUPP) {
+			dev_info(dev, "cpts disabled\n");
+			return 0;
+		}
+
+		dev_err(dev, "cpts create err %d\n", ret);
+		return ret;
+	}
+	common->cpts = cpts;
+
+	return 0;
+}
+
 static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 {
 	struct device_node *node, *port_np;
@@ -1899,6 +2067,10 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
 		goto err_of_clear;
 	}
 
+	ret = am65_cpsw_init_cpts(common);
+	if (ret)
+		goto err_of_clear;
+
 	/* init ports */
 	for (i = 0; i < common->port_num; i++)
 		am65_cpsw_nuss_slave_disable_unused(&common->ports[i]);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index 41ae5b4c7931..b1cddfd05a45 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -10,6 +10,8 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 
+struct am65_cpts;
+
 #define HOST_PORT_NUM		0
 
 #define AM65_CPSW_MAX_TX_QUEUES	8
@@ -37,6 +39,8 @@ struct am65_cpsw_port {
 	void __iomem			*stat_base;
 	bool				disabled;
 	struct am65_cpsw_slave_data	slave;
+	bool				tx_ts_enabled;
+	bool				rx_ts_enabled;
 };
 
 struct am65_cpsw_host {
@@ -96,8 +100,8 @@ struct am65_cpsw_common {
 
 	u32			nuss_ver;
 	u32			cpsw_ver;
-
 	bool			pf_p0_rx_ptype_rrobin;
+	struct am65_cpts	*cpts;
 };
 
 struct am65_cpsw_ndev_stats {
-- 
cgit v1.2.3-59-g8ed1b


From 885a26bae0223cac7f939a4a549f2df6c7f89bbd Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:08 +0300
Subject: arm64: dts: ti: k3-am65-mcu: add cpsw cpts node

Add DT node for the TI AM65x SoC Common Platform Time Sync (CPTS).

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 353d1e2532a7..0e773e0b3f89 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -247,5 +247,24 @@
 			clock-names = "fck";
 			bus_freq = <1000000>;
 		};
+
+		cpts {
+			clocks = <&mcu_cpsw_cpts_mux>;
+			clock-names = "cpts";
+			interrupts-extended = <&gic500 GIC_SPI 570 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "cpts";
+			ti,cpts-ext-ts-inputs = <4>;
+			ti,cpts-periodic-outputs = <2>;
+
+			mcu_cpsw_cpts_mux: refclk-mux {
+				#clock-cells = <0>;
+				clocks = <&k3_clks 118 5>, <&k3_clks 118 11>,
+					<&k3_clks 118 6>, <&k3_clks 118 3>,
+					<&k3_clks 118 8>, <&k3_clks 118 14>,
+					<&k3_clks 120 3>, <&k3_clks 121 3>;
+				assigned-clocks = <&mcu_cpsw_cpts_mux>;
+				assigned-clock-parents = <&k3_clks 118 5>;
+			};
+		};
 	};
 };
-- 
cgit v1.2.3-59-g8ed1b


From b3f7e95f03189dbc7351b6fcaf223a021fc349c9 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:09 +0300
Subject: arm64: dts: ti: k3-am65-main: add main navss cpts node

Add DT node for Main NAVSS CPTS module.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/ti/k3-am65-main.dtsi | 22 ++++++++++++++++++++++
 1 file changed, 22 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
index 11887c72f23a..0d533d52fcda 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-main.dtsi
@@ -570,6 +570,28 @@
 						<0x5>; /* RX_CHAN */
 			ti,sci-rm-range-rflow = <0x6>; /* GP RFLOW */
 		};
+
+		cpts@310d0000 {
+			compatible = "ti,am65-cpts";
+			reg = <0x0 0x310d0000 0x0 0x400>;
+			reg-names = "cpts";
+			clocks = <&main_cpts_mux>;
+			clock-names = "cpts";
+			interrupts-extended = <&intr_main_navss 163 0>;
+			interrupt-names = "cpts";
+			ti,cpts-periodic-outputs = <6>;
+			ti,cpts-ext-ts-inputs = <8>;
+
+			main_cpts_mux: refclk-mux {
+				#clock-cells = <0>;
+				clocks = <&k3_clks 118 5>, <&k3_clks 118 11>,
+					<&k3_clks 118 6>, <&k3_clks 118 3>,
+					<&k3_clks 118 8>, <&k3_clks 118 14>,
+					<&k3_clks 120 3>, <&k3_clks 121 3>;
+				assigned-clocks = <&main_cpts_mux>;
+				assigned-clock-parents = <&k3_clks 118 5>;
+			};
+		};
 	};
 
 	main_gpio0:  main_gpio0@600000 {
-- 
cgit v1.2.3-59-g8ed1b


From 29390928fe9a7cac7f8b1479f0f285034f16eb6f Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:10 +0300
Subject: arm64: dts: ti: k3-j721e-mcu: add mcu cpsw cpts node

Add DT node for The TI J721E MCU CPSW CPTS which is part of MCU CPSW NUSS.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index 3d6064125b40..37c355e5a833 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -338,5 +338,14 @@
 			clock-names = "fck";
 			bus_freq = <1000000>;
 		};
+
+		cpts {
+			clocks = <&k3_clks 18 2>;
+			clock-names = "cpts";
+			interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
+			interrupt-names = "cpts";
+			ti,cpts-ext-ts-inputs = <4>;
+			ti,cpts-periodic-outputs = <2>;
+		};
 	};
 };
-- 
cgit v1.2.3-59-g8ed1b


From 461d6d058cff60a0fa82c47bf963646418a34d62 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Fri, 1 May 2020 23:50:11 +0300
Subject: arm64: dts: ti: j721e-main: add main navss cpts node

Add DT node for Main NAVSS CPTS module.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/ti/k3-j721e-main.dtsi | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
index 0b9d14b838a1..844a5b50cf09 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-main.dtsi
@@ -254,6 +254,18 @@
 						<0x0c>; /* RX_UHCHAN */
 			ti,sci-rm-range-rflow = <0x00>; /* GP RFLOW */
 		};
+
+		cpts@310d0000 {
+			compatible = "ti,j721e-cpts";
+			reg = <0x0 0x310d0000 0x0 0x400>;
+			reg-names = "cpts";
+			clocks = <&k3_clks 201 1>;
+			clock-names = "cpts";
+			interrupts-extended = <&main_navss_intr 201 0>;
+			interrupt-names = "cpts";
+			ti,cpts-periodic-outputs = <6>;
+			ti,cpts-ext-ts-inputs = <8>;
+		};
 	};
 
 	main_pmx0: pinmux@11c000 {
-- 
cgit v1.2.3-59-g8ed1b


From e7511f560f5499c664c1ba9181c76044e2af578d Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sat, 2 May 2020 22:22:20 -0700
Subject: bonding: remove useless stats_lock_key

After commit b3e80d44f5b1
("bonding: fix lockdep warning in bond_get_stats()") the dynamic
key is no longer necessary, as we compute nest level at run-time.
So, we can just remove it to save some lockdep key entries.

Test commands:
 ip link add bond0 type bond
 ip link add bond1 type bond
 ip link set bond0 master bond1
 ip link set bond0 nomaster
 ip link set bond1 master bond0

Reported-and-tested-by: syzbot+aaa6fa4949cc5d9b7b25@syzkaller.appspotmail.com
Cc: Dmitry Vyukov <dvyukov@google.com>
Acked-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c | 3 ---
 include/net/bonding.h           | 1 -
 2 files changed, 4 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index d01871321d22..baa93191dfdd 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4491,7 +4491,6 @@ static void bond_uninit(struct net_device *bond_dev)
 
 	list_del(&bond->bond_list);
 
-	lockdep_unregister_key(&bond->stats_lock_key);
 	bond_debug_unregister(bond);
 }
 
@@ -4896,8 +4895,6 @@ static int bond_init(struct net_device *bond_dev)
 		return -ENOMEM;
 
 	spin_lock_init(&bond->stats_lock);
-	lockdep_register_key(&bond->stats_lock_key);
-	lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key);
 	netdev_lockdep_set_classes(bond_dev);
 
 	list_add_tail(&bond->bond_list, &bn->dev_list);
diff --git a/include/net/bonding.h b/include/net/bonding.h
index dc2ce31a1f52..0b696da5c115 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -237,7 +237,6 @@ struct bonding {
 	struct	 dentry *debug_dir;
 #endif /* CONFIG_DEBUG_FS */
 	struct rtnl_link_stats64 bond_stats;
-	struct lock_class_key stats_lock_key;
 };
 
 #define bond_slave_get_rcu(dev) \
-- 
cgit v1.2.3-59-g8ed1b


From 1a33e10e4a95cb109ff1145098175df3113313ef Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Sat, 2 May 2020 22:22:19 -0700
Subject: net: partially revert dynamic lockdep key changes

This patch reverts the following commits:

commit 064ff66e2bef84f1153087612032b5b9eab005bd
"bonding: add missing netdev_update_lockdep_key()"

commit 53d374979ef147ab51f5d632dfe20b14aebeccd0
"net: avoid updating qdisc_xmit_lock_key in netdev_update_lockdep_key()"

commit 1f26c0d3d24125992ab0026b0dab16c08df947c7
"net: fix kernel-doc warning in <linux/netdevice.h>"

commit ab92d68fc22f9afab480153bd82a20f6e2533769
"net: core: add generic lockdep keys"

but keeps the addr_list_lock_key because we still take
addr_list_lock in a nested fashion on stacked devices. Unlike xmit_lock,
this is safe because we don't take addr_list_lock on any fast
path.

Reported-and-tested-by: syzbot+aaa6fa4949cc5d9b7b25@syzkaller.appspotmail.com
Cc: Dmitry Vyukov <dvyukov@google.com>
Cc: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_main.c                   |  1 +
 drivers/net/ethernet/netronome/nfp/nfp_net_repr.c | 16 ++++
 drivers/net/hamradio/bpqether.c                   | 20 +++++
 drivers/net/hyperv/netvsc_drv.c                   |  2 +
 drivers/net/ipvlan/ipvlan_main.c                  |  2 +
 drivers/net/macsec.c                              |  2 +
 drivers/net/macvlan.c                             |  2 +
 drivers/net/ppp/ppp_generic.c                     |  2 +
 drivers/net/team/team.c                           |  1 +
 drivers/net/vrf.c                                 |  1 +
 drivers/net/wireless/intersil/hostap/hostap_hw.c  | 22 ++++++
 include/linux/netdevice.h                         | 27 +++++--
 net/8021q/vlan_dev.c                              | 21 ++++++
 net/batman-adv/soft-interface.c                   | 30 ++++++++
 net/bluetooth/6lowpan.c                           |  8 ++
 net/core/dev.c                                    | 90 ++++++++++++++++++-----
 net/dsa/slave.c                                   | 12 +++
 net/ieee802154/6lowpan/core.c                     |  8 ++
 net/l2tp/l2tp_eth.c                               |  1 +
 net/netrom/af_netrom.c                            | 21 ++++++
 net/rose/af_rose.c                                | 21 ++++++
 net/sched/sch_generic.c                           | 17 +++--
 22 files changed, 294 insertions(+), 33 deletions(-)

diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index 2e70e43c5df5..d01871321d22 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -4898,6 +4898,7 @@ static int bond_init(struct net_device *bond_dev)
 	spin_lock_init(&bond->stats_lock);
 	lockdep_register_key(&bond->stats_lock_key);
 	lockdep_set_class(&bond->stats_lock, &bond->stats_lock_key);
+	netdev_lockdep_set_classes(bond_dev);
 
 	list_add_tail(&bond->bond_list, &bn->dev_list);
 
diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
index 79d72c88bbef..b3cabc274121 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_repr.c
@@ -299,6 +299,20 @@ static void nfp_repr_clean(struct nfp_repr *repr)
 	nfp_port_free(repr->port);
 }
 
+static struct lock_class_key nfp_repr_netdev_xmit_lock_key;
+
+static void nfp_repr_set_lockdep_class_one(struct net_device *dev,
+					   struct netdev_queue *txq,
+					   void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &nfp_repr_netdev_xmit_lock_key);
+}
+
+static void nfp_repr_set_lockdep_class(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, nfp_repr_set_lockdep_class_one, NULL);
+}
+
 int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 		  u32 cmsg_port_id, struct nfp_port *port,
 		  struct net_device *pf_netdev)
@@ -308,6 +322,8 @@ int nfp_repr_init(struct nfp_app *app, struct net_device *netdev,
 	u32 repr_cap = nn->tlv_caps.repr_cap;
 	int err;
 
+	nfp_repr_set_lockdep_class(netdev);
+
 	repr->port = port;
 	repr->dst = metadata_dst_alloc(0, METADATA_HW_PORT_MUX, GFP_KERNEL);
 	if (!repr->dst)
diff --git a/drivers/net/hamradio/bpqether.c b/drivers/net/hamradio/bpqether.c
index fbea6f232819..206688154fdf 100644
--- a/drivers/net/hamradio/bpqether.c
+++ b/drivers/net/hamradio/bpqether.c
@@ -107,6 +107,25 @@ struct bpqdev {
 
 static LIST_HEAD(bpq_devices);
 
+/*
+ * bpqether network devices are paired with ethernet devices below them, so
+ * form a special "super class" of normal ethernet devices; split their locks
+ * off into a separate class since they always nest.
+ */
+static struct lock_class_key bpq_netdev_xmit_lock_key;
+
+static void bpq_set_lockdep_class_one(struct net_device *dev,
+				      struct netdev_queue *txq,
+				      void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &bpq_netdev_xmit_lock_key);
+}
+
+static void bpq_set_lockdep_class(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, bpq_set_lockdep_class_one, NULL);
+}
+
 /* ------------------------------------------------------------------------ */
 
 
@@ -477,6 +496,7 @@ static int bpq_new_device(struct net_device *edev)
 	err = register_netdevice(ndev);
 	if (err)
 		goto error;
+	bpq_set_lockdep_class(ndev);
 
 	/* List protected by RTNL */
 	list_add_rcu(&bpq->bpq_list, &bpq_devices);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index d8e86bdbfba1..c0b647a4c893 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -2456,6 +2456,8 @@ static int netvsc_probe(struct hv_device *dev,
 		NETIF_F_HW_VLAN_CTAG_RX;
 	net->vlan_features = net->features;
 
+	netdev_lockdep_set_classes(net);
+
 	/* MTU range: 68 - 1500 or 65521 */
 	net->min_mtu = NETVSC_MTU_MIN;
 	if (nvdev->nvsp_version >= NVSP_PROTOCOL_VERSION_2)
diff --git a/drivers/net/ipvlan/ipvlan_main.c b/drivers/net/ipvlan/ipvlan_main.c
index f195f278a83a..15e87c097b0b 100644
--- a/drivers/net/ipvlan/ipvlan_main.c
+++ b/drivers/net/ipvlan/ipvlan_main.c
@@ -131,6 +131,8 @@ static int ipvlan_init(struct net_device *dev)
 	dev->gso_max_segs = phy_dev->gso_max_segs;
 	dev->hard_header_len = phy_dev->hard_header_len;
 
+	netdev_lockdep_set_classes(dev);
+
 	ipvlan->pcpu_stats = netdev_alloc_pcpu_stats(struct ipvl_pcpu_stats);
 	if (!ipvlan->pcpu_stats)
 		return -ENOMEM;
diff --git a/drivers/net/macsec.c b/drivers/net/macsec.c
index 758baf7cb8a1..ea3f25cc79ef 100644
--- a/drivers/net/macsec.c
+++ b/drivers/net/macsec.c
@@ -4047,6 +4047,8 @@ static int macsec_newlink(struct net *net, struct net_device *dev,
 	if (err < 0)
 		return err;
 
+	netdev_lockdep_set_classes(dev);
+
 	err = netdev_upper_dev_link(real_dev, dev, extack);
 	if (err < 0)
 		goto unregister;
diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index d45600e0a38c..34eb073cdd74 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -890,6 +890,8 @@ static int macvlan_init(struct net_device *dev)
 	dev->gso_max_segs	= lowerdev->gso_max_segs;
 	dev->hard_header_len	= lowerdev->hard_header_len;
 
+	netdev_lockdep_set_classes(dev);
+
 	vlan->pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
 	if (!vlan->pcpu_stats)
 		return -ENOMEM;
diff --git a/drivers/net/ppp/ppp_generic.c b/drivers/net/ppp/ppp_generic.c
index 22cc2cb9d878..7d005896a0f9 100644
--- a/drivers/net/ppp/ppp_generic.c
+++ b/drivers/net/ppp/ppp_generic.c
@@ -1410,6 +1410,8 @@ static int ppp_dev_init(struct net_device *dev)
 {
 	struct ppp *ppp;
 
+	netdev_lockdep_set_classes(dev);
+
 	ppp = netdev_priv(dev);
 	/* Let the netdevice take a reference on the ppp file. This ensures
 	 * that ppp_destroy_interface() won't run before the device gets
diff --git a/drivers/net/team/team.c b/drivers/net/team/team.c
index 04845a4017f9..8c1e02752ff6 100644
--- a/drivers/net/team/team.c
+++ b/drivers/net/team/team.c
@@ -1647,6 +1647,7 @@ static int team_init(struct net_device *dev)
 
 	lockdep_register_key(&team->team_lock_key);
 	__mutex_init(&team->lock, "team->team_lock_key", &team->team_lock_key);
+	netdev_lockdep_set_classes(dev);
 
 	return 0;
 
diff --git a/drivers/net/vrf.c b/drivers/net/vrf.c
index 56f8aab46f89..43928a1c2f2a 100644
--- a/drivers/net/vrf.c
+++ b/drivers/net/vrf.c
@@ -867,6 +867,7 @@ static int vrf_dev_init(struct net_device *dev)
 
 	/* similarly, oper state is irrelevant; set to up to avoid confusion */
 	dev->operstate = IF_OPER_UP;
+	netdev_lockdep_set_classes(dev);
 	return 0;
 
 out_rth:
diff --git a/drivers/net/wireless/intersil/hostap/hostap_hw.c b/drivers/net/wireless/intersil/hostap/hostap_hw.c
index 58212c532c90..aadf3dec5bf3 100644
--- a/drivers/net/wireless/intersil/hostap/hostap_hw.c
+++ b/drivers/net/wireless/intersil/hostap/hostap_hw.c
@@ -3041,6 +3041,27 @@ static void prism2_clear_set_tim_queue(local_info_t *local)
 	}
 }
 
+
+/*
+ * HostAP uses two layers of net devices, where the inner
+ * layer gets called all the time from the outer layer.
+ * This is a natural nesting, which needs a split lock type.
+ */
+static struct lock_class_key hostap_netdev_xmit_lock_key;
+
+static void prism2_set_lockdep_class_one(struct net_device *dev,
+					 struct netdev_queue *txq,
+					 void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &hostap_netdev_xmit_lock_key);
+}
+
+static void prism2_set_lockdep_class(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, prism2_set_lockdep_class_one, NULL);
+}
+
 static struct net_device *
 prism2_init_local_data(struct prism2_helper_functions *funcs, int card_idx,
 		       struct device *sdev)
@@ -3199,6 +3220,7 @@ while (0)
 	if (ret >= 0)
 		ret = register_netdevice(dev);
 
+	prism2_set_lockdep_class(dev);
 	rtnl_unlock();
 	if (ret < 0) {
 		printk(KERN_WARNING "%s: register netdevice failed!\n",
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 5a8d40f1ffe2..7725efd6e48a 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -1805,13 +1805,11 @@ enum netdev_priv_flags {
  *	@phydev:	Physical device may attach itself
  *			for hardware timestamping
  *	@sfp_bus:	attached &struct sfp_bus structure.
- *	@qdisc_tx_busylock_key: lockdep class annotating Qdisc->busylock
- *				spinlock
- *	@qdisc_running_key:	lockdep class annotating Qdisc->running seqcount
- *	@qdisc_xmit_lock_key:	lockdep class annotating
- *				netdev_queue->_xmit_lock spinlock
+ *
  *	@addr_list_lock_key:	lockdep class annotating
  *				net_device->addr_list_lock spinlock
+ *	@qdisc_tx_busylock: lockdep class annotating Qdisc->busylock spinlock
+ *	@qdisc_running_key: lockdep class annotating Qdisc->running seqcount
  *
  *	@proto_down:	protocol port state information can be sent to the
  *			switch driver and used to set the phys state of the
@@ -2112,10 +2110,9 @@ struct net_device {
 #endif
 	struct phy_device	*phydev;
 	struct sfp_bus		*sfp_bus;
-	struct lock_class_key	qdisc_tx_busylock_key;
-	struct lock_class_key	qdisc_running_key;
-	struct lock_class_key	qdisc_xmit_lock_key;
 	struct lock_class_key	addr_list_lock_key;
+	struct lock_class_key	*qdisc_tx_busylock;
+	struct lock_class_key	*qdisc_running_key;
 	bool			proto_down;
 	unsigned		wol_enabled:1;
 
@@ -2200,6 +2197,20 @@ static inline void netdev_for_each_tx_queue(struct net_device *dev,
 		f(dev, &dev->_tx[i], arg);
 }
 
+#define netdev_lockdep_set_classes(dev)				\
+{								\
+	static struct lock_class_key qdisc_tx_busylock_key;	\
+	static struct lock_class_key qdisc_running_key;		\
+	static struct lock_class_key qdisc_xmit_lock_key;	\
+	unsigned int i;						\
+								\
+	(dev)->qdisc_tx_busylock = &qdisc_tx_busylock_key;	\
+	(dev)->qdisc_running_key = &qdisc_running_key;		\
+	for (i = 0; i < (dev)->num_tx_queues; i++)		\
+		lockdep_set_class(&(dev)->_tx[i]._xmit_lock,	\
+				  &qdisc_xmit_lock_key);	\
+}
+
 u16 netdev_pick_tx(struct net_device *dev, struct sk_buff *skb,
 		     struct net_device *sb_dev);
 struct netdev_queue *netdev_core_pick_tx(struct net_device *dev,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 990b9fde28c6..319220b2341d 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -489,6 +489,25 @@ static void vlan_dev_set_rx_mode(struct net_device *vlan_dev)
 	dev_uc_sync(vlan_dev_priv(vlan_dev)->real_dev, vlan_dev);
 }
 
+/*
+ * vlan network devices have devices nesting below it, and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key vlan_netdev_xmit_lock_key;
+
+static void vlan_dev_set_lockdep_one(struct net_device *dev,
+				     struct netdev_queue *txq,
+				     void *unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &vlan_netdev_xmit_lock_key);
+}
+
+static void vlan_dev_set_lockdep_class(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, vlan_dev_set_lockdep_one, NULL);
+}
+
 static const struct header_ops vlan_header_ops = {
 	.create	 = vlan_dev_hard_header,
 	.parse	 = eth_header_parse,
@@ -579,6 +598,8 @@ static int vlan_dev_init(struct net_device *dev)
 
 	SET_NETDEV_DEVTYPE(dev, &vlan_type);
 
+	vlan_dev_set_lockdep_class(dev);
+
 	vlan->vlan_pcpu_stats = netdev_alloc_pcpu_stats(struct vlan_pcpu_stats);
 	if (!vlan->vlan_pcpu_stats)
 		return -ENOMEM;
diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 5f05a728f347..822af540b854 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -739,6 +739,34 @@ static int batadv_interface_kill_vid(struct net_device *dev, __be16 proto,
 	return 0;
 }
 
+/* batman-adv network devices have devices nesting below it and are a special
+ * "super class" of normal network devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key batadv_netdev_xmit_lock_key;
+
+/**
+ * batadv_set_lockdep_class_one() - Set lockdep class for a single tx queue
+ * @dev: device which owns the tx queue
+ * @txq: tx queue to modify
+ * @_unused: always NULL
+ */
+static void batadv_set_lockdep_class_one(struct net_device *dev,
+					 struct netdev_queue *txq,
+					 void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &batadv_netdev_xmit_lock_key);
+}
+
+/**
+ * batadv_set_lockdep_class() - Set lockdep class of all txq xmit locks
+ * @dev: network device to modify
+ */
+static void batadv_set_lockdep_class(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, batadv_set_lockdep_class_one, NULL);
+}
+
 /**
  * batadv_softif_init_late() - late stage initialization of soft interface
  * @dev: registered network device to modify
@@ -752,6 +780,8 @@ static int batadv_softif_init_late(struct net_device *dev)
 	int ret;
 	size_t cnt_len = sizeof(u64) * BATADV_CNT_NUM;
 
+	batadv_set_lockdep_class(dev);
+
 	bat_priv = netdev_priv(dev);
 	bat_priv->soft_iface = dev;
 
diff --git a/net/bluetooth/6lowpan.c b/net/bluetooth/6lowpan.c
index 4febc82a7c76..bb55d92691b0 100644
--- a/net/bluetooth/6lowpan.c
+++ b/net/bluetooth/6lowpan.c
@@ -571,7 +571,15 @@ static netdev_tx_t bt_xmit(struct sk_buff *skb, struct net_device *netdev)
 	return err < 0 ? NET_XMIT_DROP : err;
 }
 
+static int bt_dev_init(struct net_device *dev)
+{
+	netdev_lockdep_set_classes(dev);
+
+	return 0;
+}
+
 static const struct net_device_ops netdev_ops = {
+	.ndo_init		= bt_dev_init,
 	.ndo_start_xmit		= bt_xmit,
 };
 
diff --git a/net/core/dev.c b/net/core/dev.c
index afff16849c26..f8d83922a6af 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -398,6 +398,74 @@ static RAW_NOTIFIER_HEAD(netdev_chain);
 DEFINE_PER_CPU_ALIGNED(struct softnet_data, softnet_data);
 EXPORT_PER_CPU_SYMBOL(softnet_data);
 
+#ifdef CONFIG_LOCKDEP
+/*
+ * register_netdevice() inits txq->_xmit_lock and sets lockdep class
+ * according to dev->type
+ */
+static const unsigned short netdev_lock_type[] = {
+	 ARPHRD_NETROM, ARPHRD_ETHER, ARPHRD_EETHER, ARPHRD_AX25,
+	 ARPHRD_PRONET, ARPHRD_CHAOS, ARPHRD_IEEE802, ARPHRD_ARCNET,
+	 ARPHRD_APPLETLK, ARPHRD_DLCI, ARPHRD_ATM, ARPHRD_METRICOM,
+	 ARPHRD_IEEE1394, ARPHRD_EUI64, ARPHRD_INFINIBAND, ARPHRD_SLIP,
+	 ARPHRD_CSLIP, ARPHRD_SLIP6, ARPHRD_CSLIP6, ARPHRD_RSRVD,
+	 ARPHRD_ADAPT, ARPHRD_ROSE, ARPHRD_X25, ARPHRD_HWX25,
+	 ARPHRD_PPP, ARPHRD_CISCO, ARPHRD_LAPB, ARPHRD_DDCMP,
+	 ARPHRD_RAWHDLC, ARPHRD_TUNNEL, ARPHRD_TUNNEL6, ARPHRD_FRAD,
+	 ARPHRD_SKIP, ARPHRD_LOOPBACK, ARPHRD_LOCALTLK, ARPHRD_FDDI,
+	 ARPHRD_BIF, ARPHRD_SIT, ARPHRD_IPDDP, ARPHRD_IPGRE,
+	 ARPHRD_PIMREG, ARPHRD_HIPPI, ARPHRD_ASH, ARPHRD_ECONET,
+	 ARPHRD_IRDA, ARPHRD_FCPP, ARPHRD_FCAL, ARPHRD_FCPL,
+	 ARPHRD_FCFABRIC, ARPHRD_IEEE80211, ARPHRD_IEEE80211_PRISM,
+	 ARPHRD_IEEE80211_RADIOTAP, ARPHRD_PHONET, ARPHRD_PHONET_PIPE,
+	 ARPHRD_IEEE802154, ARPHRD_VOID, ARPHRD_NONE};
+
+static const char *const netdev_lock_name[] = {
+	"_xmit_NETROM", "_xmit_ETHER", "_xmit_EETHER", "_xmit_AX25",
+	"_xmit_PRONET", "_xmit_CHAOS", "_xmit_IEEE802", "_xmit_ARCNET",
+	"_xmit_APPLETLK", "_xmit_DLCI", "_xmit_ATM", "_xmit_METRICOM",
+	"_xmit_IEEE1394", "_xmit_EUI64", "_xmit_INFINIBAND", "_xmit_SLIP",
+	"_xmit_CSLIP", "_xmit_SLIP6", "_xmit_CSLIP6", "_xmit_RSRVD",
+	"_xmit_ADAPT", "_xmit_ROSE", "_xmit_X25", "_xmit_HWX25",
+	"_xmit_PPP", "_xmit_CISCO", "_xmit_LAPB", "_xmit_DDCMP",
+	"_xmit_RAWHDLC", "_xmit_TUNNEL", "_xmit_TUNNEL6", "_xmit_FRAD",
+	"_xmit_SKIP", "_xmit_LOOPBACK", "_xmit_LOCALTLK", "_xmit_FDDI",
+	"_xmit_BIF", "_xmit_SIT", "_xmit_IPDDP", "_xmit_IPGRE",
+	"_xmit_PIMREG", "_xmit_HIPPI", "_xmit_ASH", "_xmit_ECONET",
+	"_xmit_IRDA", "_xmit_FCPP", "_xmit_FCAL", "_xmit_FCPL",
+	"_xmit_FCFABRIC", "_xmit_IEEE80211", "_xmit_IEEE80211_PRISM",
+	"_xmit_IEEE80211_RADIOTAP", "_xmit_PHONET", "_xmit_PHONET_PIPE",
+	"_xmit_IEEE802154", "_xmit_VOID", "_xmit_NONE"};
+
+static struct lock_class_key netdev_xmit_lock_key[ARRAY_SIZE(netdev_lock_type)];
+
+static inline unsigned short netdev_lock_pos(unsigned short dev_type)
+{
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(netdev_lock_type); i++)
+		if (netdev_lock_type[i] == dev_type)
+			return i;
+	/* the last key is used by default */
+	return ARRAY_SIZE(netdev_lock_type) - 1;
+}
+
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+	int i;
+
+	i = netdev_lock_pos(dev_type);
+	lockdep_set_class_and_name(lock, &netdev_xmit_lock_key[i],
+				   netdev_lock_name[i]);
+}
+#else
+static inline void netdev_set_xmit_lockdep_class(spinlock_t *lock,
+						 unsigned short dev_type)
+{
+}
+#endif
+
 /*******************************************************************************
  *
  *		Protocol management and registration routines
@@ -9208,7 +9276,7 @@ static void netdev_init_one_queue(struct net_device *dev,
 {
 	/* Initialize queue lock */
 	spin_lock_init(&queue->_xmit_lock);
-	lockdep_set_class(&queue->_xmit_lock, &dev->qdisc_xmit_lock_key);
+	netdev_set_xmit_lockdep_class(&queue->_xmit_lock, dev->type);
 	queue->xmit_lock_owner = -1;
 	netdev_queue_numa_node_write(queue, NUMA_NO_NODE);
 	queue->dev = dev;
@@ -9255,22 +9323,6 @@ void netif_tx_stop_all_queues(struct net_device *dev)
 }
 EXPORT_SYMBOL(netif_tx_stop_all_queues);
 
-static void netdev_register_lockdep_key(struct net_device *dev)
-{
-	lockdep_register_key(&dev->qdisc_tx_busylock_key);
-	lockdep_register_key(&dev->qdisc_running_key);
-	lockdep_register_key(&dev->qdisc_xmit_lock_key);
-	lockdep_register_key(&dev->addr_list_lock_key);
-}
-
-static void netdev_unregister_lockdep_key(struct net_device *dev)
-{
-	lockdep_unregister_key(&dev->qdisc_tx_busylock_key);
-	lockdep_unregister_key(&dev->qdisc_running_key);
-	lockdep_unregister_key(&dev->qdisc_xmit_lock_key);
-	lockdep_unregister_key(&dev->addr_list_lock_key);
-}
-
 void netdev_update_lockdep_key(struct net_device *dev)
 {
 	lockdep_unregister_key(&dev->addr_list_lock_key);
@@ -9837,7 +9889,7 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 
 	dev_net_set(dev, &init_net);
 
-	netdev_register_lockdep_key(dev);
+	lockdep_register_key(&dev->addr_list_lock_key);
 
 	dev->gso_max_size = GSO_MAX_SIZE;
 	dev->gso_max_segs = GSO_MAX_SEGS;
@@ -9926,7 +9978,7 @@ void free_netdev(struct net_device *dev)
 	free_percpu(dev->xdp_bulkq);
 	dev->xdp_bulkq = NULL;
 
-	netdev_unregister_lockdep_key(dev);
+	lockdep_unregister_key(&dev->addr_list_lock_key);
 
 	/*  Compatibility with error handling in drivers */
 	if (dev->reg_state == NETREG_UNINITIALIZED) {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ba8bf90dc0cc..fa2634043751 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -1671,6 +1671,15 @@ static int dsa_slave_phy_setup(struct net_device *slave_dev)
 	return ret;
 }
 
+static struct lock_class_key dsa_slave_netdev_xmit_lock_key;
+static void dsa_slave_set_lockdep_class_one(struct net_device *dev,
+					    struct netdev_queue *txq,
+					    void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock,
+			  &dsa_slave_netdev_xmit_lock_key);
+}
+
 int dsa_slave_suspend(struct net_device *slave_dev)
 {
 	struct dsa_port *dp = dsa_slave_to_port(slave_dev);
@@ -1754,6 +1763,9 @@ int dsa_slave_create(struct dsa_port *port)
 		slave_dev->max_mtu = ETH_MAX_MTU;
 	SET_NETDEV_DEVTYPE(slave_dev, &dsa_type);
 
+	netdev_for_each_tx_queue(slave_dev, dsa_slave_set_lockdep_class_one,
+				 NULL);
+
 	SET_NETDEV_DEV(slave_dev, port->ds->dev);
 	slave_dev->dev.of_node = port->dn;
 	slave_dev->vlan_features = master->vlan_features;
diff --git a/net/ieee802154/6lowpan/core.c b/net/ieee802154/6lowpan/core.c
index c0b107cdd715..3297e7fa9945 100644
--- a/net/ieee802154/6lowpan/core.c
+++ b/net/ieee802154/6lowpan/core.c
@@ -58,6 +58,13 @@ static const struct header_ops lowpan_header_ops = {
 	.create	= lowpan_header_create,
 };
 
+static int lowpan_dev_init(struct net_device *ldev)
+{
+	netdev_lockdep_set_classes(ldev);
+
+	return 0;
+}
+
 static int lowpan_open(struct net_device *dev)
 {
 	if (!open_count)
@@ -89,6 +96,7 @@ static int lowpan_get_iflink(const struct net_device *dev)
 }
 
 static const struct net_device_ops lowpan_netdev_ops = {
+	.ndo_init		= lowpan_dev_init,
 	.ndo_start_xmit		= lowpan_xmit,
 	.ndo_open		= lowpan_open,
 	.ndo_stop		= lowpan_stop,
diff --git a/net/l2tp/l2tp_eth.c b/net/l2tp/l2tp_eth.c
index d3b520b9b2c9..fd5ac2788e45 100644
--- a/net/l2tp/l2tp_eth.c
+++ b/net/l2tp/l2tp_eth.c
@@ -56,6 +56,7 @@ static int l2tp_eth_dev_init(struct net_device *dev)
 {
 	eth_hw_addr_random(dev);
 	eth_broadcast_addr(dev->broadcast);
+	netdev_lockdep_set_classes(dev);
 
 	return 0;
 }
diff --git a/net/netrom/af_netrom.c b/net/netrom/af_netrom.c
index 7b1a74f74aad..eccc7d366e17 100644
--- a/net/netrom/af_netrom.c
+++ b/net/netrom/af_netrom.c
@@ -63,6 +63,26 @@ static DEFINE_SPINLOCK(nr_list_lock);
 
 static const struct proto_ops nr_proto_ops;
 
+/*
+ * NETROM network devices are virtual network devices encapsulating NETROM
+ * frames into AX.25 which will be sent through an AX.25 device, so form a
+ * special "super class" of normal net devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key nr_netdev_xmit_lock_key;
+
+static void nr_set_lockdep_one(struct net_device *dev,
+			       struct netdev_queue *txq,
+			       void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &nr_netdev_xmit_lock_key);
+}
+
+static void nr_set_lockdep_key(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, nr_set_lockdep_one, NULL);
+}
+
 /*
  *	Socket removal during an interrupt is now safe.
  */
@@ -1394,6 +1414,7 @@ static int __init nr_proto_init(void)
 			free_netdev(dev);
 			goto fail;
 		}
+		nr_set_lockdep_key(dev);
 		dev_nr[i] = dev;
 	}
 
diff --git a/net/rose/af_rose.c b/net/rose/af_rose.c
index 1e8eeb044b07..e7a872207b46 100644
--- a/net/rose/af_rose.c
+++ b/net/rose/af_rose.c
@@ -64,6 +64,26 @@ static const struct proto_ops rose_proto_ops;
 
 ax25_address rose_callsign;
 
+/*
+ * ROSE network devices are virtual network devices encapsulating ROSE
+ * frames into AX.25 which will be sent through an AX.25 device, so form a
+ * special "super class" of normal net devices; split their locks off into a
+ * separate class since they always nest.
+ */
+static struct lock_class_key rose_netdev_xmit_lock_key;
+
+static void rose_set_lockdep_one(struct net_device *dev,
+				 struct netdev_queue *txq,
+				 void *_unused)
+{
+	lockdep_set_class(&txq->_xmit_lock, &rose_netdev_xmit_lock_key);
+}
+
+static void rose_set_lockdep_key(struct net_device *dev)
+{
+	netdev_for_each_tx_queue(dev, rose_set_lockdep_one, NULL);
+}
+
 /*
  *	Convert a ROSE address into text.
  */
@@ -1511,6 +1531,7 @@ static int __init rose_proto_init(void)
 			free_netdev(dev);
 			goto fail;
 		}
+		rose_set_lockdep_key(dev);
 		dev_rose[i] = dev;
 	}
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ad24fa1a51e6..ebc55d884247 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -794,6 +794,9 @@ struct Qdisc_ops pfifo_fast_ops __read_mostly = {
 };
 EXPORT_SYMBOL(pfifo_fast_ops);
 
+static struct lock_class_key qdisc_tx_busylock;
+static struct lock_class_key qdisc_running_key;
+
 struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 			  const struct Qdisc_ops *ops,
 			  struct netlink_ext_ack *extack)
@@ -846,9 +849,17 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	}
 
 	spin_lock_init(&sch->busylock);
+	lockdep_set_class(&sch->busylock,
+			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
 	/* seqlock has the same scope of busylock, for NOLOCK qdisc */
 	spin_lock_init(&sch->seqlock);
+	lockdep_set_class(&sch->busylock,
+			  dev->qdisc_tx_busylock ?: &qdisc_tx_busylock);
+
 	seqcount_init(&sch->running);
+	lockdep_set_class(&sch->running,
+			  dev->qdisc_running_key ?: &qdisc_running_key);
 
 	sch->ops = ops;
 	sch->flags = ops->static_flags;
@@ -859,12 +870,6 @@ struct Qdisc *qdisc_alloc(struct netdev_queue *dev_queue,
 	dev_hold(dev);
 	refcount_set(&sch->refcnt, 1);
 
-	if (sch != &noop_qdisc) {
-		lockdep_set_class(&sch->busylock, &dev->qdisc_tx_busylock_key);
-		lockdep_set_class(&sch->seqlock, &dev->qdisc_tx_busylock_key);
-		lockdep_set_class(&sch->running, &dev->qdisc_running_key);
-	}
-
 	return sch;
 errout1:
 	kfree(p);
-- 
cgit v1.2.3-59-g8ed1b


From d26c0cc53950464a24adfa76867f1d71f0cbbea6 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 30 Apr 2020 23:30:47 +0200
Subject: bpf: Avoid gcc-10 stringop-overflow warning in struct bpf_prog

gcc-10 warns about accesses to zero-length arrays:

kernel/bpf/core.c: In function 'bpf_patch_insn_single':
cc1: warning: writing 8 bytes into a region of size 0 [-Wstringop-overflow=]
In file included from kernel/bpf/core.c:21:
include/linux/filter.h:550:20: note: at offset 0 to object 'insnsi' with size 0 declared here
  550 |   struct bpf_insn  insnsi[0];
      |                    ^~~~~~

In this case, we really want to have two flexible-array members,
but that is not possible. Removing the union to make insnsi a
flexible-array member while leaving insns as a zero-length array
fixes the warning, as nothing writes to the other one in that way.

This trick only works on linux-3.18 or higher, as older versions
had additional members in the union.

Fixes: 60a3b2253c41 ("net: bpf: make eBPF interpreter images read-only")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200430213101.135134-6-arnd@arndb.de
---
 include/linux/filter.h | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/include/linux/filter.h b/include/linux/filter.h
index af37318bb1c5..73d06a39e2d6 100644
--- a/include/linux/filter.h
+++ b/include/linux/filter.h
@@ -545,10 +545,8 @@ struct bpf_prog {
 	unsigned int		(*bpf_func)(const void *ctx,
 					    const struct bpf_insn *insn);
 	/* Instructions for interpreter */
-	union {
-		struct sock_filter	insns[0];
-		struct bpf_insn		insnsi[0];
-	};
+	struct sock_filter	insns[0];
+	struct bpf_insn		insnsi[];
 };
 
 struct sk_filter {
-- 
cgit v1.2.3-59-g8ed1b


From e4e5aefc113510c03d34e182ab30bc0cc196675c Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 4 May 2020 15:33:51 +0200
Subject: xsk: Change two variable names for increased clarity

Change two variable names so that it is clearer what they
represent. The first one is xsk_list that in fact only contains the
list of AF_XDP sockets with a Tx component. Change this to xsk_tx_list
for improved clarity. The second variable is size in the ring
structure. One might think that this is the size of the ring, but it
is in fact the size of the umem, copied into the ring structure to
improve performance. Rename this variable umem_size to avoid any
confusion.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Link: https://lore.kernel.org/bpf/1588599232-24897-2-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h |  4 ++--
 net/xdp/xdp_umem.c     | 14 +++++++-------
 net/xdp/xsk.c          |  8 ++++----
 net/xdp/xsk_queue.c    |  4 ++--
 net/xdp/xsk_queue.h    |  8 ++++----
 5 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index e86ec48ef627..b72f1f4c3b15 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -62,8 +62,8 @@ struct xdp_umem {
 	struct net_device *dev;
 	struct xdp_umem_fq_reuse *fq_reuse;
 	bool zc;
-	spinlock_t xsk_list_lock;
-	struct list_head xsk_list;
+	spinlock_t xsk_tx_list_lock;
+	struct list_head xsk_tx_list;
 };
 
 /* Nodes are linked in the struct xdp_sock map_list field, and used to
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index ed7a6060f73c..7211f4572760 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -30,9 +30,9 @@ void xdp_add_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	if (!xs->tx)
 		return;
 
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
-	list_add_rcu(&xs->list, &umem->xsk_list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
+	list_add_rcu(&xs->list, &umem->xsk_tx_list);
+	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
 }
 
 void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
@@ -42,9 +42,9 @@ void xdp_del_sk_umem(struct xdp_umem *umem, struct xdp_sock *xs)
 	if (!xs->tx)
 		return;
 
-	spin_lock_irqsave(&umem->xsk_list_lock, flags);
+	spin_lock_irqsave(&umem->xsk_tx_list_lock, flags);
 	list_del_rcu(&xs->list);
-	spin_unlock_irqrestore(&umem->xsk_list_lock, flags);
+	spin_unlock_irqrestore(&umem->xsk_tx_list_lock, flags);
 }
 
 /* The umem is stored both in the _rx struct and the _tx struct as we do
@@ -395,8 +395,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->pgs = NULL;
 	umem->user = NULL;
 	umem->flags = mr->flags;
-	INIT_LIST_HEAD(&umem->xsk_list);
-	spin_lock_init(&umem->xsk_list_lock);
+	INIT_LIST_HEAD(&umem->xsk_tx_list);
+	spin_lock_init(&umem->xsk_tx_list_lock);
 
 	refcount_set(&umem->users, 1);
 
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index f6e6609f70a3..45ffd67b367d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -75,7 +75,7 @@ void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		xs->tx->ring->flags |= XDP_RING_NEED_WAKEUP;
 	}
 	rcu_read_unlock();
@@ -102,7 +102,7 @@ void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
 		return;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		xs->tx->ring->flags &= ~XDP_RING_NEED_WAKEUP;
 	}
 	rcu_read_unlock();
@@ -305,7 +305,7 @@ void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 	struct xdp_sock *xs;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		__xskq_cons_release(xs->tx);
 		xs->sk.sk_write_space(&xs->sk);
 	}
@@ -318,7 +318,7 @@ bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc)
 	struct xdp_sock *xs;
 
 	rcu_read_lock();
-	list_for_each_entry_rcu(xs, &umem->xsk_list, list) {
+	list_for_each_entry_rcu(xs, &umem->xsk_tx_list, list) {
 		if (!xskq_cons_peek_desc(xs->tx, desc, umem))
 			continue;
 
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index c90e9c1e3c63..57fb81bd593c 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -9,12 +9,12 @@
 
 #include "xsk_queue.h"
 
-void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask)
+void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask)
 {
 	if (!q)
 		return;
 
-	q->size = size;
+	q->umem_size = umem_size;
 	q->chunk_mask = chunk_mask;
 }
 
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index b50bb5c76da5..648733ec24ac 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -30,7 +30,7 @@ struct xdp_umem_ring {
 
 struct xsk_queue {
 	u64 chunk_mask;
-	u64 size;
+	u64 umem_size;
 	u32 ring_mask;
 	u32 nentries;
 	u32 cached_prod;
@@ -123,7 +123,7 @@ static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
 	u64 base_addr = xsk_umem_extract_addr(addr);
 
 	addr = xsk_umem_add_offset_to_addr(addr);
-	if (base_addr >= q->size || addr >= q->size ||
+	if (base_addr >= q->umem_size || addr >= q->umem_size ||
 	    xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
 		q->invalid_descs++;
 		return false;
@@ -134,7 +134,7 @@ static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
 
 static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
 {
-	if (addr >= q->size) {
+	if (addr >= q->umem_size) {
 		q->invalid_descs++;
 		return false;
 	}
@@ -379,7 +379,7 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
 	return q ? q->invalid_descs : 0;
 }
 
-void xskq_set_umem(struct xsk_queue *q, u64 size, u64 chunk_mask);
+void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask);
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
-- 
cgit v1.2.3-59-g8ed1b


From 07bf2d97d1f37e7ac8d7be2d84ff108d43556a1d Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Mon, 4 May 2020 15:33:52 +0200
Subject: xsk: Remove unnecessary member in xdp_umem

Remove the unnecessary address member from struct xdp_umem, as it is
only used during umem registration. There is no need to carry it around,
as it is used neither at run-time nor when unregistering the umem.

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Jonathan Lemon <jonathan.lemon@gmail.com>
Link: https://lore.kernel.org/bpf/1588599232-24897-3-git-send-email-magnus.karlsson@intel.com
---
 include/net/xdp_sock.h | 1 -
 net/xdp/xdp_umem.c     | 7 +++----
 2 files changed, 3 insertions(+), 5 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index b72f1f4c3b15..67191ccaab85 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -50,7 +50,6 @@ struct xdp_umem {
 	u32 headroom;
 	u32 chunk_size_nohr;
 	struct user_struct *user;
-	unsigned long address;
 	refcount_t users;
 	struct work_struct work;
 	struct page **pgs;
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 7211f4572760..37ace3bc0d48 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -279,7 +279,7 @@ void xdp_put_umem(struct xdp_umem *umem)
 	}
 }
 
-static int xdp_umem_pin_pages(struct xdp_umem *umem)
+static int xdp_umem_pin_pages(struct xdp_umem *umem, unsigned long address)
 {
 	unsigned int gup_flags = FOLL_WRITE;
 	long npgs;
@@ -291,7 +291,7 @@ static int xdp_umem_pin_pages(struct xdp_umem *umem)
 		return -ENOMEM;
 
 	down_read(&current->mm->mmap_sem);
-	npgs = pin_user_pages(umem->address, umem->npgs,
+	npgs = pin_user_pages(address, umem->npgs,
 			      gup_flags | FOLL_LONGTERM, &umem->pgs[0], NULL);
 	up_read(&current->mm->mmap_sem);
 
@@ -385,7 +385,6 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
-	umem->address = (unsigned long)addr;
 	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
 					    : ~((u64)chunk_size - 1);
 	umem->size = size;
@@ -404,7 +403,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (err)
 		return err;
 
-	err = xdp_umem_pin_pages(umem);
+	err = xdp_umem_pin_pages(umem, (unsigned long)addr);
 	if (err)
 		goto out_account;
 
-- 
cgit v1.2.3-59-g8ed1b


From 521fc37be3d879561ca5ab42d64719cf94116af0 Mon Sep 17 00:00:00 2001
From: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Date: Mon, 4 May 2020 12:03:13 +0300
Subject: ath10k: Avoid override CE5 configuration for QCA99X0 chipsets

As the existing CE configurations are defined globally, there is a
chance that the CE configurations of the QCA99X0 family chipsets
get changed by the ath10k_pci_override_ce_config()
function.

The override is hit and the CE5 configuration is changed
when the user brings up a QCA99X0 family chipset along with a
QCA6174 or QCA9377 chipset (i.e. when the QCA99X0 family chipset
is brought up after the QCA6174 or QCA9377).

Hence, fix this issue by moving the global CE configuration
to a radio-specific CE configuration.

Tested hardware: QCA9888 & QCA6174
Tested firmware: 10.4-3.10-00047 & WLAN.RM.4.4.1.c3-00058

Signed-off-by: Maharaja Kennadyrajan <mkenna@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1587649759-14381-1-git-send-email-mkenna@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/ce.h   |  2 +-
 drivers/net/wireless/ath/ath10k/pci.c  | 70 ++++++++++++++++++++++++++--------
 drivers/net/wireless/ath/ath10k/pci.h  |  4 ++
 drivers/net/wireless/ath/ath10k/snoc.c |  4 +-
 4 files changed, 61 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h
index a7478c240f78..9711f0eb9117 100644
--- a/drivers/net/wireless/ath/ath10k/ce.h
+++ b/drivers/net/wireless/ath/ath10k/ce.h
@@ -419,7 +419,7 @@ struct ce_pipe_config {
 #define PIPEDIR_INOUT   3  /* bidirectional */
 
 /* Establish a mapping between a service/direction and a pipe. */
-struct service_to_pipe {
+struct ce_service_to_pipe {
 	__le32 service_id;
 	__le32 pipedir;
 	__le32 pipenum;
diff --git a/drivers/net/wireless/ath/ath10k/pci.c b/drivers/net/wireless/ath/ath10k/pci.c
index cd1c5d60261f..1d941d53fdc9 100644
--- a/drivers/net/wireless/ath/ath10k/pci.c
+++ b/drivers/net/wireless/ath/ath10k/pci.c
@@ -116,7 +116,7 @@ static void ath10k_pci_htt_rx_cb(struct ath10k_ce_pipe *ce_state);
 static void ath10k_pci_htt_htc_rx_cb(struct ath10k_ce_pipe *ce_state);
 static void ath10k_pci_pktlog_rx_cb(struct ath10k_ce_pipe *ce_state);
 
-static struct ce_attr host_ce_config_wlan[] = {
+static const struct ce_attr pci_host_ce_config_wlan[] = {
 	/* CE0: host->target HTC control and raw streams */
 	{
 		.flags = CE_ATTR_FLAGS,
@@ -222,7 +222,7 @@ static struct ce_attr host_ce_config_wlan[] = {
 };
 
 /* Target firmware's Copy Engine configuration. */
-static struct ce_pipe_config target_ce_config_wlan[] = {
+static const struct ce_pipe_config pci_target_ce_config_wlan[] = {
 	/* CE0: host->target HTC control and raw streams */
 	{
 		.pipenum = __cpu_to_le32(0),
@@ -335,7 +335,7 @@ static struct ce_pipe_config target_ce_config_wlan[] = {
  * This table is derived from the CE_PCI TABLE, above.
  * It is passed to the Target at startup for use by firmware.
  */
-static struct service_to_pipe target_service_to_ce_map_wlan[] = {
+static const struct ce_service_to_pipe pci_target_service_to_ce_map_wlan[] = {
 	{
 		__cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO),
 		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
@@ -1787,6 +1787,8 @@ static void ath10k_pci_fw_crashed_dump(struct ath10k *ar)
 void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
 					int force)
 {
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+
 	ath10k_dbg(ar, ATH10K_DBG_PCI, "pci hif send complete check\n");
 
 	if (!force) {
@@ -1804,7 +1806,7 @@ void ath10k_pci_hif_send_complete_check(struct ath10k *ar, u8 pipe,
 		 * If at least 50% of the total resources are still available,
 		 * don't bother checking again yet.
 		 */
-		if (resources > (host_ce_config_wlan[pipe].src_nentries >> 1))
+		if (resources > (ar_pci->attr[pipe].src_nentries >> 1))
 			return;
 	}
 	ath10k_ce_per_engine_service(ar, pipe);
@@ -1820,14 +1822,15 @@ static void ath10k_pci_rx_retry_sync(struct ath10k *ar)
 int ath10k_pci_hif_map_service_to_pipe(struct ath10k *ar, u16 service_id,
 				       u8 *ul_pipe, u8 *dl_pipe)
 {
-	const struct service_to_pipe *entry;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
+	const struct ce_service_to_pipe *entry;
 	bool ul_set = false, dl_set = false;
 	int i;
 
 	ath10k_dbg(ar, ATH10K_DBG_PCI, "pci hif map service\n");
 
-	for (i = 0; i < ARRAY_SIZE(target_service_to_ce_map_wlan); i++) {
-		entry = &target_service_to_ce_map_wlan[i];
+	for (i = 0; i < ARRAY_SIZE(pci_target_service_to_ce_map_wlan); i++) {
+		entry = &ar_pci->serv_to_pipe[i];
 
 		if (__le32_to_cpu(entry->service_id) != service_id)
 			continue;
@@ -2316,6 +2319,7 @@ static int ath10k_bus_get_num_banks(struct ath10k *ar)
 
 int ath10k_pci_init_config(struct ath10k *ar)
 {
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	u32 interconnect_targ_addr;
 	u32 pcie_state_targ_addr = 0;
 	u32 pipe_cfg_targ_addr = 0;
@@ -2361,7 +2365,7 @@ int ath10k_pci_init_config(struct ath10k *ar)
 	}
 
 	ret = ath10k_pci_diag_write_mem(ar, pipe_cfg_targ_addr,
-					target_ce_config_wlan,
+					ar_pci->pipe_config,
 					sizeof(struct ce_pipe_config) *
 					NUM_TARGET_CE_CONFIG_WLAN);
 
@@ -2386,8 +2390,8 @@ int ath10k_pci_init_config(struct ath10k *ar)
 	}
 
 	ret = ath10k_pci_diag_write_mem(ar, svc_to_pipe_map,
-					target_service_to_ce_map_wlan,
-					sizeof(target_service_to_ce_map_wlan));
+					ar_pci->serv_to_pipe,
+					sizeof(pci_target_service_to_ce_map_wlan));
 	if (ret != 0) {
 		ath10k_err(ar, "Failed to write svc/pipe map: %d\n", ret);
 		return ret;
@@ -2459,23 +2463,24 @@ static void ath10k_pci_override_ce_config(struct ath10k *ar)
 {
 	struct ce_attr *attr;
 	struct ce_pipe_config *config;
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 
 	/* For QCA6174 we're overriding the Copy Engine 5 configuration,
 	 * since it is currently used for other feature.
 	 */
 
 	/* Override Host's Copy Engine 5 configuration */
-	attr = &host_ce_config_wlan[5];
+	attr = &ar_pci->attr[5];
 	attr->src_sz_max = 0;
 	attr->dest_nentries = 0;
 
 	/* Override Target firmware's Copy Engine configuration */
-	config = &target_ce_config_wlan[5];
+	config = &ar_pci->pipe_config[5];
 	config->pipedir = __cpu_to_le32(PIPEDIR_OUT);
 	config->nbytes_max = __cpu_to_le32(2048);
 
 	/* Map from service/endpoint to Copy Engine */
-	target_service_to_ce_map_wlan[15].pipenum = __cpu_to_le32(1);
+	ar_pci->serv_to_pipe[15].pipenum = __cpu_to_le32(1);
 }
 
 int ath10k_pci_alloc_pipes(struct ath10k *ar)
@@ -2491,7 +2496,7 @@ int ath10k_pci_alloc_pipes(struct ath10k *ar)
 		pipe->pipe_num = i;
 		pipe->hif_ce_state = ar;
 
-		ret = ath10k_ce_alloc_pipe(ar, i, &host_ce_config_wlan[i]);
+		ret = ath10k_ce_alloc_pipe(ar, i, &ar_pci->attr[i]);
 		if (ret) {
 			ath10k_err(ar, "failed to allocate copy engine pipe %d: %d\n",
 				   i, ret);
@@ -2504,7 +2509,7 @@ int ath10k_pci_alloc_pipes(struct ath10k *ar)
 			continue;
 		}
 
-		pipe->buf_sz = (size_t)(host_ce_config_wlan[i].src_sz_max);
+		pipe->buf_sz = (size_t)(ar_pci->attr[i].src_sz_max);
 	}
 
 	return 0;
@@ -2520,10 +2525,11 @@ void ath10k_pci_free_pipes(struct ath10k *ar)
 
 int ath10k_pci_init_pipes(struct ath10k *ar)
 {
+	struct ath10k_pci *ar_pci = ath10k_pci_priv(ar);
 	int i, ret;
 
 	for (i = 0; i < CE_COUNT; i++) {
-		ret = ath10k_ce_init_pipe(ar, i, &host_ce_config_wlan[i]);
+		ret = ath10k_ce_init_pipe(ar, i, &ar_pci->attr[i]);
 		if (ret) {
 			ath10k_err(ar, "failed to initialize copy engine pipe %d: %d\n",
 				   i, ret);
@@ -3595,6 +3601,30 @@ static int ath10k_pci_probe(struct pci_dev *pdev,
 
 	timer_setup(&ar_pci->ps_timer, ath10k_pci_ps_timer, 0);
 
+	ar_pci->attr = kmemdup(pci_host_ce_config_wlan,
+			       sizeof(pci_host_ce_config_wlan),
+			       GFP_KERNEL);
+	if (!ar_pci->attr) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	ar_pci->pipe_config = kmemdup(pci_target_ce_config_wlan,
+				      sizeof(pci_target_ce_config_wlan),
+				      GFP_KERNEL);
+	if (!ar_pci->pipe_config) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
+	ar_pci->serv_to_pipe = kmemdup(pci_target_service_to_ce_map_wlan,
+				       sizeof(pci_target_service_to_ce_map_wlan),
+				       GFP_KERNEL);
+	if (!ar_pci->serv_to_pipe) {
+		ret = -ENOMEM;
+		goto err_free;
+	}
+
 	ret = ath10k_pci_setup_resource(ar);
 	if (ret) {
 		ath10k_err(ar, "failed to setup resource: %d\n", ret);
@@ -3690,6 +3720,11 @@ err_free_pipes:
 err_core_destroy:
 	ath10k_core_destroy(ar);
 
+err_free:
+	kfree(ar_pci->attr);
+	kfree(ar_pci->pipe_config);
+	kfree(ar_pci->serv_to_pipe);
+
 	return ret;
 }
 
@@ -3715,6 +3750,9 @@ static void ath10k_pci_remove(struct pci_dev *pdev)
 	ath10k_pci_sleep_sync(ar);
 	ath10k_pci_release(ar);
 	ath10k_core_destroy(ar);
+	kfree(ar_pci->attr);
+	kfree(ar_pci->pipe_config);
+	kfree(ar_pci->serv_to_pipe);
 }
 
 MODULE_DEVICE_TABLE(pci, ath10k_pci_id_table);
diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index 4455ed6c5275..e3cbd259a2dc 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -183,6 +183,10 @@ struct ath10k_pci {
 	 * this struct.
 	 */
 	struct ath10k_ahb ahb[0];
+
+	struct ce_attr *attr;
+	struct ce_pipe_config *pipe_config;
+	struct ce_service_to_pipe *serv_to_pipe;
 };
 
 static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index 21081b4a27d7..3e870aae59d2 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -356,7 +356,7 @@ static struct ce_pipe_config target_ce_config_wlan[] = {
 	},
 };
 
-static struct service_to_pipe target_service_to_ce_map_wlan[] = {
+static struct ce_service_to_pipe target_service_to_ce_map_wlan[] = {
 	{
 		__cpu_to_le32(ATH10K_HTC_SVC_ID_WMI_DATA_VO),
 		__cpu_to_le32(PIPEDIR_OUT),	/* out = UL = host -> target */
@@ -769,7 +769,7 @@ static int ath10k_snoc_hif_map_service_to_pipe(struct ath10k *ar,
 					       u16 service_id,
 					       u8 *ul_pipe, u8 *dl_pipe)
 {
-	const struct service_to_pipe *entry;
+	const struct ce_service_to_pipe *entry;
 	bool ul_set = false, dl_set = false;
 	int i;
 
-- 
cgit v1.2.3-59-g8ed1b


From d431f8939c1419854dfe89dd345387f5397c6edd Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Mon, 4 May 2020 12:03:14 +0300
Subject: ath10k: remove the max_sched_scan_reqs value

The struct cfg80211_wowlan of the NET_DETECT WoWLAN feature shares
struct cfg80211_sched_scan_request with the scheduled scan request
feature, and max_sched_scan_reqs of wiphy is only used for sched scan.
ath10k does not support the scheduled scan request feature, so it does
not set the flag NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR, but it does
set max_sched_scan_reqs of wiphy to the non-zero value 1. Because
max_sched_scan_reqs is non-zero, the function nl80211_add_commands_unsplit
of cfg80211 advertises support for the NL80211_CMD_START_SCHED_SCAN
command, although ath10k does not actually support it. This leads to a
mismatch in the sched scan result of cfg80211: the application shill finds
the mismatch and stops running the MAC random address scan case, so it fails.

After removing the max_sched_scan_reqs value, the sched scan result is
consistent and the MAC random address scan case passes.

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00029.
Tested with QCA6174 PCIe with firmware WLAN.RM.4.4.1-00110-QCARMSWP-1.

Fixes: ce834e280f2f875 ("ath10k: support NET_DETECT WoWLAN feature")
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20191114050001.4658-1-wgong@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/mac.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 0b7d510d2725..91f5444ecedb 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -9188,7 +9188,6 @@ int ath10k_mac_register(struct ath10k *ar)
 	ar->hw->wiphy->max_scan_ie_len = WLAN_SCAN_PARAMS_MAX_IE_LEN;
 
 	if (test_bit(WMI_SERVICE_NLO, ar->wmi.svc_map)) {
-		ar->hw->wiphy->max_sched_scan_reqs = 1;
 		ar->hw->wiphy->max_sched_scan_ssids = WMI_PNO_MAX_SUPP_NETWORKS;
 		ar->hw->wiphy->max_match_sets = WMI_PNO_MAX_SUPP_NETWORKS;
 		ar->hw->wiphy->max_sched_scan_ie_len = WMI_PNO_MAX_IE_LENGTH;
-- 
cgit v1.2.3-59-g8ed1b


From 85325c24d5d2c8fcde35a634742d14d45bf7326e Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Mon, 4 May 2020 12:03:33 +0300
Subject: dt-bindings: ath10k: Add wifi-firmware subnode for wifi node

Add a wifi-firmware subnode for the wifi node.
This wifi-firmware subnode is needed for the
targets which do not support TrustZone.

Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586971906-20985-2-git-send-email-pillair@codeaurora.org
---
 .../devicetree/bindings/net/wireless/qcom,ath10k.txt       | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
index 71bf91f97386..65ee68efd574 100644
--- a/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
+++ b/Documentation/devicetree/bindings/net/wireless/qcom,ath10k.txt
@@ -96,6 +96,17 @@ Optional properties:
 - qcom,coexist-gpio-pin : gpio pin number  information to support coex
 			  which will be used by wifi firmware.
 
+* Subnodes
+The ath10k wifi node can contain one optional firmware subnode.
+Firmware subnode is needed when the platform does not have TrustZone.
+The firmware subnode must have:
+
+- iommus:
+	Usage: required
+	Value type: <prop-encoded-array>
+	Definition: A list of phandle and IOMMU specifier pairs.
+
+
 Example (to supply PCI based wifi block details):
 
 In this example, the node is defined as child node of the PCI controller.
@@ -196,4 +207,7 @@ wifi@18000000 {
 		memory-region = <&wifi_msa_mem>;
 		iommus = <&apps_smmu 0x0040 0x1>;
 		qcom,msa-fixed-perm;
+		wifi-firmware {
+			iommus = <&apps_iommu 0xc22 0x1>;
+		};
 };
-- 
cgit v1.2.3-59-g8ed1b


From 727fec790ead3d75e2735f66209949c2163523ea Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Mon, 4 May 2020 12:03:45 +0300
Subject: ath10k: Setup the msa resources before qmi init

Move the msa resources setup out of qmi init and
set up the msa resources as a part of probe, before
the qmi init is done.

Tested HW: WCN3990
Tested FW: WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1

Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586971906-20985-3-git-send-email-pillair@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/core.h |  5 +++
 drivers/net/wireless/ath/ath10k/qmi.c  | 61 +++++---------------------------
 drivers/net/wireless/ath/ath10k/qmi.h  |  3 --
 drivers/net/wireless/ath/ath10k/snoc.c | 64 ++++++++++++++++++++++++++++++----
 4 files changed, 70 insertions(+), 63 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index ad6ef8d492c8..ceac76553b8f 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -975,6 +975,11 @@ struct ath10k {
 	struct ieee80211_hw *hw;
 	struct ieee80211_ops *ops;
 	struct device *dev;
+	struct msa_region {
+		dma_addr_t paddr;
+		u32 mem_size;
+		void *vaddr;
+	} msa;
 	u8 mac_addr[ETH_ALEN];
 
 	enum ath10k_hw_rev hw_rev;
diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c
index 85dce43c5439..5ae829b46c3d 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.c
+++ b/drivers/net/wireless/ath/ath10k/qmi.c
@@ -122,8 +122,8 @@ static int ath10k_qmi_msa_mem_info_send_sync_msg(struct ath10k_qmi *qmi)
 	int ret;
 	int i;
 
-	req.msa_addr = qmi->msa_pa;
-	req.size = qmi->msa_mem_size;
+	req.msa_addr = ar->msa.paddr;
+	req.size = ar->msa.mem_size;
 
 	ret = qmi_txn_init(&qmi->qmi_hdl, &txn,
 			   wlfw_msa_info_resp_msg_v01_ei, &resp);
@@ -157,12 +157,12 @@ static int ath10k_qmi_msa_mem_info_send_sync_msg(struct ath10k_qmi *qmi)
 		goto out;
 	}
 
-	max_mapped_addr = qmi->msa_pa + qmi->msa_mem_size;
+	max_mapped_addr = ar->msa.paddr + ar->msa.mem_size;
 	qmi->nr_mem_region = resp.mem_region_info_len;
 	for (i = 0; i < resp.mem_region_info_len; i++) {
-		if (resp.mem_region_info[i].size > qmi->msa_mem_size ||
+		if (resp.mem_region_info[i].size > ar->msa.mem_size ||
 		    resp.mem_region_info[i].region_addr > max_mapped_addr ||
-		    resp.mem_region_info[i].region_addr < qmi->msa_pa ||
+		    resp.mem_region_info[i].region_addr < ar->msa.paddr ||
 		    resp.mem_region_info[i].size +
 		    resp.mem_region_info[i].region_addr > max_mapped_addr) {
 			ath10k_err(ar, "received out of range memory region address 0x%llx with size 0x%x, aborting\n",
@@ -1006,54 +1006,10 @@ static void ath10k_qmi_driver_event_work(struct work_struct *work)
 	spin_unlock(&qmi->event_lock);
 }
 
-static int ath10k_qmi_setup_msa_resources(struct ath10k_qmi *qmi, u32 msa_size)
-{
-	struct ath10k *ar = qmi->ar;
-	struct device *dev = ar->dev;
-	struct device_node *node;
-	struct resource r;
-	int ret;
-
-	node = of_parse_phandle(dev->of_node, "memory-region", 0);
-	if (node) {
-		ret = of_address_to_resource(node, 0, &r);
-		if (ret) {
-			dev_err(dev, "failed to resolve msa fixed region\n");
-			return ret;
-		}
-		of_node_put(node);
-
-		qmi->msa_pa = r.start;
-		qmi->msa_mem_size = resource_size(&r);
-		qmi->msa_va = devm_memremap(dev, qmi->msa_pa, qmi->msa_mem_size,
-					    MEMREMAP_WT);
-		if (IS_ERR(qmi->msa_va)) {
-			dev_err(dev, "failed to map memory region: %pa\n", &r.start);
-			return PTR_ERR(qmi->msa_va);
-		}
-	} else {
-		qmi->msa_va = dmam_alloc_coherent(dev, msa_size,
-						  &qmi->msa_pa, GFP_KERNEL);
-		if (!qmi->msa_va) {
-			ath10k_err(ar, "failed to allocate dma memory for msa region\n");
-			return -ENOMEM;
-		}
-		qmi->msa_mem_size = msa_size;
-	}
-
-	if (of_property_read_bool(dev->of_node, "qcom,msa-fixed-perm"))
-		qmi->msa_fixed_perm = true;
-
-	ath10k_dbg(ar, ATH10K_DBG_QMI, "msa pa: %pad , msa va: 0x%p\n",
-		   &qmi->msa_pa,
-		   qmi->msa_va);
-
-	return 0;
-}
-
 int ath10k_qmi_init(struct ath10k *ar, u32 msa_size)
 {
 	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+	struct device *dev = ar->dev;
 	struct ath10k_qmi *qmi;
 	int ret;
 
@@ -1064,9 +1020,8 @@ int ath10k_qmi_init(struct ath10k *ar, u32 msa_size)
 	qmi->ar = ar;
 	ar_snoc->qmi = qmi;
 
-	ret = ath10k_qmi_setup_msa_resources(qmi, msa_size);
-	if (ret)
-		goto err;
+	if (of_property_read_bool(dev->of_node, "qcom,msa-fixed-perm"))
+		qmi->msa_fixed_perm = true;
 
 	ret = qmi_handle_init(&qmi->qmi_hdl,
 			      WLFW_BDF_DOWNLOAD_REQ_MSG_V01_MAX_MSG_LEN,
diff --git a/drivers/net/wireless/ath/ath10k/qmi.h b/drivers/net/wireless/ath/ath10k/qmi.h
index dc257375f161..450be18b60ad 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.h
+++ b/drivers/net/wireless/ath/ath10k/qmi.h
@@ -93,9 +93,6 @@ struct ath10k_qmi {
 	spinlock_t event_lock; /* spinlock for qmi event list */
 	u32 nr_mem_region;
 	struct ath10k_msa_mem_info mem_region[MAX_NUM_MEMORY_REGIONS];
-	dma_addr_t msa_pa;
-	u32 msa_mem_size;
-	void *msa_va;
 	struct ath10k_qmi_chip_info chip_info;
 	struct ath10k_qmi_board_info board_info;
 	struct ath10k_qmi_soc_info soc_info;
diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index 3e870aae59d2..7f3f18129a8e 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -11,6 +11,7 @@
 #include <linux/platform_device.h>
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
+#include <linux/of_address.h>
 
 #include "ce.h"
 #include "coredump.h"
@@ -1393,7 +1394,6 @@ static int ath10k_hw_power_off(struct ath10k *ar)
 static void ath10k_msa_dump_memory(struct ath10k *ar,
 				   struct ath10k_fw_crash_data *crash_data)
 {
-	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
 	const struct ath10k_hw_mem_layout *mem_layout;
 	const struct ath10k_mem_region *current_region;
 	struct ath10k_dump_ram_data_hdr *hdr;
@@ -1419,15 +1419,15 @@ static void ath10k_msa_dump_memory(struct ath10k *ar,
 	buf_len -= sizeof(*hdr);
 
 	hdr->region_type = cpu_to_le32(current_region->type);
-	hdr->start = cpu_to_le32((unsigned long)ar_snoc->qmi->msa_va);
-	hdr->length = cpu_to_le32(ar_snoc->qmi->msa_mem_size);
+	hdr->start = cpu_to_le32((unsigned long)ar->msa.vaddr);
+	hdr->length = cpu_to_le32(ar->msa.mem_size);
 
-	if (current_region->len < ar_snoc->qmi->msa_mem_size) {
-		memcpy(buf, ar_snoc->qmi->msa_va, current_region->len);
+	if (current_region->len < ar->msa.mem_size) {
+		memcpy(buf, ar->msa.vaddr, current_region->len);
 		ath10k_warn(ar, "msa dump length is less than msa size %x, %x\n",
-			    current_region->len, ar_snoc->qmi->msa_mem_size);
+			    current_region->len, ar->msa.mem_size);
 	} else {
-		memcpy(buf, ar_snoc->qmi->msa_va, ar_snoc->qmi->msa_mem_size);
+		memcpy(buf, ar->msa.vaddr, ar->msa.mem_size);
 	}
 }
 
@@ -1455,6 +1455,50 @@ void ath10k_snoc_fw_crashed_dump(struct ath10k *ar)
 	mutex_unlock(&ar->dump_mutex);
 }
 
+static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size)
+{
+	struct device *dev = ar->dev;
+	struct device_node *node;
+	struct resource r;
+	int ret;
+
+	node = of_parse_phandle(dev->of_node, "memory-region", 0);
+	if (node) {
+		ret = of_address_to_resource(node, 0, &r);
+		if (ret) {
+			dev_err(dev, "failed to resolve msa fixed region\n");
+			return ret;
+		}
+		of_node_put(node);
+
+		ar->msa.paddr = r.start;
+		ar->msa.mem_size = resource_size(&r);
+		ar->msa.vaddr = devm_memremap(dev, ar->msa.paddr,
+					      ar->msa.mem_size,
+					      MEMREMAP_WT);
+		if (IS_ERR(ar->msa.vaddr)) {
+			dev_err(dev, "failed to map memory region: %pa\n",
+				&r.start);
+			return PTR_ERR(ar->msa.vaddr);
+		}
+	} else {
+		ar->msa.vaddr = dmam_alloc_coherent(dev, msa_size,
+						    &ar->msa.paddr,
+						    GFP_KERNEL);
+		if (!ar->msa.vaddr) {
+			ath10k_err(ar, "failed to allocate dma memory for msa region\n");
+			return -ENOMEM;
+		}
+		ar->msa.mem_size = msa_size;
+	}
+
+	ath10k_dbg(ar, ATH10K_DBG_QMI, "qmi msa.paddr: %pad , msa.vaddr: 0x%p\n",
+		   &ar->msa.paddr,
+		   ar->msa.vaddr);
+
+	return 0;
+}
+
 static const struct of_device_id ath10k_snoc_dt_match[] = {
 	{ .compatible = "qcom,wcn3990-wifi",
 	 .data = &drv_priv,
@@ -1557,6 +1601,12 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
 		goto err_free_irq;
 	}
 
+	ret = ath10k_setup_msa_resources(ar, msa_size);
+	if (ret) {
+		ath10k_warn(ar, "failed to setup msa resources: %d\n", ret);
+		goto err_power_off;
+	}
+
 	ret = ath10k_qmi_init(ar, msa_size);
 	if (ret) {
 		ath10k_warn(ar, "failed to register wlfw qmi client: %d\n", ret);
-- 
cgit v1.2.3-59-g8ed1b


From 1423f43273319d53474c70f8f775c8c05e8b690e Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Mon, 4 May 2020 12:03:52 +0300
Subject: ath10k: Add support for targets without trustzone

Add support for attaching and mapping an IOMMU
domain on targets that do not have TrustZone
support.

Tested HW: WCN3990
Tested FW: WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1

Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1586971906-20985-4-git-send-email-pillair@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/snoc.c | 118 ++++++++++++++++++++++++++++++++-
 drivers/net/wireless/ath/ath10k/snoc.h |   7 ++
 2 files changed, 124 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/snoc.c b/drivers/net/wireless/ath/ath10k/snoc.c
index 7f3f18129a8e..354d49b1cd45 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.c
+++ b/drivers/net/wireless/ath/ath10k/snoc.c
@@ -12,6 +12,7 @@
 #include <linux/property.h>
 #include <linux/regulator/consumer.h>
 #include <linux/of_address.h>
+#include <linux/iommu.h>
 
 #include "ce.h"
 #include "coredump.h"
@@ -1499,6 +1500,111 @@ static int ath10k_setup_msa_resources(struct ath10k *ar, u32 msa_size)
 	return 0;
 }
 
+static int ath10k_fw_init(struct ath10k *ar)
+{
+	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+	struct device *host_dev = &ar_snoc->dev->dev;
+	struct platform_device_info info;
+	struct iommu_domain *iommu_dom;
+	struct platform_device *pdev;
+	struct device_node *node;
+	int ret;
+
+	node = of_get_child_by_name(host_dev->of_node, "wifi-firmware");
+	if (!node) {
+		ar_snoc->use_tz = true;
+		return 0;
+	}
+
+	memset(&info, 0, sizeof(info));
+	info.fwnode = &node->fwnode;
+	info.parent = host_dev;
+	info.name = node->name;
+	info.dma_mask = DMA_BIT_MASK(32);
+
+	pdev = platform_device_register_full(&info);
+	if (IS_ERR(pdev)) {
+		of_node_put(node);
+		return PTR_ERR(pdev);
+	}
+
+	pdev->dev.of_node = node;
+
+	ret = of_dma_configure(&pdev->dev, node, true);
+	if (ret) {
+		ath10k_err(ar, "dma configure fail: %d\n", ret);
+		goto err_unregister;
+	}
+
+	ar_snoc->fw.dev = &pdev->dev;
+
+	iommu_dom = iommu_domain_alloc(&platform_bus_type);
+	if (!iommu_dom) {
+		ath10k_err(ar, "failed to allocate iommu domain\n");
+		ret = -ENOMEM;
+		goto err_unregister;
+	}
+
+	ret = iommu_attach_device(iommu_dom, ar_snoc->fw.dev);
+	if (ret) {
+		ath10k_err(ar, "could not attach device: %d\n", ret);
+		goto err_iommu_free;
+	}
+
+	ar_snoc->fw.iommu_domain = iommu_dom;
+	ar_snoc->fw.fw_start_addr = ar->msa.paddr;
+
+	ret = iommu_map(iommu_dom, ar_snoc->fw.fw_start_addr,
+			ar->msa.paddr, ar->msa.mem_size,
+			IOMMU_READ | IOMMU_WRITE);
+	if (ret) {
+		ath10k_err(ar, "failed to map firmware region: %d\n", ret);
+		goto err_iommu_detach;
+	}
+
+	of_node_put(node);
+
+	return 0;
+
+err_iommu_detach:
+	iommu_detach_device(iommu_dom, ar_snoc->fw.dev);
+
+err_iommu_free:
+	iommu_domain_free(iommu_dom);
+
+err_unregister:
+	platform_device_unregister(pdev);
+	of_node_put(node);
+
+	return ret;
+}
+
+static int ath10k_fw_deinit(struct ath10k *ar)
+{
+	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
+	const size_t mapped_size = ar_snoc->fw.mapped_mem_size;
+	struct iommu_domain *iommu;
+	size_t unmapped_size;
+
+	if (ar_snoc->use_tz)
+		return 0;
+
+	iommu = ar_snoc->fw.iommu_domain;
+
+	unmapped_size = iommu_unmap(iommu, ar_snoc->fw.fw_start_addr,
+				    mapped_size);
+	if (unmapped_size != mapped_size)
+		ath10k_err(ar, "failed to unmap firmware: %zu\n",
+			   unmapped_size);
+
+	iommu_detach_device(iommu, ar_snoc->fw.dev);
+	iommu_domain_free(iommu);
+
+	platform_device_unregister(to_platform_device(ar_snoc->fw.dev));
+
+	return 0;
+}
+
 static const struct of_device_id ath10k_snoc_dt_match[] = {
 	{ .compatible = "qcom,wcn3990-wifi",
 	 .data = &drv_priv,
@@ -1607,16 +1713,25 @@ static int ath10k_snoc_probe(struct platform_device *pdev)
 		goto err_power_off;
 	}
 
+	ret = ath10k_fw_init(ar);
+	if (ret) {
+		ath10k_err(ar, "failed to initialize firmware: %d\n", ret);
+		goto err_power_off;
+	}
+
 	ret = ath10k_qmi_init(ar, msa_size);
 	if (ret) {
 		ath10k_warn(ar, "failed to register wlfw qmi client: %d\n", ret);
-		goto err_power_off;
+		goto err_fw_deinit;
 	}
 
 	ath10k_dbg(ar, ATH10K_DBG_SNOC, "snoc probe\n");
 
 	return 0;
 
+err_fw_deinit:
+	ath10k_fw_deinit(ar);
+
 err_power_off:
 	ath10k_hw_power_off(ar);
 
@@ -1648,6 +1763,7 @@ static int ath10k_snoc_remove(struct platform_device *pdev)
 
 	ath10k_core_unregister(ar);
 	ath10k_hw_power_off(ar);
+	ath10k_fw_deinit(ar);
 	ath10k_snoc_free_irq(ar);
 	ath10k_snoc_release_resource(ar);
 	ath10k_qmi_deinit(ar);
diff --git a/drivers/net/wireless/ath/ath10k/snoc.h b/drivers/net/wireless/ath/ath10k/snoc.h
index c05df45a3945..a3dd06f6ac62 100644
--- a/drivers/net/wireless/ath/ath10k/snoc.h
+++ b/drivers/net/wireless/ath/ath10k/snoc.h
@@ -55,6 +55,13 @@ struct regulator_bulk_data;
 struct ath10k_snoc {
 	struct platform_device *dev;
 	struct ath10k *ar;
+	unsigned int use_tz;
+	struct ath10k_firmware {
+		struct device *dev;
+		dma_addr_t fw_start_addr;
+		struct iommu_domain *iommu_domain;
+		size_t mapped_mem_size;
+	} fw;
 	void __iomem *mem;
 	dma_addr_t mem_pa;
 	struct ath10k_snoc_target_info target_info;
-- 
cgit v1.2.3-59-g8ed1b


From b7b527b9c7c8d50737f45167d2d3399c7278d9e9 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 4 May 2020 19:33:36 +0800
Subject: ath11k: use true,false for bool variables

Fix the following coccicheck warning:

drivers/net/wireless/ath/ath11k/dp_rx.c:2964:1-39: WARNING: Assignment
of 0/1 to bool variable
drivers/net/wireless/ath/ath11k/dp_rx.c:2965:1-38: WARNING: Assignment
of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504113336.41249-1-yanaijie@huawei.com
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 47ad3bd9e1c6..6b47bb7865dc 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -2965,8 +2965,8 @@ static int ath11k_dp_rx_h_verify_tkip_mic(struct ath11k *ar, struct ath11k_peer
 	return 0;
 
 mic_fail:
-	(ATH11K_SKB_RXCB(msdu))->is_first_msdu = 1;
-	(ATH11K_SKB_RXCB(msdu))->is_last_msdu = 1;
+	(ATH11K_SKB_RXCB(msdu))->is_first_msdu = true;
+	(ATH11K_SKB_RXCB(msdu))->is_last_msdu = true;
 
 	rxs->flag |= RX_FLAG_MMIC_ERROR | RX_FLAG_MMIC_STRIPPED |
 		    RX_FLAG_IV_STRIPPED | RX_FLAG_DECRYPTED;
-- 
cgit v1.2.3-59-g8ed1b


From 52f274b519939f5e306b26f2a3cf7c63ef45c203 Mon Sep 17 00:00:00 2001
From: Sowmiya Sree Elavalagan <ssreeela@codeaurora.org>
Date: Mon, 4 May 2020 17:15:55 +0530
Subject: ath11k: fix resource unavailability for htt stats after peer stats
 display

htt stats stop working after htt peer stats are displayed
and also after htt peer stats are reset. Trying to dump htt
stats shows "Resource temporarily unavailable".
This is because the "ar->debug.htt_stats.stats_req" member is
reused for all htt stats without being reset after the
previous usage. Hence assigning NULL to this member
after freeing the allocated memory fixes the issue.

console logs below:
# echo 9 >/sys/kernel/debug/ath11k/ipq8074/mac1/htt_stats_type
# cat /sys/kernel/debug/ath11k/ipq8074/mac1/htt_stats_type
9
# cat /sys/kernel/debug/ath11k/ipq8074/mac1/htt_stats
cat: can't open '/sys/kernel/debug/ath11k/ipq8074/mac1/htt_stats'
: Resource temporarily unavailable

Signed-off-by: Sowmiya Sree Elavalagan <ssreeela@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588592755-10427-1-git-send-email-ssreeela@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/debugfs_sta.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/debugfs_sta.c b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
index 68963cfc5097..46eee5178f66 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_sta.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
@@ -435,13 +435,22 @@ ath11k_dbg_sta_open_htt_peer_stats(struct inode *inode, struct file *file)
 	return 0;
 out:
 	vfree(stats_req);
+	ar->debug.htt_stats.stats_req = NULL;
 	return ret;
 }
 
 static int
 ath11k_dbg_sta_release_htt_peer_stats(struct inode *inode, struct file *file)
 {
+	struct ieee80211_sta *sta = inode->i_private;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+
+	mutex_lock(&ar->conf_mutex);
 	vfree(file->private_data);
+	ar->debug.htt_stats.stats_req = NULL;
+	mutex_unlock(&ar->conf_mutex);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 09f6c44aaae0f1bdb8b983d7762676d5018c53bc Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 5 May 2020 10:49:20 +0800
Subject: net: allwinner: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type. Also, emac_start_xmit() can
leak an skb if 'channel' == 3, so return NETDEV_TX_BUSY there instead of 1.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/allwinner/sun4i-emac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/allwinner/sun4i-emac.c b/drivers/net/ethernet/allwinner/sun4i-emac.c
index 18d3b4340bd4..b3b8a8010142 100644
--- a/drivers/net/ethernet/allwinner/sun4i-emac.c
+++ b/drivers/net/ethernet/allwinner/sun4i-emac.c
@@ -417,7 +417,7 @@ static void emac_timeout(struct net_device *dev, unsigned int txqueue)
 /* Hardware start transmission.
  * Send a packet to media from the upper layer.
  */
-static int emac_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t emac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct emac_board_info *db = netdev_priv(dev);
 	unsigned long channel;
@@ -425,7 +425,7 @@ static int emac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	channel = db->tx_fifo_stat & 3;
 	if (channel == 3)
-		return 1;
+		return NETDEV_TX_BUSY;
 
 	channel = (channel == 1 ? 1 : 0);
 
-- 
cgit v1.2.3-59-g8ed1b


From ab99b7d2ae6e33077045e92b5ca173635c7eceef Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 5 May 2020 11:06:45 +0800
Subject: net: altera: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/altera/altera_tse_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/altera/altera_tse_main.c b/drivers/net/ethernet/altera/altera_tse_main.c
index 1671c1f36691..907125abef2c 100644
--- a/drivers/net/ethernet/altera/altera_tse_main.c
+++ b/drivers/net/ethernet/altera/altera_tse_main.c
@@ -554,7 +554,7 @@ static irqreturn_t altera_isr(int irq, void *dev_id)
  * physically contiguous fragment starting at
  * skb->data, for length of skb_headlen(skb).
  */
-static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t tse_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct altera_tse_private *priv = netdev_priv(dev);
 	unsigned int txsize = priv->tx_ring_size;
@@ -562,7 +562,7 @@ static int tse_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	struct tse_buffer *buffer = NULL;
 	int nfrags = skb_shinfo(skb)->nr_frags;
 	unsigned int nopaged_len = skb_headlen(skb);
-	enum netdev_tx ret = NETDEV_TX_OK;
+	netdev_tx_t ret = NETDEV_TX_OK;
 	dma_addr_t dma_addr;
 
 	spin_lock_bh(&priv->tx_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 58618ef85546726cf27c38ddc1b022c703b7a6ad Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 5 May 2020 11:22:20 +0800
Subject: net: nxp: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/nxp/lpc_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/nxp/lpc_eth.c b/drivers/net/ethernet/nxp/lpc_eth.c
index 311454d9b0bc..d3cbb4215f5c 100644
--- a/drivers/net/ethernet/nxp/lpc_eth.c
+++ b/drivers/net/ethernet/nxp/lpc_eth.c
@@ -1030,7 +1030,8 @@ static int lpc_eth_close(struct net_device *ndev)
 	return 0;
 }
 
-static int lpc_eth_hard_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t lpc_eth_hard_start_xmit(struct sk_buff *skb,
+					   struct net_device *ndev)
 {
 	struct netdata_local *pldat = netdev_priv(ndev);
 	u32 len, txidx;
-- 
cgit v1.2.3-59-g8ed1b


From 3e1853e4e1137ba0a4d314521d153852dbf4aff5 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 5 May 2020 13:57:49 +0800
Subject: net: emac: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qualcomm/emac/emac-mac.c | 5 +++--
 drivers/net/ethernet/qualcomm/emac/emac-mac.h | 5 +++--
 drivers/net/ethernet/qualcomm/emac/emac.c     | 3 ++-
 3 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.c b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
index 251d4ac4af02..117188e3c7de 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.c
@@ -1431,8 +1431,9 @@ error:
 }
 
 /* Transmit the packet using specified transmit queue */
-int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
-			 struct sk_buff *skb)
+netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt,
+				 struct emac_tx_queue *tx_q,
+				 struct sk_buff *skb)
 {
 	struct emac_tpd tpd;
 	u32 prod_idx;
diff --git a/drivers/net/ethernet/qualcomm/emac/emac-mac.h b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
index ae08bdd9046c..920123eb8ace 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac-mac.h
+++ b/drivers/net/ethernet/qualcomm/emac/emac-mac.h
@@ -227,8 +227,9 @@ void emac_mac_stop(struct emac_adapter *adpt);
 void emac_mac_mode_config(struct emac_adapter *adpt);
 void emac_mac_rx_process(struct emac_adapter *adpt, struct emac_rx_queue *rx_q,
 			 int *num_pkts, int max_pkts);
-int emac_mac_tx_buf_send(struct emac_adapter *adpt, struct emac_tx_queue *tx_q,
-			 struct sk_buff *skb);
+netdev_tx_t emac_mac_tx_buf_send(struct emac_adapter *adpt,
+				 struct emac_tx_queue *tx_q,
+				 struct sk_buff *skb);
 void emac_mac_tx_process(struct emac_adapter *adpt, struct emac_tx_queue *tx_q);
 void emac_mac_rx_tx_ring_init_all(struct platform_device *pdev,
 				  struct emac_adapter *adpt);
diff --git a/drivers/net/ethernet/qualcomm/emac/emac.c b/drivers/net/ethernet/qualcomm/emac/emac.c
index 18b0c7a2d6dc..20b1b43a0e39 100644
--- a/drivers/net/ethernet/qualcomm/emac/emac.c
+++ b/drivers/net/ethernet/qualcomm/emac/emac.c
@@ -115,7 +115,8 @@ static int emac_napi_rtx(struct napi_struct *napi, int budget)
 }
 
 /* Transmit the packet */
-static int emac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t emac_start_xmit(struct sk_buff *skb,
+				   struct net_device *netdev)
 {
 	struct emac_adapter *adpt = netdev_priv(netdev);
 
-- 
cgit v1.2.3-59-g8ed1b


From d9a1c584a93a90ecd24fca775a68d113fda05576 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:43:49 +0800
Subject: qlcnic: use true,false for bool variable in qlcnic_sriov_common.c

Fix the following coccicheck warning:

drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c:1585:3-25:
WARNING: Assignment of 0/1 to bool variable
drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c:1588:3-25:
WARNING: Assignment of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
index f7c2f32237cb..7adbb03cb931 100644
--- a/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
+++ b/drivers/net/ethernet/qlogic/qlcnic/qlcnic_sriov_common.c
@@ -1582,10 +1582,10 @@ void qlcnic_sriov_vf_set_multi(struct net_device *netdev)
 		if (mode == VPORT_MISS_MODE_ACCEPT_ALL &&
 		    !adapter->fdb_mac_learn) {
 			qlcnic_alloc_lb_filters_mem(adapter);
-			adapter->drv_mac_learn = 1;
+			adapter->drv_mac_learn = true;
 			adapter->rx_mac_learn = true;
 		} else {
-			adapter->drv_mac_learn = 0;
+			adapter->drv_mac_learn = false;
 			adapter->rx_mac_learn = false;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 66b63a60d7ea9f589a9cdc404057d8072930962d Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:44:00 +0800
Subject: bnx2x: Remove Comparison to bool in bnx2x_dcb.c

Fix the following coccicheck warning:

drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c:1548:17-31: WARNING:
Comparison to bool
drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c:1148:16-24: WARNING:
Comparison to bool
drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c:1158:30-38: WARNING:
Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
index 2c6ba046d2a8..17ae6df90723 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_dcb.c
@@ -1145,7 +1145,7 @@ static void bnx2x_dcbx_get_num_pg_traf_type(struct bnx2x *bp,
 					break;
 				}
 			}
-			if (false == pg_found) {
+			if (!pg_found) {
 				data[help_data->num_of_pg].pg = add_pg;
 				data[help_data->num_of_pg].pg_priority =
 						(1 << ttp[add_traf_type]);
@@ -1155,7 +1155,7 @@ static void bnx2x_dcbx_get_num_pg_traf_type(struct bnx2x *bp,
 		}
 		DP(BNX2X_MSG_DCB,
 		   "add_traf_type %d pg_found %s num_of_pg %d\n",
-		   add_traf_type, (false == pg_found) ? "NO" : "YES",
+		   add_traf_type, !pg_found ? "NO" : "YES",
 		   help_data->num_of_pg);
 	}
 }
@@ -1544,8 +1544,7 @@ static void bnx2x_dcbx_2cos_limit_cee_three_pg_to_cos_params(
 			if (pg_entry < DCBX_MAX_NUM_PG_BW_ENTRIES) {
 				entry = 0;
 
-				if (i == (num_of_pri-1) &&
-				    false == b_found_strict)
+				if (i == (num_of_pri-1) && !b_found_strict)
 					/* last entry will be handled separately
 					 * If no priority is strict than last
 					 * entry goes to last queue.
-- 
cgit v1.2.3-59-g8ed1b


From 93e6044bd1cb635c3a07dc1b8471bc965feb9b04 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:45:39 +0800
Subject: net: qede: Use true for bool variable in qede_init_fp()

Fix the following coccicheck warning:

drivers/net/ethernet/qlogic/qede/qede_main.c:1717:5-19: WARNING:
Assignment of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 9b456198cb50..256506024b88 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1714,7 +1714,7 @@ static void qede_init_fp(struct qede_dev *edev)
 				txq->ndev_txq_id = ndev_tx_id;
 
 				if (edev->dev_info.is_legacy)
-					txq->is_legacy = 1;
+					txq->is_legacy = true;
 				txq->dev = &edev->pdev->dev;
 			}
 
-- 
cgit v1.2.3-59-g8ed1b


From e8cc063d83106bc6855c07db0001e8d01cbd8b3e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:45:46 +0800
Subject: net: atheros: remove conversion to bool in atl1c_start_mac()

No need to convert '==' expression to bool. This fixes the following
coccicheck warning:

drivers/net/ethernet/atheros/atl1c/atl1c_main.c:1189:63-68: WARNING:
conversion to bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 00bd7bd55794..04bc53af12d9 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -1186,7 +1186,7 @@ static void atl1c_start_mac(struct atl1c_adapter *adapter)
 	struct atl1c_hw *hw = &adapter->hw;
 	u32 mac, txq, rxq;
 
-	hw->mac_duplex = adapter->link_duplex == FULL_DUPLEX ? true : false;
+	hw->mac_duplex = adapter->link_duplex == FULL_DUPLEX;
 	hw->mac_speed = adapter->link_speed == SPEED_1000 ?
 		atl1c_mac_speed_1000 : atl1c_mac_speed_10_100;
 
-- 
cgit v1.2.3-59-g8ed1b


From ec161116ad554fec2e87215da93db48b5767f323 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:45:56 +0800
Subject: net: agere: use true,false for bool variable

Fix the following coccicheck warning:

drivers/net/ethernet/agere/et131x.c:717:3-22: WARNING: Assignment of
0/1 to bool variable
drivers/net/ethernet/agere/et131x.c:721:1-20: WARNING: Assignment of
0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Acked-by: Mark Einon <mark.einon@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/agere/et131x.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/agere/et131x.c b/drivers/net/ethernet/agere/et131x.c
index 1b19385ad8a9..865892c1f23f 100644
--- a/drivers/net/ethernet/agere/et131x.c
+++ b/drivers/net/ethernet/agere/et131x.c
@@ -714,11 +714,11 @@ static int et131x_init_eeprom(struct et131x_adapter *adapter)
 			 * gather additional information that normally would
 			 * come from the eeprom, like MAC Address
 			 */
-			adapter->has_eeprom = 0;
+			adapter->has_eeprom = false;
 			return -EIO;
 		}
 	}
-	adapter->has_eeprom = 1;
+	adapter->has_eeprom = true;
 
 	/* Read the EEPROM for information regarding LED behavior. Refer to
 	 * et131x_xcvr_init() for its use.
-- 
cgit v1.2.3-59-g8ed1b


From ba42580019560ed9c54f87c3c4e852ce26869c5d Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:46:08 +0800
Subject: net: bnxt: Remove Comparison to bool in bnxt_ethtool.c

Fix the following coccicheck warning:

drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c:1991:5-46: WARNING:
Comparison to bool
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c:1993:10-54: WARNING:
Comparison to bool
drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c:2380:5-38: WARNING:
Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
index 07526868f7be..dd0c3f227009 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_ethtool.c
@@ -2095,9 +2095,9 @@ static int bnxt_flash_firmware_from_file(struct net_device *dev,
 			   rc, filename);
 		return rc;
 	}
-	if (bnxt_dir_type_is_ape_bin_format(dir_type) == true)
+	if (bnxt_dir_type_is_ape_bin_format(dir_type))
 		rc = bnxt_flash_firmware(dev, dir_type, fw->data, fw->size);
-	else if (bnxt_dir_type_is_other_exec_format(dir_type) == true)
+	else if (bnxt_dir_type_is_other_exec_format(dir_type))
 		rc = bnxt_flash_microcode(dev, dir_type, fw->data, fw->size);
 	else
 		rc = bnxt_flash_nvram(dev, dir_type, BNX_DIR_ORDINAL_FIRST,
@@ -2484,7 +2484,7 @@ static int bnxt_set_eeprom(struct net_device *dev,
 	}
 
 	/* Create or re-write an NVM item: */
-	if (bnxt_dir_type_is_executable(type) == true)
+	if (bnxt_dir_type_is_executable(type))
 		return -EOPNOTSUPP;
 	ext = eeprom->magic & 0xffff;
 	ordinal = eeprom->offset >> 16;
-- 
cgit v1.2.3-59-g8ed1b


From d192ae55382dceb5f782b5a2d70ebc887a410305 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:46:23 +0800
Subject: net: ethernet: ti: use true, false for bool variables in cpsw_new.c

Fix the following coccicheck warning:

drivers/net/ethernet/ti/cpsw_new.c:1924:2-17: WARNING: Assignment of
0/1 to bool variable
drivers/net/ethernet/ti/cpsw_new.c:1231:1-16: WARNING: Assignment of
0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpsw_new.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index 33c8dd686206..dce49311d3d3 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -1228,7 +1228,7 @@ static int cpsw_probe_dt(struct cpsw_common *cpsw)
 	data->active_slave = 0;
 	data->channels = CPSW_MAX_QUEUES;
 	data->ale_entries = CPSW_ALE_NUM_ENTRIES;
-	data->dual_emac = 1;
+	data->dual_emac = true;
 	data->bd_ram_size = CPSW_BD_RAM_SIZE;
 	data->mac_control = 0;
 
@@ -1921,7 +1921,7 @@ static int cpsw_probe(struct platform_device *pdev)
 
 	soc = soc_device_match(cpsw_soc_devices);
 	if (soc)
-		cpsw->quirk_irq = 1;
+		cpsw->quirk_irq = true;
 
 	cpsw->rx_packet_max = rx_packet_max;
 	cpsw->descs_pool_size = descs_pool_size;
-- 
cgit v1.2.3-59-g8ed1b


From 015cba7e369e91656ed31afbadce044a10fa0fab Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Tue, 5 May 2020 15:59:26 +0800
Subject: net: ni: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ni/nixge.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ni/nixge.c b/drivers/net/ethernet/ni/nixge.c
index 2fdd0753b3af..d2708a57f2ff 100644
--- a/drivers/net/ethernet/ni/nixge.c
+++ b/drivers/net/ethernet/ni/nixge.c
@@ -502,7 +502,8 @@ static int nixge_check_tx_bd_space(struct nixge_priv *priv,
 	return 0;
 }
 
-static int nixge_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t nixge_start_xmit(struct sk_buff *skb,
+				    struct net_device *ndev)
 {
 	struct nixge_priv *priv = netdev_priv(ndev);
 	struct nixge_hw_dma_bd *cur_p;
-- 
cgit v1.2.3-59-g8ed1b


From 5447e8e01e101ba19fe5b7551f02d37367156f6b Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 May 2020 16:07:12 +0200
Subject: sysctl: Fix unused function warning

The newly added bpf_stats_handler function has the wrong #ifdef
check around it, leading to an unused-function warning when
CONFIG_SYSCTL is disabled:

kernel/sysctl.c:205:12: error: unused function 'bpf_stats_handler' [-Werror,-Wunused-function]
static int bpf_stats_handler(struct ctl_table *table, int write,

Fix the check to match the reference.

Fixes: d46edd671a14 ("bpf: Sharing bpf runtime stats with BPF_ENABLE_STATS")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Luis Chamberlain <mcgrof@kernel.org>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200505140734.503701-1-arnd@arndb.de
---
 kernel/sysctl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/sysctl.c b/kernel/sysctl.c
index 7adfe5dbce9d..17c7633d90fc 100644
--- a/kernel/sysctl.c
+++ b/kernel/sysctl.c
@@ -201,7 +201,7 @@ static int max_extfrag_threshold = 1000;
 
 #endif /* CONFIG_SYSCTL */
 
-#ifdef CONFIG_BPF_SYSCALL
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_SYSCTL)
 static int bpf_stats_handler(struct ctl_table *table, int write,
 			     void __user *buffer, size_t *lenp,
 			     loff_t *ppos)
-- 
cgit v1.2.3-59-g8ed1b


From e94caac1bc0b628b14dd6d81a94315cd880d6150 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:31:56 +0800
Subject: net: tehuti: remove unused inline function bdx_tx_db_size

There are no callers in-tree anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/tehuti/tehuti.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/tehuti/tehuti.c b/drivers/net/ethernet/tehuti/tehuti.c
index 40a2ce0ca808..e28727297563 100644
--- a/drivers/net/ethernet/tehuti/tehuti.c
+++ b/drivers/net/ethernet/tehuti/tehuti.c
@@ -1362,18 +1362,6 @@ static void print_rxfd(struct rxf_desc *rxfd)
  * As our benchmarks shows, it adds 1.5 Gbit/sec to NIS's throuput.
  */
 
-/*************************************************************************
- *     Tx DB                                                             *
- *************************************************************************/
-static inline int bdx_tx_db_size(struct txdb *db)
-{
-	int taken = db->wptr - db->rptr;
-	if (taken < 0)
-		taken = db->size + 1 + taken;	/* (size + 1) equals memsz */
-
-	return db->size - taken;
-}
-
 /**
  * __bdx_tx_db_ptr_next - helper function, increment read/write pointer + wrap
  * @db: tx data base
-- 
cgit v1.2.3-59-g8ed1b


From 7a4d40ddf1fa2d78e0a144a6bde9bf783682fba7 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:33:12 +0800
Subject: net: sun: cassini: Remove unused inline functions

There are no callers in-tree anymore.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sun/cassini.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/sun/cassini.c b/drivers/net/ethernet/sun/cassini.c
index 3ee6ab104cb9..e6e25960da4f 100644
--- a/drivers/net/ethernet/sun/cassini.c
+++ b/drivers/net/ethernet/sun/cassini.c
@@ -237,12 +237,6 @@ static inline void cas_lock_tx(struct cas *cp)
 		spin_lock_nested(&cp->tx_lock[i], i);
 }
 
-static inline void cas_lock_all(struct cas *cp)
-{
-	spin_lock_irq(&cp->lock);
-	cas_lock_tx(cp);
-}
-
 /* WTZ: QA was finding deadlock problems with the previous
  * versions after long test runs with multiple cards per machine.
  * See if replacing cas_lock_all with safer versions helps. The
@@ -266,12 +260,6 @@ static inline void cas_unlock_tx(struct cas *cp)
 		spin_unlock(&cp->tx_lock[i - 1]);
 }
 
-static inline void cas_unlock_all(struct cas *cp)
-{
-	cas_unlock_tx(cp);
-	spin_unlock_irq(&cp->lock);
-}
-
 #define cas_unlock_all_restore(cp, flags) \
 do { \
 	struct cas *xxxcp = (cp); \
-- 
cgit v1.2.3-59-g8ed1b


From 12dcceb39aabcc199461f2fdc3ff81be0cd7003e Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:40:37 +0800
Subject: net: mv643xx_eth: Remove unused inline function sum16_as_be

commit 84411f73b884 ("net: mv643xx_eth: Avoid setting the initial TCP checksum")
left this function behind; remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mv643xx_eth.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mv643xx_eth.c b/drivers/net/ethernet/marvell/mv643xx_eth.c
index 81d24481b22c..4d4b6243318a 100644
--- a/drivers/net/ethernet/marvell/mv643xx_eth.c
+++ b/drivers/net/ethernet/marvell/mv643xx_eth.c
@@ -666,11 +666,6 @@ static inline unsigned int has_tiny_unaligned_frags(struct sk_buff *skb)
 	return 0;
 }
 
-static inline __be16 sum16_as_be(__sum16 sum)
-{
-	return (__force __be16)sum;
-}
-
 static int skb_tx_csum(struct mv643xx_eth_private *mp, struct sk_buff *skb,
 		       u16 *l4i_chk, u32 *command, int length)
 {
-- 
cgit v1.2.3-59-g8ed1b


From d66e67bd4cc76626253bd51ee034bbbaede4e9ba Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:42:56 +0800
Subject: net: stmmac: Remove unused inline function stmmac_rx_threshold_count

There's no caller in-tree since
commit 2af6106ae949 ("net: stmmac: Introducing support for Page Pool")

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_main.c | 9 ---------
 1 file changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
index ff22f274aa43..90bddca1ddd8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_main.c
@@ -3543,15 +3543,6 @@ static void stmmac_rx_vlan(struct net_device *dev, struct sk_buff *skb)
 	}
 }
 
-
-static inline int stmmac_rx_threshold_count(struct stmmac_rx_queue *rx_q)
-{
-	if (rx_q->rx_zeroc_thresh < STMMAC_RX_THRESH)
-		return 0;
-
-	return 1;
-}
-
 /**
  * stmmac_rx_refill - refill used skb preallocated buffers
  * @priv: driver private structure
-- 
cgit v1.2.3-59-g8ed1b


From 9f410c2cb7371c2645f6a8cd39cea0e62e391570 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:43:39 +0800
Subject: myri10ge: Remove unused inline function myri10ge_vlan_ip_csum

commit 4ca3221fe4b6 ("myri10ge: Convert from LRO to GRO")
left this function behind; remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/myricom/myri10ge/myri10ge.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
index 2616fd735aab..e1e1f4e3639e 100644
--- a/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
+++ b/drivers/net/ethernet/myricom/myri10ge/myri10ge.c
@@ -1174,18 +1174,6 @@ myri10ge_submit_8rx(struct mcp_kreq_ether_recv __iomem * dst,
 	mb();
 }
 
-static inline void myri10ge_vlan_ip_csum(struct sk_buff *skb, __wsum hw_csum)
-{
-	struct vlan_hdr *vh = (struct vlan_hdr *)(skb->data);
-
-	if ((skb->protocol == htons(ETH_P_8021Q)) &&
-	    (vh->h_vlan_encapsulated_proto == htons(ETH_P_IP) ||
-	     vh->h_vlan_encapsulated_proto == htons(ETH_P_IPV6))) {
-		skb->csum = hw_csum;
-		skb->ip_summed = CHECKSUM_COMPLETE;
-	}
-}
-
 static void
 myri10ge_alloc_rx_pages(struct myri10ge_priv *mgp, struct myri10ge_rx_buf *rx,
 			int bytes, int watchdog)
-- 
cgit v1.2.3-59-g8ed1b


From b16751f7adae1d1b6beae0a44cd45d65715ddbf2 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:44:21 +0800
Subject: net: microchip: Remove unused inline function is_bits_set

There are no callers in-tree.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600-regmap.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600-regmap.c b/drivers/net/ethernet/microchip/encx24j600-regmap.c
index 1f496fac7033..5bd7fb917b7a 100644
--- a/drivers/net/ethernet/microchip/encx24j600-regmap.c
+++ b/drivers/net/ethernet/microchip/encx24j600-regmap.c
@@ -17,11 +17,6 @@
 
 #include "encx24j600_hw.h"
 
-static inline bool is_bits_set(int value, int mask)
-{
-	return (value & mask) == mask;
-}
-
 static int encx24j600_switch_bank(struct encx24j600_context *ctx,
 				  int bank)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 592138a88d967ae9279275ef275b729e866a552a Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:47:36 +0800
Subject: net: sched: choke: Remove unused inline function choke_set_classid

There are no callers in-tree anymore since commit 5952fde10c35 ("net:
sched: choke: remove dead filter classify code").

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_choke.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index a36974e9c601..2d350c734375 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -142,11 +142,6 @@ static inline struct choke_skb_cb *choke_skb_cb(const struct sk_buff *skb)
 	return (struct choke_skb_cb *)qdisc_skb_cb(skb)->data;
 }
 
-static inline void choke_set_classid(struct sk_buff *skb, u16 classid)
-{
-	choke_skb_cb(skb)->classid = classid;
-}
-
 /*
  * Compare flow of two packets
  *  Returns true only if source and destination address and port match.
-- 
cgit v1.2.3-59-g8ed1b


From e36cac0c056404c7c595c91773a93dd01aacc367 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:50:09 +0800
Subject: bnx2x: Remove unused inline function bnx2x_vf_vlan_credit

commit 05cc5a39ddb7 ("bnx2x: add vlan filtering offload")
left this function behind; remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c | 21 ---------------------
 1 file changed, 21 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
index 5097a44686b3..b4476f44e386 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_sriov.c
@@ -331,27 +331,6 @@ bnx2x_vf_set_igu_info(struct bnx2x *bp, u8 igu_sb_id, u8 abs_vfid)
 	BP_VFDB(bp)->vf_sbs_pool++;
 }
 
-static inline void bnx2x_vf_vlan_credit(struct bnx2x *bp,
-					struct bnx2x_vlan_mac_obj *obj,
-					atomic_t *counter)
-{
-	struct list_head *pos;
-	int read_lock;
-	int cnt = 0;
-
-	read_lock = bnx2x_vlan_mac_h_read_lock(bp, obj);
-	if (read_lock)
-		DP(BNX2X_MSG_SP, "Failed to take vlan mac read head; continuing anyway\n");
-
-	list_for_each(pos, &obj->head)
-		cnt++;
-
-	if (!read_lock)
-		bnx2x_vlan_mac_h_read_unlock(bp, obj);
-
-	atomic_set(counter, cnt);
-}
-
 static int bnx2x_vf_vlan_mac_clear(struct bnx2x *bp, struct bnx2x_virtf *vf,
 				   int qid, bool drv_only, int type)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 5b545cbc0a57fc93617785ed5c165f5059e36498 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:51:24 +0800
Subject: net: sierra_net: Remove unused inline function

There are no callers in-tree.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/sierra_net.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/usb/sierra_net.c b/drivers/net/usb/sierra_net.c
index 389d19dd7909..0abd257b634c 100644
--- a/drivers/net/usb/sierra_net.c
+++ b/drivers/net/usb/sierra_net.c
@@ -354,11 +354,6 @@ static void sierra_net_set_ctx_index(struct sierra_net_data *priv, u8 ctx_ix)
 		cpu_to_be16(SIERRA_NET_HIP_EXT_IP_OUT_ID);
 }
 
-static inline int sierra_net_is_valid_addrlen(u8 len)
-{
-	return len == sizeof(struct in_addr);
-}
-
 static int sierra_net_parse_lsi(struct usbnet *dev, char *data, int datalen)
 {
 	struct lsi_umts *lsi = (struct lsi_umts *)data;
-- 
cgit v1.2.3-59-g8ed1b


From fe121e078da1fd8a061ab22f26c5911f8ebf46cb Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Tue, 5 May 2020 12:08:02 -0700
Subject: sch_choke: Remove classid from choke_skb_cb.

Suggested by Cong Wang.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_choke.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/sched/sch_choke.c b/net/sched/sch_choke.c
index 2d350c734375..59ff466ec7cb 100644
--- a/net/sched/sch_choke.c
+++ b/net/sched/sch_choke.c
@@ -131,7 +131,6 @@ static void choke_drop_by_idx(struct Qdisc *sch, unsigned int idx,
 }
 
 struct choke_skb_cb {
-	u16			classid;
 	u8			keys_valid;
 	struct			flow_keys_digest keys;
 };
-- 
cgit v1.2.3-59-g8ed1b


From cc31d53f82d7cf8caa3bd7251985fa75fd9efc1c Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 10:03:07 +0000
Subject: net: ipa: remove duplicated include from ipa_mem.c

Remove duplicated include.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_mem.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_mem.c b/drivers/net/ipa/ipa_mem.c
index aa8f6b0f3d50..3ef814119aab 100644
--- a/drivers/net/ipa/ipa_mem.c
+++ b/drivers/net/ipa/ipa_mem.c
@@ -17,7 +17,6 @@
 #include "ipa_data.h"
 #include "ipa_cmd.h"
 #include "ipa_mem.h"
-#include "ipa_data.h"
 #include "ipa_table.h"
 #include "gsi_trans.h"
 
-- 
cgit v1.2.3-59-g8ed1b


From b38eb47f48d0133298f3d12be3960be2632e8f70 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 5 May 2020 13:19:35 +0300
Subject: dt-binding: net: ti: am65x-cpts: fix dt_binding_check fail

Fix the dt_binding_check failure:
Fix Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml: $id: relative path/filename doesn't match actual path or filename
	expected: http://devicetree.org/schemas/net/ti,k3-am654-cpts.yaml#
Unknown file referenced: [Errno 2] No such file or directory: '/usr/local/lib/python3.6/dist-packages/dtschema/schemas/net/ti,am654-cpts.yaml'
 Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml: $id: relative path/filename doesn't match actual path or filename
 expected: http://devicetree.org/schemas/net/ti,k3-am654-cpts.yaml#
Unknown file referenced: [Errno 2] No such file or directory: '/usr/local/lib/python3.6/dist-packages/dtschema/schemas/net/ti,am654-cpts.yaml'

Cc: Rob Herring <robh@kernel.org>
Fixes: 6e87ac748e94 ("dt-binding: ti: am65x: document common platform time sync cpts module")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml | 2 +-
 Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml      | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 0f3fde45e200..0c054a2ce5ba 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -147,7 +147,7 @@ patternProperties:
   "^cpts$":
     type: object
     allOf:
-      - $ref: "ti,am654-cpts.yaml#"
+      - $ref: "ti,k3-am654-cpts.yaml#"
     description:
       CPSW Common Platform Time Sync (CPTS) module.
 
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
index 1b535d41e5c6..df83c320e61b 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
 %YAML 1.2
 ---
-$id: http://devicetree.org/schemas/net/ti,am654-cpts.yaml#
+$id: http://devicetree.org/schemas/net/ti,k3-am654-cpts.yaml#
 $schema: http://devicetree.org/meta-schemas/core.yaml#
 
 title: The TI AM654x/J721E Common Platform Time Sync (CPTS) module Device Tree Bindings
-- 
cgit v1.2.3-59-g8ed1b


From 0a99be434d145079d0509473b19e840629d851c2 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Tue, 5 May 2020 15:01:20 +0200
Subject: net/smc: log important pnetid and state change events

Print to system log when SMC links are available or go down, link group
state changes or pnetids are applied to and removed from devices.
The log entries are triggered by either user configuration actions or
adapter activation/deactivation events and are not expected to happen
often. The entries help SMC users to keep track of the SMC link group
status and to detect when actions are needed (like to add replacements
for failed adapters).

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/af_smc.c   |  6 ++----
 net/smc/smc_core.c | 34 +++++++++++++++++++++++++++++-----
 net/smc/smc_core.h |  2 +-
 net/smc/smc_ib.c   | 11 +++++++++++
 net/smc/smc_ism.c  |  6 ++++++
 net/smc/smc_llc.c  | 25 +++++++++++++++++++------
 net/smc/smc_llc.h  |  2 +-
 net/smc/smc_pnet.c | 47 ++++++++++++++++++++++++++++++++++++++++++++---
 8 files changed, 113 insertions(+), 20 deletions(-)

diff --git a/net/smc/af_smc.c b/net/smc/af_smc.c
index 4e4421c95ca1..903321543838 100644
--- a/net/smc/af_smc.c
+++ b/net/smc/af_smc.c
@@ -378,8 +378,6 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 	struct smc_llc_qentry *qentry;
 	int rc;
 
-	link->lgr->type = SMC_LGR_SINGLE;
-
 	/* receive CONFIRM LINK request from server over RoCE fabric */
 	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
 			      SMC_LLC_CONFIRM_LINK);
@@ -414,6 +412,7 @@ static int smcr_clnt_conf_first_link(struct smc_sock *smc)
 		return SMC_CLC_DECL_TIMEOUT_CL;
 
 	smc_llc_link_active(link);
+	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
 
 	/* optional 2nd link, receive ADD LINK request from server */
 	qentry = smc_llc_wait(link->lgr, NULL, SMC_LLC_WAIT_TIME,
@@ -1037,8 +1036,6 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 	struct smc_llc_qentry *qentry;
 	int rc;
 
-	link->lgr->type = SMC_LGR_SINGLE;
-
 	if (smcr_link_reg_rmb(link, smc->conn.rmb_desc))
 		return SMC_CLC_DECL_ERR_REGRMB;
 
@@ -1067,6 +1064,7 @@ static int smcr_serv_conf_first_link(struct smc_sock *smc)
 	smc->conn.rmb_desc->is_conf_rkey = true;
 
 	smc_llc_link_active(link);
+	smcr_lgr_set_type(link->lgr, SMC_LGR_SINGLE);
 
 	/* initial contact - try to establish second link */
 	smc_llc_srv_add_link(link);
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index fb5f685ff494..65de700e1f17 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -369,7 +369,7 @@ dealloc_pd:
 free_link_mem:
 	smc_wr_free_link_mem(lnk);
 clear_llc_lnk:
-	smc_llc_link_clear(lnk);
+	smc_llc_link_clear(lnk, false);
 out:
 	put_device(&ini->ib_dev->ibdev->dev);
 	memset(lnk, 0, sizeof(struct smc_link));
@@ -718,14 +718,14 @@ static void smcr_rtoken_clear_link(struct smc_link *lnk)
 }
 
 /* must be called under lgr->llc_conf_mutex lock */
-void smcr_link_clear(struct smc_link *lnk)
+void smcr_link_clear(struct smc_link *lnk, bool log)
 {
 	struct smc_ib_device *smcibdev;
 
 	if (!lnk->lgr || lnk->state == SMC_LNK_UNUSED)
 		return;
 	lnk->peer_qpn = 0;
-	smc_llc_link_clear(lnk);
+	smc_llc_link_clear(lnk, log);
 	smcr_buf_unmap_lgr(lnk);
 	smcr_rtoken_clear_link(lnk);
 	smc_ib_modify_qp_reset(lnk);
@@ -812,7 +812,7 @@ static void smc_lgr_free(struct smc_link_group *lgr)
 		mutex_lock(&lgr->llc_conf_mutex);
 		for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++) {
 			if (lgr->lnk[i].state != SMC_LNK_UNUSED)
-				smcr_link_clear(&lgr->lnk[i]);
+				smcr_link_clear(&lgr->lnk[i], false);
 		}
 		mutex_unlock(&lgr->llc_conf_mutex);
 		smc_llc_lgr_clear(lgr);
@@ -1040,12 +1040,36 @@ void smc_smcr_terminate_all(struct smc_ib_device *smcibdev)
 /* set new lgr type and clear all asymmetric link tagging */
 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type)
 {
+	char *lgr_type = "";
 	int i;
 
 	for (i = 0; i < SMC_LINKS_PER_LGR_MAX; i++)
 		if (smc_link_usable(&lgr->lnk[i]))
 			lgr->lnk[i].link_is_asym = false;
+	if (lgr->type == new_type)
+		return;
 	lgr->type = new_type;
+
+	switch (lgr->type) {
+	case SMC_LGR_NONE:
+		lgr_type = "NONE";
+		break;
+	case SMC_LGR_SINGLE:
+		lgr_type = "SINGLE";
+		break;
+	case SMC_LGR_SYMMETRIC:
+		lgr_type = "SYMMETRIC";
+		break;
+	case SMC_LGR_ASYMMETRIC_PEER:
+		lgr_type = "ASYMMETRIC_PEER";
+		break;
+	case SMC_LGR_ASYMMETRIC_LOCAL:
+		lgr_type = "ASYMMETRIC_LOCAL";
+		break;
+	}
+	pr_warn_ratelimited("smc: SMC-R lg %*phN state changed: "
+			    "%s, pnetid %.16s\n", SMC_LGR_ID_SIZE, &lgr->id,
+			    lgr_type, lgr->pnet_id);
 }
 
 /* set new lgr type and tag a link as asymmetric */
@@ -1146,7 +1170,7 @@ static void smcr_link_down(struct smc_link *lnk)
 	smc_ib_modify_qp_reset(lnk);
 	to_lnk = smc_switch_conns(lgr, lnk, true);
 	if (!to_lnk) { /* no backup link available */
-		smcr_link_clear(lnk);
+		smcr_link_clear(lnk, true);
 		return;
 	}
 	smcr_lgr_set_type(lgr, SMC_LGR_SINGLE);
diff --git a/net/smc/smc_core.h b/net/smc/smc_core.h
index 4ae76802214f..86d160f0d187 100644
--- a/net/smc/smc_core.h
+++ b/net/smc/smc_core.h
@@ -383,7 +383,7 @@ void smc_core_exit(void);
 
 int smcr_link_init(struct smc_link_group *lgr, struct smc_link *lnk,
 		   u8 link_idx, struct smc_init_info *ini);
-void smcr_link_clear(struct smc_link *lnk);
+void smcr_link_clear(struct smc_link *lnk, bool log);
 int smcr_buf_map_lgr(struct smc_link *lnk);
 int smcr_buf_reg_lgr(struct smc_link *lnk);
 void smcr_lgr_set_type(struct smc_link_group *lgr, enum smc_lgr_type new_type);
diff --git a/net/smc/smc_ib.c b/net/smc/smc_ib.c
index 2c743caad69a..f0a5064bf9bd 100644
--- a/net/smc/smc_ib.c
+++ b/net/smc/smc_ib.c
@@ -575,6 +575,8 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 
 	/* trigger reading of the port attributes */
 	port_cnt = smcibdev->ibdev->phys_port_cnt;
+	pr_warn_ratelimited("smc: adding ib device %s with port count %d\n",
+			    smcibdev->ibdev->name, port_cnt);
 	for (i = 0;
 	     i < min_t(size_t, port_cnt, SMC_MAX_PORTS);
 	     i++) {
@@ -583,6 +585,13 @@ static void smc_ib_add_dev(struct ib_device *ibdev)
 		if (smc_pnetid_by_dev_port(ibdev->dev.parent, i,
 					   smcibdev->pnetid[i]))
 			smc_pnetid_by_table_ib(smcibdev, i + 1);
+		pr_warn_ratelimited("smc:    ib device %s port %d has pnetid "
+				    "%.16s%s\n",
+				    smcibdev->ibdev->name, i + 1,
+				    smcibdev->pnetid[i],
+				    smcibdev->pnetid_by_user[i] ?
+				     " (user defined)" :
+				     "");
 	}
 	schedule_work(&smcibdev->port_event_work);
 }
@@ -599,6 +608,8 @@ static void smc_ib_remove_dev(struct ib_device *ibdev, void *client_data)
 	spin_lock(&smc_ib_devices.lock);
 	list_del_init(&smcibdev->list); /* remove from smc_ib_devices */
 	spin_unlock(&smc_ib_devices.lock);
+	pr_warn_ratelimited("smc: removing ib device %s\n",
+			    smcibdev->ibdev->name);
 	smc_smcr_terminate_all(smcibdev);
 	smc_ib_cleanup_per_ibdev(smcibdev);
 	ib_unregister_event_handler(&smcibdev->event_handler);
diff --git a/net/smc/smc_ism.c b/net/smc/smc_ism.c
index 32be2da2cb85..91f85fc09fb8 100644
--- a/net/smc/smc_ism.c
+++ b/net/smc/smc_ism.c
@@ -321,12 +321,18 @@ int smcd_register_dev(struct smcd_dev *smcd)
 	list_add_tail(&smcd->list, &smcd_dev_list.list);
 	spin_unlock(&smcd_dev_list.lock);
 
+	pr_warn_ratelimited("smc: adding smcd device %s with pnetid %.16s%s\n",
+			    dev_name(&smcd->dev), smcd->pnetid,
+			    smcd->pnetid_by_user ? " (user defined)" : "");
+
 	return device_add(&smcd->dev);
 }
 EXPORT_SYMBOL_GPL(smcd_register_dev);
 
 void smcd_unregister_dev(struct smcd_dev *smcd)
 {
+	pr_warn_ratelimited("smc: removing smcd device %s\n",
+			    dev_name(&smcd->dev));
 	spin_lock(&smcd_dev_list.lock);
 	list_del_init(&smcd->list);
 	spin_unlock(&smcd_dev_list.lock);
diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 66ddc9cf5e2f..4cc583678ac7 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -870,7 +870,7 @@ int smc_llc_cli_add_link(struct smc_link *link, struct smc_llc_qentry *qentry)
 	if (!rc)
 		goto out;
 out_clear_lnk:
-	smcr_link_clear(lnk_new);
+	smcr_link_clear(lnk_new, false);
 out_reject:
 	smc_llc_cli_add_link_reject(qentry);
 out:
@@ -977,7 +977,7 @@ static void smc_llc_delete_asym_link(struct smc_link_group *lgr)
 	}
 	smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
 out_free:
-	smcr_link_clear(lnk_asym);
+	smcr_link_clear(lnk_asym, true);
 }
 
 static int smc_llc_srv_rkey_exchange(struct smc_link *link,
@@ -1121,7 +1121,7 @@ int smc_llc_srv_add_link(struct smc_link *link)
 		goto out_err;
 	return 0;
 out_err:
-	smcr_link_clear(link_new);
+	smcr_link_clear(link_new, false);
 	return rc;
 }
 
@@ -1227,7 +1227,7 @@ static void smc_llc_process_cli_delete_link(struct smc_link_group *lgr)
 		smc_switch_conns(lgr, lnk_del, false);
 		smc_wr_tx_wait_no_pending_sends(lnk_del);
 	}
-	smcr_link_clear(lnk_del);
+	smcr_link_clear(lnk_del, true);
 
 	active_links = smc_llc_active_link_count(lgr);
 	if (lnk_del == lnk_asym) {
@@ -1320,7 +1320,7 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 			}
 		}
 	}
-	smcr_link_clear(lnk_del);
+	smcr_link_clear(lnk_del, true);
 
 	active_links = smc_llc_active_link_count(lgr);
 	if (active_links == 1) {
@@ -1711,6 +1711,12 @@ int smc_llc_link_init(struct smc_link *link)
 
 void smc_llc_link_active(struct smc_link *link)
 {
+	pr_warn_ratelimited("smc: SMC-R lg %*phN link added: id %*phN, "
+			    "peerid %*phN, ibdev %s, ibport %d\n",
+			    SMC_LGR_ID_SIZE, &link->lgr->id,
+			    SMC_LGR_ID_SIZE, &link->link_uid,
+			    SMC_LGR_ID_SIZE, &link->peer_link_uid,
+			    link->smcibdev->ibdev->name, link->ibport);
 	link->state = SMC_LNK_ACTIVE;
 	if (link->lgr->llc_testlink_time) {
 		link->llc_testlink_time = link->lgr->llc_testlink_time * HZ;
@@ -1720,8 +1726,15 @@ void smc_llc_link_active(struct smc_link *link)
 }
 
 /* called in worker context */
-void smc_llc_link_clear(struct smc_link *link)
+void smc_llc_link_clear(struct smc_link *link, bool log)
 {
+	if (log)
+		pr_warn_ratelimited("smc: SMC-R lg %*phN link removed: id %*phN"
+				    ", peerid %*phN, ibdev %s, ibport %d\n",
+				    SMC_LGR_ID_SIZE, &link->lgr->id,
+				    SMC_LGR_ID_SIZE, &link->link_uid,
+				    SMC_LGR_ID_SIZE, &link->peer_link_uid,
+				    link->smcibdev->ibdev->name, link->ibport);
 	complete(&link->llc_testlink_resp);
 	cancel_delayed_work_sync(&link->llc_testlink_wrk);
 	smc_wr_wakeup_reg_wait(link);
diff --git a/net/smc/smc_llc.h b/net/smc/smc_llc.h
index 55287376112d..a5d2fe3eea61 100644
--- a/net/smc/smc_llc.h
+++ b/net/smc/smc_llc.h
@@ -82,7 +82,7 @@ void smc_llc_lgr_init(struct smc_link_group *lgr, struct smc_sock *smc);
 void smc_llc_lgr_clear(struct smc_link_group *lgr);
 int smc_llc_link_init(struct smc_link *link);
 void smc_llc_link_active(struct smc_link *link);
-void smc_llc_link_clear(struct smc_link *link);
+void smc_llc_link_clear(struct smc_link *link, bool log);
 int smc_llc_do_confirm_rkey(struct smc_link *send_link,
 			    struct smc_buf_desc *rmb_desc);
 int smc_llc_do_delete_rkey(struct smc_link_group *lgr,
diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index 50c96e843fab..be03f1260d59 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -110,8 +110,14 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 		if (!pnet_name ||
 		    smc_pnet_match(pnetelem->pnet_name, pnet_name)) {
 			list_del(&pnetelem->list);
-			if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev)
+			if (pnetelem->type == SMC_PNET_ETH && pnetelem->ndev) {
 				dev_put(pnetelem->ndev);
+				pr_warn_ratelimited("smc: net device %s "
+						    "erased user defined "
+						    "pnetid %.16s\n",
+						    pnetelem->eth_name,
+						    pnetelem->pnet_name);
+			}
 			kfree(pnetelem);
 			rc = 0;
 		}
@@ -130,6 +136,12 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 			    (!pnet_name ||
 			     smc_pnet_match(pnet_name,
 					    ibdev->pnetid[ibport]))) {
+				pr_warn_ratelimited("smc: ib device %s ibport "
+						    "%d erased user defined "
+						    "pnetid %.16s\n",
+						    ibdev->ibdev->name,
+						    ibport + 1,
+						    ibdev->pnetid[ibport]);
 				memset(ibdev->pnetid[ibport], 0,
 				       SMC_MAX_PNETID_LEN);
 				ibdev->pnetid_by_user[ibport] = false;
@@ -144,6 +156,10 @@ static int smc_pnet_remove_by_pnetid(struct net *net, char *pnet_name)
 		if (smcd_dev->pnetid_by_user &&
 		    (!pnet_name ||
 		     smc_pnet_match(pnet_name, smcd_dev->pnetid))) {
+			pr_warn_ratelimited("smc: smcd device %s "
+					    "erased user defined pnetid "
+					    "%.16s\n", dev_name(&smcd_dev->dev),
+					    smcd_dev->pnetid);
 			memset(smcd_dev->pnetid, 0, SMC_MAX_PNETID_LEN);
 			smcd_dev->pnetid_by_user = false;
 			rc = 0;
@@ -174,6 +190,10 @@ static int smc_pnet_add_by_ndev(struct net_device *ndev)
 			dev_hold(ndev);
 			pnetelem->ndev = ndev;
 			rc = 0;
+			pr_warn_ratelimited("smc: adding net device %s with "
+					    "user defined pnetid %.16s\n",
+					    pnetelem->eth_name,
+					    pnetelem->pnet_name);
 			break;
 		}
 	}
@@ -201,6 +221,10 @@ static int smc_pnet_remove_by_ndev(struct net_device *ndev)
 			dev_put(pnetelem->ndev);
 			pnetelem->ndev = NULL;
 			rc = 0;
+			pr_warn_ratelimited("smc: removing net device %s with "
+					    "user defined pnetid %.16s\n",
+					    pnetelem->eth_name,
+					    pnetelem->pnet_name);
 			break;
 		}
 	}
@@ -357,6 +381,10 @@ static int smc_pnet_add_eth(struct smc_pnettable *pnettable, struct net *net,
 		kfree(new_pe);
 		goto out_put;
 	}
+	if (ndev)
+		pr_warn_ratelimited("smc: net device %s "
+				    "applied user defined pnetid %.16s\n",
+				    new_pe->eth_name, new_pe->pnet_name);
 	return 0;
 
 out_put:
@@ -377,11 +405,24 @@ static int smc_pnet_add_ib(struct smc_pnettable *pnettable, char *ib_name,
 
 	/* try to apply the pnetid to active devices */
 	ib_dev = smc_pnet_find_ib(ib_name);
-	if (ib_dev)
+	if (ib_dev) {
 		ibdev_applied = smc_pnet_apply_ib(ib_dev, ib_port, pnet_name);
+		if (ibdev_applied)
+			pr_warn_ratelimited("smc: ib device %s ibport %d "
+					    "applied user defined pnetid "
+					    "%.16s\n", ib_dev->ibdev->name,
+					    ib_port,
+					    ib_dev->pnetid[ib_port - 1]);
+	}
 	smcd_dev = smc_pnet_find_smcd(ib_name);
-	if (smcd_dev)
+	if (smcd_dev) {
 		smcddev_applied = smc_pnet_apply_smcd(smcd_dev, pnet_name);
+		if (smcddev_applied)
+			pr_warn_ratelimited("smc: smcd device %s "
+					    "applied user defined pnetid "
+					    "%.16s\n", dev_name(&smcd_dev->dev),
+					    smcd_dev->pnetid);
+	}
 	/* Apply fails when a device has a hardware-defined pnetid set, do not
 	 * add a pnet table entry in that case.
 	 */
-- 
cgit v1.2.3-59-g8ed1b


From fea805237dd984a71a2c5e5cf074a15505d5ba31 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 15:01:21 +0200
Subject: net/smc: remove unused inline function smc_curs_read

commit bac6de7b6370 ("net/smc: eliminate cursor read and write calls")
left this behind.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_cdc.h | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 9cfabc9af120..2ddcc5fb5ceb 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -97,23 +97,6 @@ static inline void smc_curs_add(int size, union smc_host_cursor *curs,
 	}
 }
 
-/* SMC cursors are 8 bytes long and require atomic reading and writing */
-static inline u64 smc_curs_read(union smc_host_cursor *curs,
-				struct smc_connection *conn)
-{
-#ifndef KERNEL_HAS_ATOMIC64
-	unsigned long flags;
-	u64 ret;
-
-	spin_lock_irqsave(&conn->acurs_lock, flags);
-	ret = curs->acurs;
-	spin_unlock_irqrestore(&conn->acurs_lock, flags);
-	return ret;
-#else
-	return atomic64_read(&curs->acurs);
-#endif
-}
-
 /* Copy cursor src into tgt */
 static inline void smc_curs_copy(union smc_host_cursor *tgt,
 				 union smc_host_cursor *src,
-- 
cgit v1.2.3-59-g8ed1b


From 730f135104f3afe320d0df4c53c28d6ad9d17d6b Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Tue, 5 May 2020 15:34:00 +0200
Subject: cxgb4/cxgb4vf: Remove superfluous void * cast in
 debugfs_create_file() call

There is no need to cast a typed pointer to a void pointer when calling
a function that accepts the latter.  Remove it, as the cast prevents
further compiler checks.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
index 9cc3541a7e1c..cec865a97464 100644
--- a/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4vf/cxgb4vf_main.c
@@ -2480,7 +2480,7 @@ static int setup_debugfs(struct adapter *adapter)
 	for (i = 0; i < ARRAY_SIZE(debugfs_files); i++)
 		debugfs_create_file(debugfs_files[i].name,
 				    debugfs_files[i].mode,
-				    adapter->debugfs_root, (void *)adapter,
+				    adapter->debugfs_root, adapter,
 				    debugfs_files[i].fops);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From f989d546a2d5a9f001f6f8be49d98c10ab9b1897 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 5 May 2020 09:05:06 -0700
Subject: erspan: Add type I version 0 support.

The Type I ERSPAN frame format is based on the barebones
IP + GRE(4-byte) encapsulation on top of the raw mirrored frame.
Both type I and II use 0x88BE as protocol type. Unlike type II
and III, no sequence number or key is required.
To create a type I erspan tunnel device:
  $ ip link add dev erspan11 type erspan \
            local 172.16.1.100 remote 172.16.1.200 \
            erspan_ver 0

Signed-off-by: William Tu <u9012063@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/erspan.h | 19 +++++++++++++++--
 net/ipv4/ip_gre.c    | 58 ++++++++++++++++++++++++++++++++++++++--------------
 2 files changed, 60 insertions(+), 17 deletions(-)

diff --git a/include/net/erspan.h b/include/net/erspan.h
index b39643ef4c95..0d9e86bd9893 100644
--- a/include/net/erspan.h
+++ b/include/net/erspan.h
@@ -2,7 +2,19 @@
 #define __LINUX_ERSPAN_H
 
 /*
- * GRE header for ERSPAN encapsulation (8 octets [34:41]) -- 8 bytes
+ * GRE header for ERSPAN type I encapsulation (4 octets [34:37])
+ *      0                   1                   2                   3
+ *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *     |0|0|0|0|0|00000|000000000|00000|    Protocol Type for ERSPAN   |
+ *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
+ *
+ *  The Type I ERSPAN frame format is based on the barebones IP + GRE
+ *  encapsulation (as described above) on top of the raw mirrored frame.
+ *  There is no extra ERSPAN header.
+ *
+ *
+ * GRE header for ERSPAN type II and II encapsulation (8 octets [34:41])
  *       0                   1                   2                   3
  *      0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1 2 3 4 5 6 7 8 9 0 1
  *     +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
@@ -43,7 +55,7 @@
  * |                  Platform Specific Info                       |
  * +-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+-+
  *
- * GRE proto ERSPAN type II = 0x88BE, type III = 0x22EB
+ * GRE proto ERSPAN type I/II = 0x88BE, type III = 0x22EB
  */
 
 #include <uapi/linux/erspan.h>
@@ -139,6 +151,9 @@ static inline u8 get_hwid(const struct erspan_md2 *md2)
 
 static inline int erspan_hdr_len(int version)
 {
+	if (version == 0)
+		return 0;
+
 	return sizeof(struct erspan_base_hdr) +
 	       (version == 1 ? ERSPAN_V1_MDSIZE : ERSPAN_V2_MDSIZE);
 }
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 029b24eeafba..e29cd48674d7 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -248,6 +248,15 @@ static void gre_err(struct sk_buff *skb, u32 info)
 	ipgre_err(skb, info, &tpi);
 }
 
+static bool is_erspan_type1(int gre_hdr_len)
+{
+	/* Both ERSPAN type I (version 0) and type II (version 1) use
+	 * protocol 0x88BE, but the type I has only 4-byte GRE header,
+	 * while type II has 8-byte.
+	 */
+	return gre_hdr_len == 4;
+}
+
 static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 		      int gre_hdr_len)
 {
@@ -262,17 +271,26 @@ static int erspan_rcv(struct sk_buff *skb, struct tnl_ptk_info *tpi,
 	int len;
 
 	itn = net_generic(net, erspan_net_id);
-
 	iph = ip_hdr(skb);
-	ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
-	ver = ershdr->ver;
-
-	tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
-				  tpi->flags | TUNNEL_KEY,
-				  iph->saddr, iph->daddr, tpi->key);
+	if (is_erspan_type1(gre_hdr_len)) {
+		ver = 0;
+		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+					  tpi->flags | TUNNEL_NO_KEY,
+					  iph->saddr, iph->daddr, 0);
+	} else {
+		ershdr = (struct erspan_base_hdr *)(skb->data + gre_hdr_len);
+		ver = ershdr->ver;
+		tunnel = ip_tunnel_lookup(itn, skb->dev->ifindex,
+					  tpi->flags | TUNNEL_KEY,
+					  iph->saddr, iph->daddr, tpi->key);
+	}
 
 	if (tunnel) {
-		len = gre_hdr_len + erspan_hdr_len(ver);
+		if (is_erspan_type1(gre_hdr_len))
+			len = gre_hdr_len;
+		else
+			len = gre_hdr_len + erspan_hdr_len(ver);
+
 		if (unlikely(!pskb_may_pull(skb, len)))
 			return PACKET_REJECT;
 
@@ -665,7 +683,10 @@ static netdev_tx_t erspan_xmit(struct sk_buff *skb,
 	}
 
 	/* Push ERSPAN header */
-	if (tunnel->erspan_ver == 1) {
+	if (tunnel->erspan_ver == 0) {
+		proto = htons(ETH_P_ERSPAN);
+		tunnel->parms.o_flags &= ~TUNNEL_SEQ;
+	} else if (tunnel->erspan_ver == 1) {
 		erspan_build_header(skb, ntohl(tunnel->parms.o_key),
 				    tunnel->index,
 				    truncate, true);
@@ -1066,7 +1087,10 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
 	if (ret)
 		return ret;
 
-	/* ERSPAN should only have GRE sequence and key flag */
+	if (nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
+		return 0;
+
+	/* ERSPAN type II/III should only have GRE sequence and key flag */
 	if (data[IFLA_GRE_OFLAGS])
 		flags |= nla_get_be16(data[IFLA_GRE_OFLAGS]);
 	if (data[IFLA_GRE_IFLAGS])
@@ -1174,7 +1198,7 @@ static int erspan_netlink_parms(struct net_device *dev,
 	if (data[IFLA_GRE_ERSPAN_VER]) {
 		t->erspan_ver = nla_get_u8(data[IFLA_GRE_ERSPAN_VER]);
 
-		if (t->erspan_ver != 1 && t->erspan_ver != 2)
+		if (t->erspan_ver > 2)
 			return -EINVAL;
 	}
 
@@ -1259,7 +1283,11 @@ static int erspan_tunnel_init(struct net_device *dev)
 {
 	struct ip_tunnel *tunnel = netdev_priv(dev);
 
-	tunnel->tun_hlen = 8;
+	if (tunnel->erspan_ver == 0)
+		tunnel->tun_hlen = 4; /* 4-byte GRE hdr. */
+	else
+		tunnel->tun_hlen = 8; /* 8-byte GRE hdr. */
+
 	tunnel->parms.iph.protocol = IPPROTO_GRE;
 	tunnel->hlen = tunnel->tun_hlen + tunnel->encap_hlen +
 		       erspan_hdr_len(tunnel->erspan_ver);
@@ -1456,8 +1484,8 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 	struct ip_tunnel_parm *p = &t->parms;
 	__be16 o_flags = p->o_flags;
 
-	if (t->erspan_ver == 1 || t->erspan_ver == 2) {
-		if (!t->collect_md)
+	if (t->erspan_ver <= 2) {
+		if (t->erspan_ver != 0 && !t->collect_md)
 			o_flags |= TUNNEL_KEY;
 
 		if (nla_put_u8(skb, IFLA_GRE_ERSPAN_VER, t->erspan_ver))
@@ -1466,7 +1494,7 @@ static int ipgre_fill_info(struct sk_buff *skb, const struct net_device *dev)
 		if (t->erspan_ver == 1) {
 			if (nla_put_u32(skb, IFLA_GRE_ERSPAN_INDEX, t->index))
 				goto nla_put_failure;
-		} else {
+		} else if (t->erspan_ver == 2) {
 			if (nla_put_u8(skb, IFLA_GRE_ERSPAN_DIR, t->dir))
 				goto nla_put_failure;
 			if (nla_put_u16(skb, IFLA_GRE_ERSPAN_HWID, t->hwid))
-- 
cgit v1.2.3-59-g8ed1b


From 01241aa000ab0a685062a448b0d7d97f470f446f Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Tue, 5 May 2020 19:21:23 +0300
Subject: net: ethernet: ti: am65-cpts: fix build

It's possible to have a build configuration which forces PTP_1588_CLOCK=m
and so TI_K3_AM65_CPTS=m while still having TI_K3_AM65_CPSW_NUSS=y. This will
cause build failures:

aarch64-linux-gnu-ld: ../drivers/net/ethernet/ti/am65-cpsw-nuss.o: in function `am65_cpsw_init_cpts':
../drivers/net/ethernet/ti/am65-cpsw-nuss.c:1685: undefined reference to `am65_cpts_create'
aarch64-linux-gnu-ld: ../drivers/net/ethernet/ti/am65-cpsw-nuss.c:1685:(.text+0x2e20):
relocation truncated to fit: R_AARCH64_CALL26 against undefined symbol `am65_cpts_create'

Fix it by adding dependencies from CPTS in TI_K3_AM65_CPSW_NUSS as below:
   config TI_K3_AM65_CPSW_NUSS
   ...
     depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS

Note: this creates the dependencies below, and for NFS boot + CPTS all of them
have to be built-in.
  PTP_1588_CLOCK -> TI_K3_AM65_CPTS -> TI_K3_AM65_CPSW_NUSS

While here, clean up TI_K3_AM65_CPTS definition.

Fixes: b1f66a5bee07 ("net: ethernet: ti: am65-cpsw-nuss: enable packet timestamping support")
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Reported-by: Anders Roxell <anders.roxell@linaro.org>
Tested-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 69b64620a454..519f5a8c5810 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -100,7 +100,7 @@ config TI_K3_AM65_CPSW_NUSS
 	depends on ARCH_K3 && OF && TI_K3_UDMA_GLUE_LAYER
 	select TI_DAVINCI_MDIO
 	imply PHY_TI_GMII_SEL
-	imply TI_AM65_CPTS
+	depends on TI_K3_AM65_CPTS || !TI_K3_AM65_CPTS
 	help
 	  This driver supports TI K3 AM654/J721E CPSW2G Ethernet SubSystem.
 	  The two-port Gigabit Ethernet MAC (MCU_CPSW0) subsystem provides
@@ -113,9 +113,8 @@ config TI_K3_AM65_CPSW_NUSS
 
 config TI_K3_AM65_CPTS
 	tristate "TI K3 AM65x CPTS"
-	depends on ARCH_K3 && OF && PTP_1588_CLOCK
+	depends on ARCH_K3 && OF
 	depends on PTP_1588_CLOCK
-	select NET_PTP_CLASSIFY
 	help
 	  Say y here to support the TI K3 AM65x CPTS with 1588 features such as
 	  PTP hardware clock for each CPTS device and network packets
-- 
cgit v1.2.3-59-g8ed1b


From e7f33e0c52c0df42f817a8468bf55be6648f9b5f Mon Sep 17 00:00:00 2001
From: John Crispin <john@phrozen.org>
Date: Tue, 5 May 2020 10:42:03 +0300
Subject: ath11k: add tx hw 802.11 encapsulation offloading support

This patch adds support for ethernet rxtx mode to the driver. The feature
is enabled via a new module parameter. If enabled, the driver will enable
the feature on a per-vif basis if all other requirements are met.

Signed-off-by: Shashidhar Lakkavalli <slakkavalli@datto.com>
Signed-off-by: John Crispin <john@phrozen.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200430152814.18481-1-john@phrozen.org
---
 drivers/net/wireless/ath/ath11k/core.h  |  5 ++++
 drivers/net/wireless/ath/ath11k/dp_tx.c | 17 +++++++++---
 drivers/net/wireless/ath/ath11k/mac.c   | 47 ++++++++++++++++++++++++++-------
 drivers/net/wireless/ath/ath11k/wmi.h   |  3 +++
 4 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 33237eaf0371..70ec544eee67 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -60,9 +60,14 @@ static inline enum wme_ac ath11k_tid_to_ac(u32 tid)
 		WME_AC_VO);
 }
 
+enum ath11k_skb_flags {
+	ATH11K_SKB_HW_80211_ENCAP = BIT(0),
+};
+
 struct ath11k_skb_cb {
 	dma_addr_t paddr;
 	u8 eid;
+	u8 flags;
 	struct ath11k *ar;
 	struct ieee80211_vif *vif;
 } __packed;
diff --git a/drivers/net/wireless/ath/ath11k/dp_tx.c b/drivers/net/wireless/ath/ath11k/dp_tx.c
index 59018ccb14da..41c990aec6b7 100644
--- a/drivers/net/wireless/ath/ath11k/dp_tx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_tx.c
@@ -12,7 +12,11 @@
 static enum hal_tcl_encap_type
 ath11k_dp_tx_get_encap_type(struct ath11k_vif *arvif, struct sk_buff *skb)
 {
-	/* TODO: Determine encap type based on vif_type and configuration */
+	struct ieee80211_tx_info *tx_info = IEEE80211_SKB_CB(skb);
+
+	if (tx_info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP)
+		return HAL_TCL_ENCAP_TYPE_ETHERNET;
+
 	return HAL_TCL_ENCAP_TYPE_NATIVE_WIFI;
 }
 
@@ -36,8 +40,11 @@ static void ath11k_dp_tx_encap_nwifi(struct sk_buff *skb)
 static u8 ath11k_dp_tx_get_tid(struct sk_buff *skb)
 {
 	struct ieee80211_hdr *hdr = (void *)skb->data;
+	struct ath11k_skb_cb *cb = ATH11K_SKB_CB(skb);
 
-	if (!ieee80211_is_data_qos(hdr->frame_control))
+	if (cb->flags & ATH11K_SKB_HW_80211_ENCAP)
+		return skb->priority & IEEE80211_QOS_CTL_TID_MASK;
+	else if (!ieee80211_is_data_qos(hdr->frame_control))
 		return HAL_DESC_REO_NON_QOS_TID;
 	else
 		return skb->priority & IEEE80211_QOS_CTL_TID_MASK;
@@ -86,7 +93,8 @@ int ath11k_dp_tx(struct ath11k *ar, struct ath11k_vif *arvif,
 	if (test_bit(ATH11K_FLAG_CRASH_FLUSH, &ar->ab->dev_flags))
 		return -ESHUTDOWN;
 
-	if (!ieee80211_is_data(hdr->frame_control))
+	if (!(info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) &&
+	    !ieee80211_is_data(hdr->frame_control))
 		return -ENOTSUPP;
 
 	pool_id = skb_get_queue_mapping(skb) & (ATH11K_HW_MAX_QUEUES - 1);
@@ -166,7 +174,10 @@ tcl_ring_sel:
 		 *	  skb_checksum_help() is needed
 		 */
 	case HAL_TCL_ENCAP_TYPE_ETHERNET:
+		/* no need to encap */
+		break;
 	case HAL_TCL_ENCAP_TYPE_802_3:
+	default:
 		/* TODO: Take care of other encap modes as well */
 		ret = -EINVAL;
 		goto fail_remove_idr;
diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index f33c6d714da8..5ffe55801ca4 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -33,6 +33,12 @@
 	.max_power              = 30, \
 }
 
+/* frame mode values are mapped as per enum ath11k_hw_txrx_mode */
+static unsigned int ath11k_frame_mode = ATH11K_HW_TXRX_NATIVE_WIFI;
+module_param_named(frame_mode, ath11k_frame_mode, uint, 0644);
+MODULE_PARM_DESC(frame_mode,
+		 "Datapath frame mode (0: raw, 1: native wifi (default), 2: ethernet)");
+
 static const struct ieee80211_channel ath11k_2ghz_channels[] = {
 	CHAN2G(1, 2412, 0),
 	CHAN2G(2, 2417, 0),
@@ -3686,10 +3692,10 @@ static int __ath11k_set_antenna(struct ath11k *ar, u32 tx_ant, u32 rx_ant)
 
 int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx)
 {
+	struct sk_buff *msdu = skb;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu);
 	struct ath11k *ar = ctx;
 	struct ath11k_base *ab = ar->ab;
-	struct sk_buff *msdu = skb;
-	struct ieee80211_tx_info *info;
 
 	spin_lock_bh(&ar->txmgmt_idr_lock);
 	idr_remove(&ar->txmgmt_idr, buf_id);
@@ -3729,6 +3735,7 @@ static int ath11k_mac_mgmt_tx_wmi(struct ath11k *ar, struct ath11k_vif *arvif,
 {
 	struct ath11k_base *ab = ar->ab;
 	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_tx_info *info;
 	dma_addr_t paddr;
 	int buf_id;
 	int ret;
@@ -3740,11 +3747,14 @@ static int ath11k_mac_mgmt_tx_wmi(struct ath11k *ar, struct ath11k_vif *arvif,
 	if (buf_id < 0)
 		return -ENOSPC;
 
-	if ((ieee80211_is_action(hdr->frame_control) ||
-	     ieee80211_is_deauth(hdr->frame_control) ||
-	     ieee80211_is_disassoc(hdr->frame_control)) &&
-	     ieee80211_has_protected(hdr->frame_control)) {
-		skb_put(skb, IEEE80211_CCMP_MIC_LEN);
+	info = IEEE80211_SKB_CB(skb);
+	if (!(info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP)) {
+		if ((ieee80211_is_action(hdr->frame_control) ||
+		     ieee80211_is_deauth(hdr->frame_control) ||
+		     ieee80211_is_disassoc(hdr->frame_control)) &&
+		     ieee80211_has_protected(hdr->frame_control)) {
+			skb_put(skb, IEEE80211_CCMP_MIC_LEN);
+		}
 	}
 
 	paddr = dma_map_single(ab->dev, skb->data, skb->len, DMA_TO_DEVICE);
@@ -3856,6 +3866,7 @@ static void ath11k_mac_op_tx(struct ieee80211_hw *hw,
 			     struct ieee80211_tx_control *control,
 			     struct sk_buff *skb)
 {
+	struct ath11k_skb_cb *skb_cb = ATH11K_SKB_CB(skb);
 	struct ath11k *ar = hw->priv;
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
 	struct ieee80211_vif *vif = info->control.vif;
@@ -3864,7 +3875,9 @@ static void ath11k_mac_op_tx(struct ieee80211_hw *hw,
 	bool is_prb_rsp;
 	int ret;
 
-	if (ieee80211_is_mgmt(hdr->frame_control)) {
+	if (info->control.flags & IEEE80211_TX_CTRL_HW_80211_ENCAP) {
+		skb_cb->flags |= ATH11K_SKB_HW_80211_ENCAP;
+	} else if (ieee80211_is_mgmt(hdr->frame_control)) {
 		is_prb_rsp = ieee80211_is_probe_resp(hdr->frame_control);
 		ret = ath11k_mac_mgmt_tx(ar, skb, is_prb_rsp);
 		if (ret) {
@@ -4145,6 +4158,7 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 	struct vdev_create_params vdev_param = {0};
 	struct peer_create_params peer_param;
 	u32 param_id, param_value;
+	int hw_encap = 0;
 	u16 nss;
 	int i;
 	int ret;
@@ -4239,7 +4253,22 @@ static int ath11k_mac_op_add_interface(struct ieee80211_hw *hw,
 	spin_unlock_bh(&ar->data_lock);
 
 	param_id = WMI_VDEV_PARAM_TX_ENCAP_TYPE;
-	param_value = ATH11K_HW_TXRX_NATIVE_WIFI;
+	if (ath11k_frame_mode == ATH11K_HW_TXRX_ETHERNET)
+		switch (vif->type) {
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_AP_VLAN:
+		case NL80211_IFTYPE_AP:
+			hw_encap = 1;
+			break;
+		default:
+			break;
+		}
+
+	if (ieee80211_set_hw_80211_encap(vif, hw_encap))
+		param_value = ATH11K_HW_TXRX_ETHERNET;
+	else
+		param_value = ATH11K_HW_TXRX_NATIVE_WIFI;
+
 	ret = ath11k_wmi_vdev_set_param_cmd(ar, arvif->vdev_id,
 					    param_id, param_value);
 	if (ret) {
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index ba05935b715a..bce8fc2b7257 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -4568,6 +4568,9 @@ enum wmi_sta_ps_param_rx_wake_policy {
 	WMI_STA_PS_RX_WAKE_POLICY_POLL_UAPSD = 1,
 };
 
+/* Do not change existing values! Used by ath11k_frame_mode parameter
+ * module parameter.
+ */
 enum ath11k_hw_txrx_mode {
 	ATH11K_HW_TXRX_RAW = 0,
 	ATH11K_HW_TXRX_NATIVE_WIFI = 1,
-- 
cgit v1.2.3-59-g8ed1b


From b14fba7ebd04082f7767a11daea7f12f3593de22 Mon Sep 17 00:00:00 2001
From: Christian Lamparter <chunkeey@gmail.com>
Date: Tue, 5 May 2020 10:42:09 +0300
Subject: carl9170: remove P2P_GO support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch follows up on a bug-report by Frank Schäfer, who
discovered that P2P GO wasn't working with wpa_supplicant.
This patch removes part of the broken P2P GO support but
keeps the vif switchover code in place.

Cc: <stable@vger.kernel.org>
Link: <https://lkml.kernel.org/r/3a9d86b6-744f-e670-8792-9167257edef8@googlemail.com>
Reported-by: Frank Schäfer <fschaefer.oss@googlemail.com>
Signed-off-by: Christian Lamparter <chunkeey@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200425092811.9494-1-chunkeey@gmail.com
---
 drivers/net/wireless/ath/carl9170/fw.c   |  4 +---
 drivers/net/wireless/ath/carl9170/main.c | 21 ++++-----------------
 2 files changed, 5 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/ath/carl9170/fw.c b/drivers/net/wireless/ath/carl9170/fw.c
index 51934d191f33..1ab09e1c9ec5 100644
--- a/drivers/net/wireless/ath/carl9170/fw.c
+++ b/drivers/net/wireless/ath/carl9170/fw.c
@@ -338,9 +338,7 @@ static int carl9170_fw(struct ar9170 *ar, const __u8 *data, size_t len)
 		ar->hw->wiphy->interface_modes |= BIT(NL80211_IFTYPE_ADHOC);
 
 		if (SUPP(CARL9170FW_WLANTX_CAB)) {
-			if_comb_types |=
-				BIT(NL80211_IFTYPE_AP) |
-				BIT(NL80211_IFTYPE_P2P_GO);
+			if_comb_types |= BIT(NL80211_IFTYPE_AP);
 
 #ifdef CONFIG_MAC80211_MESH
 			if_comb_types |=
diff --git a/drivers/net/wireless/ath/carl9170/main.c b/drivers/net/wireless/ath/carl9170/main.c
index 5914926a5c5b..816929fb5b14 100644
--- a/drivers/net/wireless/ath/carl9170/main.c
+++ b/drivers/net/wireless/ath/carl9170/main.c
@@ -582,11 +582,10 @@ static int carl9170_init_interface(struct ar9170 *ar,
 	ar->disable_offload |= ((vif->type != NL80211_IFTYPE_STATION) &&
 	    (vif->type != NL80211_IFTYPE_AP));
 
-	/* While the driver supports HW offload in a single
-	 * P2P client configuration, it doesn't support HW
-	 * offload in the favourit, concurrent P2P GO+CLIENT
-	 * configuration. Hence, HW offload will always be
-	 * disabled for P2P.
+	/* The driver used to have P2P GO+CLIENT support,
+	 * but since this was dropped and we don't know if
+	 * there are any gremlins lurking in the shadows,
+	 * so best we keep HW offload disabled for P2P.
 	 */
 	ar->disable_offload |= vif->p2p;
 
@@ -639,18 +638,6 @@ static int carl9170_op_add_interface(struct ieee80211_hw *hw,
 			if (vif->type == NL80211_IFTYPE_STATION)
 				break;
 
-			/* P2P GO [master] use-case
-			 * Because the P2P GO station is selected dynamically
-			 * by all participating peers of a WIFI Direct network,
-			 * the driver has be able to change the main interface
-			 * operating mode on the fly.
-			 */
-			if (main_vif->p2p && vif->p2p &&
-			    vif->type == NL80211_IFTYPE_AP) {
-				old_main = main_vif;
-				break;
-			}
-
 			err = -EBUSY;
 			rcu_read_unlock();
 
-- 
cgit v1.2.3-59-g8ed1b


From 559ef68f5f699647b53ab193d24425308e3e9526 Mon Sep 17 00:00:00 2001
From: Ashok Raj Nagarajan <arnagara@codeaurora.org>
Date: Mon, 4 May 2020 22:05:47 +0530
Subject: ath11k: Add support to reset htt peer stats

This patch adds support for resetting the per-peer HTT stats.

Usage:

echo 1 > /sys/kernel/debug/ieee80211/phyX/netdev:wlanX/stations/<peer MAC addr>/htt_peer_stats_reset

While doing so, sync the wmi services between FW and host.

Signed-off-by: Ashok Raj Nagarajan <arnagara@codeaurora.org>
Signed-off-by: Tamizh Chelvam <tamizhr@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588610147-20231-1-git-send-email-tamizhr@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/debug.h       |  2 +
 drivers/net/wireless/ath/ath11k/debugfs_sta.c | 67 +++++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/wmi.h         | 37 +++++++++++++++
 3 files changed, 106 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h
index 45454fcef346..f8aa48e39703 100644
--- a/drivers/net/wireless/ath/ath11k/debug.h
+++ b/drivers/net/wireless/ath/ath11k/debug.h
@@ -80,6 +80,8 @@ struct ath_pktlog_hdr {
 	u8 payload[0];
 };
 
+#define ATH11K_HTT_PEER_STATS_RESET BIT(16)
+
 #define ATH11K_HTT_STATS_BUF_SIZE (1024 * 512)
 #define ATH11K_FW_STATS_BUF_SIZE (1024 * 1024)
 
diff --git a/drivers/net/wireless/ath/ath11k/debugfs_sta.c b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
index 46eee5178f66..7308ed254232 100644
--- a/drivers/net/wireless/ath/ath11k/debugfs_sta.c
+++ b/drivers/net/wireless/ath/ath11k/debugfs_sta.c
@@ -8,6 +8,8 @@
 #include "core.h"
 #include "peer.h"
 #include "debug.h"
+#include "dp_tx.h"
+#include "debug_htt_stats.h"
 
 void
 ath11k_accumulate_per_peer_tx_stats(struct ath11k_sta *arsta,
@@ -758,6 +760,66 @@ static const struct file_operations fops_aggr_mode = {
 	.llseek = default_llseek,
 };
 
+static ssize_t
+ath11k_write_htt_peer_stats_reset(struct file *file,
+				  const char __user *user_buf,
+				  size_t count, loff_t *ppos)
+{
+	struct ieee80211_sta *sta = file->private_data;
+	struct ath11k_sta *arsta = (struct ath11k_sta *)sta->drv_priv;
+	struct ath11k *ar = arsta->arvif->ar;
+	struct htt_ext_stats_cfg_params cfg_params = { 0 };
+	int ret;
+	u8 type;
+
+	ret = kstrtou8_from_user(user_buf, count, 0, &type);
+	if (ret)
+		return ret;
+
+	if (!type)
+		return ret;
+
+	mutex_lock(&ar->conf_mutex);
+	cfg_params.cfg0 = HTT_STAT_PEER_INFO_MAC_ADDR;
+	cfg_params.cfg0 |= FIELD_PREP(GENMASK(15, 1),
+				HTT_PEER_STATS_REQ_MODE_FLUSH_TQM);
+
+	cfg_params.cfg1 = HTT_STAT_DEFAULT_PEER_REQ_TYPE;
+
+	cfg_params.cfg2 |= FIELD_PREP(GENMASK(7, 0), sta->addr[0]);
+	cfg_params.cfg2 |= FIELD_PREP(GENMASK(15, 8), sta->addr[1]);
+	cfg_params.cfg2 |= FIELD_PREP(GENMASK(23, 16), sta->addr[2]);
+	cfg_params.cfg2 |= FIELD_PREP(GENMASK(31, 24), sta->addr[3]);
+
+	cfg_params.cfg3 |= FIELD_PREP(GENMASK(7, 0), sta->addr[4]);
+	cfg_params.cfg3 |= FIELD_PREP(GENMASK(15, 8), sta->addr[5]);
+
+	cfg_params.cfg3 |= ATH11K_HTT_PEER_STATS_RESET;
+
+	ret = ath11k_dp_tx_htt_h2t_ext_stats_req(ar,
+						 ATH11K_DBG_HTT_EXT_STATS_PEER_INFO,
+						 &cfg_params,
+						 0ULL);
+	if (ret) {
+		ath11k_warn(ar->ab, "failed to send htt peer stats request: %d\n", ret);
+		mutex_unlock(&ar->conf_mutex);
+		return ret;
+	}
+
+	mutex_unlock(&ar->conf_mutex);
+
+	ret = count;
+
+	return ret;
+}
+
+static const struct file_operations fops_htt_peer_stats_reset = {
+	.write = ath11k_write_htt_peer_stats_reset,
+	.open = simple_open,
+	.owner = THIS_MODULE,
+	.llseek = default_llseek,
+};
+
 void ath11k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir)
 {
@@ -780,4 +842,9 @@ void ath11k_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 	debugfs_create_file("addba", 0200, dir, sta, &fops_addba);
 	debugfs_create_file("addba_resp", 0200, dir, sta, &fops_addba_resp);
 	debugfs_create_file("delba", 0200, dir, sta, &fops_delba);
+
+	if (test_bit(WMI_TLV_SERVICE_PER_PEER_HTT_STATS_RESET,
+		     ar->ab->wmi_ab.svc_map))
+		debugfs_create_file("htt_peer_stats_reset", 0600, dir, sta,
+				    &fops_htt_peer_stats_reset);
 }
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index bce8fc2b7257..d0a27f27fc80 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -1976,6 +1976,43 @@ enum wmi_tlv_service {
 	WMI_TLV_SERVICE_TX_DATA_MGMT_ACK_RSSI = 174,
 	WMI_TLV_SERVICE_NAN_DISABLE_SUPPORT = 175,
 	WMI_TLV_SERVICE_HTT_H2T_NO_HTC_HDR_LEN_IN_MSG_LEN = 176,
+	WMI_TLV_SERVICE_COEX_SUPPORT_UNEQUAL_ISOLATION = 177,
+	WMI_TLV_SERVICE_HW_DB2DBM_CONVERSION_SUPPORT = 178,
+	WMI_TLV_SERVICE_SUPPORT_EXTEND_ADDRESS = 179,
+	WMI_TLV_SERVICE_BEACON_RECEPTION_STATS = 180,
+	WMI_TLV_SERVICE_FETCH_TX_PN = 181,
+	WMI_TLV_SERVICE_PEER_UNMAP_RESPONSE_SUPPORT = 182,
+	WMI_TLV_SERVICE_TX_PER_PEER_AMPDU_SIZE = 183,
+	WMI_TLV_SERVICE_BSS_COLOR_SWITCH_COUNT = 184,
+	WMI_TLV_SERVICE_HTT_PEER_STATS_SUPPORT = 185,
+	WMI_TLV_SERVICE_UL_RU26_ALLOWED = 186,
+	WMI_TLV_SERVICE_GET_MWS_COEX_STATE = 187,
+	WMI_TLV_SERVICE_GET_MWS_DPWB_STATE = 188,
+	WMI_TLV_SERVICE_GET_MWS_TDM_STATE = 189,
+	WMI_TLV_SERVICE_GET_MWS_IDRX_STATE = 190,
+	WMI_TLV_SERVICE_GET_MWS_ANTENNA_SHARING_STATE = 191,
+	WMI_TLV_SERVICE_ENHANCED_TPC_CONFIG_EVENT = 192,
+	WMI_TLV_SERVICE_WLM_STATS_REQUEST = 193,
+	WMI_TLV_SERVICE_EXT_PEER_TID_CONFIGS_SUPPORT = 194,
+	WMI_TLV_SERVICE_WPA3_FT_SAE_SUPPORT = 195,
+	WMI_TLV_SERVICE_WPA3_FT_SUITE_B_SUPPORT = 196,
+	WMI_TLV_SERVICE_VOW_ENABLE = 197,
+	WMI_TLV_SERVICE_CFR_CAPTURE_IND_EVT_TYPE_1 = 198,
+	WMI_TLV_SERVICE_BROADCAST_TWT = 199,
+	WMI_TLV_SERVICE_RAP_DETECTION_SUPPORT = 200,
+	WMI_TLV_SERVICE_PS_TDCC = 201,
+	WMI_TLV_SERVICE_THREE_WAY_COEX_CONFIG_LEGACY   = 202,
+	WMI_TLV_SERVICE_THREE_WAY_COEX_CONFIG_OVERRIDE = 203,
+	WMI_TLV_SERVICE_TX_PWR_PER_PEER = 204,
+	WMI_TLV_SERVICE_STA_PLUS_STA_SUPPORT = 205,
+	WMI_TLV_SERVICE_WPA3_FT_FILS = 206,
+	WMI_TLV_SERVICE_ADAPTIVE_11R_ROAM = 207,
+	WMI_TLV_SERVICE_CHAN_RF_CHARACTERIZATION_INFO = 208,
+	WMI_TLV_SERVICE_FW_IFACE_COMBINATION_SUPPORT = 209,
+	WMI_TLV_SERVICE_TX_COMPL_TSF64 = 210,
+	WMI_TLV_SERVICE_DSM_ROAM_FILTER = 211,
+	WMI_TLV_SERVICE_PACKET_CAPTURE_SUPPORT = 212,
+	WMI_TLV_SERVICE_PER_PEER_HTT_STATS_RESET = 213,
 
 	WMI_MAX_EXT_SERVICE
 
-- 
cgit v1.2.3-59-g8ed1b


From d7d43782d541edb8596d2f4fc7f41b0734948ec5 Mon Sep 17 00:00:00 2001
From: Tamizh Chelvam <tamizhr@codeaurora.org>
Date: Mon, 4 May 2020 22:29:28 +0530
Subject: ath11k: fix kernel panic by freeing the msdu received with invalid
 length

In certain scenarios the host receives packets with an invalid length,
which causes the kernel panic below. Free those msdus to avoid
this kernel panic.

 2270.028121:   <6> task: ffffffc0008306d0 ti: ffffffc0008306d0 task.ti: ffffffc0008306d0
 2270.035247:   <2> PC is at skb_panic+0x40/0x44
 2270.042784:   <2> LR is at skb_panic+0x40/0x44
 2270.521775:   <2> [<ffffffc0004a06e0>] skb_panic+0x40/0x44
 2270.524039:   <2> [<ffffffc0004a1278>] skb_put+0x54/0x5c
 2270.529264:   <2> [<ffffffbffcc373a8>] ath11k_dp_process_rx_err+0x320/0x5b0 [ath11k]
 2270.533860:   <2> [<ffffffbffcc30b68>] ath11k_dp_service_srng+0x80/0x268 [ath11k]
 2270.541063:   <2> [<ffffffbffcc1d554>] ath11k_hal_rx_reo_ent_buf_paddr_get+0x200/0xb64 [ath11k]
 2270.547917:   <2> [<ffffffc0004b1f74>] net_rx_action+0xf8/0x274
 2270.556247:   <2> [<ffffffc000099df4>] __do_softirq+0x128/0x228
 2270.561625:   <2> [<ffffffc00009a130>] irq_exit+0x84/0xcc
 2270.567008:   <2> [<ffffffc0000cfb28>] __handle_domain_irq+0x8c/0xb0
 2270.571695:   <2> [<ffffffc000082484>] gic_handle_irq+0x6c/0xbc

Signed-off-by: Tamizh Chelvam <tamizhr@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588611568-20791-1-git-send-email-tamizhr@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 6b47bb7865dc..85670608c3e2 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -2270,6 +2270,7 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 	struct ieee80211_hdr *hdr;
 	struct sk_buff *last_buf;
 	u8 l3_pad_bytes;
+	u8 *hdr_status;
 	u16 msdu_len;
 	int ret;
 
@@ -2298,8 +2299,13 @@ static int ath11k_dp_rx_process_msdu(struct ath11k *ar,
 		skb_pull(msdu, HAL_RX_DESC_SIZE);
 	} else if (!rxcb->is_continuation) {
 		if ((msdu_len + HAL_RX_DESC_SIZE) > DP_RX_BUFFER_SIZE) {
+			hdr_status = ath11k_dp_rx_h_80211_hdr(rx_desc);
 			ret = -EINVAL;
 			ath11k_warn(ar->ab, "invalid msdu len %u\n", msdu_len);
+			ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", hdr_status,
+					sizeof(struct ieee80211_hdr));
+			ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", rx_desc,
+					sizeof(struct hal_rx_desc));
 			goto free_out;
 		}
 		skb_put(msdu, HAL_RX_DESC_SIZE + l3_pad_bytes + msdu_len);
@@ -3394,6 +3400,7 @@ ath11k_dp_process_rx_err_buf(struct ath11k *ar, u32 *ring_desc, int buf_id, bool
 	struct sk_buff *msdu;
 	struct ath11k_skb_rxcb *rxcb;
 	struct hal_rx_desc *rx_desc;
+	u8 *hdr_status;
 	u16 msdu_len;
 
 	spin_lock_bh(&rx_ring->idr_lock);
@@ -3431,6 +3438,17 @@ ath11k_dp_process_rx_err_buf(struct ath11k *ar, u32 *ring_desc, int buf_id, bool
 
 	rx_desc = (struct hal_rx_desc *)msdu->data;
 	msdu_len = ath11k_dp_rx_h_msdu_start_msdu_len(rx_desc);
+	if ((msdu_len + HAL_RX_DESC_SIZE) > DP_RX_BUFFER_SIZE) {
+		hdr_status = ath11k_dp_rx_h_80211_hdr(rx_desc);
+		ath11k_warn(ar->ab, "invalid msdu leng %u", msdu_len);
+		ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", hdr_status,
+				sizeof(struct ieee80211_hdr));
+		ath11k_dbg_dump(ar->ab, ATH11K_DBG_DATA, NULL, "", rx_desc,
+				sizeof(struct hal_rx_desc));
+		dev_kfree_skb_any(msdu);
+		goto exit;
+	}
+
 	skb_put(msdu, HAL_RX_DESC_SIZE + msdu_len);
 
 	if (ath11k_dp_rx_frag_h_mpdu(ar, msdu, ring_desc)) {
-- 
cgit v1.2.3-59-g8ed1b


From ee4dd7061891d7295328302104037520d831ce43 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 4 May 2020 15:08:38 -0500
Subject: ath6kl: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504200838.GA31974@embeddedor
---
 drivers/net/wireless/ath/ath6kl/core.h  | 4 ++--
 drivers/net/wireless/ath/ath6kl/debug.c | 2 +-
 drivers/net/wireless/ath/ath6kl/hif.h   | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath6kl/core.h b/drivers/net/wireless/ath/ath6kl/core.h
index 0d30e762c090..77e052336eb5 100644
--- a/drivers/net/wireless/ath/ath6kl/core.h
+++ b/drivers/net/wireless/ath/ath6kl/core.h
@@ -160,7 +160,7 @@ enum ath6kl_fw_capability {
 struct ath6kl_fw_ie {
 	__le32 id;
 	__le32 len;
-	u8 data[0];
+	u8 data[];
 };
 
 enum ath6kl_hw_flags {
@@ -406,7 +406,7 @@ struct ath6kl_mgmt_buff {
 	u32 id;
 	bool no_cck;
 	size_t len;
-	u8 buf[0];
+	u8 buf[];
 };
 
 struct ath6kl_sta {
diff --git a/drivers/net/wireless/ath/ath6kl/debug.c b/drivers/net/wireless/ath/ath6kl/debug.c
index 54337d60f288..7506cea46f58 100644
--- a/drivers/net/wireless/ath/ath6kl/debug.c
+++ b/drivers/net/wireless/ath/ath6kl/debug.c
@@ -30,7 +30,7 @@ struct ath6kl_fwlog_slot {
 	__le32 length;
 
 	/* max ATH6KL_FWLOG_PAYLOAD_SIZE bytes */
-	u8 payload[0];
+	u8 payload[];
 };
 
 #define ATH6KL_FWLOG_MAX_ENTRIES 20
diff --git a/drivers/net/wireless/ath/ath6kl/hif.h b/drivers/net/wireless/ath/ath6kl/hif.h
index dc6bd8cd9b83..aea7fea2a81e 100644
--- a/drivers/net/wireless/ath/ath6kl/hif.h
+++ b/drivers/net/wireless/ath/ath6kl/hif.h
@@ -199,7 +199,7 @@ struct hif_scatter_req {
 
 	u32 scat_q_depth;
 
-	struct hif_scatter_item scat_list[0];
+	struct hif_scatter_item scat_list[];
 };
 
 struct ath6kl_irq_proc_registers {
-- 
cgit v1.2.3-59-g8ed1b


From 450edd2805982d14ed79733a82927d2857b27cac Mon Sep 17 00:00:00 2001
From: Masashi Honma <masashi.honma@gmail.com>
Date: Tue, 5 May 2020 06:44:43 +0900
Subject: ath9k_htc: Silence undersized packet warnings

Some devices, such as the TP-Link TL-WN722N, produce this kind of message
frequently.

kernel: ath: phy0: Short RX data len, dropping (dlen: 4)

This warning is useful for developers to recognize that the device
(Wi-Fi dongle, USB hub, etc.) is noisy, but not for general users. So
this patch demotes the warning to a debug message.

Reported-By: Denis <pro.denis@protonmail.com>
Ref: https://bugzilla.kernel.org/show_bug.cgi?id=207539
Fixes: cd486e627e67 ("ath9k_htc: Discard undersized packets")
Signed-off-by: Masashi Honma <masashi.honma@gmail.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504214443.4485-1-masashi.honma@gmail.com
---
 drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 9cec5c216e1f..118e5550b10c 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -999,9 +999,9 @@ static bool ath9k_rx_prepare(struct ath9k_htc_priv *priv,
 	 * which are not PHY_ERROR (short radar pulses have a length of 3)
 	 */
 	if (unlikely(!rs_datalen || (rs_datalen < 10 && !is_phyerr))) {
-		ath_warn(common,
-			 "Short RX data len, dropping (dlen: %d)\n",
-			 rs_datalen);
+		ath_dbg(common, ANY,
+			"Short RX data len, dropping (dlen: %d)\n",
+			rs_datalen);
 		goto rx_next;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 14dd3a71ccb7081d5d4959370794bbabc3258b34 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Mon, 4 May 2020 15:12:24 -0500
Subject: ath11k: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504201224.GA32282@embeddedor
---
 drivers/net/wireless/ath/ath11k/debug.h           | 4 ++--
 drivers/net/wireless/ath/ath11k/debug_htt_stats.h | 8 ++++----
 drivers/net/wireless/ath/ath11k/hal_desc.h        | 4 ++--
 drivers/net/wireless/ath/ath11k/hal_rx.h          | 2 +-
 drivers/net/wireless/ath/ath11k/hw.h              | 2 +-
 drivers/net/wireless/ath/ath11k/wmi.h             | 2 +-
 6 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/debug.h b/drivers/net/wireless/ath/ath11k/debug.h
index f8aa48e39703..c30085406bfb 100644
--- a/drivers/net/wireless/ath/ath11k/debug.h
+++ b/drivers/net/wireless/ath/ath11k/debug.h
@@ -67,7 +67,7 @@ struct debug_htt_stats_req {
 	u8 peer_addr[ETH_ALEN];
 	struct completion cmpln;
 	u32 buf_len;
-	u8 buf[0];
+	u8 buf[];
 };
 
 struct ath_pktlog_hdr {
@@ -77,7 +77,7 @@ struct ath_pktlog_hdr {
 	u16 size;
 	u32 timestamp;
 	u32 type_specific_data;
-	u8 payload[0];
+	u8 payload[];
 };
 
 #define ATH11K_HTT_PEER_STATS_RESET BIT(16)
diff --git a/drivers/net/wireless/ath/ath11k/debug_htt_stats.h b/drivers/net/wireless/ath/ath11k/debug_htt_stats.h
index 23a6baa9e95a..682a6ff222bd 100644
--- a/drivers/net/wireless/ath/ath11k/debug_htt_stats.h
+++ b/drivers/net/wireless/ath/ath11k/debug_htt_stats.h
@@ -239,7 +239,7 @@ struct htt_tx_pdev_stats_tx_ppdu_stats_tlv_v {
  */
 struct htt_tx_pdev_stats_tried_mpdu_cnt_hist_tlv_v {
 	u32 hist_bin_size;
-	u32 tried_mpdu_cnt_hist[0]; /* HTT_TX_PDEV_TRIED_MPDU_CNT_HIST */
+	u32 tried_mpdu_cnt_hist[]; /* HTT_TX_PDEV_TRIED_MPDU_CNT_HIST */
 };
 
 /* == SOC ERROR STATS == */
@@ -550,7 +550,7 @@ struct htt_tx_hwq_stats_cmn_tlv {
 struct htt_tx_hwq_difs_latency_stats_tlv_v {
 	u32 hist_intvl;
 	/* histogram of ppdu post to hwsch - > cmd status received */
-	u32 difs_latency_hist[0]; /* HTT_TX_HWQ_MAX_DIFS_LATENCY_BINS */
+	u32 difs_latency_hist[]; /* HTT_TX_HWQ_MAX_DIFS_LATENCY_BINS */
 };
 
 /* NOTE: Variable length TLV, use length spec to infer array size */
@@ -586,7 +586,7 @@ struct htt_tx_hwq_fes_result_stats_tlv_v {
 struct htt_tx_hwq_tried_mpdu_cnt_hist_tlv_v {
 	u32 hist_bin_size;
 	/* Histogram of number of mpdus on tried mpdu */
-	u32 tried_mpdu_cnt_hist[0]; /* HTT_TX_HWQ_TRIED_MPDU_CNT_HIST */
+	u32 tried_mpdu_cnt_hist[]; /* HTT_TX_HWQ_TRIED_MPDU_CNT_HIST */
 };
 
 /* NOTE: Variable length TLV, use length spec to infer array size
@@ -1584,7 +1584,7 @@ struct htt_pdev_stats_twt_session_tlv {
 struct htt_pdev_stats_twt_sessions_tlv {
 	u32 pdev_id;
 	u32 num_sessions;
-	struct htt_pdev_stats_twt_session_tlv twt_session[0];
+	struct htt_pdev_stats_twt_session_tlv twt_session[];
 };
 
 enum htt_rx_reo_resource_sample_id_enum {
diff --git a/drivers/net/wireless/ath/ath11k/hal_desc.h b/drivers/net/wireless/ath/ath11k/hal_desc.h
index 5e200380cca4..a1f747c1c44d 100644
--- a/drivers/net/wireless/ath/ath11k/hal_desc.h
+++ b/drivers/net/wireless/ath/ath11k/hal_desc.h
@@ -477,7 +477,7 @@ enum hal_tlv_tag {
 
 struct hal_tlv_hdr {
 	u32 tl;
-	u8 value[0];
+	u8 value[];
 } __packed;
 
 #define RX_MPDU_DESC_INFO0_MSDU_COUNT		GENMASK(7, 0)
@@ -1972,7 +1972,7 @@ struct hal_rx_reo_queue {
 	u32 processed_total_bytes;
 	u32 info5;
 	u32 rsvd[3];
-	struct hal_rx_reo_queue_ext ext_desc[0];
+	struct hal_rx_reo_queue_ext ext_desc[];
 } __packed;
 
 /* hal_rx_reo_queue
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.h b/drivers/net/wireless/ath/ath11k/hal_rx.h
index e863e4abfcc1..c436191ae1e8 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.h
@@ -23,7 +23,7 @@ struct hal_rx_wbm_rel_info {
 
 struct hal_rx_mon_status_tlv_hdr {
 	u32 hdr;
-	u8 value[0];
+	u8 value[];
 };
 
 enum hal_rx_su_mu_coding {
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index 9973477ae373..cdec95644758 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -111,7 +111,7 @@ struct ath11k_hw_params {
 struct ath11k_fw_ie {
 	__le32 id;
 	__le32 len;
-	u8 data[0];
+	u8 data[];
 };
 
 enum ath11k_bd_ie_board_type {
diff --git a/drivers/net/wireless/ath/ath11k/wmi.h b/drivers/net/wireless/ath/ath11k/wmi.h
index d0a27f27fc80..b9f3e559ced7 100644
--- a/drivers/net/wireless/ath/ath11k/wmi.h
+++ b/drivers/net/wireless/ath/ath11k/wmi.h
@@ -39,7 +39,7 @@ struct wmi_cmd_hdr {
 
 struct wmi_tlv {
 	u32 header;
-	u8 value[0];
+	u8 value[];
 } __packed;
 
 #define WMI_TLV_LEN	GENMASK(15, 0)
-- 
cgit v1.2.3-59-g8ed1b


From 6d64be3da282908bb17b0803b9edad8852ffea56 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:03 +0200
Subject: xfrm: avoid extract_output indirection for ipv4

We can use a direct call for ipv4, so move the needed functions
to net/xfrm/xfrm_output.c and call them directly.

For ipv6 the indirection can be avoided as well but it will need
a bit more work -- to ease review it will be done in another patch.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  1 -
 net/ipv4/xfrm4_output.c | 40 ----------------------------------------
 net/ipv4/xfrm4_state.c  |  1 -
 net/xfrm/xfrm_output.c  | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 46 insertions(+), 42 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 2577666c34c8..397007324abd 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1580,7 +1580,6 @@ static inline int xfrm4_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi)
 	return xfrm_input(skb, nexthdr, spi, 0);
 }
 
-int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb);
 int xfrm4_protocol_register(struct xfrm4_protocol *handler, unsigned char protocol);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 89ba7c87de5d..21c8fa0a31ed 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,46 +14,6 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
-static int xfrm4_tunnel_check_size(struct sk_buff *skb)
-{
-	int mtu, ret = 0;
-
-	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
-		goto out;
-
-	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
-		goto out;
-
-	mtu = dst_mtu(skb_dst(skb));
-	if ((!skb_is_gso(skb) && skb->len > mtu) ||
-	    (skb_is_gso(skb) &&
-	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
-		skb->protocol = htons(ETH_P_IP);
-
-		if (skb->sk)
-			xfrm_local_error(skb, mtu);
-		else
-			icmp_send(skb, ICMP_DEST_UNREACH,
-				  ICMP_FRAG_NEEDED, htonl(mtu));
-		ret = -EMSGSIZE;
-	}
-out:
-	return ret;
-}
-
-int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm4_tunnel_check_size(skb);
-	if (err)
-		return err;
-
-	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
-
-	return xfrm4_extract_header(skb);
-}
-
 int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index f8ed3c3bb928..d7c200779e4f 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -37,7 +37,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.output			= xfrm4_output,
 	.output_finish		= xfrm4_output_finish,
 	.extract_input		= xfrm4_extract_input,
-	.extract_output		= xfrm4_extract_output,
 	.transport_finish	= xfrm4_transport_finish,
 	.local_error		= xfrm4_local_error,
 };
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 2fd3d990d992..a7b3af7f7a1e 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -13,6 +13,7 @@
 #include <linux/slab.h>
 #include <linux/spinlock.h>
 #include <net/dst.h>
+#include <net/icmp.h>
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 
@@ -609,6 +610,47 @@ out:
 }
 EXPORT_SYMBOL_GPL(xfrm_output);
 
+static int xfrm4_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu, ret = 0;
+
+	if (IPCB(skb)->flags & IPSKB_XFRM_TUNNEL_SIZE)
+		goto out;
+
+	if (!(ip_hdr(skb)->frag_off & htons(IP_DF)) || skb->ignore_df)
+		goto out;
+
+	mtu = dst_mtu(skb_dst(skb));
+	if ((!skb_is_gso(skb) && skb->len > mtu) ||
+	    (skb_is_gso(skb) &&
+	     !skb_gso_validate_network_len(skb, ip_skb_dst_mtu(skb->sk, skb)))) {
+		skb->protocol = htons(ETH_P_IP);
+
+		if (skb->sk)
+			xfrm_local_error(skb, mtu);
+		else
+			icmp_send(skb, ICMP_DEST_UNREACH,
+				  ICMP_FRAG_NEEDED, htonl(mtu));
+		ret = -EMSGSIZE;
+	}
+out:
+	return ret;
+}
+
+static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+	int err;
+
+	err = xfrm4_tunnel_check_size(skb);
+	if (err)
+		return err;
+
+	XFRM_MODE_SKB_CB(skb)->protocol = ip_hdr(skb)->protocol;
+
+	xfrm4_extract_header(skb);
+	return 0;
+}
+
 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	const struct xfrm_state_afinfo *afinfo;
@@ -624,6 +666,10 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	if (inner_mode == NULL)
 		return -EAFNOSUPPORT;
 
+	switch (inner_mode->family) {
+	case AF_INET:
+		return xfrm4_extract_output(x, skb);
+	}
 	rcu_read_lock();
 	afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
 	if (likely(afinfo))
-- 
cgit v1.2.3-59-g8ed1b


From a269fbfc4e9ffe48c1f8142e60a49b6f2e588c58 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:04 +0200
Subject: xfrm: state: remove extract_input indirection from xfrm_state_afinfo

In order to keep CONFIG_IPV6=m working, xfrm6_extract_header needs to be
duplicated.  It will be removed again in a followup change when the
remaining caller is moved to net/xfrm as well.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  3 ---
 net/ipv4/xfrm4_input.c  |  5 -----
 net/ipv4/xfrm4_state.c  |  1 -
 net/ipv6/xfrm6_input.c  |  5 -----
 net/ipv6/xfrm6_output.c | 17 ++++++++++++++++-
 net/ipv6/xfrm6_state.c  | 24 ------------------------
 net/xfrm/xfrm_inout.h   | 18 ++++++++++++++++++
 net/xfrm/xfrm_input.c   | 21 +++++++++++----------
 8 files changed, 45 insertions(+), 49 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 397007324abd..a21c1dea5340 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -362,8 +362,6 @@ struct xfrm_state_afinfo {
 
 	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	int			(*output_finish)(struct sock *sk, struct sk_buff *skb);
-	int			(*extract_input)(struct xfrm_state *x,
-						 struct sk_buff *skb);
 	int			(*extract_output)(struct xfrm_state *x,
 						  struct sk_buff *skb);
 	int			(*transport_finish)(struct sk_buff *skb,
@@ -1587,7 +1585,6 @@ int xfrm4_protocol_deregister(struct xfrm4_protocol *handler, unsigned char prot
 int xfrm4_tunnel_register(struct xfrm_tunnel *handler, unsigned short family);
 int xfrm4_tunnel_deregister(struct xfrm_tunnel *handler, unsigned short family);
 void xfrm4_local_error(struct sk_buff *skb, u32 mtu);
-int xfrm6_extract_header(struct sk_buff *skb);
 int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t);
diff --git a/net/ipv4/xfrm4_input.c b/net/ipv4/xfrm4_input.c
index f8de2482a529..ad2afeef4f10 100644
--- a/net/ipv4/xfrm4_input.c
+++ b/net/ipv4/xfrm4_input.c
@@ -18,11 +18,6 @@
 #include <net/ip.h>
 #include <net/xfrm.h>
 
-int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	return xfrm4_extract_header(skb);
-}
-
 static int xfrm4_rcv_encap_finish2(struct net *net, struct sock *sk,
 				   struct sk_buff *skb)
 {
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index d7c200779e4f..521fc1bc069c 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -36,7 +36,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.proto			= IPPROTO_IPIP,
 	.output			= xfrm4_output,
 	.output_finish		= xfrm4_output_finish,
-	.extract_input		= xfrm4_extract_input,
 	.transport_finish	= xfrm4_transport_finish,
 	.local_error		= xfrm4_local_error,
 };
diff --git a/net/ipv6/xfrm6_input.c b/net/ipv6/xfrm6_input.c
index 56f52353b324..04cbeefd8982 100644
--- a/net/ipv6/xfrm6_input.c
+++ b/net/ipv6/xfrm6_input.c
@@ -17,11 +17,6 @@
 #include <net/ipv6.h>
 #include <net/xfrm.h>
 
-int xfrm6_extract_input(struct xfrm_state *x, struct sk_buff *skb)
-{
-	return xfrm6_extract_header(skb);
-}
-
 int xfrm6_rcv_spi(struct sk_buff *skb, int nexthdr, __be32 spi,
 		  struct ip6_tnl *t)
 {
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index fbe51d40bd7e..855078a43fc7 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -94,6 +94,20 @@ out:
 	return ret;
 }
 
+static void __xfrm6_extract_header(struct sk_buff *skb)
+{
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+	XFRM_MODE_SKB_CB(skb)->id = 0;
+	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+	XFRM_MODE_SKB_CB(skb)->optlen = 0;
+	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+}
+
 int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
 	int err;
@@ -104,7 +118,8 @@ int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 
 	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
 
-	return xfrm6_extract_header(skb);
+	__xfrm6_extract_header(skb);
+	return 0;
 }
 
 int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 78daadecbdef..8fbf5a68ee6e 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -13,36 +13,12 @@
  */
 
 #include <net/xfrm.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-#include <linux/netfilter_ipv6.h>
-#include <linux/export.h>
-#include <net/dsfield.h>
-#include <net/ipv6.h>
-#include <net/addrconf.h>
-
-int xfrm6_extract_header(struct sk_buff *skb)
-{
-	struct ipv6hdr *iph = ipv6_hdr(skb);
-
-	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
-	XFRM_MODE_SKB_CB(skb)->id = 0;
-	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
-	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
-	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
-	XFRM_MODE_SKB_CB(skb)->optlen = 0;
-	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
-	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
-
-	return 0;
-}
 
 static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
 	.output			= xfrm6_output,
 	.output_finish		= xfrm6_output_finish,
-	.extract_input		= xfrm6_extract_input,
 	.extract_output		= xfrm6_extract_output,
 	.transport_finish	= xfrm6_transport_finish,
 	.local_error		= xfrm6_local_error,
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
index c7b0318938e2..e24abac92dc2 100644
--- a/net/xfrm/xfrm_inout.h
+++ b/net/xfrm/xfrm_inout.h
@@ -6,6 +6,24 @@
 #ifndef XFRM_INOUT_H
 #define XFRM_INOUT_H 1
 
+static inline void xfrm6_extract_header(struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	struct ipv6hdr *iph = ipv6_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+	XFRM_MODE_SKB_CB(skb)->id = 0;
+	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
+	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
+	XFRM_MODE_SKB_CB(skb)->optlen = 0;
+	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+#else
+	WARN_ON_ONCE(1);
+#endif
+}
+
 static inline void xfrm6_beet_make_header(struct sk_buff *skb)
 {
 	struct ipv6hdr *iph = ipv6_hdr(skb);
diff --git a/net/xfrm/xfrm_input.c b/net/xfrm/xfrm_input.c
index aa35f23c4912..6db266a0cb2d 100644
--- a/net/xfrm/xfrm_input.c
+++ b/net/xfrm/xfrm_input.c
@@ -353,17 +353,18 @@ xfrm_inner_mode_encap_remove(struct xfrm_state *x,
 static int xfrm_prepare_input(struct xfrm_state *x, struct sk_buff *skb)
 {
 	const struct xfrm_mode *inner_mode = &x->inner_mode;
-	const struct xfrm_state_afinfo *afinfo;
-	int err = -EAFNOSUPPORT;
-
-	rcu_read_lock();
-	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
-	if (likely(afinfo))
-		err = afinfo->extract_input(x, skb);
-	rcu_read_unlock();
 
-	if (err)
-		return err;
+	switch (x->outer_mode.family) {
+	case AF_INET:
+		xfrm4_extract_header(skb);
+		break;
+	case AF_INET6:
+		xfrm6_extract_header(skb);
+		break;
+	default:
+		WARN_ON_ONCE(1);
+		return -EAFNOSUPPORT;
+	}
 
 	if (x->sel.family == AF_UNSPEC) {
 		inner_mode = xfrm_ip2inner_mode(x, XFRM_MODE_SKB_CB(skb)->protocol);
-- 
cgit v1.2.3-59-g8ed1b


From 171916cbd53dec5c7b05efb56a201671d92effc1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:05 +0200
Subject: xfrm: move xfrm4_extract_header to common helper

The function only initializes the XFRM CB in the skb.

After previous patch xfrm4_extract_header is only called from
net/xfrm/xfrm_{input,output}.c.

Because of IPV6=m linker errors the ipv6 equivalent
(xfrm6_extract_header) was already placed in xfrm_inout.h because
we can't call functions residing in a module from the core.

So do the same for the ipv4 helper and place it next to the ipv6 one.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h     |  1 -
 net/ipv4/xfrm4_state.c | 21 ---------------------
 net/xfrm/xfrm_inout.h  | 14 ++++++++++++++
 3 files changed, 14 insertions(+), 22 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index a21c1dea5340..8b956528b6e6 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1562,7 +1562,6 @@ int pktgen_xfrm_outer_mode_output(struct xfrm_state *x, struct sk_buff *skb);
 #endif
 
 void xfrm_local_error(struct sk_buff *skb, int mtu);
-int xfrm4_extract_header(struct sk_buff *skb);
 int xfrm4_extract_input(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm4_rcv_encap(struct sk_buff *skb, int nexthdr, __be32 spi,
 		    int encap_type);
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index 521fc1bc069c..b23a1711297b 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -8,28 +8,7 @@
  *
  */
 
-#include <net/ip.h>
 #include <net/xfrm.h>
-#include <linux/pfkeyv2.h>
-#include <linux/ipsec.h>
-#include <linux/netfilter_ipv4.h>
-#include <linux/export.h>
-
-int xfrm4_extract_header(struct sk_buff *skb)
-{
-	const struct iphdr *iph = ip_hdr(skb);
-
-	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
-	XFRM_MODE_SKB_CB(skb)->id = iph->id;
-	XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
-	XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
-	XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
-	XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
-	memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
-	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
-
-	return 0;
-}
 
 static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
diff --git a/net/xfrm/xfrm_inout.h b/net/xfrm/xfrm_inout.h
index e24abac92dc2..efc5e6b2e87b 100644
--- a/net/xfrm/xfrm_inout.h
+++ b/net/xfrm/xfrm_inout.h
@@ -6,6 +6,20 @@
 #ifndef XFRM_INOUT_H
 #define XFRM_INOUT_H 1
 
+static inline void xfrm4_extract_header(struct sk_buff *skb)
+{
+	const struct iphdr *iph = ip_hdr(skb);
+
+	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
+	XFRM_MODE_SKB_CB(skb)->id = iph->id;
+	XFRM_MODE_SKB_CB(skb)->frag_off = iph->frag_off;
+	XFRM_MODE_SKB_CB(skb)->tos = iph->tos;
+	XFRM_MODE_SKB_CB(skb)->ttl = iph->ttl;
+	XFRM_MODE_SKB_CB(skb)->optlen = iph->ihl * 4 - sizeof(*iph);
+	memset(XFRM_MODE_SKB_CB(skb)->flow_lbl, 0,
+	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
+}
+
 static inline void xfrm6_extract_header(struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_IPV6)
-- 
cgit v1.2.3-59-g8ed1b


From 3e50ddd8b8d5067796fc87cbbb25c71451ccb385 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:06 +0200
Subject: xfrm: expose local_rxpmtu via ipv6_stubs

We cannot call this function from the core kernel unless we would force
CONFIG_IPV6=y.

Therefore expose this via ipv6_stubs so we can call it from net/xfrm
in the followup patch.

Since the call is expected to be unlikely, no extra code for the IPV6=y
case is added and we will always eat the indirection cost.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/ipv6_stubs.h | 1 +
 include/net/xfrm.h       | 1 +
 net/ipv6/af_inet6.c      | 1 +
 net/ipv6/xfrm6_output.c  | 2 +-
 4 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index 1e9e0cf7dc75..d8ab3872aa2a 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -57,6 +57,7 @@ struct ipv6_stub {
 			      const struct in6_addr *solicited_addr,
 			      bool router, bool solicited, bool override, bool inc_opt);
 #if IS_ENABLED(CONFIG_XFRM)
+	void (*xfrm6_local_rxpmtu)(struct sk_buff *skb, u32 mtu);
 	int (*xfrm6_udp_encap_rcv)(struct sock *sk, struct sk_buff *skb);
 	int (*xfrm6_rcv_encap)(struct sk_buff *skb, int nexthdr, __be32 spi,
 			       int encap_type);
diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8b956528b6e6..10295ab4cdfb 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1608,6 +1608,7 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 			  u8 **prevhdr);
 
 #ifdef CONFIG_XFRM
+void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu);
 int xfrm4_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm6_udp_encap_rcv(struct sock *sk, struct sk_buff *skb);
 int xfrm_user_policy(struct sock *sk, int optname,
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index cbbb00bad20e..aa4882929fd0 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -963,6 +963,7 @@ static const struct ipv6_stub ipv6_stub_impl = {
 	.udpv6_encap_enable = udpv6_encap_enable,
 	.ndisc_send_na = ndisc_send_na,
 #if IS_ENABLED(CONFIG_XFRM)
+	.xfrm6_local_rxpmtu = xfrm6_local_rxpmtu,
 	.xfrm6_udp_encap_rcv = xfrm6_udp_encap_rcv,
 	.xfrm6_rcv_encap = xfrm6_rcv_encap,
 #endif
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 855078a43fc7..23e2b52cfba6 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -40,7 +40,7 @@ static int xfrm6_local_dontfrag(struct sk_buff *skb)
 	return 0;
 }
 
-static void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
+void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
 {
 	struct flowi6 fl6;
 	struct sock *sk = skb->sk;
-- 
cgit v1.2.3-59-g8ed1b


From ede64dd2bfe2710549f1922a214959d966baaac3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:07 +0200
Subject: xfrm: place xfrm6_local_dontfrag in xfrm.h

so next patch can re-use it from net/xfrm/xfrm_output.c without
causing a linker error when IPV6 is a module.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      | 16 ++++++++++++++++
 net/ipv6/xfrm6_output.c | 21 ++-------------------
 2 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 10295ab4cdfb..8f7fb033d557 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -1993,4 +1993,20 @@ static inline int xfrm_tunnel_check(struct sk_buff *skb, struct xfrm_state *x,
 
 	return 0;
 }
+
+#if IS_ENABLED(CONFIG_IPV6)
+static inline bool xfrm6_local_dontfrag(const struct sock *sk)
+{
+	int proto;
+
+	if (!sk || sk->sk_family != AF_INET6)
+		return false;
+
+	proto = sk->sk_protocol;
+	if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
+		return inet6_sk(sk)->dontfrag;
+
+	return false;
+}
+#endif
 #endif	/* _NET_XFRM_H */
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index 23e2b52cfba6..be64f280510c 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -23,23 +23,6 @@ int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
 }
 EXPORT_SYMBOL(xfrm6_find_1stfragopt);
 
-static int xfrm6_local_dontfrag(struct sk_buff *skb)
-{
-	int proto;
-	struct sock *sk = skb->sk;
-
-	if (sk) {
-		if (sk->sk_family != AF_INET6)
-			return 0;
-
-		proto = sk->sk_protocol;
-		if (proto == IPPROTO_UDP || proto == IPPROTO_RAW)
-			return inet6_sk(sk)->dontfrag;
-	}
-
-	return 0;
-}
-
 void xfrm6_local_rxpmtu(struct sk_buff *skb, u32 mtu)
 {
 	struct flowi6 fl6;
@@ -82,7 +65,7 @@ static int xfrm6_tunnel_check_size(struct sk_buff *skb)
 		skb->dev = dst->dev;
 		skb->protocol = htons(ETH_P_IPV6);
 
-		if (xfrm6_local_dontfrag(skb))
+		if (xfrm6_local_dontfrag(skb->sk))
 			xfrm6_local_rxpmtu(skb, mtu);
 		else if (skb->sk)
 			xfrm_local_error(skb, mtu);
@@ -181,7 +164,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 
 	toobig = skb->len > mtu && !skb_is_gso(skb);
 
-	if (toobig && xfrm6_local_dontfrag(skb)) {
+	if (toobig && xfrm6_local_dontfrag(skb->sk)) {
 		xfrm6_local_rxpmtu(skb, mtu);
 		kfree_skb(skb);
 		return -EMSGSIZE;
-- 
cgit v1.2.3-59-g8ed1b


From f3075f48ddb2c4d076aeda36fa0939163e4b2816 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:08 +0200
Subject: xfrm: remove extract_output indirection from xfrm_state_afinfo

Move this to xfrm_output.c.  This avoids the state->extract_output
indirection.

This patch also removes the duplicated __xfrm6_extract_header helper
added in an earlier patch; we can now use the one from xfrm_inout.h.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  3 ---
 net/ipv6/xfrm6_output.c | 58 ------------------------------------------
 net/ipv6/xfrm6_state.c  |  1 -
 net/xfrm/xfrm_output.c  | 67 +++++++++++++++++++++++++++++++++++++++++++------
 4 files changed, 59 insertions(+), 70 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index 8f7fb033d557..db814a7e042f 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -362,8 +362,6 @@ struct xfrm_state_afinfo {
 
 	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
 	int			(*output_finish)(struct sock *sk, struct sk_buff *skb);
-	int			(*extract_output)(struct xfrm_state *x,
-						  struct sk_buff *skb);
 	int			(*transport_finish)(struct sk_buff *skb,
 						    int async);
 	void			(*local_error)(struct sk_buff *skb, u32 mtu);
@@ -1601,7 +1599,6 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family);
 int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family);
 __be32 xfrm6_tunnel_alloc_spi(struct net *net, xfrm_address_t *saddr);
 __be32 xfrm6_tunnel_spi_lookup(struct net *net, const xfrm_address_t *saddr);
-int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb);
 int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb);
 int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb);
 int xfrm6_find_1stfragopt(struct xfrm_state *x, struct sk_buff *skb,
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index be64f280510c..b7d65b344679 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -47,64 +47,6 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
 	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
 }
 
-static int xfrm6_tunnel_check_size(struct sk_buff *skb)
-{
-	int mtu, ret = 0;
-	struct dst_entry *dst = skb_dst(skb);
-
-	if (skb->ignore_df)
-		goto out;
-
-	mtu = dst_mtu(dst);
-	if (mtu < IPV6_MIN_MTU)
-		mtu = IPV6_MIN_MTU;
-
-	if ((!skb_is_gso(skb) && skb->len > mtu) ||
-	    (skb_is_gso(skb) &&
-	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
-		skb->dev = dst->dev;
-		skb->protocol = htons(ETH_P_IPV6);
-
-		if (xfrm6_local_dontfrag(skb->sk))
-			xfrm6_local_rxpmtu(skb, mtu);
-		else if (skb->sk)
-			xfrm_local_error(skb, mtu);
-		else
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
-		ret = -EMSGSIZE;
-	}
-out:
-	return ret;
-}
-
-static void __xfrm6_extract_header(struct sk_buff *skb)
-{
-	struct ipv6hdr *iph = ipv6_hdr(skb);
-
-	XFRM_MODE_SKB_CB(skb)->ihl = sizeof(*iph);
-	XFRM_MODE_SKB_CB(skb)->id = 0;
-	XFRM_MODE_SKB_CB(skb)->frag_off = htons(IP_DF);
-	XFRM_MODE_SKB_CB(skb)->tos = ipv6_get_dsfield(iph);
-	XFRM_MODE_SKB_CB(skb)->ttl = iph->hop_limit;
-	XFRM_MODE_SKB_CB(skb)->optlen = 0;
-	memcpy(XFRM_MODE_SKB_CB(skb)->flow_lbl, iph->flow_lbl,
-	       sizeof(XFRM_MODE_SKB_CB(skb)->flow_lbl));
-}
-
-int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
-{
-	int err;
-
-	err = xfrm6_tunnel_check_size(skb);
-	if (err)
-		return err;
-
-	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
-
-	__xfrm6_extract_header(skb);
-	return 0;
-}
-
 int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
 {
 	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 8fbf5a68ee6e..15247f2f78e1 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -19,7 +19,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.proto			= IPPROTO_IPV6,
 	.output			= xfrm6_output,
 	.output_finish		= xfrm6_output_finish,
-	.extract_output		= xfrm6_extract_output,
 	.transport_finish	= xfrm6_transport_finish,
 	.local_error		= xfrm6_local_error,
 };
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index a7b3af7f7a1e..3a646df1318d 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -17,6 +17,11 @@
 #include <net/inet_ecn.h>
 #include <net/xfrm.h>
 
+#if IS_ENABLED(CONFIG_IPV6)
+#include <net/ip6_route.h>
+#include <net/ipv6_stubs.h>
+#endif
+
 #include "xfrm_inout.h"
 
 static int xfrm_output2(struct net *net, struct sock *sk, struct sk_buff *skb);
@@ -651,11 +656,60 @@ static int xfrm4_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	return 0;
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
+static int xfrm6_tunnel_check_size(struct sk_buff *skb)
+{
+	int mtu, ret = 0;
+	struct dst_entry *dst = skb_dst(skb);
+
+	if (skb->ignore_df)
+		goto out;
+
+	mtu = dst_mtu(dst);
+	if (mtu < IPV6_MIN_MTU)
+		mtu = IPV6_MIN_MTU;
+
+	if ((!skb_is_gso(skb) && skb->len > mtu) ||
+	    (skb_is_gso(skb) &&
+	     !skb_gso_validate_network_len(skb, ip6_skb_dst_mtu(skb)))) {
+		skb->dev = dst->dev;
+		skb->protocol = htons(ETH_P_IPV6);
+
+		if (xfrm6_local_dontfrag(skb->sk))
+			ipv6_stub->xfrm6_local_rxpmtu(skb, mtu);
+		else if (skb->sk)
+			xfrm_local_error(skb, mtu);
+		else
+			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+		ret = -EMSGSIZE;
+	}
+out:
+	return ret;
+}
+#endif
+
+static int xfrm6_extract_output(struct xfrm_state *x, struct sk_buff *skb)
+{
+#if IS_ENABLED(CONFIG_IPV6)
+	int err;
+
+	err = xfrm6_tunnel_check_size(skb);
+	if (err)
+		return err;
+
+	XFRM_MODE_SKB_CB(skb)->protocol = ipv6_hdr(skb)->nexthdr;
+
+	xfrm6_extract_header(skb);
+	return 0;
+#else
+	WARN_ON_ONCE(1);
+	return -EAFNOSUPPORT;
+#endif
+}
+
 static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 {
-	const struct xfrm_state_afinfo *afinfo;
 	const struct xfrm_mode *inner_mode;
-	int err = -EAFNOSUPPORT;
 
 	if (x->sel.family == AF_UNSPEC)
 		inner_mode = xfrm_ip2inner_mode(x,
@@ -669,14 +723,11 @@ static int xfrm_inner_extract_output(struct xfrm_state *x, struct sk_buff *skb)
 	switch (inner_mode->family) {
 	case AF_INET:
 		return xfrm4_extract_output(x, skb);
+	case AF_INET6:
+		return xfrm6_extract_output(x, skb);
 	}
-	rcu_read_lock();
-	afinfo = xfrm_state_afinfo_get_rcu(inner_mode->family);
-	if (likely(afinfo))
-		err = afinfo->extract_output(x, skb);
-	rcu_read_unlock();
 
-	return err;
+	return -EAFNOSUPPORT;
 }
 
 void xfrm_local_error(struct sk_buff *skb, int mtu)
-- 
cgit v1.2.3-59-g8ed1b


From 2ab6096db2f16b3a6adbad252f1be171e649028d Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 4 May 2020 10:06:09 +0200
Subject: xfrm: remove output_finish indirection from xfrm_state_afinfo

There are only two implementations, one for ipv4 and one for ipv6.

Both are almost identical, they clear skb->cb[], set the TRANSFORMED flag
in IP(6)CB and then call the common xfrm_output() function.

By placing the IPCB handling into the common function, we avoid the need
for the output_finish indirection as the output functions can simply
use xfrm_output().

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 include/net/xfrm.h      |  1 -
 net/ipv4/xfrm4_output.c | 23 +----------------------
 net/ipv4/xfrm4_state.c  |  1 -
 net/ipv6/xfrm6_output.c | 34 ++--------------------------------
 net/ipv6/xfrm6_state.c  |  1 -
 net/xfrm/xfrm_output.c  | 16 ++++++++++++++++
 6 files changed, 19 insertions(+), 57 deletions(-)

diff --git a/include/net/xfrm.h b/include/net/xfrm.h
index db814a7e042f..094fe682f5d7 100644
--- a/include/net/xfrm.h
+++ b/include/net/xfrm.h
@@ -361,7 +361,6 @@ struct xfrm_state_afinfo {
 	const struct xfrm_type		*type_dstopts;
 
 	int			(*output)(struct net *net, struct sock *sk, struct sk_buff *skb);
-	int			(*output_finish)(struct sock *sk, struct sk_buff *skb);
 	int			(*transport_finish)(struct sk_buff *skb,
 						    int async);
 	void			(*local_error)(struct sk_buff *skb, u32 mtu);
diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 21c8fa0a31ed..502eb189d852 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -14,22 +14,9 @@
 #include <net/xfrm.h>
 #include <net/icmp.h>
 
-int xfrm4_output_finish(struct sock *sk, struct sk_buff *skb)
-{
-	memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
-
-#ifdef CONFIG_NETFILTER
-	IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
-#endif
-
-	return xfrm_output(sk, skb);
-}
-
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
-	const struct xfrm_state_afinfo *afinfo;
-	int ret = -EAFNOSUPPORT;
 
 #ifdef CONFIG_NETFILTER
 	if (!x) {
@@ -38,15 +25,7 @@ static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 	}
 #endif
 
-	rcu_read_lock();
-	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
-	if (likely(afinfo))
-		ret = afinfo->output_finish(sk, skb);
-	else
-		kfree_skb(skb);
-	rcu_read_unlock();
-
-	return ret;
+	return xfrm_output(sk, skb);
 }
 
 int xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv4/xfrm4_state.c b/net/ipv4/xfrm4_state.c
index b23a1711297b..87d4db591488 100644
--- a/net/ipv4/xfrm4_state.c
+++ b/net/ipv4/xfrm4_state.c
@@ -14,7 +14,6 @@ static struct xfrm_state_afinfo xfrm4_state_afinfo = {
 	.family			= AF_INET,
 	.proto			= IPPROTO_IPIP,
 	.output			= xfrm4_output,
-	.output_finish		= xfrm4_output_finish,
 	.transport_finish	= xfrm4_transport_finish,
 	.local_error		= xfrm4_local_error,
 };
diff --git a/net/ipv6/xfrm6_output.c b/net/ipv6/xfrm6_output.c
index b7d65b344679..8b84d534b19d 100644
--- a/net/ipv6/xfrm6_output.c
+++ b/net/ipv6/xfrm6_output.c
@@ -47,39 +47,9 @@ void xfrm6_local_error(struct sk_buff *skb, u32 mtu)
 	ipv6_local_error(sk, EMSGSIZE, &fl6, mtu);
 }
 
-int xfrm6_output_finish(struct sock *sk, struct sk_buff *skb)
-{
-	memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
-
-#ifdef CONFIG_NETFILTER
-	IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
-#endif
-
-	return xfrm_output(sk, skb);
-}
-
-static int __xfrm6_output_state_finish(struct xfrm_state *x, struct sock *sk,
-				       struct sk_buff *skb)
-{
-	const struct xfrm_state_afinfo *afinfo;
-	int ret = -EAFNOSUPPORT;
-
-	rcu_read_lock();
-	afinfo = xfrm_state_afinfo_get_rcu(x->outer_mode.family);
-	if (likely(afinfo))
-		ret = afinfo->output_finish(sk, skb);
-	else
-		kfree_skb(skb);
-	rcu_read_unlock();
-
-	return ret;
-}
-
 static int __xfrm6_output_finish(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
-	struct xfrm_state *x = skb_dst(skb)->xfrm;
-
-	return __xfrm6_output_state_finish(x, sk, skb);
+	return xfrm_output(sk, skb);
 }
 
 static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
@@ -121,7 +91,7 @@ static int __xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 				    __xfrm6_output_finish);
 
 skip_frag:
-	return __xfrm6_output_state_finish(x, sk, skb);
+	return xfrm_output(sk, skb);
 }
 
 int xfrm6_output(struct net *net, struct sock *sk, struct sk_buff *skb)
diff --git a/net/ipv6/xfrm6_state.c b/net/ipv6/xfrm6_state.c
index 15247f2f78e1..6610b2198fa9 100644
--- a/net/ipv6/xfrm6_state.c
+++ b/net/ipv6/xfrm6_state.c
@@ -18,7 +18,6 @@ static struct xfrm_state_afinfo xfrm6_state_afinfo = {
 	.family			= AF_INET6,
 	.proto			= IPPROTO_IPV6,
 	.output			= xfrm6_output,
-	.output_finish		= xfrm6_output_finish,
 	.transport_finish	= xfrm6_transport_finish,
 	.local_error		= xfrm6_local_error,
 };
diff --git a/net/xfrm/xfrm_output.c b/net/xfrm/xfrm_output.c
index 3a646df1318d..9c43b8dd80fb 100644
--- a/net/xfrm/xfrm_output.c
+++ b/net/xfrm/xfrm_output.c
@@ -571,6 +571,22 @@ int xfrm_output(struct sock *sk, struct sk_buff *skb)
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 	int err;
 
+	switch (x->outer_mode.family) {
+	case AF_INET:
+		memset(IPCB(skb), 0, sizeof(*IPCB(skb)));
+#ifdef CONFIG_NETFILTER
+		IPCB(skb)->flags |= IPSKB_XFRM_TRANSFORMED;
+#endif
+		break;
+	case AF_INET6:
+		memset(IP6CB(skb), 0, sizeof(*IP6CB(skb)));
+
+#ifdef CONFIG_NETFILTER
+		IP6CB(skb)->flags |= IP6SKB_XFRM_TRANSFORMED;
+#endif
+		break;
+	}
+
 	secpath_reset(skb);
 
 	if (xfrm_dev_offload_ok(skb, x)) {
-- 
cgit v1.2.3-59-g8ed1b


From 0224b2acea0f9e3908d33e27b2dcb4e04686e997 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Tue, 5 May 2020 17:03:17 -0700
Subject: bpf, riscv: Enable missing verifier_zext optimizations on RV64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Commit 66d0d5a854a6 ("riscv: bpf: eliminate zero extension code-gen")
added support for the verifier zero-extension optimization on RV64 and
commit 46dd3d7d287b ("bpf, riscv: Enable zext optimization for more
RV64G ALU ops") enabled it for more instruction cases.

However, BPF_LSH BPF_X and BPF_{LSH,RSH,ARSH} BPF_K are still missing
the optimization.

This patch enables the zero-extension optimization for these remaining
cases.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Björn Töpel <bjorn.topel@gmail.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Link: https://lore.kernel.org/bpf/20200506000320.28965-2-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp64.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index d208a9fd6c52..e2636902a74e 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -515,7 +515,7 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 	case BPF_ALU | BPF_LSH | BPF_X:
 	case BPF_ALU64 | BPF_LSH | BPF_X:
 		emit(is64 ? rv_sll(rd, rd, rs) : rv_sllw(rd, rd, rs), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_RSH | BPF_X:
@@ -692,19 +692,19 @@ out_be:
 	case BPF_ALU | BPF_LSH | BPF_K:
 	case BPF_ALU64 | BPF_LSH | BPF_K:
 		emit(is64 ? rv_slli(rd, rd, imm) : rv_slliw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_RSH | BPF_K:
 	case BPF_ALU64 | BPF_RSH | BPF_K:
 		emit(is64 ? rv_srli(rd, rd, imm) : rv_srliw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 	case BPF_ALU | BPF_ARSH | BPF_K:
 	case BPF_ALU64 | BPF_ARSH | BPF_K:
 		emit(is64 ? rv_srai(rd, rd, imm) : rv_sraiw(rd, rd, imm), ctx);
-		if (!is64)
+		if (!is64 && !aux->verifier_zext)
 			emit_zext_32(rd, ctx);
 		break;
 
-- 
cgit v1.2.3-59-g8ed1b


From 21a099abb765c3754689e1f7ca4536fa560112d0 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Tue, 5 May 2020 17:03:18 -0700
Subject: bpf, riscv: Optimize FROM_LE using verifier_zext on RV64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds two optimizations for BPF_ALU BPF_END BPF_FROM_LE in
the RV64 BPF JIT.

First, it enables the verifier zero-extension optimization to avoid zero
extension when imm == 32. Second, it avoids generating code for imm ==
64, since it is equivalent to a no-op.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Björn Töpel <bjorn.topel@gmail.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Link: https://lore.kernel.org/bpf/20200506000320.28965-3-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp64.c | 20 ++++++++++++++------
 1 file changed, 14 insertions(+), 6 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index e2636902a74e..c3ce9a911b66 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -542,13 +542,21 @@ int bpf_jit_emit_insn(const struct bpf_insn *insn, struct rv_jit_context *ctx,
 
 	/* dst = BSWAP##imm(dst) */
 	case BPF_ALU | BPF_END | BPF_FROM_LE:
-	{
-		int shift = 64 - imm;
-
-		emit(rv_slli(rd, rd, shift), ctx);
-		emit(rv_srli(rd, rd, shift), ctx);
+		switch (imm) {
+		case 16:
+			emit(rv_slli(rd, rd, 48), ctx);
+			emit(rv_srli(rd, rd, 48), ctx);
+			break;
+		case 32:
+			if (!aux->verifier_zext)
+				emit_zext_32(rd, ctx);
+			break;
+		case 64:
+			/* Do nothing */
+			break;
+		}
 		break;
-	}
+
 	case BPF_ALU | BPF_END | BPF_FROM_BE:
 		emit(rv_addi(RV_REG_T2, RV_REG_ZERO, 0), ctx);
 
-- 
cgit v1.2.3-59-g8ed1b


From ca349a6a104e58479defdc08ce56472a48f7cb81 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Tue, 5 May 2020 17:03:19 -0700
Subject: bpf, riscv: Optimize BPF_JMP BPF_K when imm == 0 on RV64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch adds an optimization to BPF_JMP (32- and 64-bit) BPF_K for
when the BPF immediate is zero.

When the immediate is zero, the code can directly use the RISC-V zero
register instead of loading a zero immediate to a temporary register
first.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Björn Töpel <bjorn.topel@gmail.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Link: https://lore.kernel.org/bpf/20200506000320.28965-4-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp64.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index c3ce9a911b66..b07cef952019 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -796,7 +796,13 @@ out_be:
 	case BPF_JMP32 | BPF_JSET | BPF_K:
 		rvoff = rv_offset(i, off, ctx);
 		s = ctx->ninsns;
-		emit_imm(RV_REG_T1, imm, ctx);
+		if (imm) {
+			emit_imm(RV_REG_T1, imm, ctx);
+			rs = RV_REG_T1;
+		} else {
+			/* If imm is 0, simply use zero register. */
+			rs = RV_REG_ZERO;
+		}
 		if (!is64) {
 			if (is_signed_bpf_cond(BPF_OP(code)))
 				emit_sext_32_rd(&rd, ctx);
@@ -811,11 +817,10 @@ out_be:
 		if (BPF_OP(code) == BPF_JSET) {
 			/* Adjust for and */
 			rvoff -= 4;
-			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
-			emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff,
-				    ctx);
+			emit(rv_and(rs, rd, rs), ctx);
+			emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx);
 		} else {
-			emit_branch(BPF_OP(code), rd, RV_REG_T1, rvoff, ctx);
+			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
 		}
 		break;
 
-- 
cgit v1.2.3-59-g8ed1b


From 073ca6a0369e09c586a103e665f2dd67f1c71444 Mon Sep 17 00:00:00 2001
From: Luke Nelson <lukenels@cs.washington.edu>
Date: Tue, 5 May 2020 17:03:20 -0700
Subject: bpf, riscv: Optimize BPF_JSET BPF_K using andi on RV64
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

This patch optimizes BPF_JSET BPF_K by using a RISC-V andi instruction
when the BPF immediate fits in 12 bits, instead of first loading the
immediate to a temporary register.

Examples of generated code with and without this optimization:

BPF_JMP_IMM(BPF_JSET, R1, 2, 1) without optimization:

  20: li    t1,2
  24: and   t1,a0,t1
  28: bnez  t1,0x30

BPF_JMP_IMM(BPF_JSET, R1, 2, 1) with optimization:

  20: andi  t1,a0,2
  24: bnez  t1,0x2c

BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) without optimization:

  20: li    t1,2
  24: mv    t2,a0
  28: slli  t2,t2,0x20
  2c: srli  t2,t2,0x20
  30: slli  t1,t1,0x20
  34: srli  t1,t1,0x20
  38: and   t1,t2,t1
  3c: bnez  t1,0x44

BPF_JMP32_IMM(BPF_JSET, R1, 2, 1) with optimization:

  20: andi  t1,a0,2
  24: bnez  t1,0x2c

In these examples, because the upper 32 bits of the sign-extended
immediate are 0, BPF_JMP BPF_JSET and BPF_JMP32 BPF_JSET are equivalent
and therefore the JIT produces identical code for them.

Co-developed-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Xi Wang <xi.wang@gmail.com>
Signed-off-by: Luke Nelson <luke.r.nels@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Björn Töpel <bjorn.topel@gmail.com>
Acked-by: Björn Töpel <bjorn.topel@gmail.com>
Link: https://lore.kernel.org/bpf/20200506000320.28965-5-luke.r.nels@gmail.com
---
 arch/riscv/net/bpf_jit_comp64.c | 27 +++++++++++++++++++--------
 1 file changed, 19 insertions(+), 8 deletions(-)

diff --git a/arch/riscv/net/bpf_jit_comp64.c b/arch/riscv/net/bpf_jit_comp64.c
index b07cef952019..6cfd164cbe88 100644
--- a/arch/riscv/net/bpf_jit_comp64.c
+++ b/arch/riscv/net/bpf_jit_comp64.c
@@ -792,8 +792,6 @@ out_be:
 	case BPF_JMP32 | BPF_JSGE | BPF_K:
 	case BPF_JMP | BPF_JSLE | BPF_K:
 	case BPF_JMP32 | BPF_JSLE | BPF_K:
-	case BPF_JMP | BPF_JSET | BPF_K:
-	case BPF_JMP32 | BPF_JSET | BPF_K:
 		rvoff = rv_offset(i, off, ctx);
 		s = ctx->ninsns;
 		if (imm) {
@@ -813,15 +811,28 @@ out_be:
 
 		/* Adjust for extra insns */
 		rvoff -= (e - s) << 2;
+		emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+		break;
 
-		if (BPF_OP(code) == BPF_JSET) {
-			/* Adjust for and */
-			rvoff -= 4;
-			emit(rv_and(rs, rd, rs), ctx);
-			emit_branch(BPF_JNE, rs, RV_REG_ZERO, rvoff, ctx);
+	case BPF_JMP | BPF_JSET | BPF_K:
+	case BPF_JMP32 | BPF_JSET | BPF_K:
+		rvoff = rv_offset(i, off, ctx);
+		s = ctx->ninsns;
+		if (is_12b_int(imm)) {
+			emit(rv_andi(RV_REG_T1, rd, imm), ctx);
 		} else {
-			emit_branch(BPF_OP(code), rd, rs, rvoff, ctx);
+			emit_imm(RV_REG_T1, imm, ctx);
+			emit(rv_and(RV_REG_T1, rd, RV_REG_T1), ctx);
 		}
+		/* For jset32, we should clear the upper 32 bits of t1, but
+		 * sign-extension is sufficient here and saves one instruction,
+		 * as t1 is used only in comparison against zero.
+		 */
+		if (!is64 && imm < 0)
+			emit(rv_addiw(RV_REG_T1, RV_REG_T1, 0), ctx);
+		e = ctx->ninsns;
+		rvoff -= (e - s) << 2;
+		emit_branch(BPF_JNE, RV_REG_T1, RV_REG_ZERO, rvoff, ctx);
 		break;
 
 	/* function call */
-- 
cgit v1.2.3-59-g8ed1b


From 57a29df341466b5cca43ba3d2d7064426727d7c3 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Sat, 25 Apr 2020 02:49:14 +0800
Subject: iopoll: Introduce read_poll_timeout_atomic macro

Like read_poll_timeout, an atomic variant for read functions that take
multiple parameters can be useful.

Will be used by a later patch.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424184918.30360-1-kai.heng.feng@canonical.com
---
 include/linux/iopoll.h | 62 ++++++++++++++++++++++++++++++++++----------------
 1 file changed, 43 insertions(+), 19 deletions(-)

diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h
index cb20c733b15a..bc89ac625f26 100644
--- a/include/linux/iopoll.h
+++ b/include/linux/iopoll.h
@@ -57,6 +57,48 @@
 	(cond) ? 0 : -ETIMEDOUT; \
 })
 
+/**
+ * read_poll_timeout_atomic - Periodically poll an address until a condition is
+ * 				met or a timeout occurs
+ * @op: accessor function (takes @args as its arguments)
+ * @args: arguments passed to @op on every poll
+ * @val: Variable to read the value into
+ * @cond: Break condition (usually involving @val)
+ * @delay_us: Time to udelay between reads in us (0 tight-loops).  Should
+ *            be less than ~10us since udelay is used (see
+ *            Documentation/timers/timers-howto.rst).
+ * @timeout_us: Timeout in us, 0 means never timeout
+ * @delay_before_read: if it is true, delay @delay_us before read.
+ *
+ * Returns 0 on success and -ETIMEDOUT upon a timeout. In either
+ * case, the last read value at @args is stored in @val.
+ *
+ * When available, you'll probably want to use one of the specialized
+ * macros defined below rather than this macro directly.
+ */
+#define read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, \
+					delay_before_read, args...) \
+({ \
+	u64 __timeout_us = (timeout_us); \
+	unsigned long __delay_us = (delay_us); \
+	ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
+	if (delay_before_read && __delay_us) \
+		udelay(__delay_us); \
+	for (;;) { \
+		(val) = op(args); \
+		if (cond) \
+			break; \
+		if (__timeout_us && \
+		    ktime_compare(ktime_get(), __timeout) > 0) { \
+			(val) = op(args); \
+			break; \
+		} \
+		if (__delay_us) \
+			udelay(__delay_us); \
+	} \
+	(cond) ? 0 : -ETIMEDOUT; \
+})
+
 /**
  * readx_poll_timeout - Periodically poll an address until a condition is met or a timeout occurs
  * @op: accessor function (takes @addr as its only argument)
@@ -96,25 +138,7 @@
  * macros defined below rather than this macro directly.
  */
 #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \
-({ \
-	u64 __timeout_us = (timeout_us); \
-	unsigned long __delay_us = (delay_us); \
-	ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \
-	for (;;) { \
-		(val) = op(addr); \
-		if (cond) \
-			break; \
-		if (__timeout_us && \
-		    ktime_compare(ktime_get(), __timeout) > 0) { \
-			(val) = op(addr); \
-			break; \
-		} \
-		if (__delay_us) \
-			udelay(__delay_us);	\
-	} \
-	(cond) ? 0 : -ETIMEDOUT; \
-})
-
+	read_poll_timeout_atomic(op, val, cond, delay_us, timeout_us, false, addr)
 
 #define readb_poll_timeout(addr, val, cond, delay_us, timeout_us) \
 	readx_poll_timeout(readb, addr, val, cond, delay_us, timeout_us)
-- 
cgit v1.2.3-59-g8ed1b


From fd5d781964b05ab586e690923dba6eca3cc16723 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Thu, 23 Apr 2020 15:30:07 +0800
Subject: rtw88: Use udelay instead of usleep in atomic context

It's incorrect to use usleep in atomic context.

Switch to a macro which uses udelay instead of usleep to prevent the issue.

Fixes: 6343a6d4b213 ("rtw88: Add delay on polling h2c command status bit")
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200423073007.3566-1-kai.heng.feng@canonical.com
---
 drivers/net/wireless/realtek/rtw88/fw.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index dde7823143ea..5e981fdeee3c 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -228,9 +228,9 @@ static void rtw_fw_send_h2c_command(struct rtw_dev *rtwdev,
 		goto out;
 	}
 
-	ret = read_poll_timeout(rtw_read8, box_state,
-				!((box_state >> box) & 0x1), 100, 3000, false,
-				rtwdev, REG_HMETFR);
+	ret = read_poll_timeout_atomic(rtw_read8, box_state,
+				       !((box_state >> box) & 0x1), 100, 3000,
+				       false, rtwdev, REG_HMETFR);
 
 	if (ret) {
 		rtw_err(rtwdev, "failed to send h2c command\n");
-- 
cgit v1.2.3-59-g8ed1b


From c03e3fe91c1916e5adc97befee1ca5efe5c39bda Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 24 Apr 2020 17:45:27 +0200
Subject: ipw2x00: Remove a memory allocation failure log message

Axe a memory allocation failure log message. This message is useless and
incorrect (vmalloc is not used here for the memory allocation)

This has been like that since the very beginning of this driver in
commit 43f66a6ce8da ("Add ipw2200 wireless driver.")

Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200424154527.27309-1-christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/intel/ipw2x00/ipw2200.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index 923be3781c92..e9e686ad57b1 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -3770,10 +3770,8 @@ static int ipw_queue_tx_init(struct ipw_priv *priv,
 	struct pci_dev *dev = priv->pci_dev;
 
 	q->txb = kmalloc_array(count, sizeof(q->txb[0]), GFP_KERNEL);
-	if (!q->txb) {
-		IPW_ERROR("vmalloc for auxiliary BD structures failed\n");
+	if (!q->txb)
 		return -ENOMEM;
-	}
 
 	q->bd =
 	    pci_alloc_consistent(dev, sizeof(q->bd[0]) * count, &q->q.dma_addr);
-- 
cgit v1.2.3-59-g8ed1b


From fb1a9fc550cf748ba1225d734539ae97b5699b02 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sun, 26 Apr 2020 17:41:15 +0800
Subject: rtlwifi: use true,false for bool variable in rtl_init_rfkill()

'blocked' is a bool variable, and the '==' expression is itself a bool,
so there is no need to convert it to 0/1.

This fixes the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/base.c:508:13-41: WARNING:
Comparison of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426094115.23294-1-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/base.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/base.c b/drivers/net/wireless/realtek/rtlwifi/base.c
index c75192c4447f..a4489b9302d4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/base.c
+++ b/drivers/net/wireless/realtek/rtlwifi/base.c
@@ -505,7 +505,7 @@ void rtl_init_rfkill(struct ieee80211_hw *hw)
 
 		rtlpriv->rfkill.rfkill_state = radio_state;
 
-		blocked = (rtlpriv->rfkill.rfkill_state == 1) ? 0 : 1;
+		blocked = rtlpriv->rfkill.rfkill_state != 1;
 		wiphy_rfkill_set_hw_state(hw->wiphy, blocked);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 1f15d7c8f3fcdf8c89e3c13731b2271e3647bbc4 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Sun, 26 Apr 2020 18:37:09 +0800
Subject: ray_cs: use true,false for bool variable

Fix the following coccicheck warning:

drivers/net/wireless/ray_cs.c:2797:5-14: WARNING: Comparison of 0/1 to
bool variable
drivers/net/wireless/ray_cs.c:2798:2-11: WARNING: Assignment of 0/1 to
bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426103709.6730-1-yanaijie@huawei.com
---
 drivers/net/wireless/ray_cs.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ray_cs.c b/drivers/net/wireless/ray_cs.c
index c1d542bfa530..bf3fbd14eda3 100644
--- a/drivers/net/wireless/ray_cs.c
+++ b/drivers/net/wireless/ray_cs.c
@@ -2794,8 +2794,7 @@ static int __init init_ray_cs(void)
 	proc_create_data("driver/ray_cs/translate", 0200, NULL, &int_proc_ops,
 			 &translate);
 #endif
-	if (translate != 0)
-		translate = 1;
+	translate = !!translate;
 	return 0;
 } /* init_ray_cs */
 
-- 
cgit v1.2.3-59-g8ed1b


From db39a9ddacada1e4c065d894faa3fa0e1100b10d Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:03 +0800
Subject: rtw88: 8723d: Add DIG parameter

To improve the user experience in the field, we need DIG to adjust the RX
initial gain depending on the field situation. Since each chip has its own
register addresses, this commit defines the 8723d-specific addresses.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 8ca4d5794434..52afa72caf8f 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -706,6 +706,11 @@ static const struct rtw_rqpn rqpn_table_8723d[] = {
 	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
 };
 
+static const struct rtw_hw_reg rtw8723d_dig[] = {
+	[0] = { .addr = 0xc50, .mask = 0x7f },
+	[1] = { .addr = 0xc50, .mask = 0x7f },
+};
+
 static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = {
 	[RF_PATH_A] = { .hssi_1 = 0x820, .lssi_read    = 0x8a0,
 			.hssi_2 = 0x824, .lssi_read_pi = 0x8b8},
@@ -738,6 +743,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.csi_buf_pg_num = 0,
 	.band = RTW_BAND_2G,
 	.page_size = 128,
+	.dig_min = 0x20,
 	.ht_supported = true,
 	.vht_supported = false,
 	.lps_deep_mode_supported = 0,
@@ -746,6 +752,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.pwr_off_seq = card_disable_flow_8723d,
 	.page_table = page_table_8723d,
 	.rqpn_table = rqpn_table_8723d,
+	.dig = rtw8723d_dig,
 	.rf_sipi_addr = {0x840, 0x844},
 	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
 	.fix_rf_phy_num = 2,
-- 
cgit v1.2.3-59-g8ed1b


From 158441a2bed49ea294cc79709bf88e17a7b71912 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:04 +0800
Subject: rtw88: 8723d: Add query_rx_desc

This op is used to parse the RX descriptor to learn the length of the
received packet and whether it contains PHY status. If PHY status is
present, the order is RX descriptor, PHY status and then packet.

There are two types of PHY status, named CCK and OFDM. Their sizes are the
same, but their formats are different.

struct ieee80211_rx_status is also filled in based on the above information.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 116 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |  22 +++++
 2 files changed, 138 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 52afa72caf8f..653cfa9445fc 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -172,6 +172,121 @@ static int rtw8723d_read_efuse(struct rtw_dev *rtwdev, u8 *log_map)
 	return 0;
 }
 
+static void query_phy_status_page0(struct rtw_dev *rtwdev, u8 *phy_status,
+				   struct rtw_rx_pkt_stat *pkt_stat)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	s8 min_rx_power = -120;
+	u8 pwdb = GET_PHY_STAT_P0_PWDB(phy_status);
+
+	pkt_stat->rx_power[RF_PATH_A] = pwdb - 97;
+	pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1);
+	pkt_stat->bw = RTW_CHANNEL_WIDTH_20;
+	pkt_stat->signal_power = max(pkt_stat->rx_power[RF_PATH_A],
+				     min_rx_power);
+	dm_info->rssi[RF_PATH_A] = pkt_stat->rssi;
+}
+
+static void query_phy_status_page1(struct rtw_dev *rtwdev, u8 *phy_status,
+				   struct rtw_rx_pkt_stat *pkt_stat)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 rxsc, bw;
+	s8 min_rx_power = -120;
+	s8 rx_evm;
+
+	if (pkt_stat->rate > DESC_RATE11M && pkt_stat->rate < DESC_RATEMCS0)
+		rxsc = GET_PHY_STAT_P1_L_RXSC(phy_status);
+	else
+		rxsc = GET_PHY_STAT_P1_HT_RXSC(phy_status);
+
+	if (GET_PHY_STAT_P1_RF_MODE(phy_status) == 0)
+		bw = RTW_CHANNEL_WIDTH_20;
+	else if ((rxsc == 1) || (rxsc == 2))
+		bw = RTW_CHANNEL_WIDTH_20;
+	else
+		bw = RTW_CHANNEL_WIDTH_40;
+
+	pkt_stat->rx_power[RF_PATH_A] = GET_PHY_STAT_P1_PWDB_A(phy_status) - 110;
+	pkt_stat->rssi = rtw_phy_rf_power_2_rssi(pkt_stat->rx_power, 1);
+	pkt_stat->bw = bw;
+	pkt_stat->signal_power = max(pkt_stat->rx_power[RF_PATH_A],
+				     min_rx_power);
+	pkt_stat->rx_evm[RF_PATH_A] = GET_PHY_STAT_P1_RXEVM_A(phy_status);
+	pkt_stat->rx_snr[RF_PATH_A] = GET_PHY_STAT_P1_RXSNR_A(phy_status);
+	pkt_stat->cfo_tail[RF_PATH_A] = GET_PHY_STAT_P1_CFO_TAIL_A(phy_status);
+
+	dm_info->curr_rx_rate = pkt_stat->rate;
+	dm_info->rssi[RF_PATH_A] = pkt_stat->rssi;
+	dm_info->rx_snr[RF_PATH_A] = pkt_stat->rx_snr[RF_PATH_A] >> 1;
+	dm_info->cfo_tail[RF_PATH_A] = (pkt_stat->cfo_tail[RF_PATH_A] * 5) >> 1;
+
+	rx_evm = clamp_t(s8, -pkt_stat->rx_evm[RF_PATH_A] >> 1, 0, 64);
+	rx_evm &= 0x3F;	/* 64->0: second path of 1SS rate is 64 */
+	dm_info->rx_evm_dbm[RF_PATH_A] = rx_evm;
+}
+
+static void query_phy_status(struct rtw_dev *rtwdev, u8 *phy_status,
+			     struct rtw_rx_pkt_stat *pkt_stat)
+{
+	u8 page;
+
+	page = *phy_status & 0xf;
+
+	switch (page) {
+	case 0:
+		query_phy_status_page0(rtwdev, phy_status, pkt_stat);
+		break;
+	case 1:
+		query_phy_status_page1(rtwdev, phy_status, pkt_stat);
+		break;
+	default:
+		rtw_warn(rtwdev, "unused phy status page (%d)\n", page);
+		return;
+	}
+}
+
+static void rtw8723d_query_rx_desc(struct rtw_dev *rtwdev, u8 *rx_desc,
+				   struct rtw_rx_pkt_stat *pkt_stat,
+				   struct ieee80211_rx_status *rx_status)
+{
+	struct ieee80211_hdr *hdr;
+	u32 desc_sz = rtwdev->chip->rx_pkt_desc_sz;
+	u8 *phy_status = NULL;
+
+	memset(pkt_stat, 0, sizeof(*pkt_stat));
+
+	pkt_stat->phy_status = GET_RX_DESC_PHYST(rx_desc);
+	pkt_stat->icv_err = GET_RX_DESC_ICV_ERR(rx_desc);
+	pkt_stat->crc_err = GET_RX_DESC_CRC32(rx_desc);
+	pkt_stat->decrypted = !GET_RX_DESC_SWDEC(rx_desc) &&
+			      GET_RX_DESC_ENC_TYPE(rx_desc) != RX_DESC_ENC_NONE;
+	pkt_stat->is_c2h = GET_RX_DESC_C2H(rx_desc);
+	pkt_stat->pkt_len = GET_RX_DESC_PKT_LEN(rx_desc);
+	pkt_stat->drv_info_sz = GET_RX_DESC_DRV_INFO_SIZE(rx_desc);
+	pkt_stat->shift = GET_RX_DESC_SHIFT(rx_desc);
+	pkt_stat->rate = GET_RX_DESC_RX_RATE(rx_desc);
+	pkt_stat->cam_id = GET_RX_DESC_MACID(rx_desc);
+	pkt_stat->ppdu_cnt = 0;
+	pkt_stat->tsf_low = GET_RX_DESC_TSFL(rx_desc);
+
+	/* drv_info_sz is in unit of 8-bytes */
+	pkt_stat->drv_info_sz *= 8;
+
+	/* c2h cmd pkt's rx/phy status is not interested */
+	if (pkt_stat->is_c2h)
+		return;
+
+	hdr = (struct ieee80211_hdr *)(rx_desc + desc_sz + pkt_stat->shift +
+				       pkt_stat->drv_info_sz);
+	if (pkt_stat->phy_status) {
+		phy_status = rx_desc + desc_sz + pkt_stat->shift;
+		query_phy_status(rtwdev, phy_status, pkt_stat);
+	}
+
+	rtw_rx_fill_rx_status(rtwdev, pkt_stat, hdr, rx_status, phy_status);
+}
+
 #define BIT_CFENDFORM		BIT(9)
 #define BIT_WMAC_TCR_ERR0	BIT(12)
 #define BIT_WMAC_TCR_ERR1	BIT(13)
@@ -267,6 +382,7 @@ static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
 static struct rtw_chip_ops rtw8723d_ops = {
 	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
+	.query_rx_desc		= rtw8723d_query_rx_desc,
 	.mac_init		= rtw8723d_mac_init,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 6321dea83519..035049a29e7c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -44,6 +44,28 @@ struct rtw8723d_efuse {
 	struct rtw8723de_efuse e;
 };
 
+/* phy status page0 */
+#define GET_PHY_STAT_P0_PWDB(phy_stat)                                         \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x00), GENMASK(15, 8))
+
+/* phy status page1 */
+#define GET_PHY_STAT_P1_PWDB_A(phy_stat)                                       \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x00), GENMASK(15, 8))
+#define GET_PHY_STAT_P1_PWDB_B(phy_stat)                                       \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x00), GENMASK(23, 16))
+#define GET_PHY_STAT_P1_RF_MODE(phy_stat)                                      \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x03), GENMASK(29, 28))
+#define GET_PHY_STAT_P1_L_RXSC(phy_stat)                                       \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x01), GENMASK(11, 8))
+#define GET_PHY_STAT_P1_HT_RXSC(phy_stat)                                      \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x01), GENMASK(15, 12))
+#define GET_PHY_STAT_P1_RXEVM_A(phy_stat)                                      \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x04), GENMASK(7, 0))
+#define GET_PHY_STAT_P1_CFO_TAIL_A(phy_stat)                                   \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x05), GENMASK(7, 0))
+#define GET_PHY_STAT_P1_RXSNR_A(phy_stat)                                      \
+	le32_get_bits(*((__le32 *)(phy_stat) + 0x06), GENMASK(7, 0))
+
 #define REG_OFDM0_XAAGC1	0x0c50
 #define REG_OFDM0_XBAGC1	0x0c58
 
-- 
cgit v1.2.3-59-g8ed1b


From 5f028a9cf4b9e503151b25284384269beb0b742e Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:05 +0800
Subject: rtw88: 8723d: Add set_channel

Set the MAC/BB/RF registers according to the specified channel. The
function rtw_set_channel_mac() is used to set the MAC registers, but 8723D
only needs some of them.

Channel 14 needs different CCK DFIR values, so restore the values when
one of channels 1 to 13 is selected.

Spur calibration is needed on channels 13 and 14, and we apply a notch if
the spur is over the threshold.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c      |   3 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 161 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |  31 +++++
 3 files changed, 195 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index bd82b48e02f4..e8ffeb338584 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -40,6 +40,9 @@ void rtw_set_channel_mac(struct rtw_dev *rtwdev, u8 channel, u8 bw,
 	}
 	rtw_write32(rtwdev, REG_WMAC_TRXPTCL_CTL, value32);
 
+	if (rtw_chip_wcpu_11n(rtwdev))
+		return;
+
 	value32 = rtw_read32(rtwdev, REG_AFE_CTRL1) & ~(BIT_MAC_CLK_SEL);
 	value32 |= (MAC_CLK_HW_DEF_80M << BIT_SHIFT_MAC_CLK_SEL);
 	rtw_write32(rtwdev, REG_AFE_CTRL1, value32);
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 653cfa9445fc..6011ca8352b3 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -287,6 +287,166 @@ static void rtw8723d_query_rx_desc(struct rtw_dev *rtwdev, u8 *rx_desc,
 	rtw_rx_fill_rx_status(rtwdev, pkt_stat, hdr, rx_status, phy_status);
 }
 
+static bool rtw8723d_check_spur_ov_thres(struct rtw_dev *rtwdev,
+					 u8 channel, u32 thres)
+{
+	u32 freq;
+	bool ret = false;
+
+	if (channel == 13)
+		freq = FREQ_CH13;
+	else if (channel == 14)
+		freq = FREQ_CH14;
+	else
+		return false;
+
+	rtw_write32(rtwdev, REG_ANALOG_P4, DIS_3WIRE);
+	rtw_write32(rtwdev, REG_PSDFN, freq);
+	rtw_write32(rtwdev, REG_PSDFN, START_PSD | freq);
+
+	msleep(30);
+	if (rtw_read32(rtwdev, REG_PSDRPT) >= thres)
+		ret = true;
+
+	rtw_write32(rtwdev, REG_PSDFN, freq);
+	rtw_write32(rtwdev, REG_ANALOG_P4, EN_3WIRE);
+
+	return ret;
+}
+
+static void rtw8723d_cfg_notch(struct rtw_dev *rtwdev, u8 channel, bool notch)
+{
+	if (!notch) {
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_MASK_RXDSP, 0x1f);
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_EN_RXDSP, 0x0);
+		rtw_write32(rtwdev, REG_OFDM1_CSI1, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI2, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI3, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI4, 0x00000000);
+		rtw_write32_mask(rtwdev, REG_OFDM1_CFOTRK, BIT_EN_CFOTRK, 0x0);
+		return;
+	}
+
+	switch (channel) {
+	case 13:
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_MASK_RXDSP, 0xb);
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_EN_RXDSP, 0x1);
+		rtw_write32(rtwdev, REG_OFDM1_CSI1, 0x04000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI2, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI3, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI4, 0x00000000);
+		rtw_write32_mask(rtwdev, REG_OFDM1_CFOTRK, BIT_EN_CFOTRK, 0x1);
+		break;
+	case 14:
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_MASK_RXDSP, 0x5);
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_EN_RXDSP, 0x1);
+		rtw_write32(rtwdev, REG_OFDM1_CSI1, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI2, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI3, 0x00000000);
+		rtw_write32(rtwdev, REG_OFDM1_CSI4, 0x00080000);
+		rtw_write32_mask(rtwdev, REG_OFDM1_CFOTRK, BIT_EN_CFOTRK, 0x1);
+		break;
+	default:
+		rtw_write32_mask(rtwdev, REG_OFDM0_RXDSP, BIT_EN_RXDSP, 0x0);
+		rtw_write32_mask(rtwdev, REG_OFDM1_CFOTRK, BIT_EN_CFOTRK, 0x0);
+		break;
+	}
+}
+
+static void rtw8723d_spur_cal(struct rtw_dev *rtwdev, u8 channel)
+{
+	bool notch;
+
+	if (channel < 13) {
+		rtw8723d_cfg_notch(rtwdev, channel, false);
+		return;
+	}
+
+	notch = rtw8723d_check_spur_ov_thres(rtwdev, channel, SPUR_THRES);
+	rtw8723d_cfg_notch(rtwdev, channel, notch);
+}
+
+static void rtw8723d_set_channel_rf(struct rtw_dev *rtwdev, u8 channel, u8 bw)
+{
+	u32 rf_cfgch_a, rf_cfgch_b;
+
+	rf_cfgch_a = rtw_read_rf(rtwdev, RF_PATH_A, RF_CFGCH, RFREG_MASK);
+	rf_cfgch_b = rtw_read_rf(rtwdev, RF_PATH_B, RF_CFGCH, RFREG_MASK);
+
+	rf_cfgch_a &= ~RFCFGCH_CHANNEL_MASK;
+	rf_cfgch_b &= ~RFCFGCH_CHANNEL_MASK;
+	rf_cfgch_a |= (channel & RFCFGCH_CHANNEL_MASK);
+	rf_cfgch_b |= (channel & RFCFGCH_CHANNEL_MASK);
+
+	rf_cfgch_a &= ~RFCFGCH_BW_MASK;
+	switch (bw) {
+	case RTW_CHANNEL_WIDTH_20:
+		rf_cfgch_a |= RFCFGCH_BW_20M;
+		break;
+	case RTW_CHANNEL_WIDTH_40:
+		rf_cfgch_a |= RFCFGCH_BW_40M;
+		break;
+	default:
+		break;
+	}
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_CFGCH, RFREG_MASK, rf_cfgch_a);
+	rtw_write_rf(rtwdev, RF_PATH_B, RF_CFGCH, RFREG_MASK, rf_cfgch_b);
+
+	rtw8723d_spur_cal(rtwdev, channel);
+}
+
+static const struct rtw_backup_info cck_dfir_cfg[][CCK_DFIR_NR] = {
+	[0] = {
+		{ .len = 4, .reg = 0xA24, .val = 0x64B80C1C },
+		{ .len = 4, .reg = 0xA28, .val = 0x00008810 },
+		{ .len = 4, .reg = 0xAAC, .val = 0x01235667 },
+	},
+	[1] = {
+		{ .len = 4, .reg = 0xA24, .val = 0x0000B81C },
+		{ .len = 4, .reg = 0xA28, .val = 0x00000000 },
+		{ .len = 4, .reg = 0xAAC, .val = 0x00003667 },
+	},
+};
+
+static void rtw8723d_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
+				    u8 primary_ch_idx)
+{
+	const struct rtw_backup_info *cck_dfir;
+	int i;
+
+	cck_dfir = channel <= 13 ? cck_dfir_cfg[0] : cck_dfir_cfg[1];
+
+	for (i = 0; i < CCK_DFIR_NR; i++, cck_dfir++)
+		rtw_write32(rtwdev, cck_dfir->reg, cck_dfir->val);
+
+	switch (bw) {
+	case RTW_CHANNEL_WIDTH_20:
+		rtw_write32_mask(rtwdev, REG_FPGA0_RFMOD, BIT_MASK_RFMOD, 0x0);
+		rtw_write32_mask(rtwdev, REG_FPGA1_RFMOD, BIT_MASK_RFMOD, 0x0);
+		rtw_write32_mask(rtwdev, REG_BBRX_DFIR, BIT_RXBB_DFIR_EN, 1);
+		rtw_write32_mask(rtwdev, REG_BBRX_DFIR, BIT_MASK_RXBB_DFIR, 0xa);
+		break;
+	case RTW_CHANNEL_WIDTH_40:
+		rtw_write32_mask(rtwdev, REG_FPGA0_RFMOD, BIT_MASK_RFMOD, 0x1);
+		rtw_write32_mask(rtwdev, REG_FPGA1_RFMOD, BIT_MASK_RFMOD, 0x1);
+		rtw_write32_mask(rtwdev, REG_BBRX_DFIR, BIT_RXBB_DFIR_EN, 0);
+		rtw_write32_mask(rtwdev, REG_CCK0_SYS, BIT_CCK_SIDE_BAND,
+				 (primary_ch_idx == RTW_SC_20_UPPER ? 1 : 0));
+		break;
+	default:
+		break;
+	}
+}
+
+static void rtw8723d_set_channel(struct rtw_dev *rtwdev, u8 channel, u8 bw,
+				 u8 primary_chan_idx)
+{
+	rtw8723d_set_channel_rf(rtwdev, channel, bw);
+	rtw_set_channel_mac(rtwdev, channel, bw, primary_chan_idx);
+	rtw8723d_set_channel_bb(rtwdev, channel, bw, primary_chan_idx);
+}
+
 #define BIT_CFENDFORM		BIT(9)
 #define BIT_WMAC_TCR_ERR0	BIT(12)
 #define BIT_WMAC_TCR_ERR1	BIT(13)
@@ -383,6 +543,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
 	.query_rx_desc		= rtw8723d_query_rx_desc,
+	.set_channel		= rtw8723d_set_channel,
 	.mac_init		= rtw8723d_mac_init,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 035049a29e7c..06614602de54 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -66,7 +66,38 @@ struct rtw8723d_efuse {
 #define GET_PHY_STAT_P1_RXSNR_A(phy_stat)                                      \
 	le32_get_bits(*((__le32 *)(phy_stat) + 0x06), GENMASK(7, 0))
 
+#define SPUR_THRES		0x16
+#define CCK_DFIR_NR		3
+#define DIS_3WIRE		0xccf000c0
+#define EN_3WIRE		0xccc000c0
+#define START_PSD		0x400000
+#define FREQ_CH13		0xfccd
+#define FREQ_CH14		0xff9a
+#define RFCFGCH_CHANNEL_MASK	GENMASK(7, 0)
+#define RFCFGCH_BW_MASK		(BIT(11) | BIT(10))
+#define RFCFGCH_BW_20M		(BIT(11) | BIT(10))
+#define RFCFGCH_BW_40M		BIT(10)
+#define BIT_MASK_RFMOD		BIT(0)
+
+#define REG_PSDFN		0x0808
+#define REG_ANALOG_P4		0x088c
+#define REG_PSDRPT		0x08b4
+#define REG_FPGA1_RFMOD		0x0900
+#define REG_BBRX_DFIR		0x0954
+#define BIT_MASK_RXBB_DFIR	GENMASK(27, 24)
+#define BIT_RXBB_DFIR_EN	BIT(19)
+#define REG_CCK0_SYS		0x0a00
+#define BIT_CCK_SIDE_BAND	BIT(4)
+#define REG_OFDM0_RXDSP		0x0c40
+#define BIT_MASK_RXDSP		GENMASK(28, 24)
+#define BIT_EN_RXDSP		BIT(9)
 #define REG_OFDM0_XAAGC1	0x0c50
 #define REG_OFDM0_XBAGC1	0x0c58
+#define REG_OFDM1_CFOTRK	0x0d2c
+#define BIT_EN_CFOTRK		BIT(28)
+#define REG_OFDM1_CSI1		0x0d40
+#define REG_OFDM1_CSI2		0x0d44
+#define REG_OFDM1_CSI3		0x0d48
+#define REG_OFDM1_CSI4		0x0d4c
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 614b1f874454b6d01f1e376f72172cba5404e738 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:06 +0800
Subject: rtw88: handle C2H_CCX_TX_RPT to know if packet TX'ed successfully

The TX status report of 8723D differs from that of 8822B/8822C: it uses
C2H_CCX_TX_RPT (0x03) with a different format. With the sequence number
and TX status, the driver can know whether a certain packet was
transmitted successfully.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-5-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/fw.c |  6 +++++-
 drivers/net/wireless/realtek/rtw88/fw.h |  7 +++++--
 drivers/net/wireless/realtek/rtw88/tx.c | 11 ++++++++---
 drivers/net/wireless/realtek/rtw88/tx.h |  2 +-
 4 files changed, 19 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 5e981fdeee3c..2c28afe525c7 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -25,7 +25,7 @@ static void rtw_fw_c2h_cmd_handle_ext(struct rtw_dev *rtwdev,
 
 	switch (sub_cmd_id) {
 	case C2H_CCX_RPT:
-		rtw_tx_report_handle(rtwdev, skb);
+		rtw_tx_report_handle(rtwdev, skb, C2H_CCX_RPT);
 		break;
 	default:
 		break;
@@ -142,6 +142,9 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 		goto unlock;
 
 	switch (c2h->id) {
+	case C2H_CCX_TX_RPT:
+		rtw_tx_report_handle(rtwdev, skb, C2H_CCX_TX_RPT);
+		break;
 	case C2H_BT_INFO:
 		rtw_coex_bt_info_notify(rtwdev, c2h->payload, len);
 		break;
@@ -155,6 +158,7 @@ void rtw_fw_c2h_cmd_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 		rtw_fw_ra_report_handle(rtwdev, c2h->payload, len);
 		break;
 	default:
+		rtw_dbg(rtwdev, RTW_DBG_FW, "C2H 0x%x isn't handled\n", c2h->id);
 		break;
 	}
 
diff --git a/drivers/net/wireless/realtek/rtw88/fw.h b/drivers/net/wireless/realtek/rtw88/fw.h
index 2933ef741e53..470e1809645a 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.h
+++ b/drivers/net/wireless/realtek/rtw88/fw.h
@@ -26,6 +26,7 @@
 #define FW_START_ADDR_LEGACY		0x1000
 
 enum rtw_c2h_cmd_id {
+	C2H_CCX_TX_RPT = 0x03,
 	C2H_BT_INFO = 0x09,
 	C2H_BT_MP_INFO = 0x0b,
 	C2H_RA_RPT = 0x0c,
@@ -218,8 +219,10 @@ struct rtw_fw_hdr_legacy {
 } __packed;
 
 /* C2H */
-#define GET_CCX_REPORT_SEQNUM(c2h_payload)	(c2h_payload[8] & 0xfc)
-#define GET_CCX_REPORT_STATUS(c2h_payload)	(c2h_payload[9] & 0xc0)
+#define GET_CCX_REPORT_SEQNUM_V0(c2h_payload)	(c2h_payload[6] & 0xfc)
+#define GET_CCX_REPORT_STATUS_V0(c2h_payload)	(c2h_payload[0] & 0xc0)
+#define GET_CCX_REPORT_SEQNUM_V1(c2h_payload)	(c2h_payload[8] & 0xfc)
+#define GET_CCX_REPORT_STATUS_V1(c2h_payload)	(c2h_payload[9] & 0xc0)
 
 #define GET_RA_REPORT_RATE(c2h_payload)		(c2h_payload[0] & 0x7f)
 #define GET_RA_REPORT_SGI(c2h_payload)		((c2h_payload[0] & 0x80) >> 7)
diff --git a/drivers/net/wireless/realtek/rtw88/tx.c b/drivers/net/wireless/realtek/rtw88/tx.c
index 60989987f67b..79c42118825f 100644
--- a/drivers/net/wireless/realtek/rtw88/tx.c
+++ b/drivers/net/wireless/realtek/rtw88/tx.c
@@ -196,7 +196,7 @@ static void rtw_tx_report_tx_status(struct rtw_dev *rtwdev,
 	ieee80211_tx_status_irqsafe(rtwdev->hw, skb);
 }
 
-void rtw_tx_report_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
+void rtw_tx_report_handle(struct rtw_dev *rtwdev, struct sk_buff *skb, int src)
 {
 	struct rtw_tx_report *tx_report = &rtwdev->tx_report;
 	struct rtw_c2h_cmd *c2h;
@@ -207,8 +207,13 @@ void rtw_tx_report_handle(struct rtw_dev *rtwdev, struct sk_buff *skb)
 
 	c2h = get_c2h_from_skb(skb);
 
-	sn = GET_CCX_REPORT_SEQNUM(c2h->payload);
-	st = GET_CCX_REPORT_STATUS(c2h->payload);
+	if (src == C2H_CCX_TX_RPT) {
+		sn = GET_CCX_REPORT_SEQNUM_V0(c2h->payload);
+		st = GET_CCX_REPORT_STATUS_V0(c2h->payload);
+	} else {
+		sn = GET_CCX_REPORT_SEQNUM_V1(c2h->payload);
+		st = GET_CCX_REPORT_STATUS_V1(c2h->payload);
+	}
 
 	spin_lock_irqsave(&tx_report->q_lock, flags);
 	skb_queue_walk_safe(&tx_report->queue, cur, tmp) {
diff --git a/drivers/net/wireless/realtek/rtw88/tx.h b/drivers/net/wireless/realtek/rtw88/tx.h
index b973de0f4dc0..72dfd4059f03 100644
--- a/drivers/net/wireless/realtek/rtw88/tx.h
+++ b/drivers/net/wireless/realtek/rtw88/tx.h
@@ -95,7 +95,7 @@ void rtw_tx_pkt_info_update(struct rtw_dev *rtwdev,
 			    struct sk_buff *skb);
 void rtw_tx_fill_tx_desc(struct rtw_tx_pkt_info *pkt_info, struct sk_buff *skb);
 void rtw_tx_report_enqueue(struct rtw_dev *rtwdev, struct sk_buff *skb, u8 sn);
-void rtw_tx_report_handle(struct rtw_dev *rtwdev, struct sk_buff *skb);
+void rtw_tx_report_handle(struct rtw_dev *rtwdev, struct sk_buff *skb, int src);
 void rtw_rsvd_page_pkt_info_update(struct rtw_dev *rtwdev,
 				   struct rtw_tx_pkt_info *pkt_info,
 				   struct sk_buff *skb);
-- 
cgit v1.2.3-59-g8ed1b


From 3ac14439152d88435acd93a74b2dd9715abae42c Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:07 +0800
Subject: rtw88: 8723d: some chips don't support LDPC

Some chips are not able to receive LDPC packets. Add an attribute
to rtw_chip_info to determine if the LDPC capability in [ht/vht]_cap
should be advertised or not.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-6-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.c     | 9 +++++++--
 drivers/net/wireless/realtek/rtw88/main.h     | 6 ++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 1 +
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 1 +
 5 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index b0dadff0dc7b..f88a7d2370aa 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -933,8 +933,11 @@ static void rtw_init_ht_cap(struct rtw_dev *rtwdev,
 	ht_cap->cap = 0;
 	ht_cap->cap |= IEEE80211_HT_CAP_SGI_20 |
 			IEEE80211_HT_CAP_MAX_AMSDU |
-			IEEE80211_HT_CAP_LDPC_CODING |
 			(1 << IEEE80211_HT_CAP_RX_STBC_SHIFT);
+
+	if (rtw_chip_has_rx_ldpc(rtwdev))
+		ht_cap->cap |= IEEE80211_HT_CAP_LDPC_CODING;
+
 	if (efuse->hw_cap.bw & BIT(RTW_CHANNEL_WIDTH_40))
 		ht_cap->cap |= IEEE80211_HT_CAP_SUP_WIDTH_20_40 |
 				IEEE80211_HT_CAP_DSSSCCK40 |
@@ -968,7 +971,6 @@ static void rtw_init_vht_cap(struct rtw_dev *rtwdev,
 
 	vht_cap->vht_supported = true;
 	vht_cap->cap = IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454 |
-		       IEEE80211_VHT_CAP_RXLDPC |
 		       IEEE80211_VHT_CAP_SHORT_GI_80 |
 		       IEEE80211_VHT_CAP_TXSTBC |
 		       IEEE80211_VHT_CAP_RXSTBC_1 |
@@ -981,6 +983,9 @@ static void rtw_init_vht_cap(struct rtw_dev *rtwdev,
 	vht_cap->cap |= (rtwdev->hal.bfee_sts_cap <<
 			IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT);
 
+	if (rtw_chip_has_rx_ldpc(rtwdev))
+		vht_cap->cap |= IEEE80211_VHT_CAP_RXLDPC;
+
 	mcs_map = IEEE80211_VHT_MCS_SUPPORT_0_9 << 0 |
 		  IEEE80211_VHT_MCS_NOT_SUPPORTED << 4 |
 		  IEEE80211_VHT_MCS_NOT_SUPPORTED << 6 |
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index cb0dd30e9683..ed9c7163fc4e 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1085,6 +1085,7 @@ struct rtw_chip_info {
 	u8 dig_min;
 	u8 txgi_factor;
 	bool is_pwr_by_rate_dec;
+	bool rx_ldpc;
 	u8 max_power_index;
 
 	bool ht_supported;
@@ -1743,6 +1744,11 @@ static inline bool rtw_chip_wcpu_11ac(struct rtw_dev *rtwdev)
 	return rtwdev->chip->wlan_cpu == RTW_WCPU_11AC;
 }
 
+static inline bool rtw_chip_has_rx_ldpc(struct rtw_dev *rtwdev)
+{
+	return rtwdev->chip->rx_ldpc;
+}
+
 void rtw_get_channel_params(struct cfg80211_chan_def *chandef,
 			    struct rtw_channel_params *ch_param);
 bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target);
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 6011ca8352b3..6fe7596d6a11 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -1039,6 +1039,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.rf_tbl = {&rtw8723d_rf_a_tbl},
 	.rfe_defs = rtw8723d_rfe_defs,
 	.rfe_defs_size = ARRAY_SIZE(rtw8723d_rfe_defs),
+	.rx_ldpc = false,
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index ffee8111d145..f1019e196918 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2447,6 +2447,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 	.iqk_threshold = 8,
 	.bfer_su_max_num = 2,
 	.bfer_mu_max_num = 1,
+	.rx_ldpc = true,
 
 	.coex_para_ver = 0x19062706,
 	.bt_desired_ver = 0x6,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 38a096d5af6f..9a9423e23e9d 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -4312,6 +4312,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.iqk_threshold = 8,
 	.bfer_su_max_num = 2,
 	.bfer_mu_max_num = 1,
+	.rx_ldpc = true,
 
 #ifdef CONFIG_PM
 	.wow_fw_name = "rtw88/rtw8822c_wow_fw.bin",
-- 
cgit v1.2.3-59-g8ed1b


From 439d4a978d4883695d41c38d856676f3a0a806ba Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:08 +0800
Subject: rtw88: 8723d: Add chip_ops::false_alarm_statistics

This op is used to collect false alarm statistics periodically, and then
fine-tune the RX initial gain to adapt to different circumstances.

There are three steps (hold, get and reset the counters) to retrieve the
false alarm counters, which consist of CCK and OFDM counters. In addition
to the false alarm counters, it also collects the CRC ok/error counters of
CCK, OFDM and HT.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-7-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 65 +++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h | 41 +++++++++++++++++
 2 files changed, 106 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 6fe7596d6a11..2f98e58396b0 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -539,6 +539,70 @@ static void rtw8723d_efuse_grant(struct rtw_dev *rtwdev, bool on)
 	}
 }
 
+static void rtw8723d_false_alarm_statistics(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u32 cck_fa_cnt;
+	u32 ofdm_fa_cnt;
+	u32 crc32_cnt;
+	u32 val32;
+
+	/* hold counter */
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_HOLDC_11N, BIT_MASK_OFDM_FA_KEEP, 1);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTD_11N, BIT_MASK_OFDM_FA_KEEP1, 1);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_CNT_KEEP, 1);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_FA_KEEP, 1);
+
+	cck_fa_cnt = rtw_read32_mask(rtwdev, REG_CCK_FA_LSB_11N, MASKBYTE0);
+	cck_fa_cnt += rtw_read32_mask(rtwdev, REG_CCK_FA_MSB_11N, MASKBYTE3) << 8;
+
+	val32 = rtw_read32(rtwdev, REG_OFDM_FA_TYPE1_11N);
+	ofdm_fa_cnt = u32_get_bits(val32, BIT_MASK_OFDM_FF_CNT);
+	ofdm_fa_cnt += u32_get_bits(val32, BIT_MASK_OFDM_SF_CNT);
+	val32 = rtw_read32(rtwdev, REG_OFDM_FA_TYPE2_11N);
+	dm_info->ofdm_cca_cnt = u32_get_bits(val32, BIT_MASK_OFDM_CCA_CNT);
+	ofdm_fa_cnt += u32_get_bits(val32, BIT_MASK_OFDM_PF_CNT);
+	val32 = rtw_read32(rtwdev, REG_OFDM_FA_TYPE3_11N);
+	ofdm_fa_cnt += u32_get_bits(val32, BIT_MASK_OFDM_RI_CNT);
+	ofdm_fa_cnt += u32_get_bits(val32, BIT_MASK_OFDM_CRC_CNT);
+	val32 = rtw_read32(rtwdev, REG_OFDM_FA_TYPE4_11N);
+	ofdm_fa_cnt += u32_get_bits(val32, BIT_MASK_OFDM_MNS_CNT);
+
+	dm_info->cck_fa_cnt = cck_fa_cnt;
+	dm_info->ofdm_fa_cnt = ofdm_fa_cnt;
+	dm_info->total_fa_cnt = cck_fa_cnt + ofdm_fa_cnt;
+
+	dm_info->cck_err_cnt = rtw_read32(rtwdev, REG_IGI_C_11N);
+	dm_info->cck_ok_cnt = rtw_read32(rtwdev, REG_IGI_D_11N);
+	crc32_cnt = rtw_read32(rtwdev, REG_OFDM_CRC32_CNT_11N);
+	dm_info->ofdm_err_cnt = u32_get_bits(crc32_cnt, BIT_MASK_OFDM_LCRC_ERR);
+	dm_info->ofdm_ok_cnt = u32_get_bits(crc32_cnt, BIT_MASK_OFDM_LCRC_OK);
+	crc32_cnt = rtw_read32(rtwdev, REG_HT_CRC32_CNT_11N);
+	dm_info->ht_err_cnt = u32_get_bits(crc32_cnt, BIT_MASK_HT_CRC_ERR);
+	dm_info->ht_ok_cnt = u32_get_bits(crc32_cnt, BIT_MASK_HT_CRC_OK);
+	dm_info->vht_err_cnt = 0;
+	dm_info->vht_ok_cnt = 0;
+
+	val32 = rtw_read32(rtwdev, REG_CCK_CCA_CNT_11N);
+	dm_info->cck_cca_cnt = (u32_get_bits(val32, BIT_MASK_CCK_FA_MSB) << 8) |
+			       u32_get_bits(val32, BIT_MASK_CCK_FA_LSB);
+	dm_info->total_cca_cnt = dm_info->cck_cca_cnt + dm_info->ofdm_cca_cnt;
+
+	/* reset counter */
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTC_11N, BIT_MASK_OFDM_FA_RST, 1);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTC_11N, BIT_MASK_OFDM_FA_RST, 0);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTD_11N, BIT_MASK_OFDM_FA_RST1, 1);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTD_11N, BIT_MASK_OFDM_FA_RST1, 0);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_HOLDC_11N, BIT_MASK_OFDM_FA_KEEP, 0);
+	rtw_write32_mask(rtwdev, REG_OFDM_FA_RSTD_11N, BIT_MASK_OFDM_FA_KEEP1, 0);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_CNT_KPEN, 0);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_CNT_KPEN, 2);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_FA_KPEN, 0);
+	rtw_write32_mask(rtwdev, REG_CCK_FA_RST_11N, BIT_MASK_CCK_FA_KPEN, 2);
+	rtw_write32_mask(rtwdev, REG_PAGE_F_RST_11N, BIT_MASK_F_RST_ALL, 1);
+	rtw_write32_mask(rtwdev, REG_PAGE_F_RST_11N, BIT_MASK_F_RST_ALL, 0);
+}
+
 static struct rtw_chip_ops rtw8723d_ops = {
 	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
@@ -551,6 +615,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.set_antenna		= NULL,
 	.cfg_ldo25		= rtw8723d_cfg_ldo25,
 	.efuse_grant		= rtw8723d_efuse_grant,
+	.false_alarm_statistics	= rtw8723d_false_alarm_statistics,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 06614602de54..ac66f672bec8 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -88,16 +88,57 @@ struct rtw8723d_efuse {
 #define BIT_RXBB_DFIR_EN	BIT(19)
 #define REG_CCK0_SYS		0x0a00
 #define BIT_CCK_SIDE_BAND	BIT(4)
+#define REG_CCK_FA_RST_11N	0x0a2c
+#define BIT_MASK_CCK_CNT_KEEP	BIT(12)
+#define BIT_MASK_CCK_CNT_EN	BIT(13)
+#define BIT_MASK_CCK_CNT_KPEN	(BIT_MASK_CCK_CNT_KEEP | BIT_MASK_CCK_CNT_EN)
+#define BIT_MASK_CCK_FA_KEEP	BIT(14)
+#define BIT_MASK_CCK_FA_EN	BIT(15)
+#define BIT_MASK_CCK_FA_KPEN	(BIT_MASK_CCK_FA_KEEP | BIT_MASK_CCK_FA_EN)
+#define REG_CCK_FA_LSB_11N	0x0a5c
+#define REG_CCK_FA_MSB_11N	0x0a58
+#define REG_CCK_CCA_CNT_11N	0x0a60
+#define BIT_MASK_CCK_FA_MSB	GENMASK(7, 0)
+#define BIT_MASK_CCK_FA_LSB	GENMASK(15, 8)
+#define REG_OFDM_FA_HOLDC_11N	0x0c00
+#define BIT_MASK_OFDM_FA_KEEP	BIT(31)
+#define REG_OFDM_FA_RSTC_11N	0x0c0c
+#define BIT_MASK_OFDM_FA_RST	BIT(31)
 #define REG_OFDM0_RXDSP		0x0c40
 #define BIT_MASK_RXDSP		GENMASK(28, 24)
 #define BIT_EN_RXDSP		BIT(9)
 #define REG_OFDM0_XAAGC1	0x0c50
 #define REG_OFDM0_XBAGC1	0x0c58
+#define REG_OFDM_FA_TYPE1_11N	0x0cf0
+#define BIT_MASK_OFDM_FF_CNT	GENMASK(15, 0)
+#define BIT_MASK_OFDM_SF_CNT	GENMASK(31, 16)
+#define REG_OFDM_FA_RSTD_11N	0x0d00
+#define BIT_MASK_OFDM_FA_RST1	BIT(27)
+#define BIT_MASK_OFDM_FA_KEEP1	BIT(31)
 #define REG_OFDM1_CFOTRK	0x0d2c
 #define BIT_EN_CFOTRK		BIT(28)
 #define REG_OFDM1_CSI1		0x0d40
 #define REG_OFDM1_CSI2		0x0d44
 #define REG_OFDM1_CSI3		0x0d48
 #define REG_OFDM1_CSI4		0x0d4c
+#define REG_OFDM_FA_TYPE2_11N	0x0da0
+#define BIT_MASK_OFDM_CCA_CNT	GENMASK(15, 0)
+#define BIT_MASK_OFDM_PF_CNT	GENMASK(31, 16)
+#define REG_OFDM_FA_TYPE3_11N	0x0da4
+#define BIT_MASK_OFDM_RI_CNT	GENMASK(15, 0)
+#define BIT_MASK_OFDM_CRC_CNT	GENMASK(31, 16)
+#define REG_OFDM_FA_TYPE4_11N	0x0da8
+#define BIT_MASK_OFDM_MNS_CNT	GENMASK(15, 0)
+#define REG_PAGE_F_RST_11N		0x0f14
+#define BIT_MASK_F_RST_ALL		BIT(16)
+#define REG_IGI_C_11N			0x0f84
+#define REG_IGI_D_11N			0x0f88
+#define REG_HT_CRC32_CNT_11N		0x0f90
+#define BIT_MASK_HT_CRC_OK		GENMASK(15, 0)
+#define BIT_MASK_HT_CRC_ERR		GENMASK(31, 16)
+#define REG_OFDM_CRC32_CNT_11N		0x0f94
+#define BIT_MASK_OFDM_LCRC_OK		GENMASK(15, 0)
+#define BIT_MASK_OFDM_LCRC_ERR		GENMASK(31, 16)
+#define REG_HT_CRC32_CNT_11N_AGG	0x0fb8
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From fc637a860a825e934886498874f9f8372798a462 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:09 +0800
Subject: rtw88: 8723d: Set IG register for CCK rate

DIG sets only one IG register for most chips, but 8723D needs to set an
additional register for CCK rate.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-8-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     | 1 +
 drivers/net/wireless/realtek/rtw88/phy.c      | 4 ++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 1 +
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 1 +
 5 files changed, 12 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index ed9c7163fc4e..e0365a70c6f7 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1101,6 +1101,7 @@ struct rtw_chip_info {
 	const struct rtw_intf_phy_para_table *intf_table;
 
 	const struct rtw_hw_reg *dig;
+	const struct rtw_hw_reg *dig_cck;
 	u32 rf_base_addr[2];
 	u32 rf_sipi_addr[2];
 	const struct rtw_rf_sipi_addr *rf_sipi_read_addr;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 8489abfdc12e..72a16eff9db3 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -140,9 +140,13 @@ void rtw_phy_dig_write(struct rtw_dev *rtwdev, u8 igi)
 {
 	struct rtw_chip_info *chip = rtwdev->chip;
 	struct rtw_hal *hal = &rtwdev->hal;
+	const struct rtw_hw_reg *dig_cck = &chip->dig_cck[0];
 	u32 addr, mask;
 	u8 path;
 
+	if (dig_cck)
+		rtw_write32_mask(rtwdev, dig_cck->addr, dig_cck->mask, igi >> 1);
+
 	for (path = 0; path < hal->rf_path_num; path++) {
 		addr = chip->dig[path].addr;
 		mask = chip->dig[path].mask;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 2f98e58396b0..b6266b2942cf 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -1053,6 +1053,10 @@ static const struct rtw_hw_reg rtw8723d_dig[] = {
 	[1] = { .addr = 0xc50, .mask = 0x7f },
 };
 
+static const struct rtw_hw_reg rtw8723d_dig_cck[] = {
+	[0] = { .addr = 0xa0c, .mask = 0x3f00 },
+};
+
 static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = {
 	[RF_PATH_A] = { .hssi_1 = 0x820, .lssi_read    = 0x8a0,
 			.hssi_2 = 0x824, .lssi_read_pi = 0x8b8},
@@ -1095,6 +1099,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.page_table = page_table_8723d,
 	.rqpn_table = rqpn_table_8723d,
 	.dig = rtw8723d_dig,
+	.dig_cck = rtw8723d_dig_cck,
 	.rf_sipi_addr = {0x840, 0x844},
 	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
 	.fix_rf_phy_num = 2,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index f1019e196918..45636382dafd 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2435,6 +2435,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 	.rqpn_table = rqpn_table_8822b,
 	.intf_table = &phy_para_table_8822b,
 	.dig = rtw8822b_dig,
+	.dig_cck = NULL,
 	.rf_base_addr = {0x2800, 0x2c00},
 	.rf_sipi_addr = {0xc90, 0xe90},
 	.mac_tbl = &rtw8822b_mac_tbl,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 9a9423e23e9d..64b77a7cbffd 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -4297,6 +4297,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.rqpn_table = rqpn_table_8822c,
 	.intf_table = &phy_para_table_8822c,
 	.dig = rtw8822c_dig,
+	.dig_cck = NULL,
 	.rf_base_addr = {0x3c00, 0x4c00},
 	.rf_sipi_addr = {0x1808, 0x4108},
 	.mac_tbl = &rtw8822c_mac_tbl,
-- 
cgit v1.2.3-59-g8ed1b


From 1757940430efa1b6a4d64074ef50336c6f8a46e6 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Mon, 4 May 2020 18:50:10 +0800
Subject: rtw88: 8723d: add interface configurations table

The interface configuration table is used to configure PCI PHY settings
that are normally decided by design or bootstrap pin; the driver can apply
additional settings through this table.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504105010.10780-9-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index b6266b2942cf..92c742d1ce6d 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -1048,6 +1048,26 @@ static const struct rtw_rqpn rqpn_table_8723d[] = {
 	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
 };
 
+static const struct rtw_intf_phy_para pcie_gen1_param_8723d[] = {
+	{0x0008, 0x4a22,
+	 RTW_IP_SEL_PHY,
+	 RTW_INTF_PHY_CUT_ALL,
+	 RTW_INTF_PHY_PLATFORM_ALL},
+	{0x0009, 0x1000,
+	 RTW_IP_SEL_PHY,
+	 ~(RTW_INTF_PHY_CUT_A | RTW_INTF_PHY_CUT_B),
+	 RTW_INTF_PHY_PLATFORM_ALL},
+	{0xFFFF, 0x0000,
+	 RTW_IP_SEL_PHY,
+	 RTW_INTF_PHY_CUT_ALL,
+	 RTW_INTF_PHY_PLATFORM_ALL},
+};
+
+static const struct rtw_intf_phy_para_table phy_para_table_8723d = {
+	.gen1_para	= pcie_gen1_param_8723d,
+	.n_gen1_para	= ARRAY_SIZE(pcie_gen1_param_8723d),
+};
+
 static const struct rtw_hw_reg rtw8723d_dig[] = {
 	[0] = { .addr = 0xc50, .mask = 0x7f },
 	[1] = { .addr = 0xc50, .mask = 0x7f },
@@ -1098,6 +1118,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.pwr_off_seq = card_disable_flow_8723d,
 	.page_table = page_table_8723d,
 	.rqpn_table = rqpn_table_8723d,
+	.intf_table = &phy_para_table_8723d,
 	.dig = rtw8723d_dig,
 	.dig_cck = rtw8723d_dig_cck,
 	.rf_sipi_addr = {0x840, 0x844},
-- 
cgit v1.2.3-59-g8ed1b


From 191f6b08bfef24e1a9641eaac96ed030a7be4599 Mon Sep 17 00:00:00 2001
From: Dejin Zheng <zhengdejin5@gmail.com>
Date: Mon, 4 May 2020 16:34:42 +0800
Subject: rtw88: fix an issue about leak system resources

The related system resources were not released when pci_iomap() returned
an error in the rtw_pci_io_mapping() function. Add pci_release_regions() to
fix it.

Fixes: e3037485c68ec1a ("rtw88: new Realtek 802.11ac driver")
Cc: Andy Shevchenko <andy.shevchenko@gmail.com>
Signed-off-by: Dejin Zheng <zhengdejin5@gmail.com>
Acked-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504083442.3033-1-zhengdejin5@gmail.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 8a8d746d3349..b9a5868a5da0 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1102,6 +1102,7 @@ static int rtw_pci_io_mapping(struct rtw_dev *rtwdev,
 	len = pci_resource_len(pdev, bar_id);
 	rtwpci->mmap = pci_iomap(pdev, bar_id, len);
 	if (!rtwpci->mmap) {
+		pci_release_regions(pdev);
 		rtw_err(rtwdev, "failed to map pci memory\n");
 		return -ENOMEM;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From babfd3caf3569e729f38a23dde75f11f5af057f5 Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Mon, 4 May 2020 01:07:31 -0500
Subject: brcmfmac: support the second p2p connection

With RSDB feature, firmware is able to support two P2P-AGO or two
P2P-GC at the same time. So we add the second p2p connection type
to map to the second P2P connection bsscfg.

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588572453-194663-2-git-send-email-wright.feng@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 56 ++++++++++++++++++----
 .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.h |  9 ++--
 2 files changed, 54 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index f8ece9f381a5..6612103305d8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -17,6 +17,7 @@
 #include "fwil_types.h"
 #include "p2p.h"
 #include "cfg80211.h"
+#include "feature.h"
 
 /* parameters used for p2p escan */
 #define P2PAPI_SCAN_NPROBES 1
@@ -488,9 +489,13 @@ static void brcmf_p2p_generate_bss_mac(struct brcmf_p2p_info *p2p, u8 *dev_addr)
 	 * BSSCFGs need to simultaneously co-exist, then this address must be
 	 * different from the P2P Device Address, but also locally administered.
 	 */
-	memcpy(p2p->int_addr, p2p->dev_addr, ETH_ALEN);
-	p2p->int_addr[0] |= 0x02;
-	p2p->int_addr[4] ^= 0x80;
+	memcpy(p2p->conn_int_addr, p2p->dev_addr, ETH_ALEN);
+	p2p->conn_int_addr[0] |= 0x02;
+	p2p->conn_int_addr[4] ^= 0x80;
+
+	memcpy(p2p->conn2_int_addr, p2p->dev_addr, ETH_ALEN);
+	p2p->conn2_int_addr[0] |= 0x02;
+	p2p->conn2_int_addr[4] ^= 0x90;
 }
 
 /**
@@ -2015,7 +2020,7 @@ int brcmf_p2p_ifchange(struct brcmf_cfg80211_info *cfg,
 
 	if_request.type = cpu_to_le16((u16)if_type);
 	if_request.chspec = cpu_to_le16(chanspec);
-	memcpy(if_request.addr, p2p->int_addr, sizeof(if_request.addr));
+	memcpy(if_request.addr, p2p->conn_int_addr, sizeof(if_request.addr));
 
 	brcmf_cfg80211_arm_vif_event(cfg, vif);
 	err = brcmf_fil_iovar_data_set(vif->ifp, "p2p_ifupd", &if_request,
@@ -2170,6 +2175,27 @@ fail:
 	return ERR_PTR(err);
 }
 
+int brcmf_p2p_get_conn_idx(struct brcmf_cfg80211_info *cfg)
+{
+	int i;
+	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
+
+	if (!ifp)
+		return -ENODEV;
+
+	for (i = P2PAPI_BSSCFG_CONNECTION; i < P2PAPI_BSSCFG_MAX; i++) {
+		if (!cfg->p2p.bss_idx[i].vif) {
+			if (i == P2PAPI_BSSCFG_CONNECTION2 &&
+			    !(brcmf_feat_is_enabled(ifp, BRCMF_FEAT_RSDB))) {
+				brcmf_err("Multi p2p not supported");
+				return -EIO;
+			}
+			return i;
+		}
+	}
+	return -EIO;
+}
+
 /**
  * brcmf_p2p_add_vif() - create a new P2P virtual interface.
  *
@@ -2189,7 +2215,9 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
 	struct brcmf_pub *drvr = cfg->pub;
 	struct brcmf_cfg80211_vif *vif;
 	enum brcmf_fil_p2p_if_types iftype;
-	int err;
+	int err = 0;
+	int connidx;
+	u8 *p2p_intf_addr;
 
 	if (brcmf_cfg80211_vif_event_armed(cfg))
 		return ERR_PTR(-EBUSY);
@@ -2215,9 +2243,21 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
 		return (struct wireless_dev *)vif;
 	brcmf_cfg80211_arm_vif_event(cfg, vif);
 
-	err = brcmf_p2p_request_p2p_if(&cfg->p2p, ifp, cfg->p2p.int_addr,
-				       iftype);
+	connidx = brcmf_p2p_get_conn_idx(cfg);
+
+	if (connidx == P2PAPI_BSSCFG_CONNECTION)
+		p2p_intf_addr = cfg->p2p.conn_int_addr;
+	else if (connidx == P2PAPI_BSSCFG_CONNECTION2)
+		p2p_intf_addr = cfg->p2p.conn2_int_addr;
+	else
+		err = -EINVAL;
+
+	if (!err)
+		err =  brcmf_p2p_request_p2p_if(&cfg->p2p, ifp,
+						p2p_intf_addr, iftype);
+
 	if (err) {
+		brcmf_err("request p2p interface failed\n");
 		brcmf_cfg80211_arm_vif_event(cfg, NULL);
 		goto fail;
 	}
@@ -2249,7 +2289,7 @@ struct wireless_dev *brcmf_p2p_add_vif(struct wiphy *wiphy, const char *name,
 		goto fail;
 	}
 
-	cfg->p2p.bss_idx[P2PAPI_BSSCFG_CONNECTION].vif = vif;
+	cfg->p2p.bss_idx[connidx].vif = vif;
 	/* Disable firmware roaming for P2P interface  */
 	brcmf_fil_iovar_int_set(ifp, "roam_off", 1);
 	if (iftype == BRCMF_FIL_P2P_IF_GO) {
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
index 64ab9b6a677d..d2ecee565bf2 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.h
@@ -14,13 +14,15 @@ struct brcmf_cfg80211_info;
  *
  * @P2PAPI_BSSCFG_PRIMARY: maps to driver's primary bsscfg.
  * @P2PAPI_BSSCFG_DEVICE: maps to driver's P2P device discovery bsscfg.
- * @P2PAPI_BSSCFG_CONNECTION: maps to driver's P2P connection bsscfg.
+ * @P2PAPI_BSSCFG_CONNECTION: maps to driver's 1st P2P connection bsscfg.
+ * @P2PAPI_BSSCFG_CONNECTION2: maps to driver's 2nd P2P connection bsscfg.
  * @P2PAPI_BSSCFG_MAX: used for range checking.
  */
 enum p2p_bss_type {
 	P2PAPI_BSSCFG_PRIMARY, /* maps to driver's primary bsscfg */
 	P2PAPI_BSSCFG_DEVICE, /* maps to driver's P2P device discovery bsscfg */
-	P2PAPI_BSSCFG_CONNECTION, /* maps to driver's P2P connection bsscfg */
+	P2PAPI_BSSCFG_CONNECTION, /* driver's 1st P2P connection bsscfg */
+	P2PAPI_BSSCFG_CONNECTION2, /* driver's 2nd P2P connection bsscfg */
 	P2PAPI_BSSCFG_MAX
 };
 
@@ -119,7 +121,8 @@ struct brcmf_p2p_info {
 	struct brcmf_cfg80211_info *cfg;
 	unsigned long status;
 	u8 dev_addr[ETH_ALEN];
-	u8 int_addr[ETH_ALEN];
+	u8 conn_int_addr[ETH_ALEN];
+	u8 conn2_int_addr[ETH_ALEN];
 	struct p2p_bss bss_idx[P2PAPI_BSSCFG_MAX];
 	struct timer_list listen_timer;
 	u8 listen_channel;
-- 
cgit v1.2.3-59-g8ed1b


From 9c29da3f4e7ef9810bdfaf3d8aa5e6d2e33136f8 Mon Sep 17 00:00:00 2001
From: Joseph Chuang <joseph.chuang@cypress.com>
Date: Mon, 4 May 2020 01:07:32 -0500
Subject: brcmfmac: Fix P2P Group Formation failure via Go-neg method

P2P group formation fails since either peer is not able to send go-neg
confirm or dut is not able to send go-neg response. To fix this, retry
limit should be increased and dwell time check should be added.

Signed-off-by: Joseph Chuang <joseph.chuang@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588572453-194663-3-git-send-email-wright.feng@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 28 ++++++++++++++++++++--
 1 file changed, 26 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 6612103305d8..733c98f6ef86 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -60,7 +60,7 @@
 #define P2P_AF_MIN_DWELL_TIME		100
 #define P2P_AF_MED_DWELL_TIME		400
 #define P2P_AF_LONG_DWELL_TIME		1000
-#define P2P_AF_TX_MAX_RETRY		1
+#define P2P_AF_TX_MAX_RETRY		5
 #define P2P_AF_MAX_WAIT_TIME		msecs_to_jiffies(2000)
 #define P2P_INVALID_CHANNEL		-1
 #define P2P_CHANNEL_SYNC_RETRY		5
@@ -93,6 +93,9 @@
 #define P2PSD_ACTION_ID_GAS_CRESP	0x0d	/* GAS Comback Response AF */
 
 #define BRCMF_P2P_DISABLE_TIMEOUT	msecs_to_jiffies(500)
+
+/* Mask for retry counter of custom dwell time */
+#define CUSTOM_RETRY_MASK 0xff000000
 /**
  * struct brcmf_p2p_disc_st_le - set discovery state in firmware.
  *
@@ -1666,6 +1669,17 @@ static s32 brcmf_p2p_pub_af_tx(struct brcmf_cfg80211_info *cfg,
 	return err;
 }
 
+static bool brcmf_p2p_check_dwell_overflow(s32 requested_dwell,
+					   unsigned long dwell_jiffies)
+{
+	if ((requested_dwell & CUSTOM_RETRY_MASK) &&
+	    (jiffies_to_msecs(jiffies - dwell_jiffies) >
+	    (requested_dwell & ~CUSTOM_RETRY_MASK))) {
+		brcmf_err("Action frame TX retry time over dwell time!\n");
+		return true;
+	}
+	return false;
+}
 /**
  * brcmf_p2p_send_action_frame() - send action frame .
  *
@@ -1690,6 +1704,10 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg,
 	s32 tx_retry;
 	s32 extra_listen_time;
 	uint delta_ms;
+	unsigned long dwell_jiffies = 0;
+	bool dwell_overflow = false;
+
+	s32 requested_dwell = af_params->dwell_time;
 
 	action_frame = &af_params->action_frame;
 	action_frame_len = le16_to_cpu(action_frame->len);
@@ -1801,12 +1819,18 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg,
 		/* update channel */
 		af_params->channel = cpu_to_le32(afx_hdl->peer_chan);
 	}
+	dwell_jiffies = jiffies;
+	dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell,
+							dwell_jiffies);
 
 	tx_retry = 0;
 	while (!p2p->block_gon_req_tx &&
-	       (ack == false) && (tx_retry < P2P_AF_TX_MAX_RETRY)) {
+	       (!ack) && (tx_retry < P2P_AF_TX_MAX_RETRY) &&
+		!dwell_overflow) {
 		ack = !brcmf_p2p_tx_action_frame(p2p, af_params);
 		tx_retry++;
+		dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell,
+								dwell_jiffies);
 	}
 	if (ack == false) {
 		bphy_err(drvr, "Failed to send Action Frame(retry %d)\n",
-- 
cgit v1.2.3-59-g8ed1b


From 7f26cedfc9fda65d6e8c2aacbf5b43a33a29065c Mon Sep 17 00:00:00 2001
From: Justin Li <Justin.Li@cypress.com>
Date: Mon, 4 May 2020 01:07:33 -0500
Subject: brcmfmac: Add P2P Action Frame retry delay to fix GAS Comeback
 Response failure issue

It was observed that P2P Cert. 5.1.19 (DEVUT responds to Service
Discovery request) failed because the DUT did not send a GAS Comeback
Response after receiving the request from the test bed P2P peer. To fix
this issue, add a P2P Action Frame retry delay to make the P2P connection
more robust under VSDB and in noisy environments, since the peer can be
on other channels under VSDB.

Signed-off-by: Justin Li <Justin.Li@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588572453-194663-4-git-send-email-wright.feng@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index 733c98f6ef86..e32c24a2670d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -66,6 +66,7 @@
 #define P2P_CHANNEL_SYNC_RETRY		5
 #define P2P_AF_FRM_SCAN_MAX_WAIT	msecs_to_jiffies(450)
 #define P2P_DEFAULT_SLEEP_TIME_VSDB	200
+#define P2P_AF_RETRY_DELAY_TIME		40
 
 /* WiFi P2P Public Action Frame OUI Subtypes */
 #define P2P_PAF_GON_REQ		0	/* Group Owner Negotiation Req */
@@ -1827,6 +1828,9 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg,
 	while (!p2p->block_gon_req_tx &&
 	       (!ack) && (tx_retry < P2P_AF_TX_MAX_RETRY) &&
 		!dwell_overflow) {
+		if (af_params->channel)
+			msleep(P2P_AF_RETRY_DELAY_TIME);
+
 		ack = !brcmf_p2p_tx_action_frame(p2p, af_params);
 		tx_retry++;
 		dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell,
-- 
cgit v1.2.3-59-g8ed1b


From 4f5cf93395d747593c7e2cebdc32155931e03594 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 4 May 2020 19:33:57 +0800
Subject: brcmsmac: remove Comparison to bool in brcms_b_txstatus()

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c:1060:6-12:
WARNING: Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504113357.41422-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
index d88f8d456b94..77494fc30c2c 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmsmac/main.c
@@ -1057,7 +1057,7 @@ brcms_b_txstatus(struct brcms_hardware *wlc_hw, bool bound, bool *fatal)
 		txs->lasttxtime = 0;
 
 		*fatal = brcms_c_dotxstatus(wlc_hw->wlc, txs);
-		if (*fatal == true)
+		if (*fatal)
 			return false;
 		n++;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 1b56bed20212390432541c7a54e5a70561809293 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 29 Apr 2020 22:09:24 +0800
Subject: rtlwifi: remove comparison of 0/1 to bool variable

The variable 'rtlpriv->rfkill.rfkill_state' is bool and can be used
directly as a bool value instead of being compared against 0/1.

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/core.c:1725:14-42: WARNING:
Comparison of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200429140924.7750-1-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/core.c b/drivers/net/wireless/realtek/rtlwifi/core.c
index f73e690bbe8e..4dd82c6052f0 100644
--- a/drivers/net/wireless/realtek/rtlwifi/core.c
+++ b/drivers/net/wireless/realtek/rtlwifi/core.c
@@ -1722,7 +1722,7 @@ static void rtl_op_rfkill_poll(struct ieee80211_hw *hw)
 				 "wireless radio switch turned %s\n",
 				  radio_state ? "on" : "off");
 
-			blocked = (rtlpriv->rfkill.rfkill_state == 1) ? 0 : 1;
+			blocked = !rtlpriv->rfkill.rfkill_state;
 			wiphy_rfkill_set_hw_state(hw->wiphy, blocked);
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 08afb432c996e34e7047110a4d8c6979b8bd2b19 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Thu, 30 Apr 2020 23:30:45 +0200
Subject: mwifiex: avoid -Wstringop-overflow warning

gcc-10 reports a warning for mwifiex_cmd_802_11_key_material_v1:

drivers/net/wireless/marvell/mwifiex/sta_cmd.c: In function 'mwifiex_cmd_802_11_key_material_v1':
cc1: warning: writing 16 bytes into a region of size 0 [-Wstringop-overflow=]
In file included from drivers/net/wireless/marvell/mwifiex/sta_cmd.c:23:
drivers/net/wireless/marvell/mwifiex/fw.h:993:9: note: at offset 0 to object 'action' with size 2 declared here
  993 |  __le16 action;
      |         ^~~~~~

As the warning makes no sense, I reported it as a bug for gcc. In the
meantime using a temporary pointer for the key data makes the code easier
to read and stops the warning.

Fixes: 5e6e3a92b9a4 ("wireless: mwifiex: initial commit for Marvell mwifiex driver")
Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=94881
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200430213101.135134-4-arnd@arndb.de
---
 drivers/net/wireless/marvell/mwifiex/sta_cmd.c | 39 +++++++++++---------------
 1 file changed, 16 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
index 0bd93f26bd7f..8bd355d7974e 100644
--- a/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
+++ b/drivers/net/wireless/marvell/mwifiex/sta_cmd.c
@@ -853,43 +853,36 @@ mwifiex_cmd_802_11_key_material_v1(struct mwifiex_private *priv,
 		memset(&key_material->key_param_set, 0,
 		       sizeof(struct mwifiex_ie_type_key_param_set));
 	if (enc_key->is_wapi_key) {
+		struct mwifiex_ie_type_key_param_set *set;
+
 		mwifiex_dbg(priv->adapter, INFO, "info: Set WAPI Key\n");
-		key_material->key_param_set.key_type_id =
-						cpu_to_le16(KEY_TYPE_ID_WAPI);
+		set = &key_material->key_param_set;
+		set->key_type_id = cpu_to_le16(KEY_TYPE_ID_WAPI);
 		if (cmd_oid == KEY_INFO_ENABLED)
-			key_material->key_param_set.key_info =
-						cpu_to_le16(KEY_ENABLED);
+			set->key_info = cpu_to_le16(KEY_ENABLED);
 		else
-			key_material->key_param_set.key_info =
-						cpu_to_le16(!KEY_ENABLED);
+			set->key_info = cpu_to_le16(!KEY_ENABLED);
 
-		key_material->key_param_set.key[0] = enc_key->key_index;
+		set->key[0] = enc_key->key_index;
 		if (!priv->sec_info.wapi_key_on)
-			key_material->key_param_set.key[1] = 1;
+			set->key[1] = 1;
 		else
 			/* set 0 when re-key */
-			key_material->key_param_set.key[1] = 0;
+			set->key[1] = 0;
 
 		if (!is_broadcast_ether_addr(enc_key->mac_addr)) {
 			/* WAPI pairwise key: unicast */
-			key_material->key_param_set.key_info |=
-				cpu_to_le16(KEY_UNICAST);
+			set->key_info |= cpu_to_le16(KEY_UNICAST);
 		} else {	/* WAPI group key: multicast */
-			key_material->key_param_set.key_info |=
-				cpu_to_le16(KEY_MCAST);
+			set->key_info |= cpu_to_le16(KEY_MCAST);
 			priv->sec_info.wapi_key_on = true;
 		}
 
-		key_material->key_param_set.type =
-					cpu_to_le16(TLV_TYPE_KEY_MATERIAL);
-		key_material->key_param_set.key_len =
-						cpu_to_le16(WAPI_KEY_LEN);
-		memcpy(&key_material->key_param_set.key[2],
-		       enc_key->key_material, enc_key->key_len);
-		memcpy(&key_material->key_param_set.key[2 + enc_key->key_len],
-		       enc_key->pn, PN_LEN);
-		key_material->key_param_set.length =
-			cpu_to_le16(WAPI_KEY_LEN + KEYPARAMSET_FIXED_LEN);
+		set->type = cpu_to_le16(TLV_TYPE_KEY_MATERIAL);
+		set->key_len = cpu_to_le16(WAPI_KEY_LEN);
+		memcpy(&set->key[2], enc_key->key_material, enc_key->key_len);
+		memcpy(&set->key[2 + enc_key->key_len], enc_key->pn, PN_LEN);
+		set->length = cpu_to_le16(WAPI_KEY_LEN + KEYPARAMSET_FIXED_LEN);
 
 		key_param_len = (WAPI_KEY_LEN + KEYPARAMSET_FIXED_LEN) +
 				 sizeof(struct mwifiex_ie_types_header);
-- 
cgit v1.2.3-59-g8ed1b


From 049ceac308b0d57c4f06b9fb957cdf95d315cf0b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 1 May 2020 18:39:00 +0100
Subject: libertas_tf: avoid a null dereference in pointer priv

Currently there is a check if priv is null when calling lbtf_remove_card
but not in a previous call to if_usb_reset_device that can also dereference
priv.  Fix this by also only calling if_usb_reset_device if priv is not null.

It is notable that there don't seem to be any bugs reported that the
null pointer dereference has ever occurred, so I'm not sure if the null
check is required, but since we're doing a null check anyway it should
be done for both function calls.

Addresses-Coverity: ("Dereference before null check")
Fixes: baa0280f08c7 ("libertas_tf: don't defer firmware loading until start()")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200501173900.296658-1-colin.king@canonical.com
---
 drivers/net/wireless/marvell/libertas_tf/if_usb.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas_tf/if_usb.c b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
index 25ac9db35dbf..bedc09215088 100644
--- a/drivers/net/wireless/marvell/libertas_tf/if_usb.c
+++ b/drivers/net/wireless/marvell/libertas_tf/if_usb.c
@@ -247,10 +247,10 @@ static void if_usb_disconnect(struct usb_interface *intf)
 
 	lbtf_deb_enter(LBTF_DEB_MAIN);
 
-	if_usb_reset_device(priv);
-
-	if (priv)
+	if (priv) {
+		if_usb_reset_device(priv);
 		lbtf_remove_card(priv);
+	}
 
 	/* Unlink and free urb */
 	if_usb_free(cardp);
-- 
cgit v1.2.3-59-g8ed1b


From e2b9ac59081028d48ec7aa9a3510981eda94c327 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 4 May 2020 19:33:00 +0800
Subject: b43: remove Comparison of 0/1 to bool variable in phy_n.c

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/b43/phy_n.c:5510:19-32: WARNING:
Comparison of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504113300.40895-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/b43/phy_n.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/b43/phy_n.c b/drivers/net/wireless/broadcom/b43/phy_n.c
index d3c001fa8eb4..c33b4235839d 100644
--- a/drivers/net/wireless/broadcom/b43/phy_n.c
+++ b/drivers/net/wireless/broadcom/b43/phy_n.c
@@ -5507,7 +5507,7 @@ static int b43_nphy_cal_tx_iq_lo(struct b43_wldev *dev,
 			core = (cmd & 0x3000) >> 12;
 			type = (cmd & 0x0F00) >> 8;
 
-			if (phy6or5x && updated[core] == 0) {
+			if (phy6or5x && !updated[core]) {
 				b43_nphy_update_tx_cal_ladder(dev, core);
 				updated[core] = true;
 			}
-- 
cgit v1.2.3-59-g8ed1b


From f8f24ece219204333ac825e9c8aaf3403e201d92 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 4 May 2020 19:33:11 +0800
Subject: b43: remove Comparison of 0/1 to bool variable in pio.c

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/b43/pio.c:768:10-25: WARNING: Comparison
of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504113311.41026-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/b43/pio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/b43/pio.c b/drivers/net/wireless/broadcom/b43/pio.c
index 69f8b46c9015..1a11c5dfb8d9 100644
--- a/drivers/net/wireless/broadcom/b43/pio.c
+++ b/drivers/net/wireless/broadcom/b43/pio.c
@@ -765,7 +765,7 @@ void b43_pio_rx(struct b43_pio_rxqueue *q)
 	bool stop;
 
 	while (1) {
-		stop = (pio_rx_frame(q) == 0);
+		stop = !pio_rx_frame(q);
 		if (stop)
 			break;
 		cond_resched();
-- 
cgit v1.2.3-59-g8ed1b


From cbb1404f65414130fb89e52a97b9d853d303dc5c Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Mon, 4 May 2020 19:33:21 +0800
Subject: rtlwifi: rtl8188ee: remove Comparison to bool in rf.c

Fix the following coccicheck warning:

drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c:476:6-14: WARNING:
Comparison to bool
drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c:54:5-22: WARNING:
Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200504113321.41118-1-yanaijie@huawei.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c
index 0f401ad92c2e..c376817a1bf4 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8188ee/rf.c
@@ -51,7 +51,7 @@ void rtl88e_phy_rf6052_set_cck_txpower(struct ieee80211_hw *hw,
 	if (rtlefuse->eeprom_regulatory != 0)
 		turbo_scanoff = true;
 
-	if (mac->act_scanning == true) {
+	if (mac->act_scanning) {
 		tx_agc[RF90_PATH_A] = 0x3f3f3f3f;
 		tx_agc[RF90_PATH_B] = 0x3f3f3f3f;
 
@@ -473,7 +473,7 @@ static bool _rtl88e_phy_rf6052_config_parafile(struct ieee80211_hw *hw)
 			break;
 		}
 
-		if (rtstatus != true) {
+		if (!rtstatus) {
 			RT_TRACE(rtlpriv, COMP_INIT, DBG_TRACE,
 				 "Radio[%d] Fail!!\n", rfpath);
 			return false;
-- 
cgit v1.2.3-59-g8ed1b


From bbae62e32ee9ef3995cb72e565dd8790a6858de2 Mon Sep 17 00:00:00 2001
From: Samuel Zou <zou_wei@huawei.com>
Date: Wed, 6 May 2020 10:54:58 +0800
Subject: net: ethernet: ti: Use PTR_ERR_OR_ZERO() to simplify code

Fixes coccicheck warning:

drivers/net/ethernet/ti/am65-cpts.c:1017:1-3: WARNING: PTR_ERR_OR_ZERO can be used

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Samuel Zou <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpts.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index 370162c86e7f..51c94b2a77b1 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -1014,10 +1014,7 @@ static int am65_cpts_probe(struct platform_device *pdev)
 		return PTR_ERR(base);
 
 	cpts = am65_cpts_create(dev, base, node);
-	if (IS_ERR(cpts))
-		return PTR_ERR(cpts);
-
-	return 0;
+	return PTR_ERR_OR_ZERO(cpts);
 }
 
 static const struct of_device_id am65_cpts_of_match[] = {
-- 
cgit v1.2.3-59-g8ed1b


From cadb5c4e394da0fc13e4d46ad7c1445d60dc175f Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 14:10:41 +0800
Subject: net: cortina: Use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cortina/gemini.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/cortina/gemini.c b/drivers/net/ethernet/cortina/gemini.c
index 5bff5c2be88b..8d13ea370db1 100644
--- a/drivers/net/ethernet/cortina/gemini.c
+++ b/drivers/net/ethernet/cortina/gemini.c
@@ -1224,7 +1224,8 @@ map_error:
 	return -ENOMEM;
 }
 
-static int gmac_start_xmit(struct sk_buff *skb, struct net_device *netdev)
+static netdev_tx_t gmac_start_xmit(struct sk_buff *skb,
+				   struct net_device *netdev)
 {
 	struct gemini_ethernet_port *port = netdev_priv(netdev);
 	unsigned short m = (1 << port->txq_order) - 1;
-- 
cgit v1.2.3-59-g8ed1b


From 8741e18419bf4b9e3b02c0ed01ea13f1aa497fa7 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 6 May 2020 14:16:16 +0800
Subject: net: bridge: return false in br_mrp_enabled()

Fix the following coccicheck warning:

net/bridge/br_private.h:1334:8-9: WARNING: return of 0/1 in function
'br_mrp_enabled' with return type bool

Fixes: 6536993371fab ("bridge: mrp: Integrate MRP into the bridge")
Signed-off-by: Jason Yan <yanaijie@huawei.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_private.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index c35647cb138a..78d3a951180d 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -1331,7 +1331,7 @@ static inline int br_mrp_process(struct net_bridge_port *p, struct sk_buff *skb)
 
 static inline bool br_mrp_enabled(struct net_bridge *br)
 {
-	return 0;
+	return false;
 }
 
 static inline void br_mrp_port_del(struct net_bridge *br,
-- 
cgit v1.2.3-59-g8ed1b


From f9cbf19c7f5d92e3e91727b6dff6e43b1d6a62a3 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 6 May 2020 14:16:30 +0800
Subject: net: mlx4: remove unneeded variable "err" in mlx4_en_get_rxfh()

Fix the following coccicheck warning:

drivers/net/ethernet/mellanox/mlx4/en_ethtool.c:1238:5-8: Unneeded
variable: "err". Return "0" on line 1252

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 8a5ea2543670..216e6b2e9eed 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1235,7 +1235,6 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key,
 	struct mlx4_en_priv *priv = netdev_priv(dev);
 	u32 n = mlx4_en_get_rxfh_indir_size(dev);
 	u32 i, rss_rings;
-	int err = 0;
 
 	rss_rings = priv->prof->rss_rings ?: n;
 	rss_rings = rounddown_pow_of_two(rss_rings);
@@ -1249,7 +1248,7 @@ static int mlx4_en_get_rxfh(struct net_device *dev, u32 *ring_index, u8 *key,
 		memcpy(key, priv->rss_key, MLX4_EN_RSS_KEY_SIZE);
 	if (hfunc)
 		*hfunc = priv->rss_hash_fn;
-	return err;
+	return 0;
 }
 
 static int mlx4_en_set_rxfh(struct net_device *dev, const u32 *ring_index,
-- 
cgit v1.2.3-59-g8ed1b


From 2f8649900b7d0dced6aff0765ea15e874a91b73c Mon Sep 17 00:00:00 2001
From: Samuel Zou <zou_wei@huawei.com>
Date: Wed, 6 May 2020 14:27:30 +0800
Subject: net: ethernet: mediatek: Make mtk_m32 static

Fix the following sparse warning:

drivers/net/ethernet/mediatek/mtk_eth_soc.c:68:5: warning:
symbol 'mtk_m32' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Samuel Zou <zou_wei@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 09047109d0da..2822268220f7 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -65,7 +65,7 @@ u32 mtk_r32(struct mtk_eth *eth, unsigned reg)
 	return __raw_readl(eth->base + reg);
 }
 
-u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned reg)
+static u32 mtk_m32(struct mtk_eth *eth, u32 mask, u32 set, unsigned reg)
 {
 	u32 val;
 
-- 
cgit v1.2.3-59-g8ed1b


From 3be83016275d1e113ce8c4aa477174a8a6d7a55d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:40 +0200
Subject: s390/qeth: keep track of LP2LP capability for csum offload

When enabling TX CSO, make a note of whether the device has support for
LP2LP offloading. This will become relevant in subsequent patches.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  3 +++
 drivers/s390/net/qeth_core_main.c | 23 ++++++++++++++---------
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index e0b26310ecab..2ac7771394d8 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -688,6 +688,9 @@ struct qeth_card_info {
 	u8 promisc_mode:1;
 	u8 use_v1_blkt:1;
 	u8 is_vm_nic:1;
+	/* no bitfield, we take a pointer on these two: */
+	u8 has_lp2lp_cso_v6;
+	u8 has_lp2lp_cso_v4;
 	enum qeth_card_types type;
 	enum qeth_link_types link_type;
 	int broadcast_capable;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index f7689461c242..ef96890eea5c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -6300,7 +6300,7 @@ static int qeth_set_csum_off(struct qeth_card *card, enum qeth_ipa_funcs cstype,
 }
 
 static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
-			    enum qeth_prot_versions prot)
+			    enum qeth_prot_versions prot, u8 *lp2lp)
 {
 	u32 required_features = QETH_IPA_CHECKSUM_UDP | QETH_IPA_CHECKSUM_TCP;
 	struct qeth_cmd_buffer *iob;
@@ -6352,8 +6352,11 @@ static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
 
 	dev_info(&card->gdev->dev, "HW Checksumming (%sbound IPv%d) enabled\n",
 		 cstype == IPA_INBOUND_CHECKSUM ? "in" : "out", prot);
-	if (!qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP) &&
-	    cstype == IPA_OUTBOUND_CHECKSUM)
+
+	if (lp2lp)
+		*lp2lp = qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP);
+
+	if (lp2lp && !*lp2lp)
 		dev_warn(&card->gdev->dev,
 			 "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n",
 			 QETH_CARD_IFNAME(card));
@@ -6361,9 +6364,9 @@ static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
 }
 
 static int qeth_set_ipa_csum(struct qeth_card *card, bool on, int cstype,
-			     enum qeth_prot_versions prot)
+			     enum qeth_prot_versions prot, u8 *lp2lp)
 {
-	return on ? qeth_set_csum_on(card, cstype, prot) :
+	return on ? qeth_set_csum_on(card, cstype, prot, lp2lp) :
 		    qeth_set_csum_off(card, cstype, prot);
 }
 
@@ -6451,13 +6454,13 @@ static int qeth_set_ipa_rx_csum(struct qeth_card *card, bool on)
 
 	if (qeth_is_supported(card, IPA_INBOUND_CHECKSUM))
 		rc_ipv4 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
-					    QETH_PROT_IPV4);
+					    QETH_PROT_IPV4, NULL);
 	if (!qeth_is_supported6(card, IPA_INBOUND_CHECKSUM_V6))
 		/* no/one Offload Assist available, so the rc is trivial */
 		return rc_ipv4;
 
 	rc_ipv6 = qeth_set_ipa_csum(card, on, IPA_INBOUND_CHECKSUM,
-				    QETH_PROT_IPV6);
+				    QETH_PROT_IPV6, NULL);
 
 	if (on)
 		/* enable: success if any Assist is active */
@@ -6504,13 +6507,15 @@ int qeth_set_features(struct net_device *dev, netdev_features_t features)
 
 	if ((changed & NETIF_F_IP_CSUM)) {
 		rc = qeth_set_ipa_csum(card, features & NETIF_F_IP_CSUM,
-				       IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV4);
+				       IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV4,
+				       &card->info.has_lp2lp_cso_v4);
 		if (rc)
 			changed ^= NETIF_F_IP_CSUM;
 	}
 	if (changed & NETIF_F_IPV6_CSUM) {
 		rc = qeth_set_ipa_csum(card, features & NETIF_F_IPV6_CSUM,
-				       IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV6);
+				       IPA_OUTBOUND_CHECKSUM, QETH_PROT_IPV6,
+				       &card->info.has_lp2lp_cso_v6);
 		if (rc)
 			changed ^= NETIF_F_IPV6_CSUM;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 0d49c06bcfc29042306814a2199b7996dbf1ecb0 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:41 +0200
Subject: s390/qeth: process local address events

In configurations where specific HW offloads are in use, OSA adapters
will raise notifications to their virtual devices about the IP addresses
that currently reside on the same adapter.
Cache these addresses in two RCU-enabled hash tables, and flush the
tables once the relevant HW offload(s) get disabled.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  13 +++
 drivers/s390/net/qeth_core_main.c | 217 ++++++++++++++++++++++++++++++++++++++
 drivers/s390/net/qeth_core_mpc.h  |  25 +++++
 drivers/s390/net/qeth_l2_main.c   |   1 +
 drivers/s390/net/qeth_l3_main.c   |   1 +
 5 files changed, 257 insertions(+)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 2ac7771394d8..b92af3735dd4 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -21,8 +21,10 @@
 #include <linux/seq_file.h>
 #include <linux/hashtable.h>
 #include <linux/ip.h>
+#include <linux/rcupdate.h>
 #include <linux/refcount.h>
 #include <linux/timer.h>
+#include <linux/types.h>
 #include <linux/wait.h>
 #include <linux/workqueue.h>
 
@@ -356,6 +358,12 @@ static inline bool qeth_l3_same_next_hop(struct qeth_hdr_layer3 *h1,
 			       &h2->next_hop.ipv6_addr);
 }
 
+struct qeth_local_addr {
+	struct hlist_node hnode;
+	struct rcu_head rcu;
+	struct in6_addr addr;
+};
+
 enum qeth_qdio_info_states {
 	QETH_QDIO_UNINITIALIZED,
 	QETH_QDIO_ALLOCATED,
@@ -800,6 +808,10 @@ struct qeth_card {
 	wait_queue_head_t wait_q;
 	DECLARE_HASHTABLE(mac_htable, 4);
 	DECLARE_HASHTABLE(ip_htable, 4);
+	DECLARE_HASHTABLE(local_addrs4, 4);
+	DECLARE_HASHTABLE(local_addrs6, 4);
+	spinlock_t local_addrs4_lock;
+	spinlock_t local_addrs6_lock;
 	struct mutex ip_lock;
 	DECLARE_HASHTABLE(ip_mc_htable, 4);
 	struct work_struct rx_mode_work;
@@ -1025,6 +1037,7 @@ void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
 void qeth_put_cmd(struct qeth_cmd_buffer *iob);
 
 void qeth_schedule_recovery(struct qeth_card *);
+void qeth_flush_local_addrs(struct qeth_card *card);
 int qeth_poll(struct napi_struct *napi, int budget);
 void qeth_clear_ipacmd_list(struct qeth_card *);
 int qeth_qdio_clear_card(struct qeth_card *, int);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index ef96890eea5c..6b5d42a4501c 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -26,6 +26,7 @@
 #include <linux/if_vlan.h>
 #include <linux/netdevice.h>
 #include <linux/netdev_features.h>
+#include <linux/rcutree.h>
 #include <linux/skbuff.h>
 #include <linux/vmalloc.h>
 
@@ -623,6 +624,187 @@ void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason)
 }
 EXPORT_SYMBOL_GPL(qeth_notify_cmd);
 
+static void qeth_flush_local_addrs4(struct qeth_card *card)
+{
+	struct qeth_local_addr *addr;
+	struct hlist_node *tmp;
+	unsigned int i;
+
+	spin_lock_irq(&card->local_addrs4_lock);
+	hash_for_each_safe(card->local_addrs4, i, tmp, addr, hnode) {
+		hash_del_rcu(&addr->hnode);
+		kfree_rcu(addr, rcu);
+	}
+	spin_unlock_irq(&card->local_addrs4_lock);
+}
+
+static void qeth_flush_local_addrs6(struct qeth_card *card)
+{
+	struct qeth_local_addr *addr;
+	struct hlist_node *tmp;
+	unsigned int i;
+
+	spin_lock_irq(&card->local_addrs6_lock);
+	hash_for_each_safe(card->local_addrs6, i, tmp, addr, hnode) {
+		hash_del_rcu(&addr->hnode);
+		kfree_rcu(addr, rcu);
+	}
+	spin_unlock_irq(&card->local_addrs6_lock);
+}
+
+void qeth_flush_local_addrs(struct qeth_card *card)
+{
+	qeth_flush_local_addrs4(card);
+	qeth_flush_local_addrs6(card);
+}
+EXPORT_SYMBOL_GPL(qeth_flush_local_addrs);
+
+static void qeth_add_local_addrs4(struct qeth_card *card,
+				  struct qeth_ipacmd_local_addrs4 *cmd)
+{
+	unsigned int i;
+
+	if (cmd->addr_length !=
+	    sizeof_field(struct qeth_ipacmd_local_addr4, addr)) {
+		dev_err_ratelimited(&card->gdev->dev,
+				    "Dropped IPv4 ADD LOCAL ADDR event with bad length %u\n",
+				    cmd->addr_length);
+		return;
+	}
+
+	spin_lock(&card->local_addrs4_lock);
+	for (i = 0; i < cmd->count; i++) {
+		unsigned int key = ipv4_addr_hash(cmd->addrs[i].addr);
+		struct qeth_local_addr *addr;
+		bool duplicate = false;
+
+		hash_for_each_possible(card->local_addrs4, addr, hnode, key) {
+			if (addr->addr.s6_addr32[3] == cmd->addrs[i].addr) {
+				duplicate = true;
+				break;
+			}
+		}
+
+		if (duplicate)
+			continue;
+
+		addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+		if (!addr) {
+			dev_err(&card->gdev->dev,
+				"Failed to allocate local addr object. Traffic to %pI4 might suffer.\n",
+				&cmd->addrs[i].addr);
+			continue;
+		}
+
+		ipv6_addr_set(&addr->addr, 0, 0, 0, cmd->addrs[i].addr);
+		hash_add_rcu(card->local_addrs4, &addr->hnode, key);
+	}
+	spin_unlock(&card->local_addrs4_lock);
+}
+
+static void qeth_add_local_addrs6(struct qeth_card *card,
+				  struct qeth_ipacmd_local_addrs6 *cmd)
+{
+	unsigned int i;
+
+	if (cmd->addr_length !=
+	    sizeof_field(struct qeth_ipacmd_local_addr6, addr)) {
+		dev_err_ratelimited(&card->gdev->dev,
+				    "Dropped IPv6 ADD LOCAL ADDR event with bad length %u\n",
+				    cmd->addr_length);
+		return;
+	}
+
+	spin_lock(&card->local_addrs6_lock);
+	for (i = 0; i < cmd->count; i++) {
+		u32 key = ipv6_addr_hash(&cmd->addrs[i].addr);
+		struct qeth_local_addr *addr;
+		bool duplicate = false;
+
+		hash_for_each_possible(card->local_addrs6, addr, hnode, key) {
+			if (ipv6_addr_equal(&addr->addr, &cmd->addrs[i].addr)) {
+				duplicate = true;
+				break;
+			}
+		}
+
+		if (duplicate)
+			continue;
+
+		addr = kmalloc(sizeof(*addr), GFP_ATOMIC);
+		if (!addr) {
+			dev_err(&card->gdev->dev,
+				"Failed to allocate local addr object. Traffic to %pI6c might suffer.\n",
+				&cmd->addrs[i].addr);
+			continue;
+		}
+
+		addr->addr = cmd->addrs[i].addr;
+		hash_add_rcu(card->local_addrs6, &addr->hnode, key);
+	}
+	spin_unlock(&card->local_addrs6_lock);
+}
+
+static void qeth_del_local_addrs4(struct qeth_card *card,
+				  struct qeth_ipacmd_local_addrs4 *cmd)
+{
+	unsigned int i;
+
+	if (cmd->addr_length !=
+	    sizeof_field(struct qeth_ipacmd_local_addr4, addr)) {
+		dev_err_ratelimited(&card->gdev->dev,
+				    "Dropped IPv4 DEL LOCAL ADDR event with bad length %u\n",
+				    cmd->addr_length);
+		return;
+	}
+
+	spin_lock(&card->local_addrs4_lock);
+	for (i = 0; i < cmd->count; i++) {
+		struct qeth_ipacmd_local_addr4 *addr = &cmd->addrs[i];
+		unsigned int key = ipv4_addr_hash(addr->addr);
+		struct qeth_local_addr *tmp;
+
+		hash_for_each_possible(card->local_addrs4, tmp, hnode, key) {
+			if (tmp->addr.s6_addr32[3] == addr->addr) {
+				hash_del_rcu(&tmp->hnode);
+				kfree_rcu(tmp, rcu);
+				break;
+			}
+		}
+	}
+	spin_unlock(&card->local_addrs4_lock);
+}
+
+static void qeth_del_local_addrs6(struct qeth_card *card,
+				  struct qeth_ipacmd_local_addrs6 *cmd)
+{
+	unsigned int i;
+
+	if (cmd->addr_length !=
+	    sizeof_field(struct qeth_ipacmd_local_addr6, addr)) {
+		dev_err_ratelimited(&card->gdev->dev,
+				    "Dropped IPv6 DEL LOCAL ADDR event with bad length %u\n",
+				    cmd->addr_length);
+		return;
+	}
+
+	spin_lock(&card->local_addrs6_lock);
+	for (i = 0; i < cmd->count; i++) {
+		struct qeth_ipacmd_local_addr6 *addr = &cmd->addrs[i];
+		u32 key = ipv6_addr_hash(&addr->addr);
+		struct qeth_local_addr *tmp;
+
+		hash_for_each_possible(card->local_addrs6, tmp, hnode, key) {
+			if (ipv6_addr_equal(&tmp->addr, &addr->addr)) {
+				hash_del_rcu(&tmp->hnode);
+				kfree_rcu(tmp, rcu);
+				break;
+			}
+		}
+	}
+	spin_unlock(&card->local_addrs6_lock);
+}
+
 static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
 		struct qeth_card *card)
 {
@@ -686,9 +868,19 @@ static struct qeth_ipa_cmd *qeth_check_ipa_data(struct qeth_card *card,
 	case IPA_CMD_MODCCID:
 		return cmd;
 	case IPA_CMD_REGISTER_LOCAL_ADDR:
+		if (cmd->hdr.prot_version == QETH_PROT_IPV4)
+			qeth_add_local_addrs4(card, &cmd->data.local_addrs4);
+		else if (cmd->hdr.prot_version == QETH_PROT_IPV6)
+			qeth_add_local_addrs6(card, &cmd->data.local_addrs6);
+
 		QETH_CARD_TEXT(card, 3, "irla");
 		return NULL;
 	case IPA_CMD_UNREGISTER_LOCAL_ADDR:
+		if (cmd->hdr.prot_version == QETH_PROT_IPV4)
+			qeth_del_local_addrs4(card, &cmd->data.local_addrs4);
+		else if (cmd->hdr.prot_version == QETH_PROT_IPV6)
+			qeth_del_local_addrs6(card, &cmd->data.local_addrs6);
+
 		QETH_CARD_TEXT(card, 3, "urla");
 		return NULL;
 	default:
@@ -1376,6 +1568,10 @@ static void qeth_setup_card(struct qeth_card *card)
 	qeth_init_qdio_info(card);
 	INIT_DELAYED_WORK(&card->buffer_reclaim_work, qeth_buffer_reclaim_work);
 	INIT_WORK(&card->close_dev_work, qeth_close_dev_handler);
+	hash_init(card->local_addrs4);
+	hash_init(card->local_addrs6);
+	spin_lock_init(&card->local_addrs4_lock);
+	spin_lock_init(&card->local_addrs6_lock);
 }
 
 static void qeth_core_sl_print(struct seq_file *m, struct service_level *slr)
@@ -6496,6 +6692,24 @@ void qeth_enable_hw_features(struct net_device *dev)
 }
 EXPORT_SYMBOL_GPL(qeth_enable_hw_features);
 
+static void qeth_check_restricted_features(struct qeth_card *card,
+					   netdev_features_t changed,
+					   netdev_features_t actual)
+{
+	netdev_features_t ipv6_features = NETIF_F_TSO6;
+	netdev_features_t ipv4_features = NETIF_F_TSO;
+
+	if (!card->info.has_lp2lp_cso_v6)
+		ipv6_features |= NETIF_F_IPV6_CSUM;
+	if (!card->info.has_lp2lp_cso_v4)
+		ipv4_features |= NETIF_F_IP_CSUM;
+
+	if ((changed & ipv6_features) && !(actual & ipv6_features))
+		qeth_flush_local_addrs6(card);
+	if ((changed & ipv4_features) && !(actual & ipv4_features))
+		qeth_flush_local_addrs4(card);
+}
+
 int qeth_set_features(struct net_device *dev, netdev_features_t features)
 {
 	struct qeth_card *card = dev->ml_priv;
@@ -6537,6 +6751,9 @@ int qeth_set_features(struct net_device *dev, netdev_features_t features)
 			changed ^= NETIF_F_TSO6;
 	}
 
+	qeth_check_restricted_features(card, dev->features ^ features,
+				       dev->features ^ changed);
+
 	/* everything changed successfully? */
 	if ((dev->features ^ features) == changed)
 		return 0;
diff --git a/drivers/s390/net/qeth_core_mpc.h b/drivers/s390/net/qeth_core_mpc.h
index d89a04bfd8b0..9d6f39d8f9ab 100644
--- a/drivers/s390/net/qeth_core_mpc.h
+++ b/drivers/s390/net/qeth_core_mpc.h
@@ -772,6 +772,29 @@ struct qeth_ipacmd_addr_change {
 	struct qeth_ipacmd_addr_change_entry entry[];
 } __packed;
 
+/* [UN]REGISTER_LOCAL_ADDRESS notifications */
+struct qeth_ipacmd_local_addr4 {
+	__be32 addr;
+	u32 flags;
+};
+
+struct qeth_ipacmd_local_addrs4 {
+	u32 count;
+	u32 addr_length;
+	struct qeth_ipacmd_local_addr4 addrs[];
+};
+
+struct qeth_ipacmd_local_addr6 {
+	struct in6_addr addr;
+	u32 flags;
+};
+
+struct qeth_ipacmd_local_addrs6 {
+	u32 count;
+	u32 addr_length;
+	struct qeth_ipacmd_local_addr6 addrs[];
+};
+
 /* Header for each IPA command */
 struct qeth_ipacmd_hdr {
 	__u8   command;
@@ -803,6 +826,8 @@ struct qeth_ipa_cmd {
 		struct qeth_ipacmd_setbridgeport	sbp;
 		struct qeth_ipacmd_addr_change		addrchange;
 		struct qeth_ipacmd_vnicc		vnicc;
+		struct qeth_ipacmd_local_addrs4		local_addrs4;
+		struct qeth_ipacmd_local_addrs6		local_addrs6;
 	} data;
 } __attribute__ ((packed));
 
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 0bd5b09e7a22..47f624b37040 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -291,6 +291,7 @@ static void qeth_l2_stop_card(struct qeth_card *card)
 	qeth_qdio_clear_card(card, 0);
 	qeth_clear_working_pool_list(card);
 	flush_workqueue(card->event_wq);
+	qeth_flush_local_addrs(card);
 	card->info.promisc_mode = 0;
 }
 
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index 0742a749d26e..fec4ac41e946 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1176,6 +1176,7 @@ static void qeth_l3_stop_card(struct qeth_card *card)
 	qeth_qdio_clear_card(card, 0);
 	qeth_clear_working_pool_list(card);
 	flush_workqueue(card->event_wq);
+	qeth_flush_local_addrs(card);
 	card->info.promisc_mode = 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From fb8d258049f70e6bfd4674b11deb566fb13a0839 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:42 +0200
Subject: s390/qeth: add debugfs file for local IP addresses

For debugging purposes, provide read access to the local_addr caches
via debug/qeth/<dev_name>/local_addrs.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  2 ++
 drivers/s390/net/qeth_core_main.c | 32 +++++++++++++++++++++++++++++++-
 2 files changed, 33 insertions(+), 1 deletion(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index b92af3735dd4..3d8b8e0f2438 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -11,6 +11,7 @@
 #define __QETH_CORE_H__
 
 #include <linux/completion.h>
+#include <linux/debugfs.h>
 #include <linux/if.h>
 #include <linux/if_arp.h>
 #include <linux/etherdevice.h>
@@ -797,6 +798,7 @@ struct qeth_card {
 	struct qeth_channel data;
 
 	struct net_device *dev;
+	struct dentry *debugfs;
 	struct qeth_card_stats stats;
 	struct qeth_card_info info;
 	struct qeth_token token;
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 6b5d42a4501c..771282cb7aef 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -61,6 +61,7 @@ EXPORT_SYMBOL_GPL(qeth_core_header_cache);
 static struct kmem_cache *qeth_qdio_outbuf_cache;
 
 static struct device *qeth_core_root_dev;
+static struct dentry *qeth_debugfs_root;
 static struct lock_class_key qdio_out_skb_queue_key;
 
 static void qeth_issue_next_read_cb(struct qeth_card *card,
@@ -805,6 +806,24 @@ static void qeth_del_local_addrs6(struct qeth_card *card,
 	spin_unlock(&card->local_addrs6_lock);
 }
 
+static int qeth_debugfs_local_addr_show(struct seq_file *m, void *v)
+{
+	struct qeth_card *card = m->private;
+	struct qeth_local_addr *tmp;
+	unsigned int i;
+
+	rcu_read_lock();
+	hash_for_each_rcu(card->local_addrs4, i, tmp, hnode)
+		seq_printf(m, "%pI4\n", &tmp->addr.s6_addr32[3]);
+	hash_for_each_rcu(card->local_addrs6, i, tmp, hnode)
+		seq_printf(m, "%pI6c\n", &tmp->addr);
+	rcu_read_unlock();
+
+	return 0;
+}
+
+DEFINE_SHOW_ATTRIBUTE(qeth_debugfs_local_addr);
+
 static void qeth_issue_ipa_msg(struct qeth_ipa_cmd *cmd, int rc,
 		struct qeth_card *card)
 {
@@ -1608,6 +1627,11 @@ static struct qeth_card *qeth_alloc_card(struct ccwgroup_device *gdev)
 	if (!card->read_cmd)
 		goto out_read_cmd;
 
+	card->debugfs = debugfs_create_dir(dev_name(&gdev->dev),
+					   qeth_debugfs_root);
+	debugfs_create_file("local_addrs", 0400, card->debugfs, card,
+			    &qeth_debugfs_local_addr_fops);
+
 	card->qeth_service_level.seq_print = qeth_core_sl_print;
 	register_service_level(&card->qeth_service_level);
 	return card;
@@ -5085,9 +5109,11 @@ out_free_nothing:
 static void qeth_core_free_card(struct qeth_card *card)
 {
 	QETH_CARD_TEXT(card, 2, "freecrd");
+
+	unregister_service_level(&card->qeth_service_level);
+	debugfs_remove_recursive(card->debugfs);
 	qeth_put_cmd(card->read_cmd);
 	destroy_workqueue(card->event_wq);
-	unregister_service_level(&card->qeth_service_level);
 	dev_set_drvdata(&card->gdev->dev, NULL);
 	kfree(card);
 }
@@ -6967,6 +6993,8 @@ static int __init qeth_core_init(void)
 
 	pr_info("loading core functions\n");
 
+	qeth_debugfs_root = debugfs_create_dir("qeth", NULL);
+
 	rc = qeth_register_dbf_views();
 	if (rc)
 		goto dbf_err;
@@ -7008,6 +7036,7 @@ slab_err:
 register_err:
 	qeth_unregister_dbf_views();
 dbf_err:
+	debugfs_remove_recursive(qeth_debugfs_root);
 	pr_err("Initializing the qeth device driver failed\n");
 	return rc;
 }
@@ -7021,6 +7050,7 @@ static void __exit qeth_core_exit(void)
 	kmem_cache_destroy(qeth_core_header_cache);
 	root_device_unregister(qeth_core_root_dev);
 	qeth_unregister_dbf_views();
+	debugfs_remove_recursive(qeth_debugfs_root);
 	pr_info("core functions removed\n");
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 1d38c2e4941e1add2a89ef7abb22c918bbb3c103 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:43 +0200
Subject: s390/qeth: extract helpers for next-hop lookup

These will be used in a subsequent patch.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h    | 29 ++++++++++++++++++++++-------
 drivers/s390/net/qeth_l3_main.c | 18 +++++-------------
 2 files changed, 27 insertions(+), 20 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 3d8b8e0f2438..6b0d37d2c638 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -34,6 +34,7 @@
 #include <net/ipv6.h>
 #include <net/if_inet6.h>
 #include <net/addrconf.h>
+#include <net/route.h>
 #include <net/sch_generic.h>
 #include <net/tcp.h>
 
@@ -234,11 +235,7 @@ struct qeth_hdr_layer3 {
 	__u16 frame_offset;
 	union {
 		/* TX: */
-		struct in6_addr ipv6_addr;
-		struct ipv4 {
-			u8 res[12];
-			u32 addr;
-		} ipv4;
+		struct in6_addr addr;
 		/* RX: */
 		struct rx {
 			u8 res1[2];
@@ -355,8 +352,7 @@ static inline bool qeth_l3_same_next_hop(struct qeth_hdr_layer3 *h1,
 					 struct qeth_hdr_layer3 *h2)
 {
 	return !((h1->flags ^ h2->flags) & QETH_HDR_IPV6) &&
-	       ipv6_addr_equal(&h1->next_hop.ipv6_addr,
-			       &h2->next_hop.ipv6_addr);
+	       ipv6_addr_equal(&h1->next_hop.addr, &h2->next_hop.addr);
 }
 
 struct qeth_local_addr {
@@ -945,6 +941,25 @@ static inline struct dst_entry *qeth_dst_check_rcu(struct sk_buff *skb, int ipv)
 	return dst;
 }
 
+static inline __be32 qeth_next_hop_v4_rcu(struct sk_buff *skb,
+					  struct dst_entry *dst)
+{
+	struct rtable *rt = (struct rtable *) dst;
+
+	return (rt) ? rt_nexthop(rt, ip_hdr(skb)->daddr) : ip_hdr(skb)->daddr;
+}
+
+static inline struct in6_addr *qeth_next_hop_v6_rcu(struct sk_buff *skb,
+						    struct dst_entry *dst)
+{
+	struct rt6_info *rt = (struct rt6_info *) dst;
+
+	if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
+		return &rt->rt6i_gateway;
+	else
+		return &ipv6_hdr(skb)->daddr;
+}
+
 static inline void qeth_tx_csum(struct sk_buff *skb, u8 *flags, int ipv)
 {
 	*flags |= QETH_HDR_EXT_CSUM_TRANSP_REQ;
diff --git a/drivers/s390/net/qeth_l3_main.c b/drivers/s390/net/qeth_l3_main.c
index fec4ac41e946..1e50aa0297a3 100644
--- a/drivers/s390/net/qeth_l3_main.c
+++ b/drivers/s390/net/qeth_l3_main.c
@@ -1694,8 +1694,8 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
 
 		if (skb->protocol == htons(ETH_P_AF_IUCV)) {
 			l3_hdr->flags = QETH_HDR_IPV6 | QETH_CAST_UNICAST;
-			l3_hdr->next_hop.ipv6_addr.s6_addr16[0] = htons(0xfe80);
-			memcpy(&l3_hdr->next_hop.ipv6_addr.s6_addr32[2],
+			l3_hdr->next_hop.addr.s6_addr16[0] = htons(0xfe80);
+			memcpy(&l3_hdr->next_hop.addr.s6_addr32[2],
 			       iucv_trans_hdr(skb)->destUserID, 8);
 			return;
 		}
@@ -1729,18 +1729,10 @@ static void qeth_l3_fill_header(struct qeth_qdio_out_q *queue,
 	l3_hdr->flags |= qeth_l3_cast_type_to_flag(cast_type);
 
 	if (ipv == 4) {
-		struct rtable *rt = (struct rtable *) dst;
-
-		*((__be32 *) &hdr->hdr.l3.next_hop.ipv4.addr) = (rt) ?
-				rt_nexthop(rt, ip_hdr(skb)->daddr) :
-				ip_hdr(skb)->daddr;
+		l3_hdr->next_hop.addr.s6_addr32[3] =
+					qeth_next_hop_v4_rcu(skb, dst);
 	} else if (ipv == 6) {
-		struct rt6_info *rt = (struct rt6_info *) dst;
-
-		if (rt && !ipv6_addr_any(&rt->rt6i_gateway))
-			l3_hdr->next_hop.ipv6_addr = rt->rt6i_gateway;
-		else
-			l3_hdr->next_hop.ipv6_addr = ipv6_hdr(skb)->daddr;
+		l3_hdr->next_hop.addr = *qeth_next_hop_v6_rcu(skb, dst);
 
 		hdr->hdr.l3.flags |= QETH_HDR_IPV6;
 		if (!IS_IQD(card))
-- 
cgit v1.2.3-59-g8ed1b


From c619e9a6f52f87b405053edc5abb55d3e36a8e7b Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:44 +0200
Subject: s390/qeth: don't use restricted offloads for local traffic

Current OSA models don't support TSO for traffic to local next-hops, and
some old models didn't offer TX CSO for such packets either.

So as part of .ndo_features_check, check if a packet's next-hop resides
on the same OSA Adapter. Opt out from affected HW offloads accordingly.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 84 +++++++++++++++++++++++++++++++++++++--
 drivers/s390/net/qeth_l2_main.c   |  1 +
 2 files changed, 81 insertions(+), 4 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 771282cb7aef..1f18b38047a0 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -806,6 +806,58 @@ static void qeth_del_local_addrs6(struct qeth_card *card,
 	spin_unlock(&card->local_addrs6_lock);
 }
 
+static bool qeth_next_hop_is_local_v4(struct qeth_card *card,
+				      struct sk_buff *skb)
+{
+	struct qeth_local_addr *tmp;
+	bool is_local = false;
+	unsigned int key;
+	__be32 next_hop;
+
+	if (hash_empty(card->local_addrs4))
+		return false;
+
+	rcu_read_lock();
+	next_hop = qeth_next_hop_v4_rcu(skb, qeth_dst_check_rcu(skb, 4));
+	key = ipv4_addr_hash(next_hop);
+
+	hash_for_each_possible_rcu(card->local_addrs4, tmp, hnode, key) {
+		if (tmp->addr.s6_addr32[3] == next_hop) {
+			is_local = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_local;
+}
+
+static bool qeth_next_hop_is_local_v6(struct qeth_card *card,
+				      struct sk_buff *skb)
+{
+	struct qeth_local_addr *tmp;
+	struct in6_addr *next_hop;
+	bool is_local = false;
+	u32 key;
+
+	if (hash_empty(card->local_addrs6))
+		return false;
+
+	rcu_read_lock();
+	next_hop = qeth_next_hop_v6_rcu(skb, qeth_dst_check_rcu(skb, 6));
+	key = ipv6_addr_hash(next_hop);
+
+	hash_for_each_possible_rcu(card->local_addrs6, tmp, hnode, key) {
+		if (ipv6_addr_equal(&tmp->addr, next_hop)) {
+			is_local = true;
+			break;
+		}
+	}
+	rcu_read_unlock();
+
+	return is_local;
+}
+
 static int qeth_debugfs_local_addr_show(struct seq_file *m, void *v)
 {
 	struct qeth_card *card = m->private;
@@ -6578,10 +6630,6 @@ static int qeth_set_csum_on(struct qeth_card *card, enum qeth_ipa_funcs cstype,
 	if (lp2lp)
 		*lp2lp = qeth_ipa_caps_enabled(&caps, QETH_IPA_CHECKSUM_LP2LP);
 
-	if (lp2lp && !*lp2lp)
-		dev_warn(&card->gdev->dev,
-			 "Hardware checksumming is performed only if %s and its peer use different OSA Express 3 ports\n",
-			 QETH_CARD_IFNAME(card));
 	return 0;
 }
 
@@ -6816,6 +6864,34 @@ netdev_features_t qeth_features_check(struct sk_buff *skb,
 				      struct net_device *dev,
 				      netdev_features_t features)
 {
+	/* Traffic with local next-hop is not eligible for some offloads: */
+	if (skb->ip_summed == CHECKSUM_PARTIAL) {
+		struct qeth_card *card = dev->ml_priv;
+		netdev_features_t restricted = 0;
+
+		if (skb_is_gso(skb) && !netif_needs_gso(skb, features))
+			restricted |= NETIF_F_ALL_TSO;
+
+		switch (vlan_get_protocol(skb)) {
+		case htons(ETH_P_IP):
+			if (!card->info.has_lp2lp_cso_v4)
+				restricted |= NETIF_F_IP_CSUM;
+
+			if (restricted && qeth_next_hop_is_local_v4(card, skb))
+				features &= ~restricted;
+			break;
+		case htons(ETH_P_IPV6):
+			if (!card->info.has_lp2lp_cso_v6)
+				restricted |= NETIF_F_IPV6_CSUM;
+
+			if (restricted && qeth_next_hop_is_local_v6(card, skb))
+				features &= ~restricted;
+			break;
+		default:
+			break;
+		}
+	}
+
 	/* GSO segmentation builds skbs with
 	 *	a (small) linear part for the headers, and
 	 *	page frags for the data.
diff --git a/drivers/s390/net/qeth_l2_main.c b/drivers/s390/net/qeth_l2_main.c
index 47f624b37040..da47e423e1b1 100644
--- a/drivers/s390/net/qeth_l2_main.c
+++ b/drivers/s390/net/qeth_l2_main.c
@@ -710,6 +710,7 @@ static int qeth_l2_setup_netdev(struct qeth_card *card)
 
 	if (card->dev->hw_features & (NETIF_F_TSO | NETIF_F_TSO6)) {
 		card->dev->needed_headroom = sizeof(struct qeth_hdr_tso);
+		netif_keep_dst(card->dev);
 		netif_set_gso_max_size(card->dev,
 				       PAGE_SIZE * (QDIO_MAX_ELEMENTS_PER_BUFFER - 1));
 	}
-- 
cgit v1.2.3-59-g8ed1b


From fff1aceb880f509fa02341acf0adca905513f139 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:45 +0200
Subject: s390/qeth: merge TX skb mapping code

Merge the __qeth_fill_buffer() helper into its only caller. This way all
mapping-related context is in one place, and we can make some more use
of it in a subsequent patch.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 64 +++++++++++++++++----------------------
 1 file changed, 27 insertions(+), 37 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 1f18b38047a0..9c9a6edb5384 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4081,15 +4081,39 @@ static bool qeth_iqd_may_bulk(struct qeth_qdio_out_q *queue,
 	       qeth_l3_iqd_same_vlan(&prev_hdr->hdr.l3, &curr_hdr->hdr.l3);
 }
 
-static unsigned int __qeth_fill_buffer(struct sk_buff *skb,
-				       struct qeth_qdio_out_buffer *buf,
-				       bool is_first_elem, unsigned int offset)
+/**
+ * qeth_fill_buffer() - map skb into an output buffer
+ * @buf:	buffer to transport the skb
+ * @skb:	skb to map into the buffer
+ * @hdr:	qeth_hdr for this skb. Either at skb->data, or allocated
+ *		from qeth_core_header_cache.
+ * @offset:	when mapping the skb, start at skb->data + offset
+ * @hd_len:	if > 0, build a dedicated header element of this size
+ */
+static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf,
+				     struct sk_buff *skb, struct qeth_hdr *hdr,
+				     unsigned int offset, unsigned int hd_len)
 {
 	struct qdio_buffer *buffer = buf->buffer;
 	int element = buf->next_element_to_fill;
 	int length = skb_headlen(skb) - offset;
 	char *data = skb->data + offset;
 	unsigned int elem_length, cnt;
+	bool is_first_elem = true;
+
+	__skb_queue_tail(&buf->skb_list, skb);
+
+	/* build dedicated element for HW Header */
+	if (hd_len) {
+		is_first_elem = false;
+
+		buffer->element[element].addr = virt_to_phys(hdr);
+		buffer->element[element].length = hd_len;
+		buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
+		/* remember to free cache-allocated HW header: */
+		buf->is_header[element] = ((void *)hdr != skb->data);
+		element++;
+	}
 
 	/* map linear part into buffer element(s) */
 	while (length > 0) {
@@ -4143,40 +4167,6 @@ static unsigned int __qeth_fill_buffer(struct sk_buff *skb,
 	return element;
 }
 
-/**
- * qeth_fill_buffer() - map skb into an output buffer
- * @buf:	buffer to transport the skb
- * @skb:	skb to map into the buffer
- * @hdr:	qeth_hdr for this skb. Either at skb->data, or allocated
- *		from qeth_core_header_cache.
- * @offset:	when mapping the skb, start at skb->data + offset
- * @hd_len:	if > 0, build a dedicated header element of this size
- */
-static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf,
-				     struct sk_buff *skb, struct qeth_hdr *hdr,
-				     unsigned int offset, unsigned int hd_len)
-{
-	struct qdio_buffer *buffer = buf->buffer;
-	bool is_first_elem = true;
-
-	__skb_queue_tail(&buf->skb_list, skb);
-
-	/* build dedicated header element */
-	if (hd_len) {
-		int element = buf->next_element_to_fill;
-		is_first_elem = false;
-
-		buffer->element[element].addr = virt_to_phys(hdr);
-		buffer->element[element].length = hd_len;
-		buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
-		/* remember to free cache-allocated qeth_hdr: */
-		buf->is_header[element] = ((void *)hdr != skb->data);
-		buf->next_element_to_fill++;
-	}
-
-	return __qeth_fill_buffer(skb, buf, is_first_elem, offset);
-}
-
 static int __qeth_xmit(struct qeth_card *card, struct qeth_qdio_out_q *queue,
 		       struct sk_buff *skb, unsigned int elements,
 		       struct qeth_hdr *hdr, unsigned int offset,
-- 
cgit v1.2.3-59-g8ed1b


From a875fd1620ffb65f4e0d488f7fe1fba62cf9fedd Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:46 +0200
Subject: s390/qeth: indicate contiguous TX buffer elements

The TX path usually maps the full content of a page into a buffer
element. But there are specific skb layouts (i.e. linearized TSO skbs)
where the HW header (1) requires a separate buffer element, and (2) is
page-contiguous with the packet data that's mapped into the next buffer
element.
Flag such buffer elements accordingly, so that HW can optimize its data
access for them.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 12 ++++++++++--
 1 file changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 9c9a6edb5384..4d1d053eebb7 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -4110,8 +4110,16 @@ static unsigned int qeth_fill_buffer(struct qeth_qdio_out_buffer *buf,
 		buffer->element[element].addr = virt_to_phys(hdr);
 		buffer->element[element].length = hd_len;
 		buffer->element[element].eflags = SBAL_EFLAGS_FIRST_FRAG;
-		/* remember to free cache-allocated HW header: */
-		buf->is_header[element] = ((void *)hdr != skb->data);
+
+		/* HW header is allocated from cache: */
+		if ((void *)hdr != skb->data)
+			buf->is_header[element] = 1;
+		/* HW header was pushed and is contiguous with linear part: */
+		else if (length > 0 && !PAGE_ALIGNED(data) &&
+			 (data == (char *)hdr + hd_len))
+			buffer->element[element].eflags |=
+				SBAL_EFLAGS_CONTIGUOUS;
+
 		element++;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 6ee091efa7b59829448d0f9287ea62c1f448a318 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:47 +0200
Subject: s390/qeth: set TX IRQ marker on last buffer in a group

When qeth_flush_buffers() gets called for a group of TX buffers
(currently up to 2 for OSA-style devices), the code iterates over each
buffer for some final processing.

During this processing, it sets the TX IRQ marker on the leading buffer
rather than the last one. This can result in delayed TX completion of
the trailing buffers. So pull the IRQ marker code out of the loop, and
apply it to the final buffer.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core_main.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 4d1d053eebb7..164cc7f377fc 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -3617,11 +3617,11 @@ static int qeth_switch_to_nonpacking_if_needed(struct qeth_qdio_out_q *queue)
 static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 			       int count)
 {
+	struct qeth_qdio_out_buffer *buf = queue->bufs[index];
+	unsigned int qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
 	struct qeth_card *card = queue->card;
-	struct qeth_qdio_out_buffer *buf;
 	int rc;
 	int i;
-	unsigned int qdio_flags;
 
 	for (i = index; i < index + count; ++i) {
 		unsigned int bidx = QDIO_BUFNR(i);
@@ -3638,9 +3638,10 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 		if (IS_IQD(card)) {
 			skb_queue_walk(&buf->skb_list, skb)
 				skb_tx_timestamp(skb);
-			continue;
 		}
+	}
 
+	if (!IS_IQD(card)) {
 		if (!queue->do_pack) {
 			if ((atomic_read(&queue->used_buffers) >=
 				(QETH_HIGH_WATERMARK_PACK -
@@ -3665,12 +3666,12 @@ static void qeth_flush_buffers(struct qeth_qdio_out_q *queue, int index,
 				buf->buffer->element[0].sflags |= SBAL_SFLAGS0_PCI_REQ;
 			}
 		}
+
+		if (atomic_read(&queue->set_pci_flags_count))
+			qdio_flags |= QDIO_FLAG_PCI_OUT;
 	}
 
 	QETH_TXQ_STAT_INC(queue, doorbell);
-	qdio_flags = QDIO_FLAG_SYNC_OUTPUT;
-	if (atomic_read(&queue->set_pci_flags_count))
-		qdio_flags |= QDIO_FLAG_PCI_OUT;
 	rc = do_QDIO(CARD_DDEV(queue->card), qdio_flags,
 		     queue->queue_no, index, count);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7005b7547680963ad127c5069c6be082272105bc Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:48 +0200
Subject: s390/qeth: return error when starting a reset fails

When starting the reset worker via sysfs is unsuccessful, return an
error to the user.
Modernize the sysfs input parsing while at it.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Reviewed-by: Alexandra Winter <wintera@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/qeth_core.h      |  2 +-
 drivers/s390/net/qeth_core_main.c | 26 +++++++++++++++++---------
 drivers/s390/net/qeth_core_sys.c  | 15 +++++++++------
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/s390/net/qeth_core.h b/drivers/s390/net/qeth_core.h
index 6b0d37d2c638..51ea56b73a97 100644
--- a/drivers/s390/net/qeth_core.h
+++ b/drivers/s390/net/qeth_core.h
@@ -1053,7 +1053,7 @@ struct qeth_cmd_buffer *qeth_get_diag_cmd(struct qeth_card *card,
 void qeth_notify_cmd(struct qeth_cmd_buffer *iob, int reason);
 void qeth_put_cmd(struct qeth_cmd_buffer *iob);
 
-void qeth_schedule_recovery(struct qeth_card *);
+int qeth_schedule_recovery(struct qeth_card *card);
 void qeth_flush_local_addrs(struct qeth_card *card);
 int qeth_poll(struct napi_struct *napi, int budget);
 void qeth_clear_ipacmd_list(struct qeth_card *);
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index 164cc7f377fc..c0ab6e7bc129 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -1131,16 +1131,18 @@ static int qeth_set_thread_start_bit(struct qeth_card *card,
 		unsigned long thread)
 {
 	unsigned long flags;
+	int rc = 0;
 
 	spin_lock_irqsave(&card->thread_mask_lock, flags);
-	if (!(card->thread_allowed_mask & thread) ||
-	      (card->thread_start_mask & thread)) {
-		spin_unlock_irqrestore(&card->thread_mask_lock, flags);
-		return -EPERM;
-	}
-	card->thread_start_mask |= thread;
+	if (!(card->thread_allowed_mask & thread))
+		rc = -EPERM;
+	else if (card->thread_start_mask & thread)
+		rc = -EBUSY;
+	else
+		card->thread_start_mask |= thread;
 	spin_unlock_irqrestore(&card->thread_mask_lock, flags);
-	return 0;
+
+	return rc;
 }
 
 static void qeth_clear_thread_start_bit(struct qeth_card *card,
@@ -1193,11 +1195,17 @@ static int qeth_do_run_thread(struct qeth_card *card, unsigned long thread)
 	return rc;
 }
 
-void qeth_schedule_recovery(struct qeth_card *card)
+int qeth_schedule_recovery(struct qeth_card *card)
 {
+	int rc;
+
 	QETH_CARD_TEXT(card, 2, "startrec");
-	if (qeth_set_thread_start_bit(card, QETH_RECOVER_THREAD) == 0)
+
+	rc = qeth_set_thread_start_bit(card, QETH_RECOVER_THREAD);
+	if (!rc)
 		schedule_work(&card->kernel_thread_starter);
+
+	return rc;
 }
 
 static int qeth_get_problem(struct qeth_card *card, struct ccw_device *cdev,
diff --git a/drivers/s390/net/qeth_core_sys.c b/drivers/s390/net/qeth_core_sys.c
index d7e429f6631e..c901c942fed7 100644
--- a/drivers/s390/net/qeth_core_sys.c
+++ b/drivers/s390/net/qeth_core_sys.c
@@ -275,17 +275,20 @@ static ssize_t qeth_dev_recover_store(struct device *dev,
 		struct device_attribute *attr, const char *buf, size_t count)
 {
 	struct qeth_card *card = dev_get_drvdata(dev);
-	char *tmp;
-	int i;
+	bool reset;
+	int rc;
+
+	rc = kstrtobool(buf, &reset);
+	if (rc)
+		return rc;
 
 	if (!qeth_card_hw_is_reachable(card))
 		return -EPERM;
 
-	i = simple_strtoul(buf, &tmp, 16);
-	if (i == 1)
-		qeth_schedule_recovery(card);
+	if (reset)
+		rc = qeth_schedule_recovery(card);
 
-	return count;
+	return rc ? rc : count;
 }
 
 static DEVICE_ATTR(recover, 0200, NULL, qeth_dev_recover_store);
-- 
cgit v1.2.3-59-g8ed1b


From dc9c404f98a98b470d7cad88a7cbed16dd273d39 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Wed, 6 May 2020 10:09:49 +0200
Subject: s390/qeth: clean up Kconfig help text

Remove a stale doc link. While at it also reword the help text to get
rid of an outdated marketing term.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/Kconfig | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/s390/net/Kconfig b/drivers/s390/net/Kconfig
index 3850a0f5f0bc..53120e68796e 100644
--- a/drivers/s390/net/Kconfig
+++ b/drivers/s390/net/Kconfig
@@ -63,12 +63,9 @@ config QETH
 	prompt "Gigabit Ethernet device support"
 	depends on CCW && NETDEVICES && IP_MULTICAST && QDIO && ETHERNET
 	help
-	  This driver supports the IBM System z OSA Express adapters
-	  in QDIO mode (all media types), HiperSockets interfaces and z/VM
-	  virtual NICs for Guest LAN and VSWITCH.
-	
-	  For details please refer to the documentation provided by IBM at
-	  <http://www.ibm.com/developerworks/linux/linux390>
+	  This driver supports IBM's OSA Express network adapters in QDIO mode,
+	  HiperSockets interfaces and z/VM virtual NICs for Guest LAN and
+	  VSWITCH.
 
 	  To compile this driver as a module, choose M.
 	  The module name is qeth.
-- 
cgit v1.2.3-59-g8ed1b


From 1f3e915e316c4393ce940ab184c176b7202f3ff7 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 17:25:14 +0800
Subject: net: renesas: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/sh_eth.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/renesas/sh_eth.c b/drivers/net/ethernet/renesas/sh_eth.c
index 8ed73f44405d..f45331ed90b0 100644
--- a/drivers/net/ethernet/renesas/sh_eth.c
+++ b/drivers/net/ethernet/renesas/sh_eth.c
@@ -2472,7 +2472,8 @@ static void sh_eth_tx_timeout(struct net_device *ndev, unsigned int txqueue)
 }
 
 /* Packet transmit function */
-static int sh_eth_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t sh_eth_start_xmit(struct sk_buff *skb,
+				     struct net_device *ndev)
 {
 	struct sh_eth_private *mdp = netdev_priv(ndev);
 	struct sh_eth_txdesc *txdesc;
-- 
cgit v1.2.3-59-g8ed1b


From d54fc481bd828afee199e4853020c5ecd9d79778 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 19:04:59 +0800
Subject: net: socionext: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/socionext/sni_ave.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/socionext/sni_ave.c b/drivers/net/ethernet/socionext/sni_ave.c
index 67ddf782d98a..f2638446b62e 100644
--- a/drivers/net/ethernet/socionext/sni_ave.c
+++ b/drivers/net/ethernet/socionext/sni_ave.c
@@ -1394,7 +1394,7 @@ static int ave_stop(struct net_device *ndev)
 	return 0;
 }
 
-static int ave_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t ave_start_xmit(struct sk_buff *skb, struct net_device *ndev)
 {
 	struct ave_private *priv = netdev_priv(ndev);
 	u32 proc_idx, done_idx, ndesc, cmdsts;
-- 
cgit v1.2.3-59-g8ed1b


From e910a3993818ab69fcb9d791cbbe07bb153c0660 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 19:13:08 +0800
Subject: net: mediatek: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_eth_soc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_eth_soc.c b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
index 2822268220f7..f6a1f8666f95 100644
--- a/drivers/net/ethernet/mediatek/mtk_eth_soc.c
+++ b/drivers/net/ethernet/mediatek/mtk_eth_soc.c
@@ -1122,7 +1122,7 @@ static void mtk_stop_queue(struct mtk_eth *eth)
 	}
 }
 
-static int mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t mtk_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mtk_mac *mac = netdev_priv(dev);
 	struct mtk_eth *eth = mac->hw;
-- 
cgit v1.2.3-59-g8ed1b


From df1577cbaa87d8a29bf18f41d2ba82668d59d106 Mon Sep 17 00:00:00 2001
From: ChenTao <chentao107@huawei.com>
Date: Wed, 6 May 2020 19:22:17 +0800
Subject: net: enetc: Make some symbols static

Fix the following warning:

drivers/net/ethernet/freescale/enetc/enetc_qos.c:427:20: warning:
symbol 'enetc_act_fwd' was not declared. Should it be static?
drivers/net/ethernet/freescale/enetc/enetc_qos.c:966:20: warning:
symbol 'enetc_check_flow_actions' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: ChenTao <chentao107@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_qos.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 48e589e9d0f7..172acb602ccb 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -424,7 +424,7 @@ struct enetc_psfp {
 	spinlock_t psfp_lock; /* spinlock for the struct enetc_psfp r/w */
 };
 
-struct actions_fwd enetc_act_fwd[] = {
+static struct actions_fwd enetc_act_fwd[] = {
 	{
 		BIT(FLOW_ACTION_GATE),
 		BIT(FLOW_DISSECTOR_KEY_ETH_ADDRS),
@@ -963,7 +963,8 @@ revert_sid:
 	return err;
 }
 
-struct actions_fwd *enetc_check_flow_actions(u64 acts, unsigned int inputkeys)
+static struct actions_fwd *enetc_check_flow_actions(u64 acts,
+						    unsigned int inputkeys)
 {
 	int i;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1cfecc2353c24b37eea69d9b9a606ca9f473f9bc Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 19:38:46 +0800
Subject: net: lantiq: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/lantiq_xrx200.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/lantiq_xrx200.c b/drivers/net/ethernet/lantiq_xrx200.c
index 900affbdcc0e..1645e4e7ebdb 100644
--- a/drivers/net/ethernet/lantiq_xrx200.c
+++ b/drivers/net/ethernet/lantiq_xrx200.c
@@ -276,7 +276,8 @@ static int xrx200_tx_housekeeping(struct napi_struct *napi, int budget)
 	return pkts;
 }
 
-static int xrx200_start_xmit(struct sk_buff *skb, struct net_device *net_dev)
+static netdev_tx_t xrx200_start_xmit(struct sk_buff *skb,
+				     struct net_device *net_dev)
 {
 	struct xrx200_priv *priv = netdev_priv(net_dev);
 	struct xrx200_chan *ch = &priv->chan_tx;
-- 
cgit v1.2.3-59-g8ed1b


From 10786531cf996d0d1a0f901d9d559eb947013ac3 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 20:10:28 +0800
Subject: net: moxa: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/moxa/moxart_ether.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/moxa/moxart_ether.c b/drivers/net/ethernet/moxa/moxart_ether.c
index e1651756bf9d..1b25cc42f442 100644
--- a/drivers/net/ethernet/moxa/moxart_ether.c
+++ b/drivers/net/ethernet/moxa/moxart_ether.c
@@ -331,14 +331,15 @@ static irqreturn_t moxart_mac_interrupt(int irq, void *dev_id)
 	return IRQ_HANDLED;
 }
 
-static int moxart_mac_start_xmit(struct sk_buff *skb, struct net_device *ndev)
+static netdev_tx_t moxart_mac_start_xmit(struct sk_buff *skb,
+					 struct net_device *ndev)
 {
 	struct moxart_mac_priv_t *priv = netdev_priv(ndev);
 	void *desc;
 	unsigned int len;
 	unsigned int tx_head;
 	u32 txdes1;
-	int ret = NETDEV_TX_BUSY;
+	netdev_tx_t ret = NETDEV_TX_BUSY;
 
 	spin_lock_irq(&priv->txlock);
 
-- 
cgit v1.2.3-59-g8ed1b


From 787e4a79cd5ffc46fe5fa1ce9464e79143423ac0 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 20:21:45 +0800
Subject: net: cpmac: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/cpmac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/ti/cpmac.c b/drivers/net/ethernet/ti/cpmac.c
index a530afe3ce12..c20715107075 100644
--- a/drivers/net/ethernet/ti/cpmac.c
+++ b/drivers/net/ethernet/ti/cpmac.c
@@ -532,7 +532,7 @@ fatal_error:
 
 }
 
-static int cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
+static netdev_tx_t cpmac_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	int queue;
 	unsigned int len;
-- 
cgit v1.2.3-59-g8ed1b


From 9532131f6995002e88390b8b6572a44531c1fae0 Mon Sep 17 00:00:00 2001
From: Yunjian Wang <wangyunjian@huawei.com>
Date: Wed, 6 May 2020 20:55:52 +0800
Subject: net: 7990: Fix use correct return type for ndo_start_xmit()

The method ndo_start_xmit() returns a value of type netdev_tx_t. Fix
the ndo function to use the correct type.

Signed-off-by: Yunjian Wang <wangyunjian@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/7990.c | 2 +-
 drivers/net/ethernet/amd/7990.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amd/7990.c b/drivers/net/ethernet/amd/7990.c
index cf3562e82ca9..50fb66369415 100644
--- a/drivers/net/ethernet/amd/7990.c
+++ b/drivers/net/ethernet/amd/7990.c
@@ -536,7 +536,7 @@ void lance_tx_timeout(struct net_device *dev, unsigned int txqueue)
 }
 EXPORT_SYMBOL_GPL(lance_tx_timeout);
 
-int lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
+netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct lance_private *lp = netdev_priv(dev);
 	volatile struct lance_init_block *ib = lp->init_block;
diff --git a/drivers/net/ethernet/amd/7990.h b/drivers/net/ethernet/amd/7990.h
index 8266b3c1fefc..e53551daeea1 100644
--- a/drivers/net/ethernet/amd/7990.h
+++ b/drivers/net/ethernet/amd/7990.h
@@ -241,7 +241,7 @@ struct lance_private {
 /* Now the prototypes we export */
 int lance_open(struct net_device *dev);
 int lance_close(struct net_device *dev);
-int lance_start_xmit(struct sk_buff *skb, struct net_device *dev);
+netdev_tx_t lance_start_xmit(struct sk_buff *skb, struct net_device *dev);
 void lance_set_multicast(struct net_device *dev);
 void lance_tx_timeout(struct net_device *dev, unsigned int txqueue);
 #ifdef CONFIG_NET_POLL_CONTROLLER
-- 
cgit v1.2.3-59-g8ed1b


From 857b412511c87ed7137e63b90b2de020c24660e9 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Wed, 6 May 2020 20:17:19 +0530
Subject: Revert "crypto: chelsio - Inline single pdu only"

This reverts commit 27c6feb0fb33a665a746346e76714826a5be5d10.

For ipsec offload the chelsio's ethernet driver expects a single mtu
sized packet.

But when ipsec traffic is running using iperf, most of the packets in
that traffic are gso packets(large sized skbs) because GSO is enabled by
default in TCP, due to this commit 0a6b2a1dc2a2 ("tcp: switch to GSO
being always on"), so chcr_ipsec_offload_ok() receives a gso
skb(with gso_size non zero).

Due to the check in chcr_ipsec_offload_ok(), this function returns false
for most of the packets, so ipsec offload is skipped and the skb goes
out via the coprocessor path, which reduces the bandwidth for inline
ipsec.

If this check is removed then for most of the packets(large sized skbs)
the chcr_ipsec_offload_ok() returns true and then as GSO is on, the
segmentation of the packet happens in the kernel and then finally the
driver_xmit is called, which receives a segmented mtu sized packet which
is what the driver expects for ipsec offload. So this case becomes
unnecessary here, therefore removing it.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_ipsec.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index 9fd3b9d1ec2f..d25689837b26 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -294,9 +294,6 @@ static bool chcr_ipsec_offload_ok(struct sk_buff *skb, struct xfrm_state *x)
 		if (ipv6_ext_hdr(ipv6_hdr(skb)->nexthdr))
 			return false;
 	}
-	/* Inline single pdu */
-	if (skb_shinfo(skb)->gso_size)
-		return false;
 	return true;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 6349084746ff4f5f7ebc748e4b2a890f8c57b129 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 6 May 2020 16:53:13 +0200
Subject: net: phy: add concept of shared storage for PHYs

There are packages which contain multiple PHY devices, eg. a quad PHY
transceiver. Provide functions to allocate and free shared storage.

Usually, a quad PHY contains global registers, which don't belong to any
PHY. Provide convenience functions to access these registers.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio_bus.c   |   1 +
 drivers/net/phy/phy_device.c | 138 +++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h          |  89 ++++++++++++++++++++++++++++
 3 files changed, 228 insertions(+)

diff --git a/drivers/net/phy/mdio_bus.c b/drivers/net/phy/mdio_bus.c
index 3e79b96fa344..255fdfcc13a6 100644
--- a/drivers/net/phy/mdio_bus.c
+++ b/drivers/net/phy/mdio_bus.c
@@ -614,6 +614,7 @@ int __mdiobus_register(struct mii_bus *bus, struct module *owner)
 	}
 
 	mutex_init(&bus->mdio_lock);
+	mutex_init(&bus->shared_lock);
 
 	/* de-assert bus level PHY GPIO reset */
 	gpiod = devm_gpiod_get_optional(&bus->dev, "reset", GPIOD_OUT_LOW);
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 7e1ddd5745d2..b1c5e4503bc4 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1461,6 +1461,144 @@ bool phy_driver_is_genphy_10g(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(phy_driver_is_genphy_10g);
 
+/**
+ * phy_package_join - join a common PHY group
+ * @phydev: target phy_device struct
+ * @addr: cookie and PHY address for global register access
+ * @priv_size: if non-zero allocate this amount of bytes for private data
+ *
+ * This joins a PHY group and provides a shared storage for all phydevs in
+ * this group. This is intended to be used for packages which contain
+ * more than one PHY, for example a quad PHY transceiver.
+ *
+ * The addr parameter serves as a cookie which has to have the same value
+ * for all members of one group and as a PHY address to access generic
+ * registers of a PHY package. Usually, one of the PHY addresses of the
+ * different PHYs in the package provides access to these global registers.
+ * The address which is given here, will be used in the phy_package_read()
+ * and phy_package_write() convenience functions. If your PHY doesn't have
+ * global registers you can just pick any of the PHY addresses.
+ *
+ * This will set the shared pointer of the phydev to the shared storage.
+ * If this is the first call for this cookie the shared storage will be
+ * allocated. If priv_size is non-zero, the given amount of bytes are
+ * allocated for the priv member.
+ *
+ * Returns < 0 on error, 0 on success. Esp. calling phy_package_join()
+ * with the same cookie but a different priv_size is an error.
+ */
+int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size)
+{
+	struct mii_bus *bus = phydev->mdio.bus;
+	struct phy_package_shared *shared;
+	int ret;
+
+	if (addr < 0 || addr >= PHY_MAX_ADDR)
+		return -EINVAL;
+
+	mutex_lock(&bus->shared_lock);
+	shared = bus->shared[addr];
+	if (!shared) {
+		ret = -ENOMEM;
+		shared = kzalloc(sizeof(*shared), GFP_KERNEL);
+		if (!shared)
+			goto err_unlock;
+		if (priv_size) {
+			shared->priv = kzalloc(priv_size, GFP_KERNEL);
+			if (!shared->priv)
+				goto err_free;
+			shared->priv_size = priv_size;
+		}
+		shared->addr = addr;
+		refcount_set(&shared->refcnt, 1);
+		bus->shared[addr] = shared;
+	} else {
+		ret = -EINVAL;
+		if (priv_size && priv_size != shared->priv_size)
+			goto err_unlock;
+		refcount_inc(&shared->refcnt);
+	}
+	mutex_unlock(&bus->shared_lock);
+
+	phydev->shared = shared;
+
+	return 0;
+
+err_free:
+	kfree(shared);
+err_unlock:
+	mutex_unlock(&bus->shared_lock);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(phy_package_join);
+
+/**
+ * phy_package_leave - leave a common PHY group
+ * @phydev: target phy_device struct
+ *
+ * This leaves a PHY group created by phy_package_join(). If this phydev
+ * was the last user of the shared data between the group, this data is
+ * freed. Resets the phydev->shared pointer to NULL.
+ */
+void phy_package_leave(struct phy_device *phydev)
+{
+	struct phy_package_shared *shared = phydev->shared;
+	struct mii_bus *bus = phydev->mdio.bus;
+
+	if (!shared)
+		return;
+
+	if (refcount_dec_and_mutex_lock(&shared->refcnt, &bus->shared_lock)) {
+		bus->shared[shared->addr] = NULL;
+		mutex_unlock(&bus->shared_lock);
+		kfree(shared->priv);
+		kfree(shared);
+	}
+
+	phydev->shared = NULL;
+}
+EXPORT_SYMBOL_GPL(phy_package_leave);
+
+static void devm_phy_package_leave(struct device *dev, void *res)
+{
+	phy_package_leave(*(struct phy_device **)res);
+}
+
+/**
+ * devm_phy_package_join - resource managed phy_package_join()
+ * @dev: device that is registering this PHY package
+ * @phydev: target phy_device struct
+ * @addr: cookie and PHY address for global register access
+ * @priv_size: if non-zero allocate this amount of bytes for private data
+ *
+ * Managed phy_package_join(). For shared storage fetched by this function,
+ * phy_package_leave() is automatically called on driver detach. See
+ * phy_package_join() for more information.
+ */
+int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
+			  int addr, size_t priv_size)
+{
+	struct phy_device **ptr;
+	int ret;
+
+	ptr = devres_alloc(devm_phy_package_leave, sizeof(*ptr),
+			   GFP_KERNEL);
+	if (!ptr)
+		return -ENOMEM;
+
+	ret = phy_package_join(phydev, addr, priv_size);
+
+	if (!ret) {
+		*ptr = phydev;
+		devres_add(dev, ptr);
+	} else {
+		devres_free(ptr);
+	}
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(devm_phy_package_join);
+
 /**
  * phy_detach - detach a PHY device from its network device
  * @phydev: target phy_device struct
diff --git a/include/linux/phy.h b/include/linux/phy.h
index e2bfb9240587..1d36ac608159 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -25,6 +25,7 @@
 #include <linux/u64_stats_sync.h>
 #include <linux/irqreturn.h>
 #include <linux/iopoll.h>
+#include <linux/refcount.h>
 
 #include <linux/atomic.h>
 
@@ -227,6 +228,28 @@ struct mdio_bus_stats {
 	struct u64_stats_sync syncp;
 };
 
+/* Represents a shared structure between different phydev's in the same
+ * package, for example a quad PHY. See phy_package_join() and
+ * phy_package_leave().
+ */
+struct phy_package_shared {
+	int addr;
+	refcount_t refcnt;
+	unsigned long flags;
+	size_t priv_size;
+
+	/* private data pointer */
+	/* note that this pointer is shared between different phydevs and
+	 * the user has to take care of appropriate locking. It is allocated
+	 * and freed automatically by phy_package_join() and
+	 * phy_package_leave().
+	 */
+	void *priv;
+};
+
+/* used as bit number in atomic bitops */
+#define PHY_SHARED_F_INIT_DONE 0
+
 /*
  * The Bus class for PHYs.  Devices which provide access to
  * PHYs should register using this structure
@@ -278,6 +301,12 @@ struct mii_bus {
 	int reset_delay_us;
 	/* RESET GPIO descriptor pointer */
 	struct gpio_desc *reset_gpiod;
+
+	/* protect access to the shared element */
+	struct mutex shared_lock;
+
+	/* shared state across different PHYs */
+	struct phy_package_shared *shared[PHY_MAX_ADDR];
 };
 #define to_mii_bus(d) container_of(d, struct mii_bus, dev)
 
@@ -478,6 +507,10 @@ struct phy_device {
 	/* For use by PHYs to maintain extra state */
 	void *priv;
 
+	/* shared data pointer */
+	/* For use by PHYs inside the same package that need a shared state. */
+	struct phy_package_shared *shared;
+
 	/* Interrupt and Polling infrastructure */
 	struct delayed_work state_queue;
 
@@ -1354,6 +1387,10 @@ int phy_ethtool_get_link_ksettings(struct net_device *ndev,
 int phy_ethtool_set_link_ksettings(struct net_device *ndev,
 				   const struct ethtool_link_ksettings *cmd);
 int phy_ethtool_nway_reset(struct net_device *ndev);
+int phy_package_join(struct phy_device *phydev, int addr, size_t priv_size);
+void phy_package_leave(struct phy_device *phydev);
+int devm_phy_package_join(struct device *dev, struct phy_device *phydev,
+			  int addr, size_t priv_size);
 
 #if IS_ENABLED(CONFIG_PHYLIB)
 int __init mdio_bus_init(void);
@@ -1406,6 +1443,58 @@ static inline int phy_ethtool_get_stats(struct phy_device *phydev,
 	return 0;
 }
 
+static inline int phy_package_read(struct phy_device *phydev, u32 regnum)
+{
+	struct phy_package_shared *shared = phydev->shared;
+
+	if (!shared)
+		return -EIO;
+
+	return mdiobus_read(phydev->mdio.bus, shared->addr, regnum);
+}
+
+static inline int __phy_package_read(struct phy_device *phydev, u32 regnum)
+{
+	struct phy_package_shared *shared = phydev->shared;
+
+	if (!shared)
+		return -EIO;
+
+	return __mdiobus_read(phydev->mdio.bus, shared->addr, regnum);
+}
+
+static inline int phy_package_write(struct phy_device *phydev,
+				    u32 regnum, u16 val)
+{
+	struct phy_package_shared *shared = phydev->shared;
+
+	if (!shared)
+		return -EIO;
+
+	return mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val);
+}
+
+static inline int __phy_package_write(struct phy_device *phydev,
+				      u32 regnum, u16 val)
+{
+	struct phy_package_shared *shared = phydev->shared;
+
+	if (!shared)
+		return -EIO;
+
+	return __mdiobus_write(phydev->mdio.bus, shared->addr, regnum, val);
+}
+
+static inline bool phy_package_init_once(struct phy_device *phydev)
+{
+	struct phy_package_shared *shared = phydev->shared;
+
+	if (!shared)
+		return false;
+
+	return !test_and_set_bit(PHY_SHARED_F_INIT_DONE, &shared->flags);
+}
+
 extern struct bus_type mdio_bus_type;
 
 struct mdio_board_info {
-- 
cgit v1.2.3-59-g8ed1b


From dc9989f173289f376d06cb289ae0bd46c6ac8017 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 6 May 2020 16:53:14 +0200
Subject: net: phy: bcm54140: use phy_package_shared

Use the new phy_package_shared common storage to ease the package
initialization and to access the global registers.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 57 +++++++++-------------------------------------
 1 file changed, 11 insertions(+), 46 deletions(-)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 400d7c3c405a..9ef37a3bc2bb 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -132,7 +132,6 @@ struct bcm54140_priv {
 	int port;
 	int base_addr;
 #if IS_ENABLED(CONFIG_HWMON)
-	bool pkg_init;
 	/* protect the alarm bits */
 	struct mutex alarm_lock;
 	u16 alarm;
@@ -407,36 +406,6 @@ static int bcm54140_enable_monitoring(struct phy_device *phydev)
 	return bcm_phy_modify_rdb(phydev, BCM54140_RDB_MON_CTRL, mask, set);
 }
 
-/* Check if one PHY has already done the init of the parts common to all PHYs
- * in the Quad PHY package.
- */
-static bool bcm54140_is_pkg_init(struct phy_device *phydev)
-{
-	struct bcm54140_priv *priv = phydev->priv;
-	struct mii_bus *bus = phydev->mdio.bus;
-	int base_addr = priv->base_addr;
-	struct phy_device *phy;
-	int i;
-
-	/* Quad PHY */
-	for (i = 0; i < 4; i++) {
-		phy = mdiobus_get_phy(bus, base_addr + i);
-		if (!phy)
-			continue;
-
-		if ((phy->phy_id & phydev->drv->phy_id_mask) !=
-		    (phydev->drv->phy_id & phydev->drv->phy_id_mask))
-			continue;
-
-		priv = phy->priv;
-
-		if (priv && priv->pkg_init)
-			return true;
-	}
-
-	return false;
-}
-
 static int bcm54140_probe_once(struct phy_device *phydev)
 {
 	struct device *hwmon;
@@ -457,38 +426,34 @@ static int bcm54140_probe_once(struct phy_device *phydev)
 
 static int bcm54140_base_read_rdb(struct phy_device *phydev, u16 rdb)
 {
-	struct bcm54140_priv *priv = phydev->priv;
-	struct mii_bus *bus = phydev->mdio.bus;
 	int ret;
 
-	mutex_lock(&bus->mdio_lock);
-	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_ADDR, rdb);
+	phy_lock_mdio_bus(phydev);
+	ret = __phy_package_write(phydev, MII_BCM54XX_RDB_ADDR, rdb);
 	if (ret < 0)
 		goto out;
 
-	ret = __mdiobus_read(bus, priv->base_addr, MII_BCM54XX_RDB_DATA);
+	ret = __phy_package_read(phydev, MII_BCM54XX_RDB_DATA);
 
 out:
-	mutex_unlock(&bus->mdio_lock);
+	phy_unlock_mdio_bus(phydev);
 	return ret;
 }
 
 static int bcm54140_base_write_rdb(struct phy_device *phydev,
 				   u16 rdb, u16 val)
 {
-	struct bcm54140_priv *priv = phydev->priv;
-	struct mii_bus *bus = phydev->mdio.bus;
 	int ret;
 
-	mutex_lock(&bus->mdio_lock);
-	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_ADDR, rdb);
+	phy_lock_mdio_bus(phydev);
+	ret = __phy_package_write(phydev, MII_BCM54XX_RDB_ADDR, rdb);
 	if (ret < 0)
 		goto out;
 
-	ret = __mdiobus_write(bus, priv->base_addr, MII_BCM54XX_RDB_DATA, val);
+	ret = __phy_package_write(phydev, MII_BCM54XX_RDB_DATA, val);
 
 out:
-	mutex_unlock(&bus->mdio_lock);
+	phy_unlock_mdio_bus(phydev);
 	return ret;
 }
 
@@ -618,16 +583,16 @@ static int bcm54140_probe(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
+	devm_phy_package_join(&phydev->mdio.dev, phydev, priv->base_addr, 0);
+
 #if IS_ENABLED(CONFIG_HWMON)
 	mutex_init(&priv->alarm_lock);
 
-	if (!bcm54140_is_pkg_init(phydev)) {
+	if (phy_package_init_once(phydev)) {
 		ret = bcm54140_probe_once(phydev);
 		if (ret)
 			return ret;
 	}
-
-	priv->pkg_init = true;
 #endif
 
 	phydev_dbg(phydev, "probed (port %d, base PHY address %d)\n",
-- 
cgit v1.2.3-59-g8ed1b


From deb04e9c0ff2b42cadf198c3204b13025c9bd72e Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 6 May 2020 16:53:15 +0200
Subject: net: phy: mscc: use phy_package_shared

Use the new phy_package_shared common storage to ease the package
initialization and to access the global registers.

Signed-off-by: Michael Walle <michael@walle.cc>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc.h      |   1 -
 drivers/net/phy/mscc/mscc_main.c | 101 +++++++++++++--------------------------
 2 files changed, 32 insertions(+), 70 deletions(-)

diff --git a/drivers/net/phy/mscc/mscc.h b/drivers/net/phy/mscc/mscc.h
index 030bf8b600df..acdd8ee61a39 100644
--- a/drivers/net/phy/mscc/mscc.h
+++ b/drivers/net/phy/mscc/mscc.h
@@ -353,7 +353,6 @@ struct vsc8531_private {
 	const struct vsc85xx_hw_stat *hw_stats;
 	u64 *stats;
 	int nstats;
-	bool pkg_init;
 	/* For multiple port PHYs; the MDIO address of the base PHY in the
 	 * package.
 	 */
diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 5391acdece05..6508d6536134 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -691,27 +691,23 @@ out_unlock:
 /* phydev->bus->mdio_lock should be locked when using this function */
 static int phy_base_write(struct phy_device *phydev, u32 regnum, u16 val)
 {
-	struct vsc8531_private *priv = phydev->priv;
-
 	if (unlikely(!mutex_is_locked(&phydev->mdio.bus->mdio_lock))) {
 		dev_err(&phydev->mdio.dev, "MDIO bus lock not held!\n");
 		dump_stack();
 	}
 
-	return __mdiobus_write(phydev->mdio.bus, priv->base_addr, regnum, val);
+	return __phy_package_write(phydev, regnum, val);
 }
 
 /* phydev->bus->mdio_lock should be locked when using this function */
 static int phy_base_read(struct phy_device *phydev, u32 regnum)
 {
-	struct vsc8531_private *priv = phydev->priv;
-
 	if (unlikely(!mutex_is_locked(&phydev->mdio.bus->mdio_lock))) {
 		dev_err(&phydev->mdio.dev, "MDIO bus lock not held!\n");
 		dump_stack();
 	}
 
-	return __mdiobus_read(phydev->mdio.bus, priv->base_addr, regnum);
+	return __phy_package_read(phydev, regnum);
 }
 
 /* bus->mdio_lock should be locked when using this function */
@@ -1287,65 +1283,38 @@ out:
 	return ret;
 }
 
-/* Check if one PHY has already done the init of the parts common to all PHYs
- * in the Quad PHY package.
- */
-static bool vsc8584_is_pkg_init(struct phy_device *phydev, bool reversed)
+static void vsc8584_get_base_addr(struct phy_device *phydev)
 {
-	struct mii_bus *bus = phydev->mdio.bus;
-	struct vsc8531_private *vsc8531;
-	struct phy_device *phy;
-	int i, addr;
-
-	/* VSC8584 is a Quad PHY */
-	for (i = 0; i < 4; i++) {
-		vsc8531 = phydev->priv;
-
-		if (reversed)
-			addr = vsc8531->base_addr - i;
-		else
-			addr = vsc8531->base_addr + i;
+	struct vsc8531_private *vsc8531 = phydev->priv;
+	u16 val, addr;
 
-		phy = mdiobus_get_phy(bus, addr);
-		if (!phy)
-			continue;
+	mutex_lock(&phydev->mdio.bus->mdio_lock);
+	__phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED);
 
-		if ((phy->phy_id & phydev->drv->phy_id_mask) !=
-		    (phydev->drv->phy_id & phydev->drv->phy_id_mask))
-			continue;
+	addr = __phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_4);
+	addr >>= PHY_CNTL_4_ADDR_POS;
 
-		vsc8531 = phy->priv;
+	val = __phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
 
-		if (vsc8531 && vsc8531->pkg_init)
-			return true;
-	}
+	__phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_STANDARD);
+	mutex_unlock(&phydev->mdio.bus->mdio_lock);
 
-	return false;
+	if (val & PHY_ADDR_REVERSED)
+		vsc8531->base_addr = phydev->mdio.addr + addr;
+	else
+		vsc8531->base_addr = phydev->mdio.addr - addr;
 }
 
 static int vsc8584_config_init(struct phy_device *phydev)
 {
 	struct vsc8531_private *vsc8531 = phydev->priv;
-	u16 addr, val;
 	int ret, i;
+	u16 val;
 
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
 
 	mutex_lock(&phydev->mdio.bus->mdio_lock);
 
-	__mdiobus_write(phydev->mdio.bus, phydev->mdio.addr,
-			MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED);
-	addr = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr,
-			      MSCC_PHY_EXT_PHY_CNTL_4);
-	addr >>= PHY_CNTL_4_ADDR_POS;
-
-	val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr,
-			     MSCC_PHY_ACTIPHY_CNTL);
-	if (val & PHY_ADDR_REVERSED)
-		vsc8531->base_addr = phydev->mdio.addr + addr;
-	else
-		vsc8531->base_addr = phydev->mdio.addr - addr;
-
 	/* Some parts of the init sequence are identical for every PHY in the
 	 * package. Some parts are modifying the GPIO register bank which is a
 	 * set of registers that are affecting all PHYs, a few resetting the
@@ -1359,7 +1328,7 @@ static int vsc8584_config_init(struct phy_device *phydev)
 	 * do the correct init sequence for all PHYs that are package-critical
 	 * in this pre-init function.
 	 */
-	if (!vsc8584_is_pkg_init(phydev, val & PHY_ADDR_REVERSED ? 1 : 0)) {
+	if (phy_package_init_once(phydev)) {
 		/* The following switch statement assumes that the lowest
 		 * nibble of the phy_id_mask is always 0. This works because
 		 * the lowest nibble of the PHY_ID's below are also 0.
@@ -1388,8 +1357,6 @@ static int vsc8584_config_init(struct phy_device *phydev)
 			goto err;
 	}
 
-	vsc8531->pkg_init = true;
-
 	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 		       MSCC_PHY_PAGE_EXTENDED_GPIO);
 
@@ -1427,7 +1394,8 @@ static int vsc8584_config_init(struct phy_device *phydev)
 
 	/* Disable SerDes for 100Base-FX */
 	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
-			  PROC_CMD_FIBER_PORT(addr) | PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_FIBER_PORT(vsc8531->base_addr) |
+			  PROC_CMD_FIBER_DISABLE |
 			  PROC_CMD_READ_MOD_WRITE_PORT |
 			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_100BASE_FX);
 	if (ret)
@@ -1435,7 +1403,8 @@ static int vsc8584_config_init(struct phy_device *phydev)
 
 	/* Disable SerDes for 1000Base-X */
 	ret = vsc8584_cmd(phydev, PROC_CMD_FIBER_MEDIA_CONF |
-			  PROC_CMD_FIBER_PORT(addr) | PROC_CMD_FIBER_DISABLE |
+			  PROC_CMD_FIBER_PORT(vsc8531->base_addr) |
+			  PROC_CMD_FIBER_DISABLE |
 			  PROC_CMD_READ_MOD_WRITE_PORT |
 			  PROC_CMD_RST_CONF_PORT | PROC_CMD_FIBER_1000BASE_X);
 	if (ret)
@@ -1750,26 +1719,14 @@ static int vsc8514_config_init(struct phy_device *phydev)
 {
 	struct vsc8531_private *vsc8531 = phydev->priv;
 	unsigned long deadline;
-	u16 val, addr;
 	int ret, i;
+	u16 val;
 	u32 reg;
 
 	phydev->mdix_ctrl = ETH_TP_MDI_AUTO;
 
 	mutex_lock(&phydev->mdio.bus->mdio_lock);
 
-	__phy_write(phydev, MSCC_EXT_PAGE_ACCESS, MSCC_PHY_PAGE_EXTENDED);
-
-	addr = __phy_read(phydev, MSCC_PHY_EXT_PHY_CNTL_4);
-	addr >>= PHY_CNTL_4_ADDR_POS;
-
-	val = __phy_read(phydev, MSCC_PHY_ACTIPHY_CNTL);
-
-	if (val & PHY_ADDR_REVERSED)
-		vsc8531->base_addr = phydev->mdio.addr + addr;
-	else
-		vsc8531->base_addr = phydev->mdio.addr - addr;
-
 	/* Some parts of the init sequence are identical for every PHY in the
 	 * package. Some parts are modifying the GPIO register bank which is a
 	 * set of registers that are affecting all PHYs, a few resetting the
@@ -1781,11 +1738,9 @@ static int vsc8514_config_init(struct phy_device *phydev)
 	 * do the correct init sequence for all PHYs that are package-critical
 	 * in this pre-init function.
 	 */
-	if (!vsc8584_is_pkg_init(phydev, val & PHY_ADDR_REVERSED ? 1 : 0))
+	if (phy_package_init_once(phydev))
 		vsc8514_config_pre_init(phydev);
 
-	vsc8531->pkg_init = true;
-
 	phy_base_write(phydev, MSCC_EXT_PAGE_ACCESS,
 		       MSCC_PHY_PAGE_EXTENDED_GPIO);
 
@@ -1991,6 +1946,10 @@ static int vsc8514_probe(struct phy_device *phydev)
 
 	phydev->priv = vsc8531;
 
+	vsc8584_get_base_addr(phydev);
+	devm_phy_package_join(&phydev->mdio.dev, phydev,
+			      vsc8531->base_addr, 0);
+
 	vsc8531->nleds = 4;
 	vsc8531->supp_led_modes = VSC85XX_SUPP_LED_MODES;
 	vsc8531->hw_stats = vsc85xx_hw_stats;
@@ -2046,6 +2005,10 @@ static int vsc8584_probe(struct phy_device *phydev)
 
 	phydev->priv = vsc8531;
 
+	vsc8584_get_base_addr(phydev);
+	devm_phy_package_join(&phydev->mdio.dev, phydev,
+			      vsc8531->base_addr, 0);
+
 	vsc8531->nleds = 4;
 	vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES;
 	vsc8531->hw_stats = vsc8584_hw_stats;
-- 
cgit v1.2.3-59-g8ed1b


From 969c54646af0d7d94a5f0f37adbbfe024e85466e Mon Sep 17 00:00:00 2001
From: Fernando Gont <fgont@si6networks.com>
Date: Fri, 1 May 2020 00:51:47 -0300
Subject: ipv6: Implement draft-ietf-6man-rfc4941bis

Implement the upcoming rev of RFC4941 (IPv6 temporary addresses):
https://tools.ietf.org/html/draft-ietf-6man-rfc4941bis-09

* Reduces the default Valid Lifetime to 2 days
  The number of extra addresses employed when Valid Lifetime was
  7 days exacerbated the stress caused on network
  elements/devices. Additionally, the motivation for temporary
  addresses is indeed privacy and reduced exposure. With a
  default Valid Lifetime of 7 days, an address that becomes
  revealed by active communication is reachable and exposed for
  one whole week. The only use case for a Valid Lifetime of 7
  days could be some application that is expecting to have long
  lived connections. But if you want to have long lived
  connections, you shouldn't be using a temporary address in the
  first place. Additionally, in the era of mobile devices, general
  applications should nevertheless be prepared and robust to
  address changes (e.g. nodes swap wifi <-> 4G, etc.)

* Employs different IIDs for different prefixes
  To avoid network activity correlation among addresses configured
  for different prefixes

* Uses a simpler algorithm for IID generation
  No need to store "history" anywhere

Signed-off-by: Fernando Gont <fgont@si6networks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ip-sysctl.rst |  2 +-
 include/net/if_inet6.h                 |  1 -
 net/ipv6/addrconf.c                    | 91 +++++++++++++++-------------------
 3 files changed, 40 insertions(+), 54 deletions(-)

diff --git a/Documentation/networking/ip-sysctl.rst b/Documentation/networking/ip-sysctl.rst
index 50b440d29a13..b72f89d5694c 100644
--- a/Documentation/networking/ip-sysctl.rst
+++ b/Documentation/networking/ip-sysctl.rst
@@ -2065,7 +2065,7 @@ use_tempaddr - INTEGER
 temp_valid_lft - INTEGER
 	valid lifetime (in seconds) for temporary addresses.
 
-	Default: 604800 (7 days)
+	Default: 172800 (2 days)
 
 temp_prefered_lft - INTEGER
 	Preferred lifetime (in seconds) for temporary addresses.
diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index a01981d7108f..212eb278bda6 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -190,7 +190,6 @@ struct inet6_dev {
 	int			dead;
 
 	u32			desync_factor;
-	u8			rndid[8];
 	struct list_head	tempaddr_list;
 
 	struct in6_addr		token;
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 26e666fe9a0e..fd885f06c4ed 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -135,8 +135,7 @@ static inline void addrconf_sysctl_unregister(struct inet6_dev *idev)
 }
 #endif
 
-static void ipv6_regen_rndid(struct inet6_dev *idev);
-static void ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr);
+static void ipv6_gen_rnd_iid(struct in6_addr *addr);
 
 static int ipv6_generate_eui64(u8 *eui, struct net_device *dev);
 static int ipv6_count_addresses(const struct inet6_dev *idev);
@@ -432,8 +431,7 @@ static struct inet6_dev *ipv6_add_dev(struct net_device *dev)
 	    dev->type == ARPHRD_SIT ||
 	    dev->type == ARPHRD_NONE) {
 		ndev->cnf.use_tempaddr = -1;
-	} else
-		ipv6_regen_rndid(ndev);
+	}
 
 	ndev->token = in6addr_any;
 
@@ -1306,29 +1304,21 @@ out:
 	in6_ifa_put(ifp);
 }
 
-static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp,
-				struct inet6_ifaddr *ift,
-				bool block)
+static int ipv6_create_tempaddr(struct inet6_ifaddr *ifp, bool block)
 {
 	struct inet6_dev *idev = ifp->idev;
-	struct in6_addr addr, *tmpaddr;
 	unsigned long tmp_tstamp, age;
 	unsigned long regen_advance;
-	struct ifa6_config cfg;
-	int ret = 0;
 	unsigned long now = jiffies;
-	long max_desync_factor;
 	s32 cnf_temp_preferred_lft;
+	struct inet6_ifaddr *ift;
+	struct ifa6_config cfg;
+	long max_desync_factor;
+	struct in6_addr addr;
+	int ret = 0;
 
 	write_lock_bh(&idev->lock);
-	if (ift) {
-		spin_lock_bh(&ift->lock);
-		memcpy(&addr.s6_addr[8], &ift->addr.s6_addr[8], 8);
-		spin_unlock_bh(&ift->lock);
-		tmpaddr = &addr;
-	} else {
-		tmpaddr = NULL;
-	}
+
 retry:
 	in6_dev_hold(idev);
 	if (idev->cnf.use_tempaddr <= 0) {
@@ -1351,8 +1341,8 @@ retry:
 	}
 	in6_ifa_hold(ifp);
 	memcpy(addr.s6_addr, ifp->addr.s6_addr, 8);
-	ipv6_try_regen_rndid(idev, tmpaddr);
-	memcpy(&addr.s6_addr[8], idev->rndid, 8);
+	ipv6_gen_rnd_iid(&addr);
+
 	age = (now - ifp->tstamp) / HZ;
 
 	regen_advance = idev->cnf.regen_max_retry *
@@ -1417,7 +1407,6 @@ retry:
 		in6_ifa_put(ifp);
 		in6_dev_put(idev);
 		pr_info("%s: retry temporary address regeneration\n", __func__);
-		tmpaddr = &addr;
 		write_lock_bh(&idev->lock);
 		goto retry;
 	}
@@ -2032,7 +2021,7 @@ static void addrconf_dad_stop(struct inet6_ifaddr *ifp, int dad_failed)
 		if (ifpub) {
 			in6_ifa_hold(ifpub);
 			spin_unlock_bh(&ifp->lock);
-			ipv6_create_tempaddr(ifpub, ifp, true);
+			ipv6_create_tempaddr(ifpub, true);
 			in6_ifa_put(ifpub);
 		} else {
 			spin_unlock_bh(&ifp->lock);
@@ -2329,40 +2318,38 @@ static int ipv6_inherit_eui64(u8 *eui, struct inet6_dev *idev)
 	return err;
 }
 
-/* (re)generation of randomized interface identifier (RFC 3041 3.2, 3.5) */
-static void ipv6_regen_rndid(struct inet6_dev *idev)
+/* Generation of a randomized Interface Identifier
+ * draft-ietf-6man-rfc4941bis, Section 3.3.1
+ */
+
+static void ipv6_gen_rnd_iid(struct in6_addr *addr)
 {
 regen:
-	get_random_bytes(idev->rndid, sizeof(idev->rndid));
-	idev->rndid[0] &= ~0x02;
+	get_random_bytes(&addr->s6_addr[8], 8);
 
-	/*
-	 * <draft-ietf-ipngwg-temp-addresses-v2-00.txt>:
-	 * check if generated address is not inappropriate
+	/* <draft-ietf-6man-rfc4941bis-08.txt>, Section 3.3.1:
+	 * check if generated address is not inappropriate:
 	 *
-	 *  - Reserved subnet anycast (RFC 2526)
-	 *	11111101 11....11 1xxxxxxx
-	 *  - ISATAP (RFC4214) 6.1
-	 *	00-00-5E-FE-xx-xx-xx-xx
-	 *  - value 0
-	 *  - XXX: already assigned to an address on the device
+	 * - Reserved IPv6 Interface Identifiers
+	 * - XXX: already assigned to an address on the device
 	 */
-	if (idev->rndid[0] == 0xfd &&
-	    (idev->rndid[1]&idev->rndid[2]&idev->rndid[3]&idev->rndid[4]&idev->rndid[5]&idev->rndid[6]) == 0xff &&
-	    (idev->rndid[7]&0x80))
+
+	/* Subnet-router anycast: 0000:0000:0000:0000 */
+	if (!(addr->s6_addr32[2] | addr->s6_addr32[3]))
 		goto regen;
-	if ((idev->rndid[0]|idev->rndid[1]) == 0) {
-		if (idev->rndid[2] == 0x5e && idev->rndid[3] == 0xfe)
-			goto regen;
-		if ((idev->rndid[2]|idev->rndid[3]|idev->rndid[4]|idev->rndid[5]|idev->rndid[6]|idev->rndid[7]) == 0x00)
-			goto regen;
-	}
-}
 
-static void  ipv6_try_regen_rndid(struct inet6_dev *idev, struct in6_addr *tmpaddr)
-{
-	if (tmpaddr && memcmp(idev->rndid, &tmpaddr->s6_addr[8], 8) == 0)
-		ipv6_regen_rndid(idev);
+	/* IANA Ethernet block: 0200:5EFF:FE00:0000-0200:5EFF:FE00:5212
+	 * Proxy Mobile IPv6:   0200:5EFF:FE00:5213
+	 * IANA Ethernet block: 0200:5EFF:FE00:5214-0200:5EFF:FEFF:FFFF
+	 */
+	if (ntohl(addr->s6_addr32[2]) == 0x02005eff &&
+	    (ntohl(addr->s6_addr32[3]) & 0Xff000000) == 0xfe000000)
+		goto regen;
+
+	/* Reserved subnet anycast addresses */
+	if (ntohl(addr->s6_addr32[2]) == 0xfdffffff &&
+	    ntohl(addr->s6_addr32[3]) >= 0Xffffff80)
+		goto regen;
 }
 
 /*
@@ -2544,7 +2531,7 @@ static void manage_tempaddrs(struct inet6_dev *idev,
 		 * no temporary address currently exists.
 		 */
 		read_unlock_bh(&idev->lock);
-		ipv6_create_tempaddr(ifp, NULL, false);
+		ipv6_create_tempaddr(ifp, false);
 	} else {
 		read_unlock_bh(&idev->lock);
 	}
@@ -4531,7 +4518,7 @@ restart:
 						ifpub->regen_count = 0;
 						spin_unlock(&ifpub->lock);
 						rcu_read_unlock_bh();
-						ipv6_create_tempaddr(ifpub, ifp, true);
+						ipv6_create_tempaddr(ifpub, true);
 						in6_ifa_put(ifpub);
 						in6_ifa_put(ifp);
 						rcu_read_lock_bh();
-- 
cgit v1.2.3-59-g8ed1b


From c6af13d334759c33c14b6fad4c676c6d1dbf9564 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 23:27:21 +0200
Subject: timer: add fsleep for flexible sleeping

Sleeping for a certain amount of time requires use of different
functions, depending on the time period.
Documentation/timers/timers-howto.rst explains when to use which
function, and also checkpatch checks for some potentially
problematic cases.

So let's create a helper that automatically chooses the appropriate
sleep function -> fsleep(), for flexible sleeping

If the delay is a constant, then the compiler should be able to ensure
that the new helper doesn't create overhead. If the delay is not
constant, then the new helper can save some code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/timers/timers-howto.rst |  3 +++
 include/linux/delay.h                 | 11 +++++++++++
 2 files changed, 14 insertions(+)

diff --git a/Documentation/timers/timers-howto.rst b/Documentation/timers/timers-howto.rst
index 7e3167bec2b1..afb0a43b8cdf 100644
--- a/Documentation/timers/timers-howto.rst
+++ b/Documentation/timers/timers-howto.rst
@@ -110,3 +110,6 @@ NON-ATOMIC CONTEXT:
 			short, the difference is whether the sleep can be ended
 			early by a signal. In general, just use msleep unless
 			you know you have a need for the interruptible variant.
+
+	FLEXIBLE SLEEPING (any delay, uninterruptible)
+		* Use fsleep
diff --git a/include/linux/delay.h b/include/linux/delay.h
index 8e6828094c1e..5e016a4029d9 100644
--- a/include/linux/delay.h
+++ b/include/linux/delay.h
@@ -65,4 +65,15 @@ static inline void ssleep(unsigned int seconds)
 	msleep(seconds * 1000);
 }
 
+/* see Documentation/timers/timers-howto.rst for the thresholds */
+static inline void fsleep(unsigned long usecs)
+{
+	if (usecs <= 10)
+		udelay(usecs);
+	else if (usecs <= 20000)
+		usleep_range(usecs, 2 * usecs);
+	else
+		msleep(DIV_ROUND_UP(usecs, 1000));
+}
+
 #endif /* defined(_LINUX_DELAY_H) */
-- 
cgit v1.2.3-59-g8ed1b


From d6836ef02c1726c4c5f1cb712a9abdcd5f8fcd14 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 1 May 2020 23:29:12 +0200
Subject: r8169: use fsleep in polling functions

Use new flexible sleep function fsleep() to merge the udelay and msleep
polling functions. We can safely do this because no polling function
is used in atomic context in this driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 108 ++++++++++++------------------
 1 file changed, 44 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 2f6512ed0a78..f06dbc9a046d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -724,55 +724,35 @@ struct rtl_cond {
 	const char *msg;
 };
 
-static void rtl_udelay(unsigned int d)
-{
-	udelay(d);
-}
-
 static bool rtl_loop_wait(struct rtl8169_private *tp, const struct rtl_cond *c,
-			  void (*delay)(unsigned int), unsigned int d, int n,
-			  bool high)
+			  unsigned long usecs, int n, bool high)
 {
 	int i;
 
 	for (i = 0; i < n; i++) {
 		if (c->check(tp) == high)
 			return true;
-		delay(d);
+		fsleep(usecs);
 	}
 
 	if (net_ratelimit())
-		netdev_err(tp->dev, "%s == %d (loop: %d, delay: %d).\n",
-			   c->msg, !high, n, d);
+		netdev_err(tp->dev, "%s == %d (loop: %d, delay: %lu).\n",
+			   c->msg, !high, n, usecs);
 	return false;
 }
 
-static bool rtl_udelay_loop_wait_high(struct rtl8169_private *tp,
-				      const struct rtl_cond *c,
-				      unsigned int d, int n)
-{
-	return rtl_loop_wait(tp, c, rtl_udelay, d, n, true);
-}
-
-static bool rtl_udelay_loop_wait_low(struct rtl8169_private *tp,
-				     const struct rtl_cond *c,
-				     unsigned int d, int n)
-{
-	return rtl_loop_wait(tp, c, rtl_udelay, d, n, false);
-}
-
-static bool rtl_msleep_loop_wait_high(struct rtl8169_private *tp,
-				      const struct rtl_cond *c,
-				      unsigned int d, int n)
+static bool rtl_loop_wait_high(struct rtl8169_private *tp,
+			       const struct rtl_cond *c,
+			       unsigned long d, int n)
 {
-	return rtl_loop_wait(tp, c, msleep, d, n, true);
+	return rtl_loop_wait(tp, c, d, n, true);
 }
 
-static bool rtl_msleep_loop_wait_low(struct rtl8169_private *tp,
-				     const struct rtl_cond *c,
-				     unsigned int d, int n)
+static bool rtl_loop_wait_low(struct rtl8169_private *tp,
+			      const struct rtl_cond *c,
+			      unsigned long d, int n)
 {
-	return rtl_loop_wait(tp, c, msleep, d, n, false);
+	return rtl_loop_wait(tp, c, d, n, false);
 }
 
 #define DECLARE_RTL_COND(name)				\
@@ -807,7 +787,7 @@ static void r8168_phy_ocp_write(struct rtl8169_private *tp, u32 reg, u32 data)
 
 	RTL_W32(tp, GPHY_OCP, OCPAR_FLAG | (reg << 15) | data);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
+	rtl_loop_wait_low(tp, &rtl_ocp_gphy_cond, 25, 10);
 }
 
 static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
@@ -817,7 +797,7 @@ static int r8168_phy_ocp_read(struct rtl8169_private *tp, u32 reg)
 
 	RTL_W32(tp, GPHY_OCP, reg << 15);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
+	return rtl_loop_wait_high(tp, &rtl_ocp_gphy_cond, 25, 10) ?
 		(RTL_R32(tp, GPHY_OCP) & 0xffff) : -ETIMEDOUT;
 }
 
@@ -895,7 +875,7 @@ static void r8169_mdio_write(struct rtl8169_private *tp, int reg, int value)
 {
 	RTL_W32(tp, PHYAR, 0x80000000 | (reg & 0x1f) << 16 | (value & 0xffff));
 
-	rtl_udelay_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
+	rtl_loop_wait_low(tp, &rtl_phyar_cond, 25, 20);
 	/*
 	 * According to hardware specs a 20us delay is required after write
 	 * complete indication, but before sending next command.
@@ -909,7 +889,7 @@ static int r8169_mdio_read(struct rtl8169_private *tp, int reg)
 
 	RTL_W32(tp, PHYAR, 0x0 | (reg & 0x1f) << 16);
 
-	value = rtl_udelay_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
+	value = rtl_loop_wait_high(tp, &rtl_phyar_cond, 25, 20) ?
 		RTL_R32(tp, PHYAR) & 0xffff : -ETIMEDOUT;
 
 	/*
@@ -932,7 +912,7 @@ static void r8168dp_1_mdio_access(struct rtl8169_private *tp, int reg, u32 data)
 	RTL_W32(tp, OCPAR, OCPAR_GPHY_WRITE_CMD);
 	RTL_W32(tp, EPHY_RXER_NUM, 0);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
+	rtl_loop_wait_low(tp, &rtl_ocpar_cond, 1000, 100);
 }
 
 static void r8168dp_1_mdio_write(struct rtl8169_private *tp, int reg, int value)
@@ -949,7 +929,7 @@ static int r8168dp_1_mdio_read(struct rtl8169_private *tp, int reg)
 	RTL_W32(tp, OCPAR, OCPAR_GPHY_READ_CMD);
 	RTL_W32(tp, EPHY_RXER_NUM, 0);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
+	return rtl_loop_wait_high(tp, &rtl_ocpar_cond, 1000, 100) ?
 		RTL_R32(tp, OCPDR) & OCPDR_DATA_MASK : -ETIMEDOUT;
 }
 
@@ -1035,7 +1015,7 @@ static void rtl_ephy_write(struct rtl8169_private *tp, int reg_addr, int value)
 	RTL_W32(tp, EPHYAR, EPHYAR_WRITE_CMD | (value & EPHYAR_DATA_MASK) |
 		(reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
+	rtl_loop_wait_low(tp, &rtl_ephyar_cond, 10, 100);
 
 	udelay(10);
 }
@@ -1044,7 +1024,7 @@ static u16 rtl_ephy_read(struct rtl8169_private *tp, int reg_addr)
 {
 	RTL_W32(tp, EPHYAR, (reg_addr & EPHYAR_REG_MASK) << EPHYAR_REG_SHIFT);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
+	return rtl_loop_wait_high(tp, &rtl_ephyar_cond, 10, 100) ?
 		RTL_R32(tp, EPHYAR) & EPHYAR_DATA_MASK : ~0;
 }
 
@@ -1060,7 +1040,7 @@ static void _rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
 	RTL_W32(tp, ERIDR, val);
 	RTL_W32(tp, ERIAR, ERIAR_WRITE_CMD | type | mask | addr);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
+	rtl_loop_wait_low(tp, &rtl_eriar_cond, 100, 100);
 }
 
 static void rtl_eri_write(struct rtl8169_private *tp, int addr, u32 mask,
@@ -1073,7 +1053,7 @@ static u32 _rtl_eri_read(struct rtl8169_private *tp, int addr, int type)
 {
 	RTL_W32(tp, ERIAR, ERIAR_READ_CMD | type | ERIAR_MASK_1111 | addr);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
+	return rtl_loop_wait_high(tp, &rtl_eriar_cond, 100, 100) ?
 		RTL_R32(tp, ERIDR) : ~0;
 }
 
@@ -1106,7 +1086,7 @@ static void rtl_eri_clear_bits(struct rtl8169_private *tp, int addr, u32 mask,
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
 {
 	RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
-	return rtl_udelay_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
+	return rtl_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
 		RTL_R32(tp, OCPDR) : ~0;
 }
 
@@ -1120,7 +1100,7 @@ static void r8168dp_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
 {
 	RTL_W32(tp, OCPDR, data);
 	RTL_W32(tp, OCPAR, OCPAR_FLAG | ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
-	rtl_udelay_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
+	rtl_loop_wait_low(tp, &rtl_ocpar_cond, 100, 20);
 }
 
 static void r8168ep_ocp_write(struct rtl8169_private *tp, u8 mask, u16 reg,
@@ -1168,7 +1148,7 @@ DECLARE_RTL_COND(rtl_ocp_tx_cond)
 static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, IBCR2, RTL_R8(tp, IBCR2) & ~0x01);
-	rtl_msleep_loop_wait_high(tp, &rtl_ocp_tx_cond, 50, 2000);
+	rtl_loop_wait_high(tp, &rtl_ocp_tx_cond, 50000, 2000);
 	RTL_W8(tp, IBISR0, RTL_R8(tp, IBISR0) | 0x20);
 	RTL_W8(tp, IBCR0, RTL_R8(tp, IBCR0) & ~0x01);
 }
@@ -1176,7 +1156,7 @@ static void rtl8168ep_stop_cmac(struct rtl8169_private *tp)
 static void rtl8168dp_driver_start(struct rtl8169_private *tp)
 {
 	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_START);
-	rtl_msleep_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10, 10);
+	rtl_loop_wait_high(tp, &rtl_dp_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168ep_driver_start(struct rtl8169_private *tp)
@@ -1184,7 +1164,7 @@ static void rtl8168ep_driver_start(struct rtl8169_private *tp)
 	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
 	r8168ep_ocp_write(tp, 0x01, 0x30,
 			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
-	rtl_msleep_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10, 10);
+	rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168_driver_start(struct rtl8169_private *tp)
@@ -1207,7 +1187,7 @@ static void rtl8168_driver_start(struct rtl8169_private *tp)
 static void rtl8168dp_driver_stop(struct rtl8169_private *tp)
 {
 	r8168dp_oob_notify(tp, OOB_CMD_DRIVER_STOP);
-	rtl_msleep_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10, 10);
+	rtl_loop_wait_low(tp, &rtl_dp_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
@@ -1216,7 +1196,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
 	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
 	r8168ep_ocp_write(tp, 0x01, 0x30,
 			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
-	rtl_msleep_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10, 10);
+	rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
 }
 
 static void rtl8168_driver_stop(struct rtl8169_private *tp)
@@ -1277,7 +1257,7 @@ u8 rtl8168d_efuse_read(struct rtl8169_private *tp, int reg_addr)
 {
 	RTL_W32(tp, EFUSEAR, (reg_addr & EFUSEAR_REG_MASK) << EFUSEAR_REG_SHIFT);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
+	return rtl_loop_wait_high(tp, &rtl_efusear_cond, 100, 300) ?
 		RTL_R32(tp, EFUSEAR) & EFUSEAR_DATA_MASK : ~0;
 }
 
@@ -1614,7 +1594,7 @@ static void rtl8169_do_counters(struct rtl8169_private *tp, u32 counter_cmd)
 	RTL_W32(tp, CounterAddrLow, cmd);
 	RTL_W32(tp, CounterAddrLow, cmd | counter_cmd);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
+	rtl_loop_wait_low(tp, &rtl_counters_cond, 10, 1000);
 }
 
 static void rtl8169_reset_counters(struct rtl8169_private *tp)
@@ -2471,7 +2451,7 @@ static void rtl_hw_reset(struct rtl8169_private *tp)
 {
 	RTL_W8(tp, ChipCmd, CmdReset);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
+	rtl_loop_wait_low(tp, &rtl_chipcmd_cond, 100, 100);
 }
 
 static void rtl_request_firmware(struct rtl8169_private *tp)
@@ -2525,12 +2505,12 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_27:
 	case RTL_GIGA_MAC_VER_28:
 	case RTL_GIGA_MAC_VER_31:
-		rtl_udelay_loop_wait_low(tp, &rtl_npq_cond, 20, 42*42);
+		rtl_loop_wait_low(tp, &rtl_npq_cond, 20, 2000);
 		break;
 	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
 	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52:
 		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
-		rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
+		rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
 		break;
 	default:
 		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
@@ -2640,7 +2620,7 @@ static void rtl_csi_write(struct rtl8169_private *tp, int addr, int value)
 	RTL_W32(tp, CSIAR, CSIAR_WRITE_CMD | (addr & CSIAR_ADDR_MASK) |
 		CSIAR_BYTE_ENABLE | func << 16);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
+	rtl_loop_wait_low(tp, &rtl_csiar_cond, 10, 100);
 }
 
 static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
@@ -2650,7 +2630,7 @@ static u32 rtl_csi_read(struct rtl8169_private *tp, int addr)
 	RTL_W32(tp, CSIAR, (addr & CSIAR_ADDR_MASK) | func << 16 |
 		CSIAR_BYTE_ENABLE);
 
-	return rtl_udelay_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
+	return rtl_loop_wait_high(tp, &rtl_csiar_cond, 10, 100) ?
 		RTL_R32(tp, CSIDR) : ~0;
 }
 
@@ -3605,7 +3585,7 @@ static void rtl_hw_start_8125_common(struct rtl8169_private *tp)
 
 	r8168_mac_ocp_write(tp, 0xe098, 0xc302);
 
-	rtl_udelay_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
+	rtl_loop_wait_low(tp, &rtl_mac_ocp_e00e_cond, 1000, 10);
 
 	rtl8125_config_eee_mac(tp);
 
@@ -5148,10 +5128,10 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
+	if (!rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
 		return;
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
+	if (!rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
 		return;
 
 	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
@@ -5160,19 +5140,19 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 
 	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
+	if (!rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
 		return;
 
 	r8168_mac_ocp_modify(tp, 0xe8de, 0, BIT(15));
 
-	rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
 static void rtl_hw_init_8125(struct rtl8169_private *tp)
 {
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
+	if (!rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
 		return;
 
 	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
@@ -5181,14 +5161,14 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp)
 
 	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
 
-	if (!rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
+	if (!rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
 		return;
 
 	r8168_mac_ocp_write(tp, 0xc0aa, 0x07d0);
 	r8168_mac_ocp_write(tp, 0xc0a6, 0x0150);
 	r8168_mac_ocp_write(tp, 0xc01e, 0x5555);
 
-	rtl_udelay_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
 static void rtl_hw_initialize(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From b94c280d0b45d1304e920f6edb07141161237a5f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 13:13:50 -0500
Subject: arm64: dts: sdm845: add IPA iommus property

Add an "iommus" property to the IPA node in "sdm845.dtsi".  It is
required because there are two regions of memory the IPA accesses
through an SMMU.  The next few patches define and map those regions.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/qcom/sdm845.dtsi | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/arch/arm64/boot/dts/qcom/sdm845.dtsi b/arch/arm64/boot/dts/qcom/sdm845.dtsi
index 8f926b5234d4..de6bb86c4968 100644
--- a/arch/arm64/boot/dts/qcom/sdm845.dtsi
+++ b/arch/arm64/boot/dts/qcom/sdm845.dtsi
@@ -1761,6 +1761,8 @@
 
 		ipa: ipa@1e40000 {
 			compatible = "qcom,sdm845-ipa";
+
+			iommus = <&apps_smmu 0x720 0x3>;
 			reg = <0 0x1e40000 0 0x7000>,
 			      <0 0x1e47000 0 0x2000>,
 			      <0 0x1e04000 0 0x2c000>;
-- 
cgit v1.2.3-59-g8ed1b


From 8dc242ad661c2694a582541c2264ffc0e7c4d27d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 4 May 2020 11:27:49 -0700
Subject: tcp: refine tcp_pacing_delay() for very low pacing rates

With the addition of the horizon feature to sch_fq, we noticed some
suboptimal behavior of TCP flows with extremely low pacing rates,
especially when TCP is not aware of a drop happening in lower stacks.

Back in commit 3f80e08f40cd ("tcp: add tcp_reset_xmit_timer() helper"),
tcp_pacing_delay() was added to estimate an extra delay to add to standard
rto timers.

This patch removes the skb argument from this helper and
tcp_reset_xmit_timer() because it makes more sense to simply
consider the time at which the next packet is allowed to be sent,
instead of the time of whatever packet has been sent.

This avoids arming RTO timer too soon and removes
spurious horizon drops.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h     | 21 ++++++++-------------
 net/ipv4/tcp_input.c  |  4 ++--
 net/ipv4/tcp_output.c |  8 +++-----
 3 files changed, 13 insertions(+), 20 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 1beed50522b1..43b87a8d4790 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -1289,26 +1289,22 @@ static inline bool tcp_needs_internal_pacing(const struct sock *sk)
 	return smp_load_acquire(&sk->sk_pacing_status) == SK_PACING_NEEDED;
 }
 
-/* Return in jiffies the delay before one skb is sent.
- * If @skb is NULL, we look at EDT for next packet being sent on the socket.
+/* Estimates in how many jiffies next packet for this flow can be sent.
+ * Scheduling a retransmit timer too early would be silly.
  */
-static inline unsigned long tcp_pacing_delay(const struct sock *sk,
-					     const struct sk_buff *skb)
+static inline unsigned long tcp_pacing_delay(const struct sock *sk)
 {
-	s64 pacing_delay = skb ? skb->tstamp : tcp_sk(sk)->tcp_wstamp_ns;
+	s64 delay = tcp_sk(sk)->tcp_wstamp_ns - tcp_sk(sk)->tcp_clock_cache;
 
-	pacing_delay -= tcp_sk(sk)->tcp_clock_cache;
-
-	return pacing_delay > 0 ? nsecs_to_jiffies(pacing_delay) : 0;
+	return delay > 0 ? nsecs_to_jiffies(delay) : 0;
 }
 
 static inline void tcp_reset_xmit_timer(struct sock *sk,
 					const int what,
 					unsigned long when,
-					const unsigned long max_when,
-					const struct sk_buff *skb)
+					const unsigned long max_when)
 {
-	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk, skb),
+	inet_csk_reset_xmit_timer(sk, what, when + tcp_pacing_delay(sk),
 				  max_when);
 }
 
@@ -1336,8 +1332,7 @@ static inline void tcp_check_probe_timer(struct sock *sk)
 {
 	if (!tcp_sk(sk)->packets_out && !inet_csk(sk)->icsk_pending)
 		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-				     tcp_probe0_base(sk), TCP_RTO_MAX,
-				     NULL);
+				     tcp_probe0_base(sk), TCP_RTO_MAX);
 }
 
 static inline void tcp_init_wl(struct tcp_sock *tp, u32 seq)
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index d68128a672ab..7d205b2a733c 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -3014,7 +3014,7 @@ void tcp_rearm_rto(struct sock *sk)
 			rto = usecs_to_jiffies(max_t(int, delta_us, 1));
 		}
 		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS, rto,
-				     TCP_RTO_MAX, tcp_rtx_queue_head(sk));
+				     TCP_RTO_MAX);
 	}
 }
 
@@ -3291,7 +3291,7 @@ static void tcp_ack_probe(struct sock *sk)
 		unsigned long when = tcp_probe0_when(sk, TCP_RTO_MAX);
 
 		tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0,
-				     when, TCP_RTO_MAX, NULL);
+				     when, TCP_RTO_MAX);
 	}
 }
 
diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index c414aeb1efa9..32c9db902f18 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -2593,8 +2593,7 @@ bool tcp_schedule_loss_probe(struct sock *sk, bool advancing_rto)
 	if (rto_delta_us > 0)
 		timeout = min_t(u32, timeout, usecs_to_jiffies(rto_delta_us));
 
-	tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout,
-			     TCP_RTO_MAX, NULL);
+	tcp_reset_xmit_timer(sk, ICSK_TIME_LOSS_PROBE, timeout, TCP_RTO_MAX);
 	return true;
 }
 
@@ -3174,8 +3173,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
 			tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
 					     inet_csk(sk)->icsk_rto,
-					     TCP_RTO_MAX,
-					     skb);
+					     TCP_RTO_MAX);
 	}
 }
 
@@ -3907,7 +3905,7 @@ void tcp_send_probe0(struct sock *sk)
 		 */
 		timeout = TCP_RESOURCE_PROBE_INTERVAL;
 	}
-	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX, NULL);
+	tcp_reset_xmit_timer(sk, ICSK_TIME_PROBE0, timeout, TCP_RTO_MAX);
 }
 
 int tcp_rtx_synack(const struct sock *sk, struct request_sock *req)
-- 
cgit v1.2.3-59-g8ed1b


From 916e6d1a5ef17a6b3bffad0f086f173cde4240d8 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 4 May 2020 11:27:50 -0700
Subject: tcp: defer xmit timer reset in tcp_xmit_retransmit_queue()

As hinted in the prior change ("tcp: refine tcp_pacing_delay()
for very low pacing rates"), it is probably best to arm
the xmit timer only when all the packets have been scheduled,
rather than when the head of the rtx queue has been re-sent.

This does matter for flows having extremely low pacing rates,
since their tp->tcp_wstamp_ns could be far in the future.

Note that the regular xmit path has a stronger limit
in tcp_small_queue_check(), meaning it is less likely to
go beyond the pacing horizon.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_output.c | 16 ++++++++++------
 1 file changed, 10 insertions(+), 6 deletions(-)

diff --git a/net/ipv4/tcp_output.c b/net/ipv4/tcp_output.c
index 32c9db902f18..a50e1990a845 100644
--- a/net/ipv4/tcp_output.c
+++ b/net/ipv4/tcp_output.c
@@ -3112,6 +3112,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 	const struct inet_connection_sock *icsk = inet_csk(sk);
 	struct sk_buff *skb, *rtx_head, *hole = NULL;
 	struct tcp_sock *tp = tcp_sk(sk);
+	bool rearm_timer = false;
 	u32 max_segs;
 	int mib_idx;
 
@@ -3134,7 +3135,7 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 		segs = tp->snd_cwnd - tcp_packets_in_flight(tp);
 		if (segs <= 0)
-			return;
+			break;
 		sacked = TCP_SKB_CB(skb)->sacked;
 		/* In case tcp_shift_skb_data() have aggregated large skbs,
 		 * we need to make sure not sending too bigs TSO packets
@@ -3159,10 +3160,10 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 			continue;
 
 		if (tcp_small_queue_check(sk, skb, 1))
-			return;
+			break;
 
 		if (tcp_retransmit_skb(sk, skb, segs))
-			return;
+			break;
 
 		NET_ADD_STATS(sock_net(sk), mib_idx, tcp_skb_pcount(skb));
 
@@ -3171,10 +3172,13 @@ void tcp_xmit_retransmit_queue(struct sock *sk)
 
 		if (skb == rtx_head &&
 		    icsk->icsk_pending != ICSK_TIME_REO_TIMEOUT)
-			tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-					     inet_csk(sk)->icsk_rto,
-					     TCP_RTO_MAX);
+			rearm_timer = true;
+
 	}
+	if (rearm_timer)
+		tcp_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+				     inet_csk(sk)->icsk_rto,
+				     TCP_RTO_MAX);
 }
 
 /* We allow to exceed memory limits for FIN packets to expedite
-- 
cgit v1.2.3-59-g8ed1b


From f86a1909ea2f9f418d168a7eadb5a773d075e25d Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:30:02 -0500
Subject: net: ipa: rename db_enable flag

In several places, a Boolean flag is used in the GSI code to
indicate whether the "doorbell engine" should be enabled or not
when a channel is configured.  This is basically done to abstract
this property from the IPA version; the GSI code doesn't otherwise
"know" what the IPA hardware version is.  The doorbell engine is
enabled only for IPA v3.5.1, not for IPA v4.0 and later.

The next patch makes another change that affects behavior during
channel reset (which also involves programming the channel).  It
also distinguishes IPA v3.5.1 hardware from newer hardware.

Rather than creating another flag whose value matches the "db_enable"
value, just rename "db_enable" to be "legacy" so it can be used to
signal more than just the special doorbell handling.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c          | 16 ++++++++--------
 drivers/net/ipa/gsi.h          | 12 ++++++------
 drivers/net/ipa/ipa_endpoint.c | 12 ++++++------
 drivers/net/ipa/ipa_main.c     |  2 +-
 4 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 8184d34124b7..cd5d8045c7e5 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -834,7 +834,7 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id)
 }
 
 /* Reset and reconfigure a channel (possibly leaving doorbell disabled) */
-void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool db_enable)
+void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
 
@@ -845,7 +845,7 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool db_enable)
 	if (!channel->toward_ipa)
 		gsi_channel_reset_command(channel);
 
-	gsi_channel_program(channel, db_enable);
+	gsi_channel_program(channel, legacy);
 	gsi_channel_trans_cancel_pending(channel);
 
 	mutex_unlock(&gsi->mutex);
@@ -1455,7 +1455,7 @@ static void gsi_evt_ring_teardown(struct gsi *gsi)
 
 /* Setup function for a single channel */
 static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id,
-				 bool db_enable)
+				 bool legacy)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
 	u32 evt_ring_id = channel->evt_ring_id;
@@ -1474,7 +1474,7 @@ static int gsi_channel_setup_one(struct gsi *gsi, u32 channel_id,
 	if (ret)
 		goto err_evt_ring_de_alloc;
 
-	gsi_channel_program(channel, db_enable);
+	gsi_channel_program(channel, legacy);
 
 	if (channel->toward_ipa)
 		netif_tx_napi_add(&gsi->dummy_dev, &channel->napi,
@@ -1545,7 +1545,7 @@ static void gsi_modem_channel_halt(struct gsi *gsi, u32 channel_id)
 }
 
 /* Setup function for channels */
-static int gsi_channel_setup(struct gsi *gsi, bool db_enable)
+static int gsi_channel_setup(struct gsi *gsi, bool legacy)
 {
 	u32 channel_id = 0;
 	u32 mask;
@@ -1557,7 +1557,7 @@ static int gsi_channel_setup(struct gsi *gsi, bool db_enable)
 	mutex_lock(&gsi->mutex);
 
 	do {
-		ret = gsi_channel_setup_one(gsi, channel_id, db_enable);
+		ret = gsi_channel_setup_one(gsi, channel_id, legacy);
 		if (ret)
 			goto err_unwind;
 	} while (++channel_id < gsi->channel_count);
@@ -1643,7 +1643,7 @@ static void gsi_channel_teardown(struct gsi *gsi)
 }
 
 /* Setup function for GSI.  GSI firmware must be loaded and initialized */
-int gsi_setup(struct gsi *gsi, bool db_enable)
+int gsi_setup(struct gsi *gsi, bool legacy)
 {
 	u32 val;
 
@@ -1686,7 +1686,7 @@ int gsi_setup(struct gsi *gsi, bool db_enable)
 	/* Writing 1 indicates IRQ interrupts; 0 would be MSI */
 	iowrite32(1, gsi->virt + GSI_CNTXT_INTSET_OFFSET);
 
-	return gsi_channel_setup(gsi, db_enable);
+	return gsi_channel_setup(gsi, legacy);
 }
 
 /* Inverse of gsi_setup() */
diff --git a/drivers/net/ipa/gsi.h b/drivers/net/ipa/gsi.h
index 19471017fadf..90a02194e7ad 100644
--- a/drivers/net/ipa/gsi.h
+++ b/drivers/net/ipa/gsi.h
@@ -165,14 +165,14 @@ struct gsi {
 /**
  * gsi_setup() - Set up the GSI subsystem
  * @gsi:	Address of GSI structure embedded in an IPA structure
- * @db_enable:	Whether to use the GSI doorbell engine
+ * @legacy:	Set up for legacy hardware
  *
  * @Return:	0 if successful, or a negative error code
  *
  * Performs initialization that must wait until the GSI hardware is
  * ready (including firmware loaded).
  */
-int gsi_setup(struct gsi *gsi, bool db_enable);
+int gsi_setup(struct gsi *gsi, bool legacy);
 
 /**
  * gsi_teardown() - Tear down GSI subsystem
@@ -220,15 +220,15 @@ int gsi_channel_stop(struct gsi *gsi, u32 channel_id);
  * gsi_channel_reset() - Reset an allocated GSI channel
  * @gsi:	GSI pointer
  * @channel_id:	Channel to be reset
- * @db_enable:	Whether doorbell engine should be enabled
+ * @legacy:	Legacy behavior
  *
- * Reset a channel and reconfigure it.  The @db_enable flag indicates
- * whether the doorbell engine will be enabled following reconfiguration.
+ * Reset a channel and reconfigure it.  The @legacy flag indicates
+ * that some steps should be done differently for legacy hardware.
  *
  * GSI hardware relinquishes ownership of all pending receive buffer
  * transactions and they will complete with their cancelled flag set.
  */
-void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool db_enable);
+void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy);
 
 int gsi_channel_suspend(struct gsi *gsi, u32 channel_id, bool stop);
 int gsi_channel_resume(struct gsi *gsi, u32 channel_id, bool start);
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 6de03be28784..db82ae48e402 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1136,7 +1136,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 	bool endpoint_suspended = false;
 	struct gsi *gsi = &ipa->gsi;
 	dma_addr_t addr;
-	bool db_enable;
+	bool legacy;
 	u32 retries;
 	u32 len = 1;
 	void *virt;
@@ -1200,8 +1200,8 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 	 * complete the channel reset sequence.  Finish by suspending the
 	 * channel again (if necessary).
 	 */
-	db_enable = ipa->version == IPA_VERSION_3_5_1;
-	gsi_channel_reset(gsi, endpoint->channel_id, db_enable);
+	legacy = ipa->version == IPA_VERSION_3_5_1;
+	gsi_channel_reset(gsi, endpoint->channel_id, legacy);
 
 	msleep(1);
 
@@ -1223,8 +1223,8 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 {
 	u32 channel_id = endpoint->channel_id;
 	struct ipa *ipa = endpoint->ipa;
-	bool db_enable;
 	bool special;
+	bool legacy;
 	int ret = 0;
 
 	/* On IPA v3.5.1, if an RX endpoint is reset while aggregation
@@ -1233,12 +1233,12 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 	 *
 	 * IPA v3.5.1 enables the doorbell engine.  Newer versions do not.
 	 */
-	db_enable = ipa->version == IPA_VERSION_3_5_1;
+	legacy = ipa->version == IPA_VERSION_3_5_1;
 	special = !endpoint->toward_ipa && endpoint->data->aggregation;
 	if (special && ipa_endpoint_aggr_active(endpoint))
 		ret = ipa_endpoint_reset_rx_aggr(endpoint);
 	else
-		gsi_channel_reset(&ipa->gsi, channel_id, db_enable);
+		gsi_channel_reset(&ipa->gsi, channel_id, legacy);
 
 	if (ret)
 		dev_err(&ipa->pdev->dev,
diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index 9295a9122e8e..e0b1fe3c34f9 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -108,7 +108,7 @@ int ipa_setup(struct ipa *ipa)
 	struct ipa_endpoint *command_endpoint;
 	int ret;
 
-	/* IPA v4.0 and above don't use the doorbell engine. */
+	/* Setup for IPA v3.5.1 has some slight differences */
 	ret = gsi_setup(&ipa->gsi, ipa->version == IPA_VERSION_3_5_1);
 	if (ret)
 		return ret;
-- 
cgit v1.2.3-59-g8ed1b


From a3f2405b0462c31d0fb32ba6da72ede4e63c8a14 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:30:03 -0500
Subject: net: ipa: only reset channel twice for IPA v3.5.1

In gsi_channel_reset(), RX channels are subjected to two consecutive
CHANNEL_RESET commands.  This workaround should only be used for IPA
version 3.5.1; on newer hardware it "can lead to unwanted behavior."

Only issue the second CHANNEL_RESET command for legacy hardware.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index cd5d8045c7e5..8ccbbb920c11 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -840,9 +840,9 @@ void gsi_channel_reset(struct gsi *gsi, u32 channel_id, bool legacy)
 
 	mutex_lock(&gsi->mutex);
 
-	/* Due to a hardware quirk we need to reset RX channels twice. */
 	gsi_channel_reset_command(channel);
-	if (!channel->toward_ipa)
+	/* Due to a hardware quirk we may need to reset RX channels twice. */
+	if (legacy && !channel->toward_ipa)
 		gsi_channel_reset_command(channel);
 
 	gsi_channel_program(channel, legacy);
-- 
cgit v1.2.3-59-g8ed1b


From 4900bf341d963e9742365cd86bfab78d3d851f39 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:37:11 -0500
Subject: net: ipa: have ipa_endpoint_init_ctrl() return previous state

Change ipa_endpoint_init_ctrl() so it returns the previous state
(whether suspend or delay mode was enabled) rather than indicating
whether the request caused a change in state.  This makes it easier
to understand what's happening where called.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 28 ++++++++++++++--------------
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index db82ae48e402..447165e980ea 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -284,11 +284,12 @@ static struct gsi_trans *ipa_endpoint_trans_alloc(struct ipa_endpoint *endpoint,
 /* suspend_delay represents suspend for RX, delay for TX endpoints.
  * Note that suspend is not supported starting with IPA v4.0.
  */
-static int
+static bool
 ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 {
 	u32 offset = IPA_REG_ENDP_INIT_CTRL_N_OFFSET(endpoint->endpoint_id);
 	struct ipa *ipa = endpoint->ipa;
+	bool state;
 	u32 mask;
 	u32 val;
 
@@ -296,13 +297,14 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 	mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
 
 	val = ioread32(ipa->reg_virt + offset);
-	if (suspend_delay == !!(val & mask))
-		return -EALREADY;	/* Already set to desired state */
-
-	val ^= mask;
-	iowrite32(val, ipa->reg_virt + offset);
+	/* Don't bother if it's already in the requested state */
+	state = !!(val & mask);
+	if (suspend_delay != state) {
+		val ^= mask;
+		iowrite32(val, ipa->reg_virt + offset);
+	}
 
-	return 0;
+	return state;
 }
 
 /* Enable or disable delay or suspend mode on all modem endpoints */
@@ -1164,8 +1166,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 
 	/* Make sure the channel isn't suspended */
 	if (endpoint->ipa->version == IPA_VERSION_3_5_1)
-		if (!ipa_endpoint_init_ctrl(endpoint, false))
-			endpoint_suspended = true;
+		endpoint_suspended = ipa_endpoint_init_ctrl(endpoint, false);
 
 	/* Start channel and do a 1 byte read */
 	ret = gsi_channel_start(gsi, endpoint->channel_id);
@@ -1318,21 +1319,20 @@ static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 	if (endpoint->toward_ipa) {
 		bool delay_mode = endpoint->data->tx.delay;
 
-		ret = ipa_endpoint_init_ctrl(endpoint, delay_mode);
 		/* Endpoint is expected to not be in delay mode */
-		if (!ret != delay_mode) {
+		if (ipa_endpoint_init_ctrl(endpoint, delay_mode))
 			dev_warn(dev,
 				"TX endpoint %u was %sin delay mode\n",
 				endpoint->endpoint_id,
 				delay_mode ? "already " : "");
-		}
 		ipa_endpoint_init_hdr_ext(endpoint);
 		ipa_endpoint_init_aggr(endpoint);
 		ipa_endpoint_init_deaggr(endpoint);
 		ipa_endpoint_init_seq(endpoint);
 	} else {
+		/* Endpoint is expected to not be suspended */
 		if (endpoint->ipa->version == IPA_VERSION_3_5_1) {
-			if (!ipa_endpoint_init_ctrl(endpoint, false))
+			if (ipa_endpoint_init_ctrl(endpoint, false))
 				dev_warn(dev,
 					"RX endpoint %u was suspended\n",
 					endpoint->endpoint_id);
@@ -1471,7 +1471,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 	/* IPA v3.5.1 doesn't use channel start for resume */
 	start_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
 	if (!endpoint->toward_ipa && !start_channel)
-		WARN_ON(ipa_endpoint_init_ctrl(endpoint, false));
+		WARN_ON(!ipa_endpoint_init_ctrl(endpoint, false));
 
 	ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From 4fa95248798ed540701e8ce987fb2a6742818f61 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:37:12 -0500
Subject: net: ipa: introduce ipa_endpoint_program_suspend()

Create a new helper function that encapsulates enabling or disabling
suspend on an RX endpoint.  It returns the previous state of the
endpoint (true means suspend mode was enabled).

Create another function that handles enabling or disabling delay mode
on a TX endpoint.  Delay mode does not work correctly on IPA version
4.2, so we don't currently use it (and shouldn't).

We only set delay mode in one case, and although we don't expect an
endpoint to already be in delay mode, it doesn't really matter if it
was.  So the delay function doesn't return a value.

Stop issuing warnings if the previous suspend or delay mode state
differs from what is expected.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 67 ++++++++++++++++++++++++++----------------
 1 file changed, 41 insertions(+), 26 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 447165e980ea..81bf41ecd3f6 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -293,7 +293,14 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 	u32 mask;
 	u32 val;
 
-	/* assert(ipa->version == IPA_VERSION_3_5_1 */
+	/* Suspend is not supported for IPA v4.0+.  Delay doesn't work
+	 * correctly on IPA v4.2.
+	 *
+	 * if (endpoint->toward_ipa)
+	 * 	assert(ipa->version != IPA_VERSION_4.2);
+	 * else
+	 * 	assert(ipa->version == IPA_VERSION_3_5_1);
+	 */
 	mask = endpoint->toward_ipa ? ENDP_DELAY_FMASK : ENDP_SUSPEND_FMASK;
 
 	val = ioread32(ipa->reg_virt + offset);
@@ -307,13 +314,31 @@ ipa_endpoint_init_ctrl(struct ipa_endpoint *endpoint, bool suspend_delay)
 	return state;
 }
 
+/* We currently don't care what the previous state was for delay mode */
+static void
+ipa_endpoint_program_delay(struct ipa_endpoint *endpoint, bool enable)
+{
+	/* assert(endpoint->toward_ipa); */
+
+	(void)ipa_endpoint_init_ctrl(endpoint, enable);
+}
+
+/* Returns previous suspend state (true means it was enabled) */
+static bool
+ipa_endpoint_program_suspend(struct ipa_endpoint *endpoint, bool enable)
+{
+	/* assert(!endpoint->toward_ipa); */
+
+	return ipa_endpoint_init_ctrl(endpoint, enable);
+}
+
 /* Enable or disable delay or suspend mode on all modem endpoints */
 void ipa_endpoint_modem_pause_all(struct ipa *ipa, bool enable)
 {
 	bool support_suspend;
 	u32 endpoint_id;
 
-	/* DELAY mode doesn't work right on IPA v4.2 */
+	/* DELAY mode doesn't work correctly on IPA v4.2 */
 	if (ipa->version == IPA_VERSION_4_2)
 		return;
 
@@ -327,8 +352,10 @@ void ipa_endpoint_modem_pause_all(struct ipa *ipa, bool enable)
 			continue;
 
 		/* Set TX delay mode, or for IPA v3.5.1 RX suspend mode */
-		if (endpoint->toward_ipa || support_suspend)
-			(void)ipa_endpoint_init_ctrl(endpoint, enable);
+		if (endpoint->toward_ipa)
+			ipa_endpoint_program_delay(endpoint, enable);
+		else if (support_suspend)
+			(void)ipa_endpoint_program_suspend(endpoint, enable);
 	}
 }
 
@@ -1135,8 +1162,8 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 {
 	struct device *dev = &endpoint->ipa->pdev->dev;
 	struct ipa *ipa = endpoint->ipa;
-	bool endpoint_suspended = false;
 	struct gsi *gsi = &ipa->gsi;
+	bool suspended = false;
 	dma_addr_t addr;
 	bool legacy;
 	u32 retries;
@@ -1166,7 +1193,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 
 	/* Make sure the channel isn't suspended */
 	if (endpoint->ipa->version == IPA_VERSION_3_5_1)
-		endpoint_suspended = ipa_endpoint_init_ctrl(endpoint, false);
+		suspended = ipa_endpoint_program_suspend(endpoint, false);
 
 	/* Start channel and do a 1 byte read */
 	ret = gsi_channel_start(gsi, endpoint->channel_id);
@@ -1211,8 +1238,8 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 err_endpoint_stop:
 	ipa_endpoint_stop(endpoint);
 out_suspend_again:
-	if (endpoint_suspended)
-		(void)ipa_endpoint_init_ctrl(endpoint, true);
+	if (suspended)
+		(void)ipa_endpoint_program_suspend(endpoint, true);
 	dma_unmap_single(dev, addr, len, DMA_FROM_DEVICE);
 out_kfree:
 	kfree(virt);
@@ -1313,30 +1340,18 @@ int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 
 static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 {
-	struct device *dev = &endpoint->ipa->pdev->dev;
-	int ret;
-
 	if (endpoint->toward_ipa) {
 		bool delay_mode = endpoint->data->tx.delay;
 
-		/* Endpoint is expected to not be in delay mode */
-		if (ipa_endpoint_init_ctrl(endpoint, delay_mode))
-			dev_warn(dev,
-				"TX endpoint %u was %sin delay mode\n",
-				endpoint->endpoint_id,
-				delay_mode ? "already " : "");
+		if (endpoint->ipa->version != IPA_VERSION_4_2)
+			ipa_endpoint_program_delay(endpoint, delay_mode);
 		ipa_endpoint_init_hdr_ext(endpoint);
 		ipa_endpoint_init_aggr(endpoint);
 		ipa_endpoint_init_deaggr(endpoint);
 		ipa_endpoint_init_seq(endpoint);
 	} else {
-		/* Endpoint is expected to not be suspended */
-		if (endpoint->ipa->version == IPA_VERSION_3_5_1) {
-			if (ipa_endpoint_init_ctrl(endpoint, false))
-				dev_warn(dev,
-					"RX endpoint %u was suspended\n",
-					endpoint->endpoint_id);
-		}
+		if (endpoint->ipa->version == IPA_VERSION_3_5_1)
+			(void)ipa_endpoint_program_suspend(endpoint, false);
 		ipa_endpoint_init_hdr_ext(endpoint);
 		ipa_endpoint_init_aggr(endpoint);
 	}
@@ -1448,7 +1463,7 @@ void ipa_endpoint_suspend_one(struct ipa_endpoint *endpoint)
 		 * aggregation frame, then simulating the arrival of such
 		 * an interrupt.
 		 */
-		WARN_ON(ipa_endpoint_init_ctrl(endpoint, true));
+		(void)ipa_endpoint_program_suspend(endpoint, true);
 		ipa_endpoint_suspend_aggr(endpoint);
 	}
 
@@ -1471,7 +1486,7 @@ void ipa_endpoint_resume_one(struct ipa_endpoint *endpoint)
 	/* IPA v3.5.1 doesn't use channel start for resume */
 	start_channel = endpoint->ipa->version != IPA_VERSION_3_5_1;
 	if (!endpoint->toward_ipa && !start_channel)
-		WARN_ON(!ipa_endpoint_init_ctrl(endpoint, false));
+		(void)ipa_endpoint_program_suspend(endpoint, false);
 
 	ret = gsi_channel_resume(gsi, endpoint->channel_id, start_channel);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From a4dcad344687abce72e2dea56e91d8c715407b6f Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:37:13 -0500
Subject: net: ipa: remove endpoint delay mode feature

A "delay mode" feature was put in place to work around a problem
that was observed during development of the upstream IPA driver.  It
used TX endpoint "delay mode" in order to prevent transmitting
packets toward the modem before it was ready.

A race condition that would explain the problem has long since been
fixed, and we have concluded that the "delay mode" feature is no
longer required.  So get rid of it.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_data-sdm845.c | 1 -
 drivers/net/ipa/ipa_data.h        | 6 ------
 drivers/net/ipa/ipa_endpoint.c    | 4 +---
 3 files changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ipa/ipa_data-sdm845.c b/drivers/net/ipa/ipa_data-sdm845.c
index f7ba85717edf..52d4b84e0dac 100644
--- a/drivers/net/ipa/ipa_data-sdm845.c
+++ b/drivers/net/ipa/ipa_data-sdm845.c
@@ -74,7 +74,6 @@ static const struct ipa_gsi_endpoint_data ipa_gsi_endpoint_data[] = {
 				.tx = {
 					.status_endpoint =
 						IPA_ENDPOINT_MODEM_AP_RX,
-					.delay	= true,
 				},
 			},
 		},
diff --git a/drivers/net/ipa/ipa_data.h b/drivers/net/ipa/ipa_data.h
index 16dfd74717b1..7fc1058a5ca9 100644
--- a/drivers/net/ipa/ipa_data.h
+++ b/drivers/net/ipa/ipa_data.h
@@ -80,18 +80,12 @@ struct gsi_channel_data {
 /**
  * struct ipa_endpoint_tx_data - configuration data for TX endpoints
  * @status_endpoint:	endpoint to which status elements are sent
- * @delay:		whether endpoint starts in delay mode
- *
- * Delay mode prevents a TX endpoint from transmitting anything, even if
- * commands have been presented to the hardware.  Once the endpoint exits
- * delay mode, queued transfer commands are sent.
  *
  * The @status_endpoint is only valid if the endpoint's @status_enable
  * flag is set.
  */
 struct ipa_endpoint_tx_data {
 	enum ipa_endpoint_name status_endpoint;
-	bool delay;
 };
 
 /**
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 81bf41ecd3f6..dec1dc8618ed 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1341,10 +1341,8 @@ int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 {
 	if (endpoint->toward_ipa) {
-		bool delay_mode = endpoint->data->tx.delay;
-
 		if (endpoint->ipa->version != IPA_VERSION_4_2)
-			ipa_endpoint_program_delay(endpoint, delay_mode);
+			ipa_endpoint_program_delay(endpoint, false);
 		ipa_endpoint_init_hdr_ext(endpoint);
 		ipa_endpoint_init_aggr(endpoint);
 		ipa_endpoint_init_deaggr(endpoint);
-- 
cgit v1.2.3-59-g8ed1b


From 97e4692dda26d047899a3f1f3f57c3d861b6b9c9 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:53:41 -0500
Subject: net: ipa: fix a bug in ipa_endpoint_stop()

In ipa_endpoint_stop(), for TX endpoints we set the number of retries
to 0.  When we break out of the loop, retries being 0 means we return
EIO rather than the value of ret (which should be 0).

Fix this by using a non-zero retry count for both RX and TX
channels, and just break out of the loop after calling
gsi_channel_stop() for TX channels.  This way only RX channels
will retry, and the retry count will be non-zero at the end
for TX channels (so the proper value gets returned).

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
(cherry picked from commit 713b6ebb4c376b3fb65fdceb3b59e401c93248f9)
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index dec1dc8618ed..de5b1e42d1ed 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1311,7 +1311,7 @@ static int ipa_endpoint_stop_rx_dma(struct ipa *ipa)
  */
 int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 {
-	u32 retries = endpoint->toward_ipa ? 0 : IPA_ENDPOINT_STOP_RX_RETRIES;
+	u32 retries = IPA_ENDPOINT_STOP_RX_RETRIES;
 	int ret;
 
 	do {
@@ -1319,12 +1319,9 @@ int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 		struct gsi *gsi = &ipa->gsi;
 
 		ret = gsi_channel_stop(gsi, endpoint->channel_id);
-		if (ret != -EAGAIN)
+		if (ret != -EAGAIN || endpoint->toward_ipa)
 			break;
 
-		if (endpoint->toward_ipa)
-			continue;
-
 		/* For IPA v3.5.1, send a DMA read task and check again */
 		if (ipa->version == IPA_VERSION_3_5_1) {
 			ret = ipa_endpoint_stop_rx_dma(ipa);
-- 
cgit v1.2.3-59-g8ed1b


From c72ddf0dbf2d9d80afd37e28f0a52e728e6db159 Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:53:42 -0500
Subject: net: ipa: get rid of workaround in ipa_endpoint_stop()

In ipa_endpoint_stop(), a workaround is used for IPA version 3.5.1
where a 1-byte DMA request is issued between GSI channel stop
retries.

It turns out that this workaround is only required for IPA versions
3.1 and 3.2, and we don't support those.  So remove the call to
ipa_endpoint_stop_rx_dma() in that function.  That leaves that
function unused, so get rid of it.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 39 +--------------------------------------
 1 file changed, 1 insertion(+), 38 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index de5b1e42d1ed..7176ae892e75 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -33,13 +33,10 @@
 #define IPA_RX_BUFFER_OVERHEAD	(PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0))
 
 #define IPA_ENDPOINT_STOP_RX_RETRIES		10
-#define IPA_ENDPOINT_STOP_RX_SIZE		1	/* bytes */
 
 #define IPA_ENDPOINT_RESET_AGGR_RETRY_MAX	3
 #define IPA_AGGR_TIME_LIMIT_DEFAULT		1000	/* microseconds */
 
-#define ENDPOINT_STOP_DMA_TIMEOUT		15	/* milliseconds */
-
 /** enum ipa_status_opcode - status element opcode hardware values */
 enum ipa_status_opcode {
 	IPA_STATUS_OPCODE_PACKET		= 0x01,
@@ -1274,32 +1271,6 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 			ret, endpoint->channel_id, endpoint->endpoint_id);
 }
 
-static int ipa_endpoint_stop_rx_dma(struct ipa *ipa)
-{
-	u16 size = IPA_ENDPOINT_STOP_RX_SIZE;
-	struct gsi_trans *trans;
-	dma_addr_t addr;
-	int ret;
-
-	trans = ipa_cmd_trans_alloc(ipa, 1);
-	if (!trans) {
-		dev_err(&ipa->pdev->dev,
-			"no transaction for RX endpoint STOP workaround\n");
-		return -EBUSY;
-	}
-
-	/* Read into the highest part of the zero memory area */
-	addr = ipa->zero_addr + ipa->zero_size - size;
-
-	ipa_cmd_dma_task_32b_addr_add(trans, size, addr, false);
-
-	ret = gsi_trans_commit_wait_timeout(trans, ENDPOINT_STOP_DMA_TIMEOUT);
-	if (ret)
-		gsi_trans_free(trans);
-
-	return ret;
-}
-
 /**
  * ipa_endpoint_stop() - Stops a GSI channel in IPA
  * @client:	Client whose endpoint should be stopped
@@ -1315,20 +1286,12 @@ int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 	int ret;
 
 	do {
-		struct ipa *ipa = endpoint->ipa;
-		struct gsi *gsi = &ipa->gsi;
+		struct gsi *gsi = &endpoint->ipa->gsi;
 
 		ret = gsi_channel_stop(gsi, endpoint->channel_id);
 		if (ret != -EAGAIN || endpoint->toward_ipa)
 			break;
 
-		/* For IPA v3.5.1, send a DMA read task and check again */
-		if (ipa->version == IPA_VERSION_3_5_1) {
-			ret = ipa_endpoint_stop_rx_dma(ipa);
-			if (ret)
-				break;
-		}
-
 		msleep(1);
 	} while (retries--);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9928fcc76f7bb5c93d512767b039e8257ab3093e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:53:43 -0500
Subject: net: ipa: don't retry in ipa_endpoint_stop()

The only reason ipa_endpoint_stop() had a retry loop was that the
just-removed workaround required an IPA DMA command to occur between
attempts.  The gsi_channel_stop() call that implements the stop does
its own retry loop, to cover a channel's transition from started to
stop-in-progress to stopped state.

Get rid of the unnecessary retry loop in ipa_endpoint_stop().

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 7176ae892e75..68ba33ec7ce9 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -32,8 +32,6 @@
 /* The amount of RX buffer space consumed by standard skb overhead */
 #define IPA_RX_BUFFER_OVERHEAD	(PAGE_SIZE - SKB_MAX_ORDER(NET_SKB_PAD, 0))
 
-#define IPA_ENDPOINT_STOP_RX_RETRIES		10
-
 #define IPA_ENDPOINT_RESET_AGGR_RETRY_MAX	3
 #define IPA_AGGR_TIME_LIMIT_DEFAULT		1000	/* microseconds */
 
@@ -1282,20 +1280,9 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
  */
 int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
 {
-	u32 retries = IPA_ENDPOINT_STOP_RX_RETRIES;
-	int ret;
-
-	do {
-		struct gsi *gsi = &endpoint->ipa->gsi;
-
-		ret = gsi_channel_stop(gsi, endpoint->channel_id);
-		if (ret != -EAGAIN || endpoint->toward_ipa)
-			break;
-
-		msleep(1);
-	} while (retries--);
+	struct gsi *gsi = &endpoint->ipa->gsi;
 
-	return retries ? ret : -EIO;
+	return gsi_channel_stop(gsi, endpoint->channel_id);
 }
 
 static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
-- 
cgit v1.2.3-59-g8ed1b


From f30dcb7dcb1aa925dfc83923c580a53c975b754b Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:53:44 -0500
Subject: net: ipa: kill ipa_endpoint_stop()

The previous commit made ipa_endpoint_stop() be a trivial wrapper
around gsi_channel_stop().  Since it no longer does anything
special, just open-code it in the three places it's used.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.c | 27 ++++++---------------------
 drivers/net/ipa/ipa_endpoint.h |  2 --
 2 files changed, 6 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 68ba33ec7ce9..82066a223a67 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1214,7 +1214,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 
 	gsi_trans_read_byte_done(gsi, endpoint->channel_id);
 
-	ret = ipa_endpoint_stop(endpoint);
+	ret = gsi_channel_stop(gsi, endpoint->channel_id);
 	if (ret)
 		goto out_suspend_again;
 
@@ -1231,7 +1231,7 @@ static int ipa_endpoint_reset_rx_aggr(struct ipa_endpoint *endpoint)
 	goto out_suspend_again;
 
 err_endpoint_stop:
-	ipa_endpoint_stop(endpoint);
+	(void)gsi_channel_stop(gsi, endpoint->channel_id);
 out_suspend_again:
 	if (suspended)
 		(void)ipa_endpoint_program_suspend(endpoint, true);
@@ -1269,22 +1269,6 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 			ret, endpoint->channel_id, endpoint->endpoint_id);
 }
 
-/**
- * ipa_endpoint_stop() - Stops a GSI channel in IPA
- * @client:	Client whose endpoint should be stopped
- *
- * This function implements the sequence to stop a GSI channel
- * in IPA. This function returns when the channel is is STOP state.
- *
- * Return value: 0 on success, negative otherwise
- */
-int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
-{
-	struct gsi *gsi = &endpoint->ipa->gsi;
-
-	return gsi_channel_stop(gsi, endpoint->channel_id);
-}
-
 static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 {
 	if (endpoint->toward_ipa) {
@@ -1337,12 +1321,13 @@ void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint)
 {
 	u32 mask = BIT(endpoint->endpoint_id);
 	struct ipa *ipa = endpoint->ipa;
+	struct gsi *gsi = &ipa->gsi;
 	int ret;
 
-	if (!(endpoint->ipa->enabled & mask))
+	if (!(ipa->enabled & mask))
 		return;
 
-	endpoint->ipa->enabled ^= mask;
+	ipa->enabled ^= mask;
 
 	if (!endpoint->toward_ipa) {
 		ipa_endpoint_replenish_disable(endpoint);
@@ -1351,7 +1336,7 @@ void ipa_endpoint_disable_one(struct ipa_endpoint *endpoint)
 	}
 
 	/* Note that if stop fails, the channel's state is not well-defined */
-	ret = ipa_endpoint_stop(endpoint);
+	ret = gsi_channel_stop(gsi, endpoint->channel_id);
 	if (ret)
 		dev_err(&ipa->pdev->dev,
 			"error %d attempting to stop endpoint %u\n", ret,
diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 4b336a1f759d..3b297d65828e 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -76,8 +76,6 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa);
 
 int ipa_endpoint_skb_tx(struct ipa_endpoint *endpoint, struct sk_buff *skb);
 
-int ipa_endpoint_stop(struct ipa_endpoint *endpoint);
-
 void ipa_endpoint_exit_one(struct ipa_endpoint *endpoint);
 
 int ipa_endpoint_enable_one(struct ipa_endpoint *endpoint);
-- 
cgit v1.2.3-59-g8ed1b


From da1a782a7140fab22f2dfe8453d7b73c786d73de Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Mon, 4 May 2020 18:53:45 -0500
Subject: net: ipa: kill ipa_cmd_dma_task_32b_addr_add()

A recent commit removed the only use of ipa_cmd_dma_task_32b_addr_add().
This function (and the IPA immediate command it implements) is no
longer needed, so get rid of it, along with all of the definitions
associated with it.  Isolate its removal in a commit so it can be
easily added back again if needed.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_cmd.c | 59 -----------------------------------------------
 drivers/net/ipa/ipa_cmd.h | 11 ---------
 2 files changed, 70 deletions(-)

diff --git a/drivers/net/ipa/ipa_cmd.c b/drivers/net/ipa/ipa_cmd.c
index d226b858742d..394f8a6df086 100644
--- a/drivers/net/ipa/ipa_cmd.c
+++ b/drivers/net/ipa/ipa_cmd.c
@@ -103,28 +103,6 @@ struct ipa_cmd_ip_packet_init {
 /* Field masks for ipa_cmd_ip_packet_init dest_endpoint field */
 #define IPA_PACKET_INIT_DEST_ENDPOINT_FMASK		GENMASK(4, 0)
 
-/* IPA_CMD_DMA_TASK_32B_ADDR */
-
-/* This opcode gets modified with a DMA operation count */
-
-#define DMA_TASK_32B_ADDR_OPCODE_COUNT_FMASK		GENMASK(15, 8)
-
-struct ipa_cmd_hw_dma_task_32b_addr {
-	__le16 flags;
-	__le16 size;
-	__le32 addr;
-	__le16 packet_size;
-	u8 reserved[6];
-};
-
-/* Field masks for ipa_cmd_hw_dma_task_32b_addr flags field */
-#define DMA_TASK_32B_ADDR_FLAGS_SW_RSVD_FMASK		GENMASK(10, 0)
-#define DMA_TASK_32B_ADDR_FLAGS_CMPLT_FMASK		GENMASK(11, 11)
-#define DMA_TASK_32B_ADDR_FLAGS_EOF_FMASK		GENMASK(12, 12)
-#define DMA_TASK_32B_ADDR_FLAGS_FLSH_FMASK		GENMASK(13, 13)
-#define DMA_TASK_32B_ADDR_FLAGS_LOCK_FMASK		GENMASK(14, 14)
-#define DMA_TASK_32B_ADDR_FLAGS_UNLOCK_FMASK		GENMASK(15, 15)
-
 /* IPA_CMD_DMA_SHARED_MEM */
 
 /* For IPA v4.0+, this opcode gets modified with pipeline clear options */
@@ -163,7 +141,6 @@ union ipa_cmd_payload {
 	struct ipa_cmd_hw_hdr_init_local hdr_init_local;
 	struct ipa_cmd_register_write register_write;
 	struct ipa_cmd_ip_packet_init ip_packet_init;
-	struct ipa_cmd_hw_dma_task_32b_addr dma_task_32b_addr;
 	struct ipa_cmd_hw_dma_mem_mem dma_shared_mem;
 	struct ipa_cmd_ip_packet_tag_status ip_packet_tag_status;
 };
@@ -508,42 +485,6 @@ static void ipa_cmd_ip_packet_init_add(struct gsi_trans *trans, u8 endpoint_id)
 			  direction, opcode);
 }
 
-/* Use a 32-bit DMA command to zero a block of memory */
-void ipa_cmd_dma_task_32b_addr_add(struct gsi_trans *trans, u16 size,
-				   dma_addr_t addr, bool toward_ipa)
-{
-	struct ipa *ipa = container_of(trans->gsi, struct ipa, gsi);
-	enum ipa_cmd_opcode opcode = IPA_CMD_DMA_TASK_32B_ADDR;
-	struct ipa_cmd_hw_dma_task_32b_addr *payload;
-	union ipa_cmd_payload *cmd_payload;
-	enum dma_data_direction direction;
-	dma_addr_t payload_addr;
-	u16 flags;
-
-	/* assert(addr <= U32_MAX); */
-	addr &= GENMASK_ULL(31, 0);
-
-	/* The opcode encodes the number of DMA operations in the high byte */
-	opcode |= u16_encode_bits(1, DMA_TASK_32B_ADDR_OPCODE_COUNT_FMASK);
-
-	direction = toward_ipa ? DMA_TO_DEVICE : DMA_FROM_DEVICE;
-
-	/* complete: 0 = don't interrupt; eof: 0 = don't assert eot */
-	flags = DMA_TASK_32B_ADDR_FLAGS_FLSH_FMASK;
-	/* lock: 0 = don't lock endpoint; unlock: 0 = don't unlock */
-
-	cmd_payload = ipa_cmd_payload_alloc(ipa, &payload_addr);
-	payload = &cmd_payload->dma_task_32b_addr;
-
-	payload->flags = cpu_to_le16(flags);
-	payload->size = cpu_to_le16(size);
-	payload->addr = cpu_to_le32((u32)addr);
-	payload->packet_size = cpu_to_le16(size);
-
-	gsi_trans_cmd_add(trans, payload, sizeof(*payload), payload_addr,
-			  direction, opcode);
-}
-
 /* Use a DMA command to read or write a block of IPA-resident memory */
 void ipa_cmd_dma_shared_mem_add(struct gsi_trans *trans, u32 offset, u16 size,
 				dma_addr_t addr, bool toward_ipa)
diff --git a/drivers/net/ipa/ipa_cmd.h b/drivers/net/ipa/ipa_cmd.h
index 4917525b3a47..e440aa69c8b5 100644
--- a/drivers/net/ipa/ipa_cmd.h
+++ b/drivers/net/ipa/ipa_cmd.h
@@ -35,7 +35,6 @@ enum ipa_cmd_opcode {
 	IPA_CMD_HDR_INIT_LOCAL		= 9,
 	IPA_CMD_REGISTER_WRITE		= 12,
 	IPA_CMD_IP_PACKET_INIT		= 16,
-	IPA_CMD_DMA_TASK_32B_ADDR	= 17,
 	IPA_CMD_DMA_SHARED_MEM		= 19,
 	IPA_CMD_IP_PACKET_TAG_STATUS	= 20,
 };
@@ -147,16 +146,6 @@ void ipa_cmd_hdr_init_local_add(struct gsi_trans *trans, u32 offset, u16 size,
 void ipa_cmd_register_write_add(struct gsi_trans *trans, u32 offset, u32 value,
 				u32 mask, bool clear_full);
 
-/**
- * ipa_cmd_dma_task_32b_addr_add() - Add a 32-bit DMA command to a transaction
- * @trans:	GSi transaction
- * @size:	Number of bytes to be memory to be transferred
- * @addr:	DMA address of buffer to be read into or written from
- * @toward_ipa:	true means write to IPA memory; false means read
- */
-void ipa_cmd_dma_task_32b_addr_add(struct gsi_trans *trans, u16 size,
-				   dma_addr_t addr, bool toward_ipa);
-
 /**
  * ipa_cmd_dma_shared_mem_add() - Add a DMA memory command to a transaction
  * @trans:	GSI transaction
-- 
cgit v1.2.3-59-g8ed1b


From d91a3159e8d9a1bf58c2aaad1ef10a09bc91350c Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna <shiva@chelsio.com>
Date: Tue, 5 May 2020 08:42:53 +0530
Subject: Crypto/chcr: fix gcm-aes and rfc4106-gcm failed tests

This patch fixes two issues observed during self tests with
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS enabled.

1. gcm(aes) hang issue that happens during decryption.
2. rfc4106-gcm-aes-chcr encryption unexpectedly succeeded.

For gcm-aes decryption, the authtag is not mapped because
sg_nents_for_len() only counts entries up to a size of
assoclen + cryptlen - authsize. So fix it by DMA-mapping the authtag.
Also replace sg_nents() with sg_nents_for_len() in chcr_aead_dma_unmap().

For rfc4106-gcm-aes-chcr, used crypto_ipsec_check_assoclen() for checking
the validity of assoclen.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Devulapally Shiva Krishna <shiva@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index c29b80dd30d8..e300eb32a9d3 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2556,7 +2556,7 @@ int chcr_aead_dma_map(struct device *dev,
 	int dst_size;
 
 	dst_size = req->assoclen + req->cryptlen + (op_type ?
-				-authsize : authsize);
+				0 : authsize);
 	if (!req->cryptlen || !dst_size)
 		return 0;
 	reqctx->iv_dma = dma_map_single(dev, reqctx->iv, (IV + reqctx->b0_len),
@@ -2603,15 +2603,16 @@ void chcr_aead_dma_unmap(struct device *dev,
 	int dst_size;
 
 	dst_size = req->assoclen + req->cryptlen + (op_type ?
-					-authsize : authsize);
+					0 : authsize);
 	if (!req->cryptlen || !dst_size)
 		return;
 
 	dma_unmap_single(dev, reqctx->iv_dma, (IV + reqctx->b0_len),
 					DMA_BIDIRECTIONAL);
 	if (req->src == req->dst) {
-		dma_unmap_sg(dev, req->src, sg_nents(req->src),
-				   DMA_BIDIRECTIONAL);
+		dma_unmap_sg(dev, req->src,
+			     sg_nents_for_len(req->src, dst_size),
+			     DMA_BIDIRECTIONAL);
 	} else {
 		dma_unmap_sg(dev, req->src, sg_nents(req->src),
 				   DMA_TO_DEVICE);
@@ -3702,6 +3703,13 @@ static int chcr_aead_op(struct aead_request *req,
 			return -ENOSPC;
 	}
 
+	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4106 &&
+	    crypto_ipsec_check_assoclen(req->assoclen) != 0) {
+		pr_err("RFC4106: Invalid value of assoclen %d\n",
+		       req->assoclen);
+		return -EINVAL;
+	}
+
 	/* Form a WR from req */
 	skb = create_wr_fn(req, u_ctx->lldi.rxq_ids[reqctx->rxqidx], size);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6b363a286cd01961423f5dcd648b265088ec56d0 Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna <shiva@chelsio.com>
Date: Tue, 5 May 2020 08:42:54 +0530
Subject: Crypto/chcr: fix ctr, cbc, xts and rfc3686-ctr failed tests

This solves the following issues observed during self test when
CONFIG_CRYPTO_MANAGER_EXTRA_TESTS is enabled.

1. Added a fallback for cbc, ctr and rfc3686 if req->nbytes is zero,
and for xts added a fallback case if req->nbytes is not a multiple of 16.

2. In case of cbc-aes, solved wrong iv update. When
chcr_cipher_fallback() is called, used req->info pointer instead of
reqctx->iv.

3. In cbc-aes decryption there was a wrong result. This occurs when
chcr_cipher_fallback() is called from chcr_handle_cipher_resp().
The iv (req->info) used in the fallback function is wrongly updated,
so use the initial iv for this case.

4. In ctr-aes encryption a wrong result was observed. adjust_ctr_overflow()
has a condition which checks if ((bytes / AES_BLOCK_SIZE) > c),
where c is the number of blocks which can be processed without iv overflow,
but for the above bytes (req->nbytes < 32, not a multiple of 16) this
condition fails and the 2nd block is corrupted as it requires the rollover iv.
So change the comparison from '>' to '>=' to take care of this.

5. In rfc3686-ctr a wrong result was observed. This occurs when
chcr_cipher_fallback() is called from chcr_handle_cipher_resp().
Here too, copy initial_iv into the init_iv pointer so that the fallback
case is handled correctly.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Devulapally Shiva Krishna <shiva@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c   | 42 ++++++++++++++++++++++++------------
 drivers/crypto/chelsio/chcr_crypto.h |  1 +
 2 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index e300eb32a9d3..51adba5685a4 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -1054,8 +1054,8 @@ static unsigned int adjust_ctr_overflow(u8 *iv, u32 bytes)
 	u32 temp = be32_to_cpu(*--b);
 
 	temp = ~temp;
-	c = (u64)temp +  1; // No of block can processed withou overflow
-	if ((bytes / AES_BLOCK_SIZE) > c)
+	c = (u64)temp +  1; // No of block can processed without overflow
+	if ((bytes / AES_BLOCK_SIZE) >= c)
 		bytes = c * AES_BLOCK_SIZE;
 	return bytes;
 }
@@ -1158,15 +1158,16 @@ static int chcr_final_cipher_iv(struct skcipher_request *req,
 static int chcr_handle_cipher_resp(struct skcipher_request *req,
 				   unsigned char *input, int err)
 {
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
-	struct chcr_context *ctx = c_ctx(tfm);
-	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
-	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
-	struct sk_buff *skb;
 	struct cpl_fw6_pld *fw6_pld = (struct cpl_fw6_pld *)input;
-	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
-	struct cipher_wr_param wrparam;
+	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
+	struct uld_ctx *u_ctx = ULD_CTX(c_ctx(tfm));
 	struct chcr_dev *dev = c_ctx(tfm)->dev;
+	struct chcr_context *ctx = c_ctx(tfm);
+	struct adapter *adap = padap(ctx->dev);
+	struct cipher_wr_param wrparam;
+	struct sk_buff *skb;
 	int bytes;
 
 	if (err)
@@ -1197,6 +1198,8 @@ static int chcr_handle_cipher_resp(struct skcipher_request *req,
 	if (unlikely(bytes == 0)) {
 		chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
 				      req);
+		memcpy(req->iv, reqctx->init_iv, IV);
+		atomic_inc(&adap->chcr_stats.fallback);
 		err = chcr_cipher_fallback(ablkctx->sw_cipher,
 				     req->base.flags,
 				     req->src,
@@ -1248,20 +1251,28 @@ static int process_cipher(struct skcipher_request *req,
 				  struct sk_buff **skb,
 				  unsigned short op_type)
 {
+	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
 	unsigned int ivsize = crypto_skcipher_ivsize(tfm);
-	struct chcr_skcipher_req_ctx *reqctx = skcipher_request_ctx(req);
 	struct ablk_ctx *ablkctx = ABLK_CTX(c_ctx(tfm));
+	struct adapter *adap = padap(c_ctx(tfm)->dev);
 	struct	cipher_wr_param wrparam;
 	int bytes, err = -EINVAL;
+	int subtype;
 
 	reqctx->processed = 0;
 	reqctx->partial_req = 0;
 	if (!req->iv)
 		goto error;
+	subtype = get_cryptoalg_subtype(tfm);
 	if ((ablkctx->enckey_len == 0) || (ivsize > AES_BLOCK_SIZE) ||
 	    (req->cryptlen == 0) ||
 	    (req->cryptlen % crypto_skcipher_blocksize(tfm))) {
+		if (req->cryptlen == 0 && subtype != CRYPTO_ALG_SUB_TYPE_XTS)
+			goto fallback;
+		else if (req->cryptlen % crypto_skcipher_blocksize(tfm) &&
+			 subtype == CRYPTO_ALG_SUB_TYPE_XTS)
+			goto fallback;
 		pr_err("AES: Invalid value of Key Len %d nbytes %d IV Len %d\n",
 		       ablkctx->enckey_len, req->cryptlen, ivsize);
 		goto error;
@@ -1302,12 +1313,10 @@ static int process_cipher(struct skcipher_request *req,
 	} else {
 		bytes = req->cryptlen;
 	}
-	if (get_cryptoalg_subtype(tfm) ==
-	    CRYPTO_ALG_SUB_TYPE_CTR) {
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR) {
 		bytes = adjust_ctr_overflow(req->iv, bytes);
 	}
-	if (get_cryptoalg_subtype(tfm) ==
-	    CRYPTO_ALG_SUB_TYPE_CTR_RFC3686) {
+	if (subtype == CRYPTO_ALG_SUB_TYPE_CTR_RFC3686) {
 		memcpy(reqctx->iv, ablkctx->nonce, CTR_RFC3686_NONCE_SIZE);
 		memcpy(reqctx->iv + CTR_RFC3686_NONCE_SIZE, req->iv,
 				CTR_RFC3686_IV_SIZE);
@@ -1315,20 +1324,25 @@ static int process_cipher(struct skcipher_request *req,
 		/* initialize counter portion of counter block */
 		*(__be32 *)(reqctx->iv + CTR_RFC3686_NONCE_SIZE +
 			CTR_RFC3686_IV_SIZE) = cpu_to_be32(1);
+		memcpy(reqctx->init_iv, reqctx->iv, IV);
 
 	} else {
 
 		memcpy(reqctx->iv, req->iv, IV);
+		memcpy(reqctx->init_iv, req->iv, IV);
 	}
 	if (unlikely(bytes == 0)) {
 		chcr_cipher_dma_unmap(&ULD_CTX(c_ctx(tfm))->lldi.pdev->dev,
 				      req);
+fallback:       atomic_inc(&adap->chcr_stats.fallback);
 		err = chcr_cipher_fallback(ablkctx->sw_cipher,
 					   req->base.flags,
 					   req->src,
 					   req->dst,
 					   req->cryptlen,
-					   reqctx->iv,
+					   subtype ==
+					   CRYPTO_ALG_SUB_TYPE_CTR_RFC3686 ?
+					   reqctx->iv : req->iv,
 					   op_type);
 		goto error;
 	}
diff --git a/drivers/crypto/chelsio/chcr_crypto.h b/drivers/crypto/chelsio/chcr_crypto.h
index 542bebae001f..b3fdbdc25acb 100644
--- a/drivers/crypto/chelsio/chcr_crypto.h
+++ b/drivers/crypto/chelsio/chcr_crypto.h
@@ -302,6 +302,7 @@ struct chcr_skcipher_req_ctx {
 	unsigned int op;
 	u16 imm;
 	u8 iv[CHCR_MAX_CRYPTO_IV_LEN];
+	u8 init_iv[CHCR_MAX_CRYPTO_IV_LEN];
 	u16 txqidx;
 	u16 rxqidx;
 };
-- 
cgit v1.2.3-59-g8ed1b


From 10b0c75d7bc19606fa9a62c8ab9180e95c0e0385 Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna <shiva@chelsio.com>
Date: Tue, 5 May 2020 08:42:55 +0530
Subject: Crypto/chcr: fix for ccm(aes) failed test

The ccm(aes) test fails when req->assoclen > ~240bytes.

The problem is the value assigned to auth_offset is wrong.
As auth_offset is unsigned char, it can take max value as 255.
So fix it by making it unsigned int.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Devulapally Shiva Krishna <shiva@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 51adba5685a4..6b1a656e0a89 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2925,7 +2925,7 @@ static void fill_sec_cpl_for_aead(struct cpl_tx_sec_pdu *sec_cpl,
 	unsigned int mac_mode = CHCR_SCMD_AUTH_MODE_CBCMAC;
 	unsigned int rx_channel_id = reqctx->rxqidx / ctx->rxq_perchan;
 	unsigned int ccm_xtra;
-	unsigned char tag_offset = 0, auth_offset = 0;
+	unsigned int tag_offset = 0, auth_offset = 0;
 	unsigned int assoclen;
 
 	if (get_aead_subtype(tfm) == CRYPTO_ALG_SUB_TYPE_AEAD_RFC4309)
-- 
cgit v1.2.3-59-g8ed1b


From ee91ac1b11e44b38268a2f129c86a8a4ee4c218a Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna <shiva@chelsio.com>
Date: Tue, 5 May 2020 08:42:56 +0530
Subject: Crypto/chcr: support for 48 byte key_len in aes-xts

Added support for 48 byte key length for aes-xts.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Devulapally Shiva Krishna <shiva@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c | 27 +++++++++++++++++++++++++--
 1 file changed, 25 insertions(+), 2 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 6b1a656e0a89..0d25af42cadb 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -1077,7 +1077,14 @@ static int chcr_update_tweak(struct skcipher_request *req, u8 *iv,
 
 	keylen = ablkctx->enckey_len / 2;
 	key = ablkctx->key + keylen;
-	ret = aes_expandkey(&aes, key, keylen);
+	/* For a 192 bit key remove the padded zeroes which was
+	 * added in chcr_xts_setkey
+	 */
+	if (KEY_CONTEXT_CK_SIZE_G(ntohl(ablkctx->key_ctx_hdr))
+			== CHCR_KEYCTX_CIPHER_KEY_SIZE_192)
+		ret = aes_expandkey(&aes, key, keylen - 8);
+	else
+		ret = aes_expandkey(&aes, key, keylen);
 	if (ret)
 		return ret;
 	aes_encrypt(&aes, iv, iv);
@@ -2264,12 +2271,28 @@ static int chcr_aes_xts_setkey(struct crypto_skcipher *cipher, const u8 *key,
 	ablkctx->enckey_len = key_len;
 	get_aes_decrypt_key(ablkctx->rrkey, ablkctx->key, key_len << 2);
 	context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len) >> 4;
-	ablkctx->key_ctx_hdr =
+	/* Both keys for xts must be aligned to 16 byte boundary
+	 * by padding with zeros. So for 24 byte keys padding 8 zeroes.
+	 */
+	if (key_len == 48) {
+		context_size = (KEY_CONTEXT_HDR_SALT_AND_PAD + key_len
+				+ 16) >> 4;
+		memmove(ablkctx->key + 32, ablkctx->key + 24, 24);
+		memset(ablkctx->key + 24, 0, 8);
+		memset(ablkctx->key + 56, 0, 8);
+		ablkctx->enckey_len = 64;
+		ablkctx->key_ctx_hdr =
+			FILL_KEY_CTX_HDR(CHCR_KEYCTX_CIPHER_KEY_SIZE_192,
+					 CHCR_KEYCTX_NO_KEY, 1,
+					 0, context_size);
+	} else {
+		ablkctx->key_ctx_hdr =
 		FILL_KEY_CTX_HDR((key_len == AES_KEYSIZE_256) ?
 				 CHCR_KEYCTX_CIPHER_KEY_SIZE_128 :
 				 CHCR_KEYCTX_CIPHER_KEY_SIZE_256,
 				 CHCR_KEYCTX_NO_KEY, 1,
 				 0, context_size);
+	}
 	ablkctx->ciph_mode = CHCR_SCMD_CIPHER_MODE_AES_XTS;
 	return 0;
 badkey_err:
-- 
cgit v1.2.3-59-g8ed1b


From 02f58e5bf2e98bf7543019d7bba21f83ff8abb13 Mon Sep 17 00:00:00 2001
From: Devulapally Shiva Krishna <shiva@chelsio.com>
Date: Tue, 5 May 2020 08:42:57 +0530
Subject: Crypto/chcr: fix for hmac(sha) test fails

The hmac(sha) test fails for a zero length source text data.
For hmac(sha) minimum length of the data must be of block-size.
So fix this by including the data_len for the last block.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: Devulapally Shiva Krishna <shiva@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 0d25af42cadb..b8c1c4dd3ef0 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -2005,7 +2005,7 @@ static int chcr_ahash_digest(struct ahash_request *req)
 	req_ctx->data_len += params.bfr_len + params.sg_len;
 
 	if (req->nbytes == 0) {
-		create_last_hash_block(req_ctx->reqbfr, bs, 0);
+		create_last_hash_block(req_ctx->reqbfr, bs, req_ctx->data_len);
 		params.more = 1;
 		params.bfr_len = bs;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From bdbdac7649fac05f88c9f7ab18121a17fb591687 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 5 May 2020 08:35:05 +0200
Subject: ethtool: provide UAPI for PHY master/slave configuration.

This UAPI is needed for BroadR-Reach 100BASE-T1 devices. Due to lack of
auto-negotiation support, we needed to be able to configure the
MASTER-SLAVE role of the port manually or from an application in user
space.

The same UAPI can be used for 1000BASE-T or MultiGBASE-T devices to
force MASTER or SLAVE role. See IEEE 802.3-2018:
22.2.4.3.7 MASTER-SLAVE control register (Register 9)
22.2.4.3.8 MASTER-SLAVE status register (Register 10)
40.5.2 MASTER-SLAVE configuration resolution
45.2.1.185.1 MASTER-SLAVE config value (1.2100.14)
45.2.7.10 MultiGBASE-T AN control 1 register (Register 7.32)

The MASTER-SLAVE role affects the clock configuration:

-------------------------------------------------------------------------------
When the  PHY is configured as MASTER, the PMA Transmit function shall
source TX_TCLK from a local clock source. When configured as SLAVE, the
PMA Transmit function shall source TX_TCLK from the clock recovered from
data stream provided by MASTER.

iMX6Q                     KSZ9031                XXX
------\                /-----------\        /------------\
      |                |           |        |            |
 MAC  |<----RGMII----->| PHY Slave |<------>| PHY Master |
      |<--- 125 MHz ---+-<------/  |        | \          |
------/                \-----------/        \------------/
                                               ^
                                                \-TX_TCLK

-------------------------------------------------------------------------------

Since some clock or link related issues are only reproducible in a
specific MASTER-SLAVE-role, MAC and PHY configuration, it is beneficial
to provide a generic (not 100BASE-T1 specific) interface to the user space
for configuration flexibility and troubleshooting.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 35 ++++++-----
 drivers/net/phy/phy.c                        |  4 +-
 drivers/net/phy/phy_device.c                 | 94 ++++++++++++++++++++++++++++
 include/linux/phy.h                          |  3 +
 include/uapi/linux/ethtool.h                 | 16 ++++-
 include/uapi/linux/ethtool_netlink.h         |  2 +
 include/uapi/linux/mii.h                     |  2 +
 net/ethtool/ioctl.c                          |  6 ++
 net/ethtool/linkmodes.c                      | 53 ++++++++++++++++
 9 files changed, 197 insertions(+), 18 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 567326491f80..8f5cefc539cf 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -392,14 +392,16 @@ Request contents:
 
 Kernel response contents:
 
-  ====================================  ======  ==========================
-  ``ETHTOOL_A_LINKMODES_HEADER``        nested  reply header
-  ``ETHTOOL_A_LINKMODES_AUTONEG``       u8      autonegotiation status
-  ``ETHTOOL_A_LINKMODES_OURS``          bitset  advertised link modes
-  ``ETHTOOL_A_LINKMODES_PEER``          bitset  partner link modes
-  ``ETHTOOL_A_LINKMODES_SPEED``         u32     link speed (Mb/s)
-  ``ETHTOOL_A_LINKMODES_DUPLEX``        u8      duplex mode
-  ====================================  ======  ==========================
+  ==========================================  ======  ==========================
+  ``ETHTOOL_A_LINKMODES_HEADER``              nested  reply header
+  ``ETHTOOL_A_LINKMODES_AUTONEG``             u8      autonegotiation status
+  ``ETHTOOL_A_LINKMODES_OURS``                bitset  advertised link modes
+  ``ETHTOOL_A_LINKMODES_PEER``                bitset  partner link modes
+  ``ETHTOOL_A_LINKMODES_SPEED``               u32     link speed (Mb/s)
+  ``ETHTOOL_A_LINKMODES_DUPLEX``              u8      duplex mode
+  ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG``    u8      Master/slave port mode
+  ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE``  u8      Master/slave port state
+  ==========================================  ======  ==========================
 
 For ``ETHTOOL_A_LINKMODES_OURS``, value represents advertised modes and mask
 represents supported modes. ``ETHTOOL_A_LINKMODES_PEER`` in the reply is a bit
@@ -414,14 +416,15 @@ LINKMODES_SET
 
 Request contents:
 
-  ====================================  ======  ==========================
-  ``ETHTOOL_A_LINKMODES_HEADER``        nested  request header
-  ``ETHTOOL_A_LINKMODES_AUTONEG``       u8      autonegotiation status
-  ``ETHTOOL_A_LINKMODES_OURS``          bitset  advertised link modes
-  ``ETHTOOL_A_LINKMODES_PEER``          bitset  partner link modes
-  ``ETHTOOL_A_LINKMODES_SPEED``         u32     link speed (Mb/s)
-  ``ETHTOOL_A_LINKMODES_DUPLEX``        u8      duplex mode
-  ====================================  ======  ==========================
+  ==========================================  ======  ==========================
+  ``ETHTOOL_A_LINKMODES_HEADER``              nested  request header
+  ``ETHTOOL_A_LINKMODES_AUTONEG``             u8      autonegotiation status
+  ``ETHTOOL_A_LINKMODES_OURS``                bitset  advertised link modes
+  ``ETHTOOL_A_LINKMODES_PEER``                bitset  partner link modes
+  ``ETHTOOL_A_LINKMODES_SPEED``               u32     link speed (Mb/s)
+  ``ETHTOOL_A_LINKMODES_DUPLEX``              u8      duplex mode
+  ``ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG``    u8      Master/slave port mode
+  ==========================================  ======  ==========================
 
 ``ETHTOOL_A_LINKMODES_OURS`` bit set allows setting advertised link modes. If
 autonegotiation is on (either set now or kept from before), advertised modes
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 72c69a9c8a98..8c22d02b4218 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -295,7 +295,7 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev,
 			 phydev->advertising, autoneg == AUTONEG_ENABLE);
 
 	phydev->duplex = duplex;
-
+	phydev->master_slave_set = cmd->base.master_slave_cfg;
 	phydev->mdix_ctrl = cmd->base.eth_tp_mdix_ctrl;
 
 	/* Restart the PHY */
@@ -314,6 +314,8 @@ void phy_ethtool_ksettings_get(struct phy_device *phydev,
 
 	cmd->base.speed = phydev->speed;
 	cmd->base.duplex = phydev->duplex;
+	cmd->base.master_slave_cfg = phydev->master_slave_get;
+	cmd->base.master_slave_state = phydev->master_slave_state;
 	if (phydev->interface == PHY_INTERFACE_MODE_MOCA)
 		cmd->base.port = PORT_BNC;
 	else
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index b1c5e4503bc4..83fc8e1b5793 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1913,6 +1913,90 @@ int genphy_setup_forced(struct phy_device *phydev)
 }
 EXPORT_SYMBOL(genphy_setup_forced);
 
+static int genphy_setup_master_slave(struct phy_device *phydev)
+{
+	u16 ctl = 0;
+
+	if (!phydev->is_gigabit_capable)
+		return 0;
+
+	switch (phydev->master_slave_set) {
+	case MASTER_SLAVE_CFG_MASTER_PREFERRED:
+		ctl |= CTL1000_PREFER_MASTER;
+		break;
+	case MASTER_SLAVE_CFG_SLAVE_PREFERRED:
+		break;
+	case MASTER_SLAVE_CFG_MASTER_FORCE:
+		ctl |= CTL1000_AS_MASTER;
+		/* fallthrough */
+	case MASTER_SLAVE_CFG_SLAVE_FORCE:
+		ctl |= CTL1000_ENABLE_MASTER;
+		break;
+	case MASTER_SLAVE_CFG_UNKNOWN:
+	case MASTER_SLAVE_CFG_UNSUPPORTED:
+		return 0;
+	default:
+		phydev_warn(phydev, "Unsupported Master/Slave mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	return phy_modify_changed(phydev, MII_CTRL1000,
+				  (CTL1000_ENABLE_MASTER | CTL1000_AS_MASTER |
+				   CTL1000_PREFER_MASTER), ctl);
+}
+
+static int genphy_read_master_slave(struct phy_device *phydev)
+{
+	int cfg, state;
+	int val;	/* signed: the "val < 0" error checks below require it */
+
+	if (!phydev->is_gigabit_capable) {
+		phydev->master_slave_get = MASTER_SLAVE_CFG_UNSUPPORTED;
+		phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED;
+		return 0;
+	}
+
+	phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN;
+	phydev->master_slave_state = MASTER_SLAVE_STATE_UNKNOWN;
+
+	val = phy_read(phydev, MII_CTRL1000);
+	if (val < 0)
+		return val;
+
+	if (val & CTL1000_ENABLE_MASTER) {
+		if (val & CTL1000_AS_MASTER)
+			cfg = MASTER_SLAVE_CFG_MASTER_FORCE;
+		else
+			cfg = MASTER_SLAVE_CFG_SLAVE_FORCE;
+	} else {
+		if (val & CTL1000_PREFER_MASTER)
+			cfg = MASTER_SLAVE_CFG_MASTER_PREFERRED;
+		else
+			cfg = MASTER_SLAVE_CFG_SLAVE_PREFERRED;
+	}
+
+	val = phy_read(phydev, MII_STAT1000);
+	if (val < 0)
+		return val;
+
+	if (val & LPA_1000MSFAIL) {
+		state = MASTER_SLAVE_STATE_ERR;
+	} else if (phydev->link) {
+		/* these bits are valid only for an active link */
+		if (val & LPA_1000MSRES)
+			state = MASTER_SLAVE_STATE_MASTER;
+		else
+			state = MASTER_SLAVE_STATE_SLAVE;
+	} else {
+		state = MASTER_SLAVE_STATE_UNKNOWN;
+	}
+
+	phydev->master_slave_get = cfg;
+	phydev->master_slave_state = state;
+
+	return 0;
+}
+
 /**
  * genphy_restart_aneg - Enable and Restart Autonegotiation
  * @phydev: target phy_device struct
@@ -1971,6 +2055,12 @@ int __genphy_config_aneg(struct phy_device *phydev, bool changed)
 	if (genphy_config_eee_advert(phydev))
 		changed = true;
 
+	err = genphy_setup_master_slave(phydev);
+	if (err < 0)
+		return err;
+	else if (err)
+		changed = true;
+
 	if (AUTONEG_ENABLE != phydev->autoneg)
 		return genphy_setup_forced(phydev);
 
@@ -2205,6 +2295,10 @@ int genphy_read_status(struct phy_device *phydev)
 	phydev->pause = 0;
 	phydev->asym_pause = 0;
 
+	err = genphy_read_master_slave(phydev);
+	if (err < 0)
+		return err;
+
 	err = genphy_read_lpa(phydev);
 	if (err < 0)
 		return err;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 1d36ac608159..a2b91b5f9d0a 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -477,6 +477,9 @@ struct phy_device {
 	int duplex;
 	int pause;
 	int asym_pause;
+	u8 master_slave_get;
+	u8 master_slave_set;
+	u8 master_slave_state;
 
 	/* Union of PHY and Attached devices' supported link modes */
 	/* See ethtool.h for more info */
diff --git a/include/uapi/linux/ethtool.h b/include/uapi/linux/ethtool.h
index 92f737f10117..f4662b3a9e1e 100644
--- a/include/uapi/linux/ethtool.h
+++ b/include/uapi/linux/ethtool.h
@@ -1666,6 +1666,18 @@ static inline int ethtool_validate_duplex(__u8 duplex)
 	return 0;
 }
 
+#define MASTER_SLAVE_CFG_UNSUPPORTED		0
+#define MASTER_SLAVE_CFG_UNKNOWN		1
+#define MASTER_SLAVE_CFG_MASTER_PREFERRED	2
+#define MASTER_SLAVE_CFG_SLAVE_PREFERRED	3
+#define MASTER_SLAVE_CFG_MASTER_FORCE		4
+#define MASTER_SLAVE_CFG_SLAVE_FORCE		5
+#define MASTER_SLAVE_STATE_UNSUPPORTED		0
+#define MASTER_SLAVE_STATE_UNKNOWN		1
+#define MASTER_SLAVE_STATE_MASTER		2
+#define MASTER_SLAVE_STATE_SLAVE		3
+#define MASTER_SLAVE_STATE_ERR			4
+
 /* Which connector port. */
 #define PORT_TP			0x00
 #define PORT_AUI		0x01
@@ -1904,7 +1916,9 @@ struct ethtool_link_settings {
 	__u8	eth_tp_mdix_ctrl;
 	__s8	link_mode_masks_nwords;
 	__u8	transceiver;
-	__u8	reserved1[3];
+	__u8	master_slave_cfg;
+	__u8	master_slave_state;
+	__u8	reserved1[1];
 	__u32	reserved[7];
 	__u32	link_mode_masks[0];
 	/* layout of link_mode_masks fields:
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 7fde76366ba4..bf1d310e20bc 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -216,6 +216,8 @@ enum {
 	ETHTOOL_A_LINKMODES_PEER,		/* bitset */
 	ETHTOOL_A_LINKMODES_SPEED,		/* u32 */
 	ETHTOOL_A_LINKMODES_DUPLEX,		/* u8 */
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,	/* u8 */
+	ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,	/* u8 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_LINKMODES_CNT,
diff --git a/include/uapi/linux/mii.h b/include/uapi/linux/mii.h
index 90f9b4e1ba27..39f7c44baf53 100644
--- a/include/uapi/linux/mii.h
+++ b/include/uapi/linux/mii.h
@@ -151,11 +151,13 @@
 /* 1000BASE-T Control register */
 #define ADVERTISE_1000FULL	0x0200  /* Advertise 1000BASE-T full duplex */
 #define ADVERTISE_1000HALF	0x0100  /* Advertise 1000BASE-T half duplex */
+#define CTL1000_PREFER_MASTER	0x0400  /* prefer to operate as master */
 #define CTL1000_AS_MASTER	0x0800
 #define CTL1000_ENABLE_MASTER	0x1000
 
 /* 1000BASE-T Status register */
 #define LPA_1000MSFAIL		0x8000	/* Master/Slave resolution failure */
+#define LPA_1000MSRES		0x4000	/* Master/Slave resolution status */
 #define LPA_1000LOCALRXOK	0x2000	/* Link partner local receiver status */
 #define LPA_1000REMRXOK		0x1000	/* Link partner remote receiver status */
 #define LPA_1000FULL		0x0800	/* Link partner 1000BASE-T full duplex */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 226d5ecdd567..52102ab1709b 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -552,6 +552,8 @@ static int ethtool_get_link_ksettings(struct net_device *dev,
 	link_ksettings.base.cmd = ETHTOOL_GLINKSETTINGS;
 	link_ksettings.base.link_mode_masks_nwords
 		= __ETHTOOL_LINK_MODE_MASK_NU32;
+	link_ksettings.base.master_slave_cfg = MASTER_SLAVE_CFG_UNSUPPORTED;
+	link_ksettings.base.master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED;
 
 	return store_link_ksettings_for_user(useraddr, &link_ksettings);
 }
@@ -589,6 +591,10 @@ static int ethtool_set_link_ksettings(struct net_device *dev,
 	    != link_ksettings.base.link_mode_masks_nwords)
 		return -EINVAL;
 
+	if (link_ksettings.base.master_slave_cfg ||
+	    link_ksettings.base.master_slave_state)
+		return -EINVAL;
+
 	err = dev->ethtool_ops->set_link_ksettings(dev, &link_ksettings);
 	if (err >= 0) {
 		ethtool_notify(dev, ETHTOOL_MSG_LINKINFO_NTF, NULL);
diff --git a/net/ethtool/linkmodes.c b/net/ethtool/linkmodes.c
index 452608c6d856..fd4f3e58c6f6 100644
--- a/net/ethtool/linkmodes.c
+++ b/net/ethtool/linkmodes.c
@@ -27,6 +27,8 @@ linkmodes_get_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
 	[ETHTOOL_A_LINKMODES_PEER]		= { .type = NLA_REJECT },
 	[ETHTOOL_A_LINKMODES_SPEED]		= { .type = NLA_REJECT },
 	[ETHTOOL_A_LINKMODES_DUPLEX]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]	= { .type = NLA_REJECT },
+	[ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE]	= { .type = NLA_REJECT },
 };
 
 static int linkmodes_prepare_data(const struct ethnl_req_info *req_base,
@@ -63,6 +65,7 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
 {
 	const struct linkmodes_reply_data *data = LINKMODES_REPDATA(reply_base);
 	const struct ethtool_link_ksettings *ksettings = &data->ksettings;
+	const struct ethtool_link_settings *lsettings = &ksettings->base;
 	bool compact = req_base->flags & ETHTOOL_FLAG_COMPACT_BITSETS;
 	int len, ret;
 
@@ -86,6 +89,12 @@ static int linkmodes_reply_size(const struct ethnl_req_info *req_base,
 		len += ret;
 	}
 
+	if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED)
+		len += nla_total_size(sizeof(u8));
+
+	if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED)
+		len += nla_total_size(sizeof(u8));
+
 	return len;
 }
 
@@ -122,6 +131,16 @@ static int linkmodes_fill_reply(struct sk_buff *skb,
 	    nla_put_u8(skb, ETHTOOL_A_LINKMODES_DUPLEX, lsettings->duplex))
 		return -EMSGSIZE;
 
+	if (lsettings->master_slave_cfg != MASTER_SLAVE_CFG_UNSUPPORTED &&
+	    nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG,
+		       lsettings->master_slave_cfg))
+		return -EMSGSIZE;
+
+	if (lsettings->master_slave_state != MASTER_SLAVE_STATE_UNSUPPORTED &&
+	    nla_put_u8(skb, ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE,
+		       lsettings->master_slave_state))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
@@ -249,6 +268,8 @@ linkmodes_set_policy[ETHTOOL_A_LINKMODES_MAX + 1] = {
 	[ETHTOOL_A_LINKMODES_PEER]		= { .type = NLA_REJECT },
 	[ETHTOOL_A_LINKMODES_SPEED]		= { .type = NLA_U32 },
 	[ETHTOOL_A_LINKMODES_DUPLEX]		= { .type = NLA_U8 },
+	[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG]	= { .type = NLA_U8 },
+	[ETHTOOL_A_LINKMODES_MASTER_SLAVE_STATE]	= { .type = NLA_REJECT },
 };
 
 /* Set advertised link modes to all supported modes matching requested speed
@@ -287,14 +308,45 @@ static bool ethnl_auto_linkmodes(struct ethtool_link_ksettings *ksettings,
 			     __ETHTOOL_LINK_MODE_MASK_NBITS);
 }
 
+static bool ethnl_validate_master_slave_cfg(u8 cfg)
+{
+	switch (cfg) {
+	case MASTER_SLAVE_CFG_MASTER_PREFERRED:
+	case MASTER_SLAVE_CFG_SLAVE_PREFERRED:
+	case MASTER_SLAVE_CFG_MASTER_FORCE:
+	case MASTER_SLAVE_CFG_SLAVE_FORCE:
+		return true;
+	}
+
+	return false;
+}
+
 static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
 				  struct ethtool_link_ksettings *ksettings,
 				  bool *mod)
 {
 	struct ethtool_link_settings *lsettings = &ksettings->base;
 	bool req_speed, req_duplex;
+	const struct nlattr *master_slave_cfg;
 	int ret;
 
+	master_slave_cfg = tb[ETHTOOL_A_LINKMODES_MASTER_SLAVE_CFG];
+	if (master_slave_cfg) {
+		u8 cfg = nla_get_u8(master_slave_cfg);
+
+		if (lsettings->master_slave_cfg == MASTER_SLAVE_CFG_UNSUPPORTED) {
+			NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg,
+					    "master/slave configuration not supported by device");
+			return -EOPNOTSUPP;
+		}
+
+		if (!ethnl_validate_master_slave_cfg(cfg)) {
+			NL_SET_ERR_MSG_ATTR(info->extack, master_slave_cfg,
+					    "master/slave value is invalid");
+			return -EOPNOTSUPP;
+		}
+	}
+
 	*mod = false;
 	req_speed = tb[ETHTOOL_A_LINKMODES_SPEED];
 	req_duplex = tb[ETHTOOL_A_LINKMODES_DUPLEX];
@@ -311,6 +363,7 @@ static int ethnl_update_linkmodes(struct genl_info *info, struct nlattr **tb,
 			 mod);
 	ethnl_update_u8(&lsettings->duplex, tb[ETHTOOL_A_LINKMODES_DUPLEX],
 			mod);
+	ethnl_update_u8(&lsettings->master_slave_cfg, master_slave_cfg, mod);
 
 	if (!tb[ETHTOOL_A_LINKMODES_OURS] && lsettings->autoneg &&
 	    (req_speed || req_duplex) &&
-- 
cgit v1.2.3-59-g8ed1b


From b883e47bde4b3eaf18e8d0d73709b33942b4a589 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Tue, 5 May 2020 08:35:06 +0200
Subject: net: phy: tja11xx: add support for master-slave configuration

The TJA11xx PHYs have a vendor specific Master/Slave configuration bit,
which is not compatible with IEEE 802.3-2018 spec for 100Base-T1
devices. So, provide a custom config_aneg callback to solve this
problem.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index cc766b2d4136..ca5f9d4dc57e 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -30,6 +30,7 @@
 #define MII_ECTRL_WAKE_REQUEST		BIT(0)
 
 #define MII_CFG1			18
+#define MII_CFG1_MASTER_SLAVE		BIT(15)
 #define MII_CFG1_AUTO_OP		BIT(14)
 #define MII_CFG1_SLEEP_CONFIRM		BIT(6)
 #define MII_CFG1_LED_MODE_MASK		GENMASK(5, 4)
@@ -167,6 +168,32 @@ static int tja11xx_soft_reset(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
+static int tja11xx_config_aneg(struct phy_device *phydev)
+{
+	u16 ctl = 0;
+	int ret;
+
+	switch (phydev->master_slave_set) {
+	case MASTER_SLAVE_CFG_MASTER_FORCE:
+		ctl |= MII_CFG1_MASTER_SLAVE;
+		break;
+	case MASTER_SLAVE_CFG_SLAVE_FORCE:
+		break;	/* ctl stays 0, so MII_CFG1_MASTER_SLAVE gets cleared */
+	case MASTER_SLAVE_CFG_UNKNOWN:
+	case MASTER_SLAVE_CFG_UNSUPPORTED:
+		return 0;
+	default:
+		phydev_warn(phydev, "Unsupported Master/Slave mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	ret = phy_modify_changed(phydev, MII_CFG1, MII_CFG1_MASTER_SLAVE, ctl);
+	if (ret < 0)
+		return ret;
+
+	return __genphy_config_aneg(phydev, ret);
+}
+
 static int tja11xx_config_init(struct phy_device *phydev)
 {
 	int ret;
@@ -224,10 +251,22 @@ static int tja11xx_read_status(struct phy_device *phydev)
 {
 	int ret;
 
+	phydev->master_slave_get = MASTER_SLAVE_CFG_UNKNOWN;
+	phydev->master_slave_state = MASTER_SLAVE_STATE_UNSUPPORTED;
+
 	ret = genphy_update_link(phydev);
 	if (ret)
 		return ret;
 
+	ret = phy_read(phydev, MII_CFG1);
+	if (ret < 0)
+		return ret;
+
+	if (ret & MII_CFG1_MASTER_SLAVE)
+		phydev->master_slave_get = MASTER_SLAVE_CFG_MASTER_FORCE;
+	else
+		phydev->master_slave_get = MASTER_SLAVE_CFG_SLAVE_FORCE;
+
 	if (phydev->link) {
 		ret = phy_read(phydev, MII_COMMSTAT);
 		if (ret < 0)
@@ -504,6 +543,7 @@ static struct phy_driver tja11xx_driver[] = {
 		.features       = PHY_BASIC_T1_FEATURES,
 		.probe		= tja11xx_probe,
 		.soft_reset	= tja11xx_soft_reset,
+		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
 		.suspend	= genphy_suspend,
@@ -519,6 +559,7 @@ static struct phy_driver tja11xx_driver[] = {
 		.features       = PHY_BASIC_T1_FEATURES,
 		.probe		= tja11xx_probe,
 		.soft_reset	= tja11xx_soft_reset,
+		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
 		.suspend	= genphy_suspend,
@@ -533,6 +574,7 @@ static struct phy_driver tja11xx_driver[] = {
 		.features       = PHY_BASIC_T1_FEATURES,
 		.probe		= tja1102_p0_probe,
 		.soft_reset	= tja11xx_soft_reset,
+		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
 		.match_phy_device = tja1102_p0_match_phy_device,
@@ -551,6 +593,7 @@ static struct phy_driver tja11xx_driver[] = {
 		.features       = PHY_BASIC_T1_FEATURES,
 		/* currently no probe for Port 1 is need */
 		.soft_reset	= tja11xx_soft_reset,
+		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
 		.match_phy_device = tja1102_p1_match_phy_device,
-- 
cgit v1.2.3-59-g8ed1b


From ae1804de93f6f1626906567ae7deec8e0111259d Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 May 2020 17:38:19 +0200
Subject: dsa: sja1105: dynamically allocate stats structure

The addition of sja1105_port_status_ether structure into the
statistics causes the frame size to go over the warning limit:

drivers/net/dsa/sja1105/sja1105_ethtool.c:421:6: error: stack frame size of 1104 bytes in function 'sja1105_get_ethtool_stats' [-Werror,-Wframe-larger-than=]

Use dynamic allocation to avoid this.

Fixes: 336aa67bd027 ("net: dsa: sja1105: show more ethtool statistics counters for P/Q/R/S")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_ethtool.c | 144 +++++++++++++++---------------
 1 file changed, 74 insertions(+), 70 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c
index d742ffcbfce9..709f035055c5 100644
--- a/drivers/net/dsa/sja1105/sja1105_ethtool.c
+++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c
@@ -421,92 +421,96 @@ static char sja1105pqrs_extra_port_stats[][ETH_GSTRING_LEN] = {
 void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 {
 	struct sja1105_private *priv = ds->priv;
-	struct sja1105_port_status status;
+	struct sja1105_port_status *status;
 	int rc, i, k = 0;
 
-	memset(&status, 0, sizeof(status));
+	status = kzalloc(sizeof(*status), GFP_KERNEL);
+	if (!status)
+		goto out;
 
-	rc = sja1105_port_status_get(priv, &status, port);
+	rc = sja1105_port_status_get(priv, status, port);
 	if (rc < 0) {
 		dev_err(ds->dev, "Failed to read port %d counters: %d\n",
 			port, rc);
-		return;
+		goto out;
 	}
 	memset(data, 0, ARRAY_SIZE(sja1105_port_stats) * sizeof(u64));
-	data[k++] = status.mac.n_runt;
-	data[k++] = status.mac.n_soferr;
-	data[k++] = status.mac.n_alignerr;
-	data[k++] = status.mac.n_miierr;
-	data[k++] = status.mac.typeerr;
-	data[k++] = status.mac.sizeerr;
-	data[k++] = status.mac.tctimeout;
-	data[k++] = status.mac.priorerr;
-	data[k++] = status.mac.nomaster;
-	data[k++] = status.mac.memov;
-	data[k++] = status.mac.memerr;
-	data[k++] = status.mac.invtyp;
-	data[k++] = status.mac.intcyov;
-	data[k++] = status.mac.domerr;
-	data[k++] = status.mac.pcfbagdrop;
-	data[k++] = status.mac.spcprior;
-	data[k++] = status.mac.ageprior;
-	data[k++] = status.mac.portdrop;
-	data[k++] = status.mac.lendrop;
-	data[k++] = status.mac.bagdrop;
-	data[k++] = status.mac.policeerr;
-	data[k++] = status.mac.drpnona664err;
-	data[k++] = status.mac.spcerr;
-	data[k++] = status.mac.agedrp;
-	data[k++] = status.hl1.n_n664err;
-	data[k++] = status.hl1.n_vlanerr;
-	data[k++] = status.hl1.n_unreleased;
-	data[k++] = status.hl1.n_sizeerr;
-	data[k++] = status.hl1.n_crcerr;
-	data[k++] = status.hl1.n_vlnotfound;
-	data[k++] = status.hl1.n_ctpolerr;
-	data[k++] = status.hl1.n_polerr;
-	data[k++] = status.hl1.n_rxfrm;
-	data[k++] = status.hl1.n_rxbyte;
-	data[k++] = status.hl1.n_txfrm;
-	data[k++] = status.hl1.n_txbyte;
-	data[k++] = status.hl2.n_qfull;
-	data[k++] = status.hl2.n_part_drop;
-	data[k++] = status.hl2.n_egr_disabled;
-	data[k++] = status.hl2.n_not_reach;
+	data[k++] = status->mac.n_runt;
+	data[k++] = status->mac.n_soferr;
+	data[k++] = status->mac.n_alignerr;
+	data[k++] = status->mac.n_miierr;
+	data[k++] = status->mac.typeerr;
+	data[k++] = status->mac.sizeerr;
+	data[k++] = status->mac.tctimeout;
+	data[k++] = status->mac.priorerr;
+	data[k++] = status->mac.nomaster;
+	data[k++] = status->mac.memov;
+	data[k++] = status->mac.memerr;
+	data[k++] = status->mac.invtyp;
+	data[k++] = status->mac.intcyov;
+	data[k++] = status->mac.domerr;
+	data[k++] = status->mac.pcfbagdrop;
+	data[k++] = status->mac.spcprior;
+	data[k++] = status->mac.ageprior;
+	data[k++] = status->mac.portdrop;
+	data[k++] = status->mac.lendrop;
+	data[k++] = status->mac.bagdrop;
+	data[k++] = status->mac.policeerr;
+	data[k++] = status->mac.drpnona664err;
+	data[k++] = status->mac.spcerr;
+	data[k++] = status->mac.agedrp;
+	data[k++] = status->hl1.n_n664err;
+	data[k++] = status->hl1.n_vlanerr;
+	data[k++] = status->hl1.n_unreleased;
+	data[k++] = status->hl1.n_sizeerr;
+	data[k++] = status->hl1.n_crcerr;
+	data[k++] = status->hl1.n_vlnotfound;
+	data[k++] = status->hl1.n_ctpolerr;
+	data[k++] = status->hl1.n_polerr;
+	data[k++] = status->hl1.n_rxfrm;
+	data[k++] = status->hl1.n_rxbyte;
+	data[k++] = status->hl1.n_txfrm;
+	data[k++] = status->hl1.n_txbyte;
+	data[k++] = status->hl2.n_qfull;
+	data[k++] = status->hl2.n_part_drop;
+	data[k++] = status->hl2.n_egr_disabled;
+	data[k++] = status->hl2.n_not_reach;
 
 	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
 	    priv->info->device_id == SJA1105T_DEVICE_ID)
-		return;
+		goto out;;
 
 	memset(data + k, 0, ARRAY_SIZE(sja1105pqrs_extra_port_stats) *
 			sizeof(u64));
 	for (i = 0; i < 8; i++) {
-		data[k++] = status.hl2.qlevel_hwm[i];
-		data[k++] = status.hl2.qlevel[i];
+		data[k++] = status->hl2.qlevel_hwm[i];
+		data[k++] = status->hl2.qlevel[i];
 	}
-	data[k++] = status.ether.n_drops_nolearn;
-	data[k++] = status.ether.n_drops_noroute;
-	data[k++] = status.ether.n_drops_ill_dtag;
-	data[k++] = status.ether.n_drops_dtag;
-	data[k++] = status.ether.n_drops_sotag;
-	data[k++] = status.ether.n_drops_sitag;
-	data[k++] = status.ether.n_drops_utag;
-	data[k++] = status.ether.n_tx_bytes_1024_2047;
-	data[k++] = status.ether.n_tx_bytes_512_1023;
-	data[k++] = status.ether.n_tx_bytes_256_511;
-	data[k++] = status.ether.n_tx_bytes_128_255;
-	data[k++] = status.ether.n_tx_bytes_65_127;
-	data[k++] = status.ether.n_tx_bytes_64;
-	data[k++] = status.ether.n_tx_mcast;
-	data[k++] = status.ether.n_tx_bcast;
-	data[k++] = status.ether.n_rx_bytes_1024_2047;
-	data[k++] = status.ether.n_rx_bytes_512_1023;
-	data[k++] = status.ether.n_rx_bytes_256_511;
-	data[k++] = status.ether.n_rx_bytes_128_255;
-	data[k++] = status.ether.n_rx_bytes_65_127;
-	data[k++] = status.ether.n_rx_bytes_64;
-	data[k++] = status.ether.n_rx_mcast;
-	data[k++] = status.ether.n_rx_bcast;
+	data[k++] = status->ether.n_drops_nolearn;
+	data[k++] = status->ether.n_drops_noroute;
+	data[k++] = status->ether.n_drops_ill_dtag;
+	data[k++] = status->ether.n_drops_dtag;
+	data[k++] = status->ether.n_drops_sotag;
+	data[k++] = status->ether.n_drops_sitag;
+	data[k++] = status->ether.n_drops_utag;
+	data[k++] = status->ether.n_tx_bytes_1024_2047;
+	data[k++] = status->ether.n_tx_bytes_512_1023;
+	data[k++] = status->ether.n_tx_bytes_256_511;
+	data[k++] = status->ether.n_tx_bytes_128_255;
+	data[k++] = status->ether.n_tx_bytes_65_127;
+	data[k++] = status->ether.n_tx_bytes_64;
+	data[k++] = status->ether.n_tx_mcast;
+	data[k++] = status->ether.n_tx_bcast;
+	data[k++] = status->ether.n_rx_bytes_1024_2047;
+	data[k++] = status->ether.n_rx_bytes_512_1023;
+	data[k++] = status->ether.n_rx_bytes_256_511;
+	data[k++] = status->ether.n_rx_bytes_128_255;
+	data[k++] = status->ether.n_rx_bytes_65_127;
+	data[k++] = status->ether.n_rx_bytes_64;
+	data[k++] = status->ether.n_rx_mcast;
+	data[k++] = status->ether.n_rx_bcast;
+out:
+	kfree(status);
 }
 
 void sja1105_get_strings(struct dsa_switch *ds, int port,
-- 
cgit v1.2.3-59-g8ed1b


From 2b6c6f0716322fc51332e8fa1c40e2d68f289bec Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 6 May 2020 22:03:52 +0800
Subject: bpf, i386: Remove unneeded conversion to bool

The '==' expression itself is bool, no need to convert it to bool again.
This fixes the following coccicheck warning:

  arch/x86/net/bpf_jit_comp32.c:1478:50-55: WARNING: conversion to bool not needed here
  arch/x86/net/bpf_jit_comp32.c:1479:50-55: WARNING: conversion to bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200506140352.37154-1-yanaijie@huawei.com
---
 arch/x86/net/bpf_jit_comp32.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/arch/x86/net/bpf_jit_comp32.c b/arch/x86/net/bpf_jit_comp32.c
index 66cd150b7e54..96fde03aa987 100644
--- a/arch/x86/net/bpf_jit_comp32.c
+++ b/arch/x86/net/bpf_jit_comp32.c
@@ -1475,8 +1475,8 @@ static int do_jit(struct bpf_prog *bpf_prog, int *addrs, u8 *image,
 	for (i = 0; i < insn_cnt; i++, insn++) {
 		const s32 imm32 = insn->imm;
 		const bool is64 = BPF_CLASS(insn->code) == BPF_ALU64;
-		const bool dstk = insn->dst_reg == BPF_REG_AX ? false : true;
-		const bool sstk = insn->src_reg == BPF_REG_AX ? false : true;
+		const bool dstk = insn->dst_reg != BPF_REG_AX;
+		const bool sstk = insn->src_reg != BPF_REG_AX;
 		const u8 code = insn->code;
 		const u8 *dst = bpf2ia32[insn->dst_reg];
 		const u8 *src = bpf2ia32[insn->src_reg];
-- 
cgit v1.2.3-59-g8ed1b


From 31e2d067fd966188e47461f1b5d253405b3559fb Mon Sep 17 00:00:00 2001
From: Zheng Zengkai <zhengzengkai@huawei.com>
Date: Thu, 7 May 2020 16:03:26 +0800
Subject: net: phy: Make iproc_mdio_resume static

Fix sparse warnings:

drivers/net/phy/mdio-bcm-iproc.c:182:5: warning:
 symbol 'iproc_mdio_resume' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Zheng Zengkai <zhengzengkai@huawei.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-bcm-iproc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-bcm-iproc.c b/drivers/net/phy/mdio-bcm-iproc.c
index 38bf40e0d673..77fc970cdfde 100644
--- a/drivers/net/phy/mdio-bcm-iproc.c
+++ b/drivers/net/phy/mdio-bcm-iproc.c
@@ -179,7 +179,7 @@ static int iproc_mdio_remove(struct platform_device *pdev)
 }
 
 #ifdef CONFIG_PM_SLEEP
-int iproc_mdio_resume(struct device *dev)
+static int iproc_mdio_resume(struct device *dev)
 {
 	struct platform_device *pdev = to_platform_device(dev);
 	struct iproc_mdio_priv *priv = platform_get_drvdata(pdev);
-- 
cgit v1.2.3-59-g8ed1b


From 49d4c392cc0186df5ce6f1037afe5131c56d1316 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:07:41 +0800
Subject: b43: remove dead function b43_rssinoise_postprocess()

This function has been dead for more than 10 years. Remove it.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/wireless/broadcom/b43/xmit.c | 13 -------------
 1 file changed, 13 deletions(-)

diff --git a/drivers/net/wireless/broadcom/b43/xmit.c b/drivers/net/wireless/broadcom/b43/xmit.c
index 058745219516..55babc6d1091 100644
--- a/drivers/net/wireless/broadcom/b43/xmit.c
+++ b/drivers/net/wireless/broadcom/b43/xmit.c
@@ -629,19 +629,6 @@ static s8 b43_rssi_postprocess(struct b43_wldev *dev,
 	return (s8) tmp;
 }
 
-//TODO
-#if 0
-static s8 b43_rssinoise_postprocess(struct b43_wldev *dev, u8 in_rssi)
-{
-	struct b43_phy *phy = &dev->phy;
-	s8 ret;
-
-	ret = b43_rssi_postprocess(dev, in_rssi, 0, 1, 1);
-
-	return ret;
-}
-#endif
-
 void b43_rx(struct b43_wldev *dev, struct sk_buff *skb, const void *_rxhdr)
 {
 	struct ieee80211_rx_status status;
-- 
cgit v1.2.3-59-g8ed1b


From f9a98f901e19eb0fbd61c29cf38b75d210d276bc Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:08:36 +0800
Subject: net: atheros: remove dead code in atl1c_resume()

This code has been marked dead for nearly 10 years. Remove it.

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/atheros/atl1c/atl1c_main.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
index 04bc53af12d9..decab9a8e4a8 100644
--- a/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
+++ b/drivers/net/ethernet/atheros/atl1c/atl1c_main.c
@@ -2449,12 +2449,6 @@ static int atl1c_resume(struct device *dev)
 	atl1c_reset_mac(&adapter->hw);
 	atl1c_phy_init(&adapter->hw);
 
-#if 0
-	AT_READ_REG(&adapter->hw, REG_PM_CTRLSTAT, &pm_data);
-	pm_data &= ~PM_CTRLSTAT_PME_EN;
-	AT_WRITE_REG(&adapter->hw, REG_PM_CTRLSTAT, pm_data);
-#endif
-
 	netif_device_attach(netdev);
 	if (netif_running(netdev))
 		atl1c_up(adapter);
-- 
cgit v1.2.3-59-g8ed1b


From 27ad2cd2000298e5822a12012edda47d72fdb27e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:08:47 +0800
Subject: net: tulip: de4x5: make PCI_signature() return void

This function always returns 0 now, so we can make it return void to
simplify the code. This fixes the following coccicheck warning:

drivers/net/ethernet/dec/tulip/de4x5.c:3908:11-17: Unneeded variable:
"status". Return "0" on line 3912

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/dec/tulip/de4x5.c | 10 ++++------
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/dec/tulip/de4x5.c b/drivers/net/ethernet/dec/tulip/de4x5.c
index f16853c3c851..0ccd9994ad45 100644
--- a/drivers/net/ethernet/dec/tulip/de4x5.c
+++ b/drivers/net/ethernet/dec/tulip/de4x5.c
@@ -951,7 +951,7 @@ static void    reset_init_sia(struct net_device *dev, s32 sicr, s32 strr, s32 si
 static int     test_ans(struct net_device *dev, s32 irqs, s32 irq_mask, s32 msec);
 static int     test_tp(struct net_device *dev, s32 msec);
 static int     EISA_signature(char *name, struct device *device);
-static int     PCI_signature(char *name, struct de4x5_private *lp);
+static void    PCI_signature(char *name, struct de4x5_private *lp);
 static void    DevicePresent(struct net_device *dev, u_long iobase);
 static void    enet_addr_rst(u_long aprom_addr);
 static int     de4x5_bad_srom(struct de4x5_private *lp);
@@ -3902,14 +3902,14 @@ EISA_signature(char *name, struct device *device)
 /*
 ** Look for a particular board name in the PCI configuration space
 */
-static int
+static void
 PCI_signature(char *name, struct de4x5_private *lp)
 {
-    int i, status = 0, siglen = ARRAY_SIZE(de4x5_signatures);
+    int i, siglen = ARRAY_SIZE(de4x5_signatures);
 
     if (lp->chipset == DC21040) {
 	strcpy(name, "DE434/5");
-	return status;
+	return;
     } else {                           /* Search for a DEC name in the SROM */
 	int tmp = *((char *)&lp->srom + 19) * 3;
 	strncpy(name, (char *)&lp->srom + 26 + tmp, 8);
@@ -3935,8 +3935,6 @@ PCI_signature(char *name, struct de4x5_private *lp)
     } else if ((lp->chipset & ~0x00ff) == DC2114x) {
 	lp->useSROM = true;
     }
-
-    return status;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 5a7c45097c3f63ac10def7206292b88023205a3e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:08:57 +0800
Subject: net: mlx4: remove unneeded variable "err" in
 mlx4_en_ethtool_add_mac_rule()

Fix the following coccicheck warning:

drivers/net/ethernet/mellanox/mlx4/en_ethtool.c:1396:5-8: Unneeded
variable: "err". Return "0" on line 1411

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx4/en_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
index 216e6b2e9eed..b816154bc79a 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_ethtool.c
@@ -1392,7 +1392,6 @@ static int mlx4_en_ethtool_add_mac_rule(struct ethtool_rxnfc *cmd,
 					struct mlx4_spec_list *spec_l2,
 					unsigned char *mac)
 {
-	int err = 0;
 	__be64 mac_msk = cpu_to_be64(MLX4_MAC_MASK << 16);
 
 	spec_l2->id = MLX4_NET_TRANS_RULE_ID_ETH;
@@ -1407,7 +1406,7 @@ static int mlx4_en_ethtool_add_mac_rule(struct ethtool_rxnfc *cmd,
 
 	list_add_tail(&spec_l2->list, rule_list_h);
 
-	return err;
+	return 0;
 }
 
 static int mlx4_en_ethtool_add_mac_rule_by_ipv4(struct mlx4_en_priv *priv,
-- 
cgit v1.2.3-59-g8ed1b


From 7f960633a458136d168f2049508d39cba8be55bd Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:09:05 +0800
Subject: net: encx24j600: make encx24j600_hw_init() return void

This function always returns 0 now, so we can make it return void to
simplify the code. This fixes the following coccicheck warning:

drivers/net/ethernet/microchip/encx24j600.c:609:5-8: Unneeded variable:
"ret". Return "0" on line 653

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/encx24j600.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/microchip/encx24j600.c b/drivers/net/ethernet/microchip/encx24j600.c
index 39925e4bf2ec..fccc4805247f 100644
--- a/drivers/net/ethernet/microchip/encx24j600.c
+++ b/drivers/net/ethernet/microchip/encx24j600.c
@@ -604,9 +604,8 @@ static void encx24j600_set_rxfilter_mode(struct encx24j600_priv *priv)
 	}
 }
 
-static int encx24j600_hw_init(struct encx24j600_priv *priv)
+static void encx24j600_hw_init(struct encx24j600_priv *priv)
 {
-	int ret = 0;
 	u16 macon2;
 
 	priv->hw_enabled = false;
@@ -649,8 +648,6 @@ static int encx24j600_hw_init(struct encx24j600_priv *priv)
 
 	if (netif_msg_hw(priv))
 		encx24j600_dump_config(priv, "Hw is initialized");
-
-	return ret;
 }
 
 static void encx24j600_hw_enable(struct encx24j600_priv *priv)
@@ -1042,12 +1039,7 @@ static int encx24j600_spi_probe(struct spi_device *spi)
 	}
 
 	/* Initialize the device HW to the consistent state */
-	if (encx24j600_hw_init(priv)) {
-		netif_err(priv, probe, ndev,
-			  DRV_NAME ": HW initialization error\n");
-		ret = -EIO;
-		goto out_free;
-	}
+	encx24j600_hw_init(priv);
 
 	kthread_init_worker(&priv->kworker);
 	kthread_init_work(&priv->tx_work, encx24j600_tx_proc);
-- 
cgit v1.2.3-59-g8ed1b


From 571cf29644542b9e410ed102cb35ecb27b4fdfa3 Mon Sep 17 00:00:00 2001
From: Po Liu <Po.Liu@nxp.com>
Date: Thu, 7 May 2020 18:57:38 +0800
Subject: net:enetc: bug fix for qos sfi being dereferenced after it is freed

'Dan Carpenter' reported:
This code frees "sfi" and then dereferences it on the next line:
>                 kfree(sfi);
>                 clear_bit(sfi->index, epsfp.psfp_sfi_bitmap);

This "sfi->index" should be "index".

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Po Liu <Po.Liu@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/enetc/enetc_qos.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/freescale/enetc/enetc_qos.c b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
index 172acb602ccb..fd3df19eaa32 100644
--- a/drivers/net/ethernet/freescale/enetc/enetc_qos.c
+++ b/drivers/net/ethernet/freescale/enetc/enetc_qos.c
@@ -903,7 +903,7 @@ static void stream_filter_unref(struct enetc_ndev_priv *priv, u32 index)
 		enetc_streamfilter_hw_set(priv, sfi, false);
 		hlist_del(&sfi->node);
 		kfree(sfi);
-		clear_bit(sfi->index, epsfp.psfp_sfi_bitmap);
+		clear_bit(index, epsfp.psfp_sfi_bitmap);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 0932969e0b1b6ba54028b35b80148302e8fe7db8 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Thu, 7 May 2020 13:42:05 +0200
Subject: via-rhine: Add platform dependencies

The VIA Rhine Ethernet interface is only present on PCI devices or
VIA/WonderMedia VT8500/WM85xx SoCs.  Add platform dependencies to the
VIA_RHINE config symbol, to avoid asking the user about it when
configuring a kernel without PCI or VT8500/WM85xx support.

Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Acked-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/via/Kconfig | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/via/Kconfig b/drivers/net/ethernet/via/Kconfig
index a962097b58c6..6cff5f7d57c4 100644
--- a/drivers/net/ethernet/via/Kconfig
+++ b/drivers/net/ethernet/via/Kconfig
@@ -19,6 +19,7 @@ if NET_VENDOR_VIA
 config VIA_RHINE
 	tristate "VIA Rhine support"
 	depends on PCI || (OF_IRQ && GENERIC_PCI_IOMAP)
+	depends on PCI || ARCH_VT8500 || COMPILE_TEST
 	depends on HAS_DMA
 	select CRC32
 	select MII
-- 
cgit v1.2.3-59-g8ed1b


From 6e728f321393b1fce9e1c2c3e55f9f7c15991321 Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 7 May 2020 18:23:05 +0530
Subject: net: qrtr: Add MHI transport layer

MHI is the transport layer used for communicating to the external modems.
Hence, this commit adds MHI transport layer support to QRTR for
transferring the QMI messages over IPC Router.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/Kconfig  |   7 +++
 net/qrtr/Makefile |   2 +
 net/qrtr/mhi.c    | 127 ++++++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 136 insertions(+)
 create mode 100644 net/qrtr/mhi.c

diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index 63f89cc6e82c..8eb876471564 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -29,4 +29,11 @@ config QRTR_TUN
 	  implement endpoints of QRTR, for purpose of tunneling data to other
 	  hosts or testing purposes.
 
+config QRTR_MHI
+	tristate "MHI IPC Router channels"
+	depends on MHI_BUS
+	help
+	  Say Y here to support MHI based ipcrouter channels. MHI is the
+	  transport used for communicating to external modems.
+
 endif # QRTR
diff --git a/net/qrtr/Makefile b/net/qrtr/Makefile
index 32d4e923925d..1b1411d158a7 100644
--- a/net/qrtr/Makefile
+++ b/net/qrtr/Makefile
@@ -5,3 +5,5 @@ obj-$(CONFIG_QRTR_SMD) += qrtr-smd.o
 qrtr-smd-y	:= smd.o
 obj-$(CONFIG_QRTR_TUN) += qrtr-tun.o
 qrtr-tun-y	:= tun.o
+obj-$(CONFIG_QRTR_MHI) += qrtr-mhi.o
+qrtr-mhi-y	:= mhi.o
diff --git a/net/qrtr/mhi.c b/net/qrtr/mhi.c
new file mode 100644
index 000000000000..ff0c41467fc1
--- /dev/null
+++ b/net/qrtr/mhi.c
@@ -0,0 +1,127 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Copyright (c) 2018-2020, The Linux Foundation. All rights reserved.
+ */
+
+#include <linux/mhi.h>
+#include <linux/mod_devicetable.h>
+#include <linux/module.h>
+#include <linux/skbuff.h>
+#include <net/sock.h>
+
+#include "qrtr.h"
+
+struct qrtr_mhi_dev {
+	struct qrtr_endpoint ep;
+	struct mhi_device *mhi_dev;
+	struct device *dev;
+};
+
+/* From MHI to QRTR */
+static void qcom_mhi_qrtr_dl_callback(struct mhi_device *mhi_dev,
+				      struct mhi_result *mhi_res)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+	int rc;
+
+	if (!qdev || mhi_res->transaction_status)
+		return;
+
+	rc = qrtr_endpoint_post(&qdev->ep, mhi_res->buf_addr,
+				mhi_res->bytes_xferd);
+	if (rc == -EINVAL)
+		dev_err(qdev->dev, "invalid ipcrouter packet\n");
+}
+
+/* From QRTR to MHI */
+static void qcom_mhi_qrtr_ul_callback(struct mhi_device *mhi_dev,
+				      struct mhi_result *mhi_res)
+{
+	struct sk_buff *skb = mhi_res->buf_addr;
+
+	if (skb->sk)
+		sock_put(skb->sk);
+	consume_skb(skb);
+}
+
+/* Send data over MHI */
+static int qcom_mhi_qrtr_send(struct qrtr_endpoint *ep, struct sk_buff *skb)
+{
+	struct qrtr_mhi_dev *qdev = container_of(ep, struct qrtr_mhi_dev, ep);
+	int rc;
+
+	rc = skb_linearize(skb);
+	if (rc)
+		goto free_skb;
+
+	rc = mhi_queue_skb(qdev->mhi_dev, DMA_TO_DEVICE, skb, skb->len,
+			   MHI_EOT);
+	if (rc)
+		goto free_skb;
+
+	if (skb->sk)
+		sock_hold(skb->sk);
+
+	return rc;
+
+free_skb:
+	kfree_skb(skb);
+
+	return rc;
+}
+
+static int qcom_mhi_qrtr_probe(struct mhi_device *mhi_dev,
+			       const struct mhi_device_id *id)
+{
+	struct qrtr_mhi_dev *qdev;
+	int rc;
+
+	qdev = devm_kzalloc(&mhi_dev->dev, sizeof(*qdev), GFP_KERNEL);
+	if (!qdev)
+		return -ENOMEM;
+
+	qdev->mhi_dev = mhi_dev;
+	qdev->dev = &mhi_dev->dev;
+	qdev->ep.xmit = qcom_mhi_qrtr_send;
+
+	dev_set_drvdata(&mhi_dev->dev, qdev);
+	rc = qrtr_endpoint_register(&qdev->ep, QRTR_EP_NID_AUTO);
+	if (rc)
+		return rc;
+
+	dev_dbg(qdev->dev, "Qualcomm MHI QRTR driver probed\n");
+
+	return 0;
+}
+
+static void qcom_mhi_qrtr_remove(struct mhi_device *mhi_dev)
+{
+	struct qrtr_mhi_dev *qdev = dev_get_drvdata(&mhi_dev->dev);
+
+	qrtr_endpoint_unregister(&qdev->ep);
+	dev_set_drvdata(&mhi_dev->dev, NULL);
+}
+
+static const struct mhi_device_id qcom_mhi_qrtr_id_table[] = {
+	{ .chan = "IPCR" },
+	{}
+};
+MODULE_DEVICE_TABLE(mhi, qcom_mhi_qrtr_id_table);
+
+static struct mhi_driver qcom_mhi_qrtr_driver = {
+	.probe = qcom_mhi_qrtr_probe,
+	.remove = qcom_mhi_qrtr_remove,
+	.dl_xfer_cb = qcom_mhi_qrtr_dl_callback,
+	.ul_xfer_cb = qcom_mhi_qrtr_ul_callback,
+	.id_table = qcom_mhi_qrtr_id_table,
+	.driver = {
+		.name = "qcom_mhi_qrtr",
+	},
+};
+
+module_mhi_driver(qcom_mhi_qrtr_driver);
+
+MODULE_AUTHOR("Chris Lew <clew@codeaurora.org>");
+MODULE_AUTHOR("Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>");
+MODULE_DESCRIPTION("Qualcomm IPC-Router MHI interface driver");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-59-g8ed1b


From e42671084361302141a09284fde9bbc14fdd16bf Mon Sep 17 00:00:00 2001
From: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Date: Thu, 7 May 2020 18:23:06 +0530
Subject: net: qrtr: Do not depend on ARCH_QCOM

IPC Router protocol is also used by external modems for exchanging the QMI
messages. Hence, it doesn't always depend on Qualcomm platforms. One such
instance is the QCA6390 WLAN device connected to x86 machine.

Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Manivannan Sadhasivam <manivannan.sadhasivam@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/qrtr/Kconfig | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/qrtr/Kconfig b/net/qrtr/Kconfig
index 8eb876471564..f362ca316015 100644
--- a/net/qrtr/Kconfig
+++ b/net/qrtr/Kconfig
@@ -4,7 +4,6 @@
 
 config QRTR
 	tristate "Qualcomm IPC Router support"
-	depends on ARCH_QCOM || COMPILE_TEST
 	---help---
 	  Say Y if you intend to use Qualcomm IPC router protocol.  The
 	  protocol is used to communicate with services provided by other
-- 
cgit v1.2.3-59-g8ed1b


From bb206a0869b7f1e7c853d7b94e91510c321402c7 Mon Sep 17 00:00:00 2001
From: Chen Zhou <chenzhou10@huawei.com>
Date: Thu, 7 May 2020 21:26:39 +0800
Subject: cxgb4: remove duplicate headers

Remove duplicate headers which are included twice.

Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index e46a14f44a6f..30d25a37fc3b 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -466,8 +466,6 @@ static inline struct mbox_cmd *mbox_cmd_log_entry(struct mbox_cmd_log *log,
 	return &((struct mbox_cmd *)&(log)[1])[entry_idx];
 }
 
-#include "t4fw_api.h"
-
 #define FW_VERSION(chip) ( \
 		FW_HDR_FW_VER_MAJOR_G(chip##FW_VERSION_MAJOR) | \
 		FW_HDR_FW_VER_MINOR_G(chip##FW_VERSION_MINOR) | \
-- 
cgit v1.2.3-59-g8ed1b


From e1eea8112017cbdc596d90caf6ede191502a9691 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:52 +0300
Subject: net: dsa: introduce a dsa_port_from_netdev public helper

As its implementation shows, this is synonymous with calling
dsa_slave_dev_check followed by dsa_slave_to_port, so it is quite simple
already and provides functionality which is already there.

However there is now a need for these functions outside dsa_priv.h, for
example in drivers that perform mirroring and redirection through
tc-flower offloads (they are given raw access to the flow_cls_offload
structure), where they need to call this function on act->dev.

But simply exporting dsa_slave_to_port would make it non-inline and
would result in an extra function call in the hotpath, as can be seen
for example in sja1105:

Before:

000006dc <sja1105_xmit>:
{
 6dc:	e92d4ff0 	push	{r4, r5, r6, r7, r8, r9, sl, fp, lr}
 6e0:	e1a04000 	mov	r4, r0
 6e4:	e591958c 	ldr	r9, [r1, #1420]	; 0x58c <- Inline dsa_slave_to_port
 6e8:	e1a05001 	mov	r5, r1
 6ec:	e24dd004 	sub	sp, sp, #4
	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 6f0:	e1c901d8 	ldrd	r0, [r9, #24]
 6f4:	ebfffffe 	bl	0 <dsa_8021q_tx_vid>
			6f4: R_ARM_CALL	dsa_8021q_tx_vid
	u8 pcp = netdev_txq_to_tc(netdev, queue_mapping);
 6f8:	e1d416b0 	ldrh	r1, [r4, #96]	; 0x60
	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 6fc:	e1a08000 	mov	r8, r0

After:

000006e4 <sja1105_xmit>:
{
 6e4:	e92d4ff0 	push	{r4, r5, r6, r7, r8, r9, sl, fp, lr}
 6e8:	e1a04000 	mov	r4, r0
 6ec:	e24dd004 	sub	sp, sp, #4
	struct dsa_port *dp = dsa_slave_to_port(netdev);
 6f0:	e1a00001 	mov	r0, r1
{
 6f4:	e1a05001 	mov	r5, r1
	struct dsa_port *dp = dsa_slave_to_port(netdev);
 6f8:	ebfffffe 	bl	0 <dsa_slave_to_port>
			6f8: R_ARM_CALL	dsa_slave_to_port
 6fc:	e1a09000 	mov	r9, r0
	u16 tx_vid = dsa_8021q_tx_vid(dp->ds, dp->index);
 700:	e1c001d8 	ldrd	r0, [r0, #24]
 704:	ebfffffe 	bl	0 <dsa_8021q_tx_vid>
			704: R_ARM_CALL	dsa_8021q_tx_vid

Because we want to avoid possible performance regressions, introduce
this new function which is designed to be public.

Suggested-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Vivien Didelot <vivien.didelot@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h | 1 +
 net/dsa/dsa.c     | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index fb3f9222f2a1..6dfc8c2f68b8 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -637,6 +637,7 @@ void dsa_devlink_resource_occ_get_register(struct dsa_switch *ds,
 					   void *occ_get_priv);
 void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
 					     u64 resource_id);
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev);
 
 struct dsa_devlink_priv {
 	struct dsa_switch *ds;
diff --git a/net/dsa/dsa.c b/net/dsa/dsa.c
index 0384a911779e..1ce9ba8cf545 100644
--- a/net/dsa/dsa.c
+++ b/net/dsa/dsa.c
@@ -412,6 +412,15 @@ void dsa_devlink_resource_occ_get_unregister(struct dsa_switch *ds,
 }
 EXPORT_SYMBOL_GPL(dsa_devlink_resource_occ_get_unregister);
 
+struct dsa_port *dsa_port_from_netdev(struct net_device *netdev)
+{
+	if (!netdev || !dsa_slave_dev_check(netdev))
+		return ERR_PTR(-ENODEV);
+
+	return dsa_slave_to_port(netdev);
+}
+EXPORT_SYMBOL_GPL(dsa_port_from_netdev);
+
 static int __init dsa_init_module(void)
 {
 	int rc;
-- 
cgit v1.2.3-59-g8ed1b


From 94f94d4acfb2a5e978f98d924be33c981e2f86c6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:53 +0300
Subject: net: dsa: sja1105: add static tables for virtual links

This patch adds the register definitions for the:
- VL Lookup Table
- VL Policing Table
- VL Forwarding Table
- VL Forwarding Parameters Table

These are needed in order to perform TTEthernet operations: QoS
classification, flow-based policing and/or frame redirecting with the
switch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h                |   2 +
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c |  51 ++++++
 drivers/net/dsa/sja1105/sja1105_static_config.c  | 202 +++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_static_config.h  |  63 +++++++
 4 files changed, 318 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 2f62942692ec..602aa30c832f 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -254,6 +254,8 @@ size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op);
 size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op);
+size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
 
 /* From sja1105_flower.c */
 int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index bf9b36ff35bf..bdee01811960 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -97,6 +97,12 @@
 
 #define SJA1105_SIZE_DYN_CMD					4
 
+#define SJA1105ET_SJA1105_SIZE_VL_LOOKUP_DYN_CMD		\
+	SJA1105_SIZE_DYN_CMD
+
+#define SJA1105PQRS_SJA1105_SIZE_VL_LOOKUP_DYN_CMD		\
+	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_VL_LOOKUP_ENTRY)
+
 #define SJA1105ET_SIZE_MAC_CONFIG_DYN_ENTRY			\
 	SJA1105_SIZE_DYN_CMD
 
@@ -146,6 +152,29 @@ enum sja1105_hostcmd {
 	SJA1105_HOSTCMD_INVALIDATE = 4,
 };
 
+static void
+sja1105_vl_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			      enum packing_op op)
+{
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(buf, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(buf, &cmd->errors,  30, 30, size, op);
+	sja1105_packing(buf, &cmd->rdwrset, 29, 29, size, op);
+	sja1105_packing(buf, &cmd->index,    9,  0, size, op);
+}
+
+static size_t sja1105et_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+						enum packing_op op)
+{
+	struct sja1105_vl_lookup_entry *entry = entry_ptr;
+	const int size = SJA1105ET_SJA1105_SIZE_VL_LOOKUP_DYN_CMD;
+
+	sja1105_packing(buf, &entry->egrmirr,  21, 17, size, op);
+	sja1105_packing(buf, &entry->ingrmirr, 16, 16, size, op);
+	return size;
+}
+
 static void
 sja1105pqrs_l2_lookup_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				  enum packing_op op)
@@ -505,6 +534,16 @@ sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_SCHEDULE] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {0},
+	[BLK_IDX_VL_LOOKUP] = {
+		.entry_packing = sja1105et_vl_lookup_entry_packing,
+		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.access = OP_WRITE,
+		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
+		.packed_size = SJA1105ET_SJA1105_SIZE_VL_LOOKUP_DYN_CMD,
+		.addr = 0x35,
+	},
+	[BLK_IDX_VL_POLICING] = {0},
+	[BLK_IDX_VL_FORWARDING] = {0},
 	[BLK_IDX_L2_LOOKUP] = {
 		.entry_packing = sja1105et_dyn_l2_lookup_entry_packing,
 		.cmd_packing = sja1105et_l2_lookup_cmd_packing,
@@ -548,6 +587,7 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 	},
 	[BLK_IDX_SCHEDULE_PARAMS] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.entry_packing = sja1105et_l2_lookup_params_entry_packing,
 		.cmd_packing = sja1105et_l2_lookup_params_cmd_packing,
@@ -573,6 +613,16 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_SCHEDULE] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {0},
+	[BLK_IDX_VL_LOOKUP] = {
+		.entry_packing = sja1105_vl_lookup_entry_packing,
+		.cmd_packing = sja1105_vl_lookup_cmd_packing,
+		.access = (OP_READ | OP_WRITE),
+		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
+		.packed_size = SJA1105PQRS_SJA1105_SIZE_VL_LOOKUP_DYN_CMD,
+		.addr = 0x47,
+	},
+	[BLK_IDX_VL_POLICING] = {0},
+	[BLK_IDX_VL_FORWARDING] = {0},
 	[BLK_IDX_L2_LOOKUP] = {
 		.entry_packing = sja1105pqrs_dyn_l2_lookup_entry_packing,
 		.cmd_packing = sja1105pqrs_l2_lookup_cmd_packing,
@@ -616,6 +666,7 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	},
 	[BLK_IDX_SCHEDULE_PARAMS] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.entry_packing = sja1105et_l2_lookup_params_entry_packing,
 		.cmd_packing = sja1105et_l2_lookup_params_cmd_packing,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index bbfe034910a0..b68c9c92c248 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -432,6 +432,84 @@ static size_t sja1105_schedule_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+static size_t
+sja1105_vl_forwarding_params_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op)
+{
+	struct sja1105_vl_forwarding_params_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY;
+	int offset, i;
+
+	for (i = 0, offset = 16; i < 8; i++, offset += 10)
+		sja1105_packing(buf, &entry->partspc[i],
+				offset + 9, offset + 0, size, op);
+	sja1105_packing(buf, &entry->debugen, 15, 15, size, op);
+	return size;
+}
+
+static size_t sja1105_vl_forwarding_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
+{
+	struct sja1105_vl_forwarding_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_FORWARDING_ENTRY;
+
+	sja1105_packing(buf, &entry->type,      31, 31, size, op);
+	sja1105_packing(buf, &entry->priority,  30, 28, size, op);
+	sja1105_packing(buf, &entry->partition, 27, 25, size, op);
+	sja1105_packing(buf, &entry->destports, 24, 20, size, op);
+	return size;
+}
+
+size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op)
+{
+	struct sja1105_vl_lookup_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_LOOKUP_ENTRY;
+
+	if (entry->format == SJA1105_VL_FORMAT_PSFP) {
+		/* Interpreting vllupformat as 0 */
+		sja1105_packing(buf, &entry->destports,
+				95, 91, size, op);
+		sja1105_packing(buf, &entry->iscritical,
+				90, 90, size, op);
+		sja1105_packing(buf, &entry->macaddr,
+				89, 42, size, op);
+		sja1105_packing(buf, &entry->vlanid,
+				41, 30, size, op);
+		sja1105_packing(buf, &entry->port,
+				29, 27, size, op);
+		sja1105_packing(buf, &entry->vlanprior,
+				26, 24, size, op);
+	} else {
+		/* Interpreting vllupformat as 1 */
+		sja1105_packing(buf, &entry->egrmirr,
+				95, 91, size, op);
+		sja1105_packing(buf, &entry->ingrmirr,
+				90, 90, size, op);
+		sja1105_packing(buf, &entry->vlid,
+				57, 42, size, op);
+		sja1105_packing(buf, &entry->port,
+				29, 27, size, op);
+	}
+	return size;
+}
+
+static size_t sja1105_vl_policing_entry_packing(void *buf, void *entry_ptr,
+						enum packing_op op)
+{
+	struct sja1105_vl_policing_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_VL_POLICING_ENTRY;
+
+	sja1105_packing(buf, &entry->type,      63, 63, size, op);
+	sja1105_packing(buf, &entry->maxlen,    62, 52, size, op);
+	sja1105_packing(buf, &entry->sharindx,  51, 42, size, op);
+	if (entry->type == 0) {
+		sja1105_packing(buf, &entry->bag,    41, 28, size, op);
+		sja1105_packing(buf, &entry->jitter, 27, 18, size, op);
+	}
+	return size;
+}
+
 size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op)
 {
@@ -510,6 +588,9 @@ static void sja1105_table_write_crc(u8 *table_start, u8 *crc_ptr)
 static u64 blk_id_map[BLK_IDX_MAX] = {
 	[BLK_IDX_SCHEDULE] = BLKID_SCHEDULE,
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = BLKID_SCHEDULE_ENTRY_POINTS,
+	[BLK_IDX_VL_LOOKUP] = BLKID_VL_LOOKUP,
+	[BLK_IDX_VL_POLICING] = BLKID_VL_POLICING,
+	[BLK_IDX_VL_FORWARDING] = BLKID_VL_FORWARDING,
 	[BLK_IDX_L2_LOOKUP] = BLKID_L2_LOOKUP,
 	[BLK_IDX_L2_POLICING] = BLKID_L2_POLICING,
 	[BLK_IDX_VLAN_LOOKUP] = BLKID_VLAN_LOOKUP,
@@ -517,6 +598,7 @@ static u64 blk_id_map[BLK_IDX_MAX] = {
 	[BLK_IDX_MAC_CONFIG] = BLKID_MAC_CONFIG,
 	[BLK_IDX_SCHEDULE_PARAMS] = BLKID_SCHEDULE_PARAMS,
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = BLKID_SCHEDULE_ENTRY_POINTS_PARAMS,
+	[BLK_IDX_VL_FORWARDING_PARAMS] = BLKID_VL_FORWARDING_PARAMS,
 	[BLK_IDX_L2_LOOKUP_PARAMS] = BLKID_L2_LOOKUP_PARAMS,
 	[BLK_IDX_L2_FORWARDING_PARAMS] = BLKID_L2_FORWARDING_PARAMS,
 	[BLK_IDX_AVB_PARAMS] = BLKID_AVB_PARAMS,
@@ -533,6 +615,9 @@ const char *sja1105_static_config_error_msg[] = {
 		"schedule-table present, but one of "
 		"schedule-entry-points-table, schedule-parameters-table or "
 		"schedule-entry-points-parameters table is empty",
+	[SJA1105_INCORRECT_VIRTUAL_LINK_CONFIGURATION] =
+		"vl-lookup-table present, but one of vl-policing-table, "
+		"vl-forwarding-table or vl-forwarding-parameters-table is empty",
 	[SJA1105_MISSING_L2_POLICING_TABLE] =
 		"l2-policing-table needs to have at least one entry",
 	[SJA1105_MISSING_L2_FORWARDING_TABLE] =
@@ -560,6 +645,7 @@ static sja1105_config_valid_t
 static_config_check_memory_size(const struct sja1105_table *tables)
 {
 	const struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
+	const struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
 	int i, mem = 0;
 
 	l2_fwd_params = tables[BLK_IDX_L2_FORWARDING_PARAMS].entries;
@@ -567,6 +653,12 @@ static_config_check_memory_size(const struct sja1105_table *tables)
 	for (i = 0; i < 8; i++)
 		mem += l2_fwd_params->part_spc[i];
 
+	if (tables[BLK_IDX_VL_FORWARDING_PARAMS].entry_count) {
+		vl_fwd_params = tables[BLK_IDX_VL_FORWARDING_PARAMS].entries;
+		for (i = 0; i < 8; i++)
+			mem += vl_fwd_params->partspc[i];
+	}
+
 	if (mem > SJA1105_MAX_FRAME_MEMORY)
 		return SJA1105_OVERCOMMITTED_FRAME_MEMORY;
 
@@ -594,6 +686,32 @@ sja1105_static_config_check_valid(const struct sja1105_static_config *config)
 		if (!IS_FULL(BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS))
 			return SJA1105_INCORRECT_TTETHERNET_CONFIGURATION;
 	}
+	if (tables[BLK_IDX_VL_LOOKUP].entry_count) {
+		struct sja1105_vl_lookup_entry *vl_lookup;
+		bool has_critical_links = false;
+		int i;
+
+		vl_lookup = tables[BLK_IDX_VL_LOOKUP].entries;
+
+		for (i = 0; i < tables[BLK_IDX_VL_LOOKUP].entry_count; i++) {
+			if (vl_lookup[i].iscritical) {
+				has_critical_links = true;
+				break;
+			}
+		}
+
+		if (tables[BLK_IDX_VL_POLICING].entry_count == 0 &&
+		    has_critical_links)
+			return SJA1105_INCORRECT_VIRTUAL_LINK_CONFIGURATION;
+
+		if (tables[BLK_IDX_VL_FORWARDING].entry_count == 0 &&
+		    has_critical_links)
+			return SJA1105_INCORRECT_VIRTUAL_LINK_CONFIGURATION;
+
+		if (tables[BLK_IDX_VL_FORWARDING_PARAMS].entry_count == 0 &&
+		    has_critical_links)
+			return SJA1105_INCORRECT_VIRTUAL_LINK_CONFIGURATION;
+	}
 
 	if (tables[BLK_IDX_L2_POLICING].entry_count == 0)
 		return SJA1105_MISSING_L2_POLICING_TABLE;
@@ -703,6 +821,9 @@ sja1105_static_config_get_length(const struct sja1105_static_config *config)
 struct sja1105_table_ops sja1105e_table_ops[BLK_IDX_MAX] = {
 	[BLK_IDX_SCHEDULE] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {0},
+	[BLK_IDX_VL_LOOKUP] = {0},
+	[BLK_IDX_VL_POLICING] = {0},
+	[BLK_IDX_VL_FORWARDING] = {0},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105et_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -735,6 +856,7 @@ struct sja1105_table_ops sja1105e_table_ops[BLK_IDX_MAX] = {
 	},
 	[BLK_IDX_SCHEDULE_PARAMS] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105et_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
@@ -781,6 +903,24 @@ struct sja1105_table_ops sja1105t_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT,
 	},
+	[BLK_IDX_VL_LOOKUP] = {
+		.packing = sja1105_vl_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_lookup_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_LOOKUP_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
+	},
+	[BLK_IDX_VL_POLICING] = {
+		.packing = sja1105_vl_policing_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_policing_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_POLICING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_POLICING_COUNT,
+	},
+	[BLK_IDX_VL_FORWARDING] = {
+		.packing = sja1105_vl_forwarding_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105et_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -823,6 +963,12 @@ struct sja1105_table_ops sja1105t_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
 	},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {
+		.packing = sja1105_vl_forwarding_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_params_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105et_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
@@ -859,6 +1005,9 @@ struct sja1105_table_ops sja1105t_table_ops[BLK_IDX_MAX] = {
 struct sja1105_table_ops sja1105p_table_ops[BLK_IDX_MAX] = {
 	[BLK_IDX_SCHEDULE] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {0},
+	[BLK_IDX_VL_LOOKUP] = {0},
+	[BLK_IDX_VL_POLICING] = {0},
+	[BLK_IDX_VL_FORWARDING] = {0},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105pqrs_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -891,6 +1040,7 @@ struct sja1105_table_ops sja1105p_table_ops[BLK_IDX_MAX] = {
 	},
 	[BLK_IDX_SCHEDULE_PARAMS] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105pqrs_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
@@ -937,6 +1087,24 @@ struct sja1105_table_ops sja1105q_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT,
 	},
+	[BLK_IDX_VL_LOOKUP] = {
+		.packing = sja1105_vl_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_lookup_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_LOOKUP_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
+	},
+	[BLK_IDX_VL_POLICING] = {
+		.packing = sja1105_vl_policing_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_policing_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_POLICING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_POLICING_COUNT,
+	},
+	[BLK_IDX_VL_FORWARDING] = {
+		.packing = sja1105_vl_forwarding_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105pqrs_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -979,6 +1147,12 @@ struct sja1105_table_ops sja1105q_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
 	},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {
+		.packing = sja1105_vl_forwarding_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_params_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105pqrs_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
@@ -1015,6 +1189,9 @@ struct sja1105_table_ops sja1105q_table_ops[BLK_IDX_MAX] = {
 struct sja1105_table_ops sja1105r_table_ops[BLK_IDX_MAX] = {
 	[BLK_IDX_SCHEDULE] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS] = {0},
+	[BLK_IDX_VL_LOOKUP] = {0},
+	[BLK_IDX_VL_POLICING] = {0},
+	[BLK_IDX_VL_FORWARDING] = {0},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105pqrs_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -1047,6 +1224,7 @@ struct sja1105_table_ops sja1105r_table_ops[BLK_IDX_MAX] = {
 	},
 	[BLK_IDX_SCHEDULE_PARAMS] = {0},
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105pqrs_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
@@ -1093,6 +1271,24 @@ struct sja1105_table_ops sja1105s_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT,
 	},
+	[BLK_IDX_VL_LOOKUP] = {
+		.packing = sja1105_vl_lookup_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_lookup_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_LOOKUP_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_LOOKUP_COUNT,
+	},
+	[BLK_IDX_VL_POLICING] = {
+		.packing = sja1105_vl_policing_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_policing_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_POLICING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_POLICING_COUNT,
+	},
+	[BLK_IDX_VL_FORWARDING] = {
+		.packing = sja1105_vl_forwarding_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP] = {
 		.packing = sja1105pqrs_l2_lookup_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_entry),
@@ -1135,6 +1331,12 @@ struct sja1105_table_ops sja1105s_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT,
 	},
+	[BLK_IDX_VL_FORWARDING_PARAMS] = {
+		.packing = sja1105_vl_forwarding_params_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_vl_forwarding_params_entry),
+		.packed_entry_size = SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY,
+		.max_entry_count = SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT,
+	},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
 		.packing = sja1105pqrs_l2_lookup_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_l2_lookup_params_entry),
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 8afafb6aef12..1a8fcbbb57b6 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -13,6 +13,9 @@
 #define SJA1105_SIZE_TABLE_HEADER			12
 #define SJA1105_SIZE_SCHEDULE_ENTRY			8
 #define SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_ENTRY	4
+#define SJA1105_SIZE_VL_LOOKUP_ENTRY			12
+#define SJA1105_SIZE_VL_POLICING_ENTRY			8
+#define SJA1105_SIZE_VL_FORWARDING_ENTRY		4
 #define SJA1105_SIZE_L2_POLICING_ENTRY			8
 #define SJA1105_SIZE_VLAN_LOOKUP_ENTRY			8
 #define SJA1105_SIZE_L2_FORWARDING_ENTRY		8
@@ -20,6 +23,7 @@
 #define SJA1105_SIZE_XMII_PARAMS_ENTRY			4
 #define SJA1105_SIZE_SCHEDULE_PARAMS_ENTRY		12
 #define SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY	4
+#define SJA1105_SIZE_VL_FORWARDING_PARAMS_ENTRY         12
 #define SJA1105ET_SIZE_L2_LOOKUP_ENTRY			12
 #define SJA1105ET_SIZE_MAC_CONFIG_ENTRY			28
 #define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_ENTRY		4
@@ -35,6 +39,9 @@
 enum {
 	BLKID_SCHEDULE					= 0x00,
 	BLKID_SCHEDULE_ENTRY_POINTS			= 0x01,
+	BLKID_VL_LOOKUP					= 0x02,
+	BLKID_VL_POLICING				= 0x03,
+	BLKID_VL_FORWARDING				= 0x04,
 	BLKID_L2_LOOKUP					= 0x05,
 	BLKID_L2_POLICING				= 0x06,
 	BLKID_VLAN_LOOKUP				= 0x07,
@@ -42,6 +49,7 @@ enum {
 	BLKID_MAC_CONFIG				= 0x09,
 	BLKID_SCHEDULE_PARAMS				= 0x0A,
 	BLKID_SCHEDULE_ENTRY_POINTS_PARAMS		= 0x0B,
+	BLKID_VL_FORWARDING_PARAMS			= 0x0C,
 	BLKID_L2_LOOKUP_PARAMS				= 0x0D,
 	BLKID_L2_FORWARDING_PARAMS			= 0x0E,
 	BLKID_AVB_PARAMS				= 0x10,
@@ -52,6 +60,9 @@ enum {
 enum sja1105_blk_idx {
 	BLK_IDX_SCHEDULE = 0,
 	BLK_IDX_SCHEDULE_ENTRY_POINTS,
+	BLK_IDX_VL_LOOKUP,
+	BLK_IDX_VL_POLICING,
+	BLK_IDX_VL_FORWARDING,
 	BLK_IDX_L2_LOOKUP,
 	BLK_IDX_L2_POLICING,
 	BLK_IDX_VLAN_LOOKUP,
@@ -59,6 +70,7 @@ enum sja1105_blk_idx {
 	BLK_IDX_MAC_CONFIG,
 	BLK_IDX_SCHEDULE_PARAMS,
 	BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS,
+	BLK_IDX_VL_FORWARDING_PARAMS,
 	BLK_IDX_L2_LOOKUP_PARAMS,
 	BLK_IDX_L2_FORWARDING_PARAMS,
 	BLK_IDX_AVB_PARAMS,
@@ -73,6 +85,9 @@ enum sja1105_blk_idx {
 
 #define SJA1105_MAX_SCHEDULE_COUNT			1024
 #define SJA1105_MAX_SCHEDULE_ENTRY_POINTS_COUNT		2048
+#define SJA1105_MAX_VL_LOOKUP_COUNT			1024
+#define SJA1105_MAX_VL_POLICING_COUNT			1024
+#define SJA1105_MAX_VL_FORWARDING_COUNT			1024
 #define SJA1105_MAX_L2_LOOKUP_COUNT			1024
 #define SJA1105_MAX_L2_POLICING_COUNT			45
 #define SJA1105_MAX_VLAN_LOOKUP_COUNT			4096
@@ -80,6 +95,7 @@ enum sja1105_blk_idx {
 #define SJA1105_MAX_MAC_CONFIG_COUNT			5
 #define SJA1105_MAX_SCHEDULE_PARAMS_COUNT		1
 #define SJA1105_MAX_SCHEDULE_ENTRY_POINTS_PARAMS_COUNT	1
+#define SJA1105_MAX_VL_FORWARDING_PARAMS_COUNT		1
 #define SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT		1
 #define SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT		1
 #define SJA1105_MAX_GENERAL_PARAMS_COUNT		1
@@ -262,6 +278,52 @@ struct sja1105_xmii_params_entry {
 	u64 xmii_mode[5];
 };
 
+enum {
+	SJA1105_VL_FORMAT_PSFP		= 0,
+	SJA1105_VL_FORMAT_ARINC664	= 1,
+};
+
+struct sja1105_vl_lookup_entry {
+	u64 format;
+	u64 port;
+	union {
+		/* SJA1105_VL_FORMAT_PSFP */
+		struct {
+			u64 destports;
+			u64 iscritical;
+			u64 macaddr;
+			u64 vlanid;
+			u64 vlanprior;
+		};
+		/* SJA1105_VL_FORMAT_ARINC664 */
+		struct {
+			u64 egrmirr;
+			u64 ingrmirr;
+			u64 vlid;
+		};
+	};
+};
+
+struct sja1105_vl_policing_entry {
+	u64 type;
+	u64 maxlen;
+	u64 sharindx;
+	u64 bag;
+	u64 jitter;
+};
+
+struct sja1105_vl_forwarding_entry {
+	u64 type;
+	u64 priority;
+	u64 partition;
+	u64 destports;
+};
+
+struct sja1105_vl_forwarding_params_entry {
+	u64 partspc[8];
+	u64 debugen;
+};
+
 struct sja1105_table_header {
 	u64 block_id;
 	u64 len;
@@ -303,6 +365,7 @@ typedef enum {
 	SJA1105_CONFIG_OK = 0,
 	SJA1105_TTETHERNET_NOT_SUPPORTED,
 	SJA1105_INCORRECT_TTETHERNET_CONFIGURATION,
+	SJA1105_INCORRECT_VIRTUAL_LINK_CONFIGURATION,
 	SJA1105_MISSING_L2_POLICING_TABLE,
 	SJA1105_MISSING_L2_FORWARDING_TABLE,
 	SJA1105_MISSING_L2_FORWARDING_PARAMS_TABLE,
-- 
cgit v1.2.3-59-g8ed1b


From b70bb8d4ab2c0a4992e4692f07e9b91056b30c88 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:54 +0300
Subject: net: dsa: sja1105: make room for virtual link parsing in flower
 offload

Virtual links are a sja1105 hardware concept of executing various flow
actions based on a key extracted from the frame's DMAC, VID and PCP.

Currently the tc-flower offload code only supports parsing the DMAC (and
only if it is the broadcast MAC address) and the VLAN PCP. Extract the key
parsing logic from the L2 policers functionality and move it into its
own function, after adding extra logic for matching on any DMAC and VID.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h        |  28 +++++++-
 drivers/net/dsa/sja1105/sja1105_flower.c | 111 +++++++++++++++++++++++--------
 2 files changed, 112 insertions(+), 27 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 602aa30c832f..95633ad9bfb7 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -97,6 +97,32 @@ struct sja1105_info {
 	const char *name;
 };
 
+enum sja1105_key_type {
+	SJA1105_KEY_BCAST,
+	SJA1105_KEY_TC,
+	SJA1105_KEY_VLAN_UNAWARE_VL,
+	SJA1105_KEY_VLAN_AWARE_VL,
+};
+
+struct sja1105_key {
+	enum sja1105_key_type type;
+
+	union {
+		/* SJA1105_KEY_TC */
+		struct {
+			int pcp;
+		} tc;
+
+		/* SJA1105_KEY_VLAN_UNAWARE_VL */
+		/* SJA1105_KEY_VLAN_AWARE_VL */
+		struct {
+			u64 dmac;
+			u16 vid;
+			u16 pcp;
+		} vl;
+	};
+};
+
 enum sja1105_rule_type {
 	SJA1105_RULE_BCAST_POLICER,
 	SJA1105_RULE_TC_POLICER,
@@ -106,6 +132,7 @@ struct sja1105_rule {
 	struct list_head list;
 	unsigned long cookie;
 	unsigned long port_mask;
+	struct sja1105_key key;
 	enum sja1105_rule_type type;
 
 	union {
@@ -117,7 +144,6 @@ struct sja1105_rule {
 		/* SJA1105_RULE_TC_POLICER */
 		struct {
 			int sharindx;
-			int tc;
 		} tc_pol;
 	};
 };
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 5288a722e625..3246d5a49436 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -46,6 +46,7 @@ static int sja1105_setup_bcast_policer(struct sja1105_private *priv,
 		rule->cookie = cookie;
 		rule->type = SJA1105_RULE_BCAST_POLICER;
 		rule->bcast_pol.sharindx = sja1105_find_free_l2_policer(priv);
+		rule->key.type = SJA1105_KEY_BCAST;
 		new_rule = true;
 	}
 
@@ -117,7 +118,8 @@ static int sja1105_setup_tc_policer(struct sja1105_private *priv,
 		rule->cookie = cookie;
 		rule->type = SJA1105_RULE_TC_POLICER;
 		rule->tc_pol.sharindx = sja1105_find_free_l2_policer(priv);
-		rule->tc_pol.tc = tc;
+		rule->key.type = SJA1105_KEY_TC;
+		rule->key.tc.pcp = tc;
 		new_rule = true;
 	}
 
@@ -169,14 +171,37 @@ out:
 	return rc;
 }
 
-static int sja1105_flower_parse_policer(struct sja1105_private *priv, int port,
-					struct netlink_ext_ack *extack,
-					struct flow_cls_offload *cls,
-					u64 rate_bytes_per_sec,
-					s64 burst)
+static int sja1105_flower_policer(struct sja1105_private *priv, int port,
+				  struct netlink_ext_ack *extack,
+				  unsigned long cookie, struct sja1105_key *key,
+				  u64 rate_bytes_per_sec,
+				  s64 burst)
+{
+	switch (key->type) {
+	case SJA1105_KEY_BCAST:
+		return sja1105_setup_bcast_policer(priv, extack, cookie, port,
+						   rate_bytes_per_sec, burst);
+	case SJA1105_KEY_TC:
+		return sja1105_setup_tc_policer(priv, extack, cookie, port,
+						key->tc.pcp, rate_bytes_per_sec,
+						burst);
+	default:
+		NL_SET_ERR_MSG_MOD(extack, "Unknown keys for policing");
+		return -EOPNOTSUPP;
+	}
+}
+
+static int sja1105_flower_parse_key(struct sja1105_private *priv,
+				    struct netlink_ext_ack *extack,
+				    struct flow_cls_offload *cls,
+				    struct sja1105_key *key)
 {
 	struct flow_rule *rule = flow_cls_offload_flow_rule(cls);
 	struct flow_dissector *dissector = rule->match.dissector;
+	bool is_bcast_dmac = false;
+	u64 dmac = U64_MAX;
+	u16 vid = U16_MAX;
+	u16 pcp = U16_MAX;
 
 	if (dissector->used_keys &
 	    ~(BIT(FLOW_DISSECTOR_KEY_BASIC) |
@@ -213,16 +238,14 @@ static int sja1105_flower_parse_policer(struct sja1105_private *priv, int port,
 			return -EOPNOTSUPP;
 		}
 
-		if (!ether_addr_equal_masked(match.key->dst, bcast,
-					     match.mask->dst)) {
+		if (!ether_addr_equal(match.mask->dst, bcast)) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Only matching on broadcast DMAC is supported");
+					   "Masked matching on MAC not supported");
 			return -EOPNOTSUPP;
 		}
 
-		return sja1105_setup_bcast_policer(priv, extack, cls->cookie,
-						   port, rate_bytes_per_sec,
-						   burst);
+		dmac = ether_addr_to_u64(match.key->dst);
+		is_bcast_dmac = ether_addr_equal(match.key->dst, bcast);
 	}
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_VLAN)) {
@@ -230,22 +253,46 @@ static int sja1105_flower_parse_policer(struct sja1105_private *priv, int port,
 
 		flow_rule_match_vlan(rule, &match);
 
-		if (match.key->vlan_id & match.mask->vlan_id) {
+		if (match.mask->vlan_id &&
+		    match.mask->vlan_id != VLAN_VID_MASK) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Matching on VID is not supported");
+					   "Masked matching on VID is not supported");
 			return -EOPNOTSUPP;
 		}
 
-		if (match.mask->vlan_priority != 0x7) {
+		if (match.mask->vlan_priority &&
+		    match.mask->vlan_priority != 0x7) {
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Masked matching on PCP is not supported");
 			return -EOPNOTSUPP;
 		}
 
-		return sja1105_setup_tc_policer(priv, extack, cls->cookie, port,
-						match.key->vlan_priority,
-						rate_bytes_per_sec,
-						burst);
+		if (match.mask->vlan_id)
+			vid = match.key->vlan_id;
+		if (match.mask->vlan_priority)
+			pcp = match.key->vlan_priority;
+	}
+
+	if (is_bcast_dmac && vid == U16_MAX && pcp == U16_MAX) {
+		key->type = SJA1105_KEY_BCAST;
+		return 0;
+	}
+	if (dmac == U64_MAX && vid == U16_MAX && pcp != U16_MAX) {
+		key->type = SJA1105_KEY_TC;
+		key->tc.pcp = pcp;
+		return 0;
+	}
+	if (dmac != U64_MAX && vid != U16_MAX && pcp != U16_MAX) {
+		key->type = SJA1105_KEY_VLAN_AWARE_VL;
+		key->vl.dmac = dmac;
+		key->vl.vid = vid;
+		key->vl.pcp = pcp;
+		return 0;
+	}
+	if (dmac != U64_MAX) {
+		key->type = SJA1105_KEY_VLAN_UNAWARE_VL;
+		key->vl.dmac = dmac;
+		return 0;
 	}
 
 	NL_SET_ERR_MSG_MOD(extack, "Not matching on any known key");
@@ -259,22 +306,34 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	struct netlink_ext_ack *extack = cls->common.extack;
 	struct sja1105_private *priv = ds->priv;
 	const struct flow_action_entry *act;
-	int rc = -EOPNOTSUPP, i;
+	unsigned long cookie = cls->cookie;
+	struct sja1105_key key;
+	int rc, i;
+
+	rc = sja1105_flower_parse_key(priv, extack, cls, &key);
+	if (rc)
+		return rc;
+
+	rc = -EOPNOTSUPP;
 
 	flow_action_for_each(i, act, &rule->action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
-			rc = sja1105_flower_parse_policer(priv, port, extack, cls,
-							  act->police.rate_bytes_ps,
-							  act->police.burst);
+			rc = sja1105_flower_policer(priv, port,
+						    extack, cookie, &key,
+						    act->police.rate_bytes_ps,
+						    act->police.burst);
+			if (rc)
+				goto out;
 			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Action not supported");
-			break;
+			rc = -EOPNOTSUPP;
+			goto out;
 		}
 	}
-
+out:
 	return rc;
 }
 
@@ -297,7 +356,7 @@ int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 		old_sharindx = policing[bcast].sharindx;
 		policing[bcast].sharindx = port;
 	} else if (rule->type == SJA1105_RULE_TC_POLICER) {
-		int index = (port * SJA1105_NUM_TC) + rule->tc_pol.tc;
+		int index = (port * SJA1105_NUM_TC) + rule->key.tc.pcp;
 
 		old_sharindx = policing[index].sharindx;
 		policing[index].sharindx = port;
-- 
cgit v1.2.3-59-g8ed1b


From dfacc5a23e227cabdff41b6202f510398e90d36b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:55 +0300
Subject: net: dsa: sja1105: support flow-based redirection via virtual links

Implement tc-flower offloads for redirect, trap and drop using
non-critical virtual links.

Commands which were tested to work are:

  # Send frames received on swp2 with a DA of 42:be:24:9b:76:20 to the
  # CPU and to swp3. This type of key (DA only) is used when the port's
  # VLAN awareness state is off.
  tc qdisc add dev swp2 clsact
  tc filter add dev swp2 ingress flower skip_sw dst_mac 42:be:24:9b:76:20 \
          action mirred egress redirect dev swp3 \
          action trap

  # Drop frames received on swp2 with a DA of 42:be:24:9b:76:20, a VID
  # of 100 and a PCP of 0.
  tc filter add dev swp2 ingress protocol 802.1Q flower skip_sw \
          dst_mac 42:be:24:9b:76:20 vlan_id 100 vlan_prio 0 action drop

Under the hood, all rules match on DMAC, VID and PCP, but when VLAN
filtering is disabled, those are set internally by the driver to the
port-based defaults. Because we would be put in an awkward situation if
the user were to change the VLAN filtering state while there are active
rules (packets would no longer match on the specified keys), we simply
deny changing vlan_filtering unless the list of flows offloaded via
virtual links is empty. Then the user can re-add new rules.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/Kconfig          |   9 +
 drivers/net/dsa/sja1105/Makefile         |   4 +
 drivers/net/dsa/sja1105/sja1105.h        |  18 ++
 drivers/net/dsa/sja1105/sja1105_flower.c |  57 +++++-
 drivers/net/dsa/sja1105/sja1105_main.c   |  12 +-
 drivers/net/dsa/sja1105/sja1105_vl.c     | 302 +++++++++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_vl.h     |  41 +++++
 7 files changed, 437 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/dsa/sja1105/sja1105_vl.c
 create mode 100644 drivers/net/dsa/sja1105/sja1105_vl.h

diff --git a/drivers/net/dsa/sja1105/Kconfig b/drivers/net/dsa/sja1105/Kconfig
index 68c3086af9af..5e83b365f17a 100644
--- a/drivers/net/dsa/sja1105/Kconfig
+++ b/drivers/net/dsa/sja1105/Kconfig
@@ -34,3 +34,12 @@ config NET_DSA_SJA1105_TAS
 	  This enables support for the TTEthernet-based egress scheduling
 	  engine in the SJA1105 DSA driver, which is controlled using a
 	  hardware offload of the tc-tqprio qdisc.
+
+config NET_DSA_SJA1105_VL
+	bool "Support for Virtual Links on NXP SJA1105"
+	depends on NET_DSA_SJA1105_TAS
+	help
+	  This enables support for flow classification using capable devices
+	  (SJA1105T, SJA1105Q, SJA1105S). The following actions are supported:
+	  - redirect, trap, drop
+	  - time-based ingress policing, via the tc-gate action
diff --git a/drivers/net/dsa/sja1105/Makefile b/drivers/net/dsa/sja1105/Makefile
index 8943d8d66f2b..c88e56a29db8 100644
--- a/drivers/net/dsa/sja1105/Makefile
+++ b/drivers/net/dsa/sja1105/Makefile
@@ -17,3 +17,7 @@ endif
 ifdef CONFIG_NET_DSA_SJA1105_TAS
 sja1105-objs += sja1105_tas.o
 endif
+
+ifdef CONFIG_NET_DSA_SJA1105_VL
+sja1105-objs += sja1105_vl.o
+endif
diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 95633ad9bfb7..1756000f6936 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -126,6 +126,13 @@ struct sja1105_key {
 enum sja1105_rule_type {
 	SJA1105_RULE_BCAST_POLICER,
 	SJA1105_RULE_TC_POLICER,
+	SJA1105_RULE_VL,
+};
+
+enum sja1105_vl_type {
+	SJA1105_VL_NONCRITICAL,
+	SJA1105_VL_RATE_CONSTRAINED,
+	SJA1105_VL_TIME_TRIGGERED,
 };
 
 struct sja1105_rule {
@@ -135,6 +142,7 @@ struct sja1105_rule {
 	struct sja1105_key key;
 	enum sja1105_rule_type type;
 
+	/* Action */
 	union {
 		/* SJA1105_RULE_BCAST_POLICER */
 		struct {
@@ -145,12 +153,19 @@ struct sja1105_rule {
 		struct {
 			int sharindx;
 		} tc_pol;
+
+		/* SJA1105_RULE_VL */
+		struct {
+			unsigned long destports;
+			enum sja1105_vl_type type;
+		} vl;
 	};
 };
 
 struct sja1105_flow_block {
 	struct list_head rules;
 	bool l2_policer_used[SJA1105_NUM_L2_POLICERS];
+	int num_virtual_links;
 };
 
 struct sja1105_private {
@@ -187,6 +202,7 @@ enum sja1105_reset_reason {
 	SJA1105_AGEING_TIME,
 	SJA1105_SCHEDULING,
 	SJA1105_BEST_EFFORT_POLICING,
+	SJA1105_VIRTUAL_LINKS,
 };
 
 int sja1105_static_config_reload(struct sja1105_private *priv,
@@ -290,5 +306,7 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 			   struct flow_cls_offload *cls, bool ingress);
 void sja1105_flower_setup(struct dsa_switch *ds);
 void sja1105_flower_teardown(struct dsa_switch *ds);
+struct sja1105_rule *sja1105_rule_find(struct sja1105_private *priv,
+				       unsigned long cookie);
 
 #endif
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 3246d5a49436..5f08eed0b1fc 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -2,9 +2,10 @@
 /* Copyright 2020, NXP Semiconductors
  */
 #include "sja1105.h"
+#include "sja1105_vl.h"
 
-static struct sja1105_rule *sja1105_rule_find(struct sja1105_private *priv,
-					      unsigned long cookie)
+struct sja1105_rule *sja1105_rule_find(struct sja1105_private *priv,
+				       unsigned long cookie)
 {
 	struct sja1105_rule *rule;
 
@@ -173,7 +174,8 @@ out:
 
 static int sja1105_flower_policer(struct sja1105_private *priv, int port,
 				  struct netlink_ext_ack *extack,
-				  unsigned long cookie, struct sja1105_key *key,
+				  unsigned long cookie,
+				  struct sja1105_key *key,
 				  u64 rate_bytes_per_sec,
 				  s64 burst)
 {
@@ -308,6 +310,7 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	const struct flow_action_entry *act;
 	unsigned long cookie = cls->cookie;
 	struct sja1105_key key;
+	bool vl_rule = false;
 	int rc, i;
 
 	rc = sja1105_flower_parse_key(priv, extack, cls, &key);
@@ -319,13 +322,50 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	flow_action_for_each(i, act, &rule->action) {
 		switch (act->id) {
 		case FLOW_ACTION_POLICE:
-			rc = sja1105_flower_policer(priv, port,
-						    extack, cookie, &key,
+			rc = sja1105_flower_policer(priv, port, extack, cookie,
+						    &key,
 						    act->police.rate_bytes_ps,
 						    act->police.burst);
 			if (rc)
 				goto out;
 			break;
+		case FLOW_ACTION_TRAP: {
+			int cpu = dsa_upstream_port(ds, port);
+
+			vl_rule = true;
+
+			rc = sja1105_vl_redirect(priv, port, extack, cookie,
+						 &key, BIT(cpu), true);
+			if (rc)
+				goto out;
+			break;
+		}
+		case FLOW_ACTION_REDIRECT: {
+			struct dsa_port *to_dp;
+
+			to_dp = dsa_port_from_netdev(act->dev);
+			if (IS_ERR(to_dp)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Destination not a switch port");
+				return -EOPNOTSUPP;
+			}
+
+			vl_rule = true;
+
+			rc = sja1105_vl_redirect(priv, port, extack, cookie,
+						 &key, BIT(to_dp->index), true);
+			if (rc)
+				goto out;
+			break;
+		}
+		case FLOW_ACTION_DROP:
+			vl_rule = true;
+
+			rc = sja1105_vl_redirect(priv, port, extack, cookie,
+						 &key, 0, false);
+			if (rc)
+				goto out;
+			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Action not supported");
@@ -333,6 +373,10 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 			goto out;
 		}
 	}
+
+	if (vl_rule && !rc)
+		rc = sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS);
+
 out:
 	return rc;
 }
@@ -348,6 +392,9 @@ int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 	if (!rule)
 		return 0;
 
+	if (rule->type == SJA1105_RULE_VL)
+		return sja1105_vl_delete(priv, port, rule, cls->common.extack);
+
 	policing = priv->static_config.tables[BLK_IDX_L2_POLICING].entries;
 
 	if (rule->type == SJA1105_RULE_BCAST_POLICER) {
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 472f4eb20c49..8bb104ee73d5 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -445,7 +445,7 @@ static int sja1105_init_general_params(struct sja1105_private *priv)
 		 */
 		.casc_port = SJA1105_NUM_PORTS,
 		/* No TTEthernet */
-		.vllupformat = 0,
+		.vllupformat = SJA1105_VL_FORMAT_PSFP,
 		.vlmarker = 0,
 		.vlmask = 0,
 		/* Only update correctionField for 1-step PTP (L2 transport) */
@@ -1589,6 +1589,7 @@ static const char * const sja1105_reset_reasons[] = {
 	[SJA1105_AGEING_TIME] = "Ageing time",
 	[SJA1105_SCHEDULING] = "Time-aware scheduling",
 	[SJA1105_BEST_EFFORT_POLICING] = "Best-effort policing",
+	[SJA1105_VIRTUAL_LINKS] = "Virtual links",
 };
 
 /* For situations where we need to change a setting at runtime that is only
@@ -1831,9 +1832,18 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_private *priv = ds->priv;
 	struct sja1105_table *table;
+	struct sja1105_rule *rule;
 	u16 tpid, tpid2;
 	int rc;
 
+	list_for_each_entry(rule, &priv->flow_block.rules, list) {
+		if (rule->type == SJA1105_RULE_VL) {
+			dev_err(ds->dev,
+				"Cannot change VLAN filtering state while VL rules are active\n");
+			return -EBUSY;
+		}
+	}
+
 	if (enabled) {
 		/* Enable VLAN filtering. */
 		tpid  = ETH_P_8021Q;
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
new file mode 100644
index 000000000000..c226779b8275
--- /dev/null
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -0,0 +1,302 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright 2020, NXP Semiconductors
+ */
+#include <linux/dsa/8021q.h>
+#include "sja1105.h"
+
+/* The switch flow classification core implements TTEthernet, which 'thinks' in
+ * terms of Virtual Links (VL), a concept borrowed from ARINC 664 part 7.
+ * However it also has one other operating mode (VLLUPFORMAT=0) where it acts
+ * somewhat closer to a pre-standard implementation of IEEE 802.1Qci
+ * (Per-Stream Filtering and Policing), which is what the driver is going to be
+ * implementing.
+ *
+ *                                 VL Lookup
+ *        Key = {DMAC && VLANID   +---------+  Key = { (DMAC[47:16] & VLMASK ==
+ *               && VLAN PCP      |         |                         VLMARKER)
+ *               && INGRESS PORT} +---------+                      (both fixed)
+ *            (exact match,            |             && DMAC[15:0] == VLID
+ *         all specified in rule)      |                    (specified in rule)
+ *                                     v             && INGRESS PORT }
+ *                               ------------
+ *                    0 (PSFP)  /            \  1 (ARINC664)
+ *                 +-----------/  VLLUPFORMAT \----------+
+ *                 |           \    (fixed)   /          |
+ *                 |            \            /           |
+ *  0 (forwarding) v             ------------            |
+ *           ------------                                |
+ *          /            \  1 (QoS classification)       |
+ *     +---/  ISCRITICAL  \-----------+                  |
+ *     |   \  (per rule)  /           |                  |
+ *     |    \            /   VLID taken from      VLID taken from
+ *     v     ------------     index of rule       contents of rule
+ *  select                     that matched         that matched
+ * DESTPORTS                          |                  |
+ *  |                                 +---------+--------+
+ *  |                                           |
+ *  |                                           v
+ *  |                                     VL Forwarding
+ *  |                                   (indexed by VLID)
+ *  |                                      +---------+
+ *  |                       +--------------|         |
+ *  |                       |  select TYPE +---------+
+ *  |                       v
+ *  |   0 (rate      ------------    1 (time
+ *  |  constrained) /            \   triggered)
+ *  |       +------/     TYPE     \------------+
+ *  |       |      \  (per VLID)  /            |
+ *  |       v       \            /             v
+ *  |  VL Policing   ------------         VL Policing
+ *  | (indexed by VLID)                (indexed by VLID)
+ *  |  +---------+                        +---------+
+ *  |  | TYPE=0  |                        | TYPE=1  |
+ *  |  +---------+                        +---------+
+ *  |  select SHARINDX                 select SHARINDX to
+ *  |  to rate-limit                 re-enter VL Forwarding
+ *  |  groups of VL's               with new VLID for egress
+ *  |  to same quota                           |
+ *  |       |                                  |
+ *  |  select MAXLEN -> exceed => drop    select MAXLEN -> exceed => drop
+ *  |       |                                  |
+ *  |       v                                  v
+ *  |  VL Forwarding                      VL Forwarding
+ *  | (indexed by SHARINDX)             (indexed by SHARINDX)
+ *  |  +---------+                        +---------+
+ *  |  | TYPE=0  |                        | TYPE=1  |
+ *  |  +---------+                        +---------+
+ *  |  select PRIORITY,                 select PRIORITY,
+ *  | PARTITION, DESTPORTS            PARTITION, DESTPORTS
+ *  |       |                                  |
+ *  |       v                                  v
+ *  |  VL Policing                        VL Policing
+ *  | (indexed by SHARINDX)           (indexed by SHARINDX)
+ *  |  +---------+                        +---------+
+ *  |  | TYPE=0  |                        | TYPE=1  |
+ *  |  +---------+                        +---------+
+ *  |       |                                  |
+ *  |       v                                  |
+ *  |  select BAG, -> exceed => drop           |
+ *  |    JITTER                                v
+ *  |       |             ----------------------------------------------
+ *  |       |            /    Reception Window is open for this VL      \
+ *  |       |           /    (the Schedule Table executes an entry i     \
+ *  |       |          /   M <= i < N, for which these conditions hold):  \ no
+ *  |       |    +----/                                                    \-+
+ *  |       |    |yes \       WINST[M] == 1 && WINSTINDEX[M] == VLID       / |
+ *  |       |    |     \     WINEND[N] == 1 && WINSTINDEX[N] == VLID      /  |
+ *  |       |    |      \                                                /   |
+ *  |       |    |       \ (the VL window has opened and not yet closed)/    |
+ *  |       |    |        ----------------------------------------------     |
+ *  |       |    v                                                           v
+ *  |       |  dispatch to DESTPORTS when the Schedule Table               drop
+ *  |       |  executes an entry i with TXEN == 1 && VLINDEX == i
+ *  v       v
+ * dispatch immediately to DESTPORTS
+ *
+ * The per-port classification key is always composed of {DMAC, VID, PCP} and
+ * is non-maskable. This 'looks like' the NULL stream identification function
+ * from IEEE 802.1CB clause 6, except for the extra VLAN PCP. When the switch
+ * ports operate as VLAN-unaware, we do allow the user to not specify the VLAN
+ * ID and PCP, and then the port-based defaults will be used.
+ *
+ * In TTEthernet, routing is something that needs to be done manually for each
+ * Virtual Link. So the flow action must always include one of:
+ * a. 'redirect', 'trap' or 'drop': select the egress port list
+ * Additionally, the following actions may be applied on a Virtual Link,
+ * turning it into 'critical' traffic:
+ * b. 'police': turn it into a rate-constrained VL, with bandwidth limitation
+ *    given by the maximum frame length, bandwidth allocation gap (BAG) and
+ *    maximum jitter.
+ * c. 'gate': turn it into a time-triggered VL, which can only be received
+ *    and forwarded according to a given schedule.
+ */
+
+static bool sja1105_vl_key_lower(struct sja1105_vl_lookup_entry *a,
+				 struct sja1105_vl_lookup_entry *b)
+{
+	if (a->macaddr < b->macaddr)
+		return true;
+	if (a->macaddr > b->macaddr)
+		return false;
+	if (a->vlanid < b->vlanid)
+		return true;
+	if (a->vlanid > b->vlanid)
+		return false;
+	if (a->port < b->port)
+		return true;
+	if (a->port > b->port)
+		return false;
+	if (a->vlanprior < b->vlanprior)
+		return true;
+	if (a->vlanprior > b->vlanprior)
+		return false;
+	/* Keys are equal */
+	return false;
+}
+
+static int sja1105_init_virtual_links(struct sja1105_private *priv,
+				      struct netlink_ext_ack *extack)
+{
+	struct sja1105_vl_lookup_entry *vl_lookup;
+	struct sja1105_table *table;
+	struct sja1105_rule *rule;
+	int num_virtual_links = 0;
+	int i, j, k;
+
+	/* Figure out the dimensioning of the problem */
+	list_for_each_entry(rule, &priv->flow_block.rules, list) {
+		if (rule->type != SJA1105_RULE_VL)
+			continue;
+		/* Each VL lookup entry matches on a single ingress port */
+		num_virtual_links += hweight_long(rule->port_mask);
+	}
+
+	if (num_virtual_links > SJA1105_MAX_VL_LOOKUP_COUNT) {
+		NL_SET_ERR_MSG_MOD(extack, "Not enough VL entries available");
+		return -ENOSPC;
+	}
+
+	/* Discard previous VL Lookup Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_LOOKUP];
+	if (table->entry_count) {
+		kfree(table->entries);
+		table->entry_count = 0;
+	}
+
+	/* Nothing to do */
+	if (!num_virtual_links)
+		return 0;
+
+	/* Pre-allocate space in the static config tables */
+
+	/* VL Lookup Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_LOOKUP];
+	table->entries = kcalloc(num_virtual_links,
+				 table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+	table->entry_count = num_virtual_links;
+	vl_lookup = table->entries;
+
+	k = 0;
+
+	list_for_each_entry(rule, &priv->flow_block.rules, list) {
+		unsigned long port;
+
+		if (rule->type != SJA1105_RULE_VL)
+			continue;
+
+		for_each_set_bit(port, &rule->port_mask, SJA1105_NUM_PORTS) {
+			vl_lookup[k].format = SJA1105_VL_FORMAT_PSFP;
+			vl_lookup[k].port = port;
+			vl_lookup[k].macaddr = rule->key.vl.dmac;
+			if (rule->key.type == SJA1105_KEY_VLAN_AWARE_VL) {
+				vl_lookup[k].vlanid = rule->key.vl.vid;
+				vl_lookup[k].vlanprior = rule->key.vl.pcp;
+			} else {
+				u16 vid = dsa_8021q_rx_vid(priv->ds, port);
+
+				vl_lookup[k].vlanid = vid;
+				vl_lookup[k].vlanprior = 0;
+			}
+			/* For critical VLs, the DESTPORTS mask is taken from
+			 * the VL Forwarding Table, so no point in putting it
+			 * in the VL Lookup Table
+			 */
+			if (rule->vl.type == SJA1105_VL_NONCRITICAL)
+				vl_lookup[k].destports = rule->vl.destports;
+			else
+				vl_lookup[k].iscritical = true;
+			k++;
+		}
+	}
+
+	/* UM10944.pdf chapter 4.2.3 VL Lookup table:
+	 * "the entries in the VL Lookup table must be sorted in ascending
+	 * order (i.e. the smallest value must be loaded first) according to
+	 * the following sort order: MACADDR, VLANID, PORT, VLANPRIOR."
+	 */
+	for (i = 0; i < num_virtual_links; i++) {
+		struct sja1105_vl_lookup_entry *a = &vl_lookup[i];
+
+		for (j = i + 1; j < num_virtual_links; j++) {
+			struct sja1105_vl_lookup_entry *b = &vl_lookup[j];
+
+			if (sja1105_vl_key_lower(b, a)) {
+				struct sja1105_vl_lookup_entry tmp = *a;
+
+				*a = *b;
+				*b = tmp;
+			}
+		}
+	}
+
+	return 0;
+}
+
+int sja1105_vl_redirect(struct sja1105_private *priv, int port,
+			struct netlink_ext_ack *extack, unsigned long cookie,
+			struct sja1105_key *key, unsigned long destports,
+			bool append)
+{
+	struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
+	int rc;
+
+	if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)) &&
+	    key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can only redirect based on {DMAC, VID, PCP}");
+		return -EOPNOTSUPP;
+	} else if (key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can only redirect based on DMAC");
+		return -EOPNOTSUPP;
+	}
+
+	if (!rule) {
+		rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+		if (!rule)
+			return -ENOMEM;
+
+		rule->cookie = cookie;
+		rule->type = SJA1105_RULE_VL;
+		rule->key = *key;
+		list_add(&rule->list, &priv->flow_block.rules);
+	}
+
+	rule->port_mask |= BIT(port);
+	if (append)
+		rule->vl.destports |= destports;
+	else
+		rule->vl.destports = destports;
+
+	rc = sja1105_init_virtual_links(priv, extack);
+	if (rc) {
+		rule->port_mask &= ~BIT(port);
+		if (!rule->port_mask) {
+			list_del(&rule->list);
+			kfree(rule);
+		}
+	}
+
+	return rc;
+}
+
+int sja1105_vl_delete(struct sja1105_private *priv, int port,
+		      struct sja1105_rule *rule, struct netlink_ext_ack *extack)
+{
+	int rc;
+
+	rule->port_mask &= ~BIT(port);
+	if (!rule->port_mask) {
+		list_del(&rule->list);
+		kfree(rule);
+	}
+
+	rc = sja1105_init_virtual_links(priv, extack);
+	if (rc)
+		return rc;
+
+	return sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS);
+}
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h
new file mode 100644
index 000000000000..08ee5557b463
--- /dev/null
+++ b/drivers/net/dsa/sja1105/sja1105_vl.h
@@ -0,0 +1,41 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright 2020, NXP Semiconductors
+ */
+#ifndef _SJA1105_VL_H
+#define _SJA1105_VL_H
+
+#if IS_ENABLED(CONFIG_NET_DSA_SJA1105_VL)
+
+int sja1105_vl_redirect(struct sja1105_private *priv, int port,
+			struct netlink_ext_ack *extack, unsigned long cookie,
+			struct sja1105_key *key, unsigned long destports,
+			bool append);
+
+int sja1105_vl_delete(struct sja1105_private *priv, int port,
+		      struct sja1105_rule *rule,
+		      struct netlink_ext_ack *extack);
+
+#else
+
+static inline int sja1105_vl_redirect(struct sja1105_private *priv, int port,
+				      struct netlink_ext_ack *extack,
+				      unsigned long cookie,
+				      struct sja1105_key *key,
+				      unsigned long destports,
+				      bool append)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Virtual Links not compiled in");
+	return -EOPNOTSUPP;
+}
+
+static inline int sja1105_vl_delete(struct sja1105_private *priv,
+				    int port, struct sja1105_rule *rule,
+				    struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Virtual Links not compiled in");
+	return -EOPNOTSUPP;
+}
+
+#endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_VL) */
+
+#endif /* _SJA1105_VL_H */
-- 
cgit v1.2.3-59-g8ed1b


From 834f8933d5ddd732274cb6050252bd1c7cc7349d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:56 +0300
Subject: net: dsa: sja1105: implement tc-gate using time-triggered virtual
 links

Restrict the TTEthernet hardware support on this switch to operate as
closely as possible to IEEE 802.1Qci. This means that it can
perform PTP-time-based ingress admission control on streams identified
by {DMAC, VID, PCP}, which is useful when trying to ensure the
determinism of traffic scheduled via IEEE 802.1Qbv.

The oddity comes from the fact that in hardware (and in TTEthernet at
large), virtual links always need a full-blown action, including not
only the type of policing, but also the list of destination ports. So in
practice, a single tc-gate action will result in all packets getting
dropped. Additional actions (either "trap" or "redirect") need to be
specified in the same filter rule such that the conforming packets are
actually forwarded somewhere.

Apart from the VL Lookup, Policing and Forwarding tables which need to
be programmed for each flow (virtual link), the Schedule engine also
needs to be told to open/close the admission gates for each individual
virtual link. A fairly accurate (and detailed) description of how that
works is already present in sja1105_tas.c, since it is already used to
trigger the egress gates for the tc-taprio offload (IEEE 802.1Qbv). The
key point to remember is that the schedule engine supports 8
"subschedules" (execution threads that iterate through the global
schedule in parallel, with the constraint that no 2 hardware threads may
execute a schedule entry at the same time). For tc-taprio, each egress
port used one of these 8 subschedules, leaving a total of 4 subschedules
unused.
In principle we could have allocated 1 subschedule for the tc-gate
offload of each ingress port, but actually the schedules of all virtual
links installed on each ingress port would have needed to be merged
together, before they could have been programmed to hardware. So
simplify our life and just merge the entire tc-gate configuration, for
all virtual links on all ingress ports, into a single subschedule. Be
sure to check that against the usual hardware scheduling conflicts, and
program it to hardware alongside any tc-taprio subschedule that may be
present.

The following scenarios were tested:

1. Quantitative testing:

   tc qdisc add dev swp2 clsact
   tc filter add dev swp2 ingress flower skip_sw \
           dst_mac 42:be:24:9b:76:20 \
           action gate index 1 base-time 0 \
           sched-entry OPEN 1200 -1 -1 \
           sched-entry CLOSE 1200 -1 -1 \
           action trap

   ping 192.168.1.2 -f
   PING 192.168.1.2 (192.168.1.2) 56(84) bytes of data.
   .............................
   --- 192.168.1.2 ping statistics ---
   948 packets transmitted, 467 received, 50.7384% packet loss, time 9671ms

2. Qualitative testing (with a phase-aligned schedule - the clocks are
   synchronized by ptp4l, not shown here):

   Receiver (sja1105):

   tc qdisc add dev swp2 clsact
   now=$(phc_ctl /dev/ptp1 get | awk '/clock time is/ {print $5}') && \
           sec=$(echo $now | awk -F. '{print $1}') && \
           base_time="$(((sec + 2) * 1000000000))" && \
           echo "base time ${base_time}"
   tc filter add dev swp2 ingress flower skip_sw \
           dst_mac 42:be:24:9b:76:20 \
           action gate base-time ${base_time} \
           sched-entry OPEN  60000 -1 -1 \
           sched-entry CLOSE 40000 -1 -1 \
           action trap

   Sender (enetc):
   now=$(phc_ctl /dev/ptp0 get | awk '/clock time is/ {print $5}') && \
           sec=$(echo $now | awk -F. '{print $1}') && \
           base_time="$(((sec + 2) * 1000000000))" && \
           echo "base time ${base_time}"
   tc qdisc add dev eno0 parent root taprio \
           num_tc 8 \
           map 0 1 2 3 4 5 6 7 \
           queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
           base-time ${base_time} \
           sched-entry S 01  50000 \
           sched-entry S 00  50000 \
           flags 2

   ping -A 192.168.1.1
   PING 192.168.1.1 (192.168.1.1): 56 data bytes
   ...
   ^C
   --- 192.168.1.1 ping statistics ---
   1425 packets transmitted, 1424 packets received, 0% packet loss
   round-trip min/avg/max = 0.322/0.361/0.990 ms

   And just for comparison, with the tc-taprio schedule deleted:

   ping -A 192.168.1.1
   PING 192.168.1.1 (192.168.1.1): 56 data bytes
   ...
   ^C
   --- 192.168.1.1 ping statistics ---
   33 packets transmitted, 19 packets received, 42% packet loss
   round-trip min/avg/max = 0.336/0.464/0.597 ms

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h               |  13 +-
 drivers/net/dsa/sja1105/sja1105_flower.c        |  57 ++-
 drivers/net/dsa/sja1105/sja1105_main.c          |   1 +
 drivers/net/dsa/sja1105/sja1105_ptp.h           |  13 +
 drivers/net/dsa/sja1105/sja1105_spi.c           |   2 +
 drivers/net/dsa/sja1105/sja1105_static_config.h |   2 +
 drivers/net/dsa/sja1105/sja1105_tas.c           | 127 +++++-
 drivers/net/dsa/sja1105/sja1105_tas.h           |  36 ++
 drivers/net/dsa/sja1105/sja1105_vl.c            | 494 ++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_vl.h            |  31 ++
 10 files changed, 759 insertions(+), 17 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 1756000f6936..8df2a5c53b02 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -36,6 +36,7 @@ struct sja1105_regs {
 	u64 status;
 	u64 port_control;
 	u64 rgu;
+	u64 vl_status;
 	u64 config;
 	u64 sgmii;
 	u64 rmii_pll1;
@@ -156,8 +157,16 @@ struct sja1105_rule {
 
 		/* SJA1105_RULE_VL */
 		struct {
-			unsigned long destports;
 			enum sja1105_vl_type type;
+			unsigned long destports;
+			int sharindx;
+			int maxlen;
+			int ipv;
+			u64 base_time;
+			u64 cycle_time;
+			int num_entries;
+			struct action_gate_entry *entries;
+			struct flow_stats stats;
 		} vl;
 	};
 };
@@ -304,6 +313,8 @@ int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 			   struct flow_cls_offload *cls, bool ingress);
 int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 			   struct flow_cls_offload *cls, bool ingress);
+int sja1105_cls_flower_stats(struct dsa_switch *ds, int port,
+			     struct flow_cls_offload *cls, bool ingress);
 void sja1105_flower_setup(struct dsa_switch *ds);
 void sja1105_flower_teardown(struct dsa_switch *ds);
 struct sja1105_rule *sja1105_rule_find(struct sja1105_private *priv,
diff --git a/drivers/net/dsa/sja1105/sja1105_flower.c b/drivers/net/dsa/sja1105/sja1105_flower.c
index 5f08eed0b1fc..9ee8968610cd 100644
--- a/drivers/net/dsa/sja1105/sja1105_flower.c
+++ b/drivers/net/dsa/sja1105/sja1105_flower.c
@@ -309,7 +309,9 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 	struct sja1105_private *priv = ds->priv;
 	const struct flow_action_entry *act;
 	unsigned long cookie = cls->cookie;
+	bool routing_rule = false;
 	struct sja1105_key key;
+	bool gate_rule = false;
 	bool vl_rule = false;
 	int rc, i;
 
@@ -332,6 +334,7 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 		case FLOW_ACTION_TRAP: {
 			int cpu = dsa_upstream_port(ds, port);
 
+			routing_rule = true;
 			vl_rule = true;
 
 			rc = sja1105_vl_redirect(priv, port, extack, cookie,
@@ -350,6 +353,7 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 				return -EOPNOTSUPP;
 			}
 
+			routing_rule = true;
 			vl_rule = true;
 
 			rc = sja1105_vl_redirect(priv, port, extack, cookie,
@@ -366,6 +370,21 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 			if (rc)
 				goto out;
 			break;
+		case FLOW_ACTION_GATE:
+			gate_rule = true;
+			vl_rule = true;
+
+			rc = sja1105_vl_gate(priv, port, extack, cookie,
+					     &key, act->gate.index,
+					     act->gate.prio,
+					     act->gate.basetime,
+					     act->gate.cycletime,
+					     act->gate.cycletimeext,
+					     act->gate.num_entries,
+					     act->gate.entries);
+			if (rc)
+				goto out;
+			break;
 		default:
 			NL_SET_ERR_MSG_MOD(extack,
 					   "Action not supported");
@@ -374,8 +393,23 @@ int sja1105_cls_flower_add(struct dsa_switch *ds, int port,
 		}
 	}
 
-	if (vl_rule && !rc)
+	if (vl_rule && !rc) {
+		/* Delay scheduling configuration until DESTPORTS has been
+		 * populated by all other actions.
+		 */
+		if (gate_rule) {
+			if (!routing_rule) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Can only offload gate action together with redirect or trap");
+				return -EOPNOTSUPP;
+			}
+			rc = sja1105_init_scheduling(priv);
+			if (rc)
+				goto out;
+		}
+
 		rc = sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS);
+	}
 
 out:
 	return rc;
@@ -421,6 +455,27 @@ int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 	return sja1105_static_config_reload(priv, SJA1105_BEST_EFFORT_POLICING);
 }
 
+int sja1105_cls_flower_stats(struct dsa_switch *ds, int port,
+			     struct flow_cls_offload *cls, bool ingress)
+{
+	struct sja1105_private *priv = ds->priv;
+	struct sja1105_rule *rule = sja1105_rule_find(priv, cls->cookie);
+	int rc;
+
+	if (!rule)
+		return 0;
+
+	if (rule->type != SJA1105_RULE_VL)
+		return 0;
+
+	rc = sja1105_vl_stats(priv, port, rule, &cls->stats,
+			      cls->common.extack);
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 void sja1105_flower_setup(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 8bb104ee73d5..666e54565df0 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2369,6 +2369,7 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.port_policer_del	= sja1105_port_policer_del,
 	.cls_flower_add		= sja1105_cls_flower_add,
 	.cls_flower_del		= sja1105_cls_flower_del,
+	.cls_flower_stats	= sja1105_cls_flower_stats,
 };
 
 static int sja1105_check_device_id(struct sja1105_private *priv)
diff --git a/drivers/net/dsa/sja1105/sja1105_ptp.h b/drivers/net/dsa/sja1105/sja1105_ptp.h
index 43480b24f1f0..6408d1158f2d 100644
--- a/drivers/net/dsa/sja1105/sja1105_ptp.h
+++ b/drivers/net/dsa/sja1105/sja1105_ptp.h
@@ -48,6 +48,19 @@ static inline s64 future_base_time(s64 base_time, s64 cycle_time, s64 now)
 	return base_time + n * cycle_time;
 }
 
+/* This is not a preprocessor macro because the "ns" argument may or may not be
+ * s64 at caller side. This ensures it is properly type-cast before div_s64.
+ */
+static inline s64 ns_to_sja1105_delta(s64 ns)
+{
+	return div_s64(ns, 200);
+}
+
+static inline s64 sja1105_delta_to_ns(s64 delta)
+{
+	return delta * 200;
+}
+
 struct sja1105_ptp_cmd {
 	u64 startptpcp;		/* start toggling PTP_CLK pin */
 	u64 stopptpcp;		/* stop toggling PTP_CLK pin */
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 43f14a5c2718..0be75c49e6c3 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -439,6 +439,7 @@ static struct sja1105_regs sja1105et_regs = {
 	.prod_id = 0x100BC3,
 	.status = 0x1,
 	.port_control = 0x11,
+	.vl_status = 0x10000,
 	.config = 0x020000,
 	.rgu = 0x100440,
 	/* UM10944.pdf, Table 86, ACU Register overview */
@@ -472,6 +473,7 @@ static struct sja1105_regs sja1105pqrs_regs = {
 	.prod_id = 0x100BC3,
 	.status = 0x1,
 	.port_control = 0x12,
+	.vl_status = 0x10000,
 	.config = 0x020000,
 	.rgu = 0x100440,
 	/* UM10944.pdf, Table 86, ACU Register overview */
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 1a8fcbbb57b6..b569e3de3590 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -302,6 +302,8 @@ struct sja1105_vl_lookup_entry {
 			u64 vlid;
 		};
 	};
+	/* Not part of hardware structure */
+	unsigned long flow_cookie;
 };
 
 struct sja1105_vl_policing_entry {
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.c b/drivers/net/dsa/sja1105/sja1105_tas.c
index 77e547b4cd89..3aa1a8b5f766 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.c
+++ b/drivers/net/dsa/sja1105/sja1105_tas.c
@@ -7,7 +7,6 @@
 #define SJA1105_TAS_CLKSRC_STANDALONE	1
 #define SJA1105_TAS_CLKSRC_AS6802	2
 #define SJA1105_TAS_CLKSRC_PTP		3
-#define SJA1105_TAS_MAX_DELTA		BIT(19)
 #define SJA1105_GATE_MASK		GENMASK_ULL(SJA1105_NUM_TC - 1, 0)
 
 #define work_to_sja1105_tas(d) \
@@ -15,22 +14,10 @@
 #define tas_to_sja1105(d) \
 	container_of((d), struct sja1105_private, tas_data)
 
-/* This is not a preprocessor macro because the "ns" argument may or may not be
- * s64 at caller side. This ensures it is properly type-cast before div_s64.
- */
-static s64 ns_to_sja1105_delta(s64 ns)
-{
-	return div_s64(ns, 200);
-}
-
-static s64 sja1105_delta_to_ns(s64 delta)
-{
-	return delta * 200;
-}
-
 static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
 {
 	struct sja1105_tas_data *tas_data = &priv->tas_data;
+	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
 	struct dsa_switch *ds = priv->ds;
 	s64 earliest_base_time = S64_MAX;
 	s64 latest_base_time = 0;
@@ -59,6 +46,19 @@ static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
 		}
 	}
 
+	if (!list_empty(&gating_cfg->entries)) {
+		tas_data->enabled = true;
+
+		if (max_cycle_time < gating_cfg->cycle_time)
+			max_cycle_time = gating_cfg->cycle_time;
+		if (latest_base_time < gating_cfg->base_time)
+			latest_base_time = gating_cfg->base_time;
+		if (earliest_base_time > gating_cfg->base_time) {
+			earliest_base_time = gating_cfg->base_time;
+			its_cycle_time = gating_cfg->cycle_time;
+		}
+	}
+
 	if (!tas_data->enabled)
 		return 0;
 
@@ -155,13 +155,14 @@ static int sja1105_tas_set_runtime_params(struct sja1105_private *priv)
  *  their "subschedule end index" (subscheind) equal to the last valid
  *  subschedule's end index (in this case 5).
  */
-static int sja1105_init_scheduling(struct sja1105_private *priv)
+int sja1105_init_scheduling(struct sja1105_private *priv)
 {
 	struct sja1105_schedule_entry_points_entry *schedule_entry_points;
 	struct sja1105_schedule_entry_points_params_entry
 					*schedule_entry_points_params;
 	struct sja1105_schedule_params_entry *schedule_params;
 	struct sja1105_tas_data *tas_data = &priv->tas_data;
+	struct sja1105_gating_config *gating_cfg = &tas_data->gating_cfg;
 	struct sja1105_schedule_entry *schedule;
 	struct sja1105_table *table;
 	int schedule_start_idx;
@@ -213,6 +214,11 @@ static int sja1105_init_scheduling(struct sja1105_private *priv)
 		}
 	}
 
+	if (!list_empty(&gating_cfg->entries)) {
+		num_entries += gating_cfg->num_entries;
+		num_cycles++;
+	}
+
 	/* Nothing to do */
 	if (!num_cycles)
 		return 0;
@@ -312,6 +318,42 @@ static int sja1105_init_scheduling(struct sja1105_private *priv)
 		cycle++;
 	}
 
+	if (!list_empty(&gating_cfg->entries)) {
+		struct sja1105_gate_entry *e;
+
+		/* Relative base time */
+		s64 rbt;
+
+		schedule_start_idx = k;
+		schedule_end_idx = k + gating_cfg->num_entries - 1;
+		rbt = future_base_time(gating_cfg->base_time,
+				       gating_cfg->cycle_time,
+				       tas_data->earliest_base_time);
+		rbt -= tas_data->earliest_base_time;
+		entry_point_delta = ns_to_sja1105_delta(rbt) + 1;
+
+		schedule_entry_points[cycle].subschindx = cycle;
+		schedule_entry_points[cycle].delta = entry_point_delta;
+		schedule_entry_points[cycle].address = schedule_start_idx;
+
+		for (i = cycle; i < 8; i++)
+			schedule_params->subscheind[i] = schedule_end_idx;
+
+		list_for_each_entry(e, &gating_cfg->entries, list) {
+			schedule[k].delta = ns_to_sja1105_delta(e->interval);
+			schedule[k].destports = e->rule->vl.destports;
+			schedule[k].setvalid = true;
+			schedule[k].txen = true;
+			schedule[k].vlindex = e->rule->vl.sharindx;
+			schedule[k].winstindex = e->rule->vl.sharindx;
+			if (e->gate_state) /* Gate open */
+				schedule[k].winst = true;
+			else /* Gate closed */
+				schedule[k].winend = true;
+			k++;
+		}
+	}
+
 	return 0;
 }
 
@@ -415,6 +457,54 @@ sja1105_tas_check_conflicts(struct sja1105_private *priv, int port,
 	return false;
 }
 
+/* Check the tc-taprio configuration on @port for conflicts with the tc-gate
+ * global subschedule. If @port is -1, check it against all ports.
+ * To reuse the sja1105_tas_check_conflicts logic without refactoring it,
+ * convert the gating configuration to a dummy tc-taprio offload structure.
+ *
+ * Returns false if there is no tc-gate subschedule or if no conflict was
+ * found. Returns true if a conflict was found, or if the dummy structure could
+ * not be allocated (in which case a message is also set in @extack).
+ */
+bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
+				    struct netlink_ext_ack *extack)
+{
+	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
+	size_t num_entries = gating_cfg->num_entries;
+	struct tc_taprio_qopt_offload *dummy;
+	struct sja1105_gate_entry *e;
+	bool conflict = false;
+	int i = 0;
+
+	if (list_empty(&gating_cfg->entries))
+		return false;
+
+	/* struct_size() saturates instead of wrapping around, so an overflowing
+	 * entry count makes the allocation fail rather than come up short.
+	 */
+	dummy = kzalloc(struct_size(dummy, entries, num_entries), GFP_KERNEL);
+	if (!dummy) {
+		NL_SET_ERR_MSG_MOD(extack, "Failed to allocate memory");
+		return true;
+	}
+
+	dummy->num_entries = num_entries;
+	dummy->base_time = gating_cfg->base_time;
+	dummy->cycle_time = gating_cfg->cycle_time;
+
+	/* Only the timing of the gate events matters for conflict checking */
+	list_for_each_entry(e, &gating_cfg->entries, list)
+		dummy->entries[i++].interval = e->interval;
+
+	if (port != -1) {
+		conflict = sja1105_tas_check_conflicts(priv, port, dummy);
+	} else {
+		/* Stop at the first port whose schedule conflicts */
+		for (port = 0; port < SJA1105_NUM_PORTS; port++) {
+			conflict = sja1105_tas_check_conflicts(priv, port,
+							       dummy);
+			if (conflict)
+				break;
+		}
+	}
+
+	kfree(dummy);
+
+	return conflict;
+}
+
 int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
 			    struct tc_taprio_qopt_offload *admin)
 {
@@ -473,6 +563,11 @@ int sja1105_setup_tc_taprio(struct dsa_switch *ds, int port,
 			return -ERANGE;
 	}
 
+	if (sja1105_gating_check_conflicts(priv, port, NULL)) {
+		dev_err(ds->dev, "Conflict with tc-gate schedule\n");
+		return -ERANGE;
+	}
+
 	tas_data->offload[port] = taprio_offload_get(admin);
 
 	rc = sja1105_init_scheduling(priv);
@@ -779,6 +874,8 @@ void sja1105_tas_setup(struct dsa_switch *ds)
 	INIT_WORK(&tas_data->tas_work, sja1105_tas_state_machine);
 	tas_data->state = SJA1105_TAS_STATE_DISABLED;
 	tas_data->last_op = SJA1105_PTP_NONE;
+
+	INIT_LIST_HEAD(&tas_data->gating_cfg.entries);
 }
 
 void sja1105_tas_teardown(struct dsa_switch *ds)
diff --git a/drivers/net/dsa/sja1105/sja1105_tas.h b/drivers/net/dsa/sja1105/sja1105_tas.h
index b226c3dfd5b1..0c173ff51751 100644
--- a/drivers/net/dsa/sja1105/sja1105_tas.h
+++ b/drivers/net/dsa/sja1105/sja1105_tas.h
@@ -6,6 +6,10 @@
 
 #include <net/pkt_sched.h>
 
+#define SJA1105_TAS_MAX_DELTA		BIT(18)
+
+struct sja1105_private;
+
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_TAS)
 
 enum sja1105_tas_state {
@@ -20,8 +24,23 @@ enum sja1105_ptp_op {
 	SJA1105_PTP_ADJUSTFREQ,
 };
 
+/* One gate event of the global tc-gate subschedule */
+struct sja1105_gate_entry {
+	/* Link in the entries list of struct sja1105_gating_config */
+	struct list_head list;
+	/* Rule this gate event was generated from */
+	struct sja1105_rule *rule;
+	/* Filled in as the firing time when the entry is inserted, and later
+	 * converted to the duration until the next gate event, in ns
+	 */
+	s64 interval;
+	/* Nonzero if the gate opens at this event, zero if it closes */
+	u8 gate_state;
+};
+
+/* The global tc-gate subschedule, composed from the time-triggered rules */
+struct sja1105_gating_config {
+	/* Longest cycle time among the time-triggered rules */
+	u64 cycle_time;
+	/* Base time of the rule that has the longest cycle time */
+	s64 base_time;
+	/* Number of elements in the entries list */
+	int num_entries;
+	/* List of struct sja1105_gate_entry, sorted by firing time */
+	struct list_head entries;
+};
+
 struct sja1105_tas_data {
 	struct tc_taprio_qopt_offload *offload[SJA1105_NUM_PORTS];
+	struct sja1105_gating_config gating_cfg;
 	enum sja1105_tas_state state;
 	enum sja1105_ptp_op last_op;
 	struct work_struct tas_work;
@@ -42,6 +61,11 @@ void sja1105_tas_clockstep(struct dsa_switch *ds);
 
 void sja1105_tas_adjfreq(struct dsa_switch *ds);
 
+bool sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
+				    struct netlink_ext_ack *extack);
+
+int sja1105_init_scheduling(struct sja1105_private *priv);
+
 #else
 
 /* C doesn't allow empty structures, bah! */
@@ -63,6 +87,18 @@ static inline void sja1105_tas_clockstep(struct dsa_switch *ds) { }
 
 static inline void sja1105_tas_adjfreq(struct dsa_switch *ds) { }
 
+/* Stub used when CONFIG_NET_DSA_SJA1105_TAS is disabled. It takes the same
+ * arguments as the real implementation, so that callers build in both
+ * configurations, and always reports a conflict.
+ */
+static inline bool
+sja1105_gating_check_conflicts(struct sja1105_private *priv, int port,
+			       struct netlink_ext_ack *extack)
+{
+	return true;
+}
+
+/* Stub used when CONFIG_NET_DSA_SJA1105_TAS is disabled: there is no schedule
+ * to build, so do nothing and report success.
+ */
+static inline int sja1105_init_scheduling(struct sja1105_private *priv)
+{
+	return 0;
+}
+
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_TAS) */
 
 #endif /* _SJA1105_TAS_H */
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index c226779b8275..b52f1af6e7e7 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -1,9 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright 2020, NXP Semiconductors
  */
+#include <net/tc_act/tc_gate.h>
 #include <linux/dsa/8021q.h>
 #include "sja1105.h"
 
+#define SJA1105_VL_FRAME_MEMORY			100
+#define SJA1105_SIZE_VL_STATUS			8
+
 /* The switch flow classification core implements TTEthernet, which 'thinks' in
  * terms of Virtual Links (VL), a concept borrowed from ARINC 664 part 7.
  * However it also has one other operating mode (VLLUPFORMAT=0) where it acts
@@ -137,18 +141,33 @@ static bool sja1105_vl_key_lower(struct sja1105_vl_lookup_entry *a,
 static int sja1105_init_virtual_links(struct sja1105_private *priv,
 				      struct netlink_ext_ack *extack)
 {
+	struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
+	struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
+	struct sja1105_vl_policing_entry *vl_policing;
+	struct sja1105_vl_forwarding_entry *vl_fwd;
 	struct sja1105_vl_lookup_entry *vl_lookup;
+	bool have_critical_virtual_links = false;
 	struct sja1105_table *table;
 	struct sja1105_rule *rule;
 	int num_virtual_links = 0;
+	int max_sharindx = 0;
 	int i, j, k;
 
+	table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
+	l2_fwd_params = table->entries;
+	l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY;
+
 	/* Figure out the dimensioning of the problem */
 	list_for_each_entry(rule, &priv->flow_block.rules, list) {
 		if (rule->type != SJA1105_RULE_VL)
 			continue;
 		/* Each VL lookup entry matches on a single ingress port */
 		num_virtual_links += hweight_long(rule->port_mask);
+
+		if (rule->vl.type != SJA1105_VL_NONCRITICAL)
+			have_critical_virtual_links = true;
+		if (max_sharindx < rule->vl.sharindx)
+			max_sharindx = rule->vl.sharindx;
 	}
 
 	if (num_virtual_links > SJA1105_MAX_VL_LOOKUP_COUNT) {
@@ -156,6 +175,13 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 		return -ENOSPC;
 	}
 
+	if (max_sharindx + 1 > SJA1105_MAX_VL_LOOKUP_COUNT) {
+		NL_SET_ERR_MSG_MOD(extack, "Policer index out of range");
+		return -ENOSPC;
+	}
+
+	max_sharindx = max_t(int, num_virtual_links, max_sharindx) + 1;
+
 	/* Discard previous VL Lookup Table */
 	table = &priv->static_config.tables[BLK_IDX_VL_LOOKUP];
 	if (table->entry_count) {
@@ -163,6 +189,27 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 		table->entry_count = 0;
 	}
 
+	/* Discard previous VL Policing Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_POLICING];
+	if (table->entry_count) {
+		kfree(table->entries);
+		table->entry_count = 0;
+	}
+
+	/* Discard previous VL Forwarding Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING];
+	if (table->entry_count) {
+		kfree(table->entries);
+		table->entry_count = 0;
+	}
+
+	/* Discard previous VL Forwarding Parameters Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING_PARAMS];
+	if (table->entry_count) {
+		kfree(table->entries);
+		table->entry_count = 0;
+	}
+
 	/* Nothing to do */
 	if (!num_virtual_links)
 		return 0;
@@ -208,6 +255,7 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 				vl_lookup[k].destports = rule->vl.destports;
 			else
 				vl_lookup[k].iscritical = true;
+			vl_lookup[k].flow_cookie = rule->cookie;
 			k++;
 		}
 	}
@@ -232,6 +280,68 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 		}
 	}
 
+	if (!have_critical_virtual_links)
+		return 0;
+
+	/* VL Policing Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_POLICING];
+	table->entries = kcalloc(max_sharindx, table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+	table->entry_count = max_sharindx;
+	vl_policing = table->entries;
+
+	/* VL Forwarding Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING];
+	table->entries = kcalloc(max_sharindx, table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+	table->entry_count = max_sharindx;
+	vl_fwd = table->entries;
+
+	/* VL Forwarding Parameters Table */
+	table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING_PARAMS];
+	table->entries = kcalloc(1, table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+	table->entry_count = 1;
+	vl_fwd_params = table->entries;
+
+	/* Reserve some frame buffer memory for the critical-traffic virtual
+	 * links (this needs to be done). At the moment, hardcode the value
+	 * at 100 blocks of 128 bytes of memory each. This leaves 829 blocks
+	 * remaining for best-effort traffic. TODO: figure out a more flexible
+	 * way to perform the frame buffer partitioning.
+	 */
+	l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY -
+				     SJA1105_VL_FRAME_MEMORY;
+	vl_fwd_params->partspc[0] = SJA1105_VL_FRAME_MEMORY;
+
+	for (i = 0; i < num_virtual_links; i++) {
+		unsigned long cookie = vl_lookup[i].flow_cookie;
+		struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
+
+		if (rule->vl.type == SJA1105_VL_NONCRITICAL)
+			continue;
+		if (rule->vl.type == SJA1105_VL_TIME_TRIGGERED) {
+			int sharindx = rule->vl.sharindx;
+
+			vl_policing[i].type = 1;
+			vl_policing[i].sharindx = sharindx;
+			vl_policing[i].maxlen = rule->vl.maxlen;
+			vl_policing[sharindx].type = 1;
+
+			vl_fwd[i].type = 1;
+			vl_fwd[sharindx].type = 1;
+			vl_fwd[sharindx].priority = rule->vl.ipv;
+			vl_fwd[sharindx].partition = 0;
+			vl_fwd[sharindx].destports = rule->vl.destports;
+		}
+	}
+
 	return 0;
 }
 
@@ -300,3 +410,387 @@ int sja1105_vl_delete(struct sja1105_private *priv, int port,
 
 	return sja1105_static_config_reload(priv, SJA1105_VIRTUAL_LINKS);
 }
+
+/* Add a gate event for @rule, firing at @entry_time with state @gate_state, to
+ * the global gate list, which is kept in ascending order of firing time.
+ *
+ * No two gate events may fire at the same time: if an entry with the same time
+ * already exists, an error message is set in @extack and -EBUSY is returned.
+ * Returns -ENOMEM if the new entry cannot be allocated and 0 on success.
+ */
+static int sja1105_insert_gate_entry(struct sja1105_gating_config *gating_cfg,
+				     struct sja1105_rule *rule,
+				     u8 gate_state, s64 entry_time,
+				     struct netlink_ext_ack *extack)
+{
+	struct sja1105_gate_entry *new_entry, *pos;
+
+	new_entry = kzalloc(sizeof(*new_entry), GFP_KERNEL);
+	if (!new_entry)
+		return -ENOMEM;
+
+	new_entry->interval = entry_time;
+	new_entry->gate_state = gate_state;
+	new_entry->rule = rule;
+
+	/* Stop at the first entry that fires later than the new one. If there
+	 * is none (which includes the list being empty), the cursor ends up
+	 * on the list head itself.
+	 */
+	list_for_each_entry(pos, &gating_cfg->entries, list) {
+		if (pos->interval == entry_time) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Gate conflict");
+			kfree(new_entry);
+			return -EBUSY;
+		}
+
+		if (entry_time < pos->interval)
+			break;
+	}
+
+	/* Link the new entry right before the cursor */
+	list_add_tail(&new_entry->list, &pos->list);
+	gating_cfg->num_entries++;
+
+	return 0;
+}
+
+/* The gate entries contain absolute times in their e->interval field. Convert
+ * that to proper intervals (i.e. "0, 5, 10, 15" to "5, 5, 5, 5").
+ *
+ * The interval of each entry becomes the distance to the entry that follows
+ * it, and the interval of the last entry is what remains until @cycle_time.
+ * This also holds for a list with a single entry, which lasts from its firing
+ * time until the end of the cycle.
+ */
+static void
+sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg,
+				    u64 cycle_time)
+{
+	struct sja1105_gate_entry *last_e;
+	struct sja1105_gate_entry *e;
+	struct list_head *prev;
+
+	/* list_last_entry() must not be used on an empty list */
+	if (list_empty(&gating_cfg->entries))
+		return;
+
+	list_for_each_entry(e, &gating_cfg->entries, list) {
+		struct sja1105_gate_entry *p;
+
+		prev = e->list.prev;
+
+		/* The first entry has no predecessor to fix up */
+		if (prev == &gating_cfg->entries)
+			continue;
+
+		/* e still holds an absolute time at this point, because only
+		 * entries behind the cursor have been converted so far.
+		 */
+		p = list_entry(prev, struct sja1105_gate_entry, list);
+		p->interval = e->interval - p->interval;
+	}
+	last_e = list_last_entry(&gating_cfg->entries,
+				 struct sja1105_gate_entry, list);
+	last_e->interval = cycle_time - last_e->interval;
+}
+
+/* Unlink and free every entry of the global gate list. The other fields of
+ * @gating_cfg are left untouched.
+ */
+static void sja1105_free_gating_config(struct sja1105_gating_config *gating_cfg)
+{
+	struct list_head *head = &gating_cfg->entries;
+
+	while (!list_empty(head)) {
+		struct sja1105_gate_entry *entry;
+
+		entry = list_first_entry(head, struct sja1105_gate_entry, list);
+		list_del(&entry->list);
+		kfree(entry);
+	}
+}
+
+/* Rebuild the global tc-gate subschedule from all time-triggered virtual link
+ * rules in the flow block.
+ *
+ * The subschedule takes the cycle time of the rule with the longest cycle
+ * time, together with that rule's base time. The gate events of every rule are
+ * then inserted into the global list, repeated at multiples of the rule's own
+ * cycle time for as long as they fit in the longest cycle, and finally the
+ * firing times are converted to intervals.
+ *
+ * Returns 0 on success (including when there is nothing to compose) or the
+ * error reported by sja1105_insert_gate_entry(), in which case the gating
+ * configuration is left with an empty list.
+ */
+static int sja1105_compose_gating_subschedule(struct sja1105_private *priv,
+					      struct netlink_ext_ack *extack)
+{
+	struct sja1105_gating_config *gating_cfg = &priv->tas_data.gating_cfg;
+	struct sja1105_rule *rule;
+	s64 max_cycle_time = 0;
+	s64 its_base_time = 0;
+	int i, rc = 0;
+
+	/* First pass: find the longest cycle time and its base time */
+	list_for_each_entry(rule, &priv->flow_block.rules, list) {
+		if (rule->type != SJA1105_RULE_VL)
+			continue;
+		if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED)
+			continue;
+
+		if (max_cycle_time < rule->vl.cycle_time) {
+			max_cycle_time = rule->vl.cycle_time;
+			its_base_time = rule->vl.base_time;
+		}
+	}
+
+	/* NOTE(review): this returns before the old configuration is freed, so
+	 * a previously composed gate list appears to be kept when no rule with
+	 * a nonzero cycle time is left - confirm that this is intended.
+	 */
+	if (!max_cycle_time)
+		return 0;
+
+	dev_dbg(priv->ds->dev, "max_cycle_time %lld its_base_time %lld\n",
+		max_cycle_time, its_base_time);
+
+	/* Start over from an empty gate list */
+	sja1105_free_gating_config(gating_cfg);
+
+	gating_cfg->base_time = its_base_time;
+	gating_cfg->cycle_time = max_cycle_time;
+	gating_cfg->num_entries = 0;
+
+	/* Second pass: insert the gate events of every time-triggered rule */
+	list_for_each_entry(rule, &priv->flow_block.rules, list) {
+		s64 time;
+		s64 rbt;
+
+		if (rule->type != SJA1105_RULE_VL)
+			continue;
+		if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED)
+			continue;
+
+		/* Calculate the difference between this gating schedule's
+		 * base time, and the base time of the gating schedule with the
+		 * longest cycle time. We call it the relative base time (rbt).
+		 */
+		rbt = future_base_time(rule->vl.base_time, rule->vl.cycle_time,
+				       its_base_time);
+		rbt -= its_base_time;
+
+		/* Firing time of the current gate event of this rule, relative
+		 * to the start of the global cycle
+		 */
+		time = rbt;
+
+		for (i = 0; i < rule->vl.num_entries; i++) {
+			u8 gate_state = rule->vl.entries[i].gate_state;
+			s64 entry_time = time;
+
+			/* Repeat the event once per cycle of this rule, for as
+			 * long as it fits in the longest cycle
+			 */
+			while (entry_time < max_cycle_time) {
+				rc = sja1105_insert_gate_entry(gating_cfg, rule,
+							       gate_state,
+							       entry_time,
+							       extack);
+				if (rc)
+					goto err;
+
+				entry_time += rule->vl.cycle_time;
+			}
+			time += rule->vl.entries[i].interval;
+		}
+	}
+
+	sja1105_gating_cfg_time_to_interval(gating_cfg, max_cycle_time);
+
+	return 0;
+err:
+	sja1105_free_gating_config(gating_cfg);
+	return rc;
+}
+
+/* Offload a tc-gate action as a time-triggered virtual link.
+ *
+ * @priv: driver private data
+ * @port: ingress port that the rule is being installed on
+ * @extack: netlink extended ack, used for error reporting
+ * @cookie: flow cookie identifying the rule. If a rule with this cookie
+ *	    already exists, @port is simply added to its port mask.
+ * @key: virtual link key. Its type must match the VLAN filtering state of
+ *	 @port: VLAN-aware if the port is VLAN filtering, VLAN-unaware if not.
+ * @index: index of the gate action, stored as the rule's shared index
+ * @prio: currently unused
+ * @base_time: base time of the schedule in ns, must be a multiple of 200 ns
+ * @cycle_time: cycle time of the schedule in ns, must be a multiple of 200 ns
+ * @cycle_time_ext: cycle time extension, not supported and must be zero
+ * @num_entries: number of elements in @entries
+ * @entries: the gate list. Each interval must be a nonzero multiple of 200 ns
+ *	     that does not exceed SJA1105_TAS_MAX_DELTA, IntervalOctetMax must
+ *	     be unset (-1) and all entries must share the same IPV.
+ *
+ * Returns 0 on success or a negative error code, in which case @port is
+ * removed from the rule again and the rule is freed if no port is left on it.
+ */
+int sja1105_vl_gate(struct sja1105_private *priv, int port,
+		    struct netlink_ext_ack *extack, unsigned long cookie,
+		    struct sja1105_key *key, u32 index, s32 prio,
+		    u64 base_time, u64 cycle_time, u64 cycle_time_ext,
+		    u32 num_entries, struct action_gate_entry *entries)
+{
+	struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
+	int ipv = -1;
+	int i, rc;
+	s32 rem;
+
+	if (cycle_time_ext) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Cycle time extension not supported");
+		return -EOPNOTSUPP;
+	}
+
+	div_s64_rem(base_time, sja1105_delta_to_ns(1), &rem);
+	if (rem) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Base time must be multiple of 200 ns");
+		return -ERANGE;
+	}
+
+	div_s64_rem(cycle_time, sja1105_delta_to_ns(1), &rem);
+	if (rem) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Cycle time must be multiple of 200 ns");
+		return -ERANGE;
+	}
+
+	/* The key type is dictated by the VLAN filtering state of the port.
+	 * The two cases must be told apart by the filtering state alone,
+	 * otherwise a VLAN-aware key on a VLAN filtering port would end up
+	 * being checked against the VLAN-unaware requirement as well.
+	 */
+	if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port))) {
+		if (key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "Can only gate based on {DMAC, VID, PCP}");
+			return -EOPNOTSUPP;
+		}
+	} else if (key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Can only gate based on DMAC");
+		return -EOPNOTSUPP;
+	}
+
+	if (!rule) {
+		rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+		if (!rule)
+			return -ENOMEM;
+
+		/* The rule is linked right away, the error path below takes
+		 * care of unlinking and freeing it again
+		 */
+		list_add(&rule->list, &priv->flow_block.rules);
+		rule->cookie = cookie;
+		rule->type = SJA1105_RULE_VL;
+		rule->key = *key;
+		rule->vl.type = SJA1105_VL_TIME_TRIGGERED;
+		rule->vl.sharindx = index;
+		rule->vl.base_time = base_time;
+		rule->vl.cycle_time = cycle_time;
+		rule->vl.num_entries = num_entries;
+		rule->vl.entries = kcalloc(num_entries,
+					   sizeof(struct action_gate_entry),
+					   GFP_KERNEL);
+		if (!rule->vl.entries) {
+			rc = -ENOMEM;
+			goto out;
+		}
+
+		for (i = 0; i < num_entries; i++) {
+			div_s64_rem(entries[i].interval,
+				    sja1105_delta_to_ns(1), &rem);
+			if (rem) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Interval must be multiple of 200 ns");
+				rc = -ERANGE;
+				goto out;
+			}
+
+			if (!entries[i].interval) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Interval cannot be zero");
+				rc = -ERANGE;
+				goto out;
+			}
+
+			if (ns_to_sja1105_delta(entries[i].interval) >
+			    SJA1105_TAS_MAX_DELTA) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Maximum interval is 52 ms");
+				rc = -ERANGE;
+				goto out;
+			}
+
+			if (entries[i].maxoctets != -1) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Cannot offload IntervalOctetMax");
+				rc = -EOPNOTSUPP;
+				goto out;
+			}
+
+			/* The first entry that is seen decides the IPV, all
+			 * the other ones must agree with it
+			 */
+			if (ipv == -1) {
+				ipv = entries[i].ipv;
+			} else if (ipv != entries[i].ipv) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "Only support a single IPV per VL");
+				rc = -EOPNOTSUPP;
+				goto out;
+			}
+
+			rule->vl.entries[i] = entries[i];
+		}
+
+		/* No IPV was requested: fall back to the PCP of the key, or to
+		 * zero if the key does not have one
+		 */
+		if (ipv == -1) {
+			if (key->type == SJA1105_KEY_VLAN_AWARE_VL)
+				ipv = key->vl.pcp;
+			else
+				ipv = 0;
+		}
+
+		/* TODO: support per-flow MTU */
+		rule->vl.maxlen = VLAN_ETH_FRAME_LEN + ETH_FCS_LEN;
+		rule->vl.ipv = ipv;
+	}
+
+	rule->port_mask |= BIT(port);
+
+	rc = sja1105_compose_gating_subschedule(priv, extack);
+	if (rc)
+		goto out;
+
+	rc = sja1105_init_virtual_links(priv, extack);
+	if (rc)
+		goto out;
+
+	if (sja1105_gating_check_conflicts(priv, -1, extack)) {
+		NL_SET_ERR_MSG_MOD(extack, "Conflict with tc-taprio schedule");
+		rc = -ERANGE;
+		goto out;
+	}
+
+out:
+	/* NOTE(review): if the failure happens after the gating subschedule
+	 * was composed, its gate entries may still point to the rule that is
+	 * freed here - confirm whether the subschedule needs to be rebuilt.
+	 */
+	if (rc) {
+		rule->port_mask &= ~BIT(port);
+		if (!rule->port_mask) {
+			list_del(&rule->list);
+			kfree(rule->vl.entries);
+			kfree(rule);
+		}
+	}
+
+	return rc;
+}
+
+/* Look up the index of the VL Lookup Table entry that matches @key on ingress
+ * port @port. A VLAN-unaware key is compared by destination MAC address only,
+ * a VLAN-aware key also by VLAN ID and PCP.
+ *
+ * Returns the index of the first matching entry, or -1 if there is none or if
+ * @key is not a virtual link key (which also triggers a warning).
+ */
+static int sja1105_find_vlid(struct sja1105_private *priv, int port,
+			     struct sja1105_key *key)
+{
+	struct sja1105_vl_lookup_entry *lookup;
+	struct sja1105_table *lookup_table;
+	bool vlan_aware;
+	int idx;
+
+	if (WARN_ON(key->type != SJA1105_KEY_VLAN_AWARE_VL &&
+		    key->type != SJA1105_KEY_VLAN_UNAWARE_VL))
+		return -1;
+
+	vlan_aware = (key->type == SJA1105_KEY_VLAN_AWARE_VL);
+
+	lookup_table = &priv->static_config.tables[BLK_IDX_VL_LOOKUP];
+	lookup = lookup_table->entries;
+
+	for (idx = 0; idx < lookup_table->entry_count; idx++) {
+		/* Port and destination MAC must match for both key types */
+		if (lookup[idx].port != port ||
+		    lookup[idx].macaddr != key->vl.dmac)
+			continue;
+
+		if (!vlan_aware)
+			return idx;
+
+		if (lookup[idx].vlanid == key->vl.vid &&
+		    lookup[idx].vlanprior == key->vl.pcp)
+			return idx;
+	}
+
+	return -1;
+}
+
+/* Report the hardware statistics of a time-triggered virtual link.
+ *
+ * The status of the virtual link that @rule occupies on @port is read over
+ * SPI, and the sum of its three counters (timing errors, unreleased frames and
+ * length errors) is reported to @stats as a packet count, as a difference
+ * from the value seen by the previous call. No byte count is reported.
+ *
+ * Returns 0 on success, as well as when @rule is not time-triggered or has no
+ * VL Lookup entry on @port, in which cases @stats is left alone. If the SPI
+ * transfer fails, a message is set in @extack and its error code is returned.
+ */
+int sja1105_vl_stats(struct sja1105_private *priv, int port,
+		     struct sja1105_rule *rule, struct flow_stats *stats,
+		     struct netlink_ext_ack *extack)
+{
+	const struct sja1105_regs *regs = priv->info->regs;
+	u64 timing_errors, unreleased_frames, length_errors;
+	u8 status[SJA1105_SIZE_VL_STATUS] = {0};
+	u64 total;
+	int vlid;
+	int err;
+
+	if (rule->vl.type != SJA1105_VL_TIME_TRIGGERED)
+		return 0;
+
+	vlid = sja1105_find_vlid(priv, port, &rule->key);
+	if (vlid < 0)
+		return 0;
+
+	/* The status of each virtual link spans 2 register addresses */
+	err = sja1105_xfer_buf(priv, SPI_READ, regs->vl_status + 2 * vlid,
+			       status, SJA1105_SIZE_VL_STATUS);
+	if (err) {
+		NL_SET_ERR_MSG_MOD(extack, "SPI access failed");
+		return err;
+	}
+
+	sja1105_unpack(status, &length_errors,     47, 32,
+		       SJA1105_SIZE_VL_STATUS);
+	sja1105_unpack(status, &timing_errors,     31, 16,
+		       SJA1105_SIZE_VL_STATUS);
+	sja1105_unpack(status, &unreleased_frames, 15,  0,
+		       SJA1105_SIZE_VL_STATUS);
+
+	total = timing_errors + unreleased_frames + length_errors;
+
+	flow_stats_update(stats, 0, total - rule->vl.stats.pkts,
+			  jiffies - rule->vl.stats.lastused,
+			  FLOW_ACTION_HW_STATS_IMMEDIATE);
+
+	/* Remember what was reported, for the next call */
+	rule->vl.stats.pkts = total;
+	rule->vl.stats.lastused = jiffies;
+
+	return 0;
+}
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h
index 08ee5557b463..323fa0535af7 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.h
+++ b/drivers/net/dsa/sja1105/sja1105_vl.h
@@ -15,6 +15,16 @@ int sja1105_vl_delete(struct sja1105_private *priv, int port,
 		      struct sja1105_rule *rule,
 		      struct netlink_ext_ack *extack);
 
+int sja1105_vl_gate(struct sja1105_private *priv, int port,
+		    struct netlink_ext_ack *extack, unsigned long cookie,
+		    struct sja1105_key *key, u32 index, s32 prio,
+		    u64 base_time, u64 cycle_time, u64 cycle_time_ext,
+		    u32 num_entries, struct action_gate_entry *entries);
+
+int sja1105_vl_stats(struct sja1105_private *priv, int port,
+		     struct sja1105_rule *rule, struct flow_stats *stats,
+		     struct netlink_ext_ack *extack);
+
 #else
 
 static inline int sja1105_vl_redirect(struct sja1105_private *priv, int port,
@@ -36,6 +46,27 @@ static inline int sja1105_vl_delete(struct sja1105_private *priv,
 	return -EOPNOTSUPP;
 }
 
+/* Stub used when CONFIG_NET_DSA_SJA1105_VL is disabled: tc-gate cannot be
+ * offloaded, so set an error message in @extack and return -EOPNOTSUPP.
+ */
+static inline int sja1105_vl_gate(struct sja1105_private *priv, int port,
+				  struct netlink_ext_ack *extack,
+				  unsigned long cookie,
+				  struct sja1105_key *key, u32 index, s32 prio,
+				  u64 base_time, u64 cycle_time,
+				  u64 cycle_time_ext, u32 num_entries,
+				  struct action_gate_entry *entries)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Virtual Links not compiled in");
+	return -EOPNOTSUPP;
+}
+
+/* Stub used when CONFIG_NET_DSA_SJA1105_VL is disabled: there are no virtual
+ * link statistics, so set an error message in @extack and return -EOPNOTSUPP.
+ */
+static inline int sja1105_vl_stats(struct sja1105_private *priv, int port,
+				   struct sja1105_rule *rule,
+				   struct flow_stats *stats,
+				   struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG_MOD(extack, "Virtual Links not compiled in");
+	return -EOPNOTSUPP;
+}
+
 #endif /* IS_ENABLED(CONFIG_NET_DSA_SJA1105_VL) */
 
 #endif /* _SJA1105_VL_H */
-- 
cgit v1.2.3-59-g8ed1b


From 47cfa3af4e23f5ea29ed9202557c428b43742c57 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 5 May 2020 22:20:57 +0300
Subject: docs: net: dsa: sja1105: document intended usage of virtual links

Add some verbiage describing how the hardware features of the switch are
exposed to users through tc-flower.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/dsa/sja1105.rst | 116 +++++++++++++++++++++++++++++++
 1 file changed, 116 insertions(+)

diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index 64553d8d91cb..34581629dd3f 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -230,6 +230,122 @@ simultaneously on two ports. The driver checks the consistency of the schedules
 against this restriction and errors out when appropriate. Schedule analysis is
 needed to avoid this, which is outside the scope of the document.
 
+Routing actions (redirect, trap, drop)
+--------------------------------------
+
+The switch is able to offload flow-based redirection of packets to a set of
+destination ports specified by the user. Internally, this is implemented by
+making use of Virtual Links, a TTEthernet concept.
+
+The driver supports 2 types of keys for Virtual Links:
+
+- VLAN-aware virtual links: these match on destination MAC address, VLAN ID and
+  VLAN PCP.
+- VLAN-unaware virtual links: these match on destination MAC address only.
+
+The VLAN awareness state of the bridge (vlan_filtering) cannot be changed while
+there are virtual link rules installed.
+
+Composing multiple actions inside the same rule is supported. When only routing
+actions are requested, the driver creates a "non-critical" virtual link. When
+the action list also contains tc-gate (more details below), the virtual link
+becomes "time-critical" (draws frame buffers from a reserved memory partition,
+etc).
+
+The 3 routing actions that are supported are "trap", "drop" and "redirect".
+
+Example 1: send frames received on swp2 with a DA of 42:be:24:9b:76:20 to the
+CPU and to swp3. This type of key (DA only) is used when the port's VLAN
+awareness state is off::
+
+  tc qdisc add dev swp2 clsact
+  tc filter add dev swp2 ingress flower skip_sw dst_mac 42:be:24:9b:76:20 \
+          action mirred egress redirect dev swp3 \
+          action trap
+
+Example 2: drop frames received on swp2 with a DA of 42:be:24:9b:76:20, a VID
+of 100 and a PCP of 0::
+
+  tc filter add dev swp2 ingress protocol 802.1Q flower skip_sw \
+          dst_mac 42:be:24:9b:76:20 vlan_id 100 vlan_prio 0 action drop
+
+Time-based ingress policing
+---------------------------
+
+The TTEthernet hardware abilities of the switch can be constrained to act
+similarly to the Per-Stream Filtering and Policing (PSFP) clause specified in
+IEEE 802.1Q-2018 (formerly 802.1Qci). This means it can be used to perform
+tight timing-based admission control for up to 1024 flows (identified by a
+tuple composed of destination MAC address, VLAN ID and VLAN PCP). Packets which
+are received outside their expected reception window are dropped.
+
+This capability can be managed through the offload of the tc-gate action. As
+routing actions are intrinsic to virtual links in TTEthernet (which performs
+explicit routing of time-critical traffic and does not leave that in the hands
+of the FDB, flooding etc), the tc-gate action may never appear alone when
+asking sja1105 to offload it. One (or more) redirect or trap actions must also
+follow along.
+
+Example: create a tc-taprio schedule that is phase-aligned with a tc-gate
+schedule (the clocks must be synchronized by a 1588 application stack, which is
+outside the scope of this document). No packet delivered by the sender will be
+dropped. Note that the reception window is larger than the transmission window
+(and much more so, in this example) to compensate for the packet propagation
+delay of the link (which can be determined by the 1588 application stack).
+
+Receiver (sja1105)::
+
+  tc qdisc add dev swp2 clsact
+  now=$(phc_ctl /dev/ptp1 get | awk '/clock time is/ {print $5}') && \
+          sec=$(echo $now | awk -F. '{print $1}') && \
+          base_time="$(((sec + 2) * 1000000000))" && \
+          echo "base time ${base_time}"
+  tc filter add dev swp2 ingress flower skip_sw \
+          dst_mac 42:be:24:9b:76:20 \
+          action gate base-time ${base_time} \
+          sched-entry OPEN  60000 -1 -1 \
+          sched-entry CLOSE 40000 -1 -1 \
+          action trap
+
+Sender::
+
+  now=$(phc_ctl /dev/ptp0 get | awk '/clock time is/ {print $5}') && \
+          sec=$(echo $now | awk -F. '{print $1}') && \
+          base_time="$(((sec + 2) * 1000000000))" && \
+          echo "base time ${base_time}"
+  tc qdisc add dev eno0 parent root taprio \
+          num_tc 8 \
+          map 0 1 2 3 4 5 6 7 \
+          queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7 \
+          base-time ${base_time} \
+          sched-entry S 01  50000 \
+          sched-entry S 00  50000 \
+          flags 2
+
+The engine used to schedule the ingress gate operations is the same as the
+one used for the tc-taprio offload. Therefore, the restrictions regarding the
+fact that no two gate actions (either tc-gate or tc-taprio gates) may fire at
+the same time (during the same 200 ns slot) still apply.
+
+Conveniently, it is possible to share time-triggered virtual links across
+more than 1 ingress port, via flow blocks. In this case, the restriction of
+firing at the same time does not apply because there is a single schedule in
+the system, that of the shared virtual link::
+
+  tc qdisc add dev swp2 ingress_block 1 clsact
+  tc qdisc add dev swp3 ingress_block 1 clsact
+  tc filter add block 1 flower skip_sw dst_mac 42:be:24:9b:76:20 \
+          action gate index 2 \
+          base-time 0 \
+          sched-entry OPEN 50000000 -1 -1 \
+          sched-entry CLOSE 50000000 -1 -1 \
+          action trap
+
+Hardware statistics for each flow are also available ("pkts" counts the number
+of dropped frames, which is a sum of frames dropped due to timing violations,
+lack of destination ports and MTU enforcement checks). Byte-level counters are
+not available.
+
 Device Tree bindings and board design
 =====================================
 
-- 
cgit v1.2.3-59-g8ed1b


From 7596ac9d19a9df25707ecaac0675881f62dd8c18 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Tue, 5 May 2020 23:14:29 +0300
Subject: soc: fsl: dpio: properly compute the consumer index

Mask the consumer index before using it. Without this, we would be
writing frame descriptors beyond the ring size supported by the QBMAN
block.

Fixes: 3b2abda7d28c ("soc: fsl: dpio: Replace QMAN array mode with ring mode enqueue")
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Acked-by: Li Yang <leoyang.li@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/soc/fsl/dpio/qbman-portal.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/soc/fsl/dpio/qbman-portal.c b/drivers/soc/fsl/dpio/qbman-portal.c
index 804b8ba9bf5c..23a1377971f4 100644
--- a/drivers/soc/fsl/dpio/qbman-portal.c
+++ b/drivers/soc/fsl/dpio/qbman-portal.c
@@ -669,6 +669,7 @@ int qbman_swp_enqueue_multiple_direct(struct qbman_swp *s,
 		eqcr_ci = s->eqcr.ci;
 		p = s->addr_cena + QBMAN_CENA_SWP_EQCR_CI;
 		s->eqcr.ci = qbman_read_register(s, QBMAN_CINH_SWP_EQCR_CI);
+		s->eqcr.ci &= full_mask;
 
 		s->eqcr.available = qm_cyc_diff(s->eqcr.pi_ring_size,
 					eqcr_ci, s->eqcr.ci);
-- 
cgit v1.2.3-59-g8ed1b


From f96e87178bb819a2ee25623a6935fa850c2defdd Mon Sep 17 00:00:00 2001
From: Taehee Yoo <ap420073@gmail.com>
Date: Wed, 6 May 2020 15:47:45 +0000
Subject: hsr: remove WARN_ONCE() in hsr_fill_frame_info()

When a VLAN frame is being sent, hsr calls WARN_ONCE() because hsr doesn't
support VLAN. But using WARN_ONCE() for this is overkill;
netdev_warn_once() is enough.

Signed-off-by: Taehee Yoo <ap420073@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/hsr/hsr_forward.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/hsr/hsr_forward.c b/net/hsr/hsr_forward.c
index ddd9605bad04..ed13760463de 100644
--- a/net/hsr/hsr_forward.c
+++ b/net/hsr/hsr_forward.c
@@ -321,7 +321,7 @@ static int hsr_fill_frame_info(struct hsr_frame_info *frame,
 	if (ethhdr->h_proto == htons(ETH_P_8021Q)) {
 		frame->is_vlan = true;
 		/* FIXME: */
-		WARN_ONCE(1, "HSR: VLAN not yet supported");
+		netdev_warn_once(skb->dev, "VLAN not yet supported");
 	}
 	if (ethhdr->h_proto == htons(ETH_P_PRP) ||
 	    ethhdr->h_proto == htons(ETH_P_HSR)) {
-- 
cgit v1.2.3-59-g8ed1b


From 38c440b24052cad3fa6dcbc32bf8e132da3fe55d Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 6 May 2020 20:47:17 +0300
Subject: dpaa2-eth: create a function to flush the XDP fds

Create an independent function that takes a particular frame queue and
an array of frame descriptors and tries to enqueue them until it hits
the maximum number of retries. The same function will be used in the
next patch also on the XDP_TX path.

Also, create the dpaa2_eth_xdp_fds structure to incorporate the array of
FDs as well as the number of FDs already populated.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 61 ++++++++++++++++--------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  7 ++-
 2 files changed, 46 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 11accab81ea1..0f3e842a4fd6 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -244,6 +244,35 @@ static void xdp_release_buf(struct dpaa2_eth_priv *priv,
 	ch->xdp.drop_cnt = 0;
 }
 
+static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv,
+			       struct dpaa2_eth_fq *fq,
+			       struct dpaa2_eth_xdp_fds *xdp_fds)
+{
+	int total_enqueued = 0, retries = 0, enqueued;
+	struct dpaa2_eth_drv_stats *percpu_extras;
+	int num_fds, err, max_retries;
+	struct dpaa2_fd *fds;
+
+	percpu_extras = this_cpu_ptr(priv->percpu_extras);
+
+	/* try to enqueue all the FDs until the max number of retries is hit */
+	fds = xdp_fds->fds;
+	num_fds = xdp_fds->num;
+	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
+	while (total_enqueued < num_fds && retries < max_retries) {
+		err = priv->enqueue(priv, fq, &fds[total_enqueued],
+				    0, num_fds - total_enqueued, &enqueued);
+		if (err == -EBUSY) {
+			percpu_extras->tx_portal_busy += ++retries;
+			continue;
+		}
+		total_enqueued += enqueued;
+	}
+	xdp_fds->num = 0;
+
+	return total_enqueued;
+}
+
 static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 		       void *buf_start, u16 queue_id)
 {
@@ -1934,12 +1963,11 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 			      struct xdp_frame **frames, u32 flags)
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
-	int total_enqueued = 0, retries = 0, enqueued;
-	struct dpaa2_eth_drv_stats *percpu_extras;
+	struct dpaa2_eth_xdp_fds *xdp_redirect_fds;
 	struct rtnl_link_stats64 *percpu_stats;
-	int num_fds, i, err, max_retries;
 	struct dpaa2_eth_fq *fq;
 	struct dpaa2_fd *fds;
+	int enqueued, i, err;
 
 	if (unlikely(flags & ~XDP_XMIT_FLAGS_MASK))
 		return -EINVAL;
@@ -1948,10 +1976,10 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 		return -ENETDOWN;
 
 	fq = &priv->fq[smp_processor_id()];
-	fds = fq->xdp_fds;
+	xdp_redirect_fds = &fq->xdp_redirect_fds;
+	fds = xdp_redirect_fds->fds;
 
 	percpu_stats = this_cpu_ptr(priv->percpu_stats);
-	percpu_extras = this_cpu_ptr(priv->percpu_extras);
 
 	/* create a FD for each xdp_frame in the list received */
 	for (i = 0; i < n; i++) {
@@ -1959,28 +1987,19 @@ static int dpaa2_eth_xdp_xmit(struct net_device *net_dev, int n,
 		if (err)
 			break;
 	}
-	num_fds = i;
+	xdp_redirect_fds->num = i;
 
-	/* try to enqueue all the FDs until the max number of retries is hit */
-	max_retries = num_fds * DPAA2_ETH_ENQUEUE_RETRIES;
-	while (total_enqueued < num_fds && retries < max_retries) {
-		err = priv->enqueue(priv, fq, &fds[total_enqueued],
-				    0, num_fds - total_enqueued, &enqueued);
-		if (err == -EBUSY) {
-			percpu_extras->tx_portal_busy += ++retries;
-			continue;
-		}
-		total_enqueued += enqueued;
-	}
+	/* enqueue all the frame descriptors */
+	enqueued = dpaa2_eth_xdp_flush(priv, fq, xdp_redirect_fds);
 
 	/* update statistics */
-	percpu_stats->tx_packets += total_enqueued;
-	for (i = 0; i < total_enqueued; i++)
+	percpu_stats->tx_packets += enqueued;
+	for (i = 0; i < enqueued; i++)
 		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
-	for (i = total_enqueued; i < n; i++)
+	for (i = enqueued; i < n; i++)
 		xdp_return_frame_rx_napi(frames[i]);
 
-	return total_enqueued;
+	return enqueued;
 }
 
 static int update_xps(struct dpaa2_eth_priv *priv)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 43cd8409f2e9..b5f7dbbc2a02 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -310,6 +310,11 @@ enum dpaa2_eth_fq_type {
 
 struct dpaa2_eth_priv;
 
+struct dpaa2_eth_xdp_fds {
+	struct dpaa2_fd fds[DEV_MAP_BULK_SIZE];
+	ssize_t num;
+};
+
 struct dpaa2_eth_fq {
 	u32 fqid;
 	u32 tx_qdbin;
@@ -328,7 +333,7 @@ struct dpaa2_eth_fq {
 			struct dpaa2_eth_fq *fq);
 	struct dpaa2_eth_fq_stats stats;
 
-	struct dpaa2_fd xdp_fds[DEV_MAP_BULK_SIZE];
+	struct dpaa2_eth_xdp_fds xdp_redirect_fds;
 };
 
 struct dpaa2_eth_ch_xdp {
-- 
cgit v1.2.3-59-g8ed1b


From a45cfcc69a2519463db0e18db5b7f9c7739f559d Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Wed, 6 May 2020 21:13:59 +0300
Subject: net: ethernet: ti: am65-cpsw-nuss: use of_platform_device_create()
 for mdio

The MCU CPSW is expected to populate only the MDIO device, but follow-up
patches will add a "compatible" property to the MCU CPSW CPTS node, which
will cause creation of a CPTS device and MCU CPSW init failure. Hence,
switch to using of_platform_device_create() instead of
of_platform_populate() for MDIO device population.

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpsw-nuss.c | 24 ++++++++++++++++++------
 drivers/net/ethernet/ti/am65-cpsw-nuss.h |  2 ++
 2 files changed, 20 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index f8c589929308..8cdbb2b9b13a 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -2031,10 +2031,21 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ret = of_platform_populate(dev->of_node, NULL, NULL, dev);
-	/* We do not want to force this, as in some cases may not have child */
-	if (ret)
-		dev_warn(dev, "populating child nodes err:%d\n", ret);
+	node = of_get_child_by_name(dev->of_node, "mdio");
+	if (!node) {
+		dev_warn(dev, "MDIO node not found\n");
+	} else if (of_device_is_available(node)) {
+		struct platform_device *mdio_pdev;
+
+		mdio_pdev = of_platform_device_create(node, NULL, dev);
+		if (!mdio_pdev) {
+			ret = -ENODEV;
+			goto err_pm_clear;
+		}
+
+		common->mdio_dev =  &mdio_pdev->dev;
+	}
+	of_node_put(node);
 
 	am65_cpsw_nuss_get_ver(common);
 
@@ -2090,7 +2101,8 @@ static int am65_cpsw_nuss_probe(struct platform_device *pdev)
 	return 0;
 
 err_of_clear:
-	of_platform_depopulate(dev);
+	of_platform_device_destroy(common->mdio_dev, NULL);
+err_pm_clear:
 	pm_runtime_put_sync(dev);
 	pm_runtime_disable(dev);
 	return ret;
@@ -2115,7 +2127,7 @@ static int am65_cpsw_nuss_remove(struct platform_device *pdev)
 	 */
 	am65_cpsw_nuss_cleanup_ndev(common);
 
-	of_platform_depopulate(dev);
+	of_platform_device_destroy(common->mdio_dev, NULL);
 
 	pm_runtime_put_sync(&pdev->dev);
 	pm_runtime_disable(&pdev->dev);
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index b1cddfd05a45..8a6382188cb5 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -9,6 +9,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/platform_device.h>
 
 struct am65_cpts;
 
@@ -76,6 +77,7 @@ struct am65_cpsw_pdata {
 
 struct am65_cpsw_common {
 	struct device		*dev;
+	struct device		*mdio_dev;
 	const struct am65_cpsw_pdata *pdata;
 
 	void __iomem		*ss_base;
-- 
cgit v1.2.3-59-g8ed1b


From 4786f4a08d72b7af68cfa258cb7e0abdbf13f002 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Wed, 6 May 2020 21:14:00 +0300
Subject: dt-binding: net: ti: am65x-cpts: make reg and compatible required

This patch follows K3 CPTS review comments from Rob Herring
<robh@kernel.org>.
 - "reg" and "compatible" properties are required now
 - minor format changes
 - K3 CPTS example added to K3 MCU CPSW bindings

Cc: Rob Herring <robh@kernel.org>
Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/ti,k3-am654-cpsw-nuss.yaml        | 15 ++++++++++++-
 .../devicetree/bindings/net/ti,k3-am654-cpts.yaml  | 25 ++++++++--------------
 2 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
index 0c054a2ce5ba..c87395f360a6 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpsw-nuss.yaml
@@ -144,7 +144,7 @@ patternProperties:
     description:
       CPSW MDIO bus.
 
-  "^cpts$":
+  "^cpts@[0-9a-f]+":
     type: object
     allOf:
       - $ref: "ti,k3-am654-cpts.yaml#"
@@ -171,6 +171,8 @@ examples:
     #include <dt-bindings/pinctrl/k3.h>
     #include <dt-bindings/soc/ti,sci_pm_domain.h>
     #include <dt-bindings/net/ti-dp83867.h>
+    #include <dt-bindings/interrupt-controller/irq.h>
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
 
     mcu_cpsw: ethernet@46000000 {
         compatible = "ti,am654-cpsw-nuss";
@@ -229,4 +231,15 @@ examples:
                     ti,fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
               };
         };
+
+        cpts@3d000 {
+             compatible = "ti,am65-cpts";
+             reg = <0x0 0x3d000 0x0 0x400>;
+             clocks = <&k3_clks 18 2>;
+             clock-names = "cpts";
+             interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
+             interrupt-names = "cpts";
+             ti,cpts-ext-ts-inputs = <4>;
+             ti,cpts-periodic-outputs = <2>;
+        };
     };
diff --git a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
index df83c320e61b..50e027911dd4 100644
--- a/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
+++ b/Documentation/devicetree/bindings/net/ti,k3-am654-cpts.yaml
@@ -42,7 +42,7 @@ description: |+
 
 properties:
   $nodename:
-    pattern: "^cpts(@.*|-[0-9a-f])*$"
+    pattern: "^cpts@[0-9a-f]+$"
 
   compatible:
     oneOf:
@@ -52,7 +52,7 @@ properties:
   reg:
     maxItems: 1
     description:
-       The physical base address and size of CPTS IO range
+      The physical base address and size of CPTS IO range
 
   reg-names:
     items:
@@ -65,27 +65,27 @@ properties:
     items:
       - const: cpts
 
-  interrupts-extended:
+  interrupts:
     items:
       - description: CPTS events interrupt
 
   interrupt-names:
     items:
-      - const: "cpts"
+      - const: cpts
 
   ti,cpts-ext-ts-inputs:
     allOf:
       - $ref: /schemas/types.yaml#/definitions/uint32
     maximum: 8
     description:
-        Number of hardware timestamp push inputs (HWx_TS_PUSH)
+      Number of hardware timestamp push inputs (HWx_TS_PUSH)
 
   ti,cpts-periodic-outputs:
     allOf:
       - $ref: /schemas/types.yaml#/definitions/uint32
     maximum: 8
     description:
-         Number of timestamp Generator function outputs (TS_GENFx)
+      Number of timestamp Generator function outputs (TS_GENFx)
 
   refclk-mux:
     type: object
@@ -107,9 +107,11 @@ properties:
       - clocks
 
 required:
+  - compatible
+  - reg
   - clocks
   - clock-names
-  - interrupts-extended
+  - interrupts
   - interrupt-names
 
 additionalProperties: false
@@ -140,13 +142,4 @@ examples:
                assigned-clock-parents = <&k3_clks 118 11>;
          };
     };
-  - |
 
-    cpts {
-             clocks = <&k3_clks 18 2>;
-             clock-names = "cpts";
-             interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
-             interrupt-names = "cpts";
-             ti,cpts-ext-ts-inputs = <4>;
-             ti,cpts-periodic-outputs = <2>;
-    };
-- 
cgit v1.2.3-59-g8ed1b


From ef2d1363c55a2bae14a20d5c0ce6939c7badf8c6 Mon Sep 17 00:00:00 2001
From: Grygorii Strashko <grygorii.strashko@ti.com>
Date: Wed, 6 May 2020 21:14:01 +0300
Subject: arm64: dts: ti: k3-am65/j721e-mcu: update cpts node

Update CPTS node following DT binding update:
 - add reg and compatible properties
 - fix node name

Signed-off-by: Grygorii Strashko <grygorii.strashko@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi         | 4 +++-
 arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi | 4 +++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
index 0e773e0b3f89..ae5f813d0cac 100644
--- a/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-am65-mcu.dtsi
@@ -248,7 +248,9 @@
 			bus_freq = <1000000>;
 		};
 
-		cpts {
+		cpts@3d000 {
+			compatible = "ti,am65-cpts";
+			reg = <0x0 0x3d000 0x0 0x400>;
 			clocks = <&mcu_cpsw_cpts_mux>;
 			clock-names = "cpts";
 			interrupts-extended = <&gic500 GIC_SPI 570 IRQ_TYPE_LEVEL_HIGH>;
diff --git a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
index 37c355e5a833..dc31bd0434cb 100644
--- a/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
+++ b/arch/arm64/boot/dts/ti/k3-j721e-mcu-wakeup.dtsi
@@ -339,7 +339,9 @@
 			bus_freq = <1000000>;
 		};
 
-		cpts {
+		cpts@3d000 {
+			compatible = "ti,am65-cpts";
+			reg = <0x0 0x3d000 0x0 0x400>;
 			clocks = <&k3_clks 18 2>;
 			clock-names = "cpts";
 			interrupts-extended = <&gic500 GIC_SPI 858 IRQ_TYPE_LEVEL_HIGH>;
-- 
cgit v1.2.3-59-g8ed1b


From c75a33c84b83ffbb8b8b58a6bf4dea69dba21326 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Wed, 6 May 2020 17:58:27 -0700
Subject: net: remove newlines in NL_SET_ERR_MSG_MOD

The NL_SET_ERR_MSG_MOD macro is used to report a string describing an
error message to userspace via the netlink extended ACK structure. It
should not have a trailing newline.

Add a cocci script which catches cases where the newline marker is
present. Using this script, fix the handful of cases which accidentally
included a trailing newline.

I couldn't figure out a way to get a patch mode working, so this script
only implements context, report, and org.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: Andy Whitcroft <apw@canonical.com>
Cc: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c |  2 +-
 drivers/net/ethernet/mscc/ocelot_tc.c           |  6 +-
 net/bridge/br_mrp_netlink.c                     |  2 +-
 net/bridge/br_stp_if.c                          |  2 +-
 net/dsa/slave.c                                 |  6 +-
 scripts/coccinelle/misc/newline_in_nl_msg.cocci | 75 +++++++++++++++++++++++++
 6 files changed, 84 insertions(+), 9 deletions(-)
 create mode 100644 scripts/coccinelle/misc/newline_in_nl_msg.cocci

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 77397aa66810..a050808f2128 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1097,7 +1097,7 @@ mlx5e_tc_add_nic_flow(struct mlx5e_priv *priv,
 		if (IS_ERR(priv->fs.tc.t)) {
 			mutex_unlock(&priv->fs.tc.t_lock);
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Failed to create tc offload table\n");
+					   "Failed to create tc offload table");
 			netdev_err(priv->netdev,
 				   "Failed to create tc offload table\n");
 			return PTR_ERR(priv->fs.tc.t);
diff --git a/drivers/net/ethernet/mscc/ocelot_tc.c b/drivers/net/ethernet/mscc/ocelot_tc.c
index d326e231f0ad..b7baf7624e18 100644
--- a/drivers/net/ethernet/mscc/ocelot_tc.c
+++ b/drivers/net/ethernet/mscc/ocelot_tc.c
@@ -48,7 +48,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv,
 
 		if (priv->tc.police_id && priv->tc.police_id != f->cookie) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Only one policer per port is supported\n");
+					   "Only one policer per port is supported");
 			return -EEXIST;
 		}
 
@@ -59,7 +59,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv,
 
 		err = ocelot_port_policer_add(ocelot, port, &pol);
 		if (err) {
-			NL_SET_ERR_MSG_MOD(extack, "Could not add policer\n");
+			NL_SET_ERR_MSG_MOD(extack, "Could not add policer");
 			return err;
 		}
 
@@ -73,7 +73,7 @@ static int ocelot_setup_tc_cls_matchall(struct ocelot_port_private *priv,
 		err = ocelot_port_policer_del(ocelot, port);
 		if (err) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Could not delete policer\n");
+					   "Could not delete policer");
 			return err;
 		}
 		priv->tc.police_id = 0;
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 503896638be0..397e7f710772 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -28,7 +28,7 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
 	int err;
 
 	if (br->stp_enabled != BR_NO_STP) {
-		NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled\n");
+		NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled");
 		return -EINVAL;
 	}
 
diff --git a/net/bridge/br_stp_if.c b/net/bridge/br_stp_if.c
index a42850b7eb9a..ba55851fe132 100644
--- a/net/bridge/br_stp_if.c
+++ b/net/bridge/br_stp_if.c
@@ -203,7 +203,7 @@ int br_stp_set_enabled(struct net_bridge *br, unsigned long val,
 
 	if (br_mrp_enabled(br)) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "STP can't be enabled if MRP is already enabled\n");
+				   "STP can't be enabled if MRP is already enabled");
 		return -EINVAL;
 	}
 
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index ea0fcf7bf786..dfb4282fc339 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -911,13 +911,13 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev,
 
 	if (!ds->ops->port_policer_add) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Policing offload not implemented\n");
+				   "Policing offload not implemented");
 		return -EOPNOTSUPP;
 	}
 
 	if (!ingress) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Only supported on ingress qdisc\n");
+				   "Only supported on ingress qdisc");
 		return -EOPNOTSUPP;
 	}
 
@@ -928,7 +928,7 @@ dsa_slave_add_cls_matchall_police(struct net_device *dev,
 	list_for_each_entry(mall_tc_entry, &p->mall_tc_list, list) {
 		if (mall_tc_entry->type == DSA_PORT_MALL_POLICER) {
 			NL_SET_ERR_MSG_MOD(extack,
-					   "Only one port policer allowed\n");
+					   "Only one port policer allowed");
 			return -EEXIST;
 		}
 	}
diff --git a/scripts/coccinelle/misc/newline_in_nl_msg.cocci b/scripts/coccinelle/misc/newline_in_nl_msg.cocci
new file mode 100644
index 000000000000..c175886e4015
--- /dev/null
+++ b/scripts/coccinelle/misc/newline_in_nl_msg.cocci
@@ -0,0 +1,75 @@
+// SPDX-License-Identifier: GPL-2.0-only
+///
+/// Catch strings ending in newline with GENL_SET_ERR_MSG, NL_SET_ERR_MSG,
+/// NL_SET_ERR_MSG_MOD.
+///
+// Confidence: Very High
+// Copyright: (C) 2020 Intel Corporation
+// URL: http://coccinelle.lip6.fr/
+// Options: --no-includes --include-headers
+
+virtual context
+virtual org
+virtual report
+
+@r depends on context || org || report@
+expression e;
+constant m;
+position p;
+@@
+  \(GENL_SET_ERR_MSG\|NL_SET_ERR_MSG\|NL_SET_ERR_MSG_MOD\)(e,m@p)
+
+@script:python@
+m << r.m;
+@@
+
+if not m.endswith("\\n\""):
+	cocci.include_match(False)
+
+@r1 depends on r@
+identifier fname;
+expression r.e;
+constant r.m;
+position r.p;
+@@
+  fname(e,m@p)
+
+//----------------------------------------------------------
+//  For context mode
+//----------------------------------------------------------
+
+@depends on context && r@
+identifier r1.fname;
+expression r.e;
+constant r.m;
+@@
+* fname(e,m)
+
+//----------------------------------------------------------
+//  For org mode
+//----------------------------------------------------------
+
+@script:python depends on org@
+fname << r1.fname;
+m << r.m;
+p << r.p;
+@@
+
+if m.endswith("\\n\""):
+	msg="WARNING avoid newline at end of message in %s" % (fname)
+	msg_safe=msg.replace("[","@(").replace("]",")")
+	coccilib.org.print_todo(p[0], msg_safe)
+
+//----------------------------------------------------------
+//  For report mode
+//----------------------------------------------------------
+
+@script:python depends on report@
+fname << r1.fname;
+m << r.m;
+p << r.p;
+@@
+
+if m.endswith("\\n\""):
+	msg="WARNING avoid newline at end of message in %s" % (fname)
+	coccilib.report.print_report(p[0], msg)
-- 
cgit v1.2.3-59-g8ed1b


From 636ef28d6e4d174e424102466caf572b0406fb0e Mon Sep 17 00:00:00 2001
From: zhang kai <zhangkaiheb@126.com>
Date: Thu, 7 May 2020 11:08:30 +0800
Subject: tcp: tcp_mark_head_lost is only valid for sack-tcp

so tcp_is_sack/reno checks are removed from tcp_mark_head_lost.

Signed-off-by: zhang kai <zhangkaiheb@126.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_input.c | 32 +++++++-------------------------
 1 file changed, 7 insertions(+), 25 deletions(-)

diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index 66e55e51c550..7529c2816f2f 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -2183,8 +2183,7 @@ static bool tcp_time_to_recover(struct sock *sk, int flag)
 }
 
 /* Detect loss in event "A" above by marking head of queue up as lost.
- * For non-SACK(Reno) senders, the first "packets" number of segments
- * are considered lost. For RFC3517 SACK, a segment is considered lost if it
+ * For RFC3517 SACK, a segment is considered lost if it
  * has at least tp->reordering SACKed seqments above it; "packets" refers to
  * the maximum SACKed segments to pass before reaching this limit.
  */
@@ -2192,10 +2191,9 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 {
 	struct tcp_sock *tp = tcp_sk(sk);
 	struct sk_buff *skb;
-	int cnt, oldcnt, lost;
-	unsigned int mss;
+	int cnt;
 	/* Use SACK to deduce losses of new sequences sent during recovery */
-	const u32 loss_high = tcp_is_sack(tp) ?  tp->snd_nxt : tp->high_seq;
+	const u32 loss_high = tp->snd_nxt;
 
 	WARN_ON(packets > tp->packets_out);
 	skb = tp->lost_skb_hint;
@@ -2218,26 +2216,11 @@ static void tcp_mark_head_lost(struct sock *sk, int packets, int mark_head)
 		if (after(TCP_SKB_CB(skb)->end_seq, loss_high))
 			break;
 
-		oldcnt = cnt;
-		if (tcp_is_reno(tp) ||
-		    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED))
+		if (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED)
 			cnt += tcp_skb_pcount(skb);
 
-		if (cnt > packets) {
-			if (tcp_is_sack(tp) ||
-			    (TCP_SKB_CB(skb)->sacked & TCPCB_SACKED_ACKED) ||
-			    (oldcnt >= packets))
-				break;
-
-			mss = tcp_skb_mss(skb);
-			/* If needed, chop off the prefix to mark as lost. */
-			lost = (packets - oldcnt) * mss;
-			if (lost < skb->len &&
-			    tcp_fragment(sk, TCP_FRAG_IN_RTX_QUEUE, skb,
-					 lost, mss, GFP_ATOMIC) < 0)
-				break;
-			cnt = packets;
-		}
+		if (cnt > packets)
+			break;
 
 		tcp_skb_mark_lost(tp, skb);
 
@@ -2849,8 +2832,7 @@ static void tcp_fastretrans_alert(struct sock *sk, const u32 prior_snd_una,
 			if (tcp_try_undo_partial(sk, prior_snd_una))
 				return;
 			/* Partial ACK arrived. Force fast retransmit. */
-			do_lost = tcp_is_reno(tp) ||
-				  tcp_force_fast_retransmit(sk);
+			do_lost = tcp_force_fast_retransmit(sk);
 		}
 		if (tcp_try_undo_dsack(sk)) {
 			tcp_try_keep_open(sk);
-- 
cgit v1.2.3-59-g8ed1b


From ca7e3edc221d5cf750ae04cac29cf9fe9db38e84 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 7 May 2020 16:24:06 +0200
Subject: net/smc: remove set but not used variables 'del_llc, del_llc_resp'

Fixes gcc '-Wunused-but-set-variable' warning:

net/smc/smc_llc.c: In function 'smc_llc_cli_conf_link':
net/smc/smc_llc.c:753:31: warning:
 variable 'del_llc' set but not used [-Wunused-but-set-variable]
  struct smc_llc_msg_del_link *del_llc;
                               ^
net/smc/smc_llc.c: In function 'smc_llc_process_srv_delete_link':
net/smc/smc_llc.c:1311:33: warning:
 variable 'del_llc_resp' set but not used [-Wunused-but-set-variable]
    struct smc_llc_msg_del_link *del_llc_resp;
                                 ^

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_llc.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/net/smc/smc_llc.c b/net/smc/smc_llc.c
index 4cc583678ac7..391237b601fe 100644
--- a/net/smc/smc_llc.c
+++ b/net/smc/smc_llc.c
@@ -750,7 +750,6 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 				 enum smc_lgr_type lgr_new_t)
 {
 	struct smc_link_group *lgr = link->lgr;
-	struct smc_llc_msg_del_link *del_llc;
 	struct smc_llc_qentry *qentry = NULL;
 	int rc = 0;
 
@@ -764,7 +763,6 @@ static int smc_llc_cli_conf_link(struct smc_link *link,
 	}
 	if (qentry->msg.raw.hdr.common.type != SMC_LLC_CONFIRM_LINK) {
 		/* received DELETE_LINK instead */
-		del_llc = &qentry->msg.delete_link;
 		qentry->msg.raw.hdr.flags |= SMC_LLC_FLAG_RESP;
 		smc_llc_send_message(link, &qentry->msg);
 		smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
@@ -1308,16 +1306,12 @@ static void smc_llc_process_srv_delete_link(struct smc_link_group *lgr)
 		 * enqueued DELETE_LINK request (forward it)
 		 */
 		if (!smc_llc_send_message(lnk, &qentry->msg)) {
-			struct smc_llc_msg_del_link *del_llc_resp;
 			struct smc_llc_qentry *qentry2;
 
 			qentry2 = smc_llc_wait(lgr, lnk, SMC_LLC_WAIT_TIME,
 					       SMC_LLC_DELETE_LINK);
-			if (!qentry2) {
-			} else {
-				del_llc_resp = &qentry2->msg.delete_link;
+			if (qentry2)
 				smc_llc_flow_qentry_del(&lgr->llc_flow_lcl);
-			}
 		}
 	}
 	smcr_link_clear(lnk_del, true);
-- 
cgit v1.2.3-59-g8ed1b


From 3a13f98b4c16fb3489bdfd7550fcaa333ee69850 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 7 May 2020 15:34:30 +0100
Subject: net: phy: fix less than zero comparison with unsigned variable val

The unsigned variable val is being checked for an error by checking
if it is less than zero. This can never occur because val is unsigned.
Fix this by making val a plain int.

Addresses-Coverity: ("Unsigned compared against zero")
Fixes: bdbdac7649fa ("ethtool: provide UAPI for PHY master/slave configuration.")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy_device.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 83fc8e1b5793..c3a107cf578e 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -1948,7 +1948,7 @@ static int genphy_setup_master_slave(struct phy_device *phydev)
 static int genphy_read_master_slave(struct phy_device *phydev)
 {
 	int cfg, state;
-	u16 val;
+	int val;
 
 	if (!phydev->is_gigabit_capable) {
 		phydev->master_slave_get = MASTER_SLAVE_CFG_UNSUPPORTED;
-- 
cgit v1.2.3-59-g8ed1b


From 307f660d056b5eb8f5bb2328fac3915ab75b5007 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 09:32:18 -0700
Subject: netpoll: remove dev argument from netpoll_send_skb_on_dev()

netpoll_send_skb_on_dev() can get the device pointer directly from np->dev

Rename it to __netpoll_send_skb()

Following patch will move netpoll_send_skb() out-of-line.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  5 ++---
 net/core/netpoll.c      | 10 ++++++----
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 676f1ff161a9..00e0bae3d402 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,13 +63,12 @@ int netpoll_setup(struct netpoll *np);
 void __netpoll_cleanup(struct netpoll *np);
 void __netpoll_free(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
-void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
-			     struct net_device *dev);
+void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	unsigned long flags;
 	local_irq_save(flags);
-	netpoll_send_skb_on_dev(np, skb, np->dev);
+	__netpoll_send_skb(np, skb);
 	local_irq_restore(flags);
 }
 
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 15b366a1a958..c5059b7ffc94 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -305,17 +305,19 @@ static int netpoll_owner_active(struct net_device *dev)
 }
 
 /* call with IRQ disabled */
-void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
-			     struct net_device *dev)
+void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	netdev_tx_t status = NETDEV_TX_BUSY;
+	struct net_device *dev;
 	unsigned long tries;
 	/* It is up to the caller to keep npinfo alive. */
 	struct netpoll_info *npinfo;
 
 	lockdep_assert_irqs_disabled();
 
-	npinfo = rcu_dereference_bh(np->dev->npinfo);
+	dev = np->dev;
+	npinfo = rcu_dereference_bh(dev->npinfo);
+
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
 		dev_kfree_skb_irq(skb);
 		return;
@@ -358,7 +360,7 @@ void netpoll_send_skb_on_dev(struct netpoll *np, struct sk_buff *skb,
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
 }
-EXPORT_SYMBOL(netpoll_send_skb_on_dev);
+EXPORT_SYMBOL(__netpoll_send_skb);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
-- 
cgit v1.2.3-59-g8ed1b


From fb1eee476b0d3be3e58dac1a3a96f726c6278bed Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 09:32:19 -0700
Subject: netpoll: move netpoll_send_skb() out of line

There is no need to inline this helper, as we intend to add more
code in this function.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  9 +--------
 net/core/netpoll.c      | 13 +++++++++++--
 2 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index 00e0bae3d402..e466ddffef61 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,14 +63,7 @@ int netpoll_setup(struct netpoll *np);
 void __netpoll_cleanup(struct netpoll *np);
 void __netpoll_free(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
-void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
-static inline void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
-{
-	unsigned long flags;
-	local_irq_save(flags);
-	__netpoll_send_skb(np, skb);
-	local_irq_restore(flags);
-}
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 #ifdef CONFIG_NETPOLL
 static inline void *netpoll_poll_lock(struct napi_struct *napi)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index c5059b7ffc94..34cd34f24423 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -305,7 +305,7 @@ static int netpoll_owner_active(struct net_device *dev)
 }
 
 /* call with IRQ disabled */
-void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+static void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	netdev_tx_t status = NETDEV_TX_BUSY;
 	struct net_device *dev;
@@ -360,7 +360,16 @@ void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
 }
-EXPORT_SYMBOL(__netpoll_send_skb);
+
+void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+{
+	unsigned long flags;
+
+	local_irq_save(flags);
+	__netpoll_send_skb(np, skb);
+	local_irq_restore(flags);
+}
+EXPORT_SYMBOL(netpoll_send_skb);
 
 void netpoll_send_udp(struct netpoll *np, const char *msg, int len)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 1ddabdfaf70c202b88925edd74c66f4707dbd92e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 09:32:20 -0700
Subject: netpoll: netpoll_send_skb() returns transmit status

Some callers want to know if the packet has been sent or
dropped, to inform upper stacks.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netpoll.h |  2 +-
 net/core/netpoll.c      | 11 +++++++----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/linux/netpoll.h b/include/linux/netpoll.h
index e466ddffef61..f47af135bd56 100644
--- a/include/linux/netpoll.h
+++ b/include/linux/netpoll.h
@@ -63,7 +63,7 @@ int netpoll_setup(struct netpoll *np);
 void __netpoll_cleanup(struct netpoll *np);
 void __netpoll_free(struct netpoll *np);
 void netpoll_cleanup(struct netpoll *np);
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
+netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb);
 
 #ifdef CONFIG_NETPOLL
 static inline void *netpoll_poll_lock(struct napi_struct *napi)
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 34cd34f24423..40d2753aa47d 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -305,7 +305,7 @@ static int netpoll_owner_active(struct net_device *dev)
 }
 
 /* call with IRQ disabled */
-static void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+static netdev_tx_t __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	netdev_tx_t status = NETDEV_TX_BUSY;
 	struct net_device *dev;
@@ -320,7 +320,7 @@ static void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 
 	if (!npinfo || !netif_running(dev) || !netif_device_present(dev)) {
 		dev_kfree_skb_irq(skb);
-		return;
+		return NET_XMIT_DROP;
 	}
 
 	/* don't get messages out of order, and no recursion */
@@ -359,15 +359,18 @@ static void __netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 		skb_queue_tail(&npinfo->txq, skb);
 		schedule_delayed_work(&npinfo->tx_work,0);
 	}
+	return NETDEV_TX_OK;
 }
 
-void netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
+netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 {
 	unsigned long flags;
+	netdev_tx_t ret;
 
 	local_irq_save(flags);
-	__netpoll_send_skb(np, skb);
+	ret = __netpoll_send_skb(np, skb);
 	local_irq_restore(flags);
+	return ret;
 }
 EXPORT_SYMBOL(netpoll_send_skb);
 
-- 
cgit v1.2.3-59-g8ed1b


From f78ed2204db9fc35b545d693865bddbe0149aa1f Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 09:32:21 -0700
Subject: netpoll: accept NULL np argument in netpoll_send_skb()

netpoll_send_skb() callers seem to leak skb if
the np pointer is NULL. While this should not happen, we
can make the code more robust.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c   |  5 ++---
 include/linux/if_team.h |  5 +----
 include/net/bonding.h   |  5 +----
 net/8021q/vlan_dev.c    |  5 ++---
 net/bridge/br_private.h |  5 +----
 net/core/netpoll.c      | 11 ++++++++---
 net/dsa/slave.c         |  5 ++---
 7 files changed, 17 insertions(+), 24 deletions(-)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 34eb073cdd74..9a419d5102ce 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -542,12 +542,11 @@ xmit_world:
 static inline netdev_tx_t macvlan_netpoll_send_skb(struct macvlan_dev *vlan, struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	if (vlan->netpoll)
-		netpoll_send_skb(vlan->netpoll, skb);
+	return netpoll_send_skb(vlan->netpoll, skb);
 #else
 	BUG();
-#endif
 	return NETDEV_TX_OK;
+#endif
 }
 
 static netdev_tx_t macvlan_start_xmit(struct sk_buff *skb,
diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index ec7e4bd07f82..537dc2b8c879 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -102,10 +102,7 @@ static inline bool team_port_dev_txable(const struct net_device *port_dev)
 static inline void team_netpoll_send_skb(struct team_port *port,
 					 struct sk_buff *skb)
 {
-	struct netpoll *np = port->np;
-
-	if (np)
-		netpoll_send_skb(np, skb);
+	netpoll_send_skb(port->np, skb);
 }
 #else
 static inline void team_netpoll_send_skb(struct team_port *port,
diff --git a/include/net/bonding.h b/include/net/bonding.h
index 0b696da5c115..f211983cd52a 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -507,10 +507,7 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
 static inline void bond_netpoll_send_skb(const struct slave *slave,
 					 struct sk_buff *skb)
 {
-	struct netpoll *np = slave->np;
-
-	if (np)
-		netpoll_send_skb(np, skb);
+	netpoll_send_skb(slave->np, skb);
 }
 #else
 static inline void bond_netpoll_send_skb(const struct slave *slave,
diff --git a/net/8021q/vlan_dev.c b/net/8021q/vlan_dev.c
index 319220b2341d..f00bb57f0f60 100644
--- a/net/8021q/vlan_dev.c
+++ b/net/8021q/vlan_dev.c
@@ -88,12 +88,11 @@ static int vlan_dev_hard_header(struct sk_buff *skb, struct net_device *dev,
 static inline netdev_tx_t vlan_netpoll_send_skb(struct vlan_dev_priv *vlan, struct sk_buff *skb)
 {
 #ifdef CONFIG_NET_POLL_CONTROLLER
-	if (vlan->netpoll)
-		netpoll_send_skb(vlan->netpoll, skb);
+	return netpoll_send_skb(vlan->netpoll, skb);
 #else
 	BUG();
-#endif
 	return NETDEV_TX_OK;
+#endif
 }
 
 static netdev_tx_t vlan_dev_hard_start_xmit(struct sk_buff *skb,
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 78d3a951180d..4dc21e8f7e33 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -598,10 +598,7 @@ netdev_tx_t br_dev_xmit(struct sk_buff *skb, struct net_device *dev);
 static inline void br_netpoll_send_skb(const struct net_bridge_port *p,
 				       struct sk_buff *skb)
 {
-	struct netpoll *np = p->np;
-
-	if (np)
-		netpoll_send_skb(np, skb);
+	netpoll_send_skb(p->np, skb);
 }
 
 int br_netpoll_enable(struct net_bridge_port *p);
diff --git a/net/core/netpoll.c b/net/core/netpoll.c
index 40d2753aa47d..093e90e52bc2 100644
--- a/net/core/netpoll.c
+++ b/net/core/netpoll.c
@@ -367,9 +367,14 @@ netdev_tx_t netpoll_send_skb(struct netpoll *np, struct sk_buff *skb)
 	unsigned long flags;
 	netdev_tx_t ret;
 
-	local_irq_save(flags);
-	ret = __netpoll_send_skb(np, skb);
-	local_irq_restore(flags);
+	if (unlikely(!np)) {
+		dev_kfree_skb_irq(skb);
+		ret = NET_XMIT_DROP;
+	} else {
+		local_irq_save(flags);
+		ret = __netpoll_send_skb(np, skb);
+		local_irq_restore(flags);
+	}
 	return ret;
 }
 EXPORT_SYMBOL(netpoll_send_skb);
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index dfb4282fc339..61b0de52040a 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -445,12 +445,11 @@ static inline netdev_tx_t dsa_slave_netpoll_send_skb(struct net_device *dev,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	struct dsa_slave_priv *p = netdev_priv(dev);
 
-	if (p->netpoll)
-		netpoll_send_skb(p->netpoll, skb);
+	return netpoll_send_skb(p->netpoll, skb);
 #else
 	BUG();
-#endif
 	return NETDEV_TX_OK;
+#endif
 }
 
 static void dsa_skb_tx_timestamp(struct dsa_slave_priv *p,
-- 
cgit v1.2.3-59-g8ed1b


From ae46f184bc1fb15bf2de47114c29236e61ca4bbc Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 09:32:22 -0700
Subject: bonding: propagate transmit status

Currently, bonding always returns NETDEV_TX_OK to its caller.

It is worth trying to be more accurate: TCP, for instance, can use
different recovery strategies if it gets a more precise status,
e.g. when the packet was dropped by the slave qdisc.

This is especially important when host is under stress.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Jay Vosburgh <j.vosburgh@gmail.com>
Cc: Veaceslav Falico <vfalico@gmail.com>
Cc: Andy Gospodarek <andy@greyhouse.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/bonding/bond_alb.c  |  7 ++---
 drivers/net/bonding/bond_main.c | 60 +++++++++++++++--------------------------
 include/net/bonding.h           | 13 +++++----
 3 files changed, 32 insertions(+), 48 deletions(-)

diff --git a/drivers/net/bonding/bond_alb.c b/drivers/net/bonding/bond_alb.c
index c81698550e5a..3a598d04b156 100644
--- a/drivers/net/bonding/bond_alb.c
+++ b/drivers/net/bonding/bond_alb.c
@@ -1318,8 +1318,7 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
 					tx_slave->dev->dev_addr);
 		}
 
-		bond_dev_queue_xmit(bond, skb, tx_slave->dev);
-		goto out;
+		return bond_dev_queue_xmit(bond, skb, tx_slave->dev);
 	}
 
 	if (tx_slave && bond->params.tlb_dynamic_lb) {
@@ -1329,9 +1328,7 @@ static netdev_tx_t bond_do_alb_xmit(struct sk_buff *skb, struct bonding *bond,
 	}
 
 	/* no suitable interface, frame not sent */
-	bond_tx_drop(bond->dev, skb);
-out:
-	return NETDEV_TX_OK;
+	return bond_tx_drop(bond->dev, skb);
 }
 
 netdev_tx_t bond_tlb_xmit(struct sk_buff *skb, struct net_device *bond_dev)
diff --git a/drivers/net/bonding/bond_main.c b/drivers/net/bonding/bond_main.c
index baa93191dfdd..4f9e7c421f57 100644
--- a/drivers/net/bonding/bond_main.c
+++ b/drivers/net/bonding/bond_main.c
@@ -287,7 +287,7 @@ const char *bond_mode_name(int mode)
  * @skb: hw accel VLAN tagged skb to transmit
  * @slave_dev: slave that is supposed to xmit this skbuff
  */
-void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
+netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 			struct net_device *slave_dev)
 {
 	skb->dev = slave_dev;
@@ -297,9 +297,9 @@ void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb,
 	skb_set_queue_mapping(skb, qdisc_skb_cb(skb)->slave_dev_queue_mapping);
 
 	if (unlikely(netpoll_tx_running(bond->dev)))
-		bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
-	else
-		dev_queue_xmit(skb);
+		return bond_netpoll_send_skb(bond_get_slave_by_dev(bond, slave_dev), skb);
+
+	return dev_queue_xmit(skb);
 }
 
 /* In the following 2 functions, bond_vlan_rx_add_vid and bond_vlan_rx_kill_vid,
@@ -3932,7 +3932,7 @@ unwind:
  * it fails, it tries to find the first available slave for transmission.
  * The skb is consumed in all cases, thus the function is void.
  */
-static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
+static netdev_tx_t bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int slave_id)
 {
 	struct list_head *iter;
 	struct slave *slave;
@@ -3941,10 +3941,8 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl
 	/* Here we start from the slave with slave_id */
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0) {
-			if (bond_slave_can_tx(slave)) {
-				bond_dev_queue_xmit(bond, skb, slave->dev);
-				return;
-			}
+			if (bond_slave_can_tx(slave))
+				return bond_dev_queue_xmit(bond, skb, slave->dev);
 		}
 	}
 
@@ -3953,13 +3951,11 @@ static void bond_xmit_slave_id(struct bonding *bond, struct sk_buff *skb, int sl
 	bond_for_each_slave_rcu(bond, slave, iter) {
 		if (--i < 0)
 			break;
-		if (bond_slave_can_tx(slave)) {
-			bond_dev_queue_xmit(bond, skb, slave->dev);
-			return;
-		}
+		if (bond_slave_can_tx(slave))
+			return bond_dev_queue_xmit(bond, skb, slave->dev);
 	}
 	/* no slave that can tx has been found */
-	bond_tx_drop(bond->dev, skb);
+	return bond_tx_drop(bond->dev, skb);
 }
 
 /**
@@ -4020,10 +4016,8 @@ static netdev_tx_t bond_xmit_roundrobin(struct sk_buff *skb,
 		if (iph->protocol == IPPROTO_IGMP) {
 			slave = rcu_dereference(bond->curr_active_slave);
 			if (slave)
-				bond_dev_queue_xmit(bond, skb, slave->dev);
-			else
-				bond_xmit_slave_id(bond, skb, 0);
-			return NETDEV_TX_OK;
+				return bond_dev_queue_xmit(bond, skb, slave->dev);
+			return bond_xmit_slave_id(bond, skb, 0);
 		}
 	}
 
@@ -4031,11 +4025,9 @@ non_igmp:
 	slave_cnt = READ_ONCE(bond->slave_cnt);
 	if (likely(slave_cnt)) {
 		slave_id = bond_rr_gen_slave_id(bond);
-		bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
-	} else {
-		bond_tx_drop(bond_dev, skb);
+		return bond_xmit_slave_id(bond, skb, slave_id % slave_cnt);
 	}
-	return NETDEV_TX_OK;
+	return bond_tx_drop(bond_dev, skb);
 }
 
 /* In active-backup mode, we know that bond->curr_active_slave is always valid if
@@ -4049,11 +4041,9 @@ static netdev_tx_t bond_xmit_activebackup(struct sk_buff *skb,
 
 	slave = rcu_dereference(bond->curr_active_slave);
 	if (slave)
-		bond_dev_queue_xmit(bond, skb, slave->dev);
-	else
-		bond_tx_drop(bond_dev, skb);
+		return bond_dev_queue_xmit(bond, skb, slave->dev);
 
-	return NETDEV_TX_OK;
+	return bond_tx_drop(bond_dev, skb);
 }
 
 /* Use this to update slave_array when (a) it's not appropriate to update
@@ -4196,12 +4186,9 @@ static netdev_tx_t bond_3ad_xor_xmit(struct sk_buff *skb,
 	count = slaves ? READ_ONCE(slaves->count) : 0;
 	if (likely(count)) {
 		slave = slaves->arr[bond_xmit_hash(bond, skb) % count];
-		bond_dev_queue_xmit(bond, skb, slave->dev);
-	} else {
-		bond_tx_drop(dev, skb);
+		return bond_dev_queue_xmit(bond, skb, slave->dev);
 	}
-
-	return NETDEV_TX_OK;
+	return bond_tx_drop(dev, skb);
 }
 
 /* in broadcast mode, we send everything to all usable interfaces. */
@@ -4227,11 +4214,9 @@ static netdev_tx_t bond_xmit_broadcast(struct sk_buff *skb,
 		}
 	}
 	if (slave && bond_slave_is_up(slave) && slave->link == BOND_LINK_UP)
-		bond_dev_queue_xmit(bond, skb, slave->dev);
-	else
-		bond_tx_drop(bond_dev, skb);
+		return bond_dev_queue_xmit(bond, skb, slave->dev);
 
-	return NETDEV_TX_OK;
+	return bond_tx_drop(bond_dev, skb);
 }
 
 /*------------------------- Device initialization ---------------------------*/
@@ -4310,8 +4295,7 @@ static netdev_tx_t __bond_start_xmit(struct sk_buff *skb, struct net_device *dev
 		/* Should never happen, mode already checked */
 		netdev_err(dev, "Unknown bonding mode %d\n", BOND_MODE(bond));
 		WARN_ON_ONCE(1);
-		bond_tx_drop(dev, skb);
-		return NETDEV_TX_OK;
+		return bond_tx_drop(dev, skb);
 	}
 }
 
@@ -4330,7 +4314,7 @@ static netdev_tx_t bond_start_xmit(struct sk_buff *skb, struct net_device *dev)
 	if (bond_has_slaves(bond))
 		ret = __bond_start_xmit(skb, dev);
 	else
-		bond_tx_drop(dev, skb);
+		ret = bond_tx_drop(dev, skb);
 	rcu_read_unlock();
 
 	return ret;
diff --git a/include/net/bonding.h b/include/net/bonding.h
index f211983cd52a..9b1e76515a9c 100644
--- a/include/net/bonding.h
+++ b/include/net/bonding.h
@@ -504,15 +504,17 @@ static inline unsigned long slave_last_rx(struct bonding *bond,
 }
 
 #ifdef CONFIG_NET_POLL_CONTROLLER
-static inline void bond_netpoll_send_skb(const struct slave *slave,
+static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
 					 struct sk_buff *skb)
 {
-	netpoll_send_skb(slave->np, skb);
+	return netpoll_send_skb(slave->np, skb);
 }
 #else
-static inline void bond_netpoll_send_skb(const struct slave *slave,
+static inline netdev_tx_t bond_netpoll_send_skb(const struct slave *slave,
 					 struct sk_buff *skb)
 {
+	BUG();
+	return NETDEV_TX_OK;
 }
 #endif
 
@@ -606,7 +608,7 @@ struct bond_net {
 };
 
 int bond_arp_rcv(const struct sk_buff *skb, struct bonding *bond, struct slave *slave);
-void bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
+netdev_tx_t bond_dev_queue_xmit(struct bonding *bond, struct sk_buff *skb, struct net_device *slave_dev);
 int bond_create(struct net *net, const char *name);
 int bond_create_sysfs(struct bond_net *net);
 void bond_destroy_sysfs(struct bond_net *net);
@@ -739,10 +741,11 @@ extern struct bond_parm_tbl ad_select_tbl[];
 /* exported from bond_netlink.c */
 extern struct rtnl_link_ops bond_link_ops;
 
-static inline void bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
+static inline netdev_tx_t bond_tx_drop(struct net_device *dev, struct sk_buff *skb)
 {
 	atomic_long_inc(&dev->tx_dropped);
 	dev_kfree_skb_any(skb);
+	return NET_XMIT_DROP;
 }
 
 #endif /* _NET_BONDING_H */
-- 
cgit v1.2.3-59-g8ed1b


From 790709f249728640faa4eff38286a9feb34fed81 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 10:05:39 -0700
Subject: net: relax SO_TXTIME CAP_NET_ADMIN check

Now that sch_fq has the horizon feature, we want to allow QUIC/UDP
applications to use the EDT model so that pacing can be offloaded to
the kernel (sch_fq) or the NIC.

CAP_NET_ADMIN is now required only when the requested clockid is not
CLOCK_MONOTONIC.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Acked-by: Willem de Bruijn <willemb@google.com>
Acked-by: Vinicius Costa Gomes <vinicius.gomes@intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/sock.c | 28 ++++++++++++++++++----------
 1 file changed, 18 insertions(+), 10 deletions(-)

diff --git a/net/core/sock.c b/net/core/sock.c
index b714162213ae..fd85e651ce28 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -1152,23 +1152,31 @@ set_rcvbuf:
 		break;
 
 	case SO_TXTIME:
-		if (!ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
-			ret = -EPERM;
-		} else if (optlen != sizeof(struct sock_txtime)) {
+		if (optlen != sizeof(struct sock_txtime)) {
 			ret = -EINVAL;
+			break;
 		} else if (copy_from_user(&sk_txtime, optval,
 			   sizeof(struct sock_txtime))) {
 			ret = -EFAULT;
+			break;
 		} else if (sk_txtime.flags & ~SOF_TXTIME_FLAGS_MASK) {
 			ret = -EINVAL;
-		} else {
-			sock_valbool_flag(sk, SOCK_TXTIME, true);
-			sk->sk_clockid = sk_txtime.clockid;
-			sk->sk_txtime_deadline_mode =
-				!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
-			sk->sk_txtime_report_errors =
-				!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
+			break;
+		}
+		/* CLOCK_MONOTONIC is only used by sch_fq, and this packet
+		 * scheduler has enough safe guards.
+		 */
+		if (sk_txtime.clockid != CLOCK_MONOTONIC &&
+		    !ns_capable(sock_net(sk)->user_ns, CAP_NET_ADMIN)) {
+			ret = -EPERM;
+			break;
 		}
+		sock_valbool_flag(sk, SOCK_TXTIME, true);
+		sk->sk_clockid = sk_txtime.clockid;
+		sk->sk_txtime_deadline_mode =
+			!!(sk_txtime.flags & SOF_TXTIME_DEADLINE_MODE);
+		sk->sk_txtime_report_errors =
+			!!(sk_txtime.flags & SOF_TXTIME_REPORT_ERRORS);
 		break;
 
 	case SO_BINDTOIFINDEX:
-- 
cgit v1.2.3-59-g8ed1b


From 382d8296c5b5664a4f16653f71d3fbf64263afb3 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 24 Apr 2020 18:48:07 +0300
Subject: iwlwifi: fw api: fix PHY data 2/3 position

In AX210 devices, the PHY data wasn't actually reported, but now
that it's going to be reported it turns out that the positions of
PHY data 2 and PHY data 3 are supposed to be the other way around.
Fix that.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182643.06de959301f5.I544c353a8a811f107bd66d168e37920237ecf071@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/rx.h | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
index 88bc7733065f..b8b36a4f9eb9 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/rx.h
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -535,9 +533,9 @@ struct iwl_rx_mpdu_desc_v3 {
 		__le32 filter_match;
 
 		/**
-		 * @phy_data2: depends on info type (see @phy_data1)
+		 * @phy_data3: depends on info type (see @phy_data1)
 		 */
-		__le32 phy_data2;
+		__le32 phy_data3;
 	};
 
 	/* DW8 - carries rss_hash only when rpa_en == 1 */
@@ -548,9 +546,9 @@ struct iwl_rx_mpdu_desc_v3 {
 		__le32 rss_hash;
 
 		/**
-		 * @phy_data3: depends on info type (see @phy_data1)
+		 * @phy_data2: depends on info type (see @phy_data1)
 		 */
-		__le32 phy_data3;
+		__le32 phy_data2;
 	};
 	/* DW9 */
 	/**
-- 
cgit v1.2.3-59-g8ed1b


From ebfa7f8ae155c9a0bb2e4038d6b5d8b14881c424 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 18:48:08 +0300
Subject: iwlwifi: yoyo: add support for parsing SHARED_MEM_ALLOC version 4

The new version adds the information for RX2C FIFO addresses.

Use the new addresses to parse the FIFO info when dumping.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182643.97cc25d96b53.I65fd0400d80f505bd6d7eed442f12db24b25bbe3@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/api/debug.h | 14 ++++++-----
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c       | 29 ++++++++++++++++++++---
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h   |  5 ++--
 drivers/net/wireless/intel/iwlwifi/fw/smem.c      | 14 +++++++----
 drivers/net/wireless/intel/iwlwifi/iwl-prph.h     | 11 ++++-----
 5 files changed, 52 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
index 98e957ecbeed..94b1a1268476 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/debug.h
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -218,6 +216,8 @@ struct iwl_shared_mem_lmac_cfg {
  * @page_buff_size: size of %page_buff_addr
  * @lmac_num: number of LMACs (1 or 2)
  * @lmac_smem: per - LMAC smem data
+ * @rxfifo2_control_addr: start addr of RXF2C
+ * @rxfifo2_control_size: size of RXF2C
  */
 struct iwl_shared_mem_cfg {
 	__le32 shared_mem_addr;
@@ -229,8 +229,10 @@ struct iwl_shared_mem_cfg {
 	__le32 page_buff_addr;
 	__le32 page_buff_size;
 	__le32 lmac_num;
-	struct iwl_shared_mem_lmac_cfg lmac_smem[2];
-} __packed; /* SHARED_MEM_ALLOC_API_S_VER_3 */
+	struct iwl_shared_mem_lmac_cfg lmac_smem[3];
+	__le32 rxfifo2_control_addr;
+	__le32 rxfifo2_control_size;
+} __packed; /* SHARED_MEM_ALLOC_API_S_VER_4 */
 
 /**
  * struct iwl_mfuart_load_notif - mfuart image version & status
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 39c8332be3ac..914f0eb07d52 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1386,13 +1386,36 @@ static void iwl_ini_get_rxf_data(struct iwl_fw_runtime *fwrt,
 	}
 
 	fifo_idx = ffs(fid2) - 1;
-	if (fid2 && !WARN_ON_ONCE(fifo_idx != 0)) {
-		data->size = fwrt->smem_cfg.rxfifo2_size;
-		data->offset = RXF_DIFF_FROM_PREV;
+	if (fid2 && !WARN_ON_ONCE(~BIT(fifo_idx) & fid2)) {
+		u8 max_idx;
+
+		if (iwl_fw_lookup_notif_ver(fwrt->fw, SYSTEM_GROUP,
+					    SHARED_MEM_CFG_CMD, 0) <= 3)
+			max_idx = 0;
+		else
+			max_idx = 1;
+
+		if (WARN_ONCE(fifo_idx > max_idx,
+			      "invalid umac fifo idx %d", fifo_idx))
+			return;
+
 		/* use bit 31 to distinguish between umac and lmac rxf while
 		 * parsing the dump
 		 */
 		data->fifo_num = fifo_idx | IWL_RXF_UMAC_BIT;
+
+		switch (fifo_idx) {
+		case 0:
+			data->size = fwrt->smem_cfg.rxfifo2_size;
+			data->offset = iwl_umac_prph(fwrt->trans,
+						     RXF_DIFF_FROM_PREV);
+			break;
+		case 1:
+			data->size = fwrt->smem_cfg.rxfifo2_control_size;
+			data->offset = iwl_umac_prph(fwrt->trans,
+						     RXF2C_DIFF_FROM_PREV);
+			break;
+		}
 		return;
 	}
 }
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index 9906d9b9bdd5..9629ef94b214 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -86,6 +86,7 @@ struct iwl_fwrt_shared_mem_cfg {
 		u32 rxfifo1_size;
 	} lmac[MAX_NUM_LMAC];
 	u32 rxfifo2_size;
+	u32 rxfifo2_control_size;
 	u32 internal_txfifo_addr;
 	u32 internal_txfifo_size[TX_FIFO_INTERNAL_MAX_NUM];
 };
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/smem.c b/drivers/net/wireless/intel/iwlwifi/fw/smem.c
index 409b2dd854ac..700fdab14209 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/smem.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/smem.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -71,6 +69,8 @@ static void iwl_parse_shared_mem_22000(struct iwl_fw_runtime *fwrt,
 	struct iwl_shared_mem_cfg *mem_cfg = (void *)pkt->data;
 	int i, lmac;
 	int lmac_num = le32_to_cpu(mem_cfg->lmac_num);
+	u8 api_ver = iwl_fw_lookup_notif_ver(fwrt->fw, SYSTEM_GROUP,
+					     SHARED_MEM_CFG_CMD, 0);
 
 	if (WARN_ON(lmac_num > ARRAY_SIZE(mem_cfg->lmac_smem)))
 		return;
@@ -80,6 +80,12 @@ static void iwl_parse_shared_mem_22000(struct iwl_fw_runtime *fwrt,
 		ARRAY_SIZE(mem_cfg->lmac_smem[0].txfifo_size);
 	fwrt->smem_cfg.rxfifo2_size = le32_to_cpu(mem_cfg->rxfifo2_size);
 
+	if (api_ver >= 4 &&
+	    !WARN_ON_ONCE(iwl_rx_packet_payload_len(pkt) < sizeof(*mem_cfg))) {
+		fwrt->smem_cfg.rxfifo2_control_size =
+			le32_to_cpu(mem_cfg->rxfifo2_control_size);
+	}
+
 	for (lmac = 0; lmac < lmac_num; lmac++) {
 		struct iwl_shared_mem_lmac_cfg *lmac_cfg =
 			&mem_cfg->lmac_smem[lmac];
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
index 1136d9784f9d..8e254c0eda13 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-prph.h
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016        Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
+ * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016        Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -326,6 +324,7 @@
 #define RXF_SIZE_BYTE_CND_POS		(7)
 #define RXF_SIZE_BYTE_CNT_MSK		(0x3ff << RXF_SIZE_BYTE_CND_POS)
 #define RXF_DIFF_FROM_PREV		(0x200)
+#define RXF2C_DIFF_FROM_PREV		(0x4e00)
 
 #define RXF_LD_FENCE_OFFSET_ADDR	(0xa00c10)
 #define RXF_FIFO_RD_FENCE_ADDR		(0xa00c0c)
-- 
cgit v1.2.3-59-g8ed1b


From cc9b6012d34b8cb130d4269a79032b75a84bf46e Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 18:48:09 +0300
Subject: iwlwifi: yoyo: use hweight_long instead of bit manipulating

Also, exactly one of fid1 or fid2 can be set, so there is no need to
check whether fid2 is set when fid1 is not.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.34e74106bad3.Ic3a9f0a35fed47b02ebcd27c2dc2b50cb1e56bdf@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 25 +++++++++++++------------
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 914f0eb07d52..37c8b6cc2ec7 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1367,28 +1367,30 @@ static void iwl_ini_get_rxf_data(struct iwl_fw_runtime *fwrt,
 	struct iwl_fw_ini_region_tlv *reg = (void *)reg_data->reg_tlv->data;
 	u32 fid1 = le32_to_cpu(reg->fifos.fid[0]);
 	u32 fid2 = le32_to_cpu(reg->fifos.fid[1]);
-	u32 fifo_idx;
+	u8 fifo_idx;
 
 	if (!data)
 		return;
 
+	/* make sure only one bit is set in only one fid */
+	if (WARN_ONCE(hweight_long(fid1) + hweight_long(fid2) != 1,
+		      "fid1=%x, fid2=%x\n", fid1, fid2))
+		return;
+
 	memset(data, 0, sizeof(*data));
 
-	if (WARN_ON_ONCE((fid1 && fid2) || (!fid1 && !fid2)))
-		return;
+	if (fid1) {
+		fifo_idx = ffs(fid1) - 1;
+		if (WARN_ONCE(fifo_idx >= MAX_NUM_LMAC, "fifo_idx=%d\n",
+			      fifo_idx))
+			return;
 
-	fifo_idx = ffs(fid1) - 1;
-	if (fid1 && !WARN_ON_ONCE((~BIT(fifo_idx) & fid1) ||
-				  fifo_idx >= MAX_NUM_LMAC)) {
 		data->size = fwrt->smem_cfg.lmac[fifo_idx].rxfifo1_size;
 		data->fifo_num = fifo_idx;
-		return;
-	}
-
-	fifo_idx = ffs(fid2) - 1;
-	if (fid2 && !WARN_ON_ONCE(~BIT(fifo_idx) & fid2)) {
+	} else {
 		u8 max_idx;
 
+		fifo_idx = ffs(fid2) - 1;
 		if (iwl_fw_lookup_notif_ver(fwrt->fw, SYSTEM_GROUP,
 					    SHARED_MEM_CFG_CMD, 0) <= 3)
 			max_idx = 0;
@@ -1416,7 +1418,6 @@ static void iwl_ini_get_rxf_data(struct iwl_fw_runtime *fwrt,
 						     RXF2C_DIFF_FROM_PREV);
 			break;
 		}
-		return;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f25c418dcad93755cf48537d60a46070112be72c Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 24 Apr 2020 18:48:10 +0300
Subject: iwlwifi: remove antenna_coupling module parameter

This module parameter should not be mangled by users.
This relates to a very old driver and I doubt people can
really check the antenna coupling in a way that would make
the BT Coexistence work better with a real value.
Drop it.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.6e566897ce0a.I8395a50c1c39522e542366064bff33a33009ce7b@changeid
---
 drivers/net/wireless/intel/iwlwifi/dvm/dev.h       |  3 +--
 drivers/net/wireless/intel/iwlwifi/dvm/main.c      |  9 +--------
 drivers/net/wireless/intel/iwlwifi/dvm/rs.c        | 11 +----------
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c       | 11 ++---------
 drivers/net/wireless/intel/iwlwifi/iwl-modparams.h |  8 ++------
 5 files changed, 7 insertions(+), 35 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/dev.h b/drivers/net/wireless/intel/iwlwifi/dvm/dev.h
index 8d8380026180..4bd792c06ff6 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/dev.h
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/dev.h
@@ -1,7 +1,7 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
 /******************************************************************************
  *
- * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2003 - 2014, 2020 Intel Corporation. All rights reserved.
  *
  * Contact Information:
  *  Intel Linux Wireless <linuxwifi@intel.com>
@@ -810,7 +810,6 @@ struct iwl_priv {
 	u8 bt_traffic_load, last_bt_traffic_load;
 	bool bt_ch_announce;
 	bool bt_full_concurrent;
-	bool bt_ant_couple_ok;
 	__le32 kill_ack_mask;
 	__le32 kill_cts_mask;
 	__le16 bt_valid;
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index 598ee7315558..99822744122f 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -1,9 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /******************************************************************************
  *
- * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2003 - 2014, 2018 - 2020  Intel Corporation. All rights reserved.
  * Copyright(c) 2015 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
  *
  * Portions of this file are derived from the ipw3945 project, as well
  * as portions of the ieee80211 subsystem header files.
@@ -1370,12 +1369,6 @@ static struct iwl_op_mode *iwl_op_mode_dvm_start(struct iwl_trans *trans,
 
 	IWL_DEBUG_INFO(priv, "*** LOAD DRIVER ***\n");
 
-	/* is antenna coupling more than 35dB ? */
-	priv->bt_ant_couple_ok =
-		(iwlwifi_mod_params.antenna_coupling >
-			IWL_BT_ANTENNA_COUPLING_THRESHOLD) ?
-			true : false;
-
 	/* bt channel inhibition enabled*/
 	priv->bt_ch_announce = true;
 	IWL_DEBUG_INFO(priv, "BT channel inhibition is %s\n",
diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
index dac809df7f1d..4fa4eab2d7f3 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/rs.c
@@ -2,6 +2,7 @@
 /******************************************************************************
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright (C) 2019 - 2020 Intel Corporation
  *
  * Contact Information:
  *  Intel Linux Wireless <linuxwifi@intel.com>
@@ -846,16 +847,6 @@ static void rs_bt_update_lq(struct iwl_priv *priv, struct iwl_rxon_context *ctx,
 	struct iwl_scale_tbl_info *tbl;
 	bool full_concurrent = priv->bt_full_concurrent;
 
-	if (priv->bt_ant_couple_ok) {
-		/*
-		 * Is there a need to switch between
-		 * full concurrency and 3-wire?
-		 */
-		if (priv->bt_ci_compliance)
-			full_concurrent = true;
-		else
-			full_concurrent = false;
-	}
 	if ((priv->bt_traffic_load != priv->last_bt_traffic_load) ||
 	    (priv->bt_full_concurrent != full_concurrent)) {
 		priv->bt_full_concurrent = full_concurrent;
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index a483d389d9c2..f3148e70f85c 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2007 - 2014, 2018 - 2020  Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1824,11 +1822,6 @@ MODULE_PARM_DESC(amsdu_size,
 module_param_named(fw_restart, iwlwifi_mod_params.fw_restart, bool, 0444);
 MODULE_PARM_DESC(fw_restart, "restart firmware in case of error (default true)");
 
-module_param_named(antenna_coupling, iwlwifi_mod_params.antenna_coupling,
-		   int, 0444);
-MODULE_PARM_DESC(antenna_coupling,
-		 "specify antenna coupling in dB (default: 0 dB)");
-
 module_param_named(nvm_file, iwlwifi_mod_params.nvm_file, charp, 0444);
 MODULE_PARM_DESC(nvm_file, "NVM file name");
 
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
index b094cc1e9be0..e8ce3a300857 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-modparams.h
@@ -5,8 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -26,8 +25,7 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -111,7 +109,6 @@ enum iwl_uapsd_disable {
  * @power_save: enable power save, default = false
  * @power_level: power level, default = 1
  * @debug_level: levels are IWL_DL_*
- * @antenna_coupling: antenna coupling in dB, default = 0
  * @nvm_file: specifies a external NVM file
  * @uapsd_disable: disable U-APSD, see &enum iwl_uapsd_disable, default =
  *	IWL_DISABLE_UAPSD_BSS | IWL_DISABLE_UAPSD_P2P_CLIENT
@@ -131,7 +128,6 @@ struct iwl_mod_params {
 #ifdef CONFIG_IWLWIFI_DEBUG
 	u32 debug_level;
 #endif
-	int antenna_coupling;
 	char *nvm_file;
 	u32 uapsd_disable;
 	bool disable_11ac;
-- 
cgit v1.2.3-59-g8ed1b


From 0c9e025e797e02c35449b3ad08d3317e5fc7d7b8 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 18:48:11 +0300
Subject: iwlwifi: yoyo: don't access TLV before verifying len

If the TLV length is shorter than the struct, reading the struct
fields accesses garbage data that was not provided by the user.

While at it, rewrite the checker in a cleaner way.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Fixes: a9248de42464 ("iwlwifi: dbg_ini: add TLV allocation new API support")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.54418c829390.I15d6b462a0e69a280b6c6cfbcb6bcb05bb5f79ee@changeid
---
 .../net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h    |  5 ++-
 drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c   | 44 +++++++++++-----------
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
index b9d7ed93311c..74ac65bd545a 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/dbg-tlv.h
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -25,7 +25,7 @@
  *
  * BSD LICENSE
  *
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -304,6 +304,7 @@ enum iwl_fw_ini_buffer_location {
 	IWL_FW_INI_LOCATION_SRAM_PATH,
 	IWL_FW_INI_LOCATION_DRAM_PATH,
 	IWL_FW_INI_LOCATION_NPK_PATH,
+	IWL_FW_INI_LOCATION_NUM,
 }; /* FW_DEBUG_TLV_BUFFER_LOCATION_E_VER_1 */
 
 /**
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
index 9eb8fbfaa2a2..7987a288917b 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-dbg-tlv.c
@@ -165,38 +165,36 @@ static int iwl_dbg_tlv_alloc_buf_alloc(struct iwl_trans *trans,
 				       struct iwl_ucode_tlv *tlv)
 {
 	struct iwl_fw_ini_allocation_tlv *alloc = (void *)tlv->data;
-	u32 buf_location = le32_to_cpu(alloc->buf_location);
-	u32 alloc_id = le32_to_cpu(alloc->alloc_id);
+	u32 buf_location;
+	u32 alloc_id;
 
-	if (le32_to_cpu(tlv->length) != sizeof(*alloc) ||
-	    (buf_location != IWL_FW_INI_LOCATION_SRAM_PATH &&
-	     buf_location != IWL_FW_INI_LOCATION_DRAM_PATH &&
-	     buf_location != IWL_FW_INI_LOCATION_NPK_PATH)) {
-		IWL_ERR(trans,
-			"WRT: Invalid allocation TLV\n");
+	if (le32_to_cpu(tlv->length) != sizeof(*alloc))
 		return -EINVAL;
-	}
 
-	if ((buf_location == IWL_FW_INI_LOCATION_SRAM_PATH ||
-	     buf_location == IWL_FW_INI_LOCATION_NPK_PATH) &&
-	     alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1) {
-		IWL_ERR(trans,
-			"WRT: Allocation TLV for SMEM/NPK path must have id %u (current: %u)\n",
-			IWL_FW_INI_ALLOCATION_ID_DBGC1, alloc_id);
-		return -EINVAL;
-	}
+	buf_location = le32_to_cpu(alloc->buf_location);
+	alloc_id = le32_to_cpu(alloc->alloc_id);
+
+	if (buf_location == IWL_FW_INI_LOCATION_INVALID ||
+	    buf_location >= IWL_FW_INI_LOCATION_NUM)
+		goto err;
 
 	if (alloc_id == IWL_FW_INI_ALLOCATION_INVALID ||
-	    alloc_id >= IWL_FW_INI_ALLOCATION_NUM) {
-		IWL_ERR(trans,
-			"WRT: Invalid allocation id %u for allocation TLV\n",
-			alloc_id);
-		return -EINVAL;
-	}
+	    alloc_id >= IWL_FW_INI_ALLOCATION_NUM)
+		goto err;
+
+	if ((buf_location == IWL_FW_INI_LOCATION_SRAM_PATH ||
+	     buf_location == IWL_FW_INI_LOCATION_NPK_PATH) &&
+	     alloc_id != IWL_FW_INI_ALLOCATION_ID_DBGC1)
+		goto err;
 
 	trans->dbg.fw_mon_cfg[alloc_id] = *alloc;
 
 	return 0;
+err:
+	IWL_ERR(trans,
+		"WRT: Invalid allocation id %u and/or location id %u for allocation TLV\n",
+		alloc_id, buf_location);
+	return -EINVAL;
 }
 
 static int iwl_dbg_tlv_alloc_hcmd(struct iwl_trans *trans,
-- 
cgit v1.2.3-59-g8ed1b


From f092e4e35b34fba55f2ad5b8d34d342755afa49a Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 24 Apr 2020 18:48:12 +0300
Subject: iwlwifi: mvm: add support for range request command version 9

This version adds support for per responder calibrations.
Currently the driver will use a single value for all responders
and bandwidths.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.5ce74a87009c.I9079332b21eef490bbdbf8d7d66e35d7d0c7882b@changeid
---
 .../net/wireless/intel/iwlwifi/fw/api/location.h   | 110 +++++++++++++++++++--
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c |  67 ++++++++++---
 2 files changed, 155 insertions(+), 22 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index 0214e553d5ae..7ffad19d80fd 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -6,8 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,8 +27,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
- * Copyright (C) 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -147,6 +145,7 @@ struct iwl_tof_config_cmd {
  * @IWL_TOF_BW_40: 40 MHz
  * @IWL_TOF_BW_80: 80 MHz
  * @IWL_TOF_BW_160: 160 MHz
+ * @IWL_TOF_BW_NUM: number of tof bandwidths
  */
 enum iwl_tof_bandwidth {
 	IWL_TOF_BW_20_LEGACY,
@@ -154,6 +153,7 @@ enum iwl_tof_bandwidth {
 	IWL_TOF_BW_40,
 	IWL_TOF_BW_80,
 	IWL_TOF_BW_160,
+	IWL_TOF_BW_NUM,
 }; /* LOCAT_BW_TYPE_E */
 
 /*
@@ -430,6 +430,9 @@ struct iwl_tof_range_req_ap_entry_v2 {
  * @IWL_INITIATOR_AP_FLAGS_NON_TB: Use non trigger based flow
  * @IWL_INITIATOR_AP_FLAGS_TB: Use trigger based flow
  * @IWL_INITIATOR_AP_FLAGS_SECURED: request secured measurement
+ * @IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK: Send LMR feedback
+ * @IWL_INITIATOR_AP_FLAGS_USE_CALIB: Use calibration values from the request
+ *      instead of fw internal values.
  */
 enum iwl_initiator_ap_flags {
 	IWL_INITIATOR_AP_FLAGS_ASAP = BIT(1),
@@ -442,6 +445,8 @@ enum iwl_initiator_ap_flags {
 	IWL_INITIATOR_AP_FLAGS_NON_TB = BIT(9),
 	IWL_INITIATOR_AP_FLAGS_TB = BIT(10),
 	IWL_INITIATOR_AP_FLAGS_SECURED = BIT(11),
+	IWL_INITIATOR_AP_FLAGS_LMR_FEEDBACK = BIT(12),
+	IWL_INITIATOR_AP_FLAGS_USE_CALIB = BIT(13),
 };
 
 /**
@@ -508,7 +513,7 @@ enum iwl_location_bw {
 #define LOCATION_BW_POS	4
 
 /**
- * struct iwl_tof_range_req_ap_entry - AP configuration parameters
+ * struct iwl_tof_range_req_ap_entry_v4 - AP configuration parameters
  * @initiator_ap_flags: see &enum iwl_initiator_ap_flags.
  * @channel_num: AP Channel number
  * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
@@ -527,7 +532,7 @@ enum iwl_location_bw {
  * @hltk: HLTK to be used for secured 11az measurement
  * @tk: TK to be used for secured 11az measurement
  */
-struct iwl_tof_range_req_ap_entry {
+struct iwl_tof_range_req_ap_entry_v4 {
 	__le32 initiator_ap_flags;
 	u8 channel_num;
 	u8 format_bw;
@@ -542,6 +547,65 @@ struct iwl_tof_range_req_ap_entry {
 	u8 tk[TK_11AZ_LEN];
 } __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_4 */
 
+/**
+ * enum iwl_location_cipher - location cipher selection
+ * @IWL_LOCATION_CIPHER_CCMP_128: CCMP 128
+ * @IWL_LOCATION_CIPHER_CCMP_256: CCMP 256
+ * @IWL_LOCATION_CIPHER_GCMP_128: GCMP 128
+ * @IWL_LOCATION_CIPHER_GCMP_256: GCMP 256
+ */
+enum iwl_location_cipher {
+	IWL_LOCATION_CIPHER_CCMP_128,
+	IWL_LOCATION_CIPHER_CCMP_256,
+	IWL_LOCATION_CIPHER_GCMP_128,
+	IWL_LOCATION_CIPHER_GCMP_256,
+};
+
+/**
+ * struct iwl_tof_range_req_ap_entry - AP configuration parameters
+ * @initiator_ap_flags: see &enum iwl_initiator_ap_flags.
+ * @channel_num: AP Channel number
+ * @format_bw: bits 0 - 3: &enum iwl_location_frame_format.
+ *             bits 4 - 7: &enum iwl_location_bw.
+ * @ctrl_ch_position: Coding of the control channel position relative to the
+ *	center frequency, see iwl_mvm_get_ctrl_pos().
+ * @ftmr_max_retries: Max number of retries to send the FTMR in case of no
+ *	reply from the AP.
+ * @bssid: AP's BSSID
+ * @burst_period: Recommended value to be sent to the AP. Measurement
+ *	periodicity In units of 100ms. ignored if num_of_bursts_exp = 0
+ * @samples_per_burst: the number of FTMs pairs in single Burst (1-31);
+ * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of
+ *	the number of measurement iterations (min 2^0 = 1, max 2^14)
+ * @reserved: For alignment and future use
+ * @cipher: pairwise cipher suite for secured measurement.
+ *          &enum iwl_location_cipher.
+ * @hltk: HLTK to be used for secured 11az measurement
+ * @tk: TK to be used for secured 11az measurement
+ * @calib: An array of calibration values per FTM rx bandwidth.
+ *         If &IWL_INITIATOR_AP_FLAGS_USE_CALIB is set, the fw will use the
+ *         calibration value that corresponds to the rx bandwidth of the FTM
+ *         frame.
+ * @reserved2: For alignment and future use.
+ */
+struct iwl_tof_range_req_ap_entry {
+	__le32 initiator_ap_flags;
+	u8 channel_num;
+	u8 format_bw;
+	u8 ctrl_ch_position;
+	u8 ftmr_max_retries;
+	u8 bssid[ETH_ALEN];
+	__le16 burst_period;
+	u8 samples_per_burst;
+	u8 num_of_bursts;
+	u8 reserved;
+	u8 cipher;
+	u8 hltk[HLTK_11AZ_LEN];
+	u8 tk[TK_11AZ_LEN];
+	__le16 calib[IWL_TOF_BW_NUM];
+	__le16 reserved2;
+} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_5 */
+
 /**
  * enum iwl_tof_response_mode
  * @IWL_MVM_TOF_RESPONSE_ASAP: report each AP measurement separately as soon as
@@ -676,7 +740,7 @@ struct iwl_tof_range_req_cmd_v7 {
 } __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_7 */
 
 /**
- * struct iwl_tof_range_req_cmd - start measurement cmd
+ * struct iwl_tof_range_req_cmd_v8 - start measurement cmd
  * @initiator_flags: see flags @ iwl_tof_initiator_flags
  * @request_id: A Token incremented per request. The same Token will be
  *		sent back in the range response
@@ -693,7 +757,7 @@ struct iwl_tof_range_req_cmd_v7 {
  * @specific_calib: The specific calib value to inject to this measurement calc
  * @ap: per-AP request data, see &struct iwl_tof_range_req_ap_entry_v2.
  */
-struct iwl_tof_range_req_cmd {
+struct iwl_tof_range_req_cmd_v8 {
 	__le32 initiator_flags;
 	u8 request_id;
 	u8 num_of_ap;
@@ -704,9 +768,37 @@ struct iwl_tof_range_req_cmd {
 	__le32 tsf_mac_id;
 	__le16 common_calib;
 	__le16 specific_calib;
-	struct iwl_tof_range_req_ap_entry ap[IWL_MVM_TOF_MAX_APS];
+	struct iwl_tof_range_req_ap_entry_v4 ap[IWL_MVM_TOF_MAX_APS];
 } __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_8 */
 
+/**
+ * struct iwl_tof_range_req_cmd - start measurement cmd
+ * @initiator_flags: see flags @ iwl_tof_initiator_flags
+ * @request_id: A Token incremented per request. The same Token will be
+ *		sent back in the range response
+ * @num_of_ap: Number of APs to measure (error if > IWL_MVM_TOF_MAX_APS)
+ * @range_req_bssid: ranging request BSSID
+ * @macaddr_mask: Bits set to 0 shall be copied from the MAC address template.
+ *		  Bits set to 1 shall be randomized by the UMAC
+ * @macaddr_template: MAC address template to use for non-randomized bits
+ * @req_timeout_ms: Requested timeout of the response in units of milliseconds.
+ *	This is the session time for completing the measurement.
+ * @tsf_mac_id: report the measurement start time for each ap in terms of the
+ *	TSF of this mac id. 0xff to disable TSF reporting.
+ * @ap: per-AP request data, see &struct iwl_tof_range_req_ap_entry.
+ */
+struct iwl_tof_range_req_cmd {
+	__le32 initiator_flags;
+	u8 request_id;
+	u8 num_of_ap;
+	u8 range_req_bssid[ETH_ALEN];
+	u8 macaddr_mask[ETH_ALEN];
+	u8 macaddr_template[ETH_ALEN];
+	__le32 req_timeout_ms;
+	__le32 tsf_mac_id;
+	struct iwl_tof_range_req_ap_entry ap[IWL_MVM_TOF_MAX_APS];
+} __packed; /* LOCATION_RANGE_REQ_CMD_API_S_VER_9 */
+
 /*
  * enum iwl_tof_range_request_status - status of the sent request
  * @IWL_TOF_RANGE_REQUEST_STATUS_SUCCESSFUL - FW successfully received the
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index cdb87139100d..aaa7dd1788b1 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -164,9 +164,10 @@ static void iwl_mvm_ftm_cmd_v5(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 		eth_broadcast_addr(cmd->range_req_bssid);
 }
 
-static void iwl_mvm_ftm_cmd(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
-			    struct iwl_tof_range_req_cmd *cmd,
-			    struct cfg80211_pmsr_request *req)
+static void iwl_mvm_ftm_cmd_common(struct iwl_mvm *mvm,
+				   struct ieee80211_vif *vif,
+				   struct iwl_tof_range_req_cmd *cmd,
+				   struct cfg80211_pmsr_request *req)
 {
 	int i;
 
@@ -210,6 +211,13 @@ static void iwl_mvm_ftm_cmd(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	cmd->tsf_mac_id = cpu_to_le32(0xff);
 }
 
+static void iwl_mvm_ftm_cmd_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+			       struct iwl_tof_range_req_cmd_v8 *cmd,
+			       struct cfg80211_pmsr_request *req)
+{
+	iwl_mvm_ftm_cmd_common(mvm, vif, (void *)cmd, req);
+}
+
 static int
 iwl_mvm_ftm_target_chandef_v1(struct iwl_mvm *mvm,
 			      struct cfg80211_pmsr_request_peer *peer,
@@ -382,9 +390,10 @@ iwl_mvm_ftm_put_target_v3(struct iwl_mvm *mvm,
 	return 0;
 }
 
-static int iwl_mvm_ftm_put_target_v4(struct iwl_mvm *mvm,
-				     struct cfg80211_pmsr_request_peer *peer,
-				     struct iwl_tof_range_req_ap_entry *target)
+static int
+iwl_mvm_ftm_put_target(struct iwl_mvm *mvm,
+		       struct cfg80211_pmsr_request_peer *peer,
+		       struct iwl_tof_range_req_ap_entry_v4 *target)
 {
 	int ret;
 
@@ -394,7 +403,7 @@ static int iwl_mvm_ftm_put_target_v4(struct iwl_mvm *mvm,
 	if (ret)
 		return ret;
 
-	iwl_mvm_ftm_put_target_common(mvm, peer, target);
+	iwl_mvm_ftm_put_target_common(mvm, peer, (void *)target);
 
 	return 0;
 }
@@ -456,7 +465,7 @@ static int iwl_mvm_ftm_start_v7(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	 * Versions 7 and 8 has the same structure except from the responders
 	 * list, so iwl_mvm_ftm_cmd() can be used for version 7 too.
 	 */
-	iwl_mvm_ftm_cmd(mvm, vif, (void *)&cmd_v7, req);
+	iwl_mvm_ftm_cmd_v8(mvm, vif, (void *)&cmd_v7, req);
 
 	for (i = 0; i < cmd_v7.num_of_ap; i++) {
 		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
@@ -471,6 +480,32 @@ static int iwl_mvm_ftm_start_v7(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 static int iwl_mvm_ftm_start_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 				struct cfg80211_pmsr_request *req)
+{
+	struct iwl_tof_range_req_cmd_v8 cmd;
+	struct iwl_host_cmd hcmd = {
+		.id = iwl_cmd_id(TOF_RANGE_REQ_CMD, LOCATION_GROUP, 0),
+		.dataflags[0] = IWL_HCMD_DFL_DUP,
+		.data[0] = &cmd,
+		.len[0] = sizeof(cmd),
+	};
+	u8 i;
+	int err;
+
+	iwl_mvm_ftm_cmd_v8(mvm, vif, (void *)&cmd, req);
+
+	for (i = 0; i < cmd.num_of_ap; i++) {
+		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+
+		err = iwl_mvm_ftm_put_target(mvm, peer, &cmd.ap[i]);
+		if (err)
+			return err;
+	}
+
+	return iwl_mvm_ftm_send_cmd(mvm, &hcmd);
+}
+
+static int iwl_mvm_ftm_start_v9(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
+				struct cfg80211_pmsr_request *req)
 {
 	struct iwl_tof_range_req_cmd cmd;
 	struct iwl_host_cmd hcmd = {
@@ -482,12 +517,12 @@ static int iwl_mvm_ftm_start_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	u8 i;
 	int err;
 
-	iwl_mvm_ftm_cmd(mvm, vif, &cmd, req);
+	iwl_mvm_ftm_cmd_common(mvm, vif, &cmd, req);
 
 	for (i = 0; i < cmd.num_of_ap; i++) {
 		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
 
-		err = iwl_mvm_ftm_put_target_v4(mvm, peer, &cmd.ap[i]);
+		err = iwl_mvm_ftm_put_target(mvm, peer, (void *)&cmd.ap[i]);
 		if (err)
 			return err;
 	}
@@ -511,11 +546,17 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 		u8 cmd_ver = iwl_fw_lookup_cmd_ver(mvm->fw, LOCATION_GROUP,
 						   TOF_RANGE_REQ_CMD);
 
-		if (cmd_ver == 8)
+		switch (cmd_ver) {
+		case 9:
+			err = iwl_mvm_ftm_start_v9(mvm, vif, req);
+			break;
+		case 8:
 			err = iwl_mvm_ftm_start_v8(mvm, vif, req);
-		else
+			break;
+		default:
 			err = iwl_mvm_ftm_start_v7(mvm, vif, req);
-
+			break;
+		}
 	} else {
 		err = iwl_mvm_ftm_start_v5(mvm, vif, req);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f4bfdc5e571ef5107112a7f1daa6a9c572e4a798 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 24 Apr 2020 18:48:13 +0300
Subject: iwlwifi: mvm: stop supporting swcrypto and bt_coex_active module
 parameters

Leave them active for iwldvm. For iwlmvm, we do not test these
configurations and there is no reason to allow them nowadays.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.674a325b008b.Ifc925ca84500fb76c7b6f926a24a34ca777b4192@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/coex.c     |  7 ++--
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c       | 31 +++++++----------
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 42 ++++++++++-------------
 3 files changed, 33 insertions(+), 47 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
index 3d2abbc5c76c..5ae22cd7ecdb 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/coex.c
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,7 +26,7 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2013 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2013 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  *
@@ -216,8 +216,7 @@ int iwl_mvm_send_bt_init_conf(struct iwl_mvm *mvm)
 		goto send_cmd;
 	}
 
-	mode = iwlwifi_mod_params.bt_coex_active ? BT_COEX_NW : BT_COEX_DISABLE;
-	bt_cmd.mode = cpu_to_le32(mode);
+	bt_cmd.mode = cpu_to_le32(BT_COEX_NW);
 
 	if (IWL_MVM_BT_COEX_SYNC2SCO)
 		bt_cmd.enabled_modules |=
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 222775714859..89096bcb053e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -80,9 +78,6 @@ void iwl_mvm_set_rekey_data(struct ieee80211_hw *hw,
 	struct iwl_mvm *mvm = IWL_MAC80211_GET_MVM(hw);
 	struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
 
-	if (iwlwifi_mod_params.swcrypto)
-		return;
-
 	mutex_lock(&mvm->mutex);
 
 	memcpy(mvmvif->rekey_data.kek, data->kek, NL80211_KEK_LEN);
@@ -843,18 +838,16 @@ iwl_mvm_wowlan_config(struct iwl_mvm *mvm,
 			return ret;
 	}
 
-	if (!iwlwifi_mod_params.swcrypto) {
-		/*
-		 * This needs to be unlocked due to lock ordering
-		 * constraints. Since we're in the suspend path
-		 * that isn't really a problem though.
-		 */
-		mutex_unlock(&mvm->mutex);
-		ret = iwl_mvm_wowlan_config_key_params(mvm, vif, CMD_ASYNC);
-		mutex_lock(&mvm->mutex);
-		if (ret)
-			return ret;
-	}
+	/*
+	 * This needs to be unlocked due to lock ordering
+	 * constraints. Since we're in the suspend path
+	 * that isn't really a problem though.
+	 */
+	mutex_unlock(&mvm->mutex);
+	ret = iwl_mvm_wowlan_config_key_params(mvm, vif, CMD_ASYNC);
+	mutex_lock(&mvm->mutex);
+	if (ret)
+		return ret;
 
 	ret = iwl_mvm_send_cmd_pdu(mvm, WOWLAN_CONFIGURATION, 0,
 				   sizeof(*wowlan_config_cmd),
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 853ba7b8bf3f..ee3d2ff432f7 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -475,23 +475,23 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 		hw->wiphy->n_cipher_suites++;
 	}
 
-	/* Enable 11w if software crypto is not enabled (as the
-	 * firmware will interpret some mgmt packets, so enabling it
-	 * with software crypto isn't safe).
-	 */
-	if (!iwlwifi_mod_params.swcrypto) {
-		ieee80211_hw_set(hw, MFP_CAPABLE);
+	if (iwlwifi_mod_params.swcrypto)
+		IWL_ERR(mvm,
+			"iwlmvm doesn't allow to disable HW crypto, check swcrypto module parameter\n");
+	if (!iwlwifi_mod_params.bt_coex_active)
+		IWL_ERR(mvm,
+			"iwlmvm doesn't allow to disable BT Coex, check bt_coex_active module parameter\n");
+
+	ieee80211_hw_set(hw, MFP_CAPABLE);
+	mvm->ciphers[hw->wiphy->n_cipher_suites] = WLAN_CIPHER_SUITE_AES_CMAC;
+	hw->wiphy->n_cipher_suites++;
+	if (iwl_mvm_has_new_rx_api(mvm)) {
 		mvm->ciphers[hw->wiphy->n_cipher_suites] =
-			WLAN_CIPHER_SUITE_AES_CMAC;
+			WLAN_CIPHER_SUITE_BIP_GMAC_128;
+		hw->wiphy->n_cipher_suites++;
+		mvm->ciphers[hw->wiphy->n_cipher_suites] =
+			WLAN_CIPHER_SUITE_BIP_GMAC_256;
 		hw->wiphy->n_cipher_suites++;
-		if (iwl_mvm_has_new_rx_api(mvm)) {
-			mvm->ciphers[hw->wiphy->n_cipher_suites] =
-				WLAN_CIPHER_SUITE_BIP_GMAC_128;
-			hw->wiphy->n_cipher_suites++;
-			mvm->ciphers[hw->wiphy->n_cipher_suites] =
-				WLAN_CIPHER_SUITE_BIP_GMAC_256;
-			hw->wiphy->n_cipher_suites++;
-		}
 	}
 
 	/* currently FW API supports only one optional cipher scheme */
@@ -697,10 +697,9 @@ int iwl_mvm_mac_setup_register(struct iwl_mvm *mvm)
 				     WIPHY_WOWLAN_EAP_IDENTITY_REQ |
 				     WIPHY_WOWLAN_RFKILL_RELEASE |
 				     WIPHY_WOWLAN_NET_DETECT;
-		if (!iwlwifi_mod_params.swcrypto)
-			mvm->wowlan.flags |= WIPHY_WOWLAN_SUPPORTS_GTK_REKEY |
-					     WIPHY_WOWLAN_GTK_REKEY_FAILURE |
-					     WIPHY_WOWLAN_4WAY_HANDSHAKE;
+		mvm->wowlan.flags |= WIPHY_WOWLAN_SUPPORTS_GTK_REKEY |
+				     WIPHY_WOWLAN_GTK_REKEY_FAILURE |
+				     WIPHY_WOWLAN_4WAY_HANDSHAKE;
 
 		mvm->wowlan.n_patterns = IWL_WOWLAN_MAX_PATTERNS;
 		mvm->wowlan.pattern_min_len = IWL_WOWLAN_MIN_PATTERN_LEN;
@@ -3366,11 +3365,6 @@ static int __iwl_mvm_mac_set_key(struct ieee80211_hw *hw,
 	int ret, i;
 	u8 key_offset;
 
-	if (iwlwifi_mod_params.swcrypto) {
-		IWL_DEBUG_MAC80211(mvm, "leave - hwcrypto disabled\n");
-		return -EOPNOTSUPP;
-	}
-
 	switch (key->cipher) {
 	case WLAN_CIPHER_SUITE_TKIP:
 		if (!mvm->trans->trans_cfg->gen2) {
-- 
cgit v1.2.3-59-g8ed1b


From 9617040ecab4810363ccab81aac1de3725675c9b Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 24 Apr 2020 18:48:14 +0300
Subject: iwlwifi: mvm: remove iwlmvm's tfd_q_hang_detect module parameter

This should be controlled by the firmware debugging mechanism
and not by a module parameter. This has always been true.
Remove it and assume it is set.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.b6e4982e62ae.I7f7352f79c40ada2f221bd4b41449a40821e833f@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/mvm.h   |  8 ++------
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c   | 11 ++---------
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c   | 16 ++++++----------
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c |  9 +++------
 4 files changed, 13 insertions(+), 31 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
index 9e2a0858108c..e2f7f6ec711e 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mvm.h
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -134,12 +132,10 @@ extern const struct ieee80211_ops iwl_mvm_hw_ops;
  *	We will register to mac80211 to have testmode working. The NIC must not
  *	be up'ed after the INIT fw asserted. This is useful to be able to use
  *	proprietary tools over testmode to debug the INIT fw.
- * @tfd_q_hang_detect: enabled the detection of hung transmit queues
  * @power_scheme: one of enum iwl_power_scheme
  */
 struct iwl_mvm_mod_params {
 	bool init_dbg;
-	bool tfd_q_hang_detect;
 	int power_scheme;
 };
 extern struct iwl_mvm_mod_params iwlmvm_mod_params;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index d0afc806706d..bde1b9b5face 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -94,7 +92,6 @@ static const struct iwl_op_mode_ops iwl_mvm_ops_mq;
 
 struct iwl_mvm_mod_params iwlmvm_mod_params = {
 	.power_scheme = IWL_POWER_SCHEME_BPS,
-	.tfd_q_hang_detect = true
 	/* rest of fields are 0 by default */
 };
 
@@ -104,10 +101,6 @@ MODULE_PARM_DESC(init_dbg,
 module_param_named(power_scheme, iwlmvm_mod_params.power_scheme, int, 0444);
 MODULE_PARM_DESC(power_scheme,
 		 "power management scheme: 1-active, 2-balanced, 3-low power, default: 2");
-module_param_named(tfd_q_hang_detect, iwlmvm_mod_params.tfd_q_hang_detect,
-		   bool, 0444);
-MODULE_PARM_DESC(tfd_q_hang_detect,
-		 "TFD queues hang detection (default: true");
 
 /*
  * module init and exit functions
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 56ae72debb96..ca5dcd7643e0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2015, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2015 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2015, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1965,9 +1963,8 @@ void iwl_mvm_dealloc_int_sta(struct iwl_mvm *mvm, struct iwl_mvm_int_sta *sta)
 static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 queue,
 					  u8 sta_id, u8 fifo)
 {
-	unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
-		mvm->trans->trans_cfg->base_params->wd_timeout :
-		IWL_WATCHDOG_DISABLED;
+	unsigned int wdg_timeout =
+		mvm->trans->trans_cfg->base_params->wd_timeout;
 	struct iwl_trans_txq_scd_cfg cfg = {
 		.fifo = fifo,
 		.sta_id = sta_id,
@@ -1983,9 +1980,8 @@ static void iwl_mvm_enable_aux_snif_queue(struct iwl_mvm *mvm, u16 queue,
 
 static int iwl_mvm_enable_aux_snif_queue_tvqm(struct iwl_mvm *mvm, u8 sta_id)
 {
-	unsigned int wdg_timeout = iwlmvm_mod_params.tfd_q_hang_detect ?
-		mvm->trans->trans_cfg->base_params->wd_timeout :
-		IWL_WATCHDOG_DISABLED;
+	unsigned int wdg_timeout =
+		mvm->trans->trans_cfg->base_params->wd_timeout;
 
 	WARN_ON(!iwl_mvm_has_new_tx_api(mvm));
 
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index 6096276cb0d0..ee8f152e7606 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -952,8 +950,7 @@ unsigned int iwl_mvm_get_wd_timeout(struct iwl_mvm *mvm,
 				IWL_UCODE_TLV_CAPA_STA_PM_NOTIF) &&
 		    vif && vif->type == NL80211_IFTYPE_AP)
 			return IWL_WATCHDOG_DISABLED;
-		return iwlmvm_mod_params.tfd_q_hang_detect ?
-			default_timeout : IWL_WATCHDOG_DISABLED;
+		return default_timeout;
 	}
 
 	trigger = iwl_fw_dbg_get_trigger(mvm->fw, FW_DBG_TRIGGER_TXQ_TIMERS);
-- 
cgit v1.2.3-59-g8ed1b


From a65a5824298b06049dbaceb8a9bd19709dc9507c Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 18:48:15 +0300
Subject: iwlwifi: avoid debug max amsdu config overwriting itself

If we set amsdu_len one after another the second one overwrites
the orig_amsdu_len so allow only moving from debug to non debug state.

Also the TLC update check was wrong: it was checking that also the orig
is smaller than the new updated size, which is not the case in debug
amsdu mode.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Fixes: af2984e9e625 ("iwlwifi: mvm: add a debugfs entry to set a fixed size AMSDU for all TX packets")
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.e565446a4fce.I9729d8c520d8b8bb4de9a5cdc62e01eb85168aac@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c | 11 +++++++----
 drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c   | 15 ++++++++-------
 2 files changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
index 3beef8d077b8..8fae7e707374 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/debugfs.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -481,6 +479,11 @@ static ssize_t iwl_dbgfs_amsdu_len_write(struct ieee80211_sta *sta,
 	if (kstrtou16(buf, 0, &amsdu_len))
 		return -EINVAL;
 
+	/* only change from debug set <-> debug unset */
+	if ((amsdu_len && mvmsta->orig_amsdu_len) ||
+	    (!!amsdu_len && mvmsta->orig_amsdu_len))
+		return -EBUSY;
+
 	if (amsdu_len) {
 		mvmsta->orig_amsdu_len = sta->max_amsdu_len;
 		sta->max_amsdu_len = amsdu_len;
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
index 15d11fb72aca..6f4d241d47e9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/rs-fw.c
@@ -369,14 +369,15 @@ void iwl_mvm_tlc_update_notif(struct iwl_mvm *mvm,
 		u16 size = le32_to_cpu(notif->amsdu_size);
 		int i;
 
-		/*
-		 * In debug sta->max_amsdu_len < size
-		 * so also check with orig_amsdu_len which holds the original
-		 * data before debugfs changed the value
-		 */
-		if (WARN_ON(sta->max_amsdu_len < size &&
-			    mvmsta->orig_amsdu_len < size))
+		if (sta->max_amsdu_len < size) {
+			/*
+			 * In debug sta->max_amsdu_len < size
+			 * so also check with orig_amsdu_len which holds the
+			 * original data before debugfs changed the value
+			 */
+			WARN_ON(mvmsta->orig_amsdu_len < size);
 			goto out;
+		}
 
 		mvmsta->amsdu_enabled = le32_to_cpu(notif->amsdu_enabled);
 		mvmsta->max_amsdu_len = size;
-- 
cgit v1.2.3-59-g8ed1b


From 73f23d91cfa32c087b7289f2516efa186b2e982c Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Fri, 24 Apr 2020 18:48:16 +0300
Subject: iwlwifi: mvm: set properly station flags in STA_HE_CTXT_CMD

For ACK_ENABLED and 32BIT_BA_BITMAP flags check the station capabilities
rather than bss_conf.ack_enabled and bss_conf.multi_sta_back_32bit.
These fields are station capabilities and should not be in bss_conf.
Also note that the bss_conf flags are set in station mode only.
In the next patch I will remove ack_enabled and multi_sta_back_32bit
from the bss_conf structure.

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.bc7230b74f93.I144f73cd6a797a7060429981fee62572861bc76b@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index ee3d2ff432f7..10df77ab1a77 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -2179,6 +2179,15 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm,
 			flags |= STA_CTXT_HE_PACKET_EXT;
 		}
 	}
+
+	if (sta->he_cap.he_cap_elem.mac_cap_info[2] &
+	    IEEE80211_HE_MAC_CAP2_32BIT_BA_BITMAP)
+		flags |= STA_CTXT_HE_32BIT_BA_BITMAP;
+
+	if (sta->he_cap.he_cap_elem.mac_cap_info[2] &
+	    IEEE80211_HE_MAC_CAP2_ACK_EN)
+		flags |= STA_CTXT_HE_ACK_ENABLED;
+
 	rcu_read_unlock();
 
 	/* Mark MU EDCA as enabled, unless none detected on some AC */
@@ -2203,11 +2212,6 @@ static void iwl_mvm_cfg_he_sta(struct iwl_mvm *mvm,
 			cpu_to_le16(mu_edca->mu_edca_timer);
 	}
 
-	if (vif->bss_conf.multi_sta_back_32bit)
-		flags |= STA_CTXT_HE_32BIT_BA_BITMAP;
-
-	if (vif->bss_conf.ack_enabled)
-		flags |= STA_CTXT_HE_ACK_ENABLED;
 
 	if (vif->bss_conf.uora_exists) {
 		flags |= STA_CTXT_HE_TRIG_RND_ALLOC;
-- 
cgit v1.2.3-59-g8ed1b


From 89cb1ddee1a9f26ee2c1a60e768dfd0e8e705b0a Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 18:48:17 +0300
Subject: iwlwifi: yoyo: add D3 resume timepoint

This timepoint is used for getting a clean log (if needed)
after resume without any commands/interrupts from the driver
to the FW.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.7cd450812977.Ibc9ddd6eae6af5ce499ac1e4f6c01853577d1e83@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/d3.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
index 89096bcb053e..2a94545d737f 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/d3.c
@@ -1986,6 +1986,9 @@ static int __iwl_mvm_resume(struct iwl_mvm *mvm, bool test)
 		goto err;
 	}
 
+	iwl_dbg_tlv_time_point(&mvm->fwrt, IWL_FW_INI_TIME_POINT_HOST_D3_END,
+			       NULL);
+
 	ret = iwl_trans_d3_resume(mvm->trans, &d3_status, test, !unified_image);
 	if (ret)
 		goto err;
-- 
cgit v1.2.3-59-g8ed1b


From 45baf306b0a14bba59c1529add2ba7f8df48082e Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 18:48:18 +0300
Subject: iwlwifi: bump FW API to 55 for AX devices

Start supporting API version 55 for AX devices.  We skipped 54 because
it won't be published.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424182644.5b4628e18203.I626d3b19d84214a83cd92dc4ddf291903b0983aa@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 2f741f5e3a7d..2fbe0aa3e548 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,7 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -57,7 +57,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	53
+#define IWL_22000_UCODE_API_MAX	55
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	39
-- 
cgit v1.2.3-59-g8ed1b


From 2abe24f9e15b2ccce471dd1996debdae6c84e64b Mon Sep 17 00:00:00 2001
From: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Date: Fri, 24 Apr 2020 19:46:57 +0300
Subject: iwlwifi: dbg: set debug descriptor to NULL outside of
 iwl_fw_free_dump_desc

To avoid static analysis warning and to make the flow more readable, set
the debug descriptor to NULL outside iwl_fw_free_dump_desc and only in
the required places.

Signed-off-by: Shahar S Matityahu <shahar.s.matityahu@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.5d5c50750a52.I17e33fc268c2097b7c42877f86cef2aa163b913a@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 37c8b6cc2ec7..4df10f3d99d2 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2198,12 +2198,11 @@ static u32 iwl_dump_ini_file_gen(struct iwl_fw_runtime *fwrt,
 }
 
 static inline void iwl_fw_free_dump_desc(struct iwl_fw_runtime *fwrt,
-					 const struct iwl_fw_dump_desc **desc)
+					 const struct iwl_fw_dump_desc *desc)
 {
-	if (desc && *desc != &iwl_dump_desc_assert)
-		kfree(*desc);
+	if (desc && desc != &iwl_dump_desc_assert)
+		kfree(desc);
 
-	*desc = NULL;
 	fwrt->dump.lmac_err_id[0] = 0;
 	if (fwrt->smem_cfg.num_lmacs > 1)
 		fwrt->dump.lmac_err_id[1] = 0;
@@ -2315,7 +2314,7 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
 	unsigned long idx;
 
 	if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
-		iwl_fw_free_dump_desc(fwrt, &desc);
+		iwl_fw_free_dump_desc(fwrt, desc);
 		return 0;
 	}
 
@@ -2336,7 +2335,7 @@ int iwl_fw_dbg_collect_desc(struct iwl_fw_runtime *fwrt,
 	wk_data = &fwrt->dump.wks[idx];
 
 	if (WARN_ON(wk_data->dump_data.desc))
-		iwl_fw_free_dump_desc(fwrt, &wk_data->dump_data.desc);
+		iwl_fw_free_dump_desc(fwrt, wk_data->dump_data.desc);
 
 	wk_data->dump_data.desc = desc;
 	wk_data->dump_data.monitor_only = monitor_only;
@@ -2593,10 +2592,12 @@ static void iwl_fw_dbg_collect_sync(struct iwl_fw_runtime *fwrt, u8 wk_idx)
 	iwl_fw_dbg_stop_restart_recording(fwrt, &params, false);
 
 out:
-	if (iwl_trans_dbg_ini_valid(fwrt->trans))
+	if (iwl_trans_dbg_ini_valid(fwrt->trans)) {
 		iwl_fw_error_dump_data_free(dump_data);
-	else
-		iwl_fw_free_dump_desc(fwrt, &dump_data->desc);
+	} else {
+		iwl_fw_free_dump_desc(fwrt, dump_data->desc);
+		dump_data->desc = NULL;
+	}
 
 	clear_bit(wk_idx, &fwrt->dump.active_wks);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 71e9378bcfd2e9dd96c2bfbef23f3562946d30b3 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:46:58 +0300
Subject: iwlwifi: mvm: initialize iwl_dev_tx_power_cmd to zero

If the REDUCE_TX_POWER_CMD version is v4 or v5, we are not
initializing some values before sending to the FW, which causes SAR
not to work properly.  Solve this by initializing the struct in the
declaration.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.0dc957a264ff.I43cfd72d539c1287ccaaa454e95c673dac38214f@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 164fc9e98c86..60246a8eed50 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -787,13 +787,12 @@ int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm, int prof_a, int prof_b)
 	union {
 		struct iwl_dev_tx_power_cmd v5;
 		struct iwl_dev_tx_power_cmd_v4 v4;
-	} cmd;
-
+	} cmd = {
+		.v5.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS),
+	};
 	int ret;
 	u16 len = 0;
 
-	cmd.v5.v3.set_mode = cpu_to_le32(IWL_TX_POWER_MODE_SET_CHAINS);
-
 	if (fw_has_api(&mvm->fw->ucode_capa,
 		       IWL_UCODE_TLV_API_REDUCE_TX_POWER))
 		len = sizeof(cmd.v5);
-- 
cgit v1.2.3-59-g8ed1b


From 0960237d2fa39eae376580690c2d6bc6bd0a1d07 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 19:46:59 +0300
Subject: iwlwifi: yoyo: remove magic number

The for loop is iterating over active regions so iterate only
over the len of the active regions buffer size.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.e10482b9eed7.I15da7bb25d9b9e3eef1c1b117dc585e703ce756a@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 4df10f3d99d2..04de7688884d 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2119,7 +2119,11 @@ static u32 iwl_dump_ini_trigger(struct iwl_fw_runtime *fwrt,
 	u32 size = 0;
 	u64 regions_mask = le64_to_cpu(trigger->regions_mask);
 
-	for (i = 0; i < 64; i++) {
+	BUILD_BUG_ON(sizeof(trigger->regions_mask) != sizeof(regions_mask));
+	BUILD_BUG_ON((sizeof(trigger->regions_mask) * BITS_PER_BYTE) <
+		     ARRAY_SIZE(fwrt->trans->dbg.active_regions));
+
+	for (i = 0; i < ARRAY_SIZE(fwrt->trans->dbg.active_regions); i++) {
 		u32 reg_type;
 		struct iwl_fw_ini_region_tlv *reg;
 
-- 
cgit v1.2.3-59-g8ed1b


From a8eb340f2ea48280eb2b7dc5a0e6cfff4928f5a5 Mon Sep 17 00:00:00 2001
From: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Date: Fri, 24 Apr 2020 19:47:00 +0300
Subject: iwlwifi: move iwl_set_soc_latency to iwl-drv to be used by other
 op_modes

All the op_modes need to send this command as well. Instead of
duplicating the code from mvm, put the code in a common place.

Signed-off-by: Emmanuel Grumbach <emmanuel.grumbach@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.7f30f977f9bf.I060b51d0d66d09b9d1ee512e7de8f2d695a52152@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/init.c    | 55 ++++++++++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/fw/runtime.h |  1 +
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c     | 46 +--------------------
 3 files changed, 55 insertions(+), 47 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/init.c b/drivers/net/wireless/intel/iwlwifi/fw/init.c
index ba00d162ce72..b373606e1241 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/init.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/init.c
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 Intel Corporation
+ * Copyright(c) 2019 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -27,7 +27,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2019 Intel Corporation
+ * Copyright(c) 2019 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -62,6 +62,9 @@
 #include "dbg.h"
 #include "debugfs.h"
 
+#include "fw/api/soc.h"
+#include "fw/api/commands.h"
+
 void iwl_fw_runtime_init(struct iwl_fw_runtime *fwrt, struct iwl_trans *trans,
 			const struct iwl_fw *fw,
 			const struct iwl_fw_runtime_ops *ops, void *ops_ctx,
@@ -95,3 +98,51 @@ void iwl_fw_runtime_resume(struct iwl_fw_runtime *fwrt)
 	iwl_fw_resume_timestamp(fwrt);
 }
 IWL_EXPORT_SYMBOL(iwl_fw_runtime_resume);
+
+/* set device type and latency */
+int iwl_set_soc_latency(struct iwl_fw_runtime *fwrt)
+{
+	struct iwl_soc_configuration_cmd cmd = {};
+	struct iwl_host_cmd hcmd = {
+		.id = iwl_cmd_id(SOC_CONFIGURATION_CMD, SYSTEM_GROUP, 0),
+		.data[0] = &cmd,
+		.len[0] = sizeof(cmd),
+	};
+	int ret;
+
+	/*
+	 * In VER_1 of this command, the discrete value is considered
+	 * an integer; In VER_2, it's a bitmask.  Since we have only 2
+	 * values in VER_1, this is backwards-compatible with VER_2,
+	 * as long as we don't set any other bits.
+	 */
+	if (!fwrt->trans->trans_cfg->integrated)
+		cmd.flags = cpu_to_le32(SOC_CONFIG_CMD_FLAGS_DISCRETE);
+
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_NONE !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_NONE);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_200US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_200);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_2500US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_2500);
+	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_1820US !=
+		     SOC_FLAGS_LTR_APPLY_DELAY_1820);
+
+	if (fwrt->trans->trans_cfg->ltr_delay != IWL_CFG_TRANS_LTR_DELAY_NONE &&
+	    !WARN_ON(!fwrt->trans->trans_cfg->integrated))
+		cmd.flags |= le32_encode_bits(fwrt->trans->trans_cfg->ltr_delay,
+					      SOC_FLAGS_LTR_APPLY_DELAY_MASK);
+
+	if (iwl_fw_lookup_cmd_ver(fwrt->fw, IWL_ALWAYS_LONG_GROUP,
+				  SCAN_REQ_UMAC) >= 2 &&
+	    fwrt->trans->trans_cfg->low_latency_xtal)
+		cmd.flags |= cpu_to_le32(SOC_CONFIG_CMD_FLAGS_LOW_LATENCY);
+
+	cmd.latency = cpu_to_le32(fwrt->trans->trans_cfg->xtal_latency);
+
+	ret = iwl_trans_send_cmd(fwrt->trans, &hcmd);
+	if (ret)
+		IWL_ERR(fwrt, "Failed to set soc latency: %d\n", ret);
+	return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_set_soc_latency);
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
index 9629ef94b214..b5e5e32b6152 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/runtime.h
@@ -242,5 +242,6 @@ int iwl_init_paging(struct iwl_fw_runtime *fwrt, enum iwl_ucode_type type);
 void iwl_free_fw_paging(struct iwl_fw_runtime *fwrt);
 
 void iwl_get_shared_mem_conf(struct iwl_fw_runtime *fwrt);
+int iwl_set_soc_latency(struct iwl_fw_runtime *fwrt);
 
 #endif /* __iwl_fw_runtime_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 60246a8eed50..5e8d3f8c3d86 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -87,50 +87,6 @@ struct iwl_mvm_alive_data {
 	u32 scd_base_addr;
 };
 
-/* set device type and latency */
-static int iwl_set_soc_latency(struct iwl_mvm *mvm)
-{
-	struct iwl_soc_configuration_cmd cmd = {};
-	int ret;
-
-	/*
-	 * In VER_1 of this command, the discrete value is considered
-	 * an integer; In VER_2, it's a bitmask.  Since we have only 2
-	 * values in VER_1, this is backwards-compatible with VER_2,
-	 * as long as we don't set any other bits.
-	 */
-	if (!mvm->trans->trans_cfg->integrated)
-		cmd.flags = cpu_to_le32(SOC_CONFIG_CMD_FLAGS_DISCRETE);
-
-	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_NONE !=
-		     SOC_FLAGS_LTR_APPLY_DELAY_NONE);
-	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_200US !=
-		     SOC_FLAGS_LTR_APPLY_DELAY_200);
-	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_2500US !=
-		     SOC_FLAGS_LTR_APPLY_DELAY_2500);
-	BUILD_BUG_ON(IWL_CFG_TRANS_LTR_DELAY_1820US !=
-		     SOC_FLAGS_LTR_APPLY_DELAY_1820);
-
-	if (mvm->trans->trans_cfg->ltr_delay != IWL_CFG_TRANS_LTR_DELAY_NONE &&
-	    !WARN_ON(!mvm->trans->trans_cfg->integrated))
-		cmd.flags |= le32_encode_bits(mvm->trans->trans_cfg->ltr_delay,
-					      SOC_FLAGS_LTR_APPLY_DELAY_MASK);
-
-	if (iwl_fw_lookup_cmd_ver(mvm->fw, IWL_ALWAYS_LONG_GROUP,
-				  SCAN_REQ_UMAC) >= 2 &&
-	    mvm->trans->trans_cfg->low_latency_xtal)
-		cmd.flags |= cpu_to_le32(SOC_CONFIG_CMD_FLAGS_LOW_LATENCY);
-
-	cmd.latency = cpu_to_le32(mvm->trans->trans_cfg->xtal_latency);
-
-	ret = iwl_mvm_send_cmd_pdu(mvm, iwl_cmd_id(SOC_CONFIGURATION_CMD,
-						   SYSTEM_GROUP, 0), 0,
-				   sizeof(cmd), &cmd);
-	if (ret)
-		IWL_ERR(mvm, "Failed to set soc latency: %d\n", ret);
-	return ret;
-}
-
 static int iwl_send_tx_ant_cfg(struct iwl_mvm *mvm, u8 valid_tx_ant)
 {
 	struct iwl_tx_ant_cfg_cmd tx_ant_cmd = {
@@ -1237,7 +1193,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 
 	if (fw_has_capa(&mvm->fw->ucode_capa,
 			IWL_UCODE_TLV_CAPA_SOC_LATENCY_SUPPORT)) {
-		ret = iwl_set_soc_latency(mvm);
+		ret = iwl_set_soc_latency(&mvm->fwrt);
 		if (ret)
 			goto error;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 37dee1f18eefd727a2f186ea16c6d76c802d8541 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:01 +0300
Subject: iwlwifi: mvm: add IML/ROM information to the assertion dumps

Dump the IML/ROM error code and data, which are read from some
registers, when printing an assertion dump.  This makes it easier to
debug IML/ROM errors.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.a522161a7372.I2a65ee35a5e0242f8a0e106f126356dff81ef59d@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/utils.c | 20 ++++++++++++++++++++
 1 file changed, 20 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
index ee8f152e7606..be57b8391850 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/utils.c
@@ -586,6 +586,23 @@ static void iwl_mvm_dump_lmac_error_log(struct iwl_mvm *mvm, u8 lmac_num)
 	IWL_ERR(mvm, "0x%08X | flow_handler\n", table.flow_handler);
 }
 
+static void iwl_mvm_dump_iml_error_log(struct iwl_mvm *mvm)
+{
+	struct iwl_trans *trans = mvm->trans;
+	u32 error;
+
+	error = iwl_read_umac_prph(trans, UMAG_SB_CPU_2_STATUS);
+
+	IWL_ERR(trans, "IML/ROM dump:\n");
+
+	if (error & 0xFFFF0000)
+		IWL_ERR(trans, "IML/ROM SYSASSERT:\n");
+
+	IWL_ERR(mvm, "0x%08X | IML/ROM error/state\n", error);
+	IWL_ERR(mvm, "0x%08X | IML/ROM data1\n",
+		iwl_read_umac_prph(trans, UMAG_SB_CPU_1_STATUS));
+}
+
 void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm)
 {
 	if (!test_bit(STATUS_DEVICE_ENABLED, &mvm->trans->status)) {
@@ -601,6 +618,9 @@ void iwl_mvm_dump_nic_error_log(struct iwl_mvm *mvm)
 
 	iwl_mvm_dump_umac_error_log(mvm);
 
+	if (mvm->trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+		iwl_mvm_dump_iml_error_log(mvm);
+
 	iwl_fw_error_print_fseq_regs(&mvm->fwrt);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 53abad420b7e106232fdc93208e438570f4aa908 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:02 +0300
Subject: iwlwifi: pcie: remove outdated comment about PCI RTPM reference

This comment was only related to the PCI RTPM implementation, which
was removed a while ago, so it is now meaningless.  Remove it.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.f362e4560a3b.I8d858c6c9a6c98b45d2195dfe28dabe0286c8a83@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 12 ++----------
 1 file changed, 2 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 2083eb4f2f15..5c216fa6505c 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016-2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,11 +27,10 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * All rights reserved.
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2005 - 2014, 2018 - 2020 Intel Corporation
  *
  * Redistribution and use in source and binary forms, with or without
  * modification, are permitted provided that the following conditions
@@ -1162,12 +1160,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	/* register transport layer debugfs here */
 	iwl_trans_pcie_dbgfs_register(iwl_trans);
 
-	/* The PCI device starts with a reference taken and we are
-	 * supposed to release it here.  But to simplify the
-	 * interaction with the opmode, we don't do it now, but let
-	 * the opmode release it when it's ready.
-	 */
-
 	return 0;
 
 out_free_trans:
-- 
cgit v1.2.3-59-g8ed1b


From ebe7b061e9293968181bbe60a3e261681358c50f Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:03 +0300
Subject: iwlwifi: pcie: remove mangling for iwl_ax101_cfg_qu_hr

All devices that use iwl_ax101_cfg_qu_hr are recognized via the device
info table, so the cfg will never be iwl_ax101_cfg_qu_hr.  Remove the
code that converts this into QuZ and Qu-C, since it's not needed
anymore.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.30b6b4aae1c1.If5e583835c9f7f2771a50ba1b2f33bb85f25b383@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c | 23 -----------------------
 1 file changed, 23 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 5c216fa6505c..4146526b1278 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -1062,29 +1062,6 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_GF4)) {
 			iwl_trans->cfg = &iwlax411_2ax_cfg_so_gf4_a0;
 		}
-	} else if (cfg == &iwl_ax101_cfg_qu_hr) {
-		if ((CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-		     CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
-		     iwl_trans->hw_rev == CSR_HW_REV_TYPE_QNJ_B0) ||
-		    (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-		     CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR1))) {
-			iwl_trans->cfg = &iwl22000_2ax_cfg_qnj_hr_b0;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-		    CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR) &&
-		    iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) {
-			iwl_trans->cfg = &iwl_ax101_cfg_quz_hr;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HR)) {
-			iwl_trans->cfg = &iwl_ax101_cfg_qu_hr;
-		} else if (CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id) ==
-			   CSR_HW_RF_ID_TYPE_CHIP_ID(CSR_HW_RF_ID_TYPE_HRCDB)) {
-			IWL_ERR(iwl_trans, "RF ID HRCDB is not supported\n");
-			return -EINVAL;
-		} else {
-			IWL_ERR(iwl_trans, "Unrecognized RF ID 0x%08x\n",
-				CSR_HW_RF_ID_TYPE_CHIP_ID(iwl_trans->hw_rf_id));
-			return -EINVAL;
-		}
 	}
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From d5727b60a18edb2e762a95fbf445b1ce3ff41e25 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:04 +0300
Subject: iwlwifi: pcie: convert QnJ with Hr to the device table

Convert all the occurrences of QnJ with Hr into a single entry in the
PCI device table.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.21283ca79d82.I57a493c679a56fff11417afdaec76617711ff6a9@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 14 +++++++-------
 drivers/net/wireless/intel/iwlwifi/iwl-config.h |  4 ++--
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 24 +++++-------------------
 3 files changed, 14 insertions(+), 28 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 2fbe0aa3e548..c88394849c86 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -77,7 +77,7 @@
 #define IWL_22000_HR_FW_PRE		"iwlwifi-Qu-a0-hr-a0-"
 #define IWL_22000_HR_CDB_FW_PRE		"iwlwifi-QuIcp-z0-hrcdb-a0-"
 #define IWL_22000_QU_B_HR_B_FW_PRE	"iwlwifi-Qu-b0-hr-b0-"
-#define IWL_22000_HR_B_FW_PRE		"iwlwifi-QuQnj-b0-hr-b0-"
+#define IWL_QNJ_B_HR_B_FW_PRE		"iwlwifi-QuQnj-b0-hr-b0-"
 #define IWL_QU_C_HR_B_FW_PRE		"iwlwifi-Qu-c0-hr-b0-"
 #define IWL_QU_B_JF_B_FW_PRE		"iwlwifi-Qu-b0-jf-b0-"
 #define IWL_QU_C_JF_B_FW_PRE		"iwlwifi-Qu-c0-jf-b0-"
@@ -99,8 +99,8 @@
 	IWL_22000_JF_FW_PRE __stringify(api) ".ucode"
 #define IWL_22000_QU_B_HR_B_MODULE_FIRMWARE(api) \
 	IWL_22000_QU_B_HR_B_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(api)	\
-	IWL_22000_HR_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_QNJ_B_HR_B_MODULE_FIRMWARE(api)	\
+	IWL_QNJ_B_HR_B_FW_PRE __stringify(api) ".ucode"
 #define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \
 	IWL_QUZ_A_HR_B_FW_PRE __stringify(api) ".ucode"
 #define IWL_QUZ_A_JF_B_MODULE_FIRMWARE(api) \
@@ -343,6 +343,7 @@ const struct iwl_cfg_trans_params iwl_ax200_trans_cfg = {
 };
 
 const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz";
+const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
 
 const char iwl_ax200_killer_1650w_name[] =
 	"Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)";
@@ -520,9 +521,8 @@ const struct iwl_cfg killer1650i_2ax_cfg_qu_c0_hr_b0 = {
 	.num_rbds = IWL_NUM_RBDS_22000_HE,
 };
 
-const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_b0 = {
-	.name = "Intel(R) Dual Band Wireless AX 22000",
-	.fw_name_pre = IWL_22000_HR_B_FW_PRE,
+const struct iwl_cfg iwl_qnj_b0_hr_b0_cfg = {
+	.fw_name_pre = IWL_QNJ_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
 	 * This device doesn't support receiving BlockAck with a large bitmap
@@ -609,7 +609,7 @@ const struct iwl_cfg iwlax211_cfg_snj_gf_a0 = {
 
 MODULE_FIRMWARE(IWL_22000_HR_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_22000_JF_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_HR_B_QNJ_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_QNJ_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 3a9a33851793..91ec41e0d427 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -535,6 +535,7 @@ extern const char iwl9260_killer_1550_name[];
 extern const char iwl9560_killer_1550i_name[];
 extern const char iwl9560_killer_1550s_name[];
 extern const char iwl_ax200_name[];
+extern const char iwl_ax201_name[];
 extern const char iwl_ax200_killer_1650w_name[];
 extern const char iwl_ax200_killer_1650x_name[];
 
@@ -625,8 +626,7 @@ extern const struct iwl_cfg killer1650s_2ax_cfg_qu_c0_hr_b0;
 extern const struct iwl_cfg killer1650i_2ax_cfg_qu_c0_hr_b0;
 extern const struct iwl_cfg killer1650x_2ax_cfg;
 extern const struct iwl_cfg killer1650w_2ax_cfg;
-extern const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_b0_f0;
-extern const struct iwl_cfg iwl22000_2ax_cfg_qnj_hr_b0;
+extern const struct iwl_cfg iwl_qnj_b0_hr_b0_cfg;
 extern const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0;
 extern const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0;
 extern const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index 4146526b1278..d5f437ac3c43 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -590,7 +590,10 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x2723, 0x1654, iwl_ax200_cfg_cc, iwl_ax200_killer_1650x_name),
 	IWL_DEV_INFO(0x2723, IWL_CFG_ANY, iwl_ax200_cfg_cc, iwl_ax200_name),
 
-/* Qu with Hr */
+	/* QnJ with Hr */
+	IWL_DEV_INFO(0x2720, IWL_CFG_ANY, iwl_qnj_b0_hr_b0_cfg, iwl_ax201_name),
+
+	/* Qu with Hr */
 	IWL_DEV_INFO(0x43F0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
@@ -675,23 +678,6 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x4DF0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 
-	IWL_DEV_INFO(0x2720, 0x0000, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0040, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0044, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0070, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0074, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0078, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x007C, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0244, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0310, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x0A10, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x1080, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x1651, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x1652, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x2074, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x4070, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-	IWL_DEV_INFO(0x2720, 0x4244, iwl22000_2ax_cfg_qnj_hr_b0, NULL),
-
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
 		      IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY,
 		      IWL_CFG_RF_TYPE_JF1, IWL_CFG_RF_ID_JF1,
@@ -789,7 +775,7 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_NO_160, IWL_CFG_CORES_BT,
 		      iwl9260_2ac_cfg, iwl9260_name),
 
-	/* Qu with Jf */
+/* Qu with Jf */
 	/* Qu B step */
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
 		      IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP,
-- 
cgit v1.2.3-59-g8ed1b


From e34aa5abfcf56502b79a4fdbe5fd9e94f70aa279 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:05 +0300
Subject: iwlwifi: pcie: remove occurrences of 22000 in the FW name defines

We don't use the number 22000 for our devices anymore, so remove all
occurrences of it in the FW name macros.

While at it, add the missing IWL_QU_B_HR_B entry to the list of
firmwares used by the driver.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.93cc41bdbb4d.Ib7599901888a2d050f851bd878a875f593f2e8e5@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 72 ++++++++++++--------------
 1 file changed, 32 insertions(+), 40 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index c88394849c86..b9d13e38f12b 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -73,10 +73,7 @@
 #define IWL_22000_SMEM_OFFSET		0x400000
 #define IWL_22000_SMEM_LEN		0xD0000
 
-#define IWL_22000_JF_FW_PRE		"iwlwifi-Qu-a0-jf-b0-"
-#define IWL_22000_HR_FW_PRE		"iwlwifi-Qu-a0-hr-a0-"
-#define IWL_22000_HR_CDB_FW_PRE		"iwlwifi-QuIcp-z0-hrcdb-a0-"
-#define IWL_22000_QU_B_HR_B_FW_PRE	"iwlwifi-Qu-b0-hr-b0-"
+#define IWL_QU_B_HR_B_FW_PRE		"iwlwifi-Qu-b0-hr-b0-"
 #define IWL_QNJ_B_HR_B_FW_PRE		"iwlwifi-QuQnj-b0-hr-b0-"
 #define IWL_QU_C_HR_B_FW_PRE		"iwlwifi-Qu-c0-hr-b0-"
 #define IWL_QU_B_JF_B_FW_PRE		"iwlwifi-Qu-b0-jf-b0-"
@@ -85,20 +82,16 @@
 #define IWL_QUZ_A_JF_B_FW_PRE		"iwlwifi-QuZ-a0-jf-b0-"
 #define IWL_QNJ_B_JF_B_FW_PRE		"iwlwifi-QuQnj-b0-jf-b0-"
 #define IWL_CC_A_FW_PRE			"iwlwifi-cc-a0-"
-#define IWL_22000_SO_A_JF_B_FW_PRE	"iwlwifi-so-a0-jf-b0-"
-#define IWL_22000_SO_A_HR_B_FW_PRE      "iwlwifi-so-a0-hr-b0-"
-#define IWL_22000_SO_A_GF_A_FW_PRE      "iwlwifi-so-a0-gf-a0-"
-#define IWL_22000_TY_A_GF_A_FW_PRE      "iwlwifi-ty-a0-gf-a0-"
-#define IWL_22000_SO_A_GF4_A_FW_PRE     "iwlwifi-so-a0-gf4-a0-"
+#define IWL_SO_A_JF_B_FW_PRE		"iwlwifi-so-a0-jf-b0-"
+#define IWL_SO_A_HR_B_FW_PRE		"iwlwifi-so-a0-hr-b0-"
+#define IWL_SO_A_GF_A_FW_PRE		"iwlwifi-so-a0-gf-a0-"
+#define IWL_TY_A_GF_A_FW_PRE		"iwlwifi-ty-a0-gf-a0-"
+#define IWL_SO_A_GF4_A_FW_PRE		"iwlwifi-so-a0-gf4-a0-"
 #define IWL_SNJ_A_GF4_A_FW_PRE		"iwlwifi-SoSnj-a0-gf4-a0-"
 #define IWL_SNJ_A_GF_A_FW_PRE		"iwlwifi-SoSnj-a0-gf-a0-"
 
-#define IWL_22000_HR_MODULE_FIRMWARE(api) \
-	IWL_22000_HR_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_JF_MODULE_FIRMWARE(api) \
-	IWL_22000_JF_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_QU_B_HR_B_MODULE_FIRMWARE(api) \
-	IWL_22000_QU_B_HR_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_QU_B_HR_B_MODULE_FIRMWARE(api) \
+	IWL_QU_B_HR_B_FW_PRE __stringify(api) ".ucode"
 #define IWL_QNJ_B_HR_B_MODULE_FIRMWARE(api)	\
 	IWL_QNJ_B_HR_B_FW_PRE __stringify(api) ".ucode"
 #define IWL_QUZ_A_HR_B_MODULE_FIRMWARE(api) \
@@ -113,14 +106,14 @@
 	IWL_QNJ_B_JF_B_FW_PRE __stringify(api) ".ucode"
 #define IWL_CC_A_MODULE_FIRMWARE(api)			\
 	IWL_CC_A_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(api) \
-	IWL_22000_SO_A_JF_B_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_SO_A_HR_B_MODULE_FIRMWARE(api) \
-	IWL_22000_SO_A_HR_B_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_SO_A_GF_A_MODULE_FIRMWARE(api) \
-	IWL_22000_SO_A_GF_A_FW_PRE __stringify(api) ".ucode"
-#define IWL_22000_TY_A_GF_A_MODULE_FIRMWARE(api) \
-	IWL_22000_TY_A_GF_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_SO_A_JF_B_MODULE_FIRMWARE(api) \
+	IWL_SO_A_JF_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_SO_A_HR_B_MODULE_FIRMWARE(api) \
+	IWL_SO_A_HR_B_FW_PRE __stringify(api) ".ucode"
+#define IWL_SO_A_GF_A_MODULE_FIRMWARE(api) \
+	IWL_SO_A_GF_A_FW_PRE __stringify(api) ".ucode"
+#define IWL_TY_A_GF_A_MODULE_FIRMWARE(api) \
+	IWL_TY_A_GF_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_SNJ_A_GF4_A_MODULE_FIRMWARE(api) \
 	IWL_SNJ_A_GF4_A_FW_PRE __stringify(api) ".ucode"
 #define IWL_SNJ_A_GF_A_MODULE_FIRMWARE(api) \
@@ -352,7 +345,7 @@ const char iwl_ax200_killer_1650x_name[] =
 
 const struct iwl_cfg iwl_ax101_cfg_qu_hr = {
 	.name = "Intel(R) Wi-Fi 6 AX101",
-	.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
+	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
 	 * This device doesn't support receiving BlockAck with a large bitmap
@@ -366,7 +359,7 @@ const struct iwl_cfg iwl_ax101_cfg_qu_hr = {
 
 const struct iwl_cfg iwl_ax201_cfg_qu_hr = {
 	.name = "Intel(R) Wi-Fi 6 AX201 160MHz",
-	.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
+	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
 	 * This device doesn't support receiving BlockAck with a large bitmap
@@ -471,7 +464,7 @@ const struct iwl_cfg iwl_ax200_cfg_cc = {
 
 const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = {
 	.name = "Killer(R) Wi-Fi 6 AX1650i 160MHz Wireless Network Adapter (201NGW)",
-	.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
+	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
 	 * This device doesn't support receiving BlockAck with a large bitmap
@@ -484,7 +477,7 @@ const struct iwl_cfg killer1650s_2ax_cfg_qu_b0_hr_b0 = {
 
 const struct iwl_cfg killer1650i_2ax_cfg_qu_b0_hr_b0 = {
 	.name = "Killer(R) Wi-Fi 6 AX1650s 160MHz Wireless Network Adapter (201D2W)",
-	.fw_name_pre = IWL_22000_QU_B_HR_B_FW_PRE,
+	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
 	 * This device doesn't support receiving BlockAck with a large bitmap
@@ -535,21 +528,21 @@ const struct iwl_cfg iwl_qnj_b0_hr_b0_cfg = {
 
 const struct iwl_cfg iwlax210_2ax_cfg_so_jf_a0 = {
 	.name = "Intel(R) Wireless-AC 9560 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_JF_B_FW_PRE,
+	.fw_name_pre = IWL_SO_A_JF_B_FW_PRE,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_NON_HE,
 };
 
 const struct iwl_cfg iwlax210_2ax_cfg_so_hr_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX210 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_HR_B_FW_PRE,
+	.fw_name_pre = IWL_SO_A_HR_B_FW_PRE,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
 const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX211 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
+	.fw_name_pre = IWL_SO_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -557,7 +550,7 @@ const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0 = {
 
 const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0_long = {
 	.name = "Intel(R) Wi-Fi 6 AX211 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_GF_A_FW_PRE,
+	.fw_name_pre = IWL_SO_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -567,7 +560,7 @@ const struct iwl_cfg iwlax211_2ax_cfg_so_gf_a0_long = {
 
 const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX210 160MHz",
-	.fw_name_pre = IWL_22000_TY_A_GF_A_FW_PRE,
+	.fw_name_pre = IWL_TY_A_GF_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -575,7 +568,7 @@ const struct iwl_cfg iwlax210_2ax_cfg_ty_gf_a0 = {
 
 const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0 = {
 	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
+	.fw_name_pre = IWL_SO_A_GF4_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -583,7 +576,7 @@ const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0 = {
 
 const struct iwl_cfg iwlax411_2ax_cfg_so_gf4_a0_long = {
 	.name = "Intel(R) Wi-Fi 6 AX411 160MHz",
-	.fw_name_pre = IWL_22000_SO_A_GF4_A_FW_PRE,
+	.fw_name_pre = IWL_SO_A_GF4_A_FW_PRE,
 	.uhb_supported = true,
 	IWL_DEVICE_AX210,
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
@@ -607,8 +600,7 @@ const struct iwl_cfg iwlax211_cfg_snj_gf_a0 = {
 	.num_rbds = IWL_NUM_RBDS_AX210_HE,
 };
 
-MODULE_FIRMWARE(IWL_22000_HR_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_JF_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_QU_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QNJ_B_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QU_C_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QU_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
@@ -616,9 +608,9 @@ MODULE_FIRMWARE(IWL_QUZ_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QUZ_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_QNJ_B_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_CC_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_SO_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_SO_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-MODULE_FIRMWARE(IWL_22000_TY_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SO_A_JF_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SO_A_HR_B_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_SO_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
+MODULE_FIRMWARE(IWL_TY_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_SNJ_A_GF4_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
 MODULE_FIRMWARE(IWL_SNJ_A_GF_A_MODULE_FIRMWARE(IWL_22000_UCODE_API_MAX));
-- 
cgit v1.2.3-59-g8ed1b


From 3f910a25839b13436bf0a22186f1698b23eafb91 Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 24 Apr 2020 19:47:06 +0300
Subject: iwlwifi: pcie: convert all AX101 devices to the device tables

Convert all Qu/Hr1 devices to the new device tables, by modifying the
corresponding structures, adding a new name and generalizing the
device recognition.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.ec0e04102d2c.Ia36f2c7bbf06cb6436424d40d6adb2376f2962ee@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c  | 10 ++---
 drivers/net/wireless/intel/iwlwifi/iwl-config.h | 11 ++++--
 drivers/net/wireless/intel/iwlwifi/pcie/drv.c   | 52 ++++++++++++-------------
 3 files changed, 37 insertions(+), 36 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index b9d13e38f12b..1fcc346ba425 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -337,14 +337,14 @@ const struct iwl_cfg_trans_params iwl_ax200_trans_cfg = {
 
 const char iwl_ax200_name[] = "Intel(R) Wi-Fi 6 AX200 160MHz";
 const char iwl_ax201_name[] = "Intel(R) Wi-Fi 6 AX201 160MHz";
+const char iwl_ax101_name[] = "Intel(R) Wi-Fi 6 AX101";
 
 const char iwl_ax200_killer_1650w_name[] =
 	"Killer(R) Wi-Fi 6 AX1650w 160MHz Wireless Network Adapter (200D2W)";
 const char iwl_ax200_killer_1650x_name[] =
 	"Killer(R) Wi-Fi 6 AX1650x 160MHz Wireless Network Adapter (200NGW)";
 
-const struct iwl_cfg iwl_ax101_cfg_qu_hr = {
-	.name = "Intel(R) Wi-Fi 6 AX101",
+const struct iwl_cfg iwl_qu_b0_hr1_b0 = {
 	.fw_name_pre = IWL_QU_B_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
@@ -370,8 +370,7 @@ const struct iwl_cfg iwl_ax201_cfg_qu_hr = {
 	.num_rbds = IWL_NUM_RBDS_22000_HE,
 };
 
-const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0 = {
-	.name = "Intel(R) Wi-Fi 6 AX101",
+const struct iwl_cfg iwl_qu_c0_hr1_b0 = {
 	.fw_name_pre = IWL_QU_C_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
@@ -397,8 +396,7 @@ const struct iwl_cfg iwl_ax201_cfg_qu_c0_hr_b0 = {
 	.num_rbds = IWL_NUM_RBDS_22000_HE,
 };
 
-const struct iwl_cfg iwl_ax101_cfg_quz_hr = {
-	.name = "Intel(R) Wi-Fi 6 AX101",
+const struct iwl_cfg iwl_quz_a0_hr1_b0 = {
 	.fw_name_pre = IWL_QUZ_A_HR_B_FW_PRE,
 	IWL_DEVICE_22500,
 	/*
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-config.h b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
index 91ec41e0d427..244899f3f3bf 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-config.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-config.h
@@ -477,12 +477,16 @@ struct iwl_cfg {
 #define IWL_CFG_RF_TYPE_TH1		0x108
 #define IWL_CFG_RF_TYPE_JF2		0x105
 #define IWL_CFG_RF_TYPE_JF1		0x108
+#define IWL_CFG_RF_TYPE_HR2		0x10A
+#define IWL_CFG_RF_TYPE_HR1		0x10C
 
 #define IWL_CFG_RF_ID_TH		0x1
 #define IWL_CFG_RF_ID_TH1		0x1
 #define IWL_CFG_RF_ID_JF		0x3
 #define IWL_CFG_RF_ID_JF1		0x6
 #define IWL_CFG_RF_ID_JF1_DIV		0xA
+#define IWL_CFG_RF_ID_HR		0x7
+#define IWL_CFG_RF_ID_HR1		0x4
 
 #define IWL_CFG_NO_160			0x0
 #define IWL_CFG_160			0x1
@@ -536,6 +540,7 @@ extern const char iwl9560_killer_1550i_name[];
 extern const char iwl9560_killer_1550s_name[];
 extern const char iwl_ax200_name[];
 extern const char iwl_ax201_name[];
+extern const char iwl_ax101_name[];
 extern const char iwl_ax200_killer_1650w_name[];
 extern const char iwl_ax200_killer_1650x_name[];
 
@@ -610,9 +615,9 @@ extern const struct iwl_cfg iwl9560_qu_c0_jf_b0_cfg;
 extern const struct iwl_cfg iwl9560_quz_a0_jf_b0_cfg;
 extern const struct iwl_cfg iwl9560_qnj_b0_jf_b0_cfg;
 extern const struct iwl_cfg iwl9560_2ac_cfg_soc;
-extern const struct iwl_cfg iwl_ax101_cfg_qu_hr;
-extern const struct iwl_cfg iwl_ax101_cfg_qu_c0_hr_b0;
-extern const struct iwl_cfg iwl_ax101_cfg_quz_hr;
+extern const struct iwl_cfg iwl_qu_b0_hr1_b0;
+extern const struct iwl_cfg iwl_qu_c0_hr1_b0;
+extern const struct iwl_cfg iwl_quz_a0_hr1_b0;
 extern const struct iwl_cfg iwl_ax200_cfg_cc;
 extern const struct iwl_cfg iwl_ax201_cfg_qu_hr;
 extern const struct iwl_cfg iwl_ax201_cfg_qu_hr;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
index d5f437ac3c43..f179cd08b418 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/drv.c
@@ -594,89 +594,68 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 	IWL_DEV_INFO(0x2720, IWL_CFG_ANY, iwl_qnj_b0_hr_b0_cfg, iwl_ax201_name),
 
 	/* Qu with Hr */
-	IWL_DEV_INFO(0x43F0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x43F0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x43F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
-	IWL_DEV_INFO(0x43F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x43F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x43F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x43F0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0xA0F0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0xA0F0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x0A10, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0xA0F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0xA0F0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0070, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0074, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0078, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x007C, iwl_ax201_cfg_quz_hr, NULL),
-	IWL_DEV_INFO(0x02F0, 0x0244, iwl_ax101_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x0310, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x1651, iwl_ax1650s_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x1652, iwl_ax1650i_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x2074, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x02F0, 0x4070, iwl_ax201_cfg_quz_hr, NULL),
-	IWL_DEV_INFO(0x02F0, 0x4244, iwl_ax101_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x0070, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x0074, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x0078, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x007C, iwl_ax201_cfg_quz_hr, NULL),
-	IWL_DEV_INFO(0x06F0, 0x0244, iwl_ax101_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x0310, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x1651, iwl_ax1650s_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x1652, iwl_ax1650i_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x2074, iwl_ax201_cfg_quz_hr, NULL),
 	IWL_DEV_INFO(0x06F0, 0x4070, iwl_ax201_cfg_quz_hr, NULL),
-	IWL_DEV_INFO(0x06F0, 0x4244, iwl_ax101_cfg_quz_hr, NULL),
-	IWL_DEV_INFO(0x34F0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x34F0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x0310, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x34F0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x34F0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x34F0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x34F0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 
-	IWL_DEV_INFO(0x3DF0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x3DF0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x0310, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x3DF0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x3DF0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 
-	IWL_DEV_INFO(0x4DF0, 0x0044, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x0070, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x0074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x0078, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x007C, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x4DF0, 0x0244, iwl_ax101_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x0310, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x1651, killer1650s_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x1652, killer1650i_2ax_cfg_qu_b0_hr_b0, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x2074, iwl_ax201_cfg_qu_hr, NULL),
 	IWL_DEV_INFO(0x4DF0, 0x4070, iwl_ax201_cfg_qu_hr, NULL),
-	IWL_DEV_INFO(0x4DF0, 0x4244, iwl_ax101_cfg_qu_hr, NULL),
 
 	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
 		      IWL_CFG_MAC_TYPE_PU, IWL_CFG_ANY,
@@ -951,6 +930,29 @@ static const struct iwl_dev_info iwl_dev_info_table[] = {
 		      IWL_CFG_RF_TYPE_JF2, IWL_CFG_RF_ID_JF,
 		      IWL_CFG_NO_160, IWL_CFG_CORES_BT,
 		      iwl9560_qnj_b0_jf_b0_cfg, iwl9560_killer_1550i_name),
+
+/* Qu with Hr */
+	/* Qu B step */
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_QU, SILICON_B_STEP,
+		      IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY,
+		      iwl_qu_b0_hr1_b0, iwl_ax101_name),
+
+	/* Qu C step */
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_QU, SILICON_C_STEP,
+		      IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY,
+		      iwl_qu_c0_hr1_b0, iwl_ax101_name),
+
+	/* QuZ */
+	_IWL_DEV_INFO(IWL_CFG_ANY, IWL_CFG_ANY,
+		      IWL_CFG_MAC_TYPE_QUZ, IWL_CFG_ANY,
+		      IWL_CFG_RF_TYPE_HR1, IWL_CFG_ANY,
+		      IWL_CFG_ANY, IWL_CFG_ANY,
+		      iwl_quz_a0_hr1_b0, iwl_ax101_name),
+
 #endif /* CONFIG_IWLMVM */
 };
 
@@ -1057,9 +1059,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	 * rest must be removed once we convert Qu with Hr as well.
 	 */
 	if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QU_C0) {
-		if (iwl_trans->cfg == &iwl_ax101_cfg_qu_hr)
-			iwl_trans->cfg = &iwl_ax101_cfg_qu_c0_hr_b0;
-		else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr)
+		if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr)
 			iwl_trans->cfg = &iwl_ax201_cfg_qu_c0_hr_b0;
 		else if (iwl_trans->cfg == &killer1650s_2ax_cfg_qu_b0_hr_b0)
 			iwl_trans->cfg = &killer1650s_2ax_cfg_qu_c0_hr_b0;
@@ -1069,9 +1069,7 @@ static int iwl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	/* same thing for QuZ... */
 	if (iwl_trans->hw_rev == CSR_HW_REV_TYPE_QUZ) {
-		if (iwl_trans->cfg == &iwl_ax101_cfg_qu_hr)
-			iwl_trans->cfg = &iwl_ax101_cfg_quz_hr;
-		else if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr)
+		if (iwl_trans->cfg == &iwl_ax201_cfg_qu_hr)
 			iwl_trans->cfg = &iwl_ax201_cfg_quz_hr;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 36dfe9ac6e8b8fc2e25733d003a867a40db791da Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 24 Apr 2020 19:47:07 +0300
Subject: iwlwifi: dump api version in yaml format

Used for debugging what FW API we are using to understand misalignment
with API changes.

The output is in YAML format and looks like this:

fw_api_ver:
  0x0001:
    name: MVM_ALIVE
    cmd_ver: 99
    notif_ver: 4
  0x0108:
    name: PHY_CONTEXT_CMD
    cmd_ver: 2
    notif_ver: 0
...

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200424194456.18bf540ab8e0.I6217488f1740f0e6accd0cecd09dfd46bad88426@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/debugfs.c | 104 +++++++++++++++++++++++-
 1 file changed, 100 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
index 89f74116569d..6e72c27f527b 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/debugfs.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 Intel Corporation
+ * Copyright(c) 2012 - 2014, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -64,6 +62,7 @@
 #include "api/commands.h"
 #include "debugfs.h"
 #include "dbg.h"
+#include <linux/seq_file.h>
 
 #define FWRT_DEBUGFS_OPEN_WRAPPER(name, buflen, argtype)		\
 struct dbgfs_##name##_data {						\
@@ -329,11 +328,108 @@ static ssize_t iwl_dbgfs_fw_dbg_domain_read(struct iwl_fw_runtime *fwrt,
 
 FWRT_DEBUGFS_READ_FILE_OPS(fw_dbg_domain, 20);
 
+struct iwl_dbgfs_fw_info_priv {
+	struct iwl_fw_runtime *fwrt;
+};
+
+struct iwl_dbgfs_fw_info_state {
+	loff_t pos;
+};
+
+static void *iwl_dbgfs_fw_info_seq_next(struct seq_file *seq,
+					void *v, loff_t *pos)
+{
+	struct iwl_dbgfs_fw_info_state *state = v;
+	struct iwl_dbgfs_fw_info_priv *priv = seq->private;
+	const struct iwl_fw *fw = priv->fwrt->fw;
+
+	*pos = ++state->pos;
+	if (*pos >= fw->ucode_capa.n_cmd_versions)
+		return NULL;
+
+	return state;
+}
+
+static void iwl_dbgfs_fw_info_seq_stop(struct seq_file *seq,
+				       void *v)
+{
+	kfree(v);
+}
+
+static void *iwl_dbgfs_fw_info_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct iwl_dbgfs_fw_info_priv *priv = seq->private;
+	const struct iwl_fw *fw = priv->fwrt->fw;
+	struct iwl_dbgfs_fw_info_state *state;
+
+	if (*pos >= fw->ucode_capa.n_cmd_versions)
+		return NULL;
+
+	state = kzalloc(sizeof(*state), GFP_KERNEL);
+	if (!state)
+		return NULL;
+	state->pos = *pos;
+	return state;
+};
+
+static int iwl_dbgfs_fw_info_seq_show(struct seq_file *seq, void *v)
+{
+	struct iwl_dbgfs_fw_info_state *state = v;
+	struct iwl_dbgfs_fw_info_priv *priv = seq->private;
+	const struct iwl_fw *fw = priv->fwrt->fw;
+	const struct iwl_fw_cmd_version *ver;
+	u32 cmd_id;
+
+	if (!state->pos)
+		seq_puts(seq, "fw_api_ver:\n");
+
+	ver = &fw->ucode_capa.cmd_versions[state->pos];
+
+	cmd_id = iwl_cmd_id(ver->cmd, ver->group, 0);
+
+	seq_printf(seq, "  0x%04x:\n", cmd_id);
+	seq_printf(seq, "    name: %s\n",
+		   iwl_get_cmd_string(priv->fwrt->trans, cmd_id));
+	seq_printf(seq, "    cmd_ver: %d\n", ver->cmd_ver);
+	seq_printf(seq, "    notif_ver: %d\n", ver->notif_ver);
+	return 0;
+}
+
+static const struct seq_operations iwl_dbgfs_info_seq_ops = {
+	.start = iwl_dbgfs_fw_info_seq_start,
+	.next = iwl_dbgfs_fw_info_seq_next,
+	.stop = iwl_dbgfs_fw_info_seq_stop,
+	.show = iwl_dbgfs_fw_info_seq_show,
+};
+
+static int iwl_dbgfs_fw_info_open(struct inode *inode, struct file *filp)
+{
+	struct iwl_dbgfs_fw_info_priv *priv;
+
+	priv = __seq_open_private(filp, &iwl_dbgfs_info_seq_ops,
+				  sizeof(*priv));
+
+	if (!priv)
+		return -ENOMEM;
+
+	priv->fwrt = inode->i_private;
+	return 0;
+}
+
+static const struct file_operations iwl_dbgfs_fw_info_ops = {
+	.owner = THIS_MODULE,
+	.open = iwl_dbgfs_fw_info_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = seq_release_private,
+};
+
 void iwl_fwrt_dbgfs_register(struct iwl_fw_runtime *fwrt,
 			    struct dentry *dbgfs_dir)
 {
 	INIT_DELAYED_WORK(&fwrt->timestamp.wk, iwl_fw_timestamp_marker_wk);
 	FWRT_DEBUGFS_ADD_FILE(timestamp_marker, dbgfs_dir, 0200);
+	FWRT_DEBUGFS_ADD_FILE(fw_info, dbgfs_dir, 0200);
 	FWRT_DEBUGFS_ADD_FILE(send_hcmd, dbgfs_dir, 0200);
 	FWRT_DEBUGFS_ADD_FILE(fw_dbg_domain, dbgfs_dir, 0400);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 752d95a2bf6fe207d612fc1febd621df714f5dde Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:49 +0300
Subject: iwlwifi: pcie: allocate much smaller byte-count table

The hardware needs a byte-count table with the size of each frame
on the queue to build A-MPDUs, but:
 * newer generations no longer have the duplicated space at the end,
   they can deal with the wrap properly - and we don't even fill
   the dup anyway
 * we have a maximum queue size of 512 right now and don't use the
   theoretical hardware maximum of 65536.

Together, this reduces the byte count table DMA allocation from
64KiB (65536*2 + 64*2 rounded up) to 1 KiB (though that might be
rounded up to a full 4 KiB page by the allocator, not sure it can
share the allocations.)

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.c263b787b5ab.I059507a9760b1ce1d45d84dcaa91629a5cfb58e0@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-fh.h       |  7 +++----
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c | 24 +++++++++++++++--------
 2 files changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
index bf673ce5f183..a8e988281eaf 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
@@ -7,7 +7,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -29,7 +29,7 @@
  *
  * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2015 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -646,8 +646,7 @@ struct iwl_rb_status {
 #define TFD_QUEUE_CB_SIZE(x)	(ilog2(x) - 3)
 #define TFD_QUEUE_SIZE_BC_DUP	(64)
 #define TFD_QUEUE_BC_SIZE	(TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP)
-#define TFD_QUEUE_BC_SIZE_GEN3	(TFD_QUEUE_SIZE_MAX_GEN3 + \
-				 TFD_QUEUE_SIZE_BC_DUP)
+#define TFD_QUEUE_BC_SIZE_GEN3	512
 #define IWL_TX_DMA_MASK        DMA_BIT_MASK(36)
 #define IWL_NUM_OF_TBS		20
 #define IWL_TFH_NUM_TBS		25
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 53747ac945b8..84df12ff131a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,7 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1272,17 +1272,25 @@ int iwl_trans_pcie_dyn_txq_alloc_dma(struct iwl_trans *trans,
 				     struct iwl_txq **intxq, int size,
 				     unsigned int timeout)
 {
+	size_t bc_tbl_size, bc_tbl_entries;
+	struct iwl_txq *txq;
 	int ret;
 
-	struct iwl_txq *txq;
+	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
+		bc_tbl_size = sizeof(struct iwl_gen3_bc_tbl);
+		bc_tbl_entries = bc_tbl_size / sizeof(u16);
+	} else {
+		bc_tbl_size = sizeof(struct iwlagn_scd_bc_tbl);
+		bc_tbl_entries = bc_tbl_size / sizeof(u16);
+	}
+
+	if (WARN_ON(size > bc_tbl_entries))
+		return -EINVAL;
+
 	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
 	if (!txq)
 		return -ENOMEM;
-	ret = iwl_pcie_alloc_dma_ptr(trans, &txq->bc_tbl,
-				     (trans->trans_cfg->device_family >=
-				      IWL_DEVICE_FAMILY_AX210) ?
-				     sizeof(struct iwl_gen3_bc_tbl) :
-				     sizeof(struct iwlagn_scd_bc_tbl));
+	ret = iwl_pcie_alloc_dma_ptr(trans, &txq->bc_tbl, bc_tbl_size);
 	if (ret) {
 		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
 		kfree(txq);
-- 
cgit v1.2.3-59-g8ed1b


From 92f78d4b15a40f6b055be73bbecc2e79b10638f0 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:50 +0300
Subject: iwlwifi: mvm: attempt to allocate smaller queues

We currently attempt to allocate queues that are 512 entries long,
but that requires 32 KiB memory, which may not be available, at
least not contiguously. If we fail to allocate, attempt to use a
smaller queue all the way down to 16 entries (which fit into a
single page).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.c8548d7cc08a.I5059c410e628726cbce98d6311b690c632d00f97@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 23 +++++++++++++++--------
 1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index ca5dcd7643e0..b36aef1d61b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -749,16 +749,23 @@ static int iwl_mvm_tvqm_enable_txq(struct iwl_mvm *mvm,
 		size = max_t(u32, IWL_MGMT_QUEUE_SIZE,
 			     mvm->trans->cfg->min_txq_size);
 	}
-	queue = iwl_trans_txq_alloc(mvm->trans,
-				    cpu_to_le16(TX_QUEUE_CFG_ENABLE_QUEUE),
-				    sta_id, tid, SCD_QUEUE_CFG, size, timeout);
 
-	if (queue < 0) {
-		IWL_DEBUG_TX_QUEUES(mvm,
-				    "Failed allocating TXQ for sta %d tid %d, ret: %d\n",
-				    sta_id, tid, queue);
+	do {
+		__le16 enable = cpu_to_le16(TX_QUEUE_CFG_ENABLE_QUEUE);
+
+		queue = iwl_trans_txq_alloc(mvm->trans, enable,
+					    sta_id, tid, SCD_QUEUE_CFG,
+					    size, timeout);
+
+		if (queue < 0)
+			IWL_DEBUG_TX_QUEUES(mvm,
+					    "Failed allocating TXQ of size %d for sta %d tid %d, ret: %d\n",
+					    size, sta_id, tid, queue);
+		size /= 2;
+	} while (queue < 0 && size >= 16);
+
+	if (queue < 0)
 		return queue;
-	}
 
 	IWL_DEBUG_TX_QUEUES(mvm, "Enabling TXQ #%d for sta %d tid %d\n",
 			    queue, sta_id, tid);
-- 
cgit v1.2.3-59-g8ed1b


From 281277b206f03f8e66e6d5c151d75ac9c15e066d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:51 +0300
Subject: iwlwifi: dbg: mark a variable __maybe_unused

If CONFIG_IWLWIFI_DEBUGFS is not set, the variable is assigned
but not checked, resulting in a compiler warning. Suppress it,
we need the variable for the debugfs-enabled case.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.485f886f5a6c.I8a91c560c26cced33b15d8419caebb53a9abcc2d@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 04de7688884d..2a271aad8106 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -2760,7 +2760,7 @@ void iwl_fw_dbg_stop_restart_recording(struct iwl_fw_runtime *fwrt,
 				       struct iwl_fw_dbg_params *params,
 				       bool stop)
 {
-	int ret = 0;
+	int ret __maybe_unused = 0;
 
 	if (test_bit(STATUS_FW_ERROR, &fwrt->trans->status))
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From a8e82c36081e574c0c7bdc1b407a697465777069 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:52 +0300
Subject: iwlwifi: pcie: remove some dead code

We can never get into this code with a gen2/3 device, and therefore
don't need to allocate the byte count tables in a single contiguous
DMA region. Just WARN and bail out if something is misconfigured.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.a748d33252ef.If2f5810016efb40b041f93fe8c6b4c251542e2f1@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 4582d418ba4d..9ff78bca460b 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -8,7 +8,7 @@
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2003 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -954,10 +954,10 @@ static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u16 bc_tbls_size = trans->trans_cfg->base_params->num_of_queues;
 
-	bc_tbls_size *= (trans->trans_cfg->device_family >=
-			 IWL_DEVICE_FAMILY_AX210) ?
-		sizeof(struct iwl_gen3_bc_tbl) :
-		sizeof(struct iwlagn_scd_bc_tbl);
+	if (WARN_ON(trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210))
+		return -EINVAL;
+
+	bc_tbls_size *= sizeof(struct iwlagn_scd_bc_tbl);
 
 	/*It is not allowed to alloc twice, so warn when this happens.
 	 * We cannot rely on the previous allocation, so free and fail */
-- 
cgit v1.2.3-59-g8ed1b


From c239feec5038585d8ba622ecc97322e0e2f3398c Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:53 +0300
Subject: iwlwifi: pcie: gen2: use DMA pool for byte-count tables

Since the recent patch in this area, we no longer allocate 64k
for a single queue, but only 1k, which still means a full page.
Use a DMA pool to reduce this further, since we will have a lot
of queues in a typical system that can share pages.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.6e84c79aea30.Ie9a417132812d110ec1cc87852f101477c01cfcb@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h |  1 +
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c    | 19 +++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c  | 11 ++++++++---
 3 files changed, 28 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index abe649af689c..43f81204c152 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -556,6 +556,7 @@ struct iwl_trans_pcie {
 	u32 scd_base_addr;
 	struct iwl_dma_ptr scd_bc_tbls;
 	struct iwl_dma_ptr kw;
+	struct dma_pool *bc_pool;
 
 	struct iwl_txq *txq_memory;
 	struct iwl_txq *txq[IWL_MAX_TVQM_QUEUES];
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index a0daae058c1c..8ccfc7cc7348 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -3672,6 +3672,25 @@ struct iwl_trans *iwl_trans_pcie_alloc(struct pci_dev *pdev,
 
 	init_waitqueue_head(&trans_pcie->sx_waitq);
 
+	/*
+	 * For gen2 devices, we use a single allocation for each byte-count
+	 * table, but they're pretty small (1k) so use a DMA pool that we
+	 * allocate here.
+	 */
+	if (cfg_trans->gen2) {
+		size_t bc_tbl_size;
+
+		if (cfg_trans->device_family >= IWL_DEVICE_FAMILY_AX210)
+			bc_tbl_size = sizeof(struct iwl_gen3_bc_tbl);
+		else
+			bc_tbl_size = sizeof(struct iwlagn_scd_bc_tbl);
+
+		trans_pcie->bc_pool = dmam_pool_create("iwlwifi:bc", &pdev->dev,
+						       bc_tbl_size, 256, 0);
+		if (!trans_pcie->bc_pool)
+			goto out_no_pci;
+	}
+
 	if (trans_pcie->msix_enabled) {
 		ret = iwl_pcie_init_msix_handler(pdev, trans_pcie);
 		if (ret)
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index 84df12ff131a..bb55563bba68 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -1224,7 +1224,9 @@ void iwl_pcie_gen2_txq_free_memory(struct iwl_trans *trans,
 	}
 
 	kfree(txq->entries);
-	iwl_pcie_free_dma_ptr(trans, &txq->bc_tbl);
+	if (txq->bc_tbl.addr)
+		dma_pool_free(trans_pcie->bc_pool, txq->bc_tbl.addr,
+			      txq->bc_tbl.dma);
 	kfree(txq);
 }
 
@@ -1272,6 +1274,7 @@ int iwl_trans_pcie_dyn_txq_alloc_dma(struct iwl_trans *trans,
 				     struct iwl_txq **intxq, int size,
 				     unsigned int timeout)
 {
+	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	size_t bc_tbl_size, bc_tbl_entries;
 	struct iwl_txq *txq;
 	int ret;
@@ -1290,8 +1293,10 @@ int iwl_trans_pcie_dyn_txq_alloc_dma(struct iwl_trans *trans,
 	txq = kzalloc(sizeof(*txq), GFP_KERNEL);
 	if (!txq)
 		return -ENOMEM;
-	ret = iwl_pcie_alloc_dma_ptr(trans, &txq->bc_tbl, bc_tbl_size);
-	if (ret) {
+
+	txq->bc_tbl.addr = dma_pool_alloc(trans_pcie->bc_pool, GFP_KERNEL,
+					  &txq->bc_tbl.dma);
+	if (!txq->bc_tbl.addr) {
 		IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
 		kfree(txq);
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 3da1a4e3287094887397c083d1549b2d067fa1ed Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:54 +0300
Subject: iwlwifi: use longer queues for 256-BA

When we have 256 block-ack support, we may need to be very fast
to provide a lot of frames to the hardware to transmit, but that
cannot be guaranteed. Use a longer queue size to have more time,
and the next possible queue size is 1024 since it must be a power
of two.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.851866c7e4c4.I13fa678929431f1694fd202c1da40aa476ab70fe@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-fh.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 1fcc346ba425..1daa653bcb99 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -211,7 +211,7 @@ static const struct iwl_ht_params iwl_22000_ht_params = {
 	.trans.base_params = &iwl_ax210_base_params,			\
 	.min_txq_size = 128,						\
 	.gp2_reg_addr = 0xd02c68,					\
-	.min_256_ba_txq_size = 512,					\
+	.min_256_ba_txq_size = 1024,					\
 	.mon_dram_regs = {						\
 		.write_ptr = {						\
 			.addr = DBGC_CUR_DBGBUF_STATUS,			\
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
index a8e988281eaf..e77d8d13cb51 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-fh.h
@@ -646,7 +646,7 @@ struct iwl_rb_status {
 #define TFD_QUEUE_CB_SIZE(x)	(ilog2(x) - 3)
 #define TFD_QUEUE_SIZE_BC_DUP	(64)
 #define TFD_QUEUE_BC_SIZE	(TFD_QUEUE_SIZE_MAX + TFD_QUEUE_SIZE_BC_DUP)
-#define TFD_QUEUE_BC_SIZE_GEN3	512
+#define TFD_QUEUE_BC_SIZE_GEN3	1024
 #define IWL_TX_DMA_MASK        DMA_BIT_MASK(36)
 #define IWL_NUM_OF_TBS		20
 #define IWL_TFH_NUM_TBS		25
-- 
cgit v1.2.3-59-g8ed1b


From e88e2cd0b80f2dfa18f72cfb7a3b8ab3d8d59894 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Sat, 25 Apr 2020 13:04:55 +0300
Subject: iwlwifi: tx: enable A-MSDU in low latency mode

Tests have shown that we can meet low latency KPIs with A-MSDU
enabled so enable it to achieve max TPT.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.e469ce6501e4.Ibdecebca830bdfbf5220693dd1f5367f7736242d@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/tx.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
index a8d0d17f79fd..2f6484e0d726 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/tx.c
@@ -8,7 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -920,11 +920,8 @@ static int iwl_mvm_tx_tso(struct iwl_mvm *mvm, struct sk_buff *skb,
 	 * No need to lock amsdu_in_ampdu_allowed since it can't be modified
 	 * during an BA session.
 	 */
-	if (info->flags & IEEE80211_TX_CTL_AMPDU &&
-	    !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed)
-		return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
-
-	if (iwl_mvm_vif_low_latency(iwl_mvm_vif_from_mac80211(mvmsta->vif)) ||
+	if ((info->flags & IEEE80211_TX_CTL_AMPDU &&
+	     !mvmsta->tid_data[tid].amsdu_in_ampdu_allowed) ||
 	    !(mvmsta->amsdu_enabled & BIT(tid)))
 		return iwl_mvm_tx_tso_segment(skb, 1, netdev_flags, mpdus_skb);
 
-- 
cgit v1.2.3-59-g8ed1b


From 3d1d87ab1a3a40c5fea3dde11d4532c4efd11093 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:56 +0300
Subject: iwlwifi: mvm: don't transmit on unallocated queue

We can currently end up transmitting on an unallocated queue, if
the allocation fails. Stop doing that, by simply not transmitting.
We don't have any better strategy here, unfortunately, but the
previous commits make that much less likely.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.dcf1801f25ef.I6d71e13ea042765800f2ee41401b8eb282527c34@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index b36aef1d61b0..44d4720b7629 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -1400,7 +1400,17 @@ void iwl_mvm_add_new_dqa_stream_wk(struct work_struct *wk)
 		if (tid == IEEE80211_NUM_TIDS)
 			tid = IWL_MAX_TID_COUNT;
 
-		iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid);
+		/*
+		 * We can't really do much here, but if this fails we can't
+		 * transmit anyway - so just don't transmit the frame etc.
+		 * and let them back up ... we've tried our best to allocate
+		 * a queue in the function itself.
+		 */
+		if (iwl_mvm_sta_alloc_queue(mvm, txq->sta, txq->ac, tid)) {
+			list_del_init(&mvmtxq->list);
+			continue;
+		}
+
 		list_del_init(&mvmtxq->list);
 		local_bh_disable();
 		iwl_mvm_mac_itxq_xmit(mvm->hw, txq);
-- 
cgit v1.2.3-59-g8ed1b


From e9a7f025e72c919d7d05318acf887541266933b9 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:57 +0300
Subject: iwlwifi: remove outdated copyright print/module statement

Remove the outdated copyright, don't print it, and update the
module author to actually be Intel, not Intel's copyright.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.dc86a4e9451a.Ice2e21b6427a4b57f953dba9ceb5b8b96b251a8c@changeid
---
 drivers/net/wireless/intel/iwlwifi/dvm/main.c | 2 +-
 drivers/net/wireless/intel/iwlwifi/iwl-drv.c  | 3 +--
 drivers/net/wireless/intel/iwlwifi/iwl-drv.h  | 7 +++----
 drivers/net/wireless/intel/iwlwifi/mvm/ops.c  | 2 +-
 4 files changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/dvm/main.c b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
index 99822744122f..b882705ff66d 100644
--- a/drivers/net/wireless/intel/iwlwifi/dvm/main.c
+++ b/drivers/net/wireless/intel/iwlwifi/dvm/main.c
@@ -52,7 +52,7 @@
 
 #define DRV_DESCRIPTION	"Intel(R) Wireless WiFi Link AGN driver for Linux"
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR(DRV_COPYRIGHT " " DRV_AUTHOR);
+MODULE_AUTHOR(DRV_AUTHOR);
 MODULE_LICENSE("GPL");
 
 /* Please keep this array *SORTED* by hex value.
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
index f3148e70f85c..04f14bfdd091 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.c
@@ -85,7 +85,7 @@
 
 #define DRV_DESCRIPTION	"Intel(R) Wireless WiFi driver for Linux"
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR(DRV_COPYRIGHT " " DRV_AUTHOR);
+MODULE_AUTHOR(DRV_AUTHOR);
 MODULE_LICENSE("GPL");
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
@@ -1774,7 +1774,6 @@ static int __init iwl_drv_init(void)
 		INIT_LIST_HEAD(&iwlwifi_opmode_table[i].drv);
 
 	pr_info(DRV_DESCRIPTION "\n");
-	pr_info(DRV_COPYRIGHT "\n");
 
 #ifdef CONFIG_IWLWIFI_DEBUGFS
 	/* Create the root of iwlwifi debugfs subsystem. */
diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
index 2be30af7bdc3..8938a6467996 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-drv.h
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2008 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2008 - 2014, 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  *
  * This program is free software; you can redistribute it and/or modify
@@ -26,7 +26,7 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2014 Intel Corporation. All rights reserved.
+ * Copyright(c) 2005 - 2014, 2020 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * All rights reserved.
  *
@@ -63,8 +63,7 @@
 
 /* for all modules */
 #define DRV_NAME        "iwlwifi"
-#define DRV_COPYRIGHT	"Copyright(c) 2003- 2015 Intel Corporation"
-#define DRV_AUTHOR     "<linuxwifi@intel.com>"
+#define DRV_AUTHOR	"Intel Corporation <linuxwifi@intel.com>"
 
 /* radio config bits (actual values from NVM definition) */
 #define NVM_RF_CFG_DASH_MSK(x)   (x & 0x3)         /* bits 0-1   */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
index bde1b9b5face..d095ff847be9 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ops.c
@@ -84,7 +84,7 @@
 
 #define DRV_DESCRIPTION	"The new Intel(R) wireless AGN driver for Linux"
 MODULE_DESCRIPTION(DRV_DESCRIPTION);
-MODULE_AUTHOR(DRV_COPYRIGHT " " DRV_AUTHOR);
+MODULE_AUTHOR(DRV_AUTHOR);
 MODULE_LICENSE("GPL");
 
 static const struct iwl_op_mode_ops iwl_mvm_ops;
-- 
cgit v1.2.3-59-g8ed1b


From b1c860f6ec73d993f0427e8d0d70c8f3d6625e6d Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Sat, 25 Apr 2020 13:04:58 +0300
Subject: iwlwifi: pcie: skip fragmented receive buffers

We don't really expect fragmented RBs, and don't seem to be seeing
them in practice since that would've caused a crash. Nevertheless,
we should be expecting the hardware to send them.

Parse the flag indicating a fragmented buffer, but then discard it
and any fragments thereof, at least for now. We need to do more
work in the higher layers to properly deal with this, since we may
not get "normal" firmware notifications that are fragmented, only
RX, and then we need to put it back together and add the necessary
API to report a chain of things to the higher layers; this doesn't
fit into the struct iwl_rx_cmd_buffer today.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.e78a59f70b1d.Ica656a98a4e4220d73edc97600edd680cbc97241@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h |  4 ++-
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c       | 32 ++++++++++++++++++----
 2 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 43f81204c152..b76c0396335a 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -189,6 +189,8 @@ struct iwl_rx_completion_desc {
  * @rb_stts_dma: bus address of receive buffer status
  * @lock:
  * @queue: actual rx queue. Not used for multi-rx queue.
+ * @next_rb_is_fragment: indicates that the previous RB that we handled set
+ *	the fragmented flag, so the next one is still another fragment
  *
  * NOTE:  rx_free and rx_used are used as a FIFO for iwl_rx_mem_buffers
  */
@@ -214,7 +216,7 @@ struct iwl_rxq {
 	u32 queue_size;
 	struct list_head rx_free;
 	struct list_head rx_used;
-	bool need_update;
+	bool need_update, next_rb_is_fragment;
 	void *rb_stts;
 	dma_addr_t rb_stts_dma;
 	spinlock_t lock;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 8c29071cb415..72d1cf27e6a4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1427,7 +1427,8 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 }
 
 static struct iwl_rx_mem_buffer *iwl_pcie_get_rxb(struct iwl_trans *trans,
-						  struct iwl_rxq *rxq, int i)
+						  struct iwl_rxq *rxq, int i,
+						  bool *join)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_rx_mem_buffer *rxb;
@@ -1441,10 +1442,12 @@ static struct iwl_rx_mem_buffer *iwl_pcie_get_rxb(struct iwl_trans *trans,
 		return rxb;
 	}
 
-	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210)
+	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
 		vid = le16_to_cpu(rxq->cd[i].rbid);
-	else
+		*join = rxq->cd[i].flags & IWL_RX_CD_FLAGS_FRAGMENTED;
+	} else {
 		vid = le32_to_cpu(rxq->bd_32[i]) & 0x0FFF; /* 12-bit VID */
+	}
 
 	if (!vid || vid > RX_POOL_SIZE(trans_pcie->num_rx_bufs))
 		goto out_err;
@@ -1502,6 +1505,7 @@ restart:
 		u32 rb_pending_alloc =
 			atomic_read(&trans_pcie->rba.req_pending) *
 			RX_CLAIM_REQ_ALLOC;
+		bool join = false;
 
 		if (unlikely(rb_pending_alloc >= rxq->queue_size / 2 &&
 			     !emergency)) {
@@ -1514,11 +1518,29 @@ restart:
 
 		IWL_DEBUG_RX(trans, "Q %d: HW = %d, SW = %d\n", rxq->id, r, i);
 
-		rxb = iwl_pcie_get_rxb(trans, rxq, i);
+		rxb = iwl_pcie_get_rxb(trans, rxq, i, &join);
 		if (!rxb)
 			goto out;
 
-		iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency, i);
+		if (unlikely(join || rxq->next_rb_is_fragment)) {
+			rxq->next_rb_is_fragment = join;
+			/*
+			 * We can only get a multi-RB in the following cases:
+			 *  - firmware issue, sending a too big notification
+			 *  - sniffer mode with a large A-MSDU
+			 *  - large MTU frames (>2k)
+			 * since the multi-RB functionality is limited to newer
+			 * hardware that cannot put multiple entries into a
+			 * single RB.
+			 *
+			 * Right now, the higher layers aren't set up to deal
+			 * with that, so discard all of these.
+			 */
+			list_add_tail(&rxb->list, &rxq->rx_free);
+			rxq->free_count++;
+		} else {
+			iwl_pcie_rx_handle_rb(trans, rxq, rxb, emergency, i);
+		}
 
 		i = (i + 1) & (rxq->queue_size - 1);
 
-- 
cgit v1.2.3-59-g8ed1b


From d3d2674cc6ff10e89bfbace7b943c09fb9052cca Mon Sep 17 00:00:00 2001
From: Liad Kaufman <liad.kaufman@intel.com>
Date: Sat, 25 Apr 2020 13:04:59 +0300
Subject: iwlwifi: dbg_ini: differentiate ax210 hw with same hw type

There are several "flavors" of HW that have the same HW type, but
can be told apart after reading a certain periphery (prph) register.
This is easy to do at runtime, but more complicated to do when
looking at the logs offline.

To make it easier to tell apart these "flavors" when looking at
the dumped dbg info, add these bits to the HW type, allowing
simple differentiation.

Signed-off-by: Liad Kaufman <liad.kaufman@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200425130140.330ea11d17ae.Ie59b25430a308090b15112ac6deedf4fbf487ff1@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/dbg.c        | 22 +++++++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/fw/error-dump.h | 13 +++++++++++--
 2 files changed, 32 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
index 2a271aad8106..4d3687cc83a4 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/dbg.c
@@ -1958,6 +1958,7 @@ static u32 iwl_dump_ini_info(struct iwl_fw_runtime *fwrt,
 	struct iwl_fw_ini_dump_cfg_name *cfg_name;
 	u32 size = sizeof(*tlv) + sizeof(*dump);
 	u32 num_of_cfg_names = 0;
+	u32 hw_type;
 
 	list_for_each_entry(node, &fwrt->trans->dbg.debug_info_tlv_list, list) {
 		size += sizeof(*cfg_name);
@@ -1986,7 +1987,26 @@ static u32 iwl_dump_ini_info(struct iwl_fw_runtime *fwrt,
 	dump->ver_subtype = cpu_to_le32(fwrt->dump.fw_ver.subtype);
 
 	dump->hw_step = cpu_to_le32(CSR_HW_REV_STEP(fwrt->trans->hw_rev));
-	dump->hw_type = cpu_to_le32(CSR_HW_REV_TYPE(fwrt->trans->hw_rev));
+
+	/*
+	 * Several HWs all have type == 0x42, so we'll override this value
+	 * according to the detected HW
+	 */
+	hw_type = CSR_HW_REV_TYPE(fwrt->trans->hw_rev);
+	if (hw_type == IWL_AX210_HW_TYPE) {
+		u32 prph_val = iwl_read_prph(fwrt->trans, WFPM_OTP_CFG1_ADDR);
+		u32 is_jacket = !!(prph_val & WFPM_OTP_CFG1_IS_JACKET_BIT);
+		u32 is_cdb = !!(prph_val & WFPM_OTP_CFG1_IS_CDB_BIT);
+		u32 masked_bits = is_jacket | (is_cdb << 1);
+
+		/*
+		 * The HW type depends on certain bits in this case, so add
+		 * these bits to the HW type. We won't have collisions since we
+		 * add these bits after the highest possible bit in the mask.
+		 */
+		hw_type |= masked_bits << IWL_AX210_HW_TYPE_ADDITION_SHIFT;
+	}
+	dump->hw_type = cpu_to_le32(hw_type);
 
 	dump->rf_id_flavor =
 		cpu_to_le32(CSR_HW_RFID_FLAVOR(fwrt->trans->hw_rf_id));
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
index f008e1bbfdf4..72bfc64580ab 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/error-dump.h
@@ -8,7 +8,7 @@
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2014 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -394,6 +394,15 @@ struct iwl_fw_ini_dump_cfg_name {
 	u8 cfg_name[IWL_FW_INI_MAX_CFG_NAME];
 } __packed;
 
+/* AX210's HW type */
+#define IWL_AX210_HW_TYPE 0x42
+/* How many bits to roll when adding to the HW type of AX210 HW */
+#define IWL_AX210_HW_TYPE_ADDITION_SHIFT 12
+/* This prph is used to tell apart HW_TYPE == 0x42 NICs */
+#define WFPM_OTP_CFG1_ADDR 0xd03098
+#define WFPM_OTP_CFG1_IS_JACKET_BIT BIT(4)
+#define WFPM_OTP_CFG1_IS_CDB_BIT BIT(5)
+
 /* struct iwl_fw_ini_dump_info - ini dump information
  * @version: dump version
  * @time_point: time point that caused the dump collection
-- 
cgit v1.2.3-59-g8ed1b


From 33181bb8e8fe947e1f8020a4b103601a4cac94d9 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 8 May 2020 10:46:08 -0700
Subject: selftests/bpf: Generalize helpers to control background listener

Move the following routines that let us start a background listener
and connect to a server by fd into network_helpers.c (linked into test_progs):
* start_server - socket+bind+listen
* connect_to_fd - connect to the server identified by fd

These will be used in the next commit.

Also, extend these helpers to support AF_INET6 and accept the family
as an argument.

v5:
* drop pthread.h (Martin KaFai Lau)
* add SO_SNDTIMEO (Martin KaFai Lau)

v4:
* export extra helper to start server without a thread (Martin KaFai Lau)
* tcp_rtt is no longer starting background thread (Martin KaFai Lau)

v2:
* put helpers into network_helpers.c (Andrii Nakryiko)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200508174611.228805-2-sdf@google.com
---
 tools/testing/selftests/bpf/Makefile             |   2 +-
 tools/testing/selftests/bpf/network_helpers.c    |  93 ++++++++++++++++++
 tools/testing/selftests/bpf/network_helpers.h    |  10 ++
 tools/testing/selftests/bpf/prog_tests/tcp_rtt.c | 116 +----------------------
 4 files changed, 108 insertions(+), 113 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/network_helpers.c
 create mode 100644 tools/testing/selftests/bpf/network_helpers.h

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3d942be23d09..8f25966b500b 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -354,7 +354,7 @@ endef
 TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
-			 flow_dissector_load.h
+			 network_helpers.c flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
new file mode 100644
index 000000000000..0073dddb72fd
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include <errno.h>
+#include <stdbool.h>
+#include <stdio.h>
+#include <string.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+
+#include "network_helpers.h"
+
+#define clean_errno() (errno == 0 ? "None" : strerror(errno))
+#define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
+	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
+
+int start_server(int family, int type)
+{
+	struct sockaddr_storage addr = {};
+	socklen_t len;
+	int fd;
+
+	if (family == AF_INET) {
+		struct sockaddr_in *sin = (void *)&addr;
+
+		sin->sin_family = AF_INET;
+		len = sizeof(*sin);
+	} else {
+		struct sockaddr_in6 *sin6 = (void *)&addr;
+
+		sin6->sin6_family = AF_INET6;
+		len = sizeof(*sin6);
+	}
+
+	fd = socket(family, type | SOCK_NONBLOCK, 0);
+	if (fd < 0) {
+		log_err("Failed to create server socket");
+		return -1;
+	}
+
+	if (bind(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Failed to bind socket");
+		close(fd);
+		return -1;
+	}
+
+	if (type == SOCK_STREAM) {
+		if (listen(fd, 1) < 0) {
+			log_err("Failed to listed on socket");
+			close(fd);
+			return -1;
+		}
+	}
+
+	return fd;
+}
+
+static const struct timeval timeo_sec = { .tv_sec = 3 };
+static const size_t timeo_optlen = sizeof(timeo_sec);
+
+int connect_to_fd(int family, int type, int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int fd;
+
+	fd = socket(family, type, 0);
+	if (fd < 0) {
+		log_err("Failed to create client socket");
+		return -1;
+	}
+
+	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, timeo_optlen)) {
+		log_err("Failed to set SO_RCVTIMEO");
+		goto out;
+	}
+
+	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		goto out;
+	}
+
+	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
+		log_err("Fail to connect to server with family %d", family);
+		goto out;
+	}
+
+	return fd;
+
+out:
+	close(fd);
+	return -1;
+}
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
new file mode 100644
index 000000000000..30068eacc1a2
--- /dev/null
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -0,0 +1,10 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef __NETWORK_HELPERS_H
+#define __NETWORK_HELPERS_H
+#include <sys/socket.h>
+#include <sys/types.h>
+
+int start_server(int family, int type);
+int connect_to_fd(int family, int type, int server_fd);
+
+#endif
diff --git a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
index e56b52ab41da..9013a0c01eed 100644
--- a/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
+++ b/tools/testing/selftests/bpf/prog_tests/tcp_rtt.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
 #include "cgroup_helpers.h"
+#include "network_helpers.h"
 
 struct tcp_rtt_storage {
 	__u32 invoked;
@@ -87,34 +88,6 @@ static int verify_sk(int map_fd, int client_fd, const char *msg, __u32 invoked,
 	return err;
 }
 
-static int connect_to_server(int server_fd)
-{
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int fd;
-
-	fd = socket(AF_INET, SOCK_STREAM, 0);
-	if (fd < 0) {
-		log_err("Failed to create client socket");
-		return -1;
-	}
-
-	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
-		log_err("Failed to get server addr");
-		goto out;
-	}
-
-	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
-		log_err("Fail to connect to server");
-		goto out;
-	}
-
-	return fd;
-
-out:
-	close(fd);
-	return -1;
-}
 
 static int run_test(int cgroup_fd, int server_fd)
 {
@@ -145,7 +118,7 @@ static int run_test(int cgroup_fd, int server_fd)
 		goto close_bpf_object;
 	}
 
-	client_fd = connect_to_server(server_fd);
+	client_fd = connect_to_fd(AF_INET, SOCK_STREAM, server_fd);
 	if (client_fd < 0) {
 		err = -1;
 		goto close_bpf_object;
@@ -180,103 +153,22 @@ close_bpf_object:
 	return err;
 }
 
-static int start_server(void)
-{
-	struct sockaddr_in addr = {
-		.sin_family = AF_INET,
-		.sin_addr.s_addr = htonl(INADDR_LOOPBACK),
-	};
-	int fd;
-
-	fd = socket(AF_INET, SOCK_STREAM | SOCK_NONBLOCK, 0);
-	if (fd < 0) {
-		log_err("Failed to create server socket");
-		return -1;
-	}
-
-	if (bind(fd, (const struct sockaddr *)&addr, sizeof(addr)) < 0) {
-		log_err("Failed to bind socket");
-		close(fd);
-		return -1;
-	}
-
-	return fd;
-}
-
-static pthread_mutex_t server_started_mtx = PTHREAD_MUTEX_INITIALIZER;
-static pthread_cond_t server_started = PTHREAD_COND_INITIALIZER;
-static volatile bool server_done = false;
-
-static void *server_thread(void *arg)
-{
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int fd = *(int *)arg;
-	int client_fd;
-	int err;
-
-	err = listen(fd, 1);
-
-	pthread_mutex_lock(&server_started_mtx);
-	pthread_cond_signal(&server_started);
-	pthread_mutex_unlock(&server_started_mtx);
-
-	if (CHECK_FAIL(err < 0)) {
-		perror("Failed to listed on socket");
-		return ERR_PTR(err);
-	}
-
-	while (true) {
-		client_fd = accept(fd, (struct sockaddr *)&addr, &len);
-		if (client_fd == -1 && errno == EAGAIN) {
-			usleep(50);
-			continue;
-		}
-		break;
-	}
-	if (CHECK_FAIL(client_fd < 0)) {
-		perror("Failed to accept client");
-		return ERR_PTR(err);
-	}
-
-	while (!server_done)
-		usleep(50);
-
-	close(client_fd);
-
-	return NULL;
-}
-
 void test_tcp_rtt(void)
 {
 	int server_fd, cgroup_fd;
-	pthread_t tid;
-	void *server_res;
 
 	cgroup_fd = test__join_cgroup("/tcp_rtt");
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server();
+	server_fd = start_server(AF_INET, SOCK_STREAM);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 
-	if (CHECK_FAIL(pthread_create(&tid, NULL, server_thread,
-				      (void *)&server_fd)))
-		goto close_server_fd;
-
-	pthread_mutex_lock(&server_started_mtx);
-	pthread_cond_wait(&server_started, &server_started_mtx);
-	pthread_mutex_unlock(&server_started_mtx);
-
 	CHECK_FAIL(run_test(cgroup_fd, server_fd));
 
-	server_done = true;
-	CHECK_FAIL(pthread_join(tid, &server_res));
-	CHECK_FAIL(IS_ERR(server_res));
-
-close_server_fd:
 	close(server_fd);
+
 close_cgroup_fd:
 	close(cgroup_fd);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 488a23b89d175cc78f352417114f4f5a10470722 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 8 May 2020 10:46:09 -0700
Subject: selftests/bpf: Move existing common networking parts into
 network_helpers

1. Move pkt_v4 and pkt_v6 into network_helpers and adjust the users.
2. Copy-paste spin_lock_thread into two tests that use it.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/20200508174611.228805-3-sdf@google.com
---
 tools/testing/selftests/bpf/network_helpers.c      | 17 ++++++++++++
 tools/testing/selftests/bpf/network_helpers.h      | 29 +++++++++++++++++++++
 .../selftests/bpf/prog_tests/fexit_bpf2bpf.c       |  1 +
 .../selftests/bpf/prog_tests/flow_dissector.c      |  1 +
 .../bpf/prog_tests/flow_dissector_load_bytes.c     |  1 +
 .../testing/selftests/bpf/prog_tests/global_data.c |  1 +
 tools/testing/selftests/bpf/prog_tests/kfree_skb.c |  1 +
 tools/testing/selftests/bpf/prog_tests/l4lb_all.c  |  1 +
 tools/testing/selftests/bpf/prog_tests/map_lock.c  | 14 ++++++++++
 .../testing/selftests/bpf/prog_tests/pkt_access.c  |  1 +
 .../selftests/bpf/prog_tests/pkt_md_access.c       |  1 +
 .../selftests/bpf/prog_tests/prog_run_xattr.c      |  1 +
 .../selftests/bpf/prog_tests/queue_stack_map.c     |  1 +
 .../selftests/bpf/prog_tests/signal_pending.c      |  1 +
 tools/testing/selftests/bpf/prog_tests/skb_ctx.c   |  1 +
 tools/testing/selftests/bpf/prog_tests/spinlock.c  | 14 ++++++++++
 tools/testing/selftests/bpf/prog_tests/xdp.c       |  1 +
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     |  1 +
 .../testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c |  1 +
 .../selftests/bpf/prog_tests/xdp_noinline.c        |  1 +
 tools/testing/selftests/bpf/test_progs.c           | 30 ----------------------
 tools/testing/selftests/bpf/test_progs.h           | 23 -----------------
 22 files changed, 90 insertions(+), 53 deletions(-)

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 0073dddb72fd..0ff64b70b746 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -14,6 +14,23 @@
 #define log_err(MSG, ...) fprintf(stderr, "(%s:%d: errno: %s) " MSG "\n", \
 	__FILE__, __LINE__, clean_errno(), ##__VA_ARGS__)
 
+struct ipv4_packet pkt_v4 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
+	.iph.ihl = 5,
+	.iph.protocol = IPPROTO_TCP,
+	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+	.tcp.doff = 5,
+};
+
+struct ipv6_packet pkt_v6 = {
+	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
+	.iph.nexthdr = IPPROTO_TCP,
+	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
+	.tcp.urg_ptr = 123,
+	.tcp.doff = 5,
+};
+
 int start_server(int family, int type)
 {
 	struct sockaddr_storage addr = {};
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 30068eacc1a2..a0be7db4f67d 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -3,6 +3,35 @@
 #define __NETWORK_HELPERS_H
 #include <sys/socket.h>
 #include <sys/types.h>
+#include <linux/types.h>
+typedef __u16 __sum16;
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <netinet/tcp.h>
+#include <bpf/bpf_endian.h>
+
+#define MAGIC_VAL 0x1234
+#define NUM_ITER 100000
+#define VIP_NUM 5
+#define MAGIC_BYTES 123
+
+/* ipv4 test vector */
+struct ipv4_packet {
+	struct ethhdr eth;
+	struct iphdr iph;
+	struct tcphdr tcp;
+} __packed;
+extern struct ipv4_packet pkt_v4;
+
+/* ipv6 test vector */
+struct ipv6_packet {
+	struct ethhdr eth;
+	struct ipv6hdr iph;
+	struct tcphdr tcp;
+} __packed;
+extern struct ipv6_packet pkt_v6;
 
 int start_server(int family, int type);
 int connect_to_fd(int family, int type, int server_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
index c2642517e1d8..a895bfed55db 100644
--- a/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/fexit_bpf2bpf.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019 Facebook */
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_fexit_bpf2bpf_common(const char *obj_file,
 				      const char *target_obj_file,
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 92563898867c..2301c4d3ecec 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 #include <error.h>
 #include <linux/if.h>
 #include <linux/if_tun.h>
diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
index dc5ef155ec28..0e8a4d2f023d 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_load_bytes.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_flow_dissector_load_bytes(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/global_data.c b/tools/testing/selftests/bpf/prog_tests/global_data.c
index c680926fce73..e3cb62b0a110 100644
--- a/tools/testing/selftests/bpf/prog_tests/global_data.c
+++ b/tools/testing/selftests/bpf/prog_tests/global_data.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_global_data_number(struct bpf_object *obj, __u32 duration)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
index 7507c8f689bc..42c3a3103c26 100644
--- a/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
+++ b/tools/testing/selftests/bpf/prog_tests/kfree_skb.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 struct meta {
 	int ifindex;
diff --git a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
index eaf64595be88..c2d373e294bb 100644
--- a/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
+++ b/tools/testing/selftests/bpf/prog_tests/l4lb_all.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void test_l4lb(const char *file)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/map_lock.c b/tools/testing/selftests/bpf/prog_tests/map_lock.c
index 8f91f1881d11..ce17b1ed8709 100644
--- a/tools/testing/selftests/bpf/prog_tests/map_lock.c
+++ b/tools/testing/selftests/bpf/prog_tests/map_lock.c
@@ -1,5 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+	__u32 duration, retval;
+	int err, prog_fd = *(u32 *) arg;
+
+	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	pthread_exit(arg);
+}
 
 static void *parallel_map_access(void *arg)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
index a2537dfa899c..44b514fabccd 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_access.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_pkt_access(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
index 5f7aea605019..939015cd6dba 100644
--- a/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
+++ b/tools/testing/selftests/bpf/prog_tests/pkt_md_access.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_pkt_md_access(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
index 5dd89b941f53..dde2b7ae7bc9 100644
--- a/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
+++ b/tools/testing/selftests/bpf/prog_tests/prog_run_xattr.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_prog_run_xattr(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
index faccc66f4e39..f47e7b1cb32c 100644
--- a/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
+++ b/tools/testing/selftests/bpf/prog_tests/queue_stack_map.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 enum {
 	QUEUE,
diff --git a/tools/testing/selftests/bpf/prog_tests/signal_pending.c b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
index 996e808f43a2..dfcbddcbe4d3 100644
--- a/tools/testing/selftests/bpf/prog_tests/signal_pending.c
+++ b/tools/testing/selftests/bpf/prog_tests/signal_pending.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 static void sigalrm_handler(int s) {}
 static struct sigaction sigalrm_action = {
diff --git a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
index 4538bd08203f..7021b92af313 100644
--- a/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
+++ b/tools/testing/selftests/bpf/prog_tests/skb_ctx.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_skb_ctx(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/spinlock.c b/tools/testing/selftests/bpf/prog_tests/spinlock.c
index 1ae00cd3174e..7577a77a4c4c 100644
--- a/tools/testing/selftests/bpf/prog_tests/spinlock.c
+++ b/tools/testing/selftests/bpf/prog_tests/spinlock.c
@@ -1,5 +1,19 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
+
+static void *spin_lock_thread(void *arg)
+{
+	__u32 duration, retval;
+	int err, prog_fd = *(u32 *) arg;
+
+	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
+				NULL, NULL, &retval, &duration);
+	CHECK(err || retval, "",
+	      "err %d errno %d retval %d duration %d\n",
+	      err, errno, retval, duration);
+	pthread_exit(arg);
+}
 
 void test_spinlock(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp.c b/tools/testing/selftests/bpf/prog_tests/xdp.c
index dcb5ecac778e..48921ff74850 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_xdp(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 3744196d7cba..6c8ca1c93f9b 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_xdp_adjust_tail(void)
 {
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
index a0f688c37023..2c6c570b21f8 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_bpf2bpf.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 #include <net/if.h>
 #include "test_xdp.skel.h"
 #include "test_xdp_bpf2bpf.skel.h"
diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
index c9404e6b226e..f284f72158ef 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_noinline.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <test_progs.h>
+#include <network_helpers.h>
 
 void test_xdp_noinline(void)
 {
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 93970ec1c9e9..0f411fdc4f6d 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -222,23 +222,6 @@ int test__join_cgroup(const char *path)
 	return fd;
 }
 
-struct ipv4_packet pkt_v4 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_IP),
-	.iph.ihl = 5,
-	.iph.protocol = IPPROTO_TCP,
-	.iph.tot_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
-};
-
-struct ipv6_packet pkt_v6 = {
-	.eth.h_proto = __bpf_constant_htons(ETH_P_IPV6),
-	.iph.nexthdr = IPPROTO_TCP,
-	.iph.payload_len = __bpf_constant_htons(MAGIC_BYTES),
-	.tcp.urg_ptr = 123,
-	.tcp.doff = 5,
-};
-
 int bpf_find_map(const char *test, struct bpf_object *obj, const char *name)
 {
 	struct bpf_map *map;
@@ -358,19 +341,6 @@ err:
 	return -1;
 }
 
-void *spin_lock_thread(void *arg)
-{
-	__u32 duration, retval;
-	int err, prog_fd = *(u32 *) arg;
-
-	err = bpf_prog_test_run(prog_fd, 10000, &pkt_v4, sizeof(pkt_v4),
-				NULL, NULL, &retval, &duration);
-	CHECK(err || retval, "",
-	      "err %d errno %d retval %d duration %d\n",
-	      err, errno, retval, duration);
-	pthread_exit(arg);
-}
-
 /* extern declarations for test funcs */
 #define DEFINE_TEST(name) extern void test_##name(void);
 #include <prog_tests/tests.h>
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 10188cc8e9e0..83287c76332b 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -87,24 +87,6 @@ extern void test__skip(void);
 extern void test__fail(void);
 extern int test__join_cgroup(const char *path);
 
-#define MAGIC_BYTES 123
-
-/* ipv4 test vector */
-struct ipv4_packet {
-	struct ethhdr eth;
-	struct iphdr iph;
-	struct tcphdr tcp;
-} __packed;
-extern struct ipv4_packet pkt_v4;
-
-/* ipv6 test vector */
-struct ipv6_packet {
-	struct ethhdr eth;
-	struct ipv6hdr iph;
-	struct tcphdr tcp;
-} __packed;
-extern struct ipv6_packet pkt_v6;
-
 #define PRINT_FAIL(format...)                                                  \
 	({                                                                     \
 		test__fail();                                                  \
@@ -143,10 +125,6 @@ extern struct ipv6_packet pkt_v6;
 #define CHECK_ATTR(condition, tag, format...) \
 	_CHECK(condition, tag, tattr.duration, format)
 
-#define MAGIC_VAL 0x1234
-#define NUM_ITER 100000
-#define VIP_NUM 5
-
 static inline __u64 ptr_to_u64(const void *ptr)
 {
 	return (__u64) (unsigned long) ptr;
@@ -156,7 +134,6 @@ int bpf_find_map(const char *test, struct bpf_object *obj, const char *name);
 int compare_map_keys(int map1_fd, int map2_fd);
 int compare_stack_ips(int smap_fd, int amap_fd, int stack_trace_len);
 int extract_build_id(char *build_id, size_t size);
-void *spin_lock_thread(void *arg);
 
 #ifdef __x86_64__
 #define SYS_NANOSLEEP_KPROBE_NAME "__x64_sys_nanosleep"
-- 
cgit v1.2.3-59-g8ed1b


From cb0721c7e200750907bb8ef59b12646a5cb2dadf Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 8 May 2020 10:46:10 -0700
Subject: net: Refactor arguments of inet{,6}_bind

The intent is to add an additional bind parameter in the next commit.
Instead of adding another argument, let's convert all existing
flag arguments into an extendable bit field.

No functional changes.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200508174611.228805-4-sdf@google.com
---
 include/net/inet_common.h |  6 +++++-
 include/net/ipv6_stubs.h  |  2 +-
 net/core/filter.c         |  6 ++++--
 net/ipv4/af_inet.c        | 10 +++++-----
 net/ipv6/af_inet6.c       | 10 +++++-----
 5 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index ae2ba897675c..c38f4f7d660a 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -35,8 +35,12 @@ int inet_shutdown(struct socket *sock, int how);
 int inet_listen(struct socket *sock, int backlog);
 void inet_sock_destruct(struct sock *sk);
 int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
+/* Don't allocate port at this moment, defer to connect. */
+#define BIND_FORCE_ADDRESS_NO_PORT	(1 << 0)
+/* Grab and release socket lock. */
+#define BIND_WITH_LOCK			(1 << 1)
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-		bool force_bind_address_no_port, bool with_lock);
+		u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		 int peer);
 int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
diff --git a/include/net/ipv6_stubs.h b/include/net/ipv6_stubs.h
index a5f7c12c326a..6e622dd3122e 100644
--- a/include/net/ipv6_stubs.h
+++ b/include/net/ipv6_stubs.h
@@ -63,7 +63,7 @@ extern const struct ipv6_stub *ipv6_stub __read_mostly;
 /* A stub used by bpf helpers. Similarly ugly as ipv6_stub */
 struct ipv6_bpf_stub {
 	int (*inet6_bind)(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-			  bool force_bind_address_no_port, bool with_lock);
+			  u32 flags);
 	struct sock *(*udp6_lib_lookup)(struct net *net,
 				     const struct in6_addr *saddr, __be16 sport,
 				     const struct in6_addr *daddr, __be16 dport,
diff --git a/net/core/filter.c b/net/core/filter.c
index dfaf5df13722..fa9ddab5dd1f 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4538,7 +4538,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 			return err;
 		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
 			return err;
-		return __inet_bind(sk, addr, addr_len, true, false);
+		return __inet_bind(sk, addr, addr_len,
+				   BIND_FORCE_ADDRESS_NO_PORT);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (addr->sa_family == AF_INET6) {
 		if (addr_len < SIN6_LEN_RFC2133)
@@ -4548,7 +4549,8 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 		/* ipv6_bpf_stub cannot be NULL, since it's called from
 		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
 		 */
-		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, true, false);
+		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len,
+						 BIND_FORCE_ADDRESS_NO_PORT);
 #endif /* CONFIG_IPV6 */
 	}
 #endif /* CONFIG_INET */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 6177c4ba0037..68e74b1b0f26 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -450,12 +450,12 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		return err;
 
-	return __inet_bind(sk, uaddr, addr_len, false, true);
+	return __inet_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
 }
 EXPORT_SYMBOL(inet_bind);
 
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-		bool force_bind_address_no_port, bool with_lock)
+		u32 flags)
 {
 	struct sockaddr_in *addr = (struct sockaddr_in *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -506,7 +506,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	 *      would be illegal to use them (multicast/broadcast) in
 	 *      which case the sending device address is used.
 	 */
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
@@ -520,7 +520,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	/* Make sure we are allowed to bind here. */
 	if (snum || !(inet->bind_address_no_port ||
-		      force_bind_address_no_port)) {
+		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
 		if (sk->sk_prot->get_port(sk, snum)) {
 			inet->inet_saddr = inet->inet_rcv_saddr = 0;
 			err = -EADDRINUSE;
@@ -543,7 +543,7 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	sk_dst_reset(sk);
 	err = 0;
 out_release_sock:
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		release_sock(sk);
 out:
 	return err;
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 345baa0a754f..552c2592b81c 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -273,7 +273,7 @@ out_rcu_unlock:
 }
 
 static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
-			bool force_bind_address_no_port, bool with_lock)
+			u32 flags)
 {
 	struct sockaddr_in6 *addr = (struct sockaddr_in6 *)uaddr;
 	struct inet_sock *inet = inet_sk(sk);
@@ -297,7 +297,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	    !ns_capable(net->user_ns, CAP_NET_BIND_SERVICE))
 		return -EACCES;
 
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		lock_sock(sk);
 
 	/* Check these errors (active socket, double bind). */
@@ -400,7 +400,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 
 	/* Make sure we are allowed to bind here. */
 	if (snum || !(inet->bind_address_no_port ||
-		      force_bind_address_no_port)) {
+		      (flags & BIND_FORCE_ADDRESS_NO_PORT))) {
 		if (sk->sk_prot->get_port(sk, snum)) {
 			sk->sk_ipv6only = saved_ipv6only;
 			inet_reset_saddr(sk);
@@ -423,7 +423,7 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 	inet->inet_dport = 0;
 	inet->inet_daddr = 0;
 out:
-	if (with_lock)
+	if (flags & BIND_WITH_LOCK)
 		release_sock(sk);
 	return err;
 out_unlock:
@@ -451,7 +451,7 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len)
 	if (err)
 		return err;
 
-	return __inet6_bind(sk, uaddr, addr_len, false, true);
+	return __inet6_bind(sk, uaddr, addr_len, BIND_WITH_LOCK);
 }
 EXPORT_SYMBOL(inet6_bind);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8086fbaf49345f988deec539ec8e182b02914401 Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 8 May 2020 10:46:11 -0700
Subject: bpf: Allow any port in bpf_bind helper

We want to have tighter control over which ports we bind to in
the BPF_CGROUP_INET{4,6}_CONNECT hooks even if it means
connect() becomes slightly more expensive. The expensive part
comes from the fact that we now need to call inet_csk_get_port()
that verifies that the port is not used and allocates an entry
in the hash table for it.

Since we can't rely on "snum || !bind_address_no_port" to prevent
us from calling POST_BIND hook anymore, let's add another bind flag
to indicate that the call site is a BPF program.

v5:
* fix wrong AF_INET (should be AF_INET6) in the bpf program for v6

v3:
* More bpf_bind documentation refinements (Martin KaFai Lau)
* Add UDP tests as well (Martin KaFai Lau)
* Don't start the thread, just do socket+bind+listen (Martin KaFai Lau)

v2:
* Update documentation (Andrey Ignatov)
* Pass BIND_FORCE_ADDRESS_NO_PORT conditionally (Andrey Ignatov)

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrey Ignatov <rdna@fb.com>
Acked-by: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200508174611.228805-5-sdf@google.com
---
 include/net/inet_common.h                          |   2 +
 include/uapi/linux/bpf.h                           |   9 +-
 net/core/filter.c                                  |  18 ++--
 net/ipv4/af_inet.c                                 |  10 +-
 net/ipv6/af_inet6.c                                |  12 ++-
 tools/include/uapi/linux/bpf.h                     |   9 +-
 .../selftests/bpf/prog_tests/connect_force_port.c  | 115 +++++++++++++++++++++
 .../selftests/bpf/progs/connect_force_port4.c      |  28 +++++
 .../selftests/bpf/progs/connect_force_port6.c      |  28 +++++
 9 files changed, 203 insertions(+), 28 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/connect_force_port.c
 create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port4.c
 create mode 100644 tools/testing/selftests/bpf/progs/connect_force_port6.c

diff --git a/include/net/inet_common.h b/include/net/inet_common.h
index c38f4f7d660a..cb2818862919 100644
--- a/include/net/inet_common.h
+++ b/include/net/inet_common.h
@@ -39,6 +39,8 @@ int inet_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 #define BIND_FORCE_ADDRESS_NO_PORT	(1 << 0)
 /* Grab and release socket lock. */
 #define BIND_WITH_LOCK			(1 << 1)
+/* Called from BPF program. */
+#define BIND_FROM_BPF			(1 << 2)
 int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 		u32 flags);
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b3643e27e264..6e5e7caa3739 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1994,10 +1994,11 @@ union bpf_attr {
  *
  * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
  * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- * 		**AF_INET6**). Looking for a free port to bind to can be
- * 		expensive, therefore binding to port is not permitted by the
- * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- * 		must be set to zero.
+ * 		**AF_INET6**). It's advised to pass zero port (**sin_port**
+ * 		or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+ * 		behavior and lets the kernel efficiently pick up an unused
+ * 		port as long as 4-tuple is unique. Passing non-zero port might
+ * 		lead to degraded performance.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
diff --git a/net/core/filter.c b/net/core/filter.c
index fa9ddab5dd1f..da0634979f53 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4525,32 +4525,28 @@ BPF_CALL_3(bpf_bind, struct bpf_sock_addr_kern *, ctx, struct sockaddr *, addr,
 {
 #ifdef CONFIG_INET
 	struct sock *sk = ctx->sk;
+	u32 flags = BIND_FROM_BPF;
 	int err;
 
-	/* Binding to port can be expensive so it's prohibited in the helper.
-	 * Only binding to IP is supported.
-	 */
 	err = -EINVAL;
 	if (addr_len < offsetofend(struct sockaddr, sa_family))
 		return err;
 	if (addr->sa_family == AF_INET) {
 		if (addr_len < sizeof(struct sockaddr_in))
 			return err;
-		if (((struct sockaddr_in *)addr)->sin_port != htons(0))
-			return err;
-		return __inet_bind(sk, addr, addr_len,
-				   BIND_FORCE_ADDRESS_NO_PORT);
+		if (((struct sockaddr_in *)addr)->sin_port == htons(0))
+			flags |= BIND_FORCE_ADDRESS_NO_PORT;
+		return __inet_bind(sk, addr, addr_len, flags);
 #if IS_ENABLED(CONFIG_IPV6)
 	} else if (addr->sa_family == AF_INET6) {
 		if (addr_len < SIN6_LEN_RFC2133)
 			return err;
-		if (((struct sockaddr_in6 *)addr)->sin6_port != htons(0))
-			return err;
+		if (((struct sockaddr_in6 *)addr)->sin6_port == htons(0))
+			flags |= BIND_FORCE_ADDRESS_NO_PORT;
 		/* ipv6_bpf_stub cannot be NULL, since it's called from
 		 * bpf_cgroup_inet6_connect hook and ipv6 is already loaded
 		 */
-		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len,
-						 BIND_FORCE_ADDRESS_NO_PORT);
+		return ipv6_bpf_stub->inet6_bind(sk, addr, addr_len, flags);
 #endif /* CONFIG_IPV6 */
 	}
 #endif /* CONFIG_INET */
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index 68e74b1b0f26..fcf0d12a407a 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -526,10 +526,12 @@ int __inet_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			err = -EADDRINUSE;
 			goto out_release_sock;
 		}
-		err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
-		if (err) {
-			inet->inet_saddr = inet->inet_rcv_saddr = 0;
-			goto out_release_sock;
+		if (!(flags & BIND_FROM_BPF)) {
+			err = BPF_CGROUP_RUN_PROG_INET4_POST_BIND(sk);
+			if (err) {
+				inet->inet_saddr = inet->inet_rcv_saddr = 0;
+				goto out_release_sock;
+			}
 		}
 	}
 
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 552c2592b81c..771a462a8322 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -407,11 +407,13 @@ static int __inet6_bind(struct sock *sk, struct sockaddr *uaddr, int addr_len,
 			err = -EADDRINUSE;
 			goto out;
 		}
-		err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
-		if (err) {
-			sk->sk_ipv6only = saved_ipv6only;
-			inet_reset_saddr(sk);
-			goto out;
+		if (!(flags & BIND_FROM_BPF)) {
+			err = BPF_CGROUP_RUN_PROG_INET6_POST_BIND(sk);
+			if (err) {
+				sk->sk_ipv6only = saved_ipv6only;
+				inet_reset_saddr(sk);
+				goto out;
+			}
 		}
 	}
 
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b3643e27e264..6e5e7caa3739 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1994,10 +1994,11 @@ union bpf_attr {
  *
  * 		This helper works for IPv4 and IPv6, TCP and UDP sockets. The
  * 		domain (*addr*\ **->sa_family**) must be **AF_INET** (or
- * 		**AF_INET6**). Looking for a free port to bind to can be
- * 		expensive, therefore binding to port is not permitted by the
- * 		helper: *addr*\ **->sin_port** (or **sin6_port**, respectively)
- * 		must be set to zero.
+ * 		**AF_INET6**). It's advised to pass zero port (**sin_port**
+ * 		or **sin6_port**) which triggers IP_BIND_ADDRESS_NO_PORT-like
+ * 		behavior and lets the kernel efficiently pick up an unused
+ * 		port as long as 4-tuple is unique. Passing non-zero port might
+ * 		lead to degraded performance.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
new file mode 100644
index 000000000000..47fbb20cb6a6
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -0,0 +1,115 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <test_progs.h>
+#include "cgroup_helpers.h"
+#include "network_helpers.h"
+
+static int verify_port(int family, int fd, int expected)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	__u16 port;
+
+	if (getsockname(fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get server addr");
+		return -1;
+	}
+
+	if (family == AF_INET)
+		port = ((struct sockaddr_in *)&addr)->sin_port;
+	else
+		port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+	if (ntohs(port) != expected) {
+		log_err("Unexpected port %d, expected %d", ntohs(port),
+			expected);
+		return -1;
+	}
+
+	return 0;
+}
+
+static int run_test(int cgroup_fd, int server_fd, int family, int type)
+{
+	struct bpf_prog_load_attr attr = {
+		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+	};
+	struct bpf_object *obj;
+	int expected_port;
+	int prog_fd;
+	int err;
+	int fd;
+
+	if (family == AF_INET) {
+		attr.file = "./connect_force_port4.o";
+		attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
+		expected_port = 22222;
+	} else {
+		attr.file = "./connect_force_port6.o";
+		attr.expected_attach_type = BPF_CGROUP_INET6_CONNECT;
+		expected_port = 22223;
+	}
+
+	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	if (err) {
+		log_err("Failed to load BPF object");
+		return -1;
+	}
+
+	err = bpf_prog_attach(prog_fd, cgroup_fd, attr.expected_attach_type,
+			      0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	fd = connect_to_fd(family, type, server_fd);
+	if (fd < 0) {
+		err = -1;
+		goto close_bpf_object;
+	}
+
+	err = verify_port(family, fd, expected_port);
+
+	close(fd);
+
+close_bpf_object:
+	bpf_object__close(obj);
+	return err;
+}
+
+void test_connect_force_port(void)
+{
+	int server_fd, cgroup_fd;
+
+	cgroup_fd = test__join_cgroup("/connect_force_port");
+	if (CHECK_FAIL(cgroup_fd < 0))
+		return;
+
+	server_fd = start_server(AF_INET, SOCK_STREAM);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
+	close(server_fd);
+
+	server_fd = start_server(AF_INET6, SOCK_STREAM);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
+	close(server_fd);
+
+	server_fd = start_server(AF_INET, SOCK_DGRAM);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
+	close(server_fd);
+
+	server_fd = start_server(AF_INET6, SOCK_DGRAM);
+	if (CHECK_FAIL(server_fd < 0))
+		goto close_cgroup_fd;
+	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
+	close(server_fd);
+
+close_cgroup_fd:
+	close(cgroup_fd);
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
new file mode 100644
index 000000000000..1b8eb34b2db0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect4")
+int _connect4(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in sa = {};
+
+	sa.sin_family = AF_INET;
+	sa.sin_port = bpf_htons(22222);
+	sa.sin_addr.s_addr = bpf_htonl(0x7f000001); /* 127.0.0.1 */
+
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
+
+	return 1;
+}
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
new file mode 100644
index 000000000000..ae6f7d750b4c
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <string.h>
+
+#include <linux/bpf.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <sys/socket.h>
+
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+char _license[] SEC("license") = "GPL";
+int _version SEC("version") = 1;
+
+SEC("cgroup/connect6")
+int _connect6(struct bpf_sock_addr *ctx)
+{
+	struct sockaddr_in6 sa = {};
+
+	sa.sin6_family = AF_INET6;
+	sa.sin6_port = bpf_htons(22223);
+	sa.sin6_addr.s6_addr32[3] = bpf_htonl(1); /* ::1 */
+
+	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
+		return 0;
+
+	return 1;
+}
-- 
cgit v1.2.3-59-g8ed1b


From cf86a086a18095e33e0637cb78cda1fcf5280852 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Thu, 7 May 2020 18:58:10 -0700
Subject: net/dst: use a smaller percpu_counter batch for dst entries
 accounting

percpu_counter_add() uses a default batch size which is quite big
on platforms with 256 cpus. (2*256 -> 512)

This means dst_entries_get_fast() can be off by +/- 2*(nr_cpus^2)
(131072 on servers with 256 cpus)

Reduce the batch size to something more reasonable, and
add logic to ip6_dst_gc() to call dst_entries_get_slow()
before calling the _very_ expensive fib6_run_gc() function.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dst_ops.h | 4 +++-
 net/core/dst.c        | 8 ++++----
 net/ipv6/route.c      | 3 +++
 3 files changed, 10 insertions(+), 5 deletions(-)

diff --git a/include/net/dst_ops.h b/include/net/dst_ops.h
index 443863c7b8da..88ff7bb2bb9b 100644
--- a/include/net/dst_ops.h
+++ b/include/net/dst_ops.h
@@ -53,9 +53,11 @@ static inline int dst_entries_get_slow(struct dst_ops *dst)
 	return percpu_counter_sum_positive(&dst->pcpuc_entries);
 }
 
+#define DST_PERCPU_COUNTER_BATCH 32
 static inline void dst_entries_add(struct dst_ops *dst, int val)
 {
-	percpu_counter_add(&dst->pcpuc_entries, val);
+	percpu_counter_add_batch(&dst->pcpuc_entries, val,
+				 DST_PERCPU_COUNTER_BATCH);
 }
 
 static inline int dst_entries_init(struct dst_ops *dst)
diff --git a/net/core/dst.c b/net/core/dst.c
index 193af526e908..d6b6ced0d451 100644
--- a/net/core/dst.c
+++ b/net/core/dst.c
@@ -81,11 +81,11 @@ void *dst_alloc(struct dst_ops *ops, struct net_device *dev,
 {
 	struct dst_entry *dst;
 
-	if (ops->gc && dst_entries_get_fast(ops) > ops->gc_thresh) {
+	if (ops->gc &&
+	    !(flags & DST_NOCOUNT) &&
+	    dst_entries_get_fast(ops) > ops->gc_thresh) {
 		if (ops->gc(ops)) {
-			printk_ratelimited(KERN_NOTICE "Route cache is full: "
-					   "consider increasing sysctl "
-					   "net.ipv[4|6].route.max_size.\n");
+			pr_notice_ratelimited("Route cache is full: consider increasing sysctl net.ipv6.route.max_size.\n");
 			return NULL;
 		}
 	}
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 1ff142393c76..a9072dba00f4 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3195,6 +3195,9 @@ static int ip6_dst_gc(struct dst_ops *ops)
 	int entries;
 
 	entries = dst_entries_get_fast(ops);
+	if (entries > rt_max_size)
+		entries = dst_entries_get_slow(ops);
+
 	if (time_after(rt_last_gc + rt_min_interval, jiffies) &&
 	    entries <= rt_max_size)
 		goto out;
-- 
cgit v1.2.3-59-g8ed1b


From 3712c1c2ef2a0cbca633d150e568deb8f63a6f29 Mon Sep 17 00:00:00 2001
From: Yang Yingliang <yangyingliang@huawei.com>
Date: Fri, 8 May 2020 11:52:08 +0800
Subject: ieee802154: 6lowpan: remove unnecessary comparison

The type of dispatch is u8, which is always <= 0xff, so the
'dispatch <= 0xff' check is always true and we can remove this comparison.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Yang Yingliang <yangyingliang@huawei.com>
Signed-off-by: Stefan Schmidt <stefan@datenfreihafen.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ieee802154/6lowpan/rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ieee802154/6lowpan/rx.c b/net/ieee802154/6lowpan/rx.c
index ee179380a766..b34d050c9687 100644
--- a/net/ieee802154/6lowpan/rx.c
+++ b/net/ieee802154/6lowpan/rx.c
@@ -240,7 +240,7 @@ static inline bool lowpan_is_reserved(u8 dispatch)
 	return ((dispatch >= 0x44 && dispatch <= 0x4F) ||
 		(dispatch >= 0x51 && dispatch <= 0x5F) ||
 		(dispatch >= 0xc8 && dispatch <= 0xdf) ||
-		(dispatch >= 0xe8 && dispatch <= 0xff));
+		dispatch >= 0xe8);
 }
 
 /* lowpan_rx_h_check checks on generic 6LoWPAN requirements
-- 
cgit v1.2.3-59-g8ed1b


From da384effb1609b1333d475e9d4b318913b3d194b Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 8 May 2020 10:11:14 +0000
Subject: net: dsa: vsc73xx: convert to devm_platform_ioremap_resource

Use the helper function that wraps the calls to platform_get_resource()
and devm_ioremap_resource() together.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/vitesse-vsc73xx-platform.c | 10 +---------
 1 file changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/dsa/vitesse-vsc73xx-platform.c b/drivers/net/dsa/vitesse-vsc73xx-platform.c
index 0541785f9fee..5e54a5726aa4 100644
--- a/drivers/net/dsa/vitesse-vsc73xx-platform.c
+++ b/drivers/net/dsa/vitesse-vsc73xx-platform.c
@@ -89,7 +89,6 @@ static int vsc73xx_platform_probe(struct platform_device *pdev)
 {
 	struct device *dev = &pdev->dev;
 	struct vsc73xx_platform *vsc_platform;
-	struct resource *res = NULL;
 	int ret;
 
 	vsc_platform = devm_kzalloc(dev, sizeof(*vsc_platform), GFP_KERNEL);
@@ -103,14 +102,7 @@ static int vsc73xx_platform_probe(struct platform_device *pdev)
 	vsc_platform->vsc.ops = &vsc73xx_platform_ops;
 
 	/* obtain I/O memory space */
-	res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!res) {
-		dev_err(&pdev->dev, "cannot obtain I/O memory space\n");
-		ret = -ENXIO;
-		return ret;
-	}
-
-	vsc_platform->base_addr = devm_ioremap_resource(&pdev->dev, res);
+	vsc_platform->base_addr = devm_platform_ioremap_resource(pdev, 0);
 	if (IS_ERR(vsc_platform->base_addr)) {
 		dev_err(&pdev->dev, "cannot request I/O memory space\n");
 		ret = -ENXIO;
-- 
cgit v1.2.3-59-g8ed1b


From b41d272eda6da5a0d6cad76c3ba18952b32f5ce9 Mon Sep 17 00:00:00 2001
From: Samuel Zou <zou_wei@huawei.com>
Date: Fri, 8 May 2020 20:00:55 +0800
Subject: net: dsa: sja1105: remove set but not used variable 'prev_time'
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/dsa/sja1105/sja1105_vl.c:468:6: warning: variable ‘prev_time’ set but not used [-Wunused-but-set-variable]
  u32 prev_time = 0;
      ^~~~~~~~~

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Samuel Zou <zou_wei@huawei.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Tested-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_vl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index b52f1af6e7e7..aa9b0b92f437 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -465,7 +465,6 @@ sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg,
 	struct sja1105_gate_entry *last_e;
 	struct sja1105_gate_entry *e;
 	struct list_head *prev;
-	u32 prev_time = 0;
 
 	list_for_each_entry(e, &gating_cfg->entries, list) {
 		struct sja1105_gate_entry *p;
@@ -476,7 +475,6 @@ sja1105_gating_cfg_time_to_interval(struct sja1105_gating_config *gating_cfg,
 			continue;
 
 		p = list_entry(prev, struct sja1105_gate_entry, list);
-		prev_time = e->interval;
 		p->interval = e->interval - p->interval;
 	}
 	last_e = list_last_entry(&gating_cfg->entries,
-- 
cgit v1.2.3-59-g8ed1b


From d8882935fcae28bceb5f6f56f09cded8d36d85e6 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Fri, 8 May 2020 07:34:14 -0700
Subject: ipv6: use DST_NOCOUNT in ip6_rt_pcpu_alloc()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

We currently have to adjust ipv6 route gc_thresh/max_size depending
on number of cpus on a server, this makes very little sense.

If the kernel sets /proc/sys/net/ipv6/route/gc_thresh to 1024
and /proc/sys/net/ipv6/route/max_size to 4096, then we better
not track the percpu dst that our implementation uses.

Only routes not added (directly or indirectly) by the admin
should be tracked and limited.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Cc: David Ahern <dsahern@kernel.org>
Cc: Maciej Żenczykowski <maze@google.com>
Acked-by: Wei Wang <weiwan@google.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ipv6/route.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a9072dba00f4..4292653af533 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -1377,7 +1377,7 @@ static struct rt6_info *ip6_rt_pcpu_alloc(const struct fib6_result *res)
 
 	rcu_read_lock();
 	dev = ip6_rt_get_dev_rcu(res);
-	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags);
+	pcpu_rt = ip6_dst_alloc(dev_net(dev), dev, flags | DST_NOCOUNT);
 	rcu_read_unlock();
 	if (!pcpu_rt) {
 		fib6_info_release(f6i);
-- 
cgit v1.2.3-59-g8ed1b


From b5c3babb188fd0ccd4d3dceaa83b225474b6aa70 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Fri, 8 May 2020 12:41:33 -0700
Subject: net: ipa: Remove ipa_endpoint_stop{,_rx_dma} again

When building arm64 allyesconfig:

drivers/net/ipa/ipa_endpoint.c: In function 'ipa_endpoint_stop_rx_dma':
drivers/net/ipa/ipa_endpoint.c:1274:13: error: 'IPA_ENDPOINT_STOP_RX_SIZE' undeclared (first use in this function)
drivers/net/ipa/ipa_endpoint.c:1274:13: note: each undeclared identifier is reported only once for each function it appears in
drivers/net/ipa/ipa_endpoint.c:1289:2: error: implicit declaration of function 'ipa_cmd_dma_task_32b_addr_add' [-Werror=implicit-function-declaration]
drivers/net/ipa/ipa_endpoint.c:1291:45: error: 'ENDPOINT_STOP_DMA_TIMEOUT' undeclared (first use in this function)
drivers/net/ipa/ipa_endpoint.c: In function 'ipa_endpoint_stop':
drivers/net/ipa/ipa_endpoint.c:1309:16: error: 'IPA_ENDPOINT_STOP_RX_RETRIES' undeclared (first use in this function)

These functions were removed in a series, merged in as
commit 33395f4a5c1b ("Merge branch 'net-ipa-kill-endpoint-stop-workaround'").

Remove them again so that the build works properly.

Fixes: 3793faad7b5b ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ipa/ipa_endpoint.c | 61 ------------------------------------------
 1 file changed, 61 deletions(-)

diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 5fec30e542cb..82066a223a67 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -1269,67 +1269,6 @@ static void ipa_endpoint_reset(struct ipa_endpoint *endpoint)
 			ret, endpoint->channel_id, endpoint->endpoint_id);
 }
 
-static int ipa_endpoint_stop_rx_dma(struct ipa *ipa)
-{
-	u16 size = IPA_ENDPOINT_STOP_RX_SIZE;
-	struct gsi_trans *trans;
-	dma_addr_t addr;
-	int ret;
-
-	trans = ipa_cmd_trans_alloc(ipa, 1);
-	if (!trans) {
-		dev_err(&ipa->pdev->dev,
-			"no transaction for RX endpoint STOP workaround\n");
-		return -EBUSY;
-	}
-
-	/* Read into the highest part of the zero memory area */
-	addr = ipa->zero_addr + ipa->zero_size - size;
-
-	ipa_cmd_dma_task_32b_addr_add(trans, size, addr, false);
-
-	ret = gsi_trans_commit_wait_timeout(trans, ENDPOINT_STOP_DMA_TIMEOUT);
-	if (ret)
-		gsi_trans_free(trans);
-
-	return ret;
-}
-
-/**
- * ipa_endpoint_stop() - Stops a GSI channel in IPA
- * @client:	Client whose endpoint should be stopped
- *
- * This function implements the sequence to stop a GSI channel
- * in IPA. This function returns when the channel is is STOP state.
- *
- * Return value: 0 on success, negative otherwise
- */
-int ipa_endpoint_stop(struct ipa_endpoint *endpoint)
-{
-	u32 retries = IPA_ENDPOINT_STOP_RX_RETRIES;
-	int ret;
-
-	do {
-		struct ipa *ipa = endpoint->ipa;
-		struct gsi *gsi = &ipa->gsi;
-
-		ret = gsi_channel_stop(gsi, endpoint->channel_id);
-		if (ret != -EAGAIN || endpoint->toward_ipa)
-			break;
-
-		/* For IPA v3.5.1, send a DMA read task and check again */
-		if (ipa->version == IPA_VERSION_3_5_1) {
-			ret = ipa_endpoint_stop_rx_dma(ipa);
-			if (ret)
-				break;
-		}
-
-		msleep(1);
-	} while (retries--);
-
-	return retries ? ret : -EIO;
-}
-
 static void ipa_endpoint_program(struct ipa_endpoint *endpoint)
 {
 	if (endpoint->toward_ipa) {
-- 
cgit v1.2.3-59-g8ed1b


From e031ce80d9f9867ab7c27b9acaadd75c62d6007a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 May 2020 23:30:02 +0200
Subject: r8169: add helper r8168g_wait_ll_share_fifo_ready

Create a helper for this waiting function; the name of the helper is
borrowed from the vendor driver. In addition, don't return in the two
hw_init functions if the first wait runs into a timeout, since there's
no benefit in doing so.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index f06dbc9a046d..ba67864c04c1 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -5055,6 +5055,11 @@ DECLARE_RTL_COND(rtl_link_list_ready_cond)
 	return RTL_R8(tp, MCU) & LINK_LIST_RDY;
 }
 
+static void r8168g_wait_ll_share_fifo_ready(struct rtl8169_private *tp)
+{
+	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+}
+
 DECLARE_RTL_COND(rtl_rxtx_empty_cond)
 {
 	return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
@@ -5139,13 +5144,10 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
 	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
-
-	if (!rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
-		return;
+	r8168g_wait_ll_share_fifo_ready(tp);
 
 	r8168_mac_ocp_modify(tp, 0xe8de, 0, BIT(15));
-
-	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+	r8168g_wait_ll_share_fifo_ready(tp);
 }
 
 static void rtl_hw_init_8125(struct rtl8169_private *tp)
@@ -5160,15 +5162,12 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp)
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
 
 	r8168_mac_ocp_modify(tp, 0xe8de, BIT(14), 0);
-
-	if (!rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42))
-		return;
+	r8168g_wait_ll_share_fifo_ready(tp);
 
 	r8168_mac_ocp_write(tp, 0xc0aa, 0x07d0);
 	r8168_mac_ocp_write(tp, 0xc0a6, 0x0150);
 	r8168_mac_ocp_write(tp, 0xc01e, 0x5555);
-
-	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
+	r8168g_wait_ll_share_fifo_ready(tp);
 }
 
 static void rtl_hw_initialize(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From 9617886fa65d3264c5aac5f1305e8e5a6ca865f4 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 May 2020 23:30:43 +0200
Subject: r8169: add helper rtl_enable_rxdvgate

Add a helper for setting RXDV_GATED_EN, the 2ms delay is copied from
the vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index ba67864c04c1..5b389d3e7c9a 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2494,6 +2494,12 @@ DECLARE_RTL_COND(rtl_txcfg_empty_cond)
 	return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
 }
 
+static void rtl_enable_rxdvgate(struct rtl8169_private *tp)
+{
+	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
+	fsleep(2000);
+}
+
 static void rtl8169_hw_reset(struct rtl8169_private *tp)
 {
 	/* Disable interrupts */
@@ -5131,7 +5137,7 @@ static int r8169_mdio_register(struct rtl8169_private *tp)
 
 static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
-	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
+	rtl_enable_rxdvgate(tp);
 
 	if (!rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
 		return;
@@ -5152,7 +5158,7 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 
 static void rtl_hw_init_8125(struct rtl8169_private *tp)
 {
-	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
+	rtl_enable_rxdvgate(tp);
 
 	if (!rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 6f9395c69e62e0ffefaaddf97f32d7690e00f791 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 May 2020 23:31:46 +0200
Subject: r8169: add helper rtl_wait_txrx_fifo_empty

Add a helper for waiting for the FIFOs to be empty; again, the name is
borrowed from the vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 35 ++++++++++++++++++-------------
 1 file changed, 21 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5b389d3e7c9a..47ff514aec39 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2494,10 +2494,31 @@ DECLARE_RTL_COND(rtl_txcfg_empty_cond)
 	return RTL_R32(tp, TxConfig) & TXCFG_EMPTY;
 }
 
+DECLARE_RTL_COND(rtl_rxtx_empty_cond)
+{
+	return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
+}
+
+static void rtl_wait_txrx_fifo_empty(struct rtl8169_private *tp)
+{
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52:
+		rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42);
+		rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
+		break;
+	case RTL_GIGA_MAC_VER_60 ... RTL_GIGA_MAC_VER_61:
+		rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42);
+		break;
+	default:
+		break;
+	}
+}
+
 static void rtl_enable_rxdvgate(struct rtl8169_private *tp)
 {
 	RTL_W32(tp, MISC, RTL_R32(tp, MISC) | RXDV_GATED_EN);
 	fsleep(2000);
+	rtl_wait_txrx_fifo_empty(tp);
 }
 
 static void rtl8169_hw_reset(struct rtl8169_private *tp)
@@ -5066,11 +5087,6 @@ static void r8168g_wait_ll_share_fifo_ready(struct rtl8169_private *tp)
 	rtl_loop_wait_high(tp, &rtl_link_list_ready_cond, 100, 42);
 }
 
-DECLARE_RTL_COND(rtl_rxtx_empty_cond)
-{
-	return (RTL_R8(tp, MCU) & RXTX_EMPTY) == RXTX_EMPTY;
-}
-
 static int r8169_mdio_read_reg(struct mii_bus *mii_bus, int phyaddr, int phyreg)
 {
 	struct rtl8169_private *tp = mii_bus->priv;
@@ -5139,12 +5155,6 @@ static void rtl_hw_init_8168g(struct rtl8169_private *tp)
 {
 	rtl_enable_rxdvgate(tp);
 
-	if (!rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 42))
-		return;
-
-	if (!rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
-		return;
-
 	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
 	msleep(1);
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
@@ -5160,9 +5170,6 @@ static void rtl_hw_init_8125(struct rtl8169_private *tp)
 {
 	rtl_enable_rxdvgate(tp);
 
-	if (!rtl_loop_wait_high(tp, &rtl_rxtx_empty_cond, 100, 42))
-		return;
-
 	RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) & ~(CmdTxEnb | CmdRxEnb));
 	msleep(1);
 	RTL_W8(tp, MCU, RTL_R8(tp, MCU) & ~NOW_IS_OOB);
-- 
cgit v1.2.3-59-g8ed1b


From ce740c5f6f7a2142ca7acee7799996c0beac2980 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Fri, 8 May 2020 23:32:49 +0200
Subject: r8169: improve reset handling for chips from RTL8168g

Sync the reset preparation for chips from RTL8168g with the r8168 and
r8125 vendor drivers.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/realtek/r8169_main.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 47ff514aec39..b4e49c446bd0 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2535,10 +2535,13 @@ static void rtl8169_hw_reset(struct rtl8169_private *tp)
 		rtl_loop_wait_low(tp, &rtl_npq_cond, 20, 2000);
 		break;
 	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_52:
 		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
 		rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
 		break;
+	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
+		rtl_enable_rxdvgate(tp);
+		fsleep(2000);
+		break;
 	default:
 		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
 		udelay(100);
-- 
cgit v1.2.3-59-g8ed1b


From 4d2c99940825637d007da150ad03a6f4442de0f0 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 9 May 2020 00:14:47 +0100
Subject: net: tg3: tidy up loop, remove need to compute off with a multiply

Currently the value for 'off' is computed using a multiplication and
a couple of statements later off is being incremented by len and
this value is never read.  Clean up the code by removing the
multiplication and just increment off by len on each iteration.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/tg3.c | 8 +++-----
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/tg3.c b/drivers/net/ethernet/broadcom/tg3.c
index ff98a82b7bc4..7a3b22b35238 100644
--- a/drivers/net/ethernet/broadcom/tg3.c
+++ b/drivers/net/ethernet/broadcom/tg3.c
@@ -10797,17 +10797,15 @@ static int tg3_init_hw(struct tg3 *tp, bool reset_phy)
 #ifdef CONFIG_TIGON3_HWMON
 static void tg3_sd_scan_scratchpad(struct tg3 *tp, struct tg3_ocir *ocir)
 {
+	u32 off, len = TG3_OCIR_LEN;
 	int i;
 
-	for (i = 0; i < TG3_SD_NUM_RECS; i++, ocir++) {
-		u32 off = i * TG3_OCIR_LEN, len = TG3_OCIR_LEN;
-
+	for (i = 0, off = 0; i < TG3_SD_NUM_RECS; i++, ocir++, off += len) {
 		tg3_ape_scratchpad_read(tp, (u32 *) ocir, off, len);
-		off += len;
 
 		if (ocir->signature != TG3_OCIR_SIG_MAGIC ||
 		    !(ocir->version_flags & TG3_OCIR_FLAG_ACTIVE))
-			memset(ocir, 0, TG3_OCIR_LEN);
+			memset(ocir, 0, len);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d8f05e9891fd4e80863f09a17c6d594ac6870a33 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 8 May 2020 23:40:26 +0100
Subject: cnic: remove redundant assignment to variable ret

The variable ret is being assigned with a value that is never read,
the assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Michael Chan <michael.chan@broadcom.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/broadcom/cnic.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/broadcom/cnic.c b/drivers/net/ethernet/broadcom/cnic.c
index 61ab7d21f6bd..c5cca63b8571 100644
--- a/drivers/net/ethernet/broadcom/cnic.c
+++ b/drivers/net/ethernet/broadcom/cnic.c
@@ -1918,7 +1918,6 @@ static int cnic_bnx2x_iscsi_ofld1(struct cnic_dev *dev, struct kwqe *wqes[],
 	ret = cnic_alloc_bnx2x_conn_resc(dev, l5_cid);
 	if (ret) {
 		atomic_dec(&cp->iscsi_conn);
-		ret = 0;
 		goto done;
 	}
 	ret = cnic_setup_bnx2x_ctx(dev, wqes, num);
-- 
cgit v1.2.3-59-g8ed1b


From 6a9c9548ee204882d196fdbc95f689e1d0fd00aa Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 8 May 2020 23:33:21 +0100
Subject: net/atheros: remove redundant assignment to variable size

The variable size is being assigned with a value that is never read,
the assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/atheros/atlx/atl1.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/atheros/atlx/atl1.c b/drivers/net/ethernet/atheros/atlx/atl1.c
index 271e7034fa70..b35fcfcd692d 100644
--- a/drivers/net/ethernet/atheros/atlx/atl1.c
+++ b/drivers/net/ethernet/atheros/atlx/atl1.c
@@ -1042,7 +1042,7 @@ static s32 atl1_setup_ring_resources(struct atl1_adapter *adapter)
 	 * each ring/block may need up to 8 bytes for alignment, hence the
 	 * additional 40 bytes tacked onto the end.
 	 */
-	ring_header->size = size =
+	ring_header->size =
 		sizeof(struct tx_packet_desc) * tpd_ring->count
 		+ sizeof(struct rx_free_desc) * rfd_ring->count
 		+ sizeof(struct rx_return_desc) * rrd_ring->count
-- 
cgit v1.2.3-59-g8ed1b


From 5eb2bcf247de02b92749fe6a6b342f6393ca3f86 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 8 May 2020 23:58:10 +0100
Subject: net: lio_core: remove redundant assignment to variable tx_done

The variable tx_done is being assigned with a value that is never read
as the function returns a few statements later.  The assignment is
redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/cavium/liquidio/lio_core.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/cavium/liquidio/lio_core.c b/drivers/net/ethernet/cavium/liquidio/lio_core.c
index d7e805749a5b..e40c64b79f66 100644
--- a/drivers/net/ethernet/cavium/liquidio/lio_core.c
+++ b/drivers/net/ethernet/cavium/liquidio/lio_core.c
@@ -782,7 +782,6 @@ static int liquidio_napi_poll(struct napi_struct *napi, int budget)
 	if ((work_done < budget && tx_done) ||
 	    (iq && iq->pkt_in_done >= MAX_REG_CNT) ||
 	    (droq->pkt_count >= MAX_REG_CNT)) {
-		tx_done = 1;
 		napi_complete_done(napi, work_done);
 
 		octeon_enable_irq(droq->oct_dev, droq->q_no);
-- 
cgit v1.2.3-59-g8ed1b


From f02bac9ad6415e40bf32bf84ce5832698ebe5d15 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 5 Dec 2019 18:07:25 +0200
Subject: net/mlx5e: Return bool from TLS and IPSEC offloads

TLS and IPSEC offloads currently return struct sk_buff *, but the value
is either NULL or the same skb that was passed as a parameter. Return
bool instead to provide stronger guarantees to the calling code (it
won't need to support handling a different SKB that could be potentially
returned before this change) and to simplify restructuring this code in
the following commits.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en_accel/en_accel.h         | 23 +++++------
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c       | 12 +++---
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h       |  6 +--
 .../ethernet/mellanox/mlx5/core/en_accel/ktls.h    |  7 ++--
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 11 +++--
 .../mellanox/mlx5/core/en_accel/tls_rxtx.c         | 48 +++++++++-------------
 .../mellanox/mlx5/core/en_accel/tls_rxtx.h         |  8 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    |  3 +-
 8 files changed, 50 insertions(+), 68 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index a6f65d4b2f36..6249998444c0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -102,33 +102,30 @@ mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
 	udp_hdr(skb)->len = htons(payload_len);
 }
 
-static inline struct sk_buff *
-mlx5e_accel_handle_tx(struct sk_buff *skb,
-		      struct mlx5e_txqsq *sq,
-		      struct net_device *dev,
-		      struct mlx5e_tx_wqe **wqe,
-		      u16 *pi)
+static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
+					 struct mlx5e_txqsq *sq,
+					 struct net_device *dev,
+					 struct mlx5e_tx_wqe **wqe,
+					 u16 *pi)
 {
 #ifdef CONFIG_MLX5_EN_TLS
 	if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
-		skb = mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi);
-		if (unlikely(!skb))
-			return NULL;
+		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi)))
+			return false;
 	}
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
 	if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
-		skb = mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb);
-		if (unlikely(!skb))
-			return NULL;
+		if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb)))
+			return false;
 	}
 #endif
 
 	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
 		mlx5e_udp_gso_handle_tx_skb(skb);
 
-	return skb;
+	return true;
 }
 
 #endif /* __MLX5E_EN_ACCEL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 0dd17514caae..f60eb6a4b57c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -233,9 +233,9 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
 		   ntohs(mdata->content.tx.seq));
 }
 
-struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
-					  struct mlx5e_tx_wqe *wqe,
-					  struct sk_buff *skb)
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+			       struct mlx5e_tx_wqe *wqe,
+			       struct sk_buff *skb)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct xfrm_offload *xo = xfrm_offload(skb);
@@ -245,7 +245,7 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 	struct sec_path *sp;
 
 	if (!xo)
-		return skb;
+		return true;
 
 	sp = skb_sec_path(skb);
 	if (unlikely(sp->len != 1)) {
@@ -281,11 +281,11 @@ struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 	sa_entry->set_iv_op(skb, x, xo);
 	mlx5e_ipsec_set_metadata(skb, mdata, xo);
 
-	return skb;
+	return true;
 
 drop:
 	kfree_skb(skb);
-	return NULL;
+	return false;
 }
 
 static inline struct xfrm_state *
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index db84500b024f..64e948cc3dc5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -52,9 +52,9 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
 			    struct xfrm_offload *xo);
 void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
 			struct xfrm_offload *xo);
-struct sk_buff *mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
-					  struct mlx5e_tx_wqe *wqe,
-					  struct sk_buff *skb);
+bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+			       struct mlx5e_tx_wqe *wqe,
+			       struct sk_buff *skb);
 
 #endif /* CONFIG_MLX5_EN_IPSEC */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 9daaec244385..742aca8782d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -95,10 +95,9 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
 void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
 
-struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
-					 struct mlx5e_txqsq *sq,
-					 struct sk_buff *skb,
-					 struct mlx5e_tx_wqe **wqe, u16 *pi);
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
+			      u16 *pi);
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index ba973937f0b5..8fcd14803558 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -413,10 +413,9 @@ err_out:
 	return MLX5E_KTLS_SYNC_FAIL;
 }
 
-struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
-					 struct mlx5e_txqsq *sq,
-					 struct sk_buff *skb,
-					 struct mlx5e_tx_wqe **wqe, u16 *pi)
+bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
+			      u16 *pi)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
 	struct mlx5e_sq_stats *stats = sq->stats;
@@ -474,9 +473,9 @@ struct sk_buff *mlx5e_ktls_handle_tx_skb(struct net_device *netdev,
 	stats->tls_encrypted_bytes   += datalen;
 
 out:
-	return skb;
+	return true;
 
 err_out:
 	dev_kfree_skb_any(skb);
-	return NULL;
+	return false;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 1d7ddeb7a46b..e8f2c214a8de 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -184,12 +184,10 @@ static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
 	nskb->queue_mapping = skb->queue_mapping;
 }
 
-static struct sk_buff *
-mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
-		     struct mlx5e_txqsq *sq, struct sk_buff *skb,
-		     struct mlx5e_tx_wqe **wqe,
-		     u16 *pi,
-		     struct mlx5e_tls *tls)
+static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
+				 struct mlx5e_txqsq *sq, struct sk_buff *skb,
+				 struct mlx5e_tx_wqe **wqe, u16 *pi,
+				 struct mlx5e_tls *tls)
 {
 	u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
 	struct sync_info info;
@@ -217,7 +215,7 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
 		if (likely(payload <= -info.sync_len))
 			/* SKB payload doesn't require offload
 			 */
-			return skb;
+			return true;
 
 		atomic64_inc(&tls->sw_stats.tx_tls_drop_bypass_required);
 		goto err_out;
@@ -250,18 +248,16 @@ mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
 	mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
 	*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
 	*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
-	return skb;
+	return true;
 
 err_out:
 	dev_kfree_skb_any(skb);
-	return NULL;
+	return false;
 }
 
-struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
-					struct mlx5e_txqsq *sq,
-					struct sk_buff *skb,
-					struct mlx5e_tx_wqe **wqe,
-					u16 *pi)
+bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+			     struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
+			     u16 *pi)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_tx *context;
@@ -270,41 +266,35 @@ struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
 	int datalen;
 	u32 skb_seq;
 
-	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx)) {
-		skb = mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
-		goto out;
-	}
+	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx))
+		return mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
 
 	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
-		goto out;
+		return true;
 
 	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
 	if (!datalen)
-		goto out;
+		return true;
 
 	tls_ctx = tls_get_ctx(skb->sk);
 	if (unlikely(tls_ctx->netdev != netdev))
-		goto out;
+		return true;
 
 	skb_seq = ntohl(tcp_hdr(skb)->seq);
 	context = mlx5e_get_tls_tx_context(tls_ctx);
 	expected_seq = context->expected_seq;
 
-	if (unlikely(expected_seq != skb_seq)) {
-		skb = mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
-		goto out;
-	}
+	if (unlikely(expected_seq != skb_seq))
+		return mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
 
 	if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
 		atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
 		dev_kfree_skb_any(skb);
-		skb = NULL;
-		goto out;
+		return false;
 	}
 
 	context->expected_seq = skb_seq + datalen;
-out:
-	return skb;
+	return true;
 }
 
 static int tls_update_resync_sn(struct net_device *netdev,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 90bc1f2384c8..890d452bf1ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -40,11 +40,9 @@
 #include "en.h"
 #include "en/txrx.h"
 
-struct sk_buff *mlx5e_tls_handle_tx_skb(struct net_device *netdev,
-					struct mlx5e_txqsq *sq,
-					struct sk_buff *skb,
-					struct mlx5e_tx_wqe **wqe,
-					u16 *pi);
+bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+			     struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
+			     u16 *pi);
 
 void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
 			     u32 *cqe_bcnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 583e1b201b75..7a6ed72ae00a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -394,8 +394,7 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
 	/* might send skbs and update wqe and pi */
-	skb = mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi);
-	if (unlikely(!skb))
+	if (unlikely(!mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi)))
 		return NETDEV_TX_OK;
 
 	return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
-- 
cgit v1.2.3-59-g8ed1b


From 7f8546f3f041d6b1cce270581517217867764077 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Fri, 3 Jan 2020 16:17:30 +0200
Subject: net/mlx5e: Unify checks of TLS offloads

Both INNOVA and ConnectX TLS offloads perform the same checks in the
beginning. Unify them to reduce repeating code. Do WARN_ON_ONCE on
netdev mismatch and finish with an error in both offloads, not only in
the ConnectX one.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h |  4 ++--
 .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c  | 17 ++---------------
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c | 14 +++++++++-----
 3 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 742aca8782d6..81f8b7467569 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -95,9 +95,9 @@ mlx5e_get_ktls_tx_priv_ctx(struct tls_context *tls_ctx)
 void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
 
-bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
 			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			      u16 *pi);
+			      u16 *pi, int datalen);
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 8fcd14803558..c61604f3722c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -413,28 +413,15 @@ err_out:
 	return MLX5E_KTLS_SYNC_FAIL;
 }
 
-bool mlx5e_ktls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
+bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
 			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			      u16 *pi)
+			      u16 *pi, int datalen)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
 	struct mlx5e_sq_stats *stats = sq->stats;
 	struct mlx5_wqe_ctrl_seg *cseg;
-	struct tls_context *tls_ctx;
-	int datalen;
 	u32 seq;
 
-	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
-		goto out;
-
-	datalen = skb->len - (skb_transport_offset(skb) + tcp_hdrlen(skb));
-	if (!datalen)
-		goto out;
-
-	tls_ctx = tls_get_ctx(skb->sk);
-	if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
-		goto err_out;
-
 	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
 
 	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index e8f2c214a8de..26c59cfbec9b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -266,9 +266,6 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 	int datalen;
 	u32 skb_seq;
 
-	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx))
-		return mlx5e_ktls_handle_tx_skb(netdev, sq, skb, wqe, pi);
-
 	if (!skb->sk || !tls_is_sk_tx_device_offloaded(skb->sk))
 		return true;
 
@@ -277,8 +274,11 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 		return true;
 
 	tls_ctx = tls_get_ctx(skb->sk);
-	if (unlikely(tls_ctx->netdev != netdev))
-		return true;
+	if (WARN_ON_ONCE(tls_ctx->netdev != netdev))
+		goto err_out;
+
+	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx))
+		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, wqe, pi, datalen);
 
 	skb_seq = ntohl(tcp_hdr(skb)->seq);
 	context = mlx5e_get_tls_tx_context(tls_ctx);
@@ -295,6 +295,10 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 
 	context->expected_seq = skb_seq + datalen;
 	return true;
+
+err_out:
+	dev_kfree_skb_any(skb);
+	return false;
 }
 
 static int tls_update_resync_sn(struct net_device *netdev,
-- 
cgit v1.2.3-59-g8ed1b


From 3df711db05b545f3c9b610bee62d33f4e67f64b7 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Thu, 5 Dec 2019 18:11:16 +0200
Subject: net/mlx5e: Return void from mlx5e_sq_xmit and mlx5i_sq_xmit

mlx5e_sq_xmit and mlx5i_sq_xmit always return NETDEV_TX_OK. Drop the
return value to simplify the code.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 24 ++++++++++------------
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c  |  4 +++-
 .../net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h  |  5 ++---
 4 files changed, 18 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 0864b76ca2c0..da7fe6aafeed 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -919,8 +919,8 @@ void mlx5e_build_ptys2ethtool_map(void);
 u16 mlx5e_select_queue(struct net_device *dev, struct sk_buff *skb,
 		       struct net_device *sb_dev);
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev);
-netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
+void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+		   struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more);
 
 void mlx5e_trigger_irq(struct mlx5e_icosq *sq);
 void mlx5e_completion_event(struct mlx5_core_cq *mcq, struct mlx5_eqe *eqe);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index 7a6ed72ae00a..bb6d3774eafb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -265,8 +265,8 @@ mlx5e_txwqe_complete(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, cseg);
 }
 
-netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
+void mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+		   struct mlx5e_tx_wqe *wqe, u16 pi, bool xmit_more)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
 	struct mlx5_wqe_ctrl_seg *cseg;
@@ -373,13 +373,11 @@ netdev_tx_t mlx5e_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
 			     num_dma, wi, cseg, xmit_more);
 
-	return NETDEV_TX_OK;
+	return;
 
 err_drop:
 	stats->dropped++;
 	dev_kfree_skb_any(skb);
-
-	return NETDEV_TX_OK;
 }
 
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
@@ -395,9 +393,12 @@ netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	/* might send skbs and update wqe and pi */
 	if (unlikely(!mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi)))
-		return NETDEV_TX_OK;
+		goto out;
+
+	mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
 
-	return mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
+out:
+	return NETDEV_TX_OK;
 }
 
 bool mlx5e_poll_tx_cq(struct mlx5e_cq *cq, int napi_budget)
@@ -567,9 +568,8 @@ mlx5i_txwqe_build_datagram(struct mlx5_av *av, u32 dqpn, u32 dqkey,
 	dseg->av.key.qkey.qkey = cpu_to_be32(dqkey);
 }
 
-netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
-			  bool xmit_more)
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more)
 {
 	struct mlx5i_tx_wqe *wqe;
 
@@ -647,12 +647,10 @@ netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
 	mlx5e_txwqe_complete(sq, skb, opcode, ds_cnt, num_wqebbs, num_bytes,
 			     num_dma, wi, cseg, xmit_more);
 
-	return NETDEV_TX_OK;
+	return;
 
 err_drop:
 	stats->dropped++;
 	dev_kfree_skb_any(skb);
-
-	return NETDEV_TX_OK;
 }
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 068578be00f1..035bd21e5d4e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -663,7 +663,9 @@ static int mlx5i_xmit(struct net_device *dev, struct sk_buff *skb,
 	struct mlx5_ib_ah *mah   = to_mah(address);
 	struct mlx5i_priv *ipriv = epriv->ppriv;
 
-	return mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
+	mlx5i_sq_xmit(sq, skb, &mah->av, dqpn, ipriv->qkey, netdev_xmit_more());
+
+	return NETDEV_TX_OK;
 }
 
 static void mlx5i_set_pkey_index(struct net_device *netdev, int id)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index 7844ab5d0ce7..c4aa47018c0e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -113,9 +113,8 @@ struct mlx5i_tx_wqe {
 #define MLX5I_SQ_FETCH_WQE(sq, pi) \
 	((struct mlx5i_tx_wqe *)mlx5e_fetch_wqe(&(sq)->wq, pi, sizeof(struct mlx5i_tx_wqe)))
 
-netdev_tx_t mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
-			  struct mlx5_av *av, u32 dqpn, u32 dqkey,
-			  bool xmit_more);
+void mlx5i_sq_xmit(struct mlx5e_txqsq *sq, struct sk_buff *skb,
+		   struct mlx5_av *av, u32 dqpn, u32 dqkey, bool xmit_more);
 void mlx5i_handle_rx_cqe(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5i_get_stats(struct net_device *dev, struct rtnl_link_stats64 *stats);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0bdb078c74854c48bffa323899f2e0c5ca924e72 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Fri, 6 Dec 2019 16:14:57 +0200
Subject: net/mlx5e: Pass only eseg to IPSEC offload

IPSEC offload needs to modify the eseg of the WQE that is being filled,
but it receives a pointer to the whole WQE. To make the contract
stricter, pass only the pointer to the eseg of that WQE. This commit is
preparation for the following refactoring of offloads in the TX path and
for the MPWQE support.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h   | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 6249998444c0..c658c8556863 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -117,7 +117,7 @@ static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
 
 #ifdef CONFIG_MLX5_EN_IPSEC
 	if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
-		if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, *wqe, skb)))
+		if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, &(*wqe)->eth, skb)))
 			return false;
 	}
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index f60eb6a4b57c..0e1ac3e68c72 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -234,7 +234,7 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
 }
 
 bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
-			       struct mlx5e_tx_wqe *wqe,
+			       struct mlx5_wqe_eth_seg *eseg,
 			       struct sk_buff *skb)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -276,7 +276,7 @@ bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
 		atomic64_inc(&priv->ipsec->sw_stats.ipsec_tx_drop_metadata);
 		goto drop;
 	}
-	mlx5e_ipsec_set_swp(skb, &wqe->eth, x->props.mode, xo);
+	mlx5e_ipsec_set_swp(skb, eseg, x->props.mode, xo);
 	sa_entry = (struct mlx5e_ipsec_sa_entry *)x->xso.offload_handle;
 	sa_entry->set_iv_op(skb, x, xo);
 	mlx5e_ipsec_set_metadata(skb, mdata, xo);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index 64e948cc3dc5..bd6f32aee8d6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -53,7 +53,7 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
 void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
 			struct xfrm_offload *xo);
 bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
-			       struct mlx5e_tx_wqe *wqe,
+			       struct mlx5_wqe_eth_seg *eseg,
 			       struct sk_buff *skb);
 
 #endif /* CONFIG_MLX5_EN_IPSEC */
-- 
cgit v1.2.3-59-g8ed1b


From 2eeb6e384102e1124d0a5633803dda0cdbcac471 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Fri, 6 Dec 2019 16:42:09 +0200
Subject: net/mlx5e: Make TLS offload independent of wqe and pi

TLS offload may write a 32-bit field (tisn) to the cseg of the WQE. To
do that, it receives pi and wqe pointers. As TLS offload may also send
additional WQEs, it has to update pi and wqe, and in many cases it
doesn't even use the pi calculated and the wqe zeroed beforehand, but
does that itself. Also, mlx5e_sq_xmit has to copy the whole cseg if it
goes to the mlx5e_fill_sq_frag_edge flow. All of this is inefficient.

It's more efficient to do the following:

1. Just return tisn from TLS offload and make the caller fill it in a
more appropriate place.

2. Calculate pi and clear wqe after calling TLS offload.

3. If TLS offload has to send WQEs, calculate pi and clear wqe just
before that. It's already done in all places anyway, so this commit
allows removing some redundant memsets and calls.

Copying of cseg will be eliminated in one of the following commits, and
all other stuff is done here.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 10 +++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h |  3 +--
 .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c  | 13 +++----------
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c | 17 +++++++++--------
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h |  3 +--
 5 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index c658c8556863..66bfab021d6b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -109,10 +109,18 @@ static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
 					 u16 *pi)
 {
 #ifdef CONFIG_MLX5_EN_TLS
+	u32 tls_tisn = 0;
+
 	if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
-		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, wqe, pi)))
+		/* May send SKBs and WQEs. */
+		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &tls_tisn)))
 			return false;
 	}
+
+	*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
+
+	(*wqe)->ctrl.tisn = cpu_to_be32(tls_tisn << 8);
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 81f8b7467569..7d9d9420f19d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -96,8 +96,7 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
 
 bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
-			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			      u16 *pi, int datalen);
+			      struct sk_buff *skb, u32 *tisn, int datalen);
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index c61604f3722c..b49d7c1e49dc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -414,20 +414,16 @@ err_out:
 }
 
 bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
-			      struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			      u16 *pi, int datalen)
+			      struct sk_buff *skb, u32 *tisn, int datalen)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
 	struct mlx5e_sq_stats *stats = sq->stats;
-	struct mlx5_wqe_ctrl_seg *cseg;
 	u32 seq;
 
 	priv_tx = mlx5e_get_ktls_tx_priv_ctx(tls_ctx);
 
 	if (unlikely(mlx5e_ktls_tx_offload_test_and_clear_pending(priv_tx))) {
 		mlx5e_ktls_tx_post_param_wqes(sq, priv_tx, false, false);
-		*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-		*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
 		stats->tls_ctx++;
 	}
 
@@ -438,23 +434,20 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s
 
 		switch (ret) {
 		case MLX5E_KTLS_SYNC_DONE:
-			*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-			*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
 			break;
 		case MLX5E_KTLS_SYNC_SKIP_NO_DATA:
 			if (likely(!skb->decrypted))
 				goto out;
 			WARN_ON_ONCE(1);
 			/* fall-through */
-		default: /* MLX5E_KTLS_SYNC_FAIL */
+		case MLX5E_KTLS_SYNC_FAIL:
 			goto err_out;
 		}
 	}
 
 	priv_tx->expected_seq = seq + datalen;
 
-	cseg = &(*wqe)->ctrl;
-	cseg->tisn = cpu_to_be32(priv_tx->tisn << 8);
+	*tisn = priv_tx->tisn;
 
 	stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 	stats->tls_encrypted_bytes   += datalen;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 26c59cfbec9b..8e6b0b0ce2e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -186,14 +186,15 @@ static void mlx5e_tls_complete_sync_skb(struct sk_buff *skb,
 
 static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
 				 struct mlx5e_txqsq *sq, struct sk_buff *skb,
-				 struct mlx5e_tx_wqe **wqe, u16 *pi,
 				 struct mlx5e_tls *tls)
 {
 	u32 tcp_seq = ntohl(tcp_hdr(skb)->seq);
+	struct mlx5e_tx_wqe *wqe;
 	struct sync_info info;
 	struct sk_buff *nskb;
 	int linear_len = 0;
 	int headln;
+	u16 pi;
 	int i;
 
 	sq->stats->tls_ooo++;
@@ -245,9 +246,10 @@ static bool mlx5e_tls_handle_ooo(struct mlx5e_tls_offload_context_tx *context,
 	sq->stats->tls_resync_bytes += nskb->len;
 	mlx5e_tls_complete_sync_skb(skb, nskb, tcp_seq, headln,
 				    cpu_to_be64(info.rcd_sn));
-	mlx5e_sq_xmit(sq, nskb, *wqe, *pi, true);
-	*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-	*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
+	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
+	mlx5e_sq_xmit(sq, nskb, wqe, pi, true);
+
 	return true;
 
 err_out:
@@ -256,8 +258,7 @@ err_out:
 }
 
 bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
-			     struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			     u16 *pi)
+			     struct sk_buff *skb, u32 *tisn)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_tx *context;
@@ -278,14 +279,14 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 		goto err_out;
 
 	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx))
-		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, wqe, pi, datalen);
+		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, tisn, datalen);
 
 	skb_seq = ntohl(tcp_hdr(skb)->seq);
 	context = mlx5e_get_tls_tx_context(tls_ctx);
 	expected_seq = context->expected_seq;
 
 	if (unlikely(expected_seq != skb_seq))
-		return mlx5e_tls_handle_ooo(context, sq, skb, wqe, pi, priv->tls);
+		return mlx5e_tls_handle_ooo(context, sq, skb, priv->tls);
 
 	if (unlikely(mlx5e_tls_add_metadata(skb, context->swid))) {
 		atomic64_inc(&priv->tls->sw_stats.tx_tls_drop_metadata);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 890d452bf1ae..3630ed8b1206 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -41,8 +41,7 @@
 #include "en/txrx.h"
 
 bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
-			     struct sk_buff *skb, struct mlx5e_tx_wqe **wqe,
-			     u16 *pi);
+			     struct sk_buff *skb, u32 *tisn);
 
 void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
 			     u32 *cqe_bcnt);
-- 
cgit v1.2.3-59-g8ed1b


From 5546100038eeff96fd0361f9b820405c016f8578 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Mon, 9 Dec 2019 12:30:42 +0200
Subject: net/mlx5e: Update UDP fields of the SKB for GSO first

mlx5e_udp_gso_handle_tx_skb updates the length field in the UDP header
in case of GSO. It doesn't interfere with other offloads, so do it first
to simplify further restructuring of the code. This way we'll make all
independent modifications to the SKB before starting to work with WQEs.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Raed Salem <raeds@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index 66bfab021d6b..d286fb09955c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -110,7 +110,12 @@ static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
 {
 #ifdef CONFIG_MLX5_EN_TLS
 	u32 tls_tisn = 0;
+#endif
+
+	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
+		mlx5e_udp_gso_handle_tx_skb(skb);
 
+#ifdef CONFIG_MLX5_EN_TLS
 	if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
 		/* May send SKBs and WQEs. */
 		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &tls_tisn)))
@@ -130,9 +135,6 @@ static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
 	}
 #endif
 
-	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
-		mlx5e_udp_gso_handle_tx_skb(skb);
-
 	return true;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 714c88a38bce3477392944d6c0d0bc724abaff09 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Wed, 29 Jan 2020 14:35:09 +0200
Subject: net/mlx5e: Split TX acceleration offloads into two phases

After previous modifications, the offloads are no longer called one by
one: the pi is calculated and the wqe is cleared in between the TLS and
IPSEC offloads, which doesn't quite fit mlx5e_accel_handle_tx's purpose.

This patch splits mlx5e_accel_handle_tx into two functions that
correspond to two logical phases of running offloads:

1. Before fetching a WQE. Here runs the code that can post WQEs on its
own, before the main WQE is fetched. It's the main part of TLS offload.

2. After fetching a WQE. Here runs the code that updates the WQE's
fields, but can't post other WQEs any more. It's a minor part of TLS
offload that sets the tisn field in the cseg, and eseg-based offloads
(currently IPSEC, and later patches will move GENEVE and checksum
offloads there, too).

This allows mlx5e_xmit to take care of all actions needed to transmit
a packet in the right order, improves the structure of the code and
reduces unnecessary operations. The structure will be further improved in
the following patches (all eseg-based offloads will be moved to a single
place, and reserving space for the main WQE will happen between phase 1
and phase 2 of offloads to eliminate unneeded data movements).

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en_accel/en_accel.h         | 33 ++++++++++++++--------
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.c       |  3 +-
 .../mellanox/mlx5/core/en_accel/ipsec_rxtx.h       |  2 +-
 .../ethernet/mellanox/mlx5/core/en_accel/ktls.h    |  4 ++-
 .../ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c |  5 ++--
 .../mellanox/mlx5/core/en_accel/tls_rxtx.c         | 10 +++++--
 .../mellanox/mlx5/core/en_accel/tls_rxtx.h         |  8 +++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_tx.c    | 10 +++++--
 8 files changed, 52 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
index d286fb09955c..fac145dcf2ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/en_accel.h
@@ -102,35 +102,44 @@ mlx5e_udp_gso_handle_tx_skb(struct sk_buff *skb)
 	udp_hdr(skb)->len = htons(payload_len);
 }
 
-static inline bool mlx5e_accel_handle_tx(struct sk_buff *skb,
-					 struct mlx5e_txqsq *sq,
-					 struct net_device *dev,
-					 struct mlx5e_tx_wqe **wqe,
-					 u16 *pi)
-{
+struct mlx5e_accel_tx_state {
 #ifdef CONFIG_MLX5_EN_TLS
-	u32 tls_tisn = 0;
+	struct mlx5e_accel_tx_tls_state tls;
 #endif
+};
 
+static inline bool mlx5e_accel_tx_begin(struct net_device *dev,
+					struct mlx5e_txqsq *sq,
+					struct sk_buff *skb,
+					struct mlx5e_accel_tx_state *state)
+{
 	if (skb_is_gso(skb) && skb_shinfo(skb)->gso_type & SKB_GSO_UDP_L4)
 		mlx5e_udp_gso_handle_tx_skb(skb);
 
 #ifdef CONFIG_MLX5_EN_TLS
 	if (test_bit(MLX5E_SQ_STATE_TLS, &sq->state)) {
 		/* May send SKBs and WQEs. */
-		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &tls_tisn)))
+		if (unlikely(!mlx5e_tls_handle_tx_skb(dev, sq, skb, &state->tls)))
 			return false;
 	}
+#endif
 
-	*pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
-	*wqe = MLX5E_TX_FETCH_WQE(sq, *pi);
+	return true;
+}
 
-	(*wqe)->ctrl.tisn = cpu_to_be32(tls_tisn << 8);
+static inline bool mlx5e_accel_tx_finish(struct mlx5e_priv *priv,
+					 struct mlx5e_txqsq *sq,
+					 struct sk_buff *skb,
+					 struct mlx5e_tx_wqe *wqe,
+					 struct mlx5e_accel_tx_state *state)
+{
+#ifdef CONFIG_MLX5_EN_TLS
+	mlx5e_tls_handle_tx_wqe(sq, &wqe->ctrl, &state->tls);
 #endif
 
 #ifdef CONFIG_MLX5_EN_IPSEC
 	if (test_bit(MLX5E_SQ_STATE_IPSEC, &sq->state)) {
-		if (unlikely(!mlx5e_ipsec_handle_tx_skb(dev, &(*wqe)->eth, skb)))
+		if (unlikely(!mlx5e_ipsec_handle_tx_skb(priv, &wqe->eth, skb)))
 			return false;
 	}
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
index 0e1ac3e68c72..824b87ac8f9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.c
@@ -233,11 +233,10 @@ static void mlx5e_ipsec_set_metadata(struct sk_buff *skb,
 		   ntohs(mdata->content.tx.seq));
 }
 
-bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
 			       struct mlx5_wqe_eth_seg *eseg,
 			       struct sk_buff *skb)
 {
-	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct xfrm_offload *xo = xfrm_offload(skb);
 	struct mlx5e_ipsec_metadata *mdata;
 	struct mlx5e_ipsec_sa_entry *sa_entry;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
index bd6f32aee8d6..ba02643586a5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ipsec_rxtx.h
@@ -52,7 +52,7 @@ void mlx5e_ipsec_set_iv_esn(struct sk_buff *skb, struct xfrm_state *x,
 			    struct xfrm_offload *xo);
 void mlx5e_ipsec_set_iv(struct sk_buff *skb, struct xfrm_state *x,
 			struct xfrm_offload *xo);
-bool mlx5e_ipsec_handle_tx_skb(struct net_device *netdev,
+bool mlx5e_ipsec_handle_tx_skb(struct mlx5e_priv *priv,
 			       struct mlx5_wqe_eth_seg *eseg,
 			       struct sk_buff *skb);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index 7d9d9420f19d..dabbc5f226ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -9,6 +9,7 @@
 #ifdef CONFIG_MLX5_EN_TLS
 #include <net/tls.h>
 #include "accel/tls.h"
+#include "en_accel/tls_rxtx.h"
 
 #define MLX5E_KTLS_STATIC_UMR_WQE_SZ \
 	(offsetof(struct mlx5e_umr_wqe, tls_static_params_ctx) + \
@@ -96,7 +97,8 @@ void mlx5e_ktls_build_netdev(struct mlx5e_priv *priv);
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx);
 
 bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
-			      struct sk_buff *skb, u32 *tisn, int datalen);
+			      struct sk_buff *skb, int datalen,
+			      struct mlx5e_accel_tx_tls_state *state);
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index b49d7c1e49dc..352b0a3ef0ad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -414,7 +414,8 @@ err_out:
 }
 
 bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *sq,
-			      struct sk_buff *skb, u32 *tisn, int datalen)
+			      struct sk_buff *skb, int datalen,
+			      struct mlx5e_accel_tx_tls_state *state)
 {
 	struct mlx5e_ktls_offload_context_tx *priv_tx;
 	struct mlx5e_sq_stats *stats = sq->stats;
@@ -447,7 +448,7 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s
 
 	priv_tx->expected_seq = seq + datalen;
 
-	*tisn = priv_tx->tisn;
+	state->tls_tisn = priv_tx->tisn;
 
 	stats->tls_encrypted_packets += skb_is_gso(skb) ? skb_shinfo(skb)->gso_segs : 1;
 	stats->tls_encrypted_bytes   += datalen;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
index 8e6b0b0ce2e4..05454a843b28 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.c
@@ -258,7 +258,7 @@ err_out:
 }
 
 bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
-			     struct sk_buff *skb, u32 *tisn)
+			     struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_tx *context;
@@ -279,7 +279,7 @@ bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
 		goto err_out;
 
 	if (MLX5_CAP_GEN(sq->channel->mdev, tls_tx))
-		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, tisn, datalen);
+		return mlx5e_ktls_handle_tx_skb(tls_ctx, sq, skb, datalen, state);
 
 	skb_seq = ntohl(tcp_hdr(skb)->seq);
 	context = mlx5e_get_tls_tx_context(tls_ctx);
@@ -302,6 +302,12 @@ err_out:
 	return false;
 }
 
+void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
+			     struct mlx5e_accel_tx_tls_state *state)
+{
+	cseg->tisn = cpu_to_be32(state->tls_tisn << 8);
+}
+
 static int tls_update_resync_sn(struct net_device *netdev,
 				struct sk_buff *skb,
 				struct mlx5e_tls_metadata *mdata)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
index 3630ed8b1206..a50d0394df0a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls_rxtx.h
@@ -40,8 +40,14 @@
 #include "en.h"
 #include "en/txrx.h"
 
+struct mlx5e_accel_tx_tls_state {
+	u32 tls_tisn;
+};
+
 bool mlx5e_tls_handle_tx_skb(struct net_device *netdev, struct mlx5e_txqsq *sq,
-			     struct sk_buff *skb, u32 *tisn);
+			     struct sk_buff *skb, struct mlx5e_accel_tx_tls_state *state);
+void mlx5e_tls_handle_tx_wqe(struct mlx5e_txqsq *sq, struct mlx5_wqe_ctrl_seg *cseg,
+			     struct mlx5e_accel_tx_tls_state *state);
 
 void mlx5e_tls_handle_rx_skb(struct net_device *netdev, struct sk_buff *skb,
 			     u32 *cqe_bcnt);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
index bb6d3774eafb..f79454746d0d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tx.c
@@ -383,16 +383,22 @@ err_drop:
 netdev_tx_t mlx5e_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct mlx5e_accel_tx_state accel = {};
 	struct mlx5e_tx_wqe *wqe;
 	struct mlx5e_txqsq *sq;
 	u16 pi;
 
 	sq = priv->txq2sq[skb_get_queue_mapping(skb)];
+
+	/* May send SKBs and WQEs. */
+	if (unlikely(!mlx5e_accel_tx_begin(dev, sq, skb, &accel)))
+		goto out;
+
 	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
 	wqe = MLX5E_TX_FETCH_WQE(sq, pi);
 
-	/* might send skbs and update wqe and pi */
-	if (unlikely(!mlx5e_accel_handle_tx(skb, sq, dev, &wqe, &pi)))
+	/* May update the WQE, but may not post other WQEs. */
+	if (unlikely(!mlx5e_accel_tx_finish(priv, sq, skb, wqe, &accel)))
 		goto out;
 
 	mlx5e_sq_xmit(sq, skb, wqe, pi, netdev_xmit_more());
-- 
cgit v1.2.3-59-g8ed1b


From ab1e0ce99d3dabc57e5a383b3ffc60fb97aafe9e Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 9 Apr 2020 14:02:04 +0300
Subject: net/mlx5e: kTLS, Fill work queue edge separately in TX flow

For the static and progress context params WQEs, do the edge
filling separately.
This improves the WQ utilization, code readability, and reduces
the chance of future bugs.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 18 ++++++++----------
 1 file changed, 8 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 352b0a3ef0ad..efc271e24b03 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -134,14 +134,14 @@ post_static_params(struct mlx5e_txqsq *sq,
 		   struct mlx5e_ktls_offload_context_tx *priv_tx,
 		   bool fence)
 {
+	u16 pi, num_wqebbs = MLX5E_KTLS_STATIC_WQEBBS;
 	struct mlx5e_umr_wqe *umr_wqe;
-	u16 pi;
 
-	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 	umr_wqe = MLX5E_TLS_FETCH_UMR_WQE(sq, pi);
 	build_static_params(umr_wqe, sq->pc, sq->sqn, priv_tx, fence);
-	tx_fill_wi(sq, pi, MLX5E_KTLS_STATIC_WQEBBS, 0, NULL);
-	sq->pc += MLX5E_KTLS_STATIC_WQEBBS;
+	tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+	sq->pc += num_wqebbs;
 }
 
 static void
@@ -149,14 +149,14 @@ post_progress_params(struct mlx5e_txqsq *sq,
 		     struct mlx5e_ktls_offload_context_tx *priv_tx,
 		     bool fence)
 {
+	u16 pi, num_wqebbs = MLX5E_KTLS_PROGRESS_WQEBBS;
 	struct mlx5e_tx_wqe *wqe;
-	u16 pi;
 
-	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
+	pi = mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
 	wqe = MLX5E_TLS_FETCH_PROGRESS_WQE(sq, pi);
 	build_progress_params(wqe, sq->pc, sq->sqn, priv_tx, fence);
-	tx_fill_wi(sq, pi, MLX5E_KTLS_PROGRESS_WQEBBS, 0, NULL);
-	sq->pc += MLX5E_KTLS_PROGRESS_WQEBBS;
+	tx_fill_wi(sq, pi, num_wqebbs, 0, NULL);
+	sq->pc += num_wqebbs;
 }
 
 static void
@@ -166,8 +166,6 @@ mlx5e_ktls_tx_post_param_wqes(struct mlx5e_txqsq *sq,
 {
 	bool progress_fence = skip_static_post || !fence_first_post;
 
-	mlx5e_txqsq_get_next_pi(sq, MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS);
-
 	if (!skip_static_post)
 		post_static_params(sq, priv_tx, fence_first_post);
 
-- 
cgit v1.2.3-59-g8ed1b


From f713ce1de8970f52ebaec7aa516b125b5c75dad9 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 9 Apr 2020 12:53:31 +0300
Subject: net/mlx5e: kTLS, Do not fill edge for the DUMP WQEs in TX flow

Every single DUMP WQE resides in a single WQEBB.
As the pi is calculated for each one separately, there is no real
need for contiguous room for them; allow them to populate
different WQ fragments.
This reduces WQ waste and improves its utilization.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index efc271e24b03..1c9d0174676d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -272,6 +272,7 @@ tx_post_resync_dump(struct mlx5e_txqsq *sq, skb_frag_t *frag, u32 tisn, bool fir
 	int fsz;
 	u16 pi;
 
+	BUILD_BUG_ON(MLX5E_KTLS_DUMP_WQEBBS != 1);
 	pi = mlx5_wq_cyc_ctr2ix(&sq->wq, sq->pc);
 	wqe = MLX5E_TLS_FETCH_DUMP_WQE(sq, pi);
 
@@ -340,7 +341,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 	struct mlx5e_sq_stats *stats = sq->stats;
 	enum mlx5e_ktls_sync_retval ret;
 	struct tx_sync_info info = {};
-	u8 num_wqebbs;
 	int i = 0;
 
 	ret = tx_sync_info_get(priv_tx, seq, datalen, &info);
@@ -369,9 +369,6 @@ mlx5e_ktls_tx_handle_ooo(struct mlx5e_ktls_offload_context_tx *priv_tx,
 		return MLX5E_KTLS_SYNC_DONE;
 	}
 
-	num_wqebbs = mlx5e_ktls_dumps_num_wqebbs(sq, info.nr_frags, info.sync_len);
-	mlx5e_txqsq_get_next_pi(sq, num_wqebbs);
-
 	for (; i < info.nr_frags; i++) {
 		unsigned int orig_fsz, frag_offset = 0, n = 0;
 		skb_frag_t *f = &info.frags[i];
-- 
cgit v1.2.3-59-g8ed1b


From 05dfd570826f1ae408e1a3faeddb753ff06fed14 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 9 Apr 2020 13:43:43 +0300
Subject: net/mlx5e: Take TX WQE info structures out of general EN header

Move them into the txrx header file.
The mlx5e_sq_wqe_info structure describes WQE info for the ICOSQ;
rename it to better reflect this.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 27 -----------------------
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 22 ++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h  |  5 +++++
 3 files changed, 27 insertions(+), 27 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index da7fe6aafeed..3bd64c63865b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -339,16 +339,6 @@ struct mlx5e_cq_decomp {
 	u16                        wqe_counter;
 } ____cacheline_aligned_in_smp;
 
-struct mlx5e_tx_wqe_info {
-	struct sk_buff *skb;
-	u32 num_bytes;
-	u8  num_wqebbs;
-	u8  num_dma;
-#ifdef CONFIG_MLX5_EN_TLS
-	struct page *resync_dump_frag_page;
-#endif
-};
-
 enum mlx5e_dma_map_type {
 	MLX5E_DMA_MAP_SINGLE,
 	MLX5E_DMA_MAP_PAGE
@@ -370,18 +360,6 @@ enum {
 	MLX5E_SQ_STATE_PENDING_XSK_TX,
 };
 
-struct mlx5e_icosq_wqe_info {
-	u8  opcode;
-	u8 num_wqebbs;
-
-	/* Auxiliary data for different opcodes. */
-	union {
-		struct {
-			struct mlx5e_rq *rq;
-		} umr;
-	};
-};
-
 struct mlx5e_txqsq {
 	/* data path */
 
@@ -484,11 +462,6 @@ struct mlx5e_xdp_info_fifo {
 	u32 mask;
 };
 
-struct mlx5e_xdp_wqe_info {
-	u8 num_wqebbs;
-	u8 num_pkts;
-};
-
 struct mlx5e_xdp_mpwqe {
 	/* Current MPWQE session */
 	struct mlx5e_tx_wqe *wqe;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 89fe65593c16..9e150d160cde 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -81,6 +81,16 @@ mlx5e_post_nop_fence(struct mlx5_wq_cyc *wq, u32 sqn, u16 *pc)
 	return wqe;
 }
 
+struct mlx5e_tx_wqe_info {
+	struct sk_buff *skb;
+	u32 num_bytes;
+	u8 num_wqebbs;
+	u8 num_dma;
+#ifdef CONFIG_MLX5_EN_TLS
+	struct page *resync_dump_frag_page;
+#endif
+};
+
 static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
@@ -109,6 +119,18 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
 	return pi;
 }
 
+struct mlx5e_icosq_wqe_info {
+	u8 opcode;
+	u8 num_wqebbs;
+
+	/* Auxiliary data for different opcodes. */
+	union {
+		struct {
+			struct mlx5e_rq *rq;
+		} umr;
+	};
+};
+
 static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
 {
 	struct mlx5_wq_cyc *wq = &sq->wq;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index ed6f045febeb..e2e01f064c1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -137,6 +137,11 @@ mlx5e_xdp_no_room_for_inline_pkt(struct mlx5e_xdp_mpwqe *session)
 	       session->ds_count + MLX5E_XDP_INLINE_WQE_MAX_DS_CNT > MLX5E_XDP_MPW_MAX_NUM_DS;
 }
 
+struct mlx5e_xdp_wqe_info {
+	u8 num_wqebbs;
+	u8 num_pkts;
+};
+
 static inline void
 mlx5e_xdp_mpwqe_add_dseg(struct mlx5e_xdpsq *sq,
 			 struct mlx5e_xdp_xmit_data *xdptxd,
-- 
cgit v1.2.3-59-g8ed1b


From 41a8e4ebb4727912c54504125e134723df8cf3cf Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Thu, 19 Mar 2020 16:50:14 +0200
Subject: net/mlx5e: Use struct assignment for WQE info updates

Struct assignment looks cleaner, and implies resetting
the unassigned fields to zero, instead of holding values
from older ring cycles.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c   |  9 +++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c        | 16 ++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c          |  9 ++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c        |  7 +++++--
 4 files changed, 24 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
index 1c9d0174676d..3cd78d9503c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls_tx.c
@@ -108,10 +108,11 @@ static void tx_fill_wi(struct mlx5e_txqsq *sq,
 {
 	struct mlx5e_tx_wqe_info *wi = &sq->db.wqe_info[pi];
 
-	memset(wi, 0, sizeof(*wi));
-	wi->num_wqebbs = num_wqebbs;
-	wi->num_bytes  = num_bytes;
-	wi->resync_dump_frag_page = page;
+	*wi = (struct mlx5e_tx_wqe_info) {
+		.num_wqebbs = num_wqebbs,
+		.num_bytes  = num_bytes,
+		.resync_dump_frag_page = page,
+	};
 }
 
 void mlx5e_ktls_tx_offload_set_pending(struct mlx5e_ktls_offload_context_tx *priv_tx)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 048a4f8601a8..0a9dfc31de3e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1364,13 +1364,12 @@ static void mlx5e_deactivate_txqsq(struct mlx5e_txqsq *sq)
 	/* last doorbell out, godspeed .. */
 	if (mlx5e_wqc_has_room_for(wq, sq->cc, sq->pc, 1)) {
 		u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
-		struct mlx5e_tx_wqe_info *wi;
 		struct mlx5e_tx_wqe *nop;
 
-		wi = &sq->db.wqe_info[pi];
+		sq->db.wqe_info[pi] = (struct mlx5e_tx_wqe_info) {
+			.num_wqebbs = 1,
+		};
 
-		memset(wi, 0, sizeof(*wi));
-		wi->num_wqebbs = 1;
 		nop = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
 		mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nop->ctrl);
 	}
@@ -1482,20 +1481,21 @@ int mlx5e_open_xdpsq(struct mlx5e_channel *c, struct mlx5e_params *params,
 
 		/* Pre initialize fixed WQE fields */
 		for (i = 0; i < mlx5_wq_cyc_get_size(&sq->wq); i++) {
-			struct mlx5e_xdp_wqe_info *wi  = &sq->db.wqe_info[i];
 			struct mlx5e_tx_wqe      *wqe  = mlx5_wq_cyc_get_wqe(&sq->wq, i);
 			struct mlx5_wqe_ctrl_seg *cseg = &wqe->ctrl;
 			struct mlx5_wqe_eth_seg  *eseg = &wqe->eth;
 			struct mlx5_wqe_data_seg *dseg;
 
+			sq->db.wqe_info[i] = (struct mlx5e_xdp_wqe_info) {
+				.num_wqebbs = 1,
+				.num_pkts   = 1,
+			};
+
 			cseg->qpn_ds = cpu_to_be32((sq->sqn << 8) | ds_cnt);
 			eseg->inline_hdr.sz = cpu_to_be16(inline_hdr_sz);
 
 			dseg = (struct mlx5_wqe_data_seg *)cseg + (ds_cnt - 1);
 			dseg->lkey = sq->mkey_be;
-
-			wi->num_wqebbs = 1;
-			wi->num_pkts   = 1;
 		}
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index d9a5a669b84d..8142b6e70857 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -505,9 +505,12 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 			    MLX5_OPCODE_UMR);
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
 
-	sq->db.wqe_info[pi].opcode = MLX5_OPCODE_UMR;
-	sq->db.wqe_info[pi].num_wqebbs = MLX5E_UMR_WQEBBS;
-	sq->db.wqe_info[pi].umr.rq = rq;
+	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+		.opcode     = MLX5_OPCODE_UMR,
+		.num_wqebbs = MLX5E_UMR_WQEBBS,
+		.umr.rq     = rq,
+	};
+
 	sq->pc += MLX5E_UMR_WQEBBS;
 
 	sq->doorbell_cseg = &umr_wqe->ctrl;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 869fd58a6775..73293f9c3f63 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -78,8 +78,11 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
 	struct mlx5e_tx_wqe *nopwqe;
 	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 
-	sq->db.wqe_info[pi].opcode = MLX5_OPCODE_NOP;
-	sq->db.wqe_info[pi].num_wqebbs = 1;
+	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
+		.opcode     = MLX5_OPCODE_NOP,
+		.num_wqebbs = 1,
+	};
+
 	nopwqe = mlx5e_post_nop(wq, sq->sqn, &sq->pc);
 	mlx5e_notify_hw(wq, sq->pc, sq->uar_map, &nopwqe->ctrl);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 6b74f60ef5a97ec3988a41e4bb745660029209a2 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 30 Mar 2020 15:42:02 +0300
Subject: net/mlx5: Accel, Remove unnecessary header include

The include of Ethernet driver header in core is not needed
and actually wrong.
Remove it.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
index c13260467750..82b185121edb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/accel.h
@@ -5,7 +5,6 @@
 
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
-#include "en.h"
 
 static inline bool is_metadata_hdr_valid(struct sk_buff *skb)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 28bff09518e9ef942173e41e7521b93ea7be0cf0 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Mon, 16 Dec 2019 14:05:07 +0200
Subject: net/mlx5e: Enhance ICOSQ WQE info fields

The same WQE opcode might be used in different ICOSQ flows
and WQE types.
To distinguish them more clearly, replace the opcode with an enum that
indicates the WQE type and the flow it is used for.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h | 11 ++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 17 ++++++++++-------
 drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c |  2 +-
 3 files changed, 19 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index 9e150d160cde..dce2bbbf9109 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -27,6 +27,11 @@
 
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
 
+enum mlx5e_icosq_wqe_type {
+	MLX5E_ICOSQ_WQE_NOP,
+	MLX5E_ICOSQ_WQE_UMR_RX,
+};
+
 static inline bool
 mlx5e_wqc_has_room_for(struct mlx5_wq_cyc *wq, u16 cc, u16 pc, u16 n)
 {
@@ -120,10 +125,10 @@ static inline u16 mlx5e_txqsq_get_next_pi(struct mlx5e_txqsq *sq, u16 size)
 }
 
 struct mlx5e_icosq_wqe_info {
-	u8 opcode;
+	u8 wqe_type;
 	u8 num_wqebbs;
 
-	/* Auxiliary data for different opcodes. */
+	/* Auxiliary data for different wqe types. */
 	union {
 		struct {
 			struct mlx5e_rq *rq;
@@ -147,7 +152,7 @@ static inline u16 mlx5e_icosq_get_next_pi(struct mlx5e_icosq *sq, u16 size)
 		/* Fill SQ frag edge with NOPs to avoid WQE wrapping two pages. */
 		for (; wi < edge_wi; wi++) {
 			*wi = (struct mlx5e_icosq_wqe_info) {
-				.opcode = MLX5_OPCODE_NOP,
+				.wqe_type   = MLX5E_ICOSQ_WQE_NOP,
 				.num_wqebbs = 1,
 			};
 			mlx5e_post_nop(wq, sq->sqn, &sq->pc);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 8142b6e70857..779600bebcca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -506,7 +506,7 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	umr_wqe->uctrl.xlt_offset = cpu_to_be16(xlt_offset);
 
 	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
-		.opcode     = MLX5_OPCODE_UMR,
+		.wqe_type   = MLX5E_ICOSQ_WQE_UMR_RX,
 		.num_wqebbs = MLX5E_UMR_WQEBBS,
 		.umr.rq     = rq,
 	};
@@ -619,15 +619,18 @@ int mlx5e_poll_ico_cq(struct mlx5e_cq *cq)
 				break;
 			}
 
-			if (likely(wi->opcode == MLX5_OPCODE_UMR))
+			switch (wi->wqe_type) {
+			case MLX5E_ICOSQ_WQE_UMR_RX:
 				wi->umr.rq->mpwqe.umr_completed++;
-			else if (unlikely(wi->opcode != MLX5_OPCODE_NOP))
+				break;
+			case MLX5E_ICOSQ_WQE_NOP:
+				break;
+			default:
 				netdev_WARN_ONCE(cq->channel->netdev,
-						 "Bad OPCODE in ICOSQ WQE info: 0x%x\n",
-						 wi->opcode);
-
+						 "Bad WQE type in ICOSQ WQE info: 0x%x\n",
+						 wi->wqe_type);
+			}
 		} while (!last_wqe);
-
 	} while ((++i < MLX5E_TX_CQ_POLL_BUDGET) && (cqe = mlx5_cqwq_get_cqe(&cq->wq)));
 
 	sq->cc = sqcc;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
index 73293f9c3f63..8480278f2ee2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_txrx.c
@@ -79,7 +79,7 @@ void mlx5e_trigger_irq(struct mlx5e_icosq *sq)
 	u16 pi = mlx5_wq_cyc_ctr2ix(wq, sq->pc);
 
 	sq->db.wqe_info[pi] = (struct mlx5e_icosq_wqe_info) {
-		.opcode     = MLX5_OPCODE_NOP,
+		.wqe_type   = MLX5E_ICOSQ_WQE_NOP,
 		.num_wqebbs = 1,
 	};
 
-- 
cgit v1.2.3-59-g8ed1b


From 38e86bfcf7f565639d89ce99ecf41a598f652cdc Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:54 +0300
Subject: net: atlantic: use __packed instead of the full expansion.

This patch fixes the review comment made by Jakub Kicinski
in the "net: atlantic: A2 support" patch series.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
index 2317dd8459d0..b66fa346581c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.h
@@ -103,7 +103,7 @@ struct sleep_proxy_s {
 		u32 crc32;
 	} wake_up_pattern[8];
 
-	struct __attribute__ ((__packed__)) {
+	struct __packed {
 		u8 arp_responder:1;
 		u8 echo_responder:1;
 		u8 igmp_client:1;
@@ -119,7 +119,7 @@ struct sleep_proxy_s {
 	u32 ipv4_offload_addr[8];
 	u32 reserved[8];
 
-	struct __attribute__ ((__packed__)) {
+	struct __packed {
 		u8 ns_responder:1;
 		u8 echo_responder:1;
 		u8 mld_client:1;
-- 
cgit v1.2.3-59-g8ed1b


From 7bb377107c72a40ab7505341f8626c8eb79a0cb7 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:55 +0300
Subject: net: atlantic: minor MACSec code cleanup

This patch fixes a couple of minor merge issues found in macsec_api.c
after the corresponding patch series was applied.

These are not real bugs, so pushing to net-next.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
index fbe9d88b13c7..36c7cf05630a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
+++ b/drivers/net/ethernet/aquantia/atlantic/macsec/macsec_api.c
@@ -846,8 +846,7 @@ static int get_ingress_sakey_record(struct aq_hw_s *hw,
 	rec->key[7] = packed_record[14];
 	rec->key[7] |= packed_record[15] << 16;
 
-	rec->key_len = (rec->key_len & 0xFFFFFFFC) |
-		       (packed_record[16] & 0x3);
+	rec->key_len = packed_record[16] & 0x3;
 
 	return 0;
 }
@@ -1158,6 +1157,7 @@ static int set_egress_ctlf_record(struct aq_hw_s *hw,
 
 	packed_record[0] = rec->sa_da[0] & 0xFFFF;
 	packed_record[1] = (rec->sa_da[0] >> 16) & 0xFFFF;
+
 	packed_record[2] = rec->sa_da[1] & 0xFFFF;
 
 	packed_record[3] = rec->eth_type & 0xFFFF;
@@ -1552,7 +1552,7 @@ static int set_egress_sc_record(struct aq_hw_s *hw,
 
 	packed_record[5] |= (rec->sak_len & 0x3) << 4;
 
-	packed_record[7] |= (rec->valid & 0x1) << 15;
+	packed_record[7] = (rec->valid & 0x1) << 15;
 
 	return set_raw_egress_record(hw, packed_record, 8, 2,
 				     ROWOFFSET_EGRESSSCRECORD + table_index);
-- 
cgit v1.2.3-59-g8ed1b


From 843e1396f62ed7ff57feaea9ceb3374ad42f445f Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:56 +0300
Subject: net: atlantic: rename AQ_NIC_RATE_2GS to AQ_NIC_RATE_2G5

This patch changes the constant name to a more logical "2G5"
(for 2.5G speeds).

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/aq_common.h        | 11 ++++++-----
 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c       |  2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c           | 13 +++++++------
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 15 ++++++++-------
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 15 ++++++++-------
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c  |  9 +++++----
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h  |  9 +++++----
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c | 13 +++++++------
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c  |  2 +-
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c |  2 +-
 10 files changed, 49 insertions(+), 42 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_common.h b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
index 53620ba6d7a6..52ad9433cabc 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_common.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_common.h
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File aq_common.h: Basic includes for all files in project. */
@@ -53,14 +54,14 @@
 #define AQ_NIC_RATE_10G		BIT(0)
 #define AQ_NIC_RATE_5G		BIT(1)
 #define AQ_NIC_RATE_5GSR	BIT(2)
-#define AQ_NIC_RATE_2GS		BIT(3)
+#define AQ_NIC_RATE_2G5		BIT(3)
 #define AQ_NIC_RATE_1G		BIT(4)
 #define AQ_NIC_RATE_100M	BIT(5)
 #define AQ_NIC_RATE_10M		BIT(6)
 
 #define AQ_NIC_RATE_EEE_10G	BIT(7)
 #define AQ_NIC_RATE_EEE_5G	BIT(8)
-#define AQ_NIC_RATE_EEE_2GS	BIT(9)
+#define AQ_NIC_RATE_EEE_2G5	BIT(9)
 #define AQ_NIC_RATE_EEE_1G	BIT(10)
 #define AQ_NIC_RATE_EEE_100M	BIT(11)
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 0c9dd8edc062..86fc77d85fda 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -605,7 +605,7 @@ static enum hw_atl_fw2x_rate eee_mask_to_ethtool_mask(u32 speed)
 	if (speed & AQ_NIC_RATE_EEE_10G)
 		rate |= SUPPORTED_10000baseT_Full;
 
-	if (speed & AQ_NIC_RATE_EEE_2GS)
+	if (speed & AQ_NIC_RATE_EEE_2G5)
 		rate |= SUPPORTED_2500baseX_Full;
 
 	if (speed & AQ_NIC_RATE_EEE_1G)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index f97b073efd8e..18cad06f2ea7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File aq_nic.c: Definition of common code for NIC. */
@@ -894,7 +895,7 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     5000baseT_Full);
 
-	if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_2GS)
+	if (self->aq_nic_cfg.aq_hw_caps->link_speed_msk & AQ_NIC_RATE_2G5)
 		ethtool_link_ksettings_add_link_mode(cmd, supported,
 						     2500baseT_Full);
 
@@ -937,7 +938,7 @@ void aq_nic_get_link_ksettings(struct aq_nic_s *self,
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     5000baseT_Full);
 
-	if (self->aq_nic_cfg.link_speed_msk  & AQ_NIC_RATE_2GS)
+	if (self->aq_nic_cfg.link_speed_msk  & AQ_NIC_RATE_2G5)
 		ethtool_link_ksettings_add_link_mode(cmd, advertising,
 						     2500baseT_Full);
 
@@ -996,7 +997,7 @@ int aq_nic_set_link_ksettings(struct aq_nic_s *self,
 			break;
 
 		case SPEED_2500:
-			rate = AQ_NIC_RATE_2GS;
+			rate = AQ_NIC_RATE_2G5;
 			break;
 
 		case SPEED_5000:
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index eee265b4415a..70f06c40bdf2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_a0.c: Definition of Atlantic hardware specific functions. */
@@ -47,7 +48,7 @@ const struct aq_hw_caps_s hw_atl_a0_caps_aqc100 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_FIBRE,
 	.link_speed_msk = AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -57,7 +58,7 @@ const struct aq_hw_caps_s hw_atl_a0_caps_aqc107 = {
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
 	.link_speed_msk = AQ_NIC_RATE_10G |
 			  AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -66,7 +67,7 @@ const struct aq_hw_caps_s hw_atl_a0_caps_aqc108 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
 	.link_speed_msk = AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -74,7 +75,7 @@ const struct aq_hw_caps_s hw_atl_a0_caps_aqc108 = {
 const struct aq_hw_caps_s hw_atl_a0_caps_aqc109 = {
 	DEFAULT_A0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = AQ_NIC_RATE_2GS |
+	.link_speed_msk = AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index cbb7a00d61b4..1d872547a87c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_b0.c: Definition of Atlantic hardware specific functions. */
@@ -59,7 +60,7 @@ const struct aq_hw_caps_s hw_atl_b0_caps_aqc100 = {
 	.media_type = AQ_HW_MEDIA_TYPE_FIBRE,
 	.link_speed_msk = AQ_NIC_RATE_10G |
 			  AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -69,7 +70,7 @@ const struct aq_hw_caps_s hw_atl_b0_caps_aqc107 = {
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
 	.link_speed_msk = AQ_NIC_RATE_10G |
 			  AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -78,7 +79,7 @@ const struct aq_hw_caps_s hw_atl_b0_caps_aqc108 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
 	.link_speed_msk = AQ_NIC_RATE_5G |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
@@ -86,7 +87,7 @@ const struct aq_hw_caps_s hw_atl_b0_caps_aqc108 = {
 const struct aq_hw_caps_s hw_atl_b0_caps_aqc109 = {
 	DEFAULT_B0_BOARD_BASIC_CAPABILITIES,
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
-	.link_speed_msk = AQ_NIC_RATE_2GS |
+	.link_speed_msk = AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G |
 			  AQ_NIC_RATE_100M,
 };
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
index 1100d40a0302..73c0f41df8d8 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_utils.c: Definition of common functions for Atlantic hardware
@@ -687,7 +688,7 @@ int hw_atl_utils_mpi_get_link_status(struct aq_hw_s *self)
 			link_status->mbps = 5000U;
 			break;
 
-		case HAL_ATLANTIC_RATE_2GS:
+		case HAL_ATLANTIC_RATE_2G5:
 			link_status->mbps = 2500U;
 			break;
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
index 99c1b6644ec3..0b4b54fc1de0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils.h
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_utils.h: Declaration of common functions for Atlantic hardware
@@ -418,7 +419,7 @@ enum hal_atl_utils_fw_state_e {
 #define HAL_ATLANTIC_RATE_10G        BIT(0)
 #define HAL_ATLANTIC_RATE_5G         BIT(1)
 #define HAL_ATLANTIC_RATE_5GSR       BIT(2)
-#define HAL_ATLANTIC_RATE_2GS        BIT(3)
+#define HAL_ATLANTIC_RATE_2G5        BIT(3)
 #define HAL_ATLANTIC_RATE_1G         BIT(4)
 #define HAL_ATLANTIC_RATE_100M       BIT(5)
 #define HAL_ATLANTIC_RATE_INVALID    BIT(6)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 1ad10cc14918..017364486703 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -1,7 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0-only
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2019 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_utils_fw2x.c: Definition of firmware 2.x functions for
@@ -134,7 +135,7 @@ static enum hw_atl_fw2x_rate link_speed_mask_2fw2x_ratemask(u32 speed)
 	if (speed & AQ_NIC_RATE_5GSR)
 		rate |= FW2X_RATE_5G;
 
-	if (speed & AQ_NIC_RATE_2GS)
+	if (speed & AQ_NIC_RATE_2G5)
 		rate |= FW2X_RATE_2G5;
 
 	if (speed & AQ_NIC_RATE_1G)
@@ -155,7 +156,7 @@ static u32 fw2x_to_eee_mask(u32 speed)
 	if (speed & HW_ATL_FW2X_CAP_EEE_5G_MASK)
 		rate |= AQ_NIC_RATE_EEE_5G;
 	if (speed & HW_ATL_FW2X_CAP_EEE_2G5_MASK)
-		rate |= AQ_NIC_RATE_EEE_2GS;
+		rate |= AQ_NIC_RATE_EEE_2G5;
 	if (speed & HW_ATL_FW2X_CAP_EEE_1G_MASK)
 		rate |= AQ_NIC_RATE_EEE_1G;
 
@@ -170,7 +171,7 @@ static u32 eee_mask_to_fw2x(u32 speed)
 		rate |= HW_ATL_FW2X_CAP_EEE_10G_MASK;
 	if (speed & AQ_NIC_RATE_EEE_5G)
 		rate |= HW_ATL_FW2X_CAP_EEE_5G_MASK;
-	if (speed & AQ_NIC_RATE_EEE_2GS)
+	if (speed & AQ_NIC_RATE_EEE_2G5)
 		rate |= HW_ATL_FW2X_CAP_EEE_2G5_MASK;
 	if (speed & AQ_NIC_RATE_EEE_1G)
 		rate |= HW_ATL_FW2X_CAP_EEE_1G_MASK;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 04d194f754fa..84d9b828dc4e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -60,7 +60,7 @@ const struct aq_hw_caps_s hw_atl2_caps_aqc113 = {
 	.media_type = AQ_HW_MEDIA_TYPE_TP,
 	.link_speed_msk = AQ_NIC_RATE_10G |
 			  AQ_NIC_RATE_5G  |
-			  AQ_NIC_RATE_2GS |
+			  AQ_NIC_RATE_2G5 |
 			  AQ_NIC_RATE_1G  |
 			  AQ_NIC_RATE_100M      |
 			  AQ_NIC_RATE_10M,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
index f5fb4b11f51a..e8f4aad8c1e5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
@@ -129,7 +129,7 @@ static void a2_link_speed_mask2fw(u32 speed,
 	link_options->rate_10G = !!(speed & AQ_NIC_RATE_10G);
 	link_options->rate_5G = !!(speed & AQ_NIC_RATE_5G);
 	link_options->rate_N5G = !!(speed & AQ_NIC_RATE_5GSR);
-	link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2GS);
+	link_options->rate_2P5G = !!(speed & AQ_NIC_RATE_2G5);
 	link_options->rate_N2P5G = link_options->rate_2P5G;
 	link_options->rate_1G = !!(speed & AQ_NIC_RATE_1G);
 	link_options->rate_100M = !!(speed & AQ_NIC_RATE_100M);
-- 
cgit v1.2.3-59-g8ed1b


From c64d7b23f51161ceb6e92f8e5b8077e8e04ddbef Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:57 +0300
Subject: net: atlantic: remove TPO2 check from A0 code

TPO2 was introduced in B0 only, no reason to check for it in A0 code.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 70f06c40bdf2..1b0670a8ae33 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -268,8 +268,7 @@ static int hw_atl_a0_hw_init_tx_path(struct aq_hw_s *self)
 	hw_atl_tdm_tx_desc_wr_wb_irq_en_set(self, 1U);
 
 	/* misc */
-	aq_hw_write_reg(self, 0x00007040U, ATL_HW_IS_CHIP_FEATURE(self, TPO2) ?
-			0x00010000U : 0x00000000U);
+	aq_hw_write_reg(self, 0x00007040U, 0x00000000U);
 	hw_atl_tdm_tx_dca_en_set(self, 0U);
 	hw_atl_tdm_tx_dca_mode_set(self, 0U);
 
-- 
cgit v1.2.3-59-g8ed1b


From ad46bd5ec357ebb18c42398d8760bad4d611b53e Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:58 +0300
Subject: net: atlantic: remove hw_atl_b0_hw_rss_set call from A2 code

No need to call hw_atl_b0_hw_rss_set from hw_atl2_hw_rss_set

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c | 4 ++--
 drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h | 9 ++++-----
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c  | 2 +-
 3 files changed, 7 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 1d872547a87c..fa3cd7e9954b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -216,8 +216,8 @@ err_exit:
 	return err;
 }
 
-int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
-			 struct aq_rss_parameters *rss_params)
+static int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
+				struct aq_rss_parameters *rss_params)
 {
 	u32 num_rss_queues = max(1U, self->aq_nic_cfg->num_rss_queues);
 	u8 *indirection_table =	rss_params->indirection_table;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index f5091d79ab43..b855459272ca 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -1,7 +1,8 @@
 /* SPDX-License-Identifier: GPL-2.0-only */
-/*
- * aQuantia Corporation Network Driver
- * Copyright (C) 2014-2017 aQuantia Corporation. All rights reserved
+/* Atlantic Network Driver
+ *
+ * Copyright (C) 2014-2019 aQuantia Corporation
+ * Copyright (C) 2019-2020 Marvell International Ltd.
  */
 
 /* File hw_atl_b0.h: Declaration of abstract interface for Atlantic hardware
@@ -35,8 +36,6 @@ extern const struct aq_hw_ops hw_atl_ops_b0;
 
 int hw_atl_b0_hw_rss_hash_set(struct aq_hw_s *self,
 			      struct aq_rss_parameters *rss_params);
-int hw_atl_b0_hw_rss_set(struct aq_hw_s *self,
-			 struct aq_rss_parameters *rss_params);
 int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 			     struct aq_nic_cfg_s *aq_nic_cfg);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 84d9b828dc4e..6f2b33ae3d06 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -172,7 +172,7 @@ static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 	for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;)
 		hw_atl2_new_rpf_rss_redir_set(self, 0, i, indirection_table[i]);
 
-	return hw_atl_b0_hw_rss_set(self, rss_params);
+	return aq_hw_err_from_flags(self);
 }
 
 static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
-- 
cgit v1.2.3-59-g8ed1b


From 097d638cf4e19be208d26c6bbbca7f8da348294a Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:46:59 +0300
Subject: net: atlantic: remove check for boot code survivability before reset
 request

This patch removes the unnecessary check for boot code survivability
before a reset request.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c | 8 --------
 1 file changed, 8 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c
index 85ccc9a011a0..f3766780e975 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils.c
@@ -75,14 +75,6 @@ int hw_atl2_utils_soft_reset(struct aq_hw_s *self)
 	u32 rbl_request;
 	int err;
 
-	err = readx_poll_timeout_atomic(hw_atl2_mif_mcp_boot_reg_get, self,
-				rbl_status,
-				((rbl_status & AQ_A2_BOOT_STARTED) &&
-				 (rbl_status != 0xFFFFFFFFu)),
-				10, 500000);
-	if (err)
-		aq_pr_trace("Boot code probably hanged, reboot anyway");
-
 	hw_atl2_mif_host_req_int_clr(self, 0x01);
 	rbl_request = AQ_A2_FW_BOOT_REQ_REBOOT;
 #ifdef AQ_CFG_FAST_START
-- 
cgit v1.2.3-59-g8ed1b


From b4de6c49e569c7ca91bf0d4c25cdb6eeca9c9529 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Sat, 9 May 2020 09:47:00 +0300
Subject: net: atlantic: unify MAC generation

This patch unifies invalid MAC address handling with other drivers.

Basically we've switched to using standard APIs (is_valid_ether_addr /
eth_hw_addr_random) where possible.
It's worth noting that some engineering samples of Aquantia NICs might be
provisioned with a partially zeroed out MAC, which is still invalid,
but not caught by is_valid_ether_addr(), so we've added special
handling for this case.

Also add a warning when falling back to a random MAC, because
this shouldn't be needed on production NICs: they should all be
provisioned with a unique MAC.

NB! The default systemd/udevd configuration is 'MACAddressPolicy=persistent'.
    This causes the MAC address to be persisted across driver reloads and
    reboots. We had to change it to 'none' for verification purposes.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 14 +++++++++++++
 .../aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c   | 22 ---------------------
 .../aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c   | 23 +---------------------
 3 files changed, 15 insertions(+), 44 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 18cad06f2ea7..1c6d12deb47a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -272,6 +272,14 @@ exit:
 	return err;
 }
 
+static bool aq_nic_is_valid_ether_addr(const u8 *addr)
+{
+	/* Some engineering samples of Aquantia NICs are provisioned with a
+	 * partially populated MAC, which is still invalid.
+	 */
+	return !(addr[0] == 0 && addr[1] == 0 && addr[2] == 0);
+}
+
 int aq_nic_ndev_register(struct aq_nic_s *self)
 {
 	int err = 0;
@@ -296,6 +304,12 @@ int aq_nic_ndev_register(struct aq_nic_s *self)
 	if (err)
 		goto err_exit;
 
+	if (!is_valid_ether_addr(self->ndev->dev_addr) ||
+	    !aq_nic_is_valid_ether_addr(self->ndev->dev_addr)) {
+		netdev_warn(self->ndev, "MAC is invalid, will use random.");
+		eth_hw_addr_random(self->ndev);
+	}
+
 #if defined(AQ_CFG_MAC_ADDR_PERMANENT)
 	{
 		static u8 mac_addr_permanent[] = AQ_CFG_MAC_ADDR_PERMANENT;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
index 017364486703..eeedd8c90067 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_utils_fw2x.c
@@ -283,8 +283,6 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
 	u32 efuse_addr = aq_hw_read_reg(self, HW_ATL_FW2X_MPI_EFUSE_ADDR);
 	u32 mac_addr[2] = { 0 };
 	int err = 0;
-	u32 h = 0U;
-	u32 l = 0U;
 
 	if (efuse_addr != 0) {
 		err = hw_atl_utils_fw_downld_dwords(self,
@@ -299,26 +297,6 @@ static int aq_fw2x_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
 
 	ether_addr_copy(mac, (u8 *)mac_addr);
 
-	if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
-		unsigned int rnd = 0;
-
-		get_random_bytes(&rnd, sizeof(unsigned int));
-
-		l = 0xE3000000U | (0xFFFFU & rnd) | (0x00 << 16);
-		h = 0x8001300EU;
-
-		mac[5] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[4] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[3] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[2] = (u8)(0xFFU & l);
-		mac[1] = (u8)(0xFFU & h);
-		h >>= 8;
-		mac[0] = (u8)(0xFFU & h);
-	}
-
 	return err;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
index e8f4aad8c1e5..0ffc33bd67d0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_utils_fw.c
@@ -6,6 +6,7 @@
 #include <linux/iopoll.h>
 
 #include "aq_hw.h"
+#include "aq_hw_utils.h"
 #include "hw_atl/hw_atl_llh.h"
 #include "hw_atl2_utils.h"
 #include "hw_atl2_llh.h"
@@ -212,28 +213,6 @@ static int aq_a2_fw_get_mac_permanent(struct aq_hw_s *self, u8 *mac)
 	hw_atl2_shared_buffer_get(self, mac_address, mac_address);
 	ether_addr_copy(mac, (u8 *)mac_address.aligned.mac_address);
 
-	if ((mac[0] & 0x01U) || ((mac[0] | mac[1] | mac[2]) == 0x00U)) {
-		unsigned int rnd = 0;
-		u32 h;
-		u32 l;
-
-		get_random_bytes(&rnd, sizeof(unsigned int));
-
-		l = 0xE3000000U | (0xFFFFU & rnd) | (0x00 << 16);
-		h = 0x8001300EU;
-
-		mac[5] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[4] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[3] = (u8)(0xFFU & l);
-		l >>= 8;
-		mac[2] = (u8)(0xFFU & l);
-		mac[1] = (u8)(0xFFU & h);
-		h >>= 8;
-		mac[0] = (u8)(0xFFU & h);
-	}
-
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 72ef908bb3ff9261dc38d079ef332c91418f8693 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Fri, 8 May 2020 20:18:50 +0000
Subject: hinic: add three net_device_ops of vf

Add ndo_set_vf_rate/ndo_set_vf_spoofchk/ndo_set_vf_link_state
to configure the netdev of a virtual function.

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c |  29 +++
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  |  35 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  21 ++
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.c   |  32 ++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |   6 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c |   4 +-
 drivers/net/ethernet/huawei/hinic/hinic_main.c    |  17 +-
 drivers/net/ethernet/huawei/hinic/hinic_port.c    |   8 +-
 drivers/net/ethernet/huawei/hinic/hinic_port.h    |  43 ++++
 drivers/net/ethernet/huawei/hinic/hinic_sriov.c   | 275 ++++++++++++++++++++++
 drivers/net/ethernet/huawei/hinic/hinic_sriov.h   |   7 +
 11 files changed, 453 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
index 33c5333657c1..cb5b6e5f787f 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_cmdq.c
@@ -849,6 +849,25 @@ err_init_cmdq:
 	return err;
 }
 
+static int hinic_set_cmdq_depth(struct hinic_hwdev *hwdev, u16 cmdq_depth)
+{
+	struct hinic_cmd_hw_ioctxt hw_ioctxt = { 0 };
+	struct hinic_pfhwdev *pfhwdev;
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+	hw_ioctxt.func_idx = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+	hw_ioctxt.ppf_idx = HINIC_HWIF_PPF_IDX(hwdev->hwif);
+
+	hw_ioctxt.set_cmdq_depth = HW_IOCTXT_SET_CMDQ_DEPTH_ENABLE;
+	hw_ioctxt.cmdq_depth = (u8)ilog2(cmdq_depth);
+
+	return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+				 HINIC_COMM_CMD_HWCTXT_SET,
+				 &hw_ioctxt, sizeof(hw_ioctxt), NULL,
+				 NULL, HINIC_MGMT_MSG_SYNC);
+}
+
 /**
  * hinic_init_cmdqs - init all cmdqs
  * @cmdqs: cmdqs to init
@@ -899,8 +918,18 @@ int hinic_init_cmdqs(struct hinic_cmdqs *cmdqs, struct hinic_hwif *hwif,
 
 	hinic_ceq_register_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ, cmdqs,
 			      cmdq_ceq_handler);
+
+	err = hinic_set_cmdq_depth(hwdev, CMDQ_DEPTH);
+	if (err) {
+		dev_err(&hwif->pdev->dev, "Failed to set cmdq depth\n");
+		goto err_set_cmdq_depth;
+	}
+
 	return 0;
 
+err_set_cmdq_depth:
+	hinic_ceq_unregister_cb(&func_to_io->ceqs, HINIC_CEQ_CMDQ);
+
 err_cmdq_ctxt:
 	hinic_wqs_cmdq_free(&cmdqs->cmdq_pages, cmdqs->saved_wqs,
 			    HINIC_MAX_CMDQ_TYPES);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index e5cab58e4ddd..1ce8b8d572cf 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -44,10 +44,6 @@ enum io_status {
 	IO_RUNNING = 1,
 };
 
-enum hw_ioctxt_set_cmdq_depth {
-	HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT,
-};
-
 /**
  * get_capability - convert device capabilities to NIC capabilities
  * @hwdev: the HW device to set and convert device capabilities for
@@ -667,6 +663,32 @@ static void free_pfhwdev(struct hinic_pfhwdev *pfhwdev)
 	hinic_pf_to_mgmt_free(&pfhwdev->pf_to_mgmt);
 }
 
+static int hinic_l2nic_reset(struct hinic_hwdev *hwdev)
+{
+	struct hinic_cmd_l2nic_reset l2nic_reset = {0};
+	u16 out_size = sizeof(l2nic_reset);
+	struct hinic_pfhwdev *pfhwdev;
+	int err;
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+	l2nic_reset.func_id = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+	/* 0 represents standard l2nic reset flow */
+	l2nic_reset.reset_flag = 0;
+
+	err = hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_COMM,
+				HINIC_COMM_CMD_L2NIC_RESET, &l2nic_reset,
+				sizeof(l2nic_reset), &l2nic_reset,
+				&out_size, HINIC_MGMT_MSG_SYNC);
+	if (err || !out_size || l2nic_reset.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to reset L2NIC resources, err: %d, status: 0x%x, out_size: 0x%x\n",
+			err, l2nic_reset.status, out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
 /**
  * hinic_init_hwdev - Initialize the NIC HW
  * @pdev: the NIC pci device
@@ -729,6 +751,10 @@ struct hinic_hwdev *hinic_init_hwdev(struct pci_dev *pdev)
 		goto err_init_pfhwdev;
 	}
 
+	err = hinic_l2nic_reset(hwdev);
+	if (err)
+		goto err_l2nic_reset;
+
 	err = get_dev_cap(hwdev);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to get device capabilities\n");
@@ -759,6 +785,7 @@ err_resources_state:
 err_init_fw_ctxt:
 	hinic_vf_func_free(hwdev);
 err_vf_func_init:
+err_l2nic_reset:
 err_dev_cap:
 	free_pfhwdev(pfhwdev);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index 531d1072e0df..c8f62a024a58 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -25,6 +25,7 @@
 
 #define HINIC_PF_SET_VF_ALREADY				0x4
 #define HINIC_MGMT_STATUS_EXIST				0x6
+#define HINIC_MGMT_CMD_UNSUPPORTED			0xFF
 
 struct hinic_cap {
 	u16     max_qps;
@@ -33,6 +34,11 @@ struct hinic_cap {
 	u16     max_vf_qps;
 };
 
+enum hw_ioctxt_set_cmdq_depth {
+	HW_IOCTXT_SET_CMDQ_DEPTH_DEFAULT,
+	HW_IOCTXT_SET_CMDQ_DEPTH_ENABLE,
+};
+
 enum hinic_port_cmd {
 	HINIC_PORT_CMD_VF_REGISTER = 0x0,
 	HINIC_PORT_CMD_VF_UNREGISTER = 0x1,
@@ -86,12 +92,16 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_FWCTXT_INIT      = 69,
 
+	HINIC_PORT_CMD_ENABLE_SPOOFCHK = 78,
+
 	HINIC_PORT_CMD_GET_MGMT_VERSION = 88,
 
 	HINIC_PORT_CMD_SET_FUNC_STATE   = 93,
 
 	HINIC_PORT_CMD_GET_GLOBAL_QPN   = 102,
 
+	HINIC_PORT_CMD_SET_VF_RATE = 105,
+
 	HINIC_PORT_CMD_SET_VF_VLAN	= 106,
 
 	HINIC_PORT_CMD_CLR_VF_VLAN,
@@ -107,6 +117,8 @@ enum hinic_port_cmd {
 	HINIC_PORT_CMD_GET_CAP          = 170,
 
 	HINIC_PORT_CMD_SET_LRO_TIMER	= 244,
+
+	HINIC_PORT_CMD_SET_VF_MAX_MIN_RATE = 249,
 };
 
 enum hinic_ucode_cmd {
@@ -247,6 +259,15 @@ struct hinic_cmd_hw_ci {
 	u64     ci_addr;
 };
 
+struct hinic_cmd_l2nic_reset {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	reset_flag;
+};
+
 struct hinic_hwdev {
 	struct hinic_hwif               *hwif;
 	struct msix_entry               *msix_entries;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
index 3fbd2eb80582..cf127d896ba6 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.c
@@ -10,6 +10,7 @@
 #include <linux/io.h>
 #include <linux/types.h>
 #include <linux/bitops.h>
+#include <linux/delay.h>
 
 #include "hinic_hw_csr.h"
 #include "hinic_hw_if.h"
@@ -18,6 +19,8 @@
 
 #define VALID_MSIX_IDX(attr, msix_index) ((msix_index) < (attr)->num_irqs)
 
+#define WAIT_HWIF_READY_TIMEOUT	10000
+
 /**
  * hinic_msix_attr_set - set message attribute for msix entry
  * @hwif: the HW interface of a pci function device
@@ -187,20 +190,39 @@ void hinic_set_msix_state(struct hinic_hwif *hwif, u16 msix_idx,
  **/
 static int hwif_ready(struct hinic_hwif *hwif)
 {
-	struct pci_dev *pdev = hwif->pdev;
 	u32 addr, attr1;
 
 	addr   = HINIC_CSR_FUNC_ATTR1_ADDR;
 	attr1  = hinic_hwif_read_reg(hwif, addr);
 
-	if (!HINIC_FA1_GET(attr1, INIT_STATUS)) {
-		dev_err(&pdev->dev, "hwif status is not ready\n");
-		return -EFAULT;
+	if (!HINIC_FA1_GET(attr1, MGMT_INIT_STATUS))
+		return -EBUSY;
+
+	if (HINIC_IS_VF(hwif)) {
+		if (!HINIC_FA1_GET(attr1, PF_INIT_STATUS))
+			return -EBUSY;
 	}
 
 	return 0;
 }
 
+static int wait_hwif_ready(struct hinic_hwif *hwif)
+{
+	unsigned long timeout = 0;
+
+	do {
+		if (!hwif_ready(hwif))
+			return 0;
+
+		usleep_range(999, 1000);
+		timeout++;
+	} while (timeout <= WAIT_HWIF_READY_TIMEOUT);
+
+	dev_err(&hwif->pdev->dev, "Wait for hwif timeout\n");
+
+	return -EBUSY;
+}
+
 /**
  * set_hwif_attr - set the attributes in the relevant members in hwif
  * @hwif: the HW interface of a pci function device
@@ -373,7 +395,7 @@ int hinic_init_hwif(struct hinic_hwif *hwif, struct pci_dev *pdev)
 		goto err_map_intr_bar;
 	}
 
-	err = hwif_ready(hwif);
+	err = wait_hwif_ready(hwif);
 	if (err) {
 		dev_err(&pdev->dev, "HW interface is not ready\n");
 		goto err_hwif_ready;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 53bb89c1dd26..5bb6ec4dcb7c 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -55,13 +55,15 @@
 #define HINIC_FA1_IRQS_PER_FUNC_SHIFT                           20
 #define HINIC_FA1_DMA_ATTR_PER_FUNC_SHIFT                       24
 /* reserved members - off 27 */
-#define HINIC_FA1_INIT_STATUS_SHIFT                             30
+#define HINIC_FA1_MGMT_INIT_STATUS_SHIFT			30
+#define HINIC_FA1_PF_INIT_STATUS_SHIFT				31
 
 #define HINIC_FA1_AEQS_PER_FUNC_MASK                            0x3
 #define HINIC_FA1_CEQS_PER_FUNC_MASK                            0x7
 #define HINIC_FA1_IRQS_PER_FUNC_MASK                            0xF
 #define HINIC_FA1_DMA_ATTR_PER_FUNC_MASK                        0x7
-#define HINIC_FA1_INIT_STATUS_MASK                              0x1
+#define HINIC_FA1_MGMT_INIT_STATUS_MASK                         0x1
+#define HINIC_FA1_PF_INIT_STATUS_MASK				0x1
 
 #define HINIC_FA1_GET(val, member)                              \
 	(((val) >> HINIC_FA1_##member##_SHIFT) & HINIC_FA1_##member##_MASK)
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
index 564fb2294a29..bc2f87e6cb5d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_mbox.c
@@ -627,7 +627,7 @@ wait_for_mbox_seg_completion(struct hinic_mbox_func_to_func *func_to_func,
 	struct hinic_hwdev *hwdev = func_to_func->hwdev;
 	struct completion *done = &send_mbox->send_done;
 	u32 cnt = 0;
-	ulong jif;
+	unsigned long jif;
 
 	if (poll) {
 		while (cnt < MBOX_MSG_POLLING_TIMEOUT) {
@@ -869,7 +869,7 @@ int hinic_mbox_to_func(struct hinic_mbox_func_to_func *func_to_func,
 {
 	struct hinic_recv_mbox *mbox_for_resp;
 	struct mbox_msg_info msg_info = {0};
-	ulong timeo;
+	unsigned long timeo;
 	int err;
 
 	mbox_for_resp = &func_to_func->mbox_resp[dst_func];
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index b66bb86cff96..3d6569d7bac8 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -427,10 +427,6 @@ static int hinic_open(struct net_device *netdev)
 		goto err_func_port_state;
 	}
 
-	if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
-		/* Wait up to 3 sec between port enable to link state */
-		msleep(3000);
-
 	down(&nic_dev->mgmt_lock);
 
 	err = hinic_port_link_state(nic_dev, &link_state);
@@ -766,10 +762,12 @@ static void hinic_set_rx_mode(struct net_device *netdev)
 		  HINIC_RX_MODE_MC |
 		  HINIC_RX_MODE_BC;
 
-	if (netdev->flags & IFF_PROMISC)
-		rx_mode |= HINIC_RX_MODE_PROMISC;
-	else if (netdev->flags & IFF_ALLMULTI)
+	if (netdev->flags & IFF_PROMISC) {
+		if (!HINIC_IS_VF(nic_dev->hwdev->hwif))
+			rx_mode |= HINIC_RX_MODE_PROMISC;
+	} else if (netdev->flags & IFF_ALLMULTI) {
 		rx_mode |= HINIC_RX_MODE_MC_ALL;
+	}
 
 	rx_mode_work->rx_mode = rx_mode;
 
@@ -868,6 +866,9 @@ static const struct net_device_ops hinic_netdev_ops = {
 	.ndo_set_vf_vlan = hinic_ndo_set_vf_vlan,
 	.ndo_get_vf_config = hinic_ndo_get_vf_config,
 	.ndo_set_vf_trust = hinic_ndo_set_vf_trust,
+	.ndo_set_vf_rate = hinic_ndo_set_vf_bw,
+	.ndo_set_vf_spoofchk = hinic_ndo_set_vf_spoofchk,
+	.ndo_set_vf_link_state = hinic_ndo_set_vf_link_state,
 };
 
 static const struct net_device_ops hinicvf_netdev_ops = {
@@ -1232,6 +1233,8 @@ static void hinic_remove(struct pci_dev *pdev)
 
 	unregister_netdev(netdev);
 
+	hinic_port_del_mac(nic_dev, netdev->dev_addr, 0);
+
 	hinic_hwdev_cb_unregister(nic_dev->hwdev,
 				  HINIC_MGMT_MSG_CMD_LINK_STATUS);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index b7fe0adcc29a..714d8279c591 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -66,15 +66,15 @@ static int change_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		return -EFAULT;
 	}
 
-	if (cmd == HINIC_PORT_CMD_SET_MAC && port_mac_cmd.status ==
-	    HINIC_PF_SET_VF_ALREADY) {
-		dev_warn(&pdev->dev, "PF has already set VF mac, Ignore set operation\n");
+	if (port_mac_cmd.status == HINIC_PF_SET_VF_ALREADY) {
+		dev_warn(&pdev->dev, "PF has already set VF mac, ignore %s operation\n",
+			 (op == MAC_SET) ? "set" : "del");
 		return HINIC_PF_SET_VF_ALREADY;
 	}
 
 	if (cmd == HINIC_PORT_CMD_SET_MAC && port_mac_cmd.status ==
 	    HINIC_MGMT_STATUS_EXIST)
-		dev_warn(&pdev->dev, "MAC is repeated. Ignore set operation\n");
+		dev_warn(&pdev->dev, "MAC is repeated, ignore set operation\n");
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 5ad04fb6722a..f2781521970e 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -506,6 +506,49 @@ struct hinic_cmd_vport_stats {
 	struct hinic_vport_stats stats;
 };
 
+struct hinic_tx_rate_cfg_max_min {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	min_rate;
+	u32	max_rate;
+	u8	rsvd2[8];
+};
+
+struct hinic_tx_rate_cfg {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	tx_rate;
+};
+
+enum nic_speed_level {
+	LINK_SPEED_10MB = 0,
+	LINK_SPEED_100MB,
+	LINK_SPEED_1GB,
+	LINK_SPEED_10GB,
+	LINK_SPEED_25GB,
+	LINK_SPEED_40GB,
+	LINK_SPEED_100GB,
+	LINK_SPEED_LEVELS,
+};
+
+struct hinic_spoofchk_set {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u8	state;
+	u8	rsvd1;
+	u16	func_id;
+};
+
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
index fd4aaf43874a..efab2dd2c889 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.c
@@ -22,6 +22,7 @@ MODULE_PARM_DESC(set_vf_link_state, "Set vf link state, 0 represents link auto,
 
 #define HINIC_VLAN_PRIORITY_SHIFT 13
 #define HINIC_ADD_VLAN_IN_MAC 0x8000
+#define HINIC_TX_RATE_TABLE_FULL 12
 
 static int hinic_set_mac(struct hinic_hwdev *hwdev, const u8 *mac_addr,
 			 u16 vlan_id, u16 func_id)
@@ -129,6 +130,84 @@ static int hinic_set_vf_vlan(struct hinic_hwdev *hwdev, bool add, u16 vid,
 	return 0;
 }
 
+static int hinic_set_vf_tx_rate_max_min(struct hinic_hwdev *hwdev, u16 vf_id,
+					u32 max_rate, u32 min_rate)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	struct hinic_tx_rate_cfg_max_min rate_cfg = {0};
+	u16 out_size = sizeof(rate_cfg);
+	int err;
+
+	rate_cfg.func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+	rate_cfg.max_rate = max_rate;
+	rate_cfg.min_rate = min_rate;
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_VF_MAX_MIN_RATE,
+				 &rate_cfg, sizeof(rate_cfg), &rate_cfg,
+				 &out_size);
+	if ((rate_cfg.status != HINIC_MGMT_CMD_UNSUPPORTED &&
+	     rate_cfg.status) || err || !out_size) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set VF(%d) max rate(%d), min rate(%d), err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), max_rate, min_rate, err,
+			rate_cfg.status, out_size);
+		return -EIO;
+	}
+
+	if (!rate_cfg.status) {
+		nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].max_rate = max_rate;
+		nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].min_rate = min_rate;
+	}
+
+	return rate_cfg.status;
+}
+
+static int hinic_set_vf_rate_limit(struct hinic_hwdev *hwdev, u16 vf_id,
+				   u32 tx_rate)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	struct hinic_tx_rate_cfg rate_cfg = {0};
+	u16 out_size = sizeof(rate_cfg);
+	int err;
+
+	rate_cfg.func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+	rate_cfg.tx_rate = tx_rate;
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_VF_RATE,
+				 &rate_cfg, sizeof(rate_cfg), &rate_cfg,
+				 &out_size);
+	if (err || !out_size || rate_cfg.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set VF(%d) rate(%d), err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), tx_rate, err, rate_cfg.status,
+			out_size);
+		if (rate_cfg.status)
+			return rate_cfg.status;
+
+		return -EIO;
+	}
+
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].max_rate = tx_rate;
+	nic_io->vf_infos[HW_VF_ID_TO_OS(vf_id)].min_rate = 0;
+
+	return 0;
+}
+
+static int hinic_set_vf_tx_rate(struct hinic_hwdev *hwdev, u16 vf_id,
+				u32 max_rate, u32 min_rate)
+{
+	int err;
+
+	err = hinic_set_vf_tx_rate_max_min(hwdev, vf_id, max_rate, min_rate);
+	if (err != HINIC_MGMT_CMD_UNSUPPORTED)
+		return err;
+
+	if (min_rate) {
+		dev_err(&hwdev->hwif->pdev->dev, "Current firmware doesn't support to set min tx rate\n");
+		return -EOPNOTSUPP;
+	}
+
+	dev_info(&hwdev->hwif->pdev->dev, "Current firmware doesn't support to set min tx rate, force min_tx_rate = max_tx_rate\n");
+
+	return hinic_set_vf_rate_limit(hwdev, vf_id, max_rate);
+}
+
 static int hinic_init_vf_config(struct hinic_hwdev *hwdev, u16 vf_id)
 {
 	struct vf_data_storage *vf_info;
@@ -160,6 +239,17 @@ static int hinic_init_vf_config(struct hinic_hwdev *hwdev, u16 vf_id)
 		}
 	}
 
+	if (vf_info->max_rate) {
+		err = hinic_set_vf_tx_rate(hwdev, vf_id, vf_info->max_rate,
+					   vf_info->min_rate);
+		if (err) {
+			dev_err(&hwdev->hwif->pdev->dev, "Failed to set VF %d max rate: %d, min rate: %d\n",
+				HW_VF_ID_TO_OS(vf_id), vf_info->max_rate,
+				vf_info->min_rate);
+			return err;
+		}
+	}
+
 	return 0;
 }
 
@@ -700,6 +790,185 @@ int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting)
 	return err;
 }
 
+int hinic_ndo_set_vf_bw(struct net_device *netdev,
+			int vf, int min_tx_rate, int max_tx_rate)
+{
+	u32 speeds[] = {SPEED_10, SPEED_100, SPEED_1000, SPEED_10000,
+			SPEED_25000, SPEED_40000, SPEED_100000};
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_port_cap port_cap = { 0 };
+	enum hinic_port_link_state link_state;
+	int err;
+
+	if (vf >= nic_dev->sriov_info.num_vfs) {
+		netif_err(nic_dev, drv, netdev, "VF number must be less than %d\n",
+			  nic_dev->sriov_info.num_vfs);
+		return -EINVAL;
+	}
+
+	if (max_tx_rate < min_tx_rate) {
+		netif_err(nic_dev, drv, netdev, "Max rate %d must be greater than or equal to min rate %d\n",
+			  max_tx_rate, min_tx_rate);
+		return -EINVAL;
+	}
+
+	err = hinic_port_link_state(nic_dev, &link_state);
+	if (err) {
+		netif_err(nic_dev, drv, netdev,
+			  "Get link status failed when setting vf tx rate\n");
+		return -EIO;
+	}
+
+	if (link_state == HINIC_LINK_STATE_DOWN) {
+		netif_err(nic_dev, drv, netdev,
+			  "Link status must be up when setting vf tx rate\n");
+		return -EPERM;
+	}
+
+	err = hinic_port_get_cap(nic_dev, &port_cap);
+	if (err || port_cap.speed > LINK_SPEED_100GB)
+		return -EIO;
+
+	/* rate limit cannot be less than 0 and greater than link speed */
+	if (max_tx_rate < 0 || max_tx_rate > speeds[port_cap.speed]) {
+		netif_err(nic_dev, drv, netdev, "Max tx rate must be in [0 - %d]\n",
+			  speeds[port_cap.speed]);
+		return -EINVAL;
+	}
+
+	err = hinic_set_vf_tx_rate(nic_dev->hwdev, OS_VF_ID_TO_HW(vf),
+				   max_tx_rate, min_tx_rate);
+	if (err) {
+		netif_err(nic_dev, drv, netdev,
+			  "Unable to set VF %d max rate %d min rate %d%s\n",
+			  vf, max_tx_rate, min_tx_rate,
+			  err == HINIC_TX_RATE_TABLE_FULL ?
+			  ", tx rate profile is full" : "");
+		return -EIO;
+	}
+
+	netif_info(nic_dev, drv, netdev,
+		   "Set VF %d max tx rate %d min tx rate %d successfully\n",
+		   vf, max_tx_rate, min_tx_rate);
+
+	return 0;
+}
+
+static int hinic_set_vf_spoofchk(struct hinic_hwdev *hwdev, u16 vf_id,
+				 bool spoofchk)
+{
+	struct hinic_spoofchk_set spoofchk_cfg = {0};
+	struct vf_data_storage *vf_infos = NULL;
+	u16 out_size = sizeof(spoofchk_cfg);
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	vf_infos = hwdev->func_to_io.vf_infos;
+
+	spoofchk_cfg.func_id = hinic_glb_pf_vf_offset(hwdev->hwif) + vf_id;
+	spoofchk_cfg.state = spoofchk ? 1 : 0;
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_ENABLE_SPOOFCHK,
+				 &spoofchk_cfg, sizeof(spoofchk_cfg),
+				 &spoofchk_cfg, &out_size);
+	if (spoofchk_cfg.status == HINIC_MGMT_CMD_UNSUPPORTED) {
+		err = HINIC_MGMT_CMD_UNSUPPORTED;
+	} else if (err || !out_size || spoofchk_cfg.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set VF(%d) spoofchk, err: %d, status: 0x%x, out size: 0x%x\n",
+			HW_VF_ID_TO_OS(vf_id), err, spoofchk_cfg.status,
+			out_size);
+		err = -EIO;
+	}
+
+	vf_infos[HW_VF_ID_TO_OS(vf_id)].spoofchk = spoofchk;
+
+	return err;
+}
+
+int hinic_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info;
+	bool cur_spoofchk;
+	int err;
+
+	sriov_info = &nic_dev->sriov_info;
+	if (vf >= sriov_info->num_vfs)
+		return -EINVAL;
+
+	cur_spoofchk = nic_dev->hwdev->func_to_io.vf_infos[vf].spoofchk;
+
+	/* same request, so just return success */
+	if ((setting && cur_spoofchk) || (!setting && !cur_spoofchk))
+		return 0;
+
+	err = hinic_set_vf_spoofchk(sriov_info->hwdev,
+				    OS_VF_ID_TO_HW(vf), setting);
+
+	if (!err) {
+		netif_info(nic_dev, drv, netdev, "Set VF %d spoofchk %s successfully\n",
+			   vf, setting ? "on" : "off");
+	} else if (err == HINIC_MGMT_CMD_UNSUPPORTED) {
+		netif_err(nic_dev, drv, netdev,
+			  "Current firmware doesn't support to set vf spoofchk, need to upgrade latest firmware version\n");
+		err = -EOPNOTSUPP;
+	}
+
+	return err;
+}
+
+static int hinic_set_vf_link_state(struct hinic_hwdev *hwdev, u16 vf_id,
+				   int link)
+{
+	struct hinic_func_to_io *nic_io = &hwdev->func_to_io;
+	struct vf_data_storage *vf_infos = nic_io->vf_infos;
+	u8 link_status = 0;
+
+	switch (link) {
+	case HINIC_IFLA_VF_LINK_STATE_AUTO:
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_forced = false;
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_up = nic_io->link_status ?
+			true : false;
+		link_status = nic_io->link_status;
+		break;
+	case HINIC_IFLA_VF_LINK_STATE_ENABLE:
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_forced = true;
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_up = true;
+		link_status = HINIC_LINK_UP;
+		break;
+	case HINIC_IFLA_VF_LINK_STATE_DISABLE:
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_forced = true;
+		vf_infos[HW_VF_ID_TO_OS(vf_id)].link_up = false;
+		link_status = HINIC_LINK_DOWN;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* Notify the VF of its new link state */
+	hinic_notify_vf_link_status(hwdev, vf_id, link_status);
+
+	return 0;
+}
+
+int hinic_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_sriov_info *sriov_info;
+
+	sriov_info = &nic_dev->sriov_info;
+
+	if (vf_id >= sriov_info->num_vfs) {
+		netif_err(nic_dev, drv, netdev,
+			  "Invalid VF Identifier %d\n", vf_id);
+		return -EINVAL;
+	}
+
+	return hinic_set_vf_link_state(sriov_info->hwdev,
+				      OS_VF_ID_TO_HW(vf_id), link);
+}
+
 /* pf receive message from vf */
 static int nic_pf_mbox_handler(void *hwdev, u16 vf_id, u8 cmd, void *buf_in,
 			       u16 in_size, void *buf_out, u16 *out_size)
@@ -801,6 +1070,12 @@ static void hinic_clear_vf_infos(struct hinic_dev *nic_dev, u16 vf_id)
 	if (hinic_vf_info_vlanprio(nic_dev->hwdev, vf_id))
 		hinic_kill_vf_vlan(nic_dev->hwdev, vf_id);
 
+	if (vf_infos->max_rate)
+		hinic_set_vf_tx_rate(nic_dev->hwdev, vf_id, 0, 0);
+
+	if (vf_infos->spoofchk)
+		hinic_set_vf_spoofchk(nic_dev->hwdev, vf_id, false);
+
 	if (vf_infos->trust)
 		hinic_set_vf_trust(nic_dev->hwdev, vf_id, false);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_sriov.h b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
index 64affc7474b5..ba627a362f9a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_sriov.h
@@ -86,6 +86,13 @@ int hinic_ndo_get_vf_config(struct net_device *netdev,
 
 int hinic_ndo_set_vf_trust(struct net_device *netdev, int vf, bool setting);
 
+int hinic_ndo_set_vf_bw(struct net_device *netdev,
+			int vf, int min_tx_rate, int max_tx_rate);
+
+int hinic_ndo_set_vf_spoofchk(struct net_device *netdev, int vf, bool setting);
+
+int hinic_ndo_set_vf_link_state(struct net_device *netdev, int vf_id, int link);
+
 void hinic_notify_all_vfs_link_changed(struct hinic_hwdev *hwdev,
 				       u8 link_status);
 
-- 
cgit v1.2.3-59-g8ed1b


From 18aa23b31f6c8e3f4b01b70a8a4eab873e2eb710 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:02 +0300
Subject: mlxsw: spectrum_matchall: Restrict sample action to be allowed only
 on ingress

The hardware supports packet sampling on ingress only. Check for this
and fail if the user tries to add a sample action on egress.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index da1c05f44cec..c75661521bbc 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -212,6 +212,11 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 		mall_entry->mirror.to_dev = act->dev;
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
 		   protocol == htons(ETH_P_ALL)) {
+		if (!mall_entry->ingress) {
+			NL_SET_ERR_MSG(f->common.extack, "Sample is not supported on egress");
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
 		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
 			NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported");
 			err = -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From 593bb843798636131bd6d9eb1633b1ee55a8a53b Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:03 +0300
Subject: mlxsw: spectrum_flower: Expose a function to get min and max rule
 priority

Introduce infrastructure that allows getting the minimum and maximum
rule priority for a specified chain. This is going to be used by
a subsequent patch to enforce ordering between flower and
matchall filters.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  7 ++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c | 13 +++++++-
 .../ethernet/mellanox/mlxsw/spectrum_acl_tcam.c    | 39 +++++++++++++++++++---
 .../ethernet/mellanox/mlxsw/spectrum_acl_tcam.h    |  3 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  | 20 +++++++++++
 5 files changed, 75 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index a12ca673c224..d9a963c77401 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -739,6 +739,9 @@ mlxsw_sp_acl_ruleset_get(struct mlxsw_sp *mlxsw_sp,
 void mlxsw_sp_acl_ruleset_put(struct mlxsw_sp *mlxsw_sp,
 			      struct mlxsw_sp_acl_ruleset *ruleset);
 u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset);
+void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset,
+				   unsigned int *p_min_prio,
+				   unsigned int *p_max_prio);
 
 struct mlxsw_sp_acl_rule_info *
 mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl,
@@ -912,6 +915,10 @@ int mlxsw_sp_flower_tmplt_create(struct mlxsw_sp *mlxsw_sp,
 void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
 				   struct mlxsw_sp_flow_block *block,
 				   struct flow_cls_offload *f);
+int mlxsw_sp_flower_prio_get(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_flow_block *block,
+			     u32 chain_index, unsigned int *p_min_prio,
+			     unsigned int *p_max_prio);
 
 /* spectrum_qdisc.c */
 int mlxsw_sp_tc_qdisc_init(struct mlxsw_sp_port *mlxsw_sp_port);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
index c61f78e30397..47da9ee0045d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl.c
@@ -51,6 +51,8 @@ struct mlxsw_sp_acl_ruleset {
 	struct mlxsw_sp_acl_ruleset_ht_key ht_key;
 	struct rhashtable rule_ht;
 	unsigned int ref_count;
+	unsigned int min_prio;
+	unsigned int max_prio;
 	unsigned long priv[];
 	/* priv has to be always the last item */
 };
@@ -178,7 +180,8 @@ mlxsw_sp_acl_ruleset_create(struct mlxsw_sp *mlxsw_sp,
 		goto err_rhashtable_init;
 
 	err = ops->ruleset_add(mlxsw_sp, &acl->tcam, ruleset->priv,
-			       tmplt_elusage);
+			       tmplt_elusage, &ruleset->min_prio,
+			       &ruleset->max_prio);
 	if (err)
 		goto err_ops_ruleset_add;
 
@@ -293,6 +296,14 @@ u16 mlxsw_sp_acl_ruleset_group_id(struct mlxsw_sp_acl_ruleset *ruleset)
 	return ops->ruleset_group_id(ruleset->priv);
 }
 
+void mlxsw_sp_acl_ruleset_prio_get(struct mlxsw_sp_acl_ruleset *ruleset,
+				   unsigned int *p_min_prio,
+				   unsigned int *p_max_prio)
+{
+	*p_min_prio = ruleset->min_prio;
+	*p_max_prio = ruleset->max_prio;
+}
+
 struct mlxsw_sp_acl_rule_info *
 mlxsw_sp_acl_rulei_create(struct mlxsw_sp_acl *acl,
 			  struct mlxsw_afa_block *afa_block)
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
index a6e30e020b5c..5c020403342f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.c
@@ -179,6 +179,8 @@ struct mlxsw_sp_acl_tcam_vgroup {
 	bool tmplt_elusage_set;
 	struct mlxsw_afk_element_usage tmplt_elusage;
 	bool vregion_rehash_enabled;
+	unsigned int *p_min_prio;
+	unsigned int *p_max_prio;
 };
 
 struct mlxsw_sp_acl_tcam_rehash_ctx {
@@ -316,13 +318,17 @@ mlxsw_sp_acl_tcam_vgroup_add(struct mlxsw_sp *mlxsw_sp,
 			     const struct mlxsw_sp_acl_tcam_pattern *patterns,
 			     unsigned int patterns_count,
 			     struct mlxsw_afk_element_usage *tmplt_elusage,
-			     bool vregion_rehash_enabled)
+			     bool vregion_rehash_enabled,
+			     unsigned int *p_min_prio,
+			     unsigned int *p_max_prio)
 {
 	int err;
 
 	vgroup->patterns = patterns;
 	vgroup->patterns_count = patterns_count;
 	vgroup->vregion_rehash_enabled = vregion_rehash_enabled;
+	vgroup->p_min_prio = p_min_prio;
+	vgroup->p_max_prio = p_max_prio;
 
 	if (tmplt_elusage) {
 		vgroup->tmplt_elusage_set = true;
@@ -416,6 +422,21 @@ mlxsw_sp_acl_tcam_vregion_max_prio(struct mlxsw_sp_acl_tcam_vregion *vregion)
 	return vchunk->priority;
 }
 
+static void
+mlxsw_sp_acl_tcam_vgroup_prio_update(struct mlxsw_sp_acl_tcam_vgroup *vgroup)
+{
+	struct mlxsw_sp_acl_tcam_vregion *vregion;
+
+	if (list_empty(&vgroup->vregion_list))
+		return;
+	vregion = list_first_entry(&vgroup->vregion_list,
+				   typeof(*vregion), list);
+	*vgroup->p_min_prio = mlxsw_sp_acl_tcam_vregion_prio(vregion);
+	vregion = list_last_entry(&vgroup->vregion_list,
+				  typeof(*vregion), list);
+	*vgroup->p_max_prio = mlxsw_sp_acl_tcam_vregion_max_prio(vregion);
+}
+
 static int
 mlxsw_sp_acl_tcam_group_region_attach(struct mlxsw_sp *mlxsw_sp,
 				      struct mlxsw_sp_acl_tcam_group *group,
@@ -1035,6 +1056,7 @@ mlxsw_sp_acl_tcam_vchunk_create(struct mlxsw_sp *mlxsw_sp,
 	}
 	list_add_tail(&vchunk->list, pos);
 	mutex_unlock(&vregion->lock);
+	mlxsw_sp_acl_tcam_vgroup_prio_update(vgroup);
 
 	return vchunk;
 
@@ -1066,6 +1088,7 @@ mlxsw_sp_acl_tcam_vchunk_destroy(struct mlxsw_sp *mlxsw_sp,
 			       mlxsw_sp_acl_tcam_vchunk_ht_params);
 	mlxsw_sp_acl_tcam_vregion_put(mlxsw_sp, vchunk->vregion);
 	kfree(vchunk);
+	mlxsw_sp_acl_tcam_vgroup_prio_update(vgroup);
 }
 
 static struct mlxsw_sp_acl_tcam_vchunk *
@@ -1582,14 +1605,17 @@ static int
 mlxsw_sp_acl_tcam_flower_ruleset_add(struct mlxsw_sp *mlxsw_sp,
 				     struct mlxsw_sp_acl_tcam *tcam,
 				     void *ruleset_priv,
-				     struct mlxsw_afk_element_usage *tmplt_elusage)
+				     struct mlxsw_afk_element_usage *tmplt_elusage,
+				     unsigned int *p_min_prio,
+				     unsigned int *p_max_prio)
 {
 	struct mlxsw_sp_acl_tcam_flower_ruleset *ruleset = ruleset_priv;
 
 	return mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup,
 					    mlxsw_sp_acl_tcam_patterns,
 					    MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
-					    tmplt_elusage, true);
+					    tmplt_elusage, true,
+					    p_min_prio, p_max_prio);
 }
 
 static void
@@ -1698,7 +1724,9 @@ static int
 mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp,
 				 struct mlxsw_sp_acl_tcam *tcam,
 				 void *ruleset_priv,
-				 struct mlxsw_afk_element_usage *tmplt_elusage)
+				 struct mlxsw_afk_element_usage *tmplt_elusage,
+				 unsigned int *p_min_prio,
+				 unsigned int *p_max_prio)
 {
 	struct mlxsw_sp_acl_tcam_mr_ruleset *ruleset = ruleset_priv;
 	int err;
@@ -1706,7 +1734,8 @@ mlxsw_sp_acl_tcam_mr_ruleset_add(struct mlxsw_sp *mlxsw_sp,
 	err = mlxsw_sp_acl_tcam_vgroup_add(mlxsw_sp, tcam, &ruleset->vgroup,
 					   mlxsw_sp_acl_tcam_patterns,
 					   MLXSW_SP_ACL_TCAM_PATTERNS_COUNT,
-					   tmplt_elusage, false);
+					   tmplt_elusage, false,
+					   p_min_prio, p_max_prio);
 	if (err)
 		return err;
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
index 96437992b102..a41df10ade9b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_acl_tcam.h
@@ -42,7 +42,8 @@ struct mlxsw_sp_acl_profile_ops {
 	size_t ruleset_priv_size;
 	int (*ruleset_add)(struct mlxsw_sp *mlxsw_sp,
 			   struct mlxsw_sp_acl_tcam *tcam, void *ruleset_priv,
-			   struct mlxsw_afk_element_usage *tmplt_elusage);
+			   struct mlxsw_afk_element_usage *tmplt_elusage,
+			   unsigned int *p_min_prio, unsigned int *p_max_prio);
 	void (*ruleset_del)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv);
 	int (*ruleset_bind)(struct mlxsw_sp *mlxsw_sp, void *ruleset_priv,
 			    struct mlxsw_sp_port *mlxsw_sp_port,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 0897ca1967ab..18d22217e435 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -647,3 +647,23 @@ void mlxsw_sp_flower_tmplt_destroy(struct mlxsw_sp *mlxsw_sp,
 	mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
 	mlxsw_sp_acl_ruleset_put(mlxsw_sp, ruleset);
 }
+
+int mlxsw_sp_flower_prio_get(struct mlxsw_sp *mlxsw_sp,
+			     struct mlxsw_sp_flow_block *block,
+			     u32 chain_index, unsigned int *p_min_prio,
+			     unsigned int *p_max_prio)
+{
+	struct mlxsw_sp_acl_ruleset *ruleset;
+
+	ruleset = mlxsw_sp_acl_ruleset_lookup(mlxsw_sp, block,
+					      chain_index,
+					      MLXSW_SP_ACL_PROFILE_FLOWER);
+	if (IS_ERR(ruleset))
+		/* In case there are no flower rules, the caller
+		 * receives -ENOENT to indicate there is no need
+		 * to check the priorities.
+		 */
+		return PTR_ERR(ruleset);
+	mlxsw_sp_acl_ruleset_prio_get(ruleset, p_min_prio, p_max_prio);
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 5a2939b9d7ddbdc399ce42cae8150c424e0bd764 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:04 +0300
Subject: mlxsw: spectrum_matchall: Put matchall list into substruct of flow
 struct

As there are going to be other matchall-specific fields in the flow
block structure, move the existing list field into a matchall substruct.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h          |  4 +++-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c     |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c | 10 +++++-----
 3 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index d9a963c77401..553693469805 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -636,7 +636,9 @@ struct mlxsw_sp_acl_rule_info {
 /* spectrum_flow.c */
 struct mlxsw_sp_flow_block {
 	struct list_head binding_list;
-	struct list_head mall_list;
+	struct {
+		struct list_head list;
+	} mall;
 	struct mlxsw_sp_acl_ruleset *ruleset_zero;
 	struct mlxsw_sp *mlxsw_sp;
 	unsigned int rule_count;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index ecab581ff956..76644f6a8121 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -18,7 +18,7 @@ mlxsw_sp_flow_block_create(struct mlxsw_sp *mlxsw_sp, struct net *net)
 	if (!block)
 		return NULL;
 	INIT_LIST_HEAD(&block->binding_list);
-	INIT_LIST_HEAD(&block->mall_list);
+	INIT_LIST_HEAD(&block->mall.list);
 	block->mlxsw_sp = mlxsw_sp;
 	block->net = net;
 	return block;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index c75661521bbc..d64ee31a611c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -37,7 +37,7 @@ mlxsw_sp_mall_entry_find(struct mlxsw_sp_flow_block *block, unsigned long cookie
 {
 	struct mlxsw_sp_mall_entry *mall_entry;
 
-	list_for_each_entry(mall_entry, &block->mall_list, list)
+	list_for_each_entry(mall_entry, &block->mall.list, list)
 		if (mall_entry->cookie == cookie)
 			return mall_entry;
 
@@ -244,7 +244,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 		block->egress_blocker_rule_count++;
 	else
 		block->ingress_blocker_rule_count++;
-	list_add_tail(&mall_entry->list, &block->mall_list);
+	list_add_tail(&mall_entry->list, &block->mall.list);
 	return 0;
 
 rollback:
@@ -285,7 +285,7 @@ int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
 	struct mlxsw_sp_mall_entry *mall_entry;
 	int err;
 
-	list_for_each_entry(mall_entry, &block->mall_list, list) {
+	list_for_each_entry(mall_entry, &block->mall.list, list) {
 		err = mlxsw_sp_mall_port_rule_add(mlxsw_sp_port, mall_entry);
 		if (err)
 			goto rollback;
@@ -293,7 +293,7 @@ int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
 	return 0;
 
 rollback:
-	list_for_each_entry_continue_reverse(mall_entry, &block->mall_list,
+	list_for_each_entry_continue_reverse(mall_entry, &block->mall.list,
 					     list)
 		mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
 	return err;
@@ -304,6 +304,6 @@ void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
 {
 	struct mlxsw_sp_mall_entry *mall_entry;
 
-	list_for_each_entry(mall_entry, &block->mall_list, list)
+	list_for_each_entry(mall_entry, &block->mall.list, list)
 		mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
 }
-- 
cgit v1.2.3-59-g8ed1b


From aed65285fb9e16328f9bbf14394fef5ddbe82815 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:05 +0300
Subject: mlxsw: spectrum_matchall: Expose a function to get min and max rule
 priority

Introduce infrastructure that allows getting the minimum and maximum
rule priority for a specified chain. This is going to be used by
a subsequent patch to enforce ordering between flower and
matchall filters.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  4 +++
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 34 ++++++++++++++++++++++
 2 files changed, 38 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 553693469805..456dbaa5ee26 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -638,6 +638,8 @@ struct mlxsw_sp_flow_block {
 	struct list_head binding_list;
 	struct {
 		struct list_head list;
+		unsigned int min_prio;
+		unsigned int max_prio;
 	} mall;
 	struct mlxsw_sp_acl_ruleset *ruleset_zero;
 	struct mlxsw_sp *mlxsw_sp;
@@ -900,6 +902,8 @@ int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
 			    struct mlxsw_sp_port *mlxsw_sp_port);
 void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
 			       struct mlxsw_sp_port *mlxsw_sp_port);
+int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
+			   unsigned int *p_min_prio, unsigned int *p_max_prio);
 
 /* spectrum_flower.c */
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index d64ee31a611c..b11bab76b2e1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -23,6 +23,7 @@ struct mlxsw_sp_mall_mirror_entry {
 struct mlxsw_sp_mall_entry {
 	struct list_head list;
 	unsigned long cookie;
+	unsigned int priority;
 	enum mlxsw_sp_mall_action_type type;
 	bool ingress;
 	union {
@@ -175,6 +176,22 @@ mlxsw_sp_mall_port_rule_del(struct mlxsw_sp_port *mlxsw_sp_port,
 	}
 }
 
+static void mlxsw_sp_mall_prio_update(struct mlxsw_sp_flow_block *block)
+{
+	struct mlxsw_sp_mall_entry *mall_entry;
+
+	if (list_empty(&block->mall.list))
+		return;
+	block->mall.min_prio = UINT_MAX;
+	block->mall.max_prio = 0;
+	list_for_each_entry(mall_entry, &block->mall.list, list) {
+		if (mall_entry->priority < block->mall.min_prio)
+			block->mall.min_prio = mall_entry->priority;
+		if (mall_entry->priority > block->mall.max_prio)
+			block->mall.max_prio = mall_entry->priority;
+	}
+}
+
 int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 			  struct tc_cls_matchall_offload *f)
 {
@@ -203,6 +220,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 	if (!mall_entry)
 		return -ENOMEM;
 	mall_entry->cookie = f->cookie;
+	mall_entry->priority = f->common.prio;
 	mall_entry->ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
 
 	act = &f->rule->action.entries[0];
@@ -245,6 +263,7 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 	else
 		block->ingress_blocker_rule_count++;
 	list_add_tail(&mall_entry->list, &block->mall.list);
+	mlxsw_sp_mall_prio_update(block);
 	return 0;
 
 rollback:
@@ -277,6 +296,7 @@ void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 	list_for_each_entry(binding, &block->binding_list, list)
 		mlxsw_sp_mall_port_rule_del(binding->mlxsw_sp_port, mall_entry);
 	kfree_rcu(mall_entry, rcu); /* sample RX packets may be in-flight */
+	mlxsw_sp_mall_prio_update(block);
 }
 
 int mlxsw_sp_mall_port_bind(struct mlxsw_sp_flow_block *block,
@@ -307,3 +327,17 @@ void mlxsw_sp_mall_port_unbind(struct mlxsw_sp_flow_block *block,
 	list_for_each_entry(mall_entry, &block->mall.list, list)
 		mlxsw_sp_mall_port_rule_del(mlxsw_sp_port, mall_entry);
 }
+
+int mlxsw_sp_mall_prio_get(struct mlxsw_sp_flow_block *block, u32 chain_index,
+			   unsigned int *p_min_prio, unsigned int *p_max_prio)
+{
+	if (chain_index || list_empty(&block->mall.list))
+		/* In case there are no matchall rules, the caller
+		 * receives -ENOENT to indicate there is no need
+		 * to check the priorities.
+		 */
+		return -ENOENT;
+	*p_min_prio = block->mall.min_prio;
+	*p_max_prio = block->mall.max_prio;
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 18346b70ab776fa2cbb9a5f0f0f9c4700c470c5e Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:06 +0300
Subject: mlxsw: spectrum_matchall: Forbid to insert matchall rules in
 collision with flower rules

On ingress, the matchall rules doing mirroring and sampling are offloaded
into hardware blocks that are processed before any flower rules.
On egress, the matchall mirroring rules are offloaded into a hardware
block that is processed after all flower rules.

Therefore check the priorities of inserted matchall rules against
existing flower rules and ensure the correct ordering.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  3 +-
 .../net/ethernet/mellanox/mlxsw/spectrum_flow.c    |  4 ++-
 .../ethernet/mellanox/mlxsw/spectrum_matchall.c    | 37 +++++++++++++++++++++-
 3 files changed, 41 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 456dbaa5ee26..147a5634244b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -894,7 +894,8 @@ extern const struct mlxsw_afk_ops mlxsw_sp1_afk_ops;
 extern const struct mlxsw_afk_ops mlxsw_sp2_afk_ops;
 
 /* spectrum_matchall.c */
-int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
+int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_flow_block *block,
 			  struct tc_cls_matchall_offload *f);
 void mlxsw_sp_mall_destroy(struct mlxsw_sp_flow_block *block,
 			   struct tc_cls_matchall_offload *f);
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
index 76644f6a8121..47b66f347ff1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flow.c
@@ -135,9 +135,11 @@ static int mlxsw_sp_flow_block_unbind(struct mlxsw_sp *mlxsw_sp,
 static int mlxsw_sp_flow_block_mall_cb(struct mlxsw_sp_flow_block *flow_block,
 				       struct tc_cls_matchall_offload *f)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_sp_flow_block_mlxsw_sp(flow_block);
+
 	switch (f->command) {
 	case TC_CLSMATCHALL_REPLACE:
-		return mlxsw_sp_mall_replace(flow_block, f);
+		return mlxsw_sp_mall_replace(mlxsw_sp, flow_block, f);
 	case TC_CLSMATCHALL_DESTROY:
 		mlxsw_sp_mall_destroy(flow_block, f);
 		return 0;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
index b11bab76b2e1..f1a44a8eda55 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_matchall.c
@@ -192,13 +192,17 @@ static void mlxsw_sp_mall_prio_update(struct mlxsw_sp_flow_block *block)
 	}
 }
 
-int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
+int mlxsw_sp_mall_replace(struct mlxsw_sp *mlxsw_sp,
+			  struct mlxsw_sp_flow_block *block,
 			  struct tc_cls_matchall_offload *f)
 {
 	struct mlxsw_sp_flow_block_binding *binding;
 	struct mlxsw_sp_mall_entry *mall_entry;
 	__be16 protocol = f->common.protocol;
 	struct flow_action_entry *act;
+	unsigned int flower_min_prio;
+	unsigned int flower_max_prio;
+	bool flower_prio_valid;
 	int err;
 
 	if (!flow_offload_has_one_action(&f->rule->action)) {
@@ -216,6 +220,19 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 		return -EOPNOTSUPP;
 	}
 
+	err = mlxsw_sp_flower_prio_get(mlxsw_sp, block, f->common.chain_index,
+				       &flower_min_prio, &flower_max_prio);
+	if (err) {
+		if (err != -ENOENT) {
+			NL_SET_ERR_MSG(f->common.extack, "Failed to get flower priorities");
+			return err;
+		}
+		flower_prio_valid = false;
+		/* No flower filters are installed in specified chain. */
+	} else {
+		flower_prio_valid = true;
+	}
+
 	mall_entry = kzalloc(sizeof(*mall_entry), GFP_KERNEL);
 	if (!mall_entry)
 		return -ENOMEM;
@@ -226,6 +243,18 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 	act = &f->rule->action.entries[0];
 
 	if (act->id == FLOW_ACTION_MIRRED && protocol == htons(ETH_P_ALL)) {
+		if (flower_prio_valid && mall_entry->ingress &&
+		    mall_entry->priority >= flower_min_prio) {
+			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
+		if (flower_prio_valid && !mall_entry->ingress &&
+		    mall_entry->priority <= flower_max_prio) {
+			NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing flower rules");
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
 		mall_entry->type = MLXSW_SP_MALL_ACTION_TYPE_MIRROR;
 		mall_entry->mirror.to_dev = act->dev;
 	} else if (act->id == FLOW_ACTION_SAMPLE &&
@@ -235,6 +264,12 @@ int mlxsw_sp_mall_replace(struct mlxsw_sp_flow_block *block,
 			err = -EOPNOTSUPP;
 			goto errout;
 		}
+		if (flower_prio_valid &&
+		    mall_entry->priority >= flower_min_prio) {
+			NL_SET_ERR_MSG(f->common.extack, "Failed to add behind existing flower rules");
+			err = -EOPNOTSUPP;
+			goto errout;
+		}
 		if (act->sample.rate > MLXSW_REG_MPSC_RATE_MAX) {
 			NL_SET_ERR_MSG(f->common.extack, "Sample rate not supported");
 			err = -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From 67ed68fc0c9aa4a456d03df216e571e0c6177097 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:07 +0300
Subject: mlxsw: spectrum_flower: Forbid to insert flower rules in collision
 with matchall rules

On ingress, the matchall rules doing mirroring and sampling are offloaded
into hardware blocks that are processed before any flower rules.
On egress, the matchall mirroring rules are offloaded into a hardware
block that is processed after all flower rules.

Therefore check the priorities of inserted flower rules against
existing matchall rules and ensure the correct ordering.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_flower.c  | 32 ++++++++++++++++++++++
 1 file changed, 32 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index 18d22217e435..b286fe158820 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -505,6 +505,34 @@ static int mlxsw_sp_flower_parse(struct mlxsw_sp *mlxsw_sp,
 					     f->common.extack);
 }
 
+static int mlxsw_sp_flower_mall_prio_check(struct mlxsw_sp_flow_block *block,
+					   struct flow_cls_offload *f)
+{
+	bool ingress = mlxsw_sp_flow_block_is_ingress_bound(block);
+	unsigned int mall_min_prio;
+	unsigned int mall_max_prio;
+	int err;
+
+	err = mlxsw_sp_mall_prio_get(block, f->common.chain_index,
+				     &mall_min_prio, &mall_max_prio);
+	if (err) {
+		if (err == -ENOENT)
+			/* No matchall filters installed on this chain. */
+			return 0;
+		NL_SET_ERR_MSG(f->common.extack, "Failed to get matchall priorities");
+		return err;
+	}
+	if (ingress && f->common.prio <= mall_min_prio) {
+		NL_SET_ERR_MSG(f->common.extack, "Failed to add in front of existing matchall rules");
+		return -EOPNOTSUPP;
+	}
+	if (!ingress && f->common.prio >= mall_max_prio) {
+		NL_SET_ERR_MSG(f->common.extack, "Failed to add behind of existing matchall rules");
+		return -EOPNOTSUPP;
+	}
+	return 0;
+}
+
 int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 			    struct mlxsw_sp_flow_block *block,
 			    struct flow_cls_offload *f)
@@ -514,6 +542,10 @@ int mlxsw_sp_flower_replace(struct mlxsw_sp *mlxsw_sp,
 	struct mlxsw_sp_acl_rule *rule;
 	int err;
 
+	err = mlxsw_sp_flower_mall_prio_check(block, f);
+	if (err)
+		return err;
+
 	ruleset = mlxsw_sp_acl_ruleset_get(mlxsw_sp, block,
 					   f->common.chain_index,
 					   MLXSW_SP_ACL_PROFILE_FLOWER, NULL);
-- 
cgit v1.2.3-59-g8ed1b


From b886dea37b78debeea7019c649c05c7e2ba027fc Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:08 +0300
Subject: selftests: mlxsw: rename tc_flower_restrictions.sh to
 tc_restrictions.sh

The file is about to contain matchall restrictions too, so change the
name to make it more generic.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../drivers/net/mlxsw/tc_flower_restrictions.sh    | 186 ---------------------
 .../selftests/drivers/net/mlxsw/tc_restrictions.sh | 186 +++++++++++++++++++++
 2 files changed, 186 insertions(+), 186 deletions(-)
 delete mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
deleted file mode 100755
index 68c80d0ec1ec..000000000000
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_flower_restrictions.sh
+++ /dev/null
@@ -1,186 +0,0 @@
-#!/bin/bash
-# SPDX-License-Identifier: GPL-2.0
-
-lib_dir=$(dirname $0)/../../../net/forwarding
-
-ALL_TESTS="
-	shared_block_drop_test
-	egress_redirect_test
-	multi_mirror_test
-"
-NUM_NETIFS=2
-
-source $lib_dir/tc_common.sh
-source $lib_dir/lib.sh
-
-switch_create()
-{
-	simple_if_init $swp1 192.0.2.1/24
-	simple_if_init $swp2 192.0.2.2/24
-}
-
-switch_destroy()
-{
-	simple_if_fini $swp2 192.0.2.2/24
-	simple_if_fini $swp1 192.0.2.1/24
-}
-
-shared_block_drop_test()
-{
-	RET=0
-
-	# It is forbidden in mlxsw driver to have mixed-bound
-	# shared block with a drop rule.
-
-	tc qdisc add dev $swp1 ingress_block 22 clsact
-	check_err $? "Failed to create clsact with ingress block"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 action drop
-	check_err $? "Failed to add drop rule to ingress bound block"
-
-	tc qdisc add dev $swp2 ingress_block 22 clsact
-	check_err $? "Failed to create another clsact with ingress shared block"
-
-	tc qdisc del dev $swp2 clsact
-
-	tc qdisc add dev $swp2 egress_block 22 clsact
-	check_fail $? "Incorrect success to create another clsact with egress shared block"
-
-	tc filter del block 22 protocol ip pref 1 handle 101 flower
-
-	tc qdisc add dev $swp2 egress_block 22 clsact
-	check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 action drop
-	check_fail $? "Incorrect success to add drop rule to mixed bound block"
-
-	tc qdisc del dev $swp1 clsact
-
-	tc qdisc add dev $swp1 egress_block 22 clsact
-	check_err $? "Failed to create another clsact with egress shared block"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 action drop
-	check_err $? "Failed to add drop rule to egress bound shared block"
-
-	tc filter del block 22 protocol ip pref 1 handle 101 flower
-
-	tc qdisc del dev $swp2 clsact
-	tc qdisc del dev $swp1 clsact
-
-	log_test "shared block drop"
-}
-
-egress_redirect_test()
-{
-	RET=0
-
-	# It is forbidden in mlxsw driver to have mirred redirect on
-	# egress-bound block.
-
-	tc qdisc add dev $swp1 ingress_block 22 clsact
-	check_err $? "Failed to create clsact with ingress block"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress redirect dev $swp2
-	check_err $? "Failed to add redirect rule to ingress bound block"
-
-	tc qdisc add dev $swp2 ingress_block 22 clsact
-	check_err $? "Failed to create another clsact with ingress shared block"
-
-	tc qdisc del dev $swp2 clsact
-
-	tc qdisc add dev $swp2 egress_block 22 clsact
-	check_fail $? "Incorrect success to create another clsact with egress shared block"
-
-	tc filter del block 22 protocol ip pref 1 handle 101 flower
-
-	tc qdisc add dev $swp2 egress_block 22 clsact
-	check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress redirect dev $swp2
-	check_fail $? "Incorrect success to add redirect rule to mixed bound block"
-
-	tc qdisc del dev $swp1 clsact
-
-	tc qdisc add dev $swp1 egress_block 22 clsact
-	check_err $? "Failed to create another clsact with egress shared block"
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress redirect dev $swp2
-	check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
-
-	tc qdisc del dev $swp2 clsact
-
-	tc filter add block 22 protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress redirect dev $swp2
-	check_fail $? "Incorrect success to add redirect rule to egress bound block"
-
-	tc qdisc del dev $swp1 clsact
-
-	log_test "shared block drop"
-}
-
-multi_mirror_test()
-{
-	RET=0
-
-	# It is forbidden in mlxsw driver to have multiple mirror
-	# actions in a single rule.
-
-	tc qdisc add dev $swp1 clsact
-
-	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress mirror dev $swp2
-	check_err $? "Failed to add rule with single mirror action"
-
-	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
-
-	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
-		skip_sw dst_ip 192.0.2.2 \
-		action mirred egress mirror dev $swp2 \
-		action mirred egress mirror dev $swp1
-	check_fail $? "Incorrect success to add rule with two mirror actions"
-
-	tc qdisc del dev $swp1 clsact
-
-	log_test "multi mirror"
-}
-
-setup_prepare()
-{
-	swp1=${NETIFS[p1]}
-	swp2=${NETIFS[p2]}
-
-	vrf_prepare
-
-	switch_create
-}
-
-cleanup()
-{
-	pre_cleanup
-
-	switch_destroy
-
-	vrf_cleanup
-}
-
-check_tc_shblock_support
-
-trap cleanup EXIT
-
-setup_prepare
-setup_wait
-
-tests_run
-
-exit $EXIT_STATUS
diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
new file mode 100755
index 000000000000..68c80d0ec1ec
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -0,0 +1,186 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	shared_block_drop_test
+	egress_redirect_test
+	multi_mirror_test
+"
+NUM_NETIFS=2
+
+source $lib_dir/tc_common.sh
+source $lib_dir/lib.sh
+
+switch_create()
+{
+	simple_if_init $swp1 192.0.2.1/24
+	simple_if_init $swp2 192.0.2.2/24
+}
+
+switch_destroy()
+{
+	simple_if_fini $swp2 192.0.2.2/24
+	simple_if_fini $swp1 192.0.2.1/24
+}
+
+shared_block_drop_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mixed-bound
+	# shared block with a drop rule.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker drop rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add drop rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add drop rule to egress bound shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc del dev $swp2 clsact
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+egress_redirect_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have mirred redirect on
+	# egress-bound block.
+
+	tc qdisc add dev $swp1 ingress_block 22 clsact
+	check_err $? "Failed to create clsact with ingress block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_err $? "Failed to add redirect rule to ingress bound block"
+
+	tc qdisc add dev $swp2 ingress_block 22 clsact
+	check_err $? "Failed to create another clsact with ingress shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_fail $? "Incorrect success to create another clsact with egress shared block"
+
+	tc filter del block 22 protocol ip pref 1 handle 101 flower
+
+	tc qdisc add dev $swp2 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block after blocker redirect rule removed"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to mixed bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	tc qdisc add dev $swp1 egress_block 22 clsact
+	check_err $? "Failed to create another clsact with egress shared block"
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound shared block"
+
+	tc qdisc del dev $swp2 clsact
+
+	tc filter add block 22 protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress redirect dev $swp2
+	check_fail $? "Incorrect success to add redirect rule to egress bound block"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "shared block drop"
+}
+
+multi_mirror_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have multiple mirror
+	# actions in a single rule.
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2
+	check_err $? "Failed to add rule with single mirror action"
+
+	tc filter del dev $swp1 ingress protocol ip pref 1 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 1 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 \
+		action mirred egress mirror dev $swp2 \
+		action mirred egress mirror dev $swp1
+	check_fail $? "Incorrect success to add rule with two mirror actions"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "multi mirror"
+}
+
+setup_prepare()
+{
+	swp1=${NETIFS[p1]}
+	swp2=${NETIFS[p2]}
+
+	vrf_prepare
+
+	switch_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	switch_destroy
+
+	vrf_cleanup
+}
+
+check_tc_shblock_support
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
-- 
cgit v1.2.3-59-g8ed1b


From 240fe73457fbfc13cb30d1d16064f19590ff10f6 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:09 +0300
Subject: selftests: mlxsw: tc_restrictions: add test to check sample action
 restrictions

Check that matchall rules with sample actions cannot be inserted
on egress.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/tc_restrictions.sh | 25 ++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index 68c80d0ec1ec..a67e80315e47 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -7,6 +7,7 @@ ALL_TESTS="
 	shared_block_drop_test
 	egress_redirect_test
 	multi_mirror_test
+	matchall_sample_egress_test
 "
 NUM_NETIFS=2
 
@@ -155,6 +156,30 @@ multi_mirror_test()
 	log_test "multi mirror"
 }
 
+matchall_sample_egress_test()
+{
+	RET=0
+
+	# It is forbidden in mlxsw driver to have matchall with sample action
+	# bound on egress
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_err $? "Failed to add rule with sample action on ingress"
+
+	tc filter del dev $swp1 ingress protocol all pref 1 handle 101 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 1 handle 101 \
+		matchall skip_sw action sample rate 100 group 1
+	check_fail $? "Incorrect success to add rule with sample action on egress"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall sample egress"
+}
+
 setup_prepare()
 {
 	swp1=${NETIFS[p1]}
-- 
cgit v1.2.3-59-g8ed1b


From aa7431123fc6f36574d9cc23be24dc802bb4cfa5 Mon Sep 17 00:00:00 2001
From: Jiri Pirko <jiri@mellanox.com>
Date: Sat, 9 May 2020 23:06:10 +0300
Subject: selftests: mlxsw: tc_restrictions: add couple of test for the correct
 matchall-flower ordering

Make sure that the driver rejects an incorrect ordering of inserted matchall
vs. flower rules.

Signed-off-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../selftests/drivers/net/mlxsw/tc_restrictions.sh | 107 +++++++++++++++++++++
 1 file changed, 107 insertions(+)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
index a67e80315e47..9241250c5921 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/tc_restrictions.sh
@@ -8,6 +8,9 @@ ALL_TESTS="
 	egress_redirect_test
 	multi_mirror_test
 	matchall_sample_egress_test
+	matchall_mirror_behind_flower_ingress_test
+	matchall_sample_behind_flower_ingress_test
+	matchall_mirror_behind_flower_egress_test
 "
 NUM_NETIFS=2
 
@@ -180,6 +183,110 @@ matchall_sample_egress_test()
 	log_test "matchall sample egress"
 }
 
+matchall_behind_flower_ingress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On ingress, all matchall-mirror and matchall-sample
+	# rules have to be in front of the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 ingress protocol all pref 9 handle 102 matchall
+
+	tc filter add dev $swp1 ingress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 ingress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 ingress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 ingress protocol ip pref 8 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower ingress"
+}
+
+matchall_mirror_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
+matchall_sample_behind_flower_ingress_test()
+{
+	matchall_behind_flower_ingress_test "sample" "sample rate 100 group 1"
+}
+
+matchall_behind_flower_egress_test()
+{
+	local action=$1
+	local action_args=$2
+
+	RET=0
+
+	# On egress, all matchall-mirror rules have to be behind the flower rules
+
+	tc qdisc add dev $swp1 clsact
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+	check_err $? "Failed to add matchall rule in front of a flower rule"
+
+	tc filter del dev $swp1 egress protocol all pref 11 handle 102 matchall
+
+	tc filter add dev $swp1 egress protocol all pref 9 handle 102 \
+		matchall skip_sw action $action_args
+	check_fail $? "Incorrect success to add matchall rule behind a flower rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol all pref 11 handle 102 \
+		matchall skip_sw action $action_args
+
+	tc filter add dev $swp1 egress protocol ip pref 10 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_err $? "Failed to add flower rule behind a matchall rule"
+
+	tc filter del dev $swp1 egress protocol ip pref 10 handle 101 flower
+
+	tc filter add dev $swp1 egress protocol ip pref 12 handle 101 flower \
+		skip_sw dst_ip 192.0.2.2 action drop
+	check_fail $? "Incorrect success to add flower rule in front of a matchall rule"
+
+	tc qdisc del dev $swp1 clsact
+
+	log_test "matchall $action flower egress"
+}
+
+matchall_mirror_behind_flower_egress_test()
+{
+	matchall_behind_flower_egress_test "mirror" "mirred egress mirror dev $swp2"
+}
+
 setup_prepare()
 {
 	swp1=${NETIFS[p1]}
-- 
cgit v1.2.3-59-g8ed1b


From ae24345da54e452880808b011fa2d8a0bbd191ba Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:58:59 -0700
Subject: bpf: Implement an interface to register bpf_iter targets

The target can call bpf_iter_reg_target() to register itself.
The needed information:
  target:           target name
  seq_ops:          the seq_file operations for the target
  init_seq_private  target callback to initialize seq_priv during file open
  fini_seq_private  target callback to clean up seq_priv during file release
  seq_priv_size:    the private_data size needed by the seq_file
                    operations

The target name represents a target which provides a seq_ops
for iterating objects.

The target can provide two callback functions, init_seq_private
and fini_seq_private, called during file open/release time.
For example, for /proc/net/{tcp6, ipv6_route, netlink, ...}, the net
namespace needs to be set up properly during file open and
released properly during file release.

Function bpf_iter_unreg_target() is also implemented to unregister
a particular target.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175859.2474669-1-yhs@fb.com
---
 include/linux/bpf.h   | 15 +++++++++++++
 kernel/bpf/Makefile   |  2 +-
 kernel/bpf/bpf_iter.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 75 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/bpf_iter.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 1262ec460ab3..40c78b86fe38 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -31,6 +31,7 @@ struct seq_file;
 struct btf;
 struct btf_type;
 struct exception_table_entry;
+struct seq_operations;
 
 extern struct idr btf_idr;
 extern spinlock_t btf_idr_lock;
@@ -1126,6 +1127,20 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
+typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
+
+struct bpf_iter_reg {
+	const char *target;
+	const struct seq_operations *seq_ops;
+	bpf_iter_init_seq_priv_t init_seq_private;
+	bpf_iter_fini_seq_priv_t fini_seq_private;
+	u32 seq_priv_size;
+};
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
+void bpf_iter_unreg_target(const char *target);
+
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_hash_update(struct bpf_map *map, void *key, void *value,
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index f2d7be596966..6a8b0febd3f6 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
 obj-y := core.o
 CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
new file mode 100644
index 000000000000..5a8119d17d14
--- /dev/null
+++ b/kernel/bpf/bpf_iter.c
@@ -0,0 +1,59 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/bpf.h>
+
+struct bpf_iter_target_info {
+	struct list_head list;
+	const char *target;
+	const struct seq_operations *seq_ops;
+	bpf_iter_init_seq_priv_t init_seq_private;
+	bpf_iter_fini_seq_priv_t fini_seq_private;
+	u32 seq_priv_size;
+};
+
+static struct list_head targets = LIST_HEAD_INIT(targets);
+static DEFINE_MUTEX(targets_mutex);
+
+int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
+{
+	struct bpf_iter_target_info *tinfo;
+
+	tinfo = kmalloc(sizeof(*tinfo), GFP_KERNEL);
+	if (!tinfo)
+		return -ENOMEM;
+
+	tinfo->target = reg_info->target;
+	tinfo->seq_ops = reg_info->seq_ops;
+	tinfo->init_seq_private = reg_info->init_seq_private;
+	tinfo->fini_seq_private = reg_info->fini_seq_private;
+	tinfo->seq_priv_size = reg_info->seq_priv_size;
+	INIT_LIST_HEAD(&tinfo->list);
+
+	mutex_lock(&targets_mutex);
+	list_add(&tinfo->list, &targets);
+	mutex_unlock(&targets_mutex);
+
+	return 0;
+}
+
+void bpf_iter_unreg_target(const char *target)
+{
+	struct bpf_iter_target_info *tinfo;
+	bool found = false;
+
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (!strcmp(target, tinfo->target)) {
+			list_del(&tinfo->list);
+			kfree(tinfo);
+			found = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+
+	WARN_ON(found == false);
+}
-- 
cgit v1.2.3-59-g8ed1b


From 15d83c4d7cef5c067a8b075ce59e97df4f60706e Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:00 -0700
Subject: bpf: Allow loading of a bpf_iter program

A bpf_iter program is a tracing program with attach type
BPF_TRACE_ITER. The load attribute
  attach_btf_id
is used by the verifier against a particular kernel function,
which represents a target, e.g., __bpf_iter__bpf_map
for target bpf_map which is implemented later.

The program return value must be 0 or 1 for now.
  0 : successful, except potential seq_file buffer overflow
      which is handled by seq_file reader.
  1 : request to restart the same object

In the future, other return values may be used for filtering or
terminating the iterator.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175900.2474947-1-yhs@fb.com
---
 include/linux/bpf.h            |  3 +++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/bpf_iter.c          | 36 ++++++++++++++++++++++++++++++++++++
 kernel/bpf/verifier.c          | 21 +++++++++++++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 5 files changed, 62 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 40c78b86fe38..f28bdd714754 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1127,6 +1127,8 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
+#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
 
@@ -1140,6 +1142,7 @@ struct bpf_iter_reg {
 
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
+bool bpf_iter_prog_supported(struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 6e5e7caa3739..c8a5325cc8d0 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -218,6 +218,7 @@ enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 5a8119d17d14..dec182d8395a 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -12,6 +12,7 @@ struct bpf_iter_target_info {
 	bpf_iter_init_seq_priv_t init_seq_private;
 	bpf_iter_fini_seq_priv_t fini_seq_private;
 	u32 seq_priv_size;
+	u32 btf_id;	/* cached value */
 };
 
 static struct list_head targets = LIST_HEAD_INIT(targets);
@@ -57,3 +58,38 @@ void bpf_iter_unreg_target(const char *target)
 
 	WARN_ON(found == false);
 }
+
+static void cache_btf_id(struct bpf_iter_target_info *tinfo,
+			 struct bpf_prog *prog)
+{
+	tinfo->btf_id = prog->aux->attach_btf_id;
+}
+
+bool bpf_iter_prog_supported(struct bpf_prog *prog)
+{
+	const char *attach_fname = prog->aux->attach_func_name;
+	u32 prog_btf_id = prog->aux->attach_btf_id;
+	const char *prefix = BPF_ITER_FUNC_PREFIX;
+	struct bpf_iter_target_info *tinfo;
+	int prefix_len = strlen(prefix);
+	bool supported = false;
+
+	if (strncmp(attach_fname, prefix, prefix_len))
+		return false;
+
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (tinfo->btf_id && tinfo->btf_id == prog_btf_id) {
+			supported = true;
+			break;
+		}
+		if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
+			cache_btf_id(tinfo, prog);
+			supported = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+
+	return supported;
+}
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 70ad009577f8..d725ff7d11db 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7101,6 +7101,10 @@ static int check_return_code(struct bpf_verifier_env *env)
 			return 0;
 		range = tnum_const(0);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		if (env->prog->expected_attach_type != BPF_TRACE_ITER)
+			return 0;
+		break;
 	default:
 		return 0;
 	}
@@ -10481,6 +10485,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 	struct bpf_prog *tgt_prog = prog->aux->linked_prog;
 	u32 btf_id = prog->aux->attach_btf_id;
 	const char prefix[] = "btf_trace_";
+	struct btf_func_model fmodel;
 	int ret = 0, subprog = -1, i;
 	struct bpf_trampoline *tr;
 	const struct btf_type *t;
@@ -10622,6 +10627,22 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		prog->aux->attach_btf_trace = true;
 		return 0;
+	case BPF_TRACE_ITER:
+		if (!btf_type_is_func(t)) {
+			verbose(env, "attach_btf_id %u is not a function\n",
+				btf_id);
+			return -EINVAL;
+		}
+		t = btf_type_by_id(btf, t->type);
+		if (!btf_type_is_func_proto(t))
+			return -EINVAL;
+		prog->aux->attach_func_name = tname;
+		prog->aux->attach_func_proto = t;
+		if (!bpf_iter_prog_supported(prog))
+			return -EINVAL;
+		ret = btf_distill_func_proto(&env->log, btf, t,
+					     tname, &fmodel);
+		return ret;
 	default:
 		if (!prog_extension)
 			return -EINVAL;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 6e5e7caa3739..c8a5325cc8d0 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -218,6 +218,7 @@ enum bpf_attach_type {
 	BPF_TRACE_FEXIT,
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
+	BPF_TRACE_ITER,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From de4e05cac46d206f9090051ef09930514bff73e4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:01 -0700
Subject: bpf: Support bpf tracing/iter programs for BPF_LINK_CREATE

Given a bpf program, the step to create an anonymous bpf iterator is:
  - create a bpf_iter_link, which combines bpf program and the target.
    In the future, there could be more information recorded in the link.
    A link_fd will be returned to the user space.
  - create an anonymous bpf iterator with the given link_fd.

The bpf_iter_link can be pinned to bpffs mount file system to
create a file based bpf iterator as well.

The benefits of using bpf_iter_link:
  - using bpf link simplifies design and implementation as bpf link
    is used for other tracing bpf programs.
  - for file based bpf iterator, bpf_iter_link provides a standard
    way to replace underlying bpf programs.
  - for both anonymous and file based iterators, bpf link query
    capability can be leveraged.

The patch added support of tracing/iter programs for BPF_LINK_CREATE.
A new link type BPF_LINK_TYPE_ITER is added to facilitate link
querying. Currently, only prog_id is needed, so no additional
in-kernel show_fdinfo() or fill_link_info() hook is needed for
the BPF_LINK_TYPE_ITER link.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175901.2475084-1-yhs@fb.com
---
 include/linux/bpf.h            |  1 +
 include/linux/bpf_types.h      |  1 +
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/bpf_iter.c          | 62 ++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           | 14 ++++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 6 files changed, 80 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index f28bdd714754..e93d2d33c82c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1143,6 +1143,7 @@ struct bpf_iter_reg {
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 8345cdf553b8..29d22752fc87 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -124,3 +124,4 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
 #ifdef CONFIG_CGROUP_BPF
 BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 #endif
+BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index c8a5325cc8d0..1e8dfff5d5d4 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -229,6 +229,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
+	BPF_LINK_TYPE_ITER = 4,
 
 	MAX_BPF_LINK_TYPE,
 };
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index dec182d8395a..03f5832909db 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -15,6 +15,11 @@ struct bpf_iter_target_info {
 	u32 btf_id;	/* cached value */
 };
 
+struct bpf_iter_link {
+	struct bpf_link link;
+	struct bpf_iter_target_info *tinfo;
+};
+
 static struct list_head targets = LIST_HEAD_INIT(targets);
 static DEFINE_MUTEX(targets_mutex);
 
@@ -93,3 +98,60 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
 
 	return supported;
 }
+
+static void bpf_iter_link_release(struct bpf_link *link)
+{
+}
+
+static void bpf_iter_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_iter_link *iter_link =
+		container_of(link, struct bpf_iter_link, link);
+
+	kfree(iter_link);
+}
+
+static const struct bpf_link_ops bpf_iter_link_lops = {
+	.release = bpf_iter_link_release,
+	.dealloc = bpf_iter_link_dealloc,
+};
+
+int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	struct bpf_link_primer link_primer;
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_iter_link *link;
+	bool existed = false;
+	u32 prog_btf_id;
+	int err;
+
+	if (attr->link_create.target_fd || attr->link_create.flags)
+		return -EINVAL;
+
+	prog_btf_id = prog->aux->attach_btf_id;
+	mutex_lock(&targets_mutex);
+	list_for_each_entry(tinfo, &targets, list) {
+		if (tinfo->btf_id == prog_btf_id) {
+			existed = true;
+			break;
+		}
+	}
+	mutex_unlock(&targets_mutex);
+	if (!existed)
+		return -ENOENT;
+
+	link = kzalloc(sizeof(*link), GFP_USER | __GFP_NOWARN);
+	if (!link)
+		return -ENOMEM;
+
+	bpf_link_init(&link->link, BPF_LINK_TYPE_ITER, &bpf_iter_link_lops, prog);
+	link->tinfo = tinfo;
+
+	err  = bpf_link_prime(&link->link, &link_primer);
+	if (err) {
+		kfree(link);
+		return err;
+	}
+
+	return bpf_link_settle(&link_primer);
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index bb1ab7da6103..6ffe2d8fb6c7 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2729,6 +2729,8 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_GETSOCKOPT:
 	case BPF_CGROUP_SETSOCKOPT:
 		return BPF_PROG_TYPE_CGROUP_SOCKOPT;
+	case BPF_TRACE_ITER:
+		return BPF_PROG_TYPE_TRACING;
 	default:
 		return BPF_PROG_TYPE_UNSPEC;
 	}
@@ -3729,6 +3731,15 @@ err_put:
 	return err;
 }
 
+static int tracing_bpf_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	if (attr->link_create.attach_type == BPF_TRACE_ITER &&
+	    prog->expected_attach_type == BPF_TRACE_ITER)
+		return bpf_iter_link_attach(attr, prog);
+
+	return -EINVAL;
+}
+
 #define BPF_LINK_CREATE_LAST_FIELD link_create.flags
 static int link_create(union bpf_attr *attr)
 {
@@ -3765,6 +3776,9 @@ static int link_create(union bpf_attr *attr)
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		ret = cgroup_bpf_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_TRACING:
+		ret = tracing_bpf_link_attach(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index c8a5325cc8d0..1e8dfff5d5d4 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -229,6 +229,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_RAW_TRACEPOINT = 1,
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
+	BPF_LINK_TYPE_ITER = 4,
 
 	MAX_BPF_LINK_TYPE,
 };
-- 
cgit v1.2.3-59-g8ed1b


From 2057c92bc927f09b22f5609425eb37d7e782f484 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:02 -0700
Subject: bpf: Support bpf tracing/iter programs for BPF_LINK_UPDATE

Added BPF_LINK_UPDATE support for tracing/iter programs.
This way, a file based bpf iterator, which holds a reference
to the link, can have its bpf program updated without
creating new files.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175902.2475262-1-yhs@fb.com
---
 kernel/bpf/bpf_iter.c | 31 +++++++++++++++++++++++++++++++
 1 file changed, 31 insertions(+)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 03f5832909db..0542a243b78c 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -23,6 +23,9 @@ struct bpf_iter_link {
 static struct list_head targets = LIST_HEAD_INIT(targets);
 static DEFINE_MUTEX(targets_mutex);
 
+/* protect bpf_iter_link changes */
+static DEFINE_MUTEX(link_mutex);
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
@@ -111,9 +114,37 @@ static void bpf_iter_link_dealloc(struct bpf_link *link)
 	kfree(iter_link);
 }
 
+static int bpf_iter_link_replace(struct bpf_link *link,
+				 struct bpf_prog *new_prog,
+				 struct bpf_prog *old_prog)
+{
+	int ret = 0;
+
+	mutex_lock(&link_mutex);
+	if (old_prog && link->prog != old_prog) {
+		ret = -EPERM;
+		goto out_unlock;
+	}
+
+	if (link->prog->type != new_prog->type ||
+	    link->prog->expected_attach_type != new_prog->expected_attach_type ||
+	    link->prog->aux->attach_btf_id != new_prog->aux->attach_btf_id) {
+		ret = -EINVAL;
+		goto out_unlock;
+	}
+
+	old_prog = xchg(&link->prog, new_prog);
+	bpf_prog_put(old_prog);
+
+out_unlock:
+	mutex_unlock(&link_mutex);
+	return ret;
+}
+
 static const struct bpf_link_ops bpf_iter_link_lops = {
 	.release = bpf_iter_link_release,
 	.dealloc = bpf_iter_link_dealloc,
+	.update_prog = bpf_iter_link_replace,
 };
 
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
-- 
cgit v1.2.3-59-g8ed1b


From fd4f12bc38c3ad9107169e7c9e6e7f81d93dda97 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:04 -0700
Subject: bpf: Implement bpf_seq_read() for bpf iterator

bpf iterator uses seq_file to provide a lossless
way to transfer data to user space. But we want to call
bpf program after all objects have been traversed, and
bpf program may write additional data to the
seq_file buffer. The current seq_read() does not work
for this use case.

Besides allowing the stop() function to write to the buffer,
bpf_seq_read() also fixes the buffer size to one page.
If any single call of show() or stop() emits more than one
page of data and overflows the buffer, the -E2BIG error code
is returned to user space.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175904.2475468-1-yhs@fb.com
---
 kernel/bpf/bpf_iter.c | 123 ++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 123 insertions(+)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0542a243b78c..832973ee80fa 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -26,6 +26,129 @@ static DEFINE_MUTEX(targets_mutex);
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* bpf_seq_read, a customized and simpler version for bpf iterator.
+ * no_llseek is assumed for this file.
+ * The following are differences from seq_read():
+ *  . fixed buffer size (PAGE_SIZE)
+ *  . assuming no_llseek
+ *  . stop() may call bpf program, handling potential overflow there
+ */
+static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
+			    loff_t *ppos)
+{
+	struct seq_file *seq = file->private_data;
+	size_t n, offs, copied = 0;
+	int err = 0;
+	void *p;
+
+	mutex_lock(&seq->lock);
+
+	if (!seq->buf) {
+		seq->size = PAGE_SIZE;
+		seq->buf = kmalloc(seq->size, GFP_KERNEL);
+		if (!seq->buf) {
+			err = -ENOMEM;
+			goto done;
+		}
+	}
+
+	if (seq->count) {
+		n = min(seq->count, size);
+		err = copy_to_user(buf, seq->buf + seq->from, n);
+		if (err) {
+			err = -EFAULT;
+			goto done;
+		}
+		seq->count -= n;
+		seq->from += n;
+		copied = n;
+		goto done;
+	}
+
+	seq->from = 0;
+	p = seq->op->start(seq, &seq->index);
+	if (!p)
+		goto stop;
+	if (IS_ERR(p)) {
+		err = PTR_ERR(p);
+		seq->op->stop(seq, p);
+		seq->count = 0;
+		goto done;
+	}
+
+	err = seq->op->show(seq, p);
+	if (err > 0) {
+		seq->count = 0;
+	} else if (err < 0 || seq_has_overflowed(seq)) {
+		if (!err)
+			err = -E2BIG;
+		seq->op->stop(seq, p);
+		seq->count = 0;
+		goto done;
+	}
+
+	while (1) {
+		loff_t pos = seq->index;
+
+		offs = seq->count;
+		p = seq->op->next(seq, p, &seq->index);
+		if (pos == seq->index) {
+			pr_info_ratelimited("buggy seq_file .next function %ps "
+				"did not updated position index\n",
+				seq->op->next);
+			seq->index++;
+		}
+
+		if (IS_ERR_OR_NULL(p))
+			break;
+
+		if (seq->count >= size)
+			break;
+
+		err = seq->op->show(seq, p);
+		if (err > 0) {
+			seq->count = offs;
+		} else if (err < 0 || seq_has_overflowed(seq)) {
+			seq->count = offs;
+			if (offs == 0) {
+				if (!err)
+					err = -E2BIG;
+				seq->op->stop(seq, p);
+				goto done;
+			}
+			break;
+		}
+	}
+stop:
+	offs = seq->count;
+	/* bpf program called if !p */
+	seq->op->stop(seq, p);
+	if (!p && seq_has_overflowed(seq)) {
+		seq->count = offs;
+		if (offs == 0) {
+			err = -E2BIG;
+			goto done;
+		}
+	}
+
+	n = min(seq->count, size);
+	err = copy_to_user(buf, seq->buf, n);
+	if (err) {
+		err = -EFAULT;
+		goto done;
+	}
+	copied = n;
+	seq->count -= n;
+	seq->from = n;
+done:
+	if (!copied)
+		copied = err;
+	else
+		*ppos += copied;
+	mutex_unlock(&seq->lock);
+	return copied;
+}
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
-- 
cgit v1.2.3-59-g8ed1b


From ac51d99bf81caac8d8881fe52098948110d0de68 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:05 -0700
Subject: bpf: Create anonymous bpf iterator

A new bpf command BPF_ITER_CREATE is added.

The anonymous bpf iterator is seq_file based.
The seq_file private data are referenced by targets.
The bpf_iter infrastructure allocated additional space
at seq_file->private before the space used by targets
to store some meta data, e.g.,
  prog:       prog to run
  session_id: a unique id for each opened seq_file
  seq_num:    how many times bpf programs are queried in this session
  done_stop:  an internal state to decide whether bpf program
              should be called in seq_ops->stop() or not

The seq_num will start from 0 for valid objects.
The bpf program may see the same seq_num more than once if
 - seq_file buffer overflow happens and the same object
   is retried by bpf_seq_read(), or
 - the bpf program explicitly requests a retry of the
   same object

Since modules are not supported for bpf_iter, all target
registration happens at __init time, so there is no
need to change bpf_iter_unreg_target() as it is used
mostly in error path of the init function at which time
no bpf iterators have been created yet.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175905.2475770-1-yhs@fb.com
---
 include/linux/bpf.h            |   1 +
 include/uapi/linux/bpf.h       |   6 ++
 kernel/bpf/bpf_iter.c          | 129 +++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c           |  26 +++++++++
 tools/include/uapi/linux/bpf.h |   6 ++
 5 files changed, 168 insertions(+)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e93d2d33c82c..80b1b9d8a638 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1144,6 +1144,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
+int bpf_iter_new_fd(struct bpf_link *link);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 1e8dfff5d5d4..708763f702e1 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
+	BPF_ITER_CREATE,
 };
 
 enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
 		__u32		type;
 	} enable_stats;
 
+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 832973ee80fa..e7129b57865f 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -2,6 +2,7 @@
 /* Copyright (c) 2020 Facebook */
 
 #include <linux/fs.h>
+#include <linux/anon_inodes.h>
 #include <linux/filter.h>
 #include <linux/bpf.h>
 
@@ -20,12 +21,24 @@ struct bpf_iter_link {
 	struct bpf_iter_target_info *tinfo;
 };
 
+struct bpf_iter_priv_data {
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_prog *prog;
+	u64 session_id;
+	u64 seq_num;
+	bool done_stop;
+	u8 target_private[] __aligned(8);
+};
+
 static struct list_head targets = LIST_HEAD_INIT(targets);
 static DEFINE_MUTEX(targets_mutex);
 
 /* protect bpf_iter_link changes */
 static DEFINE_MUTEX(link_mutex);
 
+/* incremented on every opened seq_file */
+static atomic64_t session_id;
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -149,6 +162,33 @@ done:
 	return copied;
 }
 
+static int iter_release(struct inode *inode, struct file *file)
+{
+	struct bpf_iter_priv_data *iter_priv;
+	struct seq_file *seq;
+
+	seq = file->private_data;
+	if (!seq)
+		return 0;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+
+	if (iter_priv->tinfo->fini_seq_private)
+		iter_priv->tinfo->fini_seq_private(seq->private);
+
+	bpf_prog_put(iter_priv->prog);
+	seq->private = iter_priv;
+
+	return seq_release_private(inode, file);
+}
+
+static const struct file_operations bpf_iter_fops = {
+	.llseek		= no_llseek,
+	.read		= bpf_seq_read,
+	.release	= iter_release,
+};
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
@@ -309,3 +349,92 @@ int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	return bpf_link_settle(&link_primer);
 }
+
+static void init_seq_meta(struct bpf_iter_priv_data *priv_data,
+			  struct bpf_iter_target_info *tinfo,
+			  struct bpf_prog *prog)
+{
+	priv_data->tinfo = tinfo;
+	priv_data->prog = prog;
+	priv_data->session_id = atomic64_inc_return(&session_id);
+	priv_data->seq_num = 0;
+	priv_data->done_stop = false;
+}
+
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
+{
+	struct bpf_iter_priv_data *priv_data;
+	struct bpf_iter_target_info *tinfo;
+	struct bpf_prog *prog;
+	u32 total_priv_dsize;
+	struct seq_file *seq;
+	int err = 0;
+
+	mutex_lock(&link_mutex);
+	prog = link->link.prog;
+	bpf_prog_inc(prog);
+	mutex_unlock(&link_mutex);
+
+	tinfo = link->tinfo;
+	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
+			   tinfo->seq_priv_size;
+	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
+	if (!priv_data) {
+		err = -ENOMEM;
+		goto release_prog;
+	}
+
+	if (tinfo->init_seq_private) {
+		err = tinfo->init_seq_private(priv_data->target_private);
+		if (err)
+			goto release_seq_file;
+	}
+
+	init_seq_meta(priv_data, tinfo, prog);
+	seq = file->private_data;
+	seq->private = priv_data->target_private;
+
+	return 0;
+
+release_seq_file:
+	seq_release_private(file->f_inode, file);
+	file->private_data = NULL;
+release_prog:
+	bpf_prog_put(prog);
+	return err;
+}
+
+int bpf_iter_new_fd(struct bpf_link *link)
+{
+	struct file *file;
+	unsigned int flags;
+	int err, fd;
+
+	if (link->ops != &bpf_iter_link_lops)
+		return -EINVAL;
+
+	flags = O_RDONLY | O_CLOEXEC;
+	fd = get_unused_fd_flags(flags);
+	if (fd < 0)
+		return fd;
+
+	file = anon_inode_getfile("bpf_iter", &bpf_iter_fops, NULL, flags);
+	if (IS_ERR(file)) {
+		err = PTR_ERR(file);
+		goto free_fd;
+	}
+
+	err = prepare_seq_file(file,
+			       container_of(link, struct bpf_iter_link, link));
+	if (err)
+		goto free_file;
+
+	fd_install(fd, file);
+	return fd;
+
+free_file:
+	fput(file);
+free_fd:
+	put_unused_fd(fd);
+	return err;
+}
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 6ffe2d8fb6c7..a293e88ee01a 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3941,6 +3941,29 @@ static int bpf_enable_stats(union bpf_attr *attr)
 	return -EINVAL;
 }
 
+#define BPF_ITER_CREATE_LAST_FIELD iter_create.flags
+
+static int bpf_iter_create(union bpf_attr *attr)
+{
+	struct bpf_link *link;
+	int err;
+
+	if (CHECK_ATTR(BPF_ITER_CREATE))
+		return -EINVAL;
+
+	if (attr->iter_create.flags)
+		return -EINVAL;
+
+	link = bpf_link_get_from_fd(attr->iter_create.link_fd);
+	if (IS_ERR(link))
+		return PTR_ERR(link);
+
+	err = bpf_iter_new_fd(link);
+	bpf_link_put(link);
+
+	return err;
+}
+
 SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, size)
 {
 	union bpf_attr attr;
@@ -4068,6 +4091,9 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	case BPF_ENABLE_STATS:
 		err = bpf_enable_stats(&attr);
 		break;
+	case BPF_ITER_CREATE:
+		err = bpf_iter_create(&attr);
+		break;
 	default:
 		err = -EINVAL;
 		break;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1e8dfff5d5d4..708763f702e1 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -116,6 +116,7 @@ enum bpf_cmd {
 	BPF_LINK_GET_FD_BY_ID,
 	BPF_LINK_GET_NEXT_ID,
 	BPF_ENABLE_STATS,
+	BPF_ITER_CREATE,
 };
 
 enum bpf_map_type {
@@ -614,6 +615,11 @@ union bpf_attr {
 		__u32		type;
 	} enable_stats;
 
+	struct { /* struct used by BPF_ITER_CREATE command */
+		__u32		link_fd;
+		__u32		flags;
+	} iter_create;
+
 } __attribute__((aligned(8)));
 
 /* The description below is an attempt at providing documentation to eBPF
-- 
cgit v1.2.3-59-g8ed1b


From 367ec3e4834cbd611401c2c40a23c22c825474f1 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:06 -0700
Subject: bpf: Create file bpf iterator

To produce a file bpf iterator, the fd must
correspond to a link_fd associated with a
trace/iter program. When the pinned file is
opened, a seq_file will be generated.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175906.2475893-1-yhs@fb.com
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/bpf_iter.c | 17 ++++++++++++++++-
 kernel/bpf/inode.c    |  5 ++++-
 3 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 80b1b9d8a638..b06653ab3476 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1022,6 +1022,7 @@ static inline void bpf_enable_instrumentation(void)
 
 extern const struct file_operations bpf_map_fops;
 extern const struct file_operations bpf_prog_fops;
+extern const struct file_operations bpf_iter_fops;
 
 #define BPF_PROG_TYPE(_id, _name, prog_ctx_type, kern_ctx_type) \
 	extern const struct bpf_prog_ops _name ## _prog_ops; \
@@ -1145,6 +1146,7 @@ void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
+bool bpf_link_is_iter(struct bpf_link *link);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index e7129b57865f..090f09b0eacb 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -39,6 +39,8 @@ static DEFINE_MUTEX(link_mutex);
 /* incremented on every opened seq_file */
 static atomic64_t session_id;
 
+static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -162,6 +164,13 @@ done:
 	return copied;
 }
 
+static int iter_open(struct inode *inode, struct file *file)
+{
+	struct bpf_iter_link *link = inode->i_private;
+
+	return prepare_seq_file(file, link);
+}
+
 static int iter_release(struct inode *inode, struct file *file)
 {
 	struct bpf_iter_priv_data *iter_priv;
@@ -183,7 +192,8 @@ static int iter_release(struct inode *inode, struct file *file)
 	return seq_release_private(inode, file);
 }
 
-static const struct file_operations bpf_iter_fops = {
+const struct file_operations bpf_iter_fops = {
+	.open		= iter_open,
 	.llseek		= no_llseek,
 	.read		= bpf_seq_read,
 	.release	= iter_release,
@@ -310,6 +320,11 @@ static const struct bpf_link_ops bpf_iter_link_lops = {
 	.update_prog = bpf_iter_link_replace,
 };
 
+bool bpf_link_is_iter(struct bpf_link *link)
+{
+	return link->ops == &bpf_iter_link_lops;
+}
+
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
 	struct bpf_link_primer link_primer;
diff --git a/kernel/bpf/inode.c b/kernel/bpf/inode.c
index 95087d9f4ed3..fb878ba3f22f 100644
--- a/kernel/bpf/inode.c
+++ b/kernel/bpf/inode.c
@@ -358,8 +358,11 @@ static int bpf_mkmap(struct dentry *dentry, umode_t mode, void *arg)
 
 static int bpf_mklink(struct dentry *dentry, umode_t mode, void *arg)
 {
+	struct bpf_link *link = arg;
+
 	return bpf_mkobj_ops(dentry, mode, arg, &bpf_link_iops,
-			     &bpffs_obj_fops);
+			     bpf_link_is_iter(link) ?
+			     &bpf_iter_fops : &bpffs_obj_fops);
 }
 
 static struct dentry *
-- 
cgit v1.2.3-59-g8ed1b


From e5158d987b72c3f318b4b52a01ac6f3997bd0c00 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:07 -0700
Subject: bpf: Implement common macros/helpers for target iterators

Macro DEFINE_BPF_ITER_FUNC is implemented so target
can define an init function to capture the BTF type
which represents the target.

The bpf_iter_meta is a structure holding meta data, common
to all targets in the bpf program.

Additional marker functions are called before or after
the bpf_seq_read() show()/next()/stop() callback functions
to help calculate the precise seq_num and to decide whether
to call bpf_prog inside stop().

Two functions, bpf_iter_get_info() and bpf_iter_run_prog(),
are implemented so target can get needed information from
bpf_iter infrastructure and can run the program.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175907.2475956-1-yhs@fb.com
---
 include/linux/bpf.h   | 11 +++++++
 kernel/bpf/bpf_iter.c | 85 ++++++++++++++++++++++++++++++++++++++++++++++++---
 2 files changed, 91 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index b06653ab3476..ffe0b9b669bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1129,6 +1129,9 @@ int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
 #define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+#define DEFINE_BPF_ITER_FUNC(target, args...)			\
+	extern int __bpf_iter__ ## target(args);		\
+	int __init __bpf_iter__ ## target(args) { return 0; }
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
@@ -1141,12 +1144,20 @@ struct bpf_iter_reg {
 	u32 seq_priv_size;
 };
 
+struct bpf_iter_meta {
+	__bpf_md_ptr(struct seq_file *, seq);
+	u64 session_id;
+	u64 seq_num;
+};
+
 int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
 bool bpf_link_is_iter(struct bpf_link *link);
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop);
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx);
 
 int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value);
 int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 090f09b0eacb..30efd15cd4a0 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -41,6 +41,33 @@ static atomic64_t session_id;
 
 static int prepare_seq_file(struct file *file, struct bpf_iter_link *link);
 
+static void bpf_iter_inc_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num++;
+}
+
+static void bpf_iter_dec_seq_num(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->seq_num--;
+}
+
+static void bpf_iter_done_stop(struct seq_file *seq)
+{
+	struct bpf_iter_priv_data *iter_priv;
+
+	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
+				 target_private);
+	iter_priv->done_stop = true;
+}
+
 /* bpf_seq_read, a customized and simpler version for bpf iterator.
  * no_llseek is assumed for this file.
  * The following are differences from seq_read():
@@ -93,6 +120,10 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 
 	err = seq->op->show(seq, p);
 	if (err > 0) {
+		/* object is skipped, decrease seq_num, so next
+		 * valid object can reuse the same seq_num.
+		 */
+		bpf_iter_dec_seq_num(seq);
 		seq->count = 0;
 	} else if (err < 0 || seq_has_overflowed(seq)) {
 		if (!err)
@@ -117,11 +148,15 @@ static ssize_t bpf_seq_read(struct file *file, char __user *buf, size_t size,
 		if (IS_ERR_OR_NULL(p))
 			break;
 
+		/* got a valid next object, increase seq_num */
+		bpf_iter_inc_seq_num(seq);
+
 		if (seq->count >= size)
 			break;
 
 		err = seq->op->show(seq, p);
 		if (err > 0) {
+			bpf_iter_dec_seq_num(seq);
 			seq->count = offs;
 		} else if (err < 0 || seq_has_overflowed(seq)) {
 			seq->count = offs;
@@ -138,11 +173,15 @@ stop:
 	offs = seq->count;
 	/* bpf program called if !p */
 	seq->op->stop(seq, p);
-	if (!p && seq_has_overflowed(seq)) {
-		seq->count = offs;
-		if (offs == 0) {
-			err = -E2BIG;
-			goto done;
+	if (!p) {
+		if (!seq_has_overflowed(seq)) {
+			bpf_iter_done_stop(seq);
+		} else {
+			seq->count = offs;
+			if (offs == 0) {
+				err = -E2BIG;
+				goto done;
+			}
 		}
 	}
 
@@ -453,3 +492,39 @@ free_fd:
 	put_unused_fd(fd);
 	return err;
 }
+
+struct bpf_prog *bpf_iter_get_info(struct bpf_iter_meta *meta, bool in_stop)
+{
+	struct bpf_iter_priv_data *iter_priv;
+	struct seq_file *seq;
+	void *seq_priv;
+
+	seq = meta->seq;
+	if (seq->file->f_op != &bpf_iter_fops)
+		return NULL;
+
+	seq_priv = seq->private;
+	iter_priv = container_of(seq_priv, struct bpf_iter_priv_data,
+				 target_private);
+
+	if (in_stop && iter_priv->done_stop)
+		return NULL;
+
+	meta->session_id = iter_priv->session_id;
+	meta->seq_num = iter_priv->seq_num;
+
+	return iter_priv->prog;
+}
+
+int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
+{
+	int ret;
+
+	rcu_read_lock();
+	migrate_disable();
+	ret = BPF_PROG_RUN(prog, ctx);
+	migrate_enable();
+	rcu_read_unlock();
+
+	return ret == 0 ? 0 : -EAGAIN;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 6086d29def80edd78f9832ea6eafa74e3818f6a7 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:09 -0700
Subject: bpf: Add bpf_map iterator

Implement seq_file operations to traverse all bpf_maps.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175909.2476096-1-yhs@fb.com
---
 include/linux/bpf.h   |  1 +
 kernel/bpf/Makefile   |  2 +-
 kernel/bpf/map_iter.c | 97 +++++++++++++++++++++++++++++++++++++++++++++++++++
 kernel/bpf/syscall.c  | 19 ++++++++++
 4 files changed, 118 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/map_iter.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ffe0b9b669bf..363ab0751967 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1082,6 +1082,7 @@ int  generic_map_update_batch(struct bpf_map *map,
 int  generic_map_delete_batch(struct bpf_map *map,
 			      const union bpf_attr *attr,
 			      union bpf_attr __user *uattr);
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 6a8b0febd3f6..b2b5eefc5254 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
 obj-y := core.o
 CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
new file mode 100644
index 000000000000..8162e0c00b9f
--- /dev/null
+++ b/kernel/bpf/map_iter.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+#include <linux/bpf.h>
+#include <linux/fs.h>
+#include <linux/filter.h>
+#include <linux/kernel.h>
+
+struct bpf_iter_seq_map_info {
+	u32 mid;
+};
+
+static void *bpf_map_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_map_info *info = seq->private;
+	struct bpf_map *map;
+
+	map = bpf_map_get_curr_or_next(&info->mid);
+	if (!map)
+		return NULL;
+
+	++*pos;
+	return map;
+}
+
+static void *bpf_map_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_map_info *info = seq->private;
+	struct bpf_map *map;
+
+	++*pos;
+	++info->mid;
+	bpf_map_put((struct bpf_map *)v);
+	map = bpf_map_get_curr_or_next(&info->mid);
+	if (!map)
+		return NULL;
+
+	return map;
+}
+
+struct bpf_iter__bpf_map {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct bpf_map *, map);
+};
+
+DEFINE_BPF_ITER_FUNC(bpf_map, struct bpf_iter_meta *meta, struct bpf_map *map)
+
+static int __bpf_map_seq_show(struct seq_file *seq, void *v, bool in_stop)
+{
+	struct bpf_iter__bpf_map ctx;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret = 0;
+
+	ctx.meta = &meta;
+	ctx.map = v;
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (prog)
+		ret = bpf_iter_run_prog(prog, &ctx);
+
+	return ret;
+}
+
+static int bpf_map_seq_show(struct seq_file *seq, void *v)
+{
+	return __bpf_map_seq_show(seq, v, false);
+}
+
+static void bpf_map_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)
+		(void)__bpf_map_seq_show(seq, v, true);
+	else
+		bpf_map_put((struct bpf_map *)v);
+}
+
+static const struct seq_operations bpf_map_seq_ops = {
+	.start	= bpf_map_seq_start,
+	.next	= bpf_map_seq_next,
+	.stop	= bpf_map_seq_stop,
+	.show	= bpf_map_seq_show,
+};
+
+static int __init bpf_map_iter_init(void)
+{
+	struct bpf_iter_reg reg_info = {
+		.target			= "bpf_map",
+		.seq_ops		= &bpf_map_seq_ops,
+		.init_seq_private	= NULL,
+		.fini_seq_private	= NULL,
+		.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
+	};
+
+	return bpf_iter_reg_target(&reg_info);
+}
+
+late_initcall(bpf_map_iter_init);
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index a293e88ee01a..de2a75500233 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -2934,6 +2934,25 @@ static int bpf_obj_get_next_id(const union bpf_attr *attr,
 	return err;
 }
 
+struct bpf_map *bpf_map_get_curr_or_next(u32 *id)
+{
+	struct bpf_map *map;
+
+	spin_lock_bh(&map_idr_lock);
+again:
+	map = idr_get_next(&map_idr, id);
+	if (map) {
+		map = __bpf_map_inc_not_zero(map, false);
+		if (IS_ERR(map)) {
+			(*id)++;
+			goto again;
+		}
+	}
+	spin_unlock_bh(&map_idr_lock);
+
+	return map;
+}
+
 #define BPF_PROG_GET_FD_BY_ID_LAST_FIELD prog_id
 
 struct bpf_prog *bpf_prog_by_id(u32 id)
-- 
cgit v1.2.3-59-g8ed1b


From 138d0be35b141e09f6b267c6ae4094318d4e4491 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:10 -0700
Subject: net: bpf: Add netlink and ipv6_route bpf_iter targets

This patch added netlink and ipv6_route targets, using
the same seq_ops (except show() and minor changes for stop())
for /proc/net/{netlink,ipv6_route}.

The net namespace for these targets is the current net
namespace at file open stage, similar to
/proc/net/{netlink,ipv6_route} reference counting
the net namespace at seq_file open stage.

Since module is not supported for now, ipv6_route is
supported only if the IPV6 is built-in, i.e., not compiled
as a module. The restriction can be lifted once module
is properly supported for bpf_iter.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175910.2476329-1-yhs@fb.com
---
 fs/proc/proc_net.c       | 19 +++++++++++
 include/linux/proc_fs.h  |  3 ++
 net/ipv6/ip6_fib.c       | 65 ++++++++++++++++++++++++++++++++++--
 net/ipv6/route.c         | 37 ++++++++++++++++++++
 net/netlink/af_netlink.c | 87 ++++++++++++++++++++++++++++++++++++++++++++++--
 5 files changed, 207 insertions(+), 4 deletions(-)

diff --git a/fs/proc/proc_net.c b/fs/proc/proc_net.c
index 4888c5224442..dba63b2429f0 100644
--- a/fs/proc/proc_net.c
+++ b/fs/proc/proc_net.c
@@ -98,6 +98,25 @@ static const struct proc_ops proc_net_seq_ops = {
 	.proc_release	= seq_release_net,
 };
 
+int bpf_iter_init_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	p->net = get_net(current->nsproxy->net_ns);
+#endif
+	return 0;
+}
+
+void bpf_iter_fini_seq_net(void *priv_data)
+{
+#ifdef CONFIG_NET_NS
+	struct seq_net_private *p = priv_data;
+
+	put_net(p->net);
+#endif
+}
+
 struct proc_dir_entry *proc_create_net_data(const char *name, umode_t mode,
 		struct proc_dir_entry *parent, const struct seq_operations *ops,
 		unsigned int state_size, void *data)
diff --git a/include/linux/proc_fs.h b/include/linux/proc_fs.h
index 45c05fd9c99d..03953c59807d 100644
--- a/include/linux/proc_fs.h
+++ b/include/linux/proc_fs.h
@@ -105,6 +105,9 @@ struct proc_dir_entry *proc_create_net_single_write(const char *name, umode_t mo
 						    void *data);
 extern struct pid *tgid_pidfd_to_pid(const struct file *file);
 
+extern int bpf_iter_init_seq_net(void *priv_data);
+extern void bpf_iter_fini_seq_net(void *priv_data);
+
 #ifdef CONFIG_PROC_PID_ARCH_STATUS
 /*
  * The architecture which selects CONFIG_PROC_PID_ARCH_STATUS must
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index 46ed56719476..a1fcc0ca21af 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2467,7 +2467,7 @@ void fib6_gc_cleanup(void)
 }
 
 #ifdef CONFIG_PROC_FS
-static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+static int ipv6_route_native_seq_show(struct seq_file *seq, void *v)
 {
 	struct fib6_info *rt = v;
 	struct ipv6_route_iter *iter = seq->private;
@@ -2625,7 +2625,7 @@ static bool ipv6_route_iter_active(struct ipv6_route_iter *iter)
 	return w->node && !(w->state == FWS_U && w->node == w->root);
 }
 
-static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
 	__releases(RCU_BH)
 {
 	struct net *net = seq_file_net(seq);
@@ -2637,6 +2637,67 @@ static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
 	rcu_read_unlock_bh();
 }
 
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct fib6_info *, rt);
+};
+
+static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
+				    struct bpf_iter_meta *meta,
+				    void *v)
+{
+	struct bpf_iter__ipv6_route ctx;
+
+	ctx.meta = meta;
+	ctx.rt = v;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	struct ipv6_route_iter *iter = seq->private;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+	int ret;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);
+	if (!prog)
+		return ipv6_route_native_seq_show(seq, v);
+
+	ret = ipv6_route_prog_seq_show(prog, &meta, v);
+	iter->w.leaf = NULL;
+
+	return ret;
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);
+		if (prog)
+			(void)ipv6_route_prog_seq_show(prog, &meta, v);
+	}
+
+	ipv6_route_native_seq_stop(seq, v);
+}
+#else
+static int ipv6_route_seq_show(struct seq_file *seq, void *v)
+{
+	return ipv6_route_native_seq_show(seq, v);
+}
+
+static void ipv6_route_seq_stop(struct seq_file *seq, void *v)
+{
+	ipv6_route_native_seq_stop(seq, v);
+}
+#endif
+
 const struct seq_operations ipv6_route_seq_ops = {
 	.start	= ipv6_route_seq_start,
 	.next	= ipv6_route_seq_next,
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 3912aac7854d..25f6d3e619d0 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6393,6 +6393,30 @@ void __init ip6_route_init_special_entries(void)
   #endif
 }
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
+
+static int __init bpf_iter_register(void)
+{
+	struct bpf_iter_reg reg_info = {
+		.target			= "ipv6_route",
+		.seq_ops		= &ipv6_route_seq_ops,
+		.init_seq_private	= bpf_iter_init_seq_net,
+		.fini_seq_private	= bpf_iter_fini_seq_net,
+		.seq_priv_size		= sizeof(struct ipv6_route_iter),
+	};
+
+	return bpf_iter_reg_target(&reg_info);
+}
+
+static void bpf_iter_unregister(void)
+{
+	bpf_iter_unreg_target("ipv6_route");
+}
+#endif
+#endif
+
 int __init ip6_route_init(void)
 {
 	int ret;
@@ -6455,6 +6479,14 @@ int __init ip6_route_init(void)
 	if (ret)
 		goto out_register_late_subsys;
 
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	ret = bpf_iter_register();
+	if (ret)
+		goto out_register_late_subsys;
+#endif
+#endif
+
 	for_each_possible_cpu(cpu) {
 		struct uncached_list *ul = per_cpu_ptr(&rt6_uncached_list, cpu);
 
@@ -6487,6 +6519,11 @@ out_kmem_cache:
 
 void ip6_route_cleanup(void)
 {
+#if IS_BUILTIN(CONFIG_IPV6)
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	bpf_iter_unregister();
+#endif
+#endif
 	unregister_netdevice_notifier(&ip6_route_dev_notifier);
 	unregister_pernet_subsys(&ip6_route_net_late_ops);
 	fib6_rules_cleanup();
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 5ded01ca8b20..33cda9baa979 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2596,7 +2596,7 @@ static void *netlink_seq_next(struct seq_file *seq, void *v, loff_t *pos)
 	return __netlink_seq_next(seq);
 }
 
-static void netlink_seq_stop(struct seq_file *seq, void *v)
+static void netlink_native_seq_stop(struct seq_file *seq, void *v)
 {
 	struct nl_seq_iter *iter = seq->private;
 
@@ -2607,7 +2607,7 @@ static void netlink_seq_stop(struct seq_file *seq, void *v)
 }
 
 
-static int netlink_seq_show(struct seq_file *seq, void *v)
+static int netlink_native_seq_show(struct seq_file *seq, void *v)
 {
 	if (v == SEQ_START_TOKEN) {
 		seq_puts(seq,
@@ -2634,6 +2634,68 @@ static int netlink_seq_show(struct seq_file *seq, void *v)
 	return 0;
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+struct bpf_iter__netlink {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct netlink_sock *, sk);
+};
+
+DEFINE_BPF_ITER_FUNC(netlink, struct bpf_iter_meta *meta, struct netlink_sock *sk)
+
+static int netlink_prog_seq_show(struct bpf_prog *prog,
+				  struct bpf_iter_meta *meta,
+				  void *v)
+{
+	struct bpf_iter__netlink ctx;
+
+	meta->seq_num--;  /* do not count SEQ_START_TOKEN in the number the prog sees */
+	ctx.meta = meta;
+	ctx.sk = nlk_sk((struct sock *)v);	/* v is NULL on the call from netlink_seq_stop() */
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, false);	/* false: not called from ->stop() */
+	if (!prog)	/* no bpf prog returned for this seq_file: use the native output */
+		return netlink_native_seq_show(seq, v);
+
+	if (v != SEQ_START_TOKEN)	/* the prog is run for sockets only, never for the token */
+		return netlink_prog_seq_show(prog, &meta, v);
+
+	return 0;	/* start token with a prog attached: nothing is emitted */
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	if (!v) {	/* no current element: offer an attached bpf prog one last call */
+		meta.seq = seq;
+		prog = bpf_iter_get_info(&meta, true);	/* true: we are in ->stop() */
+		if (prog)
+			(void)netlink_prog_seq_show(prog, &meta, v);	/* result ignored */
+	}
+
+	netlink_native_seq_stop(seq, v);	/* regular stop handling runs in every case */
+}
+#else
+static int netlink_seq_show(struct seq_file *seq, void *v)
+{
+	return netlink_native_seq_show(seq, v);
+}
+
+static void netlink_seq_stop(struct seq_file *seq, void *v)
+{
+	netlink_native_seq_stop(seq, v);
+}
+#endif
+
 static const struct seq_operations netlink_seq_ops = {
 	.start  = netlink_seq_start,
 	.next   = netlink_seq_next,
@@ -2740,6 +2802,21 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 	.automatic_shrinking = true,
 };
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+static int __init bpf_iter_register(void)
+{
+	struct bpf_iter_reg reg_info = {
+		.target			= "netlink",
+		.seq_ops		= &netlink_seq_ops,
+		.init_seq_private	= bpf_iter_init_seq_net,
+		.fini_seq_private	= bpf_iter_fini_seq_net,
+		.seq_priv_size		= sizeof(struct nl_seq_iter),
+	};
+
+	return bpf_iter_reg_target(&reg_info);
+}
+#endif
+
 static int __init netlink_proto_init(void)
 {
 	int i;
@@ -2748,6 +2825,12 @@ static int __init netlink_proto_init(void)
 	if (err != 0)
 		goto out;
 
+#if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+	err = bpf_iter_register();
+	if (err)
+		goto out;
+#endif
+
 	BUILD_BUG_ON(sizeof(struct netlink_skb_parms) > sizeof_field(struct sk_buff, cb));
 
 	nl_table = kcalloc(MAX_LINKS, sizeof(*nl_table), GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From eaaacd23910f2d7c4b22d43f591002cc217d294b Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:11 -0700
Subject: bpf: Add task and task/file iterator targets

Only the tasks belonging to "current" pid namespace
are enumerated.

For task/file target, the bpf program will have access to
  struct task_struct *task
  u32 fd
  struct file *file
where fd/file is an open file for the task.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175911.2476407-1-yhs@fb.com
---
 kernel/bpf/Makefile    |   2 +-
 kernel/bpf/task_iter.c | 333 +++++++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 334 insertions(+), 1 deletion(-)
 create mode 100644 kernel/bpf/task_iter.c

diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index b2b5eefc5254..37b2d8620153 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -2,7 +2,7 @@
 obj-y := core.o
 CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
-obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o
+obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
 obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
new file mode 100644
index 000000000000..aeed662d8451
--- /dev/null
+++ b/kernel/bpf/task_iter.c
@@ -0,0 +1,333 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* Copyright (c) 2020 Facebook */
+
+#include <linux/init.h>
+#include <linux/namei.h>
+#include <linux/pid_namespace.h>
+#include <linux/fs.h>
+#include <linux/fdtable.h>
+#include <linux/filter.h>
+
+struct bpf_iter_seq_task_common {
+	struct pid_namespace *ns;
+};
+
+struct bpf_iter_seq_task_info {
+	/* The first field must be struct bpf_iter_seq_task_common.
+	 * this is assumed by {init, fini}_seq_pidns() callback functions.
+	 */
+	struct bpf_iter_seq_task_common common;
+	u32 tid;
+};
+
+/* Next live task with id >= *tid (ref held, *tid updated), or NULL if none. */
+static struct task_struct *task_seq_get_next(struct pid_namespace *ns, u32 *tid)
+{
+	struct task_struct *task = NULL;
+	struct pid *pid;
+
+	rcu_read_lock();
+	while (!task && (pid = idr_get_next(&ns->idr, tid))) {
+		task = get_pid_task(pid, PIDTYPE_PID);
+		*tid += !task;	/* pid without a task: step past it and keep looking */
+	}
+	rcu_read_unlock();
+	return task;
+}
+
+static void *task_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_task_info *info = seq->private;
+	struct task_struct *task;
+
+	task = task_seq_get_next(info->common.ns, &info->tid);	/* resume at the saved tid */
+	if (!task)
+		return NULL;	/* no further task was found */
+
+	++*pos;
+	return task;	/* put later by task_seq_next() or task_seq_stop() */
+}
+
+/* seq_file ->next(): release the task just shown and look up its successor. */
+static void *task_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_task_info *info = seq->private;
+
+	++*pos;
+	++info->tid;	/* continue the search just past the task in v */
+	put_task_struct((struct task_struct *)v);
+
+	/* NULL ends the iteration; a non-NULL task carries a reference that
+	 * the following ->next() or ->stop() call puts.
+	 */
+	return task_seq_get_next(info->common.ns, &info->tid);
+}
+
+struct bpf_iter__task {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct task_struct *, task);
+};
+
+DEFINE_BPF_ITER_FUNC(task, struct bpf_iter_meta *meta, struct task_struct *task)
+
+/* Run the bpf prog on @task; @task is NULL when called from ->stop(). */
+static int __task_seq_show(struct seq_file *seq, struct task_struct *task,
+			   bool in_stop)
+{
+	struct bpf_iter_meta meta;
+	struct bpf_iter__task ctx;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;	/* stored once, before bpf_iter_get_info() is given meta */
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (!prog)
+		return 0;	/* no prog to run: nothing to show */
+
+	ctx.meta = &meta;
+	ctx.task = task;
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_seq_show(struct seq_file *seq, void *v)
+{
+	return __task_seq_show(seq, v, false);
+}
+
+static void task_seq_stop(struct seq_file *seq, void *v)
+{
+	if (!v)	/* no current task: run the prog one more time with a NULL task */
+		(void)__task_seq_show(seq, v, true);
+	else	/* stopped with a task in hand: drop the reference held on it */
+		put_task_struct((struct task_struct *)v);
+}
+
+static const struct seq_operations task_seq_ops = {
+	.start	= task_seq_start,
+	.next	= task_seq_next,
+	.stop	= task_seq_stop,
+	.show	= task_seq_show,
+};
+
+struct bpf_iter_seq_task_file_info {
+	/* The first field must be struct bpf_iter_seq_task_common.
+	 * this is assumed by {init, fini}_seq_pidns() callback functions.
+	 */
+	struct bpf_iter_seq_task_common common;
+	struct task_struct *task;
+	struct files_struct *files;
+	u32 tid;
+	u32 fd;
+};
+
+static struct file *
+task_file_seq_get_next(struct bpf_iter_seq_task_file_info *info,
+		       struct task_struct **task, struct files_struct **fstruct)
+{
+	struct pid_namespace *ns = info->common.ns;
+	u32 curr_tid = info->tid, max_fds;
+	struct files_struct *curr_files;
+	struct task_struct *curr_task;
+	int curr_fd = info->fd;	/* NOTE(review): int, while info->fd and max_fds are u32 */
+
+	/* If this function returns a non-NULL file object, references are
+	 * held on the task, the files_struct and the file on return.
+	 * Otherwise, no reference is held on return.
+	 */
+again:
+	if (*task) {	/* keep scanning the task the caller already holds */
+		curr_task = *task;
+		curr_files = *fstruct;
+		curr_fd = info->fd;
+	} else {	/* no task in hand: look one up starting from curr_tid */
+		curr_task = task_seq_get_next(ns, &curr_tid);
+		if (!curr_task)
+			return NULL;
+
+		curr_files = get_files_struct(curr_task);
+		if (!curr_files) {	/* task has no files_struct: skip it */
+			put_task_struct(curr_task);
+			curr_tid = ++(info->tid);	/* NOTE(review): steps info->tid by one, not past curr_tid */
+			info->fd = 0;
+			goto again;
+		}
+
+		/* hand the references out through *fstruct and *task, sync info->tid */
+		*fstruct = curr_files;
+		*task = curr_task;
+		if (curr_tid == info->tid) {	/* same tid as saved: resume at the saved fd */
+			curr_fd = info->fd;
+		} else {	/* a later tid was found: record it and start at fd 0 */
+			info->tid = curr_tid;
+			curr_fd = 0;
+		}
+	}
+
+	rcu_read_lock();	/* the fd table lookups below run inside this RCU read section */
+	max_fds = files_fdtable(curr_files)->max_fds;
+	for (; curr_fd < max_fds; curr_fd++) {
+		struct file *f;
+
+		f = fcheck_files(curr_files, curr_fd);
+		if (!f)
+			continue;	/* unused descriptor slot */
+
+		/* remember where to resume, then return the file with a reference */
+		info->fd = curr_fd;
+		get_file(f);
+		rcu_read_unlock();
+		return f;
+	}
+
+	/* the current task is done: drop its references and go to the next task */
+	rcu_read_unlock();
+	put_files_struct(curr_files);
+	put_task_struct(curr_task);
+	*task = NULL;
+	*fstruct = NULL;
+	info->fd = 0;
+	curr_tid = ++(info->tid);
+	goto again;
+}
+
+static void *task_file_seq_start(struct seq_file *seq, loff_t *pos)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct files_struct *files = NULL;
+	struct task_struct *task = NULL;	/* NULL: the task is looked up from info->tid */
+	struct file *file;
+
+	file = task_file_seq_get_next(info, &task, &files);	/* resumes at info->tid / info->fd */
+	if (!file) {
+		info->files = NULL;	/* nothing is held when no file is returned */
+		info->task = NULL;
+		return NULL;
+	}
+
+	++*pos;
+	info->task = task;	/* remembered so that ->next() and ->stop() can use and put them */
+	info->files = files;
+
+	return file;
+}
+
+static void *task_file_seq_next(struct seq_file *seq, void *v, loff_t *pos)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct files_struct *files = info->files;
+	struct task_struct *task = info->task;
+	struct file *file;
+
+	++*pos;
+	++info->fd;	/* continue with the descriptor after the one just shown */
+	fput((struct file *)v);	/* drop the reference on the file just shown */
+	file = task_file_seq_get_next(info, &task, &files);
+	if (!file) {
+		info->files = NULL;	/* nothing is held when no file is returned */
+		info->task = NULL;
+		return NULL;
+	}
+
+	info->task = task;	/* may be a different task than on entry */
+	info->files = files;
+
+	return file;
+}
+
+struct bpf_iter__task_file {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct task_struct *, task);
+	u32 fd __aligned(8);
+	__bpf_md_ptr(struct file *, file);
+};
+
+DEFINE_BPF_ITER_FUNC(task_file, struct bpf_iter_meta *meta,
+		     struct task_struct *task, u32 fd,
+		     struct file *file)
+
+static int __task_file_seq_show(struct seq_file *seq, struct file *file,
+				bool in_stop)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+	struct bpf_iter__task_file ctx;
+	struct bpf_iter_meta meta;
+	struct bpf_prog *prog;
+
+	meta.seq = seq;
+	prog = bpf_iter_get_info(&meta, in_stop);
+	if (!prog)
+		return 0;	/* no prog to run: nothing to show */
+
+	ctx.meta = &meta;
+	ctx.task = info->task;	/* NULL once start/next have run out of files */
+	ctx.fd = info->fd;
+	ctx.file = file;	/* NULL on the call from task_file_seq_stop() */
+	return bpf_iter_run_prog(prog, &ctx);
+}
+
+static int task_file_seq_show(struct seq_file *seq, void *v)
+{
+	return __task_file_seq_show(seq, v, false);
+}
+
+static void task_file_seq_stop(struct seq_file *seq, void *v)
+{
+	struct bpf_iter_seq_task_file_info *info = seq->private;
+
+	if (!v) {	/* no current file: run the prog one more time with a NULL file */
+		(void)__task_file_seq_show(seq, v, true);
+	} else {	/* stopped with a file in hand: drop all three references */
+		fput((struct file *)v);
+		put_files_struct(info->files);
+		put_task_struct(info->task);
+		info->files = NULL;	/* cleared so the next ->start() looks the task up again */
+		info->task = NULL;
+	}
+}
+
+static int init_seq_pidns(void *priv_data)
+{
+	struct bpf_iter_seq_task_common *common = priv_data;	/* first member of both private structs */
+
+	common->ns = get_pid_ns(task_active_pid_ns(current));	/* released by fini_seq_pidns() */
+	return 0;
+}
+
+static void fini_seq_pidns(void *priv_data)
+{
+	struct bpf_iter_seq_task_common *common = priv_data;	/* first member of both private structs */
+
+	put_pid_ns(common->ns);	/* pairs with get_pid_ns() in init_seq_pidns() */
+}
+
+static const struct seq_operations task_file_seq_ops = {
+	.start	= task_file_seq_start,
+	.next	= task_file_seq_next,
+	.stop	= task_file_seq_stop,
+	.show	= task_file_seq_show,
+};
+
+static int __init task_iter_init(void)
+{
+	struct bpf_iter_reg task_file_reg_info = {
+		.target			= "task_file",
+		.seq_ops		= &task_file_seq_ops,
+		.init_seq_private	= init_seq_pidns,
+		.fini_seq_private	= fini_seq_pidns,
+		.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
+	};
+	struct bpf_iter_reg task_reg_info = {
+		.target			= "task",
+		.seq_ops		= &task_seq_ops,
+		.init_seq_private	= init_seq_pidns,
+		.fini_seq_private	= fini_seq_pidns,
+		.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
+	};
+	int ret;
+
+	ret = bpf_iter_reg_target(&task_reg_info);	/* "task" is registered first */
+	if (ret)
+		return ret;
+
+	return bpf_iter_reg_target(&task_file_reg_info);	/* NOTE(review): on failure "task" stays registered */
+}
+late_initcall(task_iter_init);
-- 
cgit v1.2.3-59-g8ed1b


From b121b341e5983bdccf7a5d6cf9236a45c965a31f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:12 -0700
Subject: bpf: Add PTR_TO_BTF_ID_OR_NULL support

Add bpf_reg_type PTR_TO_BTF_ID_OR_NULL support.
For tracing/iter program, the bpf program context
definition, e.g., for previous bpf_map target, looks like
  struct bpf_iter__bpf_map {
    struct bpf_iter_meta *meta;
    struct bpf_map *map;
  };

The kernel guarantees that meta is not NULL, but the
map pointer may be NULL. A NULL map indicates that all
objects have been traversed, so the bpf program can take
proper action, e.g., do final aggregation and/or send
a final report to user space.

Add btf_id_or_null_non0_off to the prog->aux structure to
indicate that, if the context access offset is not 0, the register
type is set to PTR_TO_BTF_ID_OR_NULL instead of PTR_TO_BTF_ID.
This bit is set for tracing/iter programs.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175912.2476576-1-yhs@fb.com
---
 include/linux/bpf.h   |  2 ++
 kernel/bpf/btf.c      |  5 ++++-
 kernel/bpf/verifier.c | 16 ++++++++++++----
 3 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 363ab0751967..cf4b6e44f2bc 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -320,6 +320,7 @@ enum bpf_reg_type {
 	PTR_TO_TP_BUFFER,	 /* reg points to a writable raw tp's buffer */
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
+	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -658,6 +659,7 @@ struct bpf_prog_aux {
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
+	bool btf_id_or_null_non0_off;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index a2cfba89a8e1..c490fbde22d4 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3790,7 +3790,10 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		return true;
 
 	/* this is a pointer to another type */
-	info->reg_type = PTR_TO_BTF_ID;
+	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
+		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
+	else
+		info->reg_type = PTR_TO_BTF_ID;
 
 	if (tgt_prog) {
 		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index d725ff7d11db..36b2a38a06fe 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -398,7 +398,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
 	       type == PTR_TO_SOCKET_OR_NULL ||
 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
-	       type == PTR_TO_TCP_SOCK_OR_NULL;
+	       type == PTR_TO_TCP_SOCK_OR_NULL ||
+	       type == PTR_TO_BTF_ID_OR_NULL;
 }
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -483,6 +484,7 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_TP_BUFFER]	= "tp_buffer",
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
+	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
 };
 
 static char slot_type_char[] = {
@@ -543,7 +545,7 @@ static void print_verifier_state(struct bpf_verifier_env *env,
 			/* reg->off should be 0 for SCALAR_VALUE */
 			verbose(env, "%lld", reg->var_off.value + reg->off);
 		} else {
-			if (t == PTR_TO_BTF_ID)
+			if (t == PTR_TO_BTF_ID || t == PTR_TO_BTF_ID_OR_NULL)
 				verbose(env, "%s", kernel_type_name(reg->btf_id));
 			verbose(env, "(id=%d", reg->id);
 			if (reg_type_may_be_refcounted_or_null(t))
@@ -2139,6 +2141,7 @@ static bool is_spillable_regtype(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return true;
 	default:
 		return false;
@@ -2659,7 +2662,7 @@ static int check_ctx_access(struct bpf_verifier_env *env, int insn_idx, int off,
 		 */
 		*reg_type = info.reg_type;
 
-		if (*reg_type == PTR_TO_BTF_ID)
+		if (*reg_type == PTR_TO_BTF_ID || *reg_type == PTR_TO_BTF_ID_OR_NULL)
 			*btf_id = info.btf_id;
 		else
 			env->insn_aux_data[insn_idx].ctx_field_size = info.ctx_field_size;
@@ -3243,7 +3246,8 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				 * a sub-register.
 				 */
 				regs[value_regno].subreg_def = DEF_NOT_SUBREG;
-				if (reg_type == PTR_TO_BTF_ID)
+				if (reg_type == PTR_TO_BTF_ID ||
+				    reg_type == PTR_TO_BTF_ID_OR_NULL)
 					regs[value_regno].btf_id = btf_id;
 			}
 			regs[value_regno].type = reg_type;
@@ -6572,6 +6576,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_SOCK_COMMON;
 		} else if (reg->type == PTR_TO_TCP_SOCK_OR_NULL) {
 			reg->type = PTR_TO_TCP_SOCK;
+		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
+			reg->type = PTR_TO_BTF_ID;
 		}
 		if (is_null) {
 			/* We don't need id and ref_obj_id from this point
@@ -8429,6 +8435,7 @@ static bool reg_type_mismatch_ok(enum bpf_reg_type type)
 	case PTR_TO_TCP_SOCK_OR_NULL:
 	case PTR_TO_XDP_SOCK:
 	case PTR_TO_BTF_ID:
+	case PTR_TO_BTF_ID_OR_NULL:
 		return false;
 	default:
 		return true;
@@ -10640,6 +10647,7 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		if (!bpf_iter_prog_supported(prog))
 			return -EINVAL;
+		prog->aux->btf_id_or_null_non0_off = true;
 		ret = btf_distill_func_proto(&env->log, btf, t,
 					     tname, &fmodel);
 		return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 492e639f0c222784e2e0f121966375f641c61b15 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:14 -0700
Subject: bpf: Add bpf_seq_printf and bpf_seq_write helpers

Two helpers bpf_seq_printf and bpf_seq_write, are added for
writing data to the seq_file buffer.

bpf_seq_printf supports common format string flag/width/type
fields so at least I can get identical results for
netlink and ipv6_route targets.

For bpf_seq_printf and bpf_seq_write, return value -EOVERFLOW
specifically indicates a write failure due to overflow, which
means the object will be repeated in the next bpf invocation
if the object collection stays the same. Note that if the object
collection has changed, then depending on how the traversal is
done, the object may not be visited again even if it is still
in the collection.

For bpf_seq_printf, format %s, %p{i,I}{4,6} needs to
read kernel memory. Reading kernel memory may fail in
the following two cases:
  - invalid kernel address, or
  - valid kernel address but requiring a major fault
If reading kernel memory failed, the %s string will be
an empty string and %p{i,I}{4,6} will be all 0.
Not returning error to bpf program is consistent with
what bpf_trace_printk() does for now.

bpf_seq_printf may return -EBUSY, meaning that the internal percpu
buffer for memory copy of strings or other pointees is
not available. The bpf program can return 1 to indicate it
wants the same object to be repeated. Right now, this should not
happen on non-RT kernels since migrate_disable(), which guards
the bpf prog call, calls preempt_disable().

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175914.2476661-1-yhs@fb.com
---
 include/uapi/linux/bpf.h       |  39 +++++++-
 kernel/trace/bpf_trace.c       | 214 +++++++++++++++++++++++++++++++++++++++++
 scripts/bpf_helpers_doc.py     |   2 +
 tools/include/uapi/linux/bpf.h |  39 +++++++-
 4 files changed, 292 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 708763f702e1..9d1932e23cec 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3077,6 +3077,41 @@ union bpf_attr {
  * 		See: clock_gettime(CLOCK_BOOTTIME)
  * 	Return
  * 		Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * 	Description
+ * 		seq_printf uses seq_file seq_printf() to print out the format string.
+ * 		The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * 		the format string itself. The *data* and *data_len* are format string
+ * 		arguments. The *data* are a u64 array and corresponding format string
+ * 		values are stored in the array. For strings and pointers where pointees
+ * 		are accessed, only the pointer values are stored in the *data* array.
+ * 		The *data_len* is the *data* size in term of bytes.
+ *
+ *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ *		Reading kernel memory may fail due to either invalid address or
+ *		valid address but requiring a major memory fault. If reading kernel memory
+ *		fails, the string for **%s** will be an empty string, and the ip
+ *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ *		bpf program is consistent with what bpf_trace_printk() does for now.
+ * 	Return
+ * 		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EBUSY**		Percpu memory copy buffer is busy, can try again
+ *					by returning 1 from bpf program.
+ *		* **-EINVAL**		Invalid arguments, or invalid/unsupported formats.
+ *		* **-E2BIG**		Too many format specifiers.
+ *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * 	Description
+ * 		seq_write uses seq_file seq_write() to write the data.
+ * 		The *m* represents the seq_file. The *data* and *len* represent the
+ *		data to write in bytes.
+ * 	Return
+ * 		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3204,7 +3239,9 @@ union bpf_attr {
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
 	FN(sk_assign),			\
-	FN(ktime_get_boot_ns),
+	FN(ktime_get_boot_ns),		\
+	FN(seq_printf),			\
+	FN(seq_write),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index e875c95d3ced..d961428fb5b6 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -457,6 +457,212 @@ const struct bpf_func_proto *bpf_get_trace_printk_proto(void)
 	return &bpf_trace_printk_proto;
 }
 
+#define MAX_SEQ_PRINTF_VARARGS		12
+#define MAX_SEQ_PRINTF_MAX_MEMCPY	6
+#define MAX_SEQ_PRINTF_STR_LEN		128
+
+struct bpf_seq_printf_buf {
+	char buf[MAX_SEQ_PRINTF_MAX_MEMCPY][MAX_SEQ_PRINTF_STR_LEN];
+};
+static DEFINE_PER_CPU(struct bpf_seq_printf_buf, bpf_seq_printf_buf);
+static DEFINE_PER_CPU(int, bpf_seq_printf_buf_used);
+
+BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
+	   const void *, data, u32, data_len)
+{
+	int err = -EINVAL, fmt_cnt = 0, memcpy_cnt = 0;
+	int i, buf_used, copy_size, num_args;
+	u64 params[MAX_SEQ_PRINTF_VARARGS];
+	struct bpf_seq_printf_buf *bufs;
+	const u64 *args = data;
+
+	buf_used = this_cpu_inc_return(bpf_seq_printf_buf_used);	/* claim this CPU's copy buffers */
+	if (WARN_ON_ONCE(buf_used > 1)) {	/* they are already in use on this CPU */
+		err = -EBUSY;
+		goto out;
+	}
+
+	bufs = this_cpu_ptr(&bpf_seq_printf_buf);
+
+	/*
+	 * bpf_check()->check_func_arg()->check_stack_boundary()
+	 * guarantees that fmt points to bpf program stack,
+	 * fmt_size bytes of it were initialized and fmt_size > 0
+	 */
+	if (fmt[--fmt_size] != 0)	/* the last byte of fmt must be the NUL terminator */
+		goto out;
+
+	if (data_len & 7)	/* data must consist of whole u64 slots */
+		goto out;
+
+	for (i = 0; i < fmt_size; i++) {
+		if (fmt[i] == '%') {
+			if (fmt[i + 1] == '%')
+				i++;
+			else if (!data || !data_len)	/* a conversion without any argument data */
+				goto out;
+		}
+	}
+
+	num_args = data_len / 8;	/* number of u64 arguments supplied */
+
+	/* check format string for allowed specifiers */
+	for (i = 0; i < fmt_size; i++) {
+		/* only printable ascii for now. */
+		if ((!isprint(fmt[i]) && !isspace(fmt[i])) || !isascii(fmt[i])) {
+			err = -EINVAL;
+			goto out;
+		}
+
+		if (fmt[i] != '%')
+			continue;
+
+		if (fmt[i + 1] == '%') {
+			i++;
+			continue;
+		}
+
+		if (fmt_cnt >= MAX_SEQ_PRINTF_VARARGS) {
+			err = -E2BIG;
+			goto out;
+		}
+
+		if (fmt_cnt >= num_args) {	/* more conversions than arguments */
+			err = -EINVAL;
+			goto out;
+		}
+
+		/* fmt[i] != 0 && fmt[last] == 0, so we can access fmt[i + 1] */
+		i++;
+
+		/* skip optional "[0 +-][num]" width formatting field */
+		while (fmt[i] == '0' || fmt[i] == '+'  || fmt[i] == '-' ||
+		       fmt[i] == ' ')
+			i++;
+		if (fmt[i] >= '1' && fmt[i] <= '9') {
+			i++;
+			while (fmt[i] >= '0' && fmt[i] <= '9')
+				i++;
+		}
+
+		if (fmt[i] == 's') {
+			/* try our best to copy */
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+			err = strncpy_from_unsafe(bufs->buf[memcpy_cnt],
+						  (void *) (long) args[fmt_cnt],
+						  MAX_SEQ_PRINTF_STR_LEN);
+			if (err < 0)	/* the read failed: print an empty string */
+				bufs->buf[memcpy_cnt][0] = '\0';
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];	/* print the copy */
+
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'p') {
+			if (fmt[i + 1] == 0 ||
+			    fmt[i + 1] == 'K' ||
+			    fmt[i + 1] == 'x') {
+				/* just kernel pointers */
+				params[fmt_cnt] = args[fmt_cnt];
+				fmt_cnt++;
+				continue;
+			}
+
+			/* only support "%pI4", "%pi4", "%pI6" and "%pi6". */
+			if (fmt[i + 1] != 'i' && fmt[i + 1] != 'I') {
+				err = -EINVAL;
+				goto out;
+			}
+			if (fmt[i + 2] != '4' && fmt[i + 2] != '6') {
+				err = -EINVAL;
+				goto out;
+			}
+
+			if (memcpy_cnt >= MAX_SEQ_PRINTF_MAX_MEMCPY) {
+				err = -E2BIG;
+				goto out;
+			}
+
+
+			copy_size = (fmt[i + 2] == '4') ? 4 : 16;	/* bytes read for "4" vs "6" */
+
+			err = probe_kernel_read(bufs->buf[memcpy_cnt],
+						(void *) (long) args[fmt_cnt],
+						copy_size);
+			if (err < 0)	/* the read failed: print an all-zero address */
+				memset(bufs->buf[memcpy_cnt], 0, copy_size);
+			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];	/* print the copy */
+
+			i += 2;	/* step over the two modifier characters */
+			fmt_cnt++;
+			memcpy_cnt++;
+			continue;
+		}
+
+		if (fmt[i] == 'l') {	/* optional "l" or "ll" length modifier */
+			i++;
+			if (fmt[i] == 'l')
+				i++;
+		}
+
+		if (fmt[i] != 'i' && fmt[i] != 'd' &&
+		    fmt[i] != 'u' && fmt[i] != 'x') {
+			err = -EINVAL;
+			goto out;
+		}
+
+		params[fmt_cnt] = args[fmt_cnt];
+		fmt_cnt++;
+	}
+
+	/* At most MAX_SEQ_PRINTF_VARARGS parameters are supported; just pass
+	 * all of them to seq_printf().
+	 */
+	seq_printf(m, fmt, params[0], params[1], params[2], params[3],
+		   params[4], params[5], params[6], params[7], params[8],
+		   params[9], params[10], params[11]);
+
+	err = seq_has_overflowed(m) ? -EOVERFLOW : 0;
+out:
+	this_cpu_dec(bpf_seq_printf_buf_used);	/* undo the claim on every exit path */
+	return err;
+}
+
+static int bpf_seq_printf_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_printf_proto = {
+	.func		= bpf_seq_printf,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE,
+	.arg4_type      = ARG_PTR_TO_MEM_OR_NULL,
+	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_seq_printf_btf_ids,
+};
+
+BPF_CALL_3(bpf_seq_write, struct seq_file *, m, const void *, data, u32, len)
+{
+	return seq_write(m, data, len) ? -EOVERFLOW : 0;	/* any non-zero result becomes -EOVERFLOW */
+}
+
+static int bpf_seq_write_btf_ids[5];
+static const struct bpf_func_proto bpf_seq_write_proto = {
+	.func		= bpf_seq_write,
+	.gpl_only	= true,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_BTF_ID,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.btf_id		= bpf_seq_write_btf_ids,
+};
+
 static __always_inline int
 get_map_perf_counter(struct bpf_map *map, u64 flags,
 		     u64 *value, u64 *enabled, u64 *running)
@@ -1226,6 +1432,14 @@ tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_xdp_output:
 		return &bpf_xdp_output_proto;
 #endif
+	case BPF_FUNC_seq_printf:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_printf_proto :
+		       NULL;
+	case BPF_FUNC_seq_write:
+		return prog->expected_attach_type == BPF_TRACE_ITER ?
+		       &bpf_seq_write_proto :
+		       NULL;
 	default:
 		return raw_tp_prog_func_proto(func_id, prog);
 	}
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index f43d193aff3a..ded304c96a05 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -414,6 +414,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
 
             'struct __sk_buff',
             'struct sk_msg_md',
@@ -450,6 +451,7 @@ class PrinterHelpers(Printer):
             'struct sk_reuseport_md',
             'struct sockaddr',
             'struct tcphdr',
+            'struct seq_file',
     }
     mapped_types = {
             'u8': '__u8',
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 708763f702e1..9d1932e23cec 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3077,6 +3077,41 @@ union bpf_attr {
  * 		See: clock_gettime(CLOCK_BOOTTIME)
  * 	Return
  * 		Current *ktime*.
+ *
+ * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
+ * 	Description
+ * 		seq_printf uses seq_file seq_printf() to print out the format string.
+ * 		The *m* represents the seq_file. The *fmt* and *fmt_size* are for
+ * 		the format string itself. The *data* and *data_len* are format string
+ * 		arguments. The *data* are a u64 array and corresponding format string
+ * 		values are stored in the array. For strings and pointers where pointees
+ * 		are accessed, only the pointer values are stored in the *data* array.
+ * 		The *data_len* is the *data* size in term of bytes.
+ *
+ *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
+ *		Reading kernel memory may fail due to either invalid address or
+ *		valid address but requiring a major memory fault. If reading kernel memory
+ *		fails, the string for **%s** will be an empty string, and the ip
+ *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
+ *		bpf program is consistent with what bpf_trace_printk() does for now.
+ * 	Return
+ * 		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EBUSY**		Percpu memory copy buffer is busy, can try again
+ *					by returning 1 from bpf program.
+ *		* **-EINVAL**		Invalid arguments, or invalid/unsupported formats.
+ *		* **-E2BIG**		Too many format specifiers.
+ *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *
+ * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
+ * 	Description
+ * 		seq_write uses seq_file seq_write() to write the data.
+ * 		The *m* represents the seq_file. The *data* and *len* represent the
+ *		data to write in bytes.
+ * 	Return
+ * 		0 on success, or a negative errno in case of failure.
+ *
+ *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3204,7 +3239,9 @@ union bpf_attr {
 	FN(get_netns_cookie),		\
 	FN(get_current_ancestor_cgroup_id),	\
 	FN(sk_assign),			\
-	FN(ktime_get_boot_ns),
+	FN(ktime_get_boot_ns),		\
+	FN(seq_printf),			\
+	FN(seq_write),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3-59-g8ed1b


From 1d68f22b3d53d368d5cc8d09de890250cae5c945 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:15 -0700
Subject: bpf: Handle spilled PTR_TO_BTF_ID properly when checking
 stack_boundary

This is specifically to handle a case like the one below:
   // ptr below is a socket ptr identified by PTR_TO_BTF_ID
   u64 param[2] = { ptr, val };
   bpf_seq_printf(seq, fmt, sizeof(fmt), param, sizeof(param));

In this case, the 16 bytes stack for "param" contains:
   8 bytes for ptr with spilled PTR_TO_BTF_ID
   8 bytes for val as STACK_MISC

The current verifier will complain that the ptr should not be visible
to the helper.
   ...
   16: (7b) *(u64 *)(r10 -64) = r2
   18: (7b) *(u64 *)(r10 -56) = r1
   19: (bf) r4 = r10
   ;
   20: (07) r4 += -64
   ; BPF_SEQ_PRINTF(seq, fmt1, (long)s, s->sk_protocol);
   21: (bf) r1 = r6
   22: (18) r2 = 0xffffa8d00018605a
   24: (b4) w3 = 10
   25: (b4) w5 = 16
   26: (85) call bpf_seq_printf#125
    R0=inv(id=0) R1_w=ptr_seq_file(id=0,off=0,imm=0)
    R2_w=map_value(id=0,off=90,ks=4,vs=144,imm=0) R3_w=inv10
    R4_w=fp-64 R5_w=inv16 R6=ptr_seq_file(id=0,off=0,imm=0)
    R7=ptr_netlink_sock(id=0,off=0,imm=0) R10=fp0 fp-56_w=mmmmmmmm
    fp-64_w=ptr_
   last_idx 26 first_idx 13
   regs=8 stack=0 before 25: (b4) w5 = 16
   regs=8 stack=0 before 24: (b4) w3 = 10
   invalid indirect read from stack off -64+0 size 16

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175915.2476783-1-yhs@fb.com
---
 kernel/bpf/verifier.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 36b2a38a06fe..2a1826c76bb6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -3494,6 +3494,11 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 			*stype = STACK_MISC;
 			goto mark;
 		}
+
+		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
+		    state->stack[spi].spilled_ptr.type == PTR_TO_BTF_ID)
+			goto mark;
+
 		if (state->stack[spi].slot_type[0] == STACK_SPILL &&
 		    state->stack[spi].spilled_ptr.type == SCALAR_VALUE) {
 			__mark_reg_unknown(env, &state->stack[spi].spilled_ptr);
-- 
cgit v1.2.3-59-g8ed1b


From 9c5f8a1008a121e4c6b24af211034e24b0b63081 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:16 -0700
Subject: bpf: Support variable length array in tracing programs

In /proc/net/ipv6_route, we have
  struct fib6_info {
    struct fib6_table *fib6_table;
    ...
    struct fib6_nh fib6_nh[0];
  }
  struct fib6_nh {
    struct fib_nh_common nh_common;
    struct rt6_info **rt6i_pcpu;
    struct rt6_exception_bucket *rt6i_exception_bucket;
  };
  struct fib_nh_common {
    ...
    u8 nhc_gw_family;
    ...
  }

The access:
  struct fib6_nh *fib6_nh = &rt->fib6_nh;
  ... fib6_nh->nh_common.nhc_gw_family ...

This patch ensures such an access is handled properly.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175916.2476853-1-yhs@fb.com
---
 kernel/bpf/btf.c | 37 +++++++++++++++++++++++++++++++++++++
 1 file changed, 37 insertions(+)

diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index c490fbde22d4..dcd233139294 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3833,6 +3833,7 @@ int btf_struct_access(struct bpf_verifier_log *log,
 	const struct btf_type *mtype, *elem_type = NULL;
 	const struct btf_member *member;
 	const char *tname, *mname;
+	u32 vlen;
 
 again:
 	tname = __btf_name_by_offset(btf_vmlinux, t->name_off);
@@ -3841,7 +3842,43 @@ again:
 		return -EINVAL;
 	}
 
+	vlen = btf_type_vlen(t);
 	if (off + size > t->size) {
+		/* If the last element is a variable size array, we may
+		 * need to relax the rule.
+		 */
+		struct btf_array *array_elem;
+
+		if (vlen == 0)
+			goto error;
+
+		member = btf_type_member(t) + vlen - 1;
+		mtype = btf_type_skip_modifiers(btf_vmlinux, member->type,
+						NULL);
+		if (!btf_type_is_array(mtype))
+			goto error;
+
+		array_elem = (struct btf_array *)(mtype + 1);
+		if (array_elem->nelems != 0)
+			goto error;
+
+		moff = btf_member_bit_offset(t, member) / 8;
+		if (off < moff)
+			goto error;
+
+		/* Only allow structure for now, can be relaxed for
+		 * other types later.
+		 */
+		elem_type = btf_type_skip_modifiers(btf_vmlinux,
+						    array_elem->type, NULL);
+		if (!btf_type_is_struct(elem_type))
+			goto error;
+
+		off = (off - moff) % elem_type->size;
+		return btf_struct_access(log, elem_type, off, size, atype,
+					 next_btf_id);
+
+error:
 		bpf_log(log, "access beyond struct %s at off %u size %u\n",
 			tname, off, size);
 		return -EACCES;
-- 
cgit v1.2.3-59-g8ed1b


From c09add2fbc5aece00a5b54a48ce39fd4e3284d87 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:17 -0700
Subject: tools/libbpf: Add bpf_iter support

Two new libbpf APIs are added to support bpf_iter:
  - bpf_program__attach_iter
    Given a bpf program and additional parameters, which is
    none now, returns a bpf_link.
  - bpf_iter_create
    syscall level API to create a bpf iterator.

The macro BPF_SEQ_PRINTF is also introduced. Its usage
looks like:
  BPF_SEQ_PRINTF(seq, "task id %d\n", pid);

This macro can help bpf program writers with
nicer bpf_seq_printf syntax similar to the kernel one.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175917.2476936-1-yhs@fb.com
---
 tools/lib/bpf/bpf.c         | 10 +++++++++
 tools/lib/bpf/bpf.h         |  2 ++
 tools/lib/bpf/bpf_tracing.h | 16 ++++++++++++++
 tools/lib/bpf/libbpf.c      | 52 +++++++++++++++++++++++++++++++++++++++++++++
 tools/lib/bpf/libbpf.h      |  9 ++++++++
 tools/lib/bpf/libbpf.map    |  2 ++
 6 files changed, 91 insertions(+)

diff --git a/tools/lib/bpf/bpf.c b/tools/lib/bpf/bpf.c
index 43322f0d6c7f..a7329b671c41 100644
--- a/tools/lib/bpf/bpf.c
+++ b/tools/lib/bpf/bpf.c
@@ -619,6 +619,16 @@ int bpf_link_update(int link_fd, int new_prog_fd,
 	return sys_bpf(BPF_LINK_UPDATE, &attr, sizeof(attr));
 }
 
+int bpf_iter_create(int link_fd)
+{
+	union bpf_attr attr;
+
+	memset(&attr, 0, sizeof(attr));
+	attr.iter_create.link_fd = link_fd;
+
+	return sys_bpf(BPF_ITER_CREATE, &attr, sizeof(attr));
+}
+
 int bpf_prog_query(int target_fd, enum bpf_attach_type type, __u32 query_flags,
 		   __u32 *attach_flags, __u32 *prog_ids, __u32 *prog_cnt)
 {
diff --git a/tools/lib/bpf/bpf.h b/tools/lib/bpf/bpf.h
index 1901b2777854..1b6015b21ba8 100644
--- a/tools/lib/bpf/bpf.h
+++ b/tools/lib/bpf/bpf.h
@@ -187,6 +187,8 @@ struct bpf_link_update_opts {
 LIBBPF_API int bpf_link_update(int link_fd, int new_prog_fd,
 			       const struct bpf_link_update_opts *opts);
 
+LIBBPF_API int bpf_iter_create(int link_fd);
+
 struct bpf_prog_test_run_attr {
 	int prog_fd;
 	int repeat;
diff --git a/tools/lib/bpf/bpf_tracing.h b/tools/lib/bpf/bpf_tracing.h
index f3f3c3fb98cb..cf97d07692b4 100644
--- a/tools/lib/bpf/bpf_tracing.h
+++ b/tools/lib/bpf/bpf_tracing.h
@@ -413,4 +413,20 @@ typeof(name(0)) name(struct pt_regs *ctx)				    \
 }									    \
 static __always_inline typeof(name(0)) ____##name(struct pt_regs *ctx, ##args)
 
+/*
+ * BPF_SEQ_PRINTF wraps bpf_seq_printf, packing the to-be-printed
+ * values into an array of unsigned long long.
+ */
+#define BPF_SEQ_PRINTF(seq, fmt, args...)				    \
+	({								    \
+		_Pragma("GCC diagnostic push")				    \
+		_Pragma("GCC diagnostic ignored \"-Wint-conversion\"")	    \
+		static const char ___fmt[] = fmt;			    \
+		unsigned long long ___param[] = { args };		    \
+		_Pragma("GCC diagnostic pop")				    \
+		int ___ret = bpf_seq_printf(seq, ___fmt, sizeof(___fmt),    \
+					    ___param, sizeof(___param));    \
+		___ret;							    \
+	})
+
 #endif
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 977add1b73e2..6c2f46908f4d 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6586,6 +6586,8 @@ static struct bpf_link *attach_trace(const struct bpf_sec_def *sec,
 				     struct bpf_program *prog);
 static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
 				   struct bpf_program *prog);
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+				    struct bpf_program *prog);
 
 static const struct bpf_sec_def section_defs[] = {
 	BPF_PROG_SEC("socket",			BPF_PROG_TYPE_SOCKET_FILTER),
@@ -6629,6 +6631,10 @@ static const struct bpf_sec_def section_defs[] = {
 		.is_attach_btf = true,
 		.expected_attach_type = BPF_LSM_MAC,
 		.attach_fn = attach_lsm),
+	SEC_DEF("iter/", TRACING,
+		.expected_attach_type = BPF_TRACE_ITER,
+		.is_attach_btf = true,
+		.attach_fn = attach_iter),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
@@ -6891,6 +6897,7 @@ invalid_prog:
 
 #define BTF_TRACE_PREFIX "btf_trace_"
 #define BTF_LSM_PREFIX "bpf_lsm_"
+#define BTF_ITER_PREFIX "__bpf_iter__"
 #define BTF_MAX_NAME_SIZE 128
 
 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
@@ -6921,6 +6928,9 @@ static inline int __find_vmlinux_btf_id(struct btf *btf, const char *name,
 	else if (attach_type == BPF_LSM_MAC)
 		err = find_btf_by_prefix_kind(btf, BTF_LSM_PREFIX, name,
 					      BTF_KIND_FUNC);
+	else if (attach_type == BPF_TRACE_ITER)
+		err = find_btf_by_prefix_kind(btf, BTF_ITER_PREFIX, name,
+					      BTF_KIND_FUNC);
 	else
 		err = btf__find_by_name_kind(btf, name, BTF_KIND_FUNC);
 
@@ -7848,6 +7858,12 @@ static struct bpf_link *attach_lsm(const struct bpf_sec_def *sec,
 	return bpf_program__attach_lsm(prog);
 }
 
+static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
+				    struct bpf_program *prog)
+{
+	return bpf_program__attach_iter(prog, NULL);
+}
+
 struct bpf_link *
 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 {
@@ -7882,6 +7898,42 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 	return link;
 }
 
+struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+			 const struct bpf_iter_attach_opts *opts)
+{
+	char errmsg[STRERR_BUFSIZE];
+	struct bpf_link *link;
+	int prog_fd, link_fd;
+
+	if (!OPTS_VALID(opts, bpf_iter_attach_opts))
+		return ERR_PTR(-EINVAL);
+
+	prog_fd = bpf_program__fd(prog);
+	if (prog_fd < 0) {
+		pr_warn("program '%s': can't attach before loaded\n",
+			bpf_program__title(prog, false));
+		return ERR_PTR(-EINVAL);
+	}
+
+	link = calloc(1, sizeof(*link));
+	if (!link)
+		return ERR_PTR(-ENOMEM);
+	link->detach = &bpf_link__detach_fd;
+
+	link_fd = bpf_link_create(prog_fd, 0, BPF_TRACE_ITER, NULL);
+	if (link_fd < 0) {
+		link_fd = -errno;
+		free(link);
+		pr_warn("program '%s': failed to attach to iterator: %s\n",
+			bpf_program__title(prog, false),
+			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
+		return ERR_PTR(link_fd);
+	}
+	link->fd = link_fd;
+	return link;
+}
+
 struct bpf_link *bpf_program__attach(struct bpf_program *prog)
 {
 	const struct bpf_sec_def *sec_def;
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index f1dacecb1619..8ea69558f0a8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -258,6 +258,15 @@ struct bpf_map;
 
 LIBBPF_API struct bpf_link *bpf_map__attach_struct_ops(struct bpf_map *map);
 
+struct bpf_iter_attach_opts {
+	size_t sz; /* size of this struct for forward/backward compatibility */
+};
+#define bpf_iter_attach_opts__last_field sz
+
+LIBBPF_API struct bpf_link *
+bpf_program__attach_iter(struct bpf_program *prog,
+			 const struct bpf_iter_attach_opts *opts);
+
 struct bpf_insn;
 
 /*
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index e03bd4db827e..0133d469d30b 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -258,6 +258,8 @@ LIBBPF_0.0.8 {
 LIBBPF_0.0.9 {
 	global:
 		bpf_enable_stats;
+		bpf_iter_create;
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
+		bpf_program__attach_iter;
 } LIBBPF_0.0.8;
-- 
cgit v1.2.3-59-g8ed1b


From 5fbc220862fc7a53a0455ccd2d96c82141e222d4 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:19 -0700
Subject: tools/libbpf: Add offsetof/container_of macro in bpf_helpers.h

These two helpers will be used later in bpf_iter bpf program
bpf_iter_netlink.c. Put them in bpf_helpers.h since they could
be useful in other cases.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175919.2477104-1-yhs@fb.com
---
 tools/lib/bpf/bpf_helpers.h | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/tools/lib/bpf/bpf_helpers.h b/tools/lib/bpf/bpf_helpers.h
index da00b87aa199..f67dce2af802 100644
--- a/tools/lib/bpf/bpf_helpers.h
+++ b/tools/lib/bpf/bpf_helpers.h
@@ -36,6 +36,20 @@
 #define __weak __attribute__((weak))
 #endif
 
+/*
+ * Helper macro to manipulate data structures
+ */
+#ifndef offsetof
+#define offsetof(TYPE, MEMBER)  ((size_t)&((TYPE *)0)->MEMBER)
+#endif
+#ifndef container_of
+#define container_of(ptr, type, member)				\
+	({							\
+		void *__mptr = (void *)(ptr);			\
+		((type *)(__mptr - offsetof(type, member)));	\
+	})
+#endif
+
 /*
  * Helper structure used by eBPF C program
  * to describe BPF map attributes to libbpf loader
-- 
cgit v1.2.3-59-g8ed1b


From 9406b485dea5e25bed7c81cd822747d494cc8bde Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:20 -0700
Subject: tools/bpftool: Add bpf_iter support for bpftool

Currently, only one command is supported
  bpftool iter pin <bpf_prog.o> <path>

It will pin the trace/iter bpf program in
the object file <bpf_prog.o> to the <path>
where <path> should be on a bpffs mount.

For example,
  $ bpftool iter pin ./bpf_iter_ipv6_route.o \
    /sys/fs/bpf/my_route
User can then do a `cat` to print out the results:
  $ cat /sys/fs/bpf/my_route
    fe800000000000000000000000000000 40 00000000000000000000000000000000 ...
    00000000000000000000000000000000 00 00000000000000000000000000000000 ...
    00000000000000000000000000000001 80 00000000000000000000000000000000 ...
    fe800000000000008c0162fffebdfd57 80 00000000000000000000000000000000 ...
    ff000000000000000000000000000000 08 00000000000000000000000000000000 ...
    00000000000000000000000000000000 00 00000000000000000000000000000000 ...

The implementation for ipv6_route iterator is in one of subsequent
patches.

This patch also added BPF_LINK_TYPE_ITER to link query.

In the future, we may add additional parameters to the pin command
by parameterizing the bpf iterator. For example, a map_id or pid
may be added to let the bpf program traverse only a single map or task,
similar to kernel seq_file single_open().

We may also add introspection command for targets/iterators by
leveraging the bpf_iter itself.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200509175920.2477247-1-yhs@fb.com
---
 tools/bpf/bpftool/Documentation/bpftool-iter.rst | 83 ++++++++++++++++++++++
 tools/bpf/bpftool/bash-completion/bpftool        | 13 ++++
 tools/bpf/bpftool/iter.c                         | 88 ++++++++++++++++++++++++
 tools/bpf/bpftool/link.c                         |  1 +
 tools/bpf/bpftool/main.c                         |  3 +-
 tools/bpf/bpftool/main.h                         |  1 +
 6 files changed, 188 insertions(+), 1 deletion(-)
 create mode 100644 tools/bpf/bpftool/Documentation/bpftool-iter.rst
 create mode 100644 tools/bpf/bpftool/iter.c

diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
new file mode 100644
index 000000000000..13b173d93890
--- /dev/null
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -0,0 +1,83 @@
+============
+bpftool-iter
+============
+-------------------------------------------------------------------------------
+tool to create BPF iterators
+-------------------------------------------------------------------------------
+
+:Manual section: 8
+
+SYNOPSIS
+========
+
+	**bpftool** [*OPTIONS*] **iter** *COMMAND*
+
+	*COMMANDS* := { **pin** | **help** }
+
+ITER COMMANDS
+===================
+
+|	**bpftool** **iter pin** *OBJ* *PATH*
+|	**bpftool** **iter help**
+|
+|	*OBJ* := /a/file/of/bpf_iter_target.o
+
+
+DESCRIPTION
+===========
+	**bpftool iter pin** *OBJ* *PATH*
+		  A bpf iterator combines kernel iteration over
+		  particular kernel data (e.g., tasks, bpf_maps, etc.)
+		  with a bpf program called for each kernel data object
+		  (e.g., one task, one bpf_map, etc.). User space can
+		  *read* kernel iterator output through the *read()* syscall.
+
+		  The *pin* command creates a bpf iterator from *OBJ*,
+		  and pins it to *PATH*. The *PATH* should be located
+		  in *bpffs* mount. It must not contain a dot
+		  character ('.'), which is reserved for future extensions
+		  of *bpffs*.
+
+		  User can then *cat PATH* to see the bpf iterator output.
+
+	**bpftool iter help**
+		  Print short help message.
+
+OPTIONS
+=======
+	-h, --help
+		  Print short generic help message (similar to **bpftool help**).
+
+	-V, --version
+		  Print version number (similar to **bpftool version**).
+
+	-d, --debug
+		  Print all logs available, even debug-level information. This
+		  includes logs from libbpf as well as from the verifier, when
+		  attempting to load programs.
+
+EXAMPLES
+========
+**# bpftool iter pin bpf_iter_netlink.o /sys/fs/bpf/my_netlink**
+
+::
+
+   Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
+   to /sys/fs/bpf/my_netlink
+
+
+SEE ALSO
+========
+	**bpf**\ (2),
+	**bpf-helpers**\ (7),
+	**bpftool**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-map**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-cgroup**\ (8),
+	**bpftool-feature**\ (8),
+	**bpftool-net**\ (8),
+	**bpftool-perf**\ (8),
+	**bpftool-btf**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index fc989ead7313..9f0f20e73b87 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -610,6 +610,19 @@ _bpftool()
                     ;;
             esac
             ;;
+        iter)
+            case $command in
+                pin)
+                    _filedir
+                    return 0
+                    ;;
+                *)
+                    [[ $prev == $object ]] && \
+                        COMPREPLY=( $( compgen -W 'pin help' \
+                            -- "$cur" ) )
+                    ;;
+            esac
+            ;;
         map)
             local MAP_TYPE='id pinned name'
             case $command in
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
new file mode 100644
index 000000000000..eb5987a0c3b6
--- /dev/null
+++ b/tools/bpf/bpftool/iter.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+// Copyright (C) 2020 Facebook
+
+#define _GNU_SOURCE
+#include <linux/err.h>
+#include <bpf/libbpf.h>
+
+#include "main.h"
+
+static int do_pin(int argc, char **argv)
+{
+	const char *objfile, *path;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	struct bpf_link *link;
+	int err;
+
+	if (!REQ_ARGS(2))
+		usage();
+
+	objfile = GET_ARG();
+	path = GET_ARG();
+
+	obj = bpf_object__open(objfile);
+	if (IS_ERR(obj)) {
+		p_err("can't open objfile %s", objfile);
+		return -1;
+	}
+
+	err = bpf_object__load(obj);
+	if (err) {
+		p_err("can't load objfile %s", objfile);
+		goto close_obj;
+	}
+
+	prog = bpf_program__next(NULL, obj);
+	if (!prog) {
+		p_err("can't find bpf program in objfile %s", objfile);
+		goto close_obj;
+	}
+
+	link = bpf_program__attach_iter(prog, NULL);
+	if (IS_ERR(link)) {
+		err = PTR_ERR(link);
+		p_err("attach_iter failed for program %s",
+		      bpf_program__name(prog));
+		goto close_obj;
+	}
+
+	err = mount_bpffs_for_pin(path);
+	if (err)
+		goto close_link;
+
+	err = bpf_link__pin(link, path);
+	if (err) {
+		p_err("pin_iter failed for program %s to path %s",
+		      bpf_program__name(prog), path);
+		goto close_link;
+	}
+
+close_link:
+	bpf_link__destroy(link);
+close_obj:
+	bpf_object__close(obj);
+	return err;
+}
+
+static int do_help(int argc, char **argv)
+{
+	fprintf(stderr,
+		"Usage: %s %s pin OBJ PATH\n"
+		"       %s %s help\n"
+		"\n",
+		bin_name, argv[-2], bin_name, argv[-2]);
+
+	return 0;
+}
+
+static const struct cmd cmds[] = {
+	{ "help",	do_help },
+	{ "pin",	do_pin },
+	{ 0 }
+};
+
+int do_iter(int argc, char **argv)
+{
+	return cmd_select(cmds, argc, argv, do_help);
+}
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index adc7dc431ed8..b6a0b35c78ae 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -16,6 +16,7 @@ static const char * const link_type_name[] = {
 	[BPF_LINK_TYPE_RAW_TRACEPOINT]		= "raw_tracepoint",
 	[BPF_LINK_TYPE_TRACING]			= "tracing",
 	[BPF_LINK_TYPE_CGROUP]			= "cgroup",
+	[BPF_LINK_TYPE_ITER]			= "iter",
 };
 
 static int link_parse_fd(int *argc, char ***argv)
diff --git a/tools/bpf/bpftool/main.c b/tools/bpf/bpftool/main.c
index 1413a154806e..46bd716a9d86 100644
--- a/tools/bpf/bpftool/main.c
+++ b/tools/bpf/bpftool/main.c
@@ -59,7 +59,7 @@ static int do_help(int argc, char **argv)
 		"       %s batch file FILE\n"
 		"       %s version\n"
 		"\n"
-		"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops }\n"
+		"       OBJECT := { prog | map | link | cgroup | perf | net | feature | btf | gen | struct_ops | iter }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, bin_name, bin_name);
@@ -224,6 +224,7 @@ static const struct cmd cmds[] = {
 	{ "btf",	do_btf },
 	{ "gen",	do_gen },
 	{ "struct_ops",	do_struct_ops },
+	{ "iter",	do_iter },
 	{ "version",	do_version },
 	{ 0 }
 };
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index 9b1fb81a8331..a41cefabccaf 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -199,6 +199,7 @@ int do_feature(int argc, char **argv);
 int do_btf(int argc, char **argv);
 int do_gen(int argc, char **argv);
 int do_struct_ops(int argc, char **argv);
+int do_iter(int argc, char **argv);
 
 int parse_u32_arg(int *argc, char ***argv, __u32 *val, const char *what);
 int prog_parse_fd(int *argc, char ***argv);
-- 
cgit v1.2.3-59-g8ed1b


From 7c128a6bbd4f5b6780a90f3ce9aff192b7dd9d6a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:21 -0700
Subject: tools/bpf: selftests: Add iterator programs for ipv6_route and
 netlink

Two bpf programs are added in this patch for the netlink and ipv6_route
targets. On my VM, I am able to achieve results identical
to /proc/net/netlink and /proc/net/ipv6_route.

  $ cat /proc/net/netlink
  sk               Eth Pid        Groups   Rmem     Wmem     Dump  Locks    Drops    Inode
  000000002c42d58b 0   0          00000000 0        0        0     2        0        7
  00000000a4e8b5e1 0   1          00000551 0        0        0     2        0        18719
  00000000e1b1c195 4   0          00000000 0        0        0     2        0        16422
  000000007e6b29f9 6   0          00000000 0        0        0     2        0        16424
  ....
  00000000159a170d 15  1862       00000002 0        0        0     2        0        1886
  000000009aca4bc9 15  3918224839 00000002 0        0        0     2        0        19076
  00000000d0ab31d2 15  1          00000002 0        0        0     2        0        18683
  000000008398fb08 16  0          00000000 0        0        0     2        0        27
  $ cat /sys/fs/bpf/my_netlink
  sk               Eth Pid        Groups   Rmem     Wmem     Dump  Locks    Drops    Inode
  000000002c42d58b 0   0          00000000 0        0        0     2        0        7
  00000000a4e8b5e1 0   1          00000551 0        0        0     2        0        18719
  00000000e1b1c195 4   0          00000000 0        0        0     2        0        16422
  000000007e6b29f9 6   0          00000000 0        0        0     2        0        16424
  ....
  00000000159a170d 15  1862       00000002 0        0        0     2        0        1886
  000000009aca4bc9 15  3918224839 00000002 0        0        0     2        0        19076
  00000000d0ab31d2 15  1          00000002 0        0        0     2        0        18683
  000000008398fb08 16  0          00000000 0        0        0     2        0        27

  $ cat /proc/net/ipv6_route
  fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001     eth0
  00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200       lo
  00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001       lo
  fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001     eth0
  ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001     eth0
  00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200       lo
  $ cat /sys/fs/bpf/my_ipv6_route
  fe800000000000000000000000000000 40 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000001 00000000 00000001     eth0
  00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200       lo
  00000000000000000000000000000001 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000003 00000000 80200001       lo
  fe80000000000000c04b03fffe7827ce 80 00000000000000000000000000000000 00 00000000000000000000000000000000 00000000 00000002 00000000 80200001     eth0
  ff000000000000000000000000000000 08 00000000000000000000000000000000 00 00000000000000000000000000000000 00000100 00000003 00000000 00000001     eth0
  00000000000000000000000000000000 00 00000000000000000000000000000000 00 00000000000000000000000000000000 ffffffff 00000001 00000000 00200200       lo

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175921.2477493-1-yhs@fb.com
---
 .../selftests/bpf/progs/bpf_iter_ipv6_route.c      | 62 ++++++++++++++++++++
 .../testing/selftests/bpf/progs/bpf_iter_netlink.c | 66 ++++++++++++++++++++++
 2 files changed, 128 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
new file mode 100644
index 000000000000..ab9e2650e021
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -0,0 +1,62 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
+
+#define RTF_GATEWAY		0x0002
+#define IFNAMSIZ		16
+#define fib_nh_gw_family	nh_common.nhc_gw_family
+#define fib_nh_gw6		nh_common.nhc_gw.ipv6
+#define fib_nh_dev		nh_common.nhc_dev
+
+SEC("iter/ipv6_route")
+int dump_ipv6_route(struct bpf_iter__ipv6_route *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct fib6_info *rt = ctx->rt;
+	const struct net_device *dev;
+	struct fib6_nh *fib6_nh;
+	unsigned int flags;
+	struct nexthop *nh;
+
+	if (rt == (void *)0)
+		return 0;
+
+	fib6_nh = &rt->fib6_nh[0];
+	flags = rt->fib6_flags;
+
+	/* FIXME: nexthop_is_multipath is not handled here. */
+	nh = rt->nh;
+	if (rt->nh)
+		fib6_nh = &nh->nh_info->fib6_nh;
+
+	BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+
+	if (CONFIG_IPV6_SUBTREES)
+		BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_src.addr,
+			       rt->fib6_src.plen);
+	else
+		BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 00 ");
+
+	if (fib6_nh->fib_nh_gw_family) {
+		flags |= RTF_GATEWAY;
+		BPF_SEQ_PRINTF(seq, "%pi6 ", &fib6_nh->fib_nh_gw6);
+	} else {
+		BPF_SEQ_PRINTF(seq, "00000000000000000000000000000000 ");
+	}
+
+	dev = fib6_nh->fib_nh_dev;
+	if (dev)
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x %8s\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags, dev->name);
+	else
+		BPF_SEQ_PRINTF(seq, "%08x %08x %08x %08x\n", rt->fib6_metric,
+			       rt->fib6_ref.refs.counter, 0, flags);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
new file mode 100644
index 000000000000..6b40a233d4e0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -0,0 +1,66 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+#define sk_rmem_alloc	sk_backlog.rmem_alloc
+#define sk_refcnt	__sk_common.skc_refcnt
+
+static inline struct inode *SOCK_INODE(struct socket *socket)
+{
+	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
+}
+
+SEC("iter/netlink")
+int dump_netlink(struct bpf_iter__netlink *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct netlink_sock *nlk = ctx->sk;
+	unsigned long group, ino;
+	struct inode *inode;
+	struct socket *sk;
+	struct sock *s;
+
+	if (nlk == (void *)0)
+		return 0;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "sk               Eth Pid        Groups   "
+				    "Rmem     Wmem     Dump  Locks    Drops    "
+				    "Inode\n");
+
+	s = &nlk->sk;
+	BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+
+	if (!nlk->groups)  {
+		group = 0;
+	} else {
+		/* FIXME: temporary use bpf_probe_read here, needs
+		 * verifier support to do direct access.
+		 */
+		bpf_probe_read(&group, sizeof(group), &nlk->groups[0]);
+	}
+	BPF_SEQ_PRINTF(seq, "%-10u %08x %-8d %-8d %-5d %-8d ",
+		       nlk->portid, (u32)group,
+		       s->sk_rmem_alloc.counter,
+		       s->sk_wmem_alloc.refs.counter - 1,
+		       nlk->cb_running, s->sk_refcnt.refs.counter);
+
+	sk = s->sk_socket;
+	if (!sk) {
+		ino = 0;
+	} else {
+		/* FIXME: container_of inside SOCK_INODE has a forced
+		 * type conversion, and direct access cannot be used
+		 * with current verifier.
+		 */
+		inode = SOCK_INODE(sk);
+		bpf_probe_read(&ino, sizeof(ino), &inode->i_ino);
+	}
+	BPF_SEQ_PRINTF(seq, "%-8u %-8lu\n", s->sk_drops.counter, ino);
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From acf61631746c01850a9df0cd5617c5c29214776c Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:22 -0700
Subject: tools/bpf: selftests: Add iter progs for bpf_map/task/task_file

The implementation is arbitrary, just to show how the bpf programs
can be written for bpf_map/task/task_file. They can be customized
for specific needs.

For example, for bpf_map, the iterator prints out:
  $ cat /sys/fs/bpf/my_bpf_map
      id   refcnt  usercnt  locked_vm
       3        2        0         20
       6        2        0         20
       9        2        0         20
      12        2        0         20
      13        2        0         20
      16        2        0         20
      19        2        0         20
      %%% END %%%

For task, the iterator prints out:
  $ cat /sys/fs/bpf/my_task
    tgid      gid
       1        1
       2        2
    ....
    1944     1944
    1948     1948
    1949     1949
    1953     1953
    === END ===

For task/file, the iterator prints out:
  $ cat /sys/fs/bpf/my_task_file
    tgid      gid       fd      file
       1        1        0 ffffffff95c97600
       1        1        1 ffffffff95c97600
       1        1        2 ffffffff95c97600
    ....
    1895     1895      255 ffffffff95c8fe00
    1932     1932        0 ffffffff95c8fe00
    1932     1932        1 ffffffff95c8fe00
    1932     1932        2 ffffffff95c8fe00
    1932     1932        3 ffffffff95c185c0

This is able to print out all open files (fd and file->f_op), so user can compare
f_op against a particular kernel file operations to find what it is.
For example, from /proc/kallsyms, we can find
  ffffffff95c185c0 r eventfd_fops
so we will know tgid 1932 fd 3 is an eventfd file descriptor.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175922.2477576-1-yhs@fb.com
---
 .../testing/selftests/bpf/progs/bpf_iter_bpf_map.c | 28 ++++++++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_task.c  | 25 +++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_task_file.c       | 26 ++++++++++++++++++++
 3 files changed, 79 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_task_file.c

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
new file mode 100644
index 000000000000..4867cd3445c8
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	__u64 seq_num = ctx->meta->seq_num;
+	struct bpf_map *map = ctx->map;
+
+	if (map == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "      %%%%%% END %%%%%%\n");
+		return 0;
+	}
+
+	if (seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "      id   refcnt  usercnt  locked_vm\n");
+
+	BPF_SEQ_PRINTF(seq, "%8u %8ld %8ld %10lu\n", map->id, map->refcnt.counter,
+		       map->usercnt.counter,
+		       map->memory.user->locked_vm.counter);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
new file mode 100644
index 000000000000..90f9011c57ca
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -0,0 +1,25 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+
+	if (task == (void *)0) {
+		BPF_SEQ_PRINTF(seq, "    === END ===\n");
+		return 0;
+	}
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d\n", task->tgid, task->pid);
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
new file mode 100644
index 000000000000..c6ced38f0880
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -0,0 +1,26 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task_file")
+int dump_task_file(struct bpf_iter__task_file *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	__u32 fd = ctx->fd;
+	struct file *file = ctx->file;
+
+	if (task == (void *)0 || file == (void *)0)
+		return 0;
+
+	if (ctx->meta->seq_num == 0)
+		BPF_SEQ_PRINTF(seq, "    tgid      gid       fd      file\n");
+
+	BPF_SEQ_PRINTF(seq, "%8d %8d %8d %lx\n", task->tgid, task->pid, fd,
+		       (long)file->f_op);
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 6879c042e10584ea9d5e2204939cafadcd500465 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Sat, 9 May 2020 10:59:23 -0700
Subject: tools/bpf: selftests: Add bpf_iter selftests

The added test includes the following subtests:
  - test verifier change for btf_id_or_null
  - test load/create_iter/read for
    ipv6_route/netlink/bpf_map/task/task_file
  - test anon bpf iterator
  - test anon bpf iterator reading one char at a time
  - test file bpf iterator
  - test overflow (single bpf program output does not overflow)
  - test overflow (single bpf program output overflows)
  - test bpf prog returning 1

The ipv6_route tests the following verifier change
  - access fields in the variable length array of the structure.

The netlink load tests the following verifier change
  - put a btf_id ptr value in a stack and accessible to
    tracing/iter programs.

The anon bpf iterator also tests link auto attach through skeleton.

  $ test_progs -n 2
  #2/1 btf_id_or_null:OK
  #2/2 ipv6_route:OK
  #2/3 netlink:OK
  #2/4 bpf_map:OK
  #2/5 task:OK
  #2/6 task_file:OK
  #2/7 anon:OK
  #2/8 anon-read-one-char:OK
  #2/9 file:OK
  #2/10 overflow:OK
  #2/11 overflow-e2big:OK
  #2/12 prog-ret-1:OK
  #2 bpf_iter:OK
  Summary: 1/12 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200509175923.2477637-1-yhs@fb.com
---
 tools/testing/selftests/bpf/prog_tests/bpf_iter.c  | 409 +++++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_test_kern1.c      |   4 +
 .../selftests/bpf/progs/bpf_iter_test_kern2.c      |   4 +
 .../selftests/bpf/progs/bpf_iter_test_kern3.c      |  18 +
 .../selftests/bpf/progs/bpf_iter_test_kern4.c      |  52 +++
 .../bpf/progs/bpf_iter_test_kern_common.h          |  22 ++
 6 files changed, 509 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/bpf_iter.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
 create mode 100644 tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h

diff --git a/tools/testing/selftests/bpf/prog_tests/bpf_iter.c b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
new file mode 100644
index 000000000000..87c29dde1cf9
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/bpf_iter.c
@@ -0,0 +1,409 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <test_progs.h>
+#include "bpf_iter_ipv6_route.skel.h"
+#include "bpf_iter_netlink.skel.h"
+#include "bpf_iter_bpf_map.skel.h"
+#include "bpf_iter_task.skel.h"
+#include "bpf_iter_task_file.skel.h"
+#include "bpf_iter_test_kern1.skel.h"
+#include "bpf_iter_test_kern2.skel.h"
+#include "bpf_iter_test_kern3.skel.h"
+#include "bpf_iter_test_kern4.skel.h"
+
+static int duration;
+
+static void test_btf_id_or_null(void)
+{
+	struct bpf_iter_test_kern3 *skel;
+
+	skel = bpf_iter_test_kern3__open_and_load();
+	if (CHECK(skel, "bpf_iter_test_kern3__open_and_load",
+		  "skeleton open_and_load unexpectedly succeeded\n")) {
+		bpf_iter_test_kern3__destroy(skel);
+		return;
+	}
+}
+
+static void do_dummy_read(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+	char buf[16] = {};
+	int iter_fd, len;
+
+	link = bpf_program__attach_iter(prog, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		return;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	/* not check contents, but ensure read() ends without error */
+	while ((len = read(iter_fd, buf, sizeof(buf))) > 0)
+		;
+	CHECK(len < 0, "read", "read failed: %s\n", strerror(errno));
+
+	close(iter_fd);
+
+free_link:
+	bpf_link__destroy(link);
+}
+
+static void test_ipv6_route(void)
+{
+	struct bpf_iter_ipv6_route *skel;
+
+	skel = bpf_iter_ipv6_route__open_and_load();
+	if (CHECK(!skel, "bpf_iter_ipv6_route__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_ipv6_route);
+
+	bpf_iter_ipv6_route__destroy(skel);
+}
+
+static void test_netlink(void)
+{
+	struct bpf_iter_netlink *skel;
+
+	skel = bpf_iter_netlink__open_and_load();
+	if (CHECK(!skel, "bpf_iter_netlink__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_netlink);
+
+	bpf_iter_netlink__destroy(skel);
+}
+
+static void test_bpf_map(void)
+{
+	struct bpf_iter_bpf_map *skel;
+
+	skel = bpf_iter_bpf_map__open_and_load();
+	if (CHECK(!skel, "bpf_iter_bpf_map__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_bpf_map);
+
+	bpf_iter_bpf_map__destroy(skel);
+}
+
+static void test_task(void)
+{
+	struct bpf_iter_task *skel;
+
+	skel = bpf_iter_task__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task);
+
+	bpf_iter_task__destroy(skel);
+}
+
+static void test_task_file(void)
+{
+	struct bpf_iter_task_file *skel;
+
+	skel = bpf_iter_task_file__open_and_load();
+	if (CHECK(!skel, "bpf_iter_task_file__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	do_dummy_read(skel->progs.dump_task_file);
+
+	bpf_iter_task_file__destroy(skel);
+}
+
+/* The expected string is less than 16 bytes */
+static int do_read_with_fd(int iter_fd, const char *expected,
+			   bool read_one_char)
+{
+	int err = -1, len, read_buf_len, start;
+	char buf[16] = {};
+
+	read_buf_len = read_one_char ? 1 : 16;
+	start = 0;
+	while ((len = read(iter_fd, buf + start, read_buf_len)) > 0) {
+		start += len;
+		if (CHECK(start >= 16, "read", "read len %d\n", len))
+			return -1;
+		read_buf_len = read_one_char ? 1 : 16 - start;
+	}
+	if (CHECK(len < 0, "read", "read failed: %s\n", strerror(errno)))
+		return -1;
+
+	err = strcmp(buf, expected);
+	if (CHECK(err, "read", "incorrect read result: buf %s, expected %s\n",
+		  buf, expected))
+		return -1;
+
+	return 0;
+}
+
+static void test_anon_iter(bool read_one_char)
+{
+	struct bpf_iter_test_kern1 *skel;
+	struct bpf_link *link;
+	int iter_fd, err;
+
+	skel = bpf_iter_test_kern1__open_and_load();
+	if (CHECK(!skel, "bpf_iter_test_kern1__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	err = bpf_iter_test_kern1__attach(skel);
+	if (CHECK(err, "bpf_iter_test_kern1__attach",
+		  "skeleton attach failed\n")) {
+		goto out;
+	}
+
+	link = skel->links.dump_task;
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto out;
+
+	do_read_with_fd(iter_fd, "abcd", read_one_char);
+	close(iter_fd);
+
+out:
+	bpf_iter_test_kern1__destroy(skel);
+}
+
+static int do_read(const char *path, const char *expected)
+{
+	int err, iter_fd;
+
+	iter_fd = open(path, O_RDONLY);
+	if (CHECK(iter_fd < 0, "open", "open %s failed: %s\n",
+		  path, strerror(errno)))
+		return -1;
+
+	err = do_read_with_fd(iter_fd, expected, false);
+	close(iter_fd);
+	return err;
+}
+
+static void test_file_iter(void)
+{
+	const char *path = "/sys/fs/bpf/bpf_iter_test1";
+	struct bpf_iter_test_kern1 *skel1;
+	struct bpf_iter_test_kern2 *skel2;
+	struct bpf_link *link;
+	int err;
+
+	skel1 = bpf_iter_test_kern1__open_and_load();
+	if (CHECK(!skel1, "bpf_iter_test_kern1__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		return;
+
+	link = bpf_program__attach_iter(skel1->progs.dump_task, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto out;
+
+	/* unlink this path if it exists. */
+	unlink(path);
+
+	err = bpf_link__pin(link, path);
+	if (CHECK(err, "pin_iter", "pin_iter to %s failed: %d\n", path, err))
+		goto free_link;
+
+	err = do_read(path, "abcd");
+	if (err)
+		goto unlink_path;
+
+	/* file based iterator seems working fine. Let us a link update
+	 * of the underlying link and `cat` the iterator again, its content
+	 * should change.
+	 */
+	skel2 = bpf_iter_test_kern2__open_and_load();
+	if (CHECK(!skel2, "bpf_iter_test_kern2__open_and_load",
+		  "skeleton open_and_load failed\n"))
+		goto unlink_path;
+
+	err = bpf_link__update_program(link, skel2->progs.dump_task);
+	if (CHECK(err, "update_prog", "update_prog failed\n"))
+		goto destroy_skel2;
+
+	do_read(path, "ABCD");
+
+destroy_skel2:
+	bpf_iter_test_kern2__destroy(skel2);
+unlink_path:
+	unlink(path);
+free_link:
+	bpf_link__destroy(link);
+out:
+	bpf_iter_test_kern1__destroy(skel1);
+}
+
+static void test_overflow(bool test_e2big_overflow, bool ret1)
+{
+	__u32 map_info_len, total_read_len, expected_read_len;
+	int err, iter_fd, map1_fd, map2_fd, len;
+	struct bpf_map_info map_info = {};
+	struct bpf_iter_test_kern4 *skel;
+	struct bpf_link *link;
+	__u32 page_size;
+	char *buf;
+
+	skel = bpf_iter_test_kern4__open();
+	if (CHECK(!skel, "bpf_iter_test_kern4__open",
+		  "skeleton open failed\n"))
+		return;
+
+	/* create two maps: bpf program will only do bpf_seq_write
+	 * for these two maps. The goal is one map output almost
+	 * fills seq_file buffer and then the other will trigger
+	 * overflow and needs restart.
+	 */
+	map1_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (CHECK(map1_fd < 0, "bpf_create_map",
+		  "map_creation failed: %s\n", strerror(errno)))
+		goto out;
+	map2_fd = bpf_create_map(BPF_MAP_TYPE_ARRAY, 4, 8, 1, 0);
+	if (CHECK(map2_fd < 0, "bpf_create_map",
+		  "map_creation failed: %s\n", strerror(errno)))
+		goto free_map1;
+
+	/* bpf_seq_printf kernel buffer is one page, so one map
+	 * bpf_seq_write will mostly fill it, and the other map
+	 * will partially fill and then trigger overflow and need
+	 * bpf_seq_read restart.
+	 */
+	page_size = sysconf(_SC_PAGE_SIZE);
+
+	if (test_e2big_overflow) {
+		skel->rodata->print_len = (page_size + 8) / 8;
+		expected_read_len = 2 * (page_size + 8);
+	} else if (!ret1) {
+		skel->rodata->print_len = (page_size - 8) / 8;
+		expected_read_len = 2 * (page_size - 8);
+	} else {
+		skel->rodata->print_len = 1;
+		expected_read_len = 2 * 8;
+	}
+	skel->rodata->ret1 = ret1;
+
+	if (CHECK(bpf_iter_test_kern4__load(skel),
+		  "bpf_iter_test_kern4__load", "skeleton load failed\n"))
+		goto free_map2;
+
+	/* setup filtering map_id in bpf program */
+	map_info_len = sizeof(map_info);
+	err = bpf_obj_get_info_by_fd(map1_fd, &map_info, &map_info_len);
+	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+		  strerror(errno)))
+		goto free_map2;
+	skel->bss->map1_id = map_info.id;
+
+	err = bpf_obj_get_info_by_fd(map2_fd, &map_info, &map_info_len);
+	if (CHECK(err, "get_map_info", "get map info failed: %s\n",
+		  strerror(errno)))
+		goto free_map2;
+	skel->bss->map2_id = map_info.id;
+
+	link = bpf_program__attach_iter(skel->progs.dump_bpf_map, NULL);
+	if (CHECK(IS_ERR(link), "attach_iter", "attach_iter failed\n"))
+		goto free_map2;
+
+	iter_fd = bpf_iter_create(bpf_link__fd(link));
+	if (CHECK(iter_fd < 0, "create_iter", "create_iter failed\n"))
+		goto free_link;
+
+	buf = malloc(expected_read_len);
+	if (!buf)
+		goto close_iter;
+
+	/* do read */
+	total_read_len = 0;
+	if (test_e2big_overflow) {
+		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+			total_read_len += len;
+
+		CHECK(len != -1 || errno != E2BIG, "read",
+		      "expected ret -1, errno E2BIG, but get ret %d, error %s\n",
+			  len, strerror(errno));
+		goto free_buf;
+	} else if (!ret1) {
+		while ((len = read(iter_fd, buf, expected_read_len)) > 0)
+			total_read_len += len;
+
+		if (CHECK(len < 0, "read", "read failed: %s\n",
+			  strerror(errno)))
+			goto free_buf;
+	} else {
+		do {
+			len = read(iter_fd, buf, expected_read_len);
+			if (len > 0)
+				total_read_len += len;
+		} while (len > 0 || len == -EAGAIN);
+
+		if (CHECK(len < 0, "read", "read failed: %s\n",
+			  strerror(errno)))
+			goto free_buf;
+	}
+
+	if (CHECK(total_read_len != expected_read_len, "read",
+		  "total len %u, expected len %u\n", total_read_len,
+		  expected_read_len))
+		goto free_buf;
+
+	if (CHECK(skel->bss->map1_accessed != 1, "map1_accessed",
+		  "expected 1 actual %d\n", skel->bss->map1_accessed))
+		goto free_buf;
+
+	if (CHECK(skel->bss->map2_accessed != 2, "map2_accessed",
+		  "expected 2 actual %d\n", skel->bss->map2_accessed))
+		goto free_buf;
+
+	CHECK(skel->bss->map2_seqnum1 != skel->bss->map2_seqnum2,
+	      "map2_seqnum", "two different seqnum %lld %lld\n",
+	      skel->bss->map2_seqnum1, skel->bss->map2_seqnum2);
+
+free_buf:
+	free(buf);
+close_iter:
+	close(iter_fd);
+free_link:
+	bpf_link__destroy(link);
+free_map2:
+	close(map2_fd);
+free_map1:
+	close(map1_fd);
+out:
+	bpf_iter_test_kern4__destroy(skel);
+}
+
+void test_bpf_iter(void)
+{
+	if (test__start_subtest("btf_id_or_null"))
+		test_btf_id_or_null();
+	if (test__start_subtest("ipv6_route"))
+		test_ipv6_route();
+	if (test__start_subtest("netlink"))
+		test_netlink();
+	if (test__start_subtest("bpf_map"))
+		test_bpf_map();
+	if (test__start_subtest("task"))
+		test_task();
+	if (test__start_subtest("task_file"))
+		test_task_file();
+	if (test__start_subtest("anon"))
+		test_anon_iter(false);
+	if (test__start_subtest("anon-read-one-char"))
+		test_anon_iter(true);
+	if (test__start_subtest("file"))
+		test_file_iter();
+	if (test__start_subtest("overflow"))
+		test_overflow(false, false);
+	if (test__start_subtest("overflow-e2big"))
+		test_overflow(true, false);
+	if (test__start_subtest("prog-ret-1"))
+		test_overflow(false, true);
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
new file mode 100644
index 000000000000..c71a7c283108
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern1.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'a'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
new file mode 100644
index 000000000000..8bdc8dc07444
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern2.c
@@ -0,0 +1,4 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define START_CHAR 'A'
+#include "bpf_iter_test_kern_common.h"
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
new file mode 100644
index 000000000000..636a00fa074d
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -0,0 +1,18 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct task_struct *task = ctx->task;
+	int tgid;
+
+	tgid = task->tgid;
+	bpf_seq_write(seq, &tgid, sizeof(tgid));
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
new file mode 100644
index 000000000000..b18dc0471d07
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -0,0 +1,52 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+__u32 map1_id = 0, map2_id = 0;
+__u32 map1_accessed = 0, map2_accessed = 0;
+__u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
+
+static volatile const __u32 print_len;
+static volatile const __u32 ret1;
+
+SEC("iter/bpf_map")
+int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	struct bpf_map *map = ctx->map;
+	__u64 seq_num;
+	int i, ret = 0;
+
+	if (map == (void *)0)
+		return 0;
+
+	/* only dump map1_id and map2_id */
+	if (map->id != map1_id && map->id != map2_id)
+		return 0;
+
+	seq_num = ctx->meta->seq_num;
+	if (map->id == map1_id) {
+		map1_seqnum = seq_num;
+		map1_accessed++;
+	}
+
+	if (map->id == map2_id) {
+		if (map2_accessed == 0) {
+			map2_seqnum1 = seq_num;
+			if (ret1)
+				ret = 1;
+		} else {
+			map2_seqnum2 = seq_num;
+		}
+		map2_accessed++;
+	}
+
+	/* fill seq_file buffer */
+	for (i = 0; i < print_len; i++)
+		bpf_seq_write(seq, &seq_num, sizeof(seq_num));
+
+	return ret;
+}
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
new file mode 100644
index 000000000000..bdd51cf14b54
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2020 Facebook */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+int count = 0;
+
+SEC("iter/task")
+int dump_task(struct bpf_iter__task *ctx)
+{
+	struct seq_file *seq = ctx->meta->seq;
+	char c;
+
+	if (count < 4) {
+		c = START_CHAR + count;
+		bpf_seq_write(seq, &c, sizeof(c));
+		count++;
+	}
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From b4563facdcae55c83039d5efcc3b45a63da14d2f Mon Sep 17 00:00:00 2001
From: Song Liu <songliubraving@fb.com>
Date: Mon, 4 May 2020 10:36:26 -0700
Subject: bpf, runqslower: include proper uapi/bpf.h

runqslower doesn't specify include path for uapi/bpf.h. This causes the
following warning:

In file included from runqslower.c:10:
.../tools/testing/selftests/bpf/tools/include/bpf/bpf.h:234:38:
warning: 'enum bpf_stats_type' declared inside parameter list will not
be visible outside of this definition or declaration
  234 | LIBBPF_API int bpf_enable_stats(enum bpf_stats_type type);

Fix this by adding -I tools/include/uapi to the Makefile.

Reported-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Song Liu <songliubraving@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/runqslower/Makefile | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/tools/bpf/runqslower/Makefile b/tools/bpf/runqslower/Makefile
index 8a6f82e56a24..fb1337d69868 100644
--- a/tools/bpf/runqslower/Makefile
+++ b/tools/bpf/runqslower/Makefile
@@ -8,7 +8,8 @@ BPFTOOL ?= $(DEFAULT_BPFTOOL)
 LIBBPF_SRC := $(abspath ../../lib/bpf)
 BPFOBJ := $(OUTPUT)/libbpf.a
 BPF_INCLUDE := $(OUTPUT)
-INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)
+INCLUDES := -I$(OUTPUT) -I$(BPF_INCLUDE) -I$(abspath ../../lib)        \
+       -I$(abspath ../../include/uapi)
 CFLAGS := -g -Wall
 
 # Try to detect best kernel BTF source
-- 
cgit v1.2.3-59-g8ed1b


From e7bb7ecefa817543e11fa3c1c3e55deb90b02e6c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:59:21 -0500
Subject: IB/mlx4: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/mlx4/qp.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h
index 8e2828d48d7f..9db93e487496 100644
--- a/include/linux/mlx4/qp.h
+++ b/include/linux/mlx4/qp.h
@@ -362,7 +362,7 @@ struct mlx4_wqe_datagram_seg {
 
 struct mlx4_wqe_lso_seg {
 	__be32			mss_hdr_size;
-	__be32			header[0];
+	__be32			header[];
 };
 
 enum mlx4_wqe_bind_seg_flags2 {
-- 
cgit v1.2.3-59-g8ed1b


From 7a36e4918e30fbd3c820f723ef53d8505283c9e0 Mon Sep 17 00:00:00 2001
From: Kevin Hao <haokexin@gmail.com>
Date: Sat, 9 May 2020 18:43:10 +0800
Subject: octeontx2-pf: Use the napi_alloc_frag() to alloc the pool buffers

In the current code, octeontx2 uses its own method to allocate
the pool buffers, but there are some issues with this implementation.
1. We have to run otx2_get_page() for each allocation cycle, which is
   pretty error prone. As far as I can see, there is no invocation of
   otx2_get_page() in otx2_pool_refill_task(); this leaves the allocated
   pages with the wrong refcount, so they may be freed wrongly.
2. It wastes memory. For example, if we only receive one packet in a
   NAPI RX cycle, we then allocate a 2K buffer with otx2_alloc_rbuf()
   to refill the pool buffers and leave the remaining area of the
   allocated page wasted. On a kernel with 64K pages, 62K is wasted.

IMHO it is really unnecessary to implement our own method for buffer
allocation; we can reuse napi_alloc_frag() to simplify our code.

Signed-off-by: Kevin Hao <haokexin@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../ethernet/marvell/octeontx2/nic/otx2_common.c   | 52 +++++++++-------------
 .../ethernet/marvell/octeontx2/nic/otx2_common.h   | 15 +------
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.c |  3 +-
 .../net/ethernet/marvell/octeontx2/nic/otx2_txrx.h |  4 --
 4 files changed, 24 insertions(+), 50 deletions(-)

diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
index f1d2dea90a8c..5975521a4c86 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.c
@@ -379,40 +379,35 @@ void otx2_config_irq_coalescing(struct otx2_nic *pfvf, int qidx)
 		     (pfvf->hw.cq_ecount_wait - 1));
 }
 
-dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-			   gfp_t gfp)
+dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
 {
 	dma_addr_t iova;
+	u8 *buf;
 
-	/* Check if request can be accommodated in previous allocated page */
-	if (pool->page && ((pool->page_offset + pool->rbsize) <=
-	    (PAGE_SIZE << pool->rbpage_order))) {
-		pool->pageref++;
-		goto ret;
-	}
-
-	otx2_get_page(pool);
-
-	/* Allocate a new page */
-	pool->page = alloc_pages(gfp | __GFP_COMP | __GFP_NOWARN,
-				 pool->rbpage_order);
-	if (unlikely(!pool->page))
+	buf = napi_alloc_frag(pool->rbsize);
+	if (unlikely(!buf))
 		return -ENOMEM;
 
-	pool->page_offset = 0;
-ret:
-	iova = (u64)otx2_dma_map_page(pfvf, pool->page, pool->page_offset,
-				      pool->rbsize, DMA_FROM_DEVICE);
-	if (!iova) {
-		if (!pool->page_offset)
-			__free_pages(pool->page, pool->rbpage_order);
-		pool->page = NULL;
+	iova = dma_map_single_attrs(pfvf->dev, buf, pool->rbsize,
+				    DMA_FROM_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
+	if (unlikely(dma_mapping_error(pfvf->dev, iova))) {
+		page_frag_free(buf);
 		return -ENOMEM;
 	}
-	pool->page_offset += pool->rbsize;
+
 	return iova;
 }
 
+static dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool)
+{
+	dma_addr_t addr;
+
+	local_bh_disable();
+	addr = __otx2_alloc_rbuf(pfvf, pool);
+	local_bh_enable();
+	return addr;
+}
+
 void otx2_tx_timeout(struct net_device *netdev, unsigned int txq)
 {
 	struct otx2_nic *pfvf = netdev_priv(netdev);
@@ -805,7 +800,7 @@ static void otx2_pool_refill_task(struct work_struct *work)
 	free_ptrs = cq->pool_ptrs;
 
 	while (cq->pool_ptrs) {
-		bufptr = otx2_alloc_rbuf(pfvf, rbpool, GFP_KERNEL);
+		bufptr = otx2_alloc_rbuf(pfvf, rbpool);
 		if (bufptr <= 0) {
 			/* Schedule a WQ if we fails to free atleast half of the
 			 * pointers else enable napi for this RQ.
@@ -1064,7 +1059,6 @@ static int otx2_pool_init(struct otx2_nic *pfvf, u16 pool_id,
 		return err;
 
 	pool->rbsize = buf_size;
-	pool->rbpage_order = get_order(buf_size);
 
 	/* Initialize this pool's context via AF */
 	aq = otx2_mbox_alloc_msg_npa_aq_enq(&pfvf->mbox);
@@ -1152,13 +1146,12 @@ int otx2_sq_aura_pool_init(struct otx2_nic *pfvf)
 			return -ENOMEM;
 
 		for (ptr = 0; ptr < num_sqbs; ptr++) {
-			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
+			bufptr = otx2_alloc_rbuf(pfvf, pool);
 			if (bufptr <= 0)
 				return bufptr;
 			otx2_aura_freeptr(pfvf, pool_id, bufptr);
 			sq->sqb_ptrs[sq->sqb_count++] = (u64)bufptr;
 		}
-		otx2_get_page(pool);
 	}
 
 	return 0;
@@ -1204,13 +1197,12 @@ int otx2_rq_aura_pool_init(struct otx2_nic *pfvf)
 	for (pool_id = 0; pool_id < hw->rqpool_cnt; pool_id++) {
 		pool = &pfvf->qset.pool[pool_id];
 		for (ptr = 0; ptr < num_ptrs; ptr++) {
-			bufptr = otx2_alloc_rbuf(pfvf, pool, GFP_KERNEL);
+			bufptr = otx2_alloc_rbuf(pfvf, pool);
 			if (bufptr <= 0)
 				return bufptr;
 			otx2_aura_freeptr(pfvf, pool_id,
 					  bufptr + OTX2_HEAD_ROOM);
 		}
-		otx2_get_page(pool);
 	}
 
 	return 0;
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
index 0b1c653b3449..2fa29889522e 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_common.h
@@ -434,18 +434,6 @@ static inline void otx2_aura_freeptr(struct otx2_nic *pfvf,
 		      otx2_get_regaddr(pfvf, NPA_LF_AURA_OP_FREE0));
 }
 
-/* Update page ref count */
-static inline void otx2_get_page(struct otx2_pool *pool)
-{
-	if (!pool->page)
-		return;
-
-	if (pool->pageref)
-		page_ref_add(pool->page, pool->pageref);
-	pool->pageref = 0;
-	pool->page = NULL;
-}
-
 static inline int otx2_get_pool_idx(struct otx2_nic *pfvf, int type, int idx)
 {
 	if (type == AURA_NIX_SQ)
@@ -589,8 +577,7 @@ int otx2_txschq_config(struct otx2_nic *pfvf, int lvl);
 int otx2_txsch_alloc(struct otx2_nic *pfvf);
 int otx2_txschq_stop(struct otx2_nic *pfvf);
 void otx2_sqb_flush(struct otx2_nic *pfvf);
-dma_addr_t otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool,
-			   gfp_t gfp);
+dma_addr_t __otx2_alloc_rbuf(struct otx2_nic *pfvf, struct otx2_pool *pool);
 int otx2_rxtx_enable(struct otx2_nic *pfvf, bool enable);
 void otx2_ctx_disable(struct mbox *mbox, int type, bool npa);
 int otx2_nix_config_bp(struct otx2_nic *pfvf, bool enable);
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
index 45abe0cd0e7b..b04f5429d72d 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.c
@@ -286,7 +286,7 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 
 	/* Refill pool with new buffers */
 	while (cq->pool_ptrs) {
-		bufptr = otx2_alloc_rbuf(pfvf, cq->rbpool, GFP_ATOMIC);
+		bufptr = __otx2_alloc_rbuf(pfvf, cq->rbpool);
 		if (unlikely(bufptr <= 0)) {
 			struct refill_work *work;
 			struct delayed_work *dwork;
@@ -304,7 +304,6 @@ static int otx2_rx_napi_handler(struct otx2_nic *pfvf,
 		otx2_aura_freeptr(pfvf, cq->cq_idx, bufptr + OTX2_HEAD_ROOM);
 		cq->pool_ptrs--;
 	}
-	otx2_get_page(cq->rbpool);
 
 	return processed_cqe;
 }
diff --git a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
index 4ab32d3adb78..da97f2d4416f 100644
--- a/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
+++ b/drivers/net/ethernet/marvell/octeontx2/nic/otx2_txrx.h
@@ -113,11 +113,7 @@ struct otx2_cq_poll {
 struct otx2_pool {
 	struct qmem		*stack;
 	struct qmem		*fc_addr;
-	u8			rbpage_order;
 	u16			rbsize;
-	u32			page_offset;
-	u16			pageref;
-	struct page		*page;
 };
 
 struct otx2_cq_queue {
-- 
cgit v1.2.3-59-g8ed1b


From 4f6cd04f2ded199931c44b3c95c4938618cd8b82 Mon Sep 17 00:00:00 2001
From: kbuild test robot <lkp@intel.com>
Date: Sun, 10 May 2020 20:26:56 +0800
Subject: dsa: sja1105: fix semicolon.cocci warnings

drivers/net/dsa/sja1105/sja1105_ethtool.c:481:11-12: Unneeded semicolon

 Remove unneeded semicolon.

Generated by: scripts/coccinelle/misc/semicolon.cocci

Fixes: ae1804de93f6 ("dsa: sja1105: dynamically allocate stats structure")
CC: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105_ethtool.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_ethtool.c b/drivers/net/dsa/sja1105/sja1105_ethtool.c
index 709f035055c5..9133a831ec79 100644
--- a/drivers/net/dsa/sja1105/sja1105_ethtool.c
+++ b/drivers/net/dsa/sja1105/sja1105_ethtool.c
@@ -478,7 +478,7 @@ void sja1105_get_ethtool_stats(struct dsa_switch *ds, int port, u64 *data)
 
 	if (priv->info->device_id == SJA1105E_DEVICE_ID ||
 	    priv->info->device_id == SJA1105T_DEVICE_ID)
-		goto out;;
+		goto out;
 
 	memset(data + k, 0, ARRAY_SIZE(sja1105pqrs_extra_port_stats) *
 			sizeof(u64));
-- 
cgit v1.2.3-59-g8ed1b


From d728e6402c0023a46b8595e1736695517fd94a7a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 9 May 2020 22:41:11 +0100
Subject: net: usb: ax88179_178a: remove redundant assignment to variable ret

The variable ret is being initialized with a value that is never read
and it is being updated later with a new value. The initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/ax88179_178a.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/ax88179_178a.c b/drivers/net/usb/ax88179_178a.c
index b05bb11a02cb..950711448f39 100644
--- a/drivers/net/usb/ax88179_178a.c
+++ b/drivers/net/usb/ax88179_178a.c
@@ -860,7 +860,7 @@ static int ax88179_set_eee(struct net_device *net, struct ethtool_eee *edata)
 {
 	struct usbnet *dev = netdev_priv(net);
 	struct ax88179_data *priv = (struct ax88179_data *)dev->data;
-	int ret = -EOPNOTSUPP;
+	int ret;
 
 	priv->eee_enabled = edata->eee_enabled;
 	if (!priv->eee_enabled) {
-- 
cgit v1.2.3-59-g8ed1b


From 1ea08c6bce050a38aa303485af940645cc7b6375 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 9 May 2020 22:48:03 +0100
Subject: net: huawei_cdc_ncm: remove redundant assignment to variable ret

The variable ret is being initialized with a value that is never read
and it is being updated later with a new value. The initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/huawei_cdc_ncm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/huawei_cdc_ncm.c b/drivers/net/usb/huawei_cdc_ncm.c
index 099d84827004..a87f0dabcdb7 100644
--- a/drivers/net/usb/huawei_cdc_ncm.c
+++ b/drivers/net/usb/huawei_cdc_ncm.c
@@ -67,7 +67,7 @@ static int huawei_cdc_ncm_bind(struct usbnet *usbnet_dev,
 {
 	struct cdc_ncm_ctx *ctx;
 	struct usb_driver *subdriver = ERR_PTR(-ENODEV);
-	int ret = -ENODEV;
+	int ret;
 	struct huawei_cdc_ncm_state *drvstate = (void *)&usbnet_dev->data;
 	int drvflags = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From b9f96423bba6155cdf54f96f0b1e43fa6d0b0b74 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Sat, 9 May 2020 22:57:56 +0100
Subject: net: usb: qmi_wwan: remove redundant assignment to variable status
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The variable status is being initialized with a value that is never read
and it is being updated later with a new value. The initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Bjørn Mork <bjorn@mork.no>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/usb/qmi_wwan.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/usb/qmi_wwan.c b/drivers/net/usb/qmi_wwan.c
index 4bb8552a00d3..b0eab6e5279d 100644
--- a/drivers/net/usb/qmi_wwan.c
+++ b/drivers/net/usb/qmi_wwan.c
@@ -719,7 +719,7 @@ static int qmi_wwan_change_dtr(struct usbnet *dev, bool on)
 
 static int qmi_wwan_bind(struct usbnet *dev, struct usb_interface *intf)
 {
-	int status = -1;
+	int status;
 	u8 *buf = intf->cur_altsetting->extra;
 	int len = intf->cur_altsetting->extralen;
 	struct usb_interface_descriptor *desc = &intf->cur_altsetting->desc;
-- 
cgit v1.2.3-59-g8ed1b


From a68a813836e12b15715d9101309899123c250302 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:30 +0200
Subject: net: phy: Add cable test support to state machine

Running a cable test is disruptive to normal operation of the PHY and
can take 5 to 10 seconds to complete. The RTNL lock cannot be held
for this amount of time, so add a new state to the state machine for
running a cable test.

The driver is expected to implement two functions. The first is used
to start a cable test. Once the test has started, it should return.

The second function is called once per second, or on interrupt to
check if the cable test is complete, and to allow the PHY to report
the status.

v2:
Rename phy_cable_test_abort to phy_abort_cable_test
Return different extack when already running test
Use phy_init_hw() to reset the PHY

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy.c | 76 +++++++++++++++++++++++++++++++++++++++++++++++++++
 include/linux/phy.h   | 28 +++++++++++++++++++
 2 files changed, 104 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 8c22d02b4218..0f4b27215429 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -15,6 +15,7 @@
 #include <linux/interrupt.h>
 #include <linux/delay.h>
 #include <linux/netdevice.h>
+#include <linux/netlink.h>
 #include <linux/etherdevice.h>
 #include <linux/skbuff.h>
 #include <linux/mm.h>
@@ -44,6 +45,7 @@ static const char *phy_state_to_str(enum phy_state st)
 	PHY_STATE_STR(UP)
 	PHY_STATE_STR(RUNNING)
 	PHY_STATE_STR(NOLINK)
+	PHY_STATE_STR(CABLETEST)
 	PHY_STATE_STR(HALTED)
 	}
 
@@ -472,6 +474,62 @@ static void phy_trigger_machine(struct phy_device *phydev)
 	phy_queue_state_machine(phydev, 0);
 }
 
+static void phy_abort_cable_test(struct phy_device *phydev)
+{
+	int err;
+
+	err = phy_init_hw(phydev);
+	if (err)
+		phydev_err(phydev, "Error while aborting cable test");
+}
+
+int phy_start_cable_test(struct phy_device *phydev,
+			 struct netlink_ext_ack *extack)
+{
+	int err;
+
+	if (!(phydev->drv &&
+	      phydev->drv->cable_test_start &&
+	      phydev->drv->cable_test_get_status)) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY driver does not support cable testing");
+		return -EOPNOTSUPP;
+	}
+
+	mutex_lock(&phydev->lock);
+	if (phydev->state == PHY_CABLETEST) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY already performing a test");
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (phydev->state < PHY_UP ||
+	    phydev->state > PHY_CABLETEST) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY not configured. Try setting interface up");
+		err = -EBUSY;
+		goto out;
+	}
+
+	/* Mark the carrier down until the test is complete */
+	phy_link_down(phydev, true);
+
+	err = phydev->drv->cable_test_start(phydev);
+	if (err) {
+		phy_link_up(phydev);
+		goto out;
+	}
+
+	phydev->state = PHY_CABLETEST;
+
+out:
+	mutex_unlock(&phydev->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(phy_start_cable_test);
+
 static int phy_config_aneg(struct phy_device *phydev)
 {
 	if (phydev->drv->config_aneg)
@@ -810,6 +868,9 @@ void phy_stop(struct phy_device *phydev)
 
 	mutex_lock(&phydev->lock);
 
+	if (phydev->state == PHY_CABLETEST)
+		phy_abort_cable_test(phydev);
+
 	if (phydev->sfp_bus)
 		sfp_upstream_stop(phydev->sfp_bus);
 
@@ -872,6 +933,7 @@ void phy_state_machine(struct work_struct *work)
 			container_of(dwork, struct phy_device, state_queue);
 	bool needs_aneg = false, do_suspend = false;
 	enum phy_state old_state;
+	bool finished = false;
 	int err = 0;
 
 	mutex_lock(&phydev->lock);
@@ -890,6 +952,20 @@ void phy_state_machine(struct work_struct *work)
 	case PHY_RUNNING:
 		err = phy_check_link_status(phydev);
 		break;
+	case PHY_CABLETEST:
+		err = phydev->drv->cable_test_get_status(phydev, &finished);
+		if (err) {
+			phy_abort_cable_test(phydev);
+			needs_aneg = true;
+			phydev->state = PHY_UP;
+			break;
+		}
+
+		if (finished) {
+			needs_aneg = true;
+			phydev->state = PHY_UP;
+		}
+		break;
 	case PHY_HALTED:
 		if (phydev->link) {
 			phydev->link = 0;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index a2b91b5f9d0a..632403fc34f4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -15,6 +15,7 @@
 #include <linux/spinlock.h>
 #include <linux/ethtool.h>
 #include <linux/linkmode.h>
+#include <linux/netlink.h>
 #include <linux/mdio.h>
 #include <linux/mii.h>
 #include <linux/mii_timestamper.h>
@@ -372,6 +373,12 @@ struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr);
  * - irq or timer will set NOLINK if link goes down
  * - phy_stop moves to HALTED
  *
+ * CABLETEST: PHY is performing a cable test. Packet reception/sending
+ * is not expected to work, carrier will be indicated as down. PHY will be
+ * polled once per second, or on interrupt, for its current state.
+ * Once complete, move to UP to restart the PHY.
+ * - phy_stop aborts the running test and moves to HALTED
+ *
  * HALTED: PHY is up, but no polling or interrupts are done. Or
  * PHY is in an error state.
  * - phy_start moves to UP
@@ -383,6 +390,7 @@ enum phy_state {
 	PHY_UP,
 	PHY_RUNNING,
 	PHY_NOLINK,
+	PHY_CABLETEST,
 };
 
 /**
@@ -689,6 +697,13 @@ struct phy_driver {
 	int (*module_eeprom)(struct phy_device *dev,
 			     struct ethtool_eeprom *ee, u8 *data);
 
+	/* Start a cable test */
+	int (*cable_test_start)(struct phy_device *dev);
+	/* Once per second, or on interrupt, request the status of the
+	 * test.
+	 */
+	int (*cable_test_get_status)(struct phy_device *dev, bool *finished);
+
 	/* Get statistics from the phy using ethtool */
 	int (*get_sset_count)(struct phy_device *dev);
 	void (*get_strings)(struct phy_device *dev, u8 *data);
@@ -1227,6 +1242,19 @@ int phy_speed_up(struct phy_device *phydev);
 int phy_restart_aneg(struct phy_device *phydev);
 int phy_reset_after_clk_enable(struct phy_device *phydev);
 
+#if IS_ENABLED(CONFIG_PHYLIB)
+int phy_start_cable_test(struct phy_device *phydev,
+			 struct netlink_ext_ack *extack);
+#else
+static inline
+int phy_start_cable_test(struct phy_device *phydev,
+			 struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support");
+	return -EOPNOTSUPP;
+}
+#endif
+
 static inline void phy_device_reset(struct phy_device *phydev, int value)
 {
 	mdio_device_reset(&phydev->mdio, value);
-- 
cgit v1.2.3-59-g8ed1b


From 97c22438963a7484c05c59ab6654e30f0a3e9288 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:32 +0200
Subject: net: phy: Add support for polling cable test

Some PHYs are not capable of generating interrupts when a cable test
finished. They do however support interrupts for normal operations,
like link up/down. As such, the PHY state machine would normally not
poll the PHY.

Add support for indicating the PHY state machine must poll the PHY
when performing a cable test.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy.c | 2 ++
 include/linux/phy.h   | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 0f4b27215429..9fa61019533f 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -523,6 +523,8 @@ int phy_start_cable_test(struct phy_device *phydev,
 
 	phydev->state = PHY_CABLETEST;
 
+	if (phy_polling_mode(phydev))
+		phy_trigger_machine(phydev);
 out:
 	mutex_unlock(&phydev->lock);
 
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 632403fc34f4..f58eee735a45 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -79,6 +79,7 @@ extern const int phy_10gbit_features_array[1];
 
 #define PHY_IS_INTERNAL		0x00000001
 #define PHY_RST_AFTER_CLK_EN	0x00000002
+#define PHY_POLL_CABLE_TEST	0x00000004
 #define MDIO_DEVICE_IS_PHY	0x80000000
 
 /* Interface Mode definitions */
@@ -1061,6 +1062,10 @@ static inline bool phy_interrupt_is_valid(struct phy_device *phydev)
  */
 static inline bool phy_polling_mode(struct phy_device *phydev)
 {
+	if (phydev->state == PHY_CABLETEST)
+		if (phydev->drv->flags & PHY_POLL_CABLE_TEST)
+			return true;
+
 	return phydev->irq == PHY_POLL;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 11ca3c4261cdb4e2f33e32daf6447f8185843317 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:33 +0200
Subject: net: ethtool: netlink: Add support for triggering a cable test

Add new ethtool netlink calls to trigger the starting of a PHY cable
test.

Add Kconfig'ury to ETHTOOL_NETLINK so that PHYLIB is not a module when
ETHTOOL_NETLINK is builtin, which would result in kernel linking errors.

v2:
Remove unwanted white space change
Remove ethnl_cable_test_act_ops and use doit handler
Rename cable_test_set_policy cable_test_act_policy
Remove ETHTOOL_MSG_CABLE_TEST_ACT_REPLY

v3:
Remove ETHTOOL_MSG_CABLE_TEST_ACT_REPLY from documentation
Remove unused cable_test_get_policy
Add Reviewed-by tags

v4:
Remove unwanted blank line

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst | 16 ++++++++-
 include/uapi/linux/ethtool_netlink.h         | 12 +++++++
 net/Kconfig                                  |  1 +
 net/ethtool/Makefile                         |  2 +-
 net/ethtool/cabletest.c                      | 54 ++++++++++++++++++++++++++++
 net/ethtool/netlink.c                        |  5 +++
 net/ethtool/netlink.h                        |  1 +
 7 files changed, 89 insertions(+), 2 deletions(-)
 create mode 100644 net/ethtool/cabletest.c

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 8f5cefc539cf..a8731d33d0c9 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -204,6 +204,7 @@ Userspace to kernel:
   ``ETHTOOL_MSG_EEE_GET``               get EEE settings
   ``ETHTOOL_MSG_EEE_SET``               set EEE settings
   ``ETHTOOL_MSG_TSINFO_GET``		get timestamping info
+  ``ETHTOOL_MSG_CABLE_TEST_ACT``        action start cable test
   ===================================== ================================
 
 Kernel to userspace:
@@ -958,13 +959,25 @@ Kernel response contents:
 is no special value for this case). The bitset attributes are omitted if they
 would be empty (no bit set).
 
+CABLE_TEST
+==========
+
+Start a cable test.
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_CABLE_TEST_HEADER``       nested  request header
+  ====================================  ======  ==========================
+
 
 Request translation
 ===================
 
 The following table maps ioctl commands to netlink commands providing their
 functionality. Entries with "n/a" in right column are commands which do not
-have their netlink replacement yet.
+have their netlink replacement yet. Entries with "n/a" in the left column
+are netlink only.
 
   =================================== =====================================
   ioctl command                       netlink command
@@ -1053,4 +1066,5 @@ have their netlink replacement yet.
   ``ETHTOOL_PHY_STUNABLE``            n/a
   ``ETHTOOL_GFECPARAM``               n/a
   ``ETHTOOL_SFECPARAM``               n/a
+  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_ACT``
   =================================== =====================================
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index bf1d310e20bc..6bfd648c32cf 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -39,6 +39,7 @@ enum {
 	ETHTOOL_MSG_EEE_GET,
 	ETHTOOL_MSG_EEE_SET,
 	ETHTOOL_MSG_TSINFO_GET,
+	ETHTOOL_MSG_CABLE_TEST_ACT,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -405,6 +406,17 @@ enum {
 	ETHTOOL_A_TSINFO_MAX = (__ETHTOOL_A_TSINFO_CNT - 1)
 };
 
+/* CABLE TEST */
+
+enum {
+	ETHTOOL_A_CABLE_TEST_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_HEADER,		/* nest - _A_HEADER_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_CNT,
+	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
diff --git a/net/Kconfig b/net/Kconfig
index c5ba2d180c43..5c524c6ee75d 100644
--- a/net/Kconfig
+++ b/net/Kconfig
@@ -455,6 +455,7 @@ config FAILOVER
 config ETHTOOL_NETLINK
 	bool "Netlink interface for ethtool"
 	default y
+	depends on PHYLIB=y || PHYLIB=n
 	help
 	  An alternative userspace interface for ethtool based on generic
 	  netlink. It provides better extensibility and some new features,
diff --git a/net/ethtool/Makefile b/net/ethtool/Makefile
index 6c360c9c9370..0c2b94f20499 100644
--- a/net/ethtool/Makefile
+++ b/net/ethtool/Makefile
@@ -6,4 +6,4 @@ obj-$(CONFIG_ETHTOOL_NETLINK)	+= ethtool_nl.o
 
 ethtool_nl-y	:= netlink.o bitset.o strset.o linkinfo.o linkmodes.o \
 		   linkstate.o debug.o wol.o features.o privflags.o rings.o \
-		   channels.o coalesce.o pause.o eee.o tsinfo.o
+		   channels.o coalesce.o pause.o eee.o tsinfo.o cabletest.o
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
new file mode 100644
index 000000000000..aeb6672a46d0
--- /dev/null
+++ b/net/ethtool/cabletest.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0-only
+
+#include <linux/phy.h>
+#include "netlink.h"
+#include "common.h"
+
+/* CABLE_TEST_ACT */
+
+static const struct nla_policy
+cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
+	[ETHTOOL_A_CABLE_TEST_UNSPEC]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_CABLE_TEST_HEADER]		= { .type = NLA_NESTED },
+};
+
+int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_MAX + 1];
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+			  ETHTOOL_A_CABLE_TEST_MAX,
+			  cable_test_act_policy, info->extack);
+	if (ret < 0)
+		return ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info,
+					 tb[ETHTOOL_A_CABLE_TEST_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+
+	dev = req_info.dev;
+	if (!dev->phydev) {
+		ret = -EOPNOTSUPP;
+		goto out_dev_put;
+	}
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = phy_start_cable_test(dev->phydev, info->extack);
+
+	ethnl_ops_complete(dev);
+out_rtnl:
+	rtnl_unlock();
+out_dev_put:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 0c772318c023..b9c9ddf408fe 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -839,6 +839,11 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.dumpit	= ethnl_default_dumpit,
 		.done	= ethnl_default_done,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_CABLE_TEST_ACT,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_act_cable_test,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index 81b8fa020bcb..bd7df592db2f 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -357,5 +357,6 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
+int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
-- 
cgit v1.2.3-59-g8ed1b


From b28efb930ba5a7c263826fe02e13e1b6eadb5559 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:34 +0200
Subject: net: ethtool: Add attributes for cable test reports

Add the attributes needed to report cable test results to userspace.
The reports are expected to be per twisted pair. A nested property per
pair can report the result of the cable test. A nested property can
also report the length of the cable to any fault.

v2:
Grammar fixes
Change length from u16 to u32
s/DEV/HEADER/g
Add status attributes
Rename pairs from numbers to letters.

v3:
Fixed example in document
Add ETHTOOL_A_CABLE_NEST_* enum
Add ETHTOOL_MSG_CABLE_TEST_NTF to documentation

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 Documentation/networking/ethtool-netlink.rst | 41 +++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h         | 59 ++++++++++++++++++++++++++++
 2 files changed, 100 insertions(+)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index a8731d33d0c9..eed46b6aa07d 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -236,6 +236,7 @@ Kernel to userspace:
   ``ETHTOOL_MSG_EEE_GET_REPLY``         EEE settings
   ``ETHTOOL_MSG_EEE_NTF``               EEE settings
   ``ETHTOOL_MSG_TSINFO_GET_REPLY``	timestamping info
+  ``ETHTOOL_MSG_CABLE_TEST_NTF``        Cable test results
   ===================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -970,6 +971,46 @@ Request contents:
   ``ETHTOOL_A_CABLE_TEST_HEADER``       nested  request header
   ====================================  ======  ==========================
 
+Notification contents:
+
+An Ethernet cable typically contains 1, 2 or 4 pairs. The length of
+the pair can only be measured when there is a fault in the pair and
+hence a reflection. Information about the fault may not be available,
+depending on the specific hardware. Hence the contents of the notify
+message are mostly optional. The attributes can be repeated an
+arbitrary number of times, in an arbitrary order, for an arbitrary
+number of pairs.
+
+The example shows the notification sent when the test is completed for
+a T2 cable, i.e. two pairs. One pair is OK and hence has no length
+information. The second pair has a fault and does have length
+information.
+
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_CABLE_TEST_NTF_HEADER``         | nested | reply header        |
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_CABLE_TEST_NTF_STATUS``         | u8     | completed           |
+ +---------------------------------------------+--------+---------------------+
+ | ``ETHTOOL_A_CABLE_TEST_NTF_NEST``           | nested | all the results     |
+ +-+-------------------------------------------+--------+---------------------+
+ | | ``ETHTOOL_A_CABLE_NEST_RESULT``           | nested | cable test result   |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULT_PAIR``         | u8     | pair number         |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULT_CODE``         | u8     | result code         |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | ``ETHTOOL_A_CABLE_NEST_RESULT``           | nested | cable test results  |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULT_PAIR``         | u8     | pair number         |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULT_CODE``         | u8     | result code         |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | ``ETHTOOL_A_CABLE_NEST_FAULT_LENGTH``     | nested | cable length        |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR``   | u8     | pair number         |
+ +-+-+-----------------------------------------+--------+---------------------+
+ | | | ``ETHTOOL_A_CABLE_FAULT_LENGTH_CM``     | u32    | length in cm        |
+ +-+-+-----------------------------------------+--------+---------------------+
 
 Request translation
 ===================
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 6bfd648c32cf..2881af411f76 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -75,6 +75,7 @@ enum {
 	ETHTOOL_MSG_EEE_GET_REPLY,
 	ETHTOOL_MSG_EEE_NTF,
 	ETHTOOL_MSG_TSINFO_GET_REPLY,
+	ETHTOOL_MSG_CABLE_TEST_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -417,6 +418,64 @@ enum {
 	ETHTOOL_A_CABLE_TEST_MAX = __ETHTOOL_A_CABLE_TEST_CNT - 1
 };
 
+/* CABLE TEST NOTIFY */
+enum {
+	ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC,
+	ETHTOOL_A_CABLE_RESULT_CODE_OK,
+	ETHTOOL_A_CABLE_RESULT_CODE_OPEN,
+	ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT,
+	ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT,
+};
+
+enum {
+	ETHTOOL_A_CABLE_PAIR_A,
+	ETHTOOL_A_CABLE_PAIR_B,
+	ETHTOOL_A_CABLE_PAIR_C,
+	ETHTOOL_A_CABLE_PAIR_D,
+};
+
+enum {
+	ETHTOOL_A_CABLE_RESULT_UNSPEC,
+	ETHTOOL_A_CABLE_RESULT_PAIR,		/* u8 ETHTOOL_A_CABLE_PAIR_ */
+	ETHTOOL_A_CABLE_RESULT_CODE,		/* u8 ETHTOOL_A_CABLE_RESULT_CODE_ */
+
+	__ETHTOOL_A_CABLE_RESULT_CNT,
+	ETHTOOL_A_CABLE_RESULT_MAX = (__ETHTOOL_A_CABLE_RESULT_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_FAULT_LENGTH_UNSPEC,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR,	/* u8 ETHTOOL_A_CABLE_PAIR_ */
+	ETHTOOL_A_CABLE_FAULT_LENGTH_CM,	/* u32 */
+
+	__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT,
+	ETHTOOL_A_CABLE_FAULT_LENGTH_MAX = (__ETHTOOL_A_CABLE_FAULT_LENGTH_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED,
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED
+};
+
+enum {
+	ETHTOOL_A_CABLE_NEST_UNSPEC,
+	ETHTOOL_A_CABLE_NEST_RESULT,		/* nest - ETHTOOL_A_CABLE_RESULT_ */
+	ETHTOOL_A_CABLE_NEST_FAULT_LENGTH,	/* nest - ETHTOOL_A_CABLE_FAULT_LENGTH_ */
+	__ETHTOOL_A_CABLE_NEST_CNT,
+	ETHTOOL_A_CABLE_NEST_MAX = (__ETHTOOL_A_CABLE_NEST_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_NTF_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
+	ETHTOOL_A_CABLE_TEST_NTF_NEST,		/* nest - of results: */
+
+	__ETHTOOL_A_CABLE_TEST_NTF_CNT,
+	ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
-- 
cgit v1.2.3-59-g8ed1b


From 0df960f14e17e55e68dfd1342f063d17dbcc6107 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:35 +0200
Subject: net: ethtool: Make helpers public

Make some helpers for building ethtool netlink messages available
outside the compilation unit, so they can be used for building
messages which are not simple get/set.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/netlink.c | 4 ++--
 net/ethtool/netlink.h | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index b9c9ddf408fe..87bc02da74bc 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -181,13 +181,13 @@ err:
 	return NULL;
 }
 
-static void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
+void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd)
 {
 	return genlmsg_put(skb, 0, ++ethnl_bcast_seq, &ethtool_genl_family, 0,
 			   cmd);
 }
 
-static int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
+int ethnl_multicast(struct sk_buff *skb, struct net_device *dev)
 {
 	return genlmsg_multicast_netns(&ethtool_genl_family, dev_net(dev), skb,
 				       0, ETHNL_MCGRP_MONITOR, GFP_KERNEL);
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index bd7df592db2f..b0eb5d920099 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -19,6 +19,8 @@ int ethnl_fill_reply_header(struct sk_buff *skb, struct net_device *dev,
 struct sk_buff *ethnl_reply_init(size_t payload, struct net_device *dev, u8 cmd,
 				 u16 hdr_attrtype, struct genl_info *info,
 				 void **ehdrp);
+void *ethnl_bcastmsg_put(struct sk_buff *skb, u8 cmd);
+int ethnl_multicast(struct sk_buff *skb, struct net_device *dev);
 
 /**
  * ethnl_strz_size() - calculate attribute length for fixed size string
-- 
cgit v1.2.3-59-g8ed1b


From 1dd3f212af30b42c90ba252c165f2f6d2ddf5230 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:36 +0200
Subject: net: ethtool: Add infrastructure for reporting cable test results

Provide infrastructure for PHY drivers to report the cable test
results.  A netlink skb is associated to the phydev. Helpers will be
added which can add results to this skb. Once the test has finished
the results are sent to user space.

When netlink ethtool is not part of the kernel configuration stubs are
provided. It is also impossible to trigger a cable test, so the error
code returned by the alloc function is of no consequence.

v2:
Include the status complete in the netlink notification message

v4:
Replace -EINVAL with -EMSGSIZE

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy.c           | 22 +++++++++++++++--
 include/linux/ethtool_netlink.h | 20 +++++++++++++++
 include/linux/phy.h             |  5 ++++
 net/ethtool/cabletest.c         | 55 +++++++++++++++++++++++++++++++++++++++++
 4 files changed, 100 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 9fa61019533f..afdc1c2146ee 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -22,6 +22,7 @@
 #include <linux/module.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/phy.h>
 #include <linux/phy_led_triggers.h>
 #include <linux/sfp.h>
@@ -30,6 +31,9 @@
 #include <linux/io.h>
 #include <linux/uaccess.h>
 #include <linux/atomic.h>
+#include <net/netlink.h>
+#include <net/genetlink.h>
+#include <net/sock.h>
 
 #define PHY_STATE_TIME	HZ
 
@@ -478,6 +482,8 @@ static void phy_abort_cable_test(struct phy_device *phydev)
 {
 	int err;
 
+	ethnl_cable_test_finished(phydev);
+
 	err = phy_init_hw(phydev);
 	if (err)
 		phydev_err(phydev, "Error while aborting cable test");
@@ -486,7 +492,7 @@ static void phy_abort_cable_test(struct phy_device *phydev)
 int phy_start_cable_test(struct phy_device *phydev,
 			 struct netlink_ext_ack *extack)
 {
-	int err;
+	int err = -ENOMEM;
 
 	if (!(phydev->drv &&
 	      phydev->drv->cable_test_start &&
@@ -512,19 +518,30 @@ int phy_start_cable_test(struct phy_device *phydev,
 		goto out;
 	}
 
+	err = ethnl_cable_test_alloc(phydev);
+	if (err)
+		goto out;
+
 	/* Mark the carrier down until the test is complete */
 	phy_link_down(phydev, true);
 
 	err = phydev->drv->cable_test_start(phydev);
 	if (err) {
 		phy_link_up(phydev);
-		goto out;
+		goto out_free;
 	}
 
 	phydev->state = PHY_CABLETEST;
 
 	if (phy_polling_mode(phydev))
 		phy_trigger_machine(phydev);
+
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+
+out_free:
+	ethnl_cable_test_free(phydev);
 out:
 	mutex_unlock(&phydev->lock);
 
@@ -964,6 +981,7 @@ void phy_state_machine(struct work_struct *work)
 		}
 
 		if (finished) {
+			ethnl_cable_test_finished(phydev);
 			needs_aneg = true;
 			phydev->state = PHY_UP;
 		}
diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index d01b77887f82..7d763ba22f6f 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -14,4 +14,24 @@ enum ethtool_multicast_groups {
 	ETHNL_MCGRP_MONITOR,
 };
 
+struct phy_device;
+
+#if IS_ENABLED(CONFIG_ETHTOOL_NETLINK)
+int ethnl_cable_test_alloc(struct phy_device *phydev);
+void ethnl_cable_test_free(struct phy_device *phydev);
+void ethnl_cable_test_finished(struct phy_device *phydev);
+#else
+static inline int ethnl_cable_test_alloc(struct phy_device *phydev)
+{
+	return -ENOTSUPP;
+}
+
+static inline void ethnl_cable_test_free(struct phy_device *phydev)
+{
+}
+
+static inline void ethnl_cable_test_finished(struct phy_device *phydev)
+{
+}
+#endif /* IS_ENABLED(ETHTOOL_NETLINK) */
 #endif /* _LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index f58eee735a45..169fae4249a9 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -523,6 +523,11 @@ struct phy_device {
 	/* For use by PHYs inside the same package that need a shared state. */
 	struct phy_package_shared *shared;
 
+	/* Reporting cable test results */
+	struct sk_buff *skb;
+	void *ehdr;
+	struct nlattr *nest;
+
 	/* Interrupt and Polling infrastructure */
 	struct delayed_work state_queue;
 
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index aeb6672a46d0..ae8e63647663 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
 #include <linux/phy.h>
+#include <linux/ethtool_netlink.h>
 #include "netlink.h"
 #include "common.h"
 
@@ -52,3 +53,57 @@ out_dev_put:
 	dev_put(dev);
 	return ret;
 }
+
+int ethnl_cable_test_alloc(struct phy_device *phydev)
+{
+	int err = -ENOMEM;
+
+	phydev->skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!phydev->skb)
+		goto out;
+
+	phydev->ehdr = ethnl_bcastmsg_put(phydev->skb,
+					  ETHTOOL_MSG_CABLE_TEST_NTF);
+	if (!phydev->ehdr) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ethnl_fill_reply_header(phydev->skb, phydev->attached_dev,
+				      ETHTOOL_A_CABLE_TEST_NTF_HEADER);
+	if (err)
+		goto out;
+
+	err = nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS,
+			 ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED);
+	if (err)
+		goto out;
+
+	phydev->nest = nla_nest_start(phydev->skb,
+				      ETHTOOL_A_CABLE_TEST_NTF_NEST);
+	if (!phydev->nest)
+		goto out;
+
+	return 0;
+
+out:
+	nlmsg_free(phydev->skb);
+	return err;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_alloc);
+
+void ethnl_cable_test_free(struct phy_device *phydev)
+{
+	nlmsg_free(phydev->skb);
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_free);
+
+void ethnl_cable_test_finished(struct phy_device *phydev)
+{
+	nla_nest_end(phydev->skb, phydev->nest);
+
+	genlmsg_end(phydev->skb, phydev->ehdr);
+
+	ethnl_multicast(phydev->skb, phydev->attached_dev);
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);
-- 
cgit v1.2.3-59-g8ed1b


From 1e2dc14509fd072739e4bab98ac42317267dbad6 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:37 +0200
Subject: net: ethtool: Add helpers for reporting test results

The PHY drivers can use these helpers for reporting the results. The
results get translated into netlink attributes which are added to the
pre-allocated skbuf.

v3:
Poison phydev->skb
Return -EMSGSIZE when ethnl_bcastmsg_put() fails
Return valid error code when nla_nest_start() fails
Use u8 for results
Actually put u32 length into message

v4:
s/ENOTSUPP/EOPNOTSUPP/g

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/linux/ethtool_netlink.h | 15 +++++++++++-
 include/linux/phy.h             |  4 ++++
 net/ethtool/cabletest.c         | 53 ++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 70 insertions(+), 2 deletions(-)

diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index 7d763ba22f6f..e317fc99565e 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -20,10 +20,12 @@ struct phy_device;
 int ethnl_cable_test_alloc(struct phy_device *phydev);
 void ethnl_cable_test_free(struct phy_device *phydev);
 void ethnl_cable_test_finished(struct phy_device *phydev);
+int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result);
+int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm);
 #else
 static inline int ethnl_cable_test_alloc(struct phy_device *phydev)
 {
-	return -ENOTSUPP;
+	return -EOPNOTSUPP;
 }
 
 static inline void ethnl_cable_test_free(struct phy_device *phydev)
@@ -33,5 +35,16 @@ static inline void ethnl_cable_test_free(struct phy_device *phydev)
 static inline void ethnl_cable_test_finished(struct phy_device *phydev)
 {
 }
+static inline int ethnl_cable_test_result(struct phy_device *phydev, u8 pair,
+					  u8 result)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ethnl_cable_test_fault_length(struct phy_device *phydev,
+						u8 pair, u32 cm)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* IS_ENABLED(ETHTOOL_NETLINK) */
 #endif /* _LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 169fae4249a9..5d8ff5428010 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -1265,6 +1265,10 @@ int phy_start_cable_test(struct phy_device *phydev,
 }
 #endif
 
+int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result);
+int phy_cable_test_fault_length(struct phy_device *phydev, u8 pair,
+				u16 cm);
+
 static inline void phy_device_reset(struct phy_device *phydev, int value)
 {
 	mdio_device_reset(&phydev->mdio, value);
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index ae8e63647663..e0c917918c70 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -81,13 +81,16 @@ int ethnl_cable_test_alloc(struct phy_device *phydev)
 
 	phydev->nest = nla_nest_start(phydev->skb,
 				      ETHTOOL_A_CABLE_TEST_NTF_NEST);
-	if (!phydev->nest)
+	if (!phydev->nest) {
+		err = -EMSGSIZE;
 		goto out;
+	}
 
 	return 0;
 
 out:
 	nlmsg_free(phydev->skb);
+	phydev->skb = NULL;
 	return err;
 }
 EXPORT_SYMBOL_GPL(ethnl_cable_test_alloc);
@@ -95,6 +98,7 @@ EXPORT_SYMBOL_GPL(ethnl_cable_test_alloc);
 void ethnl_cable_test_free(struct phy_device *phydev)
 {
 	nlmsg_free(phydev->skb);
+	phydev->skb = NULL;
 }
 EXPORT_SYMBOL_GPL(ethnl_cable_test_free);
 
@@ -107,3 +111,50 @@ void ethnl_cable_test_finished(struct phy_device *phydev)
 	ethnl_multicast(phydev->skb, phydev->attached_dev);
 }
 EXPORT_SYMBOL_GPL(ethnl_cable_test_finished);
+
+int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result)
+{
+	struct nlattr *nest;
+	int ret = -EMSGSIZE;
+
+	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_NEST_RESULT);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_PAIR, pair))
+		goto err;
+	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_RESULT_CODE, result))
+		goto err;
+
+	nla_nest_end(phydev->skb, nest);
+	return 0;
+
+err:
+	nla_nest_cancel(phydev->skb, nest);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_result);
+
+int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm)
+{
+	struct nlattr *nest;
+	int ret = -EMSGSIZE;
+
+	nest = nla_nest_start(phydev->skb,
+			      ETHTOOL_A_CABLE_NEST_FAULT_LENGTH);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_PAIR, pair))
+		goto err;
+	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_FAULT_LENGTH_CM, cm))
+		goto err;
+
+	nla_nest_end(phydev->skb, nest);
+	return 0;
+
+err:
+	nla_nest_cancel(phydev->skb, nest);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
-- 
cgit v1.2.3-59-g8ed1b


From fc879f723c2a938bcb69d5ae49ec0dba10c1ef97 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:38 +0200
Subject: net: phy: marvell: Add cable test support

The Marvell PHYs have a couple of different register sets for
performing cable tests. Page 7 provides the simplest one to use.

v3:
s/mavell/marvell/g
Remove include of <uapi/linux/ethtool_netlink.h>

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/marvell.c | 201 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 201 insertions(+)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 7fc8e10c5f33..4bc7febf9248 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -27,6 +27,7 @@
 #include <linux/module.h>
 #include <linux/mii.h>
 #include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/phy.h>
 #include <linux/marvell_phy.h>
 #include <linux/bitfield.h>
@@ -42,6 +43,7 @@
 #define MII_MARVELL_MSCR_PAGE		0x02
 #define MII_MARVELL_LED_PAGE		0x03
 #define MII_MARVELL_MISC_TEST_PAGE	0x06
+#define MII_MARVELL_VCT7_PAGE		0x07
 #define MII_MARVELL_WOL_PAGE		0x11
 
 #define MII_M1011_IEVENT		0x13
@@ -162,6 +164,36 @@
 #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII	0x1	/* SGMII to copper */
 #define MII_88E1510_GEN_CTRL_REG_1_RESET	0x8000	/* Soft reset */
 
+#define MII_VCT7_PAIR_0_DISTANCE	0x10
+#define MII_VCT7_PAIR_1_DISTANCE	0x11
+#define MII_VCT7_PAIR_2_DISTANCE	0x12
+#define MII_VCT7_PAIR_3_DISTANCE	0x13
+
+#define MII_VCT7_RESULTS	0x14
+#define MII_VCT7_RESULTS_PAIR3_MASK	0xf000
+#define MII_VCT7_RESULTS_PAIR2_MASK	0x0f00
+#define MII_VCT7_RESULTS_PAIR1_MASK	0x00f0
+#define MII_VCT7_RESULTS_PAIR0_MASK	0x000f
+#define MII_VCT7_RESULTS_PAIR3_SHIFT	12
+#define MII_VCT7_RESULTS_PAIR2_SHIFT	8
+#define MII_VCT7_RESULTS_PAIR1_SHIFT	4
+#define MII_VCT7_RESULTS_PAIR0_SHIFT	0
+#define MII_VCT7_RESULTS_INVALID	0
+#define MII_VCT7_RESULTS_OK		1
+#define MII_VCT7_RESULTS_OPEN		2
+#define MII_VCT7_RESULTS_SAME_SHORT	3
+#define MII_VCT7_RESULTS_CROSS_SHORT	4
+#define MII_VCT7_RESULTS_BUSY		9
+
+#define MII_VCT7_CTRL		0x15
+#define MII_VCT7_CTRL_RUN_NOW			BIT(15)
+#define MII_VCT7_CTRL_RUN_ANEG			BIT(14)
+#define MII_VCT7_CTRL_DISABLE_CROSS		BIT(13)
+#define MII_VCT7_CTRL_RUN_AFTER_BREAK_LINK	BIT(12)
+#define MII_VCT7_CTRL_IN_PROGRESS		BIT(11)
+#define MII_VCT7_CTRL_METERS			BIT(10)
+#define MII_VCT7_CTRL_CENTIMETERS		0
+
 #define LPA_PAUSE_FIBER		0x180
 #define LPA_PAUSE_ASYM_FIBER	0x100
 
@@ -1658,6 +1690,163 @@ static void marvell_get_stats(struct phy_device *phydev,
 		data[i] = marvell_get_stat(phydev, i);
 }
 
+static int marvell_vct7_cable_test_start(struct phy_device *phydev)
+{
+	int bmcr, bmsr, ret;
+
+	/* If auto-negotiation is enabled, but not complete, the cable
+	 * test never completes. So disable auto-neg.
+	 */
+	bmcr = phy_read(phydev, MII_BMCR);
+	if (bmcr < 0)
+		return bmcr;
+
+	bmsr = phy_read(phydev, MII_BMSR);
+
+	if (bmsr < 0)
+		return bmsr;
+
+	if (bmcr & BMCR_ANENABLE) {
+		ret =  phy_modify(phydev, MII_BMCR, BMCR_ANENABLE, 0);
+		if (ret < 0)
+			return ret;
+		ret = genphy_soft_reset(phydev);
+		if (ret < 0)
+			return ret;
+	}
+
+	/* If the link is up, allow it some time to go down */
+	if (bmsr & BMSR_LSTATUS)
+		msleep(1500);
+
+	return phy_write_paged(phydev, MII_MARVELL_VCT7_PAGE,
+			       MII_VCT7_CTRL,
+			       MII_VCT7_CTRL_RUN_NOW |
+			       MII_VCT7_CTRL_CENTIMETERS);
+}
+
+static int marvell_vct7_distance_to_length(int distance, bool meter)
+{
+	if (meter)
+		distance *= 100;
+
+	return distance;
+}
+
+static bool marvell_vct7_distance_valid(int result)
+{
+	switch (result) {
+	case MII_VCT7_RESULTS_OPEN:
+	case MII_VCT7_RESULTS_SAME_SHORT:
+	case MII_VCT7_RESULTS_CROSS_SHORT:
+		return true;
+	}
+	return false;
+}
+
+static int marvell_vct7_report_length(struct phy_device *phydev,
+				      int pair, bool meter)
+{
+	int length;
+	int ret;
+
+	ret = phy_read_paged(phydev, MII_MARVELL_VCT7_PAGE,
+			     MII_VCT7_PAIR_0_DISTANCE + pair);
+	if (ret < 0)
+		return ret;
+
+	length = marvell_vct7_distance_to_length(ret, meter);
+
+	ethnl_cable_test_fault_length(phydev, pair, length);
+
+	return 0;
+}
+
+static int marvell_vct7_cable_test_report_trans(int result)
+{
+	switch (result) {
+	case MII_VCT7_RESULTS_OK:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+	case MII_VCT7_RESULTS_OPEN:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+	case MII_VCT7_RESULTS_SAME_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+	case MII_VCT7_RESULTS_CROSS_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT;
+	default:
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	}
+}
+
+static int marvell_vct7_cable_test_report(struct phy_device *phydev)
+{
+	int pair0, pair1, pair2, pair3;
+	bool meter;
+	int ret;
+
+	ret = phy_read_paged(phydev, MII_MARVELL_VCT7_PAGE,
+			     MII_VCT7_RESULTS);
+	if (ret < 0)
+		return ret;
+
+	pair3 = (ret & MII_VCT7_RESULTS_PAIR3_MASK) >>
+		MII_VCT7_RESULTS_PAIR3_SHIFT;
+	pair2 = (ret & MII_VCT7_RESULTS_PAIR2_MASK) >>
+		MII_VCT7_RESULTS_PAIR2_SHIFT;
+	pair1 = (ret & MII_VCT7_RESULTS_PAIR1_MASK) >>
+		MII_VCT7_RESULTS_PAIR1_SHIFT;
+	pair0 = (ret & MII_VCT7_RESULTS_PAIR0_MASK) >>
+		MII_VCT7_RESULTS_PAIR0_SHIFT;
+
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+				marvell_vct7_cable_test_report_trans(pair0));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_B,
+				marvell_vct7_cable_test_report_trans(pair1));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_C,
+				marvell_vct7_cable_test_report_trans(pair2));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_D,
+				marvell_vct7_cable_test_report_trans(pair3));
+
+	ret = phy_read_paged(phydev, MII_MARVELL_VCT7_PAGE, MII_VCT7_CTRL);
+	if (ret < 0)
+		return ret;
+
+	meter = ret & MII_VCT7_CTRL_METERS;
+
+	if (marvell_vct7_distance_valid(pair0))
+		marvell_vct7_report_length(phydev, 0, meter);
+	if (marvell_vct7_distance_valid(pair1))
+		marvell_vct7_report_length(phydev, 1, meter);
+	if (marvell_vct7_distance_valid(pair2))
+		marvell_vct7_report_length(phydev, 2, meter);
+	if (marvell_vct7_distance_valid(pair3))
+		marvell_vct7_report_length(phydev, 3, meter);
+
+	return 0;
+}
+
+static int marvell_vct7_cable_test_get_status(struct phy_device *phydev,
+					      bool *finished)
+{
+	int ret;
+
+	*finished = false;
+
+	ret = phy_read_paged(phydev, MII_MARVELL_VCT7_PAGE,
+			     MII_VCT7_CTRL);
+
+	if (ret < 0)
+		return ret;
+
+	if (!(ret & MII_VCT7_CTRL_IN_PROGRESS)) {
+		*finished = true;
+
+		return marvell_vct7_cable_test_report(phydev);
+	}
+
+	return 0;
+}
+
 #ifdef CONFIG_HWMON
 static int m88e1121_get_temp(struct phy_device *phydev, long *temp)
 {
@@ -2353,6 +2542,7 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1510",
 		.features = PHY_GBIT_FIBRE_FEATURES,
+		.flags = PHY_POLL_CABLE_TEST,
 		.probe = &m88e1510_probe,
 		.config_init = &m88e1510_config_init,
 		.config_aneg = &m88e1510_config_aneg,
@@ -2372,12 +2562,15 @@ static struct phy_driver marvell_drivers[] = {
 		.set_loopback = genphy_loopback,
 		.get_tunable = m88e1011_get_tunable,
 		.set_tunable = m88e1011_set_tunable,
+		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1540,
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E1540",
 		/* PHY_GBIT_FEATURES */
+		.flags = PHY_POLL_CABLE_TEST,
 		.probe = m88e1510_probe,
 		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
@@ -2394,6 +2587,8 @@ static struct phy_driver marvell_drivers[] = {
 		.get_stats = marvell_get_stats,
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
+		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E1545,
@@ -2401,6 +2596,7 @@ static struct phy_driver marvell_drivers[] = {
 		.name = "Marvell 88E1545",
 		.probe = m88e1510_probe,
 		/* PHY_GBIT_FEATURES */
+		.flags = PHY_POLL_CABLE_TEST,
 		.config_init = &marvell_config_init,
 		.config_aneg = &m88e1510_config_aneg,
 		.read_status = &marvell_read_status,
@@ -2416,6 +2612,8 @@ static struct phy_driver marvell_drivers[] = {
 		.get_stats = marvell_get_stats,
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
+		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
 		.phy_id = MARVELL_PHY_ID_88E3016,
@@ -2442,6 +2640,7 @@ static struct phy_driver marvell_drivers[] = {
 		.phy_id_mask = MARVELL_PHY_ID_MASK,
 		.name = "Marvell 88E6390",
 		/* PHY_GBIT_FEATURES */
+		.flags = PHY_POLL_CABLE_TEST,
 		.probe = m88e6390_probe,
 		.config_init = &marvell_config_init,
 		.config_aneg = &m88e6390_config_aneg,
@@ -2458,6 +2657,8 @@ static struct phy_driver marvell_drivers[] = {
 		.get_stats = marvell_get_stats,
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
+		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 4a459bdc7472b0e6bea6d0dd8df66253ac4f3fe2 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:39 +0200
Subject: net: phy: Put interface into oper testing during cable test

Since running a cable test is disruptive, put the interface into
the testing operational state while the test is running.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/phy/phy.c | 12 +++++++++++-
 1 file changed, 11 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index afdc1c2146ee..9bdc924eea83 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -492,6 +492,7 @@ static void phy_abort_cable_test(struct phy_device *phydev)
 int phy_start_cable_test(struct phy_device *phydev,
 			 struct netlink_ext_ack *extack)
 {
+	struct net_device *dev = phydev->attached_dev;
 	int err = -ENOMEM;
 
 	if (!(phydev->drv &&
@@ -525,8 +526,10 @@ int phy_start_cable_test(struct phy_device *phydev,
 	/* Mark the carrier down until the test is complete */
 	phy_link_down(phydev, true);
 
+	netif_testing_on(dev);
 	err = phydev->drv->cable_test_start(phydev);
 	if (err) {
+		netif_testing_off(dev);
 		phy_link_up(phydev);
 		goto out_free;
 	}
@@ -879,6 +882,8 @@ EXPORT_SYMBOL(phy_free_interrupt);
  */
 void phy_stop(struct phy_device *phydev)
 {
+	struct net_device *dev = phydev->attached_dev;
+
 	if (!phy_is_started(phydev)) {
 		WARN(1, "called from state %s\n",
 		     phy_state_to_str(phydev->state));
@@ -887,8 +892,10 @@ void phy_stop(struct phy_device *phydev)
 
 	mutex_lock(&phydev->lock);
 
-	if (phydev->state == PHY_CABLETEST)
+	if (phydev->state == PHY_CABLETEST) {
 		phy_abort_cable_test(phydev);
+		netif_testing_off(dev);
+	}
 
 	if (phydev->sfp_bus)
 		sfp_upstream_stop(phydev->sfp_bus);
@@ -950,6 +957,7 @@ void phy_state_machine(struct work_struct *work)
 	struct delayed_work *dwork = to_delayed_work(work);
 	struct phy_device *phydev =
 			container_of(dwork, struct phy_device, state_queue);
+	struct net_device *dev = phydev->attached_dev;
 	bool needs_aneg = false, do_suspend = false;
 	enum phy_state old_state;
 	bool finished = false;
@@ -975,6 +983,7 @@ void phy_state_machine(struct work_struct *work)
 		err = phydev->drv->cable_test_get_status(phydev, &finished);
 		if (err) {
 			phy_abort_cable_test(phydev);
+			netif_testing_off(dev);
 			needs_aneg = true;
 			phydev->state = PHY_UP;
 			break;
@@ -982,6 +991,7 @@ void phy_state_machine(struct work_struct *work)
 
 		if (finished) {
 			ethnl_cable_test_finished(phydev);
+			netif_testing_off(dev);
 			needs_aneg = true;
 			phydev->state = PHY_UP;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 9896a4574ecb137d4e5b9283004aa34c688bc761 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Sun, 10 May 2020 21:12:40 +0200
Subject: net: phy: Send notifier when starting the cable test

Given that it takes time to run a cable test, send a notification
message at the start, as well as when it is completed.

v3:
EMSGSIZE when ethnl_bcastmsg_put() fails
Print an error message on failure, since this is a void function.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 net/ethtool/cabletest.c | 41 +++++++++++++++++++++++++++++++++++++++++
 1 file changed, 41 insertions(+)

diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index e0c917918c70..5ba06eabe8c2 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -13,6 +13,43 @@ cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
 	[ETHTOOL_A_CABLE_TEST_HEADER]		= { .type = NLA_NESTED },
 };
 
+static int ethnl_cable_test_started(struct phy_device *phydev)
+{
+	struct sk_buff *skb;
+	int err = -ENOMEM;
+	void *ehdr;
+
+	skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	if (!skb)
+		goto out;
+
+	ehdr = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_CABLE_TEST_NTF);
+	if (!ehdr) {
+		err = -EMSGSIZE;
+		goto out;
+	}
+
+	err = ethnl_fill_reply_header(skb, phydev->attached_dev,
+				      ETHTOOL_A_CABLE_TEST_NTF_HEADER);
+	if (err)
+		goto out;
+
+	err = nla_put_u8(skb, ETHTOOL_A_CABLE_TEST_NTF_STATUS,
+			 ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED);
+	if (err)
+		goto out;
+
+	genlmsg_end(skb, ehdr);
+
+	return ethnl_multicast(skb, phydev->attached_dev);
+
+out:
+	nlmsg_free(skb);
+	phydev_err(phydev, "%s: Error %pe\n", __func__, ERR_PTR(err));
+
+	return err;
+}
+
 int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_MAX + 1];
@@ -47,6 +84,10 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
 	ret = phy_start_cable_test(dev->phydev, info->extack);
 
 	ethnl_ops_complete(dev);
+
+	if (!ret)
+		ethnl_cable_test_started(dev->phydev);
+
 out_rtnl:
 	rtnl_unlock();
 out_dev_put:
-- 
cgit v1.2.3-59-g8ed1b


From b6ca09cb156d349e6fdde8a8466ec15b902d1419 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:59:35 -0500
Subject: net/mlx5: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kinds of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/driver.h   |  2 +-
 include/linux/mlx5/mlx5_ifc.h | 66 +++++++++++++++++++++----------------------
 include/linux/mlx5/qp.h       |  2 +-
 3 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 267dfcc5493e..24e04901f92e 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -201,7 +201,7 @@ struct mlx5_rsc_debug {
 	void		       *object;
 	enum dbg_rsc_type	type;
 	struct dentry	       *root;
-	struct mlx5_field_desc	fields[0];
+	struct mlx5_field_desc	fields[];
 };
 
 enum mlx5_dev_event {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fb243848132d..c9dd6e99ad56 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1703,7 +1703,7 @@ struct mlx5_ifc_wq_bits {
 
 	u8         reserved_at_140[0x4c0];
 
-	struct mlx5_ifc_cmd_pas_bits pas[0];
+	struct mlx5_ifc_cmd_pas_bits pas[];
 };
 
 struct mlx5_ifc_rq_num_bits {
@@ -1921,7 +1921,7 @@ struct mlx5_ifc_resource_dump_menu_segment_bits {
 	u8         reserved_at_20[0x10];
 	u8         num_of_records[0x10];
 
-	struct mlx5_ifc_resource_dump_menu_record_bits record[0];
+	struct mlx5_ifc_resource_dump_menu_record_bits record[];
 };
 
 struct mlx5_ifc_resource_dump_resource_segment_bits {
@@ -1933,7 +1933,7 @@ struct mlx5_ifc_resource_dump_resource_segment_bits {
 
 	u8         index2[0x20];
 
-	u8         payload[0][0x20];
+	u8         payload[][0x20];
 };
 
 struct mlx5_ifc_resource_dump_terminate_segment_bits {
@@ -3010,7 +3010,7 @@ struct mlx5_ifc_flow_context_bits {
 
 	u8         reserved_at_1200[0x600];
 
-	union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[0];
+	union mlx5_ifc_dest_format_struct_flow_counter_list_auto_bits destination[];
 };
 
 enum {
@@ -3303,7 +3303,7 @@ struct mlx5_ifc_rqtc_bits {
 
 	u8         reserved_at_e0[0x6a0];
 
-	struct mlx5_ifc_rq_num_bits rq_num[0];
+	struct mlx5_ifc_rq_num_bits rq_num[];
 };
 
 enum {
@@ -3415,7 +3415,7 @@ struct mlx5_ifc_nic_vport_context_bits {
 
 	u8         reserved_at_7e0[0x20];
 
-	u8         current_uc_mac_address[0][0x40];
+	u8         current_uc_mac_address[][0x40];
 };
 
 enum {
@@ -4338,7 +4338,7 @@ struct mlx5_ifc_query_xrc_srq_out_bits {
 
 	u8         reserved_at_280[0x600];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_query_xrc_srq_in_bits {
@@ -4616,7 +4616,7 @@ struct mlx5_ifc_query_srq_out_bits {
 
 	u8         reserved_at_280[0x600];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_query_srq_in_bits {
@@ -4827,7 +4827,7 @@ struct mlx5_ifc_query_qp_out_bits {
 
 	u8         reserved_at_800[0x80];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_query_qp_in_bits {
@@ -5160,7 +5160,7 @@ struct mlx5_ifc_query_hca_vport_pkey_out_bits {
 
 	u8         reserved_at_40[0x40];
 
-	struct mlx5_ifc_pkey_bits pkey[0];
+	struct mlx5_ifc_pkey_bits pkey[];
 };
 
 struct mlx5_ifc_query_hca_vport_pkey_in_bits {
@@ -5196,7 +5196,7 @@ struct mlx5_ifc_query_hca_vport_gid_out_bits {
 	u8         gids_num[0x10];
 	u8         reserved_at_70[0x10];
 
-	struct mlx5_ifc_array128_auto_bits gid[0];
+	struct mlx5_ifc_array128_auto_bits gid[];
 };
 
 struct mlx5_ifc_query_hca_vport_gid_in_bits {
@@ -5464,7 +5464,7 @@ struct mlx5_ifc_query_flow_counter_out_bits {
 
 	u8         reserved_at_40[0x40];
 
-	struct mlx5_ifc_traffic_counter_bits flow_statistics[0];
+	struct mlx5_ifc_traffic_counter_bits flow_statistics[];
 };
 
 struct mlx5_ifc_query_flow_counter_in_bits {
@@ -5558,7 +5558,7 @@ struct mlx5_ifc_query_eq_out_bits {
 
 	u8         reserved_at_300[0x580];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_query_eq_in_bits {
@@ -5583,7 +5583,7 @@ struct mlx5_ifc_packet_reformat_context_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         reformat_data[2][0x8];
 
-	u8         more_reformat_data[0][0x8];
+	u8         more_reformat_data[][0x8];
 };
 
 struct mlx5_ifc_query_packet_reformat_context_out_bits {
@@ -5594,7 +5594,7 @@ struct mlx5_ifc_query_packet_reformat_context_out_bits {
 
 	u8         reserved_at_40[0xa0];
 
-	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[0];
+	struct mlx5_ifc_packet_reformat_context_in_bits packet_reformat_context[];
 };
 
 struct mlx5_ifc_query_packet_reformat_context_in_bits {
@@ -5833,7 +5833,7 @@ struct mlx5_ifc_query_cq_out_bits {
 
 	u8         reserved_at_280[0x600];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_query_cq_in_bits {
@@ -6440,7 +6440,7 @@ struct mlx5_ifc_modify_cq_in_bits {
 
 	u8         reserved_at_300[0x580];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_modify_cong_status_out_bits {
@@ -6504,7 +6504,7 @@ struct mlx5_ifc_manage_pages_out_bits {
 
 	u8         reserved_at_60[0x20];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 enum {
@@ -6526,7 +6526,7 @@ struct mlx5_ifc_manage_pages_in_bits {
 
 	u8         input_num_entries[0x20];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_mad_ifc_out_bits {
@@ -7481,7 +7481,7 @@ struct mlx5_ifc_create_xrc_srq_in_bits {
 
 	u8         reserved_at_300[0x580];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_create_tis_out_bits {
@@ -7557,7 +7557,7 @@ struct mlx5_ifc_create_srq_in_bits {
 
 	u8         reserved_at_280[0x600];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_create_sq_out_bits {
@@ -7718,7 +7718,7 @@ struct mlx5_ifc_create_qp_in_bits {
 	u8         wq_umem_valid[0x1];
 	u8         reserved_at_861[0x1f];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_create_psv_out_bits {
@@ -7789,7 +7789,7 @@ struct mlx5_ifc_create_mkey_in_bits {
 
 	u8         reserved_at_320[0x560];
 
-	u8         klm_pas_mtt[0][0x20];
+	u8         klm_pas_mtt[][0x20];
 };
 
 enum {
@@ -7922,7 +7922,7 @@ struct mlx5_ifc_create_eq_in_bits {
 
 	u8         reserved_at_3c0[0x4c0];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_create_dct_out_bits {
@@ -7979,7 +7979,7 @@ struct mlx5_ifc_create_cq_in_bits {
 	u8         cq_umem_valid[0x1];
 	u8         reserved_at_2e1[0x59f];
 
-	u8         pas[0][0x40];
+	u8         pas[][0x40];
 };
 
 struct mlx5_ifc_config_int_moderation_out_bits {
@@ -8335,7 +8335,7 @@ struct mlx5_ifc_access_register_out_bits {
 
 	u8         reserved_at_40[0x40];
 
-	u8         register_data[0][0x20];
+	u8         register_data[][0x20];
 };
 
 enum {
@@ -8355,7 +8355,7 @@ struct mlx5_ifc_access_register_in_bits {
 
 	u8         argument[0x20];
 
-	u8         register_data[0][0x20];
+	u8         register_data[][0x20];
 };
 
 struct mlx5_ifc_sltp_reg_bits {
@@ -9372,7 +9372,7 @@ struct mlx5_ifc_cmd_in_bits {
 	u8         reserved_at_20[0x10];
 	u8         op_mod[0x10];
 
-	u8         command[0][0x20];
+	u8         command[][0x20];
 };
 
 struct mlx5_ifc_cmd_if_box_bits {
@@ -9666,7 +9666,7 @@ struct mlx5_ifc_mcqi_reg_bits {
 	u8         reserved_at_a0[0x10];
 	u8         data_size[0x10];
 
-	union mlx5_ifc_mcqi_reg_data_bits data[0];
+	union mlx5_ifc_mcqi_reg_data_bits data[];
 };
 
 struct mlx5_ifc_mcc_reg_bits {
@@ -10252,7 +10252,7 @@ struct mlx5_ifc_umem_bits {
 
 	u8         num_of_mtt[0x40];
 
-	struct mlx5_ifc_mtt_bits  mtt[0];
+	struct mlx5_ifc_mtt_bits  mtt[];
 };
 
 struct mlx5_ifc_uctx_bits {
@@ -10377,7 +10377,7 @@ struct mlx5_ifc_mtrc_stdb_bits {
 	u8         reserved_at_4[0x4];
 	u8         read_size[0x18];
 	u8         start_offset[0x20];
-	u8         string_db_data[0];
+	u8         string_db_data[];
 };
 
 struct mlx5_ifc_mtrc_ctrl_bits {
@@ -10431,7 +10431,7 @@ struct mlx5_ifc_query_esw_functions_out_bits {
 	struct mlx5_ifc_host_params_context_bits host_params_context;
 
 	u8         reserved_at_280[0x180];
-	u8         host_sf_enable[0][0x40];
+	u8         host_sf_enable[][0x40];
 };
 
 struct mlx5_ifc_sf_partition_bits {
@@ -10451,7 +10451,7 @@ struct mlx5_ifc_query_sf_partitions_out_bits {
 
 	u8         reserved_at_60[0x20];
 
-	struct mlx5_ifc_sf_partition_bits sf_partition[0];
+	struct mlx5_ifc_sf_partition_bits sf_partition[];
 };
 
 struct mlx5_ifc_query_sf_partitions_in_bits {
diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h
index f23eb18526fe..1af5e460b5f6 100644
--- a/include/linux/mlx5/qp.h
+++ b/include/linux/mlx5/qp.h
@@ -408,7 +408,7 @@ struct mlx5_wqe_signature_seg {
 
 struct mlx5_wqe_inline_seg {
 	__be32	byte_count;
-	__be32	data[0];
+	__be32	data[];
 };
 
 enum mlx5_sig_type {
-- 
cgit v1.2.3-59-g8ed1b


From 5705b45155c404a1eb2ccc92e95851cfa21d4f37 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 9 May 2020 17:27:37 +0800
Subject: net: hns3: remove a redundant register macro definition

HCLGE_MISC_VECTOR_INT_STS and HCLGE_VECTOR0_PF_OTHER_INT_STS_REG
both represent the misc interrupt status register (0x20800), so
remove HCLGE_VECTOR0_PF_OTHER_INT_STS_REG and replace it with
HCLGE_MISC_VECTOR_INT_STS.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h  |  1 -
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 12 +++++-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
index 876fd81ad2f1..608fe26fc3fe 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_err.h
@@ -16,7 +16,6 @@
 #define HCLGE_RAS_REG_NFE_MASK   0xFF00
 #define HCLGE_RAS_REG_ROCEE_ERR_MASK   0x3000000
 
-#define HCLGE_VECTOR0_PF_OTHER_INT_STS_REG   0x20800
 #define HCLGE_VECTOR0_REG_MSIX_MASK   0x1FF00
 
 #define HCLGE_IMP_TCM_ECC_ERR_INT_EN	0xFFFF0000
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 71a54ddb51f5..f0b1dc9f31ed 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -2968,13 +2968,11 @@ static int hclge_set_vf_link_state(struct hnae3_handle *handle, int vf,
 
 static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 {
-	u32 rst_src_reg, cmdq_src_reg, msix_src_reg;
+	u32 cmdq_src_reg, msix_src_reg;
 
 	/* fetch the events from their corresponding regs */
-	rst_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
 	cmdq_src_reg = hclge_read_dev(&hdev->hw, HCLGE_VECTOR0_CMDQ_SRC_REG);
-	msix_src_reg = hclge_read_dev(&hdev->hw,
-				      HCLGE_VECTOR0_PF_OTHER_INT_STS_REG);
+	msix_src_reg = hclge_read_dev(&hdev->hw, HCLGE_MISC_VECTOR_INT_STS);
 
 	/* Assumption: If by any chance reset and mailbox events are reported
 	 * together then we will only process reset event in this go and will
@@ -2984,7 +2982,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 	 *
 	 * check for vector0 reset event sources
 	 */
-	if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & rst_src_reg) {
+	if (BIT(HCLGE_VECTOR0_IMPRESET_INT_B) & msix_src_reg) {
 		dev_info(&hdev->pdev->dev, "IMP reset interrupt\n");
 		set_bit(HNAE3_IMP_RESET, &hdev->reset_pending);
 		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
@@ -2993,7 +2991,7 @@ static u32 hclge_check_event_cause(struct hclge_dev *hdev, u32 *clearval)
 		return HCLGE_VECTOR0_EVENT_RST;
 	}
 
-	if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & rst_src_reg) {
+	if (BIT(HCLGE_VECTOR0_GLOBALRESET_INT_B) & msix_src_reg) {
 		dev_info(&hdev->pdev->dev, "global reset interrupt\n");
 		set_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
 		set_bit(HNAE3_GLOBAL_RESET, &hdev->reset_pending);
@@ -3483,7 +3481,7 @@ static enum hnae3_reset_type hclge_get_reset_level(struct hnae3_ae_dev *ae_dev,
 	/* first, resolve any unknown reset type to the known type(s) */
 	if (test_bit(HNAE3_UNKNOWN_RESET, addr)) {
 		u32 msix_sts_reg = hclge_read_dev(&hdev->hw,
-					HCLGE_VECTOR0_PF_OTHER_INT_STS_REG);
+					HCLGE_MISC_VECTOR_INT_STS);
 		/* we will intentionally ignore any errors from this function
 		 *  as we will end up in *some* reset request in any case
 		 */
-- 
cgit v1.2.3-59-g8ed1b


From 4279b4d5ec9c58f63fde23b2d86de4a1e494dc06 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 9 May 2020 17:27:38 +0800
Subject: net: hns3: modify two uncorrect macro names

According to the UM, commands 0x0B03 and 0x0B13 are used to
query the TX and RX statistics, not the status, so rename
the misleading macros for these two commands.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index 9a9d752aedc5..e3bab8f3847f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -184,11 +184,11 @@ enum hclge_opcode_type {
 	/* TQP commands */
 	HCLGE_OPC_CFG_TX_QUEUE		= 0x0B01,
 	HCLGE_OPC_QUERY_TX_POINTER	= 0x0B02,
-	HCLGE_OPC_QUERY_TX_STATUS	= 0x0B03,
+	HCLGE_OPC_QUERY_TX_STATS	= 0x0B03,
 	HCLGE_OPC_TQP_TX_QUEUE_TC	= 0x0B04,
 	HCLGE_OPC_CFG_RX_QUEUE		= 0x0B11,
 	HCLGE_OPC_QUERY_RX_POINTER	= 0x0B12,
-	HCLGE_OPC_QUERY_RX_STATUS	= 0x0B13,
+	HCLGE_OPC_QUERY_RX_STATS	= 0x0B13,
 	HCLGE_OPC_STASH_RX_QUEUE_LRO	= 0x0B16,
 	HCLGE_OPC_CFG_RX_QUEUE_LRO	= 0x0B17,
 	HCLGE_OPC_CFG_COM_TQP_QUEUE	= 0x0B20,
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index f0b1dc9f31ed..3ad6a6ae6c2f 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -552,7 +552,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 		queue = handle->kinfo.tqp[i];
 		tqp = container_of(queue, struct hclge_tqp, q);
 		/* command : HCLGE_OPC_QUERY_IGU_STAT */
-		hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATUS,
+		hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_QUERY_RX_STATS,
 					   true);
 
 		desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
@@ -572,7 +572,7 @@ static int hclge_tqps_update_stats(struct hnae3_handle *handle)
 		tqp = container_of(queue, struct hclge_tqp, q);
 		/* command : HCLGE_OPC_QUERY_IGU_STAT */
 		hclge_cmd_setup_basic_desc(&desc[0],
-					   HCLGE_OPC_QUERY_TX_STATUS,
+					   HCLGE_OPC_QUERY_TX_STATS,
 					   true);
 
 		desc[0].data[0] = cpu_to_le32((tqp->index & 0x1ff));
-- 
cgit v1.2.3-59-g8ed1b


From a4de02287abb9947336dfdccc83f6c0cc182e7d9 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 9 May 2020 17:27:39 +0800
Subject: net: hns3: provide .get_cmdq_stat interface for the client

This patch provides a new interface for the client to query
the CMDQ state. Note that the callback returns true when
HCLGE_STATE_CMD_DISABLE is set.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h             | 1 +
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 9 +++++++++
 2 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 5602bf226687..7506cabaa16e 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -552,6 +552,7 @@ struct hnae3_ae_ops {
 	int (*set_vf_mac)(struct hnae3_handle *handle, int vf, u8 *p);
 	int (*get_module_eeprom)(struct hnae3_handle *handle, u32 offset,
 				 u32 len, u8 *data);
+	bool (*get_cmdq_stat)(struct hnae3_handle *handle);
 };
 
 struct hnae3_dcb_ops {
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 3ad6a6ae6c2f..1ff896a9576c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6402,6 +6402,14 @@ static bool hclge_get_hw_reset_stat(struct hnae3_handle *handle)
 	       hclge_read_dev(&hdev->hw, HCLGE_FUN_RST_ING);
 }
 
+static bool hclge_get_cmdq_stat(struct hnae3_handle *handle)
+{
+	struct hclge_vport *vport = hclge_get_vport(handle);
+	struct hclge_dev *hdev = vport->back;
+
+	return test_bit(HCLGE_STATE_CMD_DISABLE, &hdev->state);
+}
+
 static bool hclge_ae_dev_resetting(struct hnae3_handle *handle)
 {
 	struct hclge_vport *vport = hclge_get_vport(handle);
@@ -11311,6 +11319,7 @@ static const struct hnae3_ae_ops hclge_ops = {
 	.set_vf_rate = hclge_set_vf_rate,
 	.set_vf_mac = hclge_set_vf_mac,
 	.get_module_eeprom = hclge_get_module_eeprom,
+	.get_cmdq_stat = hclge_get_cmdq_stat,
 };
 
 static struct hnae3_ae_algo ae_algo = {
-- 
cgit v1.2.3-59-g8ed1b


From b4401a044a1e6397d21806c210dd0d9b4da93e27 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Sat, 9 May 2020 17:27:40 +0800
Subject: net: hns3: optimized the judgment of the input parameters of dump ncl
 config

This patch improves the validation of the input parameters of dump ncl
config by checking the number of parameters and their values separately.
This is clearer and more reasonable.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c    | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 6cfa8253eefc..48c115c50db5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -1258,6 +1258,7 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
 {
 #define HCLGE_MAX_NCL_CONFIG_OFFSET	4096
 #define HCLGE_NCL_CONFIG_LENGTH_IN_EACH_CMD	(20 + 24 * 4)
+#define HCLGE_NCL_CONFIG_PARAM_NUM	2
 
 	struct hclge_desc desc[HCLGE_CMD_NCL_CONFIG_BD_NUM];
 	int bd_num = HCLGE_CMD_NCL_CONFIG_BD_NUM;
@@ -1267,13 +1268,17 @@ static void hclge_dbg_dump_ncl_config(struct hclge_dev *hdev,
 	int ret;
 
 	ret = sscanf(cmd_buf, "%x %x", &offset, &length);
-	if (ret != 2 || offset >= HCLGE_MAX_NCL_CONFIG_OFFSET ||
-	    length > HCLGE_MAX_NCL_CONFIG_OFFSET - offset) {
-		dev_err(&hdev->pdev->dev, "Invalid offset or length.\n");
+	if (ret != HCLGE_NCL_CONFIG_PARAM_NUM) {
+		dev_err(&hdev->pdev->dev,
+			"Too few parameters, num = %d.\n", ret);
 		return;
 	}
-	if (offset < 0 || length <= 0) {
-		dev_err(&hdev->pdev->dev, "Non-positive offset or length.\n");
+
+	if (offset < 0 || offset >= HCLGE_MAX_NCL_CONFIG_OFFSET ||
+	    length <= 0 || length > HCLGE_MAX_NCL_CONFIG_OFFSET - offset) {
+		dev_err(&hdev->pdev->dev,
+			"Invalid input, offset = %d, length = %d.\n",
+			offset, length);
 		return;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 81c287e3dda20441c1469426fc9f44c9b2374fa9 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Sat, 9 May 2020 17:27:41 +0800
Subject: net: hns3: disable auto-negotiation off with 1000M setting in ethtool

The 802.3 specification does not specify the behavior of a PHY
running at 1000M with auto-negotiation off, so some PHY
compatibility issues occur. This patch makes the driver reject
this unreasonable mode when it is requested through ethtool.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
index 1a105f2f87a4..6b1545f982aa 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_ethtool.c
@@ -773,8 +773,13 @@ static int hns3_set_link_ksettings(struct net_device *netdev,
 		  cmd->base.autoneg, cmd->base.speed, cmd->base.duplex);
 
 	/* Only support ksettings_set for netdev with phy attached for now */
-	if (netdev->phydev)
+	if (netdev->phydev) {
+		if (cmd->base.speed == SPEED_1000 &&
+		    cmd->base.autoneg == AUTONEG_DISABLE)
+			return -EINVAL;
+
 		return phy_ethtool_ksettings_set(netdev->phydev, cmd);
+	}
 
 	if (handle->pdev->revision == 0x20)
 		return -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From 9eb8eff0cf2f1e1afc0756bb30cb9746ba90dd07 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 10 May 2020 19:37:40 +0300
Subject: net: bridge: allow enslaving some DSA master network devices

Commit 8db0a2ee2c63 ("net: bridge: reject DSA-enabled master netdevices
as bridge members") added a special check in br_if.c in order to check
for a DSA master network device with a tagging protocol configured. This
was done because back then, such devices, once enslaved in a bridge
would become inoperative and would not pass DSA tagged traffic anymore
due to br_handle_frame returning RX_HANDLER_CONSUMED.

But right now we have valid use cases which do require bridging of DSA
masters. One such example is when the DSA master ports are DSA switch
ports themselves (in a disjoint tree setup). This should be completely
equivalent, functionally speaking, to having multiple DSA switches
hanging off of the ports of a switchdev driver. So we should allow the
enslaving of DSA tagged master network devices.

Instead of the regular br_handle_frame(), install a new function
br_handle_frame_dummy() on these DSA masters, which returns
RX_HANDLER_PASS in order to call into the DSA specific tagging protocol
handlers, and lift the restriction from br_add_if.

Suggested-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Suggested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Tested-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h       |  2 +-
 net/bridge/br_if.c      | 32 +++++++++++++++++++++++---------
 net/bridge/br_input.c   | 23 ++++++++++++++++++++++-
 net/bridge/br_private.h |  6 +++---
 4 files changed, 49 insertions(+), 14 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 6dfc8c2f68b8..02fb5025e0ac 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -651,7 +651,7 @@ struct dsa_switch_driver {
 struct net_device *dsa_dev_to_net_device(struct device *dev);
 
 /* Keep inline for faster access in hot path */
-static inline bool netdev_uses_dsa(struct net_device *dev)
+static inline bool netdev_uses_dsa(const struct net_device *dev)
 {
 #if IS_ENABLED(CONFIG_NET_DSA)
 	return dev->dsa_ptr && dev->dsa_ptr->rcv;
diff --git a/net/bridge/br_if.c b/net/bridge/br_if.c
index ca685c0cdf95..a0e9a7937412 100644
--- a/net/bridge/br_if.c
+++ b/net/bridge/br_if.c
@@ -563,18 +563,32 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	unsigned br_hr, dev_hr;
 	bool changed_addr;
 
-	/* Don't allow bridging non-ethernet like devices, or DSA-enabled
-	 * master network devices since the bridge layer rx_handler prevents
-	 * the DSA fake ethertype handler to be invoked, so we do not strip off
-	 * the DSA switch tag protocol header and the bridge layer just return
-	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames.
-	 */
+	/* Don't allow bridging non-ethernet like devices. */
 	if ((dev->flags & IFF_LOOPBACK) ||
 	    dev->type != ARPHRD_ETHER || dev->addr_len != ETH_ALEN ||
-	    !is_valid_ether_addr(dev->dev_addr) ||
-	    netdev_uses_dsa(dev))
+	    !is_valid_ether_addr(dev->dev_addr))
 		return -EINVAL;
 
+	/* Also don't allow bridging of net devices that are DSA masters, since
+	 * the bridge layer rx_handler prevents the DSA fake ethertype handler
+	 * to be invoked, so we don't get the chance to strip off and parse the
+	 * DSA switch tag protocol header (the bridge layer just returns
+	 * RX_HANDLER_CONSUMED, stopping RX processing for these frames).
+	 * The only case where that would not be an issue is when bridging can
+	 * already be offloaded, such as when the DSA master is itself a DSA
+	 * or plain switchdev port, and is bridged only with other ports from
+	 * the same hardware device.
+	 */
+	if (netdev_uses_dsa(dev)) {
+		list_for_each_entry(p, &br->port_list, list) {
+			if (!netdev_port_same_parent_id(dev, p->dev)) {
+				NL_SET_ERR_MSG(extack,
+					       "Cannot do software bridging with a DSA master");
+				return -EINVAL;
+			}
+		}
+	}
+
 	/* No bridging of bridges */
 	if (dev->netdev_ops->ndo_start_xmit == br_dev_xmit) {
 		NL_SET_ERR_MSG(extack,
@@ -618,7 +632,7 @@ int br_add_if(struct net_bridge *br, struct net_device *dev,
 	if (err)
 		goto err3;
 
-	err = netdev_rx_handler_register(dev, br_handle_frame, p);
+	err = netdev_rx_handler_register(dev, br_get_rx_handler(dev), p);
 	if (err)
 		goto err4;
 
diff --git a/net/bridge/br_input.c b/net/bridge/br_input.c
index d5c34f36f0f4..59a318b9f646 100644
--- a/net/bridge/br_input.c
+++ b/net/bridge/br_input.c
@@ -17,6 +17,7 @@
 #endif
 #include <linux/neighbour.h>
 #include <net/arp.h>
+#include <net/dsa.h>
 #include <linux/export.h>
 #include <linux/rculist.h>
 #include "br_private.h"
@@ -257,7 +258,7 @@ frame_finish:
  * Return NULL if skb is handled
  * note: already called with rcu_read_lock
  */
-rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
+static rx_handler_result_t br_handle_frame(struct sk_buff **pskb)
 {
 	struct net_bridge_port *p;
 	struct sk_buff *skb = *pskb;
@@ -359,3 +360,23 @@ drop:
 	}
 	return RX_HANDLER_CONSUMED;
 }
+
+/* This function has no purpose other than to appease the br_port_get_rcu/rtnl
+ * helpers which identify bridged ports according to the rx_handler installed
+ * on them (so there _needs_ to be a bridge rx_handler even if we don't need it
+ * to do anything useful). This bridge won't support traffic to/from the stack,
+ * but only hardware bridging. So return RX_HANDLER_PASS so we don't steal
+ * frames from the ETH_P_XDSA packet_type handler.
+ */
+static rx_handler_result_t br_handle_frame_dummy(struct sk_buff **pskb)
+{
+	return RX_HANDLER_PASS;
+}
+
+rx_handler_func_t *br_get_rx_handler(const struct net_device *dev)
+{
+	if (netdev_uses_dsa(dev))
+		return br_handle_frame_dummy;
+
+	return br_handle_frame;
+}
diff --git a/net/bridge/br_private.h b/net/bridge/br_private.h
index 4dc21e8f7e33..7501be4eeba0 100644
--- a/net/bridge/br_private.h
+++ b/net/bridge/br_private.h
@@ -702,16 +702,16 @@ int nbp_backup_change(struct net_bridge_port *p, struct net_device *backup_dev);
 
 /* br_input.c */
 int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb);
-rx_handler_result_t br_handle_frame(struct sk_buff **pskb);
+rx_handler_func_t *br_get_rx_handler(const struct net_device *dev);
 
 static inline bool br_rx_handler_check_rcu(const struct net_device *dev)
 {
-	return rcu_dereference(dev->rx_handler) == br_handle_frame;
+	return rcu_dereference(dev->rx_handler) == br_get_rx_handler(dev);
 }
 
 static inline bool br_rx_handler_check_rtnl(const struct net_device *dev)
 {
-	return rcu_dereference_rtnl(dev->rx_handler) == br_handle_frame;
+	return rcu_dereference_rtnl(dev->rx_handler) == br_get_rx_handler(dev);
 }
 
 static inline struct net_bridge_port *br_port_get_check_rcu(const struct net_device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From f66a6a69f97a24546664541237a82b288c2713f6 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 10 May 2020 19:37:41 +0300
Subject: net: dsa: permit cross-chip bridging between all trees in the system

One way of utilizing DSA is by cascading switches which do not all have
compatible taggers. Consider the following real-life topology:

      +---------------------------------------------------------------+
      | LS1028A                                                       |
      |               +------------------------------+                |
      |               |      DSA master for Felix    |                |
      |               |(internal ENETC port 2: eno2) |                |
      |  +------------+------------------------------+-------------+  |
      |  | Felix embedded L2 switch                                |  |
      |  |                                                         |  |
      |  | +--------------+   +--------------+   +--------------+  |  |
      |  | |DSA master for|   |DSA master for|   |DSA master for|  |  |
      |  | |  SJA1105 1   |   |  SJA1105 2   |   |  SJA1105 3   |  |  |
      |  | |(Felix port 1)|   |(Felix port 2)|   |(Felix port 3)|  |  |
      +--+-+--------------+---+--------------+---+--------------+--+--+

+-----------------------+ +-----------------------+ +-----------------------+
|   SJA1105 switch 1    | |   SJA1105 switch 2    | |   SJA1105 switch 3    |
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+
|sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3| |sw3p0|sw3p1|sw3p2|sw3p3|
+-----+-----+-----+-----+ +-----+-----+-----+-----+ +-----+-----+-----+-----+

The above can be described in the device tree as follows (obviously not
complete):

mscc_felix {
	dsa,member = <0 0>;
	ports {
		port@4 {
			ethernet = <&enetc_port2>;
		};
	};
};

sja1105_switch1 {
	dsa,member = <1 1>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port1>;
		};
	};
};

sja1105_switch2 {
	dsa,member = <2 2>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port2>;
		};
	};
};

sja1105_switch3 {
	dsa,member = <3 3>;
	ports {
		port@4 {
			ethernet = <&mscc_felix_port3>;
		};
	};
};

Basically we instantiate one DSA switch tree for every hardware switch
in the system, but we still give them globally unique switch IDs (will
come back to that later). Having 3 disjoint switch trees makes the
tagger drivers "just work", because net devices are registered for the
3 Felix DSA master ports, and they are also DSA slave ports to the ENETC
port. So packets received on the ENETC port are stripped of their
stacked DSA tags one by one.

Currently, hardware bridging between ports on the same sja1105 chip is
possible, but switching between sja1105 ports on different chips is
handled by the software bridge. This is fine, but we can do better.

In fact, the dsa_8021q tag used by sja1105 is compatible with cascading.
In other words, a sja1105 switch can correctly parse and route a packet
containing a dsa_8021q tag. So if we could enable hardware bridging on
the Felix DSA master ports, cross-chip bridging could be completely
offloaded.

Such a system would be used as follows:

ip link add dev br0 type bridge && ip link set dev br0 up
for port in sw0p0 sw0p1 sw0p2 sw0p3 \
	    sw1p0 sw1p1 sw1p2 sw1p3 \
	    sw2p0 sw2p1 sw2p2 sw2p3; do
	ip link set dev $port master br0
done

The above makes switching between ports on the same row be performed in
hardware, and between ports on different rows in software. Now assume
the Felix switch ports are called swp0, swp1, swp2. By running the
following extra commands:

ip link add dev br1 type bridge && ip link set dev br1 up
for port in swp0 swp1 swp2; do
	ip link set dev $port master br1
done

the CPU no longer sees packets which traverse sja1105 switch boundaries
and can be forwarded directly by Felix. The br1 bridge would not be used
for any sort of traffic termination.

For this to work, we need to give drivers an opportunity to listen for
bridging events on DSA trees other than their own, and pass that other
tree index as argument. I have made the assumption, for the moment, that
the other existing DSA notifiers don't need to be broadcast to other
trees. That assumption might turn out to be incorrect. But in the
meantime, introduce a dsa_broadcast function, similar in purpose to
dsa_port_notify, which is used only by the bridging notifiers.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/mv88e6xxx/chip.c | 16 ++++++++++++----
 include/net/dsa.h                | 10 ++++++----
 net/dsa/dsa_priv.h               |  1 +
 net/dsa/port.c                   | 23 +++++++++++++++++++++--
 net/dsa/switch.c                 | 21 +++++++++++++++------
 5 files changed, 55 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/mv88e6xxx/chip.c b/drivers/net/dsa/mv88e6xxx/chip.c
index 2b4a723c8306..7627ea61e0ea 100644
--- a/drivers/net/dsa/mv88e6xxx/chip.c
+++ b/drivers/net/dsa/mv88e6xxx/chip.c
@@ -2233,26 +2233,34 @@ static void mv88e6xxx_port_bridge_leave(struct dsa_switch *ds, int port,
 	mv88e6xxx_reg_unlock(chip);
 }
 
-static int mv88e6xxx_crosschip_bridge_join(struct dsa_switch *ds, int dev,
+static int mv88e6xxx_crosschip_bridge_join(struct dsa_switch *ds,
+					   int tree_index, int sw_index,
 					   int port, struct net_device *br)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 	int err;
 
+	if (tree_index != ds->dst->index)
+		return 0;
+
 	mv88e6xxx_reg_lock(chip);
-	err = mv88e6xxx_pvt_map(chip, dev, port);
+	err = mv88e6xxx_pvt_map(chip, sw_index, port);
 	mv88e6xxx_reg_unlock(chip);
 
 	return err;
 }
 
-static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds, int dev,
+static void mv88e6xxx_crosschip_bridge_leave(struct dsa_switch *ds,
+					     int tree_index, int sw_index,
 					     int port, struct net_device *br)
 {
 	struct mv88e6xxx_chip *chip = ds->priv;
 
+	if (tree_index != ds->dst->index)
+		return;
+
 	mv88e6xxx_reg_lock(chip);
-	if (mv88e6xxx_pvt_map(chip, dev, port))
+	if (mv88e6xxx_pvt_map(chip, sw_index, port))
 		dev_err(ds->dev, "failed to remap cross-chip Port VLAN\n");
 	mv88e6xxx_reg_unlock(chip);
 }
diff --git a/include/net/dsa.h b/include/net/dsa.h
index 02fb5025e0ac..0f4fc00239d9 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -574,10 +574,12 @@ struct dsa_switch_ops {
 	/*
 	 * Cross-chip operations
 	 */
-	int	(*crosschip_bridge_join)(struct dsa_switch *ds, int sw_index,
-					 int port, struct net_device *br);
-	void	(*crosschip_bridge_leave)(struct dsa_switch *ds, int sw_index,
-					  int port, struct net_device *br);
+	int	(*crosschip_bridge_join)(struct dsa_switch *ds, int tree_index,
+					 int sw_index, int port,
+					 struct net_device *br);
+	void	(*crosschip_bridge_leave)(struct dsa_switch *ds, int tree_index,
+					  int sw_index, int port,
+					  struct net_device *br);
 
 	/*
 	 * PTP functionality
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index 6d9a1ef65fa0..a1a0ae242012 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -35,6 +35,7 @@ struct dsa_notifier_ageing_time_info {
 /* DSA_NOTIFIER_BRIDGE_* */
 struct dsa_notifier_bridge_info {
 	struct net_device *br;
+	int tree_index;
 	int sw_index;
 	int port;
 };
diff --git a/net/dsa/port.c b/net/dsa/port.c
index a58fdd362574..ebc8d6cbd1d4 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -13,6 +13,23 @@
 
 #include "dsa_priv.h"
 
+static int dsa_broadcast(unsigned long e, void *v)
+{
+	struct dsa_switch_tree *dst;
+	int err = 0;
+
+	list_for_each_entry(dst, &dsa_tree_list, list) {
+		struct raw_notifier_head *nh = &dst->nh;
+
+		err = raw_notifier_call_chain(nh, e, v);
+		err = notifier_to_errno(err);
+		if (err)
+			break;
+	}
+
+	return err;
+}
+
 static int dsa_port_notify(const struct dsa_port *dp, unsigned long e, void *v)
 {
 	struct raw_notifier_head *nh = &dp->ds->dst->nh;
@@ -120,6 +137,7 @@ void dsa_port_disable(struct dsa_port *dp)
 int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 {
 	struct dsa_notifier_bridge_info info = {
+		.tree_index = dp->ds->dst->index,
 		.sw_index = dp->ds->index,
 		.port = dp->index,
 		.br = br,
@@ -136,7 +154,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 	 */
 	dp->bridge_dev = br;
 
-	err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_JOIN, &info);
+	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_JOIN, &info);
 
 	/* The bridging is rolled back on error */
 	if (err) {
@@ -150,6 +168,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br)
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 {
 	struct dsa_notifier_bridge_info info = {
+		.tree_index = dp->ds->dst->index,
 		.sw_index = dp->ds->index,
 		.port = dp->index,
 		.br = br,
@@ -161,7 +180,7 @@ void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br)
 	 */
 	dp->bridge_dev = NULL;
 
-	err = dsa_port_notify(dp, DSA_NOTIFIER_BRIDGE_LEAVE, &info);
+	err = dsa_broadcast(DSA_NOTIFIER_BRIDGE_LEAVE, &info);
 	if (err)
 		pr_err("DSA: failed to notify DSA_NOTIFIER_BRIDGE_LEAVE\n");
 
diff --git a/net/dsa/switch.c b/net/dsa/switch.c
index f3c32ff552b3..86c8dc5c32a0 100644
--- a/net/dsa/switch.c
+++ b/net/dsa/switch.c
@@ -89,11 +89,16 @@ static int dsa_switch_mtu(struct dsa_switch *ds,
 static int dsa_switch_bridge_join(struct dsa_switch *ds,
 				  struct dsa_notifier_bridge_info *info)
 {
-	if (ds->index == info->sw_index && ds->ops->port_bridge_join)
+	struct dsa_switch_tree *dst = ds->dst;
+
+	if (dst->index == info->tree_index && ds->index == info->sw_index &&
+	    ds->ops->port_bridge_join)
 		return ds->ops->port_bridge_join(ds, info->port, info->br);
 
-	if (ds->index != info->sw_index && ds->ops->crosschip_bridge_join)
-		return ds->ops->crosschip_bridge_join(ds, info->sw_index,
+	if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+	    ds->ops->crosschip_bridge_join)
+		return ds->ops->crosschip_bridge_join(ds, info->tree_index,
+						      info->sw_index,
 						      info->port, info->br);
 
 	return 0;
@@ -103,13 +108,17 @@ static int dsa_switch_bridge_leave(struct dsa_switch *ds,
 				   struct dsa_notifier_bridge_info *info)
 {
 	bool unset_vlan_filtering = br_vlan_enabled(info->br);
+	struct dsa_switch_tree *dst = ds->dst;
 	int err, i;
 
-	if (ds->index == info->sw_index && ds->ops->port_bridge_leave)
+	if (dst->index == info->tree_index && ds->index == info->sw_index &&
+	    ds->ops->port_bridge_join)
 		ds->ops->port_bridge_leave(ds, info->port, info->br);
 
-	if (ds->index != info->sw_index && ds->ops->crosschip_bridge_leave)
-		ds->ops->crosschip_bridge_leave(ds, info->sw_index, info->port,
+	if ((dst->index != info->tree_index || ds->index != info->sw_index) &&
+	    ds->ops->crosschip_bridge_join)
+		ds->ops->crosschip_bridge_leave(ds, info->tree_index,
+						info->sw_index, info->port,
 						info->br);
 
 	/* If the bridge was vlan_filtering, the bridge core doesn't trigger an
-- 
cgit v1.2.3-59-g8ed1b


From 3b7bc1f09101ccace330d105c13c2946bf3be6d5 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 10 May 2020 19:37:42 +0300
Subject: net: dsa: introduce a dsa_switch_find function

Somewhat similar to dsa_tree_find, dsa_switch_find returns a dsa_switch
structure pointer by searching for its tree index and switch index (the
parameters from dsa,member). To be used, for example, by drivers that
implement .crosschip_bridge_join and need a reference to the other
switch indicated by the tree_index and sw_index arguments.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 include/net/dsa.h |  1 +
 net/dsa/dsa2.c    | 21 +++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 0f4fc00239d9..312c2f067e65 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -672,6 +672,7 @@ static inline bool dsa_can_decode(const struct sk_buff *skb,
 
 void dsa_unregister_switch(struct dsa_switch *ds);
 int dsa_register_switch(struct dsa_switch *ds);
+struct dsa_switch *dsa_switch_find(int tree_index, int sw_index);
 #ifdef CONFIG_PM_SLEEP
 int dsa_switch_suspend(struct dsa_switch *ds);
 int dsa_switch_resume(struct dsa_switch *ds);
diff --git a/net/dsa/dsa2.c b/net/dsa/dsa2.c
index d90665b465b8..076908fdd29b 100644
--- a/net/dsa/dsa2.c
+++ b/net/dsa/dsa2.c
@@ -24,6 +24,27 @@ LIST_HEAD(dsa_tree_list);
 static const struct devlink_ops dsa_devlink_ops = {
 };
 
+struct dsa_switch *dsa_switch_find(int tree_index, int sw_index)
+{
+	struct dsa_switch_tree *dst;
+	struct dsa_port *dp;
+
+	list_for_each_entry(dst, &dsa_tree_list, list) {
+		if (dst->index != tree_index)
+			continue;
+
+		list_for_each_entry(dp, &dst->ports, list) {
+			if (dp->ds->index != sw_index)
+				continue;
+
+			return dp->ds;
+		}
+	}
+
+	return NULL;
+}
+EXPORT_SYMBOL_GPL(dsa_switch_find);
+
 static struct dsa_switch_tree *dsa_tree_find(int index)
 {
 	struct dsa_switch_tree *dst;
-- 
cgit v1.2.3-59-g8ed1b


From ac02a451a6148bb9c395b39783ce7299eddf4f31 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 10 May 2020 19:37:43 +0300
Subject: net: dsa: sja1105: implement cross-chip bridging operations

sja1105 uses dsa_8021q for DSA tagging, a format which is VLAN at heart
and which is compatible with cascading. A complete description of this
tagging format is in net/dsa/tag_8021q.c, but a quick summary is that
each external-facing port tags incoming frames with a unique pvid, and
this special VLAN is transmitted as tagged towards the inside of the
system, and as untagged towards the exterior. The tag encodes the switch
id and the source port index.

This means that cross-chip bridging for dsa_8021q only entails adding
the dsa_8021q pvids of one switch to the RX filter of the other
switches. Everything else falls naturally into place, as long as the
bottom-end of ports (the leaves in the tree) is comprised exclusively of
dsa_8021q-compatible switches (i.e. sja1105). Otherwise, there would be
a chance that a front-panel switch transmits a packet tagged with a
dsa_8021q header, header which it wouldn't be able to remove, and which
would hence "leak" out.

The only use case I tested (due to lack of board availability) was when
the sja1105 switches are part of disjoint trees (however, this doesn't
change the fact that multiple sja1105 switches still need unique switch
identifiers in such a system). But in principle, even "true" single-tree
setups (with DSA links) should work just as fine, except for a small
change which I can't test: dsa_towards_port should be used instead of
dsa_upstream_port (I made the assumption that the routing port that any
sja1105 should use towards its neighbours is the CPU port. That might
not hold true in other setups).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
---
 drivers/net/dsa/sja1105/sja1105.h      |   2 +
 drivers/net/dsa/sja1105/sja1105_main.c |  90 ++++++++++++++++++++
 include/linux/dsa/8021q.h              |  45 ++++++++++
 net/dsa/tag_8021q.c                    | 151 +++++++++++++++++++++++++++++++++
 4 files changed, 288 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 8df2a5c53b02..a64ace07b89f 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -8,6 +8,7 @@
 #include <linux/ptp_clock_kernel.h>
 #include <linux/timecounter.h>
 #include <linux/dsa/sja1105.h>
+#include <linux/dsa/8021q.h>
 #include <net/dsa.h>
 #include <linux/mutex.h>
 #include "sja1105_static_config.h"
@@ -185,6 +186,7 @@ struct sja1105_private {
 	struct gpio_desc *reset_gpio;
 	struct spi_device *spidev;
 	struct dsa_switch *ds;
+	struct list_head crosschip_links;
 	struct sja1105_flow_block flow_block;
 	struct sja1105_port ports[SJA1105_NUM_PORTS];
 	/* Serializes transmission of management frames so that
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 666e54565df0..d5de9305df25 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -25,6 +25,8 @@
 #include "sja1105_sgmii.h"
 #include "sja1105_tas.h"
 
+static const struct dsa_switch_ops sja1105_switch_ops;
+
 static void sja1105_hw_reset(struct gpio_desc *gpio, unsigned int pulse_len,
 			     unsigned int startup_delay)
 {
@@ -1791,6 +1793,84 @@ static int sja1105_vlan_apply(struct sja1105_private *priv, int port, u16 vid,
 	return 0;
 }
 
+static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
+					 int tree_index, int sw_index,
+					 int other_port, struct net_device *br)
+{
+	struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
+	struct sja1105_private *other_priv = other_ds->priv;
+	struct sja1105_private *priv = ds->priv;
+	int port, rc;
+
+	if (other_ds->ops != &sja1105_switch_ops)
+		return 0;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!dsa_is_user_port(ds, port))
+			continue;
+		if (dsa_to_port(ds, port)->bridge_dev != br)
+			continue;
+
+		rc = dsa_8021q_crosschip_bridge_join(ds, port, other_ds,
+						     other_port, br,
+						     &priv->crosschip_links);
+		if (rc)
+			return rc;
+
+		rc = dsa_8021q_crosschip_bridge_join(other_ds, other_port, ds,
+						     port, br,
+						     &other_priv->crosschip_links);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+
+static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds,
+					   int tree_index, int sw_index,
+					   int other_port,
+					   struct net_device *br)
+{
+	struct dsa_switch *other_ds = dsa_switch_find(tree_index, sw_index);
+	struct sja1105_private *other_priv = other_ds->priv;
+	struct sja1105_private *priv = ds->priv;
+	int port;
+
+	if (other_ds->ops != &sja1105_switch_ops)
+		return;
+
+	for (port = 0; port < ds->num_ports; port++) {
+		if (!dsa_is_user_port(ds, port))
+			continue;
+		if (dsa_to_port(ds, port)->bridge_dev != br)
+			continue;
+
+		dsa_8021q_crosschip_bridge_leave(ds, port, other_ds, other_port,
+						 br, &priv->crosschip_links);
+
+		dsa_8021q_crosschip_bridge_leave(other_ds, other_port, ds,
+						 port, br,
+						 &other_priv->crosschip_links);
+	}
+}
+
+static int sja1105_replay_crosschip_vlans(struct dsa_switch *ds, bool enabled)
+{
+	struct sja1105_private *priv = ds->priv;
+	struct dsa_8021q_crosschip_link *c;
+	int rc = 0; /* stays 0 when there are no crosschip links to replay */
+
+	list_for_each_entry(c, &priv->crosschip_links, list) {
+		rc = dsa_8021q_crosschip_link_apply(ds, c->port, c->other_ds,
+						    c->other_port, enabled);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
 {
 	int rc, i;
@@ -1803,6 +1883,12 @@ static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
 			return rc;
 		}
 	}
+	rc = sja1105_replay_crosschip_vlans(ds, enabled);
+	if (rc) {
+		dev_err(ds->dev, "Failed to replay crosschip VLANs: %d\n", rc);
+		return rc;
+	}
+
 	dev_info(ds->dev, "%s switch tagging\n",
 		 enabled ? "Enabled" : "Disabled");
 	return 0;
@@ -2370,6 +2456,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.cls_flower_add		= sja1105_cls_flower_add,
 	.cls_flower_del		= sja1105_cls_flower_del,
 	.cls_flower_stats	= sja1105_cls_flower_stats,
+	.crosschip_bridge_join	= sja1105_crosschip_bridge_join,
+	.crosschip_bridge_leave	= sja1105_crosschip_bridge_leave,
 };
 
 static int sja1105_check_device_id(struct sja1105_private *priv)
@@ -2472,6 +2560,8 @@ static int sja1105_probe(struct spi_device *spi)
 	mutex_init(&priv->ptp_data.lock);
 	mutex_init(&priv->mgmt_lock);
 
+	INIT_LIST_HEAD(&priv->crosschip_links);
+
 	sja1105_tas_setup(ds);
 	sja1105_flower_setup(ds);
 
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index c620d9139c28..b8daaec0896e 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -12,11 +12,33 @@ struct sk_buff;
 struct net_device;
 struct packet_type;
 
+struct dsa_8021q_crosschip_link {
+	struct list_head list;
+	int port;
+	struct dsa_switch *other_ds;
+	int other_port;
+	refcount_t refcount;
+};
+
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
 
 int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
 				 bool enabled);
 
+int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
+				   struct dsa_switch *other_ds,
+				   int other_port, bool enabled);
+
+int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
+				    struct dsa_switch *other_ds,
+				    int other_port, struct net_device *br,
+				    struct list_head *crosschip_links);
+
+int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
+				     struct dsa_switch *other_ds,
+				     int other_port, struct net_device *br,
+				     struct list_head *crosschip_links);
+
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 			       u16 tpid, u16 tci);
 
@@ -36,6 +58,29 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
 	return 0;
 }
 
+int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
+				   struct dsa_switch *other_ds,
+				   int other_port, bool enabled)
+{
+	return 0;
+}
+
+int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
+				    struct dsa_switch *other_ds,
+				    int other_port, struct net_device *br,
+				    struct list_head *crosschip_links)
+{
+	return 0;
+}
+
+int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
+				     struct dsa_switch *other_ds,
+				     int other_port, struct net_device *br,
+				     struct list_head *crosschip_links)
+{
+	return 0;
+}
+
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 			       u16 tpid, u16 tci)
 {
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index b97ad93d1c1a..ff9c5bf64bda 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -8,6 +8,7 @@
  */
 #include <linux/if_bridge.h>
 #include <linux/if_vlan.h>
+#include <linux/dsa/8021q.h>
 
 #include "dsa_priv.h"
 
@@ -288,6 +289,156 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
 }
 EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
 
+int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
+				   struct dsa_switch *other_ds,
+				   int other_port, bool enabled)
+{
+	u16 rx_vid = dsa_8021q_rx_vid(ds, port);
+
+	/* @rx_vid of local @ds port @port goes to @other_port of
+	 * @other_ds
+	 */
+	return dsa_8021q_vid_apply(other_ds, other_port, rx_vid,
+				   BRIDGE_VLAN_INFO_UNTAGGED, enabled);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_link_apply);
+
+static int dsa_8021q_crosschip_link_add(struct dsa_switch *ds, int port,
+					struct dsa_switch *other_ds,
+					int other_port,
+					struct list_head *crosschip_links)
+{
+	struct dsa_8021q_crosschip_link *c;
+
+	list_for_each_entry(c, crosschip_links, list) {
+		if (c->port == port && c->other_ds == other_ds &&
+		    c->other_port == other_port) {
+			refcount_inc(&c->refcount);
+			return 0;
+		}
+	}
+
+	dev_dbg(ds->dev, "adding crosschip link from port %d to %s port %d\n",
+		port, dev_name(other_ds->dev), other_port);
+
+	c = kzalloc(sizeof(*c), GFP_KERNEL);
+	if (!c)
+		return -ENOMEM;
+
+	c->port = port;
+	c->other_ds = other_ds;
+	c->other_port = other_port;
+	refcount_set(&c->refcount, 1);
+
+	list_add(&c->list, crosschip_links);
+
+	return 0;
+}
+
+static void dsa_8021q_crosschip_link_del(struct dsa_switch *ds,
+					 struct dsa_8021q_crosschip_link *c,
+					 struct list_head *crosschip_links,
+					 bool *keep)
+{
+	*keep = !refcount_dec_and_test(&c->refcount);
+
+	if (*keep)
+		return;
+
+	dev_dbg(ds->dev,
+		"deleting crosschip link from port %d to %s port %d\n",
+		c->port, dev_name(c->other_ds->dev), c->other_port);
+
+	list_del(&c->list);
+	kfree(c);
+}
+
+/* Make traffic from local port @port be received by remote port @other_port.
+ * This means that our @rx_vid needs to be installed on @other_ds's upstream
+ * and user ports. The user ports should be egress-untagged so that they can
+ * pop the dsa_8021q VLAN. But the @other_upstream can be either egress-tagged
+ * or untagged: it doesn't matter, since it should never egress a frame having
+ * our @rx_vid.
+ */
+int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
+				    struct dsa_switch *other_ds,
+				    int other_port, struct net_device *br,
+				    struct list_head *crosschip_links)
+{
+	/* @other_upstream is how @other_ds reaches us. If we are part
+	 * of disjoint trees, then we are probably connected through
+	 * our CPU ports. If we're part of the same tree though, we should
+	 * probably use dsa_towards_port.
+	 */
+	int other_upstream = dsa_upstream_port(other_ds, other_port);
+	int rc;
+
+	rc = dsa_8021q_crosschip_link_add(ds, port, other_ds,
+					  other_port, crosschip_links);
+	if (rc)
+		return rc;
+
+	if (!br_vlan_enabled(br)) {
+		rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds,
+						    other_port, true);
+		if (rc)
+			return rc;
+	}
+
+	rc = dsa_8021q_crosschip_link_add(ds, port, other_ds,
+					  other_upstream,
+					  crosschip_links);
+	if (rc)
+		return rc;
+
+	if (!br_vlan_enabled(br)) {
+		rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds,
+						    other_upstream, true);
+		if (rc)
+			return rc;
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
+
+int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
+				     struct dsa_switch *other_ds,
+				     int other_port, struct net_device *br,
+				     struct list_head *crosschip_links)
+{
+	int other_upstream = dsa_upstream_port(other_ds, other_port);
+	struct dsa_8021q_crosschip_link *c, *n;
+
+	list_for_each_entry_safe(c, n, crosschip_links, list) {
+		if (c->port == port && c->other_ds == other_ds &&
+		    (c->other_port == other_port ||
+		     c->other_port == other_upstream)) {
+			struct dsa_switch *other_ds = c->other_ds;
+			int other_port = c->other_port;
+			bool keep;
+			int rc;
+
+			dsa_8021q_crosschip_link_del(ds, c, crosschip_links,
+						     &keep);
+			if (keep)
+				continue;
+
+			if (!br_vlan_enabled(br)) {
+				rc = dsa_8021q_crosschip_link_apply(ds, port,
+								    other_ds,
+								    other_port,
+								    false);
+				if (rc)
+					return rc;
+			}
+		}
+	}
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_leave);
+
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
 			       u16 tpid, u16 tci)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 1cc2d0e021f8675065b8c5dc74305789d9e54113 Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Sat, 25 Apr 2020 08:55:29 -0700
Subject: dt-bindings: net: bluetooth: Add rtl8723bs-bluetooth

Add binding document for bluetooth part of RTL8723BS/RTL8723CS

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Alistair Francis <alistair@alistair23.me>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 .../devicetree/bindings/net/realtek-bluetooth.yaml | 54 ++++++++++++++++++++++
 1 file changed, 54 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/realtek-bluetooth.yaml

diff --git a/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
new file mode 100644
index 000000000000..f15a5e5e4859
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/realtek-bluetooth.yaml
@@ -0,0 +1,54 @@
+# SPDX-License-Identifier: GPL-2.0
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/realtek-bluetooth.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: RTL8723BS/RTL8723CS/RTL8822CS Bluetooth Device Tree Bindings
+
+maintainers:
+  - Vasily Khoruzhick <anarsoul@gmail.com>
+  - Alistair Francis <alistair@alistair23.me>
+
+description:
+  RTL8723BS/RTL8723CS/RTL8822CS is WiFi + BT chip. WiFi part is connected over
+  SDIO, while BT is connected over serial. It speaks H5 protocol with few
+  extra commands to upload firmware and change module speed.
+
+properties:
+  compatible:
+    oneOf:
+      - const: "realtek,rtl8723bs-bt"
+      - const: "realtek,rtl8723cs-bt"
+      - const: "realtek,rtl8822cs-bt"
+
+  device-wake-gpios:
+    maxItems: 1
+    description: GPIO specifier, used to wakeup the BT module
+
+  enable-gpios:
+    maxItems: 1
+    description: GPIO specifier, used to enable the BT module
+
+  host-wake-gpios:
+    maxItems: 1
+    description: GPIO specifier, used to wakeup the host processor
+
+required:
+  - compatible
+
+examples:
+  - |
+    #include <dt-bindings/gpio/gpio.h>
+
+    uart1 {
+        pinctrl-names = "default";
+        pinctrl-0 = <&uart1_pins>, <&uart1_rts_cts_pins>;
+        uart-has-rtscts;
+
+        bluetooth {
+            compatible = "realtek,rtl8723bs-bt";
+            device-wake-gpios = <&r_pio 0 5 GPIO_ACTIVE_HIGH>; /* PL5 */
+            host-wake-gpios = <&r_pio 0 6 GPIO_ACTIVE_HIGH>; /* PL6 */
+        };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From 4765db373ea34c7262e3467f8aaf4167dd1cdafa Mon Sep 17 00:00:00 2001
From: Vasily Khoruzhick <anarsoul@gmail.com>
Date: Sat, 25 Apr 2020 08:55:30 -0700
Subject: Bluetooth: hci_h5: Add support for binding RTL8723BS with device tree

RTL8723BS is often used in ARM boards, so add ability to bind it
using device tree.

Signed-off-by: Vasily Khoruzhick <anarsoul@gmail.com>
Signed-off-by: Alistair Francis <alistair@alistair23.me>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_h5.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/hci_h5.c b/drivers/bluetooth/hci_h5.c
index 106c110efe56..e60b2e0773db 100644
--- a/drivers/bluetooth/hci_h5.c
+++ b/drivers/bluetooth/hci_h5.c
@@ -1018,6 +1018,8 @@ static const struct of_device_id rtl_bluetooth_of_match[] = {
 #ifdef CONFIG_BT_HCIUART_RTL
 	{ .compatible = "realtek,rtl8822cs-bt",
 	  .data = (const void *)&rtl_vnd },
+	{ .compatible = "realtek,rtl8723bs-bt",
+	  .data = (const void *)&rtl_vnd },
 #endif
 	{ },
 };
-- 
cgit v1.2.3-59-g8ed1b


From eaa7b7228ff7f8688623120bb0cd75d1490d5d04 Mon Sep 17 00:00:00 2001
From: Tedd Ho-Jeong An <tedd.an@intel.com>
Date: Fri, 1 May 2020 10:00:50 -0700
Subject: Bluetooth: Fix advertising handle is set to 0

This patch fixes the advertising handle being set to 0 regardless of the
actual instance value. The affected commands are LE Set Advertising Set
Random Address, LE Set Extended Advertising Data, and LE Set Extended Scan
Response Data.

Signed-off-by: Tedd Ho-Jeong An <tedd.an@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 9ea40106ef17..3f470f0e432c 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1447,7 +1447,7 @@ void __hci_req_update_scan_rsp_data(struct hci_request *req, u8 instance)
 		memcpy(hdev->scan_rsp_data, cp.data, sizeof(cp.data));
 		hdev->scan_rsp_data_len = len;
 
-		cp.handle = 0;
+		cp.handle = instance;
 		cp.length = len;
 		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
 		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
@@ -1591,7 +1591,7 @@ void __hci_req_update_adv_data(struct hci_request *req, u8 instance)
 		hdev->adv_data_len = len;
 
 		cp.length = len;
-		cp.handle = 0;
+		cp.handle = instance;
 		cp.operation = LE_SET_ADV_DATA_OP_COMPLETE;
 		cp.frag_pref = LE_SET_ADV_DATA_NO_FRAG;
 
@@ -1876,7 +1876,7 @@ int __hci_req_setup_ext_adv_instance(struct hci_request *req, u8 instance)
 
 		memset(&cp, 0, sizeof(cp));
 
-		cp.handle = 0;
+		cp.handle = instance;
 		bacpy(&cp.bdaddr, &random_addr);
 
 		hci_req_add(req,
-- 
cgit v1.2.3-59-g8ed1b


From 69d67b461a180144ad1d31174fadf3e3eda78e56 Mon Sep 17 00:00:00 2001
From: Konstantin Forostyan <konstantin.forostyan@peiker-cee.de>
Date: Mon, 4 May 2020 16:01:49 +0000
Subject: Bluetooth: L2CAP: Fix errors during L2CAP_CREDIT_BASED_CONNECTION_REQ
 (0x17)

Fix 2 typos in L2CAP_CREDIT_BASED_CONNECTION_REQ (0x17) handling function, that
cause BlueZ to answer with L2CAP_CR_LE_INVALID_PARAMS or L2CAP_CR_LE_INVALID_SCID
error on a correct ECRED connection request.

Enhanced Credit Based Mode support was recently introduced with the commit
15f02b91056253e8cdc592888f431da0731337b8 ("Bluetooth: L2CAP: Add initial code
for Enhanced Credit Based Mode").

Signed-off-by: Konstantin Forostyan <konstantin.forostyan@peiker-cee.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_core.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/l2cap_core.c b/net/bluetooth/l2cap_core.c
index fd9d0d08f9c9..fe913a5c754a 100644
--- a/net/bluetooth/l2cap_core.c
+++ b/net/bluetooth/l2cap_core.c
@@ -5927,7 +5927,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
 	if (!enable_ecred)
 		return -EINVAL;
 
-	if (cmd_len < sizeof(*req) || cmd_len - sizeof(*req) % sizeof(u16)) {
+	if (cmd_len < sizeof(*req) || (cmd_len - sizeof(*req)) % sizeof(u16)) {
 		result = L2CAP_CR_LE_INVALID_PARAMS;
 		goto response;
 	}
@@ -5964,7 +5964,7 @@ static inline int l2cap_ecred_conn_req(struct l2cap_conn *conn,
 	}
 
 	result = L2CAP_CR_LE_SUCCESS;
-	cmd_len -= sizeof(req);
+	cmd_len -= sizeof(*req);
 	num_scid = cmd_len / sizeof(u16);
 
 	for (i = 0; i < num_scid; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From 5f4b91728bba007be563fa5a3bd5d96b6a03b3b9 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:46 +0200
Subject: Bluetooth: Add MGMT_EV_PHY_CONFIGURATION_CHANGED to supported list

The event MGMT_EV_PHY_CONFIGURATION_CHANGED wasn't listed in the list of
supported events. So add it.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/mgmt.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index f8c0a4fc8090..33b5640ea060 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -147,6 +147,7 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_ADVERTISING_ADDED,
 	MGMT_EV_ADVERTISING_REMOVED,
 	MGMT_EV_EXT_INFO_CHANGED,
+	MGMT_EV_PHY_CONFIGURATION_CHANGED,
 };
 
 static const u16 mgmt_untrusted_commands[] = {
-- 
cgit v1.2.3-59-g8ed1b


From 181d695352305cc52a49c151a1c3370376e54887 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:47 +0200
Subject: Bluetooth: Replace BT_DBG with bt_dev_dbg for management support

The majority of management interactions are based on a controller index
and have a hci_dev associated with them. So use bt_dev_dbg to have a clean
way of identifying the controller the debug message belongs to.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/mgmt.c | 221 +++++++++++++++++++++++++--------------------------
 1 file changed, 110 insertions(+), 111 deletions(-)

diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 33b5640ea060..78cf72b64014 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -294,7 +294,7 @@ static int read_version(struct sock *sk, struct hci_dev *hdev, void *data,
 {
 	struct mgmt_rp_read_version rp;
 
-	BT_DBG("sock %p", sk);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	mgmt_fill_version_info(&rp);
 
@@ -310,7 +310,7 @@ static int read_commands(struct sock *sk, struct hci_dev *hdev, void *data,
 	size_t rp_size;
 	int i, err;
 
-	BT_DBG("sock %p", sk);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (hci_sock_test_flag(sk, HCI_SOCK_TRUSTED)) {
 		num_commands = ARRAY_SIZE(mgmt_commands);
@@ -363,7 +363,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 	u16 count;
 	int err;
 
-	BT_DBG("sock %p", sk);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	read_lock(&hci_dev_list_lock);
 
@@ -397,7 +397,7 @@ static int read_index_list(struct sock *sk, struct hci_dev *hdev, void *data,
 		if (d->dev_type == HCI_PRIMARY &&
 		    !hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
-			BT_DBG("Added hci%u", d->id);
+			bt_dev_dbg(hdev, "Added hci%u", d->id);
 		}
 	}
 
@@ -423,7 +423,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 	u16 count;
 	int err;
 
-	BT_DBG("sock %p", sk);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	read_lock(&hci_dev_list_lock);
 
@@ -457,7 +457,7 @@ static int read_unconf_index_list(struct sock *sk, struct hci_dev *hdev,
 		if (d->dev_type == HCI_PRIMARY &&
 		    hci_dev_test_flag(d, HCI_UNCONFIGURED)) {
 			rp->index[count++] = cpu_to_le16(d->id);
-			BT_DBG("Added hci%u", d->id);
+			bt_dev_dbg(hdev, "Added hci%u", d->id);
 		}
 	}
 
@@ -482,7 +482,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
 	u16 count;
 	int err;
 
-	BT_DBG("sock %p", sk);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	read_lock(&hci_dev_list_lock);
 
@@ -524,7 +524,7 @@ static int read_ext_index_list(struct sock *sk, struct hci_dev *hdev,
 
 		rp->entry[count].bus = d->bus;
 		rp->entry[count++].index = cpu_to_le16(d->id);
-		BT_DBG("Added hci%u", d->id);
+		bt_dev_dbg(hdev, "Added hci%u", d->id);
 	}
 
 	rp->num_controllers = cpu_to_le16(count);
@@ -600,7 +600,7 @@ static int read_config_info(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_rp_read_config_info rp;
 	u32 options = 0;
 
-	BT_DBG("sock %p %s", sk, hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -940,7 +940,7 @@ static void rpa_expired(struct work_struct *work)
 					    rpa_expired.work);
 	struct hci_request req;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "");
 
 	hci_dev_set_flag(hdev, HCI_RPA_EXPIRED);
 
@@ -980,7 +980,7 @@ static int read_controller_info(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_rp_read_info rp;
 
-	BT_DBG("sock %p %s", sk, hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -1036,7 +1036,7 @@ static int read_ext_controller_info(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_rp_read_ext_info *rp = (void *)buf;
 	u16 eir_len;
 
-	BT_DBG("sock %p %s", sk, hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	memset(&buf, 0, sizeof(buf));
 
@@ -1095,7 +1095,7 @@ static int send_settings_rsp(struct sock *sk, u16 opcode, struct hci_dev *hdev)
 
 static void clean_up_hci_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
-	BT_DBG("%s status 0x%02x", hdev->name, status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	if (hci_conn_count(hdev) == 0) {
 		cancel_delayed_work(&hdev->power_off);
@@ -1171,7 +1171,7 @@ static int set_powered(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct mgmt_pending_cmd *cmd;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (cp->val != 0x00 && cp->val != 0x01)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_POWERED,
@@ -1312,7 +1312,7 @@ void mgmt_set_discoverable_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -1351,7 +1351,7 @@ static int set_discoverable(struct sock *sk, struct hci_dev *hdev, void *data,
 	u16 timeout;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
 	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
@@ -1477,7 +1477,7 @@ void mgmt_set_connectable_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -1537,7 +1537,7 @@ static int set_connectable(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct mgmt_pending_cmd *cmd;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!hci_dev_test_flag(hdev, HCI_LE_ENABLED) &&
 	    !hci_dev_test_flag(hdev, HCI_BREDR_ENABLED))
@@ -1594,7 +1594,7 @@ static int set_bondable(struct sock *sk, struct hci_dev *hdev, void *data,
 	bool changed;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (cp->val != 0x00 && cp->val != 0x01)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BONDABLE,
@@ -1638,7 +1638,7 @@ static int set_link_security(struct sock *sk, struct hci_dev *hdev, void *data,
 	u8 val, status;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	status = mgmt_bredr_support(hdev);
 	if (status)
@@ -1706,7 +1706,7 @@ static int set_ssp(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	u8 status;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	status = mgmt_bredr_support(hdev);
 	if (status)
@@ -1787,7 +1787,7 @@ static int set_hs(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	u8 status;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	status = mgmt_bredr_support(hdev);
 	if (status)
@@ -1893,7 +1893,7 @@ static int set_le(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	int err;
 	u8 val, enabled;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_LE,
@@ -2054,7 +2054,7 @@ unlock:
 
 static void add_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	mgmt_class_complete(hdev, MGMT_OP_ADD_UUID, status);
 }
@@ -2067,7 +2067,7 @@ static int add_uuid(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	struct bt_uuid *uuid;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -2133,7 +2133,7 @@ static bool enable_service_cache(struct hci_dev *hdev)
 
 static void remove_uuid_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	mgmt_class_complete(hdev, MGMT_OP_REMOVE_UUID, status);
 }
@@ -2148,7 +2148,7 @@ static int remove_uuid(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_request req;
 	int err, found;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -2219,7 +2219,7 @@ unlock:
 
 static void set_class_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	mgmt_class_complete(hdev, MGMT_OP_SET_DEV_CLASS, status);
 }
@@ -2232,7 +2232,7 @@ static int set_dev_class(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_request req;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_bredr_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEV_CLASS,
@@ -2305,7 +2305,7 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 	bool changed;
 	int i;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_bredr_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
@@ -2331,8 +2331,8 @@ static int load_link_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LINK_KEYS,
 				       MGMT_STATUS_INVALID_PARAMS);
 
-	BT_DBG("%s debug_keys %u key_count %u", hdev->name, cp->debug_keys,
-	       key_count);
+	bt_dev_dbg(hdev, "debug_keys %u key_count %u", cp->debug_keys,
+		   key_count);
 
 	for (i = 0; i < key_count; i++) {
 		struct mgmt_link_key_info *key = &cp->keys[i];
@@ -2533,7 +2533,7 @@ static int disconnect(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_conn *conn;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	memset(&rp, 0, sizeof(rp));
 	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -2617,7 +2617,7 @@ static int get_connections(struct sock *sk, struct hci_dev *hdev, void *data,
 	int err;
 	u16 i;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -2693,7 +2693,7 @@ static int pin_code_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct mgmt_pending_cmd *cmd;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -2751,7 +2751,7 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data,
 {
 	struct mgmt_cp_set_io_capability *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (cp->io_capability > SMP_IO_KEYBOARD_DISPLAY)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_IO_CAPABILITY,
@@ -2761,8 +2761,7 @@ static int set_io_capability(struct sock *sk, struct hci_dev *hdev, void *data,
 
 	hdev->io_capability = cp->io_capability;
 
-	BT_DBG("%s IO capability set to 0x%02x", hdev->name,
-	       hdev->io_capability);
+	bt_dev_dbg(hdev, "IO capability set to 0x%02x", hdev->io_capability);
 
 	hci_dev_unlock(hdev);
 
@@ -2874,7 +2873,7 @@ static int pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_conn *conn;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	memset(&rp, 0, sizeof(rp));
 	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -3003,7 +3002,7 @@ static int cancel_pair_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_conn *conn;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -3114,7 +3113,7 @@ static int pin_code_neg_reply(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_cp_pin_code_neg_reply *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	return user_pairing_resp(sk, hdev, &cp->addr,
 				MGMT_OP_PIN_CODE_NEG_REPLY,
@@ -3126,7 +3125,7 @@ static int user_confirm_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 {
 	struct mgmt_cp_user_confirm_reply *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (len != sizeof(*cp))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_USER_CONFIRM_REPLY,
@@ -3142,7 +3141,7 @@ static int user_confirm_neg_reply(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_cp_user_confirm_neg_reply *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_CONFIRM_NEG_REPLY,
@@ -3154,7 +3153,7 @@ static int user_passkey_reply(struct sock *sk, struct hci_dev *hdev, void *data,
 {
 	struct mgmt_cp_user_passkey_reply *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_PASSKEY_REPLY,
@@ -3166,7 +3165,7 @@ static int user_passkey_neg_reply(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_cp_user_passkey_neg_reply *cp = data;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	return user_pairing_resp(sk, hdev, &cp->addr,
 				 MGMT_OP_USER_PASSKEY_NEG_REPLY,
@@ -3207,7 +3206,7 @@ static void set_name_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 	struct mgmt_cp_set_local_name *cp;
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -3242,7 +3241,7 @@ static int set_local_name(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_request req;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -3311,7 +3310,7 @@ static int set_appearance(struct sock *sk, struct hci_dev *hdev, void *data,
 	u16 appearance;
 	int err;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_APPEARANCE,
@@ -3343,7 +3342,7 @@ static int get_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 {
 	struct mgmt_rp_get_phy_confguration rp;
 
-	BT_DBG("sock %p %s", sk, hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -3376,7 +3375,7 @@ static void set_default_phy_complete(struct hci_dev *hdev, u8 status,
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -3414,7 +3413,7 @@ static int set_phy_configuration(struct sock *sk, struct hci_dev *hdev,
 	bool changed = false;
 	int err;
 
-	BT_DBG("sock %p %s", sk, hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	configurable_phys = get_configurable_phys(hdev);
 	supported_phys = get_supported_phys(hdev);
@@ -3567,7 +3566,7 @@ static int set_blocked_keys(struct sock *sk, struct hci_dev *hdev, void *data,
 	u16 key_count, expected_len;
 	int i;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	key_count = __le16_to_cpu(keys->key_count);
 	if (key_count > max_key_count) {
@@ -3613,7 +3612,7 @@ static int set_wideband_speech(struct sock *sk, struct hci_dev *hdev,
 	int err;
 	bool changed = false;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!test_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks))
 		return mgmt_cmd_status(sk, hdev->id,
@@ -3718,7 +3717,7 @@ static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 	size_t rp_size = sizeof(mgmt_rp);
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("%s status %u", hdev->name, status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_DATA, hdev);
 	if (!cmd)
@@ -3777,7 +3776,7 @@ static int read_local_oob_data(struct sock *sk, struct hci_dev *hdev,
 	struct hci_request req;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -3827,7 +3826,7 @@ static int add_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_addr_info *addr = data;
 	int err;
 
-	BT_DBG("%s ", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!bdaddr_type_is_valid(addr->type))
 		return mgmt_cmd_complete(sk, hdev->id,
@@ -3936,7 +3935,7 @@ static int remove_remote_oob_data(struct sock *sk, struct hci_dev *hdev,
 	u8 status;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (cp->addr.type != BDADDR_BREDR)
 		return mgmt_cmd_complete(sk, hdev->id,
@@ -3970,7 +3969,7 @@ void mgmt_start_discovery_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status %d", status);
+	bt_dev_dbg(hdev, "status %d", status);
 
 	hci_dev_lock(hdev);
 
@@ -4031,7 +4030,7 @@ static int start_discovery_internal(struct sock *sk, struct hci_dev *hdev,
 	u8 status;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -4123,7 +4122,7 @@ static int start_service_discovery(struct sock *sk, struct hci_dev *hdev,
 	u8 status;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -4218,7 +4217,7 @@ void mgmt_stop_discovery_complete(struct hci_dev *hdev, u8 status)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status %d", status);
+	bt_dev_dbg(hdev, "status %d", status);
 
 	hci_dev_lock(hdev);
 
@@ -4244,7 +4243,7 @@ static int stop_discovery(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct mgmt_pending_cmd *cmd;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -4286,7 +4285,7 @@ static int confirm_name(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct inquiry_entry *e;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -4328,7 +4327,7 @@ static int block_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	u8 status;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!bdaddr_type_is_valid(cp->addr.type))
 		return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_BLOCK_DEVICE,
@@ -4364,7 +4363,7 @@ static int unblock_device(struct sock *sk, struct hci_dev *hdev, void *data,
 	u8 status;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!bdaddr_type_is_valid(cp->addr.type))
 		return mgmt_cmd_complete(sk, hdev->id, MGMT_OP_UNBLOCK_DEVICE,
@@ -4401,7 +4400,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
 	int err;
 	__u16 source;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	source = __le16_to_cpu(cp->source);
 
@@ -4431,7 +4430,7 @@ static int set_device_id(struct sock *sk, struct hci_dev *hdev, void *data,
 static void enable_advertising_instance(struct hci_dev *hdev, u8 status,
 					u16 opcode)
 {
-	BT_DBG("status %d", status);
+	bt_dev_dbg(hdev, "status %d", status);
 }
 
 static void set_advertising_complete(struct hci_dev *hdev, u8 status,
@@ -4517,7 +4516,7 @@ static int set_advertising(struct sock *sk, struct hci_dev *hdev, void *data,
 	u8 val, status;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	status = mgmt_le_support(hdev);
 	if (status)
@@ -4626,7 +4625,7 @@ static int set_static_address(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_cp_set_static_address *cp = data;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_STATIC_ADDRESS,
@@ -4671,7 +4670,7 @@ static int set_scan_params(struct sock *sk, struct hci_dev *hdev,
 	__u16 interval, window;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_SCAN_PARAMS,
@@ -4726,7 +4725,7 @@ static void fast_connectable_complete(struct hci_dev *hdev, u8 status,
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -4763,7 +4762,7 @@ static int set_fast_connectable(struct sock *sk, struct hci_dev *hdev,
 	struct hci_request req;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!hci_dev_test_flag(hdev, HCI_BREDR_ENABLED) ||
 	    hdev->hci_ver < BLUETOOTH_VER_1_2)
@@ -4824,7 +4823,7 @@ static void set_bredr_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 {
 	struct mgmt_pending_cmd *cmd;
 
-	BT_DBG("status 0x%02x", status);
+	bt_dev_dbg(hdev, "status 0x%02x", status);
 
 	hci_dev_lock(hdev);
 
@@ -4859,7 +4858,7 @@ static int set_bredr(struct sock *sk, struct hci_dev *hdev, void *data, u16 len)
 	struct hci_request req;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_bredr_capable(hdev) || !lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_BREDR,
@@ -4969,7 +4968,7 @@ static void sc_enable_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 	struct mgmt_pending_cmd *cmd;
 	struct mgmt_mode *cp;
 
-	BT_DBG("%s status %u", hdev->name, status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
@@ -5018,7 +5017,7 @@ static int set_secure_conn(struct sock *sk, struct hci_dev *hdev,
 	u8 val;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_sc_capable(hdev) &&
 	    !hci_dev_test_flag(hdev, HCI_LE_ENABLED))
@@ -5104,7 +5103,7 @@ static int set_debug_keys(struct sock *sk, struct hci_dev *hdev,
 	bool changed, use_changed;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (cp->val != 0x00 && cp->val != 0x01 && cp->val != 0x02)
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_DEBUG_KEYS,
@@ -5151,7 +5150,7 @@ static int set_privacy(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 	bool changed;
 	int err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PRIVACY,
@@ -5226,7 +5225,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 	u16 irk_count, expected_len;
 	int i, err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_IRKS,
@@ -5248,7 +5247,7 @@ static int load_irks(struct sock *sk, struct hci_dev *hdev, void *cp_data,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	BT_DBG("%s irk_count %u", hdev->name, irk_count);
+	bt_dev_dbg(hdev, "irk_count %u", irk_count);
 
 	for (i = 0; i < irk_count; i++) {
 		struct mgmt_irk_info *key = &cp->irks[i];
@@ -5316,7 +5315,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 	u16 key_count, expected_len;
 	int i, err;
 
-	BT_DBG("request for %s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_LOAD_LONG_TERM_KEYS,
@@ -5338,7 +5337,7 @@ static int load_long_term_keys(struct sock *sk, struct hci_dev *hdev,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	BT_DBG("%s key_count %u", hdev->name, key_count);
+	bt_dev_dbg(hdev, "key_count %u", key_count);
 
 	for (i = 0; i < key_count; i++) {
 		struct mgmt_ltk_info *key = &cp->keys[i];
@@ -5439,7 +5438,7 @@ static void conn_info_refresh_complete(struct hci_dev *hdev, u8 hci_status,
 	u16 handle;
 	u8 status;
 
-	BT_DBG("status 0x%02x", hci_status);
+	bt_dev_dbg(hdev, "status 0x%02x", hci_status);
 
 	hci_dev_lock(hdev);
 
@@ -5493,7 +5492,7 @@ static int get_conn_info(struct sock *sk, struct hci_dev *hdev, void *data,
 	unsigned long conn_info_age;
 	int err = 0;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	memset(&rp, 0, sizeof(rp));
 	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -5647,7 +5646,7 @@ static void get_clock_info_complete(struct hci_dev *hdev, u8 status, u16 opcode)
 	struct mgmt_pending_cmd *cmd;
 	struct hci_conn *conn;
 
-	BT_DBG("%s status %u", hdev->name, status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	hci_dev_lock(hdev);
 
@@ -5684,7 +5683,7 @@ static int get_clock_info(struct sock *sk, struct hci_dev *hdev, void *data,
 	struct hci_conn *conn;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	memset(&rp, 0, sizeof(rp));
 	bacpy(&rp.addr.bdaddr, &cp->addr.bdaddr);
@@ -5805,8 +5804,8 @@ static int hci_conn_params_set(struct hci_dev *hdev, bdaddr_t *addr,
 
 	params->auto_connect = auto_connect;
 
-	BT_DBG("addr %pMR (type %u) auto_connect %u", addr, addr_type,
-	       auto_connect);
+	bt_dev_dbg(hdev, "addr %pMR (type %u) auto_connect %u",
+		   addr, addr_type, auto_connect);
 
 	return 0;
 }
@@ -5830,7 +5829,7 @@ static int add_device(struct sock *sk, struct hci_dev *hdev,
 	u8 auto_conn, addr_type;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!bdaddr_type_is_valid(cp->addr.type) ||
 	    !bacmp(&cp->addr.bdaddr, BDADDR_ANY))
@@ -5928,7 +5927,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_cp_remove_device *cp = data;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -6037,7 +6036,7 @@ static int remove_device(struct sock *sk, struct hci_dev *hdev,
 			kfree(p);
 		}
 
-		BT_DBG("All LE connection parameters were removed");
+		bt_dev_dbg(hdev, "All LE connection parameters were removed");
 
 		hci_update_background_scan(hdev);
 	}
@@ -6080,7 +6079,7 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 				       MGMT_STATUS_INVALID_PARAMS);
 	}
 
-	BT_DBG("%s param_count %u", hdev->name, param_count);
+	bt_dev_dbg(hdev, "param_count %u", param_count);
 
 	hci_dev_lock(hdev);
 
@@ -6092,8 +6091,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 		u16 min, max, latency, timeout;
 		u8 addr_type;
 
-		BT_DBG("Adding %pMR (type %u)", &param->addr.bdaddr,
-		       param->addr.type);
+		bt_dev_dbg(hdev, "Adding %pMR (type %u)", &param->addr.bdaddr,
+			   param->addr.type);
 
 		if (param->addr.type == BDADDR_LE_PUBLIC) {
 			addr_type = ADDR_LE_DEV_PUBLIC;
@@ -6109,8 +6108,8 @@ static int load_conn_param(struct sock *sk, struct hci_dev *hdev, void *data,
 		latency = le16_to_cpu(param->latency);
 		timeout = le16_to_cpu(param->timeout);
 
-		BT_DBG("min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x",
-		       min, max, latency, timeout);
+		bt_dev_dbg(hdev, "min 0x%04x max 0x%04x latency 0x%04x timeout 0x%04x",
+			   min, max, latency, timeout);
 
 		if (hci_check_conn_params(min, max, latency, timeout) < 0) {
 			bt_dev_err(hdev, "ignoring invalid connection parameters");
@@ -6143,7 +6142,7 @@ static int set_external_config(struct sock *sk, struct hci_dev *hdev,
 	bool changed;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (hdev_is_powered(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_EXTERNAL_CONFIG,
@@ -6199,7 +6198,7 @@ static int set_public_address(struct sock *sk, struct hci_dev *hdev,
 	bool changed;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (hdev_is_powered(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_SET_PUBLIC_ADDRESS,
@@ -6254,7 +6253,7 @@ static void read_local_oob_ext_data_complete(struct hci_dev *hdev, u8 status,
 	u16 eir_len;
 	int err;
 
-	BT_DBG("%s status %u", hdev->name, status);
+	bt_dev_dbg(hdev, "status %u", status);
 
 	cmd = pending_find(MGMT_OP_READ_LOCAL_OOB_EXT_DATA, hdev);
 	if (!cmd)
@@ -6393,7 +6392,7 @@ static int read_local_oob_ext_data(struct sock *sk, struct hci_dev *hdev,
 	u8 status, flags, role, addr[7], hash[16], rand[16];
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (hdev_is_powered(hdev)) {
 		switch (cp->type) {
@@ -6580,7 +6579,7 @@ static int read_adv_features(struct sock *sk, struct hci_dev *hdev,
 	u32 supported_flags;
 	u8 *instance;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_READ_ADV_FEATURES,
@@ -6723,7 +6722,7 @@ static void add_advertising_complete(struct hci_dev *hdev, u8 status,
 	struct adv_info *adv_instance, *n;
 	u8 instance;
 
-	BT_DBG("status %d", status);
+	bt_dev_dbg(hdev, "status %d", status);
 
 	hci_dev_lock(hdev);
 
@@ -6782,7 +6781,7 @@ static int add_advertising(struct sock *sk, struct hci_dev *hdev,
 	struct mgmt_pending_cmd *cmd;
 	struct hci_request req;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	status = mgmt_le_support(hdev);
 	if (status)
@@ -6919,7 +6918,7 @@ static void remove_advertising_complete(struct hci_dev *hdev, u8 status,
 	struct mgmt_cp_remove_advertising *cp;
 	struct mgmt_rp_remove_advertising rp;
 
-	BT_DBG("status %d", status);
+	bt_dev_dbg(hdev, "status %d", status);
 
 	hci_dev_lock(hdev);
 
@@ -6951,7 +6950,7 @@ static int remove_advertising(struct sock *sk, struct hci_dev *hdev,
 	struct hci_request req;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	hci_dev_lock(hdev);
 
@@ -7023,7 +7022,7 @@ static int get_adv_size_info(struct sock *sk, struct hci_dev *hdev,
 	u32 flags, supported_flags;
 	int err;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "sock %p", sk);
 
 	if (!lmp_le_capable(hdev))
 		return mgmt_cmd_status(sk, hdev->id, MGMT_OP_GET_ADV_SIZE_INFO,
@@ -7251,7 +7250,7 @@ void mgmt_power_on(struct hci_dev *hdev, int err)
 {
 	struct cmd_lookup match = { NULL, hdev };
 
-	BT_DBG("err %d", err);
+	bt_dev_dbg(hdev, "err %d", err);
 
 	hci_dev_lock(hdev);
 
@@ -7670,7 +7669,7 @@ int mgmt_user_confirm_request(struct hci_dev *hdev, bdaddr_t *bdaddr,
 {
 	struct mgmt_ev_user_confirm_request ev;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
 
 	bacpy(&ev.addr.bdaddr, bdaddr);
 	ev.addr.type = link_to_bdaddr(link_type, addr_type);
@@ -7686,7 +7685,7 @@ int mgmt_user_passkey_request(struct hci_dev *hdev, bdaddr_t *bdaddr,
 {
 	struct mgmt_ev_user_passkey_request ev;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
 
 	bacpy(&ev.addr.bdaddr, bdaddr);
 	ev.addr.type = link_to_bdaddr(link_type, addr_type);
@@ -7747,7 +7746,7 @@ int mgmt_user_passkey_notify(struct hci_dev *hdev, bdaddr_t *bdaddr,
 {
 	struct mgmt_ev_passkey_notify ev;
 
-	BT_DBG("%s", hdev->name);
+	bt_dev_dbg(hdev, "bdaddr %pMR", bdaddr);
 
 	bacpy(&ev.addr.bdaddr, bdaddr);
 	ev.addr.type = link_to_bdaddr(link_type, addr_type);
@@ -8166,7 +8165,7 @@ void mgmt_discovering(struct hci_dev *hdev, u8 discovering)
 {
 	struct mgmt_ev_discovering ev;
 
-	BT_DBG("%s discovering %u", hdev->name, discovering);
+	bt_dev_dbg(hdev, "discovering %u", discovering);
 
 	memset(&ev, 0, sizeof(ev));
 	ev.type = hdev->discovery.type;
-- 
cgit v1.2.3-59-g8ed1b


From 14a81bf021fb428d1df484dba37bea155e8eec0f Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:48 +0200
Subject: Bluetooth: replace zero-length array with flexible-array member

The current codebase makes use of the zero-length array language extension
to the C90 standard, but the preferred mechanism to declare variable-length
types such as these ones is a flexible array member.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/mgmt.h | 42 +++++++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 21 deletions(-)

diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 65dd6fd1fff3..9d4d87c6028e 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -70,14 +70,14 @@ struct mgmt_rp_read_version {
 struct mgmt_rp_read_commands {
 	__le16	num_commands;
 	__le16	num_events;
-	__le16	opcodes[0];
+	__le16	opcodes[];
 } __packed;
 
 #define MGMT_OP_READ_INDEX_LIST		0x0003
 #define MGMT_READ_INDEX_LIST_SIZE	0
 struct mgmt_rp_read_index_list {
 	__le16	num_controllers;
-	__le16	index[0];
+	__le16	index[];
 } __packed;
 
 /* Reserve one extra byte for names in management messages so that they
@@ -183,7 +183,7 @@ struct mgmt_link_key_info {
 struct mgmt_cp_load_link_keys {
 	__u8	debug_keys;
 	__le16	key_count;
-	struct	mgmt_link_key_info keys[0];
+	struct	mgmt_link_key_info keys[];
 } __packed;
 #define MGMT_LOAD_LINK_KEYS_SIZE	3
 
@@ -206,7 +206,7 @@ struct mgmt_ltk_info {
 #define MGMT_OP_LOAD_LONG_TERM_KEYS	0x0013
 struct mgmt_cp_load_long_term_keys {
 	__le16	key_count;
-	struct	mgmt_ltk_info keys[0];
+	struct	mgmt_ltk_info keys[];
 } __packed;
 #define MGMT_LOAD_LONG_TERM_KEYS_SIZE	2
 
@@ -223,7 +223,7 @@ struct mgmt_rp_disconnect {
 #define MGMT_GET_CONNECTIONS_SIZE	0
 struct mgmt_rp_get_connections {
 	__le16 conn_count;
-	struct mgmt_addr_info addr[0];
+	struct mgmt_addr_info addr[];
 } __packed;
 
 #define MGMT_OP_PIN_CODE_REPLY		0x0016
@@ -413,7 +413,7 @@ struct mgmt_irk_info {
 #define MGMT_OP_LOAD_IRKS		0x0030
 struct mgmt_cp_load_irks {
 	__le16 irk_count;
-	struct mgmt_irk_info irks[0];
+	struct mgmt_irk_info irks[];
 } __packed;
 #define MGMT_LOAD_IRKS_SIZE		2
 
@@ -465,7 +465,7 @@ struct mgmt_conn_param {
 #define MGMT_OP_LOAD_CONN_PARAM		0x0035
 struct mgmt_cp_load_conn_param {
 	__le16 param_count;
-	struct mgmt_conn_param params[0];
+	struct mgmt_conn_param params[];
 } __packed;
 #define MGMT_LOAD_CONN_PARAM_SIZE	2
 
@@ -473,7 +473,7 @@ struct mgmt_cp_load_conn_param {
 #define MGMT_READ_UNCONF_INDEX_LIST_SIZE 0
 struct mgmt_rp_read_unconf_index_list {
 	__le16	num_controllers;
-	__le16	index[0];
+	__le16	index[];
 } __packed;
 
 #define MGMT_OPTION_EXTERNAL_CONFIG	0x00000001
@@ -504,7 +504,7 @@ struct mgmt_cp_start_service_discovery {
 	__u8 type;
 	__s8 rssi;
 	__le16 uuid_count;
-	__u8 uuids[0][16];
+	__u8 uuids[][16];
 } __packed;
 #define MGMT_START_SERVICE_DISCOVERY_SIZE 4
 
@@ -516,7 +516,7 @@ struct mgmt_cp_read_local_oob_ext_data {
 struct mgmt_rp_read_local_oob_ext_data {
 	__u8    type;
 	__le16	eir_len;
-	__u8	eir[0];
+	__u8	eir[];
 } __packed;
 
 #define MGMT_OP_READ_EXT_INDEX_LIST	0x003C
@@ -527,7 +527,7 @@ struct mgmt_rp_read_ext_index_list {
 		__le16 index;
 		__u8   type;
 		__u8   bus;
-	} entry[0];
+	} entry[];
 } __packed;
 
 #define MGMT_OP_READ_ADV_FEATURES	0x0003D
@@ -538,7 +538,7 @@ struct mgmt_rp_read_adv_features {
 	__u8   max_scan_rsp_len;
 	__u8   max_instances;
 	__u8   num_instances;
-	__u8   instance[0];
+	__u8   instance[];
 } __packed;
 
 #define MGMT_OP_ADD_ADVERTISING		0x003E
@@ -549,7 +549,7 @@ struct mgmt_cp_add_advertising {
 	__le16	timeout;
 	__u8	adv_data_len;
 	__u8	scan_rsp_len;
-	__u8	data[0];
+	__u8	data[];
 } __packed;
 #define MGMT_ADD_ADVERTISING_SIZE	11
 struct mgmt_rp_add_advertising {
@@ -603,7 +603,7 @@ struct mgmt_rp_read_ext_info {
 	__le32   supported_settings;
 	__le32   current_settings;
 	__le16   eir_len;
-	__u8     eir[0];
+	__u8     eir[];
 } __packed;
 
 #define MGMT_OP_SET_APPEARANCE		0x0043
@@ -668,7 +668,7 @@ struct mgmt_blocked_key_info {
 
 struct mgmt_cp_set_blocked_keys {
 	__le16 key_count;
-	struct mgmt_blocked_key_info keys[0];
+	struct mgmt_blocked_key_info keys[];
 } __packed;
 #define MGMT_OP_SET_BLOCKED_KEYS_SIZE 2
 
@@ -678,14 +678,14 @@ struct mgmt_cp_set_blocked_keys {
 #define MGMT_READ_SECURITY_INFO_SIZE	0
 struct mgmt_rp_read_security_info {
 	__le16   sec_len;
-	__u8     sec[0];
+	__u8     sec[];
 } __packed;
 
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
 	__u8	status;
-	__u8	data[0];
+	__u8	data[];
 } __packed;
 
 #define MGMT_EV_CMD_STATUS		0x0002
@@ -733,7 +733,7 @@ struct mgmt_ev_device_connected {
 	struct mgmt_addr_info addr;
 	__le32	flags;
 	__le16	eir_len;
-	__u8	eir[0];
+	__u8	eir[];
 } __packed;
 
 #define MGMT_DEV_DISCONN_UNKNOWN	0x00
@@ -788,7 +788,7 @@ struct mgmt_ev_device_found {
 	__s8	rssi;
 	__le32	flags;
 	__le16	eir_len;
-	__u8	eir[0];
+	__u8	eir[];
 } __packed;
 
 #define MGMT_EV_DISCOVERING		0x0013
@@ -883,7 +883,7 @@ struct mgmt_ev_ext_index {
 struct mgmt_ev_local_oob_data_updated {
 	__u8    type;
 	__le16	eir_len;
-	__u8	eir[0];
+	__u8	eir[];
 } __packed;
 
 #define MGMT_EV_ADVERTISING_ADDED	0x0023
@@ -899,7 +899,7 @@ struct mgmt_ev_advertising_removed {
 #define MGMT_EV_EXT_INFO_CHANGED	0x0025
 struct mgmt_ev_ext_info_changed {
 	__le16	eir_len;
-	__u8	eir[0];
+	__u8	eir[];
 } __packed;
 
 #define MGMT_EV_PHY_CONFIGURATION_CHANGED	0x0026
-- 
cgit v1.2.3-59-g8ed1b


From d5cc6626b33780699c7a4986f3521361306862fe Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:49 +0200
Subject: Bluetooth: Introduce HCI_MGMT_HDEV_OPTIONAL option

When setting HCI_MGMT_HDEV_OPTIONAL it is possible to target a specific
controller or a global interface.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci_core.h |  1 +
 net/bluetooth/hci_sock.c         | 12 +++++++-----
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 239ab72f16c6..0c7f3ad76665 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1554,6 +1554,7 @@ void hci_sock_dev_event(struct hci_dev *hdev, int event);
 #define HCI_MGMT_NO_HDEV	BIT(1)
 #define HCI_MGMT_UNTRUSTED	BIT(2)
 #define HCI_MGMT_UNCONFIGURED	BIT(3)
+#define HCI_MGMT_HDEV_OPTIONAL	BIT(4)
 
 struct hci_mgmt_handler {
 	int (*func) (struct sock *sk, struct hci_dev *hdev, void *data,
diff --git a/net/bluetooth/hci_sock.c b/net/bluetooth/hci_sock.c
index 9c4a093f8960..caf38a8ea6a8 100644
--- a/net/bluetooth/hci_sock.c
+++ b/net/bluetooth/hci_sock.c
@@ -1579,11 +1579,13 @@ static int hci_mgmt_cmd(struct hci_mgmt_chan *chan, struct sock *sk,
 		}
 	}
 
-	no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);
-	if (no_hdev != !hdev) {
-		err = mgmt_cmd_status(sk, index, opcode,
-				      MGMT_STATUS_INVALID_INDEX);
-		goto done;
+	if (!(handler->flags & HCI_MGMT_HDEV_OPTIONAL)) {
+		no_hdev = (handler->flags & HCI_MGMT_NO_HDEV);
+		if (no_hdev != !hdev) {
+			err = mgmt_cmd_status(sk, index, opcode,
+					      MGMT_STATUS_INVALID_INDEX);
+			goto done;
+		}
 	}
 
 	var_len = (handler->flags & HCI_MGMT_VAR_LEN);
-- 
cgit v1.2.3-59-g8ed1b


From 568602457c1ab6d26db828de168e4ef35b88f1bc Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:50 +0200
Subject: Bluetooth: Replace BT_DBG with bt_dev_dbg for security manager
 support

The security manager operates on a specific controller and thus uses
bt_dev_dbg to identify the controller for each debug message.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 net/bluetooth/smp.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index df22cbf94693..5510017cf9ff 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -508,7 +508,7 @@ bool smp_irk_matches(struct hci_dev *hdev, const u8 irk[16],
 	if (!chan || !chan->data)
 		return false;
 
-	BT_DBG("RPA %pMR IRK %*phN", bdaddr, 16, irk);
+	bt_dev_dbg(hdev, "RPA %pMR IRK %*phN", bdaddr, 16, irk);
 
 	err = smp_ah(irk, &bdaddr->b[3], hash);
 	if (err)
@@ -534,7 +534,7 @@ int smp_generate_rpa(struct hci_dev *hdev, const u8 irk[16], bdaddr_t *rpa)
 	if (err < 0)
 		return err;
 
-	BT_DBG("RPA %pMR", rpa);
+	bt_dev_dbg(hdev, "RPA %pMR", rpa);
 
 	return 0;
 }
@@ -551,7 +551,7 @@ int smp_generate_oob(struct hci_dev *hdev, u8 hash[16], u8 rand[16])
 	smp = chan->data;
 
 	if (hci_dev_test_flag(hdev, HCI_USE_DEBUG_KEYS)) {
-		BT_DBG("Using debug keys");
+		bt_dev_dbg(hdev, "Using debug keys");
 		err = set_ecdh_privkey(smp->tfm_ecdh, debug_sk);
 		if (err)
 			return err;
@@ -1867,7 +1867,7 @@ static u8 sc_send_public_key(struct smp_chan *smp)
 {
 	struct hci_dev *hdev = smp->conn->hcon->hdev;
 
-	BT_DBG("");
+	bt_dev_dbg(hdev, "");
 
 	if (test_bit(SMP_FLAG_LOCAL_OOB, &smp->flags)) {
 		struct l2cap_chan *chan = hdev->smp_data;
-- 
cgit v1.2.3-59-g8ed1b


From a10c907ce0e5e138c3da091fcb7c3d109a15aec5 Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:51 +0200
Subject: Bluetooth: Add support for experimental features configuration

To enable platform specific experimental features, introduce this new set of
management commands and events.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/hci.h  |  1 +
 include/net/bluetooth/mgmt.h | 27 +++++++++++++++++++++
 net/bluetooth/mgmt.c         | 58 ++++++++++++++++++++++++++++++++++++++++++++
 3 files changed, 86 insertions(+)

diff --git a/include/net/bluetooth/hci.h b/include/net/bluetooth/hci.h
index e5bc1dfe809a..16ab6ce87883 100644
--- a/include/net/bluetooth/hci.h
+++ b/include/net/bluetooth/hci.h
@@ -258,6 +258,7 @@ enum {
 	HCI_MGMT_DEV_CLASS_EVENTS,
 	HCI_MGMT_LOCAL_NAME_EVENTS,
 	HCI_MGMT_OOB_DATA_EVENTS,
+	HCI_MGMT_EXP_FEATURE_EVENTS,
 };
 
 /*
diff --git a/include/net/bluetooth/mgmt.h b/include/net/bluetooth/mgmt.h
index 9d4d87c6028e..16e0d87bd8fa 100644
--- a/include/net/bluetooth/mgmt.h
+++ b/include/net/bluetooth/mgmt.h
@@ -681,6 +681,27 @@ struct mgmt_rp_read_security_info {
 	__u8     sec[];
 } __packed;
 
+#define MGMT_OP_READ_EXP_FEATURES_INFO	0x0049
+#define MGMT_READ_EXP_FEATURES_INFO_SIZE 0
+struct mgmt_rp_read_exp_features_info {
+	__le16 feature_count;
+	struct {
+		__u8   uuid[16];
+		__le32 flags;
+	} features[];
+} __packed;
+
+#define MGMT_OP_SET_EXP_FEATURE		0x004a
+struct mgmt_cp_set_exp_feature {
+	__u8   uuid[16];
+	__u8   param[];
+} __packed;
+#define MGMT_SET_EXP_FEATURE_SIZE	16
+struct mgmt_rp_set_exp_feature {
+	__u8   uuid[16];
+	__le32 flags;
+} __packed;
+
 #define MGMT_EV_CMD_COMPLETE		0x0001
 struct mgmt_ev_cmd_complete {
 	__le16	opcode;
@@ -906,3 +927,9 @@ struct mgmt_ev_ext_info_changed {
 struct mgmt_ev_phy_configuration_changed {
 	__le32	selected_phys;
 } __packed;
+
+#define MGMT_EV_EXP_FEATURE_CHANGED	0x0027
+struct mgmt_ev_exp_feature_changed {
+	__u8	uuid[16];
+	__le32	flags;
+} __packed;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 78cf72b64014..3c6be70d98ef 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -109,6 +109,8 @@ static const u16 mgmt_commands[] = {
 	MGMT_OP_SET_BLOCKED_KEYS,
 	MGMT_OP_SET_WIDEBAND_SPEECH,
 	MGMT_OP_READ_SECURITY_INFO,
+	MGMT_OP_READ_EXP_FEATURES_INFO,
+	MGMT_OP_SET_EXP_FEATURE,
 };
 
 static const u16 mgmt_events[] = {
@@ -148,6 +150,7 @@ static const u16 mgmt_events[] = {
 	MGMT_EV_ADVERTISING_REMOVED,
 	MGMT_EV_EXT_INFO_CHANGED,
 	MGMT_EV_PHY_CONFIGURATION_CHANGED,
+	MGMT_EV_EXP_FEATURE_CHANGED,
 };
 
 static const u16 mgmt_untrusted_commands[] = {
@@ -158,6 +161,7 @@ static const u16 mgmt_untrusted_commands[] = {
 	MGMT_OP_READ_EXT_INDEX_LIST,
 	MGMT_OP_READ_EXT_INFO,
 	MGMT_OP_READ_SECURITY_INFO,
+	MGMT_OP_READ_EXP_FEATURES_INFO,
 };
 
 static const u16 mgmt_untrusted_events[] = {
@@ -172,6 +176,7 @@ static const u16 mgmt_untrusted_events[] = {
 	MGMT_EV_EXT_INDEX_ADDED,
 	MGMT_EV_EXT_INDEX_REMOVED,
 	MGMT_EV_EXT_INFO_CHANGED,
+	MGMT_EV_EXP_FEATURE_CHANGED,
 };
 
 #define CACHE_TIMEOUT	msecs_to_jiffies(2 * 1000)
@@ -3710,6 +3715,53 @@ static int read_security_info(struct sock *sk, struct hci_dev *hdev,
 				 rp, sizeof(*rp) + sec_len);
 }
 
+static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
+				  void *data, u16 data_len)
+{
+	char buf[42];
+	struct mgmt_rp_read_exp_features_info *rp = (void *)buf;
+	u16 idx = 0;
+
+	bt_dev_dbg(hdev, "sock %p", sk);
+
+	memset(&buf, 0, sizeof(buf));
+
+	rp->feature_count = cpu_to_le16(idx);
+
+	/* After reading the experimental features information, enable
+	 * the events to update client on any future change.
+	 */
+	hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+	return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+				 MGMT_OP_READ_EXP_FEATURES_INFO,
+				 0, rp, sizeof(*rp) + (20 * idx));
+}
+
+static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
+			   void *data, u16 data_len)
+{
+	struct mgmt_cp_set_exp_feature *cp = data;
+	struct mgmt_rp_set_exp_feature rp;
+
+	bt_dev_dbg(hdev, "sock %p", sk);
+
+	if (!memcmp(cp->uuid, ZERO_KEY, 16)) {
+		memset(rp.uuid, 0, 16);
+		rp.flags = cpu_to_le32(0);
+
+		hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+		return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+					 MGMT_OP_SET_EXP_FEATURE, 0,
+					 &rp, sizeof(rp));
+	}
+
+	return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
+			       MGMT_OP_SET_EXP_FEATURE,
+			       MGMT_STATUS_NOT_SUPPORTED);
+}
+
 static void read_local_oob_data_complete(struct hci_dev *hdev, u8 status,
 				         u16 opcode, struct sk_buff *skb)
 {
@@ -7152,6 +7204,12 @@ static const struct hci_mgmt_handler mgmt_handlers[] = {
 	{ set_wideband_speech,	   MGMT_SETTING_SIZE },
 	{ read_security_info,      MGMT_READ_SECURITY_INFO_SIZE,
 						HCI_MGMT_UNTRUSTED },
+	{ read_exp_features_info,  MGMT_READ_EXP_FEATURES_INFO_SIZE,
+						HCI_MGMT_UNTRUSTED |
+						HCI_MGMT_HDEV_OPTIONAL },
+	{ set_exp_feature,         MGMT_SET_EXP_FEATURE_SIZE,
+						HCI_MGMT_VAR_LEN |
+						HCI_MGMT_HDEV_OPTIONAL },
 };
 
 void mgmt_index_added(struct hci_dev *hdev)
-- 
cgit v1.2.3-59-g8ed1b


From e625e50ceee18bc1e3fb1a6375e089405a797a4d Mon Sep 17 00:00:00 2001
From: Marcel Holtmann <marcel@holtmann.org>
Date: Wed, 6 May 2020 09:57:52 +0200
Subject: Bluetooth: Introduce debug feature when dynamic debug is disabled

In case dynamic debug is disabled, this feature allows a vendor platform
to provide debug statement printing.

Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
Signed-off-by: Johan Hedberg <johan.hedberg@intel.com>
---
 include/net/bluetooth/bluetooth.h | 11 +++++
 net/bluetooth/Kconfig             |  7 ++++
 net/bluetooth/lib.c               | 33 +++++++++++++++
 net/bluetooth/mgmt.c              | 87 +++++++++++++++++++++++++++++++++++++++
 4 files changed, 138 insertions(+)

diff --git a/include/net/bluetooth/bluetooth.h b/include/net/bluetooth/bluetooth.h
index 3fa7b1e3c5d9..18190055374c 100644
--- a/include/net/bluetooth/bluetooth.h
+++ b/include/net/bluetooth/bluetooth.h
@@ -153,6 +153,12 @@ __printf(1, 2)
 void bt_warn(const char *fmt, ...);
 __printf(1, 2)
 void bt_err(const char *fmt, ...);
+#if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG)
+void bt_dbg_set(bool enable);
+bool bt_dbg_get(void);
+__printf(1, 2)
+void bt_dbg(const char *fmt, ...);
+#endif
 __printf(1, 2)
 void bt_warn_ratelimited(const char *fmt, ...);
 __printf(1, 2)
@@ -161,7 +167,12 @@ void bt_err_ratelimited(const char *fmt, ...);
 #define BT_INFO(fmt, ...)	bt_info(fmt "\n", ##__VA_ARGS__)
 #define BT_WARN(fmt, ...)	bt_warn(fmt "\n", ##__VA_ARGS__)
 #define BT_ERR(fmt, ...)	bt_err(fmt "\n", ##__VA_ARGS__)
+
+#if IS_ENABLED(CONFIG_BT_FEATURE_DEBUG)
+#define BT_DBG(fmt, ...)	bt_dbg(fmt "\n", ##__VA_ARGS__)
+#else
 #define BT_DBG(fmt, ...)	pr_debug(fmt "\n", ##__VA_ARGS__)
+#endif
 
 #define bt_dev_info(hdev, fmt, ...)				\
 	BT_INFO("%s: " fmt, (hdev)->name, ##__VA_ARGS__)
diff --git a/net/bluetooth/Kconfig b/net/bluetooth/Kconfig
index 9e25c6570170..1d6d243cdde9 100644
--- a/net/bluetooth/Kconfig
+++ b/net/bluetooth/Kconfig
@@ -135,4 +135,11 @@ config BT_SELFTEST_SMP
 	  Run test cases for SMP cryptographic functionality, including both
 	  legacy SMP as well as the Secure Connections features.
 
+config BT_FEATURE_DEBUG
+	bool "Enable runtime option for debugging statements"
+	depends on BT && !DYNAMIC_DEBUG
+	help
+	  This provides an option to enable/disable debugging statements
+	  at runtime via the experimental features interface.
+
 source "drivers/bluetooth/Kconfig"
diff --git a/net/bluetooth/lib.c b/net/bluetooth/lib.c
index c09e0a3a0ed9..5326f41a58b7 100644
--- a/net/bluetooth/lib.c
+++ b/net/bluetooth/lib.c
@@ -183,6 +183,39 @@ void bt_err(const char *format, ...)
 }
 EXPORT_SYMBOL(bt_err);
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+static bool debug_enable;
+
+void bt_dbg_set(bool enable)
+{
+	debug_enable = enable;
+}
+
+bool bt_dbg_get(void)
+{
+	return debug_enable;
+}
+
+void bt_dbg(const char *format, ...)
+{
+	struct va_format vaf;
+	va_list args;
+
+	if (likely(!debug_enable))
+		return;
+
+	va_start(args, format);
+
+	vaf.fmt = format;
+	vaf.va = &args;
+
+	printk(KERN_DEBUG pr_fmt("%pV"), &vaf);
+
+	va_end(args);
+}
+EXPORT_SYMBOL(bt_dbg);
+#endif
+
 void bt_warn_ratelimited(const char *format, ...)
 {
 	struct va_format vaf;
diff --git a/net/bluetooth/mgmt.c b/net/bluetooth/mgmt.c
index 3c6be70d98ef..9e8a3cccc6ca 100644
--- a/net/bluetooth/mgmt.c
+++ b/net/bluetooth/mgmt.c
@@ -3715,6 +3715,14 @@ static int read_security_info(struct sock *sk, struct hci_dev *hdev,
 				 rp, sizeof(*rp) + sec_len);
 }
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+/* d4992530-b9ec-469f-ab01-6c481c47da1c */
+static const u8 debug_uuid[16] = {
+	0x1c, 0xda, 0x47, 0x1c, 0x48, 0x6c, 0x01, 0xab,
+	0x9f, 0x46, 0xec, 0xb9, 0x30, 0x25, 0x99, 0xd4,
+};
+#endif
+
 static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
 				  void *data, u16 data_len)
 {
@@ -3726,6 +3734,16 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
 
 	memset(&buf, 0, sizeof(buf));
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+	if (!hdev) {
+		u32 flags = bt_dbg_get() ? BIT(0) : 0;
+
+		memcpy(rp->features[idx].uuid, debug_uuid, 16);
+		rp->features[idx].flags = cpu_to_le32(flags);
+		idx++;
+	}
+#endif
+
 	rp->feature_count = cpu_to_le16(idx);
 
 	/* After reading the experimental features information, enable
@@ -3738,6 +3756,21 @@ static int read_exp_features_info(struct sock *sk, struct hci_dev *hdev,
 				 0, rp, sizeof(*rp) + (20 * idx));
 }
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+static int exp_debug_feature_changed(bool enabled, struct sock *skip)
+{
+	struct mgmt_ev_exp_feature_changed ev;
+
+	memset(&ev, 0, sizeof(ev));
+	memcpy(ev.uuid, debug_uuid, 16);
+	ev.flags = cpu_to_le32(enabled ? BIT(0) : 0);
+
+	return mgmt_limited_event(MGMT_EV_EXP_FEATURE_CHANGED, NULL,
+				  &ev, sizeof(ev),
+				  HCI_MGMT_EXP_FEATURE_EVENTS, skip);
+}
+#endif
+
 static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
 			   void *data, u16 data_len)
 {
@@ -3750,6 +3783,17 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
 		memset(rp.uuid, 0, 16);
 		rp.flags = cpu_to_le32(0);
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+		if (!hdev) {
+			bool changed = bt_dbg_get();
+
+			bt_dbg_set(false);
+
+			if (changed)
+				exp_debug_feature_changed(false, sk);
+		}
+#endif
+
 		hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
 
 		return mgmt_cmd_complete(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
@@ -3757,6 +3801,49 @@ static int set_exp_feature(struct sock *sk, struct hci_dev *hdev,
 					 &rp, sizeof(rp));
 	}
 
+#ifdef CONFIG_BT_FEATURE_DEBUG
+	if (!memcmp(cp->uuid, debug_uuid, 16)) {
+		bool val, changed;
+		int err;
+
+		/* Command requires to use the non-controller index */
+		if (hdev)
+			return mgmt_cmd_status(sk, hdev->id,
+					       MGMT_OP_SET_EXP_FEATURE,
+					       MGMT_STATUS_INVALID_INDEX);
+
+		/* Parameters are limited to a single octet */
+		if (data_len != MGMT_SET_EXP_FEATURE_SIZE + 1)
+			return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+					       MGMT_OP_SET_EXP_FEATURE,
+					       MGMT_STATUS_INVALID_PARAMS);
+
+		/* Only boolean on/off is supported */
+		if (cp->param[0] != 0x00 && cp->param[0] != 0x01)
+			return mgmt_cmd_status(sk, MGMT_INDEX_NONE,
+					       MGMT_OP_SET_EXP_FEATURE,
+					       MGMT_STATUS_INVALID_PARAMS);
+
+		val = !!cp->param[0];
+		changed = val ? !bt_dbg_get() : bt_dbg_get();
+		bt_dbg_set(val);
+
+		memcpy(rp.uuid, debug_uuid, 16);
+		rp.flags = cpu_to_le32(val ? BIT(0) : 0);
+
+		hci_sock_set_flag(sk, HCI_MGMT_EXP_FEATURE_EVENTS);
+
+		err = mgmt_cmd_complete(sk, MGMT_INDEX_NONE,
+					MGMT_OP_SET_EXP_FEATURE, 0,
+					&rp, sizeof(rp));
+
+		if (changed)
+			exp_debug_feature_changed(val, sk);
+
+		return err;
+	}
+#endif
+
 	return mgmt_cmd_status(sk, hdev ? hdev->id : MGMT_INDEX_NONE,
 			       MGMT_OP_SET_EXP_FEATURE,
 			       MGMT_STATUS_NOT_SUPPORTED);
-- 
cgit v1.2.3-59-g8ed1b


From 32a2be499c01ee523b28018d451b39ded4297b11 Mon Sep 17 00:00:00 2001
From: Miles Hu <milehu@codeaurora.org>
Date: Fri, 8 May 2020 05:54:57 +0300
Subject: ath11k: remove stale monitor status descriptor

The driver is not handling the monitor status descriptor whenever
the done bit of the status descriptor is not set by hardware. This leaves
a stale entry in the monitor status ring and floods the log with warning
messages. Fix that by removing the descriptor and moving forward to the
next one in the monitor status ring.

Co-developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Miles Hu <milehu@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588642063-6950-1-git-send-email-rmanohar@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/dp_rx.c | 11 +++++++----
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/dp_rx.c b/drivers/net/wireless/ath/ath11k/dp_rx.c
index 85670608c3e2..a54610d75c40 100644
--- a/drivers/net/wireless/ath/ath11k/dp_rx.c
+++ b/drivers/net/wireless/ath/ath11k/dp_rx.c
@@ -2728,7 +2728,7 @@ static int ath11k_dp_rx_reap_mon_status_ring(struct ath11k_base *ab, int mac_id,
 				ath11k_warn(ab, "rx monitor status with invalid buf_id %d\n",
 					    buf_id);
 				spin_unlock_bh(&rx_ring->idr_lock);
-				continue;
+				goto move_next;
 			}
 
 			idr_remove(&rx_ring->bufs_idr, buf_id);
@@ -2747,13 +2747,16 @@ static int ath11k_dp_rx_reap_mon_status_ring(struct ath11k_base *ab, int mac_id,
 			tlv = (struct hal_tlv_hdr *)skb->data;
 			if (FIELD_GET(HAL_TLV_HDR_TAG, tlv->tl) !=
 					HAL_RX_STATUS_BUFFER_DONE) {
-				ath11k_hal_srng_src_get_next_entry(ab, srng);
-				continue;
+				ath11k_warn(ab, "mon status DONE not set %lx\n",
+					    FIELD_GET(HAL_TLV_HDR_TAG,
+						      tlv->tl));
+				dev_kfree_skb_any(skb);
+				goto move_next;
 			}
 
 			__skb_queue_tail(skb_list, skb);
 		}
-
+move_next:
 		skb = ath11k_dp_rx_alloc_mon_status_buf(ab, rx_ring,
 							&buf_id, GFP_ATOMIC);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7c6d67b136ceb0aebc7a3153b300e925ed915daf Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Fri, 8 May 2020 05:55:07 +0300
Subject: ath10k: Skip handling del_server during driver exit

The qmi infrastructure sends the client a del_server
event when the client releases its qmi handle. This
is not the msg indicating the actual qmi server exiting.
In such cases the del_server msg should not be processed,
since the wifi firmware does not reset its qmi state.

Hence skip the processing of del_server event when the
driver is unloading.

Tested HW: WCN3990
Tested FW: WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1

Fixes: ba94c753ccb4 ("ath10k: add QMI message handshake for wcn3990 client")
Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588663061-12138-1-git-send-email-pillair@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/qmi.c | 13 ++++++++++++-
 drivers/net/wireless/ath/ath10k/qmi.h |  6 ++++++
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/qmi.c b/drivers/net/wireless/ath/ath10k/qmi.c
index 5ae829b46c3d..5468a41e928e 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.c
+++ b/drivers/net/wireless/ath/ath10k/qmi.c
@@ -961,7 +961,16 @@ static void ath10k_qmi_del_server(struct qmi_handle *qmi_hdl,
 		container_of(qmi_hdl, struct ath10k_qmi, qmi_hdl);
 
 	qmi->fw_ready = false;
-	ath10k_qmi_driver_event_post(qmi, ATH10K_QMI_EVENT_SERVER_EXIT, NULL);
+
+	/*
+	 * The del_server event is to be processed only if coming from
+	 * the qmi server. The qmi infrastructure sends del_server, when
+	 * any client releases the qmi handle. In this case do not process
+	 * this del_server event.
+	 */
+	if (qmi->state == ATH10K_QMI_STATE_INIT_DONE)
+		ath10k_qmi_driver_event_post(qmi, ATH10K_QMI_EVENT_SERVER_EXIT,
+					     NULL);
 }
 
 static struct qmi_ops ath10k_qmi_ops = {
@@ -1046,6 +1055,7 @@ int ath10k_qmi_init(struct ath10k *ar, u32 msa_size)
 	if (ret)
 		goto err_qmi_lookup;
 
+	qmi->state = ATH10K_QMI_STATE_INIT_DONE;
 	return 0;
 
 err_qmi_lookup:
@@ -1064,6 +1074,7 @@ int ath10k_qmi_deinit(struct ath10k *ar)
 	struct ath10k_snoc *ar_snoc = ath10k_snoc_priv(ar);
 	struct ath10k_qmi *qmi = ar_snoc->qmi;
 
+	qmi->state = ATH10K_QMI_STATE_DEINIT;
 	qmi_handle_release(&qmi->qmi_hdl);
 	cancel_work_sync(&qmi->event_work);
 	destroy_workqueue(qmi->event_wq);
diff --git a/drivers/net/wireless/ath/ath10k/qmi.h b/drivers/net/wireless/ath/ath10k/qmi.h
index 450be18b60ad..16190511318d 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.h
+++ b/drivers/net/wireless/ath/ath10k/qmi.h
@@ -83,6 +83,11 @@ struct ath10k_qmi_driver_event {
 	void *data;
 };
 
+enum ath10k_qmi_state {
+	ATH10K_QMI_STATE_INIT_DONE,
+	ATH10K_QMI_STATE_DEINIT,
+};
+
 struct ath10k_qmi {
 	struct ath10k *ar;
 	struct qmi_handle qmi_hdl;
@@ -102,6 +107,7 @@ struct ath10k_qmi {
 	char fw_build_timestamp[MAX_TIMESTAMP_LEN + 1];
 	struct ath10k_qmi_cal_data cal_data[MAX_NUM_CAL_V01];
 	bool msa_fixed_perm;
+	enum ath10k_qmi_state state;
 };
 
 int ath10k_qmi_wlan_enable(struct ath10k *ar,
-- 
cgit v1.2.3-59-g8ed1b


From c730c477176ad4af86d9aae4d360a7ad840b073a Mon Sep 17 00:00:00 2001
From: Rakesh Pillai <pillair@codeaurora.org>
Date: Fri, 8 May 2020 05:55:18 +0300
Subject: ath10k: Remove msdu from idr when management pkt send fails

Currently when the sending of any management pkt
via wmi command fails, the packet is unmapped and
freed in the error handling. But the idr entry added
to track the packet is not removed.

Hence, during unload, in wmi cleanup, all the entries
in IDR are removed and the corresponding buffer is
attempted to be freed. This can cause a situation where
one packet is attempted to be freed twice.

Fix this error by removing the msdu from the idr
list when the sending of a management packet over
wmi fails.

Tested HW: WCN3990
Tested FW: WLAN.HL.3.1-01040-QCAHLSWMTPLZ-1

Fixes: 1807da49733e ("ath10k: wmi: add management tx by reference support over wmi")
Signed-off-by: Rakesh Pillai <pillair@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588667015-25490-1-git-send-email-pillair@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/mac.c     |  3 +++
 drivers/net/wireless/ath/ath10k/wmi-ops.h | 10 ++++++++++
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 15 +++++++++++++++
 3 files changed, 28 insertions(+)

diff --git a/drivers/net/wireless/ath/ath10k/mac.c b/drivers/net/wireless/ath/ath10k/mac.c
index 91f5444ecedb..919d15584d4a 100644
--- a/drivers/net/wireless/ath/ath10k/mac.c
+++ b/drivers/net/wireless/ath/ath10k/mac.c
@@ -3967,6 +3967,9 @@ void ath10k_mgmt_over_wmi_tx_work(struct work_struct *work)
 			if (ret) {
 				ath10k_warn(ar, "failed to transmit management frame by ref via WMI: %d\n",
 					    ret);
+				/* remove this msdu from idr tracking */
+				ath10k_wmi_cleanup_mgmt_tx_send(ar, skb);
+
 				dma_unmap_single(ar->dev, paddr, skb->len,
 						 DMA_TO_DEVICE);
 				ieee80211_free_txskb(ar->hw, skb);
diff --git a/drivers/net/wireless/ath/ath10k/wmi-ops.h b/drivers/net/wireless/ath/ath10k/wmi-ops.h
index 6b730f59fd5b..0dd484f85082 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-ops.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-ops.h
@@ -140,6 +140,7 @@ struct wmi_ops {
 	struct sk_buff *(*gen_mgmt_tx_send)(struct ath10k *ar,
 					    struct sk_buff *skb,
 					    dma_addr_t paddr);
+	int (*cleanup_mgmt_tx_send)(struct ath10k *ar, struct sk_buff *msdu);
 	struct sk_buff *(*gen_dbglog_cfg)(struct ath10k *ar, u64 module_enable,
 					  u32 log_level);
 	struct sk_buff *(*gen_pktlog_enable)(struct ath10k *ar, u32 filter);
@@ -448,6 +449,15 @@ ath10k_wmi_get_txbf_conf_scheme(struct ath10k *ar)
 	return ar->wmi.ops->get_txbf_conf_scheme(ar);
 }
 
+static inline int
+ath10k_wmi_cleanup_mgmt_tx_send(struct ath10k *ar, struct sk_buff *msdu)
+{
+	if (!ar->wmi.ops->cleanup_mgmt_tx_send)
+		return -EOPNOTSUPP;
+
+	return ar->wmi.ops->cleanup_mgmt_tx_send(ar, msdu);
+}
+
 static inline int
 ath10k_wmi_mgmt_tx_send(struct ath10k *ar, struct sk_buff *msdu,
 			dma_addr_t paddr)
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index 9187b62b331c..afa03cb3aaf5 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -3015,6 +3015,18 @@ ath10k_wmi_tlv_op_gen_request_peer_stats_info(struct ath10k *ar,
 	return skb;
 }
 
+static int
+ath10k_wmi_tlv_op_cleanup_mgmt_tx_send(struct ath10k *ar,
+				       struct sk_buff *msdu)
+{
+	struct ath10k_skb_cb *cb = ATH10K_SKB_CB(msdu);
+	struct ath10k_wmi *wmi = &ar->wmi;
+
+	idr_remove(&wmi->mgmt_pending_tx, cb->msdu_id);
+
+	return 0;
+}
+
 static int
 ath10k_wmi_mgmt_tx_alloc_msdu_id(struct ath10k *ar, struct sk_buff *skb,
 				 dma_addr_t paddr)
@@ -3089,6 +3101,8 @@ ath10k_wmi_tlv_op_gen_mgmt_tx_send(struct ath10k *ar, struct sk_buff *msdu,
 	if (desc_id < 0)
 		goto err_free_skb;
 
+	cb->msdu_id = desc_id;
+
 	ptr = (void *)skb->data;
 	tlv = ptr;
 	tlv->tag = __cpu_to_le16(WMI_TLV_TAG_STRUCT_MGMT_TX_CMD);
@@ -4540,6 +4554,7 @@ static const struct wmi_ops wmi_tlv_ops = {
 	.gen_force_fw_hang = ath10k_wmi_tlv_op_gen_force_fw_hang,
 	/* .gen_mgmt_tx = not implemented; HTT is used */
 	.gen_mgmt_tx_send = ath10k_wmi_tlv_op_gen_mgmt_tx_send,
+	.cleanup_mgmt_tx_send = ath10k_wmi_tlv_op_cleanup_mgmt_tx_send,
 	.gen_dbglog_cfg = ath10k_wmi_tlv_op_gen_dbglog_cfg,
 	.gen_pktlog_enable = ath10k_wmi_tlv_op_gen_pktlog_enable,
 	.gen_pktlog_disable = ath10k_wmi_tlv_op_gen_pktlog_disable,
-- 
cgit v1.2.3-59-g8ed1b


From 04a4d3416372ae19471c98ea964a4740d289beac Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 8 May 2020 05:55:30 +0300
Subject: wil6210: avoid gcc-10 zero-length-bounds warning

gcc-10 warns about accesses inside of a zero-length array:

drivers/net/wireless/ath/wil6210/cfg80211.c: In function 'wil_cfg80211_scan':
drivers/net/wireless/ath/wil6210/cfg80211.c:970:23: error: array subscript 255 is outside the bounds of an interior zero-length array 'struct <anonymous>[0]' [-Werror=zero-length-bounds]
  970 |   cmd.cmd.channel_list[cmd.cmd.num_channels++].channel = ch - 1;
      |   ~~~~~~~~~~~~~~~~~~~~^~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/wireless/ath/wil6210/wil6210.h:17,
                 from drivers/net/wireless/ath/wil6210/cfg80211.c:11:
drivers/net/wireless/ath/wil6210/wmi.h:477:4: note: while referencing 'channel_list'
  477 |  } channel_list[0];
      |    ^~~~~~~~~~~~

Turn this into a flexible array to avoid the warning.

Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200505143332.1398524-1-arnd@arndb.de
---
 drivers/net/wireless/ath/wil6210/wmi.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h
index e3558136e0c4..5bba45c1de48 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -474,7 +474,7 @@ struct wmi_start_scan_cmd {
 	struct {
 		u8 channel;
 		u8 reserved;
-	} channel_list[0];
+	} channel_list[];
 } __packed;
 
 #define WMI_MAX_PNO_SSID_NUM	(16)
-- 
cgit v1.2.3-59-g8ed1b


From e47210f72a681b5bd5189831f286053f7a04acb5 Mon Sep 17 00:00:00 2001
From: Wen Gong <wgong@codeaurora.org>
Date: Fri, 8 May 2020 05:55:38 +0300
Subject: ath10k: fix __le32 warning in
 ath10k_wmi_tlv_op_gen_request_peer_stats_info()

Sparse warned:

drivers/net/wireless/ath/ath10k/wmi-tlv.c:3013:34: warning: incorrect
type in assignment (different base types)
drivers/net/wireless/ath/ath10k/wmi-tlv.c:3013:34:    expected
restricted __le32 [usertype] reset_after_request
drivers/net/wireless/ath/ath10k/wmi-tlv.c:3013:34:    got unsigned int
[usertype] reset

Tested with QCA6174 SDIO with firmware WLAN.RMH.4.4.1-00042.

Fixes: 0f7cb26830a6 ("ath10k: add rx bitrate report for SDIO")
Signed-off-by: Wen Gong <wgong@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588747649-18051-1-git-send-email-kvalo@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/wmi-tlv.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.c b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
index afa03cb3aaf5..932266d1111b 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.c
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.c
@@ -3010,7 +3010,7 @@ ath10k_wmi_tlv_op_gen_request_peer_stats_info(struct ath10k *ar,
 	if (type == WMI_REQUEST_ONE_PEER_STATS_INFO)
 		ether_addr_copy(cmd->peer_macaddr.addr, addr);
 
-	cmd->reset_after_request = reset;
+	cmd->reset_after_request = __cpu_to_le32(reset);
 	ath10k_dbg(ar, ATH10K_DBG_WMI, "wmi tlv request peer stats info\n");
 	return skb;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 31858805f91ac79f0f0d9d982e90c68d6d3ae164 Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Fri, 8 May 2020 05:55:44 +0300
Subject: ath11k: Add support for multibus support

The current design supports only the AHB interface for
11ax chipsets. Refactor the code by adding a hif layer
for bus-level abstraction to support PCI-based devices.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200506094400.4740-2-govinds@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/ahb.c    | 49 ++++++++++++++----
 drivers/net/wireless/ath/ath11k/ahb.h    | 22 --------
 drivers/net/wireless/ath/ath11k/core.c   | 41 ++++-----------
 drivers/net/wireless/ath/ath11k/core.h   |  4 +-
 drivers/net/wireless/ath/ath11k/dp.c     |  1 +
 drivers/net/wireless/ath/ath11k/hal.c    | 86 ++++++++++++++++----------------
 drivers/net/wireless/ath/ath11k/hal_rx.c | 21 ++++----
 drivers/net/wireless/ath/ath11k/hal_tx.c | 11 ++--
 drivers/net/wireless/ath/ath11k/hif.h    | 65 ++++++++++++++++++++++++
 drivers/net/wireless/ath/ath11k/htc.c    |  3 +-
 10 files changed, 182 insertions(+), 121 deletions(-)
 create mode 100644 drivers/net/wireless/ath/ath11k/hif.h

diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index 3b2b76d602f2..eaba5a189b7f 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -10,6 +10,7 @@
 #include <linux/dma-mapping.h>
 #include "ahb.h"
 #include "debug.h"
+#include "hif.h"
 #include <linux/remoteproc.h>
 
 static const struct of_device_id ath11k_ahb_of_match[] = {
@@ -434,6 +435,16 @@ enum ext_irq_num {
 	tcl2host_status_ring,
 };
 
+static inline u32 ath11k_ahb_read32(struct ath11k_base *ab, u32 offset)
+{
+	return ioread32(ab->mem + offset);
+}
+
+static inline void ath11k_ahb_write32(struct ath11k_base *ab, u32 offset, u32 value)
+{
+	iowrite32(value, ab->mem + offset);
+}
+
 static void ath11k_ahb_kill_tasklets(struct ath11k_base *ab)
 {
 	int i;
@@ -575,7 +586,7 @@ static void ath11k_ahb_ce_irqs_disable(struct ath11k_base *ab)
 	}
 }
 
-int ath11k_ahb_start(struct ath11k_base *ab)
+static int ath11k_ahb_start(struct ath11k_base *ab)
 {
 	ath11k_ahb_ce_irqs_enable(ab);
 	ath11k_ce_rx_post_buf(ab);
@@ -583,7 +594,7 @@ int ath11k_ahb_start(struct ath11k_base *ab)
 	return 0;
 }
 
-void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
+static void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
 {
 	int i;
 
@@ -595,13 +606,13 @@ void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab)
 	}
 }
 
-void ath11k_ahb_ext_irq_disable(struct ath11k_base *ab)
+static void ath11k_ahb_ext_irq_disable(struct ath11k_base *ab)
 {
 	__ath11k_ahb_ext_irq_disable(ab);
 	ath11k_ahb_sync_ext_irqs(ab);
 }
 
-void ath11k_ahb_stop(struct ath11k_base *ab)
+static void ath11k_ahb_stop(struct ath11k_base *ab)
 {
 	if (!test_bit(ATH11K_FLAG_CRASH_FLUSH, &ab->dev_flags))
 		ath11k_ahb_ce_irqs_disable(ab);
@@ -611,7 +622,7 @@ void ath11k_ahb_stop(struct ath11k_base *ab)
 	ath11k_ce_cleanup_pipes(ab);
 }
 
-int ath11k_ahb_power_up(struct ath11k_base *ab)
+static int ath11k_ahb_power_up(struct ath11k_base *ab)
 {
 	int ret;
 
@@ -622,7 +633,7 @@ int ath11k_ahb_power_up(struct ath11k_base *ab)
 	return ret;
 }
 
-void ath11k_ahb_power_down(struct ath11k_base *ab)
+static void ath11k_ahb_power_down(struct ath11k_base *ab)
 {
 	rproc_shutdown(ab->tgt_rproc);
 }
@@ -834,8 +845,8 @@ static int ath11k_ahb_config_irq(struct ath11k_base *ab)
 	return ret;
 }
 
-int ath11k_ahb_map_service_to_pipe(struct ath11k_base *ab, u16 service_id,
-				   u8 *ul_pipe, u8 *dl_pipe)
+static int ath11k_ahb_map_service_to_pipe(struct ath11k_base *ab, u16 service_id,
+					  u8 *ul_pipe, u8 *dl_pipe)
 {
 	const struct service_to_pipe *entry;
 	bool ul_set = false, dl_set = false;
@@ -877,6 +888,18 @@ int ath11k_ahb_map_service_to_pipe(struct ath11k_base *ab, u16 service_id,
 	return 0;
 }
 
+static const struct ath11k_hif_ops ath11k_ahb_hif_ops = {
+	.start = ath11k_ahb_start,
+	.stop = ath11k_ahb_stop,
+	.read32 = ath11k_ahb_read32,
+	.write32 = ath11k_ahb_write32,
+	.irq_enable = ath11k_ahb_ext_irq_enable,
+	.irq_disable = ath11k_ahb_ext_irq_disable,
+	.map_service_to_pipe = ath11k_ahb_map_service_to_pipe,
+	.power_down = ath11k_ahb_power_down,
+	.power_up = ath11k_ahb_power_up,
+};
+
 static int ath11k_ahb_probe(struct platform_device *pdev)
 {
 	struct ath11k_base *ab;
@@ -915,6 +938,7 @@ static int ath11k_ahb_probe(struct platform_device *pdev)
 		return -ENOMEM;
 	}
 
+	ab->hif.ops = &ath11k_ahb_hif_ops;
 	ab->pdev = pdev;
 	ab->hw_rev = (enum ath11k_hw_rev)of_id->data;
 	ab->mem = mem;
@@ -993,12 +1017,17 @@ static struct platform_driver ath11k_ahb_driver = {
 	.remove = ath11k_ahb_remove,
 };
 
-int ath11k_ahb_init(void)
+static int ath11k_ahb_init(void)
 {
 	return platform_driver_register(&ath11k_ahb_driver);
 }
+module_init(ath11k_ahb_init);
 
-void ath11k_ahb_exit(void)
+static void ath11k_ahb_exit(void)
 {
 	platform_driver_unregister(&ath11k_ahb_driver);
 }
+module_exit(ath11k_ahb_exit);
+
+MODULE_DESCRIPTION("Driver support for Qualcomm Technologies 802.11ax wireless chip");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/ath/ath11k/ahb.h b/drivers/net/wireless/ath/ath11k/ahb.h
index 93f46dfe22df..6c7b26ac6545 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.h
+++ b/drivers/net/wireless/ath/ath11k/ahb.h
@@ -10,26 +10,4 @@
 #define ATH11K_AHB_RECOVERY_TIMEOUT (3 * HZ)
 struct ath11k_base;
 
-static inline u32 ath11k_ahb_read32(struct ath11k_base *ab, u32 offset)
-{
-	return ioread32(ab->mem + offset);
-}
-
-static inline void ath11k_ahb_write32(struct ath11k_base *ab, u32 offset, u32 value)
-{
-	iowrite32(value, ab->mem + offset);
-}
-
-void ath11k_ahb_ext_irq_enable(struct ath11k_base *ab);
-void ath11k_ahb_ext_irq_disable(struct ath11k_base *ab);
-int ath11k_ahb_start(struct ath11k_base *ab);
-void ath11k_ahb_stop(struct ath11k_base *ab);
-int ath11k_ahb_power_up(struct ath11k_base *ab);
-void ath11k_ahb_power_down(struct ath11k_base *ab);
-int ath11k_ahb_map_service_to_pipe(struct ath11k_base *ab, u16 service_id,
-				   u8 *ul_pipe, u8 *dl_pipe);
-
-int ath11k_ahb_init(void);
-void ath11k_ahb_exit(void);
-
 #endif
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index bf5657d2ae18..985ea3bcadb9 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -12,6 +12,7 @@
 #include "dp_tx.h"
 #include "dp_rx.h"
 #include "debug.h"
+#include "hif.h"
 
 unsigned int ath11k_debug_mask;
 module_param_named(debug_mask, ath11k_debug_mask, uint, 0644);
@@ -41,6 +42,7 @@ u8 ath11k_core_get_hw_mac_id(struct ath11k_base *ab, int pdev_idx)
 		return ATH11K_INVALID_HW_MAC_ID;
 	}
 }
+EXPORT_SYMBOL(ath11k_core_get_hw_mac_id);
 
 static int ath11k_core_create_board_name(struct ath11k_base *ab, char *name,
 					 size_t name_len)
@@ -324,7 +326,7 @@ static void ath11k_core_stop(struct ath11k_base *ab)
 {
 	if (!test_bit(ATH11K_FLAG_CRASH_FLUSH, &ab->dev_flags))
 		ath11k_qmi_firmware_stop(ab);
-	ath11k_ahb_stop(ab);
+	ath11k_hif_stop(ab);
 	ath11k_wmi_detach(ab);
 	ath11k_dp_pdev_reo_cleanup(ab);
 
@@ -347,7 +349,7 @@ static int ath11k_core_soc_create(struct ath11k_base *ab)
 		goto err_qmi_deinit;
 	}
 
-	ret = ath11k_ahb_power_up(ab);
+	ret = ath11k_hif_power_up(ab);
 	if (ret) {
 		ath11k_err(ab, "failed to power up :%d\n", ret);
 		goto err_debugfs_reg;
@@ -415,7 +417,7 @@ static void ath11k_core_pdev_destroy(struct ath11k_base *ab)
 {
 	ath11k_thermal_unregister(ab);
 	ath11k_mac_unregister(ab);
-	ath11k_ahb_ext_irq_disable(ab);
+	ath11k_hif_irq_disable(ab);
 	ath11k_dp_pdev_free(ab);
 	ath11k_debug_pdev_destroy(ab);
 }
@@ -443,7 +445,7 @@ static int ath11k_core_start(struct ath11k_base *ab,
 		goto err_wmi_detach;
 	}
 
-	ret = ath11k_ahb_start(ab);
+	ret = ath11k_hif_start(ab);
 	if (ret) {
 		ath11k_err(ab, "failed to start HIF: %d\n", ret);
 		goto err_wmi_detach;
@@ -522,7 +524,7 @@ err_reo_cleanup:
 err_mac_destroy:
 	ath11k_mac_destroy(ab);
 err_hif_stop:
-	ath11k_ahb_stop(ab);
+	ath11k_hif_stop(ab);
 err_wmi_detach:
 	ath11k_wmi_detach(ab);
 err_firmware_stop:
@@ -559,7 +561,7 @@ int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab)
 		ath11k_err(ab, "failed to create pdev core: %d\n", ret);
 		goto err_core_stop;
 	}
-	ath11k_ahb_ext_irq_enable(ab);
+	ath11k_hif_irq_enable(ab);
 	mutex_unlock(&ab->core_lock);
 
 	return 0;
@@ -579,9 +581,9 @@ static int ath11k_core_reconfigure_on_crash(struct ath11k_base *ab)
 
 	mutex_lock(&ab->core_lock);
 	ath11k_thermal_unregister(ab);
-	ath11k_ahb_ext_irq_disable(ab);
+	ath11k_hif_irq_disable(ab);
 	ath11k_dp_pdev_free(ab);
-	ath11k_ahb_stop(ab);
+	ath11k_hif_stop(ab);
 	ath11k_wmi_detach(ab);
 	ath11k_dp_pdev_reo_cleanup(ab);
 	mutex_unlock(&ab->core_lock);
@@ -744,7 +746,7 @@ void ath11k_core_deinit(struct ath11k_base *ab)
 
 	mutex_unlock(&ab->core_lock);
 
-	ath11k_ahb_power_down(ab);
+	ath11k_hif_power_down(ab);
 	ath11k_mac_destroy(ab);
 	ath11k_core_soc_destroy(ab);
 }
@@ -784,24 +786,3 @@ err_sc_free:
 	kfree(ab);
 	return NULL;
 }
-
-static int __init ath11k_init(void)
-{
-	int ret;
-
-	ret = ath11k_ahb_init();
-	if (ret)
-		printk(KERN_ERR "failed to register ath11k ahb driver: %d\n",
-		       ret);
-	return ret;
-}
-module_init(ath11k_init);
-
-static void __exit ath11k_exit(void)
-{
-	ath11k_ahb_exit();
-}
-module_exit(ath11k_exit);
-
-MODULE_DESCRIPTION("Driver support for Qualcomm Technologies 802.11ax wireless chip");
-MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index 70ec544eee67..e86513f5b757 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -607,7 +607,9 @@ struct ath11k_base {
 	void __iomem *mem;
 	unsigned long mem_len;
 
-	const struct ath11k_hif_ops *hif_ops;
+	struct {
+		const struct ath11k_hif_ops *ops;
+	} hif;
 
 	struct ath11k_ce ce;
 	struct timer_list rx_replenish_retry;
diff --git a/drivers/net/wireless/ath/ath11k/dp.c b/drivers/net/wireless/ath/ath11k/dp.c
index 145015d2f49c..9ae743e528af 100644
--- a/drivers/net/wireless/ath/ath11k/dp.c
+++ b/drivers/net/wireless/ath/ath11k/dp.c
@@ -701,6 +701,7 @@ int ath11k_dp_service_srng(struct ath11k_base *ab,
 done:
 	return tot_work_done;
 }
+EXPORT_SYMBOL(ath11k_dp_service_srng);
 
 void ath11k_dp_pdev_free(struct ath11k_base *ab)
 {
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index 9e40c4bdd674..6d937674215e 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -7,6 +7,7 @@
 #include "hal_tx.h"
 #include "debug.h"
 #include "hal_desc.h"
+#include "hif.h"
 
 static const struct hal_srng_config hw_srng_config[] = {
 	/* TODO: max_rings can populated by querying HW capabilities */
@@ -351,11 +352,12 @@ static void ath11k_hal_ce_dst_setup(struct ath11k_base *ab,
 	addr = HAL_CE_DST_RING_CTRL +
 	       srng_config->reg_start[HAL_SRNG_REG_GRP_R0] +
 	       ring_num * srng_config->reg_size[HAL_SRNG_REG_GRP_R0];
-	val = ath11k_ahb_read32(ab, addr);
+
+	val = ath11k_hif_read32(ab, addr);
 	val &= ~HAL_CE_DST_R0_DEST_CTRL_MAX_LEN;
 	val |= FIELD_PREP(HAL_CE_DST_R0_DEST_CTRL_MAX_LEN,
 			  srng->u.dst_ring.max_buffer_length);
-	ath11k_ahb_write32(ab, addr, val);
+	ath11k_hif_write32(ab, addr, val);
 }
 
 static void ath11k_hal_srng_dst_hw_init(struct ath11k_base *ab,
@@ -369,34 +371,34 @@ static void ath11k_hal_srng_dst_hw_init(struct ath11k_base *ab,
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R0];
 
 	if (srng->flags & HAL_SRNG_FLAGS_MSI_INTR) {
-		ath11k_ahb_write32(ab, reg_base +
-				       HAL_REO1_RING_MSI1_BASE_LSB_OFFSET,
+		ath11k_hif_write32(ab, reg_base +
+				   HAL_REO1_RING_MSI1_BASE_LSB_OFFSET,
 				   (u32)srng->msi_addr);
 
 		val = FIELD_PREP(HAL_REO1_RING_MSI1_BASE_MSB_ADDR,
 				 ((u64)srng->msi_addr >>
 				  HAL_ADDR_MSB_REG_SHIFT)) |
 		      HAL_REO1_RING_MSI1_BASE_MSB_MSI1_ENABLE;
-		ath11k_ahb_write32(ab, reg_base +
+		ath11k_hif_write32(ab, reg_base +
 				       HAL_REO1_RING_MSI1_BASE_MSB_OFFSET, val);
 
-		ath11k_ahb_write32(ab,
+		ath11k_hif_write32(ab,
 				   reg_base + HAL_REO1_RING_MSI1_DATA_OFFSET,
 				   srng->msi_data);
 	}
 
-	ath11k_ahb_write32(ab, reg_base, (u32)srng->ring_base_paddr);
+	ath11k_hif_write32(ab, reg_base, (u32)srng->ring_base_paddr);
 
 	val = FIELD_PREP(HAL_REO1_RING_BASE_MSB_RING_BASE_ADDR_MSB,
 			 ((u64)srng->ring_base_paddr >>
 			  HAL_ADDR_MSB_REG_SHIFT)) |
 	      FIELD_PREP(HAL_REO1_RING_BASE_MSB_RING_SIZE,
 			 (srng->entry_size * srng->num_entries));
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_BASE_MSB_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_BASE_MSB_OFFSET, val);
 
 	val = FIELD_PREP(HAL_REO1_RING_ID_RING_ID, srng->ring_id) |
 	      FIELD_PREP(HAL_REO1_RING_ID_ENTRY_SIZE, srng->entry_size);
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_ID_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_ID_OFFSET, val);
 
 	/* interrupt setup */
 	val = FIELD_PREP(HAL_REO1_RING_PRDR_INT_SETUP_INTR_TMR_THOLD,
@@ -406,22 +408,22 @@ static void ath11k_hal_srng_dst_hw_init(struct ath11k_base *ab,
 			  (srng->intr_batch_cntr_thres_entries *
 			   srng->entry_size));
 
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   reg_base + HAL_REO1_RING_PRODUCER_INT_SETUP_OFFSET,
 			   val);
 
 	hp_addr = hal->rdp.paddr +
 		  ((unsigned long)srng->u.dst_ring.hp_addr -
 		   (unsigned long)hal->rdp.vaddr);
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_HP_ADDR_LSB_OFFSET,
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_HP_ADDR_LSB_OFFSET,
 			   hp_addr & HAL_ADDR_LSB_REG_MASK);
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_HP_ADDR_MSB_OFFSET,
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_HP_ADDR_MSB_OFFSET,
 			   hp_addr >> HAL_ADDR_MSB_REG_SHIFT);
 
 	/* Initialize head and tail pointers to indicate ring is empty */
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R2];
-	ath11k_ahb_write32(ab, reg_base, 0);
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_TP_OFFSET, 0);
+	ath11k_hif_write32(ab, reg_base, 0);
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_TP_OFFSET, 0);
 	*srng->u.dst_ring.hp_addr = 0;
 
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R0];
@@ -434,7 +436,7 @@ static void ath11k_hal_srng_dst_hw_init(struct ath11k_base *ab,
 		val |= HAL_REO1_RING_MISC_MSI_SWAP;
 	val |= HAL_REO1_RING_MISC_SRNG_ENABLE;
 
-	ath11k_ahb_write32(ab, reg_base + HAL_REO1_RING_MISC_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_REO1_RING_MISC_OFFSET, val);
 }
 
 static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
@@ -448,34 +450,34 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R0];
 
 	if (srng->flags & HAL_SRNG_FLAGS_MSI_INTR) {
-		ath11k_ahb_write32(ab, reg_base +
-				       HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET,
+		ath11k_hif_write32(ab, reg_base +
+				    HAL_TCL1_RING_MSI1_BASE_LSB_OFFSET,
 				   (u32)srng->msi_addr);
 
 		val = FIELD_PREP(HAL_TCL1_RING_MSI1_BASE_MSB_ADDR,
 				 ((u64)srng->msi_addr >>
 				  HAL_ADDR_MSB_REG_SHIFT)) |
 		      HAL_TCL1_RING_MSI1_BASE_MSB_MSI1_ENABLE;
-		ath11k_ahb_write32(ab, reg_base +
+		ath11k_hif_write32(ab, reg_base +
 				       HAL_TCL1_RING_MSI1_BASE_MSB_OFFSET,
 				   val);
 
-		ath11k_ahb_write32(ab, reg_base +
+		ath11k_hif_write32(ab, reg_base +
 				       HAL_TCL1_RING_MSI1_DATA_OFFSET,
 				   srng->msi_data);
 	}
 
-	ath11k_ahb_write32(ab, reg_base, (u32)srng->ring_base_paddr);
+	ath11k_hif_write32(ab, reg_base, (u32)srng->ring_base_paddr);
 
 	val = FIELD_PREP(HAL_TCL1_RING_BASE_MSB_RING_BASE_ADDR_MSB,
 			 ((u64)srng->ring_base_paddr >>
 			  HAL_ADDR_MSB_REG_SHIFT)) |
 	      FIELD_PREP(HAL_TCL1_RING_BASE_MSB_RING_SIZE,
 			 (srng->entry_size * srng->num_entries));
-	ath11k_ahb_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_BASE_MSB_OFFSET, val);
 
 	val = FIELD_PREP(HAL_REO1_RING_ID_ENTRY_SIZE, srng->entry_size);
-	ath11k_ahb_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_ID_OFFSET, val);
 
 	/* interrupt setup */
 	/* NOTE: IPQ8074 v2 requires the interrupt timer threshold in the
@@ -488,7 +490,7 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 			  (srng->intr_batch_cntr_thres_entries *
 			   srng->entry_size));
 
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   reg_base + HAL_TCL1_RING_CONSR_INT_SETUP_IX0_OFFSET,
 			   val);
 
@@ -497,7 +499,7 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 		val |= FIELD_PREP(HAL_TCL1_RING_CONSR_INT_SETUP_IX1_LOW_THOLD,
 				  srng->u.src_ring.low_threshold);
 	}
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   reg_base + HAL_TCL1_RING_CONSR_INT_SETUP_IX1_OFFSET,
 			   val);
 
@@ -505,18 +507,18 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 		tp_addr = hal->rdp.paddr +
 			  ((unsigned long)srng->u.src_ring.tp_addr -
 			   (unsigned long)hal->rdp.vaddr);
-		ath11k_ahb_write32(ab,
+		ath11k_hif_write32(ab,
 				   reg_base + HAL_TCL1_RING_TP_ADDR_LSB_OFFSET,
 				   tp_addr & HAL_ADDR_LSB_REG_MASK);
-		ath11k_ahb_write32(ab,
+		ath11k_hif_write32(ab,
 				   reg_base + HAL_TCL1_RING_TP_ADDR_MSB_OFFSET,
 				   tp_addr >> HAL_ADDR_MSB_REG_SHIFT);
 	}
 
 	/* Initialize head and tail pointers to indicate ring is empty */
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R2];
-	ath11k_ahb_write32(ab, reg_base, 0);
-	ath11k_ahb_write32(ab, reg_base + HAL_TCL1_RING_TP_OFFSET, 0);
+	ath11k_hif_write32(ab, reg_base, 0);
+	ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_TP_OFFSET, 0);
 	*srng->u.src_ring.tp_addr = 0;
 
 	reg_base = srng->hwreg_base[HAL_SRNG_REG_GRP_R0];
@@ -533,7 +535,7 @@ static void ath11k_hal_srng_src_hw_init(struct ath11k_base *ab,
 
 	val |= HAL_TCL1_RING_MISC_SRNG_ENABLE;
 
-	ath11k_ahb_write32(ab, reg_base + HAL_TCL1_RING_MISC_OFFSET, val);
+	ath11k_hif_write32(ab, reg_base + HAL_TCL1_RING_MISC_OFFSET, val);
 }
 
 static void ath11k_hal_srng_hw_init(struct ath11k_base *ab,
@@ -889,13 +891,13 @@ void ath11k_hal_srng_access_end(struct ath11k_base *ab, struct hal_srng *srng)
 		if (srng->ring_dir == HAL_SRNG_DIR_SRC) {
 			srng->u.src_ring.last_tp =
 				*(volatile u32 *)srng->u.src_ring.tp_addr;
-			ath11k_ahb_write32(ab,
+			ath11k_hif_write32(ab,
 					   (unsigned long)srng->u.src_ring.hp_addr -
 					   (unsigned long)ab->mem,
 					   srng->u.src_ring.hp);
 		} else {
 			srng->u.dst_ring.last_hp = *srng->u.dst_ring.hp_addr;
-			ath11k_ahb_write32(ab,
+			ath11k_hif_write32(ab,
 					   (unsigned long)srng->u.dst_ring.tp_addr -
 					   (unsigned long)ab->mem,
 					   srng->u.dst_ring.tp);
@@ -929,20 +931,20 @@ void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 			     HAL_WBM_IDLE_SCATTER_BUF_SIZE;
 	}
 
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_R0_IDLE_LIST_CONTROL_ADDR,
 			   FIELD_PREP(HAL_WBM_SCATTER_BUFFER_SIZE, reg_scatter_buf_sz) |
 			   FIELD_PREP(HAL_WBM_LINK_DESC_IDLE_LIST_MODE, 0x1));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG + HAL_WBM_R0_IDLE_LIST_SIZE_ADDR,
 			   FIELD_PREP(HAL_WBM_SCATTER_RING_SIZE_OF_IDLE_LINK_DESC_LIST,
 				      reg_scatter_buf_sz * nsbufs));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_RING_BASE_LSB,
 			   FIELD_PREP(BUFFER_ADDR_INFO0_ADDR,
 				      sbuf[0].paddr & HAL_ADDR_LSB_REG_MASK));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_RING_BASE_MSB,
 			   FIELD_PREP(
@@ -953,12 +955,12 @@ void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 				BASE_ADDR_MATCH_TAG_VAL));
 
 	/* Setup head and tail pointers for the idle list */
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_HEAD_INFO_IX0,
 			   FIELD_PREP(BUFFER_ADDR_INFO0_ADDR,
 				      sbuf[nsbufs - 1].paddr));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_HEAD_INFO_IX1,
 			   FIELD_PREP(
@@ -967,18 +969,18 @@ void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 				 HAL_ADDR_MSB_REG_SHIFT)) |
 			   FIELD_PREP(HAL_WBM_SCATTERED_DESC_HEAD_P_OFFSET_IX1,
 				      (end_offset >> 2)));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_HEAD_INFO_IX0,
 			   FIELD_PREP(BUFFER_ADDR_INFO0_ADDR,
 				      sbuf[0].paddr));
 
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_TAIL_INFO_IX0,
 			   FIELD_PREP(BUFFER_ADDR_INFO0_ADDR,
 				      sbuf[0].paddr));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_TAIL_INFO_IX1,
 			   FIELD_PREP(
@@ -986,13 +988,13 @@ void ath11k_hal_setup_link_idle_list(struct ath11k_base *ab,
 				((u64)sbuf[0].paddr >> HAL_ADDR_MSB_REG_SHIFT)) |
 			   FIELD_PREP(HAL_WBM_SCATTERED_DESC_TAIL_P_OFFSET_IX1,
 				      0));
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_SCATTERED_DESC_PTR_HP_ADDR,
 			   2 * tot_link_desc);
 
 	/* Enable the SRNG */
-	ath11k_ahb_write32(ab,
+	ath11k_hif_write32(ab,
 			   HAL_SEQ_WCSS_UMAC_WBM_REG +
 			   HAL_WBM_IDLE_LINK_RING_MISC_ADDR, 0x40);
 }
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index f277c9434a25..69b0248a7baf 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -9,6 +9,7 @@
 #include "hal_tx.h"
 #include "hal_rx.h"
 #include "hal_desc.h"
+#include "hif.h"
 
 static void ath11k_hal_reo_set_desc_hdr(struct hal_desc_header *hdr,
 					u8 owner, u8 buffer_type, u32 magic)
@@ -804,34 +805,34 @@ void ath11k_hal_reo_hw_setup(struct ath11k_base *ab, u32 ring_hash_map)
 	u32 reo_base = HAL_SEQ_WCSS_UMAC_REO_REG;
 	u32 val;
 
-	val = ath11k_ahb_read32(ab, reo_base + HAL_REO1_GEN_ENABLE);
+	val = ath11k_hif_read32(ab, reo_base + HAL_REO1_GEN_ENABLE);
 
 	val &= ~HAL_REO1_GEN_ENABLE_FRAG_DST_RING;
 	val |= FIELD_PREP(HAL_REO1_GEN_ENABLE_FRAG_DST_RING,
 			  HAL_SRNG_RING_ID_REO2SW1) |
 	       FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_LIST_ENABLE, 1) |
 	       FIELD_PREP(HAL_REO1_GEN_ENABLE_AGING_FLUSH_ENABLE, 1);
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_GEN_ENABLE, val);
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_GEN_ENABLE, val);
 
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_0,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_0,
 			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_1,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_1,
 			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_2,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_2,
 			   HAL_DEFAULT_REO_TIMEOUT_USEC);
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_3,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_AGING_THRESH_IX_3,
 			   HAL_DEFAULT_REO_TIMEOUT_USEC);
 
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_0,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_0,
 			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
 				      ring_hash_map));
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_1,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_1,
 			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
 				      ring_hash_map));
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_2,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_2,
 			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
 				      ring_hash_map));
-	ath11k_ahb_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3,
+	ath11k_hif_write32(ab, reo_base + HAL_REO1_DEST_RING_CTRL_IX_3,
 			   FIELD_PREP(HAL_REO_DEST_RING_CTRL_HASH_RING_MAP,
 				      ring_hash_map));
 }
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index e4aa7e8a1284..b364c077c1f7 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -6,6 +6,7 @@
 #include "ahb.h"
 #include "hal.h"
 #include "hal_tx.h"
+#include "hif.h"
 
 #define DSCP_TID_MAP_TBL_ENTRY_SIZE 64
 
@@ -83,11 +84,11 @@ void ath11k_hal_tx_set_dscp_tid_map(struct ath11k_base *ab, int id)
 	u32 value;
 	int cnt = 0;
 
-	ctrl_reg_val = ath11k_ahb_read32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
+	ctrl_reg_val = ath11k_hif_read32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
 					 HAL_TCL1_RING_CMN_CTRL_REG);
 	/* Enable read/write access */
 	ctrl_reg_val |= HAL_TCL1_RING_CMN_CTRL_DSCP_TID_MAP_PROG_EN;
-	ath11k_ahb_write32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
+	ath11k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
 			   HAL_TCL1_RING_CMN_CTRL_REG, ctrl_reg_val);
 
 	addr = HAL_SEQ_WCSS_UMAC_TCL_REG + HAL_TCL1_RING_DSCP_TID_MAP +
@@ -118,15 +119,15 @@ void ath11k_hal_tx_set_dscp_tid_map(struct ath11k_base *ab, int id)
 	}
 
 	for (i = 0; i < HAL_DSCP_TID_TBL_SIZE; i += 4) {
-		ath11k_ahb_write32(ab, addr, *(u32 *)&hw_map_val[i]);
+		ath11k_hif_write32(ab, addr, *(u32 *)&hw_map_val[i]);
 		addr += 4;
 	}
 
 	/* Disable read/write access */
-	ctrl_reg_val = ath11k_ahb_read32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
+	ctrl_reg_val = ath11k_hif_read32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
 					 HAL_TCL1_RING_CMN_CTRL_REG);
 	ctrl_reg_val &= ~HAL_TCL1_RING_CMN_CTRL_DSCP_TID_MAP_PROG_EN;
-	ath11k_ahb_write32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
+	ath11k_hif_write32(ab, HAL_SEQ_WCSS_UMAC_TCL_REG +
 			   HAL_TCL1_RING_CMN_CTRL_REG,
 			   ctrl_reg_val);
 }
diff --git a/drivers/net/wireless/ath/ath11k/hif.h b/drivers/net/wireless/ath/ath11k/hif.h
new file mode 100644
index 000000000000..165f7e51c238
--- /dev/null
+++ b/drivers/net/wireless/ath/ath11k/hif.h
@@ -0,0 +1,65 @@
+/* SPDX-License-Identifier: BSD-3-Clause-Clear */
+/*
+ * Copyright (c) 2019-2020 The Linux Foundation. All rights reserved.
+ */
+
+#include "core.h"
+
+struct ath11k_hif_ops {
+	u32 (*read32)(struct ath11k_base *sc, u32 address);
+	void (*write32)(struct ath11k_base *sc, u32 address, u32 data);
+	void (*irq_enable)(struct ath11k_base *sc);
+	void (*irq_disable)(struct ath11k_base *sc);
+	int (*start)(struct ath11k_base *sc);
+	void (*stop)(struct ath11k_base *sc);
+	int (*power_up)(struct ath11k_base *sc);
+	void (*power_down)(struct ath11k_base *sc);
+	int (*map_service_to_pipe)(struct ath11k_base *sc, u16 service_id,
+				   u8 *ul_pipe, u8 *dl_pipe);
+};
+
+static inline int ath11k_hif_start(struct ath11k_base *sc)
+{
+	return sc->hif.ops->start(sc);
+}
+
+static inline void ath11k_hif_stop(struct ath11k_base *sc)
+{
+	sc->hif.ops->stop(sc);
+}
+
+static inline void ath11k_hif_irq_enable(struct ath11k_base *sc)
+{
+	sc->hif.ops->irq_enable(sc);
+}
+
+static inline void ath11k_hif_irq_disable(struct ath11k_base *sc)
+{
+	sc->hif.ops->irq_disable(sc);
+}
+
+static inline int ath11k_hif_power_up(struct ath11k_base *sc)
+{
+	return sc->hif.ops->power_up(sc);
+}
+
+static inline void ath11k_hif_power_down(struct ath11k_base *sc)
+{
+	sc->hif.ops->power_down(sc);
+}
+
+static inline u32 ath11k_hif_read32(struct ath11k_base *sc, u32 address)
+{
+	return sc->hif.ops->read32(sc, address);
+}
+
+static inline void ath11k_hif_write32(struct ath11k_base *sc, u32 address, u32 data)
+{
+	sc->hif.ops->write32(sc, address, data);
+}
+
+static inline int ath11k_hif_map_service_to_pipe(struct ath11k_base *sc, u16 service_id,
+						 u8 *ul_pipe, u8 *dl_pipe)
+{
+	return sc->hif.ops->map_service_to_pipe(sc, service_id, ul_pipe, dl_pipe);
+}
diff --git a/drivers/net/wireless/ath/ath11k/htc.c b/drivers/net/wireless/ath/ath11k/htc.c
index 8f54f58b83e6..1909fc3287ba 100644
--- a/drivers/net/wireless/ath/ath11k/htc.c
+++ b/drivers/net/wireless/ath/ath11k/htc.c
@@ -7,6 +7,7 @@
 
 #include "ahb.h"
 #include "debug.h"
+#include "hif.h"
 
 struct sk_buff *ath11k_htc_alloc_skb(struct ath11k_base *ab, int size)
 {
@@ -672,7 +673,7 @@ setup:
 	/* copy all the callbacks */
 	ep->ep_ops = conn_req->ep_ops;
 
-	status = ath11k_ahb_map_service_to_pipe(htc->ab,
+	status = ath11k_hif_map_service_to_pipe(htc->ab,
 						ep->service_id,
 						&ep->ul_pipe_id,
 						&ep->dl_pipe_id);
-- 
cgit v1.2.3-59-g8ed1b


From 630ad41c195c7064d16cbe7c53a65f276efcb02c Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Fri, 8 May 2020 05:55:48 +0300
Subject: ath11k: Add drv private for bus opaque struct

Add a driver-private opaque area (drv_priv) at the end of struct
ath11k_base so that each bus can keep its own bus-level structure,
as needed for multibus support.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200506094400.4740-3-govinds@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/ahb.c  | 2 +-
 drivers/net/wireless/ath/ath11k/core.c | 5 +++--
 drivers/net/wireless/ath/ath11k/core.h | 6 +++++-
 drivers/net/wireless/ath/ath11k/hw.h   | 5 +++++
 4 files changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index eaba5a189b7f..ef1038aa5692 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -932,7 +932,7 @@ static int ath11k_ahb_probe(struct platform_device *pdev)
 		return ret;
 	}
 
-	ab = ath11k_core_alloc(&pdev->dev);
+	ab = ath11k_core_alloc(&pdev->dev, 0, ATH11K_BUS_AHB);
 	if (!ab) {
 		dev_err(&pdev->dev, "failed to allocate ath11k base\n");
 		return -ENOMEM;
diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index 985ea3bcadb9..a91eae6a4e57 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -756,11 +756,12 @@ void ath11k_core_free(struct ath11k_base *ab)
 	kfree(ab);
 }
 
-struct ath11k_base *ath11k_core_alloc(struct device *dev)
+struct ath11k_base *ath11k_core_alloc(struct device *dev, size_t priv_size,
+				      enum ath11k_bus bus)
 {
 	struct ath11k_base *ab;
 
-	ab = kzalloc(sizeof(*ab), GFP_KERNEL);
+	ab = kzalloc(sizeof(*ab) + priv_size, GFP_KERNEL);
 	if (!ab)
 		return NULL;
 
diff --git a/drivers/net/wireless/ath/ath11k/core.h b/drivers/net/wireless/ath/ath11k/core.h
index e86513f5b757..e04f0e711779 100644
--- a/drivers/net/wireless/ath/ath11k/core.h
+++ b/drivers/net/wireless/ath/ath11k/core.h
@@ -667,6 +667,9 @@ struct ath11k_base {
 
 	/* Round robbin based TCL ring selector */
 	atomic_t tcl_ring_selector;
+
+	/* must be last */
+	u8 drv_priv[0] __aligned(sizeof(void *));
 };
 
 struct ath11k_fw_stats_pdev {
@@ -803,7 +806,8 @@ struct ath11k_peer *ath11k_peer_find_by_id(struct ath11k_base *ab, int peer_id);
 int ath11k_core_qmi_firmware_ready(struct ath11k_base *ab);
 int ath11k_core_init(struct ath11k_base *ath11k);
 void ath11k_core_deinit(struct ath11k_base *ath11k);
-struct ath11k_base *ath11k_core_alloc(struct device *dev);
+struct ath11k_base *ath11k_core_alloc(struct device *dev, size_t priv_size,
+				      enum ath11k_bus bus);
 void ath11k_core_free(struct ath11k_base *ath11k);
 int ath11k_core_fetch_bdf(struct ath11k_base *ath11k,
 			  struct ath11k_board_data *bd);
diff --git a/drivers/net/wireless/ath/ath11k/hw.h b/drivers/net/wireless/ath/ath11k/hw.h
index cdec95644758..dc4434aefbbe 100644
--- a/drivers/net/wireless/ath/ath11k/hw.h
+++ b/drivers/net/wireless/ath/ath11k/hw.h
@@ -99,6 +99,11 @@ enum ath11k_hw_rate_ofdm {
 	ATH11K_HW_RATE_OFDM_9M,
 };
 
+enum ath11k_bus {
+	ATH11K_BUS_AHB,
+	ATH11K_BUS_PCI,
+};
+
 struct ath11k_hw_params {
 	const char *name;
 	struct {
-- 
cgit v1.2.3-59-g8ed1b


From 27143fa93c3b000c968992ca2620e784249862d3 Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Fri, 8 May 2020 05:55:48 +0300
Subject: ath11k: Remove bus layer includes from upper layer

Bus-level header files need to be abstracted away from the upper
layers. Remove the bus-layer includes and add the appropriate header
files in their place.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200506094400.4740-4-govinds@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/core.c     | 1 -
 drivers/net/wireless/ath/ath11k/hal.c      | 1 -
 drivers/net/wireless/ath/ath11k/hal_desc.h | 2 ++
 drivers/net/wireless/ath/ath11k/hal_rx.c   | 1 -
 drivers/net/wireless/ath/ath11k/hal_tx.c   | 2 +-
 drivers/net/wireless/ath/ath11k/hal_tx.h   | 1 +
 drivers/net/wireless/ath/ath11k/htc.c      | 1 -
 7 files changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/core.c b/drivers/net/wireless/ath/ath11k/core.c
index a91eae6a4e57..02501cc154fe 100644
--- a/drivers/net/wireless/ath/ath11k/core.c
+++ b/drivers/net/wireless/ath/ath11k/core.c
@@ -7,7 +7,6 @@
 #include <linux/slab.h>
 #include <linux/remoteproc.h>
 #include <linux/firmware.h>
-#include "ahb.h"
 #include "core.h"
 #include "dp_tx.h"
 #include "dp_rx.h"
diff --git a/drivers/net/wireless/ath/ath11k/hal.c b/drivers/net/wireless/ath/ath11k/hal.c
index 6d937674215e..d63785178afa 100644
--- a/drivers/net/wireless/ath/ath11k/hal.c
+++ b/drivers/net/wireless/ath/ath11k/hal.c
@@ -3,7 +3,6 @@
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
  */
 #include <linux/dma-mapping.h>
-#include "ahb.h"
 #include "hal_tx.h"
 #include "debug.h"
 #include "hal_desc.h"
diff --git a/drivers/net/wireless/ath/ath11k/hal_desc.h b/drivers/net/wireless/ath/ath11k/hal_desc.h
index a1f747c1c44d..8a592814efa0 100644
--- a/drivers/net/wireless/ath/ath11k/hal_desc.h
+++ b/drivers/net/wireless/ath/ath11k/hal_desc.h
@@ -2,6 +2,8 @@
 /*
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
  */
+#include "core.h"
+
 #ifndef ATH11K_HAL_DESC_H
 #define ATH11K_HAL_DESC_H
 
diff --git a/drivers/net/wireless/ath/ath11k/hal_rx.c b/drivers/net/wireless/ath/ath11k/hal_rx.c
index 69b0248a7baf..129c9e1efeb9 100644
--- a/drivers/net/wireless/ath/ath11k/hal_rx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_rx.c
@@ -3,7 +3,6 @@
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
  */
 
-#include "ahb.h"
 #include "debug.h"
 #include "hal.h"
 #include "hal_tx.h"
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.c b/drivers/net/wireless/ath/ath11k/hal_tx.c
index b364c077c1f7..81937c29ffca 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.c
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.c
@@ -3,7 +3,7 @@
  * Copyright (c) 2018-2019 The Linux Foundation. All rights reserved.
  */
 
-#include "ahb.h"
+#include "hal_desc.h"
 #include "hal.h"
 #include "hal_tx.h"
 #include "hif.h"
diff --git a/drivers/net/wireless/ath/ath11k/hal_tx.h b/drivers/net/wireless/ath/ath11k/hal_tx.h
index ce48a61bfb66..d4760a20fdac 100644
--- a/drivers/net/wireless/ath/ath11k/hal_tx.h
+++ b/drivers/net/wireless/ath/ath11k/hal_tx.h
@@ -7,6 +7,7 @@
 #define ATH11K_HAL_TX_H
 
 #include "hal_desc.h"
+#include "core.h"
 
 #define HAL_TX_ADDRX_EN			1
 #define HAL_TX_ADDRY_EN			2
diff --git a/drivers/net/wireless/ath/ath11k/htc.c b/drivers/net/wireless/ath/ath11k/htc.c
index 1909fc3287ba..ad13c648b679 100644
--- a/drivers/net/wireless/ath/ath11k/htc.c
+++ b/drivers/net/wireless/ath/ath11k/htc.c
@@ -5,7 +5,6 @@
 #include <linux/skbuff.h>
 #include <linux/ctype.h>
 
-#include "ahb.h"
 #include "debug.h"
 #include "hif.h"
 
-- 
cgit v1.2.3-59-g8ed1b


From d3ed0cf047cf054fd54589e5d4247f0001c5d85c Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Fri, 8 May 2020 05:55:52 +0300
Subject: ath10k: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507041127.GA31587@embeddedor
---
 drivers/net/wireless/ath/ath10k/ce.h       |  2 +-
 drivers/net/wireless/ath/ath10k/core.h     |  2 +-
 drivers/net/wireless/ath/ath10k/coredump.h |  4 +--
 drivers/net/wireless/ath/ath10k/debug.h    |  2 +-
 drivers/net/wireless/ath/ath10k/htt.h      | 42 +++++++++++++++---------------
 drivers/net/wireless/ath/ath10k/hw.h       |  2 +-
 drivers/net/wireless/ath/ath10k/wmi-tlv.h  |  6 ++---
 drivers/net/wireless/ath/ath10k/wmi.h      | 42 +++++++++++++++---------------
 8 files changed, 51 insertions(+), 51 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/ce.h b/drivers/net/wireless/ath/ath10k/ce.h
index 9711f0eb9117..75df79d43120 100644
--- a/drivers/net/wireless/ath/ath10k/ce.h
+++ b/drivers/net/wireless/ath/ath10k/ce.h
@@ -110,7 +110,7 @@ struct ath10k_ce_ring {
 	struct ce_desc_64 *shadow_base;
 
 	/* keep last */
-	void *per_transfer_context[0];
+	void *per_transfer_context[];
 };
 
 struct ath10k_ce_pipe {
diff --git a/drivers/net/wireless/ath/ath10k/core.h b/drivers/net/wireless/ath/ath10k/core.h
index ceac76553b8f..5c18f6c20462 100644
--- a/drivers/net/wireless/ath/ath10k/core.h
+++ b/drivers/net/wireless/ath/ath10k/core.h
@@ -1262,7 +1262,7 @@ struct ath10k {
 	int coex_gpio_pin;
 
 	/* must be last */
-	u8 drv_priv[0] __aligned(sizeof(void *));
+	u8 drv_priv[] __aligned(sizeof(void *));
 };
 
 static inline bool ath10k_peer_stats_enabled(struct ath10k *ar)
diff --git a/drivers/net/wireless/ath/ath10k/coredump.h b/drivers/net/wireless/ath/ath10k/coredump.h
index 8bf03e8c1d3a..e760ce1a5f1e 100644
--- a/drivers/net/wireless/ath/ath10k/coredump.h
+++ b/drivers/net/wireless/ath/ath10k/coredump.h
@@ -88,7 +88,7 @@ struct ath10k_dump_file_data {
 	u8 unused[128];
 
 	/* struct ath10k_tlv_dump_data + more */
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct ath10k_dump_ram_data_hdr {
@@ -100,7 +100,7 @@ struct ath10k_dump_ram_data_hdr {
 	/* length of payload data, not including this header */
 	__le32 length;
 
-	u8 data[0];
+	u8 data[];
 };
 
 /* magic number to fill the holes not copied due to sections in regions */
diff --git a/drivers/net/wireless/ath/ath10k/debug.h b/drivers/net/wireless/ath/ath10k/debug.h
index 4cbfd9279d6f..997c1c80aba7 100644
--- a/drivers/net/wireless/ath/ath10k/debug.h
+++ b/drivers/net/wireless/ath/ath10k/debug.h
@@ -65,7 +65,7 @@ struct ath10k_pktlog_hdr {
 	__le16 log_type; /* Type of log information foll this header */
 	__le16 size; /* Size of variable length log information in bytes */
 	__le32 timestamp;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* FIXME: How to calculate the buffer size sanely? */
diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index 8f3710cf28f4..e504be63173a 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -289,12 +289,12 @@ struct htt_rx_ring_setup_hdr {
 
 struct htt_rx_ring_setup_32 {
 	struct htt_rx_ring_setup_hdr hdr;
-	struct htt_rx_ring_setup_ring32 rings[0];
+	struct htt_rx_ring_setup_ring32 rings[];
 } __packed;
 
 struct htt_rx_ring_setup_64 {
 	struct htt_rx_ring_setup_hdr hdr;
-	struct htt_rx_ring_setup_ring64 rings[0];
+	struct htt_rx_ring_setup_ring64 rings[];
 } __packed;
 
 /*
@@ -732,7 +732,7 @@ struct htt_rx_indication {
 	 * %mpdu_ranges starts after &%prefix + roundup(%fw_rx_desc_bytes, 4)
 	 * and has %num_mpdu_ranges elements.
 	 */
-	struct htt_rx_indication_mpdu_range mpdu_ranges[0];
+	struct htt_rx_indication_mpdu_range mpdu_ranges[];
 } __packed;
 
 /* High latency version of the RX indication */
@@ -741,7 +741,7 @@ struct htt_rx_indication_hl {
 	struct htt_rx_indication_ppdu ppdu;
 	struct htt_rx_indication_prefix prefix;
 	struct fw_rx_desc_hl fw_desc;
-	struct htt_rx_indication_mpdu_range mpdu_ranges[0];
+	struct htt_rx_indication_mpdu_range mpdu_ranges[];
 } __packed;
 
 struct htt_hl_rx_desc {
@@ -908,7 +908,7 @@ struct htt_append_retries {
 struct htt_data_tx_completion_ext {
 	struct htt_append_retries a_retries;
 	__le32 t_stamp;
-	__le16 msdus_rssi[0];
+	__le16 msdus_rssi[];
 } __packed;
 
 /**
@@ -992,7 +992,7 @@ struct htt_data_tx_completion {
 	} __packed;
 	u8 num_msdus;
 	u8 flags2; /* HTT_TX_CMPL_FLAG_DATA_RSSI */
-	__le16 msdus[0]; /* variable length based on %num_msdus */
+	__le16 msdus[]; /* variable length based on %num_msdus */
 } __packed;
 
 #define HTT_TX_PPDU_DUR_INFO0_PEER_ID_MASK	GENMASK(15, 0)
@@ -1007,7 +1007,7 @@ struct htt_data_tx_ppdu_dur {
 
 struct htt_data_tx_compl_ppdu_dur {
 	__le32 info0; /* HTT_TX_COMPL_PPDU_DUR_INFO0_ */
-	struct htt_data_tx_ppdu_dur ppdu_dur[0];
+	struct htt_data_tx_ppdu_dur ppdu_dur[];
 } __packed;
 
 struct htt_tx_compl_ind_base {
@@ -1033,7 +1033,7 @@ struct htt_rc_update {
 	u8 addr[6];
 	u8 num_elems;
 	u8 rsvd0;
-	struct htt_rc_tx_done_params params[0]; /* variable length %num_elems */
+	struct htt_rc_tx_done_params params[]; /* variable length %num_elems */
 } __packed;
 
 /* see htt_rx_indication for similar fields and descriptions */
@@ -1050,7 +1050,7 @@ struct htt_rx_fragment_indication {
 	__le16 fw_rx_desc_bytes;
 	__le16 rsvd0;
 
-	u8 fw_msdu_rx_desc[0];
+	u8 fw_msdu_rx_desc[];
 } __packed;
 
 #define ATH10K_IEEE80211_EXTIV               BIT(5)
@@ -1075,7 +1075,7 @@ struct htt_rx_pn_ind {
 	u8 seqno_end;
 	u8 pn_ie_count;
 	u8 reserved;
-	u8 pn_ies[0];
+	u8 pn_ies[];
 } __packed;
 
 struct htt_rx_offload_msdu {
@@ -1084,7 +1084,7 @@ struct htt_rx_offload_msdu {
 	u8 vdev_id;
 	u8 tid;
 	u8 fw_desc;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct htt_rx_offload_ind {
@@ -1167,7 +1167,7 @@ struct htt_rx_test {
 	 *  a) num_ints * sizeof(__le32)
 	 *  b) num_chars * sizeof(u8) aligned to 4bytes
 	 */
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 static inline __le32 *htt_rx_test_get_ints(struct htt_rx_test *rx_test)
@@ -1201,7 +1201,7 @@ static inline u8 *htt_rx_test_get_chars(struct htt_rx_test *rx_test)
  */
 struct htt_pktlog_msg {
 	u8 pad[3];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct htt_dbg_stats_rx_reorder_stats {
@@ -1490,7 +1490,7 @@ struct htt_stats_conf_item {
 	} __packed;
 	u8 pad;
 	__le16 length;
-	u8 payload[0]; /* roundup(length, 4) long */
+	u8 payload[]; /* roundup(length, 4) long */
 } __packed;
 
 struct htt_stats_conf {
@@ -1499,7 +1499,7 @@ struct htt_stats_conf {
 	__le32 cookie_msb;
 
 	/* each item has variable length! */
-	struct htt_stats_conf_item items[0];
+	struct htt_stats_conf_item items[];
 } __packed;
 
 static inline struct htt_stats_conf_item *htt_stats_conf_next_item(
@@ -1674,7 +1674,7 @@ struct htt_tx_fetch_ind {
 	__le16 num_resp_ids;
 	__le16 num_records;
 	struct htt_tx_fetch_record records[0];
-	__le32 resp_ids[0]; /* ath10k_htt_get_tx_fetch_ind_resp_ids() */
+	__le32 resp_ids[]; /* ath10k_htt_get_tx_fetch_ind_resp_ids() */
 } __packed;
 
 static inline void *
@@ -1689,13 +1689,13 @@ struct htt_tx_fetch_resp {
 	__le16 fetch_seq_num;
 	__le16 num_records;
 	__le32 token;
-	struct htt_tx_fetch_record records[0];
+	struct htt_tx_fetch_record records[];
 } __packed;
 
 struct htt_tx_fetch_confirm {
 	u8 pad0;
 	__le16 num_resp_ids;
-	__le32 resp_ids[0];
+	__le32 resp_ids[];
 } __packed;
 
 enum htt_tx_mode_switch_mode {
@@ -1727,7 +1727,7 @@ struct htt_tx_mode_switch_ind {
 	__le16 info0; /* HTT_TX_MODE_SWITCH_IND_INFO0_ */
 	__le16 info1; /* HTT_TX_MODE_SWITCH_IND_INFO1_ */
 	u8 pad1[2];
-	struct htt_tx_mode_switch_record records[0];
+	struct htt_tx_mode_switch_record records[];
 } __packed;
 
 struct htt_channel_change {
@@ -1757,7 +1757,7 @@ struct htt_peer_tx_stats {
 	u8 num_ppdu;
 	u8 ppdu_len;
 	u8 version;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 #define ATH10K_10_2_TX_STATS_OFFSET	136
@@ -2206,7 +2206,7 @@ struct htt_rx_desc {
 		struct rx_ppdu_end ppdu_end;
 	} __packed;
 	u8 rx_hdr_status[RX_HTT_HDR_STATUS_LEN];
-	u8 msdu_payload[0];
+	u8 msdu_payload[];
 };
 
 #define HTT_RX_DESC_HL_INFO_SEQ_NUM_MASK           0x00000fff
diff --git a/drivers/net/wireless/ath/ath10k/hw.h b/drivers/net/wireless/ath/ath10k/hw.h
index d9907a4648a8..f16edcb9f326 100644
--- a/drivers/net/wireless/ath/ath10k/hw.h
+++ b/drivers/net/wireless/ath/ath10k/hw.h
@@ -165,7 +165,7 @@ enum qca9377_chip_id_rev {
 struct ath10k_fw_ie {
 	__le32 id;
 	__le32 len;
-	u8 data[0];
+	u8 data[];
 };
 
 enum ath10k_fw_ie_type {
diff --git a/drivers/net/wireless/ath/ath10k/wmi-tlv.h b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
index 6e0537dabd1d..e77b97ca5c7f 100644
--- a/drivers/net/wireless/ath/ath10k/wmi-tlv.h
+++ b/drivers/net/wireless/ath/ath10k/wmi-tlv.h
@@ -1637,7 +1637,7 @@ wmi_tlv_svc_map_ext(const __le32 *in, unsigned long *out, size_t len)
 struct wmi_tlv {
 	__le16 len;
 	__le16 tag;
-	u8 value[0];
+	u8 value[];
 } __packed;
 
 struct ath10k_mgmt_tx_pkt_addr {
@@ -2037,7 +2037,7 @@ struct wmi_tlv_bcn_tx_status_ev {
 struct wmi_tlv_bcn_prb_info {
 	__le32 caps;
 	__le32 erp;
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 struct wmi_tlv_bcn_tmpl_cmd {
@@ -2068,7 +2068,7 @@ struct wmi_tlv_diag_item {
 	__le16 len;
 	__le32 timestamp;
 	__le32 code;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct wmi_tlv_diag_data_ev {
diff --git a/drivers/net/wireless/ath/ath10k/wmi.h b/drivers/net/wireless/ath/ath10k/wmi.h
index 0f05405bebc0..511144b36231 100644
--- a/drivers/net/wireless/ath/ath10k/wmi.h
+++ b/drivers/net/wireless/ath/ath10k/wmi.h
@@ -2292,7 +2292,7 @@ struct wmi_service_ready_event {
 	 * where FW can access this memory directly (or) by DMA.
 	 */
 	__le32 num_mem_reqs;
-	struct wlan_host_mem_req mem_reqs[0];
+	struct wlan_host_mem_req mem_reqs[];
 } __packed;
 
 /* This is the definition from 10.X firmware branch */
@@ -2331,7 +2331,7 @@ struct wmi_10x_service_ready_event {
 	 */
 	__le32 num_mem_reqs;
 
-	struct wlan_host_mem_req mem_reqs[0];
+	struct wlan_host_mem_req mem_reqs[];
 } __packed;
 
 #define WMI_SERVICE_READY_TIMEOUT_HZ (5 * HZ)
@@ -3086,19 +3086,19 @@ struct wmi_chan_list_entry {
 struct wmi_chan_list {
 	__le32 tag; /* WMI_CHAN_LIST_TAG */
 	__le32 num_chan;
-	struct wmi_chan_list_entry channel_list[0];
+	struct wmi_chan_list_entry channel_list[];
 } __packed;
 
 struct wmi_bssid_list {
 	__le32 tag; /* WMI_BSSID_LIST_TAG */
 	__le32 num_bssid;
-	struct wmi_mac_addr bssid_list[0];
+	struct wmi_mac_addr bssid_list[];
 } __packed;
 
 struct wmi_ie_data {
 	__le32 tag; /* WMI_IE_TAG */
 	__le32 ie_len;
-	u8 ie_data[0];
+	u8 ie_data[];
 } __packed;
 
 struct wmi_ssid {
@@ -3109,7 +3109,7 @@ struct wmi_ssid {
 struct wmi_ssid_list {
 	__le32 tag; /* WMI_SSID_LIST_TAG */
 	__le32 num_ssids;
-	struct wmi_ssid ssids[0];
+	struct wmi_ssid ssids[];
 } __packed;
 
 /* prefix used by scan requestor ids on the host */
@@ -3311,7 +3311,7 @@ struct wmi_stop_scan_arg {
 
 struct wmi_scan_chan_list_cmd {
 	__le32 num_scan_chans;
-	struct wmi_channel chan_info[0];
+	struct wmi_channel chan_info[];
 } __packed;
 
 struct wmi_scan_chan_list_arg {
@@ -3395,12 +3395,12 @@ struct wmi_mgmt_rx_hdr_v2 {
 
 struct wmi_mgmt_rx_event_v1 {
 	struct wmi_mgmt_rx_hdr_v1 hdr;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_mgmt_rx_event_v2 {
 	struct wmi_mgmt_rx_hdr_v2 hdr;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_10_4_mgmt_rx_hdr {
@@ -3415,7 +3415,7 @@ struct wmi_10_4_mgmt_rx_hdr {
 
 struct wmi_10_4_mgmt_rx_event {
 	struct wmi_10_4_mgmt_rx_hdr hdr;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_mgmt_rx_ext_info {
@@ -3455,14 +3455,14 @@ struct wmi_phyerr {
 	__le32 rssi_chains[4];
 	__le16 nf_chains[4];
 	__le32 buf_len;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_phyerr_event {
 	__le32 num_phyerrs;
 	__le32 tsf_l32;
 	__le32 tsf_u32;
-	struct wmi_phyerr phyerrs[0];
+	struct wmi_phyerr phyerrs[];
 } __packed;
 
 struct wmi_10_4_phyerr_event {
@@ -3479,7 +3479,7 @@ struct wmi_10_4_phyerr_event {
 	__le32 phy_err_mask[2];
 	__le32 tsf_timestamp;
 	__le32 buf_len;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_radar_found_info {
@@ -3592,7 +3592,7 @@ struct wmi_mgmt_tx_hdr {
 
 struct wmi_mgmt_tx_cmd {
 	struct wmi_mgmt_tx_hdr hdr;
-	u8 buf[0];
+	u8 buf[];
 } __packed;
 
 struct wmi_echo_event {
@@ -4628,7 +4628,7 @@ struct wmi_stats_event {
 	 *  By having a zero sized array, the pointer to data area
 	 *  becomes available without increasing the struct size
 	 */
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct wmi_10_2_stats_event {
@@ -4638,7 +4638,7 @@ struct wmi_10_2_stats_event {
 	__le32 num_vdev_stats;
 	__le32 num_peer_stats;
 	__le32 num_bcnflt_stats;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 /*
@@ -5033,7 +5033,7 @@ struct wmi_vdev_install_key_cmd {
 	__le32 key_rxmic_len;
 
 	/* contains key followed by tx mic followed by rx mic */
-	u8 key_data[0];
+	u8 key_data[];
 } __packed;
 
 struct wmi_vdev_install_key_arg {
@@ -5703,7 +5703,7 @@ struct wmi_bcn_tx_hdr {
 
 struct wmi_bcn_tx_cmd {
 	struct wmi_bcn_tx_hdr hdr;
-	u8 *bcn[0];
+	u8 *bcn[];
 } __packed;
 
 struct wmi_bcn_tx_arg {
@@ -6120,7 +6120,7 @@ struct wmi_bcn_info {
 
 struct wmi_host_swba_event {
 	__le32 vdev_map;
-	struct wmi_bcn_info bcn_info[0];
+	struct wmi_bcn_info bcn_info[];
 } __packed;
 
 struct wmi_10_2_4_bcn_info {
@@ -6130,7 +6130,7 @@ struct wmi_10_2_4_bcn_info {
 
 struct wmi_10_2_4_host_swba_event {
 	__le32 vdev_map;
-	struct wmi_10_2_4_bcn_info bcn_info[0];
+	struct wmi_10_2_4_bcn_info bcn_info[];
 } __packed;
 
 /* 16 words = 512 client + 1 word = for guard */
@@ -6171,7 +6171,7 @@ struct wmi_10_4_bcn_info {
 
 struct wmi_10_4_host_swba_event {
 	__le32 vdev_map;
-	struct wmi_10_4_bcn_info bcn_info[0];
+	struct wmi_10_4_bcn_info bcn_info[];
 } __packed;
 
 #define WMI_MAX_AP_VDEV 16
-- 
cgit v1.2.3-59-g8ed1b


From a86308fc534edeceaf64670c691e17485436a4f4 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Fri, 8 May 2020 05:56:03 +0300
Subject: wcn36xx: Fix error handling path in 'wcn36xx_probe()'

In case of error, 'qcom_wcnss_open_channel()' must be undone by a call to
'rpmsg_destroy_ept()', as already done in the remove function.

Fixes: 5052de8deff5 ("soc: qcom: smd: Transition client drivers from smd to rpmsg")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Reviewed-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507043619.200051-1-christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/ath/wcn36xx/main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/main.c b/drivers/net/wireless/ath/wcn36xx/main.c
index e49c306e0eef..702b689c06df 100644
--- a/drivers/net/wireless/ath/wcn36xx/main.c
+++ b/drivers/net/wireless/ath/wcn36xx/main.c
@@ -1339,7 +1339,7 @@ static int wcn36xx_probe(struct platform_device *pdev)
 	if (addr && ret != ETH_ALEN) {
 		wcn36xx_err("invalid local-mac-address\n");
 		ret = -EINVAL;
-		goto out_wq;
+		goto out_destroy_ept;
 	} else if (addr) {
 		wcn36xx_info("mac address: %pM\n", addr);
 		SET_IEEE80211_PERM_ADDR(wcn->hw, addr);
@@ -1347,7 +1347,7 @@ static int wcn36xx_probe(struct platform_device *pdev)
 
 	ret = wcn36xx_platform_get_resources(wcn, pdev);
 	if (ret)
-		goto out_wq;
+		goto out_destroy_ept;
 
 	wcn36xx_init_ieee80211(wcn);
 	ret = ieee80211_register_hw(wcn->hw);
@@ -1359,6 +1359,8 @@ static int wcn36xx_probe(struct platform_device *pdev)
 out_unmap:
 	iounmap(wcn->ccu_base);
 	iounmap(wcn->dxe_base);
+out_destroy_ept:
+	rpmsg_destroy_ept(wcn->smd_channel);
 out_wq:
 	ieee80211_free_hw(hw);
 out_err:
-- 
cgit v1.2.3-59-g8ed1b


From 4d0f3604c4d181b98104024c837f0e81912f55ef Mon Sep 17 00:00:00 2001
From: Govind Singh <govinds@codeaurora.org>
Date: Fri, 8 May 2020 05:56:10 +0300
Subject: ath10k: remove experimental tag from SDIO and SNOC busses in Kconfig

ath10k (SDIO/SNOC) is no longer experimental. Remove the experimental tag
for SDIO/SNOC from the ath10k Kconfig.

Signed-off-by: Govind Singh <govinds@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507055324.15564-1-govinds@codeaurora.org
---
 drivers/net/wireless/ath/ath10k/Kconfig | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/Kconfig b/drivers/net/wireless/ath/ath10k/Kconfig
index 6b3ff02a373d..b99fd0eff994 100644
--- a/drivers/net/wireless/ath/ath10k/Kconfig
+++ b/drivers/net/wireless/ath/ath10k/Kconfig
@@ -28,11 +28,10 @@ config ATH10K_AHB
 	  This module adds support for AHB bus
 
 config ATH10K_SDIO
-	tristate "Atheros ath10k SDIO support (EXPERIMENTAL)"
+	tristate "Atheros ath10k SDIO support"
 	depends on ATH10K && MMC
 	---help---
-	  This module adds experimental support for SDIO/MMC bus. Currently
-	  work in progress and will not fully work.
+	  This module adds support for SDIO/MMC bus.
 
 config ATH10K_USB
 	tristate "Atheros ath10k USB support (EXPERIMENTAL)"
@@ -42,7 +41,7 @@ config ATH10K_USB
 	  work in progress and will not fully work.
 
 config ATH10K_SNOC
-	tristate "Qualcomm ath10k SNOC support (EXPERIMENTAL)"
+	tristate "Qualcomm ath10k SNOC support"
 	depends on ATH10K
 	depends on ARCH_QCOM || COMPILE_TEST
 	select QCOM_QMI_HELPERS
-- 
cgit v1.2.3-59-g8ed1b


From 7d4343d501f9b5ddbc92f278adba339d16d010e1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 11 May 2020 10:33:42 +0200
Subject: xfrm: fix unused variable warning if CONFIG_NETFILTER=n

After a recent change, 'x' is only used when CONFIG_NETFILTER is set:

net/ipv4/xfrm4_output.c: In function '__xfrm4_output':
net/ipv4/xfrm4_output.c:19:21: warning: unused variable 'x' [-Wunused-variable]
   19 |  struct xfrm_state *x = skb_dst(skb)->xfrm;

Expand the CONFIG_NETFILTER scope to avoid this.

Fixes: 2ab6096db2f1 ("xfrm: remove output_finish indirection from xfrm_state_afinfo")
Reported-by: Stephen Rothwell <sfr@canb.auug.org.au>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: Steffen Klassert <steffen.klassert@secunet.com>
---
 net/ipv4/xfrm4_output.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/xfrm4_output.c b/net/ipv4/xfrm4_output.c
index 502eb189d852..3cff51ba72bb 100644
--- a/net/ipv4/xfrm4_output.c
+++ b/net/ipv4/xfrm4_output.c
@@ -16,9 +16,9 @@
 
 static int __xfrm4_output(struct net *net, struct sock *sk, struct sk_buff *skb)
 {
+#ifdef CONFIG_NETFILTER
 	struct xfrm_state *x = skb_dst(skb)->xfrm;
 
-#ifdef CONFIG_NETFILTER
 	if (!x) {
 		IPCB(skb)->flags |= IPSKB_REROUTED;
 		return dst_output(net, sk, skb);
-- 
cgit v1.2.3-59-g8ed1b


From 385bbf7b119a4feb6d6bcf3586f1bb1dd9c5b0a0 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:50:57 -0500
Subject: bpf, libbpf: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kinds of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get rid of those sorts of issues completely.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200507185057.GA13981@embeddedor
---
 kernel/bpf/queue_stack_maps.c                        | 2 +-
 tools/lib/bpf/libbpf.c                               | 2 +-
 tools/lib/bpf/libbpf_internal.h                      | 2 +-
 tools/testing/selftests/bpf/progs/core_reloc_types.h | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index f697647ceb54..30e1373fd437 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -19,7 +19,7 @@ struct bpf_queue_stack {
 	u32 head, tail;
 	u32 size; /* max_entries + 1 */
 
-	char elements[0] __aligned(8);
+	char elements[] __aligned(8);
 };
 
 static struct bpf_queue_stack *bpf_queue_stack(struct bpf_map *map)
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 6c2f46908f4d..3da66540b54b 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8352,7 +8352,7 @@ error:
 struct perf_sample_raw {
 	struct perf_event_header header;
 	uint32_t size;
-	char data[0];
+	char data[];
 };
 
 struct perf_sample_lost {
diff --git a/tools/lib/bpf/libbpf_internal.h b/tools/lib/bpf/libbpf_internal.h
index 8c3afbd97747..50d70e90d5f1 100644
--- a/tools/lib/bpf/libbpf_internal.h
+++ b/tools/lib/bpf/libbpf_internal.h
@@ -153,7 +153,7 @@ struct btf_ext_info_sec {
 	__u32	sec_name_off;
 	__u32	num_info;
 	/* Followed by num_info * record_size number of bytes */
-	__u8	data[0];
+	__u8	data[];
 };
 
 /* The minimum bpf_func_info checked by the loader */
diff --git a/tools/testing/selftests/bpf/progs/core_reloc_types.h b/tools/testing/selftests/bpf/progs/core_reloc_types.h
index 6d598cfbdb3e..34d84717c946 100644
--- a/tools/testing/selftests/bpf/progs/core_reloc_types.h
+++ b/tools/testing/selftests/bpf/progs/core_reloc_types.h
@@ -379,7 +379,7 @@ struct core_reloc_arrays___equiv_zero_sz_arr {
 	struct core_reloc_arrays_substruct c[3];
 	struct core_reloc_arrays_substruct d[1][2];
 	/* equivalent to flexible array */
-	struct core_reloc_arrays_substruct f[0][2];
+	struct core_reloc_arrays_substruct f[][2];
 };
 
 struct core_reloc_arrays___fixed_arr {
-- 
cgit v1.2.3-59-g8ed1b


From 6e7e034e88e8e22cb14765c86da92416017e45b8 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 11 May 2020 17:15:33 +0100
Subject: tools, bpftool: Poison and replace kernel integer typedefs

Replace the use of kernel-only integer typedefs (u8, u32, etc.) with their
user space counterparts (__u8, __u32, etc.).

Similarly to what libbpf does, poison the typedefs to avoid introducing
them again in the future.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200511161536.29853-2-quentin@isovalent.com
---
 tools/bpf/bpftool/btf_dumper.c    | 4 ++--
 tools/bpf/bpftool/cfg.c           | 4 ++--
 tools/bpf/bpftool/main.h          | 3 +++
 tools/bpf/bpftool/map_perf_ring.c | 2 +-
 tools/bpf/bpftool/prog.c          | 2 +-
 5 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/tools/bpf/bpftool/btf_dumper.c b/tools/bpf/bpftool/btf_dumper.c
index 497807bec675..ede162f83eea 100644
--- a/tools/bpf/bpftool/btf_dumper.c
+++ b/tools/bpf/bpftool/btf_dumper.c
@@ -271,8 +271,8 @@ static void btf_int128_print(json_writer_t *jw, const void *data,
 	}
 }
 
-static void btf_int128_shift(__u64 *print_num, u16 left_shift_bits,
-			     u16 right_shift_bits)
+static void btf_int128_shift(__u64 *print_num, __u16 left_shift_bits,
+			     __u16 right_shift_bits)
 {
 	__u64 upper_num, lower_num;
 
diff --git a/tools/bpf/bpftool/cfg.c b/tools/bpf/bpftool/cfg.c
index 3e21f994f262..1951219a9af7 100644
--- a/tools/bpf/bpftool/cfg.c
+++ b/tools/bpf/bpftool/cfg.c
@@ -157,7 +157,7 @@ static bool cfg_partition_funcs(struct cfg *cfg, struct bpf_insn *cur,
 	return false;
 }
 
-static bool is_jmp_insn(u8 code)
+static bool is_jmp_insn(__u8 code)
 {
 	return BPF_CLASS(code) == BPF_JMP || BPF_CLASS(code) == BPF_JMP32;
 }
@@ -176,7 +176,7 @@ static bool func_partition_bb_head(struct func_node *func)
 
 	for (; cur <= end; cur++) {
 		if (is_jmp_insn(cur->code)) {
-			u8 opcode = BPF_OP(cur->code);
+			__u8 opcode = BPF_OP(cur->code);
 
 			if (opcode == BPF_EXIT || opcode == BPF_CALL)
 				continue;
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index a41cefabccaf..f89ac70ef973 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -18,6 +18,9 @@
 
 #include "json_writer.h"
 
+/* Make sure we do not use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
 #define ptr_to_u64(ptr)	((__u64)(unsigned long)(ptr))
 
 #define NEXT_ARG()	({ argc--; argv++; if (argc < 0) usage(); })
diff --git a/tools/bpf/bpftool/map_perf_ring.c b/tools/bpf/bpftool/map_perf_ring.c
index d9b29c17fbb8..825f29f93a57 100644
--- a/tools/bpf/bpftool/map_perf_ring.c
+++ b/tools/bpf/bpftool/map_perf_ring.c
@@ -39,7 +39,7 @@ struct event_ring_info {
 
 struct perf_event_sample {
 	struct perf_event_header header;
-	u64 time;
+	__u64 time;
 	__u32 size;
 	unsigned char data[];
 };
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index f6a5974a7b0a..b6e5ba568f98 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -238,7 +238,7 @@ exit_free:
 	return fd;
 }
 
-static void show_prog_maps(int fd, u32 num_maps)
+static void show_prog_maps(int fd, __u32 num_maps)
 {
 	struct bpf_prog_info info = {};
 	__u32 len = sizeof(info);
-- 
cgit v1.2.3-59-g8ed1b


From c8caa0bb4b383a86a77f4c8727a4f7c7f9825260 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 11 May 2020 17:15:34 +0100
Subject: tools, bpftool: Minor fixes for documentation

Bring minor improvements to bpftool documentation. Fix or harmonise
formatting, update map types (including in interactive help), improve
description for "map create", fix a build warning due to a missing line
after the double-colon for the "bpftool prog profile" example,
complete/harmonise/sort the list of related bpftool man pages in
footers.

v2:
- Remove (instead of changing) mark-up on "value" in bpftool-map.rst,
  when it does not refer to something passed on the command line.
- Fix an additional typo ("hexadeximal") in the same file.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200511161536.29853-3-quentin@isovalent.com
---
 tools/bpf/bpftool/Documentation/bpftool-btf.rst    | 11 +++++--
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 12 ++++---
 .../bpf/bpftool/Documentation/bpftool-feature.rst  | 12 ++++---
 tools/bpf/bpftool/Documentation/bpftool-gen.rst    | 21 ++++++------
 tools/bpf/bpftool/Documentation/bpftool-iter.rst   | 12 +++----
 tools/bpf/bpftool/Documentation/bpftool-link.rst   |  9 ++++--
 tools/bpf/bpftool/Documentation/bpftool-map.rst    | 37 ++++++++++++++--------
 tools/bpf/bpftool/Documentation/bpftool-net.rst    | 12 ++++---
 tools/bpf/bpftool/Documentation/bpftool-perf.rst   | 12 ++++---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   | 23 ++++++++------
 .../bpftool/Documentation/bpftool-struct_ops.rst   | 11 ++++---
 tools/bpf/bpftool/Documentation/bpftool.rst        | 11 ++++---
 tools/bpf/bpftool/map.c                            |  3 +-
 13 files changed, 116 insertions(+), 70 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-btf.rst b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
index 39615f8e145b..ce3a724f50c1 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-btf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-btf.rst
@@ -230,9 +230,14 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-map**\ (8),
-	**bpftool-prog**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-perf**\ (8)
+	**bpftool-perf**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index 06a28b07787d..e4d9da654e84 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -20,7 +20,7 @@ SYNOPSIS
 CGROUP COMMANDS
 ===============
 
-|	**bpftool** **cgroup { show | list }** *CGROUP* [**effective**]
+|	**bpftool** **cgroup** { **show** | **list** } *CGROUP* [**effective**]
 |	**bpftool** **cgroup tree** [*CGROUP_ROOT*] [**effective**]
 |	**bpftool** **cgroup attach** *CGROUP* *ATTACH_TYPE* *PROG* [*ATTACH_FLAGS*]
 |	**bpftool** **cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
@@ -160,9 +160,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-feature.rst b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
index 1fa755f55e0c..8609f06e71de 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-feature.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-feature.rst
@@ -28,7 +28,7 @@ DESCRIPTION
 ===========
 	**bpftool feature probe** [**kernel**] [**full**] [**macros** [**prefix** *PREFIX*]]
 		  Probe the running kernel and dump a number of eBPF-related
-		  parameters, such as availability of the **bpf()** system call,
+		  parameters, such as availability of the **bpf**\ () system call,
 		  JIT status, eBPF program types availability, eBPF helper
 		  functions availability, and more.
 
@@ -93,9 +93,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-gen.rst b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
index 94d91322895a..df85dbd962c0 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-gen.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-gen.rst
@@ -14,7 +14,7 @@ SYNOPSIS
 
 	*OPTIONS* := { { **-j** | **--json** } [{ **-p** | **--pretty** }] }
 
-	*COMMAND* := { **skeleton | **help** }
+	*COMMAND* := { **skeleton** | **help** }
 
 GEN COMMANDS
 =============
@@ -36,12 +36,12 @@ DESCRIPTION
 		  etc. Skeleton eliminates the need to lookup mentioned
 		  components by name. Instead, if skeleton instantiation
 		  succeeds, they are populated in skeleton structure as valid
-		  libbpf types (e.g., struct bpf_map pointer) and can be
+		  libbpf types (e.g., **struct bpf_map** pointer) and can be
 		  passed to existing generic libbpf APIs.
 
 		  In addition to simple and reliable access to maps and
-		  programs, skeleton provides a storage for BPF links (struct
-		  bpf_link) for each BPF program within BPF object. When
+		  programs, skeleton provides a storage for BPF links (**struct
+		  bpf_link**) for each BPF program within BPF object. When
 		  requested, supported BPF programs will be automatically
 		  attached and resulting BPF links stored for further use by
 		  user in pre-allocated fields in skeleton struct. For BPF
@@ -82,14 +82,14 @@ DESCRIPTION
 
 		  - **example__open** and **example__open_opts**.
 		    These functions are used to instantiate skeleton. It
-		    corresponds to libbpf's **bpf_object__open()** API.
+		    corresponds to libbpf's **bpf_object__open**\ () API.
 		    **_opts** variants accepts extra **bpf_object_open_opts**
 		    options.
 
 		  - **example__load**.
 		    This function creates maps, loads and verifies BPF
 		    programs, initializes global data maps. It corresponds to
-		    libppf's **bpf_object__load** API.
+		    libppf's **bpf_object__load**\ () API.
 
 		  - **example__open_and_load** combines **example__open** and
 		    **example__load** invocations in one commonly used
@@ -296,10 +296,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-map**\ (8),
-	**bpftool-prog**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-iter.rst b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
index 13b173d93890..8dce698eab79 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-iter.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-iter.rst
@@ -22,7 +22,6 @@ ITER COMMANDS
 |
 |	*OBJ* := /a/file/of/bpf_iter_target.o
 
-
 DESCRIPTION
 ===========
 	**bpftool iter pin** *OBJ* *PATH*
@@ -65,19 +64,18 @@ EXAMPLES
    Create a file-based bpf iterator from bpf_iter_netlink.o and pin it
    to /sys/fs/bpf/my_netlink
 
-
 SEE ALSO
 ========
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
-	**bpftool-link**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
-	**bpftool-gen**\ (8)
+	**bpftool-prog**\ (8),
 	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-link.rst b/tools/bpf/bpftool/Documentation/bpftool-link.rst
index ee6500d6e6e4..0e43d7b06c11 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-link.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-link.rst
@@ -109,10 +109,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-map.rst b/tools/bpf/bpftool/Documentation/bpftool-map.rst
index cdeae8ae90ba..31101643e57c 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-map.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-map.rst
@@ -21,7 +21,7 @@ SYNOPSIS
 MAP COMMANDS
 =============
 
-|	**bpftool** **map { show | list }**   [*MAP*]
+|	**bpftool** **map** { **show** | **list** }   [*MAP*]
 |	**bpftool** **map create**     *FILE* **type** *TYPE* **key** *KEY_SIZE* **value** *VALUE_SIZE* \
 |		**entries** *MAX_ENTRIES* **name** *NAME* [**flags** *FLAGS*] [**dev** *NAME*]
 |	**bpftool** **map dump**       *MAP*
@@ -49,7 +49,7 @@ MAP COMMANDS
 |		| **lru_percpu_hash** | **lpm_trie** | **array_of_maps** | **hash_of_maps**
 |		| **devmap** | **devmap_hash** | **sockmap** | **cpumap** | **xskmap** | **sockhash**
 |		| **cgroup_storage** | **reuseport_sockarray** | **percpu_cgroup_storage**
-|		| **queue** | **stack** }
+|		| **queue** | **stack** | **sk_storage** | **struct_ops** }
 
 DESCRIPTION
 ===========
@@ -66,6 +66,13 @@ DESCRIPTION
 		  Create a new map with given parameters and pin it to *bpffs*
 		  as *FILE*.
 
+		  *FLAGS* should be an integer which is the combination of
+		  desired flags, e.g. 1024 for **BPF_F_MMAPABLE** (see bpf.h
+		  UAPI header for existing flags).
+
+		  Keyword **dev** expects a network interface name, and is used
+		  to request hardware offload for the map.
+
 	**bpftool map dump**    *MAP*
 		  Dump all entries in a given *MAP*.  In case of **name**,
 		  *MAP* may match several maps which will all be dumped.
@@ -78,7 +85,7 @@ DESCRIPTION
 		  exists; **noexist** update only if entry doesn't exist.
 
 		  If the **hex** keyword is provided in front of the bytes
-		  sequence, the bytes are parsed as hexadeximal values, even if
+		  sequence, the bytes are parsed as hexadecimal values, even if
 		  no "0x" prefix is added. If the keyword is not provided, then
 		  the bytes are parsed as decimal values, unless a "0x" prefix
 		  (for hexadecimal) or a "0" prefix (for octal) is provided.
@@ -100,10 +107,10 @@ DESCRIPTION
 		  extensions of *bpffs*.
 
 	**bpftool** **map event_pipe** *MAP* [**cpu** *N* **index** *M*]
-		  Read events from a BPF_MAP_TYPE_PERF_EVENT_ARRAY map.
+		  Read events from a **BPF_MAP_TYPE_PERF_EVENT_ARRAY** map.
 
 		  Install perf rings into a perf event array map and dump
-		  output of any bpf_perf_event_output() call in the kernel.
+		  output of any **bpf_perf_event_output**\ () call in the kernel.
 		  By default read the number of CPUs on the system and
 		  install perf ring for each CPU in the corresponding index
 		  in the array.
@@ -116,24 +123,24 @@ DESCRIPTION
 		  receiving events if it installed its rings earlier.
 
 	**bpftool map peek**  *MAP*
-		  Peek next **value** in the queue or stack.
+		  Peek next value in the queue or stack.
 
 	**bpftool map push**  *MAP* **value** *VALUE*
-		  Push **value** onto the stack.
+		  Push *VALUE* onto the stack.
 
 	**bpftool map pop**  *MAP*
-		  Pop and print **value** from the stack.
+		  Pop and print value from the stack.
 
 	**bpftool map enqueue**  *MAP* **value** *VALUE*
-		  Enqueue **value** into the queue.
+		  Enqueue *VALUE* into the queue.
 
 	**bpftool map dequeue**  *MAP*
-		  Dequeue and print **value** from the queue.
+		  Dequeue and print value from the queue.
 
 	**bpftool map freeze**  *MAP*
 		  Freeze the map as read-only from user space. Entries from a
 		  frozen map can not longer be updated or deleted with the
-		  **bpf\ ()** system call. This operation is not reversible,
+		  **bpf**\ () system call. This operation is not reversible,
 		  and the map remains immutable from user space until its
 		  destruction. However, read and write permissions for BPF
 		  programs to the map remain unchanged.
@@ -269,9 +276,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-net.rst b/tools/bpf/bpftool/Documentation/bpftool-net.rst
index 8651b00b81ea..aa7450736179 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-net.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-net.rst
@@ -20,7 +20,7 @@ SYNOPSIS
 NET COMMANDS
 ============
 
-|	**bpftool** **net { show | list }** [ **dev** *NAME* ]
+|	**bpftool** **net** { **show** | **list** } [ **dev** *NAME* ]
 |	**bpftool** **net attach** *ATTACH_TYPE* *PROG* **dev** *NAME* [ **overwrite** ]
 |	**bpftool** **net detach** *ATTACH_TYPE* **dev** *NAME*
 |	**bpftool** **net help**
@@ -194,9 +194,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-perf.rst b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
index e252bd0bc434..9c592b7c6775 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-perf.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-perf.rst
@@ -20,7 +20,7 @@ SYNOPSIS
 PERF COMMANDS
 =============
 
-|	**bpftool** **perf { show | list }**
+|	**bpftool** **perf** { **show** | **list** }
 |	**bpftool** **perf help**
 
 DESCRIPTION
@@ -85,9 +85,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 9f19404f470e..5948e9d89c8d 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -21,11 +21,11 @@ SYNOPSIS
 PROG COMMANDS
 =============
 
-|	**bpftool** **prog { show | list }** [*PROG*]
+|	**bpftool** **prog** { **show** | **list** } [*PROG*]
 |	**bpftool** **prog dump xlated** *PROG* [{**file** *FILE* | **opcodes** | **visual** | **linum**}]
 |	**bpftool** **prog dump jited**  *PROG* [{**file** *FILE* | **opcodes** | **linum**}]
 |	**bpftool** **prog pin** *PROG* *FILE*
-|	**bpftool** **prog { load | loadall }** *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
+|	**bpftool** **prog** { **load** | **loadall** } *OBJ* *PATH* [**type** *TYPE*] [**map** {**idx** *IDX* | **name** *NAME*} *MAP*] [**dev** *NAME*] [**pinmaps** *MAP_DIR*]
 |	**bpftool** **prog attach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog detach** *PROG* *ATTACH_TYPE* [*MAP*]
 |	**bpftool** **prog tracelog**
@@ -49,7 +49,7 @@ PROG COMMANDS
 |       *ATTACH_TYPE* := {
 |		**msg_verdict** | **stream_verdict** | **stream_parser** | **flow_dissector**
 |	}
-|	*METRIC* := {
+|	*METRICs* := {
 |		**cycles** | **instructions** | **l1d_loads** | **llc_misses**
 |	}
 
@@ -155,7 +155,7 @@ DESCRIPTION
 	**bpftool prog tracelog**
 		  Dump the trace pipe of the system to the console (stdout).
 		  Hit <Ctrl+C> to stop printing. BPF programs can write to this
-		  trace pipe at runtime with the **bpf_trace_printk()** helper.
+		  trace pipe at runtime with the **bpf_trace_printk**\ () helper.
 		  This should be used only for debugging purposes. For
 		  streaming data from BPF programs to user space, one can use
 		  perf events (see also **bpftool-map**\ (8)).
@@ -195,9 +195,9 @@ DESCRIPTION
 
 	**bpftool prog profile** *PROG* [**duration** *DURATION*] *METRICs*
 		  Profile *METRICs* for bpf program *PROG* for *DURATION*
-		  seconds or until user hits Ctrl-C. *DURATION* is optional.
+		  seconds or until user hits <Ctrl+C>. *DURATION* is optional.
 		  If *DURATION* is not specified, the profiling will run up to
-		  UINT_MAX seconds.
+		  **UINT_MAX** seconds.
 
 	**bpftool prog help**
 		  Print short help message.
@@ -267,7 +267,7 @@ EXAMPLES
 
 |
 | **# bpftool prog dump xlated id 10 file /tmp/t**
-| **# ls -l /tmp/t**
+| **$ ls -l /tmp/t**
 
 ::
 
@@ -325,6 +325,7 @@ EXAMPLES
 | **# bpftool prog profile id 337 duration 10 cycles instructions llc_misses**
 
 ::
+
          51397 run_cnt
       40176203 cycles                                                 (83.05%)
       42518139 instructions    #   1.06 insns per cycle               (83.39%)
@@ -335,9 +336,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
index f045cc89dd6d..d93cd1cb8b0f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-struct_ops.rst
@@ -105,12 +105,13 @@ SEE ALSO
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
 	**bpftool**\ (8),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8)
-	**bpftool-gen**\ (8)
-	
+	**bpftool-prog**\ (8)
diff --git a/tools/bpf/bpftool/Documentation/bpftool.rst b/tools/bpf/bpftool/Documentation/bpftool.rst
index 34239fda69ed..420d4d5df8b6 100644
--- a/tools/bpf/bpftool/Documentation/bpftool.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool.rst
@@ -75,11 +75,14 @@ SEE ALSO
 ========
 	**bpf**\ (2),
 	**bpf-helpers**\ (7),
-	**bpftool-prog**\ (8),
-	**bpftool-map**\ (8),
+	**bpftool-btf**\ (8),
 	**bpftool-cgroup**\ (8),
 	**bpftool-feature**\ (8),
+	**bpftool-gen**\ (8),
+	**bpftool-iter**\ (8),
+	**bpftool-link**\ (8),
+	**bpftool-map**\ (8),
 	**bpftool-net**\ (8),
 	**bpftool-perf**\ (8),
-	**bpftool-btf**\ (8),
-	**bpftool-gen**\ (8),
+	**bpftool-prog**\ (8),
+	**bpftool-struct_ops**\ (8)
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 693a632f6813..85cbe9a19170 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1589,7 +1589,8 @@ static int do_help(int argc, char **argv)
 		"                 percpu_array | stack_trace | cgroup_array | lru_hash |\n"
 		"                 lru_percpu_hash | lpm_trie | array_of_maps | hash_of_maps |\n"
 		"                 devmap | devmap_hash | sockmap | cpumap | xskmap | sockhash |\n"
-		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage }\n"
+		"                 cgroup_storage | reuseport_sockarray | percpu_cgroup_storage |\n"
+		"                 queue | stack | sk_storage | struct_ops }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-- 
cgit v1.2.3-59-g8ed1b


From ab8d78093dfa2e7820ca0c28dda9142aa771c510 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 11 May 2020 17:15:35 +0100
Subject: bpf: Minor fixes to BPF helpers documentation

Minor improvements to the documentation for BPF helpers:

* Fix formatting for the description of "bpf_socket" for
  bpf_getsockopt() and bpf_setsockopt(), thus suppressing two warnings
  from rst2man about "Unexpected indentation".
* Fix formatting for return values for bpf_sk_assign() and seq_file
  helpers.
* Fix and harmonise formatting, in particular for function/struct names.
* Remove blank lines before "Return:" sections.
* Replace tabs found in the middle of text lines.
* Fix typos.
* Add a note to the footer (in Python script) about "bpftool feature
  probe", including for listing features available to unprivileged
  users, and add a reference to bpftool man page.

Thanks to Florian for reporting two typos (duplicated words).

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200511161536.29853-4-quentin@isovalent.com
---
 include/uapi/linux/bpf.h   | 109 ++++++++++++++++++++++++---------------------
 scripts/bpf_helpers_doc.py |   6 +++
 2 files changed, 65 insertions(+), 50 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 9d1932e23cec..bfb31c1be219 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -675,8 +675,8 @@ union bpf_attr {
  * 		For tracing programs, safely attempt to read *size* bytes from
  * 		kernel space address *unsafe_ptr* and store the data in *dst*.
  *
- * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
- * 		instead.
+ * 		Generally, use **bpf_probe_read_user**\ () or
+ * 		**bpf_probe_read_kernel**\ () instead.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -684,7 +684,7 @@ union bpf_attr {
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
  * 		Does not include time the system was suspended.
- * 		See: clock_gettime(CLOCK_MONOTONIC)
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
  * 	Return
  * 		Current *ktime*.
  *
@@ -1543,11 +1543,11 @@ union bpf_attr {
  * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address
- * 		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * 		*unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
  * 		more details.
  *
- * 		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
- * 		instead.
+ * 		Generally, use **bpf_probe_read_user_str**\ () or
+ * 		**bpf_probe_read_kernel_str**\ () instead.
  * 	Return
  * 		On success, the strictly positive length of the string,
  * 		including the trailing NUL character. On error, a negative
@@ -1575,7 +1575,7 @@ union bpf_attr {
  *
  * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
  * 	Description
- * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
  * 		*skb*, but gets socket from **struct bpf_sock_ops** context.
  * 	Return
  * 		A 8-byte long non-decreasing number.
@@ -1604,6 +1604,7 @@ union bpf_attr {
  * 		The option value of length *optlen* is pointed by *optval*.
  *
  * 		*bpf_socket* should be one of the following:
+ *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
  * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
  * 		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1672,12 +1673,12 @@ union bpf_attr {
  *
  * 		The lower two bits of *flags* are used as the return code if
  * 		the map lookup fails. This is so that the return value can be
- * 		one of the XDP program return codes up to XDP_TX, as chosen by
- * 		the caller. Any higher bits in the *flags* argument must be
+ * 		one of the XDP program return codes up to **XDP_TX**, as chosen
+ * 		by the caller. Any higher bits in the *flags* argument must be
  * 		unset.
  *
- * 		See also bpf_redirect(), which only supports redirecting to an
- * 		ifindex, but doesn't require a map to do so.
+ * 		See also **bpf_redirect**\ (), which only supports redirecting
+ * 		to an ifindex, but doesn't require a map to do so.
  * 	Return
  * 		**XDP_REDIRECT** on success, or the value of the two lower bits
  * 		of the *flags* argument on error.
@@ -1785,7 +1786,7 @@ union bpf_attr {
  * 		the time running for event since last normalization. The
  * 		enabled and running times are accumulated since the perf event
  * 		open. To achieve scaling factor between two invocations of an
- * 		eBPF program, users can can use CPU id as the key (which is
+ * 		eBPF program, users can use CPU id as the key (which is
  * 		typical for perf array usage model) to remember the previous
  * 		value and do the calculation inside the eBPF program.
  * 	Return
@@ -1812,6 +1813,7 @@ union bpf_attr {
  * 		*opval* and of length *optlen*.
  *
  * 		*bpf_socket* should be one of the following:
+ *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
  * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
  * 		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1833,7 +1835,7 @@ union bpf_attr {
  * 		The first argument is the context *regs* on which the kprobe
  * 		works.
  *
- * 		This helper works by setting setting the PC (program counter)
+ * 		This helper works by setting the PC (program counter)
  * 		to an override function which is run in place of the original
  * 		probed function. This means the probed function is not run at
  * 		all. The replacement function just returns with the required
@@ -2300,7 +2302,7 @@ union bpf_attr {
  *		**bpf_rc_keydown**\ () again with the same values, or calling
  *		**bpf_rc_repeat**\ ().
  *
- *		Some protocols include a toggle bit, in case the button	was
+ *		Some protocols include a toggle bit, in case the button was
  *		released and pressed again between consecutive scancodes.
  *
  *		The *ctx* should point to the lirc sample as passed into
@@ -2646,7 +2648,6 @@ union bpf_attr {
  *
  * 		*th* points to the start of the TCP header, while *th_len*
  * 		contains **sizeof**\ (**struct tcphdr**).
- *
  * 	Return
  * 		0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
  * 		error otherwise.
@@ -2829,7 +2830,6 @@ union bpf_attr {
  *
  *		*th* points to the start of the TCP header, while *th_len*
  *		contains the length of the TCP header.
- *
  *	Return
  *		On success, lower 32 bits hold the generated SYN cookie in
  *		followed by 16 bits which hold the MSS value for that cookie,
@@ -2912,7 +2912,7 @@ union bpf_attr {
  * 				// size, after checking its boundaries.
  * 			}
  *
- * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		In comparison, using **bpf_probe_read_user**\ () helper here
  * 		instead to read the string would require to estimate the length
  * 		at compile time, and would often result in copying more memory
  * 		than necessary.
@@ -2930,14 +2930,14 @@ union bpf_attr {
  * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
- * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * 		to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
  * 	Return
- * 		On success, the strictly positive length of the string,	including
+ * 		On success, the strictly positive length of the string, including
  * 		the trailing NUL character. On error, a negative value.
  *
  * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
  *	Description
- *		Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *		Send out a tcp-ack. *tp* is the in-kernel **struct tcp_sock**.
  *		*rcv_nxt* is the ack_seq to be sent out.
  *	Return
  *		0 on success, or a negative error in case of failure.
@@ -2965,19 +2965,19 @@ union bpf_attr {
  * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
  *	Description
  *		For an eBPF program attached to a perf event, retrieve the
- *		branch records (struct perf_branch_entry) associated to *ctx*
- *		and store it in	the buffer pointed by *buf* up to size
+ *		branch records (**struct perf_branch_entry**) associated to *ctx*
+ *		and store it in the buffer pointed by *buf* up to size
  *		*size* bytes.
  *	Return
  *		On success, number of bytes written to *buf*. On error, a
  *		negative value.
  *
  *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
- *		instead	return the number of bytes required to store all the
+ *		instead return the number of bytes required to store all the
  *		branch entries. If this flag is set, *buf* may be NULL.
  *
  *		**-EINVAL** if arguments invalid or **size** not a multiple
- *		of sizeof(struct perf_branch_entry).
+ *		of **sizeof**\ (**struct perf_branch_entry**\ ).
  *
  *		**-ENOENT** if architecture does not support branch records.
  *
@@ -2985,8 +2985,8 @@ union bpf_attr {
  *	Description
  *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
  *		*namespace* will be returned in *nsdata*.
- *
- *		On failure, the returned value is one of the following:
+ *	Return
+ *		0 on success, or one of the following in case of failure:
  *
  *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
  *              with nsfs of current task, or if dev conversion to dev_t lost high bits.
@@ -3025,8 +3025,8 @@ union bpf_attr {
  * 		a global identifier that can be assumed unique. If *ctx* is
  * 		NULL, then the helper returns the cookie for the initial
  * 		network namespace. The cookie itself is very similar to that
- * 		of bpf_get_socket_cookie() helper, but for network namespaces
- * 		instead of sockets.
+ * 		of **bpf_get_socket_cookie**\ () helper, but for network
+ * 		namespaces instead of sockets.
  * 	Return
  * 		A 8-byte long opaque number.
  *
@@ -3061,57 +3061,66 @@ union bpf_attr {
  *
  *		The *flags* argument must be zero.
  *	Return
- *		0 on success, or a negative errno in case of failure.
+ *		0 on success, or a negative error in case of failure:
  *
- *		* **-EINVAL**		Unsupported flags specified.
- *		* **-ENOENT**		Socket is unavailable for assignment.
- *		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
- *		* **-EOPNOTSUPP**	Unsupported operation, for example a
- *					call from outside of TC ingress.
- *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *		**-EINVAL** if specified *flags* are not supported.
+ *
+ *		**-ENOENT** if the socket is unavailable for assignment.
+ *
+ *		**-ENETUNREACH** if the socket is unreachable (wrong netns).
+ *
+ *		**-EOPNOTSUPP** if the operation is not supported, for example
+ *		a call from outside of TC ingress.
+ *
+ *		**-ESOCKTNOSUPPORT** if the socket type is not supported
+ *		(reuseport).
  *
  * u64 bpf_ktime_get_boot_ns(void)
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
  * 		Does include the time the system was suspended.
- * 		See: clock_gettime(CLOCK_BOOTTIME)
+ * 		See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
  * 	Return
  * 		Current *ktime*.
  *
  * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
- * 		seq_printf uses seq_file seq_printf() to print out the format string.
+ * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+ * 		out the format string.
  * 		The *m* represents the seq_file. The *fmt* and *fmt_size* are for
  * 		the format string itself. The *data* and *data_len* are format string
- * 		arguments. The *data* are a u64 array and corresponding format string
+ * 		arguments. The *data* are a **u64** array and corresponding format string
  * 		values are stored in the array. For strings and pointers where pointees
  * 		are accessed, only the pointer values are stored in the *data* array.
- * 		The *data_len* is the *data* size in term of bytes.
+ * 		The *data_len* is the size of *data* in bytes.
  *
  *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
  *		Reading kernel memory may fail due to either invalid address or
  *		valid address but requiring a major memory fault. If reading kernel memory
  *		fails, the string for **%s** will be an empty string, and the ip
  *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
- *		bpf program is consistent with what bpf_trace_printk() does for now.
+ *		bpf program is consistent with what **bpf_trace_printk**\ () does for now.
  * 	Return
- * 		0 on success, or a negative errno in case of failure.
+ * 		0 on success, or a negative error in case of failure:
+ *
+ *		**-EBUSY** if per-CPU memory copy buffer is busy, can try again
+ *		by returning 1 from bpf program.
+ *
+ *		**-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+ *
+ *		**-E2BIG** if *fmt* contains too many format specifiers.
  *
- *		* **-EBUSY**		Percpu memory copy buffer is busy, can try again
- *					by returning 1 from bpf program.
- *		* **-EINVAL**		Invalid arguments, or invalid/unsupported formats.
- *		* **-E2BIG**		Too many format specifiers.
- *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
  * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
  * 	Description
- * 		seq_write uses seq_file seq_write() to write the data.
+ * 		**bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
  * 		The *m* represents the seq_file. The *data* and *len* represent the
- *		data to write in bytes.
+ * 		data to write in bytes.
  * 	Return
- * 		0 on success, or a negative errno in case of failure.
+ * 		0 on success, or a negative error in case of failure:
  *
- *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
diff --git a/scripts/bpf_helpers_doc.py b/scripts/bpf_helpers_doc.py
index ded304c96a05..91fa668fa860 100755
--- a/scripts/bpf_helpers_doc.py
+++ b/scripts/bpf_helpers_doc.py
@@ -318,6 +318,11 @@ may be interested in:
   of eBPF maps are used with a given helper function.
 * *kernel/bpf/* directory contains other files in which additional helpers are
   defined (for cgroups, sockmaps, etc.).
+* The bpftool utility can be used to probe the availability of helper functions
+  on the system (as well as supported program and map types, and a number of
+  other parameters). To do so, run **bpftool feature probe** (see
+  **bpftool-feature**\ (8) for details). Add the **unprivileged** keyword to
+  list features available to unprivileged users.
 
 Compatibility between helper functions and program types can generally be found
 in the files where helper functions are defined. Look for the **struct
@@ -338,6 +343,7 @@ SEE ALSO
 ========
 
 **bpf**\ (2),
+**bpftool**\ (8),
 **cgroups**\ (7),
 **ip**\ (8),
 **perf_event_open**\ (2),
-- 
cgit v1.2.3-59-g8ed1b


From ff20460e94af5d11ebffd9d97c1eaa00e520ecbe Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Mon, 11 May 2020 17:15:36 +0100
Subject: tools, bpf: Synchronise BPF UAPI header with tools

Synchronise the bpf.h header under tools, to carry over the fixes recently
made to the documentation for the BPF helpers.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200511161536.29853-5-quentin@isovalent.com
---
 tools/include/uapi/linux/bpf.h | 109 ++++++++++++++++++++++-------------------
 1 file changed, 59 insertions(+), 50 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 9d1932e23cec..bfb31c1be219 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -675,8 +675,8 @@ union bpf_attr {
  * 		For tracing programs, safely attempt to read *size* bytes from
  * 		kernel space address *unsafe_ptr* and store the data in *dst*.
  *
- * 		Generally, use bpf_probe_read_user() or bpf_probe_read_kernel()
- * 		instead.
+ * 		Generally, use **bpf_probe_read_user**\ () or
+ * 		**bpf_probe_read_kernel**\ () instead.
  * 	Return
  * 		0 on success, or a negative error in case of failure.
  *
@@ -684,7 +684,7 @@ union bpf_attr {
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
  * 		Does not include time the system was suspended.
- * 		See: clock_gettime(CLOCK_MONOTONIC)
+ * 		See: **clock_gettime**\ (**CLOCK_MONOTONIC**)
  * 	Return
  * 		Current *ktime*.
  *
@@ -1543,11 +1543,11 @@ union bpf_attr {
  * int bpf_probe_read_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address
- * 		*unsafe_ptr* to *dst*. See bpf_probe_read_kernel_str() for
+ * 		*unsafe_ptr* to *dst*. See **bpf_probe_read_kernel_str**\ () for
  * 		more details.
  *
- * 		Generally, use bpf_probe_read_user_str() or bpf_probe_read_kernel_str()
- * 		instead.
+ * 		Generally, use **bpf_probe_read_user_str**\ () or
+ * 		**bpf_probe_read_kernel_str**\ () instead.
  * 	Return
  * 		On success, the strictly positive length of the string,
  * 		including the trailing NUL character. On error, a negative
@@ -1575,7 +1575,7 @@ union bpf_attr {
  *
  * u64 bpf_get_socket_cookie(struct bpf_sock_ops *ctx)
  * 	Description
- * 		Equivalent to bpf_get_socket_cookie() helper that accepts
+ * 		Equivalent to **bpf_get_socket_cookie**\ () helper that accepts
  * 		*skb*, but gets socket from **struct bpf_sock_ops** context.
  * 	Return
  * 		A 8-byte long non-decreasing number.
@@ -1604,6 +1604,7 @@ union bpf_attr {
  * 		The option value of length *optlen* is pointed by *optval*.
  *
  * 		*bpf_socket* should be one of the following:
+ *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
  * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
  * 		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1672,12 +1673,12 @@ union bpf_attr {
  *
  * 		The lower two bits of *flags* are used as the return code if
  * 		the map lookup fails. This is so that the return value can be
- * 		one of the XDP program return codes up to XDP_TX, as chosen by
- * 		the caller. Any higher bits in the *flags* argument must be
+ * 		one of the XDP program return codes up to **XDP_TX**, as chosen
+ * 		by the caller. Any higher bits in the *flags* argument must be
  * 		unset.
  *
- * 		See also bpf_redirect(), which only supports redirecting to an
- * 		ifindex, but doesn't require a map to do so.
+ * 		See also **bpf_redirect**\ (), which only supports redirecting
+ * 		to an ifindex, but doesn't require a map to do so.
  * 	Return
  * 		**XDP_REDIRECT** on success, or the value of the two lower bits
  * 		of the *flags* argument on error.
@@ -1785,7 +1786,7 @@ union bpf_attr {
  * 		the time running for event since last normalization. The
  * 		enabled and running times are accumulated since the perf event
  * 		open. To achieve scaling factor between two invocations of an
- * 		eBPF program, users can can use CPU id as the key (which is
+ * 		eBPF program, users can use CPU id as the key (which is
  * 		typical for perf array usage model) to remember the previous
  * 		value and do the calculation inside the eBPF program.
  * 	Return
@@ -1812,6 +1813,7 @@ union bpf_attr {
  * 		*opval* and of length *optlen*.
  *
  * 		*bpf_socket* should be one of the following:
+ *
  * 		* **struct bpf_sock_ops** for **BPF_PROG_TYPE_SOCK_OPS**.
  * 		* **struct bpf_sock_addr** for **BPF_CGROUP_INET4_CONNECT**
  * 		  and **BPF_CGROUP_INET6_CONNECT**.
@@ -1833,7 +1835,7 @@ union bpf_attr {
  * 		The first argument is the context *regs* on which the kprobe
  * 		works.
  *
- * 		This helper works by setting setting the PC (program counter)
+ * 		This helper works by setting the PC (program counter)
  * 		to an override function which is run in place of the original
  * 		probed function. This means the probed function is not run at
  * 		all. The replacement function just returns with the required
@@ -2300,7 +2302,7 @@ union bpf_attr {
  *		**bpf_rc_keydown**\ () again with the same values, or calling
  *		**bpf_rc_repeat**\ ().
  *
- *		Some protocols include a toggle bit, in case the button	was
+ *		Some protocols include a toggle bit, in case the button was
  *		released and pressed again between consecutive scancodes.
  *
  *		The *ctx* should point to the lirc sample as passed into
@@ -2646,7 +2648,6 @@ union bpf_attr {
  *
  * 		*th* points to the start of the TCP header, while *th_len*
  * 		contains **sizeof**\ (**struct tcphdr**).
- *
  * 	Return
  * 		0 if *iph* and *th* are a valid SYN cookie ACK, or a negative
  * 		error otherwise.
@@ -2829,7 +2830,6 @@ union bpf_attr {
  *
  *		*th* points to the start of the TCP header, while *th_len*
  *		contains the length of the TCP header.
- *
  *	Return
  *		On success, lower 32 bits hold the generated SYN cookie in
  *		followed by 16 bits which hold the MSS value for that cookie,
@@ -2912,7 +2912,7 @@ union bpf_attr {
  * 				// size, after checking its boundaries.
  * 			}
  *
- * 		In comparison, using **bpf_probe_read_user()** helper here
+ * 		In comparison, using **bpf_probe_read_user**\ () helper here
  * 		instead to read the string would require to estimate the length
  * 		at compile time, and would often result in copying more memory
  * 		than necessary.
@@ -2930,14 +2930,14 @@ union bpf_attr {
  * int bpf_probe_read_kernel_str(void *dst, u32 size, const void *unsafe_ptr)
  * 	Description
  * 		Copy a NUL terminated string from an unsafe kernel address *unsafe_ptr*
- * 		to *dst*. Same semantics as with bpf_probe_read_user_str() apply.
+ * 		to *dst*. Same semantics as with **bpf_probe_read_user_str**\ () apply.
  * 	Return
- * 		On success, the strictly positive length of the string,	including
+ * 		On success, the strictly positive length of the string, including
  * 		the trailing NUL character. On error, a negative value.
  *
  * int bpf_tcp_send_ack(void *tp, u32 rcv_nxt)
  *	Description
- *		Send out a tcp-ack. *tp* is the in-kernel struct tcp_sock.
+ *		Send out a tcp-ack. *tp* is the in-kernel struct **tcp_sock**.
  *		*rcv_nxt* is the ack_seq to be sent out.
  *	Return
  *		0 on success, or a negative error in case of failure.
@@ -2965,19 +2965,19 @@ union bpf_attr {
  * int bpf_read_branch_records(struct bpf_perf_event_data *ctx, void *buf, u32 size, u64 flags)
  *	Description
  *		For an eBPF program attached to a perf event, retrieve the
- *		branch records (struct perf_branch_entry) associated to *ctx*
- *		and store it in	the buffer pointed by *buf* up to size
+ *		branch records (**struct perf_branch_entry**) associated to *ctx*
+ *		and store it in the buffer pointed by *buf* up to size
  *		*size* bytes.
  *	Return
  *		On success, number of bytes written to *buf*. On error, a
  *		negative value.
  *
  *		The *flags* can be set to **BPF_F_GET_BRANCH_RECORDS_SIZE** to
- *		instead	return the number of bytes required to store all the
+ *		instead return the number of bytes required to store all the
  *		branch entries. If this flag is set, *buf* may be NULL.
  *
  *		**-EINVAL** if arguments invalid or **size** not a multiple
- *		of sizeof(struct perf_branch_entry).
+ *		of **sizeof**\ (**struct perf_branch_entry**\ ).
  *
  *		**-ENOENT** if architecture does not support branch records.
  *
@@ -2985,8 +2985,8 @@ union bpf_attr {
  *	Description
  *		Returns 0 on success, values for *pid* and *tgid* as seen from the current
  *		*namespace* will be returned in *nsdata*.
- *
- *		On failure, the returned value is one of the following:
+ *	Return
+ *		0 on success, or one of the following in case of failure:
  *
  *		**-EINVAL** if dev and inum supplied don't match dev_t and inode number
  *              with nsfs of current task, or if dev conversion to dev_t lost high bits.
@@ -3025,8 +3025,8 @@ union bpf_attr {
  * 		a global identifier that can be assumed unique. If *ctx* is
  * 		NULL, then the helper returns the cookie for the initial
  * 		network namespace. The cookie itself is very similar to that
- * 		of bpf_get_socket_cookie() helper, but for network namespaces
- * 		instead of sockets.
+ * 		of **bpf_get_socket_cookie**\ () helper, but for network
+ * 		namespaces instead of sockets.
  * 	Return
  * 		A 8-byte long opaque number.
  *
@@ -3061,57 +3061,66 @@ union bpf_attr {
  *
  *		The *flags* argument must be zero.
  *	Return
- *		0 on success, or a negative errno in case of failure.
+ *		0 on success, or a negative error in case of failure:
  *
- *		* **-EINVAL**		Unsupported flags specified.
- *		* **-ENOENT**		Socket is unavailable for assignment.
- *		* **-ENETUNREACH**	Socket is unreachable (wrong netns).
- *		* **-EOPNOTSUPP**	Unsupported operation, for example a
- *					call from outside of TC ingress.
- *		* **-ESOCKTNOSUPPORT**	Socket type not supported (reuseport).
+ *		**-EINVAL** if specified *flags* are not supported.
+ *
+ *		**-ENOENT** if the socket is unavailable for assignment.
+ *
+ *		**-ENETUNREACH** if the socket is unreachable (wrong netns).
+ *
+ *		**-EOPNOTSUPP** if the operation is not supported, for example
+ *		a call from outside of TC ingress.
+ *
+ *		**-ESOCKTNOSUPPORT** if the socket type is not supported
+ *		(reuseport).
  *
  * u64 bpf_ktime_get_boot_ns(void)
  * 	Description
  * 		Return the time elapsed since system boot, in nanoseconds.
  * 		Does include the time the system was suspended.
- * 		See: clock_gettime(CLOCK_BOOTTIME)
+ * 		See: **clock_gettime**\ (**CLOCK_BOOTTIME**)
  * 	Return
  * 		Current *ktime*.
  *
  * int bpf_seq_printf(struct seq_file *m, const char *fmt, u32 fmt_size, const void *data, u32 data_len)
  * 	Description
- * 		seq_printf uses seq_file seq_printf() to print out the format string.
+ * 		**bpf_seq_printf**\ () uses seq_file **seq_printf**\ () to print
+ * 		out the format string.
  * 		The *m* represents the seq_file. The *fmt* and *fmt_size* are for
  * 		the format string itself. The *data* and *data_len* are format string
- * 		arguments. The *data* are a u64 array and corresponding format string
+ * 		arguments. The *data* are a **u64** array and corresponding format string
  * 		values are stored in the array. For strings and pointers where pointees
  * 		are accessed, only the pointer values are stored in the *data* array.
- * 		The *data_len* is the *data* size in term of bytes.
+ * 		The *data_len* is the size of *data* in bytes.
  *
  *		Formats **%s**, **%p{i,I}{4,6}** requires to read kernel memory.
  *		Reading kernel memory may fail due to either invalid address or
  *		valid address but requiring a major memory fault. If reading kernel memory
  *		fails, the string for **%s** will be an empty string, and the ip
  *		address for **%p{i,I}{4,6}** will be 0. Not returning error to
- *		bpf program is consistent with what bpf_trace_printk() does for now.
+ *		bpf program is consistent with what **bpf_trace_printk**\ () does for now.
  * 	Return
- * 		0 on success, or a negative errno in case of failure.
+ * 		0 on success, or a negative error in case of failure:
+ *
+ *		**-EBUSY** if per-CPU memory copy buffer is busy, can try again
+ *		by returning 1 from bpf program.
+ *
+ *		**-EINVAL** if arguments are invalid, or if *fmt* is invalid/unsupported.
+ *
+ *		**-E2BIG** if *fmt* contains too many format specifiers.
  *
- *		* **-EBUSY**		Percpu memory copy buffer is busy, can try again
- *					by returning 1 from bpf program.
- *		* **-EINVAL**		Invalid arguments, or invalid/unsupported formats.
- *		* **-E2BIG**		Too many format specifiers.
- *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  *
  * int bpf_seq_write(struct seq_file *m, const void *data, u32 len)
  * 	Description
- * 		seq_write uses seq_file seq_write() to write the data.
+ * 		**bpf_seq_write**\ () uses seq_file **seq_write**\ () to write the data.
  * 		The *m* represents the seq_file. The *data* and *len* represent the
- *		data to write in bytes.
+ * 		data to write in bytes.
  * 	Return
- * 		0 on success, or a negative errno in case of failure.
+ * 		0 on success, or a negative error in case of failure:
  *
- *		* **-EOVERFLOW**	Overflow happens, the same object will be tried again.
+ *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
-- 
cgit v1.2.3-59-g8ed1b


From 0fa39d6dd0478b080a420aa764280e1a3bdb0cee Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:02:16 -0500
Subject: ipv6: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/if_inet6.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/if_inet6.h b/include/net/if_inet6.h
index 212eb278bda6..8bf5906073bc 100644
--- a/include/net/if_inet6.h
+++ b/include/net/if_inet6.h
@@ -78,7 +78,7 @@ struct inet6_ifaddr {
 struct ip6_sf_socklist {
 	unsigned int		sl_max;
 	unsigned int		sl_count;
-	struct in6_addr		sl_addr[0];
+	struct in6_addr		sl_addr[];
 };
 
 #define IP6_SFLSIZE(count)	(sizeof(struct ip6_sf_socklist) + \
-- 
cgit v1.2.3-59-g8ed1b


From c2dfc7d2a9be9a14897eab6dd27eeea1bd4ea79b Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:01:33 -0500
Subject: net: atarilance: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amd/atarilance.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/amd/atarilance.c b/drivers/net/ethernet/amd/atarilance.c
index 4e36122609a3..961796abab35 100644
--- a/drivers/net/ethernet/amd/atarilance.c
+++ b/drivers/net/ethernet/amd/atarilance.c
@@ -156,7 +156,7 @@ struct lance_memory {
 	struct lance_init_block	init;
 	struct lance_tx_head	tx_head[TX_RING_SIZE];
 	struct lance_rx_head	rx_head[RX_RING_SIZE];
-	char					packet_area[0];	/* packet data follow after the
+	char					packet_area[];	/* packet data follow after the
 											 * init block and the ring
 											 * descriptors and are located
 											 * at runtime */
-- 
cgit v1.2.3-59-g8ed1b


From 9c8255c888bac9221739c822132b405d4196bdd8 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:25:07 -0500
Subject: team: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/if_team.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/if_team.h b/include/linux/if_team.h
index 537dc2b8c879..add607943c95 100644
--- a/include/linux/if_team.h
+++ b/include/linux/if_team.h
@@ -67,7 +67,7 @@ struct team_port {
 	u16 queue_id;
 	struct list_head qom_list; /* node in queue override mapping list */
 	struct rcu_head	rcu;
-	long mode_priv[0];
+	long mode_priv[];
 };
 
 static inline struct team_port *team_port_get_rcu(const struct net_device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 01f2b3dac8c4bebeb0ec15c4b7b59993766493cc Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Mon, 11 May 2020 05:58:57 +0000
Subject: hinic: add link_ksettings ethtool_ops support

add set_link_ksettings implementation and improve the implementation
of get_link_ksettings

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 420 +++++++++++++++++++++-
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  |  13 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  19 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_if.h   |   2 +-
 drivers/net/ethernet/huawei/hinic/hinic_port.c    | 129 +++++++
 drivers/net/ethernet/huawei/hinic/hinic_port.h    | 108 ++++++
 6 files changed, 682 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index 966aea949c0b..b426eeced069 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -33,6 +33,99 @@
 #include "hinic_rx.h"
 #include "hinic_dev.h"
 
+#define SET_LINK_STR_MAX_LEN	128
+
+#define GET_SUPPORTED_MODE	0
+#define GET_ADVERTISED_MODE	1
+
+#define ETHTOOL_ADD_SUPPORTED_SPEED_LINK_MODE(ecmd, mode)	\
+		((ecmd)->supported |=	\
+		(1UL << hw_to_ethtool_link_mode_table[mode].link_mode_bit))
+#define ETHTOOL_ADD_ADVERTISED_SPEED_LINK_MODE(ecmd, mode)	\
+		((ecmd)->advertising |=	\
+		(1UL << hw_to_ethtool_link_mode_table[mode].link_mode_bit))
+#define ETHTOOL_ADD_SUPPORTED_LINK_MODE(ecmd, mode)	\
+				((ecmd)->supported |= SUPPORTED_##mode)
+#define ETHTOOL_ADD_ADVERTISED_LINK_MODE(ecmd, mode)	\
+				((ecmd)->advertising |= ADVERTISED_##mode)
+
+struct hw2ethtool_link_mode {
+	enum ethtool_link_mode_bit_indices link_mode_bit;
+	u32 speed;
+	enum hinic_link_mode hw_link_mode;
+};
+
+struct cmd_link_settings {
+	u64	supported;
+	u64	advertising;
+
+	u32	speed;
+	u8	duplex;
+	u8	port;
+	u8	autoneg;
+};
+
+static u32 hw_to_ethtool_speed[LINK_SPEED_LEVELS] = {
+	SPEED_10, SPEED_100,
+	SPEED_1000, SPEED_10000,
+	SPEED_25000, SPEED_40000,
+	SPEED_100000
+};
+
+static struct hw2ethtool_link_mode
+	hw_to_ethtool_link_mode_table[HINIC_LINK_MODE_NUMBERS] = {
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_10000baseKR_Full_BIT,
+		.speed = SPEED_10000,
+		.hw_link_mode = HINIC_10GE_BASE_KR,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_40000baseKR4_Full_BIT,
+		.speed = SPEED_40000,
+		.hw_link_mode = HINIC_40GE_BASE_KR4,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_40000baseCR4_Full_BIT,
+		.speed = SPEED_40000,
+		.hw_link_mode = HINIC_40GE_BASE_CR4,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_100000baseKR4_Full_BIT,
+		.speed = SPEED_100000,
+		.hw_link_mode = HINIC_100GE_BASE_KR4,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_100000baseCR4_Full_BIT,
+		.speed = SPEED_100000,
+		.hw_link_mode = HINIC_100GE_BASE_CR4,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+		.speed = SPEED_25000,
+		.hw_link_mode = HINIC_25GE_BASE_KR_S,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+		.speed = SPEED_25000,
+		.hw_link_mode = HINIC_25GE_BASE_CR_S,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_25000baseKR_Full_BIT,
+		.speed = SPEED_25000,
+		.hw_link_mode = HINIC_25GE_BASE_KR,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_25000baseCR_Full_BIT,
+		.speed = SPEED_25000,
+		.hw_link_mode = HINIC_25GE_BASE_CR,
+	},
+	{
+		.link_mode_bit = ETHTOOL_LINK_MODE_1000baseKX_Full_BIT,
+		.speed = SPEED_1000,
+		.hw_link_mode = HINIC_GE_BASE_KX,
+	},
+};
+
 static void set_link_speed(struct ethtool_link_ksettings *link_ksettings,
 			   enum hinic_speed speed)
 {
@@ -71,18 +164,91 @@ static void set_link_speed(struct ethtool_link_ksettings *link_ksettings,
 	}
 }
 
+static int hinic_get_link_mode_index(enum hinic_link_mode link_mode)
+{
+	int i = 0;
+
+	for (i = 0; i < HINIC_LINK_MODE_NUMBERS; i++) {
+		if (link_mode == hw_to_ethtool_link_mode_table[i].hw_link_mode)
+			break;
+	}
+
+	return i;
+}
+
+static void hinic_add_ethtool_link_mode(struct cmd_link_settings *link_settings,
+					enum hinic_link_mode hw_link_mode,
+					u32 name)
+{
+	enum hinic_link_mode link_mode;
+	int idx = 0;
+
+	for (link_mode = 0; link_mode < HINIC_LINK_MODE_NUMBERS; link_mode++) {
+		if (hw_link_mode & ((u32)1 << link_mode)) {
+			idx = hinic_get_link_mode_index(link_mode);
+			if (idx >= HINIC_LINK_MODE_NUMBERS)
+				continue;
+
+			if (name == GET_SUPPORTED_MODE)
+				ETHTOOL_ADD_SUPPORTED_SPEED_LINK_MODE
+					(link_settings, idx);
+			else
+				ETHTOOL_ADD_ADVERTISED_SPEED_LINK_MODE
+					(link_settings, idx);
+		}
+	}
+}
+
+static void hinic_link_port_type(struct cmd_link_settings *link_settings,
+				 enum hinic_port_type port_type)
+{
+	switch (port_type) {
+	case HINIC_PORT_ELEC:
+	case HINIC_PORT_TP:
+		ETHTOOL_ADD_SUPPORTED_LINK_MODE(link_settings, TP);
+		ETHTOOL_ADD_ADVERTISED_LINK_MODE(link_settings, TP);
+		link_settings->port = PORT_TP;
+		break;
+
+	case HINIC_PORT_AOC:
+	case HINIC_PORT_FIBRE:
+		ETHTOOL_ADD_SUPPORTED_LINK_MODE(link_settings, FIBRE);
+		ETHTOOL_ADD_ADVERTISED_LINK_MODE(link_settings, FIBRE);
+		link_settings->port = PORT_FIBRE;
+		break;
+
+	case HINIC_PORT_COPPER:
+		ETHTOOL_ADD_SUPPORTED_LINK_MODE(link_settings, FIBRE);
+		ETHTOOL_ADD_ADVERTISED_LINK_MODE(link_settings, FIBRE);
+		link_settings->port = PORT_DA;
+		break;
+
+	case HINIC_PORT_BACKPLANE:
+		ETHTOOL_ADD_SUPPORTED_LINK_MODE(link_settings, Backplane);
+		ETHTOOL_ADD_ADVERTISED_LINK_MODE(link_settings, Backplane);
+		link_settings->port = PORT_NONE;
+		break;
+
+	default:
+		link_settings->port = PORT_OTHER;
+		break;
+	}
+}
+
 static int hinic_get_link_ksettings(struct net_device *netdev,
 				    struct ethtool_link_ksettings
 				    *link_ksettings)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	struct hinic_link_mode_cmd link_mode = { 0 };
+	struct hinic_pause_config pause_info = { 0 };
+	struct cmd_link_settings settings = { 0 };
 	enum hinic_port_link_state link_state;
 	struct hinic_port_cap port_cap;
 	int err;
 
+	ethtool_link_ksettings_zero_link_mode(link_ksettings, supported);
 	ethtool_link_ksettings_zero_link_mode(link_ksettings, advertising);
-	ethtool_link_ksettings_add_link_mode(link_ksettings, supported,
-					     Autoneg);
 
 	link_ksettings->base.speed = SPEED_UNKNOWN;
 	link_ksettings->base.autoneg = AUTONEG_DISABLE;
@@ -92,14 +258,19 @@ static int hinic_get_link_ksettings(struct net_device *netdev,
 	if (err)
 		return err;
 
+	hinic_link_port_type(&settings, port_cap.port_type);
+	link_ksettings->base.port = settings.port;
+
 	err = hinic_port_link_state(nic_dev, &link_state);
 	if (err)
 		return err;
 
-	if (link_state != HINIC_LINK_STATE_UP)
-		return err;
-
-	set_link_speed(link_ksettings, port_cap.speed);
+	if (link_state == HINIC_LINK_STATE_UP) {
+		set_link_speed(link_ksettings, port_cap.speed);
+		link_ksettings->base.duplex =
+			(port_cap.duplex == HINIC_DUPLEX_FULL) ?
+			DUPLEX_FULL : DUPLEX_HALF;
+	}
 
 	if (!!(port_cap.autoneg_cap & HINIC_AUTONEG_SUPPORTED))
 		ethtool_link_ksettings_add_link_mode(link_ksettings,
@@ -108,11 +279,243 @@ static int hinic_get_link_ksettings(struct net_device *netdev,
 	if (port_cap.autoneg_state == HINIC_AUTONEG_ACTIVE)
 		link_ksettings->base.autoneg = AUTONEG_ENABLE;
 
-	link_ksettings->base.duplex = (port_cap.duplex == HINIC_DUPLEX_FULL) ?
-					   DUPLEX_FULL : DUPLEX_HALF;
+	err = hinic_get_link_mode(nic_dev->hwdev, &link_mode);
+	if (err || link_mode.supported == HINIC_SUPPORTED_UNKNOWN ||
+	    link_mode.advertised == HINIC_SUPPORTED_UNKNOWN)
+		return -EIO;
+
+	hinic_add_ethtool_link_mode(&settings, link_mode.supported,
+				    GET_SUPPORTED_MODE);
+	hinic_add_ethtool_link_mode(&settings, link_mode.advertised,
+				    GET_ADVERTISED_MODE);
+
+	if (!HINIC_IS_VF(nic_dev->hwdev->hwif)) {
+		err = hinic_get_hw_pause_info(nic_dev->hwdev, &pause_info);
+		if (err)
+			return err;
+		ETHTOOL_ADD_SUPPORTED_LINK_MODE(&settings, Pause);
+		if (pause_info.rx_pause && pause_info.tx_pause) {
+			ETHTOOL_ADD_ADVERTISED_LINK_MODE(&settings, Pause);
+		} else if (pause_info.tx_pause) {
+			ETHTOOL_ADD_ADVERTISED_LINK_MODE(&settings, Asym_Pause);
+		} else if (pause_info.rx_pause) {
+			ETHTOOL_ADD_ADVERTISED_LINK_MODE(&settings, Pause);
+			ETHTOOL_ADD_ADVERTISED_LINK_MODE(&settings, Asym_Pause);
+		}
+	}
+
+	bitmap_copy(link_ksettings->link_modes.supported,
+		    (unsigned long *)&settings.supported,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS);
+	bitmap_copy(link_ksettings->link_modes.advertising,
+		    (unsigned long *)&settings.advertising,
+		    __ETHTOOL_LINK_MODE_MASK_NBITS);
+
 	return 0;
 }
 
+static int hinic_ethtool_to_hw_speed_level(u32 speed)
+{
+	int i;
+
+	for (i = 0; i < LINK_SPEED_LEVELS; i++) {
+		if (hw_to_ethtool_speed[i] == speed)
+			break;
+	}
+
+	return i;
+}
+
+static bool hinic_is_support_speed(enum hinic_link_mode supported_link,
+				   u32 speed)
+{
+	enum hinic_link_mode link_mode;
+	int idx;
+
+	for (link_mode = 0; link_mode < HINIC_LINK_MODE_NUMBERS; link_mode++) {
+		if (!(supported_link & ((u32)1 << link_mode)))
+			continue;
+
+		idx = hinic_get_link_mode_index(link_mode);
+		if (idx >= HINIC_LINK_MODE_NUMBERS)
+			continue;
+
+		if (hw_to_ethtool_link_mode_table[idx].speed == speed)
+			return true;
+	}
+
+	return false;
+}
+
+static bool hinic_is_speed_legal(struct hinic_dev *nic_dev, u32 speed)
+{
+	struct hinic_link_mode_cmd link_mode = { 0 };
+	struct net_device *netdev = nic_dev->netdev;
+	enum nic_speed_level speed_level = 0;
+	int err;
+
+	err = hinic_get_link_mode(nic_dev->hwdev, &link_mode);
+	if (err)
+		return false;
+
+	if (link_mode.supported == HINIC_SUPPORTED_UNKNOWN ||
+	    link_mode.advertised == HINIC_SUPPORTED_UNKNOWN)
+		return false;
+
+	speed_level = hinic_ethtool_to_hw_speed_level(speed);
+	if (speed_level >= LINK_SPEED_LEVELS ||
+	    !hinic_is_support_speed(link_mode.supported, speed)) {
+		netif_err(nic_dev, drv, netdev,
+			  "Unsupported speed: %d\n", speed);
+		return false;
+	}
+
+	return true;
+}
+
+static int get_link_settings_type(struct hinic_dev *nic_dev,
+				  u8 autoneg, u32 speed, u32 *set_settings)
+{
+	struct hinic_port_cap port_cap = { 0 };
+	int err;
+
+	err = hinic_port_get_cap(nic_dev, &port_cap);
+	if (err)
+		return err;
+
+	/* always set autonegotiation */
+	if (port_cap.autoneg_cap)
+		*set_settings |= HILINK_LINK_SET_AUTONEG;
+
+	if (autoneg == AUTONEG_ENABLE) {
+		if (!port_cap.autoneg_cap) {
+			netif_err(nic_dev, drv, nic_dev->netdev, "Not support autoneg\n");
+			return -EOPNOTSUPP;
+		}
+	} else if (speed != (u32)SPEED_UNKNOWN) {
+		/* set speed only when autoneg is disabled */
+		if (!hinic_is_speed_legal(nic_dev, speed))
+			return -EINVAL;
+		*set_settings |= HILINK_LINK_SET_SPEED;
+	} else {
+		netif_err(nic_dev, drv, nic_dev->netdev, "Need to set speed when autoneg is off\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int set_link_settings_separate_cmd(struct hinic_dev *nic_dev,
+					  u32 set_settings, u8 autoneg,
+					  u32 speed)
+{
+	enum nic_speed_level speed_level = 0;
+	int err = 0;
+
+	if (set_settings & HILINK_LINK_SET_AUTONEG) {
+		err = hinic_set_autoneg(nic_dev->hwdev,
+					(autoneg == AUTONEG_ENABLE));
+		if (err)
+			netif_err(nic_dev, drv, nic_dev->netdev, "%s autoneg failed\n",
+				  (autoneg == AUTONEG_ENABLE) ?
+				  "Enable" : "Disable");
+		else
+			netif_info(nic_dev, drv, nic_dev->netdev, "%s autoneg successfully\n",
+				   (autoneg == AUTONEG_ENABLE) ?
+				   "Enable" : "Disable");
+	}
+
+	if (!err && (set_settings & HILINK_LINK_SET_SPEED)) {
+		speed_level = hinic_ethtool_to_hw_speed_level(speed);
+		err = hinic_set_speed(nic_dev->hwdev, speed_level);
+		if (err)
+			netif_err(nic_dev, drv, nic_dev->netdev, "Set speed %d failed\n",
+				  speed);
+		else
+			netif_info(nic_dev, drv, nic_dev->netdev, "Set speed %d successfully\n",
+				   speed);
+	}
+
+	return err;
+}
+
+/* Apply autoneg and/or speed with one HILINK command; when the firmware
+ * reports that command as unsupported, fall back to the separate commands.
+ */
+static int hinic_set_settings_to_hw(struct hinic_dev *nic_dev,
+				    u32 set_settings, u8 autoneg, u32 speed)
+{
+	struct hinic_link_ksettings_info settings = {0};
+	char set_link_str[SET_LINK_STR_MAX_LEN] = {0};
+	struct net_device *netdev = nic_dev->netdev;
+	enum nic_speed_level speed_level = 0;
+	const char *autoneg_str;
+	int err;
+
+	autoneg_str = (set_settings & HILINK_LINK_SET_AUTONEG) ?
+		      (autoneg ? "autong enable " : "autong disable ") : "";
+
+	if (set_settings & HILINK_LINK_SET_SPEED) {
+		speed_level = hinic_ethtool_to_hw_speed_level(speed);
+		/* snprintf() must not read its own destination buffer */
+		err = snprintf(set_link_str, SET_LINK_STR_MAX_LEN,
+			       "speed %d ", speed);
+		if (err <= 0 || err >= SET_LINK_STR_MAX_LEN) {
+			netif_err(nic_dev, drv, netdev, "Failed to snprintf link speed, function return(%d) and dest_len(%d)\n",
+				  err, SET_LINK_STR_MAX_LEN);
+			return -EFAULT;
+		}
+	}
+
+	settings.func_id = HINIC_HWIF_FUNC_IDX(nic_dev->hwdev->hwif);
+	settings.valid_bitmap = set_settings;
+	settings.autoneg = autoneg;
+	settings.speed = speed_level;
+
+	err = hinic_set_link_settings(nic_dev->hwdev, &settings);
+	if (err != HINIC_MGMT_CMD_UNSUPPORTED) {
+		if (err)
+			netif_err(nic_dev, drv, netdev, "Set %s%s failed\n",
+				  autoneg_str, set_link_str);
+		else
+			netif_info(nic_dev, drv, netdev, "Set %s%s successfully\n",
+				   autoneg_str, set_link_str);
+
+		return err;
+	}
+
+	/* Combined command unsupported: use the separate commands */
+	return set_link_settings_separate_cmd(nic_dev, set_settings, autoneg,
+					      speed);
+}
+
+static int set_link_settings(struct net_device *netdev, u8 autoneg, u32 speed)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u32 set_settings = 0;
+	int err;
+
+	err = get_link_settings_type(nic_dev, autoneg, speed, &set_settings);
+	if (err)
+		return err;
+
+	if (set_settings)
+		err = hinic_set_settings_to_hw(nic_dev, set_settings,
+					       autoneg, speed);
+	else
+		netif_info(nic_dev, drv, netdev, "Nothing changed, exit without setting anything\n");
+
+	return err;
+}
+
+static int hinic_set_link_ksettings(struct net_device *netdev, const struct
+				    ethtool_link_ksettings *link_settings)
+{
+	/* only support to set autoneg and speed */
+	return set_link_settings(netdev, link_settings->base.autoneg,
+				 link_settings->base.speed);
+}
+
 static void hinic_get_drvinfo(struct net_device *netdev,
 			      struct ethtool_drvinfo *info)
 {
@@ -741,6 +1144,7 @@ static void hinic_get_strings(struct net_device *netdev,
 
 static const struct ethtool_ops hinic_ethtool_ops = {
 	.get_link_ksettings = hinic_get_link_ksettings,
+	.set_link_ksettings = hinic_set_link_ksettings,
 	.get_drvinfo = hinic_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = hinic_get_ringparam,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 1ce8b8d572cf..2879b0445eba 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -219,6 +219,19 @@ int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
 				 HINIC_MGMT_MSG_SYNC);
 }
 
+int hinic_hilink_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_hilink_cmd cmd,
+			 void *buf_in, u16 in_size, void *buf_out,
+			 u16 *out_size)
+{
+	struct hinic_pfhwdev *pfhwdev;
+
+	pfhwdev = container_of(hwdev, struct hinic_pfhwdev, hwdev);
+
+	return hinic_msg_to_mgmt(&pfhwdev->pf_to_mgmt, HINIC_MOD_HILINK, cmd,
+				 buf_in, in_size, buf_out, out_size,
+				 HINIC_MGMT_MSG_SYNC);
+}
+
 /**
  * init_fw_ctxt- Init Firmware tables before network mgmt and io operations
  * @hwdev: the NIC HW device
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index c8f62a024a58..ce57914bef72 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -54,6 +54,9 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_SET_RX_MODE      = 12,
 
+	HINIC_PORT_CMD_GET_PAUSE_INFO	= 20,
+	HINIC_PORT_CMD_SET_PAUSE_INFO	= 21,
+
 	HINIC_PORT_CMD_GET_LINK_STATE   = 24,
 
 	HINIC_PORT_CMD_SET_LRO		= 25,
@@ -116,11 +119,23 @@ enum hinic_port_cmd {
 
 	HINIC_PORT_CMD_GET_CAP          = 170,
 
+	HINIC_PORT_CMD_GET_LINK_MODE	= 217,
+
+	HINIC_PORT_CMD_SET_SPEED	= 218,
+
+	HINIC_PORT_CMD_SET_AUTONEG	= 219,
+
 	HINIC_PORT_CMD_SET_LRO_TIMER	= 244,
 
 	HINIC_PORT_CMD_SET_VF_MAX_MIN_RATE = 249,
 };
 
+/* cmd of mgmt CPU message for HILINK module */
+enum hinic_hilink_cmd {
+	HINIC_HILINK_CMD_GET_LINK_INFO		= 0x3,
+	HINIC_HILINK_CMD_SET_LINK_SETTINGS	= 0x8,
+};
+
 enum hinic_ucode_cmd {
 	HINIC_UCODE_CMD_MODIFY_QUEUE_CONTEXT    = 0,
 	HINIC_UCODE_CMD_CLEAN_QUEUE_CONTEXT,
@@ -328,6 +343,10 @@ int hinic_port_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_port_cmd cmd,
 		       void *buf_in, u16 in_size, void *buf_out,
 		       u16 *out_size);
 
+int hinic_hilink_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_hilink_cmd cmd,
+			 void *buf_in, u16 in_size, void *buf_out,
+			 u16 *out_size);
+
 int hinic_hwdev_ifup(struct hinic_hwdev *hwdev);
 
 void hinic_hwdev_ifdown(struct hinic_hwdev *hwdev);
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
index 5bb6ec4dcb7c..0872e035faa1 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_if.h
@@ -192,7 +192,7 @@ enum hinic_mod_type {
 	HINIC_MOD_COMM  = 0,    /* HW communication module */
 	HINIC_MOD_L2NIC = 1,    /* L2NIC module */
 	HINIC_MOD_CFGM  = 7,    /* Configuration module */
-
+	HINIC_MOD_HILINK = 14,  /* Hilink module */
 	HINIC_MOD_MAX   = 15
 };
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 714d8279c591..2edb6127f9fb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -1072,3 +1072,132 @@ int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver)
 
 	return 0;
 }
+
+int hinic_get_link_mode(struct hinic_hwdev *hwdev,
+			struct hinic_link_mode_cmd *link_mode)
+{
+	u16 out_size;
+	int err;
+
+	if (!hwdev || !link_mode)
+		return -EINVAL;
+
+	out_size = sizeof(*link_mode);
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_LINK_MODE,
+				 link_mode, sizeof(*link_mode),
+				 link_mode, &out_size);
+	if (err || !out_size || link_mode->status) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"Failed to get link mode, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, link_mode->status, out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int hinic_set_autoneg(struct hinic_hwdev *hwdev, bool enable)
+{
+	struct hinic_set_autoneg_cmd autoneg = {0};
+	u16 out_size = sizeof(autoneg);
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	autoneg.func_id = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+	autoneg.enable = enable;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_AUTONEG,
+				 &autoneg, sizeof(autoneg),
+				 &autoneg, &out_size);
+	if (err || !out_size || autoneg.status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to %s autoneg, err: %d, status: 0x%x, out size: 0x%x\n",
+			enable ? "enable" : "disable", err, autoneg.status,
+			out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int hinic_set_speed(struct hinic_hwdev *hwdev, enum nic_speed_level speed)
+{
+	struct hinic_speed_cmd speed_info = {0};
+	u16 out_size = sizeof(speed_info);
+	int err;
+
+	if (!hwdev)
+		return -EINVAL;
+
+	speed_info.func_id = HINIC_HWIF_FUNC_IDX(hwdev->hwif);
+	speed_info.speed = speed;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_SPEED,
+				 &speed_info, sizeof(speed_info),
+				 &speed_info, &out_size);
+	if (err || !out_size || speed_info.status) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"Failed to set speed, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, speed_info.status, out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int hinic_set_link_settings(struct hinic_hwdev *hwdev,
+			    struct hinic_link_ksettings_info *info)
+{
+	u16 out_size = sizeof(*info);
+	int err;
+
+	err = hinic_hilink_msg_cmd(hwdev, HINIC_HILINK_CMD_SET_LINK_SETTINGS,
+				   info, sizeof(*info), info, &out_size);
+	if ((info->status != HINIC_MGMT_CMD_UNSUPPORTED &&
+	     info->status) || err || !out_size) {
+		dev_err(&hwdev->hwif->pdev->dev,
+			"Failed to set link settings, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, info->status, out_size);
+		return -EFAULT;
+	}
+
+	return info->status;
+}
+
+int hinic_get_hw_pause_info(struct hinic_hwdev *hwdev,
+			    struct hinic_pause_config *pause_info)
+{
+	u16 out_size = sizeof(*pause_info);
+	int err;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_GET_PAUSE_INFO,
+				 pause_info, sizeof(*pause_info),
+				 pause_info, &out_size);
+	if (err || !out_size || pause_info->status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to get pause info, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, pause_info->status, out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
+
+int hinic_set_hw_pause_info(struct hinic_hwdev *hwdev,
+			    struct hinic_pause_config *pause_info)
+{
+	u16 out_size = sizeof(*pause_info);
+	int err;
+
+	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_PAUSE_INFO,
+				 pause_info, sizeof(*pause_info),
+				 pause_info, &out_size);
+	if (err || !out_size || pause_info->status) {
+		dev_err(&hwdev->hwif->pdev->dev, "Failed to set pause info, err: %d, status: 0x%x, out size: 0x%x\n",
+			err, pause_info->status, out_size);
+		return -EIO;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index f2781521970e..5f34308abd2b 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -79,6 +79,42 @@ enum hinic_speed {
 	HINIC_SPEED_UNKNOWN = 0xFF,
 };
 
+enum hinic_link_mode {
+	HINIC_10GE_BASE_KR = 0,
+	HINIC_40GE_BASE_KR4 = 1,
+	HINIC_40GE_BASE_CR4 = 2,
+	HINIC_100GE_BASE_KR4 = 3,
+	HINIC_100GE_BASE_CR4 = 4,
+	HINIC_25GE_BASE_KR_S = 5,
+	HINIC_25GE_BASE_CR_S = 6,
+	HINIC_25GE_BASE_KR = 7,
+	HINIC_25GE_BASE_CR = 8,
+	HINIC_GE_BASE_KX = 9,
+	HINIC_LINK_MODE_NUMBERS,
+
+	HINIC_SUPPORTED_UNKNOWN = 0xFFFF,
+};
+
+enum hinic_port_type {
+	HINIC_PORT_TP,		/* BASET */
+	HINIC_PORT_AUI,
+	HINIC_PORT_MII,
+	HINIC_PORT_FIBRE,	/* OPTICAL */
+	HINIC_PORT_BNC,
+	HINIC_PORT_ELEC,
+	HINIC_PORT_COPPER,	/* PORT_DA */
+	HINIC_PORT_AOC,
+	HINIC_PORT_BACKPLANE,
+	HINIC_PORT_NONE = 0xEF,
+	HINIC_PORT_OTHER = 0xFF,
+};
+
+enum hinic_valid_link_settings {
+	HILINK_LINK_SET_SPEED = 0x1,
+	HILINK_LINK_SET_AUTONEG = 0x2,
+	HILINK_LINK_SET_FEC = 0x4,
+};
+
 enum hinic_tso_state {
 	HINIC_TSO_DISABLE = 0,
 	HINIC_TSO_ENABLE  = 1,
@@ -179,6 +215,50 @@ struct hinic_port_cap {
 	u8      rsvd2[3];
 };
 
+struct hinic_link_mode_cmd {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u16	supported;	/* 0xFFFF represents invalid value */
+	u16	advertised;
+};
+
+struct hinic_speed_cmd {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	speed;
+};
+
+struct hinic_set_autoneg_cmd {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	enable;	/* 1: enable , 0: disable */
+};
+
+struct hinic_link_ksettings_info {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+
+	u32	valid_bitmap;
+	u32	speed;		/* enum nic_speed_level */
+	u8	autoneg;	/* 0 - off; 1 - on */
+	u8	fec;		/* 0 - RSFEC; 1 - BASEFEC; 2 - NOFEC */
+	u8	rsvd2[18];	/* reserved for duplex, port, etc. */
+};
+
 struct hinic_tso_config {
 	u8	status;
 	u8	version;
@@ -549,6 +629,18 @@ struct hinic_spoofchk_set {
 	u16	func_id;
 };
 
+struct hinic_pause_config {
+	u8	status;
+	u8	version;
+	u8	rsvd0[6];
+
+	u16	func_id;
+	u16	rsvd1;
+	u32	auto_neg;
+	u32	rx_pause;
+	u32	tx_pause;
+};
+
 int hinic_port_add_mac(struct hinic_dev *nic_dev, const u8 *addr,
 		       u16 vlan_id);
 
@@ -628,4 +720,20 @@ int hinic_set_rx_vlan_offload(struct hinic_dev *nic_dev, u8 en);
 
 int hinic_get_mgmt_version(struct hinic_dev *nic_dev, u8 *mgmt_ver);
 
+int hinic_set_link_settings(struct hinic_hwdev *hwdev,
+			    struct hinic_link_ksettings_info *info);
+
+int hinic_get_link_mode(struct hinic_hwdev *hwdev,
+			struct hinic_link_mode_cmd *link_mode);
+
+int hinic_set_autoneg(struct hinic_hwdev *hwdev, bool enable);
+
+int hinic_set_speed(struct hinic_hwdev *hwdev, enum nic_speed_level speed);
+
+int hinic_get_hw_pause_info(struct hinic_hwdev *hwdev,
+			    struct hinic_pause_config *pause_info);
+
+int hinic_set_hw_pause_info(struct hinic_hwdev *hwdev,
+			    struct hinic_pause_config *pause_info);
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 73e030977f7884dbe1be0018bab517e8d02760f8 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhsharma@redhat.com>
Date: Mon, 11 May 2020 15:41:41 +0530
Subject: net: qed*: Reduce RX and TX default ring count when running inside
 kdump kernel

Normally kdump kernel(s) run under severe memory constraint with the
basic idea being to save the crashdump vmcore reliably when the primary
kernel panics/hangs.

Currently the qed* ethernet driver ends up consuming a lot of memory in
the kdump kernel, leading to kdump kernel panic when one tries to save
the vmcore via ssh/nfs (thus utilizing the services of the underlying
qed* network interfaces).

An example OOM message log seen in the kdump kernel can be seen here
[1], with crashkernel size reservation of 512M.

Using tools like memstrack (see [2]), we can track the modules taking up
the bulk of memory in the kdump kernel and organize the memory usage
output as per 'highest allocator first'. An example log for the OOM case
indicates that the qed* modules end up allocating approximately 216M
memory, which is a large part of the total crashkernel size:

 dracut-pre-pivot[676]: ======== Report format module_summary: ========
 dracut-pre-pivot[676]: Module qed using 149.6MB (2394 pages), peak allocation 149.6MB (2394 pages)
 dracut-pre-pivot[676]: Module qede using 65.3MB (1045 pages), peak allocation 65.3MB (1045 pages)

This patch reduces the default RX and TX ring count from 1024 to 64
when running inside kdump kernel, which leads to a significant memory
saving.

An example log with the patch applied shows the reduced memory
allocation in the kdump kernel:
 dracut-pre-pivot[674]: ======== Report format module_summary: ========
 dracut-pre-pivot[674]: Module qed using 141.8MB (2268 pages), peak allocation 141.8MB (2268 pages)
 <..snip..>
 dracut-pre-pivot[674]: Module qede using 4.8MB (76 pages), peak allocation 4.9MB (78 pages)

Tested crashdump vmcore save via ssh/nfs protocol using underlying qed*
network interface after applying this patch.

[1] OOM log:
------------

 kworker/0:6: page allocation failure: order:6,
 mode:0x60c0c0(GFP_KERNEL|__GFP_COMP|__GFP_ZERO), nodemask=(null)
 kworker/0:6 cpuset=/ mems_allowed=0
 CPU: 0 PID: 145 Comm: kworker/0:6 Not tainted 4.18.0-109.el8.aarch64 #1
 Hardware name: To be filled by O.E.M. Saber/Saber, BIOS 0ACKL025
 01/18/2019
 Workqueue: events work_for_cpu_fn
 Call trace:
  dump_backtrace+0x0/0x188
  show_stack+0x24/0x30
  dump_stack+0x90/0xb4
  warn_alloc+0xf4/0x178
  __alloc_pages_nodemask+0xcac/0xd58
  alloc_pages_current+0x8c/0xf8
  kmalloc_order_trace+0x38/0x108
  qed_iov_alloc+0x40/0x248 [qed]
  qed_resc_alloc+0x224/0x518 [qed]
  qed_slowpath_start+0x254/0x928 [qed]
   __qede_probe+0xf8/0x5e0 [qede]
  qede_probe+0x68/0xd8 [qede]
  local_pci_probe+0x44/0xa8
  work_for_cpu_fn+0x20/0x30
  process_one_work+0x1ac/0x3e8
  worker_thread+0x44/0x448
  kthread+0x130/0x138
  ret_from_fork+0x10/0x18
  Cannot start slowpath
  qede: probe of 0000:05:00.1 failed with error -12

[2]. Memstrack tool: https://github.com/ryncsn/memstrack

Cc: kexec@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: Ariel Elior <aelior@marvell.com>
Cc: GR-everest-linux-l2@marvell.com
Cc: Manish Chopra <manishc@marvell.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |  2 ++
 drivers/net/ethernet/qlogic/qede/qede_main.c | 11 +++++++++--
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 1a708f95ce94..f6f0b51620ab 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -575,12 +575,14 @@ int qede_add_tc_flower_fltr(struct qede_dev *edev, __be16 proto,
 #define RX_RING_SIZE		((u16)BIT(RX_RING_SIZE_POW))
 #define NUM_RX_BDS_MAX		(RX_RING_SIZE - 1)
 #define NUM_RX_BDS_MIN		128
+#define NUM_RX_BDS_KDUMP_MIN	63
 #define NUM_RX_BDS_DEF		((u16)BIT(10) - 1)
 
 #define TX_RING_SIZE_POW	13
 #define TX_RING_SIZE		((u16)BIT(TX_RING_SIZE_POW))
 #define NUM_TX_BDS_MAX		(TX_RING_SIZE - 1)
 #define NUM_TX_BDS_MIN		128
+#define NUM_TX_BDS_KDUMP_MIN	63
 #define NUM_TX_BDS_DEF		NUM_TX_BDS_MAX
 
 #define QEDE_MIN_PKT_LEN		64
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 256506024b88..8cb2408b12ac 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -29,6 +29,7 @@
  * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
  * SOFTWARE.
  */
+#include <linux/crash_dump.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/version.h>
@@ -715,8 +716,14 @@ static struct qede_dev *qede_alloc_etherdev(struct qed_dev *cdev,
 	edev->dp_module = dp_module;
 	edev->dp_level = dp_level;
 	edev->ops = qed_ops;
-	edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
-	edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
+
+	if (is_kdump_kernel()) {
+		edev->q_num_rx_buffers = NUM_RX_BDS_KDUMP_MIN;
+		edev->q_num_tx_buffers = NUM_TX_BDS_KDUMP_MIN;
+	} else {
+		edev->q_num_rx_buffers = NUM_RX_BDS_DEF;
+		edev->q_num_tx_buffers = NUM_TX_BDS_DEF;
+	}
 
 	DP_INFO(edev, "Allocated netdev with %d tx queues and %d rx queues\n",
 		info->num_queues, info->num_queues);
-- 
cgit v1.2.3-59-g8ed1b


From 37d4f8a6b41f622608671ab8434194c819a5e444 Mon Sep 17 00:00:00 2001
From: Bhupesh Sharma <bhsharma@redhat.com>
Date: Mon, 11 May 2020 15:41:42 +0530
Subject: net: qed: Disable SRIOV functionality inside kdump kernel

Since we have kdump kernel(s) running under severe memory constraint
it makes sense to disable the qed SRIOV functionality when running the
kdump kernel as kdump configurations on several distributions don't
support SRIOV targets for saving the vmcore (see [1] for example).

Currently the qed SRIOV functionality ends up consuming memory in
the kdump kernel, even though it is never used there.

An example log seen in the kdump kernel with the SRIOV functionality
enabled can be seen below (obtained via memstrack tool, see [2]):
 dracut-pre-pivot[676]: ======== Report format module_summary: ========
 dracut-pre-pivot[676]: Module qed using 149.6MB (2394 pages), peak allocation 149.6MB (2394 pages)

This patch disables the SRIOV functionality inside the kdump kernel; with
it applied, the memory consumption goes down:
 dracut-pre-pivot[671]: ======== Report format module_summary: ========
 dracut-pre-pivot[671]: Module qed using 124.6MB (1993 pages), peak allocation 124.7MB (1995 pages)

[1]. https://access.redhat.com/documentation/en-us/red_hat_enterprise_linux/8/html/managing_monitoring_and_updating_the_kernel/installing-and-configuring-kdump_managing-monitoring-and-updating-the-kernel#supported-kdump-targets_supported-kdump-configurations-and-targets
[2]. Memstrack tool: https://github.com/ryncsn/memstrack

Cc: kexec@lists.infradead.org
Cc: linux-kernel@vger.kernel.org
Cc: Ariel Elior <aelior@marvell.com>
Cc: GR-everest-linux-l2@marvell.com
Cc: Manish Chopra <manishc@marvell.com>
Cc: David S. Miller <davem@davemloft.net>
Signed-off-by: Bhupesh Sharma <bhsharma@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_sriov.h  | 10 +++++++---
 drivers/net/ethernet/qlogic/qede/qede_main.c |  2 +-
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_sriov.h b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
index 368e88565783..aabeaf03135e 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_sriov.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_sriov.h
@@ -32,6 +32,7 @@
 
 #ifndef _QED_SRIOV_H
 #define _QED_SRIOV_H
+#include <linux/crash_dump.h>
 #include <linux/types.h>
 #include "qed_vf.h"
 
@@ -40,9 +41,12 @@
 #define QED_VF_ARRAY_LENGTH (3)
 
 #ifdef CONFIG_QED_SRIOV
-#define IS_VF(cdev)             ((cdev)->b_is_vf)
-#define IS_PF(cdev)             (!((cdev)->b_is_vf))
-#define IS_PF_SRIOV(p_hwfn)     (!!((p_hwfn)->cdev->p_iov_info))
+#define IS_VF(cdev)             (is_kdump_kernel() ? \
+				 (0) : ((cdev)->b_is_vf))
+#define IS_PF(cdev)             (is_kdump_kernel() ? \
+				 (1) : !((cdev)->b_is_vf))
+#define IS_PF_SRIOV(p_hwfn)     (is_kdump_kernel() ? \
+				 (0) : !!((p_hwfn)->cdev->p_iov_info))
 #else
 #define IS_VF(cdev)             (0)
 #define IS_PF(cdev)             (1)
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 8cb2408b12ac..300405369c37 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1214,7 +1214,7 @@ static int qede_probe(struct pci_dev *pdev, const struct pci_device_id *id)
 	case QEDE_PRIVATE_VF:
 		if (debug & QED_LOG_VERBOSE_MASK)
 			dev_err(&pdev->dev, "Probing a VF\n");
-		is_vf = true;
+		is_vf = is_kdump_kernel() ? false : true;
 		break;
 	default:
 		if (debug & QED_LOG_VERBOSE_MASK)
-- 
cgit v1.2.3-59-g8ed1b


From dfcabb078847479cc2874c11af3f6cb3b79ddd03 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:28:20 +0100
Subject: sfc: move vport_id to struct efx_nic

Remove some usage of ef10-specific nic_data structs from common MCDI
 functions, in preparation for using them from a non-EF10 driver.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c           | 16 ++++++++--------
 drivers/net/ethernet/sfc/ef10_sriov.c     | 27 +++++++++++++--------------
 drivers/net/ethernet/sfc/mcdi_filters.c   |  5 ++---
 drivers/net/ethernet/sfc/mcdi_functions.c |  8 ++------
 drivers/net/ethernet/sfc/mcdi_port.c      |  7 ++-----
 drivers/net/ethernet/sfc/net_driver.h     |  2 ++
 drivers/net/ethernet/sfc/nic.h            |  2 --
 7 files changed, 29 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 3f16bd807c6e..0ad311ff6796 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -553,7 +553,7 @@ static int efx_ef10_probe(struct efx_nic *efx)
 
 	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
 
-	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
+	efx->vport_id = EVB_PORT_ID_ASSIGNED;
 
 	/* In case we're recovering from a crash (kexec), we want to
 	 * cancel any outstanding request by the previous user of this
@@ -1335,7 +1335,7 @@ static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx)
 
 	/* Driver-created vswitches and vports must be re-created */
 	nic_data->must_probe_vswitching = true;
-	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
+	efx->vport_id = EVB_PORT_ID_ASSIGNED;
 #ifdef CONFIG_SFC_SRIOV
 	if (nic_data->vf)
 		for (i = 0; i < efx->vf_count; i++)
@@ -3158,22 +3158,22 @@ static int efx_ef10_vport_set_mac_address(struct efx_nic *efx)
 	efx_mcdi_filter_table_remove(efx);
 	up_write(&efx->filter_sem);
 
-	rc = efx_ef10_vadaptor_free(efx, nic_data->vport_id);
+	rc = efx_ef10_vadaptor_free(efx, efx->vport_id);
 	if (rc)
 		goto restore_filters;
 
 	ether_addr_copy(mac_old, nic_data->vport_mac);
-	rc = efx_ef10_vport_del_mac(efx, nic_data->vport_id,
+	rc = efx_ef10_vport_del_mac(efx, efx->vport_id,
 				    nic_data->vport_mac);
 	if (rc)
 		goto restore_vadaptor;
 
-	rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id,
+	rc = efx_ef10_vport_add_mac(efx, efx->vport_id,
 				    efx->net_dev->dev_addr);
 	if (!rc) {
 		ether_addr_copy(nic_data->vport_mac, efx->net_dev->dev_addr);
 	} else {
-		rc2 = efx_ef10_vport_add_mac(efx, nic_data->vport_id, mac_old);
+		rc2 = efx_ef10_vport_add_mac(efx, efx->vport_id, mac_old);
 		if (rc2) {
 			/* Failed to add original MAC, so clear vport_mac */
 			eth_zero_addr(nic_data->vport_mac);
@@ -3182,7 +3182,7 @@ static int efx_ef10_vport_set_mac_address(struct efx_nic *efx)
 	}
 
 restore_vadaptor:
-	rc2 = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id);
+	rc2 = efx_ef10_vadaptor_alloc(efx, efx->vport_id);
 	if (rc2)
 		goto reset_nic;
 restore_filters:
@@ -3225,7 +3225,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
 	ether_addr_copy(MCDI_PTR(inbuf, VADAPTOR_SET_MAC_IN_MACADDR),
 			efx->net_dev->dev_addr);
 	MCDI_SET_DWORD(inbuf, VADAPTOR_SET_MAC_IN_UPSTREAM_PORT_ID,
-		       nic_data->vport_id);
+		       efx->vport_id);
 	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf,
 				sizeof(inbuf), NULL, 0, NULL);
 
diff --git a/drivers/net/ethernet/sfc/ef10_sriov.c b/drivers/net/ethernet/sfc/ef10_sriov.c
index 4580b30caae1..21fa6c0e8873 100644
--- a/drivers/net/ethernet/sfc/ef10_sriov.c
+++ b/drivers/net/ethernet/sfc/ef10_sriov.c
@@ -232,15 +232,14 @@ fail:
 
 static int efx_ef10_vadaptor_alloc_set_features(struct efx_nic *efx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	u32 port_flags;
 	int rc;
 
-	rc = efx_ef10_vadaptor_alloc(efx, nic_data->vport_id);
+	rc = efx_ef10_vadaptor_alloc(efx, efx->vport_id);
 	if (rc)
 		goto fail_vadaptor_alloc;
 
-	rc = efx_ef10_vadaptor_query(efx, nic_data->vport_id,
+	rc = efx_ef10_vadaptor_query(efx, efx->vport_id,
 				     &port_flags, NULL, NULL);
 	if (rc)
 		goto fail_vadaptor_query;
@@ -281,11 +280,11 @@ int efx_ef10_vswitching_probe_pf(struct efx_nic *efx)
 
 	rc = efx_ef10_vport_alloc(efx, EVB_PORT_ID_ASSIGNED,
 				  MC_CMD_VPORT_ALLOC_IN_VPORT_TYPE_NORMAL,
-				  EFX_EF10_NO_VLAN, &nic_data->vport_id);
+				  EFX_EF10_NO_VLAN, &efx->vport_id);
 	if (rc)
 		goto fail2;
 
-	rc = efx_ef10_vport_add_mac(efx, nic_data->vport_id, net_dev->dev_addr);
+	rc = efx_ef10_vport_add_mac(efx, efx->vport_id, net_dev->dev_addr);
 	if (rc)
 		goto fail3;
 	ether_addr_copy(nic_data->vport_mac, net_dev->dev_addr);
@@ -296,11 +295,11 @@ int efx_ef10_vswitching_probe_pf(struct efx_nic *efx)
 
 	return 0;
 fail4:
-	efx_ef10_vport_del_mac(efx, nic_data->vport_id, nic_data->vport_mac);
+	efx_ef10_vport_del_mac(efx, efx->vport_id, nic_data->vport_mac);
 	eth_zero_addr(nic_data->vport_mac);
 fail3:
-	efx_ef10_vport_free(efx, nic_data->vport_id);
-	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
+	efx_ef10_vport_free(efx, efx->vport_id);
+	efx->vport_id = EVB_PORT_ID_ASSIGNED;
 fail2:
 	efx_ef10_vswitch_free(efx, EVB_PORT_ID_ASSIGNED);
 fail1:
@@ -355,22 +354,22 @@ void efx_ef10_vswitching_remove_pf(struct efx_nic *efx)
 
 	efx_ef10_sriov_free_vf_vswitching(efx);
 
-	efx_ef10_vadaptor_free(efx, nic_data->vport_id);
+	efx_ef10_vadaptor_free(efx, efx->vport_id);
 
-	if (nic_data->vport_id == EVB_PORT_ID_ASSIGNED)
+	if (efx->vport_id == EVB_PORT_ID_ASSIGNED)
 		return; /* No vswitch was ever created */
 
 	if (!is_zero_ether_addr(nic_data->vport_mac)) {
-		efx_ef10_vport_del_mac(efx, nic_data->vport_id,
+		efx_ef10_vport_del_mac(efx, efx->vport_id,
 				       efx->net_dev->dev_addr);
 		eth_zero_addr(nic_data->vport_mac);
 	}
-	efx_ef10_vport_free(efx, nic_data->vport_id);
-	nic_data->vport_id = EVB_PORT_ID_ASSIGNED;
+	efx_ef10_vport_free(efx, efx->vport_id);
+	efx->vport_id = EVB_PORT_ID_ASSIGNED;
 
 	/* Only free the vswitch if no VFs are assigned */
 	if (!pci_vfs_assigned(efx->pci_dev))
-		efx_ef10_vswitch_free(efx, nic_data->vport_id);
+		efx_ef10_vswitch_free(efx, efx->vport_id);
 }
 
 void efx_ef10_vswitching_remove_vf(struct efx_nic *efx)
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index 4310ae5bd898..e6268556b030 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -186,7 +186,6 @@ static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
 				      struct efx_rss_context *ctx,
 				      bool replacing)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	u32 flags = spec->flags;
 
 	memset(inbuf, 0, MC_CMD_FILTER_OP_EXT_IN_LEN);
@@ -211,7 +210,7 @@ static void efx_mcdi_filter_push_prep(struct efx_nic *efx,
 		efx_mcdi_filter_push_prep_set_match_fields(efx, spec, inbuf);
 	}
 
-	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, nic_data->vport_id);
+	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_PORT_ID, efx->vport_id);
 	MCDI_SET_DWORD(inbuf, FILTER_OP_IN_RX_DEST,
 		       spec->dmaq_id == EFX_FILTER_RX_DMAQ_ID_DROP ?
 		       MC_CMD_FILTER_OP_IN_RX_DEST_DROP :
@@ -1944,7 +1943,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
 		return -EOPNOTSUPP;
 
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
-		       nic_data->vport_id);
+		       efx->vport_id);
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_TYPE, alloc_type);
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_NUM_QUEUES, rss_spread);
 
diff --git a/drivers/net/ethernet/sfc/mcdi_functions.c b/drivers/net/ethernet/sfc/mcdi_functions.c
index dcfe78b0fa5a..962d8395d958 100644
--- a/drivers/net/ethernet/sfc/mcdi_functions.c
+++ b/drivers/net/ethernet/sfc/mcdi_functions.c
@@ -168,21 +168,18 @@ int efx_mcdi_tx_init(struct efx_tx_queue *tx_queue, bool tso_v2)
 	size_t entries = tx_queue->txd.buf.len / EFX_BUF_SIZE;
 	struct efx_channel *channel = tx_queue->channel;
 	struct efx_nic *efx = tx_queue->efx;
-	struct efx_ef10_nic_data *nic_data;
 	dma_addr_t dma_addr;
 	size_t inlen;
 	int rc, i;
 
 	BUILD_BUG_ON(MC_CMD_INIT_TXQ_OUT_LEN != 0);
 
-	nic_data = efx->nic_data;
-
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_SIZE, tx_queue->ptr_mask + 1);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_TARGET_EVQ, channel->channel);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_LABEL, tx_queue->queue);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_INSTANCE, tx_queue->queue);
 	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_OWNER_ID, 0);
-	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, nic_data->vport_id);
+	MCDI_SET_DWORD(inbuf, INIT_TXQ_IN_PORT_ID, efx->vport_id);
 
 	dma_addr = tx_queue->txd.buf.dma_addr;
 
@@ -276,7 +273,6 @@ void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue)
 	struct efx_channel *channel = efx_rx_queue_channel(rx_queue);
 	size_t entries = rx_queue->rxd.buf.len / EFX_BUF_SIZE;
 	struct efx_nic *efx = rx_queue->efx;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	dma_addr_t dma_addr;
 	size_t inlen;
 	int rc;
@@ -295,7 +291,7 @@ void efx_mcdi_rx_init(struct efx_rx_queue *rx_queue)
 			      INIT_RXQ_IN_FLAG_PREFIX, 1,
 			      INIT_RXQ_IN_FLAG_TIMESTAMP, 1);
 	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_OWNER_ID, 0);
-	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, nic_data->vport_id);
+	MCDI_SET_DWORD(inbuf, INIT_RXQ_IN_PORT_ID, efx->vport_id);
 
 	dma_addr = rx_queue->rxd.buf.dma_addr;
 
diff --git a/drivers/net/ethernet/sfc/mcdi_port.c b/drivers/net/ethernet/sfc/mcdi_port.c
index ab5227b13ae6..b807871d8f69 100644
--- a/drivers/net/ethernet/sfc/mcdi_port.c
+++ b/drivers/net/ethernet/sfc/mcdi_port.c
@@ -722,11 +722,8 @@ static int efx_mcdi_mac_stats(struct efx_nic *efx,
 			      MAC_STATS_IN_PERIOD_MS, period);
 	MCDI_SET_DWORD(inbuf, MAC_STATS_IN_DMA_LEN, dma_len);
 
-	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) {
-		struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
-		MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, nic_data->vport_id);
-	}
+	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0)
+		MCDI_SET_DWORD(inbuf, MAC_STATS_IN_PORT_ID, efx->vport_id);
 
 	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_MAC_STATS, inbuf, sizeof(inbuf),
 				NULL, 0, NULL);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index b084e623b5f4..d43f22c8f31c 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -887,6 +887,7 @@ struct efx_async_filter_insertion {
  * @rss_context: Main RSS context.  Its @list member is the head of the list of
  *	RSS contexts created by user requests
  * @rss_lock: Protects custom RSS context software state in @rss_context.list
+ * @vport_id: The function's vport ID, only relevant for PFs
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
@@ -1044,6 +1045,7 @@ struct efx_nic {
 	bool rx_scatter;
 	struct efx_rss_context rss_context;
 	struct mutex rss_lock;
+	u32 vport_id;
 
 	unsigned int_error_count;
 	unsigned long int_error_expire;
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 6670fda8f35a..9e2e387a4b1c 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -385,7 +385,6 @@ enum {
  * %MC_CMD_GET_CAPABILITIES response)
  * @rx_dpcpu_fw_id: Firmware ID of the RxDPCPU
  * @tx_dpcpu_fw_id: Firmware ID of the TxDPCPU
- * @vport_id: The function's vport ID, only relevant for PFs
  * @must_probe_vswitching: Flag: vswitching has yet to be setup after MC reboot
  * @pf_index: The number for this PF, or the parent PF if this is a VF
 #ifdef CONFIG_SFC_SRIOV
@@ -423,7 +422,6 @@ struct efx_ef10_nic_data {
 	u32 datapath_caps2;
 	unsigned int rx_dpcpu_fw_id;
 	unsigned int tx_dpcpu_fw_id;
-	unsigned int vport_id;
 	bool must_probe_vswitching;
 	unsigned int pf_index;
 	u8 port_id[ETH_ALEN];
-- 
cgit v1.2.3-59-g8ed1b


From be904b855200ef4672c765918b31338b59c4847c Mon Sep 17 00:00:00 2001
From: Tom Zhao <tzhao@solarflare.com>
Date: Mon, 11 May 2020 13:28:40 +0100
Subject: sfc: make capability checking a nic_type function

Various MCDI functions (especially in filter handling) need to check the
 datapath caps, but those live in nic_data (since they don't exist on
 Siena).  Decouple from ef10-specific data structures by adding check_caps
 to the nic_type, to allow using these functions from non-ef10 drivers.

Also add a convenience macro efx_has_cap() to reduce the amount of
 boilerplate involved in calling it.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 18 ++++++++++++++++++
 drivers/net/ethernet/sfc/mcdi.h       | 12 ++++++++++++
 drivers/net/ethernet/sfc/net_driver.h |  3 +++
 drivers/net/ethernet/sfc/siena.c      |  7 +++++++
 4 files changed, 40 insertions(+)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 0ad311ff6796..7b3c6214dee6 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3961,6 +3961,22 @@ out_unlock:
 	return rc;
 }
 
+static unsigned int ef10_check_caps(const struct efx_nic *efx,
+				    u8 flag,
+				    u32 offset)
+{
+	const struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+	switch (offset) {
+	case(MC_CMD_GET_CAPABILITIES_V4_OUT_FLAGS1_OFST):
+		return nic_data->datapath_caps & BIT_ULL(flag);
+	case(MC_CMD_GET_CAPABILITIES_V4_OUT_FLAGS2_OFST):
+		return nic_data->datapath_caps2 & BIT_ULL(flag);
+	default:
+		return 0;
+	}
+}
+
 #define EF10_OFFLOAD_FEATURES		\
 	(NETIF_F_IP_CSUM |		\
 	 NETIF_F_HW_VLAN_CTAG_FILTER |	\
@@ -4073,6 +4089,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 	.hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE |
 			    1 << HWTSTAMP_FILTER_ALL,
 	.rx_hash_key_size = 40,
+	.check_caps = ef10_check_caps,
 };
 
 const struct efx_nic_type efx_hunt_a0_nic_type = {
@@ -4208,4 +4225,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.hwtstamp_filters = 1 << HWTSTAMP_FILTER_NONE |
 			    1 << HWTSTAMP_FILTER_ALL,
 	.rx_hash_key_size = 40,
+	.check_caps = ef10_check_caps,
 };
diff --git a/drivers/net/ethernet/sfc/mcdi.h b/drivers/net/ethernet/sfc/mcdi.h
index 54a45010b576..b107e4c00285 100644
--- a/drivers/net/ethernet/sfc/mcdi.h
+++ b/drivers/net/ethernet/sfc/mcdi.h
@@ -326,6 +326,18 @@ void efx_mcdi_sensor_event(struct efx_nic *efx, efx_qword_t *ev);
 #define MCDI_EVENT_FIELD(_ev, _field)			\
 	EFX_QWORD_FIELD(_ev, MCDI_EVENT_ ## _field)
 
+#define MCDI_CAPABILITY(field)						\
+	MC_CMD_GET_CAPABILITIES_V4_OUT_ ## field ## _LBN
+
+#define MCDI_CAPABILITY_OFST(field) \
+	MC_CMD_GET_CAPABILITIES_V4_OUT_ ## field ## _OFST
+
+/* field is FLAGS1 or FLAGS2 */
+#define efx_has_cap(efx, flag, field) \
+	efx->type->check_caps(efx, \
+			      MCDI_CAPABILITY(flag), \
+			      MCDI_CAPABILITY_OFST(field))
+
 void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len);
 int efx_mcdi_get_board_cfg(struct efx_nic *efx, u8 *mac_address,
 			   u16 *fw_subtype_list, u32 *capabilities);
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index d43f22c8f31c..bdeea48ff938 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1354,6 +1354,9 @@ struct efx_nic_type {
 	void (*get_wol)(struct efx_nic *efx, struct ethtool_wolinfo *wol);
 	int (*set_wol)(struct efx_nic *efx, u32 type);
 	void (*resume_wol)(struct efx_nic *efx);
+	unsigned int (*check_caps)(const struct efx_nic *efx,
+				   u8 flag,
+				   u32 offset);
 	int (*test_chip)(struct efx_nic *efx, struct efx_self_tests *tests);
 	int (*test_nvram)(struct efx_nic *efx);
 	void (*mcdi_request)(struct efx_nic *efx,
diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index baa464161626..ed1cb6caa69d 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -948,6 +948,13 @@ fail:
 
 #endif /* CONFIG_SFC_MTD */
 
+unsigned int siena_check_caps(const struct efx_nic *efx,
+			      u8 flag, u32 offset)
+{
+	/* Siena did not support MC_CMD_GET_CAPABILITIES */
+	return 0;
+}
+
 /**************************************************************************
  *
  * Revision-dependent attributes used by efx.c and nic.c
-- 
cgit v1.2.3-59-g8ed1b


From 484a75b1dbc492cca4893718e70daccc05c2ce0b Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:28:56 +0100
Subject: sfc: use efx_has_cap for capability checks outside of NIC-specific
 code

Removes some efx_ef10_nic_data references from common code.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mcdi_filters.c | 13 ++++---------
 drivers/net/ethernet/sfc/ptp.c          |  7 +------
 2 files changed, 5 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index e6268556b030..39f8a91c1222 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -829,8 +829,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
 		efx_filter_set_uc_def(&spec);
 
 	if (encap_type) {
-		if (nic_data->datapath_caps &
-		    (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+		if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
 			efx_filter_set_encap_type(&spec, encap_type);
 		else
 			/*
@@ -1309,8 +1308,7 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx)
 	rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
 	if (rc)
 		goto fail;
-	if (nic_data->datapath_caps &
-		   (1 << MC_CMD_GET_CAPABILITIES_OUT_VXLAN_NVGRE_LBN))
+	if (efx_has_cap(efx, VXLAN_NVGRE, FLAGS1))
 		rc = efx_mcdi_filter_table_probe_matches(efx, table, true);
 	if (rc)
 		goto fail;
@@ -1920,7 +1918,6 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_RSS_CONTEXT_ALLOC_IN_LEN);
 	MCDI_DECLARE_BUF(outbuf, MC_CMD_RSS_CONTEXT_ALLOC_OUT_LEN);
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	size_t outlen;
 	int rc;
 	u32 alloc_type = exclusive ?
@@ -1938,8 +1935,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
 		return 0;
 	}
 
-	if (nic_data->datapath_caps &
-	    1 << MC_CMD_GET_CAPABILITIES_OUT_RX_RSS_LIMITED_LBN)
+	if (efx_has_cap(efx, RX_RSS_LIMITED, FLAGS1))
 		return -EOPNOTSUPP;
 
 	MCDI_SET_DWORD(inbuf, RSS_CONTEXT_ALLOC_IN_UPSTREAM_PORT_ID,
@@ -1960,8 +1956,7 @@ static int efx_mcdi_filter_alloc_rss_context(struct efx_nic *efx, bool exclusive
 	if (context_size)
 		*context_size = rss_spread;
 
-	if (nic_data->datapath_caps &
-	    1 << MC_CMD_GET_CAPABILITIES_OUT_ADDITIONAL_RSS_MODES_LBN)
+	if (efx_has_cap(efx, ADDITIONAL_RSS_MODES, FLAGS1))
 		efx_mcdi_set_rss_context_flags(efx, ctx);
 
 	return 0;
diff --git a/drivers/net/ethernet/sfc/ptp.c b/drivers/net/ethernet/sfc/ptp.c
index 59b4f16896a8..04c7283d205e 100644
--- a/drivers/net/ethernet/sfc/ptp.c
+++ b/drivers/net/ethernet/sfc/ptp.c
@@ -352,12 +352,7 @@ static int efx_phc_enable(struct ptp_clock_info *ptp,
 
 bool efx_ptp_use_mac_tx_timestamps(struct efx_nic *efx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
-	return ((efx_nic_rev(efx) >= EFX_REV_HUNT_A0) &&
-		(nic_data->datapath_caps2 &
-		 (1 << MC_CMD_GET_CAPABILITIES_V2_OUT_TX_MAC_TIMESTAMPING_LBN)
-		));
+	return efx_has_cap(efx, TX_MAC_TIMESTAMPING, FLAGS2);
 }
 
 /* PTP 'extra' channel is still a traffic channel, but we only create TX queues
-- 
cgit v1.2.3-59-g8ed1b


From e4fe938cff0464643b1fbca872e5e10b1ec4c478 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:29:09 +0100
Subject: sfc: move 'must restore' flags out of ef10-specific nic_data

Common code in mcdi_filters.c uses these flags, so by moving them to
 either struct efx_nic (in the case of must_realloc_vis) or struct
 efx_mcdi_filter_table (for must_restore_rss_contexts and
 must_restore_filters), decouple this code from ef10's nic_data.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c         | 14 ++++++--------
 drivers/net/ethernet/sfc/mcdi_filters.c | 24 ++++++++++++++++--------
 drivers/net/ethernet/sfc/mcdi_filters.h |  6 ++++++
 drivers/net/ethernet/sfc/net_driver.h   |  2 ++
 drivers/net/ethernet/sfc/nic.h          |  7 -------
 5 files changed, 30 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 7b3c6214dee6..b33bd6b77501 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1281,13 +1281,13 @@ static int efx_ef10_init_nic(struct efx_nic *efx)
 		nic_data->must_check_datapath_caps = false;
 	}
 
-	if (nic_data->must_realloc_vis) {
+	if (efx->must_realloc_vis) {
 		/* We cannot let the number of VIs change now */
 		rc = efx_ef10_alloc_vis(efx, nic_data->n_allocated_vis,
 					nic_data->n_allocated_vis);
 		if (rc)
 			return rc;
-		nic_data->must_realloc_vis = false;
+		efx->must_realloc_vis = false;
 	}
 
 	if (nic_data->must_restore_piobufs && nic_data->n_piobufs) {
@@ -1326,9 +1326,8 @@ static void efx_ef10_table_reset_mc_allocations(struct efx_nic *efx)
 #endif
 
 	/* All our allocations have been reset */
-	nic_data->must_realloc_vis = true;
-	nic_data->must_restore_rss_contexts = true;
-	nic_data->must_restore_filters = true;
+	efx->must_realloc_vis = true;
+	efx_mcdi_filter_table_reset_mc_allocations(efx);
 	nic_data->must_restore_piobufs = true;
 	efx_ef10_forget_old_piobufs(efx);
 	efx->rss_context.context_id = EFX_MCDI_RSS_CONTEXT_INVALID;
@@ -3100,16 +3099,15 @@ void efx_ef10_handle_drain_event(struct efx_nic *efx)
 
 static int efx_ef10_fini_dmaq(struct efx_nic *efx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-	struct efx_channel *channel;
 	struct efx_tx_queue *tx_queue;
 	struct efx_rx_queue *rx_queue;
+	struct efx_channel *channel;
 	int pending;
 
 	/* If the MC has just rebooted, the TX/RX queues will have already been
 	 * torn down, but efx->active_queues needs to be set to zero.
 	 */
-	if (nic_data->must_realloc_vis) {
+	if (efx->must_realloc_vis) {
 		atomic_set(&efx->active_queues, 0);
 		return 0;
 	}
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index 39f8a91c1222..bb29fc0063bf 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -331,7 +331,6 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
 					 bool replace_equal)
 {
 	DECLARE_BITMAP(mc_rem_map, EFX_EF10_FILTER_SEARCH_LIMIT);
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct efx_mcdi_filter_table *table;
 	struct efx_filter_spec *saved_spec;
 	struct efx_rss_context *ctx = NULL;
@@ -460,7 +459,7 @@ static s32 efx_mcdi_filter_insert_locked(struct efx_nic *efx,
 	rc = efx_mcdi_filter_push(efx, spec, &table->entry[ins_index].handle,
 				  ctx, replacing);
 
-	if (rc == -EINVAL && nic_data->must_realloc_vis)
+	if (rc == -EINVAL && efx->must_realloc_vis)
 		/* The MC rebooted under us, causing it to reject our filter
 		 * insertion as pointing to an invalid VI (spec->dmaq_id).
 		 */
@@ -1355,6 +1354,16 @@ fail:
 	return rc;
 }
 
+void efx_mcdi_filter_table_reset_mc_allocations(struct efx_nic *efx)
+{
+	struct efx_mcdi_filter_table *table = efx->filter_state;
+
+	if (table) {
+		table->must_restore_filters = true;
+		table->must_restore_rss_contexts = true;
+	}
+}
+
 /*
  * Caller must hold efx->filter_sem for read if race against
  * efx_mcdi_filter_table_remove() is possible
@@ -1362,7 +1371,6 @@ fail:
 void efx_mcdi_filter_table_restore(struct efx_nic *efx)
 {
 	struct efx_mcdi_filter_table *table = efx->filter_state;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	unsigned int invalid_filters = 0, failed = 0;
 	struct efx_mcdi_filter_vlan *vlan;
 	struct efx_filter_spec *spec;
@@ -1374,7 +1382,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx)
 
 	WARN_ON(!rwsem_is_locked(&efx->filter_sem));
 
-	if (!nic_data->must_restore_filters)
+	if (!table->must_restore_filters)
 		return;
 
 	if (!table)
@@ -1453,7 +1461,7 @@ not_restored:
 		netif_err(efx, hw, efx->net_dev,
 			  "unable to restore %u filters\n", failed);
 	else
-		nic_data->must_restore_filters = false;
+		table->must_restore_filters = false;
 }
 
 void efx_mcdi_filter_table_remove(struct efx_nic *efx)
@@ -2176,13 +2184,13 @@ int efx_mcdi_rx_pull_rss_config(struct efx_nic *efx)
 
 void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	struct efx_mcdi_filter_table *table = efx->filter_state;
 	struct efx_rss_context *ctx;
 	int rc;
 
 	WARN_ON(!mutex_is_locked(&efx->rss_lock));
 
-	if (!nic_data->must_restore_rss_contexts)
+	if (!table->must_restore_rss_contexts)
 		return;
 
 	list_for_each_entry(ctx, &efx->rss_context.list, list) {
@@ -2198,7 +2206,7 @@ void efx_mcdi_rx_restore_rss_contexts(struct efx_nic *efx)
 				   "; RSS filters may fail to be applied\n",
 				   ctx->user_id, rc);
 	}
-	nic_data->must_restore_rss_contexts = false;
+	table->must_restore_rss_contexts = false;
 }
 
 int efx_mcdi_pf_rx_push_rss_config(struct efx_nic *efx, bool user,
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h
index 1837f4f5d661..884ba9731131 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.h
+++ b/drivers/net/ethernet/sfc/mcdi_filters.h
@@ -75,6 +75,10 @@ struct efx_mcdi_filter_table {
 /* Whether in multicast promiscuous mode when last changed */
 	bool mc_promisc_last;
 	bool mc_overflow; /* Too many MC addrs; should always imply mc_promisc */
+	/* RSS contexts have yet to be restored after MC reboot */
+	bool must_restore_rss_contexts;
+	/* filters have yet to be restored after MC reboot */
+	bool must_restore_filters;
 	bool vlan_filter;
 	struct list_head vlan_list;
 };
@@ -83,6 +87,8 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx);
 void efx_mcdi_filter_table_remove(struct efx_nic *efx);
 void efx_mcdi_filter_table_restore(struct efx_nic *efx);
 
+void efx_mcdi_filter_table_reset_mc_allocations(struct efx_nic *efx);
+
 /*
  * The filter table(s) are managed by firmware and we have write-only
  * access.  When removing filters we must identify them to the
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index bdeea48ff938..ae9756811dfe 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -890,6 +890,7 @@ struct efx_async_filter_insertion {
  * @vport_id: The function's vport ID, only relevant for PFs
  * @int_error_count: Number of internal errors seen recently
  * @int_error_expire: Time at which error count will be expired
+ * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
  * @irq_soft_enabled: Are IRQs soft-enabled? If not, IRQ handler will
  *	acknowledge but do nothing else.
  * @irq_status: Interrupt status buffer
@@ -1050,6 +1051,7 @@ struct efx_nic {
 	unsigned int_error_count;
 	unsigned long int_error_expire;
 
+	bool must_realloc_vis;
 	bool irq_soft_enabled;
 	struct efx_buffer irq_status;
 	unsigned irq_zero_count;
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 9e2e387a4b1c..46583ba8fa24 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -360,10 +360,6 @@ enum {
  * @warm_boot_count: Last seen MC warm boot count
  * @vi_base: Absolute index of first VI in this function
  * @n_allocated_vis: Number of VIs allocated to this function
- * @must_realloc_vis: Flag: VIs have yet to be reallocated after MC reboot
- * @must_restore_rss_contexts: Flag: RSS contexts have yet to be restored after
- *	MC reboot
- * @must_restore_filters: Flag: filters have yet to be restored after MC reboot
  * @n_piobufs: Number of PIO buffers allocated to this function
  * @wc_membase: Base address of write-combining mapping of the memory BAR
  * @pio_write_base: Base address for writing PIO buffers
@@ -403,9 +399,6 @@ struct efx_ef10_nic_data {
 	u16 warm_boot_count;
 	unsigned int vi_base;
 	unsigned int n_allocated_vis;
-	bool must_realloc_vis;
-	bool must_restore_rss_contexts;
-	bool must_restore_filters;
 	unsigned int n_piobufs;
 	void __iomem *wc_membase, *pio_write_base;
 	unsigned int pio_write_vi_base;
-- 
cgit v1.2.3-59-g8ed1b


From fd14e5fd136b1868d10f032a389a36603fb3a119 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:29:23 +0100
Subject: sfc: rework handling of (firmware) multicast chaining state

Store the mc_chaining bit in struct efx_mcdi_filter_table, so that common
 code in mcdi_filters.c doesn't need to get it from ef10-specific nic_data.
Also, probe the firmware workaround just before the call to
 efx_mcdi_filter_table_probe(), rather than in a random other part of the
 driver bringup, to ensure that (a) it gets probed in time and (b) it gets
 reprobed as necessary on resets, no matter how the surrounding code gets
 reorganised and reordered.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c         | 141 +++++++++++++++++---------------
 drivers/net/ethernet/sfc/mcdi_filters.c |  15 ++--
 drivers/net/ethernet/sfc/mcdi_filters.h |   9 +-
 3 files changed, 90 insertions(+), 75 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index b33bd6b77501..0779dda7d29f 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2388,6 +2388,76 @@ static void efx_ef10_tx_write(struct efx_tx_queue *tx_queue)
 	}
 }
 
+static int efx_ef10_probe_multicast_chaining(struct efx_nic *efx)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	unsigned int enabled, implemented;
+	bool want_workaround_26807;
+	int rc;
+
+	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
+	if (rc == -ENOSYS) {
+		/* GET_WORKAROUNDS was implemented before this workaround,
+		 * thus it must be unavailable in this firmware.
+		 */
+		nic_data->workaround_26807 = false;
+		return 0;
+	}
+	if (rc)
+		return rc;
+	want_workaround_26807 =
+		implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG26807;
+	nic_data->workaround_26807 =
+		!!(enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG26807);
+
+	if (want_workaround_26807 && !nic_data->workaround_26807) {
+		unsigned int flags;
+
+		rc = efx_mcdi_set_workaround(efx,
+					     MC_CMD_WORKAROUND_BUG26807,
+					     true, &flags);
+		if (!rc) {
+			if (flags &
+			    1 << MC_CMD_WORKAROUND_EXT_OUT_FLR_DONE_LBN) {
+				netif_info(efx, drv, efx->net_dev,
+					   "other functions on NIC have been reset\n");
+
+				/* With MCFW v4.6.x and earlier, the
+				 * boot count will have incremented,
+				 * so re-read the warm_boot_count
+				 * value now to ensure this function
+				 * doesn't think it has changed next
+				 * time it checks.
+				 */
+				rc = efx_ef10_get_warm_boot_count(efx);
+				if (rc >= 0) {
+					nic_data->warm_boot_count = rc;
+					rc = 0;
+				}
+			}
+			nic_data->workaround_26807 = true;
+		} else if (rc == -EPERM) {
+			rc = 0;
+		}
+	}
+	return rc;
+}
+
+static int efx_ef10_filter_table_probe(struct efx_nic *efx)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	int rc = efx_ef10_probe_multicast_chaining(efx);
+
+	if (rc)
+		return rc;
+	rc = efx_mcdi_filter_table_probe(efx, nic_data->workaround_26807);
+
+	if (rc)
+		return rc;
+
+	return 0;
+}
+
 /* This creates an entry in the RX descriptor queue */
 static inline void
 efx_ef10_build_rx_desc(struct efx_rx_queue *rx_queue, unsigned int index)
@@ -2463,75 +2533,14 @@ static int efx_ef10_ev_init(struct efx_channel *channel)
 {
 	struct efx_nic *efx = channel->efx;
 	struct efx_ef10_nic_data *nic_data;
-	unsigned int enabled, implemented;
 	bool use_v2, cut_thru;
-	int rc;
 
 	nic_data = efx->nic_data;
 	use_v2 = nic_data->datapath_caps2 &
 			    1 << MC_CMD_GET_CAPABILITIES_V2_OUT_INIT_EVQ_V2_LBN;
 	cut_thru = !(nic_data->datapath_caps &
 			      1 << MC_CMD_GET_CAPABILITIES_OUT_RX_BATCHING_LBN);
-	rc = efx_mcdi_ev_init(channel, cut_thru, use_v2);
-
-	/* IRQ return is ignored */
-	if (channel->channel || rc)
-		return rc;
-
-	/* Successfully created event queue on channel 0 */
-	rc = efx_mcdi_get_workarounds(efx, &implemented, &enabled);
-	if (rc == -ENOSYS) {
-		/* GET_WORKAROUNDS was implemented before this workaround,
-		 * thus it must be unavailable in this firmware.
-		 */
-		nic_data->workaround_26807 = false;
-		rc = 0;
-	} else if (rc) {
-		goto fail;
-	} else {
-		nic_data->workaround_26807 =
-			!!(enabled & MC_CMD_GET_WORKAROUNDS_OUT_BUG26807);
-
-		if (implemented & MC_CMD_GET_WORKAROUNDS_OUT_BUG26807 &&
-		    !nic_data->workaround_26807) {
-			unsigned int flags;
-
-			rc = efx_mcdi_set_workaround(efx,
-						     MC_CMD_WORKAROUND_BUG26807,
-						     true, &flags);
-
-			if (!rc) {
-				if (flags &
-				    1 << MC_CMD_WORKAROUND_EXT_OUT_FLR_DONE_LBN) {
-					netif_info(efx, drv, efx->net_dev,
-						   "other functions on NIC have been reset\n");
-
-					/* With MCFW v4.6.x and earlier, the
-					 * boot count will have incremented,
-					 * so re-read the warm_boot_count
-					 * value now to ensure this function
-					 * doesn't think it has changed next
-					 * time it checks.
-					 */
-					rc = efx_ef10_get_warm_boot_count(efx);
-					if (rc >= 0) {
-						nic_data->warm_boot_count = rc;
-						rc = 0;
-					}
-				}
-				nic_data->workaround_26807 = true;
-			} else if (rc == -EPERM) {
-				rc = 0;
-			}
-		}
-	}
-
-	if (!rc)
-		return 0;
-
-fail:
-	efx_mcdi_ev_fini(channel);
-	return rc;
+	return efx_mcdi_ev_init(channel, cut_thru, use_v2);
 }
 
 static void efx_ef10_handle_rx_wrong_queue(struct efx_rx_queue *rx_queue,
@@ -3185,7 +3194,7 @@ restore_vadaptor:
 		goto reset_nic;
 restore_filters:
 	down_write(&efx->filter_sem);
-	rc2 = efx_mcdi_filter_table_probe(efx);
+	rc2 = efx_ef10_filter_table_probe(efx);
 	up_write(&efx->filter_sem);
 	if (rc2)
 		goto reset_nic;
@@ -3227,7 +3236,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
 	rc = efx_mcdi_rpc_quiet(efx, MC_CMD_VADAPTOR_SET_MAC, inbuf,
 				sizeof(inbuf), NULL, 0, NULL);
 
-	efx_mcdi_filter_table_probe(efx);
+	efx_ef10_filter_table_probe(efx);
 	up_write(&efx->filter_sem);
 	mutex_unlock(&efx->mac_lock);
 
@@ -4041,7 +4050,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 	.ev_process = efx_ef10_ev_process,
 	.ev_read_ack = efx_ef10_ev_read_ack,
 	.ev_test_generate = efx_ef10_ev_test_generate,
-	.filter_table_probe = efx_mcdi_filter_table_probe,
+	.filter_table_probe = efx_ef10_filter_table_probe,
 	.filter_table_restore = efx_mcdi_filter_table_restore,
 	.filter_table_remove = efx_mcdi_filter_table_remove,
 	.filter_update_rx_scatter = efx_mcdi_update_rx_scatter,
@@ -4154,7 +4163,7 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 	.ev_process = efx_ef10_ev_process,
 	.ev_read_ack = efx_ef10_ev_read_ack,
 	.ev_test_generate = efx_ef10_ev_test_generate,
-	.filter_table_probe = efx_mcdi_filter_table_probe,
+	.filter_table_probe = efx_ef10_filter_table_probe,
 	.filter_table_restore = efx_mcdi_filter_table_restore,
 	.filter_table_remove = efx_mcdi_filter_table_remove,
 	.filter_update_rx_scatter = efx_mcdi_update_rx_scatter,
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index bb29fc0063bf..d3c2e6eb3191 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -811,7 +811,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
 				      enum efx_encap_type encap_type,
 				      bool multicast, bool rollback)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	struct efx_mcdi_filter_table *table = efx->filter_state;
 	enum efx_filter_flags filter_flags;
 	struct efx_filter_spec spec;
 	u8 baddr[ETH_ALEN];
@@ -896,7 +896,7 @@ static int efx_mcdi_filter_insert_def(struct efx_nic *efx,
 
 		EFX_WARN_ON_PARANOID(*id != EFX_EF10_FILTER_ID_INVALID);
 		*id = efx_mcdi_filter_get_unsafe_id(rc);
-		if (!nic_data->workaround_26807 && !encap_type) {
+		if (!table->mc_chaining && !encap_type) {
 			/* Also need an Ethernet broadcast filter */
 			efx_filter_init_rx(&spec, EFX_FILTER_PRI_AUTO,
 					   filter_flags, 0);
@@ -962,7 +962,6 @@ static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx,
 					      struct efx_mcdi_filter_vlan *vlan)
 {
 	struct efx_mcdi_filter_table *table = efx->filter_state;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 
 	/*
 	 * Do not install unspecified VID if VLAN filtering is enabled.
@@ -1009,11 +1008,10 @@ static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx,
 	 * If changing promiscuous state with cascaded multicast filters, remove
 	 * old filters first, so that packets are dropped rather than duplicated
 	 */
-	if (nic_data->workaround_26807 &&
-	    table->mc_promisc_last != table->mc_promisc)
+	if (table->mc_chaining && table->mc_promisc_last != table->mc_promisc)
 		efx_mcdi_filter_remove_old(efx);
 	if (table->mc_promisc) {
-		if (nic_data->workaround_26807) {
+		if (table->mc_chaining) {
 			/*
 			 * If we failed to insert promiscuous filters, rollback
 			 * and fall back to individual multicast filters
@@ -1048,7 +1046,7 @@ static void efx_mcdi_filter_vlan_sync_rx_mode(struct efx_nic *efx,
 		 */
 		if (efx_mcdi_filter_insert_addr_list(efx, vlan, true, true)) {
 			/* Changing promisc state, so remove old filters */
-			if (nic_data->workaround_26807)
+			if (table->mc_chaining)
 				efx_mcdi_filter_remove_old(efx);
 			if (efx_mcdi_filter_insert_def(efx, vlan,
 						       EFX_ENCAP_TYPE_NONE,
@@ -1285,7 +1283,7 @@ efx_mcdi_filter_table_probe_matches(struct efx_nic *efx,
 	return 0;
 }
 
-int efx_mcdi_filter_table_probe(struct efx_nic *efx)
+int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct net_device *net_dev = efx->net_dev;
@@ -1303,6 +1301,7 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx)
 	if (!table)
 		return -ENOMEM;
 
+	table->mc_chaining = multicast_chaining;
 	table->rx_match_count = 0;
 	rc = efx_mcdi_filter_table_probe_matches(efx, table, false);
 	if (rc)
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h
index 884ba9731131..15b5d62e3670 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.h
+++ b/drivers/net/ethernet/sfc/mcdi_filters.h
@@ -79,11 +79,18 @@ struct efx_mcdi_filter_table {
 	bool must_restore_rss_contexts;
 	/* filters have yet to be restored after MC reboot */
 	bool must_restore_filters;
+	/* Multicast filter chaining allows less-specific filters to receive
+	 * multicast packets that matched more-specific filters.  Early EF10
+	 * firmware didn't support this (SF bug 26807); if mc_chaining == false
+	 * then we still subscribe the dev_mc_list even when mc_promisc to
+	 * prevent another VI stealing the traffic.
+	 */
+	bool mc_chaining;
 	bool vlan_filter;
 	struct list_head vlan_list;
 };
 
-int efx_mcdi_filter_table_probe(struct efx_nic *efx);
+int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining);
 void efx_mcdi_filter_table_remove(struct efx_nic *efx);
 void efx_mcdi_filter_table_restore(struct efx_nic *efx);
 
-- 
cgit v1.2.3-59-g8ed1b


From dbf2c669062c1247345c0a3c0c3112e386ffe3f0 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:29:34 +0100
Subject: sfc: move rx_rss_context_exclusive into struct efx_mcdi_filter_table

It's both set and used solely by mcdi_filters.c, so there's no reason
 for it to be in ef10-specific nic_data.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mcdi_filters.c | 10 +++++-----
 drivers/net/ethernet/sfc/mcdi_filters.h |  2 ++
 drivers/net/ethernet/sfc/nic.h          |  2 --
 3 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index d3c2e6eb3191..e99b3149c4ae 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -2031,14 +2031,14 @@ void efx_mcdi_rx_free_indir_table(struct efx_nic *efx)
 static int efx_mcdi_filter_rx_push_shared_rss_config(struct efx_nic *efx,
 					      unsigned *context_size)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+	struct efx_mcdi_filter_table *table = efx->filter_state;
 	int rc = efx_mcdi_filter_alloc_rss_context(efx, false, &efx->rss_context,
 					    context_size);
 
 	if (rc != 0)
 		return rc;
 
-	nic_data->rx_rss_context_exclusive = false;
+	table->rx_rss_context_exclusive = false;
 	efx_set_default_rx_indir_table(efx, &efx->rss_context);
 	return 0;
 }
@@ -2047,12 +2047,12 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
 						 const u32 *rx_indir_table,
 						 const u8 *key)
 {
+	struct efx_mcdi_filter_table *table = efx->filter_state;
 	u32 old_rx_rss_context = efx->rss_context.context_id;
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	int rc;
 
 	if (efx->rss_context.context_id == EFX_MCDI_RSS_CONTEXT_INVALID ||
-	    !nic_data->rx_rss_context_exclusive) {
+	    !table->rx_rss_context_exclusive) {
 		rc = efx_mcdi_filter_alloc_rss_context(efx, true, &efx->rss_context,
 						NULL);
 		if (rc == -EOPNOTSUPP)
@@ -2069,7 +2069,7 @@ static int efx_mcdi_filter_rx_push_exclusive_rss_config(struct efx_nic *efx,
 	if (efx->rss_context.context_id != old_rx_rss_context &&
 	    old_rx_rss_context != EFX_MCDI_RSS_CONTEXT_INVALID)
 		WARN_ON(efx_mcdi_filter_free_rss_context(efx, old_rx_rss_context) != 0);
-	nic_data->rx_rss_context_exclusive = true;
+	table->rx_rss_context_exclusive = true;
 	if (rx_indir_table != efx->rss_context.rx_indir_table)
 		memcpy(efx->rss_context.rx_indir_table, rx_indir_table,
 		       sizeof(efx->rss_context.rx_indir_table));
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.h b/drivers/net/ethernet/sfc/mcdi_filters.h
index 15b5d62e3670..03a8bf74c733 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.h
+++ b/drivers/net/ethernet/sfc/mcdi_filters.h
@@ -55,6 +55,8 @@ struct efx_mcdi_filter_table {
 	u32 rx_match_mcdi_flags[
 		MC_CMD_GET_PARSER_DISP_INFO_OUT_SUPPORTED_MATCHES_MAXNUM * 2];
 	unsigned int rx_match_count;
+	/* Our RSS context is exclusive (as opposed to shared) */
+	bool rx_rss_context_exclusive;
 
 	struct rw_semaphore lock; /* Protects entries */
 	struct {
diff --git a/drivers/net/ethernet/sfc/nic.h b/drivers/net/ethernet/sfc/nic.h
index 46583ba8fa24..8f73c5d996eb 100644
--- a/drivers/net/ethernet/sfc/nic.h
+++ b/drivers/net/ethernet/sfc/nic.h
@@ -368,7 +368,6 @@ enum {
  * @piobuf_size: size of a single PIO buffer
  * @must_restore_piobufs: Flag: PIO buffers have yet to be restored after MC
  *	reboot
- * @rx_rss_context_exclusive: Whether our RSS context is exclusive or shared
  * @stats: Hardware statistics
  * @workaround_35388: Flag: firmware supports workaround for bug 35388
  * @workaround_26807: Flag: firmware supports workaround for bug 26807
@@ -405,7 +404,6 @@ struct efx_ef10_nic_data {
 	unsigned int piobuf_handle[EF10_TX_PIOBUF_COUNT];
 	u16 piobuf_size;
 	bool must_restore_piobufs;
-	bool rx_rss_context_exclusive;
 	u64 stats[EF10_STAT_COUNT];
 	bool workaround_35388;
 	bool workaround_26807;
-- 
cgit v1.2.3-59-g8ed1b


From ed02112cff9212232cc42f871cbe84c2c4c81850 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:29:45 +0100
Subject: sfc: make filter table probe caller responsible for adding VLANs

By making the caller of efx_mcdi_filter_table_probe() loop over the
 vlan_list calling efx_mcdi_filter_add_vlan(), instead of doing it in
 efx_mcdi_filter_table_probe(), the latter avoids looking in ef10-
 specific nic_data.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c         | 10 ++++++++++
 drivers/net/ethernet/sfc/mcdi_filters.c | 12 ------------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 0779dda7d29f..d7d2edc4d81a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -2447,6 +2447,7 @@ static int efx_ef10_filter_table_probe(struct efx_nic *efx)
 {
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	int rc = efx_ef10_probe_multicast_chaining(efx);
+	struct efx_mcdi_filter_vlan *vlan;
 
 	if (rc)
 		return rc;
@@ -2455,7 +2456,16 @@ static int efx_ef10_filter_table_probe(struct efx_nic *efx)
 	if (rc)
 		return rc;
 
+	list_for_each_entry(vlan, &nic_data->vlan_list, list) {
+		rc = efx_mcdi_filter_add_vlan(efx, vlan->vid);
+		if (rc)
+			goto fail_add_vlan;
+	}
 	return 0;
+
+fail_add_vlan:
+	efx_mcdi_filter_table_remove(efx);
+	return rc;
 }
 
 /* This creates an entry in the RX descriptor queue */
diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index e99b3149c4ae..88de95a8c08c 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -1285,10 +1285,8 @@ efx_mcdi_filter_table_probe_matches(struct efx_nic *efx,
 
 int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
 {
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	struct net_device *net_dev = efx->net_dev;
 	struct efx_mcdi_filter_table *table;
-	struct efx_mcdi_filter_vlan *vlan;
 	int rc;
 
 	if (!efx_rwsem_assert_write_locked(&efx->filter_sem))
@@ -1337,17 +1335,7 @@ int efx_mcdi_filter_table_probe(struct efx_nic *efx, bool multicast_chaining)
 
 	efx->filter_state = table;
 
-	list_for_each_entry(vlan, &nic_data->vlan_list, list) {
-		rc = efx_mcdi_filter_add_vlan(efx, vlan->vid);
-		if (rc)
-			goto fail_add_vlan;
-	}
-
 	return 0;
-
-fail_add_vlan:
-	efx_mcdi_filter_cleanup_vlans(efx);
-	efx->filter_state = NULL;
 fail:
 	kfree(table);
 	return rc;
-- 
cgit v1.2.3-59-g8ed1b


From 9b46132cff75653f02241640b44b31c29499afc1 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Mon, 11 May 2020 13:30:00 +0100
Subject: sfc: make firmware-variant printing a nic_type function

Instead of having efx_mcdi_print_fwver() look at efx_nic_rev and
 conditionally poke around inside ef10-specific nic_data, add a new
 efx->type->print_additional_fwver() method to do this work.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c       | 15 +++++++++++++++
 drivers/net/ethernet/sfc/mcdi.c       | 25 +++++++++----------------
 drivers/net/ethernet/sfc/net_driver.h |  3 +++
 3 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index d7d2edc4d81a..e634e8110585 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3978,6 +3978,19 @@ out_unlock:
 	return rc;
 }
 
+/* EF10 may have multiple datapath firmware variants within a
+ * single version.  Report which variants are running.
+ */
+static size_t efx_ef10_print_additional_fwver(struct efx_nic *efx, char *buf,
+					      size_t len)
+{
+	struct efx_ef10_nic_data *nic_data = efx->nic_data;
+
+	return scnprintf(buf, len, " rx%x tx%x",
+			 nic_data->rx_dpcpu_fw_id,
+			 nic_data->tx_dpcpu_fw_id);
+}
+
 static unsigned int ef10_check_caps(const struct efx_nic *efx,
 				    u8 flag,
 				    u32 offset)
@@ -4107,6 +4120,7 @@ const struct efx_nic_type efx_hunt_a0_vf_nic_type = {
 			    1 << HWTSTAMP_FILTER_ALL,
 	.rx_hash_key_size = 40,
 	.check_caps = ef10_check_caps,
+	.print_additional_fwver = efx_ef10_print_additional_fwver,
 };
 
 const struct efx_nic_type efx_hunt_a0_nic_type = {
@@ -4243,4 +4257,5 @@ const struct efx_nic_type efx_hunt_a0_nic_type = {
 			    1 << HWTSTAMP_FILTER_ALL,
 	.rx_hash_key_size = 40,
 	.check_caps = ef10_check_caps,
+	.print_additional_fwver = efx_ef10_print_additional_fwver,
 };
diff --git a/drivers/net/ethernet/sfc/mcdi.c b/drivers/net/ethernet/sfc/mcdi.c
index 15c731d04065..a8cc3881edce 100644
--- a/drivers/net/ethernet/sfc/mcdi.c
+++ b/drivers/net/ethernet/sfc/mcdi.c
@@ -1425,23 +1425,16 @@ void efx_mcdi_print_fwver(struct efx_nic *efx, char *buf, size_t len)
 			   le16_to_cpu(ver_words[2]),
 			   le16_to_cpu(ver_words[3]));
 
-	/* EF10 may have multiple datapath firmware variants within a
-	 * single version.  Report which variants are running.
-	 */
-	if (efx_nic_rev(efx) >= EFX_REV_HUNT_A0) {
-		struct efx_ef10_nic_data *nic_data = efx->nic_data;
-
-		offset += scnprintf(buf + offset, len - offset, " rx%x tx%x",
-				    nic_data->rx_dpcpu_fw_id,
-				    nic_data->tx_dpcpu_fw_id);
+	if (efx->type->print_additional_fwver)
+		offset += efx->type->print_additional_fwver(efx, buf + offset,
+							    len - offset);
 
-		/* It's theoretically possible for the string to exceed 31
-		 * characters, though in practice the first three version
-		 * components are short enough that this doesn't happen.
-		 */
-		if (WARN_ON(offset >= len))
-			buf[0] = 0;
-	}
+	/* It's theoretically possible for the string to exceed 31
+	 * characters, though in practice the first three version
+	 * components are short enough that this doesn't happen.
+	 */
+	if (WARN_ON(offset >= len))
+		buf[0] = 0;
 
 	return;
 
diff --git a/drivers/net/ethernet/sfc/net_driver.h b/drivers/net/ethernet/sfc/net_driver.h
index ae9756811dfe..1afb58feb9ab 100644
--- a/drivers/net/ethernet/sfc/net_driver.h
+++ b/drivers/net/ethernet/sfc/net_driver.h
@@ -1296,6 +1296,7 @@ struct efx_udp_tunnel {
  * @udp_tnl_add_port: Add a UDP tunnel port
  * @udp_tnl_has_port: Check if a port has been added as UDP tunnel
  * @udp_tnl_del_port: Remove a UDP tunnel port
+ * @print_additional_fwver: Dump NIC-specific additional FW version info
  * @revision: Hardware architecture revision
  * @txd_ptr_tbl_base: TX descriptor ring base address
  * @rxd_ptr_tbl_base: RX descriptor ring base address
@@ -1469,6 +1470,8 @@ struct efx_nic_type {
 	int (*udp_tnl_add_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
 	bool (*udp_tnl_has_port)(struct efx_nic *efx, __be16 port);
 	int (*udp_tnl_del_port)(struct efx_nic *efx, struct efx_udp_tunnel tnl);
+	size_t (*print_additional_fwver)(struct efx_nic *efx, char *buf,
+					 size_t len);
 
 	int revision;
 	unsigned int txd_ptr_tbl_base;
-- 
cgit v1.2.3-59-g8ed1b


From 2fa3888bb7a9fb3966c77555c8cd6f4bd6a1439a Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 11 May 2020 16:47:14 -0700
Subject: net: dsa: ocelot: Constify dsa_device_ops

ocelot_netdev_ops should be const since that is what the DSA layer
expects.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_ocelot.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/tag_ocelot.c b/net/dsa/tag_ocelot.c
index 59de1315100f..b0c98ee4e13b 100644
--- a/net/dsa/tag_ocelot.c
+++ b/net/dsa/tag_ocelot.c
@@ -228,7 +228,7 @@ static struct sk_buff *ocelot_rcv(struct sk_buff *skb,
 	return skb;
 }
 
-static struct dsa_device_ops ocelot_netdev_ops = {
+static const struct dsa_device_ops ocelot_netdev_ops = {
 	.name			= "ocelot",
 	.proto			= DSA_TAG_PROTO_OCELOT,
 	.xmit			= ocelot_xmit,
-- 
cgit v1.2.3-59-g8ed1b


From 097f024454fca0e13bbba0ab54dfe63ac5610953 Mon Sep 17 00:00:00 2001
From: Florian Fainelli <f.fainelli@gmail.com>
Date: Mon, 11 May 2020 16:47:15 -0700
Subject: net: dsa: tag_sja1105: Constify dsa_device_ops

sja1105_netdev_ops should be const since that is what the DSA layer
expects.

Signed-off-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_sja1105.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index d553bf36bd41..5ecac5921a7d 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -304,7 +304,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 					      is_meta);
 }
 
-static struct dsa_device_ops sja1105_netdev_ops = {
+static const struct dsa_device_ops sja1105_netdev_ops = {
 	.name = "sja1105",
 	.proto = DSA_TAG_PROTO_SJA1105,
 	.xmit = sja1105_xmit,
-- 
cgit v1.2.3-59-g8ed1b


From 0462b6bdb6445b887b8896f28be92e0d94c92e7b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 May 2020 13:59:11 +0200
Subject: net: add a CMSG_USER_DATA macro

Add a variant of CMSG_DATA that operates on user pointer to avoid
sparse warnings about casting to/from user pointers.  Also fix up
CMSG_DATA to rely on the gcc extension that allows void pointer
arithmetic to cut down on the number of casts.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 5 ++++-
 net/core/scm.c         | 4 ++--
 2 files changed, 6 insertions(+), 3 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 54338fac45cb..4cc64d611cf4 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -94,7 +94,10 @@ struct cmsghdr {
 
 #define CMSG_ALIGN(len) ( ((len)+sizeof(long)-1) & ~(sizeof(long)-1) )
 
-#define CMSG_DATA(cmsg)	((void *)((char *)(cmsg) + sizeof(struct cmsghdr)))
+#define CMSG_DATA(cmsg) \
+	((void *)(cmsg) + sizeof(struct cmsghdr))
+#define CMSG_USER_DATA(cmsg) \
+	((void __user *)(cmsg) + sizeof(struct cmsghdr))
 #define CMSG_SPACE(len) (sizeof(struct cmsghdr) + CMSG_ALIGN(len))
 #define CMSG_LEN(len) (sizeof(struct cmsghdr) + (len))
 
diff --git a/net/core/scm.c b/net/core/scm.c
index dc6fed1f221c..abfdc85a64c1 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -236,7 +236,7 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 	err = -EFAULT;
 	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
 		goto out;
-	if (copy_to_user(CMSG_DATA(cm), data, cmlen - sizeof(struct cmsghdr)))
+	if (copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm)))
 		goto out;
 	cmlen = CMSG_SPACE(len);
 	if (msg->msg_controllen < cmlen)
@@ -300,7 +300,7 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 	if (fdnum < fdmax)
 		fdmax = fdnum;
 
-	for (i=0, cmfptr=(__force int __user *)CMSG_DATA(cm); i<fdmax;
+	for (i=0, cmfptr =(int __user *)CMSG_USER_DATA(cm); i<fdmax;
 	     i++, cmfptr++)
 	{
 		struct socket *sock;
-- 
cgit v1.2.3-59-g8ed1b


From 2618d530dd8b7ac0fdcb83f4c95b88f7b0d37ce6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 May 2020 13:59:12 +0200
Subject: net/scm: cleanup scm_detach_fds

Factor out two helpers to keep the code tidy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 94 +++++++++++++++++++++++++++++++---------------------------
 1 file changed, 51 insertions(+), 43 deletions(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index abfdc85a64c1..168b006a52ff 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -277,78 +277,86 @@ void put_cmsg_scm_timestamping(struct msghdr *msg, struct scm_timestamping_inter
 }
 EXPORT_SYMBOL(put_cmsg_scm_timestamping);
 
+static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
+{
+	struct socket *sock;
+	int new_fd;
+	int error;
+
+	error = security_file_receive(file);
+	if (error)
+		return error;
+
+	new_fd = get_unused_fd_flags(o_flags);
+	if (new_fd < 0)
+		return new_fd;
+
+	error = put_user(new_fd, ufd);
+	if (error) {
+		put_unused_fd(new_fd);
+		return error;
+	}
+
+	/* Bump the usage count and install the file. */
+	sock = sock_from_file(file, &error);
+	if (sock) {
+		sock_update_netprioidx(&sock->sk->sk_cgrp_data);
+		sock_update_classid(&sock->sk->sk_cgrp_data);
+	}
+	fd_install(new_fd, get_file(file));
+	return error;
+}
+
+static int scm_max_fds(struct msghdr *msg)
+{
+	if (msg->msg_controllen <= sizeof(struct cmsghdr))
+		return 0;
+	return (msg->msg_controllen - sizeof(struct cmsghdr)) / sizeof(int);
+}
+
 void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 {
 	struct cmsghdr __user *cm
 		= (__force struct cmsghdr __user*)msg->msg_control;
-
-	int fdmax = 0;
-	int fdnum = scm->fp->count;
-	struct file **fp = scm->fp->fp;
-	int __user *cmfptr;
+	int o_flags = (msg->msg_flags & MSG_CMSG_CLOEXEC) ? O_CLOEXEC : 0;
+	int fdmax = min_t(int, scm_max_fds(msg), scm->fp->count);
+	int __user *cmsg_data = CMSG_USER_DATA(cm);
 	int err = 0, i;
 
-	if (MSG_CMSG_COMPAT & msg->msg_flags) {
+	if (msg->msg_flags & MSG_CMSG_COMPAT) {
 		scm_detach_fds_compat(msg, scm);
 		return;
 	}
 
-	if (msg->msg_controllen > sizeof(struct cmsghdr))
-		fdmax = ((msg->msg_controllen - sizeof(struct cmsghdr))
-			 / sizeof(int));
-
-	if (fdnum < fdmax)
-		fdmax = fdnum;
-
-	for (i=0, cmfptr =(int __user *)CMSG_USER_DATA(cm); i<fdmax;
-	     i++, cmfptr++)
-	{
-		struct socket *sock;
-		int new_fd;
-		err = security_file_receive(fp[i]);
+	for (i = 0; i < fdmax; i++) {
+		err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err)
 			break;
-		err = get_unused_fd_flags(MSG_CMSG_CLOEXEC & msg->msg_flags
-					  ? O_CLOEXEC : 0);
-		if (err < 0)
-			break;
-		new_fd = err;
-		err = put_user(new_fd, cmfptr);
-		if (err) {
-			put_unused_fd(new_fd);
-			break;
-		}
-		/* Bump the usage count and install the file. */
-		sock = sock_from_file(fp[i], &err);
-		if (sock) {
-			sock_update_netprioidx(&sock->sk->sk_cgrp_data);
-			sock_update_classid(&sock->sk->sk_cgrp_data);
-		}
-		fd_install(new_fd, get_file(fp[i]));
 	}
 
-	if (i > 0)
-	{
-		int cmlen = CMSG_LEN(i*sizeof(int));
+	if (i > 0)  {
+		int cmlen = CMSG_LEN(i * sizeof(int));
+
 		err = put_user(SOL_SOCKET, &cm->cmsg_level);
 		if (!err)
 			err = put_user(SCM_RIGHTS, &cm->cmsg_type);
 		if (!err)
 			err = put_user(cmlen, &cm->cmsg_len);
 		if (!err) {
-			cmlen = CMSG_SPACE(i*sizeof(int));
+			cmlen = CMSG_SPACE(i * sizeof(int));
 			if (msg->msg_controllen < cmlen)
 				cmlen = msg->msg_controllen;
 			msg->msg_control += cmlen;
 			msg->msg_controllen -= cmlen;
 		}
 	}
-	if (i < fdnum || (fdnum && fdmax <= 0))
+
+	if (i < scm->fp->count || (scm->fp->count && fdmax <= 0))
 		msg->msg_flags |= MSG_CTRUNC;
 
 	/*
-	 * All of the files that fit in the message have had their
-	 * usage counts incremented, so we just free the list.
+	 * All of the files that fit in the message have had their usage counts
+	 * incremented, so we just free the list.
 	 */
 	__scm_destroy(scm);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 1f466e1f15cf1dac7c86798d694649fc42cd868a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 11 May 2020 13:59:13 +0200
Subject: net: cleanly handle kernel vs user buffers for ->msg_control

The msg_control field in struct msghdr can either contain a user
pointer when used with the recvmsg system call, or a kernel pointer
when used with sendmsg.  To complicate things further kernel_recvmsg
can stuff a kernel pointer in and then use set_fs to make the uaccess
helpers accept it.

Replace it with a union of a kernel pointer msg_control field, and
a user pointer msg_control_user one, and allow kernel_recvmsg to operate
on a proper kernel pointer using a bitfield to override the normal
choice of a user pointer for recvmsg.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/socket.h | 12 +++++++++++-
 net/compat.c           |  5 +++--
 net/core/scm.c         | 49 ++++++++++++++++++++++++++++---------------------
 net/ipv4/ip_sockglue.c |  3 ++-
 net/socket.c           | 22 ++++++----------------
 5 files changed, 50 insertions(+), 41 deletions(-)

diff --git a/include/linux/socket.h b/include/linux/socket.h
index 4cc64d611cf4..04d2bc97f497 100644
--- a/include/linux/socket.h
+++ b/include/linux/socket.h
@@ -50,7 +50,17 @@ struct msghdr {
 	void		*msg_name;	/* ptr to socket address structure */
 	int		msg_namelen;	/* size of socket address structure */
 	struct iov_iter	msg_iter;	/* data */
-	void		*msg_control;	/* ancillary data */
+
+	/*
+	 * Ancillary data. msg_control_user is the user buffer used for the
+	 * recv* side when msg_control_is_user is set, msg_control is the kernel
+	 * buffer used for all other cases.
+	 */
+	union {
+		void		*msg_control;
+		void __user	*msg_control_user;
+	};
+	bool		msg_control_is_user : 1;
 	__kernel_size_t	msg_controllen;	/* ancillary data buffer length */
 	unsigned int	msg_flags;	/* flags on received message */
 	struct kiocb	*msg_iocb;	/* ptr to iocb for async requests */
diff --git a/net/compat.c b/net/compat.c
index 4bed96e84d9a..69fc6d1e4e6e 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -56,7 +56,8 @@ int __get_compat_msghdr(struct msghdr *kmsg,
 	if (kmsg->msg_namelen > sizeof(struct sockaddr_storage))
 		kmsg->msg_namelen = sizeof(struct sockaddr_storage);
 
-	kmsg->msg_control = compat_ptr(msg.msg_control);
+	kmsg->msg_control_is_user = true;
+	kmsg->msg_control_user = compat_ptr(msg.msg_control);
 	kmsg->msg_controllen = msg.msg_controllen;
 
 	if (save_addr)
@@ -121,7 +122,7 @@ int get_compat_msghdr(struct msghdr *kmsg,
 	((ucmlen) >= sizeof(struct compat_cmsghdr) && \
 	 (ucmlen) <= (unsigned long) \
 	 ((mhdr)->msg_controllen - \
-	  ((char *)(ucmsg) - (char *)(mhdr)->msg_control)))
+	  ((char __user *)(ucmsg) - (char __user *)(mhdr)->msg_control_user)))
 
 static inline struct compat_cmsghdr __user *cmsg_compat_nxthdr(struct msghdr *msg,
 		struct compat_cmsghdr __user *cmsg, int cmsg_len)
diff --git a/net/core/scm.c b/net/core/scm.c
index 168b006a52ff..a75cd637a71f 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -212,16 +212,12 @@ EXPORT_SYMBOL(__scm_send);
 
 int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 {
-	struct cmsghdr __user *cm
-		= (__force struct cmsghdr __user *)msg->msg_control;
-	struct cmsghdr cmhdr;
 	int cmlen = CMSG_LEN(len);
-	int err;
 
-	if (MSG_CMSG_COMPAT & msg->msg_flags)
+	if (msg->msg_flags & MSG_CMSG_COMPAT)
 		return put_cmsg_compat(msg, level, type, len, data);
 
-	if (cm==NULL || msg->msg_controllen < sizeof(*cm)) {
+	if (!msg->msg_control || msg->msg_controllen < sizeof(struct cmsghdr)) {
 		msg->msg_flags |= MSG_CTRUNC;
 		return 0; /* XXX: return error? check spec. */
 	}
@@ -229,23 +225,30 @@ int put_cmsg(struct msghdr * msg, int level, int type, int len, void *data)
 		msg->msg_flags |= MSG_CTRUNC;
 		cmlen = msg->msg_controllen;
 	}
-	cmhdr.cmsg_level = level;
-	cmhdr.cmsg_type = type;
-	cmhdr.cmsg_len = cmlen;
-
-	err = -EFAULT;
-	if (copy_to_user(cm, &cmhdr, sizeof cmhdr))
-		goto out;
-	if (copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm)))
-		goto out;
-	cmlen = CMSG_SPACE(len);
-	if (msg->msg_controllen < cmlen)
-		cmlen = msg->msg_controllen;
+
+	if (msg->msg_control_is_user) {
+		struct cmsghdr __user *cm = msg->msg_control_user;
+		struct cmsghdr cmhdr;
+
+		cmhdr.cmsg_level = level;
+		cmhdr.cmsg_type = type;
+		cmhdr.cmsg_len = cmlen;
+		if (copy_to_user(cm, &cmhdr, sizeof cmhdr) ||
+		    copy_to_user(CMSG_USER_DATA(cm), data, cmlen - sizeof(*cm)))
+			return -EFAULT;
+	} else {
+		struct cmsghdr *cm = msg->msg_control;
+
+		cm->cmsg_level = level;
+		cm->cmsg_type = type;
+		cm->cmsg_len = cmlen;
+		memcpy(CMSG_DATA(cm), data, cmlen - sizeof(*cm));
+	}
+
+	cmlen = min(CMSG_SPACE(len), msg->msg_controllen);
 	msg->msg_control += cmlen;
 	msg->msg_controllen -= cmlen;
-	err = 0;
-out:
-	return err;
+	return 0;
 }
 EXPORT_SYMBOL(put_cmsg);
 
@@ -328,6 +331,10 @@ void scm_detach_fds(struct msghdr *msg, struct scm_cookie *scm)
 		return;
 	}
 
+	/* no use for FD passing from kernel space callers */
+	if (WARN_ON_ONCE(!msg->msg_control_is_user))
+		return;
+
 	for (i = 0; i < fdmax; i++) {
 		err = __scm_install_fd(scm->fp->fp[i], cmsg_data + i, o_flags);
 		if (err)
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aa3fd61818c4..8206047d70b6 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1492,7 +1492,8 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 		if (sk->sk_type != SOCK_STREAM)
 			return -ENOPROTOOPT;
 
-		msg.msg_control = (__force void *) optval;
+		msg.msg_control_is_user = true;
+		msg.msg_control_user = optval;
 		msg.msg_controllen = len;
 		msg.msg_flags = flags;
 
diff --git a/net/socket.c b/net/socket.c
index 2dd739fba866..1c9a7260a41d 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -924,14 +924,9 @@ EXPORT_SYMBOL(sock_recvmsg);
 int kernel_recvmsg(struct socket *sock, struct msghdr *msg,
 		   struct kvec *vec, size_t num, size_t size, int flags)
 {
-	mm_segment_t oldfs = get_fs();
-	int result;
-
+	msg->msg_control_is_user = false;
 	iov_iter_kvec(&msg->msg_iter, READ, vec, num, size);
-	set_fs(KERNEL_DS);
-	result = sock_recvmsg(sock, msg, flags);
-	set_fs(oldfs);
-	return result;
+	return sock_recvmsg(sock, msg, flags);
 }
 EXPORT_SYMBOL(kernel_recvmsg);
 
@@ -2239,7 +2234,8 @@ int __copy_msghdr_from_user(struct msghdr *kmsg,
 	if (copy_from_user(&msg, umsg, sizeof(*umsg)))
 		return -EFAULT;
 
-	kmsg->msg_control = (void __force *)msg.msg_control;
+	kmsg->msg_control_is_user = true;
+	kmsg->msg_control_user = msg.msg_control;
 	kmsg->msg_controllen = msg.msg_controllen;
 	kmsg->msg_flags = msg.msg_flags;
 
@@ -2331,16 +2327,10 @@ static int ____sys_sendmsg(struct socket *sock, struct msghdr *msg_sys,
 				goto out;
 		}
 		err = -EFAULT;
-		/*
-		 * Careful! Before this, msg_sys->msg_control contains a user pointer.
-		 * Afterwards, it will be a kernel pointer. Thus the compiler-assisted
-		 * checking falls down on this.
-		 */
-		if (copy_from_user(ctl_buf,
-				   (void __user __force *)msg_sys->msg_control,
-				   ctl_len))
+		if (copy_from_user(ctl_buf, msg_sys->msg_control_user, ctl_len))
 			goto out_freectl;
 		msg_sys->msg_control = ctl_buf;
+		msg_sys->msg_control_is_user = false;
 	}
 	msg_sys->msg_flags = flags;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6b9ea5ff5abdcda9d1291d9b8bbad44c93c7ccef Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Mon, 11 May 2020 10:08:07 -0700
Subject: checkpatch: warn about uses of ENOTSUPP

ENOTSUPP often feels like the right error code to use, but it's
in fact not a standard Unix error. E.g.:

$ python
>>> import errno
>>> errno.errorcode[errno.ENOTSUPP]
Traceback (most recent call last):
  File "<stdin>", line 1, in <module>
AttributeError: module 'errno' has no attribute 'ENOTSUPP'

There were numerous commits converting the uses back to EOPNOTSUPP
but in some cases we are stuck with the high error code for backward
compatibility reasons.

Let's try to prevent more ENOTSUPPs from getting into the kernel.

Recent example:
https://lore.kernel.org/netdev/20200510182252.GA411829@lunn.ch/

v3 (Joe):
 - fix the "not file" condition.

v2 (Joe):
 - add a link to recent discussion,
 - don't match when scanning files, not patches to avoid sudden
   influx of conversion patches.
https://lore.kernel.org/netdev/20200511165319.2251678-1-kuba@kernel.org/

v1:
https://lore.kernel.org/netdev/20200510185148.2230767-1-kuba@kernel.org/

Suggested-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Acked-by: Joe Perches <joe@perches.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 scripts/checkpatch.pl | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/scripts/checkpatch.pl b/scripts/checkpatch.pl
index eac40f0abd56..2be07ed4d70c 100755
--- a/scripts/checkpatch.pl
+++ b/scripts/checkpatch.pl
@@ -4199,6 +4199,17 @@ sub process {
 			     "ENOSYS means 'invalid syscall nr' and nothing else\n" . $herecurr);
 		}
 
+# ENOTSUPP is not a standard error code and should be avoided in new patches.
+# Folks usually mean EOPNOTSUPP (also called ENOTSUP), when they type ENOTSUPP.
+# Similarly to ENOSYS warning a small number of false positives is expected.
+		if (!$file && $line =~ /\bENOTSUPP\b/) {
+			if (WARN("ENOTSUPP",
+				 "ENOTSUPP is not a SUSV4 error code, prefer EOPNOTSUPP\n" . $herecurr) &&
+			    $fix) {
+				$fixed[$fixlinenr] =~ s/\bENOTSUPP\b/EOPNOTSUPP/;
+			}
+		}
+
 # function brace can't be on same line, except for #defines of do while,
 # or if closed on same line
 		if ($perl_version_ok &&
-- 
cgit v1.2.3-59-g8ed1b


From 103dc3dab295159cb2c5374a0dc499cd3962d49f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 10:19:21 -0500
Subject: carl9170: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507151921.GA5083@embeddedor
---
 drivers/net/wireless/ath/carl9170/fwcmd.h | 2 +-
 drivers/net/wireless/ath/carl9170/hw.h    | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/carl9170/fwcmd.h b/drivers/net/wireless/ath/carl9170/fwcmd.h
index ea1d80f9a50e..56999a3b9d3b 100644
--- a/drivers/net/wireless/ath/carl9170/fwcmd.h
+++ b/drivers/net/wireless/ath/carl9170/fwcmd.h
@@ -127,7 +127,7 @@ struct carl9170_write_reg {
 struct carl9170_write_reg_byte {
 	__le32	addr;
 	__le32  count;
-	u8	val[0];
+	u8	val[];
 } __packed;
 
 #define	CARL9170FW_PHY_HT_ENABLE		0x4
diff --git a/drivers/net/wireless/ath/carl9170/hw.h b/drivers/net/wireless/ath/carl9170/hw.h
index 08e0ae9c5836..555ad4975970 100644
--- a/drivers/net/wireless/ath/carl9170/hw.h
+++ b/drivers/net/wireless/ath/carl9170/hw.h
@@ -851,7 +851,7 @@ struct ar9170_stream {
 	__le16 length;
 	__le16 tag;
 
-	u8 payload[0];
+	u8 payload[];
 } __packed __aligned(4);
 #define AR9170_STREAM_LEN				4
 
-- 
cgit v1.2.3-59-g8ed1b


From 9f12bebd512c560e9e667a6ac3cf0e04d9d7f43e Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Mon, 11 May 2020 15:53:35 +0300
Subject: ath10k: fix gcc-10 zero-length-bounds warnings

gcc-10 started warning about out-of-bounds access for zero-length
arrays:

In file included from drivers/net/wireless/ath/ath10k/core.h:18,
                 from drivers/net/wireless/ath/ath10k/htt_rx.c:8:
drivers/net/wireless/ath/ath10k/htt_rx.c: In function 'ath10k_htt_rx_tx_fetch_ind':
drivers/net/wireless/ath/ath10k/htt.h:1683:17: warning: array subscript 65535 is outside the bounds of an interior zero-length array 'struct htt_tx_fetch_record[0]' [-Wzero-length-bounds]
 1683 |  return (void *)&ind->records[le16_to_cpu(ind->num_records)];
      |                 ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/wireless/ath/ath10k/htt.h:1676:29: note: while referencing 'records'
 1676 |  struct htt_tx_fetch_record records[0];
      |                             ^~~~~~~

Make records[] a flexible array member to allow this, moving it behind
the other zero-length member that is not accessed in a way that gcc
warns about.

Fixes: 22e6b3bc5d96 ("ath10k: add new htt definitions")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200509120707.188595-1-arnd@arndb.de
---
 drivers/net/wireless/ath/ath10k/htt.h | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/htt.h b/drivers/net/wireless/ath/ath10k/htt.h
index e504be63173a..cad59494f175 100644
--- a/drivers/net/wireless/ath/ath10k/htt.h
+++ b/drivers/net/wireless/ath/ath10k/htt.h
@@ -1673,8 +1673,8 @@ struct htt_tx_fetch_ind {
 	__le32 token;
 	__le16 num_resp_ids;
 	__le16 num_records;
-	struct htt_tx_fetch_record records[0];
-	__le32 resp_ids[]; /* ath10k_htt_get_tx_fetch_ind_resp_ids() */
+	__le32 resp_ids[0]; /* ath10k_htt_get_tx_fetch_ind_resp_ids() */
+	struct htt_tx_fetch_record records[];
 } __packed;
 
 static inline void *
-- 
cgit v1.2.3-59-g8ed1b


From 32221df6765b3773ff1af37c77f8531ebc48f246 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Sat, 9 May 2020 14:06:33 +0200
Subject: ath10k: fix ath10k_pci struct layout

gcc-10 correctly points out a bug with a zero-length array in
struct ath10k_pci:

drivers/net/wireless/ath/ath10k/ahb.c: In function 'ath10k_ahb_remove':
drivers/net/wireless/ath/ath10k/ahb.c:30:9: error: array subscript 0 is outside the bounds of an interior zero-length array 'struct ath10k_ahb[0]' [-Werror=zero-length-bounds]
   30 |  return &((struct ath10k_pci *)ar->drv_priv)->ahb[0];
      |         ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
In file included from drivers/net/wireless/ath/ath10k/ahb.c:13:
drivers/net/wireless/ath/ath10k/pci.h:185:20: note: while referencing 'ahb'
  185 |  struct ath10k_ahb ahb[0];
      |                    ^~~

The last addition to the struct ignored the comments and added
new members behind the array that must remain last.

Change it to a flexible-array member and move it last again to
make it work correctly, prevent the same thing from happening
again (all compilers warn about flexible-array members in the
middle of a struct) and get it to build without warnings.

Fixes: 521fc37be3d8 ("ath10k: Avoid override CE5 configuration for QCA99X0 chipsets")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200509120707.188595-2-arnd@arndb.de
---
 drivers/net/wireless/ath/ath10k/pci.h | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ath/ath10k/pci.h b/drivers/net/wireless/ath/ath10k/pci.h
index e3cbd259a2dc..862d0901c5b8 100644
--- a/drivers/net/wireless/ath/ath10k/pci.h
+++ b/drivers/net/wireless/ath/ath10k/pci.h
@@ -178,15 +178,16 @@ struct ath10k_pci {
 	 */
 	u32 (*targ_cpu_to_ce_addr)(struct ath10k *ar, u32 addr);
 
+	struct ce_attr *attr;
+	struct ce_pipe_config *pipe_config;
+	struct ce_service_to_pipe *serv_to_pipe;
+
 	/* Keep this entry in the last, memory for struct ath10k_ahb is
 	 * allocated (ahb support enabled case) in the continuation of
 	 * this struct.
 	 */
-	struct ath10k_ahb ahb[0];
+	struct ath10k_ahb ahb[];
 
-	struct ce_attr *attr;
-	struct ce_pipe_config *pipe_config;
-	struct ce_service_to_pipe *serv_to_pipe;
 };
 
 static inline struct ath10k_pci *ath10k_pci_priv(struct ath10k *ar)
-- 
cgit v1.2.3-59-g8ed1b


From 52b776fa592178c3dc341a93fec7bc760b38cc15 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 7 May 2020 17:43:18 +0100
Subject: ath11k: remove redundant initialization of pointer info

Pointer info is being assigned twice, once at the start of the function
and secondly when it is just about to be accessed. Remove the redundant
initialization and keep the original assignment to info that is close
to the memcpy that uses it.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507164318.56570-1-colin.king@canonical.com
---
 drivers/net/wireless/ath/ath11k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 5ffe55801ca4..4e1185a4cd01 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3693,7 +3693,7 @@ static int __ath11k_set_antenna(struct ath11k *ar, u32 tx_ant, u32 rx_ant)
 int ath11k_mac_tx_mgmt_pending_free(int buf_id, void *skb, void *ctx)
 {
 	struct sk_buff *msdu = skb;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(msdu);
+	struct ieee80211_tx_info *info;
 	struct ath11k *ar = ctx;
 	struct ath11k_base *ab = ar->ab;
 
-- 
cgit v1.2.3-59-g8ed1b


From c8ffcd122760612c1fac45c44d008a3fe8b2cbb4 Mon Sep 17 00:00:00 2001
From: Wei Yongjun <weiyongjun1@huawei.com>
Date: Fri, 8 May 2020 07:53:23 +0000
Subject: ath11k: convert to devm_platform_get_and_ioremap_resource

Use the helper function that wraps the calls to platform_get_resource()
and devm_ioremap_resource() together.

Signed-off-by: Wei Yongjun <weiyongjun1@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200508075323.81128-1-weiyongjun1@huawei.com
---
 drivers/net/wireless/ath/ath11k/ahb.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/ahb.c b/drivers/net/wireless/ath/ath11k/ahb.c
index ef1038aa5692..30092841ac46 100644
--- a/drivers/net/wireless/ath/ath11k/ahb.c
+++ b/drivers/net/wireless/ath/ath11k/ahb.c
@@ -914,13 +914,7 @@ static int ath11k_ahb_probe(struct platform_device *pdev)
 		return -EINVAL;
 	}
 
-	mem_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
-	if (!mem_res) {
-		dev_err(&pdev->dev, "failed to get IO memory resource\n");
-		return -ENXIO;
-	}
-
-	mem = devm_ioremap_resource(&pdev->dev, mem_res);
+	mem = devm_platform_get_and_ioremap_resource(pdev, 0, &mem_res);
 	if (IS_ERR(mem)) {
 		dev_err(&pdev->dev, "ioremap error\n");
 		return PTR_ERR(mem);
-- 
cgit v1.2.3-59-g8ed1b


From 104f3d95d8d633ceebcef811c5c7c3ba56bedc7f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 10:11:20 -0500
Subject: wil6210: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507151120.GA4469@embeddedor
---
 drivers/net/wireless/ath/wil6210/fw.h  | 16 +++++-----
 drivers/net/wireless/ath/wil6210/wmi.c |  2 +-
 drivers/net/wireless/ath/wil6210/wmi.h | 56 +++++++++++++++++-----------------
 3 files changed, 37 insertions(+), 37 deletions(-)

diff --git a/drivers/net/wireless/ath/wil6210/fw.h b/drivers/net/wireless/ath/wil6210/fw.h
index 540fa1607794..440614d61156 100644
--- a/drivers/net/wireless/ath/wil6210/fw.h
+++ b/drivers/net/wireless/ath/wil6210/fw.h
@@ -33,7 +33,7 @@ struct wil_fw_record_head {
  */
 struct wil_fw_record_data { /* type == wil_fw_type_data */
 	__le32 addr;
-	__le32 data[0]; /* [data_size], see above */
+	__le32 data[]; /* [data_size], see above */
 } __packed;
 
 /* fill with constant @value, @size bytes starting from @addr */
@@ -61,7 +61,7 @@ struct wil_fw_record_capabilities { /* type == wil_fw_type_comment */
 	/* identifies capabilities record */
 	struct wil_fw_record_comment_hdr hdr;
 	/* capabilities (variable size), see enum wmi_fw_capability */
-	u8 capabilities[0];
+	u8 capabilities[];
 } __packed;
 
 /* FW VIF concurrency encoded inside a comment record
@@ -80,7 +80,7 @@ struct wil_fw_concurrency_combo {
 	u8 n_diff_channels; /* total number of different channels allowed */
 	u8 same_bi; /* for APs, 1 if all APs must have same BI */
 	/* keep last - concurrency limits, variable size by n_limits */
-	struct wil_fw_concurrency_limit limits[0];
+	struct wil_fw_concurrency_limit limits[];
 } __packed;
 
 struct wil_fw_record_concurrency { /* type == wil_fw_type_comment */
@@ -93,7 +93,7 @@ struct wil_fw_record_concurrency { /* type == wil_fw_type_comment */
 	/* number of concurrency combinations that follow */
 	__le16 n_combos;
 	/* keep last - combinations, variable size by n_combos */
-	struct wil_fw_concurrency_combo combos[0];
+	struct wil_fw_concurrency_combo combos[];
 } __packed;
 
 /* brd file info encoded inside a comment record */
@@ -108,7 +108,7 @@ struct wil_fw_record_brd_file { /* type == wil_fw_type_comment */
 	/* identifies brd file record */
 	struct wil_fw_record_comment_hdr hdr;
 	__le32 version;
-	struct brd_info brd_info[0];
+	struct brd_info brd_info[];
 } __packed;
 
 /* perform action
@@ -116,7 +116,7 @@ struct wil_fw_record_brd_file { /* type == wil_fw_type_comment */
  */
 struct wil_fw_record_action { /* type == wil_fw_type_action */
 	__le32 action; /* action to perform: reset, wait for fw ready etc. */
-	__le32 data[0]; /* action specific, [data_size], see above */
+	__le32 data[]; /* action specific, [data_size], see above */
 } __packed;
 
 /* data block for struct wil_fw_record_direct_write */
@@ -179,7 +179,7 @@ struct wil_fw_record_gateway_data { /* type == wil_fw_type_gateway_data */
 #define WIL_FW_GW_CTL_BUSY	BIT(29) /* gateway busy performing operation */
 #define WIL_FW_GW_CTL_RUN	BIT(30) /* start gateway operation */
 	__le32 command;
-	struct wil_fw_data_gw data[0]; /* total size [data_size], see above */
+	struct wil_fw_data_gw data[]; /* total size [data_size], see above */
 } __packed;
 
 /* 4-dword gateway */
@@ -201,7 +201,7 @@ struct wil_fw_record_gateway_data4 { /* type == wil_fw_type_gateway_data4 */
 	__le32 gateway_cmd_addr;
 	__le32 gateway_ctrl_address; /* same logic as for 1-dword gw */
 	__le32 command;
-	struct wil_fw_data_gw4 data[0]; /* total size [data_size], see above */
+	struct wil_fw_data_gw4 data[]; /* total size [data_size], see above */
 } __packed;
 
 #endif /* __WIL_FW_H__ */
diff --git a/drivers/net/wireless/ath/wil6210/wmi.c b/drivers/net/wireless/ath/wil6210/wmi.c
index 23e1ed6a9d6d..c7136ce567ee 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.c
+++ b/drivers/net/wireless/ath/wil6210/wmi.c
@@ -222,7 +222,7 @@ struct auth_no_hdr {
 	__le16 auth_transaction;
 	__le16 status_code;
 	/* possibly followed by Challenge text */
-	u8 variable[0];
+	u8 variable[];
 } __packed;
 
 u8 led_polarity = LED_POLARITY_LOW_ACTIVE;
diff --git a/drivers/net/wireless/ath/wil6210/wmi.h b/drivers/net/wireless/ath/wil6210/wmi.h
index 5bba45c1de48..9affa4525609 100644
--- a/drivers/net/wireless/ath/wil6210/wmi.h
+++ b/drivers/net/wireless/ath/wil6210/wmi.h
@@ -530,7 +530,7 @@ struct wmi_update_ft_ies_cmd {
 	/* Length of the FT IEs */
 	__le16 ie_len;
 	u8 reserved[2];
-	u8 ie_info[0];
+	u8 ie_info[];
 } __packed;
 
 /* WMI_SET_PROBED_SSID_CMDID */
@@ -575,7 +575,7 @@ struct wmi_set_appie_cmd {
 	u8 reserved;
 	/* Length of the IE to be added to MGMT frame */
 	__le16 ie_len;
-	u8 ie_info[0];
+	u8 ie_info[];
 } __packed;
 
 /* WMI_PXMT_RANGE_CFG_CMDID */
@@ -850,7 +850,7 @@ struct wmi_pcp_start_cmd {
 struct wmi_sw_tx_req_cmd {
 	u8 dst_mac[WMI_MAC_LEN];
 	__le16 len;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* WMI_SW_TX_REQ_EXT_CMDID */
@@ -861,7 +861,7 @@ struct wmi_sw_tx_req_ext_cmd {
 	/* Channel to use, 0xFF for currently active channel */
 	u8 channel;
 	u8 reserved[5];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* WMI_VRING_SWITCH_TIMING_CONFIG_CMDID */
@@ -1423,7 +1423,7 @@ struct wmi_rf_xpm_write_cmd {
 	u8 verify;
 	u8 reserved1[3];
 	/* actual size=num_bytes */
-	u8 data_bytes[0];
+	u8 data_bytes[];
 } __packed;
 
 /* Possible modes for temperature measurement */
@@ -1572,7 +1572,7 @@ struct wmi_tof_session_start_cmd {
 	u8 aoa_type;
 	__le16 num_of_dest;
 	u8 reserved[4];
-	struct wmi_ftm_dest_info ftm_dest_info[0];
+	struct wmi_ftm_dest_info ftm_dest_info[];
 } __packed;
 
 /* WMI_TOF_CFG_RESPONDER_CMDID */
@@ -1766,7 +1766,7 @@ struct wmi_internal_fw_ioctl_cmd {
 	/* payload max size is WMI_MAX_IOCTL_PAYLOAD_SIZE
 	 * Must be the last member of the struct
 	 */
-	__le32 payload[0];
+	__le32 payload[];
 } __packed;
 
 /* WMI_INTERNAL_FW_IOCTL_EVENTID */
@@ -1778,7 +1778,7 @@ struct wmi_internal_fw_ioctl_event {
 	/* payload max size is WMI_MAX_IOCTL_REPLY_PAYLOAD_SIZE
 	 * Must be the last member of the struct
 	 */
-	__le32 payload[0];
+	__le32 payload[];
 } __packed;
 
 /* WMI_INTERNAL_FW_EVENT_EVENTID */
@@ -1788,7 +1788,7 @@ struct wmi_internal_fw_event_event {
 	/* payload max size is WMI_MAX_INTERNAL_EVENT_PAYLOAD_SIZE
 	 * Must be the last member of the struct
 	 */
-	__le32 payload[0];
+	__le32 payload[];
 } __packed;
 
 /* WMI_SET_VRING_PRIORITY_WEIGHT_CMDID */
@@ -1818,7 +1818,7 @@ struct wmi_set_vring_priority_cmd {
 	 */
 	u8 num_of_vrings;
 	u8 reserved[3];
-	struct wmi_vring_priority vring_priority[0];
+	struct wmi_vring_priority vring_priority[];
 } __packed;
 
 /* WMI_BF_CONTROL_CMDID - deprecated */
@@ -1910,7 +1910,7 @@ struct wmi_bf_control_ex_cmd {
 	u8 each_mcs_cfg_size;
 	u8 reserved1;
 	/* Configuration for each MCS */
-	struct wmi_bf_control_ex_mcs each_mcs_cfg[0];
+	struct wmi_bf_control_ex_mcs each_mcs_cfg[];
 } __packed;
 
 /* WMI_LINK_STATS_CMD */
@@ -2192,7 +2192,7 @@ struct wmi_fw_ver_event {
 	/* FW capabilities info
 	 * Must be the last member of the struct
 	 */
-	__le32 fw_capabilities[0];
+	__le32 fw_capabilities[];
 } __packed;
 
 /* WMI_GET_RF_STATUS_EVENTID */
@@ -2270,7 +2270,7 @@ struct wmi_mac_addr_resp_event {
 struct wmi_eapol_rx_event {
 	u8 src_mac[WMI_MAC_LEN];
 	__le16 eapol_len;
-	u8 eapol[0];
+	u8 eapol[];
 } __packed;
 
 /* WMI_READY_EVENTID */
@@ -2343,7 +2343,7 @@ struct wmi_connect_event {
 	u8 aid;
 	u8 reserved2[2];
 	/* not in use */
-	u8 assoc_info[0];
+	u8 assoc_info[];
 } __packed;
 
 /* disconnect_reason */
@@ -2376,7 +2376,7 @@ struct wmi_disconnect_event {
 	/* last assoc req may passed to host - not in used */
 	u8 assoc_resp_len;
 	/* last assoc req may passed to host - not in used */
-	u8 assoc_info[0];
+	u8 assoc_info[];
 } __packed;
 
 /* WMI_SCAN_COMPLETE_EVENTID */
@@ -2400,7 +2400,7 @@ struct wmi_ft_auth_status_event {
 	u8 reserved[3];
 	u8 mac_addr[WMI_MAC_LEN];
 	__le16 ie_len;
-	u8 ie_info[0];
+	u8 ie_info[];
 } __packed;
 
 /* WMI_FT_REASSOC_STATUS_EVENTID */
@@ -2418,7 +2418,7 @@ struct wmi_ft_reassoc_status_event {
 	__le16 reassoc_req_ie_len;
 	__le16 reassoc_resp_ie_len;
 	u8 reserved[4];
-	u8 ie_info[0];
+	u8 ie_info[];
 } __packed;
 
 /* wmi_rx_mgmt_info */
@@ -2461,7 +2461,7 @@ struct wmi_stop_sched_scan_event {
 
 struct wmi_sched_scan_result_event {
 	struct wmi_rx_mgmt_info info;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* WMI_ACS_PASSIVE_SCAN_COMPLETE_EVENT */
@@ -2492,7 +2492,7 @@ struct wmi_acs_passive_scan_complete_event {
 	__le16 filled;
 	u8 num_scanned_channels;
 	u8 reserved;
-	struct scan_acs_info scan_info_list[0];
+	struct scan_acs_info scan_info_list[];
 } __packed;
 
 /* WMI_BA_STATUS_EVENTID */
@@ -2751,7 +2751,7 @@ struct wmi_rf_xpm_read_result_event {
 	u8 status;
 	u8 reserved[3];
 	/* requested num_bytes of data */
-	u8 data_bytes[0];
+	u8 data_bytes[];
 } __packed;
 
 /* EVENT: WMI_RF_XPM_WRITE_RESULT_EVENTID */
@@ -2769,7 +2769,7 @@ struct wmi_tx_mgmt_packet_event {
 /* WMI_RX_MGMT_PACKET_EVENTID */
 struct wmi_rx_mgmt_packet_event {
 	struct wmi_rx_mgmt_info info;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* WMI_ECHO_RSP_EVENTID */
@@ -2969,7 +2969,7 @@ struct wmi_rs_cfg_ex_cmd {
 	u8 each_mcs_cfg_size;
 	u8 reserved[3];
 	/* Configuration for each MCS */
-	struct wmi_rs_cfg_ex_mcs each_mcs_cfg[0];
+	struct wmi_rs_cfg_ex_mcs each_mcs_cfg[];
 } __packed;
 
 /* WMI_RS_CFG_EX_EVENTID */
@@ -3178,7 +3178,7 @@ struct wmi_get_detailed_rs_res_ex_event {
 	u8 each_mcs_results_size;
 	u8 reserved1[3];
 	/* Results for each MCS */
-	struct wmi_rs_results_ex_mcs each_mcs_results[0];
+	struct wmi_rs_results_ex_mcs each_mcs_results[];
 } __packed;
 
 /* BRP antenna limit mode */
@@ -3320,7 +3320,7 @@ struct wmi_set_link_monitor_cmd {
 	u8 rssi_hyst;
 	u8 reserved[12];
 	u8 rssi_thresholds_list_size;
-	s8 rssi_thresholds_list[0];
+	s8 rssi_thresholds_list[];
 } __packed;
 
 /* wmi_link_monitor_event_type */
@@ -3637,7 +3637,7 @@ struct wmi_tof_ftm_per_dest_res_event {
 	/* Measurments are from RFs, defined by the mask */
 	__le32 meas_rf_mask;
 	u8 reserved0[3];
-	struct wmi_responder_ftm_res responder_ftm_res[0];
+	struct wmi_responder_ftm_res responder_ftm_res[];
 } __packed;
 
 /* WMI_TOF_CFG_RESPONDER_EVENTID */
@@ -3669,7 +3669,7 @@ struct wmi_tof_channel_info_event {
 	/* data report length */
 	u8 len;
 	/* data report payload */
-	u8 report[0];
+	u8 report[];
 } __packed;
 
 /* WMI_TOF_SET_TX_RX_OFFSET_EVENTID */
@@ -4085,7 +4085,7 @@ struct wmi_link_stats_event {
 	u8 has_next;
 	u8 reserved[5];
 	/* a stream of wmi_link_stats_record_s */
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /* WMI_LINK_STATS_EVENT */
@@ -4094,7 +4094,7 @@ struct wmi_link_stats_record {
 	u8 record_type_id;
 	u8 reserved;
 	__le16 record_size;
-	u8 record[0];
+	u8 record[];
 } __packed;
 
 /* WMI_LINK_STATS_TYPE_BASIC */
-- 
cgit v1.2.3-59-g8ed1b


From 92072e5fb099136fc3ea8d7a44352c1d4230b8df Mon Sep 17 00:00:00 2001
From: Saravanan Shanmugham <saravanan.shanmugham@cypress.com>
Date: Tue, 5 May 2020 01:51:26 -0500
Subject: brcmfmac: map 802.1d priority to precedence level based on AP WMM
 params

In WLAN, priority among various access categories of traffic is
always set by the AP using WMM parameters and this may not always
follow the standard 802.1d priority.

In this change, priority is adjusted based on the AP WMM params
received as part of the Assoc Response and the same is later used
to map the priority of all incoming traffic.

In a specific scenario where EDCA parameters are configured to be the
same for all ACs, use the default FW priority definition to avoid queuing
packets of all ACs to the same priority queue.

This change fixes the following 802.11 certification tests:
* 11n - 5.2.31 ACM Bit Conformance test
* 11n - 5.2.32 AC Parameter Modification test
* 11ac - 5.2.33 TXOP Limit test

Signed-off-by: Saravanan Shanmugham <saravanan.shanmugham@cypress.com>
Signed-off-by: Justin Li <justin.li@cypress.com>
Signed-off-by: Madhan Mohan R <madhanmohan.r@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588661487-21884-2-git-send-email-chi-hsien.lin@cypress.com
---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c         | 152 +++++++++++++++++++++
 .../broadcom/brcm80211/brcmfmac/cfg80211.h         |  24 ++++
 .../wireless/broadcom/brcm80211/brcmfmac/common.h  |   4 +
 .../broadcom/brcm80211/brcmfmac/fwsignal.c         |  26 +---
 .../broadcom/brcm80211/brcmfmac/fwsignal.h         |  23 ++++
 .../wireless/broadcom/brcm80211/brcmfmac/sdio.c    |  17 +--
 6 files changed, 213 insertions(+), 33 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index f2f84af923a9..537e5ae35ca0 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -23,6 +23,7 @@
 #include "p2p.h"
 #include "btcoex.h"
 #include "pno.h"
+#include "fwsignal.h"
 #include "cfg80211.h"
 #include "feature.h"
 #include "fwil.h"
@@ -5586,12 +5587,151 @@ static void brcmf_clear_assoc_ies(struct brcmf_cfg80211_info *cfg)
 	conn_info->resp_ie_len = 0;
 }
 
+u8 brcmf_map_prio_to_prec(void *config, u8 prio)
+{
+	struct brcmf_cfg80211_info *cfg = (struct brcmf_cfg80211_info *)config;
+
+	if (!cfg)
+		return (prio == PRIO_8021D_NONE || prio == PRIO_8021D_BE) ?
+		       (prio ^ 2) : prio;
+
+	/* For those AC(s) with ACM flag set to 1, convert its 4-level priority
+	 * to an 8-level precedence which is the same as BE's
+	 */
+	if (prio > PRIO_8021D_EE &&
+	    cfg->ac_priority[prio] == cfg->ac_priority[PRIO_8021D_BE])
+		return cfg->ac_priority[prio] * 2;
+
+	/* Conversion of 4-level priority to 8-level precedence */
+	if (prio == PRIO_8021D_BE || prio == PRIO_8021D_BK ||
+	    prio == PRIO_8021D_CL || prio == PRIO_8021D_VO)
+		return cfg->ac_priority[prio] * 2;
+	else
+		return cfg->ac_priority[prio] * 2 + 1;
+}
+
+u8 brcmf_map_prio_to_aci(void *config, u8 prio)
+{
+	/* Prio here refers to the 802.1d priority in range of 0 to 7.
+	 * ACI here refers to the WLAN AC Index in range of 0 to 3.
+	 * This function will return ACI corresponding to input prio.
+	 */
+	struct brcmf_cfg80211_info *cfg = (struct brcmf_cfg80211_info *)config;
+
+	if (cfg)
+		return cfg->ac_priority[prio];
+
+	return prio;
+}
+
+static void brcmf_init_wmm_prio(u8 *priority)
+{
+	/* Initialize AC priority array to default
+	 * 802.1d priority as per following table:
+	 * 802.1d prio 0,3 maps to BE
+	 * 802.1d prio 1,2 maps to BK
+	 * 802.1d prio 4,5 maps to VI
+	 * 802.1d prio 6,7 maps to VO
+	 */
+	priority[0] = BRCMF_FWS_FIFO_AC_BE;
+	priority[3] = BRCMF_FWS_FIFO_AC_BE;
+	priority[1] = BRCMF_FWS_FIFO_AC_BK;
+	priority[2] = BRCMF_FWS_FIFO_AC_BK;
+	priority[4] = BRCMF_FWS_FIFO_AC_VI;
+	priority[5] = BRCMF_FWS_FIFO_AC_VI;
+	priority[6] = BRCMF_FWS_FIFO_AC_VO;
+	priority[7] = BRCMF_FWS_FIFO_AC_VO;
+}
+
+static void brcmf_wifi_prioritize_acparams(const
+	struct brcmf_cfg80211_edcf_acparam *acp, u8 *priority)
+{
+	u8 aci;
+	u8 aifsn;
+	u8 ecwmin;
+	u8 ecwmax;
+	u8 acm;
+	u8 ranking_basis[EDCF_AC_COUNT];
+	u8 aci_prio[EDCF_AC_COUNT]; /* AC_BE, AC_BK, AC_VI, AC_VO */
+	u8 index;
+
+	for (aci = 0; aci < EDCF_AC_COUNT; aci++, acp++) {
+		aifsn  = acp->ACI & EDCF_AIFSN_MASK;
+		acm = (acp->ACI & EDCF_ACM_MASK) ? 1 : 0;
+		ecwmin = acp->ECW & EDCF_ECWMIN_MASK;
+		ecwmax = (acp->ECW & EDCF_ECWMAX_MASK) >> EDCF_ECWMAX_SHIFT;
+		brcmf_dbg(CONN, "ACI %d aifsn %d acm %d ecwmin %d ecwmax %d\n",
+			  aci, aifsn, acm, ecwmin, ecwmax);
+		/* Default AC_VO will be the lowest ranking value */
+		ranking_basis[aci] = aifsn + ecwmin + ecwmax;
+		/* Initialise priority starting at 0 (AC_BE) */
+		aci_prio[aci] = 0;
+
+		/* If ACM is set, STA can't use this AC as per 802.11.
+		 * Change the ranking to BE
+		 */
+		if (aci != AC_BE && aci != AC_BK && acm == 1)
+			ranking_basis[aci] = ranking_basis[AC_BE];
+	}
+
+	/* Ranking method which works for AC priority
+	 * swapping when values for cwmin, cwmax and aifsn are varied
+	 * Compare each AC's ranking_basis against every other AC's
+	 */
+	for (aci = 0; aci < EDCF_AC_COUNT; aci++) {
+		for (index = 0; index < EDCF_AC_COUNT; index++) {
+			if (index != aci) {
+				/* Smaller ranking value has higher priority,
+				 * so increment priority for each ACI which has
+				 * a higher ranking value
+				 */
+				if (ranking_basis[aci] < ranking_basis[index])
+					aci_prio[aci]++;
+			}
+		}
+	}
+
+	/* By now, aci_prio[] will be in range of 0 to 3.
+	 * Use ACI prio to get the new priority value for
+	 * each 802.1d traffic type, in this range.
+	 */
+	if (!(aci_prio[AC_BE] == aci_prio[AC_BK] &&
+	      aci_prio[AC_BK] == aci_prio[AC_VI] &&
+	      aci_prio[AC_VI] == aci_prio[AC_VO])) {
+		/* 802.1d 0,3 maps to BE */
+		priority[0] = aci_prio[AC_BE];
+		priority[3] = aci_prio[AC_BE];
+
+		/* 802.1d 1,2 maps to BK */
+		priority[1] = aci_prio[AC_BK];
+		priority[2] = aci_prio[AC_BK];
+
+		/* 802.1d 4,5 maps to VI */
+		priority[4] = aci_prio[AC_VI];
+		priority[5] = aci_prio[AC_VI];
+
+		/* 802.1d 6,7 maps to VO */
+		priority[6] = aci_prio[AC_VO];
+		priority[7] = aci_prio[AC_VO];
+	} else {
+		/* Initialize to default priority */
+		brcmf_init_wmm_prio(priority);
+	}
+
+	brcmf_dbg(CONN, "Adj prio BE 0->%d, BK 1->%d, BK 2->%d, BE 3->%d\n",
+		  priority[0], priority[1], priority[2], priority[3]);
+
+	brcmf_dbg(CONN, "Adj prio VI 4->%d, VI 5->%d, VO 6->%d, VO 7->%d\n",
+		  priority[4], priority[5], priority[6], priority[7]);
+}
+
 static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg,
 			       struct brcmf_if *ifp)
 {
 	struct brcmf_pub *drvr = cfg->pub;
 	struct brcmf_cfg80211_assoc_ielen_le *assoc_info;
 	struct brcmf_cfg80211_connect_info *conn_info = cfg_to_conn(cfg);
+	struct brcmf_cfg80211_edcf_acparam edcf_acparam_info[EDCF_AC_COUNT];
 	u32 req_len;
 	u32 resp_len;
 	s32 err = 0;
@@ -5640,6 +5780,17 @@ static s32 brcmf_get_assoc_ies(struct brcmf_cfg80211_info *cfg,
 			    GFP_KERNEL);
 		if (!conn_info->resp_ie)
 			conn_info->resp_ie_len = 0;
+
+		err = brcmf_fil_iovar_data_get(ifp, "wme_ac_sta",
+					       edcf_acparam_info,
+					       sizeof(edcf_acparam_info));
+		if (err) {
+			brcmf_err("could not get wme_ac_sta (%d)\n", err);
+			return err;
+		}
+
+		brcmf_wifi_prioritize_acparams(edcf_acparam_info,
+					       cfg->ac_priority);
 	} else {
 		conn_info->resp_ie_len = 0;
 		conn_info->resp_ie = NULL;
@@ -6056,6 +6207,7 @@ static s32 wl_init_priv(struct brcmf_cfg80211_info *cfg)
 	mutex_init(&cfg->usr_sync);
 	brcmf_init_escan(cfg);
 	brcmf_init_conf(cfg->conf);
+	brcmf_init_wmm_prio(cfg->ac_priority);
 	init_completion(&cfg->vif_disabled);
 	return err;
 }
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
index 3ca8c07d6370..333fdf394f95 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.h
@@ -23,6 +23,23 @@
 #define WL_ROAM_TRIGGER_LEVEL		-75
 #define WL_ROAM_DELTA			20
 
+/* WME Access Category Indices (ACIs) */
+#define AC_BE			0	/* Best Effort */
+#define AC_BK			1	/* Background */
+#define AC_VI			2	/* Video */
+#define AC_VO			3	/* Voice */
+#define EDCF_AC_COUNT		4
+#define MAX_8021D_PRIO		8
+
+#define EDCF_ACI_MASK			0x60
+#define EDCF_ACI_SHIFT			5
+#define EDCF_ACM_MASK                  0x10
+#define EDCF_ECWMIN_MASK		0x0f
+#define EDCF_ECWMAX_SHIFT		4
+#define EDCF_AIFSN_MASK			0x0f
+#define EDCF_AIFSN_MAX			15
+#define EDCF_ECWMAX_MASK		0xf0
+
 /* Keep BRCMF_ESCAN_BUF_SIZE below 64K (65536). Allocing over 64K can be
  * problematic on some systems and should be avoided.
  */
@@ -209,6 +226,12 @@ struct brcmf_cfg80211_assoc_ielen_le {
 	__le32 resp_len;
 };
 
+struct brcmf_cfg80211_edcf_acparam {
+	u8 ACI;
+	u8 ECW;
+	u16 TXOP;        /* stored in network order (ls octet first) */
+};
+
 /* dongle escan state */
 enum wl_escan_state {
 	WL_ESCAN_STATE_IDLE,
@@ -327,6 +350,7 @@ struct brcmf_cfg80211_info {
 	struct brcmf_assoclist_le assoclist;
 	struct brcmf_cfg80211_wowl wowl;
 	struct brcmf_pno_info *pno;
+	u8 ac_priority[MAX_8021D_PRIO];
 };
 
 /**
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
index 144cf4570bc3..8b5f49997c8b 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/common.h
@@ -72,4 +72,8 @@ static inline void
 brcmf_dmi_probe(struct brcmf_mp_device *settings, u32 chip, u32 chiprev) {}
 #endif
 
+u8 brcmf_map_prio_to_prec(void *cfg, u8 prio);
+
+u8 brcmf_map_prio_to_aci(void *cfg, u8 prio);
+
 #endif /* BRCMFMAC_COMMON_H */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
index 2b7837887c0b..09701262330d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.c
@@ -311,28 +311,6 @@ struct brcmf_skbuff_cb {
 /* How long to defer borrowing in jiffies */
 #define BRCMF_FWS_BORROW_DEFER_PERIOD		(HZ / 10)
 
-/**
- * enum brcmf_fws_fifo - fifo indices used by dongle firmware.
- *
- * @BRCMF_FWS_FIFO_FIRST: first fifo, ie. background.
- * @BRCMF_FWS_FIFO_AC_BK: fifo for background traffic.
- * @BRCMF_FWS_FIFO_AC_BE: fifo for best-effort traffic.
- * @BRCMF_FWS_FIFO_AC_VI: fifo for video traffic.
- * @BRCMF_FWS_FIFO_AC_VO: fifo for voice traffic.
- * @BRCMF_FWS_FIFO_BCMC: fifo for broadcast/multicast (AP only).
- * @BRCMF_FWS_FIFO_ATIM: fifo for ATIM (AP only).
- * @BRCMF_FWS_FIFO_COUNT: number of fifos.
- */
-enum brcmf_fws_fifo {
-	BRCMF_FWS_FIFO_FIRST,
-	BRCMF_FWS_FIFO_AC_BK = BRCMF_FWS_FIFO_FIRST,
-	BRCMF_FWS_FIFO_AC_BE,
-	BRCMF_FWS_FIFO_AC_VI,
-	BRCMF_FWS_FIFO_AC_VO,
-	BRCMF_FWS_FIFO_BCMC,
-	BRCMF_FWS_FIFO_ATIM,
-	BRCMF_FWS_FIFO_COUNT
-};
 
 /**
  * enum brcmf_fws_txstatus - txstatus flag values.
@@ -2130,8 +2108,10 @@ int brcmf_fws_process_skb(struct brcmf_if *ifp, struct sk_buff *skb)
 	skcb->if_flags = 0;
 	skcb->state = BRCMF_FWS_SKBSTATE_NEW;
 	brcmf_skb_if_flags_set_field(skb, INDEX, ifp->ifidx);
+
+	/* mapping from 802.1d priority to firmware fifo index */
 	if (!multicast)
-		fifo = brcmf_fws_prio2fifo[skb->priority];
+		fifo = brcmf_map_prio_to_aci(drvr->config, skb->priority);
 
 	brcmf_fws_lock(fws);
 	if (fifo != BRCMF_FWS_FIFO_AC_BE && fifo < BRCMF_FWS_FIFO_BCMC)
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
index b486d578ec96..b16a9d1c0508 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/fwsignal.h
@@ -6,6 +6,29 @@
 #ifndef FWSIGNAL_H_
 #define FWSIGNAL_H_
 
+/**
+ * enum brcmf_fws_fifo - fifo indices used by dongle firmware.
+ *
+ * @BRCMF_FWS_FIFO_FIRST: first fifo, ie. background.
+ * @BRCMF_FWS_FIFO_AC_BK: fifo for background traffic.
+ * @BRCMF_FWS_FIFO_AC_BE: fifo for best-effort traffic.
+ * @BRCMF_FWS_FIFO_AC_VI: fifo for video traffic.
+ * @BRCMF_FWS_FIFO_AC_VO: fifo for voice traffic.
+ * @BRCMF_FWS_FIFO_BCMC: fifo for broadcast/multicast (AP only).
+ * @BRCMF_FWS_FIFO_ATIM: fifo for ATIM (AP only).
+ * @BRCMF_FWS_FIFO_COUNT: number of fifos.
+ */
+enum brcmf_fws_fifo {
+	BRCMF_FWS_FIFO_FIRST,
+	BRCMF_FWS_FIFO_AC_BK = BRCMF_FWS_FIFO_FIRST,
+	BRCMF_FWS_FIFO_AC_BE,
+	BRCMF_FWS_FIFO_AC_VI,
+	BRCMF_FWS_FIFO_AC_VO,
+	BRCMF_FWS_FIFO_BCMC,
+	BRCMF_FWS_FIFO_ATIM,
+	BRCMF_FWS_FIFO_COUNT
+};
+
 struct brcmf_fws_info *brcmf_fws_attach(struct brcmf_pub *drvr);
 void brcmf_fws_detach(struct brcmf_fws_info *fws);
 void brcmf_fws_debugfs_create(struct brcmf_pub *drvr);
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 3a08252f1a53..ce6f15284277 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -315,15 +315,6 @@ struct rte_console {
 #define MAX_KSO_ATTEMPTS (PMU_MAX_TRANSITION_DLY/KSO_WAIT_US)
 #define BRCMF_SDIO_MAX_ACCESS_ERRORS	5
 
-/*
- * Conversion of 802.1D priority to precedence level
- */
-static uint prio2prec(u32 prio)
-{
-	return (prio == PRIO_8021D_NONE || prio == PRIO_8021D_BE) ?
-	       (prio^2) : prio;
-}
-
 #ifdef DEBUG
 /* Device console log buffer state */
 struct brcmf_console {
@@ -2774,7 +2765,13 @@ static int brcmf_sdio_bus_txdata(struct device *dev, struct sk_buff *pkt)
 	skb_push(pkt, bus->tx_hdrlen);
 	/* precondition: IS_ALIGNED((unsigned long)(pkt->data), 2) */
 
-	prec = prio2prec((pkt->priority & PRIOMASK));
+	/* In WLAN, priority is always set by the AP using WMM parameters
+	 * and this need not always follow the standard 802.1d priority.
+	 * Based on AP WMM config, map from 802.1d priority to corresponding
+	 * precedence level.
+	 */
+	prec = brcmf_map_prio_to_prec(bus_if->drvr->config,
+				      (pkt->priority & PRIOMASK));
 
 	/* Check for existing queue, current flow-control,
 			 pending event, or pending clock */
-- 
cgit v1.2.3-59-g8ed1b


From f5da2a370f2f3885dff6d2d6815be75e60580784 Mon Sep 17 00:00:00 2001
From: Pramod Prakash <pramod.prakash@cypress.com>
Date: Tue, 5 May 2020 01:51:27 -0500
Subject: brcmfmac: fix 802.1d priority to ac mapping for pcie dongles

802.1d defines priorities 0,3 for BE and 1,2 for BK. In pcie dongles,
0 & 3 are mapped to fifo 0 and 1,2 are mapped to fifo 1, but the
brcmf_flowring_prio2fifo table had these two values swapped. This change
corrects the mapping, so that BE & BK are given access precedence
accordingly by pcie dongles.

Signed-off-by: Pramod Prakash <pramod.prakash@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588661487-21884-3-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
index 8e9d067bdfed..096f6b969dd8 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/flowring.c
@@ -26,10 +26,10 @@
 #define BRCMF_FLOWRING_HASH_STA(fifo, ifidx) (fifo + ifidx * 16)
 
 static const u8 brcmf_flowring_prio2fifo[] = {
-	1,
-	0,
 	0,
 	1,
+	1,
+	0,
 	2,
 	2,
 	3,
-- 
cgit v1.2.3-59-g8ed1b


From b46f1546a708588700eb5adad0e69e944a7a000d Mon Sep 17 00:00:00 2001
From: Jia-Shyr Chuang <joseph.chuang@cypress.com>
Date: Wed, 6 May 2020 08:03:19 -0500
Subject: brcmfmac: set security after reiniting interface

The host driver parses the security params passed by the supplicant and
sets them in the FW. This has to be done after reinitializing the
interface in the firmware.

Signed-off-by: Jia-Shyr Chuang <joseph.chuang@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588770201-54361-2-git-send-email-wright.feng@cypress.com
---
 .../broadcom/brcm80211/brcmfmac/cfg80211.c         | 90 ++++++++++++++--------
 1 file changed, 58 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 537e5ae35ca0..181f4df79866 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -4613,6 +4613,48 @@ brcmf_config_ap_mgmt_ie(struct brcmf_cfg80211_vif *vif,
 	return err;
 }
 
+static s32
+brcmf_parse_configure_security(struct brcmf_if *ifp,
+			       struct cfg80211_ap_settings *settings,
+			       enum nl80211_iftype dev_role)
+{
+	const struct brcmf_tlv *rsn_ie;
+	const struct brcmf_vs_tlv *wpa_ie;
+	s32 err = 0;
+
+	/* find the RSN_IE */
+	rsn_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
+				  settings->beacon.tail_len, WLAN_EID_RSN);
+
+	/* find the WPA_IE */
+	wpa_ie = brcmf_find_wpaie((u8 *)settings->beacon.tail,
+				  settings->beacon.tail_len);
+
+	if (wpa_ie || rsn_ie) {
+		brcmf_dbg(TRACE, "WPA(2) IE is found\n");
+		if (wpa_ie) {
+			/* WPA IE */
+			err = brcmf_configure_wpaie(ifp, wpa_ie, false);
+			if (err < 0)
+				return err;
+		} else {
+			struct brcmf_vs_tlv *tmp_ie;
+
+			tmp_ie = (struct brcmf_vs_tlv *)rsn_ie;
+
+			/* RSN IE */
+			err = brcmf_configure_wpaie(ifp, tmp_ie, true);
+			if (err < 0)
+				return err;
+		}
+	} else {
+		brcmf_dbg(TRACE, "No WPA(2) IEs found\n");
+		brcmf_configure_opensecurity(ifp);
+	}
+
+	return err;
+}
+
 static s32
 brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 			struct cfg80211_ap_settings *settings)
@@ -4625,8 +4667,6 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 	const struct brcmf_tlv *country_ie;
 	struct brcmf_ssid_le ssid_le;
 	s32 err = -EPERM;
-	const struct brcmf_tlv *rsn_ie;
-	const struct brcmf_vs_tlv *wpa_ie;
 	struct brcmf_join_params join_params;
 	enum nl80211_iftype dev_role;
 	struct brcmf_fil_bss_enable_le bss_enable;
@@ -4680,36 +4720,6 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 		brcmf_configure_arp_nd_offload(ifp, false);
 	}
 
-	/* find the RSN_IE */
-	rsn_ie = brcmf_parse_tlvs((u8 *)settings->beacon.tail,
-				  settings->beacon.tail_len, WLAN_EID_RSN);
-
-	/* find the WPA_IE */
-	wpa_ie = brcmf_find_wpaie((u8 *)settings->beacon.tail,
-				  settings->beacon.tail_len);
-
-	if ((wpa_ie != NULL || rsn_ie != NULL)) {
-		brcmf_dbg(TRACE, "WPA(2) IE is found\n");
-		if (wpa_ie != NULL) {
-			/* WPA IE */
-			err = brcmf_configure_wpaie(ifp, wpa_ie, false);
-			if (err < 0)
-				goto exit;
-		} else {
-			struct brcmf_vs_tlv *tmp_ie;
-
-			tmp_ie = (struct brcmf_vs_tlv *)rsn_ie;
-
-			/* RSN IE */
-			err = brcmf_configure_wpaie(ifp, tmp_ie, true);
-			if (err < 0)
-				goto exit;
-		}
-	} else {
-		brcmf_dbg(TRACE, "No WPA(2) IEs found\n");
-		brcmf_configure_opensecurity(ifp);
-	}
-
 	/* Parameters shared by all radio interfaces */
 	if (!mbss) {
 		if ((supports_11d) && (is_11d != ifp->vif->is_11d)) {
@@ -4791,6 +4801,14 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 			bphy_err(drvr, "BRCMF_C_UP error (%d)\n", err);
 			goto exit;
 		}
+
+		err = brcmf_parse_configure_security(ifp, settings,
+						     NL80211_IFTYPE_AP);
+		if (err < 0) {
+			bphy_err(drvr, "brcmf_parse_configure_security error\n");
+			goto exit;
+		}
+
 		/* On DOWN the firmware removes the WEP keys, reconfigure
 		 * them if they were set.
 		 */
@@ -4823,6 +4841,14 @@ brcmf_cfg80211_start_ap(struct wiphy *wiphy, struct net_device *ndev,
 				 chanspec, err);
 			goto exit;
 		}
+
+		err = brcmf_parse_configure_security(ifp, settings,
+						     NL80211_IFTYPE_P2P_GO);
+		if (err < 0) {
+			brcmf_err("brcmf_parse_configure_security error\n");
+			goto exit;
+		}
+
 		err = brcmf_fil_bsscfg_data_set(ifp, "ssid", &ssid_le,
 						sizeof(ssid_le));
 		if (err < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 30fb1b2729099a3506222bbeff09a7ed6bca6fb0 Mon Sep 17 00:00:00 2001
From: Ryohei Kondo <ryohei.kondo@cypress.com>
Date: Wed, 6 May 2020 08:03:20 -0500
Subject: brcmfmac: use actframe_abort to cancel ongoing action frame

The driver sends an action frame down and waits for the dwell time to be
completed or aborted before sending out the next action frame.
The driver issues "scan abort" to cancel the current time slot, but this
has no effect because the scan engine is not used for sending action
frames.
The fix is to use "actframe_abort" to cancel the current action frame.

Signed-off-by: Ryohei Kondo <ryohei.kondo@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588770201-54361-3-git-send-email-wright.feng@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 34 ++++++++++++++++++++--
 1 file changed, 32 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index e32c24a2670d..cd5801e32488 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -1267,6 +1267,30 @@ bool brcmf_p2p_scan_finding_common_channel(struct brcmf_cfg80211_info *cfg,
 	return true;
 }
 
+/**
+ * brcmf_p2p_abort_action_frame() - abort action frame.
+ *
+ * @cfg: common configuration struct.
+ *
+ */
+static s32 brcmf_p2p_abort_action_frame(struct brcmf_cfg80211_info *cfg)
+{
+	struct brcmf_p2p_info *p2p = &cfg->p2p;
+	struct brcmf_cfg80211_vif *vif;
+	s32 err;
+	s32 int_val = 1;
+
+	brcmf_dbg(TRACE, "Enter\n");
+
+	vif = p2p->bss_idx[P2PAPI_BSSCFG_DEVICE].vif;
+	err = brcmf_fil_bsscfg_data_set(vif->ifp, "actframe_abort", &int_val,
+					sizeof(s32));
+	if (err)
+		brcmf_err(" aborting action frame has failed (%d)\n", err);
+
+	return err;
+}
+
 /**
  * brcmf_p2p_stop_wait_next_action_frame() - finish scan if af tx complete.
  *
@@ -1278,6 +1302,7 @@ brcmf_p2p_stop_wait_next_action_frame(struct brcmf_cfg80211_info *cfg)
 {
 	struct brcmf_p2p_info *p2p = &cfg->p2p;
 	struct brcmf_if *ifp = p2p->bss_idx[P2PAPI_BSSCFG_PRIMARY].vif->ifp;
+	s32 err;
 
 	if (test_bit(BRCMF_P2P_STATUS_SENDING_ACT_FRAME, &p2p->status) &&
 	    (test_bit(BRCMF_P2P_STATUS_ACTION_TX_COMPLETED, &p2p->status) ||
@@ -1286,8 +1311,13 @@ brcmf_p2p_stop_wait_next_action_frame(struct brcmf_cfg80211_info *cfg)
 		/* if channel is not zero, "actfame" uses off channel scan.
 		 * So abort scan for off channel completion.
 		 */
-		if (p2p->af_sent_channel)
-			brcmf_notify_escan_complete(cfg, ifp, true, true);
+		if (p2p->af_sent_channel) {
+			/* abort actframe using actframe_abort or abort scan */
+			err = brcmf_p2p_abort_action_frame(cfg);
+			if (err)
+				brcmf_notify_escan_complete(cfg, ifp, true,
+							    true);
+		}
 	} else if (test_bit(BRCMF_P2P_STATUS_WAITING_NEXT_AF_LISTEN,
 			    &p2p->status)) {
 		brcmf_dbg(TRACE, "*** Wake UP ** abort listen for next af frame\n");
-- 
cgit v1.2.3-59-g8ed1b


From 78db077db638b6b7679ebbadda7b19eef65a6720 Mon Sep 17 00:00:00 2001
From: Soontak Lee <soontak.lee@cypress.com>
Date: Wed, 6 May 2020 08:03:21 -0500
Subject: brcmfmac: Use seq/seq_len and set iv_initialize when plumbing of rxiv
 in (GTK) keys

When plumbing rxiv for (GTK) keys, the current code neither uses
seq/seq_len when present nor sets iv_initialized for the wsec_key iovar.
This can result in missing broadcast traffic after a GTK rekey. The fix
is to set iv_initialized and use seq/seq_len for the wsec_key iovar.

Signed-off-by: Soontak Lee <soontak.lee@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1588770201-54361-4-git-send-email-wright.feng@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 181f4df79866..579b9306f533 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -2469,6 +2469,17 @@ brcmf_cfg80211_add_key(struct wiphy *wiphy, struct net_device *ndev,
 	if (!ext_key)
 		key->flags = BRCMF_PRIMARY_KEY;
 
+	if (params->seq && params->seq_len == 6) {
+		/* rx iv */
+		u8 *ivptr;
+
+		ivptr = (u8 *)params->seq;
+		key->rxiv.hi = (ivptr[5] << 24) | (ivptr[4] << 16) |
+			(ivptr[3] << 8) | ivptr[2];
+		key->rxiv.lo = (ivptr[1] << 8) | ivptr[0];
+		key->iv_initialized = true;
+	}
+
 	switch (params->cipher) {
 	case WLAN_CIPHER_SUITE_WEP40:
 		key->algo = CRYPTO_ALGO_WEP1;
-- 
cgit v1.2.3-59-g8ed1b


From 7294ee6f564281bc805496648ec56fce203d8ee4 Mon Sep 17 00:00:00 2001
From: Chen Zhou <chenzhou10@huawei.com>
Date: Fri, 8 May 2020 09:32:49 +0800
Subject: brcmfmac: make non-global functions static

Fix sparse warning:
drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c:2206:5:
	warning: symbol 'brcmf_p2p_get_conn_idx' was not declared. Should it be static?

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Chen Zhou <chenzhou10@huawei.com>
Reviewed-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200508013249.95196-1-chenzhou10@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index cd5801e32488..b0a41cf06c99 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -2233,7 +2233,7 @@ fail:
 	return ERR_PTR(err);
 }
 
-int brcmf_p2p_get_conn_idx(struct brcmf_cfg80211_info *cfg)
+static int brcmf_p2p_get_conn_idx(struct brcmf_cfg80211_info *cfg)
 {
 	int i;
 	struct brcmf_if *ifp = netdev_priv(cfg_to_ndev(cfg));
-- 
cgit v1.2.3-59-g8ed1b


From 78a6fb42f67c567f80338a9eaec0090678dbd58e Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Fri, 8 May 2020 15:43:51 +0800
Subject: brcmfmac: remove Comparison to bool in brcmf_p2p_send_action_frame()

Fix the following coccicheck warning:

drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c:1785:5-8:
WARNING: Comparison to bool

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Reviewed-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200508074351.19193-1-yanaijie@huawei.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
index b0a41cf06c99..d2795dc17c46 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/p2p.c
@@ -1866,7 +1866,7 @@ bool brcmf_p2p_send_action_frame(struct brcmf_cfg80211_info *cfg,
 		dwell_overflow = brcmf_p2p_check_dwell_overflow(requested_dwell,
 								dwell_jiffies);
 	}
-	if (ack == false) {
+	if (!ack) {
 		bphy_err(drvr, "Failed to send Action Frame(retry %d)\n",
 			 tx_retry);
 		clear_bit(BRCMF_P2P_STATUS_GO_NEG_PHASE, &p2p->status);
-- 
cgit v1.2.3-59-g8ed1b


From f2cd32a443da694ac4e28fbf4ac6f9d5cc63a539 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavo@embeddedor.com>
Date: Tue, 5 May 2020 18:52:05 -0500
Subject: rndis_wlan: Remove logically dead code

caps_buf is always of size sizeof(*caps) because
sizeof(caps->auth_encr_pair) * 16 is always zero. Notice
that when using zero-length arrays, sizeof evaluates to zero[1].

So, the code introduced by
commit 0308383f9591 ("rndis_wlan: get max_num_pmkids from device")
is logically dead, hence is never executed and can be removed. As a
consequence, the rest of the related code can be refactored a bit.

Notice that this code has been out there since March 2010.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
Signed-off-by: Gustavo A. R. Silva <gustavo@embeddedor.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200505235205.GA18539@embeddedor
Link: https://lore.kernel.org/r/20200507110741.37757-1-yanaijie@huawei.com
---
 drivers/net/wireless/rndis_wlan.c | 24 ++++++++----------------
 1 file changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index c8f8fe5497a8..78a4325bfe1b 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -312,17 +312,11 @@ struct ndis_80211_assoc_info {
 	__le32 offset_resp_ies;
 } __packed;
 
-struct ndis_80211_auth_encr_pair {
-	__le32 auth_mode;
-	__le32 encr_mode;
-} __packed;
-
 struct ndis_80211_capability {
 	__le32 length;
 	__le32 version;
 	__le32 num_pmkids;
 	__le32 num_auth_encr_pair;
-	struct ndis_80211_auth_encr_pair auth_encr_pair[0];
 } __packed;
 
 struct ndis_80211_bssid_info {
@@ -3109,8 +3103,7 @@ static int rndis_wlan_get_caps(struct usbnet *usbdev, struct wiphy *wiphy)
 		__le32	num_items;
 		__le32	items[8];
 	} networks_supported;
-	struct ndis_80211_capability *caps;
-	u8 caps_buf[sizeof(*caps) + sizeof(caps->auth_encr_pair) * 16];
+	struct ndis_80211_capability caps;
 	int len, retval, i, n;
 	struct rndis_wlan_private *priv = get_rndis_wlan_priv(usbdev);
 
@@ -3140,19 +3133,18 @@ static int rndis_wlan_get_caps(struct usbnet *usbdev, struct wiphy *wiphy)
 	}
 
 	/* get device 802.11 capabilities, number of PMKIDs */
-	caps = (struct ndis_80211_capability *)caps_buf;
-	len = sizeof(caps_buf);
+	len = sizeof(caps);
 	retval = rndis_query_oid(usbdev,
 				 RNDIS_OID_802_11_CAPABILITY,
-				 caps, &len);
+				 &caps, &len);
 	if (retval >= 0) {
 		netdev_dbg(usbdev->net, "RNDIS_OID_802_11_CAPABILITY -> len %d, "
 				"ver %d, pmkids %d, auth-encr-pairs %d\n",
-				le32_to_cpu(caps->length),
-				le32_to_cpu(caps->version),
-				le32_to_cpu(caps->num_pmkids),
-				le32_to_cpu(caps->num_auth_encr_pair));
-		wiphy->max_num_pmkids = le32_to_cpu(caps->num_pmkids);
+				le32_to_cpu(caps.length),
+				le32_to_cpu(caps.version),
+				le32_to_cpu(caps.num_pmkids),
+				le32_to_cpu(caps.num_auth_encr_pair));
+		wiphy->max_num_pmkids = le32_to_cpu(caps.num_pmkids);
 	} else
 		wiphy->max_num_pmkids = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From e0e05f20c200c41b34294078bbc29d67bfd607ea Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:54:51 -0500
Subject: ipw2x00: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507185451.GA14603@embeddedor
---
 drivers/net/wireless/intel/ipw2x00/ipw2200.c |  2 +-
 drivers/net/wireless/intel/ipw2x00/ipw2200.h | 10 +++++-----
 drivers/net/wireless/intel/ipw2x00/libipw.h  | 28 ++++++++++++++--------------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.c b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
index e9e686ad57b1..661e63bfc892 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.c
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.c
@@ -3386,7 +3386,7 @@ struct ipw_fw {
 	__le32 boot_size;
 	__le32 ucode_size;
 	__le32 fw_size;
-	u8 data[0];
+	u8 data[];
 };
 
 static int ipw_get_fw(struct ipw_priv *priv,
diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.h b/drivers/net/wireless/intel/ipw2x00/ipw2200.h
index 4346520545c4..09fa7f19050f 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.h
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.h
@@ -448,7 +448,7 @@ struct tfd_command {
 	u8 index;
 	u8 length;
 	__le16 reserved;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct tfd_data {
@@ -675,7 +675,7 @@ struct ipw_rx_frame {
 	// is identical)
 	u8 rtscts_seen;		// 0x1 RTS seen ; 0x2 CTS seen
 	__le16 length;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct ipw_rx_header {
@@ -1002,7 +1002,7 @@ struct ipw_cmd {	 /* XXX */
    * Incoming parameters listed 1-st, followed by outcoming params.
    * nParams=(len+3)/4+status_len
    */
-	u32 param[0];
+	u32 param[];
 } __packed;
 
 #define STATUS_HCMD_ACTIVE      (1<<0)	/**< host command in progress */
@@ -1108,7 +1108,7 @@ struct ipw_fw_error {	 /* XXX */
 	u32 log_len;
 	struct ipw_error_elem *elem;
 	struct ipw_event *log;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 #ifdef CONFIG_IPW2200_PROMISCUOUS
@@ -1153,7 +1153,7 @@ struct ipw_rt_hdr {
 	s8 rt_dbmsignal;	/* signal in dbM, kluged to signed */
 	s8 rt_dbmnoise;
 	u8 rt_antenna;	/* antenna number */
-	u8 payload[0];  /* payload... */
+	u8 payload[];  /* payload... */
 } __packed;
 #endif
 
diff --git a/drivers/net/wireless/intel/ipw2x00/libipw.h b/drivers/net/wireless/intel/ipw2x00/libipw.h
index e4a6ab4e8391..e87538a8b88b 100644
--- a/drivers/net/wireless/intel/ipw2x00/libipw.h
+++ b/drivers/net/wireless/intel/ipw2x00/libipw.h
@@ -334,7 +334,7 @@ struct libipw_hdr_1addr {
 	__le16 frame_ctl;
 	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct libipw_hdr_2addr {
@@ -342,7 +342,7 @@ struct libipw_hdr_2addr {
 	__le16 duration_id;
 	u8 addr1[ETH_ALEN];
 	u8 addr2[ETH_ALEN];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct libipw_hdr_3addr {
@@ -352,7 +352,7 @@ struct libipw_hdr_3addr {
 	u8 addr2[ETH_ALEN];
 	u8 addr3[ETH_ALEN];
 	__le16 seq_ctl;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct libipw_hdr_4addr {
@@ -363,7 +363,7 @@ struct libipw_hdr_4addr {
 	u8 addr3[ETH_ALEN];
 	__le16 seq_ctl;
 	u8 addr4[ETH_ALEN];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct libipw_hdr_3addrqos {
@@ -380,7 +380,7 @@ struct libipw_hdr_3addrqos {
 struct libipw_info_element {
 	u8 id;
 	u8 len;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 /*
@@ -406,7 +406,7 @@ struct libipw_auth {
 	__le16 transaction;
 	__le16 status;
 	/* challenge */
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 struct libipw_channel_switch {
@@ -442,7 +442,7 @@ struct libipw_disassoc {
 struct libipw_probe_request {
 	struct libipw_hdr_3addr header;
 	/* SSID, supported rates */
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 struct libipw_probe_response {
@@ -452,7 +452,7 @@ struct libipw_probe_response {
 	__le16 capability;
 	/* SSID, supported rates, FH params, DS params,
 	 * CF params, IBSS params, TIM (if beacon), RSN */
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 /* Alias beacon for probe_response */
@@ -463,7 +463,7 @@ struct libipw_assoc_request {
 	__le16 capability;
 	__le16 listen_interval;
 	/* SSID, supported rates, RSN */
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 struct libipw_reassoc_request {
@@ -471,7 +471,7 @@ struct libipw_reassoc_request {
 	__le16 capability;
 	__le16 listen_interval;
 	u8 current_ap[ETH_ALEN];
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 struct libipw_assoc_response {
@@ -480,7 +480,7 @@ struct libipw_assoc_response {
 	__le16 status;
 	__le16 aid;
 	/* supported rates */
-	struct libipw_info_element info_element[0];
+	struct libipw_info_element info_element[];
 } __packed;
 
 struct libipw_txb {
@@ -490,7 +490,7 @@ struct libipw_txb {
 	u8 reserved;
 	u16 frag_size;
 	u16 payload_size;
-	struct sk_buff *fragments[0];
+	struct sk_buff *fragments[];
 };
 
 /* SWEEP TABLE ENTRIES NUMBER */
@@ -594,7 +594,7 @@ struct libipw_ibss_dfs {
 	struct libipw_info_element ie;
 	u8 owner[ETH_ALEN];
 	u8 recovery_interval;
-	struct libipw_channel_map channel_map[0];
+	struct libipw_channel_map channel_map[];
 };
 
 struct libipw_csa {
@@ -830,7 +830,7 @@ struct libipw_device {
 
 	/* This must be the last item so that it points to the data
 	 * allocated beyond this structure by alloc_libipw */
-	u8 priv[0];
+	u8 priv[];
 };
 
 #define IEEE_A            (1<<0)
-- 
cgit v1.2.3-59-g8ed1b


From 8863b1212aab2c738be9526f90389bc0fc18f90f Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:55:29 -0500
Subject: iwlegacy: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507185529.GA14639@embeddedor
---
 drivers/net/wireless/intel/iwlegacy/commands.h     | 22 +++++++++++-----------
 drivers/net/wireless/intel/iwlegacy/iwl-spectrum.h |  4 ++--
 2 files changed, 13 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlegacy/commands.h b/drivers/net/wireless/intel/iwlegacy/commands.h
index dd744135c956..89c6671b32bc 100644
--- a/drivers/net/wireless/intel/iwlegacy/commands.h
+++ b/drivers/net/wireless/intel/iwlegacy/commands.h
@@ -203,7 +203,7 @@ struct il_cmd_header {
 	__le16 sequence;
 
 	/* command or response/notification data follows immediately */
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 /**
@@ -1112,7 +1112,7 @@ struct il_wep_cmd {
 	u8 global_key_type;
 	u8 flags;
 	u8 reserved;
-	struct il_wep_key key[0];
+	struct il_wep_key key[];
 } __packed;
 
 #define WEP_KEY_WEP_TYPE 1
@@ -1166,7 +1166,7 @@ struct il3945_rx_frame_stats {
 	u8 agc;
 	__le16 sig_avg;
 	__le16 noise_diff;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct il3945_rx_frame_hdr {
@@ -1175,7 +1175,7 @@ struct il3945_rx_frame_hdr {
 	u8 reserved1;
 	u8 rate;
 	__le16 len;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 struct il3945_rx_frame_end {
@@ -1211,7 +1211,7 @@ struct il4965_rx_non_cfg_phy {
 	__le16 ant_selection;	/* ant A bit 4, ant B bit 5, ant C bit 6 */
 	__le16 agc_info;	/* agc code 0:6, agc dB 7:13, reserved 14:15 */
 	u8 rssi_info[6];	/* we use even entries, 0/2/4 for A/B/C rssi */
-	u8 pad[0];
+	u8 pad[];
 } __packed;
 
 /*
@@ -1409,7 +1409,7 @@ struct il3945_tx_cmd {
 	 * length is 26 or 30 bytes, followed by payload data
 	 */
 	u8 payload[0];
-	struct ieee80211_hdr hdr[0];
+	struct ieee80211_hdr hdr[];
 } __packed;
 
 /*
@@ -1511,7 +1511,7 @@ struct il_tx_cmd {
 	 * length is 26 or 30 bytes, followed by payload data
 	 */
 	u8 payload[0];
-	struct ieee80211_hdr hdr[0];
+	struct ieee80211_hdr hdr[];
 } __packed;
 
 /* TX command response is sent after *3945* transmission attempts.
@@ -2520,7 +2520,7 @@ struct il3945_scan_cmd {
 	 * for one scan to complete (i.e. receive N_SCAN_COMPLETE)
 	 * before requesting another scan.
 	 */
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct il_scan_cmd {
@@ -2564,7 +2564,7 @@ struct il_scan_cmd {
 	 * for one scan to complete (i.e. receive N_SCAN_COMPLETE)
 	 * before requesting another scan.
 	 */
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 /* Can abort will notify by complete notification with abort status. */
@@ -2664,7 +2664,7 @@ struct il3945_tx_beacon_cmd {
 	__le16 tim_idx;
 	u8 tim_size;
 	u8 reserved1;
-	struct ieee80211_hdr frame[0];	/* beacon frame */
+	struct ieee80211_hdr frame[];	/* beacon frame */
 } __packed;
 
 struct il_tx_beacon_cmd {
@@ -2672,7 +2672,7 @@ struct il_tx_beacon_cmd {
 	__le16 tim_idx;
 	u8 tim_size;
 	u8 reserved1;
-	struct ieee80211_hdr frame[0];	/* beacon frame */
+	struct ieee80211_hdr frame[];	/* beacon frame */
 } __packed;
 
 /******************************************************************************
diff --git a/drivers/net/wireless/intel/iwlegacy/iwl-spectrum.h b/drivers/net/wireless/intel/iwlegacy/iwl-spectrum.h
index a3b490501a70..1e8ab704dbfb 100644
--- a/drivers/net/wireless/intel/iwlegacy/iwl-spectrum.h
+++ b/drivers/net/wireless/intel/iwlegacy/iwl-spectrum.h
@@ -53,7 +53,7 @@ struct ieee80211_measurement_params {
 struct ieee80211_info_element {
 	u8 id;
 	u8 len;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct ieee80211_measurement_request {
@@ -61,7 +61,7 @@ struct ieee80211_measurement_request {
 	u8 token;
 	u8 mode;
 	u8 type;
-	struct ieee80211_measurement_params params[0];
+	struct ieee80211_measurement_params params[];
 } __packed;
 
 struct ieee80211_measurement_report {
-- 
cgit v1.2.3-59-g8ed1b


From 174812346c30321158046d879912c85d638cd1b7 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:59:14 -0500
Subject: mwl8k: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507185914.GA15124@embeddedor
---
 drivers/net/wireless/marvell/mwl8k.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/marvell/mwl8k.c b/drivers/net/wireless/marvell/mwl8k.c
index 47fb4b3ea004..97f23f93f6e7 100644
--- a/drivers/net/wireless/marvell/mwl8k.c
+++ b/drivers/net/wireless/marvell/mwl8k.c
@@ -2668,7 +2668,7 @@ struct mwl8k_cmd_mac_multicast_adr {
 	struct mwl8k_cmd_pkt header;
 	__le16 action;
 	__le16 numaddr;
-	__u8 addr[0][ETH_ALEN];
+	__u8 addr[][ETH_ALEN];
 };
 
 #define MWL8K_ENABLE_RX_DIRECTED	0x0001
-- 
cgit v1.2.3-59-g8ed1b


From 8d7d7a93d5265468a0019f11d0bcd2e005ce9ebd Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:02:10 -0500
Subject: prism54: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507190210.GA15375@embeddedor
---
 drivers/net/wireless/intersil/prism54/isl_oid.h    | 8 ++++----
 drivers/net/wireless/intersil/prism54/islpci_mgt.h | 2 +-
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/intersil/prism54/isl_oid.h b/drivers/net/wireless/intersil/prism54/isl_oid.h
index 5441c1f9f2fc..1afc2ccf94ca 100644
--- a/drivers/net/wireless/intersil/prism54/isl_oid.h
+++ b/drivers/net/wireless/intersil/prism54/isl_oid.h
@@ -37,7 +37,7 @@ struct obj_mlmeex {
 	u16 state;
 	u16 code;
 	u16 size;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 struct obj_buffer {
@@ -68,12 +68,12 @@ struct obj_bss {
 
 struct obj_bsslist {
 	u32 nr;
-	struct obj_bss bsslist[0];
+	struct obj_bss bsslist[];
 } __packed;
 
 struct obj_frequencies {
 	u16 nr;
-	u16 mhz[0];
+	u16 mhz[];
 } __packed;
 
 struct obj_attachment {
@@ -81,7 +81,7 @@ struct obj_attachment {
 	char reserved;
 	short id;
 	short size;
-	char data[0];
+	char data[];
 } __packed;
 
 /*
diff --git a/drivers/net/wireless/intersil/prism54/islpci_mgt.h b/drivers/net/wireless/intersil/prism54/islpci_mgt.h
index d6bbbac46b4a..1f87d0aea60c 100644
--- a/drivers/net/wireless/intersil/prism54/islpci_mgt.h
+++ b/drivers/net/wireless/intersil/prism54/islpci_mgt.h
@@ -99,7 +99,7 @@ struct islpci_mgmtframe {
 	pimfor_header_t *header;      /* payload header, points into buf */
 	void *data;		      /* payload ex header, points into buf */
         struct work_struct ws;	      /* argument for schedule_work() */
-	char buf[0];		      /* fragment buffer */
+	char buf[];		      /* fragment buffer */
 };
 
 int
-- 
cgit v1.2.3-59-g8ed1b


From 55bb8a2b01a3f1531a5154457ec1c7041f5c6f9e Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:19:26 -0500
Subject: qtnfmac: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Reviewed-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507191926.GA15970@embeddedor
---
 drivers/net/wireless/quantenna/qtnfmac/bus.h   |  2 +-
 drivers/net/wireless/quantenna/qtnfmac/qlink.h | 54 +++++++++++++-------------
 2 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/net/wireless/quantenna/qtnfmac/bus.h b/drivers/net/wireless/quantenna/qtnfmac/bus.h
index 87d048df09d1..3334c45aac13 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/bus.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/bus.h
@@ -69,7 +69,7 @@ struct qtnf_bus {
 	struct notifier_block netdev_nb;
 	u8 hw_id[ETH_ALEN];
 	/* bus private data */
-	char bus_priv[0] __aligned(sizeof(void *));
+	char bus_priv[] __aligned(sizeof(void *));
 };
 
 static inline bool qtnf_fw_is_up(struct qtnf_bus *bus)
diff --git a/drivers/net/wireless/quantenna/qtnfmac/qlink.h b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
index 4d22a54c034f..2dda4c5d7427 100644
--- a/drivers/net/wireless/quantenna/qtnfmac/qlink.h
+++ b/drivers/net/wireless/quantenna/qtnfmac/qlink.h
@@ -362,7 +362,7 @@ struct qlink_cmd {
 struct qlink_cmd_init_fw {
 	struct qlink_cmd chdr;
 	__le32 qlink_proto_ver;
-	u8 var_info[0];
+	u8 var_info[];
 } __packed;
 
 /**
@@ -434,7 +434,7 @@ struct qlink_cmd_frame_tx {
 	__le32 cookie;
 	__le16 freq;
 	__le16 flags;
-	u8 frame_data[0];
+	u8 frame_data[];
 } __packed;
 
 /**
@@ -466,7 +466,7 @@ struct qlink_cmd_add_key {
 	__le32 cipher;
 	__le16 vlanid;
 	u8 rsvd[2];
-	u8 key_data[0];
+	u8 key_data[];
 } __packed;
 
 /**
@@ -578,7 +578,7 @@ struct qlink_cmd_connect {
 	u8 mfp;
 	u8 pbss;
 	u8 rsvd[2];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /**
@@ -592,7 +592,7 @@ struct qlink_cmd_external_auth {
 	struct qlink_cmd chdr;
 	u8 peer[ETH_ALEN];
 	__le16 status;
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /**
@@ -698,7 +698,7 @@ struct qlink_cmd_reg_notify {
 	u8 dfs_region;
 	u8 slave_radar;
 	u8 dfs_offload;
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -773,7 +773,7 @@ struct qlink_cmd_start_ap {
 	struct qlink_sr_params sr_params;
 	u8 twt_responder;
 	u8 rsvd[3];
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -807,7 +807,7 @@ struct qlink_mac_address {
 struct qlink_acl_data {
 	__le32 policy;
 	__le32 num_entries;
-	struct qlink_mac_address mac_addrs[0];
+	struct qlink_mac_address mac_addrs[];
 } __packed;
 
 /**
@@ -882,7 +882,7 @@ enum qlink_wowlan_trigger {
 struct qlink_cmd_wowlan_set {
 	struct qlink_cmd chdr;
 	__le32 triggers;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 enum qlink_ndev_event_type {
@@ -958,7 +958,7 @@ struct qlink_cmd_scan {
 	u8 bssid[ETH_ALEN];
 	u8 scan_width;
 	u8 rsvd[3];
-	u8 var_info[0];
+	u8 var_info[];
 } __packed;
 
 /**
@@ -972,7 +972,7 @@ struct qlink_cmd_update_owe {
 	struct qlink_cmd chdr;
 	u8 peer[ETH_ALEN];
 	__le16 status;
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 /* QLINK Command Responses messages related definitions
@@ -1106,7 +1106,7 @@ struct qlink_resp_get_mac_info {
 	u8 n_reg_rules;
 	u8 dfs_region;
 	u8 rsvd[3];
-	u8 var_info[0];
+	u8 var_info[];
 } __packed;
 
 /**
@@ -1131,7 +1131,7 @@ struct qlink_resp_get_hw_info {
 	u8 mac_bitmap;
 	u8 total_tx_chain;
 	u8 total_rx_chain;
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -1167,7 +1167,7 @@ struct qlink_resp_get_sta_info {
 	struct qlink_resp rhdr;
 	u8 sta_addr[ETH_ALEN];
 	u8 rsvd[2];
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -1184,7 +1184,7 @@ struct qlink_resp_band_info_get {
 	u8 num_chans;
 	u8 num_bitrates;
 	u8 rsvd[1];
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -1196,7 +1196,7 @@ struct qlink_resp_band_info_get {
 struct qlink_resp_get_chan_stats {
 	struct qlink_resp rhdr;
 	__le32 chan_freq;
-	u8 info[0];
+	u8 info[];
 } __packed;
 
 /**
@@ -1270,7 +1270,7 @@ struct qlink_event_sta_assoc {
 	struct qlink_event ehdr;
 	u8 sta_addr[ETH_ALEN];
 	__le16 frame_control;
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 /**
@@ -1297,7 +1297,7 @@ struct qlink_event_bss_join {
 	struct qlink_chandef chan;
 	u8 bssid[ETH_ALEN];
 	__le16 status;
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 /**
@@ -1339,7 +1339,7 @@ struct qlink_event_rxmgmt {
 	__le32 flags;
 	s8 sig_dbm;
 	u8 rsvd[3];
-	u8 frame_data[0];
+	u8 frame_data[];
 } __packed;
 
 /**
@@ -1367,7 +1367,7 @@ struct qlink_event_scan_result {
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
 	u8 bssid[ETH_ALEN];
 	u8 rsvd[2];
-	u8 payload[0];
+	u8 payload[];
 } __packed;
 
 /**
@@ -1456,7 +1456,7 @@ struct qlink_event_update_owe {
 	struct qlink_event ehdr;
 	u8 peer[ETH_ALEN];
 	u8 rsvd[2];
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 /* QLINK TLVs (Type-Length Values) definitions
@@ -1512,7 +1512,7 @@ enum qlink_tlv_id {
 struct qlink_tlv_hdr {
 	__le16 type;
 	__le16 len;
-	u8 val[0];
+	u8 val[];
 } __packed;
 
 struct qlink_iface_limit {
@@ -1524,7 +1524,7 @@ struct qlink_iface_limit_record {
 	__le16 max_interfaces;
 	u8 num_different_channels;
 	u8 n_limits;
-	struct qlink_iface_limit limits[0];
+	struct qlink_iface_limit limits[];
 } __packed;
 
 #define QLINK_RSSI_OFFSET	120
@@ -1647,7 +1647,7 @@ struct qlink_tlv_ie_set {
 	u8 type;
 	u8 flags;
 	u8 rsvd[2];
-	u8 ie_data[0];
+	u8 ie_data[];
 } __packed;
 
 /**
@@ -1660,7 +1660,7 @@ struct qlink_tlv_ext_ie {
 	struct qlink_tlv_hdr hdr;
 	u8 eid_ext;
 	u8 rsvd[3];
-	u8 ie_data[0];
+	u8 ie_data[];
 } __packed;
 
 #define IEEE80211_HE_PPE_THRES_MAX_LEN		25
@@ -1681,7 +1681,7 @@ struct qlink_tlv_iftype_data {
 	struct qlink_tlv_hdr hdr;
 	u8 n_iftype_data;
 	u8 rsvd[3];
-	struct qlink_sband_iftype_data iftype_data[0];
+	struct qlink_sband_iftype_data iftype_data[];
 } __packed;
 
 /**
@@ -1867,7 +1867,7 @@ struct qlink_random_mac_addr {
 struct qlink_wowlan_capab_data {
 	__le16 version;
 	__le16 len;
-	u8 data[0];
+	u8 data[];
 } __packed;
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From bd7db3021aa7a31f7d5122af1a863e66c75f88f5 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 14:26:47 -0500
Subject: rndis_wlan: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kinds of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get rid of those sorts of issues completely.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507192647.GA16710@embeddedor
---
 drivers/net/wireless/rndis_wlan.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/rndis_wlan.c b/drivers/net/wireless/rndis_wlan.c
index 78a4325bfe1b..8852a1832951 100644
--- a/drivers/net/wireless/rndis_wlan.c
+++ b/drivers/net/wireless/rndis_wlan.c
@@ -201,7 +201,7 @@ struct ndis_80211_pmkid_candidate {
 struct ndis_80211_pmkid_cand_list {
 	__le32 version;
 	__le32 num_candidates;
-	struct ndis_80211_pmkid_candidate candidate_list[0];
+	struct ndis_80211_pmkid_candidate candidate_list[];
 } __packed;
 
 struct ndis_80211_status_indication {
@@ -246,12 +246,12 @@ struct ndis_80211_bssid_ex {
 	__le32 net_infra;
 	u8 rates[NDIS_802_11_LENGTH_RATES_EX];
 	__le32 ie_length;
-	u8 ies[0];
+	u8 ies[];
 } __packed;
 
 struct ndis_80211_bssid_list_ex {
 	__le32 num_items;
-	struct ndis_80211_bssid_ex bssid[0];
+	struct ndis_80211_bssid_ex bssid[];
 } __packed;
 
 struct ndis_80211_fixed_ies {
@@ -327,7 +327,7 @@ struct ndis_80211_bssid_info {
 struct ndis_80211_pmkid {
 	__le32 length;
 	__le32 bssid_info_count;
-	struct ndis_80211_bssid_info bssid_info[0];
+	struct ndis_80211_bssid_info bssid_info[];
 } __packed;
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 5bb4e125815aa769a7d2ab7dc203593925bba0ba Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Fri, 8 May 2020 21:51:39 +0200
Subject: ipw2x00: Fix comment for CLOCK_BOOTTIME constant
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The correct name of the constant is CLOCK_BOOTTIME, not CLOCK_BOOTIME.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200508195139.20078-1-pali@kernel.org
---
 drivers/net/wireless/intel/ipw2x00/ipw2200.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/ipw2x00/ipw2200.h b/drivers/net/wireless/intel/ipw2x00/ipw2200.h
index 09fa7f19050f..e1ec1c96dcd8 100644
--- a/drivers/net/wireless/intel/ipw2x00/ipw2200.h
+++ b/drivers/net/wireless/intel/ipw2x00/ipw2200.h
@@ -1329,7 +1329,7 @@ struct ipw_priv {
 
 	s8 tx_power;
 
-	/* Track time in suspend using CLOCK_BOOTIME */
+	/* Track time in suspend using CLOCK_BOOTTIME */
 	time64_t suspend_at;
 	time64_t suspend_time;
 
-- 
cgit v1.2.3-59-g8ed1b


From fd6c2dfa49b762ffe773a835ba62fa692df4c1b0 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 16 Feb 2020 16:08:58 +0100
Subject: mt76: mt76x02: fix handling MCU timeouts during hw restart

If an MCU timeout occurs before a hw restart completes, another hw restart
is scheduled, and the station state gets corrupted.
To speed up dealing with that, do not issue any MCU commands after the first
timeout, and defer handling timeouts until the reset has completed.
Also ignore errors in MCU commands during start/config to avoid making user
space fail on this condition. If it happens, another restart is scheduled
quickly, and that usually recovers the hardware properly.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h            |  1 +
 drivers/net/wireless/mediatek/mt76/mt76x0/pci.c      |  2 ++
 drivers/net/wireless/mediatek/mt76/mt76x02.h         |  2 ++
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c     |  3 +++
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c    | 16 ++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c |  1 +
 drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c | 19 ++++++-------------
 7 files changed, 31 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 8e4759bc8f59..6d60187e88ed 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -275,6 +275,7 @@ enum {
 	MT76_STATE_RUNNING,
 	MT76_STATE_MCU_RUNNING,
 	MT76_SCANNING,
+	MT76_RESTART,
 	MT76_RESET,
 	MT76_MCU_RESET,
 	MT76_REMOVED,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 0b520ae08d01..57091d41eb85 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -29,6 +29,7 @@ static void mt76x0e_stop_hw(struct mt76x02_dev *dev)
 {
 	cancel_delayed_work_sync(&dev->cal_work);
 	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	clear_bit(MT76_RESTART, &dev->mphy.state);
 
 	if (!mt76_poll(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_TX_DMA_BUSY,
 		       0, 1000))
@@ -83,6 +84,7 @@ static const struct ieee80211_ops mt76x0e_ops = {
 	.set_coverage_class = mt76x02_set_coverage_class,
 	.set_rts_threshold = mt76x02_set_rts_threshold,
 	.get_antenna = mt76_get_antenna,
+	.reconfig_complete = mt76x02_reconfig_complete,
 };
 
 static int mt76x0e_register_device(struct mt76x02_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 23040c193ca5..94dd142cb3d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -187,6 +187,8 @@ void mt76x02_sta_ps(struct mt76_dev *dev, struct ieee80211_sta *sta, bool ps);
 void mt76x02_bss_info_changed(struct ieee80211_hw *hw,
 			      struct ieee80211_vif *vif,
 			      struct ieee80211_bss_conf *info, u32 changed);
+void mt76x02_reconfig_complete(struct ieee80211_hw *hw,
+			       enum ieee80211_reconfig_type reconfig_type);
 
 struct beacon_bc_data {
 	struct mt76x02_dev *dev;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
index 5664749ad6c1..8247611d9b18 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
@@ -20,6 +20,9 @@ int mt76x02_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 	int ret;
 	u8 seq;
 
+	if (mt76_is_mmio(&dev->mt76) && dev->mcu_timeout)
+		return -EIO;
+
 	skb = mt76x02_mcu_msg_alloc(data, len);
 	if (!skb)
 		return -ENOMEM;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 7dcc5d342e9f..7e389dbccfeb 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -520,6 +520,7 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
 	}
 
 	if (restart) {
+		set_bit(MT76_RESTART, &dev->mphy.state);
 		mt76x02_mcu_function_select(dev, Q_SELECT, 1);
 		ieee80211_restart_hw(dev->mt76.hw);
 	} else {
@@ -528,8 +529,23 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
 	}
 }
 
+void mt76x02_reconfig_complete(struct ieee80211_hw *hw,
+			       enum ieee80211_reconfig_type reconfig_type)
+{
+	struct mt76x02_dev *dev = hw->priv;
+
+	if (reconfig_type != IEEE80211_RECONFIG_TYPE_RESTART)
+		return;
+
+	clear_bit(MT76_RESTART, &dev->mphy.state);
+}
+EXPORT_SYMBOL_GPL(mt76x02_reconfig_complete);
+
 static void mt76x02_check_tx_hang(struct mt76x02_dev *dev)
 {
+	if (test_bit(MT76_RESTART, &dev->mphy.state))
+		return;
+
 	if (mt76x02_tx_hang(dev)) {
 		if (++dev->tx_hang_check >= MT_TX_HANG_TH)
 			goto restart;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
index c69579e5f647..f27774f57438 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_init.c
@@ -256,6 +256,7 @@ void mt76x2_stop_hardware(struct mt76x02_dev *dev)
 	cancel_delayed_work_sync(&dev->cal_work);
 	cancel_delayed_work_sync(&dev->mt76.mac_work);
 	cancel_delayed_work_sync(&dev->wdt_work);
+	clear_bit(MT76_RESTART, &dev->mphy.state);
 	mt76x02_mcu_set_radio_state(dev, false);
 	mt76x2_mac_stop(dev, false);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
index 105e5b99b3f9..a74599f7f729 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
@@ -10,12 +10,9 @@ static int
 mt76x2_start(struct ieee80211_hw *hw)
 {
 	struct mt76x02_dev *dev = hw->priv;
-	int ret;
 
 	mt76x02_mac_start(dev);
-	ret = mt76x2_phy_start(dev);
-	if (ret)
-		return ret;
+	mt76x2_phy_start(dev);
 
 	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
 				     MT_MAC_WORK_INTERVAL);
@@ -35,11 +32,9 @@ mt76x2_stop(struct ieee80211_hw *hw)
 	mt76x2_stop_hardware(dev);
 }
 
-static int
+static void
 mt76x2_set_channel(struct mt76x02_dev *dev, struct cfg80211_chan_def *chandef)
 {
-	int ret;
-
 	cancel_delayed_work_sync(&dev->cal_work);
 	tasklet_disable(&dev->mt76.pre_tbtt_tasklet);
 	tasklet_disable(&dev->dfs_pd.dfs_tasklet);
@@ -50,7 +45,7 @@ mt76x2_set_channel(struct mt76x02_dev *dev, struct cfg80211_chan_def *chandef)
 	mt76_set_channel(&dev->mphy);
 
 	mt76x2_mac_stop(dev, true);
-	ret = mt76x2_phy_set_channel(dev, chandef);
+	mt76x2_phy_set_channel(dev, chandef);
 
 	mt76x02_mac_cc_reset(dev);
 	mt76x02_dfs_init_params(dev);
@@ -64,15 +59,12 @@ mt76x2_set_channel(struct mt76x02_dev *dev, struct cfg80211_chan_def *chandef)
 	tasklet_enable(&dev->mt76.pre_tbtt_tasklet);
 
 	mt76_txq_schedule_all(&dev->mphy);
-
-	return ret;
 }
 
 static int
 mt76x2_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mt76x02_dev *dev = hw->priv;
-	int ret = 0;
 
 	mutex_lock(&dev->mt76.mutex);
 
@@ -101,11 +93,11 @@ mt76x2_config(struct ieee80211_hw *hw, u32 changed)
 
 	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
 		ieee80211_stop_queues(hw);
-		ret = mt76x2_set_channel(dev, &hw->conf.chandef);
+		mt76x2_set_channel(dev, &hw->conf.chandef);
 		ieee80211_wake_queues(hw);
 	}
 
-	return ret;
+	return 0;
 }
 
 static void
@@ -162,5 +154,6 @@ const struct ieee80211_ops mt76x2_ops = {
 	.set_antenna = mt76x2_set_antenna,
 	.get_antenna = mt76_get_antenna,
 	.set_rts_threshold = mt76x02_set_rts_threshold,
+	.reconfig_complete = mt76x02_reconfig_complete,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From b2934279c3e9719145ff4090d4ab951e340df17e Mon Sep 17 00:00:00 2001
From: Matthew Garrett <matthewgarrett@google.com>
Date: Wed, 18 Mar 2020 16:07:48 -0700
Subject: mt76: mt76x02u: Add support for newer versions of the XBox One wifi
 adapter

The current version has a new USB ID and reports as an 0x7632 device.
Adding the IDs results in it working out of the box.

Signed-off-by: Matthew Garrett <mjg59@google.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x02.h    | 1 +
 drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 94dd142cb3d7..6ea210bd3f07 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -218,6 +218,7 @@ static inline bool is_mt76x0(struct mt76x02_dev *dev)
 static inline bool is_mt76x2(struct mt76x02_dev *dev)
 {
 	return mt76_chip(&dev->mt76) == 0x7612 ||
+	       mt76_chip(&dev->mt76) == 0x7632 ||
 	       mt76_chip(&dev->mt76) == 0x7662 ||
 	       mt76_chip(&dev->mt76) == 0x7602;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index eafa283ca699..6376734282b7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -18,6 +18,7 @@ static const struct usb_device_id mt76x2u_device_table[] = {
 	{ USB_DEVICE(0x7392, 0xb711) },	/* Edimax EW 7722 UAC */
 	{ USB_DEVICE(0x0846, 0x9053) },	/* Netgear A6210 */
 	{ USB_DEVICE(0x045e, 0x02e6) },	/* XBox One Wireless Adapter */
+	{ USB_DEVICE(0x045e, 0x02fe) },	/* XBox One Wireless Adapter */
 	{ },
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 3efdfbe0b5b5e08d7b699f5d3138c1801ffb3714 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 21 Mar 2020 16:14:42 +0100
Subject: mt76: mt76x2u: introduce Mercury UD13 support

Introduce Mercury UD13 dual-band dongle support to the mt76x2u driver.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x2/usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
index 6376734282b7..3a4e41724af1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/usb.c
@@ -16,6 +16,7 @@ static const struct usb_device_id mt76x2u_device_table[] = {
 	{ USB_DEVICE(0x0e8d, 0x7612) },	/* Aukey USBAC1200 - Alfa AWUS036ACM */
 	{ USB_DEVICE(0x057c, 0x8503) },	/* Avm FRITZ!WLAN AC860 */
 	{ USB_DEVICE(0x7392, 0xb711) },	/* Edimax EW 7722 UAC */
+	{ USB_DEVICE(0x2c4e, 0x0103) },	/* Mercury UD13 */
 	{ USB_DEVICE(0x0846, 0x9053) },	/* Netgear A6210 */
 	{ USB_DEVICE(0x045e, 0x02e6) },	/* XBox One Wireless Adapter */
 	{ USB_DEVICE(0x045e, 0x02fe) },	/* XBox One Wireless Adapter */
-- 
cgit v1.2.3-59-g8ed1b


From 663e69b141cd1e31039a9bebdaeb3aab0fe9c661 Mon Sep 17 00:00:00 2001
From: Pawel Dembicki <paweldembicki@gmail.com>
Date: Wed, 25 Mar 2020 06:55:23 +0100
Subject: mt76: mt76x0: pci: add mt7610 PCI ID

Add mt7610 PCI id found on D-Link DWR-960 to pci_device_id table.

Run-tested on D-Link DWR-960 with no-name half-size mPCIE card
with mt7610e.

Signed-off-by: Pawel Dembicki <paweldembicki@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x0/pci.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
index 57091d41eb85..f7ec3400e368 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/pci.c
@@ -218,6 +218,7 @@ mt76x0e_remove(struct pci_dev *pdev)
 }
 
 static const struct pci_device_id mt76x0e_device_table[] = {
+	{ PCI_DEVICE(0x14c3, 0x7610) },
 	{ PCI_DEVICE(0x14c3, 0x7630) },
 	{ PCI_DEVICE(0x14c3, 0x7650) },
 	{ },
-- 
cgit v1.2.3-59-g8ed1b


From f8de2bf26ce9e6611cbded70e6e9c78a5b4fd107 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 31 Mar 2020 14:51:35 +0800
Subject: mt76: mt7615: modify mt7615_ampdu_stat_read for each phy

This is a preliminary patch to add more Tx counters.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c | 20 ++++++++++++++++++--
 1 file changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index b4d0795154e3..9fd40d723201 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -120,12 +120,17 @@ mt7615_reset_test_set(void *data, u64 val)
 DEFINE_DEBUGFS_ATTRIBUTE(fops_reset_test, NULL,
 			 mt7615_reset_test_set, "%lld\n");
 
-static int
-mt7615_ampdu_stat_read(struct seq_file *file, void *data)
+static void
+mt7615_ampdu_stat_read_phy(struct mt7615_phy *phy,
+			   struct seq_file *file)
 {
 	struct mt7615_dev *dev = file->private;
+	bool ext_phy = phy != &dev->phy;
 	int bound[7], i, range;
 
+	if (!phy)
+		return;
+
 	range = mt76_rr(dev, MT_AGG_ASRCR0);
 	for (i = 0; i < 4; i++)
 		bound[i] = MT_AGG_ASRCR_RANGE(range, i) + 1;
@@ -133,6 +138,8 @@ mt7615_ampdu_stat_read(struct seq_file *file, void *data)
 	for (i = 0; i < 3; i++)
 		bound[i + 4] = MT_AGG_ASRCR_RANGE(range, i) + 1;
 
+	seq_printf(file, "\nPhy %d\n", ext_phy);
+
 	seq_printf(file, "Length: %8d | ", bound[0]);
 	for (i = 0; i < ARRAY_SIZE(bound) - 1; i++)
 		seq_printf(file, "%3d -%3d | ",
@@ -141,6 +148,15 @@ mt7615_ampdu_stat_read(struct seq_file *file, void *data)
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
 		seq_printf(file, "%8d | ", dev->mt76.aggr_stats[i]);
 	seq_puts(file, "\n");
+}
+
+static int
+mt7615_ampdu_stat_read(struct seq_file *file, void *data)
+{
+	struct mt7615_dev *dev = file->private;
+
+	mt7615_ampdu_stat_read_phy(&dev->phy, file);
+	mt7615_ampdu_stat_read_phy(mt7615_ext_phy(dev), file);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From b473fdbb745612e6ed50a176825ded1c5ba42c3f Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 31 Mar 2020 14:51:36 +0800
Subject: mt76: mt7615: enable aggr_stats for both phy

Use the second half of the aggr_stats array for the second phy.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c |  4 +++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c     | 21 +++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h    |  3 ++-
 3 files changed, 16 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 9fd40d723201..980e7e3cf37e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -145,8 +145,10 @@ mt7615_ampdu_stat_read_phy(struct mt7615_phy *phy,
 		seq_printf(file, "%3d -%3d | ",
 			   bound[i], bound[i + 1]);
 	seq_puts(file, "\nCount:  ");
+
+	range = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
-		seq_printf(file, "%8d | ", dev->mt76.aggr_stats[i]);
+		seq_printf(file, "%8d | ", dev->mt76.aggr_stats[i + range]);
 	seq_puts(file, "\n");
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index a27a6d164009..8572973cc4c8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -82,8 +82,10 @@ void mt7615_mac_reset_counters(struct mt7615_dev *dev)
 {
 	int i;
 
-	for (i = 0; i < 4; i++)
-		mt76_rr(dev, MT_TX_AGG_CNT(i));
+	for (i = 0; i < 4; i++) {
+		mt76_rr(dev, MT_TX_AGG_CNT(0, i));
+		mt76_rr(dev, MT_TX_AGG_CNT(1, i));
+	}
 
 	memset(dev->mt76.aggr_stats, 0, sizeof(dev->mt76.aggr_stats));
 	dev->mt76.phy.survey_time = ktime_get_boottime();
@@ -1751,13 +1753,14 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 	struct mt7615_dev *dev = phy->dev;
 	struct mib_stats *mib = &phy->mib;
 	bool ext_phy = phy != &dev->phy;
-	int i;
+	int i, aggr;
 
 	memset(mib, 0, sizeof(*mib));
 
 	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
 					  MT_MIB_SDR3_FCS_ERR_MASK);
 
+	aggr = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < 4; i++) {
 		u32 data, val, val2;
 
@@ -1772,6 +1775,11 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val2);
 			mib->rts_retries_cnt = data;
 		}
+
+		val = mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
+
+		dev->mt76.aggr_stats[aggr++] += val & 0xffff;
+		dev->mt76.aggr_stats[aggr++] += val >> 16;
 	}
 }
 
@@ -1779,7 +1787,6 @@ void mt7615_mac_work(struct work_struct *work)
 {
 	struct mt7615_dev *dev;
 	struct mt7615_phy *ext_phy;
-	int i, idx;
 
 	dev = (struct mt7615_dev *)container_of(work, struct mt76_dev,
 						mac_work.work);
@@ -1799,12 +1806,6 @@ void mt7615_mac_work(struct work_struct *work)
 		dev->mac_work_count = 0;
 	}
 
-	for (i = 0, idx = 0; i < 4; i++) {
-		u32 val = mt76_rr(dev, MT_TX_AGG_CNT(i));
-
-		dev->mt76.aggr_stats[idx++] += val & 0xffff;
-		dev->mt76.aggr_stats[idx++] += val >> 16;
-	}
 	mutex_unlock(&dev->mt76.mutex);
 
 	mt76_tx_status_check(&dev->mt76, NULL, false);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 1e0d95b917e1..d91041613df8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -406,7 +406,8 @@ enum mt7615_reg_base {
 						  ((n) << 4))
 #define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
 
-#define MT_TX_AGG_CNT(n)		MT_WF_MIB(0xa8 + ((n) << 2))
+#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(0xa8 + ((_band) << 9) + \
+						  ((n) << 2))
 
 #define MT_DMA_SHDL(ofs)		(dev->reg_map[MT_DMA_SHDL_BASE] + (ofs))
 
-- 
cgit v1.2.3-59-g8ed1b


From b7825ca0a29e1e679af1b0c6187c0d09a76c8554 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 31 Mar 2020 14:51:37 +0800
Subject: mt76: mt7615: cleanup mib related defines and structs

Simplify mib macros and use proper type for related counters.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 16 +++++++--------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  8 ++++----
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   | 23 ++++++++++------------
 3 files changed, 22 insertions(+), 25 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 8572973cc4c8..f30dc015e88e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1535,8 +1535,8 @@ void mt7615_mac_set_scs(struct mt7615_dev *dev, bool enable)
 		mt76_set(dev, MT_WF_PHY_MIN_PRI_PWR(1),
 			 MT_WF_PHY_PD_BLK(1));
 		if (is_mt7622(&dev->mt76)) {
-			mt76_set(dev, MT_MIB_M0_MISC_CR, 0x7 << 8);
-			mt76_set(dev, MT_MIB_M0_MISC_CR, 0x7);
+			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7 << 8);
+			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7);
 		}
 	} else {
 		mt76_clear(dev, MT_WF_PHY_MIN_PRI_PWR(0),
@@ -1762,18 +1762,18 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 
 	aggr = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < 4; i++) {
-		u32 data, val, val2;
+		u32 val, val2;
 
 		val = mt76_get_field(dev, MT_MIB_MB_SDR1(ext_phy, i),
 				     MT_MIB_ACK_FAIL_COUNT_MASK);
 		if (val > mib->ack_fail_cnt)
 			mib->ack_fail_cnt = val;
 
-		val2 = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
-		data = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val2);
-		if (data > mib->rts_retries_cnt) {
-			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val2);
-			mib->rts_retries_cnt = data;
+		val = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
+		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
+		if (val2 > mib->rts_retries_cnt) {
+			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
+			mib->rts_retries_cnt = val2;
 		}
 
 		val = mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 676ca622c35a..000070769217 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -113,10 +113,10 @@ struct mt7615_vif {
 };
 
 struct mib_stats {
-	u32 ack_fail_cnt;
-	u32 fcs_err_cnt;
-	u32 rts_cnt;
-	u32 rts_retries_cnt;
+	u16 ack_fail_cnt;
+	u16 fcs_err_cnt;
+	u16 rts_cnt;
+	u16 rts_retries_cnt;
 };
 
 struct mt7615_phy {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index d91041613df8..58aaa57fb451 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -379,35 +379,32 @@ enum mt7615_reg_base {
 #define MT_LPON_UTTR1			MT_LPON(0x01c)
 
 #define MT_WF_MIB_BASE			(dev->reg_map[MT_MIB_BASE])
-#define MT_WF_MIB(ofs)			(MT_WF_MIB_BASE + (ofs))
+#define MT_WF_MIB(_band, ofs)		(MT_WF_MIB_BASE + (ofs) + (_band) * 0x200)
 
-#define MT_MIB_M0_MISC_CR		MT_WF_MIB(0x00c)
+#define MT_MIB_M0_MISC_CR(_band)	MT_WF_MIB(_band, 0x00c)
 
-#define MT_MIB_SDR3(n)			MT_WF_MIB(0x014 + ((n) << 9))
+#define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x014)
 #define MT_MIB_SDR3_FCS_ERR_MASK	GENMASK(15, 0)
 
-#define MT_MIB_SDR9(n)			MT_WF_MIB(0x02c + ((n) << 9))
+#define MT_MIB_SDR9(_band)		MT_WF_MIB(_band, 0x02c)
 #define MT_MIB_SDR9_BUSY_MASK		GENMASK(23, 0)
 
-#define MT_MIB_SDR16(n)			MT_WF_MIB(0x048 + ((n) << 9))
+#define MT_MIB_SDR16(_band)		MT_WF_MIB(_band, 0x048)
 #define MT_MIB_SDR16_BUSY_MASK		GENMASK(23, 0)
 
-#define MT_MIB_SDR36(n)			MT_WF_MIB(0x098 + ((n) << 9))
+#define MT_MIB_SDR36(_band)		MT_WF_MIB(_band, 0x098)
 #define MT_MIB_SDR36_TXTIME_MASK	GENMASK(23, 0)
-#define MT_MIB_SDR37(n)			MT_WF_MIB(0x09c + ((n) << 9))
+#define MT_MIB_SDR37(_band)		MT_WF_MIB(_band, 0x09c)
 #define MT_MIB_SDR37_RXTIME_MASK	GENMASK(23, 0)
 
-#define MT_MIB_MB_SDR0(_band, n)	MT_WF_MIB(0x100 + ((_band) << 9) + \
-						  ((n) << 4))
+#define MT_MIB_MB_SDR0(_band, n)	MT_WF_MIB(_band, 0x100 + ((n) << 4))
 #define MT_MIB_RTS_RETRIES_COUNT_MASK	GENMASK(31, 16)
 #define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
 
-#define MT_MIB_MB_SDR1(_band, n)	MT_WF_MIB(0x104 + ((_band) << 9) + \
-						  ((n) << 4))
+#define MT_MIB_MB_SDR1(_band, n)	MT_WF_MIB(_band, 0x104 + ((n) << 4))
 #define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
 
-#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(0xa8 + ((_band) << 9) + \
-						  ((n) << 2))
+#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0xa8 + ((n) << 2))
 
 #define MT_DMA_SHDL(ofs)		(dev->reg_map[MT_DMA_SHDL_BASE] + (ofs))
 
-- 
cgit v1.2.3-59-g8ed1b


From aef16345ba8db6455c816fdbe584e140dde84d32 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 31 Mar 2020 14:51:38 +0800
Subject: mt76: mt7615: add more useful Tx mib counters

Add ba_miss_cnt and aggr_per to mib_stats.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c |  4 ++++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c     | 20 +++++++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h  |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h    |  7 +++++++
 4 files changed, 28 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 980e7e3cf37e..641bfada5756 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -150,6 +150,10 @@ mt7615_ampdu_stat_read_phy(struct mt7615_phy *phy,
 	for (i = 0; i < ARRAY_SIZE(bound); i++)
 		seq_printf(file, "%8d | ", dev->mt76.aggr_stats[i + range]);
 	seq_puts(file, "\n");
+
+	seq_printf(file, "BA miss count: %d\n", phy->mib.ba_miss_cnt);
+	seq_printf(file, "PER: %ld.%1ld%%\n",
+		   phy->mib.aggr_per / 10, phy->mib.aggr_per % 10);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f30dc015e88e..5451c0b8c9f2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1754,20 +1754,30 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 	struct mib_stats *mib = &phy->mib;
 	bool ext_phy = phy != &dev->phy;
 	int i, aggr;
+	u32 val, val2;
 
 	memset(mib, 0, sizeof(*mib));
 
 	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
 					  MT_MIB_SDR3_FCS_ERR_MASK);
 
+	val = mt76_get_field(dev, MT_MIB_SDR14(ext_phy),
+			     MT_MIB_AMPDU_MPDU_COUNT);
+	val2 = mt76_get_field(dev, MT_MIB_SDR15(ext_phy),
+			      MT_MIB_AMPDU_ACK_COUNT);
+	mib->aggr_per = 1000 * (val - val2) / val;
+
 	aggr = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < 4; i++) {
-		u32 val, val2;
+		val = mt76_rr(dev, MT_MIB_MB_SDR1(ext_phy, i));
+
+		val2 = FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
+		if (val2 > mib->ack_fail_cnt)
+			mib->ack_fail_cnt = val2;
 
-		val = mt76_get_field(dev, MT_MIB_MB_SDR1(ext_phy, i),
-				     MT_MIB_ACK_FAIL_COUNT_MASK);
-		if (val > mib->ack_fail_cnt)
-			mib->ack_fail_cnt = val;
+		val2 = FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
+		if (val2 > mib->ba_miss_cnt)
+			mib->ba_miss_cnt = val2;
 
 		val = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
 		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 000070769217..052031d25f38 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -117,6 +117,8 @@ struct mib_stats {
 	u16 fcs_err_cnt;
 	u16 rts_cnt;
 	u16 rts_retries_cnt;
+	u16 ba_miss_cnt;
+	unsigned long aggr_per;
 };
 
 struct mt7615_phy {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 58aaa57fb451..29c56b0723cd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -389,6 +389,12 @@ enum mt7615_reg_base {
 #define MT_MIB_SDR9(_band)		MT_WF_MIB(_band, 0x02c)
 #define MT_MIB_SDR9_BUSY_MASK		GENMASK(23, 0)
 
+#define MT_MIB_SDR14(_band)		MT_WF_MIB(_band, 0x040)
+#define MT_MIB_AMPDU_MPDU_COUNT		GENMASK(23, 0)
+
+#define MT_MIB_SDR15(_band)		MT_WF_MIB(_band, 0x044)
+#define MT_MIB_AMPDU_ACK_COUNT		GENMASK(23, 0)
+
 #define MT_MIB_SDR16(_band)		MT_WF_MIB(_band, 0x048)
 #define MT_MIB_SDR16_BUSY_MASK		GENMASK(23, 0)
 
@@ -402,6 +408,7 @@ enum mt7615_reg_base {
 #define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
 
 #define MT_MIB_MB_SDR1(_band, n)	MT_WF_MIB(_band, 0x104 + ((n) << 4))
+#define MT_MIB_BA_MISS_COUNT_MASK	GENMASK(15, 0)
 #define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
 
 #define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0xa8 + ((n) << 2))
-- 
cgit v1.2.3-59-g8ed1b


From 886a862d3677ac0d3b57d19ffcf5b2d48b9c5267 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:31 +0200
Subject: mt76: mt7663: fix mt7615_mac_cca_stats_reset routine

Fix the PHYMUX_5 register definition used for mt7663 in the
mt7615_mac_cca_stats_reset routine.

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 8 +++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 5451c0b8c9f2..778b47912475 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1576,8 +1576,14 @@ void mt7615_mac_cca_stats_reset(struct mt7615_phy *phy)
 {
 	struct mt7615_dev *dev = phy->dev;
 	bool ext_phy = phy != &dev->phy;
-	u32 reg = MT_WF_PHY_R0_PHYMUX_5(ext_phy);
+	u32 reg;
 
+	if (is_mt7663(&dev->mt76))
+		reg = MT7663_WF_PHY_R0_PHYMUX_5;
+	else
+		reg = MT_WF_PHY_R0_PHYMUX_5(ext_phy);
+
+	/* reset PD and MDRDY counters */
 	mt76_clear(dev, reg, GENMASK(22, 20));
 	mt76_set(dev, reg, BIT(22) | BIT(20));
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 29c56b0723cd..a255a6199680 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -151,6 +151,7 @@ enum mt7615_reg_base {
 #define MT_WF_PHY_WF2_RFCTRL0_LPBCN_EN	BIT(9)
 
 #define MT_WF_PHY_R0_PHYMUX_5(_phy)	MT_WF_PHY(0x0614 + ((_phy) << 9))
+#define MT7663_WF_PHY_R0_PHYMUX_5	MT_WF_PHY(0x0414)
 
 #define MT_WF_PHY_R0_PHYCTRL_STS0(_phy)	MT_WF_PHY(0x020c + ((_phy) << 9))
 #define MT_WF_PHYCTRL_STAT_PD_OFDM	GENMASK(31, 16)
-- 
cgit v1.2.3-59-g8ed1b


From b61e45eb891fd8fb0704fc05aaae3be53e7687ae Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:32 +0200
Subject: mt76: mt7663: enable nf estimation

Enable Noise floor estimation for mt7663 driver

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 16 +++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h |  2 ++
 2 files changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 778b47912475..e6d312dd81c2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1558,10 +1558,12 @@ out:
 
 void mt7615_mac_enable_nf(struct mt7615_dev *dev, bool ext_phy)
 {
-	u32 rxtd;
+	u32 rxtd, reg;
 
 	if (is_mt7663(&dev->mt76))
-		return;
+		reg = MT7663_WF_PHY_R0_PHYMUX_5;
+	else
+		reg = MT_WF_PHY_R0_PHYMUX_5(ext_phy);
 
 	if (ext_phy)
 		rxtd = MT_WF_PHY_RXTD2(10);
@@ -1569,7 +1571,7 @@ void mt7615_mac_enable_nf(struct mt7615_dev *dev, bool ext_phy)
 		rxtd = MT_WF_PHY_RXTD(12);
 
 	mt76_set(dev, rxtd, BIT(18) | BIT(29));
-	mt76_set(dev, MT_WF_PHY_R0_PHYMUX_5(ext_phy), 0x5 << 12);
+	mt76_set(dev, reg, 0x5 << 12);
 }
 
 void mt7615_mac_cca_stats_reset(struct mt7615_phy *phy)
@@ -1693,10 +1695,14 @@ static u8
 mt7615_phy_get_nf(struct mt7615_dev *dev, int idx)
 {
 	static const u8 nf_power[] = { 92, 89, 86, 83, 80, 75, 70, 65, 60, 55, 52 };
-	u32 reg = idx ? MT_WF_PHY_RXTD2(17) : MT_WF_PHY_RXTD(20);
-	u32 val, sum = 0, n = 0;
+	u32 reg, val, sum = 0, n = 0;
 	int i;
 
+	if (is_mt7663(&dev->mt76))
+		reg = MT7663_WF_PHY_RXTD(20);
+	else
+		reg = idx ? MT_WF_PHY_RXTD2(17) : MT_WF_PHY_RXTD(20);
+
 	for (i = 0; i < ARRAY_SIZE(nf_power); i++, reg += 4) {
 		val = mt76_rr(dev, reg);
 		sum += val * nf_power[i];
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index a255a6199680..481e4d941e72 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -170,6 +170,8 @@ enum mt7615_reg_base {
 #define MT_WF_PHY_RXTD_BASE		MT_WF_PHY(0x2200)
 #define MT_WF_PHY_RXTD(_n)		(MT_WF_PHY_RXTD_BASE + ((_n) << 2))
 
+#define MT7663_WF_PHY_RXTD(_n)		(MT_WF_PHY(0x25b0) + ((_n) << 2))
+
 #define MT_WF_PHY_RXTD_CCK_PD(_phy)	MT_WF_PHY((_phy) ? 0x2314 : 0x2310)
 #define MT_WF_PHY_PD_CCK_MASK(_phy)	(_phy) ? GENMASK(31, 24) : \
 					 GENMASK(8, 1)
-- 
cgit v1.2.3-59-g8ed1b


From 594034b788673a42a0fad785b8a72c523568721f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:33 +0200
Subject: mt76: mt7615: make scs configurable per phy

Make scs configurable per phy since most of the chipsets do not
support dbdc

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7615/debugfs.c    |  8 ++++--
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  4 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 29 ++++++++--------------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  5 ++--
 4 files changed, 23 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 641bfada5756..777c7f9bd760 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -20,11 +20,15 @@ static int
 mt7615_scs_set(void *data, u64 val)
 {
 	struct mt7615_dev *dev = data;
+	struct mt7615_phy *ext_phy;
 
 	if (!mt7615_wait_for_mcu_init(dev))
 		return 0;
 
-	mt7615_mac_set_scs(dev, val);
+	mt7615_mac_set_scs(&dev->phy, val);
+	ext_phy = mt7615_ext_phy(dev);
+	if (ext_phy)
+		mt7615_mac_set_scs(ext_phy, val);
 
 	return 0;
 }
@@ -34,7 +38,7 @@ mt7615_scs_get(void *data, u64 *val)
 {
 	struct mt7615_dev *dev = data;
 
-	*val = dev->scs_en;
+	*val = dev->phy.scs_en;
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 03b1e56534d6..8b7d8118d9ab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -90,7 +90,7 @@ static void mt7615_mac_init(struct mt7615_dev *dev)
 		 MT_TMAC_CTCR0_INS_DDLMT_EN);
 
 	mt7615_mcu_set_rts_thresh(&dev->phy, 0x92b);
-	mt7615_mac_set_scs(dev, true);
+	mt7615_mac_set_scs(&dev->phy, true);
 
 	mt76_rmw(dev, MT_AGG_SCR, MT_AGG_SCR_NLNAV_MID_PTEC_DIS,
 		 MT_AGG_SCR_NLNAV_MID_PTEC_DIS);
@@ -411,6 +411,8 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	mphy->antenna_mask = BIT(hweight8(phy->chainmask)) - 1;
 	mt7615_init_wiphy(mphy->hw);
 
+	mt7615_mac_set_scs(phy, true);
+
 	/*
 	 * Make the secondary PHY MAC address local without overlapping with
 	 * the usual MAC address allocation scheme on multiple virtual interfaces
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e6d312dd81c2..6b013e8dadd7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1517,40 +1517,33 @@ mt7615_mac_set_default_sensitivity(struct mt7615_phy *phy)
 	phy->last_cca_adj = jiffies;
 }
 
-void mt7615_mac_set_scs(struct mt7615_dev *dev, bool enable)
+void mt7615_mac_set_scs(struct mt7615_phy *phy, bool enable)
 {
-	struct mt7615_phy *ext_phy;
+	struct mt7615_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
 
 	mutex_lock(&dev->mt76.mutex);
 
-	if (dev->scs_en == enable)
+	if (phy->scs_en == enable)
 		goto out;
 
 	if (is_mt7663(&dev->mt76))
 		goto out;
 
 	if (enable) {
-		mt76_set(dev, MT_WF_PHY_MIN_PRI_PWR(0),
-			 MT_WF_PHY_PD_BLK(0));
-		mt76_set(dev, MT_WF_PHY_MIN_PRI_PWR(1),
-			 MT_WF_PHY_PD_BLK(1));
+		mt76_set(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
+			 MT_WF_PHY_PD_BLK(ext_phy));
 		if (is_mt7622(&dev->mt76)) {
 			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7 << 8);
 			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7);
 		}
 	} else {
-		mt76_clear(dev, MT_WF_PHY_MIN_PRI_PWR(0),
-			   MT_WF_PHY_PD_BLK(0));
-		mt76_clear(dev, MT_WF_PHY_MIN_PRI_PWR(1),
-			   MT_WF_PHY_PD_BLK(1));
+		mt76_clear(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
+			   MT_WF_PHY_PD_BLK(ext_phy));
 	}
 
-	mt7615_mac_set_default_sensitivity(&dev->phy);
-	ext_phy = mt7615_ext_phy(dev);
-	if (ext_phy)
-		mt7615_mac_set_default_sensitivity(ext_phy);
-
-	dev->scs_en = enable;
+	mt7615_mac_set_default_sensitivity(phy);
+	phy->scs_en = enable;
 
 out:
 	mutex_unlock(&dev->mt76.mutex);
@@ -1663,7 +1656,7 @@ mt7615_mac_scs_check(struct mt7615_phy *phy)
 	u32 mdrdy_cck, mdrdy_ofdm, pd_cck, pd_ofdm;
 	bool ext_phy = phy != &dev->phy;
 
-	if (!dev->scs_en)
+	if (!phy->scs_en)
 		return;
 
 	val = mt76_rr(dev, MT_WF_PHY_R0_PHYCTRL_STS0(ext_phy));
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 052031d25f38..e531c416f85d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -130,6 +130,8 @@ struct mt7615_phy {
 
 	u16 noise;
 
+	bool scs_en;
+
 	unsigned long last_cca_adj;
 	int false_cca_ofdm, false_cca_cck;
 	s8 ofdm_sensitivity;
@@ -211,7 +213,6 @@ struct mt7615_dev {
 	u32 hw_pattern;
 
 	u8 mac_work_count;
-	bool scs_en;
 	bool fw_debug;
 
 	spinlock_t token_lock;
@@ -369,7 +370,7 @@ void mt7615_update_channel(struct mt76_dev *mdev);
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask);
 void mt7615_mac_reset_counters(struct mt7615_dev *dev);
 void mt7615_mac_cca_stats_reset(struct mt7615_phy *phy);
-void mt7615_mac_set_scs(struct mt7615_dev *dev, bool enable);
+void mt7615_mac_set_scs(struct mt7615_phy *phy, bool enable);
 void mt7615_mac_enable_nf(struct mt7615_dev *dev, bool ext_phy);
 void mt7615_mac_sta_poll(struct mt7615_dev *dev);
 int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
-- 
cgit v1.2.3-59-g8ed1b


From 02c655919c431fc72171b3016dead174a4bf4f8e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:34 +0200
Subject: mt76: mt7663: disable RDD commands

Disable dfs RDD mcu commands for mt7663 driver since they are not
currently supported by the 7663 firmware

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6b013e8dadd7..e88914685807 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -2047,6 +2047,9 @@ int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy)
 	bool ext_phy = phy != &dev->phy;
 	int err;
 
+	if (is_mt7663(&dev->mt76))
+		return 0;
+
 	if (dev->mt76.region == NL80211_DFS_UNSET) {
 		phy->dfs_state = -1;
 		if (phy->rdd_state)
-- 
cgit v1.2.3-59-g8ed1b


From 938d76bb3a29591e18da9aa8d8c8fc6a4454e2b7 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:35 +0200
Subject: mt76: mt7615: add ethtool support to mt7663 driver

Report n9 firmware version using ethtool

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 610cfa918c7b..32e3e4219076 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1914,8 +1914,14 @@ static int mt7663_load_n9(struct mt7615_dev *dev, const char *name)
 	}
 
 	ret = mt7615_mcu_start_firmware(dev, override_addr, flag);
-	if (ret)
+	if (ret) {
 		dev_err(dev->mt76.dev, "Failed to start N9 firmware\n");
+		goto out;
+	}
+
+	snprintf(dev->mt76.hw->wiphy->fw_version,
+		 sizeof(dev->mt76.hw->wiphy->fw_version),
+		 "%.10s-%.15s", hdr->fw_ver, hdr->build_date);
 
 out:
 	release_firmware(fw);
-- 
cgit v1.2.3-59-g8ed1b


From bf18fcdc9a4ba0861743cb7c8c8a6349ad0114f5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:36 +0200
Subject: mt76: mt7615: introduce mt7615_mcu_set_channel_domain mcu command

Introduce the mt7615_mcu_set_channel_domain routine in order to instruct
the mcu about the supported bands/channels. This is a preliminary patch to
add hw scan support to the mt7663e driver.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 72 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  9 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  6 ++
 4 files changed, 85 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 6586176c29af..20f6e31a017d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -50,6 +50,7 @@ static int mt7615_start(struct ieee80211_hw *hw)
 		mt7615_mac_enable_nf(dev, 1);
 	}
 
+	mt7615_mcu_set_channel_domain(phy);
 	mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 32e3e4219076..9ce08be2f7c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -135,14 +135,21 @@ void mt7615_mcu_fill_msg(struct mt7615_dev *dev, struct sk_buff *skb,
 	mcu_txd->pkt_type = MCU_PKT_ID;
 	mcu_txd->seq = seq;
 
-	if (cmd & MCU_FW_PREFIX) {
+	switch (cmd & ~MCU_CMD_MASK) {
+	case MCU_FW_PREFIX:
 		mcu_txd->set_query = MCU_Q_NA;
 		mcu_txd->cid = mcu_cmd;
-	} else {
+		break;
+	case MCU_CE_PREFIX:
+		mcu_txd->set_query = MCU_Q_SET;
+		mcu_txd->cid = mcu_cmd;
+		break;
+	default:
 		mcu_txd->cid = MCU_CMD_EXT_CID;
 		mcu_txd->set_query = MCU_Q_SET;
 		mcu_txd->ext_cid = cmd;
 		mcu_txd->ext_cid_ack = 1;
+		break;
 	}
 }
 
@@ -2421,3 +2428,64 @@ int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable)
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_TX_POWER_FEATURE_CTRL, &req,
 				   sizeof(req), true);
 }
+
+int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
+{
+	struct mt76_phy *mphy = phy->mt76;
+	struct mt7615_dev *dev = phy->dev;
+	struct mt7615_mcu_channel_domain {
+		__le32 country_code; /* regulatory_request.alpha2 */
+		u8 bw_2g; /* BW_20_40M		0
+			   * BW_20M		1
+			   * BW_20_40_80M	2
+			   * BW_20_40_80_160M	3
+			   * BW_20_40_80_8080M	4
+			   */
+		u8 bw_5g;
+		__le16 pad;
+		u8 n_2ch;
+		u8 n_5ch;
+		__le16 pad2;
+	} __packed hdr = {
+		.bw_2g = 0,
+		.bw_5g = 3,
+		.n_2ch = mphy->sband_2g.sband.n_channels,
+		.n_5ch = mphy->sband_5g.sband.n_channels,
+	};
+	struct mt7615_mcu_chan {
+		__le16 hw_value;
+		__le16 pad;
+		__le32 flags;
+	} __packed;
+	int i, n_channels = hdr.n_2ch + hdr.n_5ch;
+	int len = sizeof(hdr) + n_channels * sizeof(struct mt7615_mcu_chan);
+	struct sk_buff *skb;
+
+	if (!mt7615_firmware_offload(dev))
+		return 0;
+
+	skb = mt7615_mcu_msg_alloc(NULL, len);
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, &hdr, sizeof(hdr));
+
+	for (i = 0; i < n_channels; i++) {
+		struct ieee80211_channel *chan;
+		struct mt7615_mcu_chan channel;
+
+		if (i < hdr.n_2ch)
+			chan = &mphy->sband_2g.sband.channels[i];
+		else
+			chan = &mphy->sband_5g.sband.channels[i - hdr.n_2ch];
+
+		channel.hw_value = cpu_to_le16(chan->hw_value);
+		channel.flags = cpu_to_le32(chan->flags);
+		channel.pad = 0;
+
+		skb_put_data(skb, &channel, sizeof(channel));
+	}
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_CMD_SET_CHAN_DOMAIN, false);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index d1f7391472fc..c03541328ad7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -232,7 +232,9 @@ enum {
 
 #define MCU_FW_PREFIX		BIT(31)
 #define MCU_UNI_PREFIX		BIT(30)
-#define MCU_CMD_MASK		~(MCU_FW_PREFIX | MCU_UNI_PREFIX)
+#define MCU_CE_PREFIX		BIT(29)
+#define MCU_CMD_MASK		~(MCU_FW_PREFIX | MCU_UNI_PREFIX |	\
+				  MCU_CE_PREFIX)
 
 enum {
 	MCU_CMD_TARGET_ADDRESS_LEN_REQ = MCU_FW_PREFIX | 0x01,
@@ -275,6 +277,11 @@ enum {
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
 };
 
+/* offload mcu commands */
+enum {
+	MCU_CMD_SET_CHAN_DOMAIN = MCU_CE_PREFIX | 0x0f,
+};
+
 #define MCU_CMD_ACK		BIT(0)
 #define MCU_CMD_UNI		BIT(1)
 #define MCU_CMD_QUERY		BIT(2)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index e531c416f85d..c61dc2f54c52 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -366,6 +366,11 @@ static inline void mt7615_irq_disable(struct mt7615_dev *dev, u32 mask)
 	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, mask, 0);
 }
 
+static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
+{
+	return dev->fw_ver > MT7615_FIRMWARE_V2;
+}
+
 void mt7615_update_channel(struct mt76_dev *mdev);
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask);
 void mt7615_mac_reset_counters(struct mt7615_dev *dev);
@@ -395,6 +400,7 @@ int mt7615_mcu_get_temperature(struct mt7615_dev *dev, int index);
 void mt7615_mcu_exit(struct mt7615_dev *dev);
 void mt7615_mcu_fill_msg(struct mt7615_dev *dev, struct sk_buff *skb,
 			 int cmd, int *wait_seq);
+int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy);
 
 int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
-- 
cgit v1.2.3-59-g8ed1b


From daf250f89f4c5261997b76abc13323be0dc0205e Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 2 Apr 2020 15:06:37 +0200
Subject: mt76: mt7663: keep Rx filters as the default

Keep Rx filters default value if the firmware supports offload and
low power features.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 8b7d8118d9ab..96b37f6ebf1e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -22,7 +22,7 @@ static void mt7615_phy_init(struct mt7615_dev *dev)
 static void
 mt7615_init_mac_chain(struct mt7615_dev *dev, int chain)
 {
-	u32 val, mask, set;
+	u32 val;
 
 	if (!chain)
 		val = MT_CFG_CCR_MAC_D0_1X_GC_EN | MT_CFG_CCR_MAC_D0_2X_GC_EN;
@@ -62,15 +62,19 @@ mt7615_init_mac_chain(struct mt7615_dev *dev, int chain)
 		FIELD_PREP(MT_AGG_ARxCR_LIMIT(6), MT7615_RATE_RETRY - 1) |
 		FIELD_PREP(MT_AGG_ARxCR_LIMIT(7), MT7615_RATE_RETRY - 1));
 
-	mask = MT_DMA_RCFR0_MCU_RX_MGMT |
-	       MT_DMA_RCFR0_MCU_RX_CTL_NON_BAR |
-	       MT_DMA_RCFR0_MCU_RX_CTL_BAR |
-	       MT_DMA_RCFR0_MCU_RX_BYPASS |
-	       MT_DMA_RCFR0_RX_DROPPED_UCAST |
-	       MT_DMA_RCFR0_RX_DROPPED_MCAST;
-	set = FIELD_PREP(MT_DMA_RCFR0_RX_DROPPED_UCAST, 2) |
-	      FIELD_PREP(MT_DMA_RCFR0_RX_DROPPED_MCAST, 2);
-	mt76_rmw(dev, MT_DMA_RCFR0(chain), mask, set);
+	if (!mt7615_firmware_offload(dev)) {
+		u32 mask, set;
+
+		mask = MT_DMA_RCFR0_MCU_RX_MGMT |
+		       MT_DMA_RCFR0_MCU_RX_CTL_NON_BAR |
+		       MT_DMA_RCFR0_MCU_RX_CTL_BAR |
+		       MT_DMA_RCFR0_MCU_RX_BYPASS |
+		       MT_DMA_RCFR0_RX_DROPPED_UCAST |
+		       MT_DMA_RCFR0_RX_DROPPED_MCAST;
+		set = FIELD_PREP(MT_DMA_RCFR0_RX_DROPPED_UCAST, 2) |
+		      FIELD_PREP(MT_DMA_RCFR0_RX_DROPPED_MCAST, 2);
+		mt76_rmw(dev, MT_DMA_RCFR0(chain), mask, set);
+	}
 }
 
 static void mt7615_mac_init(struct mt7615_dev *dev)
-- 
cgit v1.2.3-59-g8ed1b


From fcdfc29e58ee3b4db894f356fb6b12a6546f57bd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:38 +0200
Subject: mt76: mt7615: introduce hw scan support

Introduce hw scan support to mt7663e driver

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   8 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  19 +++-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  34 ++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 125 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  90 +++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  11 ++
 7 files changed, 285 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 6d60187e88ed..09f407cb1c48 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -275,6 +275,7 @@ enum {
 	MT76_STATE_RUNNING,
 	MT76_STATE_MCU_RUNNING,
 	MT76_SCANNING,
+	MT76_HW_SCANNING,
 	MT76_RESTART,
 	MT76_RESET,
 	MT76_MCU_RESET,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 96b37f6ebf1e..04bcc1a2aa8f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -349,8 +349,13 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
 	wiphy->reg_notifier = mt7615_regd_notifier;
 
+	wiphy->max_scan_ie_len = MT7615_SCAN_IE_LEN;
+	wiphy->max_scan_ssids = 4;
+
+	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL);
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
 
+	ieee80211_hw_set(hw, SINGLE_SCAN_ON_ALL_BANDS);
 	ieee80211_hw_set(hw, TX_STATUS_NO_AMPDU_LEN);
 
 	if (is_mt7615(&phy->dev->mt76))
@@ -403,6 +408,8 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	if (phy)
 		return 0;
 
+	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
+
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
 	if (!mphy)
@@ -459,6 +466,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	dev->phy.mt76 = &dev->mt76.phy;
 	dev->mt76.phy.priv = &dev->phy;
 	INIT_DELAYED_WORK(&dev->mt76.mac_work, mt7615_mac_work);
+	INIT_DELAYED_WORK(&dev->phy.scan_work, mt7615_scan_work);
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 	init_waitqueue_head(&dev->reset_wait);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e88914685807..d2319fb2f006 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -166,6 +166,20 @@ void mt7615_mac_set_timing(struct mt7615_phy *phy)
 
 }
 
+static void
+mt7615_get_status_freq_info(struct mt7615_dev *dev, struct mt76_phy *mphy,
+			    struct mt76_rx_status *status, u8 chfreq)
+{
+	if (!test_bit(MT76_HW_SCANNING, &mphy->state)) {
+		status->freq = mphy->chandef.chan->center_freq;
+		status->band = mphy->chandef.chan->band;
+		return;
+	}
+
+	status->band = chfreq <= 14 ? NL80211_BAND_2GHZ : NL80211_BAND_5GHZ;
+	status->freq = ieee80211_channel_to_frequency(chfreq, status->band);
+}
+
 int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
@@ -284,11 +298,10 @@ int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 		status->ext_phy = true;
 	}
 
-	if (chfreq != phy->chfreq)
+	if (!mt7615_firmware_offload(dev) && chfreq != phy->chfreq)
 		return -EINVAL;
 
-	status->freq = mphy->chandef.chan->center_freq;
-	status->band = mphy->chandef.chan->band;
+	mt7615_get_status_freq_info(dev, mphy, status, chfreq);
 	if (status->band == NL80211_BAND_5GHZ)
 		sband = &mphy->sband_5g.sband;
 	else
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 20f6e31a017d..6a7d802c69a6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -77,6 +77,7 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 	mutex_lock(&dev->mt76.mutex);
 
 	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+	cancel_delayed_work_sync(&phy->scan_work);
 
 	if (phy != &dev->phy) {
 		mt7615_mcu_set_pm(dev, 1, 1);
@@ -702,6 +703,37 @@ mt7615_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	return 0;
 }
 
+void mt7615_scan_work(struct work_struct *work)
+{
+	struct cfg80211_scan_info info = {
+		.aborted = false,
+	};
+	struct mt7615_phy *phy;
+
+	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
+						scan_work.work);
+
+	clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
+	ieee80211_scan_completed(phy->mt76->hw, &info);
+}
+
+static int
+mt7615_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+	       struct ieee80211_scan_request *req)
+{
+	struct mt76_phy *mphy = hw->priv;
+
+	return mt7615_mcu_hw_scan(mphy->priv, vif, req);
+}
+
+static void
+mt7615_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+{
+	struct mt76_phy *mphy = hw->priv;
+
+	mt7615_mcu_cancel_hw_scan(mphy->priv, vif);
+}
+
 const struct ieee80211_ops mt7615_ops = {
 	.tx = mt7615_tx,
 	.start = mt7615_start,
@@ -731,6 +763,8 @@ const struct ieee80211_ops mt7615_ops = {
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7615_set_antenna,
 	.set_coverage_class = mt7615_set_coverage_class,
+	.hw_scan = mt7615_hw_scan,
+	.cancel_hw_scan = mt7615_cancel_hw_scan,
 };
 
 static int __init mt7615_init(void)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 9ce08be2f7c7..0db81ca9c730 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -317,6 +317,26 @@ mt7615_mcu_rx_ext_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	}
 }
 
+static void
+mt7615_mcu_scan_done_event(struct mt7615_dev *dev, struct sk_buff *skb)
+{
+	struct mt7615_mcu_rxd *rxd = (struct mt7615_mcu_rxd *)skb->data;
+	struct mt7615_hw_scan_done *event;
+	struct mt7615_phy *phy;
+	struct mt76_phy *mphy;
+
+	skb_pull(skb, sizeof(*rxd));
+	event = (struct mt7615_hw_scan_done *)skb->data;
+	if (event->seq_num & BIT(7) && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+	else
+		mphy = &dev->mt76.phy;
+
+	phy = (struct mt7615_phy *)mphy->priv;
+	ieee80211_queue_delayed_work(mphy->hw, &phy->scan_work,
+				     MT7615_HW_SCAN_TIMEOUT);
+}
+
 static void
 mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 {
@@ -326,6 +346,9 @@ mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_EXT:
 		mt7615_mcu_rx_ext_event(dev, skb);
 		break;
+	case MCU_EVENT_SCAN_DONE:
+		mt7615_mcu_scan_done_event(dev, skb);
+		break;
 	default:
 		break;
 	}
@@ -340,6 +363,7 @@ void mt7615_mcu_rx_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	    rxd->ext_eid == MCU_EXT_EVENT_FW_LOG_2_HOST ||
 	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
 	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
+	    rxd->eid == MCU_EVENT_SCAN_DONE ||
 	    !rxd->seq)
 		mt7615_mcu_rx_unsolicited_event(dev, skb);
 	else
@@ -2489,3 +2513,104 @@ int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
 				       MCU_CMD_SET_CHAN_DOMAIN, false);
 }
+
+#define MT7615_SCAN_CHANNEL_TIME	120
+int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+		       struct ieee80211_scan_request *scan_req)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct cfg80211_scan_request *sreq = &scan_req->req;
+	int ext_channels_num = max_t(int, sreq->n_channels - 32, 0);
+	struct ieee80211_channel **scan_list = sreq->channels;
+	int err, i, duration = MT7615_SCAN_CHANNEL_TIME;
+	struct mt7615_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	struct mt7615_mcu_scan_channel *chan;
+	struct mt7615_hw_scan_req *req;
+	struct sk_buff *skb;
+
+	/* fall-back to sw-scan */
+	if (!mt7615_firmware_offload(dev))
+		return 1;
+
+	skb = mt7615_mcu_msg_alloc(NULL, sizeof(*req));
+	if (!skb)
+		return -ENOMEM;
+
+	set_bit(MT76_HW_SCANNING, &phy->mt76->state);
+	mvif->scan_seq_num = (mvif->scan_seq_num + 1) & 0x7f;
+
+	req = (struct mt7615_hw_scan_req *)skb_put(skb, sizeof(*req));
+
+	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
+	req->bss_idx = mvif->idx;
+	req->scan_type = 1;
+	req->ssid_type = 1;
+	req->probe_req_num = 2;
+	req->version = 1;
+	req->channel_type = 4;
+
+	for (i = 0; i < sreq->n_ssids; i++) {
+		req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
+		memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid,
+		       sreq->ssids[i].ssid_len);
+	}
+
+	req->timeout_value = cpu_to_le16(sreq->n_channels * duration);
+	req->channel_min_dwell_time = cpu_to_le16(duration);
+	req->channel_dwell_time = cpu_to_le16(duration);
+
+	req->channels_num = min_t(u8, sreq->n_channels, 32);
+	req->ext_channels_num = min_t(u8, ext_channels_num, 32);
+	for (i = 0; i < req->channels_num + req->ext_channels_num; i++) {
+		if (i >= 32)
+			chan = &req->ext_channels[i - 32];
+		else
+			chan = &req->channels[i];
+
+		chan->band = scan_list[i]->band == NL80211_BAND_2GHZ ? 1 : 2;
+		chan->channel_num = scan_list[i]->hw_value;
+	}
+
+	if (sreq->ie_len > 0) {
+		memcpy(req->ies, sreq->ie, sreq->ie_len);
+		req->ies_len = cpu_to_le16(sreq->ie_len);
+	}
+
+	memcpy(req->bssid, sreq->bssid, ETH_ALEN);
+	if (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		get_random_mask_addr(req->random_mac, sreq->mac_addr,
+				     sreq->mac_addr_mask);
+		req->scan_func = 1;
+	}
+
+	err = __mt76_mcu_skb_send_msg(&dev->mt76, skb, MCU_CMD_START_HW_SCAN,
+				      false);
+	if (err < 0)
+		clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
+
+	return err;
+}
+
+int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
+			      struct ieee80211_vif *vif)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = phy->dev;
+	struct cfg80211_scan_info info = {
+		.aborted = true,
+	};
+	struct {
+		u8 seq_num;
+		u8 is_ext_channel;
+		u8 rsv[2];
+	} __packed req = {
+		.seq_num = mvif->scan_seq_num,
+	};
+
+	ieee80211_scan_completed(phy->mt76->hw, &info);
+	clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
+
+	return __mt76_mcu_send_msg(&dev->mt76,  MCU_CMD_CANCEL_HW_SCAN, &req,
+				   sizeof(req), false);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index c03541328ad7..0ce709d2158f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -81,6 +81,7 @@ enum {
 	MCU_EVENT_GENERIC = 0x01,
 	MCU_EVENT_ACCESS_REG = 0x02,
 	MCU_EVENT_MT_PATCH_SEM = 0x04,
+	MCU_EVENT_SCAN_DONE = 0x0d,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
 	MCU_EVENT_EXT = 0xed,
 	MCU_EVENT_RESTART_DL = 0xef,
@@ -277,9 +278,98 @@ enum {
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
 };
 
+struct mt7615_mcu_scan_ssid {
+	__le32 ssid_len;
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+} __packed;
+
+struct mt7615_mcu_scan_channel {
+	u8 band; /* 1: 2.4GHz
+		  * 2: 5.0GHz
+		  * Others: Reserved
+		  */
+	u8 channel_num;
+} __packed;
+
+struct mt7615_hw_scan_req {
+	u8 seq_num;
+	u8 bss_idx;
+	u8 scan_type; /* 0: PASSIVE SCAN
+		       * 1: ACTIVE SCAN
+		       */
+	u8 ssid_type; /* BIT(0) wildcard SSID
+		       * BIT(1) P2P wildcard SSID
+		       * BIT(2) specified SSID
+		       */
+	u8 ssids_num;
+	u8 probe_req_num; /* Number of probe requests for each SSID */
+	u8 scan_func; /* BIT(0) Enable random MAC scan
+		       * BIT(1) Disable DBDC scan type 1~3.
+		       * BIT(2) Use DBDC scan type 3 (dedicated one RF to scan).
+		       */
+	u8 version; /* 0: Fields after ies are not supported.
+		     * 1: Fields after ies are supported.
+		     */
+	struct mt7615_mcu_scan_ssid ssids[4];
+	__le16 probe_delay_time;
+	__le16 channel_dwell_time; /* channel Dwell interval */
+	__le16 timeout_value;
+	u8 channel_type; /* 0: Full channels
+			  * 1: Only 2.4GHz channels
+			  * 2: Only 5GHz channels
+			  * 3: P2P social channel only (channel #1, #6 and #11)
+			  * 4: Specified channels
+			  * Others: Reserved
+			  */
+	u8 channels_num; /* valid when channel_type is 4 */
+	/* valid when channels_num is set */
+	struct mt7615_mcu_scan_channel channels[32];
+	__le16 ies_len;
+	u8 ies[MT7615_SCAN_IE_LEN];
+	/* following fields are valid if version > 0 */
+	u8 ext_channels_num;
+	u8 ext_ssids_num;
+	__le16 channel_min_dwell_time;
+	struct mt7615_mcu_scan_channel ext_channels[32];
+	struct mt7615_mcu_scan_ssid ext_ssids[6];
+	u8 bssid[ETH_ALEN];
+	u8 random_mac[ETH_ALEN]; /* valid when BIT(0) in scan_func is set. */
+	u8 pad[64];
+} __packed;
+
+#define SCAN_DONE_EVENT_MAX_CHANNEL_NUM	64
+struct mt7615_hw_scan_done {
+	u8 seq_num;
+	u8 sparse_channel_num;
+	struct mt7615_mcu_scan_channel sparse_channel;
+	u8 complete_channel_num;
+	u8 current_state;
+	u8 version;
+	u8 pad;
+	__le32 beacon_scan_num;
+	u8 pno_enabled;
+	u8 pad2[3];
+	u8 sparse_channel_valid_num;
+	u8 pad3[3];
+	u8 channel_num[SCAN_DONE_EVENT_MAX_CHANNEL_NUM];
+	/* idle format for channel_idle_time
+	 * 0: 1st byte: idle time(ms) 2nd byte: dwell time(ms)
+	 * 1: 1st byte: idle time(8ms) 2nd byte: dwell time(8ms)
+	 * 2: dwell time (16us)
+	 */
+	__le16 channel_idle_time[SCAN_DONE_EVENT_MAX_CHANNEL_NUM];
+	/* beacon and probe response count */
+	u8 beacon_probe_num[SCAN_DONE_EVENT_MAX_CHANNEL_NUM];
+	u8 mdrdy_count[SCAN_DONE_EVENT_MAX_CHANNEL_NUM];
+	__le32 beacon_2g_num;
+	__le32 beacon_5g_num;
+} __packed;
+
 /* offload mcu commands */
 enum {
+	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
 	MCU_CMD_SET_CHAN_DOMAIN = MCU_CE_PREFIX | 0x0f,
+	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
 };
 
 #define MCU_CMD_ACK		BIT(0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index c61dc2f54c52..c2faa9ef654d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -18,6 +18,7 @@
 					 MT7615_MAX_INTERFACES)
 
 #define MT7615_WATCHDOG_TIME		(HZ / 10)
+#define MT7615_HW_SCAN_TIMEOUT		(HZ / 10)
 #define MT7615_RESET_TIMEOUT		(30 * HZ)
 #define MT7615_RATE_RETRY		2
 
@@ -57,6 +58,8 @@
 #define MT7615_CFEND_RATE_DEFAULT	0x49 /* OFDM 24M */
 #define MT7615_CFEND_RATE_11B		0x03 /* 11B LP, 11M */
 
+#define MT7615_SCAN_IE_LEN		600
+
 struct mt7615_vif;
 struct mt7615_sta;
 struct mt7615_dfs_pulse;
@@ -108,6 +111,7 @@ struct mt7615_vif {
 	u8 omac_idx;
 	u8 band_idx;
 	u8 wmm_idx;
+	u8 scan_seq_num;
 
 	struct mt7615_sta sta;
 };
@@ -150,6 +154,8 @@ struct mt7615_phy {
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
+
+	struct delayed_work scan_work;
 };
 
 #define mt7615_mcu_add_tx_ba(dev, ...)	(dev)->mcu_ops->add_tx_ba((dev), __VA_ARGS__)
@@ -371,6 +377,7 @@ static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
 	return dev->fw_ver > MT7615_FIRMWARE_V2;
 }
 
+void mt7615_scan_work(struct work_struct *work);
 void mt7615_update_channel(struct mt76_dev *mdev);
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask);
 void mt7615_mac_reset_counters(struct mt7615_dev *dev);
@@ -401,6 +408,10 @@ void mt7615_mcu_exit(struct mt7615_dev *dev);
 void mt7615_mcu_fill_msg(struct mt7615_dev *dev, struct sk_buff *skb,
 			 int cmd, int *wait_seq);
 int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy);
+int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+		       struct ieee80211_scan_request *scan_req);
+int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
+			      struct ieee80211_vif *vif);
 
 int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
-- 
cgit v1.2.3-59-g8ed1b


From 20305f98177432b48892d8add9cf6b05577b5d5d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:39 +0200
Subject: mt76: mt7615: introduce scheduled scan support

Introduce scheduled scan support for mt7663e devices.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   7 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |   3 +-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  55 ++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 110 +++++++++++++++++++--
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  35 +++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  10 ++
 7 files changed, 207 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 09f407cb1c48..ca7475a95356 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -276,6 +276,7 @@ enum {
 	MT76_STATE_MCU_RUNNING,
 	MT76_SCANNING,
 	MT76_HW_SCANNING,
+	MT76_HW_SCHED_SCANNING,
 	MT76_RESTART,
 	MT76_RESET,
 	MT76_MCU_RESET,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 04bcc1a2aa8f..f4775945e3d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -349,7 +349,12 @@ mt7615_init_wiphy(struct ieee80211_hw *hw)
 	wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
 	wiphy->reg_notifier = mt7615_regd_notifier;
 
+	wiphy->max_sched_scan_plan_interval = MT7615_MAX_SCHED_SCAN_INTERVAL;
+	wiphy->max_sched_scan_ie_len = IEEE80211_MAX_DATA_LEN;
 	wiphy->max_scan_ie_len = MT7615_SCAN_IE_LEN;
+	wiphy->max_sched_scan_ssids = MT7615_MAX_SCHED_SCAN_SSID;
+	wiphy->max_match_sets = MT7615_MAX_SCAN_MATCH;
+	wiphy->max_sched_scan_reqs = 1;
 	wiphy->max_scan_ssids = 4;
 
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_SET_SCAN_DWELL);
@@ -409,6 +414,7 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 		return 0;
 
 	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
+	skb_queue_head_init(&phy->scan_event_list);
 
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
@@ -467,6 +473,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	dev->mt76.phy.priv = &dev->phy;
 	INIT_DELAYED_WORK(&dev->mt76.mac_work, mt7615_mac_work);
 	INIT_DELAYED_WORK(&dev->phy.scan_work, mt7615_scan_work);
+	skb_queue_head_init(&dev->phy.scan_event_list);
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 	init_waitqueue_head(&dev->reset_wait);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index d2319fb2f006..c9534fab2911 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -170,7 +170,8 @@ static void
 mt7615_get_status_freq_info(struct mt7615_dev *dev, struct mt76_phy *mphy,
 			    struct mt76_rx_status *status, u8 chfreq)
 {
-	if (!test_bit(MT76_HW_SCANNING, &mphy->state)) {
+	if (!test_bit(MT76_HW_SCANNING, &mphy->state) &&
+	    !test_bit(MT76_HW_SCHED_SCANNING, &mphy->state)) {
 		status->freq = mphy->chandef.chan->center_freq;
 		status->band = mphy->chandef.chan->band;
 		return;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 6a7d802c69a6..2c2d763b667c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -705,16 +705,36 @@ mt7615_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 
 void mt7615_scan_work(struct work_struct *work)
 {
-	struct cfg80211_scan_info info = {
-		.aborted = false,
-	};
 	struct mt7615_phy *phy;
 
 	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
 						scan_work.work);
 
-	clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
-	ieee80211_scan_completed(phy->mt76->hw, &info);
+	while (true) {
+		struct mt7615_mcu_rxd *rxd;
+		struct sk_buff *skb;
+
+		spin_lock_bh(&phy->dev->mt76.lock);
+		skb = __skb_dequeue(&phy->scan_event_list);
+		spin_unlock_bh(&phy->dev->mt76.lock);
+
+		if (!skb)
+			break;
+
+		rxd = (struct mt7615_mcu_rxd *)skb->data;
+		if (rxd->eid == MCU_EVENT_SCAN_DONE) {
+			struct cfg80211_scan_info info = {
+				.aborted = false,
+			};
+
+			clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
+			ieee80211_scan_completed(phy->mt76->hw, &info);
+		} else {
+			clear_bit(MT76_HW_SCHED_SCANNING, &phy->mt76->state);
+			ieee80211_sched_scan_results(phy->mt76->hw);
+		}
+		dev_kfree_skb(skb);
+	}
 }
 
 static int
@@ -734,6 +754,29 @@ mt7615_cancel_hw_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 	mt7615_mcu_cancel_hw_scan(mphy->priv, vif);
 }
 
+static int
+mt7615_start_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			struct cfg80211_sched_scan_request *req,
+			struct ieee80211_scan_ies *ies)
+{
+	struct mt76_phy *mphy = hw->priv;
+	int err;
+
+	err = mt7615_mcu_sched_scan_req(mphy->priv, vif, req);
+	if (err < 0)
+		return err;
+
+	return mt7615_mcu_sched_scan_enable(mphy->priv, vif, true);
+}
+
+static int
+mt7615_stop_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+{
+	struct mt76_phy *mphy = hw->priv;
+
+	return mt7615_mcu_sched_scan_enable(mphy->priv, vif, false);
+}
+
 const struct ieee80211_ops mt7615_ops = {
 	.tx = mt7615_tx,
 	.start = mt7615_start,
@@ -765,6 +808,8 @@ const struct ieee80211_ops mt7615_ops = {
 	.set_coverage_class = mt7615_set_coverage_class,
 	.hw_scan = mt7615_hw_scan,
 	.cancel_hw_scan = mt7615_cancel_hw_scan,
+	.sched_scan_start = mt7615_start_sched_scan,
+	.sched_scan_stop = mt7615_stop_sched_scan,
 };
 
 static int __init mt7615_init(void)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 0db81ca9c730..193808cca905 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -318,21 +318,23 @@ mt7615_mcu_rx_ext_event(struct mt7615_dev *dev, struct sk_buff *skb)
 }
 
 static void
-mt7615_mcu_scan_done_event(struct mt7615_dev *dev, struct sk_buff *skb)
+mt7615_mcu_scan_event(struct mt7615_dev *dev, struct sk_buff *skb)
 {
-	struct mt7615_mcu_rxd *rxd = (struct mt7615_mcu_rxd *)skb->data;
-	struct mt7615_hw_scan_done *event;
+	u8 *seq_num = skb->data + sizeof(struct mt7615_mcu_rxd);
 	struct mt7615_phy *phy;
 	struct mt76_phy *mphy;
 
-	skb_pull(skb, sizeof(*rxd));
-	event = (struct mt7615_hw_scan_done *)skb->data;
-	if (event->seq_num & BIT(7) && dev->mt76.phy2)
+	if (*seq_num & BIT(7) && dev->mt76.phy2)
 		mphy = dev->mt76.phy2;
 	else
 		mphy = &dev->mt76.phy;
 
 	phy = (struct mt7615_phy *)mphy->priv;
+
+	spin_lock_bh(&dev->mt76.lock);
+	__skb_queue_tail(&phy->scan_event_list, skb);
+	spin_unlock_bh(&dev->mt76.lock);
+
 	ieee80211_queue_delayed_work(mphy->hw, &phy->scan_work,
 				     MT7615_HW_SCAN_TIMEOUT);
 }
@@ -346,9 +348,10 @@ mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_EXT:
 		mt7615_mcu_rx_ext_event(dev, skb);
 		break;
+	case MCU_EVENT_SCHED_SCAN_DONE:
 	case MCU_EVENT_SCAN_DONE:
-		mt7615_mcu_scan_done_event(dev, skb);
-		break;
+		mt7615_mcu_scan_event(dev, skb);
+		return;
 	default:
 		break;
 	}
@@ -363,6 +366,7 @@ void mt7615_mcu_rx_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	    rxd->ext_eid == MCU_EXT_EVENT_FW_LOG_2_HOST ||
 	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
 	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
+	    rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
 	    !rxd->seq)
 		mt7615_mcu_rx_unsolicited_event(dev, skb);
@@ -2614,3 +2618,93 @@ int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
 	return __mt76_mcu_send_msg(&dev->mt76,  MCU_CMD_CANCEL_HW_SCAN, &req,
 				   sizeof(req), false);
 }
+
+int mt7615_mcu_sched_scan_req(struct mt7615_phy *phy,
+			      struct ieee80211_vif *vif,
+			      struct cfg80211_sched_scan_request *sreq)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct ieee80211_channel **scan_list = sreq->channels;
+	struct mt7615_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	struct mt7615_mcu_scan_channel *chan;
+	struct mt7615_sched_scan_req *req;
+	struct cfg80211_match_set *match;
+	struct cfg80211_ssid *ssid;
+	struct sk_buff *skb;
+	int i;
+
+	if (!mt7615_firmware_offload(dev))
+		return -ENOTSUPP;
+
+	skb = mt7615_mcu_msg_alloc(NULL, sizeof(*req) + sreq->ie_len);
+	if (!skb)
+		return -ENOMEM;
+
+	mvif->scan_seq_num = (mvif->scan_seq_num + 1) & 0x7f;
+
+	req = (struct mt7615_sched_scan_req *)skb_put(skb, sizeof(*req));
+	req->version = 1;
+	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
+	req->scan_func = !!(sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR);
+
+	req->ssids_num = sreq->n_ssids;
+	for (i = 0; i < req->ssids_num; i++) {
+		ssid = &sreq->ssids[i];
+		memcpy(req->ssids[i].ssid, ssid->ssid, ssid->ssid_len);
+		req->ssids[i].ssid_len = cpu_to_le32(ssid->ssid_len);
+	}
+
+	req->match_num = sreq->n_match_sets;
+	for (i = 0; i < req->match_num; i++) {
+		match = &sreq->match_sets[i];
+		memcpy(req->match[i].ssid, match->ssid.ssid,
+		       match->ssid.ssid_len);
+		req->match[i].rssi_th = cpu_to_le32(match->rssi_thold);
+		req->match[i].ssid_len = match->ssid.ssid_len;
+	}
+
+	req->channel_type = 4;
+	req->channels_num = min_t(u8, sreq->n_channels, 64);
+	for (i = 0; i < req->channels_num; i++) {
+		chan = &req->channels[i];
+		chan->band = scan_list[i]->band == NL80211_BAND_2GHZ ? 1 : 2;
+		chan->channel_num = scan_list[i]->hw_value;
+	}
+
+	req->intervals_num = sreq->n_scan_plans;
+	for (i = 0; i < req->intervals_num; i++)
+		req->intervals[i] = cpu_to_le16(sreq->scan_plans[i].interval);
+
+	if (sreq->ie_len > 0) {
+		req->ie_len = cpu_to_le16(sreq->ie_len);
+		memcpy(skb_put(skb, sreq->ie_len), sreq->ie, sreq->ie_len);
+	}
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_CMD_SCHED_SCAN_REQ, false);
+}
+
+int mt7615_mcu_sched_scan_enable(struct mt7615_phy *phy,
+				 struct ieee80211_vif *vif,
+				 bool enable)
+{
+	struct mt7615_dev *dev = phy->dev;
+	struct {
+		u8 active; /* 0: enabled 1: disabled */
+		u8 rsv[3];
+	} __packed req = {
+		.active = !enable,
+	};
+
+	if (!mt7615_firmware_offload(dev))
+		return -ENOTSUPP;
+
+	if (enable)
+		set_bit(MT76_HW_SCHED_SCANNING, &phy->mt76->state);
+	else
+		clear_bit(MT76_HW_SCHED_SCANNING, &phy->mt76->state);
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SCHED_SCAN_ENABLE,
+				   &req, sizeof(req), false);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 0ce709d2158f..7e55da2b6573 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -83,6 +83,7 @@ enum {
 	MCU_EVENT_MT_PATCH_SEM = 0x04,
 	MCU_EVENT_SCAN_DONE = 0x0d,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
+	MCU_EVENT_SCHED_SCAN_DONE = 0x23,
 	MCU_EVENT_EXT = 0xed,
 	MCU_EVENT_RESTART_DL = 0xef,
 };
@@ -291,6 +292,13 @@ struct mt7615_mcu_scan_channel {
 	u8 channel_num;
 } __packed;
 
+struct mt7615_mcu_scan_match {
+	__le32 rssi_th;
+	u8 ssid[IEEE80211_MAX_SSID_LEN];
+	u8 ssid_len;
+	u8 rsv[3];
+} __packed;
+
 struct mt7615_hw_scan_req {
 	u8 seq_num;
 	u8 bss_idx;
@@ -365,11 +373,38 @@ struct mt7615_hw_scan_done {
 	__le32 beacon_5g_num;
 } __packed;
 
+struct mt7615_sched_scan_req {
+	u8 version;
+	u8 seq_num;
+	u8 stop_on_match;
+	u8 ssids_num;
+	u8 match_num;
+	u8 pad;
+	__le16 ie_len;
+	struct mt7615_mcu_scan_ssid ssids[MT7615_MAX_SCHED_SCAN_SSID];
+	struct mt7615_mcu_scan_match match[MT7615_MAX_SCAN_MATCH];
+	u8 channel_type;
+	u8 channels_num;
+	u8 intervals_num;
+	u8 scan_func;
+	struct mt7615_mcu_scan_channel channels[64];
+	__le16 intervals[MT7615_MAX_SCHED_SCAN_INTERVAL];
+	u8 pad2[64];
+} __packed;
+
+struct nt7615_sched_scan_done {
+	u8 seq_num;
+	u8 status; /* 0: ssid found */
+	__le16 pad;
+} __packed;
+
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
 	MCU_CMD_SET_CHAN_DOMAIN = MCU_CE_PREFIX | 0x0f,
 	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
+	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
+	MCU_CMD_SCHED_SCAN_REQ = MCU_CE_PREFIX | 0x62,
 };
 
 #define MCU_CMD_ACK		BIT(0)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index c2faa9ef654d..9206157e255c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -59,6 +59,9 @@
 #define MT7615_CFEND_RATE_11B		0x03 /* 11B LP, 11M */
 
 #define MT7615_SCAN_IE_LEN		600
+#define MT7615_MAX_SCHED_SCAN_INTERVAL	10
+#define MT7615_MAX_SCHED_SCAN_SSID	10
+#define MT7615_MAX_SCAN_MATCH		16
 
 struct mt7615_vif;
 struct mt7615_sta;
@@ -155,6 +158,7 @@ struct mt7615_phy {
 
 	struct mib_stats mib;
 
+	struct sk_buff_head scan_event_list;
 	struct delayed_work scan_work;
 };
 
@@ -412,6 +416,12 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		       struct ieee80211_scan_request *scan_req);
 int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
 			      struct ieee80211_vif *vif);
+int mt7615_mcu_sched_scan_req(struct mt7615_phy *phy,
+			      struct ieee80211_vif *vif,
+			      struct cfg80211_sched_scan_request *sreq);
+int mt7615_mcu_sched_scan_enable(struct mt7615_phy *phy,
+				 struct ieee80211_vif *vif,
+				 bool enable);
 
 int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
-- 
cgit v1.2.3-59-g8ed1b


From bb366c5b88be7b96ba4ec09c9de3ca649c89c9d8 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 2 Apr 2020 15:06:40 +0200
Subject: mt76: mt7615: introduce BSS absence event

Introduce the BSS absence event, which is reported when the fw
is leaving or entering the current operational channel.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 24 ++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h |  8 ++++++++
 2 files changed, 32 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 193808cca905..5385d16e435d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -339,6 +339,26 @@ mt7615_mcu_scan_event(struct mt7615_dev *dev, struct sk_buff *skb)
 				     MT7615_HW_SCAN_TIMEOUT);
 }
 
+static void
+mt7615_mcu_bss_event(struct mt7615_dev *dev, struct sk_buff *skb)
+{
+	struct mt7615_mcu_bss_event *event;
+	struct mt76_phy *mphy;
+
+	event = (struct mt7615_mcu_bss_event *)(skb->data +
+						sizeof(struct mt7615_mcu_rxd));
+
+	if (event->bss_idx && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+	else
+		mphy = &dev->mt76.phy;
+
+	if (event->is_absent)
+		ieee80211_stop_queues(mphy->hw);
+	else
+		ieee80211_wake_queues(mphy->hw);
+}
+
 static void
 mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 {
@@ -352,6 +372,9 @@ mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_SCAN_DONE:
 		mt7615_mcu_scan_event(dev, skb);
 		return;
+	case MCU_EVENT_BSS_ABSENCE:
+		mt7615_mcu_bss_event(dev, skb);
+		break;
 	default:
 		break;
 	}
@@ -367,6 +390,7 @@ void mt7615_mcu_rx_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
 	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
 	    rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
+	    rxd->eid == MCU_EVENT_BSS_ABSENCE ||
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
 	    !rxd->seq)
 		mt7615_mcu_rx_unsolicited_event(dev, skb);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 7e55da2b6573..e6a927fd209f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -82,6 +82,7 @@ enum {
 	MCU_EVENT_ACCESS_REG = 0x02,
 	MCU_EVENT_MT_PATCH_SEM = 0x04,
 	MCU_EVENT_SCAN_DONE = 0x0d,
+	MCU_EVENT_BSS_ABSENCE  = 0x11,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
 	MCU_EVENT_SCHED_SCAN_DONE = 0x23,
 	MCU_EVENT_EXT = 0xed,
@@ -398,6 +399,13 @@ struct nt7615_sched_scan_done {
 	__le16 pad;
 } __packed;
 
+struct mt7615_mcu_bss_event {
+	u8 bss_idx;
+	u8 is_absent;
+	u8 free_quota;
+	u8 pad;
+} __packed;
+
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
-- 
cgit v1.2.3-59-g8ed1b


From 5d3a4a4b57aa557d9d48bf2133fbd99f3bf1f354 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 15:06:41 +0200
Subject: mt76: mt7615: introduce rlm tlv in bss_info mcu command

Introduce the rlm tlv header in the bss_info mcu command in order to
inform the mcu about the operating channel. The rlm header is necessary
only if the mcu is running low-power functionality (e.g. offloaded scan).

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |   6 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 111 ++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   4 +-
 4 files changed, 104 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 2c2d763b667c..c91f0896b09f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -410,15 +410,15 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 				    u32 changed)
 {
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 
 	mutex_lock(&dev->mt76.mutex);
 
 	if (changed & BSS_CHANGED_ASSOC)
-		mt7615_mcu_add_bss_info(dev, vif, info->assoc);
+		mt7615_mcu_add_bss_info(phy, vif, info->assoc);
 
 	if (changed & BSS_CHANGED_ERP_SLOT) {
 		int slottime = info->use_short_slot ? 9 : 20;
-		struct mt7615_phy *phy = mt7615_hw_phy(hw);
 
 		if (slottime != phy->slottime) {
 			phy->slottime = slottime;
@@ -427,7 +427,7 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changed & BSS_CHANGED_BEACON_ENABLED) {
-		mt7615_mcu_add_bss_info(dev, vif, info->enable_beacon);
+		mt7615_mcu_add_bss_info(phy, vif, info->enable_beacon);
 		mt7615_mcu_sta_add(dev, vif, NULL, info->enable_beacon);
 	}
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 5385d16e435d..9bb65de0cc64 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -967,10 +967,11 @@ mt7615_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 }
 
 static int
-mt7615_mcu_add_bss(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+mt7615_mcu_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		   bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = phy->dev;
 	struct sk_buff *skb;
 
 	skb = mt7615_mcu_alloc_sta_req(mvif, NULL);
@@ -1244,12 +1245,15 @@ mt7615_mcu_uni_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
 }
 
 static int
-mt7615_mcu_uni_add_bss(struct mt7615_dev *dev,
+mt7615_mcu_uni_add_bss(struct mt7615_phy *phy,
 		       struct ieee80211_vif *vif, bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	int freq1 = chandef->center_freq1, freq2 = chandef->center_freq2;
+	struct mt7615_dev *dev = phy->dev;
 	struct {
-		struct req_hdr {
+		struct {
 			u8 bss_idx;
 			u8 pad[3];
 		} __packed hdr;
@@ -1267,12 +1271,18 @@ mt7615_mcu_uni_add_bss(struct mt7615_dev *dev,
 			__le16 bmc_tx_wlan_idx;
 			__le16 bcn_interval;
 			u8 dtim_period;
-			u8 phymode;
+			u8 phymode; /* bit(0): A
+				     * bit(1): B
+				     * bit(2): G
+				     * bit(3): GN
+				     * bit(4): AN
+				     * bit(5): AC
+				     */
 			__le16 sta_idx;
 			u8 nonht_basic_phy;
 			u8 pad[3];
 		} __packed basic;
-	} req = {
+	} basic_req = {
 		.hdr = {
 			.bss_idx = mvif->idx,
 		},
@@ -1285,17 +1295,53 @@ mt7615_mcu_uni_add_bss(struct mt7615_dev *dev,
 			.band_idx = mvif->band_idx,
 			.wmm_idx = mvif->wmm_idx,
 			.active = enable,
+			.phymode = 0x38,
+		},
+	};
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct rlm_tlv {
+			__le16 tag;
+			__le16 len;
+			u8 control_channel;
+			u8 center_chan;
+			u8 center_chan2;
+			u8 bw;
+			u8 tx_streams;
+			u8 rx_streams;
+			u8 short_st;
+			u8 ht_op_info;
+			u8 sco;
+			u8 pad[3];
+		} __packed rlm;
+	} __packed rlm_req = {
+		.hdr = {
+			.bss_idx = mvif->idx,
+		},
+		.rlm = {
+			.tag = cpu_to_le16(UNI_BSS_INFO_RLM),
+			.len = cpu_to_le16(sizeof(struct rlm_tlv)),
+			.control_channel = chandef->chan->hw_value,
+			.center_chan = ieee80211_frequency_to_channel(freq1),
+			.center_chan2 = ieee80211_frequency_to_channel(freq2),
+			.tx_streams = hweight8(phy->mt76->antenna_mask),
+			.rx_streams = phy->chainmask,
+			.short_st = true,
 		},
 	};
 	u8 idx, tx_wlan_idx = 0;
+	int err;
 
 	idx = mvif->omac_idx > EXT_BSSID_START ? HW_BSSID_0 : mvif->omac_idx;
-	req.basic.hw_bss_idx = idx;
+	basic_req.basic.hw_bss_idx = idx;
 
 	switch (vif->type) {
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
-		req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
 		tx_wlan_idx = mvif->sta.wcid.idx;
 		break;
 	case NL80211_IFTYPE_STATION:
@@ -1314,20 +1360,59 @@ mt7615_mcu_uni_add_bss(struct mt7615_dev *dev,
 			tx_wlan_idx = msta->wcid.idx;
 			rcu_read_unlock();
 		}
-		req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
 		break;
 	default:
 		WARN_ON(1);
 		break;
 	}
 
-	memcpy(req.basic.bssid, vif->bss_conf.bssid, ETH_ALEN);
-	req.basic.bmc_tx_wlan_idx = cpu_to_le16(tx_wlan_idx);
-	req.basic.sta_idx = cpu_to_le16(tx_wlan_idx);
-	req.basic.conn_state = !enable;
+	memcpy(basic_req.basic.bssid, vif->bss_conf.bssid, ETH_ALEN);
+	basic_req.basic.bmc_tx_wlan_idx = cpu_to_le16(tx_wlan_idx);
+	basic_req.basic.sta_idx = cpu_to_le16(tx_wlan_idx);
+	basic_req.basic.conn_state = !enable;
+
+	err = __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_BSS_INFO_UPDATE,
+				  &basic_req, sizeof(basic_req), true);
+	if (err < 0)
+		return err;
+
+	if (!mt7615_firmware_offload(dev))
+		return 0;
+
+	switch (chandef->width) {
+	case NL80211_CHAN_WIDTH_40:
+		rlm_req.rlm.bw = CMD_CBW_40MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+		rlm_req.rlm.bw = CMD_CBW_80MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_80P80:
+		rlm_req.rlm.bw = CMD_CBW_8080MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_160:
+		rlm_req.rlm.bw = CMD_CBW_160MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_5:
+		rlm_req.rlm.bw = CMD_CBW_5MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_10:
+		rlm_req.rlm.bw = CMD_CBW_10MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_20_NOHT:
+	case NL80211_CHAN_WIDTH_20:
+	default:
+		rlm_req.rlm.bw = CMD_CBW_20MHZ;
+		break;
+	}
+
+	if (rlm_req.rlm.control_channel < rlm_req.rlm.center_chan)
+		rlm_req.rlm.sco = 1; /* SCA */
+	else if (rlm_req.rlm.control_channel > rlm_req.rlm.center_chan)
+		rlm_req.rlm.sco = 3; /* SCB */
 
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_BSS_INFO_UPDATE,
-				   &req, sizeof(req), true);
+				   &rlm_req, sizeof(rlm_req), true);
 }
 
 static int
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index e6a927fd209f..43c13a47cea2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -423,6 +423,7 @@ enum {
 
 enum {
 	UNI_BSS_INFO_BASIC = 0,
+	UNI_BSS_INFO_RLM = 2,
 	UNI_BSS_INFO_BCN_CONTENT = 7,
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 9206157e255c..a0ddb35b5e26 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -166,7 +166,7 @@ struct mt7615_phy {
 #define mt7615_mcu_add_rx_ba(dev, ...)	(dev)->mcu_ops->add_rx_ba((dev), __VA_ARGS__)
 #define mt7615_mcu_sta_add(dev, ...)	(dev)->mcu_ops->sta_add((dev),  __VA_ARGS__)
 #define mt7615_mcu_add_dev_info(dev, ...) (dev)->mcu_ops->add_dev_info((dev),  __VA_ARGS__)
-#define mt7615_mcu_add_bss_info(dev, ...) (dev)->mcu_ops->add_bss_info((dev),  __VA_ARGS__)
+#define mt7615_mcu_add_bss_info(phy, ...) ((phy)->dev)->mcu_ops->add_bss_info((phy),  __VA_ARGS__)
 #define mt7615_mcu_add_beacon(dev, ...)	(dev)->mcu_ops->add_beacon_offload((dev),  __VA_ARGS__)
 #define mt7615_mcu_set_pm(dev, ...)	(dev)->mcu_ops->set_pm_state((dev),  __VA_ARGS__)
 struct mt7615_mcu_ops {
@@ -181,7 +181,7 @@ struct mt7615_mcu_ops {
 		       struct ieee80211_sta *sta, bool enable);
 	int (*add_dev_info)(struct mt7615_dev *dev,
 			    struct ieee80211_vif *vif, bool enable);
-	int (*add_bss_info)(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+	int (*add_bss_info)(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 			    bool enable);
 	int (*add_beacon_offload)(struct mt7615_dev *dev,
 				  struct ieee80211_hw *hw,
-- 
cgit v1.2.3-59-g8ed1b


From 34cdf1a618eb071a62362eb79428835167630531 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 2 Apr 2020 15:06:42 +0200
Subject: mt76: mt7615: remove unnecessary register operations

Remove mt76_wr(dev, MT_CSR(0x010), 0x8208), which would cause
MT_PCIE_IRQ_ENABLE to be disabled; MT_PCIE_IRQ_ENABLE should always stay
enabled while the driver is running.

Also remove the write to 0x44064, which is not an existing address.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index f4775945e3d7..be0f3a77e22f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -117,8 +117,6 @@ static void mt7615_mac_init(struct mt7615_dev *dev)
 		FIELD_PREP(MT_DMA_DCR0_MAX_RX_LEN, 3072) |
 		MT_DMA_DCR0_RX_VEC_DROP);
 	if (is_mt7663(&dev->mt76)) {
-		mt76_wr(dev, MT_CSR(0x010), 0x8208);
-		mt76_wr(dev, 0x44064, 0x2000000);
 		mt76_wr(dev, MT_WF_AGG(0x160), 0x5c341c02);
 		mt76_wr(dev, MT_WF_AGG(0x164), 0x70708040);
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From bb31a80eb2b36cccf5d72b616381aff650ac3961 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:48 +0200
Subject: mt76: add headroom and tailroom to mt76_mcu_ops data structure

Introduce headroom and tailroom to the mt76_mcu_ops data structure in
order to unify the routine used for mcu message allocation. This is a
preliminary patch to add mt7663u support.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mcu.c           |  9 ++--
 drivers/net/wireless/mediatek/mt76/mt76.h          |  7 ++-
 drivers/net/wireless/mediatek/mt76/mt7603/mcu.c    |  3 +-
 drivers/net/wireless/mediatek/mt76/mt7603/mcu.h    |  7 ---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 63 ++++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  7 ---
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c   |  2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h   |  6 ---
 .../net/wireless/mediatek/mt76/mt76x02_usb_mcu.c   |  4 +-
 9 files changed, 51 insertions(+), 57 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mcu.c b/drivers/net/wireless/mediatek/mt76/mcu.c
index 4048f446e3ee..ade61a5334c6 100644
--- a/drivers/net/wireless/mediatek/mt76/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mcu.c
@@ -6,10 +6,11 @@
 #include "mt76.h"
 
 struct sk_buff *
-mt76_mcu_msg_alloc(const void *data, int head_len,
-		   int data_len, int tail_len)
+mt76_mcu_msg_alloc(struct mt76_dev *dev, const void *data,
+		   int data_len)
 {
-	int length = head_len + data_len + tail_len;
+	const struct mt76_mcu_ops *ops = dev->mcu_ops;
+	int length = ops->headroom + data_len + ops->tailroom;
 	struct sk_buff *skb;
 
 	skb = alloc_skb(length, GFP_KERNEL);
@@ -17,7 +18,7 @@ mt76_mcu_msg_alloc(const void *data, int head_len,
 		return NULL;
 
 	memset(skb->head, 0, length);
-	skb_reserve(skb, head_len);
+	skb_reserve(skb, ops->headroom);
 
 	if (data && data_len)
 		skb_put_data(skb, data, data_len);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index ca7475a95356..e31d98a4f88f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -137,6 +137,9 @@ struct mt76_sw_queue {
 };
 
 struct mt76_mcu_ops {
+	u32 headroom;
+	u32 tailroom;
+
 	int (*mcu_send_msg)(struct mt76_dev *dev, int cmd, const void *data,
 			    int len, bool wait_resp);
 	int (*mcu_skb_send_msg)(struct mt76_dev *dev, struct sk_buff *skb,
@@ -914,8 +917,8 @@ int mt76u_resume_rx(struct mt76_dev *dev);
 void mt76u_queues_deinit(struct mt76_dev *dev);
 
 struct sk_buff *
-mt76_mcu_msg_alloc(const void *data, int head_len,
-		   int data_len, int tail_len);
+mt76_mcu_msg_alloc(struct mt76_dev *dev, const void *data,
+		   int data_len);
 void mt76_mcu_rx_event(struct mt76_dev *dev, struct sk_buff *skb);
 struct sk_buff *mt76_mcu_get_response(struct mt76_dev *dev,
 				      unsigned long expires);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
index 77985d81c447..a47a3a644ecc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.c
@@ -62,7 +62,7 @@ mt7603_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 	struct sk_buff *skb;
 	int ret, seq;
 
-	skb = mt7603_mcu_msg_alloc(data, len);
+	skb = mt76_mcu_msg_alloc(mdev, data, len);
 	if (!skb)
 		return -ENOMEM;
 
@@ -265,6 +265,7 @@ out:
 int mt7603_mcu_init(struct mt7603_dev *dev)
 {
 	static const struct mt76_mcu_ops mt7603_mcu_ops = {
+		.headroom = sizeof(struct mt7603_mcu_txd),
 		.mcu_send_msg = mt7603_mcu_msg_send,
 		.mcu_restart = mt7603_mcu_restart,
 	};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.h
index 1bba369d5c8a..30df8a3fd11a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mcu.h
@@ -100,11 +100,4 @@ enum {
 	MCU_EXT_EVENT_BCN_UPDATE = 0x31,
 };
 
-static inline struct sk_buff *
-mt7603_mcu_msg_alloc(const void *data, int len)
-{
-	return mt76_mcu_msg_alloc(data, sizeof(struct mt7603_mcu_txd),
-				  len, 0);
-}
-
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 9bb65de0cc64..12759158e69a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -244,7 +244,7 @@ mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 {
 	struct sk_buff *skb;
 
-	skb = mt7615_mcu_msg_alloc(data, len);
+	skb = mt76_mcu_msg_alloc(mdev, data, len);
 	if (!skb)
 		return -ENOMEM;
 
@@ -552,7 +552,8 @@ mt7615_mcu_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
 }
 
 static struct sk_buff *
-mt7615_mcu_alloc_sta_req(struct mt7615_vif *mvif, struct mt7615_sta *msta)
+mt7615_mcu_alloc_sta_req(struct mt7615_dev *dev, struct mt7615_vif *mvif,
+			 struct mt7615_sta *msta)
 {
 	struct sta_req_hdr hdr = {
 		.bss_idx = mvif->idx,
@@ -562,7 +563,7 @@ mt7615_mcu_alloc_sta_req(struct mt7615_vif *mvif, struct mt7615_sta *msta)
 	};
 	struct sk_buff *skb;
 
-	skb = mt7615_mcu_msg_alloc(NULL, MT7615_STA_UPDATE_MAX_SIZE);
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, MT7615_STA_UPDATE_MAX_SIZE);
 	if (!skb)
 		return ERR_PTR(-ENOMEM);
 
@@ -572,8 +573,8 @@ mt7615_mcu_alloc_sta_req(struct mt7615_vif *mvif, struct mt7615_sta *msta)
 }
 
 static struct wtbl_req_hdr *
-mt7615_mcu_alloc_wtbl_req(struct mt7615_sta *msta, int cmd,
-			  void *sta_wtbl, struct sk_buff **skb)
+mt7615_mcu_alloc_wtbl_req(struct mt7615_dev *dev, struct mt7615_sta *msta,
+			  int cmd, void *sta_wtbl, struct sk_buff **skb)
 {
 	struct tlv *sta_hdr = sta_wtbl;
 	struct wtbl_req_hdr hdr = {
@@ -583,7 +584,8 @@ mt7615_mcu_alloc_wtbl_req(struct mt7615_sta *msta, int cmd,
 	struct sk_buff *nskb = *skb;
 
 	if (!nskb) {
-		nskb = mt7615_mcu_msg_alloc(NULL, MT7615_WTBL_UPDATE_BA_SIZE);
+		nskb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+					  MT7615_WTBL_UPDATE_BA_SIZE);
 		if (!nskb)
 			return ERR_PTR(-ENOMEM);
 
@@ -974,7 +976,7 @@ mt7615_mcu_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	struct mt7615_dev *dev = phy->dev;
 	struct sk_buff *skb;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, NULL);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, NULL);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1001,7 +1003,7 @@ mt7615_mcu_wtbl_tx_ba(struct mt7615_dev *dev,
 	struct sk_buff *skb = NULL;
 	int err;
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_SET, NULL, &skb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, NULL, &skb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
@@ -1012,7 +1014,7 @@ mt7615_mcu_wtbl_tx_ba(struct mt7615_dev *dev,
 	if (err < 0)
 		return err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1033,7 +1035,7 @@ mt7615_mcu_wtbl_rx_ba(struct mt7615_dev *dev,
 	struct sk_buff *skb;
 	int err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1045,7 +1047,7 @@ mt7615_mcu_wtbl_rx_ba(struct mt7615_dev *dev,
 		return err;
 
 	skb = NULL;
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_SET, NULL, &skb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, NULL, &skb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
@@ -1067,7 +1069,7 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 
 	msta = sta ? (struct mt7615_sta *)sta->drv_priv : &mvif->sta;
 
-	sskb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	sskb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(sskb))
 		return PTR_ERR(sskb);
 
@@ -1075,8 +1077,8 @@ mt7615_mcu_wtbl_sta_add(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 	if (enable && sta)
 		mt7615_mcu_sta_ht_tlv(sskb, sta);
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_RESET_AND_SET, NULL,
-					     &wskb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_RESET_AND_SET,
+					     NULL, &wskb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
@@ -1120,7 +1122,7 @@ mt7615_mcu_sta_ba(struct mt7615_dev *dev,
 	struct tlv *sta_wtbl;
 	struct sk_buff *skb;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1128,7 +1130,8 @@ mt7615_mcu_sta_ba(struct mt7615_dev *dev,
 
 	sta_wtbl = mt7615_mcu_add_tlv(skb, STA_REC_WTBL, sizeof(struct tlv));
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_SET, sta_wtbl, &skb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, sta_wtbl,
+					     &skb);
 	mt7615_mcu_wtbl_ba_tlv(skb, params, enable, tx, sta_wtbl, wtbl_hdr);
 
 	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
@@ -1163,7 +1166,7 @@ mt7615_mcu_add_sta_cmd(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 
 	msta = sta ? (struct mt7615_sta *)sta->drv_priv : &mvif->sta;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1173,7 +1176,7 @@ mt7615_mcu_add_sta_cmd(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 
 	sta_wtbl = mt7615_mcu_add_tlv(skb, STA_REC_WTBL, sizeof(struct tlv));
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_RESET_AND_SET,
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_RESET_AND_SET,
 					     sta_wtbl, &skb);
 	if (enable) {
 		mt7615_mcu_wtbl_generic_tlv(skb, vif, sta, sta_wtbl, wtbl_hdr);
@@ -1499,13 +1502,14 @@ mt7615_mcu_uni_tx_ba(struct mt7615_dev *dev,
 	struct sk_buff *skb;
 	int err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
 	sta_wtbl = mt7615_mcu_add_tlv(skb, STA_REC_WTBL, sizeof(struct tlv));
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_SET, sta_wtbl, &skb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, sta_wtbl,
+					     &skb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
@@ -1517,7 +1521,7 @@ mt7615_mcu_uni_tx_ba(struct mt7615_dev *dev,
 	if (err < 0)
 		return err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1539,7 +1543,7 @@ mt7615_mcu_uni_rx_ba(struct mt7615_dev *dev,
 	struct sk_buff *skb;
 	int err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1550,13 +1554,14 @@ mt7615_mcu_uni_rx_ba(struct mt7615_dev *dev,
 	if (err < 0 || !enable)
 		return err;
 
-	skb = mt7615_mcu_alloc_sta_req(mvif, msta);
+	skb = mt7615_mcu_alloc_sta_req(dev, mvif, msta);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
 	sta_wtbl = mt7615_mcu_add_tlv(skb, STA_REC_WTBL, sizeof(struct tlv));
 
-	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(msta, WTBL_SET, sta_wtbl, &skb);
+	wtbl_hdr = mt7615_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, sta_wtbl,
+					     &skb);
 	if (IS_ERR(wtbl_hdr))
 		return PTR_ERR(wtbl_hdr);
 
@@ -2114,6 +2119,7 @@ static int mt7663_load_firmware(struct mt7615_dev *dev)
 int mt7615_mcu_init(struct mt7615_dev *dev)
 {
 	static const struct mt76_mcu_ops mt7615_mcu_ops = {
+		.headroom = sizeof(struct mt7615_mcu_txd),
 		.mcu_skb_send_msg = mt7615_mcu_send_message,
 		.mcu_send_msg = mt7615_mcu_msg_send,
 		.mcu_restart = mt7615_mcu_restart,
@@ -2186,7 +2192,7 @@ int mt7615_mcu_set_eeprom(struct mt7615_dev *dev)
 
 	req_hdr.len = cpu_to_le16(eep_len);
 
-	skb = mt7615_mcu_msg_alloc(NULL, sizeof(req_hdr) + eep_len);
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, sizeof(req_hdr) + eep_len);
 	if (!skb)
 		return -ENOMEM;
 
@@ -2601,7 +2607,7 @@ int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 	if (!mt7615_firmware_offload(dev))
 		return 0;
 
-	skb = mt7615_mcu_msg_alloc(NULL, len);
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, len);
 	if (!skb)
 		return -ENOMEM;
 
@@ -2646,7 +2652,7 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	if (!mt7615_firmware_offload(dev))
 		return 1;
 
-	skb = mt7615_mcu_msg_alloc(NULL, sizeof(*req));
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, sizeof(*req));
 	if (!skb)
 		return -ENOMEM;
 
@@ -2746,7 +2752,8 @@ int mt7615_mcu_sched_scan_req(struct mt7615_phy *phy,
 	if (!mt7615_firmware_offload(dev))
 		return -ENOTSUPP;
 
-	skb = mt7615_mcu_msg_alloc(NULL, sizeof(*req) + sreq->ie_len);
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+				 sizeof(*req) + sreq->ie_len);
 	if (!skb)
 		return -ENOMEM;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 43c13a47cea2..69cb68d6465d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -845,11 +845,4 @@ enum {
 	CH_SWITCH_SCAN_BYPASS_DPD = 9
 };
 
-static inline struct sk_buff *
-mt7615_mcu_msg_alloc(const void *data, int len)
-{
-	return mt76_mcu_msg_alloc(data, sizeof(struct mt7615_mcu_txd),
-				  len, 0);
-}
-
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
index 8247611d9b18..89a8992d84fa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
@@ -23,7 +23,7 @@ int mt76x02_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 	if (mt76_is_mmio(&dev->mt76) && dev->mcu_timeout)
 		return -EIO;
 
-	skb = mt76x02_mcu_msg_alloc(data, len);
+	skb = mt76_mcu_msg_alloc(mdev, data, len);
 	if (!skb)
 		return -ENOMEM;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h
index c81a9655c4c9..5fba1266c648 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.h
@@ -85,12 +85,6 @@ struct mt76x02_patch_header {
 	u8 pad[2];
 };
 
-static inline struct sk_buff *
-mt76x02_mcu_msg_alloc(const void *data, int len)
-{
-	return mt76_mcu_msg_alloc(data, 0, len, 0);
-}
-
 int mt76x02_mcu_cleanup(struct mt76x02_dev *dev);
 int mt76x02_mcu_calibrate(struct mt76x02_dev *dev, int type, u32 param);
 int mt76x02_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
index 843b86560ed4..a30bb536fc8a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_usb_mcu.c
@@ -123,7 +123,7 @@ mt76x02u_mcu_send_msg(struct mt76_dev *dev, int cmd, const void *data,
 	struct sk_buff *skb;
 	int err;
 
-	skb = mt76_mcu_msg_alloc(data, MT_CMD_HDR_LEN, len, 8);
+	skb = mt76_mcu_msg_alloc(dev, data, len);
 	if (!skb)
 		return -ENOMEM;
 
@@ -291,6 +291,8 @@ EXPORT_SYMBOL_GPL(mt76x02u_mcu_fw_send_data);
 void mt76x02u_init_mcu(struct mt76_dev *dev)
 {
 	static const struct mt76_mcu_ops mt76x02u_mcu_ops = {
+		.headroom = MT_CMD_HDR_LEN,
+		.tailroom = 8,
 		.mcu_send_msg = mt76x02u_mcu_send_msg,
 		.mcu_wr_rp = mt76x02u_mcu_wr_rp,
 		.mcu_rd_rp = mt76x02u_mcu_rd_rp,
-- 
cgit v1.2.3-59-g8ed1b


From d4bf77bd749300123481a22df82b8ab618844153 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:49 +0200
Subject: mt76: mt7615: introduce mt7663u support to mt7615_write_txwi

Extend the mt7615_mac_write_txwi routine to support usb txwi configuration.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 12 ++++++++----
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h |  1 +
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index c9534fab2911..dafd47cc7f6a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -528,11 +528,12 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 	struct ieee80211_vif *vif = info->control.vif;
 	struct mt76_phy *mphy = &dev->mphy;
 	bool ext_phy = info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY;
+	bool is_usb = mt76_is_usb(&dev->mt76);
 	int tx_count = 8;
 	u8 fc_type, fc_stype, p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
 	__le16 fc = hdr->frame_control;
+	u32 val, sz_txd = is_usb ? MT_USB_TXD_SIZE : MT_TXD_SIZE;
 	u16 seqno = 0;
-	u32 val;
 
 	if (vif) {
 		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
@@ -556,7 +557,7 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 	if (ieee80211_is_data(fc) || ieee80211_is_bufferable_mmpdu(fc)) {
 		q_idx = wmm_idx * MT7615_MAX_WMM_SETS +
 			skb_get_queue_mapping(skb);
-		p_fmt = MT_TX_TYPE_CT;
+		p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT;
 	} else if (beacon) {
 		if (ext_phy)
 			q_idx = MT_LMAC_BCN1;
@@ -568,10 +569,10 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 			q_idx = MT_LMAC_ALTX1;
 		else
 			q_idx = MT_LMAC_ALTX0;
-		p_fmt = MT_TX_TYPE_CT;
+		p_fmt = is_usb ? MT_TX_TYPE_SF : MT_TX_TYPE_CT;
 	}
 
-	val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + MT_TXD_SIZE) |
+	val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + sz_txd) |
 	      FIELD_PREP(MT_TXD0_P_IDX, MT_TX_PORT_IDX_LMAC) |
 	      FIELD_PREP(MT_TXD0_Q_IDX, q_idx);
 	txwi[0] = cpu_to_le32(val);
@@ -665,6 +666,9 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 
 	txwi[7] = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
 		  FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype);
+	if (is_usb)
+		txwi[8] = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) |
+			  FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index e0b89257db90..422a140fd812 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -165,6 +165,7 @@ enum tx_phy_bandwidth {
 #define MT_CT_INFO_NONE_CIPHER_FRAME	BIT(3)
 #define MT_CT_INFO_HSR2_TX		BIT(4)
 
+#define MT_USB_TXD_SIZE			(MT_TXD_SIZE + 8 * 4)
 #define MT_TXD_SIZE			(8 * 4)
 
 #define MT_TXD0_P_IDX			BIT(31)
-- 
cgit v1.2.3-59-g8ed1b


From 8f93af9cac300e063e7a66447e29810e9f44eabf Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:50 +0200
Subject: mt76: mt7615: introduce mt7615_mac_update_rate_desc routine

Move register configuration out of mt7615_mac_set_rates since the usb
driver can't access device registers in interrupt context. Introduce the
mt7615_mac_update_rate_desc routine to report rate info to
mt7615_mac_set_rates.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 102 ++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  12 +++
 2 files changed, 71 insertions(+), 43 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index dafd47cc7f6a..6c8a479a3a11 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -813,26 +813,19 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
 	rcu_read_unlock();
 }
 
-void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
-			  struct ieee80211_tx_rate *probe_rate,
-			  struct ieee80211_tx_rate *rates)
+static void
+mt7615_mac_update_rate_desc(struct mt7615_phy *phy, struct mt7615_sta *sta,
+			    struct ieee80211_tx_rate *probe_rate,
+			    struct ieee80211_tx_rate *rates,
+			    struct mt7615_rate_desc *rd)
 {
 	struct mt7615_dev *dev = phy->dev;
 	struct mt76_phy *mphy = phy->mt76;
 	struct ieee80211_tx_rate *ref;
-	int wcid = sta->wcid.idx;
-	u32 addr = mt7615_mac_wtbl_addr(dev, wcid);
-	bool stbc = false;
+	bool rateset, stbc = false;
 	int n_rates = sta->n_rates;
-	u8 bw, bw_prev, bw_idx = 0;
-	u16 val[4];
-	u16 probe_val;
-	u32 w5, w27;
-	bool rateset;
-	int i, k;
-
-	if (!mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000))
-		return;
+	u8 bw, bw_prev;
+	int i, j;
 
 	for (i = n_rates; i < 4; i++)
 		rates[i] = rates[n_rates - 1];
@@ -860,10 +853,10 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 		if ((ref->flags ^ rates[i].flags) & IEEE80211_TX_RC_SHORT_GI)
 			rates[i].flags ^= IEEE80211_TX_RC_SHORT_GI;
 
-		for (k = 0; k < i; k++) {
-			if (rates[i].idx != rates[k].idx)
+		for (j = 0; j < i; j++) {
+			if (rates[i].idx != rates[j].idx)
 				continue;
-			if ((rates[i].flags ^ rates[k].flags) &
+			if ((rates[i].flags ^ rates[j].flags) &
 			    (IEEE80211_TX_RC_40_MHZ_WIDTH |
 			     IEEE80211_TX_RC_80_MHZ_WIDTH |
 			     IEEE80211_TX_RC_160_MHZ_WIDTH))
@@ -876,65 +869,87 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 		}
 	}
 
-	val[0] = mt7615_mac_tx_rate_val(dev, mphy, &rates[0], stbc, &bw);
+	rd->val[0] = mt7615_mac_tx_rate_val(dev, mphy, &rates[0], stbc, &bw);
 	bw_prev = bw;
 
 	if (probe_rate) {
-		probe_val = mt7615_mac_tx_rate_val(dev, mphy, probe_rate,
-						   stbc, &bw);
+		rd->probe_val = mt7615_mac_tx_rate_val(dev, mphy, probe_rate,
+						       stbc, &bw);
 		if (bw)
-			bw_idx = 1;
+			rd->bw_idx = 1;
 		else
 			bw_prev = 0;
 	} else {
-		probe_val = val[0];
+		rd->probe_val = rd->val[0];
 	}
 
-	val[1] = mt7615_mac_tx_rate_val(dev, mphy, &rates[1], stbc, &bw);
+	rd->val[1] = mt7615_mac_tx_rate_val(dev, mphy, &rates[1], stbc, &bw);
 	if (bw_prev) {
-		bw_idx = 3;
+		rd->bw_idx = 3;
 		bw_prev = bw;
 	}
 
-	val[2] = mt7615_mac_tx_rate_val(dev, mphy, &rates[2], stbc, &bw);
+	rd->val[2] = mt7615_mac_tx_rate_val(dev, mphy, &rates[2], stbc, &bw);
 	if (bw_prev) {
-		bw_idx = 5;
+		rd->bw_idx = 5;
 		bw_prev = bw;
 	}
 
-	val[3] = mt7615_mac_tx_rate_val(dev, mphy, &rates[3], stbc, &bw);
+	rd->val[3] = mt7615_mac_tx_rate_val(dev, mphy, &rates[3], stbc, &bw);
 	if (bw_prev)
-		bw_idx = 7;
+		rd->bw_idx = 7;
+
+	rd->rateset = rateset;
+	rd->sta = sta;
+	rd->bw = bw;
+}
+
+void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
+			  struct ieee80211_tx_rate *probe_rate,
+			  struct ieee80211_tx_rate *rates)
+{
+	int wcid = sta->wcid.idx, n_rates = sta->n_rates;
+	struct mt7615_dev *dev = phy->dev;
+	struct mt7615_rate_desc rd;
+	u32 w5, w27, addr;
+
+	if (!mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000))
+		return;
+
+	memset(&rd, 0, sizeof(struct mt7615_rate_desc));
+	mt7615_mac_update_rate_desc(phy, sta, probe_rate, rates, &rd);
 
+	addr = mt7615_mac_wtbl_addr(dev, wcid);
 	w27 = mt76_rr(dev, addr + 27 * 4);
 	w27 &= ~MT_WTBL_W27_CC_BW_SEL;
-	w27 |= FIELD_PREP(MT_WTBL_W27_CC_BW_SEL, bw);
+	w27 |= FIELD_PREP(MT_WTBL_W27_CC_BW_SEL, rd.bw);
 
 	w5 = mt76_rr(dev, addr + 5 * 4);
 	w5 &= ~(MT_WTBL_W5_BW_CAP | MT_WTBL_W5_CHANGE_BW_RATE |
 		MT_WTBL_W5_MPDU_OK_COUNT |
 		MT_WTBL_W5_MPDU_FAIL_COUNT |
 		MT_WTBL_W5_RATE_IDX);
-	w5 |= FIELD_PREP(MT_WTBL_W5_BW_CAP, bw) |
-	      FIELD_PREP(MT_WTBL_W5_CHANGE_BW_RATE, bw_idx ? bw_idx - 1 : 7);
+	w5 |= FIELD_PREP(MT_WTBL_W5_BW_CAP, rd.bw) |
+	      FIELD_PREP(MT_WTBL_W5_CHANGE_BW_RATE,
+			 rd.bw_idx ? rd.bw_idx - 1 : 7);
 
 	mt76_wr(dev, MT_WTBL_RIUCR0, w5);
 
 	mt76_wr(dev, MT_WTBL_RIUCR1,
-		FIELD_PREP(MT_WTBL_RIUCR1_RATE0, probe_val) |
-		FIELD_PREP(MT_WTBL_RIUCR1_RATE1, val[0]) |
-		FIELD_PREP(MT_WTBL_RIUCR1_RATE2_LO, val[1]));
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE0, rd.probe_val) |
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE1, rd.val[0]) |
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE2_LO, rd.val[1]));
 
 	mt76_wr(dev, MT_WTBL_RIUCR2,
-		FIELD_PREP(MT_WTBL_RIUCR2_RATE2_HI, val[1] >> 8) |
-		FIELD_PREP(MT_WTBL_RIUCR2_RATE3, val[1]) |
-		FIELD_PREP(MT_WTBL_RIUCR2_RATE4, val[2]) |
-		FIELD_PREP(MT_WTBL_RIUCR2_RATE5_LO, val[2]));
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE2_HI, rd.val[1] >> 8) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE3, rd.val[1]) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE4, rd.val[2]) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE5_LO, rd.val[2]));
 
 	mt76_wr(dev, MT_WTBL_RIUCR3,
-		FIELD_PREP(MT_WTBL_RIUCR3_RATE5_HI, val[2] >> 4) |
-		FIELD_PREP(MT_WTBL_RIUCR3_RATE6, val[3]) |
-		FIELD_PREP(MT_WTBL_RIUCR3_RATE7, val[3]));
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE5_HI, rd.val[2] >> 4) |
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE6, rd.val[3]) |
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE7, rd.val[3]));
 
 	mt76_wr(dev, MT_WTBL_UPDATE,
 		FIELD_PREP(MT_WTBL_UPDATE_WLAN_IDX, wcid) |
@@ -944,7 +959,8 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	mt76_wr(dev, addr + 27 * 4, w27);
 
 	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
-	sta->rate_set_tsf = (mt76_rr(dev, MT_LPON_UTTR0) & ~BIT(0)) | rateset;
+	sta->rate_set_tsf = mt76_rr(dev, MT_LPON_UTTR0) & ~BIT(0);
+	sta->rate_set_tsf |= rd.rateset;
 
 	if (!(sta->wcid.tx_info & MT_WCID_TX_INFO_SET))
 		mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index a0ddb35b5e26..23c7e35e93c0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -90,6 +90,18 @@ struct mt7615_rate_set {
 	struct ieee80211_tx_rate rates[4];
 };
 
+struct mt7615_rate_desc {
+	struct list_head node;
+
+	struct mt7615_sta *sta;
+
+	bool rateset;
+	u16 probe_val;
+	u16 val[4];
+	u8 bw_idx;
+	u8 bw;
+};
+
 struct mt7615_sta {
 	struct mt76_wcid wcid; /* must be first */
 
-- 
cgit v1.2.3-59-g8ed1b


From 8915c3ceb92948a158bbce6060794b3500a006c8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:51 +0200
Subject: mt76: mt7615: introduce __mt7663_load_firmware routine

Introduce __mt7663_load_firmware routine to load firmware for usb
devices.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 22 +++++++++++++++++-----
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  1 +
 2 files changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 12759158e69a..dcd97606e827 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2078,12 +2078,10 @@ out:
 	return ret;
 }
 
-static int mt7663_load_firmware(struct mt7615_dev *dev)
+int __mt7663_load_firmware(struct mt7615_dev *dev)
 {
 	int ret;
 
-	mt76_set(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_BYPASS_TX_SCH);
-
 	ret = mt76_get_field(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_N9_RDY);
 	if (ret) {
 		dev_dbg(dev->mt76.dev, "Firmware is already download\n");
@@ -2109,12 +2107,26 @@ static int mt7663_load_firmware(struct mt7615_dev *dev)
 		return -EIO;
 	}
 
-	mt76_clear(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_BYPASS_TX_SCH);
-
 	dev_dbg(dev->mt76.dev, "Firmware init done\n");
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(__mt7663_load_firmware);
+
+static int mt7663_load_firmware(struct mt7615_dev *dev)
+{
+	int ret;
+
+	mt76_set(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_BYPASS_TX_SCH);
+
+	ret = __mt7663_load_firmware(dev);
+	if (ret)
+		return ret;
+
+	mt76_clear(dev, MT_WPDMA_GLO_CFG, MT_WPDMA_GLO_CFG_BYPASS_TX_SCH);
+
+	return 0;
+}
 
 int mt7615_mcu_init(struct mt7615_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 23c7e35e93c0..d0b5c704f61c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -462,5 +462,6 @@ int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_init_debugfs(struct mt7615_dev *dev);
+int __mt7663_load_firmware(struct mt7615_dev *dev);
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From d506017ed4854963e967d4017be33a4cc5e1285b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:52 +0200
Subject: mt76: mt7615: move mt7615_mac_wtbl_addr in mac.h

Move mt7615_mac_wtbl_addr to mac.h and add the inline qualifier so that
it can be reused when adding usb support to the mt7615 driver.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 5 -----
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h | 5 +++++
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6c8a479a3a11..6800ca3706bd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -723,11 +723,6 @@ void mt7615_txp_skb_unmap(struct mt76_dev *dev,
 		mt7615_txp_skb_unmap_hw(dev, &txp->hw);
 }
 
-static u32 mt7615_mac_wtbl_addr(struct mt7615_dev *dev, int wcid)
-{
-	return MT_WTBL_BASE(dev) + wcid * MT_WTBL_ENTRY_SIZE;
-}
-
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask)
 {
 	mt76_rmw(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_WLAN_IDX,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 422a140fd812..2c368b99cc0c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -407,4 +407,9 @@ mt7615_txwi_to_txp(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 	return (struct mt7615_txp_common *)(txwi + MT_TXD_SIZE);
 }
 
+static inline u32 mt7615_mac_wtbl_addr(struct mt7615_dev *dev, int wcid)
+{
+	return MT_WTBL_BASE(dev) + wcid * MT_WTBL_ENTRY_SIZE;
+}
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 3a1880565aaf71818310a42fb2e2e11801d5111d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:53 +0200
Subject: mt76: mt76u: rely on mt7622 queue scheme for mt7663u

Rely on the mt7622 endpoint definitions for mt7663u

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/usb.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/usb.c b/drivers/net/wireless/mediatek/mt76/usb.c
index a981da6c35a5..fb97ea25b4d4 100644
--- a/drivers/net/wireless/mediatek/mt76/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/usb.c
@@ -1009,8 +1009,19 @@ static void mt76u_tx_kick(struct mt76_dev *dev, struct mt76_queue *q)
 
 static u8 mt76u_ac_to_hwq(struct mt76_dev *dev, u8 ac)
 {
-	if (mt76_chip(dev) == 0x7663)
-		return ac ^ 0x3;
+	if (mt76_chip(dev) == 0x7663) {
+		static const u8 wmm_queue_map[] = {
+			[IEEE80211_AC_VO] = 0,
+			[IEEE80211_AC_VI] = 1,
+			[IEEE80211_AC_BE] = 2,
+			[IEEE80211_AC_BK] = 4,
+		};
+
+		if (WARN_ON(ac >= ARRAY_SIZE(wmm_queue_map)))
+			return 2; /* BE */
+
+		return wmm_queue_map[ac];
+	}
 
 	return mt76_ac_to_hwq(ac);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 294f17aea22aeb236cefdc9d7bd8af536291a7f8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:54 +0200
Subject: mt76: mt7615: rework wtbl key configuration

Remove the ieee80211_key_conf dependency from mt7615_mac_wtbl_update_key
and export mt7615_mac_wtbl_update_key, mt7615_mac_wtbl_update_pk and
mt7615_mac_wtbl_update_cipher in order to reuse them in the usb code.
Move mt7615_mac_get_cipher to mac.h.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 69 ++++++++--------------
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h    | 27 +++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 14 +++++
 3 files changed, 64 insertions(+), 46 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6800ca3706bd..f2bee76b8c29 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -964,58 +964,31 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	sta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
 }
 
-static enum mt7615_cipher_type
-mt7615_mac_get_cipher(int cipher)
-{
-	switch (cipher) {
-	case WLAN_CIPHER_SUITE_WEP40:
-		return MT_CIPHER_WEP40;
-	case WLAN_CIPHER_SUITE_WEP104:
-		return MT_CIPHER_WEP104;
-	case WLAN_CIPHER_SUITE_TKIP:
-		return MT_CIPHER_TKIP;
-	case WLAN_CIPHER_SUITE_AES_CMAC:
-		return MT_CIPHER_BIP_CMAC_128;
-	case WLAN_CIPHER_SUITE_CCMP:
-		return MT_CIPHER_AES_CCMP;
-	case WLAN_CIPHER_SUITE_CCMP_256:
-		return MT_CIPHER_CCMP_256;
-	case WLAN_CIPHER_SUITE_GCMP:
-		return MT_CIPHER_GCMP;
-	case WLAN_CIPHER_SUITE_GCMP_256:
-		return MT_CIPHER_GCMP_256;
-	case WLAN_CIPHER_SUITE_SMS4:
-		return MT_CIPHER_WAPI;
-	default:
-		return MT_CIPHER_NONE;
-	}
-}
-
-static int
-mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			   struct ieee80211_key_conf *key,
-			   enum mt7615_cipher_type cipher,
-			   enum set_key_cmd cmd)
+int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
+			       struct mt76_wcid *wcid,
+			       u8 *key, u8 keylen,
+			       enum mt7615_cipher_type cipher,
+			       enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx) + 30 * 4;
 	u8 data[32] = {};
 
-	if (key->keylen > sizeof(data))
+	if (keylen > sizeof(data))
 		return -EINVAL;
 
 	mt76_rr_copy(dev, addr, data, sizeof(data));
 	if (cmd == SET_KEY) {
 		if (cipher == MT_CIPHER_TKIP) {
 			/* Rx/Tx MIC keys are swapped */
-			memcpy(data + 16, key->key + 24, 8);
-			memcpy(data + 24, key->key + 16, 8);
+			memcpy(data + 16, key + 24, 8);
+			memcpy(data + 24, key + 16, 8);
 		}
 		if (cipher != MT_CIPHER_BIP_CMAC_128 && wcid->cipher)
 			memmove(data + 16, data, 16);
 		if (cipher != MT_CIPHER_BIP_CMAC_128 || !wcid->cipher)
-			memcpy(data, key->key, key->keylen);
+			memcpy(data, key, keylen);
 		else if (cipher == MT_CIPHER_BIP_CMAC_128)
-			memcpy(data + 16, key->key, 16);
+			memcpy(data + 16, key, 16);
 	} else {
 		if (wcid->cipher & ~BIT(cipher)) {
 			if (cipher != MT_CIPHER_BIP_CMAC_128)
@@ -1029,11 +1002,12 @@ mt7615_mac_wtbl_update_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_wtbl_update_key);
 
-static int
-mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			  enum mt7615_cipher_type cipher, int keyidx,
-			  enum set_key_cmd cmd)
+int mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev,
+			      struct mt76_wcid *wcid,
+			      enum mt7615_cipher_type cipher,
+			      int keyidx, enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx), w0, w1;
 
@@ -1065,11 +1039,12 @@ mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_wtbl_update_pk);
 
-static void
-mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
-			      enum mt7615_cipher_type cipher,
-			      enum set_key_cmd cmd)
+void mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev,
+				   struct mt76_wcid *wcid,
+				   enum mt7615_cipher_type cipher,
+				   enum set_key_cmd cmd)
 {
 	u32 addr = mt7615_mac_wtbl_addr(dev, wcid->idx);
 
@@ -1087,6 +1062,7 @@ mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			mt76_clear(dev, addr + 2 * 4, MT_WTBL_W2_KEY_TYPE);
 	}
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_wtbl_update_cipher);
 
 int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 			    struct mt76_wcid *wcid,
@@ -1103,7 +1079,8 @@ int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev,
 	spin_lock_bh(&dev->mt76.lock);
 
 	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, cmd);
-	err = mt7615_mac_wtbl_update_key(dev, wcid, key, cipher, cmd);
+	err = mt7615_mac_wtbl_update_key(dev, wcid, key->key, key->keylen,
+					 cipher, cmd);
 	if (err < 0)
 		goto out;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 2c368b99cc0c..f3e0070896a1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -394,6 +394,33 @@ enum mt7615_cipher_type {
 	MT_CIPHER_GCMP_256,
 };
 
+static inline enum mt7615_cipher_type
+mt7615_mac_get_cipher(int cipher)
+{
+	switch (cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+		return MT_CIPHER_WEP40;
+	case WLAN_CIPHER_SUITE_WEP104:
+		return MT_CIPHER_WEP104;
+	case WLAN_CIPHER_SUITE_TKIP:
+		return MT_CIPHER_TKIP;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		return MT_CIPHER_BIP_CMAC_128;
+	case WLAN_CIPHER_SUITE_CCMP:
+		return MT_CIPHER_AES_CCMP;
+	case WLAN_CIPHER_SUITE_CCMP_256:
+		return MT_CIPHER_CCMP_256;
+	case WLAN_CIPHER_SUITE_GCMP:
+		return MT_CIPHER_GCMP;
+	case WLAN_CIPHER_SUITE_GCMP_256:
+		return MT_CIPHER_GCMP_256;
+	case WLAN_CIPHER_SUITE_SMS4:
+		return MT_CIPHER_WAPI;
+	default:
+		return MT_CIPHER_NONE;
+	}
+}
+
 static inline struct mt7615_txp_common *
 mt7615_txwi_to_txp(struct mt76_dev *dev, struct mt76_txwi_cache *t)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index d0b5c704f61c..ed34b9a0e04c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -67,6 +67,7 @@ struct mt7615_vif;
 struct mt7615_sta;
 struct mt7615_dfs_pulse;
 struct mt7615_dfs_pattern;
+enum mt7615_cipher_type;
 
 enum mt7615_hw_txq_id {
 	MT7615_TXQ_MAIN,
@@ -412,6 +413,19 @@ void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb);
 int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			    struct ieee80211_key_conf *key,
 			    enum set_key_cmd cmd);
+int mt7615_mac_wtbl_update_pk(struct mt7615_dev *dev,
+			      struct mt76_wcid *wcid,
+			      enum mt7615_cipher_type cipher,
+			      int keyidx, enum set_key_cmd cmd);
+void mt7615_mac_wtbl_update_cipher(struct mt7615_dev *dev,
+				   struct mt76_wcid *wcid,
+				   enum mt7615_cipher_type cipher,
+				   enum set_key_cmd cmd);
+int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
+			       struct mt76_wcid *wcid,
+			       u8 *key, u8 keylen,
+			       enum mt7615_cipher_type cipher,
+			       enum set_key_cmd cmd);
 void mt7615_mac_reset_work(struct work_struct *work);
 
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
-- 
cgit v1.2.3-59-g8ed1b


From 7d9f1d108c3a05905ecefaff058c5688458f2f48 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:55 +0200
Subject: mt76: mt7615: introduce mt7615_wtbl_desc data structure

Generalize mt7615_rate_desc by introducing the mt7615_wtbl_desc and
mt7615_key_desc data structures in order to configure the hw wtbl
in a non-atomic context for usb devices.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  1 -
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 29 +++++++++++++++++++---
 2 files changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f2bee76b8c29..ed6eb19f48c5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -895,7 +895,6 @@ mt7615_mac_update_rate_desc(struct mt7615_phy *phy, struct mt7615_sta *sta,
 		rd->bw_idx = 7;
 
 	rd->rateset = rateset;
-	rd->sta = sta;
 	rd->bw = bw;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index ed34b9a0e04c..f3c912672a6c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -92,10 +92,6 @@ struct mt7615_rate_set {
 };
 
 struct mt7615_rate_desc {
-	struct list_head node;
-
-	struct mt7615_sta *sta;
-
 	bool rateset;
 	u16 probe_val;
 	u16 val[4];
@@ -103,6 +99,31 @@ struct mt7615_rate_desc {
 	u8 bw;
 };
 
+enum mt7615_wtbl_desc_type {
+	MT7615_WTBL_RATE_DESC,
+	MT7615_WTBL_KEY_DESC
+};
+
+struct mt7615_key_desc {
+	enum set_key_cmd cmd;
+	u32 cipher;
+	s8 keyidx;
+	u8 keylen;
+	u8 *key;
+};
+
+struct mt7615_wtbl_desc {
+	struct list_head node;
+
+	enum mt7615_wtbl_desc_type type;
+	struct mt7615_sta *sta;
+
+	union {
+		struct mt7615_rate_desc rate;
+		struct mt7615_key_desc key;
+	};
+};
+
 struct mt7615_sta {
 	struct mt76_wcid wcid; /* must be first */
 
-- 
cgit v1.2.3-59-g8ed1b


From 6e5d2099a483c4ab3557d23e0c5c89ff7bd7b0a8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 2 Apr 2020 20:18:56 +0200
Subject: mt76: mt7615: add address parameter to mt7615_eeprom_init

Add an address parameter to the mt7615_eeprom_init routine so that it can
be reused when adding usb support to the mt7615 driver.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 12 ++++++------
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 +-
 3 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index dfa9a08b896d..23ed20553de2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -40,11 +40,11 @@ static int mt7615_efuse_read(struct mt7615_dev *dev, u32 base,
 	return 0;
 }
 
-static int mt7615_efuse_init(struct mt7615_dev *dev)
+static int mt7615_efuse_init(struct mt7615_dev *dev, u32 base)
 {
-	u32 val, base = mt7615_reg_map(dev, MT_EFUSE_BASE);
 	int i, len = MT7615_EEPROM_SIZE;
 	void *buf;
+	u32 val;
 
 	val = mt76_rr(dev, base + MT_EFUSE_BASE_CTRL);
 	if (val & MT_EFUSE_BASE_CTRL_EMPTY)
@@ -67,7 +67,7 @@ static int mt7615_efuse_init(struct mt7615_dev *dev)
 	return 0;
 }
 
-static int mt7615_eeprom_load(struct mt7615_dev *dev)
+static int mt7615_eeprom_load(struct mt7615_dev *dev, u32 addr)
 {
 	int ret;
 
@@ -75,7 +75,7 @@ static int mt7615_eeprom_load(struct mt7615_dev *dev)
 	if (ret < 0)
 		return ret;
 
-	return mt7615_efuse_init(dev);
+	return mt7615_efuse_init(dev, addr);
 }
 
 static int mt7615_check_eeprom(struct mt76_dev *dev)
@@ -265,11 +265,11 @@ static void mt7615_cal_free_data(struct mt7615_dev *dev)
 	}
 }
 
-int mt7615_eeprom_init(struct mt7615_dev *dev)
+int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr)
 {
 	int ret;
 
-	ret = mt7615_eeprom_load(dev);
+	ret = mt7615_eeprom_load(dev, addr);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index be0f3a77e22f..6acaaf2732df 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -146,6 +146,7 @@ static void mt7615_init_work(struct work_struct *work)
 
 static int mt7615_init_hardware(struct mt7615_dev *dev)
 {
+	u32 addr = mt7615_reg_map(dev, MT_EFUSE_BASE);
 	int ret, idx;
 
 	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
@@ -154,7 +155,7 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	spin_lock_init(&dev->token_lock);
 	idr_init(&dev->token);
 
-	ret = mt7615_eeprom_init(dev);
+	ret = mt7615_eeprom_init(dev, addr);
 	if (ret < 0)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index f3c912672a6c..170775ac8eae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -360,7 +360,7 @@ int mt7615_register_device(struct mt7615_dev *dev);
 void mt7615_unregister_device(struct mt7615_dev *dev);
 int mt7615_register_ext_phy(struct mt7615_dev *dev);
 void mt7615_unregister_ext_phy(struct mt7615_dev *dev);
-int mt7615_eeprom_init(struct mt7615_dev *dev);
+int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr);
 int mt7615_eeprom_get_power_index(struct mt7615_dev *dev,
 				  struct ieee80211_channel *chan,
 				  u8 chain_idx);
-- 
cgit v1.2.3-59-g8ed1b


From 044883e37ddad04690e57f037e9eae48287e2613 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 3 Apr 2020 17:09:16 +0800
Subject: mt76: mt7663: correct the name of the rom patch

Rom patch is shared between Bluetooth and Wifi devices, so correct the
naming to allow two drivers to share the same file.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 170775ac8eae..304abde1482e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -41,7 +41,7 @@
 #define MT7615_FIRMWARE_V2		2
 #define MT7615_FIRMWARE_V3		3
 
-#define MT7663_ROM_PATCH		"mediatek/mt7663pr2h_v3.bin"
+#define MT7663_ROM_PATCH		"mediatek/mt7663pr2h.bin"
 #define MT7663_FIRMWARE_N9              "mediatek/mt7663_n9_v3.bin"
 
 #define MT7615_EEPROM_SIZE		1024
-- 
cgit v1.2.3-59-g8ed1b


From fdb786cce0ef3615dcbb30d8baf06a1d4cb7a344 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 3 Apr 2020 21:01:56 +0200
Subject: mt76: mt7615: do not always reset the dfs state setting the channel

mac80211/hostapd can run mt7615_set_channel multiple times with the same
channel parameters, sending multiple rdd commands that overwrite the
previous ones. This behaviour causes throughput issues on dfs channels.
Fix the issue by comparing the new channel freq/width with the running
one and resetting the dfs state only when they differ.

Fixes: 5dabdf71e94e ("mt76: mt7615: add multiple wiphy support to the dfs support code")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index c91f0896b09f..3e0d26183905 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -220,6 +220,25 @@ static void mt7615_remove_interface(struct ieee80211_hw *hw,
 	spin_unlock_bh(&dev->sta_poll_lock);
 }
 
+static void mt7615_init_dfs_state(struct mt7615_phy *phy)
+{
+	struct mt76_phy *mphy = phy->mt76;
+	struct ieee80211_hw *hw = mphy->hw;
+	struct cfg80211_chan_def *chandef = &hw->conf.chandef;
+
+	if (hw->conf.flags & IEEE80211_CONF_OFFCHANNEL)
+		return;
+
+	if (!(chandef->chan->flags & IEEE80211_CHAN_RADAR))
+		return;
+
+	if (mphy->chandef.chan->center_freq == chandef->chan->center_freq &&
+	    mphy->chandef.width == chandef->width)
+		return;
+
+	phy->dfs_state = -1;
+}
+
 static int mt7615_set_channel(struct mt7615_phy *phy)
 {
 	struct mt7615_dev *dev = phy->dev;
@@ -231,7 +250,7 @@ static int mt7615_set_channel(struct mt7615_phy *phy)
 	mutex_lock(&dev->mt76.mutex);
 	set_bit(MT76_RESET, &phy->mt76->state);
 
-	phy->dfs_state = -1;
+	mt7615_init_dfs_state(phy);
 	mt76_set_channel(phy->mt76);
 
 	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_CHANNEL_SWITCH);
-- 
cgit v1.2.3-59-g8ed1b


From 373ab334226853a71903c3f62c0d89ddfaa0aeb2 Mon Sep 17 00:00:00 2001
From: Markus Elfring <elfring@users.sourceforge.net>
Date: Sun, 5 Apr 2020 16:45:48 +0200
Subject: mt76: mt7615: Delete an error message in mt7622_wmac_probe()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The function “platform_get_irq” can log an error already.
Thus omit a redundant message for the exception handling in the
calling function.

This issue was detected by using the Coccinelle software.

Signed-off-by: Markus Elfring <elfring@users.sourceforge.net>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/soc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
index 43aa49706c66..9aa5183c7a56 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/soc.c
@@ -36,10 +36,8 @@ static int mt7622_wmac_probe(struct platform_device *pdev)
 	int irq;
 
 	irq = platform_get_irq(pdev, 0);
-	if (irq < 0) {
-		dev_err(&pdev->dev, "Failed to get device IRQ\n");
+	if (irq < 0)
 		return irq;
-	}
 
 	mem_base = devm_ioremap_resource(&pdev->dev, res);
 	if (IS_ERR(mem_base)) {
-- 
cgit v1.2.3-59-g8ed1b


From c3129ea44065f8d2a605e4489e809f47f5ff9524 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Apr 2020 12:16:22 +0200
Subject: dt-bindings: net: wireless: mt76: document mediatek,eeprom-merge-otp
 property

It is used to enable merging of Flash EEPROM data with OTP ROM calibration.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
index 3a76d8faaaed..ab7e7a00e534 100644
--- a/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
+++ b/Documentation/devicetree/bindings/net/wireless/mediatek,mt76.txt
@@ -25,6 +25,9 @@ Optional properties:
 - mediatek,mtd-eeprom: Specify a MTD partition + offset containing EEPROM data
 - big-endian: if the radio eeprom partition is written in big-endian, specify
   this property
+- mediatek,eeprom-merge-otp: Merge EEPROM data with OTP data. Can be used on
+  boards where the flash calibration data is generic and specific calibration
+  data should be pulled from the OTP ROM
 
 The MAC address can as well be set with corresponding optional properties
 defined in net/ethernet.txt.
-- 
cgit v1.2.3-59-g8ed1b


From b90728f82b085f97a4993ef7ed71f38e3ae031ea Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Apr 2020 10:19:48 +0200
Subject: mt76: mt7615: disable merge of OTP ROM data by default

The reference driver does not seem to enable it by default, only under certain
conditions, e.g. when a .bin file is loaded.
Make it opt-in via a device tree property for now, in case it is needed on some
boards.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 23ed20553de2..34c33d23d79f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -5,6 +5,7 @@
  *         Felix Fietkau <nbd@nbd.name>
  */
 
+#include <linux/of.h>
 #include "mt7615.h"
 #include "eeprom.h"
 
@@ -255,6 +256,11 @@ static void mt7622_apply_cal_free_data(struct mt7615_dev *dev)
 
 static void mt7615_cal_free_data(struct mt7615_dev *dev)
 {
+	struct device_node *np = dev->mt76.dev->of_node;
+
+	if (!np || !of_property_read_bool(np, "mediatek,eeprom-merge-otp"))
+		return;
+
 	switch (mt76_chip(&dev->mt76)) {
 	case 0x7622:
 		mt7622_apply_cal_free_data(dev);
-- 
cgit v1.2.3-59-g8ed1b


From ad380ad1ebbe6a9a243150fef1123670164ae278 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 30 Mar 2020 15:02:45 +0200
Subject: mt76: mt7615: add support for applying DC offset calibration from
 EEPROM

When the EEPROM data is read from flash, it can contain DC offset calibration
data. Add support for sending the data to the firmware.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c |   9 +-
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h |  19 ++
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |   3 +
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 195 ++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   2 +
 6 files changed, 200 insertions(+), 29 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 34c33d23d79f..7a09427463b0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -72,7 +72,8 @@ static int mt7615_eeprom_load(struct mt7615_dev *dev, u32 addr)
 {
 	int ret;
 
-	ret = mt76_eeprom_init(&dev->mt76, MT7615_EEPROM_SIZE);
+	ret = mt76_eeprom_init(&dev->mt76, MT7615_EEPROM_SIZE +
+					   MT7615_EEPROM_EXTRA_DATA);
 	if (ret < 0)
 		return ret;
 
@@ -280,11 +281,13 @@ int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr)
 		return ret;
 
 	ret = mt7615_check_eeprom(&dev->mt76);
-	if (ret && dev->mt76.otp.data)
+	if (ret && dev->mt76.otp.data) {
 		memcpy(dev->mt76.eeprom.data, dev->mt76.otp.data,
 		       MT7615_EEPROM_SIZE);
-	else
+	} else {
+		dev->flash_eeprom = true;
 		mt7615_cal_free_data(dev);
+	}
 
 	mt7615_eeprom_parse_hw_cap(dev);
 	memcpy(dev->mt76.macaddr, dev->mt76.eeprom.data + MT_EE_MAC_ADDR,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
index 8a2a64b7fcd3..bd2ac1e0e01a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
@@ -6,6 +6,21 @@
 
 #include "mt7615.h"
 
+
+#define MT7615_EEPROM_DCOC_OFFSET		MT7615_EEPROM_SIZE
+#define MT7615_EEPROM_DCOC_SIZE			256
+#define MT7615_EEPROM_DCOC_COUNT		34
+
+#define MT7615_EEPROM_TXDPD_OFFSET		(MT7615_EEPROM_SIZE + \
+						 MT7615_EEPROM_DCOC_COUNT * \
+						 MT7615_EEPROM_DCOC_SIZE)
+#define MT7615_EEPROM_TXDPD_SIZE		216
+#define MT7615_EEPROM_TXDPD_COUNT		(44 + 3)
+
+#define MT7615_EEPROM_EXTRA_DATA		(MT7615_EEPROM_TXDPD_OFFSET + \
+						 MT7615_EEPROM_TXDPD_COUNT * \
+						 MT7615_EEPROM_TXDPD_SIZE)
+
 enum mt7615_eeprom_field {
 	MT_EE_CHIP_ID =				0x000,
 	MT_EE_VERSION =				0x002,
@@ -13,6 +28,7 @@ enum mt7615_eeprom_field {
 	MT_EE_NIC_CONF_0 =			0x034,
 	MT_EE_NIC_CONF_1 =			0x036,
 	MT_EE_WIFI_CONF =			0x03e,
+	MT_EE_CALDATA_FLASH =			0x052,
 	MT_EE_TX0_2G_TARGET_POWER =		0x058,
 	MT_EE_TX0_5G_G0_TARGET_POWER =		0x070,
 	MT_EE_TX1_5G_G0_TARGET_POWER =		0x098,
@@ -27,6 +43,9 @@ enum mt7615_eeprom_field {
 	MT7663_EE_MAX =				0x400,
 };
 
+#define MT_EE_CALDATA_FLASH_TX_DPD		BIT(0)
+#define MT_EE_CALDATA_FLASH_RX_CAL		BIT(1)
+
 #define MT_EE_NIC_CONF_TX_MASK			GENMASK(7, 4)
 #define MT_EE_NIC_CONF_RX_MASK			GENMASK(3, 0)
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 3e0d26183905..ef8e9336f627 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -253,6 +253,9 @@ static int mt7615_set_channel(struct mt7615_phy *phy)
 	mt7615_init_dfs_state(phy);
 	mt76_set_channel(phy->mt76);
 
+	if (is_mt7615(&dev->mt76) && dev->flash_eeprom)
+		mt7615_mcu_apply_rx_dcoc(phy);
+
 	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_CHANNEL_SWITCH);
 	if (ret)
 		goto out;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index dcd97606e827..a9fd68692e96 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2481,6 +2481,25 @@ static void mt7615_mcu_set_txpower_sku(struct mt7615_phy *phy, u8 *sku)
 	}
 }
 
+static u8 mt7615_mcu_chan_bw(struct cfg80211_chan_def *chandef)
+{
+	static const u8 width_to_bw[] = {
+		[NL80211_CHAN_WIDTH_40] = CMD_CBW_40MHZ,
+		[NL80211_CHAN_WIDTH_80] = CMD_CBW_80MHZ,
+		[NL80211_CHAN_WIDTH_80P80] = CMD_CBW_8080MHZ,
+		[NL80211_CHAN_WIDTH_160] = CMD_CBW_160MHZ,
+		[NL80211_CHAN_WIDTH_5] = CMD_CBW_5MHZ,
+		[NL80211_CHAN_WIDTH_10] = CMD_CBW_10MHZ,
+		[NL80211_CHAN_WIDTH_20] = CMD_CBW_20MHZ,
+		[NL80211_CHAN_WIDTH_20_NOHT] = CMD_CBW_20MHZ,
+	};
+
+	if (chandef->width >= ARRAY_SIZE(width_to_bw))
+		return 0;
+
+	return width_to_bw[chandef->width];
+}
+
 int mt7615_mcu_set_chan_info(struct mt7615_phy *phy, int cmd)
 {
 	struct mt7615_dev *dev = phy->dev;
@@ -2521,32 +2540,7 @@ int mt7615_mcu_set_chan_info(struct mt7615_phy *phy, int cmd)
 		req.switch_reason = CH_SWITCH_NORMAL;
 
 	req.band_idx = phy != &dev->phy;
-
-	switch (chandef->width) {
-	case NL80211_CHAN_WIDTH_40:
-		req.bw = CMD_CBW_40MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_80:
-		req.bw = CMD_CBW_80MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_80P80:
-		req.bw = CMD_CBW_8080MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_160:
-		req.bw = CMD_CBW_160MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_5:
-		req.bw = CMD_CBW_5MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_10:
-		req.bw = CMD_CBW_10MHZ;
-		break;
-	case NL80211_CHAN_WIDTH_20_NOHT:
-	case NL80211_CHAN_WIDTH_20:
-	default:
-		req.bw = CMD_CBW_20MHZ;
-		break;
-	}
+	req.bw = mt7615_mcu_chan_bw(chandef);
 
 	mt7615_mcu_set_txpower_sku(phy, req.txpower_sku);
 
@@ -2836,3 +2830,152 @@ int mt7615_mcu_sched_scan_enable(struct mt7615_phy *phy,
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SCHED_SCAN_ENABLE,
 				   &req, sizeof(req), false);
 }
+
+static int mt7615_find_freq_idx(const u16 *freqs, int n_freqs, u16 cur)
+{
+	int i;
+
+	for (i = 0; i < n_freqs; i++)
+		if (cur == freqs[i])
+			return i;
+
+	return -1;
+}
+
+static int mt7615_dcoc_freq_idx(u16 freq, u8 bw)
+{
+	static const u16 freq_list[] = {
+		4980, 5805, 5905, 5190,
+		5230, 5270, 5310, 5350,
+		5390, 5430, 5470, 5510,
+		5550, 5590, 5630, 5670,
+		5710, 5755, 5795, 5835,
+		5875, 5210, 5290, 5370,
+		5450, 5530, 5610, 5690,
+		5775, 5855
+	};
+	static const u16 freq_bw40[] = {
+		5190, 5230, 5270, 5310,
+		5350, 5390, 5430, 5470,
+		5510, 5550, 5590, 5630,
+		5670, 5710, 5755, 5795,
+		5835, 5875
+	};
+	int offset_2g = ARRAY_SIZE(freq_list);
+	int idx;
+
+	if (freq < 4000) {
+		if (freq < 2427)
+			return offset_2g;
+		if (freq < 2442)
+			return offset_2g + 1;
+		if (freq < 2457)
+			return offset_2g + 2;
+
+		return offset_2g + 3;
+	}
+
+	switch (bw) {
+	case NL80211_CHAN_WIDTH_80:
+	case NL80211_CHAN_WIDTH_80P80:
+	case NL80211_CHAN_WIDTH_160:
+		break;
+	default:
+		idx = mt7615_find_freq_idx(freq_bw40, ARRAY_SIZE(freq_bw40),
+					   freq + 10);
+		if (idx >= 0) {
+			freq = freq_bw40[idx];
+			break;
+		}
+
+		idx = mt7615_find_freq_idx(freq_bw40, ARRAY_SIZE(freq_bw40),
+					   freq - 10);
+		if (idx >= 0) {
+			freq = freq_bw40[idx];
+			break;
+		}
+		/* fall through */
+	case NL80211_CHAN_WIDTH_40:
+		idx = mt7615_find_freq_idx(freq_bw40, ARRAY_SIZE(freq_bw40),
+					   freq);
+		if (idx >= 0)
+			break;
+
+		return -1;
+
+	}
+
+	return mt7615_find_freq_idx(freq_list, ARRAY_SIZE(freq_list), freq);
+}
+
+int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy)
+{
+	struct mt7615_dev *dev = phy->dev;
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	int freq2 = chandef->center_freq2;
+	int ret;
+	struct {
+		u8 direction;
+		u8 runtime_calibration;
+		u8 _rsv[2];
+
+		__le16 center_freq;
+		u8 bw;
+		u8 band;
+		u8 is_freq2;
+		u8 success;
+		u8 dbdc_en;
+
+		u8 _rsv2;
+
+		struct {
+			__le32 sx0_i_lna[4];
+			__le32 sx0_q_lna[4];
+
+			__le32 sx2_i_lna[4];
+			__le32 sx2_q_lna[4];
+		} dcoc_data[4];
+	} req = {
+		.direction = 1,
+
+		.bw = mt7615_mcu_chan_bw(chandef),
+		.band = chandef->center_freq1 > 4000,
+		.dbdc_en = !!dev->mt76.phy2,
+	};
+	u16 center_freq = chandef->center_freq1;
+	int freq_idx;
+	u8 *eep = dev->mt76.eeprom.data;
+
+	if (!(eep[MT_EE_CALDATA_FLASH] & MT_EE_CALDATA_FLASH_RX_CAL))
+		return 0;
+
+	if (chandef->width == NL80211_CHAN_WIDTH_160) {
+		freq2 = center_freq + 40;
+		center_freq -= 40;
+	}
+
+again:
+	req.runtime_calibration = 1;
+	freq_idx = mt7615_dcoc_freq_idx(center_freq, chandef->width);
+	if (freq_idx < 0)
+		goto out;
+
+	memcpy(req.dcoc_data, eep + MT7615_EEPROM_DCOC_OFFSET +
+			      freq_idx * MT7615_EEPROM_DCOC_SIZE,
+	       sizeof(req.dcoc_data));
+	req.runtime_calibration = 0;
+
+out:
+	req.center_freq = cpu_to_le16(center_freq);
+	ret = __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_RXDCOC_CAL, &req,
+				  sizeof(req), true);
+
+	if ((chandef->width == NL80211_CHAN_WIDTH_80P80 ||
+	     chandef->width == NL80211_CHAN_WIDTH_160) && !req.is_freq2) {
+		req.is_freq2 = true;
+		center_freq = freq2;
+		goto again;
+	}
+
+	return ret;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 69cb68d6465d..427478002e67 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -270,6 +270,7 @@ enum {
 	MCU_EXT_CMD_BCN_OFFLOAD = 0x49,
 	MCU_EXT_CMD_SET_RX_PATH = 0x4e,
 	MCU_EXT_CMD_TX_POWER_FEATURE_CTRL = 0x58,
+	MCU_EXT_CMD_RXDCOC_CAL = 0x59,
 	MCU_EXT_CMD_SET_RDD_TH = 0x7c,
 	MCU_EXT_CMD_SET_RDD_PATTERN = 0x7d,
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 304abde1482e..39c792826c48 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -258,6 +258,7 @@ struct mt7615_dev {
 
 	u8 mac_work_count;
 	bool fw_debug;
+	bool flash_eeprom;
 
 	spinlock_t token_lock;
 	struct idr token;
@@ -494,6 +495,7 @@ int mt7615_mcu_set_pulse_th(struct mt7615_dev *dev,
 int mt7615_mcu_set_radar_th(struct mt7615_dev *dev, int index,
 			    const struct mt7615_dfs_pattern *pattern);
 int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable);
+int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_init_debugfs(struct mt7615_dev *dev);
-- 
cgit v1.2.3-59-g8ed1b


From 371a59d151df588795f43c22dc3f882b9719559b Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Apr 2020 11:14:56 +0200
Subject: mt76: mt7615: add support for applying tx DPD calibration from EEPROM

When the EEPROM data is read from flash, it can contain tx DPD calibration
data. Add support for sending the data to the firmware.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |   4 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 121 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   1 +
 4 files changed, 126 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index ef8e9336f627..b346080458bc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -253,8 +253,10 @@ static int mt7615_set_channel(struct mt7615_phy *phy)
 	mt7615_init_dfs_state(phy);
 	mt76_set_channel(phy->mt76);
 
-	if (is_mt7615(&dev->mt76) && dev->flash_eeprom)
+	if (is_mt7615(&dev->mt76) && dev->flash_eeprom) {
 		mt7615_mcu_apply_rx_dcoc(phy);
+		mt7615_mcu_apply_tx_dpd(phy);
+	}
 
 	ret = mt7615_mcu_set_chan_info(phy, MCU_EXT_CMD_CHANNEL_SWITCH);
 	if (ret)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index a9fd68692e96..f51d252c5f49 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2979,3 +2979,124 @@ out:
 
 	return ret;
 }
+
+static int mt7615_dpd_freq_idx(u16 freq, u8 bw)
+{
+	static const u16 freq_list[] = {
+		4920, 4940, 4960, 4980,
+		5040, 5060, 5080, 5180,
+		5200, 5220, 5240, 5260,
+		5280, 5300, 5320, 5340,
+		5360, 5380, 5400, 5420,
+		5440, 5460, 5480, 5500,
+		5520, 5540, 5560, 5580,
+		5600, 5620, 5640, 5660,
+		5680, 5700, 5720, 5745,
+		5765, 5785, 5805, 5825,
+		5845, 5865, 5885, 5905
+	};
+	int offset_2g = ARRAY_SIZE(freq_list);
+	int idx;
+
+	if (freq < 4000) {
+		if (freq < 2432)
+			return offset_2g;
+		if (freq < 2457)
+			return offset_2g + 1;
+
+		return offset_2g + 2;
+	}
+
+	if (bw != NL80211_CHAN_WIDTH_20) {
+		idx = mt7615_find_freq_idx(freq_list, ARRAY_SIZE(freq_list),
+					   freq + 10);
+		if (idx >= 0)
+			return idx;
+
+		idx = mt7615_find_freq_idx(freq_list, ARRAY_SIZE(freq_list),
+					   freq - 10);
+		if (idx >= 0)
+			return idx;
+	}
+
+	return mt7615_find_freq_idx(freq_list, ARRAY_SIZE(freq_list), freq);
+}
+
+
+int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy)
+{
+	struct mt7615_dev *dev = phy->dev;
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	int freq2 = chandef->center_freq2;
+	int ret;
+	struct {
+		u8 direction;
+		u8 runtime_calibration;
+		u8 _rsv[2];
+
+		__le16 center_freq;
+		u8 bw;
+		u8 band;
+		u8 is_freq2;
+		u8 success;
+		u8 dbdc_en;
+
+		u8 _rsv2;
+
+		struct {
+			struct {
+				u32 dpd_g0;
+				u8 data[32];
+			} wf0, wf1;
+
+			struct {
+				u32 dpd_g0_prim;
+				u32 dpd_g0_sec;
+				u8 data_prim[32];
+				u8 data_sec[32];
+			} wf2, wf3;
+		} dpd_data;
+	} req = {
+		.direction = 1,
+
+		.bw = mt7615_mcu_chan_bw(chandef),
+		.band = chandef->center_freq1 > 4000,
+		.dbdc_en = !!dev->mt76.phy2,
+	};
+	u16 center_freq = chandef->center_freq1;
+	int freq_idx;
+	u8 *eep = dev->mt76.eeprom.data;
+
+	if (!(eep[MT_EE_CALDATA_FLASH] & MT_EE_CALDATA_FLASH_TX_DPD))
+		return 0;
+
+	if (chandef->width == NL80211_CHAN_WIDTH_160) {
+		freq2 = center_freq + 40;
+		center_freq -= 40;
+	}
+
+again:
+	req.runtime_calibration = 1;
+	freq_idx = mt7615_dpd_freq_idx(center_freq, chandef->width);
+	if (freq_idx < 0)
+		goto out;
+
+	memcpy(&req.dpd_data, eep + MT7615_EEPROM_TXDPD_OFFSET +
+			      freq_idx * MT7615_EEPROM_TXDPD_SIZE,
+	       sizeof(req.dpd_data));
+	req.runtime_calibration = 0;
+
+out:
+	req.center_freq = cpu_to_le16(center_freq);
+	ret = __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_TXDPD_CAL, &req,
+				  sizeof(req), true);
+
+	if ((chandef->width == NL80211_CHAN_WIDTH_80P80 ||
+	     chandef->width == NL80211_CHAN_WIDTH_160) && !req.is_freq2) {
+		req.is_freq2 = true;
+		center_freq = freq2;
+		goto again;
+	}
+
+	return ret;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 427478002e67..983625fb72ee 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -271,6 +271,7 @@ enum {
 	MCU_EXT_CMD_SET_RX_PATH = 0x4e,
 	MCU_EXT_CMD_TX_POWER_FEATURE_CTRL = 0x58,
 	MCU_EXT_CMD_RXDCOC_CAL = 0x59,
+	MCU_EXT_CMD_TXDPD_CAL = 0x60,
 	MCU_EXT_CMD_SET_RDD_TH = 0x7c,
 	MCU_EXT_CMD_SET_RDD_PATTERN = 0x7d,
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 39c792826c48..65a2b30fc588 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -496,6 +496,7 @@ int mt7615_mcu_set_radar_th(struct mt7615_dev *dev, int index,
 			    const struct mt7615_dfs_pattern *pattern);
 int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable);
 int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy);
+int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_init_debugfs(struct mt7615_dev *dev);
-- 
cgit v1.2.3-59-g8ed1b


From 6d3390a664bbac544d1656503a3ccf947f154770 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Apr 2020 14:01:56 +0200
Subject: mt76: mt7603: disable merge of OTP ROM data by default

The reference driver does not seem to enable it by default, only under certain
conditions, e.g. when a .bin file is loaded.
Make it opt-in via a device tree property for now, in case it is needed on some
boards.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c
index 2b6a4d8a8dc7..3ee06e2577b8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/eeprom.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: ISC
 
+#include <linux/of.h>
 #include "mt7603.h"
 #include "eeprom.h"
 
@@ -100,10 +101,14 @@ mt7603_apply_cal_free_data(struct mt7603_dev *dev, u8 *efuse)
 		MT_EE_TX_POWER_1_START_2G,
 		MT_EE_TX_POWER_1_START_2G + 1,
 	};
+	struct device_node *np = dev->mt76.dev->of_node;
 	u8 *eeprom = dev->mt76.eeprom.data;
 	int n = ARRAY_SIZE(cal_free_bytes);
 	int i;
 
+	if (!np || !of_property_read_bool(np, "mediatek,eeprom-merge-otp"))
+		return;
+
 	if (!mt7603_has_cal_free_data(dev, efuse))
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9a865741d84ec08dfd04fc28a5fca210f919f54d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 6 Apr 2020 14:03:06 +0200
Subject: mt76: mt76x2: disable merge of OTP ROM data by default

The reference driver does not seem to enable it by default, only under certain
conditions, e.g. when a .bin file is loaded.
Make it opt-in via a device tree property for now, in case it is needed on some
boards.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
index 4a748a6f0ce2..410ffce3baff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/eeprom.c
@@ -4,6 +4,7 @@
  */
 
 #include <linux/module.h>
+#include <linux/of.h>
 #include <asm/unaligned.h>
 #include "mt76x2.h"
 #include "eeprom.h"
@@ -76,6 +77,7 @@ mt76x2_apply_cal_free_data(struct mt76x02_dev *dev, u8 *efuse)
 		MT_EE_RF_5G_GRP4_5_RX_HIGH_GAIN,
 		MT_EE_RF_5G_GRP4_5_RX_HIGH_GAIN + 1,
 	};
+	struct device_node *np = dev->mt76.dev->of_node;
 	u8 *eeprom = dev->mt76.eeprom.data;
 	u8 prev_grp0[4] = {
 		eeprom[MT_EE_TX_POWER_0_START_5G],
@@ -86,6 +88,9 @@ mt76x2_apply_cal_free_data(struct mt76x02_dev *dev, u8 *efuse)
 	u16 val;
 	int i;
 
+	if (!np || !of_property_read_bool(np, "mediatek,eeprom-merge-otp"))
+		return;
+
 	if (!mt76x2_has_cal_free_data(dev, efuse))
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1855ad5360a18131e5c4b4afa1a1c5dea6e456c6 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 6 Apr 2020 23:56:30 +0200
Subject: mt76: mt7615: fix possible division by 0 in
 mt7615_mac_update_mib_stats

Check that val is not zero before estimating aggr_per, in order to avoid a
possible division by zero.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index ed6eb19f48c5..1ef988883660 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1773,9 +1773,11 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 
 	val = mt76_get_field(dev, MT_MIB_SDR14(ext_phy),
 			     MT_MIB_AMPDU_MPDU_COUNT);
-	val2 = mt76_get_field(dev, MT_MIB_SDR15(ext_phy),
-			      MT_MIB_AMPDU_ACK_COUNT);
-	mib->aggr_per = 1000 * (val - val2) / val;
+	if (val) {
+		val2 = mt76_get_field(dev, MT_MIB_SDR15(ext_phy),
+				      MT_MIB_AMPDU_ACK_COUNT);
+		mib->aggr_per = 1000 * (val - val2) / val;
+	}
 
 	aggr = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
 	for (i = 0; i < 4; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From 6bcfdabbadffa19fabafb5b6c7bcf41322b62695 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 8 Apr 2020 14:20:39 +0200
Subject: mt76: mt7663: fix aggr range entry in debugfs

Fix register definitions for aggr range counter registers for mt7663
chipset

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c | 6 ++++--
 drivers/net/wireless/mediatek/mt76/mt7615/init.c    | 1 +
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h    | 5 +++++
 3 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 777c7f9bd760..2163a22967c7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -129,16 +129,18 @@ mt7615_ampdu_stat_read_phy(struct mt7615_phy *phy,
 			   struct seq_file *file)
 {
 	struct mt7615_dev *dev = file->private;
+	u32 reg = is_mt7663(&dev->mt76) ? MT_MIB_ARNG(0) : MT_AGG_ASRCR0;
 	bool ext_phy = phy != &dev->phy;
 	int bound[7], i, range;
 
 	if (!phy)
 		return;
 
-	range = mt76_rr(dev, MT_AGG_ASRCR0);
+	range = mt76_rr(dev, reg);
 	for (i = 0; i < 4; i++)
 		bound[i] = MT_AGG_ASRCR_RANGE(range, i) + 1;
-	range = mt76_rr(dev, MT_AGG_ASRCR1);
+
+	range = mt76_rr(dev, reg + 4);
 	for (i = 0; i < 3; i++)
 		bound[i + 4] = MT_AGG_ASRCR_RANGE(range, i) + 1;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 6acaaf2732df..07d4b259fe8a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -116,6 +116,7 @@ static void mt7615_mac_init(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_DMA_DCR0,
 		FIELD_PREP(MT_DMA_DCR0_MAX_RX_LEN, 3072) |
 		MT_DMA_DCR0_RX_VEC_DROP);
+	mt76_set(dev, MT_WF_MIB_SCR0, MT_MIB_SCR0_AGG_CNT_RANGE_EN);
 	if (is_mt7663(&dev->mt76)) {
 		mt76_wr(dev, MT_WF_AGG(0x160), 0x5c341c02);
 		mt76_wr(dev, MT_WF_AGG(0x164), 0x70708040);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 481e4d941e72..a3333f382350 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -384,6 +384,9 @@ enum mt7615_reg_base {
 #define MT_WF_MIB_BASE			(dev->reg_map[MT_MIB_BASE])
 #define MT_WF_MIB(_band, ofs)		(MT_WF_MIB_BASE + (ofs) + (_band) * 0x200)
 
+#define MT_WF_MIB_SCR0			MT_WF_MIB(0, 0)
+#define MT_MIB_SCR0_AGG_CNT_RANGE_EN	BIT(21)
+
 #define MT_MIB_M0_MISC_CR(_band)	MT_WF_MIB(_band, 0x00c)
 
 #define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x014)
@@ -414,6 +417,8 @@ enum mt7615_reg_base {
 #define MT_MIB_BA_MISS_COUNT_MASK	GENMASK(15, 0)
 #define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
 
+#define MT_MIB_ARNG(n)			MT_WF_MIB(0, 0x4b8 + ((n) << 2))
+
 #define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0xa8 + ((n) << 2))
 
 #define MT_DMA_SHDL(ofs)		(dev->reg_map[MT_DMA_SHDL_BASE] + (ofs))
-- 
cgit v1.2.3-59-g8ed1b


From 635cb0105e01f4c90e69b3cf8e2c1569479e4bf7 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 9 Apr 2020 11:51:17 +0200
Subject: mt76: mt7615: disable hw/sched scan ops for non-offload firmware

Avoid having to attempt hw scan and fall back to software scan for every
scan on devices/firmware without hw scan support.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 7 +++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   | 8 +++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 1 +
 3 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 07d4b259fe8a..7f3683205b8e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -143,6 +143,13 @@ static void mt7615_init_work(struct work_struct *work)
 	mt7615_mac_init(dev);
 	mt7615_phy_init(dev);
 	mt7615_mcu_del_wtbl_all(dev);
+
+	if (!mt7615_firmware_offload(dev)) {
+		dev->ops->hw_scan = NULL;
+		dev->ops->cancel_hw_scan = NULL;
+		dev->ops->sched_scan_start = NULL;
+		dev->ops->sched_scan_stop = NULL;
+	}
 }
 
 static int mt7615_init_hardware(struct mt7615_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index d2eff5442824..3849bb6b49d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -139,11 +139,16 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 		.sta_remove = mt7615_mac_sta_remove,
 		.update_survey = mt7615_update_channel,
 	};
+	struct ieee80211_ops *ops;
 	struct mt7615_dev *dev;
 	struct mt76_dev *mdev;
 	int ret;
 
-	mdev = mt76_alloc_device(pdev, sizeof(*dev), &mt7615_ops, &drv_ops);
+	ops = devm_kmemdup(pdev, &mt7615_ops, sizeof(mt7615_ops), GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	mdev = mt76_alloc_device(pdev, sizeof(*dev), ops, &drv_ops);
 	if (!mdev)
 		return -ENOMEM;
 
@@ -151,6 +156,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 	mt76_mmio_init(&dev->mt76, mem_base);
 
 	dev->reg_map = map;
+	dev->ops = ops;
 	mdev->rev = (mt76_rr(dev, MT_HW_CHIPID) << 16) |
 		    (mt76_rr(dev, MT_HW_REV) & 0xff);
 	dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 65a2b30fc588..4f0d29e5e595 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -235,6 +235,7 @@ struct mt7615_dev {
 
 	u16 chainmask;
 
+	struct ieee80211_ops *ops;
 	const struct mt7615_mcu_ops *mcu_ops;
 	struct regmap *infracfg;
 	const u32 *reg_map;
-- 
cgit v1.2.3-59-g8ed1b


From 639e25a125857be1cba52892959d47384177dd72 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 8 Apr 2020 18:10:35 +0200
Subject: mt76: mt7615: set hw scan limits only for firmware with offload
 support

The hw scan limits do not apply to software scan.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 7f3683205b8e..96b7c6284833 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -145,10 +145,19 @@ static void mt7615_init_work(struct work_struct *work)
 	mt7615_mcu_del_wtbl_all(dev);
 
 	if (!mt7615_firmware_offload(dev)) {
+		struct wiphy *wiphy = mt76_hw(dev)->wiphy;
+
 		dev->ops->hw_scan = NULL;
 		dev->ops->cancel_hw_scan = NULL;
 		dev->ops->sched_scan_start = NULL;
 		dev->ops->sched_scan_stop = NULL;
+
+		wiphy->max_sched_scan_plan_interval = 0;
+		wiphy->max_sched_scan_ie_len = 0;
+		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
+		wiphy->max_sched_scan_ssids = 0;
+		wiphy->max_match_sets = 0;
+		wiphy->max_sched_scan_reqs = 0;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c0f8055b3986f9c9f990268b578173259769ba1c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Apr 2020 13:14:56 +0200
Subject: mt76: mt7622: fix DMA unmap length

Fix DMA unmap length estimation in mt7615_txp_skb_unmap_hw for mt7622
chipset

Fixes: 6aa4ed7927f1 ("mt76: mt7615: implement DMA support for MT7622")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 6 +++---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h | 1 +
 2 files changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 1ef988883660..b7a96d514656 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -695,7 +695,7 @@ mt7615_txp_skb_unmap_hw(struct mt76_dev *dev, struct mt7615_hw_txp *txp)
 
 		len = le16_to_cpu(ptr->len0);
 		last = len & MT_TXD_LEN_MSDU_LAST;
-		len &= ~MT_TXD_LEN_MSDU_LAST;
+		len &= MT_TXD_LEN_MASK;
 		dma_unmap_single(dev->dev, le32_to_cpu(ptr->buf0), len,
 				 DMA_TO_DEVICE);
 		if (last)
@@ -703,7 +703,7 @@ mt7615_txp_skb_unmap_hw(struct mt76_dev *dev, struct mt7615_hw_txp *txp)
 
 		len = le16_to_cpu(ptr->len1);
 		last = len & MT_TXD_LEN_MSDU_LAST;
-		len &= ~MT_TXD_LEN_MSDU_LAST;
+		len &= MT_TXD_LEN_MASK;
 		dma_unmap_single(dev->dev, le32_to_cpu(ptr->buf1), len,
 				 DMA_TO_DEVICE);
 		if (last)
@@ -1114,8 +1114,8 @@ mt7615_write_hw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
 	txp->msdu_id[0] = cpu_to_le16(id | MT_MSDU_ID_VALID);
 
 	for (i = 0; i < nbuf; i++) {
+		u16 len = tx_info->buf[i + 1].len & MT_TXD_LEN_MASK;
 		u32 addr = tx_info->buf[i + 1].addr;
-		u16 len = tx_info->buf[i + 1].len;
 
 		if (i == nbuf - 1)
 			len |= MT_TXD_LEN_MSDU_LAST |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index f3e0070896a1..6260f97432a0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -253,6 +253,7 @@ enum tx_phy_bandwidth {
 
 #define MT_MSDU_ID_VALID		BIT(15)
 
+#define MT_TXD_LEN_MASK			GENMASK(11, 0)
 #define MT_TXD_LEN_MSDU_LAST		BIT(14)
 #define MT_TXD_LEN_AMSDU_LAST		BIT(15)
 
-- 
cgit v1.2.3-59-g8ed1b


From 89829c9e65ab680f7e5a1658cb74bc6316ab036e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Apr 2020 13:14:57 +0200
Subject: mt76: mt7663: fix DMA unmap length

Fix DMA unmap length for mt7663e devices in mt7615_txp_skb_unmap_hw

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Soul Huang <soul.huang@mediatek.com>
Signed-off-by: Soul Huang <soul.huang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 20 ++++++++++++++------
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h |  2 ++
 2 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index b7a96d514656..c38bc395c5a3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -686,15 +686,18 @@ mt7615_txp_skb_unmap_fw(struct mt76_dev *dev, struct mt7615_fw_txp *txp)
 static void
 mt7615_txp_skb_unmap_hw(struct mt76_dev *dev, struct mt7615_hw_txp *txp)
 {
+	u32 last_mask;
 	int i;
 
+	last_mask = is_mt7663(dev) ? MT_TXD_LEN_LAST : MT_TXD_LEN_MSDU_LAST;
+
 	for (i = 0; i < ARRAY_SIZE(txp->ptr); i++) {
 		struct mt7615_txp_ptr *ptr = &txp->ptr[i];
 		bool last;
 		u16 len;
 
 		len = le16_to_cpu(ptr->len0);
-		last = len & MT_TXD_LEN_MSDU_LAST;
+		last = len & last_mask;
 		len &= MT_TXD_LEN_MASK;
 		dma_unmap_single(dev->dev, le32_to_cpu(ptr->buf0), len,
 				 DMA_TO_DEVICE);
@@ -702,7 +705,7 @@ mt7615_txp_skb_unmap_hw(struct mt76_dev *dev, struct mt7615_hw_txp *txp)
 			break;
 
 		len = le16_to_cpu(ptr->len1);
-		last = len & MT_TXD_LEN_MSDU_LAST;
+		last = len & last_mask;
 		len &= MT_TXD_LEN_MASK;
 		dma_unmap_single(dev->dev, le32_to_cpu(ptr->buf1), len,
 				 DMA_TO_DEVICE);
@@ -1105,21 +1108,26 @@ mt7615_write_hw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
 {
 	struct mt7615_hw_txp *txp = txp_ptr;
 	struct mt7615_txp_ptr *ptr = &txp->ptr[0];
-	int nbuf = tx_info->nbuf - 1;
-	int i;
+	int i, nbuf = tx_info->nbuf - 1;
+	u32 last_mask;
 
 	tx_info->buf[0].len = MT_TXD_SIZE + sizeof(*txp);
 	tx_info->nbuf = 1;
 
 	txp->msdu_id[0] = cpu_to_le16(id | MT_MSDU_ID_VALID);
 
+	if (is_mt7663(&dev->mt76))
+		last_mask = MT_TXD_LEN_LAST;
+	else
+		last_mask = MT_TXD_LEN_AMSDU_LAST |
+			    MT_TXD_LEN_MSDU_LAST;
+
 	for (i = 0; i < nbuf; i++) {
 		u16 len = tx_info->buf[i + 1].len & MT_TXD_LEN_MASK;
 		u32 addr = tx_info->buf[i + 1].addr;
 
 		if (i == nbuf - 1)
-			len |= MT_TXD_LEN_MSDU_LAST |
-			       MT_TXD_LEN_AMSDU_LAST;
+			len |= last_mask;
 
 		if (i & 1) {
 			ptr->buf1 = cpu_to_le32(addr);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 6260f97432a0..8ee57d220d71 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -256,6 +256,8 @@ enum tx_phy_bandwidth {
 #define MT_TXD_LEN_MASK			GENMASK(11, 0)
 #define MT_TXD_LEN_MSDU_LAST		BIT(14)
 #define MT_TXD_LEN_AMSDU_LAST		BIT(15)
+/* mt7663 */
+#define MT_TXD_LEN_LAST			BIT(15)
 
 struct mt7615_txp_ptr {
 	__le32 buf0;
-- 
cgit v1.2.3-59-g8ed1b


From 9b90ab32f871eae0de982a6567fe32039a756af5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Thu, 9 Apr 2020 14:37:50 +0200
Subject: mt76: mt7615: rework IRQ handling to prepare for MSI support

With MSI interrupts, IRQs must not be enabled from within the IRQ handler,
because that can lead to lost events.
Defer IRQ processing to a tasklet, which is also responsible for enabling
IRQs (to avoid race conditions against the handler)

Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Soul Huang <soul.huang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mmio.c          |  3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   | 29 ++++++++++++++++------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  9 +++----
 4 files changed, 29 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mmio.c b/drivers/net/wireless/mediatek/mt76/mmio.c
index 7ead6620bb8b..26353b6bce97 100644
--- a/drivers/net/wireless/mediatek/mt76/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mmio.c
@@ -73,7 +73,8 @@ void mt76_set_irq_mask(struct mt76_dev *dev, u32 addr,
 	spin_lock_irqsave(&dev->mmio.irq_lock, flags);
 	dev->mmio.irqmask &= ~clear;
 	dev->mmio.irqmask |= set;
-	mt76_mmio_wr(dev, addr, dev->mmio.irqmask);
+	if (addr)
+		mt76_mmio_wr(dev, addr, dev->mmio.irqmask);
 	spin_unlock_irqrestore(&dev->mmio.irq_lock, flags);
 }
 EXPORT_SYMBOL_GPL(mt76_set_irq_mask);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 96b7c6284833..cb626a2d9197 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -561,5 +561,7 @@ void mt7615_unregister_device(struct mt7615_dev *dev)
 	spin_unlock_bh(&dev->token_lock);
 	idr_destroy(&dev->token);
 
+	tasklet_disable(&dev->irq_tasklet);
+
 	mt76_free_device(&dev->mt76);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index 3849bb6b49d0..e9f9cda6bb59 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -80,30 +80,42 @@ mt7615_rx_poll_complete(struct mt76_dev *mdev, enum mt76_rxq_id q)
 static irqreturn_t mt7615_irq_handler(int irq, void *dev_instance)
 {
 	struct mt7615_dev *dev = dev_instance;
-	u32 intr;
 
-	intr = mt76_rr(dev, MT_INT_SOURCE_CSR);
-	mt76_wr(dev, MT_INT_SOURCE_CSR, intr);
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
 
 	if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
 		return IRQ_NONE;
 
-	trace_dev_irq(&dev->mt76, intr, dev->mt76.mmio.irqmask);
+	tasklet_schedule(&dev->irq_tasklet);
+
+	return IRQ_HANDLED;
+}
+
+static void mt7615_irq_tasklet(unsigned long data)
+{
+	struct mt7615_dev *dev = (struct mt7615_dev *)data;
+	u32 intr, mask = 0;
+
+	mt76_wr(dev, MT_INT_MASK_CSR, 0);
 
+	intr = mt76_rr(dev, MT_INT_SOURCE_CSR);
+	mt76_wr(dev, MT_INT_SOURCE_CSR, intr);
+
+	trace_dev_irq(&dev->mt76, intr, dev->mt76.mmio.irqmask);
 	intr &= dev->mt76.mmio.irqmask;
 
 	if (intr & MT_INT_TX_DONE_ALL) {
-		mt7615_irq_disable(dev, MT_INT_TX_DONE_ALL);
+		mask |= MT_INT_TX_DONE_ALL;
 		napi_schedule(&dev->mt76.tx_napi);
 	}
 
 	if (intr & MT_INT_RX_DONE(0)) {
-		mt7615_irq_disable(dev, MT_INT_RX_DONE(0));
+		mask |= MT_INT_RX_DONE(0);
 		napi_schedule(&dev->mt76.napi[0]);
 	}
 
 	if (intr & MT_INT_RX_DONE(1)) {
-		mt7615_irq_disable(dev, MT_INT_RX_DONE(1));
+		mask |= MT_INT_RX_DONE(1);
 		napi_schedule(&dev->mt76.napi[1]);
 	}
 
@@ -117,7 +129,7 @@ static irqreturn_t mt7615_irq_handler(int irq, void *dev_instance)
 		}
 	}
 
-	return IRQ_HANDLED;
+	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, mask, 0);
 }
 
 int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
@@ -154,6 +166,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 
 	dev = container_of(mdev, struct mt7615_dev, mt76);
 	mt76_mmio_init(&dev->mt76, mem_base);
+	tasklet_init(&dev->irq_tasklet, mt7615_irq_tasklet, (unsigned long)dev);
 
 	dev->reg_map = map;
 	dev->ops = ops;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 4f0d29e5e595..10a98d38f77e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -229,6 +229,8 @@ struct mt7615_dev {
 		struct mt76_phy mphy;
 	};
 
+	struct tasklet_struct irq_tasklet;
+
 	struct mt7615_phy phy;
 	u32 vif_mask;
 	u32 omac_mask;
@@ -404,12 +406,9 @@ static inline bool is_mt7663(struct mt76_dev *dev)
 
 static inline void mt7615_irq_enable(struct mt7615_dev *dev, u32 mask)
 {
-	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, 0, mask);
-}
+	mt76_set_irq_mask(&dev->mt76, 0, 0, mask);
 
-static inline void mt7615_irq_disable(struct mt7615_dev *dev, u32 mask)
-{
-	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, mask, 0);
+	tasklet_schedule(&dev->irq_tasklet);
 }
 
 static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
-- 
cgit v1.2.3-59-g8ed1b


From 9fbb4b8621d7cac34b5b4b20462341b00d9d596c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 9 Apr 2020 18:37:04 +0200
Subject: mt76: mt7615: enable MSI by default

Enable MSI/MSI-X PCI interrupts by default. This patch has been tested
on a Banana Pi R64 board.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index c8d0f893a47f..4f8a3c637a98 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -33,6 +33,10 @@ static int mt7615_pci_probe(struct pci_dev *pdev,
 
 	pci_set_master(pdev);
 
+	ret = pci_alloc_irq_vectors(pdev, 1, 1, PCI_IRQ_ALL_TYPES);
+	if (ret < 0)
+		return ret;
+
 	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (ret)
 		return ret;
@@ -48,6 +52,7 @@ static void mt7615_pci_remove(struct pci_dev *pdev)
 	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
 
 	mt7615_unregister_device(dev);
+	pci_free_irq_vectors(pdev);
 }
 
 struct pci_driver mt7615_pci_driver = {
-- 
cgit v1.2.3-59-g8ed1b


From 5da612090d9a7b1481b7587b3b1deede30a11baf Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Sat, 11 Apr 2020 01:19:26 +0100
Subject: mt76: remove unnecessary annotations

Sparse reports warnings at mt76_tx_status_unlock() and mt76_tx_status_lock():

warning: context imbalance in mt76_tx_status_lock() - wrong count at exit
warning: context imbalance in mt76_tx_status_unlock() - unexpected unlock

The root cause is the additional __acquire(&dev->status_list.lock)
and __release(&dev->status_list.unlock) calls made from inside
mt76_tx_status_lock() and mt76_tx_status_unlock().

Remove the __acquire(&dev->status_list.lock) call.
Remove the __release(&dev->status_list.unlock) call.
Correct the __releases() annotation from &dev->status_list.unlock to
&dev->status_list.lock, since sk_buff_head has no "unlock" member.

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/tx.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/tx.c b/drivers/net/wireless/mediatek/mt76/tx.c
index eff522dbda34..fca38ea2441f 100644
--- a/drivers/net/wireless/mediatek/mt76/tx.c
+++ b/drivers/net/wireless/mediatek/mt76/tx.c
@@ -101,19 +101,17 @@ mt76_tx_status_lock(struct mt76_dev *dev, struct sk_buff_head *list)
 {
 	__skb_queue_head_init(list);
 	spin_lock_bh(&dev->status_list.lock);
-	__acquire(&dev->status_list.lock);
 }
 EXPORT_SYMBOL_GPL(mt76_tx_status_lock);
 
 void
 mt76_tx_status_unlock(struct mt76_dev *dev, struct sk_buff_head *list)
-		      __releases(&dev->status_list.unlock)
+		      __releases(&dev->status_list.lock)
 {
 	struct ieee80211_hw *hw;
 	struct sk_buff *skb;
 
 	spin_unlock_bh(&dev->status_list.lock);
-	__release(&dev->status_list.unlock);
 
 	while ((skb = __skb_dequeue(list)) != NULL) {
 		hw = mt76_tx_status_get_hw(dev, skb);
-- 
cgit v1.2.3-59-g8ed1b


From 4fcf6e770b4487db3dbcf48993a36f16f8836680 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 13 Apr 2020 16:28:48 +0200
Subject: mt76: mt7615: fix possible deadlock in mt7615_stop

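Make mac_work per-phy instead of per-device and fix a possible deadlock
in mt7615_stop(), since mt7615_mac_work() runs holding the mt76 mutex.
Previously mt7615_stop() called cancel_delayed_work_sync() on mac_work
while holding that same mutex; the cancel is now done before taking it.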

Fixes: fdd2e570764c2 ("mt76: mt7615: add dual-phy support for mac80211 ops")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  3 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 58 ++++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 19 +++----
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  4 +-
 4 files changed, 45 insertions(+), 39 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index cb626a2d9197..f06de74d009d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -429,6 +429,7 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	if (phy)
 		return 0;
 
+	INIT_DELAYED_WORK(&phy->mac_work, mt7615_mac_work);
 	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
 	skb_queue_head_init(&phy->scan_event_list);
 
@@ -487,7 +488,7 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	dev->phy.dev = dev;
 	dev->phy.mt76 = &dev->mt76.phy;
 	dev->mt76.phy.priv = &dev->phy;
-	INIT_DELAYED_WORK(&dev->mt76.mac_work, mt7615_mac_work);
+	INIT_DELAYED_WORK(&dev->phy.mac_work, mt7615_mac_work);
 	INIT_DELAYED_WORK(&dev->phy.scan_work, mt7615_scan_work);
 	skb_queue_head_init(&dev->phy.scan_event_list);
 	INIT_LIST_HEAD(&dev->sta_poll_list);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index c38bc395c5a3..5614cd691885 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1815,31 +1815,27 @@ mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
 
 void mt7615_mac_work(struct work_struct *work)
 {
-	struct mt7615_dev *dev;
-	struct mt7615_phy *ext_phy;
+	struct mt7615_phy *phy;
+	struct mt76_dev *mdev;
 
-	dev = (struct mt7615_dev *)container_of(work, struct mt76_dev,
+	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
 						mac_work.work);
+	mdev = &phy->dev->mt76;
 
-	mutex_lock(&dev->mt76.mutex);
-	mt76_update_survey(&dev->mt76);
-	if (++dev->mac_work_count == 5) {
-		ext_phy = mt7615_ext_phy(dev);
-
-		mt7615_mac_update_mib_stats(&dev->phy);
-		mt7615_mac_scs_check(&dev->phy);
-		if (ext_phy) {
-			mt7615_mac_update_mib_stats(ext_phy);
-			mt7615_mac_scs_check(ext_phy);
-		}
+	mutex_lock(&mdev->mutex);
 
-		dev->mac_work_count = 0;
+	mt76_update_survey(mdev);
+	if (++phy->mac_work_count == 5) {
+		phy->mac_work_count = 0;
+
+		mt7615_mac_update_mib_stats(phy);
+		mt7615_mac_scs_check(phy);
 	}
 
-	mutex_unlock(&dev->mt76.mutex);
+	mutex_unlock(&mdev->mutex);
 
-	mt76_tx_status_check(&dev->mt76, NULL, false);
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	mt76_tx_status_check(mdev, NULL, false);
+	ieee80211_queue_delayed_work(phy->mt76->hw, &phy->mac_work,
 				     MT7615_WATCHDOG_TIME);
 }
 
@@ -1902,26 +1898,32 @@ mt7615_dma_reset(struct mt7615_dev *dev)
 
 void mt7615_mac_reset_work(struct work_struct *work)
 {
+	struct mt7615_phy *phy2;
+	struct mt76_phy *ext_phy;
 	struct mt7615_dev *dev;
 
 	dev = container_of(work, struct mt7615_dev, reset_work);
+	ext_phy = dev->mt76.phy2;
+	phy2 = ext_phy ? ext_phy->priv : NULL;
 
 	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_PDMA))
 		return;
 
 	ieee80211_stop_queues(mt76_hw(dev));
-	if (dev->mt76.phy2)
-		ieee80211_stop_queues(dev->mt76.phy2->hw);
+	if (ext_phy)
+		ieee80211_stop_queues(ext_phy->hw);
 
 	set_bit(MT76_RESET, &dev->mphy.state);
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
-	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	cancel_delayed_work_sync(&dev->phy.mac_work);
+	if (phy2)
+		cancel_delayed_work_sync(&phy2->mac_work);
 
 	/* lock/unlock all queues to ensure that no tx is pending */
 	mt76_txq_schedule_all(&dev->mphy);
-	if (dev->mt76.phy2)
-		mt76_txq_schedule_all(dev->mt76.phy2);
+	if (ext_phy)
+		mt76_txq_schedule_all(ext_phy);
 
 	tasklet_disable(&dev->mt76.tx_tasklet);
 	napi_disable(&dev->mt76.napi[0]);
@@ -1955,8 +1957,8 @@ void mt7615_mac_reset_work(struct work_struct *work)
 	napi_schedule(&dev->mt76.napi[1]);
 
 	ieee80211_wake_queues(mt76_hw(dev));
-	if (dev->mt76.phy2)
-		ieee80211_wake_queues(dev->mt76.phy2->hw);
+	if (ext_phy)
+		ieee80211_wake_queues(ext_phy->hw);
 
 	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
 	mt7615_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
@@ -1965,8 +1967,12 @@ void mt7615_mac_reset_work(struct work_struct *work)
 
 	mt7615_update_beacons(dev);
 
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->phy.mac_work,
 				     MT7615_WATCHDOG_TIME);
+	if (phy2)
+		ieee80211_queue_delayed_work(ext_phy->hw, &phy2->mac_work,
+					     MT7615_WATCHDOG_TIME);
+
 }
 
 static void mt7615_dfs_stop_radar_detector(struct mt7615_phy *phy)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index b346080458bc..33f67c7ccbf8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -55,15 +55,12 @@ static int mt7615_start(struct ieee80211_hw *hw)
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
-	if (running)
-		goto out;
-
-	mt7615_mac_reset_counters(dev);
-
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	ieee80211_queue_delayed_work(hw, &phy->mac_work,
 				     MT7615_WATCHDOG_TIME);
 
-out:
+	if (!running)
+		mt7615_mac_reset_counters(dev);
+
 	mutex_unlock(&dev->mt76.mutex);
 
 	return 0;
@@ -74,6 +71,8 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 
+	cancel_delayed_work_sync(&phy->mac_work);
+
 	mutex_lock(&dev->mt76.mutex);
 
 	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
@@ -85,8 +84,6 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 	}
 
 	if (!mt7615_dev_running(dev)) {
-		cancel_delayed_work_sync(&dev->mt76.mac_work);
-
 		mt7615_mcu_set_pm(dev, 0, 1);
 		mt7615_mcu_set_mac_enable(dev, 0, false);
 	}
@@ -245,7 +242,7 @@ static int mt7615_set_channel(struct mt7615_phy *phy)
 	bool ext_phy = phy != &dev->phy;
 	int ret;
 
-	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	cancel_delayed_work_sync(&phy->mac_work);
 
 	mutex_lock(&dev->mt76.mutex);
 	set_bit(MT76_RESET, &phy->mt76->state);
@@ -276,7 +273,7 @@ out:
 	mutex_unlock(&dev->mt76.mutex);
 
 	mt76_txq_schedule_all(phy->mt76);
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	ieee80211_queue_delayed_work(phy->mt76->hw, &phy->mac_work,
 				     MT7615_WATCHDOG_TIME);
 	return ret;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 10a98d38f77e..57c9c1ef8ffc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -192,6 +192,9 @@ struct mt7615_phy {
 
 	struct mib_stats mib;
 
+	struct delayed_work mac_work;
+	u8 mac_work_count;
+
 	struct sk_buff_head scan_event_list;
 	struct delayed_work scan_work;
 };
@@ -259,7 +262,6 @@ struct mt7615_dev {
 	} radar_pattern;
 	u32 hw_pattern;
 
-	u8 mac_work_count;
 	bool fw_debug;
 	bool flash_eeprom;
 
-- 
cgit v1.2.3-59-g8ed1b


From e90354e0452d33f3dc77d7f5c0ff7033f97e1fbf Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 16 Apr 2020 16:32:50 +0200
Subject: mt76: mt7615: move core shared code in mt7615-common module

Create the mt7615-common module in order to collect the code shared
between the usb and mmio drivers. Move the following source files into
the mt7615-common module:
- main.c
- init.c
- mcu.c
- mac.c
- debugfs.c
- eeprom.c
- trace.c

Create the following source files for mmio-only code and move them
into the mt7615e module:
- pci_init.c
- dma.c
- pci_mac.c

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/Makefile        |   2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/Kconfig  |   7 +-
 drivers/net/wireless/mediatek/mt76/mt7615/Makefile |   7 +-
 .../net/wireless/mediatek/mt76/mt7615/debugfs.c    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c    |  39 ----
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 194 ++----------------
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 221 +++++----------------
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  32 +--
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |  15 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   |  30 +++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  12 +-
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   | 189 ++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7615/pci_mac.c    | 184 +++++++++++++++++
 14 files changed, 503 insertions(+), 431 deletions(-)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c

diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile
index d7a1ddc9e407..a1dfafec431b 100644
--- a/drivers/net/wireless/mediatek/mt76/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/Makefile
@@ -26,4 +26,4 @@ mt76x02-usb-y := mt76x02_usb_mcu.o mt76x02_usb_core.o
 obj-$(CONFIG_MT76x0_COMMON) += mt76x0/
 obj-$(CONFIG_MT76x2_COMMON) += mt76x2/
 obj-$(CONFIG_MT7603E) += mt7603/
-obj-$(CONFIG_MT7615E) += mt7615/
+obj-$(CONFIG_MT7615_COMMON) += mt7615/
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
index 6afd4aea67ed..16385767d8b9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
@@ -1,7 +1,12 @@
 # SPDX-License-Identifier: GPL-2.0-only
+
+config MT7615_COMMON
+	tristate
+	select MT76_CORE
+
 config MT7615E
 	tristate "MediaTek MT7615E (PCIe) support"
-	select MT76_CORE
+	select MT7615_COMMON
 	depends on MAC80211
 	depends on PCI
 	help
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
index 5c6a220ed7e3..2a7937b4394f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
@@ -1,9 +1,12 @@
 #SPDX-License-Identifier: ISC
 
+obj-$(CONFIG_MT7615_COMMON) += mt7615-common.o
 obj-$(CONFIG_MT7615E) += mt7615e.o
 
 CFLAGS_trace.o := -I$(src)
 
-mt7615e-y := pci.o init.o dma.o eeprom.o main.o mcu.o mac.o mmio.o \
-	     debugfs.o trace.o
+mt7615-common-y := main.o init.o mcu.o eeprom.o mac.o \
+		   debugfs.o trace.o
+
+mt7615e-y := pci.o pci_init.o dma.o pci_mac.o mmio.o
 mt7615e-$(CONFIG_MT7622_WMAC) += soc.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 2163a22967c7..150036488e3f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -325,3 +325,4 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_init_debugfs);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index b19f208e3d54..b0ba0e7807bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -94,45 +94,6 @@ mt7615_init_tx_queues(struct mt7615_dev *dev)
 	return 0;
 }
 
-void mt7615_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
-			 struct sk_buff *skb)
-{
-	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
-	__le32 *rxd = (__le32 *)skb->data;
-	__le32 *end = (__le32 *)&skb->data[skb->len];
-	enum rx_pkt_type type;
-	u16 flag;
-
-	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
-	flag = FIELD_GET(MT_RXD0_PKT_FLAG, le32_to_cpu(rxd[0]));
-	if (type == PKT_TYPE_RX_EVENT && flag == 0x1)
-		type = PKT_TYPE_NORMAL_MCU;
-
-	switch (type) {
-	case PKT_TYPE_TXS:
-		for (rxd++; rxd + 7 <= end; rxd += 7)
-			mt7615_mac_add_txs(dev, rxd);
-		dev_kfree_skb(skb);
-		break;
-	case PKT_TYPE_TXRX_NOTIFY:
-		mt7615_mac_tx_free(dev, skb);
-		break;
-	case PKT_TYPE_RX_EVENT:
-		mt7615_mcu_rx_event(dev, skb);
-		break;
-	case PKT_TYPE_NORMAL_MCU:
-	case PKT_TYPE_NORMAL:
-		if (!mt7615_mac_fill_rx(dev, skb)) {
-			mt76_rx(&dev->mt76, q, skb);
-			return;
-		}
-		/* fall through */
-	default:
-		dev_kfree_skb(skb);
-		break;
-	}
-}
-
 static void
 mt7615_tx_cleanup(struct mt7615_dev *dev)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 7a09427463b0..521705015036 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -297,3 +297,4 @@ int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_eeprom_init);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index f06de74d009d..9d9f73b4561e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -12,12 +12,13 @@
 #include "mac.h"
 #include "eeprom.h"
 
-static void mt7615_phy_init(struct mt7615_dev *dev)
+void mt7615_phy_init(struct mt7615_dev *dev)
 {
 	/* disable rf low power beacon mode */
 	mt76_set(dev, MT_WF_PHY_WF2_RFCTRL0(0), MT_WF_PHY_WF2_RFCTRL0_LPBCN_EN);
 	mt76_set(dev, MT_WF_PHY_WF2_RFCTRL0(1), MT_WF_PHY_WF2_RFCTRL0_LPBCN_EN);
 }
+EXPORT_SYMBOL_GPL(mt7615_phy_init);
 
 static void
 mt7615_init_mac_chain(struct mt7615_dev *dev, int chain)
@@ -77,7 +78,7 @@ mt7615_init_mac_chain(struct mt7615_dev *dev, int chain)
 	}
 }
 
-static void mt7615_mac_init(struct mt7615_dev *dev)
+void mt7615_mac_init(struct mt7615_dev *dev)
 {
 	int i;
 
@@ -124,6 +125,7 @@ static void mt7615_mac_init(struct mt7615_dev *dev)
 		mt7615_init_mac_chain(dev, 1);
 	}
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_init);
 
 bool mt7615_wait_for_mcu_init(struct mt7615_dev *dev)
 {
@@ -131,68 +133,7 @@ bool mt7615_wait_for_mcu_init(struct mt7615_dev *dev)
 
 	return test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
 }
-
-static void mt7615_init_work(struct work_struct *work)
-{
-	struct mt7615_dev *dev = container_of(work, struct mt7615_dev, mcu_work);
-
-	if (mt7615_mcu_init(dev))
-		return;
-
-	mt7615_mcu_set_eeprom(dev);
-	mt7615_mac_init(dev);
-	mt7615_phy_init(dev);
-	mt7615_mcu_del_wtbl_all(dev);
-
-	if (!mt7615_firmware_offload(dev)) {
-		struct wiphy *wiphy = mt76_hw(dev)->wiphy;
-
-		dev->ops->hw_scan = NULL;
-		dev->ops->cancel_hw_scan = NULL;
-		dev->ops->sched_scan_start = NULL;
-		dev->ops->sched_scan_stop = NULL;
-
-		wiphy->max_sched_scan_plan_interval = 0;
-		wiphy->max_sched_scan_ie_len = 0;
-		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
-		wiphy->max_sched_scan_ssids = 0;
-		wiphy->max_match_sets = 0;
-		wiphy->max_sched_scan_reqs = 0;
-	}
-}
-
-static int mt7615_init_hardware(struct mt7615_dev *dev)
-{
-	u32 addr = mt7615_reg_map(dev, MT_EFUSE_BASE);
-	int ret, idx;
-
-	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
-
-	INIT_WORK(&dev->mcu_work, mt7615_init_work);
-	spin_lock_init(&dev->token_lock);
-	idr_init(&dev->token);
-
-	ret = mt7615_eeprom_init(dev, addr);
-	if (ret < 0)
-		return ret;
-
-	ret = mt7615_dma_init(dev);
-	if (ret)
-		return ret;
-
-	set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state);
-
-	/* Beacon and mgmt frames should occupy wcid 0 */
-	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7615_WTBL_STA - 1);
-	if (idx)
-		return -ENOSPC;
-
-	dev->mt76.global_wcid.idx = idx;
-	dev->mt76.global_wcid.hw_key_idx = -1;
-	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
-
-	return 0;
-}
+EXPORT_SYMBOL_GPL(mt7615_wait_for_mcu_init);
 
 #define CCK_RATE(_idx, _rate) {						\
 	.bitrate = _rate,						\
@@ -207,7 +148,7 @@ static int mt7615_init_hardware(struct mt7615_dev *dev)
 	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
 }
 
-static struct ieee80211_rate mt7615_rates[] = {
+struct ieee80211_rate mt7615_rates[] = {
 	CCK_RATE(0, 10),
 	CCK_RATE(1, 20),
 	CCK_RATE(2, 55),
@@ -221,6 +162,7 @@ static struct ieee80211_rate mt7615_rates[] = {
 	OFDM_RATE(8,  480),
 	OFDM_RATE(12, 540),
 };
+EXPORT_SYMBOL_GPL(mt7615_rates);
 
 static const struct ieee80211_iface_limit if_limits[] = {
 	{
@@ -246,61 +188,8 @@ static const struct ieee80211_iface_combination if_comb[] = {
 	}
 };
 
-static void
-mt7615_led_set_config(struct led_classdev *led_cdev,
-		      u8 delay_on, u8 delay_off)
-{
-	struct mt7615_dev *dev;
-	struct mt76_dev *mt76;
-	u32 val, addr;
-
-	mt76 = container_of(led_cdev, struct mt76_dev, led_cdev);
-	dev = container_of(mt76, struct mt7615_dev, mt76);
-	val = FIELD_PREP(MT_LED_STATUS_DURATION, 0xffff) |
-	      FIELD_PREP(MT_LED_STATUS_OFF, delay_off) |
-	      FIELD_PREP(MT_LED_STATUS_ON, delay_on);
-
-	addr = mt7615_reg_map(dev, MT_LED_STATUS_0(mt76->led_pin));
-	mt76_wr(dev, addr, val);
-	addr = mt7615_reg_map(dev, MT_LED_STATUS_1(mt76->led_pin));
-	mt76_wr(dev, addr, val);
-
-	val = MT_LED_CTRL_REPLAY(mt76->led_pin) |
-	      MT_LED_CTRL_KICK(mt76->led_pin);
-	if (mt76->led_al)
-		val |= MT_LED_CTRL_POLARITY(mt76->led_pin);
-	addr = mt7615_reg_map(dev, MT_LED_CTRL);
-	mt76_wr(dev, addr, val);
-}
-
-static int
-mt7615_led_set_blink(struct led_classdev *led_cdev,
-		     unsigned long *delay_on,
-		     unsigned long *delay_off)
-{
-	u8 delta_on, delta_off;
-
-	delta_off = max_t(u8, *delay_off / 10, 1);
-	delta_on = max_t(u8, *delay_on / 10, 1);
-
-	mt7615_led_set_config(led_cdev, delta_on, delta_off);
-
-	return 0;
-}
-
-static void
-mt7615_led_set_brightness(struct led_classdev *led_cdev,
-			  enum led_brightness brightness)
-{
-	if (!brightness)
-		mt7615_led_set_config(led_cdev, 0, 0xff);
-	else
-		mt7615_led_set_config(led_cdev, 0xff, 0);
-}
-
-static void
-mt7615_init_txpower(struct mt7615_dev *dev,
-		    struct ieee80211_supported_band *sband)
+void mt7615_init_txpower(struct mt7615_dev *dev,
+			 struct ieee80211_supported_band *sband)
 {
 	int i, n_chains = hweight8(dev->mphy.antenna_mask), target_chains;
 	u8 *eep = (u8 *)dev->mt76.eeprom.data;
@@ -326,6 +215,7 @@ mt7615_init_txpower(struct mt7615_dev *dev,
 		chan->orig_mpwr = target_power;
 	}
 }
+EXPORT_SYMBOL_GPL(mt7615_init_txpower);
 
 static void
 mt7615_regd_notifier(struct wiphy *wiphy,
@@ -467,6 +357,7 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(mt7615_register_ext_phy);
 
 void mt7615_unregister_ext_phy(struct mt7615_dev *dev)
 {
@@ -480,6 +371,7 @@ void mt7615_unregister_ext_phy(struct mt7615_dev *dev)
 	mt76_unregister_phy(mphy);
 	ieee80211_free_hw(mphy->hw);
 }
+EXPORT_SYMBOL_GPL(mt7615_unregister_ext_phy);
 
 void mt7615_init_device(struct mt7615_dev *dev)
 {
@@ -505,64 +397,4 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	mt7615_cap_dbdc_disable(dev);
 	dev->phy.dfs_state = -1;
 }
-
-int mt7615_register_device(struct mt7615_dev *dev)
-{
-	int ret;
-
-	mt7615_init_device(dev);
-
-	/* init led callbacks */
-	if (IS_ENABLED(CONFIG_MT76_LEDS)) {
-		dev->mt76.led_cdev.brightness_set = mt7615_led_set_brightness;
-		dev->mt76.led_cdev.blink_set = mt7615_led_set_blink;
-	}
-
-	ret = mt7622_wmac_init(dev);
-	if (ret)
-		return ret;
-
-	ret = mt7615_init_hardware(dev);
-	if (ret)
-		return ret;
-
-	ret = mt76_register_device(&dev->mt76, true, mt7615_rates,
-				   ARRAY_SIZE(mt7615_rates));
-	if (ret)
-		return ret;
-
-	ieee80211_queue_work(mt76_hw(dev), &dev->mcu_work);
-	mt7615_init_txpower(dev, &dev->mphy.sband_2g.sband);
-	mt7615_init_txpower(dev, &dev->mphy.sband_5g.sband);
-
-	return mt7615_init_debugfs(dev);
-}
-
-void mt7615_unregister_device(struct mt7615_dev *dev)
-{
-	struct mt76_txwi_cache *txwi;
-	bool mcu_running;
-	int id;
-
-	mcu_running = mt7615_wait_for_mcu_init(dev);
-
-	mt7615_unregister_ext_phy(dev);
-	mt76_unregister_device(&dev->mt76);
-	if (mcu_running)
-		mt7615_mcu_exit(dev);
-	mt7615_dma_cleanup(dev);
-
-	spin_lock_bh(&dev->token_lock);
-	idr_for_each_entry(&dev->token, txwi, id) {
-		mt7615_txp_skb_unmap(&dev->mt76, txwi);
-		if (txwi->skb)
-			dev_kfree_skb_any(txwi->skb);
-		mt76_put_txwi(&dev->mt76, txwi);
-	}
-	spin_unlock_bh(&dev->token_lock);
-	idr_destroy(&dev->token);
-
-	tasklet_disable(&dev->irq_tasklet);
-
-	mt76_free_device(&dev->mt76);
-}
+EXPORT_SYMBOL_GPL(mt7615_init_device);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 5614cd691885..adaf52058bc5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -181,7 +181,7 @@ mt7615_get_status_freq_info(struct mt7615_dev *dev, struct mt76_phy *mphy,
 	status->freq = ieee80211_channel_to_frequency(chfreq, status->band);
 }
 
-int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
+static int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_phy *mphy = &dev->mt76.phy;
@@ -424,40 +424,7 @@ int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb)
 void mt7615_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps)
 {
 }
-
-void mt7615_tx_complete_skb(struct mt76_dev *mdev, enum mt76_txq_id qid,
-			    struct mt76_queue_entry *e)
-{
-	if (!e->txwi) {
-		dev_kfree_skb_any(e->skb);
-		return;
-	}
-
-	/* error path */
-	if (e->skb == DMA_DUMMY_DATA) {
-		struct mt76_txwi_cache *t;
-		struct mt7615_dev *dev;
-		struct mt7615_txp_common *txp;
-		u16 token;
-
-		dev = container_of(mdev, struct mt7615_dev, mt76);
-		txp = mt7615_txwi_to_txp(mdev, e->txwi);
-
-		if (is_mt7615(&dev->mt76))
-			token = le16_to_cpu(txp->fw.token);
-		else
-			token = le16_to_cpu(txp->hw.msdu_id[0]) &
-				~MT_MSDU_ID_VALID;
-
-		spin_lock_bh(&dev->token_lock);
-		t = idr_remove(&dev->token, token);
-		spin_unlock_bh(&dev->token_lock);
-		e->skb = t ? t->skb : NULL;
-	}
-
-	if (e->skb)
-		mt76_tx_complete_skb(mdev, e->skb);
-}
+EXPORT_SYMBOL_GPL(mt7615_sta_ps);
 
 static u16
 mt7615_mac_tx_rate_val(struct mt7615_dev *dev,
@@ -672,6 +639,7 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_write_txwi);
 
 static void
 mt7615_txp_skb_unmap_fw(struct mt76_dev *dev, struct mt7615_fw_txp *txp)
@@ -725,6 +693,7 @@ void mt7615_txp_skb_unmap(struct mt76_dev *dev,
 	else
 		mt7615_txp_skb_unmap_hw(dev, &txp->hw);
 }
+EXPORT_SYMBOL_GPL(mt7615_txp_skb_unmap);
 
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask)
 {
@@ -810,6 +779,7 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
 
 	rcu_read_unlock();
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_sta_poll);
 
 static void
 mt7615_mac_update_rate_desc(struct mt7615_phy *phy, struct mt7615_sta *sta,
@@ -965,6 +935,7 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	sta->rate_count = 2 * MT7615_RATE_RETRY * n_rates;
 	sta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_set_rates);
 
 int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
 			       struct mt76_wcid *wcid,
@@ -1102,141 +1073,6 @@ out:
 	return err;
 }
 
-static void
-mt7615_write_hw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
-		    void *txp_ptr, u32 id)
-{
-	struct mt7615_hw_txp *txp = txp_ptr;
-	struct mt7615_txp_ptr *ptr = &txp->ptr[0];
-	int i, nbuf = tx_info->nbuf - 1;
-	u32 last_mask;
-
-	tx_info->buf[0].len = MT_TXD_SIZE + sizeof(*txp);
-	tx_info->nbuf = 1;
-
-	txp->msdu_id[0] = cpu_to_le16(id | MT_MSDU_ID_VALID);
-
-	if (is_mt7663(&dev->mt76))
-		last_mask = MT_TXD_LEN_LAST;
-	else
-		last_mask = MT_TXD_LEN_AMSDU_LAST |
-			    MT_TXD_LEN_MSDU_LAST;
-
-	for (i = 0; i < nbuf; i++) {
-		u16 len = tx_info->buf[i + 1].len & MT_TXD_LEN_MASK;
-		u32 addr = tx_info->buf[i + 1].addr;
-
-		if (i == nbuf - 1)
-			len |= last_mask;
-
-		if (i & 1) {
-			ptr->buf1 = cpu_to_le32(addr);
-			ptr->len1 = cpu_to_le16(len);
-			ptr++;
-		} else {
-			ptr->buf0 = cpu_to_le32(addr);
-			ptr->len0 = cpu_to_le16(len);
-		}
-	}
-}
-
-static void
-mt7615_write_fw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
-		    void *txp_ptr, u32 id)
-{
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx_info->skb->data;
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
-	struct ieee80211_key_conf *key = info->control.hw_key;
-	struct ieee80211_vif *vif = info->control.vif;
-	struct mt7615_fw_txp *txp = txp_ptr;
-	int nbuf = tx_info->nbuf - 1;
-	int i;
-
-	for (i = 0; i < nbuf; i++) {
-		txp->buf[i] = cpu_to_le32(tx_info->buf[i + 1].addr);
-		txp->len[i] = cpu_to_le16(tx_info->buf[i + 1].len);
-	}
-	txp->nbuf = nbuf;
-
-	/* pass partial skb header to fw */
-	tx_info->buf[0].len = MT_TXD_SIZE + sizeof(*txp);
-	tx_info->buf[1].len = MT_CT_PARSE_LEN;
-	tx_info->nbuf = MT_CT_DMA_BUF_NUM;
-
-	txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD);
-
-	if (!key)
-		txp->flags |= cpu_to_le16(MT_CT_INFO_NONE_CIPHER_FRAME);
-
-	if (ieee80211_is_mgmt(hdr->frame_control))
-		txp->flags |= cpu_to_le16(MT_CT_INFO_MGMT_FRAME);
-
-	if (vif) {
-		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
-
-		txp->bss_idx = mvif->idx;
-	}
-
-	txp->token = cpu_to_le16(id);
-	txp->rept_wds_wcid = 0xff;
-}
-
-int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
-			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
-			  struct ieee80211_sta *sta,
-			  struct mt76_tx_info *tx_info)
-{
-	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
-	struct mt7615_sta *msta = container_of(wcid, struct mt7615_sta, wcid);
-	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
-	struct ieee80211_key_conf *key = info->control.hw_key;
-	int pid, id;
-	u8 *txwi = (u8 *)txwi_ptr;
-	struct mt76_txwi_cache *t;
-	void *txp;
-
-	if (!wcid)
-		wcid = &dev->mt76.global_wcid;
-
-	pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
-
-	if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
-		struct mt7615_phy *phy = &dev->phy;
-
-		if ((info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY) && mdev->phy2)
-			phy = mdev->phy2->priv;
-
-		spin_lock_bh(&dev->mt76.lock);
-		mt7615_mac_set_rates(phy, msta, &info->control.rates[0],
-				     msta->rates);
-		msta->rate_probe = true;
-		spin_unlock_bh(&dev->mt76.lock);
-	}
-
-	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
-	t->skb = tx_info->skb;
-
-	spin_lock_bh(&dev->token_lock);
-	id = idr_alloc(&dev->token, t, 0, MT7615_TOKEN_SIZE, GFP_ATOMIC);
-	spin_unlock_bh(&dev->token_lock);
-	if (id < 0)
-		return id;
-
-	mt7615_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, sta,
-			      pid, key, false);
-
-	txp = txwi + MT_TXD_SIZE;
-	memset(txp, 0, sizeof(struct mt7615_txp_common));
-	if (is_mt7615(&dev->mt76))
-		mt7615_write_fw_txp(dev, tx_info, txp, id);
-	else
-		mt7615_write_hw_txp(dev, tx_info, txp, id);
-
-	tx_info->skb = DMA_DUMMY_DATA;
-
-	return 0;
-}
-
 static bool mt7615_fill_txs(struct mt7615_dev *dev, struct mt7615_sta *sta,
 			    struct ieee80211_tx_info *info, __le32 *txs_data)
 {
@@ -1414,7 +1250,7 @@ static bool mt7615_mac_add_txs_skb(struct mt7615_dev *dev,
 	return !!skb;
 }
 
-void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data)
+static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data)
 {
 	struct ieee80211_tx_info info = {};
 	struct ieee80211_sta *sta = NULL;
@@ -1491,7 +1327,7 @@ mt7615_mac_tx_free_token(struct mt7615_dev *dev, u16 token)
 	mt76_put_txwi(mdev, txwi);
 }
 
-void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb)
+static void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb)
 {
 	struct mt7615_tx_free *free = (struct mt7615_tx_free *)skb->data;
 	u8 i, count;
@@ -1512,6 +1348,46 @@ void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb)
 	dev_kfree_skb(skb);
 }
 
+void mt7615_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
+			 struct sk_buff *skb)
+{
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	__le32 *rxd = (__le32 *)skb->data;
+	__le32 *end = (__le32 *)&skb->data[skb->len];
+	enum rx_pkt_type type;
+	u16 flag;
+
+	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
+	flag = FIELD_GET(MT_RXD0_PKT_FLAG, le32_to_cpu(rxd[0]));
+	if (type == PKT_TYPE_RX_EVENT && flag == 0x1)
+		type = PKT_TYPE_NORMAL_MCU;
+
+	switch (type) {
+	case PKT_TYPE_TXS:
+		for (rxd++; rxd + 7 <= end; rxd += 7)
+			mt7615_mac_add_txs(dev, rxd);
+		dev_kfree_skb(skb);
+		break;
+	case PKT_TYPE_TXRX_NOTIFY:
+		mt7615_mac_tx_free(dev, skb);
+		break;
+	case PKT_TYPE_RX_EVENT:
+		mt7615_mcu_rx_event(dev, skb);
+		break;
+	case PKT_TYPE_NORMAL_MCU:
+	case PKT_TYPE_NORMAL:
+		if (!mt7615_mac_fill_rx(dev, skb)) {
+			mt76_rx(&dev->mt76, q, skb);
+			return;
+		}
+		/* fall through */
+	default:
+		dev_kfree_skb(skb);
+		break;
+	}
+}
+EXPORT_SYMBOL_GPL(mt7615_queue_rx_skb);
+
 static void
 mt7615_mac_set_default_sensitivity(struct mt7615_phy *phy)
 {
@@ -1764,6 +1640,7 @@ void mt7615_update_channel(struct mt76_dev *mdev)
 	/* reset obss airtime */
 	mt76_set(dev, MT_WF_RMAC_MIB_TIME0, MT_WF_RMAC_MIB_RXTIME_CLR);
 }
+EXPORT_SYMBOL_GPL(mt7615_update_channel);
 
 static void
 mt7615_mac_update_mib_stats(struct mt7615_phy *phy)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 33f67c7ccbf8..5777e5e62ef0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -4,11 +4,10 @@
  * Author: Roy Luo <royluo@google.com>
  *         Ryder Lee <ryder.lee@mediatek.com>
  *         Felix Fietkau <nbd@nbd.name>
+ *         Lorenzo Bianconi <lorenzo@kernel.org>
  */
 
 #include <linux/etherdevice.h>
-#include <linux/platform_device.h>
-#include <linux/pci.h>
 #include <linux/module.h>
 #include "mt7615.h"
 #include "mcu.h"
@@ -496,6 +495,7 @@ int mt7615_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_sta_add);
 
 void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 			   struct ieee80211_sta *sta)
@@ -512,6 +512,7 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 		list_del_init(&msta->poll_list);
 	spin_unlock_bh(&dev->sta_poll_lock);
 }
+EXPORT_SYMBOL_GPL(mt7615_mac_sta_remove);
 
 static void mt7615_sta_rate_tbl_update(struct ieee80211_hw *hw,
 				       struct ieee80211_vif *vif,
@@ -832,31 +833,6 @@ const struct ieee80211_ops mt7615_ops = {
 	.sched_scan_start = mt7615_start_sched_scan,
 	.sched_scan_stop = mt7615_stop_sched_scan,
 };
+EXPORT_SYMBOL_GPL(mt7615_ops);
 
-static int __init mt7615_init(void)
-{
-	int ret;
-
-	ret = pci_register_driver(&mt7615_pci_driver);
-	if (ret)
-		return ret;
-
-	if (IS_ENABLED(CONFIG_MT7622_WMAC)) {
-		ret = platform_driver_register(&mt7622_wmac_driver);
-		if (ret)
-			pci_unregister_driver(&mt7615_pci_driver);
-	}
-
-	return ret;
-}
-
-static void __exit mt7615_exit(void)
-{
-	if (IS_ENABLED(CONFIG_MT7622_WMAC))
-		platform_driver_unregister(&mt7622_wmac_driver);
-	pci_unregister_driver(&mt7615_pci_driver);
-}
-
-module_init(mt7615_init);
-module_exit(mt7615_exit);
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index f51d252c5f49..39d596436480 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -152,6 +152,7 @@ void mt7615_mcu_fill_msg(struct mt7615_dev *dev, struct sk_buff *skb,
 		break;
 	}
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_fill_msg);
 
 static int __mt7615_mcu_msg_send(struct mt7615_dev *dev, struct sk_buff *skb,
 				 int cmd, int *wait_seq)
@@ -215,6 +216,7 @@ int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq)
 
 	return ret;
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_wait_response);
 
 static int
 mt7615_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
@@ -238,9 +240,8 @@ out:
 	return ret;
 }
 
-static int
-mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
-		    int len, bool wait_resp)
+int mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
+			int len, bool wait_resp)
 {
 	struct sk_buff *skb;
 
@@ -250,6 +251,7 @@ mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 
 	return __mt76_mcu_skb_send_msg(mdev, skb, cmd, wait_resp);
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_msg_send);
 
 static void
 mt7615_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
@@ -1629,11 +1631,12 @@ static int mt7615_mcu_start_firmware(struct mt7615_dev *dev, u32 addr,
 				   &req, sizeof(req), true);
 }
 
-static int mt7615_mcu_restart(struct mt76_dev *dev)
+int mt7615_mcu_restart(struct mt76_dev *dev)
 {
 	return __mt76_mcu_send_msg(dev, MCU_CMD_RESTART_DL_REQ, NULL,
 				   0, true);
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_restart);
 
 static int mt7615_mcu_patch_sem_ctrl(struct mt7615_dev *dev, bool get)
 {
@@ -2165,6 +2168,7 @@ int mt7615_mcu_init(struct mt7615_dev *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_init);
 
 void mt7615_mcu_exit(struct mt7615_dev *dev)
 {
@@ -2172,6 +2176,7 @@ void mt7615_mcu_exit(struct mt7615_dev *dev)
 	mt7615_firmware_own(dev);
 	skb_queue_purge(&dev->mt76.mcu.res_q);
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_exit);
 
 int mt7615_mcu_set_eeprom(struct mt7615_dev *dev)
 {
@@ -2214,6 +2219,7 @@ int mt7615_mcu_set_eeprom(struct mt7615_dev *dev)
 	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
 				       MCU_EXT_CMD_EFUSE_BUFFER_MODE, true);
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_set_eeprom);
 
 int mt7615_mcu_set_mac_enable(struct mt7615_dev *dev, int band, bool enable)
 {
@@ -2355,6 +2361,7 @@ int mt7615_mcu_del_wtbl_all(struct mt7615_dev *dev)
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_WTBL_UPDATE,
 				   &req, sizeof(req), true);
 }
+EXPORT_SYMBOL_GPL(mt7615_mcu_del_wtbl_all);
 
 int mt7615_mcu_rdd_cmd(struct mt7615_dev *dev,
 		       enum mt7615_rdd_cmd cmd, u8 index,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index e9f9cda6bb59..2c4b1a315d5b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -1,5 +1,7 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
 
 #include "mt7615.h"
 #include "regs.h"
@@ -191,3 +193,31 @@ error:
 	ieee80211_free_hw(mt76_hw(dev));
 	return ret;
 }
+
+static int __init mt7615_init(void)
+{
+	int ret;
+
+	ret = pci_register_driver(&mt7615_pci_driver);
+	if (ret)
+		return ret;
+
+	if (IS_ENABLED(CONFIG_MT7622_WMAC)) {
+		ret = platform_driver_register(&mt7622_wmac_driver);
+		if (ret)
+			pci_unregister_driver(&mt7615_pci_driver);
+	}
+
+	return ret;
+}
+
+static void __exit mt7615_exit(void)
+{
+	if (IS_ENABLED(CONFIG_MT7622_WMAC))
+		platform_driver_unregister(&mt7622_wmac_driver);
+	pci_unregister_driver(&mt7615_pci_driver);
+}
+
+module_init(mt7615_init);
+module_exit(mt7615_exit);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 57c9c1ef8ffc..fb891b80718b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -342,6 +342,7 @@ mt7615_ext_phy(struct mt7615_dev *dev)
 	return phy->priv;
 }
 
+extern struct ieee80211_rate mt7615_rates[12];
 extern const struct ieee80211_ops mt7615_ops;
 extern const u32 mt7615e_reg_map[__MT_BASE_MAX];
 extern const u32 mt7663e_reg_map[__MT_BASE_MAX];
@@ -419,6 +420,12 @@ static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
 }
 
 void mt7615_scan_work(struct work_struct *work);
+void mt7615_init_txpower(struct mt7615_dev *dev,
+			 struct ieee80211_supported_band *sband);
+void mt7615_phy_init(struct mt7615_dev *dev);
+void mt7615_mac_init(struct mt7615_dev *dev);
+
+int mt7615_mcu_restart(struct mt76_dev *dev);
 void mt7615_update_channel(struct mt76_dev *mdev);
 bool mt7615_mac_wtbl_update(struct mt7615_dev *dev, int idx, u32 mask);
 void mt7615_mac_reset_counters(struct mt7615_dev *dev);
@@ -431,9 +438,6 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 			  struct ieee80211_sta *sta, int pid,
 			  struct ieee80211_key_conf *key, bool beacon);
 void mt7615_mac_set_timing(struct mt7615_phy *phy);
-int mt7615_mac_fill_rx(struct mt7615_dev *dev, struct sk_buff *skb);
-void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data);
-void mt7615_mac_tx_free(struct mt7615_dev *dev, struct sk_buff *skb);
 int mt7615_mac_wtbl_set_key(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 			    struct ieee80211_key_conf *key,
 			    enum set_key_cmd cmd);
@@ -453,6 +457,8 @@ int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
 void mt7615_mac_reset_work(struct work_struct *work);
 
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
+int mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
+			int len, bool wait_resp);
 int mt7615_mcu_set_dbdc(struct mt7615_dev *dev);
 int mt7615_mcu_set_eeprom(struct mt7615_dev *dev);
 int mt7615_mcu_set_mac_enable(struct mt7615_dev *dev, int band, bool enable);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
new file mode 100644
index 000000000000..3a8dd334b53e
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -0,0 +1,189 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2019 MediaTek Inc.
+ *
+ * Author: Roy Luo <royluo@google.com>
+ *         Ryder Lee <ryder.lee@mediatek.com>
+ *         Felix Fietkau <nbd@nbd.name>
+ *         Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include <linux/etherdevice.h>
+#include "mt7615.h"
+#include "mac.h"
+#include "eeprom.h"
+
+static void mt7615_init_work(struct work_struct *work)
+{
+	struct mt7615_dev *dev = container_of(work, struct mt7615_dev,
+					      mcu_work);
+
+	if (mt7615_mcu_init(dev))
+		return;
+
+	mt7615_mcu_set_eeprom(dev);
+	mt7615_mac_init(dev);
+	mt7615_phy_init(dev);
+	mt7615_mcu_del_wtbl_all(dev);
+
+	if (!mt7615_firmware_offload(dev)) {
+		struct wiphy *wiphy = mt76_hw(dev)->wiphy;
+
+		dev->ops->hw_scan = NULL;
+		dev->ops->cancel_hw_scan = NULL;
+		dev->ops->sched_scan_start = NULL;
+		dev->ops->sched_scan_stop = NULL;
+
+		wiphy->max_sched_scan_plan_interval = 0;
+		wiphy->max_sched_scan_ie_len = 0;
+		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
+		wiphy->max_sched_scan_ssids = 0;
+		wiphy->max_match_sets = 0;
+		wiphy->max_sched_scan_reqs = 0;
+	}
+}
+
+static int mt7615_init_hardware(struct mt7615_dev *dev)
+{
+	u32 addr = mt7615_reg_map(dev, MT_EFUSE_BASE);
+	int ret, idx;
+
+	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
+
+	INIT_WORK(&dev->mcu_work, mt7615_init_work);
+	spin_lock_init(&dev->token_lock);
+	idr_init(&dev->token);
+
+	ret = mt7615_eeprom_init(dev, addr);
+	if (ret < 0)
+		return ret;
+
+	ret = mt7615_dma_init(dev);
+	if (ret)
+		return ret;
+
+	set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state);
+
+	/* Beacon and mgmt frames should occupy wcid 0 */
+	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7615_WTBL_STA - 1);
+	if (idx)
+		return -ENOSPC;
+
+	dev->mt76.global_wcid.idx = idx;
+	dev->mt76.global_wcid.hw_key_idx = -1;
+	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
+
+	return 0;
+}
+
+static void
+mt7615_led_set_config(struct led_classdev *led_cdev,
+		      u8 delay_on, u8 delay_off)
+{
+	struct mt7615_dev *dev;
+	struct mt76_dev *mt76;
+	u32 val, addr;
+
+	mt76 = container_of(led_cdev, struct mt76_dev, led_cdev);
+	dev = container_of(mt76, struct mt7615_dev, mt76);
+	val = FIELD_PREP(MT_LED_STATUS_DURATION, 0xffff) |
+	      FIELD_PREP(MT_LED_STATUS_OFF, delay_off) |
+	      FIELD_PREP(MT_LED_STATUS_ON, delay_on);
+
+	addr = mt7615_reg_map(dev, MT_LED_STATUS_0(mt76->led_pin));
+	mt76_wr(dev, addr, val);
+	addr = mt7615_reg_map(dev, MT_LED_STATUS_1(mt76->led_pin));
+	mt76_wr(dev, addr, val);
+
+	val = MT_LED_CTRL_REPLAY(mt76->led_pin) |
+	      MT_LED_CTRL_KICK(mt76->led_pin);
+	if (mt76->led_al)
+		val |= MT_LED_CTRL_POLARITY(mt76->led_pin);
+	addr = mt7615_reg_map(dev, MT_LED_CTRL);
+	mt76_wr(dev, addr, val);
+}
+
+static int
+mt7615_led_set_blink(struct led_classdev *led_cdev,
+		     unsigned long *delay_on,
+		     unsigned long *delay_off)
+{
+	u8 delta_on, delta_off;
+
+	delta_off = max_t(u8, *delay_off / 10, 1);
+	delta_on = max_t(u8, *delay_on / 10, 1);
+
+	mt7615_led_set_config(led_cdev, delta_on, delta_off);
+
+	return 0;
+}
+
+static void
+mt7615_led_set_brightness(struct led_classdev *led_cdev,
+			  enum led_brightness brightness)
+{
+	if (!brightness)
+		mt7615_led_set_config(led_cdev, 0, 0xff);
+	else
+		mt7615_led_set_config(led_cdev, 0xff, 0);
+}
+
+int mt7615_register_device(struct mt7615_dev *dev)
+{
+	int ret;
+
+	mt7615_init_device(dev);
+
+	/* init led callbacks */
+	if (IS_ENABLED(CONFIG_MT76_LEDS)) {
+		dev->mt76.led_cdev.brightness_set = mt7615_led_set_brightness;
+		dev->mt76.led_cdev.blink_set = mt7615_led_set_blink;
+	}
+
+	ret = mt7622_wmac_init(dev);
+	if (ret)
+		return ret;
+
+	ret = mt7615_init_hardware(dev);
+	if (ret)
+		return ret;
+
+	ret = mt76_register_device(&dev->mt76, true, mt7615_rates,
+				   ARRAY_SIZE(mt7615_rates));
+	if (ret)
+		return ret;
+
+	ieee80211_queue_work(mt76_hw(dev), &dev->mcu_work);
+	mt7615_init_txpower(dev, &dev->mphy.sband_2g.sband);
+	mt7615_init_txpower(dev, &dev->mphy.sband_5g.sband);
+
+	return mt7615_init_debugfs(dev);
+}
+
+void mt7615_unregister_device(struct mt7615_dev *dev)
+{
+	struct mt76_txwi_cache *txwi;
+	bool mcu_running;
+	int id;
+
+	mcu_running = mt7615_wait_for_mcu_init(dev);
+
+	mt7615_unregister_ext_phy(dev);
+	mt76_unregister_device(&dev->mt76);
+	if (mcu_running)
+		mt7615_mcu_exit(dev);
+	mt7615_dma_cleanup(dev);
+
+	spin_lock_bh(&dev->token_lock);
+	idr_for_each_entry(&dev->token, txwi, id) {
+		mt7615_txp_skb_unmap(&dev->mt76, txwi);
+		if (txwi->skb)
+			dev_kfree_skb_any(txwi->skb);
+		mt76_put_txwi(&dev->mt76, txwi);
+	}
+	spin_unlock_bh(&dev->token_lock);
+	idr_destroy(&dev->token);
+
+	tasklet_disable(&dev->irq_tasklet);
+
+	mt76_free_device(&dev->mt76);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
new file mode 100644
index 000000000000..7ec91c0856f5
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_mac.c
@@ -0,0 +1,184 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ *         Roy Luo <royluo@google.com>
+ *         Felix Fietkau <nbd@nbd.name>
+ *         Lorenzo Bianconi <lorenzo@kernel.org>
+ */
+
+#include <linux/etherdevice.h>
+#include <linux/timekeeping.h>
+
+#include "mt7615.h"
+#include "../dma.h"
+#include "mac.h"
+
+void mt7615_tx_complete_skb(struct mt76_dev *mdev, enum mt76_txq_id qid,
+			    struct mt76_queue_entry *e)
+{
+	if (!e->txwi) {
+		dev_kfree_skb_any(e->skb);
+		return;
+	}
+
+	/* error path */
+	if (e->skb == DMA_DUMMY_DATA) {
+		struct mt76_txwi_cache *t;
+		struct mt7615_dev *dev;
+		struct mt7615_txp_common *txp;
+		u16 token;
+
+		dev = container_of(mdev, struct mt7615_dev, mt76);
+		txp = mt7615_txwi_to_txp(mdev, e->txwi);
+
+		if (is_mt7615(&dev->mt76))
+			token = le16_to_cpu(txp->fw.token);
+		else
+			token = le16_to_cpu(txp->hw.msdu_id[0]) &
+				~MT_MSDU_ID_VALID;
+
+		spin_lock_bh(&dev->token_lock);
+		t = idr_remove(&dev->token, token);
+		spin_unlock_bh(&dev->token_lock);
+		e->skb = t ? t->skb : NULL;
+	}
+
+	if (e->skb)
+		mt76_tx_complete_skb(mdev, e->skb);
+}
+
+static void
+mt7615_write_hw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
+		    void *txp_ptr, u32 id)
+{
+	struct mt7615_hw_txp *txp = txp_ptr;
+	struct mt7615_txp_ptr *ptr = &txp->ptr[0];
+	int i, nbuf = tx_info->nbuf - 1;
+	u32 last_mask;
+
+	tx_info->buf[0].len = MT_TXD_SIZE + sizeof(*txp);
+	tx_info->nbuf = 1;
+
+	txp->msdu_id[0] = cpu_to_le16(id | MT_MSDU_ID_VALID);
+
+	if (is_mt7663(&dev->mt76))
+		last_mask = MT_TXD_LEN_LAST;
+	else
+		last_mask = MT_TXD_LEN_AMSDU_LAST |
+			    MT_TXD_LEN_MSDU_LAST;
+
+	for (i = 0; i < nbuf; i++) {
+		u16 len = tx_info->buf[i + 1].len & MT_TXD_LEN_MASK;
+		u32 addr = tx_info->buf[i + 1].addr;
+
+		if (i == nbuf - 1)
+			len |= last_mask;
+
+		if (i & 1) {
+			ptr->buf1 = cpu_to_le32(addr);
+			ptr->len1 = cpu_to_le16(len);
+			ptr++;
+		} else {
+			ptr->buf0 = cpu_to_le32(addr);
+			ptr->len0 = cpu_to_le16(len);
+		}
+	}
+}
+
+static void
+mt7615_write_fw_txp(struct mt7615_dev *dev, struct mt76_tx_info *tx_info,
+		    void *txp_ptr, u32 id)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx_info->skb->data;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
+	struct ieee80211_key_conf *key = info->control.hw_key;
+	struct ieee80211_vif *vif = info->control.vif;
+	struct mt7615_fw_txp *txp = txp_ptr;
+	int nbuf = tx_info->nbuf - 1;
+	int i;
+
+	for (i = 0; i < nbuf; i++) {
+		txp->buf[i] = cpu_to_le32(tx_info->buf[i + 1].addr);
+		txp->len[i] = cpu_to_le16(tx_info->buf[i + 1].len);
+	}
+	txp->nbuf = nbuf;
+
+	/* pass partial skb header to fw */
+	tx_info->buf[0].len = MT_TXD_SIZE + sizeof(*txp);
+	tx_info->buf[1].len = MT_CT_PARSE_LEN;
+	tx_info->nbuf = MT_CT_DMA_BUF_NUM;
+
+	txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD);
+
+	if (!key)
+		txp->flags |= cpu_to_le16(MT_CT_INFO_NONE_CIPHER_FRAME);
+
+	if (ieee80211_is_mgmt(hdr->frame_control))
+		txp->flags |= cpu_to_le16(MT_CT_INFO_MGMT_FRAME);
+
+	if (vif) {
+		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+
+		txp->bss_idx = mvif->idx;
+	}
+
+	txp->token = cpu_to_le16(id);
+	txp->rept_wds_wcid = 0xff;
+}
+
+int mt7615_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
+			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
+			  struct ieee80211_sta *sta,
+			  struct mt76_tx_info *tx_info)
+{
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	struct mt7615_sta *msta = container_of(wcid, struct mt7615_sta, wcid);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
+	struct ieee80211_key_conf *key = info->control.hw_key;
+	int pid, id;
+	u8 *txwi = (u8 *)txwi_ptr;
+	struct mt76_txwi_cache *t;
+	void *txp;
+
+	if (!wcid)
+		wcid = &dev->mt76.global_wcid;
+
+	pid = mt76_tx_status_skb_add(mdev, wcid, tx_info->skb);
+
+	if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
+		struct mt7615_phy *phy = &dev->phy;
+
+		if ((info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY) && mdev->phy2)
+			phy = mdev->phy2->priv;
+
+		spin_lock_bh(&dev->mt76.lock);
+		mt7615_mac_set_rates(phy, msta, &info->control.rates[0],
+				     msta->rates);
+		msta->rate_probe = true;
+		spin_unlock_bh(&dev->mt76.lock);
+	}
+
+	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
+	t->skb = tx_info->skb;
+
+	spin_lock_bh(&dev->token_lock);
+	id = idr_alloc(&dev->token, t, 0, MT7615_TOKEN_SIZE, GFP_ATOMIC);
+	spin_unlock_bh(&dev->token_lock);
+	if (id < 0)
+		return id;
+
+	mt7615_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, sta,
+			      pid, key, false);
+
+	txp = txwi + MT_TXD_SIZE;
+	memset(txp, 0, sizeof(struct mt7615_txp_common));
+	if (is_mt7615(&dev->mt76))
+		mt7615_write_fw_txp(dev, tx_info, txp, id);
+	else
+		mt7615_write_hw_txp(dev, tx_info, txp, id);
+
+	tx_info->skb = DMA_DUMMY_DATA;
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From eb99cc95c3b6513b495c4839ac4917206705f657 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 16 Apr 2020 16:32:51 +0200
Subject: mt76: mt7615: introduce mt7663u support

Introduce support for the mt7663u 802.11ac 2x2:2 chipset in the mt7615
driver. The main difference with respect to the pcie code base is that
the usb code needs to configure the wtbl from non-atomic context.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/Kconfig  |  11 +
 drivers/net/wireless/mediatek/mt76/mt7615/Makefile |   3 +
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  28 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.h    |   5 +-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  34 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |   3 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |   5 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  10 +
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   |  26 ++
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c    | 396 +++++++++++++++++++++
 .../net/wireless/mediatek/mt76/mt7615/usb_init.c   | 144 ++++++++
 .../net/wireless/mediatek/mt76/mt7615/usb_mcu.c    |  93 +++++
 13 files changed, 756 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/usb.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index e31d98a4f88f..577465c01827 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -285,6 +285,7 @@ enum {
 	MT76_MCU_RESET,
 	MT76_REMOVED,
 	MT76_READING_STATS,
+	MT76_STATE_POWER_OFF,
 };
 
 struct mt76_hw_cap {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
index 16385767d8b9..a84317fb856f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
@@ -27,3 +27,14 @@ config MT7622_WMAC
 	  This adds support for the built-in WMAC on MT7622 SoC devices
 	  which has the same feature set as a MT7615, but limited to
 	  2.4 GHz only.
+
+config MT7663U
+	tristate "MediaTek MT7663U (USB) support"
+	select MT76_USB
+	select MT7615_COMMON
+	depends on MAC80211
+	depends on USB
+	help
+	  This adds support for MT7663U 802.11ac 2x2:2 wireless devices.
+
+	  To compile this driver as a module, choose M here.
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
index 2a7937b4394f..99f353b8b9aa 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Makefile
@@ -2,6 +2,7 @@
 
 obj-$(CONFIG_MT7615_COMMON) += mt7615-common.o
 obj-$(CONFIG_MT7615E) += mt7615e.o
+obj-$(CONFIG_MT7663U) += mt7663u.o
 
 CFLAGS_trace.o := -I$(src)
 
@@ -10,3 +11,5 @@ mt7615-common-y := main.o init.o mcu.o eeprom.o mac.o \
 
 mt7615e-y := pci.o pci_init.o dma.o pci_mac.o mmio.o
 mt7615e-$(CONFIG_MT7622_WMAC) += soc.o
+
+mt7663u-y := usb.o usb_mcu.o usb_init.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index adaf52058bc5..571554ffe8b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -871,6 +871,29 @@ mt7615_mac_update_rate_desc(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	rd->bw = bw;
 }
 
+static int
+mt7615_mac_queue_rate_update(struct mt7615_phy *phy, struct mt7615_sta *sta,
+			     struct ieee80211_tx_rate *probe_rate,
+			     struct ieee80211_tx_rate *rates)
+{
+	struct mt7615_dev *dev = phy->dev;
+	struct mt7615_wtbl_desc *wd;
+
+	wd = kzalloc(sizeof(*wd), GFP_ATOMIC);
+	if (!wd)
+		return -ENOMEM;
+
+	wd->type = MT7615_WTBL_RATE_DESC;
+	wd->sta = sta;
+
+	mt7615_mac_update_rate_desc(phy, sta, probe_rate, rates,
+				    &wd->rate);
+	list_add_tail(&wd->node, &dev->wd_head);
+	queue_work(dev->mt76.usb.wq, &dev->wtbl_work);
+
+	return 0;
+}
+
 void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 			  struct ieee80211_tx_rate *probe_rate,
 			  struct ieee80211_tx_rate *rates)
@@ -880,6 +903,11 @@ void mt7615_mac_set_rates(struct mt7615_phy *phy, struct mt7615_sta *sta,
 	struct mt7615_rate_desc rd;
 	u32 w5, w27, addr;
 
+	if (mt76_is_usb(&dev->mt76)) {
+		mt7615_mac_queue_rate_update(phy, sta, probe_rate, rates);
+		return;
+	}
+
 	if (!mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000))
 		return;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
index 8ee57d220d71..f0d4b29a52a2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.h
@@ -165,9 +165,12 @@ enum tx_phy_bandwidth {
 #define MT_CT_INFO_NONE_CIPHER_FRAME	BIT(3)
 #define MT_CT_INFO_HSR2_TX		BIT(4)
 
-#define MT_USB_TXD_SIZE			(MT_TXD_SIZE + 8 * 4)
 #define MT_TXD_SIZE			(8 * 4)
 
+#define MT_USB_TXD_SIZE			(MT_TXD_SIZE + 8 * 4)
+#define MT_USB_HDR_SIZE			4
+#define MT_USB_TAIL_SIZE		4
+
 #define MT_TXD0_P_IDX			BIT(31)
 #define MT_TXD0_Q_IDX			GENMASK(30, 26)
 #define MT_TXD0_UDP_TCP_SUM		BIT(24)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 5777e5e62ef0..f7fc2185da3b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -277,6 +277,37 @@ out:
 	return ret;
 }
 
+static int
+mt7615_queue_key_update(struct mt7615_dev *dev, enum set_key_cmd cmd,
+			struct mt7615_sta *msta,
+			struct ieee80211_key_conf *key)
+{
+	struct mt7615_wtbl_desc *wd;
+
+	wd = kzalloc(sizeof(*wd), GFP_KERNEL);
+	if (!wd)
+		return -ENOMEM;
+
+	wd->type = MT7615_WTBL_KEY_DESC;
+	wd->sta = msta;
+
+	wd->key.key = kzalloc(key->keylen, GFP_KERNEL);
+	if (!wd->key.key) {
+		kfree(wd);
+		return -ENOMEM;
+	}
+	memcpy(wd->key.key, key->key, key->keylen);
+	wd->key.cipher = key->cipher;
+	wd->key.keyidx = key->keyidx;
+	wd->key.keylen = key->keylen;
+	wd->key.cmd = cmd;
+
+	list_add_tail(&wd->node, &dev->wd_head);
+	queue_work(dev->mt76.usb.wq, &dev->wtbl_work);
+
+	return 0;
+}
+
 static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 			  struct ieee80211_vif *vif, struct ieee80211_sta *sta,
 			  struct ieee80211_key_conf *key)
@@ -325,6 +356,9 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	mt76_wcid_key_setup(&dev->mt76, wcid,
 			    cmd == SET_KEY ? key : NULL);
 
+	if (mt76_is_usb(&dev->mt76))
+		return mt7615_queue_key_update(dev, cmd, msta, key);
+
 	return mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 39d596436480..939aeb03b568 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1598,8 +1598,7 @@ static int mt7615_mcu_send_firmware(struct mt7615_dev *dev, const void *data,
 	int ret = 0, cur_len;
 
 	while (len > 0) {
-		cur_len = min_t(int, 4096 - sizeof(struct mt7615_mcu_txd),
-				len);
+		cur_len = min_t(int, 4096 - dev->mt76.mcu_ops->headroom, len);
 
 		ret = __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_FW_SCATTER,
 					  data, cur_len, false);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 983625fb72ee..dff3f3632faf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -449,6 +449,11 @@ enum {
 	FW_STATE_CR4_RDY          = 7
 };
 
+enum {
+	FW_STATE_PWR_ON = 1,
+	FW_STATE_N9_RDY = 2,
+};
+
 #define STA_TYPE_STA		BIT(0)
 #define STA_TYPE_AP		BIT(1)
 #define STA_TYPE_ADHOC		BIT(2)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index fb891b80718b..2bc77a0478a9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -269,6 +269,9 @@ struct mt7615_dev {
 	struct idr token;
 
 	u8 fw_ver;
+
+	struct work_struct wtbl_work;
+	struct list_head wd_head;
 };
 
 enum {
@@ -508,6 +511,13 @@ int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_init_debugfs(struct mt7615_dev *dev);
+int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
+
 int __mt7663_load_firmware(struct mt7615_dev *dev);
 
+/* usb */
+void mt7663u_wtbl_work(struct work_struct *work);
+int mt7663u_mcu_init(struct mt7615_dev *dev);
+int mt7663u_register_device(struct mt7615_dev *dev);
+
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index a3333f382350..370e03432e81 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -43,6 +43,7 @@ enum mt7615_reg_base {
 #define MT_TOP_MISC2_FW_STATE		GENMASK(2, 0)
 
 #define MT7663_TOP_MISC2_FW_STATE	GENMASK(3, 1)
+#define MT_TOP_MISC2_FW_PWR_ON		BIT(1)
 
 #define MT_MCU_BASE			0x2000
 #define MT_MCU(ofs)			(MT_MCU_BASE + (ofs))
@@ -58,6 +59,8 @@ enum mt7615_reg_base {
 #define MT_PCIE_REMAP_BASE_2		((dev)->reg_map[MT_PCIE_REMAP_BASE2])
 
 #define MT_HIF(ofs)			((dev)->reg_map[MT_HIF_BASE] + (ofs))
+#define MT_HIF_RST			MT_HIF(0x100)
+#define MT_HIF_LOGIC_RST_N		BIT(4)
 
 #define MT7663_MCU_PCIE_REMAP_2_OFFSET	GENMASK(15, 0)
 #define MT7663_MCU_PCIE_REMAP_2_BASE	GENMASK(31, 16)
@@ -483,4 +486,27 @@ enum mt7615_reg_base {
 #define MT_INFRACFG_MISC		0x700
 #define MT_INFRACFG_MISC_AP2CONN_WAKE	BIT(1)
 
+#define MT_UMAC_BASE			0x7c000000
+#define MT_UMAC(ofs)			(MT_UMAC_BASE + (ofs))
+#define MT_UDMA_TX_QSEL			MT_UMAC(0x008)
+#define MT_FW_DL_EN			BIT(3)
+
+#define MT_UDMA_WLCFG_1			MT_UMAC(0x00c)
+#define MT_WL_RX_AGG_PKT_LMT		GENMASK(7, 0)
+#define MT_WL_TX_TMOUT_LMT		GENMASK(27, 8)
+
+#define MT_UDMA_WLCFG_0			MT_UMAC(0x18)
+#define MT_WL_RX_AGG_TO			GENMASK(7, 0)
+#define MT_WL_RX_AGG_LMT		GENMASK(15, 8)
+#define MT_WL_TX_TMOUT_FUNC_EN		BIT(16)
+#define MT_WL_TX_DPH_CHK_EN		BIT(17)
+#define MT_WL_RX_MPSZ_PAD0		BIT(18)
+#define MT_WL_RX_FLUSH			BIT(19)
+#define MT_TICK_1US_EN			BIT(20)
+#define MT_WL_RX_AGG_EN			BIT(21)
+#define MT_WL_RX_EN			BIT(22)
+#define MT_WL_TX_EN			BIT(23)
+#define MT_WL_RX_BUSY			BIT(30)
+#define MT_WL_TX_BUSY			BIT(31)
+
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
new file mode 100644
index 000000000000..ad5219006987
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -0,0 +1,396 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2019 MediaTek Inc.
+ *
+ * Author: Felix Fietkau <nbd@nbd.name>
+ *	   Lorenzo Bianconi <lorenzo@kernel.org>
+ *	   Sean Wang <sean.wang@mediatek.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/usb.h>
+
+#include "mt7615.h"
+#include "mac.h"
+#include "mcu.h"
+#include "regs.h"
+
+static const u32 mt7663u_reg_map[] = {
+	[MT_TOP_CFG_BASE]	= 0x80020000,
+	[MT_HW_BASE]		= 0x80000000,
+	[MT_DMA_SHDL_BASE]	= 0x5000a000,
+	[MT_HIF_BASE]		= 0x50000000,
+	[MT_CSR_BASE]		= 0x40000000,
+	[MT_EFUSE_ADDR_BASE]	= 0x78011000,
+	[MT_TOP_MISC_BASE]	= 0x81020000,
+	[MT_PHY_BASE]		= 0x82070000,
+	[MT_WTBL_BASE_ADDR]	= 0x820e0000,
+	[MT_CFG_BASE]		= 0x820f0000,
+	[MT_AGG_BASE]		= 0x820f2000,
+	[MT_ARB_BASE]		= 0x820f3000,
+	[MT_TMAC_BASE]		= 0x820f4000,
+	[MT_RMAC_BASE]		= 0x820f5000,
+	[MT_DMA_BASE]		= 0x820f7000,
+	[MT_WTBL_BASE_ON]	= 0x820f9000,
+	[MT_WTBL_BASE_OFF]	= 0x820f9800,
+	[MT_LPON_BASE]		= 0x820fb000,
+	[MT_MIB_BASE]		= 0x820fd000,
+};
+
+static const struct usb_device_id mt7615_device_table[] = {
+	{ USB_DEVICE_AND_INTERFACE_INFO(0x0e8d, 0x7663, 0xff, 0xff, 0xff) },
+	{ },
+};
+
+static void mt7663u_stop(struct ieee80211_hw *hw)
+{
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	struct mt7615_dev *dev = hw->priv;
+
+	clear_bit(MT76_STATE_RUNNING, &dev->mphy.state);
+	cancel_delayed_work_sync(&phy->scan_work);
+	cancel_delayed_work_sync(&phy->mac_work);
+	mt76u_stop_tx(&dev->mt76);
+}
+
+static void mt7663u_cleanup(struct mt7615_dev *dev)
+{
+	clear_bit(MT76_STATE_INITIALIZED, &dev->mphy.state);
+	mt76u_queues_deinit(&dev->mt76);
+}
+
+static void
+mt7663u_mac_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid,
+		       enum mt76_txq_id qid, struct ieee80211_sta *sta,
+		       struct sk_buff *skb)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	__le32 *txwi;
+	int pid;
+
+	if (!wcid)
+		wcid = &dev->mt76.global_wcid;
+
+	pid = mt76_tx_status_skb_add(&dev->mt76, wcid, skb);
+
+	txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE);
+	memset(txwi, 0, MT_USB_TXD_SIZE);
+	mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta,
+			      pid, info->control.hw_key, false);
+	skb_push(skb, MT_USB_TXD_SIZE);
+}
+
+static int
+__mt7663u_mac_set_rates(struct mt7615_dev *dev,
+			struct mt7615_wtbl_desc *wd)
+{
+	struct mt7615_rate_desc *rate = &wd->rate;
+	struct mt7615_sta *sta = wd->sta;
+	u32 w5, w27, addr, val;
+
+	lockdep_assert_held(&dev->mt76.mutex);
+
+	if (!sta)
+		return -EINVAL;
+
+	if (!mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000))
+		return -ETIMEDOUT;
+
+	addr = mt7615_mac_wtbl_addr(dev, sta->wcid.idx);
+
+	w27 = mt76_rr(dev, addr + 27 * 4);
+	w27 &= ~MT_WTBL_W27_CC_BW_SEL;
+	w27 |= FIELD_PREP(MT_WTBL_W27_CC_BW_SEL, rate->bw);
+
+	w5 = mt76_rr(dev, addr + 5 * 4);
+	w5 &= ~(MT_WTBL_W5_BW_CAP | MT_WTBL_W5_CHANGE_BW_RATE |
+		MT_WTBL_W5_MPDU_OK_COUNT |
+		MT_WTBL_W5_MPDU_FAIL_COUNT |
+		MT_WTBL_W5_RATE_IDX);
+	w5 |= FIELD_PREP(MT_WTBL_W5_BW_CAP, rate->bw) |
+	      FIELD_PREP(MT_WTBL_W5_CHANGE_BW_RATE,
+			 rate->bw_idx ? rate->bw_idx - 1 : 7);
+
+	mt76_wr(dev, MT_WTBL_RIUCR0, w5);
+
+	mt76_wr(dev, MT_WTBL_RIUCR1,
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE0, rate->probe_val) |
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE1, rate->val[0]) |
+		FIELD_PREP(MT_WTBL_RIUCR1_RATE2_LO, rate->val[1]));
+
+	mt76_wr(dev, MT_WTBL_RIUCR2,
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE2_HI, rate->val[1] >> 8) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE3, rate->val[1]) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE4, rate->val[2]) |
+		FIELD_PREP(MT_WTBL_RIUCR2_RATE5_LO, rate->val[2]));
+
+	mt76_wr(dev, MT_WTBL_RIUCR3,
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE5_HI, rate->val[2] >> 4) |
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE6, rate->val[3]) |
+		FIELD_PREP(MT_WTBL_RIUCR3_RATE7, rate->val[3]));
+
+	mt76_wr(dev, MT_WTBL_UPDATE,
+		FIELD_PREP(MT_WTBL_UPDATE_WLAN_IDX, sta->wcid.idx) |
+		MT_WTBL_UPDATE_RATE_UPDATE |
+		MT_WTBL_UPDATE_TX_COUNT_CLEAR);
+
+	mt76_wr(dev, addr + 27 * 4, w27);
+
+	mt76_set(dev, MT_LPON_T0CR, MT_LPON_T0CR_MODE); /* TSF read */
+	val = mt76_rr(dev, MT_LPON_UTTR0);
+	sta->rate_set_tsf = (val & ~BIT(0)) | rate->rateset;
+
+	if (!(sta->wcid.tx_info & MT_WCID_TX_INFO_SET))
+		mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);
+
+	sta->rate_count = 2 * MT7615_RATE_RETRY * sta->n_rates;
+	sta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
+
+	return 0;
+}
+
+static int
+__mt7663u_mac_set_key(struct mt7615_dev *dev,
+		      struct mt7615_wtbl_desc *wd)
+{
+	struct mt7615_key_desc *key = &wd->key;
+	struct mt7615_sta *sta = wd->sta;
+	enum mt7615_cipher_type cipher;
+	struct mt76_wcid *wcid;
+	int err;
+
+	lockdep_assert_held(&dev->mt76.mutex);
+
+	if (!sta)
+		return -EINVAL;
+
+	cipher = mt7615_mac_get_cipher(key->cipher);
+	if (cipher == MT_CIPHER_NONE)
+		return -EOPNOTSUPP;
+
+	wcid = &wd->sta->wcid;
+
+	mt7615_mac_wtbl_update_cipher(dev, wcid, cipher, key->cmd);
+	err = mt7615_mac_wtbl_update_key(dev, wcid, key->key, key->keylen,
+					 cipher, key->cmd);
+	if (err < 0)
+		return err;
+
+	err = mt7615_mac_wtbl_update_pk(dev, wcid, cipher, key->keyidx,
+					key->cmd);
+	if (err < 0)
+		return err;
+
+	if (key->cmd == SET_KEY)
+		wcid->cipher |= BIT(cipher);
+	else
+		wcid->cipher &= ~BIT(cipher);
+
+	return 0;
+}
+
+void mt7663u_wtbl_work(struct work_struct *work)
+{
+	struct mt7615_wtbl_desc *wd, *wd_next;
+	struct mt7615_dev *dev;
+
+	dev = (struct mt7615_dev *)container_of(work, struct mt7615_dev,
+						wtbl_work);
+
+	list_for_each_entry_safe(wd, wd_next, &dev->wd_head, node) {
+		spin_lock_bh(&dev->mt76.lock);
+		list_del(&wd->node);
+		spin_unlock_bh(&dev->mt76.lock);
+
+		mutex_lock(&dev->mt76.mutex);
+		switch (wd->type) {
+		case MT7615_WTBL_RATE_DESC:
+			__mt7663u_mac_set_rates(dev, wd);
+			break;
+		case MT7615_WTBL_KEY_DESC:
+			__mt7663u_mac_set_key(dev, wd);
+			break;
+		}
+		mutex_unlock(&dev->mt76.mutex);
+
+		kfree(wd);
+	}
+}
+
+static void
+mt7663u_tx_complete_skb(struct mt76_dev *mdev, enum mt76_txq_id qid,
+			struct mt76_queue_entry *e)
+{
+	skb_pull(e->skb, MT_USB_HDR_SIZE + MT_USB_TXD_SIZE);
+	mt76_tx_complete_skb(mdev, e->skb);
+}
+
+static int
+mt7663u_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
+		       enum mt76_txq_id qid, struct mt76_wcid *wcid,
+		       struct ieee80211_sta *sta,
+		       struct mt76_tx_info *tx_info)
+{
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
+
+	if (info->flags & IEEE80211_TX_CTL_RATE_CTRL_PROBE) {
+		struct mt7615_sta *msta;
+
+		msta = container_of(wcid, struct mt7615_sta, wcid);
+		spin_lock_bh(&dev->mt76.lock);
+		mt7615_mac_set_rates(&dev->phy, msta, &info->control.rates[0],
+				     msta->rates);
+		msta->rate_probe = true;
+		spin_unlock_bh(&dev->mt76.lock);
+	}
+	mt7663u_mac_write_txwi(dev, wcid, qid, sta, tx_info->skb);
+
+	return mt76u_skb_dma_info(tx_info->skb, tx_info->skb->len);
+}
+
+static int mt7663u_probe(struct usb_interface *usb_intf,
+			 const struct usb_device_id *id)
+{
+	static const struct mt76_driver_ops drv_ops = {
+		.txwi_size = MT_USB_TXD_SIZE,
+		.drv_flags = MT_DRV_RX_DMA_HDR,
+		.tx_prepare_skb = mt7663u_tx_prepare_skb,
+		.tx_complete_skb = mt7663u_tx_complete_skb,
+		.rx_skb = mt7615_queue_rx_skb,
+		.sta_ps = mt7615_sta_ps,
+		.sta_add = mt7615_mac_sta_add,
+		.sta_remove = mt7615_mac_sta_remove,
+		.update_survey = mt7615_update_channel,
+	};
+	struct usb_device *udev = interface_to_usbdev(usb_intf);
+	struct ieee80211_ops *ops;
+	struct mt7615_dev *dev;
+	struct mt76_dev *mdev;
+	int ret;
+
+	ops = devm_kmemdup(&usb_intf->dev, &mt7615_ops, sizeof(mt7615_ops),
+			   GFP_KERNEL);
+	if (!ops)
+		return -ENOMEM;
+
+	ops->stop = mt7663u_stop;
+
+	mdev = mt76_alloc_device(&usb_intf->dev, sizeof(*dev), ops, &drv_ops);
+	if (!mdev)
+		return -ENOMEM;
+
+	dev = container_of(mdev, struct mt7615_dev, mt76);
+	udev = usb_get_dev(udev);
+	usb_reset_device(udev);
+
+	usb_set_intfdata(usb_intf, dev);
+
+	dev->reg_map = mt7663u_reg_map;
+	dev->ops = ops;
+	ret = mt76u_init(mdev, usb_intf, true);
+	if (ret < 0)
+		goto error;
+
+	mdev->rev = (mt76_rr(dev, MT_HW_CHIPID) << 16) |
+		    (mt76_rr(dev, MT_HW_REV) & 0xff);
+	dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
+
+	if (mt76_poll_msec(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_PWR_ON,
+			   FW_STATE_PWR_ON << 1, 500)) {
+		dev_dbg(dev->mt76.dev, "Usb device already powered on\n");
+		set_bit(MT76_STATE_POWER_OFF, &dev->mphy.state);
+		goto alloc_queues;
+	}
+
+	ret = mt76u_vendor_request(&dev->mt76, MT_VEND_POWER_ON,
+				   USB_DIR_OUT | USB_TYPE_VENDOR,
+				   0x0, 0x1, NULL, 0);
+	if (ret)
+		goto error;
+
+	if (!mt76_poll_msec(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_PWR_ON,
+			    FW_STATE_PWR_ON << 1, 500)) {
+		dev_err(dev->mt76.dev, "Timeout for power on\n");
+		ret = -EIO; /* unwind via error: so mdev and the usb ref are released */
+		goto error;
+	}
+
+alloc_queues:
+	ret = mt76u_alloc_mcu_queue(&dev->mt76);
+	if (ret)
+		goto error;
+
+	ret = mt76u_alloc_queues(&dev->mt76);
+	if (ret)
+		goto error;
+
+	ret = mt7663u_register_device(dev);
+	if (ret)
+		goto error_freeq;
+
+	return 0;
+
+error_freeq:
+	mt76u_queues_deinit(&dev->mt76);
+error:
+	mt76u_deinit(&dev->mt76);
+	usb_set_intfdata(usb_intf, NULL);
+	usb_put_dev(interface_to_usbdev(usb_intf));
+	ieee80211_free_hw(mdev->hw);
+
+	return ret;
+}
+
+static void mt7663u_disconnect(struct usb_interface *usb_intf)
+{
+	struct mt7615_dev *dev = usb_get_intfdata(usb_intf);
+
+	if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
+		return;
+
+	ieee80211_unregister_hw(dev->mt76.hw);
+	mt7663u_cleanup(dev);
+
+	usb_set_intfdata(usb_intf, NULL);
+	usb_put_dev(interface_to_usbdev(usb_intf));
+
+	mt76u_deinit(&dev->mt76);
+	ieee80211_free_hw(dev->mt76.hw);
+}
+
+static int __maybe_unused
+mt7663u_suspend(struct usb_interface *intf,
+		pm_message_t state)
+{
+	return 0;
+}
+
+static int __maybe_unused
+mt7663u_resume(struct usb_interface *intf)
+{
+	return 0;
+}
+
+MODULE_DEVICE_TABLE(usb, mt7615_device_table);
+MODULE_FIRMWARE(MT7663_FIRMWARE_N9);
+MODULE_FIRMWARE(MT7663_ROM_PATCH);
+
+static struct usb_driver mt7663u_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= mt7615_device_table,
+	.probe		= mt7663u_probe,
+	.disconnect	= mt7663u_disconnect,
+#ifdef CONFIG_PM
+	.suspend	= mt7663u_suspend,
+	.resume		= mt7663u_resume,
+	.reset_resume	= mt7663u_resume,
+#endif /* CONFIG_PM */
+	.soft_unbind	= 1,
+	.disable_hub_initiated_lpm = 1,
+};
+module_usb_driver(mt7663u_driver);
+
+MODULE_AUTHOR("Sean Wang <sean.wang@mediatek.com>");
+MODULE_AUTHOR("Lorenzo Bianconi <lorenzo@kernel.org>");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
new file mode 100644
index 000000000000..a05f0eda21dd
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
@@ -0,0 +1,144 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 MediaTek Inc.
+ *
+ * Author: Felix Fietkau <nbd@nbd.name>
+ *	   Lorenzo Bianconi <lorenzo@kernel.org>
+ *	   Sean Wang <sean.wang@mediatek.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "mt7615.h"
+#include "mac.h"
+#include "regs.h"
+
+static int mt7663u_dma_sched_init(struct mt7615_dev *dev)
+{
+	int i;
+
+	mt76_rmw(dev, MT_DMA_SHDL(MT_DMASHDL_PKT_MAX_SIZE),
+		 MT_DMASHDL_PKT_MAX_SIZE_PLE | MT_DMASHDL_PKT_MAX_SIZE_PSE,
+		 FIELD_PREP(MT_DMASHDL_PKT_MAX_SIZE_PLE, 1) |
+		 FIELD_PREP(MT_DMASHDL_PKT_MAX_SIZE_PSE, 8));
+
+	/* disable refill group 5 - group 15 and raise group 2
+	 * and 3 as high priority.
+	 */
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_REFILL), 0xffe00006);
+	mt76_clear(dev, MT_DMA_SHDL(MT_DMASHDL_PAGE), BIT(16));
+
+	for (i = 0; i < 5; i++)
+		mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_GROUP_QUOTA(i)),
+			FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MIN, 0x3) |
+			FIELD_PREP(MT_DMASHDL_GROUP_QUOTA_MAX, 0x1ff));
+
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_Q_MAP(0)), 0x42104210);
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_Q_MAP(1)), 0x42104210);
+
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_Q_MAP(2)), 0x4444);
+
+	/* group priority from high to low:
+	 * 15 (cmd groups) > 4 > 3 > 2 > 1 > 0.
+	 */
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_SCHED_SET0), 0x6501234f);
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_SCHED_SET1), 0xedcba987);
+	mt76_wr(dev, MT_DMA_SHDL(MT_DMASHDL_OPTIONAL), 0x7004801c);
+
+	mt76_wr(dev, MT_UDMA_WLCFG_1,
+		FIELD_PREP(MT_WL_TX_TMOUT_LMT, 80000) |
+		FIELD_PREP(MT_WL_RX_AGG_PKT_LMT, 1));
+
+	/* setup UDMA Rx Flush */
+	mt76_clear(dev, MT_UDMA_WLCFG_0, MT_WL_RX_FLUSH);
+	/* hif reset */
+	mt76_set(dev, MT_HIF_RST, MT_HIF_LOGIC_RST_N);
+
+	mt76_set(dev, MT_UDMA_WLCFG_0,
+		 MT_WL_RX_AGG_EN | MT_WL_RX_EN | MT_WL_TX_EN |
+		 MT_WL_RX_MPSZ_PAD0 | MT_TICK_1US_EN |
+		 MT_WL_TX_TMOUT_FUNC_EN);
+	mt76_rmw(dev, MT_UDMA_WLCFG_0, MT_WL_RX_AGG_LMT | MT_WL_RX_AGG_TO,
+		 FIELD_PREP(MT_WL_RX_AGG_LMT, 32) |
+		 FIELD_PREP(MT_WL_RX_AGG_TO, 100));
+
+	return 0;
+}
+
+static int mt7663u_init_hardware(struct mt7615_dev *dev)
+{
+	int ret, idx;
+
+	ret = mt7615_eeprom_init(dev, MT_EFUSE_BASE);
+	if (ret < 0)
+		return ret;
+
+	ret = mt7663u_dma_sched_init(dev);
+	if (ret)
+		return ret;
+
+	set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state);
+
+	/* Beacon and mgmt frames should occupy wcid 0 */
+	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7615_WTBL_STA - 1);
+	if (idx)
+		return -ENOSPC;
+
+	dev->mt76.global_wcid.idx = idx;
+	dev->mt76.global_wcid.hw_key_idx = -1;
+	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
+
+	return 0;
+}
+
+static void mt7663u_init_work(struct work_struct *work)
+{
+	struct mt7615_dev *dev;
+
+	dev = container_of(work, struct mt7615_dev, mcu_work);
+	if (mt7663u_mcu_init(dev))
+		return;
+
+	mt7615_mcu_set_eeprom(dev);
+	mt7615_mac_init(dev);
+	mt7615_phy_init(dev);
+	mt7615_mcu_del_wtbl_all(dev);
+}
+
+int mt7663u_register_device(struct mt7615_dev *dev)
+{
+	struct ieee80211_hw *hw = mt76_hw(dev);
+	int err;
+
+	INIT_WORK(&dev->wtbl_work, mt7663u_wtbl_work);
+	INIT_WORK(&dev->mcu_work, mt7663u_init_work);
+	INIT_LIST_HEAD(&dev->wd_head);
+	mt7615_init_device(dev);
+
+	err = mt7663u_init_hardware(dev);
+	if (err)
+		return err;
+
+	hw->extra_tx_headroom += MT_USB_HDR_SIZE + MT_USB_TXD_SIZE;
+	/* check hw sg support in order to enable AMSDU */
+	hw->max_tx_fragments = dev->mt76.usb.sg_en ? MT_HW_TXP_MAX_BUF_NUM : 1;
+
+	err = mt76_register_device(&dev->mt76, true, mt7615_rates,
+				   ARRAY_SIZE(mt7615_rates));
+	if (err < 0)
+		return err;
+
+	if (!dev->mt76.usb.sg_en) {
+		struct ieee80211_sta_vht_cap *vht_cap;
+
+		/* decrease max A-MSDU size if SG is not supported */
+		vht_cap = &dev->mphy.sband_5g.sband.vht_cap;
+		vht_cap->cap &= ~IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454;
+	}
+
+	ieee80211_queue_work(hw, &dev->mcu_work);
+	mt7615_init_txpower(dev, &dev->mphy.sband_2g.sband);
+	mt7615_init_txpower(dev, &dev->mphy.sband_5g.sband);
+
+	return mt7615_init_debugfs(dev);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
new file mode 100644
index 000000000000..cd709fd617db
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_mcu.c
@@ -0,0 +1,93 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2019 MediaTek Inc.
+ *
+ * Author: Felix Fietkau <nbd@nbd.name>
+ *	   Lorenzo Bianconi <lorenzo@kernel.org>
+ *	   Sean Wang <sean.wang@mediatek.com>
+ */
+#include <linux/kernel.h>
+#include <linux/module.h>
+
+#include "mt7615.h"
+#include "mac.h"
+#include "mcu.h"
+#include "regs.h"
+
+static int
+mt7663u_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
+			 int cmd, bool wait_resp)
+{
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	int ret, seq, ep;
+
+	mutex_lock(&mdev->mcu.mutex);
+
+	mt7615_mcu_fill_msg(dev, skb, cmd, &seq);
+	if (cmd != MCU_CMD_FW_SCATTER)
+		ep = MT_EP_OUT_INBAND_CMD;
+	else
+		ep = MT_EP_OUT_AC_BE;
+
+	ret = mt76u_skb_dma_info(skb, skb->len);
+	if (ret < 0)
+		goto out;
+
+	ret = mt76u_bulk_msg(&dev->mt76, skb->data, skb->len, NULL,
+			     1000, ep);
+	dev_kfree_skb(skb);
+	if (ret < 0)
+		goto out;
+
+	if (wait_resp)
+		ret = mt7615_mcu_wait_response(dev, cmd, seq);
+
+out:
+	mutex_unlock(&mdev->mcu.mutex);
+
+	return ret;
+}
+
+int mt7663u_mcu_init(struct mt7615_dev *dev)
+{
+	static const struct mt76_mcu_ops mt7663u_mcu_ops = {
+		.headroom = MT_USB_HDR_SIZE + sizeof(struct mt7615_mcu_txd),
+		.tailroom = MT_USB_TAIL_SIZE,
+		.mcu_skb_send_msg = mt7663u_mcu_send_message,
+		.mcu_send_msg = mt7615_mcu_msg_send,
+		.mcu_restart = mt7615_mcu_restart,
+	};
+	int ret;
+
+	dev->mt76.mcu_ops = &mt7663u_mcu_ops;
+
+	mt76_set(dev, MT_UDMA_TX_QSEL, MT_FW_DL_EN);
+
+	if (test_and_clear_bit(MT76_STATE_POWER_OFF, &dev->mphy.state)) {
+		mt7615_mcu_restart(&dev->mt76);
+		if (!mt76_poll_msec(dev, MT_CONN_ON_MISC,
+				    MT_TOP_MISC2_FW_PWR_ON, 0, 500))
+			return -EIO;
+
+		ret = mt76u_vendor_request(&dev->mt76, MT_VEND_POWER_ON,
+					   USB_DIR_OUT | USB_TYPE_VENDOR,
+					   0x0, 0x1, NULL, 0);
+		if (ret)
+			return ret;
+
+		if (!mt76_poll_msec(dev, MT_CONN_ON_MISC,
+				    MT_TOP_MISC2_FW_PWR_ON,
+				    FW_STATE_PWR_ON << 1, 500)) {
+			dev_err(dev->mt76.dev, "Timeout for power on\n");
+			return -EIO;
+		}
+	}
+
+	ret = __mt7663_load_firmware(dev);
+	if (ret)
+		return ret;
+
+	mt76_clear(dev, MT_UDMA_TX_QSEL, MT_FW_DL_EN);
+	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From ad6b0be6f4e922ef0f2aea9d0e09f2c4cf3adc5e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 16 Apr 2020 16:36:19 +0200
Subject: mt76: mt7615: enable scs for mt7663 driver

Add missing register definitions in order to enable sensitivity tuning
for mt7663 driver

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c  | 77 +++++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h |  8 +++
 2 files changed, 58 insertions(+), 27 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 571554ffe8b6..b6e92bb3b128 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1417,17 +1417,40 @@ void mt7615_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
 EXPORT_SYMBOL_GPL(mt7615_queue_rx_skb);
 
 static void
-mt7615_mac_set_default_sensitivity(struct mt7615_phy *phy)
+mt7615_mac_set_sensitivity(struct mt7615_phy *phy, int val, bool ofdm)
 {
 	struct mt7615_dev *dev = phy->dev;
 	bool ext_phy = phy != &dev->phy;
 
-	mt76_rmw(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
-		 MT_WF_PHY_PD_OFDM_MASK(ext_phy),
-		 MT_WF_PHY_PD_OFDM(ext_phy, 0x13c));
-	mt76_rmw(dev, MT_WF_PHY_RXTD_CCK_PD(ext_phy),
-		 MT_WF_PHY_PD_CCK_MASK(ext_phy),
-		 MT_WF_PHY_PD_CCK(ext_phy, 0x92));
+	if (is_mt7663(&dev->mt76)) {
+		if (ofdm)
+			mt76_rmw(dev, MT7663_WF_PHY_MIN_PRI_PWR(ext_phy),
+				 MT_WF_PHY_PD_OFDM_MASK(0),
+				 MT_WF_PHY_PD_OFDM(0, val));
+		else
+			mt76_rmw(dev, MT7663_WF_PHY_RXTD_CCK_PD(ext_phy),
+				 MT_WF_PHY_PD_CCK_MASK(ext_phy),
+				 MT_WF_PHY_PD_CCK(ext_phy, val));
+		return;
+	}
+
+	if (ofdm)
+		mt76_rmw(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
+			 MT_WF_PHY_PD_OFDM_MASK(ext_phy),
+			 MT_WF_PHY_PD_OFDM(ext_phy, val));
+	else
+		mt76_rmw(dev, MT_WF_PHY_RXTD_CCK_PD(ext_phy),
+			 MT_WF_PHY_PD_CCK_MASK(ext_phy),
+			 MT_WF_PHY_PD_CCK(ext_phy, val));
+}
+
+static void
+mt7615_mac_set_default_sensitivity(struct mt7615_phy *phy)
+{
+	/* ofdm */
+	mt7615_mac_set_sensitivity(phy, 0x13c, true);
+	/* cck */
+	mt7615_mac_set_sensitivity(phy, 0x92, false);
 
 	phy->ofdm_sensitivity = -98;
 	phy->cck_sensitivity = -110;
@@ -1438,25 +1461,29 @@ void mt7615_mac_set_scs(struct mt7615_phy *phy, bool enable)
 {
 	struct mt7615_dev *dev = phy->dev;
 	bool ext_phy = phy != &dev->phy;
+	u32 reg, mask;
 
 	mutex_lock(&dev->mt76.mutex);
 
 	if (phy->scs_en == enable)
 		goto out;
 
-	if (is_mt7663(&dev->mt76))
-		goto out;
+	if (is_mt7663(&dev->mt76)) {
+		reg = MT7663_WF_PHY_MIN_PRI_PWR(ext_phy);
+		mask = MT_WF_PHY_PD_BLK(0);
+	} else {
+		reg = MT_WF_PHY_MIN_PRI_PWR(ext_phy);
+		mask = MT_WF_PHY_PD_BLK(ext_phy);
+	}
 
 	if (enable) {
-		mt76_set(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
-			 MT_WF_PHY_PD_BLK(ext_phy));
+		mt76_set(dev, reg, mask);
 		if (is_mt7622(&dev->mt76)) {
 			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7 << 8);
 			mt76_set(dev, MT_MIB_M0_MISC_CR(0), 0x7);
 		}
 	} else {
-		mt76_clear(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
-			   MT_WF_PHY_PD_BLK(ext_phy));
+		mt76_clear(dev, reg, mask);
 	}
 
 	mt7615_mac_set_default_sensitivity(phy);
@@ -1547,19 +1574,9 @@ mt7615_mac_adjust_sensitivity(struct mt7615_phy *phy,
 	}
 
 	if (update) {
-		u16 val;
+		u16 val = ofdm ? *sensitivity * 2 + 512 : *sensitivity + 256;
 
-		if (ofdm) {
-			val = *sensitivity * 2 + 512;
-			mt76_rmw(dev, MT_WF_PHY_MIN_PRI_PWR(ext_phy),
-				 MT_WF_PHY_PD_OFDM_MASK(ext_phy),
-				 MT_WF_PHY_PD_OFDM(ext_phy, val));
-		} else {
-			val = *sensitivity + 256;
-			mt76_rmw(dev, MT_WF_PHY_RXTD_CCK_PD(ext_phy),
-				 MT_WF_PHY_PD_CCK_MASK(ext_phy),
-				 MT_WF_PHY_PD_CCK(ext_phy, val));
-		}
+		mt7615_mac_set_sensitivity(phy, val, ofdm);
 		phy->last_cca_adj = jiffies;
 	}
 }
@@ -1576,11 +1593,17 @@ mt7615_mac_scs_check(struct mt7615_phy *phy)
 	if (!phy->scs_en)
 		return;
 
-	val = mt76_rr(dev, MT_WF_PHY_R0_PHYCTRL_STS0(ext_phy));
+	if (is_mt7663(&dev->mt76))
+		val = mt76_rr(dev, MT7663_WF_PHY_R0_PHYCTRL_STS0(ext_phy));
+	else
+		val = mt76_rr(dev, MT_WF_PHY_R0_PHYCTRL_STS0(ext_phy));
 	pd_cck = FIELD_GET(MT_WF_PHYCTRL_STAT_PD_CCK, val);
 	pd_ofdm = FIELD_GET(MT_WF_PHYCTRL_STAT_PD_OFDM, val);
 
-	val = mt76_rr(dev, MT_WF_PHY_R0_PHYCTRL_STS5(ext_phy));
+	if (is_mt7663(&dev->mt76))
+		val = mt76_rr(dev, MT7663_WF_PHY_R0_PHYCTRL_STS5(ext_phy));
+	else
+		val = mt76_rr(dev, MT_WF_PHY_R0_PHYCTRL_STS5(ext_phy));
 	mdrdy_cck = FIELD_GET(MT_WF_PHYCTRL_STAT_MDRDY_CCK, val);
 	mdrdy_ofdm = FIELD_GET(MT_WF_PHYCTRL_STAT_MDRDY_OFDM, val);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 370e03432e81..ab3c6b77df07 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -160,16 +160,22 @@ enum mt7615_reg_base {
 #define MT_WF_PHYCTRL_STAT_PD_OFDM	GENMASK(31, 16)
 #define MT_WF_PHYCTRL_STAT_PD_CCK	GENMASK(15, 0)
 
+#define MT7663_WF_PHY_R0_PHYCTRL_STS0(_phy)	MT_WF_PHY(0x0210 + ((_phy) << 12))
+
 #define MT_WF_PHY_R0_PHYCTRL_STS5(_phy)	MT_WF_PHY(0x0220 + ((_phy) << 9))
 #define MT_WF_PHYCTRL_STAT_MDRDY_OFDM	GENMASK(31, 16)
 #define MT_WF_PHYCTRL_STAT_MDRDY_CCK	GENMASK(15, 0)
 
+#define MT7663_WF_PHY_R0_PHYCTRL_STS5(_phy)	MT_WF_PHY(0x0224 + ((_phy) << 12))
+
 #define MT_WF_PHY_MIN_PRI_PWR(_phy)	MT_WF_PHY((_phy) ? 0x084 : 0x229c)
 #define MT_WF_PHY_PD_OFDM_MASK(_phy)	((_phy) ? GENMASK(24, 16) : \
 					 GENMASK(28, 20))
 #define MT_WF_PHY_PD_OFDM(_phy, v)	((v) << ((_phy) ? 16 : 20))
 #define MT_WF_PHY_PD_BLK(_phy)		((_phy) ? BIT(25) : BIT(19))
 
+#define MT7663_WF_PHY_MIN_PRI_PWR(_phy)	MT_WF_PHY((_phy) ? 0x2aec : 0x22f0)
+
 #define MT_WF_PHY_RXTD_BASE		MT_WF_PHY(0x2200)
 #define MT_WF_PHY_RXTD(_n)		(MT_WF_PHY_RXTD_BASE + ((_n) << 2))
 
@@ -180,6 +186,8 @@ enum mt7615_reg_base {
 					 GENMASK(8, 1)
 #define MT_WF_PHY_PD_CCK(_phy, v)	((v) << ((_phy) ? 24 : 1))
 
+#define MT7663_WF_PHY_RXTD_CCK_PD(_phy)	MT_WF_PHY((_phy) ? 0x2350 : 0x234c)
+
 #define MT_WF_PHY_RXTD2_BASE		MT_WF_PHY(0x2a00)
 #define MT_WF_PHY_RXTD2(_n)		(MT_WF_PHY_RXTD2_BASE + ((_n) << 2))
 
-- 
cgit v1.2.3-59-g8ed1b


From 450affca7b3d1964a6181e9acc033897bd8bab55 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 17 Apr 2020 12:10:54 +0200
Subject: mt76: mt7615: disable aspm by default

The vendor SDK also disables ASPM by default

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 4f8a3c637a98..21b3ec29aa12 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -41,6 +41,8 @@ static int mt7615_pci_probe(struct pci_dev *pdev,
 	if (ret)
 		return ret;
 
+	mt76_pci_disable_aspm(pdev);
+
 	map = id->device == 0x7663 ? mt7663e_reg_map : mt7615e_reg_map;
 	return mt7615_mmio_probe(&pdev->dev, pcim_iomap_table(pdev)[0],
 				 pdev->irq, map);
-- 
cgit v1.2.3-59-g8ed1b


From 60cb9843f33480d52eaa41ac2fc72940f3bfa17b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 17 Apr 2020 13:10:04 +0200
Subject: mt76: mt7615: provide aid info to the mcu

In STA mode, mac80211 provides the AID in vif->bss_conf.aid.
In order to properly support 802.11 power-save, configure the correct AID
in the MCU during STA association.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 939aeb03b568..530d6302b53a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -790,12 +790,15 @@ mt7615_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
 		basic->conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+		basic->aid = cpu_to_le16(sta->aid);
 		break;
 	case NL80211_IFTYPE_STATION:
 		basic->conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+		basic->aid = cpu_to_le16(vif->bss_conf.aid);
 		break;
 	case NL80211_IFTYPE_ADHOC:
 		basic->conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
+		basic->aid = cpu_to_le16(sta->aid);
 		break;
 	default:
 		WARN_ON(1);
@@ -803,7 +806,6 @@ mt7615_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	}
 
 	memcpy(basic->peer_addr, sta->addr, ETH_ALEN);
-	basic->aid = cpu_to_le16(sta->aid);
 	basic->qos = sta->wme;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a7df11520997e8c67e28675e3170ae7e92e4a165 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 17 Apr 2020 13:10:05 +0200
Subject: mt76: remove PS_NULLFUNC_STACK capability

Remove the IEEE80211_HW_PS_NULLFUNC_STACK capability from the mt76_phy_init
routine, since 802.11 PS is not currently supported by any device and the
flag will conflict with mt7663 PS firmware support.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index f44f99184c10..39abedc45e4a 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -294,7 +294,6 @@ mt76_phy_init(struct mt76_dev *dev, struct ieee80211_hw *hw)
 		hw->max_tx_fragments = 16;
 
 	ieee80211_hw_set(hw, SIGNAL_DBM);
-	ieee80211_hw_set(hw, PS_NULLFUNC_STACK);
 	ieee80211_hw_set(hw, AMPDU_AGGREGATION);
 	ieee80211_hw_set(hw, SUPPORTS_RC_TABLE);
 	ieee80211_hw_set(hw, SUPPORT_FAST_XMIT);
-- 
cgit v1.2.3-59-g8ed1b


From 7f8ebafebce3b0eae48e328ff75475d7181f83ac Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 17 Apr 2020 13:10:06 +0200
Subject: mt76: mt7663: introduce 802.11 PS support in sta mode

Enable 802.11 power-save support available in mt7663 firmware

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  4 ++++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |  7 +++++--
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 23 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 21 ++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  4 ++++
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   |  8 ++++++--
 .../net/wireless/mediatek/mt76/mt7615/usb_init.c   |  3 +++
 9 files changed, 68 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 577465c01827..6106dc4fea44 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -286,6 +286,7 @@ enum {
 	MT76_REMOVED,
 	MT76_READING_STATS,
 	MT76_STATE_POWER_OFF,
+	MT76_STATE_PS,
 };
 
 struct mt76_hw_cap {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 9d9f73b4561e..6fc3f5aa94c0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -323,6 +323,8 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
 	skb_queue_head_init(&phy->scan_event_list);
 
+	INIT_WORK(&dev->phy.ps_work, mt7615_ps_work);
+
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
 	if (!mphy)
@@ -386,7 +388,9 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 	init_waitqueue_head(&dev->reset_wait);
+
 	INIT_WORK(&dev->reset_work, mt7615_mac_reset_work);
+	INIT_WORK(&dev->phy.ps_work, mt7615_ps_work);
 
 	mt7615_init_wiphy(hw);
 	dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index b6e92bb3b128..3dab07d3eb1d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -605,8 +605,11 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 	}
 
 	if (!ieee80211_is_beacon(fc)) {
-		val = MT_TXD5_TX_STATUS_HOST | MT_TXD5_SW_POWER_MGMT |
-		      FIELD_PREP(MT_TXD5_PID, pid);
+		struct ieee80211_hw *hw = mt76_hw(dev);
+
+		val = MT_TXD5_TX_STATUS_HOST | FIELD_PREP(MT_TXD5_PID, pid);
+		if (!ieee80211_hw_check(hw, SUPPORTS_PS))
+			val |= MT_TXD5_SW_POWER_MGMT;
 		txwi[5] = cpu_to_le32(val);
 	} else {
 		txwi[5] = 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index f7fc2185da3b..8f8ad632d6ba 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -71,6 +71,7 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 
 	cancel_delayed_work_sync(&phy->mac_work);
+	cancel_work_sync(&phy->ps_work);
 
 	mutex_lock(&dev->mt76.mutex);
 
@@ -362,6 +363,20 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	return mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
 }
 
+void mt7615_ps_work(struct work_struct *work)
+{
+	struct mt7615_phy *phy;
+
+	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
+						ps_work);
+
+	mutex_lock(&phy->dev->mt76.mutex);
+	ieee80211_iterate_active_interfaces(phy->mt76->hw,
+					    IEEE80211_IFACE_ITER_RESUME_ALL,
+					    m7615_mcu_set_ps_iter, phy);
+	mutex_unlock(&phy->dev->mt76.mutex);
+}
+
 static int mt7615_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
@@ -387,6 +402,14 @@ static int mt7615_config(struct ieee80211_hw *hw, u32 changed)
 		mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
 	}
 
+	if (changed & IEEE80211_CONF_CHANGE_PS) {
+		if (hw->conf.flags & IEEE80211_CONF_PS)
+			set_bit(MT76_STATE_PS, &phy->mt76->state);
+		else
+			clear_bit(MT76_STATE_PS, &phy->mt76->state);
+		ieee80211_queue_work(hw, &phy->ps_work);
+	}
+
 	mutex_unlock(&dev->mt76.mutex);
 
 	return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 530d6302b53a..74cbb975be75 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2586,6 +2586,27 @@ int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable)
 				   sizeof(req), true);
 }
 
+void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_phy *phy = priv;
+	struct mt76_phy *mphy = phy->mt76;
+	struct {
+		u8 bss_idx;
+		u8 ps_state; /* 0: device awake
+			      * 1: static power save
+			      * 2: dynamic power saving
+			      */
+	} req = {
+		.bss_idx = mvif->idx,
+		.ps_state = test_bit(MT76_STATE_PS, &mphy->state) ? 2 : 0,
+	};
+
+	if (vif->type == NL80211_IFTYPE_STATION)
+		__mt76_mcu_send_msg(&phy->dev->mt76,  MCU_CMD_SET_PS_PROFILE,
+				    &req, sizeof(req), false);
+}
+
 int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 {
 	struct mt76_phy *mphy = phy->mt76;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index dff3f3632faf..5440f24a834a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -411,6 +411,7 @@ struct mt7615_mcu_bss_event {
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
+	MCU_CMD_SET_PS_PROFILE = MCU_CE_PREFIX | 0x05,
 	MCU_CMD_SET_CHAN_DOMAIN = MCU_CE_PREFIX | 0x0f,
 	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
 	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 2bc77a0478a9..c9533282e7e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -197,6 +197,8 @@ struct mt7615_phy {
 
 	struct sk_buff_head scan_event_list;
 	struct delayed_work scan_work;
+
+	struct work_struct ps_work;
 };
 
 #define mt7615_mcu_add_tx_ba(dev, ...)	(dev)->mcu_ops->add_tx_ba((dev), __VA_ARGS__)
@@ -423,6 +425,7 @@ static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
 }
 
 void mt7615_scan_work(struct work_struct *work);
+void mt7615_ps_work(struct work_struct *work);
 void mt7615_init_txpower(struct mt7615_dev *dev,
 			 struct ieee80211_supported_band *sband);
 void mt7615_phy_init(struct mt7615_dev *dev);
@@ -508,6 +511,7 @@ int mt7615_mcu_set_radar_th(struct mt7615_dev *dev, int index,
 int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable);
 int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy);
 int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
+void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_init_debugfs(struct mt7615_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index 3a8dd334b53e..cd3ccafa7d11 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -16,6 +16,7 @@ static void mt7615_init_work(struct work_struct *work)
 {
 	struct mt7615_dev *dev = container_of(work, struct mt7615_dev,
 					      mcu_work);
+	struct ieee80211_hw *hw = mt76_hw(dev);
 
 	if (mt7615_mcu_init(dev))
 		return;
@@ -25,8 +26,11 @@ static void mt7615_init_work(struct work_struct *work)
 	mt7615_phy_init(dev);
 	mt7615_mcu_del_wtbl_all(dev);
 
-	if (!mt7615_firmware_offload(dev)) {
-		struct wiphy *wiphy = mt76_hw(dev)->wiphy;
+	if (mt7615_firmware_offload(dev)) {
+		ieee80211_hw_set(hw, SUPPORTS_PS);
+		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
+	} else {
+		struct wiphy *wiphy = hw->wiphy;
 
 		dev->ops->hw_scan = NULL;
 		dev->ops->cancel_hw_scan = NULL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
index a05f0eda21dd..39642065531f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
@@ -119,6 +119,9 @@ int mt7663u_register_device(struct mt7615_dev *dev)
 	if (err)
 		return err;
 
+	ieee80211_hw_set(hw, SUPPORTS_PS);
+	ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
+
 	hw->extra_tx_headroom += MT_USB_HDR_SIZE + MT_USB_TXD_SIZE;
 	/* check hw sg support in order to enable AMSDU */
 	hw->max_tx_fragments = dev->mt76.usb.sg_en ? MT_HW_TXP_MAX_BUF_NUM : 1;
-- 
cgit v1.2.3-59-g8ed1b


From 18ab1d7a37576c10bdeb6409147ae715ddc2f925 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 17 Apr 2020 13:13:28 +0200
Subject: mt76: mt7615: make Kconfig entry obvious for MT7663E

Make Kconfig entry obvious for MT7663E

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
index a84317fb856f..e25db1135eda 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/Kconfig
@@ -5,7 +5,7 @@ config MT7615_COMMON
 	select MT76_CORE
 
 config MT7615E
-	tristate "MediaTek MT7615E (PCIe) support"
+	tristate "MediaTek MT7615E and MT7663E (PCIe) support"
 	select MT7615_COMMON
 	depends on MAC80211
 	depends on PCI
-- 
cgit v1.2.3-59-g8ed1b


From 6ea62c50792c6ad8b283c02d19f4304c7f3a3ccf Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 19 Apr 2020 22:11:41 +0200
Subject: mt76: add rx queues info to mt76 debugfs

Introduce an rx-queues debugfs node in order to dump the rx queues status.
This is useful for mcu fw debugging.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/debugfs.c        | 21 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7603/debugfs.c |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c |  4 ++--
 .../net/wireless/mediatek/mt76/mt76x02_debugfs.c    |  2 +-
 4 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/debugfs.c b/drivers/net/wireless/mediatek/mt76/debugfs.c
index d2202acb8dc6..0278e1b44576 100644
--- a/drivers/net/wireless/mediatek/mt76/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/debugfs.c
@@ -46,6 +46,25 @@ int mt76_queues_read(struct seq_file *s, void *data)
 }
 EXPORT_SYMBOL_GPL(mt76_queues_read);
 
+static int mt76_rx_queues_read(struct seq_file *s, void *data)
+{
+	struct mt76_dev *dev = dev_get_drvdata(s->private);
+	int i, queued;
+
+	for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
+		struct mt76_queue *q = &dev->q_rx[i];
+
+		if (!q->ndesc)
+			continue;
+
+		queued = mt76_is_usb(dev) ? q->ndesc - q->queued : q->queued;
+		seq_printf(s, "%d:	queued=%d head=%d tail=%d\n",
+			   i, queued, q->head, q->tail);
+	}
+
+	return 0;
+}
+
 void mt76_seq_puts_array(struct seq_file *file, const char *str,
 			 s8 *val, int len)
 {
@@ -92,6 +111,8 @@ struct dentry *mt76_register_debugfs(struct mt76_dev *dev)
 		debugfs_create_blob("otp", 0400, dir, &dev->otp);
 	debugfs_create_devm_seqfile(dev->dev, "rate_txpower", dir,
 				    mt76_read_rate_txpower);
+	debugfs_create_devm_seqfile(dev->dev, "rx-queues", dir,
+				    mt76_rx_queues_read);
 
 	return dir;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7603/debugfs.c
index cc7c788abedd..8ce6880b2bb8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/debugfs.c
@@ -113,7 +113,7 @@ void mt7603_init_debugfs(struct mt7603_dev *dev)
 		return;
 
 	debugfs_create_file("ampdu_stat", 0400, dir, dev, &fops_ampdu_stat);
-	debugfs_create_devm_seqfile(dev->mt76.dev, "queues", dir,
+	debugfs_create_devm_seqfile(dev->mt76.dev, "xmit-queues", dir,
 				    mt76_queues_read);
 	debugfs_create_file("edcca", 0600, dir, dev, &fops_edcca);
 	debugfs_create_u32("reset_test", 0600, dir, &dev->reset_test);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
index 150036488e3f..fd3ef483a87c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/debugfs.c
@@ -293,10 +293,10 @@ int mt7615_init_debugfs(struct mt7615_dev *dev)
 		return -ENOMEM;
 
 	if (is_mt7615(&dev->mt76))
-		debugfs_create_devm_seqfile(dev->mt76.dev, "queues", dir,
+		debugfs_create_devm_seqfile(dev->mt76.dev, "xmit-queues", dir,
 					    mt7615_queues_read);
 	else
-		debugfs_create_devm_seqfile(dev->mt76.dev, "queues", dir,
+		debugfs_create_devm_seqfile(dev->mt76.dev, "xmit-queues", dir,
 					    mt76_queues_read);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir,
 				    mt7615_queues_acq);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
index 68b40d63a46d..ff448a1ad4e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_debugfs.c
@@ -144,7 +144,7 @@ void mt76x02_init_debugfs(struct mt76x02_dev *dev)
 	if (!dir)
 		return;
 
-	debugfs_create_devm_seqfile(dev->mt76.dev, "queues", dir,
+	debugfs_create_devm_seqfile(dev->mt76.dev, "xmit-queues", dir,
 				    mt76_queues_read);
 	debugfs_create_u8("temperature", 0400, dir, &dev->cal.temp);
 	debugfs_create_bool("tpc", 0600, dir, &dev->enable_tpc);
-- 
cgit v1.2.3-59-g8ed1b


From df5ab0d58b2e3952acef5be1c8c183a6cf31daab Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 20 Apr 2020 13:58:44 +0200
Subject: mt76: mt7615: parse mcu return code for unified commands

Add return code parsing for the following unified commands:
- MCU_UNI_CMD_DEV_INFO_UPDATE
- MCU_UNI_CMD_BSS_INFO_UPDATE
- MCU_UNI_CMD_STA_REC_UPDATE

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 10 ++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h |  6 ++++++
 2 files changed, 16 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 74cbb975be75..5329bd4f8237 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -187,6 +187,16 @@ mt7615_mcu_parse_response(struct mt7615_dev *dev, int cmd,
 		skb_pull(skb, sizeof(*rxd));
 		ret = le32_to_cpu(*(__le32 *)skb->data);
 		break;
+	case MCU_UNI_CMD_DEV_INFO_UPDATE:
+	case MCU_UNI_CMD_BSS_INFO_UPDATE:
+	case MCU_UNI_CMD_STA_REC_UPDATE: {
+		struct mt7615_mcu_uni_event *event;
+
+		skb_pull(skb, sizeof(*rxd));
+		event = (struct mt7615_mcu_uni_event *)skb->data;
+		ret = le32_to_cpu(event->status);
+		break;
+	}
 	default:
 		break;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 5440f24a834a..032b5f98608e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -282,6 +282,12 @@ enum {
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
 };
 
+struct mt7615_mcu_uni_event {
+	u8 cid;
+	u8 pad[3];
+	__le32 status; /* 0: success, others: fail */
+} __packed;
+
 struct mt7615_mcu_scan_ssid {
 	__le32 ssid_len;
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
-- 
cgit v1.2.3-59-g8ed1b


From becdf0d5d7a46f5ed1f12405ffae4b04764fe27c Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 20 Apr 2020 14:07:45 +0200
Subject: mt76: mt7615: fix mt7615_firmware_own for mt7663e

Check that the firmware-own configuration has been applied by polling
the MT_CONN_HIF_ON_LPCTL register.

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 5329bd4f8237..68d48b7ed06b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1710,9 +1710,8 @@ static int mt7615_firmware_own(struct mt7615_dev *dev)
 
 	mt76_wr(dev, addr, MT_CFG_LPCR_HOST_FW_OWN);
 
-	if (is_mt7622(&dev->mt76) &&
-	    !mt76_poll_msec(dev, MT_CFG_LPCR_HOST,
-			    MT_CFG_LPCR_HOST_FW_OWN,
+	if (!is_mt7615(&dev->mt76) &&
+	    !mt76_poll_msec(dev, addr, MT_CFG_LPCR_HOST_FW_OWN,
 			    MT_CFG_LPCR_HOST_FW_OWN, 3000)) {
 		dev_err(dev->mt76.dev, "Timeout for firmware own\n");
 		return -EIO;
-- 
cgit v1.2.3-59-g8ed1b


From 8f997dddc0784f6a9f0abece590a97da817830f8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 20 Apr 2020 14:39:02 +0200
Subject: mt76: mt7615: fix max wtbl size for 7663

Current mt7663 offload firmware can support up to 32 wtbl entries

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 11 ++++++++++-
 drivers/net/wireless/mediatek/mt76/util.c          |  2 +-
 2 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index c9533282e7e3..ffb146840377 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -12,8 +12,9 @@
 
 #define MT7615_MAX_INTERFACES		4
 #define MT7615_MAX_WMM_SETS		4
+#define MT7663_WTBL_SIZE		32
 #define MT7615_WTBL_SIZE		128
-#define MT7615_WTBL_RESERVED		(MT7615_WTBL_SIZE - 1)
+#define MT7615_WTBL_RESERVED		(mt7615_wtbl_size(dev) - 1)
 #define MT7615_WTBL_STA			(MT7615_WTBL_RESERVED - \
 					 MT7615_MAX_INTERFACES)
 
@@ -424,6 +425,14 @@ static inline bool mt7615_firmware_offload(struct mt7615_dev *dev)
 	return dev->fw_ver > MT7615_FIRMWARE_V2;
 }
 
+static inline u16 mt7615_wtbl_size(struct mt7615_dev *dev)
+{
+	if (is_mt7663(&dev->mt76) && mt7615_firmware_offload(dev))
+		return MT7663_WTBL_SIZE;
+	else
+		return MT7615_WTBL_SIZE;
+}
+
 void mt7615_scan_work(struct work_struct *work);
 void mt7615_ps_work(struct work_struct *work);
 void mt7615_init_txpower(struct mt7615_dev *dev,
diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c
index 8c60c450125a..07cf71242d9e 100644
--- a/drivers/net/wireless/mediatek/mt76/util.c
+++ b/drivers/net/wireless/mediatek/mt76/util.c
@@ -46,7 +46,7 @@ int mt76_wcid_alloc(unsigned long *mask, int size)
 {
 	int i, idx = 0, cur;
 
-	for (i = 0; i < size / BITS_PER_LONG; i++) {
+	for (i = 0; i < DIV_ROUND_UP(size, BITS_PER_LONG); i++) {
 		idx = ffs(~mask[i]);
 		if (!idx)
 			continue;
-- 
cgit v1.2.3-59-g8ed1b


From 338061619185133f56ac17365deb1e75eaecc604 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Mon, 20 Apr 2020 22:40:55 +0200
Subject: mt76: mt7615: fix mt7615_driver_own routine

Introduce MT_PCIE_DOORBELL_PUSH register to fix mt7615_driver_own
routine for mt7663e

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 6 +++++-
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h | 1 +
 2 files changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 68d48b7ed06b..c267280dab4e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1686,16 +1686,20 @@ static void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en)
 
 static int mt7615_driver_own(struct mt7615_dev *dev)
 {
+	struct mt76_dev *mdev = &dev->mt76;
 	u32 addr;
 
-	addr = is_mt7663(&dev->mt76) ? MT_CONN_HIF_ON_LPCTL : MT_CFG_LPCR_HOST;
+	addr = is_mt7663(mdev) ? MT_PCIE_DOORBELL_PUSH : MT_CFG_LPCR_HOST;
 	mt76_wr(dev, addr, MT_CFG_LPCR_HOST_DRV_OWN);
 
 	mt7622_trigger_hif_int(dev, true);
+
+	addr = is_mt7663(mdev) ? MT_CONN_HIF_ON_LPCTL : MT_CFG_LPCR_HOST;
 	if (!mt76_poll_msec(dev, addr, MT_CFG_LPCR_HOST_FW_OWN, 0, 3000)) {
 		dev_err(dev->mt76.dev, "Timeout for driver own\n");
 		return -EIO;
 	}
+
 	mt7622_trigger_hif_int(dev, false);
 
 	return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index ab3c6b77df07..054831ecad68 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -68,6 +68,7 @@ enum mt7615_reg_base {
 #define MT_HIF2_BASE			0xf0000
 #define MT_HIF2(ofs)			(MT_HIF2_BASE + (ofs))
 #define MT_PCIE_IRQ_ENABLE		MT_HIF2(0x188)
+#define MT_PCIE_DOORBELL_PUSH		MT_HIF2(0x1484)
 
 #define MT_CFG_LPCR_HOST		MT_HIF(0x1f0)
 #define MT_CFG_LPCR_HOST_FW_OWN		BIT(0)
-- 
cgit v1.2.3-59-g8ed1b


From fdf433121f82766ff508a6f06665d2aca3e258d5 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 21 Apr 2020 16:31:40 +0200
Subject: mt76: mt7615: fix aid configuration in mt7615_mcu_wtbl_generic_tlv

If the vif is running in station mode the aid will be passed by mac80211
using bss_conf.aid. Fix aid configuration in mt7615_mcu_wtbl_generic_tlv

Fixes: 04b8e65922f6 ("mt76: add mac80211 driver for MT7615 PCIe-based chipsets")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index c267280dab4e..e4baee37cac1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -898,8 +898,11 @@ mt7615_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	generic = (struct wtbl_generic *)tlv;
 
 	if (sta) {
+		if (vif->type == NL80211_IFTYPE_STATION)
+			generic->partial_aid = cpu_to_le16(vif->bss_conf.aid);
+		else
+			generic->partial_aid = cpu_to_le16(sta->aid);
 		memcpy(generic->peer_addr, sta->addr, ETH_ALEN);
-		generic->partial_aid = cpu_to_le16(sta->aid);
 		generic->muar_idx = mvif->omac_idx;
 		generic->qos = sta->wme;
 	} else {
-- 
cgit v1.2.3-59-g8ed1b


From a621372a04ac6435edbf270ff85edae8a3e04c91 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 22 Apr 2020 10:47:23 +0200
Subject: mt76: mt7615: rework mt7615_mac_sta_poll for usb code

Since usb code can't access device registers in interrupt context, move
rcu_read_lock/rcu_read_unlock into the mt7615_poll_tx routine. Moreover,
loop over a local msta list in mt7615_mac_sta_poll, since the mt7663u
driver will not be able to complete the inner while loop before
sta_poll_list is refilled by mt7615_mac_add_txs/mt7615_mac_fill_rx.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 20 ++++++++------------
 2 files changed, 10 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index b0ba0e7807bf..5cdbe3747901 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -121,7 +121,9 @@ static int mt7615_poll_tx(struct napi_struct *napi, int budget)
 
 	mt7615_tx_cleanup(dev);
 
+	rcu_read_lock();
 	mt7615_mac_sta_poll(dev);
+	rcu_read_unlock();
 
 	tasklet_schedule(&dev->mt76.tx_tasklet);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 3dab07d3eb1d..396f11e1bbf6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -724,22 +724,20 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
 	struct ieee80211_sta *sta;
 	struct mt7615_sta *msta;
 	u32 addr, tx_time[4], rx_time[4];
+	struct list_head sta_poll_list;
 	int i;
 
-	rcu_read_lock();
+	INIT_LIST_HEAD(&sta_poll_list);
+	spin_lock_bh(&dev->sta_poll_lock);
+	list_splice_init(&dev->sta_poll_list, &sta_poll_list);
+	spin_unlock_bh(&dev->sta_poll_lock);
 
-	while (true) {
+	while (!list_empty(&sta_poll_list)) {
 		bool clear = false;
 
-		spin_lock_bh(&dev->sta_poll_lock);
-		if (list_empty(&dev->sta_poll_list)) {
-			spin_unlock_bh(&dev->sta_poll_lock);
-			break;
-		}
-		msta = list_first_entry(&dev->sta_poll_list,
-					struct mt7615_sta, poll_list);
+		msta = list_first_entry(&sta_poll_list, struct mt7615_sta,
+					poll_list);
 		list_del_init(&msta->poll_list);
-		spin_unlock_bh(&dev->sta_poll_lock);
 
 		addr = mt7615_mac_wtbl_addr(dev, msta->wcid.idx) + 19 * 4;
 
@@ -779,8 +777,6 @@ void mt7615_mac_sta_poll(struct mt7615_dev *dev)
 						       rx_cur);
 		}
 	}
-
-	rcu_read_unlock();
 }
 EXPORT_SYMBOL_GPL(mt7615_mac_sta_poll);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5416651c9e00f4cbc6f8528bb42fc33e6c600309 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 22 Apr 2020 10:47:24 +0200
Subject: mt76: mt7663u: enable AirTimeFairness

Initialize tx_status_data pointer in order to enable Air Time Fairness
for mt7663u chipset

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index ad5219006987..eee6f820959f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -249,6 +249,17 @@ mt7663u_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
 	return mt76u_skb_dma_info(tx_info->skb, tx_info->skb->len);
 }
 
+static bool mt7663u_tx_status_data(struct mt76_dev *mdev, u8 *update)
+{
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+
+	mutex_lock(&dev->mt76.mutex);
+	mt7615_mac_sta_poll(dev);
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
 static int mt7663u_probe(struct usb_interface *usb_intf,
 			 const struct usb_device_id *id)
 {
@@ -257,6 +268,7 @@ static int mt7663u_probe(struct usb_interface *usb_intf,
 		.drv_flags = MT_DRV_RX_DMA_HDR,
 		.tx_prepare_skb = mt7663u_tx_prepare_skb,
 		.tx_complete_skb = mt7663u_tx_complete_skb,
+		.tx_status_data = mt7663u_tx_status_data,
 		.rx_skb = mt7615_queue_rx_skb,
 		.sta_ps = mt7615_sta_ps,
 		.sta_add = mt7615_mac_sta_add,
-- 
cgit v1.2.3-59-g8ed1b


From d0116058c7f637842097741cee357496326f2b9d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 20 Apr 2020 15:49:35 +0200
Subject: mt76: mt7615: fix sta ampdu factor for VHT

If VHT has a larger A-MPDU size limit, pass it to the MCU via the wtbl_ht
TLV element.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 11 +++++++++--
 1 file changed, 9 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index e4baee37cac1..1d1b2a7b4325 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -924,11 +924,10 @@ mt7615_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 		       void *sta_wtbl, void *wtbl_tlv)
 {
 	struct tlv *tlv;
+	struct wtbl_ht *ht = NULL;
 	u32 flags = 0;
 
 	if (sta->ht_cap.ht_supported) {
-		struct wtbl_ht *ht;
-
 		tlv = mt7615_mcu_add_nested_tlv(skb, WTBL_HT, sizeof(*ht),
 						wtbl_tlv, sta_wtbl);
 		ht = (struct wtbl_ht *)tlv;
@@ -945,6 +944,7 @@ mt7615_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 
 	if (sta->vht_cap.vht_supported) {
 		struct wtbl_vht *vht;
+		u8 af;
 
 		tlv = mt7615_mcu_add_nested_tlv(skb, WTBL_VHT, sizeof(*vht),
 						wtbl_tlv, sta_wtbl);
@@ -952,6 +952,13 @@ mt7615_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 		vht->ldpc = sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC,
 		vht->vht = 1;
 
+		af = (sta->vht_cap.cap &
+		      IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK) >>
+		      IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_SHIFT;
+
+		if (ht)
+		    ht->af = max(ht->af, af);
+
 		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80)
 			flags |= MT_WTBL_W5_SHORT_GI_80;
 		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_160)
-- 
cgit v1.2.3-59-g8ed1b


From 55961d8be35d0268d66f4ffe2cbbccff4936aae5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 20 Apr 2020 16:34:16 +0200
Subject: mt76: fix A-MPDU density handling

The hardware requirements for A-MPDU density are entirely on the tx side,
not the rx side. Because of that, the IE value should stay at 0 and the
minimum value should instead be enforced in WTBL/TXWI

MT7615 has no restrictions here

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c    | 1 -
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c  | 5 +++++
 drivers/net/wireless/mediatek/mt76/mt76x02_mac.c | 7 +++++--
 3 files changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 39abedc45e4a..b6c0a6d0dfc1 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -198,7 +198,6 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 
 	ht_cap->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 	ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
-	ht_cap->ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
 
 	mt76_init_stream_cap(dev, sband, vht);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 39b7c5d6e6cd..73c6a3ff399c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -318,11 +318,16 @@ void mt7603_wtbl_update_cap(struct mt7603_dev *dev, struct ieee80211_sta *sta)
 {
 	struct mt7603_sta *msta = (struct mt7603_sta *)sta->drv_priv;
 	int idx = msta->wcid.idx;
+	u8 ampdu_density;
 	u32 addr;
 	u32 val;
 
 	addr = mt7603_wtbl1_addr(idx);
 
+	ampdu_density = sta->ht_cap.ampdu_density;
+	if (ampdu_density < IEEE80211_HT_MPDU_DENSITY_4)
+		ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
+
 	val = mt76_rr(dev, addr + 2 * 4);
 	val &= MT_WTBL1_W2_KEY_TYPE | MT_WTBL1_W2_ADMISSION_CONTROL;
 	val |= FIELD_PREP(MT_WTBL1_W2_AMPDU_FACTOR, sta->ht_cap.ampdu_factor) |
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index 8b072277ea10..a5a3bcd30d6f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -409,6 +409,7 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
 		txwi->ack_ctl |= MT_TXWI_ACK_CTL_NSEQ;
 	if ((info->flags & IEEE80211_TX_CTL_AMPDU) && sta) {
 		u8 ba_size = IEEE80211_MIN_AMPDU_BUF;
+		u8 ampdu_density = sta->ht_cap.ampdu_density;
 
 		ba_size <<= sta->ht_cap.ampdu_factor;
 		ba_size = min_t(int, 63, ba_size - 1);
@@ -416,9 +417,11 @@ void mt76x02_mac_write_txwi(struct mt76x02_dev *dev, struct mt76x02_txwi *txwi,
 			ba_size = 0;
 		txwi->ack_ctl |= FIELD_PREP(MT_TXWI_ACK_CTL_BA_WINDOW, ba_size);
 
+		if (ampdu_density < IEEE80211_HT_MPDU_DENSITY_4)
+			ampdu_density = IEEE80211_HT_MPDU_DENSITY_4;
+
 		txwi_flags |= MT_TXWI_FLAGS_AMPDU |
-			 FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY,
-				    sta->ht_cap.ampdu_density);
+			 FIELD_PREP(MT_TXWI_FLAGS_MPDU_DENSITY, ampdu_density);
 	}
 
 	if (ieee80211_is_probe_resp(hdr->frame_control) ||
-- 
cgit v1.2.3-59-g8ed1b


From 61cb60758a05de1eaf5017e37a099fe52c5cce25 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 20 Apr 2020 16:53:28 +0200
Subject: mt76: mt7615: use larger rx buffers if VHT is supported

In VHT mode we can receive larger MPDUs. Increasing the buffer size reduces
fragmentation here, which should improve performance.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 5cdbe3747901..0b1fbddd1c3f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -192,8 +192,13 @@ static void mt7663_dma_sched_init(struct mt7615_dev *dev)
 int mt7615_dma_init(struct mt7615_dev *dev)
 {
 	int rx_ring_size = MT7615_RX_RING_SIZE;
+	int rx_buf_size = MT_RX_BUF_SIZE;
 	int ret;
 
+	/* Increase buffer size to receive large VHT MPDUs */
+	if (dev->mt76.cap.has_5ghz)
+		rx_buf_size *= 2;
+
 	mt76_dma_attach(&dev->mt76);
 
 	mt76_wr(dev, MT_WPDMA_GLO_CFG,
@@ -234,7 +239,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 
 	/* init rx queues */
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU], 1,
-			       MT7615_RX_MCU_RING_SIZE, MT_RX_BUF_SIZE,
+			       MT7615_RX_MCU_RING_SIZE, rx_buf_size,
 			       MT_RX_RING_BASE);
 	if (ret)
 		return ret;
@@ -243,7 +248,7 @@ int mt7615_dma_init(struct mt7615_dev *dev)
 	    rx_ring_size /= 2;
 
 	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN], 0,
-			       rx_ring_size, MT_RX_BUF_SIZE, MT_RX_RING_BASE);
+			       rx_ring_size, rx_buf_size, MT_RX_RING_BASE);
 	if (ret)
 		return ret;
 
-- 
cgit v1.2.3-59-g8ed1b


From ab9a1ed229ba668c4b0481cd174096f7876933a0 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 12:25:54 +0200
Subject: mt76: mt7615: never use an 802.11b CF-End rate on 5GHz

Sometimes mt7615_mac_set_timing gets called while the slot time is still
configured to 20. Ensure that in this case it always uses the OFDM CFend
rate.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 396f11e1bbf6..63c7acfd4cc2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -117,8 +117,9 @@ void mt7615_mac_set_timing(struct mt7615_phy *phy)
 	u32 ofdm = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, 60) |
 		   FIELD_PREP(MT_TIMEOUT_VAL_CCA, 24);
 	int sifs, offset;
+	bool is_5ghz = phy->mt76->chandef.chan->band == NL80211_BAND_5GHZ;
 
-	if (phy->mt76->chandef.chan->band == NL80211_BAND_5GHZ)
+	if (is_5ghz)
 		sifs = 16;
 	else
 		sifs = 10;
@@ -151,7 +152,7 @@ void mt7615_mac_set_timing(struct mt7615_phy *phy)
 		FIELD_PREP(MT_IFS_SIFS, sifs) |
 		FIELD_PREP(MT_IFS_SLOT, phy->slottime));
 
-	if (phy->slottime < 20)
+	if (phy->slottime < 20 || is_5ghz)
 		val = MT7615_CFEND_RATE_DEFAULT;
 	else
 		val = MT7615_CFEND_RATE_11B;
-- 
cgit v1.2.3-59-g8ed1b


From e0b4fe832c3b400b34c0dfcce95f134dac2a5ef2 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 17:46:58 +0200
Subject: mt76: mt7603: never use an 802.11b CF-End rate on 5GHz

Sometimes mt7603_mac_set_timing gets called while the slot time is still
configured to 20. Ensure that in this case it always uses the OFDM CFend
rate.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 73c6a3ff399c..d34828715e1c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -51,10 +51,11 @@ void mt7603_mac_set_timing(struct mt7603_dev *dev)
 	int offset = 3 * dev->coverage_class;
 	u32 reg_offset = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, offset) |
 			 FIELD_PREP(MT_TIMEOUT_VAL_CCA, offset);
+	bool is_5ghz = dev->mphy.chandef.chan->band == NL80211_BAND_5GHZ;
 	int sifs;
 	u32 val;
 
-	if (dev->mphy.chandef.chan->band == NL80211_BAND_5GHZ)
+	if (is_5ghz)
 		sifs = 16;
 	else
 		sifs = 10;
@@ -71,7 +72,7 @@ void mt7603_mac_set_timing(struct mt7603_dev *dev)
 		FIELD_PREP(MT_IFS_SIFS, sifs) |
 		FIELD_PREP(MT_IFS_SLOT, dev->slottime));
 
-	if (dev->slottime < 20)
+	if (dev->slottime < 20 || is_5ghz)
 		val = MT7603_CFEND_RATE_DEFAULT;
 	else
 		val = MT7603_CFEND_RATE_11B;
-- 
cgit v1.2.3-59-g8ed1b


From ed2bde56ab1fcda71faa24d720df06353d91e237 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 12:28:19 +0200
Subject: mt76: mt7615: adjust timing in mt7615_mac_set_timing to match fw/hw
 values

Slightly improves performance

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 63c7acfd4cc2..88cbf5ffa290 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -115,7 +115,7 @@ void mt7615_mac_set_timing(struct mt7615_phy *phy)
 	u32 cck = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, 231) |
 		  FIELD_PREP(MT_TIMEOUT_VAL_CCA, 48);
 	u32 ofdm = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, 60) |
-		   FIELD_PREP(MT_TIMEOUT_VAL_CCA, 24);
+		   FIELD_PREP(MT_TIMEOUT_VAL_CCA, 28);
 	int sifs, offset;
 	bool is_5ghz = phy->mt76->chandef.chan->band == NL80211_BAND_5GHZ;
 
-- 
cgit v1.2.3-59-g8ed1b


From ad333c2ad23ec179b0c907e8553779873f4cc1c8 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 12:32:10 +0200
Subject: mt76: mt7615: do not adjust MAC timings if the device is not running

Avoids register writes and MAC start/stop when the hardware isn't ready for it

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 88cbf5ffa290..89aed6d67de0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -119,6 +119,9 @@ void mt7615_mac_set_timing(struct mt7615_phy *phy)
 	int sifs, offset;
 	bool is_5ghz = phy->mt76->chandef.chan->band == NL80211_BAND_5GHZ;
 
+	if (!test_bit(MT76_STATE_RUNNING, &phy->mt76->state))
+		return;
+
 	if (is_5ghz)
 		sifs = 16;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From 97507b38a4de63e55801bc91a107c2794159ccd6 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 13:10:00 +0200
Subject: mt76: mt7615: fix tx status rate index calculation

A switch from one rate index to the next only happens when tx count from
the current slot is greater than MT7615_RATE_RETRY, which is why 1 has to be
subtracted from count, instead of added to it.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 89aed6d67de0..e7a76032caff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1148,7 +1148,7 @@ static bool mt7615_fill_txs(struct mt7615_dev *dev, struct mt7615_sta *sta,
 	if (ampdu || (info->flags & IEEE80211_TX_CTL_AMPDU))
 		info->flags |= IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_CTL_AMPDU;
 
-	first_idx = max_t(int, 0, last_idx - (count + 1) / MT7615_RATE_RETRY);
+	first_idx = max_t(int, 0, last_idx - (count - 1) / MT7615_RATE_RETRY);
 
 	if (fixed_rate && !probe) {
 		info->status.rates[0].count = count;
-- 
cgit v1.2.3-59-g8ed1b


From 40a61c9b2c725da82bd60a39bc54b8884b0a57b5 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 22 Apr 2020 13:15:06 +0200
Subject: mt76: mt7603: fix tx status rate index calculation

A switch from one rate index to the next only happens when tx count from
the current slot is greater than MT7603_RATE_RETRY, which is why 1 has to be
subtracted from count, instead of added to it.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index d34828715e1c..f8c0c957ca01 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1103,7 +1103,7 @@ mt7603_fill_txs(struct mt7603_dev *dev, struct mt7603_sta *sta,
 	if (ampdu || (info->flags & IEEE80211_TX_CTL_AMPDU))
 		info->flags |= IEEE80211_TX_STAT_AMPDU | IEEE80211_TX_CTL_AMPDU;
 
-	first_idx = max_t(int, 0, last_idx - (count + 1) / MT7603_RATE_RETRY);
+	first_idx = max_t(int, 0, last_idx - (count - 1) / MT7603_RATE_RETRY);
 
 	if (fixed_rate && !probe) {
 		info->status.rates[0].count = count;
-- 
cgit v1.2.3-59-g8ed1b


From dcc4c74f9260fdc746d6ab4af17518cfe9d4e071 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 22 Apr 2020 13:07:44 +0200
Subject: mt76: mt7615: move mcu bss upload before creating the sta

Run the mt7615_mcu_add_bss_info routine before mt7615_mcu_sta_add since
the firmware requires the bss to be created before the related sta

Tested-by: Sean Wang <sean.wang@mediatek.com>
Suggested-by: YF Luo <yf.luo@mediatek.com>
Suggested-by: Lucy Hsu <lucy.hsu@mediatek.com>
Co-developed-by: Soul Huang <soul.huang@mediatek.com>
Signed-off-by: Soul Huang <soul.huang@mediatek.com>
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 8f8ad632d6ba..3c35b8d0489c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -491,9 +491,6 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 
 	mutex_lock(&dev->mt76.mutex);
 
-	if (changed & BSS_CHANGED_ASSOC)
-		mt7615_mcu_add_bss_info(phy, vif, info->assoc);
-
 	if (changed & BSS_CHANGED_ERP_SLOT) {
 		int slottime = info->use_short_slot ? 9 : 20;
 
@@ -545,9 +542,14 @@ int mt7615_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	msta->wcid.idx = idx;
 	msta->wcid.ext_phy = mvif->band_idx;
 
+	if (vif->type == NL80211_IFTYPE_STATION) {
+		struct mt7615_phy *phy;
+
+		phy = mvif->band_idx ? mt7615_ext_phy(dev) : &dev->phy;
+		mt7615_mcu_add_bss_info(phy, vif, true);
+	}
 	mt7615_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
-
 	mt7615_mcu_sta_add(dev, vif, sta, true);
 
 	return 0;
@@ -563,6 +565,13 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7615_mcu_sta_add(dev, vif, sta, false);
 	mt7615_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+	if (vif->type == NL80211_IFTYPE_STATION) {
+		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+		struct mt7615_phy *phy;
+
+		phy = mvif->band_idx ? mt7615_ext_phy(dev) : &dev->phy;
+		mt7615_mcu_add_bss_info(phy, vif, false);
+	}
 
 	spin_lock_bh(&dev->sta_poll_lock);
 	if (!list_empty(&msta->poll_list))
-- 
cgit v1.2.3-59-g8ed1b


From dd89a0133c0ce80bb8c2f873a85b28f2d33640bd Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 22 Apr 2020 18:29:32 +0200
Subject: mt76: enable TDLS support

Enable mac80211 TDLS support by default. Disable TDLS hw filtering for
mt7615 devices

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c    | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 3 +++
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c | 2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h | 8 ++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c  | 1 +
 6 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index b6c0a6d0dfc1..176c22a5319a 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -278,7 +278,8 @@ mt76_phy_init(struct mt76_dev *dev, struct ieee80211_hw *hw)
 	SET_IEEE80211_PERM_ADDR(hw, dev->macaddr);
 
 	wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR;
-	wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
+	wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH |
+			WIPHY_FLAG_SUPPORTS_TDLS;
 
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
 	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 6fc3f5aa94c0..9880643888ba 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -63,6 +63,7 @@ mt7615_init_mac_chain(struct mt7615_dev *dev, int chain)
 		FIELD_PREP(MT_AGG_ARxCR_LIMIT(6), MT7615_RATE_RETRY - 1) |
 		FIELD_PREP(MT_AGG_ARxCR_LIMIT(7), MT7615_RATE_RETRY - 1));
 
+	mt76_clear(dev, MT_DMA_RCFR0(chain), MT_DMA_RCFR0_MCU_RX_TDLS);
 	if (!mt7615_firmware_offload(dev)) {
 		u32 mask, set;
 
@@ -117,6 +118,8 @@ void mt7615_mac_init(struct mt7615_dev *dev)
 	mt76_wr(dev, MT_DMA_DCR0,
 		FIELD_PREP(MT_DMA_DCR0_MAX_RX_LEN, 3072) |
 		MT_DMA_DCR0_RX_VEC_DROP);
+	/* disable TDLS filtering */
+	mt76_clear(dev, MT_WF_PFCR, MT_WF_PFCR_TDLS_EN);
 	mt76_set(dev, MT_WF_MIB_SCR0, MT_MIB_SCR0_AGG_CNT_RANGE_EN);
 	if (is_mt7663(&dev->mt76)) {
 		mt76_wr(dev, MT_WF_AGG(0x160), 0x5c341c02);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 3c35b8d0489c..4e1d162b2664 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -542,7 +542,7 @@ int mt7615_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	msta->wcid.idx = idx;
 	msta->wcid.ext_phy = mvif->band_idx;
 
-	if (vif->type == NL80211_IFTYPE_STATION) {
+	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) {
 		struct mt7615_phy *phy;
 
 		phy = mvif->band_idx ? mt7615_ext_phy(dev) : &dev->phy;
@@ -565,7 +565,7 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7615_mcu_sta_add(dev, vif, sta, false);
 	mt7615_mac_wtbl_update(dev, msta->wcid.idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
-	if (vif->type == NL80211_IFTYPE_STATION) {
+	if (vif->type == NL80211_IFTYPE_STATION && !sta->tdls) {
 		struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 		struct mt7615_phy *phy;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index 2c4b1a315d5b..c9f5b1ce70ae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -21,6 +21,7 @@ const u32 mt7615e_reg_map[] = {
 	[MT_TMAC_BASE]		= 0x21000,
 	[MT_RMAC_BASE]		= 0x21200,
 	[MT_DMA_BASE]		= 0x21800,
+	[MT_PF_BASE]		= 0x22000,
 	[MT_WTBL_BASE_ON]	= 0x23000,
 	[MT_WTBL_BASE_OFF]	= 0x23400,
 	[MT_LPON_BASE]		= 0x24200,
@@ -45,6 +46,7 @@ const u32 mt7663e_reg_map[] = {
 	[MT_TMAC_BASE]		= 0x24000,
 	[MT_RMAC_BASE]		= 0x25000,
 	[MT_DMA_BASE]		= 0x27000,
+	[MT_PF_BASE]		= 0x28000,
 	[MT_WTBL_BASE_ON]	= 0x29000,
 	[MT_WTBL_BASE_OFF]	= 0x29800,
 	[MT_LPON_BASE]		= 0x2b000,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 054831ecad68..68d30bcc087a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -18,6 +18,7 @@ enum mt7615_reg_base {
 	MT_TMAC_BASE,
 	MT_RMAC_BASE,
 	MT_DMA_BASE,
+	MT_PF_BASE,
 	MT_WTBL_BASE_ON,
 	MT_WTBL_BASE_OFF,
 	MT_LPON_BASE,
@@ -321,10 +322,17 @@ enum mt7615_reg_base {
 #define MT_DMA_RCFR0_MCU_RX_MGMT	BIT(2)
 #define MT_DMA_RCFR0_MCU_RX_CTL_NON_BAR	BIT(3)
 #define MT_DMA_RCFR0_MCU_RX_CTL_BAR	BIT(4)
+#define MT_DMA_RCFR0_MCU_RX_TDLS	BIT(19)
 #define MT_DMA_RCFR0_MCU_RX_BYPASS	BIT(21)
 #define MT_DMA_RCFR0_RX_DROPPED_UCAST	GENMASK(25, 24)
 #define MT_DMA_RCFR0_RX_DROPPED_MCAST	GENMASK(27, 26)
 
+#define MT_WF_PF_BASE			((dev)->reg_map[MT_PF_BASE])
+#define MT_WF_PF(ofs)			(MT_WF_PF_BASE + (ofs))
+
+#define MT_WF_PFCR			MT_WF_PF(0x000)
+#define MT_WF_PFCR_TDLS_EN		BIT(9)
+
 #define MT_WTBL_BASE(dev)		((dev)->reg_map[MT_WTBL_BASE_ADDR])
 #define MT_WTBL_ENTRY_SIZE		256
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index eee6f820959f..bcd131969923 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -31,6 +31,7 @@ static const u32 mt7663u_reg_map[] = {
 	[MT_TMAC_BASE]		= 0x820f4000,
 	[MT_RMAC_BASE]		= 0x820f5000,
 	[MT_DMA_BASE]		= 0x820f7000,
+	[MT_PF_BASE]		= 0x820f8000,
 	[MT_WTBL_BASE_ON]	= 0x820f9000,
 	[MT_WTBL_BASE_OFF]	= 0x820f9800,
 	[MT_LPON_BASE]		= 0x820fb000,
-- 
cgit v1.2.3-59-g8ed1b


From 5ac2e2a3485a13ac922249cd03b9dcecff59af5f Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Thu, 23 Apr 2020 15:47:54 +0200
Subject: mt76: mt7663: fix up BMC entry indicated to unicmd firmware

BMC entry for MT7663 unicmd firmware should be a broadcast/multicast entry,
not a unicast entry, because GTK rekey offload would rely on it.

Fixes: 138860679b2a ("mt76: mt7615: add more uni mcu commands")
Cc: Soul Huang <Soul.Huang@mediatek.com>
Suggested-by: YF Luo <Yf.Luo@mediatek.com>
Co-developed-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 22 +++-------------------
 1 file changed, 3 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 1d1b2a7b4325..b1dea61fb9ee 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1359,8 +1359,8 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy,
 			.short_st = true,
 		},
 	};
-	u8 idx, tx_wlan_idx = 0;
 	int err;
+	u8 idx;
 
 	idx = mvif->omac_idx > EXT_BSSID_START ? HW_BSSID_0 : mvif->omac_idx;
 	basic_req.basic.hw_bss_idx = idx;
@@ -1369,24 +1369,8 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy,
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
 		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
-		tx_wlan_idx = mvif->sta.wcid.idx;
 		break;
 	case NL80211_IFTYPE_STATION:
-		if (enable) {
-			struct ieee80211_sta *sta;
-			struct mt7615_sta *msta;
-
-			rcu_read_lock();
-			sta = ieee80211_find_sta(vif, vif->bss_conf.bssid);
-			if (!sta) {
-				rcu_read_unlock();
-				return -EINVAL;
-			}
-
-			msta = (struct mt7615_sta *)sta->drv_priv;
-			tx_wlan_idx = msta->wcid.idx;
-			rcu_read_unlock();
-		}
 		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
 		break;
 	default:
@@ -1395,8 +1379,8 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy,
 	}
 
 	memcpy(basic_req.basic.bssid, vif->bss_conf.bssid, ETH_ALEN);
-	basic_req.basic.bmc_tx_wlan_idx = cpu_to_le16(tx_wlan_idx);
-	basic_req.basic.sta_idx = cpu_to_le16(tx_wlan_idx);
+	basic_req.basic.bmc_tx_wlan_idx = cpu_to_le16(mvif->sta.wcid.idx);
+	basic_req.basic.sta_idx = cpu_to_le16(mvif->sta.wcid.idx);
 	basic_req.basic.conn_state = !enable;
 
 	err = __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_BSS_INFO_UPDATE,
-- 
cgit v1.2.3-59-g8ed1b


From f559685035f818693a2930052283d837738dbc2f Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 23 Apr 2020 15:47:55 +0200
Subject: mt76: mt7615: add sta pointer to mt7615_mcu_add_bss_info signature

Introduce sta pointer to mt7615_mcu_add_bss_info signature in order to
avoid sta lookup in mt7615_mcu_bss_basic_tlv routine

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  6 +++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 21 ++++++---------------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 +-
 3 files changed, 10 insertions(+), 19 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 4e1d162b2664..23bc7f1262d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -501,7 +501,7 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 	}
 
 	if (changed & BSS_CHANGED_BEACON_ENABLED) {
-		mt7615_mcu_add_bss_info(phy, vif, info->enable_beacon);
+		mt7615_mcu_add_bss_info(phy, vif, NULL, info->enable_beacon);
 		mt7615_mcu_sta_add(dev, vif, NULL, info->enable_beacon);
 	}
 
@@ -546,7 +546,7 @@ int mt7615_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 		struct mt7615_phy *phy;
 
 		phy = mvif->band_idx ? mt7615_ext_phy(dev) : &dev->phy;
-		mt7615_mcu_add_bss_info(phy, vif, true);
+		mt7615_mcu_add_bss_info(phy, vif, sta, true);
 	}
 	mt7615_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
@@ -570,7 +570,7 @@ void mt7615_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 		struct mt7615_phy *phy;
 
 		phy = mvif->band_idx ? mt7615_ext_phy(dev) : &dev->phy;
-		mt7615_mcu_add_bss_info(phy, vif, false);
+		mt7615_mcu_add_bss_info(phy, vif, sta, false);
 	}
 
 	spin_lock_bh(&dev->sta_poll_lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index b1dea61fb9ee..a65d814153ab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -645,7 +645,7 @@ mt7615_mcu_add_tlv(struct sk_buff *skb, int tag, int len)
 
 static int
 mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
-			 bool enable)
+			 struct ieee80211_sta *sta, bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct bss_info_basic *bss;
@@ -661,20 +661,11 @@ mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 		break;
 	case NL80211_IFTYPE_STATION:
 		/* TODO: enable BSS_INFO_UAPSD & BSS_INFO_PM */
-		if (enable) {
-			struct ieee80211_sta *sta;
+		if (enable && sta) {
 			struct mt7615_sta *msta;
 
-			rcu_read_lock();
-			sta = ieee80211_find_sta(vif, vif->bss_conf.bssid);
-			if (!sta) {
-				rcu_read_unlock();
-				return -EINVAL;
-			}
-
 			msta = (struct mt7615_sta *)sta->drv_priv;
 			wlan_idx = msta->wcid.idx;
-			rcu_read_unlock();
 		}
 		break;
 	case NL80211_IFTYPE_ADHOC:
@@ -994,7 +985,7 @@ mt7615_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 
 static int
 mt7615_mcu_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
-		   bool enable)
+		   struct ieee80211_sta *sta, bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct mt7615_dev *dev = phy->dev;
@@ -1007,7 +998,7 @@ mt7615_mcu_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	if (enable)
 		mt7615_mcu_bss_omac_tlv(skb, vif);
 
-	mt7615_mcu_bss_basic_tlv(skb, vif, enable);
+	mt7615_mcu_bss_basic_tlv(skb, vif, sta, enable);
 
 	if (enable && mvif->omac_idx > EXT_BSSID_START)
 		mt7615_mcu_bss_ext_tlv(skb, mvif);
@@ -1272,8 +1263,8 @@ mt7615_mcu_uni_ctrl_pm_state(struct mt7615_dev *dev, int band, int state)
 }
 
 static int
-mt7615_mcu_uni_add_bss(struct mt7615_phy *phy,
-		       struct ieee80211_vif *vif, bool enable)
+mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+		       struct ieee80211_sta *sta, bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index ffb146840377..44eb3d8dca78 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -222,7 +222,7 @@ struct mt7615_mcu_ops {
 	int (*add_dev_info)(struct mt7615_dev *dev,
 			    struct ieee80211_vif *vif, bool enable);
 	int (*add_bss_info)(struct mt7615_phy *phy, struct ieee80211_vif *vif,
-			    bool enable);
+			    struct ieee80211_sta *sta, bool enable);
 	int (*add_beacon_offload)(struct mt7615_dev *dev,
 				  struct ieee80211_hw *hw,
 				  struct ieee80211_vif *vif, bool enable);
-- 
cgit v1.2.3-59-g8ed1b


From ffc54ee2f03d0299146323c09cebb1c1de4d73d3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 24 Apr 2020 14:51:29 +0200
Subject: mt76: mt7615: fix event report in mt7615_mcu_bss_event

Currently mt7663 devices do not support DBDC so fw events have no info
about it. Fix mt7615_mcu_bss_event, which wrongly uses bss_idx as the DBDC
band_idx while it is actually the vif index.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index a65d814153ab..8f745c64e9d9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -356,11 +356,12 @@ mt7615_mcu_bss_event(struct mt7615_dev *dev, struct sk_buff *skb)
 {
 	struct mt7615_mcu_bss_event *event;
 	struct mt76_phy *mphy;
+	u8 band_idx = 0; /* DBDC support */
 
 	event = (struct mt7615_mcu_bss_event *)(skb->data +
 						sizeof(struct mt7615_mcu_rxd));
 
-	if (event->bss_idx && dev->mt76.phy2)
+	if (band_idx && dev->mt76.phy2)
 		mphy = dev->mt76.phy2;
 	else
 		mphy = &dev->mt76.phy;
-- 
cgit v1.2.3-59-g8ed1b


From 65ba7fa4b829588b011c8c285968c1a8d779b981 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 25 Apr 2020 14:11:09 +0200
Subject: mt76: mt76x0: enable MCS 8 and MCS9

Enable MCS8 and MCS9 for mt76x0{u,e} devices

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x0/init.c | 26 +++++-------------------
 1 file changed, 5 insertions(+), 21 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x0/init.c b/drivers/net/wireless/mediatek/mt76/mt76x0/init.c
index 57f8d56737eb..dc8bf4c6969a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x0/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x0/init.c
@@ -12,24 +12,6 @@
 #include "initvals.h"
 #include "../mt76x02_phy.h"
 
-static void mt76x0_vht_cap_mask(struct ieee80211_supported_band *sband)
-{
-	struct ieee80211_sta_vht_cap *vht_cap = &sband->vht_cap;
-	u16 mcs_map = 0;
-	int i;
-
-	vht_cap->cap &= ~IEEE80211_VHT_CAP_RXLDPC;
-	for (i = 0; i < 8; i++) {
-		if (!i)
-			mcs_map |= (IEEE80211_VHT_MCS_SUPPORT_0_7 << (i * 2));
-		else
-			mcs_map |=
-				(IEEE80211_VHT_MCS_NOT_SUPPORTED << (i * 2));
-	}
-	vht_cap->vht_mcs.rx_mcs_map = cpu_to_le16(mcs_map);
-	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
-}
-
 static void
 mt76x0_set_wlan_state(struct mt76x02_dev *dev, u32 val, bool enable)
 {
@@ -263,9 +245,11 @@ int mt76x0_register_device(struct mt76x02_dev *dev)
 		return ret;
 
 	if (dev->mt76.cap.has_5ghz) {
-		/* overwrite unsupported features */
-		mt76x0_vht_cap_mask(&dev->mphy.sband_5g.sband);
-		mt76x0_init_txpower(dev, &dev->mphy.sband_5g.sband);
+		struct ieee80211_supported_band *sband;
+
+		sband = &dev->mphy.sband_5g.sband;
+		sband->vht_cap.cap &= ~IEEE80211_VHT_CAP_RXLDPC;
+		mt76x0_init_txpower(dev, sband);
 	}
 
 	if (dev->mt76.cap.has_2ghz)
-- 
cgit v1.2.3-59-g8ed1b


From f2dc8ea11fbeec8e8d35c30a9fbd1f97d9c079d4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 28 Apr 2020 15:34:09 +0200
Subject: mt76: mt7663: add the possibility to load firmware v2

mt7663 firmware v2 is used for embedded devices since it has more complete
features in AP mode.
Add the capability to specify which firmware to load first (v3 or v2)
using the prefer_offload_fw module parameter and fall back to the other one
if the selected firmware fails to load

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 55 +++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  6 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c    |  2 +
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c    |  2 +
 4 files changed, 57 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 8f745c64e9d9..f4966d29d098 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -11,6 +11,11 @@
 #include "mac.h"
 #include "eeprom.h"
 
+static bool prefer_offload_fw = true;
+module_param(prefer_offload_fw, bool, 0644);
+MODULE_PARM_DESC(prefer_offload_fw,
+		 "Prefer client mode offload firmware (MT7663)");
+
 struct mt7615_patch_hdr {
 	char build_date[16];
 	char platform[4];
@@ -1728,7 +1733,7 @@ static int mt7615_load_patch(struct mt7615_dev *dev, u32 addr, const char *name)
 		return -EAGAIN;
 	}
 
-	ret = request_firmware(&fw, name, dev->mt76.dev);
+	ret = firmware_request_nowarn(&fw, name, dev->mt76.dev);
 	if (ret)
 		goto out;
 
@@ -2081,8 +2086,49 @@ out:
 	return ret;
 }
 
+static int
+mt7663_load_rom_patch(struct mt7615_dev *dev, const char **n9_firmware)
+{
+	const char *selected_rom, *secondary_rom = MT7663_ROM_PATCH;
+	const char *primary_rom = MT7663_OFFLOAD_ROM_PATCH;
+	int ret;
+
+	if (!prefer_offload_fw) {
+		secondary_rom = MT7663_OFFLOAD_ROM_PATCH;
+		primary_rom = MT7663_ROM_PATCH;
+	}
+	selected_rom = primary_rom;
+
+	ret = mt7615_load_patch(dev, MT7663_PATCH_ADDRESS, primary_rom);
+	if (ret) {
+		dev_info(dev->mt76.dev, "%s not found, switching to %s",
+			 primary_rom, secondary_rom);
+		ret = mt7615_load_patch(dev, MT7663_PATCH_ADDRESS,
+					secondary_rom);
+		if (ret) {
+			dev_err(dev->mt76.dev, "failed to load %s",
+				secondary_rom);
+			return ret;
+		}
+		selected_rom = secondary_rom;
+	}
+
+	if (!strcmp(selected_rom, MT7663_OFFLOAD_ROM_PATCH)) {
+		*n9_firmware = MT7663_OFFLOAD_FIRMWARE_N9;
+		dev->fw_ver = MT7615_FIRMWARE_V3;
+		dev->mcu_ops = &uni_update_ops;
+	} else {
+		*n9_firmware = MT7663_FIRMWARE_N9;
+		dev->fw_ver = MT7615_FIRMWARE_V2;
+		dev->mcu_ops = &sta_update_ops;
+	}
+
+	return 0;
+}
+
 int __mt7663_load_firmware(struct mt7615_dev *dev)
 {
+	const char *n9_firmware;
 	int ret;
 
 	ret = mt76_get_field(dev, MT_CONN_ON_MISC, MT_TOP_MISC2_FW_N9_RDY);
@@ -2091,14 +2137,11 @@ int __mt7663_load_firmware(struct mt7615_dev *dev)
 		return -EIO;
 	}
 
-	ret = mt7615_load_patch(dev, MT7663_PATCH_ADDRESS, MT7663_ROM_PATCH);
+	ret = mt7663_load_rom_patch(dev, &n9_firmware);
 	if (ret)
 		return ret;
 
-	dev->fw_ver = MT7615_FIRMWARE_V3;
-	dev->mcu_ops = &uni_update_ops;
-
-	ret = mt7663_load_n9(dev, MT7663_FIRMWARE_N9);
+	ret = mt7663_load_n9(dev, n9_firmware);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 44eb3d8dca78..0476b9426b03 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -42,8 +42,10 @@
 #define MT7615_FIRMWARE_V2		2
 #define MT7615_FIRMWARE_V3		3
 
-#define MT7663_ROM_PATCH		"mediatek/mt7663pr2h.bin"
-#define MT7663_FIRMWARE_N9              "mediatek/mt7663_n9_v3.bin"
+#define MT7663_OFFLOAD_ROM_PATCH	"mediatek/mt7663pr2h.bin"
+#define MT7663_OFFLOAD_FIRMWARE_N9	"mediatek/mt7663_n9_v3.bin"
+#define MT7663_ROM_PATCH		"mediatek/mt7663pr2h_rebb.bin"
+#define MT7663_FIRMWARE_N9		"mediatek/mt7663_n9_rebb.bin"
 
 #define MT7615_EEPROM_SIZE		1024
 #define MT7615_TOKEN_SIZE		4096
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 21b3ec29aa12..f9469198cabd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -68,5 +68,7 @@ MODULE_DEVICE_TABLE(pci, mt7615_pci_device_table);
 MODULE_FIRMWARE(MT7615_FIRMWARE_CR4);
 MODULE_FIRMWARE(MT7615_FIRMWARE_N9);
 MODULE_FIRMWARE(MT7615_ROM_PATCH);
+MODULE_FIRMWARE(MT7663_OFFLOAD_FIRMWARE_N9);
+MODULE_FIRMWARE(MT7663_OFFLOAD_ROM_PATCH);
 MODULE_FIRMWARE(MT7663_FIRMWARE_N9);
 MODULE_FIRMWARE(MT7663_ROM_PATCH);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index bcd131969923..9353175b139b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -386,6 +386,8 @@ mt7663u_resume(struct usb_interface *intf)
 }
 
 MODULE_DEVICE_TABLE(usb, mt7615_device_table);
+MODULE_FIRMWARE(MT7663_OFFLOAD_FIRMWARE_N9);
+MODULE_FIRMWARE(MT7663_OFFLOAD_ROM_PATCH);
 MODULE_FIRMWARE(MT7663_FIRMWARE_N9);
 MODULE_FIRMWARE(MT7663_ROM_PATCH);
 
-- 
cgit v1.2.3-59-g8ed1b


From 404d1cd401cacf6922a800a63ff158f15615530d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 26 Apr 2020 16:42:52 +0200
Subject: mt76: mt7663: remove check in mt7663_load_n9

Get rid of useless check in mt7663_load_n9 since it is used only for
mt7663 devices

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 13 +++++--------
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index f4966d29d098..5fd4a4ab5120 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2005,7 +2005,7 @@ int mt7615_mcu_fw_log_2_host(struct mt7615_dev *dev, u8 ctrl)
 
 static int mt7663_load_n9(struct mt7615_dev *dev, const char *name)
 {
-	u32 offset = 0, override_addr = 0, flag = 0;
+	u32 offset = 0, override_addr = 0, flag = FW_START_DLYCAL;
 	const struct mt7663_fw_trailer *hdr;
 	const struct mt7663_fw_buf *buf;
 	const struct firmware *fw;
@@ -2061,14 +2061,11 @@ static int mt7663_load_n9(struct mt7615_dev *dev, const char *name)
 		}
 	}
 
-	if (is_mt7663(&dev->mt76)) {
-		flag |= FW_START_DLYCAL;
-		if (override_addr)
-			flag |= FW_START_OVERRIDE;
+	if (override_addr)
+		flag |= FW_START_OVERRIDE;
 
-		dev_info(dev->mt76.dev, "override_addr = 0x%08x, option = %d\n",
-			 override_addr, flag);
-	}
+	dev_info(dev->mt76.dev, "override_addr = 0x%08x, option = %d\n",
+		 override_addr, flag);
 
 	ret = mt7615_mcu_start_firmware(dev, override_addr, flag);
 	if (ret) {
-- 
cgit v1.2.3-59-g8ed1b


From 801f809aeeb127736a1f871dc21c800382afc4d2 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Mon, 27 Apr 2020 19:45:02 +0200
Subject: mt76: mt7615: set spatial extension index

The vendor driver sets this in firmware rate control (which we don't use)

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 6 ++++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index e7a76032caff..5c09787b0d76 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -639,7 +639,8 @@ int mt7615_mac_write_txwi(struct mt7615_dev *dev, __le32 *txwi,
 		txwi[3] |= cpu_to_le32(MT_TXD3_NO_ACK);
 
 	txwi[7] = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
-		  FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype);
+		  FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype) |
+		  FIELD_PREP(MT_TXD7_SPE_IDX, 0x18);
 	if (is_usb)
 		txwi[8] = FIELD_PREP(MT_TXD8_L_TYPE, fc_type) |
 			  FIELD_PREP(MT_TXD8_L_SUB_TYPE, fc_stype);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 5fd4a4ab5120..cbad854d7497 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -887,6 +887,7 @@ mt7615_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct wtbl_generic *generic;
 	struct wtbl_rx *rx;
+	struct wtbl_spe *spe;
 	struct tlv *tlv;
 
 	tlv = mt7615_mcu_add_nested_tlv(skb, WTBL_GENERIC, sizeof(*generic),
@@ -914,6 +915,11 @@ mt7615_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	rx->rca1 = sta ? vif->type != NL80211_IFTYPE_AP : 1;
 	rx->rca2 = 1;
 	rx->rv = 1;
+
+	tlv = mt7615_mcu_add_nested_tlv(skb, WTBL_SPE, sizeof(*spe),
+					wtbl_tlv, sta_wtbl);
+	spe = (struct wtbl_spe *)tlv;
+	spe->spe_idx = 24;
 }
 
 static void
-- 
cgit v1.2.3-59-g8ed1b


From 89f8bc6a344b2b786210a4045256346b3f8f6c4c Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 29 Apr 2020 14:34:23 +0200
Subject: mt76: mt7615: fix getting maximum tx power from eeprom

On top of the EEPROM target power, each rate can also have a power offset.
On many devices, this power offset is used to boost the tx power of lower
rates. Take this into account when parsing rate power.
The assumption here is that the first rate (OFDM 6M or CCK 1M) has the
highest tx power.

Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h |  6 ++++++
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 11 +++++++++++
 2 files changed, 17 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
index bd2ac1e0e01a..3dd7009e5836 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
@@ -32,6 +32,8 @@ enum mt7615_eeprom_field {
 	MT_EE_TX0_2G_TARGET_POWER =		0x058,
 	MT_EE_TX0_5G_G0_TARGET_POWER =		0x070,
 	MT_EE_TX1_5G_G0_TARGET_POWER =		0x098,
+	MT_EE_2G_RATE_POWER =			0x0be,
+	MT_EE_5G_RATE_POWER =			0x0d5,
 	MT_EE_EXT_PA_2G_TARGET_POWER =		0x0f2,
 	MT_EE_EXT_PA_5G_TARGET_POWER =		0x0f3,
 	MT7663_EE_TX0_2G_TARGET_POWER =		0x123,
@@ -43,6 +45,10 @@ enum mt7615_eeprom_field {
 	MT7663_EE_MAX =				0x400,
 };
 
+#define MT_EE_RATE_POWER_MASK			GENMASK(5, 0)
+#define MT_EE_RATE_POWER_SIGN			BIT(6)
+#define MT_EE_RATE_POWER_EN			BIT(7)
+
 #define MT_EE_CALDATA_FLASH_TX_DPD		BIT(0)
 #define MT_EE_CALDATA_FLASH_RX_CAL		BIT(1)
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 9880643888ba..7e201525305b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -198,6 +198,17 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 	u8 *eep = (u8 *)dev->mt76.eeprom.data;
 	enum nl80211_band band = sband->band;
 	int delta = mt76_tx_power_nss_delta(n_chains);
+	u8 rate_val;
+
+	/* assume the first rate has the highest power offset */
+	if (band == NL80211_BAND_2GHZ)
+		rate_val = eep[MT_EE_2G_RATE_POWER];
+	else
+		rate_val = eep[MT_EE_5G_RATE_POWER];
+
+	if ((rate_val & ~MT_EE_RATE_POWER_MASK) ==
+	    (MT_EE_RATE_POWER_EN | MT_EE_RATE_POWER_SIGN))
+		delta += rate_val & MT_EE_RATE_POWER_MASK;
 
 	target_chains = mt7615_ext_pa_enabled(dev, band) ? 1 : n_chains;
 	for (i = 0; i < sband->n_channels; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From fb602b303b0eece7b8822e03f71a9ba3f35a4023 Mon Sep 17 00:00:00 2001
From: Dejin Zheng <zhengdejin5@gmail.com>
Date: Tue, 28 Apr 2020 22:31:52 +0800
Subject: mt76: mt7603: remove duplicate error message

devm_platform_ioremap_resource() already prints an error message itself
when it fails, so remove the duplicate error message.

Signed-off-by: Dejin Zheng <zhengdejin5@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/soc.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
index 68efb300c0d8..de170765e938 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/soc.c
@@ -20,10 +20,8 @@ mt76_wmac_probe(struct platform_device *pdev)
 		return irq;
 
 	mem_base = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(mem_base)) {
-		dev_err(&pdev->dev, "Failed to get memory resource\n");
+	if (IS_ERR(mem_base))
 		return PTR_ERR(mem_base);
-	}
 
 	mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt7603_ops,
 				 &mt7603_drv_ops);
-- 
cgit v1.2.3-59-g8ed1b


From 0fe96975d1dfd0e7aabe9400d0b82947e0a0d30e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 29 Apr 2020 19:48:53 +0200
Subject: mt76: mt7615: fix ssid configuration in mt7615_mcu_hw_scan

Fix SSID configuration when performing hw frequency scanning

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index cbad854d7497..f4e835f03b8d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2719,9 +2719,9 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct cfg80211_scan_request *sreq = &scan_req->req;
+	int n_ssids = 0, err, i, duration = MT7615_SCAN_CHANNEL_TIME;
 	int ext_channels_num = max_t(int, sreq->n_channels - 32, 0);
 	struct ieee80211_channel **scan_list = sreq->channels;
-	int err, i, duration = MT7615_SCAN_CHANNEL_TIME;
 	struct mt7615_dev *dev = phy->dev;
 	bool ext_phy = phy != &dev->phy;
 	struct mt7615_mcu_scan_channel *chan;
@@ -2744,16 +2744,21 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
 	req->bss_idx = mvif->idx;
 	req->scan_type = 1;
-	req->ssid_type = 1;
 	req->probe_req_num = 2;
 	req->version = 1;
 	req->channel_type = 4;
 
 	for (i = 0; i < sreq->n_ssids; i++) {
+		if (!sreq->ssids[i].ssid_len)
+			continue;
+
 		req->ssids[i].ssid_len = cpu_to_le32(sreq->ssids[i].ssid_len);
 		memcpy(req->ssids[i].ssid, sreq->ssids[i].ssid,
 		       sreq->ssids[i].ssid_len);
+		n_ssids++;
 	}
+	req->ssid_type = n_ssids ? BIT(2) : BIT(0);
+	req->ssids_num = n_ssids;
 
 	req->timeout_value = cpu_to_le16(sreq->n_channels * duration);
 	req->channel_min_dwell_time = cpu_to_le16(duration);
-- 
cgit v1.2.3-59-g8ed1b


From 53b42ae291a0adf227c9f32e4024819a276ded02 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 29 Apr 2020 19:52:15 +0200
Subject: mt76: mt7615: introduce mt7615_check_offload_capability routine

Introduce mt7615_check_offload_capability routine to set hw/wiphy
offload capabilities according to the running firmware

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 26 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  1 +
 .../net/wireless/mediatek/mt76/mt7615/pci_init.c   | 21 +----------------
 .../net/wireless/mediatek/mt76/mt7615/usb_init.c   |  4 +---
 4 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 7e201525305b..145af8a3ae57 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -130,6 +130,32 @@ void mt7615_mac_init(struct mt7615_dev *dev)
 }
 EXPORT_SYMBOL_GPL(mt7615_mac_init);
 
+void mt7615_check_offload_capability(struct mt7615_dev *dev)
+{
+	struct ieee80211_hw *hw = mt76_hw(dev);
+	struct wiphy *wiphy = hw->wiphy;
+
+	if (mt7615_firmware_offload(dev)) {
+		ieee80211_hw_set(hw, SUPPORTS_PS);
+		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
+
+		wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+	} else {
+		dev->ops->hw_scan = NULL;
+		dev->ops->cancel_hw_scan = NULL;
+		dev->ops->sched_scan_start = NULL;
+		dev->ops->sched_scan_stop = NULL;
+
+		wiphy->max_sched_scan_plan_interval = 0;
+		wiphy->max_sched_scan_ie_len = 0;
+		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
+		wiphy->max_sched_scan_ssids = 0;
+		wiphy->max_match_sets = 0;
+		wiphy->max_sched_scan_reqs = 0;
+	}
+}
+EXPORT_SYMBOL_GPL(mt7615_check_offload_capability);
+
 bool mt7615_wait_for_mcu_init(struct mt7615_dev *dev)
 {
 	flush_work(&dev->mcu_work);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 0476b9426b03..2321a1f23ec8 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -370,6 +370,7 @@ int mt7615_mmio_probe(struct device *pdev, void __iomem *mem_base,
 		      int irq, const u32 *map);
 u32 mt7615_reg_map(struct mt7615_dev *dev, u32 addr);
 
+void mt7615_check_offload_capability(struct mt7615_dev *dev);
 void mt7615_init_device(struct mt7615_dev *dev);
 int mt7615_register_device(struct mt7615_dev *dev);
 void mt7615_unregister_device(struct mt7615_dev *dev);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
index cd3ccafa7d11..69cba8609edf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci_init.c
@@ -16,7 +16,6 @@ static void mt7615_init_work(struct work_struct *work)
 {
 	struct mt7615_dev *dev = container_of(work, struct mt7615_dev,
 					      mcu_work);
-	struct ieee80211_hw *hw = mt76_hw(dev);
 
 	if (mt7615_mcu_init(dev))
 		return;
@@ -25,25 +24,7 @@ static void mt7615_init_work(struct work_struct *work)
 	mt7615_mac_init(dev);
 	mt7615_phy_init(dev);
 	mt7615_mcu_del_wtbl_all(dev);
-
-	if (mt7615_firmware_offload(dev)) {
-		ieee80211_hw_set(hw, SUPPORTS_PS);
-		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
-	} else {
-		struct wiphy *wiphy = hw->wiphy;
-
-		dev->ops->hw_scan = NULL;
-		dev->ops->cancel_hw_scan = NULL;
-		dev->ops->sched_scan_start = NULL;
-		dev->ops->sched_scan_stop = NULL;
-
-		wiphy->max_sched_scan_plan_interval = 0;
-		wiphy->max_sched_scan_ie_len = 0;
-		wiphy->max_scan_ie_len = IEEE80211_MAX_DATA_LEN;
-		wiphy->max_sched_scan_ssids = 0;
-		wiphy->max_match_sets = 0;
-		wiphy->max_sched_scan_reqs = 0;
-	}
+	mt7615_check_offload_capability(dev);
 }
 
 static int mt7615_init_hardware(struct mt7615_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
index 39642065531f..1fbc9601391d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb_init.c
@@ -103,6 +103,7 @@ static void mt7663u_init_work(struct work_struct *work)
 	mt7615_mac_init(dev);
 	mt7615_phy_init(dev);
 	mt7615_mcu_del_wtbl_all(dev);
+	mt7615_check_offload_capability(dev);
 }
 
 int mt7663u_register_device(struct mt7615_dev *dev)
@@ -119,9 +120,6 @@ int mt7663u_register_device(struct mt7615_dev *dev)
 	if (err)
 		return err;
 
-	ieee80211_hw_set(hw, SUPPORTS_PS);
-	ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
-
 	hw->extra_tx_headroom += MT_USB_HDR_SIZE + MT_USB_TXD_SIZE;
 	/* check hw sg support in order to enable AMSDU */
 	hw->max_tx_fragments = dev->mt76.usb.sg_en ? MT_HW_TXP_MAX_BUF_NUM : 1;
-- 
cgit v1.2.3-59-g8ed1b


From 6c5974c10c2c6cc8166d4708b957ff3245b4eecb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 30 Apr 2020 09:59:31 +0200
Subject: mt76: mt7615: do not mark sched_scan disabled in mt7615_scan_work

For the moment offload firmware supports just one entry in the scheduled
scan plan and so it runs till it is disabled by the userspace.
Do not mark the hw scheduled scan as disabled in mt7615_scan_work
after receiving a scan result

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 23bc7f1262d0..5e54c7ffae0b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -818,7 +818,6 @@ void mt7615_scan_work(struct work_struct *work)
 			clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
 			ieee80211_scan_completed(phy->mt76->hw, &info);
 		} else {
-			clear_bit(MT76_HW_SCHED_SCANNING, &phy->mt76->state);
 			ieee80211_sched_scan_results(phy->mt76->hw);
 		}
 		dev_kfree_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 4432119d17828ce7a66d74f898c52ca4a147ba92 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 30 Apr 2020 11:11:34 +0200
Subject: mt76: mt7615: add passive mode for hw scan

Introduce support for passive frequency scanning to mt7615_mcu_hw_scan

Tested-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index f4e835f03b8d..ae527b934926 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2743,8 +2743,8 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 
 	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
 	req->bss_idx = mvif->idx;
-	req->scan_type = 1;
-	req->probe_req_num = 2;
+	req->scan_type = sreq->n_ssids ? 1 : 0;
+	req->probe_req_num = sreq->n_ssids ? 2 : 0;
 	req->version = 1;
 	req->channel_type = 4;
 
-- 
cgit v1.2.3-59-g8ed1b


From a69b0b30e661eadfe1931c4c5751c2de7905f016 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 30 Apr 2020 22:31:03 +0200
Subject: mt76: mt7615: free pci_vector if mt7615_pci_probe fails

Always free the pci irq vector if the mt7615_pci_probe routine fails.
Moreover, free the irq in the mt7615_pci_remove routine.

Co-developed-by: Soul Huang <sean.wang@mediatek.com>
Signed-off-by: Soul Huang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index f9469198cabd..0605c908059e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -39,13 +39,21 @@ static int mt7615_pci_probe(struct pci_dev *pdev,
 
 	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32));
 	if (ret)
-		return ret;
+		goto error;
 
 	mt76_pci_disable_aspm(pdev);
 
 	map = id->device == 0x7663 ? mt7663e_reg_map : mt7615e_reg_map;
-	return mt7615_mmio_probe(&pdev->dev, pcim_iomap_table(pdev)[0],
-				 pdev->irq, map);
+	ret = mt7615_mmio_probe(&pdev->dev, pcim_iomap_table(pdev)[0],
+				pdev->irq, map);
+	if (ret)
+		goto error;
+
+	return 0;
+error:
+	pci_free_irq_vectors(pdev);
+
+	return ret;
 }
 
 static void mt7615_pci_remove(struct pci_dev *pdev)
@@ -54,6 +62,7 @@ static void mt7615_pci_remove(struct pci_dev *pdev)
 	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
 
 	mt7615_unregister_device(dev);
+	devm_free_irq(&pdev->dev, pdev->irq, dev);
 	pci_free_irq_vectors(pdev);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c8646872d6e632f793a06a42708e282950ec982a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 1 May 2020 12:36:11 +0200
Subject: mt76: mt7615: introduce support for hardware beacon filter

Introduce support for hw beacon filter if available in the firmware

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 41 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 ++
 3 files changed, 45 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index ae527b934926..7de7daf4d067 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2652,6 +2652,47 @@ void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 				    &req, sizeof(req), false);
 }
 
+int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+			  bool enable)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct {
+		u8 bss_idx;
+		u8 dtim_period;
+		__le16 aid;
+		__le16 bcn_interval;
+		__le16 atim_window;
+		u8 uapsd;
+		u8 bmc_delivered_ac;
+		u8 bmc_triggered_ac;
+		u8 pad;
+	} req = {
+		.bss_idx = mvif->idx,
+		.aid = cpu_to_le16(vif->bss_conf.aid),
+		.dtim_period = vif->bss_conf.dtim_period,
+		.bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int),
+	};
+	struct {
+		u8 bss_idx;
+		u8 pad[3];
+	} req_hdr = {
+		.bss_idx = mvif->idx,
+	};
+	int err;
+
+	if (vif->type != NL80211_IFTYPE_STATION ||
+	    !mt7615_firmware_offload(dev))
+		return -ENOTSUPP;
+
+	err = __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_ABORT,
+				  &req_hdr, sizeof(req_hdr), false);
+	if (err < 0 || !enable)
+		return err;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_CONNECTED,
+				   &req, sizeof(req), false);
+}
+
 int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 {
 	struct mt76_phy *mphy = phy->mt76;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 032b5f98608e..08dcfadceaf6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -419,6 +419,8 @@ enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
 	MCU_CMD_SET_PS_PROFILE = MCU_CE_PREFIX | 0x05,
 	MCU_CMD_SET_CHAN_DOMAIN = MCU_CE_PREFIX | 0x0f,
+	MCU_CMD_SET_BSS_CONNECTED = MCU_CE_PREFIX | 0x16,
+	MCU_CMD_SET_BSS_ABORT = MCU_CE_PREFIX | 0x17,
 	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
 	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
 	MCU_CMD_SCHED_SCAN_REQ = MCU_CE_PREFIX | 0x62,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 2321a1f23ec8..fdb20fbdfb90 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -474,6 +474,8 @@ int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
 			       enum set_key_cmd cmd);
 void mt7615_mac_reset_work(struct work_struct *work);
 
+int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+			  bool enable);
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 int mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 			int len, bool wait_resp);
-- 
cgit v1.2.3-59-g8ed1b


From 6f117852009c8c0d7542f6de0750dfda18c6578c Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 1 May 2020 12:36:12 +0200
Subject: mt76: mt7615: introduce mt7615_mcu_set_hif_suspend mcu command

Introduce the mt7615_mcu_set_hif_suspend mcu command, which is usually
used to configure the interface including PCIe, USB or SDIO to the right
state during operation suspend / resume.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 36 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 ++
 3 files changed, 38 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 7de7daf4d067..e7f6ab5ab2db 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -194,7 +194,8 @@ mt7615_mcu_parse_response(struct mt7615_dev *dev, int cmd,
 		break;
 	case MCU_UNI_CMD_DEV_INFO_UPDATE:
 	case MCU_UNI_CMD_BSS_INFO_UPDATE:
-	case MCU_UNI_CMD_STA_REC_UPDATE: {
+	case MCU_UNI_CMD_STA_REC_UPDATE:
+	case MCU_UNI_CMD_HIF_CTRL: {
 		struct mt7615_mcu_uni_event *event;
 
 		skb_pull(skb, sizeof(*rxd));
@@ -3220,3 +3221,36 @@ out:
 
 	return ret;
 }
+
+int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend)
+{
+	struct {
+		struct {
+			u8 hif_type; /* 0x0: HIF_SDIO
+				      * 0x1: HIF_USB
+				      * 0x2: HIF_PCIE
+				      */
+			u8 pad[3];
+		} __packed hdr;
+		struct hif_suspend_tlv {
+			__le16 tag;
+			__le16 len;
+			u8 suspend;
+		} __packed hif_suspend;
+	} req = {
+		.hif_suspend = {
+			.tag = cpu_to_le16(0), /* 0: UNI_HIF_CTRL_BASIC */
+			.len = cpu_to_le16(sizeof(struct hif_suspend_tlv)),
+			.suspend = suspend,
+		},
+	};
+
+	if (mt76_is_mmio(&dev->mt76))
+		req.hdr.hif_type = 2;
+	else if (mt76_is_usb(&dev->mt76))
+		req.hdr.hif_type = 1;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_HIF_CTRL,
+				   &req, sizeof(req), true);
+}
+EXPORT_SYMBOL_GPL(mt7615_mcu_set_hif_suspend);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 08dcfadceaf6..1579ad944479 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -280,6 +280,7 @@ enum {
 	MCU_UNI_CMD_DEV_INFO_UPDATE = MCU_UNI_PREFIX | 0x01,
 	MCU_UNI_CMD_BSS_INFO_UPDATE = MCU_UNI_PREFIX | 0x02,
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
+	MCU_UNI_CMD_HIF_CTRL = MCU_UNI_PREFIX | 0x07,
 };
 
 struct mt7615_mcu_uni_event {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index fdb20fbdfb90..6a26555dcce6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -531,6 +531,8 @@ int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 int mt7615_init_debugfs(struct mt7615_dev *dev);
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 
+int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend);
+
 int __mt7663_load_firmware(struct mt7615_dev *dev);
 
 /* usb */
-- 
cgit v1.2.3-59-g8ed1b


From c6bf20109a3fae92402cb76ad709ec5256bcd169 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 1 May 2020 12:36:13 +0200
Subject: mt76: mt7615: add WoW support

Introduce WoW support to mt7615 driver.
Currently supported triggers are:
- magic-packet
- disconnect
- user-pattern

Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  79 ++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 220 +++++++++++++++++----
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  62 ++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   4 +-
 5 files changed, 322 insertions(+), 44 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 6106dc4fea44..f1c9116b6ab4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -287,6 +287,7 @@ enum {
 	MT76_READING_STATS,
 	MT76_STATE_POWER_OFF,
 	MT76_STATE_PS,
+	MT76_STATE_SUSPEND,
 };
 
 struct mt76_hw_cap {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 5e54c7ffae0b..b93a47509423 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -864,6 +864,80 @@ mt7615_stop_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 	return mt7615_mcu_sched_scan_enable(mphy->priv, vif, false);
 }
 
+#ifdef CONFIG_PM
+static int mt7615_suspend(struct ieee80211_hw *hw,
+			  struct cfg80211_wowlan *wowlan)
+{
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	bool ext_phy = phy != &dev->phy;
+	int err = 0;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+	cancel_delayed_work_sync(&phy->scan_work);
+	cancel_delayed_work_sync(&phy->mac_work);
+
+	mt76_set(dev, MT_WF_RFCR(ext_phy), MT_WF_RFCR_DROP_OTHER_BEACON);
+
+	set_bit(MT76_STATE_SUSPEND, &phy->mt76->state);
+	ieee80211_iterate_active_interfaces(hw,
+					    IEEE80211_IFACE_ITER_RESUME_ALL,
+					    mt7615_mcu_set_suspend_iter, phy);
+
+	if (!mt7615_dev_running(dev))
+		err = mt7615_mcu_set_hif_suspend(dev, true);
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	return err;
+}
+
+static int mt7615_resume(struct ieee80211_hw *hw)
+{
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	bool running, ext_phy = phy != &dev->phy;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	running = mt7615_dev_running(dev);
+	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+
+	if (!running) {
+		int err;
+
+		err = mt7615_mcu_set_hif_suspend(dev, false);
+		if (err < 0) {
+			mutex_unlock(&dev->mt76.mutex);
+			return err;
+		}
+	}
+
+	clear_bit(MT76_STATE_SUSPEND, &phy->mt76->state);
+	ieee80211_iterate_active_interfaces(hw,
+					    IEEE80211_IFACE_ITER_RESUME_ALL,
+					    mt7615_mcu_set_suspend_iter, phy);
+
+	ieee80211_queue_delayed_work(hw, &phy->mac_work,
+				     MT7615_WATCHDOG_TIME);
+	mt76_clear(dev, MT_WF_RFCR(ext_phy), MT_WF_RFCR_DROP_OTHER_BEACON);
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
+static void mt7615_set_wakeup(struct ieee80211_hw *hw, bool enabled)
+{
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt76_dev *mdev = &dev->mt76;
+
+	device_set_wakeup_enable(mdev->dev, enabled);
+}
+#endif /* CONFIG_PM */
+
 const struct ieee80211_ops mt7615_ops = {
 	.tx = mt7615_tx,
 	.start = mt7615_start,
@@ -897,6 +971,11 @@ const struct ieee80211_ops mt7615_ops = {
 	.cancel_hw_scan = mt7615_cancel_hw_scan,
 	.sched_scan_start = mt7615_start_sched_scan,
 	.sched_scan_stop = mt7615_stop_sched_scan,
+#ifdef CONFIG_PM
+	.suspend = mt7615_suspend,
+	.resume = mt7615_resume,
+	.set_wakeup = mt7615_set_wakeup,
+#endif /* CONFIG_PM */
 };
 EXPORT_SYMBOL_GPL(mt7615_ops);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index e7f6ab5ab2db..72e2e1cbab59 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -195,7 +195,8 @@ mt7615_mcu_parse_response(struct mt7615_dev *dev, int cmd,
 	case MCU_UNI_CMD_DEV_INFO_UPDATE:
 	case MCU_UNI_CMD_BSS_INFO_UPDATE:
 	case MCU_UNI_CMD_STA_REC_UPDATE:
-	case MCU_UNI_CMD_HIF_CTRL: {
+	case MCU_UNI_CMD_HIF_CTRL:
+	case MCU_UNI_CMD_SUSPEND: {
 		struct mt7615_mcu_uni_event *event;
 
 		skb_pull(skb, sizeof(*rxd));
@@ -1835,6 +1836,13 @@ mt7615_mcu_send_ram_firmware(struct mt7615_dev *dev,
 	return 0;
 }
 
+static const struct wiphy_wowlan_support mt7615_wowlan_support = {
+	.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT,
+	.n_patterns = 1,
+	.pattern_min_len = 1,
+	.pattern_max_len = MT7615_WOW_PATTEN_MAX_LEN,
+};
+
 static int mt7615_load_n9(struct mt7615_dev *dev, const char *name)
 {
 	const struct mt7615_fw_trailer *hdr;
@@ -2157,6 +2165,11 @@ int __mt7663_load_firmware(struct mt7615_dev *dev)
 		return -EIO;
 	}
 
+#ifdef CONFIG_PM
+	if (mt7615_firmware_offload(dev))
+		dev->mt76.hw->wiphy->wowlan = &mt7615_wowlan_support;
+#endif /* CONFIG_PM */
+
 	dev_dbg(dev->mt76.dev, "Firmware init done\n");
 
 	return 0;
@@ -2653,47 +2666,6 @@ void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 				    &req, sizeof(req), false);
 }
 
-int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
-			  bool enable)
-{
-	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
-	struct {
-		u8 bss_idx;
-		u8 dtim_period;
-		__le16 aid;
-		__le16 bcn_interval;
-		__le16 atim_window;
-		u8 uapsd;
-		u8 bmc_delivered_ac;
-		u8 bmc_triggered_ac;
-		u8 pad;
-	} req = {
-		.bss_idx = mvif->idx,
-		.aid = cpu_to_le16(vif->bss_conf.aid),
-		.dtim_period = vif->bss_conf.dtim_period,
-		.bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int),
-	};
-	struct {
-		u8 bss_idx;
-		u8 pad[3];
-	} req_hdr = {
-		.bss_idx = mvif->idx,
-	};
-	int err;
-
-	if (vif->type != NL80211_IFTYPE_STATION ||
-	    !mt7615_firmware_offload(dev))
-		return -ENOTSUPP;
-
-	err = __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_ABORT,
-				  &req_hdr, sizeof(req_hdr), false);
-	if (err < 0 || !enable)
-		return err;
-
-	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_CONNECTED,
-				   &req, sizeof(req), false);
-}
-
 int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 {
 	struct mt76_phy *mphy = phy->mt76;
@@ -3222,6 +3194,7 @@ out:
 	return ret;
 }
 
+#ifdef CONFIG_PM
 int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend)
 {
 	struct {
@@ -3254,3 +3227,166 @@ int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend)
 				   &req, sizeof(req), true);
 }
 EXPORT_SYMBOL_GPL(mt7615_mcu_set_hif_suspend);
+
+static int
+mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+		      bool enable)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct {
+		u8 bss_idx;
+		u8 dtim_period;
+		__le16 aid;
+		__le16 bcn_interval;
+		__le16 atim_window;
+		u8 uapsd;
+		u8 bmc_delivered_ac;
+		u8 bmc_triggered_ac;
+		u8 pad;
+	} req = {
+		.bss_idx = mvif->idx,
+		.aid = cpu_to_le16(vif->bss_conf.aid),
+		.dtim_period = vif->bss_conf.dtim_period,
+		.bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int),
+	};
+	struct {
+		u8 bss_idx;
+		u8 pad[3];
+	} req_hdr = {
+		.bss_idx = mvif->idx,
+	};
+	int err;
+
+	if (vif->type != NL80211_IFTYPE_STATION ||
+	    !mt7615_firmware_offload(dev))
+		return -ENOTSUPP;
+
+	err = __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_ABORT,
+				  &req_hdr, sizeof(req_hdr), false);
+	if (err < 0 || !enable)
+		return err;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_BSS_CONNECTED,
+				   &req, sizeof(req), false);
+}
+
+static int
+mt7615_mcu_set_wow_ctrl(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+			bool suspend, struct cfg80211_wowlan *wowlan)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt7615_wow_ctrl_tlv wow_ctrl_tlv;
+	} req = {
+		.hdr = {
+			.bss_idx = mvif->idx,
+		},
+		.wow_ctrl_tlv = {
+			.tag = cpu_to_le16(UNI_SUSPEND_WOW_CTRL),
+			.len = cpu_to_le16(sizeof(struct mt7615_wow_ctrl_tlv)),
+			.cmd = suspend ? 1 : 2,
+		},
+	};
+
+	if (wowlan->magic_pkt)
+		req.wow_ctrl_tlv.trigger |= BIT(0);
+	if (wowlan->disconnect)
+		req.wow_ctrl_tlv.trigger |= BIT(2);
+
+	if (mt76_is_mmio(&dev->mt76))
+		req.wow_ctrl_tlv.wakeup_hif = 2;
+	else if (mt76_is_usb(&dev->mt76))
+		req.wow_ctrl_tlv.wakeup_hif = 1;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_SUSPEND,
+				   &req, sizeof(req), true);
+}
+
+static int
+mt7615_mcu_set_wow_pattern(struct mt7615_dev *dev,
+			   struct ieee80211_vif *vif,
+			   u8 index, bool enable,
+			   struct cfg80211_pkt_pattern *pattern)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_wow_pattern_tlv *ptlv;
+	struct sk_buff *skb;
+	struct req_hdr {
+		u8 bss_idx;
+		u8 pad[3];
+	} __packed hdr = {
+		.bss_idx = mvif->idx,
+	};
+
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+				 sizeof(hdr) + sizeof(*ptlv));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, &hdr, sizeof(hdr));
+	ptlv = (struct mt7615_wow_pattern_tlv *)skb_put(skb, sizeof(*ptlv));
+	ptlv->tag = cpu_to_le16(UNI_SUSPEND_WOW_PATTERN);
+	ptlv->len = cpu_to_le16(sizeof(*ptlv));
+	ptlv->data_len = pattern->pattern_len;
+	ptlv->enable = enable;
+	ptlv->index = index;
+
+	memcpy(ptlv->pattern, pattern->pattern, pattern->pattern_len);
+	memcpy(ptlv->mask, pattern->mask, pattern->pattern_len / 8);
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_UNI_CMD_SUSPEND, true);
+}
+
+static int
+mt7615_mcu_set_suspend_mode(struct mt7615_dev *dev,
+			    struct ieee80211_vif *vif,
+			    bool enable, u8 mdtim, bool wow_suspend)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt7615_suspend_tlv suspend_tlv;
+	} req = {
+		.hdr = {
+			.bss_idx = mvif->idx,
+		},
+		.suspend_tlv = {
+			.tag = cpu_to_le16(UNI_SUSPEND_MODE_SETTING),
+			.len = cpu_to_le16(sizeof(struct mt7615_suspend_tlv)),
+			.enable = enable,
+			.mdtim = mdtim,
+			.wow_suspend = wow_suspend,
+		},
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_SUSPEND,
+				   &req, sizeof(req), true);
+}
+
+void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
+				 struct ieee80211_vif *vif)
+{
+	struct mt7615_phy *phy = priv;
+	bool suspend = test_bit(MT76_STATE_SUSPEND, &phy->mt76->state);
+	struct ieee80211_hw *hw = phy->mt76->hw;
+	struct cfg80211_wowlan *wowlan = hw->wiphy->wowlan_config;
+	int i;
+
+	mt7615_mcu_set_bss_pm(phy->dev, vif, suspend);
+
+	mt7615_mcu_set_suspend_mode(phy->dev, vif, suspend, 1, true);
+
+	for (i = 0; i < wowlan->n_patterns; i++)
+		mt7615_mcu_set_wow_pattern(phy->dev, vif, i, suspend,
+					   &wowlan->patterns[i]);
+	mt7615_mcu_set_wow_ctrl(phy->dev, vif, suspend, wowlan);
+}
+#endif /* CONFIG_PM */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 1579ad944479..960ff234c72d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -280,6 +280,7 @@ enum {
 	MCU_UNI_CMD_DEV_INFO_UPDATE = MCU_UNI_PREFIX | 0x01,
 	MCU_UNI_CMD_BSS_INFO_UPDATE = MCU_UNI_PREFIX | 0x02,
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
+	MCU_UNI_CMD_SUSPEND = MCU_UNI_PREFIX | 0x05,
 	MCU_UNI_CMD_HIF_CTRL = MCU_UNI_PREFIX | 0x07,
 };
 
@@ -415,6 +416,59 @@ struct mt7615_mcu_bss_event {
 	u8 pad;
 } __packed;
 
+struct mt7615_wow_ctrl_tlv {
+	__le16 tag;
+	__le16 len;
+	u8 cmd; /* 0x1: PM_WOWLAN_REQ_START
+		 * 0x2: PM_WOWLAN_REQ_STOP
+		 * 0x3: PM_WOWLAN_PARAM_CLEAR
+		 */
+	u8 trigger; /* 0: NONE
+		     * BIT(0): NL80211_WOWLAN_TRIG_MAGIC_PKT
+		     * BIT(1): NL80211_WOWLAN_TRIG_ANY
+		     * BIT(2): NL80211_WOWLAN_TRIG_DISCONNECT
+		     * BIT(3): NL80211_WOWLAN_TRIG_GTK_REKEY_FAILURE
+		     * BIT(4): BEACON_LOST
+		     * BIT(5): NL80211_WOWLAN_TRIG_NET_DETECT
+		     */
+	u8 wakeup_hif; /* 0x0: HIF_SDIO
+			* 0x1: HIF_USB
+			* 0x2: HIF_PCIE
+			* 0x3: HIF_GPIO
+			*/
+	u8 pad;
+	u8 rsv[4];
+} __packed;
+
+#define MT7615_WOW_MASK_MAX_LEN		16
+#define MT7615_WOW_PATTEN_MAX_LEN	128
+struct mt7615_wow_pattern_tlv {
+	__le16 tag;
+	__le16 len;
+	u8 index; /* pattern index */
+	u8 enable; /* 0: disable
+		    * 1: enable
+		    */
+	u8 data_len; /* pattern length */
+	u8 pad;
+	u8 mask[MT7615_WOW_MASK_MAX_LEN];
+	u8 pattern[MT7615_WOW_PATTEN_MAX_LEN];
+	u8 rsv[4];
+} __packed;
+
+struct mt7615_suspend_tlv {
+	__le16 tag;
+	__le16 len;
+	u8 enable; /* 0: suspend mode disabled
+		    * 1: suspend mode enabled
+		    */
+	u8 mdtim; /* LP parameter */
+	u8 wow_suspend; /* 0: update by origin policy
+			 * 1: update by wow dtim
+			 */
+	u8 pad[5];
+} __packed;
+
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
@@ -439,6 +493,14 @@ enum {
 	UNI_BSS_INFO_BCN_CONTENT = 7,
 };
 
+enum {
+	UNI_SUSPEND_MODE_SETTING,
+	UNI_SUSPEND_WOW_CTRL,
+	UNI_SUSPEND_WOW_GPIO_PARAM,
+	UNI_SUSPEND_WOW_WAKEUP_PORT,
+	UNI_SUSPEND_WOW_PATTERN,
+};
+
 enum {
 	PATCH_SEM_RELEASE = 0x0,
 	PATCH_SEM_GET	  = 0x1
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 6a26555dcce6..3e6bc3ce914a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -474,8 +474,6 @@ int mt7615_mac_wtbl_update_key(struct mt7615_dev *dev,
 			       enum set_key_cmd cmd);
 void mt7615_mac_reset_work(struct work_struct *work);
 
-int mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
-			  bool enable);
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 int mt7615_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 			int len, bool wait_resp);
@@ -532,6 +530,8 @@ int mt7615_init_debugfs(struct mt7615_dev *dev);
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 
 int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend);
+void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
+				 struct ieee80211_vif *vif);
 
 int __mt7663_load_firmware(struct mt7615_dev *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From d0846f0867f3361c551e2f431f94f64c58646d56 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 1 May 2020 12:36:14 +0200
Subject: mt76: mt7663u: introduce suspend/resume to mt7663u

Tested on a Chromebook by running "echo mem > /sys/power/state" to
suspend, then woken up by keyboard keystrokes to resume the system.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h       |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 44 +++++++++++++++++++++----
 2 files changed, 39 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index f1c9116b6ab4..728a2fb8b14d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -381,6 +381,7 @@ enum mt_vendor_req {
 	MT_VEND_READ_CFG =	0x47,
 	MT_VEND_READ_EXT =	0x63,
 	MT_VEND_WRITE_EXT =	0x66,
+	MT_VEND_FEATURE_SET =	0x91,
 };
 
 enum mt76u_in_ep {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index 9353175b139b..f2825b8f4539 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -372,18 +372,50 @@ static void mt7663u_disconnect(struct usb_interface *usb_intf)
 	ieee80211_free_hw(dev->mt76.hw);
 }
 
-static int __maybe_unused
-mt7663u_suspend(struct usb_interface *intf,
-		pm_message_t state)
+#ifdef CONFIG_PM
+static int mt7663u_suspend(struct usb_interface *intf, pm_message_t state)
 {
+	struct mt7615_dev *dev = usb_get_intfdata(intf);
+
+	if (!test_bit(MT76_STATE_SUSPEND, &dev->mphy.state) &&
+	    mt7615_firmware_offload(dev)) {
+		int err;
+
+		err = mt7615_mcu_set_hif_suspend(dev, true);
+		if (err < 0)
+			return err;
+	}
+
+	mt76u_stop_rx(&dev->mt76);
+
+	mt76u_stop_tx(&dev->mt76);
+	tasklet_kill(&dev->mt76.tx_tasklet);
+
 	return 0;
 }
 
-static int __maybe_unused
-mt7663u_resume(struct usb_interface *intf)
+static int mt7663u_resume(struct usb_interface *intf)
 {
-	return 0;
+	struct mt7615_dev *dev = usb_get_intfdata(intf);
+	int err;
+
+	err = mt76u_vendor_request(&dev->mt76, MT_VEND_FEATURE_SET,
+				   USB_DIR_OUT | USB_TYPE_VENDOR,
+				   0x5, 0x0, NULL, 0);
+	if (err)
+		return err;
+
+	err = mt76u_resume_rx(&dev->mt76);
+	if (err < 0)
+		return err;
+
+	if (!test_bit(MT76_STATE_SUSPEND, &dev->mphy.state) &&
+	    mt7615_firmware_offload(dev))
+		err = mt7615_mcu_set_hif_suspend(dev, false);
+
+	return err;
 }
+#endif /* CONFIG_PM */
 
 MODULE_DEVICE_TABLE(usb, mt7615_device_table);
 MODULE_FIRMWARE(MT7663_OFFLOAD_FIRMWARE_N9);
-- 
cgit v1.2.3-59-g8ed1b


From 6dd4072c1e8bc46ee81bdd5f779590bc43ab9ae4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 1 May 2020 12:36:15 +0200
Subject: mt76: mt7615: introduce PM support

Introduce suspend/resume to mt7615e driver

Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/dma.c    |  37 +++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    |   4 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    |   6 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mmio.c   |   4 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   5 +
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c    | 108 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/regs.h   |  28 +++++-
 7 files changed, 186 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
index 0b1fbddd1c3f..5a124610d4af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/dma.c
@@ -130,6 +130,43 @@ static int mt7615_poll_tx(struct napi_struct *napi, int budget)
 	return 0;
 }
 
+int mt7615_wait_pdma_busy(struct mt7615_dev *dev)
+{
+	struct mt76_dev *mdev = &dev->mt76;
+
+	if (!is_mt7663(mdev)) {
+		u32 mask = MT_PDMA_TX_BUSY | MT_PDMA_RX_BUSY;
+		u32 reg = mt7615_reg_map(dev, MT_PDMA_BUSY);
+
+		if (!mt76_poll_msec(dev, reg, mask, 0, 1000)) {
+			dev_err(mdev->dev, "PDMA engine busy\n");
+			return -EIO;
+		}
+
+		return 0;
+	}
+
+	if (!mt76_poll_msec(dev, MT_PDMA_BUSY_STATUS,
+			    MT_PDMA_TX_IDX_BUSY, 0, 1000)) {
+		dev_err(mdev->dev, "PDMA engine tx busy\n");
+		return -EIO;
+	}
+
+	if (!mt76_poll_msec(dev, MT_PSE_PG_INFO,
+			    MT_PSE_SRC_CNT, 0, 1000)) {
+		dev_err(mdev->dev, "PSE engine busy\n");
+		return -EIO;
+	}
+
+	if (!mt76_poll_msec(dev, MT_PDMA_BUSY_STATUS,
+			    MT_PDMA_BUSY_IDX, 0, 1000)) {
+		dev_err(mdev->dev, "PDMA engine busy\n");
+		return -EIO;
+	}
+
+	return 0;
+}
+
 static void mt7622_dma_sched_init(struct mt7615_dev *dev)
 {
 	u32 reg = mt7615_reg_map(dev, MT_DMASHDL_BASE);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 5c09787b0d76..7d65a3fb0c23 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1807,8 +1807,7 @@ mt7615_update_beacons(struct mt7615_dev *dev)
 		mt7615_update_vif_beacon, dev->mt76.phy2->hw);
 }
 
-static void
-mt7615_dma_reset(struct mt7615_dev *dev)
+void mt7615_dma_reset(struct mt7615_dev *dev)
 {
 	int i;
 
@@ -1827,6 +1826,7 @@ mt7615_dma_reset(struct mt7615_dev *dev)
 		 MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
 		 MT_WPDMA_GLO_CFG_TX_WRITEBACK_DONE);
 }
+EXPORT_SYMBOL_GPL(mt7615_dma_reset);
 
 void mt7615_mac_reset_work(struct work_struct *work)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 72e2e1cbab59..9b1340224448 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1683,7 +1683,7 @@ static void mt7622_trigger_hif_int(struct mt7615_dev *dev, bool en)
 			   !en * MT_INFRACFG_MISC_AP2CONN_WAKE);
 }
 
-static int mt7615_driver_own(struct mt7615_dev *dev)
+int mt7615_driver_own(struct mt7615_dev *dev)
 {
 	struct mt76_dev *mdev = &dev->mt76;
 	u32 addr;
@@ -1703,8 +1703,9 @@ static int mt7615_driver_own(struct mt7615_dev *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_driver_own);
 
-static int mt7615_firmware_own(struct mt7615_dev *dev)
+int mt7615_firmware_own(struct mt7615_dev *dev)
 {
 	u32 addr;
 
@@ -1723,6 +1724,7 @@ static int mt7615_firmware_own(struct mt7615_dev *dev)
 
 	return 0;
 }
+EXPORT_SYMBOL_GPL(mt7615_firmware_own);
 
 static int mt7615_load_patch(struct mt7615_dev *dev, u32 addr, const char *name)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
index c9f5b1ce70ae..e670393506f0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mmio.c
@@ -15,6 +15,8 @@ const u32 mt7615e_reg_map[] = {
 	[MT_ARB_BASE]		= 0x20c00,
 	[MT_HIF_BASE]		= 0x04000,
 	[MT_CSR_BASE]		= 0x07000,
+	[MT_PLE_BASE]		= 0x08000,
+	[MT_PSE_BASE]		= 0x0c000,
 	[MT_PHY_BASE]		= 0x10000,
 	[MT_CFG_BASE]		= 0x20200,
 	[MT_AGG_BASE]		= 0x20a00,
@@ -40,6 +42,8 @@ const u32 mt7663e_reg_map[] = {
 	[MT_ARB_BASE]		= 0x20c00,
 	[MT_HIF_BASE]		= 0x04000,
 	[MT_CSR_BASE]		= 0x07000,
+	[MT_PLE_BASE]		= 0x08000,
+	[MT_PSE_BASE]		= 0x0c000,
 	[MT_PHY_BASE]		= 0x10000,
 	[MT_CFG_BASE]		= 0x20000,
 	[MT_AGG_BASE]		= 0x22000,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 3e6bc3ce914a..be9188e40259 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -380,6 +380,7 @@ int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr);
 int mt7615_eeprom_get_power_index(struct mt7615_dev *dev,
 				  struct ieee80211_channel *chan,
 				  u8 chain_idx);
+int mt7615_wait_pdma_busy(struct mt7615_dev *dev);
 int mt7615_dma_init(struct mt7615_dev *dev);
 void mt7615_dma_cleanup(struct mt7615_dev *dev);
 int mt7615_mcu_init(struct mt7615_dev *dev);
@@ -436,6 +437,7 @@ static inline u16 mt7615_wtbl_size(struct mt7615_dev *dev)
 		return MT7615_WTBL_SIZE;
 }
 
+void mt7615_dma_reset(struct mt7615_dev *dev);
 void mt7615_scan_work(struct work_struct *work);
 void mt7615_ps_work(struct work_struct *work);
 void mt7615_init_txpower(struct mt7615_dev *dev,
@@ -526,6 +528,9 @@ int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
 void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
+int mt7615_firmware_own(struct mt7615_dev *dev);
+int mt7615_driver_own(struct mt7615_dev *dev);
+
 int mt7615_init_debugfs(struct mt7615_dev *dev);
 int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 0605c908059e..88ff14564521 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -66,11 +66,119 @@ static void mt7615_pci_remove(struct pci_dev *pdev)
 	pci_free_irq_vectors(pdev);
 }
 
+#ifdef CONFIG_PM
+static int mt7615_pci_suspend(struct pci_dev *pdev, pm_message_t state)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev);
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	bool hif_suspend;
+	int i, err;
+
+	hif_suspend = !test_bit(MT76_STATE_SUSPEND, &dev->mphy.state) &&
+		      mt7615_firmware_offload(dev);
+	if (hif_suspend) {
+		err = mt7615_mcu_set_hif_suspend(dev, true);
+		if (err)
+			return err;
+	}
+
+	napi_disable(&mdev->tx_napi);
+	tasklet_kill(&mdev->tx_tasklet);
+
+	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++)
+		napi_disable(&mdev->napi[i]);
+	tasklet_kill(&dev->irq_tasklet);
+
+	mt7615_dma_reset(dev);
+
+	err = mt7615_wait_pdma_busy(dev);
+	if (err)
+		goto restore;
+
+	if (is_mt7663(mdev)) {
+		mt76_set(dev, MT_PDMA_SLP_PROT, MT_PDMA_AXI_SLPPROT_ENABLE);
+		if (!mt76_poll_msec(dev, MT_PDMA_SLP_PROT,
+				    MT_PDMA_AXI_SLPPROT_RDY,
+				    MT_PDMA_AXI_SLPPROT_RDY, 1000)) {
+			dev_err(mdev->dev, "PDMA sleep protection failed\n");
+			err = -EIO;
+			goto restore;
+		}
+	}
+
+	pci_enable_wake(pdev, pci_choose_state(pdev, state), true);
+	pci_save_state(pdev);
+	err = pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	if (err)
+		goto restore;
+
+	err = mt7615_firmware_own(dev);
+	if (err)
+		goto restore;
+
+	return 0;
+
+restore:
+	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++)
+		napi_enable(&mdev->napi[i]);
+	napi_enable(&mdev->tx_napi);
+	if (hif_suspend)
+		mt7615_mcu_set_hif_suspend(dev, false);
+
+	return err;
+}
+
+static int mt7615_pci_resume(struct pci_dev *pdev)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev);
+	struct mt7615_dev *dev = container_of(mdev, struct mt7615_dev, mt76);
+	bool pdma_reset;
+	int i, err;
+
+	err = mt7615_driver_own(dev);
+	if (err < 0)
+		return err;
+
+	err = pci_set_power_state(pdev, PCI_D0);
+	if (err)
+		return err;
+
+	pci_restore_state(pdev);
+
+	if (is_mt7663(&dev->mt76)) {
+		mt76_clear(dev, MT_PDMA_SLP_PROT, MT_PDMA_AXI_SLPPROT_ENABLE);
+		mt76_wr(dev, MT_PCIE_IRQ_ENABLE, 1);
+	}
+
+	pdma_reset = !mt76_rr(dev, MT_WPDMA_TX_RING0_CTRL0) &&
+		     !mt76_rr(dev, MT_WPDMA_TX_RING0_CTRL1);
+	if (pdma_reset)
+		dev_err(mdev->dev, "PDMA engine must be reinitialized\n");
+
+	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++) {
+		napi_enable(&mdev->napi[i]);
+		napi_schedule(&mdev->napi[i]);
+	}
+	napi_enable(&mdev->tx_napi);
+	napi_schedule(&mdev->tx_napi);
+
+	if (!test_bit(MT76_STATE_SUSPEND, &dev->mphy.state) &&
+	    mt7615_firmware_offload(dev))
+		err = mt7615_mcu_set_hif_suspend(dev, false);
+
+	return err;
+}
+#endif /* CONFIG_PM */
+
 struct pci_driver mt7615_pci_driver = {
 	.name		= KBUILD_MODNAME,
 	.id_table	= mt7615_pci_device_table,
 	.probe		= mt7615_pci_probe,
 	.remove		= mt7615_pci_remove,
+#ifdef CONFIG_PM
+	.suspend	= mt7615_pci_suspend,
+	.resume		= mt7615_pci_resume,
+#endif /* CONFIG_PM */
 };
 
 MODULE_DEVICE_TABLE(pci, mt7615_pci_device_table);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
index 68d30bcc087a..aee433a9eff6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/regs.h
@@ -12,6 +12,8 @@ enum mt7615_reg_base {
 	MT_ARB_BASE,
 	MT_HIF_BASE,
 	MT_CSR_BASE,
+	MT_PLE_BASE,
+	MT_PSE_BASE,
 	MT_PHY_BASE,
 	MT_CFG_BASE,
 	MT_AGG_BASE,
@@ -63,6 +65,17 @@ enum mt7615_reg_base {
 #define MT_HIF_RST			MT_HIF(0x100)
 #define MT_HIF_LOGIC_RST_N		BIT(4)
 
+#define MT_PDMA_SLP_PROT		MT_HIF(0x154)
+#define MT_PDMA_AXI_SLPPROT_ENABLE	BIT(0)
+#define MT_PDMA_AXI_SLPPROT_RDY		BIT(16)
+
+#define MT_PDMA_BUSY_STATUS		MT_HIF(0x168)
+#define MT_PDMA_TX_IDX_BUSY		BIT(2)
+#define MT_PDMA_BUSY_IDX		BIT(31)
+
+#define MT_WPDMA_TX_RING0_CTRL0		MT_HIF(0x300)
+#define MT_WPDMA_TX_RING0_CTRL1		MT_HIF(0x304)
+
 #define MT7663_MCU_PCIE_REMAP_2_OFFSET	GENMASK(15, 0)
 #define MT7663_MCU_PCIE_REMAP_2_BASE	GENMASK(31, 16)
 
@@ -138,8 +151,7 @@ enum mt7615_reg_base {
 #define MT_CSR(ofs)			((dev)->reg_map[MT_CSR_BASE] + (ofs))
 #define MT_CONN_HIF_ON_LPCTL		MT_CSR(0x000)
 
-#define MT_PLE_BASE			0x8000
-#define MT_PLE(ofs)			(MT_PLE_BASE + (ofs))
+#define MT_PLE(ofs)			((dev)->reg_map[MT_PLE_BASE] + (ofs))
 
 #define MT_PLE_FL_Q0_CTRL		MT_PLE(0x1b0)
 #define MT_PLE_FL_Q1_CTRL		MT_PLE(0x1b4)
@@ -149,6 +161,14 @@ enum mt7615_reg_base {
 #define MT_PLE_AC_QEMPTY(ac, n)		MT_PLE(0x300 + 0x10 * (ac) + \
 					       ((n) << 2))
 
+#define MT_PSE(ofs)			((dev)->reg_map[MT_PSE_BASE] + (ofs))
+#define MT_PSE_QUEUE_EMPTY		MT_PSE(0x0b4)
+#define MT_HIF_0_EMPTY_MASK		BIT(16)
+#define MT_HIF_1_EMPTY_MASK		BIT(17)
+#define MT_HIF_ALL_EMPTY_MASK		GENMASK(17, 16)
+#define MT_PSE_PG_INFO			MT_PSE(0x194)
+#define MT_PSE_SRC_CNT			GENMASK(27, 16)
+
 #define MT_WF_PHY_BASE			((dev)->reg_map[MT_PHY_BASE])
 #define MT_WF_PHY(ofs)			(MT_WF_PHY_BASE + (ofs))
 
@@ -482,6 +502,10 @@ enum mt7615_reg_base {
 #define MT_LED_STATUS_ON		GENMASK(23, 16)
 #define MT_LED_STATUS_DURATION		GENMASK(15, 0)
 
+#define MT_PDMA_BUSY			0x82000504
+#define MT_PDMA_TX_BUSY			BIT(0)
+#define MT_PDMA_RX_BUSY			BIT(1)
+
 #define MT_EFUSE_BASE			((dev)->reg_map[MT_EFUSE_ADDR_BASE])
 #define MT_EFUSE_BASE_CTRL		0x000
 #define MT_EFUSE_BASE_CTRL_EMPTY	BIT(30)
-- 
cgit v1.2.3-59-g8ed1b


From b47e21e75c80966be1afc7fe28c75c6798b3e48e Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 1 May 2020 12:36:16 +0200
Subject: mt76: mt7615: add gtk rekey offload support

Add KCK and KEK offload support to mt7615 driver in order to
support GTK rekeying during PM suspend

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |   1 +
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |   8 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 102 ++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  29 ++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |   3 +
 5 files changed, 142 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 145af8a3ae57..81294bb2b06b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -145,6 +145,7 @@ void mt7615_check_offload_capability(struct mt7615_dev *dev)
 		dev->ops->cancel_hw_scan = NULL;
 		dev->ops->sched_scan_start = NULL;
 		dev->ops->sched_scan_stop = NULL;
+		dev->ops->set_rekey_data = NULL;
 
 		wiphy->max_sched_scan_plan_interval = 0;
 		wiphy->max_sched_scan_ie_len = 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index b93a47509423..cfe024b71677 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -936,6 +936,13 @@ static void mt7615_set_wakeup(struct ieee80211_hw *hw, bool enabled)
 
 	device_set_wakeup_enable(mdev->dev, enabled);
 }
+
+static void mt7615_set_rekey_data(struct ieee80211_hw *hw,
+				  struct ieee80211_vif *vif,
+				  struct cfg80211_gtk_rekey_data *data)
+{
+	mt7615_mcu_update_gtk_rekey(hw, vif, data);
+}
 #endif /* CONFIG_PM */
 
 const struct ieee80211_ops mt7615_ops = {
@@ -975,6 +982,7 @@ const struct ieee80211_ops mt7615_ops = {
 	.suspend = mt7615_suspend,
 	.resume = mt7615_resume,
 	.set_wakeup = mt7615_set_wakeup,
+	.set_rekey_data = mt7615_set_rekey_data,
 #endif /* CONFIG_PM */
 };
 EXPORT_SYMBOL_GPL(mt7615_ops);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 9b1340224448..da50e12f1c03 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -196,6 +196,7 @@ mt7615_mcu_parse_response(struct mt7615_dev *dev, int cmd,
 	case MCU_UNI_CMD_BSS_INFO_UPDATE:
 	case MCU_UNI_CMD_STA_REC_UPDATE:
 	case MCU_UNI_CMD_HIF_CTRL:
+	case MCU_UNI_CMD_OFFLOAD:
 	case MCU_UNI_CMD_SUSPEND: {
 		struct mt7615_mcu_uni_event *event;
 
@@ -1839,7 +1840,8 @@ mt7615_mcu_send_ram_firmware(struct mt7615_dev *dev,
 }
 
 static const struct wiphy_wowlan_support mt7615_wowlan_support = {
-	.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT,
+	.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT |
+		 WIPHY_WOWLAN_SUPPORTS_GTK_REKEY,
 	.n_patterns = 1,
 	.pattern_min_len = 1,
 	.pattern_max_len = MT7615_WOW_PATTEN_MAX_LEN,
@@ -3373,6 +3375,33 @@ mt7615_mcu_set_suspend_mode(struct mt7615_dev *dev,
 				   &req, sizeof(req), true);
 }
 
+static int
+mt7615_mcu_set_gtk_rekey(struct mt7615_dev *dev,
+			 struct ieee80211_vif *vif,
+			 bool suspend)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt7615_gtk_rekey_tlv gtk_tlv;
+	} __packed req = {
+		.hdr = {
+			.bss_idx = mvif->idx,
+		},
+		.gtk_tlv = {
+			.tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_GTK_REKEY),
+			.len = cpu_to_le16(sizeof(struct mt7615_gtk_rekey_tlv)),
+			.rekey_mode = !suspend,
+		},
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_OFFLOAD,
+				   &req, sizeof(req), true);
+}
+
 void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
 				 struct ieee80211_vif *vif)
 {
@@ -3384,6 +3413,8 @@ void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
 
 	mt7615_mcu_set_bss_pm(phy->dev, vif, suspend);
 
+	mt7615_mcu_set_gtk_rekey(phy->dev, vif, suspend);
+
 	mt7615_mcu_set_suspend_mode(phy->dev, vif, suspend, 1, true);
 
 	for (i = 0; i < wowlan->n_patterns; i++)
@@ -3391,4 +3422,73 @@ void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
 					   &wowlan->patterns[i]);
 	mt7615_mcu_set_wow_ctrl(phy->dev, vif, suspend, wowlan);
 }
+
+static void
+mt7615_mcu_key_iter(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		    struct ieee80211_sta *sta, struct ieee80211_key_conf *key,
+		    void *data)
+{
+	struct mt7615_gtk_rekey_tlv *gtk_tlv = data;
+	u32 cipher;
+
+	if (key->cipher != WLAN_CIPHER_SUITE_AES_CMAC &&
+	    key->cipher != WLAN_CIPHER_SUITE_CCMP &&
+	    key->cipher != WLAN_CIPHER_SUITE_TKIP)
+		return;
+
+	if (key->cipher == WLAN_CIPHER_SUITE_TKIP) {
+		gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_1);
+		cipher = BIT(3);
+	} else {
+		gtk_tlv->proto = cpu_to_le32(NL80211_WPA_VERSION_2);
+		cipher = BIT(4);
+	}
+
+	/* we are assuming here to have a single pairwise key */
+	if (key->flags & IEEE80211_KEY_FLAG_PAIRWISE) {
+		gtk_tlv->pairwise_cipher = cpu_to_le32(cipher);
+		gtk_tlv->group_cipher = cpu_to_le32(cipher);
+		gtk_tlv->keyid = key->keyidx;
+	}
+}
+
+int mt7615_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
+				struct ieee80211_vif *vif,
+				struct cfg80211_gtk_rekey_data *key)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct mt7615_gtk_rekey_tlv *gtk_tlv;
+	struct sk_buff *skb;
+	struct {
+		u8 bss_idx;
+		u8 pad[3];
+	} __packed hdr = {
+		.bss_idx = mvif->idx,
+	};
+
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+				 sizeof(hdr) + sizeof(*gtk_tlv));
+	if (!skb)
+		return -ENOMEM;
+
+	skb_put_data(skb, &hdr, sizeof(hdr));
+	gtk_tlv = (struct mt7615_gtk_rekey_tlv *)skb_put(skb,
+							 sizeof(*gtk_tlv));
+	gtk_tlv->tag = cpu_to_le16(UNI_OFFLOAD_OFFLOAD_GTK_REKEY);
+	gtk_tlv->len = cpu_to_le16(sizeof(*gtk_tlv));
+	gtk_tlv->rekey_mode = 2;
+	gtk_tlv->option = 1;
+
+	rcu_read_lock();
+	ieee80211_iter_keys_rcu(hw, vif, mt7615_mcu_key_iter, gtk_tlv);
+	rcu_read_unlock();
+
+	memcpy(gtk_tlv->kek, key->kek, NL80211_KEK_LEN);
+	memcpy(gtk_tlv->kck, key->kck, NL80211_KCK_LEN);
+	memcpy(gtk_tlv->replay_ctr, key->replay_ctr, NL80211_REPLAY_CTR_LEN);
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_UNI_CMD_OFFLOAD, true);
+}
 #endif /* CONFIG_PM */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 960ff234c72d..890a202acfc9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -281,6 +281,7 @@ enum {
 	MCU_UNI_CMD_BSS_INFO_UPDATE = MCU_UNI_PREFIX | 0x02,
 	MCU_UNI_CMD_STA_REC_UPDATE = MCU_UNI_PREFIX | 0x03,
 	MCU_UNI_CMD_SUSPEND = MCU_UNI_PREFIX | 0x05,
+	MCU_UNI_CMD_OFFLOAD = MCU_UNI_PREFIX | 0x06,
 	MCU_UNI_CMD_HIF_CTRL = MCU_UNI_PREFIX | 0x07,
 };
 
@@ -469,6 +470,27 @@ struct mt7615_suspend_tlv {
 	u8 pad[5];
 } __packed;
 
+struct mt7615_gtk_rekey_tlv {
+	__le16 tag;
+	__le16 len;
+	u8 kek[NL80211_KEK_LEN];
+	u8 kck[NL80211_KCK_LEN];
+	u8 replay_ctr[NL80211_REPLAY_CTR_LEN];
+	u8 rekey_mode; /* 0: rekey offload enable
+			* 1: rekey offload disable
+			* 2: rekey update
+			*/
+	u8 keyid;
+	u8 pad[2];
+	__le32 proto; /* WPA-RSN-WAPI-OPSN */
+	__le32 pairwise_cipher;
+	__le32 group_cipher;
+	__le32 key_mgmt; /* NONE-PSK-IEEE802.1X */
+	__le32 mgmt_group_cipher;
+	u8 option; /* 1: rekey data update without enabling offload */
+	u8 reserverd[3];
+} __packed;
+
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
@@ -501,6 +523,13 @@ enum {
 	UNI_SUSPEND_WOW_PATTERN,
 };
 
+enum {
+	UNI_OFFLOAD_OFFLOAD_ARPNS_IPV4,
+	UNI_OFFLOAD_OFFLOAD_ARPNS_IPV6,
+	UNI_OFFLOAD_OFFLOAD_GTK_REKEY,
+	UNI_OFFLOAD_OFFLOAD_BMC_RPY_DETECT,
+};
+
 enum {
 	PATCH_SEM_RELEASE = 0x0,
 	PATCH_SEM_GET	  = 0x1
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index be9188e40259..d3a83f3ed54e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -537,6 +537,9 @@ int mt7615_mcu_wait_response(struct mt7615_dev *dev, int cmd, int seq);
 int mt7615_mcu_set_hif_suspend(struct mt7615_dev *dev, bool suspend);
 void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
 				 struct ieee80211_vif *vif);
+int mt7615_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
+				struct ieee80211_vif *vif,
+				struct cfg80211_gtk_rekey_data *key);
 
 int __mt7663_load_firmware(struct mt7615_dev *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 86c60179e5537c28145cbfa5ed2c16f776a497a6 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 1 May 2020 12:36:17 +0200
Subject: mt76: mt7615: introduce beacon_loss mcu event

If the device has the hw beacon filter enabled, rx beacons are not reported
to the host. Introduce the beacon_loss mcu event to trigger the mac80211 mlme
connection state machine in this configuration.
IEEE80211_VIF_BEACON_FILTER has not been set in the vif flags, since the hw
beacon filter is not enabled yet.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 38 +++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h |  7 +++++
 2 files changed, 45 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index da50e12f1c03..6da819a06748 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -359,6 +359,40 @@ mt7615_mcu_scan_event(struct mt7615_dev *dev, struct sk_buff *skb)
 				     MT7615_HW_SCAN_TIMEOUT);
 }
 
+static void
+mt7615_mcu_beacon_loss_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_beacon_loss_event *event = priv;
+
+	if (mvif->idx != event->bss_idx)
+		return;
+
+	if (!(vif->driver_flags & IEEE80211_VIF_BEACON_FILTER))
+		return;
+
+	ieee80211_beacon_loss(vif);
+}
+
+static void
+mt7615_mcu_beacon_loss_event(struct mt7615_dev *dev, struct sk_buff *skb)
+{
+	struct mt7615_beacon_loss_event *event;
+	struct mt76_phy *mphy;
+	u8 band_idx = 0; /* DBDC support */
+
+	skb_pull(skb, sizeof(struct mt7615_mcu_rxd));
+	event = (struct mt7615_beacon_loss_event *)skb->data;
+	if (band_idx && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+	else
+		mphy = &dev->mt76.phy;
+
+	ieee80211_iterate_active_interfaces_atomic(mphy->hw,
+					IEEE80211_IFACE_ITER_RESUME_ALL,
+					mt7615_mcu_beacon_loss_iter, event);
+}
+
 static void
 mt7615_mcu_bss_event(struct mt7615_dev *dev, struct sk_buff *skb)
 {
@@ -389,6 +423,9 @@ mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_EXT:
 		mt7615_mcu_rx_ext_event(dev, skb);
 		break;
+	case MCU_EVENT_BSS_BEACON_LOSS:
+		mt7615_mcu_beacon_loss_event(dev, skb);
+		break;
 	case MCU_EVENT_SCHED_SCAN_DONE:
 	case MCU_EVENT_SCAN_DONE:
 		mt7615_mcu_scan_event(dev, skb);
@@ -410,6 +447,7 @@ void mt7615_mcu_rx_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	    rxd->ext_eid == MCU_EXT_EVENT_FW_LOG_2_HOST ||
 	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
 	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
+	    rxd->eid == MCU_EVENT_BSS_BEACON_LOSS ||
 	    rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
 	    rxd->eid == MCU_EVENT_BSS_ABSENCE ||
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 890a202acfc9..737ccec6dd96 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -83,6 +83,7 @@ enum {
 	MCU_EVENT_MT_PATCH_SEM = 0x04,
 	MCU_EVENT_SCAN_DONE = 0x0d,
 	MCU_EVENT_BSS_ABSENCE  = 0x11,
+	MCU_EVENT_BSS_BEACON_LOSS = 0x13,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
 	MCU_EVENT_SCHED_SCAN_DONE = 0x23,
 	MCU_EVENT_EXT = 0xed,
@@ -291,6 +292,12 @@ struct mt7615_mcu_uni_event {
 	__le32 status; /* 0: success, others: fail */
 } __packed;
 
+struct mt7615_beacon_loss_event {
+	u8 bss_idx;
+	u8 reason;
+	u8 pad[2];
+} __packed;
+
 struct mt7615_mcu_scan_ssid {
 	__le32 ssid_len;
 	u8 ssid[IEEE80211_MAX_SSID_LEN];
-- 
cgit v1.2.3-59-g8ed1b


From eb7bd8d7fb727dcdbf4b1c145bba1169bc3c2d4a Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 2 May 2020 01:13:11 +0200
Subject: mt76: mt7663: read tx streams from eeprom

Read tx stream configuration from eeprom/efuse

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 11 ++++++-----
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h |  3 +++
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 521705015036..7440ad13c74b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -130,14 +130,15 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
 static void mt7615_eeprom_parse_hw_cap(struct mt7615_dev *dev)
 {
 	u8 *eeprom = dev->mt76.eeprom.data;
-	u8 tx_mask;
+	u8 tx_mask, max_nss;
 
 	mt7615_eeprom_parse_hw_band_cap(dev);
 
 	if (is_mt7663(&dev->mt76)) {
-		tx_mask = 2;
+		max_nss = 2;
+		tx_mask = FIELD_GET(MT_EE_HW_CONF1_TX_MASK,
+				    eeprom[MT7663_EE_HW_CONF1]);
 	} else {
-		u8 max_nss;
 		u32 val;
 
 		/* read tx-rx mask from eeprom */
@@ -146,9 +147,9 @@ static void mt7615_eeprom_parse_hw_cap(struct mt7615_dev *dev)
 
 		tx_mask =  FIELD_GET(MT_EE_NIC_CONF_TX_MASK,
 				     eeprom[MT_EE_NIC_CONF_0]);
-		if (!tx_mask || tx_mask > max_nss)
-			tx_mask = max_nss;
 	}
+	if (!tx_mask || tx_mask > max_nss)
+		tx_mask = max_nss;
 
 	dev->chainmask = BIT(tx_mask) - 1;
 	dev->mphy.antenna_mask = dev->chainmask;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
index 3dd7009e5836..aad82b600c63 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
@@ -39,6 +39,7 @@ enum mt7615_eeprom_field {
 	MT7663_EE_TX0_2G_TARGET_POWER =		0x123,
 	MT_EE_TX2_5G_G0_TARGET_POWER =		0x142,
 	MT_EE_TX3_5G_G0_TARGET_POWER =		0x16a,
+	MT7663_EE_HW_CONF1 =			0x1b0,
 
 	MT7615_EE_MAX =				0x3bf,
 	MT7622_EE_MAX =				0x3db,
@@ -55,6 +56,8 @@ enum mt7615_eeprom_field {
 #define MT_EE_NIC_CONF_TX_MASK			GENMASK(7, 4)
 #define MT_EE_NIC_CONF_RX_MASK			GENMASK(3, 0)
 
+#define MT_EE_HW_CONF1_TX_MASK			GENMASK(2, 0)
+
 #define MT_EE_NIC_CONF_TSSI_2G			BIT(5)
 #define MT_EE_NIC_CONF_TSSI_5G			BIT(6)
 
-- 
cgit v1.2.3-59-g8ed1b


From cc5f58aee180f1f8dbdbc136ecb3cdd190b29068 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 2 May 2020 01:16:10 +0200
Subject: mt76: mt7615: check return value of mt7615_eeprom_get_power_index

mt7615_eeprom_get_power_index can return negative error value.
Check mt7615_eeprom_get_power_index return value before using it

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 81294bb2b06b..d831d647d237 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -247,6 +247,9 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 			int index;
 
 			index = mt7615_eeprom_get_power_index(dev, chan, j);
+			if (index < 0)
+				continue;
+
 			target_power = max(target_power, eep[index]);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 9582d5bdef0cbebbfa451b4894518f0d790f05c8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 2 May 2020 18:00:41 +0200
Subject: mt76: mt7615: fix ibss mode for mt7663

Fix the following kernel warning triggered when adding an adhoc interface
to an mt7663e device

[  233.363394] WARNING: CPU: 0 PID: 2345 at drivers/net/wireless/mt76/mt7615/mcu.c:1449 mt7615_mcu_uni_add_bss+0x15f/0x24e [mt7615_common]
[  233.363432] CPU: 0 PID: 2345 Comm: iw Tainted: G        W       4.14.171 #12
[  233.363434] Hardware name: HP Meep/Meep, BIOS Google_Meep.11297.75.0 06/17/2019
[  233.363436] task: ffff9a1a4020e3c0 task.stack: ffffb9124113c000
[  233.363441] RIP: 0010:mt7615_mcu_uni_add_bss+0x15f/0x24e [mt7615_common]
[  233.363443] RSP: 0018:ffffb9124113f730 EFLAGS: 00010246
[  233.363446] RAX: 0000000000000024 RBX: ffff9a1a788c74e8 RCX: 41826d413aea9200
[  233.363448] RDX: 0000000000000007 RSI: 0000000000000006 RDI: ffff9a1a7fc15418
[  233.363450] RBP: ffffb9124113f7c0 R08: 0000000000000356 R09: 00000000ffff0a10
[  233.363452] R10: 0000001000000000 R11: ffffffff93f2a4be R12: 0000000000000000
[  233.363454] R13: ffff9a1a7383bd48 R14: ffffb9124113f77a R15: 0000000000000000
[  233.363456] FS:  00007f203314ab80(0000) GS:ffff9a1a7fc00000(0000) knlGS:0000000000000000
[  233.363458] CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
[  233.363460] CR2: 00005a13d647c950 CR3: 0000000171238000 CR4: 00000000003406f0
[  233.363462] Call Trace:
[  233.363470]  mt7615_bss_info_changed+0x98/0xf4 [mt7615_common]
[  233.363484]  ieee80211_bss_info_change_notify+0x139/0x1d4 [mt76_mac80211]
[  233.363496]  ieee80211_ibss_disconnect+0x183/0x1bb [mt76_mac80211]
[  233.363507]  ieee80211_ibss_leave+0x14/0xa0 [mt76_mac80211]
[  233.363519]  __cfg80211_leave_ibss+0xa6/0x13a [cfg80211]
[  233.363528]  cfg80211_netdev_notifier_call+0x8b/0x631 [cfg80211]
[  233.363535]  ? packet_notifier+0x196/0x1a3
[  233.363540]  raw_notifier_call_chain+0x39/0x58
[  233.363544]  __dev_close_many+0x6b/0xf0
[  233.363548]  dev_close_many+0x62/0xe8
[  233.363552]  ? _raw_spin_unlock_irq+0xe/0x21
[  233.363555]  rollback_registered_many+0xf6/0x35c
[  233.363560]  ? __rcu_read_unlock+0x4a/0x4a
[  233.363563]  unregister_netdevice_queue+0x7f/0x105
[  233.363573]  ieee80211_del_iface+0x12/0x16 [mt76_mac80211]
[  233.363582]  nl80211_del_interface+0xa8/0x124 [cfg80211]
[  233.363588]  genl_rcv_msg+0x40b/0x481
[  233.363592]  ? genl_unbind+0xb8/0xb8
[  233.363595]  netlink_rcv_skb+0x85/0xf8
[  233.363598]  genl_rcv+0x28/0x36
[  233.363601]  netlink_unicast+0x165/0x1f8
[  233.363604]  netlink_sendmsg+0x35f/0x3a6
[  233.363608]  sock_sendmsg+0x38/0x48
[  233.363611]  ___sys_sendmsg+0x1bf/0x267
[  233.363615]  ? __inode_wait_for_writeback+0x72/0xd7
[  233.363619]  ? dentry_kill+0x69/0x76
[  233.363622]  ? dput+0xd1/0x170
[  233.363624]  __sys_sendmsg+0x52/0x8f
[  233.363628]  do_syscall_64+0x6b/0xf7
[  233.363632]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[  233.363635] RIP: 0033:0x7f2032ca1264
[  233.363637] RSP: 002b:00007ffec3668e38 EFLAGS: 00000246 ORIG_RAX: 000000000000002e
[  233.363639] RAX: ffffffffffffffda RBX: 000058f7175e7880 RCX: 00007f2032ca1264
[  233.363641] RDX: 0000000000000000 RSI: 00007ffec3668e98 RDI: 0000000000000003
[  233.363643] RBP: 00007ffec3668e70 R08: 0000000000000001 R09: 00007f2032ce1fd0
[  233.363645] R10: 000058f7175e2010 R11: 0000000000000246 R12: 000058f7175e7740
[  233.363646] R13: 00007ffec3668ff0 R14: 000058f7175e2350 R15: 00007ffec3668e98

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 6da819a06748..7adf0f7e7e2b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1417,6 +1417,9 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	case NL80211_IFTYPE_STATION:
 		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
 		break;
+	case NL80211_IFTYPE_ADHOC:
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
+		break;
 	default:
 		WARN_ON(1);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From e0ec633d76dcef31d8a15a3296815e7e8ad6aa73 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 3 May 2020 17:23:54 +0200
Subject: mt76: mt7663: fix target power parsing

Fix target power parsing from eeprom/efuse partition for 7663 chipsets

Fixes: f40ac0f3d3c0 ("mt76: mt7615: introduce mt7663e support")
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 31 +++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h |  4 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  8 ++++--
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  6 ++---
 4 files changed, 40 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 7440ad13c74b..97f173343ae5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -156,12 +156,37 @@ static void mt7615_eeprom_parse_hw_cap(struct mt7615_dev *dev)
 	dev->phy.chainmask = dev->chainmask;
 }
 
-int mt7615_eeprom_get_power_index(struct mt7615_dev *dev,
-				  struct ieee80211_channel *chan,
-				  u8 chain_idx)
+static int mt7663_eeprom_get_target_power_index(struct mt7615_dev *dev,
+						struct ieee80211_channel *chan,
+						u8 chain_idx)
+{
+	int index, group;
+
+	if (chain_idx > 1)
+		return -EINVAL;
+
+	if (chan->band == NL80211_BAND_2GHZ)
+		return MT7663_EE_TX0_2G_TARGET_POWER + (chain_idx << 4);
+
+	group = mt7615_get_channel_group(chan->hw_value);
+	if (chain_idx == 1)
+		index = MT7663_EE_TX1_5G_G0_TARGET_POWER;
+	else
+		index = MT7663_EE_TX0_5G_G0_TARGET_POWER;
+
+	return index + group * 3;
+}
+
+int mt7615_eeprom_get_target_power_index(struct mt7615_dev *dev,
+					 struct ieee80211_channel *chan,
+					 u8 chain_idx)
 {
 	int index;
 
+	if (is_mt7663(&dev->mt76))
+		return mt7663_eeprom_get_target_power_index(dev, chan,
+							    chain_idx);
+
 	if (chain_idx > 3)
 		return -EINVAL;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
index aad82b600c63..a497f04b5e31 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
@@ -34,12 +34,14 @@ enum mt7615_eeprom_field {
 	MT_EE_TX1_5G_G0_TARGET_POWER =		0x098,
 	MT_EE_2G_RATE_POWER =			0x0be,
 	MT_EE_5G_RATE_POWER =			0x0d5,
+	MT7663_EE_TX0_2G_TARGET_POWER =		0x0e3,
 	MT_EE_EXT_PA_2G_TARGET_POWER =		0x0f2,
 	MT_EE_EXT_PA_5G_TARGET_POWER =		0x0f3,
-	MT7663_EE_TX0_2G_TARGET_POWER =		0x123,
 	MT_EE_TX2_5G_G0_TARGET_POWER =		0x142,
 	MT_EE_TX3_5G_G0_TARGET_POWER =		0x16a,
 	MT7663_EE_HW_CONF1 =			0x1b0,
+	MT7663_EE_TX0_5G_G0_TARGET_POWER =	0x245,
+	MT7663_EE_TX1_5G_G0_TARGET_POWER =	0x2b5,
 
 	MT7615_EE_MAX =				0x3bf,
 	MT7622_EE_MAX =				0x3db,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index d831d647d237..c6c1701e9e4d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -237,7 +237,11 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 	    (MT_EE_RATE_POWER_EN | MT_EE_RATE_POWER_SIGN))
 		delta += rate_val & MT_EE_RATE_POWER_MASK;
 
-	target_chains = mt7615_ext_pa_enabled(dev, band) ? 1 : n_chains;
+	if (!is_mt7663(&dev->mt76) && mt7615_ext_pa_enabled(dev, band))
+		target_chains = 1;
+	else
+		target_chains = n_chains;
+
 	for (i = 0; i < sband->n_channels; i++) {
 		struct ieee80211_channel *chan = &sband->channels[i];
 		u8 target_power = 0;
@@ -246,7 +250,7 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 		for (j = 0; j < target_chains; j++) {
 			int index;
 
-			index = mt7615_eeprom_get_power_index(dev, chan, j);
+			index = mt7615_eeprom_get_target_power_index(dev, chan, j);
 			if (index < 0)
 				continue;
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index d3a83f3ed54e..d28e6380b338 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -377,9 +377,9 @@ void mt7615_unregister_device(struct mt7615_dev *dev);
 int mt7615_register_ext_phy(struct mt7615_dev *dev);
 void mt7615_unregister_ext_phy(struct mt7615_dev *dev);
 int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr);
-int mt7615_eeprom_get_power_index(struct mt7615_dev *dev,
-				  struct ieee80211_channel *chan,
-				  u8 chain_idx);
+int mt7615_eeprom_get_target_power_index(struct mt7615_dev *dev,
+					 struct ieee80211_channel *chan,
+					 u8 chain_idx);
 int mt7615_wait_pdma_busy(struct mt7615_dev *dev);
 int mt7615_dma_init(struct mt7615_dev *dev);
 void mt7615_dma_cleanup(struct mt7615_dev *dev);
-- 
cgit v1.2.3-59-g8ed1b


From c88bf52b15872d960d5a0c30f0bac6351fb295df Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sun, 3 May 2020 17:30:53 +0200
Subject: mt76: mt7615: fix delta tx power for mt7663

Fix mt7663 eeprom definitions for delta tx power parsing

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 17 +++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 10 +++-------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 ++
 4 files changed, 23 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 97f173343ae5..6a5ae047c63b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -225,6 +225,23 @@ int mt7615_eeprom_get_target_power_index(struct mt7615_dev *dev,
 	return index;
 }
 
+int mt7615_eeprom_get_power_delta_index(struct mt7615_dev *dev,
+					enum nl80211_band band)
+{
+	/* assume the first rate has the highest power offset */
+	if (is_mt7663(&dev->mt76)) {
+		if (band == NL80211_BAND_2GHZ)
+			return MT_EE_TX0_5G_G0_TARGET_POWER;
+		else
+			return MT7663_EE_5G_RATE_POWER;
+	}
+
+	if (band == NL80211_BAND_2GHZ)
+		return MT_EE_2G_RATE_POWER;
+	else
+		return MT_EE_5G_RATE_POWER;
+}
+
 static void mt7615_apply_cal_free_data(struct mt7615_dev *dev)
 {
 	static const u16 ical[] = {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
index a497f04b5e31..40fed7adc58a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.h
@@ -31,6 +31,7 @@ enum mt7615_eeprom_field {
 	MT_EE_CALDATA_FLASH =			0x052,
 	MT_EE_TX0_2G_TARGET_POWER =		0x058,
 	MT_EE_TX0_5G_G0_TARGET_POWER =		0x070,
+	MT7663_EE_5G_RATE_POWER =		0x089,
 	MT_EE_TX1_5G_G0_TARGET_POWER =		0x098,
 	MT_EE_2G_RATE_POWER =			0x0be,
 	MT_EE_5G_RATE_POWER =			0x0d5,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index c6c1701e9e4d..1d49d65d1acd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -222,17 +222,13 @@ void mt7615_init_txpower(struct mt7615_dev *dev,
 			 struct ieee80211_supported_band *sband)
 {
 	int i, n_chains = hweight8(dev->mphy.antenna_mask), target_chains;
+	int delta_idx, delta = mt76_tx_power_nss_delta(n_chains);
 	u8 *eep = (u8 *)dev->mt76.eeprom.data;
 	enum nl80211_band band = sband->band;
-	int delta = mt76_tx_power_nss_delta(n_chains);
 	u8 rate_val;
 
-	/* assume the first rate has the highest power offset */
-	if (band == NL80211_BAND_2GHZ)
-		rate_val = eep[MT_EE_2G_RATE_POWER];
-	else
-		rate_val = eep[MT_EE_5G_RATE_POWER];
-
+	delta_idx = mt7615_eeprom_get_power_delta_index(dev, band);
+	rate_val = eep[delta_idx];
 	if ((rate_val & ~MT_EE_RATE_POWER_MASK) ==
 	    (MT_EE_RATE_POWER_EN | MT_EE_RATE_POWER_SIGN))
 		delta += rate_val & MT_EE_RATE_POWER_MASK;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index d28e6380b338..dc60abb0a130 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -380,6 +380,8 @@ int mt7615_eeprom_init(struct mt7615_dev *dev, u32 addr);
 int mt7615_eeprom_get_target_power_index(struct mt7615_dev *dev,
 					 struct ieee80211_channel *chan,
 					 u8 chain_idx);
+int mt7615_eeprom_get_power_delta_index(struct mt7615_dev *dev,
+					enum nl80211_band band);
 int mt7615_wait_pdma_busy(struct mt7615_dev *dev);
 int mt7615_dma_init(struct mt7615_dev *dev);
 void mt7615_dma_cleanup(struct mt7615_dev *dev);
-- 
cgit v1.2.3-59-g8ed1b


From a72ad451e7049c18211f91bf12c7784a32ecbe1b Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Wed, 6 May 2020 11:53:35 +0200
Subject: mt76: mt7663: introduce WoW with net detect support

Introduce WoW with net detect support

Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 13 ++++++++++---
 1 file changed, 10 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 7adf0f7e7e2b..2775238b36ca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1882,10 +1882,11 @@ mt7615_mcu_send_ram_firmware(struct mt7615_dev *dev,
 
 static const struct wiphy_wowlan_support mt7615_wowlan_support = {
 	.flags = WIPHY_WOWLAN_MAGIC_PKT | WIPHY_WOWLAN_DISCONNECT |
-		 WIPHY_WOWLAN_SUPPORTS_GTK_REKEY,
+		 WIPHY_WOWLAN_SUPPORTS_GTK_REKEY | WIPHY_WOWLAN_NET_DETECT,
 	.n_patterns = 1,
 	.pattern_min_len = 1,
 	.pattern_max_len = MT7615_WOW_PATTEN_MAX_LEN,
+	.max_nd_match_sets = 10,
 };
 
 static int mt7615_load_n9(struct mt7615_dev *dev, const char *name)
@@ -3316,10 +3317,11 @@ mt7615_mcu_set_bss_pm(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 }
 
 static int
-mt7615_mcu_set_wow_ctrl(struct mt7615_dev *dev, struct ieee80211_vif *vif,
+mt7615_mcu_set_wow_ctrl(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 			bool suspend, struct cfg80211_wowlan *wowlan)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = phy->dev;
 	struct {
 		struct {
 			u8 bss_idx;
@@ -3341,6 +3343,11 @@ mt7615_mcu_set_wow_ctrl(struct mt7615_dev *dev, struct ieee80211_vif *vif,
 		req.wow_ctrl_tlv.trigger |= BIT(0);
 	if (wowlan->disconnect)
 		req.wow_ctrl_tlv.trigger |= BIT(2);
+	if (wowlan->nd_config) {
+		mt7615_mcu_sched_scan_req(phy, vif, wowlan->nd_config);
+		req.wow_ctrl_tlv.trigger |= BIT(5);
+	}
+	mt7615_mcu_sched_scan_enable(phy, vif, suspend);
 
 	if (mt76_is_mmio(&dev->mt76))
 		req.wow_ctrl_tlv.wakeup_hif = 2;
@@ -3461,7 +3468,7 @@ void mt7615_mcu_set_suspend_iter(void *priv, u8 *mac,
 	for (i = 0; i < wowlan->n_patterns; i++)
 		mt7615_mcu_set_wow_pattern(phy->dev, vif, i, suspend,
 					   &wowlan->patterns[i]);
-	mt7615_mcu_set_wow_ctrl(phy->dev, vif, suspend, wowlan);
+	mt7615_mcu_set_wow_ctrl(phy, vif, suspend, wowlan);
 }
 
 static void
-- 
cgit v1.2.3-59-g8ed1b


From 5fdba8a77be6e7814698d5ac068cc31775b9930d Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Wed, 6 May 2020 11:55:42 +0200
Subject: mt76: mt7663: add support to sched scan with randomise addr

Add support to sched scan with randomise addr

Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 7 ++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h  | 5 +++--
 3 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 1d49d65d1acd..37fc70197f92 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -139,7 +139,8 @@ void mt7615_check_offload_capability(struct mt7615_dev *dev)
 		ieee80211_hw_set(hw, SUPPORTS_PS);
 		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
 
-		wiphy->features |= NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+		wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
+				   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
 	} else {
 		dev->ops->hw_scan = NULL;
 		dev->ops->cancel_hw_scan = NULL;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 2775238b36ca..19b59a7550b6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2907,7 +2907,12 @@ int mt7615_mcu_sched_scan_req(struct mt7615_phy *phy,
 	req = (struct mt7615_sched_scan_req *)skb_put(skb, sizeof(*req));
 	req->version = 1;
 	req->seq_num = mvif->scan_seq_num | ext_phy << 7;
-	req->scan_func = !!(sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR);
+
+	if (sreq->flags & NL80211_SCAN_FLAG_RANDOM_ADDR) {
+		get_random_mask_addr(req->random_mac, sreq->mac_addr,
+				     sreq->mac_addr_mask);
+		req->scan_func = 1;
+	}
 
 	req->ssids_num = sreq->n_ssids;
 	for (i = 0; i < req->ssids_num; i++) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 737ccec6dd96..0f12e6da89af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -405,10 +405,11 @@ struct mt7615_sched_scan_req {
 	u8 channel_type;
 	u8 channels_num;
 	u8 intervals_num;
-	u8 scan_func;
+	u8 scan_func; /* BIT(0) enable random mac address */
 	struct mt7615_mcu_scan_channel channels[64];
 	__le16 intervals[MT7615_MAX_SCHED_SCAN_INTERVAL];
-	u8 pad2[64];
+	u8 random_mac[ETH_ALEN]; /* valid when BIT(0) in scan_func is set */
+	u8 pad2[58];
 } __packed;
 
 struct nt7615_sched_scan_done {
-- 
cgit v1.2.3-59-g8ed1b


From 1245fe6a83a1c550d86d15501d86f9b2de5f97ff Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 6 May 2020 11:58:32 +0200
Subject: mt76: mt7615: scan all channels if not specified

Configure the mcu to scan all available channels if mac80211 does not
provide any frequency list

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 19b59a7550b6..96bf39a4a3da 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2806,7 +2806,6 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	req->scan_type = sreq->n_ssids ? 1 : 0;
 	req->probe_req_num = sreq->n_ssids ? 2 : 0;
 	req->version = 1;
-	req->channel_type = 4;
 
 	for (i = 0; i < sreq->n_ssids; i++) {
 		if (!sreq->ssids[i].ssid_len)
@@ -2835,6 +2834,7 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		chan->band = scan_list[i]->band == NL80211_BAND_2GHZ ? 1 : 2;
 		chan->channel_num = scan_list[i]->hw_value;
 	}
+	req->channel_type = sreq->n_channels ? 4 : 0;
 
 	if (sreq->ie_len > 0) {
 		memcpy(req->ies, sreq->ie, sreq->ie_len);
@@ -2930,7 +2930,7 @@ int mt7615_mcu_sched_scan_req(struct mt7615_phy *phy,
 		req->match[i].ssid_len = match->ssid.ssid_len;
 	}
 
-	req->channel_type = 4;
+	req->channel_type = sreq->n_channels ? 4 : 0;
 	req->channels_num = min_t(u8, sreq->n_channels, 64);
 	for (i = 0; i < req->channels_num; i++) {
 		chan = &req->channels[i];
-- 
cgit v1.2.3-59-g8ed1b


From 7c4f744d6703757be959f521a7a441bf34745d99 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:22 +0800
Subject: mt76: avoid rx reorder buffer overflow

Enlarge slot to support 11ax 256 BA (256 MPDUs in an AMPDU)

Signed-off-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/agg-rx.c | 8 ++++----
 drivers/net/wireless/mediatek/mt76/mt76.h   | 6 +++---
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index f77f03530259..acdbe6f8248d 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -152,8 +152,8 @@ void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
 	struct ieee80211_sta *sta;
 	struct mt76_rx_tid *tid;
 	bool sn_less;
-	u16 seqno, head, size;
-	u8 ackp, idx;
+	u16 seqno, head, size, idx;
+	u8 ackp;
 
 	__skb_queue_tail(frames, skb);
 
@@ -239,7 +239,7 @@ out:
 }
 
 int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tidno,
-		       u16 ssn, u8 size)
+		       u16 ssn, u16 size)
 {
 	struct mt76_rx_tid *tid;
 
@@ -264,7 +264,7 @@ EXPORT_SYMBOL_GPL(mt76_rx_aggr_start);
 
 static void mt76_rx_aggr_shutdown(struct mt76_dev *dev, struct mt76_rx_tid *tid)
 {
-	u8 size = tid->size;
+	u16 size = tid->size;
 	int i;
 
 	spin_lock_bh(&tid->lock);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 728a2fb8b14d..67cab570bb71 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -244,8 +244,8 @@ struct mt76_rx_tid {
 	struct delayed_work reorder_work;
 
 	u16 head;
-	u8 size;
-	u8 nframes;
+	u16 size;
+	u16 nframes;
 
 	u8 num;
 
@@ -798,7 +798,7 @@ int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 void mt76_set_stream_caps(struct mt76_dev *dev, bool vht);
 
 int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid,
-		       u16 ssn, u8 size);
+		       u16 ssn, u16 size);
 void mt76_rx_aggr_stop(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid);
 
 void mt76_wcid_key_setup(struct mt76_dev *dev, struct mt76_wcid *wcid,
-- 
cgit v1.2.3-59-g8ed1b


From af4a2f2fdd6fe4f6ffc61eec84da999dbff37d3c Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:23 +0800
Subject: mt76: add support for HE RX rate reporting

Add support for encoding and reporting HE RX rates.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c | 4 +++-
 drivers/net/wireless/mediatek/mt76/mt76.h     | 3 ++-
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 176c22a5319a..f13114b07b6e 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -676,7 +676,6 @@ mt76_rx_convert(struct mt76_dev *dev, struct sk_buff *skb,
 		struct ieee80211_hw **hw,
 		struct ieee80211_sta **sta)
 {
-
 	struct ieee80211_rx_status *status = IEEE80211_SKB_RXCB(skb);
 	struct mt76_rx_status mstat;
 
@@ -688,6 +687,9 @@ mt76_rx_convert(struct mt76_dev *dev, struct sk_buff *skb,
 	status->enc_flags = mstat.enc_flags;
 	status->encoding = mstat.encoding;
 	status->bw = mstat.bw;
+	status->he_ru = mstat.he_ru;
+	status->he_gi = mstat.he_gi;
+	status->he_dcm = mstat.he_dcm;
 	status->rate_idx = mstat.rate_idx;
 	status->nss = mstat.nss;
 	status->band = mstat.band;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 67cab570bb71..083d87462533 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -462,7 +462,8 @@ struct mt76_rx_status {
 	u16 freq;
 	u32 flag;
 	u8 enc_flags;
-	u8 encoding:2, bw:3;
+	u8 encoding:2, bw:3, he_ru:3;
+	u8 he_gi:2, he_dcm:1;
 	u8 rate_idx;
 	u8 nss;
 	u8 band;
-- 
cgit v1.2.3-59-g8ed1b


From 77ae1d5e13eb51651899fbfb6d7a34bc5ee7d4af Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:24 +0800
Subject: mt76: add Rx stats support for radiotap

HE devices need to add an Rx radiotap header.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/agg-rx.c   |  4 ++--
 drivers/net/wireless/mediatek/mt76/mac80211.c |  6 +++---
 drivers/net/wireless/mediatek/mt76/mt76.h     | 19 +++++++++++++++++++
 3 files changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/agg-rx.c b/drivers/net/wireless/mediatek/mt76/agg-rx.c
index acdbe6f8248d..df25c00d9e06 100644
--- a/drivers/net/wireless/mediatek/mt76/agg-rx.c
+++ b/drivers/net/wireless/mediatek/mt76/agg-rx.c
@@ -119,7 +119,7 @@ static void
 mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
-	struct ieee80211_bar *bar = (struct ieee80211_bar *)skb->data;
+	struct ieee80211_bar *bar = mt76_skb_get_hdr(skb);
 	struct mt76_wcid *wcid = status->wcid;
 	struct mt76_rx_tid *tid;
 	u16 seqno;
@@ -147,7 +147,7 @@ mt76_rx_aggr_check_ctl(struct sk_buff *skb, struct sk_buff_head *frames)
 void mt76_rx_aggr_reorder(struct sk_buff *skb, struct sk_buff_head *frames)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
 	struct mt76_wcid *wcid = status->wcid;
 	struct ieee80211_sta *sta;
 	struct mt76_rx_tid *tid;
diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index f13114b07b6e..f65e1b3e8f7a 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -726,7 +726,7 @@ mt76_check_ccmp_pn(struct sk_buff *skb)
 		 * Validate the first fragment both here and in mac80211
 		 * All further fragments will be validated by mac80211 only.
 		 */
-		hdr = (struct ieee80211_hdr *)skb->data;
+		hdr = mt76_skb_get_hdr(skb);
 		if (ieee80211_is_frag(hdr) &&
 		    !ieee80211_is_first_frag(hdr->frame_control))
 			return 0;
@@ -799,7 +799,7 @@ mt76_airtime_flush_ampdu(struct mt76_dev *dev)
 static void
 mt76_airtime_check(struct mt76_dev *dev, struct sk_buff *skb)
 {
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
 	struct mt76_wcid *wcid = status->wcid;
 
@@ -836,7 +836,7 @@ static void
 mt76_check_sta(struct mt76_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
-	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	struct ieee80211_hdr *hdr = mt76_skb_get_hdr(skb);
 	struct ieee80211_sta *sta;
 	struct ieee80211_hw *hw;
 	struct mt76_wcid *wcid = status->wcid;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 083d87462533..38c91db2dea7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -746,6 +746,25 @@ static inline struct mt76_tx_cb *mt76_tx_skb_cb(struct sk_buff *skb)
 	return ((void *)IEEE80211_SKB_CB(skb)->status.status_driver_data);
 }
 
+static inline void *mt76_skb_get_hdr(struct sk_buff *skb)
+{
+	struct mt76_rx_status mstat;
+	u8 *data = skb->data;
+
+	/* Alignment concerns */
+	BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he) % 4);
+	BUILD_BUG_ON(sizeof(struct ieee80211_radiotap_he_mu) % 4);
+
+	mstat = *((struct mt76_rx_status *)skb->cb);
+
+	if (mstat.flag & RX_FLAG_RADIOTAP_HE)
+		data += sizeof(struct ieee80211_radiotap_he);
+	if (mstat.flag & RX_FLAG_RADIOTAP_HE_MU)
+		data += sizeof(struct ieee80211_radiotap_he_mu);
+
+	return data;
+}
+
 static inline void mt76_insert_hdr_pad(struct sk_buff *skb)
 {
 	int len = ieee80211_get_hdrlen_from_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 49e649c3e0a6ec8a12976e331a2c1f29dc7dd3a9 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:25 +0800
Subject: mt76: adjust wcid size to support new 802.11ax generation

The newer 802.11ax devices (i.e. MT7915E) can connect to many more
peers than previous generations.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 38c91db2dea7..25a7cda90766 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -181,7 +181,7 @@ enum mt76_wcid_flags {
 	MT_WCID_FLAG_PS,
 };
 
-#define MT76_N_WCIDS 128
+#define MT76_N_WCIDS 288
 
 /* stored in ieee80211_tx_info::hw_queue */
 #define MT_TX_HW_QUEUE_EXT_PHY		BIT(3)
@@ -201,7 +201,7 @@ struct mt76_wcid {
 	struct ewma_signal rssi;
 	int inactive_count;
 
-	u8 idx;
+	u16 idx;
 	u8 hw_key_idx;
 
 	u8 sta:1;
@@ -268,7 +268,7 @@ struct mt76_rx_tid {
 
 struct mt76_tx_cb {
 	unsigned long jiffies;
-	u8 wcid;
+	u16 wcid;
 	u8 pktid;
 	u8 flags;
 };
@@ -445,7 +445,7 @@ struct mt76_mmio {
 struct mt76_rx_status {
 	union {
 		struct mt76_wcid *wcid;
-		u8 wcid_idx;
+		u16 wcid_idx;
 	};
 
 	unsigned long reorder_time;
@@ -622,7 +622,7 @@ enum mt76_phy_type {
 #define mt76_hw(dev) (dev)->mphy.hw
 
 static inline struct ieee80211_hw *
-mt76_wcid_hw(struct mt76_dev *dev, u8 wcid)
+mt76_wcid_hw(struct mt76_dev *dev, u16 wcid)
 {
 	if (wcid <= MT76_N_WCIDS &&
 	    mt76_wcid_mask_test(dev->wcid_phy_mask, wcid))
-- 
cgit v1.2.3-59-g8ed1b


From d3377b78cec6eb32241a2ac3dc0c43a0bf71129a Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:26 +0800
Subject: mt76: add HE phy modes and hardware queue

This is a preliminary patch to support 11ax devices.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 25a7cda90766..88959c1d5d1e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -60,6 +60,7 @@ enum mt76_txq_id {
 	MT_TXQ_BK = IEEE80211_AC_BK,
 	MT_TXQ_PSD,
 	MT_TXQ_MCU,
+	MT_TXQ_MCU_WA,
 	MT_TXQ_BEACON,
 	MT_TXQ_CAB,
 	MT_TXQ_FWDL,
@@ -69,6 +70,7 @@ enum mt76_txq_id {
 enum mt76_rxq_id {
 	MT_RXQ_MAIN,
 	MT_RXQ_MCU,
+	MT_RXQ_MCU_WA,
 	__MT_RXQ_MAX
 };
 
@@ -581,6 +583,10 @@ enum mt76_phy_type {
 	MT_PHY_TYPE_HT,
 	MT_PHY_TYPE_HT_GF,
 	MT_PHY_TYPE_VHT,
+	MT_PHY_TYPE_HE_SU = 8,
+	MT_PHY_TYPE_HE_EXT_SU,
+	MT_PHY_TYPE_HE_TB,
+	MT_PHY_TYPE_HE_MU,
 };
 
 #define __mt76_rr(dev, ...)	(dev)->bus->rr((dev), __VA_ARGS__)
-- 
cgit v1.2.3-59-g8ed1b


From e57b7901469fc0b021930b83a8094baaf3d81b09 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:27 +0800
Subject: mt76: add mac80211 driver for MT7915 PCIe-based chipsets

Add support for the MediaTek latest generation IEEE 802.11ax 4x4
device MT7915E, which supports concurrent dual-band operation at
both 5GHz and 2.4GHz.

Note that this patch only adds the basic parts; support for more HE
capabilities will be added in further patches.

The driver supports AP, Station, Mesh and monitor mode.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Suggested-by: Shihwei Lin <shihwei.lin@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Acked-by: Yiwei Chung <yiwei.chung@mediatek.com>
Acked-by: YF Luo <yf.luo@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/Kconfig         |    1 +
 drivers/net/wireless/mediatek/mt76/Makefile        |    1 +
 drivers/net/wireless/mediatek/mt76/mt7915/Kconfig  |   13 +
 drivers/net/wireless/mediatek/mt76/mt7915/Makefile |    6 +
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    |  245 +++
 drivers/net/wireless/mediatek/mt76/mt7915/dma.c    |  285 +++
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c |  125 ++
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h |   78 +
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  395 ++++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 1298 +++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.h    |  328 +++
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |  741 +++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 2313 ++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |  837 +++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  441 ++++
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c    |  166 ++
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h   |  343 +++
 17 files changed, 7616 insertions(+)
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/Kconfig
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/Makefile
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/dma.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/init.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mac.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mac.h
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/main.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/pci.c
 create mode 100644 drivers/net/wireless/mediatek/mt76/mt7915/regs.h

diff --git a/drivers/net/wireless/mediatek/mt76/Kconfig b/drivers/net/wireless/mediatek/mt76/Kconfig
index cbc2d8a5d354..41533a0e1720 100644
--- a/drivers/net/wireless/mediatek/mt76/Kconfig
+++ b/drivers/net/wireless/mediatek/mt76/Kconfig
@@ -24,3 +24,4 @@ source "drivers/net/wireless/mediatek/mt76/mt76x0/Kconfig"
 source "drivers/net/wireless/mediatek/mt76/mt76x2/Kconfig"
 source "drivers/net/wireless/mediatek/mt76/mt7603/Kconfig"
 source "drivers/net/wireless/mediatek/mt76/mt7615/Kconfig"
+source "drivers/net/wireless/mediatek/mt76/mt7915/Kconfig"
diff --git a/drivers/net/wireless/mediatek/mt76/Makefile b/drivers/net/wireless/mediatek/mt76/Makefile
index a1dfafec431b..ef663b873b0b 100644
--- a/drivers/net/wireless/mediatek/mt76/Makefile
+++ b/drivers/net/wireless/mediatek/mt76/Makefile
@@ -27,3 +27,4 @@ obj-$(CONFIG_MT76x0_COMMON) += mt76x0/
 obj-$(CONFIG_MT76x2_COMMON) += mt76x2/
 obj-$(CONFIG_MT7603E) += mt7603/
 obj-$(CONFIG_MT7615_COMMON) += mt7615/
+obj-$(CONFIG_MT7915E) += mt7915/
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/Kconfig b/drivers/net/wireless/mediatek/mt76/mt7915/Kconfig
new file mode 100644
index 000000000000..d98225da694c
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/Kconfig
@@ -0,0 +1,13 @@
+# SPDX-License-Identifier: ISC
+config MT7915E
+	tristate "MediaTek MT7915E (PCIe) support"
+	select MT76_CORE
+	depends on MAC80211
+	depends on PCI
+	help
+	  This adds support for MT7915-based wireless PCIe devices,
+	  which support concurrent dual-band operation at both 5GHz
+	  and 2.4GHz IEEE 802.11ax 4x4:4SS 1024-QAM, 160MHz channels,
+	  OFDMA, spatial reuse and dual carrier modulation.
+
+	  To compile this driver as a module, choose M here.
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/Makefile b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
new file mode 100644
index 000000000000..57fe726cc38b
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/Makefile
@@ -0,0 +1,6 @@
+# SPDX-License-Identifier: ISC
+
+obj-$(CONFIG_MT7915E) += mt7915e.o
+
+mt7915e-y := pci.o init.o dma.o eeprom.o main.o mcu.o mac.o \
+	     debugfs.o
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
new file mode 100644
index 000000000000..152ae0617f3d
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -0,0 +1,245 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include "mt7915.h"
+#include "eeprom.h"
+
+/** global debugfs **/
+
+/* test knob of system layer 1/2 error recovery */
+static int mt7915_ser_trigger_set(void *data, u64 val)
+{
+	enum {
+		SER_SET_RECOVER_L1 = 1,
+		SER_SET_RECOVER_L2,
+		SER_ENABLE = 2,
+		SER_RECOVER
+	};
+	struct mt7915_dev *dev = data;
+	int ret = 0;
+
+	switch (val) {
+	case SER_SET_RECOVER_L1:
+	case SER_SET_RECOVER_L2:
+		/* L1 and L2 recovery share the same enable + recover sequence */
+		ret = mt7915_mcu_set_ser(dev, SER_ENABLE, BIT(val), 0);
+		if (ret)
+			return ret;
+
+		return mt7915_mcu_set_ser(dev, SER_RECOVER, val, 0);
+	default:
+		break;
+	}
+
+	return ret;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_ser_trigger, NULL,
+			 mt7915_ser_trigger_set, "%lld\n");
+
+static int
+mt7915_radar_trigger(void *data, u64 val)
+{
+	struct mt7915_dev *dev = data;
+
+	return mt7915_mcu_rdd_cmd(dev, RDD_RADAR_EMULATE, 1, 0, 0);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_radar_trigger, NULL,
+			 mt7915_radar_trigger, "%lld\n");
+
+static int
+mt7915_dbdc_set(void *data, u64 val)
+{
+	struct mt7915_dev *dev = data;
+
+	if (val)
+		mt7915_register_ext_phy(dev);
+	else
+		mt7915_unregister_ext_phy(dev);
+
+	return 0;
+}
+
+static int
+mt7915_dbdc_get(void *data, u64 *val)
+{
+	struct mt7915_dev *dev = data;
+
+	*val = !!mt7915_ext_phy(dev);
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_dbdc, mt7915_dbdc_get,
+			 mt7915_dbdc_set, "%lld\n");
+
+static void
+mt7915_ampdu_stat_read_phy(struct mt7915_phy *phy,
+			   struct seq_file *file)
+{
+	struct mt7915_dev *dev = file->private;
+	bool ext_phy = phy != &dev->phy;
+	int bound[15], range[4], i, n;
+
+	if (!phy)
+		return;
+
+	/* Tx ampdu stat */
+	for (i = 0; i < ARRAY_SIZE(range); i++)
+		range[i] = mt76_rr(dev, MT_MIB_ARNG(ext_phy, i));
+
+	for (i = 0; i < ARRAY_SIZE(bound); i++)
+		bound[i] = MT_MIB_ARNCR_RANGE(range[i / 4], i) + 1;
+
+	seq_printf(file, "\nPhy %d\n", ext_phy);
+
+	seq_printf(file, "Length: %8d | ", bound[0]);
+	for (i = 0; i < ARRAY_SIZE(bound) - 1; i++)
+		seq_printf(file, "%3d -%3d | ",
+			   bound[i] + 1, bound[i + 1]);
+
+	seq_puts(file, "\nCount:  ");
+	n = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
+	for (i = 0; i < ARRAY_SIZE(bound); i++)
+		seq_printf(file, "%8d | ", dev->mt76.aggr_stats[i + n]);
+	seq_puts(file, "\n");
+
+	seq_printf(file, "BA miss count: %d\n", phy->mib.ba_miss_cnt);
+}
+
+static int
+mt7915_tx_stats_read(struct seq_file *file, void *data)
+{
+	struct mt7915_dev *dev = file->private;
+	int stat[8], i, n;
+
+	mt7915_ampdu_stat_read_phy(&dev->phy, file);
+	mt7915_ampdu_stat_read_phy(mt7915_ext_phy(dev), file);
+
+	/* Tx amsdu info */
+	seq_puts(file, "Tx MSDU stat:\n");
+	for (i = 0, n = 0; i < ARRAY_SIZE(stat); i++) {
+		stat[i] = mt76_rr(dev,  MT_PLE_AMSDU_PACK_MSDU_CNT(i));
+		n += stat[i];
+	}
+
+	for (i = 0; i < ARRAY_SIZE(stat); i++) {
+		seq_printf(file, "AMSDU pack count of %d MSDU in TXD: 0x%x ",
+			   i + 1, stat[i]);
+		if (n != 0)
+			seq_printf(file, "(%d%%)\n", stat[i] * 100 / n);
+		else
+			seq_puts(file, "\n");
+	}
+
+	return 0;
+}
+
+static int
+mt7915_tx_stats_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, mt7915_tx_stats_read, inode->i_private);
+}
+
+static const struct file_operations fops_tx_stats = {
+	.open = mt7915_tx_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+static int mt7915_read_temperature(struct seq_file *s, void *data)
+{
+	struct mt7915_dev *dev = dev_get_drvdata(s->private);
+	int temp;
+
+	/* cpu */
+	temp = mt7915_mcu_get_temperature(dev, 0);
+	seq_printf(s, "Temperature: %d\n", temp);
+
+	return 0;
+}
+
+static int
+mt7915_queues_acq(struct seq_file *s, void *data)
+{
+	struct mt7915_dev *dev = dev_get_drvdata(s->private);
+	int i;
+
+	for (i = 0; i < 16; i++) {
+		int j, acs = i / 4, index = i % 4;
+		u32 ctrl, val, qlen = 0;
+
+		val = mt76_rr(dev, MT_PLE_AC_QEMPTY(acs, index));
+		ctrl = BIT(31) | BIT(15) | (acs << 8);
+
+		for (j = 0; j < 32; j++) {
+			if (val & BIT(j))
+				continue;
+
+			mt76_wr(dev, MT_PLE_FL_Q0_CTRL,
+				ctrl | (j + (index << 5)));
+			qlen += mt76_get_field(dev, MT_PLE_FL_Q3_CTRL,
+					       GENMASK(11, 0));
+		}
+		seq_printf(s, "AC%d%d: queued=%d\n", acs, index, qlen);
+	}
+
+	return 0;
+}
+
+static int
+mt7915_queues_read(struct seq_file *s, void *data)
+{
+	struct mt7915_dev *dev = dev_get_drvdata(s->private);
+	static const struct {
+		char *queue;
+		int id;
+	} queue_map[] = {
+		{ "WFDMA0", MT_TXQ_BE },
+		{ "MCUWM", MT_TXQ_MCU },
+		{ "MCUWA", MT_TXQ_MCU_WA },
+		{ "MCUFWQ", MT_TXQ_FWDL },
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(queue_map); i++) {
+		struct mt76_sw_queue *q = &dev->mt76.q_tx[queue_map[i].id];
+
+		if (!q->q)
+			continue;
+
+		seq_printf(s,
+			   "%s:	queued=%d head=%d tail=%d\n",
+			   queue_map[i].queue, q->q->queued, q->q->head,
+			   q->q->tail);
+	}
+
+	return 0;
+}
+
+int mt7915_init_debugfs(struct mt7915_dev *dev)
+{
+	struct dentry *dir;
+
+	dir = mt76_register_debugfs(&dev->mt76);
+	if (!dir)
+		return -ENOMEM;
+
+	debugfs_create_devm_seqfile(dev->mt76.dev, "queues", dir,
+				    mt7915_queues_read);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "acq", dir,
+				    mt7915_queues_acq);
+	debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats);
+	debugfs_create_file("dbdc", 0600, dir, dev, &fops_dbdc);
+	debugfs_create_u32("dfs_hw_pattern", 0400, dir, &dev->hw_pattern);
+	/* test knobs */
+	debugfs_create_file("radar_trigger", 0200, dir, dev,
+			    &fops_radar_trigger);
+	debugfs_create_file("ser_trigger", 0200, dir, dev, &fops_ser_trigger);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "temperature", dir,
+				    mt7915_read_temperature);
+
+	return 0;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/dma.c b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
new file mode 100644
index 000000000000..766185d1aa21
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/dma.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include "mt7915.h"
+#include "../dma.h"
+#include "mac.h"
+
+static int
+mt7915_init_tx_queues(struct mt7915_dev *dev, int n_desc)
+{
+	struct mt76_sw_queue *q;
+	struct mt76_queue *hwq;
+	int err, i;
+
+	hwq = devm_kzalloc(dev->mt76.dev, sizeof(*hwq), GFP_KERNEL);
+	if (!hwq)
+		return -ENOMEM;
+
+	err = mt76_queue_alloc(dev, hwq, MT7915_TXQ_BAND0, n_desc, 0,
+			       MT_TX_RING_BASE);
+	if (err < 0)
+		return err;
+
+	for (i = 0; i < MT_TXQ_MCU; i++) {
+		q = &dev->mt76.q_tx[i];
+		INIT_LIST_HEAD(&q->swq);
+		q->q = hwq;
+	}
+
+	return 0;
+}
+
+static int
+mt7915_init_mcu_queue(struct mt7915_dev *dev, struct mt76_sw_queue *q,
+		      int idx, int n_desc)
+{
+	struct mt76_queue *hwq;
+	int err;
+
+	hwq = devm_kzalloc(dev->mt76.dev, sizeof(*hwq), GFP_KERNEL);
+	if (!hwq)
+		return -ENOMEM;
+
+	err = mt76_queue_alloc(dev, hwq, idx, n_desc, 0, MT_TX_RING_BASE);
+	if (err < 0)
+		return err;
+
+	INIT_LIST_HEAD(&q->swq);
+	q->q = hwq;
+
+	return 0;
+}
+
+void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
+			 struct sk_buff *skb)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	__le32 *rxd = (__le32 *)skb->data;
+	enum rx_pkt_type type;
+
+	type = FIELD_GET(MT_RXD0_PKT_TYPE, le32_to_cpu(rxd[0]));
+
+	switch (type) {
+	case PKT_TYPE_TXRX_NOTIFY:
+		mt7915_mac_tx_free(dev, skb);
+		break;
+	case PKT_TYPE_RX_EVENT:
+		mt7915_mcu_rx_event(dev, skb);
+		break;
+	case PKT_TYPE_NORMAL:
+		if (!mt7915_mac_fill_rx(dev, skb)) {
+			mt76_rx(&dev->mt76, q, skb);
+			return;
+		}
+		/* fall through */
+	default:
+		dev_kfree_skb(skb);
+		break;
+	}
+}
+
+static int mt7915_poll_tx(struct napi_struct *napi, int budget)
+{
+	static const u8 queue_map[] = {
+		MT_TXQ_MCU,
+		MT_TXQ_MCU_WA,
+		MT_TXQ_BE
+	};
+	struct mt7915_dev *dev;
+	int i;
+
+	dev = container_of(napi, struct mt7915_dev, mt76.tx_napi);
+
+	for (i = 0; i < ARRAY_SIZE(queue_map); i++)
+		mt76_queue_tx_cleanup(dev, queue_map[i], false);
+
+	if (napi_complete_done(napi, 0))
+		mt7915_irq_enable(dev, MT_INT_TX_DONE_ALL);
+
+	for (i = 0; i < ARRAY_SIZE(queue_map); i++)
+		mt76_queue_tx_cleanup(dev, queue_map[i], false);
+
+	mt7915_mac_sta_poll(dev);
+
+	tasklet_schedule(&dev->mt76.tx_tasklet);
+
+	return 0;
+}
+
+void mt7915_dma_prefetch(struct mt7915_dev *dev)
+{
+#define PREFETCH(base, depth)	((base) << 16 | (depth))
+
+	mt76_wr(dev, MT_WFDMA0_RX_RING0_EXT_CTRL, PREFETCH(0x0, 0x4));
+	mt76_wr(dev, MT_WFDMA0_RX_RING1_EXT_CTRL, PREFETCH(0x40, 0x4));
+	mt76_wr(dev, MT_WFDMA0_RX_RING2_EXT_CTRL, PREFETCH(0x80, 0x0));
+
+	mt76_wr(dev, MT_WFDMA1_TX_RING0_EXT_CTRL, PREFETCH(0x80, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING1_EXT_CTRL, PREFETCH(0xc0, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING2_EXT_CTRL, PREFETCH(0x100, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING3_EXT_CTRL, PREFETCH(0x140, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING4_EXT_CTRL, PREFETCH(0x180, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING5_EXT_CTRL, PREFETCH(0x1c0, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING6_EXT_CTRL, PREFETCH(0x200, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING7_EXT_CTRL, PREFETCH(0x240, 0x4));
+
+	mt76_wr(dev, MT_WFDMA1_TX_RING16_EXT_CTRL, PREFETCH(0x280, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING17_EXT_CTRL, PREFETCH(0x2c0, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING18_EXT_CTRL, PREFETCH(0x300, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING19_EXT_CTRL, PREFETCH(0x340, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING20_EXT_CTRL, PREFETCH(0x380, 0x4));
+	mt76_wr(dev, MT_WFDMA1_TX_RING21_EXT_CTRL, PREFETCH(0x3c0, 0x0));
+
+	mt76_wr(dev, MT_WFDMA1_RX_RING0_EXT_CTRL, PREFETCH(0x3c0, 0x4));
+	mt76_wr(dev, MT_WFDMA1_RX_RING1_EXT_CTRL, PREFETCH(0x400, 0x4));
+	mt76_wr(dev, MT_WFDMA1_RX_RING2_EXT_CTRL, PREFETCH(0x440, 0x4));
+	mt76_wr(dev, MT_WFDMA1_RX_RING3_EXT_CTRL, PREFETCH(0x480, 0x0));
+}
+
+int mt7915_dma_init(struct mt7915_dev *dev)
+{
+	/* Increase buffer size to receive large VHT/HE MPDUs */
+	int rx_buf_size = MT_RX_BUF_SIZE * 2;
+	int ret;
+
+	mt76_dma_attach(&dev->mt76);
+
+	/* configure global setting */
+	mt76_set(dev, MT_WFDMA1_GLO_CFG,
+		 MT_WFDMA1_GLO_CFG_OMIT_TX_INFO |
+		 MT_WFDMA1_GLO_CFG_OMIT_RX_INFO);
+
+	/* configure prefetch settings */
+	mt7915_dma_prefetch(dev);
+
+	/* reset dma idx */
+	mt76_wr(dev, MT_WFDMA0_RST_DTX_PTR, ~0);
+	mt76_wr(dev, MT_WFDMA1_RST_DTX_PTR, ~0);
+
+	/* configure delay interrupt */
+	mt76_wr(dev, MT_WFDMA0_PRI_DLY_INT_CFG0, 0);
+	mt76_wr(dev, MT_WFDMA1_PRI_DLY_INT_CFG0, 0);
+
+	/* init tx queue */
+	ret = mt7915_init_tx_queues(dev, MT7915_TX_RING_SIZE);
+	if (ret)
+		return ret;
+
+	/* command to WM */
+	ret = mt7915_init_mcu_queue(dev, &dev->mt76.q_tx[MT_TXQ_MCU],
+				    MT7915_TXQ_MCU_WM,
+				    MT7915_TX_MCU_RING_SIZE);
+	if (ret)
+		return ret;
+
+	/* command to WA */
+	ret = mt7915_init_mcu_queue(dev, &dev->mt76.q_tx[MT_TXQ_MCU_WA],
+				    MT7915_TXQ_MCU_WA,
+				    MT7915_TX_MCU_RING_SIZE);
+	if (ret)
+		return ret;
+
+	/* firmware download */
+	ret = mt7915_init_mcu_queue(dev, &dev->mt76.q_tx[MT_TXQ_FWDL],
+				    MT7915_TXQ_FWDL,
+				    MT7915_TX_FWDL_RING_SIZE);
+	if (ret)
+		return ret;
+
+	/* event from WM */
+	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU],
+			       MT7915_RXQ_MCU_WM, MT7915_RX_MCU_RING_SIZE,
+			       rx_buf_size, MT_RX_EVENT_RING_BASE);
+	if (ret)
+		return ret;
+
+	/* event from WA */
+	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MCU_WA],
+			       MT7915_RXQ_MCU_WA, MT7915_RX_MCU_RING_SIZE,
+			       rx_buf_size, MT_RX_EVENT_RING_BASE);
+	if (ret)
+		return ret;
+
+	/* rx data */
+	ret = mt76_queue_alloc(dev, &dev->mt76.q_rx[MT_RXQ_MAIN], 0,
+			       MT7915_RX_RING_SIZE, rx_buf_size,
+			       MT_RX_DATA_RING_BASE);
+	if (ret)
+		return ret;
+
+	ret = mt76_init_queues(dev);
+	if (ret < 0)
+		return ret;
+
+	netif_tx_napi_add(&dev->mt76.napi_dev, &dev->mt76.tx_napi,
+			  mt7915_poll_tx, NAPI_POLL_WEIGHT);
+	napi_enable(&dev->mt76.tx_napi);
+
+	/* hif wait WFDMA idle */
+	mt76_set(dev, MT_WFDMA0_BUSY_ENA,
+		 MT_WFDMA0_BUSY_ENA_TX_FIFO0 |
+		 MT_WFDMA0_BUSY_ENA_TX_FIFO1 |
+		 MT_WFDMA0_BUSY_ENA_RX_FIFO);
+
+	mt76_set(dev, MT_WFDMA1_BUSY_ENA,
+		 MT_WFDMA1_BUSY_ENA_TX_FIFO0 |
+		 MT_WFDMA1_BUSY_ENA_TX_FIFO1 |
+		 MT_WFDMA1_BUSY_ENA_RX_FIFO);
+
+	mt76_set(dev, MT_WFDMA0_PCIE1_BUSY_ENA,
+		 MT_WFDMA0_PCIE1_BUSY_ENA_TX_FIFO0 |
+		 MT_WFDMA0_PCIE1_BUSY_ENA_TX_FIFO1 |
+		 MT_WFDMA0_PCIE1_BUSY_ENA_RX_FIFO);
+
+	mt76_set(dev, MT_WFDMA1_PCIE1_BUSY_ENA,
+		 MT_WFDMA1_PCIE1_BUSY_ENA_TX_FIFO0 |
+		 MT_WFDMA1_PCIE1_BUSY_ENA_TX_FIFO1 |
+		 MT_WFDMA1_PCIE1_BUSY_ENA_RX_FIFO);
+
+	mt76_poll(dev, MT_WFDMA_EXT_CSR_HIF_MISC,
+		  MT_WFDMA_EXT_CSR_HIF_MISC_BUSY, 0, 1000);
+
+	/* set WFDMA Tx/Rx */
+	mt76_set(dev, MT_WFDMA0_GLO_CFG,
+		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+	mt76_set(dev, MT_WFDMA1_GLO_CFG,
+		 MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN);
+
+	/* enable interrupts for TX/RX rings */
+	mt7915_irq_enable(dev, MT_INT_RX_DONE_ALL | MT_INT_TX_DONE_ALL |
+			  MT_INT_MCU_CMD);
+
+	return 0;
+}
+
+void mt7915_dma_cleanup(struct mt7915_dev *dev)
+{
+	/* disable */
+	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
+		   MT_WFDMA0_GLO_CFG_TX_DMA_EN |
+		   MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+	mt76_clear(dev, MT_WFDMA1_GLO_CFG,
+		   MT_WFDMA1_GLO_CFG_TX_DMA_EN |
+		   MT_WFDMA1_GLO_CFG_RX_DMA_EN);
+
+	/* reset */
+	mt76_clear(dev, MT_WFDMA1_RST,
+		   MT_WFDMA1_RST_DMASHDL_ALL_RST |
+		   MT_WFDMA1_RST_LOGIC_RST);
+
+	mt76_set(dev, MT_WFDMA1_RST,
+		 MT_WFDMA1_RST_DMASHDL_ALL_RST |
+		 MT_WFDMA1_RST_LOGIC_RST);
+
+	mt76_clear(dev, MT_WFDMA0_RST,
+		   MT_WFDMA0_RST_DMASHDL_ALL_RST |
+		   MT_WFDMA0_RST_LOGIC_RST);
+
+	mt76_set(dev, MT_WFDMA0_RST,
+		 MT_WFDMA0_RST_DMASHDL_ALL_RST |
+		 MT_WFDMA0_RST_LOGIC_RST);
+
+	tasklet_kill(&dev->mt76.tx_tasklet);
+	mt76_dma_cleanup(&dev->mt76);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
new file mode 100644
index 000000000000..2099dd40530f
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -0,0 +1,125 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include "mt7915.h"
+#include "eeprom.h"
+
+static inline bool mt7915_efuse_valid(u8 val)
+{
+	return !(val == 0xff);
+}
+
+u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset)
+{
+	u8 *data = dev->mt76.eeprom.data;
+
+	if (!mt7915_efuse_valid(data[offset]))
+		mt7915_mcu_get_eeprom(dev, offset);
+
+	return data[offset];
+}
+
+static int mt7915_eeprom_load(struct mt7915_dev *dev)
+{
+	int ret;
+
+	ret = mt76_eeprom_init(&dev->mt76, MT7915_EEPROM_SIZE);
+	if (ret < 0)
+		return ret;
+
+	memset(dev->mt76.eeprom.data, -1, MT7915_EEPROM_SIZE);
+
+	return 0;
+}
+
+static int mt7915_check_eeprom(struct mt7915_dev *dev)
+{
+	u16 val;
+	u8 *eeprom = dev->mt76.eeprom.data;
+
+	mt7915_eeprom_read(dev, 0);
+	val = get_unaligned_le16(eeprom);
+
+	switch (val) {
+	case 0x7915:
+		return 0;
+	default:
+		return -EINVAL;
+	}
+}
+
+static void mt7915_eeprom_parse_hw_cap(struct mt7915_dev *dev)
+{
+	u8 *eeprom = dev->mt76.eeprom.data;
+	u8 tx_mask, max_nss = 4;
+	u32 val = mt7915_eeprom_read(dev, MT_EE_WIFI_CONF);
+
+	val = FIELD_GET(MT_EE_WIFI_CONF_BAND_SEL, val);
+	switch (val) {
+	case MT_EE_5GHZ:
+		dev->mt76.cap.has_5ghz = true;
+		break;
+	case MT_EE_2GHZ:
+		dev->mt76.cap.has_2ghz = true;
+		break;
+	default:
+		dev->mt76.cap.has_2ghz = true;
+		dev->mt76.cap.has_5ghz = true;
+		break;
+	}
+
+	/* read tx mask from eeprom */
+	tx_mask =  FIELD_GET(MT_EE_WIFI_CONF_TX_MASK,
+			     eeprom[MT_EE_WIFI_CONF]);
+	if (!tx_mask || tx_mask > max_nss)
+		tx_mask = max_nss;
+
+	dev->chainmask = BIT(tx_mask) - 1;
+	dev->mphy.antenna_mask = dev->chainmask;
+	dev->phy.chainmask = dev->chainmask;
+}
+
+int mt7915_eeprom_init(struct mt7915_dev *dev)
+{
+	int ret;
+
+	ret = mt7915_eeprom_load(dev);
+	if (ret < 0)
+		return ret;
+
+	ret = mt7915_check_eeprom(dev);
+	if (ret)
+		return ret;
+
+	mt7915_eeprom_parse_hw_cap(dev);
+	memcpy(dev->mt76.macaddr, dev->mt76.eeprom.data + MT_EE_MAC_ADDR,
+	       ETH_ALEN);
+
+	mt76_eeprom_override(&dev->mt76);
+
+	return 0;
+}
+
+int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
+				   struct ieee80211_channel *chan,
+				   u8 chain_idx)
+{
+	int index;
+	bool tssi_on;
+
+	if (chain_idx > 3)
+		return -EINVAL;
+
+	tssi_on = mt7915_tssi_enabled(dev, chan->band);
+
+	if (chan->band == NL80211_BAND_2GHZ) {
+		index = MT_EE_TX0_POWER_2G + chain_idx * 3 + !tssi_on;
+	} else {
+		int group = tssi_on ?
+			    mt7915_get_channel_group(chan->hw_value) : 8;
+
+		index = MT_EE_TX0_POWER_5G + chain_idx * 12 + group;
+	}
+
+	return mt7915_eeprom_read(dev, index);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
new file mode 100644
index 000000000000..30fc607e466d
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -0,0 +1,78 @@
+/* SPDX-License-Identifier: ISC */
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#ifndef __MT7915_EEPROM_H
+#define __MT7915_EEPROM_H
+
+#include "mt7915.h"
+
+struct cal_data {
+	u8 count;
+	u16 offset[60];
+};
+
+enum mt7915_eeprom_field {
+	MT_EE_CHIP_ID =		0x000,
+	MT_EE_VERSION =		0x002,
+	MT_EE_MAC_ADDR =	0x004,
+	MT_EE_DDIE_FT_VERSION =	0x050,
+	MT_EE_WIFI_CONF =	0x190,
+	MT_EE_TX0_POWER_2G =	0x2fc,
+	MT_EE_TX0_POWER_5G =	0x34b,
+	MT_EE_ADIE_FT_VERSION =	0x9a0,
+
+	__MT_EE_MAX =		0xe00
+};
+
+#define MT_EE_WIFI_CONF_TX_MASK			GENMASK(2, 0)
+#define MT_EE_WIFI_CONF_BAND_SEL		GENMASK(7, 6)
+#define MT_EE_WIFI_CONF_TSSI0_2G		BIT(0)
+#define MT_EE_WIFI_CONF_TSSI0_5G		BIT(2)
+#define MT_EE_WIFI_CONF_TSSI1_5G		BIT(4)
+
+enum mt7915_eeprom_band {
+	MT_EE_DUAL_BAND,
+	MT_EE_5GHZ,
+	MT_EE_2GHZ,
+	MT_EE_DBDC,
+};
+
+struct sku_group {
+	u8 len;
+	u16 offset[2];
+	const u8 *delta_map;
+};
+
+static inline int
+mt7915_get_channel_group(int channel)
+{
+	if (channel >= 184 && channel <= 196)
+		return 0;
+	if (channel <= 48)
+		return 1;
+	if (channel <= 64)
+		return 2;
+	if (channel <= 96)
+		return 3;
+	if (channel <= 112)
+		return 4;
+	if (channel <= 128)
+		return 5;
+	if (channel <= 144)
+		return 6;
+	return 7;
+}
+
+static inline bool
+mt7915_tssi_enabled(struct mt7915_dev *dev, enum nl80211_band band)
+{
+	u8 *eep = dev->mt76.eeprom.data;
+
+	/* TODO: DBDC */
+	if (band == NL80211_BAND_5GHZ)
+		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF_TSSI0_5G;
+	else
+		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF_TSSI0_2G;
+}
+
+#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
new file mode 100644
index 000000000000..fb596afdf0be
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -0,0 +1,395 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include <linux/etherdevice.h>
+#include "mt7915.h"
+#include "mac.h"
+#include "eeprom.h"
+
+/*
+ * Program the per-band MAC defaults for hardware band @band
+ * (mt7915_mac_init() calls this for bands 0 and 1).
+ */
+static void
+mt7915_mac_init_band(struct mt7915_dev *dev, u8 band)
+{
+	u32 mask, set;
+
+	mt76_rmw_field(dev, MT_TMAC_CTCR0(band),
+		       MT_TMAC_CTCR0_INS_DDLMT_REFTIME, 0x3f);
+	mt76_set(dev, MT_TMAC_CTCR0(band),
+		 MT_TMAC_CTCR0_INS_DDLMT_VHT_SMPDU_EN |
+		 MT_TMAC_CTCR0_INS_DDLMT_EN);
+
+	/* set the MGMT, non-BAR control and BAR fields to MT_MDP_TO_HIF */
+	mask = MT_MDP_RCFR0_MCU_RX_MGMT |
+	       MT_MDP_RCFR0_MCU_RX_CTL_NON_BAR |
+	       MT_MDP_RCFR0_MCU_RX_CTL_BAR;
+	set = FIELD_PREP(MT_MDP_RCFR0_MCU_RX_MGMT, MT_MDP_TO_HIF) |
+	      FIELD_PREP(MT_MDP_RCFR0_MCU_RX_CTL_NON_BAR, MT_MDP_TO_HIF) |
+	      FIELD_PREP(MT_MDP_RCFR0_MCU_RX_CTL_BAR, MT_MDP_TO_HIF);
+	mt76_rmw(dev, MT_MDP_BNRCFR0(band), mask, set);
+
+	/* same for the bypass and dropped unicast/multicast fields */
+	mask = MT_MDP_RCFR1_MCU_RX_BYPASS |
+	       MT_MDP_RCFR1_RX_DROPPED_UCAST |
+	       MT_MDP_RCFR1_RX_DROPPED_MCAST;
+	set = FIELD_PREP(MT_MDP_RCFR1_MCU_RX_BYPASS, MT_MDP_TO_HIF) |
+	      FIELD_PREP(MT_MDP_RCFR1_RX_DROPPED_UCAST, MT_MDP_TO_HIF) |
+	      FIELD_PREP(MT_MDP_RCFR1_RX_DROPPED_MCAST, MT_MDP_TO_HIF);
+	mt76_rmw(dev, MT_MDP_BNRCFR1(band), mask, set);
+
+	/* set the RXTIME_EN bit in both RMAC MIB time registers of the band */
+	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
+	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(band), MT_WF_RMAC_MIB_RXTIME_EN);
+}
+
+/*
+ * One-time MAC setup: program the maximum RX length (1536) in the DMA and
+ * MDP blocks, issue an ADM counter clear for every WTBL entry, initialize
+ * both bands and set the RTS threshold on the primary PHY.
+ */
+static void mt7915_mac_init(struct mt7915_dev *dev)
+{
+	int i;
+
+	mt76_rmw_field(dev, MT_DMA_DCR0, MT_DMA_DCR0_MAX_RX_LEN, 1536);
+	mt76_rmw_field(dev, MT_MDP_DCR1, MT_MDP_DCR1_MAX_RX_LEN, 1536);
+	/* disable hardware de-agg */
+	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
+
+	/* the result of each update (poll timeout) is not checked here */
+	for (i = 0; i < MT7915_WTBL_SIZE; i++)
+		mt7915_mac_wtbl_update(dev, i,
+				       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+
+	mt7915_mac_init_band(dev, 0);
+	mt7915_mac_init_band(dev, 1);
+	mt7915_mcu_set_rts_thresh(&dev->phy, 0x92b);
+}
+
+/*
+ * Derive the power limits of every channel in @sband from the EEPROM.
+ *
+ * The highest target power reported for any active chain is halved and
+ * stored as the channel's original maximum power; max_power is that value
+ * capped by the regulatory limit.
+ */
+static void
+mt7915_init_txpower_band(struct mt7915_dev *dev,
+			 struct ieee80211_supported_band *sband)
+{
+	int chains = hweight8(dev->mphy.antenna_mask);
+	int ch, chain;
+
+	for (ch = 0; ch < sband->n_channels; ch++) {
+		struct ieee80211_channel *chan = &sband->channels[ch];
+		u32 peak = 0;
+
+		/* keep the highest per-chain target power */
+		for (chain = 0; chain < chains; chain++) {
+			u32 pwr;
+
+			pwr = mt7915_eeprom_get_target_power(dev, chan, chain);
+			if (pwr > peak)
+				peak = pwr;
+		}
+
+		peak /= 2;
+		chan->orig_mpwr = peak;
+		chan->max_power = min_t(int, chan->max_reg_power, peak);
+	}
+}
+
+/* Initialize the channel power limits of the 2.4 GHz band, then the 5 GHz band. */
+static void mt7915_init_txpower(struct mt7915_dev *dev)
+{
+	struct ieee80211_supported_band *bands[] = {
+		&dev->mphy.sband_2g.sband,
+		&dev->mphy.sband_5g.sband,
+	};
+	int i;
+
+	for (i = 0; i < ARRAY_SIZE(bands); i++)
+		mt7915_init_txpower_band(dev, bands[i]);
+}
+
+/*
+ * Deferred initialization (dev->init_work): push the EEPROM to the MCU,
+ * set up the MAC and compute the per-channel TX power limits.
+ */
+static void mt7915_init_work(struct work_struct *work)
+{
+	struct mt7915_dev *dev;
+
+	dev = container_of(work, struct mt7915_dev, init_work);
+
+	mt7915_mcu_set_eeprom(dev);
+	mt7915_mac_init(dev);
+	mt7915_init_txpower(dev);
+}
+
+/*
+ * Bring up DMA, MCU and EEPROM and reserve WTBL entry 0 for the global wcid.
+ *
+ * Returns 0 on success, the error of the failing init step, or -ENOSPC when
+ * wcid index 0 could not be obtained. Nothing set up by an earlier step is
+ * undone here on failure.
+ * NOTE(review): presumably the caller performs the cleanup - confirm.
+ */
+static int mt7915_init_hardware(struct mt7915_dev *dev)
+{
+	int ret, idx;
+
+	/* write all-ones to the interrupt source register */
+	mt76_wr(dev, MT_INT_SOURCE_CSR, ~0);
+
+	INIT_WORK(&dev->init_work, mt7915_init_work);
+	spin_lock_init(&dev->token_lock);
+	idr_init(&dev->token);
+
+	ret = mt7915_dma_init(dev);
+	if (ret)
+		return ret;
+
+	set_bit(MT76_STATE_INITIALIZED, &dev->mphy.state);
+
+	ret = mt7915_mcu_init(dev);
+	if (ret)
+		return ret;
+
+	ret = mt7915_eeprom_init(dev);
+	if (ret < 0)
+		return ret;
+
+	/* Beacon and mgmt frames should occupy wcid 0 */
+	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA - 1);
+	/* any result other than index 0 is treated as a failure */
+	if (idx)
+		return -ENOSPC;
+
+	dev->mt76.global_wcid.idx = idx;
+	dev->mt76.global_wcid.hw_key_idx = -1;
+	dev->mt76.global_wcid.tx_info |= MT_WCID_TX_INFO_SET;
+	rcu_assign_pointer(dev->mt76.wcid[idx], &dev->mt76.global_wcid);
+
+	return 0;
+}
+
+/*
+ * Initializer for one CCK entry of the rate table. hw_value packs the PHY
+ * type into bits 8 and up and the rate index into the low byte; the
+ * short-preamble variant uses rate index 4 + _idx.
+ */
+#define CCK_RATE(_idx, _rate) {						\
+	.bitrate = _rate,						\
+	.flags = IEEE80211_RATE_SHORT_PREAMBLE,				\
+	.hw_value = (MT_PHY_TYPE_CCK << 8) | (_idx),			\
+	.hw_value_short = (MT_PHY_TYPE_CCK << 8) | (4 + (_idx)),	\
+}
+
+/*
+ * Initializer for one OFDM entry of the rate table; the long and short
+ * hardware values are identical.
+ */
+#define OFDM_RATE(_idx, _rate) {					\
+	.bitrate = _rate,						\
+	.hw_value = (MT_PHY_TYPE_OFDM << 8) | (_idx),			\
+	.hw_value_short = (MT_PHY_TYPE_OFDM << 8) | (_idx),		\
+}
+
+/*
+ * Legacy rate table handed to mt76_register_device(): four CCK rates
+ * followed by eight OFDM rates. The first macro argument is the hardware
+ * rate index, the second the bitrate (mac80211 expresses
+ * ieee80211_rate.bitrate in units of 100 kbit/s).
+ */
+static struct ieee80211_rate mt7915_rates[] = {
+	CCK_RATE(0, 10),
+	CCK_RATE(1, 20),
+	CCK_RATE(2, 55),
+	CCK_RATE(3, 110),
+	OFDM_RATE(11, 60),
+	OFDM_RATE(15, 90),
+	OFDM_RATE(10, 120),
+	OFDM_RATE(14, 180),
+	OFDM_RATE(9,  240),
+	OFDM_RATE(13, 360),
+	OFDM_RATE(8,  480),
+	OFDM_RATE(12, 540),
+};
+
+/*
+ * Interface limits: at most one ad-hoc interface, and up to
+ * MT7915_MAX_INTERFACES AP/station interfaces (plus mesh points when
+ * CONFIG_MAC80211_MESH is enabled).
+ */
+static const struct ieee80211_iface_limit if_limits[] = {
+	{
+		.max = 1,
+		.types = BIT(NL80211_IFTYPE_ADHOC)
+	}, {
+		.max = MT7915_MAX_INTERFACES,
+		.types = BIT(NL80211_IFTYPE_AP) |
+#ifdef CONFIG_MAC80211_MESH
+			 BIT(NL80211_IFTYPE_MESH_POINT) |
+#endif
+			 BIT(NL80211_IFTYPE_STATION)
+	}
+};
+
+/*
+ * The single supported interface combination: the limits above, at most 4
+ * interfaces in total on one channel, with radar detection advertised for
+ * every listed channel width.
+ */
+static const struct ieee80211_iface_combination if_comb[] = {
+	{
+		.limits = if_limits,
+		.n_limits = ARRAY_SIZE(if_limits),
+		.max_interfaces = 4,
+		.num_different_channels = 1,
+		.beacon_int_infra_match = true,
+		.radar_detect_widths = BIT(NL80211_CHAN_WIDTH_20_NOHT) |
+				       BIT(NL80211_CHAN_WIDTH_20) |
+				       BIT(NL80211_CHAN_WIDTH_40) |
+				       BIT(NL80211_CHAN_WIDTH_80) |
+				       BIT(NL80211_CHAN_WIDTH_160) |
+				       BIT(NL80211_CHAN_WIDTH_80P80),
+	}
+};
+
+/*
+ * cfg80211 regulatory notifier: record the DFS region of @request and, when
+ * the current channel carries IEEE80211_CHAN_RADAR, re-initialize the radar
+ * detector of the PHY behind @wiphy.
+ */
+static void
+mt7915_regd_notifier(struct wiphy *wiphy,
+		     struct regulatory_request *request)
+{
+	struct ieee80211_hw *hw = wiphy_to_ieee80211_hw(wiphy);
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt76_phy *mphy = hw->priv;
+	struct mt7915_phy *phy = mphy->priv;
+
+	dev->mt76.region = request->dfs_region;
+
+	/* the detector only matters while sitting on a radar channel */
+	if (mphy->chandef.chan->flags & IEEE80211_CHAN_RADAR)
+		mt7915_dfs_init_radar_detector(phy);
+}
+
+/*
+ * Fill in the mac80211/cfg80211 capabilities shared by the primary and the
+ * extension PHY: queue and aggregation limits, private data sizes,
+ * interface combinations, regulatory notifier and feature flags.
+ */
+static void
+mt7915_init_wiphy(struct ieee80211_hw *hw)
+{
+	struct wiphy *wiphy = hw->wiphy;
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+
+	/* mac80211 limits and per-object private data sizes */
+	hw->queues = 4;
+	hw->max_tx_fragments = 4;
+	hw->max_tx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	hw->max_rx_aggregation_subframes = IEEE80211_MAX_AMPDU_BUF;
+	hw->vif_data_size = sizeof(struct mt7915_vif);
+	hw->sta_data_size = sizeof(struct mt7915_sta);
+	ieee80211_hw_set(hw, HAS_RATE_CONTROL);
+
+	/* cfg80211 capabilities */
+	wiphy->n_iface_combinations = ARRAY_SIZE(if_comb);
+	wiphy->iface_combinations = if_comb;
+	wiphy->reg_notifier = mt7915_regd_notifier;
+	wiphy->flags |= WIPHY_FLAG_HAS_CHANNEL_SWITCH;
+	wiphy_ext_feature_set(wiphy, NL80211_EXT_FEATURE_VHT_IBSS);
+
+	phy->slottime = 9;
+}
+
+/*
+ * Shrink the primary PHY's capabilities for dual-band operation: drop the
+ * 160 MHz VHT capabilities and keep only part of the chainmask (shifted
+ * right by two for a 0xf mask, by one otherwise), then refresh the stream
+ * capabilities.
+ */
+static void
+mt7915_cap_dbdc_enable(struct mt7915_dev *dev)
+{
+	struct wiphy *wiphy = dev->mphy.hw->wiphy;
+	u32 cap_160 = IEEE80211_VHT_CAP_SHORT_GI_160 |
+		      IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ;
+	int shift = dev->chainmask == 0xf ? 2 : 1;
+
+	dev->mphy.sband_5g.sband.vht_cap.cap &= ~cap_160;
+
+	dev->mphy.antenna_mask = dev->chainmask >> shift;
+	dev->phy.chainmask = dev->mphy.antenna_mask;
+	wiphy->available_antennas_rx = dev->phy.chainmask;
+	wiphy->available_antennas_tx = dev->phy.chainmask;
+
+	mt76_set_stream_caps(&dev->mt76, true);
+}
+
+/*
+ * Give the primary PHY the full capability set: advertise the 160 MHz VHT
+ * capabilities, assign it the complete chainmask and refresh the stream
+ * capabilities.
+ */
+static void
+mt7915_cap_dbdc_disable(struct mt7915_dev *dev)
+{
+	struct wiphy *wiphy = dev->mphy.hw->wiphy;
+	u32 cap_160 = IEEE80211_VHT_CAP_SHORT_GI_160 |
+		      IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ;
+
+	dev->mphy.sband_5g.sband.vht_cap.cap |= cap_160;
+
+	/* the primary PHY owns every chain */
+	dev->phy.chainmask = dev->chainmask;
+	dev->mphy.antenna_mask = dev->chainmask;
+	wiphy->available_antennas_tx = dev->chainmask;
+	wiphy->available_antennas_rx = dev->chainmask;
+
+	mt76_set_stream_caps(&dev->mt76, true);
+}
+
+/*
+ * Allocate and register the extension (second) PHY.
+ *
+ * Returns 0 when the extension PHY is registered (or already existed),
+ * -EINVAL when bit 5 of MT_HW_BOUND is not set or the primary PHY is
+ * running, -ENOMEM when the PHY cannot be allocated, or the error returned
+ * by mt76_register_phy().
+ *
+ * The primary PHY's capabilities are reduced before the new PHY is
+ * allocated; every failure path after that point restores them so a failed
+ * attempt does not leave the primary PHY with a reduced chainmask and
+ * without its 160 MHz capabilities.
+ */
+int mt7915_register_ext_phy(struct mt7915_dev *dev)
+{
+	struct mt7915_phy *phy = mt7915_ext_phy(dev);
+	struct mt76_phy *mphy;
+	int ret;
+	bool bound;
+
+	/* TODO: enable DBDC */
+	bound = mt7915_l1_rr(dev, MT_HW_BOUND) & BIT(5);
+	if (!bound)
+		return -EINVAL;
+
+	if (test_bit(MT76_STATE_RUNNING, &dev->mphy.state))
+		return -EINVAL;
+
+	if (phy)
+		return 0;
+
+	mt7915_cap_dbdc_enable(dev);
+	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7915_ops);
+	if (!mphy) {
+		ret = -ENOMEM;
+		goto err_restore_caps;
+	}
+
+	phy = mphy->priv;
+	phy->dev = dev;
+	phy->mt76 = mphy;
+	/* the extension PHY takes the chains the primary PHY gave up */
+	phy->chainmask = dev->chainmask & ~dev->phy.chainmask;
+	mphy->antenna_mask = BIT(hweight8(phy->chainmask)) - 1;
+	mt7915_init_wiphy(mphy->hw);
+
+	/*
+	 * Make the secondary PHY MAC address local without overlapping with
+	 * the usual MAC address allocation scheme on multiple virtual interfaces
+	 */
+	mphy->hw->wiphy->perm_addr[0] |= 2;
+	mphy->hw->wiphy->perm_addr[0] ^= BIT(7);
+
+	/* The second interface does not get any packets unless it has a vif */
+	ieee80211_hw_set(mphy->hw, WANT_MONITOR_VIF);
+
+	ret = mt76_register_phy(mphy);
+	if (ret)
+		goto err_free_hw;
+
+	return 0;
+
+err_free_hw:
+	ieee80211_free_hw(mphy->hw);
+err_restore_caps:
+	/* undo mt7915_cap_dbdc_enable() */
+	mt7915_cap_dbdc_disable(dev);
+
+	return ret;
+}
+
+/*
+ * Tear down the extension PHY, if one exists: hand the full capability set
+ * back to the primary PHY, then unregister and free the second hardware.
+ */
+void mt7915_unregister_ext_phy(struct mt7915_dev *dev)
+{
+	struct mt76_phy *ext = dev->mt76.phy2;
+
+	/* nothing to do without an extension PHY */
+	if (!mt7915_ext_phy(dev))
+		return;
+
+	mt7915_cap_dbdc_disable(dev);
+	mt76_unregister_phy(ext);
+	ieee80211_free_hw(ext->hw);
+}
+
+/*
+ * Initialize the driver state and the hardware, advertise the HT/VHT
+ * capabilities and register the primary PHY with mac80211.
+ *
+ * Returns 0 on success or a negative error from hardware init, mt76
+ * registration or debugfs setup. The remaining setup (dev->init_work) is
+ * queued after registration and runs asynchronously.
+ * NOTE(review): nothing is unwound here when a later step fails; presumably
+ * the caller handles cleanup - confirm.
+ */
+int mt7915_register_device(struct mt7915_dev *dev)
+{
+	struct ieee80211_hw *hw = mt76_hw(dev);
+	int ret;
+
+	/* cross-link the primary mt7915 phy with its mt76 phy */
+	dev->phy.dev = dev;
+	dev->phy.mt76 = &dev->mt76.phy;
+	dev->mt76.phy.priv = &dev->phy;
+	INIT_DELAYED_WORK(&dev->mt76.mac_work, mt7915_mac_work);
+	INIT_LIST_HEAD(&dev->sta_poll_list);
+	spin_lock_init(&dev->sta_poll_lock);
+
+	init_waitqueue_head(&dev->reset_wait);
+	INIT_WORK(&dev->reset_work, mt7915_mac_reset_work);
+
+	ret = mt7915_init_hardware(dev);
+	if (ret)
+		return ret;
+
+	mt7915_init_wiphy(hw);
+	dev->mphy.sband_2g.sband.ht_cap.cap |=
+			IEEE80211_HT_CAP_LDPC_CODING |
+			IEEE80211_HT_CAP_MAX_AMSDU;
+	dev->mphy.sband_5g.sband.ht_cap.cap |=
+			IEEE80211_HT_CAP_LDPC_CODING |
+			IEEE80211_HT_CAP_MAX_AMSDU;
+	dev->mphy.sband_5g.sband.vht_cap.cap |=
+			IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991 |
+			IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK;
+	/* start with all chains and 160 MHz assigned to the primary PHY */
+	mt7915_cap_dbdc_disable(dev);
+	dev->phy.dfs_state = -1;
+
+	ret = mt76_register_device(&dev->mt76, true, mt7915_rates,
+				   ARRAY_SIZE(mt7915_rates));
+	if (ret)
+		return ret;
+
+	ieee80211_queue_work(mt76_hw(dev), &dev->init_work);
+
+	return mt7915_init_debugfs(dev);
+}
+
+/*
+ * Full teardown: unregister the extension and the primary PHY, shut down
+ * the MCU and DMA, release every TX token still outstanding and free the
+ * device.
+ */
+void mt7915_unregister_device(struct mt7915_dev *dev)
+{
+	struct mt76_txwi_cache *txwi;
+	int id;
+
+	mt7915_unregister_ext_phy(dev);
+	mt76_unregister_device(&dev->mt76);
+	mt7915_mcu_exit(dev);
+	mt7915_dma_cleanup(dev);
+
+	/*
+	 * Frames whose token is still in the IDR were never reported back:
+	 * unmap their buffers, free the skb and return the txwi to the cache.
+	 */
+	spin_lock_bh(&dev->token_lock);
+	idr_for_each_entry(&dev->token, txwi, id) {
+		mt7915_txp_skb_unmap(&dev->mt76, txwi);
+		if (txwi->skb)
+			dev_kfree_skb_any(txwi->skb);
+		mt76_put_txwi(&dev->mt76, txwi);
+	}
+	spin_unlock_bh(&dev->token_lock);
+	idr_destroy(&dev->token);
+
+	mt76_free_device(&dev->mt76);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
new file mode 100644
index 000000000000..ddba360046c3
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -0,0 +1,1298 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include <linux/etherdevice.h>
+#include <linux/timekeeping.h>
+#include "mt7915.h"
+#include "../dma.h"
+#include "mac.h"
+
+/*
+ * Extract @field from the RXV word @rxv and convert it to the value stored
+ * in chain_signal[]: (raw - 220) / 2.
+ */
+#define to_rssi(field, rxv)	((FIELD_GET(field, rxv) - 220) / 2)
+
+/*
+ * Radar detection parameters: pulse thresholds plus radar patterns for
+ * indices 5..12; pattern slots without an initializer stay zeroed.
+ * NOTE(review): presumably used for the ETSI DFS region - the code that
+ * selects this table is not in view.
+ */
+static const struct mt7915_dfs_radar_spec etsi_radar_specs = {
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
+	.radar_pattern = {
+		[5] =  { 1, 0,  6, 32, 28, 0,  990, 5010, 17, 1, 1 },
+		[6] =  { 1, 0,  9, 32, 28, 0,  615, 5010, 27, 1, 1 },
+		[7] =  { 1, 0, 15, 32, 28, 0,  240,  445, 27, 1, 1 },
+		[8] =  { 1, 0, 12, 32, 28, 0,  240,  510, 42, 1, 1 },
+		[9] =  { 1, 1,  0,  0,  0, 0, 2490, 3343, 14, 0, 0, 12, 32, 28, { }, 126 },
+		[10] = { 1, 1,  0,  0,  0, 0, 2490, 3343, 14, 0, 0, 15, 32, 24, { }, 126 },
+		[11] = { 1, 1,  0,  0,  0, 0,  823, 2510, 14, 0, 0, 18, 32, 28, { },  54 },
+		[12] = { 1, 1,  0,  0,  0, 0,  823, 2510, 14, 0, 0, 27, 32, 24, { },  54 },
+	},
+};
+
+/*
+ * Radar detection parameters: pulse thresholds plus radar patterns for
+ * indices 0..4; pattern slots without an initializer stay zeroed.
+ * NOTE(review): presumably used for the FCC DFS region - the code that
+ * selects this table is not in view.
+ */
+static const struct mt7915_dfs_radar_spec fcc_radar_specs = {
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
+	.radar_pattern = {
+		[0] = { 1, 0,  8,  32, 28, 0, 508, 3076, 13, 1,  1 },
+		[1] = { 1, 0, 12,  32, 28, 0, 140,  240, 17, 1,  1 },
+		[2] = { 1, 0,  8,  32, 28, 0, 190,  510, 22, 1,  1 },
+		[3] = { 1, 0,  6,  32, 28, 0, 190,  510, 32, 1,  1 },
+		[4] = { 1, 0,  9, 255, 28, 0, 323,  343, 13, 1, 32 },
+	},
+};
+
+/*
+ * Radar detection parameters: pulse thresholds plus radar patterns for
+ * indices 0..4 (same values as fcc_radar_specs) and 13..15; pattern slots
+ * without an initializer stay zeroed.
+ * NOTE(review): presumably used for the Japanese DFS region - the code that
+ * selects this table is not in view.
+ */
+static const struct mt7915_dfs_radar_spec jp_radar_specs = {
+	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
+	.radar_pattern = {
+		[0] =  { 1, 0,  8,  32, 28, 0,  508, 3076,  13, 1,  1 },
+		[1] =  { 1, 0, 12,  32, 28, 0,  140,  240,  17, 1,  1 },
+		[2] =  { 1, 0,  8,  32, 28, 0,  190,  510,  22, 1,  1 },
+		[3] =  { 1, 0,  6,  32, 28, 0,  190,  510,  32, 1,  1 },
+		[4] =  { 1, 0,  9, 255, 28, 0,  323,  343,  13, 1, 32 },
+		[13] = { 1, 0,  7,  32, 28, 0, 3836, 3856,  14, 1,  1 },
+		[14] = { 1, 0,  6,  32, 28, 0,  615, 5010, 110, 1,  1 },
+		[15] = { 1, 1,  0,   0,  0, 0,   15, 5010, 110, 0,  0, 12, 32, 28 },
+	},
+};
+
+/*
+ * Look up the wcid a received frame should be accounted to.
+ *
+ * Returns NULL for an out-of-range or unused index. Unicast frames get the
+ * entry stored at @idx; for any other frame the wcid of the interface the
+ * matched station belongs to is returned (NULL if the entry is not a
+ * station or has no interface).
+ */
+static struct mt76_wcid *mt7915_rx_get_wcid(struct mt7915_dev *dev,
+					    u16 idx, bool unicast)
+{
+	struct mt76_wcid *entry;
+	struct mt7915_sta *msta;
+
+	if (idx >= ARRAY_SIZE(dev->mt76.wcid))
+		return NULL;
+
+	entry = rcu_dereference(dev->mt76.wcid[idx]);
+	if (!entry)
+		return NULL;
+
+	if (unicast)
+		return entry;
+
+	if (!entry->sta)
+		return NULL;
+
+	msta = container_of(entry, struct mt7915_sta, wcid);
+
+	return msta->vif ? &msta->vif->sta.wcid : NULL;
+}
+
+/* Station power-save notification hook; intentionally a no-op in this driver. */
+void mt7915_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps)
+{
+}
+
+/*
+ * Issue a WTBL update command for entry @idx with the operation bits in
+ * @mask and wait for the hardware to finish.
+ *
+ * Returns the result of polling MT_WTBL_UPDATE_BUSY for zero with a timeout
+ * argument of 5000.
+ */
+bool mt7915_mac_wtbl_update(struct mt7915_dev *dev, int idx, u32 mask)
+{
+	u32 cmd = FIELD_PREP(MT_WTBL_UPDATE_WLAN_IDX, idx) | mask;
+
+	mt76_rmw(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_WLAN_IDX, cmd);
+
+	return mt76_poll(dev, MT_WTBL_UPDATE, MT_WTBL_UPDATE_BUSY, 0, 5000);
+}
+
+/*
+ * Read word @addr of the LMAC WTBL entry of @wcid.
+ *
+ * The group field (wcid >> 7) is written to MT_WTBLON_TOP_WDUCR first, then
+ * the word is read at MT_WTBL_LMAC_OFFS(wcid, addr).
+ */
+static u32 mt7915_mac_wtbl_lmac_read(struct mt7915_dev *dev, u16 wcid,
+				     u16 addr)
+{
+	u32 group = FIELD_PREP(MT_WTBLON_TOP_WDUCR_GROUP, (wcid >> 7));
+
+	mt76_wr(dev, MT_WTBLON_TOP_WDUCR, group);
+
+	return mt76_rr(dev, MT_WTBL_LMAC_OFFS(wcid, addr));
+}
+
+/*
+ * Drain dev->sta_poll_list: for every queued station read the per-AC TX/RX
+ * airtime counters from its WTBL entry, compute the delta against the
+ * previously stored values and report it to mac80211.
+ *
+ * TODO: use txfree airtime info to avoid runtime accessing in the long run
+ */
+void mt7915_mac_sta_poll(struct mt7915_dev *dev)
+{
+	static const u8 ac_to_tid[] = {
+		[IEEE80211_AC_BE] = 0,
+		[IEEE80211_AC_BK] = 1,
+		[IEEE80211_AC_VI] = 4,
+		[IEEE80211_AC_VO] = 6
+	};
+	static const u8 hw_queue_map[] = {
+		[IEEE80211_AC_BK] = 0,
+		[IEEE80211_AC_BE] = 1,
+		[IEEE80211_AC_VI] = 2,
+		[IEEE80211_AC_VO] = 3,
+	};
+	struct ieee80211_sta *sta;
+	struct mt7915_sta *msta;
+	u32 tx_time[IEEE80211_NUM_ACS], rx_time[IEEE80211_NUM_ACS];
+	int i;
+
+	rcu_read_lock();
+
+	while (true) {
+		bool clear = false;
+		u16 idx;
+
+		/* pop the next station; the lock only covers the list */
+		spin_lock_bh(&dev->sta_poll_lock);
+		if (list_empty(&dev->sta_poll_list)) {
+			spin_unlock_bh(&dev->sta_poll_lock);
+			break;
+		}
+		msta = list_first_entry(&dev->sta_poll_list,
+					struct mt7915_sta, poll_list);
+		list_del_init(&msta->poll_list);
+		spin_unlock_bh(&dev->sta_poll_lock);
+
+		/*
+		 * airtime_ac[] keeps the last TX counters in its first
+		 * IEEE80211_NUM_ACS slots and the RX counters after them.
+		 *
+		 * NOTE(review): dword 21 + i read as RX of AC i is the same
+		 * dword as 20 + (i + 1) read as TX of AC i + 1, so adjacent
+		 * ACs share a register. This looks like the stride should be
+		 * two dwords per AC (20 + 2 * i / 21 + 2 * i) - confirm
+		 * against the WTBL layout.
+		 */
+		for (i = 0, idx = msta->wcid.idx; i < IEEE80211_NUM_ACS; i++) {
+			u32 tx_last = msta->airtime_ac[i];
+			u32 rx_last = msta->airtime_ac[i + IEEE80211_NUM_ACS];
+
+			msta->airtime_ac[i] =
+				mt7915_mac_wtbl_lmac_read(dev, idx, 20 + i);
+			msta->airtime_ac[i + IEEE80211_NUM_ACS] =
+				mt7915_mac_wtbl_lmac_read(dev, idx, 21 + i);
+			tx_time[i] = msta->airtime_ac[i] - tx_last;
+			rx_time[i] = msta->airtime_ac[i + IEEE80211_NUM_ACS] -
+				     rx_last;
+
+			/* a previous value with bit 30 set triggers a reset */
+			if ((tx_last | rx_last) & BIT(30))
+				clear = true;
+		}
+
+		if (clear) {
+			mt7915_mac_wtbl_update(dev, idx,
+					       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+			memset(msta->airtime_ac, 0, sizeof(msta->airtime_ac));
+		}
+
+		/* entries that are not real stations have nothing to report */
+		if (!msta->wcid.sta)
+			continue;
+
+		sta = container_of((void *)msta, struct ieee80211_sta,
+				   drv_priv);
+		for (i = 0; i < IEEE80211_NUM_ACS; i++) {
+			u32 tx_cur = tx_time[i];
+			u32 rx_cur = rx_time[hw_queue_map[i]];
+			u8 tid = ac_to_tid[i];
+
+			if (!tx_cur && !rx_cur)
+				continue;
+
+			ieee80211_sta_register_airtime(sta, tid, tx_cur,
+						       rx_cur);
+		}
+	}
+
+	rcu_read_unlock();
+}
+
+/*
+ * Parse the RX descriptor at the head of @skb, fill in the mt76_rx_status
+ * kept in skb->cb and strip the descriptor (plus header padding) from the
+ * skb.
+ *
+ * Returns 0 on success or -EINVAL when the frame must be dropped, for
+ * instance when the band it arrived on has no running phy, the descriptor
+ * flags a max-length error, the reported rate or bandwidth is not supported,
+ * or an optional descriptor group reaches the end of the skb.
+ */
+int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt7915_phy *phy = &dev->phy;
+	struct ieee80211_supported_band *sband;
+	struct ieee80211_hdr *hdr;
+	struct mt7915_rxv rxv = {};
+	__le32 *rxd = (__le32 *)skb->data;
+	u32 rxd1 = le32_to_cpu(rxd[1]);
+	u32 rxd2 = le32_to_cpu(rxd[2]);
+	u32 rxd3 = le32_to_cpu(rxd[3]);
+	bool unicast, insert_ccmp_hdr = false;
+	u8 remove_pad;
+	int i, idx;
+
+	memset(status, 0, sizeof(*status));
+
+	/* frames flagged with the band index bit belong to the second phy */
+	if (rxd1 & MT_RXD1_NORMAL_BAND_IDX) {
+		mphy = dev->mt76.phy2;
+		if (!mphy)
+			return -EINVAL;
+
+		phy = mphy->priv;
+		status->ext_phy = true;
+	}
+
+	if (!test_bit(MT76_STATE_RUNNING, &mphy->state))
+		return -EINVAL;
+
+	unicast = FIELD_GET(MT_RXD3_NORMAL_ADDR_TYPE, rxd3) == MT_RXD3_NORMAL_U2M;
+	idx = FIELD_GET(MT_RXD1_NORMAL_WLAN_IDX, rxd1);
+	status->wcid = mt7915_rx_get_wcid(dev, idx, unicast);
+
+	/* queue the station for airtime polling unless it is already queued */
+	if (status->wcid) {
+		struct mt7915_sta *msta;
+
+		msta = container_of(status->wcid, struct mt7915_sta, wcid);
+		spin_lock_bh(&dev->sta_poll_lock);
+		if (list_empty(&msta->poll_list))
+			list_add_tail(&msta->poll_list, &dev->sta_poll_list);
+		spin_unlock_bh(&dev->sta_poll_lock);
+	}
+
+	status->freq = mphy->chandef.chan->center_freq;
+	status->band = mphy->chandef.chan->band;
+	if (status->band == NL80211_BAND_5GHZ)
+		sband = &mphy->sband_5g.sband;
+	else
+		sband = &mphy->sband_2g.sband;
+
+	if (!sband->channels)
+		return -EINVAL;
+
+	if (rxd1 & MT_RXD1_NORMAL_FCS_ERR)
+		status->flag |= RX_FLAG_FAILED_FCS_CRC;
+
+	if (rxd1 & MT_RXD1_NORMAL_TKIP_MIC_ERR)
+		status->flag |= RX_FLAG_MMIC_ERROR;
+
+	/* a security mode is set and neither the CLM nor the CM bit is */
+	if (FIELD_GET(MT_RXD1_NORMAL_SEC_MODE, rxd1) != 0 &&
+	    !(rxd1 & (MT_RXD1_NORMAL_CLM | MT_RXD1_NORMAL_CM))) {
+		status->flag |= RX_FLAG_DECRYPTED;
+		status->flag |= RX_FLAG_IV_STRIPPED;
+		status->flag |= RX_FLAG_MMIC_STRIPPED | RX_FLAG_MIC_STRIPPED;
+	}
+
+	if (!(rxd2 & MT_RXD2_NORMAL_NON_AMPDU)) {
+		status->flag |= RX_FLAG_AMPDU_DETAILS;
+
+		/* all subframes of an A-MPDU have the same timestamp */
+		/* rxd[14] is compared and stored as the raw __le32 word */
+		if (phy->rx_ampdu_ts != rxd[14]) {
+			/* skip 0 when the reference counter wraps */
+			if (!++phy->ampdu_ref)
+				phy->ampdu_ref++;
+		}
+		phy->rx_ampdu_ts = rxd[14];
+
+		status->ampdu_ref = phy->ampdu_ref;
+	}
+
+	remove_pad = FIELD_GET(MT_RXD2_NORMAL_HDR_OFFSET, rxd2);
+
+	if (rxd2 & MT_RXD2_NORMAL_MAX_LEN_ERROR)
+		return -EINVAL;
+
+	/*
+	 * Skip the six fixed descriptor words, then every optional group that
+	 * rxd1 announces; after each group check that the cursor is still
+	 * inside the skb.
+	 */
+	rxd += 6;
+	if (rxd1 & MT_RXD1_NORMAL_GROUP_4) {
+		rxd += 4;
+		if ((u8 *)rxd - skb->data >= skb->len)
+			return -EINVAL;
+	}
+
+	if (rxd1 & MT_RXD1_NORMAL_GROUP_1) {
+		u8 *data = (u8 *)rxd;
+
+		/* the first six bytes of the group are copied reversed */
+		if (status->flag & RX_FLAG_DECRYPTED) {
+			status->iv[0] = data[5];
+			status->iv[1] = data[4];
+			status->iv[2] = data[3];
+			status->iv[3] = data[2];
+			status->iv[4] = data[1];
+			status->iv[5] = data[0];
+
+			insert_ccmp_hdr = FIELD_GET(MT_RXD2_NORMAL_FRAG, rxd2);
+		}
+		rxd += 4;
+		if ((u8 *)rxd - skb->data >= skb->len)
+			return -EINVAL;
+	}
+
+	if (rxd1 & MT_RXD1_NORMAL_GROUP_2) {
+		rxd += 2;
+		if ((u8 *)rxd - skb->data >= skb->len)
+			return -EINVAL;
+	}
+
+	/* RXD Group 3 - P-RXV */
+	if (rxd1 & MT_RXD1_NORMAL_GROUP_3) {
+		/* note: the copy happens before the bounds check below */
+		memcpy(rxv.v, rxd, sizeof(rxv.v));
+
+		rxd += 2;
+		if ((u8 *)rxd - skb->data >= skb->len)
+			return -EINVAL;
+
+		if (rxv.v[0] & MT_PRXV_HT_AD_CODE)
+			status->enc_flags |= RX_ENC_FLAG_LDPC;
+
+		status->chains = mphy->antenna_mask;
+		status->chain_signal[0] = to_rssi(MT_PRXV_RCPI0, rxv.v[1]);
+		status->chain_signal[1] = to_rssi(MT_PRXV_RCPI1, rxv.v[1]);
+		status->chain_signal[2] = to_rssi(MT_PRXV_RCPI2, rxv.v[1]);
+		status->chain_signal[3] = to_rssi(MT_PRXV_RCPI3, rxv.v[1]);
+		status->signal = status->chain_signal[0];
+
+		/* overall signal is the strongest of the active chains */
+		for (i = 1; i < hweight8(mphy->antenna_mask); i++) {
+			if (!(status->chains & BIT(i)))
+				continue;
+
+			status->signal = max(status->signal,
+					     status->chain_signal[i]);
+		}
+
+		/* RXD Group 5 - C-RXV */
+		if (rxd1 & MT_RXD1_NORMAL_GROUP_5) {
+			u8 stbc = FIELD_GET(MT_CRXV_HT_STBC, rxv.v[2]);
+			u8 gi = FIELD_GET(MT_CRXV_HT_SHORT_GI, rxv.v[2]);
+			bool cck = false;
+
+			rxd += 18;
+			if ((u8 *)rxd - skb->data >= skb->len)
+				return -EINVAL;
+
+			idx = i = FIELD_GET(MT_PRXV_TX_RATE, rxv.v[0]);
+			rxv.phy = FIELD_GET(MT_CRXV_TX_MODE, rxv.v[2]);
+
+			/* translate the hardware rate into a rate index */
+			switch (rxv.phy) {
+			case MT_PHY_TYPE_CCK:
+				cck = true;
+				/* fall through */
+			case MT_PHY_TYPE_OFDM:
+				i = mt76_get_rate(&dev->mt76, sband, i, cck);
+				break;
+			case MT_PHY_TYPE_HT_GF:
+			case MT_PHY_TYPE_HT:
+				status->encoding = RX_ENC_HT;
+				if (i > 31)
+					return -EINVAL;
+				break;
+			case MT_PHY_TYPE_VHT:
+				status->nss =
+					FIELD_GET(MT_PRXV_NSTS, rxv.v[0]) + 1;
+				status->encoding = RX_ENC_VHT;
+				if (i > 9)
+					return -EINVAL;
+				break;
+			default:
+				return -EINVAL;
+			}
+			status->rate_idx = i;
+
+			switch (FIELD_GET(MT_CRXV_FRAME_MODE, rxv.v[2])) {
+			case IEEE80211_STA_RX_BW_20:
+				break;
+			case IEEE80211_STA_RX_BW_40:
+				status->bw = RATE_INFO_BW_40;
+				break;
+			case IEEE80211_STA_RX_BW_80:
+				status->bw = RATE_INFO_BW_80;
+				break;
+			case IEEE80211_STA_RX_BW_160:
+				status->bw = RATE_INFO_BW_160;
+				break;
+			default:
+				return -EINVAL;
+			}
+
+			status->enc_flags |= RX_ENC_FLAG_STBC_MASK * stbc;
+			if (gi)
+				status->enc_flags |= RX_ENC_FLAG_SHORT_GI;
+		}
+	}
+
+	/* drop the descriptor and 2 * remove_pad bytes of header padding */
+	skb_pull(skb, (u8 *)rxd - skb->data + 2 * remove_pad);
+
+	if (insert_ccmp_hdr) {
+		u8 key_id = FIELD_GET(MT_RXD1_NORMAL_KEY_ID, rxd1);
+
+		mt76_insert_ccmp_hdr(skb, key_id);
+	}
+
+	/* aggregation bookkeeping only applies to QoS data from a known wcid */
+	hdr = mt76_skb_get_hdr(skb);
+	if (!status->wcid || !ieee80211_is_data_qos(hdr->frame_control))
+		return 0;
+
+	status->aggr = unicast &&
+		       !ieee80211_is_qos_nullfunc(hdr->frame_control);
+	status->tid = *ieee80211_get_qos_ctl(hdr) & IEEE80211_QOS_CTL_TID_MASK;
+	status->seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+
+	return 0;
+}
+
+/*
+ * Build the eight-word TX descriptor for @skb in @txwi.
+ *
+ * @wcid:   WTBL entry the frame is sent with (must not be NULL)
+ * @key:    hardware key for the frame, or NULL for unprotected frames
+ * @beacon: true when the frame is queued as a beacon
+ *
+ * Every descriptor word is assembled in host byte order and converted with
+ * cpu_to_le32() exactly once when it is stored.
+ */
+void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
+			   struct sk_buff *skb, struct mt76_wcid *wcid,
+			   struct ieee80211_key_conf *key, bool beacon)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	bool multicast = is_multicast_ether_addr(hdr->addr1);
+	struct ieee80211_vif *vif = info->control.vif;
+	struct mt76_phy *mphy = &dev->mphy;
+	bool ext_phy = info->hw_queue & MT_TX_HW_QUEUE_EXT_PHY;
+	u8 fc_type, fc_stype, p_fmt, q_idx, omac_idx = 0, wmm_idx = 0;
+	__le16 fc = hdr->frame_control;
+	u16 tx_count = 4, seqno = 0;
+	u32 val;
+
+	if (vif) {
+		struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+
+		omac_idx = mvif->omac_idx;
+		wmm_idx = mvif->wmm_idx;
+	}
+
+	if (ext_phy && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	fc_type = (le16_to_cpu(fc) & IEEE80211_FCTL_FTYPE) >> 2;
+	fc_stype = (le16_to_cpu(fc) & IEEE80211_FCTL_STYPE) >> 4;
+
+	/* pick the hardware queue and packet format */
+	if (ieee80211_is_data(fc) || ieee80211_is_bufferable_mmpdu(fc)) {
+		q_idx = wmm_idx * MT7915_MAX_WMM_SETS +
+			skb_get_queue_mapping(skb);
+		p_fmt = MT_TX_TYPE_CT;
+	} else if (beacon) {
+		q_idx = MT_LMAC_BCN0;
+		p_fmt = MT_TX_TYPE_FW;
+	} else {
+		q_idx = MT_LMAC_ALTX0;
+		p_fmt = MT_TX_TYPE_CT;
+	}
+
+	val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len + MT_TXD_SIZE) |
+	      FIELD_PREP(MT_TXD0_PKT_FMT, p_fmt) |
+	      FIELD_PREP(MT_TXD0_Q_IDX, q_idx);
+	txwi[0] = cpu_to_le32(val);
+
+	val = MT_TXD1_LONG_FORMAT |
+	      FIELD_PREP(MT_TXD1_WLAN_IDX, wcid->idx) |
+	      FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_802_11) |
+	      FIELD_PREP(MT_TXD1_HDR_INFO,
+			 ieee80211_get_hdrlen_from_skb(skb) / 2) |
+	      FIELD_PREP(MT_TXD1_TID,
+			 skb->priority & IEEE80211_QOS_CTL_TID_MASK) |
+	      FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx);
+	/*
+	 * val is still in host byte order here and is converted once below,
+	 * so the flag must be OR-ed in unconverted.
+	 */
+	if (ext_phy && q_idx >= MT_LMAC_ALTX0 && q_idx <= MT_LMAC_BCN0)
+		val |= MT_TXD1_TGID;
+
+	txwi[1] = cpu_to_le32(val);
+
+	val = FIELD_PREP(MT_TXD2_FRAME_TYPE, fc_type) |
+	      FIELD_PREP(MT_TXD2_SUB_TYPE, fc_stype) |
+	      FIELD_PREP(MT_TXD2_MULTICAST, multicast);
+	if (key) {
+		/* multicast robust mgmt frames with a CMAC key use BIP */
+		if (multicast && ieee80211_is_robust_mgmt_frame(skb) &&
+		    key->cipher == WLAN_CIPHER_SUITE_AES_CMAC) {
+			val |= MT_TXD2_BIP;
+			txwi[3] = 0;
+		} else {
+			txwi[3] = cpu_to_le32(MT_TXD3_PROTECT_FRAME);
+		}
+	} else {
+		txwi[3] = 0;
+	}
+	txwi[2] = cpu_to_le32(val);
+
+	txwi[4] = 0;
+	txwi[5] = 0;
+	txwi[6] = 0;
+
+	/* non-data and multicast frames go out at a fixed default rate */
+	if (!ieee80211_is_data(fc) || multicast) {
+		u16 rate;
+
+		/* hardware won't add HTC for mgmt/ctrl frame */
+		txwi[2] |= cpu_to_le32(MT_TXD2_FIX_RATE | MT_TXD2_HTC_VLD);
+
+		if (mphy->chandef.chan->band == NL80211_BAND_5GHZ)
+			rate = MT7915_5G_RATE_DEFAULT;
+		else
+			rate = MT7915_2G_RATE_DEFAULT;
+
+		val = MT_TXD6_FIXED_BW |
+		      FIELD_PREP(MT_TXD6_TX_RATE, rate);
+		txwi[6] |= cpu_to_le32(val);
+		txwi[3] |= cpu_to_le32(MT_TXD3_BA_DISABLE);
+	}
+
+	/* beacons use a transmit count of 0x1f instead of the default 4 */
+	if (!ieee80211_is_beacon(fc))
+		txwi[3] |= cpu_to_le32(MT_TXD3_SW_POWER_MGMT);
+	else
+		tx_count = 0x1f;
+
+	if (info->flags & IEEE80211_TX_CTL_NO_ACK)
+		txwi[3] |= cpu_to_le32(MT_TXD3_NO_ACK);
+
+	val = FIELD_PREP(MT_TXD7_TYPE, fc_type) |
+	      FIELD_PREP(MT_TXD7_SUB_TYPE, fc_stype);
+	txwi[7] = cpu_to_le32(val);
+
+	/* supply the sequence number for QoS data and BlockAck requests */
+	val = FIELD_PREP(MT_TXD3_REM_TX_COUNT, tx_count);
+	if (ieee80211_is_data_qos(fc)) {
+		seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
+		val |= MT_TXD3_SN_VALID;
+	} else if (ieee80211_is_back_req(fc)) {
+		struct ieee80211_bar *bar;
+
+		bar = (struct ieee80211_bar *)skb->data;
+		seqno = IEEE80211_SEQ_TO_SN(le16_to_cpu(bar->start_seq_num));
+		val |= MT_TXD3_SN_VALID;
+	}
+	val |= FIELD_PREP(MT_TXD3_SEQ, seqno);
+	txwi[3] |= cpu_to_le32(val);
+}
+
+/*
+ * Prepare a frame for DMA: write the TX descriptor, fill the mt7915_txp
+ * that follows it with the buffer list, and allocate a token that
+ * identifies the frame in the later TX-free event.
+ *
+ * Returns 0 on success or the negative idr_alloc() error when no token is
+ * available. On success tx_info->skb is replaced by DMA_DUMMY_DATA and the
+ * real skb is kept in the txwi cache entry.
+ */
+int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
+			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
+			  struct ieee80211_sta *sta,
+			  struct mt76_tx_info *tx_info)
+{
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)tx_info->skb->data;
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(tx_info->skb);
+	struct ieee80211_key_conf *key = info->control.hw_key;
+	struct ieee80211_vif *vif = info->control.vif;
+	struct mt76_tx_cb *cb = mt76_tx_skb_cb(tx_info->skb);
+	struct mt76_txwi_cache *t;
+	struct mt7915_txp *txp;
+	int id, i, nbuf = tx_info->nbuf - 1;
+	u8 *txwi = (u8 *)txwi_ptr;
+
+	/* frames without a station entry use the global wcid */
+	if (!wcid)
+		wcid = &dev->mt76.global_wcid;
+
+	cb->wcid = wcid->idx;
+
+	mt7915_mac_write_txwi(dev, txwi_ptr, tx_info->skb, wcid, key,
+			      false);
+
+	/* tx_info->buf[0] is skipped; txp lists buf[1] onwards */
+	txp = (struct mt7915_txp *)(txwi + MT_TXD_SIZE);
+	for (i = 0; i < nbuf; i++) {
+		txp->buf[i] = cpu_to_le32(tx_info->buf[i + 1].addr);
+		txp->len[i] = cpu_to_le16(tx_info->buf[i + 1].len);
+	}
+	txp->nbuf = nbuf;
+
+	/* pass partial skb header to fw */
+	tx_info->buf[1].len = MT_CT_PARSE_LEN;
+	tx_info->nbuf = MT_CT_DMA_BUF_NUM;
+
+	txp->flags = cpu_to_le16(MT_CT_INFO_APPLY_TXD);
+
+	if (!key)
+		txp->flags |= cpu_to_le16(MT_CT_INFO_NONE_CIPHER_FRAME);
+
+	if (ieee80211_is_mgmt(hdr->frame_control))
+		txp->flags |= cpu_to_le16(MT_CT_INFO_MGMT_FRAME);
+
+	if (vif) {
+		struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+
+		txp->bss_idx = mvif->idx;
+	}
+
+	/* the txwi cache entry sits txwi_size bytes after the descriptor */
+	t = (struct mt76_txwi_cache *)(txwi + mdev->drv->txwi_size);
+	t->skb = tx_info->skb;
+
+	spin_lock_bh(&dev->token_lock);
+	id = idr_alloc(&dev->token, t, 0, MT7915_TOKEN_SIZE, GFP_ATOMIC);
+	spin_unlock_bh(&dev->token_lock);
+	if (id < 0)
+		return id;
+
+	txp->token = cpu_to_le16(id);
+	txp->rept_wds_wcid = 0xff;
+	/* marker checked by mt7915_tx_complete_skb() */
+	tx_info->skb = DMA_DUMMY_DATA;
+
+	return 0;
+}
+
+/*
+ * Return true when aggregation for @tid of @msta is in the
+ * MT7915_AGGR_STOP state. The state is sampled under ampdu_lock.
+ */
+static inline bool
+mt7915_tx_check_aggr_tid(struct mt7915_sta *msta, u8 tid)
+{
+	bool stopped;
+
+	spin_lock_bh(&msta->ampdu_lock);
+	stopped = msta->ampdu_state[tid] == MT7915_AGGR_STOP;
+	spin_unlock_bh(&msta->ampdu_lock);
+
+	return stopped;
+}
+
+/*
+ * Start a TX BlockAck session for the TID of a completed frame when
+ * aggregation for that TID is currently stopped.
+ *
+ * Nothing happens for non-HT stations, frames mapped to the VO queue,
+ * non-QoS-data frames and EAPOL (ETH_P_PAE) frames.
+ */
+static void
+mt7915_tx_check_aggr(struct ieee80211_sta *sta, struct sk_buff *skb)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct ieee80211_hdr *hdr = (struct ieee80211_hdr *)skb->data;
+	u16 tid;
+
+	if (!sta->ht_cap.ht_supported ||
+	    skb_get_queue_mapping(skb) == IEEE80211_AC_VO)
+		return;
+
+	if (unlikely(!ieee80211_is_data_qos(hdr->frame_control)))
+		return;
+
+	if (unlikely(skb->protocol == cpu_to_be16(ETH_P_PAE)))
+		return;
+
+	tid = ieee80211_get_tid(hdr);
+	if (!mt7915_tx_check_aggr_tid(msta, tid))
+		return;
+
+	ieee80211_start_tx_ba_session(sta, tid, 0);
+	mt7915_set_aggr_state(msta, tid, MT7915_AGGR_PROGRESS);
+}
+
+/*
+ * Report a TX status to mac80211 through the extended status interface.
+ * @sta and @skb may be NULL; when a station is given, its cached TX rate is
+ * attached to the report.
+ */
+static inline void
+mt7915_tx_status(struct ieee80211_sta *sta, struct ieee80211_hw *hw,
+		 struct ieee80211_tx_info *info, struct sk_buff *skb)
+{
+	struct ieee80211_tx_status status = {
+		.sta = sta,
+		.info = info,
+		.skb = skb,
+	};
+	struct mt7915_sta *msta;
+
+	if (sta) {
+		msta = (struct mt7915_sta *)sta->drv_priv;
+		status.rate = &msta->stats.tx_rate;
+	}
+
+	/* use status_ext to report HE rate */
+	ieee80211_tx_status_ext(hw, &status);
+}
+
+/*
+ * Finish a transmitted frame: update the TX info flags, report the status
+ * to mac80211 and release the skb.
+ *
+ * @stat is the status field taken from the TX-free event (0 on the
+ * mt7915_tx_complete_skb() path). When the frame requested a TX status,
+ * the skb is handed to mac80211 with the report; otherwise it is freed
+ * here.
+ */
+static void
+mt7915_tx_complete_status(struct mt76_dev *mdev, struct sk_buff *skb,
+			  struct ieee80211_sta *sta, u8 stat)
+{
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_hw *hw;
+
+	hw = mt76_tx_status_get_hw(mdev, skb);
+
+	/* non-aggregated frames may trigger a BlockAck session setup */
+	if (info->flags & IEEE80211_TX_CTL_AMPDU)
+		info->flags |= IEEE80211_TX_STAT_AMPDU;
+	else if (sta)
+		mt7915_tx_check_aggr(sta, skb);
+
+	if (stat)
+		ieee80211_tx_info_clear_status(info);
+
+	/* NOTE(review): ACK is set regardless of stat - confirm intended */
+	if (!(info->flags & IEEE80211_TX_CTL_NO_ACK))
+		info->flags |= IEEE80211_TX_STAT_ACK;
+
+	info->status.tx_time = 0;
+
+	if (info->flags & IEEE80211_TX_CTL_REQ_TX_STATUS) {
+		mt7915_tx_status(sta, hw, info, skb);
+		return;
+	}
+
+	/* report without the skb, which is freed below */
+	if (sta || !(info->flags & IEEE80211_TX_CTL_NO_ACK))
+		mt7915_tx_status(sta, hw, info, NULL);
+
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Unmap the DMA buffers recorded in the txp of @t, starting at entry 1;
+ * entry 0 is not touched here.
+ */
+void mt7915_txp_skb_unmap(struct mt76_dev *dev,
+			  struct mt76_txwi_cache *t)
+{
+	struct mt7915_txp *txp = mt7915_txwi_to_txp(dev, t);
+	int i = 1;
+
+	while (i < txp->nbuf) {
+		dma_unmap_single(dev->dev, le32_to_cpu(txp->buf[i]),
+				 le16_to_cpu(txp->len[i]), DMA_TO_DEVICE);
+		i++;
+	}
+}
+
+/*
+ * Handle a TX-free event: walk the info words of the event, complete every
+ * frame whose token is listed and finally free the event skb itself.
+ *
+ * An info word either announces a new wcid pair (MT_TX_FREE_PAIR set) or
+ * carries the token and status of one frame belonging to the most recently
+ * announced station.
+ */
+void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_tx_free *free = (struct mt7915_tx_free *)skb->data;
+	struct mt76_dev *mdev = &dev->mt76;
+	struct mt76_txwi_cache *txwi;
+	struct ieee80211_sta *sta = NULL;
+	u8 i, count;
+
+	/*
+	 * TODO: MT_TX_FREE_LATENCY is msdu time from the TXD is queued into PLE,
+	 * to the time ack is received or dropped by hw (air + hw queue time).
+	 * Should avoid accessing WTBL to get Tx airtime, and use it instead.
+	 */
+	count = FIELD_GET(MT_TX_FREE_MSDU_CNT, le16_to_cpu(free->ctrl));
+	for (i = 0; i < count; i++) {
+		u32 msdu, info = le32_to_cpu(free->info[i]);
+		u8 stat;
+
+		/*
+		 * 1'b1: new wcid pair.
+		 * 1'b0: msdu_id with the same 'wcid pair' as above.
+		 */
+		if (info & MT_TX_FREE_PAIR) {
+			struct mt7915_sta *msta;
+			struct mt76_wcid *wcid;
+			u16 idx;
+
+			/*
+			 * A pair word is not an MSDU, so extend the loop by
+			 * one entry.
+			 * NOTE(review): i and count are u8 and could wrap if
+			 * MSDUs plus pair words exceed 255 - confirm the
+			 * hardware limit.
+			 */
+			count++;
+			idx = FIELD_GET(MT_TX_FREE_WLAN_ID, info);
+			wcid = rcu_dereference(dev->mt76.wcid[idx]);
+			sta = wcid_to_sta(wcid);
+			if (!sta)
+				continue;
+
+			msta = container_of(wcid, struct mt7915_sta, wcid);
+			ieee80211_queue_work(mt76_hw(dev), &msta->stats_work);
+			continue;
+		}
+
+		msdu = FIELD_GET(MT_TX_FREE_MSDU_ID, info);
+		stat = FIELD_GET(MT_TX_FREE_STATUS, info);
+
+		/* the MSDU id is the token allocated in tx_prepare_skb */
+		spin_lock_bh(&dev->token_lock);
+		txwi = idr_remove(&dev->token, msdu);
+		spin_unlock_bh(&dev->token_lock);
+
+		if (!txwi)
+			continue;
+
+		mt7915_txp_skb_unmap(mdev, txwi);
+		if (txwi->skb) {
+			mt7915_tx_complete_status(mdev, txwi->skb, sta, stat);
+			txwi->skb = NULL;
+		}
+
+		mt76_put_txwi(mdev, txwi);
+	}
+	dev_kfree_skb(skb);
+}
+
+/*
+ * Completion callback for a TX queue entry.
+ *
+ * Entries without a txwi only carry an skb, which is freed. An entry whose
+ * skb is the DMA_DUMMY_DATA marker has its real skb recovered through the
+ * token stored in the txp (removing the token from the IDR) before the
+ * frame is completed.
+ */
+void mt7915_tx_complete_skb(struct mt76_dev *mdev, enum mt76_txq_id qid,
+			    struct mt76_queue_entry *e)
+{
+	struct mt7915_dev *dev;
+
+	if (!e->txwi) {
+		dev_kfree_skb_any(e->skb);
+		return;
+	}
+
+	dev = container_of(mdev, struct mt7915_dev, mt76);
+
+	/* error path */
+	if (e->skb == DMA_DUMMY_DATA) {
+		struct mt76_txwi_cache *t;
+		struct mt7915_txp *txp;
+
+		txp = mt7915_txwi_to_txp(mdev, e->txwi);
+
+		spin_lock_bh(&dev->token_lock);
+		t = idr_remove(&dev->token, le16_to_cpu(txp->token));
+		spin_unlock_bh(&dev->token_lock);
+		/* the token may already have been released elsewhere */
+		e->skb = t ? t->skb : NULL;
+	}
+
+	if (e->skb) {
+		struct mt76_tx_cb *cb = mt76_tx_skb_cb(e->skb);
+		struct mt76_wcid *wcid;
+
+		wcid = rcu_dereference(dev->mt76.wcid[cb->wcid]);
+
+		mt7915_tx_complete_status(mdev, e->skb, wcid_to_sta(wcid), 0);
+	}
+}
+
+void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy)
+{
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	u32 reg = MT_WF_PHY_RX_CTRL1(ext_phy);
+
+	mt7915_l2_clear(dev, reg, MT_WF_PHY_RX_CTRL1_STSCNT_EN);
+	mt7915_l2_set(dev, reg, BIT(11) | BIT(9));
+}
+
+void mt7915_mac_reset_counters(struct mt7915_phy *phy)
+{	/* Reset the aggregation, survey-time and airtime statistics of one phy. */
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	int i;
+
+	for (i = 0; i < 4; i++) {	/* values discarded; presumably read-to-clear */
+		mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
+		mt76_rr(dev, MT_TX_AGG_CNT2(ext_phy, i));
+	}
+
+	if (ext_phy) {
+		dev->mt76.phy2->survey_time = ktime_get_boottime();
+		i = ARRAY_SIZE(dev->mt76.aggr_stats) / 2;	/* ext phy: second half */
+	} else {
+		dev->mt76.phy.survey_time = ktime_get_boottime();
+		i = 0;	/* primary phy: first half */
+	}
+	memset(&dev->mt76.aggr_stats[i], 0, sizeof(dev->mt76.aggr_stats) / 2);
+
+	/* reset airtime counters */
+	mt76_rr(dev, MT_MIB_SDR9(ext_phy));
+	mt76_rr(dev, MT_MIB_SDR36(ext_phy));
+	mt76_rr(dev, MT_MIB_SDR37(ext_phy));
+
+	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(ext_phy),
+		 MT_WF_RMAC_MIB_RXTIME_CLR);
+	mt76_set(dev, MT_WF_RMAC_MIB_AIRTIME0(ext_phy),
+		 MT_WF_RMAC_MIB_RXTIME_CLR);
+}
+
+void mt7915_mac_set_timing(struct mt7915_phy *phy)
+{
+	s16 coverage_class = phy->coverage_class;
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	u32 val, reg_offset;
+	u32 cck = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, 231) |
+		  FIELD_PREP(MT_TIMEOUT_VAL_CCA, 48);
+	u32 ofdm = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, 60) |
+		   FIELD_PREP(MT_TIMEOUT_VAL_CCA, 28);
+	int sifs, offset;
+	bool is_5ghz = phy->mt76->chandef.chan->band == NL80211_BAND_5GHZ;
+
+	if (!test_bit(MT76_STATE_RUNNING, &phy->mt76->state))
+		return;
+
+	if (is_5ghz)
+		sifs = 16;
+	else
+		sifs = 10;
+
+	if (ext_phy) {
+		coverage_class = max_t(s16, dev->phy.coverage_class,
+				       coverage_class);
+	} else {
+		struct mt7915_phy *phy_ext = mt7915_ext_phy(dev);
+
+		if (phy_ext)
+			coverage_class = max_t(s16, phy_ext->coverage_class,
+					       coverage_class);
+	}
+	mt76_set(dev, MT_ARB_SCR(ext_phy),
+		 MT_ARB_SCR_TX_DISABLE | MT_ARB_SCR_RX_DISABLE);
+	udelay(1);
+
+	offset = 3 * coverage_class;
+	reg_offset = FIELD_PREP(MT_TIMEOUT_VAL_PLCP, offset) |
+		     FIELD_PREP(MT_TIMEOUT_VAL_CCA, offset);
+
+	mt76_wr(dev, MT_TMAC_CDTR(ext_phy), cck + reg_offset);
+	mt76_wr(dev, MT_TMAC_ODTR(ext_phy), ofdm + reg_offset);
+	mt76_wr(dev, MT_TMAC_ICR0(ext_phy),
+		FIELD_PREP(MT_IFS_EIFS, 360) |
+		FIELD_PREP(MT_IFS_RIFS, 2) |
+		FIELD_PREP(MT_IFS_SIFS, sifs) |
+		FIELD_PREP(MT_IFS_SLOT, phy->slottime));
+
+	if (phy->slottime < 20 || is_5ghz)
+		val = MT7915_CFEND_RATE_DEFAULT;
+	else
+		val = MT7915_CFEND_RATE_11B;
+
+	mt76_rmw_field(dev, MT_AGG_ACR0(ext_phy), MT_AGG_ACR_CFEND_RATE, val);
+	mt76_clear(dev, MT_ARB_SCR(ext_phy),
+		   MT_ARB_SCR_TX_DISABLE | MT_ARB_SCR_RX_DISABLE);
+}
+
+/*
+ * TODO: MIB counters are read-clear and many HE features need this info, so
+ * firmware prepares a task that reads the fields out into a shared structure.
+ * The driver should switch to the event format to avoid race conditions.
+ */
+static void
+mt7915_phy_update_channel(struct mt76_phy *mphy, int idx)
+{
+	struct mt7915_dev *dev = container_of(mphy->dev, struct mt7915_dev, mt76);
+	struct mt76_channel_state *state;
+	u64 busy_time, tx_time, rx_time, obss_time;
+
+	busy_time = mt76_get_field(dev, MT_MIB_SDR9(idx),
+				   MT_MIB_SDR9_BUSY_MASK);
+	tx_time = mt76_get_field(dev, MT_MIB_SDR36(idx),
+				 MT_MIB_SDR36_TXTIME_MASK);
+	rx_time = mt76_get_field(dev, MT_MIB_SDR37(idx),
+				 MT_MIB_SDR37_RXTIME_MASK);
+	obss_time = mt76_get_field(dev, MT_WF_RMAC_MIB_AIRTIME14(idx),
+				   MT_MIB_OBSSTIME_MASK);
+
+	/* TODO: state->noise */
+	state = mphy->chan_state;
+	state->cc_busy += busy_time;
+	state->cc_tx += tx_time;
+	state->cc_rx += rx_time + obss_time;
+	state->cc_bss_rx += rx_time;
+}
+
+void mt7915_update_channel(struct mt76_dev *mdev)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+
+	mt7915_phy_update_channel(&mdev->phy, 0);
+	if (mdev->phy2)
+		mt7915_phy_update_channel(mdev->phy2, 1);
+
+	/* reset obss airtime */
+	mt76_set(dev, MT_WF_RMAC_MIB_TIME0(0), MT_WF_RMAC_MIB_RXTIME_CLR);
+	if (mdev->phy2)
+		mt76_set(dev, MT_WF_RMAC_MIB_TIME0(1),
+			 MT_WF_RMAC_MIB_RXTIME_CLR);
+}
+
+static bool
+mt7915_wait_reset_state(struct mt7915_dev *dev, u32 state)
+{
+	/* nonzero wait result: a bit of @state was seen before the timeout */
+	bool done = wait_event_timeout(dev->reset_wait,
+				       (READ_ONCE(dev->reset_state) & state),
+				       MT7915_RESET_TIMEOUT);
+
+	WARN(!done, "Timeout waiting for MCU reset state %x\n", state);
+
+	return done;
+}
+
+static void
+mt7915_update_vif_beacon(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	struct ieee80211_hw *hw = priv;
+
+	mt7915_mcu_add_beacon(hw, vif, vif->bss_conf.enable_beacon);
+}
+
+static void
+mt7915_update_beacons(struct mt7915_dev *dev)
+{
+	ieee80211_iterate_active_interfaces(dev->mt76.hw,
+		IEEE80211_IFACE_ITER_RESUME_ALL,
+		mt7915_update_vif_beacon, dev->mt76.hw);
+
+	if (!dev->mt76.phy2)
+		return;
+
+	ieee80211_iterate_active_interfaces(dev->mt76.phy2->hw,
+		IEEE80211_IFACE_ITER_RESUME_ALL,
+		mt7915_update_vif_beacon, dev->mt76.phy2->hw);
+}
+
+static void
+mt7915_dma_reset(struct mt7915_dev *dev)
+{
+	int i;
+
+	mt76_clear(dev, MT_WFDMA0_GLO_CFG,
+		   MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+	mt76_clear(dev, MT_WFDMA1_GLO_CFG,
+		   MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN);
+	usleep_range(1000, 2000);
+
+	for (i = 0; i < __MT_TXQ_MAX; i++)
+		mt76_queue_tx_cleanup(dev, i, true);
+
+	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
+		mt76_queue_rx_reset(dev, i);
+
+	/* re-init prefetch settings after reset */
+	mt7915_dma_prefetch(dev);
+
+	mt76_set(dev, MT_WFDMA0_GLO_CFG,
+		 MT_WFDMA0_GLO_CFG_TX_DMA_EN | MT_WFDMA0_GLO_CFG_RX_DMA_EN);
+	mt76_set(dev, MT_WFDMA1_GLO_CFG,
+		 MT_WFDMA1_GLO_CFG_TX_DMA_EN | MT_WFDMA1_GLO_CFG_RX_DMA_EN);
+}
+
+/* system error recovery */
+void mt7915_mac_reset_work(struct work_struct *work)
+{
+	struct mt7915_dev *dev;
+
+	dev = container_of(work, struct mt7915_dev, reset_work);
+
+	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_DMA))
+		return;
+
+	ieee80211_stop_queues(mt76_hw(dev));
+	if (dev->mt76.phy2)
+		ieee80211_stop_queues(dev->mt76.phy2->hw);
+
+	set_bit(MT76_RESET, &dev->mphy.state);
+	set_bit(MT76_MCU_RESET, &dev->mphy.state);
+	wake_up(&dev->mt76.mcu.wait);
+	cancel_delayed_work_sync(&dev->mt76.mac_work);
+
+	/* lock/unlock all queues to ensure that no tx is pending */
+	mt76_txq_schedule_all(&dev->mphy);
+	if (dev->mt76.phy2)
+		mt76_txq_schedule_all(dev->mt76.phy2);
+
+	tasklet_disable(&dev->mt76.tx_tasklet);
+	napi_disable(&dev->mt76.napi[0]);
+	napi_disable(&dev->mt76.napi[1]);
+	napi_disable(&dev->mt76.napi[2]);
+	napi_disable(&dev->mt76.tx_napi);
+
+	mutex_lock(&dev->mt76.mutex);
+
+	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_STOPPED);
+
+	if (mt7915_wait_reset_state(dev, MT_MCU_CMD_RESET_DONE)) {
+		mt7915_dma_reset(dev);
+
+		mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_DMA_INIT);
+		mt7915_wait_reset_state(dev, MT_MCU_CMD_RECOVERY_DONE);
+	}
+
+	clear_bit(MT76_MCU_RESET, &dev->mphy.state);
+	clear_bit(MT76_RESET, &dev->mphy.state);
+
+	tasklet_enable(&dev->mt76.tx_tasklet);
+	napi_enable(&dev->mt76.tx_napi);
+	napi_schedule(&dev->mt76.tx_napi);
+
+	napi_enable(&dev->mt76.napi[0]);
+	napi_schedule(&dev->mt76.napi[0]);
+
+	napi_enable(&dev->mt76.napi[1]);
+	napi_schedule(&dev->mt76.napi[1]);
+
+	napi_enable(&dev->mt76.napi[2]);
+	napi_schedule(&dev->mt76.napi[2]);
+
+	ieee80211_wake_queues(mt76_hw(dev));
+	if (dev->mt76.phy2)
+		ieee80211_wake_queues(dev->mt76.phy2->hw);
+
+	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
+	mt7915_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	mt7915_update_beacons(dev);
+
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+				     MT7915_WATCHDOG_TIME);
+}
+
+static void
+mt7915_mac_update_mib_stats(struct mt7915_phy *phy)
+{	/* Rebuild phy->mib from the MIB registers and accumulate TX aggr counters. */
+	struct mt7915_dev *dev = phy->dev;
+	struct mib_stats *mib = &phy->mib;
+	bool ext_phy = phy != &dev->phy;
+	int i, aggr0, aggr1;
+
+	memset(mib, 0, sizeof(*mib));
+
+	mib->fcs_err_cnt = mt76_get_field(dev, MT_MIB_SDR3(ext_phy),
+					  MT_MIB_SDR3_FCS_ERR_MASK);
+	/* NOTE(review): aggr0 advances 8 slots below; aggr1 = aggr0 + 4 overlaps it */
+	aggr0 = ext_phy ? ARRAY_SIZE(dev->mt76.aggr_stats) / 2 : 0;
+	for (i = 0, aggr1 = aggr0 + 4; i < 4; i++) {
+		u32 val, val2;
+
+		val = mt76_rr(dev, MT_MIB_MB_SDR1(ext_phy, i));
+
+		val2 = FIELD_GET(MT_MIB_ACK_FAIL_COUNT_MASK, val);
+		if (val2 > mib->ack_fail_cnt)	/* keep the largest of the 4 readings */
+			mib->ack_fail_cnt = val2;
+
+		val2 = FIELD_GET(MT_MIB_BA_MISS_COUNT_MASK, val);
+		if (val2 > mib->ba_miss_cnt)
+			mib->ba_miss_cnt = val2;
+
+		val = mt76_rr(dev, MT_MIB_MB_SDR0(ext_phy, i));
+		val2 = FIELD_GET(MT_MIB_RTS_RETRIES_COUNT_MASK, val);
+		if (val2 > mib->rts_retries_cnt) {	/* rts_cnt follows the max retries */
+			mib->rts_cnt = FIELD_GET(MT_MIB_RTS_COUNT_MASK, val);
+			mib->rts_retries_cnt = val2;
+		}
+
+		val = mt76_rr(dev, MT_TX_AGG_CNT(ext_phy, i));
+		val2 = mt76_rr(dev, MT_TX_AGG_CNT2(ext_phy, i));
+
+		dev->mt76.aggr_stats[aggr0++] += val & 0xffff;	/* two 16-bit counters */
+		dev->mt76.aggr_stats[aggr0++] += val >> 16;
+		dev->mt76.aggr_stats[aggr1++] += val2 & 0xffff;
+		dev->mt76.aggr_stats[aggr1++] += val2 >> 16;
+	}
+}
+
+void mt7915_mac_sta_stats_work(struct work_struct *work)
+{	/* Per-station worker: poll rate info, push pending rate-control/SMPS changes. */
+	struct ieee80211_sta *sta;
+	struct ieee80211_vif *vif;
+	struct mt7915_sta_stats *stats;
+	struct mt7915_sta *msta;
+	struct mt7915_dev *dev;
+
+	msta = container_of(work, struct mt7915_sta, stats_work);
+	sta = container_of((void *)msta, struct ieee80211_sta, drv_priv);
+	vif = container_of((void *)msta->vif, struct ieee80211_vif, drv_priv);
+	dev = msta->vif->dev;
+	stats = &msta->stats;
+
+	/* use MT_TX_FREE_RATE to report Tx rate for further devices */
+	if (time_after(jiffies, stats->jiffies + HZ)) {	/* at most once per second */
+		mt7915_mcu_get_rate_info(dev, RATE_CTRL_RU_INFO,
+					 msta->wcid.idx);
+
+		stats->jiffies = jiffies;
+	}
+	/* NOTE(review): test_and_clear_bit() takes a bit number, not OR-ed flags */
+	if (test_and_clear_bit(IEEE80211_RC_SUPP_RATES_CHANGED |
+			       IEEE80211_RC_NSS_CHANGED |
+			       IEEE80211_RC_BW_CHANGED, &stats->changed))
+		mt7915_mcu_add_rate_ctrl(dev, vif, sta);
+
+	if (test_and_clear_bit(IEEE80211_RC_SMPS_CHANGED, &stats->changed))
+		mt7915_mcu_add_smps(dev, vif, sta);
+
+	spin_lock_bh(&dev->sta_poll_lock);
+	if (list_empty(&msta->poll_list))	/* add only if not already linked */
+		list_add_tail(&msta->poll_list, &dev->sta_poll_list);
+	spin_unlock_bh(&dev->sta_poll_lock);
+}
+
+void mt7915_mac_work(struct work_struct *work)
+{	/* Self re-arming worker: survey update each run, MIB stats every 5th run. */
+	struct mt7915_dev *dev;
+
+	dev = (struct mt7915_dev *)container_of(work, struct mt76_dev,
+						mac_work.work);	/* cast assumes mt76 is the first member - TODO confirm */
+
+	mutex_lock(&dev->mt76.mutex);
+	mt76_update_survey(&dev->mt76);
+	if (++dev->mac_work_count == 5) {
+		struct mt7915_phy *ext_phy = mt7915_ext_phy(dev);
+
+		mt7915_mac_update_mib_stats(&dev->phy);
+		if (ext_phy)	/* second phy is optional */
+			mt7915_mac_update_mib_stats(ext_phy);
+
+		dev->mac_work_count = 0;
+	}
+	mutex_unlock(&dev->mt76.mutex);
+
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+				     MT7915_WATCHDOG_TIME);
+}
+
+static void mt7915_dfs_stop_radar_detector(struct mt7915_phy *phy)
+{	/* Send RDD_STOP for each chain flagged in rdd_state; the flags stay set. */
+	struct mt7915_dev *dev = phy->dev;
+
+	if (phy->rdd_state & BIT(0))	/* chain 0 */
+		mt7915_mcu_rdd_cmd(dev, RDD_STOP, 0, MT_RX_SEL0, 0);
+	if (phy->rdd_state & BIT(1))	/* chain 1 */
+		mt7915_mcu_rdd_cmd(dev, RDD_STOP, 1, MT_RX_SEL0, 0);
+}
+
+static int mt7915_dfs_start_rdd(struct mt7915_dev *dev, int chain)
+{
+	/* RDD_START first; RDD_DET_MODE (value 1) only if that succeeded */
+	int ret = mt7915_mcu_rdd_cmd(dev, RDD_START, chain, MT_RX_SEL0, 0);
+
+	if (ret >= 0)
+		ret = mt7915_mcu_rdd_cmd(dev, RDD_DET_MODE, chain,
+					 MT_RX_SEL0, 1);
+	return ret;
+}
+
+static int mt7915_dfs_start_radar_detector(struct mt7915_phy *phy)
+{
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	int err;
+
+	/* start CAC */
+	err = mt7915_mcu_rdd_cmd(dev, RDD_CAC_START, ext_phy, MT_RX_SEL0, 0);
+	if (err < 0)
+		return err;
+
+	err = mt7915_dfs_start_rdd(dev, ext_phy);
+	if (err < 0)
+		return err;
+
+	phy->rdd_state |= BIT(ext_phy);
+
+	if (chandef->width == NL80211_CHAN_WIDTH_160 ||
+	    chandef->width == NL80211_CHAN_WIDTH_80P80) {
+		err = mt7915_dfs_start_rdd(dev, 1);
+		if (err < 0)
+			return err;
+
+		phy->rdd_state |= BIT(1);
+	}
+
+	return 0;
+}
+
+static int
+mt7915_dfs_init_radar_specs(struct mt7915_phy *phy)
+{
+	const struct mt7915_dfs_radar_spec *radar_specs;
+	struct mt7915_dev *dev = phy->dev;
+	int err, i;
+
+	switch (dev->mt76.region) {
+	case NL80211_DFS_FCC:
+		radar_specs = &fcc_radar_specs;
+		err = mt7915_mcu_set_fcc5_lpn(dev, 8);
+		if (err < 0)
+			return err;
+		break;
+	case NL80211_DFS_ETSI:
+		radar_specs = &etsi_radar_specs;
+		break;
+	case NL80211_DFS_JP:
+		radar_specs = &jp_radar_specs;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(radar_specs->radar_pattern); i++) {
+		err = mt7915_mcu_set_radar_th(dev, i,
+					      &radar_specs->radar_pattern[i]);
+		if (err < 0)
+			return err;
+	}
+
+	return mt7915_mcu_set_pulse_th(dev, &radar_specs->pulse_th);
+}
+
+int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy)
+{
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	int err;
+
+	if (dev->mt76.region == NL80211_DFS_UNSET) {
+		phy->dfs_state = -1;
+		if (phy->rdd_state)
+			goto stop;
+
+		return 0;
+	}
+
+	if (test_bit(MT76_SCANNING, &phy->mt76->state))
+		return 0;
+
+	if (phy->dfs_state == chandef->chan->dfs_state)
+		return 0;
+
+	err = mt7915_dfs_init_radar_specs(phy);
+	if (err < 0) {
+		phy->dfs_state = -1;
+		goto stop;
+	}
+
+	phy->dfs_state = chandef->chan->dfs_state;
+
+	if (chandef->chan->flags & IEEE80211_CHAN_RADAR) {
+		if (chandef->chan->dfs_state != NL80211_DFS_AVAILABLE)
+			return mt7915_dfs_start_radar_detector(phy);
+
+		return mt7915_mcu_rdd_cmd(dev, RDD_CAC_END, ext_phy,
+					  MT_RX_SEL0, 0);
+	}
+
+stop:
+	err = mt7915_mcu_rdd_cmd(dev, RDD_NORMAL_START, ext_phy,
+				 MT_RX_SEL0, 0);
+	if (err < 0)
+		return err;
+
+	mt7915_dfs_stop_radar_detector(phy);
+	return 0;
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
new file mode 100644
index 000000000000..7da7551f98e4
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -0,0 +1,328 @@
+/* SPDX-License-Identifier: ISC */
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#ifndef __MT7915_MAC_H
+#define __MT7915_MAC_H
+
+#define MT_CT_PARSE_LEN			72
+#define MT_CT_DMA_BUF_NUM		2
+
+#define MT_RXD0_LENGTH			GENMASK(15, 0)
+#define MT_RXD0_PKT_TYPE		GENMASK(31, 27)
+
+#define MT_RXD0_NORMAL_ETH_TYPE_OFS	GENMASK(22, 16)
+#define MT_RXD0_NORMAL_IP_SUM		BIT(23)
+#define MT_RXD0_NORMAL_UDP_TCP_SUM	BIT(24)
+
+enum rx_pkt_type {
+	PKT_TYPE_TXS,
+	PKT_TYPE_TXRXV,
+	PKT_TYPE_NORMAL,
+	PKT_TYPE_RX_DUP_RFB,
+	PKT_TYPE_RX_TMR,
+	PKT_TYPE_RETRIEVE,
+	PKT_TYPE_TXRX_NOTIFY,
+	PKT_TYPE_RX_EVENT,
+};
+
+/* RXD DW1 */
+#define MT_RXD1_NORMAL_WLAN_IDX		GENMASK(9, 0)
+#define MT_RXD1_NORMAL_GROUP_1		BIT(11)
+#define MT_RXD1_NORMAL_GROUP_2		BIT(12)
+#define MT_RXD1_NORMAL_GROUP_3		BIT(13)
+#define MT_RXD1_NORMAL_GROUP_4		BIT(14)
+#define MT_RXD1_NORMAL_GROUP_5		BIT(15)
+#define MT_RXD1_NORMAL_SEC_MODE		GENMASK(20, 16)
+#define MT_RXD1_NORMAL_KEY_ID		GENMASK(22, 21)
+#define MT_RXD1_NORMAL_CM		BIT(23)
+#define MT_RXD1_NORMAL_CLM		BIT(24)
+#define MT_RXD1_NORMAL_ICV_ERR		BIT(25)
+#define MT_RXD1_NORMAL_TKIP_MIC_ERR	BIT(26)
+#define MT_RXD1_NORMAL_FCS_ERR		BIT(27)
+#define MT_RXD1_NORMAL_BAND_IDX		BIT(28)
+#define MT_RXD1_NORMAL_SPP_EN		BIT(29)
+#define MT_RXD1_NORMAL_ADD_OM		BIT(30)
+#define MT_RXD1_NORMAL_SEC_DONE		BIT(31)
+
+/* RXD DW2 */
+#define MT_RXD2_NORMAL_BSSID		GENMASK(5, 0)
+#define MT_RXD2_NORMAL_CO_ANT		BIT(6)
+#define MT_RXD2_NORMAL_BF_CQI		BIT(7)
+#define MT_RXD2_NORMAL_MAC_HDR_LEN	GENMASK(12, 8)
+#define MT_RXD2_NORMAL_HDR_TRANS	BIT(13)
+#define MT_RXD2_NORMAL_HDR_OFFSET	GENMASK(15, 14)
+#define MT_RXD2_NORMAL_TID		GENMASK(19, 16)
+#define MT_RXD2_NORMAL_MU_BAR		BIT(21)
+#define MT_RXD2_NORMAL_SW_BIT		BIT(22)
+#define MT_RXD2_NORMAL_AMSDU_ERR	BIT(23)
+#define MT_RXD2_NORMAL_MAX_LEN_ERROR	BIT(24)
+#define MT_RXD2_NORMAL_HDR_TRANS_ERROR	BIT(25)
+#define MT_RXD2_NORMAL_INT_FRAME	BIT(26)
+#define MT_RXD2_NORMAL_FRAG		BIT(27)
+#define MT_RXD2_NORMAL_NULL_FRAME	BIT(28)
+#define MT_RXD2_NORMAL_NDATA		BIT(29)
+#define MT_RXD2_NORMAL_NON_AMPDU	BIT(30)
+#define MT_RXD2_NORMAL_BF_REPORT	BIT(31)
+
+/* RXD DW3 */
+#define MT_RXD3_NORMAL_RXV_SEQ		GENMASK(7, 0)
+#define MT_RXD3_NORMAL_CH_FREQ		GENMASK(15, 8)
+#define MT_RXD3_NORMAL_ADDR_TYPE	GENMASK(17, 16)
+#define MT_RXD3_NORMAL_U2M		BIT(0)	/* presumably an ADDR_TYPE field value, not a DW3 bit - confirm */
+#define MT_RXD3_NORMAL_HTC_VLD		BIT(0)	/* NOTE(review): overlaps RXV_SEQ bit 0; bit 18 is unassigned here - confirm */
+#define MT_RXD3_NORMAL_TSF_COMPARE_LOSS	BIT(19)
+#define MT_RXD3_NORMAL_BEACON_MC	BIT(20)
+#define MT_RXD3_NORMAL_BEACON_UC	BIT(21)
+#define MT_RXD3_NORMAL_AMSDU		BIT(22)
+#define MT_RXD3_NORMAL_MESH		BIT(23)
+#define MT_RXD3_NORMAL_MHCP		BIT(24)
+#define MT_RXD3_NORMAL_NO_INFO_WB	BIT(25)
+#define MT_RXD3_NORMAL_DISABLE_RX_HDR_TRANS	BIT(26)
+#define MT_RXD3_NORMAL_POWER_SAVE_STAT	BIT(27)
+#define MT_RXD3_NORMAL_MORE		BIT(28)
+#define MT_RXD3_NORMAL_UNWANT		BIT(29)
+#define MT_RXD3_NORMAL_RX_DROP		BIT(30)
+#define MT_RXD3_NORMAL_VLAN2ETH		BIT(31)
+
+/* RXD DW4 */
+#define MT_RXD4_NORMAL_PAYLOAD_FORMAT	GENMASK(1, 0)
+#define MT_RXD4_NORMAL_PATTERN_DROP	BIT(9)
+#define MT_RXD4_NORMAL_CLS		BIT(10)
+#define MT_RXD4_NORMAL_OFLD		GENMASK(12, 11)
+#define MT_RXD4_NORMAL_MAGIC_PKT	BIT(13)
+#define MT_RXD4_NORMAL_WOL		GENMASK(18, 14)
+#define MT_RXD4_NORMAL_CLS_BITMAP	GENMASK(28, 19)
+#define MT_RXD3_NORMAL_PF_MODE		BIT(29)	/* NOTE(review): RXD3 prefix although listed under RXD DW4 */
+#define MT_RXD3_NORMAL_PF_STS		GENMASK(31, 30)	/* NOTE(review): same prefix mismatch - confirm */
+
+/* P-RXV */
+#define MT_PRXV_TX_RATE			GENMASK(6, 0)
+#define MT_PRXV_NSTS			GENMASK(9, 7)
+#define MT_PRXV_HT_AD_CODE		BIT(11)
+#define MT_PRXV_RCPI3			GENMASK(31, 24)
+#define MT_PRXV_RCPI2			GENMASK(23, 16)
+#define MT_PRXV_RCPI1			GENMASK(15, 8)
+#define MT_PRXV_RCPI0			GENMASK(7, 0)
+
+/* C-RXV */
+#define MT_CRXV_HT_STBC			GENMASK(1, 0)
+#define MT_CRXV_TX_MODE			GENMASK(7, 4)
+#define MT_CRXV_FRAME_MODE		GENMASK(10, 8)
+#define MT_CRXV_HT_SHORT_GI		GENMASK(14, 13)
+
+struct mt7915_rxv {
+	u32 phy;
+
+	/* P-RXV: bit 0~1, C-RXV: bit 2~19 (presumably indices into v[] - confirm) */
+	__le32 v[20];
+};
+
+enum tx_header_format {
+	MT_HDR_FORMAT_802_3,
+	MT_HDR_FORMAT_CMD,
+	MT_HDR_FORMAT_802_11,
+	MT_HDR_FORMAT_802_11_EXT,
+};
+
+enum tx_pkt_type {
+	MT_TX_TYPE_CT,
+	MT_TX_TYPE_SF,
+	MT_TX_TYPE_CMD,
+	MT_TX_TYPE_FW,
+};
+
+enum tx_pkt_queue_idx {
+	MT_LMAC_AC00,
+	MT_LMAC_AC01,
+	MT_LMAC_AC02,
+	MT_LMAC_AC03,
+	MT_LMAC_ALTX0 = 0x10,
+	MT_LMAC_BMC0 = 0x10,	/* NOTE(review): same value as ALTX0; 0x11 is unused - confirm */
+	MT_LMAC_BCN0 = 0x12,
+};
+
+enum tx_port_idx {
+	MT_TX_PORT_IDX_LMAC,
+	MT_TX_PORT_IDX_MCU
+};
+
+enum tx_mcu_port_q_idx {
+	MT_TX_MCU_PORT_RX_Q0 = 0x20,
+	MT_TX_MCU_PORT_RX_Q1,
+	MT_TX_MCU_PORT_RX_Q2,
+	MT_TX_MCU_PORT_RX_Q3,
+	MT_TX_MCU_PORT_RX_FWDL = 0x3e
+};
+
+#define MT_CT_INFO_APPLY_TXD		BIT(0)
+#define MT_CT_INFO_COPY_HOST_TXD_ALL	BIT(1)
+#define MT_CT_INFO_MGMT_FRAME		BIT(2)
+#define MT_CT_INFO_NONE_CIPHER_FRAME	BIT(3)
+#define MT_CT_INFO_HSR2_TX		BIT(4)
+
+#define MT_TXD_SIZE			(8 * 4)
+
+#define MT_TXD0_Q_IDX			GENMASK(31, 25)
+#define MT_TXD0_PKT_FMT			GENMASK(24, 23)
+#define MT_TXD0_ETH_TYPE_OFFSET		GENMASK(22, 16)
+#define MT_TXD0_TX_BYTES		GENMASK(15, 0)
+
+#define MT_TXD1_LONG_FORMAT		BIT(31)
+#define MT_TXD1_TGID			BIT(30)
+#define MT_TXD1_OWN_MAC			GENMASK(29, 24)
+#define MT_TXD1_AMSDU			BIT(23)
+#define MT_TXD1_TID			GENMASK(22, 20)
+#define MT_TXD1_HDR_PAD			GENMASK(19, 18)
+#define MT_TXD1_HDR_FORMAT		GENMASK(17, 16)
+#define MT_TXD1_HDR_INFO		GENMASK(15, 11)
+#define MT_TXD1_VTA			BIT(10)
+#define MT_TXD1_WLAN_IDX		GENMASK(9, 0)
+
+#define MT_TXD2_FIX_RATE		BIT(31)
+#define MT_TXD2_FIXED_RATE		BIT(30)
+#define MT_TXD2_POWER_OFFSET		GENMASK(29, 24)
+#define MT_TXD2_MAX_TX_TIME		GENMASK(23, 16)
+#define MT_TXD2_FRAG			GENMASK(15, 14)
+#define MT_TXD2_HTC_VLD			BIT(13)
+#define MT_TXD2_DURATION		BIT(12)
+#define MT_TXD2_BIP			BIT(11)
+#define MT_TXD2_MULTICAST		BIT(10)
+#define MT_TXD2_RTS			BIT(9)
+#define MT_TXD2_SOUNDING		BIT(8)
+#define MT_TXD2_NDPA			BIT(7)
+#define MT_TXD2_NDP			BIT(6)
+#define MT_TXD2_FRAME_TYPE		GENMASK(5, 4)
+#define MT_TXD2_SUB_TYPE		GENMASK(3, 0)
+
+#define MT_TXD3_SN_VALID		BIT(31)
+#define MT_TXD3_PN_VALID		BIT(30)
+#define MT_TXD3_SW_POWER_MGMT		BIT(29)
+#define MT_TXD3_BA_DISABLE		BIT(28)
+#define MT_TXD3_SEQ			GENMASK(27, 16)
+#define MT_TXD3_REM_TX_COUNT		GENMASK(15, 11)
+#define MT_TXD3_TX_COUNT		GENMASK(10, 6)
+#define MT_TXD3_TIMING_MEASURE		BIT(5)
+#define MT_TXD3_DAS			BIT(4)
+#define MT_TXD3_EEOSP			BIT(3)
+#define MT_TXD3_EMRD			BIT(2)
+#define MT_TXD3_PROTECT_FRAME		BIT(1)
+#define MT_TXD3_NO_ACK			BIT(0)
+
+#define MT_TXD4_PN_LOW			GENMASK(31, 0)
+
+#define MT_TXD5_PN_HIGH			GENMASK(31, 16)
+#define MT_TXD5_MD			BIT(15)
+#define MT_TXD5_ADD_BA			BIT(14)
+#define MT_TXD5_TX_STATUS_HOST		BIT(10)
+#define MT_TXD5_TX_STATUS_MCU		BIT(9)
+#define MT_TXD5_TX_STATUS_FMT		BIT(8)
+#define MT_TXD5_PID			GENMASK(7, 0)
+
+#define MT_TXD6_TX_IBF			BIT(31)
+#define MT_TXD6_TX_EBF			BIT(30)
+#define MT_TXD6_TX_RATE			GENMASK(29, 16)
+#define MT_TXD6_SGI			GENMASK(15, 14)
+#define MT_TXD6_HELTF			GENMASK(13, 12)
+#define MT_TXD6_LDPC			BIT(11)
+#define MT_TXD6_SPE_ID_IDX		BIT(10)
+#define MT_TXD6_ANT_ID			GENMASK(7, 4)
+#define MT_TXD6_DYN_BW			BIT(3)
+#define MT_TXD6_FIXED_BW		BIT(2)
+#define MT_TXD6_BW			GENMASK(2, 0)
+
+#define MT_TXD7_TXD_LEN			GENMASK(31, 30)
+#define MT_TXD7_UDP_TCP_SUM		BIT(29)
+#define MT_TXD7_IP_SUM			BIT(28)
+
+#define MT_TXD7_TYPE			GENMASK(21, 20)
+#define MT_TXD7_SUB_TYPE		GENMASK(19, 16)
+
+#define MT_TXD7_PSE_FID			GENMASK(27, 16)
+#define MT_TXD7_SPE_IDX			GENMASK(15, 11)
+#define MT_TXD7_HW_AMSDU		BIT(10)
+#define MT_TXD7_TX_TIME			GENMASK(9, 0)
+
+#define MT_TX_RATE_STBC			BIT(13)
+#define MT_TX_RATE_NSS			GENMASK(12, 10)
+#define MT_TX_RATE_MODE			GENMASK(9, 6)
+#define MT_TX_RATE_IDX			GENMASK(5, 0)
+
+#define MT_TXP_MAX_BUF_NUM		6
+
+struct mt7915_txp {
+	__le16 flags;
+	__le16 token;
+	u8 bss_idx;
+	u8 rept_wds_wcid;
+	u8 rsv;
+	u8 nbuf;
+	__le32 buf[MT_TXP_MAX_BUF_NUM];
+	__le16 len[MT_TXP_MAX_BUF_NUM];
+} __packed __aligned(4);
+
+struct mt7915_tx_free {
+	__le16 rx_byte_cnt;
+	__le16 ctrl;
+	u8 txd_cnt;
+	u8 rsv[3];
+	__le32 info[];
+} __packed __aligned(4);
+
+#define MT_TX_FREE_MSDU_CNT		GENMASK(9, 0)
+#define MT_TX_FREE_WLAN_ID		GENMASK(23, 14)
+#define MT_TX_FREE_LATENCY		GENMASK(12, 0)
+/* 0: success, others: dropped */
+#define MT_TX_FREE_STATUS		GENMASK(14, 13)
+#define MT_TX_FREE_MSDU_ID		GENMASK(30, 16)
+#define MT_TX_FREE_PAIR			BIT(31)
+/* will support this field in further revision */
+#define MT_TX_FREE_RATE			GENMASK(13, 0)
+
+struct mt7915_dfs_pulse {
+	u32 max_width;		/* us */
+	int max_pwr;		/* dbm */
+	int min_pwr;		/* dbm */
+	u32 min_stgr_pri;	/* us */
+	u32 max_stgr_pri;	/* us */
+	u32 min_cr_pri;		/* us */
+	u32 max_cr_pri;		/* us */
+};
+
+struct mt7915_dfs_pattern {
+	u8 enb;
+	u8 stgr;
+	u8 min_crpn;
+	u8 max_crpn;
+	u8 min_crpr;
+	u8 min_pw;
+	u32 min_pri;
+	u32 max_pri;
+	u8 max_pw;
+	u8 min_crbn;
+	u8 max_crbn;
+	u8 min_stgpn;
+	u8 max_stgpn;
+	u8 min_stgpr;
+	u8 rsv[2];
+	u32 min_stgpr_diff;
+} __packed;
+
+struct mt7915_dfs_radar_spec {
+	struct mt7915_dfs_pulse pulse_th;
+	struct mt7915_dfs_pattern radar_pattern[16];
+};
+
+static inline struct mt7915_txp *
+mt7915_txwi_to_txp(struct mt76_dev *dev, struct mt76_txwi_cache *t)
+{
+	u8 *base;
+
+	/* the TXP sits MT_TXD_SIZE bytes into the txwi buffer; NULL in, NULL out */
+	if (t) {
+		base = mt76_get_txwi_ptr(dev, t);
+		return (struct mt7915_txp *)&base[MT_TXD_SIZE];
+	}
+	return NULL;
+}
+
+#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
new file mode 100644
index 000000000000..ae5ed41d337b
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -0,0 +1,741 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include <linux/etherdevice.h>
+#include <linux/platform_device.h>
+#include <linux/pci.h>
+#include <linux/module.h>
+#include "mt7915.h"
+#include "mcu.h"
+
+static bool mt7915_dev_running(struct mt7915_dev *dev)
+{
+	struct mt7915_phy *ext;
+
+	/* true if the primary phy runs, or else if an ext phy exists and runs */
+	if (!test_bit(MT76_STATE_RUNNING, &dev->mphy.state)) {
+		ext = mt7915_ext_phy(dev);
+		return ext && test_bit(MT76_STATE_RUNNING, &ext->mt76->state);
+	}
+	return true;
+}
+
+static int mt7915_start(struct ieee80211_hw *hw)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	bool running;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	running = mt7915_dev_running(dev);
+
+	if (!running) {
+		mt7915_mcu_set_pm(dev, 0, 0);
+		mt7915_mcu_set_mac(dev, 0, true, false);
+		mt7915_mcu_set_scs(dev, 0, true);
+	}
+
+	if (phy != &dev->phy) {
+		mt7915_mcu_set_pm(dev, 1, 0);
+		mt7915_mcu_set_mac(dev, 1, true, false);
+		mt7915_mcu_set_scs(dev, 1, true);
+	}
+
+	mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
+
+	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+
+	if (running)
+		goto out;
+
+	mt7915_mac_reset_counters(phy);
+
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+				     MT7915_WATCHDOG_TIME);
+out:
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
+static void mt7915_stop(struct ieee80211_hw *hw)
+{	/* Stop one phy: clear its RUNNING bit and switch off whatever is now idle. */
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+
+	/* mac_work takes mt76.mutex itself, so sync-cancel it before locking */
+	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	mutex_lock(&dev->mt76.mutex);
+
+	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
+	if (phy != &dev->phy) {
+		mt7915_mcu_set_pm(dev, 1, 1);
+		mt7915_mcu_set_mac(dev, 1, false, false);
+	}
+	if (!mt7915_dev_running(dev)) {
+		mt7915_mcu_set_pm(dev, 0, 1);
+		mt7915_mcu_set_mac(dev, 0, false, false);
+	} else {	/* the other phy still relies on the shared mac_work */
+		ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+					     MT7915_WATCHDOG_TIME);
+	}
+	mutex_unlock(&dev->mt76.mutex);
+}
+
+static int get_omac_idx(enum nl80211_iftype type, u32 mask)
+{
+	u32 unused = ~mask;	/* set bits mark indices that are still free */
+	int n;
+
+	switch (type) {
+	case NL80211_IFTYPE_MONITOR:
+	case NL80211_IFTYPE_AP:
+		/* AP/monitor: try HW_BSSID_0 first, then the extended range */
+		if (unused & BIT(HW_BSSID_0))
+			return HW_BSSID_0;
+		for (n = EXT_BSSID_1; n < EXT_BSSID_END; n++)
+			if (unused & BIT(n))
+				return n;
+		break;
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_ADHOC:
+	case NL80211_IFTYPE_STATION:
+		/* station-like types: first free HW BSSID from HW_BSSID_1 up */
+		for (n = HW_BSSID_1; n < HW_BSSID_MAX; n++)
+			if (unused & BIT(n))
+				return n;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	return -1;	/* unsupported type, or every candidate index is taken */
+}
+
+static int mt7915_add_interface(struct ieee80211_hw *hw,
+				struct ieee80211_vif *vif)
+{	/* Claim vif/omac/wmm/wcid indices for a new vif; 0 or -ENOSPC/MCU error. */
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	struct mt76_txq *mtxq;
+	bool ext_phy = phy != &dev->phy;
+	int idx, ret = 0;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	mvif->idx = ffs(~phy->vif_mask) - 1;	/* lowest clear bit of this phy's mask */
+	if (mvif->idx >= MT7915_MAX_INTERFACES) {
+		ret = -ENOSPC;
+		goto out;
+	}
+
+	idx = get_omac_idx(vif->type, phy->omac_mask);
+	if (idx < 0) {
+		ret = -ENOSPC;
+		goto out;
+	}
+	mvif->omac_idx = idx;
+	mvif->dev = dev;
+	mvif->band_idx = ext_phy;
+
+	if (ext_phy)	/* ext phy only uses the upper half of the WMM sets */
+		mvif->wmm_idx = ext_phy * (MT7915_MAX_WMM_SETS / 2) +
+				mvif->idx % (MT7915_MAX_WMM_SETS / 2);
+	else
+		mvif->wmm_idx = mvif->idx % MT7915_MAX_WMM_SETS;
+
+	ret = mt7915_mcu_add_dev_info(dev, vif, true);
+	if (ret)
+		goto out;
+
+	phy->vif_mask |= BIT(mvif->idx);	/* masks are committed only on success */
+	phy->omac_mask |= BIT(mvif->omac_idx);
+	/* NOTE(review): vif_mask is per phy, wcid[] per device - idx may collide */
+	idx = MT7915_WTBL_RESERVED - mvif->idx;
+
+	INIT_LIST_HEAD(&mvif->sta.poll_list);
+	mvif->sta.wcid.idx = idx;
+	mvif->sta.wcid.ext_phy = mvif->band_idx;
+	mvif->sta.wcid.hw_key_idx = -1;
+	mvif->sta.wcid.tx_info |= MT_WCID_TX_INFO_SET;
+	mt7915_mac_wtbl_update(dev, idx,
+			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+
+	rcu_assign_pointer(dev->mt76.wcid[idx], &mvif->sta.wcid);
+	if (vif->txq) {
+		mtxq = (struct mt76_txq *)vif->txq->drv_priv;
+		mtxq->wcid = &mvif->sta.wcid;
+		mt76_txq_init(&dev->mt76, vif->txq);
+	}
+
+out:
+	mutex_unlock(&dev->mt76.mutex);
+
+	return ret;
+}
+
+static void mt7915_remove_interface(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = &mvif->sta;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	int idx = msta->wcid.idx;
+
+	/* TODO: disable beacon for the bss */
+
+	mt7915_mcu_add_dev_info(dev, vif, false);
+
+	rcu_assign_pointer(dev->mt76.wcid[idx], NULL);
+	if (vif->txq)
+		mt76_txq_remove(&dev->mt76, vif->txq);
+
+	mutex_lock(&dev->mt76.mutex);
+	phy->vif_mask &= ~BIT(mvif->idx);
+	phy->omac_mask &= ~BIT(mvif->omac_idx);
+	mutex_unlock(&dev->mt76.mutex);
+
+	spin_lock_bh(&dev->sta_poll_lock);
+	if (!list_empty(&msta->poll_list))
+		list_del_init(&msta->poll_list);
+	spin_unlock_bh(&dev->sta_poll_lock);
+}
+
+static void mt7915_init_dfs_state(struct mt7915_phy *phy)
+{
+	struct mt76_phy *mphy = phy->mt76;
+	struct ieee80211_hw *hw = mphy->hw;
+	struct cfg80211_chan_def *hw_def = &hw->conf.chandef;
+	struct cfg80211_chan_def *phy_def = &mphy->chandef;
+
+	/* off-channel, or hw channel without the radar flag: keep dfs_state */
+	if ((hw->conf.flags & IEEE80211_CONF_OFFCHANNEL) ||
+	    !(hw_def->chan->flags & IEEE80211_CHAN_RADAR))
+		return;
+
+	if (phy_def->chan->center_freq == hw_def->chan->center_freq &&
+	    phy_def->width == hw_def->width)
+		return;
+
+	phy->dfs_state = -1;
+}
+
+static int mt7915_set_channel(struct mt7915_phy *phy)
+{
+	struct mt7915_dev *dev = phy->dev;
+	int ret;
+
+	cancel_delayed_work_sync(&dev->mt76.mac_work);
+
+	mutex_lock(&dev->mt76.mutex);
+	set_bit(MT76_RESET, &phy->mt76->state);
+
+	mt7915_init_dfs_state(phy);
+	mt76_set_channel(phy->mt76);
+
+	ret = mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD_CHANNEL_SWITCH);
+	if (ret)
+		goto out;
+
+	mt7915_mac_set_timing(phy);
+	ret = mt7915_dfs_init_radar_detector(phy);
+	mt7915_mac_cca_stats_reset(phy);
+
+	mt7915_mac_reset_counters(phy);
+	phy->noise = 0;
+
+out:
+	clear_bit(MT76_RESET, &phy->mt76->state);
+	mutex_unlock(&dev->mt76.mutex);
+
+	mt76_txq_schedule_all(phy->mt76);
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+				     MT7915_WATCHDOG_TIME);
+
+	return ret;
+}
+
+/* mac80211 .set_key callback.
+ *
+ * Operates on the station's wcid when @sta is given, otherwise on the
+ * interface's own embedded station entry (mvif->sta).
+ *
+ * Returns -EOPNOTSUPP for non-pairwise TKIP/CCMP keys on ADHOC/MESH_POINT
+ * interfaces and for any cipher not listed in the switch below; otherwise
+ * returns the result of mt7915_mcu_add_key().
+ */
+static int mt7915_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
+			  struct ieee80211_vif *vif, struct ieee80211_sta *sta,
+			  struct ieee80211_key_conf *key)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = sta ? (struct mt7915_sta *)sta->drv_priv :
+				  &mvif->sta;
+	struct mt76_wcid *wcid = &msta->wcid;
+	int idx = key->keyidx;
+
+	/* The hardware does not support per-STA RX GTK, fallback
+	 * to software mode for these.
+	 */
+	if ((vif->type == NL80211_IFTYPE_ADHOC ||
+	     vif->type == NL80211_IFTYPE_MESH_POINT) &&
+	    (key->cipher == WLAN_CIPHER_SUITE_TKIP ||
+	     key->cipher == WLAN_CIPHER_SUITE_CCMP) &&
+	    !(key->flags & IEEE80211_KEY_FLAG_PAIRWISE))
+		return -EOPNOTSUPP;
+
+	/* fall back to sw encryption for unsupported ciphers */
+	switch (key->cipher) {
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		key->flags |= IEEE80211_KEY_FLAG_GENERATE_MMIE;
+		break;
+	case WLAN_CIPHER_SUITE_WEP40:
+	case WLAN_CIPHER_SUITE_WEP104:
+	case WLAN_CIPHER_SUITE_TKIP:
+	case WLAN_CIPHER_SUITE_CCMP:
+	case WLAN_CIPHER_SUITE_CCMP_256:
+	case WLAN_CIPHER_SUITE_GCMP:
+	case WLAN_CIPHER_SUITE_GCMP_256:
+	case WLAN_CIPHER_SUITE_SMS4:
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	/* On SET_KEY record the wcid index in the key and the key index in
+	 * the wcid; on removal only invalidate the wcid's key index when the
+	 * key being removed is the one currently recorded.
+	 */
+	if (cmd == SET_KEY) {
+		key->hw_key_idx = wcid->idx;
+		wcid->hw_key_idx = idx;
+	} else if (idx == wcid->hw_key_idx) {
+		wcid->hw_key_idx = -1;
+	}
+	mt76_wcid_key_setup(&dev->mt76, wcid,
+			    cmd == SET_KEY ? key : NULL);
+
+	return mt7915_mcu_add_key(dev, vif, msta, key, cmd);
+}
+
+/* mac80211 .config callback.
+ *
+ * On a channel change the queues are stopped, mt7915_set_channel() is called
+ * and the queues are woken again; if the channel switch fails its error is
+ * returned immediately (the queues are not woken on that path).
+ * On a monitor-flag change MT_WF_RFCR_DROP_OTHER_UC is toggled in the cached
+ * rx filter and written to the band's RFCR register under dev->mt76.mutex.
+ */
+static int mt7915_config(struct ieee80211_hw *hw, u32 changed)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	/* true when this hw is not the primary phy embedded in dev */
+	bool band = phy != &dev->phy;
+	int ret;
+
+	if (changed & IEEE80211_CONF_CHANGE_CHANNEL) {
+		ieee80211_stop_queues(hw);
+		ret = mt7915_set_channel(phy);
+		if (ret)
+			return ret;
+		ieee80211_wake_queues(hw);
+	}
+
+	mutex_lock(&dev->mt76.mutex);
+
+	if (changed & IEEE80211_CONF_CHANGE_MONITOR) {
+		if (!(hw->conf.flags & IEEE80211_CONF_MONITOR))
+			phy->rxfilter |= MT_WF_RFCR_DROP_OTHER_UC;
+		else
+			phy->rxfilter &= ~MT_WF_RFCR_DROP_OTHER_UC;
+
+		mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
+	}
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
+/* mac80211 .conf_tx callback: cache the cw_min/cw_max/aifs/txop values of
+ * @params in the per-interface wmm table entry for @queue. Nothing is sent
+ * to the hardware here. Always returns 0.
+ */
+static int
+mt7915_conf_tx(struct ieee80211_hw *hw, struct ieee80211_vif *vif, u16 queue,
+	       const struct ieee80211_tx_queue_params *params)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+
+	/* no need to update right away, we'll get BSS_CHANGED_QOS */
+	mvif->wmm[queue].cw_min = params->cw_min;
+	mvif->wmm[queue].cw_max = params->cw_max;
+	mvif->wmm[queue].aifs = params->aifs;
+	mvif->wmm[queue].txop = params->txop;
+
+	return 0;
+}
+
+/* mac80211 .configure_filter callback.
+ *
+ * Rebuilds the cached rx filter (phy->rxfilter) from *total_flags, writes it
+ * to the band's RFCR register, and sets or clears the control-frame drop bits
+ * in RFCR1 depending on FIF_CONTROL. On return *total_flags holds only the
+ * FIF_* flags handled via MT76_FILTER() below (OTHER_BSS, FCSFAIL, CONTROL).
+ * @changed_flags and @multicast are not used.
+ */
+static void mt7915_configure_filter(struct ieee80211_hw *hw,
+				    unsigned int changed_flags,
+				    unsigned int *total_flags,
+				    u64 multicast)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	bool band = phy != &dev->phy;
+
+	u32 ctl_flags = MT_WF_RFCR1_DROP_ACK |
+			MT_WF_RFCR1_DROP_BF_POLL |
+			MT_WF_RFCR1_DROP_BA |
+			MT_WF_RFCR1_DROP_CFEND |
+			MT_WF_RFCR1_DROP_CFACK;
+	u32 flags = 0;
+
+/* Accumulate FIF_<_flag> from *total_flags into flags, then set the hardware
+ * drop bits _hw when that flag is NOT requested and clear them when it is.
+ */
+#define MT76_FILTER(_flag, _hw) do {					\
+		flags |= *total_flags & FIF_##_flag;			\
+		phy->rxfilter &= ~(_hw);				\
+		phy->rxfilter |= !(flags & FIF_##_flag) * (_hw);	\
+	} while (0)
+
+	/* these drop bits are always cleared, independent of the FIF flags */
+	phy->rxfilter &= ~(MT_WF_RFCR_DROP_OTHER_BSS |
+			   MT_WF_RFCR_DROP_OTHER_BEACON |
+			   MT_WF_RFCR_DROP_FRAME_REPORT |
+			   MT_WF_RFCR_DROP_PROBEREQ |
+			   MT_WF_RFCR_DROP_MCAST_FILTERED |
+			   MT_WF_RFCR_DROP_MCAST |
+			   MT_WF_RFCR_DROP_BCAST |
+			   MT_WF_RFCR_DROP_DUPLICATE |
+			   MT_WF_RFCR_DROP_A2_BSSID |
+			   MT_WF_RFCR_DROP_UNWANTED_CTL |
+			   MT_WF_RFCR_DROP_STBC_MULTI);
+
+	MT76_FILTER(OTHER_BSS, MT_WF_RFCR_DROP_OTHER_TIM |
+			       MT_WF_RFCR_DROP_A3_MAC |
+			       MT_WF_RFCR_DROP_A3_BSSID);
+
+	MT76_FILTER(FCSFAIL, MT_WF_RFCR_DROP_FCSFAIL);
+
+	MT76_FILTER(CONTROL, MT_WF_RFCR_DROP_CTS |
+			     MT_WF_RFCR_DROP_RTS |
+			     MT_WF_RFCR_DROP_CTL_RSV |
+			     MT_WF_RFCR_DROP_NDPA);
+
+	*total_flags = flags;
+	mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
+
+	if (*total_flags & FIF_CONTROL)
+		mt76_clear(dev, MT_WF_RFCR1(band), ctl_flags);
+	else
+		mt76_set(dev, MT_WF_RFCR1(band), ctl_flags);
+}
+
+/* mac80211 .bss_info_changed callback.
+ *
+ * Dispatches on the bits set in @changed, holding dev->mt76.mutex for the
+ * whole call. The return values of the mt7915_mcu_* helpers called here are
+ * not checked.
+ */
+static void mt7915_bss_info_changed(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif,
+				    struct ieee80211_bss_conf *info,
+				    u32 changed)
+{
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+
+	mutex_lock(&dev->mt76.mutex);
+
+	/*
+	 * station mode uses BSSID to map the wlan entry to a peer,
+	 * and then peer references bss_info_rfch to set bandwidth cap.
+	 */
+	if (changed & BSS_CHANGED_BSSID &&
+	    vif->type == NL80211_IFTYPE_STATION) {
+		/* an all-zero BSSID is treated as leaving the BSS */
+		bool join = !is_zero_ether_addr(info->bssid);
+
+		mt7915_mcu_add_bss_info(phy, vif, join);
+		mt7915_mcu_add_sta(dev, vif, NULL, join);
+	}
+
+	if (changed & BSS_CHANGED_ASSOC)
+		mt7915_mcu_add_bss_info(phy, vif, info->assoc);
+
+	if (changed & BSS_CHANGED_ERP_SLOT) {
+		int slottime = info->use_short_slot ? 9 : 20;
+
+		/* only reprogram timing when the slot time really changed */
+		if (slottime != phy->slottime) {
+			phy->slottime = slottime;
+			mt7915_mac_set_timing(phy);
+		}
+	}
+
+	if (changed & BSS_CHANGED_BEACON_ENABLED) {
+		mt7915_mcu_add_bss_info(phy, vif, info->enable_beacon);
+		mt7915_mcu_add_sta(dev, vif, NULL, info->enable_beacon);
+	}
+
+	/* ensure that enable txcmd_mode after bss_info */
+	if (changed & (BSS_CHANGED_QOS | BSS_CHANGED_BEACON_ENABLED))
+		mt7915_mcu_set_tx(dev, vif);
+
+	if (changed & (BSS_CHANGED_BEACON |
+		       BSS_CHANGED_BEACON_ENABLED))
+		mt7915_mcu_add_beacon(hw, vif, info->enable_beacon);
+
+	mutex_unlock(&dev->mt76.mutex);
+}
+
+/* mac80211 .channel_switch_beacon callback: re-submit the beacon for @vif
+ * (enabled) under dev->mt76.mutex. @chandef is not used here.
+ */
+static void
+mt7915_channel_switch_beacon(struct ieee80211_hw *hw,
+			     struct ieee80211_vif *vif,
+			     struct cfg80211_chan_def *chandef)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+
+	mutex_lock(&dev->mt76.mutex);
+	mt7915_mcu_add_beacon(hw, vif, true);
+	mutex_unlock(&dev->mt76.mutex);
+}
+
+/* Set up driver state for a new station.
+ *
+ * Allocates a wcid index from dev->mt76.wcid_mask, initialises the
+ * per-station private data, clears the ADM counters of the WTBL entry and
+ * sends the station record to the MCU.
+ *
+ * Returns -ENOSPC when no wcid index is available, 0 otherwise (the result
+ * of mt7915_mcu_add_sta() is not propagated).
+ */
+int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+		       struct ieee80211_sta *sta)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	int idx;
+
+	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA - 1);
+	if (idx < 0)
+		return -ENOSPC;
+
+	INIT_LIST_HEAD(&msta->poll_list);
+	INIT_WORK(&msta->stats_work, mt7915_mac_sta_stats_work);
+	spin_lock_init(&msta->ampdu_lock);
+	msta->vif = mvif;
+	msta->wcid.sta = 1;
+	msta->wcid.idx = idx;
+	msta->wcid.ext_phy = mvif->band_idx;
+	msta->wcid.tx_info |= MT_WCID_TX_INFO_SET;
+	msta->stats.jiffies = jiffies;
+
+	mt7915_mac_wtbl_update(dev, idx,
+			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+
+	mt7915_mcu_add_sta(dev, vif, sta, true);
+
+	return 0;
+}
+
+/* Tear down driver state for a station: tell the MCU the station is gone,
+ * clear the ADM counters of its WTBL entry and unlink it from the poll list
+ * (under dev->sta_poll_lock) if it is currently queued there.
+ */
+void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+			   struct ieee80211_sta *sta)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+
+	mt7915_mcu_add_sta(dev, vif, sta, false);
+
+	mt7915_mac_wtbl_update(dev, msta->wcid.idx,
+			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
+
+	spin_lock_bh(&dev->sta_poll_lock);
+	if (!list_empty(&msta->poll_list))
+		list_del_init(&msta->poll_list);
+	spin_unlock_bh(&dev->sta_poll_lock);
+}
+
+/* mac80211 .tx callback.
+ *
+ * Picks the wcid used for the frame and hands it to mt76_tx():
+ *  - the station's wcid when control->sta is set,
+ *  - else the interface's embedded station wcid when the frame has a vif,
+ *  - else the device-wide global wcid.
+ */
+static void mt7915_tx(struct ieee80211_hw *hw,
+		      struct ieee80211_tx_control *control,
+		      struct sk_buff *skb)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt76_phy *mphy = hw->priv;
+	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_vif *vif = info->control.vif;
+	struct mt76_wcid *wcid = &dev->mt76.global_wcid;
+
+	if (control->sta) {
+		struct mt7915_sta *sta;
+
+		sta = (struct mt7915_sta *)control->sta->drv_priv;
+		wcid = &sta->wcid;
+	}
+
+	if (vif && !control->sta) {
+		struct mt7915_vif *mvif;
+
+		mvif = (struct mt7915_vif *)vif->drv_priv;
+		wcid = &mvif->sta.wcid;
+	}
+
+	mt76_tx(mphy, control->sta, wcid, skb);
+}
+
+/* mac80211 .set_rts_threshold callback: forward @val to the MCU under
+ * dev->mt76.mutex. Always returns 0; the MCU helper's result is not checked.
+ */
+static int mt7915_set_rts_threshold(struct ieee80211_hw *hw, u32 val)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+
+	mutex_lock(&dev->mt76.mutex);
+	mt7915_mcu_set_rts_thresh(phy, val);
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
+/* mac80211 .ampdu_action callback.
+ *
+ * Handles RX/TX block-ack session start/stop for params->sta / params->tid
+ * under dev->mt76.mutex.
+ *
+ * Returns -EINVAL when the station has no txq for the tid,
+ * IEEE80211_AMPDU_TX_START_IMMEDIATE for IEEE80211_AMPDU_TX_START, and 0 for
+ * every other action (MCU helper results are not propagated).
+ */
+static int
+mt7915_ampdu_action(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		    struct ieee80211_ampdu_params *params)
+{
+	enum ieee80211_ampdu_mlme_action action = params->action;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct ieee80211_sta *sta = params->sta;
+	struct ieee80211_txq *txq = sta->txq[params->tid];
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	u16 tid = params->tid;
+	u16 ssn = params->ssn;
+	struct mt76_txq *mtxq;
+	int ret = 0;
+
+	if (!txq)
+		return -EINVAL;
+
+	mtxq = (struct mt76_txq *)txq->drv_priv;
+
+	mutex_lock(&dev->mt76.mutex);
+	switch (action) {
+	case IEEE80211_AMPDU_RX_START:
+		mt76_rx_aggr_start(&dev->mt76, &msta->wcid, tid, ssn,
+				   params->buf_size);
+		mt7915_mcu_add_rx_ba(dev, params, true);
+		break;
+	case IEEE80211_AMPDU_RX_STOP:
+		mt76_rx_aggr_stop(&dev->mt76, &msta->wcid, tid);
+		mt7915_mcu_add_rx_ba(dev, params, false);
+		break;
+	case IEEE80211_AMPDU_TX_OPERATIONAL:
+		mtxq->aggr = true;
+		mtxq->send_bar = false;
+		mt7915_set_aggr_state(msta, tid, MT7915_AGGR_OPERATIONAL);
+		mt7915_mcu_add_tx_ba(dev, params, true);
+		break;
+	case IEEE80211_AMPDU_TX_STOP_FLUSH:
+	case IEEE80211_AMPDU_TX_STOP_FLUSH_CONT:
+		mtxq->aggr = false;
+		mt7915_set_aggr_state(msta, tid, MT7915_AGGR_STOP);
+		mt7915_mcu_add_tx_ba(dev, params, false);
+		break;
+	case IEEE80211_AMPDU_TX_START:
+		mtxq->agg_ssn = IEEE80211_SN_TO_SEQ(ssn);
+		mt7915_set_aggr_state(msta, tid, MT7915_AGGR_START);
+		ret = IEEE80211_AMPDU_TX_START_IMMEDIATE;
+		break;
+	case IEEE80211_AMPDU_TX_STOP_CONT:
+		/* same as the flush cases, plus the stop callback to mac80211 */
+		mtxq->aggr = false;
+		mt7915_set_aggr_state(msta, tid, MT7915_AGGR_STOP);
+		mt7915_mcu_add_tx_ba(dev, params, false);
+		ieee80211_stop_tx_ba_cb_irqsafe(vif, sta->addr, tid);
+		break;
+	}
+	mutex_unlock(&dev->mt76.mutex);
+
+	return ret;
+}
+
+/* mac80211 .sta_add callback: expressed as the NOTEXIST -> NONE station
+ * state transition handled by mt76_sta_state().
+ */
+static int
+mt7915_sta_add(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+	       struct ieee80211_sta *sta)
+{
+	return mt76_sta_state(hw, vif, sta, IEEE80211_STA_NOTEXIST,
+			      IEEE80211_STA_NONE);
+}
+
+/* mac80211 .sta_remove callback: expressed as the NONE -> NOTEXIST station
+ * state transition handled by mt76_sta_state().
+ */
+static int
+mt7915_sta_remove(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+		  struct ieee80211_sta *sta)
+{
+	return mt76_sta_state(hw, vif, sta, IEEE80211_STA_NONE,
+			      IEEE80211_STA_NOTEXIST);
+}
+
+/* mac80211 .get_stats callback: copy the RTS, FCS-error and ACK-failure
+ * counters cached in phy->mib into @stats. Always returns 0.
+ */
+static int
+mt7915_get_stats(struct ieee80211_hw *hw,
+		 struct ieee80211_low_level_stats *stats)
+{
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	struct mib_stats *mib = &phy->mib;
+
+	stats->dot11RTSSuccessCount = mib->rts_cnt;
+	stats->dot11RTSFailureCount = mib->rts_retries_cnt;
+	stats->dot11FCSErrorCount = mib->fcs_err_cnt;
+	stats->dot11ACKFailureCount = mib->ack_fail_cnt;
+
+	return 0;
+}
+
+/* mac80211 .set_coverage_class callback: store @coverage_class (negative
+ * values are clamped to 0) and reprogram the MAC timing.
+ */
+static void
+mt7915_set_coverage_class(struct ieee80211_hw *hw, s16 coverage_class)
+{
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+
+	phy->coverage_class = max_t(s16, coverage_class, 0);
+	mt7915_mac_set_timing(phy);
+}
+
+/* mac80211 .set_antenna callback.
+ *
+ * Returns -EINVAL when @tx_ant is empty, differs from @rx_ant, or its lowest
+ * set bit lies beyond the number of available tx antennas. Otherwise stores
+ * the mask as the phy's antenna mask and chainmask and refreshes the stream
+ * capabilities under dev->mt76.mutex, returning 0.
+ */
+static int
+mt7915_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
+{
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	int max_nss = hweight8(hw->wiphy->available_antennas_tx);
+	bool ext_phy = phy != &dev->phy;
+
+	if (!tx_ant || tx_ant != rx_ant || ffs(tx_ant) > max_nss)
+		return -EINVAL;
+
+	/* a mask that is not of the form 2^n - 1 is replaced by the
+	 * contiguous mask of the bits below its lowest set bit
+	 */
+	if ((BIT(hweight8(tx_ant)) - 1) != tx_ant)
+		tx_ant = BIT(ffs(tx_ant) - 1) - 1;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	phy->mt76->antenna_mask = tx_ant;
+
+	/* on the extension phy the chainmask is shifted up: by 2 when the
+	 * device chainmask is 0xf, by 1 otherwise
+	 */
+	if (ext_phy) {
+		if (dev->chainmask == 0xf)
+			tx_ant <<= 2;
+		else
+			tx_ant <<= 1;
+	}
+	phy->chainmask = tx_ant;
+
+	mt76_set_stream_caps(&dev->mt76, true);
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	return 0;
+}
+
+/* mac80211 .sta_rc_update callback.
+ *
+ * Checks under RCU that the station is still known on @vif; if it is, sets
+ * bit @changed in the station's stats.changed word and queues the
+ * per-station stats work. Does nothing when the station cannot be found.
+ */
+static void
+mt7915_sta_rc_update(struct ieee80211_hw *hw,
+		     struct ieee80211_vif *vif,
+		     struct ieee80211_sta *sta,
+		     u32 changed)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+
+	rcu_read_lock();
+	sta = ieee80211_find_sta(vif, sta->addr);
+	if (!sta) {
+		rcu_read_unlock();
+		return;
+	}
+	/* close the read-side section opened above; taking the lock a second
+	 * time here would leave it unbalanced on every successful call
+	 */
+	rcu_read_unlock();
+
+	set_bit(changed, &msta->stats.changed);
+	ieee80211_queue_work(hw, &msta->stats_work);
+}
+
+/* mac80211 callback table for mt7915: driver-local mt7915_* handlers
+ * combined with shared mt76_* helpers.
+ */
+const struct ieee80211_ops mt7915_ops = {
+	.tx = mt7915_tx,
+	.start = mt7915_start,
+	.stop = mt7915_stop,
+	.add_interface = mt7915_add_interface,
+	.remove_interface = mt7915_remove_interface,
+	.config = mt7915_config,
+	.conf_tx = mt7915_conf_tx,
+	.configure_filter = mt7915_configure_filter,
+	.bss_info_changed = mt7915_bss_info_changed,
+	.sta_add = mt7915_sta_add,
+	.sta_remove = mt7915_sta_remove,
+	.sta_pre_rcu_remove = mt76_sta_pre_rcu_remove,
+	.sta_rc_update = mt7915_sta_rc_update,
+	.set_key = mt7915_set_key,
+	.ampdu_action = mt7915_ampdu_action,
+	.set_rts_threshold = mt7915_set_rts_threshold,
+	.wake_tx_queue = mt76_wake_tx_queue,
+	.sw_scan_start = mt76_sw_scan,
+	.sw_scan_complete = mt76_sw_scan_complete,
+	.release_buffered_frames = mt76_release_buffered_frames,
+	.get_txpower = mt76_get_txpower,
+	.channel_switch_beacon = mt7915_channel_switch_beacon,
+	.get_stats = mt7915_get_stats,
+	.get_survey = mt76_get_survey,
+	.get_antenna = mt76_get_antenna,
+	.set_antenna = mt7915_set_antenna,
+	.set_coverage_class = mt7915_set_coverage_class,
+};
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
new file mode 100644
index 000000000000..dad2c300d0f2
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -0,0 +1,2313 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#include <linux/firmware.h>
+#include <linux/fs.h>
+#include "mt7915.h"
+#include "mcu.h"
+#include "mac.h"
+#include "eeprom.h"
+
+/* Packed header layout of a patch image; the multi-byte fields with explicit
+ * endianness are big-endian. The code that parses it is outside this chunk.
+ */
+struct mt7915_patch_hdr {
+	char build_date[16];
+	char platform[4];
+	__be32 hw_sw_ver;
+	__be32 patch_ver;
+	__be16 checksum;
+	u16 reserved;
+	struct {
+		__be32 patch_ver;
+		__be32 subsys;
+		__be32 feature;
+		__be32 n_region;
+		__be32 crc;
+		u32 reserved[11];
+	} desc;
+} __packed;
+
+/* Packed, big-endian descriptor of one patch section. The 13-word "spec"
+ * area is overlaid by the "info" view (4 named words + 9 reserved words).
+ */
+struct mt7915_patch_sec {
+	__be32 type;
+	__be32 offs;
+	__be32 size;
+	union {
+		__be32 spec[13];
+		struct {
+			__be32 addr;
+			__be32 len;
+			__be32 sec_key_idx;
+			__be32 align_len;
+			u32 reserved[9];
+		} info;
+	};
+} __packed;
+
+/* Packed firmware trailer layout (chip/eco ids, region count, format info,
+ * version and build-date strings, crc). Its users are outside this chunk.
+ */
+struct mt7915_fw_trailer {
+	u8 chip_id;
+	u8 eco_code;
+	u8 n_region;
+	u8 format_ver;
+	u8 format_flag;
+	u8 reserved[2];
+	char fw_ver[10];
+	char build_date[15];
+	u32 crc;
+} __packed;
+
+/* Packed, little-endian descriptor of one firmware region (decompression
+ * info, load address, length and a feature_set byte).
+ */
+struct mt7915_fw_region {
+	__le32 decomp_crc;
+	__le32 decomp_len;
+	__le32 decomp_blk_sz;
+	u8 reserved[4];
+	__le32 addr;
+	__le32 len;
+	u8 feature_set;
+	u8 reserved1[15];
+} __packed;
+
+/* NOTE(review): the firmware-download constants below are used by code
+ * outside this chunk; the group descriptions are inferred from the names
+ * only - confirm against the loader.
+ */
+#define MCU_PATCH_ADDRESS		0x200000
+
+/* presumably bits of mt7915_fw_region.feature_set */
+#define FW_FEATURE_SET_ENCRYPT		BIT(0)
+#define FW_FEATURE_SET_KEY_IDX		GENMASK(2, 1)
+#define FW_FEATURE_OVERRIDE_ADDR	BIT(5)
+
+/* presumably download-mode flags passed to the MCU */
+#define DL_MODE_ENCRYPT			BIT(0)
+#define DL_MODE_KEY_IDX			GENMASK(2, 1)
+#define DL_MODE_RESET_SEC_IV		BIT(3)
+#define DL_MODE_WORKING_PDA_CR4		BIT(4)
+#define DL_MODE_NEED_RSP		BIT(31)
+
+/* presumably firmware-start option flags */
+#define FW_START_OVERRIDE		BIT(0)
+#define FW_START_WORKING_PDA_CR4	BIT(2)
+
+#define PATCH_SEC_TYPE_MASK		GENMASK(15, 0)
+#define PATCH_SEC_TYPE_INFO		0x2
+
+/* split a wcid index into its bits 7:0 and bits 9:8 */
+#define to_wcid_lo(id)			FIELD_GET(GENMASK(7, 0), (u16)id)
+#define to_wcid_hi(id)			FIELD_GET(GENMASK(9, 8), (u16)id)
+
+/* Map a WLAN_CIPHER_SUITE_* value to the matching MT_CIPHER_* value;
+ * any suite not listed yields MT_CIPHER_NONE.
+ */
+static enum mt7915_cipher_type
+mt7915_mcu_get_cipher(int cipher)
+{
+	switch (cipher) {
+	case WLAN_CIPHER_SUITE_WEP40:
+		return MT_CIPHER_WEP40;
+	case WLAN_CIPHER_SUITE_WEP104:
+		return MT_CIPHER_WEP104;
+	case WLAN_CIPHER_SUITE_TKIP:
+		return MT_CIPHER_TKIP;
+	case WLAN_CIPHER_SUITE_AES_CMAC:
+		return MT_CIPHER_BIP_CMAC_128;
+	case WLAN_CIPHER_SUITE_CCMP:
+		return MT_CIPHER_AES_CCMP;
+	case WLAN_CIPHER_SUITE_CCMP_256:
+		return MT_CIPHER_CCMP_256;
+	case WLAN_CIPHER_SUITE_GCMP:
+		return MT_CIPHER_GCMP;
+	case WLAN_CIPHER_SUITE_GCMP_256:
+		return MT_CIPHER_GCMP_256;
+	case WLAN_CIPHER_SUITE_SMS4:
+		return MT_CIPHER_WAPI;
+	default:
+		return MT_CIPHER_NONE;
+	}
+}
+
+/* Translate chandef->width (NL80211_CHAN_WIDTH_*) into the CMD_CBW_* value;
+ * widths beyond the end of the lookup table yield 0.
+ */
+static u8 mt7915_mcu_chan_bw(struct cfg80211_chan_def *chandef)
+{
+	static const u8 width_to_bw[] = {
+		[NL80211_CHAN_WIDTH_40] = CMD_CBW_40MHZ,
+		[NL80211_CHAN_WIDTH_80] = CMD_CBW_80MHZ,
+		[NL80211_CHAN_WIDTH_80P80] = CMD_CBW_8080MHZ,
+		[NL80211_CHAN_WIDTH_160] = CMD_CBW_160MHZ,
+		[NL80211_CHAN_WIDTH_5] = CMD_CBW_5MHZ,
+		[NL80211_CHAN_WIDTH_10] = CMD_CBW_10MHZ,
+		[NL80211_CHAN_WIDTH_20] = CMD_CBW_20MHZ,
+		[NL80211_CHAN_WIDTH_20_NOHT] = CMD_CBW_20MHZ,
+	};
+
+	if (chandef->width >= ARRAY_SIZE(width_to_bw))
+		return 0;
+
+	return width_to_bw[chandef->width];
+}
+
+/* Build the PHY_MODE_* bitmask for @band.
+ *
+ * HT/VHT capabilities are taken from @sta when given, otherwise from the
+ * device's supported-band data (dev->mphy) for that band. Bands other than
+ * 2 GHz and 5 GHz yield 0. @vif is not used.
+ */
+static u8
+mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		    enum nl80211_band band, struct ieee80211_sta *sta)
+{
+	struct ieee80211_sta_ht_cap *ht_cap;
+	struct ieee80211_sta_vht_cap *vht_cap;
+	u8 mode = 0;
+
+	if (sta) {
+		ht_cap = &sta->ht_cap;
+		vht_cap = &sta->vht_cap;
+	} else {
+		struct ieee80211_supported_band *sband;
+
+		if (band == NL80211_BAND_2GHZ)
+			sband = &dev->mphy.sband_2g.sband;
+		else
+			sband = &dev->mphy.sband_5g.sband;
+
+		ht_cap = &sband->ht_cap;
+		vht_cap = &sband->vht_cap;
+	}
+
+	if (band == NL80211_BAND_2GHZ) {
+		mode |= PHY_MODE_B | PHY_MODE_G;
+
+		if (ht_cap->ht_supported)
+			mode |= PHY_MODE_GN;
+	} else if (band == NL80211_BAND_5GHZ) {
+		mode |= PHY_MODE_A;
+
+		if (ht_cap->ht_supported)
+			mode |= PHY_MODE_AN;
+
+		if (vht_cap->vht_supported)
+			mode |= PHY_MODE_AC;
+	}
+
+	return mode;
+}
+
+/* Prepend the MCU tx descriptor to @skb (except for firmware scatter
+ * packets, which are queued as-is) and queue it on the matching tx queue.
+ *
+ * @cmd:      negative values carry a plain command id (-cmd); non-negative
+ *            values are sent as extended commands.
+ * @wait_seq: if non-NULL, receives the 4-bit sequence number assigned to
+ *            this message (never 0).
+ *
+ * Returns the result of mt76_tx_queue_skb_raw().
+ */
+static int __mt7915_mcu_msg_send(struct mt7915_dev *dev, struct sk_buff *skb,
+				 int cmd, int *wait_seq)
+{
+	struct mt7915_mcu_txd *mcu_txd;
+	u8 seq, pkt_fmt, qidx;
+	enum mt7915_txq_id txq;
+	__le32 *txd;
+	u32 val;
+
+	/* sequence numbers are 4 bits wide and 0 is skipped */
+	seq = ++dev->mt76.mcu.msg_seq & 0xf;
+	if (!seq)
+		seq = ++dev->mt76.mcu.msg_seq & 0xf;
+
+	if (cmd == -MCU_CMD_FW_SCATTER) {
+		txq = MT_TXQ_FWDL;
+		goto exit;
+	}
+
+	mcu_txd = (struct mt7915_mcu_txd *)skb_push(skb, sizeof(*mcu_txd));
+
+	/* only the tx queue depends on whether the MCU is already running */
+	if (test_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state)) {
+		txq = MT_TXQ_MCU_WA;
+		qidx = MT_TX_MCU_PORT_RX_Q0;
+		pkt_fmt = MT_TX_TYPE_CMD;
+	} else {
+		txq = MT_TXQ_MCU;
+		qidx = MT_TX_MCU_PORT_RX_Q0;
+		pkt_fmt = MT_TX_TYPE_CMD;
+	}
+
+	txd = mcu_txd->txd;
+
+	val = FIELD_PREP(MT_TXD0_TX_BYTES, skb->len) |
+	      FIELD_PREP(MT_TXD0_PKT_FMT, pkt_fmt) |
+	      FIELD_PREP(MT_TXD0_Q_IDX, qidx);
+	txd[0] = cpu_to_le32(val);
+
+	val = MT_TXD1_LONG_FORMAT |
+	      FIELD_PREP(MT_TXD1_HDR_FORMAT, MT_HDR_FORMAT_CMD);
+	txd[1] = cpu_to_le32(val);
+
+	mcu_txd->len = cpu_to_le16(skb->len - sizeof(mcu_txd->txd));
+	mcu_txd->pq_id = cpu_to_le16(MCU_PQ_ID(MT_TX_PORT_IDX_MCU, qidx));
+	mcu_txd->pkt_type = MCU_PKT_ID;
+	mcu_txd->seq = seq;
+
+	if (cmd < 0) {
+		mcu_txd->set_query = MCU_Q_NA;
+		mcu_txd->cid = -cmd;
+	} else {
+		mcu_txd->cid = MCU_CMD_EXT_CID;
+		mcu_txd->ext_cid = cmd;
+		mcu_txd->ext_cid_ack = 1;
+
+		/* do not use Q_SET for efuse */
+		if (cmd == MCU_EXT_CMD_EFUSE_ACCESS)
+			mcu_txd->set_query = MCU_Q_QUERY;
+		else
+			mcu_txd->set_query = MCU_Q_SET;
+	}
+
+	mcu_txd->s2d_index = MCU_S2D_H2N;
+	/* runtime sanity check: both operands are only known at run time, so
+	 * this cannot be a compile-time BUILD_BUG_ON()
+	 */
+	WARN_ON(cmd == MCU_EXT_CMD_EFUSE_ACCESS &&
+		mcu_txd->set_query != MCU_Q_QUERY);
+
+exit:
+	if (wait_seq)
+		*wait_seq = seq;
+
+	return mt76_tx_queue_skb_raw(dev, txq, skb, 0);
+}
+
+/* Copy one efuse read result into the cached eeprom image.
+ *
+ * Strips the MCU rx descriptor from @skb, then copies 16 bytes of res->data
+ * to dev->mt76.eeprom.data at offset res->addr. Returns -EINVAL when @skb is
+ * NULL, 0 otherwise.
+ *
+ * NOTE(review): res->addr is not range-checked against the eeprom buffer
+ * size here - confirm the caller/firmware guarantees it.
+ */
+static int
+mt7915_mcu_parse_eeprom(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_eeprom_info *res;
+	u8 *buf;
+
+	if (!skb)
+		return -EINVAL;
+
+	skb_pull(skb, sizeof(struct mt7915_mcu_rxd));
+
+	res = (struct mt7915_mcu_eeprom_info *)skb->data;
+	buf = dev->mt76.eeprom.data + le32_to_cpu(res->addr);
+	memcpy(buf, res->data, 16);
+
+	return 0;
+}
+
+/* Interpret one MCU response and consume @skb.
+ *
+ * Returns -EAGAIN when the response's sequence number does not match @seq
+ * (the caller then waits for the next response). Otherwise returns a
+ * command-specific value: the first payload byte for PATCH_SEM_CONTROL, a
+ * 32-bit little-endian value for THERMAL_CTRL, the result of
+ * mt7915_mcu_parse_eeprom() for EFUSE_ACCESS, and 0 for everything else.
+ *
+ * @skb is freed on every path, including the sequence mismatch: the caller
+ * has already dequeued it and does not keep a reference.
+ */
+static int
+mt7915_mcu_parse_response(struct mt7915_dev *dev, int cmd,
+			  struct sk_buff *skb, int seq)
+{
+	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
+	int ret = 0;
+
+	if (seq != rxd->seq) {
+		ret = -EAGAIN;
+		goto out;
+	}
+
+	switch (cmd) {
+	case -MCU_CMD_PATCH_SEM_CONTROL:
+		skb_pull(skb, sizeof(*rxd) - 4);
+		ret = *skb->data;
+		break;
+	case MCU_EXT_CMD_THERMAL_CTRL:
+		skb_pull(skb, sizeof(*rxd) + 4);
+		ret = le32_to_cpu(*(__le32 *)skb->data);
+		break;
+	case MCU_EXT_CMD_EFUSE_ACCESS:
+		ret = mt7915_mcu_parse_eeprom(dev, skb);
+		break;
+	default:
+		break;
+	}
+out:
+	dev_kfree_skb(skb);
+
+	return ret;
+}
+
+/* Wait for the response to message @seq of command @cmd.
+ *
+ * Fetches responses until one is accepted by mt7915_mcu_parse_response()
+ * (i.e. it returns anything other than -EAGAIN) and returns that result.
+ * The deadline is fixed at 20 * HZ jiffies from entry; when no response is
+ * obtained an error is logged and -ETIMEDOUT is returned.
+ */
+static int
+mt7915_mcu_wait_response(struct mt7915_dev *dev, int cmd, int seq)
+{
+	unsigned long expires = jiffies + 20 * HZ;
+	struct sk_buff *skb;
+	int ret = 0;
+
+	while (true) {
+		skb = mt76_mcu_get_response(&dev->mt76, expires);
+		if (!skb) {
+			dev_err(dev->mt76.dev, "Message %d (seq %d) timeout\n",
+				cmd, seq);
+			return -ETIMEDOUT;
+		}
+
+		ret = mt7915_mcu_parse_response(dev, cmd, skb, seq);
+		if (ret != -EAGAIN)
+			break;
+	}
+
+	return ret;
+}
+
+/* Send @skb as MCU command @cmd and, when @wait_resp is set, wait for its
+ * response. The send and the wait are serialized under mdev->mcu.mutex.
+ * Returns the send error, or the result of the wait, or 0.
+ */
+static int
+mt7915_mcu_send_message(struct mt76_dev *mdev, struct sk_buff *skb,
+			int cmd, bool wait_resp)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+	int ret, seq;
+
+	mutex_lock(&mdev->mcu.mutex);
+
+	ret = __mt7915_mcu_msg_send(dev, skb, cmd, &seq);
+	if (ret)
+		goto out;
+
+	if (wait_resp)
+		ret = mt7915_mcu_wait_response(dev, cmd, seq);
+
+out:
+	mutex_unlock(&mdev->mcu.mutex);
+
+	return ret;
+}
+
+/* Allocate an MCU message holding @len bytes of @data and send it as @cmd.
+ * Returns -ENOMEM when the allocation fails, otherwise the result of
+ * __mt76_mcu_skb_send_msg().
+ */
+static int
+mt7915_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
+		    int len, bool wait_resp)
+{
+	struct sk_buff *skb;
+
+	skb = mt76_mcu_msg_alloc(mdev, data, len);
+	if (!skb)
+		return -ENOMEM;
+
+	return __mt76_mcu_skb_send_msg(mdev, skb, cmd, wait_resp);
+}
+
+/* Interface iterator callback: complete the channel switch on @vif if one
+ * is active. @priv and @mac are not used.
+ */
+static void
+mt7915_mcu_csa_finish(void *priv, u8 *mac, struct ieee80211_vif *vif)
+{
+	if (vif->csa_active)
+		ieee80211_csa_finish(vif);
+}
+
+/* Handle a radar-detection report from the MCU: notify mac80211 on the
+ * second phy when the report's idx is non-zero and a second phy exists,
+ * otherwise on the primary phy, and bump dev->hw_pattern.
+ */
+static void
+mt7915_mcu_rx_radar_detected(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt76_phy *mphy = &dev->mt76.phy;
+	struct mt7915_mcu_rdd_report *r;
+
+	r = (struct mt7915_mcu_rdd_report *)skb->data;
+
+	if (r->idx && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	ieee80211_radar_detected(mphy->hw);
+	dev->hw_pattern++;
+}
+
+/* Decode the firmware rate word @r (plus the gi and ru_idx fields of @ra)
+ * into @rate.
+ *
+ * MCS and NSS are extracted first; the tx mode then selects how they are
+ * reported (legacy bitrate lookup, HT MCS index, VHT or HE flags). The
+ * bandwidth is reported as an HE RU allocation when ra->ru_idx is non-zero,
+ * otherwise it is derived from the current channel width minus the BW field
+ * of @r.
+ */
+static void
+mt7915_mcu_tx_rate_cal(struct mt76_phy *mphy, struct mt7915_mcu_ra_info *ra,
+		       struct rate_info *rate, u16 r)
+{
+	struct ieee80211_supported_band *sband;
+	u16 ru_idx = le16_to_cpu(ra->ru_idx);
+	u16 flags = 0;
+
+	rate->mcs = FIELD_GET(MT_RA_RATE_MCS, r);
+	rate->nss = FIELD_GET(MT_RA_RATE_NSS, r) + 1;
+
+	switch (FIELD_GET(MT_RA_RATE_TX_MODE, r)) {
+	case MT_PHY_TYPE_CCK:
+	case MT_PHY_TYPE_OFDM:
+		if (mphy->chandef.chan->band == NL80211_BAND_5GHZ)
+			sband = &mphy->sband_5g.sband;
+		else
+			sband = &mphy->sband_2g.sband;
+
+		/* for legacy modes the MCS field indexes the bitrate table */
+		rate->legacy = sband->bitrates[rate->mcs].bitrate;
+		break;
+	case MT_PHY_TYPE_HT:
+	case MT_PHY_TYPE_HT_GF:
+		/* HT MCS indices advance by 8 per additional stream */
+		rate->mcs += (rate->nss - 1) * 8;
+		flags |= RATE_INFO_FLAGS_MCS;
+
+		if (ra->gi)
+			flags |= RATE_INFO_FLAGS_SHORT_GI;
+		break;
+	case MT_PHY_TYPE_VHT:
+		flags |= RATE_INFO_FLAGS_VHT_MCS;
+
+		if (ra->gi)
+			flags |= RATE_INFO_FLAGS_SHORT_GI;
+		break;
+	case MT_PHY_TYPE_HE_SU:
+	case MT_PHY_TYPE_HE_EXT_SU:
+	case MT_PHY_TYPE_HE_TB:
+	case MT_PHY_TYPE_HE_MU:
+		rate->he_gi = ra->gi;
+		rate->he_dcm = FIELD_GET(MT_RA_RATE_DCM_EN, r);
+
+		flags |= RATE_INFO_FLAGS_HE_MCS;
+		break;
+	default:
+		break;
+	}
+	rate->flags = flags;
+
+	if (ru_idx) {
+		switch (ru_idx) {
+		case 1 ... 2:
+			rate->he_ru_alloc = NL80211_RATE_INFO_HE_RU_ALLOC_996;
+			break;
+		case 3 ... 6:
+			rate->he_ru_alloc = NL80211_RATE_INFO_HE_RU_ALLOC_484;
+			break;
+		case 7 ... 14:
+			rate->he_ru_alloc = NL80211_RATE_INFO_HE_RU_ALLOC_242;
+			break;
+		default:
+			rate->he_ru_alloc = NL80211_RATE_INFO_HE_RU_ALLOC_106;
+			break;
+		}
+		rate->bw = RATE_INFO_BW_HE_RU;
+	} else {
+		/* NOTE(review): the CMD_CBW_* difference is compared against
+		 * IEEE80211_STA_RX_BW_* values below - this presumably relies
+		 * on the two enumerations sharing numeric values; confirm.
+		 */
+		u8 bw = mt7915_mcu_chan_bw(&mphy->chandef) -
+			FIELD_GET(MT_RA_RATE_BW, r);
+
+		switch (bw) {
+		case IEEE80211_STA_RX_BW_160:
+			rate->bw = RATE_INFO_BW_160;
+			break;
+		case IEEE80211_STA_RX_BW_80:
+			rate->bw = RATE_INFO_BW_80;
+			break;
+		case IEEE80211_STA_RX_BW_40:
+			rate->bw = RATE_INFO_BW_40;
+			break;
+		default:
+			rate->bw = RATE_INFO_BW_20;
+			break;
+		}
+	}
+}
+
+/* Handle a tx rate report event from the MCU.
+ *
+ * Looks up the station by the wlan index carried in the report, decodes the
+ * current and probing rates into its stats, and updates the packet error
+ * rate (in 1/1000 units) when the report carries a non-zero attempt count.
+ * Reports whose wlan index is out of range or refers to an empty wcid slot
+ * are ignored.
+ */
+static void
+mt7915_mcu_tx_rate_report(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_ra_info *ra = (struct mt7915_mcu_ra_info *)skb->data;
+	struct rate_info rate = {}, prob_rate = {};
+	u16 probe = le16_to_cpu(ra->prob_up_rate);
+	u16 attempts = le16_to_cpu(ra->attempts);
+	u16 curr = le16_to_cpu(ra->curr_rate);
+	u16 wcidx = le16_to_cpu(ra->wlan_idx);
+	struct mt76_phy *mphy = &dev->mphy;
+	struct mt7915_sta_stats *stats;
+	struct mt7915_sta *msta;
+	struct mt76_wcid *wcid;
+
+	/* the index comes from firmware: never read past the wcid table */
+	if (wcidx >= ARRAY_SIZE(dev->mt76.wcid))
+		return;
+
+	/* an unpopulated slot yields NULL; container_of() on it would
+	 * produce a bogus station pointer
+	 */
+	wcid = rcu_dereference(dev->mt76.wcid[wcidx]);
+	if (!wcid)
+		return;
+
+	msta = container_of(wcid, struct mt7915_sta, wcid);
+	stats = &msta->stats;
+
+	if (msta->wcid.ext_phy && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+
+	/* current rate */
+	mt7915_mcu_tx_rate_cal(mphy, ra, &rate, curr);
+	stats->tx_rate = rate;
+
+	/* probing rate */
+	mt7915_mcu_tx_rate_cal(mphy, ra, &prob_rate, probe);
+	stats->prob_rate = prob_rate;
+
+	if (attempts) {
+		u16 success = le16_to_cpu(ra->success);
+
+		stats->per = 1000 * (attempts - success) / attempts;
+	}
+}
+
+/* Dispatch an extended MCU event by rxd->ext_eid: radar report, CSA
+ * notification (finishes the channel switch on every active interface that
+ * has one pending) or tx rate report. Other event ids are ignored.
+ */
+static void
+mt7915_mcu_rx_ext_event(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
+
+	switch (rxd->ext_eid) {
+	case MCU_EXT_EVENT_RDD_REPORT:
+		mt7915_mcu_rx_radar_detected(dev, skb);
+		break;
+	case MCU_EXT_EVENT_CSA_NOTIFY:
+		ieee80211_iterate_active_interfaces_atomic(dev->mt76.hw,
+				IEEE80211_IFACE_ITER_RESUME_ALL,
+				mt7915_mcu_csa_finish, dev);
+		break;
+	case MCU_EXT_EVENT_RATE_REPORT:
+		mt7915_mcu_tx_rate_report(dev, skb);
+		break;
+	default:
+		break;
+	}
+}
+
+/* Handle an MCU event that is not a response to a pending command. Only
+ * MCU_EVENT_EXT is processed; @skb is freed here in every case.
+ */
+static void
+mt7915_mcu_rx_unsolicited_event(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
+
+	switch (rxd->eid) {
+	case MCU_EVENT_EXT:
+		mt7915_mcu_rx_ext_event(dev, skb);
+		break;
+	default:
+		break;
+	}
+	dev_kfree_skb(skb);
+}
+
+/* Entry point for MCU events.
+ *
+ * Events with one of the listed extended event ids, or with a zero sequence
+ * number, are handled as unsolicited events; everything else is passed to
+ * mt76_mcu_rx_event() as a command response.
+ */
+void mt7915_mcu_rx_event(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
+
+	if (rxd->ext_eid == MCU_EXT_EVENT_THERMAL_PROTECT ||
+	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
+	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
+	    rxd->ext_eid == MCU_EXT_EVENT_RATE_REPORT ||
+	    !rxd->seq)
+		mt7915_mcu_rx_unsolicited_event(dev, skb);
+	else
+		mt76_mcu_rx_event(&dev->mt76, skb);
+}
+
+/* Allocate an MCU message of @len bytes and append a sta_req_hdr to it.
+ *
+ * The header always carries the interface's bss index; the wlan index and
+ * muar index are taken from @msta / @mvif when @msta is given and are 0
+ * otherwise. Returns the skb, or ERR_PTR(-ENOMEM) on allocation failure.
+ */
+static struct sk_buff *
+mt7915_mcu_alloc_sta_req(struct mt7915_dev *dev, struct mt7915_vif *mvif,
+			 struct mt7915_sta *msta, int len)
+{
+	struct sta_req_hdr hdr = {
+		.bss_idx = mvif->idx,
+		.wlan_idx_lo = msta ? to_wcid_lo(msta->wcid.idx) : 0,
+		.wlan_idx_hi = msta ? to_wcid_hi(msta->wcid.idx) : 0,
+		.muar_idx = msta ? mvif->omac_idx : 0,
+		.is_tlv_append = 1,
+	};
+	struct sk_buff *skb;
+
+	skb = mt76_mcu_msg_alloc(&dev->mt76, NULL, len);
+	if (!skb)
+		return ERR_PTR(-ENOMEM);
+
+	skb_put_data(skb, &hdr, sizeof(hdr));
+
+	return skb;
+}
+
+/* Append a wtbl_req_hdr for @msta with operation @cmd to a message.
+ *
+ * When *@skb is NULL a new message of MT7915_WTBL_UPDATE_BA_SIZE bytes is
+ * allocated and stored in *@skb; otherwise the header is appended to the
+ * existing message. When @sta_wtbl is given it is treated as the enclosing
+ * TLV and its length is set to the size of the header.
+ *
+ * Returns a pointer to the header inside the skb, or ERR_PTR(-ENOMEM).
+ */
+static struct wtbl_req_hdr *
+mt7915_mcu_alloc_wtbl_req(struct mt7915_dev *dev, struct mt7915_sta *msta,
+			  int cmd, void *sta_wtbl, struct sk_buff **skb)
+{
+	struct tlv *sta_hdr = sta_wtbl;
+	struct wtbl_req_hdr hdr = {
+		.wlan_idx_lo = to_wcid_lo(msta->wcid.idx),
+		.wlan_idx_hi = to_wcid_hi(msta->wcid.idx),
+		.operation = cmd,
+	};
+	struct sk_buff *nskb = *skb;
+
+	if (!nskb) {
+		nskb = mt76_mcu_msg_alloc(&dev->mt76, NULL,
+					  MT7915_WTBL_UPDATE_BA_SIZE);
+		if (!nskb)
+			return ERR_PTR(-ENOMEM);
+
+		*skb = nskb;
+	}
+
+	if (sta_hdr)
+		sta_hdr->len = cpu_to_le16(sizeof(hdr));
+
+	return skb_put_data(nskb, &hdr, sizeof(hdr));
+}
+
+/* Append a TLV of @len bytes with tag @tag to @skb.
+ *
+ * Only the tag/len header is written here; the caller fills in the rest.
+ * The tlv_num counter of the header at @sta_ntlv is incremented, and when
+ * @sta_wtbl is given @len is added to that enclosing TLV's length.
+ *
+ * Returns a pointer to the new TLV inside the skb.
+ */
+static struct tlv *
+mt7915_mcu_add_nested_tlv(struct sk_buff *skb, int tag, int len,
+			  void *sta_ntlv, void *sta_wtbl)
+{
+	struct sta_ntlv_hdr *ntlv_hdr = sta_ntlv;
+	struct tlv *sta_hdr = sta_wtbl;
+	struct tlv *ptlv, tlv = {
+		.tag = cpu_to_le16(tag),
+		.len = cpu_to_le16(len),
+	};
+	u16 ntlv;
+
+	ptlv = skb_put(skb, len);
+	memcpy(ptlv, &tlv, sizeof(tlv));
+
+	ntlv = le16_to_cpu(ntlv_hdr->tlv_num);
+	ntlv_hdr->tlv_num = cpu_to_le16(ntlv + 1);
+
+	if (sta_hdr) {
+		u16 size = le16_to_cpu(sta_hdr->len);
+
+		sta_hdr->len = cpu_to_le16(size + len);
+	}
+
+	return ptlv;
+}
+
+/* Append a top-level TLV: the TLV counter that gets incremented is the one
+ * in the header at the start of the message (skb->data), and there is no
+ * enclosing TLV to update.
+ */
+static struct tlv *
+mt7915_mcu_add_tlv(struct sk_buff *skb, int tag, int len)
+{
+	return mt7915_mcu_add_nested_tlv(skb, tag, len, skb->data, NULL);
+}
+
+/* Append a sub-TLV of @sub_len bytes with tag @sub_tag to @skb, writing only
+ * its tag/len header. The little-endian counter at @sub_ntlv is incremented
+ * and @sub_len is added to the little-endian length at @len.
+ * Returns a pointer to the new sub-TLV inside the skb.
+ */
+static struct tlv *
+mt7915_mcu_add_nested_subtlv(struct sk_buff *skb, int sub_tag, int sub_len,
+			     __le16 *sub_ntlv, __le16 *len)
+{
+	struct tlv *ptlv, tlv = {
+		.tag = cpu_to_le16(sub_tag),
+		.len = cpu_to_le16(sub_len),
+	};
+
+	ptlv = skb_put(skb, sub_len);
+	memcpy(ptlv, &tlv, sizeof(tlv));
+
+	*sub_ntlv = cpu_to_le16(le16_to_cpu(*sub_ntlv) + 1);
+	*len = cpu_to_le16(le16_to_cpu(*len) + sub_len);
+
+	return ptlv;
+}
+
+/** bss info **/
+/* Append the BSS_INFO_BASIC TLV for @vif to @skb.
+ *
+ * The broadcast/multicast wcid defaults to the interface's own station
+ * entry; for an enabled STATION interface it is the wcid of the peer found
+ * by BSSID instead.
+ *
+ * Returns 0, or -EINVAL when that peer cannot be found. Note that the TLV
+ * has already been appended (header only) to @skb when -EINVAL is returned.
+ */
+static int
+mt7915_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+			 struct mt7915_phy *phy, bool enable)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	enum nl80211_band band = chandef->chan->band;
+	struct bss_info_basic *bss;
+	u16 wlan_idx = mvif->sta.wcid.idx;
+	u32 type = NETWORK_INFRA;
+	struct tlv *tlv;
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_BASIC, sizeof(*bss));
+
+	switch (vif->type) {
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
+		break;
+	case NL80211_IFTYPE_STATION:
+		/* TODO: enable BSS_INFO_UAPSD & BSS_INFO_PM */
+		if (enable) {
+			struct ieee80211_sta *sta;
+			struct mt7915_sta *msta;
+
+			/* the lookup result is only used inside the RCU
+			 * read-side section
+			 */
+			rcu_read_lock();
+			sta = ieee80211_find_sta(vif, vif->bss_conf.bssid);
+			if (!sta) {
+				rcu_read_unlock();
+				return -EINVAL;
+			}
+
+			msta = (struct mt7915_sta *)sta->drv_priv;
+			wlan_idx = msta->wcid.idx;
+			rcu_read_unlock();
+		}
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		type = NETWORK_IBSS;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	bss = (struct bss_info_basic *)tlv;
+	memcpy(bss->bssid, vif->bss_conf.bssid, ETH_ALEN);
+	bss->bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int);
+	bss->network_type = cpu_to_le32(type);
+	bss->dtim_period = vif->bss_conf.dtim_period;
+	bss->bmc_wcid_lo = to_wcid_lo(wlan_idx);
+	bss->bmc_wcid_hi = to_wcid_hi(wlan_idx);
+	bss->phy_mode = mt7915_get_phy_mode(phy->dev, vif, band, NULL);
+	bss->wmm_idx = mvif->wmm_idx;
+	bss->active = enable;
+
+	return 0;
+}
+
+/* Append the BSS_INFO_OMAC TLV for @vif to @skb: connection type derived
+ * from the interface type, plus the interface's omac and band indices. The
+ * hw bss index is HW_BSSID_0 for omac indices above EXT_BSSID_START and the
+ * omac index itself otherwise.
+ */
+static void
+mt7915_mcu_bss_omac_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct bss_info_omac *omac;
+	struct tlv *tlv;
+	u32 type = 0;
+	u8 idx;
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_OMAC, sizeof(*omac));
+
+	switch (vif->type) {
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
+		type = CONNECTION_INFRA_AP;
+		break;
+	case NL80211_IFTYPE_STATION:
+		type = CONNECTION_INFRA_STA;
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		type = CONNECTION_IBSS_ADHOC;
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
+
+	omac = (struct bss_info_omac *)tlv;
+	idx = mvif->omac_idx > EXT_BSSID_START ? HW_BSSID_0 : mvif->omac_idx;
+	omac->conn_type = cpu_to_le32(type);
+	omac->omac_idx = mvif->omac_idx;
+	omac->band_idx = mvif->band_idx;
+	omac->hw_bss_idx = idx;
+}
+
+/* Append the BSS_INFO_RF_CH TLV to @skb: primary channel, centre channel(s)
+ * and bandwidth of the phy's current chandef. The second centre channel is
+ * only filled in for 80+80 MHz. he_all_disable is always set. @vif is not
+ * used.
+ */
+static void
+mt7915_mcu_bss_rfch_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+			struct mt7915_phy *phy)
+{
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	struct bss_info_rf_ch *ch;
+	struct tlv *tlv;
+	int freq1 = chandef->center_freq1;
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_RF_CH, sizeof(*ch));
+
+	ch = (struct bss_info_rf_ch *)tlv;
+	ch->pri_ch = chandef->chan->hw_value;
+	ch->center_ch0 = ieee80211_frequency_to_channel(freq1);
+	ch->bw = mt7915_mcu_chan_bw(chandef);
+
+	if (chandef->width == NL80211_CHAN_WIDTH_80P80) {
+		int freq2 = chandef->center_freq2;
+
+		ch->center_ch1 = ieee80211_frequency_to_channel(freq2);
+	}
+
+	ch->he_all_disable = true;
+}
+
+/* Append the BSS_INFO_RA (rate adaptation) TLV to @skb. The tx/rx stream
+ * count is the number of bits set in phy->chainmask; op_mode/adhoc_en follow
+ * the interface type.
+ *
+ * NOTE(review): the remaining fields are fixed tuning constants whose
+ * meaning is defined by the firmware interface, not by this file.
+ */
+static void
+mt7915_mcu_bss_ra_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+		      struct mt7915_phy *phy)
+{
+	struct bss_info_ra *ra;
+	struct tlv *tlv;
+	int max_nss = hweight8(phy->chainmask);
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_RA, sizeof(*ra));
+
+	ra = (struct bss_info_ra *)tlv;
+	ra->op_mode = vif->type == NL80211_IFTYPE_AP;
+	ra->adhoc_en = vif->type == NL80211_IFTYPE_ADHOC;
+	ra->short_preamble = true;
+	ra->tx_streams = max_nss;
+	ra->rx_streams = max_nss;
+	ra->algo = 4;
+	ra->train_up_rule = 2;
+	ra->train_up_high_thres = 110;
+	ra->train_up_rule_rssi = -70;
+	ra->low_traffic_thres = 2;
+	ra->phy_cap = cpu_to_le32(0xfdf);
+	ra->interval = cpu_to_le32(500);
+	ra->fast_interval = cpu_to_le32(100);
+}
+
+/* Append the BSS_INFO_EXT_BSS TLV to @skb for an extended-BSSID interface.
+ * The TSF offset is the interface's position among the extended BSSIDs
+ * multiplied by the estimated beacon transmit time. Nothing is appended
+ * when the omac index is below EXT_BSSID_START.
+ */
+static void
+mt7915_mcu_bss_ext_tlv(struct sk_buff *skb, struct mt7915_vif *mvif)
+{
+/* SIFS 20us + 512 byte beacon transmitted by 1Mbps (3906us) */
+#define BCN_TX_ESTIMATE_TIME	(4096 + 20)
+	struct bss_info_ext_bss *ext;
+	int ext_bss_idx, tsf_offset;
+	struct tlv *tlv;
+
+	ext_bss_idx = mvif->omac_idx - EXT_BSSID_START;
+	if (ext_bss_idx < 0)
+		return;
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_EXT_BSS, sizeof(*ext));
+
+	ext = (struct bss_info_ext_bss *)tlv;
+	tsf_offset = ext_bss_idx * BCN_TX_ESTIMATE_TIME;
+	ext->mbss_tsf_offset = cpu_to_le32(tsf_offset);
+}
+
+/*
+ * Fill a BSS_INFO_BMC_RATE TLV: on the 2.4 GHz band only short preamble is
+ * flagged, on any other band fixed bc_trans/mc_trans values are written.
+ */
+static void
+mt7915_mcu_bss_bmc_tlv(struct sk_buff *skb, struct mt7915_phy *phy)
+{
+	bool is_2ghz = phy->mt76->chandef.chan->band == NL80211_BAND_2GHZ;
+	struct bss_info_bmc_rate *rate;
+
+	rate = (struct bss_info_bmc_rate *)
+	       mt7915_mcu_add_tlv(skb, BSS_INFO_BMC_RATE, sizeof(*rate));
+
+	if (is_2ghz) {
+		rate->short_preamble = true;
+		return;
+	}
+
+	rate->bc_trans = cpu_to_le16(0x2000);
+	rate->mc_trans = cpu_to_le16(0x2080);
+}
+
+/*
+ * Fill a BSS_INFO_SYNC_MODE TLV with the beacon interval and DTIM period
+ * taken from vif->bss_conf, and mark it enabled.
+ */
+static void
+mt7915_mcu_bss_sync_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
+{
+	struct ieee80211_bss_conf *conf = &vif->bss_conf;
+	struct bss_info_sync_mode *mode;
+
+	mode = (struct bss_info_sync_mode *)
+	       mt7915_mcu_add_tlv(skb, BSS_INFO_SYNC_MODE, sizeof(*mode));
+
+	mode->enable = true;
+	mode->dtim_period = conf->dtim_period;
+	mode->bcn_interval = cpu_to_le16(conf->beacon_int);
+}
+
+/*
+ * Build and send a MCU_EXT_CMD_BSS_INFO_UPDATE request for @vif.
+ *
+ * With @enable clear only the basic TLV is sent. With @enable set the
+ * request carries, in this order: omac, basic, rf channel, bmc rate, rate
+ * adaptation and then either the ext-bss TLV (omac_idx above HW_BSSID_MAX)
+ * or the sync-mode TLV.
+ *
+ * Returns the result of the MCU send, or the allocation error.
+ */
+int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
+			    struct ieee80211_vif *vif, int enable)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct sk_buff *skb;
+
+	skb = mt7915_mcu_alloc_sta_req(phy->dev, mvif, NULL,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	if (!enable) {
+		mt7915_mcu_bss_basic_tlv(skb, vif, phy, enable);
+		goto send;
+	}
+
+	/* bss_omac must be first */
+	mt7915_mcu_bss_omac_tlv(skb, vif);
+	mt7915_mcu_bss_basic_tlv(skb, vif, phy, enable);
+	mt7915_mcu_bss_rfch_tlv(skb, vif, phy);
+	mt7915_mcu_bss_bmc_tlv(skb, phy);
+	mt7915_mcu_bss_ra_tlv(skb, vif, phy);
+
+	if (mvif->omac_idx > HW_BSSID_MAX)
+		mt7915_mcu_bss_ext_tlv(skb, mvif);
+	else
+		mt7915_mcu_bss_sync_tlv(skb, vif);
+
+send:
+	return __mt76_mcu_skb_send_msg(&phy->dev->mt76, skb,
+				       MCU_EXT_CMD_BSS_INFO_UPDATE, true);
+}
+
+/** starec & wtbl **/
+/*
+ * Fill a STA_REC_KEY_V2 TLV describing a key operation.
+ *
+ * @skb: request buffer the TLV is added to
+ * @key: mac80211 key configuration
+ * @cmd: key command; stored as-is in sec->add
+ *
+ * For SET_KEY the cipher is mapped with mt7915_mcu_get_cipher() and one or
+ * two key slots are filled; for any other command no key slot is filled.
+ * The length written to sec->len is reduced by the size of the unused key
+ * slots.
+ *
+ * Returns 0 on success, -EOPNOTSUPP when the cipher maps to MT_CIPHER_NONE
+ * (the TLV has already been added to @skb at that point).
+ */
+static int
+mt7915_mcu_sta_key_tlv(struct sk_buff *skb, struct ieee80211_key_conf *key,
+		       enum set_key_cmd cmd)
+{
+	struct sta_rec_sec *sec;
+	struct tlv *tlv;
+	u32 len = sizeof(*sec);
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_KEY_V2, sizeof(*sec));
+
+	sec = (struct sta_rec_sec *)tlv;
+	sec->add = cmd;
+
+	if (cmd == SET_KEY) {
+		struct sec_key *sec_key;
+		u8 cipher;
+
+		cipher = mt7915_mcu_get_cipher(key->cipher);
+		if (cipher == MT_CIPHER_NONE)
+			return -EOPNOTSUPP;
+
+		sec_key = &sec->key[0];
+		sec_key->cipher_len = sizeof(*sec_key);
+		sec_key->key_id = key->keyidx;
+
+		if (cipher == MT_CIPHER_BIP_CMAC_128) {
+			/*
+			 * Two slots are used: slot 0 is tagged AES-CCMP and
+			 * takes the first 16 bytes of key->key, slot 1 is
+			 * tagged BIP-CMAC-128 and takes the next 16 bytes.
+			 */
+			sec_key->cipher_id = MT_CIPHER_AES_CCMP;
+			sec_key->key_len = 16;
+			memcpy(sec_key->key, key->key, 16);
+
+			sec_key = &sec->key[1];
+			sec_key->cipher_id = MT_CIPHER_BIP_CMAC_128;
+			sec_key->cipher_len = sizeof(*sec_key);
+			sec_key->key_len = 16;
+			memcpy(sec_key->key, key->key + 16, 16);
+
+			sec->n_cipher = 2;
+		} else {
+			sec_key->cipher_id = cipher;
+			sec_key->key_len = key->keylen;
+			memcpy(sec_key->key, key->key, key->keylen);
+
+			if (cipher == MT_CIPHER_TKIP) {
+				/* Rx/Tx MIC keys are swapped */
+				memcpy(sec_key->key + 16, key->key + 24, 8);
+				memcpy(sec_key->key + 24, key->key + 16, 8);
+			}
+
+			/* only one slot in use: drop the second from len */
+			len -= sizeof(*sec_key);
+			sec->n_cipher = 1;
+		}
+	} else {
+		/* no key material: report the TLV without the key array */
+		len -= sizeof(sec->key);
+		sec->n_cipher = 0;
+	}
+	sec->len = cpu_to_le16(len);
+
+	return 0;
+}
+
+/*
+ * Build and send a MCU_EXT_CMD_STA_REC_UPDATE request carrying the key TLV
+ * for @msta.
+ *
+ * @dev:  device the request is sent to
+ * @vif:  interface owning the station
+ * @msta: driver station entry the key belongs to
+ * @key:  mac80211 key configuration
+ * @cmd:  key command forwarded to mt7915_mcu_sta_key_tlv()
+ *
+ * Returns 0 on success or a negative errno: the allocation error, the error
+ * from mt7915_mcu_sta_key_tlv(), or the result of the MCU send.
+ */
+int mt7915_mcu_add_key(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		       struct mt7915_sta *msta, struct ieee80211_key_conf *key,
+		       enum set_key_cmd cmd)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct sk_buff *skb;
+	int ret;
+
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	ret = mt7915_mcu_sta_key_tlv(skb, key, cmd);
+	if (ret) {
+		/*
+		 * The request is not handed to the send path on this
+		 * branch, so it has to be released here.
+		 */
+		dev_kfree_skb(skb);
+		return ret;
+	}
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
+}
+
+/*
+ * Fill a STA_REC_BA TLV from the A-MPDU session parameters.
+ *
+ * @skb:    request buffer the TLV is added to
+ * @params: mac80211 A-MPDU parameters (tid, ssn, buf_size, amsdu)
+ * @enable: whether the session is being enabled; stored shifted by the TID
+ *          in ba_en
+ * @tx:     true selects MT_BA_TYPE_ORIGINATOR, false MT_BA_TYPE_RECIPIENT
+ */
+static void
+mt7915_mcu_sta_ba_tlv(struct sk_buff *skb,
+		      struct ieee80211_ampdu_params *params,
+		      bool enable, bool tx)
+{
+	struct sta_rec_ba *ba;
+	struct tlv *tlv;
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_BA, sizeof(*ba));
+
+	ba = (struct sta_rec_ba *)tlv;
+	ba->ba_type = tx ? MT_BA_TYPE_ORIGINATOR : MT_BA_TYPE_RECIPIENT;
+	ba->winsize = cpu_to_le16(params->buf_size);
+	ba->ssn = cpu_to_le16(params->ssn);
+	ba->ba_en = enable << params->tid;
+	ba->amsdu = params->amsdu;
+	ba->tid = params->tid;
+}
+
+/*
+ * Fill a nested WTBL_BA TLV for an A-MPDU session.
+ *
+ * For the transmit direction the originator type, the enable flag and, when
+ * enabling, the starting sequence number and window size are written. For
+ * the receive direction the peer address and the reset-by-MAC/TID selectors
+ * are written instead.
+ */
+static void
+mt7915_mcu_wtbl_ba_tlv(struct sk_buff *skb,
+		       struct ieee80211_ampdu_params *params,
+		       bool enable, bool tx, void *sta_wtbl,
+		       void *wtbl_tlv)
+{
+	struct wtbl_ba *ba;
+
+	ba = (struct wtbl_ba *)
+	     mt7915_mcu_add_nested_tlv(skb, WTBL_BA, sizeof(*ba),
+				       wtbl_tlv, sta_wtbl);
+	ba->tid = params->tid;
+
+	if (!tx) {
+		ba->ba_type = MT_BA_TYPE_RECIPIENT;
+		memcpy(ba->peer_addr, params->sta->addr, ETH_ALEN);
+		ba->rst_ba_tid = params->tid;
+		ba->rst_ba_sel = RST_BA_MAC_TID_MATCH;
+		ba->rst_ba_sb = 1;
+		return;
+	}
+
+	ba->ba_type = MT_BA_TYPE_ORIGINATOR;
+	ba->ba_en = enable;
+
+	if (enable) {
+		ba->sn = cpu_to_le16(params->ssn);
+		ba->ba_winsize = cpu_to_le16(params->buf_size);
+	} else {
+		ba->sn = 0;
+	}
+}
+
+/*
+ * Build and send a MCU_EXT_CMD_STA_REC_UPDATE request that carries both the
+ * STA_REC_BA TLV and a STA_REC_WTBL TLV wrapping the WTBL_BA TLV for the
+ * station referenced by @params.
+ *
+ * Returns the result of the MCU send, or the allocation error.
+ */
+static int
+mt7915_mcu_sta_ba(struct mt7915_dev *dev,
+		  struct ieee80211_ampdu_params *params,
+		  bool enable, bool tx)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)params->sta->drv_priv;
+	struct wtbl_req_hdr *hdr;
+	struct sk_buff *req;
+	struct tlv *wrap;
+
+	req = mt7915_mcu_alloc_sta_req(dev, msta->vif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* sta record part */
+	mt7915_mcu_sta_ba_tlv(req, params, enable, tx);
+
+	/* wtbl part, nested inside a STA_REC_WTBL TLV */
+	wrap = mt7915_mcu_add_tlv(req, STA_REC_WTBL, sizeof(struct tlv));
+	hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, wrap, &req);
+	mt7915_mcu_wtbl_ba_tlv(req, params, enable, tx, wrap, hdr);
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, req,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
+}
+
+/*
+ * Update the transmit-side block-ack state for the station in @params.
+ * Thin wrapper around mt7915_mcu_sta_ba() with tx = true; returns its result.
+ */
+int mt7915_mcu_add_tx_ba(struct mt7915_dev *dev,
+			 struct ieee80211_ampdu_params *params,
+			 bool enable)
+{
+	return mt7915_mcu_sta_ba(dev, params, enable, true);
+}
+
+/*
+ * Update the receive-side block-ack state for the station in @params.
+ * Thin wrapper around mt7915_mcu_sta_ba() with tx = false; returns its result.
+ */
+int mt7915_mcu_add_rx_ba(struct mt7915_dev *dev,
+			 struct ieee80211_ampdu_params *params,
+			 bool enable)
+{
+	return mt7915_mcu_sta_ba(dev, params, enable, false);
+}
+
+/*
+ * Fill the nested WTBL_GENERIC and WTBL_RX TLVs.
+ *
+ * With a station the peer address, AID, QoS flag and the interface's omac
+ * index are used. Without one the peer address is the BSSID (station
+ * interfaces) or the broadcast address, and muar_idx is set to 0xe.
+ */
+static void
+mt7915_mcu_wtbl_generic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+			    struct ieee80211_sta *sta, void *sta_wtbl,
+			    void *wtbl_tlv)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct wtbl_generic *gen;
+	struct wtbl_rx *rx;
+
+	gen = (struct wtbl_generic *)
+	      mt7915_mcu_add_nested_tlv(skb, WTBL_GENERIC, sizeof(*gen),
+					wtbl_tlv, sta_wtbl);
+
+	if (!sta) {
+		gen->muar_idx = 0xe;
+
+		/* use BSSID in station mode */
+		if (vif->type == NL80211_IFTYPE_STATION)
+			memcpy(gen->peer_addr, vif->bss_conf.bssid,
+			       ETH_ALEN);
+		else
+			eth_broadcast_addr(gen->peer_addr);
+	} else {
+		gen->muar_idx = mvif->omac_idx;
+		gen->qos = sta->wme;
+		gen->partial_aid = cpu_to_le16(sta->aid);
+		memcpy(gen->peer_addr, sta->addr, ETH_ALEN);
+	}
+
+	rx = (struct wtbl_rx *)
+	     mt7915_mcu_add_nested_tlv(skb, WTBL_RX, sizeof(*rx),
+				       wtbl_tlv, sta_wtbl);
+
+	/* rca1 is cleared only for a real station on an AP interface */
+	rx->rca1 = !sta || vif->type != NL80211_IFTYPE_AP;
+	rx->rca2 = 1;
+	rx->rv = 1;
+}
+
+/*
+ * Fill a STA_REC_BASIC TLV.
+ *
+ * @enable selects between the "new + port secure" and "disconnect" states.
+ * Without a station the record describes the broadcast entry; otherwise the
+ * connection type follows the interface type and the peer address, AID and
+ * QoS flag are copied from @sta. An unexpected interface type triggers a
+ * WARN_ON and leaves conn_type untouched.
+ */
+static void
+mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+			 struct ieee80211_sta *sta, bool enable)
+{
+#define EXTRA_INFO_VER          BIT(0)
+#define EXTRA_INFO_NEW          BIT(1)
+	struct sta_rec_basic *rec;
+	u16 extra = EXTRA_INFO_VER;
+
+	rec = (struct sta_rec_basic *)
+	      mt7915_mcu_add_tlv(skb, STA_REC_BASIC, sizeof(*rec));
+
+	if (enable)
+		extra |= EXTRA_INFO_NEW;
+
+	rec->extra_info = cpu_to_le16(extra);
+	rec->conn_state = enable ? CONN_STATE_PORT_SECURE :
+				   CONN_STATE_DISCONNECT;
+
+	if (!sta) {
+		eth_broadcast_addr(rec->peer_addr);
+		rec->conn_type = cpu_to_le32(CONNECTION_INFRA_BC);
+		return;
+	}
+
+	if (vif->type == NL80211_IFTYPE_MESH_POINT ||
+	    vif->type == NL80211_IFTYPE_AP)
+		rec->conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+	else if (vif->type == NL80211_IFTYPE_STATION)
+		rec->conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+	else if (vif->type == NL80211_IFTYPE_ADHOC)
+		rec->conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
+	else
+		WARN_ON(1);
+
+	rec->qos = sta->wme;
+	rec->aid = cpu_to_le16(sta->aid);
+	memcpy(rec->peer_addr, sta->addr, ETH_ALEN);
+}
+
+/*
+ * Add the capability TLVs of @sta: STA_REC_HT when HT is supported and
+ * STA_REC_VHT when VHT is supported. @dev is not used here.
+ */
+static void
+mt7915_mcu_sta_tlv(struct mt7915_dev *dev, struct sk_buff *skb,
+		   struct ieee80211_sta *sta)
+{
+	/* starec ht */
+	if (sta->ht_cap.ht_supported) {
+		struct sta_rec_ht *rec_ht;
+
+		rec_ht = (struct sta_rec_ht *)
+			 mt7915_mcu_add_tlv(skb, STA_REC_HT, sizeof(*rec_ht));
+		rec_ht->ht_cap = cpu_to_le16(sta->ht_cap.cap);
+	}
+
+	/* starec vht */
+	if (sta->vht_cap.vht_supported) {
+		struct sta_rec_vht *rec_vht;
+
+		rec_vht = (struct sta_rec_vht *)
+			  mt7915_mcu_add_tlv(skb, STA_REC_VHT,
+					     sizeof(*rec_vht));
+		rec_vht->vht_cap = cpu_to_le32(sta->vht_cap.cap);
+		rec_vht->vht_rx_mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
+		rec_vht->vht_tx_mcs_map = sta->vht_cap.vht_mcs.tx_mcs_map;
+	}
+}
+
+/*
+ * Add a nested WTBL_SMPS TLV; the smps flag is set only when the station is
+ * in dynamic SMPS mode and is otherwise left as returned by the TLV helper.
+ */
+static void
+mt7915_mcu_wtbl_smps_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
+			 void *sta_wtbl, void *wtbl_tlv)
+{
+	struct wtbl_smps *entry;
+
+	entry = (struct wtbl_smps *)
+		mt7915_mcu_add_nested_tlv(skb, WTBL_SMPS, sizeof(*entry),
+					  wtbl_tlv, sta_wtbl);
+
+	if (sta->smps_mode != IEEE80211_SMPS_DYNAMIC)
+		return;
+
+	entry->smps = true;
+}
+
+/*
+ * Add the nested WTBL_HT / WTBL_VHT TLVs matching the capabilities of @sta,
+ * followed by the WTBL_SMPS TLV.
+ *
+ * @skb:      request buffer the TLVs are added to
+ * @sta:      station whose HT/VHT capabilities are described
+ * @sta_wtbl: enclosing STA_REC_WTBL TLV
+ * @wtbl_tlv: wtbl request header the nested TLVs are accounted to
+ *
+ * When both HT and VHT are supported, the A-MPDU factor stored in the HT TLV
+ * is raised to the VHT maximum A-MPDU length exponent if that is larger.
+ */
+static void
+mt7915_mcu_wtbl_ht_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
+		       void *sta_wtbl, void *wtbl_tlv)
+{
+	struct wtbl_ht *ht = NULL;
+	struct tlv *tlv;
+
+	/* wtbl ht */
+	if (sta->ht_cap.ht_supported) {
+		tlv = mt7915_mcu_add_nested_tlv(skb, WTBL_HT, sizeof(*ht),
+						wtbl_tlv, sta_wtbl);
+		ht = (struct wtbl_ht *)tlv;
+		ht->ldpc = sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING;
+		ht->af = sta->ht_cap.ampdu_factor;
+		ht->mm = sta->ht_cap.ampdu_density;
+		ht->ht = true;
+	}
+
+	/* wtbl vht */
+	if (sta->vht_cap.vht_supported) {
+		struct wtbl_vht *vht;
+		u8 af;
+
+		tlv = mt7915_mcu_add_nested_tlv(skb, WTBL_VHT, sizeof(*vht),
+						wtbl_tlv, sta_wtbl);
+		vht = (struct wtbl_vht *)tlv;
+		/*
+		 * NOTE(review): the masked capability bit is stored without
+		 * normalising it to 0/1 - confirm against the field type.
+		 */
+		vht->ldpc = sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC;
+		vht->vht = true;
+
+		af = FIELD_GET(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
+			       sta->vht_cap.cap);
+		/* ht is only non-NULL when the HT TLV was added above */
+		if (ht)
+			ht->af = max_t(u8, ht->af, af);
+	}
+
+	mt7915_mcu_wtbl_smps_tlv(skb, sta, sta_wtbl, wtbl_tlv);
+}
+
+/*
+ * Build and send a MCU_EXT_CMD_STA_REC_UPDATE request whose only payload is
+ * a STA_REC_WTBL TLV wrapping the WTBL_SMPS TLV of @sta.
+ *
+ * Returns the result of the MCU send, or the allocation error.
+ */
+int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			struct ieee80211_sta *sta)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct wtbl_req_hdr *hdr;
+	struct sk_buff *req;
+	struct tlv *wrap;
+
+	req = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	wrap = mt7915_mcu_add_tlv(req, STA_REC_WTBL, sizeof(struct tlv));
+	hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_SET, wrap, &req);
+	mt7915_mcu_wtbl_smps_tlv(req, sta, wrap, hdr);
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, req,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
+}
+
+/*
+ * Fill a STA_REC_RA (rate adaptation) TLV for @sta.
+ *
+ * The supported legacy, HT and VHT rates are collected from the station's
+ * capabilities on the band of dev->mphy.chandef, the capability flags are
+ * gathered in sta_status, and the phy sub-record is set from the highest
+ * mode present in supp_mode.
+ */
+static void
+mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
+			     struct ieee80211_vif *vif,
+			     struct ieee80211_sta *sta)
+{
+	struct cfg80211_chan_def *chandef = &dev->mphy.chandef;
+	struct sta_rec_ra *ra;
+	struct tlv *tlv;
+	enum nl80211_band band = chandef->chan->band;
+	u32 supp_rate = sta->supp_rates[band];
+	int n_rates = hweight32(supp_rate);
+	u32 cap = sta->wme ? STA_CAP_WMM : 0;
+	u8 i, nss = sta->rx_nss, mcs = 0;
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_RA, sizeof(*ra));
+
+	ra = (struct sta_rec_ra *)tlv;
+	ra->valid = true;
+	ra->auto_rate = true;
+	ra->phy_mode = mt7915_get_phy_mode(dev, vif, band, sta);
+	ra->channel = chandef->chan->hw_value;
+	ra->bw = sta->bandwidth;
+	ra->rate_len = n_rates;
+	ra->phy.bw = sta->bandwidth;
+
+	/* legacy rates */
+	if (n_rates) {
+		if (band == NL80211_BAND_2GHZ) {
+			/* low four bits go to the CCK mask */
+			ra->supp_mode = MODE_CCK;
+			ra->supp_cck_rate = supp_rate & GENMASK(3, 0);
+			ra->phy.type = MT_PHY_TYPE_CCK;
+
+			/* more than four rates: the rest goes to OFDM */
+			if (n_rates > 4) {
+				ra->supp_mode |= MODE_OFDM;
+				ra->supp_ofdm_rate = supp_rate >> 4;
+				ra->phy.type = MT_PHY_TYPE_OFDM;
+			}
+		} else {
+			ra->supp_mode = MODE_OFDM;
+			ra->supp_ofdm_rate = supp_rate;
+			ra->phy.type = MT_PHY_TYPE_OFDM;
+		}
+	}
+
+	/* HT rates and capabilities */
+	if (sta->ht_cap.ht_supported) {
+		for (i = 0; i < nss; i++)
+			ra->ht_mcs[i] = sta->ht_cap.mcs.rx_mask[i];
+
+		ra->supp_ht_mcs = *(__le32 *)ra->ht_mcs;
+		ra->supp_mode |= MODE_HT;
+		/* index of the highest MCS = number of set bits - 1 */
+		mcs = hweight32(ra->supp_ht_mcs) - 1;
+		ra->af = sta->ht_cap.ampdu_factor;
+		ra->ht_gf = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
+
+		cap |= STA_CAP_HT;
+		if (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20)
+			cap |= STA_CAP_SGI_20;
+		if (sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_40)
+			cap |= STA_CAP_SGI_40;
+		if (sta->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC)
+			cap |= STA_CAP_TX_STBC;
+		if (sta->ht_cap.cap & IEEE80211_HT_CAP_RX_STBC)
+			cap |= STA_CAP_RX_STBC;
+		if (sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING)
+			cap |= STA_CAP_LDPC;
+	}
+
+	/* VHT rates and capabilities */
+	if (sta->vht_cap.vht_supported) {
+		__le16 mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
+		u16 vht_mcs;
+		u8 af, mcs_prev;
+
+		af = FIELD_GET(IEEE80211_VHT_CAP_MAX_A_MPDU_LENGTH_EXPONENT_MASK,
+			       sta->vht_cap.cap);
+		ra->af = max_t(u8, ra->af, af);
+
+		cap |= STA_CAP_VHT;
+		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80)
+			cap |= STA_CAP_VHT_SGI_80;
+		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_160)
+			cap |= STA_CAP_VHT_SGI_160;
+		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_TXSTBC)
+			cap |= STA_CAP_VHT_TX_STBC;
+		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_RXSTBC_1)
+			cap |= STA_CAP_VHT_RX_STBC;
+		if (sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC)
+			cap |= STA_CAP_VHT_LDPC;
+
+		ra->supp_mode |= MODE_VHT;
+		/*
+		 * Walk the rx MCS map two bits per stream. mcs is restarted
+		 * at 0 here, discarding the HT value computed above.
+		 * NOTE(review): mcs_map is __le16 but is shifted and masked
+		 * without conversion - confirm on big-endian hosts.
+		 */
+		for (mcs = 0, i = 0; i < nss; i++, mcs_map >>= 2) {
+			switch (mcs_map & 0x3) {
+			case IEEE80211_VHT_MCS_SUPPORT_0_9:
+				vht_mcs = GENMASK(9, 0);
+				break;
+			case IEEE80211_VHT_MCS_SUPPORT_0_8:
+				vht_mcs = GENMASK(8, 0);
+				break;
+			case IEEE80211_VHT_MCS_SUPPORT_0_7:
+				vht_mcs = GENMASK(7, 0);
+				break;
+			default:
+				vht_mcs = 0;
+			}
+
+			ra->supp_vht_mcs[i] = cpu_to_le16(vht_mcs);
+
+			/*
+			 * NOTE(review): with vht_mcs == 0 this stores
+			 * 0 - 1 into a u8, i.e. 255, which then wins the
+			 * comparison below - confirm this cannot happen for
+			 * streams below rx_nss.
+			 */
+			mcs_prev = hweight16(vht_mcs) - 1;
+			if (mcs_prev > mcs)
+				mcs = mcs_prev;
+
+			/* only support 2ss on 160MHz */
+			/*
+			 * NOTE(review): the check runs after stream i has
+			 * been filled in, so the loop stops once i == 2 was
+			 * processed - confirm this matches the 2ss intent.
+			 */
+			if (i > 1 && (ra->bw == CMD_CBW_160MHZ ||
+				      ra->bw == CMD_CBW_8080MHZ))
+				break;
+		}
+	}
+
+	ra->sta_status = cpu_to_le32(cap);
+
+	/* pick the phy settings from the highest mode bit in supp_mode */
+	switch (BIT(fls(ra->supp_mode) - 1)) {
+	case MODE_VHT:
+		ra->phy.type = MT_PHY_TYPE_VHT;
+		ra->phy.mcs = mcs;
+		ra->phy.nss = nss;
+		ra->phy.stbc = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_TXSTBC);
+		ra->phy.ldpc = !!(sta->vht_cap.cap & IEEE80211_VHT_CAP_RXLDPC);
+		ra->phy.sgi =
+			!!(sta->vht_cap.cap & IEEE80211_VHT_CAP_SHORT_GI_80);
+		break;
+	case MODE_HT:
+		ra->phy.type = MT_PHY_TYPE_HT;
+		ra->phy.mcs = mcs;
+		ra->phy.ldpc = sta->ht_cap.cap & IEEE80211_HT_CAP_LDPC_CODING;
+		ra->phy.stbc = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_TX_STBC);
+		ra->phy.sgi = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_SGI_20);
+		break;
+	default:
+		/* legacy only: keep the type chosen in the n_rates block */
+		break;
+	}
+}
+
+/*
+ * Build and send a MCU_EXT_CMD_STA_REC_UPDATE request carrying the rate
+ * adaptation TLV of @sta.
+ *
+ * Returns the result of the MCU send, or the allocation error.
+ */
+int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			     struct ieee80211_sta *sta)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct sk_buff *req;
+
+	req = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	mt7915_mcu_sta_rate_ctrl_tlv(req, dev, vif, sta);
+
+	return __mt76_mcu_skb_send_msg(&dev->mt76, req,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
+}
+
+/*
+ * Add or remove a station record in the MCU.
+ *
+ * With @sta NULL the interface's own entry (mvif->sta) is used. The request
+ * always carries the basic TLV and a WTBL_RESET_AND_SET wtbl request; when
+ * enabling, the generic wtbl TLVs and, for a real station, the HT/VHT
+ * capability TLVs are added as well. After a successful send for an enabled
+ * real station, rate control is programmed with a second request.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		       struct ieee80211_sta *sta, bool enable)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	bool new_peer = enable && sta;
+	struct wtbl_req_hdr *hdr;
+	struct mt7915_sta *msta;
+	struct sk_buff *req;
+	struct tlv *wrap;
+	int err;
+
+	if (sta)
+		msta = (struct mt7915_sta *)sta->drv_priv;
+	else
+		msta = &mvif->sta;
+
+	req = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(req))
+		return PTR_ERR(req);
+
+	/* sta record TLVs */
+	mt7915_mcu_sta_basic_tlv(req, vif, sta, enable);
+	if (new_peer)
+		mt7915_mcu_sta_tlv(dev, req, sta);
+
+	/* wtbl TLVs, nested inside a STA_REC_WTBL TLV */
+	wrap = mt7915_mcu_add_tlv(req, STA_REC_WTBL, sizeof(struct tlv));
+	hdr = mt7915_mcu_alloc_wtbl_req(dev, msta, WTBL_RESET_AND_SET,
+					wrap, &req);
+	if (enable)
+		mt7915_mcu_wtbl_generic_tlv(req, vif, sta, wrap, hdr);
+	if (new_peer)
+		mt7915_mcu_wtbl_ht_tlv(req, sta, wrap, hdr);
+
+	err = __mt76_mcu_skb_send_msg(&dev->mt76, req,
+				      MCU_EXT_CMD_STA_REC_UPDATE, true);
+	if (err || !new_peer)
+		return err;
+
+	return mt7915_mcu_add_rate_ctrl(dev, vif, sta);
+}
+
+/*
+ * Send a MCU_EXT_CMD_DEV_INFO_UPDATE request with a single DEV_INFO_ACTIVE
+ * TLV that carries the interface's omac index, band index, MAC address and
+ * the requested active state.
+ *
+ * Returns the result of the MCU send.
+ */
+int mt7915_mcu_add_dev_info(struct mt7915_dev *dev,
+			    struct ieee80211_vif *vif, bool enable)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct {
+		struct req_hdr {
+			u8 omac_idx;
+			u8 dbdc_idx;
+			__le16 tlv_num;
+			u8 is_tlv_append;
+			u8 rsv[3];
+		} __packed hdr;
+		struct req_tlv {
+			__le16 tag;
+			__le16 len;
+			u8 active;
+			u8 dbdc_idx;
+			u8 omac_addr[ETH_ALEN];
+		} __packed tlv;
+	} req = {
+		.hdr.omac_idx = mvif->omac_idx,
+		.hdr.dbdc_idx = mvif->band_idx,
+		.hdr.tlv_num = cpu_to_le16(1),
+		.hdr.is_tlv_append = 1,
+
+		.tlv.tag = cpu_to_le16(DEV_INFO_ACTIVE),
+		.tlv.len = cpu_to_le16(sizeof(struct req_tlv)),
+		.tlv.active = enable,
+		.tlv.dbdc_idx = mvif->band_idx,
+	};
+
+	/* the address cannot be set from the initializer */
+	memcpy(req.tlv.omac_addr, vif->addr, ETH_ALEN);
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_DEV_INFO_UPDATE,
+				   &req, sizeof(req), true);
+}
+
+/*
+ * Add a BSS_INFO_BCN_CSA sub-TLV holding the current CSA counter read from
+ * the beacon template. Nothing is added when the template has no CSA
+ * counter offset.
+ */
+static void
+mt7915_mcu_beacon_csa(struct sk_buff *rskb, struct sk_buff *skb,
+		      struct bss_info_bcn *bcn,
+		      struct ieee80211_mutable_offsets *offs)
+{
+	struct bss_info_bcn_csa *info;
+
+	if (!offs->csa_counter_offs[0])
+		return;
+
+	info = (struct bss_info_bcn_csa *)
+	       mt7915_mcu_add_nested_subtlv(rskb, BSS_INFO_BCN_CSA,
+					    sizeof(*info), &bcn->sub_ntlv,
+					    &bcn->len);
+	info->cnt = skb->data[offs->csa_counter_offs[0]];
+}
+
+/*
+ * Add a BSS_INFO_BCN_CONTENT sub-TLV: the content header is followed by a
+ * TX descriptor written by mt7915_mac_write_txwi() for the global wcid and
+ * then by a copy of the beacon frame itself.
+ */
+static void
+mt7915_mcu_beacon_cont(struct mt7915_dev *dev, struct sk_buff *rskb,
+		       struct sk_buff *skb, struct bss_info_bcn *bcn,
+		       struct ieee80211_mutable_offsets *offs)
+{
+	int pkt_len = MT_TXD_SIZE + skb->len;
+	struct bss_info_bcn_cont *cont;
+	u8 *txd;
+
+	cont = (struct bss_info_bcn_cont *)
+	       mt7915_mcu_add_nested_subtlv(rskb, BSS_INFO_BCN_CONTENT,
+					    sizeof(*cont) + pkt_len,
+					    &bcn->sub_ntlv, &bcn->len);
+
+	cont->pkt_len = cpu_to_le16(pkt_len);
+	cont->tim_ofs = cpu_to_le16(offs->tim_offset);
+	if (offs->csa_counter_offs[0])
+		cont->csa_ofs = cpu_to_le16(offs->csa_counter_offs[0] - 4);
+
+	/* descriptor and frame live right behind the content header */
+	txd = (u8 *)(cont + 1);
+	mt7915_mac_write_txwi(dev, (__le32 *)txd, skb,
+			      &dev->mt76.global_wcid, NULL, true);
+	memcpy(txd + MT_TXD_SIZE, skb->data, skb->len);
+}
+
+/*
+ * Offload the beacon of @vif to the MCU.
+ *
+ * @hw:  mac80211 hardware the interface belongs to
+ * @vif: interface whose beacon template is fetched
+ * @en:  value stored in the offload TLV's enable field
+ *
+ * Builds a MCU_EXT_CMD_BSS_INFO_UPDATE request with a BSS_INFO_OFFLOAD TLV
+ * containing the CSA and beacon-content sub-TLVs.
+ *
+ * Returns the result of the MCU send, the allocation error, or -EINVAL when
+ * no beacon template is available or the template does not fit into
+ * MAX_BEACON_SIZE together with the TX descriptor.
+ */
+int mt7915_mcu_add_beacon(struct ieee80211_hw *hw,
+			  struct ieee80211_vif *vif, int en)
+{
+#define MAX_BEACON_SIZE 512
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct ieee80211_mutable_offsets offs;
+	struct ieee80211_tx_info *info;
+	struct sk_buff *skb, *rskb;
+	struct tlv *tlv;
+	struct bss_info_bcn *bcn;
+	int len = MT7915_BEACON_UPDATE_SIZE + MAX_BEACON_SIZE;
+
+	rskb = mt7915_mcu_alloc_sta_req(dev, mvif, NULL, len);
+	if (IS_ERR(rskb))
+		return PTR_ERR(rskb);
+
+	tlv = mt7915_mcu_add_tlv(rskb, BSS_INFO_OFFLOAD, sizeof(*bcn));
+	bcn = (struct bss_info_bcn *)tlv;
+	bcn->enable = en;
+
+	skb = ieee80211_beacon_get_template(hw, vif, &offs);
+	if (!skb) {
+		/* the request never reaches the send path: release it */
+		dev_kfree_skb(rskb);
+		return -EINVAL;
+	}
+
+	if (skb->len > MAX_BEACON_SIZE - MT_TXD_SIZE) {
+		dev_err(dev->mt76.dev, "Bcn size limit exceed\n");
+		dev_kfree_skb(skb);
+		dev_kfree_skb(rskb);
+		return -EINVAL;
+	}
+
+	if (mvif->band_idx) {
+		info = IEEE80211_SKB_CB(skb);
+		info->hw_queue |= MT_TX_HW_QUEUE_EXT_PHY;
+	}
+
+	/* TODO: subtag - bss color count & 11v MBSSID */
+	mt7915_mcu_beacon_csa(rskb, skb, bcn, &offs);
+	mt7915_mcu_beacon_cont(dev, rskb, skb, bcn, &offs);
+	/* the template has been copied into rskb and is no longer needed */
+	dev_kfree_skb(skb);
+
+	return __mt76_mcu_skb_send_msg(&phy->dev->mt76, rskb,
+				       MCU_EXT_CMD_BSS_INFO_UPDATE, true);
+}
+
+/*
+ * Push @len bytes of firmware data to the MCU with MCU_CMD_FW_SCATTER
+ * messages of at most 4096 bytes including the MCU TX descriptor, cleaning
+ * up the firmware download queue after every chunk.
+ *
+ * Returns 0 when everything was sent (or @len is not positive), otherwise
+ * the error of the first failing send.
+ */
+static int mt7915_mcu_send_firmware(struct mt7915_dev *dev, const void *data,
+				    int len)
+{
+	const int max_chunk = 4096 - sizeof(struct mt7915_mcu_txd);
+
+	while (len > 0) {
+		int chunk = max_chunk < len ? max_chunk : len;
+		int err;
+
+		err = __mt76_mcu_send_msg(&dev->mt76, -MCU_CMD_FW_SCATTER,
+					  data, chunk, false);
+		if (err)
+			return err;
+
+		len -= chunk;
+		data += chunk;
+		mt76_queue_tx_cleanup(dev, MT_TXQ_FWDL, false);
+	}
+
+	return 0;
+}
+
+/*
+ * Send MCU_CMD_FW_START_REQ with the given start option flags and address.
+ * Returns the result of the MCU send.
+ */
+static int mt7915_mcu_start_firmware(struct mt7915_dev *dev, u32 addr,
+				     u32 option)
+{
+	struct {
+		__le32 option;
+		__le32 addr;
+	} msg = {
+		.addr = cpu_to_le32(addr),
+		.option = cpu_to_le32(option),
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, -MCU_CMD_FW_START_REQ,
+				   &msg, sizeof(msg), true);
+}
+
+/*
+ * Send MCU_CMD_NIC_POWER_CTRL with power_mode set to 1, without waiting for
+ * a response. Returns the result of the MCU send.
+ */
+static int mt7915_mcu_restart(struct mt76_dev *dev)
+{
+	struct {
+		u8 power_mode;
+		u8 rsv[3];
+	} msg = {
+		.power_mode = 1,
+	};
+
+	return __mt76_mcu_send_msg(dev, -MCU_CMD_NIC_POWER_CTRL,
+				   &msg, sizeof(msg), false);
+}
+
+/*
+ * Acquire (@get true) or release (@get false) the patch semaphore through
+ * MCU_CMD_PATCH_SEM_CONTROL. Returns the result of the MCU send, which the
+ * callers compare against the PATCH_* status codes.
+ */
+static int mt7915_mcu_patch_sem_ctrl(struct mt7915_dev *dev, bool get)
+{
+	u32 op = get ? PATCH_SEM_GET : PATCH_SEM_RELEASE;
+	struct {
+		__le32 op;
+	} msg = {
+		.op = cpu_to_le32(op),
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, -MCU_CMD_PATCH_SEM_CONTROL,
+				   &msg, sizeof(msg), true);
+}
+
+/*
+ * Send MCU_CMD_PATCH_FINISH_REQ with check_crc cleared.
+ * Returns the result of the MCU send.
+ */
+static int mt7915_mcu_start_patch(struct mt7915_dev *dev)
+{
+	struct {
+		u8 check_crc;
+		u8 reserved[3];
+	} msg = {
+		.check_crc = 0,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, -MCU_CMD_PATCH_FINISH_REQ,
+				   &msg, sizeof(msg), true);
+}
+
+/*
+ * Write MT_TOP_LPCR_HOST_DRV_OWN to the band 0 LPCR host register and poll
+ * for up to 500 ms until the MT_TOP_LPCR_HOST_FW_OWN bit reads back as 0.
+ *
+ * Returns 0 on success, -EIO on timeout.
+ */
+static int mt7915_driver_own(struct mt7915_dev *dev)
+{
+	u32 lpcr = mt7915_reg_map_l1(dev, MT_TOP_LPCR_HOST_BAND0);
+
+	mt76_wr(dev, lpcr, MT_TOP_LPCR_HOST_DRV_OWN);
+
+	if (mt76_poll_msec(dev, lpcr, MT_TOP_LPCR_HOST_FW_OWN, 0, 500))
+		return 0;
+
+	dev_err(dev->mt76.dev, "Timeout for driver own\n");
+
+	return -EIO;
+}
+
+/*
+ * Announce an upcoming download of @len bytes to @addr with the given
+ * download @mode.
+ *
+ * MCU_CMD_PATCH_START_REQ is used when @addr is MCU_PATCH_ADDRESS, otherwise
+ * MCU_CMD_TARGET_ADDRESS_LEN_REQ.
+ *
+ * Returns the result of the MCU send.
+ */
+static int mt7915_mcu_init_download(struct mt7915_dev *dev, u32 addr,
+				    u32 len, u32 mode)
+{
+	struct {
+		__le32 addr;
+		__le32 len;
+		__le32 mode;
+	} req = {
+		.addr = cpu_to_le32(addr),
+		.len = cpu_to_le32(len),
+		.mode = cpu_to_le32(mode),
+	};
+	int attr;
+
+	/*
+	 * Compare the CPU-order argument, not the little-endian request
+	 * field, so the check also holds on big-endian hosts.
+	 */
+	if (addr == MCU_PATCH_ADDRESS)
+		attr = -MCU_CMD_PATCH_START_REQ;
+	else
+		attr = -MCU_CMD_TARGET_ADDRESS_LEN_REQ;
+
+	return __mt76_mcu_send_msg(&dev->mt76, attr, &req, sizeof(req), true);
+}
+
+/*
+ * Download the ROM patch (MT7915_ROM_PATCH) to the MCU.
+ *
+ * The patch semaphore is taken first: a PATCH_IS_DL answer returns 0 right
+ * away and anything other than PATCH_NOT_DL_SEM_SUCCESS fails with -EAGAIN.
+ * Every region listed in the patch header is announced with
+ * mt7915_mcu_init_download() and pushed with mt7915_mcu_send_firmware(),
+ * then the patch is started. On every path past the semaphore acquisition
+ * the semaphore is released once and the firmware image is dropped.
+ *
+ * Returns 0 on success or a negative errno; a failed semaphore release is
+ * reported as -EAGAIN.
+ */
+static int mt7915_load_patch(struct mt7915_dev *dev)
+{
+	const struct mt7915_patch_hdr *hdr;
+	const struct firmware *fw = NULL;
+	int i, ret, sem;
+
+	sem = mt7915_mcu_patch_sem_ctrl(dev, 1);
+	switch (sem) {
+	case PATCH_IS_DL:
+		return 0;
+	case PATCH_NOT_DL_SEM_SUCCESS:
+		break;
+	default:
+		dev_err(dev->mt76.dev, "Failed to get patch semaphore\n");
+		return -EAGAIN;
+	}
+
+	ret = request_firmware(&fw, MT7915_ROM_PATCH, dev->mt76.dev);
+	if (ret)
+		goto out;
+
+	if (!fw || !fw->data || fw->size < sizeof(*hdr)) {
+		dev_err(dev->mt76.dev, "Invalid firmware\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	hdr = (const struct mt7915_patch_hdr *)(fw->data);
+
+	dev_info(dev->mt76.dev, "HW/SW Version: 0x%x, Build Time: %.16s\n",
+		 be32_to_cpu(hdr->hw_sw_ver), hdr->build_date);
+
+	for (i = 0; i < be32_to_cpu(hdr->desc.n_region); i++) {
+		struct mt7915_patch_sec *sec;
+		const u8 *dl;
+		u32 len, addr;
+
+		/* section descriptors follow the patch header back to back */
+		sec = (struct mt7915_patch_sec *)(fw->data + sizeof(*hdr) +
+						  i * sizeof(*sec));
+		if ((be32_to_cpu(sec->type) & PATCH_SEC_TYPE_MASK) !=
+		    PATCH_SEC_TYPE_INFO) {
+			ret = -EINVAL;
+			goto out;
+		}
+
+		addr = be32_to_cpu(sec->info.addr);
+		len = be32_to_cpu(sec->info.len);
+		dl = fw->data + be32_to_cpu(sec->offs);
+
+		ret = mt7915_mcu_init_download(dev, addr, len,
+					       DL_MODE_NEED_RSP);
+		if (ret) {
+			dev_err(dev->mt76.dev, "Download request failed\n");
+			goto out;
+		}
+
+		ret = mt7915_mcu_send_firmware(dev, dl, len);
+		if (ret) {
+			dev_err(dev->mt76.dev, "Failed to send patch\n");
+			goto out;
+		}
+	}
+
+	ret = mt7915_mcu_start_patch(dev);
+	if (ret)
+		dev_err(dev->mt76.dev, "Failed to start patch\n");
+
+out:
+	/*
+	 * Release the semaphore exactly once. Jumping back to "out" on
+	 * failure would retry the release forever if it keeps failing.
+	 */
+	sem = mt7915_mcu_patch_sem_ctrl(dev, 0);
+	if (sem != PATCH_REL_SEM_SUCCESS) {
+		ret = -EAGAIN;
+		dev_err(dev->mt76.dev, "Failed to release patch semaphore\n");
+	}
+	release_firmware(fw);
+
+	return ret;
+}
+
+/*
+ * Translate a firmware region's feature_set byte into download mode flags:
+ * a response is always requested, the encryption flags and key index are
+ * taken from @feature_set, and @is_wa adds DL_MODE_WORKING_PDA_CR4.
+ */
+static u32 mt7915_mcu_gen_dl_mode(u8 feature_set, bool is_wa)
+{
+	u32 mode = DL_MODE_NEED_RSP;
+
+	if (feature_set & FW_FEATURE_SET_ENCRYPT)
+		mode |= DL_MODE_ENCRYPT | DL_MODE_RESET_SEC_IV;
+
+	mode |= FIELD_PREP(DL_MODE_KEY_IDX,
+			   FIELD_GET(FW_FEATURE_SET_KEY_IDX, feature_set));
+
+	if (is_wa)
+		mode |= DL_MODE_WORKING_PDA_CR4;
+
+	return mode;
+}
+
+/*
+ * Download all regions of a RAM firmware image and start it.
+ *
+ * The region descriptors sit immediately in front of the trailer @hdr; the
+ * region payloads are read consecutively from @data. A region flagged with
+ * FW_FEATURE_OVERRIDE_ADDR supplies the start address override. @is_wa adds
+ * the PDA_CR4 flags to both the download mode and the start option.
+ *
+ * Returns 0 on success or the error of the first failing MCU request.
+ */
+static int
+mt7915_mcu_send_ram_firmware(struct mt7915_dev *dev,
+			     const struct mt7915_fw_trailer *hdr,
+			     const u8 *data, bool is_wa)
+{
+	const struct mt7915_fw_region *regions;
+	u32 override = 0, option = 0;
+	int idx, offset = 0;
+
+	/* first descriptor: n_region entries back from the trailer */
+	regions = (const struct mt7915_fw_region *)((const u8 *)hdr -
+		  hdr->n_region * sizeof(*regions));
+
+	for (idx = 0; idx < hdr->n_region; idx++) {
+		const struct mt7915_fw_region *region = &regions[idx];
+		u32 mode = mt7915_mcu_gen_dl_mode(region->feature_set, is_wa);
+		u32 len = le32_to_cpu(region->len);
+		u32 addr = le32_to_cpu(region->addr);
+		int err;
+
+		if (region->feature_set & FW_FEATURE_OVERRIDE_ADDR)
+			override = addr;
+
+		err = mt7915_mcu_init_download(dev, addr, len, mode);
+		if (err) {
+			dev_err(dev->mt76.dev, "Download request failed\n");
+			return err;
+		}
+
+		err = mt7915_mcu_send_firmware(dev, data + offset, len);
+		if (err) {
+			dev_err(dev->mt76.dev, "Failed to send firmware.\n");
+			return err;
+		}
+
+		offset += len;
+	}
+
+	if (is_wa)
+		option |= FW_START_WORKING_PDA_CR4;
+
+	if (override)
+		option |= FW_START_OVERRIDE;
+
+	return mt7915_mcu_start_firmware(dev, override, option);
+}
+
+/*
+ * Load the two RAM firmware images: MT7915_FIRMWARE_WM first, then
+ * MT7915_FIRMWARE_WA.
+ *
+ * Each image is requested, checked to be at least as large as its trailer,
+ * downloaded with mt7915_mcu_send_ram_firmware() and released again. The
+ * wiphy fw_version string is built from the trailer of the WA image.
+ *
+ * Returns 0 on success or a negative errno.
+ */
+static int mt7915_load_ram(struct mt7915_dev *dev)
+{
+	const struct mt7915_fw_trailer *hdr;
+	const struct firmware *fw;
+	int ret;
+
+	ret = request_firmware(&fw, MT7915_FIRMWARE_WM, dev->mt76.dev);
+	if (ret)
+		return ret;
+
+	if (!fw || !fw->data || fw->size < sizeof(*hdr)) {
+		dev_err(dev->mt76.dev, "Invalid firmware\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	/* the trailer occupies the last sizeof(*hdr) bytes of the image */
+	hdr = (const struct mt7915_fw_trailer *)(fw->data + fw->size -
+					sizeof(*hdr));
+
+	dev_info(dev->mt76.dev, "WM Firmware Version: %.10s, Build Time: %.15s\n",
+		 hdr->fw_ver, hdr->build_date);
+
+	ret = mt7915_mcu_send_ram_firmware(dev, hdr, fw->data, false);
+	if (ret) {
+		dev_err(dev->mt76.dev, "Failed to start WM firmware\n");
+		goto out;
+	}
+
+	/* done with the WM image; fw is reused for the WA image below */
+	release_firmware(fw);
+
+	ret = request_firmware(&fw, MT7915_FIRMWARE_WA, dev->mt76.dev);
+	if (ret)
+		return ret;
+
+	if (!fw || !fw->data || fw->size < sizeof(*hdr)) {
+		dev_err(dev->mt76.dev, "Invalid firmware\n");
+		ret = -EINVAL;
+		goto out;
+	}
+
+	hdr = (const struct mt7915_fw_trailer *)(fw->data + fw->size -
+					sizeof(*hdr));
+
+	dev_info(dev->mt76.dev, "WA Firmware Version: %.10s, Build Time: %.15s\n",
+		 hdr->fw_ver, hdr->build_date);
+
+	ret = mt7915_mcu_send_ram_firmware(dev, hdr, fw->data, true);
+	if (ret) {
+		dev_err(dev->mt76.dev, "Failed to start WA firmware\n");
+		goto out;
+	}
+
+	snprintf(dev->mt76.hw->wiphy->fw_version,
+		 sizeof(dev->mt76.hw->wiphy->fw_version),
+		 "%.10s-%.15s", hdr->fw_ver, hdr->build_date);
+
+out:
+	/* releases whichever image is currently held in fw */
+	release_firmware(fw);
+
+	return ret;
+}
+
+/*
+ * Bring the MCU from the download state to the running state.
+ *
+ * Waits up to one second for the FW_STATE field of MT_TOP_MISC to report
+ * FW_STATE_FW_DOWNLOAD, restarting the MCU once if it does not, then loads
+ * the ROM patch and the RAM images and waits up to one second for
+ * FW_STATE_WACPU_RDY.
+ *
+ * Returns 0 on success, -EIO on a state timeout, or the loader's error.
+ */
+static int mt7915_load_firmware(struct mt7915_dev *dev)
+{
+	u32 misc = mt7915_reg_map_l1(dev, MT_TOP_MISC);
+	u32 dl_state = FIELD_PREP(MT_TOP_MISC_FW_STATE, FW_STATE_FW_DOWNLOAD);
+	int err;
+
+	if (!mt76_poll_msec(dev, misc, MT_TOP_MISC_FW_STATE, dl_state, 1000)) {
+		/* restart firmware once */
+		__mt76_mcu_restart(&dev->mt76);
+
+		if (!mt76_poll_msec(dev, misc, MT_TOP_MISC_FW_STATE,
+				    dl_state, 1000)) {
+			dev_err(dev->mt76.dev,
+				"Firmware is not ready for download\n");
+			return -EIO;
+		}
+	}
+
+	err = mt7915_load_patch(dev);
+	if (err)
+		return err;
+
+	err = mt7915_load_ram(dev);
+	if (err)
+		return err;
+
+	if (!mt76_poll_msec(dev, misc, MT_TOP_MISC_FW_STATE,
+			    FIELD_PREP(MT_TOP_MISC_FW_STATE,
+				       FW_STATE_WACPU_RDY), 1000)) {
+		dev_err(dev->mt76.dev, "Timeout for initializing firmware\n");
+		return -EIO;
+	}
+
+	mt76_queue_tx_cleanup(dev, MT_TXQ_FWDL, false);
+	dev_dbg(dev->mt76.dev, "Firmware init done\n");
+
+	return 0;
+}
+
+/*
+ * Initialise the MCU: install the mt7915 MCU ops, take driver ownership,
+ * load the firmware and mark the MCU as running in dev->mphy.state.
+ *
+ * Returns 0 on success or the error from mt7915_driver_own() /
+ * mt7915_load_firmware().
+ */
+int mt7915_mcu_init(struct mt7915_dev *dev)
+{
+	static const struct mt76_mcu_ops mt7915_mcu_ops = {
+		.headroom = sizeof(struct mt7915_mcu_txd),
+		.mcu_skb_send_msg = mt7915_mcu_send_message,
+		.mcu_send_msg = mt7915_mcu_msg_send,
+		.mcu_restart = mt7915_mcu_restart,
+	};
+	int ret;
+
+	dev->mt76.mcu_ops = &mt7915_mcu_ops;
+
+	ret = mt7915_driver_own(dev);
+	if (ret)
+		return ret;
+
+	ret = mt7915_load_firmware(dev);
+	if (ret)
+		return ret;
+
+	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
+
+	return 0;
+}
+
+/*
+ * Shut the MCU down: restart it, wait up to one second for the FW_STATE
+ * field to report FW_STATE_FW_DOWNLOAD, then write MT_TOP_LPCR_HOST_FW_OWN
+ * to the band 0 LPCR host register and purge the MCU response queue.
+ * If the state is not reached, an error is logged and the remaining steps
+ * are skipped.
+ */
+void mt7915_mcu_exit(struct mt7915_dev *dev)
+{
+	u32 misc = mt7915_reg_map_l1(dev, MT_TOP_MISC);
+	u32 lpcr;
+
+	__mt76_mcu_restart(&dev->mt76);
+
+	if (!mt76_poll_msec(dev, misc, MT_TOP_MISC_FW_STATE,
+			    FIELD_PREP(MT_TOP_MISC_FW_STATE,
+				       FW_STATE_FW_DOWNLOAD), 1000)) {
+		dev_err(dev->mt76.dev, "Failed to exit mcu\n");
+		return;
+	}
+
+	lpcr = mt7915_reg_map_l1(dev, MT_TOP_LPCR_HOST_BAND0);
+	mt76_wr(dev, lpcr, MT_TOP_LPCR_HOST_FW_OWN);
+
+	skb_queue_purge(&dev->mt76.mcu.res_q);
+}
+
+/*
+ * Configure RX header translation and then enable or disable the MAC of
+ * @band. Two requests are sent: MCU_EXT_CMD_RX_HDR_TRANS (no response
+ * awaited) followed by MCU_EXT_CMD_MAC_INIT_CTRL.
+ *
+ * Returns the error of the first request if it fails, otherwise the result
+ * of the second.
+ */
+int mt7915_mcu_set_mac(struct mt7915_dev *dev, int band,
+		       bool enable, bool hdr_trans)
+{
+	struct {
+		u8 operation;
+		u8 enable;
+		u8 check_bssid;
+		u8 insert_vlan;
+		u8 remove_vlan;
+		u8 tid;
+		u8 mode;
+		u8 rsv;
+	} __packed trans = {
+		.enable = hdr_trans,
+	};
+	struct {
+		u8 enable;
+		u8 band;
+		u8 rsv[2];
+	} __packed mac = {
+		.band = band,
+		.enable = enable,
+	};
+	int err;
+
+	err = __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_RX_HDR_TRANS,
+				  &trans, sizeof(trans), false);
+	if (err)
+		return err;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_MAC_INIT_CTRL,
+				   &mac, sizeof(mac), true);
+}
+
+/*
+ * Send an SCS_ENABLE command for @band through MCU_EXT_CMD_SCS_CTRL without
+ * waiting for a response. The enable field is sent as @enable + 1.
+ * Returns the result of the MCU send.
+ */
+int mt7915_mcu_set_scs(struct mt7915_dev *dev, u8 band, bool enable)
+{
+	struct {
+		__le32 cmd;
+		u8 band;
+		u8 enable;
+	} __packed msg = {
+		.cmd = cpu_to_le32(SCS_ENABLE),
+		.enable = enable + 1,
+		.band = band,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SCS_CTRL,
+				   &msg, sizeof(msg), false);
+}
+
+/*
+ * Program the RTS length threshold @val for @phy through
+ * MCU_EXT_CMD_PROTECT_CTRL. The band is 0 for the device's primary phy and
+ * 1 otherwise; the packet threshold is fixed at 2.
+ * Returns the result of the MCU send.
+ */
+int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val)
+{
+	struct mt7915_dev *dev = phy->dev;
+	bool ext_phy = phy != &dev->phy;
+	struct {
+		u8 prot_idx;
+		u8 band;
+		u8 rsv[2];
+		__le32 len_thresh;
+		__le32 pkt_thresh;
+	} __packed msg = {
+		.prot_idx = 1,
+		.band = ext_phy,
+		.pkt_thresh = cpu_to_le32(0x2),
+		.len_thresh = cpu_to_le32(val),
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_PROTECT_CTRL,
+				   &msg, sizeof(msg), true);
+}
+
+/*
+ * Push the EDCA parameters of all access categories of @vif to the MCU
+ * through MCU_EXT_CMD_EDCA_UPDATE.
+ *
+ * @dev: device the request is sent to
+ * @vif: interface whose per-AC wmm settings (mvif->wmm[]) are sent
+ *
+ * The contention window limits are sent as bit counts (fls() of the
+ * configured value); a value of 0 selects the defaults 5 (cw_min) and
+ * 10 (cw_max).
+ *
+ * Returns the result of the MCU send.
+ */
+int mt7915_mcu_set_tx(struct mt7915_dev *dev, struct ieee80211_vif *vif)
+{
+#define WMM_AIFS_SET		BIT(0)
+#define WMM_CW_MIN_SET		BIT(1)
+#define WMM_CW_MAX_SET		BIT(2)
+#define WMM_TXOP_SET		BIT(3)
+#define WMM_PARAM_SET		GENMASK(3, 0)
+#define TX_CMD_MODE		1
+	struct edca {
+		u8 queue;
+		u8 set;
+		u8 aifs;
+		u8 cw_min;
+		__le16 cw_max;
+		__le16 txop;
+	};
+	struct mt7915_mcu_tx {
+		u8 total;
+		u8 action;
+		u8 valid;
+		u8 mode;
+
+		struct edca edca[IEEE80211_NUM_ACS];
+	} __packed req = {
+		.valid = true,
+		.mode = TX_CMD_MODE,
+		.total = IEEE80211_NUM_ACS,
+	};
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	int ac;
+
+	for (ac = 0; ac < IEEE80211_NUM_ACS; ac++) {
+		struct edca *e = &req.edca[ac];
+
+		/*
+		 * NOTE(review): e->set is left at 0 and the WMM_*_SET masks
+		 * above are unused - confirm whether the firmware expects
+		 * WMM_PARAM_SET here.
+		 */
+		e->queue = ac + mvif->wmm_idx * MT7915_MAX_WMM_SETS;
+		e->aifs = mvif->wmm[ac].aifs;
+		e->txop = cpu_to_le16(mvif->wmm[ac].txop);
+
+		/* cw_min must be derived from cw_min, not from cw_max */
+		if (mvif->wmm[ac].cw_min)
+			e->cw_min = fls(mvif->wmm[ac].cw_min);
+		else
+			e->cw_min = 5;
+
+		if (mvif->wmm[ac].cw_max)
+			e->cw_max = cpu_to_le16(fls(mvif->wmm[ac].cw_max));
+		else
+			e->cw_max = cpu_to_le16(10);
+	}
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_EDCA_UPDATE,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter)
+{
+#define ENTER_PM_STATE		1
+#define EXIT_PM_STATE		2
+	struct {	/* payload of MCU_EXT_CMD_PM_STATE_CTRL; unset fields go out as 0 */
+		u8 pm_number;
+		u8 pm_state;
+		u8 bssid[ETH_ALEN];
+		u8 dtim_period;
+		u8 wlan_idx_lo;
+		__le16 bcn_interval;
+		__le32 aid;
+		__le32 rx_filter;
+		u8 band_idx;
+		u8 wlan_idx_hi;
+		u8 rsv[2];
+		__le32 feature;
+		u8 omac_idx;
+		u8 wmm_idx;
+		u8 bcn_loss_cnt;
+		u8 bcn_sp_duration;
+	} __packed msg = {
+		.band_idx = band,
+		.pm_state = enter ? ENTER_PM_STATE : EXIT_PM_STATE,	/* any non-zero @enter means enter */
+		.pm_number = 5,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_PM_STATE_CTRL, &msg,
+				   sizeof(msg), true);
+}
+
+int mt7915_mcu_rdd_cmd(struct mt7915_dev *dev,
+		       enum mt7915_rdd_cmd cmd, u8 index,
+		       u8 rx_sel, u8 val)
+{
+	struct {	/* payload of MCU_EXT_CMD_SET_RDD_CTRL */
+		u8 ctrl;
+		u8 rdd_idx;
+		u8 rdd_rx_sel;
+		u8 val;
+		u8 rsv[4];
+	} __packed msg = {
+		.val = val,
+		.rdd_rx_sel = rx_sel,
+		.rdd_idx = index,
+		.ctrl = cmd,	/* enum value narrowed to one byte */
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_RDD_CTRL, &msg,
+				   sizeof(msg), true);
+}
+
+int mt7915_mcu_set_fcc5_lpn(struct mt7915_dev *dev, int val)
+{
+	struct {	/* MCU_EXT_CMD_SET_RDD_TH payload, tag 0x1; little-endian on the wire */
+		__le32 tag;
+		__le16 min_lpn;
+		u8 rsv[2];
+	} __packed req = {
+		.tag = cpu_to_le32(0x1),
+		.min_lpn = cpu_to_le16(val),	/* @val is truncated to 16 bits */
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_RDD_TH,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
+			    const struct mt7915_dfs_pulse *pulse)
+{
+	struct {	/* MCU_EXT_CMD_SET_RDD_TH payload, tag 0x3 */
+		__le32 tag;	/* little-endian on the wire */
+		struct mt7915_dfs_pulse pulse;
+	} __packed req = {
+		.tag = cpu_to_le32(0x3),
+	};
+
+	memcpy(&req.pulse, pulse, sizeof(*pulse));	/* NOTE(review): copied as-is; confirm the byte order of struct mt7915_dfs_pulse fields */
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_RDD_TH,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
+			    const struct mt7915_dfs_pattern *pattern)
+{
+	struct {	/* MCU_EXT_CMD_SET_RDD_TH payload, tag 0x2 */
+		__le32 tag;	/* little-endian on the wire */
+		__le16 radar_type;
+		struct mt7915_dfs_pattern pattern;
+	} __packed req = {
+		.tag = cpu_to_le32(0x2),
+		.radar_type = cpu_to_le16(index),	/* @index is truncated to 16 bits */
+	};
+
+	memcpy(&req.pattern, pattern, sizeof(*pattern));	/* NOTE(review): copied as-is; confirm the byte order of struct mt7915_dfs_pattern fields */
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_RDD_TH,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_chan_info(struct mt7915_phy *phy, int cmd)
+{
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	struct ieee80211_channel *chan = chandef->chan;
+	struct mt7915_dev *dev = phy->dev;
+	struct {
+		u8 control_ch;
+		u8 center_ch;
+		u8 bw;
+		u8 tx_streams_num;
+		u8 rx_streams;	/* mask or num */
+		u8 switch_reason;
+		u8 band_idx;
+		u8 center_ch2;	/* for 80+80 only */
+		__le16 cac_case;
+		u8 channel_band;
+		u8 rsv0;
+		__le32 outband_freq;
+		u8 txpower_drop;
+		u8 ap_bw;
+		u8 ap_center_ch;
+		u8 rsv1[57];
+	} __packed msg = {
+		.control_ch = chan->hw_value,
+		.center_ch = ieee80211_frequency_to_channel(chandef->center_freq1),
+		.bw = mt7915_mcu_chan_bw(chandef),
+		.tx_streams_num = hweight8(phy->mt76->antenna_mask),
+		.rx_streams = phy->chainmask,
+		.band_idx = phy != &dev->phy,
+		.channel_band = chan->band,
+	};
+
+	/* radar channels not yet marked available get the DFS switch reason */
+	if (!(chan->flags & IEEE80211_CHAN_RADAR) ||
+	    chan->dfs_state == NL80211_DFS_AVAILABLE)
+		msg.switch_reason = CH_SWITCH_NORMAL;
+	else
+		msg.switch_reason = CH_SWITCH_DFS;
+
+	/* for CHANNEL_SWITCH the chain mask is replaced by its bit count */
+	if (cmd == MCU_EXT_CMD_CHANNEL_SWITCH)
+		msg.rx_streams = hweight8(msg.rx_streams);
+
+	if (chandef->width == NL80211_CHAN_WIDTH_80P80)
+		msg.center_ch2 =
+			ieee80211_frequency_to_channel(chandef->center_freq2);
+
+	return __mt76_mcu_send_msg(&dev->mt76, cmd, &msg, sizeof(msg), true);
+}
+
+int mt7915_mcu_set_eeprom(struct mt7915_dev *dev)
+{
+	struct req_hdr {	/* header-only request: len stays 0 and no data follows */
+		u8 buffer_mode;
+		u8 format;
+		__le16 len;
+	} __packed msg = {
+		.format = EE_FORMAT_WHOLE,
+		.buffer_mode = EE_MODE_EFUSE,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_EFUSE_BUFFER_MODE, &msg,
+				   sizeof(msg), true);
+}
+
+int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset)
+{
+	/* request the 16-byte-aligned block that contains @offset */
+	u32 block = round_down(offset, 16);
+	struct mt7915_mcu_eeprom_info msg = { .addr = cpu_to_le32(block) };
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_EFUSE_ACCESS,
+				   &msg, sizeof(msg), true);
+}
+
+int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index)
+{
+	struct {	/* payload of MCU_EXT_CMD_THERMAL_CTRL; all-u8, so no padding */
+		u8 ctrl_id;
+		u8 action;
+		u8 band;
+		u8 rsv[5];
+	} msg = {
+		.action = index,	/* @index travels in the action byte */
+		.ctrl_id = THERMAL_SENSOR_TEMP_QUERY,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_THERMAL_CTRL,
+				   &msg, sizeof(msg), true);
+}
+
+int mt7915_mcu_get_rate_info(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx)
+{
+	struct {	/* payload of MCU_EXT_CMD_RATE_CTRL; naturally aligned, no padding */
+		__le32 cmd;
+		__le16 wlan_idx;
+		__le16 ru_idx;
+		__le16 direction;
+		__le16 dump_group;
+	} msg = {
+		.dump_group = cpu_to_le16(1),
+		.wlan_idx = cpu_to_le16(wlan_idx),
+		.cmd = cpu_to_le32(cmd),
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_RATE_CTRL,
+				   &msg, sizeof(msg), false);
+}
+
+int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band)
+{
+	struct {	/* payload of MCU_EXT_CMD_SET_SER_TRIGGER; four bytes, no padding */
+		u8 action;
+		u8 set;
+		u8 band;
+		u8 rsv;
+	} msg = {
+		.band = band,
+		.set = set,
+		.action = action,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_SER_TRIGGER, &msg,
+				   sizeof(msg), false);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
new file mode 100644
index 000000000000..5e4708861ede
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -0,0 +1,837 @@
+/* SPDX-License-Identifier: ISC */
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#ifndef __MT7915_MCU_H
+#define __MT7915_MCU_H
+
+struct mt7915_mcu_txd {	/* MCU command header: 8 TXD words followed by command fields */
+	__le32 txd[8];
+
+	__le16 len;
+	__le16 pq_id;
+
+	u8 cid;
+	u8 pkt_type;
+	u8 set_query; /* FW don't care */
+	u8 seq;
+
+	u8 uc_d2b0_rev;
+	u8 ext_cid;
+	u8 s2d_index;
+	u8 ext_cid_ack;
+
+	u32 reserved[5];
+} __packed __aligned(4);	/* 32 + 4 + 4 + 4 + 20 = 64 bytes */
+
+/* event table */
+enum {	/* values are not unique: the first three names all equal 0x01 */
+	MCU_EVENT_TARGET_ADDRESS_LEN = 0x01,
+	MCU_EVENT_FW_START = 0x01,
+	MCU_EVENT_GENERIC = 0x01,
+	MCU_EVENT_ACCESS_REG = 0x02,
+	MCU_EVENT_MT_PATCH_SEM = 0x04,
+	MCU_EVENT_CH_PRIVILEGE = 0x18,
+	MCU_EVENT_EXT = 0xed,	/* same value as MCU_CMD_EXT_CID */
+	MCU_EVENT_RESTART_DL = 0xef,	/* same value as MCU_CMD_RESTART_DL_REQ */
+};
+
+/* ext event table */
+enum {	/* presumably compared against mt7915_mcu_rxd.ext_eid - confirm in the event handler */
+	MCU_EXT_EVENT_PS_SYNC = 0x5,
+	MCU_EXT_EVENT_THERMAL_PROTECT = 0x22,
+	MCU_EXT_EVENT_ASSERT_DUMP = 0x23,
+	MCU_EXT_EVENT_RDD_REPORT = 0x3a,	/* same value as MCU_EXT_CMD_SET_RDD_CTRL */
+	MCU_EXT_EVENT_CSA_NOTIFY = 0x4f,
+	MCU_EXT_EVENT_RATE_REPORT = 0x87,	/* same value as MCU_EXT_CMD_RATE_CTRL */
+};
+
+struct mt7915_mcu_rxd {	/* MCU event header; embedded first in the event structs below */
+	__le32 rxd[6];
+
+	__le16 len;
+	__le16 pkt_type_id;
+
+	u8 eid;
+	u8 seq;
+	__le16 __rsv;
+
+	u8 ext_eid;
+	u8 __rsv1[2];
+	u8 s2d_index;
+};	/* not __packed, but every member is naturally aligned: 36 bytes, no padding */
+
+struct mt7915_mcu_rdd_report {	/* event layout: rxd header, summary bytes, then three 32-entry pulse tables */
+	struct mt7915_mcu_rxd rxd;
+
+	u8 idx;
+	u8 long_detected;
+	u8 constant_prf_detected;
+	u8 staggered_prf_detected;
+	u8 radar_type_idx;
+	u8 periodic_pulse_num;
+	u8 long_pulse_num;
+	u8 hw_pulse_num;
+
+	u8 out_lpn;
+	u8 out_spn;
+	u8 out_crpn;
+	u8 out_crpw;
+	u8 out_crbn;
+	u8 out_stgpn;
+	u8 out_stgpw;
+
+	u8 rsv;
+
+	__le32 out_pri_const;
+	__le32 out_pri_stg[3];
+
+	struct {	/* 12 bytes per entry */
+		__le32 start;
+		__le16 pulse_width;
+		__le16 pulse_power;
+		u8 mdrdy_flag;
+		u8 rsv[3];
+	} long_pulse[32];
+
+	struct {	/* same layout as long_pulse entries */
+		__le32 start;
+		__le16 pulse_width;
+		__le16 pulse_power;
+		u8 mdrdy_flag;
+		u8 rsv[3];
+	} periodic_pulse[32];
+
+	struct {	/* 12 bytes per entry; the last four bytes are flags instead of flag + rsv */
+		__le32 start;
+		__le16 pulse_width;
+		__le16 pulse_power;
+		u8 sc_pass;
+		u8 sw_reset;
+		u8 mdrdy_flag;
+		u8 tx_active;
+	} hw_pulse[32];
+} __packed;
+
+struct mt7915_mcu_eeprom_info {	/* sent by mt7915_mcu_get_eeprom() with only addr filled in */
+	__le32 addr;	/* the sender rounds this down to a multiple of 16 */
+	__le32 valid;
+	u8 data[16];
+} __packed;
+
+struct mt7915_mcu_ra_info {	/* rate-control event: rxd header followed by rate state */
+	struct mt7915_mcu_rxd rxd;
+
+	__le32 event_id;	/* event_id..dump_group have the same layout as the request in mt7915_mcu_get_rate_info() */
+	__le16 wlan_idx;
+	__le16 ru_idx;
+	__le16 direction;
+	__le16 dump_group;
+
+	__le32 suggest_rate;
+	__le32 min_rate;	/* for dynamic sounding */
+	__le32 max_rate;	/* for dynamic sounding */
+	__le32 init_rate_down_rate;
+
+	__le16 curr_rate;	/* presumably decoded with the MT_RA_RATE_* masks - confirm */
+	__le16 init_rate_down_total;
+	__le16 init_rate_down_succ;
+	__le16 success;
+	__le16 attempts;
+
+	__le16 prev_rate;
+	__le16 prob_up_rate;
+	u8 no_rate_up_cnt;
+	u8 ppdu_cnt;
+	u8 gi;
+
+	u8 try_up_fail;
+	u8 try_up_total;
+	u8 suggest_wf;
+	u8 try_up_check;
+	u8 prob_up_period;
+	u8 prob_down_pending;
+} __packed;
+
+#define MT_RA_RATE_NSS			GENMASK(8, 6)	/* the five masks do not overlap and fit in 15 bits; bit 5 is unused */
+#define MT_RA_RATE_MCS			GENMASK(3, 0)
+#define MT_RA_RATE_TX_MODE		GENMASK(12, 9)
+#define MT_RA_RATE_DCM_EN		BIT(4)
+#define MT_RA_RATE_BW			GENMASK(14, 13)
+
+#define MCU_PQ_ID(p, q)			(((p) << 15) | ((q) << 10))	/* p lands at bit 15, q starts at bit 10 */
+#define MCU_PKT_ID			0xa0
+
+enum {	/* implicit values 0..3 */
+	MCU_Q_QUERY,
+	MCU_Q_SET,
+	MCU_Q_RESERVED,
+	MCU_Q_NA
+};
+
+enum {	/* implicit values 0..3; presumably written to the s2d_index header byte - confirm */
+	MCU_S2D_H2N,
+	MCU_S2D_C2N,
+	MCU_S2D_H2C,
+	MCU_S2D_H2CN
+};
+
+enum {	/* explicit command ids; gaps (0x06, 0x08..0x0f) are unused here */
+	MCU_CMD_TARGET_ADDRESS_LEN_REQ = 0x01,
+	MCU_CMD_FW_START_REQ = 0x02,
+	MCU_CMD_INIT_ACCESS_REG = 0x3,
+	MCU_CMD_NIC_POWER_CTRL = 0x4,
+	MCU_CMD_PATCH_START_REQ = 0x05,
+	MCU_CMD_PATCH_FINISH_REQ = 0x07,
+	MCU_CMD_PATCH_SEM_CONTROL = 0x10,
+	MCU_CMD_EXT_CID = 0xED,	/* same value as MCU_EVENT_EXT */
+	MCU_CMD_FW_SCATTER = 0xEE,
+	MCU_CMD_RESTART_DL_REQ = 0xEF,
+};
+
+enum {	/* extended command ids passed to __mt76_mcu_send_msg() by the mt7915_mcu_* helpers */
+	MCU_EXT_CMD_EFUSE_ACCESS = 0x01,
+	MCU_EXT_CMD_PM_STATE_CTRL = 0x07,
+	MCU_EXT_CMD_CHANNEL_SWITCH = 0x08,
+	MCU_EXT_CMD_EFUSE_BUFFER_MODE = 0x21,
+	MCU_EXT_CMD_STA_REC_UPDATE = 0x25,
+	MCU_EXT_CMD_BSS_INFO_UPDATE = 0x26,
+	MCU_EXT_CMD_EDCA_UPDATE = 0x27,
+	MCU_EXT_CMD_DEV_INFO_UPDATE = 0x2A,
+	MCU_EXT_CMD_THERMAL_CTRL = 0x2c,
+	MCU_EXT_CMD_SET_RDD_CTRL = 0x3a,
+	MCU_EXT_CMD_PROTECT_CTRL = 0x3e,
+	MCU_EXT_CMD_MAC_INIT_CTRL = 0x46,
+	MCU_EXT_CMD_RX_HDR_TRANS = 0x47,
+	MCU_EXT_CMD_SET_RX_PATH = 0x4e,
+	MCU_EXT_CMD_SET_SER_TRIGGER = 0x81,
+	MCU_EXT_CMD_SCS_CTRL = 0x82,
+	MCU_EXT_CMD_RATE_CTRL = 0x87,
+	MCU_EXT_CMD_SET_RDD_TH = 0x9d,	/* shared by the fcc5_lpn, pulse_th and radar_th helpers, which differ by tag */
+};
+
+enum {	/* implicit values: RELEASE = 0, GET = 1 */
+	PATCH_SEM_RELEASE,
+	PATCH_SEM_GET
+};
+
+enum {	/* implicit values 0..3 */
+	PATCH_NOT_DL_SEM_FAIL,
+	PATCH_IS_DL,
+	PATCH_NOT_DL_SEM_SUCCESS,
+	PATCH_REL_SEM_SUCCESS
+};
+
+enum {	/* first four are 0..3; values 4..6 are not named */
+	FW_STATE_INITIAL,
+	FW_STATE_FW_DOWNLOAD,	/* the state mt7915_mcu_exit() polls MT_TOP_MISC_FW_STATE for */
+	FW_STATE_NORMAL_OPERATION,
+	FW_STATE_NORMAL_TRX,
+	FW_STATE_WACPU_RDY        = 7
+};
+
+enum {	/* buffer_mode values; mt7915_mcu_set_eeprom() sends EE_MODE_EFUSE (0) */
+	EE_MODE_EFUSE,
+	EE_MODE_BUFFER,
+};
+
+enum {	/* format values; mt7915_mcu_set_eeprom() sends EE_FORMAT_WHOLE (1) */
+	EE_FORMAT_BIN,
+	EE_FORMAT_WHOLE,
+	EE_FORMAT_MULTIPLE,
+};
+
+#define STA_TYPE_STA			BIT(0)	/* station-type bits live in the low bits (BIT(3) is not defined) */
+#define STA_TYPE_AP			BIT(1)
+#define STA_TYPE_ADHOC			BIT(2)
+#define STA_TYPE_WDS			BIT(4)
+#define STA_TYPE_BC			BIT(5)
+
+#define NETWORK_INFRA			BIT(16)	/* network-type bits start at bit 16 */
+#define NETWORK_P2P			BIT(17)
+#define NETWORK_IBSS			BIT(18)
+#define NETWORK_WDS			BIT(21)
+
+#define CONNECTION_INFRA_STA		(STA_TYPE_STA | NETWORK_INFRA)	/* each CONNECTION_* ORs one station type with one network type */
+#define CONNECTION_INFRA_AP		(STA_TYPE_AP | NETWORK_INFRA)
+#define CONNECTION_P2P_GC		(STA_TYPE_STA | NETWORK_P2P)
+#define CONNECTION_P2P_GO		(STA_TYPE_AP | NETWORK_P2P)
+#define CONNECTION_IBSS_ADHOC		(STA_TYPE_ADHOC | NETWORK_IBSS)
+#define CONNECTION_WDS			(STA_TYPE_WDS | NETWORK_WDS)
+#define CONNECTION_INFRA_BC		(STA_TYPE_BC | NETWORK_INFRA)
+
+#define CONN_STATE_DISCONNECT		0	/* presumably values for sta_rec_basic.conn_state - confirm */
+#define CONN_STATE_CONNECT		1
+#define CONN_STATE_PORT_SECURE		2
+
+enum {	/* ACTIVE = 0; MAX_NUM = 1 is the count of tags */
+	DEV_INFO_ACTIVE,
+	DEV_INFO_MAX_NUM
+};
+
+enum {	/* implicit values 0..6 */
+	SCS_SEND_DATA,
+	SCS_SET_MANUAL_PD_TH,
+	SCS_CONFIG,
+	SCS_ENABLE,	/* = 3; the cmd value sent by mt7915_mcu_set_scs() */
+	SCS_SHOW_INFO,
+	SCS_GET_GLO_ADDR,
+	SCS_GET_GLO_ADDR_EVENT,
+};
+
+enum {	/* two independent numberings share this enum: CMD_CBW_* and CMD_HE_MCS_* */
+	CMD_CBW_20MHZ = IEEE80211_STA_RX_BW_20,
+	CMD_CBW_40MHZ = IEEE80211_STA_RX_BW_40,
+	CMD_CBW_80MHZ = IEEE80211_STA_RX_BW_80,
+	CMD_CBW_160MHZ = IEEE80211_STA_RX_BW_160,
+	CMD_CBW_10MHZ,	/* continues counting from IEEE80211_STA_RX_BW_160 + 1 */
+	CMD_CBW_5MHZ,
+	CMD_CBW_8080MHZ,
+
+	CMD_HE_MCS_BW80 = 0,	/* restarts at 0 */
+	CMD_HE_MCS_BW160,
+	CMD_HE_MCS_BW8080,
+	CMD_HE_MCS_BW_NUM
+};
+
+struct tlv {	/* generic 4-byte tag/len header; the TLV structs below start with the same two fields */
+	__le16 tag;
+	__le16 len;
+} __packed;
+
+struct bss_info_omac {	/* packed TLV: tag/len header plus 12 payload bytes */
+	__le16 tag;
+	__le16 len;
+	u8 hw_bss_idx;
+	u8 omac_idx;
+	u8 band_idx;
+	u8 rsv0;
+	__le32 conn_type;	/* presumably one of the CONNECTION_* values - confirm */
+	u32 rsv1;
+} __packed;
+
+struct bss_info_basic {	/* packed TLV: tag/len header plus basic BSS fields */
+	__le16 tag;
+	__le16 len;
+	__le32 network_type;
+	u8 active;
+	u8 rsv0;
+	__le16 bcn_interval;
+	u8 bssid[ETH_ALEN];
+	u8 wmm_idx;
+	u8 dtim_period;
+	u8 bmc_wcid_lo;	/* the index is split into _lo here and _hi below */
+	u8 cipher;
+	u8 phy_mode;
+	u8 max_bssid;	/* max BSSID. range: 1 ~ 8, 0: MBSSID disabled */
+	u8 non_tx_bssid;/* non-transmitted BSSID, 0: transmitted BSSID */
+	u8 bmc_wcid_hi;	/* high Byte and version */
+	u8 rsv[2];
+} __packed;
+
+struct bss_info_rf_ch {	/* packed TLV: tag/len header plus channel fields (12 bytes total) */
+	__le16 tag;
+	__le16 len;
+	u8 pri_ch;
+	u8 center_ch0;
+	u8 center_ch1;
+	u8 bw;
+	u8 he_ru26_block;	/* 1: don't send HETB in RU26, 0: allow */
+	u8 he_all_disable;	/* 1: disallow all HETB, 0: allow */
+	u8 rsv[2];
+} __packed;
+
+struct bss_info_ext_bss {	/* packed TLV: tag/len header, one offset word, 8 reserved bytes */
+	__le16 tag;
+	__le16 len;
+	__le32 mbss_tsf_offset; /* in unit of us */
+	u8 rsv[8];
+} __packed;
+
+struct bss_info_sync_mode {	/* packed TLV: tag/len header plus beacon timing fields (16 bytes total) */
+	__le16 tag;
+	__le16 len;
+	__le16 bcn_interval;
+	u8 enable;
+	u8 dtim_period;
+	u8 rsv[8];
+} __packed;
+
+struct bss_info_bmc_rate {	/* packed TLV: tag/len header plus two 16-bit rate fields (16 bytes total) */
+	__le16 tag;
+	__le16 len;
+	__le16 bc_trans;
+	__le16 mc_trans;
+	u8 short_preamble;
+	u8 rsv[7];
+} __packed;
+
+struct bss_info_ra {	/* packed TLV: tag/len header plus rate-adaptation parameters */
+	__le16 tag;
+	__le16 len;
+	u8 op_mode;
+	u8 adhoc_en;
+	u8 short_preamble;
+	u8 tx_streams;
+	u8 rx_streams;
+	u8 algo;
+	u8 force_sgi;
+	u8 force_gf;
+	u8 ht_mode;
+	u8 has_20_sta;		/* Check if any sta support GF. */
+	u8 bss_width_trigger_events;
+	u8 vht_nss_cap;
+	u8 vht_bw_signal;	/* not use */
+	u8 vht_force_sgi;	/* not use */
+	u8 se_off;
+	u8 antenna_idx;
+	u8 train_up_rule;
+	u8 rsv[3];
+	unsigned short train_up_high_thres;	/* NOTE(review): these three shorts carry no __le16 annotation unlike the fields below - confirm byte order */
+	short train_up_rule_rssi;
+	unsigned short low_traffic_thres;
+	__le16 max_phyrate;
+	__le32 phy_cap;
+	__le32 interval;
+	__le32 fast_interval;
+} __packed;
+
+struct bss_info_bcn {	/* packed, 4-byte aligned TLV header for beacon updates (8 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 ver;
+	u8 enable;
+	__le16 sub_ntlv;	/* presumably the number of BSS_INFO_BCN_* sub-TLVs that follow - confirm */
+} __packed __aligned(4);
+
+struct bss_info_bcn_csa {	/* packed sub-TLV: tag/len, one count byte, 3 reserved (8 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 cnt;
+	u8 rsv[3];
+} __packed __aligned(4);
+
+struct bss_info_bcn_bcc {	/* same layout as struct bss_info_bcn_csa */
+	__le16 tag;
+	__le16 len;
+	u8 cnt;
+	u8 rsv[3];
+} __packed __aligned(4);
+
+struct bss_info_bcn_mbss {	/* packed sub-TLV: 32-bit bitmap plus one 16-bit offset per beacon */
+#define MAX_BEACON_NUM	32
+	__le16 tag;
+	__le16 len;
+	__le32 bitmap;	/* 32 bits, matching MAX_BEACON_NUM entries in offset[] */
+	__le16 offset[MAX_BEACON_NUM];
+	u8 rsv[8];
+} __packed __aligned(4);
+
+struct bss_info_bcn_cont {	/* packed sub-TLV: three 16-bit offsets and a packet length (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	__le16 tim_ofs;
+	__le16 csa_ofs;
+	__le16 bcc_ofs;
+	__le16 pkt_len;
+} __packed __aligned(4);
+
+enum {	/* implicit values 0..3; BSS_INFO_BCN_MAX = 4 is the count */
+	BSS_INFO_BCN_CSA,
+	BSS_INFO_BCN_BCC,
+	BSS_INFO_BCN_MBSSID,
+	BSS_INFO_BCN_CONTENT,
+	BSS_INFO_BCN_MAX
+};
+
+enum {	/* implicit values starting at 0; BSS_INFO_MAX_NUM = 17 is the count */
+	BSS_INFO_OMAC,
+	BSS_INFO_BASIC,
+	BSS_INFO_RF_CH,		/* optional, for BT/LTE coex */
+	BSS_INFO_PM,		/* sta only */
+	BSS_INFO_UAPSD,		/* sta only */
+	BSS_INFO_ROAM_DETECT,	/* obsoleted */
+	BSS_INFO_LQ_RM,		/* obsoleted */
+	BSS_INFO_EXT_BSS,
+	BSS_INFO_BMC_RATE,	/* for bmc rate control in CR4 */
+	BSS_INFO_SYNC_MODE,
+	BSS_INFO_RA,
+	BSS_INFO_HW_AMSDU,
+	BSS_INFO_BSS_COLOR,
+	BSS_INFO_HE_BASIC,
+	BSS_INFO_PROTECT_INFO,
+	BSS_INFO_OFFLOAD,
+	BSS_INFO_11V_MBSSID,
+	BSS_INFO_MAX_NUM
+};
+
+enum {	/* starts at 1, so the values are 1..4; presumably for wtbl_req_hdr.operation - confirm */
+	WTBL_RESET_AND_SET = 1,
+	WTBL_SET,
+	WTBL_QUERY,
+	WTBL_RESET_ALL
+};
+
+struct wtbl_req_hdr {	/* 8-byte packed header; counted first in the MT7915_WTBL_UPDATE_* size macros */
+	u8 wlan_idx_lo;	/* the index is split into _lo here and _hi below */
+	u8 operation;
+	__le16 tlv_num;
+	u8 wlan_idx_hi;
+	u8 rsv[3];
+} __packed;
+
+struct wtbl_generic {	/* packed TLV: tag/len header plus peer address and flag bytes (20 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 peer_addr[ETH_ALEN];
+	u8 muar_idx;
+	u8 skip_tx;
+	u8 cf_ack;
+	u8 qos;
+	u8 mesh;
+	u8 adm;
+	__le16 partial_aid;
+	u8 baf_en;
+	u8 aad_om;
+} __packed;
+
+struct wtbl_rx {	/* packed TLV: tag/len header, four flag bytes, four reserved (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 rcid;
+	u8 rca1;
+	u8 rca2;
+	u8 rv;
+	u8 rsv[4];
+} __packed;
+
+struct wtbl_ht {	/* packed TLV: tag/len header, four bytes of HT settings, four reserved (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 ht;
+	u8 ldpc;
+	u8 af;
+	u8 mm;
+	u8 rsv[4];
+} __packed;
+
+struct wtbl_vht {	/* packed TLV: tag/len header, four bytes of VHT settings, four reserved (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 ldpc;
+	u8 dyn_bw;
+	u8 vht;
+	u8 txop_ps;
+	u8 rsv[4];
+} __packed;
+
+enum {	/* implicit values 0..2; presumably for the ba_type fields below - confirm */
+	MT_BA_TYPE_INVALID,
+	MT_BA_TYPE_ORIGINATOR,
+	MT_BA_TYPE_RECIPIENT
+};
+
+enum {	/* implicit values 0..2; presumably for wtbl_ba.rst_ba_sel - confirm */
+	RST_BA_MAC_TID_MATCH,
+	RST_BA_MAC_MATCH,
+	RST_BA_NO_MATCH
+};
+
+struct wtbl_ba {	/* packed TLV; also summed on its own in MT7915_WTBL_UPDATE_BA_SIZE */
+	__le16 tag;
+	__le16 len;
+	/* common */
+	u8 tid;
+	u8 ba_type;
+	u8 rsv0[2];
+	/* originator only */
+	__le16 sn;
+	u8 ba_en;
+	u8 ba_winsize_idx;
+	__le16 ba_winsize;
+	/* recipient only */
+	u8 peer_addr[ETH_ALEN];
+	u8 rst_ba_tid;
+	u8 rst_ba_sel;
+	u8 rst_ba_sb;
+	u8 band_idx;
+	u8 rsv1[4];
+} __packed;
+
+struct wtbl_bf {	/* packed TLV: tag/len header, six setting bytes, two reserved (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 ibf;
+	u8 ebf;
+	u8 ibf_vht;
+	u8 ebf_vht;
+	u8 gid;
+	u8 pfmu_idx;
+	u8 rsv[2];
+} __packed;
+
+struct wtbl_smps {	/* packed TLV: tag/len header, one value byte, three reserved (8 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 smps;
+	u8 rsv[3];
+} __packed;
+
+struct wtbl_spe {	/* same shape as struct wtbl_smps: one value byte after the header */
+	__le16 tag;
+	__le16 len;
+	u8 spe_idx;
+	u8 rsv[3];
+} __packed;
+
+enum {	/* implicit values starting at 0; WTBL_MAX_NUM = 17 is the count */
+	WTBL_GENERIC,
+	WTBL_RX,
+	WTBL_HT,
+	WTBL_VHT,
+	WTBL_PEER_PS,		/* not used */
+	WTBL_TX_PS,
+	WTBL_HDR_TRANS,
+	WTBL_SEC_KEY,
+	WTBL_BA,
+	WTBL_RDG,		/* obsoleted */
+	WTBL_PROTECT,		/* not used */
+	WTBL_CLEAR,		/* not used */
+	WTBL_BF,
+	WTBL_SMPS,
+	WTBL_RAW_DATA,		/* debug only */
+	WTBL_PN,
+	WTBL_SPE,
+	WTBL_MAX_NUM
+};
+
+struct sta_ntlv_hdr {	/* 4-byte packed header: two reserved bytes, then a TLV count */
+	u8 rsv[2];
+	__le16 tlv_num;
+} __packed;
+
+struct sta_req_hdr {	/* 8-byte packed header; counted first in MT7915_STA_UPDATE_MAX_SIZE and MT7915_BEACON_UPDATE_SIZE */
+	u8 bss_idx;
+	u8 wlan_idx_lo;	/* the index is split into _lo here and _hi below */
+	__le16 tlv_num;
+	u8 is_tlv_append;
+	u8 muar_idx;
+	u8 wlan_idx_hi;
+	u8 rsv;
+} __packed;
+
+struct sta_rec_basic {	/* packed TLV: tag/len header plus basic station fields (20 bytes) */
+	__le16 tag;
+	__le16 len;
+	__le32 conn_type;	/* presumably one of the CONNECTION_* values - confirm */
+	u8 conn_state;
+	u8 qos;
+	__le16 aid;
+	u8 peer_addr[ETH_ALEN];
+	__le16 extra_info;
+} __packed;
+
+struct sta_rec_ht {	/* packed TLV: tag/len header, 16-bit HT capability, 16 reserved bits (8 bytes) */
+	__le16 tag;
+	__le16 len;
+	__le16 ht_cap;
+	u16 rsv;
+} __packed;
+
+struct sta_rec_vht {	/* packed TLV: tag/len header, VHT capability word and two MCS maps (16 bytes) */
+	__le16 tag;
+	__le16 len;
+	__le32 vht_cap;
+	__le16 vht_rx_mcs_map;
+	__le16 vht_tx_mcs_map;
+	u8 rts_bw_sig;
+	u8 rsv[3];
+} __packed;
+
+struct sta_rec_ba {	/* packed TLV: tag/len header plus block-ack session fields (12 bytes) */
+	__le16 tag;
+	__le16 len;
+	u8 tid;
+	u8 ba_type;
+	u8 amsdu;
+	u8 ba_en;
+	__le16 ssn;
+	__le16 winsize;
+} __packed;
+
+struct sec_key {	/* one key slot (36 bytes); struct sta_rec_sec carries two of these */
+	u8 cipher_id;
+	u8 cipher_len;
+	u8 key_id;
+	u8 key_len;
+	u8 key[32];	/* fixed 32-byte buffer; key_len is a separate field */
+} __packed;
+
+struct sta_rec_sec {	/* packed TLV: tag/len header, four control bytes, two key slots */
+	__le16 tag;
+	__le16 len;
+	u8 add;
+	u8 n_cipher;
+	u8 rsv[2];
+
+	struct sec_key key[2];
+} __packed;
+
+struct ra_phy {	/* nine single-byte fields; embedded as the last member of struct sta_rec_ra */
+	u8 type;
+	u8 flag;
+	u8 stbc;
+	u8 sgi;
+	u8 bw;
+	u8 ldpc;
+	u8 mcs;
+	u8 nss;
+	u8 he_ltf;
+};	/* not __packed, but all members are u8 so there is no padding */
+
+struct sta_rec_ra {	/* packed TLV: tag/len header plus rate-adaptation state for one station */
+	__le16 tag;
+	__le16 len;
+
+	u8 valid;
+	u8 auto_rate;
+	u8 phy_mode;
+	u8 channel;
+	u8 bw;
+	u8 disable_cck;
+	u8 ht_mcs32;
+	u8 ht_gf;
+	u8 ht_mcs[4];
+	u8 mmps_mode;
+	u8 gband_256;
+	u8 af;
+	u8 auth_wapi_mode;
+	u8 rate_len;
+
+	u8 supp_mode;
+	u8 supp_cck_rate;
+	u8 supp_ofdm_rate;
+	__le32 supp_ht_mcs;	/* sits at an odd offset; relies on __packed */
+	__le16 supp_vht_mcs[4];
+
+	u8 op_mode;
+	u8 op_vht_chan_width;
+	u8 op_vht_rx_nss;
+	u8 op_vht_rx_nss_type;
+
+	__le32 sta_status;
+
+	struct ra_phy phy;
+} __packed;
+
+enum {	/* implicit values starting at 0; STA_REC_MAX_NUM = 20 is the count */
+	STA_REC_BASIC,
+	STA_REC_RA,
+	STA_REC_RA_CMM_INFO,
+	STA_REC_RA_UPDATE,
+	STA_REC_BF,
+	STA_REC_AMSDU,
+	STA_REC_BA,
+	STA_REC_RED,		/* not used */
+	STA_REC_TX_PROC,	/* for hdr trans and CSO in CR4 */
+	STA_REC_HT,
+	STA_REC_VHT,
+	STA_REC_APPS,
+	STA_REC_KEY,
+	STA_REC_WTBL,
+	STA_REC_HE,
+	STA_REC_HW_AMSDU,
+	STA_REC_WTBL_AADOM,
+	STA_REC_KEY_V2,
+	STA_REC_MURU,
+	STA_REC_MUEDCA,
+	STA_REC_MAX_NUM
+};
+
+enum mt7915_cipher_type {	/* implicit values 0..10 */
+	MT_CIPHER_NONE,
+	MT_CIPHER_WEP40,
+	MT_CIPHER_WEP104,
+	MT_CIPHER_WEP128,
+	MT_CIPHER_TKIP,
+	MT_CIPHER_AES_CCMP,
+	MT_CIPHER_CCMP_256,
+	MT_CIPHER_GCMP,
+	MT_CIPHER_GCMP_256,
+	MT_CIPHER_WAPI,
+	MT_CIPHER_BIP_CMAC_128,
+};
+
+enum {	/* switch_reason values; mt7915_mcu_set_chan_info() only sends NORMAL or DFS; 1 and 2 are unnamed */
+	CH_SWITCH_NORMAL = 0,
+	CH_SWITCH_SCAN = 3,
+	CH_SWITCH_MCC = 4,
+	CH_SWITCH_DFS = 5,
+	CH_SWITCH_BACKGROUND_SCAN_START = 6,
+	CH_SWITCH_BACKGROUND_SCAN_RUNNING = 7,
+	CH_SWITCH_BACKGROUND_SCAN_STOP = 8,
+	CH_SWITCH_SCAN_BYPASS_DPD = 9
+};
+
+enum {	/* ctrl_id values; mt7915_mcu_get_temperature() sends TEMP_QUERY (0) */
+	THERMAL_SENSOR_TEMP_QUERY,
+	THERMAL_SENSOR_MANUAL_CTRL,
+	THERMAL_SENSOR_INFO_QUERY,
+	THERMAL_SENSOR_TASK_CTRL,
+};
+
+#define MT7915_WTBL_UPDATE_MAX_SIZE	(sizeof(struct wtbl_req_hdr) +	\
+					 sizeof(struct wtbl_generic) +	\
+					 sizeof(struct wtbl_rx) +	\
+					 sizeof(struct wtbl_ht) +	\
+					 sizeof(struct wtbl_vht) +	\
+					 sizeof(struct wtbl_ba) +	\
+					 sizeof(struct wtbl_bf) +	\
+					 sizeof(struct wtbl_smps) +	\
+					 sizeof(struct wtbl_spe))	/* request header plus one of each wtbl_* TLV defined in this header */
+
+#define MT7915_STA_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
+					 sizeof(struct sta_rec_basic) +	\
+					 sizeof(struct sta_rec_ht) +	\
+					 sizeof(struct sta_rec_ba) +	\
+					 sizeof(struct sta_rec_vht) +	\
+					 sizeof(struct tlv) +		\
+					 sizeof(struct sta_rec_sec) +	\
+					 sizeof(struct sta_rec_ra) +	\
+					 MT7915_WTBL_UPDATE_MAX_SIZE)	/* station TLVs, one bare tlv header, plus room for a full WTBL update */
+
+#define MT7915_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
+					 sizeof(struct wtbl_ba))	/* request header plus a single BA TLV */
+
+#define MT7915_BEACON_UPDATE_SIZE	(sizeof(struct sta_req_hdr) +	\
+					 sizeof(struct bss_info_bcn_csa) + \
+					 sizeof(struct bss_info_bcn_bcc) + \
+					 sizeof(struct bss_info_bcn_mbss) + \
+					 sizeof(struct bss_info_bcn_cont))	/* NOTE(review): struct bss_info_bcn itself and the beacon frame bytes are not counted - confirm callers add them */
+
+#define PHY_MODE_A			BIT(0)	/* one bit per mode, so modes can be ORed together */
+#define PHY_MODE_B			BIT(1)
+#define PHY_MODE_G			BIT(2)
+#define PHY_MODE_GN			BIT(3)
+#define PHY_MODE_AN			BIT(4)
+#define PHY_MODE_AC			BIT(5)
+
+#define MODE_CCK			BIT(0)	/* one bit per modulation family */
+#define MODE_OFDM			BIT(1)
+#define MODE_HT				BIT(2)
+#define MODE_VHT			BIT(3)
+
+#define STA_CAP_WMM			BIT(0)	/* capability flags up to bit 27; presumably for a 32-bit field such as sta_status or phy_cap - confirm */
+#define STA_CAP_SGI_20			BIT(4)
+#define STA_CAP_SGI_40			BIT(5)
+#define STA_CAP_TX_STBC			BIT(6)
+#define STA_CAP_RX_STBC			BIT(7)
+#define STA_CAP_VHT_SGI_80		BIT(16)
+#define STA_CAP_VHT_SGI_160		BIT(17)
+#define STA_CAP_VHT_TX_STBC		BIT(18)
+#define STA_CAP_VHT_RX_STBC		BIT(19)
+#define STA_CAP_VHT_LDPC		BIT(23)
+#define STA_CAP_LDPC			BIT(24)
+#define STA_CAP_HT			BIT(26)
+#define STA_CAP_VHT			BIT(27)
+
+#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
new file mode 100644
index 000000000000..b23ab3f215e0
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -0,0 +1,441 @@
+/* SPDX-License-Identifier: ISC */
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#ifndef __MT7915_H
+#define __MT7915_H
+
+#include <linux/interrupt.h>
+#include <linux/ktime.h>
+#include "../mt76.h"
+#include "regs.h"
+
+#define MT7915_MAX_INTERFACES		4
+#define MT7915_MAX_WMM_SETS		4	/* multiplier for wmm_idx when mt7915_mcu_set_tx() computes the queue number */
+#define MT7915_WTBL_SIZE		288
+#define MT7915_WTBL_RESERVED		(MT7915_WTBL_SIZE - 1)	/* = 287, the last index */
+#define MT7915_WTBL_STA			(MT7915_WTBL_RESERVED - \
+					 MT7915_MAX_INTERFACES)	/* = 283 */
+
+#define MT7915_WATCHDOG_TIME		(HZ / 10)	/* in jiffies: a tenth of a second */
+#define MT7915_RESET_TIMEOUT		(30 * HZ)	/* in jiffies: 30 seconds */
+
+#define MT7915_TX_RING_SIZE		2048
+#define MT7915_TX_MCU_RING_SIZE		256
+#define MT7915_TX_FWDL_RING_SIZE	128
+
+#define MT7915_RX_RING_SIZE		1536
+#define MT7915_RX_MCU_RING_SIZE		512
+
+#define MT7915_FIRMWARE_WA		"mediatek/mt7915_wa.bin"
+#define MT7915_FIRMWARE_WM		"mediatek/mt7915_wm.bin"
+#define MT7915_ROM_PATCH		"mediatek/mt7915_rom_patch.bin"
+
+#define MT7915_EEPROM_SIZE		3584
+#define MT7915_TOKEN_SIZE		8192
+
+#define MT7915_CFEND_RATE_DEFAULT	0x49	/* OFDM 24M */
+#define MT7915_CFEND_RATE_11B		0x03	/* 11B LP, 11M */
+#define MT7915_5G_RATE_DEFAULT		0x4b	/* OFDM 6M */
+#define MT7915_2G_RATE_DEFAULT		0x0	/* CCK 1M */
+
+
+struct mt7915_vif;
+struct mt7915_sta;
+struct mt7915_dfs_pulse;
+struct mt7915_dfs_pattern;
+
+enum mt7915_txq_id {	/* numbering starts at 16, giving values 16..20 */
+	MT7915_TXQ_FWDL = 16,
+	MT7915_TXQ_MCU_WM,
+	MT7915_TXQ_BAND0,
+	MT7915_TXQ_BAND1,
+	MT7915_TXQ_MCU_WA,
+};
+
+enum mt7915_rxq_id {	/* two numberings that both start at 0: BAND0/1 = 0/1 and MCU_WM/WA = 0/1 */
+	MT7915_RXQ_BAND0 = 0,
+	MT7915_RXQ_BAND1,
+	MT7915_RXQ_MCU_WM = 0,	/* deliberately restarts at 0, so it aliases MT7915_RXQ_BAND0 */
+	MT7915_RXQ_MCU_WA,
+};
+
+enum mt7915_ampdu_state {	/* per-TID state stored in mt7915_sta.ampdu_state[]; STOP = 0 is the zero-initialised default */
+	MT7915_AGGR_STOP,
+	MT7915_AGGR_PROGRESS,
+	MT7915_AGGR_START,
+	MT7915_AGGR_OPERATIONAL
+};
+
+struct mt7915_sta_stats {	/* host-side statistics embedded in struct mt7915_sta */
+	struct rate_info prob_rate;
+	struct rate_info tx_rate;
+
+	unsigned long per;
+	unsigned long changed;
+	unsigned long jiffies;	/* presumably the time of the last update - confirm against the stats worker */
+};
+
+struct mt7915_sta {	/* per-station driver state; also embedded once in every struct mt7915_vif */
+	struct mt76_wcid wcid; /* must be first */
+
+	struct mt7915_vif *vif;
+
+	struct list_head poll_list;	/* presumably linked on mt7915_dev.sta_poll_list - confirm */
+	u32 airtime_ac[8];
+
+	struct mt7915_sta_stats stats;
+	struct work_struct stats_work;
+
+	spinlock_t ampdu_lock;	/* taken by mt7915_set_aggr_state() around ampdu_state[] writes */
+	enum mt7915_ampdu_state ampdu_state[IEEE80211_NUM_TIDS];
+};
+
+struct mt7915_vif {	/* per-interface driver state; mt7915_mcu_set_tx() reads it from vif->drv_priv */
+	u16 idx;
+	u8 omac_idx;
+	u8 band_idx;
+	u8 wmm_idx;
+
+	struct {	/* cached per-AC EDCA values sent by mt7915_mcu_set_tx() */
+		u16 cw_min;
+		u16 cw_max;
+		u16 txop;
+		u8 aifs;
+	} wmm[IEEE80211_NUM_ACS];
+
+	struct mt7915_sta sta;
+	struct mt7915_dev *dev;
+};
+
+struct mib_stats {	/* 16-bit counters embedded in struct mt7915_phy */
+	u16 ack_fail_cnt;
+	u16 fcs_err_cnt;
+	u16 rts_cnt;
+	u16 rts_retries_cnt;
+	u16 ba_miss_cnt;
+};
+
+struct mt7915_phy {	/* per-phy state; one instance is embedded in struct mt7915_dev */
+	struct mt76_phy *mt76;
+	struct mt7915_dev *dev;	/* back-pointer used by the mcu helpers that take a phy */
+
+	u32 rxfilter;
+	u32 vif_mask;
+	u32 omac_mask;
+
+	u16 noise;
+	u16 chainmask;	/* truncated to u8 when mt7915_mcu_set_chan_info() copies it into rx_streams */
+
+	s16 coverage_class;
+	u8 slottime;
+
+	u8 rdd_state;
+	int dfs_state;
+
+	__le32 rx_ampdu_ts;
+	u32 ampdu_ref;
+
+	struct mib_stats mib;
+};
+
+struct mt7915_dev {	/* top-level device state; recovered from mt76_dev via container_of(..., mt76) */
+	union { /* must be first */
+		struct mt76_dev mt76;
+		struct mt76_phy mphy;
+	};
+
+	struct mt7915_phy phy;	/* the embedded phy; helpers test "phy != &dev->phy" to compute the band index */
+
+	u16 chainmask;
+
+	struct work_struct init_work;
+	struct work_struct reset_work;	/* queued by mt7915_irq_handler() on an MCU error */
+	wait_queue_head_t reset_wait;
+	u32 reset_state;	/* MT_MCU_CMD value latched by mt7915_irq_handler() */
+
+	struct list_head sta_poll_list;
+	spinlock_t sta_poll_lock;
+
+	u32 hw_pattern;
+
+	spinlock_t token_lock;
+	struct idr token;
+
+	u8 mac_work_count;
+	bool fw_debug;
+};
+
+enum {	/* two ranges: HW_BSSID_* = 0..3 and EXT_BSSID_* starting at 0x10 */
+	HW_BSSID_0 = 0x0,
+	HW_BSSID_1,
+	HW_BSSID_2,
+	HW_BSSID_3,
+	HW_BSSID_MAX,	/* = 4 */
+	EXT_BSSID_START = 0x10,
+	EXT_BSSID_1,
+	EXT_BSSID_2,
+	EXT_BSSID_3,
+	EXT_BSSID_4,
+	EXT_BSSID_5,
+	EXT_BSSID_6,
+	EXT_BSSID_7,
+	EXT_BSSID_8,
+	EXT_BSSID_9,
+	EXT_BSSID_10,
+	EXT_BSSID_11,
+	EXT_BSSID_12,
+	EXT_BSSID_13,
+	EXT_BSSID_14,
+	EXT_BSSID_15,
+	EXT_BSSID_END	/* = 0x20 */
+};
+
+enum {	/* values 0 and 1; presumably the rx_sel argument of mt7915_mcu_rdd_cmd() - confirm */
+	MT_RX_SEL0,
+	MT_RX_SEL1,
+};
+
+enum mt7915_rdd_cmd {	/* sent as the one-byte ctrl field by mt7915_mcu_rdd_cmd() */
+	RDD_STOP,
+	RDD_START,
+	RDD_DET_MODE,
+	RDD_RADAR_EMULATE,	/* = 3 */
+	RDD_START_TXQ = 20,
+	RDD_CAC_START = 50,
+	RDD_CAC_END,
+	RDD_NORMAL_START,
+	RDD_DISABLE_DFS_CAL,
+	RDD_PULSE_DBG,
+	RDD_READ_PULSE,
+	RDD_RESUME_BF,
+	RDD_IRQ_OFF,	/* = 57 */
+};
+
+enum {	/* implicit values 0..3; presumably the cmd argument of mt7915_mcu_get_rate_info() - confirm */
+	RATE_CTRL_RU_INFO,
+	RATE_CTRL_FIXED_RATE_INFO,
+	RATE_CTRL_DUMP_INFO,
+	RATE_CTRL_MU_INFO,
+};
+
+/* Return the mt7915_phy hung off the mt76_phy stored in hw->priv. */
+static inline struct mt7915_phy *mt7915_hw_phy(struct ieee80211_hw *hw)
+{
+	struct mt76_phy *mphy = hw->priv;
+
+	return mphy->priv;
+}
+
+/* Return the mt7915_dev that embeds the mt76_dev owning hw's mt76_phy. */
+static inline struct mt7915_dev *mt7915_hw_dev(struct ieee80211_hw *hw)
+{
+	struct mt76_phy *mphy = hw->priv;
+
+	return container_of(mphy->dev, struct mt7915_dev, mt76);
+}
+
+/* Return the private data of the second phy (mt76.phy2), or NULL if unset. */
+static inline struct mt7915_phy *mt7915_ext_phy(struct mt7915_dev *dev)
+{
+	struct mt76_phy *ext = dev->mt76.phy2;
+
+	if (ext)
+		return ext->priv;
+
+	return NULL;
+}
+
+/* Store @state for @tid while holding msta->ampdu_lock with BHs disabled. */
+static inline void mt7915_set_aggr_state(struct mt7915_sta *msta, u8 tid,
+					 enum mt7915_ampdu_state state)
+{
+	spin_lock_bh(&msta->ampdu_lock);
+	*(msta->ampdu_state + tid) = state;	/* @tid is not range-checked here */
+	spin_unlock_bh(&msta->ampdu_lock);
+}
+
+extern const struct ieee80211_ops mt7915_ops;
+extern struct pci_driver mt7915_pci_driver;
+
+u32 mt7915_reg_map(struct mt7915_dev *dev, u32 addr);
+
+int mt7915_register_device(struct mt7915_dev *dev);
+void mt7915_unregister_device(struct mt7915_dev *dev);
+int mt7915_register_ext_phy(struct mt7915_dev *dev);
+void mt7915_unregister_ext_phy(struct mt7915_dev *dev);
+int mt7915_eeprom_init(struct mt7915_dev *dev);
+u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset);
+int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
+				   struct ieee80211_channel *chan,
+				   u8 chain_idx);
+int mt7915_dma_init(struct mt7915_dev *dev);
+void mt7915_dma_prefetch(struct mt7915_dev *dev);
+void mt7915_dma_cleanup(struct mt7915_dev *dev);
+int mt7915_mcu_init(struct mt7915_dev *dev);
+int mt7915_mcu_add_dev_info(struct mt7915_dev *dev,
+			    struct ieee80211_vif *vif, bool enable);
+int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
+			    struct ieee80211_vif *vif, int enable);
+int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		       struct ieee80211_sta *sta, bool enable);
+int mt7915_mcu_add_tx_ba(struct mt7915_dev *dev,
+			 struct ieee80211_ampdu_params *params,
+			 bool add);
+int mt7915_mcu_add_rx_ba(struct mt7915_dev *dev,
+			 struct ieee80211_ampdu_params *params,
+			 bool add);
+int mt7915_mcu_add_key(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		       struct mt7915_sta *msta, struct ieee80211_key_conf *key,
+		       enum set_key_cmd cmd);
+int mt7915_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			  int enable);
+int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			     struct ieee80211_sta *sta);
+int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			struct ieee80211_sta *sta);
+int mt7915_mcu_set_chan_info(struct mt7915_phy *phy, int cmd);
+int mt7915_mcu_set_tx(struct mt7915_dev *dev, struct ieee80211_vif *vif);
+int mt7915_mcu_set_eeprom(struct mt7915_dev *dev);
+int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset);
+int mt7915_mcu_set_mac(struct mt7915_dev *dev, int band, bool enable,
+		       bool hdr_trans);
+int mt7915_mcu_set_scs(struct mt7915_dev *dev, u8 band, bool enable);
+int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band);
+int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val);
+int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
+int mt7915_mcu_set_fcc5_lpn(struct mt7915_dev *dev, int val);
+int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
+			    const struct mt7915_dfs_pulse *pulse);
+int mt7915_mcu_set_radar_th(struct mt7915_dev *dev, int index,
+			    const struct mt7915_dfs_pattern *pattern);
+int mt7915_mcu_get_rate_info(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
+int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index);
+int mt7915_mcu_rdd_cmd(struct mt7915_dev *dev, enum mt7915_rdd_cmd cmd,
+		       u8 index, u8 rx_sel, u8 val);
+void mt7915_mcu_rx_event(struct mt7915_dev *dev, struct sk_buff *skb);
+void mt7915_mcu_exit(struct mt7915_dev *dev);
+
+static inline bool is_mt7915(struct mt76_dev *dev)
+{
+	return mt76_chip(dev) == 0x7915;	/* true when mt76_chip() reports id 0x7915 */
+}
+
+static inline void mt7915_irq_enable(struct mt7915_dev *dev, u32 mask)
+{
+	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, 0, mask);	/* mask goes in the last argument (presumably "set"), nothing is cleared */
+}
+
+static inline void mt7915_irq_disable(struct mt7915_dev *dev, u32 mask)
+{
+	mt76_set_irq_mask(&dev->mt76, MT_INT_MASK_CSR, mask, 0);	/* mirror of mt7915_irq_enable(): mask goes in the third argument (presumably "clear") */
+}
+
+/* Program the L1 remap register with @addr's base field and return the
+ * address inside the remap window that corresponds to @addr. */
+static inline u32 mt7915_reg_map_l1(struct mt7915_dev *dev, u32 addr)
+{
+	u32 win_base = FIELD_GET(MT_HIF_REMAP_L1_BASE, addr);
+	u32 win_off = FIELD_GET(MT_HIF_REMAP_L1_OFFSET, addr);
+
+	mt76_rmw_field(dev, MT_HIF_REMAP_L1, MT_HIF_REMAP_L1_MASK, win_base);
+	mt76_rr(dev, MT_HIF_REMAP_L1);	/* use read to push write */
+
+	return MT_HIF_REMAP_BASE_L1 + win_off;
+}
+
+/* Read the register at @addr through the L1 remap window. */
+static inline u32 mt7915_l1_rr(struct mt7915_dev *dev, u32 addr)
+{
+	return mt76_rr(dev, mt7915_reg_map_l1(dev, addr));
+}
+
+/* Write @val to the register at @addr through the L1 remap window. */
+static inline void mt7915_l1_wr(struct mt7915_dev *dev, u32 addr, u32 val)
+{
+	mt76_wr(dev, mt7915_reg_map_l1(dev, addr), val);
+}
+
+static inline u32
+mt7915_l1_rmw(struct mt7915_dev *dev, u32 addr, u32 mask, u32 val)
+{
+	u32 next = val | (mt7915_l1_rr(dev, addr) & ~mask);	/* keep bits outside @mask, OR in @val */
+
+	mt7915_l1_wr(dev, addr, next);
+	return next;	/* the value that was written */
+}
+
+#define mt7915_l1_set(dev, addr, val)	mt7915_l1_rmw(dev, addr, 0, val)	/* OR @val into the register */
+#define mt7915_l1_clear(dev, addr, val)	mt7915_l1_rmw(dev, addr, val, 0)	/* clear the bits in @val */
+
+/* Program the L2 remap register with @addr's base field and return the
+ * address inside the remap window that corresponds to @addr. */
+static inline u32 mt7915_reg_map_l2(struct mt7915_dev *dev, u32 addr)
+{
+	u32 win_base = FIELD_GET(MT_HIF_REMAP_L2_BASE, addr);
+	u32 win_off = FIELD_GET(MT_HIF_REMAP_L2_OFFSET, addr);
+
+	mt76_rmw_field(dev, MT_HIF_REMAP_L2, MT_HIF_REMAP_L2_MASK, win_base);
+	mt76_rr(dev, MT_HIF_REMAP_L2);	/* use read to push write */
+
+	return MT_HIF_REMAP_BASE_L2 + win_off;
+}
+
+/* Read the register at @addr through the L2 remap window. */
+static inline u32 mt7915_l2_rr(struct mt7915_dev *dev, u32 addr)
+{
+	return mt76_rr(dev, mt7915_reg_map_l2(dev, addr));
+}
+
+/* Write @val to the register at @addr through the L2 remap window. */
+static inline void mt7915_l2_wr(struct mt7915_dev *dev, u32 addr, u32 val)
+{
+	mt76_wr(dev, mt7915_reg_map_l2(dev, addr), val);
+}
+
+static inline u32
+mt7915_l2_rmw(struct mt7915_dev *dev, u32 addr, u32 mask, u32 val)
+{
+	u32 next = val | (mt7915_l2_rr(dev, addr) & ~mask);	/* keep bits outside @mask, OR in @val */
+
+	mt7915_l2_wr(dev, addr, next);
+	return next;	/* the value that was written */
+}
+
+#define mt7915_l2_set(dev, addr, val)	mt7915_l2_rmw(dev, addr, 0, val)	/* OR @val into the register */
+#define mt7915_l2_clear(dev, addr, val)	mt7915_l2_rmw(dev, addr, val, 0)	/* clear the bits in @val */
+
+bool mt7915_mac_wtbl_update(struct mt7915_dev *dev, int idx, u32 mask);
+void mt7915_mac_reset_counters(struct mt7915_phy *phy);
+void mt7915_mac_cca_stats_reset(struct mt7915_phy *phy);
+void mt7915_mac_sta_poll(struct mt7915_dev *dev);
+void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
+			   struct sk_buff *skb, struct mt76_wcid *wcid,
+			   struct ieee80211_key_conf *key, bool beacon);
+void mt7915_mac_set_timing(struct mt7915_phy *phy);
+int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb);
+void mt7915_mac_tx_free(struct mt7915_dev *dev, struct sk_buff *skb);
+int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+		       struct ieee80211_sta *sta);
+void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
+			   struct ieee80211_sta *sta);
+void mt7915_mac_work(struct work_struct *work);
+void mt7915_mac_reset_work(struct work_struct *work);
+void mt7915_mac_sta_stats_work(struct work_struct *work);
+int mt7915_tx_prepare_skb(struct mt76_dev *mdev, void *txwi_ptr,
+			  enum mt76_txq_id qid, struct mt76_wcid *wcid,
+			  struct ieee80211_sta *sta,
+			  struct mt76_tx_info *tx_info);
+void mt7915_tx_complete_skb(struct mt76_dev *mdev, enum mt76_txq_id qid,
+			    struct mt76_queue_entry *e);
+void mt7915_queue_rx_skb(struct mt76_dev *mdev, enum mt76_rxq_id q,
+			 struct sk_buff *skb);
+void mt7915_sta_ps(struct mt76_dev *mdev, struct ieee80211_sta *sta, bool ps);
+void mt7915_stats_work(struct work_struct *work);
+void mt7915_txp_skb_unmap(struct mt76_dev *dev,
+			  struct mt76_txwi_cache *txwi);
+int mt76_dfs_start_rdd(struct mt7915_dev *dev, bool force);
+int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
+void mt7915_update_channel(struct mt76_dev *mdev);
+int mt7915_init_debugfs(struct mt7915_dev *dev);
+
+#endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
new file mode 100644
index 000000000000..b79b5060cd77
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -0,0 +1,166 @@
+// SPDX-License-Identifier: ISC
+/* Copyright (C) 2020 MediaTek Inc.
+ *
+ * Author: Ryder Lee <ryder.lee@mediatek.com>
+ */
+
+#include <linux/kernel.h>
+#include <linux/module.h>
+#include <linux/pci.h>
+
+#include "mt7915.h"
+#include "mac.h"
+#include "../trace.h"
+
+static const struct pci_device_id mt7915_pci_device_table[] = {
+	{ PCI_DEVICE(0x14c3, 0x7915) },
+	{ },
+};
+
+static void
+mt7915_rx_poll_complete(struct mt76_dev *mdev, enum mt76_rxq_id q)
+{
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+
+	mt7915_irq_enable(dev, MT_INT_RX_DONE(q)); /* RX-done bit for queue q */
+}
+
+/* Shared-line IRQ handler. TODO: support 2/4/6/8 MSI-X vectors */
+static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
+{
+	struct mt7915_dev *dev = dev_instance;
+	u32 intr;
+
+	intr = mt76_rr(dev, MT_INT_SOURCE_CSR);
+	mt76_wr(dev, MT_INT_SOURCE_CSR, intr); /* write back the bits just read */
+
+	if (!test_bit(MT76_STATE_INITIALIZED, &dev->mphy.state))
+		return IRQ_NONE;
+
+	trace_dev_irq(&dev->mt76, intr, dev->mt76.mmio.irqmask);
+
+	intr &= dev->mt76.mmio.irqmask; /* drop sources not in irqmask */
+
+	if (intr & MT_INT_TX_DONE_ALL) {
+		mt7915_irq_disable(dev, MT_INT_TX_DONE_ALL);
+		napi_schedule(&dev->mt76.tx_napi);
+	}
+
+	if (intr & MT_INT_RX_DONE_DATA) {
+		mt7915_irq_disable(dev, MT_INT_RX_DONE_DATA);
+		napi_schedule(&dev->mt76.napi[0]); /* RX queue 0: data */
+	}
+
+	if (intr & MT_INT_RX_DONE_WM) {
+		mt7915_irq_disable(dev, MT_INT_RX_DONE_WM);
+		napi_schedule(&dev->mt76.napi[1]); /* RX queue 1: WM */
+	}
+
+	if (intr & MT_INT_RX_DONE_WA) {
+		mt7915_irq_disable(dev, MT_INT_RX_DONE_WA);
+		napi_schedule(&dev->mt76.napi[2]); /* RX queue 2: WA */
+	}
+
+	if (intr & MT_INT_MCU_CMD) {
+		u32 val = mt76_rr(dev, MT_MCU_CMD);
+
+		mt76_wr(dev, MT_MCU_CMD, val); /* write back the bits just read */
+		if (val & MT_MCU_CMD_ERROR_MASK) { /* any of bits 5..1 set */
+			dev->reset_state = val;
+			ieee80211_queue_work(mt76_hw(dev), &dev->reset_work);
+			wake_up(&dev->reset_wait);
+		}
+	}
+
+	return IRQ_HANDLED;
+}
+
+static int mt7915_pci_probe(struct pci_dev *pdev,
+			    const struct pci_device_id *id)
+{
+	static const struct mt76_driver_ops drv_ops = {
+		/* txwi_size = txd size + txp size */
+		.txwi_size = MT_TXD_SIZE + sizeof(struct mt7915_txp),
+		.drv_flags = MT_DRV_TXWI_NO_FREE,
+		.survey_flags = SURVEY_INFO_TIME_TX |
+				SURVEY_INFO_TIME_RX |
+				SURVEY_INFO_TIME_BSS_RX,
+		.tx_prepare_skb = mt7915_tx_prepare_skb,
+		.tx_complete_skb = mt7915_tx_complete_skb,
+		.rx_skb = mt7915_queue_rx_skb,
+		.rx_poll_complete = mt7915_rx_poll_complete,
+		.sta_ps = mt7915_sta_ps,
+		.sta_add = mt7915_mac_sta_add,
+		.sta_remove = mt7915_mac_sta_remove,
+		.update_survey = mt7915_update_channel,
+	};
+	struct mt7915_dev *dev;
+	struct mt76_dev *mdev;
+	int ret;
+
+	ret = pcim_enable_device(pdev);
+	if (ret)
+		return ret;
+
+	ret = pcim_iomap_regions(pdev, BIT(0), pci_name(pdev)); /* BAR 0 only */
+	if (ret)
+		return ret;
+
+	pci_set_master(pdev);
+
+	ret = pci_set_dma_mask(pdev, DMA_BIT_MASK(32)); /* 32-bit DMA addressing */
+	if (ret)
+		return ret;
+
+	mdev = mt76_alloc_device(&pdev->dev, sizeof(*dev), &mt7915_ops,
+				 &drv_ops);
+	if (!mdev)
+		return -ENOMEM;
+
+	dev = container_of(mdev, struct mt7915_dev, mt76);
+
+	mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]);
+	mdev->rev = (mt7915_l1_rr(dev, MT_HW_CHIPID) << 16) |
+		    (mt7915_l1_rr(dev, MT_HW_REV) & 0xff);
+	dev_dbg(mdev->dev, "ASIC revision: %04x\n", mdev->rev);
+
+	/* master switch of PCIe interrupt enable */
+	mt7915_l1_wr(dev, MT_PCIE_MAC_INT_ENABLE, 0xff);
+
+	ret = devm_request_irq(mdev->dev, pdev->irq, mt7915_irq_handler,
+			       IRQF_SHARED, KBUILD_MODNAME, dev);
+	if (ret)
+		goto error;
+
+	ret = mt7915_register_device(dev);
+	if (ret)
+		goto error;
+
+	return 0;
+error:
+	ieee80211_free_hw(mt76_hw(dev)); /* only non-devm/pcim cleanup done here */
+	return ret;
+}
+
+static void mt7915_pci_remove(struct pci_dev *pdev)
+{
+	struct mt76_dev *mdev = pci_get_drvdata(pdev); /* drvdata is read as the mt76_dev */
+	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
+
+	mt7915_unregister_device(dev);
+}
+
+struct pci_driver mt7915_pci_driver = {
+	.name		= KBUILD_MODNAME,
+	.id_table	= mt7915_pci_device_table,
+	.probe		= mt7915_pci_probe,
+	.remove		= mt7915_pci_remove,
+};
+
+module_pci_driver(mt7915_pci_driver);
+
+MODULE_DEVICE_TABLE(pci, mt7915_pci_device_table);
+MODULE_FIRMWARE(MT7915_FIRMWARE_WA);
+MODULE_FIRMWARE(MT7915_FIRMWARE_WM);
+MODULE_FIRMWARE(MT7915_ROM_PATCH);
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
new file mode 100644
index 000000000000..ef87579056fe
--- /dev/null
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -0,0 +1,343 @@
+/* SPDX-License-Identifier: ISC */
+/* Copyright (C) 2020 MediaTek Inc. */
+
+#ifndef __MT7915_REGS_H
+#define __MT7915_REGS_H
+
+/* MCU WFDMA1 */
+#define MT_MCU_WFDMA1_BASE		0x3000
+#define MT_MCU_WFDMA1(ofs)		(MT_MCU_WFDMA1_BASE + (ofs))
+
+#define MT_MCU_INT_EVENT		MT_MCU_WFDMA1(0x108)
+#define MT_MCU_INT_EVENT_DMA_STOPPED	BIT(0)
+#define MT_MCU_INT_EVENT_DMA_INIT	BIT(1)
+#define MT_MCU_INT_EVENT_SER_TRIGGER	BIT(2)
+#define MT_MCU_INT_EVENT_RESET_DONE	BIT(3)
+
+#define MT_PLE_BASE			0x8000
+#define MT_PLE(ofs)			(MT_PLE_BASE + (ofs))
+
+#define MT_PLE_FL_Q0_CTRL		MT_PLE(0x1b0)
+#define MT_PLE_FL_Q1_CTRL		MT_PLE(0x1b4)
+#define MT_PLE_FL_Q2_CTRL		MT_PLE(0x1b8)
+#define MT_PLE_FL_Q3_CTRL		MT_PLE(0x1bc)
+
+#define MT_PLE_AC_QEMPTY(ac, n)		MT_PLE(0x300 + 0x10 * (ac) + \
+					       ((n) << 2))
+#define MT_PLE_AMSDU_PACK_MSDU_CNT(n)	MT_PLE(0x10e0 + ((n) << 2))
+
+#define MT_MDP_BASE			0xf000
+#define MT_MDP(ofs)			(MT_MDP_BASE + (ofs))
+
+#define MT_MDP_DCR0			MT_MDP(0x000)
+#define MT_MDP_DCR0_DAMSDU_EN		BIT(15)
+
+#define MT_MDP_DCR1			MT_MDP(0x004)
+#define MT_MDP_DCR1_MAX_RX_LEN		GENMASK(15, 3)
+
+#define MT_MDP_BNRCFR0(_band)		MT_MDP(0x070 + ((_band) << 8))
+#define MT_MDP_RCFR0_MCU_RX_MGMT	GENMASK(5, 4)
+#define MT_MDP_RCFR0_MCU_RX_CTL_NON_BAR	GENMASK(7, 6)
+#define MT_MDP_RCFR0_MCU_RX_CTL_BAR	GENMASK(9, 8)
+
+#define MT_MDP_BNRCFR1(_band)		MT_MDP(0x074 + ((_band) << 8))
+#define MT_MDP_RCFR1_MCU_RX_BYPASS	GENMASK(23, 22)
+#define MT_MDP_RCFR1_RX_DROPPED_UCAST	GENMASK(28, 27)
+#define MT_MDP_RCFR1_RX_DROPPED_MCAST	GENMASK(30, 29)
+#define MT_MDP_TO_HIF			0
+#define MT_MDP_TO_WM			1
+
+/* TMAC: band 0(0x21000), band 1(0xa1000) */
+#define MT_WF_TMAC_BASE(_band)		((_band) ? 0xa1000 : 0x21000)
+#define MT_WF_TMAC(_band, ofs)		(MT_WF_TMAC_BASE(_band) + (ofs))
+
+#define MT_TMAC_CDTR(_band)		MT_WF_TMAC(_band, 0x090)
+#define MT_TMAC_ODTR(_band)		MT_WF_TMAC(_band, 0x094)
+#define MT_TIMEOUT_VAL_PLCP		GENMASK(15, 0)
+#define MT_TIMEOUT_VAL_CCA		GENMASK(31, 16)
+
+#define MT_TMAC_ICR0(_band)		MT_WF_TMAC(_band, 0x0a4)
+#define MT_IFS_EIFS			GENMASK(8, 0)
+#define MT_IFS_RIFS			GENMASK(14, 10)
+#define MT_IFS_SIFS			GENMASK(22, 16)
+#define MT_IFS_SLOT			GENMASK(30, 24)
+
+#define MT_TMAC_CTCR0(_band)			MT_WF_TMAC(_band, 0x0f4)
+#define MT_TMAC_CTCR0_INS_DDLMT_REFTIME		GENMASK(5, 0)
+#define MT_TMAC_CTCR0_INS_DDLMT_EN		BIT(17)
+#define MT_TMAC_CTCR0_INS_DDLMT_VHT_SMPDU_EN	BIT(18)
+
+/* DMA Band 0 */
+#define MT_WF_DMA_BASE			0x21e00
+#define MT_WF_DMA(ofs)			(MT_WF_DMA_BASE + (ofs))
+
+#define MT_DMA_DCR0			MT_WF_DMA(0x000)
+#define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 3)
+
+/* MIB: band 0(0x24800), band 1(0xa4800) */
+#define MT_WF_MIB_BASE(_band)		((_band) ? 0xa4800 : 0x24800)
+#define MT_WF_MIB(_band, ofs)		(MT_WF_MIB_BASE(_band) + (ofs))
+
+#define MT_MIB_SDR3(_band)		MT_WF_MIB(_band, 0x014)
+#define MT_MIB_SDR3_FCS_ERR_MASK	GENMASK(15, 0)
+
+#define MT_MIB_SDR9(_band)		MT_WF_MIB(_band, 0x02c)
+#define MT_MIB_SDR9_BUSY_MASK		GENMASK(23, 0)
+
+#define MT_MIB_SDR16(_band)		MT_WF_MIB(_band, 0x048)
+#define MT_MIB_SDR16_BUSY_MASK		GENMASK(23, 0)
+
+#define MT_MIB_SDR36(_band)		MT_WF_MIB(_band, 0x098)
+#define MT_MIB_SDR36_TXTIME_MASK	GENMASK(23, 0)
+#define MT_MIB_SDR37(_band)		MT_WF_MIB(_band, 0x09c)
+#define MT_MIB_SDR37_RXTIME_MASK	GENMASK(23, 0)
+
+#define MT_MIB_MB_SDR0(_band, n)	MT_WF_MIB(_band, 0x100 + ((n) << 4))
+#define MT_MIB_RTS_RETRIES_COUNT_MASK	GENMASK(31, 16)
+#define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
+
+#define MT_MIB_MB_SDR1(_band, n)	MT_WF_MIB(_band, 0x104 + ((n) << 4))
+#define MT_MIB_BA_MISS_COUNT_MASK	GENMASK(15, 0)
+#define MT_MIB_ACK_FAIL_COUNT_MASK	GENMASK(31, 16)
+
+#define MT_MIB_MB_SDR2(_band, n)	MT_WF_MIB(_band, 0x108 + ((n) << 4))
+#define MT_MIB_FRAME_RETRIES_COUNT_MASK	GENMASK(15, 0)
+
+#define MT_TX_AGG_CNT(_band, n)		MT_WF_MIB(_band, 0x0a8 + ((n) << 2))
+#define MT_TX_AGG_CNT2(_band, n)	MT_WF_MIB(_band, 0x164 + ((n) << 2))
+#define MT_MIB_ARNG(_band, n)		MT_WF_MIB(_band, 0x4b8 + ((n) << 2))
+#define MT_MIB_ARNCR_RANGE(val, n)	(((val) >> ((n) << 3)) & GENMASK(7, 0))
+
+#define MT_WTBLON_TOP_BASE		0x34000
+#define MT_WTBLON_TOP(ofs)		(MT_WTBLON_TOP_BASE + (ofs))
+#define MT_WTBLON_TOP_WDUCR		MT_WTBLON_TOP(0x0)
+#define MT_WTBLON_TOP_WDUCR_GROUP	GENMASK(2, 0)
+
+#define MT_WTBL_UPDATE			MT_WTBLON_TOP(0x030)
+#define MT_WTBL_UPDATE_WLAN_IDX		GENMASK(9, 0)
+#define MT_WTBL_UPDATE_ADM_COUNT_CLEAR	BIT(12)
+#define MT_WTBL_UPDATE_BUSY		BIT(31)
+
+#define MT_WTBL_BASE			0x38000
+#define MT_WTBL_LMAC_ID			GENMASK(14, 8)
+#define MT_WTBL_LMAC_DW			GENMASK(7, 2)
+#define MT_WTBL_LMAC_OFFS(_id, _dw)	(MT_WTBL_BASE | \
+					FIELD_PREP(MT_WTBL_LMAC_ID, _id) | \
+					FIELD_PREP(MT_WTBL_LMAC_DW, _dw))
+
+/* AGG: band 0(0x20800), band 1(0xa0800) */
+#define MT_WF_AGG_BASE(_band)		((_band) ? 0xa0800 : 0x20800)
+#define MT_WF_AGG(_band, ofs)		(MT_WF_AGG_BASE(_band) + (ofs))
+
+#define MT_AGG_ACR0(_band)		MT_WF_AGG(_band, 0x084)
+#define MT_AGG_ACR_CFEND_RATE		GENMASK(13, 0)
+#define MT_AGG_ACR_BAR_RATE		GENMASK(29, 16)
+
+/* ARB: band 0(0x20c00), band 1(0xa0c00) */
+#define MT_WF_ARB_BASE(_band)		((_band) ? 0xa0c00 : 0x20c00)
+#define MT_WF_ARB(_band, ofs)		(MT_WF_ARB_BASE(_band) + (ofs))
+
+#define MT_ARB_SCR(_band)		MT_WF_ARB(_band, 0x080)
+#define MT_ARB_SCR_TX_DISABLE		BIT(8)
+#define MT_ARB_SCR_RX_DISABLE		BIT(9)
+
+/* RMAC: band 0(0x21400), band 1(0xa1400) */
+#define MT_WF_RMAC_BASE(_band)		((_band) ? 0xa1400 : 0x21400)
+#define MT_WF_RMAC(_band, ofs)		(MT_WF_RMAC_BASE(_band) + (ofs))
+
+#define MT_WF_RFCR(_band)		MT_WF_RMAC(_band, 0x000)
+#define MT_WF_RFCR_DROP_STBC_MULTI	BIT(0)
+#define MT_WF_RFCR_DROP_FCSFAIL		BIT(1)
+#define MT_WF_RFCR_DROP_VERSION		BIT(3)
+#define MT_WF_RFCR_DROP_PROBEREQ	BIT(4)
+#define MT_WF_RFCR_DROP_MCAST		BIT(5)
+#define MT_WF_RFCR_DROP_BCAST		BIT(6)
+#define MT_WF_RFCR_DROP_MCAST_FILTERED	BIT(7)
+#define MT_WF_RFCR_DROP_A3_MAC		BIT(8)
+#define MT_WF_RFCR_DROP_A3_BSSID	BIT(9)
+#define MT_WF_RFCR_DROP_A2_BSSID	BIT(10)
+#define MT_WF_RFCR_DROP_OTHER_BEACON	BIT(11)
+#define MT_WF_RFCR_DROP_FRAME_REPORT	BIT(12)
+#define MT_WF_RFCR_DROP_CTL_RSV		BIT(13)
+#define MT_WF_RFCR_DROP_CTS		BIT(14)
+#define MT_WF_RFCR_DROP_RTS		BIT(15)
+#define MT_WF_RFCR_DROP_DUPLICATE	BIT(16)
+#define MT_WF_RFCR_DROP_OTHER_BSS	BIT(17)
+#define MT_WF_RFCR_DROP_OTHER_UC	BIT(18)
+#define MT_WF_RFCR_DROP_OTHER_TIM	BIT(19)
+#define MT_WF_RFCR_DROP_NDPA		BIT(20)
+#define MT_WF_RFCR_DROP_UNWANTED_CTL	BIT(21)
+
+#define MT_WF_RFCR1(_band)		MT_WF_RMAC(_band, 0x004)
+#define MT_WF_RFCR1_DROP_ACK		BIT(4)
+#define MT_WF_RFCR1_DROP_BF_POLL	BIT(5)
+#define MT_WF_RFCR1_DROP_BA		BIT(6)
+#define MT_WF_RFCR1_DROP_CFEND		BIT(7)
+#define MT_WF_RFCR1_DROP_CFACK		BIT(8)
+
+#define MT_WF_RMAC_MIB_TIME0(_band)	MT_WF_RMAC(_band, 0x03c4)
+#define MT_WF_RMAC_MIB_RXTIME_CLR	BIT(31)
+#define MT_WF_RMAC_MIB_RXTIME_EN	BIT(30)
+
+#define MT_WF_RMAC_MIB_AIRTIME14(_band)	MT_WF_RMAC(_band, 0x03b8)
+#define MT_MIB_OBSSTIME_MASK		GENMASK(23, 0)
+#define MT_WF_RMAC_MIB_AIRTIME0(_band)	MT_WF_RMAC(_band, 0x0380)
+
+/* WFDMA0 */
+#define MT_WFDMA0_BASE			0xd4000
+#define MT_WFDMA0(ofs)			(MT_WFDMA0_BASE + (ofs))
+
+#define MT_WFDMA0_RST			MT_WFDMA0(0x100)
+#define MT_WFDMA0_RST_LOGIC_RST		BIT(4)
+#define MT_WFDMA0_RST_DMASHDL_ALL_RST	BIT(5)
+
+#define MT_WFDMA0_BUSY_ENA		MT_WFDMA0(0x13c)
+#define MT_WFDMA0_BUSY_ENA_TX_FIFO0	BIT(0)
+#define MT_WFDMA0_BUSY_ENA_TX_FIFO1	BIT(1)
+#define MT_WFDMA0_BUSY_ENA_RX_FIFO	BIT(2)
+
+#define MT_WFDMA0_GLO_CFG		MT_WFDMA0(0x208)
+#define MT_WFDMA0_GLO_CFG_TX_DMA_EN	BIT(0)
+#define MT_WFDMA0_GLO_CFG_RX_DMA_EN	BIT(2)
+
+#define MT_WFDMA0_RST_DTX_PTR		MT_WFDMA0(0x20c)
+#define MT_WFDMA0_PRI_DLY_INT_CFG0	MT_WFDMA0(0x2f0)
+
+#define MT_RX_DATA_RING_BASE		MT_WFDMA0(0x500)
+
+#define MT_WFDMA0_RX_RING0_EXT_CTRL	MT_WFDMA0(0x680)
+#define MT_WFDMA0_RX_RING1_EXT_CTRL	MT_WFDMA0(0x684)
+#define MT_WFDMA0_RX_RING2_EXT_CTRL	MT_WFDMA0(0x688)
+
+/* WFDMA1 */
+#define MT_WFDMA1_BASE			0xd5000
+#define MT_WFDMA1(ofs)			(MT_WFDMA1_BASE + (ofs))
+
+#define MT_WFDMA1_RST			MT_WFDMA1(0x100)
+#define MT_WFDMA1_RST_LOGIC_RST		BIT(4)
+#define MT_WFDMA1_RST_DMASHDL_ALL_RST	BIT(5)
+
+#define MT_WFDMA1_BUSY_ENA		MT_WFDMA1(0x13c)
+#define MT_WFDMA1_BUSY_ENA_TX_FIFO0	BIT(0)
+#define MT_WFDMA1_BUSY_ENA_TX_FIFO1	BIT(1)
+#define MT_WFDMA1_BUSY_ENA_RX_FIFO	BIT(2)
+
+#define MT_MCU_CMD			MT_WFDMA1(0x1f0)
+#define MT_MCU_CMD_STOP_DMA_FW_RELOAD	BIT(1)
+#define MT_MCU_CMD_STOP_DMA		BIT(2)
+#define MT_MCU_CMD_RESET_DONE		BIT(3)
+#define MT_MCU_CMD_RECOVERY_DONE	BIT(4)
+#define MT_MCU_CMD_NORMAL_STATE		BIT(5)
+#define MT_MCU_CMD_ERROR_MASK		GENMASK(5, 1)
+
+#define MT_WFDMA1_GLO_CFG		MT_WFDMA1(0x208)
+#define MT_WFDMA1_GLO_CFG_TX_DMA_EN	BIT(0)
+#define MT_WFDMA1_GLO_CFG_RX_DMA_EN	BIT(2)
+#define MT_WFDMA1_GLO_CFG_OMIT_TX_INFO	BIT(28)
+#define MT_WFDMA1_GLO_CFG_OMIT_RX_INFO	BIT(27)
+
+#define MT_WFDMA1_RST_DTX_PTR		MT_WFDMA1(0x20c)
+#define MT_WFDMA1_PRI_DLY_INT_CFG0	MT_WFDMA1(0x2f0)
+
+#define MT_TX_RING_BASE			MT_WFDMA1(0x300)
+#define MT_RX_EVENT_RING_BASE		MT_WFDMA1(0x500)
+
+#define MT_WFDMA1_TX_RING0_EXT_CTRL	MT_WFDMA1(0x600)
+#define MT_WFDMA1_TX_RING1_EXT_CTRL	MT_WFDMA1(0x604)
+#define MT_WFDMA1_TX_RING2_EXT_CTRL	MT_WFDMA1(0x608)
+#define MT_WFDMA1_TX_RING3_EXT_CTRL	MT_WFDMA1(0x60c)
+#define MT_WFDMA1_TX_RING4_EXT_CTRL	MT_WFDMA1(0x610)
+#define MT_WFDMA1_TX_RING5_EXT_CTRL	MT_WFDMA1(0x614)
+#define MT_WFDMA1_TX_RING6_EXT_CTRL	MT_WFDMA1(0x618)
+#define MT_WFDMA1_TX_RING7_EXT_CTRL	MT_WFDMA1(0x61c)
+
+#define MT_WFDMA1_TX_RING16_EXT_CTRL	MT_WFDMA1(0x640)
+#define MT_WFDMA1_TX_RING17_EXT_CTRL	MT_WFDMA1(0x644)
+#define MT_WFDMA1_TX_RING18_EXT_CTRL	MT_WFDMA1(0x648)
+#define MT_WFDMA1_TX_RING19_EXT_CTRL	MT_WFDMA1(0x64c)
+#define MT_WFDMA1_TX_RING20_EXT_CTRL	MT_WFDMA1(0x650)
+#define MT_WFDMA1_TX_RING21_EXT_CTRL	MT_WFDMA1(0x654)
+#define MT_WFDMA1_TX_RING22_EXT_CTRL	MT_WFDMA1(0x658)
+#define MT_WFDMA1_TX_RING23_EXT_CTRL	MT_WFDMA1(0x65c)
+
+#define MT_WFDMA1_RX_RING0_EXT_CTRL	MT_WFDMA1(0x680)
+#define MT_WFDMA1_RX_RING1_EXT_CTRL	MT_WFDMA1(0x684)
+#define MT_WFDMA1_RX_RING2_EXT_CTRL	MT_WFDMA1(0x688)
+#define MT_WFDMA1_RX_RING3_EXT_CTRL	MT_WFDMA1(0x68c)
+
+/* WFDMA CSR */
+#define MT_WFDMA_EXT_CSR_BASE		0xd7000
+#define MT_WFDMA_EXT_CSR(ofs)		(MT_WFDMA_EXT_CSR_BASE + (ofs))
+
+#define MT_INT_SOURCE_CSR		MT_WFDMA_EXT_CSR(0x10)
+#define MT_INT_MASK_CSR			MT_WFDMA_EXT_CSR(0x14)
+#define MT_INT_RX_DONE_DATA		BIT(16)
+#define MT_INT_RX_DONE_WM		BIT(0)
+#define MT_INT_RX_DONE_WA		BIT(1)
+#define MT_INT_RX_DONE(_n)		((_n) ? BIT((_n) - 1) : BIT(16))
+#define MT_INT_RX_DONE_ALL		(BIT(0) | BIT(1) | BIT(16))
+#define MT_INT_TX_DONE_ALL		(BIT(15) | GENMASK(27, 26) | BIT(30))
+#define MT_INT_MCU_CMD			BIT(29)
+
+#define MT_WFDMA_EXT_CSR_HIF_MISC	MT_WFDMA_EXT_CSR(0x44)
+#define MT_WFDMA_EXT_CSR_HIF_MISC_BUSY	BIT(0)
+
+/* WFDMA0 PCIE1 */
+#define MT_WFDMA0_PCIE1_BASE			0xd8000
+#define MT_WFDMA0_PCIE1(ofs)			(MT_WFDMA0_PCIE1_BASE + (ofs))
+
+#define MT_WFDMA0_PCIE1_BUSY_ENA		MT_WFDMA0_PCIE1(0x13c)
+#define MT_WFDMA0_PCIE1_BUSY_ENA_TX_FIFO0	BIT(0)
+#define MT_WFDMA0_PCIE1_BUSY_ENA_TX_FIFO1	BIT(1)
+#define MT_WFDMA0_PCIE1_BUSY_ENA_RX_FIFO	BIT(2)
+
+/* WFDMA1 PCIE1: offsets are relative to MT_WFDMA1_PCIE1_BASE (0xd9000) */
+#define MT_WFDMA1_PCIE1_BASE			0xd9000
+#define MT_WFDMA1_PCIE1(ofs)			(MT_WFDMA1_PCIE1_BASE + (ofs))
+
+#define MT_WFDMA1_PCIE1_BUSY_ENA		MT_WFDMA1_PCIE1(0x13c)
+#define MT_WFDMA1_PCIE1_BUSY_ENA_TX_FIFO0	BIT(0)
+#define MT_WFDMA1_PCIE1_BUSY_ENA_TX_FIFO1	BIT(1)
+#define MT_WFDMA1_PCIE1_BUSY_ENA_RX_FIFO	BIT(2)
+
+#define MT_INFRA_CFG_BASE		0xf1000
+#define MT_INFRA(ofs)			(MT_INFRA_CFG_BASE + (ofs))
+
+#define MT_HIF_REMAP_L1			MT_INFRA(0x1ac)
+#define MT_HIF_REMAP_L1_MASK		GENMASK(15, 0)
+#define MT_HIF_REMAP_L1_OFFSET		GENMASK(15, 0)
+#define MT_HIF_REMAP_L1_BASE		GENMASK(31, 16)
+#define MT_HIF_REMAP_BASE_L1		0xe0000
+
+#define MT_HIF_REMAP_L2			MT_INFRA(0x1b0)
+#define MT_HIF_REMAP_L2_MASK		GENMASK(19, 0)
+#define MT_HIF_REMAP_L2_OFFSET		GENMASK(11, 0)
+#define MT_HIF_REMAP_L2_BASE		GENMASK(31, 12)
+#define MT_HIF_REMAP_BASE_L2		0x00000
+
+#define MT_TOP_BASE			0x18060000
+#define MT_TOP(ofs)			(MT_TOP_BASE + (ofs))
+
+#define MT_TOP_LPCR_HOST_BAND0		MT_TOP(0x10)
+#define MT_TOP_LPCR_HOST_FW_OWN		BIT(0)
+#define MT_TOP_LPCR_HOST_DRV_OWN	BIT(1)
+
+#define MT_TOP_MISC			MT_TOP(0xf0)
+#define MT_TOP_MISC_FW_STATE		GENMASK(2, 0)
+
+#define MT_HW_BOUND			0x70010020
+#define MT_HW_CHIPID			0x70010200
+#define MT_HW_REV			0x70010204
+
+#define MT_PCIE_MAC_BASE		0x74030000
+#define MT_PCIE_MAC(ofs)		(MT_PCIE_MAC_BASE + (ofs))
+#define MT_PCIE_MAC_INT_ENABLE		MT_PCIE_MAC(0x188)
+
+/* PHY: band 0(0x83080000), band 1(0x83090000) */
+#define MT_WF_PHY_BASE			0x83080000
+#define MT_WF_PHY(ofs)			(MT_WF_PHY_BASE + (ofs))
+
+#define MT_WF_PHY_RX_CTRL1(_phy)	MT_WF_PHY(0x2004 + ((_phy) << 16))
+#define MT_WF_PHY_RX_CTRL1_STSCNT_EN	GENMASK(11, 9)
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 5205071a519c5dd7b479343e17a109fb3cb19629 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:28 +0800
Subject: mt76: mt7915: enable Rx HE rate reporting

Enable HE rate reporting in Rx path

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  | 27 ++++++++++++++++++++++--
 drivers/net/wireless/mediatek/mt76/mt7915/mac.h  |  2 ++
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h |  1 +
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index fb596afdf0be..f9384e1dab74 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -43,6 +43,8 @@ static void mt7915_mac_init(struct mt7915_dev *dev)
 
 	mt76_rmw_field(dev, MT_DMA_DCR0, MT_DMA_DCR0_MAX_RX_LEN, 1536);
 	mt76_rmw_field(dev, MT_MDP_DCR1, MT_MDP_DCR1_MAX_RX_LEN, 1536);
+	/* enable rx rate report */
+	mt76_set(dev, MT_DMA_DCR0, MT_DMA_DCR0_RXD_G5_EN);
 	/* disable hardware de-agg */
 	mt76_clear(dev, MT_MDP_DCR0, MT_MDP_DCR0_DAMSDU_EN);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index ddba360046c3..dd5b30a066b3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -347,6 +347,22 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 				if (i > 9)
 					return -EINVAL;
 				break;
+			case MT_PHY_TYPE_HE_MU:
+				/* fall through */
+			case MT_PHY_TYPE_HE_SU:
+			case MT_PHY_TYPE_HE_EXT_SU:
+			case MT_PHY_TYPE_HE_TB:
+				status->nss =
+					FIELD_GET(MT_PRXV_NSTS, rxv.v[0]) + 1;
+				status->encoding = RX_ENC_HE;
+				i &= GENMASK(3, 0);
+
+				if (gi <= NL80211_RATE_INFO_HE_GI_3_2)
+					status->he_gi = gi;
+
+				if (idx & MT_PRXV_TX_DCM)
+					status->he_dcm = true;
+				break;
 			default:
 				return -EINVAL;
 			}
@@ -356,7 +372,14 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 			case IEEE80211_STA_RX_BW_20:
 				break;
 			case IEEE80211_STA_RX_BW_40:
-				status->bw = RATE_INFO_BW_40;
+				if (rxv.phy & MT_PHY_TYPE_HE_EXT_SU &&
+				    (idx & MT_PRXV_TX_ER_SU_106T)) {
+					status->bw = RATE_INFO_BW_HE_RU;
+					status->he_ru =
+						NL80211_RATE_INFO_HE_RU_ALLOC_106;
+				} else {
+					status->bw = RATE_INFO_BW_40;
+				}
 				break;
 			case IEEE80211_STA_RX_BW_80:
 				status->bw = RATE_INFO_BW_80;
@@ -369,7 +392,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 			}
 
 			status->enc_flags |= RX_ENC_FLAG_STBC_MASK * stbc;
-			if (gi)
+			if (rxv.phy < MT_PHY_TYPE_HE_SU && gi)
 				status->enc_flags |= RX_ENC_FLAG_SHORT_GI;
 		}
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 7da7551f98e4..712753484aee 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -97,6 +97,8 @@ enum rx_pkt_type {
 
 /* P-RXV */
 #define MT_PRXV_TX_RATE			GENMASK(6, 0)
+#define MT_PRXV_TX_DCM			BIT(4)
+#define MT_PRXV_TX_ER_SU_106T		BIT(5)
 #define MT_PRXV_NSTS			GENMASK(9, 7)
 #define MT_PRXV_HT_AD_CODE		BIT(11)
 #define MT_PRXV_RCPI3			GENMASK(31, 24)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index ef87579056fe..572bdc16fb2d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -73,6 +73,7 @@
 
 #define MT_DMA_DCR0			MT_WF_DMA(0x000)
 #define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 3)
+#define MT_DMA_DCR0_RXD_G5_EN		BIT(23)
 
 /* MIB: band 0(0x24800), band 1(0xa4800) */
 #define MT_WF_MIB_BASE(_band)		((_band) ? 0xa4800 : 0x24800)
-- 
cgit v1.2.3-59-g8ed1b


From f1d962369d5687b08d3270dad421e9346caee0ca Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:29 +0800
Subject: mt76: mt7915: implement HE per-rate tx power support

Use firmware support for applying per-rate limit and power offsets.
This can support all HE RU types.

Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Acked-by: Yiwei Chung <yiwei.chung@mediatek.com>
Acked-by: YF Luo <yf.luo@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    |  59 +++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c | 118 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h |  47 ++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |   2 +
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |   7 ++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |  48 +++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |   1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |   8 ++
 drivers/net/wireless/mediatek/mt76/mt7915/pci.c    |  25 +++++
 9 files changed, 315 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 152ae0617f3d..2e3f05f73697 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -219,6 +219,63 @@ mt7915_queues_read(struct seq_file *s, void *data)
 	return 0;
 }
 
+static void
+mt7915_puts_rate_txpower(struct seq_file *s, s8 *delta,
+			 s8 txpower_cur, int band)
+{
+	static const char * const sku_group_name[] = {
+		"CCK", "OFDM", "HT20", "HT40",
+		"VHT20", "VHT40", "VHT80", "VHT160",
+		"RU26", "RU52", "RU106", "RU242/SU20",
+		"RU484/SU40", "RU996/SU80", "RU2x996/SU160"
+	};
+	s8 txpower[MT7915_SKU_RATE_NUM];
+	int i, idx = 0;
+
+	for (i = 0; i < MT7915_SKU_RATE_NUM; i++)
+		txpower[i] = DIV_ROUND_UP(txpower_cur + delta[i], 2); /* halve, rounding up */
+
+	for (i = 0; i < MAX_SKU_RATE_GROUP_NUM; i++) {
+		const struct sku_group *sku = &mt7915_sku_groups[i];
+		u32 offset = sku->offset[band];
+
+		if (!offset) { /* no offset for this band: skip the group's slots */
+			idx += sku->len;
+			continue;
+		}
+
+		mt76_seq_puts_array(s, sku_group_name[i],
+				    txpower + idx, sku->len);
+		idx += sku->len;
+	}
+}
+
+static int
+mt7915_read_rate_txpower(struct seq_file *s, void *data)
+{
+	struct mt7915_dev *dev = dev_get_drvdata(s->private);
+	struct mt76_phy *mphy = &dev->mphy;
+	enum nl80211_band band = mphy->chandef.chan->band;
+	s8 *delta = dev->rate_power[band];
+	s8 txpower_base = mphy->txpower_cur - delta[MT7915_SKU_MAX_DELTA_IDX];
+
+	seq_puts(s, "Band 0:\n"); /* values for dev->mphy */
+	mt7915_puts_rate_txpower(s, delta, txpower_base, band);
+
+	if (dev->mt76.phy2) { /* repeat for the second phy when one exists */
+		mphy = dev->mt76.phy2;
+		band = mphy->chandef.chan->band;
+		delta = dev->rate_power[band];
+		txpower_base = mphy->txpower_cur -
+			       delta[MT7915_SKU_MAX_DELTA_IDX];
+
+		seq_puts(s, "Band 1:\n");
+		mt7915_puts_rate_txpower(s, delta, txpower_base, band);
+	}
+
+	return 0;
+}
+
 int mt7915_init_debugfs(struct mt7915_dev *dev)
 {
 	struct dentry *dir;
@@ -240,6 +297,8 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 	debugfs_create_file("ser_trigger", 0200, dir, dev, &fops_ser_trigger);
 	debugfs_create_devm_seqfile(dev->mt76.dev, "temperature", dir,
 				    mt7915_read_temperature);
+	debugfs_create_devm_seqfile(dev->mt76.dev, "txpower_sku", dir,
+				    mt7915_read_rate_txpower);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
index 2099dd40530f..7deba7ebd68a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.c
@@ -123,3 +123,121 @@ int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 
 	return mt7915_eeprom_read(dev, index);
 }
+
+static const u8 sku_cck_delta_map[] = {
+	SKU_CCK_GROUP0,
+	SKU_CCK_GROUP0,
+	SKU_CCK_GROUP1,
+	SKU_CCK_GROUP1,
+};
+
+static const u8 sku_ofdm_delta_map[] = {
+	SKU_OFDM_GROUP0,
+	SKU_OFDM_GROUP0,
+	SKU_OFDM_GROUP1,
+	SKU_OFDM_GROUP1,
+	SKU_OFDM_GROUP2,
+	SKU_OFDM_GROUP2,
+	SKU_OFDM_GROUP3,
+	SKU_OFDM_GROUP4,
+};
+
+static const u8 sku_mcs_delta_map[] = {
+	SKU_MCS_GROUP0,
+	SKU_MCS_GROUP1,
+	SKU_MCS_GROUP1,
+	SKU_MCS_GROUP2,
+	SKU_MCS_GROUP2,
+	SKU_MCS_GROUP3,
+	SKU_MCS_GROUP4,
+	SKU_MCS_GROUP5,
+	SKU_MCS_GROUP6,
+	SKU_MCS_GROUP7,
+	SKU_MCS_GROUP8,
+	SKU_MCS_GROUP9,
+};
+
+#define SKU_GROUP(_mode, _len, _ofs_2g, _ofs_5g, _map)	\
+	[_mode] = {					\
+	.len = _len,					\
+	.offset = {					\
+		_ofs_2g,				\
+		_ofs_5g,				\
+	},						\
+	.delta_map = _map				\
+}
+
+const struct sku_group mt7915_sku_groups[] = {
+	SKU_GROUP(SKU_CCK, 4, 0x252, 0, sku_cck_delta_map),
+	SKU_GROUP(SKU_OFDM, 8, 0x254, 0x29d, sku_ofdm_delta_map),
+
+	SKU_GROUP(SKU_HT_BW20, 8, 0x259, 0x2a2, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HT_BW40, 9, 0x262, 0x2ab, sku_mcs_delta_map),
+	SKU_GROUP(SKU_VHT_BW20, 12, 0x259, 0x2a2, sku_mcs_delta_map),
+	SKU_GROUP(SKU_VHT_BW40, 12, 0x262, 0x2ab, sku_mcs_delta_map),
+	SKU_GROUP(SKU_VHT_BW80, 12, 0, 0x2b4, sku_mcs_delta_map),
+	SKU_GROUP(SKU_VHT_BW160, 12, 0, 0, sku_mcs_delta_map),
+
+	SKU_GROUP(SKU_HE_RU26, 12, 0x27f, 0x2dd, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU52, 12, 0x289, 0x2e7, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU106, 12, 0x293, 0x2f1, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU242, 12, 0x26b, 0x2bf, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU484, 12, 0x275, 0x2c9, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU996, 12, 0, 0x2d3, sku_mcs_delta_map),
+	SKU_GROUP(SKU_HE_RU2x996, 12, 0, 0, sku_mcs_delta_map),
+};
+
+static s8
+mt7915_get_sku_delta(struct mt7915_dev *dev, u32 addr)
+{
+	u32 val = mt7915_eeprom_read(dev, addr);
+	s8 delta = FIELD_GET(SKU_DELTA_VAL, val); /* bits 5..0: magnitude */
+
+	if (!(val & SKU_DELTA_EN)) /* bit 7 clear: report no delta */
+		return 0;
+
+	return val & SKU_DELTA_ADD ? delta : -delta; /* bit 6 set: positive, else negative */
+}
+
+static void
+mt7915_eeprom_init_sku_band(struct mt7915_dev *dev,
+			    struct ieee80211_supported_band *sband)
+{
+	int i, band = sband->band;
+	s8 *rate_power = dev->rate_power[band], max_delta = 0;
+	u8 idx = 0;
+
+	for (i = 0; i < ARRAY_SIZE(mt7915_sku_groups); i++) {
+		const struct sku_group *sku = &mt7915_sku_groups[i];
+		u32 offset = sku->offset[band];
+		int j;
+
+		if (!offset) { /* no offset for this band: leave its slots as they are */
+			idx += sku->len;
+			continue;
+		}
+
+		rate_power[idx++] = mt7915_get_sku_delta(dev, offset); /* first rate of the group */
+		if (rate_power[idx - 1] > max_delta)
+			max_delta = rate_power[idx - 1];
+
+		if (i == SKU_HT_BW20 || i == SKU_VHT_BW20)
+			offset += 1; /* remaining rates of these two groups start one byte later */
+
+		for (j = 1; j < sku->len; j++) {
+			u32 addr = offset + sku->delta_map[j];
+
+			rate_power[idx++] = mt7915_get_sku_delta(dev, addr);
+			if (rate_power[idx - 1] > max_delta)
+				max_delta = rate_power[idx - 1];
+		}
+	}
+
+	rate_power[idx] = max_delta; /* slot after the last rate holds the largest delta */
+}
+
+void mt7915_eeprom_init_sku(struct mt7915_dev *dev)
+{
+	mt7915_eeprom_init_sku_band(dev, &dev->mphy.sband_2g.sband);
+	mt7915_eeprom_init_sku_band(dev, &dev->mphy.sband_5g.sband);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
index 30fc607e466d..4e31d6ab4fa6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/eeprom.h
@@ -37,6 +37,51 @@ enum mt7915_eeprom_band {
 	MT_EE_DBDC,
 };
 
+#define SKU_DELTA_VAL		GENMASK(5, 0)
+#define SKU_DELTA_ADD		BIT(6)
+#define SKU_DELTA_EN		BIT(7)
+
+enum mt7915_sku_delta_group {
+	SKU_CCK_GROUP0,
+	SKU_CCK_GROUP1,
+
+	SKU_OFDM_GROUP0 = 0,
+	SKU_OFDM_GROUP1,
+	SKU_OFDM_GROUP2,
+	SKU_OFDM_GROUP3,
+	SKU_OFDM_GROUP4,
+
+	SKU_MCS_GROUP0 = 0,
+	SKU_MCS_GROUP1,
+	SKU_MCS_GROUP2,
+	SKU_MCS_GROUP3,
+	SKU_MCS_GROUP4,
+	SKU_MCS_GROUP5,
+	SKU_MCS_GROUP6,
+	SKU_MCS_GROUP7,
+	SKU_MCS_GROUP8,
+	SKU_MCS_GROUP9,
+};
+
+enum mt7915_sku_rate_group {
+	SKU_CCK,
+	SKU_OFDM,
+	SKU_HT_BW20,
+	SKU_HT_BW40,
+	SKU_VHT_BW20,
+	SKU_VHT_BW40,
+	SKU_VHT_BW80,
+	SKU_VHT_BW160,
+	SKU_HE_RU26,
+	SKU_HE_RU52,
+	SKU_HE_RU106,
+	SKU_HE_RU242,
+	SKU_HE_RU484,
+	SKU_HE_RU996,
+	SKU_HE_RU2x996,
+	MAX_SKU_RATE_GROUP_NUM,
+};
+
 struct sku_group {
 	u8 len;
 	u16 offset[2];
@@ -75,4 +120,6 @@ mt7915_tssi_enabled(struct mt7915_dev *dev, enum nl80211_band band)
 		return eep[MT_EE_WIFI_CONF + 7] & MT_EE_WIFI_CONF_TSSI0_2G;
 }
 
+extern const struct sku_group mt7915_sku_groups[];
+
 #endif
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index f9384e1dab74..abce37ca9252 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -85,6 +85,8 @@ static void mt7915_init_txpower(struct mt7915_dev *dev)
 {
 	mt7915_init_txpower_band(dev, &dev->mphy.sband_2g.sband);
 	mt7915_init_txpower_band(dev, &dev->mphy.sband_5g.sband);
+
+	mt7915_eeprom_init_sku(dev);
 }
 
 static void mt7915_init_work(struct work_struct *work)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index ae5ed41d337b..231bae4184ca 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -42,6 +42,7 @@ static int mt7915_start(struct ieee80211_hw *hw)
 		mt7915_mcu_set_scs(dev, 1, true);
 	}
 
+	mt7915_mcu_set_sku_en(phy, true);
 	mt7915_mcu_set_chan_info(phy, MCU_EXT_CMD_SET_RX_PATH);
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
@@ -325,6 +326,12 @@ static int mt7915_config(struct ieee80211_hw *hw, u32 changed)
 		ieee80211_wake_queues(hw);
 	}
 
+	if (changed & IEEE80211_CONF_CHANGE_POWER) {
+		ret = mt7915_mcu_set_sku(phy);
+		if (ret)
+			return ret;
+	}
+
 	mutex_lock(&dev->mt76.mutex);
 
 	if (changed & IEEE80211_CONF_CHANGE_MONITOR) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index dad2c300d0f2..b275d18e5d9b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2295,6 +2295,54 @@ int mt7915_mcu_get_rate_info(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx)
 				   sizeof(req), false);
 }
 
+int mt7915_mcu_set_sku(struct mt7915_phy *phy)
+{
+	struct mt7915_dev *dev = phy->dev;
+	struct mt76_phy *mphy = phy->mt76;
+	struct ieee80211_hw *hw = mphy->hw;
+	struct mt7915_sku_val {
+		u8 format_id;
+		u8 limit_type;
+		u8 dbdc_idx;
+		s8 val[MT7915_SKU_RATE_NUM];
+	} __packed req = {
+		.format_id = 4,
+		.dbdc_idx = phy != &dev->phy,
+	};
+	int i;
+	s8 *delta;
+
+	delta = dev->rate_power[mphy->chandef.chan->band];
+	mphy->txpower_cur = hw->conf.power_level * 2 +
+			    delta[MT7915_SKU_MAX_DELTA_IDX];
+
+	for (i = 0; i < MT7915_SKU_RATE_NUM; i++)
+		req.val[i] = hw->conf.power_level * 2 + delta[i];
+
+	return __mt76_mcu_send_msg(&dev->mt76,
+				   MCU_EXT_CMD_TX_POWER_FEATURE_CTRL,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable)
+{
+	struct mt7915_dev *dev = phy->dev;
+	struct mt7915_sku {
+		u8 format_id;
+		u8 sku_enable;
+		u8 dbdc_idx;
+		u8 rsv;
+	} __packed req = {
+		.format_id = 0,
+		.dbdc_idx = phy != &dev->phy,
+		.sku_enable = enable,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76,
+				   MCU_EXT_CMD_TX_POWER_FEATURE_CTRL,
+				   &req, sizeof(req), true);
+}
+
 int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band)
 {
 	struct {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 5e4708861ede..d9c9aab7e6dc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -203,6 +203,7 @@ enum {
 	MCU_EXT_CMD_MAC_INIT_CTRL = 0x46,
 	MCU_EXT_CMD_RX_HDR_TRANS = 0x47,
 	MCU_EXT_CMD_SET_RX_PATH = 0x4e,
+	MCU_EXT_CMD_TX_POWER_FEATURE_CTRL = 0x58,
 	MCU_EXT_CMD_SET_SER_TRIGGER = 0x81,
 	MCU_EXT_CMD_SCS_CTRL = 0x82,
 	MCU_EXT_CMD_RATE_CTRL = 0x87,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index b23ab3f215e0..6d3fde7f635a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -38,6 +38,9 @@
 #define MT7915_5G_RATE_DEFAULT		0x4b	/* OFDM 6M */
 #define MT7915_2G_RATE_DEFAULT		0x0	/* CCK 1M */
 
+#define MT7915_SKU_RATE_NUM		161
+#define MT7915_SKU_MAX_DELTA_IDX	MT7915_SKU_RATE_NUM
+#define MT7915_SKU_TABLE_SIZE		(MT7915_SKU_RATE_NUM + 1)
 
 struct mt7915_vif;
 struct mt7915_sta;
@@ -161,6 +164,8 @@ struct mt7915_dev {
 	spinlock_t token_lock;
 	struct idr token;
 
+	s8 **rate_power; /* TODO: use mt76_rate_power */
+
 	u8 mac_work_count;
 	bool fw_debug;
 };
@@ -268,6 +273,7 @@ u32 mt7915_eeprom_read(struct mt7915_dev *dev, u32 offset);
 int mt7915_eeprom_get_target_power(struct mt7915_dev *dev,
 				   struct ieee80211_channel *chan,
 				   u8 chain_idx);
+void mt7915_eeprom_init_sku(struct mt7915_dev *dev);
 int mt7915_dma_init(struct mt7915_dev *dev);
 void mt7915_dma_prefetch(struct mt7915_dev *dev);
 void mt7915_dma_cleanup(struct mt7915_dev *dev);
@@ -303,6 +309,8 @@ int mt7915_mcu_set_scs(struct mt7915_dev *dev, u8 band, bool enable);
 int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band);
 int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val);
 int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
+int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable);
+int mt7915_mcu_set_sku(struct mt7915_phy *phy);
 int mt7915_mcu_set_fcc5_lpn(struct mt7915_dev *dev, int val);
 int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
 			    const struct mt7915_dfs_pulse *pulse);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
index b79b5060cd77..7937c6965f59 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/pci.c
@@ -75,6 +75,28 @@ static irqreturn_t mt7915_irq_handler(int irq, void *dev_instance)
 	return IRQ_HANDLED;
 }
 
+static int
+mt7915_alloc_device(struct pci_dev *pdev, struct mt7915_dev *dev)
+{
+#define NUM_BANDS	2
+	int i;
+	s8 **sku;
+
+	sku = devm_kzalloc(&pdev->dev, NUM_BANDS * sizeof(*sku), GFP_KERNEL);
+	if (!sku)
+		return -ENOMEM;
+
+	for (i = 0; i < NUM_BANDS; i++) {
+		sku[i] = devm_kzalloc(&pdev->dev, MT7915_SKU_TABLE_SIZE *
+				      sizeof(**sku), GFP_KERNEL);
+		if (!sku[i])
+			return -ENOMEM;
+	}
+	dev->rate_power = sku;
+
+	return 0;
+}
+
 static int mt7915_pci_probe(struct pci_dev *pdev,
 			    const struct pci_device_id *id)
 {
@@ -118,6 +140,9 @@ static int mt7915_pci_probe(struct pci_dev *pdev,
 		return -ENOMEM;
 
 	dev = container_of(mdev, struct mt7915_dev, mt76);
+	ret = mt7915_alloc_device(pdev, dev);
+	if (ret)
+		return ret;
 
 	mt76_mmio_init(&dev->mt76, pcim_iomap_table(pdev)[0]);
 	mdev->rev = (mt7915_l1_rr(dev, MT_HW_CHIPID) << 16) |
-- 
cgit v1.2.3-59-g8ed1b


From 37f4ca907c462d7c8a1ac9e7e3473681b5f893dd Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:30 +0800
Subject: mt76: mt7915: register per-phy HE capabilities for each interface

The capabilities for the HE interfaces are generated from the capabilities
reported by the firmware.

This should move to a common file once other HE devices are supported.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   | 190 +++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |   1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    |   9 +
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |   3 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |   3 +
 5 files changed, 206 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index abce37ca9252..bb8325e2edbd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -239,6 +239,194 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_tx_fragments = 4;
 }
 
+static void
+mt7915_gen_ppe_thresh(u8 *he_ppet)
+{
+	int ru, nss, max_nss = 1, max_ru = 3;
+	u8 bit = 7, ru_bit_mask = 0x7;
+	u8 ppet16_ppet8_ru3_ru0[] = {0x1c, 0xc7, 0x71};
+
+	he_ppet[0] = max_nss & IEEE80211_PPE_THRES_NSS_MASK;
+	he_ppet[0] |= (ru_bit_mask <<
+		       IEEE80211_PPE_THRES_RU_INDEX_BITMASK_POS) &
+			IEEE80211_PPE_THRES_RU_INDEX_BITMASK_MASK;
+
+	for (nss = 0; nss <= max_nss; nss++) {
+		for (ru = 0; ru < max_ru; ru++) {
+			u8 val;
+			int i;
+
+			if (!(ru_bit_mask & BIT(ru)))
+				continue;
+
+			val = (ppet16_ppet8_ru3_ru0[nss] >> (ru * 6)) &
+			       0x3f;
+			val = ((val >> 3) & 0x7) | ((val & 0x7) << 3);
+			for (i = 5; i >= 0; i--) {
+				he_ppet[bit / 8] |=
+					((val >> i) & 0x1) << ((bit % 8));
+				bit++;
+			}
+		}
+	}
+}
+
+static int
+mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
+		    struct ieee80211_sband_iftype_data *data)
+{
+	int i, idx = 0;
+	int nss = hweight8(phy->chainmask);
+	u16 mcs_map = 0;
+
+	for (i = 0; i < 8; i++) {
+		if (i < nss)
+			mcs_map |= (IEEE80211_HE_MCS_SUPPORT_0_11 << (i * 2));
+		else
+			mcs_map |= (IEEE80211_HE_MCS_NOT_SUPPORTED << (i * 2));
+	}
+
+	for (i = 0; i < NUM_NL80211_IFTYPES; i++) {
+		struct ieee80211_sta_he_cap *he_cap = &data[idx].he_cap;
+		struct ieee80211_he_cap_elem *he_cap_elem =
+				&he_cap->he_cap_elem;
+		struct ieee80211_he_mcs_nss_supp *he_mcs =
+				&he_cap->he_mcs_nss_supp;
+
+		switch (i) {
+		case NL80211_IFTYPE_STATION:
+		case NL80211_IFTYPE_AP:
+#ifdef CONFIG_MAC80211_MESH
+		case NL80211_IFTYPE_MESH_POINT:
+#endif
+			break;
+		default:
+			continue;
+		}
+
+		data[idx].types_mask = BIT(i);
+		he_cap->has_he = true;
+
+		he_cap_elem->mac_cap_info[0] =
+			IEEE80211_HE_MAC_CAP0_HTC_HE;
+		he_cap_elem->mac_cap_info[1] =
+			IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_0US |
+			IEEE80211_HE_MAC_CAP1_MULTI_TID_AGG_RX_QOS_1;
+		he_cap_elem->mac_cap_info[2] =
+			IEEE80211_HE_MAC_CAP2_BSR;
+		he_cap_elem->mac_cap_info[3] =
+			IEEE80211_HE_MAC_CAP3_OMI_CONTROL |
+			IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_RESERVED;
+		he_cap_elem->mac_cap_info[4] =
+			IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU;
+
+		if (band == NL80211_BAND_2GHZ)
+			he_cap_elem->phy_cap_info[0] =
+				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G;
+		else if (band == NL80211_BAND_5GHZ)
+			he_cap_elem->phy_cap_info[0] =
+				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G |
+				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G |
+				IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+		he_cap_elem->phy_cap_info[1] =
+			IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
+		he_cap_elem->phy_cap_info[2] =
+			IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+			IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
+			IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ;
+
+		/* TODO: TxBF & MU & MESH */
+
+		switch (i) {
+		case NL80211_IFTYPE_AP:
+			he_cap_elem->mac_cap_info[0] |=
+				IEEE80211_HE_MAC_CAP0_TWT_RES;
+			he_cap_elem->mac_cap_info[4] |=
+				IEEE80211_HE_MAC_CAP4_BQR;
+			he_cap_elem->phy_cap_info[3] |=
+				IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_QPSK |
+				IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_RX_QPSK;
+			he_cap_elem->phy_cap_info[6] |=
+				IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT;
+			he_cap_elem->phy_cap_info[9] |=
+				IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU;
+			break;
+		case NL80211_IFTYPE_STATION:
+			he_cap_elem->mac_cap_info[0] |=
+				IEEE80211_HE_MAC_CAP0_TWT_REQ;
+			he_cap_elem->mac_cap_info[3] |=
+				IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED;
+
+			if (band == NL80211_BAND_2GHZ)
+				he_cap_elem->phy_cap_info[0] |=
+					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G;
+			else if (band == NL80211_BAND_5GHZ)
+				he_cap_elem->phy_cap_info[0] |=
+					IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G;
+
+			he_cap_elem->phy_cap_info[1] |=
+				IEEE80211_HE_PHY_CAP1_DEVICE_CLASS_A;
+			he_cap_elem->phy_cap_info[8] |=
+				IEEE80211_HE_PHY_CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G |
+				IEEE80211_HE_PHY_CAP8_20MHZ_IN_160MHZ_HE_PPDU |
+				IEEE80211_HE_PHY_CAP8_80MHZ_IN_160MHZ_HE_PPDU;
+			he_cap_elem->phy_cap_info[9] |=
+				IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU;
+			break;
+#ifdef CONFIG_MAC80211_MESH
+		case NL80211_IFTYPE_MESH_POINT:
+			break;
+#endif
+		}
+
+		he_mcs->rx_mcs_80 = cpu_to_le16(mcs_map);
+		he_mcs->tx_mcs_80 = cpu_to_le16(mcs_map);
+		he_mcs->rx_mcs_160 = cpu_to_le16(mcs_map);
+		he_mcs->tx_mcs_160 = cpu_to_le16(mcs_map);
+		he_mcs->rx_mcs_80p80 = cpu_to_le16(mcs_map);
+		he_mcs->tx_mcs_80p80 = cpu_to_le16(mcs_map);
+
+		memset(he_cap->ppe_thres, 0, sizeof(he_cap->ppe_thres));
+		if (he_cap_elem->phy_cap_info[6] &
+		    IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) {
+			mt7915_gen_ppe_thresh(he_cap->ppe_thres);
+		} else {
+			he_cap_elem->phy_cap_info[9] |=
+				IEEE80211_HE_PHY_CAP9_NOMIMAL_PKT_PADDING_16US;
+		}
+		idx++;
+	}
+
+	return idx;
+}
+
+void mt7915_set_stream_he_caps(struct mt7915_phy *phy)
+{
+	struct ieee80211_sband_iftype_data *data;
+	struct ieee80211_supported_band *band;
+	struct mt76_dev *mdev = &phy->dev->mt76;
+	int n;
+
+	if (mdev->cap.has_2ghz) {
+		data = phy->iftype[NL80211_BAND_2GHZ];
+		n = mt7915_init_he_caps(phy, NL80211_BAND_2GHZ, data);
+
+		band = &phy->mt76->sband_2g.sband;
+		band->iftype_data = data;
+		band->n_iftype_data = n;
+	}
+
+	if (mdev->cap.has_5ghz) {
+		data = phy->iftype[NL80211_BAND_5GHZ];
+		n = mt7915_init_he_caps(phy, NL80211_BAND_5GHZ, data);
+
+		band = &phy->mt76->sband_5g.sband;
+		band->iftype_data = data;
+		band->n_iftype_data = n;
+	}
+}
+
 static void
 mt7915_cap_dbdc_enable(struct mt7915_dev *dev)
 {
@@ -256,6 +444,7 @@ mt7915_cap_dbdc_enable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->phy.chainmask;
 
 	mt76_set_stream_caps(&dev->mt76, true);
+	mt7915_set_stream_he_caps(&dev->phy);
 }
 
 static void
@@ -271,6 +460,7 @@ mt7915_cap_dbdc_disable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->chainmask;
 
 	mt76_set_stream_caps(&dev->mt76, true);
+	mt7915_set_stream_he_caps(&dev->phy);
 }
 
 int mt7915_register_ext_phy(struct mt7915_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 231bae4184ca..097225692026 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -691,6 +691,7 @@ mt7915_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	phy->chainmask = tx_ant;
 
 	mt76_set_stream_caps(&dev->mt76, true);
+	mt7915_set_stream_he_caps(phy);
 
 	mutex_unlock(&dev->mt76.mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index b275d18e5d9b..47c0ff8f41bf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -137,11 +137,13 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 {
 	struct ieee80211_sta_ht_cap *ht_cap;
 	struct ieee80211_sta_vht_cap *vht_cap;
+	const struct ieee80211_sta_he_cap *he_cap;
 	u8 mode = 0;
 
 	if (sta) {
 		ht_cap = &sta->ht_cap;
 		vht_cap = &sta->vht_cap;
+		he_cap = &sta->he_cap;
 	} else {
 		struct ieee80211_supported_band *sband;
 
@@ -152,6 +154,7 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 		ht_cap = &sband->ht_cap;
 		vht_cap = &sband->vht_cap;
+		he_cap = ieee80211_get_he_iftype_cap(sband, vif->type);
 	}
 
 	if (band == NL80211_BAND_2GHZ) {
@@ -159,6 +162,9 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 		if (ht_cap->ht_supported)
 			mode |= PHY_MODE_GN;
+
+		if (he_cap->has_he)
+			mode |= PHY_MODE_AX_24G;
 	} else if (band == NL80211_BAND_5GHZ) {
 		mode |= PHY_MODE_A;
 
@@ -167,6 +173,9 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 
 		if (vht_cap->vht_supported)
 			mode |= PHY_MODE_AC;
+
+		if (he_cap->has_he)
+			mode |= PHY_MODE_AX_5G;
 	}
 
 	return mode;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index d9c9aab7e6dc..c71161aec767 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -815,6 +815,9 @@ enum {
 #define PHY_MODE_GN			BIT(3)
 #define PHY_MODE_AN			BIT(4)
 #define PHY_MODE_AC			BIT(5)
+#define PHY_MODE_AX_24G			BIT(6)
+#define PHY_MODE_AX_5G			BIT(7)
+#define PHY_MODE_AX_6G			BIT(8)
 
 #define MODE_CCK			BIT(0)
 #define MODE_OFDM			BIT(1)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 6d3fde7f635a..92a6bf746d73 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -122,6 +122,8 @@ struct mt7915_phy {
 	struct mt76_phy *mt76;
 	struct mt7915_dev *dev;
 
+	struct ieee80211_sband_iftype_data iftype[2][NUM_NL80211_IFTYPES];
+
 	u32 rxfilter;
 	u32 vif_mask;
 	u32 omac_mask;
@@ -443,6 +445,7 @@ void mt7915_txp_skb_unmap(struct mt76_dev *dev,
 			  struct mt76_txwi_cache *txwi);
 int mt76_dfs_start_rdd(struct mt7915_dev *dev, bool force);
 int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
+void mt7915_set_stream_he_caps(struct mt7915_phy *phy);
 void mt7915_update_channel(struct mt76_dev *mdev);
 int mt7915_init_debugfs(struct mt7915_dev *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6094f86fb3713e1b7d0c7f264c3a76263745efae Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:31 +0800
Subject: mt76: mt7915: add HE bss_conf support for interfaces

Add basic HE BSS info for interfaces. The advanced features
(i.e. BSS color, TWT, spatial reuse and OFDMA) will be added
gradually in future patches.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Shayne Chen <shayne.chen@mediatek.com>
Tested-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Acked-by: Yiwei Chung <yiwei.chung@mediatek.com>
Acked-by: YF Luo <yf.luo@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 95 ++++++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 10 +++
 2 files changed, 104 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 47c0ff8f41bf..61bab6bdcea0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -744,6 +744,47 @@ mt7915_mcu_bss_omac_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
 	omac->hw_bss_idx = idx;
 }
 
+struct mt7915_he_obss_narrow_bw_ru_data {
+	bool tolerated;
+};
+
+static void mt7915_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy,
+						   struct cfg80211_bss *bss,
+						   void *_data)
+{
+	struct mt7915_he_obss_narrow_bw_ru_data *data = _data;
+	const struct element *elem;
+
+	elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, bss->ies->data,
+				  bss->ies->len);
+	/* data[10] is read below, so the element needs at least 11 octets */
+	if (!elem || elem->datalen <= 10 ||
+	    !(elem->data[10] &
+	      WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT))
+		data->tolerated = false;
+}
+
+static bool mt7915_check_he_obss_narrow_bw_ru(struct ieee80211_hw *hw,
+					      struct ieee80211_vif *vif)
+{
+	struct mt7915_he_obss_narrow_bw_ru_data iter_data = {
+		.tolerated = true,
+	};
+
+	if (!(vif->bss_conf.chandef.chan->flags & IEEE80211_CHAN_RADAR))
+		return false;
+
+	cfg80211_bss_iter(hw->wiphy, &vif->bss_conf.chandef,
+			  mt7915_check_he_obss_narrow_bw_ru_iter,
+			  &iter_data);
+
+	/*
+	 * If there is at least one AP on radar channel that cannot
+	 * tolerate 26-tone RU UL OFDMA transmissions using HE TB PPDU.
+	 */
+	return !iter_data.tolerated;
+}
+
 static void
 mt7915_mcu_bss_rfch_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 			struct mt7915_phy *phy)
@@ -766,7 +807,20 @@ mt7915_mcu_bss_rfch_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 		ch->center_ch1 = ieee80211_frequency_to_channel(freq2);
 	}
 
-	ch->he_all_disable = true;
+	if (vif->bss_conf.he_support && vif->type == NL80211_IFTYPE_STATION) {
+		struct mt7915_dev *dev = phy->dev;
+		struct mt76_phy *mphy = &dev->mt76.phy;
+		bool ext_phy = phy != &dev->phy;
+
+		if (ext_phy && dev->mt76.phy2)
+			mphy = dev->mt76.phy2;
+
+		ch->he_ru26_block =
+			mt7915_check_he_obss_narrow_bw_ru(mphy->hw, vif);
+		ch->he_all_disable = false;
+	} else {
+		ch->he_all_disable = true;
+	}
 }
 
 static void
@@ -795,6 +849,42 @@ mt7915_mcu_bss_ra_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	ra->fast_interval = cpu_to_le32(100);
 }
 
+static void
+mt7915_mcu_bss_he_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
+		      struct mt7915_phy *phy)
+{
+#define DEFAULT_HE_PE_DURATION		4
+#define DEFAULT_HE_DURATION_RTS_THRES	1023
+	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
+	enum nl80211_band band = chandef->chan->band;
+	struct ieee80211_supported_band *sband;
+	const struct ieee80211_sta_he_cap *cap;
+	struct bss_info_he *he;
+	struct tlv *tlv;
+
+	if (band == NL80211_BAND_2GHZ)
+		sband = &phy->mt76->sband_2g.sband;
+	else
+		sband = &phy->mt76->sband_5g.sband;
+
+	cap = ieee80211_get_he_iftype_cap(sband, vif->type);
+
+	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_HE_BASIC, sizeof(*he));
+
+	he = (struct bss_info_he *)tlv;
+	he->he_pe_duration = vif->bss_conf.htc_trig_based_pkt_ext * 4;
+	if (!he->he_pe_duration)
+		he->he_pe_duration = DEFAULT_HE_PE_DURATION;
+
+	he->he_rts_thres = cpu_to_le16(vif->bss_conf.frame_time_rts_th * 32);
+	if (!he->he_rts_thres)
+		he->he_rts_thres = cpu_to_le16(DEFAULT_HE_DURATION_RTS_THRES);
+
+	he->max_nss_mcs[CMD_HE_MCS_BW80] = cap->he_mcs_nss_supp.tx_mcs_80;
+	he->max_nss_mcs[CMD_HE_MCS_BW160] = cap->he_mcs_nss_supp.tx_mcs_160;
+	he->max_nss_mcs[CMD_HE_MCS_BW8080] = cap->he_mcs_nss_supp.tx_mcs_80p80;
+}
+
 static void
 mt7915_mcu_bss_ext_tlv(struct sk_buff *skb, struct mt7915_vif *mvif)
 {
@@ -870,6 +960,9 @@ int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
 		mt7915_mcu_bss_bmc_tlv(skb, phy);
 		mt7915_mcu_bss_ra_tlv(skb, vif, phy);
 
+		if (vif->bss_conf.he_support)
+			mt7915_mcu_bss_he_tlv(skb, vif, phy);
+
 		if (mvif->omac_idx > HW_BSSID_MAX)
 			mt7915_mcu_bss_ext_tlv(skb, mvif);
 		else
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index c71161aec767..85ef1b35b265 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -396,6 +396,16 @@ struct bss_info_ra {
 	__le32 fast_interval;
 } __packed;
 
+struct bss_info_he {
+	__le16 tag;
+	__le16 len;
+	u8 he_pe_duration;
+	u8 vht_op_info_present;
+	__le16 he_rts_thres;
+	__le16 max_nss_mcs[CMD_HE_MCS_BW_NUM];
+	u8 rsv[6];
+} __packed;
+
 struct bss_info_bcn {
 	__le16 tag;
 	__le16 len;
-- 
cgit v1.2.3-59-g8ed1b


From c336318f57a92d3971719598ef4416b531811d72 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:32 +0800
Subject: mt76: mt7915: add HE capabilities support for peers

Set peer's basic HE capabilities through starec.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Suggested-by: Shihwei Lin <shihwei.lin@mediatek.com>
Tested-by: Shayne Chen <shayne.chen@mediatek.com>
Tested-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Acked-by: Yiwei Chung <yiwei.chung@mediatek.com>
Acked-by: YF Luo <yf.luo@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 197 ++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 100 ++++++++++++
 2 files changed, 297 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 61bab6bdcea0..aa24f1306941 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -85,6 +85,9 @@ struct mt7915_fw_region {
 #define to_wcid_lo(id)			FIELD_GET(GENMASK(7, 0), (u16)id)
 #define to_wcid_hi(id)			FIELD_GET(GENMASK(9, 8), (u16)id)
 
+#define HE_PHY(p, c)			u8_get_bits(c, IEEE80211_HE_PHY_##p)
+#define HE_MAC(m, c)			u8_get_bits(c, IEEE80211_HE_MAC_##m)
+
 static enum mt7915_cipher_type
 mt7915_mcu_get_cipher(int cipher)
 {
@@ -1234,6 +1237,187 @@ mt7915_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	basic->qos = sta->wme;
 }
 
+/* Add a STA_REC_HE TLV to skb, filled in from the peer's sta->he_cap. */
+static void
+mt7915_mcu_sta_he_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
+{
+	struct ieee80211_sta_he_cap *he_cap = &sta->he_cap;
+	struct ieee80211_he_cap_elem *elem = &he_cap->he_cap_elem;
+	struct sta_rec_he *he;
+	struct tlv *tlv;
+	u32 cap = 0;
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_HE, sizeof(*he));
+
+	he = (struct sta_rec_he *)tlv;
+
+	if (elem->mac_cap_info[0] & IEEE80211_HE_MAC_CAP0_HTC_HE)
+		cap |= STA_REC_HE_CAP_HTC;
+
+	if (elem->mac_cap_info[2] & IEEE80211_HE_MAC_CAP2_BSR)
+		cap |= STA_REC_HE_CAP_BSR;
+
+	if (elem->mac_cap_info[3] & IEEE80211_HE_MAC_CAP3_OMI_CONTROL)
+		cap |= STA_REC_HE_CAP_OM;
+
+	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_AMDSU_IN_AMPDU)
+		cap |= STA_REC_HE_CAP_AMSDU_IN_AMPDU;
+
+	if (elem->mac_cap_info[4] & IEEE80211_HE_MAC_CAP4_BQR)
+		cap |= STA_REC_HE_CAP_BQR;
+
+	if (elem->phy_cap_info[0] &
+	    (IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_2G |
+	     IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_RU_MAPPING_IN_5G))
+		cap |= STA_REC_HE_CAP_BW20_RU242_SUPPORT;
+
+	if (elem->phy_cap_info[1] &
+	    IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD)
+		cap |= STA_REC_HE_CAP_LDPC;
+
+	if (elem->phy_cap_info[1] &
+	    IEEE80211_HE_PHY_CAP1_HE_LTF_AND_GI_FOR_HE_PPDUS_0_8US)
+		cap |= STA_REC_HE_CAP_SU_PPDU_1LTF_8US_GI;
+
+	if (elem->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US)
+		cap |= STA_REC_HE_CAP_NDP_4LTF_3DOT2MS_GI;
+
+	if (elem->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ)
+		cap |= STA_REC_HE_CAP_LE_EQ_80M_TX_STBC;
+
+	if (elem->phy_cap_info[2] & IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ)
+		cap |= STA_REC_HE_CAP_LE_EQ_80M_RX_STBC;
+
+	if (elem->phy_cap_info[6] &
+	    IEEE80211_HE_PHY_CAP6_PARTIAL_BW_EXT_RANGE)
+		cap |= STA_REC_HE_CAP_PARTIAL_BW_EXT_RANGE;
+
+	if (elem->phy_cap_info[7] &
+	    IEEE80211_HE_PHY_CAP7_HE_SU_MU_PPDU_4XLTF_AND_08_US_GI)
+		cap |= STA_REC_HE_CAP_SU_MU_PPDU_4LTF_8US_GI;
+
+	if (elem->phy_cap_info[7] &
+	    IEEE80211_HE_PHY_CAP7_STBC_TX_ABOVE_80MHZ)
+		cap |= STA_REC_HE_CAP_GT_80M_TX_STBC;
+
+	if (elem->phy_cap_info[7] &
+	    IEEE80211_HE_PHY_CAP7_STBC_RX_ABOVE_80MHZ)
+		cap |= STA_REC_HE_CAP_GT_80M_RX_STBC;
+
+	if (elem->phy_cap_info[8] &
+	    IEEE80211_HE_PHY_CAP8_HE_ER_SU_PPDU_4XLTF_AND_08_US_GI)
+		cap |= STA_REC_HE_CAP_ER_SU_PPDU_4LTF_8US_GI;
+
+	if (elem->phy_cap_info[8] &
+	    IEEE80211_HE_PHY_CAP8_HE_ER_SU_1XLTF_AND_08_US_GI)
+		cap |= STA_REC_HE_CAP_ER_SU_PPDU_1LTF_8US_GI;
+
+	if (elem->phy_cap_info[9] &
+	    IEEE80211_HE_PHY_CAP9_NON_TRIGGERED_CQI_FEEDBACK)
+		cap |= STA_REC_HE_CAP_TRIG_CQI_FK;
+
+	if (elem->phy_cap_info[9] &
+	    IEEE80211_HE_PHY_CAP9_TX_1024_QAM_LESS_THAN_242_TONE_RU)
+		cap |= STA_REC_HE_CAP_TX_1024QAM_UNDER_RU242;
+
+	if (elem->phy_cap_info[9] &
+	    IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU)
+		cap |= STA_REC_HE_CAP_RX_1024QAM_UNDER_RU242;
+
+	he->he_cap = cpu_to_le32(cap);
+
+	/* 160 MHz peers add the 160/80+80 maps; every peer gets the 80 map */
+	switch (sta->bandwidth) {
+	case IEEE80211_STA_RX_BW_160:
+		if (elem->phy_cap_info[0] &
+		    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G)
+			he->max_nss_mcs[CMD_HE_MCS_BW8080] =
+				he_cap->he_mcs_nss_supp.rx_mcs_80p80;
+
+		he->max_nss_mcs[CMD_HE_MCS_BW160] =
+				he_cap->he_mcs_nss_supp.rx_mcs_160;
+		/* fall through */
+	default:
+		he->max_nss_mcs[CMD_HE_MCS_BW80] =
+				he_cap->he_mcs_nss_supp.rx_mcs_80;
+		break;
+	}
+
+	he->t_frame_dur =
+		HE_MAC(CAP1_TF_MAC_PAD_DUR_MASK, elem->mac_cap_info[1]);
+	he->max_ampdu_exp =
+		HE_MAC(CAP3_MAX_AMPDU_LEN_EXP_MASK, elem->mac_cap_info[3]);
+
+	he->bw_set =
+		HE_PHY(CAP0_CHANNEL_WIDTH_SET_MASK, elem->phy_cap_info[0]);
+	he->device_class =
+		HE_PHY(CAP1_DEVICE_CLASS_A, elem->phy_cap_info[1]);
+	he->punc_pream_rx =
+		HE_PHY(CAP1_PREAMBLE_PUNC_RX_MASK, elem->phy_cap_info[1]);
+
+	he->dcm_tx_mode =
+		HE_PHY(CAP3_DCM_MAX_CONST_TX_MASK, elem->phy_cap_info[3]);
+	he->dcm_tx_max_nss =
+		HE_PHY(CAP3_DCM_MAX_TX_NSS_2, elem->phy_cap_info[3]);
+	he->dcm_rx_mode =
+		HE_PHY(CAP3_DCM_MAX_CONST_RX_MASK, elem->phy_cap_info[3]);
+	he->dcm_rx_max_nss =
+		HE_PHY(CAP3_DCM_MAX_RX_NSS_2, elem->phy_cap_info[3]);
+	/* CAP8 max-RU goes in its own field; don't clobber dcm_rx_max_nss */
+	he->dcm_max_ru =
+		HE_PHY(CAP8_DCM_MAX_RU_MASK, elem->phy_cap_info[8]);
+
+	he->pkt_ext = 2;
+}
+
+static void
+mt7915_mcu_sta_muru_tlv(struct sk_buff *skb, struct ieee80211_sta *sta)
+{
+	struct ieee80211_sta_he_cap *he_cap = &sta->he_cap;
+	struct ieee80211_he_cap_elem *elem = &he_cap->he_cap_elem;
+	struct sta_rec_muru *muru;
+	struct tlv *tlv;
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_MURU, sizeof(*muru));
+
+	muru = (struct sta_rec_muru *)tlv;
+	muru->cfg.ofdma_dl_en = true;
+	muru->cfg.ofdma_ul_en = true;
+	muru->cfg.mimo_dl_en = true;
+	muru->cfg.mimo_ul_en = true;
+
+	muru->ofdma_dl.punc_pream_rx =
+		HE_PHY(CAP1_PREAMBLE_PUNC_RX_MASK, elem->phy_cap_info[1]);
+	muru->ofdma_dl.he_20m_in_40m_2g =
+		HE_PHY(CAP8_20MHZ_IN_40MHZ_HE_PPDU_IN_2G, elem->phy_cap_info[8]);
+	muru->ofdma_dl.he_20m_in_160m =
+		HE_PHY(CAP8_20MHZ_IN_160MHZ_HE_PPDU, elem->phy_cap_info[8]);
+	muru->ofdma_dl.he_80m_in_160m =
+		HE_PHY(CAP8_80MHZ_IN_160MHZ_HE_PPDU, elem->phy_cap_info[8]);
+	muru->ofdma_dl.lt16_sigb = 0;
+	muru->ofdma_dl.rx_su_comp_sigb = 0;
+	muru->ofdma_dl.rx_su_non_comp_sigb = 0;
+
+	muru->ofdma_ul.t_frame_dur =
+		HE_MAC(CAP1_TF_MAC_PAD_DUR_MASK, elem->mac_cap_info[1]);
+	muru->ofdma_ul.mu_cascading =
+		HE_MAC(CAP2_MU_CASCADING, elem->mac_cap_info[2]);
+	muru->ofdma_ul.uo_ra =
+		HE_MAC(CAP3_OFDMA_RA, elem->mac_cap_info[3]);
+	muru->ofdma_ul.he_2x996_tone = 0;
+	muru->ofdma_ul.rx_t_frame_11ac = 0;
+
+	muru->mimo_dl.vht_mu_bfee =
+		!!(sta->vht_cap.cap & IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE);
+	muru->mimo_dl.partial_bw_dl_mimo =
+		HE_PHY(CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO, elem->phy_cap_info[6]);
+
+	muru->mimo_ul.full_ul_mimo =
+		HE_PHY(CAP2_UL_MU_FULL_MU_MIMO, elem->phy_cap_info[2]);
+	muru->mimo_ul.partial_ul_mimo =
+		HE_PHY(CAP2_UL_MU_PARTIAL_MU_MIMO, elem->phy_cap_info[2]);
+}
+
 static void
 mt7915_mcu_sta_tlv(struct mt7915_dev *dev, struct sk_buff *skb,
 		   struct ieee80211_sta *sta)
@@ -1259,6 +1443,14 @@ mt7915_mcu_sta_tlv(struct mt7915_dev *dev, struct sk_buff *skb,
 		vht->vht_rx_mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
 		vht->vht_tx_mcs_map = sta->vht_cap.vht_mcs.tx_mcs_map;
 	}
+
+	/* starec he */
+	if (sta->he_cap.has_he)
+		mt7915_mcu_sta_he_tlv(skb, sta);
+
+	/* starec muru */
+	if (sta->he_cap.has_he || sta->vht_cap.vht_supported)
+		mt7915_mcu_sta_muru_tlv(skb, sta);
 }
 
 static void
@@ -1454,6 +1646,11 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 		}
 	}
 
+	if (sta->he_cap.has_he) {
+		ra->supp_mode |= MODE_HE;
+		cap |= STA_CAP_HE;
+	}
+
 	ra->sta_status = cpu_to_le32(cap);
 
 	switch (BIT(fls(ra->supp_mode) - 1)) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 85ef1b35b265..cc0e27730aab 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -658,6 +658,75 @@ struct sta_rec_vht {
 	u8 rsv[3];
 } __packed;
 
+struct sta_rec_muru {
+	__le16 tag;
+	__le16 len;
+
+	struct {
+		bool ofdma_dl_en;
+		bool ofdma_ul_en;
+		bool mimo_dl_en;
+		bool mimo_ul_en;
+		bool rsv[4];
+	} cfg;
+
+	struct {
+		u8 punc_pream_rx;
+		bool he_20m_in_40m_2g;
+		bool he_20m_in_160m;
+		bool he_80m_in_160m;
+		bool lt16_sigb;
+		bool rx_su_comp_sigb;
+		bool rx_su_non_comp_sigb;
+		bool rsv;
+	} ofdma_dl;
+
+	struct {
+		u8 t_frame_dur;
+		u8 mu_cascading;
+		u8 uo_ra;
+		u8 he_2x996_tone;
+		u8 rx_t_frame_11ac;
+		u8 rsv[3];
+	} ofdma_ul;
+
+	struct {
+		bool vht_mu_bfee;
+		bool partial_bw_dl_mimo;
+		u8 rsv[2];
+	} mimo_dl;
+
+	struct {
+		bool full_ul_mimo;
+		bool partial_ul_mimo;
+		u8 rsv[2];
+	} mimo_ul;
+} __packed;
+
+struct sta_rec_he {
+	__le16 tag;
+	__le16 len;
+
+	__le32 he_cap;
+
+	u8 t_frame_dur;
+	u8 max_ampdu_exp;
+	u8 bw_set;
+	u8 device_class;
+	u8 dcm_tx_mode;
+	u8 dcm_tx_max_nss;
+	u8 dcm_rx_mode;
+	u8 dcm_rx_max_nss;
+	u8 dcm_max_ru;
+	u8 punc_pream_rx;
+	u8 pkt_ext;
+	u8 rsv1;
+
+	__le16 max_nss_mcs[CMD_HE_MCS_BW_NUM];
+
+	u8 rsv2[2];
+} __packed;
+
 struct sta_rec_ba {
 	__le16 tag;
 	__le16 len;
@@ -803,9 +872,11 @@ enum {
 #define MT7915_STA_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
 					 sizeof(struct sta_rec_basic) +	\
 					 sizeof(struct sta_rec_ht) +	\
+					 sizeof(struct sta_rec_he) +	\
 					 sizeof(struct sta_rec_ba) +	\
 					 sizeof(struct sta_rec_vht) +	\
 					 sizeof(struct tlv) +		\
+					 sizeof(struct sta_rec_muru) +	\
 					 sizeof(struct sta_rec_sec) +	\
 					 sizeof(struct sta_rec_ra) +	\
 					 MT7915_WTBL_UPDATE_MAX_SIZE)
@@ -833,6 +904,7 @@ enum {
 #define MODE_OFDM			BIT(1)
 #define MODE_HT				BIT(2)
 #define MODE_VHT			BIT(3)
+#define MODE_HE				BIT(4)
 
 #define STA_CAP_WMM			BIT(0)
 #define STA_CAP_SGI_20			BIT(4)
@@ -847,5 +919,33 @@ enum {
 #define STA_CAP_LDPC			BIT(24)
 #define STA_CAP_HT			BIT(26)
 #define STA_CAP_VHT			BIT(27)
+#define STA_CAP_HE			BIT(28)
+
+/* HE MAC */
+#define STA_REC_HE_CAP_HTC			BIT(0)
+#define STA_REC_HE_CAP_BQR			BIT(1)
+#define STA_REC_HE_CAP_BSR			BIT(2)
+#define STA_REC_HE_CAP_OM			BIT(3)
+#define STA_REC_HE_CAP_AMSDU_IN_AMPDU		BIT(4)
+/* HE PHY */
+#define STA_REC_HE_CAP_DUAL_BAND		BIT(5)
+#define STA_REC_HE_CAP_LDPC			BIT(6)
+#define STA_REC_HE_CAP_TRIG_CQI_FK		BIT(7)
+#define STA_REC_HE_CAP_PARTIAL_BW_EXT_RANGE	BIT(8)
+/* STBC */
+#define STA_REC_HE_CAP_LE_EQ_80M_TX_STBC	BIT(9)
+#define STA_REC_HE_CAP_LE_EQ_80M_RX_STBC	BIT(10)
+#define STA_REC_HE_CAP_GT_80M_TX_STBC		BIT(11)
+#define STA_REC_HE_CAP_GT_80M_RX_STBC		BIT(12)
+/* GI */
+#define STA_REC_HE_CAP_SU_PPDU_1LTF_8US_GI	BIT(13)
+#define STA_REC_HE_CAP_SU_MU_PPDU_4LTF_8US_GI	BIT(14)
+#define STA_REC_HE_CAP_ER_SU_PPDU_1LTF_8US_GI	BIT(15)
+#define STA_REC_HE_CAP_ER_SU_PPDU_4LTF_8US_GI	BIT(16)
+#define STA_REC_HE_CAP_NDP_4LTF_3DOT2MS_GI	BIT(17)
+/* 242 TONE */
+#define STA_REC_HE_CAP_BW20_RU242_SUPPORT	BIT(18)
+#define STA_REC_HE_CAP_TX_1024QAM_UNDER_RU242	BIT(19)
+#define STA_REC_HE_CAP_RX_1024QAM_UNDER_RU242	BIT(20)
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From a82dd24d128d6399bfcb7f692732caf33827e1df Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:33 +0800
Subject: mt76: mt7915: add Rx radiotap header support

mac80211 expects the definition of what HE rate info is available
inside a struct prepended to the skb.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 141 ++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mac.h |  16 +++
 2 files changed, 157 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index dd5b30a066b3..751363b4b7a2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -9,6 +9,10 @@
 
 #define to_rssi(field, rxv)	((FIELD_GET(field, rxv) - 220) / 2)
 
+#define HE_BITS(f)		cpu_to_le16(IEEE80211_RADIOTAP_HE_##f)
+#define HE_PREP(f, m, v)	le16_encode_bits(le32_get_bits(v, MT_CRXV_HE_##m),\
+						 IEEE80211_RADIOTAP_HE_##f)
+
 static const struct mt7915_dfs_radar_spec etsi_radar_specs = {
 	.pulse_th = { 110, -10, -80, 40, 5200, 128, 5200 },
 	.radar_pattern = {
@@ -172,6 +176,138 @@ void mt7915_mac_sta_poll(struct mt7915_dev *dev)
 	rcu_read_unlock();
 }
 
+static void
+mt7915_mac_decode_he_radiotap_ru(struct mt76_rx_status *status,
+				 struct mt7915_rxv *rxv,
+				 struct ieee80211_radiotap_he *he)
+{
+	u32 ru_h, ru_l;
+	u8 ru, offs = 0;
+
+	ru_l = FIELD_GET(MT_PRXV_HE_RU_ALLOC_L, le32_to_cpu(rxv->v[0]));
+	ru_h = FIELD_GET(MT_PRXV_HE_RU_ALLOC_H, le32_to_cpu(rxv->v[1]));
+	ru = (u8)(ru_l | ru_h << 4);
+
+	status->bw = RATE_INFO_BW_HE_RU;
+
+	switch (ru) {
+	case 0 ... 36:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_26;
+		offs = ru;
+		break;
+	case 37 ... 52:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_52;
+		offs = ru - 37;
+		break;
+	case 53 ... 60:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_106;
+		offs = ru - 53;
+		break;
+	case 61 ... 64:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_242;
+		offs = ru - 61;
+		break;
+	case 65 ... 66:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_484;
+		offs = ru - 65;
+		break;
+	case 67:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_996;
+		break;
+	case 68:
+		status->he_ru = NL80211_RATE_INFO_HE_RU_ALLOC_2x996;
+		break;
+	}
+
+	he->data1 |= HE_BITS(DATA1_BW_RU_ALLOC_KNOWN);
+	he->data2 |= HE_BITS(DATA2_RU_OFFSET_KNOWN) |
+		     le16_encode_bits(offs,
+				      IEEE80211_RADIOTAP_HE_DATA2_RU_OFFSET);
+}
+
+static void
+mt7915_mac_decode_he_radiotap(struct sk_buff *skb,
+			      struct mt76_rx_status *status,
+			      struct mt7915_rxv *rxv)
+{
+	/* TODO: struct ieee80211_radiotap_he_mu */
+	static const struct ieee80211_radiotap_he known = {
+		.data1 = HE_BITS(DATA1_DATA_MCS_KNOWN) |
+			 HE_BITS(DATA1_DATA_DCM_KNOWN) |
+			 HE_BITS(DATA1_STBC_KNOWN) |
+			 HE_BITS(DATA1_CODING_KNOWN),
+		.data2 = HE_BITS(DATA2_GI_KNOWN) |
+			 HE_BITS(DATA2_TXBF_KNOWN),
+	};
+	struct ieee80211_radiotap_he *he = NULL;
+	__le32 v2 = rxv->v[2];
+	__le32 v11 = rxv->v[11];
+	__le32 v14 = rxv->v[14];
+	u32 ltf_size = le32_get_bits(v2, MT_CRXV_HE_LTF_SIZE) + 1;
+
+	he = skb_push(skb, sizeof(known));
+	memcpy(he, &known, sizeof(known));
+
+	he->data1 = HE_BITS(DATA1_LDPC_XSYMSEG_KNOWN) |
+		    HE_BITS(DATA1_DOPPLER_KNOWN) |
+		    HE_BITS(DATA1_BSS_COLOR_KNOWN);
+	he->data2 = HE_BITS(DATA2_PE_DISAMBIG_KNOWN) |
+		    HE_BITS(DATA2_TXOP_KNOWN);
+
+	he->data3 = HE_PREP(DATA3_BSS_COLOR, BSS_COLOR, v14) |
+		    HE_PREP(DATA3_LDPC_XSYMSEG, LDPC_EXT_SYM, v2);
+	he->data5 = HE_PREP(DATA5_PE_DISAMBIG, PE_DISAMBIG, v2) |
+		    le16_encode_bits(ltf_size,
+				     IEEE80211_RADIOTAP_HE_DATA5_LTF_SIZE);
+	he->data6 = HE_PREP(DATA6_TXOP, TXOP_DUR, v14) |
+		    HE_PREP(DATA6_DOPPLER, DOPPLER, v14);
+
+	switch (rxv->phy) {
+	case MT_PHY_TYPE_HE_SU:
+		he->data1 |= HE_BITS(DATA1_FORMAT_SU) |
+			     HE_BITS(DATA1_UL_DL_KNOWN) |
+			     HE_BITS(DATA1_BEAM_CHANGE_KNOWN) |
+			     HE_BITS(DATA1_SPTL_REUSE_KNOWN);
+
+		he->data3 |= HE_PREP(DATA3_BEAM_CHANGE, BEAM_CHNG, v14) |
+			     HE_PREP(DATA3_UL_DL, UPLINK, v2);
+		he->data4 |= HE_PREP(DATA4_SU_MU_SPTL_REUSE, SR_MASK, v11);
+		break;
+	case MT_PHY_TYPE_HE_EXT_SU:
+		he->data1 |= HE_BITS(DATA1_FORMAT_EXT_SU) |
+			     HE_BITS(DATA1_UL_DL_KNOWN);
+
+		he->data3 |= HE_PREP(DATA3_UL_DL, UPLINK, v2);
+		break;
+	case MT_PHY_TYPE_HE_MU:
+		he->data1 |= HE_BITS(DATA1_FORMAT_MU) |
+			     HE_BITS(DATA1_UL_DL_KNOWN) |
+			     HE_BITS(DATA1_SPTL_REUSE_KNOWN);
+
+		he->data3 |= HE_PREP(DATA3_UL_DL, UPLINK, v2);
+		he->data4 |= HE_PREP(DATA4_SU_MU_SPTL_REUSE, SR_MASK, v11);
+
+		mt7915_mac_decode_he_radiotap_ru(status, rxv, he);
+		break;
+	case MT_PHY_TYPE_HE_TB:
+		he->data1 |= HE_BITS(DATA1_FORMAT_TRIG) |
+			     HE_BITS(DATA1_SPTL_REUSE_KNOWN) |
+			     HE_BITS(DATA1_SPTL_REUSE2_KNOWN) |
+			     HE_BITS(DATA1_SPTL_REUSE3_KNOWN) |
+			     HE_BITS(DATA1_SPTL_REUSE4_KNOWN);
+
+		he->data4 = HE_PREP(DATA4_TB_SPTL_REUSE1, SR_MASK, v11) |
+			    HE_PREP(DATA4_TB_SPTL_REUSE2, SR1_MASK, v11) |
+			    HE_PREP(DATA4_TB_SPTL_REUSE3, SR2_MASK, v11) |
+			    HE_PREP(DATA4_TB_SPTL_REUSE4, SR3_MASK, v11);
+
+		mt7915_mac_decode_he_radiotap_ru(status, rxv, he);
+		break;
+	default:
+		break;
+	}
+}
+
 int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 {
 	struct mt76_rx_status *status = (struct mt76_rx_status *)skb->cb;
@@ -348,6 +484,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 					return -EINVAL;
 				break;
 			case MT_PHY_TYPE_HE_MU:
+				status->flag |= RX_FLAG_RADIOTAP_HE_MU;
 				/* fall through */
 			case MT_PHY_TYPE_HE_SU:
 			case MT_PHY_TYPE_HE_EXT_SU:
@@ -355,6 +492,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 				status->nss =
 					FIELD_GET(MT_PRXV_NSTS, rxv.v[0]) + 1;
 				status->encoding = RX_ENC_HE;
+				status->flag |= RX_FLAG_RADIOTAP_HE;
 				i &= GENMASK(3, 0);
 
 				if (gi <= NL80211_RATE_INFO_HE_GI_3_2)
@@ -405,6 +543,9 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 		mt76_insert_ccmp_hdr(skb, key_id);
 	}
 
+	if (status->flag & RX_FLAG_RADIOTAP_HE)
+		mt7915_mac_decode_he_radiotap(skb, status, &rxv);
+
 	hdr = mt76_skb_get_hdr(skb);
 	if (!status->wcid || !ieee80211_is_data_qos(hdr->frame_control))
 		return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
index 712753484aee..b9bc8b25b031 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.h
@@ -101,6 +101,8 @@ enum rx_pkt_type {
 #define MT_PRXV_TX_ER_SU_106T		BIT(5)
 #define MT_PRXV_NSTS			GENMASK(9, 7)
 #define MT_PRXV_HT_AD_CODE		BIT(11)
+#define MT_PRXV_HE_RU_ALLOC_L		GENMASK(31, 28)
+#define MT_PRXV_HE_RU_ALLOC_H		GENMASK(3, 0)
 #define MT_PRXV_RCPI3			GENMASK(31, 24)
 #define MT_PRXV_RCPI2			GENMASK(23, 16)
 #define MT_PRXV_RCPI1			GENMASK(15, 8)
@@ -111,6 +113,20 @@ enum rx_pkt_type {
 #define MT_CRXV_TX_MODE			GENMASK(7, 4)
 #define MT_CRXV_FRAME_MODE		GENMASK(10, 8)
 #define MT_CRXV_HT_SHORT_GI		GENMASK(14, 13)
+#define MT_CRXV_HE_LTF_SIZE		GENMASK(18, 17)
+#define MT_CRXV_HE_LDPC_EXT_SYM		BIT(20)
+#define MT_CRXV_HE_PE_DISAMBIG		BIT(23)
+#define MT_CRXV_HE_UPLINK		BIT(31)
+
+#define MT_CRXV_HE_SR_MASK		GENMASK(11, 8)
+#define MT_CRXV_HE_SR1_MASK		GENMASK(16, 12)
+#define MT_CRXV_HE_SR2_MASK             GENMASK(20, 17)
+#define MT_CRXV_HE_SR3_MASK             GENMASK(24, 21)
+
+#define MT_CRXV_HE_BSS_COLOR		GENMASK(5, 0)
+#define MT_CRXV_HE_TXOP_DUR		GENMASK(12, 6)
+#define MT_CRXV_HE_BEAM_CHNG		BIT(13)
+#define MT_CRXV_HE_DOPPLER		BIT(16)
 
 struct mt7915_rxv {
 	u32 phy;
-- 
cgit v1.2.3-59-g8ed1b


From ec9742a8f38ef69876e9f04be68d985c6bbb8f5f Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:34 +0800
Subject: mt76: mt7915: add .sta_add_debugfs support

This generation supports many more per-peer statistics than legacy ones,
so add .sta_add_debugfs accordingly.

This is convenient to set/get more settings/counters in the long run.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 65 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |  3 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  4 ++
 3 files changed, 72 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 2e3f05f73697..7f67a1a8013b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -302,3 +302,68 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 
 	return 0;
 }
+
+/** per-station debugfs **/
+
+static int
+mt7915_sta_stats_read(struct seq_file *s, void *data)
+{
+	struct ieee80211_sta *sta = s->private;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_sta_stats *stats = &msta->stats;
+	struct rate_info *rate = &stats->prob_rate;
+	static const char * const bw[] = {
+		"BW20", "BW5", "BW10", "BW40",
+		"BW80", "BW160", "BW_HE_RU"
+	};
+
+	if (!rate->legacy && !rate->flags)
+		return 0;
+
+	seq_puts(s, "Probing rate - ");
+	if (rate->flags & RATE_INFO_FLAGS_MCS)
+		seq_puts(s, "HT ");
+	else if (rate->flags & RATE_INFO_FLAGS_VHT_MCS)
+		seq_puts(s, "VHT ");
+	else if (rate->flags & RATE_INFO_FLAGS_HE_MCS)
+		seq_puts(s, "HE ");
+	else
+		seq_printf(s, "Bitrate %d\n", rate->legacy);
+
+	if (rate->flags) {
+		seq_printf(s, "%s NSS%d MCS%d ",
+			   bw[rate->bw], rate->nss, rate->mcs);
+
+		if (rate->flags & RATE_INFO_FLAGS_SHORT_GI)
+			seq_puts(s, "SGI ");
+		else if (rate->he_gi)
+			seq_puts(s, "HE GI ");
+
+		if (rate->he_dcm)
+			seq_puts(s, "DCM ");
+	}
+
+	seq_printf(s, "\nPPDU PER: %ld.%1ld%%\n",
+		   stats->per / 10, stats->per % 10);
+
+	return 0;
+}
+
+static int
+mt7915_sta_stats_open(struct inode *inode, struct file *f)
+{
+	return single_open(f, mt7915_sta_stats_read, inode->i_private);
+}
+
+static const struct file_operations fops_sta_stats = {
+	.open = mt7915_sta_stats_open,
+	.read = seq_read,
+	.llseek = seq_lseek,
+	.release = single_release,
+};
+
+void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    struct ieee80211_sta *sta, struct dentry *dir)
+{
+	debugfs_create_file("stats", 0400, dir, sta, &fops_sta_stats);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 097225692026..6cb69ae6cb0e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -746,4 +746,7 @@ const struct ieee80211_ops mt7915_ops = {
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7915_set_antenna,
 	.set_coverage_class = mt7915_set_coverage_class,
+#ifdef CONFIG_MAC80211_DEBUGFS
+	.sta_add_debugfs = mt7915_sta_add_debugfs,
+#endif
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 92a6bf746d73..35e34d270c15 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -448,5 +448,9 @@ int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
 void mt7915_set_stream_he_caps(struct mt7915_phy *phy);
 void mt7915_update_channel(struct mt76_dev *mdev);
 int mt7915_init_debugfs(struct mt7915_dev *dev);
+#ifdef CONFIG_MAC80211_DEBUGFS
+void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+			    struct ieee80211_sta *sta, struct dentry *dir);
+#endif
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From b02eafae42099943928bf7c253640d3bb0a3a949 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:35 +0800
Subject: mt76: mt7915: add .sta_statistics support

Add useful debug counters since this generation uses struct rate_info
to report HE tx rate.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 26 ++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 6cb69ae6cb0e..b3e26ff32287 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -698,6 +698,31 @@ mt7915_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	return 0;
 }
 
+static void mt7915_sta_statistics(struct ieee80211_hw *hw,
+				  struct ieee80211_vif *vif,
+				  struct ieee80211_sta *sta,
+				  struct station_info *sinfo)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_sta_stats *stats = &msta->stats;
+
+	if (!stats->tx_rate.legacy && !stats->tx_rate.flags)
+		return;
+
+	if (stats->tx_rate.legacy) {
+		sinfo->txrate.legacy = stats->tx_rate.legacy;
+	} else {
+		sinfo->txrate.mcs = stats->tx_rate.mcs;
+		sinfo->txrate.nss = stats->tx_rate.nss;
+		sinfo->txrate.bw = stats->tx_rate.bw;
+		sinfo->txrate.he_gi = stats->tx_rate.he_gi;
+		sinfo->txrate.he_dcm = stats->tx_rate.he_dcm;
+		sinfo->txrate.he_ru_alloc = stats->tx_rate.he_ru_alloc;
+	}
+	sinfo->txrate.flags = stats->tx_rate.flags;
+	sinfo->filled |= BIT_ULL(NL80211_STA_INFO_TX_BITRATE);
+}
+
 static void
 mt7915_sta_rc_update(struct ieee80211_hw *hw,
 		     struct ieee80211_vif *vif,
@@ -746,6 +771,7 @@ const struct ieee80211_ops mt7915_ops = {
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7915_set_antenna,
 	.set_coverage_class = mt7915_set_coverage_class,
+	.sta_statistics = mt7915_sta_statistics,
 #ifdef CONFIG_MAC80211_DEBUGFS
 	.sta_add_debugfs = mt7915_sta_add_debugfs,
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 9fac3c81eebd81bbce8b050e15b03d3490841717 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:36 +0800
Subject: mt76: mt7915: set peer Tx fixed rate through debugfs

The driver can manually set a fixed rate for each peer through debugfs.

The .set_bitrate_mask callback, iterating over the stations under the
current vif, may be used to achieve this once it supports HE rates.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Shayne Chen <shayne.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 13 ++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 46 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    | 28 +++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  2 +
 4 files changed, 89 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index 7f67a1a8013b..cf3b60ded6ef 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -305,6 +305,18 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 
 /** per-station debugfs **/
 
+/* usage: <tx mode> <ldpc> <stbc> <bw> <gi> <nss> <mcs> */
+static int mt7915_sta_fixed_rate_set(void *data, u64 rate)
+{
+	struct ieee80211_sta *sta = data;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+
+	return mt7915_mcu_set_fixed_rate(msta->vif->dev, sta, rate);
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_fixed_rate, NULL,
+			 mt7915_sta_fixed_rate_set, "%llx\n");
+
 static int
 mt7915_sta_stats_read(struct seq_file *s, void *data)
 {
@@ -365,5 +377,6 @@ static const struct file_operations fops_sta_stats = {
 void mt7915_sta_add_debugfs(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			    struct ieee80211_sta *sta, struct dentry *dir)
 {
+	debugfs_create_file("fixed_rate", 0600, dir, sta, &fops_fixed_rate);
 	debugfs_create_file("stats", 0400, dir, sta, &fops_sta_stats);
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index aa24f1306941..cbf96a56947a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1735,6 +1735,52 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 	return 0;
 }
 
+int mt7915_mcu_set_fixed_rate(struct mt7915_dev *dev,
+			      struct ieee80211_sta *sta, u32 rate)
+{
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_vif *mvif = msta->vif;
+	struct sta_rec_ra_fixed *ra;
+	struct sk_buff *skb;
+	struct tlv *tlv;
+
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
+				       MT7915_STA_UPDATE_MAX_SIZE);
+	if (IS_ERR(skb))
+		return PTR_ERR(skb);
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_RA_UPDATE, sizeof(*ra));
+	ra = (struct sta_rec_ra_fixed *)tlv;
+
+	if (!rate) {
+		ra->field = cpu_to_le32(RATE_PARAM_AUTO);
+		goto out;
+	} else {
+		ra->field = cpu_to_le32(RATE_PARAM_FIXED);
+	}
+
+	ra->phy.type = FIELD_GET(RATE_CFG_PHY_TYPE, rate);
+	ra->phy.bw = FIELD_GET(RATE_CFG_BW, rate);
+	ra->phy.nss = FIELD_GET(RATE_CFG_NSS, rate);
+	ra->phy.mcs = FIELD_GET(RATE_CFG_MCS, rate);
+	ra->phy.stbc = FIELD_GET(RATE_CFG_STBC, rate);
+
+	if (ra->phy.bw)
+		ra->phy.ldpc = 7;
+	else
+		ra->phy.ldpc = FIELD_GET(RATE_CFG_LDPC, rate) * 7;
+
+	/* HT/VHT - SGI: 1, LGI: 0; HE - SGI: 0, MGI: 1, LGI: 2 */
+	if (ra->phy.type > MT_PHY_TYPE_VHT)
+		ra->phy.sgi = ra->phy.mcs * 85;
+	else
+		ra->phy.sgi = ra->phy.mcs * 15;
+
+out:
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
+}
+
 int mt7915_mcu_add_dev_info(struct mt7915_dev *dev,
 			    struct ieee80211_vif *vif, bool enable)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index cc0e27730aab..b8d28d971974 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -803,6 +803,34 @@ struct sta_rec_ra {
 	struct ra_phy phy;
 } __packed;
 
+struct sta_rec_ra_fixed {
+	__le16 tag;
+	__le16 len;
+
+	__le32 field;
+	u8 op_mode;
+	u8 op_vht_chan_width;
+	u8 op_vht_rx_nss;
+	u8 op_vht_rx_nss_type;
+
+	struct ra_phy phy;
+
+	u8 spe_en;
+	u8 short_preamble;
+	u8 is_5g;
+	u8 mmps_mode;
+} __packed;
+
+#define RATE_PARAM_FIXED		3
+#define RATE_PARAM_AUTO			20
+#define RATE_CFG_MCS			GENMASK(3, 0)
+#define RATE_CFG_NSS			GENMASK(7, 4)
+#define RATE_CFG_GI			GENMASK(11, 8)
+#define RATE_CFG_BW			GENMASK(15, 12)
+#define RATE_CFG_STBC			GENMASK(19, 16)
+#define RATE_CFG_LDPC			GENMASK(23, 20)
+#define RATE_CFG_PHY_TYPE		GENMASK(27, 24)
+
 enum {
 	STA_REC_BASIC,
 	STA_REC_RA,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 35e34d270c15..e5821d144c85 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -303,6 +303,8 @@ int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			struct ieee80211_sta *sta);
 int mt7915_mcu_set_chan_info(struct mt7915_phy *phy, int cmd);
 int mt7915_mcu_set_tx(struct mt7915_dev *dev, struct ieee80211_vif *vif);
+int mt7915_mcu_set_fixed_rate(struct mt7915_dev *dev,
+			      struct ieee80211_sta *sta, u32 rate);
 int mt7915_mcu_set_eeprom(struct mt7915_dev *dev);
 int mt7915_mcu_get_eeprom(struct mt7915_dev *dev, u32 offset);
 int mt7915_mcu_set_mac(struct mt7915_dev *dev, int band, bool enable,
-- 
cgit v1.2.3-59-g8ed1b


From 32add88f641bc0b3b63661771aea8dcee1d84396 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:37 +0800
Subject: mt76: mt7915: add tsf related callbacks

It is useful for IBSS Mesh to adjust t_clockdrift.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Reported-by: Shayne Chen <shayne.chen@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c | 53 ++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h | 11 +++++
 2 files changed, 64 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index b3e26ff32287..ef0c91990b49 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -655,6 +655,57 @@ mt7915_get_stats(struct ieee80211_hw *hw,
 	return 0;
 }
 
+static u64
+mt7915_get_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	bool band = phy != &dev->phy;
+	union {
+		u64 t64;
+		u32 t32[2];
+	} tsf;
+	u16 n;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	n = mvif->omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : mvif->omac_idx;
+	/* TSF software read */
+	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_MODE);
+	tsf.t32[0] = mt76_rr(dev, MT_LPON_UTTR0(band));
+	tsf.t32[1] = mt76_rr(dev, MT_LPON_UTTR1(band));
+
+	mutex_unlock(&dev->mt76.mutex);
+
+	return tsf.t64;
+}
+
+static void
+mt7915_set_tsf(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
+	       u64 timestamp)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_dev *dev = mt7915_hw_dev(hw);
+	struct mt7915_phy *phy = mt7915_hw_phy(hw);
+	bool band = phy != &dev->phy;
+	union {
+		u64 t64;
+		u32 t32[2];
+	} tsf = { .t64 = timestamp, };
+	u16 n;
+
+	mutex_lock(&dev->mt76.mutex);
+
+	n = mvif->omac_idx > HW_BSSID_MAX ? HW_BSSID_0 : mvif->omac_idx;
+	mt76_wr(dev, MT_LPON_UTTR0(band), tsf.t32[0]);
+	mt76_wr(dev, MT_LPON_UTTR1(band), tsf.t32[1]);
+	/* TSF software overwrite */
+	mt76_set(dev, MT_LPON_TCR(band, n), MT_LPON_TCR_SW_WRITE);
+
+	mutex_unlock(&dev->mt76.mutex);
+}
+
 static void
 mt7915_set_coverage_class(struct ieee80211_hw *hw, s16 coverage_class)
 {
@@ -767,6 +818,8 @@ const struct ieee80211_ops mt7915_ops = {
 	.get_txpower = mt76_get_txpower,
 	.channel_switch_beacon = mt7915_channel_switch_beacon,
 	.get_stats = mt7915_get_stats,
+	.get_tsf = mt7915_get_tsf,
+	.set_tsf = mt7915_set_tsf,
 	.get_survey = mt76_get_survey,
 	.get_antenna = mt76_get_antenna,
 	.set_antenna = mt7915_set_antenna,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index 572bdc16fb2d..6600fc625196 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -75,6 +75,17 @@
 #define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 3)
 #define MT_DMA_DCR0_RXD_G5_EN		BIT(23)
 
+/* LPON: band 0(0x24200), band 1(0xa4200) */
+#define MT_WF_LPON_BASE(_band)		((_band) ? 0xa4200 : 0x24200)
+#define MT_WF_LPON(_band, ofs)		(MT_WF_LPON_BASE(_band) + (ofs))
+
+#define MT_LPON_UTTR0(_band)		MT_WF_LPON(_band, 0x080)
+#define MT_LPON_UTTR1(_band)		MT_WF_LPON(_band, 0x084)
+
+#define MT_LPON_TCR(_band, n)		MT_WF_LPON(_band, 0x0a8 + (n) * 4)
+#define MT_LPON_TCR_SW_MODE		GENMASK(1, 0)
+#define MT_LPON_TCR_SW_WRITE		BIT(0)
+
 /* MIB: band 0(0x24800), band 1(0xa4800) */
 #define MT_WF_MIB_BASE(_band)		((_band) ? 0xa4800 : 0x24800)
 #define MT_WF_MIB(_band, ofs)		(MT_WF_MIB_BASE(_band) + (ofs))
-- 
cgit v1.2.3-59-g8ed1b


From 5517f78b0063d0463d042c68ac0c651db47ecf90 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:38 +0800
Subject: mt76: mt7915: enable firmware module debug support

This allows the host driver to get useful information about some important modules.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Chih-Min Chen <chih-min.chen@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 36 ++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 58 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  2 +
 4 files changed, 99 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index cf3b60ded6ef..c6c009cd773e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -74,6 +74,41 @@ mt7915_dbdc_get(void *data, u64 *val)
 DEFINE_DEBUGFS_ATTRIBUTE(fops_dbdc, mt7915_dbdc_get,
 			 mt7915_dbdc_set, "%lld\n");
 
+static int
+mt7915_fw_debug_set(void *data, u64 val)
+{
+	struct mt7915_dev *dev = data;
+	enum {
+		DEBUG_TXCMD = 62,
+		DEBUG_CMD_RPT_TX,
+		DEBUG_CMD_RPT_TRIG,
+		DEBUG_SPL,
+		DEBUG_RPT_RX,
+	} debug;
+
+	dev->fw_debug = !!val;
+
+	mt7915_mcu_fw_log_2_host(dev, dev->fw_debug ? 2 : 0);
+
+	for (debug = DEBUG_TXCMD; debug <= DEBUG_RPT_RX; debug++)
+		mt7915_mcu_fw_dbg_ctrl(dev, debug, dev->fw_debug);
+
+	return 0;
+}
+
+static int
+mt7915_fw_debug_get(void *data, u64 *val)
+{
+	struct mt7915_dev *dev = data;
+
+	*val = dev->fw_debug;
+
+	return 0;
+}
+
+DEFINE_DEBUGFS_ATTRIBUTE(fops_fw_debug, mt7915_fw_debug_get,
+			 mt7915_fw_debug_set, "%lld\n");
+
 static void
 mt7915_ampdu_stat_read_phy(struct mt7915_phy *phy,
 			   struct seq_file *file)
@@ -290,6 +325,7 @@ int mt7915_init_debugfs(struct mt7915_dev *dev)
 				    mt7915_queues_acq);
 	debugfs_create_file("tx_stats", 0400, dir, dev, &fops_tx_stats);
 	debugfs_create_file("dbdc", 0600, dir, dev, &fops_dbdc);
+	debugfs_create_file("fw_debug", 0600, dir, dev, &fops_fw_debug);
 	debugfs_create_u32("dfs_hw_pattern", 0400, dir, &dev->hw_pattern);
 	/* test knobs */
 	debugfs_create_file("radar_trigger", 0200, dir, dev,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index cbf96a56947a..3e3d6f1d555a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -502,6 +502,28 @@ mt7915_mcu_tx_rate_report(struct mt7915_dev *dev, struct sk_buff *skb)
 	}
 }
 
+static void
+mt7915_mcu_rx_log_message(struct mt7915_dev *dev, struct sk_buff *skb)
+{
+	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
+	const char *data = (char *)&rxd[1];
+	const char *type;
+
+	switch (rxd->s2d_index) {
+	case 0:
+		type = "WM";
+		break;
+	case 2:
+		type = "WA";
+		break;
+	default:
+		type = "unknown";
+		break;
+	}
+
+	wiphy_info(mt76_hw(dev)->wiphy, "%s: %s", type, data);
+}
+
 static void
 mt7915_mcu_rx_ext_event(struct mt7915_dev *dev, struct sk_buff *skb)
 {
@@ -519,6 +541,9 @@ mt7915_mcu_rx_ext_event(struct mt7915_dev *dev, struct sk_buff *skb)
 	case MCU_EXT_EVENT_RATE_REPORT:
 		mt7915_mcu_tx_rate_report(dev, skb);
 		break;
+	case MCU_EXT_EVENT_FW_LOG_2_HOST:
+		mt7915_mcu_rx_log_message(dev, skb);
+		break;
 	default:
 		break;
 	}
@@ -544,6 +569,7 @@ void mt7915_mcu_rx_event(struct mt7915_dev *dev, struct sk_buff *skb)
 	struct mt7915_mcu_rxd *rxd = (struct mt7915_mcu_rxd *)skb->data;
 
 	if (rxd->ext_eid == MCU_EXT_EVENT_THERMAL_PROTECT ||
+	    rxd->ext_eid == MCU_EXT_EVENT_FW_LOG_2_HOST ||
 	    rxd->ext_eid == MCU_EXT_EVENT_ASSERT_DUMP ||
 	    rxd->ext_eid == MCU_EXT_EVENT_PS_SYNC ||
 	    rxd->ext_eid == MCU_EXT_EVENT_RATE_REPORT ||
@@ -2265,6 +2291,37 @@ static int mt7915_load_firmware(struct mt7915_dev *dev)
 	return 0;
 }
 
+int mt7915_mcu_fw_log_2_host(struct mt7915_dev *dev, u8 ctrl)
+{
+	struct {
+		u8 ctrl_val;
+		u8 pad[3];
+	} data = {
+		.ctrl_val = ctrl
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_FW_LOG_2_HOST,
+				   &data, sizeof(data), true);
+}
+
+int mt7915_mcu_fw_dbg_ctrl(struct mt7915_dev *dev, u32 module, u8 level)
+{
+	struct {
+		u8 ver;
+		u8 pad;
+		u16 len;
+		u8 level;
+		u8 rsv[3];
+		u32 module_idx;
+	} data = {
+		.module_idx = cpu_to_le32(module),
+		.level = level,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_FW_DBG_CTRL,
+				   &data, sizeof(data), false);
+}
+
 int mt7915_mcu_init(struct mt7915_dev *dev)
 {
 	static const struct mt76_mcu_ops mt7915_mcu_ops = {
@@ -2286,6 +2343,7 @@ int mt7915_mcu_init(struct mt7915_dev *dev)
 		return ret;
 
 	set_bit(MT76_STATE_MCU_RUNNING, &dev->mphy.state);
+	mt7915_mcu_fw_log_2_host(dev, 0);
 
 	return 0;
 }
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index b8d28d971974..f68d2094ac6f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -38,6 +38,7 @@ enum {
 /* ext event table */
 enum {
 	MCU_EXT_EVENT_PS_SYNC = 0x5,
+	MCU_EXT_EVENT_FW_LOG_2_HOST = 0x13,
 	MCU_EXT_EVENT_THERMAL_PROTECT = 0x22,
 	MCU_EXT_EVENT_ASSERT_DUMP = 0x23,
 	MCU_EXT_EVENT_RDD_REPORT = 0x3a,
@@ -192,6 +193,7 @@ enum {
 	MCU_EXT_CMD_EFUSE_ACCESS = 0x01,
 	MCU_EXT_CMD_PM_STATE_CTRL = 0x07,
 	MCU_EXT_CMD_CHANNEL_SWITCH = 0x08,
+	MCU_EXT_CMD_FW_LOG_2_HOST = 0x13,
 	MCU_EXT_CMD_EFUSE_BUFFER_MODE = 0x21,
 	MCU_EXT_CMD_STA_REC_UPDATE = 0x25,
 	MCU_EXT_CMD_BSS_INFO_UPDATE = 0x26,
@@ -207,6 +209,7 @@ enum {
 	MCU_EXT_CMD_SET_SER_TRIGGER = 0x81,
 	MCU_EXT_CMD_SCS_CTRL = 0x82,
 	MCU_EXT_CMD_RATE_CTRL = 0x87,
+	MCU_EXT_CMD_FW_DBG_CTRL = 0x95,
 	MCU_EXT_CMD_SET_RDD_TH = 0x9d,
 };
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index e5821d144c85..43c956bc09f5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -324,6 +324,8 @@ int mt7915_mcu_get_rate_info(struct mt7915_dev *dev, u32 cmd, u16 wlan_idx);
 int mt7915_mcu_get_temperature(struct mt7915_dev *dev, int index);
 int mt7915_mcu_rdd_cmd(struct mt7915_dev *dev, enum mt7915_rdd_cmd cmd,
 		       u8 index, u8 rx_sel, u8 val);
+int mt7915_mcu_fw_log_2_host(struct mt7915_dev *dev, u8 ctrl);
+int mt7915_mcu_fw_dbg_ctrl(struct mt7915_dev *dev, u32 module, u8 level);
 void mt7915_mcu_rx_event(struct mt7915_dev *dev, struct sk_buff *skb);
 void mt7915_mcu_exit(struct mt7915_dev *dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From bb3e3fec509e2b6103b8782b652f447a42212ae8 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 25 Apr 2020 03:32:39 +0800
Subject: mt76: set runtime stream caps by mt76_phy

This patch can support concurrent dual-band operation.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c        | 16 ++++++++--------
 drivers/net/wireless/mediatek/mt76/mt76.h            |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7615/init.c     |  4 ++--
 drivers/net/wireless/mediatek/mt76/mt7615/main.c     |  2 +-
 drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c |  2 +-
 drivers/net/wireless/mediatek/mt76/mt7915/init.c     |  4 ++--
 drivers/net/wireless/mediatek/mt76/mt7915/main.c     |  2 +-
 7 files changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index f65e1b3e8f7a..21407704f1b3 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -116,12 +116,12 @@ static void mt76_led_cleanup(struct mt76_dev *dev)
 	led_classdev_unregister(&dev->led_cdev);
 }
 
-static void mt76_init_stream_cap(struct mt76_dev *dev,
+static void mt76_init_stream_cap(struct mt76_phy *phy,
 				 struct ieee80211_supported_band *sband,
 				 bool vht)
 {
 	struct ieee80211_sta_ht_cap *ht_cap = &sband->ht_cap;
-	int i, nstream = hweight8(dev->phy.antenna_mask);
+	int i, nstream = hweight8(phy->antenna_mask);
 	struct ieee80211_sta_vht_cap *vht_cap;
 	u16 mcs_map = 0;
 
@@ -153,12 +153,12 @@ static void mt76_init_stream_cap(struct mt76_dev *dev,
 	vht_cap->vht_mcs.tx_mcs_map = cpu_to_le16(mcs_map);
 }
 
-void mt76_set_stream_caps(struct mt76_dev *dev, bool vht)
+void mt76_set_stream_caps(struct mt76_phy *phy, bool vht)
 {
-	if (dev->cap.has_2ghz)
-		mt76_init_stream_cap(dev, &dev->phy.sband_2g.sband, false);
-	if (dev->cap.has_5ghz)
-		mt76_init_stream_cap(dev, &dev->phy.sband_5g.sband, vht);
+	if (phy->dev->cap.has_2ghz)
+		mt76_init_stream_cap(phy, &phy->sband_2g.sband, false);
+	if (phy->dev->cap.has_5ghz)
+		mt76_init_stream_cap(phy, &phy->sband_5g.sband, vht);
 }
 EXPORT_SYMBOL_GPL(mt76_set_stream_caps);
 
@@ -199,7 +199,7 @@ mt76_init_sband(struct mt76_dev *dev, struct mt76_sband *msband,
 	ht_cap->mcs.tx_params = IEEE80211_HT_MCS_TX_DEFINED;
 	ht_cap->ampdu_factor = IEEE80211_HT_MAX_AMPDU_64K;
 
-	mt76_init_stream_cap(dev, sband, vht);
+	mt76_init_stream_cap(&dev->phy, sband, vht);
 
 	if (!vht)
 		return 0;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 88959c1d5d1e..e6de4a1b8f26 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -821,7 +821,7 @@ void mt76_set_channel(struct mt76_phy *phy);
 void mt76_update_survey(struct mt76_dev *dev);
 int mt76_get_survey(struct ieee80211_hw *hw, int idx,
 		    struct survey_info *survey);
-void mt76_set_stream_caps(struct mt76_dev *dev, bool vht);
+void mt76_set_stream_caps(struct mt76_phy *phy, bool vht);
 
 int mt76_rx_aggr_start(struct mt76_dev *dev, struct mt76_wcid *wcid, u8 tid,
 		       u16 ssn, u16 size);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 37fc70197f92..6e1a17f08f5e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -333,7 +333,7 @@ mt7615_cap_dbdc_enable(struct mt7615_dev *dev)
 	dev->phy.chainmask = dev->mphy.antenna_mask;
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->phy.chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->phy.chainmask;
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(&dev->mphy, true);
 }
 
 static void
@@ -346,7 +346,7 @@ mt7615_cap_dbdc_disable(struct mt7615_dev *dev)
 	dev->phy.chainmask = dev->chainmask;
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->chainmask;
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(&dev->mphy, true);
 }
 
 int mt7615_register_ext_phy(struct mt7615_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index cfe024b71677..402ff38f7dcf 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -784,7 +784,7 @@ mt7615_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	}
 	phy->chainmask = tx_ant;
 
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(phy->mt76, true);
 
 	mutex_unlock(&dev->mt76.mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
index a74599f7f729..98f4cf398320 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x2/pci_main.c
@@ -119,7 +119,7 @@ static int mt76x2_set_antenna(struct ieee80211_hw *hw, u32 tx_ant,
 	dev->chainmask = (tx_ant == 3) ? 0x202 : 0x101;
 	dev->mphy.antenna_mask = tx_ant;
 
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(&dev->mphy, true);
 	mt76x2_phy_set_antenna(dev);
 
 	mutex_unlock(&dev->mt76.mutex);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index bb8325e2edbd..7d59571216e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -443,7 +443,7 @@ mt7915_cap_dbdc_enable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->phy.chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->phy.chainmask;
 
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(&dev->mphy, true);
 	mt7915_set_stream_he_caps(&dev->phy);
 }
 
@@ -459,7 +459,7 @@ mt7915_cap_dbdc_disable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_rx = dev->chainmask;
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->chainmask;
 
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(&dev->mphy, true);
 	mt7915_set_stream_he_caps(&dev->phy);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index ef0c91990b49..697045e65b24 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -741,7 +741,7 @@ mt7915_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	}
 	phy->chainmask = tx_ant;
 
-	mt76_set_stream_caps(&dev->mt76, true);
+	mt76_set_stream_caps(phy->mt76, true);
 	mt7915_set_stream_he_caps(phy);
 
 	mutex_unlock(&dev->mt76.mutex);
-- 
cgit v1.2.3-59-g8ed1b


From 3fb31939b782cda8a9cd187bde60b867a5e13bf4 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 8 May 2020 18:26:29 +0200
Subject: mt76: mt7663u: copy key pointer in mt7663u_mac_write_txwi

Copy the key pointer before calling mt76_tx_status_skb_add() in
mt7663u_mac_write_txwi(), since info->control.hw_key will be overwritten
when mt76_tx_cb is set up for probing frames.

Co-developed-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Johannes Berg <johannes@sipsolutions.net>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index f2825b8f4539..06235f83f903 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -66,6 +66,7 @@ mt7663u_mac_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 		       struct sk_buff *skb)
 {
 	struct ieee80211_tx_info *info = IEEE80211_SKB_CB(skb);
+	struct ieee80211_key_conf *key = info->control.hw_key;
 	__le32 *txwi;
 	int pid;
 
@@ -76,8 +77,7 @@ mt7663u_mac_write_txwi(struct mt7615_dev *dev, struct mt76_wcid *wcid,
 
 	txwi = (__le32 *)(skb->data - MT_USB_TXD_SIZE);
 	memset(txwi, 0, MT_USB_TXD_SIZE);
-	mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta,
-			      pid, info->control.hw_key, false);
+	mt7615_mac_write_txwi(dev, txwi, skb, wcid, sta, pid, key, false);
 	skb_push(skb, MT_USB_TXD_SIZE);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 6e7b2ebac329d5cafc5868386e01627efcd95b32 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 9 May 2020 23:38:35 +0200
Subject: mt76: mt7663u: add missing register definitions

Add missing PLE/PSE base register definitions for mt7663u

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index 06235f83f903..01d60c02512e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -23,6 +23,8 @@ static const u32 mt7663u_reg_map[] = {
 	[MT_CSR_BASE]		= 0x40000000,
 	[MT_EFUSE_ADDR_BASE]	= 0x78011000,
 	[MT_TOP_MISC_BASE]	= 0x81020000,
+	[MT_PLE_BASE]		= 0x82060000,
+	[MT_PSE_BASE]		= 0x82068000,
 	[MT_PHY_BASE]		= 0x82070000,
 	[MT_WTBL_BASE_ADDR]	= 0x820e0000,
 	[MT_CFG_BASE]		= 0x820f0000,
-- 
cgit v1.2.3-59-g8ed1b


From da9e36ca163166fc6e53a69866b85a4ed2a273f9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 9 May 2020 23:40:05 +0200
Subject: mt76: mt7615: usb: cancel ps work stopping the vif

Cancel possible power_save work before stopping the mt7663u interface

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index 01d60c02512e..d74253319622 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -51,6 +51,7 @@ static void mt7663u_stop(struct ieee80211_hw *hw)
 	struct mt7615_dev *dev = hw->priv;
 
 	clear_bit(MT76_STATE_RUNNING, &dev->mphy.state);
+	cancel_work_sync(&phy->ps_work);
 	cancel_delayed_work_sync(&phy->scan_work);
 	cancel_delayed_work_sync(&phy->mac_work);
 	mt76u_stop_tx(&dev->mt76);
-- 
cgit v1.2.3-59-g8ed1b


From b0efe6dd21e6a7c7fc591d95cceb203a365ffc30 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:32 +0800
Subject: mt76: mt7915: introduce mt7915_get_he_phy_cap

Add a helper to reduce duplicated code.
This is a preliminary patch to add Tx beamforming support.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 31 ++++++++++++++-----------
 1 file changed, 18 insertions(+), 13 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 3e3d6f1d555a..c147a033250a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -134,6 +134,18 @@ static u8 mt7915_mcu_chan_bw(struct cfg80211_chan_def *chandef)
 	return width_to_bw[chandef->width];
 }
 
+static const struct ieee80211_sta_he_cap *
+mt7915_get_he_phy_cap(struct mt7915_phy *phy, struct ieee80211_vif *vif)
+{
+	struct ieee80211_supported_band *sband;
+	enum nl80211_band band;
+
+	band = phy->mt76->chandef.chan->band;
+	sband = phy->mt76->hw->wiphy->bands[band];
+
+	return ieee80211_get_he_iftype_cap(sband, vif->type);
+}
+
 static u8
 mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		    enum nl80211_band band, struct ieee80211_sta *sta)
@@ -149,11 +161,12 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		he_cap = &sta->he_cap;
 	} else {
 		struct ieee80211_supported_band *sband;
+		struct mt7915_phy *phy;
+		struct mt7915_vif *mvif;
 
-		if (band == NL80211_BAND_2GHZ)
-			sband = &dev->mphy.sband_2g.sband;
-		else
-			sband = &dev->mphy.sband_5g.sband;
+		mvif = (struct mt7915_vif *)vif->drv_priv;
+		phy = mvif->band_idx ? mt7915_ext_phy(dev) : &dev->phy;
+		sband = phy->mt76->hw->wiphy->bands[band];
 
 		ht_cap = &sband->ht_cap;
 		vht_cap = &sband->vht_cap;
@@ -884,19 +897,11 @@ mt7915_mcu_bss_he_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 {
 #define DEFAULT_HE_PE_DURATION		4
 #define DEFAULT_HE_DURATION_RTS_THRES	1023
-	struct cfg80211_chan_def *chandef = &phy->mt76->chandef;
-	enum nl80211_band band = chandef->chan->band;
-	struct ieee80211_supported_band *sband;
 	const struct ieee80211_sta_he_cap *cap;
 	struct bss_info_he *he;
 	struct tlv *tlv;
 
-	if (band == NL80211_BAND_2GHZ)
-		sband = &phy->mt76->sband_2g.sband;
-	else
-		sband = &phy->mt76->sband_5g.sband;
-
-	cap = ieee80211_get_he_iftype_cap(sband, vif->type);
+	cap = mt7915_get_he_phy_cap(phy, vif);
 
 	tlv = mt7915_mcu_add_tlv(skb, BSS_INFO_HE_BASIC, sizeof(*he));
 
-- 
cgit v1.2.3-59-g8ed1b


From 89029a85482cbcf68026a89fc974e8f6898d6b37 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:33 +0800
Subject: mt76: mt7915: add Tx beamformer support

Enable TxBF modules and trigger sounding process to support Tx beamformer.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  20 ++
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |   9 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 358 ++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |  78 +++--
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |   4 +
 5 files changed, 434 insertions(+), 35 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 7d59571216e3..3721bd632b4d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -57,6 +57,25 @@ static void mt7915_mac_init(struct mt7915_dev *dev)
 	mt7915_mcu_set_rts_thresh(&dev->phy, 0x92b);
 }
 
+static int mt7915_txbf_init(struct mt7915_dev *dev)
+{
+	int ret;
+
+	/*
+	 * TODO: DBDC & check whether iBF phase calibration data has
+	 * been stored in eeprom offset 0x651~0x7b8, then write down
+	 * 0x1111 into 0x651 and 0x651 to trigger iBF.
+	 */
+
+	/* trigger sounding packets */
+	ret = mt7915_mcu_set_txbf_sounding(dev);
+	if (ret)
+		return ret;
+
+	/* enable iBF & eBF */
+	return mt7915_mcu_set_txbf_type(dev);
+}
+
 static void
 mt7915_init_txpower_band(struct mt7915_dev *dev,
 			 struct ieee80211_supported_band *sband)
@@ -97,6 +116,7 @@ static void mt7915_init_work(struct work_struct *work)
 	mt7915_mcu_set_eeprom(dev);
 	mt7915_mac_init(dev);
 	mt7915_init_txpower(dev);
+	mt7915_txbf_init(dev);
 }
 
 static int mt7915_init_hardware(struct mt7915_dev *dev)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 697045e65b24..f3a373fc07e5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -485,7 +485,7 @@ int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
-	int idx;
+	int ret, idx;
 
 	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT7915_WTBL_STA - 1);
 	if (idx < 0)
@@ -504,9 +504,11 @@ int mt7915_mac_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	mt7915_mac_wtbl_update(dev, idx,
 			       MT_WTBL_UPDATE_ADM_COUNT_CLEAR);
 
-	mt7915_mcu_add_sta(dev, vif, sta, true);
+	ret = mt7915_mcu_add_sta(dev, vif, sta, true);
+	if (ret)
+		return ret;
 
-	return 0;
+	return mt7915_mcu_add_sta_adv(dev, vif, sta, true);
 }
 
 void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
@@ -515,6 +517,7 @@ void mt7915_mac_sta_remove(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 	struct mt7915_dev *dev = container_of(mdev, struct mt7915_dev, mt76);
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
 
+	mt7915_mcu_add_sta_adv(dev, vif, sta, false);
 	mt7915_mcu_add_sta(dev, vif, sta, false);
 
 	mt7915_mac_wtbl_update(dev, msta->wcid.idx,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index c147a033250a..58d0adca8b31 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -66,6 +66,8 @@ struct mt7915_fw_region {
 
 #define MCU_PATCH_ADDRESS		0x200000
 
+#define MT_STA_BFER			BIT(0)
+
 #define FW_FEATURE_SET_ENCRYPT		BIT(0)
 #define FW_FEATURE_SET_KEY_IDX		GENMASK(2, 1)
 #define FW_FEATURE_OVERRIDE_ADDR	BIT(5)
@@ -197,6 +199,21 @@ mt7915_get_phy_mode(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 	return mode;
 }
 
+static u8
+mt7915_mcu_get_sta_nss(u16 mcs_map)
+{
+	u8 nss;
+
+	for (nss = 8; nss > 0; nss--) {
+		u8 nss_mcs = (mcs_map >> (2 * (nss - 1))) & 3;
+
+		if (nss_mcs != IEEE80211_VHT_MCS_NOT_SUPPORTED)
+			break;
+	}
+
+	return nss - 1;
+}
+
 static int __mt7915_mcu_msg_send(struct mt7915_dev *dev, struct sk_buff *skb,
 				 int cmd, int *wait_seq)
 {
@@ -1561,6 +1578,279 @@ int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 				       MCU_EXT_CMD_STA_REC_UPDATE, true);
 }
 
+static void
+mt7915_mcu_sta_sounding_rate(struct sta_rec_bf *bf)
+{
+	bf->sounding_phy = MT_PHY_TYPE_OFDM;
+	bf->ndp_rate = 0;				/* mcs0 */
+	bf->ndpa_rate = MT7915_CFEND_RATE_DEFAULT;	/* ofdm 24m */
+	bf->rept_poll_rate = MT7915_CFEND_RATE_DEFAULT;	/* ofdm 24m */
+}
+
+static void
+mt7915_mcu_sta_bfer_ht(struct ieee80211_sta *sta, struct sta_rec_bf *bf)
+{
+	struct ieee80211_mcs_info *mcs = &sta->ht_cap.mcs;
+	u8 n = 0;
+
+	bf->tx_mode = MT_PHY_TYPE_HT;
+	bf->bf_cap |= MT_IBF;
+
+	if (mcs->tx_params & IEEE80211_HT_MCS_TX_RX_DIFF &&
+	    (mcs->tx_params & IEEE80211_HT_MCS_TX_DEFINED))
+		n = FIELD_GET(IEEE80211_HT_MCS_TX_MAX_STREAMS_MASK,
+			      mcs->tx_params);
+	else if (mcs->rx_mask[3])
+		n = 3;
+	else if (mcs->rx_mask[2])
+		n = 2;
+	else if (mcs->rx_mask[1])
+		n = 1;
+
+	bf->nc = min_t(u8, bf->nr, n);
+	bf->ibf_ncol = bf->nc;
+
+	if (sta->bandwidth <= IEEE80211_STA_RX_BW_40 && !bf->nc)
+		bf->ibf_timeout = 0x48;
+}
+
+static void
+mt7915_mcu_sta_bfer_vht(struct ieee80211_sta *sta, struct mt7915_phy *phy,
+			struct sta_rec_bf *bf)
+{
+	struct ieee80211_sta_vht_cap *pc = &sta->vht_cap;
+	struct ieee80211_sta_vht_cap *vc = &phy->mt76->sband_5g.sband.vht_cap;
+	u8 bfee_nr, bfer_nr, n, tx_ant = hweight8(phy->chainmask) - 1;
+	u16 mcs_map;
+
+	bf->tx_mode = MT_PHY_TYPE_VHT;
+	bf->bf_cap |= MT_EBF;
+
+	mt7915_mcu_sta_sounding_rate(bf);
+
+	bfee_nr = FIELD_GET(IEEE80211_VHT_CAP_BEAMFORMEE_STS_MASK,
+			    pc->cap);
+	bfer_nr = FIELD_GET(IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK,
+			    vc->cap);
+	mcs_map = le16_to_cpu(pc->vht_mcs.rx_mcs_map);
+
+	n = min_t(u8, bfer_nr, bfee_nr);
+	bf->nr = min_t(u8, n, tx_ant);
+	n = mt7915_mcu_get_sta_nss(mcs_map);
+
+	bf->nc = min_t(u8, n, bf->nr);
+	bf->ibf_ncol = bf->nc;
+
+	/* force nr from 4 to 2 (bf->nr holds the count minus one) */
+	if (sta->bandwidth == IEEE80211_STA_RX_BW_160)
+		bf->nr = 1;
+}
+
+static void
+mt7915_mcu_sta_bfer_he(struct ieee80211_sta *sta, struct ieee80211_vif *vif,
+		       struct mt7915_phy *phy, struct sta_rec_bf *bf)
+{
+	struct ieee80211_sta_he_cap *pc = &sta->he_cap;
+	struct ieee80211_he_cap_elem *pe = &pc->he_cap_elem;
+	const struct ieee80211_he_cap_elem *ve;
+	const struct ieee80211_sta_he_cap *vc;
+	u8 bfee_nr, bfer_nr, nss_mcs;
+	u16 mcs_map;
+
+	vc = mt7915_get_he_phy_cap(phy, vif);
+	ve = &vc->he_cap_elem;
+
+	bf->tx_mode = MT_PHY_TYPE_HE_SU;
+	bf->bf_cap |= MT_EBF;
+
+	mt7915_mcu_sta_sounding_rate(bf);
+
+	bf->trigger_su = HE_PHY(CAP6_TRIG_SU_BEAMFORMER_FB,
+				pe->phy_cap_info[6]);
+	bf->trigger_mu = HE_PHY(CAP6_TRIG_MU_BEAMFORMER_FB,
+				pe->phy_cap_info[6]);
+	bfer_nr = HE_PHY(CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK,
+			 ve->phy_cap_info[5]);
+	bfee_nr = HE_PHY(CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK,
+			 pe->phy_cap_info[4]);
+
+	mcs_map = le16_to_cpu(pc->he_mcs_nss_supp.tx_mcs_80);
+	nss_mcs = mt7915_mcu_get_sta_nss(mcs_map);
+
+	bf->nr = min_t(u8, bfer_nr, bfee_nr);
+	bf->nc = min_t(u8, nss_mcs, bf->nr);
+	bf->ibf_ncol = bf->nc;
+
+	if (sta->bandwidth != IEEE80211_STA_RX_BW_160)
+		return;
+
+	/* go over for 160MHz and 80p80 */
+	if (pe->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G) {
+		mcs_map = le16_to_cpu(pc->he_mcs_nss_supp.rx_mcs_160);
+		nss_mcs = mt7915_mcu_get_sta_nss(mcs_map);
+
+		bf->nc_bw160 = nss_mcs;
+	}
+
+	if (pe->phy_cap_info[0] &
+	    IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G) {
+		mcs_map = le16_to_cpu(pc->he_mcs_nss_supp.rx_mcs_80p80);
+		nss_mcs = mt7915_mcu_get_sta_nss(mcs_map);
+
+		if (bf->nc_bw160)
+			bf->nc_bw160 = min_t(u8, bf->nc_bw160, nss_mcs);
+		else
+			bf->nc_bw160 = nss_mcs;
+	}
+
+	bfer_nr = HE_PHY(CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK,
+			 ve->phy_cap_info[5]);
+	bfee_nr = HE_PHY(CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_MASK,
+			 pe->phy_cap_info[4]);
+
+	bf->nr_bw160 = min_t(int, bfer_nr, bfee_nr);
+}
+
+static void
+mt7915_mcu_sta_bfer_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
+			struct ieee80211_vif *vif, struct mt7915_phy *phy,
+			bool enable)
+{
+	struct sta_rec_bf *bf;
+	struct tlv *tlv;
+	int tx_ant = hweight8(phy->chainmask) - 1;
+	const u8 matrix[4][4] = {
+		{0, 0, 0, 0},
+		{1, 1, 0, 0},	/* 2x1, 2x2, 2x3, 2x4 */
+		{2, 4, 4, 0},	/* 3x1, 3x2, 3x3, 3x4 */
+		{3, 5, 6, 0}	/* 4x1, 4x2, 4x3, 4x4 */
+	};
+
+#define MT_BFER_FREE		cpu_to_le16(GENMASK(15, 0))
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_BF, sizeof(*bf));
+	bf = (struct sta_rec_bf *)tlv;
+
+	if (!enable) {
+		bf->pfmu = MT_BFER_FREE;
+		return;
+	}
+
+	bf->bw = sta->bandwidth;
+	bf->ibf_dbw = sta->bandwidth;
+	bf->ibf_nrow = tx_ant;
+	bf->ibf_timeout = 0x18;
+
+	if (sta->he_cap.has_he)
+		mt7915_mcu_sta_bfer_he(sta, vif, phy, bf);
+	else if (sta->vht_cap.vht_supported)
+		mt7915_mcu_sta_bfer_vht(sta, phy, bf);
+	else if (sta->ht_cap.ht_supported)
+		mt7915_mcu_sta_bfer_ht(sta, bf);
+
+	if (bf->bf_cap & MT_EBF && bf->nr != tx_ant)
+		bf->mem_20m = matrix[tx_ant][bf->nc];
+	else
+		bf->mem_20m = matrix[bf->nr][bf->nc];
+
+	switch (sta->bandwidth) {
+	case IEEE80211_STA_RX_BW_160:
+	case IEEE80211_STA_RX_BW_80:
+		bf->mem_total = bf->mem_20m * 2;
+		break;
+	case IEEE80211_STA_RX_BW_40:
+		bf->mem_total = bf->mem_20m;
+		break;
+	case IEEE80211_STA_RX_BW_20:
+	default:
+		break;
+	}
+}
+
+static u8
+mt7915_mcu_sta_txbf_type(struct mt7915_phy *phy, struct ieee80211_vif *vif,
+			 struct ieee80211_sta *sta)
+{
+	struct mt7915_sta *msta;
+	u8 type = 0;
+
+	if (vif->type != NL80211_IFTYPE_STATION &&
+	    vif->type != NL80211_IFTYPE_AP)
+		return 0;
+
+	msta = (struct mt7915_sta *)sta->drv_priv;
+
+	if (sta->he_cap.has_he) {
+		struct ieee80211_he_cap_elem *pe;
+		const struct ieee80211_he_cap_elem *ve;
+		const struct ieee80211_sta_he_cap *vc;
+
+		pe = &sta->he_cap.he_cap_elem;
+		vc = mt7915_get_he_phy_cap(phy, vif);
+		ve = &vc->he_cap_elem;
+
+		if ((HE_PHY(CAP3_SU_BEAMFORMER, ve->phy_cap_info[3]) ||
+		     HE_PHY(CAP4_MU_BEAMFORMER, ve->phy_cap_info[4])) &&
+		    HE_PHY(CAP4_SU_BEAMFORMEE, pe->phy_cap_info[4]))
+			type |= MT_STA_BFER;
+	} else if (sta->vht_cap.vht_supported) {
+		struct ieee80211_sta_vht_cap *pc;
+		struct ieee80211_sta_vht_cap *vc;
+		u32 cr, ce;
+
+		pc = &sta->vht_cap;
+		vc = &phy->mt76->sband_5g.sband.vht_cap;
+		cr = IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+		     IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE;
+		ce = IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+		     IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
+
+		if ((vc->cap & cr) && (pc->cap & ce))
+			type |= MT_STA_BFER;
+	} else if (sta->ht_cap.ht_supported) {
+		/* TODO: iBF */
+	}
+
+	return type;
+}
+
+static int
+mt7915_mcu_add_txbf(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+		    struct ieee80211_sta *sta, bool enable)
+{
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
+	struct mt7915_phy *phy;
+	struct sk_buff *skb;
+	int r, len;
+	u8 type;
+
+	phy = mvif->band_idx ? mt7915_ext_phy(dev) : &dev->phy;
+
+	type = mt7915_mcu_sta_txbf_type(phy, vif, sta);
+
+	/* must keep each tag independent */
+
+	/* starec bf */
+	if (type & MT_STA_BFER) {
+		len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_bf);
+
+		skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
+		if (IS_ERR(skb))
+			return PTR_ERR(skb);
+
+		mt7915_mcu_sta_bfer_tlv(skb, sta, vif, phy, enable);
+
+		r = __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+					    MCU_EXT_CMD_STA_REC_UPDATE, true);
+		if (r)
+			return r;
+	}
+
+	return 0;
+}
+
 static void
 mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 			     struct ieee80211_vif *vif,
@@ -1724,6 +2014,25 @@ int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 				       MCU_EXT_CMD_STA_REC_UPDATE, true);
 }
 
+int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			   struct ieee80211_sta *sta, bool enable)
+{
+	int ret;
+
+	if (!sta)
+		return 0;
+
+	/* must keep the order */
+	ret = mt7915_mcu_add_txbf(dev, vif, sta, enable);
+	if (ret)
+		return ret;
+
+	if (enable)
+		return mt7915_mcu_add_rate_ctrl(dev, vif, sta);
+
+	return 0;
+}
+
 int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta, bool enable)
 {
@@ -1732,7 +2041,6 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 	struct mt7915_sta *msta;
 	struct tlv *sta_wtbl;
 	struct sk_buff *skb;
-	int ret;
 
 	msta = sta ? (struct mt7915_sta *)sta->drv_priv : &mvif->sta;
 
@@ -1755,15 +2063,8 @@ int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			mt7915_mcu_wtbl_ht_tlv(skb, sta, sta_wtbl, wtbl_hdr);
 	}
 
-	ret = __mt76_mcu_skb_send_msg(&dev->mt76, skb,
-				      MCU_EXT_CMD_STA_REC_UPDATE, true);
-	if (ret)
-		return ret;
-
-	if (enable && sta)
-		return mt7915_mcu_add_rate_ctrl(dev, vif, sta);
-
-	return 0;
+	return __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+				       MCU_EXT_CMD_STA_REC_UPDATE, true);
 }
 
 int mt7915_mcu_set_fixed_rate(struct mt7915_dev *dev,
@@ -2767,3 +3068,40 @@ int mt7915_mcu_set_ser(struct mt7915_dev *dev, u8 action, u8 set, u8 band)
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_SER_TRIGGER,
 				   &req, sizeof(req), false);
 }
+
+int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev)
+{
+#define MT_BF_TYPE_UPDATE		20
+	struct {
+		u8 action;
+		bool ebf;
+		bool ibf;
+		u8 rsv;
+	} __packed req = {
+		.action = MT_BF_TYPE_UPDATE,
+		.ebf = true,
+		.ibf = false,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_TXBF_ACTION,
+				   &req, sizeof(req), true);
+}
+
+int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev)
+{
+#define MT_BF_PROCESSING		4
+	struct {
+		u8 action;
+		u8 snd_mode;
+		u8 sta_num;
+		u8 rsv;
+		u8 wlan_idx[4];
+		__le32 snd_period;	/* ms */
+	} __packed req = {
+		.action = true,
+		.snd_mode = MT_BF_PROCESSING,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_TXBF_ACTION,
+				   &req, sizeof(req), true);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index f68d2094ac6f..53d1e1f0cea9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -194,6 +194,7 @@ enum {
 	MCU_EXT_CMD_PM_STATE_CTRL = 0x07,
 	MCU_EXT_CMD_CHANNEL_SWITCH = 0x08,
 	MCU_EXT_CMD_FW_LOG_2_HOST = 0x13,
+	MCU_EXT_CMD_TXBF_ACTION = 0x1e,
 	MCU_EXT_CMD_EFUSE_BUFFER_MODE = 0x21,
 	MCU_EXT_CMD_STA_REC_UPDATE = 0x25,
 	MCU_EXT_CMD_BSS_INFO_UPDATE = 0x26,
@@ -571,18 +572,6 @@ struct wtbl_ba {
 	u8 rsv1[4];
 } __packed;
 
-struct wtbl_bf {
-	__le16 tag;
-	__le16 len;
-	u8 ibf;
-	u8 ebf;
-	u8 ibf_vht;
-	u8 ebf_vht;
-	u8 gid;
-	u8 pfmu_idx;
-	u8 rsv[2];
-} __packed;
-
 struct wtbl_smps {
 	__le16 tag;
 	__le16 len;
@@ -590,13 +579,6 @@ struct wtbl_smps {
 	u8 rsv[3];
 } __packed;
 
-struct wtbl_spe {
-	__le16 tag;
-	__le16 len;
-	u8 spe_idx;
-	u8 rsv[3];
-} __packed;
-
 enum {
 	WTBL_GENERIC,
 	WTBL_RX,
@@ -834,6 +816,55 @@ struct sta_rec_ra_fixed {
 #define RATE_CFG_LDPC			GENMASK(23, 20)
 #define RATE_CFG_PHY_TYPE		GENMASK(27, 24)
 
+struct sta_rec_bf {
+	__le16 tag;
+	__le16 len;
+
+	__le16 pfmu;		/* 0xffff: no access right for PFMU */
+	bool su_mu;		/* 0: SU, 1: MU */
+	u8 bf_cap;		/* 0: iBF, 1: eBF */
+	u8 sounding_phy;	/* 0: legacy, 1: OFDM, 2: HT, 4: VHT */
+	u8 ndpa_rate;
+	u8 ndp_rate;
+	u8 rept_poll_rate;
+	u8 tx_mode;		/* 0: legacy, 1: OFDM, 2: HT, 4: VHT ... */
+	u8 nc;
+	u8 nr;
+	u8 bw;			/* 0: 20M, 1: 40M, 2: 80M, 3: 160M */
+
+	u8 mem_total;
+	u8 mem_20m;
+	struct {
+		u8 row;
+		u8 col: 6, row_msb: 2;
+	} mem[4];
+
+	__le16 smart_ant;
+	u8 se_idx;
+	u8 auto_sounding;	/* b7: low traffic indicator
+				 * b6: Stop sounding for this entry
+				 * b5 ~ b0: postpone sounding
+				 */
+	u8 ibf_timeout;
+	u8 ibf_dbw;
+	u8 ibf_ncol;
+	u8 ibf_nrow;
+	u8 nr_bw160;
+	u8 nc_bw160;
+	u8 ru_start_idx;
+	u8 ru_end_idx;
+
+	bool trigger_su;
+	bool trigger_mu;
+	bool ng16_su;
+	bool ng16_mu;
+	bool codebook42_su;
+	bool codebook75_mu;
+
+	u8 he_ltf;
+	u8 rsv[2];
+} __packed;
+
 enum {
 	STA_REC_BASIC,
 	STA_REC_RA,
@@ -890,15 +921,18 @@ enum {
 	THERMAL_SENSOR_TASK_CTRL,
 };
 
+enum {
+	MT_EBF = BIT(0),	/* explicit beamforming */
+	MT_IBF = BIT(1)		/* implicit beamforming */
+};
+
 #define MT7915_WTBL_UPDATE_MAX_SIZE	(sizeof(struct wtbl_req_hdr) +	\
 					 sizeof(struct wtbl_generic) +	\
 					 sizeof(struct wtbl_rx) +	\
 					 sizeof(struct wtbl_ht) +	\
 					 sizeof(struct wtbl_vht) +	\
 					 sizeof(struct wtbl_ba) +	\
-					 sizeof(struct wtbl_bf) +	\
-					 sizeof(struct wtbl_smps) +	\
-					 sizeof(struct wtbl_spe))
+					 sizeof(struct wtbl_smps))
 
 #define MT7915_STA_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
 					 sizeof(struct sta_rec_basic) +	\
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 43c956bc09f5..0d5b448292b5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -286,6 +286,8 @@ int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
 			    struct ieee80211_vif *vif, int enable);
 int mt7915_mcu_add_sta(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		       struct ieee80211_sta *sta, bool enable);
+int mt7915_mcu_add_sta_adv(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			   struct ieee80211_sta *sta, bool enable);
 int mt7915_mcu_add_tx_ba(struct mt7915_dev *dev,
 			 struct ieee80211_ampdu_params *params,
 			 bool add);
@@ -315,6 +317,8 @@ int mt7915_mcu_set_rts_thresh(struct mt7915_phy *phy, u32 val);
 int mt7915_mcu_set_pm(struct mt7915_dev *dev, int band, int enter);
 int mt7915_mcu_set_sku_en(struct mt7915_phy *phy, bool enable);
 int mt7915_mcu_set_sku(struct mt7915_phy *phy);
+int mt7915_mcu_set_txbf_type(struct mt7915_dev *dev);
+int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev);
 int mt7915_mcu_set_fcc5_lpn(struct mt7915_dev *dev, int val);
 int mt7915_mcu_set_pulse_th(struct mt7915_dev *dev,
 			    const struct mt7915_dfs_pulse *pulse);
-- 
cgit v1.2.3-59-g8ed1b


From 2af34fa3b5a76ca3ed553550f93dbc793a2965cb Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:34 +0800
Subject: mt76: mt7915: add Tx beamformee support

Enable beamformee support.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 54 +++++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h |  9 +++++
 2 files changed, 63 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 58d0adca8b31..52e349b17246 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -67,6 +67,7 @@ struct mt7915_fw_region {
 #define MCU_PATCH_ADDRESS		0x200000
 
 #define MT_STA_BFER			BIT(0)
+#define MT_STA_BFEE			BIT(1)
 
 #define FW_FEATURE_SET_ENCRYPT		BIT(0)
 #define FW_FEATURE_SET_KEY_IDX		GENMASK(2, 1)
@@ -1768,6 +1769,35 @@ mt7915_mcu_sta_bfer_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
 	}
 }
 
+static void
+mt7915_mcu_sta_bfee_tlv(struct sk_buff *skb, struct ieee80211_sta *sta,
+			struct mt7915_phy *phy)
+{
+	struct sta_rec_bfee *bfee;
+	struct tlv *tlv;
+	int tx_ant = hweight8(phy->chainmask) - 1;
+	u8 nr = 0;
+
+	tlv = mt7915_mcu_add_tlv(skb, STA_REC_BFEE, sizeof(*bfee));
+	bfee = (struct sta_rec_bfee *)tlv;
+
+	if (sta->he_cap.has_he) {
+		struct ieee80211_he_cap_elem *pe = &sta->he_cap.he_cap_elem;
+
+		nr = HE_PHY(CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK,
+			    pe->phy_cap_info[5]);
+	} else if (sta->vht_cap.vht_supported) {
+		struct ieee80211_sta_vht_cap *pc = &sta->vht_cap;
+
+		nr = FIELD_GET(IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK,
+			       pc->cap);
+	}
+
+	/* reply with identity matrix to avoid 2x2 BF negative gain */
+	if (nr == 1 && tx_ant == 2)
+		bfee->fb_identity_matrix = true;
+}
+
 static u8
 mt7915_mcu_sta_txbf_type(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 			 struct ieee80211_sta *sta)
@@ -1790,6 +1820,11 @@ mt7915_mcu_sta_txbf_type(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 		vc = mt7915_get_he_phy_cap(phy, vif);
 		ve = &vc->he_cap_elem;
 
+		if ((HE_PHY(CAP3_SU_BEAMFORMER, pe->phy_cap_info[3]) ||
+		     HE_PHY(CAP4_MU_BEAMFORMER, pe->phy_cap_info[4])) &&
+		    HE_PHY(CAP4_SU_BEAMFORMEE, ve->phy_cap_info[4]))
+			type |= MT_STA_BFEE;
+
 		if ((HE_PHY(CAP3_SU_BEAMFORMER, ve->phy_cap_info[3]) ||
 		     HE_PHY(CAP4_MU_BEAMFORMER, ve->phy_cap_info[4])) &&
 		    HE_PHY(CAP4_SU_BEAMFORMEE, pe->phy_cap_info[4]))
@@ -1806,6 +1841,9 @@ mt7915_mcu_sta_txbf_type(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 		ce = IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
 		     IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE;
 
+		if ((pc->cap & cr) && (vc->cap & ce))
+			type |= MT_STA_BFEE;
+
 		if ((vc->cap & cr) && (pc->cap & ce))
 			type |= MT_STA_BFER;
 	} else if (sta->ht_cap.ht_supported) {
@@ -1848,6 +1886,22 @@ mt7915_mcu_add_txbf(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			return r;
 	}
 
+	/* starec bfee */
+	if (type & MT_STA_BFEE) {
+		len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_bfee);
+
+		skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
+		if (IS_ERR(skb))
+			return PTR_ERR(skb);
+
+		mt7915_mcu_sta_bfee_tlv(skb, sta, phy);
+
+		r = __mt76_mcu_skb_send_msg(&dev->mt76, skb,
+					    MCU_EXT_CMD_STA_REC_UPDATE, true);
+		if (r)
+			return r;
+	}
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 53d1e1f0cea9..cdeba5a0ef34 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -865,6 +865,14 @@ struct sta_rec_bf {
 	u8 rsv[2];
 } __packed;
 
+struct sta_rec_bfee {
+	__le16 tag;
+	__le16 len;
+	bool fb_identity_matrix;	/* 1: feedback identity matrix */
+	bool ignore_feedback;		/* 1: ignore */
+	u8 rsv[2];
+} __packed;
+
 enum {
 	STA_REC_BASIC,
 	STA_REC_RA,
@@ -886,6 +894,7 @@ enum {
 	STA_REC_KEY_V2,
 	STA_REC_MURU,
 	STA_REC_MUEDCA,
+	STA_REC_BFEE,
 	STA_REC_MAX_NUM
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 00b2e16e006390069480e90478aa8b6e924996d7 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:35 +0800
Subject: mt76: mt7915: add TxBF capabilities

This allows setting HE TxBF runtime stream capabilities.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   | 95 +++++++++++++++++++++-
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |  1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  1 +
 3 files changed, 95 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 3721bd632b4d..e2b0ea33053c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -259,6 +259,94 @@ mt7915_init_wiphy(struct ieee80211_hw *hw)
 	hw->max_tx_fragments = 4;
 }
 
+void mt7915_set_stream_vht_txbf_caps(struct mt7915_phy *phy)
+{
+	int nss = hweight8(phy->chainmask);
+	u32 *cap = &phy->mt76->sband_5g.sband.vht_cap.cap;
+
+	*cap |= IEEE80211_VHT_CAP_SU_BEAMFORMEE_CAPABLE |
+		IEEE80211_VHT_CAP_MU_BEAMFORMEE_CAPABLE |
+		(3 << IEEE80211_VHT_CAP_BEAMFORMEE_STS_SHIFT);
+
+	*cap &= ~(IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK |
+		  IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+		  IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE);
+
+	if (nss < 2)
+		return;
+
+	*cap |= IEEE80211_VHT_CAP_SU_BEAMFORMER_CAPABLE |
+		IEEE80211_VHT_CAP_MU_BEAMFORMER_CAPABLE |
+		FIELD_PREP(IEEE80211_VHT_CAP_SOUNDING_DIMENSIONS_MASK,
+			   nss - 1);
+}
+
+static void
+mt7915_set_stream_he_txbf_caps(struct ieee80211_sta_he_cap *he_cap,
+			       int vif, int nss)
+{
+	struct ieee80211_he_cap_elem *elem = &he_cap->he_cap_elem;
+	struct ieee80211_he_mcs_nss_supp *mcs = &he_cap->he_mcs_nss_supp;
+	u8 c;
+
+#ifdef CONFIG_MAC80211_MESH
+	if (vif == NL80211_IFTYPE_MESH_POINT)
+		return;
+#endif
+
+	elem->phy_cap_info[3] &= ~IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
+	elem->phy_cap_info[4] &= ~IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
+
+	c = IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_UNDER_80MHZ_MASK |
+	    IEEE80211_HE_PHY_CAP5_BEAMFORMEE_NUM_SND_DIM_ABOVE_80MHZ_MASK;
+	elem->phy_cap_info[5] &= ~c;
+
+	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
+	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB;
+	elem->phy_cap_info[6] &= ~c;
+
+	elem->phy_cap_info[7] &= ~IEEE80211_HE_PHY_CAP7_MAX_NC_MASK;
+
+	c = IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
+	    IEEE80211_HE_PHY_CAP2_UL_MU_FULL_MU_MIMO |
+	    IEEE80211_HE_PHY_CAP2_UL_MU_PARTIAL_MU_MIMO;
+	elem->phy_cap_info[2] |= c;
+
+	c = IEEE80211_HE_PHY_CAP4_SU_BEAMFORMEE |
+	    IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_4 |
+	    IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_ABOVE_80MHZ_4;
+	elem->phy_cap_info[4] |= c;
+
+	/* do not support NG16 due to spec D4.0 changes subcarrier idx */
+	c = IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_42_SU |
+	    IEEE80211_HE_PHY_CAP6_CODEBOOK_SIZE_75_MU;
+
+	if (vif == NL80211_IFTYPE_STATION)
+		c |= IEEE80211_HE_PHY_CAP6_PARTIAL_BANDWIDTH_DL_MUMIMO;
+
+	elem->phy_cap_info[6] |= c;
+
+	if (nss < 2)
+		return;
+
+	if (vif != NL80211_IFTYPE_AP)
+		return;
+
+	elem->phy_cap_info[3] |= IEEE80211_HE_PHY_CAP3_SU_BEAMFORMER;
+	elem->phy_cap_info[4] |= IEEE80211_HE_PHY_CAP4_MU_BEAMFORMER;
+
+	/* num_snd_dim */
+	c = (nss - 1) | (max_t(int, mcs->tx_mcs_160, 1) << 3);
+	elem->phy_cap_info[5] |= c;
+
+	c = IEEE80211_HE_PHY_CAP6_TRIG_SU_BEAMFORMER_FB |
+	    IEEE80211_HE_PHY_CAP6_TRIG_MU_BEAMFORMER_FB;
+	elem->phy_cap_info[6] |= c;
+
+	/* the maximum cap is 4 x 3, (Nr, Nc) = (3, 2) */
+	elem->phy_cap_info[7] |= min_t(int, nss - 1, 2) << 3;
+}
+
 static void
 mt7915_gen_ppe_thresh(u8 *he_ppet)
 {
@@ -352,11 +440,10 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 		he_cap_elem->phy_cap_info[1] =
 			IEEE80211_HE_PHY_CAP1_LDPC_CODING_IN_PAYLOAD;
 		he_cap_elem->phy_cap_info[2] =
-			IEEE80211_HE_PHY_CAP2_NDP_4x_LTF_AND_3_2US |
 			IEEE80211_HE_PHY_CAP2_STBC_TX_UNDER_80MHZ |
 			IEEE80211_HE_PHY_CAP2_STBC_RX_UNDER_80MHZ;
 
-		/* TODO: TxBF & MU & MESH */
+		/* TODO: OFDMA */
 
 		switch (i) {
 		case NL80211_IFTYPE_AP:
@@ -407,6 +494,8 @@ mt7915_init_he_caps(struct mt7915_phy *phy, enum nl80211_band band,
 		he_mcs->rx_mcs_80p80 = cpu_to_le16(mcs_map);
 		he_mcs->tx_mcs_80p80 = cpu_to_le16(mcs_map);
 
+		mt7915_set_stream_he_txbf_caps(he_cap, i, nss);
+
 		memset(he_cap->ppe_thres, 0, sizeof(he_cap->ppe_thres));
 		if (he_cap_elem->phy_cap_info[6] &
 		    IEEE80211_HE_PHY_CAP6_PPE_THRESHOLD_PRESENT) {
@@ -464,6 +553,7 @@ mt7915_cap_dbdc_enable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->phy.chainmask;
 
 	mt76_set_stream_caps(&dev->mphy, true);
+	mt7915_set_stream_vht_txbf_caps(&dev->phy);
 	mt7915_set_stream_he_caps(&dev->phy);
 }
 
@@ -480,6 +570,7 @@ mt7915_cap_dbdc_disable(struct mt7915_dev *dev)
 	dev->mphy.hw->wiphy->available_antennas_tx = dev->chainmask;
 
 	mt76_set_stream_caps(&dev->mphy, true);
+	mt7915_set_stream_vht_txbf_caps(&dev->phy);
 	mt7915_set_stream_he_caps(&dev->phy);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index f3a373fc07e5..147ab7da7aa9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -745,6 +745,7 @@ mt7915_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	phy->chainmask = tx_ant;
 
 	mt76_set_stream_caps(phy->mt76, true);
+	mt7915_set_stream_vht_txbf_caps(phy);
 	mt7915_set_stream_he_caps(phy);
 
 	mutex_unlock(&dev->mt76.mutex);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 0d5b448292b5..537fc126289f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -454,6 +454,7 @@ void mt7915_txp_skb_unmap(struct mt76_dev *dev,
 int mt76_dfs_start_rdd(struct mt7915_dev *dev, bool force);
 int mt7915_dfs_init_radar_detector(struct mt7915_phy *phy);
 void mt7915_set_stream_he_caps(struct mt7915_phy *phy);
+void mt7915_set_stream_vht_txbf_caps(struct mt7915_phy *phy);
 void mt7915_update_channel(struct mt76_dev *mdev);
 int mt7915_init_debugfs(struct mt7915_dev *dev);
 #ifdef CONFIG_MAC80211_DEBUGFS
-- 
cgit v1.2.3-59-g8ed1b


From babdad50f781c21c2e5511bf406dbb9728da05cb Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:36 +0800
Subject: mt76: mt7915: add debugfs to track TxBF status

Add debug counters to track status of beamformer and beamformee.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 .../net/wireless/mediatek/mt76/mt7915/debugfs.c    | 45 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/regs.h   | 20 ++++++++++
 2 files changed, 65 insertions(+)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index c6c009cd773e..ee0066fedd04 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -143,6 +143,48 @@ mt7915_ampdu_stat_read_phy(struct mt7915_phy *phy,
 	seq_printf(file, "BA miss count: %d\n", phy->mib.ba_miss_cnt);
 }
 
+static void
+mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s)
+{
+	struct mt7915_dev *dev = s->private;
+	bool ext_phy = phy != &dev->phy;
+	int cnt;
+
+	if (!phy)
+		return;
+
+	/* Tx Beamformer monitor */
+	seq_puts(s, "\nTx Beamformer applied PPDU counts: ");
+
+	cnt = mt76_rr(dev, MT_ETBF_TX_APP_CNT(ext_phy));
+	seq_printf(s, "iBF: %ld, eBF: %ld\n",
+		   FIELD_GET(MT_ETBF_TX_IBF_CNT, cnt),
+		   FIELD_GET(MT_ETBF_TX_EBF_CNT, cnt));
+
+	/* Tx Beamformer Rx feedback monitor */
+	seq_puts(s, "Tx Beamformer Rx feedback statistics: ");
+
+	cnt = mt76_rr(dev, MT_ETBF_RX_FB_CNT(ext_phy));
+	seq_printf(s, "All: %ld, HE: %ld, VHT: %ld, HT: %ld\n",
+		   FIELD_GET(MT_ETBF_RX_FB_ALL, cnt),
+		   FIELD_GET(MT_ETBF_RX_FB_HE, cnt),
+		   FIELD_GET(MT_ETBF_RX_FB_VHT, cnt),
+		   FIELD_GET(MT_ETBF_RX_FB_HT, cnt));
+
+	/* Tx Beamformee Rx NDPA & Tx feedback report */
+	cnt = mt76_rr(dev, MT_ETBF_TX_NDP_BFRP(ext_phy));
+	seq_printf(s, "Tx Beamformee sucessful feedback frames: %ld\n",
+		   FIELD_GET(MT_ETBF_TX_FB_CPL, cnt));
+	seq_printf(s, "Tx Beamformee feedback triggerd counts: %ld\n",
+		   FIELD_GET(MT_ETBF_TX_FB_TRI, cnt));
+
+	/* Tx SU counters */
+	cnt = mt76_rr(dev, MT_MIB_DR11(ext_phy));
+	seq_printf(s, "Tx single-user sucessful MPDU counts: %d\n", cnt);
+
+	seq_puts(s, "\n");
+}
+
 static int
 mt7915_tx_stats_read(struct seq_file *file, void *data)
 {
@@ -150,7 +192,10 @@ mt7915_tx_stats_read(struct seq_file *file, void *data)
 	int stat[8], i, n;
 
 	mt7915_ampdu_stat_read_phy(&dev->phy, file);
+	mt7915_txbf_stat_read_phy(&dev->phy, file);
+
 	mt7915_ampdu_stat_read_phy(mt7915_ext_phy(dev), file);
+	mt7915_txbf_stat_read_phy(mt7915_ext_phy(dev), file);
 
 	/* Tx amsdu info */
 	seq_puts(file, "Tx MSDU stat:\n");
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
index 6600fc625196..c121715f8bff 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/regs.h
@@ -75,6 +75,24 @@
 #define MT_DMA_DCR0_MAX_RX_LEN		GENMASK(15, 3)
 #define MT_DMA_DCR0_RXD_G5_EN		BIT(23)
 
+/* ETBF: band 0(0x24000), band 1(0xa4000) */
+#define MT_WF_ETBF_BASE(_band)		((_band) ? 0xa4000 : 0x24000)
+#define MT_WF_ETBF(_band, ofs)		(MT_WF_ETBF_BASE(_band) + (ofs))
+
+#define MT_ETBF_TX_NDP_BFRP(_band)	MT_WF_ETBF(_band, 0x040)
+#define MT_ETBF_TX_FB_CPL		GENMASK(31, 16)
+#define MT_ETBF_TX_FB_TRI		GENMASK(15, 0)
+
+#define MT_ETBF_TX_APP_CNT(_band)	MT_WF_ETBF(_band, 0x0f0)
+#define MT_ETBF_TX_IBF_CNT		GENMASK(31, 16)
+#define MT_ETBF_TX_EBF_CNT		GENMASK(15, 0)
+
+#define MT_ETBF_RX_FB_CNT(_band)	MT_WF_ETBF(_band, 0x0f8)
+#define MT_ETBF_RX_FB_ALL		GENMASK(31, 24)
+#define MT_ETBF_RX_FB_HE		GENMASK(23, 16)
+#define MT_ETBF_RX_FB_VHT		GENMASK(15, 8)
+#define MT_ETBF_RX_FB_HT		GENMASK(7, 0)
+
 /* LPON: band 0(0x24200), band 1(0xa4200) */
 #define MT_WF_LPON_BASE(_band)		((_band) ? 0xa4200 : 0x24200)
 #define MT_WF_LPON(_band, ofs)		(MT_WF_LPON_BASE(_band) + (ofs))
@@ -104,6 +122,8 @@
 #define MT_MIB_SDR37(_band)		MT_WF_MIB(_band, 0x09c)
 #define MT_MIB_SDR37_RXTIME_MASK	GENMASK(23, 0)
 
+#define MT_MIB_DR11(_band)		MT_WF_MIB(_band, 0x0cc)
+
 #define MT_MIB_MB_SDR0(_band, n)	MT_WF_MIB(_band, 0x100 + ((n) << 4))
 #define MT_MIB_RTS_RETRIES_COUNT_MASK	GENMASK(31, 16)
 #define MT_MIB_RTS_COUNT_MASK		GENMASK(15, 0)
-- 
cgit v1.2.3-59-g8ed1b


From 3e68af622254bad75f5989c39663fd12a8efeddd Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:37 +0800
Subject: mt76: mt7915: allocate proper size for tlv tags

Allocate the proper memory size according to TLV usage.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 14 +++++++-------
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h | 12 ++++++++++--
 2 files changed, 17 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 52e349b17246..f00ad2b66761 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -997,7 +997,7 @@ int mt7915_mcu_add_bss_info(struct mt7915_phy *phy,
 	struct sk_buff *skb;
 
 	skb = mt7915_mcu_alloc_sta_req(phy->dev, mvif, NULL,
-				       MT7915_STA_UPDATE_MAX_SIZE);
+				       MT7915_BSS_UPDATE_MAX_SIZE);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -1092,10 +1092,10 @@ int mt7915_mcu_add_key(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 {
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
 	struct sk_buff *skb;
+	int len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_sec);
 	int ret;
 
-	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
-				       MT7915_STA_UPDATE_MAX_SIZE);
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -2056,9 +2056,9 @@ int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
 	struct mt7915_sta *msta = (struct mt7915_sta *)sta->drv_priv;
 	struct sk_buff *skb;
+	int len = sizeof(struct sta_req_hdr) + sizeof(struct sta_rec_ra);
 
-	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
-				       MT7915_STA_UPDATE_MAX_SIZE);
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
@@ -2129,9 +2129,9 @@ int mt7915_mcu_set_fixed_rate(struct mt7915_dev *dev,
 	struct sta_rec_ra_fixed *ra;
 	struct sk_buff *skb;
 	struct tlv *tlv;
+	int len = sizeof(struct sta_req_hdr) + sizeof(*ra);
 
-	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta,
-				       MT7915_STA_UPDATE_MAX_SIZE);
+	skb = mt7915_mcu_alloc_sta_req(dev, mvif, msta, len);
 	if (IS_ERR(skb))
 		return PTR_ERR(skb);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index cdeba5a0ef34..34ace6e672d0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -951,13 +951,21 @@ enum {
 					 sizeof(struct sta_rec_vht) +	\
 					 sizeof(struct tlv) +		\
 					 sizeof(struct sta_rec_muru) +	\
-					 sizeof(struct sta_rec_sec) +	\
-					 sizeof(struct sta_rec_ra) +	\
 					 MT7915_WTBL_UPDATE_MAX_SIZE)
 
 #define MT7915_WTBL_UPDATE_BA_SIZE	(sizeof(struct wtbl_req_hdr) +	\
 					 sizeof(struct wtbl_ba))
 
+#define MT7915_BSS_UPDATE_MAX_SIZE	(sizeof(struct sta_req_hdr) +	\
+					 sizeof(struct bss_info_omac) +	\
+					 sizeof(struct bss_info_basic) +\
+					 sizeof(struct bss_info_rf_ch) +\
+					 sizeof(struct bss_info_ra) +	\
+					 sizeof(struct bss_info_he) +	\
+					 sizeof(struct bss_info_bmc_rate) +\
+					 sizeof(struct bss_info_ext_bss) +\
+					 sizeof(struct bss_info_sync_mode))
+
 #define MT7915_BEACON_UPDATE_SIZE	(sizeof(struct sta_req_hdr) +	\
 					 sizeof(struct bss_info_bcn_csa) + \
 					 sizeof(struct bss_info_bcn_bcc) + \
-- 
cgit v1.2.3-59-g8ed1b


From 57b9df6fa5f56b98baa73f62ed92db81db3de391 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 12 May 2020 00:06:38 +0800
Subject: mt76: mt7915: fix possible deadlock in mt7915_stop

Make mac_work per phy instead of per device, and fix a possible deadlock
in mt7915_stop(), since mt7915_mac_work() runs while holding the mt76 mutex.

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c   |  3 +-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c    | 49 +++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   | 18 ++++----
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  4 +-
 4 files changed, 42 insertions(+), 32 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index e2b0ea33053c..6f200ab3ac28 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -592,6 +592,7 @@ int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	if (phy)
 		return 0;
 
+	INIT_DELAYED_WORK(&phy->mac_work, mt7915_mac_work);
 	mt7915_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7915_ops);
 	if (!mphy)
@@ -642,7 +643,7 @@ int mt7915_register_device(struct mt7915_dev *dev)
 	dev->phy.dev = dev;
 	dev->phy.mt76 = &dev->mt76.phy;
 	dev->mt76.phy.priv = &dev->phy;
-	INIT_DELAYED_WORK(&dev->mt76.mac_work, mt7915_mac_work);
+	INIT_DELAYED_WORK(&dev->phy.mac_work, mt7915_mac_work);
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 751363b4b7a2..7ad7c2b7afdc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1156,26 +1156,32 @@ mt7915_dma_reset(struct mt7915_dev *dev)
 /* system error recovery */
 void mt7915_mac_reset_work(struct work_struct *work)
 {
+	struct mt7915_phy *phy2;
+	struct mt76_phy *ext_phy;
 	struct mt7915_dev *dev;
 
 	dev = container_of(work, struct mt7915_dev, reset_work);
+	ext_phy = dev->mt76.phy2;
+	phy2 = ext_phy ? ext_phy->priv : NULL;
 
 	if (!(READ_ONCE(dev->reset_state) & MT_MCU_CMD_STOP_DMA))
 		return;
 
 	ieee80211_stop_queues(mt76_hw(dev));
-	if (dev->mt76.phy2)
-		ieee80211_stop_queues(dev->mt76.phy2->hw);
+	if (ext_phy)
+		ieee80211_stop_queues(ext_phy->hw);
 
 	set_bit(MT76_RESET, &dev->mphy.state);
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
-	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	cancel_delayed_work_sync(&dev->phy.mac_work);
+	if (phy2)
+		cancel_delayed_work_sync(&phy2->mac_work);
 
 	/* lock/unlock all queues to ensure that no tx is pending */
 	mt76_txq_schedule_all(&dev->mphy);
-	if (dev->mt76.phy2)
-		mt76_txq_schedule_all(dev->mt76.phy2);
+	if (ext_phy)
+		mt76_txq_schedule_all(ext_phy);
 
 	tasklet_disable(&dev->mt76.tx_tasklet);
 	napi_disable(&dev->mt76.napi[0]);
@@ -1211,8 +1217,8 @@ void mt7915_mac_reset_work(struct work_struct *work)
 	napi_schedule(&dev->mt76.napi[2]);
 
 	ieee80211_wake_queues(mt76_hw(dev));
-	if (dev->mt76.phy2)
-		ieee80211_wake_queues(dev->mt76.phy2->hw);
+	if (ext_phy)
+		ieee80211_wake_queues(ext_phy->hw);
 
 	mt76_wr(dev, MT_MCU_INT_EVENT, MT_MCU_INT_EVENT_RESET_DONE);
 	mt7915_wait_reset_state(dev, MT_MCU_CMD_NORMAL_STATE);
@@ -1221,8 +1227,11 @@ void mt7915_mac_reset_work(struct work_struct *work)
 
 	mt7915_update_beacons(dev);
 
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->phy.mac_work,
 				     MT7915_WATCHDOG_TIME);
+	if (phy2)
+		ieee80211_queue_delayed_work(ext_phy->hw, &phy2->mac_work,
+					     MT7915_WATCHDOG_TIME);
 }
 
 static void
@@ -1307,25 +1316,25 @@ void mt7915_mac_sta_stats_work(struct work_struct *work)
 
 void mt7915_mac_work(struct work_struct *work)
 {
-	struct mt7915_dev *dev;
+	struct mt7915_phy *phy;
+	struct mt76_dev *mdev;
 
-	dev = (struct mt7915_dev *)container_of(work, struct mt76_dev,
+	phy = (struct mt7915_phy *)container_of(work, struct mt7915_phy,
 						mac_work.work);
+	mdev = &phy->dev->mt76;
 
-	mutex_lock(&dev->mt76.mutex);
-	mt76_update_survey(&dev->mt76);
-	if (++dev->mac_work_count == 5) {
-		struct mt7915_phy *ext_phy = mt7915_ext_phy(dev);
+	mutex_lock(&mdev->mutex);
 
-		mt7915_mac_update_mib_stats(&dev->phy);
-		if (ext_phy)
-			mt7915_mac_update_mib_stats(ext_phy);
+	mt76_update_survey(mdev);
+	if (++phy->mac_work_count == 5) {
+		phy->mac_work_count = 0;
 
-		dev->mac_work_count = 0;
+		mt7915_mac_update_mib_stats(phy);
 	}
-	mutex_unlock(&dev->mt76.mutex);
 
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	mutex_unlock(&mdev->mutex);
+
+	ieee80211_queue_delayed_work(phy->mt76->hw, &phy->mac_work,
 				     MT7915_WATCHDOG_TIME);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 147ab7da7aa9..98567374c2c9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -47,14 +47,12 @@ static int mt7915_start(struct ieee80211_hw *hw)
 
 	set_bit(MT76_STATE_RUNNING, &phy->mt76->state);
 
-	if (running)
-		goto out;
+	ieee80211_queue_delayed_work(hw, &phy->mac_work,
+				     MT7915_WATCHDOG_TIME);
 
-	mt7915_mac_reset_counters(phy);
+	if (!running)
+		mt7915_mac_reset_counters(phy);
 
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
-				     MT7915_WATCHDOG_TIME);
-out:
 	mutex_unlock(&dev->mt76.mutex);
 
 	return 0;
@@ -65,6 +63,8 @@ static void mt7915_stop(struct ieee80211_hw *hw)
 	struct mt7915_dev *dev = mt7915_hw_dev(hw);
 	struct mt7915_phy *phy = mt7915_hw_phy(hw);
 
+	cancel_delayed_work_sync(&phy->mac_work);
+
 	mutex_lock(&dev->mt76.mutex);
 
 	clear_bit(MT76_STATE_RUNNING, &phy->mt76->state);
@@ -75,8 +75,6 @@ static void mt7915_stop(struct ieee80211_hw *hw)
 	}
 
 	if (!mt7915_dev_running(dev)) {
-		cancel_delayed_work_sync(&dev->mt76.mac_work);
-
 		mt7915_mcu_set_pm(dev, 0, 1);
 		mt7915_mcu_set_mac(dev, 0, false, false);
 	}
@@ -230,7 +228,7 @@ static int mt7915_set_channel(struct mt7915_phy *phy)
 	struct mt7915_dev *dev = phy->dev;
 	int ret;
 
-	cancel_delayed_work_sync(&dev->mt76.mac_work);
+	cancel_delayed_work_sync(&phy->mac_work);
 
 	mutex_lock(&dev->mt76.mutex);
 	set_bit(MT76_RESET, &phy->mt76->state);
@@ -254,7 +252,7 @@ out:
 	mutex_unlock(&dev->mt76.mutex);
 
 	mt76_txq_schedule_all(phy->mt76);
-	ieee80211_queue_delayed_work(mt76_hw(dev), &dev->mt76.mac_work,
+	ieee80211_queue_delayed_work(phy->mt76->hw, &phy->mac_work,
 				     MT7915_WATCHDOG_TIME);
 
 	return ret;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 537fc126289f..5392292a838e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -141,6 +141,9 @@ struct mt7915_phy {
 	u32 ampdu_ref;
 
 	struct mib_stats mib;
+
+	struct delayed_work mac_work;
+	u8 mac_work_count;
 };
 
 struct mt7915_dev {
@@ -168,7 +171,6 @@ struct mt7915_dev {
 
 	s8 **rate_power; /* TODO: use mt76_rate_power */
 
-	u8 mac_work_count;
 	bool fw_debug;
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 5b3f3f2a71ed1cecf6fcf9e8c858a89589415449 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:27 -0700
Subject: ionic: support longer tx sg lists

The version 1 Tx queues can use longer SG lists than the
original version 0 queues, but we need to check to see if the
firmware supports the v1 Tx queues.  This implements the queue
type query for all queue types, and uses the information to
set up for using the longer Tx SG lists.

Because the Tx SG list can be longer, we need to limit the
max ring length to be sure we stay inside the boundaries of a
DMA allocation max size, so we lower the max Tx ring size.

The driver sets its highest known version in the Q_IDENTITY
command, and the FW returns the highest version that it knows,
bounded by the driver's version.  The negotiated version number
is later used in the Q_INIT commands.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.c    |  14 +++
 drivers/net/ethernet/pensando/ionic/ionic_dev.h    |   7 +-
 .../net/ethernet/pensando/ionic/ionic_ethtool.c    |   4 +-
 drivers/net/ethernet/pensando/ionic/ionic_if.h     | 112 +++++++++++++++++---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c    | 114 ++++++++++++++++++++-
 drivers/net/ethernet/pensando/ionic/ionic_lif.h    |  13 +++
 drivers/net/ethernet/pensando/ionic/ionic_main.c   |   2 +
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c   |  27 +++--
 8 files changed, 263 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.c b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
index f4ae40ae1e53..d83eff0ae0ac 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.c
@@ -388,6 +388,19 @@ int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data)
 }
 
 /* LIF commands */
+void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
+				  u16 lif_type, u8 qtype, u8 qver)
+{
+	union ionic_dev_cmd cmd = {
+		.q_identify.opcode = IONIC_CMD_Q_IDENTIFY,
+		.q_identify.lif_type = lif_type,
+		.q_identify.type = qtype,
+		.q_identify.ver = qver,
+	};
+
+	ionic_dev_cmd_go(idev, &cmd);
+}
+
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver)
 {
 	union ionic_dev_cmd cmd = {
@@ -431,6 +444,7 @@ void ionic_dev_cmd_adminq_init(struct ionic_dev *idev, struct ionic_qcq *qcq,
 		.q_init.opcode = IONIC_CMD_Q_INIT,
 		.q_init.lif_index = cpu_to_le16(lif_index),
 		.q_init.type = q->type,
+		.q_init.ver = qcq->q.lif->qtype_info[q->type].version,
 		.q_init.index = cpu_to_le32(q->index),
 		.q_init.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
 					    IONIC_QINIT_F_ENA),
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 587398b01997..33519a8765eb 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -12,7 +12,8 @@
 
 #define IONIC_MIN_MTU			ETH_MIN_MTU
 #define IONIC_MAX_MTU			9194
-#define IONIC_MAX_TXRX_DESC		16384
+#define IONIC_MAX_TX_DESC		8192
+#define IONIC_MAX_RX_DESC		16384
 #define IONIC_MIN_TXRX_DESC		16
 #define IONIC_DEF_TXRX_DESC		4096
 #define IONIC_LIFS_MAX			1024
@@ -83,6 +84,8 @@ static_assert(sizeof(struct ionic_q_init_cmd) == 64);
 static_assert(sizeof(struct ionic_q_init_comp) == 16);
 static_assert(sizeof(struct ionic_q_control_cmd) == 64);
 static_assert(sizeof(ionic_q_control_comp) == 16);
+static_assert(sizeof(struct ionic_q_identify_cmd) == 64);
+static_assert(sizeof(struct ionic_q_identify_comp) == 16);
 
 static_assert(sizeof(struct ionic_rx_mode_set_cmd) == 64);
 static_assert(sizeof(ionic_rx_mode_set_comp) == 16);
@@ -283,6 +286,8 @@ void ionic_dev_cmd_port_fec(struct ionic_dev *idev, u8 fec_type);
 void ionic_dev_cmd_port_pause(struct ionic_dev *idev, u8 pause_type);
 
 int ionic_set_vf_config(struct ionic *ionic, int vf, u8 attr, u8 *data);
+void ionic_dev_cmd_queue_identify(struct ionic_dev *idev,
+				  u16 lif_type, u8 qtype, u8 qver);
 void ionic_dev_cmd_lif_identify(struct ionic_dev *idev, u8 type, u8 ver);
 void ionic_dev_cmd_lif_init(struct ionic_dev *idev, u16 lif_index,
 			    dma_addr_t addr);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 6996229facfd..3f9a73aaef61 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -458,9 +458,9 @@ static void ionic_get_ringparam(struct net_device *netdev,
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 
-	ring->tx_max_pending = IONIC_MAX_TXRX_DESC;
+	ring->tx_max_pending = IONIC_MAX_TX_DESC;
 	ring->tx_pending = lif->ntxq_descs;
-	ring->rx_max_pending = IONIC_MAX_TXRX_DESC;
+	ring->rx_max_pending = IONIC_MAX_RX_DESC;
 	ring->rx_pending = lif->nrxq_descs;
 }
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index ceeb7629e7a0..799f3ea599e9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -40,6 +40,7 @@ enum ionic_cmd_opcode {
 	IONIC_CMD_RX_FILTER_DEL			= 32,
 
 	/* Queue commands */
+	IONIC_CMD_Q_IDENTIFY			= 39,
 	IONIC_CMD_Q_INIT			= 40,
 	IONIC_CMD_Q_CONTROL			= 41,
 
@@ -469,6 +470,66 @@ struct ionic_lif_init_comp {
 	u8 rsvd2[12];
 };
 
+ /**
+  * struct ionic_q_identify_cmd - queue identify command
+  * @opcode:     opcode
+  * @lif_type:   LIF type (enum ionic_lif_type)
+  * @type:       Logical queue type (enum ionic_logical_qtype)
+  * @ver:        Highest queue type version that the driver supports
+  */
+struct ionic_q_identify_cmd {
+	u8     opcode;
+	u8     rsvd;
+	__le16 lif_type;
+	u8     type;
+	u8     ver;
+	u8     rsvd2[58];
+};
+
+/**
+ * struct ionic_q_identify_comp - queue identify command completion
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @ver:        Queue type version that can be used with FW
+ */
+struct ionic_q_identify_comp {
+	u8     status;
+	u8     rsvd;
+	__le16 comp_index;
+	u8     ver;
+	u8     rsvd2[11];
+};
+
+/**
+ * union ionic_q_identity - queue identity information
+ *     @version:        Queue type version that can be used with FW
+ *     @supported:      Bitfield of queue versions, first bit = ver 0
+ *     @features:       Queue features
+ *     @desc_sz:        Descriptor size
+ *     @comp_sz:        Completion descriptor size
+ *     @sg_desc_sz:     Scatter/Gather descriptor size
+ *     @max_sg_elems:   Maximum number of Scatter/Gather elements
+ *     @sg_desc_stride: Number of Scatter/Gather elements per descriptor
+ */
+union ionic_q_identity {
+	struct {
+		u8      version;
+		u8      supported;
+		u8      rsvd[6];
+#define IONIC_QIDENT_F_CQ	0x01	/* queue has completion ring */
+#define IONIC_QIDENT_F_SG	0x02	/* queue has scatter/gather ring */
+#define IONIC_QIDENT_F_EQ	0x04	/* queue can use event queue */
+#define IONIC_QIDENT_F_CMB	0x08	/* queue is in cmb bar */
+		__le64  features;
+		__le16  desc_sz;
+		__le16  comp_sz;
+		__le16  sg_desc_sz;
+		__le16  max_sg_elems;
+		__le16  sg_desc_stride;
+	};
+	__le32 words[478];
+};
+
 /**
  * struct ionic_q_init_cmd - Queue init command
  * @opcode:       opcode
@@ -733,20 +794,31 @@ static inline void decode_txq_desc_cmd(u64 cmd, u8 *opcode, u8 *flags,
 	*addr = (cmd >> IONIC_TXQ_DESC_ADDR_SHIFT) & IONIC_TXQ_DESC_ADDR_MASK;
 };
 
-#define IONIC_TX_MAX_SG_ELEMS	8
-#define IONIC_RX_MAX_SG_ELEMS	8
-
 /**
- * struct ionic_txq_sg_desc - Transmit scatter-gather (SG) list
+ * struct ionic_txq_sg_elem - Transmit scatter-gather (SG) descriptor element
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
+struct ionic_txq_sg_elem {
+	__le64 addr;
+	__le16 len;
+	__le16 rsvd[3];
+};
+
+/**
+ * struct ionic_txq_sg_desc - Transmit scatter-gather (SG) list
+ * @elems:     Scatter-gather elements
+ */
 struct ionic_txq_sg_desc {
-	struct ionic_txq_sg_elem {
-		__le64 addr;
-		__le16 len;
-		__le16 rsvd[3];
-	} elems[IONIC_TX_MAX_SG_ELEMS];
+#define IONIC_TX_MAX_SG_ELEMS		8
+#define IONIC_TX_SG_DESC_STRIDE		8
+	struct ionic_txq_sg_elem elems[IONIC_TX_MAX_SG_ELEMS];
+};
+
+struct ionic_txq_sg_desc_v1 {
+#define IONIC_TX_MAX_SG_ELEMS_V1		15
+#define IONIC_TX_SG_DESC_STRIDE_V1		16
+	struct ionic_txq_sg_elem elems[IONIC_TX_SG_DESC_STRIDE_V1];
 };
 
 /**
@@ -791,16 +863,24 @@ struct ionic_rxq_desc {
 };
 
 /**
- * struct ionic_rxq_sg_desc - Receive scatter-gather (SG) list
+ * struct ionic_rxq_sg_elem - Receive scatter-gather (SG) descriptor element
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
+struct ionic_rxq_sg_elem {
+	__le64 addr;
+	__le16 len;
+	__le16 rsvd[3];
+};
+
+/**
+ * struct ionic_rxq_sg_desc - Receive scatter-gather (SG) list
+ * @elems:     Scatter-gather elements
+ */
 struct ionic_rxq_sg_desc {
-	struct ionic_rxq_sg_elem {
-		__le64 addr;
-		__le16 len;
-		__le16 rsvd[3];
-	} elems[IONIC_RX_MAX_SG_ELEMS];
+#define IONIC_RX_MAX_SG_ELEMS		8
+#define IONIC_RX_SG_DESC_STRIDE		8
+	struct ionic_rxq_sg_elem elems[IONIC_RX_SG_DESC_STRIDE];
 };
 
 /**
@@ -2389,6 +2469,7 @@ union ionic_dev_cmd {
 	struct ionic_qos_init_cmd qos_init;
 	struct ionic_qos_reset_cmd qos_reset;
 
+	struct ionic_q_identify_cmd q_identify;
 	struct ionic_q_init_cmd q_init;
 };
 
@@ -2421,6 +2502,7 @@ union ionic_dev_cmd_comp {
 	ionic_qos_init_comp qos_init;
 	ionic_qos_reset_comp qos_reset;
 
+	struct ionic_q_identify_comp q_identify;
 	struct ionic_q_init_comp q_init;
 };
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index d5293bfded29..0049f537ee40 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -17,6 +17,16 @@
 #include "ionic_ethtool.h"
 #include "ionic_debugfs.h"
 
+/* queuetype support level */
+static const u8 ionic_qtype_versions[IONIC_QTYPE_MAX] = {
+	[IONIC_QTYPE_ADMINQ]  = 0,   /* 0 = Base version with CQ support */
+	[IONIC_QTYPE_NOTIFYQ] = 0,   /* 0 = Base version */
+	[IONIC_QTYPE_RXQ]     = 0,   /* 0 = Base version with CQ+SG support */
+	[IONIC_QTYPE_TXQ]     = 1,   /* 0 = Base version with CQ+SG support
+				      * 1 =   ... with Tx SG version 1
+				      */
+};
+
 static void ionic_lif_rx_mode(struct ionic_lif *lif, unsigned int rx_mode);
 static int ionic_lif_addr_add(struct ionic_lif *lif, const u8 *addr);
 static int ionic_lif_addr_del(struct ionic_lif *lif, const u8 *addr);
@@ -27,6 +37,7 @@ static void ionic_lif_set_netdev_info(struct ionic_lif *lif);
 
 static int ionic_start_queues(struct ionic_lif *lif);
 static void ionic_stop_queues(struct ionic_lif *lif);
+static void ionic_lif_queue_identify(struct ionic_lif *lif);
 
 static void ionic_lif_deferred_work(struct work_struct *work)
 {
@@ -597,6 +608,7 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 			.opcode = IONIC_CMD_Q_INIT,
 			.lif_index = cpu_to_le16(lif->index),
 			.type = q->type,
+			.ver = lif->qtype_info[q->type].version,
 			.index = cpu_to_le32(q->index),
 			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
 					     IONIC_QINIT_F_SG),
@@ -614,6 +626,8 @@ static int ionic_lif_txq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 	dev_dbg(dev, "txq_init.index %d\n", ctx.cmd.q_init.index);
 	dev_dbg(dev, "txq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
 	dev_dbg(dev, "txq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+	dev_dbg(dev, "txq_init.flags 0x%x\n", ctx.cmd.q_init.flags);
+	dev_dbg(dev, "txq_init.ver %d\n", ctx.cmd.q_init.ver);
 
 	q->tail = q->info;
 	q->head = q->tail;
@@ -646,6 +660,7 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 			.opcode = IONIC_CMD_Q_INIT,
 			.lif_index = cpu_to_le16(lif->index),
 			.type = q->type,
+			.ver = lif->qtype_info[q->type].version,
 			.index = cpu_to_le32(q->index),
 			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
 					     IONIC_QINIT_F_SG),
@@ -663,6 +678,8 @@ static int ionic_lif_rxq_init(struct ionic_lif *lif, struct ionic_qcq *qcq)
 	dev_dbg(dev, "rxq_init.index %d\n", ctx.cmd.q_init.index);
 	dev_dbg(dev, "rxq_init.ring_base 0x%llx\n", ctx.cmd.q_init.ring_base);
 	dev_dbg(dev, "rxq_init.ring_size %d\n", ctx.cmd.q_init.ring_size);
+	dev_dbg(dev, "rxq_init.flags 0x%x\n", ctx.cmd.q_init.flags);
+	dev_dbg(dev, "rxq_init.ver %d\n", ctx.cmd.q_init.ver);
 
 	q->tail = q->info;
 	q->head = q->tail;
@@ -726,7 +743,7 @@ static bool ionic_notifyq_service(struct ionic_cq *cq,
 		}
 		break;
 	default:
-		netdev_warn(netdev, "Notifyq unknown event ecode=%d eid=%lld\n",
+		netdev_warn(netdev, "Notifyq event ecode=%d eid=%lld\n",
 			    comp->event.ecode, eid);
 		break;
 	}
@@ -1509,17 +1526,25 @@ static void ionic_txrx_free(struct ionic_lif *lif)
 
 static int ionic_txrx_alloc(struct ionic_lif *lif)
 {
+	unsigned int sg_desc_sz;
 	unsigned int flags;
 	unsigned int i;
 	int err = 0;
 
+	if (lif->qtype_info[IONIC_QTYPE_TXQ].version >= 1 &&
+	    lif->qtype_info[IONIC_QTYPE_TXQ].sg_desc_sz ==
+					  sizeof(struct ionic_txq_sg_desc_v1))
+		sg_desc_sz = sizeof(struct ionic_txq_sg_desc_v1);
+	else
+		sg_desc_sz = sizeof(struct ionic_txq_sg_desc);
+
 	flags = IONIC_QCQ_F_TX_STATS | IONIC_QCQ_F_SG;
 	for (i = 0; i < lif->nxqs; i++) {
 		err = ionic_qcq_alloc(lif, IONIC_QTYPE_TXQ, i, "tx", flags,
 				      lif->ntxq_descs,
 				      sizeof(struct ionic_txq_desc),
 				      sizeof(struct ionic_txq_comp),
-				      sizeof(struct ionic_txq_sg_desc),
+				      sg_desc_sz,
 				      lif->kern_pid, &lif->txqcqs[i].qcq);
 		if (err)
 			goto err_out;
@@ -2065,9 +2090,17 @@ int ionic_lifs_alloc(struct ionic *ionic)
 
 	/* only build the first lif, others are for later features */
 	set_bit(0, ionic->lifbits);
+
 	lif = ionic_lif_alloc(ionic, 0);
+	if (IS_ERR_OR_NULL(lif)) {
+		clear_bit(0, ionic->lifbits);
+		return -ENOMEM;
+	}
+
+	lif->lif_type = IONIC_LIF_TYPE_CLASSIC;
+	ionic_lif_queue_identify(lif);
 
-	return PTR_ERR_OR_ZERO(lif);
+	return 0;
 }
 
 static void ionic_lif_reset(struct ionic_lif *lif)
@@ -2291,6 +2324,7 @@ static int ionic_lif_notifyq_init(struct ionic_lif *lif)
 			.opcode = IONIC_CMD_Q_INIT,
 			.lif_index = cpu_to_le16(lif->index),
 			.type = q->type,
+			.ver = lif->qtype_info[q->type].version,
 			.index = cpu_to_le32(q->index),
 			.flags = cpu_to_le16(IONIC_QINIT_F_IRQ |
 					     IONIC_QINIT_F_ENA),
@@ -2573,6 +2607,80 @@ void ionic_lifs_unregister(struct ionic *ionic)
 		unregister_netdev(ionic->master_lif->netdev);
 }
 
+static void ionic_lif_queue_identify(struct ionic_lif *lif)
+{
+	struct ionic *ionic = lif->ionic;
+	union ionic_q_identity *q_ident;
+	struct ionic_dev *idev;
+	int qtype;
+	int err;
+
+	idev = &lif->ionic->idev;
+	q_ident = (union ionic_q_identity *)&idev->dev_cmd_regs->data;
+
+	for (qtype = 0; qtype < ARRAY_SIZE(ionic_qtype_versions); qtype++) {
+		struct ionic_qtype_info *qti = &lif->qtype_info[qtype];
+
+		/* skip queue types this driver does not know about */
+		switch (qtype) {
+		case IONIC_QTYPE_ADMINQ:
+		case IONIC_QTYPE_NOTIFYQ:
+		case IONIC_QTYPE_RXQ:
+		case IONIC_QTYPE_TXQ:
+			break;
+		default:
+			continue;
+		}
+
+		memset(qti, 0, sizeof(*qti));
+
+		mutex_lock(&ionic->dev_cmd_lock);
+		ionic_dev_cmd_queue_identify(idev, lif->lif_type, qtype,
+					     ionic_qtype_versions[qtype]);
+		err = ionic_dev_cmd_wait(ionic, DEVCMD_TIMEOUT);
+		if (!err) {
+			qti->version   = q_ident->version;
+			qti->supported = q_ident->supported;
+			qti->features  = le64_to_cpu(q_ident->features);
+			qti->desc_sz   = le16_to_cpu(q_ident->desc_sz);
+			qti->comp_sz   = le16_to_cpu(q_ident->comp_sz);
+			qti->sg_desc_sz   = le16_to_cpu(q_ident->sg_desc_sz);
+			qti->max_sg_elems = le16_to_cpu(q_ident->max_sg_elems);
+			qti->sg_desc_stride = le16_to_cpu(q_ident->sg_desc_stride);
+		}
+		mutex_unlock(&ionic->dev_cmd_lock);
+
+		if (err == -EINVAL) {
+			dev_err(ionic->dev, "qtype %d not supported\n", qtype);
+			continue;
+		} else if (err == -EIO) {
+			dev_err(ionic->dev, "q_ident failed, not supported on older FW\n");
+			return;
+		} else if (err) {
+			dev_err(ionic->dev, "q_ident failed, qtype %d: %d\n",
+				qtype, err);
+			return;
+		}
+
+		dev_dbg(ionic->dev, " qtype[%d].version = %d\n",
+			qtype, qti->version);
+		dev_dbg(ionic->dev, " qtype[%d].supported = 0x%02x\n",
+			qtype, qti->supported);
+		dev_dbg(ionic->dev, " qtype[%d].features = 0x%04llx\n",
+			qtype, qti->features);
+		dev_dbg(ionic->dev, " qtype[%d].desc_sz = %d\n",
+			qtype, qti->desc_sz);
+		dev_dbg(ionic->dev, " qtype[%d].comp_sz = %d\n",
+			qtype, qti->comp_sz);
+		dev_dbg(ionic->dev, " qtype[%d].sg_desc_sz = %d\n",
+			qtype, qti->sg_desc_sz);
+		dev_dbg(ionic->dev, " qtype[%d].max_sg_elems = %d\n",
+			qtype, qti->max_sg_elems);
+		dev_dbg(ionic->dev, " qtype[%d].sg_desc_stride = %d\n",
+			qtype, qti->sg_desc_stride);
+	}
+}
+
 int ionic_lif_identify(struct ionic *ionic, u8 lif_type,
 		       union ionic_lif_identity *lid)
 {
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 5d4ffda5c05f..1a30f0fb20b9 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -133,6 +133,17 @@ enum ionic_lif_state_flags {
 	IONIC_LIF_F_STATE_SIZE
 };
 
+struct ionic_qtype_info {
+	u8  version;
+	u8  supported;
+	u64 features;
+	u16 desc_sz;
+	u16 comp_sz;
+	u16 sg_desc_sz;
+	u16 max_sg_elems;
+	u16 sg_desc_stride;
+};
+
 #define IONIC_LIF_NAME_MAX_SZ		32
 struct ionic_lif {
 	char name[IONIC_LIF_NAME_MAX_SZ];
@@ -161,11 +172,13 @@ struct ionic_lif {
 	bool mc_overflow;
 	unsigned int nmcast;
 	bool uc_overflow;
+	u16 lif_type;
 	unsigned int nucast;
 
 	struct ionic_lif_info *info;
 	dma_addr_t info_pa;
 	u32 info_sz;
+	struct ionic_qtype_info qtype_info[IONIC_QTYPE_MAX];
 
 	u16 rss_types;
 	u8 rss_hash_key[IONIC_RSS_HASH_KEY_SIZE];
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 3ed150512091..8e2436d14621 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -152,6 +152,8 @@ static const char *ionic_opcode_to_str(enum ionic_cmd_opcode opcode)
 		return "IONIC_CMD_RX_FILTER_ADD";
 	case IONIC_CMD_RX_FILTER_DEL:
 		return "IONIC_CMD_RX_FILTER_DEL";
+	case IONIC_CMD_Q_IDENTIFY:
+		return "IONIC_CMD_Q_IDENTIFY";
 	case IONIC_CMD_Q_INIT:
 		return "IONIC_CMD_Q_INIT";
 	case IONIC_CMD_Q_CONTROL:
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index d233b6e77b1e..6b14e55a6780 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -10,8 +10,10 @@
 #include "ionic_lif.h"
 #include "ionic_txrx.h"
 
-static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info, void *cb_arg);
+static void ionic_rx_clean(struct ionic_queue *q,
+			   struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info,
+			   void *cb_arg);
 
 static inline void ionic_txq_post(struct ionic_queue *q, bool ring_dbell,
 				  ionic_desc_cb cb_func, void *cb_arg)
@@ -140,8 +142,10 @@ static struct sk_buff *ionic_rx_copybreak(struct ionic_queue *q,
 	return skb;
 }
 
-static void ionic_rx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info, void *cb_arg)
+static void ionic_rx_clean(struct ionic_queue *q,
+			   struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info,
+			   void *cb_arg)
 {
 	struct ionic_rxq_comp *comp = cq_info->cq_desc;
 	struct ionic_qcq *qcq = q_to_qcq(q);
@@ -475,7 +479,8 @@ int ionic_rx_napi(struct napi_struct *napi, int budget)
 	return work_done;
 }
 
-static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t len)
+static dma_addr_t ionic_tx_map_single(struct ionic_queue *q,
+				      void *data, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	struct device *dev = q->lif->ionic->dev;
@@ -491,7 +496,8 @@ static dma_addr_t ionic_tx_map_single(struct ionic_queue *q, void *data, size_t
 	return dma_addr;
 }
 
-static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, const skb_frag_t *frag,
+static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q,
+				    const skb_frag_t *frag,
 				    size_t offset, size_t len)
 {
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
@@ -507,8 +513,10 @@ static dma_addr_t ionic_tx_map_frag(struct ionic_queue *q, const skb_frag_t *fra
 	return dma_addr;
 }
 
-static void ionic_tx_clean(struct ionic_queue *q, struct ionic_desc_info *desc_info,
-			   struct ionic_cq_info *cq_info, void *cb_arg)
+static void ionic_tx_clean(struct ionic_queue *q,
+			   struct ionic_desc_info *desc_info,
+			   struct ionic_cq_info *cq_info,
+			   void *cb_arg)
 {
 	struct ionic_txq_sg_desc *sg_desc = desc_info->sg_desc;
 	struct ionic_txq_sg_elem *elem = sg_desc->elems;
@@ -989,6 +997,7 @@ static int ionic_tx(struct ionic_queue *q, struct sk_buff *skb)
 
 static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 {
+	int sg_elems = q->lif->qtype_info[IONIC_QTYPE_TXQ].max_sg_elems;
 	struct ionic_tx_stats *stats = q_to_tx_stats(q);
 	int err;
 
@@ -997,7 +1006,7 @@ static int ionic_tx_descs_needed(struct ionic_queue *q, struct sk_buff *skb)
 		return (skb->len / skb_shinfo(skb)->gso_size) + 1;
 
 	/* If non-TSO, just need 1 desc and nr_frags sg elems */
-	if (skb_shinfo(skb)->nr_frags <= IONIC_TX_MAX_SG_ELEMS)
+	if (skb_shinfo(skb)->nr_frags <= sg_elems)
 		return 1;
 
 	/* Too many frags, so linearize */
-- 
cgit v1.2.3-59-g8ed1b


From c4e7a75a096c02035a102686e2569e7b0341a122 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:28 -0700
Subject: ionic: updates to ionic FW api description

Lots of comment cleanup for better documentation, a few new
fields added, and a few minor mistakes fixed up.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_if.h | 979 +++++++++++++++----------
 1 file changed, 576 insertions(+), 403 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_if.h b/drivers/net/ethernet/pensando/ionic/ionic_if.h
index 799f3ea599e9..7e22ba4ed915 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_if.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_if.h
@@ -1,5 +1,5 @@
 /* SPDX-License-Identifier: (GPL-2.0 OR Linux-OpenIB) OR BSD-2-Clause */
-/* Copyright (c) 2017-2019 Pensando Systems, Inc.  All rights reserved. */
+/* Copyright (c) 2017-2020 Pensando Systems, Inc.  All rights reserved. */
 
 #ifndef _IONIC_IF_H_
 #define _IONIC_IF_H_
@@ -9,7 +9,7 @@
 #define IONIC_IFNAMSIZ				16
 
 /**
- * Commands
+ * enum ionic_cmd_opcode - Device commands
  */
 enum ionic_cmd_opcode {
 	IONIC_CMD_NOP				= 0,
@@ -58,6 +58,7 @@ enum ionic_cmd_opcode {
 	IONIC_CMD_QOS_CLASS_IDENTIFY		= 240,
 	IONIC_CMD_QOS_CLASS_INIT		= 241,
 	IONIC_CMD_QOS_CLASS_RESET		= 242,
+	IONIC_CMD_QOS_CLASS_UPDATE		= 243,
 
 	/* Firmware commands */
 	IONIC_CMD_FW_DOWNLOAD			= 254,
@@ -65,7 +66,7 @@ enum ionic_cmd_opcode {
 };
 
 /**
- * Command Return codes
+ * enum ionic_status_code - Device command return codes
  */
 enum ionic_status_code {
 	IONIC_RC_SUCCESS	= 0,	/* Success */
@@ -98,6 +99,7 @@ enum ionic_notifyq_opcode {
 	IONIC_EVENT_RESET		= 2,
 	IONIC_EVENT_HEARTBEAT		= 3,
 	IONIC_EVENT_LOG			= 4,
+	IONIC_EVENT_XCVR		= 5,
 };
 
 /**
@@ -115,12 +117,11 @@ struct ionic_admin_cmd {
 
 /**
  * struct ionic_admin_comp - General admin command completion format
- * @status:     The status of the command (enum status_code)
- * @comp_index: The index in the descriptor ring for which this
- *              is the completion.
- * @cmd_data:   Command-specific bytes.
- * @color:      Color bit.  (Always 0 for commands issued to the
- *              Device Cmd Registers.)
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @cmd_data:   Command-specific bytes
+ * @color:      Color bit (Always 0 for commands issued to the
+ *              Device Cmd Registers)
  */
 struct ionic_admin_comp {
 	u8     status;
@@ -147,7 +148,7 @@ struct ionic_nop_cmd {
 
 /**
  * struct ionic_nop_comp - NOP command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  */
 struct ionic_nop_comp {
 	u8 status;
@@ -157,7 +158,7 @@ struct ionic_nop_comp {
 /**
  * struct ionic_dev_init_cmd - Device init command
  * @opcode:    opcode
- * @type:      device type
+ * @type:      Device type
  */
 struct ionic_dev_init_cmd {
 	u8     opcode;
@@ -167,7 +168,7 @@ struct ionic_dev_init_cmd {
 
 /**
  * struct init_comp - Device init command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  */
 struct ionic_dev_init_comp {
 	u8 status;
@@ -185,7 +186,7 @@ struct ionic_dev_reset_cmd {
 
 /**
  * struct reset_comp - Reset command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  */
 struct ionic_dev_reset_comp {
 	u8 status;
@@ -206,8 +207,8 @@ struct ionic_dev_identify_cmd {
 };
 
 /**
- * struct dev_identify_comp - Driver/device identify command completion
- * @status: The status of the command (enum status_code)
+ * struct ionic_dev_identify_comp - Driver/device identify command completion
+ * @status: Status of the command (enum ionic_status_code)
  * @ver:    Version of identify returned by device
  */
 struct ionic_dev_identify_comp {
@@ -226,8 +227,8 @@ enum ionic_os_type {
 };
 
 /**
- * union drv_identity - driver identity information
- * @os_type:          OS type (see enum os_type)
+ * union ionic_drv_identity - driver identity information
+ * @os_type:          OS type (see enum ionic_os_type)
  * @os_dist:          OS distribution, numeric format
  * @os_dist_str:      OS distribution, string format
  * @kernel_ver:       Kernel version, numeric format
@@ -243,26 +244,26 @@ union ionic_drv_identity {
 		char   kernel_ver_str[32];
 		char   driver_ver_str[32];
 	};
-	__le32 words[512];
+	__le32 words[478];
 };
 
 /**
- * union dev_identity - device identity information
+ * union ionic_dev_identity - device identity information
  * @version:          Version of device identify
  * @type:             Identify type (0 for now)
  * @nports:           Number of ports provisioned
  * @nlifs:            Number of LIFs provisioned
  * @nintrs:           Number of interrupts provisioned
  * @ndbpgs_per_lif:   Number of doorbell pages per LIF
- * @intr_coal_mult:   Interrupt coalescing multiplication factor.
+ * @intr_coal_mult:   Interrupt coalescing multiplication factor
  *                    Scale user-supplied interrupt coalescing
  *                    value in usecs to device units using:
  *                    device units = usecs * mult / div
- * @intr_coal_div:    Interrupt coalescing division factor.
+ * @intr_coal_div:    Interrupt coalescing division factor
  *                    Scale user-supplied interrupt coalescing
  *                    value in usecs to device units using:
  *                    device units = usecs * mult / div
- *
+ * @eq_count:         Number of shared event queues
  */
 union ionic_dev_identity {
 	struct {
@@ -276,8 +277,9 @@ union ionic_dev_identity {
 		__le32 ndbpgs_per_lif;
 		__le32 intr_coal_mult;
 		__le32 intr_coal_div;
+		__le32 eq_count;
 	};
-	__le32 words[512];
+	__le32 words[478];
 };
 
 enum ionic_lif_type {
@@ -287,10 +289,10 @@ enum ionic_lif_type {
 };
 
 /**
- * struct ionic_lif_identify_cmd - lif identify command
+ * struct ionic_lif_identify_cmd - LIF identify command
  * @opcode:  opcode
- * @type:    lif type (enum lif_type)
- * @ver:     version of identify returned by device
+ * @type:    LIF type (enum ionic_lif_type)
+ * @ver:     Version of identify returned by device
  */
 struct ionic_lif_identify_cmd {
 	u8 opcode;
@@ -300,9 +302,9 @@ struct ionic_lif_identify_cmd {
 };
 
 /**
- * struct ionic_lif_identify_comp - lif identify command completion
- * @status:  status of the command (enum status_code)
- * @ver:     version of identify returned by device
+ * struct ionic_lif_identify_comp - LIF identify command completion
+ * @status:  Status of the command (enum ionic_status_code)
+ * @ver:     Version of identify returned by device
  */
 struct ionic_lif_identify_comp {
 	u8 status;
@@ -310,13 +312,24 @@ struct ionic_lif_identify_comp {
 	u8 rsvd2[14];
 };
 
+/**
+ * enum ionic_lif_capability - LIF capabilities
+ * @IONIC_LIF_CAP_ETH:     LIF supports Ethernet
+ * @IONIC_LIF_CAP_RDMA:    LIF supports RDMA
+ */
 enum ionic_lif_capability {
 	IONIC_LIF_CAP_ETH        = BIT(0),
 	IONIC_LIF_CAP_RDMA       = BIT(1),
 };
 
 /**
- * Logical Queue Types
+ * enum ionic_logical_qtype - Logical Queue Types
+ * @IONIC_QTYPE_ADMINQ:    Administrative Queue
+ * @IONIC_QTYPE_NOTIFYQ:   Notify Queue
+ * @IONIC_QTYPE_RXQ:       Receive Queue
+ * @IONIC_QTYPE_TXQ:       Transmit Queue
+ * @IONIC_QTYPE_EQ:        Event Queue
+ * @IONIC_QTYPE_MAX:       Max queue type supported
  */
 enum ionic_logical_qtype {
 	IONIC_QTYPE_ADMINQ  = 0,
@@ -328,10 +341,10 @@ enum ionic_logical_qtype {
 };
 
 /**
- * struct ionic_lif_logical_qtype - Descriptor of logical to hardware queue type.
- * @qtype:          Hardware Queue Type.
- * @qid_count:      Number of Queue IDs of the logical type.
- * @qid_base:       Minimum Queue ID of the logical type.
+ * struct ionic_lif_logical_qtype - Descriptor of logical to HW queue type
+ * @qtype:          Hardware Queue Type
+ * @qid_count:      Number of Queue IDs of the logical type
+ * @qid_base:       Minimum Queue ID of the logical type
  */
 struct ionic_lif_logical_qtype {
 	u8     qtype;
@@ -340,6 +353,12 @@ struct ionic_lif_logical_qtype {
 	__le32 qid_base;
 };
 
+/**
+ * enum ionic_lif_state - LIF state
+ * @IONIC_LIF_DISABLE:     LIF disabled
+ * @IONIC_LIF_ENABLE:      LIF enabled
+ * @IONIC_LIF_HANG_RESET:  LIF hung, being reset
+ */
 enum ionic_lif_state {
 	IONIC_LIF_DISABLE	= 0,
 	IONIC_LIF_ENABLE	= 1,
@@ -347,13 +366,13 @@ enum ionic_lif_state {
 };
 
 /**
- * LIF configuration
- * @state:          lif state (enum lif_state)
- * @name:           lif name
- * @mtu:            mtu
- * @mac:            station mac address
- * @features:       features (enum ionic_eth_hw_features)
- * @queue_count:    queue counts per queue-type
+ * union ionic_lif_config - LIF configuration
+ * @state:          LIF state (enum ionic_lif_state)
+ * @name:           LIF name
+ * @mtu:            MTU
+ * @mac:            Station MAC address
+ * @features:       Features (enum ionic_eth_hw_features)
+ * @queue_count:    Queue counts per queue-type
  */
 union ionic_lif_config {
 	struct {
@@ -370,37 +389,36 @@ union ionic_lif_config {
 };
 
 /**
- * struct ionic_lif_identity - lif identity information (type-specific)
+ * struct ionic_lif_identity - LIF identity information (type-specific)
  *
- * @capabilities    LIF capabilities
+ * @capabilities:        LIF capabilities
  *
- * Ethernet:
- *     @version:          Ethernet identify structure version.
- *     @features:         Ethernet features supported on this lif type.
- *     @max_ucast_filters:  Number of perfect unicast addresses supported.
- *     @max_mcast_filters:  Number of perfect multicast addresses supported.
- *     @min_frame_size:   Minimum size of frames to be sent
- *     @max_frame_size:   Maximim size of frames to be sent
- *     @config:           LIF config struct with features, mtu, mac, q counts
+ * @eth:                    Ethernet identify structure
+ *     @version:            Ethernet identify structure version
+ *     @max_ucast_filters:  Number of perfect unicast addresses supported
+ *     @max_mcast_filters:  Number of perfect multicast addresses supported
+ *     @min_frame_size:     Minimum size of frames to be sent
+ *     @max_frame_size:     Maximum size of frames to be sent
+ *     @config:             LIF config struct with features, mtu, mac, q counts
  *
- * RDMA:
- *     @version:         RDMA version of opcodes and queue descriptors.
- *     @qp_opcodes:      Number of rdma queue pair opcodes supported.
- *     @admin_opcodes:   Number of rdma admin opcodes supported.
- *     @npts_per_lif:    Page table size per lif
- *     @nmrs_per_lif:    Number of memory regions per lif
- *     @nahs_per_lif:    Number of address handles per lif
- *     @max_stride:      Max work request stride.
- *     @cl_stride:       Cache line stride.
- *     @pte_stride:      Page table entry stride.
- *     @rrq_stride:      Remote RQ work request stride.
- *     @rsq_stride:      Remote SQ work request stride.
+ * @rdma:                RDMA identify structure
+ *     @version:         RDMA version of opcodes and queue descriptors
+ *     @qp_opcodes:      Number of RDMA queue pair opcodes supported
+ *     @admin_opcodes:   Number of RDMA admin opcodes supported
+ *     @npts_per_lif:    Page table size per LIF
+ *     @nmrs_per_lif:    Number of memory regions per LIF
+ *     @nahs_per_lif:    Number of address handles per LIF
+ *     @max_stride:      Max work request stride
+ *     @cl_stride:       Cache line stride
+ *     @pte_stride:      Page table entry stride
+ *     @rrq_stride:      Remote RQ work request stride
+ *     @rsq_stride:      Remote SQ work request stride
  *     @dcqcn_profiles:  Number of DCQCN profiles
- *     @aq_qtype:        RDMA Admin Qtype.
- *     @sq_qtype:        RDMA Send Qtype.
- *     @rq_qtype:        RDMA Receive Qtype.
- *     @cq_qtype:        RDMA Completion Qtype.
- *     @eq_qtype:        RDMA Event Qtype.
+ *     @aq_qtype:        RDMA Admin Qtype
+ *     @sq_qtype:        RDMA Send Qtype
+ *     @rq_qtype:        RDMA Receive Qtype
+ *     @cq_qtype:        RDMA Completion Qtype
+ *     @eq_qtype:        RDMA Event Qtype
  */
 union ionic_lif_identity {
 	struct {
@@ -440,15 +458,15 @@ union ionic_lif_identity {
 			struct ionic_lif_logical_qtype eq_qtype;
 		} __packed rdma;
 	} __packed;
-	__le32 words[512];
+	__le32 words[478];
 };
 
 /**
  * struct ionic_lif_init_cmd - LIF init command
- * @opcode:       opcode
- * @type:         LIF type (enum lif_type)
+ * @opcode:       Opcode
+ * @type:         LIF type (enum ionic_lif_type)
  * @index:        LIF index
- * @info_pa:      destination address for lif info (struct ionic_lif_info)
+ * @info_pa:      Destination address for LIF info (struct ionic_lif_info)
  */
 struct ionic_lif_init_cmd {
 	u8     opcode;
@@ -461,7 +479,8 @@ struct ionic_lif_init_cmd {
 
 /**
  * struct ionic_lif_init_comp - LIF init command completion
- * @status: The status of the command (enum status_code)
+ * @status:	Status of the command (enum ionic_status_code)
+ * @hw_index:	Hardware index of the initialized LIF
  */
 struct ionic_lif_init_comp {
 	u8 status;
@@ -534,10 +553,10 @@ union ionic_q_identity {
  * struct ionic_q_init_cmd - Queue init command
  * @opcode:       opcode
  * @type:         Logical queue type
- * @ver:          Queue version (defines opcode/descriptor scope)
+ * @ver:          Queue type version
  * @lif_index:    LIF index
- * @index:        (lif, qtype) relative admin queue index
- * @intr_index:   Interrupt control register index
+ * @index:        (LIF, qtype) relative admin queue index
+ * @intr_index:   Interrupt control register index, or Event queue index
  * @pid:          Process ID
  * @flags:
  *    IRQ:        Interrupt requested on completion
@@ -555,12 +574,11 @@ union ionic_q_identity {
  *                descriptors.  Values of ring_size <2 and >16 are
  *                reserved.
  *    EQ:         Enable the Event Queue
- * @cos:          Class of service for this queue.
+ * @cos:          Class of service for this queue
  * @ring_size:    Queue ring size, encoded as a log2(size)
  * @ring_base:    Queue ring base address
  * @cq_ring_base: Completion queue ring base address
  * @sg_ring_base: Scatter/Gather ring base address
- * @eq_index:	  Event queue index
  */
 struct ionic_q_init_cmd {
 	u8     opcode;
@@ -577,29 +595,27 @@ struct ionic_q_init_cmd {
 #define IONIC_QINIT_F_ENA	0x02	/* Enable the queue */
 #define IONIC_QINIT_F_SG	0x04	/* Enable scatter/gather on the queue */
 #define IONIC_QINIT_F_EQ	0x08	/* Enable event queue */
-#define IONIC_QINIT_F_DEBUG 0x80	/* Enable queue debugging */
+#define IONIC_QINIT_F_CMB	0x10	/* Enable cmb-based queue */
+#define IONIC_QINIT_F_DEBUG	0x80	/* Enable queue debugging */
 	u8     cos;
 	u8     ring_size;
 	__le64 ring_base;
 	__le64 cq_ring_base;
 	__le64 sg_ring_base;
-	__le32 eq_index;
-	u8     rsvd2[16];
+	u8     rsvd2[20];
 } __packed;
 
 /**
  * struct ionic_q_init_comp - Queue init command completion
- * @status:     The status of the command (enum status_code)
- * @ver:        Queue version (defines opcode/descriptor scope)
- * @comp_index: The index in the descriptor ring for which this
- *              is the completion.
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
  * @hw_index:   Hardware Queue ID
  * @hw_type:    Hardware Queue type
  * @color:      Color
  */
 struct ionic_q_init_comp {
 	u8     status;
-	u8     ver;
+	u8     rsvd;
 	__le16 comp_index;
 	__le32 hw_index;
 	u8     hw_type;
@@ -620,10 +636,9 @@ enum ionic_txq_desc_opcode {
 
 /**
  * struct ionic_txq_desc - Ethernet Tx queue descriptor format
- * @opcode:       Tx operation, see TXQ_DESC_OPCODE_*:
+ * @cmd:          Tx operation, see IONIC_TXQ_DESC_OPCODE_*:
  *
  *                   IONIC_TXQ_DESC_OPCODE_CSUM_NONE:
- *
  *                      Non-offload send.  No segmentation,
  *                      fragmentation or checksum calc/insertion is
  *                      performed by device; packet is prepared
@@ -631,7 +646,6 @@ enum ionic_txq_desc_opcode {
  *                      no further manipulation from device.
  *
  *                   IONIC_TXQ_DESC_OPCODE_CSUM_PARTIAL:
- *
  *                      Offload 16-bit L4 checksum
  *                      calculation/insertion.  The device will
  *                      calculate the L4 checksum value and
@@ -640,14 +654,16 @@ enum ionic_txq_desc_opcode {
  *                      is calculated starting at @csum_start bytes
  *                      into the packet to the end of the packet.
  *                      The checksum insertion position is given
- *                      in @csum_offset.  This feature is only
- *                      applicable to protocols such as TCP, UDP
- *                      and ICMP where a standard (i.e. the
- *                      'IP-style' checksum) one's complement
- *                      16-bit checksum is used, using an IP
- *                      pseudo-header to seed the calculation.
- *                      Software will preload the L4 checksum
- *                      field with the IP pseudo-header checksum.
+ *                      in @csum_offset, which is the offset from
+ *                      @csum_start to the checksum field in the L4
+ *                      header.  This feature is only applicable to
+ *                      protocols such as TCP, UDP and ICMP where a
+ *                      standard (i.e. the 'IP-style' checksum)
+ *                      one's complement 16-bit checksum is used,
+ *                      using an IP pseudo-header to seed the
+ *                      calculation.  Software will preload the L4
+ *                      checksum field with the IP pseudo-header
+ *                      checksum.
  *
  *                      For tunnel encapsulation, @csum_start and
  *                      @csum_offset refer to the inner L4
@@ -663,7 +679,6 @@ enum ionic_txq_desc_opcode {
  *                      for more info).
  *
  *                   IONIC_TXQ_DESC_OPCODE_CSUM_HW:
- *
  *                      Offload 16-bit checksum computation to hardware.
  *                      If @csum_l3 is set then the packet's L3 checksum is
  *                      updated. Similarly, if @csum_l4 is set the the L4
@@ -671,7 +686,6 @@ enum ionic_txq_desc_opcode {
  *                      checksums are also updated.
  *
  *                   IONIC_TXQ_DESC_OPCODE_TSO:
- *
  *                      Device preforms TCP segmentation offload
  *                      (TSO).  @hdr_len is the number of bytes
  *                      to the end of TCP header (the offset to
@@ -698,40 +712,41 @@ enum ionic_txq_desc_opcode {
  *                      clear CWR in remaining segments.
  * @flags:
  *                vlan:
- *                    Insert an L2 VLAN header using @vlan_tci.
+ *                    Insert an L2 VLAN header using @vlan_tci
  *                encap:
- *                    Calculate encap header checksum.
+ *                    Calculate encap header checksum
  *                csum_l3:
- *                    Compute L3 header checksum.
+ *                    Compute L3 header checksum
  *                csum_l4:
- *                    Compute L4 header checksum.
+ *                    Compute L4 header checksum
  *                tso_sot:
  *                    TSO start
  *                tso_eot:
  *                    TSO end
  * @num_sg_elems: Number of scatter-gather elements in SG
  *                descriptor
- * @addr:         First data buffer's DMA address.
- *                (Subsequent data buffers are on txq_sg_desc).
+ * @addr:         First data buffer's DMA address
+ *                (Subsequent data buffers are on txq_sg_desc)
  * @len:          First data buffer's length, in bytes
  * @vlan_tci:     VLAN tag to insert in the packet (if requested
  *                by @V-bit).  Includes .1p and .1q tags
  * @hdr_len:      Length of packet headers, including
- *                encapsulating outer header, if applicable.
- *                Valid for opcodes TXQ_DESC_OPCODE_CALC_CSUM and
- *                TXQ_DESC_OPCODE_TSO.  Should be set to zero for
+ *                encapsulating outer header, if applicable.
+ *                Valid for opcodes IONIC_TXQ_DESC_OPCODE_CALC_CSUM and
+ *                IONIC_TXQ_DESC_OPCODE_TSO.  Should be set to zero for
  *                all other modes.  For
- *                TXQ_DESC_OPCODE_CALC_CSUM, @hdr_len is length
+ *                IONIC_TXQ_DESC_OPCODE_CALC_CSUM, @hdr_len is length
  *                of headers up to inner-most L4 header.  For
- *                TXQ_DESC_OPCODE_TSO, @hdr_len is up to
+ *                IONIC_TXQ_DESC_OPCODE_TSO, @hdr_len is up to
  *                inner-most L4 payload, so inclusive of
  *                inner-most L4 header.
- * @mss:          Desired MSS value for TSO.  Only applicable for
- *                TXQ_DESC_OPCODE_TSO.
- * @csum_start:   Offset into inner-most L3 header of checksum
- * @csum_offset:  Offset into inner-most L4 header of checksum
+ * @mss:          Desired MSS value for TSO; only applicable for
+ *                IONIC_TXQ_DESC_OPCODE_TSO
+ * @csum_start:   Offset from packet to first byte checked in L4 checksum
+ * @csum_offset:  Offset from csum_start to L4 checksum field
  */
-
+struct ionic_txq_desc {
+	__le64  cmd;
 #define IONIC_TXQ_DESC_OPCODE_MASK		0xf
 #define IONIC_TXQ_DESC_OPCODE_SHIFT		4
 #define IONIC_TXQ_DESC_FLAGS_MASK		0xf
@@ -753,8 +768,6 @@ enum ionic_txq_desc_opcode {
 #define IONIC_TXQ_DESC_FLAG_TSO_SOT		0x4
 #define IONIC_TXQ_DESC_FLAG_TSO_EOT		0x8
 
-struct ionic_txq_desc {
-	__le64  cmd;
 	__le16  len;
 	union {
 		__le16  vlan_tci;
@@ -823,10 +836,9 @@ struct ionic_txq_sg_desc_v1 {
 
 /**
  * struct ionic_txq_comp - Ethernet transmit queue completion descriptor
- * @status:     The status of the command (enum status_code)
- * @comp_index: The index in the descriptor ring for which this
- *                 is the completion.
- * @color:      Color bit.
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @color:      Color bit
  */
 struct ionic_txq_comp {
 	u8     status;
@@ -843,16 +855,15 @@ enum ionic_rxq_desc_opcode {
 
 /**
  * struct ionic_rxq_desc - Ethernet Rx queue descriptor format
- * @opcode:       Rx operation, see RXQ_DESC_OPCODE_*:
- *
- *                   RXQ_DESC_OPCODE_SIMPLE:
+ * @opcode:       Rx operation, see IONIC_RXQ_DESC_OPCODE_*:
  *
+ *                   IONIC_RXQ_DESC_OPCODE_SIMPLE:
  *                      Receive full packet into data buffer
  *                      starting at @addr.  Results of
  *                      receive, including actual bytes received,
  *                      are recorded in Rx completion descriptor.
  *
- * @len:          Data buffer's length, in bytes.
+ * @len:          Data buffer's length, in bytes
  * @addr:         Data buffer's DMA address
  */
 struct ionic_rxq_desc {
@@ -863,7 +874,7 @@ struct ionic_rxq_desc {
 };
 
 /**
- * struct ionic_rxq_sg_desc - Receive scatter-gather (SG) descriptor element
+ * struct ionic_rxq_sg_elem - Receive scatter-gather (SG) descriptor element
  * @addr:      DMA address of SG element data buffer
  * @len:       Length of SG element data buffer, in bytes
  */
@@ -885,12 +896,11 @@ struct ionic_rxq_sg_desc {
 
 /**
  * struct ionic_rxq_comp - Ethernet receive queue completion descriptor
- * @status:       The status of the command (enum status_code)
+ * @status:       Status of the command (enum ionic_status_code)
  * @num_sg_elems: Number of SG elements used by this descriptor
- * @comp_index:   The index in the descriptor ring for which this
- *                is the completion.
+ * @comp_index:   Index in the descriptor ring for which this is the completion
  * @rss_hash:     32-bit RSS hash
- * @csum:         16-bit sum of the packet's L2 payload.
+ * @csum:         16-bit sum of the packet's L2 payload
  *                If the packet's L2 payload is odd length, an extra
  *                zero-value byte is included in the @csum calculation but
  *                not included in @len.
@@ -898,33 +908,51 @@ struct ionic_rxq_sg_desc {
  *                set.  Includes .1p and .1q tags.
  * @len:          Received packet length, in bytes.  Excludes FCS.
  * @csum_calc     L2 payload checksum is computed or not
- * @csum_tcp_ok:  The TCP checksum calculated by the device
- *                matched the checksum in the receive packet's
- *                TCP header
- * @csum_tcp_bad: The TCP checksum calculated by the device did
- *                not match the checksum in the receive packet's
- *                TCP header.
- * @csum_udp_ok:  The UDP checksum calculated by the device
- *                matched the checksum in the receive packet's
- *                UDP header
- * @csum_udp_bad: The UDP checksum calculated by the device did
- *                not match the checksum in the receive packet's
- *                UDP header.
- * @csum_ip_ok:   The IPv4 checksum calculated by the device
- *                matched the checksum in the receive packet's
- *                first IPv4 header.  If the receive packet
- *                contains both a tunnel IPv4 header and a
- *                transport IPv4 header, the device validates the
- *                checksum for the both IPv4 headers.
- * @csum_ip_bad:  The IPv4 checksum calculated by the device did
- *                not match the checksum in the receive packet's
- *                first IPv4 header. If the receive packet
- *                contains both a tunnel IPv4 header and a
- *                transport IPv4 header, the device validates the
- *                checksum for both IP headers.
- * @VLAN:         VLAN header was stripped and placed in @vlan_tci.
- * @pkt_type:     Packet type
- * @color:        Color bit.
+ * @csum_flags:   See IONIC_RXQ_COMP_CSUM_F_*:
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_TCP_OK:
+ *                    The TCP checksum calculated by the device
+ *                    matched the checksum in the receive packet's
+ *                    TCP header.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_TCP_BAD:
+ *                    The TCP checksum calculated by the device did
+ *                    not match the checksum in the receive packet's
+ *                    TCP header.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_UDP_OK:
+ *                    The UDP checksum calculated by the device
+ *                    matched the checksum in the receive packet's
+ *                    UDP header.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_UDP_BAD:
+ *                    The UDP checksum calculated by the device did
+ *                    not match the checksum in the receive packet's
+ *                    UDP header.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_IP_OK:
+ *                    The IPv4 checksum calculated by the device
+ *                    matched the checksum in the receive packet's
+ *                    first IPv4 header.  If the receive packet
+ *                    contains both a tunnel IPv4 header and a
+ *                    transport IPv4 header, the device validates the
+ *                    checksum for the both IPv4 headers.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_IP_BAD:
+ *                    The IPv4 checksum calculated by the device did
+ *                    not match the checksum in the receive packet's
+ *                    first IPv4 header. If the receive packet
+ *                    contains both a tunnel IPv4 header and a
+ *                    transport IPv4 header, the device validates the
+ *                    checksum for both IP headers.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_VLAN:
+ *                    The VLAN header was stripped and placed in @vlan_tci.
+ *
+ *                  IONIC_RXQ_COMP_CSUM_F_CALC:
+ *                    The checksum was calculated by the device.
+ *
+ * @pkt_type_color: Packet type and color bit; see IONIC_RXQ_COMP_PKT_TYPE_MASK
  */
 struct ionic_rxq_comp {
 	u8     status;
@@ -971,8 +999,8 @@ enum ionic_eth_hw_features {
 	IONIC_ETH_HW_TSO_ECN		= BIT(10),
 	IONIC_ETH_HW_TSO_GRE		= BIT(11),
 	IONIC_ETH_HW_TSO_GRE_CSUM	= BIT(12),
-	IONIC_ETH_HW_TSO_IPXIP4	= BIT(13),
-	IONIC_ETH_HW_TSO_IPXIP6	= BIT(14),
+	IONIC_ETH_HW_TSO_IPXIP4		= BIT(13),
+	IONIC_ETH_HW_TSO_IPXIP6		= BIT(14),
 	IONIC_ETH_HW_TSO_UDP		= BIT(15),
 	IONIC_ETH_HW_TSO_UDP_CSUM	= BIT(16),
 };
@@ -1003,7 +1031,10 @@ enum q_control_oper {
 };
 
 /**
- * Physical connection type
+ * enum ionic_phy_type - Physical connection type
+ * @IONIC_PHY_TYPE_NONE:    No PHY installed
+ * @IONIC_PHY_TYPE_COPPER:  Copper PHY
+ * @IONIC_PHY_TYPE_FIBER:   Fiber PHY
  */
 enum ionic_phy_type {
 	IONIC_PHY_TYPE_NONE	= 0,
@@ -1012,18 +1043,23 @@ enum ionic_phy_type {
 };
 
 /**
- * Transceiver status
+ * enum ionic_xcvr_state - Transceiver status
+ * @IONIC_XCVR_STATE_REMOVED:        Transceiver removed
+ * @IONIC_XCVR_STATE_INSERTED:       Transceiver inserted
+ * @IONIC_XCVR_STATE_PENDING:        Transceiver pending
+ * @IONIC_XCVR_STATE_SPROM_READ:     Transceiver data read
+ * @IONIC_XCVR_STATE_SPROM_READ_ERR: Transceiver data read error
  */
 enum ionic_xcvr_state {
 	IONIC_XCVR_STATE_REMOVED	 = 0,
 	IONIC_XCVR_STATE_INSERTED	 = 1,
 	IONIC_XCVR_STATE_PENDING	 = 2,
 	IONIC_XCVR_STATE_SPROM_READ	 = 3,
-	IONIC_XCVR_STATE_SPROM_READ_ERR  = 4,
+	IONIC_XCVR_STATE_SPROM_READ_ERR	 = 4,
 };
 
 /**
- * Supported link modes
+ * enum ionic_xcvr_pid - Supported link modes
  */
 enum ionic_xcvr_pid {
 	IONIC_XCVR_PID_UNKNOWN           = 0,
@@ -1057,64 +1093,83 @@ enum ionic_xcvr_pid {
 	IONIC_XCVR_PID_SFP_10GBASE_CU   = 68,
 	IONIC_XCVR_PID_QSFP_100G_CWDM4  = 69,
 	IONIC_XCVR_PID_QSFP_100G_PSM4   = 70,
+	IONIC_XCVR_PID_SFP_25GBASE_ACC  = 71,
 };
 
 /**
- * Port types
+ * enum ionic_port_type - Port types
+ * @IONIC_PORT_TYPE_NONE:           Port type not configured
+ * @IONIC_PORT_TYPE_ETH:            Port carries ethernet traffic (inband)
+ * @IONIC_PORT_TYPE_MGMT:           Port carries mgmt traffic (out-of-band)
  */
 enum ionic_port_type {
-	IONIC_PORT_TYPE_NONE = 0,  /* port type not configured */
-	IONIC_PORT_TYPE_ETH  = 1,  /* port carries ethernet traffic (inband) */
-	IONIC_PORT_TYPE_MGMT = 2,  /* port carries mgmt traffic (out-of-band) */
+	IONIC_PORT_TYPE_NONE = 0,
+	IONIC_PORT_TYPE_ETH  = 1,
+	IONIC_PORT_TYPE_MGMT = 2,
 };
 
 /**
- * Port config state
+ * enum ionic_port_admin_state - Port config state
+ * @IONIC_PORT_ADMIN_STATE_NONE:    Port admin state not configured
+ * @IONIC_PORT_ADMIN_STATE_DOWN:    Port admin disabled
+ * @IONIC_PORT_ADMIN_STATE_UP:      Port admin enabled
  */
 enum ionic_port_admin_state {
-	IONIC_PORT_ADMIN_STATE_NONE = 0,   /* port admin state not configured */
-	IONIC_PORT_ADMIN_STATE_DOWN = 1,   /* port is admin disabled */
-	IONIC_PORT_ADMIN_STATE_UP   = 2,   /* port is admin enabled */
+	IONIC_PORT_ADMIN_STATE_NONE = 0,
+	IONIC_PORT_ADMIN_STATE_DOWN = 1,
+	IONIC_PORT_ADMIN_STATE_UP   = 2,
 };
 
 /**
- * Port operational status
+ * enum ionic_port_oper_status - Port operational status
+ * @IONIC_PORT_OPER_STATUS_NONE:    Port disabled
+ * @IONIC_PORT_OPER_STATUS_UP:      Port link status up
+ * @IONIC_PORT_OPER_STATUS_DOWN:    Port link status down
  */
 enum ionic_port_oper_status {
-	IONIC_PORT_OPER_STATUS_NONE  = 0,	/* port is disabled */
-	IONIC_PORT_OPER_STATUS_UP    = 1,	/* port is linked up */
-	IONIC_PORT_OPER_STATUS_DOWN  = 2,	/* port link status is down */
+	IONIC_PORT_OPER_STATUS_NONE  = 0,
+	IONIC_PORT_OPER_STATUS_UP    = 1,
+	IONIC_PORT_OPER_STATUS_DOWN  = 2,
 };
 
 /**
- * Ethernet Forward error correction (fec) modes
+ * enum ionic_port_fec_type - Ethernet Forward error correction (FEC) modes
+ * @IONIC_PORT_FEC_TYPE_NONE:       FEC Disabled
+ * @IONIC_PORT_FEC_TYPE_FC:         FireCode FEC
+ * @IONIC_PORT_FEC_TYPE_RS:         ReedSolomon FEC
  */
 enum ionic_port_fec_type {
-	IONIC_PORT_FEC_TYPE_NONE = 0,		/* Disabled */
-	IONIC_PORT_FEC_TYPE_FC   = 1,		/* FireCode */
-	IONIC_PORT_FEC_TYPE_RS   = 2,		/* ReedSolomon */
+	IONIC_PORT_FEC_TYPE_NONE = 0,
+	IONIC_PORT_FEC_TYPE_FC   = 1,
+	IONIC_PORT_FEC_TYPE_RS   = 2,
 };
 
 /**
- * Ethernet pause (flow control) modes
+ * enum ionic_port_pause_type - Ethernet pause (flow control) modes
+ * @IONIC_PORT_PAUSE_TYPE_NONE:     Disable Pause
+ * @IONIC_PORT_PAUSE_TYPE_LINK:     Link level pause
+ * @IONIC_PORT_PAUSE_TYPE_PFC:      Priority-Flow Control
  */
 enum ionic_port_pause_type {
-	IONIC_PORT_PAUSE_TYPE_NONE = 0,	/* Disable Pause */
-	IONIC_PORT_PAUSE_TYPE_LINK = 1,	/* Link level pause */
-	IONIC_PORT_PAUSE_TYPE_PFC  = 2,	/* Priority-Flow control */
+	IONIC_PORT_PAUSE_TYPE_NONE = 0,
+	IONIC_PORT_PAUSE_TYPE_LINK = 1,
+	IONIC_PORT_PAUSE_TYPE_PFC  = 2,
 };
 
 /**
- * Loopback modes
+ * enum ionic_port_loopback_mode - Loopback modes
+ * @IONIC_PORT_LOOPBACK_MODE_NONE:  Disable loopback
+ * @IONIC_PORT_LOOPBACK_MODE_MAC:   MAC loopback
+ * @IONIC_PORT_LOOPBACK_MODE_PHY:   PHY/SerDes loopback
  */
 enum ionic_port_loopback_mode {
-	IONIC_PORT_LOOPBACK_MODE_NONE = 0,	/* Disable loopback */
-	IONIC_PORT_LOOPBACK_MODE_MAC  = 1,	/* MAC loopback */
-	IONIC_PORT_LOOPBACK_MODE_PHY  = 2,	/* PHY/Serdes loopback */
+	IONIC_PORT_LOOPBACK_MODE_NONE = 0,
+	IONIC_PORT_LOOPBACK_MODE_MAC  = 1,
+	IONIC_PORT_LOOPBACK_MODE_PHY  = 2,
 };
 
 /**
- * Transceiver Status information
+ * struct ionic_xcvr_status - Transceiver Status information
  * @state:    Transceiver status (enum ionic_xcvr_state)
  * @phy:      Physical connection type (enum ionic_phy_type)
  * @pid:      Transceiver link mode (enum pid)
@@ -1128,7 +1183,7 @@ struct ionic_xcvr_status {
 };
 
 /**
- * Port configuration
+ * union ionic_port_config - Port configuration
  * @speed:              port speed (in Mbps)
  * @mtu:                mtu
  * @state:              port admin state (enum port_admin_state)
@@ -1161,17 +1216,21 @@ union ionic_port_config {
 };
 
 /**
- * Port Status information
+ * struct ionic_port_status - Port Status information
  * @status:             link status (enum ionic_port_oper_status)
  * @id:                 port id
  * @speed:              link speed (in Mbps)
+ * @link_down_count:    number of times link went from up to down
+ * @fec_type:           fec type (enum ionic_port_fec_type)
  * @xcvr:               tranceiver status
  */
 struct ionic_port_status {
 	__le32 id;
 	__le32 speed;
 	u8     status;
-	u8     rsvd[51];
+	__le16 link_down_count;
+	u8     fec_type;
+	u8     rsvd[48];
 	struct ionic_xcvr_status  xcvr;
 } __packed;
 
@@ -1190,7 +1249,7 @@ struct ionic_port_identify_cmd {
 
 /**
  * struct ionic_port_identify_comp - Port identify command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  * @ver:    Version of identify returned by device
  */
 struct ionic_port_identify_comp {
@@ -1215,7 +1274,7 @@ struct ionic_port_init_cmd {
 
 /**
  * struct ionic_port_init_comp - Port initialization command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  */
 struct ionic_port_init_comp {
 	u8 status;
@@ -1235,7 +1294,7 @@ struct ionic_port_reset_cmd {
 
 /**
  * struct ionic_port_reset_comp - Port reset command completion
- * @status: The status of the command (enum status_code)
+ * @status: Status of the command (enum ionic_status_code)
  */
 struct ionic_port_reset_comp {
 	u8 status;
@@ -1243,15 +1302,23 @@ struct ionic_port_reset_comp {
 };
 
 /**
- * enum stats_ctl_cmd - List of commands for stats control
+ * enum ionic_stats_ctl_cmd - List of commands for stats control
+ * @IONIC_STATS_CTL_RESET:      Reset statistics
  */
 enum ionic_stats_ctl_cmd {
 	IONIC_STATS_CTL_RESET		= 0,
 };
 
-
 /**
  * enum ionic_port_attr - List of device attributes
+ * @IONIC_PORT_ATTR_STATE:      Port state attribute
+ * @IONIC_PORT_ATTR_SPEED:      Port speed attribute
+ * @IONIC_PORT_ATTR_MTU:        Port MTU attribute
+ * @IONIC_PORT_ATTR_AUTONEG:    Port autonegotiation attribute
+ * @IONIC_PORT_ATTR_FEC:        Port FEC attribute
+ * @IONIC_PORT_ATTR_PAUSE:      Port pause attribute
+ * @IONIC_PORT_ATTR_LOOPBACK:   Port loopback attribute
+ * @IONIC_PORT_ATTR_STATS_CTRL: Port statistics control attribute
  */
 enum ionic_port_attr {
 	IONIC_PORT_ATTR_STATE		= 0,
@@ -1266,9 +1333,17 @@ enum ionic_port_attr {
 
 /**
  * struct ionic_port_setattr_cmd - Set port attributes on the NIC
- * @opcode:     Opcode
- * @index:      port index
- * @attr:       Attribute type (enum ionic_port_attr)
+ * @opcode:         Opcode
+ * @index:          Port index
+ * @attr:           Attribute type (enum ionic_port_attr)
+ * @state:          Port state
+ * @speed:          Port speed
+ * @mtu:            Port MTU
+ * @an_enable:      Port autonegotiation setting
+ * @fec_type:       Port FEC type setting
+ * @pause_type:     Port pause type setting
+ * @loopback_mode:  Port loopback mode
+ * @stats_ctl:      Port stats setting
  */
 struct ionic_port_setattr_cmd {
 	u8     opcode;
@@ -1283,14 +1358,14 @@ struct ionic_port_setattr_cmd {
 		u8      fec_type;
 		u8      pause_type;
 		u8      loopback_mode;
-		u8	stats_ctl;
+		u8      stats_ctl;
 		u8      rsvd2[60];
 	};
 };
 
 /**
  * struct ionic_port_setattr_comp - Port set attr command completion
- * @status:     The status of the command (enum status_code)
+ * @status:     Status of the command (enum ionic_status_code)
  * @color:      Color bit
  */
 struct ionic_port_setattr_comp {
@@ -1314,8 +1389,15 @@ struct ionic_port_getattr_cmd {
 
 /**
  * struct ionic_port_getattr_comp - Port get attr command completion
- * @status:     The status of the command (enum status_code)
- * @color:      Color bit
+ * @status:         Status of the command (enum ionic_status_code)
+ * @state:          Port state
+ * @speed:          Port speed
+ * @mtu:            Port MTU
+ * @an_enable:      Port autonegotiation setting
+ * @fec_type:       Port FEC type setting
+ * @pause_type:     Port pause type setting
+ * @loopback_mode:  Port loopback mode
+ * @color:          Color bit
  */
 struct ionic_port_getattr_comp {
 	u8     status;
@@ -1334,12 +1416,12 @@ struct ionic_port_getattr_comp {
 };
 
 /**
- * struct ionic_lif_status - Lif status register
+ * struct ionic_lif_status - LIF status register
  * @eid:             most recent NotifyQ event id
- * @port_num:        port the lif is connected to
+ * @port_num:        port the LIF is connected to
  * @link_status:     port status (enum ionic_port_oper_status)
  * @link_speed:      speed of link in Mbps
- * @link_down_count: number of times link status changes
+ * @link_down_count: number of times link went from up to down
  */
 struct ionic_lif_status {
 	__le64 eid;
@@ -1373,6 +1455,9 @@ enum ionic_dev_state {
 
 /**
  * enum ionic_dev_attr - List of device attributes
+ * @IONIC_DEV_ATTR_STATE:     Device state attribute
+ * @IONIC_DEV_ATTR_NAME:      Device name attribute
+ * @IONIC_DEV_ATTR_FEATURES:  Device feature attributes
  */
 enum ionic_dev_attr {
 	IONIC_DEV_ATTR_STATE    = 0,
@@ -1402,7 +1487,7 @@ struct ionic_dev_setattr_cmd {
 
 /**
  * struct ionic_dev_setattr_comp - Device set attr command completion
- * @status:     The status of the command (enum status_code)
+ * @status:     Status of the command (enum ionic_status_code)
  * @features:   Device features
  * @color:      Color bit
  */
@@ -1429,7 +1514,7 @@ struct ionic_dev_getattr_cmd {
 
 /**
  * struct ionic_dev_setattr_comp - Device set attr command completion
- * @status:     The status of the command (enum status_code)
+ * @status:     Status of the command (enum ionic_status_code)
  * @features:   Device features
  * @color:      Color bit
  */
@@ -1459,6 +1544,13 @@ enum ionic_rss_hash_types {
 
 /**
  * enum ionic_lif_attr - List of LIF attributes
+ * @IONIC_LIF_ATTR_STATE:       LIF state attribute
+ * @IONIC_LIF_ATTR_NAME:        LIF name attribute
+ * @IONIC_LIF_ATTR_MTU:         LIF MTU attribute
+ * @IONIC_LIF_ATTR_MAC:         LIF MAC attribute
+ * @IONIC_LIF_ATTR_FEATURES:    LIF features attribute
+ * @IONIC_LIF_ATTR_RSS:         LIF RSS attribute
+ * @IONIC_LIF_ATTR_STATS_CTRL:  LIF statistics control attribute
  */
 enum ionic_lif_attr {
 	IONIC_LIF_ATTR_STATE        = 0,
@@ -1473,18 +1565,18 @@ enum ionic_lif_attr {
 /**
  * struct ionic_lif_setattr_cmd - Set LIF attributes on the NIC
  * @opcode:     Opcode
- * @type:       Attribute type (enum ionic_lif_attr)
+ * @attr:       Attribute type (enum ionic_lif_attr)
  * @index:      LIF index
- * @state:      lif state (enum lif_state)
+ * @state:      LIF state (enum ionic_lif_state)
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
  * @features:   Features (enum ionic_eth_hw_features)
  * @rss:        RSS properties
- *              @types:     The hash types to enable (see rss_hash_types).
- *              @key:       The hash secret key.
- *              @addr:      Address for the indirection table shared memory.
- * @stats_ctl:  stats control commands (enum stats_ctl_cmd)
+ *              @types:     The hash types to enable (see ionic_rss_hash_types)
+ *              @key:       The hash secret key
+ *              @addr:      Address for the indirection table shared memory
+ * @stats_ctl:  stats control commands (enum ionic_stats_ctl_cmd)
  */
 struct ionic_lif_setattr_cmd {
 	u8     opcode;
@@ -1502,16 +1594,15 @@ struct ionic_lif_setattr_cmd {
 			u8     rsvd[6];
 			__le64 addr;
 		} rss;
-		u8	stats_ctl;
+		u8      stats_ctl;
 		u8      rsvd[60];
 	} __packed;
 };
 
 /**
  * struct ionic_lif_setattr_comp - LIF set attr command completion
- * @status:     The status of the command (enum status_code)
- * @comp_index: The index in the descriptor ring for which this
- *              is the completion.
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
  * @features:   features (enum ionic_eth_hw_features)
  * @color:      Color bit
  */
@@ -1541,10 +1632,9 @@ struct ionic_lif_getattr_cmd {
 
 /**
  * struct ionic_lif_getattr_comp - LIF get attr command completion
- * @status:     The status of the command (enum status_code)
- * @comp_index: The index in the descriptor ring for which this
- *              is the completion.
- * @state:      lif state (enum lif_state)
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @state:      LIF state (enum ionic_lif_state)
  * @name:       The netdev name string, 0 terminated
  * @mtu:        Mtu
  * @mac:        Station mac
@@ -1566,11 +1656,12 @@ struct ionic_lif_getattr_comp {
 };
 
 enum ionic_rx_mode {
-	IONIC_RX_MODE_F_UNICAST    = BIT(0),
-	IONIC_RX_MODE_F_MULTICAST  = BIT(1),
-	IONIC_RX_MODE_F_BROADCAST  = BIT(2),
-	IONIC_RX_MODE_F_PROMISC    = BIT(3),
-	IONIC_RX_MODE_F_ALLMULTI   = BIT(4),
+	IONIC_RX_MODE_F_UNICAST		= BIT(0),
+	IONIC_RX_MODE_F_MULTICAST	= BIT(1),
+	IONIC_RX_MODE_F_BROADCAST	= BIT(2),
+	IONIC_RX_MODE_F_PROMISC		= BIT(3),
+	IONIC_RX_MODE_F_ALLMULTI	= BIT(4),
+	IONIC_RX_MODE_F_RDMA_SNIFFER	= BIT(5),
 };
 
 /**
@@ -1578,11 +1669,12 @@ enum ionic_rx_mode {
  * @opcode:     opcode
  * @lif_index:  LIF index
  * @rx_mode:    Rx mode flags:
- *                  IONIC_RX_MODE_F_UNICAST: Accept known unicast packets.
- *                  IONIC_RX_MODE_F_MULTICAST: Accept known multicast packets.
- *                  IONIC_RX_MODE_F_BROADCAST: Accept broadcast packets.
- *                  IONIC_RX_MODE_F_PROMISC: Accept any packets.
- *                  IONIC_RX_MODE_F_ALLMULTI: Accept any multicast packets.
+ *                  IONIC_RX_MODE_F_UNICAST: Accept known unicast packets
+ *                  IONIC_RX_MODE_F_MULTICAST: Accept known multicast packets
+ *                  IONIC_RX_MODE_F_BROADCAST: Accept broadcast packets
+ *                  IONIC_RX_MODE_F_PROMISC: Accept any packets
+ *                  IONIC_RX_MODE_F_ALLMULTI: Accept any multicast packets
+ *                  IONIC_RX_MODE_F_RDMA_SNIFFER: Sniff RDMA packets
  */
 struct ionic_rx_mode_set_cmd {
 	u8     opcode;
@@ -1606,9 +1698,14 @@ enum ionic_rx_filter_match_type {
  * @qtype:      Queue type
  * @lif_index:  LIF index
  * @qid:        Queue ID
- * @match:      Rx filter match type.  (See IONIC_RX_FILTER_MATCH_xxx)
- * @vlan:       VLAN ID
- * @addr:       MAC address (network-byte order)
+ * @match:      Rx filter match type (see IONIC_RX_FILTER_MATCH_xxx)
+ * @vlan:       VLAN filter
+ *              @vlan:  VLAN ID
+ * @mac:        MAC filter
+ *              @addr:  MAC address (network-byte order)
+ * @mac_vlan:   MACVLAN filter
+ *              @vlan:  VLAN ID
+ *              @addr:  MAC address (network-byte order)
  */
 struct ionic_rx_filter_add_cmd {
 	u8     opcode;
@@ -1633,11 +1730,10 @@ struct ionic_rx_filter_add_cmd {
 
 /**
  * struct ionic_rx_filter_add_comp - Add LIF Rx filter command completion
- * @status:     The status of the command (enum status_code)
- * @comp_index: The index in the descriptor ring for which this
- *              is the completion.
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
  * @filter_id:  Filter ID
- * @color:      Color bit.
+ * @color:      Color bit
  */
 struct ionic_rx_filter_add_comp {
 	u8     status;
@@ -1664,63 +1760,6 @@ struct ionic_rx_filter_del_cmd {
 
 typedef struct ionic_admin_comp ionic_rx_filter_del_comp;
 
-/**
- * struct ionic_qos_identify_cmd - QoS identify command
- * @opcode:    opcode
- * @ver:     Highest version of identify supported by driver
- *
- */
-struct ionic_qos_identify_cmd {
-	u8 opcode;
-	u8 ver;
-	u8 rsvd[62];
-};
-
-/**
- * struct ionic_qos_identify_comp - QoS identify command completion
- * @status: The status of the command (enum status_code)
- * @ver:    Version of identify returned by device
- */
-struct ionic_qos_identify_comp {
-	u8 status;
-	u8 ver;
-	u8 rsvd[14];
-};
-
-#define IONIC_QOS_CLASS_MAX		7
-#define IONIC_QOS_CLASS_NAME_SZ		32
-#define IONIC_QOS_DSCP_MAX_VALUES	64
-
-/**
- * enum ionic_qos_class
- */
-enum ionic_qos_class {
-	IONIC_QOS_CLASS_DEFAULT		= 0,
-	IONIC_QOS_CLASS_USER_DEFINED_1	= 1,
-	IONIC_QOS_CLASS_USER_DEFINED_2	= 2,
-	IONIC_QOS_CLASS_USER_DEFINED_3	= 3,
-	IONIC_QOS_CLASS_USER_DEFINED_4	= 4,
-	IONIC_QOS_CLASS_USER_DEFINED_5	= 5,
-	IONIC_QOS_CLASS_USER_DEFINED_6	= 6,
-};
-
-/**
- * enum ionic_qos_class_type - Traffic classification criteria
- */
-enum ionic_qos_class_type {
-	IONIC_QOS_CLASS_TYPE_NONE	= 0,
-	IONIC_QOS_CLASS_TYPE_PCP	= 1,	/* Dot1Q pcp */
-	IONIC_QOS_CLASS_TYPE_DSCP	= 2,	/* IP dscp */
-};
-
-/**
- * enum ionic_qos_sched_type - Qos class scheduling type
- */
-enum ionic_qos_sched_type {
-	IONIC_QOS_SCHED_TYPE_STRICT	= 0,	/* Strict priority */
-	IONIC_QOS_SCHED_TYPE_DWRR	= 1,	/* Deficit weighted round-robin */
-};
-
 enum ionic_vf_attr {
 	IONIC_VF_ATTR_SPOOFCHK	= 1,
 	IONIC_VF_ATTR_TRUST	= 2,
@@ -1732,26 +1771,29 @@ enum ionic_vf_attr {
 };
 
 /**
- * VF link status
+ * enum ionic_vf_link_status - Virtual Function link status
+ * @IONIC_VF_LINK_STATUS_AUTO:   Use link state of the uplink
+ * @IONIC_VF_LINK_STATUS_UP:     Link always up
+ * @IONIC_VF_LINK_STATUS_DOWN:   Link always down
  */
 enum ionic_vf_link_status {
-	IONIC_VF_LINK_STATUS_AUTO = 0,	/* link state of the uplink */
-	IONIC_VF_LINK_STATUS_UP   = 1,	/* link is always up */
-	IONIC_VF_LINK_STATUS_DOWN = 2,	/* link is always down */
+	IONIC_VF_LINK_STATUS_AUTO = 0,
+	IONIC_VF_LINK_STATUS_UP   = 1,
+	IONIC_VF_LINK_STATUS_DOWN = 2,
 };
 
 /**
  * struct ionic_vf_setattr_cmd - Set VF attributes on the NIC
  * @opcode:     Opcode
- * @index:      VF index
  * @attr:       Attribute type (enum ionic_vf_attr)
- *	macaddr		mac address
- *	vlanid		vlan ID
- *	maxrate		max Tx rate in Mbps
- *	spoofchk	enable address spoof checking
- *	trust		enable VF trust
- *	linkstate	set link up or down
- *	stats_pa	set DMA address for VF stats
+ * @vf_index:   VF index
+ *	@macaddr:	mac address
+ *	@vlanid:	vlan ID
+ *	@maxrate:	max Tx rate in Mbps
+ *	@spoofchk:	enable address spoof checking
+ *	@trust:		enable VF trust
+ *	@linkstate:	set link up or down
+ *	@stats_pa:	set DMA address for VF stats
  */
 struct ionic_vf_setattr_cmd {
 	u8     opcode;
@@ -1781,8 +1823,8 @@ struct ionic_vf_setattr_comp {
 /**
  * struct ionic_vf_getattr_cmd - Get VF attributes from the NIC
  * @opcode:     Opcode
- * @index:      VF index
  * @attr:       Attribute type (enum ionic_vf_attr)
+ * @vf_index:   VF index
  */
 struct ionic_vf_getattr_cmd {
 	u8     opcode;
@@ -1809,19 +1851,85 @@ struct ionic_vf_getattr_comp {
 };
 
 /**
- * union ionic_qos_config - Qos configuration structure
+ * struct ionic_qos_identify_cmd - QoS identify command
+ * @opcode:  opcode
+ * @ver:     Highest version of identify supported by driver
+ *
+ */
+struct ionic_qos_identify_cmd {
+	u8 opcode;
+	u8 ver;
+	u8 rsvd[62];
+};
+
+/**
+ * struct ionic_qos_identify_comp - QoS identify command completion
+ * @status: Status of the command (enum ionic_status_code)
+ * @ver:    Version of identify returned by device
+ */
+struct ionic_qos_identify_comp {
+	u8 status;
+	u8 ver;
+	u8 rsvd[14];
+};
+
+#define IONIC_QOS_TC_MAX		8
+/* Capri max supported, should be renamed. */
+#define IONIC_QOS_CLASS_MAX		7
+#define IONIC_QOS_PCP_MAX		8
+#define IONIC_QOS_CLASS_NAME_SZ	32
+#define IONIC_QOS_DSCP_MAX		64
+#define IONIC_QOS_ALL_PCP		0xFF
+
+/**
+ * enum ionic_qos_class - QoS class identifiers
+ */
+enum ionic_qos_class {
+	IONIC_QOS_CLASS_DEFAULT		= 0,
+	IONIC_QOS_CLASS_USER_DEFINED_1	= 1,
+	IONIC_QOS_CLASS_USER_DEFINED_2	= 2,
+	IONIC_QOS_CLASS_USER_DEFINED_3	= 3,
+	IONIC_QOS_CLASS_USER_DEFINED_4	= 4,
+	IONIC_QOS_CLASS_USER_DEFINED_5	= 5,
+	IONIC_QOS_CLASS_USER_DEFINED_6	= 6,
+};
+
+/**
+ * enum ionic_qos_class_type - Traffic classification criteria
+ * @IONIC_QOS_CLASS_TYPE_NONE:    No QoS
+ * @IONIC_QOS_CLASS_TYPE_PCP:     Dot1Q PCP
+ * @IONIC_QOS_CLASS_TYPE_DSCP:    IP DSCP
+ */
+enum ionic_qos_class_type {
+	IONIC_QOS_CLASS_TYPE_NONE	= 0,
+	IONIC_QOS_CLASS_TYPE_PCP	= 1,
+	IONIC_QOS_CLASS_TYPE_DSCP	= 2,
+};
+
+/**
+ * enum ionic_qos_sched_type - QoS class scheduling type
+ * @IONIC_QOS_SCHED_TYPE_STRICT:  Strict priority
+ * @IONIC_QOS_SCHED_TYPE_DWRR:    Deficit weighted round-robin
+ */
+enum ionic_qos_sched_type {
+	IONIC_QOS_SCHED_TYPE_STRICT	= 0,
+	IONIC_QOS_SCHED_TYPE_DWRR	= 1,
+};
+
+/**
+ * union ionic_qos_config - QoS configuration structure
  * @flags:		Configuration flags
  *	IONIC_QOS_CONFIG_F_ENABLE		enable
- *	IONIC_QOS_CONFIG_F_DROP			drop/nodrop
+ *	IONIC_QOS_CONFIG_F_NO_DROP		drop/nodrop
  *	IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP		enable dot1q pcp rewrite
  *	IONIC_QOS_CONFIG_F_RW_IP_DSCP		enable ip dscp rewrite
- * @sched_type:		Qos class scheduling type (enum ionic_qos_sched_type)
- * @class_type:		Qos class type (enum ionic_qos_class_type)
- * @pause_type:		Qos pause type (enum ionic_qos_pause_type)
- * @name:		Qos class name
+ * @sched_type:		QoS class scheduling type (enum ionic_qos_sched_type)
+ * @class_type:		QoS class type (enum ionic_qos_class_type)
+ * @pause_type:		QoS pause type (enum ionic_qos_pause_type)
+ * @name:		QoS class name
  * @mtu:		MTU of the class
- * @pfc_dot1q_pcp:	Pcp value for pause frames (valid iff F_NODROP)
- * @dwrr_weight:	Qos class scheduling weight
+ * @pfc_cos:		Priority-Flow Control class of service
+ * @dwrr_weight:	QoS class scheduling weight
  * @strict_rlmt:	Rate limit for strict priority scheduling
  * @rw_dot1q_pcp:	Rewrite dot1q pcp to this value	(valid iff F_RW_DOT1Q_PCP)
  * @rw_ip_dscp:		Rewrite ip dscp to this value	(valid iff F_RW_IP_DSCP)
@@ -1832,7 +1940,8 @@ struct ionic_vf_getattr_comp {
 union ionic_qos_config {
 	struct {
 #define IONIC_QOS_CONFIG_F_ENABLE		BIT(0)
-#define IONIC_QOS_CONFIG_F_DROP			BIT(1)
+#define IONIC_QOS_CONFIG_F_NO_DROP		BIT(1)
+/* Used to rewrite PCP or DSCP value. */
 #define IONIC_QOS_CONFIG_F_RW_DOT1Q_PCP		BIT(2)
 #define IONIC_QOS_CONFIG_F_RW_IP_DSCP		BIT(3)
 		u8      flags;
@@ -1849,6 +1958,7 @@ union ionic_qos_config {
 			__le64  strict_rlmt;
 		};
 		/* marking */
+		/* Used to rewrite PCP or DSCP value. */
 		union {
 			u8      rw_dot1q_pcp;
 			u8      rw_ip_dscp;
@@ -1858,7 +1968,7 @@ union ionic_qos_config {
 			u8      dot1q_pcp;
 			struct {
 				u8      ndscp;
-				u8      ip_dscp[IONIC_QOS_DSCP_MAX_VALUES];
+				u8      ip_dscp[IONIC_QOS_DSCP_MAX];
 			};
 		};
 	};
@@ -1877,15 +1987,15 @@ union ionic_qos_identity {
 		u8     version;
 		u8     type;
 		u8     rsvd[62];
-		union  ionic_qos_config config[IONIC_QOS_CLASS_MAX];
+		union ionic_qos_config config[IONIC_QOS_CLASS_MAX];
 	};
-	__le32 words[512];
+	__le32 words[478];
 };
 
 /**
- * struct qos_init_cmd - QoS config init command
+ * struct ionic_qos_init_cmd - QoS config init command
  * @opcode:	Opcode
- * @group:	Qos class id
+ * @group:	QoS class id
  * @info_pa:	destination address for qos info
  */
 struct ionic_qos_init_cmd {
@@ -1899,8 +2009,9 @@ struct ionic_qos_init_cmd {
 typedef struct ionic_admin_comp ionic_qos_init_comp;
 
 /**
- * struct ionic_qos_reset_cmd - Qos config reset command
+ * struct ionic_qos_reset_cmd - QoS config reset command
  * @opcode:	Opcode
+ * @group:	QoS class id
  */
 struct ionic_qos_reset_cmd {
 	u8    opcode;
@@ -1927,10 +2038,16 @@ struct ionic_fw_download_cmd {
 
 typedef struct ionic_admin_comp ionic_fw_download_comp;
 
+/**
+ * enum ionic_fw_control_oper - FW control operations
+ * @IONIC_FW_RESET:     Reset firmware
+ * @IONIC_FW_INSTALL:   Install firmware
+ * @IONIC_FW_ACTIVATE:  Activate firmware
+ */
 enum ionic_fw_control_oper {
-	IONIC_FW_RESET		= 0,	/* Reset firmware */
-	IONIC_FW_INSTALL	= 1,	/* Install firmware */
-	IONIC_FW_ACTIVATE	= 2,	/* Activate firmware */
+	IONIC_FW_RESET		= 0,
+	IONIC_FW_INSTALL	= 1,
+	IONIC_FW_ACTIVATE	= 2,
 };
 
 /**
@@ -1949,8 +2066,10 @@ struct ionic_fw_control_cmd {
 
 /**
  * struct ionic_fw_control_comp - Firmware control copletion
- * @opcode:    opcode
- * @slot:      slot where the firmware was installed
+ * @status:     Status of the command (enum ionic_status_code)
+ * @comp_index: Index in the descriptor ring for which this is the completion
+ * @slot:       Slot where the firmware was installed
+ * @color:      Color bit
  */
 struct ionic_fw_control_comp {
 	u8     status;
@@ -1968,11 +2087,11 @@ struct ionic_fw_control_comp {
 /**
  * struct ionic_rdma_reset_cmd - Reset RDMA LIF cmd
  * @opcode:        opcode
- * @lif_index:     lif index
+ * @lif_index:     LIF index
  *
- * There is no rdma specific dev command completion struct.  Completion uses
+ * There is no RDMA specific dev command completion struct.  Completion uses
  * the common struct ionic_admin_comp.  Only the status is indicated.
- * Nonzero status means the LIF does not support rdma.
+ * Nonzero status means the LIF does not support RDMA.
  **/
 struct ionic_rdma_reset_cmd {
 	u8     opcode;
@@ -1984,30 +2103,29 @@ struct ionic_rdma_reset_cmd {
 /**
  * struct ionic_rdma_queue_cmd - Create RDMA Queue command
  * @opcode:        opcode, 52, 53
- * @lif_index      lif index
- * @qid_ver:       (qid | (rdma version << 24))
+ * @lif_index:     LIF index
+ * @qid_ver:       (qid | (RDMA version << 24))
  * @cid:           intr, eq_id, or cq_id
  * @dbid:          doorbell page id
  * @depth_log2:    log base two of queue depth
  * @stride_log2:   log base two of queue stride
  * @dma_addr:      address of the queue memory
- * @xxx_table_index: temporary, but should not need pgtbl for contig. queues.
  *
- * The same command struct is used to create an rdma event queue, completion
- * queue, or rdma admin queue.  The cid is an interrupt number for an event
+ * The same command struct is used to create an RDMA event queue, completion
+ * queue, or RDMA admin queue.  The cid is an interrupt number for an event
  * queue, an event queue id for a completion queue, or a completion queue id
- * for an rdma admin queue.
+ * for an RDMA admin queue.
  *
  * The queue created via a dev command must be contiguous in dma space.
  *
  * The dev commands are intended only to be used during driver initialization,
- * to create queues supporting the rdma admin queue.  Other queues, and other
- * types of rdma resources like memory regions, will be created and registered
- * via the rdma admin queue, and will support a more complete interface
+ * to create queues supporting the RDMA admin queue.  Other queues, and other
+ * types of RDMA resources like memory regions, will be created and registered
+ * via the RDMA admin queue, and will support a more complete interface
  * providing scatter gather lists for larger, scattered queue buffers and
  * memory registration.
  *
- * There is no rdma specific dev command completion struct.  Completion uses
+ * There is no RDMA specific dev command completion struct.  Completion uses
  * the common struct ionic_admin_comp.  Only the status is indicated.
  **/
 struct ionic_rdma_queue_cmd {
@@ -2020,8 +2138,7 @@ struct ionic_rdma_queue_cmd {
 	u8     depth_log2;
 	u8     stride_log2;
 	__le64 dma_addr;
-	u8     rsvd2[36];
-	__le32 xxx_table_index;
+	u8     rsvd2[40];
 };
 
 /******************************************************************
@@ -2029,7 +2146,7 @@ struct ionic_rdma_queue_cmd {
  ******************************************************************/
 
 /**
- * struct ionic_notifyq_event
+ * struct ionic_notifyq_event - Generic event reporting structure
  * @eid:   event number
  * @ecode: event code
  * @data:  unspecified data about the event
@@ -2044,9 +2161,9 @@ struct ionic_notifyq_event {
 };
 
 /**
- * struct ionic_link_change_event
+ * struct ionic_link_change_event - Link change event notification
  * @eid:		event number
- * @ecode:		event code = EVENT_OPCODE_LINK_CHANGE
+ * @ecode:		event code = IONIC_EVENT_LINK_CHANGE
  * @link_status:	link up or down, with error bits (enum port_status)
  * @link_speed:		speed of the network link
  *
@@ -2061,9 +2178,9 @@ struct ionic_link_change_event {
 };
 
 /**
- * struct ionic_reset_event
+ * struct ionic_reset_event - Reset event notification
  * @eid:		event number
- * @ecode:		event code = EVENT_OPCODE_RESET
+ * @ecode:		event code = IONIC_EVENT_RESET
  * @reset_code:		reset type
  * @state:		0=pending, 1=complete, 2=error
  *
@@ -2079,11 +2196,9 @@ struct ionic_reset_event {
 };
 
 /**
- * struct ionic_heartbeat_event
+ * struct ionic_heartbeat_event - Sent periodically by NIC to indicate health
  * @eid:	event number
- * @ecode:	event code = EVENT_OPCODE_HEARTBEAT
- *
- * Sent periodically by the NIC to indicate continued health
+ * @ecode:	event code = IONIC_EVENT_HEARTBEAT
  */
 struct ionic_heartbeat_event {
 	__le64 eid;
@@ -2092,12 +2207,10 @@ struct ionic_heartbeat_event {
 };
 
 /**
- * struct ionic_log_event
+ * struct ionic_log_event - Sent to notify the driver of an internal error
  * @eid:	event number
- * @ecode:	event code = EVENT_OPCODE_LOG
+ * @ecode:	event code = IONIC_EVENT_LOG
  * @data:	log data
- *
- * Sent to notify the driver of an internal error.
  */
 struct ionic_log_event {
 	__le64 eid;
@@ -2106,7 +2219,18 @@ struct ionic_log_event {
 };
 
 /**
- * struct ionic_port_stats
+ * struct ionic_xcvr_event - Transceiver change event
+ * @eid:	event number
+ * @ecode:	event code = IONIC_EVENT_XCVR
+ */
+struct ionic_xcvr_event {
+	__le64 eid;
+	__le16 ecode;
+	u8     rsvd[54];
+};
+
+/**
+ * struct ionic_port_stats - Port statistics structure
  */
 struct ionic_port_stats {
 	__le64 frames_rx_ok;
@@ -2211,28 +2335,61 @@ struct ionic_mgmt_port_stats {
 	__le64 frames_rx_multicast;
 	__le64 frames_rx_broadcast;
 	__le64 frames_rx_pause;
-	__le64 frames_rx_bad_length0;
-	__le64 frames_rx_undersized1;
-	__le64 frames_rx_oversized2;
-	__le64 frames_rx_fragments3;
-	__le64 frames_rx_jabber4;
-	__le64 frames_rx_64b5;
-	__le64 frames_rx_65b_127b6;
-	__le64 frames_rx_128b_255b7;
-	__le64 frames_rx_256b_511b8;
-	__le64 frames_rx_512b_1023b9;
-	__le64 frames_rx_1024b_1518b0;
-	__le64 frames_rx_gt_1518b1;
-	__le64 frames_rx_fifo_full2;
-	__le64 frames_tx_ok3;
-	__le64 frames_tx_all4;
-	__le64 frames_tx_bad5;
-	__le64 octets_tx_ok6;
-	__le64 octets_tx_total7;
-	__le64 frames_tx_unicast8;
-	__le64 frames_tx_multicast9;
-	__le64 frames_tx_broadcast0;
-	__le64 frames_tx_pause1;
+	__le64 frames_rx_bad_length;
+	__le64 frames_rx_undersized;
+	__le64 frames_rx_oversized;
+	__le64 frames_rx_fragments;
+	__le64 frames_rx_jabber;
+	__le64 frames_rx_64b;
+	__le64 frames_rx_65b_127b;
+	__le64 frames_rx_128b_255b;
+	__le64 frames_rx_256b_511b;
+	__le64 frames_rx_512b_1023b;
+	__le64 frames_rx_1024b_1518b;
+	__le64 frames_rx_gt_1518b;
+	__le64 frames_rx_fifo_full;
+	__le64 frames_tx_ok;
+	__le64 frames_tx_all;
+	__le64 frames_tx_bad;
+	__le64 octets_tx_ok;
+	__le64 octets_tx_total;
+	__le64 frames_tx_unicast;
+	__le64 frames_tx_multicast;
+	__le64 frames_tx_broadcast;
+	__le64 frames_tx_pause;
+};
+
+enum ionic_pb_buffer_drop_stats {
+	IONIC_BUFFER_INTRINSIC_DROP = 0,
+	IONIC_BUFFER_DISCARDED,
+	IONIC_BUFFER_ADMITTED,
+	IONIC_BUFFER_OUT_OF_CELLS_DROP,
+	IONIC_BUFFER_OUT_OF_CELLS_DROP_2,
+	IONIC_BUFFER_OUT_OF_CREDIT_DROP,
+	IONIC_BUFFER_TRUNCATION_DROP,
+	IONIC_BUFFER_PORT_DISABLED_DROP,
+	IONIC_BUFFER_COPY_TO_CPU_TAIL_DROP,
+	IONIC_BUFFER_SPAN_TAIL_DROP,
+	IONIC_BUFFER_MIN_SIZE_VIOLATION_DROP,
+	IONIC_BUFFER_ENQUEUE_ERROR_DROP,
+	IONIC_BUFFER_INVALID_PORT_DROP,
+	IONIC_BUFFER_INVALID_OUTPUT_QUEUE_DROP,
+	IONIC_BUFFER_DROP_MAX,
+};
+
+/**
+ * struct ionic_port_pb_stats - packet buffers system stats
+ * uses ionic_pb_buffer_drop_stats for drop_counts[]
+ */
+struct ionic_port_pb_stats {
+	__le64 sop_count_in;
+	__le64 eop_count_in;
+	__le64 sop_count_out;
+	__le64 eop_count_out;
+	__le64 drop_counts[IONIC_BUFFER_DROP_MAX];
+	__le64 input_queue_buffer_occupancy[IONIC_QOS_TC_MAX];
+	__le64 input_queue_port_monitor[IONIC_QOS_TC_MAX];
+	__le64 output_queue_port_monitor[IONIC_QOS_TC_MAX];
 };
 
 /**
@@ -2264,22 +2421,31 @@ union ionic_port_identity {
 		u8     rsvd2[44];
 		union ionic_port_config config;
 	};
-	__le32 words[512];
+	__le32 words[478];
 };
 
 /**
  * struct ionic_port_info - port info structure
- * @port_status:     port status
- * @port_stats:      port stats
+ * @config:          Port configuration data
+ * @status:          Port status data
+ * @stats:           Port statistics data
+ * @mgmt_stats:      Port management statistics data
+ * @pb_stats:        uplink pb drop stats
  */
 struct ionic_port_info {
 	union ionic_port_config config;
 	struct ionic_port_status status;
-	struct ionic_port_stats stats;
+	union {
+		struct ionic_port_stats      stats;
+		struct ionic_mgmt_port_stats mgmt_stats;
+	};
+	/* room for pb_stats to start at 2k offset */
+	u8                          rsvd[760];
+	struct ionic_port_pb_stats  pb_stats;
 };
 
 /**
- * struct ionic_lif_stats
+ * struct ionic_lif_stats - LIF statistics structure
  */
 struct ionic_lif_stats {
 	/* RX */
@@ -2332,7 +2498,7 @@ struct ionic_lif_stats {
 	__le64 tx_queue_error;
 	__le64 tx_desc_fetch_error;
 	__le64 tx_desc_data_error;
-	__le64 rsvd9;
+	__le64 tx_queue_empty;
 	__le64 rsvd10;
 	__le64 rsvd11;
 	__le64 rsvd12;
@@ -2433,7 +2599,10 @@ struct ionic_lif_stats {
 };
 
 /**
- * struct ionic_lif_info - lif info structure
+ * struct ionic_lif_info - LIF info structure
+ * @config:	LIF configuration structure
+ * @status:	LIF status structure
+ * @stats:	LIF statistics structure
  */
 struct ionic_lif_info {
 	union ionic_lif_config config;
@@ -2471,6 +2640,7 @@ union ionic_dev_cmd {
 
 	struct ionic_q_identify_cmd q_identify;
 	struct ionic_q_init_cmd q_init;
+	struct ionic_q_control_cmd q_control;
 };
 
 union ionic_dev_cmd_comp {
@@ -2507,15 +2677,15 @@ union ionic_dev_cmd_comp {
 };
 
 /**
- * union dev_info - Device info register format (read-only)
- * @signature:       Signature value of 0x44455649 ('DEVI').
- * @version:         Current version of info.
- * @asic_type:       Asic type.
- * @asic_rev:        Asic revision.
- * @fw_status:       Firmware status.
- * @fw_heartbeat:    Firmware heartbeat counter.
- * @serial_num:      Serial number.
- * @fw_version:      Firmware version.
+ * union ionic_dev_info_regs - Device info register format (read-only)
+ * @signature:       Signature value of 0x44455649 ('DEVI')
+ * @version:         Current version of info
+ * @asic_type:       Asic type
+ * @asic_rev:        Asic revision
+ * @fw_status:       Firmware status
+ * @fw_heartbeat:    Firmware heartbeat counter
+ * @serial_num:      Serial number
+ * @fw_version:      Firmware version
  */
 union ionic_dev_info_regs {
 #define IONIC_DEVINFO_FWVERS_BUFLEN 32
@@ -2536,10 +2706,10 @@ union ionic_dev_info_regs {
 
 /**
  * union ionic_dev_cmd_regs - Device command register format (read-write)
- * @doorbell:        Device Cmd Doorbell, write-only.
+ * @doorbell:        Device Cmd Doorbell, write-only
  *                   Write a 1 to signal device to process cmd,
  *                   poll done for completion.
- * @done:            Done indicator, bit 0 == 1 when command is complete.
+ * @done:            Done indicator, bit 0 == 1 when command is complete
  * @cmd:             Opcode-specific command bytes
  * @comp:            Opcode-specific response bytes
  * @data:            Opcode-specific side-data
@@ -2557,7 +2727,7 @@ union ionic_dev_cmd_regs {
 };
 
 /**
- * union ionic_dev_regs - Device register format in for bar 0 page 0
+ * union ionic_dev_regs - Device register format for bar 0 page 0
  * @info:            Device info registers
  * @devcmd:          Device command registers
  */
@@ -2572,6 +2742,7 @@ union ionic_dev_regs {
 union ionic_adminq_cmd {
 	struct ionic_admin_cmd cmd;
 	struct ionic_nop_cmd nop;
+	struct ionic_q_identify_cmd q_identify;
 	struct ionic_q_init_cmd q_init;
 	struct ionic_q_control_cmd q_control;
 	struct ionic_lif_setattr_cmd lif_setattr;
@@ -2588,6 +2759,7 @@ union ionic_adminq_cmd {
 union ionic_adminq_comp {
 	struct ionic_admin_comp comp;
 	struct ionic_nop_comp nop;
+	struct ionic_q_identify_comp q_identify;
 	struct ionic_q_init_comp q_init;
 	struct ionic_lif_setattr_comp lif_setattr;
 	struct ionic_lif_getattr_comp lif_getattr;
@@ -2613,14 +2785,14 @@ union ionic_adminq_comp {
 /**
  * struct ionic_doorbell - Doorbell register layout
  * @p_index: Producer index
- * @ring:    Selects the specific ring of the queue to update.
+ * @ring:    Selects the specific ring of the queue to update
  *           Type-specific meaning:
- *              ring=0: Default producer/consumer queue.
+ *              ring=0: Default producer/consumer queue
  *              ring=1: (CQ, EQ) Re-Arm queue.  RDMA CQs
  *              send events to EQs when armed.  EQs send
  *              interrupts when armed.
- * @qid:     The queue id selects the queue destination for the
- *           producer index and flags.
+ * @qid_lo:  Queue destination for the producer index and flags (low bits)
+ * @qid_hi:  Queue destination for the producer index and flags (high bits)
  */
 struct ionic_doorbell {
 	__le16 p_index;
@@ -2653,6 +2825,7 @@ struct ionic_identity {
 	union ionic_lif_identity lif;
 	union ionic_port_identity port;
 	union ionic_qos_identity qos;
+	union ionic_q_identity txq;
 };
 
 #endif /* _IONIC_IF_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From a836c352291d4ad4031743a97a61f7916fe519b7 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:29 -0700
Subject: ionic: protect vf calls from fw reset

When going into a firmware upgrade cycle, we set the device as
not present to keep some user commands from trying to change
the driver while we're only half there.  Unfortunately, the
ndo_vf_* calls don't check netif_device_present() so we need
to add a check in the callbacks.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 26 ++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 0049f537ee40..5f63c611d1fd 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -1707,7 +1707,7 @@ int ionic_stop(struct net_device *netdev)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 
-	if (test_bit(IONIC_LIF_F_FW_RESET, lif->state))
+	if (!netif_device_present(netdev))
 		return 0;
 
 	ionic_stop_queues(lif);
@@ -1724,6 +1724,9 @@ static int ionic_get_vf_config(struct net_device *netdev,
 	struct ionic *ionic = lif->ionic;
 	int ret = 0;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_read(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1751,6 +1754,9 @@ static int ionic_get_vf_stats(struct net_device *netdev, int vf,
 	struct ionic_lif_stats *vs;
 	int ret = 0;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_read(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1786,6 +1792,9 @@ static int ionic_set_vf_mac(struct net_device *netdev, int vf, u8 *mac)
 	if (!(is_zero_ether_addr(mac) || is_valid_ether_addr(mac)))
 		return -EINVAL;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1817,6 +1826,9 @@ static int ionic_set_vf_vlan(struct net_device *netdev, int vf, u16 vlan,
 	if (proto != htons(ETH_P_8021Q))
 		return -EPROTONOSUPPORT;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1843,6 +1855,9 @@ static int ionic_set_vf_rate(struct net_device *netdev, int vf,
 	if (tx_min)
 		return -EINVAL;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1865,6 +1880,9 @@ static int ionic_set_vf_spoofchk(struct net_device *netdev, int vf, bool set)
 	u8 data = set;  /* convert to u8 for config */
 	int ret;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1887,6 +1905,9 @@ static int ionic_set_vf_trust(struct net_device *netdev, int vf, bool set)
 	u8 data = set;  /* convert to u8 for config */
 	int ret;
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
@@ -1923,6 +1944,9 @@ static int ionic_set_vf_link_state(struct net_device *netdev, int vf, int set)
 		return -EINVAL;
 	}
 
+	if (!netif_device_present(netdev))
+		return -EBUSY;
+
 	down_write(&ionic->vf_op_lock);
 
 	if (vf >= pci_num_vf(ionic->pdev) || !ionic->vfs) {
-- 
cgit v1.2.3-59-g8ed1b


From cba155d591aa28689332bc568632d2f868690be1 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:30 -0700
Subject: ionic: add support for more xcvr types

Add a couple more SFP and QSFP transceiver types to our
ethtool get link ksettings.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 3f9a73aaef61..170e72f31197 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -159,6 +159,8 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseSR4_Full);
 		break;
+	case IONIC_XCVR_PID_QSFP_100G_CWDM4:
+	case IONIC_XCVR_PID_QSFP_100G_PSM4:
 	case IONIC_XCVR_PID_QSFP_100G_LR4:
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     100000baseLR4_ER4_Full);
@@ -178,6 +180,7 @@ static int ionic_get_link_ksettings(struct net_device *netdev,
 		break;
 	case IONIC_XCVR_PID_SFP_25GBASE_SR:
 	case IONIC_XCVR_PID_SFP_25GBASE_AOC:
+	case IONIC_XCVR_PID_SFP_25GBASE_ACC:
 		ethtool_link_ksettings_add_link_mode(ks, supported,
 						     25000baseSR_Full);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 62ba8766f775e5e26c21731c695f68541d504ea6 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:31 -0700
Subject: ionic: shorter dev cmd wait time

Shorten our msleep time while polling for the dev command
request to finish.  Yes, checkpatch.pl complains that the
msleep might actually go longer - that won't hurt, but we'll
take the shorter time if we can get it.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index 8e2436d14621..c3f0f84164d3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -358,7 +358,7 @@ try_again:
 		done = ionic_dev_cmd_done(idev);
 		if (done)
 			break;
-		msleep(20);
+		msleep(5);
 		hb = ionic_heartbeat_check(ionic);
 	} while (!done && !hb && time_before(jiffies, max_wait));
 	duration = jiffies - start_time;
-- 
cgit v1.2.3-59-g8ed1b


From 5c7843112543c7726e7fe31ecc76476f296b7960 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:32 -0700
Subject: ionic: reset device at probe

Once we're talking to the device, tell it to reset to
be sure we've got a fresh, clean environment.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_main.c b/drivers/net/ethernet/pensando/ionic/ionic_main.c
index c3f0f84164d3..92110abcff96 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_main.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_main.c
@@ -415,6 +415,7 @@ int ionic_setup(struct ionic *ionic)
 	err = ionic_dev_setup(ionic);
 	if (err)
 		return err;
+	ionic_reset(ionic);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 36ac2c50924892a28e17ff463e354fec7650ee19 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:33 -0700
Subject: ionic: ionic_intr_free parameter change

Change the ionic_intr_free parameter from struct ionic_lif to
struct ionic since that's what it actually cares about.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 5f63c611d1fd..9bf142446645 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -197,10 +197,10 @@ static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
 	return 0;
 }
 
-static void ionic_intr_free(struct ionic_lif *lif, int index)
+static void ionic_intr_free(struct ionic *ionic, int index)
 {
-	if (index != INTR_INDEX_NOT_ASSIGNED && index < lif->ionic->nintrs)
-		clear_bit(index, lif->ionic->intrs);
+	if (index != INTR_INDEX_NOT_ASSIGNED && index < ionic->nintrs)
+		clear_bit(index, ionic->intrs);
 }
 
 static int ionic_qcq_enable(struct ionic_qcq *qcq)
@@ -310,7 +310,7 @@ static void ionic_qcq_free(struct ionic_lif *lif, struct ionic_qcq *qcq)
 		irq_set_affinity_hint(qcq->intr.vector, NULL);
 		devm_free_irq(dev, qcq->intr.vector, &qcq->napi);
 		qcq->intr.vector = 0;
-		ionic_intr_free(lif, qcq->intr.index);
+		ionic_intr_free(lif->ionic, qcq->intr.index);
 	}
 
 	devm_kfree(dev, qcq->cq.info);
@@ -356,7 +356,7 @@ static void ionic_link_qcq_interrupts(struct ionic_qcq *src_qcq,
 				      struct ionic_qcq *n_qcq)
 {
 	if (WARN_ON(n_qcq->flags & IONIC_QCQ_F_INTR)) {
-		ionic_intr_free(n_qcq->cq.lif, n_qcq->intr.index);
+		ionic_intr_free(n_qcq->cq.lif->ionic, n_qcq->intr.index);
 		n_qcq->flags &= ~IONIC_QCQ_F_INTR;
 	}
 
@@ -508,7 +508,7 @@ err_out_free_irq:
 		devm_free_irq(dev, new->intr.vector, &new->napi);
 err_out_free_intr:
 	if (flags & IONIC_QCQ_F_INTR)
-		ionic_intr_free(lif, new->intr.index);
+		ionic_intr_free(lif->ionic, new->intr.index);
 err_out:
 	dev_err(dev, "qcq alloc of %s%d failed %d\n", name, index, err);
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From c06107cabea356db62f45bf8049a260c238fadf2 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:34 -0700
Subject: ionic: more ionic name tweaks

Fix up a few more local names that need an "ionic" prefix.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_dev.h     | 10 +++++-----
 drivers/net/ethernet/pensando/ionic/ionic_ethtool.c | 13 +++++++------
 drivers/net/ethernet/pensando/ionic/ionic_lif.c     |  4 ++--
 3 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_dev.h b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
index 33519a8765eb..525434f10025 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_dev.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_dev.h
@@ -182,7 +182,7 @@ struct ionic_desc_info {
 	void *cb_arg;
 };
 
-#define QUEUE_NAME_MAX_SZ		32
+#define IONIC_QUEUE_NAME_MAX_SZ		32
 
 struct ionic_queue {
 	u64 dbell_count;
@@ -207,14 +207,14 @@ struct ionic_queue {
 	unsigned int desc_size;
 	unsigned int sg_desc_size;
 	unsigned int pid;
-	char name[QUEUE_NAME_MAX_SZ];
+	char name[IONIC_QUEUE_NAME_MAX_SZ];
 };
 
-#define INTR_INDEX_NOT_ASSIGNED		-1
-#define INTR_NAME_MAX_SZ		32
+#define IONIC_INTR_INDEX_NOT_ASSIGNED	-1
+#define IONIC_INTR_NAME_MAX_SZ		32
 
 struct ionic_intr_info {
-	char name[INTR_NAME_MAX_SZ];
+	char name[IONIC_INTR_NAME_MAX_SZ];
 	unsigned int index;
 	unsigned int vector;
 	u64 rearm_count;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
index 170e72f31197..f7e3ce3de04d 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_ethtool.c
@@ -12,10 +12,11 @@
 #include "ionic_stats.h"
 
 static const char ionic_priv_flags_strings[][ETH_GSTRING_LEN] = {
-#define PRIV_F_SW_DBG_STATS		BIT(0)
+#define IONIC_PRIV_F_SW_DBG_STATS	BIT(0)
 	"sw-dbg-stats",
 };
-#define PRIV_FLAGS_COUNT ARRAY_SIZE(ionic_priv_flags_strings)
+
+#define IONIC_PRIV_FLAGS_COUNT ARRAY_SIZE(ionic_priv_flags_strings)
 
 static void ionic_get_stats_strings(struct ionic_lif *lif, u8 *buf)
 {
@@ -58,7 +59,7 @@ static int ionic_get_sset_count(struct net_device *netdev, int sset)
 		count = ionic_get_stats_count(lif);
 		break;
 	case ETH_SS_PRIV_FLAGS:
-		count = PRIV_FLAGS_COUNT;
+		count = IONIC_PRIV_FLAGS_COUNT;
 		break;
 	}
 	return count;
@@ -75,7 +76,7 @@ static void ionic_get_strings(struct net_device *netdev,
 		break;
 	case ETH_SS_PRIV_FLAGS:
 		memcpy(buf, ionic_priv_flags_strings,
-		       PRIV_FLAGS_COUNT * ETH_GSTRING_LEN);
+		       IONIC_PRIV_FLAGS_COUNT * ETH_GSTRING_LEN);
 		break;
 	}
 }
@@ -557,7 +558,7 @@ static u32 ionic_get_priv_flags(struct net_device *netdev)
 	u32 priv_flags = 0;
 
 	if (test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state))
-		priv_flags |= PRIV_F_SW_DBG_STATS;
+		priv_flags |= IONIC_PRIV_F_SW_DBG_STATS;
 
 	return priv_flags;
 }
@@ -567,7 +568,7 @@ static int ionic_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 	struct ionic_lif *lif = netdev_priv(netdev);
 
 	clear_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state);
-	if (priv_flags & PRIV_F_SW_DBG_STATS)
+	if (priv_flags & IONIC_PRIV_F_SW_DBG_STATS)
 		set_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state);
 
 	return 0;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 9bf142446645..4da94c07d1d3 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -199,7 +199,7 @@ static int ionic_intr_alloc(struct ionic_lif *lif, struct ionic_intr_info *intr)
 
 static void ionic_intr_free(struct ionic *ionic, int index)
 {
-	if (index != INTR_INDEX_NOT_ASSIGNED && index < ionic->nintrs)
+	if (index != IONIC_INTR_INDEX_NOT_ASSIGNED && index < ionic->nintrs)
 		clear_bit(index, ionic->intrs);
 }
 
@@ -455,7 +455,7 @@ static int ionic_qcq_alloc(struct ionic_lif *lif, unsigned int type,
 			cpumask_set_cpu(new->intr.cpu,
 					&new->intr.affinity_mask);
 	} else {
-		new->intr.index = INTR_INDEX_NOT_ASSIGNED;
+		new->intr.index = IONIC_INTR_INDEX_NOT_ASSIGNED;
 	}
 
 	new->cq.info = devm_kzalloc(dev, sizeof(*new->cq.info) * num_descs,
-- 
cgit v1.2.3-59-g8ed1b


From f64e0c5698b7b1abb08b3d5bc07f95db45e87d76 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:35 -0700
Subject: ionic: add more ethtool stats

Add hardware port stats and a few more driver collected
statistics to the ethtool stats output.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/pensando/ionic/ionic_lif.c   |   4 +-
 drivers/net/ethernet/pensando/ionic/ionic_lif.h   |  15 ++-
 drivers/net/ethernet/pensando/ionic/ionic_stats.c | 136 +++++++++++++++++++++-
 drivers/net/ethernet/pensando/ionic/ionic_stats.h |   6 +
 drivers/net/ethernet/pensando/ionic/ionic_txrx.c  |  22 ++--
 5 files changed, 170 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.c b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
index 4da94c07d1d3..80b4d8332109 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.c
@@ -792,8 +792,8 @@ static int ionic_adminq_napi(struct napi_struct *napi, int budget)
 	return max(n_work, a_work);
 }
 
-static void ionic_get_stats64(struct net_device *netdev,
-			      struct rtnl_link_stats64 *ns)
+void ionic_get_stats64(struct net_device *netdev,
+		       struct rtnl_link_stats64 *ns)
 {
 	struct ionic_lif *lif = netdev_priv(netdev);
 	struct ionic_lif_stats *ls;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_lif.h b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
index 1a30f0fb20b9..c3428034a17b 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_lif.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_lif.h
@@ -20,11 +20,13 @@ struct ionic_tx_stats {
 	u64 bytes;
 	u64 clean;
 	u64 linearize;
-	u64 no_csum;
+	u64 csum_none;
 	u64 csum;
 	u64 crc32_csum;
 	u64 tso;
+	u64 tso_bytes;
 	u64 frags;
+	u64 vlan_inserted;
 	u64 sg_cntr[IONIC_MAX_NUM_SG_CNTR];
 };
 
@@ -38,6 +40,7 @@ struct ionic_rx_stats {
 	u64 csum_error;
 	u64 buffers_posted;
 	u64 dropped;
+	u64 vlan_stripped;
 };
 
 #define IONIC_QCQ_F_INITED		BIT(0)
@@ -114,11 +117,17 @@ struct ionic_lif_sw_stats {
 	u64 rx_packets;
 	u64 rx_bytes;
 	u64 tx_tso;
-	u64 tx_no_csum;
+	u64 tx_tso_bytes;
+	u64 tx_csum_none;
 	u64 tx_csum;
 	u64 rx_csum_none;
 	u64 rx_csum_complete;
 	u64 rx_csum_error;
+	u64 hw_tx_dropped;
+	u64 hw_rx_dropped;
+	u64 hw_rx_over_errors;
+	u64 hw_rx_missed_errors;
+	u64 hw_tx_aborted_errors;
 };
 
 enum ionic_lif_state_flags {
@@ -240,6 +249,8 @@ static inline u32 ionic_coal_hw_to_usec(struct ionic *ionic, u32 units)
 }
 
 void ionic_link_status_check_request(struct ionic_lif *lif);
+void ionic_get_stats64(struct net_device *netdev,
+		       struct rtnl_link_stats64 *ns);
 void ionic_lif_deferred_enqueue(struct ionic_deferred *def,
 				struct ionic_deferred_work *work);
 int ionic_lifs_alloc(struct ionic *ionic);
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.c b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
index 8f2a8fb029f1..2a1885da58a6 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.c
@@ -15,11 +15,109 @@ static const struct ionic_stat_desc ionic_lif_stats_desc[] = {
 	IONIC_LIF_STAT_DESC(rx_packets),
 	IONIC_LIF_STAT_DESC(rx_bytes),
 	IONIC_LIF_STAT_DESC(tx_tso),
-	IONIC_LIF_STAT_DESC(tx_no_csum),
+	IONIC_LIF_STAT_DESC(tx_tso_bytes),
+	IONIC_LIF_STAT_DESC(tx_csum_none),
 	IONIC_LIF_STAT_DESC(tx_csum),
 	IONIC_LIF_STAT_DESC(rx_csum_none),
 	IONIC_LIF_STAT_DESC(rx_csum_complete),
 	IONIC_LIF_STAT_DESC(rx_csum_error),
+	IONIC_LIF_STAT_DESC(hw_tx_dropped),
+	IONIC_LIF_STAT_DESC(hw_rx_dropped),
+	IONIC_LIF_STAT_DESC(hw_rx_over_errors),
+	IONIC_LIF_STAT_DESC(hw_rx_missed_errors),
+	IONIC_LIF_STAT_DESC(hw_tx_aborted_errors),
+};
+
+static const struct ionic_stat_desc ionic_port_stats_desc[] = {
+	IONIC_PORT_STAT_DESC(frames_rx_ok),
+	IONIC_PORT_STAT_DESC(frames_rx_all),
+	IONIC_PORT_STAT_DESC(frames_rx_bad_fcs),
+	IONIC_PORT_STAT_DESC(frames_rx_bad_all),
+	IONIC_PORT_STAT_DESC(octets_rx_ok),
+	IONIC_PORT_STAT_DESC(octets_rx_all),
+	IONIC_PORT_STAT_DESC(frames_rx_unicast),
+	IONIC_PORT_STAT_DESC(frames_rx_multicast),
+	IONIC_PORT_STAT_DESC(frames_rx_broadcast),
+	IONIC_PORT_STAT_DESC(frames_rx_pause),
+	IONIC_PORT_STAT_DESC(frames_rx_bad_length),
+	IONIC_PORT_STAT_DESC(frames_rx_undersized),
+	IONIC_PORT_STAT_DESC(frames_rx_oversized),
+	IONIC_PORT_STAT_DESC(frames_rx_fragments),
+	IONIC_PORT_STAT_DESC(frames_rx_jabber),
+	IONIC_PORT_STAT_DESC(frames_rx_pripause),
+	IONIC_PORT_STAT_DESC(frames_rx_stomped_crc),
+	IONIC_PORT_STAT_DESC(frames_rx_too_long),
+	IONIC_PORT_STAT_DESC(frames_rx_vlan_good),
+	IONIC_PORT_STAT_DESC(frames_rx_dropped),
+	IONIC_PORT_STAT_DESC(frames_rx_less_than_64b),
+	IONIC_PORT_STAT_DESC(frames_rx_64b),
+	IONIC_PORT_STAT_DESC(frames_rx_65b_127b),
+	IONIC_PORT_STAT_DESC(frames_rx_128b_255b),
+	IONIC_PORT_STAT_DESC(frames_rx_256b_511b),
+	IONIC_PORT_STAT_DESC(frames_rx_512b_1023b),
+	IONIC_PORT_STAT_DESC(frames_rx_1024b_1518b),
+	IONIC_PORT_STAT_DESC(frames_rx_1519b_2047b),
+	IONIC_PORT_STAT_DESC(frames_rx_2048b_4095b),
+	IONIC_PORT_STAT_DESC(frames_rx_4096b_8191b),
+	IONIC_PORT_STAT_DESC(frames_rx_8192b_9215b),
+	IONIC_PORT_STAT_DESC(frames_rx_other),
+	IONIC_PORT_STAT_DESC(frames_tx_ok),
+	IONIC_PORT_STAT_DESC(frames_tx_all),
+	IONIC_PORT_STAT_DESC(frames_tx_bad),
+	IONIC_PORT_STAT_DESC(octets_tx_ok),
+	IONIC_PORT_STAT_DESC(octets_tx_total),
+	IONIC_PORT_STAT_DESC(frames_tx_unicast),
+	IONIC_PORT_STAT_DESC(frames_tx_multicast),
+	IONIC_PORT_STAT_DESC(frames_tx_broadcast),
+	IONIC_PORT_STAT_DESC(frames_tx_pause),
+	IONIC_PORT_STAT_DESC(frames_tx_pripause),
+	IONIC_PORT_STAT_DESC(frames_tx_vlan),
+	IONIC_PORT_STAT_DESC(frames_tx_less_than_64b),
+	IONIC_PORT_STAT_DESC(frames_tx_64b),
+	IONIC_PORT_STAT_DESC(frames_tx_65b_127b),
+	IONIC_PORT_STAT_DESC(frames_tx_128b_255b),
+	IONIC_PORT_STAT_DESC(frames_tx_256b_511b),
+	IONIC_PORT_STAT_DESC(frames_tx_512b_1023b),
+	IONIC_PORT_STAT_DESC(frames_tx_1024b_1518b),
+	IONIC_PORT_STAT_DESC(frames_tx_1519b_2047b),
+	IONIC_PORT_STAT_DESC(frames_tx_2048b_4095b),
+	IONIC_PORT_STAT_DESC(frames_tx_4096b_8191b),
+	IONIC_PORT_STAT_DESC(frames_tx_8192b_9215b),
+	IONIC_PORT_STAT_DESC(frames_tx_other),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_0),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_1),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_2),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_3),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_4),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_5),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_6),
+	IONIC_PORT_STAT_DESC(frames_tx_pri_7),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_0),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_1),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_2),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_3),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_4),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_5),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_6),
+	IONIC_PORT_STAT_DESC(frames_rx_pri_7),
+	IONIC_PORT_STAT_DESC(tx_pripause_0_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_1_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_2_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_3_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_4_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_5_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_6_1us_count),
+	IONIC_PORT_STAT_DESC(tx_pripause_7_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_0_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_1_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_2_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_3_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_4_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_5_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_6_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pripause_7_1us_count),
+	IONIC_PORT_STAT_DESC(rx_pause_1us_count),
+	IONIC_PORT_STAT_DESC(frames_tx_truncated),
 };
 
 static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
@@ -29,6 +127,11 @@ static const struct ionic_stat_desc ionic_tx_stats_desc[] = {
 	IONIC_TX_STAT_DESC(dma_map_err),
 	IONIC_TX_STAT_DESC(linearize),
 	IONIC_TX_STAT_DESC(frags),
+	IONIC_TX_STAT_DESC(tso),
+	IONIC_TX_STAT_DESC(tso_bytes),
+	IONIC_TX_STAT_DESC(csum_none),
+	IONIC_TX_STAT_DESC(csum),
+	IONIC_TX_STAT_DESC(vlan_inserted),
 };
 
 static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
@@ -40,6 +143,7 @@ static const struct ionic_stat_desc ionic_rx_stats_desc[] = {
 	IONIC_RX_STAT_DESC(csum_complete),
 	IONIC_RX_STAT_DESC(csum_error),
 	IONIC_RX_STAT_DESC(dropped),
+	IONIC_RX_STAT_DESC(vlan_stripped),
 };
 
 static const struct ionic_stat_desc ionic_txq_stats_desc[] = {
@@ -62,6 +166,7 @@ static const struct ionic_stat_desc ionic_dbg_napi_stats_desc[] = {
 };
 
 #define IONIC_NUM_LIF_STATS ARRAY_SIZE(ionic_lif_stats_desc)
+#define IONIC_NUM_PORT_STATS ARRAY_SIZE(ionic_port_stats_desc)
 #define IONIC_NUM_TX_STATS ARRAY_SIZE(ionic_tx_stats_desc)
 #define IONIC_NUM_RX_STATS ARRAY_SIZE(ionic_rx_stats_desc)
 #define IONIC_NUM_TX_Q_STATS ARRAY_SIZE(ionic_txq_stats_desc)
@@ -76,6 +181,7 @@ static void ionic_get_lif_stats(struct ionic_lif *lif,
 {
 	struct ionic_tx_stats *tstats;
 	struct ionic_rx_stats *rstats;
+	struct rtnl_link_stats64 ns;
 	struct ionic_qcq *txqcq;
 	struct ionic_qcq *rxqcq;
 	int q_num;
@@ -89,7 +195,8 @@ static void ionic_get_lif_stats(struct ionic_lif *lif,
 			stats->tx_packets += tstats->pkts;
 			stats->tx_bytes += tstats->bytes;
 			stats->tx_tso += tstats->tso;
-			stats->tx_no_csum += tstats->no_csum;
+			stats->tx_tso_bytes += tstats->tso_bytes;
+			stats->tx_csum_none += tstats->csum_none;
 			stats->tx_csum += tstats->csum;
 		}
 
@@ -103,6 +210,13 @@ static void ionic_get_lif_stats(struct ionic_lif *lif,
 			stats->rx_csum_error += rstats->csum_error;
 		}
 	}
+
+	ionic_get_stats64(lif->netdev, &ns);
+	stats->hw_tx_dropped = ns.tx_dropped;
+	stats->hw_rx_dropped = ns.rx_dropped;
+	stats->hw_rx_over_errors = ns.rx_over_errors;
+	stats->hw_rx_missed_errors = ns.rx_missed_errors;
+	stats->hw_tx_aborted_errors = ns.tx_aborted_errors;
 }
 
 static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
@@ -118,6 +232,9 @@ static u64 ionic_sw_stats_get_count(struct ionic_lif *lif)
 	/* rx stats */
 	total += MAX_Q(lif) * IONIC_NUM_RX_STATS;
 
+	/* port stats */
+	total += IONIC_NUM_PORT_STATS;
+
 	if (test_bit(IONIC_LIF_F_UP, lif->state) &&
 	    test_bit(IONIC_LIF_F_SW_DEBUG_STATS, lif->state)) {
 		/* tx debug stats */
@@ -144,6 +261,13 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 		snprintf(*buf, ETH_GSTRING_LEN, ionic_lif_stats_desc[i].name);
 		*buf += ETH_GSTRING_LEN;
 	}
+
+	for (i = 0; i < IONIC_NUM_PORT_STATS; i++) {
+		snprintf(*buf, ETH_GSTRING_LEN,
+			 ionic_port_stats_desc[i].name);
+		*buf += ETH_GSTRING_LEN;
+	}
+
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
 		for (i = 0; i < IONIC_NUM_TX_STATS; i++) {
 			snprintf(*buf, ETH_GSTRING_LEN, "tx_%d_%s",
@@ -225,6 +349,7 @@ static void ionic_sw_stats_get_strings(struct ionic_lif *lif, u8 **buf)
 
 static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 {
+	struct ionic_port_stats *port_stats;
 	struct ionic_lif_sw_stats lif_stats;
 	struct ionic_qcq *txqcq, *rxqcq;
 	struct ionic_tx_stats *txstats;
@@ -238,6 +363,13 @@ static void ionic_sw_stats_get_values(struct ionic_lif *lif, u64 **buf)
 		(*buf)++;
 	}
 
+	port_stats = &lif->ionic->idev.port_info->stats;
+	for (i = 0; i < IONIC_NUM_PORT_STATS; i++) {
+		**buf = IONIC_READ_STAT_LE64(port_stats,
+					     &ionic_port_stats_desc[i]);
+		(*buf)++;
+	}
+
 	for (q_num = 0; q_num < MAX_Q(lif); q_num++) {
 		txstats = &lif_to_txstats(lif, q_num);
 
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_stats.h b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
index d2c1122a2c6e..3f543512616e 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_stats.h
+++ b/drivers/net/ethernet/pensando/ionic/ionic_stats.h
@@ -11,6 +11,9 @@
 	.offset = IONIC_STAT_TO_OFFSET(type, stat_name) \
 }
 
+#define IONIC_PORT_STAT_DESC(stat_name) \
+	IONIC_STAT_DESC(struct ionic_port_stats, stat_name)
+
 #define IONIC_LIF_STAT_DESC(stat_name) \
 	IONIC_STAT_DESC(struct ionic_lif_sw_stats, stat_name)
 
@@ -45,6 +48,9 @@ extern const int ionic_num_stats_grps;
 #define IONIC_READ_STAT64(base_ptr, desc_ptr) \
 	(*((u64 *)(((u8 *)(base_ptr)) + (desc_ptr)->offset)))
 
+#define IONIC_READ_STAT_LE64(base_ptr, desc_ptr) \
+	__le64_to_cpu(*((u64 *)(((u8 *)(base_ptr)) + (desc_ptr)->offset)))
+
 struct ionic_stat_desc {
 	char name[ETH_GSTRING_LEN];
 	u64 offset;
diff --git a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
index 6b14e55a6780..b7f900c11834 100644
--- a/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
+++ b/drivers/net/ethernet/pensando/ionic/ionic_txrx.c
@@ -214,10 +214,11 @@ static void ionic_rx_clean(struct ionic_queue *q,
 		     (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_IP_BAD)))
 		stats->csum_error++;
 
-	if (likely(netdev->features & NETIF_F_HW_VLAN_CTAG_RX)) {
-		if (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)
-			__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
-					       le16_to_cpu(comp->vlan_tci));
+	if (likely(netdev->features & NETIF_F_HW_VLAN_CTAG_RX) &&
+	    (comp->csum_flags & IONIC_RXQ_COMP_CSUM_F_VLAN)) {
+		__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q),
+				       le16_to_cpu(comp->vlan_tci));
+		stats->vlan_stripped++;
 	}
 
 	if (le16_to_cpu(comp->len) <= q->lif->rx_copybreak)
@@ -860,6 +861,7 @@ static int ionic_tx_tso(struct ionic_queue *q, struct sk_buff *skb)
 	stats->pkts += total_pkts;
 	stats->bytes += total_bytes;
 	stats->tso++;
+	stats->tso_bytes += total_bytes;
 
 	return 0;
 
@@ -898,9 +900,12 @@ static int ionic_tx_calc_csum(struct ionic_queue *q, struct sk_buff *skb)
 				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
 	desc->len = cpu_to_le16(skb_headlen(skb));
-	desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
 	desc->csum_start = cpu_to_le16(skb_checksum_start_offset(skb));
 	desc->csum_offset = cpu_to_le16(skb->csum_offset);
+	if (has_vlan) {
+		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+		stats->vlan_inserted++;
+	}
 
 	if (skb->csum_not_inet)
 		stats->crc32_csum++;
@@ -935,9 +940,12 @@ static int ionic_tx_calc_no_csum(struct ionic_queue *q, struct sk_buff *skb)
 				  flags, skb_shinfo(skb)->nr_frags, dma_addr);
 	desc->cmd = cpu_to_le64(cmd);
 	desc->len = cpu_to_le16(skb_headlen(skb));
-	desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+	if (has_vlan) {
+		desc->vlan_tci = cpu_to_le16(skb_vlan_tag_get(skb));
+		stats->vlan_inserted++;
+	}
 
-	stats->no_csum++;
+	stats->csum_none++;
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7c7b58ecd604ce599311f6f6abbf43804f263384 Mon Sep 17 00:00:00 2001
From: Shannon Nelson <snelson@pensando.io>
Date: Mon, 11 May 2020 17:59:36 -0700
Subject: ionic: update doc files

Update the basic doc file with some configuration hints and a
little bit of stats information.

Signed-off-by: Shannon Nelson <snelson@pensando.io>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/device_drivers/pensando/ionic.rst   | 231 ++++++++++++++++++++-
 1 file changed, 230 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/device_drivers/pensando/ionic.rst b/Documentation/networking/device_drivers/pensando/ionic.rst
index c17d680cf334..0eabbc347d6c 100644
--- a/Documentation/networking/device_drivers/pensando/ionic.rst
+++ b/Documentation/networking/device_drivers/pensando/ionic.rst
@@ -11,6 +11,9 @@ Contents
 ========
 
 - Identifying the Adapter
+- Enabling the driver
+- Configuring the driver
+- Statistics
 - Support
 
 Identifying the Adapter
@@ -28,12 +31,238 @@ and configure them for use.  There should be log entries in the kernel
 messages such as these::
 
   $ dmesg | grep ionic
-  ionic Pensando Ethernet NIC Driver, ver 0.15.0-k
+  ionic 0000:b5:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link)
   ionic 0000:b5:00.0 enp181s0: renamed from eth0
+  ionic 0000:b5:00.0 enp181s0: Link up - 100 Gbps
+  ionic 0000:b6:00.0: 126.016 Gb/s available PCIe bandwidth (8.0 GT/s PCIe x16 link)
   ionic 0000:b6:00.0 enp182s0: renamed from eth0
+  ionic 0000:b6:00.0 enp182s0: Link up - 100 Gbps
+
+Driver and firmware version information can be gathered with either the
+ethtool or devlink tools::
+
+  $ ethtool -i enp181s0
+  driver: ionic
+  version: 5.7.0
+  firmware-version: 1.8.0-28
+  ...
+
+  $ devlink dev info pci/0000:b5:00.0
+  pci/0000:b5:00.0:
+    driver ionic
+    serial_number FLM18420073
+    versions:
+        fixed:
+          asic.id 0x0
+          asic.rev 0x0
+        running:
+          fw 1.8.0-28
+
+See Documentation/networking/devlink/ionic.rst for more information
+on the devlink dev info data.
+
+Enabling the driver
+===================
+
+The driver is enabled via the standard kernel configuration system,
+using the make command::
+
+  make oldconfig/menuconfig/etc.
+
+The driver is located in the menu structure at:
+
+  -> Device Drivers
+    -> Network device support (NETDEVICES [=y])
+      -> Ethernet driver support
+        -> Pensando devices
+          -> Pensando Ethernet IONIC Support
+
+Configuring the driver
+======================
+
+MTU
+---
+
+Jumbo frame support is available with a maximum size of 9194 bytes.
+
+Interrupt coalescing
+--------------------
+
+Interrupt coalescing can be configured by changing the rx-usecs value with
+the "ethtool -C" command.  The rx-usecs range is 0-190.  The tx-usecs value
+reflects the rx-usecs value as they are tied together on the same interrupt.
+
+SR-IOV
+------
+
+Minimal SR-IOV support is currently offered and can be enabled by setting
+the sysfs 'sriov_numvfs' value, if supported by your particular firmware
+configuration.
+
+Statistics
+==========
+
+Basic hardware stats
+--------------------
+
+The commands ``netstat -i``, ``ip -s link show``, and ``ifconfig`` show
+a limited set of statistics taken directly from firmware.  For example::
+
+  $ ip -s link show enp181s0
+  7: enp181s0: <BROADCAST,MULTICAST,UP,LOWER_UP> mtu 1500 qdisc mq state UP mode DEFAULT group default qlen 1000
+      link/ether 00:ae:cd:00:07:68 brd ff:ff:ff:ff:ff:ff
+      RX: bytes  packets  errors  dropped overrun mcast
+      414        5        0       0       0       0
+      TX: bytes  packets  errors  dropped carrier collsns
+      1384       18       0       0       0       0
+
+ethtool -S
+----------
+
+The statistics shown from the ``ethtool -S`` command include a combination of
+driver counters and firmware counters, including port and queue specific values.
+The driver values are counters computed by the driver, and the firmware values
+are gathered by the firmware from the port hardware and passed through the
+driver with no further interpretation.
+
+Driver port specific::
+
+     tx_packets: 12
+     tx_bytes: 964
+     rx_packets: 5
+     rx_bytes: 414
+     tx_tso: 0
+     tx_tso_bytes: 0
+     tx_csum_none: 12
+     tx_csum: 0
+     rx_csum_none: 0
+     rx_csum_complete: 3
+     rx_csum_error: 0
+
+Driver queue specific::
+
+     tx_0_pkts: 3
+     tx_0_bytes: 294
+     tx_0_clean: 3
+     tx_0_dma_map_err: 0
+     tx_0_linearize: 0
+     tx_0_frags: 0
+     tx_0_tso: 0
+     tx_0_tso_bytes: 0
+     tx_0_csum_none: 3
+     tx_0_csum: 0
+     tx_0_vlan_inserted: 0
+     rx_0_pkts: 2
+     rx_0_bytes: 120
+     rx_0_dma_map_err: 0
+     rx_0_alloc_err: 0
+     rx_0_csum_none: 0
+     rx_0_csum_complete: 0
+     rx_0_csum_error: 0
+     rx_0_dropped: 0
+     rx_0_vlan_stripped: 0
+
+Firmware port specific::
+
+     hw_tx_dropped: 0
+     hw_rx_dropped: 0
+     hw_rx_over_errors: 0
+     hw_rx_missed_errors: 0
+     hw_tx_aborted_errors: 0
+     frames_rx_ok: 15
+     frames_rx_all: 15
+     frames_rx_bad_fcs: 0
+     frames_rx_bad_all: 0
+     octets_rx_ok: 1290
+     octets_rx_all: 1290
+     frames_rx_unicast: 10
+     frames_rx_multicast: 5
+     frames_rx_broadcast: 0
+     frames_rx_pause: 0
+     frames_rx_bad_length: 0
+     frames_rx_undersized: 0
+     frames_rx_oversized: 0
+     frames_rx_fragments: 0
+     frames_rx_jabber: 0
+     frames_rx_pripause: 0
+     frames_rx_stomped_crc: 0
+     frames_rx_too_long: 0
+     frames_rx_vlan_good: 3
+     frames_rx_dropped: 0
+     frames_rx_less_than_64b: 0
+     frames_rx_64b: 4
+     frames_rx_65b_127b: 11
+     frames_rx_128b_255b: 0
+     frames_rx_256b_511b: 0
+     frames_rx_512b_1023b: 0
+     frames_rx_1024b_1518b: 0
+     frames_rx_1519b_2047b: 0
+     frames_rx_2048b_4095b: 0
+     frames_rx_4096b_8191b: 0
+     frames_rx_8192b_9215b: 0
+     frames_rx_other: 0
+     frames_tx_ok: 31
+     frames_tx_all: 31
+     frames_tx_bad: 0
+     octets_tx_ok: 2614
+     octets_tx_total: 2614
+     frames_tx_unicast: 8
+     frames_tx_multicast: 21
+     frames_tx_broadcast: 2
+     frames_tx_pause: 0
+     frames_tx_pripause: 0
+     frames_tx_vlan: 0
+     frames_tx_less_than_64b: 0
+     frames_tx_64b: 4
+     frames_tx_65b_127b: 27
+     frames_tx_128b_255b: 0
+     frames_tx_256b_511b: 0
+     frames_tx_512b_1023b: 0
+     frames_tx_1024b_1518b: 0
+     frames_tx_1519b_2047b: 0
+     frames_tx_2048b_4095b: 0
+     frames_tx_4096b_8191b: 0
+     frames_tx_8192b_9215b: 0
+     frames_tx_other: 0
+     frames_tx_pri_0: 0
+     frames_tx_pri_1: 0
+     frames_tx_pri_2: 0
+     frames_tx_pri_3: 0
+     frames_tx_pri_4: 0
+     frames_tx_pri_5: 0
+     frames_tx_pri_6: 0
+     frames_tx_pri_7: 0
+     frames_rx_pri_0: 0
+     frames_rx_pri_1: 0
+     frames_rx_pri_2: 0
+     frames_rx_pri_3: 0
+     frames_rx_pri_4: 0
+     frames_rx_pri_5: 0
+     frames_rx_pri_6: 0
+     frames_rx_pri_7: 0
+     tx_pripause_0_1us_count: 0
+     tx_pripause_1_1us_count: 0
+     tx_pripause_2_1us_count: 0
+     tx_pripause_3_1us_count: 0
+     tx_pripause_4_1us_count: 0
+     tx_pripause_5_1us_count: 0
+     tx_pripause_6_1us_count: 0
+     tx_pripause_7_1us_count: 0
+     rx_pripause_0_1us_count: 0
+     rx_pripause_1_1us_count: 0
+     rx_pripause_2_1us_count: 0
+     rx_pripause_3_1us_count: 0
+     rx_pripause_4_1us_count: 0
+     rx_pripause_5_1us_count: 0
+     rx_pripause_6_1us_count: 0
+     rx_pripause_7_1us_count: 0
+     rx_pause_1us_count: 0
+     frames_tx_truncated: 0
+
 
 Support
 =======
+
 For general Linux networking support, please use the netdev mailing
 list, which is monitored by Pensando personnel::
 
-- 
cgit v1.2.3-59-g8ed1b


From 966a5c08af1b1399fe1014f24877578e8493ffe1 Mon Sep 17 00:00:00 2001
From: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Date: Tue, 12 May 2020 16:26:50 +0900
Subject: dt-bindings: net: Convert UniPhier AVE4 controller to json-schema

Convert the UniPhier AVE4 controller binding to DT schema format.

Signed-off-by: Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
Reviewed-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/net/socionext,uniphier-ave4.txt       |  64 ------------
 .../bindings/net/socionext,uniphier-ave4.yaml      | 111 +++++++++++++++++++++
 MAINTAINERS                                        |   2 +-
 3 files changed, 112 insertions(+), 65 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
 create mode 100644 Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml

diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
deleted file mode 100644
index 4e85fc495e87..000000000000
--- a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+++ /dev/null
@@ -1,64 +0,0 @@
-* Socionext AVE ethernet controller
-
-This describes the devicetree bindings for AVE ethernet controller
-implemented on Socionext UniPhier SoCs.
-
-Required properties:
- - compatible: Should be
-	- "socionext,uniphier-pro4-ave4" : for Pro4 SoC
-	- "socionext,uniphier-pxs2-ave4" : for PXs2 SoC
-	- "socionext,uniphier-ld11-ave4" : for LD11 SoC
-	- "socionext,uniphier-ld20-ave4" : for LD20 SoC
-	- "socionext,uniphier-pxs3-ave4" : for PXs3 SoC
- - reg: Address where registers are mapped and size of region.
- - interrupts: Should contain the MAC interrupt.
- - phy-mode: See ethernet.txt in the same directory. Allow to choose
-	"rgmii", "rmii", "mii", or "internal" according to the PHY.
-	The acceptable mode is SoC-dependent.
- - phy-handle: Should point to the external phy device.
-	See ethernet.txt file in the same directory.
- - clocks: A phandle to the clock for the MAC.
-	For Pro4 SoC, that is "socionext,uniphier-pro4-ave4",
-	another MAC clock, GIO bus clock and PHY clock are also required.
- - clock-names: Should contain
-	- "ether", "ether-gb", "gio", "ether-phy" for Pro4 SoC
-	- "ether" for others
- - resets: A phandle to the reset control for the MAC. For Pro4 SoC,
-	GIO bus reset is also required.
- - reset-names: Should contain
-	- "ether", "gio" for Pro4 SoC
-	- "ether" for others
- - socionext,syscon-phy-mode: A phandle to syscon with one argument
-	that configures phy mode. The argument is the ID of MAC instance.
-
-The MAC address will be determined using the optional properties
-defined in ethernet.txt.
-
-Required subnode:
- - mdio: A container for child nodes representing phy nodes.
-         See phy.txt in the same directory.
-
-Example:
-
-	ether: ethernet@65000000 {
-		compatible = "socionext,uniphier-ld20-ave4";
-		reg = <0x65000000 0x8500>;
-		interrupts = <0 66 4>;
-		phy-mode = "rgmii";
-		phy-handle = <&ethphy>;
-		clock-names = "ether";
-		clocks = <&sys_clk 6>;
-		reset-names = "ether";
-		resets = <&sys_rst 6>;
-		socionext,syscon-phy-mode = <&soc_glue 0>;
-		local-mac-address = [00 00 00 00 00 00];
-
-		mdio {
-			#address-cells = <1>;
-			#size-cells = <0>;
-
-			ethphy: ethphy@1 {
-				reg = <1>;
-			};
-		};
-	};
diff --git a/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
new file mode 100644
index 000000000000..7d84a863b9b9
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
@@ -0,0 +1,111 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/socionext,uniphier-ave4.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: Socionext AVE ethernet controller
+
+maintainers:
+  - Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
+
+description: |
+  This describes the devicetree bindings for AVE ethernet controller
+  implemented on Socionext UniPhier SoCs.
+
+allOf:
+  - $ref: ethernet-controller.yaml#
+
+properties:
+  compatible:
+    enum:
+      - socionext,uniphier-pro4-ave4
+      - socionext,uniphier-pxs2-ave4
+      - socionext,uniphier-ld11-ave4
+      - socionext,uniphier-ld20-ave4
+      - socionext,uniphier-pxs3-ave4
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  phy-mode: true
+
+  phy-handle: true
+
+  mac-address: true
+
+  local-mac-address: true
+
+  clocks:
+    minItems: 1
+    maxItems: 4
+
+  clock-names:
+    oneOf:
+      - items:          # for Pro4
+        - const: gio
+        - const: ether
+        - const: ether-gb
+        - const: ether-phy
+      - const: ether    # for others
+
+  resets:
+    minItems: 1
+    maxItems: 2
+
+  reset-names:
+    oneOf:
+      - items:          # for Pro4
+        - const: gio
+        - const: ether
+      - const: ether    # for others
+
+  socionext,syscon-phy-mode:
+    $ref: /schemas/types.yaml#definitions/phandle-array
+    description:
+      A phandle to syscon with one argument that configures phy mode.
+      The argument is the ID of MAC instance.
+
+  mdio:
+    $ref: mdio.yaml#
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - phy-mode
+  - phy-handle
+  - clocks
+  - clock-names
+  - resets
+  - reset-names
+  - mdio
+
+additionalProperties: false
+
+examples:
+  - |
+    ether: ethernet@65000000 {
+        compatible = "socionext,uniphier-ld20-ave4";
+                reg = <0x65000000 0x8500>;
+                interrupts = <0 66 4>;
+                phy-mode = "rgmii";
+                phy-handle = <&ethphy>;
+                clock-names = "ether";
+                clocks = <&sys_clk 6>;
+                reset-names = "ether";
+                resets = <&sys_rst 6>;
+                socionext,syscon-phy-mode = <&soc_glue 0>;
+
+                mdio {
+                        #address-cells = <1>;
+                        #size-cells = <0>;
+
+                        ethphy: ethernet-phy@1 {
+                                reg = <1>;
+                        };
+                };
+        };
diff --git a/MAINTAINERS b/MAINTAINERS
index e581ae499057..734cccf1d1e5 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -15542,7 +15542,7 @@ SOCIONEXT (SNI) AVE NETWORK DRIVER
 M:	Kunihiko Hayashi <hayashi.kunihiko@socionext.com>
 L:	netdev@vger.kernel.org
 S:	Maintained
-F:	Documentation/devicetree/bindings/net/socionext,uniphier-ave4.txt
+F:	Documentation/devicetree/bindings/net/socionext,uniphier-ave4.yaml
 F:	drivers/net/ethernet/socionext/sni_ave.c
 
 SOCIONEXT (SNI) NETSEC NETWORK DRIVER
-- 
cgit v1.2.3-59-g8ed1b


From 527c1e615b4c6616abb5c7b14c2ff5b04a029175 Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 12 May 2020 14:24:34 +0100
Subject: sfc: actually wire up siena_check_caps()

Assign it to siena_a0_nic_type.check_caps function pointer.

Fixes: be904b855200 ("sfc: make capability checking a nic_type function")
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/siena.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index ed1cb6caa69d..d8b052979b1b 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -1093,4 +1093,5 @@ const struct efx_nic_type siena_a0_nic_type = {
 			     1 << HWTSTAMP_FILTER_PTP_V1_L4_EVENT |
 			     1 << HWTSTAMP_FILTER_PTP_V2_L4_EVENT),
 	.rx_hash_key_size = 16,
+	.check_caps = siena_check_caps,
 };
-- 
cgit v1.2.3-59-g8ed1b


From 1b0cde4091877cd7fe4b29f67645cc391b86c9ca Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Tue, 12 May 2020 14:24:58 +0100
Subject: sfc: siena_check_caps() can be static

Reported-by: Jakub Kicinski <kuba@kernel.org>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/siena.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/sfc/siena.c b/drivers/net/ethernet/sfc/siena.c
index d8b052979b1b..891e9fb6abec 100644
--- a/drivers/net/ethernet/sfc/siena.c
+++ b/drivers/net/ethernet/sfc/siena.c
@@ -948,8 +948,8 @@ fail:
 
 #endif /* CONFIG_SFC_MTD */
 
-unsigned int siena_check_caps(const struct efx_nic *efx,
-			      u8 flag, u32 offset)
+static unsigned int siena_check_caps(const struct efx_nic *efx,
+				     u8 flag, u32 offset)
 {
 	/* Siena did not support MC_CMD_GET_CAPABILITIES */
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 54a0ed0df49609f4e3f098f8943e38e389dc2e15 Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 12 May 2020 20:20:25 +0300
Subject: net: dsa: provide an option for drivers to always receive bridge
 VLANs

DSA assumes that a bridge which has vlan filtering disabled is not
vlan aware, and ignores all vlan configuration. However, the kernel
software bridge code allows configuration in this state.

This causes the kernel's idea of the bridge vlan state and the
hardware state to disagree, so "bridge vlan show" indicates a correct
configuration but the hardware lacks all configuration. Even worse,
enabling vlan filtering on a DSA bridge immediately blocks all traffic
which, given the output of "bridge vlan show", is very confusing.

Provide an option that drivers can set to indicate they want to receive
vlan configuration even when vlan filtering is disabled. At the very
least, this is safe for Marvell DSA bridges, which do not look up
ingress traffic in the VTU if the port is in 8021Q disabled state. It is
also safe for the Ocelot switch family. Whether this change is suitable
for all DSA bridges is not known.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/dsa.h  |  7 +++++++
 net/dsa/dsa_priv.h |  1 +
 net/dsa/port.c     | 14 ++++++++++++++
 net/dsa/slave.c    |  8 ++++----
 4 files changed, 26 insertions(+), 4 deletions(-)

diff --git a/include/net/dsa.h b/include/net/dsa.h
index 312c2f067e65..50389772c597 100644
--- a/include/net/dsa.h
+++ b/include/net/dsa.h
@@ -282,6 +282,13 @@ struct dsa_switch {
 	 */
 	bool			vlan_filtering_is_global;
 
+	/* Pass .port_vlan_add and .port_vlan_del to drivers even for bridges
+	 * that have vlan_filtering=0. All drivers should ideally set this (and
+	 * then the option would get removed), but it is unknown whether this
+	 * would break things or not.
+	 */
+	bool			configure_vlan_while_not_filtering;
+
 	/* In case vlan_filtering_is_global is set, the VLAN awareness state
 	 * should be retrieved from here and not from the per-port settings.
 	 */
diff --git a/net/dsa/dsa_priv.h b/net/dsa/dsa_priv.h
index a1a0ae242012..adecf73bd608 100644
--- a/net/dsa/dsa_priv.h
+++ b/net/dsa/dsa_priv.h
@@ -138,6 +138,7 @@ int dsa_port_bridge_join(struct dsa_port *dp, struct net_device *br);
 void dsa_port_bridge_leave(struct dsa_port *dp, struct net_device *br);
 int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 			    struct switchdev_trans *trans);
+bool dsa_port_skip_vlan_configuration(struct dsa_port *dp);
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 			 struct switchdev_trans *trans);
 int dsa_port_mtu_change(struct dsa_port *dp, int new_mtu,
diff --git a/net/dsa/port.c b/net/dsa/port.c
index ebc8d6cbd1d4..e23ece229c7e 100644
--- a/net/dsa/port.c
+++ b/net/dsa/port.c
@@ -257,6 +257,20 @@ int dsa_port_vlan_filtering(struct dsa_port *dp, bool vlan_filtering,
 	return 0;
 }
 
+/* This enforces legacy behavior for switch drivers which assume they can't
+ * receive VLAN configuration when enslaved to a bridge with vlan_filtering=0
+ */
+bool dsa_port_skip_vlan_configuration(struct dsa_port *dp)
+{
+	struct dsa_switch *ds = dp->ds;
+
+	if (!dp->bridge_dev)
+		return false;
+
+	return (!ds->configure_vlan_while_not_filtering &&
+		!br_vlan_enabled(dp->bridge_dev));
+}
+
 int dsa_port_ageing_time(struct dsa_port *dp, clock_t ageing_clock,
 			 struct switchdev_trans *trans)
 {
diff --git a/net/dsa/slave.c b/net/dsa/slave.c
index 61b0de52040a..886490fb203d 100644
--- a/net/dsa/slave.c
+++ b/net/dsa/slave.c
@@ -314,7 +314,7 @@ static int dsa_slave_vlan_add(struct net_device *dev,
 	if (obj->orig_dev != dev)
 		return -EOPNOTSUPP;
 
-	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+	if (dsa_port_skip_vlan_configuration(dp))
 		return 0;
 
 	vlan = *SWITCHDEV_OBJ_PORT_VLAN(obj);
@@ -381,7 +381,7 @@ static int dsa_slave_vlan_del(struct net_device *dev,
 	if (obj->orig_dev != dev)
 		return -EOPNOTSUPP;
 
-	if (dp->bridge_dev && !br_vlan_enabled(dp->bridge_dev))
+	if (dsa_port_skip_vlan_configuration(dp))
 		return 0;
 
 	/* Do not deprogram the CPU port as it may be shared with other user
@@ -1240,7 +1240,7 @@ static int dsa_slave_vlan_rx_add_vid(struct net_device *dev, __be16 proto,
 	 * need to emulate the switchdev prepare + commit phase.
 	 */
 	if (dp->bridge_dev) {
-		if (!br_vlan_enabled(dp->bridge_dev))
+		if (dsa_port_skip_vlan_configuration(dp))
 			return 0;
 
 		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
@@ -1274,7 +1274,7 @@ static int dsa_slave_vlan_rx_kill_vid(struct net_device *dev, __be16 proto,
 	 * need to emulate the switchdev prepare + commit phase.
 	 */
 	if (dp->bridge_dev) {
-		if (!br_vlan_enabled(dp->bridge_dev))
+		if (dsa_port_skip_vlan_configuration(dp))
 			return 0;
 
 		/* br_vlan_get_info() returns -EINVAL or -ENOENT if the
-- 
cgit v1.2.3-59-g8ed1b


From 1f66b0f0aec671f8fbc86d75b2efdf7c7e0f7880 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:26 +0300
Subject: net: dsa: tag_8021q: introduce a vid_is_dsa_8021q helper

This function returns a boolean denoting whether the VLAN passed as
argument is part of the 1024-3071 range that the dsa_8021q tagging
scheme uses.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 7 +++++++
 net/dsa/tag_8021q.c       | 7 +++++++
 2 files changed, 14 insertions(+)

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index b8daaec0896e..ebc245ff838a 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -50,6 +50,8 @@ int dsa_8021q_rx_switch_id(u16 vid);
 
 int dsa_8021q_rx_source_port(u16 vid);
 
+bool vid_is_dsa_8021q(u16 vid);
+
 #else
 
 int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
@@ -107,6 +109,11 @@ int dsa_8021q_rx_source_port(u16 vid)
 	return 0;
 }
 
+bool vid_is_dsa_8021q(u16 vid)
+{
+	return false;
+}
+
 #endif /* IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q) */
 
 #endif /* _NET_DSA_8021Q_H */
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index ff9c5bf64bda..4774ecd1f8fc 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -93,6 +93,13 @@ int dsa_8021q_rx_source_port(u16 vid)
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
 
+bool vid_is_dsa_8021q(u16 vid)
+{
+	return ((vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX ||
+		(vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_TX);
+}
+EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
+
 static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
 {
 	struct bridge_vlan_info vinfo;
-- 
cgit v1.2.3-59-g8ed1b


From 7f14937facdca1e0da1bacfacd089bcf4271e75a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:27 +0300
Subject: net: dsa: sja1105: keep the VLAN awareness state in a driver variable

Soon we'll add a third operating mode to the driver. Introduce a
vlan_state to make things easier to manage, and use it where
applicable.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  6 ++++++
 drivers/net/dsa/sja1105/sja1105_main.c | 18 +++++++++++++-----
 drivers/net/dsa/sja1105/sja1105_vl.c   | 24 ++++++++++++++----------
 3 files changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index a64ace07b89f..5b2b275d01a7 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -178,6 +178,11 @@ struct sja1105_flow_block {
 	int num_virtual_links;
 };
 
+enum sja1105_vlan_state {
+	SJA1105_VLAN_UNAWARE,
+	SJA1105_VLAN_FILTERING_FULL,
+};
+
 struct sja1105_private {
 	struct sja1105_static_config static_config;
 	bool rgmii_rx_delay[SJA1105_NUM_PORTS];
@@ -193,6 +198,7 @@ struct sja1105_private {
 	 * the switch doesn't confuse them with one another.
 	 */
 	struct mutex mgmt_lock;
+	enum sja1105_vlan_state vlan_state;
 	struct sja1105_tagger_data tagger_data;
 	struct sja1105_ptp_data ptp_data;
 	struct sja1105_tas_data tas_data;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index d5de9305df25..e7b675909288 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1303,7 +1303,7 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) {
+	if (priv->vlan_state != SJA1105_VLAN_UNAWARE) {
 		l2_lookup.mask_vlanid = VLAN_VID_MASK;
 		l2_lookup.mask_iotag = BIT(0);
 	} else {
@@ -1366,7 +1366,7 @@ int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 	l2_lookup.vlanid = vid;
 	l2_lookup.iotag = SJA1105_S_TAG;
 	l2_lookup.mask_macaddr = GENMASK_ULL(ETH_ALEN * 8 - 1, 0);
-	if (dsa_port_is_vlan_filtering(dsa_to_port(ds, port))) {
+	if (priv->vlan_state != SJA1105_VLAN_UNAWARE) {
 		l2_lookup.mask_vlanid = VLAN_VID_MASK;
 		l2_lookup.mask_iotag = BIT(0);
 	} else {
@@ -1412,7 +1412,7 @@ static int sja1105_fdb_add(struct dsa_switch *ds, int port,
 	 * for what gets printed in 'bridge fdb show'.  In the case of zero,
 	 * no VID gets printed at all.
 	 */
-	if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+	if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
 		vid = 0;
 
 	return priv->info->fdb_add_cmd(ds, port, addr, vid);
@@ -1423,7 +1423,7 @@ static int sja1105_fdb_del(struct dsa_switch *ds, int port,
 {
 	struct sja1105_private *priv = ds->priv;
 
-	if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+	if (priv->vlan_state != SJA1105_VLAN_FILTERING_FULL)
 		vid = 0;
 
 	return priv->info->fdb_del_cmd(ds, port, addr, vid);
@@ -1462,7 +1462,7 @@ static int sja1105_fdb_dump(struct dsa_switch *ds, int port,
 		u64_to_ether_addr(l2_lookup.macaddr, macaddr);
 
 		/* We need to hide the dsa_8021q VLANs from the user. */
-		if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
+		if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
 			l2_lookup.vlanid = 0;
 		cb(macaddr, l2_lookup.vlanid, l2_lookup.lockeds, data);
 	}
@@ -1917,6 +1917,7 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	struct sja1105_l2_lookup_params_entry *l2_lookup_params;
 	struct sja1105_general_params_entry *general_params;
 	struct sja1105_private *priv = ds->priv;
+	enum sja1105_vlan_state state;
 	struct sja1105_table *table;
 	struct sja1105_rule *rule;
 	u16 tpid, tpid2;
@@ -1940,6 +1941,13 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 		tpid2 = ETH_P_SJA1105;
 	}
 
+	if (!enabled)
+		state = SJA1105_VLAN_UNAWARE;
+	else
+		state = SJA1105_VLAN_FILTERING_FULL;
+
+	priv->vlan_state = state;
+
 	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 	general_params = table->entries;
 	/* EtherType used to identify inner tagged (C-tag) VLAN traffic */
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index aa9b0b92f437..312401995b54 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -353,14 +353,14 @@ int sja1105_vl_redirect(struct sja1105_private *priv, int port,
 	struct sja1105_rule *rule = sja1105_rule_find(priv, cookie);
 	int rc;
 
-	if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)) &&
-	    key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+	if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
+	    key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Can only redirect based on {DMAC, VID, PCP}");
+				   "Can only redirect based on DMAC");
 		return -EOPNOTSUPP;
-	} else if (key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+	} else if (key->type != SJA1105_KEY_VLAN_AWARE_VL) {
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Can only redirect based on DMAC");
+				   "Can only redirect based on {DMAC, VID, PCP}");
 		return -EOPNOTSUPP;
 	}
 
@@ -602,14 +602,18 @@ int sja1105_vl_gate(struct sja1105_private *priv, int port,
 		return -ERANGE;
 	}
 
-	if (dsa_port_is_vlan_filtering(dsa_to_port(priv->ds, port)) &&
-	    key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+	if (priv->vlan_state == SJA1105_VLAN_UNAWARE &&
+	    key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+		dev_err(priv->ds->dev, "1: vlan state %d key type %d\n",
+			priv->vlan_state, key->type);
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Can only gate based on {DMAC, VID, PCP}");
+				   "Can only gate based on DMAC");
 		return -EOPNOTSUPP;
-	} else if (key->type != SJA1105_KEY_VLAN_UNAWARE_VL) {
+	} else if (key->type != SJA1105_KEY_VLAN_AWARE_VL) {
+		dev_err(priv->ds->dev, "2: vlan state %d key type %d\n",
+			priv->vlan_state, key->type);
 		NL_SET_ERR_MSG_MOD(extack,
-				   "Can only gate based on DMAC");
+				   "Can only gate based on {DMAC, VID, PCP}");
 		return -EOPNOTSUPP;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 60b33aeb7e0e664865ace822c0a7aeeb5ebe521c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:28 +0300
Subject: net: dsa: sja1105: deny alterations of dsa_8021q VLANs from the
 bridge

At the moment, this can never happen. The 2 modes that we operate in do
not permit that:

 - SJA1105_VLAN_UNAWARE: we are guarded from bridge VLANs added by the
   user by the DSA core. We will later lift this restriction by setting
   ds->configure_vlan_while_not_filtering = true, and that is where
   we'll need it.

 - SJA1105_VLAN_FILTERING_FULL: in this mode, dsa_8021q configuration is
   disabled. So the user is free to add these VLANs in the 1024-3071
   range.

The reason for the patch is that we'll introduce a third VLAN awareness
state, where both dsa_8021q as well as the bridge are going to call our
.port_vlan_add and .port_vlan_del methods.

For that, we need a good way to discriminate between the 2. The easiest
(and least intrusive for upper layers) way is to recognize the fact that
dsa_8021q configurations are always driven by our driver - we _know_
when a .port_vlan_add method will be called from dsa_8021q because _we_
initiated it.

So introduce an expect_dsa_8021q boolean which is only used, at the
moment, for blacklisting VLANs in range 1024-3071 in the modes when
dsa_8021q is active.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  1 +
 drivers/net/dsa/sja1105/sja1105_main.c | 31 ++++++++++++++++++++++++++++++-
 2 files changed, 31 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 5b2b275d01a7..667056d0c819 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -198,6 +198,7 @@ struct sja1105_private {
 	 * the switch doesn't confuse them with one another.
 	 */
 	struct mutex mgmt_lock;
+	bool expect_dsa_8021q;
 	enum sja1105_vlan_state vlan_state;
 	struct sja1105_tagger_data tagger_data;
 	struct sja1105_ptp_data ptp_data;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index e7b675909288..8e68adba9144 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1811,15 +1811,19 @@ static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
 		if (dsa_to_port(ds, port)->bridge_dev != br)
 			continue;
 
+		other_priv->expect_dsa_8021q = true;
 		rc = dsa_8021q_crosschip_bridge_join(ds, port, other_ds,
 						     other_port, br,
 						     &priv->crosschip_links);
+		other_priv->expect_dsa_8021q = false;
 		if (rc)
 			return rc;
 
+		priv->expect_dsa_8021q = true;
 		rc = dsa_8021q_crosschip_bridge_join(other_ds, other_port, ds,
 						     port, br,
 						     &other_priv->crosschip_links);
+		priv->expect_dsa_8021q = false;
 		if (rc)
 			return rc;
 	}
@@ -1846,12 +1850,16 @@ static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds,
 		if (dsa_to_port(ds, port)->bridge_dev != br)
 			continue;
 
+		other_priv->expect_dsa_8021q = true;
 		dsa_8021q_crosschip_bridge_leave(ds, port, other_ds, other_port,
 						 br, &priv->crosschip_links);
+		other_priv->expect_dsa_8021q = false;
 
+		priv->expect_dsa_8021q = true;
 		dsa_8021q_crosschip_bridge_leave(other_ds, other_port, ds,
 						 port, br,
 						 &other_priv->crosschip_links);
+		priv->expect_dsa_8021q = false;
 	}
 }
 
@@ -1862,8 +1870,10 @@ static int sja1105_replay_crosschip_vlans(struct dsa_switch *ds, bool enabled)
 	int rc;
 
 	list_for_each_entry(c, &priv->crosschip_links, list) {
+		priv->expect_dsa_8021q = true;
 		rc = dsa_8021q_crosschip_link_apply(ds, c->port, c->other_ds,
 						    c->other_port, enabled);
+		priv->expect_dsa_8021q = false;
 		if (rc)
 			break;
 	}
@@ -1873,10 +1883,13 @@ static int sja1105_replay_crosschip_vlans(struct dsa_switch *ds, bool enabled)
 
 static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
 {
+	struct sja1105_private *priv = ds->priv;
 	int rc, i;
 
 	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
+		priv->expect_dsa_8021q = true;
 		rc = dsa_port_setup_8021q_tagging(ds, i, enabled);
+		priv->expect_dsa_8021q = false;
 		if (rc < 0) {
 			dev_err(ds->dev, "Failed to setup VLAN tagging for port %d: %d\n",
 				i, rc);
@@ -1901,10 +1914,26 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
 	return DSA_TAG_PROTO_SJA1105;
 }
 
-/* This callback needs to be present */
 static int sja1105_vlan_prepare(struct dsa_switch *ds, int port,
 				const struct switchdev_obj_port_vlan *vlan)
 {
+	struct sja1105_private *priv = ds->priv;
+	u16 vid;
+
+	if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
+		return 0;
+
+	/* If the user wants best-effort VLAN filtering (aka vlan_filtering
+	 * bridge plus tagging), be sure to at least deny alterations to the
+	 * configuration done by dsa_8021q.
+	 */
+	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
+		if (!priv->expect_dsa_8021q && vid_is_dsa_8021q(vid)) {
+			dev_err(ds->dev, "Range 1024-3071 reserved for dsa_8021q operation\n");
+			return -EBUSY;
+		}
+	}
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ec5ae61076d07be986df19773662506220757c9f Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:29 +0300
Subject: net: dsa: sja1105: save/restore VLANs using a delta commit method

Managing the VLAN table that is present in hardware will become very
difficult once we add a third operating state
(best_effort_vlan_filtering). That is because correct cleanup (not too
little, not too much) becomes virtually impossible, when VLANs can be
added from the bridge layer, from dsa_8021q for basic tagging, for
cross-chip bridging, as well as retagging rules for sub-VLANs and
cross-chip sub-VLANs. So we need to rethink VLAN interaction with the
switch in a more scalable way.

In preparation for that, use the priv->expect_dsa_8021q boolean to
classify any VLAN request received through .port_vlan_add or
.port_vlan_del towards either one of 2 internal lists: bridge VLANs and
dsa_8021q VLANs.

Then, implement a central sja1105_build_vlan_table method that creates a
VLAN configuration from scratch based on the 2 lists of VLANs kept by
the driver, and based on the VLAN awareness state. Currently, if we are
VLAN-unaware, install the dsa_8021q VLANs, otherwise the bridge VLANs.

Then, implement a delta commit procedure that identifies which VLANs
from this new configuration are actually different from the config
previously committed to hardware. We apply the delta through the dynamic
configuration interface (we don't reset the switch). The result is that
the hardware should see the exact sequence of operations as before this
patch.

This also helps remove the "br" argument passed to
dsa_8021q_crosschip_bridge_join, which it was only using to figure out
whether it should commit the configuration back to us or not, based on
the VLAN awareness state of the bridge. We can simplify that, by always
allowing those VLANs inside of our dsa_8021q_vlans list, and committing
those to hardware when necessary.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  10 +
 drivers/net/dsa/sja1105/sja1105_main.c | 493 ++++++++++++++++++++++++---------
 include/linux/dsa/8021q.h              |  19 +-
 net/dsa/tag_8021q.c                    |  45 ++-
 4 files changed, 393 insertions(+), 174 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 667056d0c819..c80f1999c694 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -178,6 +178,14 @@ struct sja1105_flow_block {
 	int num_virtual_links;
 };
 
+struct sja1105_bridge_vlan {
+	struct list_head list;
+	int port;
+	u16 vid;
+	bool pvid;
+	bool untagged;
+};
+
 enum sja1105_vlan_state {
 	SJA1105_VLAN_UNAWARE,
 	SJA1105_VLAN_FILTERING_FULL,
@@ -191,6 +199,8 @@ struct sja1105_private {
 	struct gpio_desc *reset_gpio;
 	struct spi_device *spidev;
 	struct dsa_switch *ds;
+	struct list_head dsa_8021q_vlans;
+	struct list_head bridge_vlans;
 	struct list_head crosschip_links;
 	struct sja1105_flow_block flow_block;
 	struct sja1105_port ports[SJA1105_NUM_PORTS];
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 8e68adba9144..fb95130299b1 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -303,7 +303,8 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 		.tag_port = 0,
 		.vlanid = 1,
 	};
-	int i;
+	struct dsa_switch *ds = priv->ds;
+	int port;
 
 	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
 
@@ -324,12 +325,31 @@ static int sja1105_init_static_vlan(struct sja1105_private *priv)
 	table->entry_count = 1;
 
 	/* VLAN 1: all DT-defined ports are members; no restrictions on
-	 * forwarding; always transmit priority-tagged frames as untagged.
+	 * forwarding; always transmit as untagged.
 	 */
-	for (i = 0; i < SJA1105_NUM_PORTS; i++) {
-		pvid.vmemb_port |= BIT(i);
-		pvid.vlan_bc |= BIT(i);
-		pvid.tag_port &= ~BIT(i);
+	for (port = 0; port < ds->num_ports; port++) {
+		struct sja1105_bridge_vlan *v;
+
+		if (dsa_is_unused_port(ds, port))
+			continue;
+
+		pvid.vmemb_port |= BIT(port);
+		pvid.vlan_bc |= BIT(port);
+		pvid.tag_port &= ~BIT(port);
+
+		/* Let traffic that don't need dsa_8021q (e.g. STP, PTP) be
+		 * transmitted as untagged.
+		 */
+		v = kzalloc(sizeof(*v), GFP_KERNEL);
+		if (!v)
+			return -ENOMEM;
+
+		v->port = port;
+		v->vid = 1;
+		v->untagged = true;
+		if (dsa_is_cpu_port(ds, port))
+			v->pvid = true;
+		list_add(&v->list, &priv->dsa_8021q_vlans);
 	}
 
 	((struct sja1105_vlan_lookup_entry *)table->entries)[0] = pvid;
@@ -1717,82 +1737,6 @@ static int sja1105_pvid_apply(struct sja1105_private *priv, int port, u16 pvid)
 					   &mac[port], true);
 }
 
-static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
-{
-	struct sja1105_vlan_lookup_entry *vlan;
-	int count, i;
-
-	vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
-	count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
-
-	for (i = 0; i < count; i++)
-		if (vlan[i].vlanid == vid)
-			return i;
-
-	/* Return an invalid entry index if not found */
-	return -1;
-}
-
-static int sja1105_vlan_apply(struct sja1105_private *priv, int port, u16 vid,
-			      bool enabled, bool untagged)
-{
-	struct sja1105_vlan_lookup_entry *vlan;
-	struct sja1105_table *table;
-	bool keep = true;
-	int match, rc;
-
-	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
-
-	match = sja1105_is_vlan_configured(priv, vid);
-	if (match < 0) {
-		/* Can't delete a missing entry. */
-		if (!enabled)
-			return 0;
-		rc = sja1105_table_resize(table, table->entry_count + 1);
-		if (rc)
-			return rc;
-		match = table->entry_count - 1;
-	}
-	/* Assign pointer after the resize (it's new memory) */
-	vlan = table->entries;
-	vlan[match].vlanid = vid;
-	if (enabled) {
-		vlan[match].vlan_bc |= BIT(port);
-		vlan[match].vmemb_port |= BIT(port);
-	} else {
-		vlan[match].vlan_bc &= ~BIT(port);
-		vlan[match].vmemb_port &= ~BIT(port);
-	}
-	/* Also unset tag_port if removing this VLAN was requested,
-	 * just so we don't have a confusing bitmap (no practical purpose).
-	 */
-	if (untagged || !enabled)
-		vlan[match].tag_port &= ~BIT(port);
-	else
-		vlan[match].tag_port |= BIT(port);
-	/* If there's no port left as member of this VLAN,
-	 * it's time for it to go.
-	 */
-	if (!vlan[match].vmemb_port)
-		keep = false;
-
-	dev_dbg(priv->ds->dev,
-		"%s: port %d, vid %llu, broadcast domain 0x%llx, "
-		"port members 0x%llx, tagged ports 0x%llx, keep %d\n",
-		__func__, port, vlan[match].vlanid, vlan[match].vlan_bc,
-		vlan[match].vmemb_port, vlan[match].tag_port, keep);
-
-	rc = sja1105_dynamic_config_write(priv, BLK_IDX_VLAN_LOOKUP, vid,
-					  &vlan[match], keep);
-	if (rc < 0)
-		return rc;
-
-	if (!keep)
-		return sja1105_table_delete_entry(table, match);
-
-	return 0;
-}
-
 static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
 					 int tree_index, int sw_index,
 					 int other_port, struct net_device *br)
@@ -1813,7 +1757,7 @@ static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
 
 		other_priv->expect_dsa_8021q = true;
 		rc = dsa_8021q_crosschip_bridge_join(ds, port, other_ds,
-						     other_port, br,
+						     other_port,
 						     &priv->crosschip_links);
 		other_priv->expect_dsa_8021q = false;
 		if (rc)
@@ -1821,7 +1765,7 @@ static int sja1105_crosschip_bridge_join(struct dsa_switch *ds,
 
 		priv->expect_dsa_8021q = true;
 		rc = dsa_8021q_crosschip_bridge_join(other_ds, other_port, ds,
-						     port, br,
+						     port,
 						     &other_priv->crosschip_links);
 		priv->expect_dsa_8021q = false;
 		if (rc)
@@ -1852,35 +1796,16 @@ static void sja1105_crosschip_bridge_leave(struct dsa_switch *ds,
 
 		other_priv->expect_dsa_8021q = true;
 		dsa_8021q_crosschip_bridge_leave(ds, port, other_ds, other_port,
-						 br, &priv->crosschip_links);
+						 &priv->crosschip_links);
 		other_priv->expect_dsa_8021q = false;
 
 		priv->expect_dsa_8021q = true;
-		dsa_8021q_crosschip_bridge_leave(other_ds, other_port, ds,
-						 port, br,
+		dsa_8021q_crosschip_bridge_leave(other_ds, other_port, ds, port,
 						 &other_priv->crosschip_links);
 		priv->expect_dsa_8021q = false;
 	}
 }
 
-static int sja1105_replay_crosschip_vlans(struct dsa_switch *ds, bool enabled)
-{
-	struct sja1105_private *priv = ds->priv;
-	struct dsa_8021q_crosschip_link *c;
-	int rc;
-
-	list_for_each_entry(c, &priv->crosschip_links, list) {
-		priv->expect_dsa_8021q = true;
-		rc = dsa_8021q_crosschip_link_apply(ds, c->port, c->other_ds,
-						    c->other_port, enabled);
-		priv->expect_dsa_8021q = false;
-		if (rc)
-			break;
-	}
-
-	return rc;
-}
-
 static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
 {
 	struct sja1105_private *priv = ds->priv;
@@ -1896,11 +1821,6 @@ static int sja1105_setup_8021q_tagging(struct dsa_switch *ds, bool enabled)
 			return rc;
 		}
 	}
-	rc = sja1105_replay_crosschip_vlans(ds, enabled);
-	if (rc) {
-		dev_err(ds->dev, "Failed to replay crosschip VLANs: %d\n", rc);
-		return rc;
-	}
 
 	dev_info(ds->dev, "%s switch tagging\n",
 		 enabled ? "Enabled" : "Disabled");
@@ -1914,6 +1834,269 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
 	return DSA_TAG_PROTO_SJA1105;
 }
 
+static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
+{
+	struct sja1105_vlan_lookup_entry *vlan;
+	int count, i;
+
+	vlan = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entries;
+	count = priv->static_config.tables[BLK_IDX_VLAN_LOOKUP].entry_count;
+
+	for (i = 0; i < count; i++)
+		if (vlan[i].vlanid == vid)
+			return i;
+
+	/* Return an invalid entry index if not found */
+	return -1;
+}
+
+static int sja1105_commit_vlans(struct sja1105_private *priv,
+				struct sja1105_vlan_lookup_entry *new_vlan)
+{
+	struct sja1105_vlan_lookup_entry *vlan;
+	struct sja1105_table *table;
+	int num_vlans = 0;
+	int rc, i, k = 0;
+
+	/* VLAN table */
+	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+	vlan = table->entries;
+
+	for (i = 0; i < VLAN_N_VID; i++) {
+		int match = sja1105_is_vlan_configured(priv, i);
+
+		if (new_vlan[i].vlanid != VLAN_N_VID)
+			num_vlans++;
+
+		if (new_vlan[i].vlanid == VLAN_N_VID && match >= 0) {
+			/* Was there before, no longer is. Delete */
+			dev_dbg(priv->ds->dev, "Deleting VLAN %d\n", i);
+			rc = sja1105_dynamic_config_write(priv,
+							  BLK_IDX_VLAN_LOOKUP,
+							  i, &vlan[match], false);
+			if (rc < 0)
+				return rc;
+		} else if (new_vlan[i].vlanid != VLAN_N_VID) {
+			/* Nothing changed, don't do anything */
+			if (match >= 0 &&
+			    vlan[match].vlanid == new_vlan[i].vlanid &&
+			    vlan[match].tag_port == new_vlan[i].tag_port &&
+			    vlan[match].vlan_bc == new_vlan[i].vlan_bc &&
+			    vlan[match].vmemb_port == new_vlan[i].vmemb_port)
+				continue;
+			/* Update entry */
+			dev_dbg(priv->ds->dev, "Updating VLAN %d\n", i);
+			rc = sja1105_dynamic_config_write(priv,
+							  BLK_IDX_VLAN_LOOKUP,
+							  i, &new_vlan[i],
+							  true);
+			if (rc < 0)
+				return rc;
+		}
+	}
+
+	if (table->entry_count)
+		kfree(table->entries);
+
+	table->entries = kcalloc(num_vlans, table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+
+	table->entry_count = num_vlans;
+	vlan = table->entries;
+
+	for (i = 0; i < VLAN_N_VID; i++) {
+		if (new_vlan[i].vlanid == VLAN_N_VID)
+			continue;
+		vlan[k++] = new_vlan[i];
+	}
+
+	return 0;
+}
+
+struct sja1105_crosschip_switch {
+	struct list_head list;
+	struct dsa_switch *other_ds;
+};
+
+static int sja1105_commit_pvid(struct sja1105_private *priv)
+{
+	struct sja1105_bridge_vlan *v;
+	struct list_head *vlan_list;
+	int rc = 0;
+
+	if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
+		vlan_list = &priv->bridge_vlans;
+	else
+		vlan_list = &priv->dsa_8021q_vlans;
+
+	list_for_each_entry(v, vlan_list, list) {
+		if (v->pvid) {
+			rc = sja1105_pvid_apply(priv, v->port, v->vid);
+			if (rc)
+				break;
+		}
+	}
+
+	return rc;
+}
+
+static int
+sja1105_build_bridge_vlans(struct sja1105_private *priv,
+			   struct sja1105_vlan_lookup_entry *new_vlan)
+{
+	struct sja1105_bridge_vlan *v;
+
+	if (priv->vlan_state == SJA1105_VLAN_UNAWARE)
+		return 0;
+
+	list_for_each_entry(v, &priv->bridge_vlans, list) {
+		int match = v->vid;
+
+		new_vlan[match].vlanid = v->vid;
+		new_vlan[match].vmemb_port |= BIT(v->port);
+		new_vlan[match].vlan_bc |= BIT(v->port);
+		if (!v->untagged)
+			new_vlan[match].tag_port |= BIT(v->port);
+	}
+
+	return 0;
+}
+
+static int
+sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv,
+			      struct sja1105_vlan_lookup_entry *new_vlan)
+{
+	struct sja1105_bridge_vlan *v;
+
+	if (priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
+		return 0;
+
+	list_for_each_entry(v, &priv->dsa_8021q_vlans, list) {
+		int match = v->vid;
+
+		new_vlan[match].vlanid = v->vid;
+		new_vlan[match].vmemb_port |= BIT(v->port);
+		new_vlan[match].vlan_bc |= BIT(v->port);
+		if (!v->untagged)
+			new_vlan[match].tag_port |= BIT(v->port);
+	}
+
+	return 0;
+}
+
+static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify);
+
+static int sja1105_notify_crosschip_switches(struct sja1105_private *priv)
+{
+	struct sja1105_crosschip_switch *s, *pos;
+	struct list_head crosschip_switches;
+	struct dsa_8021q_crosschip_link *c;
+	int rc = 0;
+
+	INIT_LIST_HEAD(&crosschip_switches);
+
+	list_for_each_entry(c, &priv->crosschip_links, list) {
+		bool already_added = false;
+
+		list_for_each_entry(s, &crosschip_switches, list) {
+			if (s->other_ds == c->other_ds) {
+				already_added = true;
+				break;
+			}
+		}
+
+		if (already_added)
+			continue;
+
+		s = kzalloc(sizeof(*s), GFP_KERNEL);
+		if (!s) {
+			dev_err(priv->ds->dev, "Failed to allocate memory\n");
+			rc = -ENOMEM;
+			goto out;
+		}
+		s->other_ds = c->other_ds;
+		list_add(&s->list, &crosschip_switches);
+	}
+
+	list_for_each_entry(s, &crosschip_switches, list) {
+		struct sja1105_private *other_priv = s->other_ds->priv;
+
+		rc = sja1105_build_vlan_table(other_priv, false);
+		if (rc)
+			goto out;
+	}
+
+out:
+	list_for_each_entry_safe(s, pos, &crosschip_switches, list) {
+		list_del(&s->list);
+		kfree(s);
+	}
+
+	return rc;
+}
+
+static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
+{
+	struct sja1105_vlan_lookup_entry *new_vlan;
+	struct sja1105_table *table;
+	int rc;
+	int i;
+
+	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
+	new_vlan = kcalloc(VLAN_N_VID,
+			   table->ops->unpacked_entry_size, GFP_KERNEL);
+	if (!new_vlan)
+		return -ENOMEM;
+
+	for (i = 0; i < VLAN_N_VID; i++)
+		new_vlan[i].vlanid = VLAN_N_VID;
+
+	/* Bridge VLANs */
+	rc = sja1105_build_bridge_vlans(priv, new_vlan);
+	if (rc)
+		goto out;
+
+	/* VLANs necessary for dsa_8021q operation, given to us by tag_8021q.c:
+	 * - RX VLANs
+	 * - TX VLANs
+	 * - Crosschip links
+	 */
+	rc = sja1105_build_dsa_8021q_vlans(priv, new_vlan);
+	if (rc)
+		goto out;
+
+	rc = sja1105_commit_vlans(priv, new_vlan);
+	if (rc)
+		goto out;
+
+	rc = sja1105_commit_pvid(priv);
+	if (rc)
+		goto out;
+
+	if (notify) {
+		rc = sja1105_notify_crosschip_switches(priv);
+		if (rc)
+			goto out;
+	}
+
+out:
+	kfree(new_vlan);
+
+	return rc;
+}
+
+/* Select the list to which we should add this VLAN. */
+static struct list_head *sja1105_classify_vlan(struct sja1105_private *priv,
+					       u16 vid)
+{
+	if (priv->expect_dsa_8021q)
+		return &priv->dsa_8021q_vlans;
+
+	return &priv->bridge_vlans;
+}
+
 static int sja1105_vlan_prepare(struct dsa_switch *ds, int port,
 				const struct switchdev_obj_port_vlan *vlan)
 {
@@ -2026,45 +2209,80 @@ static void sja1105_vlan_add(struct dsa_switch *ds, int port,
 			     const struct switchdev_obj_port_vlan *vlan)
 {
 	struct sja1105_private *priv = ds->priv;
+	bool vlan_table_changed = false;
 	u16 vid;
 	int rc;
 
 	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
-		rc = sja1105_vlan_apply(priv, port, vid, true, vlan->flags &
-					BRIDGE_VLAN_INFO_UNTAGGED);
-		if (rc < 0) {
-			dev_err(ds->dev, "Failed to add VLAN %d to port %d: %d\n",
-				vid, port, rc);
-			return;
-		}
-		if (vlan->flags & BRIDGE_VLAN_INFO_PVID) {
-			rc = sja1105_pvid_apply(ds->priv, port, vid);
-			if (rc < 0) {
-				dev_err(ds->dev, "Failed to set pvid %d on port %d: %d\n",
-					vid, port, rc);
-				return;
+		bool untagged = vlan->flags & BRIDGE_VLAN_INFO_UNTAGGED;
+		bool pvid = vlan->flags & BRIDGE_VLAN_INFO_PVID;
+		struct sja1105_bridge_vlan *v;
+		struct list_head *vlan_list;
+		bool already_added = false;
+
+		vlan_list = sja1105_classify_vlan(priv, vid);
+
+		list_for_each_entry(v, vlan_list, list) {
+			if (v->port == port && v->vid == vid &&
+			    v->untagged == untagged && v->pvid == pvid) {
+				already_added = true;
+				break;
 			}
 		}
+
+		if (already_added)
+			continue;
+
+		v = kzalloc(sizeof(*v), GFP_KERNEL);
+		if (!v) {
+			dev_err(ds->dev, "Out of memory while storing VLAN\n");
+			return;
+		}
+
+		v->port = port;
+		v->vid = vid;
+		v->untagged = untagged;
+		v->pvid = pvid;
+		list_add(&v->list, vlan_list);
+
+		vlan_table_changed = true;
 	}
+
+	if (!vlan_table_changed)
+		return;
+
+	rc = sja1105_build_vlan_table(priv, true);
+	if (rc)
+		dev_err(ds->dev, "Failed to build VLAN table: %d\n", rc);
 }
 
 static int sja1105_vlan_del(struct dsa_switch *ds, int port,
 			    const struct switchdev_obj_port_vlan *vlan)
 {
 	struct sja1105_private *priv = ds->priv;
+	bool vlan_table_changed = false;
 	u16 vid;
-	int rc;
 
 	for (vid = vlan->vid_begin; vid <= vlan->vid_end; vid++) {
-		rc = sja1105_vlan_apply(priv, port, vid, false, vlan->flags &
-					BRIDGE_VLAN_INFO_UNTAGGED);
-		if (rc < 0) {
-			dev_err(ds->dev, "Failed to remove VLAN %d from port %d: %d\n",
-				vid, port, rc);
-			return rc;
+		struct sja1105_bridge_vlan *v, *n;
+		struct list_head *vlan_list;
+
+		vlan_list = sja1105_classify_vlan(priv, vid);
+
+		list_for_each_entry_safe(v, n, vlan_list, list) {
+			if (v->port == port && v->vid == vid) {
+				list_del(&v->list);
+				kfree(v);
+				vlan_table_changed = true;
+				break;
+			}
 		}
 	}
-	return 0;
+
+	if (!vlan_table_changed)
+		return 0;
+
+	return sja1105_build_vlan_table(priv, true);
 }
 
 /* The programming model for the SJA1105 switch is "all-at-once" via static
@@ -2142,6 +2360,7 @@ static int sja1105_setup(struct dsa_switch *ds)
 static void sja1105_teardown(struct dsa_switch *ds)
 {
 	struct sja1105_private *priv = ds->priv;
+	struct sja1105_bridge_vlan *v, *n;
 	int port;
 
 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
@@ -2158,6 +2377,16 @@ static void sja1105_teardown(struct dsa_switch *ds)
 	sja1105_tas_teardown(ds);
 	sja1105_ptp_clock_unregister(ds);
 	sja1105_static_config_free(&priv->static_config);
+
+	list_for_each_entry_safe(v, n, &priv->dsa_8021q_vlans, list) {
+		list_del(&v->list);
+		kfree(v);
+	}
+
+	list_for_each_entry_safe(v, n, &priv->bridge_vlans, list) {
+		list_del(&v->list);
+		kfree(v);
+	}
 }
 
 static int sja1105_port_enable(struct dsa_switch *ds, int port,
@@ -2598,6 +2827,8 @@ static int sja1105_probe(struct spi_device *spi)
 	mutex_init(&priv->mgmt_lock);
 
 	INIT_LIST_HEAD(&priv->crosschip_links);
+	INIT_LIST_HEAD(&priv->bridge_vlans);
+	INIT_LIST_HEAD(&priv->dsa_8021q_vlans);
 
 	sja1105_tas_setup(ds);
 	sja1105_flower_setup(ds);
diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index ebc245ff838a..404bd2cce642 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -25,18 +25,14 @@ struct dsa_8021q_crosschip_link {
 int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
 				 bool enabled);
 
-int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
-				   struct dsa_switch *other_ds,
-				   int other_port, bool enabled);
-
 int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 				    struct dsa_switch *other_ds,
-				    int other_port, struct net_device *br,
+				    int other_port,
 				    struct list_head *crosschip_links);
 
 int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
 				     struct dsa_switch *other_ds,
-				     int other_port, struct net_device *br,
+				     int other_port,
 				     struct list_head *crosschip_links);
 
 struct sk_buff *dsa_8021q_xmit(struct sk_buff *skb, struct net_device *netdev,
@@ -60,16 +56,9 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
 	return 0;
 }
 
-int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
-				   struct dsa_switch *other_ds,
-				   int other_port, bool enabled)
-{
-	return 0;
-}
-
 int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 				    struct dsa_switch *other_ds,
-				    int other_port, struct net_device *br,
+				    int other_port,
 				    struct list_head *crosschip_links)
 {
 	return 0;
@@ -77,7 +66,7 @@ int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 
 int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
 				     struct dsa_switch *other_ds,
-				     int other_port, struct net_device *br,
+				     int other_port,
 				     struct list_head *crosschip_links)
 {
 	return 0;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 4774ecd1f8fc..3236fbbf85b9 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -296,9 +296,9 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
 }
 EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
 
-int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
-				   struct dsa_switch *other_ds,
-				   int other_port, bool enabled)
+static int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
+					  struct dsa_switch *other_ds,
+					  int other_port, bool enabled)
 {
 	u16 rx_vid = dsa_8021q_rx_vid(ds, port);
 
@@ -308,7 +308,6 @@ int dsa_8021q_crosschip_link_apply(struct dsa_switch *ds, int port,
 	return dsa_8021q_vid_apply(other_ds, other_port, rx_vid,
 				   BRIDGE_VLAN_INFO_UNTAGGED, enabled);
 }
-EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_link_apply);
 
 static int dsa_8021q_crosschip_link_add(struct dsa_switch *ds, int port,
 					struct dsa_switch *other_ds,
@@ -369,7 +368,7 @@ static void dsa_8021q_crosschip_link_del(struct dsa_switch *ds,
  */
 int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 				    struct dsa_switch *other_ds,
-				    int other_port, struct net_device *br,
+				    int other_port,
 				    struct list_head *crosschip_links)
 {
 	/* @other_upstream is how @other_ds reaches us. If we are part
@@ -385,12 +384,10 @@ int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 	if (rc)
 		return rc;
 
-	if (!br_vlan_enabled(br)) {
-		rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds,
-						    other_port, true);
-		if (rc)
-			return rc;
-	}
+	rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds,
+					    other_port, true);
+	if (rc)
+		return rc;
 
 	rc = dsa_8021q_crosschip_link_add(ds, port, other_ds,
 					  other_upstream,
@@ -398,20 +395,14 @@ int dsa_8021q_crosschip_bridge_join(struct dsa_switch *ds, int port,
 	if (rc)
 		return rc;
 
-	if (!br_vlan_enabled(br)) {
-		rc = dsa_8021q_crosschip_link_apply(ds, port, other_ds,
-						    other_upstream, true);
-		if (rc)
-			return rc;
-	}
-
-	return 0;
+	return dsa_8021q_crosschip_link_apply(ds, port, other_ds,
+					      other_upstream, true);
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_crosschip_bridge_join);
 
 int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
 				     struct dsa_switch *other_ds,
-				     int other_port, struct net_device *br,
+				     int other_port,
 				     struct list_head *crosschip_links)
 {
 	int other_upstream = dsa_upstream_port(other_ds, other_port);
@@ -431,14 +422,12 @@ int dsa_8021q_crosschip_bridge_leave(struct dsa_switch *ds, int port,
 			if (keep)
 				continue;
 
-			if (!br_vlan_enabled(br)) {
-				rc = dsa_8021q_crosschip_link_apply(ds, port,
-								    other_ds,
-								    other_port,
-								    false);
-				if (rc)
-					return rc;
-			}
+			rc = dsa_8021q_crosschip_link_apply(ds, port,
+							    other_ds,
+							    other_port,
+							    false);
+			if (rc)
+				return rc;
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From fa83e5d9183fd9bc5f94f937b069c5e15162e974 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:30 +0300
Subject: net: dsa: sja1105: allow VLAN configuration from the bridge in all
 states

Let the DSA core call our .port_vlan_add methods every time the bridge
layer requests so. We will deal internally with saving/restoring VLANs
depending on our VLAN awareness state.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index fb95130299b1..ca5a9baa0b2f 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2350,6 +2350,8 @@ static int sja1105_setup(struct dsa_switch *ds)
 
 	ds->mtu_enforcement_ingress = true;
 
+	ds->configure_vlan_while_not_filtering = true;
+
 	/* The DSA/switchdev model brings up switch ports in standalone mode by
 	 * default, and that means vlan_filtering is 0 since they're not under
 	 * a bridge, so it's safe to set up switch tagging at this time.
-- 
cgit v1.2.3-59-g8ed1b


From cfa36b1fff422660fe7fc3a10c17a618d0371796 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:31 +0300
Subject: net: dsa: sja1105: exit sja1105_vlan_filtering when called multiple
 times

VLAN filtering is a global property for sja1105, and that means that we
rely on the DSA core to not call us more than once.

But we need to introduce some per-port state for the tagger, namely the
xmit_tpid, and the best place to do that is where the xmit_tpid changes,
namely in sja1105_vlan_filtering. So at the moment, exit early from the
function to avoid unnecessarily resetting the switch for each port call.
Then we'll change the xmit_tpid prior to the early exit in the next
patch.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index ca5a9baa0b2f..7b9c3db98e1d 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2158,6 +2158,9 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	else
 		state = SJA1105_VLAN_FILTERING_FULL;
 
+	if (priv->vlan_state == state)
+		return 0;
+
 	priv->vlan_state = state;
 
 	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
-- 
cgit v1.2.3-59-g8ed1b


From 38b5beeae7a4cde87edabb0196fac1f55ae668ee Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:32 +0300
Subject: net: dsa: sja1105: prepare tagger for handling DSA tags and VLAN
 simultaneously

In VLAN-unaware mode, sja1105 uses VLAN tags with a custom TPID of
0xdadb. While in the yet-to-be introduced best_effort_vlan_filtering
mode, it needs to work with normal VLAN TPID values.

A complication arises when we must transmit a VLAN-tagged packet to the
switch when it's in VLAN-aware mode. We need to construct a packet with
2 VLAN tags, and the switch will use the outer header for routing and
pop it on egress. But sadly, here the 2 hardware generations don't
behave the same:

- E/T switches won't pop an ETH_P_8021AD tag on egress, it seems
  (packets will remain double-tagged).
- P/Q/R/S switches will drop a packet with 2 ETH_P_8021Q tags (it looks
  like it tries to prevent VLAN hopping).

But it looks like the reverse is also true:

- E/T switches have no problem popping the outer tag from packets with
  2 ETH_P_8021Q tags.
- P/Q/R/S will have no problem popping a single tag even if that is
  ETH_P_8021AD.

So it is clear that if we want the hardware to work with dsa_8021q
tagging in VLAN-aware mode, we need to send different TPIDs depending on
revision. Keep that information in priv->info->qinq_tpid.

The per-port tagger structure will hold an xmit_tpid value that depends
not only upon the qinq_tpid, but also upon the VLAN awareness state
itself (in case we must transmit using 0xdadb).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |  6 ++++++
 drivers/net/dsa/sja1105/sja1105_main.c | 10 ++++++++++
 drivers/net/dsa/sja1105/sja1105_spi.c  |  6 ++++++
 include/linux/dsa/sja1105.h            |  1 +
 net/dsa/tag_sja1105.c                  | 32 +++++++++++++++++++++-----------
 5 files changed, 44 insertions(+), 11 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index c80f1999c694..a019ffae38f1 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -87,6 +87,12 @@ struct sja1105_info {
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
+	/* Both E/T and P/Q/R/S have quirks when it comes to popping the S-Tag
+	 * from double-tagged frames. E/T will pop it only when it's equal to
+	 * TPID from the General Parameters Table, while P/Q/R/S will only
+	 * pop it when it's equal to TPID2.
+	 */
+	u16 qinq_tpid;
 	int (*reset_cmd)(struct dsa_switch *ds);
 	int (*setup_rgmii_delay)(const void *ctx, int port);
 	/* Prototypes from include/net/dsa.h */
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 7b9c3db98e1d..b7e4a85caade 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2153,6 +2153,15 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 		tpid2 = ETH_P_SJA1105;
 	}
 
+	for (port = 0; port < ds->num_ports; port++) {
+		struct sja1105_port *sp = &priv->ports[port];
+
+		if (enabled)
+			sp->xmit_tpid = priv->info->qinq_tpid;
+		else
+			sp->xmit_tpid = ETH_P_SJA1105;
+	}
+
 	if (!enabled)
 		state = SJA1105_VLAN_UNAWARE;
 	else
@@ -2866,6 +2875,7 @@ static int sja1105_probe(struct spi_device *spi)
 			goto out;
 		}
 		skb_queue_head_init(&sp->xmit_queue);
+		sp->xmit_tpid = ETH_P_SJA1105;
 	}
 
 	return 0;
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index 0be75c49e6c3..a0dacae803cc 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -512,6 +512,7 @@ struct sja1105_info sja1105e_info = {
 	.part_no		= SJA1105ET_PART_NO,
 	.static_ops		= sja1105e_table_ops,
 	.dyn_ops		= sja1105et_dyn_ops,
+	.qinq_tpid		= ETH_P_8021Q,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
 	.reset_cmd		= sja1105et_reset_cmd,
@@ -526,6 +527,7 @@ struct sja1105_info sja1105t_info = {
 	.part_no		= SJA1105ET_PART_NO,
 	.static_ops		= sja1105t_table_ops,
 	.dyn_ops		= sja1105et_dyn_ops,
+	.qinq_tpid		= ETH_P_8021Q,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
 	.reset_cmd		= sja1105et_reset_cmd,
@@ -540,6 +542,7 @@ struct sja1105_info sja1105p_info = {
 	.part_no		= SJA1105P_PART_NO,
 	.static_ops		= sja1105p_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
+	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
@@ -555,6 +558,7 @@ struct sja1105_info sja1105q_info = {
 	.part_no		= SJA1105Q_PART_NO,
 	.static_ops		= sja1105q_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
+	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
@@ -570,6 +574,7 @@ struct sja1105_info sja1105r_info = {
 	.part_no		= SJA1105R_PART_NO,
 	.static_ops		= sja1105r_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
+	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
@@ -586,6 +591,7 @@ struct sja1105_info sja1105s_info = {
 	.static_ops		= sja1105s_table_ops,
 	.dyn_ops		= sja1105pqrs_dyn_ops,
 	.regs			= &sja1105pqrs_regs,
+	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index fa5735c353cd..f821d08b1b5f 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -59,6 +59,7 @@ struct sja1105_port {
 	struct sja1105_tagger_data *data;
 	struct dsa_port *dp;
 	bool hwts_tx_en;
+	u16 xmit_tpid;
 };
 
 #endif /* _NET_DSA_SJA1105_H */
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 5ecac5921a7d..398e2b9a1b96 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -69,12 +69,25 @@ static inline bool sja1105_is_meta_frame(const struct sk_buff *skb)
 	return true;
 }
 
+static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb)
+{
+	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
+
+	if (hdr->h_vlan_proto == htons(ETH_P_SJA1105)) /* network order */
+		return true;
+
+	if (hdr->h_vlan_proto != htons(ETH_P_8021Q))
+		return false;
+
+	return vid_is_dsa_8021q(ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK);
+}
+
 /* This is the first time the tagger sees the frame on RX.
  * Figure out if we can decode it.
  */
 static bool sja1105_filter(const struct sk_buff *skb, struct net_device *dev)
 {
-	if (!dsa_port_is_vlan_filtering(dev->dsa_ptr))
+	if (sja1105_can_use_vlan_as_tags(skb))
 		return true;
 	if (sja1105_is_link_local(skb))
 		return true;
@@ -96,6 +109,11 @@ static struct sk_buff *sja1105_defer_xmit(struct sja1105_port *sp,
 	return NULL;
 }
 
+static u16 sja1105_xmit_tpid(struct sja1105_port *sp)
+{
+	return sp->xmit_tpid;
+}
+
 static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
 				    struct net_device *netdev)
 {
@@ -111,15 +129,7 @@ static struct sk_buff *sja1105_xmit(struct sk_buff *skb,
 	if (unlikely(sja1105_is_link_local(skb)))
 		return sja1105_defer_xmit(dp->priv, skb);
 
-	/* If we are under a vlan_filtering bridge, IP termination on
-	 * switch ports based on 802.1Q tags is simply too brittle to
-	 * be passable. So just defer to the dsa_slave_notag_xmit
-	 * implementation.
-	 */
-	if (dsa_port_is_vlan_filtering(dp))
-		return skb;
-
-	return dsa_8021q_xmit(skb, netdev, ETH_P_SJA1105,
+	return dsa_8021q_xmit(skb, netdev, sja1105_xmit_tpid(dp->priv),
 			     ((pcp << VLAN_PRIO_SHIFT) | tx_vid));
 }
 
@@ -258,7 +268,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 
 	hdr = eth_hdr(skb);
 	tpid = ntohs(hdr->h_proto);
-	is_tagged = (tpid == ETH_P_SJA1105);
+	is_tagged = (tpid == ETH_P_SJA1105 || tpid == ETH_P_8021Q);
 	is_link_local = sja1105_is_link_local(skb);
 	is_meta = sja1105_is_meta_frame(skb);
 
-- 
cgit v1.2.3-59-g8ed1b


From 3eaae1d05f2b5be1be834bfad64f8fc2ad39a56d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:33 +0300
Subject: net: dsa: tag_8021q: support up to 8 VLANs per port using sub-VLANs

For switches that support VLAN retagging, such as sja1105, we extend
dsa_8021q by encoding a "sub-VLAN" into the remaining 3 free bits in the
dsa_8021q tag.

A sub-VLAN is nothing more than a number in the range 0-7, which serves
as an index into a per-port driver lookup table. The sub-VLAN value of
zero means that traffic is untagged (this is also backwards-compatible
with dsa_8021q without retagging).

The switch should be configured to retag VLAN-tagged traffic that gets
transmitted towards the CPU port (and towards the CPU only). Example:

bridge vlan add dev sw1p0 vid 100

The switch retags frames received on port 0, going to the CPU, and
having VID 100, to the VID of 1104 (0x0450). In dsa_8021q language:

 | 11  | 10  |  9  |  8  |  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
 +-----------+-----+-----------------+-----------+-----------------------+
 |    DIR    | SVL |    SWITCH_ID    |  SUBVLAN  |          PORT         |
 +-----------+-----+-----------------+-----------+-----------------------+

0x0450 means:
 - DIR = 0b01: this is an RX VLAN
 - SUBVLAN = 0b001: this is subvlan #1
 - SWITCH_ID = 0b001: this is switch 1 (see the name "sw1p0")
 - PORT = 0b0000: this is port 0 (see the name "sw1p0")

The driver also remembers the "1 -> 100" mapping. In the hotpath, if the
sub-VLAN from the tag encodes a non-untagged frame, this mapping is used
to create a VLAN hwaccel tag, with the value of 100.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/dsa/8021q.h | 16 ++++++++++++++
 net/dsa/tag_8021q.c       | 56 ++++++++++++++++++++++++++++++++++++++++-------
 2 files changed, 64 insertions(+), 8 deletions(-)

diff --git a/include/linux/dsa/8021q.h b/include/linux/dsa/8021q.h
index 404bd2cce642..311aa04e7520 100644
--- a/include/linux/dsa/8021q.h
+++ b/include/linux/dsa/8021q.h
@@ -20,6 +20,8 @@ struct dsa_8021q_crosschip_link {
 	refcount_t refcount;
 };
 
+#define DSA_8021Q_N_SUBVLAN			8
+
 #if IS_ENABLED(CONFIG_NET_DSA_TAG_8021Q)
 
 int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int index,
@@ -42,10 +44,14 @@ u16 dsa_8021q_tx_vid(struct dsa_switch *ds, int port);
 
 u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port);
 
+u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan);
+
 int dsa_8021q_rx_switch_id(u16 vid);
 
 int dsa_8021q_rx_source_port(u16 vid);
 
+u16 dsa_8021q_rx_subvlan(u16 vid);
+
 bool vid_is_dsa_8021q(u16 vid);
 
 #else
@@ -88,6 +94,11 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
 	return 0;
 }
 
+u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
+{
+	return 0;
+}
+
 int dsa_8021q_rx_switch_id(u16 vid)
 {
 	return 0;
@@ -98,6 +109,11 @@ int dsa_8021q_rx_source_port(u16 vid)
 	return 0;
 }
 
+u16 dsa_8021q_rx_subvlan(u16 vid)
+{
+	return 0;
+}
+
 bool vid_is_dsa_8021q(u16 vid)
 {
 	return false;
diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 3236fbbf85b9..3052da668156 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -17,7 +17,7 @@
  *
  * | 11  | 10  |  9  |  8  |  7  |  6  |  5  |  4  |  3  |  2  |  1  |  0  |
  * +-----------+-----+-----------------+-----------+-----------------------+
- * |    DIR    | RSV |    SWITCH_ID    |    RSV    |          PORT         |
+ * |    DIR    | SVL |    SWITCH_ID    |  SUBVLAN  |          PORT         |
  * +-----------+-----+-----------------+-----------+-----------------------+
  *
  * DIR - VID[11:10]:
@@ -27,17 +27,24 @@
  *	These values make the special VIDs of 0, 1 and 4095 to be left
  *	unused by this coding scheme.
  *
- * RSV - VID[9]:
- *	To be used for further expansion of SWITCH_ID or for other purposes.
- *	Must be transmitted as zero and ignored on receive.
+ * SVL/SUBVLAN - { VID[9], VID[5:4] }:
+ *	Sub-VLAN encoding. Valid only when DIR indicates an RX VLAN.
+ *	* 0 (0b000): Field does not encode a sub-VLAN, either because
+ *	received traffic is untagged, PVID-tagged or because a second
+ *	VLAN tag is present after this tag and not inside of it.
+ *	* 1 (0b001): Received traffic is tagged with a VID value private
+ *	to the host. This field encodes the index in the host's lookup
+ *	table through which the value of the ingress VLAN ID can be
+ *	recovered.
+ *	* 2 (0b010): Field encodes a sub-VLAN.
+ *	...
+ *	* 7 (0b111): Field encodes a sub-VLAN.
+ *	When DIR indicates a TX VLAN, SUBVLAN must be transmitted as zero
+ *	(by the host) and ignored on receive (by the switch).
  *
  * SWITCH_ID - VID[8:6]:
  *	Index of switch within DSA tree. Must be between 0 and 7.
  *
- * RSV - VID[5:4]:
- *	To be used for further expansion of PORT or for other purposes.
- *	Must be transmitted as zero and ignored on receive.
- *
  * PORT - VID[3:0]:
  *	Index of switch port. Must be between 0 and 15.
  */
@@ -54,6 +61,18 @@
 #define DSA_8021Q_SWITCH_ID(x)		(((x) << DSA_8021Q_SWITCH_ID_SHIFT) & \
 						 DSA_8021Q_SWITCH_ID_MASK)
 
+#define DSA_8021Q_SUBVLAN_HI_SHIFT	9
+#define DSA_8021Q_SUBVLAN_HI_MASK	GENMASK(9, 9)	/* VID[9] */
+#define DSA_8021Q_SUBVLAN_LO_SHIFT	4
+#define DSA_8021Q_SUBVLAN_LO_MASK	GENMASK(5, 4)	/* VID[5:4] */
+#define DSA_8021Q_SUBVLAN_HI(x)		(((x) & GENMASK(2, 2)) >> 2)
+#define DSA_8021Q_SUBVLAN_LO(x)		((x) & GENMASK(1, 0))
+#define DSA_8021Q_SUBVLAN(x)		\
+		(((DSA_8021Q_SUBVLAN_LO(x) << DSA_8021Q_SUBVLAN_LO_SHIFT) & \
+		  DSA_8021Q_SUBVLAN_LO_MASK) | \
+		 ((DSA_8021Q_SUBVLAN_HI(x) << DSA_8021Q_SUBVLAN_HI_SHIFT) & \
+		  DSA_8021Q_SUBVLAN_HI_MASK))
+
 #define DSA_8021Q_PORT_SHIFT		0
 #define DSA_8021Q_PORT_MASK		GENMASK(3, 0)
 #define DSA_8021Q_PORT(x)		(((x) << DSA_8021Q_PORT_SHIFT) & \
@@ -79,6 +98,13 @@ u16 dsa_8021q_rx_vid(struct dsa_switch *ds, int port)
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid);
 
+u16 dsa_8021q_rx_vid_subvlan(struct dsa_switch *ds, int port, u16 subvlan)
+{
+	return DSA_8021Q_DIR_RX | DSA_8021Q_SWITCH_ID(ds->index) |
+	       DSA_8021Q_PORT(port) | DSA_8021Q_SUBVLAN(subvlan);
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rx_vid_subvlan);
+
 /* Returns the decoded switch ID from the RX VID. */
 int dsa_8021q_rx_switch_id(u16 vid)
 {
@@ -93,6 +119,20 @@ int dsa_8021q_rx_source_port(u16 vid)
 }
 EXPORT_SYMBOL_GPL(dsa_8021q_rx_source_port);
 
+/* Returns the decoded subvlan from the RX VID. */
+u16 dsa_8021q_rx_subvlan(u16 vid)
+{
+	u16 svl_hi, svl_lo;
+
+	svl_hi = (vid & DSA_8021Q_SUBVLAN_HI_MASK) >>
+		 DSA_8021Q_SUBVLAN_HI_SHIFT;
+	svl_lo = (vid & DSA_8021Q_SUBVLAN_LO_MASK) >>
+		 DSA_8021Q_SUBVLAN_LO_SHIFT;
+
+	return (svl_hi << 2) | svl_lo;
+}
+EXPORT_SYMBOL_GPL(dsa_8021q_rx_subvlan);
+
 bool vid_is_dsa_8021q(u16 vid)
 {
 	return ((vid & DSA_8021Q_DIR_MASK) == DSA_8021Q_DIR_RX ||
-- 
cgit v1.2.3-59-g8ed1b


From 84eeb5d460e399795e9a92a0cd44999254886150 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:34 +0300
Subject: net: dsa: tag_sja1105: implement sub-VLAN decoding

Create a subvlan_map as part of each port's tagger private structure.
This keeps reverse mappings of bridge-to-dsa_8021q VLAN retagging rules.

Note that as of this patch, this piece of code is never engaged, due to
the fact that the driver hasn't installed any retagging rule, so we'll
always see packets with a subvlan code of 0 (untagged).

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c |  4 ++++
 include/linux/dsa/sja1105.h            |  2 ++
 net/dsa/tag_sja1105.c                  | 19 +++++++++++++++++++
 3 files changed, 25 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index b7e4a85caade..fd15a18596ea 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2856,6 +2856,7 @@ static int sja1105_probe(struct spi_device *spi)
 		struct sja1105_port *sp = &priv->ports[port];
 		struct dsa_port *dp = dsa_to_port(ds, port);
 		struct net_device *slave;
+		int subvlan;
 
 		if (!dsa_is_user_port(ds, port))
 			continue;
@@ -2876,6 +2877,9 @@ static int sja1105_probe(struct spi_device *spi)
 		}
 		skb_queue_head_init(&sp->xmit_queue);
 		sp->xmit_tpid = ETH_P_SJA1105;
+
+		for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
+			sp->subvlan_map[subvlan] = VLAN_N_VID;
 	}
 
 	return 0;
diff --git a/include/linux/dsa/sja1105.h b/include/linux/dsa/sja1105.h
index f821d08b1b5f..dd93735ae228 100644
--- a/include/linux/dsa/sja1105.h
+++ b/include/linux/dsa/sja1105.h
@@ -9,6 +9,7 @@
 
 #include <linux/skbuff.h>
 #include <linux/etherdevice.h>
+#include <linux/dsa/8021q.h>
 #include <net/dsa.h>
 
 #define ETH_P_SJA1105				ETH_P_DSA_8021Q
@@ -53,6 +54,7 @@ struct sja1105_skb_cb {
 	((struct sja1105_skb_cb *)DSA_SKB_CB_PRIV(skb))
 
 struct sja1105_port {
+	u16 subvlan_map[DSA_8021Q_N_SUBVLAN];
 	struct kthread_worker *xmit_worker;
 	struct kthread_work xmit_work;
 	struct sk_buff_head xmit_queue;
diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index 398e2b9a1b96..ad105550b145 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -254,6 +254,20 @@ static struct sk_buff
 	return skb;
 }
 
+static void sja1105_decode_subvlan(struct sk_buff *skb, u16 subvlan)
+{
+	struct dsa_port *dp = dsa_slave_to_port(skb->dev);
+	struct sja1105_port *sp = dp->priv;
+	u16 vid = sp->subvlan_map[subvlan];
+	u16 vlan_tci;
+
+	if (vid == VLAN_N_VID)
+		return;
+
+	vlan_tci = (skb->priority << VLAN_PRIO_SHIFT) | vid;
+	__vlan_hwaccel_put_tag(skb, htons(ETH_P_8021Q), vlan_tci);
+}
+
 static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 				   struct net_device *netdev,
 				   struct packet_type *pt)
@@ -263,6 +277,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 	struct ethhdr *hdr;
 	u16 tpid, vid, tci;
 	bool is_link_local;
+	u16 subvlan = 0;
 	bool is_tagged;
 	bool is_meta;
 
@@ -286,6 +301,7 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 		source_port = dsa_8021q_rx_source_port(vid);
 		switch_id = dsa_8021q_rx_switch_id(vid);
 		skb->priority = (tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
+		subvlan = dsa_8021q_rx_subvlan(vid);
 	} else if (is_link_local) {
 		/* Management traffic path. Switch embeds the switch ID and
 		 * port ID into bytes of the destination MAC, courtesy of
@@ -310,6 +326,9 @@ static struct sk_buff *sja1105_rcv(struct sk_buff *skb,
 		return NULL;
 	}
 
+	if (subvlan)
+		sja1105_decode_subvlan(skb, subvlan);
+
 	return sja1105_rcv_meta_state_machine(skb, &meta, is_link_local,
 					      is_meta);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2cafa72e516f61b6d82c2416b4f5963fb48fd9ce Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:35 +0300
Subject: net: dsa: sja1105: add a new best_effort_vlan_filtering devlink
 parameter

This devlink parameter enables the handling of DSA tags when enslaved to
a bridge with vlan_filtering=1. There are very good reasons to want
this, but there are also very good reasons for not enabling it by
default. So a devlink param named best_effort_vlan_filtering, currently
driver-specific and exported only by sja1105, is used to configure this.

In practice, this is perhaps the way that most users are going to use
the switch. It assumes that no more than 7 VLANs are needed per port.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h      |   2 +
 drivers/net/dsa/sja1105/sja1105_main.c | 122 +++++++++++++++++++++++++++++++--
 2 files changed, 120 insertions(+), 4 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index a019ffae38f1..1dcaecab0912 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -194,6 +194,7 @@ struct sja1105_bridge_vlan {
 
 enum sja1105_vlan_state {
 	SJA1105_VLAN_UNAWARE,
+	SJA1105_VLAN_BEST_EFFORT,
 	SJA1105_VLAN_FILTERING_FULL,
 };
 
@@ -201,6 +202,7 @@ struct sja1105_private {
 	struct sja1105_static_config static_config;
 	bool rgmii_rx_delay[SJA1105_NUM_PORTS];
 	bool rgmii_tx_delay[SJA1105_NUM_PORTS];
+	bool best_effort_vlan_filtering;
 	const struct sja1105_info *info;
 	struct gpio_desc *reset_gpio;
 	struct spi_device *spidev;
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index fd15a18596ea..775a6766288e 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2132,6 +2132,7 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	enum sja1105_vlan_state state;
 	struct sja1105_table *table;
 	struct sja1105_rule *rule;
+	bool want_tagging;
 	u16 tpid, tpid2;
 	int rc;
 
@@ -2164,6 +2165,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 
 	if (!enabled)
 		state = SJA1105_VLAN_UNAWARE;
+	else if (priv->best_effort_vlan_filtering)
+		state = SJA1105_VLAN_BEST_EFFORT;
 	else
 		state = SJA1105_VLAN_FILTERING_FULL;
 
@@ -2171,6 +2174,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 		return 0;
 
 	priv->vlan_state = state;
+	want_tagging = (state == SJA1105_VLAN_UNAWARE ||
+			state == SJA1105_VLAN_BEST_EFFORT);
 
 	table = &priv->static_config.tables[BLK_IDX_GENERAL_PARAMS];
 	general_params = table->entries;
@@ -2184,8 +2189,10 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	general_params->incl_srcpt1 = enabled;
 	general_params->incl_srcpt0 = enabled;
 
+	want_tagging = priv->best_effort_vlan_filtering || !enabled;
+
 	/* VLAN filtering => independent VLAN learning.
-	 * No VLAN filtering => shared VLAN learning.
+	 * No VLAN filtering (or best effort) => shared VLAN learning.
 	 *
 	 * In shared VLAN learning mode, untagged traffic still gets
 	 * pvid-tagged, and the FDB table gets populated with entries
@@ -2204,7 +2211,7 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	 */
 	table = &priv->static_config.tables[BLK_IDX_L2_LOOKUP_PARAMS];
 	l2_lookup_params = table->entries;
-	l2_lookup_params->shared_learn = !enabled;
+	l2_lookup_params->shared_learn = want_tagging;
 
 	rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
 	if (rc)
@@ -2212,9 +2219,10 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 
 	/* Switch port identification based on 802.1Q is only passable
 	 * if we are not under a vlan_filtering bridge. So make sure
-	 * the two configurations are mutually exclusive.
+	 * the two configurations are mutually exclusive (of course, the
+	 * user may know better, i.e. best_effort_vlan_filtering).
 	 */
-	return sja1105_setup_8021q_tagging(ds, !enabled);
+	return sja1105_setup_8021q_tagging(ds, want_tagging);
 }
 
 static void sja1105_vlan_add(struct dsa_switch *ds, int port,
@@ -2297,6 +2305,105 @@ static int sja1105_vlan_del(struct dsa_switch *ds, int port,
 	return sja1105_build_vlan_table(priv, true);
 }
 
+static int sja1105_best_effort_vlan_filtering_get(struct sja1105_private *priv,
+						  bool *be_vlan)
+{
+	*be_vlan = priv->best_effort_vlan_filtering;
+
+	return 0;
+}
+
+/* Store the new setting, then re-apply VLAN filtering on every user port */
+static int sja1105_best_effort_vlan_filtering_set(struct sja1105_private *priv,
+						  bool be_vlan)
+{
+	struct dsa_switch *ds = priv->ds;
+	bool vlan_filtering;
+	int rc = 0, port; /* rc stays 0 when the loop finds no user port */
+
+	priv->best_effort_vlan_filtering = be_vlan;
+
+	rtnl_lock();
+	for (port = 0; port < ds->num_ports; port++) {
+		struct dsa_port *dp;
+
+		if (!dsa_is_user_port(ds, port))
+			continue;
+
+		dp = dsa_to_port(ds, port);
+		vlan_filtering = dsa_port_is_vlan_filtering(dp);
+
+		rc = sja1105_vlan_filtering(ds, port, vlan_filtering);
+		if (rc)
+			break;
+	}
+	rtnl_unlock();
+
+	return rc; /* 0, or the first sja1105_vlan_filtering() error */
+}
+
+enum sja1105_devlink_param_id {
+	SJA1105_DEVLINK_PARAM_ID_BASE = DEVLINK_PARAM_GENERIC_ID_MAX,
+	SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
+};
+
+static int sja1105_devlink_param_get(struct dsa_switch *ds, u32 id,
+				     struct devlink_param_gset_ctx *ctx)
+{
+	struct sja1105_private *priv = ds->priv;
+	int err;
+
+	switch (id) {
+	case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
+		err = sja1105_best_effort_vlan_filtering_get(priv,
+							     &ctx->val.vbool);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static int sja1105_devlink_param_set(struct dsa_switch *ds, u32 id,
+				     struct devlink_param_gset_ctx *ctx)
+{
+	struct sja1105_private *priv = ds->priv;
+	int err;
+
+	switch (id) {
+	case SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING:
+		err = sja1105_best_effort_vlan_filtering_set(priv,
+							     ctx->val.vbool);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+	return err;
+}
+
+static const struct devlink_param sja1105_devlink_params[] = {
+	DSA_DEVLINK_PARAM_DRIVER(SJA1105_DEVLINK_PARAM_ID_BEST_EFFORT_VLAN_FILTERING,
+				 "best_effort_vlan_filtering",
+				 DEVLINK_PARAM_TYPE_BOOL,
+				 BIT(DEVLINK_PARAM_CMODE_RUNTIME)),
+};
+
+static int sja1105_setup_devlink_params(struct dsa_switch *ds)
+{
+	return dsa_devlink_params_register(ds, sja1105_devlink_params,
+					   ARRAY_SIZE(sja1105_devlink_params));
+}
+
+static void sja1105_teardown_devlink_params(struct dsa_switch *ds)
+{
+	dsa_devlink_params_unregister(ds, sja1105_devlink_params,
+				      ARRAY_SIZE(sja1105_devlink_params));
+}
+
 /* The programming model for the SJA1105 switch is "all-at-once" via static
  * configuration tables. Some of these can be dynamically modified at runtime,
  * but not the xMII mode parameters table.
@@ -2364,6 +2471,10 @@ static int sja1105_setup(struct dsa_switch *ds)
 
 	ds->configure_vlan_while_not_filtering = true;
 
+	rc = sja1105_setup_devlink_params(ds);
+	if (rc < 0)
+		return rc;
+
 	/* The DSA/switchdev model brings up switch ports in standalone mode by
 	 * default, and that means vlan_filtering is 0 since they're not under
 	 * a bridge, so it's safe to set up switch tagging at this time.
@@ -2387,6 +2498,7 @@ static void sja1105_teardown(struct dsa_switch *ds)
 			kthread_destroy_worker(sp->xmit_worker);
 	}
 
+	sja1105_teardown_devlink_params(ds);
 	sja1105_flower_teardown(ds);
 	sja1105_tas_teardown(ds);
 	sja1105_ptp_clock_unregister(ds);
@@ -2738,6 +2850,8 @@ static const struct dsa_switch_ops sja1105_switch_ops = {
 	.cls_flower_stats	= sja1105_cls_flower_stats,
 	.crosschip_bridge_join	= sja1105_crosschip_bridge_join,
 	.crosschip_bridge_leave	= sja1105_crosschip_bridge_leave,
+	.devlink_param_get	= sja1105_devlink_param_get,
+	.devlink_param_set	= sja1105_devlink_param_set,
 };
 
 static int sja1105_check_device_id(struct sja1105_private *priv)
-- 
cgit v1.2.3-59-g8ed1b


From 88cac0fa534d22ee333c415099ba000d3882fbba Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:36 +0300
Subject: net: dsa: sja1105: add packing ops for the Retagging Table

The Retagging Table is an optional feature that allows the switch to
match frames against a {ingress port, egress port, vid} rule and change
their VLAN ID. The retagged frames are by default clones of the original
ones (since the hardware-foreseen use case was to mirror traffic for
debugging purposes and to tag it with a special VLAN for this purpose),
but we can force the original frames to be dropped by removing the
pre-retagging VLAN from the port membership list of the egress port.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h                |  2 +
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 33 +++++++++++++
 drivers/net/dsa/sja1105/sja1105_static_config.c  | 62 +++++++++++++++++++++++-
 drivers/net/dsa/sja1105/sja1105_static_config.h  | 15 ++++++
 4 files changed, 110 insertions(+), 2 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 1dcaecab0912..1ecdfd6be4c2 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -328,6 +328,8 @@ size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op);
 size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
 					 enum packing_op op);
+size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
 size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
 					    enum packing_op op);
 size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr,
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index bdee01811960..2a8fbd7fdedc 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -133,6 +133,9 @@
 #define SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY)
 
+#define SJA1105_SIZE_RETAGGING_DYN_CMD				\
+	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_RETAGGING_ENTRY)
+
 #define SJA1105_MAX_DYN_CMD_SIZE				\
 	SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD
 
@@ -525,6 +528,20 @@ sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->rdwrset, 29, 29, size, op);
 }
 
+static void
+sja1105_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+			      enum packing_op op)
+{
+	u8 *p = buf + SJA1105_SIZE_RETAGGING_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,    31, 31, size, op);
+	sja1105_packing(p, &cmd->errors,   30, 30, size, op);
+	sja1105_packing(p, &cmd->valident, 29, 29, size, op);
+	sja1105_packing(p, &cmd->rdwrset,  28, 28, size, op);
+	sja1105_packing(p, &cmd->index,     5,  0, size, op);
+}
+
 #define OP_READ		BIT(0)
 #define OP_WRITE	BIT(1)
 #define OP_DEL		BIT(2)
@@ -606,6 +623,14 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 		.packed_size = SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD,
 		.addr = 0x34,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.entry_packing = sja1105_retagging_entry_packing,
+		.cmd_packing = sja1105_retagging_cmd_packing,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+		.access = (OP_WRITE | OP_DEL),
+		.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
+		.addr = 0x31,
+	},
 	[BLK_IDX_XMII_PARAMS] = {0},
 };
 
@@ -692,6 +717,14 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 		.packed_size = SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD,
 		.addr = 0x34,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.entry_packing = sja1105_retagging_entry_packing,
+		.cmd_packing = sja1105_retagging_cmd_packing,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+		.access = (OP_READ | OP_WRITE | OP_DEL),
+		.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
+		.addr = 0x38,
+	},
 	[BLK_IDX_XMII_PARAMS] = {0},
 };
 
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index b68c9c92c248..780aca034cdc 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -541,6 +541,22 @@ static size_t sja1105_xmii_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
+size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op)
+{
+	struct sja1105_retagging_entry *entry = entry_ptr;
+	const size_t size = SJA1105_SIZE_RETAGGING_ENTRY;
+
+	sja1105_packing(buf, &entry->egr_port,       63, 59, size, op);
+	sja1105_packing(buf, &entry->ing_port,       58, 54, size, op);
+	sja1105_packing(buf, &entry->vlan_ing,       53, 42, size, op);
+	sja1105_packing(buf, &entry->vlan_egr,       41, 30, size, op);
+	sja1105_packing(buf, &entry->do_not_learn,   29, 29, size, op);
+	sja1105_packing(buf, &entry->use_dest_ports, 28, 28, size, op);
+	sja1105_packing(buf, &entry->destports,      27, 23, size, op);
+	return size;
+}
+
 size_t sja1105_table_header_packing(void *buf, void *entry_ptr,
 				    enum packing_op op)
 {
@@ -603,6 +619,7 @@ static u64 blk_id_map[BLK_IDX_MAX] = {
 	[BLK_IDX_L2_FORWARDING_PARAMS] = BLKID_L2_FORWARDING_PARAMS,
 	[BLK_IDX_AVB_PARAMS] = BLKID_AVB_PARAMS,
 	[BLK_IDX_GENERAL_PARAMS] = BLKID_GENERAL_PARAMS,
+	[BLK_IDX_RETAGGING] = BLKID_RETAGGING,
 	[BLK_IDX_XMII_PARAMS] = BLKID_XMII_PARAMS,
 };
 
@@ -646,7 +663,7 @@ static_config_check_memory_size(const struct sja1105_table *tables)
 {
 	const struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
 	const struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
-	int i, mem = 0;
+	int i, max_mem, mem = 0;
 
 	l2_fwd_params = tables[BLK_IDX_L2_FORWARDING_PARAMS].entries;
 
@@ -659,7 +676,12 @@ static_config_check_memory_size(const struct sja1105_table *tables)
 			mem += vl_fwd_params->partspc[i];
 	}
 
-	if (mem > SJA1105_MAX_FRAME_MEMORY)
+	if (tables[BLK_IDX_RETAGGING].entry_count)
+		max_mem = SJA1105_MAX_FRAME_MEMORY_RETAGGING;
+	else
+		max_mem = SJA1105_MAX_FRAME_MEMORY;
+
+	if (mem > max_mem)
 		return SJA1105_OVERCOMMITTED_FRAME_MEMORY;
 
 	return SJA1105_CONFIG_OK;
@@ -881,6 +903,12 @@ struct sja1105_table_ops sja1105e_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105ET_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
@@ -993,6 +1021,12 @@ struct sja1105_table_ops sja1105t_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105ET_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
@@ -1065,6 +1099,12 @@ struct sja1105_table_ops sja1105p_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
@@ -1177,6 +1217,12 @@ struct sja1105_table_ops sja1105q_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
@@ -1249,6 +1295,12 @@ struct sja1105_table_ops sja1105r_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
@@ -1361,6 +1413,12 @@ struct sja1105_table_ops sja1105s_table_ops[BLK_IDX_MAX] = {
 		.packed_entry_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
 	},
+	[BLK_IDX_RETAGGING] = {
+		.packing = sja1105_retagging_entry_packing,
+		.unpacked_entry_size = sizeof(struct sja1105_retagging_entry),
+		.packed_entry_size = SJA1105_SIZE_RETAGGING_ENTRY,
+		.max_entry_count = SJA1105_MAX_RETAGGING_COUNT,
+	},
 	[BLK_IDX_XMII_PARAMS] = {
 		.packing = sja1105_xmii_params_entry_packing,
 		.unpacked_entry_size = sizeof(struct sja1105_xmii_params_entry),
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index b569e3de3590..d96044d86b11 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -20,6 +20,7 @@
 #define SJA1105_SIZE_VLAN_LOOKUP_ENTRY			8
 #define SJA1105_SIZE_L2_FORWARDING_ENTRY		8
 #define SJA1105_SIZE_L2_FORWARDING_PARAMS_ENTRY		12
+#define SJA1105_SIZE_RETAGGING_ENTRY			8
 #define SJA1105_SIZE_XMII_PARAMS_ENTRY			4
 #define SJA1105_SIZE_SCHEDULE_PARAMS_ENTRY		12
 #define SJA1105_SIZE_SCHEDULE_ENTRY_POINTS_PARAMS_ENTRY	4
@@ -54,6 +55,7 @@ enum {
 	BLKID_L2_FORWARDING_PARAMS			= 0x0E,
 	BLKID_AVB_PARAMS				= 0x10,
 	BLKID_GENERAL_PARAMS				= 0x11,
+	BLKID_RETAGGING					= 0x12,
 	BLKID_XMII_PARAMS				= 0x4E,
 };
 
@@ -75,6 +77,7 @@ enum sja1105_blk_idx {
 	BLK_IDX_L2_FORWARDING_PARAMS,
 	BLK_IDX_AVB_PARAMS,
 	BLK_IDX_GENERAL_PARAMS,
+	BLK_IDX_RETAGGING,
 	BLK_IDX_XMII_PARAMS,
 	BLK_IDX_MAX,
 	/* Fake block indices that are only valid for dynamic access */
@@ -99,10 +102,12 @@ enum sja1105_blk_idx {
 #define SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT		1
 #define SJA1105_MAX_L2_FORWARDING_PARAMS_COUNT		1
 #define SJA1105_MAX_GENERAL_PARAMS_COUNT		1
+#define SJA1105_MAX_RETAGGING_COUNT			32
 #define SJA1105_MAX_XMII_PARAMS_COUNT			1
 #define SJA1105_MAX_AVB_PARAMS_COUNT			1
 
 #define SJA1105_MAX_FRAME_MEMORY			929
+#define SJA1105_MAX_FRAME_MEMORY_RETAGGING		910
 
 #define SJA1105E_DEVICE_ID				0x9C00000Cull
 #define SJA1105T_DEVICE_ID				0x9E00030Eull
@@ -273,6 +278,16 @@ struct sja1105_mac_config_entry {
 	u64 ingress;
 };
 
+struct sja1105_retagging_entry {
+	u64 egr_port;
+	u64 ing_port;
+	u64 vlan_ing;
+	u64 vlan_egr;
+	u64 do_not_learn;
+	u64 use_dest_ports;
+	u64 destports;
+};
+
 struct sja1105_xmii_params_entry {
 	u64 phy_mac[5];
 	u64 xmii_mode[5];
-- 
cgit v1.2.3-59-g8ed1b


From aaa270c638caa337ce34bb590b0a14ee09f1876d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:37 +0300
Subject: net: dsa: sja1105: implement a common frame memory partitioning
 function

There are 2 different features that require some reserved frame memory
space: VLAN retagging and virtual links. Create a central function that
modifies the static config and ensures frame memory is never
overcommitted.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h               |  2 ++
 drivers/net/dsa/sja1105/sja1105_main.c          | 37 +++++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_static_config.h |  1 +
 drivers/net/dsa/sja1105/sja1105_vl.c            | 20 ++-----------
 4 files changed, 42 insertions(+), 18 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 1ecdfd6be4c2..198d2a7d7f95 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -244,6 +244,8 @@ enum sja1105_reset_reason {
 int sja1105_static_config_reload(struct sja1105_private *priv,
 				 enum sja1105_reset_reason reason);
 
+void sja1105_frame_memory_partitioning(struct sja1105_private *priv);
+
 /* From sja1105_spi.c */
 int sja1105_xfer_buf(const struct sja1105_private *priv,
 		     sja1105_spi_rw_mode_t rw, u64 reg_addr,
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 775a6766288e..77462219261e 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -432,6 +432,41 @@ static int sja1105_init_l2_forwarding_params(struct sja1105_private *priv)
 	return 0;
 }
 
+void sja1105_frame_memory_partitioning(struct sja1105_private *priv)
+{
+	struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
+	struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
+	struct sja1105_table *table;
+	int max_mem;
+
+	/* VLAN retagging is implemented using a loopback port that consumes
+	 * frame buffers. That leaves less for us.
+	 */
+	if (priv->vlan_state == SJA1105_VLAN_BEST_EFFORT)
+		max_mem = SJA1105_MAX_FRAME_MEMORY_RETAGGING;
+	else
+		max_mem = SJA1105_MAX_FRAME_MEMORY;
+
+	table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
+	l2_fwd_params = table->entries;
+	l2_fwd_params->part_spc[0] = max_mem;
+
+	/* If we have any critical-traffic virtual links, we need to reserve
+	 * some frame buffer memory for them. At the moment, hardcode the value
+	 * at 100 blocks of 128 bytes each, which leaves max_mem - 100 blocks
+	 * (829, or 810 with retagging) for best-effort traffic. TODO: figure
+	 * out a more flexible way to perform the frame buffer partitioning.
+	 */
+	if (!priv->static_config.tables[BLK_IDX_VL_FORWARDING].entry_count)
+		return;
+
+	table = &priv->static_config.tables[BLK_IDX_VL_FORWARDING_PARAMS];
+	vl_fwd_params = table->entries;
+
+	l2_fwd_params->part_spc[0] -= SJA1105_VL_FRAME_MEMORY;
+	vl_fwd_params->partspc[0] = SJA1105_VL_FRAME_MEMORY;
+}
+
 static int sja1105_init_general_params(struct sja1105_private *priv)
 {
 	struct sja1105_general_params_entry default_general_params = {
@@ -2213,6 +2248,8 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 	l2_lookup_params = table->entries;
 	l2_lookup_params->shared_learn = want_tagging;
 
+	sja1105_frame_memory_partitioning(priv);
+
 	rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
 	if (rc)
 		dev_err(ds->dev, "Failed to change VLAN Ethertype\n");
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index d96044d86b11..5946847bb5b9 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -108,6 +108,7 @@ enum sja1105_blk_idx {
 
 #define SJA1105_MAX_FRAME_MEMORY			929
 #define SJA1105_MAX_FRAME_MEMORY_RETAGGING		910
+#define SJA1105_VL_FRAME_MEMORY				100
 
 #define SJA1105E_DEVICE_ID				0x9C00000Cull
 #define SJA1105T_DEVICE_ID				0x9E00030Eull
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index 312401995b54..f37611885376 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -5,7 +5,6 @@
 #include <linux/dsa/8021q.h>
 #include "sja1105.h"
 
-#define SJA1105_VL_FRAME_MEMORY			100
 #define SJA1105_SIZE_VL_STATUS			8
 
 /* The switch flow classification core implements TTEthernet, which 'thinks' in
@@ -141,8 +140,6 @@ static bool sja1105_vl_key_lower(struct sja1105_vl_lookup_entry *a,
 static int sja1105_init_virtual_links(struct sja1105_private *priv,
 				      struct netlink_ext_ack *extack)
 {
-	struct sja1105_l2_forwarding_params_entry *l2_fwd_params;
-	struct sja1105_vl_forwarding_params_entry *vl_fwd_params;
 	struct sja1105_vl_policing_entry *vl_policing;
 	struct sja1105_vl_forwarding_entry *vl_fwd;
 	struct sja1105_vl_lookup_entry *vl_lookup;
@@ -153,10 +150,6 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 	int max_sharindx = 0;
 	int i, j, k;
 
-	table = &priv->static_config.tables[BLK_IDX_L2_FORWARDING_PARAMS];
-	l2_fwd_params = table->entries;
-	l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY;
-
 	/* Figure out the dimensioning of the problem */
 	list_for_each_entry(rule, &priv->flow_block.rules, list) {
 		if (rule->type != SJA1105_RULE_VL)
@@ -308,17 +301,6 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 	if (!table->entries)
 		return -ENOMEM;
 	table->entry_count = 1;
-	vl_fwd_params = table->entries;
-
-	/* Reserve some frame buffer memory for the critical-traffic virtual
-	 * links (this needs to be done). At the moment, hardcode the value
-	 * at 100 blocks of 128 bytes of memory each. This leaves 829 blocks
-	 * remaining for best-effort traffic. TODO: figure out a more flexible
-	 * way to perform the frame buffer partitioning.
-	 */
-	l2_fwd_params->part_spc[0] = SJA1105_MAX_FRAME_MEMORY -
-				     SJA1105_VL_FRAME_MEMORY;
-	vl_fwd_params->partspc[0] = SJA1105_VL_FRAME_MEMORY;
 
 	for (i = 0; i < num_virtual_links; i++) {
 		unsigned long cookie = vl_lookup[i].flow_cookie;
@@ -342,6 +324,8 @@ static int sja1105_init_virtual_links(struct sja1105_private *priv,
 		}
 	}
 
+	sja1105_frame_memory_partitioning(priv);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 3f01c91aab9276ca48acccd20f6c379cf48a51f9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:38 +0300
Subject: net: dsa: sja1105: implement VLAN retagging for dsa_8021q sub-VLANs

Expand the delta commit procedure for VLANs with additional logic for
treating bridge_vlans in the newly introduced operating mode,
SJA1105_VLAN_BEST_EFFORT.

For every bridge VLAN on every user port, a sub-VLAN index is calculated
and retagging rules are installed towards a dsa_8021q rx_vid that
encodes that sub-VLAN index. This way, the tagger can identify the
original VLANs.

Extra care is taken for VLANs to still work as intended in cross-chip
scenarios. Retagging may have unintended consequences for these because
a sub-VLAN encoding that works for the CPU does not make any sense for a
front-panel port.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 412 ++++++++++++++++++++++++++++++++-
 1 file changed, 409 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 77462219261e..44ce7882dfb1 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1869,6 +1869,57 @@ sja1105_get_tag_protocol(struct dsa_switch *ds, int port,
 	return DSA_TAG_PROTO_SJA1105;
 }
 
+static int sja1105_find_free_subvlan(u16 *subvlan_map, bool pvid)
+{
+	int subvlan;
+
+	if (pvid)
+		return 0; /* a pvid gets index 0 without consulting the map */
+
+	for (subvlan = 1; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
+		if (subvlan_map[subvlan] == VLAN_N_VID) /* unused slot */
+			return subvlan;
+
+	return -1; /* no index from 1 up holds VLAN_N_VID any more */
+}
+
+static int sja1105_find_subvlan(u16 *subvlan_map, u16 vid)
+{
+	int subvlan;
+
+	for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
+		if (subvlan_map[subvlan] == vid)
+			return subvlan;
+
+	return -1;
+}
+
+static int sja1105_find_committed_subvlan(struct sja1105_private *priv,
+					  int port, u16 vid)
+{
+	struct sja1105_port *sp = &priv->ports[port];
+
+	return sja1105_find_subvlan(sp->subvlan_map, vid);
+}
+
+static void sja1105_init_subvlan_map(u16 *subvlan_map)
+{
+	int subvlan;
+
+	for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
+		subvlan_map[subvlan] = VLAN_N_VID;
+}
+
+static void sja1105_commit_subvlan_map(struct sja1105_private *priv, int port,
+				       u16 *subvlan_map)
+{
+	struct sja1105_port *sp = &priv->ports[port];
+	int subvlan;
+
+	for (subvlan = 0; subvlan < DSA_8021Q_N_SUBVLAN; subvlan++)
+		sp->subvlan_map[subvlan] = subvlan_map[subvlan];
+}
+
 static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
 {
 	struct sja1105_vlan_lookup_entry *vlan;
@@ -1885,9 +1936,29 @@ static int sja1105_is_vlan_configured(struct sja1105_private *priv, u16 vid)
 	return -1;
 }
 
+static int
+sja1105_find_retagging_entry(struct sja1105_retagging_entry *retagging,
+			     int count, int from_port, u16 from_vid,
+			     u16 to_vid)
+{
+	int i;
+
+	for (i = 0; i < count; i++)
+		if (retagging[i].ing_port == BIT(from_port) &&
+		    retagging[i].vlan_ing == from_vid &&
+		    retagging[i].vlan_egr == to_vid)
+			return i;
+
+	/* Return an invalid entry index if not found */
+	return -1;
+}
+
 static int sja1105_commit_vlans(struct sja1105_private *priv,
-				struct sja1105_vlan_lookup_entry *new_vlan)
+				struct sja1105_vlan_lookup_entry *new_vlan,
+				struct sja1105_retagging_entry *new_retagging,
+				int num_retagging)
 {
+	struct sja1105_retagging_entry *retagging;
 	struct sja1105_vlan_lookup_entry *vlan;
 	struct sja1105_table *table;
 	int num_vlans = 0;
@@ -1947,9 +2018,50 @@ static int sja1105_commit_vlans(struct sja1105_private *priv,
 		vlan[k++] = new_vlan[i];
 	}
 
+	/* VLAN Retagging Table */
+	table = &priv->static_config.tables[BLK_IDX_RETAGGING];
+	retagging = table->entries;
+
+	for (i = 0; i < table->entry_count; i++) {
+		rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
+						  i, &retagging[i], false);
+		if (rc)
+			return rc;
+	}
+
+	if (table->entry_count)
+		kfree(table->entries);
+
+	table->entries = kcalloc(num_retagging, table->ops->unpacked_entry_size,
+				 GFP_KERNEL);
+	if (!table->entries)
+		return -ENOMEM;
+
+	table->entry_count = num_retagging;
+	retagging = table->entries;
+
+	for (i = 0; i < num_retagging; i++) {
+		retagging[i] = new_retagging[i];
+
+		/* Update entry */
+		rc = sja1105_dynamic_config_write(priv, BLK_IDX_RETAGGING,
+						  i, &retagging[i], true);
+		if (rc < 0)
+			return rc;
+	}
+
 	return 0;
 }
 
+struct sja1105_crosschip_vlan {
+	struct list_head list;
+	u16 vid;
+	bool untagged;
+	int port;
+	int other_port;
+	struct dsa_switch *other_ds;
+};
+
 struct sja1105_crosschip_switch {
 	struct list_head list;
 	struct dsa_switch *other_ds;
@@ -2021,6 +2133,265 @@ sja1105_build_dsa_8021q_vlans(struct sja1105_private *priv,
 	return 0;
 }
 
+static int sja1105_build_subvlans(struct sja1105_private *priv,
+				  u16 subvlan_map[][DSA_8021Q_N_SUBVLAN],
+				  struct sja1105_vlan_lookup_entry *new_vlan,
+				  struct sja1105_retagging_entry *new_retagging,
+				  int *num_retagging)
+{
+	struct sja1105_bridge_vlan *v;
+	int k = *num_retagging;
+
+	if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
+		return 0;
+
+	list_for_each_entry(v, &priv->bridge_vlans, list) {
+		int upstream = dsa_upstream_port(priv->ds, v->port);
+		int match, subvlan;
+		u16 rx_vid;
+
+		/* Only sub-VLANs on user ports need to be applied.
+		 * Bridge VLANs also include VLANs added automatically
+		 * by DSA on the CPU port.
+		 */
+		if (!dsa_is_user_port(priv->ds, v->port))
+			continue;
+
+		subvlan = sja1105_find_subvlan(subvlan_map[v->port],
+					       v->vid);
+		if (subvlan < 0) {
+			subvlan = sja1105_find_free_subvlan(subvlan_map[v->port],
+							    v->pvid);
+			if (subvlan < 0) {
+				dev_err(priv->ds->dev, "No more free subvlans\n");
+				return -ENOSPC;
+			}
+		}
+
+		rx_vid = dsa_8021q_rx_vid_subvlan(priv->ds, v->port, subvlan);
+
+		/* @v->vid on @v->port needs to be retagged to @rx_vid
+		 * on @upstream. Assume @v->vid on @v->port and on
+		 * @upstream was already configured by the previous
+		 * iteration over bridge_vlans.
+		 */
+		match = rx_vid;
+		new_vlan[match].vlanid = rx_vid;
+		new_vlan[match].vmemb_port |= BIT(v->port);
+		new_vlan[match].vmemb_port |= BIT(upstream);
+		new_vlan[match].vlan_bc |= BIT(v->port);
+		new_vlan[match].vlan_bc |= BIT(upstream);
+		/* The "untagged" flag is set the same as for the
+		 * original VLAN
+		 */
+		if (!v->untagged)
+			new_vlan[match].tag_port |= BIT(v->port);
+		/* But it's always tagged towards the CPU */
+		new_vlan[match].tag_port |= BIT(upstream);
+
+		/* The Retagging Table generates packet *clones* with
+		 * the new VLAN. This is a very odd hardware quirk
+		 * which we need to suppress by dropping the original
+		 * packet.
+		 * Deny egress of the original VLAN towards the CPU
+		 * port. This will force the switch to drop it, and
+		 * we'll see only the retagged packets.
+		 */
+		match = v->vid;
+		new_vlan[match].vlan_bc &= ~BIT(upstream);
+
+		/* And the retagging itself; make sure a slot is free first */
+		if (k == SJA1105_MAX_RETAGGING_COUNT) {
+			dev_err(priv->ds->dev, "No more retagging rules\n");
+			return -ENOSPC;
+		}
+		new_retagging[k].vlan_ing = v->vid;
+		new_retagging[k].vlan_egr = rx_vid;
+		new_retagging[k].ing_port = BIT(v->port);
+		new_retagging[k++].egr_port = BIT(upstream);
+
+		subvlan_map[v->port][subvlan] = v->vid;
+	}
+
+	*num_retagging = k;
+
+	return 0;
+}
+
+/* Sadly, in crosschip scenarios where the CPU port is also the link to another
+ * switch, we should retag backwards (the dsa_8021q vid to the original vid) on
+ * the CPU port of neighbour switches.
+ */
+static int
+sja1105_build_crosschip_subvlans(struct sja1105_private *priv,
+				 struct sja1105_vlan_lookup_entry *new_vlan,
+				 struct sja1105_retagging_entry *new_retagging,
+				 int *num_retagging)
+{
+	struct sja1105_crosschip_vlan *tmp, *pos;
+	struct dsa_8021q_crosschip_link *c;
+	struct sja1105_bridge_vlan *v, *w;
+	struct list_head crosschip_vlans;
+	int k = *num_retagging;
+	int rc = 0;
+
+	if (priv->vlan_state != SJA1105_VLAN_BEST_EFFORT)
+		return 0;
+
+	INIT_LIST_HEAD(&crosschip_vlans);
+
+	list_for_each_entry(c, &priv->crosschip_links, list) {
+		struct sja1105_private *other_priv = c->other_ds->priv;
+
+		if (other_priv->vlan_state == SJA1105_VLAN_FILTERING_FULL)
+			continue;
+
+		/* Crosschip links are also added to the CPU ports.
+		 * Ignore those.
+		 */
+		if (!dsa_is_user_port(priv->ds, c->port))
+			continue;
+		if (!dsa_is_user_port(c->other_ds, c->other_port))
+			continue;
+
+		/* Search for VLANs on the remote port */
+		list_for_each_entry(v, &other_priv->bridge_vlans, list) {
+			bool already_added = false;
+			bool we_have_it = false;
+
+			if (v->port != c->other_port)
+				continue;
+
+			/* If @v is a pvid on @other_ds, it does not need
+			 * re-retagging, because its SVL field is 0 and we
+			 * already allow that, via the dsa_8021q crosschip
+			 * links.
+			 */
+			if (v->pvid)
+				continue;
+
+			/* Search for the VLAN on our local port */
+			list_for_each_entry(w, &priv->bridge_vlans, list) {
+				if (w->port == c->port && w->vid == v->vid) {
+					we_have_it = true;
+					break;
+				}
+			}
+
+			if (!we_have_it)
+				continue;
+
+			list_for_each_entry(tmp, &crosschip_vlans, list) {
+				if (tmp->vid == v->vid &&
+				    tmp->untagged == v->untagged &&
+				    tmp->port == c->port &&
+				    tmp->other_port == v->port &&
+				    tmp->other_ds == c->other_ds) {
+					already_added = true;
+					break;
+				}
+			}
+
+			if (already_added)
+				continue;
+
+			tmp = kzalloc(sizeof(*tmp), GFP_KERNEL);
+			if (!tmp) {
+				dev_err(priv->ds->dev, "Failed to allocate memory\n");
+				rc = -ENOMEM;
+				goto out;
+			}
+			tmp->vid = v->vid;
+			tmp->port = c->port;
+			tmp->other_port = v->port;
+			tmp->other_ds = c->other_ds;
+			tmp->untagged = v->untagged;
+			list_add(&tmp->list, &crosschip_vlans);
+		}
+	}
+
+	list_for_each_entry(tmp, &crosschip_vlans, list) {
+		struct sja1105_private *other_priv = tmp->other_ds->priv;
+		int upstream = dsa_upstream_port(priv->ds, tmp->port);
+		int match, subvlan;
+		u16 rx_vid;
+
+		subvlan = sja1105_find_committed_subvlan(other_priv,
+							 tmp->other_port,
+							 tmp->vid);
+		/* If this happens, it's a bug. The neighbour switch does not
+		 * have a subvlan for tmp->vid on tmp->other_port, but it
+		 * should, since we already checked for its vlan_state.
+		 */
+		if (WARN_ON(subvlan < 0)) {
+			rc = -EINVAL;
+			goto out;
+		}
+
+		rx_vid = dsa_8021q_rx_vid_subvlan(tmp->other_ds,
+						  tmp->other_port,
+						  subvlan);
+
+		/* The @rx_vid retagged from @tmp->vid on
+		 * {@tmp->other_ds, @tmp->other_port} needs to be
+		 * re-retagged to @tmp->vid on the way back to us.
+		 *
+		 * Assume the original @tmp->vid is already configured
+		 * on this local switch, otherwise we wouldn't be
+		 * retagging its subvlan on the other switch in the
+		 * first place. We just need to add a reverse retagging
+		 * rule for @rx_vid and install @rx_vid on our ports.
+		 */
+		match = rx_vid;
+		new_vlan[match].vlanid = rx_vid;
+		new_vlan[match].vmemb_port |= BIT(tmp->port);
+		new_vlan[match].vmemb_port |= BIT(upstream);
+		/* The "untagged" flag is set the same as for the
+		 * original VLAN. And towards the CPU, it doesn't
+		 * really matter, because @rx_vid will only receive
+		 * traffic on that port. For consistency with other dsa_8021q
+		 * VLANs, we'll keep the CPU port tagged.
+		 */
+		if (!tmp->untagged)
+			new_vlan[match].tag_port |= BIT(tmp->port);
+		new_vlan[match].tag_port |= BIT(upstream);
+		/* Deny egress of @rx_vid towards our front-panel port.
+		 * This will force the switch to drop it, and we'll see
+		 * only the re-retagged packets (having the original,
+		 * pre-initial-retagging, VLAN @tmp->vid).
+		 */
+		new_vlan[match].vlan_bc &= ~BIT(tmp->port);
+
+		/* On reverse retagging, the same ingress VLAN goes to multiple
+		 * ports. So we have an opportunity to create composite rules
+		 * to not waste the limited space in the retagging table.
+		 */
+		k = sja1105_find_retagging_entry(new_retagging, *num_retagging,
+						 upstream, rx_vid, tmp->vid);
+		if (k < 0) {
+			if (*num_retagging == SJA1105_MAX_RETAGGING_COUNT) {
+				dev_err(priv->ds->dev, "No more retagging rules\n");
+				rc = -ENOSPC;
+				goto out;
+			}
+			k = (*num_retagging)++;
+		}
+		/* And the retagging itself */
+		new_retagging[k].vlan_ing = rx_vid;
+		new_retagging[k].vlan_egr = tmp->vid;
+		new_retagging[k].ing_port = BIT(upstream);
+		new_retagging[k].egr_port |= BIT(tmp->port);
+	}
+
+out:
+	list_for_each_entry_safe(tmp, pos, &crosschip_vlans, list) {
+		list_del(&tmp->list);
+		kfree(tmp);
+	}
+
+	return rc;
+}
+
 static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify);
 
 static int sja1105_notify_crosschip_switches(struct sja1105_private *priv)
@@ -2074,10 +2445,12 @@ out:
 
 static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 {
+	u16 subvlan_map[SJA1105_NUM_PORTS][DSA_8021Q_N_SUBVLAN];
+	struct sja1105_retagging_entry *new_retagging;
 	struct sja1105_vlan_lookup_entry *new_vlan;
 	struct sja1105_table *table;
+	int i, num_retagging = 0;
 	int rc;
-	int i;
 
 	table = &priv->static_config.tables[BLK_IDX_VLAN_LOOKUP];
 	new_vlan = kcalloc(VLAN_N_VID,
@@ -2085,9 +2458,23 @@ static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 	if (!new_vlan)
 		return -ENOMEM;
 
+	table = &priv->static_config.tables[BLK_IDX_RETAGGING];
+	new_retagging = kcalloc(SJA1105_MAX_RETAGGING_COUNT,
+				table->ops->unpacked_entry_size, GFP_KERNEL);
+	if (!new_retagging) {
+		kfree(new_vlan);
+		return -ENOMEM;
+	}
+
 	for (i = 0; i < VLAN_N_VID; i++)
 		new_vlan[i].vlanid = VLAN_N_VID;
 
+	for (i = 0; i < SJA1105_MAX_RETAGGING_COUNT; i++)
+		new_retagging[i].vlan_ing = VLAN_N_VID;
+
+	for (i = 0; i < priv->ds->num_ports; i++)
+		sja1105_init_subvlan_map(subvlan_map[i]);
+
 	/* Bridge VLANs */
 	rc = sja1105_build_bridge_vlans(priv, new_vlan);
 	if (rc)
@@ -2102,7 +2489,22 @@ static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 	if (rc)
 		goto out;
 
-	rc = sja1105_commit_vlans(priv, new_vlan);
+	/* Private VLANs necessary for dsa_8021q operation, which we need to
+	 * determine on our own:
+	 * - Sub-VLANs
+	 * - Sub-VLANs of crosschip switches
+	 */
+	rc = sja1105_build_subvlans(priv, subvlan_map, new_vlan, new_retagging,
+				    &num_retagging);
+	if (rc)
+		goto out;
+
+	rc = sja1105_build_crosschip_subvlans(priv, new_vlan, new_retagging,
+					      &num_retagging);
+	if (rc)
+		goto out;
+
+	rc = sja1105_commit_vlans(priv, new_vlan, new_retagging, num_retagging);
 	if (rc)
 		goto out;
 
@@ -2110,6 +2512,9 @@ static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 	if (rc)
 		goto out;
 
+	for (i = 0; i < priv->ds->num_ports; i++)
+		sja1105_commit_subvlan_map(priv, i, subvlan_map[i]);
+
 	if (notify) {
 		rc = sja1105_notify_crosschip_switches(priv);
 		if (rc)
@@ -2118,6 +2523,7 @@ static int sja1105_build_vlan_table(struct sja1105_private *priv, bool notify)
 
 out:
 	kfree(new_vlan);
+	kfree(new_retagging);
 
 	return rc;
 }
-- 
cgit v1.2.3-59-g8ed1b


From a20bc43bfb2e8c2fcdaaa4d5c11f2bbf0b690328 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Tue, 12 May 2020 20:20:39 +0300
Subject: docs: net: dsa: sja1105: document the best_effort_vlan_filtering
 option

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../networking/devlink-params-sja1105.txt          |  27 +++
 Documentation/networking/dsa/sja1105.rst           | 211 ++++++++++++++++++---
 2 files changed, 212 insertions(+), 26 deletions(-)
 create mode 100644 Documentation/networking/devlink-params-sja1105.txt

diff --git a/Documentation/networking/devlink-params-sja1105.txt b/Documentation/networking/devlink-params-sja1105.txt
new file mode 100644
index 000000000000..1d71742e270a
--- /dev/null
+++ b/Documentation/networking/devlink-params-sja1105.txt
@@ -0,0 +1,27 @@
+best_effort_vlan_filtering
+			[DEVICE, DRIVER-SPECIFIC]
+			Allow plain ETH_P_8021Q headers to be used as DSA tags.
+			Benefits:
+			- Can terminate untagged traffic over switch net
+			  devices even when enslaved to a bridge with
+			  vlan_filtering=1.
+			- Can terminate VLAN-tagged traffic over switch net
+			  devices even when enslaved to a bridge with
+			  vlan_filtering=1, with some constraints (no more than
+			  7 non-pvid VLANs per user port).
+			- Can do QoS based on VLAN PCP and VLAN membership
+			  admission control for autonomously forwarded frames
+			  (regardless of whether they can be terminated on the
+			  CPU or not).
+			Drawbacks:
+			- User cannot use VLANs in range 1024-3071. If the
+			  switch receives frames with such VIDs, it will
+			  misinterpret them as DSA tags.
+			- Switch uses Shared VLAN Learning (FDB lookup uses
+			  only DMAC as key).
+			- When VLANs span cross-chip topologies, the total
+			  number of permitted VLANs may be less than 7 per
+			  port, due to a maximum number of 32 VLAN retagging
+			  rules per switch.
+			Configuration mode: runtime
+			Type: bool.
diff --git a/Documentation/networking/dsa/sja1105.rst b/Documentation/networking/dsa/sja1105.rst
index 34581629dd3f..b6bbc17814fb 100644
--- a/Documentation/networking/dsa/sja1105.rst
+++ b/Documentation/networking/dsa/sja1105.rst
@@ -66,34 +66,193 @@ reprogrammed with the updated static configuration.
 Traffic support
 ===============
 
-The switches do not support switch tagging in hardware. But they do support
-customizing the TPID by which VLAN traffic is identified as such. The switch
-driver is leveraging ``CONFIG_NET_DSA_TAG_8021Q`` by requesting that special
-VLANs (with a custom TPID of ``ETH_P_EDSA`` instead of ``ETH_P_8021Q``) are
-installed on its ports when not in ``vlan_filtering`` mode. This does not
-interfere with the reception and transmission of real 802.1Q-tagged traffic,
-because the switch does no longer parse those packets as VLAN after the TPID
-change.
-The TPID is restored when ``vlan_filtering`` is requested by the user through
-the bridge layer, and general IP termination becomes no longer possible through
-the switch netdevices in this mode.
-
-The switches have two programmable filters for link-local destination MACs.
+The switches do not have hardware support for DSA tags, except for "slow
+protocols" for switch control such as STP and PTP. For these, the switches have two
+programmable filters for link-local destination MACs.
 These are used to trap BPDUs and PTP traffic to the master netdevice, and are
 further used to support STP and 1588 ordinary clock/boundary clock
-functionality.
-
-The following traffic modes are supported over the switch netdevices:
-
-+--------------------+------------+------------------+------------------+
-|                    | Standalone | Bridged with     | Bridged with     |
-|                    | ports      | vlan_filtering 0 | vlan_filtering 1 |
-+====================+============+==================+==================+
-| Regular traffic    |     Yes    |       Yes        |  No (use master) |
-+--------------------+------------+------------------+------------------+
-| Management traffic |     Yes    |       Yes        |       Yes        |
-| (BPDU, PTP)        |            |                  |                  |
-+--------------------+------------+------------------+------------------+
+functionality. For frames trapped to the CPU, source port and switch ID
+information is encoded by the hardware into the frames.
+
+But by leveraging ``CONFIG_NET_DSA_TAG_8021Q`` (a software-defined DSA tagging
+format based on VLANs), general-purpose traffic termination through the network
+stack can be supported under certain circumstances.
+
+Depending on VLAN awareness state, the following operating modes are possible
+with the switch:
+
+- Mode 1 (VLAN-unaware): a port is in this mode when it is used as a standalone
+  net device, or when it is enslaved to a bridge with ``vlan_filtering=0``.
+- Mode 2 (fully VLAN-aware): a port is in this mode when it is enslaved to a
+  bridge with ``vlan_filtering=1``. Access to the entire VLAN range is given to
+  the user through ``bridge vlan`` commands, but general-purpose (anything
+  other than STP, PTP etc) traffic termination is not possible through the
+  switch net devices. The other packets can still be processed by user space
+  through the DSA master interface (similar to ``DSA_TAG_PROTO_NONE``).
+- Mode 3 (best-effort VLAN-aware): a port is in this mode when enslaved to a
+  bridge with ``vlan_filtering=1``, and the devlink property of its parent
+  switch named ``best_effort_vlan_filtering`` is set to ``true``. When
+  configured like this, the range of usable VIDs is reduced (0 to 1023 and 3072
+  to 4094), so is the number of usable VIDs (maximum of 7 non-pvid VLANs per
+  port*), and shared VLAN learning is performed (FDB lookup is done only by
+  DMAC, not also by VID).
+
+To summarize, in each mode, the following types of traffic are supported over
+the switch net devices:
+
++-------------+-----------+--------------+------------+
+|             |   Mode 1  |    Mode 2    |   Mode 3   |
++=============+===========+==============+============+
+|   Regular   |    Yes    |      No      |     Yes    |
+|   traffic   |           | (use master) |            |
++-------------+-----------+--------------+------------+
+| Management  |    Yes    |     Yes      |     Yes    |
+|   traffic   |           |              |            |
+| (BPDU, PTP) |           |              |            |
++-------------+-----------+--------------+------------+
+
+To configure the switch to operate in Mode 3, the following steps can be
+followed::
+
+  ip link add dev br0 type bridge
+  # swp2 operates in Mode 1 now
+  ip link set dev swp2 master br0
+  # swp2 temporarily moves to Mode 2
+  ip link set dev br0 type bridge vlan_filtering 1
+  [   61.204770] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
+  [   61.239944] sja1105 spi0.1: Disabled switch tagging
+  # swp2 now operates in Mode 3
+  devlink dev param set spi/spi0.1 name best_effort_vlan_filtering value true cmode runtime
+  [   64.682927] sja1105 spi0.1: Reset switch and programmed static config. Reason: VLAN filtering
+  [   64.711925] sja1105 spi0.1: Enabled switch tagging
+  # Cannot use VLANs in range 1024-3071 while in Mode 3.
+  bridge vlan add dev swp2 vid 1025 untagged pvid
+  RTNETLINK answers: Operation not permitted
+  bridge vlan add dev swp2 vid 100
+  bridge vlan add dev swp2 vid 101 untagged
+  bridge vlan
+  port    vlan ids
+  swp5     1 PVID Egress Untagged
+
+  swp2     1 PVID Egress Untagged
+           100
+           101 Egress Untagged
+
+  swp3     1 PVID Egress Untagged
+
+  swp4     1 PVID Egress Untagged
+
+  br0      1 PVID Egress Untagged
+  bridge vlan add dev swp2 vid 102
+  bridge vlan add dev swp2 vid 103
+  bridge vlan add dev swp2 vid 104
+  bridge vlan add dev swp2 vid 105
+  bridge vlan add dev swp2 vid 106
+  bridge vlan add dev swp2 vid 107
+  # Cannot use more than 7 non-pvid VLANs per port while in Mode 3.
+  [ 3885.216832] sja1105 spi0.1: No more free subvlans
+
+\* "maximum of 7 non-pvid VLANs per port": Decoding VLAN-tagged packets on the
+CPU in mode 3 is possible through VLAN retagging of packets that go from the
+switch to the CPU. In cross-chip topologies, the port that goes to the CPU
+might also go to other switches. In that case, those other switches will see
+only a retagged packet (which only has meaning for the CPU). So if they are
+interested in this VLAN, they need to apply retagging in the reverse direction,
+to recover the original value from it. This consumes extra hardware resources
+for this switch. There is a maximum of 32 entries in the Retagging Table of
+each switch device.
+
+As an example, consider this cross-chip topology::
+
+  +-------------------------------------------------+
+  | Host SoC                                        |
+  |           +-------------------------+           |
+  |           | DSA master for embedded |           |
+  |           |   switch (non-sja1105)  |           |
+  |  +--------+-------------------------+--------+  |
+  |  |   embedded L2 switch                      |  |
+  |  |                                           |  |
+  |  |   +--------------+     +--------------+   |  |
+  |  |   |DSA master for|     |DSA master for|   |  |
+  |  |   |  SJA1105 1   |     |  SJA1105 2   |   |  |
+  +--+---+--------------+-----+--------------+---+--+
+
+  +-----------------------+ +-----------------------+
+  |   SJA1105 switch 1    | |   SJA1105 switch 2    |
+  +-----+-----+-----+-----+ +-----+-----+-----+-----+
+  |sw1p0|sw1p1|sw1p2|sw1p3| |sw2p0|sw2p1|sw2p2|sw2p3|
+  +-----+-----+-----+-----+ +-----+-----+-----+-----+
+
+To reach the CPU, SJA1105 switch 1 (spi/spi2.1) uses the same port as it uses
+to reach SJA1105 switch 2 (spi/spi2.2), which would be port 4 (not drawn).
+Similarly for SJA1105 switch 2.
+
+Also consider the following commands, that add VLAN 100 to every sja1105 user
+port::
+
+  devlink dev param set spi/spi2.1 name best_effort_vlan_filtering value true cmode runtime
+  devlink dev param set spi/spi2.2 name best_effort_vlan_filtering value true cmode runtime
+  ip link add dev br0 type bridge
+  for port in sw1p0 sw1p1 sw1p2 sw1p3 \
+              sw2p0 sw2p1 sw2p2 sw2p3; do
+      ip link set dev $port master br0
+  done
+  ip link set dev br0 type bridge vlan_filtering 1
+  for port in sw1p0 sw1p1 sw1p2 sw1p3 \
+              sw2p0 sw2p1 sw2p2; do
+      bridge vlan add dev $port vid 100
+  done
+  ip link add link br0 name br0.100 type vlan id 100 && ip link set dev br0.100 up
+  ip addr add 192.168.100.3/24 dev br0.100
+  bridge vlan add dev br0 vid 100 self
+
+  bridge vlan
+  port    vlan ids
+  sw1p0    1 PVID Egress Untagged
+           100
+
+  sw1p1    1 PVID Egress Untagged
+           100
+
+  sw1p2    1 PVID Egress Untagged
+           100
+
+  sw1p3    1 PVID Egress Untagged
+           100
+
+  sw2p0    1 PVID Egress Untagged
+           100
+
+  sw2p1    1 PVID Egress Untagged
+           100
+
+  sw2p2    1 PVID Egress Untagged
+           100
+
+  sw2p3    1 PVID Egress Untagged
+
+  br0      1 PVID Egress Untagged
+           100
+
+SJA1105 switch 1 consumes 1 retagging entry for each VLAN on each user port
+towards the CPU. It also consumes 1 retagging entry for each non-pvid VLAN that
+it is also interested in, which is configured on any port of any neighbor
+switch.
+
+In this case, SJA1105 switch 1 consumes a total of 11 retagging entries, as
+follows:
+- 8 retagging entries for VLANs 1 and 100 installed on its user ports
+  (``sw1p0`` - ``sw1p3``)
+- 3 retagging entries for VLAN 100 installed on the user ports of SJA1105
+  switch 2 (``sw2p0`` - ``sw2p2``), because it also has ports that are
+  interested in it. The VLAN 1 is a pvid on SJA1105 switch 2 and does not need
+  reverse retagging.
+
+SJA1105 switch 2 also consumes 11 retagging entries, but organized as follows:
+- 7 retagging entries for the bridge VLANs on its user ports (``sw2p0`` -
+  ``sw2p3``).
+- 4 retagging entries for VLAN 100 installed on the user ports of SJA1105
+  switch 1 (``sw1p0`` - ``sw1p3``).
 
 Switching features
 ==================
-- 
cgit v1.2.3-59-g8ed1b


From 51fa960d3b5163b1af22efdebcabfccc5d615ad6 Mon Sep 17 00:00:00 2001
From: William Tu <u9012063@gmail.com>
Date: Tue, 12 May 2020 10:36:23 -0700
Subject: erspan: Check IFLA_GRE_ERSPAN_VER is set.

Add a check to make sure the IFLA_GRE_ERSPAN_VER is provided by users.

Fixes: f989d546a2d5 ("erspan: Add type I version 0 support.")
Cc: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: William Tu <u9012063@gmail.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_gre.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index e29cd48674d7..0ce9b91ff55c 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -1087,7 +1087,8 @@ static int erspan_validate(struct nlattr *tb[], struct nlattr *data[],
 	if (ret)
 		return ret;
 
-	if (nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
+	if (data[IFLA_GRE_ERSPAN_VER] &&
+	    nla_get_u8(data[IFLA_GRE_ERSPAN_VER]) == 0)
 		return 0;
 
 	/* ERSPAN type II/III should only have GRE sequence and key flag */
-- 
cgit v1.2.3-59-g8ed1b


From fb9f2e92864f51d25e790947cca2ac4426a12f9c Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <olteanv@gmail.com>
Date: Wed, 13 May 2020 03:23:27 +0300
Subject: net: dsa: tag_sja1105: appease sparse checks for ethertype accessors

A comparison between a value from the packet and an integer constant
value needs to be done by converting the value from the packet from
net->host, or the constant from host->net. Not the other way around.
Even though it makes no practical difference, correct that.

Fixes: 38b5beeae7a4 ("net: dsa: sja1105: prepare tagger for handling DSA tags and VLAN simultaneously")
Signed-off-by: Vladimir Oltean <olteanv@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_sja1105.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/dsa/tag_sja1105.c b/net/dsa/tag_sja1105.c
index ad105550b145..9b4a4d719291 100644
--- a/net/dsa/tag_sja1105.c
+++ b/net/dsa/tag_sja1105.c
@@ -73,10 +73,10 @@ static bool sja1105_can_use_vlan_as_tags(const struct sk_buff *skb)
 {
 	struct vlan_ethhdr *hdr = vlan_eth_hdr(skb);
 
-	if (hdr->h_vlan_proto == ntohs(ETH_P_SJA1105))
+	if (hdr->h_vlan_proto == htons(ETH_P_SJA1105))
 		return true;
 
-	if (hdr->h_vlan_proto != ntohs(ETH_P_8021Q))
+	if (hdr->h_vlan_proto != htons(ETH_P_8021Q))
 		return false;
 
 	return vid_is_dsa_8021q(ntohs(hdr->h_vlan_TCI) & VLAN_VID_MASK);
-- 
cgit v1.2.3-59-g8ed1b


From aaebf8e6088270e45d30314031b5d9a88a589cb9 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Tue, 12 May 2020 19:09:32 -0700
Subject: Bluetooth: Fix incorrect type for window and interval

The types for window and interval should be uint16, not uint8.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 3f470f0e432c..f6870e98faab 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -890,7 +890,7 @@ void hci_req_add_le_passive_scan(struct hci_request *req)
 	struct hci_dev *hdev = req->hdev;
 	u8 own_addr_type;
 	u8 filter_policy;
-	u8 window, interval;
+	u16 window, interval;
 
 	if (hdev->scanning_paused) {
 		bt_dev_dbg(hdev, "Scanning is paused for suspend");
-- 
cgit v1.2.3-59-g8ed1b


From 91779665c129d361c032d2a590b37a077b6cb9d7 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Tue, 12 May 2020 19:09:33 -0700
Subject: Bluetooth: Modify LE window and interval for suspend

When a device is suspended, it doesn't need to be as responsive to
connection events. Increase the interval to 640ms (creating a duty cycle
of roughly 1.75%) so that passive scanning uses much less power (vs
previous duty cycle of 18.75%). The new window + interval combination
has been tested to work with HID devices (which are currently the only
devices capable of wake up).

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_request.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index f6870e98faab..6b45e31432a7 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -35,7 +35,7 @@
 #define HCI_REQ_CANCELED  2
 
 #define LE_SUSPEND_SCAN_WINDOW		0x0012
-#define LE_SUSPEND_SCAN_INTERVAL	0x0060
+#define LE_SUSPEND_SCAN_INTERVAL	0x0400
 
 void hci_req_init(struct hci_request *req, struct hci_dev *hdev)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 0d2c9825e46d45f8a520135c9c791b5c73a165ab Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Tue, 12 May 2020 19:19:25 -0700
Subject: Bluetooth: Rename BT_SUSPEND_COMPLETE

Renamed BT_SUSPEND_COMPLETE to BT_SUSPEND_CONFIGURE_WAKE since it sets
up the event filter and whitelist for wake-up.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 2 +-
 net/bluetooth/hci_core.c         | 2 +-
 net/bluetooth/hci_request.c      | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 0c7f3ad76665..869ee2b30a4c 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -110,7 +110,7 @@ enum suspend_tasks {
 enum suspended_state {
 	BT_RUNNING = 0,
 	BT_SUSPEND_DISCONNECT,
-	BT_SUSPEND_COMPLETE,
+	BT_SUSPEND_CONFIGURE_WAKE,
 };
 
 struct hci_conn_hash {
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index 51d399273276..de1f4e72ec06 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3353,7 +3353,7 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
 		/* Only configure whitelist if disconnect succeeded */
 		if (!ret)
 			ret = hci_change_suspend_state(hdev,
-						       BT_SUSPEND_COMPLETE);
+						BT_SUSPEND_CONFIGURE_WAKE);
 	} else if (action == PM_POST_SUSPEND) {
 		ret = hci_change_suspend_state(hdev, BT_RUNNING);
 	}
diff --git a/net/bluetooth/hci_request.c b/net/bluetooth/hci_request.c
index 6b45e31432a7..1fc55685da62 100644
--- a/net/bluetooth/hci_request.c
+++ b/net/bluetooth/hci_request.c
@@ -1090,7 +1090,7 @@ void hci_req_prepare_suspend(struct hci_dev *hdev, enum suspended_state next)
 				   disconnect_counter);
 			set_bit(SUSPEND_DISCONNECTING, hdev->suspend_tasks);
 		}
-	} else if (next == BT_SUSPEND_COMPLETE) {
+	} else if (next == BT_SUSPEND_CONFIGURE_WAKE) {
 		/* Unpause to take care of updating scanning params */
 		hdev->scanning_paused = false;
 		/* Enable event filter for paired devices */
-- 
cgit v1.2.3-59-g8ed1b


From 81dafad53c67abe4f09b0b04131fe490e76e5602 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Tue, 12 May 2020 19:19:26 -0700
Subject: Bluetooth: Add hook for driver to prevent wake from suspend

Let drivers have a hook to disable configuring scanning during suspend.
Drivers should use the device_may_wakeup function call to determine
whether hci should be configured for wakeup.

For example, an implementation for btusb may look like the following:

  bool btusb_prevent_wake(struct hci_dev *hdev)
  {
        struct btusb_data *data = hci_get_drvdata(hdev);
        return !device_may_wakeup(&data->udev->dev);
  }

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 1 +
 net/bluetooth/hci_core.c         | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 869ee2b30a4c..5dcf85f186c6 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -503,6 +503,7 @@ struct hci_dev {
 	int (*set_diag)(struct hci_dev *hdev, bool enable);
 	int (*set_bdaddr)(struct hci_dev *hdev, const bdaddr_t *bdaddr);
 	void (*cmd_timeout)(struct hci_dev *hdev);
+	bool (*prevent_wake)(struct hci_dev *hdev);
 };
 
 #define HCI_PHY_HANDLE(handle)	(handle & 0xff)
diff --git a/net/bluetooth/hci_core.c b/net/bluetooth/hci_core.c
index de1f4e72ec06..dbe2d79f233f 100644
--- a/net/bluetooth/hci_core.c
+++ b/net/bluetooth/hci_core.c
@@ -3350,8 +3350,10 @@ static int hci_suspend_notifier(struct notifier_block *nb, unsigned long action,
 		 */
 		ret = hci_change_suspend_state(hdev, BT_SUSPEND_DISCONNECT);
 
-		/* Only configure whitelist if disconnect succeeded */
-		if (!ret)
+		/* Only configure whitelist if disconnect succeeded and wake
+		 * isn't being prevented.
+		 */
+		if (!ret && !(hdev->prevent_wake && hdev->prevent_wake(hdev)))
 			ret = hci_change_suspend_state(hdev,
 						BT_SUSPEND_CONFIGURE_WAKE);
 	} else if (action == PM_POST_SUSPEND) {
-- 
cgit v1.2.3-59-g8ed1b


From b7d0bf11a7ad0e97721a5c4a209ba749649f8b9e Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Tue, 12 May 2020 19:19:27 -0700
Subject: Bluetooth: btusb: Implement hdev->prevent_wake

Implement the prevent_wake hook by checking device_may_wakeup on the usb
interface. This prevents the Bluetooth core from enabling scanning when
the device isn't expected to wake from suspend.

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Alain Michaud <alainm@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index 8ae3ad7a6013..c17359c0e320 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -3698,6 +3698,13 @@ static void btusb_check_needs_reset_resume(struct usb_interface *intf)
 		interface_to_usbdev(intf)->quirks |= USB_QUIRK_RESET_RESUME;
 }
 
+static bool btusb_prevent_wake(struct hci_dev *hdev)
+{
+	struct btusb_data *data = hci_get_drvdata(hdev);
+
+	return !device_may_wakeup(&data->udev->dev);
+}
+
 static int btusb_probe(struct usb_interface *intf,
 		       const struct usb_device_id *id)
 {
@@ -3831,6 +3838,7 @@ static int btusb_probe(struct usb_interface *intf,
 	hdev->flush  = btusb_flush;
 	hdev->send   = btusb_send_frame;
 	hdev->notify = btusb_notify;
+	hdev->prevent_wake = btusb_prevent_wake;
 
 #ifdef CONFIG_PM
 	err = btusb_config_oob_wake(hdev);
-- 
cgit v1.2.3-59-g8ed1b


From 875e16759005e3bdaa84eb2741281f37ba35b886 Mon Sep 17 00:00:00 2001
From: Raghuram Hegde <raghuram.hegde@intel.com>
Date: Mon, 11 May 2020 16:40:40 +0530
Subject: Bluetooth: btusb: Add support for Intel Bluetooth Device Typhoon Peak
 (8087:0032)

Device from /sys/kernel/debug/usb/devices:

T:  Bus=01 Lev=01 Prnt=01 Port=13 Cnt=02 Dev#=  3 Spd=12   MxCh= 0
D:  Ver= 2.01 Cls=e0(wlcon) Sub=01 Prot=01 MxPS=64 #Cfgs=  1
P:  Vendor=8087 ProdID=0032 Rev= 0.00
C:* #Ifs= 2 Cfg#= 1 Atr=e0 MxPwr=100mA
I:* If#= 0 Alt= 0 #EPs= 3 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=81(I) Atr=03(Int.) MxPS=  64 Ivl=1ms
E:  Ad=02(O) Atr=02(Bulk) MxPS=  64 Ivl=0ms
E:  Ad=82(I) Atr=02(Bulk) MxPS=  64 Ivl=0ms
I:* If#= 1 Alt= 0 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   0 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   0 Ivl=1ms
I:  If#= 1 Alt= 1 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=   9 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=   9 Ivl=1ms
I:  If#= 1 Alt= 2 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  17 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  17 Ivl=1ms
I:  If#= 1 Alt= 3 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  25 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  25 Ivl=1ms
I:  If#= 1 Alt= 4 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  33 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  33 Ivl=1ms
I:  If#= 1 Alt= 5 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  49 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  49 Ivl=1ms
I:  If#= 1 Alt= 6 #EPs= 2 Cls=e0(wlcon) Sub=01 Prot=01 Driver=btusb
E:  Ad=03(O) Atr=01(Isoc) MxPS=  63 Ivl=1ms
E:  Ad=83(I) Atr=01(Isoc) MxPS=  63 Ivl=1ms

Signed-off-by: Raghuram Hegde <raghuram.hegde@intel.com>
Signed-off-by: Chethan T N <chethan.tumkur.narayan@intel.com>
Signed-off-by: Amit K Bag <amit.k.bag@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btusb.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/bluetooth/btusb.c b/drivers/bluetooth/btusb.c
index c17359c0e320..5f022e9cf667 100644
--- a/drivers/bluetooth/btusb.c
+++ b/drivers/bluetooth/btusb.c
@@ -342,6 +342,8 @@ static const struct usb_device_id blacklist_table[] = {
 						     BTUSB_WIDEBAND_SPEECH },
 	{ USB_DEVICE(0x8087, 0x0029), .driver_info = BTUSB_INTEL_NEW |
 						     BTUSB_WIDEBAND_SPEECH },
+	{ USB_DEVICE(0x8087, 0x0032), .driver_info = BTUSB_INTEL_NEW |
+						     BTUSB_WIDEBAND_SPEECH},
 	{ USB_DEVICE(0x8087, 0x07da), .driver_info = BTUSB_CSR },
 	{ USB_DEVICE(0x8087, 0x07dc), .driver_info = BTUSB_INTEL },
 	{ USB_DEVICE(0x8087, 0x0a2a), .driver_info = BTUSB_INTEL },
-- 
cgit v1.2.3-59-g8ed1b


From 608c39f4308cefde9d1f24b661261de2b3f3fef4 Mon Sep 17 00:00:00 2001
From: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Date: Sat, 9 May 2020 15:17:19 +0200
Subject: Bluetooth: serdev: Constify serdev_device_ops

serdev_device_ops is not modified and can be const. Also, remove the
unneeded declaration of it.

Output from the size command before and after:

Before:
   text    data     bss     dec     hex filename
   7192    2408     192    9792    2640 drivers/bluetooth/hci_serdev.o

After:
   text    data     bss     dec     hex filename
   7256    2344     192    9792    2640 drivers/bluetooth/hci_serdev.o

Signed-off-by: Rikard Falkeborn <rikard.falkeborn@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_serdev.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/bluetooth/hci_serdev.c b/drivers/bluetooth/hci_serdev.c
index 4652896d4990..599855e4c57c 100644
--- a/drivers/bluetooth/hci_serdev.c
+++ b/drivers/bluetooth/hci_serdev.c
@@ -21,8 +21,6 @@
 
 #include "hci_uart.h"
 
-static struct serdev_device_ops hci_serdev_client_ops;
-
 static inline void hci_uart_tx_complete(struct hci_uart *hu, int pkt_type)
 {
 	struct hci_dev *hdev = hu->hdev;
@@ -260,7 +258,7 @@ static int hci_uart_receive_buf(struct serdev_device *serdev, const u8 *data,
 	return count;
 }
 
-static struct serdev_device_ops hci_serdev_client_ops = {
+static const struct serdev_device_ops hci_serdev_client_ops = {
 	.receive_buf = hci_uart_receive_buf,
 	.write_wakeup = hci_uart_write_wakeup,
 };
-- 
cgit v1.2.3-59-g8ed1b


From adf1d6926444029396861413aba8a0f2a805742a Mon Sep 17 00:00:00 2001
From: Sonny Sasaka <sonnysasaka@chromium.org>
Date: Wed, 6 May 2020 12:55:03 -0700
Subject: Bluetooth: Handle Inquiry Cancel error after Inquiry Complete

After sending Inquiry Cancel command to the controller, it is possible
that Inquiry Complete event comes before Inquiry Cancel command complete
event. In this case the Inquiry Cancel command will have status of
Command Disallowed since there is no Inquiry session to be cancelled.
This case should not be treated as error, otherwise we can reach an
inconsistent state.

Example of a btmon trace when this happened:

< HCI Command: Inquiry Cancel (0x01|0x0002) plen 0
> HCI Event: Inquiry Complete (0x01) plen 1
        Status: Success (0x00)
> HCI Event: Command Complete (0x0e) plen 4
      Inquiry Cancel (0x01|0x0002) ncmd 1
        Status: Command Disallowed (0x0c)

Signed-off-by: Sonny Sasaka <sonnysasaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 19 +++++++++++++++++--
 1 file changed, 17 insertions(+), 2 deletions(-)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 006c24e04b44..73aabca0064b 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -42,12 +42,27 @@
 
 /* Handle HCI Event packets */
 
-static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb)
+static void hci_cc_inquiry_cancel(struct hci_dev *hdev, struct sk_buff *skb,
+				  u8 *new_status)
 {
 	__u8 status = *((__u8 *) skb->data);
 
 	BT_DBG("%s status 0x%2.2x", hdev->name, status);
 
+	/* It is possible that we receive Inquiry Complete event right
+	 * before we receive Inquiry Cancel Command Complete event, in
+	 * which case the latter event should have status of Command
+	 * Disallowed (0x0c). This should not be treated as error, since
+	 * we actually achieve what Inquiry Cancel wants to achieve,
+	 * which is to end the last Inquiry session.
+	 */
+	if (status == 0x0c && !test_bit(HCI_INQUIRY, &hdev->flags)) {
+		bt_dev_warn(hdev, "Ignoring error of Inquiry Cancel command");
+		status = 0x00;
+	}
+
+	*new_status = status;
+
 	if (status)
 		return;
 
@@ -3233,7 +3248,7 @@ static void hci_cmd_complete_evt(struct hci_dev *hdev, struct sk_buff *skb,
 
 	switch (*opcode) {
 	case HCI_OP_INQUIRY_CANCEL:
-		hci_cc_inquiry_cancel(hdev, skb);
+		hci_cc_inquiry_cancel(hdev, skb, status);
 		break;
 
 	case HCI_OP_PERIODIC_INQ:
-- 
cgit v1.2.3-59-g8ed1b


From 5b440676c15bbe1a40f2546ec92db83ed66d9e22 Mon Sep 17 00:00:00 2001
From: Archie Pusaka <apusaka@chromium.org>
Date: Tue, 14 Apr 2020 16:08:40 +0800
Subject: Bluetooth: L2CAP: add support for waiting disconnection resp

Whenever we disconnect a L2CAP connection, we would immediately
report a disconnection event (EPOLLHUP) to the upper layer, without
waiting for the response of the other device.

This patch offers an option to wait until we receive a disconnection
response before reporting disconnection event, by using the "how"
parameter in l2cap_sock_shutdown(). Therefore, upper layer can opt
to wait for disconnection response by shutdown(sock, SHUT_WR).

This can be used to enforce proper disconnection order in HID,
where the disconnection of the interrupt channel must be complete
before attempting to disconnect the control channel.

Signed-off-by: Archie Pusaka <apusaka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/l2cap_sock.c | 30 +++++++++++++++++++++++-------
 1 file changed, 23 insertions(+), 7 deletions(-)

diff --git a/net/bluetooth/l2cap_sock.c b/net/bluetooth/l2cap_sock.c
index 1cea42ee1e92..a995d2c51fa7 100644
--- a/net/bluetooth/l2cap_sock.c
+++ b/net/bluetooth/l2cap_sock.c
@@ -1271,14 +1271,21 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 	struct l2cap_conn *conn;
 	int err = 0;
 
-	BT_DBG("sock %p, sk %p", sock, sk);
+	BT_DBG("sock %p, sk %p, how %d", sock, sk, how);
+
+	/* 'how' parameter is mapped to sk_shutdown as follows:
+	 * SHUT_RD   (0) --> RCV_SHUTDOWN  (1)
+	 * SHUT_WR   (1) --> SEND_SHUTDOWN (2)
+	 * SHUT_RDWR (2) --> SHUTDOWN_MASK (3)
+	 */
+	how++;
 
 	if (!sk)
 		return 0;
 
 	lock_sock(sk);
 
-	if (sk->sk_shutdown)
+	if ((sk->sk_shutdown & how) == how)
 		goto shutdown_already;
 
 	BT_DBG("Handling sock shutdown");
@@ -1301,11 +1308,20 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 		 * has already been actioned to close the L2CAP
 		 * link such as by l2cap_disconnection_req().
 		 */
-		if (sk->sk_shutdown)
-			goto has_shutdown;
+		if ((sk->sk_shutdown & how) == how)
+			goto shutdown_matched;
 	}
 
-	sk->sk_shutdown = SHUTDOWN_MASK;
+	/* Try setting the RCV_SHUTDOWN bit, return early if SEND_SHUTDOWN
+	 * is already set
+	 */
+	if ((how & RCV_SHUTDOWN) && !(sk->sk_shutdown & RCV_SHUTDOWN)) {
+		sk->sk_shutdown |= RCV_SHUTDOWN;
+		if ((sk->sk_shutdown & how) == how)
+			goto shutdown_matched;
+	}
+
+	sk->sk_shutdown |= SEND_SHUTDOWN;
 	release_sock(sk);
 
 	l2cap_chan_lock(chan);
@@ -1335,7 +1351,7 @@ static int l2cap_sock_shutdown(struct socket *sock, int how)
 		err = bt_sock_wait_state(sk, BT_CLOSED,
 					 sk->sk_lingertime);
 
-has_shutdown:
+shutdown_matched:
 	l2cap_chan_put(chan);
 	sock_put(sk);
 
@@ -1363,7 +1379,7 @@ static int l2cap_sock_release(struct socket *sock)
 
 	bt_sock_unlink(&l2cap_sk_list, sk);
 
-	err = l2cap_sock_shutdown(sock, 2);
+	err = l2cap_sock_shutdown(sock, SHUT_RDWR);
 	chan = l2cap_pi(sk)->chan;
 
 	l2cap_chan_hold(chan);
-- 
cgit v1.2.3-59-g8ed1b


From 309b81f0fdc4209d998bc63f0da52c2e96340d4e Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Wed, 13 May 2020 05:17:22 +0300
Subject: selftests/bpf: Install generated test progs

Before commit 74b5a5968fe8 ("selftests/bpf: Replace test_progs and
test_maps w/ general rule") selftests/bpf used generic install
target from selftests/lib.mk to install generated bpf test progs
by mentioning them in TEST_GEN_FILES variable.

Take that functionality back.

Fixes: 74b5a5968fe8 ("selftests/bpf: Replace test_progs and test_maps w/ general rule")
Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513021722.7787-1-yauheni.kaliuta@redhat.com
---
 tools/testing/selftests/bpf/Makefile | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 8f25966b500b..1f878dcd2bf6 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -265,6 +265,7 @@ TRUNNER_BPF_OBJS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.o, $$(TRUNNER_BPF_SRCS)
 TRUNNER_BPF_SKELS := $$(patsubst %.c,$$(TRUNNER_OUTPUT)/%.skel.h,	\
 				 $$(filter-out $(SKEL_BLACKLIST),	\
 					       $$(TRUNNER_BPF_SRCS)))
+TEST_GEN_FILES += $$(TRUNNER_BPF_OBJS)
 
 # Evaluate rules now with extra TRUNNER_XXX variables above already defined
 $$(eval $$(call DEFINE_TEST_RUNNER_RULES,$1,$2))
-- 
cgit v1.2.3-59-g8ed1b


From fd9eef1a132d1974405c3ebf9d5688ec5c51da94 Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Tue, 12 May 2020 11:04:40 +0200
Subject: libbpf: Fix probe code to return EPERM if encountered

When the probe code failed for any reason, ENOTSUP was returned, even
if the failure was due to an insufficient locked-memory limit. This patch
fixes this by returning EPERM to the user application, so it can respond
and increase the RLIMIT_MEMLOCK size.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/158927424896.2342.10402475603585742943.stgit@ebuild
---
 tools/lib/bpf/libbpf.c | 36 +++++++++++++++++++++++++++++-------
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 3da66540b54b..fd882616ab52 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -3237,7 +3237,7 @@ int bpf_map__resize(struct bpf_map *map, __u32 max_entries)
 }
 
 static int
-bpf_object__probe_name(struct bpf_object *obj)
+bpf_object__probe_loading(struct bpf_object *obj)
 {
 	struct bpf_load_program_attr attr;
 	char *cp, errmsg[STRERR_BUFSIZE];
@@ -3257,15 +3257,36 @@ bpf_object__probe_name(struct bpf_object *obj)
 
 	ret = bpf_load_program_xattr(&attr, NULL, 0);
 	if (ret < 0) {
-		cp = libbpf_strerror_r(errno, errmsg, sizeof(errmsg));
-		pr_warn("Error in %s():%s(%d). Couldn't load basic 'r0 = 0' BPF program.\n",
-			__func__, cp, errno);
-		return -errno;
+		ret = errno;
+		cp = libbpf_strerror_r(ret, errmsg, sizeof(errmsg));
+		pr_warn("Error in %s():%s(%d). Couldn't load trivial BPF "
+			"program. Make sure your kernel supports BPF "
+			"(CONFIG_BPF_SYSCALL=y) and/or that RLIMIT_MEMLOCK is "
+			"set to big enough value.\n", __func__, cp, ret);
+		return -ret;
 	}
 	close(ret);
 
-	/* now try the same program, but with the name */
+	return 0;
+}
+
+static int
+bpf_object__probe_name(struct bpf_object *obj)
+{
+	struct bpf_load_program_attr attr;
+	struct bpf_insn insns[] = {
+		BPF_MOV64_IMM(BPF_REG_0, 0),
+		BPF_EXIT_INSN(),
+	};
+	int ret;
+
+	/* make sure loading with name works */
 
+	memset(&attr, 0, sizeof(attr));
+	attr.prog_type = BPF_PROG_TYPE_SOCKET_FILTER;
+	attr.insns = insns;
+	attr.insns_cnt = ARRAY_SIZE(insns);
+	attr.license = "GPL";
 	attr.name = "test";
 	ret = bpf_load_program_xattr(&attr, NULL, 0);
 	if (ret >= 0) {
@@ -5636,7 +5657,8 @@ int bpf_object__load_xattr(struct bpf_object_load_attr *attr)
 
 	obj->loaded = true;
 
-	err = bpf_object__probe_caps(obj);
+	err = bpf_object__probe_loading(obj);
+	err = err ? : bpf_object__probe_caps(obj);
 	err = err ? : bpf_object__resolve_externs(obj, obj->kconfig);
 	err = err ? : bpf_object__sanitize_and_load_btf(obj);
 	err = err ? : bpf_object__sanitize_maps(obj);
-- 
cgit v1.2.3-59-g8ed1b


From b2fe11f0777311a764e47e2f9437809b4673b7b1 Mon Sep 17 00:00:00 2001
From: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Date: Tue, 12 May 2020 05:03:08 -0500
Subject: brcmfmac: fix WPA/WPA2-PSK 4-way handshake offload and SAE offload
 failures

An incorrect value of use_fwsup is set for 4-way handshake offload for
WPA/WPA2-PSK, caused by commit 3b1e0a7bdfee ("brcmfmac: add support for
SAE authentication offload"). As a result, bit
BRCMF_VIF_STATUS_EAP_SUCCESS is never set in brcmf_is_linkup(), which
causes the failure. This patch corrects the value for that case.

Also set bit BRCMF_VIF_STATUS_EAP_SUCCESS for the SAE offload case in
brcmf_is_linkup() to fix the SAE offload failure.

Fixes: 3b1e0a7bdfee ("brcmfmac: add support for SAE authentication offload")
Signed-off-by: Chung-Hsien Hsu <stanley.hsu@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1589277788-119966-1-git-send-email-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c | 12 ++++++------
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
index 579b9306f533..fbec6dd551d6 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/cfg80211.c
@@ -1820,6 +1820,10 @@ brcmf_set_key_mgmt(struct net_device *ndev, struct cfg80211_connect_params *sme)
 		switch (sme->crypto.akm_suites[0]) {
 		case WLAN_AKM_SUITE_SAE:
 			val = WPA3_AUTH_SAE_PSK;
+			if (sme->crypto.sae_pwd) {
+				brcmf_dbg(INFO, "using SAE offload\n");
+				profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
+			}
 			break;
 		default:
 			bphy_err(drvr, "invalid cipher group (%d)\n",
@@ -2105,11 +2109,6 @@ brcmf_cfg80211_connect(struct wiphy *wiphy, struct net_device *ndev,
 		goto done;
 	}
 
-	if (sme->crypto.sae_pwd) {
-		brcmf_dbg(INFO, "using SAE offload\n");
-		profile->use_fwsup = BRCMF_PROFILE_FWSUP_SAE;
-	}
-
 	if (sme->crypto.psk &&
 	    profile->use_fwsup != BRCMF_PROFILE_FWSUP_SAE) {
 		if (WARN_ON(profile->use_fwsup != BRCMF_PROFILE_FWSUP_NONE)) {
@@ -5548,7 +5547,8 @@ static bool brcmf_is_linkup(struct brcmf_cfg80211_vif *vif,
 	u32 event = e->event_code;
 	u32 status = e->status;
 
-	if (vif->profile.use_fwsup == BRCMF_PROFILE_FWSUP_PSK &&
+	if ((vif->profile.use_fwsup == BRCMF_PROFILE_FWSUP_PSK ||
+	     vif->profile.use_fwsup == BRCMF_PROFILE_FWSUP_SAE) &&
 	    event == BRCMF_E_PSK_SUP &&
 	    status == BRCMF_E_STATUS_FWSUP_COMPLETED)
 		set_bit(BRCMF_VIF_STATUS_EAP_SUCCESS, &vif->sme_state);
-- 
cgit v1.2.3-59-g8ed1b


From f71eb7f603170bf8742c53c9fb72a64df04cb6e4 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:13 +0800
Subject: rtw88: 8723d: Add LC calibration

LC calibration is done by a hardware circuit. The driver sets the LCK bit
to kick it off, and then polls the bit to check whether it's done.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  1 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 31 +++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |  3 +++
 3 files changed, 35 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index e0365a70c6f7..c5046986f9af 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -11,6 +11,7 @@
 #include <linux/average.h>
 #include <linux/bitops.h>
 #include <linux/bitfield.h>
+#include <linux/iopoll.h>
 #include <linux/interrupt.h>
 
 #include "util.h"
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 92c742d1ce6d..0f5ddd41b019 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -64,6 +64,34 @@ static const struct rtw_hw_reg rtw8723d_txagc[] = {
 #define WLAN_LTR_CTRL1		0xCB004010
 #define WLAN_LTR_CTRL2		0x01233425
 
+static void rtw8723d_lck(struct rtw_dev *rtwdev)
+{
+	u32 lc_cal;
+	u8 val_ctx, rf_val;
+	int ret;
+
+	val_ctx = rtw_read8(rtwdev, REG_CTX);
+	if ((val_ctx & BIT_MASK_CTX_TYPE) != 0)
+		rtw_write8(rtwdev, REG_CTX, val_ctx & ~BIT_MASK_CTX_TYPE);
+	else
+		rtw_write8(rtwdev, REG_TXPAUSE, 0xFF);
+	lc_cal = rtw_read_rf(rtwdev, RF_PATH_A, RF_CFGCH, RFREG_MASK);
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_CFGCH, RFREG_MASK, lc_cal | BIT_LCK);
+
+	ret = read_poll_timeout(rtw_read_rf, rf_val, rf_val != 0x1,
+				10000, 1000000, false,
+				rtwdev, RF_PATH_A, RF_CFGCH, BIT_LCK);
+	if (ret)
+		rtw_warn(rtwdev, "failed to poll LCK status bit\n");
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_CFGCH, RFREG_MASK, lc_cal);
+	if ((val_ctx & BIT_MASK_CTX_TYPE) != 0)
+		rtw_write8(rtwdev, REG_CTX, val_ctx);
+	else
+		rtw_write8(rtwdev, REG_TXPAUSE, 0x00);
+}
+
 static void rtw8723d_phy_set_param(struct rtw_dev *rtwdev)
 {
 	u8 xtal_cap;
@@ -125,6 +153,9 @@ static void rtw8723d_phy_set_param(struct rtw_dev *rtwdev)
 	rtw_phy_init(rtwdev);
 
 	rtw_write16_set(rtwdev, REG_TXDMA_OFFSET_CHK, BIT_DROP_DATA_EN);
+
+	rtw8723d_lck(rtwdev);
+
 	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x50);
 	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x20);
 }
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index ac66f672bec8..6a7d58992df5 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -78,6 +78,7 @@ struct rtw8723d_efuse {
 #define RFCFGCH_BW_20M		(BIT(11) | BIT(10))
 #define RFCFGCH_BW_40M		BIT(10)
 #define BIT_MASK_RFMOD		BIT(0)
+#define BIT_LCK			BIT(15)
 
 #define REG_PSDFN		0x0808
 #define REG_ANALOG_P4		0x088c
@@ -115,6 +116,8 @@ struct rtw8723d_efuse {
 #define REG_OFDM_FA_RSTD_11N	0x0d00
 #define BIT_MASK_OFDM_FA_RST1	BIT(27)
 #define BIT_MASK_OFDM_FA_KEEP1	BIT(31)
+#define REG_CTX			0x0d03
+#define BIT_MASK_CTX_TYPE	GENMASK(6, 4)
 #define REG_OFDM1_CFOTRK	0x0d2c
 #define BIT_EN_CFOTRK		BIT(28)
 #define REG_OFDM1_CSI1		0x0d40
-- 
cgit v1.2.3-59-g8ed1b


From 1d229e88e53c20717a51ef8d8f9a69573167543a Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:14 +0800
Subject: rtw88: 8723d: add IQ calibration

IQ calibration is used to calibrate RF characteristics to yield expected
performance. Basically, we do calibration twice and compare the results
to determine whether calibration is good. If not, we do a third
calibration and compare it with the results of the first and second
calibrations. If it is still not similar, IQK has failed.

Before doing calibration, we need to back up the registers that will be
modified in the calibration procedure, and restore these registers after
calibration is done.

A calibration procedure can be divided into four sub-procedures: S1-TX,
S1-RX, S0-TX and S0-RX, where S1 and S0 represent paths A and B
respectively. Each sub-procedure configures the proper registers, then
triggers a one-shot calibration and polls until completion. RX calibration
needs two one-shot calibrations; the first one yields a parameter used by
the second one.

The result of TX part is stored for TX power tracking that adjusts TX AGC
to output expected power.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  12 +
 drivers/net/wireless/realtek/rtw88/phy.c      |   2 +
 drivers/net/wireless/realtek/rtw88/reg.h      |  10 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 817 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h | 122 ++++
 5 files changed, 963 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index c5046986f9af..af8c50e3687d 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1400,6 +1400,16 @@ struct rtw_pkt_count {
 DECLARE_EWMA(evm, 10, 4);
 DECLARE_EWMA(snr, 10, 4);
 
+struct rtw_iqk_info {
+	bool done;
+	struct {
+		u32 s1_x;
+		u32 s1_y;
+		u32 s0_x;
+		u32 s0_y;
+	} result;
+};
+
 struct rtw_dm_info {
 	u32 cck_fa_cnt;
 	u32 ofdm_fa_cnt;
@@ -1459,6 +1469,8 @@ struct rtw_dm_info {
 	struct rtw_pkt_count last_pkt_count;
 	struct ewma_evm ewma_evm[RTW_EVM_NUM];
 	struct ewma_snr ewma_snr[RTW_SNR_NUM];
+
+	struct rtw_iqk_info iqk;
 };
 
 struct rtw_efuse {
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 72a16eff9db3..837b7bd04126 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -134,6 +134,8 @@ void rtw_phy_init(struct rtw_dev *rtwdev)
 	mask = chip->dig[0].mask;
 	dm_info->igi_history[0] = rtw_read32_mask(rtwdev, addr, mask);
 	rtw_phy_cck_pd_init(rtwdev);
+
+	dm_info->iqk.done = false;
 }
 
 void rtw_phy_dig_write(struct rtw_dev *rtwdev, u8 igi)
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index 9fdfcdc5c5cf..d57de1a6cdcc 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -69,6 +69,7 @@
 #define BIT_DPDT_SEL_EN		BIT(23)
 #define REG_LEDCFG2		0x004E
 #define REG_PAD_CTRL1		0x0064
+#define BIT_BT_BTG_SEL		BIT(31)
 #define BIT_PAPE_WLBT_SEL	BIT(29)
 #define BIT_LNAON_WLBT_SEL	BIT(28)
 #define BIT_BTGP_JTAG_EN	BIT(24)
@@ -611,7 +612,10 @@
 
 #define REG_IGN_GNTBT4	0x4160
 
+#define RF_MODE		0x00
 #define RF_MODOPT	0x01
+#define RF_WLINT	0x01
+#define RF_WLSEL	0x02
 #define RF_DTXLOK	0x08
 #define RF_CFGCH	0x18
 #define RF_RCK		0x1d
@@ -619,9 +623,15 @@
 #define RF_LUTWD1	0x3e
 #define RF_LUTWD0	0x3f
 #define RF_T_METER	0x42
+#define RF_BSPAD	0x54
+#define RF_GAINTX	0x56
+#define RF_TXATANK	0x64
+#define RF_TRXIQ	0x66
+#define RF_RXIQGEN	0x8d
 #define RF_XTALX2	0xb8
 #define RF_MALSEL	0xbe
 #define RF_RCKD		0xde
+#define RF_TXADBG	0xde
 #define RF_LUTDBG	0xdf
 #define RF_LUTWE2	0xee
 #define RF_LUTWE	0xef
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 0f5ddd41b019..0cfa493e7742 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -634,6 +634,822 @@ static void rtw8723d_false_alarm_statistics(struct rtw_dev *rtwdev)
 	rtw_write32_mask(rtwdev, REG_PAGE_F_RST_11N, BIT_MASK_F_RST_ALL, 0);
 }
 
+static const u32 iqk_adda_regs[] = {
+	0x85c, 0xe6c, 0xe70, 0xe74, 0xe78, 0xe7c, 0xe80, 0xe84, 0xe88, 0xe8c,
+	0xed0, 0xed4, 0xed8, 0xedc, 0xee0, 0xeec
+};
+
+static const u32 iqk_mac8_regs[] = {0x522, 0x550, 0x551};
+static const u32 iqk_mac32_regs[] = {0x40};
+
+static const u32 iqk_bb_regs[] = {
+	0xc04, 0xc08, 0x874, 0xb68, 0xb6c, 0x870, 0x860, 0x864, 0xa04
+};
+
+#define IQK_ADDA_REG_NUM	ARRAY_SIZE(iqk_adda_regs)
+#define IQK_MAC8_REG_NUM	ARRAY_SIZE(iqk_mac8_regs)
+#define IQK_MAC32_REG_NUM	ARRAY_SIZE(iqk_mac32_regs)
+#define IQK_BB_REG_NUM		ARRAY_SIZE(iqk_bb_regs)
+
+struct iqk_backup_regs {
+	u32 adda[IQK_ADDA_REG_NUM];
+	u8 mac8[IQK_MAC8_REG_NUM];
+	u32 mac32[IQK_MAC32_REG_NUM];
+	u32 bb[IQK_BB_REG_NUM];
+
+	u32 lte_path;
+	u32 lte_gnt;
+
+	u32 bb_sel_btg;
+	u8 btg_sel;
+
+	u8 igia;
+	u8 igib;
+};
+
+static void rtw8723d_iqk_backup_regs(struct rtw_dev *rtwdev,
+				     struct iqk_backup_regs *backup)
+{
+	int i;
+
+	for (i = 0; i < IQK_ADDA_REG_NUM; i++)
+		backup->adda[i] = rtw_read32(rtwdev, iqk_adda_regs[i]);
+
+	for (i = 0; i < IQK_MAC8_REG_NUM; i++)
+		backup->mac8[i] = rtw_read8(rtwdev, iqk_mac8_regs[i]);
+	for (i = 0; i < IQK_MAC32_REG_NUM; i++)
+		backup->mac32[i] = rtw_read32(rtwdev, iqk_mac32_regs[i]);
+
+	for (i = 0; i < IQK_BB_REG_NUM; i++)
+		backup->bb[i] = rtw_read32(rtwdev, iqk_bb_regs[i]);
+
+	backup->igia = rtw_read32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0);
+	backup->igib = rtw_read32_mask(rtwdev, REG_OFDM0_XBAGC1, MASKBYTE0);
+
+	backup->bb_sel_btg = rtw_read32(rtwdev, REG_BB_SEL_BTG);
+}
+
+static void rtw8723d_iqk_restore_regs(struct rtw_dev *rtwdev,
+				      const struct iqk_backup_regs *backup)
+{
+	int i;
+
+	for (i = 0; i < IQK_ADDA_REG_NUM; i++)
+		rtw_write32(rtwdev, iqk_adda_regs[i], backup->adda[i]);
+
+	for (i = 0; i < IQK_MAC8_REG_NUM; i++)
+		rtw_write8(rtwdev, iqk_mac8_regs[i], backup->mac8[i]);
+	for (i = 0; i < IQK_MAC32_REG_NUM; i++)
+		rtw_write32(rtwdev, iqk_mac32_regs[i], backup->mac32[i]);
+
+	for (i = 0; i < IQK_BB_REG_NUM; i++)
+		rtw_write32(rtwdev, iqk_bb_regs[i], backup->bb[i]);
+
+	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x50);
+	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, backup->igia);
+
+	rtw_write32_mask(rtwdev, REG_OFDM0_XBAGC1, MASKBYTE0, 0x50);
+	rtw_write32_mask(rtwdev, REG_OFDM0_XBAGC1, MASKBYTE0, backup->igib);
+
+	rtw_write32(rtwdev, REG_TXIQK_TONE_A_11N, 0x01008c00);
+	rtw_write32(rtwdev, REG_RXIQK_TONE_A_11N, 0x01008c00);
+}
+
+static void rtw8723d_iqk_backup_path_ctrl(struct rtw_dev *rtwdev,
+					  struct iqk_backup_regs *backup)
+{
+	backup->btg_sel = rtw_read8(rtwdev, REG_BTG_SEL);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] original 0x67 = 0x%x\n",
+		backup->btg_sel);
+}
+
+static void rtw8723d_iqk_config_path_ctrl(struct rtw_dev *rtwdev)
+{
+	rtw_write32_mask(rtwdev, REG_PAD_CTRL1, BIT_BT_BTG_SEL, 0x1);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] set 0x67 = 0x%x\n",
+		rtw_read32_mask(rtwdev, REG_PAD_CTRL1, MASKBYTE3));
+}
+
+static void rtw8723d_iqk_restore_path_ctrl(struct rtw_dev *rtwdev,
+					   const struct iqk_backup_regs *backup)
+{
+	rtw_write8(rtwdev, REG_BTG_SEL, backup->btg_sel);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] restore 0x67 = 0x%x\n",
+		rtw_read32_mask(rtwdev, REG_PAD_CTRL1, MASKBYTE3));
+}
+
+static void rtw8723d_iqk_backup_lte_path_gnt(struct rtw_dev *rtwdev,
+					     struct iqk_backup_regs *backup)
+{
+	backup->lte_path = rtw_read32(rtwdev, REG_LTECOEX_PATH_CONTROL);
+	rtw_write32(rtwdev, REG_LTECOEX_CTRL, 0x800f0038);
+	mdelay(1);
+	backup->lte_gnt = rtw_read32(rtwdev, REG_LTECOEX_READ_DATA);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] OriginalGNT = 0x%x\n",
+		backup->lte_gnt);
+}
+
+static void rtw8723d_iqk_config_lte_path_gnt(struct rtw_dev *rtwdev)
+{
+	rtw_write32(rtwdev, REG_LTECOEX_WRITE_DATA, 0x0000ff00);
+	rtw_write32(rtwdev, REG_LTECOEX_CTRL, 0xc0020038);
+	rtw_write32_mask(rtwdev, REG_LTECOEX_PATH_CONTROL, BIT_LTE_MUX_CTRL_PATH, 0x1);
+}
+
+static void rtw8723d_iqk_restore_lte_path_gnt(struct rtw_dev *rtwdev,
+					      const struct iqk_backup_regs *bak)
+{
+	rtw_write32(rtwdev, REG_LTECOEX_WRITE_DATA, bak->lte_gnt);
+	rtw_write32(rtwdev, REG_LTECOEX_CTRL, 0xc00f0038);
+	rtw_write32(rtwdev, REG_LTECOEX_PATH_CONTROL, bak->lte_path);
+}
+
+struct rtw_8723d_iqk_cfg {
+	const char *name;
+	u32 val_bb_sel_btg;
+	u32 reg_lutwe;
+	u32 val_txiqk_pi;
+	u32 reg_padlut;
+	u32 reg_gaintx;
+	u32 reg_bspad;
+	u32 val_wlint;
+	u32 val_wlsel;
+	u32 val_iqkpts;
+};
+
+static const struct rtw_8723d_iqk_cfg iqk_tx_cfg[PATH_NR] = {
+	[PATH_S1] = {
+		.name = "S1",
+		.val_bb_sel_btg = 0x99000000,
+		.reg_lutwe = RF_LUTWE,
+		.val_txiqk_pi = 0x8214019f,
+		.reg_padlut = RF_LUTDBG,
+		.reg_gaintx = RF_GAINTX,
+		.reg_bspad = RF_BSPAD,
+		.val_wlint = 0xe0d,
+		.val_wlsel = 0x60d,
+		.val_iqkpts = 0xfa000000,
+	},
+	[PATH_S0] = {
+		.name = "S0",
+		.val_bb_sel_btg = 0x99000280,
+		.reg_lutwe = RF_LUTWE2,
+		.val_txiqk_pi = 0x8214018a,
+		.reg_padlut = RF_TXADBG,
+		.reg_gaintx = RF_TRXIQ,
+		.reg_bspad = RF_TXATANK,
+		.val_wlint = 0xe6d,
+		.val_wlsel = 0x66d,
+		.val_iqkpts = 0xf9000000,
+	},
+};
+
+static u8 rtw8723d_iqk_check_tx_failed(struct rtw_dev *rtwdev,
+				       const struct rtw_8723d_iqk_cfg *iqk_cfg)
+{
+	s32 tx_x, tx_y;
+	u32 tx_fail;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0xeac = 0x%x\n",
+		rtw_read32(rtwdev, REG_IQK_RES_RY));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0xe94 = 0x%x, 0xe9c = 0x%x\n",
+		rtw_read32(rtwdev, REG_IQK_RES_TX),
+		rtw_read32(rtwdev, REG_IQK_RES_TY));
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] 0xe90(before IQK)= 0x%x, 0xe98(afer IQK) = 0x%x\n",
+		rtw_read32(rtwdev, 0xe90),
+		rtw_read32(rtwdev, 0xe98));
+
+	tx_fail = rtw_read32_mask(rtwdev, REG_IQK_RES_RY, BIT_IQK_TX_FAIL);
+	tx_x = rtw_read32_mask(rtwdev, REG_IQK_RES_TX, BIT_MASK_RES_TX);
+	tx_y = rtw_read32_mask(rtwdev, REG_IQK_RES_TY, BIT_MASK_RES_TY);
+
+	if (!tx_fail && tx_x != IQK_TX_X_ERR && tx_y != IQK_TX_Y_ERR)
+		return IQK_TX_OK;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] %s TXIQK is failed\n",
+		iqk_cfg->name);
+
+	return 0;
+}
+
+static u8 rtw8723d_iqk_check_rx_failed(struct rtw_dev *rtwdev,
+				       const struct rtw_8723d_iqk_cfg *iqk_cfg)
+{
+	s32 rx_x, rx_y;
+	u32 rx_fail;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0xea4 = 0x%x, 0xeac = 0x%x\n",
+		rtw_read32(rtwdev, REG_IQK_RES_RX),
+		rtw_read32(rtwdev, REG_IQK_RES_RY));
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] 0xea0(before IQK)= 0x%x, 0xea8(afer IQK) = 0x%x\n",
+		rtw_read32(rtwdev, 0xea0),
+		rtw_read32(rtwdev, 0xea8));
+
+	rx_fail = rtw_read32_mask(rtwdev, REG_IQK_RES_RY, BIT_IQK_RX_FAIL);
+	rx_x = rtw_read32_mask(rtwdev, REG_IQK_RES_RX, BIT_MASK_RES_RX);
+	rx_y = rtw_read32_mask(rtwdev, REG_IQK_RES_RY, BIT_MASK_RES_RY);
+	rx_y = abs(iqkxy_to_s32(rx_y));
+
+	if (!rx_fail && rx_x < IQK_RX_X_UPPER && rx_x > IQK_RX_X_LOWER &&
+	    rx_y < IQK_RX_Y_LMT)
+		return IQK_RX_OK;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] %s RXIQK STEP2 is failed\n",
+		iqk_cfg->name);
+
+	return 0;
+}
+
+static void rtw8723d_iqk_one_shot(struct rtw_dev *rtwdev, bool tx,
+				  const struct rtw_8723d_iqk_cfg *iqk_cfg)
+{
+	u32 pts = (tx ? iqk_cfg->val_iqkpts : 0xf9000000);
+
+	/* enter IQK mode */
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, EN_IQK);
+	rtw8723d_iqk_config_lte_path_gnt(rtwdev);
+
+	rtw_write32(rtwdev, REG_LTECOEX_CTRL, 0x800f0054);
+	mdelay(1);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] GNT_BT @%s %sIQK1 = 0x%x\n",
+		iqk_cfg->name, tx ? "TX" : "RX",
+		rtw_read32(rtwdev, REG_LTECOEX_READ_DATA));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0x948 @%s %sIQK1 = 0x%x\n",
+		iqk_cfg->name, tx ? "TX" : "RX",
+		rtw_read32(rtwdev, REG_BB_SEL_BTG));
+
+	/* One shot, LOK & IQK */
+	rtw_write32(rtwdev, REG_IQK_AGC_PTS_11N, pts);
+	rtw_write32(rtwdev, REG_IQK_AGC_PTS_11N, 0xf8000000);
+
+	if (!check_hw_ready(rtwdev, REG_IQK_RES_RY, BIT_IQK_DONE, 1))
+		rtw_warn(rtwdev, "%s %s IQK isn't done\n", iqk_cfg->name,
+			 tx ? "TX" : "RX");
+}
+
+static void rtw8723d_iqk_txrx_path_post(struct rtw_dev *rtwdev,
+					const struct rtw_8723d_iqk_cfg *iqk_cfg,
+					const struct iqk_backup_regs *backup)
+{
+	rtw8723d_iqk_restore_lte_path_gnt(rtwdev, backup);
+	rtw_write32(rtwdev, REG_BB_SEL_BTG, backup->bb_sel_btg);
+
+	/* leave IQK mode */
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+	mdelay(1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_padlut, 0x800, 0x0);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLINT, BIT(0), 0x0);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLSEL, BIT(0), 0x0);
+}
+
+static u8 rtw8723d_iqk_tx_path(struct rtw_dev *rtwdev,
+			       const struct rtw_8723d_iqk_cfg *iqk_cfg,
+			       const struct iqk_backup_regs *backup)
+{
+	u8 status;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path %s TXIQK!!\n", iqk_cfg->name);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0x67 @%s TXIQK = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read32_mask(rtwdev, REG_PAD_CTRL1, MASKBYTE3));
+
+	rtw_write32(rtwdev, REG_BB_SEL_BTG, iqk_cfg->val_bb_sel_btg);
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+	mdelay(1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, RFREG_MASK, 0x80000);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWA, RFREG_MASK, 0x00004);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD1, RFREG_MASK, 0x0005d);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD0, RFREG_MASK, 0xBFFE0);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, RFREG_MASK, 0x00000);
+
+	/* IQK setting */
+	rtw_write32(rtwdev, REG_TXIQK_TONE_A_11N, 0x08008c0c);
+	rtw_write32(rtwdev, REG_RXIQK_TONE_A_11N, 0x38008c1c);
+	rtw_write32(rtwdev, REG_TXIQK_PI_A_11N, iqk_cfg->val_txiqk_pi);
+	rtw_write32(rtwdev, REG_RXIQK_PI_A_11N, 0x28160200);
+	rtw_write32(rtwdev, REG_TXIQK_11N, 0x01007c00);
+	rtw_write32(rtwdev, REG_RXIQK_11N, 0x01004800);
+
+	/* LOK setting */
+	rtw_write32(rtwdev, REG_IQK_AGC_RSP_11N, 0x00462911);
+
+	/* PA, PAD setting */
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_padlut, 0x800, 0x1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_gaintx, 0x600, 0x0);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_gaintx, 0x1E0, 0x3);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_RXIQGEN, 0x1F, 0xf);
+
+	/* LOK setting for 8723D */
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, 0x10, 0x1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_bspad, 0x1, 0x1);
+
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLINT, RFREG_MASK, iqk_cfg->val_wlint);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLSEL, RFREG_MASK, iqk_cfg->val_wlsel);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x1 @%s TXIQK = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLINT, RFREG_MASK));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x2 @%s TXIQK = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLSEL, RFREG_MASK));
+
+	rtw8723d_iqk_one_shot(rtwdev, true, iqk_cfg);
+	status = rtw8723d_iqk_check_tx_failed(rtwdev, iqk_cfg);
+
+	rtw8723d_iqk_txrx_path_post(rtwdev, iqk_cfg, backup);
+
+	return status;
+}
+
+static u8 rtw8723d_iqk_rx_path(struct rtw_dev *rtwdev,
+			       const struct rtw_8723d_iqk_cfg *iqk_cfg,
+			       const struct iqk_backup_regs *backup)
+{
+	u32 tx_x, tx_y;
+	u8 status;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path %s RXIQK Step1!!\n",
+		iqk_cfg->name);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0x67 @%s RXIQK1 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read32_mask(rtwdev, REG_PAD_CTRL1, MASKBYTE3));
+	rtw_write32(rtwdev, REG_BB_SEL_BTG, iqk_cfg->val_bb_sel_btg);
+
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+
+	/* IQK setting */
+	rtw_write32(rtwdev, REG_TXIQK_11N, 0x01007c00);
+	rtw_write32(rtwdev, REG_RXIQK_11N, 0x01004800);
+
+	/* path IQK setting */
+	rtw_write32(rtwdev, REG_TXIQK_TONE_A_11N, 0x18008c1c);
+	rtw_write32(rtwdev, REG_RXIQK_TONE_A_11N, 0x38008c1c);
+	rtw_write32(rtwdev, REG_TX_IQK_TONE_B, 0x38008c1c);
+	rtw_write32(rtwdev, REG_RX_IQK_TONE_B, 0x38008c1c);
+	rtw_write32(rtwdev, REG_TXIQK_PI_A_11N, 0x82160000);
+	rtw_write32(rtwdev, REG_RXIQK_PI_A_11N, 0x28160000);
+
+	/* LOK setting */
+	rtw_write32(rtwdev, REG_IQK_AGC_RSP_11N, 0x0046a911);
+
+	/* RXIQK mode */
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, RFREG_MASK, 0x80000);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWA, RFREG_MASK, 0x00006);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD1, RFREG_MASK, 0x0005f);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD0, RFREG_MASK, 0xa7ffb);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, RFREG_MASK, 0x00000);
+
+	/* PA/PAD=0 */
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_padlut, 0x800, 0x1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_gaintx, 0x600, 0x0);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLINT, RFREG_MASK, iqk_cfg->val_wlint);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_WLSEL, RFREG_MASK, iqk_cfg->val_wlsel);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x1@ path %s RXIQK1 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLINT, RFREG_MASK));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x2@ path %s RXIQK1 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLSEL, RFREG_MASK));
+
+	rtw8723d_iqk_one_shot(rtwdev, false, iqk_cfg);
+	status = rtw8723d_iqk_check_tx_failed(rtwdev, iqk_cfg);
+
+	if (!status)
+		goto restore;
+
+	/* second round */
+	tx_x = rtw_read32_mask(rtwdev, REG_IQK_RES_TX, BIT_MASK_RES_TX);
+	tx_y = rtw_read32_mask(rtwdev, REG_IQK_RES_TY, BIT_MASK_RES_TY);
+
+	rtw_write32(rtwdev, REG_TXIQK_11N, BIT_SET_TXIQK_11N(tx_x, tx_y));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0xe40 = 0x%x u4tmp = 0x%x\n",
+		rtw_read32(rtwdev, REG_TXIQK_11N),
+		BIT_SET_TXIQK_11N(tx_x, tx_y));
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path %s RXIQK STEP2!!\n",
+		iqk_cfg->name);
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] 0x67 @%s RXIQK2 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read32_mask(rtwdev, REG_PAD_CTRL1, MASKBYTE3));
+
+	rtw_write32(rtwdev, REG_RXIQK_11N, 0x01004800);
+	rtw_write32(rtwdev, REG_TXIQK_TONE_A_11N, 0x38008c1c);
+	rtw_write32(rtwdev, REG_RXIQK_TONE_A_11N, 0x18008c1c);
+	rtw_write32(rtwdev, REG_TX_IQK_TONE_B, 0x38008c1c);
+	rtw_write32(rtwdev, REG_RX_IQK_TONE_B, 0x38008c1c);
+	rtw_write32(rtwdev, REG_TXIQK_PI_A_11N, 0x82170000);
+	rtw_write32(rtwdev, REG_RXIQK_PI_A_11N, 0x28171400);
+
+	/* LOK setting */
+	rtw_write32(rtwdev, REG_IQK_AGC_RSP_11N, 0x0046a8d1);
+
+	/* RXIQK mode */
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+	mdelay(1);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, 0x80000, 0x1);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWA, RFREG_MASK, 0x00007);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD1, RFREG_MASK, 0x0005f);
+	rtw_write_rf(rtwdev, RF_PATH_A, RF_LUTWD0, RFREG_MASK, 0xb3fdb);
+	rtw_write_rf(rtwdev, RF_PATH_A, iqk_cfg->reg_lutwe, RFREG_MASK, 0x00000);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x1 @%s RXIQK2 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLINT, RFREG_MASK));
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] RF0x2 @%s RXIQK2 = 0x%x\n",
+		iqk_cfg->name,
+		rtw_read_rf(rtwdev, RF_PATH_A, RF_WLSEL, RFREG_MASK));
+
+	rtw8723d_iqk_one_shot(rtwdev, false, iqk_cfg);
+	status |= rtw8723d_iqk_check_rx_failed(rtwdev, iqk_cfg);
+
+restore:
+	rtw8723d_iqk_txrx_path_post(rtwdev, iqk_cfg, backup);
+
+	return status;
+}
+
+static
+void rtw8723d_iqk_fill_s1_matrix(struct rtw_dev *rtwdev, const s32 result[])
+{
+	s32 oldval_1;
+	s32 x, y;
+	s32 tx1_a, tx1_a_ext;
+	s32 tx1_c, tx1_c_ext;
+
+	if (result[IQK_S1_TX_X] == 0)
+		return;
+
+	oldval_1 = rtw_read32_mask(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE,
+				   BIT_MASK_TXIQ_ELM_D);
+
+	x = iqkxy_to_s32(result[IQK_S1_TX_X]);
+	tx1_a = iqk_mult(x, oldval_1, &tx1_a_ext);
+	rtw_write32_mask(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE,
+			 BIT_MASK_TXIQ_ELM_A, tx1_a);
+	rtw_write32_mask(rtwdev, REG_OFDM_0_ECCA_THRESHOLD,
+			 BIT_MASK_OFDM0_EXT_A, tx1_a_ext);
+
+	y = iqkxy_to_s32(result[IQK_S1_TX_Y]);
+	tx1_c = iqk_mult(y, oldval_1, &tx1_c_ext);
+	rtw_write32_mask(rtwdev, REG_TXIQK_MATRIXA_LSB2_11N, MASKH4BITS,
+			 BIT_SET_TXIQ_ELM_C1(tx1_c));
+	rtw_write32_mask(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE,
+			 BIT_MASK_TXIQ_ELM_C, BIT_SET_TXIQ_ELM_C2(tx1_c));
+	rtw_write32_mask(rtwdev, REG_OFDM_0_ECCA_THRESHOLD,
+			 BIT_MASK_OFDM0_EXT_C, tx1_c_ext);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] X = 0x%x, TX1_A = 0x%x, oldval_1 0x%x\n",
+		x, tx1_a, oldval_1);
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] Y = 0x%x, TX1_C = 0x%x\n", y, tx1_c);
+
+	if (result[IQK_S1_RX_X] == 0)
+		return;
+
+	rtw_write32_mask(rtwdev, REG_A_RXIQI, BIT_MASK_RXIQ_S1_X,
+			 result[IQK_S1_RX_X]);
+	rtw_write32_mask(rtwdev, REG_A_RXIQI, BIT_MASK_RXIQ_S1_Y1,
+			 BIT_SET_RXIQ_S1_Y1(result[IQK_S1_RX_Y]));
+	rtw_write32_mask(rtwdev, REG_RXIQK_MATRIX_LSB_11N, BIT_MASK_RXIQ_S1_Y2,
+			 BIT_SET_RXIQ_S1_Y2(result[IQK_S1_RX_Y]));
+}
+
+static
+void rtw8723d_iqk_fill_s0_matrix(struct rtw_dev *rtwdev, const s32 result[])
+{
+	s32 oldval_0;
+	s32 x, y;
+	s32 tx0_a, tx0_a_ext;
+	s32 tx0_c, tx0_c_ext;
+
+	if (result[IQK_S0_TX_X] == 0)
+		return;
+
+	oldval_0 = rtw_read32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_D_S0);
+
+	x = iqkxy_to_s32(result[IQK_S0_TX_X]);
+	tx0_a = iqk_mult(x, oldval_0, &tx0_a_ext);
+
+	rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_S0, tx0_a);
+	rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_EXT_S0, tx0_a_ext);
+
+	y = iqkxy_to_s32(result[IQK_S0_TX_Y]);
+	tx0_c = iqk_mult(y, oldval_0, &tx0_c_ext);
+
+	rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_S0, tx0_c);
+	rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_EXT_S0, tx0_c_ext);
+
+	if (result[IQK_S0_RX_X] == 0)
+		return;
+
+	rtw_write32_mask(rtwdev, REG_RXIQ_AB_S0, BIT_MASK_RXIQ_X_S0,
+			 result[IQK_S0_RX_X]);
+	rtw_write32_mask(rtwdev, REG_RXIQ_AB_S0, BIT_MASK_RXIQ_Y_S0,
+			 result[IQK_S0_RX_Y]);
+}
+
+static void rtw8723d_iqk_path_adda_on(struct rtw_dev *rtwdev)
+{
+	int i;
+
+	for (i = 0; i < IQK_ADDA_REG_NUM; i++)
+		rtw_write32(rtwdev, iqk_adda_regs[i], 0x03c00016);
+}
+
+static void rtw8723d_iqk_config_mac(struct rtw_dev *rtwdev)
+{
+	rtw_write8(rtwdev, REG_TXPAUSE, 0xff);
+}
+
+static
+void rtw8723d_iqk_rf_standby(struct rtw_dev *rtwdev, enum rtw_rf_path path)
+{
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path-%s standby mode!\n",
+		path == RF_PATH_A ? "S1" : "S0");
+
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+	mdelay(1);
+	rtw_write_rf(rtwdev, path, RF_MODE, RFREG_MASK, 0x10000);
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, EN_IQK);
+}
+
+static
+bool rtw8723d_iqk_similarity_cmp(struct rtw_dev *rtwdev, s32 result[][IQK_NR],
+				 u8 c1, u8 c2)
+{
+	u32 i, j, diff;
+	u32 bitmap = 0;
+	u8 candidate[PATH_NR] = {IQK_ROUND_INVALID, IQK_ROUND_INVALID};
+	bool ret = true;
+
+	s32 tmp1, tmp2;
+
+	for (i = 0; i < IQK_NR; i++) {
+		tmp1 = iqkxy_to_s32(result[c1][i]);
+		tmp2 = iqkxy_to_s32(result[c2][i]);
+
+		diff = abs(tmp1 - tmp2);
+
+		if (diff <= MAX_TOLERANCE)
+			continue;
+
+		if ((i == IQK_S1_RX_X || i == IQK_S0_RX_X) && !bitmap) {
+			if (result[c1][i] + result[c1][i + 1] == 0)
+				candidate[i / IQK_SX_NR] = c2;
+			else if (result[c2][i] + result[c2][i + 1] == 0)
+				candidate[i / IQK_SX_NR] = c1;
+			else
+				bitmap |= BIT(i);
+		} else {
+			bitmap |= BIT(i);
+		}
+	}
+
+	if (bitmap != 0)
+		goto check_sim;
+
+	for (i = 0; i < PATH_NR; i++) {
+		if (candidate[i] == IQK_ROUND_INVALID)
+			continue;
+
+		for (j = i * IQK_SX_NR; j < i * IQK_SX_NR + 2; j++)
+			result[IQK_ROUND_HYBRID][j] = result[candidate[i]][j];
+		ret = false;
+	}
+
+	return ret;
+
+check_sim:
+	for (i = 0; i < IQK_NR; i++) {
+		j = i & ~1;	/* 2 bits are a pair for IQ[X, Y] */
+		if (bitmap & GENMASK(j + 1, j))
+			continue;
+
+		result[IQK_ROUND_HYBRID][i] = result[c1][i];
+	}
+
+	return false;
+}
+
+static
+void rtw8723d_iqk_precfg_path(struct rtw_dev *rtwdev, enum rtw8723d_path path)
+{
+	if (path == PATH_S0) {
+		rtw8723d_iqk_rf_standby(rtwdev, RF_PATH_A);
+		rtw8723d_iqk_path_adda_on(rtwdev);
+	}
+
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, EN_IQK);
+	rtw_write32(rtwdev, REG_TXIQK_11N, 0x01007c00);
+	rtw_write32(rtwdev, REG_RXIQK_11N, 0x01004800);
+
+	if (path == PATH_S1) {
+		rtw8723d_iqk_rf_standby(rtwdev, RF_PATH_B);
+		rtw8723d_iqk_path_adda_on(rtwdev);
+	}
+}
+
+static
+void rtw8723d_iqk_one_round(struct rtw_dev *rtwdev, s32 result[][IQK_NR], u8 t,
+			    const struct iqk_backup_regs *backup)
+{
+	u32 i;
+	u8 s1_ok, s0_ok;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] IQ Calibration for 1T1R_S0/S1 for %d times\n", t);
+
+	rtw8723d_iqk_path_adda_on(rtwdev);
+	rtw8723d_iqk_config_mac(rtwdev);
+	rtw_write32_mask(rtwdev, REG_CCK_ANT_SEL_11N, 0x0f000000, 0xf);
+	rtw_write32(rtwdev, REG_BB_RX_PATH_11N, 0x03a05611);
+	rtw_write32(rtwdev, REG_TRMUX_11N, 0x000800e4);
+	rtw_write32(rtwdev, REG_BB_PWR_SAV1_11N, 0x25204200);
+	rtw8723d_iqk_precfg_path(rtwdev, PATH_S1);
+
+	for (i = 0; i < PATH_IQK_RETRY; i++) {
+		s1_ok = rtw8723d_iqk_tx_path(rtwdev, &iqk_tx_cfg[PATH_S1], backup);
+		if (s1_ok == IQK_TX_OK) {
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[IQK] path S1 Tx IQK Success!!\n");
+			result[t][IQK_S1_TX_X] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_TX, BIT_MASK_RES_TX);
+			result[t][IQK_S1_TX_Y] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_TY, BIT_MASK_RES_TY);
+			break;
+		}
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S1 Tx IQK Fail!!\n");
+		result[t][IQK_S1_TX_X] = 0x100;
+		result[t][IQK_S1_TX_Y] = 0x0;
+	}
+
+	for (i = 0; i < PATH_IQK_RETRY; i++) {
+		s1_ok = rtw8723d_iqk_rx_path(rtwdev, &iqk_tx_cfg[PATH_S1], backup);
+		if (s1_ok == (IQK_TX_OK | IQK_RX_OK)) {
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[IQK] path S1 Rx IQK Success!!\n");
+			result[t][IQK_S1_RX_X] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_RX, BIT_MASK_RES_RX);
+			result[t][IQK_S1_RX_Y] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_RY, BIT_MASK_RES_RY);
+			break;
+		}
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S1 Rx IQK Fail!!\n");
+		result[t][IQK_S1_RX_X] = 0x100;
+		result[t][IQK_S1_RX_Y] = 0x0;
+	}
+
+	if (s1_ok == 0x0)
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S1 IQK is failed!!\n");
+
+	rtw8723d_iqk_precfg_path(rtwdev, PATH_S0);
+
+	for (i = 0; i < PATH_IQK_RETRY; i++) {
+		s0_ok = rtw8723d_iqk_tx_path(rtwdev, &iqk_tx_cfg[PATH_S0], backup);
+		if (s0_ok == IQK_TX_OK) {
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[IQK] path S0 Tx IQK Success!!\n");
+			result[t][IQK_S0_TX_X] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_TX, BIT_MASK_RES_TX);
+			result[t][IQK_S0_TX_Y] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_TY, BIT_MASK_RES_TY);
+			break;
+		}
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S0 Tx IQK Fail!!\n");
+		result[t][IQK_S0_TX_X] = 0x100;
+		result[t][IQK_S0_TX_Y] = 0x0;
+	}
+
+	for (i = 0; i < PATH_IQK_RETRY; i++) {
+		s0_ok = rtw8723d_iqk_rx_path(rtwdev, &iqk_tx_cfg[PATH_S0], backup);
+		if (s0_ok == (IQK_TX_OK | IQK_RX_OK)) {
+			rtw_dbg(rtwdev, RTW_DBG_RFK,
+				"[IQK] path S0 Rx IQK Success!!\n");
+
+			result[t][IQK_S0_RX_X] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_RX, BIT_MASK_RES_RX);
+			result[t][IQK_S0_RX_Y] =
+			  rtw_read32_mask(rtwdev, REG_IQK_RES_RY, BIT_MASK_RES_RY);
+			break;
+		}
+
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S0 Rx IQK Fail!!\n");
+		result[t][IQK_S0_RX_X] = 0x100;
+		result[t][IQK_S0_RX_Y] = 0x0;
+	}
+
+	if (s0_ok == 0x0)
+		rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] path S0 IQK is failed!!\n");
+
+	rtw_write32_mask(rtwdev, REG_FPGA0_IQK_11N, BIT_MASK_IQK_MOD, RST_IQK);
+	mdelay(1);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK] back to BB mode, load original value!\n");
+}
+
+static void rtw8723d_phy_calibration(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	s32 result[IQK_ROUND_SIZE][IQK_NR];
+	struct iqk_backup_regs backup;
+	u8 i, j;
+	u8 final_candidate = IQK_ROUND_INVALID;
+	bool good;
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] Start!!!\n");
+
+	memset(result, 0, sizeof(result));
+
+	rtw8723d_iqk_backup_path_ctrl(rtwdev, &backup);
+	rtw8723d_iqk_backup_lte_path_gnt(rtwdev, &backup);
+	rtw8723d_iqk_backup_regs(rtwdev, &backup);
+
+	for (i = IQK_ROUND_0; i <= IQK_ROUND_2; i++) {
+		rtw8723d_iqk_config_path_ctrl(rtwdev);
+		rtw8723d_iqk_config_lte_path_gnt(rtwdev);
+
+		rtw8723d_iqk_one_round(rtwdev, result, i, &backup);
+
+		if (i > IQK_ROUND_0)
+			rtw8723d_iqk_restore_regs(rtwdev, &backup);
+		rtw8723d_iqk_restore_lte_path_gnt(rtwdev, &backup);
+		rtw8723d_iqk_restore_path_ctrl(rtwdev, &backup);
+
+		for (j = IQK_ROUND_0; j < i; j++) {
+			good = rtw8723d_iqk_similarity_cmp(rtwdev, result, j, i);
+
+			if (good) {
+				final_candidate = j;
+				rtw_dbg(rtwdev, RTW_DBG_RFK,
+					"[IQK] cmp %d:%d final_candidate is %x\n",
+					j, i, final_candidate);
+				goto iqk_done;
+			}
+		}
+	}
+
+	if (final_candidate == IQK_ROUND_INVALID) {
+		s32 reg_tmp = 0;
+
+		for (i = 0; i < IQK_NR; i++)
+			reg_tmp += result[IQK_ROUND_HYBRID][i];
+
+		if (reg_tmp != 0) {
+			final_candidate = IQK_ROUND_HYBRID;
+		} else {
+			WARN(1, "IQK is failed\n");
+			goto out;
+		}
+	}
+
+iqk_done:
+	rtw8723d_iqk_fill_s1_matrix(rtwdev, result[final_candidate]);
+	rtw8723d_iqk_fill_s0_matrix(rtwdev, result[final_candidate]);
+
+	dm_info->iqk.result.s1_x = result[final_candidate][IQK_S1_TX_X];
+	dm_info->iqk.result.s1_y = result[final_candidate][IQK_S1_TX_Y];
+	dm_info->iqk.result.s0_x = result[final_candidate][IQK_S0_TX_X];
+	dm_info->iqk.result.s0_y = result[final_candidate][IQK_S0_TX_Y];
+	dm_info->iqk.done = true;
+
+out:
+	rtw_write32(rtwdev, REG_BB_SEL_BTG, backup.bb_sel_btg);
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] final_candidate is %x\n",
+		final_candidate);
+
+	for (i = IQK_ROUND_0; i < IQK_ROUND_SIZE; i++)
+		rtw_dbg(rtwdev, RTW_DBG_RFK,
+			"[IQK] Result %u: rege94_s1=%x rege9c_s1=%x regea4_s1=%x regeac_s1=%x rege94_s0=%x rege9c_s0=%x regea4_s0=%x regeac_s0=%x %s\n",
+			i,
+			result[i][0], result[i][1], result[i][2], result[i][3],
+			result[i][4], result[i][5], result[i][6], result[i][7],
+			final_candidate == i ? "(final candidate)" : "");
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK]0xc80 = 0x%x 0xc94 = 0x%x 0xc14 = 0x%x 0xca0 = 0x%x\n",
+		rtw_read32(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE),
+		rtw_read32(rtwdev, REG_TXIQK_MATRIXA_LSB2_11N),
+		rtw_read32(rtwdev, REG_A_RXIQI),
+		rtw_read32(rtwdev, REG_RXIQK_MATRIX_LSB_11N));
+	rtw_dbg(rtwdev, RTW_DBG_RFK,
+		"[IQK]0xcd0 = 0x%x 0xcd4 = 0x%x 0xcd8 = 0x%x\n",
+		rtw_read32(rtwdev, REG_TXIQ_AB_S0),
+		rtw_read32(rtwdev, REG_TXIQ_CD_S0),
+		rtw_read32(rtwdev, REG_RXIQ_AB_S0));
+
+	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] finished\n");
+}
+
 static struct rtw_chip_ops rtw8723d_ops = {
 	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
@@ -647,6 +1463,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.cfg_ldo25		= rtw8723d_cfg_ldo25,
 	.efuse_grant		= rtw8723d_efuse_grant,
 	.false_alarm_statistics	= rtw8723d_false_alarm_statistics,
+	.phy_calibration	= rtw8723d_phy_calibration,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 6a7d58992df5..549dfcf7f5da 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -5,6 +5,34 @@
 #ifndef __RTW8723D_H__
 #define __RTW8723D_H__
 
+enum rtw8723d_path {
+	PATH_S1,
+	PATH_S0,
+	PATH_NR,
+};
+
+enum rtw8723d_iqk_round {
+	IQK_ROUND_0,
+	IQK_ROUND_1,
+	IQK_ROUND_2,
+	IQK_ROUND_HYBRID,
+	IQK_ROUND_SIZE,
+	IQK_ROUND_INVALID = 0xff,
+};
+
+enum rtw8723d_iqk_result {
+	IQK_S1_TX_X,
+	IQK_S1_TX_Y,
+	IQK_S1_RX_X,
+	IQK_S1_RX_Y,
+	IQK_S0_TX_X,
+	IQK_S0_TX_Y,
+	IQK_S0_RX_X,
+	IQK_S0_RX_Y,
+	IQK_NR,
+	IQK_SX_NR = IQK_NR / PATH_NR,
+};
+
 struct rtw8723de_efuse {
 	u8 mac_addr[ETH_ALEN];		/* 0xd0 */
 	u8 vender_id[2];
@@ -66,6 +94,34 @@ struct rtw8723d_efuse {
 #define GET_PHY_STAT_P1_RXSNR_A(phy_stat)                                      \
 	le32_get_bits(*((__le32 *)(phy_stat) + 0x06), GENMASK(7, 0))
 
+static inline s32 iqkxy_to_s32(s32 val)
+{
+	/* val is Q10.8 */
+	return sign_extend32(val, 9);
+}
+
+static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
+{
+	/* x, y and return value are Q10.8 */
+	s32 t;
+
+	t = x * y;
+	if (ext)
+		*ext = (t >> 7) & 0x1;	/* Q.16 --> Q.9; get LSB of Q.9 */
+
+	return (t >> 8);	/* Q.16 --> Q.8 */
+}
+
+#define MAX_TOLERANCE	5
+#define IQK_TX_X_ERR	0x142
+#define IQK_TX_Y_ERR	0x42
+#define IQK_RX_X_UPPER	0x11a
+#define IQK_RX_X_LOWER	0xe6
+#define IQK_RX_Y_LMT	0x1a
+#define IQK_TX_OK	BIT(0)
+#define IQK_RX_OK	BIT(1)
+#define PATH_IQK_RETRY	2
+
 #define SPUR_THRES		0x16
 #define CCK_DFIR_NR		3
 #define DIS_3WIRE		0xccf000c0
@@ -80,15 +136,20 @@ struct rtw8723d_efuse {
 #define BIT_MASK_RFMOD		BIT(0)
 #define BIT_LCK			BIT(15)
 
+#define REG_BTG_SEL		0x0067
+#define REG_LTECOEX_PATH_CONTROL	0x0070
 #define REG_PSDFN		0x0808
+#define REG_BB_PWR_SAV1_11N	0x0874
 #define REG_ANALOG_P4		0x088c
 #define REG_PSDRPT		0x08b4
 #define REG_FPGA1_RFMOD		0x0900
+#define REG_BB_SEL_BTG		0x0948
 #define REG_BBRX_DFIR		0x0954
 #define BIT_MASK_RXBB_DFIR	GENMASK(27, 24)
 #define BIT_RXBB_DFIR_EN	BIT(19)
 #define REG_CCK0_SYS		0x0a00
 #define BIT_CCK_SIDE_BAND	BIT(4)
+#define REG_CCK_ANT_SEL_11N	0x0a04
 #define REG_CCK_FA_RST_11N	0x0a2c
 #define BIT_MASK_CCK_CNT_KEEP	BIT(12)
 #define BIT_MASK_CCK_CNT_EN	BIT(13)
@@ -103,13 +164,48 @@ struct rtw8723d_efuse {
 #define BIT_MASK_CCK_FA_LSB	GENMASK(15, 8)
 #define REG_OFDM_FA_HOLDC_11N	0x0c00
 #define BIT_MASK_OFDM_FA_KEEP	BIT(31)
+#define REG_BB_RX_PATH_11N	0x0c04
+#define REG_TRMUX_11N		0x0c08
 #define REG_OFDM_FA_RSTC_11N	0x0c0c
 #define BIT_MASK_OFDM_FA_RST	BIT(31)
+#define REG_A_RXIQI		0x0c14
+#define BIT_MASK_RXIQ_S1_X	0x000003FF
+#define BIT_MASK_RXIQ_S1_Y1	0x0000FC00
+#define BIT_SET_RXIQ_S1_Y1(y)	((y) & 0x3F)
 #define REG_OFDM0_RXDSP		0x0c40
 #define BIT_MASK_RXDSP		GENMASK(28, 24)
 #define BIT_EN_RXDSP		BIT(9)
+#define REG_OFDM_0_ECCA_THRESHOLD	0x0c4c
+#define BIT_MASK_OFDM0_EXT_A	BIT(31)
+#define BIT_MASK_OFDM0_EXT_C	BIT(29)
+#define BIT_MASK_OFDM0_EXTS	(BIT(31) | BIT(29) | BIT(28))
+#define BIT_SET_OFDM0_EXTS(a, c, d) (((a) << 31) | ((c) << 29) | ((d) << 28))
 #define REG_OFDM0_XAAGC1	0x0c50
 #define REG_OFDM0_XBAGC1	0x0c58
+#define REG_OFDM_0_XA_TX_IQ_IMBALANCE	0x0c80
+#define BIT_MASK_TXIQ_ELM_A	0x03ff
+#define BIT_SET_TXIQ_ELM_ACD(a, c, d) (((d) << 22) | (((c) & 0x3F) << 16) |    \
+				       ((a) & 0x03ff))
+#define BIT_MASK_TXIQ_ELM_C	GENMASK(21, 16)
+#define BIT_SET_TXIQ_ELM_C2(c)	((c) & 0x3F)
+#define BIT_MASK_TXIQ_ELM_D	GENMASK(31, 22)
+#define REG_TXIQK_MATRIXA_LSB2_11N	0x0c94
+#define BIT_SET_TXIQ_ELM_C1(c)	(((c) & 0x000003C0) >> 6)
+#define REG_RXIQK_MATRIX_LSB_11N	0x0ca0
+#define BIT_MASK_RXIQ_S1_Y2	0xF0000000
+#define BIT_SET_RXIQ_S1_Y2(y)	(((y) >> 6) & 0xF)
+#define REG_TXIQ_AB_S0		0x0cd0
+#define BIT_MASK_TXIQ_A_S0	0x000007FE
+#define BIT_MASK_TXIQ_A_EXT_S0	BIT(0)
+#define BIT_MASK_TXIQ_B_S0	0x0007E000
+#define REG_TXIQ_CD_S0		0x0cd4
+#define BIT_MASK_TXIQ_C_S0	0x000007FE
+#define BIT_MASK_TXIQ_C_EXT_S0	BIT(0)
+#define BIT_MASK_TXIQ_D_S0	GENMASK(22, 13)
+#define BIT_MASK_TXIQ_D_EXT_S0	BIT(12)
+#define REG_RXIQ_AB_S0		0x0cd8
+#define BIT_MASK_RXIQ_X_S0	0x000003FF
+#define BIT_MASK_RXIQ_Y_S0	0x003FF000
 #define REG_OFDM_FA_TYPE1_11N	0x0cf0
 #define BIT_MASK_OFDM_FF_CNT	GENMASK(15, 0)
 #define BIT_MASK_OFDM_SF_CNT	GENMASK(31, 16)
@@ -132,6 +228,32 @@ struct rtw8723d_efuse {
 #define BIT_MASK_OFDM_CRC_CNT	GENMASK(31, 16)
 #define REG_OFDM_FA_TYPE4_11N	0x0da8
 #define BIT_MASK_OFDM_MNS_CNT	GENMASK(15, 0)
+#define REG_FPGA0_IQK_11N	0x0e28
+#define BIT_MASK_IQK_MOD	0xffffff00
+#define EN_IQK			0x808000
+#define RST_IQK			0x000000
+#define REG_TXIQK_TONE_A_11N	0x0e30
+#define REG_RXIQK_TONE_A_11N	0x0e34
+#define REG_TXIQK_PI_A_11N	0x0e38
+#define REG_RXIQK_PI_A_11N	0x0e3c
+#define REG_TXIQK_11N		0x0e40
+#define BIT_SET_TXIQK_11N(x, y)	(0x80007C00 | ((x) << 16) | (y))
+#define REG_RXIQK_11N		0x0e44
+#define REG_IQK_AGC_PTS_11N	0x0e48
+#define REG_IQK_AGC_RSP_11N	0x0e4c
+#define REG_TX_IQK_TONE_B	0x0e50
+#define REG_RX_IQK_TONE_B	0x0e54
+#define REG_IQK_RES_TX		0x0e94
+#define BIT_MASK_RES_TX		GENMASK(25, 16)
+#define REG_IQK_RES_TY		0x0e9c
+#define BIT_MASK_RES_TY		GENMASK(25, 16)
+#define REG_IQK_RES_RX		0x0ea4
+#define BIT_MASK_RES_RX		GENMASK(25, 16)
+#define REG_IQK_RES_RY		0x0eac
+#define BIT_IQK_TX_FAIL		BIT(28)
+#define BIT_IQK_RX_FAIL		BIT(27)
+#define BIT_IQK_DONE		BIT(26)
+#define BIT_MASK_RES_RY		GENMASK(25, 16)
 #define REG_PAGE_F_RST_11N		0x0f14
 #define BIT_MASK_F_RST_ALL		BIT(16)
 #define REG_IGI_C_11N			0x0f84
-- 
cgit v1.2.3-59-g8ed1b


From 608d2a08f842d8f1ca877ced7bf092f084717553 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:15 +0800
Subject: rtw88: 8723d: Add power tracking

When the chip's temperature changes, its RF characteristics change too. To
keep the characteristics consistent, 8723d uses a thermal meter to assist in
calibrating LCK, IQK, crystal and TX power.

A base thermal value is programmed in efuse, and all calibration data in
the MP process is based on this thermal value. So we calculate the delta
between the current thermal value and the base value, and use this delta to
look up the XTAL and TX power offset tables to know how much we need to adjust.

For IQK and LCK, the driver checks whether the delta of thermal value is over
8; if so, they are triggered.

For crystal adjustment, when delta of thermal value is changed, we check
XTAL tables to get offset of XTAL value. If thermal value is larger than
base value, positive table (_p as suffix) is used. Otherwise, we use
negative table (_n as suffix). Then, we add offset to XTAL default value
programmed in efuse, and write sum value to register.

To compensate TX power, there are two hierarchical tables. The first level
uses the delta of thermal value to access eight tables to yield the delta of
TX power index. Then, the base TX power index is added to get the index into
the BB swing table (second level tables), from which the register value is
derived.

The BB swing table can't deal with all cases, namely when the desired index
falls outside the range of the table. In this case, TX AGC is used to
compensate the remnant part. Assume 'upper' is the upper bound of the BB swing
table, and 'target' is the desired index. Then, we can illustrate them as

  compensation method    BB swing        TX AGC
  -------------------    --------    --------------
  target > upper         upper       target - upper
  target < 0             0           target
  otherwise              target      0

For debug purpose, add a column 'rem' to tx_pwr_tbl entry, and it looks
like

  path rate       pwr       base      (byr  lmt ) rem
    A  CCK_1M     32(0x20)   34   -2 (   0   -2)    0

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/debug.c    |   9 +-
 drivers/net/wireless/realtek/rtw88/main.h     |   4 +
 drivers/net/wireless/realtek/rtw88/phy.c      |   6 +-
 drivers/net/wireless/realtek/rtw88/phy.h      |   1 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 400 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |   7 +
 6 files changed, 422 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/debug.c b/drivers/net/wireless/realtek/rtw88/debug.c
index b4964306de61..09f04feb8fe1 100644
--- a/drivers/net/wireless/realtek/rtw88/debug.c
+++ b/drivers/net/wireless/realtek/rtw88/debug.c
@@ -531,8 +531,8 @@ static int rtw_debugfs_get_tx_pwr_tbl(struct seq_file *m, void *v)
 	u8 ch = hal->current_channel;
 	u8 regd = rtwdev->regd.txpwr_regd;
 
-	seq_printf(m, "%-4s %-10s %-3s%6s %-4s %4s (%-4s %-4s)\n",
-		   "path", "rate", "pwr", "", "base", "", "byr", "lmt");
+	seq_printf(m, "%-4s %-10s %-3s%6s %-4s %4s (%-4s %-4s) %-4s\n",
+		   "path", "rate", "pwr", "", "base", "", "byr", "lmt", "rem");
 
 	mutex_lock(&hal->tx_power_mutex);
 	for (path = RF_PATH_A; path <= RF_PATH_B; path++) {
@@ -554,13 +554,14 @@ static int rtw_debugfs_get_tx_pwr_tbl(struct seq_file *m, void *v)
 
 			seq_printf(m, "%4c ", path + 'A');
 			rtw_print_rate(m, rate);
-			seq_printf(m, " %3u(0x%02x) %4u %4d (%4d %4d)\n",
+			seq_printf(m, " %3u(0x%02x) %4u %4d (%4d %4d) %4d\n",
 				   hal->tx_pwr_tbl[path][rate],
 				   hal->tx_pwr_tbl[path][rate],
 				   pwr_param.pwr_base,
 				   min_t(s8, pwr_param.pwr_offset,
 					 pwr_param.pwr_limit),
-				   pwr_param.pwr_offset, pwr_param.pwr_limit);
+				   pwr_param.pwr_offset, pwr_param.pwr_limit,
+				   pwr_param.pwr_remnant);
 		}
 	}
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index af8c50e3687d..bfdc27c187f6 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1056,6 +1056,8 @@ struct rtw_pwr_track_tbl {
 	const u8 *pwrtrk_2g_cckb_p;
 	const u8 *pwrtrk_2g_ccka_n;
 	const u8 *pwrtrk_2g_ccka_p;
+	const s8 *pwrtrk_xtal_n;
+	const s8 *pwrtrk_xtal_p;
 };
 
 enum rtw_wlan_cpu {
@@ -1447,6 +1449,8 @@ struct rtw_dm_info {
 	bool pwr_trk_triggered;
 	bool pwr_trk_init_trigger;
 	struct ewma_thermal avg_thermal[RTW_RF_PATH_MAX];
+	s8 txagc_remnant_cck;
+	s8 txagc_remnant_ofdm;
 
 	/* backup dack results for each path and I/Q */
 	u32 dack_adck[RTW_RF_PATH_MAX];
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 837b7bd04126..45181f602e3f 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -1785,11 +1785,13 @@ void rtw_get_tx_power_params(struct rtw_dev *rtwdev, u8 path, u8 rate, u8 bw,
 			     u8 ch, u8 regd, struct rtw_power_params *pwr_param)
 {
 	struct rtw_hal *hal = &rtwdev->hal;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
 	struct rtw_txpwr_idx *pwr_idx;
 	u8 group, band;
 	u8 *base = &pwr_param->pwr_base;
 	s8 *offset = &pwr_param->pwr_offset;
 	s8 *limit = &pwr_param->pwr_limit;
+	s8 *remnant = &pwr_param->pwr_remnant;
 
 	pwr_idx = &rtwdev->efuse.txpwr_idx_table[path];
 	group = rtw_get_channel_group(ch);
@@ -1811,6 +1813,8 @@ void rtw_get_tx_power_params(struct rtw_dev *rtwdev, u8 path, u8 rate, u8 bw,
 
 	*limit = rtw_phy_get_tx_power_limit(rtwdev, band, bw, path,
 					    rate, ch, regd);
+	*remnant = (rate <= DESC_RATE11M ? dm_info->txagc_remnant_cck :
+		    dm_info->txagc_remnant_ofdm);
 }
 
 u8
@@ -1830,7 +1834,7 @@ rtw_phy_get_tx_power_index(struct rtw_dev *rtwdev, u8 rf_path, u8 rate,
 	if (rtwdev->chip->en_dis_dpd)
 		offset += rtw_phy_get_dis_dpd_by_rate_diff(rtwdev, rate);
 
-	tx_power += offset;
+	tx_power += offset + pwr_param.pwr_remnant;
 
 	if (tx_power > rtwdev->chip->max_power_index)
 		tx_power = rtwdev->chip->max_power_index;
diff --git a/drivers/net/wireless/realtek/rtw88/phy.h b/drivers/net/wireless/realtek/rtw88/phy.h
index 413bf7165cc0..b924ed07630a 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.h
+++ b/drivers/net/wireless/realtek/rtw88/phy.h
@@ -141,6 +141,7 @@ struct rtw_power_params {
 	u8 pwr_base;
 	s8 pwr_offset;
 	s8 pwr_limit;
+	s8 pwr_remnant;
 };
 
 void
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 0cfa493e7742..4fc2d0bb2704 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -92,6 +92,46 @@ static void rtw8723d_lck(struct rtw_dev *rtwdev)
 		rtw_write8(rtwdev, REG_TXPAUSE, 0x00);
 }
 
+static const u32 rtw8723d_ofdm_swing_table[] = {
+	0x0b40002d, 0x0c000030, 0x0cc00033, 0x0d800036, 0x0e400039, 0x0f00003c,
+	0x10000040, 0x11000044, 0x12000048, 0x1300004c, 0x14400051, 0x15800056,
+	0x16c0005b, 0x18000060, 0x19800066, 0x1b00006c, 0x1c800072, 0x1e400079,
+	0x20000080, 0x22000088, 0x24000090, 0x26000098, 0x288000a2, 0x2ac000ab,
+	0x2d4000b5, 0x300000c0, 0x32c000cb, 0x35c000d7, 0x390000e4, 0x3c8000f2,
+	0x40000100, 0x43c0010f, 0x47c0011f, 0x4c000130, 0x50800142, 0x55400155,
+	0x5a400169, 0x5fc0017f, 0x65400195, 0x6b8001ae, 0x71c001c7, 0x788001e2,
+	0x7f8001fe,
+};
+
+static const u32 rtw8723d_cck_swing_table[] = {
+	0x0CD, 0x0D9, 0x0E6, 0x0F3, 0x102, 0x111, 0x121, 0x132, 0x144, 0x158,
+	0x16C, 0x182, 0x198, 0x1B1, 0x1CA, 0x1E5, 0x202, 0x221, 0x241, 0x263,
+	0x287, 0x2AE, 0x2D6, 0x301, 0x32F, 0x35F, 0x392, 0x3C9, 0x402, 0x43F,
+	0x47F, 0x4C3, 0x50C, 0x558, 0x5A9, 0x5FF, 0x65A, 0x6BA, 0x720, 0x78C,
+	0x7FF,
+};
+
+#define RTW_OFDM_SWING_TABLE_SIZE	ARRAY_SIZE(rtw8723d_ofdm_swing_table)
+#define RTW_CCK_SWING_TABLE_SIZE	ARRAY_SIZE(rtw8723d_cck_swing_table)
+
+static void rtw8723d_pwrtrack_init(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 path;
+
+	dm_info->default_ofdm_index = RTW_DEF_OFDM_SWING_INDEX;
+
+	for (path = RF_PATH_A; path < rtwdev->hal.rf_path_num; path++) {
+		ewma_thermal_init(&dm_info->avg_thermal[path]);
+		dm_info->delta_power_index[path] = 0;
+	}
+	dm_info->pwr_trk_triggered = false;
+	dm_info->pwr_trk_init_trigger = true;
+	dm_info->thermal_meter_k = rtwdev->efuse.thermal_meter_k;
+	dm_info->txagc_remnant_cck = 0;
+	dm_info->txagc_remnant_ofdm = 0;
+}
+
 static void rtw8723d_phy_set_param(struct rtw_dev *rtwdev)
 {
 	u8 xtal_cap;
@@ -158,6 +198,8 @@ static void rtw8723d_phy_set_param(struct rtw_dev *rtwdev)
 
 	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x50);
 	rtw_write32_mask(rtwdev, REG_OFDM0_XAAGC1, MASKBYTE0, 0x20);
+
+	rtw8723d_pwrtrack_init(rtwdev);
 }
 
 static void rtw8723de_efuse_parsing(struct rtw_efuse *efuse,
@@ -1450,6 +1492,298 @@ out:
 	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] finished\n");
 }
 
+static u8 rtw8723d_pwrtrack_get_limit_ofdm(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	u8 tx_rate = dm_info->tx_rate;
+	u8 limit_ofdm = 30;
+
+	switch (tx_rate) {
+	case DESC_RATE1M...DESC_RATE5_5M:
+	case DESC_RATE11M:
+		break;
+	case DESC_RATE6M...DESC_RATE48M:
+		limit_ofdm = 36;
+		break;
+	case DESC_RATE54M:
+		limit_ofdm = 34;
+		break;
+	case DESC_RATEMCS0...DESC_RATEMCS2:
+		limit_ofdm = 38;
+		break;
+	case DESC_RATEMCS3...DESC_RATEMCS4:
+		limit_ofdm = 36;
+		break;
+	case DESC_RATEMCS5...DESC_RATEMCS7:
+		limit_ofdm = 34;
+		break;
+	default:
+		rtw_warn(rtwdev, "pwrtrack unhandled tx_rate 0x%x\n", tx_rate);
+		break;
+	}
+
+	return limit_ofdm;
+}
+
+static void rtw8723d_set_iqk_matrix_by_result(struct rtw_dev *rtwdev,
+					      u32 ofdm_swing, u8 rf_path)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	s32 ele_A, ele_D, ele_C;
+	s32 ele_A_ext, ele_C_ext, ele_D_ext;
+	s32 iqk_result_x;
+	s32 iqk_result_y;
+	s32 value32;
+
+	switch (rf_path) {
+	default:
+	case RF_PATH_A:
+		iqk_result_x = dm_info->iqk.result.s1_x;
+		iqk_result_y = dm_info->iqk.result.s1_y;
+		break;
+	case RF_PATH_B:
+		iqk_result_x = dm_info->iqk.result.s0_x;
+		iqk_result_y = dm_info->iqk.result.s0_y;
+		break;
+	}
+
+	/* new element D */
+	ele_D = OFDM_SWING_D(ofdm_swing);
+	iqk_mult(iqk_result_x, ele_D, &ele_D_ext);
+	/* new element A */
+	iqk_result_x = iqkxy_to_s32(iqk_result_x);
+	ele_A = iqk_mult(iqk_result_x, ele_D, &ele_A_ext);
+	/* new element C */
+	iqk_result_y = iqkxy_to_s32(iqk_result_y);
+	ele_C = iqk_mult(iqk_result_y, ele_D, &ele_C_ext);
+
+	switch (rf_path) {
+	case RF_PATH_A:
+	default:
+		/* write new elements A, C, D, and element B is always 0 */
+		value32 = BIT_SET_TXIQ_ELM_ACD(ele_A, ele_C, ele_D);
+		rtw_write32(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE, value32);
+		value32 = BIT_SET_TXIQ_ELM_C1(ele_C);
+		rtw_write32_mask(rtwdev, REG_TXIQK_MATRIXA_LSB2_11N, MASKH4BITS,
+				 value32);
+		value32 = rtw_read32(rtwdev, REG_OFDM_0_ECCA_THRESHOLD);
+		value32 &= ~BIT_MASK_OFDM0_EXTS;
+		value32 |= BIT_SET_OFDM0_EXTS(ele_A_ext, ele_C_ext, ele_D_ext);
+		rtw_write32(rtwdev, REG_OFDM_0_ECCA_THRESHOLD, value32);
+		break;
+
+	case RF_PATH_B:
+		/* write new elements A, C, D, and element B is always 0 */
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_D_S0, ele_D);
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_S0, ele_C);
+		rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_S0, ele_A);
+
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_D_EXT_S0,
+				 ele_D_ext);
+		rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_EXT_S0,
+				 ele_A_ext);
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_EXT_S0,
+				 ele_C_ext);
+		break;
+	}
+}
+
+static void rtw8723d_set_iqk_matrix(struct rtw_dev *rtwdev, s8 ofdm_index,
+				    u8 rf_path)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	s32 value32;
+	u32 ofdm_swing;
+
+	if (ofdm_index >= RTW_OFDM_SWING_TABLE_SIZE)
+		ofdm_index = RTW_OFDM_SWING_TABLE_SIZE - 1;
+	else if (ofdm_index < 0)
+		ofdm_index = 0;
+
+	ofdm_swing = rtw8723d_ofdm_swing_table[ofdm_index];
+
+	if (dm_info->iqk.done) {
+		rtw8723d_set_iqk_matrix_by_result(rtwdev, ofdm_swing, rf_path);
+		return;
+	}
+
+	switch (rf_path) {
+	case RF_PATH_A:
+	default:
+		rtw_write32(rtwdev, REG_OFDM_0_XA_TX_IQ_IMBALANCE, ofdm_swing);
+		rtw_write32_mask(rtwdev, REG_TXIQK_MATRIXA_LSB2_11N, MASKH4BITS,
+				 0x00);
+		value32 = rtw_read32(rtwdev, REG_OFDM_0_ECCA_THRESHOLD);
+		value32 &= ~BIT_MASK_OFDM0_EXTS;
+		rtw_write32(rtwdev, REG_OFDM_0_ECCA_THRESHOLD, value32);
+		break;
+
+	case RF_PATH_B:
+		/* image S1:c80 to S0:Cd0 and Cd4 */
+		rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_S0,
+				 OFDM_SWING_A(ofdm_swing));
+		rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_B_S0,
+				 OFDM_SWING_B(ofdm_swing));
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_S0,
+				 OFDM_SWING_C(ofdm_swing));
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_D_S0,
+				 OFDM_SWING_D(ofdm_swing));
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_D_EXT_S0, 0x0);
+		rtw_write32_mask(rtwdev, REG_TXIQ_CD_S0, BIT_MASK_TXIQ_C_EXT_S0, 0x0);
+		rtw_write32_mask(rtwdev, REG_TXIQ_AB_S0, BIT_MASK_TXIQ_A_EXT_S0, 0x0);
+		break;
+	}
+}
+
+static void rtw8723d_pwrtrack_set_ofdm_pwr(struct rtw_dev *rtwdev, s8 swing_idx,
+					   s8 txagc_idx)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+
+	dm_info->txagc_remnant_ofdm = txagc_idx;
+
+	rtw8723d_set_iqk_matrix(rtwdev, swing_idx, RF_PATH_A);
+	rtw8723d_set_iqk_matrix(rtwdev, swing_idx, RF_PATH_B);
+}
+
+static void rtw8723d_pwrtrack_set_cck_pwr(struct rtw_dev *rtwdev, s8 swing_idx,
+					  s8 txagc_idx)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+
+	dm_info->txagc_remnant_cck = txagc_idx;
+
+	rtw_write32_mask(rtwdev, 0xab4, 0x000007FF,
+			 rtw8723d_cck_swing_table[swing_idx]);
+}
+
+static void rtw8723d_pwrtrack_set(struct rtw_dev *rtwdev, u8 path)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_hal *hal = &rtwdev->hal;
+	u8 limit_ofdm;
+	u8 limit_cck = 40;
+	s8 final_ofdm_swing_index;
+	s8 final_cck_swing_index;
+
+	limit_ofdm = rtw8723d_pwrtrack_get_limit_ofdm(rtwdev);
+
+	final_ofdm_swing_index = RTW_DEF_OFDM_SWING_INDEX +
+				 dm_info->delta_power_index[path];
+	final_cck_swing_index = RTW_DEF_CCK_SWING_INDEX +
+				dm_info->delta_power_index[path];
+
+	if (final_ofdm_swing_index > limit_ofdm)
+		rtw8723d_pwrtrack_set_ofdm_pwr(rtwdev, limit_ofdm,
+					       final_ofdm_swing_index - limit_ofdm);
+	else if (final_ofdm_swing_index < 0)
+		rtw8723d_pwrtrack_set_ofdm_pwr(rtwdev, 0,
+					       final_ofdm_swing_index);
+	else
+		rtw8723d_pwrtrack_set_ofdm_pwr(rtwdev, final_ofdm_swing_index, 0);
+
+	if (final_cck_swing_index > limit_cck)
+		rtw8723d_pwrtrack_set_cck_pwr(rtwdev, limit_cck,
+					      final_cck_swing_index - limit_cck);
+	else if (final_cck_swing_index < 0)
+		rtw8723d_pwrtrack_set_cck_pwr(rtwdev, 0,
+					      final_cck_swing_index);
+	else
+		rtw8723d_pwrtrack_set_cck_pwr(rtwdev, final_cck_swing_index, 0);
+
+	rtw_phy_set_tx_power_level(rtwdev, hal->current_channel);
+}
+
+static void rtw8723d_pwrtrack_set_xtal(struct rtw_dev *rtwdev, u8 therm_path,
+				       u8 delta)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	const struct rtw_pwr_track_tbl *tbl = rtwdev->chip->pwr_track_tbl;
+	const s8 *pwrtrk_xtal;
+	s8 xtal_cap;
+
+	if (dm_info->thermal_avg[therm_path] >
+	    rtwdev->efuse.thermal_meter[therm_path])
+		pwrtrk_xtal = tbl->pwrtrk_xtal_p;
+	else
+		pwrtrk_xtal = tbl->pwrtrk_xtal_n;
+
+	xtal_cap = rtwdev->efuse.crystal_cap & 0x3F;
+	xtal_cap = clamp_t(s8, xtal_cap + pwrtrk_xtal[delta], 0, 0x3F);
+	rtw_write32_mask(rtwdev, REG_AFE_CTRL3, BIT_MASK_XTAL,
+			 xtal_cap | (xtal_cap << 6));
+}
+
+static void rtw8723d_phy_pwrtrack(struct rtw_dev *rtwdev)
+{
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+	struct rtw_swing_table swing_table;
+	u8 thermal_value, delta, path;
+	bool do_iqk = false;
+
+	rtw_phy_config_swing_table(rtwdev, &swing_table);
+
+	if (rtwdev->efuse.thermal_meter[0] == 0xff)
+		return;
+
+	thermal_value = rtw_read_rf(rtwdev, RF_PATH_A, RF_T_METER, 0xfc00);
+
+	rtw_phy_pwrtrack_avg(rtwdev, thermal_value, RF_PATH_A);
+
+	do_iqk = rtw_phy_pwrtrack_need_iqk(rtwdev);
+
+	if (do_iqk)
+		rtw8723d_lck(rtwdev);
+
+	if (dm_info->pwr_trk_init_trigger)
+		dm_info->pwr_trk_init_trigger = false;
+	else if (!rtw_phy_pwrtrack_thermal_changed(rtwdev, thermal_value,
+						   RF_PATH_A))
+		goto iqk;
+
+	delta = rtw_phy_pwrtrack_get_delta(rtwdev, RF_PATH_A);
+
+	delta = min_t(u8, delta, RTW_PWR_TRK_TBL_SZ - 1);
+
+	for (path = 0; path < rtwdev->hal.rf_path_num; path++) {
+		s8 delta_cur, delta_last;
+
+		delta_last = dm_info->delta_power_index[path];
+		delta_cur = rtw_phy_pwrtrack_get_pwridx(rtwdev, &swing_table,
+							path, RF_PATH_A, delta);
+		if (delta_last == delta_cur)
+			continue;
+
+		dm_info->delta_power_index[path] = delta_cur;
+		rtw8723d_pwrtrack_set(rtwdev, path);
+	}
+
+	rtw8723d_pwrtrack_set_xtal(rtwdev, RF_PATH_A, delta);
+
+iqk:
+	if (do_iqk)
+		rtw8723d_phy_calibration(rtwdev);
+}
+
+void rtw8723d_pwr_track(struct rtw_dev *rtwdev)
+{
+	struct rtw_efuse *efuse = &rtwdev->efuse;
+	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
+
+	if (efuse->power_track_type != 0)
+		return;
+
+	if (!dm_info->pwr_trk_triggered) {
+		rtw_write_rf(rtwdev, RF_PATH_A, RF_T_METER,
+			     GENMASK(17, 16), 0x03);
+		dm_info->pwr_trk_triggered = true;
+		return;
+	}
+
+	rtw8723d_phy_pwrtrack(rtwdev);
+	dm_info->pwr_trk_triggered = false;
+}
+
 static struct rtw_chip_ops rtw8723d_ops = {
 	.phy_set_param		= rtw8723d_phy_set_param,
 	.read_efuse		= rtw8723d_read_efuse,
@@ -1464,6 +1798,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.efuse_grant		= rtw8723d_efuse_grant,
 	.false_alarm_statistics	= rtw8723d_false_alarm_statistics,
 	.phy_calibration	= rtw8723d_phy_calibration,
+	.pwr_track		= rtw8723d_pwr_track,
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
@@ -1937,6 +2272,69 @@ static const struct rtw_rfe_def rtw8723d_rfe_defs[] = {
 		.txpwr_lmt_tbl	= &rtw8723d_txpwr_lmt_tbl,},
 };
 
+static const u8 rtw8723d_pwrtrk_2gb_n[] = {
+	0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5,
+	6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10
+};
+
+static const u8 rtw8723d_pwrtrk_2gb_p[] = {
+	0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7,
+	7, 8, 8, 8, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10
+};
+
+static const u8 rtw8723d_pwrtrk_2ga_n[] = {
+	0, 0, 1, 1, 1, 2, 2, 3, 4, 4, 4, 4, 5, 5, 5,
+	6, 6, 7, 7, 8, 8, 8, 9, 9, 9, 10, 10, 10, 10, 10
+};
+
+static const u8 rtw8723d_pwrtrk_2ga_p[] = {
+	0, 0, 1, 1, 2, 2, 2, 3, 3, 4, 4, 5, 5, 6, 7,
+	7, 8, 8, 8, 9, 9, 10, 10, 10, 10, 10, 10, 10, 10, 10
+};
+
+static const u8 rtw8723d_pwrtrk_2g_cck_b_n[] = {
+	0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+	6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 11, 11, 11
+};
+
+static const u8 rtw8723d_pwrtrk_2g_cck_b_p[] = {
+	0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
+	7, 8, 9, 9, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11
+};
+
+static const u8 rtw8723d_pwrtrk_2g_cck_a_n[] = {
+	0, 1, 1, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6,
+	6, 7, 7, 7, 8, 8, 9, 9, 10, 10, 11, 11, 11, 11, 11
+};
+
+static const u8 rtw8723d_pwrtrk_2g_cck_a_p[] = {
+	0, 0, 1, 1, 2, 2, 3, 3, 4, 4, 5, 5, 6, 6, 7,
+	7, 8, 9, 9, 10, 10, 11, 11, 11, 11, 11, 11, 11, 11, 11
+};
+
+static const s8 rtw8723d_pwrtrk_xtal_n[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0
+};
+
+static const s8 rtw8723d_pwrtrk_xtal_p[] = {
+	0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+	0, -10, -12, -14, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16, -16
+};
+
+static const struct rtw_pwr_track_tbl rtw8723d_rtw_pwr_track_tbl = {
+	.pwrtrk_2gb_n = rtw8723d_pwrtrk_2gb_n,
+	.pwrtrk_2gb_p = rtw8723d_pwrtrk_2gb_p,
+	.pwrtrk_2ga_n = rtw8723d_pwrtrk_2ga_n,
+	.pwrtrk_2ga_p = rtw8723d_pwrtrk_2ga_p,
+	.pwrtrk_2g_cckb_n = rtw8723d_pwrtrk_2g_cck_b_n,
+	.pwrtrk_2g_cckb_p = rtw8723d_pwrtrk_2g_cck_b_p,
+	.pwrtrk_2g_ccka_n = rtw8723d_pwrtrk_2g_cck_a_n,
+	.pwrtrk_2g_ccka_p = rtw8723d_pwrtrk_2g_cck_a_p,
+	.pwrtrk_xtal_p = rtw8723d_pwrtrk_xtal_p,
+	.pwrtrk_xtal_n = rtw8723d_pwrtrk_xtal_n,
+};
+
 struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
@@ -1979,6 +2377,8 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.rfe_defs = rtw8723d_rfe_defs,
 	.rfe_defs_size = ARRAY_SIZE(rtw8723d_rfe_defs),
 	.rx_ldpc = false,
+	.pwr_track_tbl = &rtw8723d_rtw_pwr_track_tbl,
+	.iqk_threshold = 8,
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 549dfcf7f5da..843472a1cd54 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -112,6 +112,13 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 	return (t >> 8);	/* Q.16 --> Q.8 */
 }
 
+#define OFDM_SWING_A(swing)		FIELD_GET(GENMASK(9, 0), swing)
+#define OFDM_SWING_B(swing)		FIELD_GET(GENMASK(15, 10), swing)
+#define OFDM_SWING_C(swing)		FIELD_GET(GENMASK(21, 16), swing)
+#define OFDM_SWING_D(swing)		FIELD_GET(GENMASK(31, 22), swing)
+#define RTW_DEF_OFDM_SWING_INDEX	28
+#define RTW_DEF_CCK_SWING_INDEX		28
+
 #define MAX_TOLERANCE	5
 #define IQK_TX_X_ERR	0x142
 #define IQK_TX_Y_ERR	0x42
-- 
cgit v1.2.3-59-g8ed1b


From 05202746ed70ea9601ab78657025adcdf945293c Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:16 +0800
Subject: rtw88: 8723d: Add shutdown callback to disable BT USB suspend

Without this patch, the wifi card can't initialize properly because BT is in
USB suspend state. So, we disable BT USB suspend (wakeup) in the shutdown
callback, which is the moment before rebooting. To save BT USB power, we can't
do this in the 'remove' callback.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-5-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  1 +
 drivers/net/wireless/realtek/rtw88/pci.c      | 17 +++++++++++++++++
 drivers/net/wireless/realtek/rtw88/reg.h      |  1 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |  6 ++++++
 4 files changed, 25 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index bfdc27c187f6..592b28ed9376 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -795,6 +795,7 @@ struct rtw_regulatory {
 
 struct rtw_chip_ops {
 	int (*mac_init)(struct rtw_dev *rtwdev);
+	void (*shutdown)(struct rtw_dev *rtwdev);
 	int (*read_efuse)(struct rtw_dev *rtwdev, u8 *map);
 	void (*phy_set_param)(struct rtw_dev *rtwdev);
 	void (*set_channel)(struct rtw_dev *rtwdev, u8 channel,
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index a9752c34c9d8..e5ea30c04ac5 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1573,6 +1573,22 @@ static void rtw_pci_remove(struct pci_dev *pdev)
 	ieee80211_free_hw(hw);
 }
 
+static void rtw_pci_shutdown(struct pci_dev *pdev)
+{
+	struct ieee80211_hw *hw = pci_get_drvdata(pdev);
+	struct rtw_dev *rtwdev;
+	struct rtw_chip_info *chip;
+
+	if (!hw)
+		return;
+
+	rtwdev = hw->priv;
+	chip = rtwdev->chip;
+
+	if (chip->ops->shutdown)
+		chip->ops->shutdown(rtwdev);
+}
+
 static const struct pci_device_id rtw_pci_id_table[] = {
 #ifdef CONFIG_RTW88_8822BE
 	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xB822, rtw8822b_hw_spec) },
@@ -1593,6 +1609,7 @@ static struct pci_driver rtw_pci_driver = {
 	.probe = rtw_pci_probe,
 	.remove = rtw_pci_remove,
 	.driver.pm = &rtw_pm_ops,
+	.shutdown = rtw_pci_shutdown,
 };
 module_pci_driver(rtw_pci_driver);
 
diff --git a/drivers/net/wireless/realtek/rtw88/reg.h b/drivers/net/wireless/realtek/rtw88/reg.h
index d57de1a6cdcc..5a3e9cc7c400 100644
--- a/drivers/net/wireless/realtek/rtw88/reg.h
+++ b/drivers/net/wireless/realtek/rtw88/reg.h
@@ -83,6 +83,7 @@
 #define BIT_DBG_GNT_WL_BT	BIT(27)
 #define BIT_LTE_MUX_CTRL_PATH	BIT(26)
 #define REG_HCI_OPT_CTRL	0x0074
+#define BIT_USB_SUS_DIS		BIT(8)
 
 #define REG_AFE_CTRL_4		0x0078
 #define BIT_CK320M_AFE_EN	BIT(4)
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 4fc2d0bb2704..bfad27d10c40 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -549,6 +549,11 @@ static int rtw8723d_mac_init(struct rtw_dev *rtwdev)
 	return 0;
 }
 
+static void rtw8723d_shutdown(struct rtw_dev *rtwdev)
+{
+	rtw_write16_set(rtwdev, REG_HCI_OPT_CTRL, BIT_USB_SUS_DIS);
+}
+
 static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 {
 	u8 ldo_pwr;
@@ -1790,6 +1795,7 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.query_rx_desc		= rtw8723d_query_rx_desc,
 	.set_channel		= rtw8723d_set_channel,
 	.mac_init		= rtw8723d_mac_init,
+	.shutdown		= rtw8723d_shutdown,
 	.read_rf		= rtw_phy_read_rf_sipi,
 	.write_rf		= rtw_phy_write_rf_reg_sipi,
 	.set_tx_power_index	= rtw8723d_set_tx_power_index,
-- 
cgit v1.2.3-59-g8ed1b


From 7d754f974ac79640c8efda86d35d766704ebaa8e Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:17 +0800
Subject: rtw88: 8723d: implement flush queue

Flush queue is used to check if a queue is empty before doing something
else. Since 8723D uses different registers, and its available/reserved page
numbers occupy 8 bits instead of 16 bits, use a 'wsize' field to select
which of rtw_read{8,16} is used.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-6-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac.c      | 29 ++++++++++-----------------
 drivers/net/wireless/realtek/rtw88/main.h     | 11 ++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 17 ++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 17 ++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 17 ++++++++++++++++
 5 files changed, 73 insertions(+), 18 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index e8ffeb338584..3a5cfebe5a62 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -919,31 +919,24 @@ static u32 get_priority_queues(struct rtw_dev *rtwdev, u32 queues)
 static void __rtw_mac_flush_prio_queue(struct rtw_dev *rtwdev,
 				       u32 prio_queue, bool drop)
 {
-	u32 addr;
+	struct rtw_chip_info *chip = rtwdev->chip;
+	const struct rtw_prioq_addr *addr;
+	bool wsize;
 	u16 avail_page, rsvd_page;
 	int i;
 
-	switch (prio_queue) {
-	case RTW_DMA_MAPPING_EXTRA:
-		addr = REG_FIFOPAGE_INFO_4;
-		break;
-	case RTW_DMA_MAPPING_LOW:
-		addr = REG_FIFOPAGE_INFO_2;
-		break;
-	case RTW_DMA_MAPPING_NORMAL:
-		addr = REG_FIFOPAGE_INFO_3;
-		break;
-	case RTW_DMA_MAPPING_HIGH:
-		addr = REG_FIFOPAGE_INFO_1;
-		break;
-	default:
+	if (prio_queue >= RTW_DMA_MAPPING_MAX)
 		return;
-	}
+
+	addr = &chip->prioq_addrs->prio[prio_queue];
+	wsize = chip->prioq_addrs->wsize;
 
 	/* check if all of the reserved pages are available for 100 msecs */
 	for (i = 0; i < 5; i++) {
-		rsvd_page = rtw_read16(rtwdev, addr);
-		avail_page = rtw_read16(rtwdev, addr + 2);
+		rsvd_page = wsize ? rtw_read16(rtwdev, addr->rsvd) :
+				     rtw_read8(rtwdev, addr->rsvd);
+		avail_page = wsize ? rtw_read16(rtwdev, addr->avail) :
+				      rtw_read8(rtwdev, addr->avail);
 		if (rsvd_page == avail_page)
 			return;
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 592b28ed9376..28f88c44cb84 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -945,6 +945,16 @@ struct rtw_rqpn {
 	enum rtw_dma_mapping dma_map_hi;
 };
 
+struct rtw_prioq_addr {
+	u32 rsvd;
+	u32 avail;
+};
+
+struct rtw_prioq_addrs {
+	struct rtw_prioq_addr prio[RTW_DMA_MAPPING_MAX];
+	bool wsize;
+};
+
 struct rtw_page_table {
 	u16 hq_num;
 	u16 nq_num;
@@ -1101,6 +1111,7 @@ struct rtw_chip_info {
 	const struct rtw_pwr_seq_cmd **pwr_on_seq;
 	const struct rtw_pwr_seq_cmd **pwr_off_seq;
 	const struct rtw_rqpn *rqpn_table;
+	const struct rtw_prioq_addrs *prioq_addrs;
 	const struct rtw_page_table *page_table;
 	const struct rtw_intf_phy_para_table *intf_table;
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index bfad27d10c40..66bf907131d7 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2237,6 +2237,22 @@ static const struct rtw_rqpn rqpn_table_8723d[] = {
 	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
 };
 
+static const struct rtw_prioq_addrs prioq_addrs_8723d = {
+	.prio[RTW_DMA_MAPPING_EXTRA] = {
+		.rsvd = REG_RQPN_NPQ + 2, .avail = REG_RQPN_NPQ + 3,
+	},
+	.prio[RTW_DMA_MAPPING_LOW] = {
+		.rsvd = REG_RQPN + 1, .avail = REG_FIFOPAGE_CTRL_2 + 1,
+	},
+	.prio[RTW_DMA_MAPPING_NORMAL] = {
+		.rsvd = REG_RQPN_NPQ, .avail = REG_RQPN_NPQ + 1,
+	},
+	.prio[RTW_DMA_MAPPING_HIGH] = {
+		.rsvd = REG_RQPN, .avail = REG_FIFOPAGE_CTRL_2,
+	},
+	.wsize = false,
+};
+
 static const struct rtw_intf_phy_para pcie_gen1_param_8723d[] = {
 	{0x0008, 0x4a22,
 	 RTW_IP_SEL_PHY,
@@ -2370,6 +2386,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.pwr_off_seq = card_disable_flow_8723d,
 	.page_table = page_table_8723d,
 	.rqpn_table = rqpn_table_8723d,
+	.prioq_addrs = &prioq_addrs_8723d,
 	.intf_table = &phy_para_table_8723d,
 	.dig = rtw8723d_dig,
 	.dig_cck = rtw8723d_dig_cck,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 45636382dafd..22a7baeb87f6 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2083,6 +2083,22 @@ static const struct rtw_rqpn rqpn_table_8822b[] = {
 	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
 };
 
+static struct rtw_prioq_addrs prioq_addrs_8822b = {
+	.prio[RTW_DMA_MAPPING_EXTRA] = {
+		.rsvd = REG_FIFOPAGE_INFO_4, .avail = REG_FIFOPAGE_INFO_4 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_LOW] = {
+		.rsvd = REG_FIFOPAGE_INFO_2, .avail = REG_FIFOPAGE_INFO_2 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_NORMAL] = {
+		.rsvd = REG_FIFOPAGE_INFO_3, .avail = REG_FIFOPAGE_INFO_3 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_HIGH] = {
+		.rsvd = REG_FIFOPAGE_INFO_1, .avail = REG_FIFOPAGE_INFO_1 + 2,
+	},
+	.wsize = true,
+};
+
 static struct rtw_chip_ops rtw8822b_ops = {
 	.phy_set_param		= rtw8822b_phy_set_param,
 	.read_efuse		= rtw8822b_read_efuse,
@@ -2433,6 +2449,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 	.pwr_off_seq = card_disable_flow_8822b,
 	.page_table = page_table_8822b,
 	.rqpn_table = rqpn_table_8822b,
+	.prioq_addrs = &prioq_addrs_8822b,
 	.intf_table = &phy_para_table_8822b,
 	.dig = rtw8822b_dig,
 	.dig_cck = NULL,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 64b77a7cbffd..e4b184485cad 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -3933,6 +3933,22 @@ static const struct rtw_rqpn rqpn_table_8822c[] = {
 	 RTW_DMA_MAPPING_EXTRA, RTW_DMA_MAPPING_HIGH},
 };
 
+static const struct rtw_prioq_addrs prioq_addrs_8822c = {
+	.prio[RTW_DMA_MAPPING_EXTRA] = {
+		.rsvd = REG_FIFOPAGE_INFO_4, .avail = REG_FIFOPAGE_INFO_4 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_LOW] = {
+		.rsvd = REG_FIFOPAGE_INFO_2, .avail = REG_FIFOPAGE_INFO_2 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_NORMAL] = {
+		.rsvd = REG_FIFOPAGE_INFO_3, .avail = REG_FIFOPAGE_INFO_3 + 2,
+	},
+	.prio[RTW_DMA_MAPPING_HIGH] = {
+		.rsvd = REG_FIFOPAGE_INFO_1, .avail = REG_FIFOPAGE_INFO_1 + 2,
+	},
+	.wsize = true,
+};
+
 static struct rtw_chip_ops rtw8822c_ops = {
 	.phy_set_param		= rtw8822c_phy_set_param,
 	.read_efuse		= rtw8822c_read_efuse,
@@ -4295,6 +4311,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.pwr_off_seq = card_disable_flow_8822c,
 	.page_table = page_table_8822c,
 	.rqpn_table = rqpn_table_8822c,
+	.prioq_addrs = &prioq_addrs_8822c,
 	.intf_table = &phy_para_table_8822c,
 	.dig = rtw8822c_dig,
 	.dig_cck = NULL,
-- 
cgit v1.2.3-59-g8ed1b


From 7e14936881caf0a36b49833bd8aca13f2c5a8efe Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:18 +0800
Subject: rtw88: 8723d: set ltecoex register address in chip_info

Since 8723D uses different addresses for the ltecoex registers, this commit
adds a new field to chip_info and fills in the proper addresses. Then
ltecoex_read_reg() and ltecoex_reg_write() can use them to access ltecoex
according to the chip.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-7-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/main.h     |  7 +++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |  7 +++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |  3 +++
 drivers/net/wireless/realtek/rtw88/rtw8822b.c |  7 +++++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c |  7 +++++++
 drivers/net/wireless/realtek/rtw88/util.c     | 18 ++++++++++++------
 6 files changed, 43 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 28f88c44cb84..c9156b9b532c 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -519,6 +519,12 @@ struct rtw_hw_reg {
 	u32 mask;
 };
 
+struct rtw_ltecoex_addr {
+	u32 ctrl;
+	u32 wdata;
+	u32 rdata;
+};
+
 struct rtw_reg_domain {
 	u32 addr;
 	u32 mask;
@@ -1121,6 +1127,7 @@ struct rtw_chip_info {
 	u32 rf_sipi_addr[2];
 	const struct rtw_rf_sipi_addr *rf_sipi_read_addr;
 	u8 fix_rf_phy_num;
+	const struct rtw_ltecoex_addr *ltecoex_addr;
 
 	const struct rtw_table *mac_tbl;
 	const struct rtw_table *agc_tbl;
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 66bf907131d7..e3dc27d6a6ad 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2289,6 +2289,12 @@ static const struct rtw_rf_sipi_addr rtw8723d_rf_sipi_addr[] = {
 			.hssi_2 = 0x82c, .lssi_read_pi = 0x8bc},
 };
 
+static const struct rtw_ltecoex_addr rtw8723d_ltecoex_addr = {
+	.ctrl = REG_LTECOEX_CTRL,
+	.wdata = REG_LTECOEX_WRITE_DATA,
+	.rdata = REG_LTECOEX_READ_DATA,
+};
+
 static const struct rtw_rfe_def rtw8723d_rfe_defs[] = {
 	[0] = { .phy_pg_tbl	= &rtw8723d_bb_pg_tbl,
 		.txpwr_lmt_tbl	= &rtw8723d_txpwr_lmt_tbl,},
@@ -2393,6 +2399,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.rf_sipi_addr = {0x840, 0x844},
 	.rf_sipi_read_addr = rtw8723d_rf_sipi_addr,
 	.fix_rf_phy_num = 2,
+	.ltecoex_addr = &rtw8723d_ltecoex_addr,
 	.mac_tbl = &rtw8723d_mac_tbl,
 	.agc_tbl = &rtw8723d_agc_tbl,
 	.bb_tbl = &rtw8723d_bb_tbl,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 843472a1cd54..d1f1c1a594ad 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -145,6 +145,9 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 
 #define REG_BTG_SEL		0x0067
 #define REG_LTECOEX_PATH_CONTROL	0x0070
+#define REG_LTECOEX_CTRL	0x07c0
+#define REG_LTECOEX_WRITE_DATA	0x07c4
+#define REG_LTECOEX_READ_DATA	0x07c8
 #define REG_PSDFN		0x0808
 #define REG_BB_PWR_SAV1_11N	0x0874
 #define REG_ANALOG_P4		0x088c
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 22a7baeb87f6..18c5a5a96d90 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2057,6 +2057,12 @@ static const struct rtw_hw_reg rtw8822b_dig[] = {
 	[1] = { .addr = 0xe50, .mask = 0x7f },
 };
 
+static const struct rtw_ltecoex_addr rtw8822b_ltecoex_addr = {
+	.ctrl = LTECOEX_ACCESS_CTRL,
+	.wdata = LTECOEX_WRITE_DATA,
+	.rdata = LTECOEX_READ_DATA,
+};
+
 static const struct rtw_page_table page_table_8822b[] = {
 	{64, 64, 64, 64, 1},
 	{64, 64, 64, 64, 1},
@@ -2455,6 +2461,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 	.dig_cck = NULL,
 	.rf_base_addr = {0x2800, 0x2c00},
 	.rf_sipi_addr = {0xc90, 0xe90},
+	.ltecoex_addr = &rtw8822b_ltecoex_addr,
 	.mac_tbl = &rtw8822b_mac_tbl,
 	.agc_tbl = &rtw8822b_agc_tbl,
 	.bb_tbl = &rtw8822b_bb_tbl,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index e4b184485cad..d697d70170af 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -3907,6 +3907,12 @@ static const struct rtw_hw_reg rtw8822c_dig[] = {
 	[1] = { .addr = 0x1d70, .mask = 0x7f00 },
 };
 
+static const struct rtw_ltecoex_addr rtw8822c_ltecoex_addr = {
+	.ctrl = LTECOEX_ACCESS_CTRL,
+	.wdata = LTECOEX_WRITE_DATA,
+	.rdata = LTECOEX_READ_DATA,
+};
+
 static const struct rtw_page_table page_table_8822c[] = {
 	{64, 64, 64, 64, 1},
 	{64, 64, 64, 64, 1},
@@ -4317,6 +4323,7 @@ struct rtw_chip_info rtw8822c_hw_spec = {
 	.dig_cck = NULL,
 	.rf_base_addr = {0x3c00, 0x4c00},
 	.rf_sipi_addr = {0x1808, 0x4108},
+	.ltecoex_addr = &rtw8822c_ltecoex_addr,
 	.mac_tbl = &rtw8822c_mac_tbl,
 	.agc_tbl = &rtw8822c_agc_tbl,
 	.bb_tbl = &rtw8822c_bb_tbl,
diff --git a/drivers/net/wireless/realtek/rtw88/util.c b/drivers/net/wireless/realtek/rtw88/util.c
index 10f1117c0cfb..42cf177cd445 100644
--- a/drivers/net/wireless/realtek/rtw88/util.c
+++ b/drivers/net/wireless/realtek/rtw88/util.c
@@ -22,22 +22,28 @@ bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target)
 
 bool ltecoex_read_reg(struct rtw_dev *rtwdev, u16 offset, u32 *val)
 {
-	if (!check_hw_ready(rtwdev, LTECOEX_ACCESS_CTRL, LTECOEX_READY, 1))
+	struct rtw_chip_info *chip = rtwdev->chip;
+	const struct rtw_ltecoex_addr *ltecoex = chip->ltecoex_addr;
+
+	if (!check_hw_ready(rtwdev, ltecoex->ctrl, LTECOEX_READY, 1))
 		return false;
 
-	rtw_write32(rtwdev, LTECOEX_ACCESS_CTRL, 0x800F0000 | offset);
-	*val = rtw_read32(rtwdev, LTECOEX_READ_DATA);
+	rtw_write32(rtwdev, ltecoex->ctrl, 0x800F0000 | offset);
+	*val = rtw_read32(rtwdev, ltecoex->rdata);
 
 	return true;
 }
 
 bool ltecoex_reg_write(struct rtw_dev *rtwdev, u16 offset, u32 value)
 {
-	if (!check_hw_ready(rtwdev, LTECOEX_ACCESS_CTRL, LTECOEX_READY, 1))
+	struct rtw_chip_info *chip = rtwdev->chip;
+	const struct rtw_ltecoex_addr *ltecoex = chip->ltecoex_addr;
+
+	if (!check_hw_ready(rtwdev, ltecoex->ctrl, LTECOEX_READY, 1))
 		return false;
 
-	rtw_write32(rtwdev, LTECOEX_WRITE_DATA, value);
-	rtw_write32(rtwdev, LTECOEX_ACCESS_CTRL, 0xC00F0000 | offset);
+	rtw_write32(rtwdev, ltecoex->wdata, value);
+	rtw_write32(rtwdev, ltecoex->ctrl, 0xC00F0000 | offset);
 
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b


From d1391c490085156222e0baa8124b93fc494d96d8 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:19 +0800
Subject: rtw88: 8723d: Add coex support

8723D is a WiFi+BT combo card. To make them work properly together, we need
a coex mechanism to avoid interference, such as simultaneous TX. Basically,
coex.c provides the main algorithm to deal with many use cases, and this
commit adds the parameters and ops that differ from other chips, because the
coex hardware and WiFi generation are different.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-8-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 327 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |   3 +
 2 files changed, 330 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index e3dc27d6a6ad..400364aec393 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -1497,6 +1497,132 @@ out:
 	rtw_dbg(rtwdev, RTW_DBG_RFK, "[IQK] finished\n");
 }
 
+/* for coex */
+static void rtw8723d_coex_cfg_init(struct rtw_dev *rtwdev)
+{
+	/* enable TBTT interrupt */
+	rtw_write8_set(rtwdev, REG_BCN_CTRL, BIT_EN_BCN_FUNCTION);
+
+	/* BT report packet sample rate */
+	/* 0x790[5:0]=0x5 (masked write: OR-ing would keep stale field bits) */
+	rtw_write8_mask(rtwdev, REG_BT_TDMA_TIME, GENMASK(5, 0), 0x05);
+
+	/* enable BT counter statistics */
+	rtw_write8(rtwdev, REG_BT_STAT_CTRL, 0x1);
+
+	/* enable PTA (3-wire function from BT side) */
+	rtw_write32_set(rtwdev, REG_GPIO_MUXCFG, BIT_BT_PTA_EN);
+	rtw_write32_set(rtwdev, REG_GPIO_MUXCFG, BIT_BT_AOD_GPIO3);
+
+	/* enable PTA (tx/rx signal from WiFi side) */
+	rtw_write8_set(rtwdev, REG_QUEUE_CTRL, BIT_PTA_WL_TX_EN);
+}
+
+static void rtw8723d_coex_cfg_gnt_fix(struct rtw_dev *rtwdev)
+{
+}
+
+static void rtw8723d_coex_cfg_gnt_debug(struct rtw_dev *rtwdev)
+{
+	rtw_write8_mask(rtwdev, REG_LEDCFG2, BIT(6), 0);
+	rtw_write8_mask(rtwdev, REG_PAD_CTRL1 + 3, BIT(0), 0);
+	rtw_write8_mask(rtwdev, REG_GPIO_INTM + 2, BIT(4), 0);
+	rtw_write8_mask(rtwdev, REG_GPIO_MUXCFG + 2, BIT(1), 0);
+	rtw_write8_mask(rtwdev, REG_PAD_CTRL1 + 3, BIT(1), 0);
+	rtw_write8_mask(rtwdev, REG_PAD_CTRL1 + 2, BIT(7), 0);
+	rtw_write8_mask(rtwdev, REG_SYS_CLKR + 1, BIT(1), 0);
+	rtw_write8_mask(rtwdev, REG_SYS_SDIO_CTRL + 3, BIT(3), 0);
+}
+
+static void rtw8723d_coex_cfg_rfe_type(struct rtw_dev *rtwdev)
+{
+	struct rtw_efuse *efuse = &rtwdev->efuse;
+	struct rtw_coex *coex = &rtwdev->coex;
+	struct rtw_coex_rfe *coex_rfe = &coex->rfe;
+	bool aux = efuse->bt_setting & BIT(6);
+
+	coex_rfe->rfe_module_type = rtwdev->efuse.rfe_option;
+	coex_rfe->ant_switch_polarity = 0;
+	coex_rfe->ant_switch_exist = false;
+	coex_rfe->ant_switch_with_bt = false;
+	coex_rfe->ant_switch_diversity = false;
+	coex_rfe->wlg_at_btg = true;
+
+	/* decide antenna at main or aux */
+	if (efuse->share_ant) {
+		if (aux)
+			rtw_write16(rtwdev, REG_BB_SEL_BTG, 0x80);
+		else
+			rtw_write16(rtwdev, REG_BB_SEL_BTG, 0x200);
+	} else {
+		if (aux)
+			rtw_write16(rtwdev, REG_BB_SEL_BTG, 0x280);
+		else
+			rtw_write16(rtwdev, REG_BB_SEL_BTG, 0x0);
+	}
+
+	/* disable LTE coex in wifi side */
+	rtw_coex_write_indirect_reg(rtwdev, LTE_COEX_CTRL, BIT_LTE_COEX_EN, 0x0);
+	rtw_coex_write_indirect_reg(rtwdev, LTE_WL_TRX_CTRL, MASKLWORD, 0xffff);
+	rtw_coex_write_indirect_reg(rtwdev, LTE_BT_TRX_CTRL, MASKLWORD, 0xffff);
+}
+
+static void rtw8723d_coex_cfg_wl_tx_power(struct rtw_dev *rtwdev, u8 wl_pwr)
+{
+	struct rtw_coex *coex = &rtwdev->coex;
+	struct rtw_coex_dm *coex_dm = &coex->dm;
+	static const u8	wl_tx_power[] = {0xb2, 0x90};
+	u8 pwr;
+
+	if (wl_pwr == coex_dm->cur_wl_pwr_lvl)
+		return;
+
+	coex_dm->cur_wl_pwr_lvl = wl_pwr;
+
+	if (coex_dm->cur_wl_pwr_lvl >= ARRAY_SIZE(wl_tx_power))
+		coex_dm->cur_wl_pwr_lvl = ARRAY_SIZE(wl_tx_power) - 1;
+
+	pwr = wl_tx_power[coex_dm->cur_wl_pwr_lvl];
+
+	rtw_write8(rtwdev, REG_ANA_PARAM1 + 3, pwr);
+}
+
+static void rtw8723d_coex_cfg_wl_rx_gain(struct rtw_dev *rtwdev, bool low_gain)
+{
+	struct rtw_coex *coex = &rtwdev->coex;
+	struct rtw_coex_dm *coex_dm = &coex->dm;
+	/* WL Rx Low gain on */
+	static const u32 wl_rx_low_gain_on[] = {
+		0xec120101, 0xeb130101, 0xce140101, 0xcd150101, 0xcc160101,
+		0xcb170101, 0xca180101, 0x8d190101, 0x8c1a0101, 0x8b1b0101,
+		0x4f1c0101, 0x4e1d0101, 0x4d1e0101, 0x4c1f0101, 0x0e200101,
+		0x0d210101, 0x0c220101, 0x0b230101, 0xcf240001, 0xce250001,
+		0xcd260001, 0xcc270001, 0x8f280001
+	};
+	/* WL Rx Low gain off */
+	static const u32 wl_rx_low_gain_off[] = {
+		0xec120101, 0xeb130101, 0xea140101, 0xe9150101, 0xe8160101,
+		0xe7170101, 0xe6180101, 0xe5190101, 0xe41a0101, 0xe31b0101,
+		0xe21c0101, 0xe11d0101, 0xe01e0101, 0x861f0101, 0x85200101,
+		0x84210101, 0x83220101, 0x82230101, 0x81240101, 0x80250101,
+		0x44260101, 0x43270101, 0x42280101
+	};
+	u8 i;
+
+	if (low_gain == coex_dm->cur_wl_rx_low_gain_en)
+		return;
+
+	coex_dm->cur_wl_rx_low_gain_en = low_gain;
+
+	if (coex_dm->cur_wl_rx_low_gain_en) {
+		for (i = 0; i < ARRAY_SIZE(wl_rx_low_gain_on); i++)
+			rtw_write32(rtwdev, REG_AGCRSSI, wl_rx_low_gain_on[i]);
+	} else {
+		for (i = 0; i < ARRAY_SIZE(wl_rx_low_gain_off); i++)
+			rtw_write32(rtwdev, REG_AGCRSSI, wl_rx_low_gain_off[i]);
+	}
+}
+
 static u8 rtw8723d_pwrtrack_get_limit_ofdm(struct rtw_dev *rtwdev)
 {
 	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
@@ -1808,6 +1934,156 @@ static struct rtw_chip_ops rtw8723d_ops = {
 	.config_bfee		= NULL,
 	.set_gid_table		= NULL,
 	.cfg_csi_rate		= NULL,
+
+	.coex_set_init		= rtw8723d_coex_cfg_init,
+	.coex_set_ant_switch	= NULL,
+	.coex_set_gnt_fix	= rtw8723d_coex_cfg_gnt_fix,
+	.coex_set_gnt_debug	= rtw8723d_coex_cfg_gnt_debug,
+	.coex_set_rfe_type	= rtw8723d_coex_cfg_rfe_type,
+	.coex_set_wl_tx_power	= rtw8723d_coex_cfg_wl_tx_power,
+	.coex_set_wl_rx_gain	= rtw8723d_coex_cfg_wl_rx_gain,
+};
+
+/* Shared-Antenna Coex Table */
+static const struct coex_table_para table_sant_8723d[] = {
+	{0xffffffff, 0xffffffff}, /* case-0 */
+	{0x55555555, 0x55555555},
+	{0x65555555, 0x65555555},
+	{0xaaaaaaaa, 0xaaaaaaaa},
+	{0x5a5a5a5a, 0x5a5a5a5a},
+	{0xfafafafa, 0xfafafafa}, /* case-5 */
+	{0xa5555555, 0xaaaa5aaa},
+	{0x6a5a5a5a, 0x5a5a5a5a},
+	{0x6a5a5a5a, 0x6a5a5a5a},
+	{0x65555555, 0x5a5a5a5a},
+	{0x65555555, 0x6a5a5a5a}, /* case-10 */
+	{0x65555555, 0xfafafafa},
+	{0x65555555, 0x6a5a5aaa},
+	{0x65555555, 0x5aaa5aaa},
+	{0x65555555, 0xaaaa5aaa},
+	{0x65555555, 0xaaaaaaaa}, /* case-15 */
+	{0xffff55ff, 0xfafafafa},
+	{0xffff55ff, 0x6afa5afa},
+	{0xaaffffaa, 0xfafafafa},
+	{0xaa5555aa, 0x5a5a5a5a},
+	{0xaa5555aa, 0x6a5a5a5a}, /* case-20 */
+	{0xaa5555aa, 0xaaaaaaaa},
+	{0xffffffff, 0x5a5a5a5a},
+	{0xffffffff, 0x6a5a5a5a},
+	{0xffffffff, 0x55555555},
+	{0xffffffff, 0x6a5a5aaa}, /* case-25 */
+	{0x55555555, 0x5a5a5a5a},
+	{0x55555555, 0xaaaaaaaa},
+	{0x55555555, 0x6a6a6a6a},
+	{0x656a656a, 0x656a656a}
+};
+
+/* Non-Shared-Antenna Coex Table */
+static const struct coex_table_para table_nsant_8723d[] = {
+	{0xffffffff, 0xffffffff}, /* case-100 */
+	{0x55555555, 0x55555555},
+	{0x65555555, 0x65555555},
+	{0xaaaaaaaa, 0xaaaaaaaa},
+	{0x5a5a5a5a, 0x5a5a5a5a},
+	{0xfafafafa, 0xfafafafa}, /* case-105 */
+	{0x5afa5afa, 0x5afa5afa},
+	{0x55555555, 0xfafafafa},
+	{0x65555555, 0xfafafafa},
+	{0x65555555, 0x5a5a5a5a},
+	{0x65555555, 0x6a5a5a5a}, /* case-110 */
+	{0x65555555, 0xaaaaaaaa},
+	{0xffff55ff, 0xfafafafa},
+	{0xffff55ff, 0x5afa5afa},
+	{0xffff55ff, 0xaaaaaaaa},
+	{0xaaffffaa, 0xfafafafa}, /* case-115 */
+	{0xaaffffaa, 0x5afa5afa},
+	{0xaaffffaa, 0xaaaaaaaa},
+	{0xffffffff, 0xfafafafa},
+	{0xffffffff, 0x5afa5afa},
+	{0xffffffff, 0xaaaaaaaa},/* case-120 */
+	{0x55ff55ff, 0x5afa5afa},
+	{0x55ff55ff, 0xaaaaaaaa},
+	{0x55ff55ff, 0x55ff55ff}
+};
+
+/* Shared-Antenna TDMA */
+static const struct coex_tdma_para tdma_sant_8723d[] = {
+	{ {0x08, 0x00, 0x00, 0x00, 0x00} }, /* case-0 */
+	{ {0x61, 0x45, 0x03, 0x11, 0x11} }, /* case-1 */
+	{ {0x61, 0x3a, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x20, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x30, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x10, 0x03, 0x11, 0x11} }, /* case-5 */
+	{ {0x61, 0x48, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x3a, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x30, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x20, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x10, 0x03, 0x11, 0x10} }, /* case-10 */
+	{ {0x61, 0x10, 0x03, 0x11, 0x14} },
+	{ {0x61, 0x08, 0x03, 0x10, 0x14} },
+	{ {0x51, 0x10, 0x03, 0x10, 0x54} },
+	{ {0x51, 0x10, 0x03, 0x10, 0x55} },
+	{ {0x51, 0x10, 0x07, 0x10, 0x54} }, /* case-15 */
+	{ {0x51, 0x45, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x3a, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x30, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x20, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x15, 0x03, 0x10, 0x50} }, /* case-20 */
+	{ {0x51, 0x4a, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x0c, 0x03, 0x10, 0x54} },
+	{ {0x55, 0x08, 0x03, 0x10, 0x54} },
+	{ {0x65, 0x10, 0x03, 0x11, 0x11} },
+	{ {0x51, 0x10, 0x03, 0x10, 0x51} },
+	{ {0x61, 0x15, 0x03, 0x11, 0x10} }
+};
+
+/* Non-Shared-Antenna TDMA */
+static const struct coex_tdma_para tdma_nsant_8723d[] = {
+	{ {0x00, 0x00, 0x00, 0x40, 0x00} }, /* case-100 */
+	{ {0x61, 0x45, 0x03, 0x11, 0x11} }, /* case-101 */
+	{ {0x61, 0x3a, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x30, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x20, 0x03, 0x11, 0x11} },
+	{ {0x61, 0x10, 0x03, 0x11, 0x11} }, /* case-105 */
+	{ {0x61, 0x45, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x3a, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x30, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x20, 0x03, 0x11, 0x10} },
+	{ {0x61, 0x10, 0x03, 0x11, 0x10} }, /* case-110 */
+	{ {0x61, 0x08, 0x03, 0x11, 0x14} },
+	{ {0x61, 0x08, 0x03, 0x10, 0x14} },
+	{ {0x51, 0x08, 0x03, 0x10, 0x54} },
+	{ {0x51, 0x08, 0x03, 0x10, 0x55} },
+	{ {0x51, 0x08, 0x07, 0x10, 0x54} }, /* case-115 */
+	{ {0x51, 0x45, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x3a, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x30, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x20, 0x03, 0x10, 0x50} },
+	{ {0x51, 0x10, 0x03, 0x10, 0x50} }
+};
+
+/* rssi in percentage % (dbm = % - 100) */
+static const u8 wl_rssi_step_8723d[] = {60, 50, 44, 30};
+static const u8 bt_rssi_step_8723d[] = {30, 30, 30, 30};
+static const struct coex_5g_afh_map afh_5g_8723d[] = { {0, 0, 0} };
+
+/* wl_tx_dec_power, bt_tx_dec_power, wl_rx_gain, bt_rx_lna_constrain */
+static const struct coex_rf_para rf_para_tx_8723d[] = {
+	{0, 0, false, 7},  /* for normal */
+	{0, 10, false, 7}, /* for WL-CPT */
+	{1, 0, true, 4},
+	{1, 2, true, 4},
+	{1, 10, true, 4},
+	{1, 15, true, 4}
+};
+
+static const struct coex_rf_para rf_para_rx_8723d[] = {
+	{0, 0, false, 7},  /* for normal */
+	{0, 10, false, 7}, /* for WL-CPT */
+	{1, 0, true, 5},
+	{1, 2, true, 5},
+	{1, 10, true, 5},
+	{1, 15, true, 5}
 };
 
 static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
@@ -2363,6 +2639,28 @@ static const struct rtw_pwr_track_tbl rtw8723d_rtw_pwr_track_tbl = {
 	.pwrtrk_xtal_n = rtw8723d_pwrtrk_xtal_n,
 };
 
+static const struct rtw_reg_domain coex_info_hw_regs_8723d[] = {
+	{0x948, MASKDWORD, RTW_REG_DOMAIN_MAC32},
+	{0x67, BIT(7), RTW_REG_DOMAIN_MAC8},
+	{0, 0, RTW_REG_DOMAIN_NL},
+	{0x964, BIT(1), RTW_REG_DOMAIN_MAC8},
+	{0x864, BIT(0), RTW_REG_DOMAIN_MAC8},
+	{0xab7, BIT(5), RTW_REG_DOMAIN_MAC8},
+	{0xa01, BIT(7), RTW_REG_DOMAIN_MAC8},
+	{0, 0, RTW_REG_DOMAIN_NL},
+	{0x430, MASKDWORD, RTW_REG_DOMAIN_MAC32},
+	{0x434, MASKDWORD, RTW_REG_DOMAIN_MAC32},
+	{0x42a, MASKLWORD, RTW_REG_DOMAIN_MAC16},
+	{0x426, MASKBYTE0, RTW_REG_DOMAIN_MAC8},
+	{0x45e, BIT(3), RTW_REG_DOMAIN_MAC8},
+	{0, 0, RTW_REG_DOMAIN_NL},
+	{0x4c6, BIT(4), RTW_REG_DOMAIN_MAC8},
+	{0x40, BIT(5), RTW_REG_DOMAIN_MAC8},
+	{0x550, MASKDWORD, RTW_REG_DOMAIN_MAC32},
+	{0x522, MASKBYTE0, RTW_REG_DOMAIN_MAC8},
+	{0x953, BIT(1), RTW_REG_DOMAIN_MAC8},
+};
+
 struct rtw_chip_info rtw8723d_hw_spec = {
 	.ops = &rtw8723d_ops,
 	.id = RTW_CHIP_TYPE_8723D,
@@ -2409,6 +2707,35 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.rx_ldpc = false,
 	.pwr_track_tbl = &rtw8723d_rtw_pwr_track_tbl,
 	.iqk_threshold = 8,
+
+	.coex_para_ver = 0x1905302f,
+	.bt_desired_ver = 0x2f,
+	.scbd_support = true,
+	.new_scbd10_def = true,
+	.pstdma_type = COEX_PSTDMA_FORCE_LPSOFF,
+	.bt_rssi_type = COEX_BTRSSI_RATIO,
+	.ant_isolation = 15,
+	.rssi_tolerance = 2,
+	.wl_rssi_step = wl_rssi_step_8723d,
+	.bt_rssi_step = bt_rssi_step_8723d,
+	.table_sant_num = ARRAY_SIZE(table_sant_8723d),
+	.table_sant = table_sant_8723d,
+	.table_nsant_num = ARRAY_SIZE(table_nsant_8723d),
+	.table_nsant = table_nsant_8723d,
+	.tdma_sant_num = ARRAY_SIZE(tdma_sant_8723d),
+	.tdma_sant = tdma_sant_8723d,
+	.tdma_nsant_num = ARRAY_SIZE(tdma_nsant_8723d),
+	.tdma_nsant = tdma_nsant_8723d,
+	.wl_rf_para_num = ARRAY_SIZE(rf_para_tx_8723d),
+	.wl_rf_para_tx = rf_para_tx_8723d,
+	.wl_rf_para_rx = rf_para_rx_8723d,
+	.bt_afh_span_bw20 = 0x20,
+	.bt_afh_span_bw40 = 0x30,
+	.afh_5g_num = ARRAY_SIZE(afh_5g_8723d),
+	.afh_5g = afh_5g_8723d,
+
+	.coex_info_hw_regs_num = ARRAY_SIZE(coex_info_hw_regs_8723d),
+	.coex_info_hw_regs = coex_info_hw_regs_8723d,
 };
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index d1f1c1a594ad..31b8ed9ee652 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -143,6 +143,7 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 #define BIT_MASK_RFMOD		BIT(0)
 #define BIT_LCK			BIT(15)
 
+#define REG_GPIO_INTM		0x0048
 #define REG_BTG_SEL		0x0067
 #define REG_LTECOEX_PATH_CONTROL	0x0070
 #define REG_LTECOEX_CTRL	0x07c0
@@ -150,6 +151,7 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 #define REG_LTECOEX_READ_DATA	0x07c8
 #define REG_PSDFN		0x0808
 #define REG_BB_PWR_SAV1_11N	0x0874
+#define REG_ANA_PARAM1		0x0880
 #define REG_ANALOG_P4		0x088c
 #define REG_PSDRPT		0x08b4
 #define REG_FPGA1_RFMOD		0x0900
@@ -192,6 +194,7 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 #define BIT_SET_OFDM0_EXTS(a, c, d) (((a) << 31) | ((c) << 29) | ((d) << 28))
 #define REG_OFDM0_XAAGC1	0x0c50
 #define REG_OFDM0_XBAGC1	0x0c58
+#define REG_AGCRSSI		0x0c78
 #define REG_OFDM_0_XA_TX_IQ_IMBALANCE	0x0c80
 #define BIT_MASK_TXIQ_ELM_A	0x03ff
 #define BIT_SET_TXIQ_ELM_ACD(a, c, d) (((d) << 22) | (((c) & 0x3F) << 16) |    \
-- 
cgit v1.2.3-59-g8ed1b


From 504e2b288258e8e28d46f3e314be7cb460bdd655 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:20 +0800
Subject: rtw88: fill zeros to words 0x06 and 0x07 of security cam entry

8723D adds some experimental features to word 0x06 of cam entry, so fill
zeros to initialize them to off state. For existing chips, these two words
are reserved and always zeros, so this change is harmless for them.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-9-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/sec.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/sec.c b/drivers/net/wireless/realtek/rtw88/sec.c
index d0d7fbb10d58..ce46e5b4a60a 100644
--- a/drivers/net/wireless/realtek/rtw88/sec.c
+++ b/drivers/net/wireless/realtek/rtw88/sec.c
@@ -44,7 +44,7 @@ void rtw_sec_write_cam(struct rtw_dev *rtwdev,
 
 	write_cmd = RTW_SEC_CMD_WRITE_ENABLE | RTW_SEC_CMD_POLLING;
 	addr = hw_key_idx << RTW_SEC_CAM_ENTRY_SHIFT;
-	for (i = 5; i >= 0; i--) {
+	for (i = 7; i >= 0; i--) {
 		switch (i) {
 		case 0:
 			content = ((key->keyidx & 0x3))		|
@@ -60,6 +60,10 @@ void rtw_sec_write_cam(struct rtw_dev *rtwdev,
 				  (cam->addr[4]		<< 16)	|
 				  (cam->addr[5]		<< 24);
 			break;
+		case 6:
+		case 7:
+			content = 0;
+			break;
 		default:
 			j = (i - 2) << 2;
 			content = (key->key[j])			|
-- 
cgit v1.2.3-59-g8ed1b


From f5df1a8b4376551f7fb2416135c58896b70a1467 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Tue, 12 May 2020 18:26:21 +0800
Subject: rtw88: 8723d: Add 8723DE to Kconfig and Makefile

Since 8723D code is ready, we can build it.

Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512102621.5148-10-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/Kconfig  | 10 ++++++++++
 drivers/net/wireless/realtek/rtw88/Makefile |  1 +
 2 files changed, 11 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/Kconfig b/drivers/net/wireless/realtek/rtw88/Kconfig
index 33bd7ed797ff..7a5fa68945c4 100644
--- a/drivers/net/wireless/realtek/rtw88/Kconfig
+++ b/drivers/net/wireless/realtek/rtw88/Kconfig
@@ -36,6 +36,16 @@ config RTW88_8822CE
 
 	  802.11ac PCIe wireless network adapter
 
+config RTW88_8723DE
+	bool "Realtek 8723DE PCI wireless network adapter"
+	depends on PCI
+	select RTW88_CORE
+	select RTW88_PCI
+	help
+	  Select this option will enable support for 8723DE chipset
+
+	  802.11n PCIe wireless network adapter
+
 config RTW88_DEBUG
 	bool "Realtek rtw88 debug support"
 	depends on RTW88_CORE
diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index cac148d13cf1..385facc0dd20 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -20,6 +20,7 @@ rtw88-y += main.o \
 
 rtw88-$(CONFIG_RTW88_8822BE)	+= rtw8822b.o rtw8822b_table.o
 rtw88-$(CONFIG_RTW88_8822CE)	+= rtw8822c.o rtw8822c_table.o
+rtw88-$(CONFIG_RTW88_8723DE)	+= rtw8723d.o rtw8723d_table.o
 
 obj-$(CONFIG_RTW88_PCI)		+= rtwpci.o
 rtwpci-objs			:= pci.o
-- 
cgit v1.2.3-59-g8ed1b


From 0c4402385ac46afe5dc8c5854f9ce9028afcf69e Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Tue, 12 May 2020 18:35:34 +0800
Subject: rtw88: 8723d: fix sparse warnings for power tracking

sparse warnings: (new ones prefixed by >>)

>> drivers/net/wireless/realtek/rtw88/rtw8723d.c:1899:6: sparse: sparse:
symbol 'rtw8723d_pwr_track' was not declared. Should it be static?

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200512103534.5889-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 400364aec393..b517af417e0e 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -1896,7 +1896,7 @@ iqk:
 		rtw8723d_phy_calibration(rtwdev);
 }
 
-void rtw8723d_pwr_track(struct rtw_dev *rtwdev)
+static void rtw8723d_pwr_track(struct rtw_dev *rtwdev)
 {
 	struct rtw_efuse *efuse = &rtwdev->efuse;
 	struct rtw_dm_info *dm_info = &rtwdev->dm_info;
-- 
cgit v1.2.3-59-g8ed1b


From b6ba5761faad24fa5d6b931682a74071f3511fdc Mon Sep 17 00:00:00 2001
From: ChenTao <chentao107@huawei.com>
Date: Wed, 13 May 2020 09:17:54 +0800
Subject: rtl8187: Remove unused variable rtl8225z2_tx_power_ofdm
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix the following warning:

drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c:609:17: warning:
‘rtl8225z2_tx_power_ofdm’ defined but not used
 static const u8 rtl8225z2_tx_power_ofdm[] = {

Acked-by: Hin-Tak Leung <htl10@users.sourceforge.net>
Acked-by: Larry Finger <Larry.Finger@lwfinger.net>
Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: ChenTao <chentao107@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200513011754.28432-1-chentao107@huawei.com
---
 drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
index b2616d61b66d..585784258c66 100644
--- a/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
+++ b/drivers/net/wireless/realtek/rtl818x/rtl8187/rtl8225.c
@@ -606,10 +606,6 @@ static const u8 rtl8225z2_tx_power_cck[] = {
 	0x26, 0x25, 0x21, 0x1b, 0x14, 0x0d, 0x06, 0x03
 };
 
-static const u8 rtl8225z2_tx_power_ofdm[] = {
-	0x42, 0x00, 0x40, 0x00, 0x40
-};
-
 static const u8 rtl8225z2_tx_gain_cck_ofdm[] = {
 	0x00, 0x01, 0x02, 0x03, 0x04, 0x05,
 	0x06, 0x07, 0x08, 0x09, 0x0a, 0x0b,
-- 
cgit v1.2.3-59-g8ed1b


From 14c129e30152f7d74c8b25ec06ae742f4291e166 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Mon, 4 May 2020 08:30:09 +0300
Subject: {IB/net}/mlx5: Simplify don't trap code

The fs_core already supports creation of rules with multiple
actions/destinations. Refactor fs_core to handle the case
when don't trap rule is created with destination. Adapt the
calling code in the driver.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/infiniband/hw/mlx5/main.c                 | 47 +++-----------
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 79 +++++++++++++----------
 2 files changed, 56 insertions(+), 70 deletions(-)

diff --git a/drivers/infiniband/hw/mlx5/main.c b/drivers/infiniband/hw/mlx5/main.c
index 65e0e24d463b..566b42f3fb18 100644
--- a/drivers/infiniband/hw/mlx5/main.c
+++ b/drivers/infiniband/hw/mlx5/main.c
@@ -3698,12 +3698,13 @@ static struct mlx5_ib_flow_handler *_create_flow_rule(struct mlx5_ib_dev *dev,
 		if (!dest_num)
 			rule_dst = NULL;
 	} else {
+		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)
+			flow_act.action |=
+				MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
 		if (is_egress)
 			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-		else
-			flow_act.action |=
-				dest_num ?  MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
-					MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+		else if (dest_num)
+			flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	}
 
 	if ((spec->flow_context.flags & FLOW_CONTEXT_HAS_TAG)  &&
@@ -3747,30 +3748,6 @@ static struct mlx5_ib_flow_handler *create_flow_rule(struct mlx5_ib_dev *dev,
 	return _create_flow_rule(dev, ft_prio, flow_attr, dst, 0, NULL);
 }
 
-static struct mlx5_ib_flow_handler *create_dont_trap_rule(struct mlx5_ib_dev *dev,
-							  struct mlx5_ib_flow_prio *ft_prio,
-							  struct ib_flow_attr *flow_attr,
-							  struct mlx5_flow_destination *dst)
-{
-	struct mlx5_ib_flow_handler *handler_dst = NULL;
-	struct mlx5_ib_flow_handler *handler = NULL;
-
-	handler = create_flow_rule(dev, ft_prio, flow_attr, NULL);
-	if (!IS_ERR(handler)) {
-		handler_dst = create_flow_rule(dev, ft_prio,
-					       flow_attr, dst);
-		if (IS_ERR(handler_dst)) {
-			mlx5_del_flow_rules(handler->rule);
-			ft_prio->refcount--;
-			kfree(handler);
-			handler = handler_dst;
-		} else {
-			list_add(&handler_dst->list, &handler->list);
-		}
-	}
-
-	return handler;
-}
 enum {
 	LEFTOVERS_MC,
 	LEFTOVERS_UC,
@@ -3974,15 +3951,11 @@ static struct ib_flow *mlx5_ib_create_flow(struct ib_qp *qp,
 	}
 
 	if (flow_attr->type == IB_FLOW_ATTR_NORMAL) {
-		if (flow_attr->flags & IB_FLOW_ATTR_FLAGS_DONT_TRAP)  {
-			handler = create_dont_trap_rule(dev, ft_prio,
-							flow_attr, dst);
-		} else {
-			underlay_qpn = (mqp->flags & MLX5_IB_QP_UNDERLAY) ?
-					mqp->underlay_qpn : 0;
-			handler = _create_flow_rule(dev, ft_prio, flow_attr,
-						    dst, underlay_qpn, ucmd);
-		}
+		underlay_qpn = (mqp->flags & IB_QP_CREATE_SOURCE_QPN) ?
+				       mqp->underlay_qpn :
+				       0;
+		handler = _create_flow_rule(dev, ft_prio, flow_attr, dst,
+					    underlay_qpn, ucmd);
 	} else if (flow_attr->type == IB_FLOW_ATTR_ALL_DEFAULT ||
 		   flow_attr->type == IB_FLOW_ATTR_MC_DEFAULT) {
 		handler = create_leftovers_rule(dev, ft_prio, flow_attr,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index d5defe09339a..705f433e2590 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -254,7 +254,7 @@ static void del_sw_flow_group(struct fs_node *node);
 static void del_sw_fte(struct fs_node *node);
 static void del_sw_prio(struct fs_node *node);
 static void del_sw_ns(struct fs_node *node);
-/* Delete rule (destination) is special case that 
+/* Delete rule (destination) is special case that
  * requires to lock the FTE for all the deletion process.
  */
 static void del_sw_hw_rule(struct fs_node *node);
@@ -1899,48 +1899,61 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 {
 	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
 	static const struct mlx5_flow_spec zero_spec = {};
-	struct mlx5_flow_destination gen_dest = {};
+	struct mlx5_flow_destination *gen_dest = NULL;
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_handle *handle = NULL;
 	u32 sw_action = flow_act->action;
 	struct fs_prio *prio;
+	int i;
 
 	if (!spec)
 		spec = &zero_spec;
 
-	fs_get_obj(prio, ft->node.parent);
-	if (flow_act->action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
-		if (!fwd_next_prio_supported(ft))
-			return ERR_PTR(-EOPNOTSUPP);
-		if (num_dest)
-			return ERR_PTR(-EINVAL);
-		mutex_lock(&root->chain_lock);
-		next_ft = find_next_chained_ft(prio);
-		if (next_ft) {
-			gen_dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
-			gen_dest.ft = next_ft;
-			dest = &gen_dest;
-			num_dest = 1;
-			flow_act->action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
-		} else {
-			mutex_unlock(&root->chain_lock);
-			return ERR_PTR(-EOPNOTSUPP);
-		}
-	}
+	if (!(sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO))
+		return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
 
-	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+	if (!fwd_next_prio_supported(ft))
+		return ERR_PTR(-EOPNOTSUPP);
 
-	if (sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
-		if (!IS_ERR_OR_NULL(handle) &&
-		    (list_empty(&handle->rule[0]->next_ft))) {
-			mutex_lock(&next_ft->lock);
-			list_add(&handle->rule[0]->next_ft,
-				 &next_ft->fwd_rules);
-			mutex_unlock(&next_ft->lock);
-			handle->rule[0]->sw_action = MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
-		}
-		mutex_unlock(&root->chain_lock);
-	}
+	mutex_lock(&root->chain_lock);
+	fs_get_obj(prio, ft->node.parent);
+	next_ft = find_next_chained_ft(prio);
+	if (!next_ft) {
+		handle = ERR_PTR(-EOPNOTSUPP);
+		goto unlock;
+	}
+
+	gen_dest = kcalloc(num_dest + 1, sizeof(*dest),
+			   GFP_KERNEL);
+	if (!gen_dest) {
+		handle = ERR_PTR(-ENOMEM);
+		goto unlock;
+	}
+	for (i = 0; i < num_dest; i++)
+		gen_dest[i] = dest[i];
+	gen_dest[i].type =
+		MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+	gen_dest[i].ft = next_ft;
+	dest = gen_dest;
+	num_dest++;
+	flow_act->action &=
+		~MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
+	if (IS_ERR(handle))
+		goto unlock;
+
+	if (list_empty(&handle->rule[num_dest - 1]->next_ft)) {
+		mutex_lock(&next_ft->lock);
+		list_add(&handle->rule[num_dest - 1]->next_ft,
+			 &next_ft->fwd_rules);
+		mutex_unlock(&next_ft->lock);
+		handle->rule[num_dest - 1]->sw_action =
+			MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	}
+unlock:
+	mutex_unlock(&root->chain_lock);
+	kfree(gen_dest);
 	return handle;
 }
 EXPORT_SYMBOL(mlx5_add_flow_rules);
-- 
cgit v1.2.3-59-g8ed1b


From 9254f8ed15b6dcc9b04b9ad32863a7518cc5a5b1 Mon Sep 17 00:00:00 2001
From: Maor Gottlieb <maorg@mellanox.com>
Date: Mon, 4 May 2020 08:30:10 +0300
Subject: net/mlx5: Add support in forward to namespace

Currently, fs_core supports rules that forward traffic to
continue matching in the next priority. Add support for
forwarding traffic to continue matching in the next namespace.

Signed-off-by: Maor Gottlieb <maorg@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Mark Zhang <markz@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 57 +++++++++++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.h |  2 +
 include/linux/mlx5/fs.h                           |  1 +
 3 files changed, 51 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 705f433e2590..41aa1fa0c69e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -384,6 +384,12 @@ static struct fs_prio *find_prio(struct mlx5_flow_namespace *ns,
 	return NULL;
 }
 
+static bool is_fwd_next_action(u32 action)
+{
+	return action & (MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+			 MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
+}
+
 static bool check_valid_spec(const struct mlx5_flow_spec *spec)
 {
 	int i;
@@ -502,7 +508,7 @@ static void del_sw_hw_rule(struct fs_node *node)
 	fs_get_obj(rule, node);
 	fs_get_obj(fte, rule->node.parent);
 	trace_mlx5_fs_del_rule(rule);
-	if (rule->sw_action == MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO) {
+	if (is_fwd_next_action(rule->sw_action)) {
 		mutex_lock(&rule->dest_attr.ft->lock);
 		list_del(&rule->next_ft);
 		mutex_unlock(&rule->dest_attr.ft->lock);
@@ -826,6 +832,36 @@ static struct mlx5_flow_table *find_prev_chained_ft(struct fs_prio *prio)
 	return find_closest_ft(prio, true);
 }
 
+static struct fs_prio *find_fwd_ns_prio(struct mlx5_flow_root_namespace *root,
+					struct mlx5_flow_namespace *ns)
+{
+	struct mlx5_flow_namespace *root_ns = &root->ns;
+	struct fs_prio *iter_prio;
+	struct fs_prio *prio;
+
+	fs_get_obj(prio, ns->node.parent);
+	list_for_each_entry(iter_prio, &root_ns->node.children, node.list) {
+		if (iter_prio == prio &&
+		    !list_is_last(&prio->node.children, &iter_prio->node.list))
+			return list_next_entry(iter_prio, node.list);
+	}
+	return NULL;
+}
+
+static struct mlx5_flow_table *find_next_fwd_ft(struct mlx5_flow_table *ft,
+						struct mlx5_flow_act *flow_act)
+{
+	struct mlx5_flow_root_namespace *root = find_root(&ft->node);
+	struct fs_prio *prio;
+
+	if (flow_act->action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS)
+		prio = find_fwd_ns_prio(root, ft->ns);
+	else
+		fs_get_obj(prio, ft->node.parent);
+
+	return (prio) ? find_next_chained_ft(prio) : NULL;
+}
+
 static int connect_fts_in_prio(struct mlx5_core_dev *dev,
 			       struct fs_prio *prio,
 			       struct mlx5_flow_table *ft)
@@ -976,6 +1012,10 @@ static int connect_fwd_rules(struct mlx5_core_dev *dev,
 	list_splice_init(&old_next_ft->fwd_rules, &new_next_ft->fwd_rules);
 	mutex_unlock(&old_next_ft->lock);
 	list_for_each_entry(iter, &new_next_ft->fwd_rules, next_ft) {
+		if ((iter->sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS) &&
+		    iter->ft->ns == new_next_ft->ns)
+			continue;
+
 		err = _mlx5_modify_rule_destination(iter, &dest);
 		if (err)
 			pr_err("mlx5_core: failed to modify rule to point on flow table %d\n",
@@ -1077,6 +1117,7 @@ static struct mlx5_flow_table *__mlx5_create_flow_table(struct mlx5_flow_namespa
 	next_ft = unmanaged ? ft_attr->next_ft :
 			      find_next_chained_ft(fs_prio);
 	ft->def_miss_action = ns->def_miss_action;
+	ft->ns = ns;
 	err = root->cmds->create_flow_table(root, ft, log_table_sz, next_ft);
 	if (err)
 		goto free_ft;
@@ -1903,21 +1944,19 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 	struct mlx5_flow_table *next_ft = NULL;
 	struct mlx5_flow_handle *handle = NULL;
 	u32 sw_action = flow_act->action;
-	struct fs_prio *prio;
 	int i;
 
 	if (!spec)
 		spec = &zero_spec;
 
-	if (!(sw_action & MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO))
+	if (!is_fwd_next_action(sw_action))
 		return _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
 
 	if (!fwd_next_prio_supported(ft))
 		return ERR_PTR(-EOPNOTSUPP);
 
 	mutex_lock(&root->chain_lock);
-	fs_get_obj(prio, ft->node.parent);
-	next_ft = find_next_chained_ft(prio);
+	next_ft = find_next_fwd_ft(ft, flow_act);
 	if (!next_ft) {
 		handle = ERR_PTR(-EOPNOTSUPP);
 		goto unlock;
@@ -1936,8 +1975,8 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 	gen_dest[i].ft = next_ft;
 	dest = gen_dest;
 	num_dest++;
-	flow_act->action &=
-		~MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+	flow_act->action &= ~(MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO |
+			      MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS);
 	flow_act->action |= MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
 	handle = _mlx5_add_flow_rules(ft, spec, flow_act, dest, num_dest);
 	if (IS_ERR(handle))
@@ -1948,8 +1987,8 @@ mlx5_add_flow_rules(struct mlx5_flow_table *ft,
 		list_add(&handle->rule[num_dest - 1]->next_ft,
 			 &next_ft->fwd_rules);
 		mutex_unlock(&next_ft->lock);
-		handle->rule[num_dest - 1]->sw_action =
-			MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO;
+		handle->rule[num_dest - 1]->sw_action = sw_action;
+		handle->rule[num_dest - 1]->ft = ft;
 	}
 unlock:
 	mutex_unlock(&root->chain_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
index 508108c58dae..825b662f809b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.h
@@ -138,6 +138,7 @@ struct fs_node {
 
 struct mlx5_flow_rule {
 	struct fs_node				node;
+	struct mlx5_flow_table			*ft;
 	struct mlx5_flow_destination		dest_attr;
 	/* next_ft should be accessed under chain_lock and only of
 	 * destination type is FWD_NEXT_fT.
@@ -175,6 +176,7 @@ struct mlx5_flow_table {
 	u32				flags;
 	struct rhltable			fgs_hash;
 	enum mlx5_flow_table_miss_action def_miss_action;
+	struct mlx5_flow_namespace	*ns;
 };
 
 struct mlx5_ft_underlay_qp {
diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h
index e2d13e074067..6c5aa0a21425 100644
--- a/include/linux/mlx5/fs.h
+++ b/include/linux/mlx5/fs.h
@@ -42,6 +42,7 @@ enum {
 	MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_PRIO	= 1 << 16,
 	MLX5_FLOW_CONTEXT_ACTION_ENCRYPT	= 1 << 17,
 	MLX5_FLOW_CONTEXT_ACTION_DECRYPT	= 1 << 18,
+	MLX5_FLOW_CONTEXT_ACTION_FWD_NEXT_NS	= 1 << 19,
 };
 
 enum {
-- 
cgit v1.2.3-59-g8ed1b


From 012f81456818dfb49d2939373b163945da3a4032 Mon Sep 17 00:00:00 2001
From: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Date: Tue, 12 May 2020 10:42:02 +0300
Subject: ath11k: fix htt stats module not handle multiple skbs

HTT EXT stats come in a stream of TLVs spanning multiple
messages. Currently a completion is sent for each message,
which creates a race where stats_req is accessed to fill in
the second message after the memory has already been freed
in the release operation. Fix this by issuing the completion
only once all the messages are received and processed. The
driver knows this from the DONE bit set in the HTT message.

Also fix locking required for htt stats.

Co-developed-by: Miles Hu <milehu@codeaurora.org>
Signed-off-by: Miles Hu <milehu@codeaurora.org>
Signed-off-by: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1589221074-28778-1-git-send-email-pradeepc@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/debug_htt_stats.c | 48 ++++++++++++++++++-----
 drivers/net/wireless/ath/ath11k/dp.h              |  1 +
 2 files changed, 39 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/debug_htt_stats.c b/drivers/net/wireless/ath/ath11k/debug_htt_stats.c
index 5db0c27de475..6b532dc99c98 100644
--- a/drivers/net/wireless/ath/ath11k/debug_htt_stats.c
+++ b/drivers/net/wireless/ath/ath11k/debug_htt_stats.c
@@ -4306,6 +4306,7 @@ void ath11k_dbg_htt_ext_stats_handler(struct ath11k_base *ab,
 	u32 len;
 	u64 cookie;
 	int ret;
+	bool send_completion = false;
 	u8 pdev_id;
 
 	msg = (struct ath11k_htt_extd_stats_msg *)skb->data;
@@ -4330,11 +4331,11 @@ void ath11k_dbg_htt_ext_stats_handler(struct ath11k_base *ab,
 		return;
 
 	spin_lock_bh(&ar->debug.htt_stats.lock);
-	if (stats_req->done) {
-		spin_unlock_bh(&ar->debug.htt_stats.lock);
-		return;
-	}
-	stats_req->done = true;
+
+	stats_req->done = FIELD_GET(HTT_T2H_EXT_STATS_INFO1_DONE, msg->info1);
+	if (stats_req->done)
+		send_completion = true;
+
 	spin_unlock_bh(&ar->debug.htt_stats.lock);
 
 	len = FIELD_GET(HTT_T2H_EXT_STATS_INFO1_LENGTH, msg->info1);
@@ -4344,7 +4345,8 @@ void ath11k_dbg_htt_ext_stats_handler(struct ath11k_base *ab,
 	if (ret)
 		ath11k_warn(ab, "Failed to parse tlv %d\n", ret);
 
-	complete(&stats_req->cmpln);
+	if (send_completion)
+		complete(&stats_req->cmpln);
 }
 
 static ssize_t ath11k_read_htt_stats_type(struct file *file,
@@ -4497,28 +4499,54 @@ static int ath11k_open_htt_stats(struct inode *inode, struct file *file)
 	if (type == ATH11K_DBG_HTT_EXT_STATS_RESET)
 		return -EPERM;
 
+	mutex_lock(&ar->conf_mutex);
+
+	if (ar->state != ATH11K_STATE_ON) {
+		ret = -ENETDOWN;
+		goto err_unlock;
+	}
+
+	if (ar->debug.htt_stats.stats_req) {
+		ret = -EAGAIN;
+		goto err_unlock;
+	}
+
 	stats_req = vzalloc(sizeof(*stats_req) + ATH11K_HTT_STATS_BUF_SIZE);
-	if (!stats_req)
-		return -ENOMEM;
+	if (!stats_req) {
+		ret = -ENOMEM;
+		goto err_unlock;
+	}
 
-	mutex_lock(&ar->conf_mutex);
 	ar->debug.htt_stats.stats_req = stats_req;
 	stats_req->type = type;
+
 	ret = ath11k_dbg_htt_stats_req(ar);
-	mutex_unlock(&ar->conf_mutex);
 	if (ret < 0)
 		goto out;
 
 	file->private_data = stats_req;
+
+	mutex_unlock(&ar->conf_mutex);
+
 	return 0;
 out:
 	vfree(stats_req);
+	ar->debug.htt_stats.stats_req = NULL;
+err_unlock:
+	mutex_unlock(&ar->conf_mutex);
+
 	return ret;
 }
 
 static int ath11k_release_htt_stats(struct inode *inode, struct file *file)
 {
+	struct ath11k *ar = inode->i_private;
+
+	mutex_lock(&ar->conf_mutex);
 	vfree(file->private_data);
+	ar->debug.htt_stats.stats_req = NULL;
+	mutex_unlock(&ar->conf_mutex);
+
 	return 0;
 }
 
diff --git a/drivers/net/wireless/ath/ath11k/dp.h b/drivers/net/wireless/ath/ath11k/dp.h
index 222de10e4b93..058a5c1d86ff 100644
--- a/drivers/net/wireless/ath/ath11k/dp.h
+++ b/drivers/net/wireless/ath/ath11k/dp.h
@@ -1517,6 +1517,7 @@ struct htt_ext_stats_cfg_params {
  *       4 bytes.
  */
 
+#define HTT_T2H_EXT_STATS_INFO1_DONE	BIT(11)
 #define HTT_T2H_EXT_STATS_INFO1_LENGTH   GENMASK(31, 16)
 
 struct ath11k_htt_extd_stats_msg {
-- 
cgit v1.2.3-59-g8ed1b


From ec431188b4197aaccfbc6e6ece4fb88f79723635 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 10:17:58 -0500
Subject: wcn36xx: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200507151758.GA4962@embeddedor
---
 drivers/net/wireless/ath/wcn36xx/hal.h      | 4 ++--
 drivers/net/wireless/ath/wcn36xx/testmode.h | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ath/wcn36xx/hal.h b/drivers/net/wireless/ath/wcn36xx/hal.h
index 6ba0fd57c951..aab5a58616fc 100644
--- a/drivers/net/wireless/ath/wcn36xx/hal.h
+++ b/drivers/net/wireless/ath/wcn36xx/hal.h
@@ -2240,7 +2240,7 @@ struct wcn36xx_hal_process_ptt_msg_req_msg {
 	struct wcn36xx_hal_msg_header header;
 
 	/* Actual FTM Command body */
-	u8 ptt_msg[0];
+	u8 ptt_msg[];
 } __packed;
 
 struct wcn36xx_hal_process_ptt_msg_rsp_msg {
@@ -2249,7 +2249,7 @@ struct wcn36xx_hal_process_ptt_msg_rsp_msg {
 	/* FTM Command response status */
 	u32 ptt_msg_resp_status;
 	/* Actual FTM Command body */
-	u8 ptt_msg[0];
+	u8 ptt_msg[];
 } __packed;
 
 struct update_edca_params_req_msg {
diff --git a/drivers/net/wireless/ath/wcn36xx/testmode.h b/drivers/net/wireless/ath/wcn36xx/testmode.h
index 4c6cfdb46580..09d68fab9add 100644
--- a/drivers/net/wireless/ath/wcn36xx/testmode.h
+++ b/drivers/net/wireless/ath/wcn36xx/testmode.h
@@ -20,7 +20,7 @@ struct ftm_rsp_msg {
 	u16 msg_id;
 	u16 msg_body_length;
 	u32 resp_status;
-	u8 msg_response[0];
+	u8 msg_response[];
 } __packed;
 
 /* The request buffer of FTM which contains a byte of command and the request */
-- 
cgit v1.2.3-59-g8ed1b


From f8d6379932dd08263d3131ab64427b134ea9e035 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 12 May 2020 21:39:13 +0200
Subject: mt76: mt7663: fix the usage WoW with net detect support

mt7615_mcu_sched_scan_enable should be called together with
mt7615_mcu_sched_scan_req so that scan plans are properly initialized.

Fixes: bd39bd2f00c3 ("mt76: mt7663: introduce WoW with net detect support")
Co-developed-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Signed-off-by: Wan-Feng Jiang <Wan-Feng.Jiang@mediatek.com>
Co-developed-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Soul Huang <Soul.Huang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 96bf39a4a3da..ef50b9a885f4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -3351,8 +3351,8 @@ mt7615_mcu_set_wow_ctrl(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	if (wowlan->nd_config) {
 		mt7615_mcu_sched_scan_req(phy, vif, wowlan->nd_config);
 		req.wow_ctrl_tlv.trigger |= BIT(5);
+		mt7615_mcu_sched_scan_enable(phy, vif, suspend);
 	}
-	mt7615_mcu_sched_scan_enable(phy, vif, suspend);
 
 	if (mt76_is_mmio(&dev->mt76))
 		req.wow_ctrl_tlv.wakeup_hif = 2;
-- 
cgit v1.2.3-59-g8ed1b


From 7cba8c30aa7baf79650dec6b7713b985be84c5f7 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 12 May 2020 22:02:47 +0200
Subject: mt76: mt7615: do not report scan_complete twice to mac80211

Fix the following kernel warning that occurs if scan work is scheduled
and the hw scan has been cancelled by mac80211

WARNING: CPU: 1 PID: 502 at net/mac80211/scan.c:391 __ieee80211_scan_completed+0x299/0x690
Modules linked in:
CPU: 1 PID: 502 Comm: kworker/u4:4 Not tainted 5.7.0-rc1+ #2882
Hardware name: QEMU Standard PC (Q35 + ICH9, 2009), BIOS 1.13.0-2.fc32 04/01/2014
Workqueue: phy0 ieee80211_scan_work
RIP: 0010:__ieee80211_scan_completed+0x299/0x690
RSP: 0018:ffffc9000036fda8 EFLAGS: 00010246
RAX: 0000000000000000 RBX: 0000000000000000 RCX: 8c6318c6318c6320
RDX: 0000000000000000 RSI: 00000000ffffffff RDI: ffff88803bb129b8
RBP: ffff88803bb10d80 R08: 0000000000000001 R09: 0000000000000001
R10: 0000000000000001 R11: 0000000000000000 R12: 0000000000000000
R13: 0000000000000000 R14: 0000000000000000 R15: ffff88803bb12ae8
FS:  0000000000000000(0000) GS:ffff88803ec00000(0000) knlGS:0000000000000000
CS:  0010 DS: 0000 ES: 0000 CR0: 0000000080050033
CR2: 00007f13ee5c1470 CR3: 000000003c790000 CR4: 00000000000006a0
Call Trace:
 ieee80211_scan_work+0x170/0x850
 ? sched_clock_cpu+0x11/0xb0
 process_one_work+0x24f/0x580
 ? worker_thread+0xcc/0x3e0
 worker_thread+0x4f/0x3e0
 ? process_one_work+0x580/0x580
 kthread+0x11b/0x140
 ? __kthread_bind_mask+0x60/0x60
 ret_from_fork+0x3a/0x50
irq event stamp: 9255532
hardirqs last  enabled at (9255531): [<ffffffff81a504e4>] _raw_spin_unlock_irq+0x24/0x30
hardirqs last disabled at (9255532): [<ffffffff81001b3f>] trace_hardirqs_off_thunk+0x1a/0x1c
softirqs last  enabled at (9255526): [<ffffffff8107c9ef>] process_one_work+0x24f/0x580
softirqs last disabled at (9255524): [<ffffffff815f2f41>] mt7615_scan_work+0x91/0xc0

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c |  8 ++++----
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 12 +++++++-----
 2 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 402ff38f7dcf..c8705f91bafc 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -810,15 +810,15 @@ void mt7615_scan_work(struct work_struct *work)
 			break;
 
 		rxd = (struct mt7615_mcu_rxd *)skb->data;
-		if (rxd->eid == MCU_EVENT_SCAN_DONE) {
+		if (rxd->eid == MCU_EVENT_SCHED_SCAN_DONE) {
+			ieee80211_sched_scan_results(phy->mt76->hw);
+		} else if (test_and_clear_bit(MT76_HW_SCANNING,
+					      &phy->mt76->state)) {
 			struct cfg80211_scan_info info = {
 				.aborted = false,
 			};
 
-			clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
 			ieee80211_scan_completed(phy->mt76->hw, &info);
-		} else {
-			ieee80211_sched_scan_results(phy->mt76->hw);
 		}
 		dev_kfree_skb(skb);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index ef50b9a885f4..9cb8a9bb912b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2861,9 +2861,6 @@ int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct mt7615_dev *dev = phy->dev;
-	struct cfg80211_scan_info info = {
-		.aborted = true,
-	};
 	struct {
 		u8 seq_num;
 		u8 is_ext_channel;
@@ -2872,8 +2869,13 @@ int mt7615_mcu_cancel_hw_scan(struct mt7615_phy *phy,
 		.seq_num = mvif->scan_seq_num,
 	};
 
-	ieee80211_scan_completed(phy->mt76->hw, &info);
-	clear_bit(MT76_HW_SCANNING, &phy->mt76->state);
+	if (test_and_clear_bit(MT76_HW_SCANNING, &phy->mt76->state)) {
+		struct cfg80211_scan_info info = {
+			.aborted = true,
+		};
+
+		ieee80211_scan_completed(phy->mt76->hw, &info);
+	}
 
 	return __mt76_mcu_send_msg(&dev->mt76,  MCU_CMD_CANCEL_HW_SCAN, &req,
 				   sizeof(req), false);
-- 
cgit v1.2.3-59-g8ed1b


From 8f3dab33da2ca4f7f72ac49281b9f2ac390d0824 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 12 May 2020 22:02:48 +0200
Subject: mt76: mt7615: reduce hw scan timeout

Differentiate hw scan channel time between passive and active scan

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 9cb8a9bb912b..e9ae7e94d8fd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2773,7 +2773,7 @@ int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
 				       MCU_CMD_SET_CHAN_DOMAIN, false);
 }
 
-#define MT7615_SCAN_CHANNEL_TIME	120
+#define MT7615_SCAN_CHANNEL_TIME	60
 int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		       struct ieee80211_scan_request *scan_req)
 {
@@ -2819,6 +2819,9 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	req->ssid_type = n_ssids ? BIT(2) : BIT(0);
 	req->ssids_num = n_ssids;
 
+	/* increase channel time for passive scan */
+	if (!sreq->n_ssids)
+		duration *= 2;
 	req->timeout_value = cpu_to_le16(sreq->n_channels * duration);
 	req->channel_min_dwell_time = cpu_to_le16(duration);
 	req->channel_dwell_time = cpu_to_le16(duration);
-- 
cgit v1.2.3-59-g8ed1b


From 50eb0a884a4cbb158369cf3128b98a97c8f431a0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 12 May 2020 22:02:49 +0200
Subject: mt76: enable p2p support

Introduce p2p-go/p2p-client support to mt76 driver

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mac80211.c      |  2 +
 drivers/net/wireless/mediatek/mt76/mt7603/init.c   |  2 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  6 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   |  3 ++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 61 +++++++++++++++++++---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  2 +
 drivers/net/wireless/mediatek/mt76/mt76x02_util.c  |  4 ++
 8 files changed, 72 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mac80211.c b/drivers/net/wireless/mediatek/mt76/mac80211.c
index 21407704f1b3..907098101898 100644
--- a/drivers/net/wireless/mediatek/mt76/mac80211.c
+++ b/drivers/net/wireless/mediatek/mt76/mac80211.c
@@ -313,6 +313,8 @@ mt76_phy_init(struct mt76_dev *dev, struct ieee80211_hw *hw)
 #ifdef CONFIG_MAC80211_MESH
 		BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
+		BIT(NL80211_IFTYPE_P2P_CLIENT) |
+		BIT(NL80211_IFTYPE_P2P_GO) |
 		BIT(NL80211_IFTYPE_ADHOC);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/init.c b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
index f641a8b56b39..94196599797e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/init.c
@@ -342,6 +342,8 @@ static const struct ieee80211_iface_limit if_limits[] = {
 #ifdef CONFIG_MAC80211_MESH
 			 BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
+			 BIT(NL80211_IFTYPE_P2P_CLIENT) |
+			 BIT(NL80211_IFTYPE_P2P_GO) |
 			 BIT(NL80211_IFTYPE_AP)
 	 },
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 6e1a17f08f5e..b5bbe9f5f7dd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -140,7 +140,9 @@ void mt7615_check_offload_capability(struct mt7615_dev *dev)
 		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
 
 		wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
-				   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR;
+				   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR |
+				   NL80211_FEATURE_P2P_GO_CTWIN |
+				   NL80211_FEATURE_P2P_GO_OPPPS;
 	} else {
 		dev->ops->hw_scan = NULL;
 		dev->ops->cancel_hw_scan = NULL;
@@ -205,6 +207,8 @@ static const struct ieee80211_iface_limit if_limits[] = {
 #ifdef CONFIG_MAC80211_MESH
 			 BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
+			 BIT(NL80211_IFTYPE_P2P_CLIENT) |
+			 BIT(NL80211_IFTYPE_P2P_GO) |
 			 BIT(NL80211_IFTYPE_STATION)
 	}
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index c8705f91bafc..e87a5843c718 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -503,6 +503,9 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & BSS_CHANGED_BEACON_ENABLED) {
 		mt7615_mcu_add_bss_info(phy, vif, NULL, info->enable_beacon);
 		mt7615_mcu_sta_add(dev, vif, NULL, info->enable_beacon);
+
+		if (vif->p2p && info->enable_beacon)
+			mt7615_mcu_set_p2p_oppps(hw, vif);
 	}
 
 	if (changed & (BSS_CHANGED_BEACON |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index e9ae7e94d8fd..92ea9dc3c1c6 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -695,9 +695,9 @@ mt7615_mcu_bss_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 			 struct ieee80211_sta *sta, bool enable)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	u32 type = vif->p2p ? NETWORK_P2P : NETWORK_INFRA;
 	struct bss_info_basic *bss;
 	u8 wlan_idx = mvif->sta.wcid.idx;
-	u32 type = NETWORK_INFRA;
 	struct tlv *tlv;
 
 	tlv = mt7615_mcu_add_tlv(skb, BSS_INFO_BASIC, sizeof(*bss));
@@ -749,10 +749,16 @@ mt7615_mcu_bss_omac_tlv(struct sk_buff *skb, struct ieee80211_vif *vif)
 	switch (vif->type) {
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
-		type = CONNECTION_INFRA_AP;
+		if (vif->p2p)
+			type = CONNECTION_P2P_GO;
+		else
+			type = CONNECTION_INFRA_AP;
 		break;
 	case NL80211_IFTYPE_STATION:
-		type = CONNECTION_INFRA_STA;
+		if (vif->p2p)
+			type = CONNECTION_P2P_GC;
+		else
+			type = CONNECTION_INFRA_STA;
 		break;
 	case NL80211_IFTYPE_ADHOC:
 		type = CONNECTION_IBSS_ADHOC;
@@ -815,6 +821,7 @@ mt7615_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 {
 	struct sta_rec_basic *basic;
 	struct tlv *tlv;
+	int conn_type;
 
 	tlv = mt7615_mcu_add_tlv(skb, STA_REC_BASIC, sizeof(*basic));
 
@@ -837,11 +844,19 @@ mt7615_mcu_sta_basic_tlv(struct sk_buff *skb, struct ieee80211_vif *vif,
 	switch (vif->type) {
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
-		basic->conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+		if (vif->p2p)
+			conn_type = CONNECTION_P2P_GC;
+		else
+			conn_type = CONNECTION_INFRA_STA;
+		basic->conn_type = cpu_to_le32(conn_type);
 		basic->aid = cpu_to_le16(sta->aid);
 		break;
 	case NL80211_IFTYPE_STATION:
-		basic->conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+		if (vif->p2p)
+			conn_type = CONNECTION_P2P_GO;
+		else
+			conn_type = CONNECTION_INFRA_AP;
+		basic->conn_type = cpu_to_le32(conn_type);
 		basic->aid = cpu_to_le16(vif->bss_conf.aid);
 		break;
 	case NL80211_IFTYPE_ADHOC:
@@ -1403,7 +1418,7 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 			.short_st = true,
 		},
 	};
-	int err;
+	int err, conn_type;
 	u8 idx;
 
 	idx = mvif->omac_idx > EXT_BSSID_START ? HW_BSSID_0 : mvif->omac_idx;
@@ -1412,10 +1427,18 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	switch (vif->type) {
 	case NL80211_IFTYPE_MESH_POINT:
 	case NL80211_IFTYPE_AP:
-		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+		if (vif->p2p)
+			conn_type = CONNECTION_P2P_GO;
+		else
+			conn_type = CONNECTION_INFRA_AP;
+		basic_req.basic.conn_type = cpu_to_le32(conn_type);
 		break;
 	case NL80211_IFTYPE_STATION:
-		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+		if (vif->p2p)
+			conn_type = CONNECTION_P2P_GC;
+		else
+			conn_type = CONNECTION_INFRA_STA;
+		basic_req.basic.conn_type = cpu_to_le32(conn_type);
 		break;
 	case NL80211_IFTYPE_ADHOC:
 		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
@@ -3550,3 +3573,25 @@ int mt7615_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
 				       MCU_UNI_CMD_OFFLOAD, true);
 }
 #endif /* CONFIG_PM */
+
+int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
+			     struct ieee80211_vif *vif)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	int ct_window = vif->bss_conf.p2p_noa_attr.oppps_ctwindow;
+	struct mt7615_dev *dev = mt7615_hw_dev(hw);
+	struct {
+		__le32 ct_win;
+		u8 bss_idx;
+		u8 rsv[3];
+	} __packed req = {
+		.ct_win = cpu_to_le32(ct_window),
+		.bss_idx = mvif->idx,
+	};
+
+	if (!mt7615_firmware_offload(dev))
+		return -ENOTSUPP;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_P2P_OPPPS,
+				   &req, sizeof(req), false);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 0f12e6da89af..338fd077f575 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -507,6 +507,7 @@ enum {
 	MCU_CMD_SET_BSS_CONNECTED = MCU_CE_PREFIX | 0x16,
 	MCU_CMD_SET_BSS_ABORT = MCU_CE_PREFIX | 0x17,
 	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
+	MCU_CMD_SET_P2P_OPPPS = MCU_CE_PREFIX | 0x33,
 	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
 	MCU_CMD_SCHED_SCAN_REQ = MCU_CE_PREFIX | 0x62,
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index dc60abb0a130..ebdfca64b079 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -530,6 +530,8 @@ int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
 void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
+int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
+			     struct ieee80211_vif *vif);
 int mt7615_firmware_own(struct mt7615_dev *dev);
 int mt7615_driver_own(struct mt7615_dev *dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index b7a120b0856d..9a2c9afa2fb5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -46,6 +46,8 @@ static const struct ieee80211_iface_limit mt76x02_if_limits[] = {
 #ifdef CONFIG_MAC80211_MESH
 			 BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
+			 BIT(NL80211_IFTYPE_P2P_CLIENT) |
+			 BIT(NL80211_IFTYPE_P2P_GO) |
 			 BIT(NL80211_IFTYPE_AP)
 	 },
 };
@@ -60,6 +62,8 @@ static const struct ieee80211_iface_limit mt76x02u_if_limits[] = {
 #ifdef CONFIG_MAC80211_MESH
 			 BIT(NL80211_IFTYPE_MESH_POINT) |
 #endif
+			 BIT(NL80211_IFTYPE_P2P_CLIENT) |
+			 BIT(NL80211_IFTYPE_P2P_GO) |
 			 BIT(NL80211_IFTYPE_AP)
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From 7a20388ed848a9e82e878d6847e0e0021be64985 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Tue, 12 May 2020 22:02:50 +0200
Subject: mt76: mt7615: configure bss info adding the interface

Send the essential BSS information from mt7615_mcu_uni_add_dev when adding
the interface. This is needed for hw_scan with multiple vifs.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Co-developed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c |  5 +-
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c  | 97 +++++++++++++++---------
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h  | 26 +++++++
 3 files changed, 89 insertions(+), 39 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index e87a5843c718..2e9e9d3519d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -156,10 +156,6 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
 	else
 		mvif->wmm_idx = mvif->idx % MT7615_MAX_WMM_SETS;
 
-	ret = mt7615_mcu_add_dev_info(dev, vif, true);
-	if (ret)
-		goto out;
-
 	dev->vif_mask |= BIT(mvif->idx);
 	dev->omac_mask |= BIT(mvif->omac_idx);
 	phy->omac_mask |= BIT(mvif->omac_idx);
@@ -182,6 +178,7 @@ static int mt7615_add_interface(struct ieee80211_hw *hw,
 		mt76_txq_init(&dev->mt76, vif->txq);
 	}
 
+	ret = mt7615_mcu_add_dev_info(dev, vif, true);
 out:
 	mutex_unlock(&dev->mt76.mutex);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 92ea9dc3c1c6..b944f372738a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -1294,7 +1294,7 @@ mt7615_mcu_uni_add_dev(struct mt7615_dev *dev,
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
 	struct {
-		struct req_hdr {
+		struct {
 			u8 omac_idx;
 			u8 band_idx;
 			__le16 pad;
@@ -1306,7 +1306,7 @@ mt7615_mcu_uni_add_dev(struct mt7615_dev *dev,
 			u8 pad;
 			u8 omac_addr[ETH_ALEN];
 		} __packed tlv;
-	} data = {
+	} dev_req = {
 		.hdr = {
 			.omac_idx = mvif->omac_idx,
 			.band_idx = mvif->band_idx,
@@ -1317,11 +1317,65 @@ mt7615_mcu_uni_add_dev(struct mt7615_dev *dev,
 			.active = enable,
 		},
 	};
+	struct {
+		struct {
+			u8 bss_idx;
+			u8 pad[3];
+		} __packed hdr;
+		struct mt7615_bss_basic_tlv basic;
+	} basic_req = {
+		.hdr = {
+			.bss_idx = mvif->idx,
+		},
+		.basic = {
+			.tag = cpu_to_le16(UNI_BSS_INFO_BASIC),
+			.len = cpu_to_le16(sizeof(struct mt7615_bss_basic_tlv)),
+			.omac_idx = mvif->omac_idx,
+			.band_idx = mvif->band_idx,
+			.wmm_idx = mvif->wmm_idx,
+			.active = enable,
+			.bmc_tx_wlan_idx = cpu_to_le16(mvif->sta.wcid.idx),
+			.sta_idx = cpu_to_le16(mvif->sta.wcid.idx),
+			.conn_state = 1,
+		},
+	};
+	int err, idx, cmd, len;
+	void *data;
 
-	memcpy(data.tlv.omac_addr, vif->addr, ETH_ALEN);
+	switch (vif->type) {
+	case NL80211_IFTYPE_MESH_POINT:
+	case NL80211_IFTYPE_AP:
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_AP);
+		break;
+	case NL80211_IFTYPE_STATION:
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_INFRA_STA);
+		break;
+	case NL80211_IFTYPE_ADHOC:
+		basic_req.basic.conn_type = cpu_to_le32(CONNECTION_IBSS_ADHOC);
+		break;
+	default:
+		WARN_ON(1);
+		break;
+	}
 
-	return __mt76_mcu_send_msg(&dev->mt76, MCU_UNI_CMD_DEV_INFO_UPDATE,
-				   &data, sizeof(data), true);
+	idx = mvif->omac_idx > EXT_BSSID_START ? HW_BSSID_0 : mvif->omac_idx;
+	basic_req.basic.hw_bss_idx = idx;
+
+	memcpy(dev_req.tlv.omac_addr, vif->addr, ETH_ALEN);
+
+	cmd = enable ? MCU_UNI_CMD_DEV_INFO_UPDATE : MCU_UNI_CMD_BSS_INFO_UPDATE;
+	data = enable ? (void *)&dev_req : (void *)&basic_req;
+	len = enable ? sizeof(dev_req) : sizeof(basic_req);
+
+	err = __mt76_mcu_send_msg(&dev->mt76, cmd, data, len, true);
+	if (err < 0)
+		return err;
+
+	cmd = enable ? MCU_UNI_CMD_BSS_INFO_UPDATE : MCU_UNI_CMD_DEV_INFO_UPDATE;
+	data = enable ? (void *)&basic_req : (void *)&dev_req;
+	len = enable ? sizeof(basic_req) : sizeof(dev_req);
+
+	return __mt76_mcu_send_msg(&dev->mt76, cmd, data, len, true);
 }
 
 static int
@@ -1343,44 +1397,20 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 			u8 bss_idx;
 			u8 pad[3];
 		} __packed hdr;
-		struct basic_tlv {
-			__le16 tag;
-			__le16 len;
-			u8 active;
-			u8 omac_idx;
-			u8 hw_bss_idx;
-			u8 band_idx;
-			__le32 conn_type;
-			u8 conn_state;
-			u8 wmm_idx;
-			u8 bssid[ETH_ALEN];
-			__le16 bmc_tx_wlan_idx;
-			__le16 bcn_interval;
-			u8 dtim_period;
-			u8 phymode; /* bit(0): A
-				     * bit(1): B
-				     * bit(2): G
-				     * bit(3): GN
-				     * bit(4): AN
-				     * bit(5): AC
-				     */
-			__le16 sta_idx;
-			u8 nonht_basic_phy;
-			u8 pad[3];
-		} __packed basic;
+		struct mt7615_bss_basic_tlv basic;
 	} basic_req = {
 		.hdr = {
 			.bss_idx = mvif->idx,
 		},
 		.basic = {
 			.tag = cpu_to_le16(UNI_BSS_INFO_BASIC),
-			.len = cpu_to_le16(sizeof(struct basic_tlv)),
+			.len = cpu_to_le16(sizeof(struct mt7615_bss_basic_tlv)),
 			.bcn_interval = cpu_to_le16(vif->bss_conf.beacon_int),
 			.dtim_period = vif->bss_conf.dtim_period,
 			.omac_idx = mvif->omac_idx,
 			.band_idx = mvif->band_idx,
 			.wmm_idx = mvif->wmm_idx,
-			.active = enable,
+			.active = true, /* keep bss deactivated */
 			.phymode = 0x38,
 		},
 	};
@@ -1458,9 +1488,6 @@ mt7615_mcu_uni_add_bss(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 	if (err < 0)
 		return err;
 
-	if (!mt7615_firmware_offload(dev))
-		return 0;
-
 	switch (chandef->width) {
 	case NL80211_CHAN_WIDTH_40:
 		rlm_req.rlm.bw = CMD_CBW_40MHZ;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 338fd077f575..348521b0d44c 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -425,6 +425,32 @@ struct mt7615_mcu_bss_event {
 	u8 pad;
 } __packed;
 
+struct mt7615_bss_basic_tlv {
+	__le16 tag;
+	__le16 len;
+	u8 active;
+	u8 omac_idx;
+	u8 hw_bss_idx;
+	u8 band_idx;
+	__le32 conn_type;
+	u8 conn_state;
+	u8 wmm_idx;
+	u8 bssid[ETH_ALEN];
+	__le16 bmc_tx_wlan_idx;
+	__le16 bcn_interval;
+	u8 dtim_period;
+	u8 phymode; /* bit(0): A
+		     * bit(1): B
+		     * bit(2): G
+		     * bit(3): GN
+		     * bit(4): AN
+		     * bit(5): AC
+		     */
+	__le16 sta_idx;
+	u8 nonht_basic_phy;
+	u8 pad[3];
+} __packed;
+
 struct mt7615_wow_ctrl_tlv {
 	__le16 tag;
 	__le16 len;
-- 
cgit v1.2.3-59-g8ed1b


From bd2b3161dba88ea11c99ce957cc52940905500b9 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Wed, 13 May 2020 10:25:08 +0800
Subject: net: dsa: felix: qos classified based on pcp

Set the default QoS classification based on the PCP and DEI bits of the
VLAN tag, so that frames can be classified to different QoS classes based
on their PCP value. If there is no VLAN tag, or the VLAN tag is ignored,
the port default QoS class is used.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index a2dfd73f8a1a..58d6b0f454e5 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -289,6 +289,27 @@ static void felix_phylink_mac_link_up(struct dsa_switch *ds, int port,
 			 QSYS_SWITCH_PORT_MODE, port);
 }
 
+static void felix_port_qos_map_init(struct ocelot *ocelot, int port)
+{
+	int i;
+
+	ocelot_rmw_gix(ocelot,
+		       ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+		       ANA_PORT_QOS_CFG_QOS_PCP_ENA,
+		       ANA_PORT_QOS_CFG,
+		       port);
+
+	for (i = 0; i < FELIX_NUM_TC * 2; i++) {
+		ocelot_rmw_ix(ocelot,
+			      (ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL & i) |
+			      ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL(i),
+			      ANA_PORT_PCP_DEI_MAP_DP_PCP_DEI_VAL |
+			      ANA_PORT_PCP_DEI_MAP_QOS_PCP_DEI_VAL_M,
+			      ANA_PORT_PCP_DEI_MAP,
+			      port, i);
+	}
+}
+
 static void felix_get_strings(struct dsa_switch *ds, int port,
 			      u32 stringset, u8 *data)
 {
@@ -547,6 +568,11 @@ static int felix_setup(struct dsa_switch *ds)
 			ocelot_configure_cpu(ocelot, port,
 					     OCELOT_TAG_PREFIX_NONE,
 					     OCELOT_TAG_PREFIX_LONG);
+
+		/* Set the default QoS Classification based on PCP and DEI
+		 * bits of vlan tag.
+		 */
+		felix_port_qos_map_init(ocelot, port);
 	}
 
 	/* Include the CPU port module in the forwarding mask for unknown
-- 
cgit v1.2.3-59-g8ed1b


From de143c0e274b95ba0513acf8e60b3b87d24335fa Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Wed, 13 May 2020 10:25:09 +0800
Subject: net: dsa: felix: Configure Time-Aware Scheduler via taprio offload

The Ocelot VSC9959 switch supports time-based egress shaping in hardware
according to IEEE 802.1Qbv. This patch adds support for TAS configuration
on the egress ports of the VSC9959 switch.

The Felix driver is an instance of the Ocelot family, with a DSA front-end.
The patch uses the tc taprio hardware offload to set up the TAS
configuration in the felix driver.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Reviewed-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c         |  19 +++++
 drivers/net/dsa/ocelot/felix.h         |   5 ++
 drivers/net/dsa/ocelot/felix_vsc9959.c | 140 +++++++++++++++++++++++++++++++++
 3 files changed, 164 insertions(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 58d6b0f454e5..d2b114c96952 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -237,6 +237,10 @@ static void felix_phylink_mac_config(struct dsa_switch *ds, int port,
 
 	if (felix->info->pcs_init)
 		felix->info->pcs_init(ocelot, port, link_an_mode, state);
+
+	if (felix->info->port_sched_speed_set)
+		felix->info->port_sched_speed_set(ocelot, port,
+						  state->speed);
 }
 
 static void felix_phylink_mac_an_restart(struct dsa_switch *ds, int port)
@@ -730,6 +734,19 @@ static void felix_port_policer_del(struct dsa_switch *ds, int port)
 	ocelot_port_policer_del(ocelot, port);
 }
 
+static int felix_port_setup_tc(struct dsa_switch *ds, int port,
+			       enum tc_setup_type type,
+			       void *type_data)
+{
+	struct ocelot *ocelot = ds->priv;
+	struct felix *felix = ocelot_to_felix(ocelot);
+
+	if (felix->info->port_setup_tc)
+		return felix->info->port_setup_tc(ds, port, type, type_data);
+	else
+		return -EOPNOTSUPP;
+}
+
 static const struct dsa_switch_ops felix_switch_ops = {
 	.get_tag_protocol	= felix_get_tag_protocol,
 	.setup			= felix_setup,
@@ -768,6 +785,7 @@ static const struct dsa_switch_ops felix_switch_ops = {
 	.cls_flower_add		= felix_cls_flower_add,
 	.cls_flower_del		= felix_cls_flower_del,
 	.cls_flower_stats	= felix_cls_flower_stats,
+	.port_setup_tc          = felix_port_setup_tc,
 };
 
 static struct felix_info *felix_instance_tbl[] = {
@@ -856,6 +874,7 @@ static int felix_pci_probe(struct pci_dev *pdev,
 
 	ds->dev = &pdev->dev;
 	ds->num_ports = felix->info->num_ports;
+	ds->num_tx_queues = felix->info->num_tx_queues;
 	ds->ops = &felix_switch_ops;
 	ds->priv = ocelot;
 	felix->ds = ds;
diff --git a/drivers/net/dsa/ocelot/felix.h b/drivers/net/dsa/ocelot/felix.h
index b94386fa8d63..352f7b940af7 100644
--- a/drivers/net/dsa/ocelot/felix.h
+++ b/drivers/net/dsa/ocelot/felix.h
@@ -20,6 +20,7 @@ struct felix_info {
 	const struct ocelot_stat_layout	*stats_layout;
 	unsigned int			num_stats;
 	int				num_ports;
+	int                             num_tx_queues;
 	struct vcap_field		*vcap_is2_keys;
 	struct vcap_field		*vcap_is2_actions;
 	const struct vcap_props		*vcap;
@@ -35,6 +36,10 @@ struct felix_info {
 				  struct phylink_link_state *state);
 	int	(*prevalidate_phy_mode)(struct ocelot *ocelot, int port,
 					phy_interface_t phy_mode);
+	int	(*port_setup_tc)(struct dsa_switch *ds, int port,
+				 enum tc_setup_type type, void *type_data);
+	void	(*port_sched_speed_set)(struct ocelot *ocelot, int port,
+					u32 speed);
 };
 
 extern struct felix_info		felix_info_vsc9959;
diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index 1c56568d5aca..efdcc547e0c9 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -3,9 +3,12 @@
  * Copyright 2018-2019 NXP Semiconductors
  */
 #include <linux/fsl/enetc_mdio.h>
+#include <soc/mscc/ocelot_qsys.h>
 #include <soc/mscc/ocelot_vcap.h>
+#include <soc/mscc/ocelot_ptp.h>
 #include <soc/mscc/ocelot_sys.h>
 #include <soc/mscc/ocelot.h>
+#include <net/pkt_sched.h>
 #include <linux/iopoll.h>
 #include <linux/pci.h>
 #include "felix.h"
@@ -27,6 +30,8 @@
 #define USXGMII_LPA_DUPLEX(lpa)		(((lpa) & GENMASK(12, 12)) >> 12)
 #define USXGMII_LPA_SPEED(lpa)		(((lpa) & GENMASK(11, 9)) >> 9)
 
+#define VSC9959_TAS_GCL_ENTRY_MAX	63
+
 enum usxgmii_speed {
 	USXGMII_SPEED_10	= 0,
 	USXGMII_SPEED_100	= 1,
@@ -1209,6 +1214,138 @@ static void vsc9959_mdio_bus_free(struct ocelot *ocelot)
 	mdiobus_unregister(felix->imdio);
 }
 
+static void vsc9959_sched_speed_set(struct ocelot *ocelot, int port,
+				    u32 speed)
+{
+	ocelot_rmw_rix(ocelot,
+		       QSYS_TAG_CONFIG_LINK_SPEED(speed),
+		       QSYS_TAG_CONFIG_LINK_SPEED_M,
+		       QSYS_TAG_CONFIG, port);
+}
+
+static void vsc9959_new_base_time(struct ocelot *ocelot, ktime_t base_time,
+				  u64 cycle_time,
+				  struct timespec64 *new_base_ts)
+{
+	struct timespec64 ts;
+	ktime_t new_base_time;
+	ktime_t current_time;
+
+	ocelot_ptp_gettime64(&ocelot->ptp_info, &ts);
+	current_time = timespec64_to_ktime(ts);
+	new_base_time = base_time;
+
+	if (base_time < current_time) {
+		u64 nr_of_cycles = current_time - base_time;
+
+		do_div(nr_of_cycles, cycle_time);
+		new_base_time += cycle_time * (nr_of_cycles + 1);
+	}
+
+	*new_base_ts = ktime_to_timespec64(new_base_time);
+}
+
+static u32 vsc9959_tas_read_cfg_status(struct ocelot *ocelot)
+{
+	return ocelot_read(ocelot, QSYS_TAS_PARAM_CFG_CTRL);
+}
+
+static void vsc9959_tas_gcl_set(struct ocelot *ocelot, const u32 gcl_ix,
+				struct tc_taprio_sched_entry *entry)
+{
+	ocelot_write(ocelot,
+		     QSYS_GCL_CFG_REG_1_GCL_ENTRY_NUM(gcl_ix) |
+		     QSYS_GCL_CFG_REG_1_GATE_STATE(entry->gate_mask),
+		     QSYS_GCL_CFG_REG_1);
+	ocelot_write(ocelot, entry->interval, QSYS_GCL_CFG_REG_2);
+}
+
+static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
+				    struct tc_taprio_qopt_offload *taprio)
+{
+	struct timespec64 base_ts;
+	int ret, i;
+	u32 val;
+
+	if (!taprio->enable) {
+		ocelot_rmw_rix(ocelot,
+			       QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF),
+			       QSYS_TAG_CONFIG_ENABLE |
+			       QSYS_TAG_CONFIG_INIT_GATE_STATE_M,
+			       QSYS_TAG_CONFIG, port);
+
+		return 0;
+	}
+
+	if (taprio->cycle_time > NSEC_PER_SEC ||
+	    taprio->cycle_time_extension >= NSEC_PER_SEC)
+		return -EINVAL;
+
+	if (taprio->num_entries > VSC9959_TAS_GCL_ENTRY_MAX)
+		return -ERANGE;
+
+	ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM(port) |
+		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
+		   QSYS_TAS_PARAM_CFG_CTRL_PORT_NUM_M |
+		   QSYS_TAS_PARAM_CFG_CTRL_ALWAYS_GUARD_BAND_SCH_Q,
+		   QSYS_TAS_PARAM_CFG_CTRL);
+
+	/* Hardware errata -  Admin config could not be overwritten if
+	 * config is pending, need reset the TAS module
+	 */
+	val = ocelot_read(ocelot, QSYS_PARAM_STATUS_REG_8);
+	if (val & QSYS_PARAM_STATUS_REG_8_CONFIG_PENDING)
+		return  -EBUSY;
+
+	ocelot_rmw_rix(ocelot,
+		       QSYS_TAG_CONFIG_ENABLE |
+		       QSYS_TAG_CONFIG_INIT_GATE_STATE(0xFF) |
+		       QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES(0xFF),
+		       QSYS_TAG_CONFIG_ENABLE |
+		       QSYS_TAG_CONFIG_INIT_GATE_STATE_M |
+		       QSYS_TAG_CONFIG_SCH_TRAFFIC_QUEUES_M,
+		       QSYS_TAG_CONFIG, port);
+
+	vsc9959_new_base_time(ocelot, taprio->base_time,
+			      taprio->cycle_time, &base_ts);
+	ocelot_write(ocelot, base_ts.tv_nsec, QSYS_PARAM_CFG_REG_1);
+	ocelot_write(ocelot, lower_32_bits(base_ts.tv_sec), QSYS_PARAM_CFG_REG_2);
+	val = upper_32_bits(base_ts.tv_sec);
+	ocelot_write(ocelot,
+		     QSYS_PARAM_CFG_REG_3_BASE_TIME_SEC_MSB(val) |
+		     QSYS_PARAM_CFG_REG_3_LIST_LENGTH(taprio->num_entries),
+		     QSYS_PARAM_CFG_REG_3);
+	ocelot_write(ocelot, taprio->cycle_time, QSYS_PARAM_CFG_REG_4);
+	ocelot_write(ocelot, taprio->cycle_time_extension, QSYS_PARAM_CFG_REG_5);
+
+	for (i = 0; i < taprio->num_entries; i++)
+		vsc9959_tas_gcl_set(ocelot, i, &taprio->entries[i]);
+
+	ocelot_rmw(ocelot, QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE,
+		   QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE,
+		   QSYS_TAS_PARAM_CFG_CTRL);
+
+	ret = readx_poll_timeout(vsc9959_tas_read_cfg_status, ocelot, val,
+				 !(val & QSYS_TAS_PARAM_CFG_CTRL_CONFIG_CHANGE),
+				 10, 100000);
+
+	return ret;
+}
+
+static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
+				 enum tc_setup_type type,
+				 void *type_data)
+{
+	struct ocelot *ocelot = ds->priv;
+
+	switch (type) {
+	case TC_SETUP_QDISC_TAPRIO:
+		return vsc9959_qos_port_tas_set(ocelot, port, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
 struct felix_info felix_info_vsc9959 = {
 	.target_io_res		= vsc9959_target_io_res,
 	.port_io_res		= vsc9959_port_io_res,
@@ -1224,6 +1361,7 @@ struct felix_info felix_info_vsc9959 = {
 	.shared_queue_sz	= 128 * 1024,
 	.num_mact_rows		= 2048,
 	.num_ports		= 6,
+	.num_tx_queues		= FELIX_NUM_TC,
 	.switch_pci_bar		= 4,
 	.imdio_pci_bar		= 0,
 	.mdio_bus_alloc		= vsc9959_mdio_bus_alloc,
@@ -1232,4 +1370,6 @@ struct felix_info felix_info_vsc9959 = {
 	.pcs_an_restart		= vsc9959_pcs_an_restart,
 	.pcs_link_state		= vsc9959_pcs_link_state,
 	.prevalidate_phy_mode	= vsc9959_prevalidate_phy_mode,
+	.port_setup_tc          = vsc9959_port_setup_tc,
+	.port_sched_speed_set   = vsc9959_sched_speed_set,
 };
-- 
cgit v1.2.3-59-g8ed1b


From 0fbabf875d184eee21b94d8b8a9c83d5df5cb3d4 Mon Sep 17 00:00:00 2001
From: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Date: Wed, 13 May 2020 10:25:10 +0800
Subject: net: dsa: felix: add support Credit Based Shaper(CBS) for hardware
 offload

The VSC9959 hardware supports the Credit Based Shaper (CBS), which is part
of IEEE 802.1Qav. This patch adds support for offloading sch_cbs to the
VSC9959.

Signed-off-by: Xiaoliang Yang <xiaoliang.yang_1@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 50 +++++++++++++++++++++++++++++++++-
 1 file changed, 49 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index efdcc547e0c9..df4498c0e864 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -207,7 +207,7 @@ static const u32 vsc9959_qsys_regmap[] = {
 	REG(QSYS_QMAXSDU_CFG_6,			0x00f62c),
 	REG(QSYS_QMAXSDU_CFG_7,			0x00f648),
 	REG(QSYS_PREEMPTION_CFG,		0x00f664),
-	REG_RESERVED(QSYS_CIR_CFG),
+	REG(QSYS_CIR_CFG,			0x000000),
 	REG(QSYS_EIR_CFG,			0x000004),
 	REG(QSYS_SE_CFG,			0x000008),
 	REG(QSYS_SE_DWRR_CFG,			0x00000c),
@@ -1332,6 +1332,52 @@ static int vsc9959_qos_port_tas_set(struct ocelot *ocelot, int port,
 	return ret;
 }
 
+static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
+				    struct tc_cbs_qopt_offload *cbs_qopt)
+{
+	struct ocelot *ocelot = ds->priv;
+	int port_ix = port * 8 + cbs_qopt->queue;
+	u32 rate, burst;
+
+	if (cbs_qopt->queue >= ds->num_tx_queues)
+		return -EINVAL;
+
+	if (!cbs_qopt->enable) {
+		ocelot_write_gix(ocelot, QSYS_CIR_CFG_CIR_RATE(0) |
+				 QSYS_CIR_CFG_CIR_BURST(0),
+				 QSYS_CIR_CFG, port_ix);
+
+		ocelot_rmw_gix(ocelot, 0, QSYS_SE_CFG_SE_AVB_ENA,
+			       QSYS_SE_CFG, port_ix);
+
+		return 0;
+	}
+
+	/* Rate unit is 100 kbps */
+	rate = DIV_ROUND_UP(cbs_qopt->idleslope, 100);
+	/* Avoid using zero rate */
+	rate = clamp_t(u32, rate, 1, GENMASK(14, 0));
+	/* Burst unit is 4kB */
+	burst = DIV_ROUND_UP(cbs_qopt->hicredit, 4096);
+	/* Avoid using zero burst size */
+	burst = clamp_t(u32, burst, 1, GENMASK(5, 0));
+	ocelot_write_gix(ocelot,
+			 QSYS_CIR_CFG_CIR_RATE(rate) |
+			 QSYS_CIR_CFG_CIR_BURST(burst),
+			 QSYS_CIR_CFG,
+			 port_ix);
+
+	ocelot_rmw_gix(ocelot,
+		       QSYS_SE_CFG_SE_FRM_MODE(0) |
+		       QSYS_SE_CFG_SE_AVB_ENA,
+		       QSYS_SE_CFG_SE_AVB_ENA |
+		       QSYS_SE_CFG_SE_FRM_MODE_M,
+		       QSYS_SE_CFG,
+		       port_ix);
+
+	return 0;
+}
+
 static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
 				 enum tc_setup_type type,
 				 void *type_data)
@@ -1341,6 +1387,8 @@ static int vsc9959_port_setup_tc(struct dsa_switch *ds, int port,
 	switch (type) {
 	case TC_SETUP_QDISC_TAPRIO:
 		return vsc9959_qos_port_tas_set(ocelot, port, type_data);
+	case TC_SETUP_QDISC_CBS:
+		return vsc9959_qos_port_cbs_set(ds, port, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From cd49291ce18aeef3f2ec950bc99bd72d5a05fa86 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 12 May 2020 12:24:42 -0700
Subject: selftests/bpf: Extract parse_num_list into generic testing_helpers.c

Add testing_helpers.c, which will contain generic helpers for test runners and
tests needing some common generic functionality, like parsing a set of
numbers.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200512192445.2351848-2-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile          |  3 +-
 tools/testing/selftests/bpf/test_progs.c      | 67 ++-------------------------
 tools/testing/selftests/bpf/test_progs.h      |  1 +
 tools/testing/selftests/bpf/testing_helpers.c | 66 ++++++++++++++++++++++++++
 tools/testing/selftests/bpf/testing_helpers.h |  5 ++
 5 files changed, 78 insertions(+), 64 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/testing_helpers.c
 create mode 100644 tools/testing/selftests/bpf/testing_helpers.h

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1f878dcd2bf6..975b97b85bca 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -355,7 +355,8 @@ endef
 TRUNNER_TESTS_DIR := prog_tests
 TRUNNER_BPF_PROGS_DIR := progs
 TRUNNER_EXTRA_SOURCES := test_progs.c cgroup_helpers.c trace_helpers.c	\
-			 network_helpers.c flow_dissector_load.h
+			 network_helpers.c testing_helpers.c		\
+			 flow_dissector_load.h
 TRUNNER_EXTRA_FILES := $(OUTPUT)/urandom_read				\
 		       $(wildcard progs/btf_dump_test_case_*.c)
 TRUNNER_BPF_BUILD_RULE := CLANG_BPF_BUILD_RULE
diff --git a/tools/testing/selftests/bpf/test_progs.c b/tools/testing/selftests/bpf/test_progs.c
index 0f411fdc4f6d..54fa5fa688ce 100644
--- a/tools/testing/selftests/bpf/test_progs.c
+++ b/tools/testing/selftests/bpf/test_progs.c
@@ -438,67 +438,6 @@ err:
 	return -ENOMEM;
 }
 
-int parse_num_list(const char *s, struct test_selector *sel)
-{
-	int i, set_len = 0, new_len, num, start = 0, end = -1;
-	bool *set = NULL, *tmp, parsing_end = false;
-	char *next;
-
-	while (s[0]) {
-		errno = 0;
-		num = strtol(s, &next, 10);
-		if (errno)
-			return -errno;
-
-		if (parsing_end)
-			end = num;
-		else
-			start = num;
-
-		if (!parsing_end && *next == '-') {
-			s = next + 1;
-			parsing_end = true;
-			continue;
-		} else if (*next == ',') {
-			parsing_end = false;
-			s = next + 1;
-			end = num;
-		} else if (*next == '\0') {
-			parsing_end = false;
-			s = next;
-			end = num;
-		} else {
-			return -EINVAL;
-		}
-
-		if (start > end)
-			return -EINVAL;
-
-		if (end + 1 > set_len) {
-			new_len = end + 1;
-			tmp = realloc(set, new_len);
-			if (!tmp) {
-				free(set);
-				return -ENOMEM;
-			}
-			for (i = set_len; i < start; i++)
-				tmp[i] = false;
-			set = tmp;
-			set_len = new_len;
-		}
-		for (i = start; i <= end; i++)
-			set[i] = true;
-	}
-
-	if (!set)
-		return -EINVAL;
-
-	sel->num_set = set;
-	sel->num_set_len = set_len;
-
-	return 0;
-}
-
 extern int extra_prog_load_log_flags;
 
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
@@ -512,13 +451,15 @@ static error_t parse_arg(int key, char *arg, struct argp_state *state)
 		if (subtest_str) {
 			*subtest_str = '\0';
 			if (parse_num_list(subtest_str + 1,
-					   &env->subtest_selector)) {
+					   &env->subtest_selector.num_set,
+					   &env->subtest_selector.num_set_len)) {
 				fprintf(stderr,
 					"Failed to parse subtest numbers.\n");
 				return -EINVAL;
 			}
 		}
-		if (parse_num_list(arg, &env->test_selector)) {
+		if (parse_num_list(arg, &env->test_selector.num_set,
+				   &env->test_selector.num_set_len)) {
 			fprintf(stderr, "Failed to parse test numbers.\n");
 			return -EINVAL;
 		}
diff --git a/tools/testing/selftests/bpf/test_progs.h b/tools/testing/selftests/bpf/test_progs.h
index 83287c76332b..f4503c926aca 100644
--- a/tools/testing/selftests/bpf/test_progs.h
+++ b/tools/testing/selftests/bpf/test_progs.h
@@ -37,6 +37,7 @@ typedef __u16 __sum16;
 #include "bpf_util.h"
 #include <bpf/bpf_endian.h>
 #include "trace_helpers.h"
+#include "testing_helpers.h"
 #include "flow_dissector_load.h"
 
 enum verbosity {
diff --git a/tools/testing/selftests/bpf/testing_helpers.c b/tools/testing/selftests/bpf/testing_helpers.c
new file mode 100644
index 000000000000..0af6337a8962
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.c
@@ -0,0 +1,96 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdlib.h>
+#include <errno.h>
+#include <limits.h>
+#include "testing_helpers.h"
+
+/*
+ * Parse a comma-separated list of non-negative numbers and inclusive ranges
+ * (e.g. "0,2-4") into a boolean membership array.
+ *
+ * On success, returns 0, stores a heap-allocated array in *num_set and its
+ * length in *num_set_len; (*num_set)[i] is true iff i was listed. The caller
+ * owns the array and releases it with free().
+ *
+ * On failure, returns a negative errno value, frees any partially built
+ * array and leaves *num_set and *num_set_len untouched.
+ */
+int parse_num_list(const char *s, bool **num_set, int *num_set_len)
+{
+	int i, set_len = 0, new_len, num, start = 0, end = -1;
+	bool *set = NULL, *tmp, parsing_end = false;
+	int err = -EINVAL;
+	char *next;
+	long val;
+
+	while (s[0]) {
+		errno = 0;
+		val = strtol(s, &next, 10);
+		if (errno) {
+			err = -errno;
+			goto err_free;
+		}
+
+		/* Reject a missing number, a negative index (it would write
+		 * before the array) and a value whose "end + 1" length would
+		 * not fit in an int.
+		 */
+		if (next == s || val < 0 || val >= INT_MAX)
+			goto err_free;
+		num = val;
+
+		if (parsing_end)
+			end = num;
+		else
+			start = num;
+
+		if (!parsing_end && *next == '-') {
+			s = next + 1;
+			parsing_end = true;
+			continue;
+		} else if (*next == ',') {
+			parsing_end = false;
+			s = next + 1;
+			end = num;
+		} else if (*next == '\0') {
+			parsing_end = false;
+			s = next;
+			end = num;
+		} else {
+			goto err_free;
+		}
+
+		if (start > end)
+			goto err_free;
+
+		if (end + 1 > set_len) {
+			new_len = end + 1;
+			tmp = realloc(set, new_len * sizeof(*tmp));
+			if (!tmp) {
+				err = -ENOMEM;
+				goto err_free;
+			}
+			/* realloc() leaves the new tail uninitialized */
+			for (i = set_len; i < start; i++)
+				tmp[i] = false;
+			set = tmp;
+			set_len = new_len;
+		}
+		for (i = start; i <= end; i++)
+			set[i] = true;
+	}
+
+	/* Empty input, or a range left open by a trailing '-' */
+	if (!set || parsing_end)
+		goto err_free;
+
+	*num_set = set;
+	*num_set_len = set_len;
+
+	return 0;
+
+err_free:
+	free(set);
+	return err;
+}
diff --git a/tools/testing/selftests/bpf/testing_helpers.h b/tools/testing/selftests/bpf/testing_helpers.h
new file mode 100644
index 000000000000..923b51762759
--- /dev/null
+++ b/tools/testing/selftests/bpf/testing_helpers.h
@@ -0,0 +1,5 @@
+/* SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) */
+/* Copyright (C) 2020 Facebook, Inc. */
+#include <stdbool.h>
+
+int parse_num_list(const char *s, bool **set, int *set_len);
-- 
cgit v1.2.3-59-g8ed1b


From 8e7c2a023ac04e04c72cd7b640329511dda92672 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 12 May 2020 12:24:43 -0700
Subject: selftests/bpf: Add benchmark runner infrastructure
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While working on BPF ringbuf implementation, testing, and benchmarking, I've
developed a pretty generic and modular benchmark runner, which seems to be
generically useful, as I've already used it for one more purpose (testing
fastest way to trigger BPF program, to minimize overhead of in-kernel code).

This patch adds generic part of benchmark runner and sets up Makefile for
extending it with more sets of benchmarks.

Benchmarker itself operates by spinning up specified number of producer and
consumer threads, setting up interval timer sending SIGALRM signal to
application once a second. Every second, current snapshot with hits/drops
counters are collected and stored in an array. Drops are useful for
producer/consumer benchmarks in which producer might overwhelm consumers.

Once test finishes after given amount of warm-up and testing seconds, mean and
stddev are calculated (ignoring warm-up results) and printed out to stdout.
This setup seems to give consistent and accurate results.

To validate behavior, I added two atomic counting tests: global and local.
For global one, all the producer threads are atomically incrementing same
counter as fast as possible. This, of course, leads to huge drop of
performance once there is more than one producer thread due to CPUs fighting
for the same memory location.

Local counting, on the other hand, maintains one counter per each producer
thread, incremented independently. Once per second, all counters are read and
added together to form final "counting throughput" measurement. As expected,
such setup demonstrates linear scalability with number of producers (as long
as there are enough physical CPU cores, of course). See example output below.
Also, this setup can nicely demonstrate disastrous effects of false sharing,
if care is not taken to take those per-producer counters apart into
independent cache lines.

Demo output shows global counter first with 1 producer, then with 4. Both
total and per-producer performance significantly drop. The last run is local
counter with 4 producers, demonstrating near-perfect scalability.

$ ./bench -a -w1 -d2 -p1 count-global
Setting up benchmark 'count-global'...
Benchmark 'count-global' started.
Iter   0 ( 24.822us): hits  148.179M/s (148.179M/prod), drops    0.000M/s
Iter   1 ( 37.939us): hits  149.308M/s (149.308M/prod), drops    0.000M/s
Iter   2 (-10.774us): hits  150.717M/s (150.717M/prod), drops    0.000M/s
Iter   3 (  3.807us): hits  151.435M/s (151.435M/prod), drops    0.000M/s
Summary: hits  150.488 ± 1.079M/s (150.488M/prod), drops    0.000 ± 0.000M/s

$ ./bench -a -w1 -d2 -p4 count-global
Setting up benchmark 'count-global'...
Benchmark 'count-global' started.
Iter   0 ( 60.659us): hits   53.910M/s ( 13.477M/prod), drops    0.000M/s
Iter   1 (-17.658us): hits   53.722M/s ( 13.431M/prod), drops    0.000M/s
Iter   2 (  5.865us): hits   53.495M/s ( 13.374M/prod), drops    0.000M/s
Iter   3 (  0.104us): hits   53.606M/s ( 13.402M/prod), drops    0.000M/s
Summary: hits   53.608 ± 0.113M/s ( 13.402M/prod), drops    0.000 ± 0.000M/s

$ ./bench -a -w1 -d2 -p4 count-local
Setting up benchmark 'count-local'...
Benchmark 'count-local' started.
Iter   0 ( 23.388us): hits  640.450M/s (160.113M/prod), drops    0.000M/s
Iter   1 (  2.291us): hits  605.661M/s (151.415M/prod), drops    0.000M/s
Iter   2 ( -6.415us): hits  607.092M/s (151.773M/prod), drops    0.000M/s
Iter   3 ( -1.361us): hits  601.796M/s (150.449M/prod), drops    0.000M/s
Summary: hits  604.849 ± 2.739M/s (151.212M/prod), drops    0.000 ± 0.000M/s

Benchmark runner supports setting thread affinity for producer and consumer
threads. You can use -a flag for default CPU selection scheme, where first
consumer gets CPU #0, next one gets CPU #1, and so on. Then producer threads
pick up next CPU and increment one-by-one as well. But user can also specify
a set of CPUs independently for producers and consumers with --prod-affinity
1,2-10,15 and --cons-affinity <set-of-cpus>. The latter allows to force
producers and consumers to share same set of CPUs, if necessary.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200512192445.2351848-3-andriin@fb.com
---
 tools/testing/selftests/bpf/.gitignore           |   1 +
 tools/testing/selftests/bpf/Makefile             |  13 +-
 tools/testing/selftests/bpf/bench.c              | 423 +++++++++++++++++++++++
 tools/testing/selftests/bpf/bench.h              |  81 +++++
 tools/testing/selftests/bpf/benchs/bench_count.c |  91 +++++
 5 files changed, 608 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/bench.c
 create mode 100644 tools/testing/selftests/bpf/bench.h
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_count.c

diff --git a/tools/testing/selftests/bpf/.gitignore b/tools/testing/selftests/bpf/.gitignore
index 3ff031972975..1bb204cee853 100644
--- a/tools/testing/selftests/bpf/.gitignore
+++ b/tools/testing/selftests/bpf/.gitignore
@@ -38,3 +38,4 @@ test_cpp
 /bpf_gcc
 /tools
 /runqslower
+/bench
diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 975b97b85bca..f414b2442181 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -77,7 +77,7 @@ TEST_PROGS_EXTENDED := with_addr.sh \
 # Compile but not part of 'make run_tests'
 TEST_GEN_PROGS_EXTENDED = test_sock_addr test_skb_cgroup_id_user \
 	flow_dissector_load test_flow_dissector test_tcp_check_syncookie_user \
-	test_lirc_mode2_user xdping test_cpp runqslower
+	test_lirc_mode2_user xdping test_cpp runqslower bench
 
 TEST_CUSTOM_PROGS = urandom_read
 
@@ -407,6 +407,17 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 	$(call msg,CXX,,$@)
 	$(CXX) $(CFLAGS) $^ $(LDLIBS) -o $@
 
+# Benchmark runner
+$(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
+	$(call msg,CC,,$@)
+	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+$(OUTPUT)/bench.o: bench.h testing_helpers.h
+$(OUTPUT)/bench: LDLIBS += -lm
+$(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
+		 $(OUTPUT)/bench_count.o
+	$(call msg,BINARY,,$@)
+	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
+
 EXTRA_CLEAN := $(TEST_CUSTOM_PROGS) $(SCRATCH_DIR)			\
 	prog_tests/tests.h map_tests/tests.h verifier/tests.h		\
 	feature								\
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
new file mode 100644
index 000000000000..3972da8b19e8
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.c
@@ -0,0 +1,423 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#define _GNU_SOURCE
+#include <argp.h>
+#include <linux/compiler.h>
+#include <sys/time.h>
+#include <sched.h>
+#include <fcntl.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <sys/resource.h>
+#include <signal.h>
+#include "bench.h"
+#include "testing_helpers.h"
+
+struct env env = {
+	.warmup_sec = 1,
+	.duration_sec = 5,
+	.affinity = false,
+	.consumer_cnt = 1,
+	.producer_cnt = 1,
+};
+
+static int libbpf_print_fn(enum libbpf_print_level level,
+		    const char *format, va_list args)
+{
+	if (level == LIBBPF_DEBUG && !env.verbose)
+		return 0;
+	return vfprintf(stderr, format, args);
+}
+
+static int bump_memlock_rlimit(void)
+{
+	struct rlimit rlim_new = {
+		.rlim_cur	= RLIM_INFINITY,
+		.rlim_max	= RLIM_INFINITY,
+	};
+
+	return setrlimit(RLIMIT_MEMLOCK, &rlim_new);
+}
+
+/* Install libbpf_print_fn() as the libbpf logger and lift RLIMIT_MEMLOCK. */
+void setup_libbpf()
+{
+	libbpf_set_print(libbpf_print_fn);
+
+	/* best effort: a failure is only reported, the benchmark carries on */
+	if (bump_memlock_rlimit())
+		/* setrlimit() returned -1; the actual reason is in errno */
+		fprintf(stderr, "failed to increase RLIMIT_MEMLOCK: %d\n", -errno);
+}
+
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns)
+{
+	double hits_per_sec, drops_per_sec;
+	double hits_per_prod;
+
+	hits_per_sec = res->hits / 1000000.0 / (delta_ns / 1000000000.0);
+	hits_per_prod = hits_per_sec / env.producer_cnt;
+	drops_per_sec = res->drops / 1000000.0 / (delta_ns / 1000000000.0);
+
+	printf("Iter %3d (%7.3lfus): ",
+	       iter, (delta_ns - 1000000000) / 1000.0);
+
+	printf("hits %8.3lfM/s (%7.3lfM/prod), drops %8.3lfM/s\n",
+	       hits_per_sec, hits_per_prod, drops_per_sec);
+}
+
+void hits_drops_report_final(struct bench_res res[], int res_cnt)
+{
+	int i;
+	double hits_mean = 0.0, drops_mean = 0.0;
+	double hits_stddev = 0.0, drops_stddev = 0.0;
+
+	for (i = 0; i < res_cnt; i++) {
+		hits_mean += res[i].hits / 1000000.0 / (0.0 + res_cnt);
+		drops_mean += res[i].drops / 1000000.0 / (0.0 + res_cnt);
+	}
+
+	if (res_cnt > 1)  {
+		for (i = 0; i < res_cnt; i++) {
+			hits_stddev += (hits_mean - res[i].hits / 1000000.0) *
+				       (hits_mean - res[i].hits / 1000000.0) /
+				       (res_cnt - 1.0);
+			drops_stddev += (drops_mean - res[i].drops / 1000000.0) *
+					(drops_mean - res[i].drops / 1000000.0) /
+					(res_cnt - 1.0);
+		}
+		hits_stddev = sqrt(hits_stddev);
+		drops_stddev = sqrt(drops_stddev);
+	}
+	printf("Summary: hits %8.3lf \u00B1 %5.3lfM/s (%7.3lfM/prod), ",
+	       hits_mean, hits_stddev, hits_mean / env.producer_cnt);
+	printf("drops %8.3lf \u00B1 %5.3lfM/s\n",
+	       drops_mean, drops_stddev);
+}
+
+const char *argp_program_version = "benchmark";
+const char *argp_program_bug_address = "<bpf@vger.kernel.org>";
+const char argp_program_doc[] =
+"benchmark    Generic benchmarking framework.\n"
+"\n"
+"This tool runs benchmarks.\n"
+"\n"
+"USAGE: benchmark <bench-name>\n"
+"\n"
+"EXAMPLES:\n"
+"    # run 'count-local' benchmark with 1 producer and 1 consumer\n"
+"    benchmark count-local\n"
+"    # run 'count-local' with 16 producer and 8 consumer thread, pinned to CPUs\n"
+"    benchmark -p16 -c8 -a count-local\n";
+
+enum {
+	ARG_PROD_AFFINITY_SET = 1000,
+	ARG_CONS_AFFINITY_SET = 1001,
+};
+
+static const struct argp_option opts[] = {
+	{ "list", 'l', NULL, 0, "List available benchmarks"},
+	{ "duration", 'd', "SEC", 0, "Duration of benchmark, seconds"},
+	{ "warmup", 'w', "SEC", 0, "Warm-up period, seconds"},
+	{ "producers", 'p', "NUM", 0, "Number of producer threads"},
+	{ "consumers", 'c', "NUM", 0, "Number of consumer threads"},
+	{ "verbose", 'v', NULL, 0, "Verbose debug output"},
+	{ "affinity", 'a', NULL, 0, "Set consumer/producer thread affinity"},
+	{ "prod-affinity", ARG_PROD_AFFINITY_SET, "CPUSET", 0,
+	  "Set of CPUs for producer threads; implies --affinity"},
+	{ "cons-affinity", ARG_CONS_AFFINITY_SET, "CPUSET", 0,
+	  "Set of CPUs for consumer threads; implies --affinity"},
+	{},
+};
+
+/* argp parser callback: record one option or positional argument in env.
+ * Invalid values are reported on stderr and handed to argp_usage().
+ */
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	static int pos_args;
+
+	switch (key) {
+	case 'v':
+		env.verbose = true;
+		break;
+	case 'l':
+		env.list = true;
+		break;
+	case 'd':
+		env.duration_sec = strtol(arg, NULL, 10);
+		if (env.duration_sec <= 0) {
+			fprintf(stderr, "Invalid duration: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'w':
+		env.warmup_sec = strtol(arg, NULL, 10);
+		if (env.warmup_sec <= 0) {
+			fprintf(stderr, "Invalid warm-up duration: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'p':
+		env.producer_cnt = strtol(arg, NULL, 10);
+		if (env.producer_cnt <= 0) {
+			fprintf(stderr, "Invalid producer count: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'c':
+		env.consumer_cnt = strtol(arg, NULL, 10);
+		if (env.consumer_cnt <= 0) {
+			fprintf(stderr, "Invalid consumer count: %s\n", arg);
+			argp_usage(state);
+		}
+		break;
+	case 'a':
+		env.affinity = true;
+		break;
+	case ARG_PROD_AFFINITY_SET:
+		env.affinity = true;
+		if (parse_num_list(arg, &env.prod_cpus.cpus, &env.prod_cpus.cpus_len)) {
+			fprintf(stderr, "Invalid format of CPU set for producers.\n");
+			argp_usage(state);
+		}
+		break;
+	case ARG_CONS_AFFINITY_SET:
+		env.affinity = true;
+		if (parse_num_list(arg, &env.cons_cpus.cpus, &env.cons_cpus.cpus_len)) {
+			fprintf(stderr, "Invalid format of CPU set for consumers.\n");
+			argp_usage(state);
+		}
+		break;
+	case ARGP_KEY_ARG:
+		if (pos_args++) {
+			fprintf(stderr, "Unrecognized positional argument: %s\n", arg);
+			argp_usage(state);
+		}
+		env.bench_name = strdup(arg);
+		break;
+	default:
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+static void parse_cmdline_args(int argc, char **argv)
+{
+	static const struct argp argp = {
+		.options = opts,
+		.parser = parse_arg,
+		.doc = argp_program_doc,
+	};
+	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
+		exit(1);
+	if (!env.list && !env.bench_name) {
+		argp_help(&argp, stderr, ARGP_HELP_DOC, "bench");
+		exit(1);
+	}
+}
+
+static void collect_measurements(long delta_ns);
+
+static __u64 last_time_ns;
+static void sigalarm_handler(int signo)
+{
+	long new_time_ns = get_time_ns();
+	long delta_ns = new_time_ns - last_time_ns;
+
+	collect_measurements(delta_ns);
+
+	last_time_ns = new_time_ns;
+}
+
+/* set up periodic 1-second timer */
+static void setup_timer()
+{
+	static struct sigaction sigalarm_action = {
+		.sa_handler = sigalarm_handler,
+	};
+	struct itimerval timer_settings = {};
+	int err;
+
+	last_time_ns = get_time_ns();
+	err = sigaction(SIGALRM, &sigalarm_action, NULL);
+	if (err < 0) {
+		fprintf(stderr, "failed to install SIGALARM handler: %d\n", -errno);
+		exit(1);
+	}
+	timer_settings.it_interval.tv_sec = 1;
+	timer_settings.it_value.tv_sec = 1;
+	err = setitimer(ITIMER_REAL, &timer_settings, NULL);
+	if (err < 0) {
+		fprintf(stderr, "failed to arm interval timer: %d\n", -errno);
+		exit(1);
+	}
+}
+
+static void set_thread_affinity(pthread_t thread, int cpu)
+{
+	cpu_set_t cpuset;
+
+	CPU_ZERO(&cpuset);
+	CPU_SET(cpu, &cpuset);
+	errno = pthread_setaffinity_np(thread, sizeof(cpuset), &cpuset);
+	if (errno) { /* pthread_*() return the error number, errno isn't set */
+		fprintf(stderr, "setting affinity to CPU #%d failed: %d\n", cpu, errno);
+		exit(1);
+	}
+}
+
+static int next_cpu(struct cpu_set *cpu_set)
+{
+	if (cpu_set->cpus) {
+		int i;
+
+		/* find next available CPU */
+		for (i = cpu_set->next_cpu; i < cpu_set->cpus_len; i++) {
+			if (cpu_set->cpus[i]) {
+				cpu_set->next_cpu = i + 1;
+				return i;
+			}
+		}
+		fprintf(stderr, "Not enough CPUs specified, need CPU #%d or higher.\n", i);
+		exit(1);
+	}
+
+	return cpu_set->next_cpu++;
+}
+
+static struct bench_state {
+	int res_cnt;
+	struct bench_res *results;
+	pthread_t *consumers;
+	pthread_t *producers;
+} state;
+
+const struct bench *bench = NULL;
+
+extern const struct bench bench_count_global;
+extern const struct bench bench_count_local;
+
+static const struct bench *benchs[] = {
+	&bench_count_global,
+	&bench_count_local,
+};
+
+static void setup_benchmark()
+{
+	int i, err;
+
+	/* resolve env.bench_name in benchs[], allocate state, start all threads */
+	if (!env.bench_name) {
+		fprintf(stderr, "benchmark name is not specified\n");
+		exit(1);
+	}
+
+	for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+		if (strcmp(benchs[i]->name, env.bench_name) == 0) {
+			bench = benchs[i];
+			break;
+		}
+	}
+	if (!bench) {
+		fprintf(stderr, "benchmark '%s' not found\n", env.bench_name);
+		exit(1);
+	}
+
+	printf("Setting up benchmark '%s'...\n", bench->name);
+
+	state.producers = calloc(env.producer_cnt, sizeof(*state.producers));
+	state.consumers = calloc(env.consumer_cnt, sizeof(*state.consumers));
+	/* collect_measurements() fills duration + warmup + 1 slots; keep one spare */
+	state.results = calloc(env.duration_sec + env.warmup_sec + 2, sizeof(*state.results));
+	if (!state.producers || !state.consumers || !state.results)
+		exit(1);
+
+	if (bench->validate)
+		bench->validate();
+	if (bench->setup)
+		bench->setup();
+
+	for (i = 0; i < env.consumer_cnt; i++) {
+		/* pthread_create() returns the error number; it does not set errno */
+		err = pthread_create(&state.consumers[i], NULL,
+				     bench->consumer_thread, (void *)(long)i);
+		if (err) {
+			fprintf(stderr, "failed to create consumer thread #%d: %d\n",
+				i, -err);
+			exit(1);
+		}
+		if (env.affinity)
+			set_thread_affinity(state.consumers[i], next_cpu(&env.cons_cpus));
+	}
+
+	/* unless explicit producer CPU list is specified, continue after
+	 * last consumer CPU
+	 */
+	if (!env.prod_cpus.cpus)
+		env.prod_cpus.next_cpu = env.cons_cpus.next_cpu;
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		err = pthread_create(&state.producers[i], NULL,
+				     bench->producer_thread, (void *)(long)i);
+		if (err) {
+			fprintf(stderr, "failed to create producer thread #%d: %d\n",
+				i, -err);
+			exit(1);
+		}
+		if (env.affinity)
+			set_thread_affinity(state.producers[i], next_cpu(&env.prod_cpus));
+	}
+
+	printf("Benchmark '%s' started.\n", bench->name);
+}
+
+static pthread_mutex_t bench_done_mtx = PTHREAD_MUTEX_INITIALIZER;
+static pthread_cond_t bench_done = PTHREAD_COND_INITIALIZER;
+
+static void collect_measurements(long delta_ns) {
+	int iter = state.res_cnt++;
+	struct bench_res *res = &state.results[iter];
+
+	bench->measure(res);
+
+	if (bench->report_progress)
+		bench->report_progress(iter, res, delta_ns);
+
+	if (iter == env.duration_sec + env.warmup_sec) {
+		pthread_mutex_lock(&bench_done_mtx);
+		pthread_cond_signal(&bench_done);
+		pthread_mutex_unlock(&bench_done_mtx);
+	}
+}
+
+int main(int argc, char **argv)
+{
+	parse_cmdline_args(argc, argv);
+
+	if (env.list) {
+		int i;
+
+		printf("Available benchmarks:\n");
+		for (i = 0; i < ARRAY_SIZE(benchs); i++) {
+			printf("- %s\n", benchs[i]->name);
+		}
+		return 0;
+	}
+
+	setup_benchmark();
+
+	setup_timer();
+
+	pthread_mutex_lock(&bench_done_mtx);
+	pthread_cond_wait(&bench_done, &bench_done_mtx);
+	pthread_mutex_unlock(&bench_done_mtx);
+
+	if (bench->report_final)
+		/* leave the first warmup_sec samples out of the summary */
+		bench->report_final(state.results + env.warmup_sec,
+				    state.res_cnt - env.warmup_sec);
+
+	return 0;
+}
+
diff --git a/tools/testing/selftests/bpf/bench.h b/tools/testing/selftests/bpf/bench.h
new file mode 100644
index 000000000000..c1f48a473b02
--- /dev/null
+++ b/tools/testing/selftests/bpf/bench.h
@@ -0,0 +1,81 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#pragma once
+#include <stdlib.h>
+#include <stdbool.h>
+#include <linux/err.h>
+#include <errno.h>
+#include <unistd.h>
+#include <bpf/bpf.h>
+#include <bpf/libbpf.h>
+#include <math.h>
+#include <time.h>
+#include <sys/syscall.h>
+
+struct cpu_set {
+	bool *cpus;
+	int cpus_len;
+	int next_cpu;
+};
+
+struct env {
+	char *bench_name;
+	int duration_sec;
+	int warmup_sec;
+	bool verbose;
+	bool list;
+	bool affinity;
+	int consumer_cnt;
+	int producer_cnt;
+	struct cpu_set prod_cpus;
+	struct cpu_set cons_cpus;
+};
+
+struct bench_res {
+	long hits;
+	long drops;
+};
+
+struct bench {
+	const char *name;
+	void (*validate)();
+	void (*setup)();
+	void *(*producer_thread)(void *ctx);
+	void *(*consumer_thread)(void *ctx);
+	void (*measure)(struct bench_res* res);
+	void (*report_progress)(int iter, struct bench_res* res, long delta_ns);
+	void (*report_final)(struct bench_res res[], int res_cnt);
+};
+
+struct counter {
+	long value;
+} __attribute__((aligned(128)));
+
+extern struct env env;
+extern const struct bench *bench;
+
+void setup_libbpf();
+void hits_drops_report_progress(int iter, struct bench_res *res, long delta_ns);
+void hits_drops_report_final(struct bench_res res[], int res_cnt);
+
+static inline __u64 get_time_ns() {
+	struct timespec t;
+
+	clock_gettime(CLOCK_MONOTONIC, &t);
+
+	return (u64)t.tv_sec * 1000000000 + t.tv_nsec;
+}
+
+static inline void atomic_inc(long *value)
+{
+	(void)__atomic_add_fetch(value, 1, __ATOMIC_RELAXED);
+}
+
+static inline void atomic_add(long *value, long n)
+{
+	(void)__atomic_add_fetch(value, n, __ATOMIC_RELAXED);
+}
+
+static inline long atomic_swap(long *value, long n)
+{
+	return __atomic_exchange_n(value, n, __ATOMIC_RELAXED);
+}
diff --git a/tools/testing/selftests/bpf/benchs/bench_count.c b/tools/testing/selftests/bpf/benchs/bench_count.c
new file mode 100644
index 000000000000..befba7a82643
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_count.c
@@ -0,0 +1,91 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+
+/* COUNT-GLOBAL benchmark */
+
+static struct count_global_ctx {
+	struct counter hits;
+} count_global_ctx;
+
+static void *count_global_producer(void *input)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	while (true) {
+		atomic_inc(&ctx->hits.value);
+	}
+	return NULL;
+}
+
+static void *count_global_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_global_measure(struct bench_res *res)
+{
+	struct count_global_ctx *ctx = &count_global_ctx;
+
+	res->hits = atomic_swap(&ctx->hits.value, 0);
+}
+
+/* COUNT-local benchmark */
+
+static struct count_local_ctx {
+	struct counter *hits;
+} count_local_ctx;
+
+/* Allocate the zero-initialized per-producer hit counters. */
+static void count_local_setup()
+{
+	/* hits[] is indexed by producer thread index, not by consumer */
+	count_local_ctx.hits = calloc(env.producer_cnt, sizeof(*count_local_ctx.hits));
+	if (!count_local_ctx.hits)
+		exit(1);
+}
+
+static void *count_local_producer(void *input)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int idx = (long)input;
+
+	while (true) {
+		atomic_inc(&ctx->hits[idx].value);
+	}
+	return NULL;
+}
+
+static void *count_local_consumer(void *input)
+{
+	return NULL;
+}
+
+static void count_local_measure(struct bench_res *res)
+{
+	struct count_local_ctx *ctx = &count_local_ctx;
+	int i;
+
+	for (i = 0; i < env.producer_cnt; i++) {
+		res->hits += atomic_swap(&ctx->hits[i].value, 0);
+	}
+}
+
+const struct bench bench_count_global = {
+	.name = "count-global",
+	.producer_thread = count_global_producer,
+	.consumer_thread = count_global_consumer,
+	.measure = count_global_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_count_local = {
+	.name = "count-local",
+	.setup = count_local_setup,
+	.producer_thread = count_local_producer,
+	.consumer_thread = count_local_consumer,
+	.measure = count_local_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
-- 
cgit v1.2.3-59-g8ed1b


From 4eaf0b5c5e04c21a866431bd763ab4b1f24c4d16 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 12 May 2020 12:24:44 -0700
Subject: selftest/bpf: Fmod_ret prog and implement test_overhead as part of
 bench
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add fmod_ret BPF program to existing test_overhead selftest. Also re-implement
user-space benchmarking part into benchmark runner to compare results. Results
with ./bench are consistently somewhat lower than test_overhead's, but relative
performance of various types of BPF programs stays consistent (e.g., kretprobe is
noticeably slower). This slowdown seems to be coming from the fact that
test_overhead is single-threaded, while benchmark always spins off at least
one thread for producer. This has been confirmed by hacking multi-threaded
test_overhead variant and also single-threaded bench variant. Results are
below. run_bench_rename.sh script from benchs/ subdirectory was used to
produce results for ./bench.

Single-threaded implementations
===============================

/* bench: single-threaded, atomics */
base      :    4.622 ± 0.049M/s
kprobe    :    3.673 ± 0.052M/s
kretprobe :    2.625 ± 0.052M/s
rawtp     :    4.369 ± 0.089M/s
fentry    :    4.201 ± 0.558M/s
fexit     :    4.309 ± 0.148M/s
fmodret   :    4.314 ± 0.203M/s

/* selftest: single-threaded, no atomics */
task_rename base        4555K events per sec
task_rename kprobe      3643K events per sec
task_rename kretprobe   2506K events per sec
task_rename raw_tp      4303K events per sec
task_rename fentry      4307K events per sec
task_rename fexit       4010K events per sec
task_rename fmod_ret    3984K events per sec

Multi-threaded implementations
==============================

/* bench: multi-threaded w/ atomics */
base      :    3.910 ± 0.023M/s
kprobe    :    3.048 ± 0.037M/s
kretprobe :    2.300 ± 0.015M/s
rawtp     :    3.687 ± 0.034M/s
fentry    :    3.740 ± 0.087M/s
fexit     :    3.510 ± 0.009M/s
fmodret   :    3.485 ± 0.050M/s

/* selftest: multi-threaded w/ atomics */
task_rename base        3872K events per sec
task_rename kprobe      3068K events per sec
task_rename kretprobe   2350K events per sec
task_rename raw_tp      3731K events per sec
task_rename fentry      3639K events per sec
task_rename fexit       3558K events per sec
task_rename fmod_ret    3511K events per sec

/* selftest: multi-threaded, no atomics */
task_rename base        3945K events per sec
task_rename kprobe      3298K events per sec
task_rename kretprobe   2451K events per sec
task_rename raw_tp      3718K events per sec
task_rename fentry      3782K events per sec
task_rename fexit       3543K events per sec
task_rename fmod_ret    3526K events per sec

Note that the fact that ./bench benchmark always uses atomic increments for
counting, while test_overhead doesn't, doesn't influence test results all that
much.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200512192445.2351848-4-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile               |   4 +-
 tools/testing/selftests/bpf/bench.c                |  14 ++
 tools/testing/selftests/bpf/benchs/bench_rename.c  | 195 +++++++++++++++++++++
 .../selftests/bpf/benchs/run_bench_rename.sh       |   9 +
 .../selftests/bpf/prog_tests/test_overhead.c       |  14 +-
 tools/testing/selftests/bpf/progs/test_overhead.c  |   6 +
 6 files changed, 240 insertions(+), 2 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_rename.c
 create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_rename.sh

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index f414b2442181..1a079e91482f 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -411,10 +411,12 @@ $(OUTPUT)/test_cpp: test_cpp.cpp $(OUTPUT)/test_core_extern.skel.h $(BPFOBJ)
 $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
 	$(call msg,CC,,$@)
 	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
+$(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
-		 $(OUTPUT)/bench_count.o
+		 $(OUTPUT)/bench_count.o \
+		 $(OUTPUT)/bench_rename.o
 	$(call msg,BINARY,,$@)
 	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 3972da8b19e8..c9e8b7dbaf66 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -297,10 +297,24 @@ const struct bench *bench = NULL;
 
 extern const struct bench bench_count_global;
 extern const struct bench bench_count_local;
+extern const struct bench bench_rename_base;
+extern const struct bench bench_rename_kprobe;
+extern const struct bench bench_rename_kretprobe;
+extern const struct bench bench_rename_rawtp;
+extern const struct bench bench_rename_fentry;
+extern const struct bench bench_rename_fexit;
+extern const struct bench bench_rename_fmodret;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
 	&bench_count_local,
+	&bench_rename_base,
+	&bench_rename_kprobe,
+	&bench_rename_kretprobe,
+	&bench_rename_rawtp,
+	&bench_rename_fentry,
+	&bench_rename_fexit,
+	&bench_rename_fmodret,
 };
 
 static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_rename.c b/tools/testing/selftests/bpf/benchs/bench_rename.c
new file mode 100644
index 000000000000..e74cff40f4fe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_rename.c
@@ -0,0 +1,195 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <fcntl.h>
+#include "bench.h"
+#include "test_overhead.skel.h"
+
+/* BPF triggering benchmarks */
+static struct ctx {
+	struct test_overhead *skel;
+	struct counter hits;
+	int fd;
+} ctx;
+
+static void validate()
+{
+	if (env.producer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-producer!\n");
+		exit(1);
+	}
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+}
+
+static void *producer(void *input)
+{
+	char buf[] = "test_overhead";
+	int err;
+
+	while (true) {
+		err = write(ctx.fd, buf, sizeof(buf));
+		if (err < 0) {
+			fprintf(stderr, "write failed\n");
+			exit(1);
+		}
+		atomic_inc(&ctx.hits.value);
+	}
+}
+
+static void measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.hits.value, 0);
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+
+	ctx.skel = test_overhead__open_and_load();
+	if (!ctx.skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	ctx.fd = open("/proc/self/comm", O_WRONLY|O_TRUNC);
+	if (ctx.fd < 0) {
+		fprintf(stderr, "failed to open /proc/self/comm: %d\n", -errno);
+		exit(1);
+	}
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	struct bpf_link *link;
+
+	link = bpf_program__attach(prog);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void setup_base()
+{
+	setup_ctx();
+}
+
+static void setup_kprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog1);
+}
+
+static void setup_kretprobe()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog2);
+}
+
+static void setup_rawtp()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog3);
+}
+
+static void setup_fentry()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog4);
+}
+
+static void setup_fexit()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog5);
+}
+
+static void setup_fmodret()
+{
+	setup_ctx();
+	attach_bpf(ctx.skel->progs.prog6);
+}
+
+static void *consumer(void *input)
+{
+	return NULL;
+}
+
+const struct bench bench_rename_base = {
+	.name = "rename-base",
+	.validate = validate,
+	.setup = setup_base,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kprobe = {
+	.name = "rename-kprobe",
+	.validate = validate,
+	.setup = setup_kprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_kretprobe = {
+	.name = "rename-kretprobe",
+	.validate = validate,
+	.setup = setup_kretprobe,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_rawtp = {
+	.name = "rename-rawtp",
+	.validate = validate,
+	.setup = setup_rawtp,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fentry = {
+	.name = "rename-fentry",
+	.validate = validate,
+	.setup = setup_fentry,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fexit = {
+	.name = "rename-fexit",
+	.validate = validate,
+	.setup = setup_fexit,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rename_fmodret = {
+	.name = "rename-fmodret",
+	.validate = validate,
+	.setup = setup_fmodret,
+	.producer_thread = producer,
+	.consumer_thread = consumer,
+	.measure = measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_rename.sh b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
new file mode 100755
index 000000000000..16f774b1cdbe
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_rename.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Per rename-* variant: keep the last output line, cut at '(', print fields 3+.
+set -eufo pipefail
+
+for variant in base kprobe kretprobe rawtp fentry fexit fmodret
+do
+	line=$(sudo ./bench -w2 -d5 -a "rename-$variant" | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf '%-10s: %s\n' "$variant" "$line"
+done
diff --git a/tools/testing/selftests/bpf/prog_tests/test_overhead.c b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
index 465b371a561d..2702df2b2343 100644
--- a/tools/testing/selftests/bpf/prog_tests/test_overhead.c
+++ b/tools/testing/selftests/bpf/prog_tests/test_overhead.c
@@ -61,9 +61,10 @@ void test_test_overhead(void)
 	const char *raw_tp_name = "raw_tp/task_rename";
 	const char *fentry_name = "fentry/__set_task_comm";
 	const char *fexit_name = "fexit/__set_task_comm";
+	const char *fmodret_name = "fmod_ret/__set_task_comm";
 	const char *kprobe_func = "__set_task_comm";
 	struct bpf_program *kprobe_prog, *kretprobe_prog, *raw_tp_prog;
-	struct bpf_program *fentry_prog, *fexit_prog;
+	struct bpf_program *fentry_prog, *fexit_prog, *fmodret_prog;
 	struct bpf_object *obj;
 	struct bpf_link *link;
 	int err, duration = 0;
@@ -96,6 +97,10 @@ void test_test_overhead(void)
 	if (CHECK(!fexit_prog, "find_probe",
 		  "prog '%s' not found\n", fexit_name))
 		goto cleanup;
+	fmodret_prog = bpf_object__find_program_by_title(obj, fmodret_name);
+	if (CHECK(!fmodret_prog, "find_probe",
+		  "prog '%s' not found\n", fmodret_name))
+		goto cleanup;
 
 	err = bpf_object__load(obj);
 	if (CHECK(err, "obj_load", "err %d\n", err))
@@ -142,6 +147,13 @@ void test_test_overhead(void)
 		goto cleanup;
 	test_run("fexit");
 	bpf_link__destroy(link);
+
+	/* attach fmod_ret */
+	link = bpf_program__attach_trace(fmodret_prog);
+	if (CHECK(IS_ERR(link), "attach fmod_ret", "err %ld\n", PTR_ERR(link)))
+		goto cleanup;
+	test_run("fmod_ret");
+	bpf_link__destroy(link);
 cleanup:
 	prctl(PR_SET_NAME, comm, 0L, 0L, 0L);
 	bpf_object__close(obj);
diff --git a/tools/testing/selftests/bpf/progs/test_overhead.c b/tools/testing/selftests/bpf/progs/test_overhead.c
index 56a50b25cd33..450bf819beac 100644
--- a/tools/testing/selftests/bpf/progs/test_overhead.c
+++ b/tools/testing/selftests/bpf/progs/test_overhead.c
@@ -39,4 +39,10 @@ int BPF_PROG(prog5, struct task_struct *tsk, const char *buf, bool exec)
 	return !tsk;
 }
 
+SEC("fmod_ret/__set_task_comm")
+int BPF_PROG(prog6, struct task_struct *tsk, const char *buf, bool exec)
+{
+	return !tsk; /* 0 whenever tsk is non-NULL; same expression as the program just above */
+}
+
 char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From c5d420c32cb44fdd10d76f0f01bcd0b09383d0b5 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 12 May 2020 12:24:45 -0700
Subject: selftest/bpf: Add BPF triggering benchmark
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

It is sometimes desirable to be able to trigger BPF program from user-space
with minimal overhead. sys_enter would seem to be a good candidate, yet in
a lot of cases there will be a lot of noise from syscalls triggered by other
processes on the system. So while searching for a low-overhead alternative, I've
stumbled upon the getpgid() syscall, which seems to be specific enough to not
suffer from accidental syscalls by other apps.

This set of benchmarks compares tp, raw_tp w/ filtering by syscall ID, kprobe,
fentry and fmod_ret with returning error (so that syscall would not be
executed), to determine the lowest-overhead way. Here are results on my
machine (using benchs/run_bench_trigger.sh script):

  base      :    9.200 ± 0.319M/s
  tp        :    6.690 ± 0.125M/s
  rawtp     :    8.571 ± 0.214M/s
  kprobe    :    6.431 ± 0.048M/s
  fentry    :    8.955 ± 0.241M/s
  fmodret   :    8.903 ± 0.135M/s

So it seems like fmodret doesn't give much benefit for such lightweight
syscall. Raw tracepoint is pretty decent despite additional filtering logic,
but it will be called for any other syscall in the system, which rules it out.
Fentry, though, seems to be adding the least amount of overhead and achieves
97.3% of performance of baseline no-BPF-attached syscall.

Using getpgid() seems to be preferable to set_task_comm() approach from
test_overhead, as it's about 2.35x faster in a baseline performance.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: John Fastabend <john.fastabend@gmail.com>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200512192445.2351848-5-andriin@fb.com
---
 tools/testing/selftests/bpf/Makefile               |   4 +-
 tools/testing/selftests/bpf/bench.c                |  12 ++
 tools/testing/selftests/bpf/benchs/bench_trigger.c | 167 +++++++++++++++++++++
 .../selftests/bpf/benchs/run_bench_trigger.sh      |   9 ++
 tools/testing/selftests/bpf/progs/trigger_bench.c  |  47 ++++++
 5 files changed, 238 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_trigger.c
 create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
 create mode 100644 tools/testing/selftests/bpf/progs/trigger_bench.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 1a079e91482f..e716e931d0c9 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -412,11 +412,13 @@ $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
 	$(call msg,CC,,$@)
 	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
+$(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 		 $(OUTPUT)/bench_count.o \
-		 $(OUTPUT)/bench_rename.o
+		 $(OUTPUT)/bench_rename.o \
+		 $(OUTPUT)/bench_trigger.o
 	$(call msg,BINARY,,$@)
 	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index c9e8b7dbaf66..8c0dfbfe6088 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -304,6 +304,12 @@ extern const struct bench bench_rename_rawtp;
 extern const struct bench bench_rename_fentry;
 extern const struct bench bench_rename_fexit;
 extern const struct bench bench_rename_fmodret;
+extern const struct bench bench_trig_base;
+extern const struct bench bench_trig_tp;
+extern const struct bench bench_trig_rawtp;
+extern const struct bench bench_trig_kprobe;
+extern const struct bench bench_trig_fentry;
+extern const struct bench bench_trig_fmodret;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -315,6 +321,12 @@ static const struct bench *benchs[] = {
 	&bench_rename_fentry,
 	&bench_rename_fexit,
 	&bench_rename_fmodret,
+	&bench_trig_base,
+	&bench_trig_tp,
+	&bench_trig_rawtp,
+	&bench_trig_kprobe,
+	&bench_trig_fentry,
+	&bench_trig_fmodret,
 };
 
 static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_trigger.c b/tools/testing/selftests/bpf/benchs/bench_trigger.c
new file mode 100644
index 000000000000..49c22832f216
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_trigger.c
@@ -0,0 +1,167 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include "bench.h"
+#include "trigger_bench.skel.h"
+
+/* BPF triggering benchmarks */
+static struct trigger_ctx {
+	struct trigger_bench *skel; /* assigned in setup_ctx(), read by trigger_measure() and the *_setup() helpers */
+} ctx;
+
+static struct counter base_hits; /* user-space hit counter, touched only by the trig-base producer/measure pair */
+
+static void trigger_validate()
+{
+	if (env.consumer_cnt == 1) /* the only accepted consumer count */
+		return;
+	fprintf(stderr, "benchmark doesn't support multi-consumer!\n");
+	exit(1);
+}
+
+static void *trigger_base_producer(void *input)
+{
+	for (;;) {
+		(void)syscall(__NR_getpgid); /* result deliberately ignored */
+		atomic_inc(&base_hits.value); /* one hit per syscall issued */
+	}
+	return NULL; /* not reached: the loop above never exits */
+}
+
+static void trigger_base_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&base_hits.value, 0); /* report the user-space count and reset it to 0 (assumes atomic_swap yields the old value) */
+}
+
+static void *trigger_producer(void *input)
+{
+	for (;;) /* no counting here: trigger_measure() reads the BPF-side hits */
+		(void)syscall(__NR_getpgid);
+	return NULL; /* not reached: the loop above never exits */
+}
+
+static void trigger_measure(struct bench_res *res)
+{
+	res->hits = atomic_swap(&ctx.skel->bss->hits, 0); /* report the skeleton's hits counter and reset it to 0 (assumes atomic_swap yields the old value) */
+}
+
+static void setup_ctx()
+{
+	setup_libbpf();
+	/* open and load the trigger_bench skeleton; a NULL result is fatal */
+	ctx.skel = trigger_bench__open_and_load();
+	if (ctx.skel)
+		return;
+	fprintf(stderr, "failed to open skeleton\n");
+	exit(1);
+}
+
+static void attach_bpf(struct bpf_program *prog)
+{
+	/* the returned link is neither stored nor destroyed in this function */
+	struct bpf_link *attached = bpf_program__attach(prog);
+
+	if (!IS_ERR(attached))
+		return;
+	fprintf(stderr, "failed to attach program!\n");
+	exit(1);
+}
+
+static void trigger_tp_setup()
+{
+	setup_ctx(); /* must come first: it assigns ctx.skel, which is dereferenced below */
+	attach_bpf(ctx.skel->progs.bench_trigger_tp); /* the tp/syscalls/sys_enter_getpgid program */
+}
+
+static void trigger_rawtp_setup()
+{
+	setup_ctx(); /* must come first: it assigns ctx.skel, which is dereferenced below */
+	attach_bpf(ctx.skel->progs.bench_trigger_raw_tp); /* the raw_tp/sys_enter program */
+}
+
+static void trigger_kprobe_setup()
+{
+	setup_ctx(); /* must come first: it assigns ctx.skel, which is dereferenced below */
+	attach_bpf(ctx.skel->progs.bench_trigger_kprobe); /* the kprobe/__x64_sys_getpgid program */
+}
+
+static void trigger_fentry_setup()
+{
+	setup_ctx(); /* must come first: it assigns ctx.skel, which is dereferenced below */
+	attach_bpf(ctx.skel->progs.bench_trigger_fentry); /* the fentry/__x64_sys_getpgid program */
+}
+
+static void trigger_fmodret_setup()
+{
+	setup_ctx(); /* must come first: it assigns ctx.skel, which is dereferenced below */
+	attach_bpf(ctx.skel->progs.bench_trigger_fmodret); /* the fmod_ret/__x64_sys_getpgid program */
+}
+
+static void *trigger_consumer(void *input)
+{
+	return NULL; /* no-op: input is unused and the function returns immediately */
+}
+
+const struct bench bench_trig_base = { /* baseline variant: the only trig-* table without a .setup callback */
+	.name = "trig-base",
+	.validate = trigger_validate,
+	.producer_thread = trigger_base_producer, /* counts its own syscalls in base_hits */
+	.consumer_thread = trigger_consumer,
+	.measure = trigger_base_measure, /* reports base_hits instead of the skeleton's counter */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_tp = { /* tracepoint variant of the trigger benchmark */
+	.name = "trig-tp",
+	.validate = trigger_validate,
+	.setup = trigger_tp_setup, /* loads the skeleton and attaches bench_trigger_tp */
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer, /* no-op consumer */
+	.measure = trigger_measure, /* reads the hits counter from the skeleton's bss */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_rawtp = { /* raw tracepoint variant of the trigger benchmark */
+	.name = "trig-rawtp",
+	.validate = trigger_validate,
+	.setup = trigger_rawtp_setup, /* loads the skeleton and attaches bench_trigger_raw_tp */
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer, /* no-op consumer */
+	.measure = trigger_measure, /* reads the hits counter from the skeleton's bss */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_kprobe = { /* kprobe variant of the trigger benchmark */
+	.name = "trig-kprobe",
+	.validate = trigger_validate,
+	.setup = trigger_kprobe_setup, /* loads the skeleton and attaches bench_trigger_kprobe */
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer, /* no-op consumer */
+	.measure = trigger_measure, /* reads the hits counter from the skeleton's bss */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fentry = { /* fentry variant of the trigger benchmark */
+	.name = "trig-fentry",
+	.validate = trigger_validate,
+	.setup = trigger_fentry_setup, /* loads the skeleton and attaches bench_trigger_fentry */
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer, /* no-op consumer */
+	.measure = trigger_measure, /* reads the hits counter from the skeleton's bss */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_trig_fmodret = { /* fmod_ret variant of the trigger benchmark */
+	.name = "trig-fmodret",
+	.validate = trigger_validate,
+	.setup = trigger_fmodret_setup, /* loads the skeleton and attaches bench_trigger_fmodret */
+	.producer_thread = trigger_producer,
+	.consumer_thread = trigger_consumer, /* no-op consumer */
+	.measure = trigger_measure, /* reads the hits counter from the skeleton's bss */
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
new file mode 100755
index 000000000000..78e83f243294
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_trigger.sh
@@ -0,0 +1,9 @@
+#!/bin/bash
+# Per trig-* variant: keep the last output line, cut at '(', print fields 3+.
+set -eufo pipefail
+
+for variant in base tp rawtp kprobe fentry fmodret
+do
+	line=$(sudo ./bench -w2 -d5 -a "trig-$variant" | tail -n1 | cut -d'(' -f1 | cut -d' ' -f3-)
+	printf '%-10s: %s\n' "$variant" "$line"
+done
diff --git a/tools/testing/selftests/bpf/progs/trigger_bench.c b/tools/testing/selftests/bpf/progs/trigger_bench.c
new file mode 100644
index 000000000000..8b36b6640e7e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/trigger_bench.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <asm/unistd.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
+
+char _license[] SEC("license") = "GPL";
+
+long hits = 0;
+
+SEC("tp/syscalls/sys_enter_getpgid")
+int bench_trigger_tp(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1); /* atomically bump the global hits counter; ctx is unused */
+	return 0;
+}
+
+SEC("raw_tp/sys_enter")
+int BPF_PROG(bench_trigger_raw_tp, struct pt_regs *regs, long id)
+{
+	if (id == __NR_getpgid) /* count getpgid() only; every other syscall ID is ignored */
+		__sync_add_and_fetch(&hits, 1);
+	return 0;
+}
+
+SEC("kprobe/__x64_sys_getpgid") /* NOTE(review): the __x64_ symbol prefix looks x86-64 specific — confirm on other arches */
+int bench_trigger_kprobe(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1); /* atomically bump the global hits counter; ctx is unused */
+	return 0;
+}
+
+SEC("fentry/__x64_sys_getpgid") /* NOTE(review): the __x64_ symbol prefix looks x86-64 specific — confirm on other arches */
+int bench_trigger_fentry(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1); /* atomically bump the global hits counter; ctx is unused */
+	return 0;
+}
+
+SEC("fmod_ret/__x64_sys_getpgid") /* NOTE(review): the __x64_ symbol prefix looks x86-64 specific — confirm on other arches */
+int bench_trigger_fmodret(void *ctx)
+{
+	__sync_add_and_fetch(&hits, 1); /* atomically bump the global hits counter; ctx is unused */
+	return -22; /* presumably -EINVAL; NOTE(review): an error return is meant to keep the real syscall from running — confirm */
+}
-- 
cgit v1.2.3-59-g8ed1b


From 7af4c8451d80d0a8622483c27ab141a7c1a94573 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:10:56 +0200
Subject: dt-bindings: net: meson-dwmac: Add the amlogic,rx-delay-ns property

The PRG_ETHERNET registers on Meson8b and newer SoCs can add an RX
delay. Add a property with the known supported values so it can be
configured according to the board layout.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/amlogic,meson-dwmac.yaml        | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index ae91aa9d8616..66074314e57a 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -67,6 +67,19 @@ allOf:
             PHY and MAC are adding a delay).
             Any configuration is ignored when the phy-mode is set to "rmii".
 
+        amlogic,rx-delay-ns:
+          enum:
+            - 0
+            - 2
+          default: 0
+          description:
+            The internal RGMII RX clock delay (provided by this IP block) in
+            nanoseconds. When phy-mode is set to "rgmii" then the RX delay
+            should be explicitly configured. When the phy-mode is set to
+            either "rgmii-id" or "rgmii-rxid" the RX clock delay is already
+            provided by the PHY. Any configuration is ignored when the
+            phy-mode is set to "rmii".
+
 properties:
   compatible:
     additionalItems: true
-- 
cgit v1.2.3-59-g8ed1b


From ee0b8e6d02186321be0ea4ec0fb2cbd35bec7e29 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:10:57 +0200
Subject: dt-bindings: net: dwmac-meson: Document the "timing-adjustment" clock

The PRG_ETHERNET registers can add an RX delay in RGMII mode. This
requires an internal re-timing circuit whose input clock is called
"timing adjustment clock". Document this clock input so the clock can be
enabled as needed.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml | 10 +++++++---
 1 file changed, 7 insertions(+), 3 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
index 66074314e57a..64c20c92c07d 100644
--- a/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
+++ b/Documentation/devicetree/bindings/net/amlogic,meson-dwmac.yaml
@@ -40,18 +40,22 @@ allOf:
     then:
       properties:
         clocks:
+          minItems: 3
+          maxItems: 4
           items:
             - description: GMAC main clock
             - description: First parent clock of the internal mux
             - description: Second parent clock of the internal mux
+            - description: The clock which drives the timing adjustment logic
 
         clock-names:
           minItems: 3
-          maxItems: 3
+          maxItems: 4
           items:
             - const: stmmaceth
             - const: clkin0
             - const: clkin1
+            - const: timing-adjustment
 
         amlogic,tx-delay-ns:
           $ref: /schemas/types.yaml#definitions/uint32
@@ -120,7 +124,7 @@ examples:
          reg = <0xc9410000 0x10000>, <0xc8834540 0x8>;
          interrupts = <8>;
          interrupt-names = "macirq";
-         clocks = <&clk_eth>, <&clkc_fclk_div2>, <&clk_mpll2>;
-         clock-names = "stmmaceth", "clkin0", "clkin1";
+         clocks = <&clk_eth>, <&clk_fclk_div2>, <&clk_mpll2>, <&clk_fclk_div2>;
+         clock-names = "stmmaceth", "clkin0", "clkin1", "timing-adjustment";
          phy-mode = "rgmii";
     };
-- 
cgit v1.2.3-59-g8ed1b


From 3649abe43251de4357bdd6ef0163de25f96554e9 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:10:58 +0200
Subject: net: stmmac: dwmac-meson8b: use FIELD_PREP instead of open-coding it

Use FIELD_PREP() to shift a value to the correct offset based on a
bitmask instead of open-coding the logic.
No functional changes.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index a3934ca6a043..c9ec0cb68082 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -5,6 +5,7 @@
  * Copyright (C) 2016 Martin Blumenstingl <martin.blumenstingl@googlemail.com>
  */
 
+#include <linux/bitfield.h>
 #include <linux/clk.h>
 #include <linux/clk-provider.h>
 #include <linux/device.h>
@@ -32,7 +33,6 @@
 #define PRG_ETH0_CLK_M250_SEL_SHIFT	4
 #define PRG_ETH0_CLK_M250_SEL_MASK	GENMASK(4, 4)
 
-#define PRG_ETH0_TXDLY_SHIFT		5
 #define PRG_ETH0_TXDLY_MASK		GENMASK(6, 5)
 
 /* divider for the result of m250_sel */
@@ -262,7 +262,8 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 					PRG_ETH0_INVERTED_RMII_CLK, 0);
 
 		meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TXDLY_MASK,
-					tx_dly_val << PRG_ETH0_TXDLY_SHIFT);
+					FIELD_PREP(PRG_ETH0_TXDLY_MASK,
+						   tx_dly_val));
 
 		/* Configure the 125MHz RGMII TX clock, the IP block changes
 		 * the output automatically (= without us having to configure
-- 
cgit v1.2.3-59-g8ed1b


From 889df20305ffeae0a6bbd435761810ba658e223d Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:10:59 +0200
Subject: net: stmmac: dwmac-meson8b: Move the documentation for the TX delay

Move the documentation for the TX delay above the PRG_ETH0_TXDLY_MASK
definition. Future commits will add more registers also with
documentation above their register bit definitions. Move the existing
comment so it will be consistent with the upcoming changes.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index c9ec0cb68082..1d7526ee09dd 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -33,6 +33,10 @@
 #define PRG_ETH0_CLK_M250_SEL_SHIFT	4
 #define PRG_ETH0_CLK_M250_SEL_MASK	GENMASK(4, 4)
 
+/* TX clock delay in ns = "8ns / 4 * tx_dly_val" (where 8ns are exactly one
+ * cycle of the 125MHz RGMII TX clock):
+ * 0ns = 0x0, 2ns = 0x1, 4ns = 0x2, 6ns = 0x3
+ */
 #define PRG_ETH0_TXDLY_MASK		GENMASK(6, 5)
 
 /* divider for the result of m250_sel */
@@ -248,10 +252,6 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 	switch (dwmac->phy_mode) {
 	case PHY_INTERFACE_MODE_RGMII:
 	case PHY_INTERFACE_MODE_RGMII_RXID:
-		/* TX clock delay in ns = "8ns / 4 * tx_dly_val" (where
-		 * 8ns are exactly one cycle of the 125MHz RGMII TX clock):
-		 * 0ns = 0x0, 2ns = 0x1, 4ns = 0x2, 6ns = 0x3
-		 */
 		tx_dly_val = dwmac->tx_delay_ns >> 1;
 		/* fall through */
 
-- 
cgit v1.2.3-59-g8ed1b


From c92d1d2311a0513d8f7f8311f5c2b1d7e78005a0 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:11:00 +0200
Subject: net: stmmac: dwmac-meson8b: Add the PRG_ETH0_ADJ_* bits

The PRG_ETH0_ADJ_* are used for applying the RGMII RX delay. The public
datasheets only have very limited description for these registers, but
Jianxin Pan provided more detailed documentation from an (unnamed)
Amlogic engineer. Add the PRG_ETH0_ADJ_* bits along with the improved
description.

Suggested-by: Jianxin Pan <jianxin.pan@amlogic.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 1d7526ee09dd..70075628c58e 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -48,6 +48,27 @@
 #define PRG_ETH0_INVERTED_RMII_CLK	BIT(11)
 #define PRG_ETH0_TX_AND_PHY_REF_CLK	BIT(12)
 
+/* Bypass (= 0, the signal from the GPIO input directly connects to the
+ * internal sampling) or enable (= 1) the internal logic for RXEN and RXD[3:0]
+ * timing tuning.
+ */
+#define PRG_ETH0_ADJ_ENABLE		BIT(13)
+/* Controls whether the RXEN and RXD[3:0] signals should be aligned with the
+ * input RX rising/falling edge and sent to the Ethernet internals. This sets
+ * the delay and skew automatically (internally).
+ */
+#define PRG_ETH0_ADJ_SETUP		BIT(14)
+/* An internal counter based on the "timing-adjustment" clock. The counter is
+ * cleared on both, the falling and rising edge of the RX_CLK. This selects the
+ * delay (= the counter value) when to start sampling RXEN and RXD[3:0].
+ */
+#define PRG_ETH0_ADJ_DELAY		GENMASK(19, 15)
+/* Adjusts the skew between each bit of RXEN and RXD[3:0]. If a signal has a
+ * large input delay, the bit for that signal (RXEN = bit 0, RXD[3] = bit 1,
+ * ...) can be configured to be 1 to compensate for a delay of about 1ns.
+ */
+#define PRG_ETH0_ADJ_SKEW		GENMASK(24, 20)
+
 #define MUX_CLK_NUM_PARENTS		2
 
 struct meson8b_dwmac;
-- 
cgit v1.2.3-59-g8ed1b


From e4227bff804fc77e2f78c77470d3fbd2d4a6a8d0 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:11:01 +0200
Subject: net: stmmac: dwmac-meson8b: Fetch the "timing-adjustment" clock

The PRG_ETHERNET registers have a built-in timing adjustment circuit
which can provide the RX delay in RGMII mode. This is driven by an
external (to this IP, but internal to the SoC) clock input. Fetch this
clock as optional (even though it's there on all supported SoCs) since
we just learned about it and existing .dtbs don't specify it.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 70075628c58e..41f3ef6bea66 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -85,6 +85,7 @@ struct meson8b_dwmac {
 	phy_interface_t			phy_mode;
 	struct clk			*rgmii_tx_clk;
 	u32				tx_delay_ns;
+	struct clk			*timing_adj_clk;
 };
 
 struct meson8b_dwmac_clk_configs {
@@ -380,6 +381,13 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 				 &dwmac->tx_delay_ns))
 		dwmac->tx_delay_ns = 2;
 
+	dwmac->timing_adj_clk = devm_clk_get_optional(dwmac->dev,
+						      "timing-adjustment");
+	if (IS_ERR(dwmac->timing_adj_clk)) {
+		ret = PTR_ERR(dwmac->timing_adj_clk);
+		goto err_remove_config_dt;
+	}
+
 	ret = meson8b_init_rgmii_tx_clk(dwmac);
 	if (ret)
 		goto err_remove_config_dt;
-- 
cgit v1.2.3-59-g8ed1b


From a54dc4a4904568fe2c6b2ba249dcc97612affebb Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:11:02 +0200
Subject: net: stmmac: dwmac-meson8b: Make the clock enabling code re-usable

The timing adjustment clock will need similar logic as the RGMII clock:
It has to be enabled in the driver conditionally and when the driver is
unloaded it should be disabled again. Extract the existing code for the
RGMII clock into a new function so it can be re-used.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    | 23 +++++++++++++++++-----
 1 file changed, 18 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index 41f3ef6bea66..d31f79c455de 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -266,6 +266,22 @@ static int meson_axg_set_phy_mode(struct meson8b_dwmac *dwmac)
 	return 0;
 }
 
+static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac,
+					   struct clk *clk)
+{
+	int ret;
+
+	ret = clk_prepare_enable(clk);
+	if (ret)
+		return ret;
+
+	/* Tie the disable/unprepare to @clk (the clock enabled above), not to
+	 * rgmii_tx_clk, and return a devm registration error to the caller. */
+	return devm_add_action_or_reset(dwmac->dev,
+					(void(*)(void *))clk_disable_unprepare,
+					clk);
+}
+
 static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 {
 	int ret;
@@ -299,16 +315,13 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 			return ret;
 		}
 
-		ret = clk_prepare_enable(dwmac->rgmii_tx_clk);
+		ret = meson8b_devm_clk_prepare_enable(dwmac,
+						      dwmac->rgmii_tx_clk);
 		if (ret) {
 			dev_err(dwmac->dev,
 				"failed to enable the RGMII TX clock\n");
 			return ret;
 		}
-
-		devm_add_action_or_reset(dwmac->dev,
-					(void(*)(void *))clk_disable_unprepare,
-					dwmac->rgmii_tx_clk);
 		break;
 
 	case PHY_INTERFACE_MODE_RMII:
-- 
cgit v1.2.3-59-g8ed1b


From 9308c47640d515d16e06a7fdf333c51a39c1b0b1 Mon Sep 17 00:00:00 2001
From: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Date: Tue, 12 May 2020 23:11:03 +0200
Subject: net: stmmac: dwmac-meson8b: add support for the RX delay
 configuration

Configure the PRG_ETH0_ADJ_* bits to enable or disable the RX delay
based on the various RGMII PHY modes. For now the only supported RX
delay settings are:
- disabled, use for example for phy-mode "rgmii-id"
- 0ns - this is treated identical to "disabled", used for example on
  boards where the PHY provides 2ns TX delay and the PCB trace length
  already adds 2ns RX delay
- 2ns - for whenever the PHY cannot add the RX delay and the traces on
  the PCB don't add any RX delay

Disabling the RX delay (in case u-boot enables it, which is the case
for example on Meson8b Odroid-C1) simply means that PRG_ETH0_ADJ_ENABLE,
PRG_ETH0_ADJ_SETUP, PRG_ETH0_ADJ_DELAY and PRG_ETH0_ADJ_SKEW should be
disabled (just disabling PRG_ETH0_ADJ_ENABLE may be enough, since that
disables the whole re-timing logic - but I find it makes more sense to
clear the other bits as well since they depend on that setting).

u-boot on Odroid-C1 uses the following steps to enable a 2ns RX delay:
- enabling the timing adjustment clock
- enabling the timing adjustment logic by setting PRG_ETH0_ADJ_ENABLE
- setting the PRG_ETH0_ADJ_SETUP bit

The documentation for the PRG_ETH0_ADJ_DELAY and PRG_ETH0_ADJ_SKEW
registers indicates that we can even set different RX delays. However,
I could not find out how this works exactly, so for now we only support
a 2ns RX delay using the exact same way that Odroid-C1's u-boot does.

Signed-off-by: Martin Blumenstingl <martin.blumenstingl@googlemail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/stmicro/stmmac/dwmac-meson8b.c    | 85 ++++++++++++++++------
 1 file changed, 62 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
index d31f79c455de..234e8b6816ce 100644
--- a/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-meson8b.c
@@ -85,6 +85,7 @@ struct meson8b_dwmac {
 	phy_interface_t			phy_mode;
 	struct clk			*rgmii_tx_clk;
 	u32				tx_delay_ns;
+	u32				rx_delay_ns;
 	struct clk			*timing_adj_clk;
 };
 
@@ -284,25 +285,64 @@ static int meson8b_devm_clk_prepare_enable(struct meson8b_dwmac *dwmac,
 
 static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 {
+	u32 tx_dly_config, rx_dly_config, delay_config;
 	int ret;
-	u8 tx_dly_val = 0;
+
+	tx_dly_config = FIELD_PREP(PRG_ETH0_TXDLY_MASK,
+				   dwmac->tx_delay_ns >> 1);
+
+	if (dwmac->rx_delay_ns == 2)
+		rx_dly_config = PRG_ETH0_ADJ_ENABLE | PRG_ETH0_ADJ_SETUP;
+	else
+		rx_dly_config = 0;
 
 	switch (dwmac->phy_mode) {
 	case PHY_INTERFACE_MODE_RGMII:
+		delay_config = tx_dly_config | rx_dly_config;
+		break;
 	case PHY_INTERFACE_MODE_RGMII_RXID:
-		tx_dly_val = dwmac->tx_delay_ns >> 1;
-		/* fall through */
-
-	case PHY_INTERFACE_MODE_RGMII_ID:
+		delay_config = tx_dly_config;
+		break;
 	case PHY_INTERFACE_MODE_RGMII_TXID:
+		delay_config = rx_dly_config;
+		break;
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RMII:
+		delay_config = 0;
+		break;
+	default:
+		dev_err(dwmac->dev, "unsupported phy-mode %s\n",
+			phy_modes(dwmac->phy_mode));
+		return -EINVAL;
+	};
+
+	if (rx_dly_config & PRG_ETH0_ADJ_ENABLE) {
+		if (!dwmac->timing_adj_clk) {
+			dev_err(dwmac->dev,
+				"The timing-adjustment clock is mandatory for the RX delay re-timing\n");
+			return -EINVAL;
+		}
+
+		/* The timing adjustment logic is driven by a separate clock */
+		ret = meson8b_devm_clk_prepare_enable(dwmac,
+						      dwmac->timing_adj_clk);
+		if (ret) {
+			dev_err(dwmac->dev,
+				"Failed to enable the timing-adjustment clock\n");
+			return ret;
+		}
+	}
+
+	meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TXDLY_MASK |
+				PRG_ETH0_ADJ_ENABLE | PRG_ETH0_ADJ_SETUP |
+				PRG_ETH0_ADJ_DELAY | PRG_ETH0_ADJ_SKEW,
+				delay_config);
+
+	if (phy_interface_mode_is_rgmii(dwmac->phy_mode)) {
 		/* only relevant for RMII mode -> disable in RGMII mode */
 		meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
 					PRG_ETH0_INVERTED_RMII_CLK, 0);
 
-		meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TXDLY_MASK,
-					FIELD_PREP(PRG_ETH0_TXDLY_MASK,
-						   tx_dly_val));
-
 		/* Configure the 125MHz RGMII TX clock, the IP block changes
 		 * the output automatically (= without us having to configure
 		 * a register) based on the line-speed (125MHz for Gbit speeds,
@@ -322,24 +362,11 @@ static int meson8b_init_prg_eth(struct meson8b_dwmac *dwmac)
 				"failed to enable the RGMII TX clock\n");
 			return ret;
 		}
-		break;
-
-	case PHY_INTERFACE_MODE_RMII:
+	} else {
 		/* invert internal clk_rmii_i to generate 25/2.5 tx_rx_clk */
 		meson8b_dwmac_mask_bits(dwmac, PRG_ETH0,
 					PRG_ETH0_INVERTED_RMII_CLK,
 					PRG_ETH0_INVERTED_RMII_CLK);
-
-		/* TX clock delay cannot be configured in RMII mode */
-		meson8b_dwmac_mask_bits(dwmac, PRG_ETH0, PRG_ETH0_TXDLY_MASK,
-					0);
-
-		break;
-
-	default:
-		dev_err(dwmac->dev, "unsupported phy-mode %s\n",
-			phy_modes(dwmac->phy_mode));
-		return -EINVAL;
 	}
 
 	/* enable TX_CLK and PHY_REF_CLK generator */
@@ -394,6 +421,18 @@ static int meson8b_dwmac_probe(struct platform_device *pdev)
 				 &dwmac->tx_delay_ns))
 		dwmac->tx_delay_ns = 2;
 
+	/* use 0ns as fallback since this is what most boards actually use */
+	if (of_property_read_u32(pdev->dev.of_node, "amlogic,rx-delay-ns",
+				 &dwmac->rx_delay_ns))
+		dwmac->rx_delay_ns = 0;
+
+	if (dwmac->rx_delay_ns != 0 && dwmac->rx_delay_ns != 2) {
+		dev_err(&pdev->dev,
+			"The only allowed RX delays values are: 0ns, 2ns");
+		ret = -EINVAL;
+		goto err_remove_config_dt;
+	}
+
 	dwmac->timing_adj_clk = devm_clk_get_optional(dwmac->dev,
 						      "timing-adjustment");
 	if (IS_ERR(dwmac->timing_adj_clk)) {
-- 
cgit v1.2.3-59-g8ed1b


From 99aaf53e2f7c4a1b152b7f300c6b07ffbc2fe192 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:15 -0700
Subject: tools/bpf: selftests : Explain bpf_iter test failures with llvm
 10.0.0

Commit 6879c042e105 ("tools/bpf: selftests: Add bpf_iter selftests")
added self tests for the bpf_iter feature. But two subtests,
ipv6_route and netlink, need the latest llvm 10.x release branch
or trunk due to a bug in the llvm BPF backend. This patch adds
the file README.rst to document these two failures
so people using llvm 10.0.0 can be aware of them.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200513180215.2949237-1-yhs@fb.com
---
 tools/testing/selftests/bpf/README.rst | 43 ++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/README.rst

diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
new file mode 100644
index 000000000000..0f67f1b470b0
--- /dev/null
+++ b/tools/testing/selftests/bpf/README.rst
@@ -0,0 +1,43 @@
+==================
+BPF Selftest Notes
+==================
+
+Additional information about selftest failures is
+documented here.
+
+bpf_iter test failures with clang/llvm 10.0.0
+=============================================
+
+With clang/llvm 10.0.0, the following two bpf_iter tests failed:
+  * ``bpf_iter/ipv6_route``
+  * ``bpf_iter/netlink``
+
+The symptom for ``bpf_iter/ipv6_route`` looks like
+
+.. code-block:: c
+
+  2: (79) r8 = *(u64 *)(r1 +8)
+  ...
+  14: (bf) r2 = r8
+  15: (0f) r2 += r1
+  ; BPF_SEQ_PRINTF(seq, "%pi6 %02x ", &rt->fib6_dst.addr, rt->fib6_dst.plen);
+  16: (7b) *(u64 *)(r8 +64) = r2
+  only read is supported
+
+The symptom for ``bpf_iter/netlink`` looks like
+
+.. code-block:: c
+
+  ; struct netlink_sock *nlk = ctx->sk;
+  2: (79) r7 = *(u64 *)(r1 +8)
+  ...
+  15: (bf) r2 = r7
+  16: (0f) r2 += r1
+  ; BPF_SEQ_PRINTF(seq, "%pK %-3d ", s, s->sk_protocol);
+  17: (7b) *(u64 *)(r7 +0) = r2
+  only read is supported
+
+This is due to an llvm BPF backend bug. The fix
+  https://reviews.llvm.org/D78466
+has been pushed to llvm 10.x release branch and will be
+available in 10.0.1. The fix is also available in llvm 11.0.0 trunk.
-- 
cgit v1.2.3-59-g8ed1b


From 21aef70eade22a656297c28d5da93301915d2ac2 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:16 -0700
Subject: bpf: Change btf_iter func proto prefix to "bpf_iter_"

This is to be consistent with tracing and lsm programs
which have prefix "bpf_trace_" and "bpf_lsm_" respectively.

Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513180216.2949387-1-yhs@fb.com
---
 include/linux/bpf.h    | 6 +++---
 tools/lib/bpf/libbpf.c | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index cf4b6e44f2bc..ab94dfd8826f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1131,10 +1131,10 @@ struct bpf_link *bpf_link_get_from_fd(u32 ufd);
 int bpf_obj_pin_user(u32 ufd, const char __user *pathname);
 int bpf_obj_get_user(const char __user *pathname, int flags);
 
-#define BPF_ITER_FUNC_PREFIX "__bpf_iter__"
+#define BPF_ITER_FUNC_PREFIX "bpf_iter_"
 #define DEFINE_BPF_ITER_FUNC(target, args...)			\
-	extern int __bpf_iter__ ## target(args);		\
-	int __init __bpf_iter__ ## target(args) { return 0; }
+	extern int bpf_iter_ ## target(args);			\
+	int __init bpf_iter_ ## target(args) { return 0; }
 
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fd882616ab52..292257995487 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6919,7 +6919,7 @@ invalid_prog:
 
 #define BTF_TRACE_PREFIX "btf_trace_"
 #define BTF_LSM_PREFIX "bpf_lsm_"
-#define BTF_ITER_PREFIX "__bpf_iter__"
+#define BTF_ITER_PREFIX "bpf_iter_"
 #define BTF_MAX_NAME_SIZE 128
 
 static int find_btf_by_prefix_kind(const struct btf *btf, const char *prefix,
-- 
cgit v1.2.3-59-g8ed1b


From 2e3ed68bfcd9c5ca2cf8b88ba23a34992ccd0b1f Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:18 -0700
Subject: bpf: Add comments to interpret bpf_prog return values

Add a short comment in bpf_iter_run_prog() function to
explain how bpf_prog return value is converted to
seq_ops->show() return value:
  bpf_prog return           seq_ops()->show() return
     0                         0
     1                         -EAGAIN

When show() return value is -EAGAIN, the current
bpf_seq_read() will end. If the current seq_file buffer
is empty, -EAGAIN will return to user space. Otherwise,
the buffer will be copied to user space.
In both cases, the next bpf_seq_read() call will
try to show the same object which returned -EAGAIN
previously.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513180218.2949517-1-yhs@fb.com
---
 kernel/bpf/bpf_iter.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 30efd15cd4a0..0a45a6cdfabd 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -526,5 +526,11 @@ int bpf_iter_run_prog(struct bpf_prog *prog, void *ctx)
 	migrate_enable();
 	rcu_read_unlock();
 
+	/* bpf program can only return 0 or 1:
+	 *  0 : okay
+	 *  1 : retry the same object
+	 * The bpf_iter_run_prog() return value
+	 * will be seq_ops->show() return value.
+	 */
 	return ret == 0 ? 0 : -EAGAIN;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 15172a46fa2796c1a1358a36babd31274716ed41 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:19 -0700
Subject: bpf: net: Refactor bpf_iter target registration

Currently bpf_iter_reg_target takes parameters from the target
and allocates memory to save them. This is really not
necessary, especially since in the future we may grow the
information passed from targets to the bpf_iter manager.

The patch refactors the code so target reg_info
becomes static and bpf_iter manager can just take
a reference to it.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200513180219.2949605-1-yhs@fb.com
---
 include/linux/bpf.h      |  2 +-
 kernel/bpf/bpf_iter.c    | 36 +++++++++++++++++-------------------
 kernel/bpf/map_iter.c    | 18 +++++++++---------
 kernel/bpf/task_iter.c   | 30 ++++++++++++++++--------------
 net/ipv6/route.c         | 18 +++++++++---------
 net/netlink/af_netlink.c | 18 +++++++++---------
 6 files changed, 61 insertions(+), 61 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index ab94dfd8826f..6fa773e2d1bf 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1153,7 +1153,7 @@ struct bpf_iter_meta {
 	u64 seq_num;
 };
 
-int bpf_iter_reg_target(struct bpf_iter_reg *reg_info);
+int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
 void bpf_iter_unreg_target(const char *target);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 0a45a6cdfabd..051fb8cab62a 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -8,11 +8,7 @@
 
 struct bpf_iter_target_info {
 	struct list_head list;
-	const char *target;
-	const struct seq_operations *seq_ops;
-	bpf_iter_init_seq_priv_t init_seq_private;
-	bpf_iter_fini_seq_priv_t fini_seq_private;
-	u32 seq_priv_size;
+	const struct bpf_iter_reg *reg_info;
 	u32 btf_id;	/* cached value */
 };
 
@@ -222,8 +218,8 @@ static int iter_release(struct inode *inode, struct file *file)
 	iter_priv = container_of(seq->private, struct bpf_iter_priv_data,
 				 target_private);
 
-	if (iter_priv->tinfo->fini_seq_private)
-		iter_priv->tinfo->fini_seq_private(seq->private);
+	if (iter_priv->tinfo->reg_info->fini_seq_private)
+		iter_priv->tinfo->reg_info->fini_seq_private(seq->private);
 
 	bpf_prog_put(iter_priv->prog);
 	seq->private = iter_priv;
@@ -238,7 +234,12 @@ const struct file_operations bpf_iter_fops = {
 	.release	= iter_release,
 };
 
-int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
+/* The argument reg_info will be cached in bpf_iter_target_info.
+ * The common practice is to declare the target reg_info as
+ * a const static variable and pass it as an argument to
+ * bpf_iter_reg_target().
+ */
+int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
 
@@ -246,11 +247,7 @@ int bpf_iter_reg_target(struct bpf_iter_reg *reg_info)
 	if (!tinfo)
 		return -ENOMEM;
 
-	tinfo->target = reg_info->target;
-	tinfo->seq_ops = reg_info->seq_ops;
-	tinfo->init_seq_private = reg_info->init_seq_private;
-	tinfo->fini_seq_private = reg_info->fini_seq_private;
-	tinfo->seq_priv_size = reg_info->seq_priv_size;
+	tinfo->reg_info = reg_info;
 	INIT_LIST_HEAD(&tinfo->list);
 
 	mutex_lock(&targets_mutex);
@@ -267,7 +264,7 @@ void bpf_iter_unreg_target(const char *target)
 
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
-		if (!strcmp(target, tinfo->target)) {
+		if (!strcmp(target, tinfo->reg_info->target)) {
 			list_del(&tinfo->list);
 			kfree(tinfo);
 			found = true;
@@ -303,7 +300,7 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
 			supported = true;
 			break;
 		}
-		if (!strcmp(attach_fname + prefix_len, tinfo->target)) {
+		if (!strcmp(attach_fname + prefix_len, tinfo->reg_info->target)) {
 			cache_btf_id(tinfo, prog);
 			supported = true;
 			break;
@@ -431,15 +428,16 @@ static int prepare_seq_file(struct file *file, struct bpf_iter_link *link)
 
 	tinfo = link->tinfo;
 	total_priv_dsize = offsetof(struct bpf_iter_priv_data, target_private) +
-			   tinfo->seq_priv_size;
-	priv_data = __seq_open_private(file, tinfo->seq_ops, total_priv_dsize);
+			   tinfo->reg_info->seq_priv_size;
+	priv_data = __seq_open_private(file, tinfo->reg_info->seq_ops,
+				       total_priv_dsize);
 	if (!priv_data) {
 		err = -ENOMEM;
 		goto release_prog;
 	}
 
-	if (tinfo->init_seq_private) {
-		err = tinfo->init_seq_private(priv_data->target_private);
+	if (tinfo->reg_info->init_seq_private) {
+		err = tinfo->reg_info->init_seq_private(priv_data->target_private);
 		if (err)
 			goto release_seq_file;
 	}
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index 8162e0c00b9f..c6216a5fe56e 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -81,17 +81,17 @@ static const struct seq_operations bpf_map_seq_ops = {
 	.show	= bpf_map_seq_show,
 };
 
+static const struct bpf_iter_reg bpf_map_reg_info = {
+	.target			= "bpf_map",
+	.seq_ops		= &bpf_map_seq_ops,
+	.init_seq_private	= NULL,
+	.fini_seq_private	= NULL,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
+};
+
 static int __init bpf_map_iter_init(void)
 {
-	struct bpf_iter_reg reg_info = {
-		.target			= "bpf_map",
-		.seq_ops		= &bpf_map_seq_ops,
-		.init_seq_private	= NULL,
-		.fini_seq_private	= NULL,
-		.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
-	};
-
-	return bpf_iter_reg_target(&reg_info);
+	return bpf_iter_reg_target(&bpf_map_reg_info);
 }
 
 late_initcall(bpf_map_iter_init);
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index aeed662d8451..bd7bfd83d9e0 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -306,22 +306,24 @@ static const struct seq_operations task_file_seq_ops = {
 	.show	= task_file_seq_show,
 };
 
+static const struct bpf_iter_reg task_reg_info = {
+	.target			= "task",
+	.seq_ops		= &task_seq_ops,
+	.init_seq_private	= init_seq_pidns,
+	.fini_seq_private	= fini_seq_pidns,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
+};
+
+static const struct bpf_iter_reg task_file_reg_info = {
+	.target			= "task_file",
+	.seq_ops		= &task_file_seq_ops,
+	.init_seq_private	= init_seq_pidns,
+	.fini_seq_private	= fini_seq_pidns,
+	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
+};
+
 static int __init task_iter_init(void)
 {
-	struct bpf_iter_reg task_file_reg_info = {
-		.target			= "task_file",
-		.seq_ops		= &task_file_seq_ops,
-		.init_seq_private	= init_seq_pidns,
-		.fini_seq_private	= fini_seq_pidns,
-		.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
-	};
-	struct bpf_iter_reg task_reg_info = {
-		.target			= "task",
-		.seq_ops		= &task_seq_ops,
-		.init_seq_private	= init_seq_pidns,
-		.fini_seq_private	= fini_seq_pidns,
-		.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
-	};
 	int ret;
 
 	ret = bpf_iter_reg_target(&task_reg_info);
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 25f6d3e619d0..6ad2fa51a23a 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6397,17 +6397,17 @@ void __init ip6_route_init_special_entries(void)
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
 DEFINE_BPF_ITER_FUNC(ipv6_route, struct bpf_iter_meta *meta, struct fib6_info *rt)
 
+static const struct bpf_iter_reg ipv6_route_reg_info = {
+	.target			= "ipv6_route",
+	.seq_ops		= &ipv6_route_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct ipv6_route_iter),
+};
+
 static int __init bpf_iter_register(void)
 {
-	struct bpf_iter_reg reg_info = {
-		.target			= "ipv6_route",
-		.seq_ops		= &ipv6_route_seq_ops,
-		.init_seq_private	= bpf_iter_init_seq_net,
-		.fini_seq_private	= bpf_iter_fini_seq_net,
-		.seq_priv_size		= sizeof(struct ipv6_route_iter),
-	};
-
-	return bpf_iter_reg_target(&reg_info);
+	return bpf_iter_reg_target(&ipv6_route_reg_info);
 }
 
 static void bpf_iter_unregister(void)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 33cda9baa979..839827227e98 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2803,17 +2803,17 @@ static const struct rhashtable_params netlink_rhashtable_params = {
 };
 
 #if defined(CONFIG_BPF_SYSCALL) && defined(CONFIG_PROC_FS)
+static const struct bpf_iter_reg netlink_reg_info = {
+	.target			= "netlink",
+	.seq_ops		= &netlink_seq_ops,
+	.init_seq_private	= bpf_iter_init_seq_net,
+	.fini_seq_private	= bpf_iter_fini_seq_net,
+	.seq_priv_size		= sizeof(struct nl_seq_iter),
+};
+
 static int __init bpf_iter_register(void)
 {
-	struct bpf_iter_reg reg_info = {
-		.target			= "netlink",
-		.seq_ops		= &netlink_seq_ops,
-		.init_seq_private	= bpf_iter_init_seq_net,
-		.fini_seq_private	= bpf_iter_fini_seq_net,
-		.seq_priv_size		= sizeof(struct nl_seq_iter),
-	};
-
-	return bpf_iter_reg_target(&reg_info);
+	return bpf_iter_reg_target(&netlink_reg_info);
 }
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From ab2ee4fcb9d61fd57db70db694adbcf54662bd80 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:20 -0700
Subject: bpf: Change func bpf_iter_unreg_target() signature

Change func bpf_iter_unreg_target() parameter from target
name to target reg_info, similar to bpf_iter_reg_target().

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513180220.2949737-1-yhs@fb.com
---
 include/linux/bpf.h   | 2 +-
 kernel/bpf/bpf_iter.c | 4 ++--
 net/ipv6/route.c      | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 6fa773e2d1bf..534174eca86b 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1154,7 +1154,7 @@ struct bpf_iter_meta {
 };
 
 int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info);
-void bpf_iter_unreg_target(const char *target);
+void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info);
 bool bpf_iter_prog_supported(struct bpf_prog *prog);
 int bpf_iter_link_attach(const union bpf_attr *attr, struct bpf_prog *prog);
 int bpf_iter_new_fd(struct bpf_link *link);
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 051fb8cab62a..644f8626b2c0 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -257,14 +257,14 @@ int bpf_iter_reg_target(const struct bpf_iter_reg *reg_info)
 	return 0;
 }
 
-void bpf_iter_unreg_target(const char *target)
+void bpf_iter_unreg_target(const struct bpf_iter_reg *reg_info)
 {
 	struct bpf_iter_target_info *tinfo;
 	bool found = false;
 
 	mutex_lock(&targets_mutex);
 	list_for_each_entry(tinfo, &targets, list) {
-		if (!strcmp(target, tinfo->reg_info->target)) {
+		if (reg_info == tinfo->reg_info) {
 			list_del(&tinfo->list);
 			kfree(tinfo);
 			found = true;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 6ad2fa51a23a..22bf4e36c093 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6412,7 +6412,7 @@ static int __init bpf_iter_register(void)
 
 static void bpf_iter_unregister(void)
 {
-	bpf_iter_unreg_target("ipv6_route");
+	bpf_iter_unreg_target(&ipv6_route_reg_info);
 }
 #endif
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 3c32cc1bceba8a1755dc35cd97516f6c67856844 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:21 -0700
Subject: bpf: Enable bpf_iter targets registering ctx argument types

Commit b121b341e598 ("bpf: Add PTR_TO_BTF_ID_OR_NULL
support") adds a field btf_id_or_null_non0_off to
bpf_prog->aux structure to indicate that the
first ctx argument is PTR_TO_BTF_ID reg_type and
all others are PTR_TO_BTF_ID_OR_NULL.
This approach does not really scale if we have
other different reg types in the future, e.g.,
a pointer to a buffer.

This patch enables bpf_iter targets registering ctx argument
reg types which may be different from the default one.
For example, for pointers to structures, the default reg_type
is PTR_TO_BTF_ID for tracing program. The target can register
a particular pointer type as PTR_TO_BTF_ID_OR_NULL which can
be used by the verifier to enforce accesses.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513180221.2949882-1-yhs@fb.com
---
 include/linux/bpf.h      | 12 +++++++++++-
 include/net/ip6_fib.h    |  7 +++++++
 kernel/bpf/bpf_iter.c    |  5 +++++
 kernel/bpf/btf.c         | 15 ++++++++++-----
 kernel/bpf/map_iter.c    |  5 +++++
 kernel/bpf/task_iter.c   | 12 ++++++++++++
 kernel/bpf/verifier.c    |  1 -
 net/ipv6/ip6_fib.c       |  5 -----
 net/ipv6/route.c         |  5 +++++
 net/netlink/af_netlink.c |  5 +++++
 10 files changed, 60 insertions(+), 12 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index 534174eca86b..c45d198ac38c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -643,6 +643,12 @@ struct bpf_jit_poke_descriptor {
 	u16 reason;
 };
 
+/* reg_type info for ctx arguments */
+struct bpf_ctx_arg_aux {
+	u32 offset;
+	enum bpf_reg_type reg_type;
+};
+
 struct bpf_prog_aux {
 	atomic64_t refcnt;
 	u32 used_map_cnt;
@@ -654,12 +660,13 @@ struct bpf_prog_aux {
 	u32 func_cnt; /* used by non-func prog as the number of func progs */
 	u32 func_idx; /* 0 for non-func prog, the index in func array for func prog */
 	u32 attach_btf_id; /* in-kernel BTF type id to attach to */
+	u32 ctx_arg_info_size;
+	const struct bpf_ctx_arg_aux *ctx_arg_info;
 	struct bpf_prog *linked_prog;
 	bool verifier_zext; /* Zero extensions has been inserted by verifier. */
 	bool offload_requested;
 	bool attach_btf_trace; /* true if attaching to BTF-enabled raw tp */
 	bool func_proto_unreliable;
-	bool btf_id_or_null_non0_off;
 	enum bpf_tramp_prog_type trampoline_prog_type;
 	struct bpf_trampoline *trampoline;
 	struct hlist_node tramp_hlist;
@@ -1139,12 +1146,15 @@ int bpf_obj_get_user(const char __user *pathname, int flags);
 typedef int (*bpf_iter_init_seq_priv_t)(void *private_data);
 typedef void (*bpf_iter_fini_seq_priv_t)(void *private_data);
 
+#define BPF_ITER_CTX_ARG_MAX 2
 struct bpf_iter_reg {
 	const char *target;
 	const struct seq_operations *seq_ops;
 	bpf_iter_init_seq_priv_t init_seq_private;
 	bpf_iter_fini_seq_priv_t fini_seq_private;
 	u32 seq_priv_size;
+	u32 ctx_arg_info_size;
+	struct bpf_ctx_arg_aux ctx_arg_info[BPF_ITER_CTX_ARG_MAX];
 };
 
 struct bpf_iter_meta {
diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index 80262d2980f5..870b646c5797 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -540,6 +540,13 @@ static inline bool fib6_metric_locked(struct fib6_info *f6i, int metric)
 	return !!(f6i->fib6_metrics->metrics[RTAX_LOCK - 1] & (1 << metric));
 }
 
+#if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
+struct bpf_iter__ipv6_route {
+	__bpf_md_ptr(struct bpf_iter_meta *, meta);
+	__bpf_md_ptr(struct fib6_info *, rt);
+};
+#endif
+
 #ifdef CONFIG_IPV6_MULTIPLE_TABLES
 static inline bool fib6_has_custom_rules(const struct net *net)
 {
diff --git a/kernel/bpf/bpf_iter.c b/kernel/bpf/bpf_iter.c
index 644f8626b2c0..dd612b80b9fe 100644
--- a/kernel/bpf/bpf_iter.c
+++ b/kernel/bpf/bpf_iter.c
@@ -308,6 +308,11 @@ bool bpf_iter_prog_supported(struct bpf_prog *prog)
 	}
 	mutex_unlock(&targets_mutex);
 
+	if (supported) {
+		prog->aux->ctx_arg_info_size = tinfo->reg_info->ctx_arg_info_size;
+		prog->aux->ctx_arg_info = tinfo->reg_info->ctx_arg_info;
+	}
+
 	return supported;
 }
 
diff --git a/kernel/bpf/btf.c b/kernel/bpf/btf.c
index dcd233139294..58c9af1d4808 100644
--- a/kernel/bpf/btf.c
+++ b/kernel/bpf/btf.c
@@ -3694,7 +3694,7 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 	struct bpf_verifier_log *log = info->log;
 	const struct btf_param *args;
 	u32 nr_args, arg;
-	int ret;
+	int i, ret;
 
 	if (off % 8) {
 		bpf_log(log, "func '%s' offset %d is not multiple of 8\n",
@@ -3790,10 +3790,15 @@ bool btf_ctx_access(int off, int size, enum bpf_access_type type,
 		return true;
 
 	/* this is a pointer to another type */
-	if (off != 0 && prog->aux->btf_id_or_null_non0_off)
-		info->reg_type = PTR_TO_BTF_ID_OR_NULL;
-	else
-		info->reg_type = PTR_TO_BTF_ID;
+	info->reg_type = PTR_TO_BTF_ID;
+	for (i = 0; i < prog->aux->ctx_arg_info_size; i++) {
+		const struct bpf_ctx_arg_aux *ctx_arg_info = &prog->aux->ctx_arg_info[i];
+
+		if (ctx_arg_info->offset == off) {
+			info->reg_type = ctx_arg_info->reg_type;
+			break;
+		}
+	}
 
 	if (tgt_prog) {
 		ret = btf_translate_to_vmlinux(log, btf, t, tgt_prog->type, arg);
diff --git a/kernel/bpf/map_iter.c b/kernel/bpf/map_iter.c
index c6216a5fe56e..c69071e334bf 100644
--- a/kernel/bpf/map_iter.c
+++ b/kernel/bpf/map_iter.c
@@ -87,6 +87,11 @@ static const struct bpf_iter_reg bpf_map_reg_info = {
 	.init_seq_private	= NULL,
 	.fini_seq_private	= NULL,
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_map_info),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__bpf_map, map),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
 };
 
 static int __init bpf_map_iter_init(void)
diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index bd7bfd83d9e0..a9b7264dda08 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -312,6 +312,11 @@ static const struct bpf_iter_reg task_reg_info = {
 	.init_seq_private	= init_seq_pidns,
 	.fini_seq_private	= fini_seq_pidns,
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_info),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__task, task),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
 };
 
 static const struct bpf_iter_reg task_file_reg_info = {
@@ -320,6 +325,13 @@ static const struct bpf_iter_reg task_file_reg_info = {
 	.init_seq_private	= init_seq_pidns,
 	.fini_seq_private	= fini_seq_pidns,
 	.seq_priv_size		= sizeof(struct bpf_iter_seq_task_file_info),
+	.ctx_arg_info_size	= 2,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__task_file, task),
+		  PTR_TO_BTF_ID_OR_NULL },
+		{ offsetof(struct bpf_iter__task_file, file),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
 };
 
 static int __init task_iter_init(void)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2a1826c76bb6..a3f2af756fd6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -10652,7 +10652,6 @@ static int check_attach_btf_id(struct bpf_verifier_env *env)
 		prog->aux->attach_func_proto = t;
 		if (!bpf_iter_prog_supported(prog))
 			return -EINVAL;
-		prog->aux->btf_id_or_null_non0_off = true;
 		ret = btf_distill_func_proto(&env->log, btf, t,
 					     tname, &fmodel);
 		return ret;
diff --git a/net/ipv6/ip6_fib.c b/net/ipv6/ip6_fib.c
index a1fcc0ca21af..250ff52c674e 100644
--- a/net/ipv6/ip6_fib.c
+++ b/net/ipv6/ip6_fib.c
@@ -2638,11 +2638,6 @@ static void ipv6_route_native_seq_stop(struct seq_file *seq, void *v)
 }
 
 #if IS_BUILTIN(CONFIG_IPV6) && defined(CONFIG_BPF_SYSCALL)
-struct bpf_iter__ipv6_route {
-	__bpf_md_ptr(struct bpf_iter_meta *, meta);
-	__bpf_md_ptr(struct fib6_info *, rt);
-};
-
 static int ipv6_route_prog_seq_show(struct bpf_prog *prog,
 				    struct bpf_iter_meta *meta,
 				    void *v)
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index 22bf4e36c093..22e56465f14d 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -6403,6 +6403,11 @@ static const struct bpf_iter_reg ipv6_route_reg_info = {
 	.init_seq_private	= bpf_iter_init_seq_net,
 	.fini_seq_private	= bpf_iter_fini_seq_net,
 	.seq_priv_size		= sizeof(struct ipv6_route_iter),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__ipv6_route, rt),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
 };
 
 static int __init bpf_iter_register(void)
diff --git a/net/netlink/af_netlink.c b/net/netlink/af_netlink.c
index 839827227e98..4f2c3b14ddbf 100644
--- a/net/netlink/af_netlink.c
+++ b/net/netlink/af_netlink.c
@@ -2809,6 +2809,11 @@ static const struct bpf_iter_reg netlink_reg_info = {
 	.init_seq_private	= bpf_iter_init_seq_net,
 	.fini_seq_private	= bpf_iter_fini_seq_net,
 	.seq_priv_size		= sizeof(struct nl_seq_iter),
+	.ctx_arg_info_size	= 1,
+	.ctx_arg_info		= {
+		{ offsetof(struct bpf_iter__netlink, sk),
+		  PTR_TO_BTF_ID_OR_NULL },
+	},
 };
 
 static int __init bpf_iter_register(void)
-- 
cgit v1.2.3-59-g8ed1b


From 03421a92f5627430d23ed95df55958e04848f184 Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Wed, 13 May 2020 11:02:23 -0700
Subject: samples/bpf: Remove compiler warnings

Commit 5fbc220862fc ("tools/libpf: Add offsetof/container_of macro
in bpf_helpers.h") added macros offsetof/container_of to
bpf_helpers.h. Unfortunately, it caused compilation warnings
below for a few samples/bpf programs:
  In file included from /data/users/yhs/work/net-next/samples/bpf/sockex2_kern.c:4:
  In file included from /data/users/yhs/work/net-next/include/uapi/linux/in.h:24:
  In file included from /data/users/yhs/work/net-next/include/linux/socket.h:8:
  In file included from /data/users/yhs/work/net-next/include/linux/uio.h:8:
  /data/users/yhs/work/net-next/include/linux/kernel.h:992:9: warning: 'container_of' macro redefined [-Wmacro-redefined]
          ^
  /data/users/yhs/work/net-next/tools/lib/bpf/bpf_helpers.h:46:9: note: previous definition is here
          ^
  1 warning generated.
    CLANG-bpf  samples/bpf/sockex3_kern.o

In all these cases, bpf_helpers.h is included first, followed by other
standard headers. The macro container_of is defined unconditionally
in kernel.h, causing the compiler warning.

The fix is to move bpf_helpers.h after standard headers.

Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200513180223.2949987-1-yhs@fb.com
---
 samples/bpf/offwaketime_kern.c | 4 ++--
 samples/bpf/sockex2_kern.c     | 4 ++--
 samples/bpf/sockex3_kern.c     | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/samples/bpf/offwaketime_kern.c b/samples/bpf/offwaketime_kern.c
index c4ec10dbfc3b..d459f73412a4 100644
--- a/samples/bpf/offwaketime_kern.c
+++ b/samples/bpf/offwaketime_kern.c
@@ -5,12 +5,12 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_tracing.h>
 #include <uapi/linux/ptrace.h>
 #include <uapi/linux/perf_event.h>
 #include <linux/version.h>
 #include <linux/sched.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_tracing.h>
 
 #define _(P) ({typeof(P) val; bpf_probe_read(&val, sizeof(val), &P); val;})
 
diff --git a/samples/bpf/sockex2_kern.c b/samples/bpf/sockex2_kern.c
index a41dd520bc53..b7997541f7ee 100644
--- a/samples/bpf/sockex2_kern.c
+++ b/samples/bpf/sockex2_kern.c
@@ -1,12 +1,12 @@
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
 #include <uapi/linux/ip.h>
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/if_tunnel.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
 
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 36d4dac23549..779a5249c418 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -5,8 +5,6 @@
  * License as published by the Free Software Foundation.
  */
 #include <uapi/linux/bpf.h>
-#include <bpf/bpf_helpers.h>
-#include "bpf_legacy.h"
 #include <uapi/linux/in.h>
 #include <uapi/linux/if.h>
 #include <uapi/linux/if_ether.h>
@@ -14,6 +12,8 @@
 #include <uapi/linux/ipv6.h>
 #include <uapi/linux/if_tunnel.h>
 #include <uapi/linux/mpls.h>
+#include <bpf/bpf_helpers.h>
+#include "bpf_legacy.h"
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
 
-- 
cgit v1.2.3-59-g8ed1b


From 6e8a4f9dda3823274fa8a4c1aa5e6a93f9775749 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 May 2020 13:07:59 +0200
Subject: net: ignore sock_from_file errors in __scm_install_fd

The code had historically been ignoring these errors, and my recent
refactoring changed that, which broke ssh in some setups.

Fixes: 2618d530dd8b ("net/scm: cleanup scm_detach_fds")
Reported-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Tested-by: Ido Schimmel <idosch@mellanox.com>
Tested-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/scm.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/core/scm.c b/net/core/scm.c
index a75cd637a71f..875df1c2989d 100644
--- a/net/core/scm.c
+++ b/net/core/scm.c
@@ -307,7 +307,7 @@ static int __scm_install_fd(struct file *file, int __user *ufd, int o_flags)
 		sock_update_classid(&sock->sk->sk_cgrp_data);
 	}
 	fd_install(new_fd, get_file(file));
-	return error;
+	return 0;
 }
 
 static int scm_max_fds(struct msghdr *msg)
-- 
cgit v1.2.3-59-g8ed1b


From ea13d71887bd589d2f08ca74a3e1961dd68b678d Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 13 May 2020 14:34:40 +0200
Subject: net: phy: tja11xx: add cable-test support

Add initial cable testing support.
This PHY needs only 100usec for this test and it is recommended to run it
before the link is up. For now, provide at least ethtool support, so it
can be tested by more developers.

This patch was tested with TJA1102 PHY with following results:
- No cable, is detected as open
- 1m cable with the other end not connected is detected as open
- a 40m cable (out of spec, max length should be 15m) is detected as OK.

The current patch does not provide polarity test support. This test would
indicate an improper wire connection, where the "+" wire of the main PHY is
connected to the "-" wire of the link partner.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 106 +++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 105 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index ca5f9d4dc57e..8b743d25002b 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -5,6 +5,7 @@
  */
 #include <linux/delay.h>
 #include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/kernel.h>
 #include <linux/mdio.h>
 #include <linux/mii.h>
@@ -26,6 +27,7 @@
 #define MII_ECTRL_POWER_MODE_NO_CHANGE	(0x0 << 11)
 #define MII_ECTRL_POWER_MODE_NORMAL	(0x3 << 11)
 #define MII_ECTRL_POWER_MODE_STANDBY	(0xc << 11)
+#define MII_ECTRL_CABLE_TEST		BIT(5)
 #define MII_ECTRL_CONFIG_EN		BIT(2)
 #define MII_ECTRL_WAKE_REQUEST		BIT(0)
 
@@ -55,6 +57,11 @@
 #define MII_GENSTAT			24
 #define MII_GENSTAT_PLL_LOCKED		BIT(14)
 
+#define MII_EXTSTAT			25
+#define MII_EXTSTAT_SHORT_DETECT	BIT(8)
+#define MII_EXTSTAT_OPEN_DETECT		BIT(7)
+#define MII_EXTSTAT_POLARITY_DETECT	BIT(6)
+
 #define MII_COMMCFG			27
 #define MII_COMMCFG_AUTO_OP		BIT(15)
 
@@ -111,6 +118,11 @@ static int tja11xx_enable_link_control(struct phy_device *phydev)
 	return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_LINK_CONTROL);
 }
 
+static int tja11xx_disable_link_control(struct phy_device *phydev)
+{
+	return phy_clear_bits(phydev, MII_ECTRL, MII_ECTRL_LINK_CONTROL);
+}
+
 static int tja11xx_wakeup(struct phy_device *phydev)
 {
 	int ret;
@@ -536,6 +548,93 @@ static int tja11xx_config_intr(struct phy_device *phydev)
 	return phy_write(phydev, MII_INTEN, value);
 }
 
+static int tja11xx_cable_test_start(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_clear_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
+	if (ret)
+		return ret;
+
+	ret = tja11xx_wakeup(phydev);
+	if (ret < 0)
+		return ret;
+
+	ret = tja11xx_disable_link_control(phydev);
+	if (ret < 0)
+		return ret;
+
+	return phy_set_bits(phydev, MII_ECTRL, MII_ECTRL_CABLE_TEST);
+}
+
+/*
+ * | BI_DA+           | BI_DA-                 | Result
+ * | open             | open                   | open
+ * | + short to -     | - short to +           | short
+ * | short to Vdd     | open                   | open
+ * | open             | short to Vdd           | open
+ * | short to Vdd     | short to Vdd           | short
+ * | short to GND     | open                   | open
+ * | open             | short to GND           | open
+ * | short to GND     | short to GND           | short
+ * | connected to active link partner (master) | short and open
+ */
+static int tja11xx_cable_test_report_trans(u32 result)
+{
+	u32 mask = MII_EXTSTAT_SHORT_DETECT | MII_EXTSTAT_OPEN_DETECT;
+
+	if ((result & mask) == mask) {
+		/* connected to active link partner (master) */
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	} else if ((result & mask) == 0) {
+		return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+	} else if (result & MII_EXTSTAT_SHORT_DETECT) {
+		return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+	} else if (result & MII_EXTSTAT_OPEN_DETECT) {
+		return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+	} else {
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	}
+}
+
+static int tja11xx_cable_test_report(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_EXTSTAT);
+	if (ret < 0)
+		return ret;
+
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+				tja11xx_cable_test_report_trans(ret));
+
+	return 0;
+}
+
+static int tja11xx_cable_test_get_status(struct phy_device *phydev,
+					 bool *finished)
+{
+	int ret;
+
+	*finished = false;
+
+	ret = phy_read(phydev, MII_ECTRL);
+	if (ret < 0)
+		return ret;
+
+	if (!(ret & MII_ECTRL_CABLE_TEST)) {
+		*finished = true;
+
+		ret = phy_set_bits(phydev, MII_COMMCFG, MII_COMMCFG_AUTO_OP);
+		if (ret)
+			return ret;
+
+		return tja11xx_cable_test_report(phydev);
+	}
+
+	return 0;
+}
+
 static struct phy_driver tja11xx_driver[] = {
 	{
 		PHY_ID_MATCH_MODEL(PHY_ID_TJA1100),
@@ -572,6 +671,7 @@ static struct phy_driver tja11xx_driver[] = {
 	}, {
 		.name		= "NXP TJA1102 Port 0",
 		.features       = PHY_BASIC_T1_FEATURES,
+		.flags          = PHY_POLL_CABLE_TEST,
 		.probe		= tja1102_p0_probe,
 		.soft_reset	= tja11xx_soft_reset,
 		.config_aneg	= tja11xx_config_aneg,
@@ -587,10 +687,12 @@ static struct phy_driver tja11xx_driver[] = {
 		.get_stats	= tja11xx_get_stats,
 		.ack_interrupt	= tja11xx_ack_interrupt,
 		.config_intr	= tja11xx_config_intr,
-
+		.cable_test_start = tja11xx_cable_test_start,
+		.cable_test_get_status = tja11xx_cable_test_get_status,
 	}, {
 		.name		= "NXP TJA1102 Port 1",
 		.features       = PHY_BASIC_T1_FEATURES,
+		.flags          = PHY_POLL_CABLE_TEST,
 		/* currently no probe for Port 1 is need */
 		.soft_reset	= tja11xx_soft_reset,
 		.config_aneg	= tja11xx_config_aneg,
@@ -606,6 +708,8 @@ static struct phy_driver tja11xx_driver[] = {
 		.get_stats	= tja11xx_get_stats,
 		.ack_interrupt	= tja11xx_ack_interrupt,
 		.config_intr	= tja11xx_config_intr,
+		.cable_test_start = tja11xx_cable_test_start,
+		.cable_test_get_status = tja11xx_cable_test_get_status,
 	}
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 7d7e7bce768b9e21eb1b741a901c19c2bef1cccc Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 13 May 2020 18:35:21 +0200
Subject: net: phy: broadcom: add exp register access methods without buslock

Add helper to read and write expansion registers without taking the mdio
lock.

Please note, that this changes the semantics of the read and write.
Before there was no lock between selecting the expansion register and
the actual read/write. This may lead to access failures if there are
parallel accesses. Instead take the bus lock during the whole access
cycle.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 38 +++++++++++++++++++++++++++++++-------
 drivers/net/phy/bcm-phy-lib.h |  2 ++
 2 files changed, 33 insertions(+), 7 deletions(-)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index d5f9a2701989..a390812714ed 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -14,33 +14,57 @@
 #define MII_BCM_CHANNEL_WIDTH     0x2000
 #define BCM_CL45VEN_EEE_ADV       0x3c
 
-int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val)
+int __bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val)
 {
 	int rc;
 
-	rc = phy_write(phydev, MII_BCM54XX_EXP_SEL, reg);
+	rc = __phy_write(phydev, MII_BCM54XX_EXP_SEL, reg);
 	if (rc < 0)
 		return rc;
 
-	return phy_write(phydev, MII_BCM54XX_EXP_DATA, val);
+	return __phy_write(phydev, MII_BCM54XX_EXP_DATA, val);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_write_exp);
+
+int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val)
+{
+	int rc;
+
+	phy_lock_mdio_bus(phydev);
+	rc = __bcm_phy_write_exp(phydev, reg, val);
+	phy_unlock_mdio_bus(phydev);
+
+	return rc;
 }
 EXPORT_SYMBOL_GPL(bcm_phy_write_exp);
 
-int bcm_phy_read_exp(struct phy_device *phydev, u16 reg)
+int __bcm_phy_read_exp(struct phy_device *phydev, u16 reg)
 {
 	int val;
 
-	val = phy_write(phydev, MII_BCM54XX_EXP_SEL, reg);
+	val = __phy_write(phydev, MII_BCM54XX_EXP_SEL, reg);
 	if (val < 0)
 		return val;
 
-	val = phy_read(phydev, MII_BCM54XX_EXP_DATA);
+	val = __phy_read(phydev, MII_BCM54XX_EXP_DATA);
 
 	/* Restore default value.  It's O.K. if this write fails. */
-	phy_write(phydev, MII_BCM54XX_EXP_SEL, 0);
+	__phy_write(phydev, MII_BCM54XX_EXP_SEL, 0);
 
 	return val;
 }
+EXPORT_SYMBOL_GPL(__bcm_phy_read_exp);
+
+int bcm_phy_read_exp(struct phy_device *phydev, u16 reg)
+{
+	int rc;
+
+	phy_lock_mdio_bus(phydev);
+	rc = __bcm_phy_read_exp(phydev, reg);
+	phy_unlock_mdio_bus(phydev);
+
+	return rc;
+}
 EXPORT_SYMBOL_GPL(bcm_phy_read_exp);
 
 int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index 4d3de91cda6c..0eb5333cda39 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -27,6 +27,8 @@
 #define AFE_HPF_TRIM_OTHERS		MISC_ADDR(0x3a, 0)
 
 
+int __bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
+int __bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
 int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
 int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
 
-- 
cgit v1.2.3-59-g8ed1b


From e184a9072f8995e78ad6cef48bfeab2b987945ec Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 13 May 2020 18:35:22 +0200
Subject: net: phy: broadcom: add bcm_phy_modify_exp()

Add the convenience function to do a read-modify-write. This has the
additional benefit of saving one write to the selection register.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 32 ++++++++++++++++++++++++++++++++
 drivers/net/phy/bcm-phy-lib.h |  2 ++
 2 files changed, 34 insertions(+)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index a390812714ed..41c728fbcfb2 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -67,6 +67,38 @@ int bcm_phy_read_exp(struct phy_device *phydev, u16 reg)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_read_exp);
 
+int __bcm_phy_modify_exp(struct phy_device *phydev, u16 reg, u16 mask, u16 set)
+{
+	int new, ret;
+
+	ret = __phy_write(phydev, MII_BCM54XX_EXP_SEL, reg);
+	if (ret < 0)
+		return ret;
+
+	ret = __phy_read(phydev, MII_BCM54XX_EXP_DATA);
+	if (ret < 0)
+		return ret;
+
+	new = (ret & ~mask) | set;
+	if (new == ret)
+		return 0;
+
+	return __phy_write(phydev, MII_BCM54XX_EXP_DATA, new);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_modify_exp);
+
+int bcm_phy_modify_exp(struct phy_device *phydev, u16 reg, u16 mask, u16 set)
+{
+	int ret;
+
+	phy_lock_mdio_bus(phydev);
+	ret = __bcm_phy_modify_exp(phydev, reg, mask, set);
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+EXPORT_SYMBOL_GPL(bcm_phy_modify_exp);
+
 int bcm54xx_auxctl_read(struct phy_device *phydev, u16 regnum)
 {
 	/* The register must be written to both the Shadow Register Select and
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index 0eb5333cda39..b35d880220b9 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -29,8 +29,10 @@
 
 int __bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
 int __bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
+int __bcm_phy_modify_exp(struct phy_device *phydev, u16 reg, u16 mask, u16 set);
 int bcm_phy_write_exp(struct phy_device *phydev, u16 reg, u16 val);
 int bcm_phy_read_exp(struct phy_device *phydev, u16 reg);
+int bcm_phy_modify_exp(struct phy_device *phydev, u16 reg, u16 mask, u16 set);
 
 static inline int bcm_phy_write_exp_sel(struct phy_device *phydev,
 					u16 reg, u16 val)
-- 
cgit v1.2.3-59-g8ed1b


From 11ecf8c55b91806e4dc6a1b9fe7cbf68cdc9b006 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 13 May 2020 18:35:23 +0200
Subject: net: phy: broadcom: add cable test support

Most modern broadcom PHYs support ECD (enhanced cable diagnostics). Add
support for it in the bcm-phy-lib so they can easily be used in the PHY
driver.

There are two access methods for ECD: legacy by expansion registers and
via the new RDB registers which are exclusive. Provide functions in two
variants from which the PHY driver can choose. To keep things simple, we
just switch the register access to expansion registers in the RDB
variant for now. On the flipside, we have to keep a bus lock to
prevent any other non-legacy access on the PHY.

The results of the intra-pair tests are inconclusive (at least for the
BCM54140). Most of the time half the length is reported, but sometimes
the length is correct.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 189 ++++++++++++++++++++++++++++++++++++++++++
 drivers/net/phy/bcm-phy-lib.h |   6 ++
 include/linux/brcmphy.h       |  52 ++++++++++++
 3 files changed, 247 insertions(+)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index 41c728fbcfb2..cb92786e3ded 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -4,12 +4,14 @@
  */
 
 #include "bcm-phy-lib.h"
+#include <linux/bitfield.h>
 #include <linux/brcmphy.h>
 #include <linux/export.h>
 #include <linux/mdio.h>
 #include <linux/module.h>
 #include <linux/phy.h>
 #include <linux/ethtool.h>
+#include <linux/ethtool_netlink.h>
 
 #define MII_BCM_CHANNEL_WIDTH     0x2000
 #define BCM_CL45VEN_EEE_ADV       0x3c
@@ -581,6 +583,193 @@ int bcm_phy_enable_jumbo(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_enable_jumbo);
 
+int __bcm_phy_enable_rdb_access(struct phy_device *phydev)
+{
+	return __bcm_phy_write_exp(phydev, BCM54XX_EXP_REG7E, 0);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_enable_rdb_access);
+
+int __bcm_phy_enable_legacy_access(struct phy_device *phydev)
+{
+	return __bcm_phy_write_rdb(phydev, BCM54XX_RDB_REG0087,
+				   BCM54XX_ACCESS_MODE_LEGACY_EN);
+}
+EXPORT_SYMBOL_GPL(__bcm_phy_enable_legacy_access);
+
+static int _bcm_phy_cable_test_start(struct phy_device *phydev, bool is_rdb)
+{
+	u16 mask, set;
+	int ret;
+
+	/* Auto-negotiation must be enabled for cable diagnostics to work, but
+	 * don't advertise any capabilities.
+	 */
+	phy_write(phydev, MII_BMCR, BMCR_ANENABLE);
+	phy_write(phydev, MII_ADVERTISE, ADVERTISE_CSMA);
+	phy_write(phydev, MII_CTRL1000, 0);
+
+	phy_lock_mdio_bus(phydev);
+	if (is_rdb) {
+		ret = __bcm_phy_enable_legacy_access(phydev);
+		if (ret)
+			goto out;
+	}
+
+	mask = BCM54XX_ECD_CTRL_CROSS_SHORT_DIS | BCM54XX_ECD_CTRL_UNIT_MASK;
+	set = BCM54XX_ECD_CTRL_RUN | BCM54XX_ECD_CTRL_BREAK_LINK |
+	      FIELD_PREP(BCM54XX_ECD_CTRL_UNIT_MASK,
+			 BCM54XX_ECD_CTRL_UNIT_CM);
+
+	ret = __bcm_phy_modify_exp(phydev, BCM54XX_EXP_ECD_CTRL, mask, set);
+
+out:
+	/* re-enable the RDB access even if there was an error */
+	if (is_rdb)
+		ret = __bcm_phy_enable_rdb_access(phydev) ? : ret;
+
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+
+static int bcm_phy_cable_test_report_trans(int result)
+{
+	switch (result) {
+	case BCM54XX_ECD_FAULT_TYPE_OK:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+	case BCM54XX_ECD_FAULT_TYPE_OPEN:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+	case BCM54XX_ECD_FAULT_TYPE_SAME_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+	case BCM54XX_ECD_FAULT_TYPE_CROSS_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_CROSS_SHORT;
+	case BCM54XX_ECD_FAULT_TYPE_INVALID:
+	case BCM54XX_ECD_FAULT_TYPE_BUSY:
+	default:
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	}
+}
+
+static bool bcm_phy_distance_valid(int result)
+{
+	switch (result) {
+	case BCM54XX_ECD_FAULT_TYPE_OPEN:
+	case BCM54XX_ECD_FAULT_TYPE_SAME_SHORT:
+	case BCM54XX_ECD_FAULT_TYPE_CROSS_SHORT:
+		return true;
+	}
+	return false;
+}
+
+static int bcm_phy_report_length(struct phy_device *phydev, int pair)
+{
+	int val;
+
+	val = __bcm_phy_read_exp(phydev,
+				 BCM54XX_EXP_ECD_PAIR_A_LENGTH_RESULTS + pair);
+	if (val < 0)
+		return val;
+
+	if (val == BCM54XX_ECD_LENGTH_RESULTS_INVALID)
+		return 0;
+
+	ethnl_cable_test_fault_length(phydev, pair, val);
+
+	return 0;
+}
+
+static int _bcm_phy_cable_test_get_status(struct phy_device *phydev,
+					  bool *finished, bool is_rdb)
+{
+	int pair_a, pair_b, pair_c, pair_d, ret;
+
+	*finished = false;
+
+	phy_lock_mdio_bus(phydev);
+
+	if (is_rdb) {
+		ret = __bcm_phy_enable_legacy_access(phydev);
+		if (ret)
+			goto out;
+	}
+
+	ret = __bcm_phy_read_exp(phydev, BCM54XX_EXP_ECD_CTRL);
+	if (ret < 0)
+		goto out;
+
+	if (ret & BCM54XX_ECD_CTRL_IN_PROGRESS) {
+		ret = 0;
+		goto out;
+	}
+
+	ret = __bcm_phy_read_exp(phydev, BCM54XX_EXP_ECD_FAULT_TYPE);
+	if (ret < 0)
+		goto out;
+
+	pair_a = FIELD_GET(BCM54XX_ECD_FAULT_TYPE_PAIR_A_MASK, ret);
+	pair_b = FIELD_GET(BCM54XX_ECD_FAULT_TYPE_PAIR_B_MASK, ret);
+	pair_c = FIELD_GET(BCM54XX_ECD_FAULT_TYPE_PAIR_C_MASK, ret);
+	pair_d = FIELD_GET(BCM54XX_ECD_FAULT_TYPE_PAIR_D_MASK, ret);
+
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_A,
+				bcm_phy_cable_test_report_trans(pair_a));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_B,
+				bcm_phy_cable_test_report_trans(pair_b));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_C,
+				bcm_phy_cable_test_report_trans(pair_c));
+	ethnl_cable_test_result(phydev, ETHTOOL_A_CABLE_PAIR_D,
+				bcm_phy_cable_test_report_trans(pair_d));
+
+	if (bcm_phy_distance_valid(pair_a))
+		bcm_phy_report_length(phydev, 0);
+	if (bcm_phy_distance_valid(pair_b))
+		bcm_phy_report_length(phydev, 1);
+	if (bcm_phy_distance_valid(pair_c))
+		bcm_phy_report_length(phydev, 2);
+	if (bcm_phy_distance_valid(pair_d))
+		bcm_phy_report_length(phydev, 3);
+
+	ret = 0;
+	*finished = true;
+out:
+	/* re-enable the RDB access even if there was an error */
+	if (is_rdb)
+		ret = __bcm_phy_enable_rdb_access(phydev) ? : ret;
+
+	phy_unlock_mdio_bus(phydev);
+
+	return ret;
+}
+
+int bcm_phy_cable_test_start(struct phy_device *phydev)
+{
+	return _bcm_phy_cable_test_start(phydev, false);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_cable_test_start);
+
+int bcm_phy_cable_test_get_status(struct phy_device *phydev, bool *finished)
+{
+	return _bcm_phy_cable_test_get_status(phydev, finished, false);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_cable_test_get_status);
+
+/* We assume that all PHYs which support RDB access can be switched to legacy
+ * mode. If, in the future, this is not true anymore, we have to re-implement
+ * this with RDB access.
+ */
+int bcm_phy_cable_test_start_rdb(struct phy_device *phydev)
+{
+	return _bcm_phy_cable_test_start(phydev, true);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_cable_test_start_rdb);
+
+int bcm_phy_cable_test_get_status_rdb(struct phy_device *phydev,
+				      bool *finished)
+{
+	return _bcm_phy_cable_test_get_status(phydev, finished, true);
+}
+EXPORT_SYMBOL_GPL(bcm_phy_cable_test_get_status_rdb);
+
 MODULE_DESCRIPTION("Broadcom PHY Library");
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR("Broadcom Corporation");
diff --git a/drivers/net/phy/bcm-phy-lib.h b/drivers/net/phy/bcm-phy-lib.h
index b35d880220b9..237a8503c9b4 100644
--- a/drivers/net/phy/bcm-phy-lib.h
+++ b/drivers/net/phy/bcm-phy-lib.h
@@ -80,4 +80,10 @@ void bcm_phy_r_rc_cal_reset(struct phy_device *phydev);
 int bcm_phy_28nm_a0b0_afe_config_init(struct phy_device *phydev);
 int bcm_phy_enable_jumbo(struct phy_device *phydev);
 
+int bcm_phy_cable_test_get_status_rdb(struct phy_device *phydev,
+				      bool *finished);
+int bcm_phy_cable_test_start_rdb(struct phy_device *phydev);
+int bcm_phy_cable_test_start(struct phy_device *phydev);
+int bcm_phy_cable_test_get_status(struct phy_device *phydev, bool *finished);
+
 #endif /* _LINUX_BCM_PHY_LIB_H */
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index 58d0150acc3e..d41624db6de2 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -119,6 +119,11 @@
 #define MII_BCM54XX_RDB_ADDR	0x1e
 #define MII_BCM54XX_RDB_DATA	0x1f
 
+/* legacy access control via rdb/expansion register */
+#define BCM54XX_RDB_REG0087		0x0087
+#define BCM54XX_EXP_REG7E		(MII_BCM54XX_EXP_SEL_ER + 0x7E)
+#define BCM54XX_ACCESS_MODE_LEGACY_EN	BIT(15)
+
 /*
  * AUXILIARY CONTROL SHADOW ACCESS REGISTERS.  (PHY REG 0x18)
  */
@@ -294,4 +299,51 @@
 #define MII_BRCM_CORE_EXPB0	0xB0
 #define MII_BRCM_CORE_EXPB1	0xB1
 
+/* Enhanced Cable Diagnostics */
+#define BCM54XX_RDB_ECD_CTRL			0x2a0
+#define BCM54XX_EXP_ECD_CTRL			(MII_BCM54XX_EXP_SEL_ER + 0xc0)
+
+#define BCM54XX_ECD_CTRL_CABLE_TYPE_CAT3	1	/* CAT3 or worse */
+#define BCM54XX_ECD_CTRL_CABLE_TYPE_CAT5	0	/* CAT5 or better */
+#define BCM54XX_ECD_CTRL_CABLE_TYPE_MASK	BIT(0)	/* cable type */
+#define BCM54XX_ECD_CTRL_INVALID		BIT(3)	/* invalid result */
+#define BCM54XX_ECD_CTRL_UNIT_CM		0	/* centimeters */
+#define BCM54XX_ECD_CTRL_UNIT_M			1	/* meters */
+#define BCM54XX_ECD_CTRL_UNIT_MASK		BIT(10)	/* cable length unit */
+#define BCM54XX_ECD_CTRL_IN_PROGRESS		BIT(11)	/* test in progress */
+#define BCM54XX_ECD_CTRL_BREAK_LINK		BIT(12)	/* unconnect link
+							 * during test
+							 */
+#define BCM54XX_ECD_CTRL_CROSS_SHORT_DIS	BIT(13)	/* disable inter-pair
+							 * short check
+							 */
+#define BCM54XX_ECD_CTRL_RUN			BIT(15)	/* run immediate */
+
+#define BCM54XX_RDB_ECD_FAULT_TYPE		0x2a1
+#define BCM54XX_EXP_ECD_FAULT_TYPE		(MII_BCM54XX_EXP_SEL_ER + 0xc1)
+#define BCM54XX_ECD_FAULT_TYPE_INVALID		0x0
+#define BCM54XX_ECD_FAULT_TYPE_OK		0x1
+#define BCM54XX_ECD_FAULT_TYPE_OPEN		0x2
+#define BCM54XX_ECD_FAULT_TYPE_SAME_SHORT	0x3 /* short same pair */
+#define BCM54XX_ECD_FAULT_TYPE_CROSS_SHORT	0x4 /* short different pairs */
+#define BCM54XX_ECD_FAULT_TYPE_BUSY		0x9
+#define BCM54XX_ECD_FAULT_TYPE_PAIR_D_MASK	GENMASK(3, 0)
+#define BCM54XX_ECD_FAULT_TYPE_PAIR_C_MASK	GENMASK(7, 4)
+#define BCM54XX_ECD_FAULT_TYPE_PAIR_B_MASK	GENMASK(11, 8)
+#define BCM54XX_ECD_FAULT_TYPE_PAIR_A_MASK	GENMASK(15, 12)
+#define BCM54XX_ECD_PAIR_A_LENGTH_RESULTS	0x2a2
+#define BCM54XX_ECD_PAIR_B_LENGTH_RESULTS	0x2a3
+#define BCM54XX_ECD_PAIR_C_LENGTH_RESULTS	0x2a4
+#define BCM54XX_ECD_PAIR_D_LENGTH_RESULTS	0x2a5
+
+#define BCM54XX_RDB_ECD_PAIR_A_LENGTH_RESULTS	0x2a2
+#define BCM54XX_EXP_ECD_PAIR_A_LENGTH_RESULTS	(MII_BCM54XX_EXP_SEL_ER + 0xc2)
+#define BCM54XX_RDB_ECD_PAIR_B_LENGTH_RESULTS	0x2a3
+#define BCM54XX_EXP_ECD_PAIR_B_LENGTH_RESULTS	(MII_BCM54XX_EXP_SEL_ER + 0xc3)
+#define BCM54XX_RDB_ECD_PAIR_C_LENGTH_RESULTS	0x2a4
+#define BCM54XX_EXP_ECD_PAIR_C_LENGTH_RESULTS	(MII_BCM54XX_EXP_SEL_ER + 0xc4)
+#define BCM54XX_RDB_ECD_PAIR_D_LENGTH_RESULTS	0x2a5
+#define BCM54XX_EXP_ECD_PAIR_D_LENGTH_RESULTS	(MII_BCM54XX_EXP_SEL_ER + 0xc5)
+#define BCM54XX_ECD_LENGTH_RESULTS_INVALID	0xffff
+
 #endif /* _LINUX_BRCMPHY_H */
-- 
cgit v1.2.3-59-g8ed1b


From f956af3fd474c1f47332920abd656cd713febe3f Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 13 May 2020 18:35:24 +0200
Subject: net: phy: bcm54140: add cable diagnostics support

Use the generic cable tester functions from bcm-phy-lib to add cable
tester support.

100m cable, A/B/C/D open:
  Cable test started for device eth0.
  Cable test completed for device eth0.
  Pair: Pair A, result: Open Circuit
  Pair: Pair B, result: Open Circuit
  Pair: Pair C, result: Open Circuit
  Pair: Pair D, result: Open Circuit
  Pair: Pair A, fault length: 106.60m
  Pair: Pair B, fault length: 103.32m
  Pair: Pair C, fault length: 104.96m
  Pair: Pair D, fault length: 106.60m

1m cable, A/B connected, pair C shorted, D open:
  Cable test started for device eth0.
  Cable test completed for device eth0.
  Pair: Pair A, result: OK
  Pair: Pair B, result: OK
  Pair: Pair C, result: Short within Pair
  Pair: Pair D, result: Open Circuit
  Pair: Pair C, fault length: 0.82m
  Pair: Pair D, fault length: 1.64m

1m cable, A/B connected, pair C shorted with D:
  Cable test started for device eth0.
  Cable test completed for device eth0.
  Pair: Pair A, result: OK
  Pair: Pair B, result: OK
  Pair: Pair C, result: Short to another pair
  Pair: Pair D, result: Short to another pair
  Pair: Pair C, fault length: 1.64m
  Pair: Pair D, fault length: 1.64m

The granularity of the length measurement seems to be 82cm.

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm54140.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/phy/bcm54140.c b/drivers/net/phy/bcm54140.c
index 9ef37a3bc2bb..8998e68bb26b 100644
--- a/drivers/net/phy/bcm54140.c
+++ b/drivers/net/phy/bcm54140.c
@@ -831,6 +831,7 @@ static struct phy_driver bcm54140_drivers[] = {
 		.phy_id         = PHY_ID_BCM54140,
 		.phy_id_mask    = BCM54140_PHY_ID_MASK,
 		.name           = "Broadcom BCM54140",
+		.flags		= PHY_POLL_CABLE_TEST,
 		.features       = PHY_GBIT_FEATURES,
 		.config_init    = bcm54140_config_init,
 		.did_interrupt	= bcm54140_did_interrupt,
@@ -842,6 +843,8 @@ static struct phy_driver bcm54140_drivers[] = {
 		.soft_reset	= genphy_soft_reset,
 		.get_tunable	= bcm54140_get_tunable,
 		.set_tunable	= bcm54140_set_tunable,
+		.cable_test_start = bcm_phy_cable_test_start_rdb,
+		.cable_test_get_status = bcm_phy_cable_test_get_status_rdb,
 	},
 };
 module_phy_driver(bcm54140_drivers);
-- 
cgit v1.2.3-59-g8ed1b


From 1b2f08df0a886e0565c71821d5230cba395f5c18 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 13 May 2020 21:36:41 +0200
Subject: ipv6: set msg_control_is_user in do_ipv6_getsockopt

While do_ipv6_getsockopt does not call the high-level recvmsg helper,
the msghdr eventually ends up being passed to put_cmsg anyway, and thus
needs msg_control_is_user set to the proper value.

Fixes: 1f466e1f15cf ("net: cleanly handle kernel vs user buffers for ->msg_control")
Reported-by: Eric Dumazet <eric.dumazet@gmail.com>
Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 18d05403d3b5..a0e50cc57e54 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1075,6 +1075,7 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		msg.msg_control = optval;
 		msg.msg_controllen = len;
 		msg.msg_flags = flags;
+		msg.msg_control_is_user = true;
 
 		lock_sock(sk);
 		skb = np->pktoptions;
-- 
cgit v1.2.3-59-g8ed1b


From 6cb7576710aea4dfd11449b4f514a03cd9f03505 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Wed, 13 May 2020 22:38:07 +0200
Subject: net: phy: at803x: add cable diagnostics support

The AR8031/AR8033 and the AR8035 support cable diagnostics. Adding
driver support is straightforward, so let's add it.

The PHY just does one pair at a time, so we have to start the test four
times. The cable_test_get_status() can block and therefore we can just
busy poll the test completion and continue with the next pair until we
are done.
The time delta counter seems to run at 125MHz which just gives us a
resolution of about 82.4cm per tick.

100m cable, A/B/C/D open:
  Cable test started for device eth0.
  Cable test completed for device eth0.
  Pair: Pair A, result: Open Circuit
  Pair: Pair A, fault length: 107.94m
  Pair: Pair B, result: Open Circuit
  Pair: Pair B, fault length: 104.64m
  Pair: Pair C, result: Open Circuit
  Pair: Pair C, fault length: 105.47m
  Pair: Pair D, result: Open Circuit
  Pair: Pair D, fault length: 107.94m

1m cable, A/B connected, C shorted, D open:
  Cable test started for device eth0.
  Cable test completed for device eth0.
  Pair: Pair A, result: OK
  Pair: Pair B, result: OK
  Pair: Pair C, result: Short within Pair
  Pair: Pair C, fault length: 0.82m
  Pair: Pair D, result: Open Circuit
  Pair: Pair D, fault length: 0.82m

Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 176 +++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 176 insertions(+)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index f4fec5f644e9..acd51b29a476 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -12,6 +12,7 @@
 #include <linux/string.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
+#include <linux/ethtool_netlink.h>
 #include <linux/of_gpio.h>
 #include <linux/bitfield.h>
 #include <linux/gpio/consumer.h>
@@ -46,6 +47,16 @@
 #define AT803X_SMART_SPEED_ENABLE		BIT(5)
 #define AT803X_SMART_SPEED_RETRY_LIMIT_MASK	GENMASK(4, 2)
 #define AT803X_SMART_SPEED_BYPASS_TIMER		BIT(1)
+#define AT803X_CDT				0x16
+#define AT803X_CDT_MDI_PAIR_MASK		GENMASK(9, 8)
+#define AT803X_CDT_ENABLE_TEST			BIT(0)
+#define AT803X_CDT_STATUS			0x1c
+#define AT803X_CDT_STATUS_STAT_NORMAL		0
+#define AT803X_CDT_STATUS_STAT_SHORT		1
+#define AT803X_CDT_STATUS_STAT_OPEN		2
+#define AT803X_CDT_STATUS_STAT_FAIL		3
+#define AT803X_CDT_STATUS_STAT_MASK		GENMASK(9, 8)
+#define AT803X_CDT_STATUS_DELTA_TIME_MASK	GENMASK(7, 0)
 #define AT803X_LED_CONTROL			0x18
 
 #define AT803X_DEVICE_ADDR			0x03
@@ -794,12 +805,172 @@ static int at803x_set_tunable(struct phy_device *phydev,
 	}
 }
 
+static int at803x_cable_test_result_trans(u16 status)
+{
+	switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
+	case AT803X_CDT_STATUS_STAT_NORMAL:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OK;
+	case AT803X_CDT_STATUS_STAT_SHORT:
+		return ETHTOOL_A_CABLE_RESULT_CODE_SAME_SHORT;
+	case AT803X_CDT_STATUS_STAT_OPEN:
+		return ETHTOOL_A_CABLE_RESULT_CODE_OPEN;
+	case AT803X_CDT_STATUS_STAT_FAIL:
+	default:
+		return ETHTOOL_A_CABLE_RESULT_CODE_UNSPEC;
+	}
+}
+
+static bool at803x_cdt_test_failed(u16 status)
+{
+	return FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status) ==
+		AT803X_CDT_STATUS_STAT_FAIL;
+}
+
+static bool at803x_cdt_fault_length_valid(u16 status)
+{
+	switch (FIELD_GET(AT803X_CDT_STATUS_STAT_MASK, status)) {
+	case AT803X_CDT_STATUS_STAT_OPEN:
+	case AT803X_CDT_STATUS_STAT_SHORT:
+		return true;
+	}
+	return false;
+}
+
+static int at803x_cdt_fault_length(u16 status)
+{
+	int dt;
+
+	/* According to the datasheet the distance to the fault is
+	 * DELTA_TIME * 0.824 meters.
+	 *
+	 * The author suspect the correct formula is:
+	 *
+	 *   fault_distance = DELTA_TIME * (c * VF) / 125MHz / 2
+	 *
+	 * where c is the speed of light, VF is the velocity factor of
+	 * the twisted pair cable, 125MHz the counter frequency and
+	 * we need to divide by 2 because the hardware will measure the
+	 * round trip time to the fault and back to the PHY.
+	 *
+	 * With a VF of 0.69 we get the factor 0.824 mentioned in the
+	 * datasheet.
+	 */
+	dt = FIELD_GET(AT803X_CDT_STATUS_DELTA_TIME_MASK, status);
+
+	return (dt * 824) / 10;
+}
+
+static int at803x_cdt_start(struct phy_device *phydev, int pair)
+{
+	u16 cdt;
+
+	cdt = FIELD_PREP(AT803X_CDT_MDI_PAIR_MASK, pair) |
+	      AT803X_CDT_ENABLE_TEST;
+
+	return phy_write(phydev, AT803X_CDT, cdt);
+}
+
+static int at803x_cdt_wait_for_completion(struct phy_device *phydev)
+{
+	int val, ret;
+
+	/* One test run takes about 25ms */
+	ret = phy_read_poll_timeout(phydev, AT803X_CDT, val,
+				    !(val & AT803X_CDT_ENABLE_TEST),
+				    30000, 100000, true);
+
+	return ret < 0 ? ret : 0;
+}
+
+static int at803x_cable_test_one_pair(struct phy_device *phydev, int pair)
+{
+	static const int ethtool_pair[] = {
+		ETHTOOL_A_CABLE_PAIR_A,
+		ETHTOOL_A_CABLE_PAIR_B,
+		ETHTOOL_A_CABLE_PAIR_C,
+		ETHTOOL_A_CABLE_PAIR_D,
+	};
+	int ret, val;
+
+	ret = at803x_cdt_start(phydev, pair);
+	if (ret)
+		return ret;
+
+	ret = at803x_cdt_wait_for_completion(phydev);
+	if (ret)
+		return ret;
+
+	val = phy_read(phydev, AT803X_CDT_STATUS);
+	if (val < 0)
+		return val;
+
+	if (at803x_cdt_test_failed(val))
+		return 0;
+
+	ethnl_cable_test_result(phydev, ethtool_pair[pair],
+				at803x_cable_test_result_trans(val));
+
+	if (at803x_cdt_fault_length_valid(val))
+		ethnl_cable_test_fault_length(phydev, ethtool_pair[pair],
+					      at803x_cdt_fault_length(val));
+
+	return 1;
+}
+
+static int at803x_cable_test_get_status(struct phy_device *phydev,
+					bool *finished)
+{
+	unsigned long pair_mask = 0xf;
+	int retries = 20;
+	int pair, ret;
+
+	*finished = false;
+
+	/* According to the datasheet the CDT can be performed when
+	 * there is no link partner or when the link partner is
+	 * auto-negotiating. Starting the test will restart the AN
+	 * automatically. It seems that doing this repeatedly we will
+	 * get a slot where our link partner won't disturb our
+	 * measurement.
+	 */
+	while (pair_mask && retries--) {
+		for_each_set_bit(pair, &pair_mask, 4) {
+			ret = at803x_cable_test_one_pair(phydev, pair);
+			if (ret < 0)
+				return ret;
+			if (ret)
+				clear_bit(pair, &pair_mask);
+		}
+		if (pair_mask)
+			msleep(250);
+	}
+
+	*finished = true;
+
+	return 0;
+}
+
+static int at803x_cable_test_start(struct phy_device *phydev)
+{
+	/* Enable auto-negotiation, but advertise no capabilities, no link
+	 * will be established. A restart of the auto-negotiation is not
+	 * required, because the cable test will automatically break the link.
+	 */
+	phy_write(phydev, MII_BMCR, BMCR_ANENABLE);
+	phy_write(phydev, MII_ADVERTISE, ADVERTISE_CSMA);
+	phy_write(phydev, MII_CTRL1000, 0);
+
+	/* we do all the (time consuming) work later */
+	return 0;
+}
+
 static struct phy_driver at803x_driver[] = {
 {
 	/* Qualcomm Atheros AR8035 */
 	.phy_id			= ATH8035_PHY_ID,
 	.name			= "Qualcomm Atheros AR8035",
 	.phy_id_mask		= AT803X_PHY_ID_MASK,
+	.flags			= PHY_POLL_CABLE_TEST,
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
 	.config_init		= at803x_config_init,
@@ -814,6 +985,8 @@ static struct phy_driver at803x_driver[] = {
 	.config_intr		= at803x_config_intr,
 	.get_tunable		= at803x_get_tunable,
 	.set_tunable		= at803x_set_tunable,
+	.cable_test_start	= at803x_cable_test_start,
+	.cable_test_get_status	= at803x_cable_test_get_status,
 }, {
 	/* Qualcomm Atheros AR8030 */
 	.phy_id			= ATH8030_PHY_ID,
@@ -835,6 +1008,7 @@ static struct phy_driver at803x_driver[] = {
 	.phy_id			= ATH8031_PHY_ID,
 	.name			= "Qualcomm Atheros AR8031/AR8033",
 	.phy_id_mask		= AT803X_PHY_ID_MASK,
+	.flags			= PHY_POLL_CABLE_TEST,
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
 	.config_init		= at803x_config_init,
@@ -850,6 +1024,8 @@ static struct phy_driver at803x_driver[] = {
 	.config_intr		= &at803x_config_intr,
 	.get_tunable		= at803x_get_tunable,
 	.set_tunable		= at803x_set_tunable,
+	.cable_test_start	= at803x_cable_test_start,
+	.cable_test_get_status	= at803x_cable_test_get_status,
 }, {
 	/* Qualcomm Atheros AR8032 */
 	PHY_ID_MATCH_EXACT(ATH8032_PHY_ID),
-- 
cgit v1.2.3-59-g8ed1b


From 6545be82807cc01712411321730656ad8ad30474 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 12 May 2020 18:13:55 +0100
Subject: sfc: fix dereference of table before it is null checked

Currently the pointer table is dereferenced when checking
table->must_restore_filters before table itself is null checked,
leading to a potential null pointer dereference.  Fix this by null
checking table before dereferencing it to check
table->must_restore_filters.

Addresses-Coverity: ("Dereference before null check")
Fixes: e4fe938cff04 ("sfc: move 'must restore' flags out of ef10-specific nic_data")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/mcdi_filters.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/sfc/mcdi_filters.c b/drivers/net/ethernet/sfc/mcdi_filters.c
index 88de95a8c08c..455a62814fb9 100644
--- a/drivers/net/ethernet/sfc/mcdi_filters.c
+++ b/drivers/net/ethernet/sfc/mcdi_filters.c
@@ -1369,10 +1369,7 @@ void efx_mcdi_filter_table_restore(struct efx_nic *efx)
 
 	WARN_ON(!rwsem_is_locked(&efx->filter_sem));
 
-	if (!table->must_restore_filters)
-		return;
-
-	if (!table)
+	if (!table || !table->must_restore_filters)
 		return;
 
 	down_write(&table->lock);
-- 
cgit v1.2.3-59-g8ed1b


From 5e3768a436bb70c9c3e27aaba6b73f8ef8f5dcf3 Mon Sep 17 00:00:00 2001
From: Daniel González Cabanelas <dgcbueu@gmail.com>
Date: Tue, 12 May 2020 19:59:48 +0200
Subject: net: mvneta: speed down the PHY, if WoL used, to save energy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Some PHYs connected to this ethernet hardware support the WoL feature.
But when WoL is enabled and the machine is powered off, the PHY remains
waiting for a magic packet at max speed (i.e. 1Gbps), which is a waste of
energy.

Slow down the PHY speed before stopping the ethernet if WoL is enabled,
and save some energy while the machine is powered off or sleeping.

Tested using an Armada 370 based board (LS421DE) equipped with a Marvell
88E1518 PHY.

Signed-off-by: Daniel González Cabanelas <dgcbueu@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 51889770958d..e0e9e56830c0 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3561,6 +3561,10 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 		    MVNETA_CAUSE_LINK_CHANGE);
 
 	phylink_start(pp->phylink);
+
+	/* We may have called phy_speed_down before */
+	phy_speed_up(pp->dev->phydev);
+
 	netif_tx_start_all_queues(pp->dev);
 }
 
@@ -3568,6 +3572,9 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 {
 	unsigned int cpu;
 
+	if (device_may_wakeup(&pp->dev->dev))
+		phy_speed_down(pp->dev->phydev, false);
+
 	phylink_stop(pp->phylink);
 
 	if (!pp->neta_armada3700) {
@@ -4040,6 +4047,10 @@ static int mvneta_mdio_probe(struct mvneta_port *pp)
 	phylink_ethtool_get_wol(pp->phylink, &wol);
 	device_set_wakeup_capable(&pp->dev->dev, !!wol.supported);
 
+	/* PHY WoL may be enabled but device wakeup disabled */
+	if (wol.supported)
+		device_set_wakeup_enable(&pp->dev->dev, !!wol.wolopts);
+
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 38152ea37d8bdaffa22603e0a5b5b86cfa8714c9 Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Wed, 13 May 2020 23:37:17 +0800
Subject: net: dsa: mt7530: set CPU port to fallback mode

Currently, setting a bridge's self PVID to other value and deleting
the default VID 1 renders untagged ports of that VLAN unable to talk to
the CPU port:

	bridge vlan add dev br0 vid 2 pvid untagged self
	bridge vlan del dev br0 vid 1 self
	bridge vlan add dev sw0p0 vid 2 pvid untagged
	bridge vlan del dev sw0p0 vid 1
	# br0 cannot send untagged frames out of sw0p0 anymore

That is because the CPU port is set to security mode and its PVID is
still 1, and untagged frames are dropped due to VLAN member violation.

Set the CPU port to fallback mode so untagged frames can pass through.

Fixes: 83163f7dca56 ("net: dsa: mediatek: add VLAN support for MT7530")
Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 11 ++++++++---
 drivers/net/dsa/mt7530.h |  6 ++++++
 2 files changed, 14 insertions(+), 3 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index 5c444cd722bd..a063d914c23f 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -810,10 +810,15 @@ mt7530_port_set_vlan_aware(struct dsa_switch *ds, int port)
 		   PCR_MATRIX_MASK, PCR_MATRIX(MT7530_ALL_MEMBERS));
 
 	/* Trapped into security mode allows packet forwarding through VLAN
-	 * table lookup.
+	 * table lookup. CPU port is set to fallback mode to let untagged
+	 * frames pass through.
 	 */
-	mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
-		   MT7530_PORT_SECURITY_MODE);
+	if (dsa_is_cpu_port(ds, port))
+		mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+			   MT7530_PORT_FALLBACK_MODE);
+	else
+		mt7530_rmw(priv, MT7530_PCR_P(port), PCR_PORT_VLAN_MASK,
+			   MT7530_PORT_SECURITY_MODE);
 
 	/* Set the port as a user port which is to be able to recognize VID
 	 * from incoming packets before fetching entry within the VLAN table.
diff --git a/drivers/net/dsa/mt7530.h b/drivers/net/dsa/mt7530.h
index 979bb6374678..d45eb7540703 100644
--- a/drivers/net/dsa/mt7530.h
+++ b/drivers/net/dsa/mt7530.h
@@ -152,6 +152,12 @@ enum mt7530_port_mode {
 	/* Port Matrix Mode: Frames are forwarded by the PCR_MATRIX members. */
 	MT7530_PORT_MATRIX_MODE = PORT_VLAN(0),
 
+	/* Fallback Mode: Forward received frames with ingress ports that do
+	 * not belong to the VLAN member. Frames whose VID is not listed on
+	 * the VLAN table are forwarded by the PCR_MATRIX members.
+	 */
+	MT7530_PORT_FALLBACK_MODE = PORT_VLAN(1),
+
 	/* Security Mode: Discard any frame due to ingress membership
 	 * violation or VID missed on the VLAN table.
 	 */
-- 
cgit v1.2.3-59-g8ed1b


From 12a87174accd29ff943d4c5fb735e1541b92630b Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Wed, 13 May 2020 15:53:04 +0200
Subject: mt76: mt7615: fix typo defining ps work

Fix typo defining ps_work in mt7615_register_ext_phy(). This is not a
real issue since 802.11 power save is not yet supported by the external phy.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index b5bbe9f5f7dd..1d8fdc7e062b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -372,7 +372,7 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
 	skb_queue_head_init(&phy->scan_event_list);
 
-	INIT_WORK(&dev->phy.ps_work, mt7615_ps_work);
+	INIT_WORK(&phy->ps_work, mt7615_ps_work);
 
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
-- 
cgit v1.2.3-59-g8ed1b


From 0531b0357ba37464e5c0033e1b7c69bbf5ecd8fb Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Thu, 14 May 2020 09:35:52 +0300
Subject: selftests: fix flower parent qdisc

Flower tests used to create ingress filter with specified parent qdisc
"parent ffff:" but dump them on "ingress". With recent commit that fixed
tcm_parent handling in dump those are not considered same parent anymore,
which causes iproute2 tc to emit additional "parent ffff:" in first line of
filter dump output. The change in output causes filter match in tests to
fail.

Prevent parent qdisc output when dumping filters in flower tests by always
correctly specifying "ingress" parent both when creating and dumping
filters.

Fixes: a7df4870d79b ("net_sched: fix tcm_parent in tc filter dump")
Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/tc-testing/tc-tests/filters/tests.json | 6 +++---
 tools/testing/selftests/tc-testing/tdc_batch.py                | 6 +++---
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 8877f7b2b809..12aa4bc1f6a0 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -32,7 +32,7 @@
         "setup": [
             "$TC qdisc add dev $DEV2 ingress"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 parent ffff: handle 0xffffffff flower action ok",
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress handle 0xffffffff flower action ok",
         "expExitCode": "0",
         "verifyCmd": "$TC filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower.*handle 0xffffffff",
@@ -77,9 +77,9 @@
         },
         "setup": [
             "$TC qdisc add dev $DEV2 ingress",
-            "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
+            "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop"
         ],
-        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 parent ffff: flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip prio 1 ingress flower dst_mac e4:11:22:11:4a:51 src_mac e4:11:22:11:4a:50 ip_proto tcp src_ip 1.1.1.1 dst_ip 2.2.2.2 action drop",
         "expExitCode": "2",
         "verifyCmd": "$TC -s filter show dev $DEV2 ingress",
         "matchPattern": "filter protocol ip pref 1 flower chain 0 handle",
diff --git a/tools/testing/selftests/tc-testing/tdc_batch.py b/tools/testing/selftests/tc-testing/tdc_batch.py
index 6a2bd2cf528e..995f66ce43eb 100755
--- a/tools/testing/selftests/tc-testing/tdc_batch.py
+++ b/tools/testing/selftests/tc-testing/tdc_batch.py
@@ -72,21 +72,21 @@ mac_prefix = args.mac_prefix
 
 def format_add_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter add dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter add dev {} {} protocol ip ingress handle {} "
             " flower {} src_mac {} dst_mac {} action drop {}".format(
                 device, prio, handle, skip, src_mac, dst_mac, share_action))
 
 
 def format_rep_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter replace dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter replace dev {} {} protocol ip ingress handle {} "
             " flower {} src_mac {} dst_mac {} action drop {}".format(
                 device, prio, handle, skip, src_mac, dst_mac, share_action))
 
 
 def format_del_filter(device, prio, handle, skip, src_mac, dst_mac,
                       share_action):
-    return ("filter del dev {} {} protocol ip parent ffff: handle {} "
+    return ("filter del dev {} {} protocol ip ingress handle {} "
             "flower".format(device, prio, handle))
 
 
-- 
cgit v1.2.3-59-g8ed1b


From acb6d3771a0390fdfae18082f232fda40d5ab514 Mon Sep 17 00:00:00 2001
From: Thierry Reding <treding@nvidia.com>
Date: Thu, 14 May 2020 14:38:48 +0200
Subject: r8152: Use MAC address from device tree if available

If a MAC address was passed via the device tree node for the r8152
device, use it and fall back to reading from EEPROM otherwise. This is
useful for devices where the r8152 EEPROM was not programmed with a
valid MAC address, or if users want to explicitly set a MAC address in
the bootloader and pass that to the kernel.

Signed-off-by: Thierry Reding <treding@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/usb/r8152.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/usb/r8152.c b/drivers/net/usb/r8152.c
index 8f8d9883d363..1af72ec284ca 100644
--- a/drivers/net/usb/r8152.c
+++ b/drivers/net/usb/r8152.c
@@ -1504,15 +1504,19 @@ static int determine_ethernet_addr(struct r8152 *tp, struct sockaddr *sa)
 
 	sa->sa_family = dev->type;
 
-	if (tp->version == RTL_VER_01) {
-		ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data);
-	} else {
-		/* if device doesn't support MAC pass through this will
-		 * be expected to be non-zero
-		 */
-		ret = vendor_mac_passthru_addr_read(tp, sa);
-		if (ret < 0)
-			ret = pla_ocp_read(tp, PLA_BACKUP, 8, sa->sa_data);
+	ret = eth_platform_get_mac_address(&dev->dev, sa->sa_data);
+	if (ret < 0) {
+		if (tp->version == RTL_VER_01) {
+			ret = pla_ocp_read(tp, PLA_IDR, 8, sa->sa_data);
+		} else {
+			/* if device doesn't support MAC pass through this will
+			 * be expected to be non-zero
+			 */
+			ret = vendor_mac_passthru_addr_read(tp, sa);
+			if (ret < 0)
+				ret = pla_ocp_read(tp, PLA_BACKUP, 8,
+						   sa->sa_data);
+		}
 	}
 
 	if (ret < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 96b8e87838a1acfcb1a168537a44f727dda7f53f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 14 May 2020 20:41:22 +0800
Subject: net: hns3: modify some incorrect spelling

This patch modifies some incorrect spelling.

Reported-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h            |  2 +-
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
index 21a736174fda..1ffe8fac702d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hclge_mbx.h
@@ -24,7 +24,7 @@ enum HCLGE_MBX_OPCODE {
 	HCLGE_MBX_GET_RETA,		/* (VF -> PF) get RETA */
 	HCLGE_MBX_GET_RSS_KEY,		/* (VF -> PF) get RSS key */
 	HCLGE_MBX_GET_MAC_ADDR,		/* (VF -> PF) get MAC addr */
-	HCLGE_MBX_PF_VF_RESP,		/* (PF -> VF) generate respone to VF */
+	HCLGE_MBX_PF_VF_RESP,		/* (PF -> VF) generate response to VF */
 	HCLGE_MBX_GET_BDNUM,		/* (VF -> PF) get BD num */
 	HCLGE_MBX_GET_BUFSIZE,		/* (VF -> PF) get buffer size */
 	HCLGE_MBX_GET_STREAMID,		/* (VF -> PF) get stream id */
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
index 48c115c50db5..26f6f068b01d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_debugfs.c
@@ -691,7 +691,7 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
 	enum hclge_opcode_type cmd;
 	struct hclge_desc desc;
 	int queue_id, group_id;
-	u32 qset_maping[32];
+	u32 qset_mapping[32];
 	int tc_id, qset_id;
 	int pri_id, ret;
 	u32 i;
@@ -746,7 +746,7 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
 		if (ret)
 			goto err_tm_map_cmd_send;
 
-		qset_maping[group_id] =
+		qset_mapping[group_id] =
 			le32_to_cpu(bp_to_qs_map_cmd->qs_bit_map);
 	}
 
@@ -756,11 +756,11 @@ static void hclge_dbg_dump_tm_map(struct hclge_dev *hdev,
 	for (group_id = 0; group_id < 4; group_id++) {
 		dev_info(&hdev->pdev->dev,
 			 "%04d  | %08x:%08x:%08x:%08x:%08x:%08x:%08x:%08x\n",
-			 group_id * 256, qset_maping[(u32)(i + 7)],
-			 qset_maping[(u32)(i + 6)], qset_maping[(u32)(i + 5)],
-			 qset_maping[(u32)(i + 4)], qset_maping[(u32)(i + 3)],
-			 qset_maping[(u32)(i + 2)], qset_maping[(u32)(i + 1)],
-			 qset_maping[i]);
+			 group_id * 256, qset_mapping[(u32)(i + 7)],
+			 qset_mapping[(u32)(i + 6)], qset_mapping[(u32)(i + 5)],
+			 qset_mapping[(u32)(i + 4)], qset_mapping[(u32)(i + 3)],
+			 qset_mapping[(u32)(i + 2)], qset_mapping[(u32)(i + 1)],
+			 qset_mapping[i]);
 		i += 8;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 727f514bd677420d4253ad84509710040e808899 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 14 May 2020 20:41:23 +0800
Subject: net: hns3: remove a duplicated printing in hclge_configure()

Since hclge_get_cfg() already prints an error on failure,
hclge_configure() should not print another one when hclge_get_cfg() fails.

Reported-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 1ff896a9576c..b796d3fb5b0b 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1363,10 +1363,8 @@ static int hclge_configure(struct hclge_dev *hdev)
 	int ret;
 
 	ret = hclge_get_cfg(hdev, &cfg);
-	if (ret) {
-		dev_err(&hdev->pdev->dev, "get mac mode error %d.\n", ret);
+	if (ret)
 		return ret;
-	}
 
 	hdev->num_vmdq_vport = cfg.vmdq_vport_num;
 	hdev->base_tqp_pid = 0;
-- 
cgit v1.2.3-59-g8ed1b


From cb25a6072b517b34d88f05ae29971c595d77df6e Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 14 May 2020 20:41:24 +0800
Subject: net: hns3: modify an incorrect error log in hclge_mbx_handler()

When handling HCLGE_MBX_GET_LINK_STATUS, PF will return the link
status to the VF, so the error log of hclge_get_link_info() is
incorrect.

Reported-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
index ac70fafd15d5..0874ae47cb03 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mbx.c
@@ -742,7 +742,7 @@ void hclge_mbx_handler(struct hclge_dev *hdev)
 			ret = hclge_get_link_info(vport, req);
 			if (ret)
 				dev_err(&hdev->pdev->dev,
-					"PF fail(%d) to get link stat for VF\n",
+					"failed to inform link stat to VF, ret = %d\n",
 					ret);
 			break;
 		case HCLGE_MBX_QUEUE_RESET:
-- 
cgit v1.2.3-59-g8ed1b


From bd13f7e12936b83a363ffada725bdf0fe229f337 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 14 May 2020 20:41:25 +0800
Subject: net: hns3: remove some unused macros

There are some macros defined in hns3_enet.h that are not used
anywhere.

Reported-by: Yonglong Liu <liuyonglong@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 17 -----------------
 1 file changed, 17 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 240ba06cd0eb..60f82ad89957 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -46,23 +46,6 @@ enum hns3_nic_state {
 #define HNS3_RING_CFG_VF_NUM_REG		0x00080
 #define HNS3_RING_ASID_REG			0x0008C
 #define HNS3_RING_EN_REG			0x00090
-#define HNS3_RING_T0_BE_RST			0x00094
-#define HNS3_RING_COULD_BE_RST			0x00098
-#define HNS3_RING_WRR_WEIGHT_REG		0x0009c
-
-#define HNS3_RING_INTMSK_RXWL_REG		0x000A0
-#define HNS3_RING_INTSTS_RX_RING_REG		0x000A4
-#define HNS3_RX_RING_INT_STS_REG		0x000A8
-#define HNS3_RING_INTMSK_TXWL_REG		0x000AC
-#define HNS3_RING_INTSTS_TX_RING_REG		0x000B0
-#define HNS3_TX_RING_INT_STS_REG		0x000B4
-#define HNS3_RING_INTMSK_RX_OVERTIME_REG	0x000B8
-#define HNS3_RING_INTSTS_RX_OVERTIME_REG	0x000BC
-#define HNS3_RING_INTMSK_TX_OVERTIME_REG	0x000C4
-#define HNS3_RING_INTSTS_TX_OVERTIME_REG	0x000C8
-
-#define HNS3_RING_MB_CTRL_REG			0x00100
-#define HNS3_RING_MB_DATA_BASE_REG		0x00200
 
 #define HNS3_TX_REG_OFFSET			0x40
 
-- 
cgit v1.2.3-59-g8ed1b


From 5c6cfd309faa7a1958c14b937ba9b95abb1427de Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 14 May 2020 20:41:26 +0800
Subject: net: hns3: remove unnecessary frag list checking in
 hns3_nic_net_xmit()

The skb_has_frag_list() in hns3_nic_net_xmit() is redundant, since
skb_walk_frags() includes this checking implicitly.

Reported-by: Yunsheng Lin <linyunsheng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index c79d6a391105..9fe40c7773b4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1445,9 +1445,6 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	bd_num += ret;
 
-	if (!skb_has_frag_list(skb))
-		goto out;
-
 	skb_walk_frags(skb, frag_skb) {
 		ret = hns3_fill_skb_to_desc(ring, frag_skb,
 					    DESC_TYPE_FRAGLIST_SKB);
@@ -1456,7 +1453,7 @@ netdev_tx_t hns3_nic_net_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 		bd_num += ret;
 	}
-out:
+
 	pre_ntu = ring->next_to_use ? (ring->next_to_use - 1) :
 					(ring->desc_num - 1);
 	ring->desc[pre_ntu].tx.bdtp_fe_sc_vld_ra_ri |=
-- 
cgit v1.2.3-59-g8ed1b


From d639836ab3363f935a9a4336cb4ea3828d0437dd Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:17 +0300
Subject: net: qed: adding hw_err states and handling

Here we introduce qed device error tracking flags and error types.

qed_hw_err_notify is an entry point to report errors.
It'll notify higher level drivers (qede/qedr/etc) to handle and recover
the error.

List of possible errors comes from hardware interfaces, but could be
extended in the future.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h      |  2 ++
 drivers/net/ethernet/qlogic/qed/qed_hw.c   | 32 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_hw.h   | 15 ++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c | 29 +++++++++++++++++++++++++++
 include/linux/qed/qed_if.h                 | 12 +++++++++++
 5 files changed, 90 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index fa41bf08a589..12c40ce3d876 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -1020,6 +1020,8 @@ u32 qed_unzip_data(struct qed_hwfn *p_hwfn,
 		   u32 input_len, u8 *input_buf,
 		   u32 max_size, u8 *unzip_buf);
 void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn);
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+			   enum qed_hw_err_type err_type);
 void qed_get_protocol_stats(struct qed_dev *cdev,
 			    enum qed_mcp_protocol_type type,
 			    union qed_mcp_protocol_stats *stats);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 4ab8cfaf63d1..90b777019cf5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -837,6 +837,38 @@ int qed_dmae_host2host(struct qed_hwfn *p_hwfn,
 	return rc;
 }
 
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       enum qed_hw_err_type err_type, char *fmt, ...)
+{
+	char buf[QED_HW_ERR_MAX_STR_SIZE];
+	va_list vl;
+	int len;
+
+	if (fmt) {
+		va_start(vl, fmt);
+		len = vsnprintf(buf, QED_HW_ERR_MAX_STR_SIZE, fmt, vl);
+		va_end(vl);
+
+		if (len > QED_HW_ERR_MAX_STR_SIZE - 1)
+			len = QED_HW_ERR_MAX_STR_SIZE - 1;
+
+		DP_NOTICE(p_hwfn, "%s", buf);
+	}
+
+	/* Fan failure cannot be masked by handling of another HW error */
+	if (p_hwfn->cdev->recov_in_prog &&
+	    err_type != QED_HW_ERR_FAN_FAIL) {
+		DP_VERBOSE(p_hwfn,
+			   NETIF_MSG_DRV,
+			   "Recovery is in progress. Avoid notifying about HW error %d.\n",
+			   err_type);
+		return;
+	}
+
+	qed_hw_error_occurred(p_hwfn, err_type);
+}
+
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, const char *phase)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.h b/drivers/net/ethernet/qlogic/qed/qed_hw.h
index 505e94db939d..f5b109b04b66 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.h
@@ -315,4 +315,19 @@ int qed_init_fw_data(struct qed_dev *cdev,
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
 		    struct qed_ptt *p_ptt, const char *phase);
 
+#define QED_HW_ERR_MAX_STR_SIZE 256
+
+/**
+ * @brief qed_hw_err_notify - Notify upper layer driver and management FW
+ *	about a HW error.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param err_type
+ * @param fmt - debug data buffer to send to the MFW
+ * @param ... - buffer format args
+ */
+void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
+		       struct qed_ptt *p_ptt,
+		       enum qed_hw_err_type err_type, char *fmt, ...);
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index 38a1d26ca9db..d7c9d94e4c59 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2468,6 +2468,35 @@ void qed_schedule_recovery_handler(struct qed_hwfn *p_hwfn)
 		ops->schedule_recovery_handler(cookie);
 }
 
+char *qed_hw_err_type_descr[] = {
+	[QED_HW_ERR_FAN_FAIL]		= "Fan Failure",
+	[QED_HW_ERR_MFW_RESP_FAIL]	= "MFW Response Failure",
+	[QED_HW_ERR_HW_ATTN]		= "HW Attention",
+	[QED_HW_ERR_DMAE_FAIL]		= "DMAE Failure",
+	[QED_HW_ERR_RAMROD_FAIL]	= "Ramrod Failure",
+	[QED_HW_ERR_FW_ASSERT]		= "FW Assertion",
+	[QED_HW_ERR_LAST]		= "Unknown",
+};
+
+void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
+			   enum qed_hw_err_type err_type)
+{
+	struct qed_common_cb_ops *ops = p_hwfn->cdev->protocol_ops.common;
+	void *cookie = p_hwfn->cdev->ops_cookie;
+	char *err_str;
+
+	if (err_type > QED_HW_ERR_LAST)
+		err_type = QED_HW_ERR_LAST;
+	err_str = qed_hw_err_type_descr[err_type];
+
+	DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
+
+	/* Call the HW error handler of the protocol driver
+	 */
+	if (ops && ops->schedule_hw_err_handler)
+		ops->schedule_hw_err_handler(cookie, err_type);
+}
+
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
 			    void *handle)
 {
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 8f29e0d8a7b3..1b7d9548ee43 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -607,6 +607,16 @@ struct qed_sb_info {
 	struct qed_dev *cdev;
 };
 
+enum qed_hw_err_type {
+	QED_HW_ERR_FAN_FAIL,
+	QED_HW_ERR_MFW_RESP_FAIL,
+	QED_HW_ERR_HW_ATTN,
+	QED_HW_ERR_DMAE_FAIL,
+	QED_HW_ERR_RAMROD_FAIL,
+	QED_HW_ERR_FW_ASSERT,
+	QED_HW_ERR_LAST,
+};
+
 enum qed_dev_type {
 	QED_DEV_TYPE_BB,
 	QED_DEV_TYPE_AH,
@@ -814,6 +824,8 @@ struct qed_common_cb_ops {
 	void	(*link_update)(void			*dev,
 			       struct qed_link_output	*link);
 	void (*schedule_recovery_handler)(void *dev);
+	void (*schedule_hw_err_handler)(void *dev,
+					enum qed_hw_err_type err_type);
 	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
-- 
cgit v1.2.3-59-g8ed1b


From a8736ea83b80526529e21db29595e5337bfa95c2 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:18 +0300
Subject: net: qede: add hw err scheduled handler

qede (ethernet level driver) registers a callback handler.
This handler maintains eth dev state flags/bits to track error processing.

It implements an in-place processing part for non-sleeping context (a
WARN_ON trigger), and a deferred (delayed work) part which triggers the
recovery process for recoverable errors.

In later patches this atomic handler will come with more meat.

We introduce err_flags on the eth device structure; it is used to record
error handling properties.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      | 13 +++-
 drivers/net/ethernet/qlogic/qede/qede_main.c | 95 +++++++++++++++++++++++++++-
 2 files changed, 106 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index f6f0b51620ab..695d645d9ba9 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -278,6 +278,14 @@ struct qede_dev {
 	struct qede_rdma_dev		rdma_info;
 
 	struct bpf_prog *xdp_prog;
+
+	unsigned long err_flags;
+#define QEDE_ERR_IS_HANDLED	31
+#define QEDE_ERR_ATTN_CLR_EN	0
+#define QEDE_ERR_GET_DBG_INFO	1
+#define QEDE_ERR_IS_RECOVERABLE	2
+#define QEDE_ERR_WARN		3
+
 	struct qede_dump_info		dump_info;
 };
 
@@ -485,12 +493,15 @@ struct qede_fastpath {
 
 #define QEDE_SP_RECOVERY		0
 #define QEDE_SP_RX_MODE			1
+#define QEDE_SP_RSVD1                   2
+#define QEDE_SP_RSVD2                   3
+#define QEDE_SP_HW_ERR                  4
+#define QEDE_SP_ARFS_CONFIG             5
 #define QEDE_SP_AER			7
 
 #ifdef CONFIG_RFS_ACCEL
 int qede_rx_flow_steer(struct net_device *dev, const struct sk_buff *skb,
 		       u16 rxq_index, u32 flow_id);
-#define QEDE_SP_ARFS_CONFIG	4
 #define QEDE_SP_TASK_POLL_DELAY	(5 * HZ)
 #endif
 
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index 300405369c37..e67d5da23792 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -139,10 +139,12 @@ static void qede_shutdown(struct pci_dev *pdev);
 static void qede_link_update(void *dev, struct qed_link_output *link);
 static void qede_schedule_recovery_handler(void *dev);
 static void qede_recovery_handler(struct qede_dev *edev);
+static void qede_schedule_hw_err_handler(void *dev,
+					 enum qed_hw_err_type err_type);
 static void qede_get_eth_tlv_data(void *edev, void *data);
 static void qede_get_generic_tlv_data(void *edev,
 				      struct qed_generic_tlvs *data);
-
+static void qede_generic_hw_err_handler(struct qede_dev *edev);
 #ifdef CONFIG_QED_SRIOV
 static int qede_set_vf_vlan(struct net_device *ndev, int vf, u16 vlan, u8 qos,
 			    __be16 vlan_proto)
@@ -230,6 +232,7 @@ static struct qed_eth_cb_ops qede_ll_ops = {
 #endif
 		.link_update = qede_link_update,
 		.schedule_recovery_handler = qede_schedule_recovery_handler,
+		.schedule_hw_err_handler = qede_schedule_hw_err_handler,
 		.get_generic_tlv_data = qede_get_generic_tlv_data,
 		.get_protocol_tlv_data = qede_get_eth_tlv_data,
 	},
@@ -1009,6 +1012,8 @@ static void qede_sp_task(struct work_struct *work)
 			qede_process_arfs_filters(edev, false);
 	}
 #endif
+	if (test_and_clear_bit(QEDE_SP_HW_ERR, &edev->sp_flags))
+		qede_generic_hw_err_handler(edev);
 	__qede_unlock(edev);
 
 	if (test_and_clear_bit(QEDE_SP_AER, &edev->sp_flags)) {
@@ -2509,6 +2514,94 @@ err:
 	qede_recovery_failed(edev);
 }
 
+static void qede_atomic_hw_err_handler(struct qede_dev *edev)
+{
+	DP_NOTICE(edev,
+		  "Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
+		  edev->err_flags);
+
+	/* Get a call trace of the flow that led to the error */
+	WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
+
+	DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
+}
+
+static void qede_generic_hw_err_handler(struct qede_dev *edev)
+{
+	struct qed_dev *cdev = edev->cdev;
+
+	DP_NOTICE(edev,
+		  "Generic sleepable HW error handling started - err_flags 0x%lx\n",
+		  edev->err_flags);
+
+	/* Trigger a recovery process.
+	 * This is placed in the sleep requiring section just to make
+	 * sure it is the last one, and that all the other operations
+	 * were completed.
+	 */
+	if (test_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags))
+		edev->ops->common->recovery_process(cdev);
+
+	clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+
+	DP_NOTICE(edev, "Generic sleepable HW error handling is done\n");
+}
+
+static void qede_set_hw_err_flags(struct qede_dev *edev,
+				  enum qed_hw_err_type err_type)
+{
+	unsigned long err_flags = 0;
+
+	switch (err_type) {
+	case QED_HW_ERR_DMAE_FAIL:
+		set_bit(QEDE_ERR_WARN, &err_flags);
+		fallthrough;
+	case QED_HW_ERR_MFW_RESP_FAIL:
+	case QED_HW_ERR_HW_ATTN:
+	case QED_HW_ERR_RAMROD_FAIL:
+	case QED_HW_ERR_FW_ASSERT:
+		set_bit(QEDE_ERR_ATTN_CLR_EN, &err_flags);
+		set_bit(QEDE_ERR_GET_DBG_INFO, &err_flags);
+		break;
+
+	default:
+		DP_NOTICE(edev, "Unexpected HW error [%d]\n", err_type);
+		break;
+	}
+
+	edev->err_flags |= err_flags;
+}
+
+static void qede_schedule_hw_err_handler(void *dev,
+					 enum qed_hw_err_type err_type)
+{
+	struct qede_dev *edev = dev;
+
+	/* Fan failure cannot be masked by handling of another HW error or by a
+	 * concurrent recovery process.
+	 */
+	if ((test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+	     edev->state == QEDE_STATE_RECOVERY) &&
+	     err_type != QED_HW_ERR_FAN_FAIL) {
+		DP_INFO(edev,
+			"Avoid scheduling an error handling while another HW error is being handled\n");
+		return;
+	}
+
+	if (err_type >= QED_HW_ERR_LAST) {
+		DP_NOTICE(edev, "Unknown HW error [%d]\n", err_type);
+		clear_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags);
+		return;
+	}
+
+	qede_set_hw_err_flags(edev, err_type);
+	qede_atomic_hw_err_handler(edev);
+	set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+
+	DP_INFO(edev, "Scheduled a error handler [err_type %d]\n", err_type);
+}
+
 static bool qede_is_txq_full(struct qede_dev *edev, struct qede_tx_queue *txq)
 {
 	struct netdev_queue *netdev_txq;
-- 
cgit v1.2.3-59-g8ed1b


From 2ec276d5b224d0d409ad3ad790b68f6d13822250 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:19 +0300
Subject: net: qed: invoke err notify on critical areas

In a number of critical places, printing a debug trace is not enough:
the appropriate HW error condition should also be raised, and error
handling/recovery should start.

Introduce our new qed_hw_err_notify invocation in these places to
record and indicate critical error conditions in hardware.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_dev.c |  4 +++-
 drivers/net/ethernet/qlogic/qed/qed_hw.c  |  7 ++++---
 drivers/net/ethernet/qlogic/qed/qed_int.c | 20 ++++++++++++++++----
 drivers/net/ethernet/qlogic/qed/qed_mcp.c |  2 ++
 drivers/net/ethernet/qlogic/qed/qed_spq.c | 16 ++++++++++------
 5 files changed, 35 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 7119a18af19e..6e857468e993 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -3085,7 +3085,9 @@ int qed_hw_init(struct qed_dev *cdev, struct qed_hw_init_params *p_params)
 			rc = qed_final_cleanup(p_hwfn, p_hwfn->p_main_ptt,
 					       p_hwfn->rel_pf_id, false);
 			if (rc) {
-				DP_NOTICE(p_hwfn, "Final cleanup failed\n");
+				qed_hw_err_notify(p_hwfn, p_hwfn->p_main_ptt,
+						  QED_HW_ERR_RAMROD_FAIL,
+						  "Final cleanup failed\n");
 				goto load_err;
 			}
 		}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 90b777019cf5..2d176e1b508c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -762,9 +762,10 @@ static int qed_dmae_execute_command(struct qed_hwfn *p_hwfn,
 							    dst_type,
 							    length_cur);
 		if (qed_status) {
-			DP_NOTICE(p_hwfn,
-				  "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
-				  qed_status, src_addr, dst_addr, length_cur);
+			qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_DMAE_FAIL,
+					  "qed_dmae_execute_sub_operation Failed with error 0x%x. source_addr 0x%llx, destination addr 0x%llx, size_in_dwords 0x%x\n",
+					  qed_status, src_addr,
+					  dst_addr, length_cur);
 			break;
 		}
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 9f5113639eaf..1b1447b2f059 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -363,6 +363,14 @@ static int qed_pglueb_rbc_attn_cb(struct qed_hwfn *p_hwfn)
 	return qed_pglueb_rbc_attn_handler(p_hwfn, p_hwfn->p_dpc_ptt);
 }
 
+static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
+{
+	qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_FW_ASSERT,
+			  "FW assertion!\n");
+
+	return -EINVAL;
+}
+
 #define QED_DORQ_ATTENTION_REASON_MASK  (0xfffff)
 #define QED_DORQ_ATTENTION_OPAQUE_MASK  (0xffff)
 #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@@ -606,7 +614,8 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
 	{
 		{       /* After Invert 4 */
 			{"General Attention 32", ATTENTION_SINGLE,
-			 NULL, MAX_BLOCK_ID},
+			 qed_fw_assertion,
+			 MAX_BLOCK_ID},
 			{"General Attention %d",
 			 (2 << ATTENTION_LENGTH_SHIFT) |
 			 (33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
@@ -927,9 +936,12 @@ qed_int_deassertion_aeu_bit(struct qed_hwfn *p_hwfn,
 		qed_int_attn_print(p_hwfn, p_aeu->block_index,
 				   ATTN_TYPE_INTERRUPT, !b_fatal);
 
-
-	/* If the attention is benign, no need to prevent it */
-	if (!rc)
+	/* Reach assertion if attention is fatal */
+	if (b_fatal)
+		qed_hw_err_notify(p_hwfn, p_hwfn->p_dpc_ptt, QED_HW_ERR_HW_ATTN,
+				  "`%s': Fatal attention\n",
+				  p_bit_name);
+	else /* If the attention is benign, no need to prevent it */
 		goto out;
 
 	/* Prevent this Attention from being asserted in the future */
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 280527cc0578..46653afc385c 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -575,6 +575,8 @@ _qed_mcp_cmd_and_union(struct qed_hwfn *p_hwfn,
 		if (!QED_MB_FLAGS_IS_SET(p_mb_params, AVOID_BLOCK))
 			qed_mcp_cmd_set_blocking(p_hwfn, true);
 
+		qed_hw_err_notify(p_hwfn, p_ptt,
+				  QED_HW_ERR_MFW_RESP_FAIL, NULL);
 		return -EAGAIN;
 	}
 
diff --git a/drivers/net/ethernet/qlogic/qed/qed_spq.c b/drivers/net/ethernet/qlogic/qed/qed_spq.c
index f5f3c03b9dd2..790c28d696a0 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_spq.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_spq.c
@@ -160,12 +160,16 @@ static int qed_spq_block(struct qed_hwfn *p_hwfn,
 		return 0;
 	}
 err:
-	DP_NOTICE(p_hwfn,
-		  "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
-		  le32_to_cpu(p_ent->elem.hdr.cid),
-		  p_ent->elem.hdr.cmd_id,
-		  p_ent->elem.hdr.protocol_id,
-		  le16_to_cpu(p_ent->elem.hdr.echo));
+	p_ptt = qed_ptt_acquire(p_hwfn);
+	if (!p_ptt)
+		return -EBUSY;
+	qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_RAMROD_FAIL,
+			  "Ramrod is stuck [CID %08x cmd %02x protocol %02x echo %04x]\n",
+			  le32_to_cpu(p_ent->elem.hdr.cid),
+			  p_ent->elem.hdr.cmd_id,
+			  p_ent->elem.hdr.protocol_id,
+			  le16_to_cpu(p_ent->elem.hdr.echo));
+	qed_ptt_release(p_hwfn, p_ptt);
 
 	return -EBUSY;
 }
-- 
cgit v1.2.3-59-g8ed1b


From d8d6c5a7be97304f4baa67a31d6bfa86ff457980 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:20 +0300
Subject: net: qed: critical err reporting to management firmware

On various critical errors, the notification handler should also report
the error information to the management firmware (MFW).

MFW can interact with server/motherboard backend agents - these are
used by server manufacturers to monitor server HW health.

Thus, it is important for the driver to report any faulty conditions.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |  19 +++++
 drivers/net/ethernet/qlogic/qed/qed_hw.c  |   3 +
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 124 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  15 ++++
 4 files changed, 161 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 4597015b8bff..21d53b00c2e6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12492,6 +12492,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_GET_ENGINE_CONFIG		0x00370000
 #define DRV_MSG_CODE_GET_PPFID_BITMAP		0x43000000
 
+#define DRV_MSG_CODE_DEBUG_DATA_SEND		0xc0040000
+
 #define RESOURCE_CMD_REQ_RESC_MASK		0x0000001F
 #define RESOURCE_CMD_REQ_RESC_SHIFT		0
 #define RESOURCE_CMD_REQ_OPCODE_MASK		0x000000E0
@@ -12626,6 +12628,17 @@ struct public_drv_mb {
 #define DRV_MB_PARAM_FEATURE_SUPPORT_PORT_EEE		0x00000002
 #define DRV_MB_PARAM_FEATURE_SUPPORT_FUNC_VLINK		0x00010000
 
+/* DRV_MSG_CODE_DEBUG_DATA_SEND parameters */
+#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_OFFSET	0
+#define DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE_MASK		0xFF
+
+/* Driver attributes params */
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_OFFSET		0
+#define DRV_MB_PARAM_ATTRIBUTE_KEY_MASK			0x00FFFFFF
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_OFFSET		24
+#define DRV_MB_PARAM_ATTRIBUTE_CMD_MASK			0xFF000000
+
+#define DRV_MB_PARAM_NVM_CFG_OPTION_ID_OFFSET		0
 #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_SHIFT		0
 #define DRV_MB_PARAM_NVM_CFG_OPTION_ID_MASK		0x0000FFFF
 #define DRV_MB_PARAM_NVM_CFG_OPTION_ALL_SHIFT		16
@@ -12678,6 +12691,12 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DRV_CFG_PF_VFS_MSIX_DONE	0x00870000
 #define FW_MSG_SEQ_NUMBER_MASK			0x0000ffff
 
+#define FW_MSG_CODE_DEBUG_DATA_SEND_INV_ARG	0xb0070000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_BUF_FULL	0xb0080000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_NO_BUF	0xb0090000
+#define FW_MSG_CODE_DEBUG_NOT_ENABLED		0xb00a0000
+#define FW_MSG_CODE_DEBUG_DATA_SEND_OK		0xb00b0000
+
 	u32 fw_mb_param;
 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
diff --git a/drivers/net/ethernet/qlogic/qed/qed_hw.c b/drivers/net/ethernet/qlogic/qed/qed_hw.c
index 2d176e1b508c..5fa251489536 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hw.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_hw.c
@@ -868,6 +868,9 @@ void qed_hw_err_notify(struct qed_hwfn *p_hwfn,
 	}
 
 	qed_hw_error_occurred(p_hwfn, err_type);
+
+	if (fmt)
+		qed_mcp_send_raw_debug_data(p_hwfn, p_ptt, buf, len);
 }
 
 int qed_dmae_sanity(struct qed_hwfn *p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index 46653afc385c..db7cf120527a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -3821,3 +3821,127 @@ int qed_mcp_nvm_set_cfg(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 				  DRV_MSG_CODE_SET_NVM_CFG_OPTION,
 				  mb_param, &resp, &param, len, (u32 *)p_buf);
 }
+
+#define QED_MCP_DBG_DATA_MAX_SIZE               MCP_DRV_NVM_BUF_LEN
+#define QED_MCP_DBG_DATA_MAX_HEADER_SIZE        sizeof(u32)
+#define QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE \
+	(QED_MCP_DBG_DATA_MAX_SIZE - QED_MCP_DBG_DATA_MAX_HEADER_SIZE)
+
+static int
+__qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
+			  struct qed_ptt *p_ptt, u8 *p_buf, u8 size)
+{
+	struct qed_mcp_mb_params mb_params;
+	int rc;
+
+	if (size > QED_MCP_DBG_DATA_MAX_SIZE) {
+		DP_ERR(p_hwfn,
+		       "Debug data size is %d while it should not exceed %d\n",
+		       size, QED_MCP_DBG_DATA_MAX_SIZE);
+		return -EINVAL;
+	}
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_CODE_DEBUG_DATA_SEND;
+	SET_MFW_FIELD(mb_params.param, DRV_MSG_CODE_DEBUG_DATA_SEND_SIZE, size);
+	mb_params.p_data_src = p_buf;
+	mb_params.data_src_size = size;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	if (mb_params.mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The DEBUG_DATA_SEND command is unsupported by the MFW\n");
+		return -EOPNOTSUPP;
+	} else if (mb_params.mcp_resp == (u32)FW_MSG_CODE_DEBUG_NOT_ENABLED) {
+		DP_INFO(p_hwfn, "The DEBUG_DATA_SEND command is not enabled\n");
+		return -EBUSY;
+	} else if (mb_params.mcp_resp != (u32)FW_MSG_CODE_DEBUG_DATA_SEND_OK) {
+		DP_NOTICE(p_hwfn,
+			  "Failed to send debug data to the MFW [resp 0x%08x]\n",
+			  mb_params.mcp_resp);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+enum qed_mcp_dbg_data_type {
+	QED_MCP_DBG_DATA_TYPE_RAW,
+};
+
+/* Header format: [31:28] PFID, [27:20] flags, [19:12] type, [11:0] S/N */
+#define QED_MCP_DBG_DATA_HDR_SN_OFFSET  0
+#define QED_MCP_DBG_DATA_HDR_SN_MASK            0x00000fff
+#define QED_MCP_DBG_DATA_HDR_TYPE_OFFSET        12
+#define QED_MCP_DBG_DATA_HDR_TYPE_MASK  0x000ff000
+#define QED_MCP_DBG_DATA_HDR_FLAGS_OFFSET       20
+#define QED_MCP_DBG_DATA_HDR_FLAGS_MASK 0x0ff00000
+#define QED_MCP_DBG_DATA_HDR_PF_OFFSET  28
+#define QED_MCP_DBG_DATA_HDR_PF_MASK            0xf0000000
+
+#define QED_MCP_DBG_DATA_HDR_FLAGS_FIRST        0x1
+#define QED_MCP_DBG_DATA_HDR_FLAGS_LAST 0x2
+
+static int
+qed_mcp_send_debug_data(struct qed_hwfn *p_hwfn,
+			struct qed_ptt *p_ptt,
+			enum qed_mcp_dbg_data_type type, u8 *p_buf, u32 size)
+{
+	u8 raw_data[QED_MCP_DBG_DATA_MAX_SIZE], *p_tmp_buf = p_buf;
+	u32 tmp_size = size, *p_header, *p_payload;
+	u8 flags = 0;
+	u16 seq;
+	int rc;
+
+	p_header = (u32 *)raw_data;
+	p_payload = (u32 *)(raw_data + QED_MCP_DBG_DATA_MAX_HEADER_SIZE);
+
+	seq = (u16)atomic_inc_return(&p_hwfn->mcp_info->dbg_data_seq);
+
+	/* First chunk is marked as 'first' */
+	flags |= QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
+
+	*p_header = 0;
+	SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_SN, seq);
+	SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_TYPE, type);
+	SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
+	SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_PF, p_hwfn->abs_pf_id);
+
+	while (tmp_size > QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE) {
+		memcpy(p_payload, p_tmp_buf, QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE);
+		rc = __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
+					       QED_MCP_DBG_DATA_MAX_SIZE);
+		if (rc)
+			return rc;
+
+		/* Clear the 'first' marking after sending the first chunk */
+		if (p_tmp_buf == p_buf) {
+			flags &= ~QED_MCP_DBG_DATA_HDR_FLAGS_FIRST;
+			SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS,
+				      flags);
+		}
+
+		p_tmp_buf += QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
+		tmp_size -= QED_MCP_DBG_DATA_MAX_PAYLOAD_SIZE;
+	}
+
+	/* Last chunk is marked as 'last' */
+	flags |= QED_MCP_DBG_DATA_HDR_FLAGS_LAST;
+	SET_MFW_FIELD(*p_header, QED_MCP_DBG_DATA_HDR_FLAGS, flags);
+	memcpy(p_payload, p_tmp_buf, tmp_size);
+
+	/* Casting the left size to u8 is ok since at this point it is <= 32 */
+	return __qed_mcp_send_debug_data(p_hwfn, p_ptt, raw_data,
+					 (u8)(QED_MCP_DBG_DATA_MAX_HEADER_SIZE +
+					 tmp_size));
+}
+
+int
+qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u8 *p_buf, u32 size)
+{
+	return qed_mcp_send_debug_data(p_hwfn, p_ptt,
+				       QED_MCP_DBG_DATA_TYPE_RAW, p_buf, size);
+}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index 9c4c2763de8d..bc248418a5f5 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -685,6 +685,18 @@ int qed_mcp_bist_nvm_get_image_att(struct qed_hwfn *p_hwfn,
  */
 int qed_mfw_process_tlv_req(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 
+/**
+ * @brief Send raw debug data to the MFW
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_buf - raw debug data buffer
+ * @param size - buffer size
+ */
+int
+qed_mcp_send_raw_debug_data(struct qed_hwfn *p_hwfn,
+			    struct qed_ptt *p_ptt, u8 *p_buf, u32 size);
+
 /* Using hwfn number (and not pf_num) is required since in CMT mode,
  * same pf_num may be used by two different hwfn
  * TODO - this shouldn't really be in .h file, but until all fields
@@ -731,6 +743,9 @@ struct qed_mcp_info {
 
 	/* Capabilties negotiated with the MFW */
 	u32					capabilities;
+
+	/* S/N for debug data mailbox commands */
+	atomic_t dbg_data_seq;
 };
 
 struct qed_mcp_mb_params {
-- 
cgit v1.2.3-59-g8ed1b


From ca352f00756e7bd7d31a353a8586a29429810a95 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:21 +0300
Subject: net: qed: cleanup debug related declarations

This is probably legacy code that had duplicate declarations of some
fields. Clean this up by removing the copies and fixing the references.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h       | 11 +++--------
 drivers/net/ethernet/qlogic/qed/qed_debug.c | 26 +++++++++++++-------------
 2 files changed, 16 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 12c40ce3d876..07f6ef930b52 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -740,12 +740,6 @@ struct qed_dbg_feature {
 	u32 dumped_dwords;
 };
 
-struct qed_dbg_params {
-	struct qed_dbg_feature features[DBG_FEATURE_NUM];
-	u8 engine_for_debug;
-	bool print_data;
-};
-
 struct qed_dev {
 	u32	dp_module;
 	u8	dp_level;
@@ -872,17 +866,18 @@ struct qed_dev {
 	} protocol_ops;
 	void				*ops_cookie;
 
-	struct qed_dbg_params		dbg_params;
-
 #ifdef CONFIG_QED_LL2
 	struct qed_cb_ll2_info		*ll2;
 	u8				ll2_mac_address[ETH_ALEN];
 #endif
 	struct qed_dbg_feature dbg_features[DBG_FEATURE_NUM];
+	u8 engine_for_debug;
 	bool disable_ilt_dump;
 	DECLARE_HASHTABLE(connections, 10);
 	const struct firmware		*firmware;
 
+	bool print_dbg_data;
+
 	u32 rdma_max_sge;
 	u32 rdma_max_inline;
 	u32 rdma_max_srq_sge;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_debug.c b/drivers/net/ethernet/qlogic/qed/qed_debug.c
index f4eebaabb6d0..57a0dab88431 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_debug.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_debug.c
@@ -7453,7 +7453,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
 				      enum qed_dbg_features feature_idx)
 {
 	struct qed_dbg_feature *feature =
-	    &p_hwfn->cdev->dbg_params.features[feature_idx];
+	    &p_hwfn->cdev->dbg_features[feature_idx];
 	u32 text_size_bytes, null_char_pos, i;
 	enum dbg_status rc;
 	char *text_buf;
@@ -7502,7 +7502,7 @@ static enum dbg_status format_feature(struct qed_hwfn *p_hwfn,
 		text_buf[i] = '\n';
 
 	/* Dump printable feature to log */
-	if (p_hwfn->cdev->dbg_params.print_data)
+	if (p_hwfn->cdev->print_dbg_data)
 		qed_dbg_print_feature(text_buf, text_size_bytes);
 
 	/* Free the old dump_buf and point the dump_buf to the newly allocagted
@@ -7523,7 +7523,7 @@ static enum dbg_status qed_dbg_dump(struct qed_hwfn *p_hwfn,
 				    enum qed_dbg_features feature_idx)
 {
 	struct qed_dbg_feature *feature =
-	    &p_hwfn->cdev->dbg_params.features[feature_idx];
+	    &p_hwfn->cdev->dbg_features[feature_idx];
 	u32 buf_size_dwords;
 	enum dbg_status rc;
 
@@ -7648,7 +7648,7 @@ static int qed_dbg_nvm_image(struct qed_dev *cdev, void *buffer,
 			     enum qed_nvm_images image_id)
 {
 	struct qed_hwfn *p_hwfn =
-		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+		&cdev->hwfns[cdev->engine_for_debug];
 	u32 len_rounded, i;
 	__be32 val;
 	int rc;
@@ -7780,7 +7780,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 {
 	u8 cur_engine, omit_engine = 0, org_engine;
 	struct qed_hwfn *p_hwfn =
-		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+		&cdev->hwfns[cdev->engine_for_debug];
 	struct dbg_tools_data *dev_data = &p_hwfn->dbg_info;
 	int grc_params[MAX_DBG_GRC_PARAMS], i;
 	u32 offset = 0, feature_size;
@@ -8000,7 +8000,7 @@ int qed_dbg_all_data(struct qed_dev *cdev, void *buffer)
 int qed_dbg_all_data_size(struct qed_dev *cdev)
 {
 	struct qed_hwfn *p_hwfn =
-		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+		&cdev->hwfns[cdev->engine_for_debug];
 	u32 regs_len = 0, image_len = 0, ilt_len = 0, total_ilt_len = 0;
 	u8 cur_engine, org_engine;
 
@@ -8059,9 +8059,9 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
 		    enum qed_dbg_features feature, u32 *num_dumped_bytes)
 {
 	struct qed_hwfn *p_hwfn =
-		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+		&cdev->hwfns[cdev->engine_for_debug];
 	struct qed_dbg_feature *qed_feature =
-		&cdev->dbg_params.features[feature];
+		&cdev->dbg_features[feature];
 	enum dbg_status dbg_rc;
 	struct qed_ptt *p_ptt;
 	int rc = 0;
@@ -8084,7 +8084,7 @@ int qed_dbg_feature(struct qed_dev *cdev, void *buffer,
 	DP_VERBOSE(cdev, QED_MSG_DEBUG,
 		   "copying debugfs feature to external buffer\n");
 	memcpy(buffer, qed_feature->dump_buf, qed_feature->buf_size);
-	*num_dumped_bytes = cdev->dbg_params.features[feature].dumped_dwords *
+	*num_dumped_bytes = cdev->dbg_features[feature].dumped_dwords *
 			    4;
 
 out:
@@ -8095,7 +8095,7 @@ out:
 int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
 {
 	struct qed_hwfn *p_hwfn =
-		&cdev->hwfns[cdev->dbg_params.engine_for_debug];
+		&cdev->hwfns[cdev->engine_for_debug];
 	struct qed_dbg_feature *qed_feature = &cdev->dbg_features[feature];
 	struct qed_ptt *p_ptt = qed_ptt_acquire(p_hwfn);
 	u32 buf_size_dwords;
@@ -8120,14 +8120,14 @@ int qed_dbg_feature_size(struct qed_dev *cdev, enum qed_dbg_features feature)
 
 u8 qed_get_debug_engine(struct qed_dev *cdev)
 {
-	return cdev->dbg_params.engine_for_debug;
+	return cdev->engine_for_debug;
 }
 
 void qed_set_debug_engine(struct qed_dev *cdev, int engine_number)
 {
 	DP_VERBOSE(cdev, QED_MSG_DEBUG, "set debug engine to %d\n",
 		   engine_number);
-	cdev->dbg_params.engine_for_debug = engine_number;
+	cdev->engine_for_debug = engine_number;
 }
 
 void qed_dbg_pf_init(struct qed_dev *cdev)
@@ -8146,7 +8146,7 @@ void qed_dbg_pf_init(struct qed_dev *cdev)
 	}
 
 	/* Set the hwfn to be 0 as default */
-	cdev->dbg_params.engine_for_debug = 0;
+	cdev->engine_for_debug = 0;
 }
 
 void qed_dbg_pf_exit(struct qed_dev *cdev)
-- 
cgit v1.2.3-59-g8ed1b


From 936c7ba4dd5e94a3fc784f2296de5d577a9b5e43 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:22 +0300
Subject: net: qed: attention clearing properties

Different hardware events require different responses: for some
hardware indications, the HW attention (error condition) should be
cleared by the driver in order to continue normal functioning.

Here we introduce attention clear flags, and put them on some
important events (in aeu_descs).

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed.h        |  3 +++
 drivers/net/ethernet/qlogic/qed/qed_int.c    | 22 ++++++++++++++++++----
 drivers/net/ethernet/qlogic/qed/qed_int.h    | 11 +++++++++++
 drivers/net/ethernet/qlogic/qed/qed_main.c   |  7 ++++++-
 drivers/net/ethernet/qlogic/qede/qede_main.c |  6 ++++++
 include/linux/qed/qed_if.h                   |  9 +++++++++
 6 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed.h b/drivers/net/ethernet/qlogic/qed/qed.h
index 07f6ef930b52..66ed39d6f357 100644
--- a/drivers/net/ethernet/qlogic/qed/qed.h
+++ b/drivers/net/ethernet/qlogic/qed/qed.h
@@ -838,6 +838,9 @@ struct qed_dev {
 	/* Recovery */
 	bool recov_in_prog;
 
+	/* Indicates whether should prevent attentions from being reasserted */
+	bool attn_clr_en;
+
 	/* LLH info */
 	u8 ppfid_bitmap;
 	struct qed_llh_info *p_llh_info;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.c b/drivers/net/ethernet/qlogic/qed/qed_int.c
index 1b1447b2f059..b7b974f0ef21 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.c
@@ -96,6 +96,7 @@ struct aeu_invert_reg_bit {
 #define ATTENTION_BB(value)             (value << ATTENTION_BB_SHIFT)
 #define ATTENTION_BB_DIFFERENT          BIT(23)
 
+#define ATTENTION_CLEAR_ENABLE          BIT(28)
 	unsigned int flags;
 
 	/* Callback to call if attention will be triggered */
@@ -371,6 +372,13 @@ static int qed_fw_assertion(struct qed_hwfn *p_hwfn)
 	return -EINVAL;
 }
 
+static int qed_general_attention_35(struct qed_hwfn *p_hwfn)
+{
+	DP_INFO(p_hwfn, "General attention 35!\n");
+
+	return 0;
+}
+
 #define QED_DORQ_ATTENTION_REASON_MASK  (0xfffff)
 #define QED_DORQ_ATTENTION_OPAQUE_MASK  (0xffff)
 #define QED_DORQ_ATTENTION_OPAQUE_SHIFT (0x0)
@@ -613,14 +621,15 @@ static struct aeu_invert_reg aeu_descs[NUM_ATTN_REGS] = {
 
 	{
 		{       /* After Invert 4 */
-			{"General Attention 32", ATTENTION_SINGLE,
-			 qed_fw_assertion,
+			{"General Attention 32", ATTENTION_SINGLE |
+			 ATTENTION_CLEAR_ENABLE, qed_fw_assertion,
 			 MAX_BLOCK_ID},
 			{"General Attention %d",
 			 (2 << ATTENTION_LENGTH_SHIFT) |
 			 (33 << ATTENTION_OFFSET_SHIFT), NULL, MAX_BLOCK_ID},
-			{"General Attention 35", ATTENTION_SINGLE,
-			 NULL, MAX_BLOCK_ID},
+			{"General Attention 35", ATTENTION_SINGLE |
+			 ATTENTION_CLEAR_ENABLE, qed_general_attention_35,
+			 MAX_BLOCK_ID},
 			{"NWS Parity",
 			 ATTENTION_PAR | ATTENTION_BB_DIFFERENT |
 			 ATTENTION_BB(AEU_INVERT_REG_SPECIAL_CNIG_0),
@@ -2361,6 +2370,11 @@ void qed_int_disable_post_isr_release(struct qed_dev *cdev)
 		cdev->hwfns[i].b_int_requested = false;
 }
 
+void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable)
+{
+	cdev->attn_clr_en = clr_enable;
+}
+
 int qed_int_set_timer_res(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt,
 			  u8 timer_res, u16 sb_id, bool tx)
 {
diff --git a/drivers/net/ethernet/qlogic/qed/qed_int.h b/drivers/net/ethernet/qlogic/qed/qed_int.h
index 9ad568d93ae6..e09db3386367 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_int.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_int.h
@@ -190,6 +190,17 @@ void qed_int_get_num_sbs(struct qed_hwfn	*p_hwfn,
  */
 void qed_int_disable_post_isr_release(struct qed_dev *cdev);
 
+/**
+ * @brief qed_int_attn_clr_enable - sets whether the general behavior is
+ *        preventing attentions from being reasserted, or following the
+ *        attributes of the specific attention.
+ *
+ * @param cdev
+ * @param clr_enable
+ *
+ */
+void qed_int_attn_clr_enable(struct qed_dev *cdev, bool clr_enable);
+
 /**
  * @brief - Doorbell Recovery handler.
  *          Run doorbell recovery in case of PF overflow (and flush DORQ if
diff --git a/drivers/net/ethernet/qlogic/qed/qed_main.c b/drivers/net/ethernet/qlogic/qed/qed_main.c
index d7c9d94e4c59..83e798d4eebb 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_main.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_main.c
@@ -2491,10 +2491,14 @@ void qed_hw_error_occurred(struct qed_hwfn *p_hwfn,
 
 	DP_NOTICE(p_hwfn, "HW error occurred [%s]\n", err_str);
 
-	/* Call the HW error handler of the protocol driver
+	/* Call the HW error handler of the protocol driver.
+	 * If it is not available - perform a minimal handling of preventing
+	 * HW attentions from being reasserted.
 	 */
 	if (ops && ops->schedule_hw_err_handler)
 		ops->schedule_hw_err_handler(cookie, err_type);
+	else
+		qed_int_attn_clr_enable(p_hwfn->cdev, true);
 }
 
 static int qed_set_coalesce(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal,
@@ -2718,6 +2722,7 @@ const struct qed_common_ops qed_common_ops_pass = {
 	.set_led = &qed_set_led,
 	.recovery_process = &qed_recovery_process,
 	.recovery_prolog = &qed_recovery_prolog,
+	.attn_clr_enable = &qed_int_attn_clr_enable,
 	.update_drv_state = &qed_update_drv_state,
 	.update_mac = &qed_update_mac,
 	.update_mtu = &qed_update_mtu,
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index e67d5da23792..ee7662da6413 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -2516,6 +2516,8 @@ err:
 
 static void qede_atomic_hw_err_handler(struct qede_dev *edev)
 {
+	struct qed_dev *cdev = edev->cdev;
+
 	DP_NOTICE(edev,
 		  "Generic non-sleepable HW error handling started - err_flags 0x%lx\n",
 		  edev->err_flags);
@@ -2523,6 +2525,10 @@ static void qede_atomic_hw_err_handler(struct qede_dev *edev)
 	/* Get a call trace of the flow that led to the error */
 	WARN_ON(test_bit(QEDE_ERR_WARN, &edev->err_flags));
 
+	/* Prevent HW attentions from being reasserted */
+	if (test_bit(QEDE_ERR_ATTN_CLR_EN, &edev->err_flags))
+		edev->ops->common->attn_clr_enable(cdev, true);
+
 	DP_NOTICE(edev, "Generic non-sleepable HW error handling is done\n");
 }
 
diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 1b7d9548ee43..978e91e9ab65 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -1046,6 +1046,15 @@ struct qed_common_ops {
  */
 	int (*set_led)(struct qed_dev *cdev,
 		       enum qed_led_mode mode);
+
+/**
+ * @brief attn_clr_enable - Prevent attentions from being reasserted
+ *
+ * @param cdev
+ * @param clr_enable
+ */
+	void (*attn_clr_enable)(struct qed_dev *cdev, bool clr_enable);
+
 /**
  * @brief db_recovery_add - add doorbell information to the doorbell
  * recovery mechanism.
-- 
cgit v1.2.3-59-g8ed1b


From 7d9acd87bd55f401ada67b9c6a9c83a7e68c4ddf Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:23 +0300
Subject: net: qede: optional hw recovery procedure

Driver has an ability to initiate a recovery process as a reaction to
detected errors. But the codepath (recovery_process) was disabled and
never active.

Here we add an ethtool private flag to allow the user to have the
recovery procedure activated.

We still do not enable this by default though, since in some configurations
this is not desirable. E.g. this may impact other PFs/VFs.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede_ethtool.c | 24 ++++++++++++++++++++++++
 1 file changed, 24 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
index 812c7766e096..24cc68391ac4 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_ethtool.c
@@ -190,12 +190,14 @@ static const struct {
 enum {
 	QEDE_PRI_FLAG_CMT,
 	QEDE_PRI_FLAG_SMART_AN_SUPPORT, /* MFW supports SmartAN */
+	QEDE_PRI_FLAG_RECOVER_ON_ERROR,
 	QEDE_PRI_FLAG_LEN,
 };
 
 static const char qede_private_arr[QEDE_PRI_FLAG_LEN][ETH_GSTRING_LEN] = {
 	"Coupled-Function",
 	"SmartAN capable",
+	"Recover on error",
 };
 
 enum qede_ethtool_tests {
@@ -417,9 +419,30 @@ static u32 qede_get_priv_flags(struct net_device *dev)
 	if (edev->dev_info.common.smart_an)
 		flags |= BIT(QEDE_PRI_FLAG_SMART_AN_SUPPORT);
 
+	if (edev->err_flags & BIT(QEDE_ERR_IS_RECOVERABLE))
+		flags |= BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR);
+
 	return flags;
 }
 
+static int qede_set_priv_flags(struct net_device *dev, u32 flags)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	u32 cflags = qede_get_priv_flags(dev);
+	u32 dflags = flags ^ cflags;
+
+	/* can only change RECOVER_ON_ERROR flag */
+	if (dflags & ~BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
+		return -EINVAL;
+
+	if (flags & BIT(QEDE_PRI_FLAG_RECOVER_ON_ERROR))
+		set_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
+	else
+		clear_bit(QEDE_ERR_IS_RECOVERABLE, &edev->err_flags);
+
+	return 0;
+}
+
 struct qede_link_mode_mapping {
 	u32 qed_link_mode;
 	u32 ethtool_link_mode;
@@ -2098,6 +2121,7 @@ static const struct ethtool_ops qede_ethtool_ops = {
 	.set_phys_id = qede_set_phys_id,
 	.get_ethtool_stats = qede_get_ethtool_stats,
 	.get_priv_flags = qede_get_priv_flags,
+	.set_priv_flags = qede_set_priv_flags,
 	.get_sset_count = qede_get_sset_count,
 	.get_rxnfc = qede_get_rxnfc,
 	.set_rxnfc = qede_set_rxnfc,
-- 
cgit v1.2.3-59-g8ed1b


From 5144e9f439d53460c38eb5c34dd67837ac512db1 Mon Sep 17 00:00:00 2001
From: Denis Bolotin <dbolotin@marvell.com>
Date: Thu, 14 May 2020 12:57:24 +0300
Subject: net: qede: Implement ndo_tx_timeout

Upon TX timeout detection we disable the carrier and print TX queue
info. We then raise a HW error condition and trigger the service
task to handle it.

This handler will capture extra debug info and then optionally
trigger the recovery procedure to try to restore the function.

Signed-off-by: Denis Bolotin <dbolotin@marvell.com>
Signed-off-by: Ariel Elior <aelior@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qede/qede.h      |  1 -
 drivers/net/ethernet/qlogic/qede/qede_main.c | 46 ++++++++++++++++++++++++++++
 2 files changed, 46 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede.h b/drivers/net/ethernet/qlogic/qede/qede.h
index 695d645d9ba9..8857da1208d7 100644
--- a/drivers/net/ethernet/qlogic/qede/qede.h
+++ b/drivers/net/ethernet/qlogic/qede/qede.h
@@ -533,7 +533,6 @@ u16 qede_select_queue(struct net_device *dev, struct sk_buff *skb,
 netdev_features_t qede_features_check(struct sk_buff *skb,
 				      struct net_device *dev,
 				      netdev_features_t features);
-void qede_tx_log_print(struct qede_dev *edev, struct qede_fastpath *fp);
 int qede_alloc_rx_buffer(struct qede_rx_queue *rxq, bool allow_lazy);
 int qede_free_tx_pkt(struct qede_dev *edev,
 		     struct qede_tx_queue *txq, int *len);
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index ee7662da6413..f50d9a9b76be 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -539,6 +539,51 @@ static int qede_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	return 0;
 }
 
+static void qede_tx_log_print(struct qede_dev *edev, struct qede_tx_queue *txq)
+{
+	DP_NOTICE(edev,
+		  "Txq[%d]: FW cons [host] %04x, SW cons %04x, SW prod %04x [Jiffies %lu]\n",
+		  txq->index, le16_to_cpu(*txq->hw_cons_ptr),
+		  qed_chain_get_cons_idx(&txq->tx_pbl),
+		  qed_chain_get_prod_idx(&txq->tx_pbl),
+		  jiffies);
+}
+
+static void qede_tx_timeout(struct net_device *dev, unsigned int txqueue)
+{
+	struct qede_dev *edev = netdev_priv(dev);
+	struct qede_tx_queue *txq;
+	int cos;
+
+	netif_carrier_off(dev);
+	DP_NOTICE(edev, "TX timeout on queue %u!\n", txqueue);
+
+	if (!(edev->fp_array[txqueue].type & QEDE_FASTPATH_TX))
+		return;
+
+	for_each_cos_in_txq(edev, cos) {
+		txq = &edev->fp_array[txqueue].txq[cos];
+
+		if (qed_chain_get_cons_idx(&txq->tx_pbl) !=
+		    qed_chain_get_prod_idx(&txq->tx_pbl))
+			qede_tx_log_print(edev, txq);
+	}
+
+	if (IS_VF(edev))
+		return;
+
+	if (test_and_set_bit(QEDE_ERR_IS_HANDLED, &edev->err_flags) ||
+	    edev->state == QEDE_STATE_RECOVERY) {
+		DP_INFO(edev,
+			"Avoid handling a Tx timeout while another HW error is being handled\n");
+		return;
+	}
+
+	set_bit(QEDE_ERR_GET_DBG_INFO, &edev->err_flags);
+	set_bit(QEDE_SP_HW_ERR, &edev->sp_flags);
+	schedule_delayed_work(&edev->sp_task, 0);
+}
+
 static int qede_setup_tc(struct net_device *ndev, u8 num_tc)
 {
 	struct qede_dev *edev = netdev_priv(ndev);
@@ -626,6 +671,7 @@ static const struct net_device_ops qede_netdev_ops = {
 	.ndo_validate_addr = eth_validate_addr,
 	.ndo_change_mtu = qede_change_mtu,
 	.ndo_do_ioctl = qede_ioctl,
+	.ndo_tx_timeout = qede_tx_timeout,
 #ifdef CONFIG_QED_SRIOV
 	.ndo_set_vf_mac = qede_set_vf_mac,
 	.ndo_set_vf_vlan = qede_set_vf_vlan,
-- 
cgit v1.2.3-59-g8ed1b


From 3e99c2111026244326deee5fcaf8ea744627214b Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:25 +0300
Subject: net: qed: introduce critical fan failure handler

Fan failure is signalled by firmware; the driver reacts to this error
via the newly introduced notification path. It will collect a dump and
shut down the device to prevent physical breakage.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |  2 +-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 14 ++++++++++++++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index 21d53b00c2e6..ab042b835797 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12761,7 +12761,7 @@ enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_GET_FCOE_STATS,
 	MFW_DRV_MSG_GET_ISCSI_STATS,
 	MFW_DRV_MSG_GET_RDMA_STATS,
-	MFW_DRV_MSG_BW_UPDATE10,
+	MFW_DRV_MSG_FAILURE_DETECTED,
 	MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
 	MFW_DRV_MSG_BW_UPDATE11,
 	MFW_DRV_MSG_RESERVED,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index db7cf120527a..d5be2d2046c6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1706,6 +1706,17 @@ static void qed_mcp_update_stag(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 		    &resp, &param);
 }
 
+static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
+				       struct qed_ptt *p_ptt)
+{
+	/* A single notification should be sent to upper driver in CMT mode */
+	if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
+		return;
+
+	qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_FAN_FAIL,
+			  "Fan failure was detected on the network interface card and it's going to be shut down.\n");
+}
+
 void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct public_func shmem_info;
@@ -1852,6 +1863,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 		case MFW_DRV_MSG_S_TAG_UPDATE:
 			qed_mcp_update_stag(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_FAILURE_DETECTED:
+			qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
+			break;
 		case MFW_DRV_MSG_GET_TLV_REQ:
 			qed_mfw_tlv_req(p_hwfn);
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From ebf64bf4df6331fc612927aae904c310947a02f8 Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:26 +0300
Subject: net: qed: introduce critical hardware error handler

The MCP may signal the driver about a generic critical failure.
The driver has to collect mdump information (get_retain); it
pushes that to the logs and triggers a generic notification of a
"hardware attention" event.

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_hsi.h |  28 +++++++-
 drivers/net/ethernet/qlogic/qed/qed_mcp.c | 113 ++++++++++++++++++++++++++++++
 drivers/net/ethernet/qlogic/qed/qed_mcp.h |  13 ++++
 3 files changed, 153 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_hsi.h b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
index ab042b835797..f00460d00cab 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_hsi.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_hsi.h
@@ -12400,6 +12400,13 @@ struct load_rsp_stc {
 #define LOAD_RSP_FLAGS0_DRV_EXISTS      (0x1 << 0)
 };
 
+struct mdump_retain_data_stc {
+	u32 valid;
+	u32 epoch;
+	u32 pf;
+	u32 status;
+};
+
 union drv_union_data {
 	u32 ver_str[MCP_DRV_VER_STR_SIZE_DWORD];
 	struct mcp_mac wol_mac;
@@ -12488,6 +12495,8 @@ struct public_drv_mb {
 #define DRV_MSG_CODE_BIST_TEST			0x001e0000
 #define DRV_MSG_CODE_SET_LED_MODE		0x00200000
 #define DRV_MSG_CODE_RESOURCE_CMD		0x00230000
+/* Send crash dump commands with param[3:0] - opcode */
+#define DRV_MSG_CODE_MDUMP_CMD			0x00250000
 #define DRV_MSG_CODE_GET_TLV_DONE		0x002f0000
 #define DRV_MSG_CODE_GET_ENGINE_CONFIG		0x00370000
 #define DRV_MSG_CODE_GET_PPFID_BITMAP		0x43000000
@@ -12519,6 +12528,21 @@ struct public_drv_mb {
 
 #define RESOURCE_DUMP				0
 
+/* DRV_MSG_CODE_MDUMP_CMD parameters */
+#define MDUMP_DRV_PARAM_OPCODE_MASK             0x0000000f
+#define DRV_MSG_CODE_MDUMP_ACK                  0x01
+#define DRV_MSG_CODE_MDUMP_SET_VALUES           0x02
+#define DRV_MSG_CODE_MDUMP_TRIGGER              0x03
+#define DRV_MSG_CODE_MDUMP_GET_CONFIG           0x04
+#define DRV_MSG_CODE_MDUMP_SET_ENABLE           0x05
+#define DRV_MSG_CODE_MDUMP_CLEAR_LOGS           0x06
+#define DRV_MSG_CODE_MDUMP_GET_RETAIN           0x07
+#define DRV_MSG_CODE_MDUMP_CLR_RETAIN           0x08
+
+#define DRV_MSG_CODE_HW_DUMP_TRIGGER            0x0a
+#define DRV_MSG_CODE_MDUMP_GEN_MDUMP2           0x0b
+#define DRV_MSG_CODE_MDUMP_FREE_MDUMP2          0x0c
+
 #define DRV_MSG_CODE_GET_PF_RDMA_PROTOCOL	0x002b0000
 #define DRV_MSG_CODE_OS_WOL			0x002e0000
 
@@ -12697,6 +12721,8 @@ struct public_drv_mb {
 #define FW_MSG_CODE_DEBUG_NOT_ENABLED		0xb00a0000
 #define FW_MSG_CODE_DEBUG_DATA_SEND_OK		0xb00b0000
 
+#define FW_MSG_CODE_MDUMP_INVALID_CMD		0x00030000
+
 	u32 fw_mb_param;
 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_MASK	0xFFFF0000
 #define FW_MB_PARAM_RESOURCE_ALLOC_VERSION_MAJOR_SHIFT	16
@@ -12763,7 +12789,7 @@ enum MFW_DRV_MSG_TYPE {
 	MFW_DRV_MSG_GET_RDMA_STATS,
 	MFW_DRV_MSG_FAILURE_DETECTED,
 	MFW_DRV_MSG_TRANSCEIVER_STATE_CHANGE,
-	MFW_DRV_MSG_BW_UPDATE11,
+	MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED,
 	MFW_DRV_MSG_RESERVED,
 	MFW_DRV_MSG_GET_TLV_REQ,
 	MFW_DRV_MSG_OEM_CFG_UPDATE,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.c b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
index d5be2d2046c6..9624616806e7 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.c
@@ -1717,6 +1717,116 @@ static void qed_mcp_handle_fan_failure(struct qed_hwfn *p_hwfn,
 			  "Fan failure was detected on the network interface card and it's going to be shut down.\n");
 }
 
+struct qed_mdump_cmd_params {
+	u32 cmd;
+	void *p_data_src;
+	u8 data_src_size;
+	void *p_data_dst;
+	u8 data_dst_size;
+	u32 mcp_resp;
+};
+
+static int
+qed_mcp_mdump_cmd(struct qed_hwfn *p_hwfn,
+		  struct qed_ptt *p_ptt,
+		  struct qed_mdump_cmd_params *p_mdump_cmd_params)
+{
+	struct qed_mcp_mb_params mb_params;
+	int rc;
+
+	memset(&mb_params, 0, sizeof(mb_params));
+	mb_params.cmd = DRV_MSG_CODE_MDUMP_CMD;
+	mb_params.param = p_mdump_cmd_params->cmd;
+	mb_params.p_data_src = p_mdump_cmd_params->p_data_src;
+	mb_params.data_src_size = p_mdump_cmd_params->data_src_size;
+	mb_params.p_data_dst = p_mdump_cmd_params->p_data_dst;
+	mb_params.data_dst_size = p_mdump_cmd_params->data_dst_size;
+	rc = qed_mcp_cmd_and_union(p_hwfn, p_ptt, &mb_params);
+	if (rc)
+		return rc;
+
+	p_mdump_cmd_params->mcp_resp = mb_params.mcp_resp;
+
+	if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_MDUMP_INVALID_CMD) {
+		DP_INFO(p_hwfn,
+			"The mdump sub command is unsupported by the MFW [mdump_cmd 0x%x]\n",
+			p_mdump_cmd_params->cmd);
+		rc = -EOPNOTSUPP;
+	} else if (p_mdump_cmd_params->mcp_resp == FW_MSG_CODE_UNSUPPORTED) {
+		DP_INFO(p_hwfn,
+			"The mdump command is not supported by the MFW\n");
+		rc = -EOPNOTSUPP;
+	}
+
+	return rc;
+}
+
+static int qed_mcp_mdump_ack(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
+{
+	struct qed_mdump_cmd_params mdump_cmd_params;
+
+	memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_ACK;
+
+	return qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+}
+
+int
+qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct mdump_retain_data_stc *p_mdump_retain)
+{
+	struct qed_mdump_cmd_params mdump_cmd_params;
+	int rc;
+
+	memset(&mdump_cmd_params, 0, sizeof(mdump_cmd_params));
+	mdump_cmd_params.cmd = DRV_MSG_CODE_MDUMP_GET_RETAIN;
+	mdump_cmd_params.p_data_dst = p_mdump_retain;
+	mdump_cmd_params.data_dst_size = sizeof(*p_mdump_retain);
+
+	rc = qed_mcp_mdump_cmd(p_hwfn, p_ptt, &mdump_cmd_params);
+	if (rc)
+		return rc;
+
+	if (mdump_cmd_params.mcp_resp != FW_MSG_CODE_OK) {
+		DP_INFO(p_hwfn,
+			"Failed to get the mdump retained data [mcp_resp 0x%x]\n",
+			mdump_cmd_params.mcp_resp);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static void qed_mcp_handle_critical_error(struct qed_hwfn *p_hwfn,
+					  struct qed_ptt *p_ptt)
+{
+	struct mdump_retain_data_stc mdump_retain;
+	int rc;
+
+	/* In CMT mode - no need for more than a single acknowledgment to the
+	 * MFW, and no more than a single notification to the upper driver.
+	 */
+	if (p_hwfn != QED_LEADING_HWFN(p_hwfn->cdev))
+		return;
+
+	rc = qed_mcp_mdump_get_retain(p_hwfn, p_ptt, &mdump_retain);
+	if (rc == 0 && mdump_retain.valid)
+		DP_NOTICE(p_hwfn,
+			  "The MFW notified that a critical error occurred in the device [epoch 0x%08x, pf 0x%x, status 0x%08x]\n",
+			  mdump_retain.epoch,
+			  mdump_retain.pf, mdump_retain.status);
+	else
+		DP_NOTICE(p_hwfn,
+			  "The MFW notified that a critical error occurred in the device\n");
+
+	DP_NOTICE(p_hwfn,
+		  "Acknowledging the notification to not allow the MFW crash dump [driver debug data collection is preferable]\n");
+	qed_mcp_mdump_ack(p_hwfn, p_ptt);
+
+	qed_hw_err_notify(p_hwfn, p_ptt, QED_HW_ERR_HW_ATTN, NULL);
+}
+
 void qed_mcp_read_ufp_config(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt)
 {
 	struct public_func shmem_info;
@@ -1866,6 +1976,9 @@ int qed_mcp_handle_events(struct qed_hwfn *p_hwfn,
 		case MFW_DRV_MSG_FAILURE_DETECTED:
 			qed_mcp_handle_fan_failure(p_hwfn, p_ptt);
 			break;
+		case MFW_DRV_MSG_CRITICAL_ERROR_OCCURRED:
+			qed_mcp_handle_critical_error(p_hwfn, p_ptt);
+			break;
 		case MFW_DRV_MSG_GET_TLV_REQ:
 			qed_mfw_tlv_req(p_hwfn);
 			break;
diff --git a/drivers/net/ethernet/qlogic/qed/qed_mcp.h b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
index bc248418a5f5..5750b4c5ef63 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_mcp.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_mcp.h
@@ -1016,6 +1016,19 @@ int __qed_configure_pf_min_bandwidth(struct qed_hwfn *p_hwfn,
 int qed_mcp_mask_parities(struct qed_hwfn *p_hwfn,
 			  struct qed_ptt *p_ptt, u32 mask_parities);
 
+/* @brief - Gets the mdump retained data from the MFW.
+ *
+ * @param p_hwfn
+ * @param p_ptt
+ * @param p_mdump_retain
+ *
+ * @param return 0 upon success.
+ */
+int
+qed_mcp_mdump_get_retain(struct qed_hwfn *p_hwfn,
+			 struct qed_ptt *p_ptt,
+			 struct mdump_retain_data_stc *p_mdump_retain);
+
 /**
  * @brief - Sets the MFW's max value for the given resource
  *
-- 
cgit v1.2.3-59-g8ed1b


From 8f76812e1cc4d561c3efc3b2586c686b5428d31f Mon Sep 17 00:00:00 2001
From: Igor Russkikh <irusskikh@marvell.com>
Date: Thu, 14 May 2020 12:57:27 +0300
Subject: net: qed: fix bad formatting

On some adjacent code, fix bad code formatting

Signed-off-by: Ariel Elior <ariel.elior@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/qed/qed_if.h | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h
index 978e91e9ab65..48325d7790f8 100644
--- a/include/linux/qed/qed_if.h
+++ b/include/linux/qed/qed_if.h
@@ -821,12 +821,11 @@ enum qed_nvm_flash_cmd {
 
 struct qed_common_cb_ops {
 	void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc);
-	void	(*link_update)(void			*dev,
-			       struct qed_link_output	*link);
+	void (*link_update)(void *dev, struct qed_link_output *link);
 	void (*schedule_recovery_handler)(void *dev);
 	void (*schedule_hw_err_handler)(void *dev,
 					enum qed_hw_err_type err_type);
-	void	(*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
+	void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type);
 	void (*get_generic_tlv_data)(void *dev, struct qed_generic_tlvs *data);
 	void (*get_protocol_tlv_data)(void *dev, void *data);
 };
-- 
cgit v1.2.3-59-g8ed1b


From ec008fa2a9e5686081053750893de5f407a8d076 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Wed, 13 May 2020 09:26:14 -0400
Subject: ethernet: ti: am65-cpts: add routines to support taprio offload

TAPRIO/EST offload support in CPSW2G requires the EST scheduler
function to be enabled in CPTS. So this patch adds a function to
set the cycle time for the EST scheduler. It also adds a function for
getting the PHC clock time in ns for taprio qdisc configuration,
mostly to verify whether a timer update is needed or to get the actual
state of the oper/admin schedule.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpts.c | 48 +++++++++++++++++++++++++++++++++++++
 drivers/net/ethernet/ti/am65-cpts.h | 24 +++++++++++++++++++
 2 files changed, 72 insertions(+)

diff --git a/drivers/net/ethernet/ti/am65-cpts.c b/drivers/net/ethernet/ti/am65-cpts.c
index 51c94b2a77b1..c59a289e428c 100644
--- a/drivers/net/ethernet/ti/am65-cpts.c
+++ b/drivers/net/ethernet/ti/am65-cpts.c
@@ -450,6 +450,19 @@ static int am65_cpts_ptp_gettimex(struct ptp_clock_info *ptp,
 	return 0;
 }
 
+u64 am65_cpts_ns_gettime(struct am65_cpts *cpts)
+{
+	u64 ns;
+
+	/* reuse ptp_clk_lock as it serialize ts push */
+	mutex_lock(&cpts->ptp_clk_lock);
+	ns = am65_cpts_gettime(cpts, NULL);
+	mutex_unlock(&cpts->ptp_clk_lock);
+
+	return ns;
+}
+EXPORT_SYMBOL_GPL(am65_cpts_ns_gettime);
+
 static int am65_cpts_ptp_settime(struct ptp_clock_info *ptp,
 				 const struct timespec64 *ts)
 {
@@ -494,6 +507,41 @@ static int am65_cpts_extts_enable(struct am65_cpts *cpts, u32 index, int on)
 	return 0;
 }
 
+int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx,
+			  struct am65_cpts_estf_cfg *cfg)
+{
+	u64 cycles;
+	u32 val;
+
+	cycles = cfg->ns_period * cpts->refclk_freq;
+	cycles = DIV_ROUND_UP(cycles, NSEC_PER_SEC);
+	if (cycles > U32_MAX)
+		return -EINVAL;
+
+	/* according to TRM should be zeroed */
+	am65_cpts_write32(cpts, 0, estf[idx].length);
+
+	val = upper_32_bits(cfg->ns_start);
+	am65_cpts_write32(cpts, val, estf[idx].comp_hi);
+	val = lower_32_bits(cfg->ns_start);
+	am65_cpts_write32(cpts, val, estf[idx].comp_lo);
+	val = lower_32_bits(cycles);
+	am65_cpts_write32(cpts, val, estf[idx].length);
+
+	dev_dbg(cpts->dev, "%s: ESTF:%u enabled\n", __func__, idx);
+
+	return 0;
+}
+EXPORT_SYMBOL_GPL(am65_cpts_estf_enable);
+
+void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx)
+{
+	am65_cpts_write32(cpts, 0, estf[idx].length);
+
+	dev_dbg(cpts->dev, "%s: ESTF:%u disabled\n", __func__, idx);
+}
+EXPORT_SYMBOL_GPL(am65_cpts_estf_disable);
+
 static void am65_cpts_perout_enable_hw(struct am65_cpts *cpts,
 				       struct ptp_perout_request *req, int on)
 {
diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h
index 0b55dc12ba48..98c1960b20b9 100644
--- a/drivers/net/ethernet/ti/am65-cpts.h
+++ b/drivers/net/ethernet/ti/am65-cpts.h
@@ -12,6 +12,11 @@
 
 struct am65_cpts;
 
+struct am65_cpts_estf_cfg {
+	u64 ns_period;
+	u64 ns_start;
+};
+
 #if IS_ENABLED(CONFIG_TI_K3_AM65_CPTS)
 struct am65_cpts *am65_cpts_create(struct device *dev, void __iomem *regs,
 				   struct device_node *node);
@@ -19,6 +24,10 @@ int am65_cpts_phc_index(struct am65_cpts *cpts);
 void am65_cpts_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb);
 void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts, struct sk_buff *skb);
 void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en);
+u64 am65_cpts_ns_gettime(struct am65_cpts *cpts);
+int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx,
+			  struct am65_cpts_estf_cfg *cfg);
+void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx);
 #else
 static inline struct am65_cpts *am65_cpts_create(struct device *dev,
 						 void __iomem *regs,
@@ -45,6 +54,21 @@ static inline void am65_cpts_prep_tx_timestamp(struct am65_cpts *cpts,
 static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en)
 {
 }
+
+static s64 am65_cpts_ns_gettime(struct am65_cpts *cpts)
+{
+	return 0;
+}
+
+static int am65_cpts_estf_enable(struct am65_cpts *cpts,
+				 int idx, struct am65_cpts_estf_cfg *cfg)
+{
+	return 0;
+}
+
+static void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx)
+{
+}
 #endif
 
 #endif /* K3_CPTS_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 8127224c2708aa1558e2be7bbd1b7e9b07860de6 Mon Sep 17 00:00:00 2001
From: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Date: Wed, 13 May 2020 09:26:15 -0400
Subject: ethernet: ti: am65-cpsw-qos: add TAPRIO offload support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AM65 CPSW h/w supports Enhanced Scheduled Traffic (EST – defined
in P802.1Qbv/D2.2 that later got included in IEEE 802.1Q-2018)
configuration. EST allows express queue traffic to be scheduled
(placed) on the wire at specific repeatable time intervals. In
Linux kernel, EST configuration is done through tc command and
the taprio scheduler in the net core implements a software only
scheduler (SCH_TAPRIO). If the NIC is capable of EST configuration,
the user indicates "flag 2" in the command, which is then parsed by
the taprio scheduler in net core and indicates that the command is to
be offloaded to h/w. taprio then offloads the command to the
driver by calling ndo_setup_tc() ndo ops. This patch implements
ndo_setup_tc() to offload EST configuration to CPSW h/w.

Currently driver supports only SetGateStates operation. EST
operates on a repeating time interval generated by the CPTS EST
function generator. Each Ethernet port has a global EST fetch
RAM that can be configured as 2 buffers, each of 64 locations
or one large buffer of 128 locations. In 2 buffer configuration,
a ping pong mechanism is used to hold the active schedule (oper)
in one buffer and new (admin) command in the other. Each 22-bit
fetch command consists of a 14-bit fetch count (14 MSB’s) and an
8-bit priority fetch allow (8 LSB’s) that will be applied for the
fetch count time in wireside clocks. The driver processes each of the
sched-entries in the offload command and updates the fetch RAM.
Driver configures duration in sched-entry into the fetch count
and Gate mask into the priority fetch bits of the RAM. Then
configures the CPTS EST function generator to activate the
schedule. Currently driver supports only 2 buffer configuration
which means driver supports a max cycle time of ~8 msec.

CPSW supports a configurable number of priority queues (up to 8)
and needs to be switched to this mode from the default round
robin mode before EST can be offloaded. User configures
these through ethtool commands (-L for changing number of
queues and --set-priv-flags to disable round robin mode).
Driver doesn't enable EST if pf_p0_rx_ptype_rrobin private flag
is set. The flag is common for all ports, and so can't be just
overridden by taprio configuration w/o user involvement.
Command fails if pf_p0_rx_ptype_rrobin is already set in the
driver.

Scheds (commands) configuration depends on interface speed so
driver translates the duration to the fetch count based on
link speed. Each schedule can be constructed with several
command entries in fetch RAM  depending on interval. For example
if each sched has timer interval < ~130us on 1000 Mb link then
each sched consumes one command and has a 1:1 mapping. When the
Ethernet link goes down, the driver purges the configuration if the
link is down for more than 1 second.

The patch updates the timer and scheds memory only if it's
really needed, and avoids cases that would require the user to stop
the timer by configuring only the scheds memory.

Signed-off-by: Ivan Khoronzhuk <ivan.khoronzhuk@linaro.org>
Signed-off-by: Murali Karicheri <m-karicheri2@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/Kconfig             |   9 +
 drivers/net/ethernet/ti/Makefile            |   2 +-
 drivers/net/ethernet/ti/am65-cpsw-ethtool.c |  12 +-
 drivers/net/ethernet/ti/am65-cpsw-nuss.c    |   9 +
 drivers/net/ethernet/ti/am65-cpsw-nuss.h    |   5 +
 drivers/net/ethernet/ti/am65-cpsw-qos.c     | 626 ++++++++++++++++++++++++++++
 drivers/net/ethernet/ti/am65-cpsw-qos.h     |  29 ++
 7 files changed, 689 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/ti/am65-cpsw-qos.c
 create mode 100644 drivers/net/ethernet/ti/am65-cpsw-qos.h

diff --git a/drivers/net/ethernet/ti/Kconfig b/drivers/net/ethernet/ti/Kconfig
index 988e907e3322..4d4852f00ff7 100644
--- a/drivers/net/ethernet/ti/Kconfig
+++ b/drivers/net/ethernet/ti/Kconfig
@@ -123,6 +123,15 @@ config TI_K3_AM65_CPTS
 	  protocol, Ethernet Enhanced Scheduled Traffic Operations (CPTS_ESTFn)
 	  and PCIe Subsystem Precision Time Measurement (PTM).
 
+config TI_AM65_CPSW_TAS
+	bool "Enable TAS offload in AM65 CPSW"
+	depends on TI_K3_AM65_CPSW_NUSS && NET_SCH_TAPRIO && TI_K3_AM65_CPTS
+	help
+	  Say y here to support Time Aware Shaper(TAS) offload in AM65 CPSW.
+	  AM65 CPSW hardware supports Enhanced Scheduled Traffic (EST)
+	  defined in IEEE 802.1Q 2018. The EST scheduler runs on CPTS and the
+	  TAS/EST schedule is updated in the Fetch RAM memory of the CPSW.
+
 config TI_KEYSTONE_NETCP
 	tristate "TI Keystone NETCP Core Support"
 	select TI_DAVINCI_MDIO
diff --git a/drivers/net/ethernet/ti/Makefile b/drivers/net/ethernet/ti/Makefile
index bf86067f9b4c..be95512d80b5 100644
--- a/drivers/net/ethernet/ti/Makefile
+++ b/drivers/net/ethernet/ti/Makefile
@@ -25,5 +25,5 @@ obj-$(CONFIG_TI_KEYSTONE_NETCP_ETHSS) += keystone_netcp_ethss.o
 keystone_netcp_ethss-y := netcp_ethss.o netcp_sgmii.o netcp_xgbepcsr.o cpsw_ale.o
 
 obj-$(CONFIG_TI_K3_AM65_CPSW_NUSS) += ti-am65-cpsw-nuss.o
-ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o k3-cppi-desc-pool.o
+ti-am65-cpsw-nuss-y := am65-cpsw-nuss.o cpsw_sl.o am65-cpsw-ethtool.o cpsw_ale.o k3-cppi-desc-pool.o am65-cpsw-qos.o
 obj-$(CONFIG_TI_K3_AM65_CPTS) += am65-cpts.o
diff --git a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
index 23661a6ed426..8c4690f3ebcb 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-ethtool.c
@@ -730,9 +730,17 @@ static u32 am65_cpsw_get_ethtool_priv_flags(struct net_device *ndev)
 static int am65_cpsw_set_ethtool_priv_flags(struct net_device *ndev, u32 flags)
 {
 	struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+	int rrobin;
 
-	common->pf_p0_rx_ptype_rrobin =
-			!!(flags & AM65_CPSW_PRIV_P0_RX_PTYPE_RROBIN);
+	rrobin = !!(flags & AM65_CPSW_PRIV_P0_RX_PTYPE_RROBIN);
+
+	if (common->est_enabled && rrobin) {
+		netdev_err(ndev,
+			   "p0-rx-ptype-rrobin flag conflicts with QOS\n");
+		return -EINVAL;
+	}
+
+	common->pf_p0_rx_ptype_rrobin = rrobin;
 	am65_cpsw_nuss_set_p0_ptype(common);
 
 	return 0;
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.c b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
index 8cdbb2b9b13a..4a8229864ae4 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.c
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.c
@@ -37,12 +37,14 @@
 #define AM65_CPSW_XGMII_BASE	0x2100
 #define AM65_CPSW_CPSW_NU_BASE	0x20000
 #define AM65_CPSW_NU_PORTS_BASE	0x1000
+#define AM65_CPSW_NU_FRAM_BASE	0x12000
 #define AM65_CPSW_NU_STATS_BASE	0x1a000
 #define AM65_CPSW_NU_ALE_BASE	0x1e000
 #define AM65_CPSW_NU_CPTS_BASE	0x1d000
 
 #define AM65_CPSW_NU_PORTS_OFFSET	0x1000
 #define AM65_CPSW_NU_STATS_PORT_OFFSET	0x200
+#define AM65_CPSW_NU_FRAM_PORT_OFFSET	0x200
 
 #define AM65_CPSW_MAX_PORTS	8
 
@@ -188,9 +190,11 @@ void am65_cpsw_nuss_adjust_link(struct net_device *ndev)
 		cpsw_ale_control_set(common->ale, port->port_id,
 				     ALE_PORT_STATE, ALE_PORT_STATE_FORWARD);
 
+		am65_cpsw_qos_link_up(ndev, phy->speed);
 		netif_tx_wake_all_queues(ndev);
 	} else {
 		int tmo;
+
 		/* disable forwarding */
 		cpsw_ale_control_set(common->ale, port->port_id,
 				     ALE_PORT_STATE, ALE_PORT_STATE_DISABLE);
@@ -204,6 +208,7 @@ void am65_cpsw_nuss_adjust_link(struct net_device *ndev)
 
 		cpsw_sl_ctl_reset(port->slave.mac_sl);
 
+		am65_cpsw_qos_link_down(ndev);
 		netif_tx_stop_all_queues(ndev);
 	}
 
@@ -1378,6 +1383,7 @@ static const struct net_device_ops am65_cpsw_nuss_netdev_ops_2g = {
 	.ndo_vlan_rx_kill_vid	= am65_cpsw_nuss_ndo_slave_kill_vid,
 	.ndo_do_ioctl		= am65_cpsw_nuss_ndo_slave_ioctl,
 	.ndo_set_features	= am65_cpsw_nuss_ndo_slave_set_features,
+	.ndo_setup_tc           = am65_cpsw_qos_ndo_setup_tc,
 };
 
 static void am65_cpsw_nuss_slave_disable_unused(struct am65_cpsw_port *port)
@@ -1739,6 +1745,9 @@ static int am65_cpsw_nuss_init_slave_ports(struct am65_cpsw_common *common)
 		port->stat_base = common->cpsw_base + AM65_CPSW_NU_STATS_BASE +
 				  (AM65_CPSW_NU_STATS_PORT_OFFSET * port_id);
 		port->name = of_get_property(port_np, "label", NULL);
+		port->fetch_ram_base =
+				common->cpsw_base + AM65_CPSW_NU_FRAM_BASE +
+				(AM65_CPSW_NU_FRAM_PORT_OFFSET * (port_id - 1));
 
 		port->disabled = !of_device_is_available(port_np);
 		if (port->disabled)
diff --git a/drivers/net/ethernet/ti/am65-cpsw-nuss.h b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
index 8a6382188cb5..9faf4fb1409b 100644
--- a/drivers/net/ethernet/ti/am65-cpsw-nuss.h
+++ b/drivers/net/ethernet/ti/am65-cpsw-nuss.h
@@ -9,7 +9,9 @@
 #include <linux/kernel.h>
 #include <linux/module.h>
 #include <linux/netdevice.h>
+#include <linux/phy.h>
 #include <linux/platform_device.h>
+#include "am65-cpsw-qos.h"
 
 struct am65_cpts;
 
@@ -38,10 +40,12 @@ struct am65_cpsw_port {
 	u32				port_id;
 	void __iomem			*port_base;
 	void __iomem			*stat_base;
+	void __iomem			*fetch_ram_base;
 	bool				disabled;
 	struct am65_cpsw_slave_data	slave;
 	bool				tx_ts_enabled;
 	bool				rx_ts_enabled;
+	struct am65_cpsw_qos		qos;
 };
 
 struct am65_cpsw_host {
@@ -104,6 +108,7 @@ struct am65_cpsw_common {
 	u32			cpsw_ver;
 	bool			pf_p0_rx_ptype_rrobin;
 	struct am65_cpts	*cpts;
+	int			est_enabled;
 };
 
 struct am65_cpsw_ndev_stats {
diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.c b/drivers/net/ethernet/ti/am65-cpsw-qos.c
new file mode 100644
index 000000000000..32eac04468bb
--- /dev/null
+++ b/drivers/net/ethernet/ti/am65-cpsw-qos.c
@@ -0,0 +1,626 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Texas Instruments K3 AM65 Ethernet QoS submodule
+ * Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/
+ *
+ * quality of service module includes:
+ * Enhanced Scheduled Traffic (EST - P802.1Qbv/D2.2)
+ */
+
+#include <linux/pm_runtime.h>
+#include <linux/time.h>
+
+#include "am65-cpsw-nuss.h"
+#include "am65-cpsw-qos.h"
+#include "am65-cpts.h"
+
+#define AM65_CPSW_REG_CTL			0x004
+#define AM65_CPSW_PN_REG_CTL			0x004
+#define AM65_CPSW_PN_REG_FIFO_STATUS		0x050
+#define AM65_CPSW_PN_REG_EST_CTL		0x060
+
+/* AM65_CPSW_REG_CTL register fields */
+#define AM65_CPSW_CTL_EST_EN			BIT(18)
+
+/* AM65_CPSW_PN_REG_CTL register fields */
+#define AM65_CPSW_PN_CTL_EST_PORT_EN		BIT(17)
+
+/* AM65_CPSW_PN_REG_EST_CTL register fields */
+#define AM65_CPSW_PN_EST_ONEBUF			BIT(0)
+#define AM65_CPSW_PN_EST_BUFSEL			BIT(1)
+#define AM65_CPSW_PN_EST_TS_EN			BIT(2)
+#define AM65_CPSW_PN_EST_TS_FIRST		BIT(3)
+#define AM65_CPSW_PN_EST_ONEPRI			BIT(4)
+#define AM65_CPSW_PN_EST_TS_PRI_MSK		GENMASK(7, 5)
+
+/* AM65_CPSW_PN_REG_FIFO_STATUS register fields */
+#define AM65_CPSW_PN_FST_TX_PRI_ACTIVE_MSK	GENMASK(7, 0)
+#define AM65_CPSW_PN_FST_TX_E_MAC_ALLOW_MSK	GENMASK(15, 8)
+#define AM65_CPSW_PN_FST_EST_CNT_ERR		BIT(16)
+#define AM65_CPSW_PN_FST_EST_ADD_ERR		BIT(17)
+#define AM65_CPSW_PN_FST_EST_BUFACT		BIT(18)
+
+/* EST FETCH COMMAND RAM */
+#define AM65_CPSW_FETCH_RAM_CMD_NUM		0x80
+#define AM65_CPSW_FETCH_CNT_MSK			GENMASK(21, 8)
+#define AM65_CPSW_FETCH_CNT_MAX			(AM65_CPSW_FETCH_CNT_MSK >> 8)
+#define AM65_CPSW_FETCH_CNT_OFFSET		8
+#define AM65_CPSW_FETCH_ALLOW_MSK		GENMASK(7, 0)
+#define AM65_CPSW_FETCH_ALLOW_MAX		AM65_CPSW_FETCH_ALLOW_MSK
+
+enum timer_act {
+	TACT_PROG,		/* need program timer */
+	TACT_NEED_STOP,		/* need stop first */
+	TACT_SKIP_PROG,		/* just buffer can be updated */
+};
+
+static int am65_cpsw_port_est_enabled(struct am65_cpsw_port *port)
+{
+	return port->qos.est_oper || port->qos.est_admin;
+}
+
+static void am65_cpsw_est_enable(struct am65_cpsw_common *common, int enable)
+{
+	u32 val;
+
+	val = readl(common->cpsw_base + AM65_CPSW_REG_CTL);
+
+	if (enable)
+		val |= AM65_CPSW_CTL_EST_EN;
+	else
+		val &= ~AM65_CPSW_CTL_EST_EN;
+
+	writel(val, common->cpsw_base + AM65_CPSW_REG_CTL);
+	common->est_enabled = enable;
+}
+
+static void am65_cpsw_port_est_enable(struct am65_cpsw_port *port, int enable)
+{
+	u32 val;
+
+	val = readl(port->port_base + AM65_CPSW_PN_REG_CTL);
+	if (enable)
+		val |= AM65_CPSW_PN_CTL_EST_PORT_EN;
+	else
+		val &= ~AM65_CPSW_PN_CTL_EST_PORT_EN;
+
+	writel(val, port->port_base + AM65_CPSW_PN_REG_CTL);
+}
+
+/* target new EST RAM buffer, actual toggle happens after cycle completion */
+static void am65_cpsw_port_est_assign_buf_num(struct net_device *ndev,
+					      int buf_num)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	u32 val;
+
+	val = readl(port->port_base + AM65_CPSW_PN_REG_EST_CTL);
+	if (buf_num)
+		val |= AM65_CPSW_PN_EST_BUFSEL;
+	else
+		val &= ~AM65_CPSW_PN_EST_BUFSEL;
+
+	writel(val, port->port_base + AM65_CPSW_PN_REG_EST_CTL);
+}
+
+/* am65_cpsw_port_est_is_swapped() - Indicate if h/w is transitioned
+ * admin -> oper or not
+ *
+ * Return true if already transitioned. i.e oper is equal to admin and buf
+ * numbers match (est_oper->buf match with est_admin->buf).
+ * false if before transition. i.e oper is not equal to admin, (i.e a
+ * previous admin command is waiting to be transitioned to oper state
+ * and est_oper->buf does not match est_admin->buf).
+ */
+static int am65_cpsw_port_est_is_swapped(struct net_device *ndev, int *oper,
+					 int *admin)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	u32 val;
+
+	val = readl(port->port_base + AM65_CPSW_PN_REG_FIFO_STATUS);
+	*oper = !!(val & AM65_CPSW_PN_FST_EST_BUFACT);
+
+	val = readl(port->port_base + AM65_CPSW_PN_REG_EST_CTL);
+	*admin = !!(val & AM65_CPSW_PN_EST_BUFSEL);
+
+	return *admin == *oper;
+}
+
+/* am65_cpsw_port_est_get_free_buf_num() - Get free buffer number for
+ * Admin to program the new schedule.
+ *
+ * Logic as follows:-
+ * If oper is same as admin, return the other buffer (!oper) as the admin
+ * buffer.  If oper is not the same, driver let the current oper to continue
+ * as it is in the process of transitioning from admin -> oper. So keep the
+ * oper by selecting the same oper buffer by writing to EST_BUFSEL bit in
+ * EST CTL register. In the second iteration they will match and code returns.
+ * The actual buffer to write command is selected later before it is ready
+ * to update the schedule.
+ */
+static int am65_cpsw_port_est_get_free_buf_num(struct net_device *ndev)
+{
+	int oper, admin;
+	int roll = 2;
+
+	while (roll--) {
+		if (am65_cpsw_port_est_is_swapped(ndev, &oper, &admin))
+			return !oper;
+
+		/* admin is not set, so hinder transition as it's not allowed
+		 * to touch memory in-flight, by targeting same oper buf.
+		 */
+		am65_cpsw_port_est_assign_buf_num(ndev, oper);
+
+		dev_info(&ndev->dev,
+			 "Prev. EST admin cycle is in transit %d -> %d\n",
+			 oper, admin);
+	}
+
+	return admin;
+}
+
+static void am65_cpsw_admin_to_oper(struct net_device *ndev)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+
+	if (port->qos.est_oper)
+		devm_kfree(&ndev->dev, port->qos.est_oper);
+
+	port->qos.est_oper = port->qos.est_admin;
+	port->qos.est_admin = NULL;
+}
+
+static void am65_cpsw_port_est_get_buf_num(struct net_device *ndev,
+					   struct am65_cpsw_est *est_new)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	u32 val;
+
+	val = readl(port->port_base + AM65_CPSW_PN_REG_EST_CTL);
+	val &= ~AM65_CPSW_PN_EST_ONEBUF;
+	writel(val, port->port_base + AM65_CPSW_PN_REG_EST_CTL);
+
+	est_new->buf = am65_cpsw_port_est_get_free_buf_num(ndev);
+
+	/* rolled buf num means changed buf while configuring */
+	if (port->qos.est_oper && port->qos.est_admin &&
+	    est_new->buf == port->qos.est_oper->buf)
+		am65_cpsw_admin_to_oper(ndev);
+}
+
+static void am65_cpsw_est_set(struct net_device *ndev, int enable)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpsw_common *common = port->common;
+	int common_enable = 0;
+	int i;
+
+	am65_cpsw_port_est_enable(port, enable);
+
+	for (i = 0; i < common->port_num; i++)
+		common_enable |= am65_cpsw_port_est_enabled(&common->ports[i]);
+
+	common_enable |= enable;
+	am65_cpsw_est_enable(common, common_enable);
+}
+
+/* This update is supposed to be used in any routine before getting real state
+ * of admin -> oper transition, particularly it's supposed to be used in some
+ * generic routine for providing real state to Taprio Qdisc.
+ */
+static void am65_cpsw_est_update_state(struct net_device *ndev)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	int oper, admin;
+
+	if (!port->qos.est_admin)
+		return;
+
+	if (!am65_cpsw_port_est_is_swapped(ndev, &oper, &admin))
+		return;
+
+	am65_cpsw_admin_to_oper(ndev);
+}
+
+/* The fetch command count is the number of bytes in Gigabit mode or nibbles
+ * in 10/100Mb mode. So, given the link speed and a time in ns, convert the ns
+ * to the number of bytes/nibbles that can be transmitted at that speed.
+ */
+static int am65_est_cmd_ns_to_cnt(u64 ns, int link_speed)
+{
+	u64 temp;
+
+	temp = ns * link_speed;
+	if (link_speed < SPEED_1000)
+		temp <<= 1;
+
+	return DIV_ROUND_UP(temp, 8 * 1000);
+}
+
+static void __iomem *am65_cpsw_est_set_sched_cmds(void __iomem *addr,
+						  int fetch_cnt,
+						  int fetch_allow)
+{
+	u32 prio_mask, cmd_fetch_cnt, cmd;
+
+	do {
+		if (fetch_cnt > AM65_CPSW_FETCH_CNT_MAX) {
+			fetch_cnt -= AM65_CPSW_FETCH_CNT_MAX;
+			cmd_fetch_cnt = AM65_CPSW_FETCH_CNT_MAX;
+		} else {
+			cmd_fetch_cnt = fetch_cnt;
+			/* fetch count can't be less than 16? */
+			if (cmd_fetch_cnt && cmd_fetch_cnt < 16)
+				cmd_fetch_cnt = 16;
+
+			fetch_cnt = 0;
+		}
+
+		prio_mask = fetch_allow & AM65_CPSW_FETCH_ALLOW_MSK;
+		cmd = (cmd_fetch_cnt << AM65_CPSW_FETCH_CNT_OFFSET) | prio_mask;
+
+		writel(cmd, addr);
+		addr += 4;
+	} while (fetch_cnt);
+
+	return addr;
+}
+
+static int am65_cpsw_est_calc_cmd_num(struct net_device *ndev,
+				      struct tc_taprio_qopt_offload *taprio,
+				      int link_speed)
+{
+	int i, cmd_cnt, cmd_sum = 0;
+	u32 fetch_cnt;
+
+	for (i = 0; i < taprio->num_entries; i++) {
+		if (taprio->entries[i].command != TC_TAPRIO_CMD_SET_GATES) {
+			dev_err(&ndev->dev, "Only SET command is supported");
+			return -EINVAL;
+		}
+
+		fetch_cnt = am65_est_cmd_ns_to_cnt(taprio->entries[i].interval,
+						   link_speed);
+
+		cmd_cnt = DIV_ROUND_UP(fetch_cnt, AM65_CPSW_FETCH_CNT_MAX);
+		if (!cmd_cnt)
+			cmd_cnt++;
+
+		cmd_sum += cmd_cnt;
+
+		if (!fetch_cnt)
+			break;
+	}
+
+	return cmd_sum;
+}
+
+static int am65_cpsw_est_check_scheds(struct net_device *ndev,
+				      struct am65_cpsw_est *est_new)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	int cmd_num;
+
+	cmd_num = am65_cpsw_est_calc_cmd_num(ndev, &est_new->taprio,
+					     port->qos.link_speed);
+	if (cmd_num < 0)
+		return cmd_num;
+
+	if (cmd_num > AM65_CPSW_FETCH_RAM_CMD_NUM / 2) {
+		dev_err(&ndev->dev, "No fetch RAM");
+		return -ENOMEM;
+	}
+
+	return 0;
+}
+
+static void am65_cpsw_est_set_sched_list(struct net_device *ndev,
+					 struct am65_cpsw_est *est_new)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	u32 fetch_cnt, fetch_allow, all_fetch_allow = 0;
+	void __iomem *ram_addr, *max_ram_addr;
+	struct tc_taprio_sched_entry *entry;
+	int i, ram_size;
+
+	ram_addr = port->fetch_ram_base;
+	ram_size = AM65_CPSW_FETCH_RAM_CMD_NUM * 2;
+	ram_addr += est_new->buf * ram_size;
+
+	max_ram_addr = ram_size + ram_addr;
+	for (i = 0; i < est_new->taprio.num_entries; i++) {
+		entry = &est_new->taprio.entries[i];
+
+		fetch_cnt = am65_est_cmd_ns_to_cnt(entry->interval,
+						   port->qos.link_speed);
+		fetch_allow = entry->gate_mask;
+		if (fetch_allow > AM65_CPSW_FETCH_ALLOW_MAX)
+			dev_dbg(&ndev->dev, "fetch_allow > 8 bits: %d\n",
+				fetch_allow);
+
+		ram_addr = am65_cpsw_est_set_sched_cmds(ram_addr, fetch_cnt,
+							fetch_allow);
+
+		if (!fetch_cnt && i < est_new->taprio.num_entries - 1) {
+			dev_info(&ndev->dev,
+				 "next scheds after %d have no impact", i + 1);
+			break;
+		}
+
+		all_fetch_allow |= fetch_allow;
+	}
+
+	/* end cmd, enabling non-timed queues for potential over cycle time */
+	if (ram_addr < max_ram_addr)
+		writel(~all_fetch_allow & AM65_CPSW_FETCH_ALLOW_MSK, ram_addr);
+}
+
+/*
+ * Enable ESTf periodic output, set cycle start time and interval.
+ */
+static int am65_cpsw_timer_set(struct net_device *ndev,
+			       struct am65_cpsw_est *est_new)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpsw_common *common = port->common;
+	struct am65_cpts *cpts = common->cpts;
+	struct am65_cpts_estf_cfg cfg;
+
+	cfg.ns_period = est_new->taprio.cycle_time;
+	cfg.ns_start = est_new->taprio.base_time;
+
+	return am65_cpts_estf_enable(cpts, port->port_id - 1, &cfg);
+}
+
+static void am65_cpsw_timer_stop(struct net_device *ndev)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpts *cpts = port->common->cpts;
+
+	am65_cpts_estf_disable(cpts, port->port_id - 1);
+}
+
+static enum timer_act am65_cpsw_timer_act(struct net_device *ndev,
+					  struct am65_cpsw_est *est_new)
+{
+	struct tc_taprio_qopt_offload *taprio_oper, *taprio_new;
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpts *cpts = port->common->cpts;
+	u64 cur_time;
+	s64 diff;
+
+	if (!port->qos.est_oper)
+		return TACT_PROG;
+
+	taprio_new = &est_new->taprio;
+	taprio_oper = &port->qos.est_oper->taprio;
+
+	if (taprio_new->cycle_time != taprio_oper->cycle_time)
+		return TACT_NEED_STOP;
+
+	/* in order to avoid timer reset get base_time from oper taprio */
+	if (!taprio_new->base_time && taprio_oper)
+		taprio_new->base_time = taprio_oper->base_time;
+
+	if (taprio_new->base_time == taprio_oper->base_time)
+		return TACT_SKIP_PROG;
+
+	/* base times are cycle synchronized */
+	diff = taprio_new->base_time - taprio_oper->base_time;
+	diff = diff < 0 ? -diff : diff;
+	if (diff % taprio_new->cycle_time)
+		return TACT_NEED_STOP;
+
+	cur_time = am65_cpts_ns_gettime(cpts);
+	if (taprio_new->base_time <= cur_time + taprio_new->cycle_time)
+		return TACT_SKIP_PROG;
+
+	/* TODO: Admin schedule at future time is not currently supported */
+	return TACT_NEED_STOP;
+}
+
+static void am65_cpsw_stop_est(struct net_device *ndev)
+{
+	am65_cpsw_est_set(ndev, 0);
+	am65_cpsw_timer_stop(ndev);
+}
+
+static void am65_cpsw_purge_est(struct net_device *ndev)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+
+	am65_cpsw_stop_est(ndev);
+
+	if (port->qos.est_admin)
+		devm_kfree(&ndev->dev, port->qos.est_admin);
+
+	if (port->qos.est_oper)
+		devm_kfree(&ndev->dev, port->qos.est_oper);
+
+	port->qos.est_oper = NULL;
+	port->qos.est_admin = NULL;
+}
+
+static int am65_cpsw_configure_taprio(struct net_device *ndev,
+				      struct am65_cpsw_est *est_new)
+{
+	struct am65_cpsw_common *common = am65_ndev_to_common(ndev);
+	struct am65_cpts *cpts = common->cpts;
+	int ret = 0, tact = TACT_PROG;
+
+	am65_cpsw_est_update_state(ndev);
+
+	if (!est_new->taprio.enable) {
+		am65_cpsw_stop_est(ndev);
+		return ret;
+	}
+
+	ret = am65_cpsw_est_check_scheds(ndev, est_new);
+	if (ret < 0)
+		return ret;
+
+	tact = am65_cpsw_timer_act(ndev, est_new);
+	if (tact == TACT_NEED_STOP) {
+		dev_err(&ndev->dev,
+			"Can't toggle estf timer, stop taprio first");
+		return -EINVAL;
+	}
+
+	if (tact == TACT_PROG)
+		am65_cpsw_timer_stop(ndev);
+
+	if (!est_new->taprio.base_time)
+		est_new->taprio.base_time = am65_cpts_ns_gettime(cpts);
+
+	am65_cpsw_port_est_get_buf_num(ndev, est_new);
+	am65_cpsw_est_set_sched_list(ndev, est_new);
+	am65_cpsw_port_est_assign_buf_num(ndev, est_new->buf);
+
+	am65_cpsw_est_set(ndev, est_new->taprio.enable);
+
+	if (tact == TACT_PROG) {
+		ret = am65_cpsw_timer_set(ndev, est_new);
+		if (ret) {
+			dev_err(&ndev->dev, "Failed to set cycle time");
+			return ret;
+		}
+	}
+
+	return ret;
+}
+
+static void am65_cpsw_cp_taprio(struct tc_taprio_qopt_offload *from,
+				struct tc_taprio_qopt_offload *to)
+{
+	int i;
+
+	*to = *from;
+	for (i = 0; i < from->num_entries; i++)
+		to->entries[i] = from->entries[i];
+}
+
+static int am65_cpsw_set_taprio(struct net_device *ndev, void *type_data)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct tc_taprio_qopt_offload *taprio = type_data;
+	struct am65_cpsw_est *est_new;
+	size_t size;
+	int ret = 0;
+
+	if (taprio->cycle_time_extension) {
+		dev_err(&ndev->dev, "Failed to set cycle time extension");
+		return -EOPNOTSUPP;
+	}
+
+	size = sizeof(struct tc_taprio_sched_entry) * taprio->num_entries +
+	       sizeof(struct am65_cpsw_est);
+
+	est_new = devm_kzalloc(&ndev->dev, size, GFP_KERNEL);
+	if (!est_new)
+		return -ENOMEM;
+
+	am65_cpsw_cp_taprio(taprio, &est_new->taprio);
+	ret = am65_cpsw_configure_taprio(ndev, est_new);
+	if (!ret) {
+		if (taprio->enable) {
+			if (port->qos.est_admin)
+				devm_kfree(&ndev->dev, port->qos.est_admin);
+
+			port->qos.est_admin = est_new;
+		} else {
+			devm_kfree(&ndev->dev, est_new);
+			am65_cpsw_purge_est(ndev);
+		}
+	} else {
+		devm_kfree(&ndev->dev, est_new);
+	}
+
+	return ret;
+}
+
+static void am65_cpsw_est_link_up(struct net_device *ndev, int link_speed)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	ktime_t cur_time;
+	s64 delta;
+
+	port->qos.link_speed = link_speed;
+	if (!am65_cpsw_port_est_enabled(port))
+		return;
+
+	if (port->qos.link_down_time) {
+		cur_time = ktime_get();
+		delta = ktime_us_delta(cur_time, port->qos.link_down_time);
+		if (delta > USEC_PER_SEC) {
+			dev_err(&ndev->dev,
+				"Link has been lost too long, stopping TAS");
+			goto purge_est;
+		}
+	}
+
+	return;
+
+purge_est:
+	am65_cpsw_purge_est(ndev);
+}
+
+static int am65_cpsw_setup_taprio(struct net_device *ndev, void *type_data)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+	struct am65_cpsw_common *common = port->common;
+
+	if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS))
+		return -ENODEV;
+
+	if (!netif_running(ndev)) {
+		dev_err(&ndev->dev, "interface is down, link speed unknown\n");
+		return -ENETDOWN;
+	}
+
+	if (common->pf_p0_rx_ptype_rrobin) {
+		dev_err(&ndev->dev,
+			"p0-rx-ptype-rrobin flag conflicts with taprio qdisc\n");
+		return -EINVAL;
+	}
+
+	if (port->qos.link_speed == SPEED_UNKNOWN)
+		return -ENOLINK;
+
+	return am65_cpsw_set_taprio(ndev, type_data);
+}
+
+int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			       void *type_data)
+{
+	switch (type) {
+	case TC_SETUP_QDISC_TAPRIO:
+		return am65_cpsw_setup_taprio(ndev, type_data);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+void am65_cpsw_qos_link_up(struct net_device *ndev, int link_speed)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+
+	if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS))
+		return;
+
+	am65_cpsw_est_link_up(ndev, link_speed);
+	port->qos.link_down_time = 0;
+}
+
+void am65_cpsw_qos_link_down(struct net_device *ndev)
+{
+	struct am65_cpsw_port *port = am65_ndev_to_port(ndev);
+
+	if (!IS_ENABLED(CONFIG_TI_AM65_CPSW_TAS))
+		return;
+
+	if (!port->qos.link_down_time)
+		port->qos.link_down_time = ktime_get();
+
+	port->qos.link_speed = SPEED_UNKNOWN;
+}
diff --git a/drivers/net/ethernet/ti/am65-cpsw-qos.h b/drivers/net/ethernet/ti/am65-cpsw-qos.h
new file mode 100644
index 000000000000..e8f1b6b59e93
--- /dev/null
+++ b/drivers/net/ethernet/ti/am65-cpsw-qos.h
@@ -0,0 +1,29 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2020 Texas Instruments Incorporated - http://www.ti.com/
+ */
+
+#ifndef AM65_CPSW_QOS_H_
+#define AM65_CPSW_QOS_H_
+
+#include <linux/netdevice.h>
+#include <net/pkt_sched.h>
+
+struct am65_cpsw_est {
+	int buf;
+	/* has to be the last one */
+	struct tc_taprio_qopt_offload taprio;
+};
+
+struct am65_cpsw_qos {
+	struct am65_cpsw_est *est_admin;
+	struct am65_cpsw_est *est_oper;
+	ktime_t link_down_time;
+	int link_speed;
+};
+
+int am65_cpsw_qos_ndo_setup_tc(struct net_device *ndev, enum tc_setup_type type,
+			       void *type_data);
+void am65_cpsw_qos_link_up(struct net_device *ndev, int link_speed);
+void am65_cpsw_qos_link_down(struct net_device *ndev);
+
+#endif /* AM65_CPSW_QOS_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 5a46b062e28f57bffde767437fad3ab1d0cee2c7 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Wed, 13 May 2020 10:28:22 -0700
Subject: devlink: refactor end checks in devlink_nl_cmd_region_read_dumpit

Clean up after recent fixes, move address calculations
around and change the variable init, so that we can have
just one start_offset == end_offset check.

Make the check a little stricter to preserve the -EINVAL
error if requested start offset is larger than the region
itself.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/devlink.c                                 | 41 +++++++++-------------
 .../selftests/drivers/net/netdevsim/devlink.sh     | 15 ++++++++
 2 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/net/core/devlink.c b/net/core/devlink.c
index 20f935fa29f5..7b76e5fffc10 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -4215,7 +4215,6 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
 						struct nlattr **attrs,
 						u64 start_offset,
 						u64 end_offset,
-						bool dump,
 						u64 *new_offset)
 {
 	struct devlink_snapshot *snapshot;
@@ -4230,9 +4229,6 @@ static int devlink_nl_region_read_snapshot_fill(struct sk_buff *skb,
 	if (!snapshot)
 		return -EINVAL;
 
-	if (end_offset > region->size || dump)
-		end_offset = region->size;
-
 	while (curr_offset < end_offset) {
 		u32 data_size;
 		u8 *data;
@@ -4260,13 +4256,12 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 					     struct netlink_callback *cb)
 {
 	const struct genl_dumpit_info *info = genl_dumpit_info(cb);
-	u64 ret_offset, start_offset, end_offset = 0;
+	u64 ret_offset, start_offset, end_offset = U64_MAX;
 	struct nlattr **attrs = info->attrs;
 	struct devlink_region *region;
 	struct nlattr *chunks_attr;
 	const char *region_name;
 	struct devlink *devlink;
-	bool dump = true;
 	void *hdr;
 	int err;
 
@@ -4294,8 +4289,21 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 		goto out_unlock;
 	}
 
+	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
+	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
+		if (!start_offset)
+			start_offset =
+				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+
+		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
+		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
+	}
+
+	if (end_offset > region->size)
+		end_offset = region->size;
+
 	/* return 0 if there is no further data to read */
-	if (start_offset >= region->size) {
+	if (start_offset == end_offset) {
 		err = 0;
 		goto out_unlock;
 	}
@@ -4322,27 +4330,10 @@ static int devlink_nl_cmd_region_read_dumpit(struct sk_buff *skb,
 		goto nla_put_failure;
 	}
 
-	if (attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR] &&
-	    attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]) {
-		if (!start_offset)
-			start_offset =
-				nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-
-		end_offset = nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_ADDR]);
-		end_offset += nla_get_u64(attrs[DEVLINK_ATTR_REGION_CHUNK_LEN]);
-		dump = false;
-
-		if (start_offset == end_offset) {
-			err = 0;
-			goto nla_put_failure;
-		}
-	}
-
 	err = devlink_nl_region_read_snapshot_fill(skb, devlink,
 						   region, attrs,
 						   start_offset,
-						   end_offset, dump,
-						   &ret_offset);
+						   end_offset, &ret_offset);
 
 	if (err && err != -EMSGSIZE)
 		goto nla_put_failure;
diff --git a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
index ad539eccddcb..de4b32fc4223 100755
--- a/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
+++ b/tools/testing/selftests/drivers/net/netdevsim/devlink.sh
@@ -146,6 +146,21 @@ regions_test()
 
 	check_region_snapshot_count dummy post-first-request 3
 
+	devlink region dump $DL_HANDLE/dummy snapshot 25 >> /dev/null
+	check_err $? "Failed to dump snapshot with id 25"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 0 len 1 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (1 byte)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len 128 >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (128 bytes)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr 128 len $((1<<32)) >> /dev/null
+	check_err $? "Failed to read snapshot with id 25 (oversized)"
+
+	devlink region read $DL_HANDLE/dummy snapshot 25 addr $((1<<32)) len 128 >> /dev/null 2>&1
+	check_fail $? "Bad read of snapshot with id 25 did not fail"
+
 	devlink region del $DL_HANDLE/dummy snapshot 25
 	check_err $? "Failed to delete snapshot with id 25"
 
-- 
cgit v1.2.3-59-g8ed1b


From bcab67822d77142c31f69656dd24226f23acc82e Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Wed, 13 May 2020 22:37:33 +0000
Subject: hinic: add set_ringparam ethtool_ops support

Add support for changing the TX/RX queue depth with ethtool -G.

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_dev.h     |  2 +
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 78 +++++++++++++++++++++--
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c  | 11 ++--
 drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h  |  2 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.c   |  4 +-
 drivers/net/ethernet/huawei/hinic/hinic_hw_io.h   |  3 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c   |  1 +
 drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h   |  3 +
 drivers/net/ethernet/huawei/hinic/hinic_main.c    |  9 ++-
 drivers/net/ethernet/huawei/hinic/hinic_port.c    |  2 +-
 drivers/net/ethernet/huawei/hinic/hinic_port.h    |  4 ++
 11 files changed, 104 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
index a621ebbf7610..48b40be3e84d 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_dev.h
@@ -69,6 +69,8 @@ struct hinic_dev {
 
 	struct hinic_txq                *txqs;
 	struct hinic_rxq                *rxqs;
+	u16				sq_depth;
+	u16				rq_depth;
 
 	struct hinic_txq_stats          tx_stats;
 	struct hinic_rxq_stats          rx_stats;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index b426eeced069..ace18d258049 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -538,12 +538,81 @@ static void hinic_get_drvinfo(struct net_device *netdev,
 static void hinic_get_ringparam(struct net_device *netdev,
 				struct ethtool_ringparam *ring)
 {
-	ring->rx_max_pending = HINIC_RQ_DEPTH;
-	ring->tx_max_pending = HINIC_SQ_DEPTH;
-	ring->rx_pending = HINIC_RQ_DEPTH;
-	ring->tx_pending = HINIC_SQ_DEPTH;
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+
+	ring->rx_max_pending = HINIC_MAX_QUEUE_DEPTH;
+	ring->tx_max_pending = HINIC_MAX_QUEUE_DEPTH;
+	ring->rx_pending = nic_dev->rq_depth;
+	ring->tx_pending = nic_dev->sq_depth;
 }
 
+static int check_ringparam_valid(struct hinic_dev *nic_dev,
+				 struct ethtool_ringparam *ring)
+{
+	if (ring->rx_jumbo_pending || ring->rx_mini_pending) {
+		netif_err(nic_dev, drv, nic_dev->netdev,
+			  "Unsupported rx_jumbo_pending/rx_mini_pending\n");
+		return -EINVAL;
+	}
+
+	if (ring->tx_pending > HINIC_MAX_QUEUE_DEPTH ||
+	    ring->tx_pending < HINIC_MIN_QUEUE_DEPTH ||
+	    ring->rx_pending > HINIC_MAX_QUEUE_DEPTH ||
+	    ring->rx_pending < HINIC_MIN_QUEUE_DEPTH) {
+		netif_err(nic_dev, drv, nic_dev->netdev,
+			  "Queue depth out of range [%d-%d]\n",
+			  HINIC_MIN_QUEUE_DEPTH, HINIC_MAX_QUEUE_DEPTH);
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int hinic_set_ringparam(struct net_device *netdev,
+			       struct ethtool_ringparam *ring)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	u16 new_sq_depth, new_rq_depth;
+	int err;
+
+	err = check_ringparam_valid(nic_dev, ring);
+	if (err)
+		return err;
+
+	new_sq_depth = (u16)(1U << (u16)ilog2(ring->tx_pending));
+	new_rq_depth = (u16)(1U << (u16)ilog2(ring->rx_pending));
+
+	if (new_sq_depth == nic_dev->sq_depth &&
+	    new_rq_depth == nic_dev->rq_depth)
+		return 0;
+
+	netif_info(nic_dev, drv, netdev,
+		   "Change Tx/Rx ring depth from %d/%d to %d/%d\n",
+		   nic_dev->sq_depth, nic_dev->rq_depth,
+		   new_sq_depth, new_rq_depth);
+
+	nic_dev->sq_depth = new_sq_depth;
+	nic_dev->rq_depth = new_rq_depth;
+
+	if (netif_running(netdev)) {
+		netif_info(nic_dev, drv, netdev, "Restarting netdev\n");
+		err = hinic_close(netdev);
+		if (err) {
+			netif_err(nic_dev, drv, netdev,
+				  "Failed to close netdev\n");
+			return -EFAULT;
+		}
+
+		err = hinic_open(netdev);
+		if (err) {
+			netif_err(nic_dev, drv, netdev,
+				  "Failed to open netdev\n");
+			return -EFAULT;
+		}
+	}
+
+	return 0;
+}
 static void hinic_get_channels(struct net_device *netdev,
 			       struct ethtool_channels *channels)
 {
@@ -1148,6 +1217,7 @@ static const struct ethtool_ops hinic_ethtool_ops = {
 	.get_drvinfo = hinic_get_drvinfo,
 	.get_link = ethtool_op_get_link,
 	.get_ringparam = hinic_get_ringparam,
+	.set_ringparam = hinic_set_ringparam,
 	.get_channels = hinic_get_channels,
 	.get_rxnfc = hinic_get_rxnfc,
 	.set_rxnfc = hinic_set_rxnfc,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
index 2879b0445eba..0245da02efbb 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.c
@@ -269,8 +269,8 @@ static int init_fw_ctxt(struct hinic_hwdev *hwdev)
  *
  * Return 0 - Success, negative - Failure
  **/
-static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int rq_depth,
-			 unsigned int sq_depth)
+static int set_hw_ioctxt(struct hinic_hwdev *hwdev, unsigned int sq_depth,
+			 unsigned int rq_depth)
 {
 	struct hinic_hwif *hwif = hwdev->hwif;
 	struct hinic_cmd_hw_ioctxt hw_ioctxt;
@@ -435,7 +435,7 @@ static int get_base_qpn(struct hinic_hwdev *hwdev, u16 *base_qpn)
  *
  * Return 0 - Success, negative - Failure
  **/
-int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
+int hinic_hwdev_ifup(struct hinic_hwdev *hwdev, u16 sq_depth, u16 rq_depth)
 {
 	struct hinic_func_to_io *func_to_io = &hwdev->func_to_io;
 	struct hinic_cap *nic_cap = &hwdev->nic_cap;
@@ -458,6 +458,9 @@ int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
 
 	ceq_msix_entries = &hwdev->msix_entries[num_aeqs];
 	func_to_io->hwdev = hwdev;
+	func_to_io->sq_depth = sq_depth;
+	func_to_io->rq_depth = rq_depth;
+
 	err = hinic_io_init(func_to_io, hwif, nic_cap->max_qps, num_ceqs,
 			    ceq_msix_entries);
 	if (err) {
@@ -482,7 +485,7 @@ int hinic_hwdev_ifup(struct hinic_hwdev *hwdev)
 		hinic_db_state_set(hwif, HINIC_DB_ENABLE);
 	}
 
-	err = set_hw_ioctxt(hwdev, HINIC_SQ_DEPTH, HINIC_RQ_DEPTH);
+	err = set_hw_ioctxt(hwdev, sq_depth, rq_depth);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to set HW IO ctxt\n");
 		goto err_hw_ioctxt;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
index ce57914bef72..71ea7e46dbbc 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_dev.h
@@ -347,7 +347,7 @@ int hinic_hilink_msg_cmd(struct hinic_hwdev *hwdev, enum hinic_hilink_cmd cmd,
 			 void *buf_in, u16 in_size, void *buf_out,
 			 u16 *out_size);
 
-int hinic_hwdev_ifup(struct hinic_hwdev *hwdev);
+int hinic_hwdev_ifup(struct hinic_hwdev *hwdev, u16 sq_depth, u16 rq_depth);
 
 void hinic_hwdev_ifdown(struct hinic_hwdev *hwdev);
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
index a4581c988a63..3e3fa742e476 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.c
@@ -282,7 +282,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
 
 	err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->sq_wq[q_id],
 				HINIC_SQ_WQEBB_SIZE, HINIC_SQ_PAGE_SIZE,
-				HINIC_SQ_DEPTH, HINIC_SQ_WQE_MAX_SIZE);
+				func_to_io->sq_depth, HINIC_SQ_WQE_MAX_SIZE);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to allocate WQ for SQ\n");
 		return err;
@@ -290,7 +290,7 @@ static int init_qp(struct hinic_func_to_io *func_to_io,
 
 	err = hinic_wq_allocate(&func_to_io->wqs, &func_to_io->rq_wq[q_id],
 				HINIC_RQ_WQEBB_SIZE, HINIC_RQ_PAGE_SIZE,
-				HINIC_RQ_DEPTH, HINIC_RQ_WQE_SIZE);
+				func_to_io->rq_depth, HINIC_RQ_WQE_SIZE);
 	if (err) {
 		dev_err(&pdev->dev, "Failed to allocate WQ for RQ\n");
 		goto err_rq_alloc;
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
index 28c0594f636d..214f162f7579 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_io.h
@@ -60,6 +60,9 @@ struct hinic_func_to_io {
 	struct hinic_qp         *qps;
 	u16                     max_qps;
 
+	u16			sq_depth;
+	u16			rq_depth;
+
 	void __iomem            **sq_db;
 	void __iomem            *db_base;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
index 20c5c8ea452e..fcf7bfe4aa47 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.c
@@ -643,6 +643,7 @@ void hinic_sq_write_db(struct hinic_sq *sq, u16 prod_idx, unsigned int wqe_size,
 
 	/* increment prod_idx to the next */
 	prod_idx += ALIGN(wqe_size, wq->wqebb_size) / wq->wqebb_size;
+	prod_idx = SQ_MASKED_IDX(sq, prod_idx);
 
 	wmb();  /* Write all before the doorbell */
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
index c30d092e48d5..ca3e2d060284 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_hw_qp.h
@@ -44,6 +44,9 @@
 #define HINIC_SQ_DEPTH                          SZ_4K
 #define HINIC_RQ_DEPTH                          SZ_4K
 
+#define HINIC_MAX_QUEUE_DEPTH			SZ_4K
+#define HINIC_MIN_QUEUE_DEPTH			128
+
 /* In any change to HINIC_RX_BUF_SZ, HINIC_RX_BUF_SZ_IDX must be changed */
 #define HINIC_RX_BUF_SZ                         2048
 #define HINIC_RX_BUF_SZ_IDX			HINIC_RX_BUF_SZ_2048_IDX
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index 3d6569d7bac8..e3ff119fe341 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -372,14 +372,15 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev)
 		netif_err(nic_dev, drv, netdev, "Failed to init rss\n");
 }
 
-static int hinic_open(struct net_device *netdev)
+int hinic_open(struct net_device *netdev)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	enum hinic_port_link_state link_state;
 	int err, ret;
 
 	if (!(nic_dev->flags & HINIC_INTF_UP)) {
-		err = hinic_hwdev_ifup(nic_dev->hwdev);
+		err = hinic_hwdev_ifup(nic_dev->hwdev, nic_dev->sq_depth,
+				       nic_dev->rq_depth);
 		if (err) {
 			netif_err(nic_dev, drv, netdev,
 				  "Failed - HW interface up\n");
@@ -483,7 +484,7 @@ err_create_txqs:
 	return err;
 }
 
-static int hinic_close(struct net_device *netdev)
+int hinic_close(struct net_device *netdev)
 {
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	unsigned int flags;
@@ -1038,6 +1039,8 @@ static int nic_dev_init(struct pci_dev *pdev)
 	nic_dev->rxqs = NULL;
 	nic_dev->tx_weight = tx_weight;
 	nic_dev->rx_weight = rx_weight;
+	nic_dev->sq_depth = HINIC_SQ_DEPTH;
+	nic_dev->rq_depth = HINIC_RQ_DEPTH;
 	nic_dev->sriov_info.hwdev = hwdev;
 	nic_dev->sriov_info.pdev = pdev;
 
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.c b/drivers/net/ethernet/huawei/hinic/hinic_port.c
index 2edb6127f9fb..175c0ee00038 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.c
@@ -473,7 +473,7 @@ int hinic_set_max_qnum(struct hinic_dev *nic_dev, u8 num_rqs)
 
 	rq_num.func_id = HINIC_HWIF_FUNC_IDX(hwif);
 	rq_num.num_rqs = num_rqs;
-	rq_num.rq_depth = ilog2(HINIC_SQ_DEPTH);
+	rq_num.rq_depth = ilog2(nic_dev->rq_depth);
 
 	err = hinic_port_msg_cmd(hwdev, HINIC_PORT_CMD_SET_RQ_IQ_MAP,
 				 &rq_num, sizeof(rq_num),
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_port.h b/drivers/net/ethernet/huawei/hinic/hinic_port.h
index 5f34308abd2b..661c6322dc15 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_port.h
+++ b/drivers/net/ethernet/huawei/hinic/hinic_port.h
@@ -736,4 +736,8 @@ int hinic_get_hw_pause_info(struct hinic_hwdev *hwdev,
 int hinic_set_hw_pause_info(struct hinic_hwdev *hwdev,
 			    struct hinic_pause_config *pause_info);
 
+int hinic_open(struct net_device *netdev);
+
+int hinic_close(struct net_device *netdev);
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 3f044d26f80b0c2ee53f8409cdbb2aca28fa90b1 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Wed, 13 May 2020 22:50:49 +0000
Subject: hinic: update huawei ethernet driver maintainer

update huawei ethernet driver maintainer from aviad to Bin luo

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 734cccf1d1e5..d853f70181a0 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -7811,7 +7811,7 @@ F:	Documentation/devicetree/bindings/iio/humidity/hts221.txt
 F:	drivers/iio/humidity/hts221*
 
 HUAWEI ETHERNET DRIVER
-M:	Aviad Krawczyk <aviad.krawczyk@huawei.com>
+M:	Bin Luo <luobin9@huawei.com>
 L:	netdev@vger.kernel.org
 S:	Supported
 F:	Documentation/networking/hinic.rst
-- 
cgit v1.2.3-59-g8ed1b


From e90b651e7b7f07a7c2b1ec74bfa18dc41ff126bc Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 14 May 2020 09:50:12 -0500
Subject: dt-bindings: net: dp83869: Update licensing info

Add BSD 2 Clause to the licensing.

CC: Rob Herring <robh@kernel.org>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/ti,dp83869.yaml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/devicetree/bindings/net/ti,dp83869.yaml b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
index 6fe3e451da8a..5b69ef03bbf7 100644
--- a/Documentation/devicetree/bindings/net/ti,dp83869.yaml
+++ b/Documentation/devicetree/bindings/net/ti,dp83869.yaml
@@ -1,4 +1,4 @@
-# SPDX-License-Identifier: GPL-2.0
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
 # Copyright (C) 2019 Texas Instruments Incorporated
 %YAML 1.2
 ---
-- 
cgit v1.2.3-59-g8ed1b


From 74ac28f16486aca295ae27aafc8d7e088464b772 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 14 May 2020 10:59:05 -0500
Subject: dt-bindings: dp83867: Convert DP83867 to yaml

Convert the dp83867 binding to yaml.

Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/ti,dp83867.txt         |  68 -----------
 .../devicetree/bindings/net/ti,dp83867.yaml        | 127 +++++++++++++++++++++
 2 files changed, 127 insertions(+), 68 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/ti,dp83867.txt
 create mode 100644 Documentation/devicetree/bindings/net/ti,dp83867.yaml

diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.txt b/Documentation/devicetree/bindings/net/ti,dp83867.txt
deleted file mode 100644
index 44e2a4fab29e..000000000000
--- a/Documentation/devicetree/bindings/net/ti,dp83867.txt
+++ /dev/null
@@ -1,68 +0,0 @@
-* Texas Instruments - dp83867 Giga bit ethernet phy
-
-Required properties:
-	- reg - The ID number for the phy, usually a small integer
-	- ti,rx-internal-delay - RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h
-		for applicable values. Required only if interface type is
-		PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID
-	- ti,tx-internal-delay - RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
-		for applicable values. Required only if interface type is
-		PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID
-
-Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock delays
-      will be left at their default values, as set by the PHY's pin strapping.
-      The default strapping will use a delay of 2.00 ns.  Thus
-      PHY_INTERFACE_MODE_RGMII, by default, does not behave as RGMII with no
-      internal delay, but as PHY_INTERFACE_MODE_RGMII_ID.  The device tree
-      should use "rgmii-id" if internal delays are desired as this may be
-      changed in future to cause "rgmii" mode to disable delays.
-
-Optional property:
-	- ti,min-output-impedance - MAC Interface Impedance control to set
-				    the programmable output impedance to
-				    minimum value (35 ohms).
-	- ti,max-output-impedance - MAC Interface Impedance control to set
-				    the programmable output impedance to
-				    maximum value (70 ohms).
-	- ti,dp83867-rxctrl-strap-quirk - This denotes the fact that the
-				    board has RX_DV/RX_CTRL pin strapped in
-				    mode 1 or 2. To ensure PHY operation,
-				    there are specific actions that
-				    software needs to take when this pin is
-				    strapped in these modes. See data manual
-				    for details.
-	- ti,clk-output-sel - Muxing option for CLK_OUT pin.  See dt-bindings/net/ti-dp83867.h
-			      for applicable values.  The CLK_OUT pin can also
-			      be disabled by this property.  When omitted, the
-			      PHY's default will be left as is.
-	- ti,sgmii-ref-clock-output-enable - This denotes which
-				    SGMII configuration is used (4 or 6-wire modes).
-				    Some MACs work with differential SGMII clock.
-				    See data manual for details.
-
-	- ti,fifo-depth - Transmitt FIFO depth- see dt-bindings/net/ti-dp83867.h
-		for applicable values (deprecated)
-
-	-tx-fifo-depth - As defined in the ethernet-controller.yaml.  Values for
-			 the depth can be found in dt-bindings/net/ti-dp83867.h
-	-rx-fifo-depth - As defined in the ethernet-controller.yaml.  Values for
-			 the depth can be found in dt-bindings/net/ti-dp83867.h
-
-Note: ti,min-output-impedance and ti,max-output-impedance are mutually
-      exclusive. When both properties are present ti,max-output-impedance
-      takes precedence.
-
-Default child nodes are standard Ethernet PHY device
-nodes as described in Documentation/devicetree/bindings/net/phy.txt
-
-Example:
-
-	ethernet-phy@0 {
-		reg = <0>;
-		ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
-		ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_75_NS>;
-		tx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
-	};
-
-Datasheet can be found:
-http://www.ti.com/product/DP83867IR/datasheet
diff --git a/Documentation/devicetree/bindings/net/ti,dp83867.yaml b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
new file mode 100644
index 000000000000..554dcd7a40a9
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/ti,dp83867.yaml
@@ -0,0 +1,127 @@
+# SPDX-License-Identifier: (GPL-2.0+ OR BSD-2-Clause)
+# Copyright (C) 2019 Texas Instruments Incorporated
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/net/ti,dp83867.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: TI DP83867 ethernet PHY
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+maintainers:
+  - Dan Murphy <dmurphy@ti.com>
+
+description: |
+  The DP83867 device is a robust, low power, fully featured Physical Layer
+  transceiver with integrated PMD sublayers to support 10BASE-Te, 100BASE-TX
+  and 1000BASE-T Ethernet protocols.
+
+  The DP83867 is designed for easy implementation of 10/100/1000 Mbps Ethernet
+  LANs. It interfaces directly to twisted pair media via an external
+  transformer. This device interfaces directly to the MAC layer through the
+  IEEE 802.3 Standard Media Independent Interface (MII), the IEEE 802.3 Gigabit
+  Media Independent Interface (GMII) or Reduced GMII (RGMII).
+
+  Specifications about the PHY can be found at:
+    https://www.ti.com/lit/gpn/dp83867ir
+
+properties:
+  reg:
+    maxItems: 1
+
+  ti,min-output-impedance:
+    type: boolean
+    description: |
+       MAC Interface Impedance control to set the programmable output impedance
+       to a minimum value (35 ohms).
+
+  ti,max-output-impedance:
+    type: boolean
+    description: |
+      MAC Interface Impedance control to set the programmable output impedance
+      to a maximum value (70 ohms).
+      Note: ti,min-output-impedance and ti,max-output-impedance are mutually
+        exclusive. When both properties are present ti,max-output-impedance
+        takes precedence.
+
+  tx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Transmit FIFO depth - see dt-bindings/net/ti-dp83867.h for values
+
+  rx-fifo-depth:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+       Receive FIFO depth - see dt-bindings/net/ti-dp83867.h for values
+
+  ti,clk-output-sel:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+      Muxing option for CLK_OUT pin.  See dt-bindings/net/ti-dp83867.h
+      for applicable values. The CLK_OUT pin can also be disabled by this
+      property.  When omitted, the PHY's default will be left as is.
+
+  ti,rx-internal-delay:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+      RGMII Receive Clock Delay - see dt-bindings/net/ti-dp83867.h
+      for applicable values. Required only if interface type is
+      PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_RXID.
+
+  ti,tx-internal-delay:
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+      RGMII Transmit Clock Delay - see dt-bindings/net/ti-dp83867.h
+      for applicable values. Required only if interface type is
+      PHY_INTERFACE_MODE_RGMII_ID or PHY_INTERFACE_MODE_RGMII_TXID.
+
+        Note: If the interface type is PHY_INTERFACE_MODE_RGMII the TX/RX clock
+          delays will be left at their default values, as set by the PHY's pin
+          strapping. The default strapping will use a delay of 2.00 ns.  Thus
+          PHY_INTERFACE_MODE_RGMII, by default, does not behave as RGMII with no
+          internal delay, but as PHY_INTERFACE_MODE_RGMII_ID.  The device tree
+          should use "rgmii-id" if internal delays are desired as this may be
+          changed in future to cause "rgmii" mode to disable delays.
+
+  ti,dp83867-rxctrl-strap-quirk:
+    type: boolean
+    description: |
+      This denotes the fact that the board has RX_DV/RX_CTRL pin strapped in
+      mode 1 or 2. To ensure PHY operation, there are specific actions that
+      software needs to take when this pin is strapped in these modes.
+      See data manual for details.
+
+  ti,sgmii-ref-clock-output-enable:
+    type: boolean
+    description: |
+      This denotes which SGMII configuration is used (4 or 6-wire modes).
+      Some MACs work with differential SGMII clock. See data manual for details.
+
+  ti,fifo-depth:
+    deprecated: true
+    $ref: /schemas/types.yaml#definitions/uint32
+    description: |
+      Transmit FIFO depth - see dt-bindings/net/ti-dp83867.h for applicable
+      values.
+
+required:
+  - reg
+
+examples:
+  - |
+    #include <dt-bindings/net/ti-dp83867.h>
+    mdio0 {
+      #address-cells = <1>;
+      #size-cells = <0>;
+      ethphy0: ethernet-phy@0 {
+        reg = <0>;
+        tx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+        rx-fifo-depth = <DP83867_PHYCR_FIFO_DEPTH_4_B_NIB>;
+        ti,max-output-impedance;
+        ti,clk-output-sel = <DP83867_CLK_O_SEL_CHN_A_RCLK>;
+        ti,rx-internal-delay = <DP83867_RGMIIDCTL_2_25_NS>;
+        ti,tx-internal-delay = <DP83867_RGMIIDCTL_2_75_NS>;
+      };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From 140ad6c8c6c7a89546cdcbc46da6cd56011cfee1 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 14 May 2020 18:59:38 +0200
Subject: net: phy: mdio-moxart: remove unneeded include

mdio-moxart doesn't use regulators in the driver code. We can remove
the regulator include.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-moxart.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/phy/mdio-moxart.c b/drivers/net/phy/mdio-moxart.c
index 2d16fc4173c1..b72c6d185175 100644
--- a/drivers/net/phy/mdio-moxart.c
+++ b/drivers/net/phy/mdio-moxart.c
@@ -12,7 +12,6 @@
 #include <linux/of_mdio.h>
 #include <linux/phy.h>
 #include <linux/platform_device.h>
-#include <linux/regulator/consumer.h>
 
 #define REG_PHY_CTRL            0
 #define REG_PHY_WRITE_DATA      4
-- 
cgit v1.2.3-59-g8ed1b


From b014d0430bcfae581d07d2e90a939b0af783d3a9 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 14 May 2020 19:33:02 +0100
Subject: net: dsa: felix: fix incorrect clamp calculation for burst

Currently burst is clamping on rate and not burst, the assignment
of burst from the clamping discards the previous assignment of burst.
This looks like a cut-n-paste error from the previous clamping
calculation on rate.  Fix this by replacing rate with burst.

Addresses-Coverity: ("Unused value")
Fixes: 0fbabf875d18 ("net: dsa: felix: add support Credit Based Shaper(CBS) for hardware offload")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix_vsc9959.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/dsa/ocelot/felix_vsc9959.c b/drivers/net/dsa/ocelot/felix_vsc9959.c
index df4498c0e864..85e34d85cc51 100644
--- a/drivers/net/dsa/ocelot/felix_vsc9959.c
+++ b/drivers/net/dsa/ocelot/felix_vsc9959.c
@@ -1360,7 +1360,7 @@ static int vsc9959_qos_port_cbs_set(struct dsa_switch *ds, int port,
 	/* Burst unit is 4kB */
 	burst = DIV_ROUND_UP(cbs_qopt->hicredit, 4096);
 	/* Avoid using zero burst size */
-	burst = clamp_t(u32, rate, 1, GENMASK(5, 0));
+	burst = clamp_t(u32, burst, 1, GENMASK(5, 0));
 	ocelot_write_gix(ocelot,
 			 QSYS_CIR_CFG_CIR_RATE(rate) |
 			 QSYS_CIR_CFG_CIR_BURST(burst),
-- 
cgit v1.2.3-59-g8ed1b


From aa443b3f8f3affdc2ea6508ecb20706eee3f67bf Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 14 May 2020 23:39:34 +0200
Subject: r8169: remove not needed checks in rtl8169_set_eee

After 9de5d235b60a ("net: phy: fix aneg restart in phy_ethtool_set_eee")
we don't need the check for aneg being enabled any longer, and as
discussed with Russell configuring the EEE advertisement should be
supported even if we're in a half-duplex mode currently.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index b4e49c446bd0..a2e778417144 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1919,12 +1919,6 @@ static int rtl8169_set_eee(struct net_device *dev, struct ethtool_eee *data)
 		goto out;
 	}
 
-	if (dev->phydev->autoneg == AUTONEG_DISABLE ||
-	    dev->phydev->duplex != DUPLEX_FULL) {
-		ret = -EPROTONOSUPPORT;
-		goto out;
-	}
-
 	ret = phy_ethtool_set_eee(tp->phydev, data);
 
 	if (!ret)
-- 
cgit v1.2.3-59-g8ed1b


From 9b65d2ffe853e4cf81585eaf60ce00237b277dc0 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Thu, 14 May 2020 23:44:07 +0200
Subject: r8169: don't include linux/moduleparam.h

93882c6f210a ("r8169: switch from netif_xxx message functions to
netdev_xxx") removed the last module parameter from the driver,
therefore there's no need any longer to include linux/moduleparam.h.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index a2e778417144..d926583b407f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -10,7 +10,6 @@
  */
 
 #include <linux/module.h>
-#include <linux/moduleparam.h>
 #include <linux/pci.h>
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
-- 
cgit v1.2.3-59-g8ed1b


From 6a09815428547657f3ffd2f5c31ac2a191e7fdf3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 12 May 2020 18:30:40 +0200
Subject: samples/bpf: xdp_redirect_cpu: Set MAX_CPUS according to NR_CPUS

xdp_redirect_cpu is currently failing in bpf_prog_load_xattr()
allocating cpu_map map if CONFIG_NR_CPUS is less than 64 since
cpu_map_alloc() requires max_entries to be less than NR_CPUS.
Set cpu_map max_entries according to NR_CPUS in xdp_redirect_cpu_kern.c
and get currently running cpus in xdp_redirect_cpu_user.c

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/374472755001c260158c4e4b22f193bdd3c56fb7.1589300442.git.lorenzo@kernel.org
---
 samples/bpf/xdp_redirect_cpu_kern.c |  2 +-
 samples/bpf/xdp_redirect_cpu_user.c | 29 ++++++++++++++++-------------
 2 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/samples/bpf/xdp_redirect_cpu_kern.c b/samples/bpf/xdp_redirect_cpu_kern.c
index 313a8fe6d125..2baf8db1f7e7 100644
--- a/samples/bpf/xdp_redirect_cpu_kern.c
+++ b/samples/bpf/xdp_redirect_cpu_kern.c
@@ -15,7 +15,7 @@
 #include <bpf/bpf_helpers.h>
 #include "hash_func01.h"
 
-#define MAX_CPUS 64 /* WARNING - sync with _user.c */
+#define MAX_CPUS NR_CPUS
 
 /* Special map type that can XDP_REDIRECT frames to another CPU */
 struct {
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 15bdf047a222..9b8f21abeac4 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -13,6 +13,7 @@ static const char *__doc__ =
 #include <unistd.h>
 #include <locale.h>
 #include <sys/resource.h>
+#include <sys/sysinfo.h>
 #include <getopt.h>
 #include <net/if.h>
 #include <time.h>
@@ -24,8 +25,6 @@ static const char *__doc__ =
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
-#define MAX_CPUS 64 /* WARNING - sync with _kern.c */
-
 /* How many xdp_progs are defined in _kern.c */
 #define MAX_PROG 6
 
@@ -40,6 +39,7 @@ static char *ifname;
 static __u32 prog_id;
 
 static __u32 xdp_flags = XDP_FLAGS_UPDATE_IF_NOEXIST;
+static int n_cpus;
 static int cpu_map_fd;
 static int rx_cnt_map_fd;
 static int redirect_err_cnt_map_fd;
@@ -170,7 +170,7 @@ struct stats_record {
 	struct record redir_err;
 	struct record kthread;
 	struct record exception;
-	struct record enq[MAX_CPUS];
+	struct record enq[];
 };
 
 static bool map_collect_percpu(int fd, __u32 key, struct record *rec)
@@ -225,10 +225,11 @@ static struct datarec *alloc_record_per_cpu(void)
 static struct stats_record *alloc_stats_record(void)
 {
 	struct stats_record *rec;
-	int i;
+	int i, size;
 
-	rec = malloc(sizeof(*rec));
-	memset(rec, 0, sizeof(*rec));
+	size = sizeof(*rec) + n_cpus * sizeof(struct record);
+	rec = malloc(size);
+	memset(rec, 0, size);
 	if (!rec) {
 		fprintf(stderr, "Mem alloc error\n");
 		exit(EXIT_FAIL_MEM);
@@ -237,7 +238,7 @@ static struct stats_record *alloc_stats_record(void)
 	rec->redir_err.cpu = alloc_record_per_cpu();
 	rec->kthread.cpu   = alloc_record_per_cpu();
 	rec->exception.cpu = alloc_record_per_cpu();
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		rec->enq[i].cpu = alloc_record_per_cpu();
 
 	return rec;
@@ -247,7 +248,7 @@ static void free_stats_record(struct stats_record *r)
 {
 	int i;
 
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		free(r->enq[i].cpu);
 	free(r->exception.cpu);
 	free(r->kthread.cpu);
@@ -350,7 +351,7 @@ static void stats_print(struct stats_record *stats_rec,
 	}
 
 	/* cpumap enqueue stats */
-	for (to_cpu = 0; to_cpu < MAX_CPUS; to_cpu++) {
+	for (to_cpu = 0; to_cpu < n_cpus; to_cpu++) {
 		char *fmt = "%-15s %3d:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
 		char *fm2 = "%-15s %3s:%-3d %'-14.0f %'-11.0f %'-10.2f %s\n";
 		char *errstr = "";
@@ -475,7 +476,7 @@ static void stats_collect(struct stats_record *rec)
 	map_collect_percpu(fd, 1, &rec->redir_err);
 
 	fd = cpumap_enqueue_cnt_map_fd;
-	for (i = 0; i < MAX_CPUS; i++)
+	for (i = 0; i < n_cpus; i++)
 		map_collect_percpu(fd, i, &rec->enq[i]);
 
 	fd = cpumap_kthread_cnt_map_fd;
@@ -549,10 +550,10 @@ static int create_cpu_entry(__u32 cpu, __u32 queue_size,
  */
 static void mark_cpus_unavailable(void)
 {
-	__u32 invalid_cpu = MAX_CPUS;
+	__u32 invalid_cpu = n_cpus;
 	int ret, i;
 
-	for (i = 0; i < MAX_CPUS; i++) {
+	for (i = 0; i < n_cpus; i++) {
 		ret = bpf_map_update_elem(cpus_available_map_fd, &i,
 					  &invalid_cpu, 0);
 		if (ret) {
@@ -688,6 +689,8 @@ int main(int argc, char **argv)
 	int prog_fd;
 	__u32 qsize;
 
+	n_cpus = get_nprocs_conf();
+
 	/* Notice: choosing he queue size is very important with the
 	 * ixgbe driver, because it's driver page recycling trick is
 	 * dependend on pages being returned quickly.  The number of
@@ -757,7 +760,7 @@ int main(int argc, char **argv)
 		case 'c':
 			/* Add multiple CPUs */
 			add_cpu = strtoul(optarg, NULL, 0);
-			if (add_cpu >= MAX_CPUS) {
+			if (add_cpu >= n_cpus) {
 				fprintf(stderr,
 				"--cpu nr too large for cpumap err(%d):%s\n",
 					errno, strerror(errno));
-- 
cgit v1.2.3-59-g8ed1b


From 7aebfa1b3885b5aa29fcb4a596d0485ac463bbe8 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 13 May 2020 18:50:27 -0700
Subject: bpf: Support narrow loads from bpf_sock_addr.user_port

bpf_sock_addr.user_port supports only 4-byte load and it leads to ugly
code in BPF programs, like:

	volatile __u32 user_port = ctx->user_port;
	__u16 port = bpf_ntohs(user_port);

Since otherwise clang may optimize the load to be 2-byte and it's
rejected by verifier.

Add support for 1- and 2-byte loads same way as it's supported for other
fields in bpf_sock_addr like user_ip4, msg_src_ip4, etc.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/c1e983f4c17573032601d0b2b1f9d1274f24bc16.1589420814.git.rdna@fb.com
---
 include/uapi/linux/bpf.h       |  2 +-
 net/core/filter.c              | 15 +++++++--------
 tools/include/uapi/linux/bpf.h |  2 +-
 3 files changed, 9 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index bfb31c1be219..85cfdffde182 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3728,7 +3728,7 @@ struct bpf_sock_addr {
 	__u32 user_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 user_port;	/* Allows 4-byte read and write.
+	__u32 user_port;	/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order
 				 */
 	__u32 family;		/* Allows 4-byte read, but no write */
diff --git a/net/core/filter.c b/net/core/filter.c
index da0634979f53..1fe8c0c2d408 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7029,6 +7029,7 @@ static bool sock_addr_is_valid_access(int off, int size,
 	case bpf_ctx_range(struct bpf_sock_addr, msg_src_ip4):
 	case bpf_ctx_range_till(struct bpf_sock_addr, msg_src_ip6[0],
 				msg_src_ip6[3]):
+	case bpf_ctx_range(struct bpf_sock_addr, user_port):
 		if (type == BPF_READ) {
 			bpf_ctx_record_field_size(info, size_default);
 
@@ -7059,10 +7060,6 @@ static bool sock_addr_is_valid_access(int off, int size,
 				return false;
 		}
 		break;
-	case bpf_ctx_range(struct bpf_sock_addr, user_port):
-		if (size != size_default)
-			return false;
-		break;
 	case offsetof(struct bpf_sock_addr, sk):
 		if (type != BPF_READ)
 			return false;
@@ -7958,8 +7955,8 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 					struct bpf_insn *insn_buf,
 					struct bpf_prog *prog, u32 *target_size)
 {
+	int off, port_size = sizeof_field(struct sockaddr_in6, sin6_port);
 	struct bpf_insn *insn = insn_buf;
-	int off;
 
 	switch (si->off) {
 	case offsetof(struct bpf_sock_addr, user_family):
@@ -7994,9 +7991,11 @@ static u32 sock_addr_convert_ctx_access(enum bpf_access_type type,
 			     offsetof(struct sockaddr_in6, sin6_port));
 		BUILD_BUG_ON(sizeof_field(struct sockaddr_in, sin_port) !=
 			     sizeof_field(struct sockaddr_in6, sin6_port));
-		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD(struct bpf_sock_addr_kern,
-						     struct sockaddr_in6, uaddr,
-						     sin6_port, tmp_reg);
+		/* Account for sin6_port being smaller than user_port. */
+		port_size = min(port_size, BPF_LDST_BYTES(si));
+		SOCK_ADDR_LOAD_OR_STORE_NESTED_FIELD_SIZE_OFF(
+			struct bpf_sock_addr_kern, struct sockaddr_in6, uaddr,
+			sin6_port, bytes_to_bpf_size(port_size), 0, tmp_reg);
 		break;
 
 	case offsetof(struct bpf_sock_addr, family):
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index bfb31c1be219..85cfdffde182 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3728,7 +3728,7 @@ struct bpf_sock_addr {
 	__u32 user_ip6[4];	/* Allows 1,2,4,8-byte read and 4,8-byte write.
 				 * Stored in network byte order.
 				 */
-	__u32 user_port;	/* Allows 4-byte read and write.
+	__u32 user_port;	/* Allows 1,2,4-byte read and 4-byte write.
 				 * Stored in network byte order
 				 */
 	__u32 family;		/* Allows 4-byte read, but no write */
-- 
cgit v1.2.3-59-g8ed1b


From 0645f7eb6f6af78aba2bdd37ae776bd8754bc8f0 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Wed, 13 May 2020 18:50:28 -0700
Subject: selftests/bpf: Test narrow loads for bpf_sock_addr.user_port

Test 1,2,4-byte loads from bpf_sock_addr.user_port in sock_addr
programs.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/e5c734a58cca4041ab30cb5471e644246f8cdb5a.1589420814.git.rdna@fb.com
---
 tools/testing/selftests/bpf/test_sock_addr.c | 38 ++++++++++++++++++++--------
 1 file changed, 28 insertions(+), 10 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sock_addr.c b/tools/testing/selftests/bpf/test_sock_addr.c
index 61fd95b89af8..0358814c67dc 100644
--- a/tools/testing/selftests/bpf/test_sock_addr.c
+++ b/tools/testing/selftests/bpf/test_sock_addr.c
@@ -677,7 +677,7 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		uint8_t u4_addr8[4];
 		uint16_t u4_addr16[2];
 		uint32_t u4_addr32;
-	} ip4;
+	} ip4, port;
 	struct sockaddr_in addr4_rw;
 
 	if (inet_pton(AF_INET, SERV4_IP, (void *)&ip4) != 1) {
@@ -685,6 +685,8 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		return -1;
 	}
 
+	port.u4_addr32 = htons(SERV4_PORT);
+
 	if (mk_sockaddr(AF_INET, SERV4_REWRITE_IP, SERV4_REWRITE_PORT,
 			(struct sockaddr *)&addr4_rw, sizeof(addr4_rw)) == -1)
 		return -1;
@@ -696,49 +698,65 @@ static int bind4_prog_load(const struct sock_addr_test *test)
 		/* if (sk.family == AF_INET && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, family)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 24),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, AF_INET, 32),
 
 		/*     (sk.type == SOCK_DGRAM || sk.type == SOCK_STREAM) && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, type)),
 		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_DGRAM, 1),
 		BPF_JMP_A(1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 20),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, SOCK_STREAM, 28),
 
 		/*     1st_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 18),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[0], 26),
 
 		/*     2nd_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 1),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 16),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[1], 24),
 
 		/*     3rd_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 14),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[2], 22),
 
 		/*     4th_byte_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 3),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 12),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr8[3], 20),
 
 		/*     1st_half_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 10),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[0], 18),
 
 		/*     2nd_half_of_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4) + 2),
-		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 8),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, ip4.u4_addr16[1], 16),
 
-		/*     whole_user_ip4 == expected) { */
+		/*     whole_user_ip4 == expected && */
 		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
 			    offsetof(struct bpf_sock_addr, user_ip4)),
 		BPF_LD_IMM64(BPF_REG_8, ip4.u4_addr32), /* See [2]. */
+		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 12),
+
+		/*     1st_byte_of_user_port == expected && */
+		BPF_LDX_MEM(BPF_B, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr8[0], 10),
+
+		/*     1st_half_of_user_port == expected && */
+		BPF_LDX_MEM(BPF_H, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_JMP_IMM(BPF_JNE, BPF_REG_7, port.u4_addr16[0], 8),
+
+		/*     user_port == expected) { */
+		BPF_LDX_MEM(BPF_W, BPF_REG_7, BPF_REG_6,
+			    offsetof(struct bpf_sock_addr, user_port)),
+		BPF_LD_IMM64(BPF_REG_8, port.u4_addr32), /* See [2]. */
 		BPF_JMP_REG(BPF_JNE, BPF_REG_7, BPF_REG_8, 4),
 
 		/*      user_ip4 = addr4_rw.sin_addr */
-- 
cgit v1.2.3-59-g8ed1b


From c70f34a8ac66c2cb05593ef5760142e5f862a9b4 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Wed, 13 May 2020 22:51:37 -0700
Subject: bpf: Fix bpf_iter's task iterator logic

task_seq_get_next might stop prematurely if get_pid_task() fails to get
task_struct. Such a failure doesn't mean that there are no more tasks with
higher pids. Procfs's iteration algorithm (see next_tgid in fs/proc/base.c)
retries in such a case. After this fix, instead of stopping prematurely
after about 300 tasks on my server, the bpf_iter program now returns >4000
tasks, which sounds much closer to reality.

Fixes: eaaacd23910f ("bpf: Add task and task/file iterator targets")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200514055137.1564581-1-andriin@fb.com
---
 kernel/bpf/task_iter.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/task_iter.c b/kernel/bpf/task_iter.c
index a9b7264dda08..4dbf2b6035f8 100644
--- a/kernel/bpf/task_iter.c
+++ b/kernel/bpf/task_iter.c
@@ -27,9 +27,15 @@ static struct task_struct *task_seq_get_next(struct pid_namespace *ns,
 	struct pid *pid;
 
 	rcu_read_lock();
+retry:
 	pid = idr_get_next(&ns->idr, tid);
-	if (pid)
+	if (pid) {
 		task = get_pid_task(pid, PIDTYPE_PID);
+		if (!task) {
+			++*tid;
+			goto retry;
+		}
+	}
 	rcu_read_unlock();
 
 	return task;
-- 
cgit v1.2.3-59-g8ed1b


From 5b0004d92b4511c39db0df23aa84395722f1d706 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 14 May 2020 13:15:29 +0100
Subject: selftest/bpf: Fix spelling mistake "SIGALARM" -> "SIGALRM"

There is a spelling mistake in an error message, fix it.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200514121529.259668-1-colin.king@canonical.com
---
 tools/testing/selftests/bpf/bench.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 8c0dfbfe6088..14390689ef90 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -242,7 +242,7 @@ static void setup_timer()
 	last_time_ns = get_time_ns();
 	err = sigaction(SIGALRM, &sigalarm_action, NULL);
 	if (err < 0) {
-		fprintf(stderr, "failed to install SIGALARM handler: %d\n", -errno);
+		fprintf(stderr, "failed to install SIGALRM handler: %d\n", -errno);
 		exit(1);
 	}
 	timer_settings.it_interval.tv_sec = 1;
-- 
cgit v1.2.3-59-g8ed1b


From d56c2f95adb3d401bf982b6cf8fc4bb6d2f7acdd Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 14 May 2020 13:03:45 -0700
Subject: bpf: Allow sk lookup helpers in cgroup skb

Currently sk lookup helpers are allowed in tc, xdp, sk skb, and cgroup
sock_addr programs.

But they would be useful in cgroup skb as well, so that, for example, a
cgroup skb ingress program can look up the peer socket a packet comes
from on the same host and decide whether to allow or deny this packet
based on the properties of that socket, e.g. the cgroup that peer socket
belongs to.

Allow the following sk lookup helpers in cgroup skb:
* bpf_sk_lookup_tcp;
* bpf_sk_lookup_udp;
* bpf_sk_release;
* bpf_skc_lookup_tcp.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/f8c7ee280f1582b586629436d777b6db00597d63.1589486450.git.rdna@fb.com
---
 net/core/filter.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 1fe8c0c2d408..9c3eada5c86c 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6159,6 +6159,14 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_cgroup_id_proto;
 #endif
 #ifdef CONFIG_INET
+	case BPF_FUNC_sk_lookup_tcp:
+		return &bpf_sk_lookup_tcp_proto;
+	case BPF_FUNC_sk_lookup_udp:
+		return &bpf_sk_lookup_udp_proto;
+	case BPF_FUNC_sk_release:
+		return &bpf_sk_release_proto;
+	case BPF_FUNC_skc_lookup_tcp:
+		return &bpf_skc_lookup_tcp_proto;
 	case BPF_FUNC_tcp_sock:
 		return &bpf_tcp_sock_proto;
 	case BPF_FUNC_get_listener_sock:
-- 
cgit v1.2.3-59-g8ed1b


From 06d3e4c9f11afc849dc201ecf9ef7a43eeb1dddd Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 14 May 2020 13:03:46 -0700
Subject: bpf: Allow skb_ancestor_cgroup_id helper in cgroup skb

cgroup skb programs can already use bpf_skb_cgroup_id. Allow
bpf_skb_ancestor_cgroup_id as well, so that policies can be implemented
for a container that may have sub-cgroups created dynamically, while
still being based on the cgroup id of the container itself rather than
on the id of a sub-cgroup.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/8874194d6041eba190356453ea9f6071edf5f658.1589486450.git.rdna@fb.com
---
 net/core/filter.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index 9c3eada5c86c..a47dc5b9dad4 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6157,6 +6157,8 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 #ifdef CONFIG_SOCK_CGROUP_DATA
 	case BPF_FUNC_skb_cgroup_id:
 		return &bpf_skb_cgroup_id_proto;
+	case BPF_FUNC_skb_ancestor_cgroup_id:
+		return &bpf_skb_ancestor_cgroup_id_proto;
 #endif
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_tcp:
-- 
cgit v1.2.3-59-g8ed1b


From f307fa2cb4c935f7f1ff0aeb880c7b44fb9a642b Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 14 May 2020 13:03:47 -0700
Subject: bpf: Introduce bpf_sk_{, ancestor_}cgroup_id helpers

With the ability to look up sockets in cgroup skb programs, it becomes
useful to access the cgroup id of retrieved sockets so that policies can
be implemented based on the origin cgroup of such a socket.

For example, a container running in a cgroup can have a cgroup skb
ingress program that looks up the peer socket sending packets to a
process inside the container and decides whether those packets should be
allowed or denied based on the cgroup id of the peer.

More specifically, such an ingress program can implement the intra-host
policy "allow incoming packets only from this same container and not
from any other container on the same host" without relying on source IP
addresses, since containers quite often share the same IP address on
the host.

Introduce two new helpers for this use-case: bpf_sk_cgroup_id() and
bpf_sk_ancestor_cgroup_id().

These helpers are similar to existing bpf_skb_{,ancestor_}cgroup_id
helpers with the only difference that sk is used to get cgroup id
instead of skb, and share code with them.

See documentation in UAPI for more details.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/f5884981249ce911f63e9b57ecd5d7d19154ff39.1589486450.git.rdna@fb.com
---
 include/uapi/linux/bpf.h       | 36 ++++++++++++++++++++++++-
 net/core/filter.c              | 60 +++++++++++++++++++++++++++++++++++-------
 tools/include/uapi/linux/bpf.h | 36 ++++++++++++++++++++++++-
 3 files changed, 121 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 85cfdffde182..146c742f1d49 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3121,6 +3121,38 @@ union bpf_attr {
  * 		0 on success, or a negative error in case of failure:
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ *	Description
+ *		Return the cgroup v2 id of the socket *sk*.
+ *
+ *		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *		returned from **bpf_sk_lookup_xxx**\ (),
+ *		**bpf_sk_fullsock**\ (), etc. The format of returned id is
+ *		same as in **bpf_skb_cgroup_id**\ ().
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *sk* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *sk*, then return value will be same as that
+ *		of **bpf_sk_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *sk*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_sk_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3250,7 +3282,9 @@ union bpf_attr {
 	FN(sk_assign),			\
 	FN(ktime_get_boot_ns),		\
 	FN(seq_printf),			\
-	FN(seq_write),
+	FN(seq_write),			\
+	FN(sk_cgroup_id),		\
+	FN(sk_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
diff --git a/net/core/filter.c b/net/core/filter.c
index a47dc5b9dad4..5815902bb617 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4003,16 +4003,22 @@ static const struct bpf_func_proto bpf_skb_under_cgroup_proto = {
 };
 
 #ifdef CONFIG_SOCK_CGROUP_DATA
+static inline u64 __bpf_sk_cgroup_id(struct sock *sk)
+{
+	struct cgroup *cgrp;
+
+	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
+	return cgroup_id(cgrp);
+}
+
 BPF_CALL_1(bpf_skb_cgroup_id, const struct sk_buff *, skb)
 {
 	struct sock *sk = skb_to_full_sk(skb);
-	struct cgroup *cgrp;
 
 	if (!sk || !sk_fullsock(sk))
 		return 0;
 
-	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
-	return cgroup_id(cgrp);
+	return __bpf_sk_cgroup_id(sk);
 }
 
 static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
@@ -4022,16 +4028,12 @@ static const struct bpf_func_proto bpf_skb_cgroup_id_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 };
 
-BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
-	   ancestor_level)
+static inline u64 __bpf_sk_ancestor_cgroup_id(struct sock *sk,
+					      int ancestor_level)
 {
-	struct sock *sk = skb_to_full_sk(skb);
 	struct cgroup *ancestor;
 	struct cgroup *cgrp;
 
-	if (!sk || !sk_fullsock(sk))
-		return 0;
-
 	cgrp = sock_cgroup_ptr(&sk->sk_cgrp_data);
 	ancestor = cgroup_ancestor(cgrp, ancestor_level);
 	if (!ancestor)
@@ -4040,6 +4042,17 @@ BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
 	return cgroup_id(ancestor);
 }
 
+BPF_CALL_2(bpf_skb_ancestor_cgroup_id, const struct sk_buff *, skb, int,
+	   ancestor_level)
+{
+	struct sock *sk = skb_to_full_sk(skb);
+
+	if (!sk || !sk_fullsock(sk))
+		return 0;
+
+	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
 static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
 	.func           = bpf_skb_ancestor_cgroup_id,
 	.gpl_only       = false,
@@ -4047,6 +4060,31 @@ static const struct bpf_func_proto bpf_skb_ancestor_cgroup_id_proto = {
 	.arg1_type      = ARG_PTR_TO_CTX,
 	.arg2_type      = ARG_ANYTHING,
 };
+
+BPF_CALL_1(bpf_sk_cgroup_id, struct sock *, sk)
+{
+	return __bpf_sk_cgroup_id(sk);
+}
+
+static const struct bpf_func_proto bpf_sk_cgroup_id_proto = {
+	.func           = bpf_sk_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_SOCKET,
+};
+
+BPF_CALL_2(bpf_sk_ancestor_cgroup_id, struct sock *, sk, int, ancestor_level)
+{
+	return __bpf_sk_ancestor_cgroup_id(sk, ancestor_level);
+}
+
+static const struct bpf_func_proto bpf_sk_ancestor_cgroup_id_proto = {
+	.func           = bpf_sk_ancestor_cgroup_id,
+	.gpl_only       = false,
+	.ret_type       = RET_INTEGER,
+	.arg1_type      = ARG_PTR_TO_SOCKET,
+	.arg2_type      = ARG_ANYTHING,
+};
 #endif
 
 static unsigned long bpf_xdp_copy(void *dst_buff, const void *src_buff,
@@ -6159,6 +6197,10 @@ cg_skb_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_cgroup_id_proto;
 	case BPF_FUNC_skb_ancestor_cgroup_id:
 		return &bpf_skb_ancestor_cgroup_id_proto;
+	case BPF_FUNC_sk_cgroup_id:
+		return &bpf_sk_cgroup_id_proto;
+	case BPF_FUNC_sk_ancestor_cgroup_id:
+		return &bpf_sk_ancestor_cgroup_id_proto;
 #endif
 #ifdef CONFIG_INET
 	case BPF_FUNC_sk_lookup_tcp:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 85cfdffde182..146c742f1d49 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3121,6 +3121,38 @@ union bpf_attr {
  * 		0 on success, or a negative error in case of failure:
  *
  *		**-EOVERFLOW** if an overflow happened: The same object will be tried again.
+ *
+ * u64 bpf_sk_cgroup_id(struct bpf_sock *sk)
+ *	Description
+ *		Return the cgroup v2 id of the socket *sk*.
+ *
+ *		*sk* must be a non-**NULL** pointer to a full socket, e.g. one
+ *		returned from **bpf_sk_lookup_xxx**\ (),
+ *		**bpf_sk_fullsock**\ (), etc. The format of returned id is
+ *		same as in **bpf_skb_cgroup_id**\ ().
+ *
+ *		This helper is available only if the kernel was compiled with
+ *		the **CONFIG_SOCK_CGROUP_DATA** configuration option.
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * u64 bpf_sk_ancestor_cgroup_id(struct bpf_sock *sk, int ancestor_level)
+ *	Description
+ *		Return id of cgroup v2 that is ancestor of cgroup associated
+ *		with the *sk* at the *ancestor_level*.  The root cgroup is at
+ *		*ancestor_level* zero and each step down the hierarchy
+ *		increments the level. If *ancestor_level* == level of cgroup
+ *		associated with *sk*, then return value will be same as that
+ *		of **bpf_sk_cgroup_id**\ ().
+ *
+ *		The helper is useful to implement policies based on cgroups
+ *		that are upper in hierarchy than immediate cgroup associated
+ *		with *sk*.
+ *
+ *		The format of returned id and helper limitations are same as in
+ *		**bpf_sk_cgroup_id**\ ().
+ *	Return
+ *		The id is returned or 0 in case the id could not be retrieved.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3250,7 +3282,9 @@ union bpf_attr {
 	FN(sk_assign),			\
 	FN(ktime_get_boot_ns),		\
 	FN(seq_printf),			\
-	FN(seq_write),
+	FN(seq_write),			\
+	FN(sk_cgroup_id),		\
+	FN(sk_ancestor_cgroup_id),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
-- 
cgit v1.2.3-59-g8ed1b


From 383724e17ab02d8e440def7792c4e151b13ef4d4 Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 14 May 2020 13:03:48 -0700
Subject: selftests/bpf: Add connect_fd_to_fd, connect_wait net helpers

Add two new network helpers.

connect_fd_to_fd connects an already created client socket fd to the
address of a server fd. Sometimes it's useful to separate client socket
creation from connecting this socket to a server, e.g. if the client
socket has to be created in a cgroup different from that of the server.

Additionally, connect_to_fd is now implemented using connect_fd_to_fd.
Neither helper treats EINPROGRESS as an error; both let the caller
decide how to proceed with it.

connect_wait is a helper for working with non-blocking client sockets:
if connect_fd_to_fd returned -1 with errno == EINPROGRESS (connect_to_fd
returns the fd in that case), the caller can wait for the connect to
finish or for the connection to time out. The helper returns -1 on
error, 0 on timeout (1sec, hard-coded), and a positive number on success.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/1403fab72300f379ca97ead4820ae43eac4414ef.1589486450.git.rdna@fb.com
---
 tools/testing/selftests/bpf/network_helpers.c | 74 ++++++++++++++++++++++-----
 tools/testing/selftests/bpf/network_helpers.h |  2 +
 2 files changed, 63 insertions(+), 13 deletions(-)

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 0ff64b70b746..999a775484c1 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -4,10 +4,14 @@
 #include <stdio.h>
 #include <string.h>
 #include <unistd.h>
+
+#include <sys/epoll.h>
+
 #include <linux/err.h>
 #include <linux/in.h>
 #include <linux/in6.h>
 
+#include "bpf_util.h"
 #include "network_helpers.h"
 
 #define clean_errno() (errno == 0 ? "None" : strerror(errno))
@@ -77,9 +81,7 @@ static const size_t timeo_optlen = sizeof(timeo_sec);
 
 int connect_to_fd(int family, int type, int server_fd)
 {
-	struct sockaddr_storage addr;
-	socklen_t len = sizeof(addr);
-	int fd;
+	int fd, save_errno;
 
 	fd = socket(family, type, 0);
 	if (fd < 0) {
@@ -87,24 +89,70 @@ int connect_to_fd(int family, int type, int server_fd)
 		return -1;
 	}
 
-	if (setsockopt(fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec, timeo_optlen)) {
+	if (connect_fd_to_fd(fd, server_fd) < 0 && errno != EINPROGRESS) {
+		save_errno = errno;
+		close(fd);
+		errno = save_errno;
+		return -1;
+	}
+
+	return fd;
+}
+
+int connect_fd_to_fd(int client_fd, int server_fd)
+{
+	struct sockaddr_storage addr;
+	socklen_t len = sizeof(addr);
+	int save_errno;
+
+	if (setsockopt(client_fd, SOL_SOCKET, SO_RCVTIMEO, &timeo_sec,
+		       timeo_optlen)) {
 		log_err("Failed to set SO_RCVTIMEO");
-		goto out;
+		return -1;
 	}
 
 	if (getsockname(server_fd, (struct sockaddr *)&addr, &len)) {
 		log_err("Failed to get server addr");
-		goto out;
+		return -1;
 	}
 
-	if (connect(fd, (const struct sockaddr *)&addr, len) < 0) {
-		log_err("Fail to connect to server with family %d", family);
-		goto out;
+	if (connect(client_fd, (const struct sockaddr *)&addr, len) < 0) {
+		if (errno != EINPROGRESS) {
+			save_errno = errno;
+			log_err("Failed to connect to server");
+			errno = save_errno;
+		}
+		return -1;
 	}
 
-	return fd;
+	return 0;
+}
+
+int connect_wait(int fd)
+{
+	struct epoll_event ev = {}, events[2];
+	int timeout_ms = 1000;
+	int efd, nfd;
+
+	efd = epoll_create1(EPOLL_CLOEXEC);
+	if (efd < 0) {
+		log_err("Failed to open epoll fd");
+		return -1;
+	}
+
+	ev.events = EPOLLRDHUP | EPOLLOUT;
+	ev.data.fd = fd;
+
+	if (epoll_ctl(efd, EPOLL_CTL_ADD, fd, &ev) < 0) {
+		log_err("Failed to register fd=%d on epoll fd=%d", fd, efd);
+		close(efd);
+		return -1;
+	}
+
+	nfd = epoll_wait(efd, events, ARRAY_SIZE(events), timeout_ms);
+	if (nfd < 0)
+		log_err("Failed to wait for I/O event on epoll fd=%d", efd);
 
-out:
-	close(fd);
-	return -1;
+	close(efd);
+	return nfd;
 }
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index a0be7db4f67d..86914e6e7b53 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -35,5 +35,7 @@ extern struct ipv6_packet pkt_v6;
 
 int start_server(int family, int type);
 int connect_to_fd(int family, int type, int server_fd);
+int connect_fd_to_fd(int client_fd, int server_fd);
+int connect_wait(int client_fd);
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 68e916bc8d3211ffe0b4c418184ab1b57398200c Mon Sep 17 00:00:00 2001
From: Andrey Ignatov <rdna@fb.com>
Date: Thu, 14 May 2020 13:03:49 -0700
Subject: selftests/bpf: Test for sk helpers in cgroup skb

Test bpf_sk_lookup_tcp, bpf_sk_release, bpf_sk_cgroup_id and
bpf_sk_ancestor_cgroup_id helpers from cgroup skb program.

The test creates a testing cgroup, starts a TCPv6 server inside the
cgroup and creates two client sockets: one inside testing cgroup and one
outside.

Then it attaches cgroup skb program to the cgroup that checks all TCP
segments coming to the server and allows only those coming from the
cgroup of the server. If a segment comes from a peer outside of the
cgroup, it'll be dropped.

Finally, the test checks that the client inside the testing cgroup can
successfully connect to the server, while the client outside the cgroup
fails to connect and times out.

The main goal of the test is to check newly introduced
bpf_sk_{,ancestor_}cgroup_id helpers.

It also checks a couple of socket lookup helpers (tcp & release), but
lookup helpers were introduced much earlier and covered by other tests.
Here it's mostly checked that they can be called from cgroup skb.

Signed-off-by: Andrey Ignatov <rdna@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/171f4c5d75e8ff4fe1c4e8c1c12288b5240a4549.1589486450.git.rdna@fb.com
---
 .../bpf/prog_tests/cgroup_skb_sk_lookup.c          | 95 +++++++++++++++++++++
 .../bpf/progs/cgroup_skb_sk_lookup_kern.c          | 97 ++++++++++++++++++++++
 2 files changed, 192 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
 create mode 100644 tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c

diff --git a/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
new file mode 100644
index 000000000000..059047af7df3
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/cgroup_skb_sk_lookup.c
@@ -0,0 +1,95 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <test_progs.h>
+
+#include "network_helpers.h"
+#include "cgroup_skb_sk_lookup_kern.skel.h"
+
+static void run_lookup_test(__u16 *g_serv_port, int out_sk)
+{
+	int serv_sk = -1, in_sk = -1, serv_in_sk = -1, err;
+	struct sockaddr_in6 addr = {};
+	socklen_t addr_len = sizeof(addr);
+	__u32 duration = 0;
+
+	serv_sk = start_server(AF_INET6, SOCK_STREAM);
+	if (CHECK(serv_sk < 0, "start_server", "failed to start server\n"))
+		return;
+
+	err = getsockname(serv_sk, (struct sockaddr *)&addr, &addr_len);
+	if (CHECK(err, "getsockname", "errno %d\n", errno))
+		goto cleanup;
+
+	*g_serv_port = addr.sin6_port;
+
+	/* Client outside of test cgroup should fail to connect by timeout. */
+	err = connect_fd_to_fd(out_sk, serv_sk);
+	if (CHECK(!err || errno != EINPROGRESS, "connect_fd_to_fd",
+		  "unexpected result err %d errno %d\n", err, errno))
+		goto cleanup;
+
+	err = connect_wait(out_sk);
+	if (CHECK(err, "connect_wait", "unexpected result %d\n", err))
+		goto cleanup;
+
+	/* Client inside test cgroup should connect just fine. */
+	in_sk = connect_to_fd(AF_INET6, SOCK_STREAM, serv_sk);
+	if (CHECK(in_sk < 0, "connect_to_fd", "errno %d\n", errno))
+		goto cleanup;
+
+	serv_in_sk = accept(serv_sk, NULL, NULL);
+	if (CHECK(serv_in_sk < 0, "accept", "errno %d\n", errno))
+		goto cleanup;
+
+cleanup:
+	close(serv_in_sk);
+	close(in_sk);
+	close(serv_sk);
+}
+
+static void run_cgroup_bpf_test(const char *cg_path, int out_sk)
+{
+	struct cgroup_skb_sk_lookup_kern *skel;
+	struct bpf_link *link;
+	__u32 duration = 0;
+	int cgfd = -1;
+
+	skel = cgroup_skb_sk_lookup_kern__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "open_load failed\n"))
+		return;
+
+	cgfd = test__join_cgroup(cg_path);
+	if (CHECK(cgfd < 0, "cgroup_join", "cgroup setup failed\n"))
+		goto cleanup;
+
+	link = bpf_program__attach_cgroup(skel->progs.ingress_lookup, cgfd);
+	if (CHECK(IS_ERR(link), "cgroup_attach", "err: %ld\n", PTR_ERR(link)))
+		goto cleanup;
+
+	run_lookup_test(&skel->bss->g_serv_port, out_sk);
+
+	bpf_link__destroy(link);
+
+cleanup:
+	close(cgfd);
+	cgroup_skb_sk_lookup_kern__destroy(skel);
+}
+
+void test_cgroup_skb_sk_lookup(void)
+{
+	const char *cg_path = "/foo";
+	int out_sk;
+
+	/* Create a socket before joining testing cgroup so that its cgroup id
+	 * differs from that of testing cgroup. Moving selftests process to
+	 * testing cgroup won't change cgroup id of an already created socket.
+	 */
+	out_sk = socket(AF_INET6, SOCK_STREAM | SOCK_NONBLOCK, 0);
+	if (CHECK_FAIL(out_sk < 0))
+		return;
+
+	run_cgroup_bpf_test(cg_path, out_sk);
+
+	close(out_sk);
+}
diff --git a/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
new file mode 100644
index 000000000000..3f757e30d7a0
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/cgroup_skb_sk_lookup_kern.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_endian.h>
+#include <bpf/bpf_helpers.h>
+
+#include <linux/if_ether.h>
+#include <linux/in.h>
+#include <linux/in6.h>
+#include <linux/ipv6.h>
+#include <linux/tcp.h>
+
+#include <sys/types.h>
+#include <sys/socket.h>
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
+
+__u16 g_serv_port = 0;
+
+static inline void set_ip(__u32 *dst, const struct in6_addr *src)
+{
+	dst[0] = src->in6_u.u6_addr32[0];	/* copy all four 32-bit words */
+	dst[1] = src->in6_u.u6_addr32[1];
+	dst[2] = src->in6_u.u6_addr32[2];
+	dst[3] = src->in6_u.u6_addr32[3];
+}
+
+static inline void set_tuple(struct bpf_sock_tuple *tuple,
+			     const struct ipv6hdr *ip6h,
+			     const struct tcphdr *tcph)
+{
+	set_ip(tuple->ipv6.saddr, &ip6h->daddr);	/* packet dst -> tuple src */
+	set_ip(tuple->ipv6.daddr, &ip6h->saddr);	/* packet src -> tuple dst */
+	tuple->ipv6.sport = tcph->dest;		/* ports are swapped likewise */
+	tuple->ipv6.dport = tcph->source;
+}
+
+static inline int is_allowed_peer_cg(struct __sk_buff *skb,
+				     const struct ipv6hdr *ip6h,
+				     const struct tcphdr *tcph)
+{
+	__u64 cgid, acgid, peer_cgid, peer_acgid;
+	struct bpf_sock_tuple tuple;
+	size_t tuple_len = sizeof(tuple.ipv6);
+	struct bpf_sock *peer_sk;
+
+	set_tuple(&tuple, ip6h, tcph);	/* tuple has src/dst swapped */
+
+	peer_sk = bpf_sk_lookup_tcp(skb, &tuple, tuple_len,
+				    BPF_F_CURRENT_NETNS, 0);
+	if (!peer_sk)
+		return 0;	/* no socket found for the tuple */
+
+	cgid = bpf_skb_cgroup_id(skb);
+	peer_cgid = bpf_sk_cgroup_id(peer_sk);
+
+	acgid = bpf_skb_ancestor_cgroup_id(skb, 2);	/* 2 = ancestor level */
+	peer_acgid = bpf_sk_ancestor_cgroup_id(peer_sk, 2);
+
+	bpf_sk_release(peer_sk);	/* pairs with bpf_sk_lookup_tcp() above */
+
+	return cgid && cgid == peer_cgid && acgid && acgid == peer_acgid;
+}
+
+SEC("cgroup_skb/ingress")
+int ingress_lookup(struct __sk_buff *skb)
+{
+	struct ipv6hdr ip6h;
+	struct tcphdr tcph;
+
+	if (skb->protocol != bpf_htons(ETH_P_IPV6))
+		return 1;
+
+	/* For SYN packets coming to listening socket skb->remote_port will be
+	 * zero, so IPv6/TCP headers are loaded to identify remote peer
+	 * instead.
+	 */
+	if (bpf_skb_load_bytes(skb, 0, &ip6h, sizeof(ip6h)))
+		return 1;
+
+	if (ip6h.nexthdr != IPPROTO_TCP)
+		return 1;
+
+	if (bpf_skb_load_bytes(skb, sizeof(ip6h), &tcph, sizeof(tcph)))
+		return 1;
+
+	/* Server port not published yet (g_serv_port == 0): reject packet. */
+	if (!g_serv_port)
+		return 0;
+
+	if (tcph.dest != g_serv_port)
+		return 1;
+
+	return is_allowed_peer_cg(skb, &ip6h, &tcph);
+}
-- 
cgit v1.2.3-59-g8ed1b


From f95f0f95cfb7f180ed7571d4915432d5098df7ec Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:02 +0200
Subject: xdp: Add frame size to xdp_buff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

XDP has evolved to support several frame sizes, but xdp_buff was not
updated with this information. The frame size (frame_sz) member of
xdp_buff is introduced to know the real size of the memory the frame is
delivered in.

When introducing this also make it clear that some tailroom is
reserved/required when creating SKBs using build_skb().

It would also have been an option to introduce a pointer to
data_hard_end (with reserved offset). The advantage with frame_sz is
that (like rxq) drivers only need to setup/assign this value once per
NAPI cycle. Due to XDP-generic (and some drivers) it's not possible to
store frame_sz inside xdp_rxq_info, because it varies per packet as it
can be based/depend on packet length.

V2: nitpick: deduct -> deduce

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945334261.97035.555255657490688547.stgit@firesoul
---
 include/net/xdp.h | 13 +++++++++++++
 1 file changed, 13 insertions(+)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 3cc6d5d84aa4..a764af4ae0ea 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -6,6 +6,8 @@
 #ifndef __LINUX_NET_XDP_H__
 #define __LINUX_NET_XDP_H__
 
+#include <linux/skbuff.h> /* skb_shared_info */
+
 /**
  * DOC: XDP RX-queue information
  *
@@ -70,8 +72,19 @@ struct xdp_buff {
 	void *data_hard_start;
 	unsigned long handle;
 	struct xdp_rxq_info *rxq;
+	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
 };
 
+/* Reserve a memory area at the end of the data area.
+ *
+ * Expands to data_hard_start + frame_sz - aligned sizeof(skb_shared_info),
+ * the limit for XDP/BPF data access; build_skb() uses that same area (and
+ * size) on XDP_PASS.  (xdp) is evaluated twice: avoid side effects in it.
+ */
+#define xdp_data_hard_end(xdp)				\
+	((xdp)->data_hard_start + (xdp)->frame_sz -	\
+	 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
+
 struct xdp_frame {
 	void *data;
 	u16 len;
-- 
cgit v1.2.3-59-g8ed1b


From 63fe91ab3d1c5c0b4497b993b8eeaa54f6688d53 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:07 +0200
Subject: bnxt: Add XDP frame size to driver

This driver uses full PAGE_SIZE pages when XDP is enabled.

In case of XDP, the driver uses __bnxt_alloc_rx_page which does full
page DMA-map. Thus, xdp_adjust_tail grow is DMA compliant for XDP_TX
action that does DMA-sync.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Andy Gospodarek <gospo@broadcom.com>
Cc: Michael Chan <michael.chan@broadcom.com>
Cc: Andy Gospodarek <andrew.gospodarek@broadcom.com>
Link: https://lore.kernel.org/bpf/158945334769.97035.13437970179897613984.stgit@firesoul
---
 drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
index c6f6f2033880..5e3b4a3b69ea 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_xdp.c
@@ -138,6 +138,7 @@ bool bnxt_rx_xdp(struct bnxt *bp, struct bnxt_rx_ring_info *rxr, u16 cons,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = *data_ptr + *len;
 	xdp.rxq = &rxr->xdp_rxq;
+	xdp.frame_sz = PAGE_SIZE; /* BNXT_RX_PAGE_MODE(bp) when XDP enabled */
 	orig_data = xdp.data;
 
 	rcu_read_lock();
-- 
cgit v1.2.3-59-g8ed1b


From 983e43451830742fa93f83656ccbdcb865ea4259 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:12 +0200
Subject: sfc: Add XDP frame size

This driver uses RX page-split when possible. It was recently fixed
in commit 86e85bf6981c ("sfc: fix XDP-redirect in this driver") to
add needed tailroom for XDP-redirect.

After the fix efx->rx_page_buf_step is the frame size, with enough
head and tail-room for XDP-redirect.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158945335278.97035.14611425333184621652.stgit@firesoul
---
 drivers/net/ethernet/sfc/rx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 260352d97d9d..68c47a8c71df 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -308,6 +308,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + rx_buf->len;
 	xdp.rxq = &rx_queue->xdp_rxq_info;
+	xdp.frame_sz = efx->rx_page_buf_step;
 
 	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
 	rcu_read_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 494f44d54e25dd79af0ed6734c2d6be0aa0b6d94 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:17 +0200
Subject: mvneta: Add XDP frame size to driver

This marvell driver mvneta uses PAGE_SIZE frames, which makes it
really easy to convert.  Driver updates rxq and now frame_sz
once per NAPI call.

This driver takes advantage of page_pool PP_FLAG_DMA_SYNC_DEV that
can help reduce the number of cache-lines that need to be flushed
when doing DMA sync for_device. Because xdp_adjust_tail can grow the
area accessible by the CPU (which it can possibly write into), the max
sync length *after* bpf_prog_run_xdp() needs to be taken into account.

For XDP_TX action the driver is smart and does DMA-sync. When growing
tail this is still safe, because page_pool have DMA-mapped the entire
page size.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Cc: thomas.petazzoni@bootlin.com
Link: https://lore.kernel.org/bpf/158945335786.97035.12714388304493736747.stgit@firesoul
---
 drivers/net/ethernet/marvell/mvneta.c | 25 +++++++++++++++----------
 1 file changed, 15 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index e0e9e56830c0..41d2a0eac5fa 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2148,12 +2148,17 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 	       struct bpf_prog *prog, struct xdp_buff *xdp,
 	       struct mvneta_stats *stats)
 {
-	unsigned int len;
+	unsigned int len, sync;
+	struct page *page;
 	u32 ret, act;
 
 	len = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
 	act = bpf_prog_run_xdp(prog, xdp);
 
+	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+	sync = xdp->data_end - xdp->data_hard_start - pp->rx_offset_correction;
+	sync = max(sync, len);
+
 	switch (act) {
 	case XDP_PASS:
 		stats->xdp_pass++;
@@ -2164,9 +2169,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 		err = xdp_do_redirect(pp->dev, xdp, prog);
 		if (unlikely(err)) {
 			ret = MVNETA_XDP_DROPPED;
-			page_pool_put_page(rxq->page_pool,
-					   virt_to_head_page(xdp->data), len,
-					   true);
+			page = virt_to_head_page(xdp->data);
+			page_pool_put_page(rxq->page_pool, page, sync, true);
 		} else {
 			ret = MVNETA_XDP_REDIR;
 			stats->xdp_redirect++;
@@ -2175,10 +2179,10 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 	}
 	case XDP_TX:
 		ret = mvneta_xdp_xmit_back(pp, xdp);
-		if (ret != MVNETA_XDP_TX)
-			page_pool_put_page(rxq->page_pool,
-					   virt_to_head_page(xdp->data), len,
-					   true);
+		if (ret != MVNETA_XDP_TX) {
+			page = virt_to_head_page(xdp->data);
+			page_pool_put_page(rxq->page_pool, page, sync, true);
+		}
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(act);
@@ -2187,8 +2191,8 @@ mvneta_run_xdp(struct mvneta_port *pp, struct mvneta_rx_queue *rxq,
 		trace_xdp_exception(pp->dev, prog, act);
 		/* fall through */
 	case XDP_DROP:
-		page_pool_put_page(rxq->page_pool,
-				   virt_to_head_page(xdp->data), len, true);
+		page = virt_to_head_page(xdp->data);
+		page_pool_put_page(rxq->page_pool, page, sync, true);
 		ret = MVNETA_XDP_DROPPED;
 		stats->xdp_drop++;
 		break;
@@ -2320,6 +2324,7 @@ static int mvneta_rx_swbm(struct napi_struct *napi,
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(pp->xdp_prog);
 	xdp_buf.rxq = &rxq->xdp_rxq;
+	xdp_buf.frame_sz = PAGE_SIZE;
 
 	/* Fairness NAPI loop */
 	while (rx_proc < budget && rx_proc < rx_todo) {
-- 
cgit v1.2.3-59-g8ed1b


From 495de55f70199bd7ea09079c484283e58bf75c82 Mon Sep 17 00:00:00 2001
From: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Date: Thu, 14 May 2020 12:49:23 +0200
Subject: net: netsec: Add support for XDP frame size

This driver takes advantage of page_pool PP_FLAG_DMA_SYNC_DEV that
can help reduce the number of cache-lines that need to be flushed
when doing DMA sync for_device. Because xdp_adjust_tail can grow the
area accessible by the CPU (which it can possibly write into), the max
sync length *after* bpf_prog_run_xdp() needs to be taken into account.

For XDP_TX action the driver is smart and does DMA-sync. When growing
tail this is still safe, because page_pool have DMA-mapped the entire
page size.

Signed-off-by: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Lorenzo Bianconi <lorenzo@kernel.org>
Link: https://lore.kernel.org/bpf/158945336295.97035.15034759661036971024.stgit@firesoul
---
 drivers/net/ethernet/socionext/netsec.c | 30 ++++++++++++++++++------------
 1 file changed, 18 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index a5a0fb60193a..e1f4be4b3d69 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -884,23 +884,28 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 			  struct xdp_buff *xdp)
 {
 	struct netsec_desc_ring *dring = &priv->desc_ring[NETSEC_RING_RX];
-	unsigned int len = xdp->data_end - xdp->data;
+	unsigned int sync, len = xdp->data_end - xdp->data;
 	u32 ret = NETSEC_XDP_PASS;
+	struct page *page;
 	int err;
 	u32 act;
 
 	act = bpf_prog_run_xdp(prog, xdp);
 
+	/* Due xdp_adjust_tail: DMA sync for_device cover max len CPU touch */
+	sync = xdp->data_end - xdp->data_hard_start - NETSEC_RXBUF_HEADROOM;
+	sync = max(sync, len);
+
 	switch (act) {
 	case XDP_PASS:
 		ret = NETSEC_XDP_PASS;
 		break;
 	case XDP_TX:
 		ret = netsec_xdp_xmit_back(priv, xdp);
-		if (ret != NETSEC_XDP_TX)
-			page_pool_put_page(dring->page_pool,
-					   virt_to_head_page(xdp->data), len,
-					   true);
+		if (ret != NETSEC_XDP_TX) {
+			page = virt_to_head_page(xdp->data);
+			page_pool_put_page(dring->page_pool, page, sync, true);
+		}
 		break;
 	case XDP_REDIRECT:
 		err = xdp_do_redirect(priv->ndev, xdp, prog);
@@ -908,9 +913,8 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 			ret = NETSEC_XDP_REDIR;
 		} else {
 			ret = NETSEC_XDP_CONSUMED;
-			page_pool_put_page(dring->page_pool,
-					   virt_to_head_page(xdp->data), len,
-					   true);
+			page = virt_to_head_page(xdp->data);
+			page_pool_put_page(dring->page_pool, page, sync, true);
 		}
 		break;
 	default:
@@ -921,8 +925,8 @@ static u32 netsec_run_xdp(struct netsec_priv *priv, struct bpf_prog *prog,
 		/* fall through -- handle aborts by dropping packet */
 	case XDP_DROP:
 		ret = NETSEC_XDP_CONSUMED;
-		page_pool_put_page(dring->page_pool,
-				   virt_to_head_page(xdp->data), len, true);
+		page = virt_to_head_page(xdp->data);
+		page_pool_put_page(dring->page_pool, page, sync, true);
 		break;
 	}
 
@@ -936,10 +940,14 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 	struct netsec_rx_pkt_info rx_info;
 	enum dma_data_direction dma_dir;
 	struct bpf_prog *xdp_prog;
+	struct xdp_buff xdp;
 	u16 xdp_xmit = 0;
 	u32 xdp_act = 0;
 	int done = 0;
 
+	xdp.rxq = &dring->xdp_rxq;
+	xdp.frame_sz = PAGE_SIZE;
+
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(priv->xdp_prog);
 	dma_dir = page_pool_get_dma_dir(dring->page_pool);
@@ -953,7 +961,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 		struct sk_buff *skb = NULL;
 		u16 pkt_len, desc_len;
 		dma_addr_t dma_handle;
-		struct xdp_buff xdp;
 		void *buf_addr;
 
 		if (de->attr & (1U << NETSEC_RX_PKT_OWN_FIELD)) {
@@ -1002,7 +1009,6 @@ static int netsec_process_rx(struct netsec_priv *priv, int budget)
 		xdp.data = desc->addr + NETSEC_RXBUF_HEADROOM;
 		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + pkt_len;
-		xdp.rxq = &dring->xdp_rxq;
 
 		if (xdp_prog) {
 			xdp_result = netsec_run_xdp(priv, xdp_prog, &xdp);
-- 
cgit v1.2.3-59-g8ed1b


From a075767bbdc659066b89be282c8377fa880e9dc4 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:28 +0200
Subject: net: XDP-generic determining XDP frame size
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The SKB "head" pointer points to the data area that contains
skb_shared_info, that can be found via skb_end_pointer(). Given
xdp->data_hard_start have been established (basically pointing to
skb->head), frame size is between skb_end_pointer() and data_hard_start,
plus the size reserved to skb_shared_info.

Change the bpf_xdp_adjust_tail offset adjust of skb->len, to be a positive
offset number on grow, and negative number on shrink.  As this seems more
natural when reading the code.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945336804.97035.7164852191163722056.stgit@firesoul
---
 net/core/dev.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/net/core/dev.c b/net/core/dev.c
index 4c91de39890a..f937a3ff668d 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -4617,6 +4617,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 	xdp->data_meta = xdp->data;
 	xdp->data_end = xdp->data + hlen;
 	xdp->data_hard_start = skb->data - skb_headroom(skb);
+
+	/* SKB "head" area always have tailroom for skb_shared_info */
+	xdp->frame_sz  = (void *)skb_end_pointer(skb) - xdp->data_hard_start;
+	xdp->frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	orig_data_end = xdp->data_end;
 	orig_data = xdp->data;
 	eth = (struct ethhdr *)xdp->data;
@@ -4640,14 +4645,11 @@ static u32 netif_receive_generic_xdp(struct sk_buff *skb,
 		skb_reset_network_header(skb);
 	}
 
-	/* check if bpf_xdp_adjust_tail was used. it can only "shrink"
-	 * pckt.
-	 */
-	off = orig_data_end - xdp->data_end;
+	/* check if bpf_xdp_adjust_tail was used */
+	off = xdp->data_end - orig_data_end;
 	if (off != 0) {
 		skb_set_tail_pointer(skb, xdp->data_end - xdp->data);
-		skb->len -= off;
-
+		skb->len += off; /* positive on grow, negative on shrink */
 	}
 
 	/* check if XDP changed eth hdr such SKB needs update */
-- 
cgit v1.2.3-59-g8ed1b


From 34cc0b338a61de3eee3a2bfcaf4f9d6e9fae091a Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:33 +0200
Subject: xdp: Xdp_frame add member frame_sz and handle in convert_to_xdp_frame
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use hole in struct xdp_frame, when adding member frame_sz, which keeps
same sizeof struct (32 bytes)

Drivers ixgbe and sfc had bug cases where the necessary/expected
tailroom was not reserved. This can lead to some hard to catch memory
corruption issues. Having the drivers frame_sz this can be detected when
packet length/end via xdp->data_end exceeds the xdp_data_hard_end
pointer, which accounts for the reserved tailroom.

When detecting this driver issue, simply fail the conversion with NULL,
which results in feedback to driver (failing xdp_do_redirect()) causing
driver to drop packet. Given the lack of consistent XDP stats, this can
be hard to troubleshoot. And given this is a driver bug, we want to
generate some more noise in form of a WARN stack dump (to ID the driver
code that inlined convert_to_xdp_frame).

Inlining the WARN macro is problematic, because it adds an asm
instruction (on Intel CPUs ud2) that influences instruction cache
prefetching. Thus, introduce xdp_warn and macro XDP_WARN, to avoid this
and at the same time make identifying the function and line of this
inlined function easier.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945337313.97035.10015729316710496600.stgit@firesoul
---
 include/net/xdp.h | 14 +++++++++++++-
 net/core/xdp.c    |  8 ++++++++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index a764af4ae0ea..3094fccf5a88 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -89,7 +89,8 @@ struct xdp_frame {
 	void *data;
 	u16 len;
 	u16 headroom;
-	u16 metasize;
+	u32 metasize:8;
+	u32 frame_sz:24;
 	/* Lifetime of xdp_rxq_info is limited to NAPI/enqueue time,
 	 * while mem info is valid on remote CPU.
 	 */
@@ -104,6 +105,10 @@ static inline void xdp_scrub_frame(struct xdp_frame *frame)
 	frame->dev_rx = NULL;
 }
 
+/* Avoids inlining WARN macro in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line);
+#define XDP_WARN(msg) xdp_warn(msg, __func__, __LINE__)
+
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 
 /* Convert xdp_buff to xdp_frame */
@@ -124,6 +129,12 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	if (unlikely((headroom - metasize) < sizeof(*xdp_frame)))
 		return NULL;
 
+	/* Catch if driver didn't reserve tailroom for skb_shared_info */
+	if (unlikely(xdp->data_end > xdp_data_hard_end(xdp))) {
+		XDP_WARN("Driver BUG: missing reserved tailroom");
+		return NULL;
+	}
+
 	/* Store info in top of packet */
 	xdp_frame = xdp->data_hard_start;
 
@@ -131,6 +142,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	xdp_frame->len  = xdp->data_end - xdp->data;
 	xdp_frame->headroom = headroom - sizeof(*xdp_frame);
 	xdp_frame->metasize = metasize;
+	xdp_frame->frame_sz = xdp->frame_sz;
 
 	/* rxq only valid until napi_schedule ends, convert to xdp_mem_info */
 	xdp_frame->mem = xdp->rxq->mem;
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 4c7ea85486af..490b8f5fa8ee 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -11,6 +11,7 @@
 #include <linux/slab.h>
 #include <linux/idr.h>
 #include <linux/rhashtable.h>
+#include <linux/bug.h>
 #include <net/page_pool.h>
 
 #include <net/xdp.h>
@@ -496,3 +497,10 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
 	return xdpf;
 }
 EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
+
+/* Backs XDP_WARN(): out-of-line so WARN() is not inlined in fast-path */
+void xdp_warn(const char *msg, const char *func, const int line)
+{
+	WARN(1, "XDP_WARN: %s(line:%d): %s\n", func, line, msg);
+}
+EXPORT_SYMBOL_GPL(xdp_warn);
-- 
cgit v1.2.3-59-g8ed1b


From db612f749e2454c506f20155bba2871f0307d133 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:38 +0200
Subject: xdp: Cpumap redirect use frame_sz and increase skb_tailroom
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Knowing the memory size backing the packet/xdp_frame data area, and
knowing it already have reserved room for skb_shared_info, simplifies
using build_skb significantly.

With this change we no-longer lie about the SKB truesize, but more
importantly a significant larger skb_tailroom is now provided, e.g. when
drivers uses a full PAGE_SIZE. This extra tailroom (in linear area) can be
used by the network stack when coalescing SKBs (e.g. in skb_try_coalesce,
see TCP cases where tcp_queue_rcv() can 'eat' skb).

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945337822.97035.13557959180460986059.stgit@firesoul
---
 kernel/bpf/cpumap.c | 21 +++------------------
 1 file changed, 3 insertions(+), 18 deletions(-)

diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 3fe0b006d2d2..a71790dab12d 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -162,25 +162,10 @@ static struct sk_buff *cpu_map_build_skb(struct bpf_cpu_map_entry *rcpu,
 	/* Part of headroom was reserved to xdpf */
 	hard_start_headroom = sizeof(struct xdp_frame) +  xdpf->headroom;
 
-	/* build_skb need to place skb_shared_info after SKB end, and
-	 * also want to know the memory "truesize".  Thus, need to
-	 * know the memory frame size backing xdp_buff.
-	 *
-	 * XDP was designed to have PAGE_SIZE frames, but this
-	 * assumption is not longer true with ixgbe and i40e.  It
-	 * would be preferred to set frame_size to 2048 or 4096
-	 * depending on the driver.
-	 *   frame_size = 2048;
-	 *   frame_len  = frame_size - sizeof(*xdp_frame);
-	 *
-	 * Instead, with info avail, skb_shared_info in placed after
-	 * packet len.  This, unfortunately fakes the truesize.
-	 * Another disadvantage of this approach, the skb_shared_info
-	 * is not at a fixed memory location, with mixed length
-	 * packets, which is bad for cache-line hotness.
+	/* Memory size backing xdp_frame data already have reserved
+	 * room for build_skb to place skb_shared_info in tailroom.
 	 */
-	frame_size = SKB_DATA_ALIGN(xdpf->len + hard_start_headroom) +
-		SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	frame_size = xdpf->frame_sz;
 
 	pkt_data_start = xdpf->data - hard_start_headroom;
 	skb = build_skb_around(skb, pkt_data_start, frame_size);
-- 
cgit v1.2.3-59-g8ed1b


From 5c8572251fabc5bb49fd623c064e95a9daf6a3e3 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:43 +0200
Subject: veth: Adjust hard_start offset on redirect XDP frames
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When native XDP redirect into a veth device, the frame arrives in the
xdp_frame structure. It is then processed in veth_xdp_rcv_one(),
which can run a new XDP bpf_prog on the packet. Doing so requires
converting xdp_frame to xdp_buff, but the tricky part is that
xdp_frame memory area is located in the top (data_hard_start) memory
area that xdp_buff will point into.

The current code tried to protect the xdp_frame area, by assigning
xdp_buff.data_hard_start past this memory. This results in 32 bytes
less headroom to expand into via BPF-helper bpf_xdp_adjust_head().

This protect step is actually not needed, because BPF-helper
bpf_xdp_adjust_head() already reserve this area, and don't allow
BPF-prog to expand into it. Thus, it is safe to point data_hard_start
directly at xdp_frame memory area.

Fixes: 9fc8d518d9d5 ("veth: Handle xdp_frames in xdp napi ring")
Reported-by: Mao Wenan <maowenan@huawei.com>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945338331.97035.5923525383710752178.stgit@firesoul
---
 drivers/net/veth.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index aece0e5eec8c..d5691bb84448 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -564,13 +564,15 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 					struct veth_stats *stats)
 {
 	void *hard_start = frame->data - frame->headroom;
-	void *head = hard_start - sizeof(struct xdp_frame);
 	int len = frame->len, delta = 0;
 	struct xdp_frame orig_frame;
 	struct bpf_prog *xdp_prog;
 	unsigned int headroom;
 	struct sk_buff *skb;
 
+	/* bpf_xdp_adjust_head() assures BPF cannot access xdp_frame area */
+	hard_start -= sizeof(struct xdp_frame);
+
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(rq->xdp_prog);
 	if (likely(xdp_prog)) {
@@ -592,7 +594,6 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 			break;
 		case XDP_TX:
 			orig_frame = *frame;
-			xdp.data_hard_start = head;
 			xdp.rxq->mem = frame->mem;
 			if (unlikely(veth_xdp_tx(rq, &xdp, bq) < 0)) {
 				trace_xdp_exception(rq->dev, xdp_prog, act);
@@ -605,7 +606,6 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 			goto xdp_xmit;
 		case XDP_REDIRECT:
 			orig_frame = *frame;
-			xdp.data_hard_start = head;
 			xdp.rxq->mem = frame->mem;
 			if (xdp_do_redirect(rq->dev, &xdp, xdp_prog)) {
 				frame = &orig_frame;
@@ -629,7 +629,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
-	skb = veth_build_skb(head, headroom, len, 0);
+	skb = veth_build_skb(hard_start, headroom, len, 0);
 	if (!skb) {
 		xdp_return_frame(frame);
 		stats->rx_drops++;
-- 
cgit v1.2.3-59-g8ed1b


From 45a9e6d8a687e6a0ea6c2f78f15955ae96be4720 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:48 +0200
Subject: veth: Xdp using frame_sz in veth driver
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The veth driver can run XDP in "native" mode in its own NAPI
handler, and since commit 9fc8d518d9d5 ("veth: Handle xdp_frames in
xdp napi ring") packets can come in two forms either xdp_frame or
skb, calling respectively veth_xdp_rcv_one() or veth_xdp_rcv_skb().

For packets to arrive in xdp_frame format, they will have been
redirected from an XDP native driver. In case of XDP_PASS or no
XDP-prog attached, the veth driver will allocate and create an SKB.

The current code in veth_xdp_rcv_one() xdp_frame case, had to guess
the frame truesize of the incoming xdp_frame, when using
veth_build_skb(). With xdp_frame->frame_sz this is no longer
necessary.

Calculating the frame_sz in veth_xdp_rcv_skb() skb case, is done
similar to the XDP-generic handling code in net/core/dev.c.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Lorenzo Bianconi <lorenzo@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Acked-by: Toshiaki Makita <toshiaki.makita1@gmail.com>
Link: https://lore.kernel.org/bpf/158945338840.97035.935897116345700902.stgit@firesoul
---
 drivers/net/veth.c | 22 +++++++++++++---------
 1 file changed, 13 insertions(+), 9 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index d5691bb84448..b586d2fa5551 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -405,10 +405,6 @@ static struct sk_buff *veth_build_skb(void *head, int headroom, int len,
 {
 	struct sk_buff *skb;
 
-	if (!buflen) {
-		buflen = SKB_DATA_ALIGN(headroom + len) +
-			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
-	}
 	skb = build_skb(head, buflen);
 	if (!skb)
 		return NULL;
@@ -583,6 +579,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 		xdp.data = frame->data;
 		xdp.data_end = frame->data + frame->len;
 		xdp.data_meta = frame->data - frame->metasize;
+		xdp.frame_sz = frame->frame_sz;
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
@@ -629,7 +626,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 	rcu_read_unlock();
 
 	headroom = sizeof(struct xdp_frame) + frame->headroom - delta;
-	skb = veth_build_skb(hard_start, headroom, len, 0);
+	skb = veth_build_skb(hard_start, headroom, len, frame->frame_sz);
 	if (!skb) {
 		xdp_return_frame(frame);
 		stats->rx_drops++;
@@ -695,9 +692,8 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 			goto drop;
 		}
 
-		nskb = veth_build_skb(head,
-				      VETH_XDP_HEADROOM + mac_len, skb->len,
-				      PAGE_SIZE);
+		nskb = veth_build_skb(head, VETH_XDP_HEADROOM + mac_len,
+				      skb->len, PAGE_SIZE);
 		if (!nskb) {
 			page_frag_free(head);
 			goto drop;
@@ -715,6 +711,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	xdp.data_end = xdp.data + pktlen;
 	xdp.data_meta = xdp.data;
 	xdp.rxq = &rq->xdp_rxq;
+
+	/* SKB "head" area always have tailroom for skb_shared_info */
+	xdp.frame_sz = (void *)skb_end_pointer(skb) - xdp.data_hard_start;
+	xdp.frame_sz += SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+
 	orig_data = xdp.data;
 	orig_data_end = xdp.data_end;
 
@@ -758,6 +759,7 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	}
 	rcu_read_unlock();
 
+	/* check if bpf_xdp_adjust_head was used */
 	delta = orig_data - xdp.data;
 	off = mac_len + delta;
 	if (off > 0)
@@ -765,9 +767,11 @@ static struct sk_buff *veth_xdp_rcv_skb(struct veth_rq *rq,
 	else if (off < 0)
 		__skb_pull(skb, -off);
 	skb->mac_header -= delta;
+
+	/* check if bpf_xdp_adjust_tail was used */
 	off = xdp.data_end - orig_data_end;
 	if (off != 0)
-		__skb_put(skb, off);
+		__skb_put(skb, off); /* positive on grow, negative on shrink */
 	skb->protocol = eth_type_trans(skb, rq->dev);
 
 	metalen = xdp.data - xdp.data_meta;
-- 
cgit v1.2.3-59-g8ed1b


From 4a9b052a590d6217237502efde7d598156966080 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:53 +0200
Subject: dpaa2-eth: Add XDP frame size

The dpaa2-eth driver reserve some headroom used for hardware and
software annotation area in RX/TX buffers. Thus, xdp.data_hard_start
doesn't start at page boundary.

When XDP is configured the area reserved via dpaa2_fd_get_offset(fd) is
448 bytes of which XDP has reserved 256 bytes. As frame_sz is
calculated as an offset from xdp_buff.data_hard_start, it must be
adjusted down from the full PAGE_SIZE == DPAA2_ETH_RX_BUF_RAW_SIZE.

When doing XDP_REDIRECT, the driver doesn't need this reserved headroom
any longer and allows xdp_do_redirect() to use it. This is an advantage
for the driver's own ndo_xdp_xmit, as it uses part of this headroom for
itself.  The patch also adjusts frame_sz in this case.

The driver cannot support XDP data_meta, because it uses the headroom
just before xdp.data for struct dpaa2_eth_swa (DPAA2_ETH_SWA_SIZE=64),
when transmitting the packet. When transmitting an xdp_frame in
dpaa2_eth_xdp_xmit_frame (called via ndo_xdp_xmit) it uses this area to
store a pointer to xdp_frame and dma_size, which is used in TX
completion (free_tx_fd) to return the frame via xdp_return_frame().

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Link: https://lore.kernel.org/bpf/158945339348.97035.8562488847066908856.stgit@firesoul
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 0f3e842a4fd6..8c8d95aa1dfd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -331,6 +331,9 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.rxq = &ch->xdp_rxq;
 
+	xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE -
+		(dpaa2_fd_get_offset(fd) - XDP_PACKET_HEADROOM);
+
 	xdp_act = bpf_prog_run_xdp(xdp_prog, &xdp);
 
 	/* xdp.data pointer may have changed */
@@ -366,7 +369,11 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
 		dma_unmap_page(priv->net_dev->dev.parent, addr,
 			       DPAA2_ETH_RX_BUF_SIZE, DMA_BIDIRECTIONAL);
 		ch->buf_count--;
+
+		/* Allow redirect use of full headroom */
 		xdp.data_hard_start = vaddr;
+		xdp.frame_sz = DPAA2_ETH_RX_BUF_RAW_SIZE;
+
 		err = xdp_do_redirect(priv->net_dev, &xdp, xdp_prog);
 		if (unlikely(err))
 			ch->stats.xdp_drop++;
-- 
cgit v1.2.3-59-g8ed1b


From 7358877ac11041a22ce1cb35c352809051eac48f Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:49:58 +0200
Subject: hv_netvsc: Add XDP frame size to driver

The hyperv NIC driver does memory allocation and copy even without XDP.
In XDP mode it will allocate a new page for each packet and copy over
the payload, before invoking the XDP BPF-prog.

The positive thing is that it's easy to determine the xdp.frame_sz.

The XDP implementation for hv_netvsc transparently passes xdp_prog
to the associated VF NIC. Many of the Azure VMs are using SRIOV, so
majority of the data are actually processed directly on the VF driver's XDP
path. So the overhead of the synthetic data path (hv_netvsc) is minimal.

When XDP is enabled on this driver, XDP_PASS and XDP_TX will create the
SKB via build_skb (based on the newly allocated page). Now using XDP
frame_sz this will provide more skb_tailroom, which netstack can use for
SKB coalescing (e.g. tcp_try_coalesce -> skb_try_coalesce).

V3: Adjust patch desc to be more positive.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Wei Liu <wei.liu@kernel.org>
Cc: "K. Y. Srinivasan" <kys@microsoft.com>
Cc: Haiyang Zhang <haiyangz@microsoft.com>
Cc: Stephen Hemminger <sthemmin@microsoft.com>
Link: https://lore.kernel.org/bpf/158945339857.97035.10212138582505736163.stgit@firesoul
---
 drivers/net/hyperv/netvsc_bpf.c | 1 +
 drivers/net/hyperv/netvsc_drv.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index b86611041db6..1e0c024b0a93 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -49,6 +49,7 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 	xdp_set_data_meta_invalid(xdp);
 	xdp->data_end = xdp->data + len;
 	xdp->rxq = &nvchan->xdp_rxq;
+	xdp->frame_sz = PAGE_SIZE;
 	xdp->handle = 0;
 
 	memcpy(xdp->data, data, len);
diff --git a/drivers/net/hyperv/netvsc_drv.c b/drivers/net/hyperv/netvsc_drv.c
index 5de57fc3ec60..6267f706e8ee 100644
--- a/drivers/net/hyperv/netvsc_drv.c
+++ b/drivers/net/hyperv/netvsc_drv.c
@@ -795,7 +795,7 @@ static struct sk_buff *netvsc_alloc_recv_skb(struct net_device *net,
 	if (xbuf) {
 		unsigned int hdroom = xdp->data - xdp->data_hard_start;
 		unsigned int xlen = xdp->data_end - xdp->data;
-		unsigned int frag_size = netvsc_xdp_fraglen(hdroom + xlen);
+		unsigned int frag_size = xdp->frame_sz;
 
 		skb = build_skb(xbuf, frag_size);
 
-- 
cgit v1.2.3-59-g8ed1b


From bc1c5745d77963a4f4684c78cc2b3323900af68b Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:03 +0200
Subject: qlogic/qede: Add XDP frame size to driver

The qede driver uses a full page when XDP is enabled. The driver's value
in rx_buf_seg_size (struct qede_rx_queue) will be PAGE_SIZE when an
XDP bpf_prog is attached.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Ariel Elior <aelior@marvell.com>
Cc: GR-everest-linux-l2@marvell.com
Link: https://lore.kernel.org/bpf/158945340366.97035.7764939691580349618.stgit@firesoul
---
 drivers/net/ethernet/qlogic/qede/qede_fp.c   | 1 +
 drivers/net/ethernet/qlogic/qede/qede_main.c | 2 +-
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/qlogic/qede/qede_fp.c b/drivers/net/ethernet/qlogic/qede/qede_fp.c
index c6c20776b474..7598ebe0962a 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_fp.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_fp.c
@@ -1066,6 +1066,7 @@ static bool qede_rx_xdp(struct qede_dev *edev,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + *len;
 	xdp.rxq = &rxq->xdp_rxq;
+	xdp.frame_sz = rxq->rx_buf_seg_size; /* PAGE_SIZE when XDP enabled */
 
 	/* Queues always have a full reset currently, so for the time
 	 * being until there's atomic program replace just mark read
diff --git a/drivers/net/ethernet/qlogic/qede/qede_main.c b/drivers/net/ethernet/qlogic/qede/qede_main.c
index f50d9a9b76be..b2d154258b07 100644
--- a/drivers/net/ethernet/qlogic/qede/qede_main.c
+++ b/drivers/net/ethernet/qlogic/qede/qede_main.c
@@ -1476,7 +1476,7 @@ static int qede_alloc_mem_rxq(struct qede_dev *edev, struct qede_rx_queue *rxq)
 	if (rxq->rx_buf_size + size > PAGE_SIZE)
 		rxq->rx_buf_size = PAGE_SIZE - size;
 
-	/* Segment size to spilt a page in multiple equal parts ,
+	/* Segment size to split a page in multiple equal parts,
 	 * unless XDP is used in which case we'd use the entire page.
 	 */
 	if (!edev->xdp_prog) {
-- 
cgit v1.2.3-59-g8ed1b


From c88c35181d6ab83e439855681032653ef8728045 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:08 +0200
Subject: net: ethernet: ti: Add XDP frame size to driver cpsw

The driver code cpsw.c and cpsw_new.c both use page_pool
with default order-0 pages for their RX-pages.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Grygorii Strashko <grygorii.strashko@ti.com>
Cc: Ilias Apalodimas <ilias.apalodimas@linaro.org>
Link: https://lore.kernel.org/bpf/158945340875.97035.752144756428532878.stgit@firesoul
---
 drivers/net/ethernet/ti/cpsw.c     | 1 +
 drivers/net/ethernet/ti/cpsw_new.c | 1 +
 2 files changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/ti/cpsw.c b/drivers/net/ethernet/ti/cpsw.c
index 09f98fa2fb4e..ce0645ada6e7 100644
--- a/drivers/net/ethernet/ti/cpsw.c
+++ b/drivers/net/ethernet/ti/cpsw.c
@@ -406,6 +406,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
 
 		xdp.data_hard_start = pa;
 		xdp.rxq = &priv->xdp_rxq[ch];
+		xdp.frame_sz = PAGE_SIZE;
 
 		port = priv->emac_port + cpsw->data.dual_emac;
 		ret = cpsw_run_xdp(priv, ch, &xdp, page, port);
diff --git a/drivers/net/ethernet/ti/cpsw_new.c b/drivers/net/ethernet/ti/cpsw_new.c
index dce49311d3d3..1247d35d42ef 100644
--- a/drivers/net/ethernet/ti/cpsw_new.c
+++ b/drivers/net/ethernet/ti/cpsw_new.c
@@ -348,6 +348,7 @@ static void cpsw_rx_handler(void *token, int len, int status)
 
 		xdp.data_hard_start = pa;
 		xdp.rxq = &priv->xdp_rxq[ch];
+		xdp.frame_sz = PAGE_SIZE;
 
 		ret = cpsw_run_xdp(priv, ch, &xdp, page, priv->emac_port);
 		if (ret != CPSW_XDP_PASS)
-- 
cgit v1.2.3-59-g8ed1b


From 08fc1cfd2d250be853d33d6505ae11ff52b83b74 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:13 +0200
Subject: ena: Add XDP frame size to amazon NIC driver

Frame size ENA_PAGE_SIZE is limited to 16K on systems with larger
PAGE_SIZE than 16K. Change ENA_XDP_MAX_MTU to also take into account
the reserved tailroom.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Sameeh Jubran <sameehj@amazon.com>
Cc: Arthur Kiyanovski <akiyano@amazon.com>
Link: https://lore.kernel.org/bpf/158945341384.97035.907403694833419456.stgit@firesoul
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 1 +
 drivers/net/ethernet/amazon/ena/ena_netdev.h | 5 +++--
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 2818965427e9..85b87ed02dd5 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1606,6 +1606,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 		  "%s qid %d\n", __func__, rx_ring->qid);
 	res_budget = budget;
 	xdp.rxq = &rx_ring->xdp_rxq;
+	xdp.frame_sz = ENA_PAGE_SIZE;
 
 	do {
 		xdp_verdict = XDP_PASS;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 7df67bf09b93..680099afcccf 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -151,8 +151,9 @@
  * The buffer size we share with the device is defined to be ENA_PAGE_SIZE
  */
 
-#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN - \
-				VLAN_HLEN - XDP_PACKET_HEADROOM)
+#define ENA_XDP_MAX_MTU (ENA_PAGE_SIZE - ETH_HLEN - ETH_FCS_LEN -	\
+			 VLAN_HLEN - XDP_PACKET_HEADROOM -		\
+			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))
 
 #define ENA_IS_XDP_INDEX(adapter, index) (((index) >= (adapter)->xdp_first_ring) && \
 	((index) < (adapter)->xdp_first_ring + (adapter)->xdp_num_queues))
-- 
cgit v1.2.3-59-g8ed1b


From d201ea9ebc519fb34ad9ef1f49ab2ab31f5111ea Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:18 +0200
Subject: mlx4: Add XDP frame size and adjust max XDP MTU

In the mlx4 driver, the size of the memory backing the RX packet is
stored in frag_stride. For XDP mode this will be PAGE_SIZE (normally
4096). For normal mode frag_stride is 2048.

Also adjust MLX4_EN_MAX_XDP_MTU to take tailroom into account.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Link: https://lore.kernel.org/bpf/158945341893.97035.2688142527052329942.stgit@firesoul
---
 drivers/net/ethernet/mellanox/mlx4/en_netdev.c | 3 ++-
 drivers/net/ethernet/mellanox/mlx4/en_rx.c     | 1 +
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
index 43dcbd8214c6..5bd3cd37d50f 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_netdev.c
@@ -51,7 +51,8 @@
 #include "en_port.h"
 
 #define MLX4_EN_MAX_XDP_MTU ((int)(PAGE_SIZE - ETH_HLEN - (2 * VLAN_HLEN) - \
-				   XDP_PACKET_HEADROOM))
+				XDP_PACKET_HEADROOM -			    \
+				SKB_DATA_ALIGN(sizeof(struct skb_shared_info))))
 
 int mlx4_en_setup_tc(struct net_device *dev, u8 up)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx4/en_rx.c b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
index 787139219813..8a10285b0e10 100644
--- a/drivers/net/ethernet/mellanox/mlx4/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx4/en_rx.c
@@ -683,6 +683,7 @@ int mlx4_en_process_rx_cq(struct net_device *dev, struct mlx4_en_cq *cq, int bud
 	rcu_read_lock();
 	xdp_prog = rcu_dereference(ring->xdp_prog);
 	xdp.rxq = &ring->xdp_rxq;
+	xdp.frame_sz = priv->frag_info[0].frag_stride;
 	doorbell_pending = 0;
 
 	/* We assume a 1:1 mapping between CQEs and Rx descriptors, so Rx
-- 
cgit v1.2.3-59-g8ed1b


From c8145b263dd85f9e589c7c7ba531423d82ca96ae Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:24 +0200
Subject: net: thunderx: Add XDP frame size

To help reviewers these are the defines related to RCV_FRAG_LEN

 #define DMA_BUFFER_LEN	1536 /* In multiples of 128bytes */
 #define RCV_FRAG_LEN	(SKB_DATA_ALIGN(DMA_BUFFER_LEN + NET_SKB_PAD) + \
			 SKB_DATA_ALIGN(sizeof(struct skb_shared_info)))

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Sunil Goutham <sgoutham@marvell.com>
Cc: Robert Richter <rrichter@marvell.com>
Link: https://lore.kernel.org/bpf/158945342402.97035.12649844447148990032.stgit@firesoul
---
 drivers/net/ethernet/cavium/thunder/nicvf_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/cavium/thunder/nicvf_main.c b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
index b4b33368698f..2ba0ce115e63 100644
--- a/drivers/net/ethernet/cavium/thunder/nicvf_main.c
+++ b/drivers/net/ethernet/cavium/thunder/nicvf_main.c
@@ -552,6 +552,7 @@ static inline bool nicvf_xdp_rx(struct nicvf *nic, struct bpf_prog *prog,
 	xdp_set_data_meta_invalid(&xdp);
 	xdp.data_end = xdp.data + len;
 	xdp.rxq = &rq->xdp_rxq;
+	xdp.frame_sz = RCV_FRAG_LEN + XDP_PACKET_HEADROOM;
 	orig_data = xdp.data;
 
 	rcu_read_lock();
-- 
cgit v1.2.3-59-g8ed1b


From fa6540b8efd8944f8627c2f304114663ef4aadc4 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:29 +0200
Subject: nfp: Add XDP frame size to netronome driver

The netronome nfp driver uses PAGE_SIZE when xdp_prog is set, but
xdp.data_hard_start begins at offset NFP_NET_RX_BUF_HEADROOM.
Thus, adjust for this when setting xdp.frame_sz, as it counts
from data_hard_start.

When doing XDP_TX this driver is smart and instead of a full DMA-map
does a DMA-sync with the packet length. As xdp_adjust_tail can now
grow the packet length, add checks to make sure that the grow size is
within the DMA-mapped size.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jakub Kicinski <kuba@kernel.org>
Link: https://lore.kernel.org/bpf/158945342911.97035.11214251236208648808.stgit@firesoul
---
 drivers/net/ethernet/netronome/nfp/nfp_net_common.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
index 9bfb3b077bc1..0e0cc3d58bdc 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_common.c
@@ -1741,10 +1741,15 @@ nfp_net_tx_xdp_buf(struct nfp_net_dp *dp, struct nfp_net_rx_ring *rx_ring,
 		   struct nfp_net_rx_buf *rxbuf, unsigned int dma_off,
 		   unsigned int pkt_len, bool *completed)
 {
+	unsigned int dma_map_sz = dp->fl_bufsz - NFP_NET_RX_BUF_NON_DATA;
 	struct nfp_net_tx_buf *txbuf;
 	struct nfp_net_tx_desc *txd;
 	int wr_idx;
 
+	/* Reject if xdp_adjust_tail grow packet beyond DMA area */
+	if (pkt_len + dma_off > dma_map_sz)
+		return false;
+
 	if (unlikely(nfp_net_tx_full(tx_ring, 1))) {
 		if (!*completed) {
 			nfp_net_xdp_complete(tx_ring);
@@ -1817,6 +1822,7 @@ static int nfp_net_rx(struct nfp_net_rx_ring *rx_ring, int budget)
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(dp->xdp_prog);
 	true_bufsz = xdp_prog ? PAGE_SIZE : dp->fl_bufsz;
+	xdp.frame_sz = PAGE_SIZE - NFP_NET_RX_BUF_HEADROOM;
 	xdp.rxq = &rx_ring->xdp_rxq;
 	tx_ring = r_vec->xdp_ring;
 
-- 
cgit v1.2.3-59-g8ed1b


From fb3e6e9307973d2f70a173f1b33d1054fa2b691f Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:34 +0200
Subject: tun: Add XDP frame size

The tun driver has two code paths for running XDP (bpf_prog_run_xdp).
In both cases 'buflen' contains enough tailroom for skb_shared_info.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/bpf/158945343419.97035.9594485183958037621.stgit@firesoul
---
 drivers/net/tun.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index 44889eba1dbc..c54f967e2c66 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1671,6 +1671,7 @@ static struct sk_buff *tun_build_skb(struct tun_struct *tun,
 		xdp_set_data_meta_invalid(&xdp);
 		xdp.data_end = xdp.data + len;
 		xdp.rxq = &tfile->xdp_rxq;
+		xdp.frame_sz = buflen;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		if (act == XDP_REDIRECT || act == XDP_TX) {
@@ -2411,6 +2412,7 @@ static int tun_xdp_one(struct tun_struct *tun,
 		}
 		xdp_set_data_meta_invalid(xdp);
 		xdp->rxq = &tfile->xdp_rxq;
+		xdp->frame_sz = buflen;
 
 		act = bpf_prog_run_xdp(xdp_prog, xdp);
 		err = tun_xdp_act(tun, xdp_prog, xdp, act);
-- 
cgit v1.2.3-59-g8ed1b


From 05afee298afc2f2497b7400b53e9d60fcc24d525 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:39 +0200
Subject: vhost_net: Also populate XDP frame size

In vhost_net_build_xdp() the 'buf' that gets queued via an xdp_buff
has an embedded struct tun_xdp_hdr (located at xdp->data_hard_start)
which contains the buffer length 'buflen' (with tailroom for
skb_shared_info). Also storing this buflen in xdp->frame_sz does not
obsolete struct tun_xdp_hdr, as it also contains a struct
virtio_net_hdr with other information.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/bpf/158945343928.97035.4620233649151726289.stgit@firesoul
---
 drivers/vhost/net.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/vhost/net.c b/drivers/vhost/net.c
index 2927f02cc7e1..516519dcc8ff 100644
--- a/drivers/vhost/net.c
+++ b/drivers/vhost/net.c
@@ -747,6 +747,7 @@ static int vhost_net_build_xdp(struct vhost_net_virtqueue *nvq,
 	xdp->data = buf + pad;
 	xdp->data_end = xdp->data + len;
 	hdr->buflen = buflen;
+	xdp->frame_sz = buflen;
 
 	--net->refcnt_bias;
 	alloc_frag->offset += buflen;
-- 
cgit v1.2.3-59-g8ed1b


From 9ce6146ec7b50718fa5ef5287f1d6561b25a5da8 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:44 +0200
Subject: virtio_net: Add XDP frame size in two code paths

The virtio_net driver is running inside the guest-OS. There are two
XDP receive code-paths in virtio_net, namely receive_small() and
receive_mergeable(). The receive_big() function does not support XDP.

In receive_small() the frame size is available in buflen. The buffers
backing these frames are allocated in add_recvbuf_small() with the same
size, except for the headroom, but the tailroom has reserved room for
skb_shared_info. The headroom is encoded in the ctx pointer as a value.

In receive_mergeable() the frame size is more dynamic. There are two
basic cases: (1) buffer size is based on an exponentially weighted
moving average (see DECLARE_EWMA) of packet length. Or (2) in case
virtnet_get_headroom() returns any headroom then buffer size is
PAGE_SIZE. The ctx pointer is this time used for encoding two values;
the buffer len "truesize" and headroom. In case (1) if the rx buffer
size is underestimated, the packet will have been split over more
buffers (num_buf info in virtio_net_hdr_mrg_rxbuf placed at the top of
the buffer area). If that happens the XDP path does a xdp_linearize_page
operation.

V3: Adjust frame_sz in receive_mergeable() case, spotted by Jason Wang.

The code is really hard to follow, so some hints to reviewers.
The receive_mergeable() case gets frames that were allocated in
add_recvbuf_mergeable() which uses headroom=virtnet_get_headroom(),
and 'buf' ptr is advanced this headroom.  The headroom can only
be 0 or VIRTIO_XDP_HEADROOM, as virtnet_get_headroom is really
simple:

  static unsigned int virtnet_get_headroom(struct virtnet_info *vi)
  {
	return vi->xdp_queue_pairs ? VIRTIO_XDP_HEADROOM : 0;
  }

As frame_sz is an offset size from xdp.data_hard_start, reviewers
should notice how this is calculated in receive_mergeable():

  int offset = buf - page_address(page);
  [...]
  data = page_address(xdp_page) + offset;
  xdp.data_hard_start = data - VIRTIO_XDP_HEADROOM + vi->hdr_len;

The calculated offset will always be VIRTIO_XDP_HEADROOM when
reaching this code.  Thus, xdp.data_hard_start will be page-start
address plus vi->hdr_len.  Given this xdp.frame_sz need to be
reduced with vi->hdr_len size.

IMHO a followup patch should cleanup this code to make it easier
to maintain and understand, but it is outside the scope of this
patchset.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Michael S. Tsirkin <mst@redhat.com>
Acked-by: Jason Wang <jasowang@redhat.com>
Link: https://lore.kernel.org/bpf/158945344436.97035.9445115070189151680.stgit@firesoul
---
 drivers/net/virtio_net.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index 11f722460513..9e1b5d748586 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -689,6 +689,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 		xdp.data_end = xdp.data + len;
 		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
+		xdp.frame_sz = buflen;
 		orig_data = xdp.data;
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
@@ -797,10 +798,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	int offset = buf - page_address(page);
 	struct sk_buff *head_skb, *curr_skb;
 	struct bpf_prog *xdp_prog;
-	unsigned int truesize;
+	unsigned int truesize = mergeable_ctx_to_truesize(ctx);
 	unsigned int headroom = mergeable_ctx_to_headroom(ctx);
-	int err;
 	unsigned int metasize = 0;
+	unsigned int frame_sz;
+	int err;
 
 	head_skb = NULL;
 	stats->bytes += len - vi->hdr_len;
@@ -821,6 +823,11 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		if (unlikely(hdr->hdr.gso_type))
 			goto err_xdp;
 
+		/* Buffers with headroom use PAGE_SIZE as alloc size,
+		 * see add_recvbuf_mergeable() + get_mergeable_buf_len()
+		 */
+		frame_sz = headroom ? PAGE_SIZE : truesize;
+
 		/* This happens when rx buffer size is underestimated
 		 * or headroom is not enough because of the buffer
 		 * was refilled before XDP is set. This should only
@@ -834,6 +841,8 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 						      page, offset,
 						      VIRTIO_XDP_HEADROOM,
 						      &len);
+			frame_sz = PAGE_SIZE;
+
 			if (!xdp_page)
 				goto err_xdp;
 			offset = VIRTIO_XDP_HEADROOM;
@@ -850,6 +859,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 		xdp.data_end = xdp.data + (len - vi->hdr_len);
 		xdp.data_meta = xdp.data;
 		xdp.rxq = &rq->xdp_rxq;
+		xdp.frame_sz = frame_sz - vi->hdr_len;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
 		stats->xdp_packets++;
@@ -924,7 +934,6 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 	}
 	rcu_read_unlock();
 
-	truesize = mergeable_ctx_to_truesize(ctx);
 	if (unlikely(len > truesize)) {
 		pr_debug("%s: rx error: len %u exceeds truesize %lu\n",
 			 dev->name, len, (unsigned long)ctx);
-- 
cgit v1.2.3-59-g8ed1b


From 88eb0ee17b2ece64fcf6689a4557a5c2e7a89c4b Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:49 +0200
Subject: ixgbe: Fix XDP redirect on archs with PAGE_SIZE above 4K

The ixgbe driver has another memory model when compiled on archs with
PAGE_SIZE above 4096 bytes. In this mode it doesn't split the page in
two halves, but instead increments rx_buffer->page_offset by the truesize
of the packet (which includes headroom and tailroom for skb_shared_info).

This is done correctly in ixgbe_build_skb(), but in ixgbe_rx_buffer_flip
which is currently only called on XDP_TX and XDP_REDIRECT, it forgets
to add the tailroom for skb_shared_info. This breaks XDP_REDIRECT, for
veth and cpumap.  Fix by adding size of skb_shared_info tailroom.

Maintainers notice: This fix has been queued to Jeff.

Fixes: 6453073987ba ("ixgbe: add initial support for xdp redirect")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Link: https://lore.kernel.org/bpf/158945344946.97035.17031588499266605743.stgit@firesoul
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 718931d951bc..ea6834bae04c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2254,7 +2254,8 @@ static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
 	rx_buffer->page_offset ^= truesize;
 #else
 	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
-				SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) :
+				SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) +
+				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
 				SKB_DATA_ALIGN(size);
 
 	rx_buffer->page_offset += truesize;
-- 
cgit v1.2.3-59-g8ed1b


From cf02512899805d6f3d48c0cf1825148f5d24fe71 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:54 +0200
Subject: ixgbe: Add XDP frame size to driver

This driver uses different memory models depending on PAGE_SIZE at
compile time. For PAGE_SIZE 4K it uses page splitting, meaning that for
normal MTU the frame size is 2048 bytes (and headroom 192 bytes). For
larger MTUs the driver still uses page splitting, by allocating
order-1 pages (8192 bytes) for RX frames. For PAGE_SIZE larger than
4K, the driver instead advances its rx_buffer->page_offset with the
frame size "truesize".

For XDP frame size calculations, this means that in PAGE_SIZE larger
than 4K mode the frame_sz changes on a per packet basis. For the page
split 4K PAGE_SIZE mode, xdp.frame_sz is more constant and can be
updated once outside the main NAPI loop.

The default setting in the driver uses build_skb(), which provides
the necessary headroom and tailroom for XDP-redirect in RX-frame
(in both modes).

There is one complication, which is legacy-rx mode (configurable via
ethtool priv-flags). There is zero headroom in this mode, whereas
headroom is a requirement for XDP-redirect to work. The conversion to
xdp_frame (convert_to_xdp_frame) will detect this insufficient space, and
the xdp_do_redirect() call will fail. This is deemed acceptable, as it
allows other XDP actions to still work in legacy-mode. In
legacy-mode + larger PAGE_SIZE, due to lacking tailroom, we also
accept that xdp_adjust_tail shrink doesn't work.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Link: https://lore.kernel.org/bpf/158945345455.97035.14334355929030628741.stgit@firesoul
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 34 ++++++++++++++++++++-------
 1 file changed, 26 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index ea6834bae04c..eab5934b04f5 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2244,20 +2244,30 @@ xdp_out:
 	return ERR_PTR(-result);
 }
 
+static unsigned int ixgbe_rx_frame_truesize(struct ixgbe_ring *rx_ring,
+					    unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ixgbe_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ring_uses_build_skb(rx_ring) ?
+		SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
 static void ixgbe_rx_buffer_flip(struct ixgbe_ring *rx_ring,
 				 struct ixgbe_rx_buffer *rx_buffer,
 				 unsigned int size)
 {
+	unsigned int truesize = ixgbe_rx_frame_truesize(rx_ring, size);
 #if (PAGE_SIZE < 8192)
-	unsigned int truesize = ixgbe_rx_pg_size(rx_ring) / 2;
-
 	rx_buffer->page_offset ^= truesize;
 #else
-	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
-				SKB_DATA_ALIGN(IXGBE_SKB_PAD + size) +
-				SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
-				SKB_DATA_ALIGN(size);
-
 	rx_buffer->page_offset += truesize;
 #endif
 }
@@ -2291,6 +2301,11 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 
 	xdp.rxq = &rx_ring->xdp_rxq;
 
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, 0);
+#endif
+
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
 		struct ixgbe_rx_buffer *rx_buffer;
@@ -2324,7 +2339,10 @@ static int ixgbe_clean_rx_irq(struct ixgbe_q_vector *q_vector,
 			xdp.data_hard_start = xdp.data -
 					      ixgbe_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
-
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = ixgbe_rx_frame_truesize(rx_ring, size);
+#endif
 			skb = ixgbe_run_xdp(adapter, rx_ring, &xdp);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 81f3c6283cff03efae139a85851602a4c1c6bd72 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:50:59 +0200
Subject: ixgbevf: Add XDP frame size to VF driver

This patch mirrors the changes to ixgbe in previous patch.

This VF driver doesn't support XDP_REDIRECT, but correct tailroom is
still necessary for BPF-helper xdp_adjust_tail.  In legacy-mode +
larger PAGE_SIZE, due to lacking tailroom, we accept that
xdp_adjust_tail shrink doesn't work.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Link: https://lore.kernel.org/bpf/158945345984.97035.13518286183248025173.stgit@firesoul
---
 drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c | 34 ++++++++++++++++++-----
 1 file changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
index 4622c4ea2e46..a39e2cb384dd 100644
--- a/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
+++ b/drivers/net/ethernet/intel/ixgbevf/ixgbevf_main.c
@@ -1095,19 +1095,31 @@ xdp_out:
 	return ERR_PTR(-result);
 }
 
+static unsigned int ixgbevf_rx_frame_truesize(struct ixgbevf_ring *rx_ring,
+					      unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ixgbevf_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ring_uses_build_skb(rx_ring) ?
+		SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
 static void ixgbevf_rx_buffer_flip(struct ixgbevf_ring *rx_ring,
 				   struct ixgbevf_rx_buffer *rx_buffer,
 				   unsigned int size)
 {
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = ixgbevf_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize = ixgbevf_rx_frame_truesize(rx_ring, size);
 
+#if (PAGE_SIZE < 8192)
 	rx_buffer->page_offset ^= truesize;
 #else
-	unsigned int truesize = ring_uses_build_skb(rx_ring) ?
-				SKB_DATA_ALIGN(IXGBEVF_SKB_PAD + size) :
-				SKB_DATA_ALIGN(size);
-
 	rx_buffer->page_offset += truesize;
 #endif
 }
@@ -1125,6 +1137,11 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 
 	xdp.rxq = &rx_ring->xdp_rxq;
 
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, 0);
+#endif
+
 	while (likely(total_rx_packets < budget)) {
 		struct ixgbevf_rx_buffer *rx_buffer;
 		union ixgbe_adv_rx_desc *rx_desc;
@@ -1157,7 +1174,10 @@ static int ixgbevf_clean_rx_irq(struct ixgbevf_q_vector *q_vector,
 			xdp.data_hard_start = xdp.data -
 					      ixgbevf_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
-
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = ixgbevf_rx_frame_truesize(rx_ring, size);
+#endif
 			skb = ixgbevf_run_xdp(adapter, rx_ring, &xdp);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From 24104024ce0553ae7738bb1ea5e6e3ed6619160d Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:04 +0200
Subject: i40e: Add XDP frame size to driver

This driver uses different memory models depending on PAGE_SIZE at
compile time. For PAGE_SIZE 4K it uses page splitting, meaning that
for normal MTU the frame size is 2048 bytes (and headroom 192 bytes).
For larger MTUs the driver still uses page splitting, by allocating
order-1 pages (8192 bytes) for RX frames. For PAGE_SIZE larger than
4K, the driver instead advances its rx_buffer->page_offset by the
frame size "truesize".

For XDP frame size calculations, this means that in PAGE_SIZE larger
than 4K mode the frame_sz changes on a per-packet basis. For the page
split 4K PAGE_SIZE mode, xdp.frame_sz depends only on the rx_ring
setup and can be updated once outside the main NAPI loop.

The default setting in the driver uses build_skb(), which provides
the necessary headroom and tailroom for XDP-redirect in RX-frame
(in both modes).

There is one complication, which is legacy-rx mode (configurable via
ethtool priv-flags). There is zero headroom in this mode, while
headroom is a requirement for XDP-redirect to work. The conversion to
xdp_frame (convert_to_xdp_frame) will detect this insufficient space,
and the xdp_do_redirect() call will fail. This is deemed acceptable,
as it allows other XDP actions to still work in legacy-mode. In
legacy-mode + larger PAGE_SIZE, due to lacking tailroom, we also
accept that xdp_adjust_tail shrink doesn't work.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Link: https://lore.kernel.org/bpf/158945346494.97035.12809400414566061815.stgit@firesoul
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 30 ++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index b8496037ef7f..a3772beffe02 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1507,6 +1507,22 @@ static inline unsigned int i40e_rx_offset(struct i40e_ring *rx_ring)
 	return ring_uses_build_skb(rx_ring) ? I40E_SKB_PAD : 0;
 }
 
+static unsigned int i40e_rx_frame_truesize(struct i40e_ring *rx_ring,
+					   unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = i40e_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = i40e_rx_offset(rx_ring) ?
+		SKB_DATA_ALIGN(size + i40e_rx_offset(rx_ring)) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
 /**
  * i40e_alloc_mapped_page - recycle or make a new page
  * @rx_ring: ring to use
@@ -2246,13 +2262,11 @@ static void i40e_rx_buffer_flip(struct i40e_ring *rx_ring,
 				struct i40e_rx_buffer *rx_buffer,
 				unsigned int size)
 {
-#if (PAGE_SIZE < 8192)
-	unsigned int truesize = i40e_rx_pg_size(rx_ring) / 2;
+	unsigned int truesize = i40e_rx_frame_truesize(rx_ring, size);
 
+#if (PAGE_SIZE < 8192)
 	rx_buffer->page_offset ^= truesize;
 #else
-	unsigned int truesize = SKB_DATA_ALIGN(i40e_rx_offset(rx_ring) + size);
-
 	rx_buffer->page_offset += truesize;
 #endif
 }
@@ -2335,6 +2349,9 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 	bool failure = false;
 	struct xdp_buff xdp;
 
+#if (PAGE_SIZE < 8192)
+	xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, 0);
+#endif
 	xdp.rxq = &rx_ring->xdp_rxq;
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
@@ -2389,7 +2406,10 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 			xdp.data_hard_start = xdp.data -
 					      i40e_rx_offset(rx_ring);
 			xdp.data_end = xdp.data + size;
-
+#if (PAGE_SIZE > 4096)
+			/* At larger PAGE_SIZE, frame_sz depend on len size */
+			xdp.frame_sz = i40e_rx_frame_truesize(rx_ring, size);
+#endif
 			skb = i40e_run_xdp(rx_ring, &xdp);
 		}
 
-- 
cgit v1.2.3-59-g8ed1b


From d4ecdbf7aa2fa4feac09befb04cdaf44e6dc938b Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:10 +0200
Subject: ice: Add XDP frame size to driver

This driver uses different memory models depending on PAGE_SIZE at
compile time. For PAGE_SIZE 4K it uses page splitting, meaning that
for normal MTU the frame size is 2048 bytes (and headroom 192 bytes).
For larger MTUs the driver still uses page splitting, by allocating
order-1 pages (8192 bytes) for RX frames. For PAGE_SIZE larger than
4K, the driver instead advances its rx_buffer->page_offset by the
frame size "truesize".

For XDP frame size calculations, this means that in PAGE_SIZE larger
than 4K mode the frame_sz changes on a per-packet basis. For the page
split 4K PAGE_SIZE mode, xdp.frame_sz depends only on the rx_ring
setup and can be updated once outside the main NAPI loop.

The default setting in the driver uses build_skb(), which provides
the necessary headroom and tailroom for XDP-redirect in RX-frame
(in both modes).

There is one complication, which is legacy-rx mode (configurable via
ethtool priv-flags). There is zero headroom in this mode, while
headroom is a requirement for XDP-redirect to work. The conversion to
xdp_frame (convert_to_xdp_frame) will detect this insufficient space,
and the xdp_do_redirect() call will fail. This is deemed acceptable,
as it allows other XDP actions to still work in legacy-mode. In
legacy-mode + larger PAGE_SIZE, due to lacking tailroom, we also
accept that xdp_adjust_tail shrink doesn't work.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Cc: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
Cc: Alexander Duyck <alexander.duyck@gmail.com>
Link: https://lore.kernel.org/bpf/158945347002.97035.328088795813704587.stgit@firesoul
---
 drivers/net/ethernet/intel/ice/ice_txrx.c | 34 +++++++++++++++++++++++--------
 1 file changed, 25 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index f67e8362958c..69b21b436f9a 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -423,6 +423,22 @@ static unsigned int ice_rx_offset(struct ice_ring *rx_ring)
 	return 0;
 }
 
+static unsigned int ice_rx_frame_truesize(struct ice_ring *rx_ring,
+					  unsigned int size)
+{
+	unsigned int truesize;
+
+#if (PAGE_SIZE < 8192)
+	truesize = ice_rx_pg_size(rx_ring) / 2; /* Must be power-of-2 */
+#else
+	truesize = ice_rx_offset(rx_ring) ?
+		SKB_DATA_ALIGN(ice_rx_offset(rx_ring) + size) +
+		SKB_DATA_ALIGN(sizeof(struct skb_shared_info)) :
+		SKB_DATA_ALIGN(size);
+#endif
+	return truesize;
+}
+
 /**
  * ice_run_xdp - Executes an XDP program on initialized xdp_buff
  * @rx_ring: Rx ring
@@ -991,6 +1007,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 	bool failure;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
+	/* Frame size depend on rx_ring setup when PAGE_SIZE=4K */
+#if (PAGE_SIZE < 8192)
+	xdp.frame_sz = ice_rx_frame_truesize(rx_ring, 0);
+#endif
 
 	/* start the loop to process Rx packets bounded by 'budget' */
 	while (likely(total_rx_pkts < (unsigned int)budget)) {
@@ -1038,6 +1058,10 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		xdp.data_hard_start = xdp.data - ice_rx_offset(rx_ring);
 		xdp.data_meta = xdp.data;
 		xdp.data_end = xdp.data + size;
+#if (PAGE_SIZE > 4096)
+		/* At larger PAGE_SIZE, frame_sz depend on len size */
+		xdp.frame_sz = ice_rx_frame_truesize(rx_ring, size);
+#endif
 
 		rcu_read_lock();
 		xdp_prog = READ_ONCE(rx_ring->xdp_prog);
@@ -1051,16 +1075,8 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		if (!xdp_res)
 			goto construct_skb;
 		if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
-			unsigned int truesize;
-
-#if (PAGE_SIZE < 8192)
-			truesize = ice_rx_pg_size(rx_ring) / 2;
-#else
-			truesize = SKB_DATA_ALIGN(ice_rx_offset(rx_ring) +
-						  size);
-#endif
 			xdp_xmit |= xdp_res;
-			ice_rx_buf_adjust_pg_offset(rx_buf, truesize);
+			ice_rx_buf_adjust_pg_offset(rx_buf, xdp.frame_sz);
 		} else {
 			rx_buf->pagecnt_bias++;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 2a637c5b1aaf3b21418fadffad7e56ff27cee6f7 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:15 +0200
Subject: xdp: For Intel AF_XDP drivers add XDP frame_sz
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Intel drivers implement native AF_XDP zerocopy in separate C-files
that have their own invocation of bpf_prog_run_xdp(). The setup of
xdp_buff is also handled separately from the normal code path.

This patch updates XDP frame_sz for the AF_XDP zerocopy drivers i40e,
ice and ixgbe, as the code changes needed are very similar. Introduce
a helper function xsk_umem_xdp_frame_sz() for calculating frame size.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Cc: intel-wired-lan@lists.osuosl.org
Cc: Magnus Karlsson <magnus.karlsson@intel.com>
Link: https://lore.kernel.org/bpf/158945347511.97035.8536753731329475655.stgit@firesoul
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c   |  2 ++
 drivers/net/ethernet/intel/ice/ice_xsk.c     |  2 ++
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c |  2 ++
 include/net/xdp_sock.h                       | 11 +++++++++++
 4 files changed, 17 insertions(+)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 0b7d29192b2c..2b9184aead5f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -531,12 +531,14 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
+	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_res, xdp_xmit = 0;
 	bool failure = false;
 	struct sk_buff *skb;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
+	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		struct i40e_rx_buffer *bi;
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 8279db15e870..23e5515d4527 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -840,11 +840,13 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
+	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_xmit = 0;
 	bool failure = false;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
+	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 74b540ebb3dc..a656ee9a1fae 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -431,12 +431,14 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	struct ixgbe_adapter *adapter = q_vector->adapter;
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
+	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_res, xdp_xmit = 0;
 	bool failure = false;
 	struct sk_buff *skb;
 	struct xdp_buff xdp;
 
 	xdp.rxq = &rx_ring->xdp_rxq;
+	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 67191ccaab85..abd72de25fa4 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -236,6 +236,12 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
 	else
 		return address + offset;
 }
+
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+	return umem->chunk_size_nohr + umem->headroom;
+}
+
 #else
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
@@ -366,6 +372,11 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
 	return 0;
 }
 
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+	return 0;
+}
+
 static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	return -EOPNOTSUPP;
-- 
cgit v1.2.3-59-g8ed1b


From d628ee4fef1dbd6f2fa11e3548322c7839319537 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:20 +0200
Subject: mlx5: Rx queue setup time determine frame_sz for XDP

The mlx5 driver has multiple memory models, which also change
according to whether an XDP bpf_prog is attached.

The 'rx_striding_rq' setting is adjusted via ethtool priv-flags e.g.:
 # ethtool --set-priv-flags mlx5p2 rx_striding_rq off

In the general case with 4K page_size and regular MTU packets, the
frame_sz is 2048, and 4096 when XDP is enabled, in both modes.

The info on the given frame size is stored differently depending on the
RQ-mode and encoded in a union in struct mlx5e_rq union wqe/mpwqe.
In rx striding mode rq->mpwqe.log_stride_sz is either 11 or 12, which
corresponds to 2048 or 4096 (MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ).
In non-striding mode (MLX5_WQ_TYPE_CYCLIC) the frag_stride is stored
in rq->wqe.info.arr[0].frag_stride, for the first fragment, which is
what the XDP case cares about.

To reduce the effect on the fast-path, this patch determines the
frame_sz at setup time, to avoid determining the memory model at
runtime. The variable is named frame0_sz to make it clear that this
is only the frame size of the first fragment.

This mlx5 driver does a DMA-sync on XDP_TX action, but grow is safe
as it has done a DMA-map on the entire PAGE_SIZE. The driver also
already does an XDP length check against sq->hw_mtu on the possible
XDP xmit paths mlx5e_xmit_xdp_frame() + mlx5e_xmit_xdp_frame_mpwqe().

V3+4: Change variable name first_frame_sz to frame0_sz

V2: Fix that frag_size need to be recalc before creating SKB.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Tariq Toukan <tariqt@mellanox.com>
Cc: Saeed Mahameed <saeedm@mellanox.com>
Link: https://lore.kernel.org/bpf/158945348021.97035.12295039384250022883.stgit@firesoul
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h      | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c  | 1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 6 ++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c   | 2 ++
 4 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 3bd64c63865b..26911b15f8fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -625,6 +625,7 @@ struct mlx5e_rq {
 	struct {
 		u16            umem_headroom;
 		u16            headroom;
+		u32            frame0_sz;
 		u8             map_dir;   /* dma map direction */
 	} buff;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index c4a7fb4ecd14..761c8979bd41 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -137,6 +137,7 @@ bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
 	if (xsk)
 		xdp.handle = di->xsk.handle;
 	xdp.rxq = &rq->xdp_rxq;
+	xdp.frame_sz = rq->buff.frame0_sz;
 
 	act = bpf_prog_run_xdp(prog, &xdp);
 	if (xsk) {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0a9dfc31de3e..0e4ca08ddca9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -462,6 +462,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 		rq->mpwqe.num_strides =
 			BIT(mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk));
 
+		rq->buff.frame0_sz = (1 << rq->mpwqe.log_stride_sz);
+
 		err = mlx5e_create_rq_umr_mkey(mdev, rq);
 		if (err)
 			goto err_rq_wq_destroy;
@@ -485,6 +487,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 			num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
 
 		rq->wqe.info = rqp->frags_info;
+		rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
+
 		rq->wqe.frags =
 			kvzalloc_node(array_size(sizeof(*rq->wqe.frags),
 					(wq_sz << rq->wqe.info.log_num_frags)),
@@ -522,6 +526,8 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	}
 
 	if (xsk) {
+		rq->buff.frame0_sz = xsk_umem_xdp_frame_sz(umem);
+
 		err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
 		if (unlikely(err)) {
 			mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 779600bebcca..821f94beda7a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1070,6 +1070,7 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
 
+	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
 	if (unlikely(!skb))
 		return NULL;
@@ -1371,6 +1372,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		return NULL; /* page/packet was consumed by XDP */
 	}
 
+	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
 	if (unlikely(!skb))
 		return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From c8741e2bfe872425ea6f10bb6f7dc1d67bc60c3a Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:25 +0200
Subject: xdp: Allow bpf_xdp_adjust_tail() to grow packet size

Finally, after all drivers have a frame size, allow BPF-helper
bpf_xdp_adjust_tail() to grow or extend packet size at frame tail.

Remember that helper/macro xdp_data_hard_end has reserved some
tailroom.  Thus, this helper makes sure that the BPF-prog doesn't have
access to this tailroom area.

V2: Remove one chicken check and use WARN_ONCE for other

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158945348530.97035.12577148209134239291.stgit@firesoul
---
 include/uapi/linux/bpf.h |  4 ++--
 net/core/filter.c        | 11 +++++++++--
 2 files changed, 11 insertions(+), 4 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 32cbf36c7729..b9b8a0f63b91 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -2015,8 +2015,8 @@ union bpf_attr {
  * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- * 		only possible to shrink the packet as of this writing,
- * 		therefore *delta* must be a negative integer.
+ * 		possible to both shrink and grow the packet tail.
+ * 		Shrink done via *delta* being a negative integer.
  *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
diff --git a/net/core/filter.c b/net/core/filter.c
index 5815902bb617..e7b033dad44e 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3411,12 +3411,19 @@ static const struct bpf_func_proto bpf_xdp_adjust_head_proto = {
 
 BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 {
+	void *data_hard_end = xdp_data_hard_end(xdp); /* use xdp->frame_sz */
 	void *data_end = xdp->data_end + offset;
 
-	/* only shrinking is allowed for now. */
-	if (unlikely(offset >= 0))
+	/* Notice that xdp_data_hard_end have reserved some tailroom */
+	if (unlikely(data_end > data_hard_end))
 		return -EINVAL;
 
+	/* ALL drivers MUST init xdp->frame_sz, chicken check below */
+	if (unlikely(xdp->frame_sz > PAGE_SIZE)) {
+		WARN_ONCE(1, "Too BIG xdp->frame_sz = %d\n", xdp->frame_sz);
+		return -EINVAL;
+	}
+
 	if (unlikely(data_end < xdp->data + ETH_HLEN))
 		return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From ddb47d518ca10948d1f64a983cb9274720f691cd Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:30 +0200
Subject: xdp: Clear grow memory in bpf_xdp_adjust_tail()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clear the memory of the tail when a grow happens, because it is too
easy to write an XDP_PASS program that extends the tail, which exposes
this memory to users that can run tcpdump.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/158945349039.97035.5262100484553494.stgit@firesoul
---
 net/core/filter.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index e7b033dad44e..a85eb538d4d6 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3427,6 +3427,10 @@ BPF_CALL_2(bpf_xdp_adjust_tail, struct xdp_buff *, xdp, int, offset)
 	if (unlikely(data_end < xdp->data + ETH_HLEN))
 		return -EINVAL;
 
+	/* Clear memory area on grow, can contain uninit kernel memory */
+	if (offset > 0)
+		memset(xdp->data_end, 0, offset);
+
 	xdp->data_end = data_end;
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From bc56c919fce782f616823b76fb70a788f4762cf5 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:35 +0200
Subject: bpf: Add xdp.frame_sz in bpf_prog_test_run_xdp().

Update the memory requirements, when adding xdp.frame_sz in BPF test_run
function bpf_prog_test_run_xdp() which e.g. is used by XDP selftests.

Specifically add the expected reserved tailroom, but also allocate a
larger memory area to reflect that XDP frames usually come in this
format. Limit the provided packet data size to 4096 minus headroom +
tailroom, as this also reflects a common 3520 bytes MTU limit with XDP.

Note that bpf_test_init already uses a memory allocation method that
clears memory.  Thus, this already guards against leaking uninit
kernel memory.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158945349549.97035.15316291762482444006.stgit@firesoul
---
 net/bpf/test_run.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 29dbdd4c29f6..30ba7d38941d 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -470,25 +470,34 @@ out:
 int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 			  union bpf_attr __user *uattr)
 {
+	u32 tailroom = SKB_DATA_ALIGN(sizeof(struct skb_shared_info));
+	u32 headroom = XDP_PACKET_HEADROOM;
 	u32 size = kattr->test.data_size_in;
 	u32 repeat = kattr->test.repeat;
 	struct netdev_rx_queue *rxqueue;
 	struct xdp_buff xdp = {};
 	u32 retval, duration;
+	u32 max_data_sz;
 	void *data;
 	int ret;
 
 	if (kattr->test.ctx_in || kattr->test.ctx_out)
 		return -EINVAL;
 
-	data = bpf_test_init(kattr, size, XDP_PACKET_HEADROOM + NET_IP_ALIGN, 0);
+	/* XDP have extra tailroom as (most) drivers use full page */
+	max_data_sz = 4096 - headroom - tailroom;
+	if (size > max_data_sz)
+		return -EINVAL;
+
+	data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
 	if (IS_ERR(data))
 		return PTR_ERR(data);
 
 	xdp.data_hard_start = data;
-	xdp.data = data + XDP_PACKET_HEADROOM + NET_IP_ALIGN;
+	xdp.data = data + headroom;
 	xdp.data_meta = xdp.data;
 	xdp.data_end = xdp.data + size;
+	xdp.frame_sz = headroom + max_data_sz + tailroom;
 
 	rxqueue = __netif_get_rx_queue(current->nsproxy->net_ns->loopback_dev, 0);
 	xdp.rxq = &rxqueue->xdp_rxq;
@@ -496,8 +505,7 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 	ret = bpf_test_run(prog, &xdp, repeat, &retval, &duration, true);
 	if (ret)
 		goto out;
-	if (xdp.data != data + XDP_PACKET_HEADROOM + NET_IP_ALIGN ||
-	    xdp.data_end != xdp.data + size)
+	if (xdp.data != data + headroom || xdp.data_end != xdp.data + size)
 		size = xdp.data_end - xdp.data;
 	ret = bpf_test_finish(kattr, uattr, xdp.data, size, retval, duration);
 out:
-- 
cgit v1.2.3-59-g8ed1b


From 68545fb6f2ff621de26d96a3f15868abfb6897b0 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:40 +0200
Subject: selftests/bpf: Adjust BPF selftest for xdp_adjust_tail

The current selftest for BPF-helper xdp_adjust_tail only shrinks the
tail. Make it clearer that this is a shrink test case.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158945350058.97035.17280775016196207372.stgit@firesoul
---
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     |  9 +++++--
 .../testing/selftests/bpf/progs/test_adjust_tail.c | 30 ----------------------
 .../bpf/progs/test_xdp_adjust_tail_shrink.c        | 30 ++++++++++++++++++++++
 3 files changed, 37 insertions(+), 32 deletions(-)
 delete mode 100644 tools/testing/selftests/bpf/progs/test_adjust_tail.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index 6c8ca1c93f9b..a76dd81dfce9 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -2,9 +2,9 @@
 #include <test_progs.h>
 #include <network_helpers.h>
 
-void test_xdp_adjust_tail(void)
+void test_xdp_adjust_tail_shrink(void)
 {
-	const char *file = "./test_adjust_tail.o";
+	const char *file = "./test_xdp_adjust_tail_shrink.o";
 	struct bpf_object *obj;
 	char buf[128];
 	__u32 duration, retval, size;
@@ -28,3 +28,8 @@ void test_xdp_adjust_tail(void)
 	      err, errno, retval, size);
 	bpf_object__close(obj);
 }
+
+void test_xdp_adjust_tail(void)
+{
+	test_xdp_adjust_tail_shrink();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_adjust_tail.c b/tools/testing/selftests/bpf/progs/test_adjust_tail.c
deleted file mode 100644
index b7fc85769bdc..000000000000
--- a/tools/testing/selftests/bpf/progs/test_adjust_tail.c
+++ /dev/null
@@ -1,30 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0
- * Copyright (c) 2018 Facebook
- *
- * This program is free software; you can redistribute it and/or
- * modify it under the terms of version 2 of the GNU General Public
- * License as published by the Free Software Foundation.
- */
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <bpf/bpf_helpers.h>
-
-int _version SEC("version") = 1;
-
-SEC("xdp_adjust_tail")
-int _xdp_adjust_tail(struct xdp_md *xdp)
-{
-	void *data_end = (void *)(long)xdp->data_end;
-	void *data = (void *)(long)xdp->data;
-	int offset = 0;
-
-	if (data_end - data == 54)
-		offset = 256;
-	else
-		offset = 20;
-	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
-		return XDP_DROP;
-	return XDP_TX;
-}
-
-char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
new file mode 100644
index 000000000000..22065a9cfb25
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_shrink.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2018 Facebook
+ *
+ * This program is free software; you can redistribute it and/or
+ * modify it under the terms of version 2 of the GNU General Public
+ * License as published by the Free Software Foundation.
+ */
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <bpf/bpf_helpers.h>
+
+int _version SEC("version") = 1;
+
+SEC("xdp_adjust_tail_shrink")
+int _xdp_adjust_tail_shrink(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	int offset = 0;
+
+	if (data_end - data == 54) /* sizeof(pkt_v4) */
+		offset = 256; /* shrink too much */
+	else
+		offset = 20;
+	if (bpf_xdp_adjust_tail(xdp, 0 - offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From 7ae2e00e8fc23f10169079fadd388317d81012be Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Thu, 14 May 2020 12:51:45 +0200
Subject: selftests/bpf: Xdp_adjust_tail add grow tail tests

Extend BPF selftest xdp_adjust_tail with grow tail tests, which are
added as subtests. The first grow test stays in the same form as the
original shrink test. The second grow test uses the newer
bpf_prog_test_run_xattr() calls, and does extra checking of data
contents.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/158945350567.97035.9632611946765811876.stgit@firesoul
---
 .../selftests/bpf/prog_tests/xdp_adjust_tail.c     | 116 ++++++++++++++++++++-
 .../bpf/progs/test_xdp_adjust_tail_grow.c          |  33 ++++++
 2 files changed, 144 insertions(+), 5 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
index a76dd81dfce9..d5c98f2cb12f 100644
--- a/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_adjust_tail.c
@@ -5,10 +5,10 @@
 void test_xdp_adjust_tail_shrink(void)
 {
 	const char *file = "./test_xdp_adjust_tail_shrink.o";
+	__u32 duration, retval, size, expect_sz;
 	struct bpf_object *obj;
-	char buf[128];
-	__u32 duration, retval, size;
 	int err, prog_fd;
+	char buf[128];
 
 	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
 	if (CHECK_FAIL(err))
@@ -21,15 +21,121 @@ void test_xdp_adjust_tail_shrink(void)
 	      "ipv4", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
 
+	expect_sz = sizeof(pkt_v6) - 20;  /* Test shrink with 20 bytes */
 	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6),
 				buf, &size, &retval, &duration);
-	CHECK(err || retval != XDP_TX || size != 54,
-	      "ipv6", "err %d errno %d retval %d size %d\n",
+	CHECK(err || retval != XDP_TX || size != expect_sz,
+	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+	      err, errno, retval, size, expect_sz);
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	struct bpf_object *obj;
+	char buf[4096]; /* avoid segfault: large buf to hold grow results */
+	__u32 duration, retval, size, expect_sz;
+	int err, prog_fd;
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &prog_fd);
+	if (CHECK_FAIL(err))
+		return;
+
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v4, sizeof(pkt_v4),
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != XDP_DROP,
+	      "ipv4", "err %d errno %d retval %d size %d\n",
 	      err, errno, retval, size);
+
+	expect_sz = sizeof(pkt_v6) + 40; /* Test grow with 40 bytes */
+	err = bpf_prog_test_run(prog_fd, 1, &pkt_v6, sizeof(pkt_v6) /* 74 */,
+				buf, &size, &retval, &duration);
+	CHECK(err || retval != XDP_TX || size != expect_sz,
+	      "ipv6", "err %d errno %d retval %d size %d expect-size %d\n",
+	      err, errno, retval, size, expect_sz);
+
+	bpf_object__close(obj);
+}
+
+void test_xdp_adjust_tail_grow2(void)
+{
+	const char *file = "./test_xdp_adjust_tail_grow.o";
+	char buf[4096]; /* avoid segfault: large buf to hold grow results */
+	int tailroom = 320; /* SKB_DATA_ALIGN(sizeof(struct skb_shared_info))*/;
+	struct bpf_object *obj;
+	int err, cnt, i;
+	int max_grow;
+
+	struct bpf_prog_test_run_attr tattr = {
+		.repeat		= 1,
+		.data_in	= &buf,
+		.data_out	= &buf,
+		.data_size_in	= 0, /* Per test */
+		.data_size_out	= 0, /* Per test */
+	};
+
+	err = bpf_prog_load(file, BPF_PROG_TYPE_XDP, &obj, &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+
+	/* Test case-64 */
+	memset(buf, 1, sizeof(buf));
+	tattr.data_size_in  =  64; /* Determine test case via pkt size */
+	tattr.data_size_out = 128; /* Limit copy_size */
+	/* Kernel side alloc packet memory area that is zero init */
+	err = bpf_prog_test_run_xattr(&tattr);
+
+	CHECK_ATTR(errno != ENOSPC /* Due limit copy_size in bpf_test_finish */
+		   || tattr.retval != XDP_TX
+		   || tattr.data_size_out != 192, /* Expected grow size */
+		   "case-64",
+		   "err %d errno %d retval %d size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out);
+
+	/* Extra checks for data contents */
+	CHECK_ATTR(tattr.data_size_out != 192
+		   || buf[0]   != 1 ||  buf[63]  != 1  /*  0-63  memset to 1 */
+		   || buf[64]  != 0 ||  buf[127] != 0  /* 64-127 memset to 0 */
+		   || buf[128] != 1 ||  buf[191] != 1, /*128-191 memset to 1 */
+		   "case-64-data",
+		   "err %d errno %d retval %d size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out);
+
+	/* Test case-128 */
+	memset(buf, 2, sizeof(buf));
+	tattr.data_size_in  = 128; /* Determine test case via pkt size */
+	tattr.data_size_out = sizeof(buf);   /* Copy everything */
+	err = bpf_prog_test_run_xattr(&tattr);
+
+	max_grow = 4096 - XDP_PACKET_HEADROOM -	tailroom; /* 3520 */
+	CHECK_ATTR(err
+		   || tattr.retval != XDP_TX
+		   || tattr.data_size_out != max_grow,/* Expect max grow size */
+		   "case-128",
+		   "err %d errno %d retval %d size %d expect-size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out, max_grow);
+
+	/* Extra checks for data content: Count grow size, will contain zeros */
+	for (i = 0, cnt = 0; i < sizeof(buf); i++) {
+		if (buf[i] == 0)
+			cnt++;
+	}
+	CHECK_ATTR((cnt != (max_grow - tattr.data_size_in)) /* Grow increase */
+		   || tattr.data_size_out != max_grow, /* Total grow size */
+		   "case-128-data",
+		   "err %d errno %d retval %d size %d grow-size %d\n",
+		   err, errno, tattr.retval, tattr.data_size_out, cnt);
+
 	bpf_object__close(obj);
 }
 
 void test_xdp_adjust_tail(void)
 {
-	test_xdp_adjust_tail_shrink();
+	if (test__start_subtest("xdp_adjust_tail_shrink"))
+		test_xdp_adjust_tail_shrink();
+	if (test__start_subtest("xdp_adjust_tail_grow"))
+		test_xdp_adjust_tail_grow();
+	if (test__start_subtest("xdp_adjust_tail_grow2"))
+		test_xdp_adjust_tail_grow2();
 }
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
new file mode 100644
index 000000000000..3d66599eee2e
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_adjust_tail_grow.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_adjust_tail_grow")
+int _xdp_adjust_tail_grow(struct xdp_md *xdp)
+{
+	void *data_end = (void *)(long)xdp->data_end;
+	void *data = (void *)(long)xdp->data;
+	unsigned int data_len;
+	int offset = 0;
+
+	/* Data length determine test case */
+	data_len = data_end - data;
+
+	if (data_len == 54) { /* sizeof(pkt_v4) */
+		offset = 4096; /* test too large offset */
+	} else if (data_len == 74) { /* sizeof(pkt_v6) */
+		offset = 40;
+	} else if (data_len == 64) {
+		offset = 128;
+	} else if (data_len == 128) {
+		offset = 4096 - 256 - 320 - data_len; /* Max tail grow 3520 */
+	} else {
+		return XDP_ABORTED; /* No matching test */
+	}
+
+	if (bpf_xdp_adjust_tail(xdp, offset))
+		return XDP_DROP;
+	return XDP_TX;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From 0ee52c0f6c67e187ff1906f6048af7c96df320c7 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Wed, 13 May 2020 09:58:49 +0200
Subject: bpf, bpftool: Allow probing for CONFIG_HZ from kernel config

In Cilium we've recently switched to make use of bpf_jiffies64() for
parts of our tc and XDP datapath since bpf_ktime_get_ns() is more
expensive and high-precision is not needed for our timeouts we have
anyway. Our agent has a probe manager which picks up the json of
bpftool's feature probe and we also use the macro output in our C
programs e.g. to have workarounds when helpers are not available on
older kernels.

Extend the kernel config info dump to also include the kernel's
CONFIG_HZ, and rework probe_kernel_image_config() to allow a macro
dump such that CONFIG_HZ can be propagated to BPF C code as a simple
define if available via config. The latter allows _compile-time_
resolution of jiffies <-> sec conversion in our code since all are
propagated as known constants.

Given we cannot generally assume availability of kconfig everywhere,
we also have a kernel hz probe [0] as a fallback. Potentially, bpftool
could have an integrated probe fallback as well, although to derive it,
we might need to place it under 'bpftool feature probe full' or similar
given it would slow down the probing process overall. Yet 'full' doesn't
fit either for us since we don't want to pollute the kernel log with
warning messages from bpf_probe_write_user() and bpf_trace_printk() on
agent startup; I've left it out for the time being.

  [0] https://github.com/cilium/cilium/blob/master/bpf/cilium-probe-kernel-hz.c

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Quentin Monnet <quentin@isovalent.com>
Cc: Martin KaFai Lau <kafai@fb.com>
Link: https://lore.kernel.org/bpf/20200513075849.20868-1-daniel@iogearbox.net
---
 tools/bpf/bpftool/feature.c | 120 +++++++++++++++++++++++++-------------------
 1 file changed, 67 insertions(+), 53 deletions(-)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index f54347f55ee0..1b73e63274b5 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -80,13 +80,12 @@ print_bool_feature(const char *feat_name, const char *plain_name,
 		printf("%s is %savailable\n", plain_name, res ? "" : "NOT ");
 }
 
-static void print_kernel_option(const char *name, const char *value)
+static void print_kernel_option(const char *name, const char *value,
+				const char *define_prefix)
 {
 	char *endptr;
 	int res;
 
-	/* No support for C-style ouptut */
-
 	if (json_output) {
 		if (!value) {
 			jsonw_null_field(json_wtr, name);
@@ -98,6 +97,12 @@ static void print_kernel_option(const char *name, const char *value)
 			jsonw_int_field(json_wtr, name, res);
 		else
 			jsonw_string_field(json_wtr, name, value);
+	} else if (define_prefix) {
+		if (value)
+			printf("#define %s%s %s\n", define_prefix,
+			       name, value);
+		else
+			printf("/* %s%s is not set */\n", define_prefix, name);
 	} else {
 		if (value)
 			printf("%s is set to %s\n", name, value);
@@ -315,77 +320,84 @@ static bool read_next_kernel_config_option(gzFile file, char *buf, size_t n,
 	return false;
 }
 
-static void probe_kernel_image_config(void)
+static void probe_kernel_image_config(const char *define_prefix)
 {
-	static const char * const options[] = {
+	static const struct {
+		const char * const name;
+		bool macro_dump;
+	} options[] = {
 		/* Enable BPF */
-		"CONFIG_BPF",
+		{ "CONFIG_BPF", },
 		/* Enable bpf() syscall */
-		"CONFIG_BPF_SYSCALL",
+		{ "CONFIG_BPF_SYSCALL", },
 		/* Does selected architecture support eBPF JIT compiler */
-		"CONFIG_HAVE_EBPF_JIT",
+		{ "CONFIG_HAVE_EBPF_JIT", },
 		/* Compile eBPF JIT compiler */
-		"CONFIG_BPF_JIT",
+		{ "CONFIG_BPF_JIT", },
 		/* Avoid compiling eBPF interpreter (use JIT only) */
-		"CONFIG_BPF_JIT_ALWAYS_ON",
+		{ "CONFIG_BPF_JIT_ALWAYS_ON", },
 
 		/* cgroups */
-		"CONFIG_CGROUPS",
+		{ "CONFIG_CGROUPS", },
 		/* BPF programs attached to cgroups */
-		"CONFIG_CGROUP_BPF",
+		{ "CONFIG_CGROUP_BPF", },
 		/* bpf_get_cgroup_classid() helper */
-		"CONFIG_CGROUP_NET_CLASSID",
+		{ "CONFIG_CGROUP_NET_CLASSID", },
 		/* bpf_skb_{,ancestor_}cgroup_id() helpers */
-		"CONFIG_SOCK_CGROUP_DATA",
+		{ "CONFIG_SOCK_CGROUP_DATA", },
 
 		/* Tracing: attach BPF to kprobes, tracepoints, etc. */
-		"CONFIG_BPF_EVENTS",
+		{ "CONFIG_BPF_EVENTS", },
 		/* Kprobes */
-		"CONFIG_KPROBE_EVENTS",
+		{ "CONFIG_KPROBE_EVENTS", },
 		/* Uprobes */
-		"CONFIG_UPROBE_EVENTS",
+		{ "CONFIG_UPROBE_EVENTS", },
 		/* Tracepoints */
-		"CONFIG_TRACING",
+		{ "CONFIG_TRACING", },
 		/* Syscall tracepoints */
-		"CONFIG_FTRACE_SYSCALLS",
+		{ "CONFIG_FTRACE_SYSCALLS", },
 		/* bpf_override_return() helper support for selected arch */
-		"CONFIG_FUNCTION_ERROR_INJECTION",
+		{ "CONFIG_FUNCTION_ERROR_INJECTION", },
 		/* bpf_override_return() helper */
-		"CONFIG_BPF_KPROBE_OVERRIDE",
+		{ "CONFIG_BPF_KPROBE_OVERRIDE", },
 
 		/* Network */
-		"CONFIG_NET",
+		{ "CONFIG_NET", },
 		/* AF_XDP sockets */
-		"CONFIG_XDP_SOCKETS",
+		{ "CONFIG_XDP_SOCKETS", },
 		/* BPF_PROG_TYPE_LWT_* and related helpers */
-		"CONFIG_LWTUNNEL_BPF",
+		{ "CONFIG_LWTUNNEL_BPF", },
 		/* BPF_PROG_TYPE_SCHED_ACT, TC (traffic control) actions */
-		"CONFIG_NET_ACT_BPF",
+		{ "CONFIG_NET_ACT_BPF", },
 		/* BPF_PROG_TYPE_SCHED_CLS, TC filters */
-		"CONFIG_NET_CLS_BPF",
+		{ "CONFIG_NET_CLS_BPF", },
 		/* TC clsact qdisc */
-		"CONFIG_NET_CLS_ACT",
+		{ "CONFIG_NET_CLS_ACT", },
 		/* Ingress filtering with TC */
-		"CONFIG_NET_SCH_INGRESS",
+		{ "CONFIG_NET_SCH_INGRESS", },
 		/* bpf_skb_get_xfrm_state() helper */
-		"CONFIG_XFRM",
+		{ "CONFIG_XFRM", },
 		/* bpf_get_route_realm() helper */
-		"CONFIG_IP_ROUTE_CLASSID",
+		{ "CONFIG_IP_ROUTE_CLASSID", },
 		/* BPF_PROG_TYPE_LWT_SEG6_LOCAL and related helpers */
-		"CONFIG_IPV6_SEG6_BPF",
+		{ "CONFIG_IPV6_SEG6_BPF", },
 		/* BPF_PROG_TYPE_LIRC_MODE2 and related helpers */
-		"CONFIG_BPF_LIRC_MODE2",
+		{ "CONFIG_BPF_LIRC_MODE2", },
 		/* BPF stream parser and BPF socket maps */
-		"CONFIG_BPF_STREAM_PARSER",
+		{ "CONFIG_BPF_STREAM_PARSER", },
 		/* xt_bpf module for passing BPF programs to netfilter  */
-		"CONFIG_NETFILTER_XT_MATCH_BPF",
+		{ "CONFIG_NETFILTER_XT_MATCH_BPF", },
 		/* bpfilter back-end for iptables */
-		"CONFIG_BPFILTER",
+		{ "CONFIG_BPFILTER", },
 		/* bpftilter module with "user mode helper" */
-		"CONFIG_BPFILTER_UMH",
+		{ "CONFIG_BPFILTER_UMH", },
 
 		/* test_bpf module for BPF tests */
-		"CONFIG_TEST_BPF",
+		{ "CONFIG_TEST_BPF", },
+
+		/* Misc configs useful in BPF C programs */
+		/* jiffies <-> sec conversion for bpf_jiffies64() helper */
+		{ "CONFIG_HZ", true, }
 	};
 	char *values[ARRAY_SIZE(options)] = { };
 	struct utsname utsn;
@@ -427,7 +439,8 @@ static void probe_kernel_image_config(void)
 
 	while (read_next_kernel_config_option(file, buf, sizeof(buf), &value)) {
 		for (i = 0; i < ARRAY_SIZE(options); i++) {
-			if (values[i] || strcmp(buf, options[i]))
+			if ((define_prefix && !options[i].macro_dump) ||
+			    values[i] || strcmp(buf, options[i].name))
 				continue;
 
 			values[i] = strdup(value);
@@ -439,7 +452,9 @@ end_parse:
 		gzclose(file);
 
 	for (i = 0; i < ARRAY_SIZE(options); i++) {
-		print_kernel_option(options[i], values[i]);
+		if (define_prefix && !options[i].macro_dump)
+			continue;
+		print_kernel_option(options[i].name, values[i], define_prefix);
 		free(values[i]);
 	}
 }
@@ -632,23 +647,22 @@ section_system_config(enum probe_component target, const char *define_prefix)
 	switch (target) {
 	case COMPONENT_KERNEL:
 	case COMPONENT_UNSPEC:
-		if (define_prefix)
-			break;
-
 		print_start_section("system_config",
 				    "Scanning system configuration...",
-				    NULL, /* define_comment never used here */
-				    NULL); /* define_prefix always NULL here */
-		if (check_procfs()) {
-			probe_unprivileged_disabled();
-			probe_jit_enable();
-			probe_jit_harden();
-			probe_jit_kallsyms();
-			probe_jit_limit();
-		} else {
-			p_info("/* procfs not mounted, skipping related probes */");
+				    "/*** Misc kernel config items ***/",
+				    define_prefix);
+		if (!define_prefix) {
+			if (check_procfs()) {
+				probe_unprivileged_disabled();
+				probe_jit_enable();
+				probe_jit_harden();
+				probe_jit_kallsyms();
+				probe_jit_limit();
+			} else {
+				p_info("/* procfs not mounted, skipping related probes */");
+			}
 		}
-		probe_kernel_image_config();
+		probe_kernel_image_config(define_prefix);
 		print_end_section();
 		break;
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From a17b53c4a4b55ec322c132b6670743612229ee9c Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 13 May 2020 16:03:53 -0700
Subject: bpf, capability: Introduce CAP_BPF

Split BPF operations that are allowed under CAP_SYS_ADMIN into
combination of CAP_BPF, CAP_PERFMON, CAP_NET_ADMIN.
For backward compatibility include them in CAP_SYS_ADMIN as well.

The end result provides simple safety model for applications that use BPF:
- to load tracing program types
  BPF_PROG_TYPE_{KPROBE, TRACEPOINT, PERF_EVENT, RAW_TRACEPOINT, etc}
  use CAP_BPF and CAP_PERFMON
- to load networking program types
  BPF_PROG_TYPE_{SCHED_CLS, XDP, SK_SKB, etc}
  use CAP_BPF and CAP_NET_ADMIN

There are a few exceptions to this rule:
- bpf_trace_printk() is allowed in networking programs, but it's using
  tracing mechanism, hence this helper needs additional CAP_PERFMON
  if networking program is using this helper.
- BPF_F_ZERO_SEED flag for hash/lru map is allowed under CAP_SYS_ADMIN only
  to discourage production use.
- BPF HW offload is allowed under CAP_SYS_ADMIN.
- bpf_probe_write_user() is allowed under CAP_SYS_ADMIN only.

CAPs are not checked at attach/detach time with two exceptions:
- loading BPF_PROG_TYPE_CGROUP_SKB is allowed for unprivileged users,
  hence CAP_NET_ADMIN is required at attach time.
- flow_dissector detach doesn't check prog FD at detach,
  hence CAP_NET_ADMIN is required at detach time.

CAP_SYS_ADMIN is required to iterate BPF objects (progs, maps, links) via get_next_id
command and convert them to file descriptor via GET_FD_BY_ID command.
This restriction guarantees that multiple tasks with CAP_BPF are not able to
affect each other. That leads to clean isolation of tasks. For example:
task A with CAP_BPF and CAP_NET_ADMIN loads and attaches a firewall via bpf_link.
task B with the same capabilities cannot detach that firewall unless
task A explicitly passed link FD to task B via scm_rights or bpffs.
CAP_SYS_ADMIN can still detach/unload everything.

Two networking user apps with CAP_SYS_ADMIN and CAP_NET_ADMIN can
accidentally mess with each other's programs and maps.
Two networking user apps with CAP_NET_ADMIN and CAP_BPF cannot affect each other.

CAP_NET_ADMIN + CAP_BPF allows networking programs access only packet data.
Such networking progs cannot access arbitrary kernel memory or leak pointers.

bpftool, bpftrace, bcc tools binaries should NOT be installed with
CAP_BPF and CAP_PERFMON, since unpriv users will be able to read kernel secrets.
But users with these two permissions will be able to use these tracing tools.

CAP_PERFMON is least secure, since it allows kprobes and kernel memory access.
CAP_NET_ADMIN can stop network traffic via iproute2.
CAP_BPF is the safest from security point of view and harmless on its own.

Having CAP_BPF and/or CAP_NET_ADMIN is not enough to write into arbitrary map
and if that map is used by firewall-like bpf prog.
CAP_BPF allows many bpf prog_load commands in parallel. The verifier
may consume large amount of memory and significantly slow down the system.

Existing unprivileged BPF operations are not affected.
In particular unprivileged users are allowed to load socket_filter and cg_skb
program types and to create array, hash, prog_array, map-in-map map types.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200513230355.7858-2-alexei.starovoitov@gmail.com
---
 include/linux/capability.h          |  5 +++++
 include/uapi/linux/capability.h     | 34 +++++++++++++++++++++++++++++++++-
 security/selinux/include/classmap.h |  4 ++--
 3 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/include/linux/capability.h b/include/linux/capability.h
index 027d7e4a853b..b4345b38a6be 100644
--- a/include/linux/capability.h
+++ b/include/linux/capability.h
@@ -256,6 +256,11 @@ static inline bool perfmon_capable(void)
 	return capable(CAP_PERFMON) || capable(CAP_SYS_ADMIN);
 }
 
+static inline bool bpf_capable(void)
+{
+	return capable(CAP_BPF) || capable(CAP_SYS_ADMIN);
+}
+
 /* audit system wants to get cap info from files as well */
 extern int get_vfs_caps_from_disk(const struct dentry *dentry, struct cpu_vfs_cap_data *cpu_caps);
 
diff --git a/include/uapi/linux/capability.h b/include/uapi/linux/capability.h
index e58c9636741b..c7372180a0a9 100644
--- a/include/uapi/linux/capability.h
+++ b/include/uapi/linux/capability.h
@@ -274,6 +274,7 @@ struct vfs_ns_cap_data {
    arbitrary SCSI commands */
 /* Allow setting encryption key on loopback filesystem */
 /* Allow setting zone reclaim policy */
+/* Allow everything under CAP_BPF and CAP_PERFMON for backward compatibility */
 
 #define CAP_SYS_ADMIN        21
 
@@ -374,7 +375,38 @@ struct vfs_ns_cap_data {
 
 #define CAP_PERFMON		38
 
-#define CAP_LAST_CAP         CAP_PERFMON
+/*
+ * CAP_BPF allows the following BPF operations:
+ * - Creating all types of BPF maps
+ * - Advanced verifier features
+ *   - Indirect variable access
+ *   - Bounded loops
+ *   - BPF to BPF function calls
+ *   - Scalar precision tracking
+ *   - Larger complexity limits
+ *   - Dead code elimination
+ *   - And potentially other features
+ * - Loading BPF Type Format (BTF) data
+ * - Retrieve xlated and JITed code of BPF programs
+ * - Use bpf_spin_lock() helper
+ *
+ * CAP_PERFMON relaxes the verifier checks further:
+ * - BPF progs can use pointer-to-integer conversions
+ * - speculation attack hardening measures are bypassed
+ * - bpf_probe_read to read arbitrary kernel memory is allowed
+ * - bpf_trace_printk to print kernel memory is allowed
+ *
+ * CAP_SYS_ADMIN is required to use bpf_probe_write_user.
+ *
+ * CAP_SYS_ADMIN is required to iterate system wide loaded
+ * programs, maps, links, BTFs and convert their IDs to file descriptors.
+ *
+ * CAP_PERFMON and CAP_BPF are required to load tracing programs.
+ * CAP_NET_ADMIN and CAP_BPF are required to load networking programs.
+ */
+#define CAP_BPF			39
+
+#define CAP_LAST_CAP         CAP_BPF
 
 #define cap_valid(x) ((x) >= 0 && (x) <= CAP_LAST_CAP)
 
diff --git a/security/selinux/include/classmap.h b/security/selinux/include/classmap.h
index d233ab3f1533..98e1513b608a 100644
--- a/security/selinux/include/classmap.h
+++ b/security/selinux/include/classmap.h
@@ -27,9 +27,9 @@
 	    "audit_control", "setfcap"
 
 #define COMMON_CAP2_PERMS  "mac_override", "mac_admin", "syslog", \
-		"wake_alarm", "block_suspend", "audit_read", "perfmon"
+		"wake_alarm", "block_suspend", "audit_read", "perfmon", "bpf"
 
-#if CAP_LAST_CAP > CAP_PERFMON
+#if CAP_LAST_CAP > CAP_BPF
 #error New capability defined, please update COMMON_CAP2_PERMS.
 #endif
 
-- 
cgit v1.2.3-59-g8ed1b


From 2c78ee898d8f10ae6fb2fa23a3fbaec96b1b7366 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 13 May 2020 16:03:54 -0700
Subject: bpf: Implement CAP_BPF

Implement permissions as stated in uapi/linux/capability.h
In order to do that the verifier allow_ptr_leaks flag is split
into four flags and they are set as:
  env->allow_ptr_leaks = bpf_allow_ptr_leaks();
  env->bypass_spec_v1 = bpf_bypass_spec_v1();
  env->bypass_spec_v4 = bpf_bypass_spec_v4();
  env->bpf_capable = bpf_capable();

The first three are currently equivalent to perfmon_capable(), since leaking
kernel pointers and reading kernel memory via side channel attacks is roughly
equivalent to reading kernel memory with cap_perfmon.

'bpf_capable' enables bounded loops, precision tracking, bpf to bpf calls and
other verifier features. 'allow_ptr_leaks' enables ptr leaks, ptr conversions,
subtraction of pointers. 'bypass_spec_v1' disables speculative analysis in the
verifier, run time mitigations in bpf array, and enables indirect variable
access in bpf programs. 'bypass_spec_v4' disables emission of sanitation code
by the verifier.

That means that the networking BPF program loaded with CAP_BPF + CAP_NET_ADMIN
will have speculative checks done by the verifier and other spectre mitigation
applied. Such networking BPF program will not be able to leak kernel pointers
and will not be able to access arbitrary kernel memory.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200513230355.7858-3-alexei.starovoitov@gmail.com
---
 drivers/media/rc/bpf-lirc.c   |  2 +-
 include/linux/bpf.h           | 18 ++++++++-
 include/linux/bpf_verifier.h  |  3 ++
 kernel/bpf/arraymap.c         | 10 ++---
 kernel/bpf/bpf_struct_ops.c   |  2 +-
 kernel/bpf/core.c             |  2 +-
 kernel/bpf/cpumap.c           |  2 +-
 kernel/bpf/hashtab.c          |  4 +-
 kernel/bpf/helpers.c          |  4 +-
 kernel/bpf/lpm_trie.c         |  2 +-
 kernel/bpf/map_in_map.c       |  2 +-
 kernel/bpf/queue_stack_maps.c |  2 +-
 kernel/bpf/reuseport_array.c  |  2 +-
 kernel/bpf/stackmap.c         |  2 +-
 kernel/bpf/syscall.c          | 89 +++++++++++++++++++++++++++++++++----------
 kernel/bpf/verifier.c         | 37 +++++++++---------
 kernel/trace/bpf_trace.c      |  3 ++
 net/core/bpf_sk_storage.c     |  4 +-
 net/core/filter.c             |  4 +-
 19 files changed, 134 insertions(+), 60 deletions(-)

diff --git a/drivers/media/rc/bpf-lirc.c b/drivers/media/rc/bpf-lirc.c
index 069c42f22a8c..5bb144435c16 100644
--- a/drivers/media/rc/bpf-lirc.c
+++ b/drivers/media/rc/bpf-lirc.c
@@ -110,7 +110,7 @@ lirc_mode2_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	case BPF_FUNC_get_prandom_u32:
 		return &bpf_get_prandom_u32_proto;
 	case BPF_FUNC_trace_printk:
-		if (capable(CAP_SYS_ADMIN))
+		if (perfmon_capable())
 			return bpf_get_trace_printk_proto();
 		/* fall through */
 	default:
diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index c45d198ac38c..efe8836b5c48 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -19,6 +19,7 @@
 #include <linux/mutex.h>
 #include <linux/module.h>
 #include <linux/kallsyms.h>
+#include <linux/capability.h>
 
 struct bpf_verifier_env;
 struct bpf_verifier_log;
@@ -119,7 +120,7 @@ struct bpf_map {
 	struct bpf_map_memory memory;
 	char name[BPF_OBJ_NAME_LEN];
 	u32 btf_vmlinux_value_type_id;
-	bool unpriv_array;
+	bool bypass_spec_v1;
 	bool frozen; /* write-once; write-protected by freeze_mutex */
 	/* 22 bytes hole */
 
@@ -1095,6 +1096,21 @@ struct bpf_map *bpf_map_get_curr_or_next(u32 *id);
 
 extern int sysctl_unprivileged_bpf_disabled;
 
+static inline bool bpf_allow_ptr_leaks(void)
+{
+	return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v1(void)
+{
+	return perfmon_capable();
+}
+
+static inline bool bpf_bypass_spec_v4(void)
+{
+	return perfmon_capable();
+}
+
 int bpf_map_new_fd(struct bpf_map *map, int flags);
 int bpf_prog_new_fd(struct bpf_prog *prog);
 
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index 6abd5a778fcd..ea833087e853 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -375,6 +375,9 @@ struct bpf_verifier_env {
 	u32 used_map_cnt;		/* number of used maps */
 	u32 id_gen;			/* used to generate unique reg IDs */
 	bool allow_ptr_leaks;
+	bool bpf_capable;
+	bool bypass_spec_v1;
+	bool bypass_spec_v4;
 	bool seen_direct_write;
 	struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */
 	const struct bpf_line_info *prev_linfo;
diff --git a/kernel/bpf/arraymap.c b/kernel/bpf/arraymap.c
index 95d77770353c..1d5bb0d983b2 100644
--- a/kernel/bpf/arraymap.c
+++ b/kernel/bpf/arraymap.c
@@ -77,7 +77,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	bool percpu = attr->map_type == BPF_MAP_TYPE_PERCPU_ARRAY;
 	int ret, numa_node = bpf_map_attr_numa_node(attr);
 	u32 elem_size, index_mask, max_entries;
-	bool unpriv = !capable(CAP_SYS_ADMIN);
+	bool bypass_spec_v1 = bpf_bypass_spec_v1();
 	u64 cost, array_size, mask64;
 	struct bpf_map_memory mem;
 	struct bpf_array *array;
@@ -95,7 +95,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 	mask64 -= 1;
 
 	index_mask = mask64;
-	if (unpriv) {
+	if (!bypass_spec_v1) {
 		/* round up array size to nearest power of 2,
 		 * since cpu will speculate within index_mask limits
 		 */
@@ -149,7 +149,7 @@ static struct bpf_map *array_map_alloc(union bpf_attr *attr)
 		return ERR_PTR(-ENOMEM);
 	}
 	array->index_mask = index_mask;
-	array->map.unpriv_array = unpriv;
+	array->map.bypass_spec_v1 = bypass_spec_v1;
 
 	/* copy mandatory map attributes */
 	bpf_map_init_from_attr(&array->map, attr);
@@ -219,7 +219,7 @@ static u32 array_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (map->unpriv_array) {
+	if (!map->bypass_spec_v1) {
 		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 4);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
 	} else {
@@ -1053,7 +1053,7 @@ static u32 array_of_map_gen_lookup(struct bpf_map *map,
 
 	*insn++ = BPF_ALU64_IMM(BPF_ADD, map_ptr, offsetof(struct bpf_array, value));
 	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	if (map->unpriv_array) {
+	if (!map->bypass_spec_v1) {
 		*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 6);
 		*insn++ = BPF_ALU32_IMM(BPF_AND, ret, array->index_mask);
 	} else {
diff --git a/kernel/bpf/bpf_struct_ops.c b/kernel/bpf/bpf_struct_ops.c
index 26cb51f2db72..c6b0decaa46a 100644
--- a/kernel/bpf/bpf_struct_ops.c
+++ b/kernel/bpf/bpf_struct_ops.c
@@ -557,7 +557,7 @@ static struct bpf_map *bpf_struct_ops_map_alloc(union bpf_attr *attr)
 	struct bpf_map *map;
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	st_ops = bpf_struct_ops_find_value(attr->btf_vmlinux_value_type_id);
diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index 6aa11de67315..c40ff4cf9880 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -646,7 +646,7 @@ static bool bpf_prog_kallsyms_verify_off(const struct bpf_prog *fp)
 void bpf_prog_kallsyms_add(struct bpf_prog *fp)
 {
 	if (!bpf_prog_kallsyms_candidate(fp) ||
-	    !capable(CAP_SYS_ADMIN))
+	    !bpf_capable())
 		return;
 
 	bpf_prog_ksym_set_addr(fp);
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index a71790dab12d..8b85bfddfac7 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -85,7 +85,7 @@ static struct bpf_map *cpu_map_alloc(union bpf_attr *attr)
 	u64 cost;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/hashtab.c b/kernel/bpf/hashtab.c
index d541c8486c95..b4b288a3c3c9 100644
--- a/kernel/bpf/hashtab.c
+++ b/kernel/bpf/hashtab.c
@@ -359,9 +359,9 @@ static int htab_map_alloc_check(union bpf_attr *attr)
 	BUILD_BUG_ON(offsetof(struct htab_elem, fnode.next) !=
 		     offsetof(struct htab_elem, hash_node.pprev));
 
-	if (lru && !capable(CAP_SYS_ADMIN))
+	if (lru && !bpf_capable())
 		/* LRU implementation is much complicated than other
-		 * maps.  Hence, limit to CAP_SYS_ADMIN for now.
+		 * maps.  Hence, limit to CAP_BPF.
 		 */
 		return -EPERM;
 
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 5c0290e0696e..886949fdcece 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -633,7 +633,7 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		break;
 	}
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return NULL;
 
 	switch (func_id) {
@@ -642,6 +642,8 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 	case BPF_FUNC_spin_unlock:
 		return &bpf_spin_unlock_proto;
 	case BPF_FUNC_trace_printk:
+		if (!perfmon_capable())
+			return NULL;
 		return bpf_get_trace_printk_proto();
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
diff --git a/kernel/bpf/lpm_trie.c b/kernel/bpf/lpm_trie.c
index 65c236cf341e..c8cc4e4cf98d 100644
--- a/kernel/bpf/lpm_trie.c
+++ b/kernel/bpf/lpm_trie.c
@@ -543,7 +543,7 @@ static struct bpf_map *trie_alloc(union bpf_attr *attr)
 	u64 cost = sizeof(*trie), cost_per_node;
 	int ret;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/map_in_map.c b/kernel/bpf/map_in_map.c
index b3c48d1533cb..17738c93bec8 100644
--- a/kernel/bpf/map_in_map.c
+++ b/kernel/bpf/map_in_map.c
@@ -60,7 +60,7 @@ struct bpf_map *bpf_map_meta_alloc(int inner_map_ufd)
 	/* Misc members not needed in bpf_map_meta_equal() check. */
 	inner_map_meta->ops = inner_map->ops;
 	if (inner_map->ops == &array_map_ops) {
-		inner_map_meta->unpriv_array = inner_map->unpriv_array;
+		inner_map_meta->bypass_spec_v1 = inner_map->bypass_spec_v1;
 		container_of(inner_map_meta, struct bpf_array, map)->index_mask =
 		     container_of(inner_map, struct bpf_array, map)->index_mask;
 	}
diff --git a/kernel/bpf/queue_stack_maps.c b/kernel/bpf/queue_stack_maps.c
index 30e1373fd437..05c8e043b9d2 100644
--- a/kernel/bpf/queue_stack_maps.c
+++ b/kernel/bpf/queue_stack_maps.c
@@ -45,7 +45,7 @@ static bool queue_stack_map_is_full(struct bpf_queue_stack *qs)
 /* Called from syscall */
 static int queue_stack_map_alloc_check(union bpf_attr *attr)
 {
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return -EPERM;
 
 	/* check sanity of attributes */
diff --git a/kernel/bpf/reuseport_array.c b/kernel/bpf/reuseport_array.c
index 01badd3eda7a..21cde24386db 100644
--- a/kernel/bpf/reuseport_array.c
+++ b/kernel/bpf/reuseport_array.c
@@ -154,7 +154,7 @@ static struct bpf_map *reuseport_array_alloc(union bpf_attr *attr)
 	struct bpf_map_memory mem;
 	u64 array_size;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	array_size = sizeof(*array);
diff --git a/kernel/bpf/stackmap.c b/kernel/bpf/stackmap.c
index db76339fe358..7b8381ce40a0 100644
--- a/kernel/bpf/stackmap.c
+++ b/kernel/bpf/stackmap.c
@@ -93,7 +93,7 @@ static struct bpf_map *stack_map_alloc(union bpf_attr *attr)
 	u64 cost, n_buckets;
 	int err;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	if (attr->map_flags & ~STACK_CREATE_FLAG_MASK)
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index de2a75500233..79bcd8d056d2 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1534,7 +1534,7 @@ static int map_freeze(const union bpf_attr *attr)
 		err = -EBUSY;
 		goto err_put;
 	}
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!bpf_capable()) {
 		err = -EPERM;
 		goto err_put;
 	}
@@ -2009,6 +2009,55 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 	}
 }
 
+static bool is_net_admin_prog_type(enum bpf_prog_type prog_type)
+{
+	switch (prog_type) {
+	case BPF_PROG_TYPE_SCHED_CLS:
+	case BPF_PROG_TYPE_SCHED_ACT:
+	case BPF_PROG_TYPE_XDP:
+	case BPF_PROG_TYPE_LWT_IN:
+	case BPF_PROG_TYPE_LWT_OUT:
+	case BPF_PROG_TYPE_LWT_XMIT:
+	case BPF_PROG_TYPE_LWT_SEG6LOCAL:
+	case BPF_PROG_TYPE_SK_SKB:
+	case BPF_PROG_TYPE_SK_MSG:
+	case BPF_PROG_TYPE_LIRC_MODE2:
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+	case BPF_PROG_TYPE_CGROUP_DEVICE:
+	case BPF_PROG_TYPE_CGROUP_SOCK:
+	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
+	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
+	case BPF_PROG_TYPE_CGROUP_SYSCTL:
+	case BPF_PROG_TYPE_SOCK_OPS:
+	case BPF_PROG_TYPE_EXT: /* extends any prog */
+		return true;
+	case BPF_PROG_TYPE_CGROUP_SKB:
+		/* always unpriv */
+	case BPF_PROG_TYPE_SK_REUSEPORT:
+		/* equivalent to SOCKET_FILTER. need CAP_BPF only */
+	default:
+		return false;
+	}
+}
+
+static bool is_perfmon_prog_type(enum bpf_prog_type prog_type)
+{
+	switch (prog_type) {
+	case BPF_PROG_TYPE_KPROBE:
+	case BPF_PROG_TYPE_TRACEPOINT:
+	case BPF_PROG_TYPE_PERF_EVENT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT:
+	case BPF_PROG_TYPE_RAW_TRACEPOINT_WRITABLE:
+	case BPF_PROG_TYPE_TRACING:
+	case BPF_PROG_TYPE_LSM:
+	case BPF_PROG_TYPE_STRUCT_OPS: /* has access to struct sock */
+	case BPF_PROG_TYPE_EXT: /* extends any prog */
+		return true;
+	default:
+		return false;
+	}
+}
+
 /* last field in 'union bpf_attr' used by this command */
 #define	BPF_PROG_LOAD_LAST_FIELD attach_prog_fd
 
@@ -2031,7 +2080,7 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 
 	if (!IS_ENABLED(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) &&
 	    (attr->prog_flags & BPF_F_ANY_ALIGNMENT) &&
-	    !capable(CAP_SYS_ADMIN))
+	    !bpf_capable())
 		return -EPERM;
 
 	/* copy eBPF program license from user space */
@@ -2044,11 +2093,16 @@ static int bpf_prog_load(union bpf_attr *attr, union bpf_attr __user *uattr)
 	is_gpl = license_is_gpl_compatible(license);
 
 	if (attr->insn_cnt == 0 ||
-	    attr->insn_cnt > (capable(CAP_SYS_ADMIN) ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
+	    attr->insn_cnt > (bpf_capable() ? BPF_COMPLEXITY_LIMIT_INSNS : BPF_MAXINSNS))
 		return -E2BIG;
 	if (type != BPF_PROG_TYPE_SOCKET_FILTER &&
 	    type != BPF_PROG_TYPE_CGROUP_SKB &&
-	    !capable(CAP_SYS_ADMIN))
+	    !bpf_capable())
+		return -EPERM;
+
+	if (is_net_admin_prog_type(type) && !capable(CAP_NET_ADMIN))
+		return -EPERM;
+	if (is_perfmon_prog_type(type) && !perfmon_capable())
 		return -EPERM;
 
 	bpf_prog_load_fixup_attach_type(attr);
@@ -2682,6 +2736,11 @@ static int bpf_prog_attach_check_attach_type(const struct bpf_prog *prog,
 	case BPF_PROG_TYPE_CGROUP_SOCKOPT:
 		return attach_type == prog->expected_attach_type ? 0 : -EINVAL;
 	case BPF_PROG_TYPE_CGROUP_SKB:
+		if (!capable(CAP_NET_ADMIN))
+			/* cg-skb progs can be loaded by unpriv user.
+			 * check permissions at attach time.
+			 */
+			return -EPERM;
 		return prog->enforce_expected_attach_type &&
 			prog->expected_attach_type != attach_type ?
 			-EINVAL : 0;
@@ -2747,9 +2806,6 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 	struct bpf_prog *prog;
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	if (CHECK_ATTR(BPF_PROG_ATTACH))
 		return -EINVAL;
 
@@ -2804,9 +2860,6 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 {
 	enum bpf_prog_type ptype;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	if (CHECK_ATTR(BPF_PROG_DETACH))
 		return -EINVAL;
 
@@ -2819,6 +2872,8 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_LIRC_MODE2:
 		return lirc_prog_detach(attr);
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+		if (!capable(CAP_NET_ADMIN))
+			return -EPERM;
 		return skb_flow_dissector_bpf_prog_detach(attr);
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_CGROUP_SKB:
@@ -2882,8 +2937,6 @@ static int bpf_prog_test_run(const union bpf_attr *attr,
 	struct bpf_prog *prog;
 	int ret = -ENOTSUPP;
 
-	if (!capable(CAP_SYS_ADMIN))
-		return -EPERM;
 	if (CHECK_ATTR(BPF_PROG_TEST_RUN))
 		return -EINVAL;
 
@@ -3184,7 +3237,7 @@ static int bpf_prog_get_info_by_fd(struct bpf_prog *prog,
 	info.run_time_ns = stats.nsecs;
 	info.run_cnt = stats.cnt;
 
-	if (!capable(CAP_SYS_ADMIN)) {
+	if (!bpf_capable()) {
 		info.jited_prog_len = 0;
 		info.xlated_prog_len = 0;
 		info.nr_jited_ksyms = 0;
@@ -3543,7 +3596,7 @@ static int bpf_btf_load(const union bpf_attr *attr)
 	if (CHECK_ATTR(BPF_BTF_LOAD))
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return -EPERM;
 
 	return btf_new_fd(attr);
@@ -3766,9 +3819,6 @@ static int link_create(union bpf_attr *attr)
 	struct bpf_prog *prog;
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	if (CHECK_ATTR(BPF_LINK_CREATE))
 		return -EINVAL;
 
@@ -3817,9 +3867,6 @@ static int link_update(union bpf_attr *attr)
 	u32 flags;
 	int ret;
 
-	if (!capable(CAP_NET_ADMIN))
-		return -EPERM;
-
 	if (CHECK_ATTR(BPF_LINK_UPDATE))
 		return -EINVAL;
 
@@ -3988,7 +4035,7 @@ SYSCALL_DEFINE3(bpf, int, cmd, union bpf_attr __user *, uattr, unsigned int, siz
 	union bpf_attr attr;
 	int err;
 
-	if (sysctl_unprivileged_bpf_disabled && !capable(CAP_SYS_ADMIN))
+	if (sysctl_unprivileged_bpf_disabled && !bpf_capable())
 		return -EPERM;
 
 	err = bpf_check_uarg_tail_zero(uattr, sizeof(attr), size);
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index a3f2af756fd6..180933f6fba9 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -1295,7 +1295,7 @@ static void __mark_reg_unknown(const struct bpf_verifier_env *env,
 	reg->type = SCALAR_VALUE;
 	reg->var_off = tnum_unknown;
 	reg->frameno = 0;
-	reg->precise = env->subprog_cnt > 1 || !env->allow_ptr_leaks;
+	reg->precise = env->subprog_cnt > 1 || !env->bpf_capable;
 	__mark_reg_unbounded(reg);
 }
 
@@ -1427,8 +1427,9 @@ static int check_subprogs(struct bpf_verifier_env *env)
 			continue;
 		if (insn[i].src_reg != BPF_PSEUDO_CALL)
 			continue;
-		if (!env->allow_ptr_leaks) {
-			verbose(env, "function calls to other bpf functions are allowed for root only\n");
+		if (!env->bpf_capable) {
+			verbose(env,
+				"function calls to other bpf functions are allowed for CAP_BPF and CAP_SYS_ADMIN\n");
 			return -EPERM;
 		}
 		ret = add_subprog(env, i + insn[i].imm + 1);
@@ -1962,8 +1963,7 @@ static int __mark_chain_precision(struct bpf_verifier_env *env, int regno,
 	bool new_marks = false;
 	int i, err;
 
-	if (!env->allow_ptr_leaks)
-		/* backtracking is root only for now */
+	if (!env->bpf_capable)
 		return 0;
 
 	func = st->frame[st->curframe];
@@ -2211,7 +2211,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
 		reg = &cur->regs[value_regno];
 
 	if (reg && size == BPF_REG_SIZE && register_is_const(reg) &&
-	    !register_is_null(reg) && env->allow_ptr_leaks) {
+	    !register_is_null(reg) && env->bpf_capable) {
 		if (dst_reg != BPF_REG_FP) {
 			/* The backtracking logic can only recognize explicit
 			 * stack slot address like [fp - 8]. Other spill of
@@ -2237,7 +2237,7 @@ static int check_stack_write(struct bpf_verifier_env *env,
 			return -EINVAL;
 		}
 
-		if (!env->allow_ptr_leaks) {
+		if (!env->bypass_spec_v4) {
 			bool sanitize = false;
 
 			if (state->stack[spi].slot_type[0] == STACK_SPILL &&
@@ -3432,7 +3432,7 @@ static int check_stack_boundary(struct bpf_verifier_env *env, int regno,
 		 * Spectre masking for stack ALU.
 		 * See also retrieve_ptr_limit().
 		 */
-		if (!env->allow_ptr_leaks) {
+		if (!env->bypass_spec_v1) {
 			char tn_buf[48];
 
 			tnum_strn(tn_buf, sizeof(tn_buf), reg->var_off);
@@ -4435,10 +4435,10 @@ record_func_map(struct bpf_verifier_env *env, struct bpf_call_arg_meta *meta,
 
 	if (!BPF_MAP_PTR(aux->map_ptr_state))
 		bpf_map_ptr_store(aux, meta->map_ptr,
-				  meta->map_ptr->unpriv_array);
+				  !meta->map_ptr->bypass_spec_v1);
 	else if (BPF_MAP_PTR(aux->map_ptr_state) != meta->map_ptr)
 		bpf_map_ptr_store(aux, BPF_MAP_PTR_POISON,
-				  meta->map_ptr->unpriv_array);
+				  !meta->map_ptr->bypass_spec_v1);
 	return 0;
 }
 
@@ -4807,7 +4807,7 @@ static int retrieve_ptr_limit(const struct bpf_reg_state *ptr_reg,
 static bool can_skip_alu_sanitation(const struct bpf_verifier_env *env,
 				    const struct bpf_insn *insn)
 {
-	return env->allow_ptr_leaks || BPF_SRC(insn->code) == BPF_K;
+	return env->bypass_spec_v1 || BPF_SRC(insn->code) == BPF_K;
 }
 
 static int update_alu_sanitation_state(struct bpf_insn_aux_data *aux,
@@ -5117,7 +5117,7 @@ static int adjust_ptr_min_max_vals(struct bpf_verifier_env *env,
 	/* For unprivileged we require that resulting offset must be in bounds
 	 * in order to be able to sanitize access later on.
 	 */
-	if (!env->allow_ptr_leaks) {
+	if (!env->bypass_spec_v1) {
 		if (dst_reg->type == PTR_TO_MAP_VALUE &&
 		    check_map_access(env, dst, dst_reg->off, 1, false)) {
 			verbose(env, "R%d pointer arithmetic of map value goes out of range, "
@@ -7244,7 +7244,7 @@ static int push_insn(int t, int w, int e, struct bpf_verifier_env *env,
 		insn_stack[env->cfg.cur_stack++] = w;
 		return 1;
 	} else if ((insn_state[w] & 0xF0) == DISCOVERED) {
-		if (loop_ok && env->allow_ptr_leaks)
+		if (loop_ok && env->bpf_capable)
 			return 0;
 		verbose_linfo(env, t, "%d: ", t);
 		verbose_linfo(env, w, "%d: ", w);
@@ -8353,7 +8353,7 @@ next:
 	if (env->max_states_per_insn < states_cnt)
 		env->max_states_per_insn = states_cnt;
 
-	if (!env->allow_ptr_leaks && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
+	if (!env->bpf_capable && states_cnt > BPF_COMPLEXITY_LIMIT_STATES)
 		return push_jmp_history(env, cur);
 
 	if (!add_new_state)
@@ -10014,7 +10014,7 @@ static int fixup_bpf_calls(struct bpf_verifier_env *env)
 			insn->code = BPF_JMP | BPF_TAIL_CALL;
 
 			aux = &env->insn_aux_data[i + delta];
-			if (env->allow_ptr_leaks && !expect_blinding &&
+			if (env->bpf_capable && !expect_blinding &&
 			    prog->jit_requested &&
 			    !bpf_map_key_poisoned(aux) &&
 			    !bpf_map_ptr_poisoned(aux) &&
@@ -10758,7 +10758,7 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 		env->insn_aux_data[i].orig_idx = i;
 	env->prog = *prog;
 	env->ops = bpf_verifier_ops[env->prog->type];
-	is_priv = capable(CAP_SYS_ADMIN);
+	is_priv = bpf_capable();
 
 	if (!btf_vmlinux && IS_ENABLED(CONFIG_DEBUG_INFO_BTF)) {
 		mutex_lock(&bpf_verifier_lock);
@@ -10799,7 +10799,10 @@ int bpf_check(struct bpf_prog **prog, union bpf_attr *attr,
 	if (attr->prog_flags & BPF_F_ANY_ALIGNMENT)
 		env->strict_alignment = false;
 
-	env->allow_ptr_leaks = is_priv;
+	env->allow_ptr_leaks = bpf_allow_ptr_leaks();
+	env->bypass_spec_v1 = bpf_bypass_spec_v1();
+	env->bypass_spec_v4 = bpf_bypass_spec_v4();
+	env->bpf_capable = bpf_capable();
 
 	if (is_priv)
 		env->test_state_freq = attr->prog_flags & BPF_F_TEST_STATE_FREQ;
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index d961428fb5b6..9a84d7fb4869 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -315,6 +315,9 @@ static const struct bpf_func_proto bpf_probe_write_user_proto = {
 
 static const struct bpf_func_proto *bpf_get_probe_write_proto(void)
 {
+	if (!capable(CAP_SYS_ADMIN))
+		return NULL;
+
 	pr_warn_ratelimited("%s[%d] is installing a program with bpf_probe_write_user helper that may corrupt user memory!",
 			    current->comm, task_pid_nr(current));
 
diff --git a/net/core/bpf_sk_storage.c b/net/core/bpf_sk_storage.c
index 756b63b6f7b3..d2c4d16dadba 100644
--- a/net/core/bpf_sk_storage.c
+++ b/net/core/bpf_sk_storage.c
@@ -625,7 +625,7 @@ static int bpf_sk_storage_map_alloc_check(union bpf_attr *attr)
 	    !attr->btf_key_type_id || !attr->btf_value_type_id)
 		return -EINVAL;
 
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return -EPERM;
 
 	if (attr->value_size > MAX_VALUE_SIZE)
@@ -978,7 +978,7 @@ bpf_sk_storage_diag_alloc(const struct nlattr *nla_stgs)
 	/* bpf_sk_storage_map is currently limited to CAP_SYS_ADMIN as
 	 * the map_alloc_check() side also does.
 	 */
-	if (!capable(CAP_SYS_ADMIN))
+	if (!bpf_capable())
 		return ERR_PTR(-EPERM);
 
 	nla_for_each_nested(nla, nla_stgs, rem) {
diff --git a/net/core/filter.c b/net/core/filter.c
index a85eb538d4d6..f8a3c7e9d027 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6687,7 +6687,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 		return false;
 	case bpf_ctx_range(struct __sk_buff, data):
 	case bpf_ctx_range(struct __sk_buff, data_end):
-		if (!capable(CAP_SYS_ADMIN))
+		if (!bpf_capable())
 			return false;
 		break;
 	}
@@ -6699,7 +6699,7 @@ static bool cg_skb_is_valid_access(int off, int size,
 		case bpf_ctx_range_till(struct __sk_buff, cb[0], cb[4]):
 			break;
 		case bpf_ctx_range(struct __sk_buff, tstamp):
-			if (!capable(CAP_SYS_ADMIN))
+			if (!bpf_capable())
 				return false;
 			break;
 		default:
-- 
cgit v1.2.3-59-g8ed1b


From 81626001187609b9c49696a5b48d5abcf0e5f9be Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Wed, 13 May 2020 16:03:55 -0700
Subject: selftests/bpf: Use CAP_BPF and CAP_PERFMON in tests

Make all test_verifier tests exercise CAP_BPF and CAP_PERFMON.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200513230355.7858-4-alexei.starovoitov@gmail.com
---
 tools/testing/selftests/bpf/test_verifier.c      | 44 +++++++++++++++++++-----
 tools/testing/selftests/bpf/verifier/calls.c     | 16 ++++-----
 tools/testing/selftests/bpf/verifier/dead_code.c | 10 +++---
 3 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_verifier.c b/tools/testing/selftests/bpf/test_verifier.c
index 21a1ce219c1c..78a6bae56ea6 100644
--- a/tools/testing/selftests/bpf/test_verifier.c
+++ b/tools/testing/selftests/bpf/test_verifier.c
@@ -818,10 +818,18 @@ static void do_test_fixup(struct bpf_test *test, enum bpf_prog_type prog_type,
 	}
 }
 
+struct libcap {
+	struct __user_cap_header_struct hdr;
+	struct __user_cap_data_struct data[2];
+};
+
 static int set_admin(bool admin)
 {
 	cap_t caps;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
+	/* need CAP_BPF, CAP_NET_ADMIN, CAP_PERFMON to load progs */
+	const cap_value_t cap_net_admin = CAP_NET_ADMIN;
+	const cap_value_t cap_sys_admin = CAP_SYS_ADMIN;
+	struct libcap *cap;
 	int ret = -1;
 
 	caps = cap_get_proc();
@@ -829,11 +837,26 @@ static int set_admin(bool admin)
 		perror("cap_get_proc");
 		return -1;
 	}
-	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_val,
+	cap = (struct libcap *)caps;
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_sys_admin, CAP_CLEAR)) {
+		perror("cap_set_flag clear admin");
+		goto out;
+	}
+	if (cap_set_flag(caps, CAP_EFFECTIVE, 1, &cap_net_admin,
 				admin ? CAP_SET : CAP_CLEAR)) {
-		perror("cap_set_flag");
+		perror("cap_set_flag set_or_clear net");
 		goto out;
 	}
+	/* libcap is likely old and simply ignores CAP_BPF and CAP_PERFMON,
+	 * so update effective bits manually
+	 */
+	if (admin) {
+		cap->data[1].effective |= 1 << (38 /* CAP_PERFMON */ - 32);
+		cap->data[1].effective |= 1 << (39 /* CAP_BPF */ - 32);
+	} else {
+		cap->data[1].effective &= ~(1 << (38 - 32));
+		cap->data[1].effective &= ~(1 << (39 - 32));
+	}
 	if (cap_set_proc(caps)) {
 		perror("cap_set_proc");
 		goto out;
@@ -1067,9 +1090,11 @@ fail_log:
 
 static bool is_admin(void)
 {
+	cap_flag_value_t net_priv = CAP_CLEAR;
+	bool perfmon_priv = false;
+	bool bpf_priv = false;
+	struct libcap *cap;
 	cap_t caps;
-	cap_flag_value_t sysadmin = CAP_CLEAR;
-	const cap_value_t cap_val = CAP_SYS_ADMIN;
 
 #ifdef CAP_IS_SUPPORTED
 	if (!CAP_IS_SUPPORTED(CAP_SETFCAP)) {
@@ -1082,11 +1107,14 @@ static bool is_admin(void)
 		perror("cap_get_proc");
 		return false;
 	}
-	if (cap_get_flag(caps, cap_val, CAP_EFFECTIVE, &sysadmin))
-		perror("cap_get_flag");
+	cap = (struct libcap *)caps;
+	bpf_priv = cap->data[1].effective & (1 << (39/* CAP_BPF */ - 32));
+	perfmon_priv = cap->data[1].effective & (1 << (38/* CAP_PERFMON */ - 32));
+	if (cap_get_flag(caps, CAP_NET_ADMIN, CAP_EFFECTIVE, &net_priv))
+		perror("cap_get_flag NET");
 	if (cap_free(caps))
 		perror("cap_free");
-	return (sysadmin == CAP_SET);
+	return bpf_priv && perfmon_priv && net_priv == CAP_SET;
 }
 
 static void get_unpriv_disabled()
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 2d752c4f8d9d..7629a0cebb9b 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -19,7 +19,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 1,
@@ -315,7 +315,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = POINTER_VALUE,
@@ -346,7 +346,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_0, BPF_REG_2),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = TEST_DATA_LEN + TEST_DATA_LEN - ETH_HLEN - ETH_HLEN,
@@ -397,7 +397,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.fixup_map_hash_48b = { 3 },
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
@@ -1064,7 +1064,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "allowed for root only",
+	.errstr_unpriv = "allowed for",
 	.result_unpriv = REJECT,
 	.errstr = "R0 !read_ok",
 	.result = REJECT,
@@ -1977,7 +1977,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 },
@@ -2003,7 +2003,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
@@ -2028,7 +2028,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_SOCKET_FILTER,
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.errstr = "!read_ok",
 	.result = REJECT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/dead_code.c b/tools/testing/selftests/bpf/verifier/dead_code.c
index 50a8a63be4ac..5cf361d8eb1c 100644
--- a/tools/testing/selftests/bpf/verifier/dead_code.c
+++ b/tools/testing/selftests/bpf/verifier/dead_code.c
@@ -85,7 +85,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -103,7 +103,7 @@
 	BPF_MOV64_IMM(BPF_REG_0, 12),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -121,7 +121,7 @@
 	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 1, 0, -5),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 7,
@@ -137,7 +137,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
@@ -152,7 +152,7 @@
 	BPF_MOV64_REG(BPF_REG_0, BPF_REG_1),
 	BPF_EXIT_INSN(),
 	},
-	.errstr_unpriv = "function calls to other bpf functions are allowed for root only",
+	.errstr_unpriv = "function calls to other bpf functions are allowed for",
 	.result_unpriv = REJECT,
 	.result = ACCEPT,
 	.retval = 2,
-- 
cgit v1.2.3-59-g8ed1b


From 2e186a2cf8c785f38ef3237e83f8921f82f6e2b7 Mon Sep 17 00:00:00 2001
From: Tobias Waldekranz <tobias@waldekranz.com>
Date: Fri, 15 May 2020 11:52:52 +0200
Subject: net: core: recursively find netdev by device node

The assumption that a device node is associated either with the
netdev's device, or the parent of that device, does not hold for all
drivers. E.g. Freescale's DPAA has two layers of platform devices
above the netdev. Instead, recursively walk up the tree from the
netdev, allowing any parent to match against the sought-after node.

Signed-off-by: Tobias Waldekranz <tobias@waldekranz.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/net-sysfs.c | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/core/net-sysfs.c b/net/core/net-sysfs.c
index 880e89c894f6..e353b822bb15 100644
--- a/net/core/net-sysfs.c
+++ b/net/core/net-sysfs.c
@@ -1805,12 +1805,12 @@ static struct class net_class __ro_after_init = {
 #ifdef CONFIG_OF_NET
 static int of_dev_node_match(struct device *dev, const void *data)
 {
-	int ret = 0;
-
-	if (dev->parent)
-		ret = dev->parent->of_node == data;
+	for (; dev; dev = dev->parent) {
+		if (dev->of_node == data)
+			return 1;
+	}
 
-	return ret == 0 ? dev->of_node == data : ret;
+	return 0;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From f8ab1807a9c9aa14478920e64d1c9d3685aae26f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 15 May 2020 14:40:11 +0300
Subject: net: sched: introduce terse dump flag

Add new TCA_DUMP_FLAGS attribute and use it in cls API to request terse
filter output from classifiers with TCA_DUMP_FLAGS_TERSE flag. This option
is intended to be used to improve performance of TC filter dump when
userland only needs to obtain stats and not the whole classifier/action
data. Extend struct tcf_proto_ops with new terse_dump() callback that must
be defined by supporting classifier implementations.

Support for this option in specific classifiers and actions is
implemented in the following patches in the series.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sch_generic.h      |  4 ++++
 include/uapi/linux/rtnetlink.h |  6 ++++++
 net/sched/cls_api.c            | 39 +++++++++++++++++++++++++++++++--------
 3 files changed, 41 insertions(+), 8 deletions(-)

diff --git a/include/net/sch_generic.h b/include/net/sch_generic.h
index ab87a8b86a32..c510b03b9751 100644
--- a/include/net/sch_generic.h
+++ b/include/net/sch_generic.h
@@ -330,6 +330,10 @@ struct tcf_proto_ops {
 	int			(*dump)(struct net*, struct tcf_proto*, void *,
 					struct sk_buff *skb, struct tcmsg*,
 					bool);
+	int			(*terse_dump)(struct net *net,
+					      struct tcf_proto *tp, void *fh,
+					      struct sk_buff *skb,
+					      struct tcmsg *t, bool rtnl_held);
 	int			(*tmplt_dump)(struct sk_buff *skb,
 					      struct net *net,
 					      void *tmplt_priv);
diff --git a/include/uapi/linux/rtnetlink.h b/include/uapi/linux/rtnetlink.h
index 4a8c5b745157..073e71ef6bdd 100644
--- a/include/uapi/linux/rtnetlink.h
+++ b/include/uapi/linux/rtnetlink.h
@@ -609,11 +609,17 @@ enum {
 	TCA_HW_OFFLOAD,
 	TCA_INGRESS_BLOCK,
 	TCA_EGRESS_BLOCK,
+	TCA_DUMP_FLAGS,
 	__TCA_MAX
 };
 
 #define TCA_MAX (__TCA_MAX - 1)
 
+#define TCA_DUMP_FLAGS_TERSE (1 << 0) /* Means that in dump user gets only basic
+				       * data necessary to identify the objects
+				       * (handle, cookie, etc.) and stats.
+				       */
+
 #define TCA_RTA(r)  ((struct rtattr*)(((char*)(r)) + NLMSG_ALIGN(sizeof(struct tcmsg))))
 #define TCA_PAYLOAD(n) NLMSG_PAYLOAD(n,sizeof(struct tcmsg))
 
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 299b963c796e..cb2c10e0fee5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -1851,7 +1851,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 			 struct tcf_proto *tp, struct tcf_block *block,
 			 struct Qdisc *q, u32 parent, void *fh,
 			 u32 portid, u32 seq, u16 flags, int event,
-			 bool rtnl_held)
+			 bool terse_dump, bool rtnl_held)
 {
 	struct tcmsg *tcm;
 	struct nlmsghdr  *nlh;
@@ -1878,6 +1878,14 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 		goto nla_put_failure;
 	if (!fh) {
 		tcm->tcm_handle = 0;
+	} else if (terse_dump) {
+		if (tp->ops->terse_dump) {
+			if (tp->ops->terse_dump(net, tp, fh, skb, tcm,
+						rtnl_held) < 0)
+				goto nla_put_failure;
+		} else {
+			goto cls_op_not_supp;
+		}
 	} else {
 		if (tp->ops->dump &&
 		    tp->ops->dump(net, tp, fh, skb, tcm, rtnl_held) < 0)
@@ -1888,6 +1896,7 @@ static int tcf_fill_node(struct net *net, struct sk_buff *skb,
 
 out_nlmsg_trim:
 nla_put_failure:
+cls_op_not_supp:
 	nlmsg_trim(skb, b);
 	return -1;
 }
@@ -1908,7 +1917,7 @@ static int tfilter_notify(struct net *net, struct sk_buff *oskb,
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
 			  n->nlmsg_seq, n->nlmsg_flags, event,
-			  rtnl_held) <= 0) {
+			  false, rtnl_held) <= 0) {
 		kfree_skb(skb);
 		return -EINVAL;
 	}
@@ -1940,7 +1949,7 @@ static int tfilter_del_notify(struct net *net, struct sk_buff *oskb,
 
 	if (tcf_fill_node(net, skb, tp, block, q, parent, fh, portid,
 			  n->nlmsg_seq, n->nlmsg_flags, RTM_DELTFILTER,
-			  rtnl_held) <= 0) {
+			  false, rtnl_held) <= 0) {
 		NL_SET_ERR_MSG(extack, "Failed to build del event notification");
 		kfree_skb(skb);
 		return -EINVAL;
@@ -2501,6 +2510,7 @@ struct tcf_dump_args {
 	struct tcf_block *block;
 	struct Qdisc *q;
 	u32 parent;
+	bool terse_dump;
 };
 
 static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
@@ -2511,12 +2521,12 @@ static int tcf_node_dump(struct tcf_proto *tp, void *n, struct tcf_walker *arg)
 	return tcf_fill_node(net, a->skb, tp, a->block, a->q, a->parent,
 			     n, NETLINK_CB(a->cb->skb).portid,
 			     a->cb->nlh->nlmsg_seq, NLM_F_MULTI,
-			     RTM_NEWTFILTER, true);
+			     RTM_NEWTFILTER, a->terse_dump, true);
 }
 
 static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 			   struct sk_buff *skb, struct netlink_callback *cb,
-			   long index_start, long *p_index)
+			   long index_start, long *p_index, bool terse)
 {
 	struct net *net = sock_net(skb->sk);
 	struct tcf_block *block = chain->block;
@@ -2545,7 +2555,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 			if (tcf_fill_node(net, skb, tp, block, q, parent, NULL,
 					  NETLINK_CB(cb->skb).portid,
 					  cb->nlh->nlmsg_seq, NLM_F_MULTI,
-					  RTM_NEWTFILTER, true) <= 0)
+					  RTM_NEWTFILTER, false, true) <= 0)
 				goto errout;
 			cb->args[1] = 1;
 		}
@@ -2561,6 +2571,7 @@ static bool tcf_chain_dump(struct tcf_chain *chain, struct Qdisc *q, u32 parent,
 		arg.w.skip = cb->args[1] - 1;
 		arg.w.count = 0;
 		arg.w.cookie = cb->args[2];
+		arg.terse_dump = terse;
 		tp->ops->walk(tp, &arg.w, true);
 		cb->args[2] = arg.w.cookie;
 		cb->args[1] = arg.w.count + 1;
@@ -2574,6 +2585,10 @@ errout:
 	return false;
 }
 
+static const struct nla_policy tcf_tfilter_dump_policy[TCA_MAX + 1] = {
+	[TCA_DUMP_FLAGS] = NLA_POLICY_BITFIELD32(TCA_DUMP_FLAGS_TERSE),
+};
+
 /* called with RTNL */
 static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 {
@@ -2583,6 +2598,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 	struct Qdisc *q = NULL;
 	struct tcf_block *block;
 	struct tcmsg *tcm = nlmsg_data(cb->nlh);
+	bool terse_dump = false;
 	long index_start;
 	long index;
 	u32 parent;
@@ -2592,10 +2608,17 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		return skb->len;
 
 	err = nlmsg_parse_deprecated(cb->nlh, sizeof(*tcm), tca, TCA_MAX,
-				     NULL, cb->extack);
+				     tcf_tfilter_dump_policy, cb->extack);
 	if (err)
 		return err;
 
+	if (tca[TCA_DUMP_FLAGS]) {
+		struct nla_bitfield32 flags =
+			nla_get_bitfield32(tca[TCA_DUMP_FLAGS]);
+
+		terse_dump = flags.value & TCA_DUMP_FLAGS_TERSE;
+	}
+
 	if (tcm->tcm_ifindex == TCM_IFINDEX_MAGIC_BLOCK) {
 		block = tcf_block_refcnt_get(net, tcm->tcm_block_index);
 		if (!block)
@@ -2653,7 +2676,7 @@ static int tc_dump_tfilter(struct sk_buff *skb, struct netlink_callback *cb)
 		    nla_get_u32(tca[TCA_CHAIN]) != chain->index)
 			continue;
 		if (!tcf_chain_dump(chain, q, parent, skb, cb,
-				    index_start, &index)) {
+				    index_start, &index, terse_dump)) {
 			tcf_chain_put(chain);
 			err = -EMSGSIZE;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From ca44b738e59420ae73d9e04a1be630a405e3a0f1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 15 May 2020 14:40:12 +0300
Subject: net: sched: implement terse dump support in act

Extend tcf_action_dump() with boolean argument 'terse' that is used to
request terse-mode action dump. In terse mode, only the essential data needed
to identify a particular action (action kind, cookie, etc.) and its stats is
put into the resulting skb; everything else is omitted. Implement
tcf_exts_terse_dump() helper in cls API that is intended to be used to
request terse dump of all exts (actions) attached to the filter.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/act_api.h |  2 +-
 include/net/pkt_cls.h |  1 +
 net/sched/act_api.c   | 30 +++++++++++++++++++++++-------
 net/sched/cls_api.c   | 28 +++++++++++++++++++++++++++-
 4 files changed, 52 insertions(+), 9 deletions(-)

diff --git a/include/net/act_api.h b/include/net/act_api.h
index c24d7643548e..1b4bfc4437be 100644
--- a/include/net/act_api.h
+++ b/include/net/act_api.h
@@ -193,7 +193,7 @@ struct tc_action *tcf_action_init_1(struct net *net, struct tcf_proto *tp,
 				    bool rtnl_held,
 				    struct netlink_ext_ack *extack);
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[], int bind,
-		    int ref);
+		    int ref, bool terse);
 int tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int, int);
 int tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int, int);
 
diff --git a/include/net/pkt_cls.h b/include/net/pkt_cls.h
index 04aa0649f3b0..ed65619cbc47 100644
--- a/include/net/pkt_cls.h
+++ b/include/net/pkt_cls.h
@@ -325,6 +325,7 @@ int tcf_exts_validate(struct net *net, struct tcf_proto *tp,
 void tcf_exts_destroy(struct tcf_exts *exts);
 void tcf_exts_change(struct tcf_exts *dst, struct tcf_exts *src);
 int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts);
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts);
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts);
 
 /**
diff --git a/net/sched/act_api.c b/net/sched/act_api.c
index fbbec2e562f5..8ac7eb0a8309 100644
--- a/net/sched/act_api.c
+++ b/net/sched/act_api.c
@@ -766,12 +766,10 @@ tcf_action_dump_old(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	return a->ops->dump(skb, a, bind, ref);
 }
 
-int
-tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+static int
+tcf_action_dump_terse(struct sk_buff *skb, struct tc_action *a)
 {
-	int err = -EINVAL;
 	unsigned char *b = skb_tail_pointer(skb);
-	struct nlattr *nest;
 	struct tc_cookie *cookie;
 
 	if (nla_put_string(skb, TCA_KIND, a->ops->kind))
@@ -789,6 +787,23 @@ tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
 	}
 	rcu_read_unlock();
 
+	return 0;
+
+nla_put_failure:
+	nlmsg_trim(skb, b);
+	return -1;
+}
+
+int
+tcf_action_dump_1(struct sk_buff *skb, struct tc_action *a, int bind, int ref)
+{
+	int err = -EINVAL;
+	unsigned char *b = skb_tail_pointer(skb);
+	struct nlattr *nest;
+
+	if (tcf_action_dump_terse(skb, a))
+		goto nla_put_failure;
+
 	if (a->hw_stats != TCA_ACT_HW_STATS_ANY &&
 	    nla_put_bitfield32(skb, TCA_ACT_HW_STATS,
 			       a->hw_stats, TCA_ACT_HW_STATS_ANY))
@@ -820,7 +835,7 @@ nla_put_failure:
 EXPORT_SYMBOL(tcf_action_dump_1);
 
 int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
-		    int bind, int ref)
+		    int bind, int ref, bool terse)
 {
 	struct tc_action *a;
 	int err = -EINVAL, i;
@@ -831,7 +846,8 @@ int tcf_action_dump(struct sk_buff *skb, struct tc_action *actions[],
 		nest = nla_nest_start_noflag(skb, i + 1);
 		if (nest == NULL)
 			goto nla_put_failure;
-		err = tcf_action_dump_1(skb, a, bind, ref);
+		err = terse ? tcf_action_dump_terse(skb, a) :
+			tcf_action_dump_1(skb, a, bind, ref);
 		if (err < 0)
 			goto errout;
 		nla_nest_end(skb, nest);
@@ -1133,7 +1149,7 @@ static int tca_get_fill(struct sk_buff *skb, struct tc_action *actions[],
 	if (!nest)
 		goto out_nlmsg_trim;
 
-	if (tcf_action_dump(skb, actions, bind, ref) < 0)
+	if (tcf_action_dump(skb, actions, bind, ref, false) < 0)
 		goto out_nlmsg_trim;
 
 	nla_nest_end(skb, nest);
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index cb2c10e0fee5..752d608f4442 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -3179,7 +3179,8 @@ int tcf_exts_dump(struct sk_buff *skb, struct tcf_exts *exts)
 			if (nest == NULL)
 				goto nla_put_failure;
 
-			if (tcf_action_dump(skb, exts->actions, 0, 0) < 0)
+			if (tcf_action_dump(skb, exts->actions, 0, 0, false)
+			    < 0)
 				goto nla_put_failure;
 			nla_nest_end(skb, nest);
 		} else if (exts->police) {
@@ -3203,6 +3204,31 @@ nla_put_failure:
 }
 EXPORT_SYMBOL(tcf_exts_dump);
 
+int tcf_exts_terse_dump(struct sk_buff *skb, struct tcf_exts *exts)
+{
+#ifdef CONFIG_NET_CLS_ACT
+	struct nlattr *nest;
+
+	if (!exts->action || !tcf_exts_has_actions(exts))
+		return 0;
+
+	nest = nla_nest_start_noflag(skb, exts->action);
+	if (!nest)
+		goto nla_put_failure;
+
+	if (tcf_action_dump(skb, exts->actions, 0, 0, true) < 0)
+		goto nla_put_failure;
+	nla_nest_end(skb, nest);
+	return 0;
+
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+#else
+	return 0;
+#endif
+}
+EXPORT_SYMBOL(tcf_exts_terse_dump);
 
 int tcf_exts_dump_stats(struct sk_buff *skb, struct tcf_exts *exts)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 0348451db9fffebd01d7b496e108dd729c2fcb24 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 15 May 2020 14:40:13 +0300
Subject: net: sched: cls_flower: implement terse dump support

Implement tcf_proto_ops->terse_dump() callback for flower classifier. Only
dump handle, flags and action data in terse mode.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 43 +++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 74a0febcafb8..0c574700da75 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -2768,6 +2768,48 @@ nla_put_failure:
 	return -1;
 }
 
+static int fl_terse_dump(struct net *net, struct tcf_proto *tp, void *fh,
+			 struct sk_buff *skb, struct tcmsg *t, bool rtnl_held)
+{
+	struct cls_fl_filter *f = fh;
+	struct nlattr *nest;
+	bool skip_hw;
+
+	if (!f)
+		return skb->len;
+
+	t->tcm_handle = f->handle;
+
+	nest = nla_nest_start_noflag(skb, TCA_OPTIONS);
+	if (!nest)
+		goto nla_put_failure;
+
+	spin_lock(&tp->lock);
+
+	skip_hw = tc_skip_hw(f->flags);
+
+	if (f->flags && nla_put_u32(skb, TCA_FLOWER_FLAGS, f->flags))
+		goto nla_put_failure_locked;
+
+	spin_unlock(&tp->lock);
+
+	if (!skip_hw)
+		fl_hw_update_stats(tp, f, rtnl_held);
+
+	if (tcf_exts_terse_dump(skb, &f->exts))
+		goto nla_put_failure;
+
+	nla_nest_end(skb, nest);
+
+	return skb->len;
+
+nla_put_failure_locked:
+	spin_unlock(&tp->lock);
+nla_put_failure:
+	nla_nest_cancel(skb, nest);
+	return -1;
+}
+
 static int fl_tmplt_dump(struct sk_buff *skb, struct net *net, void *tmplt_priv)
 {
 	struct fl_flow_tmplt *tmplt = tmplt_priv;
@@ -2832,6 +2874,7 @@ static struct tcf_proto_ops cls_fl_ops __read_mostly = {
 	.hw_add		= fl_hw_add,
 	.hw_del		= fl_hw_del,
 	.dump		= fl_dump,
+	.terse_dump	= fl_terse_dump,
 	.bind_class	= fl_bind_class,
 	.tmplt_create	= fl_tmplt_create,
 	.tmplt_destroy	= fl_tmplt_destroy,
-- 
cgit v1.2.3-59-g8ed1b


From e7534fd42a99f2dcca022d2c9a37adf82ad07998 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Fri, 15 May 2020 14:40:14 +0300
Subject: selftests: implement flower classifier terse dump tests

Implement two basic tests to verify terse dump functionality of flower
classifier:

- Test that verifies that terse dump works.

- Test that verifies that terse dump doesn't print filter key.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../tc-testing/tc-tests/filters/tests.json         | 38 ++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
index 12aa4bc1f6a0..bb543bf69d69 100644
--- a/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
+++ b/tools/testing/selftests/tc-testing/tc-tests/filters/tests.json
@@ -87,5 +87,43 @@
         "teardown": [
             "$TC qdisc del dev $DEV2 ingress"
         ]
+    },
+    {
+        "id": "7c65",
+        "name": "Add flower filter and then terse dump it",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "matchPattern": "filter protocol ip pref 1 flower.*handle",
+        "matchCount": "1",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
+    },
+    {
+        "id": "d45e",
+        "name": "Add flower filter and verify that terse dump doesn't output filter key",
+        "category": [
+            "filter",
+            "flower"
+        ],
+        "setup": [
+            "$TC qdisc add dev $DEV2 ingress"
+        ],
+        "cmdUnderTest": "$TC filter add dev $DEV2 protocol ip pref 1 ingress flower dst_mac e4:11:22:11:4a:51 action drop",
+        "expExitCode": "0",
+        "verifyCmd": "$TC filter show terse dev $DEV2 ingress",
+        "matchPattern": "  dst_mac e4:11:22:11:4a:51",
+        "matchCount": "0",
+        "teardown": [
+            "$TC qdisc del dev $DEV2 ingress"
+        ]
     }
 ]
-- 
cgit v1.2.3-59-g8ed1b


From 0141792f8b7300006b874dda1c35acd0abd90d9d Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Fri, 15 May 2020 23:25:55 +0800
Subject: net: dsa: mt7530: fix VLAN setup

Allow DSA to add VLAN entries even if VLAN filtering is disabled, so
enabling it will not block the traffic of existing ports in the bridge.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/mt7530.c | 13 +------------
 1 file changed, 1 insertion(+), 12 deletions(-)

diff --git a/drivers/net/dsa/mt7530.c b/drivers/net/dsa/mt7530.c
index a063d914c23f..d30542fc556a 100644
--- a/drivers/net/dsa/mt7530.c
+++ b/drivers/net/dsa/mt7530.c
@@ -1085,12 +1085,6 @@ mt7530_port_vlan_add(struct dsa_switch *ds, int port,
 	struct mt7530_priv *priv = ds->priv;
 	u16 vid;
 
-	/* The port is kept as VLAN-unaware if bridge with vlan_filtering not
-	 * being set.
-	 */
-	if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
-		return;
-
 	mutex_lock(&priv->reg_mutex);
 
 	for (vid = vlan->vid_begin; vid <= vlan->vid_end; ++vid) {
@@ -1116,12 +1110,6 @@ mt7530_port_vlan_del(struct dsa_switch *ds, int port,
 	struct mt7530_priv *priv = ds->priv;
 	u16 vid, pvid;
 
-	/* The port is kept as VLAN-unaware if bridge with vlan_filtering not
-	 * being set.
-	 */
-	if (!dsa_port_is_vlan_filtering(dsa_to_port(ds, port)))
-		return 0;
-
 	mutex_lock(&priv->reg_mutex);
 
 	pvid = priv->ports[port].pvid;
@@ -1235,6 +1223,7 @@ mt7530_setup(struct dsa_switch *ds)
 	 * as two netdev instances.
 	 */
 	dn = dsa_to_port(ds, MT7530_CPU_PORT)->master->dev.of_node->parent;
+	ds->configure_vlan_while_not_filtering = true;
 
 	if (priv->id == ID_MT7530) {
 		regulator_set_voltage(priv->core_pwr, 1000000, 1000000);
-- 
cgit v1.2.3-59-g8ed1b


From 4f1d97262d58e0f493d03de4938bce736ac3cf3d Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 15 May 2020 22:41:03 +0530
Subject: cxgb4: improve credits recovery in TC-MQPRIO Tx path

Request a credit update whenever half of the credits have been consumed,
counting the current request. Also, avoid retrying to post packets when
there are no credits left. The credit update reply via interrupt will
eventually restore the credits and invoke the Tx path again.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/sge.c | 40 +++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/sge.c b/drivers/net/ethernet/chelsio/cxgb4/sge.c
index 6516c45864b3..1359158652b7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sge.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sge.c
@@ -2091,10 +2091,9 @@ static inline u8 ethofld_calc_tx_flits(struct adapter *adap,
 	return flits + nsgl;
 }
 
-static inline void *write_eo_wr(struct adapter *adap,
-				struct sge_eosw_txq *eosw_txq,
-				struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
-				u32 hdr_len, u32 wrlen)
+static void *write_eo_wr(struct adapter *adap, struct sge_eosw_txq *eosw_txq,
+			 struct sk_buff *skb, struct fw_eth_tx_eo_wr *wr,
+			 u32 hdr_len, u32 wrlen)
 {
 	const struct skb_shared_info *ssi = skb_shinfo(skb);
 	struct cpl_tx_pkt_core *cpl;
@@ -2113,7 +2112,8 @@ static inline void *write_eo_wr(struct adapter *adap,
 	immd_len += hdr_len;
 
 	if (!eosw_txq->ncompl ||
-	    eosw_txq->last_compl >= adap->params.ofldq_wr_cred / 2) {
+	    (eosw_txq->last_compl + wrlen16) >=
+	    (adap->params.ofldq_wr_cred / 2)) {
 		compl = true;
 		eosw_txq->ncompl++;
 		eosw_txq->last_compl = 0;
@@ -2153,8 +2153,8 @@ static inline void *write_eo_wr(struct adapter *adap,
 	return cpl;
 }
 
-static void ethofld_hard_xmit(struct net_device *dev,
-			      struct sge_eosw_txq *eosw_txq)
+static int ethofld_hard_xmit(struct net_device *dev,
+			     struct sge_eosw_txq *eosw_txq)
 {
 	struct port_info *pi = netdev2pinfo(dev);
 	struct adapter *adap = netdev2adap(dev);
@@ -2167,8 +2167,8 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	bool skip_eotx_wr = false;
 	struct tx_sw_desc *d;
 	struct sk_buff *skb;
+	int left, ret = 0;
 	u8 flits, ndesc;
-	int left;
 
 	eohw_txq = &adap->sge.eohw_txq[eosw_txq->hwqid];
 	spin_lock(&eohw_txq->lock);
@@ -2198,11 +2198,19 @@ static void ethofld_hard_xmit(struct net_device *dev,
 	wrlen = flits * 8;
 	wrlen16 = DIV_ROUND_UP(wrlen, 16);
 
-	/* If there are no CPL credits, then wait for credits
-	 * to come back and retry again
+	left = txq_avail(&eohw_txq->q) - ndesc;
+
+	/* If there are no descriptors left in hardware queues or no
+	 * CPL credits left in software queues, then wait for them
+	 * to come back and retry again. Note that we always request
+	 * for credits update via interrupt for every half credits
+	 * consumed. So, the interrupt will eventually restore the
+	 * credits and invoke the Tx path again.
 	 */
-	if (unlikely(wrlen16 > eosw_txq->cred))
+	if (unlikely(left < 0 || wrlen16 > eosw_txq->cred)) {
+		ret = -ENOMEM;
 		goto out_unlock;
+	}
 
 	if (unlikely(skip_eotx_wr)) {
 		start = (u64 *)wr;
@@ -2231,7 +2239,8 @@ write_wr_headers:
 	sgl = (u64 *)inline_tx_skb_header(skb, &eohw_txq->q, (void *)start,
 					  hdr_len);
 	if (data_len) {
-		if (unlikely(cxgb4_map_skb(adap->pdev_dev, skb, d->addr))) {
+		ret = cxgb4_map_skb(adap->pdev_dev, skb, d->addr);
+		if (unlikely(ret)) {
 			memset(d->addr, 0, sizeof(d->addr));
 			eohw_txq->mapping_err++;
 			goto out_unlock;
@@ -2277,12 +2286,13 @@ write_wr_headers:
 
 out_unlock:
 	spin_unlock(&eohw_txq->lock);
+	return ret;
 }
 
 static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
 {
 	struct sk_buff *skb;
-	int pktcount;
+	int pktcount, ret;
 
 	switch (eosw_txq->state) {
 	case CXGB4_EO_STATE_ACTIVE:
@@ -2307,7 +2317,9 @@ static void ethofld_xmit(struct net_device *dev, struct sge_eosw_txq *eosw_txq)
 			continue;
 		}
 
-		ethofld_hard_xmit(dev, eosw_txq);
+		ret = ethofld_hard_xmit(dev, eosw_txq);
+		if (ret)
+			break;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 4bccfc036abbdf510de808925c646c8c49e0309e Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 15 May 2020 22:41:04 +0530
Subject: cxgb4: tune burst buffer size for TC-MQPRIO offload

For each traffic class, firmware handles up to 4 * MTU of data per
burst cycle. Under heavy load, this small buffer size is a bottleneck
when buffering large TSO packets with an MTU of 1500 or less.
Increase the burst buffer size to 8 * MTU when supported.

Also, keep the driver's traffic class configuration API similar to
the firmware API counterpart.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         | 30 ++++++++++++----------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |  2 +-
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c   |  7 +++++
 drivers/net/ethernet/chelsio/cxgb4/sched.c         |  3 ++-
 drivers/net/ethernet/chelsio/cxgb4/t4_hw.c         |  8 +++---
 5 files changed, 31 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 30d25a37fc3b..fc1405a8ed74 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1125,19 +1125,20 @@ struct adapter {
  * programmed with various parameters.
  */
 struct ch_sched_params {
-	s8   type;                     /* packet or flow */
+	u8   type;                     /* packet or flow */
 	union {
 		struct {
-			s8   level;    /* scheduler hierarchy level */
-			s8   mode;     /* per-class or per-flow */
-			s8   rateunit; /* bit or packet rate */
-			s8   ratemode; /* %port relative or kbps absolute */
-			s8   channel;  /* scheduler channel [0..N] */
-			s8   class;    /* scheduler class [0..N] */
-			s32  minrate;  /* minimum rate */
-			s32  maxrate;  /* maximum rate */
-			s16  weight;   /* percent weight */
-			s16  pktsize;  /* average packet size */
+			u8   level;    /* scheduler hierarchy level */
+			u8   mode;     /* per-class or per-flow */
+			u8   rateunit; /* bit or packet rate */
+			u8   ratemode; /* %port relative or kbps absolute */
+			u8   channel;  /* scheduler channel [0..N] */
+			u8   class;    /* scheduler class [0..N] */
+			u32  minrate;  /* minimum rate */
+			u32  maxrate;  /* maximum rate */
+			u16  weight;   /* percent weight */
+			u16  pktsize;  /* average packet size */
+			u16  burstsize;  /* burst buffer size */
 		} params;
 	} u;
 };
@@ -1952,9 +1953,10 @@ int t4_sge_ctxt_rd(struct adapter *adap, unsigned int mbox, unsigned int cid,
 		   enum ctxt_type ctype, u32 *data);
 int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
 		      enum ctxt_type ctype, u32 *data);
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
-		    int rateunit, int ratemode, int channel, int class,
-		    int minrate, int maxrate, int weight, int pktsize);
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+		    u8 rateunit, u8 ratemode, u8 channel, u8 class,
+		    u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+		    u16 burstsize);
 void t4_sge_decode_idma_state(struct adapter *adapter, int state);
 void t4_idma_monitor_init(struct adapter *adapter,
 			  struct sge_idma_monitor_state *idma);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index a70018f067aa..196451f8006f 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -3021,7 +3021,7 @@ static int cxgb4_mgmt_set_vf_rate(struct net_device *dev, int vf,
 			      SCHED_CLASS_RATEUNIT_BITS,
 			      SCHED_CLASS_RATEMODE_ABS,
 			      pi->tx_chan, class_id, 0,
-			      max_tx_rate * 1000, 0, pktsize);
+			      max_tx_rate * 1000, 0, pktsize, 0);
 	if (ret) {
 		dev_err(adap->pdev_dev, "Err %d for Traffic Class config\n",
 			ret);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index e6af4906d674..56079c9937db 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -342,6 +342,13 @@ static int cxgb4_mqprio_alloc_tc(struct net_device *dev,
 		p.u.params.minrate = div_u64(mqprio->min_rate[i] * 8, 1000);
 		p.u.params.maxrate = div_u64(mqprio->max_rate[i] * 8, 1000);
 
+		/* Request larger burst buffer for smaller MTU, so
+		 * that hardware can work on more data per burst
+		 * cycle.
+		 */
+		if (dev->mtu <= ETH_DATA_LEN)
+			p.u.params.burstsize = 8 * dev->mtu;
+
 		e = cxgb4_sched_class_alloc(dev, &p);
 		if (!e) {
 			ret = -ENOMEM;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/sched.c b/drivers/net/ethernet/chelsio/cxgb4/sched.c
index cebe1412d960..fde93c50cfec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/sched.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/sched.c
@@ -57,7 +57,8 @@ static int t4_sched_class_fw_cmd(struct port_info *pi,
 				      p->u.params.ratemode,
 				      p->u.params.channel, e->idx,
 				      p->u.params.minrate, p->u.params.maxrate,
-				      p->u.params.weight, p->u.params.pktsize);
+				      p->u.params.weight, p->u.params.pktsize,
+				      p->u.params.burstsize);
 		break;
 	default:
 		err = -ENOTSUPP;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
index 2a3480fc1d91..1c8068c02728 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4_hw.c
@@ -10361,9 +10361,10 @@ int t4_sge_ctxt_rd_bd(struct adapter *adap, unsigned int cid,
 	return ret;
 }
 
-int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
-		    int rateunit, int ratemode, int channel, int class,
-		    int minrate, int maxrate, int weight, int pktsize)
+int t4_sched_params(struct adapter *adapter, u8 type, u8 level, u8 mode,
+		    u8 rateunit, u8 ratemode, u8 channel, u8 class,
+		    u32 minrate, u32 maxrate, u16 weight, u16 pktsize,
+		    u16 burstsize)
 {
 	struct fw_sched_cmd cmd;
 
@@ -10385,6 +10386,7 @@ int t4_sched_params(struct adapter *adapter, int type, int level, int mode,
 	cmd.u.params.max = cpu_to_be32(maxrate);
 	cmd.u.params.weight = cpu_to_be16(weight);
 	cmd.u.params.pktsize = cpu_to_be16(pktsize);
+	cmd.u.params.burstsize = cpu_to_be16(burstsize);
 
 	return t4_wr_mbox_meat(adapter, adapter->mbox, &cmd, sizeof(cmd),
 			       NULL, 1);
-- 
cgit v1.2.3-59-g8ed1b


From 5148e5950c675a26ab1f5eb4b291e9bd986116c9 Mon Sep 17 00:00:00 2001
From: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Date: Fri, 15 May 2020 22:41:05 +0530
Subject: cxgb4: add EOTID tracking and software context dump

Rework and add support for dumping EOTID software context used by
TC-MQPRIO. Also track number of EOTIDs in use.

Signed-off-by: Rahul Lakkireddy <rahul.lakkireddy@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 144 +++++++++++++++++----
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    |   1 +
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c   |  10 ++
 .../net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h   |   1 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h     |   5 +
 5 files changed, 133 insertions(+), 28 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index ebed99f3d4cf..7818c392da50 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -49,6 +49,7 @@
 #include "cudbg_lib_common.h"
 #include "cudbg_entity.h"
 #include "cudbg_lib.h"
+#include "cxgb4_tc_mqprio.h"
 
 /* generic seq_file support for showing a table of size rows x width. */
 static void *seq_tab_get_idx(struct seq_tab *tb, loff_t pos)
@@ -2657,32 +2658,19 @@ static int sge_qinfo_uld_ciq_entries(const struct adapter *adap, int uld)
 
 static int sge_qinfo_show(struct seq_file *seq, void *v)
 {
-	int eth_entries, ctrl_entries, eo_entries = 0;
+	int eth_entries, ctrl_entries, eohw_entries = 0, eosw_entries = 0;
 	int uld_rxq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_ciq_entries[CXGB4_ULD_MAX] = { 0 };
 	int uld_txq_entries[CXGB4_TX_MAX] = { 0 };
 	const struct sge_uld_txq_info *utxq_info;
 	const struct sge_uld_rxq_info *urxq_info;
+	struct cxgb4_tc_port_mqprio *port_mqprio;
 	struct adapter *adap = seq->private;
-	int i, n, r = (uintptr_t)v - 1;
+	int i, j, n, r = (uintptr_t)v - 1;
 	struct sge *s = &adap->sge;
 
 	eth_entries = DIV_ROUND_UP(adap->sge.ethqsets, 4);
 	ctrl_entries = DIV_ROUND_UP(MAX_CTRL_QUEUES, 4);
-	if (adap->sge.eohw_txq)
-		eo_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
-
-	mutex_lock(&uld_mutex);
-	if (s->uld_txq_info)
-		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
-			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);
-
-	if (s->uld_rxq_info) {
-		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
-			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
-			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
-		}
-	}
 
 	if (r)
 		seq_putc(seq, '\n');
@@ -2759,11 +2747,21 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-		goto unlock;
+		goto out;
 	}
 
 	r -= eth_entries;
-	if (r < eo_entries) {
+	if (!adap->tc_mqprio)
+		goto skip_mqprio;
+
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+	if (!refcount_read(&adap->tc_mqprio->refcnt)) {
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+		goto skip_mqprio;
+	}
+
+	eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
+	if (r < eohw_entries) {
 		int base_qset = r * 4;
 		const struct sge_ofld_rxq *rx = &s->eohw_rxq[base_qset];
 		const struct sge_eohw_txq *tx = &s->eohw_txq[base_qset];
@@ -2808,10 +2806,71 @@ do { \
 		RL("FLLow:", fl.low);
 		RL("FLStarving:", fl.starving);
 
-		goto unlock;
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+		goto out;
+	}
+
+	r -= eohw_entries;
+	for (j = 0; j < adap->params.nports; j++) {
+		int entries;
+		u8 tc;
+
+		port_mqprio = &adap->tc_mqprio->port_mqprio[j];
+		entries = 0;
+		for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
+			entries += port_mqprio->mqprio.qopt.count[tc];
+
+		if (!entries)
+			continue;
+
+		eosw_entries = DIV_ROUND_UP(entries, 4);
+		if (r < eosw_entries) {
+			const struct sge_eosw_txq *tx;
+
+			n = min(4, entries - 4 * r);
+			tx = &port_mqprio->eosw_txq[4 * r];
+
+			S("QType:", "EOSW-TXQ");
+			S("Interface:",
+			  adap->port[j] ? adap->port[j]->name : "N/A");
+			T("EOTID:", hwtid);
+			T("HWQID:", hwqid);
+			T("State:", state);
+			T("Size:", ndesc);
+			T("In-Use:", inuse);
+			T("Credits:", cred);
+			T("Compl:", ncompl);
+			T("Last-Compl:", last_compl);
+			T("PIDX:", pidx);
+			T("Last-PIDX:", last_pidx);
+			T("CIDX:", cidx);
+			T("Last-CIDX:", last_cidx);
+			T("FLOWC-IDX:", flowc_idx);
+
+			mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+			goto out;
+		}
+
+		r -= eosw_entries;
+	}
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+
+skip_mqprio:
+	if (!is_uld(adap))
+		goto skip_uld;
+
+	mutex_lock(&uld_mutex);
+	if (s->uld_txq_info)
+		for (i = 0; i < ARRAY_SIZE(uld_txq_entries); i++)
+			uld_txq_entries[i] = sge_qinfo_uld_txq_entries(adap, i);
+
+	if (s->uld_rxq_info) {
+		for (i = 0; i < ARRAY_SIZE(uld_rxq_entries); i++) {
+			uld_rxq_entries[i] = sge_qinfo_uld_rxq_entries(adap, i);
+			uld_ciq_entries[i] = sge_qinfo_uld_ciq_entries(adap, i);
+		}
 	}
 
-	r -= eo_entries;
 	if (r < uld_txq_entries[CXGB4_TX_OFLD]) {
 		const struct sge_uld_txq *tx;
 
@@ -2994,6 +3053,9 @@ do { \
 	}
 
 	r -= uld_txq_entries[CXGB4_TX_CRYPTO];
+	mutex_unlock(&uld_mutex);
+
+skip_uld:
 	if (r < ctrl_entries) {
 		const struct sge_ctrl_txq *tx = &s->ctrlq[r * 4];
 
@@ -3008,7 +3070,7 @@ do { \
 		TL("TxQFull:", q.stops);
 		TL("TxQRestarts:", q.restarts);
 
-		goto unlock;
+		goto out;
 	}
 
 	r -= ctrl_entries;
@@ -3026,11 +3088,9 @@ do { \
 		seq_printf(seq, "%-12s %16u\n", "Intr pktcnt:",
 			   s->counter_val[evtq->pktcnt_idx]);
 
-		goto unlock;
+		goto out;
 	}
 
-unlock:
-	mutex_unlock(&uld_mutex);
 #undef R
 #undef RL
 #undef T
@@ -3039,13 +3099,38 @@ unlock:
 #undef R3
 #undef T3
 #undef S3
+out:
+	return 0;
+
+unlock:
+	mutex_unlock(&uld_mutex);
 	return 0;
 }
 
 static int sge_queue_entries(const struct adapter *adap)
 {
-	int tot_uld_entries = 0;
-	int i;
+	int i, tot_uld_entries = 0, eohw_entries = 0, eosw_entries = 0;
+
+	if (adap->tc_mqprio) {
+		struct cxgb4_tc_port_mqprio *port_mqprio;
+		u8 tc;
+
+		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+		if (adap->sge.eohw_txq)
+			eohw_entries = DIV_ROUND_UP(adap->sge.eoqsets, 4);
+
+		for (i = 0; i < adap->params.nports; i++) {
+			u32 entries = 0;
+
+			port_mqprio = &adap->tc_mqprio->port_mqprio[i];
+			for (tc = 0; tc < port_mqprio->mqprio.qopt.num_tc; tc++)
+				entries += port_mqprio->mqprio.qopt.count[tc];
+
+			if (entries)
+				eosw_entries += DIV_ROUND_UP(entries, 4);
+		}
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
+	}
 
 	if (!is_uld(adap))
 		goto lld_only;
@@ -3062,8 +3147,7 @@ static int sge_queue_entries(const struct adapter *adap)
 
 lld_only:
 	return DIV_ROUND_UP(adap->sge.ethqsets, 4) +
-	       (adap->sge.eohw_txq ? DIV_ROUND_UP(adap->sge.eoqsets, 4) : 0) +
-	       tot_uld_entries +
+	       eohw_entries + eosw_entries + tot_uld_entries +
 	       DIV_ROUND_UP(MAX_CTRL_QUEUES, 4) + 1;
 }
 
@@ -3244,6 +3328,10 @@ static int tid_info_show(struct seq_file *seq, void *v)
 	if (t->nhpftids)
 		seq_printf(seq, "HPFTID range: %u..%u\n", t->hpftid_base,
 			   t->hpftid_base + t->nhpftids - 1);
+	if (t->neotids)
+		seq_printf(seq, "EOTID range: %u..%u, in use: %u\n",
+			   t->eotid_base, t->eotid_base + t->neotids - 1,
+			   atomic_read(&t->eotids_in_use));
 	if (t->ntids)
 		seq_printf(seq, "HW TID usage: %u IP users, %u IPv6 users\n",
 			   t4_read_reg(adap, LE_DB_ACT_CNT_IPV4_A),
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 196451f8006f..d05c2371d8c7 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -1579,6 +1579,7 @@ static int tid_init(struct tid_info *t)
 	atomic_set(&t->tids_in_use, 0);
 	atomic_set(&t->conns_in_use, 0);
 	atomic_set(&t->hash_tids_in_use, 0);
+	atomic_set(&t->eotids_in_use, 0);
 
 	/* Setup the free list for atid_tab and clear the stid bitmap. */
 	if (natids) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
index 56079c9937db..ae7123a9de8e 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.c
@@ -574,6 +574,7 @@ static void cxgb4_mqprio_disable_offload(struct net_device *dev)
 int cxgb4_setup_tc_mqprio(struct net_device *dev,
 			  struct tc_mqprio_qopt_offload *mqprio)
 {
+	struct adapter *adap = netdev2adap(dev);
 	bool needs_bring_up = false;
 	int ret;
 
@@ -581,6 +582,8 @@ int cxgb4_setup_tc_mqprio(struct net_device *dev,
 	if (ret)
 		return ret;
 
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
+
 	/* To configure tc params, the current allocated EOTIDs must
 	 * be freed up. However, they can't be freed up if there's
 	 * traffic running on the interface. So, ensure interface is
@@ -616,6 +619,7 @@ out:
 	if (needs_bring_up)
 		cxgb_open(dev);
 
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 	return ret;
 }
 
@@ -628,6 +632,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)
 	if (!adap->tc_mqprio || !adap->tc_mqprio->port_mqprio)
 		return;
 
+	mutex_lock(&adap->tc_mqprio->mqprio_mutex);
 	for_each_port(adap, i) {
 		dev = adap->port[i];
 		if (!dev)
@@ -639,6 +644,7 @@ void cxgb4_mqprio_stop_offload(struct adapter *adap)
 
 		cxgb4_mqprio_disable_offload(dev);
 	}
+	mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 }
 
 int cxgb4_init_tc_mqprio(struct adapter *adap)
@@ -660,6 +666,8 @@ int cxgb4_init_tc_mqprio(struct adapter *adap)
 		goto out_free_mqprio;
 	}
 
+	mutex_init(&tc_mqprio->mqprio_mutex);
+
 	tc_mqprio->port_mqprio = tc_port_mqprio;
 	for (i = 0; i < adap->params.nports; i++) {
 		port_mqprio = &tc_mqprio->port_mqprio[i];
@@ -694,6 +702,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
 	u8 i;
 
 	if (adap->tc_mqprio) {
+		mutex_lock(&adap->tc_mqprio->mqprio_mutex);
 		if (adap->tc_mqprio->port_mqprio) {
 			for (i = 0; i < adap->params.nports; i++) {
 				struct net_device *dev = adap->port[i];
@@ -705,6 +714,7 @@ void cxgb4_cleanup_tc_mqprio(struct adapter *adap)
 			}
 			kfree(adap->tc_mqprio->port_mqprio);
 		}
+		mutex_unlock(&adap->tc_mqprio->mqprio_mutex);
 		kfree(adap->tc_mqprio);
 	}
 }
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
index ff8794132b22..be96f1dc0372 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_tc_mqprio.h
@@ -33,6 +33,7 @@ struct cxgb4_tc_port_mqprio {
 
 struct cxgb4_tc_mqprio {
 	refcount_t refcnt; /* Refcount for adapter-wide resources */
+	struct mutex mqprio_mutex; /* Lock for accessing MQPRIO info */
 	struct cxgb4_tc_port_mqprio *port_mqprio; /* Per port MQPRIO info */
 };
 
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index be831317520a..16796785eea3 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -147,6 +147,9 @@ struct tid_info {
 	/* TIDs in the HASH */
 	atomic_t hash_tids_in_use;
 	atomic_t conns_in_use;
+	/* ETHOFLD TIDs used for rate limiting */
+	atomic_t eotids_in_use;
+
 	/* lock for setting/clearing filter bitmap */
 	spinlock_t ftid_lock;
 
@@ -221,12 +224,14 @@ static inline void cxgb4_alloc_eotid(struct tid_info *t, u32 eotid, void *data)
 {
 	set_bit(eotid, t->eotid_bmap);
 	t->eotid_tab[eotid].data = data;
+	atomic_inc(&t->eotids_in_use);
 }
 
 static inline void cxgb4_free_eotid(struct tid_info *t, u32 eotid)
 {
 	clear_bit(eotid, t->eotid_bmap);
 	t->eotid_tab[eotid].data = NULL;
+	atomic_dec(&t->eotids_in_use);
 }
 
 int cxgb4_alloc_atid(struct tid_info *t, void *data);
-- 
cgit v1.2.3-59-g8ed1b


From b0ed0bbfb3046ed127f6004b5893ccb6cdd9ba90 Mon Sep 17 00:00:00 2001
From: Kevin Lo <kevlo@kevlo.org>
Date: Sat, 16 May 2020 01:24:47 +0800
Subject: net: phy: broadcom: add support for BCM54811 PHY

The BCM54811 PHY shares many similarities with the already supported BCM54810
PHY but additionally requires some semi-unique configuration.

Signed-off-by: Kevin Lo <kevlo@kevlo.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 56 +++++++++++++++++++++++++++++++++++++++++-----
 include/linux/brcmphy.h    |  2 ++
 2 files changed, 53 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 97201d5cf007..8cd8d188542a 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -195,7 +195,8 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 	if (BRCM_PHY_MODEL(phydev) != PHY_ID_BCM57780 &&
 	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610 &&
 	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM50610M &&
-	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM54810)
+	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM54810 &&
+	    BRCM_PHY_MODEL(phydev) != PHY_ID_BCM54811)
 		return;
 
 	val = bcm_phy_read_shadow(phydev, BCM54XX_SHD_SCR3);
@@ -214,8 +215,10 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 		clk125en = false;
 	} else {
 		if (phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED) {
-			/* Here, bit 0 _enables_ CLK125 when set */
-			val &= ~BCM54XX_SHD_SCR3_DEF_CLK125;
+			if (BRCM_PHY_MODEL(phydev) != PHY_ID_BCM54811) {
+				/* Here, bit 0 _enables_ CLK125 when set */
+				val &= ~BCM54XX_SHD_SCR3_DEF_CLK125;
+			}
 			clk125en = false;
 		}
 	}
@@ -225,8 +228,13 @@ static void bcm54xx_adjust_rxrefclk(struct phy_device *phydev)
 	else
 		val |= BCM54XX_SHD_SCR3_DLLAPD_DIS;
 
-	if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY)
-		val |= BCM54XX_SHD_SCR3_TRDDAPD;
+	if (phydev->dev_flags & PHY_BRCM_DIS_TXCRXC_NOENRGY) {
+		if (BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54810 ||
+		    BRCM_PHY_MODEL(phydev) == PHY_ID_BCM54811)
+			val |= BCM54810_SHD_SCR3_TRDDAPD;
+		else
+			val |= BCM54XX_SHD_SCR3_TRDDAPD;
+	}
 
 	if (orig != val)
 		bcm_phy_write_shadow(phydev, BCM54XX_SHD_SCR3, val);
@@ -327,6 +335,32 @@ static int bcm54xx_resume(struct phy_device *phydev)
 	return bcm54xx_config_init(phydev);
 }
 
+static int bcm54811_config_init(struct phy_device *phydev)
+{
+	int err, reg;
+
+	/* Disable BroadR-Reach function. */
+	reg = bcm_phy_read_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL);
+	reg &= ~BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN;
+	err = bcm_phy_write_exp(phydev, BCM54810_EXP_BROADREACH_LRE_MISC_CTL,
+				reg);
+	if (err < 0)
+		return err;
+
+	err = bcm54xx_config_init(phydev);
+
+	/* Enable CLK125 MUX on LED4 if ref clock is enabled. */
+	if (!(phydev->dev_flags & PHY_BRCM_RX_REFCLK_UNUSED)) {
+		reg = bcm_phy_read_exp(phydev, BCM54612E_EXP_SPARE0);
+		err = bcm_phy_write_exp(phydev, BCM54612E_EXP_SPARE0,
+					BCM54612E_LED4_CLK125OUT_EN | reg);
+		if (err < 0)
+			return err;
+        }
+
+	return err;
+}
+
 static int bcm5482_config_init(struct phy_device *phydev)
 {
 	int err, reg;
@@ -722,6 +756,17 @@ static struct phy_driver broadcom_drivers[] = {
 	.config_intr    = bcm_phy_config_intr,
 	.suspend	= genphy_suspend,
 	.resume		= bcm54xx_resume,
+}, {
+	.phy_id         = PHY_ID_BCM54811,
+	.phy_id_mask    = 0xfffffff0,
+	.name           = "Broadcom BCM54811",
+	/* PHY_GBIT_FEATURES */
+	.config_init    = bcm54811_config_init,
+	.config_aneg    = bcm5481_config_aneg,
+	.ack_interrupt  = bcm_phy_ack_intr,
+	.config_intr    = bcm_phy_config_intr,
+	.suspend	= genphy_suspend,
+	.resume		= bcm54xx_resume,
 }, {
 	.phy_id		= PHY_ID_BCM5482,
 	.phy_id_mask	= 0xfffffff0,
@@ -816,6 +861,7 @@ static struct mdio_device_id __maybe_unused broadcom_tbl[] = {
 	{ PHY_ID_BCM5464, 0xfffffff0 },
 	{ PHY_ID_BCM5481, 0xfffffff0 },
 	{ PHY_ID_BCM54810, 0xfffffff0 },
+	{ PHY_ID_BCM54811, 0xfffffff0 },
 	{ PHY_ID_BCM5482, 0xfffffff0 },
 	{ PHY_ID_BCM50610, 0xfffffff0 },
 	{ PHY_ID_BCM50610M, 0xfffffff0 },
diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h
index d41624db6de2..6ad4c000661a 100644
--- a/include/linux/brcmphy.h
+++ b/include/linux/brcmphy.h
@@ -17,6 +17,7 @@
 #define PHY_ID_BCM5395			0x0143bcf0
 #define PHY_ID_BCM53125			0x03625f20
 #define PHY_ID_BCM54810			0x03625d00
+#define PHY_ID_BCM54811			0x03625cc0
 #define PHY_ID_BCM5482			0x0143bcb0
 #define PHY_ID_BCM5411			0x00206070
 #define PHY_ID_BCM5421			0x002060e0
@@ -255,6 +256,7 @@
 #define BCM54810_EXP_BROADREACH_LRE_MISC_CTL_EN	(1 << 0)
 #define BCM54810_SHD_CLK_CTL			0x3
 #define BCM54810_SHD_CLK_CTL_GTXCLK_EN		(1 << 9)
+#define BCM54810_SHD_SCR3_TRDDAPD		0x0100
 
 /* BCM54612E Registers */
 #define BCM54612E_EXP_SPARE0		(MII_BCM54XX_EXP_SEL_ETC + 0x34)
-- 
cgit v1.2.3-59-g8ed1b


From ca1c933bcee9393d8a83c6be1093471e0c3b655d Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Thu, 14 May 2020 21:42:18 +0200
Subject: net: phy: tja11xx: execute cable test on link up

A typical 100Base-T1 link should be always connected. If the link is in
a short or open state, it is a failure. In most cases, we won't be able
to automatically handle this issue, but we need to log it or notify user
(if possible).

With this patch, the cable is tested on each "ip l s dev .. up" attempt
and an ethnl notification is sent to user space.

This patch was tested with TJA1102 PHY and "ethtool --monitor" command.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 48 ++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 43 insertions(+), 5 deletions(-)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 8b743d25002b..0d4f9067ca71 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -180,10 +180,43 @@ static int tja11xx_soft_reset(struct phy_device *phydev)
 	return genphy_soft_reset(phydev);
 }
 
+static int tja11xx_config_aneg_cable_test(struct phy_device *phydev)
+{
+	bool finished = false;
+	int ret;
+
+	if (phydev->link)
+		return 0;
+
+	if (!phydev->drv->cable_test_start ||
+	    !phydev->drv->cable_test_get_status)
+		return 0;
+
+	ret = ethnl_cable_test_alloc(phydev);
+	if (ret)
+		return ret;
+
+	ret = phydev->drv->cable_test_start(phydev);
+	if (ret)
+		return ret;
+
+	/* According to the documentation this test takes 100 usec */
+	usleep_range(100, 200);
+
+	ret = phydev->drv->cable_test_get_status(phydev, &finished);
+	if (ret)
+		return ret;
+
+	if (finished)
+		ethnl_cable_test_finished(phydev);
+
+	return 0;
+}
+
 static int tja11xx_config_aneg(struct phy_device *phydev)
 {
+	int ret, changed = 0;
 	u16 ctl = 0;
-	int ret;
 
 	switch (phydev->master_slave_set) {
 	case MASTER_SLAVE_CFG_MASTER_FORCE:
@@ -193,17 +226,22 @@ static int tja11xx_config_aneg(struct phy_device *phydev)
 		break;
 	case MASTER_SLAVE_CFG_UNKNOWN:
 	case MASTER_SLAVE_CFG_UNSUPPORTED:
-		return 0;
+		goto do_test;
 	default:
 		phydev_warn(phydev, "Unsupported Master/Slave mode\n");
 		return -ENOTSUPP;
 	}
 
-	ret = phy_modify_changed(phydev, MII_CFG1, MII_CFG1_MASTER_SLAVE, ctl);
-	if (ret < 0)
+	changed = phy_modify_changed(phydev, MII_CFG1, MII_CFG1_MASTER_SLAVE, ctl);
+	if (changed < 0)
+		return changed;
+
+do_test:
+	ret = tja11xx_config_aneg_cable_test(phydev);
+	if (ret)
 		return ret;
 
-	return __genphy_config_aneg(phydev, ret);
+	return __genphy_config_aneg(phydev, changed);
 }
 
 static int tja11xx_config_init(struct phy_device *phydev)
-- 
cgit v1.2.3-59-g8ed1b


From 90bf45134d55d626ae2713cac50cda10c6c8b0c2 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 15 May 2020 19:22:15 +0200
Subject: mptcp: add new sock flag to deal with join subflows

MP_JOIN subflows must not land into the accept queue.
Currently tcp_check_req() calls an mptcp specific helper
to detect such scenario.

Such helper leverages the subflow context to check for
MP_JOIN subflows. We need to deal also with MP JOIN
failures, even when the subflow context is not available
due to allocation failure.

A possible solution would be changing the syn_recv_sock()
signature to allow returning a more descriptive action/
error code and deal with that in tcp_check_req().

Since the above need is MPTCP specific, this patch instead
uses a TCP request socket hole to add a MPTCP specific flag.
Such flag is used by the MPTCP syn_recv_sock() to tell
tcp_check_req() how to deal with the request socket.

This change is a no-op for !MPTCP build, and makes the
MPTCP code simpler. It allows also the next patch to deal
correctly with MP JOIN failure.

v1 -> v2:
 - be more conservative on drop_req initialization (Mat)

RFC -> v1:
 - move the drop_req bit inside tcp_request_sock (Eric)

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Reviewed-by: Christoph Paasch <cpaasch@apple.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h      |  3 +++
 include/net/mptcp.h      | 17 ++++++++++-------
 net/ipv4/tcp_minisocks.c |  2 +-
 net/mptcp/protocol.c     |  7 -------
 net/mptcp/subflow.c      |  3 +++
 5 files changed, 17 insertions(+), 15 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index e60db06ec28d..bf44e85d709d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -120,6 +120,9 @@ struct tcp_request_sock {
 	u64				snt_synack; /* first SYNACK sent time */
 	bool				tfo_listener;
 	bool				is_mptcp;
+#if IS_ENABLED(CONFIG_MPTCP)
+	bool				drop_req;
+#endif
 	u32				txhash;
 	u32				rcv_isn;
 	u32				snt_isn;
diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index e60275659de6..c4a6ef4ba35b 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -68,6 +68,11 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
 	return tcp_rsk(req)->is_mptcp;
 }
 
+static inline bool rsk_drop_req(const struct request_sock *req)
+{
+	return tcp_rsk(req)->is_mptcp && tcp_rsk(req)->drop_req;
+}
+
 void mptcp_space(const struct sock *ssk, int *space, int *full_space);
 bool mptcp_syn_options(struct sock *sk, const struct sk_buff *skb,
 		       unsigned int *size, struct mptcp_out_options *opts);
@@ -121,8 +126,6 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
 				 skb_ext_find(from, SKB_EXT_MPTCP));
 }
 
-bool mptcp_sk_is_subflow(const struct sock *sk);
-
 void mptcp_seq_show(struct seq_file *seq);
 #else
 
@@ -140,6 +143,11 @@ static inline bool rsk_is_mptcp(const struct request_sock *req)
 	return false;
 }
 
+static inline bool rsk_drop_req(const struct request_sock *req)
+{
+	return false;
+}
+
 static inline void mptcp_parse_option(const struct sk_buff *skb,
 				      const unsigned char *ptr, int opsize,
 				      struct tcp_options_received *opt_rx)
@@ -190,11 +198,6 @@ static inline bool mptcp_skb_can_collapse(const struct sk_buff *to,
 	return true;
 }
 
-static inline bool mptcp_sk_is_subflow(const struct sock *sk)
-{
-	return false;
-}
-
 static inline void mptcp_space(const struct sock *ssk, int *s, int *fs) { }
 static inline void mptcp_seq_show(struct seq_file *seq) { }
 #endif /* CONFIG_MPTCP */
diff --git a/net/ipv4/tcp_minisocks.c b/net/ipv4/tcp_minisocks.c
index 7e40322cc5ec..495dda2449fe 100644
--- a/net/ipv4/tcp_minisocks.c
+++ b/net/ipv4/tcp_minisocks.c
@@ -774,7 +774,7 @@ struct sock *tcp_check_req(struct sock *sk, struct sk_buff *skb,
 	if (!child)
 		goto listen_overflow;
 
-	if (own_req && sk_is_mptcp(child) && mptcp_sk_is_subflow(child)) {
+	if (own_req && rsk_drop_req(req)) {
 		reqsk_queue_removed(&inet_csk(sk)->icsk_accept_queue, req);
 		inet_csk_reqsk_queue_drop_and_put(sk, req);
 		return child;
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e1f23016ed3f..a61e60e94137 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -1638,13 +1638,6 @@ bool mptcp_finish_join(struct sock *sk)
 	return ret;
 }
 
-bool mptcp_sk_is_subflow(const struct sock *sk)
-{
-	struct mptcp_subflow_context *subflow = mptcp_subflow_ctx(sk);
-
-	return subflow->mp_join == 1;
-}
-
 static bool mptcp_memory_free(const struct sock *sk, int wake)
 {
 	struct mptcp_sock *msk = mptcp_sk(sk);
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 009d5c478062..5e03ed8ae899 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -470,6 +470,8 @@ create_child:
 	if (child && *own_req) {
 		struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(child);
 
+		tcp_rsk(req)->drop_req = false;
+
 		/* we need to fallback on ctx allocation failure and on pre-reqs
 		 * checking above. In the latter scenario we additionally need
 		 * to reset the context to non MPTCP status.
@@ -512,6 +514,7 @@ create_child:
 				goto close_child;
 
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
+			tcp_rsk(req)->drop_req = true;
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 2f8a397d0a54b59c05e481523ab2a88a63d82d18 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 15 May 2020 19:22:16 +0200
Subject: inet_connection_sock: factor out destroy helper.

Move the steps to prepare an inet_connection_sock for
forced disposal inside a separate helper. No functional
changes intended, this will just simplify the next patch.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/inet_connection_sock.h | 8 ++++++++
 net/ipv4/inet_connection_sock.c    | 6 +-----
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/net/inet_connection_sock.h b/include/net/inet_connection_sock.h
index a3f076befa4f..2f1f8c3efb26 100644
--- a/include/net/inet_connection_sock.h
+++ b/include/net/inet_connection_sock.h
@@ -287,6 +287,14 @@ static inline int inet_csk_reqsk_queue_is_full(const struct sock *sk)
 void inet_csk_reqsk_queue_drop(struct sock *sk, struct request_sock *req);
 void inet_csk_reqsk_queue_drop_and_put(struct sock *sk, struct request_sock *req);
 
+static inline void inet_csk_prepare_for_destroy_sock(struct sock *sk)
+{
+	/* The below has to be done to allow calling inet_csk_destroy_sock */
+	sock_set_flag(sk, SOCK_DEAD);
+	percpu_counter_inc(sk->sk_prot->orphan_count);
+	inet_sk(sk)->inet_num = 0;
+}
+
 void inet_csk_destroy_sock(struct sock *sk);
 void inet_csk_prepare_forced_close(struct sock *sk);
 
diff --git a/net/ipv4/inet_connection_sock.c b/net/ipv4/inet_connection_sock.c
index 5f34eb951627..d6faf3702824 100644
--- a/net/ipv4/inet_connection_sock.c
+++ b/net/ipv4/inet_connection_sock.c
@@ -896,11 +896,7 @@ void inet_csk_prepare_forced_close(struct sock *sk)
 	/* sk_clone_lock locked the socket and set refcnt to 2 */
 	bh_unlock_sock(sk);
 	sock_put(sk);
-
-	/* The below has to be done to allow calling inet_csk_destroy_sock */
-	sock_set_flag(sk, SOCK_DEAD);
-	percpu_counter_inc(sk->sk_prot->orphan_count);
-	inet_sk(sk)->inet_num = 0;
+	inet_csk_prepare_for_destroy_sock(sk);
 }
 EXPORT_SYMBOL(inet_csk_prepare_forced_close);
 
-- 
cgit v1.2.3-59-g8ed1b


From 729cd6436f359b6e618c2f14836d419f40444503 Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 15 May 2020 19:22:17 +0200
Subject: mptcp: cope better with MP_JOIN failure

Currently, on MP_JOIN failure we reset the child
socket, but leave the request socket untouched.

tcp_check_req will deal with it according to the
'tcp_abort_on_overflow' sysctl value - by default the
req socket will stay alive.

The above leads to inconsistent behavior on MP JOIN
failure, and bad listener overflow accounting.

This patch addresses the issue leveraging the infrastructure
just introduced to ask the TCP stack to drop the req on
failure.

The child socket is not freed anymore by subflow_syn_recv_sock(),
instead it's moved to a dead state and will be disposed by the
next sock_put done by the TCP stack, so that listener overflow
accounting is not affected by MP JOIN failure.

Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Reviewed-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Mat Martineau <mathew.j.martineau@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 5e03ed8ae899..3cf2eeea9d80 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -478,7 +478,7 @@ create_child:
 		 */
 		if (!ctx || fallback) {
 			if (fallback_is_fatal)
-				goto close_child;
+				goto dispose_child;
 
 			if (ctx) {
 				subflow_ulp_fallback(child, ctx);
@@ -507,11 +507,11 @@ create_child:
 
 			owner = mptcp_token_get_sock(ctx->token);
 			if (!owner)
-				goto close_child;
+				goto dispose_child;
 
 			ctx->conn = (struct sock *)owner;
 			if (!mptcp_finish_join(child))
-				goto close_child;
+				goto dispose_child;
 
 			SUBFLOW_REQ_INC_STATS(req, MPTCP_MIB_JOINACKRX);
 			tcp_rsk(req)->drop_req = true;
@@ -531,11 +531,14 @@ out:
 		      !mptcp_subflow_ctx(child)->conn));
 	return child;
 
-close_child:
+dispose_child:
+	tcp_rsk(req)->drop_req = true;
 	tcp_send_active_reset(child, GFP_ATOMIC);
-	inet_csk_prepare_forced_close(child);
+	inet_csk_prepare_for_destroy_sock(child);
 	tcp_done(child);
-	return NULL;
+
+	/* The last child reference will be released by the caller */
+	return child;
 }
 
 static struct inet_connection_sock_af_ops subflow_specific;
-- 
cgit v1.2.3-59-g8ed1b


From 2726cd4a29280c20ea983be285a6aefe75b205a4 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 3 May 2020 10:15:58 +0300
Subject: net/mlx5: Dedicate fw page to the requesting function

The cited patch assumes that all chunks in a fw page belong to the same
function, thus the driver must dedicate a fw page to the requesting
function, which is actually what was intended in the original fw pages
allocator design, hence the fwp->func_id !

Up until the cited patch everything worked ok, but now "release all pages"
is broken on systems with page_size > 4k.

Fix this by dedicating fw page to the requesting function id via adding a
func_id parameter to alloc_4k() function.

Fixes: c6168161f693 ("net/mlx5: Add support for release all pages event")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 8ce78f42dfc0..84f6356edbf8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -156,15 +156,21 @@ static int mlx5_cmd_query_pages(struct mlx5_core_dev *dev, u16 *func_id,
 	return err;
 }
 
-static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr)
+static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
 {
-	struct fw_page *fp;
+	struct fw_page *fp = NULL;
+	struct fw_page *iter;
 	unsigned n;
 
-	if (list_empty(&dev->priv.free_list))
+	list_for_each_entry(iter, &dev->priv.free_list, list) {
+		if (iter->func_id != func_id)
+			continue;
+		fp = iter;
+	}
+
+	if (list_empty(&dev->priv.free_list) || !fp)
 		return -ENOMEM;
 
-	fp = list_entry(dev->priv.free_list.next, struct fw_page, list);
 	n = find_first_bit(&fp->bitmask, 8 * sizeof(fp->bitmask));
 	if (n >= MLX5_NUM_4K_IN_PAGE) {
 		mlx5_core_warn(dev, "alloc 4k bug\n");
@@ -295,7 +301,7 @@ static int give_pages(struct mlx5_core_dev *dev, u16 func_id, int npages,
 
 	for (i = 0; i < npages; i++) {
 retry:
-		err = alloc_4k(dev, &addr);
+		err = alloc_4k(dev, &addr, func_id);
 		if (err) {
 			if (err == -ENOMEM)
 				err = alloc_system_page(dev, func_id);
-- 
cgit v1.2.3-59-g8ed1b


From e7f860e2106a4c288d98308c59545ddd350e4739 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Sun, 3 May 2020 11:01:39 +0300
Subject: net/mlx5: Fix a bug of releasing wrong chunks on > 4K page size
 systems

On systems with page size larger than 4K, a fwp object has few 4K chunks.
Fix a bug in fwp free flow where the chunk address was dropped and
fwp->addr was used instead (first chunk address). This caused a wrong
update of fwp->bitmask which later can cause errors in re-alloc fwp
chunk flow.

In order to fix this, refactor the release flow:
- Free 4k: Releases a specific 4k chunk inside the fwp, defined by
  starting address.
- Free fwp: Unconditionally release the whole fwp and its resources.
Free addr will call free fwp if all chunks were released, in order to do
code sharing.

In addition, fix npages to count for all released chunks correctly.

Fixes: c6168161f693 ("net/mlx5: Add support for release all pages event")
Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/pagealloc.c    | 44 +++++++++++-----------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
index 84f6356edbf8..5ddd18639a1e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/pagealloc.c
@@ -188,35 +188,35 @@ static int alloc_4k(struct mlx5_core_dev *dev, u64 *addr, u16 func_id)
 
 #define MLX5_U64_4K_PAGE_MASK ((~(u64)0U) << PAGE_SHIFT)
 
-static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp)
+static void free_fwp(struct mlx5_core_dev *dev, struct fw_page *fwp,
+		     bool in_free_list)
 {
-	int n = (fwp->addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
-
-	fwp->free_count++;
-	set_bit(n, &fwp->bitmask);
-	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE) {
-		rb_erase(&fwp->rb_node, &dev->priv.page_root);
-		if (fwp->free_count != 1)
-			list_del(&fwp->list);
-		dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
-			       PAGE_SIZE, DMA_BIDIRECTIONAL);
-		__free_page(fwp->page);
-		kfree(fwp);
-	} else if (fwp->free_count == 1) {
-		list_add(&fwp->list, &dev->priv.free_list);
-	}
+	rb_erase(&fwp->rb_node, &dev->priv.page_root);
+	if (in_free_list)
+		list_del(&fwp->list);
+	dma_unmap_page(dev->device, fwp->addr & MLX5_U64_4K_PAGE_MASK,
+		       PAGE_SIZE, DMA_BIDIRECTIONAL);
+	__free_page(fwp->page);
+	kfree(fwp);
 }
 
-static void free_addr(struct mlx5_core_dev *dev, u64 addr)
+static void free_4k(struct mlx5_core_dev *dev, u64 addr)
 {
 	struct fw_page *fwp;
+	int n;
 
 	fwp = find_fw_page(dev, addr & MLX5_U64_4K_PAGE_MASK);
 	if (!fwp) {
 		mlx5_core_warn_rl(dev, "page not found\n");
 		return;
 	}
-	free_fwp(dev, fwp);
+	n = (addr & ~MLX5_U64_4K_PAGE_MASK) >> MLX5_ADAPTER_PAGE_SHIFT;
+	fwp->free_count++;
+	set_bit(n, &fwp->bitmask);
+	if (fwp->free_count == MLX5_NUM_4K_IN_PAGE)
+		free_fwp(dev, fwp, fwp->free_count != 1);
+	else if (fwp->free_count == 1)
+		list_add(&fwp->list, &dev->priv.free_list);
 }
 
 static int alloc_system_page(struct mlx5_core_dev *dev, u16 func_id)
@@ -340,7 +340,7 @@ retry:
 
 out_4k:
 	for (i--; i >= 0; i--)
-		free_addr(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_in, in, pas[i]));
 out_free:
 	kvfree(in);
 	if (notify_fail)
@@ -361,8 +361,8 @@ static void release_all_pages(struct mlx5_core_dev *dev, u32 func_id,
 		p = rb_next(p);
 		if (fwp->func_id != func_id)
 			continue;
-		free_fwp(dev, fwp);
-		npages++;
+		npages += (MLX5_NUM_4K_IN_PAGE - fwp->free_count);
+		free_fwp(dev, fwp, fwp->free_count);
 	}
 
 	dev->priv.fw_pages -= npages;
@@ -446,7 +446,7 @@ static int reclaim_pages(struct mlx5_core_dev *dev, u32 func_id, int npages,
 	}
 
 	for (i = 0; i < num_claimed; i++)
-		free_addr(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
+		free_4k(dev, MLX5_GET64(manage_pages_out, out, pas[i]));
 
 	if (nclaimed)
 		*nclaimed = num_claimed;
-- 
cgit v1.2.3-59-g8ed1b


From 4162f58b476b248d0718f3d6aad2e57da2b08a63 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 1 May 2020 10:20:01 -0500
Subject: net/mlx5: Have single error unwinding path

Having multiple error unwinding paths is error prone.
Let's have just one error unwinding path.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 742ba012c234..2e128068a48c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1217,10 +1217,9 @@ int mlx5_load_one(struct mlx5_core_dev *dev, bool boot)
 		mlx5_register_device(dev);
 
 	set_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state);
-out:
-	mutex_unlock(&dev->intf_state_mutex);
 
-	return err;
+	mutex_unlock(&dev->intf_state_mutex);
+	return 0;
 
 err_devlink_reg:
 	mlx5_unload(dev);
@@ -1230,8 +1229,8 @@ err_load:
 function_teardown:
 	mlx5_function_teardown(dev, boot);
 	dev->state = MLX5_DEVICE_STATE_INTERNAL_ERROR;
+out:
 	mutex_unlock(&dev->intf_state_mutex);
-
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 41798df9bfca5aae080a66252ae1709867e28757 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 1 May 2020 09:42:45 -0500
Subject: net/mlx5: Drain wq first during PCI device removal

mlx5_unload_one() is done with cleanup = true only once.

So instead of doing health wq drain inside the if(), do it directly
during PCI device removal.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 2e128068a48c..d6a8128f667a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1236,10 +1236,8 @@ out:
 
 void mlx5_unload_one(struct mlx5_core_dev *dev, bool cleanup)
 {
-	if (cleanup) {
+	if (cleanup)
 		mlx5_unregister_device(dev);
-		mlx5_drain_health_wq(dev);
-	}
 
 	mutex_lock(&dev->intf_state_mutex);
 	if (!test_bit(MLX5_INTERFACE_STATE_UP, &dev->intf_state)) {
@@ -1382,6 +1380,7 @@ static void remove_one(struct pci_dev *pdev)
 	mlx5_crdump_disable(dev);
 	mlx5_devlink_unregister(devlink);
 
+	mlx5_drain_health_wq(dev);
 	mlx5_unload_one(dev, true);
 	mlx5_pci_close(dev);
 	mlx5_mdev_uninit(dev);
-- 
cgit v1.2.3-59-g8ed1b


From 49c0355d301b4e0e01e0f19ddbb023bd7d0ee48c Mon Sep 17 00:00:00 2001
From: Paul Blakey <paulb@mellanox.com>
Date: Thu, 7 May 2020 12:01:39 +0300
Subject: net/mlx5: Wait for inactive autogroups

Currently, if one thread tries to add an entry to an autogrouped table
with no free matching group, while another thread is in the process of
creating a new matching autogroup, it doesn't wait for the new group
creation, and creates an unnecessary new autogroup.

Instead of skipping inactive, wait on the write lock of those groups.

Signed-off-by: Paul Blakey <paulb@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/fs_core.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
index 2da45e9b9b6d..52af6023a4b4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_core.c
@@ -1755,11 +1755,13 @@ skip_search:
 	list_for_each_entry(iter, match_head, list) {
 		g = iter->g;
 
-		if (!g->node.active)
-			continue;
-
 		nested_down_write_ref_node(&g->node, FS_LOCK_PARENT);
 
+		if (!g->node.active) {
+			up_write_ref_node(&g->node, false);
+			continue;
+		}
+
 		err = insert_fte(g, fte);
 		if (err) {
 			up_write_ref_node(&g->node, false);
-- 
cgit v1.2.3-59-g8ed1b


From 90bf1c8dbdc52d08a18c9c1fb2b847999159e3a2 Mon Sep 17 00:00:00 2001
From: Eran Ben Elisha <eranbe@mellanox.com>
Date: Thu, 7 May 2020 14:13:33 +0300
Subject: net/mlx5: Move internal timer read function to clock library

Move mlx5_read_internal_timer() into lib/clock.c file as it is being
used there. As such, make this function a static one.

In addition, rearrange headers include to support function move.

Signed-off-by: Eran Ben Elisha <eranbe@mellanox.com>
Reviewed-by: Aya Levin <ayal@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h        |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c | 21 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c      | 20 --------------------
 drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h |  3 ---
 4 files changed, 21 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 26911b15f8fe..195162b9b245 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -36,7 +36,6 @@
 #include <linux/etherdevice.h>
 #include <linux/timecounter.h>
 #include <linux/net_tstamp.h>
-#include <linux/ptp_clock_kernel.h>
 #include <linux/crash_dump.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/qp.h>
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
index 43f97601b500..ef0706d15a5b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/clock.c
@@ -32,6 +32,7 @@
 
 #include <linux/clocksource.h>
 #include <linux/highmem.h>
+#include <linux/ptp_clock_kernel.h>
 #include <rdma/mlx5-abi.h>
 #include "lib/eq.h"
 #include "en.h"
@@ -66,6 +67,26 @@ enum {
 	MLX5_MTPPS_FS_ENH_OUT_PER_ADJ		= BIT(0x7),
 };
 
+static u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
+				    struct ptp_system_timestamp *sts)
+{
+	u32 timer_h, timer_h1, timer_l;
+
+	timer_h = ioread32be(&dev->iseg->internal_timer_h);
+	ptp_read_system_prets(sts);
+	timer_l = ioread32be(&dev->iseg->internal_timer_l);
+	ptp_read_system_postts(sts);
+	timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
+	if (timer_h != timer_h1) {
+		/* wrap around */
+		ptp_read_system_prets(sts);
+		timer_l = ioread32be(&dev->iseg->internal_timer_l);
+		ptp_read_system_postts(sts);
+	}
+
+	return (u64)timer_l | (u64)timer_h1 << 32;
+}
+
 static u64 read_internal_timer(const struct cyclecounter *cc)
 {
 	struct mlx5_clock *clock = container_of(cc, struct mlx5_clock, cycles);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index d6a8128f667a..4d2e1e982460 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -672,26 +672,6 @@ int mlx5_core_disable_hca(struct mlx5_core_dev *dev, u16 func_id)
 	return mlx5_cmd_exec_in(dev, disable_hca, in);
 }
 
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
-			     struct ptp_system_timestamp *sts)
-{
-	u32 timer_h, timer_h1, timer_l;
-
-	timer_h = ioread32be(&dev->iseg->internal_timer_h);
-	ptp_read_system_prets(sts);
-	timer_l = ioread32be(&dev->iseg->internal_timer_l);
-	ptp_read_system_postts(sts);
-	timer_h1 = ioread32be(&dev->iseg->internal_timer_h);
-	if (timer_h != timer_h1) {
-		/* wrap around */
-		ptp_read_system_prets(sts);
-		timer_l = ioread32be(&dev->iseg->internal_timer_l);
-		ptp_read_system_postts(sts);
-	}
-
-	return (u64)timer_l | (u64)timer_h1 << 32;
-}
-
 static int mlx5_core_set_issi(struct mlx5_core_dev *dev)
 {
 	u32 query_out[MLX5_ST_SZ_DW(query_issi_out)] = {};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
index a8fb43a85d1d..fc1649dac11b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/mlx5_core.h
@@ -38,7 +38,6 @@
 #include <linux/sched.h>
 #include <linux/if_link.h>
 #include <linux/firmware.h>
-#include <linux/ptp_clock_kernel.h>
 #include <linux/mlx5/cq.h>
 #include <linux/mlx5/fs.h>
 #include <linux/mlx5/driver.h>
@@ -141,8 +140,6 @@ int mlx5_modify_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 int mlx5_destroy_scheduling_element_cmd(struct mlx5_core_dev *dev, u8 hierarchy,
 					u32 element_id);
 int mlx5_wait_for_pages(struct mlx5_core_dev *dev, int *pages);
-u64 mlx5_read_internal_timer(struct mlx5_core_dev *dev,
-			     struct ptp_system_timestamp *sts);
 
 void mlx5_cmd_trigger_completions(struct mlx5_core_dev *dev);
 void mlx5_cmd_flush(struct mlx5_core_dev *dev);
-- 
cgit v1.2.3-59-g8ed1b


From 9102d836d296fbc94517736d2dd1131ad6b01740 Mon Sep 17 00:00:00 2001
From: Roi Dayan <roid@mellanox.com>
Date: Sun, 12 Apr 2020 15:39:15 +0300
Subject: net/mlx5e: CT: Fix offload with CT action after CT NAT action

A chain of rules may perform the CT action again after a CT NAT action.
Before this fix, matching breaks in that case because we enter the CT
table after the NAT changes, and not the CT NAT table.
Fix this by adding pre_ct and pre_ct_nat tables that skip the ct/ct_nat
tables and go straight to the post_ct table if ct/nat was already done.

Signed-off-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c | 308 +++++++++++++++++++--
 1 file changed, 286 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 5568ded97e0b..98263f00ee43 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -24,6 +24,7 @@
 #define MLX5_CT_ZONE_MASK GENMASK(MLX5_CT_ZONE_BITS - 1, 0)
 #define MLX5_CT_STATE_ESTABLISHED_BIT BIT(1)
 #define MLX5_CT_STATE_TRK_BIT BIT(2)
+#define MLX5_CT_STATE_NAT_BIT BIT(3)
 
 #define MLX5_FTE_ID_BITS (mlx5e_tc_attr_to_reg_mappings[FTEID_TO_REG].mlen * 8)
 #define MLX5_FTE_ID_MAX GENMASK(MLX5_FTE_ID_BITS - 1, 0)
@@ -61,6 +62,15 @@ struct mlx5_ct_zone_rule {
 	bool nat;
 };
 
+struct mlx5_tc_ct_pre {
+	struct mlx5_flow_table *fdb;
+	struct mlx5_flow_group *flow_grp;
+	struct mlx5_flow_group *miss_grp;
+	struct mlx5_flow_handle *flow_rule;
+	struct mlx5_flow_handle *miss_rule;
+	struct mlx5_modify_hdr *modify_hdr;
+};
+
 struct mlx5_ct_ft {
 	struct rhash_head node;
 	u16 zone;
@@ -68,6 +78,8 @@ struct mlx5_ct_ft {
 	struct nf_flowtable *nf_ft;
 	struct mlx5_tc_ct_priv *ct_priv;
 	struct rhashtable ct_entries_ht;
+	struct mlx5_tc_ct_pre pre_ct;
+	struct mlx5_tc_ct_pre pre_ct_nat;
 };
 
 struct mlx5_ct_entry {
@@ -426,6 +438,7 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
 	struct mlx5_eswitch *esw = ct_priv->esw;
 	struct mlx5_modify_hdr *mod_hdr;
 	struct flow_action_entry *meta;
+	u16 ct_state = 0;
 	int err;
 
 	meta = mlx5_tc_ct_get_ct_metadata_action(flow_rule);
@@ -444,11 +457,13 @@ mlx5_tc_ct_entry_create_mod_hdr(struct mlx5_tc_ct_priv *ct_priv,
 						  &mod_acts);
 		if (err)
 			goto err_mapping;
+
+		ct_state |= MLX5_CT_STATE_NAT_BIT;
 	}
 
+	ct_state |= MLX5_CT_STATE_ESTABLISHED_BIT | MLX5_CT_STATE_TRK_BIT;
 	err = mlx5_tc_ct_entry_set_registers(ct_priv, &mod_acts,
-					     (MLX5_CT_STATE_ESTABLISHED_BIT |
-					      MLX5_CT_STATE_TRK_BIT),
+					     ct_state,
 					     meta->ct_metadata.mark,
 					     meta->ct_metadata.labels[0],
 					     tupleid);
@@ -791,6 +806,238 @@ mlx5_tc_ct_parse_action(struct mlx5e_priv *priv,
 	return 0;
 }
 
+static int tc_ct_pre_ct_add_rules(struct mlx5_ct_ft *ct_ft,
+				  struct mlx5_tc_ct_pre *pre_ct,
+				  bool nat)
+{
+	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+	struct mlx5e_tc_mod_hdr_acts pre_mod_acts = {};
+	struct mlx5_core_dev *dev = ct_priv->esw->dev;
+	struct mlx5_flow_table *fdb = pre_ct->fdb;
+	struct mlx5_flow_destination dest = {};
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_modify_hdr *mod_hdr;
+	struct mlx5_flow_handle *rule;
+	struct mlx5_flow_spec *spec;
+	u32 ctstate;
+	u16 zone;
+	int err;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	zone = ct_ft->zone & MLX5_CT_ZONE_MASK;
+	err = mlx5e_tc_match_to_reg_set(dev, &pre_mod_acts, ZONE_TO_REG, zone);
+	if (err) {
+		ct_dbg("Failed to set zone register mapping");
+		goto err_mapping;
+	}
+
+	mod_hdr = mlx5_modify_header_alloc(dev,
+					   MLX5_FLOW_NAMESPACE_FDB,
+					   pre_mod_acts.num_actions,
+					   pre_mod_acts.actions);
+
+	if (IS_ERR(mod_hdr)) {
+		err = PTR_ERR(mod_hdr);
+		ct_dbg("Failed to create pre ct mod hdr");
+		goto err_mapping;
+	}
+	pre_ct->modify_hdr = mod_hdr;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST |
+			  MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+	flow_act.flags |= FLOW_ACT_IGNORE_FLOW_LEVEL;
+	flow_act.modify_hdr = mod_hdr;
+	dest.type = MLX5_FLOW_DESTINATION_TYPE_FLOW_TABLE;
+
+	/* add flow rule */
+	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
+				    zone, MLX5_CT_ZONE_MASK);
+	ctstate = MLX5_CT_STATE_TRK_BIT;
+	if (nat)
+		ctstate |= MLX5_CT_STATE_NAT_BIT;
+	mlx5e_tc_match_to_reg_match(spec, CTSTATE_TO_REG, ctstate, ctstate);
+
+	dest.ft = ct_priv->post_ct;
+	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		ct_dbg("Failed to add pre ct flow rule zone %d", zone);
+		goto err_flow_rule;
+	}
+	pre_ct->flow_rule = rule;
+
+	/* add miss rule */
+	memset(spec, 0, sizeof(*spec));
+	dest.ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+	rule = mlx5_add_flow_rules(fdb, spec, &flow_act, &dest, 1);
+	if (IS_ERR(rule)) {
+		err = PTR_ERR(rule);
+		ct_dbg("Failed to add pre ct miss rule zone %d", zone);
+		goto err_miss_rule;
+	}
+	pre_ct->miss_rule = rule;
+
+	dealloc_mod_hdr_actions(&pre_mod_acts);
+	kvfree(spec);
+	return 0;
+
+err_miss_rule:
+	mlx5_del_flow_rules(pre_ct->flow_rule);
+err_flow_rule:
+	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+err_mapping:
+	dealloc_mod_hdr_actions(&pre_mod_acts);
+	kvfree(spec);
+	return err;
+}
+
+static void
+tc_ct_pre_ct_del_rules(struct mlx5_ct_ft *ct_ft,
+		       struct mlx5_tc_ct_pre *pre_ct)
+{
+	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+	struct mlx5_core_dev *dev = ct_priv->esw->dev;
+
+	mlx5_del_flow_rules(pre_ct->flow_rule);
+	mlx5_del_flow_rules(pre_ct->miss_rule);
+	mlx5_modify_header_dealloc(dev, pre_ct->modify_hdr);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct(struct mlx5_ct_ft *ct_ft,
+			struct mlx5_tc_ct_pre *pre_ct,
+			bool nat)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_tc_ct_priv *ct_priv = ct_ft->ct_priv;
+	struct mlx5_core_dev *dev = ct_priv->esw->dev;
+	struct mlx5_flow_table_attr ft_attr = {};
+	struct mlx5_flow_namespace *ns;
+	struct mlx5_flow_table *ft;
+	struct mlx5_flow_group *g;
+	u32 metadata_reg_c_2_mask;
+	u32 *flow_group_in;
+	void *misc;
+	int err;
+
+	ns = mlx5_get_flow_namespace(dev, MLX5_FLOW_NAMESPACE_FDB);
+	if (!ns) {
+		err = -EOPNOTSUPP;
+		ct_dbg("Failed to get FDB flow namespace");
+		return err;
+	}
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	ft_attr.flags = MLX5_FLOW_TABLE_UNMANAGED;
+	ft_attr.prio = FDB_TC_OFFLOAD;
+	ft_attr.max_fte = 2;
+	ft_attr.level = 1;
+	ft = mlx5_create_flow_table(ns, &ft_attr);
+	if (IS_ERR(ft)) {
+		err = PTR_ERR(ft);
+		ct_dbg("Failed to create pre ct table");
+		goto out_free;
+	}
+	pre_ct->fdb = ft;
+
+	/* create flow group */
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_MISC_PARAMETERS_2);
+
+	misc = MLX5_ADDR_OF(create_flow_group_in, flow_group_in,
+			    match_criteria.misc_parameters_2);
+
+	metadata_reg_c_2_mask = MLX5_CT_ZONE_MASK;
+	metadata_reg_c_2_mask |= (MLX5_CT_STATE_TRK_BIT << 16);
+	if (nat)
+		metadata_reg_c_2_mask |= (MLX5_CT_STATE_NAT_BIT << 16);
+
+	MLX5_SET(fte_match_set_misc2, misc, metadata_reg_c_2,
+		 metadata_reg_c_2_mask);
+
+	g = mlx5_create_flow_group(ft, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		ct_dbg("Failed to create pre ct group");
+		goto err_flow_grp;
+	}
+	pre_ct->flow_grp = g;
+
+	/* create miss group */
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+	g = mlx5_create_flow_group(ft, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		ct_dbg("Failed to create pre ct miss group");
+		goto err_miss_grp;
+	}
+	pre_ct->miss_grp = g;
+
+	err = tc_ct_pre_ct_add_rules(ct_ft, pre_ct, nat);
+	if (err)
+		goto err_add_rules;
+
+	kvfree(flow_group_in);
+	return 0;
+
+err_add_rules:
+	mlx5_destroy_flow_group(pre_ct->miss_grp);
+err_miss_grp:
+	mlx5_destroy_flow_group(pre_ct->flow_grp);
+err_flow_grp:
+	mlx5_destroy_flow_table(ft);
+out_free:
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct(struct mlx5_ct_ft *ct_ft,
+		       struct mlx5_tc_ct_pre *pre_ct)
+{
+	tc_ct_pre_ct_del_rules(ct_ft, pre_ct);
+	mlx5_destroy_flow_group(pre_ct->miss_grp);
+	mlx5_destroy_flow_group(pre_ct->flow_grp);
+	mlx5_destroy_flow_table(pre_ct->fdb);
+}
+
+static int
+mlx5_tc_ct_alloc_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+	int err;
+
+	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct, false);
+	if (err)
+		return err;
+
+	err = mlx5_tc_ct_alloc_pre_ct(ft, &ft->pre_ct_nat, true);
+	if (err)
+		goto err_pre_ct_nat;
+
+	return 0;
+
+err_pre_ct_nat:
+	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+	return err;
+}
+
+static void
+mlx5_tc_ct_free_pre_ct_tables(struct mlx5_ct_ft *ft)
+{
+	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct_nat);
+	mlx5_tc_ct_free_pre_ct(ft, &ft->pre_ct);
+}
+
 static struct mlx5_ct_ft *
 mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 		     struct nf_flowtable *nf_ft)
@@ -813,6 +1060,10 @@ mlx5_tc_ct_add_ft_cb(struct mlx5_tc_ct_priv *ct_priv, u16 zone,
 	ft->ct_priv = ct_priv;
 	refcount_set(&ft->refcount, 1);
 
+	err = mlx5_tc_ct_alloc_pre_ct_tables(ft);
+	if (err)
+		goto err_alloc_pre_ct;
+
 	err = rhashtable_init(&ft->ct_entries_ht, &cts_ht_params);
 	if (err)
 		goto err_init;
@@ -834,6 +1085,8 @@ err_add_cb:
 err_insert:
 	rhashtable_destroy(&ft->ct_entries_ht);
 err_init:
+	mlx5_tc_ct_free_pre_ct_tables(ft);
+err_alloc_pre_ct:
 	kfree(ft);
 	return ERR_PTR(err);
 }
@@ -859,21 +1112,40 @@ mlx5_tc_ct_del_ft_cb(struct mlx5_tc_ct_priv *ct_priv, struct mlx5_ct_ft *ft)
 	rhashtable_free_and_destroy(&ft->ct_entries_ht,
 				    mlx5_tc_ct_flush_ft_entry,
 				    ct_priv);
+	mlx5_tc_ct_free_pre_ct_tables(ft);
 	kfree(ft);
 }
 
 /* We translate the tc filter with CT action to the following HW model:
  *
- * +-------------------+      +--------------------+    +--------------+
- * + pre_ct (tc chain) +----->+ CT (nat or no nat) +--->+ post_ct      +----->
- * + original match    +  |   + tuple + zone match + |  + fte_id match +  |
- * +-------------------+  |   +--------------------+ |  +--------------+  |
- *                        v                          v                    v
- *                       set chain miss mapping  set mark             original
- *                       set fte_id              set label            filter
- *                       set zone                set established      actions
- *                       set tunnel_id           do nat (if needed)
- *                       do decap
+ * +---------------------+
+ * + fdb prio (tc chain) +
+ * + original match      +
+ * +---------------------+
+ *      | set chain miss mapping
+ *      | set fte_id
+ *      | set tunnel_id
+ *      | do decap
+ *      v
+ * +---------------------+
+ * + pre_ct/pre_ct_nat   +  if matches     +---------------------+
+ * + zone+nat match      +---------------->+ post_ct (see below) +
+ * +---------------------+  set zone       +---------------------+
+ *      | set zone
+ *      v
+ * +--------------------+
+ * + CT (nat or no nat) +
+ * + tuple + zone match +
+ * +--------------------+
+ *      | set mark
+ *      | set label
+ *      | set established
+ *      | do nat (if needed)
+ *      v
+ * +--------------+
+ * + post_ct      + original filter actions
+ * + fte_id match +------------------------>
+ * +--------------+
  */
 static int
 __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
@@ -888,7 +1160,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
 	struct mlx5_flow_spec *post_ct_spec = NULL;
 	struct mlx5_eswitch *esw = ct_priv->esw;
 	struct mlx5_esw_flow_attr *pre_ct_attr;
-	struct  mlx5_modify_hdr *mod_hdr;
+	struct mlx5_modify_hdr *mod_hdr;
 	struct mlx5_flow_handle *rule;
 	struct mlx5_ct_flow *ct_flow;
 	int chain_mapping = 0, err;
@@ -951,14 +1223,6 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
 		goto err_mapping;
 	}
 
-	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts, ZONE_TO_REG,
-					attr->ct_attr.zone &
-					MLX5_CT_ZONE_MASK);
-	if (err) {
-		ct_dbg("Failed to set zone register mapping");
-		goto err_mapping;
-	}
-
 	err = mlx5e_tc_match_to_reg_set(esw->dev, &pre_mod_acts,
 					FTEID_TO_REG, fte_id);
 	if (err) {
@@ -1018,7 +1282,7 @@ __mlx5_tc_ct_flow_offload(struct mlx5e_priv *priv,
 
 	/* Change original rule point to ct table */
 	pre_ct_attr->dest_chain = 0;
-	pre_ct_attr->dest_ft = nat ? ct_priv->ct_nat : ct_priv->ct;
+	pre_ct_attr->dest_ft = nat ? ft->pre_ct_nat.fdb : ft->pre_ct.fdb;
 	ct_flow->pre_ct_rule = mlx5_eswitch_add_offloaded_rule(esw,
 							       orig_spec,
 							       pre_ct_attr);
-- 
cgit v1.2.3-59-g8ed1b


From 80639b199c9ca87444da218ba0e7511946452dd4 Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Sun, 3 May 2020 13:01:37 +0300
Subject: net/mlx5e: IPoIB, Enable loopback packets for IPoIB interfaces

Enable loopback of unicast and multicast traffic for IPoIB enhanced
mode.
This allows interfaces with the same pkey to communicate with each
other, e.g. cloned interfaces that are located in different namespaces.

Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h               |  3 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en_common.c        | 13 ++++++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c          |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c      |  4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c      |  7 ++++++-
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h      |  2 ++
 drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c |  2 +-
 7 files changed, 24 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 195162b9b245..ac385ac93fe5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -1082,7 +1082,8 @@ void mlx5e_destroy_tir(struct mlx5_core_dev *mdev,
 		       struct mlx5e_tir *tir);
 int mlx5e_create_mdev_resources(struct mlx5_core_dev *mdev);
 void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev);
-int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb);
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+		       bool enable_mc_lb);
 
 /* common netdev helpers */
 void mlx5e_create_q_counters(struct mlx5e_priv *priv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
index af3228b3f303..1e42c7ae621b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_common.c
@@ -141,10 +141,12 @@ void mlx5e_destroy_mdev_resources(struct mlx5_core_dev *mdev)
 	memset(res, 0, sizeof(*res));
 }
 
-int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
+int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb,
+		       bool enable_mc_lb)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_tir *tir;
+	u8 lb_flags = 0;
 	int err  = 0;
 	u32 tirn = 0;
 	int inlen;
@@ -158,8 +160,13 @@ int mlx5e_refresh_tirs(struct mlx5e_priv *priv, bool enable_uc_lb)
 	}
 
 	if (enable_uc_lb)
-		MLX5_SET(modify_tir_in, in, ctx.self_lb_block,
-			 MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST);
+		lb_flags = MLX5_TIRC_SELF_LB_BLOCK_BLOCK_UNICAST;
+
+	if (enable_mc_lb)
+		lb_flags |= MLX5_TIRC_SELF_LB_BLOCK_BLOCK_MULTICAST;
+
+	if (lb_flags)
+		MLX5_SET(modify_tir_in, in, ctx.self_lb_block, lb_flags);
 
 	MLX5_SET(modify_tir_in, in, bitmask.self_lb_en, 1);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e4ca08ddca9..65e2b364443e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -5275,7 +5275,7 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 
 int mlx5e_update_nic_rx(struct mlx5e_priv *priv)
 {
-	return mlx5e_refresh_tirs(priv, false);
+	return mlx5e_refresh_tirs(priv, false, false);
 }
 
 static const struct mlx5e_profile mlx5e_nic_profile = {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
index bbff8d8ded76..46790216ce86 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_selftest.c
@@ -234,7 +234,7 @@ static int mlx5e_test_loopback_setup(struct mlx5e_priv *priv,
 			return err;
 	}
 
-	err = mlx5e_refresh_tirs(priv, true);
+	err = mlx5e_refresh_tirs(priv, true, false);
 	if (err)
 		goto out;
 
@@ -263,7 +263,7 @@ static void mlx5e_test_loopback_cleanup(struct mlx5e_priv *priv,
 		mlx5_nic_vport_update_local_lb(priv->mdev, false);
 
 	dev_remove_pack(&lbtp->pt);
-	mlx5e_refresh_tirs(priv, false);
+	mlx5e_refresh_tirs(priv, false, false);
 }
 
 #define MLX5E_LB_VERIFY_TIMEOUT (msecs_to_jiffies(200))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
index 035bd21e5d4e..7db70b6ccc07 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.c
@@ -262,6 +262,11 @@ void mlx5i_destroy_underlay_qp(struct mlx5_core_dev *mdev, u32 qpn)
 	mlx5_cmd_exec_in(mdev, destroy_qp, in);
 }
 
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv)
+{
+	return mlx5e_refresh_tirs(priv, true, true);
+}
+
 int mlx5i_create_tis(struct mlx5_core_dev *mdev, u32 underlay_qpn, u32 *tisn)
 {
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
@@ -456,7 +461,7 @@ static const struct mlx5e_profile mlx5i_nic_profile = {
 	.cleanup_rx	   = mlx5i_cleanup_rx,
 	.enable		   = NULL, /* mlx5i_enable */
 	.disable	   = NULL, /* mlx5i_disable */
-	.update_rx	   = mlx5e_update_nic_rx,
+	.update_rx	   = mlx5i_update_nic_rx,
 	.update_stats	   = NULL, /* mlx5i_update_stats */
 	.update_carrier    = NULL, /* no HW update in IB link */
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
index c4aa47018c0e..79071a15c4ca 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib.h
@@ -92,6 +92,8 @@ int mlx5i_init(struct mlx5_core_dev *mdev,
 	       void *ppriv);
 void mlx5i_cleanup(struct mlx5e_priv *priv);
 
+int mlx5i_update_nic_rx(struct mlx5e_priv *priv);
+
 /* Get child interface nic profile */
 const struct mlx5e_profile *mlx5i_pkey_get_profile(void);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
index b9af37ad40bf..f70367018862 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/ipoib/ipoib_vlan.c
@@ -347,7 +347,7 @@ static const struct mlx5e_profile mlx5i_pkey_nic_profile = {
 	.cleanup_rx	   = mlx5i_pkey_cleanup_rx,
 	.enable		   = NULL,
 	.disable	   = NULL,
-	.update_rx	   = mlx5e_update_nic_rx,
+	.update_rx	   = mlx5i_update_nic_rx,
 	.update_stats	   = NULL,
 	.rx_handlers.handle_rx_cqe       = mlx5i_handle_rx_cqe,
 	.rx_handlers.handle_rx_cqe_mpwqe = NULL, /* Not supported */
-- 
cgit v1.2.3-59-g8ed1b


From 8b46d424a743ddfef8056d5167f13ee7ebd1dcad Mon Sep 17 00:00:00 2001
From: Erez Shitrit <erezsh@mellanox.com>
Date: Mon, 4 May 2020 11:46:25 +0300
Subject: net/mlx5e: IPoIB, Drop multicast packets that this interface sent

After enabling loopback packets for IPoIB, we need to drop the packets
that this HCA has replicated and that came back to the same interface
that sent them.

Fixes: 4c6c615e3f30 ("net/mlx5e: IPoIB, Add PKEY child interface nic profile")
Signed-off-by: Erez Shitrit <erezsh@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 821f94beda7a..a514685fb560 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1489,6 +1489,7 @@ out:
 
 #ifdef CONFIG_MLX5_CORE_IPOIB
 
+#define MLX5_IB_GRH_SGID_OFFSET 8
 #define MLX5_IB_GRH_DGID_OFFSET 24
 #define MLX5_GID_SIZE           16
 
@@ -1502,6 +1503,7 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	struct net_device *netdev;
 	struct mlx5e_priv *priv;
 	char *pseudo_header;
+	u32 flags_rqpn;
 	u32 qpn;
 	u8 *dgid;
 	u8 g;
@@ -1523,7 +1525,8 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	tstamp = &priv->tstamp;
 	stats = &priv->channel_stats[rq->ix].rq;
 
-	g = (be32_to_cpu(cqe->flags_rqpn) >> 28) & 3;
+	flags_rqpn = be32_to_cpu(cqe->flags_rqpn);
+	g = (flags_rqpn >> 28) & 3;
 	dgid = skb->data + MLX5_IB_GRH_DGID_OFFSET;
 	if ((!g) || dgid[0] != 0xff)
 		skb->pkt_type = PACKET_HOST;
@@ -1532,9 +1535,15 @@ static inline void mlx5i_complete_rx_cqe(struct mlx5e_rq *rq,
 	else
 		skb->pkt_type = PACKET_MULTICAST;
 
-	/* TODO: IB/ipoib: Allow mcast packets from other VFs
-	 * 68996a6e760e5c74654723eeb57bf65628ae87f4
+	/* Drop packets that this interface sent, ie multicast packets
+	 * that the HCA has replicated.
 	 */
+	if (g && (qpn == (flags_rqpn & 0xffffff)) &&
+	    (memcmp(netdev->dev_addr + 4, skb->data + MLX5_IB_GRH_SGID_OFFSET,
+		    MLX5_GID_SIZE) == 0)) {
+		skb->dev = NULL;
+		return;
+	}
 
 	skb_pull(skb, MLX5_IB_GRH_BYTES);
 
-- 
cgit v1.2.3-59-g8ed1b


From 5ffb4d858b7051720f20bcbb92dce0b433e60d88 Mon Sep 17 00:00:00 2001
From: Maxim Mikityanskiy <maximmi@mellanox.com>
Date: Mon, 30 Mar 2020 16:28:34 +0300
Subject: net/mlx5e: Calculate SQ stop room in a robust way

Currently, different formulas are used to estimate the space that may be
taken by WQEs in the SQ during a single packet transmit. This space is
called stop room, and it's checked at the end of packet transmit to find
out if the next packet could overflow the SQ. If it could, the driver
tells the kernel to stop sending next packets.

Many factors affect the stop room:

1. Padding with NOPs to avoid WQEs spanning over page boundaries.

2. Enabled and disabled offloads (TLS, upcoming MPWQE).

3. The maximum size of a WQE.

The padding is performed before every WQE if it doesn't fit the current
page.

The current formula assumes that only one padding will be required per
packet, and it doesn't take into account that the WQEs posted during the
transmission of a single packet might exceed the page size in very rare
circumstances. For example, to hit this condition with 4096-byte pages,
TLS offload will have to interrupt an almost-full MPWQE session, be in
the resync flow and try to transmit a near to maximum amount of data.

To avoid SQ overflows in such rare cases after MPWQE is added, this
patch introduces a more robust formula to estimate the stop room. The
new formula uses the fact that a WQE of size X will not require more
than X-1 WQEBBs of padding. More exact estimations are possible, but
they result in much more complex and error-prone code for little gain.

Before this patch, the TLS stop room included space for both INNOVA and
ConnectX TLS offloads that couldn't run at the same time anyway, so this
patch accounts only for the active one.

Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h  | 40 ++++++++++++----------
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c   |  4 ++-
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h   |  2 --
 .../ethernet/mellanox/mlx5/core/en_accel/ktls.c    | 13 +++++++
 .../ethernet/mellanox/mlx5/core/en_accel/ktls.h    | 12 +++----
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls.c | 14 ++++++++
 .../net/ethernet/mellanox/mlx5/core/en_accel/tls.h |  7 ++++
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 28 ++++++++++-----
 8 files changed, 84 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
index dce2bbbf9109..bfd3e1161bc6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/txrx.h
@@ -6,25 +6,6 @@
 
 #include "en.h"
 
-#define MLX5E_SQ_NOPS_ROOM (MLX5_SEND_WQE_MAX_WQEBBS - 1)
-#define MLX5E_SQ_STOP_ROOM (MLX5_SEND_WQE_MAX_WQEBBS +\
-			    MLX5E_SQ_NOPS_ROOM)
-
-#ifndef CONFIG_MLX5_EN_TLS
-#define MLX5E_SQ_TLS_ROOM (0)
-#else
-/* TLS offload requires additional stop_room for:
- *  - a resync SKB.
- * kTLS offload requires fixed additional stop_room for:
- * - a static params WQE, and a progress params WQE.
- * The additional MTU-depending room for the resync DUMP WQEs
- * will be calculated and added in runtime.
- */
-#define MLX5E_SQ_TLS_ROOM  \
-	(MLX5_SEND_WQE_MAX_WQEBBS + \
-	 MLX5E_KTLS_STATIC_WQEBBS + MLX5E_KTLS_PROGRESS_WQEBBS)
-#endif
-
 #define INL_HDR_START_SZ (sizeof(((struct mlx5_wqe_eth_seg *)NULL)->inline_hdr.start))
 
 enum mlx5e_icosq_wqe_type {
@@ -331,4 +312,25 @@ mlx5e_set_eseg_swp(struct sk_buff *skb, struct mlx5_wqe_eth_seg *eseg,
 	}
 }
 
+static inline u16 mlx5e_stop_room_for_wqe(u16 wqe_size)
+{
+	BUILD_BUG_ON(PAGE_SIZE / MLX5_SEND_WQE_BB < MLX5_SEND_WQE_MAX_WQEBBS);
+
+	/* A WQE must not cross the page boundary, hence two conditions:
+	 * 1. Its size must not exceed the page size.
+	 * 2. If the WQE size is X, and the space remaining in a page is less
+	 *    than X, this space needs to be padded with NOPs. So, one WQE of
+	 *    size X may require up to X-1 WQEBBs of padding, which makes the
+	 *    stop room of X-1 + X.
+	 * WQE size is also limited by the hardware limit.
+	 */
+
+	if (__builtin_constant_p(wqe_size))
+		BUILD_BUG_ON(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+	else
+		WARN_ON_ONCE(wqe_size > MLX5_SEND_WQE_MAX_WQEBBS);
+
+	return wqe_size * 2 - 1;
+}
+
 #endif
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 761c8979bd41..42202d19245c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -257,8 +257,10 @@ enum {
 static int mlx5e_xmit_xdp_frame_check_mpwqe(struct mlx5e_xdpsq *sq)
 {
 	if (unlikely(!sq->mpwqe.wqe)) {
+		const u16 stop_room = mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
 		if (unlikely(!mlx5e_wqc_has_room_for(&sq->wq, sq->cc, sq->pc,
-						     MLX5E_XDPSQ_STOP_ROOM))) {
+						     stop_room))) {
 			/* SQ is full, ring doorbell */
 			mlx5e_xmit_xdp_doorbell(sq);
 			sq->stats->full++;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index e2e01f064c1e..be64eb68f4e5 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -40,8 +40,6 @@
 	(sizeof(struct mlx5e_tx_wqe) / MLX5_SEND_WQE_DS)
 #define MLX5E_XDP_TX_DS_COUNT (MLX5E_XDP_TX_EMPTY_DS_COUNT + 1 /* SG DS */)
 
-#define MLX5E_XDPSQ_STOP_ROOM (MLX5E_SQ_STOP_ROOM)
-
 #define MLX5E_XDP_INLINE_WQE_SZ_THRSD (256 - sizeof(struct mlx5_wqe_inline_seg))
 #define MLX5E_XDP_INLINE_WQE_MAX_DS_CNT \
 	DIV_ROUND_UP(MLX5E_XDP_INLINE_WQE_SZ_THRSD, MLX5_SEND_WQE_DS)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
index 46725cd743a3..417a2d9dd248 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.c
@@ -4,6 +4,19 @@
 #include "en.h"
 #include "en_accel/ktls.h"
 
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	u16 num_dumps, stop_room = 0;
+
+	num_dumps = mlx5e_ktls_dumps_num_wqes(sq, MAX_SKB_FRAGS, TLS_MAX_PAYLOAD_SIZE);
+
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_STATIC_WQEBBS);
+	stop_room += mlx5e_stop_room_for_wqe(MLX5E_KTLS_PROGRESS_WQEBBS);
+	stop_room += num_dumps * mlx5e_stop_room_for_wqe(MLX5E_KTLS_DUMP_WQEBBS);
+
+	return stop_room;
+}
+
 static int mlx5e_ktls_create_tis(struct mlx5_core_dev *mdev, u32 *tisn)
 {
 	u32 in[MLX5_ST_SZ_DW(create_tis_in)] = {};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
index dabbc5f226ce..c6180892cfcb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/ktls.h
@@ -102,15 +102,16 @@ bool mlx5e_ktls_handle_tx_skb(struct tls_context *tls_ctx, struct mlx5e_txqsq *s
 void mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 					   struct mlx5e_tx_wqe_info *wi,
 					   u32 *dma_fifo_cc);
+u16 mlx5e_ktls_get_stop_room(struct mlx5e_txqsq *sq);
+
 static inline u8
-mlx5e_ktls_dumps_num_wqebbs(struct mlx5e_txqsq *sq, unsigned int nfrags,
-			    unsigned int sync_len)
+mlx5e_ktls_dumps_num_wqes(struct mlx5e_txqsq *sq, unsigned int nfrags,
+			  unsigned int sync_len)
 {
 	/* Given the MTU and sync_len, calculates an upper bound for the
-	 * number of WQEBBs needed for the TX resync DUMP WQEs of a record.
+	 * number of DUMP WQEs needed for the TX resync of a record.
 	 */
-	return MLX5E_KTLS_DUMP_WQEBBS *
-		(nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu));
+	return nfrags + DIV_ROUND_UP(sync_len, sq->hw_mtu);
 }
 #else
 
@@ -122,7 +123,6 @@ static inline void
 mlx5e_ktls_tx_handle_resync_dump_comp(struct mlx5e_txqsq *sq,
 				      struct mlx5e_tx_wqe_info *wi,
 				      u32 *dma_fifo_cc) {}
-
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index fba561ffe1d4..c27e9a609d51 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -240,3 +240,17 @@ void mlx5e_tls_cleanup(struct mlx5e_priv *priv)
 	kfree(tls);
 	priv->tls = NULL;
 }
+
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	struct mlx5_core_dev *mdev = sq->channel->mdev;
+
+	if (!mlx5_accel_is_tls_device(mdev))
+		return 0;
+
+	if (MLX5_CAP_GEN(mdev, tls_tx))
+		return mlx5e_ktls_get_stop_room(sq);
+
+	/* Resync SKB. */
+	return mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
index 9015f3f7792d..9219bdb2786e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.h
@@ -94,6 +94,8 @@ int mlx5e_tls_get_count(struct mlx5e_priv *priv);
 int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data);
 int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data);
 
+u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq);
+
 #else
 
 static inline void mlx5e_tls_build_netdev(struct mlx5e_priv *priv)
@@ -108,6 +110,11 @@ static inline int mlx5e_tls_get_count(struct mlx5e_priv *priv) { return 0; }
 static inline int mlx5e_tls_get_strings(struct mlx5e_priv *priv, uint8_t *data) { return 0; }
 static inline int mlx5e_tls_get_stats(struct mlx5e_priv *priv, u64 *data) { return 0; }
 
+static inline u16 mlx5e_tls_get_stop_room(struct mlx5e_txqsq *sq)
+{
+	return 0;
+}
+
 #endif
 
 #endif /* __MLX5E_TLS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 65e2b364443e..75f178a43822 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -1122,6 +1122,22 @@ static int mlx5e_alloc_txqsq_db(struct mlx5e_txqsq *sq, int numa)
 	return 0;
 }
 
+static int mlx5e_calc_sq_stop_room(struct mlx5e_txqsq *sq, u8 log_sq_size)
+{
+	int sq_size = 1 << log_sq_size;
+
+	sq->stop_room  = mlx5e_tls_get_stop_room(sq);
+	sq->stop_room += mlx5e_stop_room_for_wqe(MLX5_SEND_WQE_MAX_WQEBBS);
+
+	if (WARN_ON(sq->stop_room >= sq_size)) {
+		netdev_err(sq->channel->netdev, "Stop room %hu is bigger than the SQ size %d\n",
+			   sq->stop_room, sq_size);
+		return -ENOSPC;
+	}
+
+	return 0;
+}
+
 static void mlx5e_tx_err_cqe_work(struct work_struct *recover_work);
 static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 			     int txq_ix,
@@ -1146,20 +1162,16 @@ static int mlx5e_alloc_txqsq(struct mlx5e_channel *c,
 	sq->min_inline_mode = params->tx_min_inline_mode;
 	sq->hw_mtu    = MLX5E_SW2HW_MTU(params, params->sw_mtu);
 	sq->stats     = &c->priv->channel_stats[c->ix].sq[tc];
-	sq->stop_room = MLX5E_SQ_STOP_ROOM;
 	INIT_WORK(&sq->recover_work, mlx5e_tx_err_cqe_work);
 	if (!MLX5_CAP_ETH(mdev, wqe_vlan_insert))
 		set_bit(MLX5E_SQ_STATE_VLAN_NEED_L2_INLINE, &sq->state);
 	if (MLX5_IPSEC_DEV(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_IPSEC, &sq->state);
-#ifdef CONFIG_MLX5_EN_TLS
-	if (mlx5_accel_is_tls_device(c->priv->mdev)) {
+	if (mlx5_accel_is_tls_device(c->priv->mdev))
 		set_bit(MLX5E_SQ_STATE_TLS, &sq->state);
-		sq->stop_room += MLX5E_SQ_TLS_ROOM +
-			mlx5e_ktls_dumps_num_wqebbs(sq, MAX_SKB_FRAGS,
-						    TLS_MAX_PAYLOAD_SIZE);
-	}
-#endif
+	err = mlx5e_calc_sq_stop_room(sq, params->log_sq_size);
+	if (err)
+		return err;
 
 	param->wq.db_numa_node = cpu_to_node(c->cpu);
 	err = mlx5_wq_cyc_create(mdev, &param->wq, sqc_wq, wq, &sq->wq_ctrl);
-- 
cgit v1.2.3-59-g8ed1b


From 3f3ab178c71b12295b5950792b72d2198f0e77c7 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 23 Oct 2019 15:00:52 +0300
Subject: net/mlx5e: Take DCBNL-related definitions into dedicated files

Take DCBNL-related definitions out of the common en.h header and
expose them through a dedicated header file.
Some need not be exposed; use them locally in the .c file.
Use stubs to eliminate use of CONFIG_MLX5_CORE_EN_DCB in the
generic control flows.

Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       | 50 +-------------------
 drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h | 54 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 28 ++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  | 16 +------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  9 +---
 5 files changed, 84 insertions(+), 73 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index ac385ac93fe5..81fd53569463 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -52,6 +52,7 @@
 #include "wq.h"
 #include "mlx5_core.h"
 #include "en_stats.h"
+#include "en/dcbnl.h"
 #include "en/fs.h"
 #include "lib/hv_vhca.h"
 
@@ -68,8 +69,6 @@ struct page_pool;
 #define MLX5E_HW2SW_MTU(params, hwmtu) ((hwmtu) - ((params)->hard_mtu))
 #define MLX5E_SW2HW_MTU(params, swmtu) ((swmtu) + ((params)->hard_mtu))
 
-#define MLX5E_MAX_PRIORITY      8
-#define MLX5E_MAX_DSCP          64
 #define MLX5E_MAX_NUM_TC	8
 
 #define MLX5_RX_HEADROOM NET_SKB_PAD
@@ -242,10 +241,6 @@ enum mlx5e_priv_flag {
 
 #define MLX5E_GET_PFLAG(params, pflag) (!!((params)->pflags & (BIT(pflag))))
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
-#endif
-
 struct mlx5e_params {
 	u8  log_sq_size;
 	u8  rq_wq_type;
@@ -270,42 +265,6 @@ struct mlx5e_params {
 	int hard_mtu;
 };
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-struct mlx5e_cee_config {
-	/* bw pct for priority group */
-	u8                         pg_bw_pct[CEE_DCBX_MAX_PGS];
-	u8                         prio_to_pg_map[CEE_DCBX_MAX_PRIO];
-	bool                       pfc_setting[CEE_DCBX_MAX_PRIO];
-	bool                       pfc_enable;
-};
-
-enum {
-	MLX5_DCB_CHG_RESET,
-	MLX5_DCB_NO_CHG,
-	MLX5_DCB_CHG_NO_RESET,
-};
-
-struct mlx5e_dcbx {
-	enum mlx5_dcbx_oper_mode   mode;
-	struct mlx5e_cee_config    cee_cfg; /* pending configuration */
-	u8                         dscp_app_cnt;
-
-	/* The only setting that cannot be read from FW */
-	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
-	u8                         cap;
-
-	/* Buffer configuration */
-	bool                       manual_buffer;
-	u32                        cable_len;
-	u32                        xoff;
-};
-
-struct mlx5e_dcbx_dp {
-	u8                         dscp2prio[MLX5E_MAX_DSCP];
-	u8                         trust_state;
-};
-#endif
-
 enum {
 	MLX5E_RQ_STATE_ENABLED,
 	MLX5E_RQ_STATE_RECOVERING,
@@ -1068,13 +1027,6 @@ static inline bool mlx5_tx_swp_supported(struct mlx5_core_dev *mdev)
 }
 
 extern const struct ethtool_ops mlx5e_ethtool_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-extern const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops;
-int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets);
-void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
-void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
-void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
-#endif
 
 int mlx5e_create_tir(struct mlx5_core_dev *mdev, struct mlx5e_tir *tir,
 		     u32 *in);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
new file mode 100644
index 000000000000..7be6b2d36b60
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/dcbnl.h
@@ -0,0 +1,54 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5E_DCBNL_H__
+#define __MLX5E_DCBNL_H__
+
+#ifdef CONFIG_MLX5_CORE_EN_DCB
+
+#define MLX5E_MAX_PRIORITY (8)
+
+struct mlx5e_cee_config {
+	/* bw pct for priority group */
+	u8                         pg_bw_pct[CEE_DCBX_MAX_PGS];
+	u8                         prio_to_pg_map[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_setting[CEE_DCBX_MAX_PRIO];
+	bool                       pfc_enable;
+};
+
+struct mlx5e_dcbx {
+	enum mlx5_dcbx_oper_mode   mode;
+	struct mlx5e_cee_config    cee_cfg; /* pending configuration */
+	u8                         dscp_app_cnt;
+
+	/* The only setting that cannot be read from FW */
+	u8                         tc_tsa[IEEE_8021QAZ_MAX_TCS];
+	u8                         cap;
+
+	/* Buffer configuration */
+	bool                       manual_buffer;
+	u32                        cable_len;
+	u32                        xoff;
+};
+
+#define MLX5E_MAX_DSCP (64)
+
+struct mlx5e_dcbx_dp {
+	u8                         dscp2prio[MLX5E_MAX_DSCP];
+	u8                         trust_state;
+};
+
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev);
+void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv);
+void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv);
+#else
+static inline void mlx5e_dcbnl_build_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev) {}
+static inline void mlx5e_dcbnl_initialize(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_init_app(struct mlx5e_priv *priv) {}
+static inline void mlx5e_dcbnl_delete_app(struct mlx5e_priv *priv) {}
+#endif
+
+#endif /* __MLX5E_DCBNL_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index 47874d34156b..ec7b332d74c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -35,6 +35,8 @@
 #include "en/port.h"
 #include "en/port_buffer.h"
 
+#define MLX5E_MAX_BW_ALLOC 100 /* Max percentage of BW allocation */
+
 #define MLX5E_100MB (100000)
 #define MLX5E_1GB   (1000000)
 
@@ -49,6 +51,12 @@ enum {
 	MLX5E_LOWEST_PRIO_GROUP   = 0,
 };
 
+enum {
+	MLX5_DCB_CHG_RESET,
+	MLX5_DCB_NO_CHG,
+	MLX5_DCB_CHG_NO_RESET,
+};
+
 #define MLX5_DSCP_SUPPORTED(mdev) (MLX5_CAP_GEN(mdev, qcam_reg)  && \
 				   MLX5_CAP_QCAM_REG(mdev, qpts) && \
 				   MLX5_CAP_QCAM_REG(mdev, qpdpm))
@@ -238,7 +246,7 @@ static void mlx5e_build_tc_tx_bw(struct ieee_ets *ets, u8 *tc_tx_bw,
  *   Report both group #0 and #1 as ETS type.
  *     All the tcs in group #0 will be reported with 0% BW.
  */
-int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
+static int mlx5e_dcbnl_ieee_setets_core(struct mlx5e_priv *priv, struct ieee_ets *ets)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 	u8 tc_tx_bw[IEEE_8021QAZ_MAX_TCS];
@@ -1009,6 +1017,24 @@ const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.setpfcstate    = mlx5e_dcbnl_setpfcstate,
 };
 
+void mlx5e_dcbnl_build_netdev(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
+		netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+}
+
+void mlx5e_dcbnl_build_rep_netdev(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5_core_dev *mdev = priv->mdev;
+
+	if (MLX5_CAP_GEN(mdev, qos))
+		netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
+}
+
 static void mlx5e_dcbnl_query_dcbx_mode(struct mlx5e_priv *priv,
 					enum mlx5_dcbx_oper_mode *mode)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 75f178a43822..07823abe5557 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -66,7 +66,6 @@
 #include "en/devlink.h"
 #include "lib/mlx5.h"
 
-
 bool mlx5e_check_fragmented_striding_rq_cap(struct mlx5_core_dev *mdev)
 {
 	bool striding_rq_umr = MLX5_CAP_GEN(mdev, striding_rq) &&
@@ -4927,10 +4926,7 @@ static void mlx5e_build_nic_netdev(struct net_device *netdev)
 
 	netdev->netdev_ops = &mlx5e_netdev_ops;
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-	if (MLX5_CAP_GEN(mdev, vport_group_manager) && MLX5_CAP_GEN(mdev, qos))
-		netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-#endif
+	mlx5e_dcbnl_build_netdev(netdev);
 
 	netdev->watchdog_timeo    = 15 * HZ;
 
@@ -5218,9 +5214,7 @@ static int mlx5e_init_nic_tx(struct mlx5e_priv *priv)
 		return err;
 	}
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_initialize(priv);
-#endif
 	return 0;
 }
 
@@ -5247,9 +5241,7 @@ static void mlx5e_nic_enable(struct mlx5e_priv *priv)
 	mlx5e_hv_vhca_stats_create(priv);
 	if (netdev->reg_state != NETREG_REGISTERED)
 		return;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_init_app(priv);
-#endif
 
 	queue_work(priv->wq, &priv->set_rx_mode_work);
 
@@ -5264,10 +5256,8 @@ static void mlx5e_nic_disable(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	if (priv->netdev->reg_state == NETREG_REGISTERED)
 		mlx5e_dcbnl_delete_app(priv);
-#endif
 
 	rtnl_lock();
 	if (netif_running(priv->netdev))
@@ -5564,9 +5554,7 @@ static void *mlx5e_add(struct mlx5_core_dev *mdev)
 
 	mlx5e_devlink_port_type_eth_set(priv);
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_init_app(priv);
-#endif
 	return priv;
 
 err_devlink_port_unregister:
@@ -5589,9 +5577,7 @@ static void mlx5e_remove(struct mlx5_core_dev *mdev, void *vpriv)
 	}
 #endif
 	priv = vpriv;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_delete_app(priv);
-#endif
 	unregister_netdev(priv->netdev);
 	mlx5e_devlink_port_unregister(priv);
 	mlx5e_detach(mdev, vpriv);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 1eac7a53d56f..52351c105627 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1544,10 +1544,7 @@ static void mlx5e_build_rep_netdev(struct net_device *netdev)
 		/* we want a persistent mac for the uplink rep */
 		mlx5_query_mac_address(mdev, netdev->dev_addr);
 		netdev->ethtool_ops = &mlx5e_uplink_rep_ethtool_ops;
-#ifdef CONFIG_MLX5_CORE_EN_DCB
-		if (MLX5_CAP_GEN(mdev, qos))
-			netdev->dcbnl_ops = &mlx5e_dcbnl_ops;
-#endif
+		mlx5e_dcbnl_build_rep_netdev(netdev);
 	} else {
 		netdev->netdev_ops = &mlx5e_netdev_ops_rep;
 		eth_hw_addr_random(netdev);
@@ -1929,10 +1926,8 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	mlx5_lag_add(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
 	mlx5_notifier_register(mdev, &priv->events_nb);
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_initialize(priv);
 	mlx5e_dcbnl_init_app(priv);
-#endif
 }
 
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
@@ -1940,9 +1935,7 @@ static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 	struct mlx5_core_dev *mdev = priv->mdev;
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 
-#ifdef CONFIG_MLX5_CORE_EN_DCB
 	mlx5e_dcbnl_delete_app(priv);
-#endif
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
 	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
 	mlx5_lag_remove(mdev);
-- 
cgit v1.2.3-59-g8ed1b


From 2ec0616e870f0f2aa8353e0de057f0c2dc8d52d5 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Sat, 16 May 2020 00:39:18 +0200
Subject: bpf: Fix check_return_code to only allow [0,1] in trace_iter progs

As per 15d83c4d7cef ("bpf: Allow loading of a bpf_iter program") we only
allow a range of [0,1] for return codes. Therefore BPF_TRACE_ITER relies
on the default tnum_range(0, 1) which is set in range var. On recent merge
of net into net-next commit e92888c72fbd ("bpf: Enforce returning 0 for
fentry/fexit progs") got pulled in and caused a merge conflict with the
changes from 15d83c4d7cef. The resolution had a small hiccup in that it
removed the [0,1] range restriction again so that BPF_TRACE_ITER would
have no enforcement. Fix it by adding it back.

Fixes: da07f52d3caf ("Merge git://git.kernel.org/pub/scm/linux/kernel/git/netdev/net")
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/verifier.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 25b14ee0e26d..9c7d67d65d8c 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7120,10 +7120,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 		case BPF_TRACE_FEXIT:
 			range = tnum_const(0);
 			break;
-		case BPF_TRACE_ITER:
 		case BPF_TRACE_RAW_TP:
 		case BPF_MODIFY_RETURN:
 			return 0;
+		case BPF_TRACE_ITER:
+			break;
 		default:
 			return -ENOTSUPP;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From f516acd5397fdbb77ef0aad0798d9ef7c3001d72 Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Fri, 15 May 2020 09:50:02 -0700
Subject: libbpf, hashmap: Remove unused #include

Remove #include of libbpf_internal.h that is unused.

Discussed in this thread:
https://lore.kernel.org/lkml/CAEf4BzZRmiEds_8R8g4vaAeWvJzPb4xYLnpF0X2VNY8oTzkphQ@mail.gmail.com/

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200515165007.217120-3-irogers@google.com
---
 tools/lib/bpf/hashmap.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/lib/bpf/hashmap.h b/tools/lib/bpf/hashmap.h
index bae8879cdf58..e823b35e7371 100644
--- a/tools/lib/bpf/hashmap.h
+++ b/tools/lib/bpf/hashmap.h
@@ -15,7 +15,6 @@
 #else
 #include <bits/reg.h>
 #endif
-#include "libbpf_internal.h"
 
 static inline size_t hash_bits(size_t h, int bits)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 8d35d74f52ae07689e575ea21f7dc2e07dd1392f Mon Sep 17 00:00:00 2001
From: Ian Rogers <irogers@google.com>
Date: Fri, 15 May 2020 09:50:03 -0700
Subject: libbpf, hashmap: Fix signedness warnings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fixes the following warnings:

  hashmap.c: In function ‘hashmap__clear’:
  hashmap.h:150:20: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare]
    150 |  for (bkt = 0; bkt < map->cap; bkt++)        \

  hashmap.c: In function ‘hashmap_grow’:
  hashmap.h:150:20: error: comparison of integer expressions of different signedness: ‘int’ and ‘size_t’ {aka ‘long unsigned int’} [-Werror=sign-compare]
    150 |  for (bkt = 0; bkt < map->cap; bkt++)        \

Signed-off-by: Ian Rogers <irogers@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200515165007.217120-4-irogers@google.com
---
 tools/lib/bpf/hashmap.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/tools/lib/bpf/hashmap.c b/tools/lib/bpf/hashmap.c
index cffb96202e0d..a405dad068f5 100644
--- a/tools/lib/bpf/hashmap.c
+++ b/tools/lib/bpf/hashmap.c
@@ -60,7 +60,7 @@ struct hashmap *hashmap__new(hashmap_hash_fn hash_fn,
 void hashmap__clear(struct hashmap *map)
 {
 	struct hashmap_entry *cur, *tmp;
-	int bkt;
+	size_t bkt;
 
 	hashmap__for_each_entry_safe(map, cur, tmp, bkt) {
 		free(cur);
@@ -100,8 +100,7 @@ static int hashmap_grow(struct hashmap *map)
 	struct hashmap_entry **new_buckets;
 	struct hashmap_entry *cur, *tmp;
 	size_t new_cap_bits, new_cap;
-	size_t h;
-	int bkt;
+	size_t h, bkt;
 
 	new_cap_bits = map->cap_bits + 1;
 	if (new_cap_bits < HASHMAP_MIN_CAP_BITS)
-- 
cgit v1.2.3-59-g8ed1b


From 5366d2269139ba8eb6a906d73a0819947e3e4e0a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 15 May 2020 12:49:03 -0700
Subject: selftests/bpf: Fix test_align verifier log patterns

Commit 294f2fc6da27 ("bpf: Verifer, adjust_scalar_min_max_vals to always
call update_reg_bounds()") changed the way the verifier logs some of its state;
adjust test_align accordingly. Where possible, I tried not to copy-paste
the entire log line and resorted to dropping the last closing parenthesis instead.

Fixes: 294f2fc6da27 ("bpf: Verifer, adjust_scalar_min_max_vals to always call update_reg_bounds()")
Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200515194904.229296-1-sdf@google.com
---
 tools/testing/selftests/bpf/test_align.c | 41 ++++++++++++++++----------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
index 0262f7b374f9..c9c9bdce9d6d 100644
--- a/tools/testing/selftests/bpf/test_align.c
+++ b/tools/testing/selftests/bpf/test_align.c
@@ -359,15 +359,15 @@ static struct bpf_align_test tests[] = {
 			 * is still (4n), fixed offset is not changed.
 			 * Also, we create a new reg->id.
 			 */
-			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
 			 * which is 20.  Then the variable offset is (4n), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
-			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc))"},
+			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
 		},
 	},
 	{
@@ -410,15 +410,15 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* Packet pointer has (4n+2) offset */
-			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
-			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
 			/* Newly read value in R6 was shifted left by 2, so has
 			 * known alignment of 4.
 			 */
@@ -426,15 +426,15 @@ static struct bpf_align_test tests[] = {
 			/* Added (4n) to packet pointer's (4n+2) var_off, giving
 			 * another (4n+2).
 			 */
-			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
-			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc))"},
+			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
 		},
 	},
 	{
@@ -469,16 +469,16 @@ static struct bpf_align_test tests[] = {
 		.matches = {
 			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
 			/* (ptr - ptr) << 2 == unknown, (4n) */
-			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc))"},
+			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
 			/* (4n) + 14 == (4n+2).  We blow our bounds, because
 			 * the add could overflow.
 			 */
-			{7, "R5_w=inv(id=0,var_off=(0x2; 0xfffffffffffffffc))"},
+			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
-			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
 			 * We checked the bounds, but it might have been able
 			 * to overflow if the packet pointer started in the
@@ -486,7 +486,7 @@ static struct bpf_align_test tests[] = {
 			 * So we did not get a 'range' on R6, and the access
 			 * attempt will fail.
 			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372036854775806,var_off=(0x2; 0x7ffffffffffffffc))"},
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
 		}
 	},
 	{
@@ -528,7 +528,7 @@ static struct bpf_align_test tests[] = {
 			/* New unknown value in R7 is (4n) */
 			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
 			/* Subtracting it from R6 blows our unsigned bounds */
-			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,var_off=(0x2; 0xfffffffffffffffc))"},
+			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
 			/* Checked s>= 0 */
 			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
 			/* At the time the word size load is performed from R5,
@@ -537,7 +537,8 @@ static struct bpf_align_test tests[] = {
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+
 		},
 	},
 	{
@@ -579,18 +580,18 @@ static struct bpf_align_test tests[] = {
 			/* Adding 14 makes R6 be (4n+2) */
 			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
 			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c))"},
+			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
 			/* New unknown value in R7 is (4n), >= 76 */
 			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
 			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 			/* At the time the word size load is performed from R5,
 			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
 			 * which is 2.  Then the variable offset is (4n+2), so
 			 * the total offset is 4-byte aligned and meets the
 			 * load's requirements.
 			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0x7fc))"},
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
 		},
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From 3b09d27cc93d584f49bc18f1e1696ba19d43233a Mon Sep 17 00:00:00 2001
From: Stanislav Fomichev <sdf@google.com>
Date: Fri, 15 May 2020 12:49:04 -0700
Subject: selftests/bpf: Move test_align under test_progs

There is a much higher chance we can see the regressions if the
test is part of test_progs.

Signed-off-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200515194904.229296-2-sdf@google.com
---
 tools/testing/selftests/bpf/prog_tests/align.c | 666 +++++++++++++++++++++++
 tools/testing/selftests/bpf/test_align.c       | 720 -------------------------
 2 files changed, 666 insertions(+), 720 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/align.c
 delete mode 100644 tools/testing/selftests/bpf/test_align.c

diff --git a/tools/testing/selftests/bpf/prog_tests/align.c b/tools/testing/selftests/bpf/prog_tests/align.c
new file mode 100644
index 000000000000..c548aded6585
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/align.c
@@ -0,0 +1,666 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+
+#define MAX_INSNS	512
+#define MAX_MATCHES	16
+
+struct bpf_reg_match {
+	unsigned int line;
+	const char *match;
+};
+
+struct bpf_align_test {
+	const char *descr;
+	struct bpf_insn	insns[MAX_INSNS];
+	enum {
+		UNDEF,
+		ACCEPT,
+		REJECT
+	} result;
+	enum bpf_prog_type prog_type;
+	/* Matches must be in order of increasing line */
+	struct bpf_reg_match matches[MAX_MATCHES];
+};
+
+static struct bpf_align_test tests[] = {
+	/* Four tests of known constants.  These aren't staggeringly
+	 * interesting since we track exact values now.
+	 */
+	{
+		.descr = "mov",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_3, 8),
+			BPF_MOV64_IMM(BPF_REG_3, 16),
+			BPF_MOV64_IMM(BPF_REG_3, 32),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv2"},
+			{2, "R3_w=inv4"},
+			{3, "R3_w=inv8"},
+			{4, "R3_w=inv16"},
+			{5, "R3_w=inv32"},
+		},
+	},
+	{
+		.descr = "shift",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_4, 32),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv1"},
+			{2, "R3_w=inv2"},
+			{3, "R3_w=inv4"},
+			{4, "R3_w=inv8"},
+			{5, "R3_w=inv16"},
+			{6, "R3_w=inv1"},
+			{7, "R4_w=inv32"},
+			{8, "R4_w=inv16"},
+			{9, "R4_w=inv8"},
+			{10, "R4_w=inv4"},
+			{11, "R4_w=inv2"},
+		},
+	},
+	{
+		.descr = "addsub",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
+			BPF_MOV64_IMM(BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv4"},
+			{2, "R3_w=inv8"},
+			{3, "R3_w=inv10"},
+			{4, "R4_w=inv8"},
+			{5, "R4_w=inv12"},
+			{6, "R4_w=inv14"},
+		},
+	},
+	{
+		.descr = "mul",
+		.insns = {
+			BPF_MOV64_IMM(BPF_REG_3, 7),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{1, "R1=ctx(id=0,off=0,imm=0)"},
+			{1, "R10=fp0"},
+			{1, "R3_w=inv7"},
+			{2, "R3_w=inv7"},
+			{3, "R3_w=inv14"},
+			{4, "R3_w=inv56"},
+		},
+	},
+
+	/* Tests using unknown values */
+#define PREP_PKT_POINTERS \
+	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data)), \
+	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
+		    offsetof(struct __sk_buff, data_end))
+
+#define LOAD_UNKNOWN(DST_REG) \
+	PREP_PKT_POINTERS, \
+	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
+	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
+	BPF_EXIT_INSN(), \
+	BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
+
+	{
+		.descr = "unknown shift",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
+			LOAD_UNKNOWN(BPF_REG_4),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
+			{18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
+			{20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+			{21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+		},
+	},
+	{
+		.descr = "unknown mul",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_3),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
+			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
+			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
+			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
+		},
+	},
+	{
+		.descr = "packet const offset",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+
+			/* Skip over ethernet header.  */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
+			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
+			{5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
+			{6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
+			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
+			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
+			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
+			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+			{15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
+		},
+	},
+	{
+		.descr = "packet variable offset",
+		.insns = {
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+
+			/* First, add a constant to the R5 packet pointer,
+			 * then a variable with a known alignment.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Now, test in the other direction.  Adding first
+			 * the variable offset to R5, then the constant.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			/* Test multiple accumulations of unknown values
+			 * into a packet pointer.
+			 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
+
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Offset is added to packet pointer R5, resulting in
+			 * known fixed offset, and variable offset from R6.
+			 */
+			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off
+			 * (14) which is 16.  Then the variable offset is 4-byte
+			 * aligned, so the total offset is 4-byte aligned and
+			 * meets the load's requirements.
+			 */
+			{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Variable offset is added to R5 packet pointer,
+			 * resulting in a variable offset of (4n).
+			 */
+			{18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant offset is added to R5, resulting in
+			 * reg->off of 14.
+			 */
+			{19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off
+			 * (14) which is 16.  Then the variable offset is 4-byte
+			 * aligned, so the total offset is 4-byte aligned and
+			 * meets the load's requirements.
+			 */
+			{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant offset is added to R5 packet pointer,
+			 * resulting in reg->off value of 14.
+			 */
+			{26, "R5_w=pkt(id=0,off=14,r=8"},
+			/* Variable offset is added to R5, resulting in a
+			 * variable offset of (4n).
+			 */
+			{27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Constant is added to R5 again, setting reg->off to 18. */
+			{28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* And once more we add a variable; resulting var_off
+			 * is still (4n), fixed offset is not changed.
+			 * Also, we create a new reg->id.
+			 */
+			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
+			 * which is 20.  Then the variable offset is (4n), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
+		},
+	},
+	{
+		.descr = "packet variable offset 2",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			/* Make a (4n) offset from the value we just read */
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* Packet pointer has (4n+2) offset */
+			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
+			/* Newly read value in R6 was shifted left by 2, so has
+			 * known alignment of 4.
+			 */
+			{18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Added (4n) to packet pointer's (4n+2) var_off, giving
+			 * another (4n+2).
+			 */
+			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
+		},
+	},
+	{
+		.descr = "dubious pointer arithmetic",
+		.insns = {
+			PREP_PKT_POINTERS,
+			BPF_MOV64_IMM(BPF_REG_0, 0),
+			/* (ptr - ptr) << 2 */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
+			/* We have a (4n) value.  Let's make a packet offset
+			 * out of it.  First add 14, to make it a (4n+2)
+			 */
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
+			/* Then make sure it's nonnegative */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to packet pointer */
+			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.result = REJECT,
+		.matches = {
+			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
+			/* (ptr - ptr) << 2 == unknown, (4n) */
+			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
+			/* (4n) + 14 == (4n+2).  We blow our bounds, because
+			 * the add could overflow.
+			 */
+			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			/* Checked s>=0 */
+			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			/* packet pointer + nonnegative (4n+2) */
+			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
+			 * We checked the bounds, but it might have been able
+			 * to overflow if the packet pointer started in the
+			 * upper half of the address space.
+			 * So we did not get a 'range' on R6, and the access
+			 * attempt will fail.
+			 */
+			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
+		}
+	},
+	{
+		.descr = "variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Create another unknown, (4n)-aligned, and subtract
+			 * it from the first one
+			 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
+			/* Bounds-check the result */
+			BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
+			BPF_EXIT_INSN(),
+			/* Add it to the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* New unknown value in R7 is (4n) */
+			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
+			/* Subtracting it from R6 blows our unsigned bounds */
+			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
+			/* Checked s>= 0 */
+			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
+
+		},
+	},
+	{
+		.descr = "pointer variable subtraction",
+		.insns = {
+			/* Create an unknown offset, (4n+2)-aligned and bounded
+			 * to [14,74]
+			 */
+			LOAD_UNKNOWN(BPF_REG_6),
+			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
+			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
+			/* Subtract it from the packet pointer */
+			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
+			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
+			/* Create another unknown, (4n)-aligned and >= 74.
+			 * That in fact means >= 76, since 74 % 4 == 2
+			 */
+			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
+			/* Add it to the packet pointer */
+			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
+			/* Check bounds and perform a read */
+			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
+			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
+			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
+			BPF_EXIT_INSN(),
+			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
+			BPF_EXIT_INSN(),
+		},
+		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+		.matches = {
+			/* Calculated offset in R6 has unknown value, but known
+			 * alignment of 4.
+			 */
+			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
+			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
+			/* Adding 14 makes R6 be (4n+2) */
+			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
+			/* Subtracting from packet pointer overflows ubounds */
+			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
+			/* New unknown value in R7 is (4n), >= 76 */
+			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
+			/* Adding it to packet pointer gives nice bounds again */
+			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+			/* At the time the word size load is performed from R5,
+			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
+			 * which is 2.  Then the variable offset is (4n+2), so
+			 * the total offset is 4-byte aligned and meets the
+			 * load's requirements.
+			 */
+			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
+		},
+	},
+};
+
+static int probe_filter_length(const struct bpf_insn *fp)
+{
+	int len;
+
+	for (len = MAX_INSNS - 1; len > 0; --len)
+		if (fp[len].code != 0 || fp[len].imm != 0)
+			break;
+	return len + 1;
+}
+
+static char bpf_vlog[32768];
+
+static int do_test_single(struct bpf_align_test *test)
+{
+	struct bpf_insn *prog = test->insns;
+	int prog_type = test->prog_type;
+	char bpf_vlog_copy[32768];
+	const char *line_ptr;
+	int cur_line = -1;
+	int prog_len, i;
+	int fd_prog;
+	int ret;
+
+	prog_len = probe_filter_length(prog);
+	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
+				     prog, prog_len, BPF_F_STRICT_ALIGNMENT,
+				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
+	if (fd_prog < 0 && test->result != REJECT) {
+		printf("Failed to load program.\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+	} else if (fd_prog >= 0 && test->result == REJECT) {
+		printf("Unexpected success to load!\n");
+		printf("%s", bpf_vlog);
+		ret = 1;
+		close(fd_prog);
+	} else {
+		ret = 0;
+		/* We make a local copy so that we can strtok() it */
+		strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
+		line_ptr = strtok(bpf_vlog_copy, "\n");
+		for (i = 0; i < MAX_MATCHES; i++) {
+			struct bpf_reg_match m = test->matches[i];
+
+			if (!m.match)
+				break;
+			while (line_ptr) {
+				cur_line = -1;
+				sscanf(line_ptr, "%u: ", &cur_line);
+				if (cur_line == m.line)
+					break;
+				line_ptr = strtok(NULL, "\n");
+			}
+			if (!line_ptr) {
+				printf("Failed to find line %u for match: %s\n",
+				       m.line, m.match);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+			if (!strstr(line_ptr, m.match)) {
+				printf("Failed to find match %u: %s\n",
+				       m.line, m.match);
+				ret = 1;
+				printf("%s", bpf_vlog);
+				break;
+			}
+		}
+		if (fd_prog >= 0)
+			close(fd_prog);
+	}
+	return ret;
+}
+
+void test_align(void)
+{
+	unsigned int i;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		struct bpf_align_test *test = &tests[i];
+
+		if (!test__start_subtest(test->descr))
+			continue;
+
+		CHECK_FAIL(do_test_single(test));
+	}
+}
diff --git a/tools/testing/selftests/bpf/test_align.c b/tools/testing/selftests/bpf/test_align.c
deleted file mode 100644
index c9c9bdce9d6d..000000000000
--- a/tools/testing/selftests/bpf/test_align.c
+++ /dev/null
@@ -1,720 +0,0 @@
-#include <asm/types.h>
-#include <linux/types.h>
-#include <stdint.h>
-#include <stdio.h>
-#include <stdlib.h>
-#include <unistd.h>
-#include <errno.h>
-#include <string.h>
-#include <stddef.h>
-#include <stdbool.h>
-
-#include <linux/unistd.h>
-#include <linux/filter.h>
-#include <linux/bpf_perf_event.h>
-#include <linux/bpf.h>
-
-#include <bpf/bpf.h>
-
-#include "../../../include/linux/filter.h"
-#include "bpf_rlimit.h"
-#include "bpf_util.h"
-
-#define MAX_INSNS	512
-#define MAX_MATCHES	16
-
-struct bpf_reg_match {
-	unsigned int line;
-	const char *match;
-};
-
-struct bpf_align_test {
-	const char *descr;
-	struct bpf_insn	insns[MAX_INSNS];
-	enum {
-		UNDEF,
-		ACCEPT,
-		REJECT
-	} result;
-	enum bpf_prog_type prog_type;
-	/* Matches must be in order of increasing line */
-	struct bpf_reg_match matches[MAX_MATCHES];
-};
-
-static struct bpf_align_test tests[] = {
-	/* Four tests of known constants.  These aren't staggeringly
-	 * interesting since we track exact values now.
-	 */
-	{
-		.descr = "mov",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_3, 2),
-			BPF_MOV64_IMM(BPF_REG_3, 4),
-			BPF_MOV64_IMM(BPF_REG_3, 8),
-			BPF_MOV64_IMM(BPF_REG_3, 16),
-			BPF_MOV64_IMM(BPF_REG_3, 32),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{1, "R1=ctx(id=0,off=0,imm=0)"},
-			{1, "R10=fp0"},
-			{1, "R3_w=inv2"},
-			{2, "R3_w=inv4"},
-			{3, "R3_w=inv8"},
-			{4, "R3_w=inv16"},
-			{5, "R3_w=inv32"},
-		},
-	},
-	{
-		.descr = "shift",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_3, 4),
-			BPF_MOV64_IMM(BPF_REG_4, 32),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{1, "R1=ctx(id=0,off=0,imm=0)"},
-			{1, "R10=fp0"},
-			{1, "R3_w=inv1"},
-			{2, "R3_w=inv2"},
-			{3, "R3_w=inv4"},
-			{4, "R3_w=inv8"},
-			{5, "R3_w=inv16"},
-			{6, "R3_w=inv1"},
-			{7, "R4_w=inv32"},
-			{8, "R4_w=inv16"},
-			{9, "R4_w=inv8"},
-			{10, "R4_w=inv4"},
-			{11, "R4_w=inv2"},
-		},
-	},
-	{
-		.descr = "addsub",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_3, 4),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 4),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_3, 2),
-			BPF_MOV64_IMM(BPF_REG_4, 8),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 2),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{1, "R1=ctx(id=0,off=0,imm=0)"},
-			{1, "R10=fp0"},
-			{1, "R3_w=inv4"},
-			{2, "R3_w=inv8"},
-			{3, "R3_w=inv10"},
-			{4, "R4_w=inv8"},
-			{5, "R4_w=inv12"},
-			{6, "R4_w=inv14"},
-		},
-	},
-	{
-		.descr = "mul",
-		.insns = {
-			BPF_MOV64_IMM(BPF_REG_3, 7),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 2),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_3, 4),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{1, "R1=ctx(id=0,off=0,imm=0)"},
-			{1, "R10=fp0"},
-			{1, "R3_w=inv7"},
-			{2, "R3_w=inv7"},
-			{3, "R3_w=inv14"},
-			{4, "R3_w=inv56"},
-		},
-	},
-
-	/* Tests using unknown values */
-#define PREP_PKT_POINTERS \
-	BPF_LDX_MEM(BPF_W, BPF_REG_2, BPF_REG_1, \
-		    offsetof(struct __sk_buff, data)), \
-	BPF_LDX_MEM(BPF_W, BPF_REG_3, BPF_REG_1, \
-		    offsetof(struct __sk_buff, data_end))
-
-#define LOAD_UNKNOWN(DST_REG) \
-	PREP_PKT_POINTERS, \
-	BPF_MOV64_REG(BPF_REG_0, BPF_REG_2), \
-	BPF_ALU64_IMM(BPF_ADD, BPF_REG_0, 8), \
-	BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_0, 1), \
-	BPF_EXIT_INSN(), \
-	BPF_LDX_MEM(BPF_B, DST_REG, BPF_REG_2, 0)
-
-	{
-		.descr = "unknown shift",
-		.insns = {
-			LOAD_UNKNOWN(BPF_REG_3),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_3, 1),
-			LOAD_UNKNOWN(BPF_REG_4),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_4, 5),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_ALU64_IMM(BPF_RSH, BPF_REG_4, 1),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{7, "R0_w=pkt(id=0,off=8,r=8,imm=0)"},
-			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R3_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{9, "R3_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{10, "R3_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{11, "R3_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-			{18, "R3=pkt_end(id=0,off=0,imm=0)"},
-			{18, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{19, "R4_w=inv(id=0,umax_value=8160,var_off=(0x0; 0x1fe0))"},
-			{20, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-			{21, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{22, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{23, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-		},
-	},
-	{
-		.descr = "unknown mul",
-		.insns = {
-			LOAD_UNKNOWN(BPF_REG_3),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 1),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 4),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_3),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 8),
-			BPF_ALU64_IMM(BPF_MUL, BPF_REG_4, 2),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{7, "R3_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{8, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{9, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{11, "R4_w=inv(id=0,umax_value=510,var_off=(0x0; 0x1fe))"},
-			{12, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{13, "R4_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{14, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{15, "R4_w=inv(id=0,umax_value=2040,var_off=(0x0; 0x7f8))"},
-			{16, "R4_w=inv(id=0,umax_value=4080,var_off=(0x0; 0xff0))"},
-		},
-	},
-	{
-		.descr = "packet const offset",
-		.insns = {
-			PREP_PKT_POINTERS,
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-
-			/* Skip over ethernet header.  */
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-
-			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 0),
-			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 1),
-			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 2),
-			BPF_LDX_MEM(BPF_B, BPF_REG_4, BPF_REG_5, 3),
-			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 0),
-			BPF_LDX_MEM(BPF_H, BPF_REG_4, BPF_REG_5, 2),
-			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			{4, "R5_w=pkt(id=0,off=0,r=0,imm=0)"},
-			{5, "R5_w=pkt(id=0,off=14,r=0,imm=0)"},
-			{6, "R4_w=pkt(id=0,off=14,r=0,imm=0)"},
-			{10, "R2=pkt(id=0,off=0,r=18,imm=0)"},
-			{10, "R5=pkt(id=0,off=14,r=18,imm=0)"},
-			{10, "R4_w=inv(id=0,umax_value=255,var_off=(0x0; 0xff))"},
-			{14, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
-			{15, "R4_w=inv(id=0,umax_value=65535,var_off=(0x0; 0xffff))"},
-		},
-	},
-	{
-		.descr = "packet variable offset",
-		.insns = {
-			LOAD_UNKNOWN(BPF_REG_6),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-
-			/* First, add a constant to the R5 packet pointer,
-			 * then a variable with a known alignment.
-			 */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
-			/* Now, test in the other direction.  Adding first
-			 * the variable offset to R5, then the constant.
-			 */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
-			/* Test multiple accumulations of unknown values
-			 * into a packet pointer.
-			 */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 4),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_5, 0),
-
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			/* Calculated offset in R6 has unknown value, but known
-			 * alignment of 4.
-			 */
-			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Offset is added to packet pointer R5, resulting in
-			 * known fixed offset, and variable offset from R6.
-			 */
-			{11, "R5_w=pkt(id=1,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* At the time the word size load is performed from R5,
-			 * it's total offset is NET_IP_ALIGN + reg->off (0) +
-			 * reg->aux_off (14) which is 16.  Then the variable
-			 * offset is considered using reg->aux_off_align which
-			 * is 4 and meets the load's requirements.
-			 */
-			{15, "R4=pkt(id=1,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{15, "R5=pkt(id=1,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Variable offset is added to R5 packet pointer,
-			 * resulting in auxiliary alignment of 4.
-			 */
-			{18, "R5_w=pkt(id=2,off=0,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Constant offset is added to R5, resulting in
-			 * reg->off of 14.
-			 */
-			{19, "R5_w=pkt(id=2,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off
-			 * (14) which is 16.  Then the variable offset is 4-byte
-			 * aligned, so the total offset is 4-byte aligned and
-			 * meets the load's requirements.
-			 */
-			{23, "R4=pkt(id=2,off=18,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			{23, "R5=pkt(id=2,off=14,r=18,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Constant offset is added to R5 packet pointer,
-			 * resulting in reg->off value of 14.
-			 */
-			{26, "R5_w=pkt(id=0,off=14,r=8"},
-			/* Variable offset is added to R5, resulting in a
-			 * variable offset of (4n).
-			 */
-			{27, "R5_w=pkt(id=3,off=14,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Constant is added to R5 again, setting reg->off to 18. */
-			{28, "R5_w=pkt(id=3,off=18,r=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* And once more we add a variable; resulting var_off
-			 * is still (4n), fixed offset is not changed.
-			 * Also, we create a new reg->id.
-			 */
-			{29, "R5_w=pkt(id=4,off=18,r=0,umax_value=2040,var_off=(0x0; 0x7fc)"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off (18)
-			 * which is 20.  Then the variable offset is (4n), so
-			 * the total offset is 4-byte aligned and meets the
-			 * load's requirements.
-			 */
-			{33, "R4=pkt(id=4,off=22,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
-			{33, "R5=pkt(id=4,off=18,r=22,umax_value=2040,var_off=(0x0; 0x7fc)"},
-		},
-	},
-	{
-		.descr = "packet variable offset 2",
-		.insns = {
-			/* Create an unknown offset, (4n+2)-aligned */
-			LOAD_UNKNOWN(BPF_REG_6),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
-			/* Add it to the packet pointer */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			/* Check bounds and perform a read */
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
-			/* Make a (4n) offset from the value we just read */
-			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xff),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-			/* Add it to the packet pointer */
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			/* Check bounds and perform a read */
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			/* Calculated offset in R6 has unknown value, but known
-			 * alignment of 4.
-			 */
-			{8, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{8, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Adding 14 makes R6 be (4n+2) */
-			{9, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
-			/* Packet pointer has (4n+2) offset */
-			{11, "R5_w=pkt(id=1,off=0,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
-			{13, "R4=pkt(id=1,off=4,r=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
-			 * which is 2.  Then the variable offset is (4n+2), so
-			 * the total offset is 4-byte aligned and meets the
-			 * load's requirements.
-			 */
-			{15, "R5=pkt(id=1,off=0,r=4,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc)"},
-			/* Newly read value in R6 was shifted left by 2, so has
-			 * known alignment of 4.
-			 */
-			{18, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Added (4n) to packet pointer's (4n+2) var_off, giving
-			 * another (4n+2).
-			 */
-			{19, "R5_w=pkt(id=2,off=0,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
-			{21, "R4=pkt(id=2,off=4,r=0,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
-			 * which is 2.  Then the variable offset is (4n+2), so
-			 * the total offset is 4-byte aligned and meets the
-			 * load's requirements.
-			 */
-			{23, "R5=pkt(id=2,off=0,r=4,umin_value=14,umax_value=2054,var_off=(0x2; 0xffc)"},
-		},
-	},
-	{
-		.descr = "dubious pointer arithmetic",
-		.insns = {
-			PREP_PKT_POINTERS,
-			BPF_MOV64_IMM(BPF_REG_0, 0),
-			/* (ptr - ptr) << 2 */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_3),
-			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_5, 2),
-			/* We have a (4n) value.  Let's make a packet offset
-			 * out of it.  First add 14, to make it a (4n+2)
-			 */
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_5, 14),
-			/* Then make sure it's nonnegative */
-			BPF_JMP_IMM(BPF_JSGE, BPF_REG_5, 0, 1),
-			BPF_EXIT_INSN(),
-			/* Add it to packet pointer */
-			BPF_MOV64_REG(BPF_REG_6, BPF_REG_2),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_6, BPF_REG_5),
-			/* Check bounds and perform a read */
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_6),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_4, BPF_REG_6, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.result = REJECT,
-		.matches = {
-			{4, "R5_w=pkt_end(id=0,off=0,imm=0)"},
-			/* (ptr - ptr) << 2 == unknown, (4n) */
-			{6, "R5_w=inv(id=0,smax_value=9223372036854775804,umax_value=18446744073709551612,var_off=(0x0; 0xfffffffffffffffc)"},
-			/* (4n) + 14 == (4n+2).  We blow our bounds, because
-			 * the add could overflow.
-			 */
-			{7, "R5_w=inv(id=0,smin_value=-9223372036854775806,smax_value=9223372036854775806,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
-			/* Checked s>=0 */
-			{9, "R5=inv(id=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-			/* packet pointer + nonnegative (4n+2) */
-			{11, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-			{13, "R4_w=pkt(id=1,off=4,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-			/* NET_IP_ALIGN + (4n+2) == (4n), alignment is fine.
-			 * We checked the bounds, but it might have been able
-			 * to overflow if the packet pointer started in the
-			 * upper half of the address space.
-			 * So we did not get a 'range' on R6, and the access
-			 * attempt will fail.
-			 */
-			{15, "R6_w=pkt(id=1,off=0,r=0,umin_value=2,umax_value=9223372034707292158,var_off=(0x2; 0x7fffffff7ffffffc)"},
-		}
-	},
-	{
-		.descr = "variable subtraction",
-		.insns = {
-			/* Create an unknown offset, (4n+2)-aligned */
-			LOAD_UNKNOWN(BPF_REG_6),
-			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
-			/* Create another unknown, (4n)-aligned, and subtract
-			 * it from the first one
-			 */
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
-			BPF_ALU64_REG(BPF_SUB, BPF_REG_6, BPF_REG_7),
-			/* Bounds-check the result */
-			BPF_JMP_IMM(BPF_JSGE, BPF_REG_6, 0, 1),
-			BPF_EXIT_INSN(),
-			/* Add it to the packet pointer */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_6),
-			/* Check bounds and perform a read */
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			/* Calculated offset in R6 has unknown value, but known
-			 * alignment of 4.
-			 */
-			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{9, "R6_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Adding 14 makes R6 be (4n+2) */
-			{10, "R6_w=inv(id=0,umin_value=14,umax_value=1034,var_off=(0x2; 0x7fc))"},
-			/* New unknown value in R7 is (4n) */
-			{11, "R7_w=inv(id=0,umax_value=1020,var_off=(0x0; 0x3fc))"},
-			/* Subtracting it from R6 blows our unsigned bounds */
-			{12, "R6=inv(id=0,smin_value=-1006,smax_value=1034,umin_value=2,umax_value=18446744073709551614,var_off=(0x2; 0xfffffffffffffffc)"},
-			/* Checked s>= 0 */
-			{14, "R6=inv(id=0,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc))"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
-			 * which is 2.  Then the variable offset is (4n+2), so
-			 * the total offset is 4-byte aligned and meets the
-			 * load's requirements.
-			 */
-			{20, "R5=pkt(id=1,off=0,r=4,umin_value=2,umax_value=1034,var_off=(0x2; 0x7fc)"},
-
-		},
-	},
-	{
-		.descr = "pointer variable subtraction",
-		.insns = {
-			/* Create an unknown offset, (4n+2)-aligned and bounded
-			 * to [14,74]
-			 */
-			LOAD_UNKNOWN(BPF_REG_6),
-			BPF_MOV64_REG(BPF_REG_7, BPF_REG_6),
-			BPF_ALU64_IMM(BPF_AND, BPF_REG_6, 0xf),
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_6, 2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_6, 14),
-			/* Subtract it from the packet pointer */
-			BPF_MOV64_REG(BPF_REG_5, BPF_REG_2),
-			BPF_ALU64_REG(BPF_SUB, BPF_REG_5, BPF_REG_6),
-			/* Create another unknown, (4n)-aligned and >= 74.
-			 * That in fact means >= 76, since 74 % 4 == 2
-			 */
-			BPF_ALU64_IMM(BPF_LSH, BPF_REG_7, 2),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_7, 76),
-			/* Add it to the packet pointer */
-			BPF_ALU64_REG(BPF_ADD, BPF_REG_5, BPF_REG_7),
-			/* Check bounds and perform a read */
-			BPF_MOV64_REG(BPF_REG_4, BPF_REG_5),
-			BPF_ALU64_IMM(BPF_ADD, BPF_REG_4, 4),
-			BPF_JMP_REG(BPF_JGE, BPF_REG_3, BPF_REG_4, 1),
-			BPF_EXIT_INSN(),
-			BPF_LDX_MEM(BPF_W, BPF_REG_6, BPF_REG_5, 0),
-			BPF_EXIT_INSN(),
-		},
-		.prog_type = BPF_PROG_TYPE_SCHED_CLS,
-		.matches = {
-			/* Calculated offset in R6 has unknown value, but known
-			 * alignment of 4.
-			 */
-			{7, "R2_w=pkt(id=0,off=0,r=8,imm=0)"},
-			{10, "R6_w=inv(id=0,umax_value=60,var_off=(0x0; 0x3c))"},
-			/* Adding 14 makes R6 be (4n+2) */
-			{11, "R6_w=inv(id=0,umin_value=14,umax_value=74,var_off=(0x2; 0x7c))"},
-			/* Subtracting from packet pointer overflows ubounds */
-			{13, "R5_w=pkt(id=1,off=0,r=8,umin_value=18446744073709551542,umax_value=18446744073709551602,var_off=(0xffffffffffffff82; 0x7c)"},
-			/* New unknown value in R7 is (4n), >= 76 */
-			{15, "R7_w=inv(id=0,umin_value=76,umax_value=1096,var_off=(0x0; 0x7fc))"},
-			/* Adding it to packet pointer gives nice bounds again */
-			{16, "R5_w=pkt(id=2,off=0,r=0,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
-			/* At the time the word size load is performed from R5,
-			 * its total fixed offset is NET_IP_ALIGN + reg->off (0)
-			 * which is 2.  Then the variable offset is (4n+2), so
-			 * the total offset is 4-byte aligned and meets the
-			 * load's requirements.
-			 */
-			{20, "R5=pkt(id=2,off=0,r=4,umin_value=2,umax_value=1082,var_off=(0x2; 0xfffffffc)"},
-		},
-	},
-};
-
-static int probe_filter_length(const struct bpf_insn *fp)
-{
-	int len;
-
-	for (len = MAX_INSNS - 1; len > 0; --len)
-		if (fp[len].code != 0 || fp[len].imm != 0)
-			break;
-	return len + 1;
-}
-
-static char bpf_vlog[32768];
-
-static int do_test_single(struct bpf_align_test *test)
-{
-	struct bpf_insn *prog = test->insns;
-	int prog_type = test->prog_type;
-	char bpf_vlog_copy[32768];
-	const char *line_ptr;
-	int cur_line = -1;
-	int prog_len, i;
-	int fd_prog;
-	int ret;
-
-	prog_len = probe_filter_length(prog);
-	fd_prog = bpf_verify_program(prog_type ? : BPF_PROG_TYPE_SOCKET_FILTER,
-				     prog, prog_len, BPF_F_STRICT_ALIGNMENT,
-				     "GPL", 0, bpf_vlog, sizeof(bpf_vlog), 2);
-	if (fd_prog < 0 && test->result != REJECT) {
-		printf("Failed to load program.\n");
-		printf("%s", bpf_vlog);
-		ret = 1;
-	} else if (fd_prog >= 0 && test->result == REJECT) {
-		printf("Unexpected success to load!\n");
-		printf("%s", bpf_vlog);
-		ret = 1;
-		close(fd_prog);
-	} else {
-		ret = 0;
-		/* We make a local copy so that we can strtok() it */
-		strncpy(bpf_vlog_copy, bpf_vlog, sizeof(bpf_vlog_copy));
-		line_ptr = strtok(bpf_vlog_copy, "\n");
-		for (i = 0; i < MAX_MATCHES; i++) {
-			struct bpf_reg_match m = test->matches[i];
-
-			if (!m.match)
-				break;
-			while (line_ptr) {
-				cur_line = -1;
-				sscanf(line_ptr, "%u: ", &cur_line);
-				if (cur_line == m.line)
-					break;
-				line_ptr = strtok(NULL, "\n");
-			}
-			if (!line_ptr) {
-				printf("Failed to find line %u for match: %s\n",
-				       m.line, m.match);
-				ret = 1;
-				printf("%s", bpf_vlog);
-				break;
-			}
-			if (!strstr(line_ptr, m.match)) {
-				printf("Failed to find match %u: %s\n",
-				       m.line, m.match);
-				ret = 1;
-				printf("%s", bpf_vlog);
-				break;
-			}
-		}
-		if (fd_prog >= 0)
-			close(fd_prog);
-	}
-	return ret;
-}
-
-static int do_test(unsigned int from, unsigned int to)
-{
-	int all_pass = 0;
-	int all_fail = 0;
-	unsigned int i;
-
-	for (i = from; i < to; i++) {
-		struct bpf_align_test *test = &tests[i];
-		int fail;
-
-		printf("Test %3d: %s ... ",
-		       i, test->descr);
-		fail = do_test_single(test);
-		if (fail) {
-			all_fail++;
-			printf("FAIL\n");
-		} else {
-			all_pass++;
-			printf("PASS\n");
-		}
-	}
-	printf("Results: %d pass %d fail\n",
-	       all_pass, all_fail);
-	return all_fail ? EXIT_FAILURE : EXIT_SUCCESS;
-}
-
-int main(int argc, char **argv)
-{
-	unsigned int from = 0, to = ARRAY_SIZE(tests);
-
-	if (argc == 3) {
-		unsigned int l = atoi(argv[argc - 2]);
-		unsigned int u = atoi(argv[argc - 1]);
-
-		if (l < to && u < to) {
-			from = l;
-			to   = u + 1;
-		}
-	} else if (argc == 2) {
-		unsigned int t = atoi(argv[argc - 1]);
-
-		if (t < to) {
-			from = t;
-			to   = t + 1;
-		}
-	}
-	return do_test(from, to);
-}
-- 
cgit v1.2.3-59-g8ed1b


From 2ea46dc6864261e94441e4d141058fc538d0de6b Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Fri, 15 May 2020 15:33:18 -0700
Subject: ethernet: ti: am65-cpts: Add missing inline qualifier to stub
 functions

When building with Clang:

In file included from drivers/net/ethernet/ti/am65-cpsw-ethtool.c:15:
drivers/net/ethernet/ti/am65-cpts.h:58:12: warning: unused function
'am65_cpts_ns_gettime' [-Wunused-function]
static s64 am65_cpts_ns_gettime(struct am65_cpts *cpts)
           ^
drivers/net/ethernet/ti/am65-cpts.h:63:12: warning: unused function
'am65_cpts_estf_enable' [-Wunused-function]
static int am65_cpts_estf_enable(struct am65_cpts *cpts,
           ^
drivers/net/ethernet/ti/am65-cpts.h:69:13: warning: unused function
'am65_cpts_estf_disable' [-Wunused-function]
static void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx)
            ^
3 warnings generated.

These functions need to be marked as inline, which adds __maybe_unused,
to avoid these warnings, which is the pattern for stub functions.

Fixes: ec008fa2a9e5 ("ethernet: ti: am65-cpts: add routines to support taprio offload")
Link: https://github.com/ClangBuiltLinux/linux/issues/1026
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/ti/am65-cpts.h | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/ti/am65-cpts.h b/drivers/net/ethernet/ti/am65-cpts.h
index 98c1960b20b9..cf9fbc28fd03 100644
--- a/drivers/net/ethernet/ti/am65-cpts.h
+++ b/drivers/net/ethernet/ti/am65-cpts.h
@@ -55,18 +55,18 @@ static inline void am65_cpts_rx_enable(struct am65_cpts *cpts, bool en)
 {
 }
 
-static s64 am65_cpts_ns_gettime(struct am65_cpts *cpts)
+static inline s64 am65_cpts_ns_gettime(struct am65_cpts *cpts)
 {
 	return 0;
 }
 
-static int am65_cpts_estf_enable(struct am65_cpts *cpts,
-				 int idx, struct am65_cpts_estf_cfg *cfg)
+static inline int am65_cpts_estf_enable(struct am65_cpts *cpts, int idx,
+					struct am65_cpts_estf_cfg *cfg)
 {
 	return 0;
 }
 
-static void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx)
+static inline void am65_cpts_estf_disable(struct am65_cpts *cpts, int idx)
 {
 }
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 991e35eebe1e90ffc1c75105286a50e627b56dd1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:13:09 -0700
Subject: bpf: Selftests, move sockmap bpf prog header into progs

Moves test_sockmap_kern.h into progs directory but does not change
code at all.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939718921.15176.5766299102332077086.stgit@john-Precision-5820-Tower
---
 .../selftests/bpf/progs/test_sockmap_kern.h        | 451 +++++++++++++++++++++
 tools/testing/selftests/bpf/test_sockmap_kern.h    | 451 ---------------------
 2 files changed, 451 insertions(+), 451 deletions(-)
 create mode 100644 tools/testing/selftests/bpf/progs/test_sockmap_kern.h
 delete mode 100644 tools/testing/selftests/bpf/test_sockmap_kern.h

diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
new file mode 100644
index 000000000000..9b4d3a68a91a
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -0,0 +1,451 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
+#include <stddef.h>
+#include <string.h>
+#include <linux/bpf.h>
+#include <linux/if_ether.h>
+#include <linux/if_packet.h>
+#include <linux/ip.h>
+#include <linux/ipv6.h>
+#include <linux/in.h>
+#include <linux/udp.h>
+#include <linux/tcp.h>
+#include <linux/pkt_cls.h>
+#include <sys/socket.h>
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+/* Sockmap sample program connects a client and a backend together
+ * using cgroups.
+ *
+ *    client:X <---> frontend:80 client:X <---> backend:80
+ *
+ * For simplicity we hard code values here and bind 1:1. The hard
+ * coded values are part of the setup in sockmap.sh script that
+ * is associated with this BPF program.
+ *
+ * The bpf_printk is verbose and prints information as connections
+ * are established and verdicts are decided.
+ */
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_txmsg SEC(".maps");
+
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} sock_map_redir SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_apply_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_cork_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 6);
+	__type(key, int);
+	__type(value, int);
+} sock_bytes SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_redir_flags SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, int);
+	__type(value, int);
+} sock_skb_opts SEC(".maps");
+
+SEC("sk_skb1")
+int bpf_prog1(struct __sk_buff *skb)
+{
+	return skb->len;
+}
+
+SEC("sk_skb2")
+int bpf_prog2(struct __sk_buff *skb)
+{
+	__u32 lport = skb->local_port;
+	__u32 rport = skb->remote_port;
+	int len, *f, ret, zero = 0;
+	__u64 flags = 0;
+
+	if (lport == 10000)
+		ret = 10;
+	else
+		ret = 1;
+
+	len = (__u32)skb->data_end - (__u32)skb->data;
+	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
+	if (f && *f) {
+		ret = 3;
+		flags = *f;
+	}
+
+	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
+		   len, flags);
+#ifdef SOCKMAP
+	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
+#else
+	return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
+#endif
+
+}
+
+SEC("sockops")
+int bpf_sockmap(struct bpf_sock_ops *skops)
+{
+	__u32 lport, rport;
+	int op, err = 0, index, key, ret;
+
+
+	op = (int) skops->op;
+
+	switch (op) {
+	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (lport == 10000) {
+			ret = 1;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
+		lport = skops->local_port;
+		rport = skops->remote_port;
+
+		if (bpf_ntohl(rport) == 10001) {
+			ret = 10;
+#ifdef SOCKMAP
+			err = bpf_sock_map_update(skops, &sock_map, &ret,
+						  BPF_NOEXIST);
+#else
+			err = bpf_sock_hash_update(skops, &sock_map, &ret,
+						   BPF_NOEXIST);
+#endif
+			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
+				   lport, bpf_ntohl(rport), err);
+		}
+		break;
+	default:
+		break;
+	}
+
+	return 0;
+}
+
+SEC("sk_msg1")
+int bpf_prog4(struct sk_msg_md *msg)
+{
+	int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int *start, *end, *start_push, *end_push, *start_pop, *pop;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+	return SK_PASS;
+}
+
+SEC("sk_msg2")
+int bpf_prog5(struct sk_msg_md *msg)
+{
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int *start, *end, *start_push, *end_push, *start_pop, *pop;
+	int *bytes, len1, len2 = 0, len3, len4;
+	int err1 = -1, err2 = -1;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end) {
+		int err;
+
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		int err;
+
+		bpf_printk("sk_msg2: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg2: push_data err %i\n", err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop) {
+		int err;
+
+		bpf_printk("sk_msg2: pop(%i@%i)\n",
+			   start_pop, pop);
+		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+		if (err)
+			bpf_printk("sk_msg2: pop_data err %i\n", err);
+		len4 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length pop_data %i->%i\n",
+			   len1 ? len1 : 0,  len4);
+	}
+
+	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
+		   len1, err1, err2);
+	return SK_PASS;
+}
+
+SEC("sk_msg3")
+int bpf_prog6(struct sk_msg_md *msg)
+{
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+	__u64 flags = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+#ifdef SOCKMAP
+	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+}
+
+SEC("sk_msg4")
+int bpf_prog7(struct sk_msg_md *msg)
+{
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+	int len1, len2 = 0, len3, len4;
+	int err1 = 0, err2 = 0, key = 0;
+	__u64 flags = 0;
+
+		int err;
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		err1 = bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		err2 = bpf_msg_cork_bytes(msg, *bytes);
+	len1 = (__u64)msg->data_end - (__u64)msg->data;
+
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end) {
+		bpf_printk("sk_msg2: pull(%i:%i)\n",
+			   start ? *start : 0, end ? *end : 0);
+		err = bpf_msg_pull_data(msg, *start, *end, 0);
+		if (err)
+			bpf_printk("sk_msg2: pull_data err %i\n",
+				   err);
+		len2 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg2: length update %i->%i\n",
+			   len1, len2);
+	}
+
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push) {
+		bpf_printk("sk_msg4: push(%i:%i)\n",
+			   start_push ? *start_push : 0,
+			   end_push ? *end_push : 0);
+		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
+		if (err)
+			bpf_printk("sk_msg4: push_data err %i\n",
+				   err);
+		len3 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length push_update %i->%i\n",
+			   len2 ? len2 : len1, len3);
+	}
+
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop) {
+		int err;
+
+		bpf_printk("sk_msg4: pop(%i@%i)\n",
+			   start_pop, pop);
+		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+		if (err)
+			bpf_printk("sk_msg4: pop_data err %i\n", err);
+		len4 = (__u64)msg->data_end - (__u64)msg->data;
+		bpf_printk("sk_msg4: length pop_data %i->%i\n",
+			   len1 ? len1 : 0,  len4);
+	}
+
+
+	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
+	if (f && *f) {
+		key = 2;
+		flags = *f;
+	}
+	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
+		   len1, flags, err1 ? err1 : err2);
+#ifdef SOCKMAP
+	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
+#else
+	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
+#endif
+	bpf_printk("sk_msg3: err %i\n", err);
+	return err;
+}
+
+SEC("sk_msg5")
+int bpf_prog8(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes) {
+		ret = bpf_msg_apply_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	} else {
+		return SK_DROP;
+	}
+	return SK_PASS;
+}
+SEC("sk_msg6")
+int bpf_prog9(struct sk_msg_md *msg)
+{
+	void *data_end = (void *)(long) msg->data_end;
+	void *data = (void *)(long) msg->data;
+	int ret = 0, *bytes, zero = 0;
+
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes) {
+		if (((__u64)data_end - (__u64)data) >= *bytes)
+			return SK_PASS;
+		ret = bpf_msg_cork_bytes(msg, *bytes);
+		if (ret)
+			return SK_DROP;
+	}
+	return SK_PASS;
+}
+
+SEC("sk_msg7")
+int bpf_prog10(struct sk_msg_md *msg)
+{
+	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
+	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
+
+	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
+	if (bytes)
+		bpf_msg_apply_bytes(msg, *bytes);
+	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
+	if (bytes)
+		bpf_msg_cork_bytes(msg, *bytes);
+	start = bpf_map_lookup_elem(&sock_bytes, &zero);
+	end = bpf_map_lookup_elem(&sock_bytes, &one);
+	if (start && end)
+		bpf_msg_pull_data(msg, *start, *end, 0);
+	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
+	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
+	if (start_push && end_push)
+		bpf_msg_push_data(msg, *start_push, *end_push, 0);
+	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
+	pop = bpf_map_lookup_elem(&sock_bytes, &five);
+	if (start_pop && pop)
+		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
+	bpf_printk("return sk drop\n");
+	return SK_DROP;
+}
+
+int _version SEC("version") = 1;
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/test_sockmap_kern.h b/tools/testing/selftests/bpf/test_sockmap_kern.h
deleted file mode 100644
index 9b4d3a68a91a..000000000000
--- a/tools/testing/selftests/bpf/test_sockmap_kern.h
+++ /dev/null
@@ -1,451 +0,0 @@
-/* SPDX-License-Identifier: GPL-2.0 */
-/* Copyright (c) 2017-2018 Covalent IO, Inc. http://covalent.io */
-#include <stddef.h>
-#include <string.h>
-#include <linux/bpf.h>
-#include <linux/if_ether.h>
-#include <linux/if_packet.h>
-#include <linux/ip.h>
-#include <linux/ipv6.h>
-#include <linux/in.h>
-#include <linux/udp.h>
-#include <linux/tcp.h>
-#include <linux/pkt_cls.h>
-#include <sys/socket.h>
-#include <bpf/bpf_helpers.h>
-#include <bpf/bpf_endian.h>
-
-/* Sockmap sample program connects a client and a backend together
- * using cgroups.
- *
- *    client:X <---> frontend:80 client:X <---> backend:80
- *
- * For simplicity we hard code values here and bind 1:1. The hard
- * coded values are part of the setup in sockmap.sh script that
- * is associated with this BPF program.
- *
- * The bpf_printk is verbose and prints information as connections
- * are established and verdicts are decided.
- */
-
-struct {
-	__uint(type, TEST_MAP_TYPE);
-	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
-} sock_map SEC(".maps");
-
-struct {
-	__uint(type, TEST_MAP_TYPE);
-	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
-} sock_map_txmsg SEC(".maps");
-
-struct {
-	__uint(type, TEST_MAP_TYPE);
-	__uint(max_entries, 20);
-	__uint(key_size, sizeof(int));
-	__uint(value_size, sizeof(int));
-} sock_map_redir SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
-} sock_apply_bytes SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
-} sock_cork_bytes SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 6);
-	__type(key, int);
-	__type(value, int);
-} sock_bytes SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
-} sock_redir_flags SEC(".maps");
-
-struct {
-	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
-	__type(key, int);
-	__type(value, int);
-} sock_skb_opts SEC(".maps");
-
-SEC("sk_skb1")
-int bpf_prog1(struct __sk_buff *skb)
-{
-	return skb->len;
-}
-
-SEC("sk_skb2")
-int bpf_prog2(struct __sk_buff *skb)
-{
-	__u32 lport = skb->local_port;
-	__u32 rport = skb->remote_port;
-	int len, *f, ret, zero = 0;
-	__u64 flags = 0;
-
-	if (lport == 10000)
-		ret = 10;
-	else
-		ret = 1;
-
-	len = (__u32)skb->data_end - (__u32)skb->data;
-	f = bpf_map_lookup_elem(&sock_skb_opts, &zero);
-	if (f && *f) {
-		ret = 3;
-		flags = *f;
-	}
-
-	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
-		   len, flags);
-#ifdef SOCKMAP
-	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
-#else
-	return bpf_sk_redirect_hash(skb, &sock_map, &ret, flags);
-#endif
-
-}
-
-SEC("sockops")
-int bpf_sockmap(struct bpf_sock_ops *skops)
-{
-	__u32 lport, rport;
-	int op, err = 0, index, key, ret;
-
-
-	op = (int) skops->op;
-
-	switch (op) {
-	case BPF_SOCK_OPS_PASSIVE_ESTABLISHED_CB:
-		lport = skops->local_port;
-		rport = skops->remote_port;
-
-		if (lport == 10000) {
-			ret = 1;
-#ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
-#else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
-						   BPF_NOEXIST);
-#endif
-			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
-		}
-		break;
-	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
-		lport = skops->local_port;
-		rport = skops->remote_port;
-
-		if (bpf_ntohl(rport) == 10001) {
-			ret = 10;
-#ifdef SOCKMAP
-			err = bpf_sock_map_update(skops, &sock_map, &ret,
-						  BPF_NOEXIST);
-#else
-			err = bpf_sock_hash_update(skops, &sock_map, &ret,
-						   BPF_NOEXIST);
-#endif
-			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
-		}
-		break;
-	default:
-		break;
-	}
-
-	return 0;
-}
-
-SEC("sk_msg1")
-int bpf_prog4(struct sk_msg_md *msg)
-{
-	int *bytes, zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int *start, *end, *start_push, *end_push, *start_pop, *pop;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		bpf_msg_cork_bytes(msg, *bytes);
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end)
-		bpf_msg_pull_data(msg, *start, *end, 0);
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push)
-		bpf_msg_push_data(msg, *start_push, *end_push, 0);
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop)
-		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-	return SK_PASS;
-}
-
-SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int *start, *end, *start_push, *end_push, *start_pop, *pop;
-	int *bytes, len1, len2 = 0, len3, len4;
-	int err1 = -1, err2 = -1;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		int err;
-
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		int err;
-
-		bpf_printk("sk_msg2: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg2: push_data err %i\n", err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg2: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg2: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-		   len1, err1, err2);
-	return SK_PASS;
-}
-
-SEC("sk_msg3")
-int bpf_prog6(struct sk_msg_md *msg)
-{
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
-	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
-	__u64 flags = 0;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		bpf_msg_cork_bytes(msg, *bytes);
-
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end)
-		bpf_msg_pull_data(msg, *start, *end, 0);
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push)
-		bpf_msg_push_data(msg, *start_push, *end_push, 0);
-
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop)
-		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-
-	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-	if (f && *f) {
-		key = 2;
-		flags = *f;
-	}
-#ifdef SOCKMAP
-	return bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
-	return bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
-}
-
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
-	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int len1, len2 = 0, len3, len4;
-	int err1 = 0, err2 = 0, key = 0;
-	__u64 flags = 0;
-
-		int err;
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		bpf_printk("sk_msg4: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg4: push_data err %i\n",
-				   err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg4: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg4: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-
-	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-	if (f && *f) {
-		key = 2;
-		flags = *f;
-	}
-	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
-		   len1, flags, err1 ? err1 : err2);
-#ifdef SOCKMAP
-	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
-	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
-	bpf_printk("sk_msg3: err %i\n", err);
-	return err;
-}
-
-SEC("sk_msg5")
-int bpf_prog8(struct sk_msg_md *msg)
-{
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int ret = 0, *bytes, zero = 0;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes) {
-		ret = bpf_msg_apply_bytes(msg, *bytes);
-		if (ret)
-			return SK_DROP;
-	} else {
-		return SK_DROP;
-	}
-	return SK_PASS;
-}
-SEC("sk_msg6")
-int bpf_prog9(struct sk_msg_md *msg)
-{
-	void *data_end = (void *)(long) msg->data_end;
-	void *data = (void *)(long) msg->data;
-	int ret = 0, *bytes, zero = 0;
-
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes) {
-		if (((__u64)data_end - (__u64)data) >= *bytes)
-			return SK_PASS;
-		ret = bpf_msg_cork_bytes(msg, *bytes);
-		if (ret)
-			return SK_DROP;
-	}
-	return SK_PASS;
-}
-
-SEC("sk_msg7")
-int bpf_prog10(struct sk_msg_md *msg)
-{
-	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		bpf_msg_cork_bytes(msg, *bytes);
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end)
-		bpf_msg_pull_data(msg, *start, *end, 0);
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push)
-		bpf_msg_push_data(msg, *start_push, *end_push, 0);
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop)
-		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-	bpf_printk("return sk drop\n");
-	return SK_DROP;
-}
-
-int _version SEC("version") = 1;
-char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From d79a32129b21296f1dce1bd9d703826853bb63a6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:13:27 -0700
Subject: bpf: Selftests, remove prints from sockmap tests

The prints in the test_sockmap programs were only useful when we
didn't have enough control over test infrastructure to know from
user program what was being pushed into kernel side.

Now that we have, or will shortly have, better test controls, let's
remove the prints. This means we can remove half the programs
and clean up the bpf side.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939720756.15176.9806965887313279429.stgit@john-Precision-5820-Tower
---
 .../selftests/bpf/progs/test_sockmap_kern.h        | 158 +--------------------
 tools/testing/selftests/bpf/test_sockmap.c         |  25 +---
 2 files changed, 9 insertions(+), 174 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index 9b4d3a68a91a..a443d3637db3 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -110,8 +110,6 @@ int bpf_prog2(struct __sk_buff *skb)
 		flags = *f;
 	}
 
-	bpf_printk("sk_skb2: redirect(%iB) flags=%i\n",
-		   len, flags);
 #ifdef SOCKMAP
 	return bpf_sk_redirect_map(skb, &sock_map, ret, flags);
 #else
@@ -143,8 +141,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 			err = bpf_sock_hash_update(skops, &sock_map, &ret,
 						   BPF_NOEXIST);
 #endif
-			bpf_printk("passive(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
 		}
 		break;
 	case BPF_SOCK_OPS_ACTIVE_ESTABLISHED_CB:
@@ -160,8 +156,6 @@ int bpf_sockmap(struct bpf_sock_ops *skops)
 			err = bpf_sock_hash_update(skops, &sock_map, &ret,
 						   BPF_NOEXIST);
 #endif
-			bpf_printk("active(%i -> %i) map ctx update err: %d\n",
-				   lport, bpf_ntohl(rport), err);
 		}
 		break;
 	default:
@@ -199,72 +193,6 @@ int bpf_prog4(struct sk_msg_md *msg)
 }
 
 SEC("sk_msg2")
-int bpf_prog5(struct sk_msg_md *msg)
-{
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int *start, *end, *start_push, *end_push, *start_pop, *pop;
-	int *bytes, len1, len2 = 0, len3, len4;
-	int err1 = -1, err2 = -1;
-
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		int err;
-
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		int err;
-
-		bpf_printk("sk_msg2: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg2: push_data err %i\n", err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg2: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg2: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-	bpf_printk("sk_msg2: data length %i err1 %i err2 %i\n",
-		   len1, err1, err2);
-	return SK_PASS;
-}
-
-SEC("sk_msg3")
 int bpf_prog6(struct sk_msg_md *msg)
 {
 	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5, key = 0;
@@ -305,86 +233,7 @@ int bpf_prog6(struct sk_msg_md *msg)
 #endif
 }
 
-SEC("sk_msg4")
-int bpf_prog7(struct sk_msg_md *msg)
-{
-	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop, *f;
-	int zero = 0, one = 1, two = 2, three = 3, four = 4, five = 5;
-	int len1, len2 = 0, len3, len4;
-	int err1 = 0, err2 = 0, key = 0;
-	__u64 flags = 0;
-
-		int err;
-	bytes = bpf_map_lookup_elem(&sock_apply_bytes, &zero);
-	if (bytes)
-		err1 = bpf_msg_apply_bytes(msg, *bytes);
-	bytes = bpf_map_lookup_elem(&sock_cork_bytes, &zero);
-	if (bytes)
-		err2 = bpf_msg_cork_bytes(msg, *bytes);
-	len1 = (__u64)msg->data_end - (__u64)msg->data;
-
-	start = bpf_map_lookup_elem(&sock_bytes, &zero);
-	end = bpf_map_lookup_elem(&sock_bytes, &one);
-	if (start && end) {
-		bpf_printk("sk_msg2: pull(%i:%i)\n",
-			   start ? *start : 0, end ? *end : 0);
-		err = bpf_msg_pull_data(msg, *start, *end, 0);
-		if (err)
-			bpf_printk("sk_msg2: pull_data err %i\n",
-				   err);
-		len2 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg2: length update %i->%i\n",
-			   len1, len2);
-	}
-
-	start_push = bpf_map_lookup_elem(&sock_bytes, &two);
-	end_push = bpf_map_lookup_elem(&sock_bytes, &three);
-	if (start_push && end_push) {
-		bpf_printk("sk_msg4: push(%i:%i)\n",
-			   start_push ? *start_push : 0,
-			   end_push ? *end_push : 0);
-		err = bpf_msg_push_data(msg, *start_push, *end_push, 0);
-		if (err)
-			bpf_printk("sk_msg4: push_data err %i\n",
-				   err);
-		len3 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length push_update %i->%i\n",
-			   len2 ? len2 : len1, len3);
-	}
-
-	start_pop = bpf_map_lookup_elem(&sock_bytes, &four);
-	pop = bpf_map_lookup_elem(&sock_bytes, &five);
-	if (start_pop && pop) {
-		int err;
-
-		bpf_printk("sk_msg4: pop(%i@%i)\n",
-			   start_pop, pop);
-		err = bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-		if (err)
-			bpf_printk("sk_msg4: pop_data err %i\n", err);
-		len4 = (__u64)msg->data_end - (__u64)msg->data;
-		bpf_printk("sk_msg4: length pop_data %i->%i\n",
-			   len1 ? len1 : 0,  len4);
-	}
-
-
-	f = bpf_map_lookup_elem(&sock_redir_flags, &zero);
-	if (f && *f) {
-		key = 2;
-		flags = *f;
-	}
-	bpf_printk("sk_msg3: redirect(%iB) flags=%i err=%i\n",
-		   len1, flags, err1 ? err1 : err2);
-#ifdef SOCKMAP
-	err = bpf_msg_redirect_map(msg, &sock_map_redir, key, flags);
-#else
-	err = bpf_msg_redirect_hash(msg, &sock_map_redir, &key, flags);
-#endif
-	bpf_printk("sk_msg3: err %i\n", err);
-	return err;
-}
-
-SEC("sk_msg5")
+SEC("sk_msg3")
 int bpf_prog8(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
@@ -401,7 +250,7 @@ int bpf_prog8(struct sk_msg_md *msg)
 	}
 	return SK_PASS;
 }
-SEC("sk_msg6")
+SEC("sk_msg4")
 int bpf_prog9(struct sk_msg_md *msg)
 {
 	void *data_end = (void *)(long) msg->data_end;
@@ -419,7 +268,7 @@ int bpf_prog9(struct sk_msg_md *msg)
 	return SK_PASS;
 }
 
-SEC("sk_msg7")
+SEC("sk_msg5")
 int bpf_prog10(struct sk_msg_md *msg)
 {
 	int *bytes, *start, *end, *start_push, *end_push, *start_pop, *pop;
@@ -443,7 +292,6 @@ int bpf_prog10(struct sk_msg_md *msg)
 	pop = bpf_map_lookup_elem(&sock_bytes, &five);
 	if (start_pop && pop)
 		bpf_msg_pop_data(msg, *start_pop, *pop, 0);
-	bpf_printk("return sk drop\n");
 	return SK_DROP;
 }
 
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 779e11da979c..6bdacc4f04d8 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -68,9 +68,7 @@ struct bpf_map *maps[8];
 int prog_fd[11];
 
 int txmsg_pass;
-int txmsg_noisy;
 int txmsg_redir;
-int txmsg_redir_noisy;
 int txmsg_drop;
 int txmsg_apply;
 int txmsg_cork;
@@ -95,9 +93,7 @@ static const struct option long_options[] = {
 	{"test",	required_argument,	NULL, 't' },
 	{"data_test",   no_argument,		NULL, 'd' },
 	{"txmsg",		no_argument,	&txmsg_pass,  1  },
-	{"txmsg_noisy",		no_argument,	&txmsg_noisy, 1  },
 	{"txmsg_redir",		no_argument,	&txmsg_redir, 1  },
-	{"txmsg_redir_noisy",	no_argument,	&txmsg_redir_noisy, 1},
 	{"txmsg_drop",		no_argument,	&txmsg_drop, 1 },
 	{"txmsg_apply",	required_argument,	NULL, 'a'},
 	{"txmsg_cork",	required_argument,	NULL, 'k'},
@@ -834,19 +830,14 @@ run:
 	/* Attach txmsg program to sockmap */
 	if (txmsg_pass)
 		tx_prog_fd = prog_fd[3];
-	else if (txmsg_noisy)
-		tx_prog_fd = prog_fd[4];
 	else if (txmsg_redir)
+		tx_prog_fd = prog_fd[4];
+	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[5];
-	else if (txmsg_redir_noisy)
+	else if (txmsg_cork)
 		tx_prog_fd = prog_fd[6];
 	else if (txmsg_drop)
-		tx_prog_fd = prog_fd[9];
-	/* apply and cork must be last */
-	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[7];
-	else if (txmsg_cork)
-		tx_prog_fd = prog_fd[8];
 	else
 		tx_prog_fd = 0;
 
@@ -870,7 +861,7 @@ run:
 			goto out;
 		}
 
-		if (txmsg_redir || txmsg_redir_noisy)
+		if (txmsg_redir)
 			redir_fd = c2;
 		else
 			redir_fd = c1;
@@ -1112,12 +1103,8 @@ static void test_options(char *options)
 
 	if (txmsg_pass)
 		strncat(options, "pass,", OPTSTRING);
-	if (txmsg_noisy)
-		strncat(options, "pass_noisy,", OPTSTRING);
 	if (txmsg_redir)
 		strncat(options, "redir,", OPTSTRING);
-	if (txmsg_redir_noisy)
-		strncat(options, "redir_noisy,", OPTSTRING);
 	if (txmsg_drop)
 		strncat(options, "drop,", OPTSTRING);
 	if (txmsg_apply) {
@@ -1228,7 +1215,7 @@ static int test_txmsg(int cgrp)
 {
 	int err;
 
-	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_pass = txmsg_drop = 0;
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_ingress = txmsg_skb = 0;
 
@@ -1319,7 +1306,7 @@ static int test_mixed(int cgrp)
 	struct sockmap_options opt = {0};
 	int err;
 
-	txmsg_pass = txmsg_noisy = txmsg_redir_noisy = txmsg_drop = 0;
+	txmsg_pass = txmsg_drop = 0;
 	txmsg_apply = txmsg_cork = 0;
 	txmsg_start = txmsg_end = 0;
 	txmsg_start_push = txmsg_end_push = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 13a5f3ffd202f73f1d0c2ed36dd66a0cd891e61a Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:13:46 -0700
Subject: bpf: Selftests, sockmap test prog run without setting cgroup

Running test_sockmap with arguments to specify a test pattern requires
including a cgroup argument. Instead of requiring this, create a cgroup
if the option is not provided.

This is not used by selftest runs but I use it when I want to test a
specific test. Most useful when developing new code and/or tests.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939722675.15176.6294210959489131688.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 28 ++++++++++++++++++++++------
 1 file changed, 22 insertions(+), 6 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 6bdacc4f04d8..5ef71feb65ce 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -1725,6 +1725,7 @@ int main(int argc, char **argv)
 	int opt, longindex, err, cg_fd = 0;
 	char *bpf_file = BPF_SOCKMAP_FILENAME;
 	int test = PING_PONG;
+	bool cg_created = 0;
 
 	if (argc < 2)
 		return test_suite(-1);
@@ -1805,13 +1806,25 @@ int main(int argc, char **argv)
 		}
 	}
 
-	if (argc <= 3 && cg_fd)
-		return test_suite(cg_fd);
-
 	if (!cg_fd) {
-		fprintf(stderr, "%s requires cgroup option: --cgroup <path>\n",
-			argv[0]);
-		return -1;
+		if (setup_cgroup_environment()) {
+			fprintf(stderr, "ERROR: cgroup env failed\n");
+			return -EINVAL;
+		}
+
+		cg_fd = create_and_get_cgroup(CG_PATH);
+		if (cg_fd < 0) {
+			fprintf(stderr,
+				"ERROR: (%i) open cg path failed: %s\n",
+				cg_fd, strerror(errno));
+			return cg_fd;
+		}
+
+		if (join_cgroup(CG_PATH)) {
+			fprintf(stderr, "ERROR: failed to join cgroup\n");
+			return -EINVAL;
+		}
+		cg_created = 1;
 	}
 
 	err = populate_progs(bpf_file);
@@ -1830,6 +1843,9 @@ int main(int argc, char **argv)
 	options.rate = rate;
 
 	err = run_options(&options, cg_fd, test);
+
+	if (cg_created)
+		cleanup_cgroup_environment();
 	close(cg_fd);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 248aba1d526e052ee9aba6dd9c5a198e30839cbd Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:14:05 -0700
Subject: bpf: Selftests, print error in test_sockmap error cases

It's helpful to know the error value if an error occurs.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939724566.15176.12079885932643225626.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 25 +++++++++++++++++--------
 1 file changed, 17 insertions(+), 8 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 5ef71feb65ce..7f45a8fd8f02 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -341,14 +341,18 @@ static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 
 	clock_gettime(CLOCK_MONOTONIC, &s->start);
 	for (i = 0; i < cnt; i++) {
-		int sent = sendfile(fd, fp, NULL, iov_length);
+		int sent;
+
+		errno = 0;
+		sent = sendfile(fd, fp, NULL, iov_length);
 
 		if (!drop && sent < 0) {
-			perror("send loop error");
+			perror("sendpage loop error");
 			fclose(file);
 			return sent;
 		} else if (drop && sent >= 0) {
-			printf("sendpage loop error expected: %i\n", sent);
+			printf("sendpage loop error expected: %i errno %i\n",
+			       sent, errno);
 			fclose(file);
 			return -EIO;
 		}
@@ -460,13 +464,18 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 	if (tx) {
 		clock_gettime(CLOCK_MONOTONIC, &s->start);
 		for (i = 0; i < cnt; i++) {
-			int sent = sendmsg(fd, &msg, flags);
+			int sent;
+
+			errno = 0;
+			sent = sendmsg(fd, &msg, flags);
 
 			if (!drop && sent < 0) {
-				perror("send loop error");
+				perror("sendmsg loop error");
 				goto out_errno;
 			} else if (drop && sent >= 0) {
-				printf("send loop error expected: %i\n", sent);
+				fprintf(stderr,
+					"sendmsg loop error expected: %i errno %i\n",
+					sent, errno);
 				errno = -EIO;
 				goto out_errno;
 			}
@@ -690,14 +699,14 @@ static int sendmsg_test(struct sockmap_options *opt)
 	if (WIFEXITED(rx_status)) {
 		err = WEXITSTATUS(rx_status);
 		if (err) {
-			fprintf(stderr, "rx thread exited with err %d. ", err);
+			fprintf(stderr, "rx thread exited with err %d.\n", err);
 			goto out;
 		}
 	}
 	if (WIFEXITED(tx_status)) {
 		err = WEXITSTATUS(tx_status);
 		if (err)
-			fprintf(stderr, "tx thread exited with err %d. ", err);
+			fprintf(stderr, "tx thread exited with err %d.\n", err);
 	}
 out:
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From 18d4e900a4500c54af56b9ad39f4d3b378eb0661 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:14:25 -0700
Subject: bpf: Selftests, improve test_sockmap total bytes counter

The recv thread in test_sockmap waits to receive all bytes from the
sender, but when we use pop data it may wait for more bytes than are
actually being sent. This stalls the test harness for multiple seconds.
Because this happens in multiple tests, it slows the time to run the
selftest.

Fix by doing a better job of accounting for total bytes when pop helpers
are used.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939726542.15176.5964532245173539540.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 7f45a8fd8f02..9a7e10424584 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -502,9 +502,10 @@ static int msg_loop(int fd, int iov_count, int iov_length, int cnt,
 		 * paths.
 		 */
 		total_bytes = (float)iov_count * (float)iov_length * (float)cnt;
-		txmsg_pop_total = txmsg_pop;
 		if (txmsg_apply)
-			txmsg_pop_total *= (total_bytes / txmsg_apply);
+			txmsg_pop_total = txmsg_pop * (total_bytes / txmsg_apply);
+		else
+			txmsg_pop_total = txmsg_pop * cnt;
 		total_bytes -= txmsg_pop_total;
 		err = clock_gettime(CLOCK_MONOTONIC, &s->start);
 		if (err < 0)
@@ -638,8 +639,12 @@ static int sendmsg_test(struct sockmap_options *opt)
 
 	rxpid = fork();
 	if (rxpid == 0) {
+		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
 		if (opt->drop_expected)
-			exit(0);
+			_exit(0);
+
+		if (!iov_buf) /* zero bytes sent case */
+			_exit(0);
 
 		if (opt->sendpage)
 			iov_count = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 328aa08a081bb94f9aba506363186de6ec3382ec Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:14:44 -0700
Subject: bpf: Selftests, break down test_sockmap into subtests

At the moment test_sockmap runs all 800+ tests ungrouped, which is not
ideal because it makes it hard to see what is failing but also, more
importantly, it's hard to confirm all cases are tested. Additionally,
after inspecting we noticed the runtime is bloated because we run
many duplicate tests. Worse, some of these tests are known error cases
that wait for the recvmsg handler to time out, which creates long delays.
Also we noted some tests were not clearing their options and as a
result the following tests would run with extra and incorrect options.

Fix this by reorganizing test code so it's clear what tests are running
and when. Then it becomes easy to remove duplication and run tests with
only the set of send/recv patterns that are relevant.

To accomplish this break test_sockmap into subtests and remove
unnecessary duplication. The output is more readable now and
the runtime reduced.

Now default output prints subtests like this,

 $ ./test_sockmap
 # 1/ 6  sockmap:txmsg test passthrough:OK
 ...
 #22/ 1 sockhash:txmsg test push/pop data:OK
 Pass: 22 Fail: 0

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939728384.15176.13601520183665880762.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 723 ++++++++++++++---------------
 1 file changed, 348 insertions(+), 375 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 9a7e10424584..ad0540acc0eb 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -54,7 +54,7 @@ static void running_handler(int a);
 #define S1_PORT 10000
 #define S2_PORT 10001
 
-#define BPF_SOCKMAP_FILENAME "test_sockmap_kern.o"
+#define BPF_SOCKMAP_FILENAME  "test_sockmap_kern.o"
 #define BPF_SOCKHASH_FILENAME "test_sockhash_kern.o"
 #define CG_PATH "/sockmap"
 
@@ -110,6 +110,76 @@ static const struct option long_options[] = {
 	{0, 0, NULL, 0 }
 };
 
+struct test_env {
+	const char *type;
+	const char *subtest;
+
+	int test_num;
+	int subtest_num;
+
+	int succ_cnt;
+	int fail_cnt;
+	int fail_last;
+};
+
+struct test_env env;
+
+static void test_start(void)
+{
+	env.subtest_num++;
+}
+
+static void test_fail(void)
+{
+	env.fail_cnt++;
+}
+
+static void test_pass(void)
+{
+	env.succ_cnt++;
+}
+
+static void test_reset(void)
+{
+	txmsg_start = txmsg_end = 0;
+	txmsg_start_pop = txmsg_pop = 0;
+	txmsg_start_push = txmsg_end_push = 0;
+	txmsg_pass = txmsg_drop = txmsg_redir = 0;
+	txmsg_apply = txmsg_cork = 0;
+	txmsg_ingress = txmsg_skb = 0;
+}
+
+static int test_start_subtest(const char *name, const char *type)
+{
+	env.type = type;
+	env.subtest = name;
+	env.test_num++;
+	env.subtest_num = 0;
+	env.fail_last = env.fail_cnt;
+	test_reset();
+	return 0;
+}
+
+static void test_end_subtest(void)
+{
+	int error = env.fail_cnt - env.fail_last;
+	int type = strcmp(env.type, BPF_SOCKMAP_FILENAME);
+
+	if (!error)
+		test_pass();
+
+	fprintf(stdout, "#%2d/%2d %8s:%s:%s\n",
+		env.test_num, env.subtest_num,
+		!type ? "sockmap" : "sockhash",
+		env.subtest, error ? "FAIL" : "OK");
+}
+
+static void test_print_results(void)
+{
+	fprintf(stdout, "Pass: %d Fail: %d\n",
+		env.succ_cnt, env.fail_cnt);
+}
+
 static void usage(char *argv[])
 {
 	int i;
@@ -316,6 +386,7 @@ struct sockmap_options {
 	int iov_count;
 	int iov_length;
 	int rate;
+	char *map;
 };
 
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
@@ -1169,416 +1240,305 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
 
 	test_options(options);
 
-	fprintf(stdout,
-		"[TEST %i]: (%i, %i, %i, %s, %s): ",
-		test_cnt, opt->rate, opt->iov_count, opt->iov_length,
-		test_to_str(test), options);
-	fflush(stdout);
+	if (opt->verbose) {
+		fprintf(stdout,
+			"[TEST %i]: (%i, %i, %i, %s, %s): ",
+			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
+			test_to_str(test), options);
+		fflush(stdout);
+	}
 	err = run_options(opt, cgrp, test);
-	fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+	if (opt->verbose)
+		fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
 	test_cnt++;
 	!err ? passed++ : failed++;
 	free(options);
 	return err;
 }
 
-static int test_exec(int cgrp, struct sockmap_options *opt)
-{
-	int err = __test_exec(cgrp, SENDMSG, opt);
-
-	if (err)
-		goto out;
-
-	err = __test_exec(cgrp, SENDPAGE, opt);
-out:
-	return err;
-}
-
-static int test_loop(int cgrp)
-{
-	struct sockmap_options opt;
-
-	int err, i, l, r;
-
-	opt.verbose = 0;
-	opt.base = false;
-	opt.sendpage = false;
-	opt.data_test = false;
-	opt.drop_expected = false;
-	opt.iov_count = 0;
-	opt.iov_length = 0;
-	opt.rate = 0;
-
-	r = 1;
-	for (i = 1; i < 100; i += 33) {
-		for (l = 1; l < 100; l += 33) {
-			opt.rate = r;
-			opt.iov_count = i;
-			opt.iov_length = l;
-			err = test_exec(cgrp, &opt);
-			if (err)
-				goto out;
-		}
-	}
-	sched_yield();
-out:
-	return err;
-}
-
-static int test_txmsg(int cgrp)
+static void test_exec(int cgrp, struct sockmap_options *opt)
 {
+	int type = strcmp(opt->map, BPF_SOCKMAP_FILENAME);
 	int err;
 
-	txmsg_pass = txmsg_drop = 0;
-	txmsg_apply = txmsg_cork = 0;
-	txmsg_ingress = txmsg_skb = 0;
-
-	txmsg_pass = 1;
-	err = test_loop(cgrp);
-	txmsg_pass = 0;
-	if (err)
-		goto out;
-
-	txmsg_redir = 1;
-	err = test_loop(cgrp);
-	txmsg_redir = 0;
-	if (err)
-		goto out;
-
-	txmsg_drop = 1;
-	err = test_loop(cgrp);
-	txmsg_drop = 0;
-	if (err)
-		goto out;
-
-	txmsg_redir = 1;
-	txmsg_ingress = 1;
-	err = test_loop(cgrp);
-	txmsg_redir = 0;
-	txmsg_ingress = 0;
-	if (err)
-		goto out;
-out:
-	txmsg_pass = 0;
-	txmsg_redir = 0;
-	txmsg_drop = 0;
-	return err;
+	if (type == 0) {
+		test_start();
+		err = __test_exec(cgrp, SENDMSG, opt);
+		if (err)
+			test_fail();
+	} else {
+		test_start();
+		err = __test_exec(cgrp, SENDPAGE, opt);
+		if (err)
+			test_fail();
+	}
 }
 
-static int test_send(struct sockmap_options *opt, int cgrp)
+static void test_send_one(struct sockmap_options *opt, int cgrp)
 {
-	int err;
-
 	opt->iov_length = 1;
 	opt->iov_count = 1;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
 	opt->iov_length = 1;
 	opt->iov_count = 1024;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
 	opt->iov_length = 1024;
 	opt->iov_count = 1;
 	opt->rate = 1;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
 
-	opt->iov_length = 1;
+}
+
+static void test_send_many(struct sockmap_options *opt, int cgrp)
+{
+	opt->iov_length = 3;
 	opt->iov_count = 1;
 	opt->rate = 512;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
+
+	opt->rate = 100;
+	opt->iov_count = 1;
+	opt->iov_length = 5;
+	test_exec(cgrp, opt);
+}
 
+static void test_send_large(struct sockmap_options *opt, int cgrp)
+{
 	opt->iov_length = 256;
 	opt->iov_count = 1024;
 	opt->rate = 2;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
+	test_exec(cgrp, opt);
+}
 
-	opt->rate = 100;
-	opt->iov_count = 1;
-	opt->iov_length = 5;
-	err = test_exec(cgrp, opt);
-	if (err)
-		goto out;
-out:
+static void test_send(struct sockmap_options *opt, int cgrp)
+{
+	test_send_one(opt, cgrp);
+	test_send_many(opt, cgrp);
+	test_send_large(opt, cgrp);
 	sched_yield();
-	return err;
 }
 
-static int test_mixed(int cgrp)
+static void test_txmsg_pass(int cgrp, char *map)
 {
-	struct sockmap_options opt = {0};
-	int err;
-
-	txmsg_pass = txmsg_drop = 0;
-	txmsg_apply = txmsg_cork = 0;
-	txmsg_start = txmsg_end = 0;
-	txmsg_start_push = txmsg_end_push = 0;
-	txmsg_start_pop = txmsg_pop = 0;
+	struct sockmap_options opt = {.map = map};
 
 	/* Test small and large iov_count values with pass/redir/apply/cork */
 	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	test_send(&opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 0;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_redir(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_redir = 1;
+	test_send(&opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1024;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_drop(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 0;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_drop = 1;
+	test_send(&opt, cgrp);
+}
 
-	txmsg_pass = 1;
-	txmsg_redir = 0;
-	txmsg_apply = 1024;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+static void test_txmsg_ingress_redir(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
+
+	txmsg_pass = txmsg_drop = 0;
+	txmsg_ingress = txmsg_redir = 1;
+	test_send(&opt, cgrp);
+}
+
+/* Test cork with hung data. This tests poor usage patterns where
+ * cork can leave data on the ring if user program is buggy and
+ * doesn't flush them somehow. They do take some time however
+ * because they wait for a timeout. Test pass, redir and cork with
+ * apply logic. Use cork size of 4097 with send_large to avoid
+ * aligning cork size with send size.
+ */
+static void test_txmsg_cork_hangs(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
 	txmsg_pass = 1;
 	txmsg_redir = 0;
-	txmsg_cork = 4096;
-	txmsg_apply = 4096;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
-
-	txmsg_pass = 0;
-	txmsg_redir = 1;
-	txmsg_apply = 1;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_cork = 4097;
+	txmsg_apply = 4097;
+	test_send_large(&opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
 	txmsg_apply = 0;
-	txmsg_cork = 1;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_cork = 4097;
+	test_send_large(&opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
-	txmsg_apply = 1024;
-	txmsg_cork = 0;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_apply = 4097;
+	txmsg_cork = 4097;
+	test_send_large(&opt, cgrp);
+}
 
-	txmsg_pass = 0;
+static void test_txmsg_pull(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
+
+	/* Test basic start/end */
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send(&opt, cgrp);
+
+	/* Test >4k pull */
+	txmsg_start = 4096;
+	txmsg_end = 9182;
+	test_send_large(&opt, cgrp);
+
+	/* Test pull + redirect */
+	txmsg_redir = 0;
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send(&opt, cgrp);
+
+	/* Test pull + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send_many(&opt, cgrp);
+
+	/* Test pull + cork + redirect */
 	txmsg_redir = 1;
-	txmsg_apply = 0;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_cork = 512;
+	txmsg_start = 1;
+	txmsg_end = 2;
+	test_send_many(&opt, cgrp);
+}
 
-	txmsg_pass = 0;
+static void test_txmsg_pop(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
+
+	/* Test basic pop */
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(&opt, cgrp);
+
+	/* Test pop with >4k */
+	txmsg_start_pop = 4096;
+	txmsg_pop = 4096;
+	test_send_large(&opt, cgrp);
+
+	/* Test pop + redirect */
 	txmsg_redir = 1;
-	txmsg_apply = 1024;
-	txmsg_cork = 1024;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(&opt, cgrp);
 
-	txmsg_pass = 0;
+	/* Test pop + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(&opt, cgrp);
+
+	/* Test pop + redirect + cork */
 	txmsg_redir = 1;
-	txmsg_cork = 4096;
-	txmsg_apply = 4096;
-	err = test_send(&opt, cgrp);
-	if (err)
-		goto out;
-out:
-	return err;
+	txmsg_cork = 4;
+	txmsg_start_pop = 1;
+	txmsg_pop = 2;
+	test_send_many(&opt, cgrp);
 }
 
-static int test_start_end(int cgrp)
+static void test_txmsg_push(int cgrp, char *map)
 {
-	struct sockmap_options opt = {0};
-	int err, i;
+	struct sockmap_options opt = {.map = map};
 
-	/* Test basic start/end with lots of iov_count and iov_lengths */
-	txmsg_start = 1;
-	txmsg_end = 2;
+	/* Test basic push */
+	txmsg_start_push = 1;
+	txmsg_end_push = 1;
+	test_send(&opt, cgrp);
+
+	/* Test push 4kB >4k */
+	txmsg_start_push = 4096;
+	txmsg_end_push = 4096;
+	test_send_large(&opt, cgrp);
+
+	/* Test push + redirect */
+	txmsg_redir = 1;
 	txmsg_start_push = 1;
 	txmsg_end_push = 2;
-	txmsg_start_pop = 1;
-	txmsg_pop = 1;
-	err = test_txmsg(cgrp);
-	if (err)
-		goto out;
+	test_send_many(&opt, cgrp);
 
-	/* Cut a byte of pushed data but leave reamining in place */
-	txmsg_start = 1;
-	txmsg_end = 2;
+	/* Test push + cork */
+	txmsg_redir = 0;
+	txmsg_cork = 512;
 	txmsg_start_push = 1;
-	txmsg_end_push = 3;
-	txmsg_start_pop = 1;
-	txmsg_pop = 1;
-	err = test_txmsg(cgrp);
-	if (err)
-		goto out;
+	txmsg_end_push = 2;
+	test_send_many(&opt, cgrp);
+}
 
-	/* Test start/end with cork */
-	opt.rate = 16;
-	opt.iov_count = 1;
-	opt.iov_length = 100;
-	txmsg_cork = 1600;
-
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	for (i = 99; i <= 1600; i += 500) {
-		txmsg_start = 0;
-		txmsg_end = i;
-		txmsg_start_push = 0;
-		txmsg_end_push = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
+static void test_txmsg_push_pop(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
-	/* Test pop data in middle of cork */
-	for (i = 99; i <= 1600; i += 500) {
-		txmsg_start_pop = 10;
-		txmsg_pop = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	/* Test start/end with cork but pull data in middle */
-	for (i = 199; i <= 1600; i += 500) {
-		txmsg_start = 100;
-		txmsg_end = i;
-		txmsg_start_push = 100;
-		txmsg_end_push = i;
-		err = test_exec(cgrp, &opt);
-		if (err)
-			goto out;
-	}
+	txmsg_start_push = 1;
+	txmsg_end_push = 10;
+	txmsg_start_pop = 5;
+	txmsg_pop = 4;
+	test_send_large(&opt, cgrp);
+}
 
-	/* Test start/end with cork pulling last sg entry */
-	txmsg_start = 1500;
-	txmsg_end = 1600;
-	txmsg_start_push = 1500;
-	txmsg_end_push = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+static void test_txmsg_apply(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
-	/* Test pop with cork pulling last sg entry */
-	txmsg_start_pop = 1500;
-	txmsg_pop = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
-	txmsg_start_pop = 0;
-	txmsg_pop = 0;
-
-	/* Test start/end pull of single byte in last page */
-	txmsg_start = 1111;
-	txmsg_end = 1112;
-	txmsg_start_push = 1111;
-	txmsg_end_push = 1112;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	test_send_one(&opt, cgrp);
 
-	/* Test pop of single byte in last page */
-	txmsg_start_pop = 1111;
-	txmsg_pop = 1112;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1;
+	txmsg_cork = 0;
+	test_send_one(&opt, cgrp);
 
-	/* Test start/end with end < start */
-	txmsg_start = 1111;
-	txmsg_end = 0;
-	txmsg_start_push = 1111;
-	txmsg_end_push = 0;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	test_send_large(&opt, cgrp);
 
-	/* Test start/end with end > data */
-	txmsg_start = 0;
-	txmsg_end = 1601;
-	txmsg_start_push = 0;
-	txmsg_end_push = 1601;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	txmsg_pass = 0;
+	txmsg_redir = 1;
+	txmsg_apply = 1024;
+	txmsg_cork = 0;
+	test_send_large(&opt, cgrp);
+}
 
-	/* Test start/end with start > data */
-	txmsg_start = 1601;
-	txmsg_end = 1600;
-	txmsg_start_push = 1601;
-	txmsg_end_push = 1600;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+static void test_txmsg_cork(int cgrp, char *map)
+{
+	struct sockmap_options opt = {.map = map};
 
-	/* Test pop with start > data */
-	txmsg_start_pop = 1601;
-	txmsg_pop = 1;
-	err = test_exec(cgrp, &opt);
-	if (err)
-		goto out;
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 0;
+	txmsg_cork = 1;
+	test_send(&opt, cgrp);
 
-	/* Test pop with pop range > data */
-	txmsg_start_pop = 1599;
-	txmsg_pop = 10;
-	err = test_exec(cgrp, &opt);
-out:
-	txmsg_start = 0;
-	txmsg_end = 0;
-	sched_yield();
-	return err;
+	txmsg_pass = 1;
+	txmsg_redir = 0;
+	txmsg_apply = 1;
+	txmsg_cork = 1;
+	test_send(&opt, cgrp);
 }
 
 char *map_names[] = {
@@ -1663,16 +1623,59 @@ static int populate_progs(char *bpf_file)
 	return 0;
 }
 
-static int __test_suite(int cg_fd, char *bpf_file)
+struct _test {
+	char *title;
+	void (*tester)(int cg_fd, char *map);
+};
+
+struct _test test[] = {
+	{"txmsg test passthrough", test_txmsg_pass},
+	{"txmsg test redirect", test_txmsg_redir},
+	{"txmsg test drop", test_txmsg_drop},
+	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+	{"txmsg test apply", test_txmsg_apply},
+	{"txmsg test cork", test_txmsg_cork},
+	{"txmsg test hanging corks", test_txmsg_cork_hangs},
+	{"txmsg test push_data", test_txmsg_push},
+	{"txmsg test pull-data", test_txmsg_pull},
+	{"txmsg test pop-data", test_txmsg_pop},
+	{"txmsg test push/pop data", test_txmsg_push_pop},
+};
+
+static int __test_selftests(int cg_fd, char *map)
 {
-	int err, cleanup = cg_fd;
+	int i, err;
 
-	err = populate_progs(bpf_file);
+	err = populate_progs(map);
 	if (err < 0) {
 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
 		return err;
 	}
 
+	/* Tests basic commands and APIs */
+	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
+		struct _test t = test[i];
+
+		test_start_subtest(t.title, map);
+		t.tester(cg_fd, map);
+		test_end_subtest();
+	}
+
+	return err;
+}
+
+static void test_selftests_sockmap(int cg_fd)
+{
+	__test_selftests(cg_fd, BPF_SOCKMAP_FILENAME);
+}
+
+static void test_selftests_sockhash(int cg_fd)
+{
+	__test_selftests(cg_fd, BPF_SOCKHASH_FILENAME);
+}
+
+static int test_selftest(int cg_fd)
+{
 	if (cg_fd < 0) {
 		if (setup_cgroup_environment()) {
 			fprintf(stderr, "ERROR: cgroup env failed\n");
@@ -1693,43 +1696,12 @@ static int __test_suite(int cg_fd, char *bpf_file)
 		}
 	}
 
-	/* Tests basic commands and APIs with range of iov values */
-	txmsg_start = txmsg_end = txmsg_start_push = txmsg_end_push = 0;
-	err = test_txmsg(cg_fd);
-	if (err)
-		goto out;
-
-	/* Tests interesting combinations of APIs used together */
-	err = test_mixed(cg_fd);
-	if (err)
-		goto out;
-
-	/* Tests pull_data API using start/end API */
-	err = test_start_end(cg_fd);
-	if (err)
-		goto out;
-
-out:
-	printf("Summary: %i PASSED %i FAILED\n", passed, failed);
-	if (cleanup < 0) {
-		cleanup_cgroup_environment();
-		close(cg_fd);
-	}
-	return err;
-}
-
-static int test_suite(int cg_fd)
-{
-	int err;
-
-	err = __test_suite(cg_fd, BPF_SOCKMAP_FILENAME);
-	if (err)
-		goto out;
-	err = __test_suite(cg_fd, BPF_SOCKHASH_FILENAME);
-out:
-	if (cg_fd > -1)
-		close(cg_fd);
-	return err;
+	test_selftests_sockmap(cg_fd);
+	test_selftests_sockhash(cg_fd);
+	cleanup_cgroup_environment();
+	close(cg_fd);
+	test_print_results();
+	return 0;
 }
 
 int main(int argc, char **argv)
@@ -1741,8 +1713,9 @@ int main(int argc, char **argv)
 	int test = PING_PONG;
 	bool cg_created = 0;
 
-	if (argc < 2)
-		return test_suite(-1);
+	if (argc < 2) {
+		return test_selftest(-1);
+	}
 
 	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
 				  long_options, &longindex)) != -1) {
-- 
cgit v1.2.3-59-g8ed1b


From b98ca90c56ee498c751ff5c20b9db8cb64c13fc5 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:15:04 -0700
Subject: bpf: Selftests, provide verbose option for selftests execution

Pass options from command line args into individual tests which allows us
to use verbose option from command line with selftests. Now when verbose
option is set individual subtest details will be printed. Also we can
consolidate cgroup bring up and tear down.

Additionally just setting verbose is very noisy so introduce verbose=1
and verbose=2. Really verbose=2 is only useful when developing tests
or debugging some specific issue.

For example now we get output like this with --verbose,

#20/17 sockhash:txmsg test pull-data:OK
 [TEST 160]: (512, 1, 3, sendpage, pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 1 cnt 512 err 0
 [TEST 161]: (100, 1, 5, sendpage, pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 3 cnt 100 err 0
 [TEST 162]: (2, 1024, 256, sendpage, pop (4096,8192),): msg_loop_rx: iov_count 1 iov_buf 255 cnt 2 err 0
 [TEST 163]: (512, 1, 3, sendpage, redir,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 1 cnt 512 err 0
 [TEST 164]: (100, 1, 5, sendpage, redir,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 3 cnt 100 err 0
 [TEST 165]: (512, 1, 3, sendpage, cork 512,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 1 cnt 512 err 0
 [TEST 166]: (100, 1, 5, sendpage, cork 512,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 3 cnt 100 err 0
 [TEST 167]: (512, 1, 3, sendpage, redir,cork 4,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 1 cnt 512 err 0
 [TEST 168]: (100, 1, 5, sendpage, redir,cork 4,pop (1,3),): msg_loop_rx: iov_count 1 iov_buf 3 cnt 100 err 0

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939730412.15176.1975675235035143367.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 179 ++++++++++++-----------------
 1 file changed, 71 insertions(+), 108 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index ad0540acc0eb..2be8d9df152a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -87,7 +87,7 @@ static const struct option long_options[] = {
 	{"help",	no_argument,		NULL, 'h' },
 	{"cgroup",	required_argument,	NULL, 'c' },
 	{"rate",	required_argument,	NULL, 'r' },
-	{"verbose",	no_argument,		NULL, 'v' },
+	{"verbose",	optional_argument,	NULL, 'v' },
 	{"iov_count",	required_argument,	NULL, 'i' },
 	{"length",	required_argument,	NULL, 'l' },
 	{"test",	required_argument,	NULL, 't' },
@@ -362,7 +362,7 @@ static int sockmap_init_sockets(int verbose)
 		return errno;
 	}
 
-	if (verbose) {
+	if (verbose > 1) {
 		printf("connected sockets: c1 <-> p1, c2 <-> p2\n");
 		printf("cgroups binding: c1(%i) <-> s1(%i) - - - c2(%i) <-> s2(%i)\n",
 			c1, s1, c2, s2);
@@ -721,7 +721,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			iov_count = 1;
 		err = msg_loop(rx_fd, iov_count, iov_buf,
 			       cnt, &s, false, opt);
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stderr,
 				"msg_loop_rx: iov_count %i iov_buf %i cnt %i err %i\n",
 				iov_count, iov_buf, cnt, err);
@@ -729,7 +729,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
 		}
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stdout,
 				"rx_sendmsg: TX: %zuB %fB/s %fGB/s RX: %zuB %fB/s %fGB/s %s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -759,7 +759,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 			sent_Bps = sentBps(s);
 			recvd_Bps = recvdBps(s);
 		}
-		if (opt->verbose)
+		if (opt->verbose > 1)
 			fprintf(stdout,
 				"tx_sendmsg: TX: %zuB %fB/s %f GB/s RX: %zuB %fB/s %fGB/s\n",
 				s.bytes_sent, sent_Bps, sent_Bps/giga,
@@ -864,6 +864,7 @@ static int forever_ping_pong(int rate, struct sockmap_options *opt)
 }
 
 enum {
+	SELFTESTS,
 	PING_PONG,
 	SENDMSG,
 	BASE,
@@ -1242,14 +1243,14 @@ static int __test_exec(int cgrp, int test, struct sockmap_options *opt)
 
 	if (opt->verbose) {
 		fprintf(stdout,
-			"[TEST %i]: (%i, %i, %i, %s, %s): ",
+			" [TEST %i]: (%i, %i, %i, %s, %s): ",
 			test_cnt, opt->rate, opt->iov_count, opt->iov_length,
 			test_to_str(test), options);
 		fflush(stdout);
 	}
 	err = run_options(opt, cgrp, test);
 	if (opt->verbose)
-		fprintf(stdout, "%s\n", !err ? "PASS" : "FAILED");
+		fprintf(stdout, " %s\n", !err ? "PASS" : "FAILED");
 	test_cnt++;
 	!err ? passed++ : failed++;
 	free(options);
@@ -1322,38 +1323,30 @@ static void test_send(struct sockmap_options *opt, int cgrp)
 	sched_yield();
 }
 
-static void test_txmsg_pass(int cgrp, char *map)
+static void test_txmsg_pass(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	/* Test small and large iov_count values with pass/redir/apply/cork */
 	txmsg_pass = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 }
 
-static void test_txmsg_redir(int cgrp, char *map)
+static void test_txmsg_redir(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_redir = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 }
 
-static void test_txmsg_drop(int cgrp, char *map)
+static void test_txmsg_drop(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_drop = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 }
 
-static void test_txmsg_ingress_redir(int cgrp, char *map)
+static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_pass = txmsg_drop = 0;
 	txmsg_ingress = txmsg_redir = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 }
 
 /* Test cork with hung data. This tests poor usage patterns where
@@ -1363,182 +1356,168 @@ static void test_txmsg_ingress_redir(int cgrp, char *map)
  * apply logic. Use cork size of 4097 with send_large to avoid
  * aligning cork size with send size.
  */
-static void test_txmsg_cork_hangs(int cgrp, char *map)
+static void test_txmsg_cork_hangs(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_pass = 1;
 	txmsg_redir = 0;
 	txmsg_cork = 4097;
 	txmsg_apply = 4097;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
 	txmsg_apply = 0;
 	txmsg_cork = 4097;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
 	txmsg_apply = 4097;
 	txmsg_cork = 4097;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 }
 
-static void test_txmsg_pull(int cgrp, char *map)
+static void test_txmsg_pull(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	/* Test basic start/end */
 	txmsg_start = 1;
 	txmsg_end = 2;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 
 	/* Test >4k pull */
 	txmsg_start = 4096;
 	txmsg_end = 9182;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	/* Test pull + redirect */
 	txmsg_redir = 0;
 	txmsg_start = 1;
 	txmsg_end = 2;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 
 	/* Test pull + cork */
 	txmsg_redir = 0;
 	txmsg_cork = 512;
 	txmsg_start = 1;
 	txmsg_end = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 
 	/* Test pull + cork + redirect */
 	txmsg_redir = 1;
 	txmsg_cork = 512;
 	txmsg_start = 1;
 	txmsg_end = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 }
 
-static void test_txmsg_pop(int cgrp, char *map)
+static void test_txmsg_pop(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	/* Test basic pop */
 	txmsg_start_pop = 1;
 	txmsg_pop = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 
 	/* Test pop with >4k */
 	txmsg_start_pop = 4096;
 	txmsg_pop = 4096;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	/* Test pop + redirect */
 	txmsg_redir = 1;
 	txmsg_start_pop = 1;
 	txmsg_pop = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 
 	/* Test pop + cork */
 	txmsg_redir = 0;
 	txmsg_cork = 512;
 	txmsg_start_pop = 1;
 	txmsg_pop = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 
 	/* Test pop + redirect + cork */
 	txmsg_redir = 1;
 	txmsg_cork = 4;
 	txmsg_start_pop = 1;
 	txmsg_pop = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 }
 
-static void test_txmsg_push(int cgrp, char *map)
+static void test_txmsg_push(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	/* Test basic push */
 	txmsg_start_push = 1;
 	txmsg_end_push = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 
 	/* Test push 4kB >4k */
 	txmsg_start_push = 4096;
 	txmsg_end_push = 4096;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	/* Test push + redirect */
 	txmsg_redir = 1;
 	txmsg_start_push = 1;
 	txmsg_end_push = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 
 	/* Test push + cork */
 	txmsg_redir = 0;
 	txmsg_cork = 512;
 	txmsg_start_push = 1;
 	txmsg_end_push = 2;
-	test_send_many(&opt, cgrp);
+	test_send_many(opt, cgrp);
 }
 
-static void test_txmsg_push_pop(int cgrp, char *map)
+static void test_txmsg_push_pop(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_start_push = 1;
 	txmsg_end_push = 10;
 	txmsg_start_pop = 5;
 	txmsg_pop = 4;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 }
 
-static void test_txmsg_apply(int cgrp, char *map)
+static void test_txmsg_apply(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_pass = 1;
 	txmsg_redir = 0;
 	txmsg_apply = 1;
 	txmsg_cork = 0;
-	test_send_one(&opt, cgrp);
+	test_send_one(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
 	txmsg_apply = 1;
 	txmsg_cork = 0;
-	test_send_one(&opt, cgrp);
+	test_send_one(opt, cgrp);
 
 	txmsg_pass = 1;
 	txmsg_redir = 0;
 	txmsg_apply = 1024;
 	txmsg_cork = 0;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 
 	txmsg_pass = 0;
 	txmsg_redir = 1;
 	txmsg_apply = 1024;
 	txmsg_cork = 0;
-	test_send_large(&opt, cgrp);
+	test_send_large(opt, cgrp);
 }
 
-static void test_txmsg_cork(int cgrp, char *map)
+static void test_txmsg_cork(int cgrp, struct sockmap_options *opt)
 {
-	struct sockmap_options opt = {.map = map};
-
 	txmsg_pass = 1;
 	txmsg_redir = 0;
 	txmsg_apply = 0;
 	txmsg_cork = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 
 	txmsg_pass = 1;
 	txmsg_redir = 0;
 	txmsg_apply = 1;
 	txmsg_cork = 1;
-	test_send(&opt, cgrp);
+	test_send(opt, cgrp);
 }
 
 char *map_names[] = {
@@ -1625,7 +1604,7 @@ static int populate_progs(char *bpf_file)
 
 struct _test {
 	char *title;
-	void (*tester)(int cg_fd, char *map);
+	void (*tester)(int cg_fd, struct sockmap_options *opt);
 };
 
 struct _test test[] = {
@@ -1642,11 +1621,11 @@ struct _test test[] = {
 	{"txmsg test push/pop data", test_txmsg_push_pop},
 };
 
-static int __test_selftests(int cg_fd, char *map)
+static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 {
 	int i, err;
 
-	err = populate_progs(map);
+	err = populate_progs(opt->map);
 	if (err < 0) {
 		fprintf(stderr, "ERROR: (%i) load bpf failed\n", err);
 		return err;
@@ -1656,50 +1635,31 @@ static int __test_selftests(int cg_fd, char *map)
 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
 		struct _test t = test[i];
 
-		test_start_subtest(t.title, map);
-		t.tester(cg_fd, map);
+		test_start_subtest(t.title, opt->map);
+		t.tester(cg_fd, opt);
 		test_end_subtest();
 	}
 
 	return err;
 }
 
-static void test_selftests_sockmap(int cg_fd)
+static void test_selftests_sockmap(int cg_fd, struct sockmap_options *opt)
 {
-	__test_selftests(cg_fd, BPF_SOCKMAP_FILENAME);
+	opt->map = BPF_SOCKMAP_FILENAME;
+	__test_selftests(cg_fd, opt);
 }
 
-static void test_selftests_sockhash(int cg_fd)
+static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
 {
-	__test_selftests(cg_fd, BPF_SOCKHASH_FILENAME);
+	opt->map = BPF_SOCKHASH_FILENAME;
+	__test_selftests(cg_fd, opt);
 }
 
-static int test_selftest(int cg_fd)
+static int test_selftest(int cg_fd, struct sockmap_options *opt)
 {
-	if (cg_fd < 0) {
-		if (setup_cgroup_environment()) {
-			fprintf(stderr, "ERROR: cgroup env failed\n");
-			return -EINVAL;
-		}
-
-		cg_fd = create_and_get_cgroup(CG_PATH);
-		if (cg_fd < 0) {
-			fprintf(stderr,
-				"ERROR: (%i) open cg path failed: %s\n",
-				cg_fd, optarg);
-			return cg_fd;
-		}
 
-		if (join_cgroup(CG_PATH)) {
-			fprintf(stderr, "ERROR: failed to join cgroup\n");
-			return -EINVAL;
-		}
-	}
-
-	test_selftests_sockmap(cg_fd);
-	test_selftests_sockhash(cg_fd);
-	cleanup_cgroup_environment();
-	close(cg_fd);
+	test_selftests_sockmap(cg_fd, opt);
+	test_selftests_sockhash(cg_fd, opt);
 	test_print_results();
 	return 0;
 }
@@ -1710,14 +1670,10 @@ int main(int argc, char **argv)
 	struct sockmap_options options = {0};
 	int opt, longindex, err, cg_fd = 0;
 	char *bpf_file = BPF_SOCKMAP_FILENAME;
-	int test = PING_PONG;
+	int test = SELFTESTS;
 	bool cg_created = 0;
 
-	if (argc < 2) {
-		return test_selftest(-1);
-	}
-
-	while ((opt = getopt_long(argc, argv, ":dhvc:r:i:l:t:p:q:",
+	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 's':
@@ -1758,6 +1714,8 @@ int main(int argc, char **argv)
 			break;
 		case 'v':
 			options.verbose = 1;
+			if (optarg)
+				options.verbose = atoi(optarg);
 			break;
 		case 'i':
 			iov_count = atoi(optarg);
@@ -1814,6 +1772,11 @@ int main(int argc, char **argv)
 		cg_created = 1;
 	}
 
+	if (test == SELFTESTS) {
+		err = test_selftest(cg_fd, &options);
+		goto out;
+	}
+
 	err = populate_progs(bpf_file);
 	if (err) {
 		fprintf(stderr, "populate program: (%s) %s\n",
@@ -1830,7 +1793,7 @@ int main(int argc, char **argv)
 	options.rate = rate;
 
 	err = run_options(&options, cg_fd, test);
-
+out:
 	if (cg_created)
 		cleanup_cgroup_environment();
 	close(cg_fd);
-- 
cgit v1.2.3-59-g8ed1b


From 065a74cbd0d0bd7115846d630e141a95a95e1ce1 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:15:24 -0700
Subject: bpf: Selftests, add whitelist option to test_sockmap

Allow running specific tests with a comma-delimited whitelist. For example,
to run all apply and cork tests:

 $ ./test_sockmap --whitelist="cork,apply"

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939732464.15176.1959113294944564542.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 31 +++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 2be8d9df152a..1b98e9210d13 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -107,6 +107,7 @@ static const struct option long_options[] = {
 	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
+	{"whitelist", required_argument,	NULL, 'n' },
 	{0, 0, NULL, 0 }
 };
 
@@ -387,6 +388,7 @@ struct sockmap_options {
 	int iov_length;
 	int rate;
 	char *map;
+	char *whitelist;
 };
 
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
@@ -1621,6 +1623,24 @@ struct _test test[] = {
 	{"txmsg test push/pop data", test_txmsg_push_pop},
 };
 
+static int check_whitelist(struct _test *t, struct sockmap_options *opt)
+{
+	char *entry, *ptr;
+
+	if (!opt->whitelist)
+		return 0;
+	ptr = strdup(opt->whitelist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if (strstr(opt->map, entry) != 0 || strstr(t->title, entry) != 0)
+			return 0;
+		entry = strtok(NULL, ",");
+	}
+	return -EINVAL;
+}
+
 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 {
 	int i, err;
@@ -1635,6 +1655,9 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
 		struct _test t = test[i];
 
+		if (check_whitelist(&t, opt) < 0)
+			continue;
+
 		test_start_subtest(t.title, opt->map);
 		t.tester(cg_fd, opt);
 		test_end_subtest();
@@ -1673,7 +1696,7 @@ int main(int argc, char **argv)
 	int test = SELFTESTS;
 	bool cg_created = 0;
 
-	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:",
+	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 's':
@@ -1742,6 +1765,10 @@ int main(int argc, char **argv)
 				return -1;
 			}
 			break;
+		case 'n':
+			options.whitelist = strdup(optarg);
+			if (!options.whitelist)
+				return -ENOMEM;
 		case 0:
 			break;
 		case 'h':
@@ -1794,6 +1821,8 @@ int main(int argc, char **argv)
 
 	err = run_options(&options, cg_fd, test);
 out:
+	if (options.whitelist)
+		free(options.whitelist);
 	if (cg_created)
 		cleanup_cgroup_environment();
 	close(cg_fd);
-- 
cgit v1.2.3-59-g8ed1b


From a7238f7c79dda1c484f92478c42408e1a3d418c6 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:15:43 -0700
Subject: bpf: Selftests, add blacklist to test_sockmap

This adds a blacklist to test_sockmap. For example, now we can run
all apply and cork tests except those with timeouts by doing,

 $ ./test_sockmap --whitelist "apply,cork" --blacklist "hang"

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939734350.15176.6643981099665208826.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 33 ++++++++++++++++++++++++++++--
 1 file changed, 31 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 1b98e9210d13..2ed2db625371 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -108,6 +108,7 @@ static const struct option long_options[] = {
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
 	{"whitelist", required_argument,	NULL, 'n' },
+	{"blacklist", required_argument,	NULL, 'b' },
 	{0, 0, NULL, 0 }
 };
 
@@ -389,6 +390,7 @@ struct sockmap_options {
 	int rate;
 	char *map;
 	char *whitelist;
+	char *blacklist;
 };
 
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
@@ -1641,6 +1643,24 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt)
 	return -EINVAL;
 }
 
+static int check_blacklist(struct _test *t, struct sockmap_options *opt)
+{
+	char *entry, *ptr;
+
+	if (!opt->blacklist)
+		return -EINVAL;
+	ptr = strdup(opt->blacklist);
+	if (!ptr)
+		return -ENOMEM;
+	entry = strtok(ptr, ",");
+	while (entry) {
+		if (strstr(opt->map, entry) != 0 || strstr(t->title, entry) != 0)
+			return 0;
+		entry = strtok(NULL, ",");
+	}
+	return -EINVAL;
+}
+
 static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 {
 	int i, err;
@@ -1655,7 +1675,9 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 	for (i = 0; i < sizeof(test)/sizeof(struct _test); i++) {
 		struct _test t = test[i];
 
-		if (check_whitelist(&t, opt) < 0)
+		if (check_whitelist(&t, opt) != 0)
+			continue;
+		if (check_blacklist(&t, opt) == 0)
 			continue;
 
 		test_start_subtest(t.title, opt->map);
@@ -1696,7 +1718,7 @@ int main(int argc, char **argv)
 	int test = SELFTESTS;
 	bool cg_created = 0;
 
-	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:",
+	while ((opt = getopt_long(argc, argv, ":dhv:c:r:i:l:t:p:q:n:b:",
 				  long_options, &longindex)) != -1) {
 		switch (opt) {
 		case 's':
@@ -1769,6 +1791,11 @@ int main(int argc, char **argv)
 			options.whitelist = strdup(optarg);
 			if (!options.whitelist)
 				return -ENOMEM;
+			break;
+		case 'b':
+			options.blacklist = strdup(optarg);
+			if (!options.blacklist)
+				return -ENOMEM;
 		case 0:
 			break;
 		case 'h':
@@ -1823,6 +1850,8 @@ int main(int argc, char **argv)
 out:
 	if (options.whitelist)
 		free(options.whitelist);
+	if (options.blacklist)
+		free(options.blacklist);
 	if (cg_created)
 		cleanup_cgroup_environment();
 	close(cg_fd);
-- 
cgit v1.2.3-59-g8ed1b


From 96586dd9268d26b278a1dd9110080001a6acbb0f Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Wed, 13 May 2020 12:16:02 -0700
Subject: bpf: Selftests, add ktls tests to test_sockmap

Until now we have only had minimal ktls+sockmap testing when being
used with helpers and different sendmsg/sendpage patterns. Add a
pass with ktls here.

To run just ktls tests,

 $ ./test_sockmap --whitelist="ktls"

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Link: https://lore.kernel.org/bpf/158939736278.15176.5435314315563203761.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/test_sockmap.c | 70 +++++++++++++++++++-----------
 1 file changed, 44 insertions(+), 26 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index 2ed2db625371..c80643828b82 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -115,6 +115,7 @@ static const struct option long_options[] = {
 struct test_env {
 	const char *type;
 	const char *subtest;
+	const char *prepend;
 
 	int test_num;
 	int subtest_num;
@@ -126,6 +127,26 @@ struct test_env {
 
 struct test_env env;
 
+struct sockmap_options {
+	int verbose;
+	bool base;
+	bool sendpage;
+	bool data_test;
+	bool drop_expected;
+	int iov_count;
+	int iov_length;
+	int rate;
+	char *map;
+	char *whitelist;
+	char *blacklist;
+	char *prepend;
+};
+
+struct _test {
+	char *title;
+	void (*tester)(int cg_fd, struct sockmap_options *opt);
+};
+
 static void test_start(void)
 {
 	env.subtest_num++;
@@ -151,10 +172,11 @@ static void test_reset(void)
 	txmsg_ingress = txmsg_skb = 0;
 }
 
-static int test_start_subtest(const char *name, const char *type)
+static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
 {
-	env.type = type;
-	env.subtest = name;
+	env.type = o->map;
+	env.subtest = t->title;
+	env.prepend = o->prepend;
 	env.test_num++;
 	env.subtest_num = 0;
 	env.fail_last = env.fail_cnt;
@@ -170,9 +192,10 @@ static void test_end_subtest(void)
 	if (!error)
 		test_pass();
 
-	fprintf(stdout, "#%2d/%2d %8s:%s:%s\n",
+	fprintf(stdout, "#%2d/%2d %8s:%s:%s:%s\n",
 		env.test_num, env.subtest_num,
 		!type ? "sockmap" : "sockhash",
+		env.prepend ? : "",
 		env.subtest, error ? "FAIL" : "OK");
 }
 
@@ -379,20 +402,6 @@ struct msg_stats {
 	struct timespec end;
 };
 
-struct sockmap_options {
-	int verbose;
-	bool base;
-	bool sendpage;
-	bool data_test;
-	bool drop_expected;
-	int iov_count;
-	int iov_length;
-	int rate;
-	char *map;
-	char *whitelist;
-	char *blacklist;
-};
-
 static int msg_loop_sendpage(int fd, int iov_length, int cnt,
 			     struct msg_stats *s,
 			     struct sockmap_options *opt)
@@ -1606,11 +1615,6 @@ static int populate_progs(char *bpf_file)
 	return 0;
 }
 
-struct _test {
-	char *title;
-	void (*tester)(int cg_fd, struct sockmap_options *opt);
-};
-
 struct _test test[] = {
 	{"txmsg test passthrough", test_txmsg_pass},
 	{"txmsg test redirect", test_txmsg_redir},
@@ -1636,7 +1640,9 @@ static int check_whitelist(struct _test *t, struct sockmap_options *opt)
 		return -ENOMEM;
 	entry = strtok(ptr, ",");
 	while (entry) {
-		if (strstr(opt->map, entry) != 0 || strstr(t->title, entry) != 0)
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0)
 			return 0;
 		entry = strtok(NULL, ",");
 	}
@@ -1654,7 +1660,9 @@ static int check_blacklist(struct _test *t, struct sockmap_options *opt)
 		return -ENOMEM;
 	entry = strtok(ptr, ",");
 	while (entry) {
-		if (strstr(opt->map, entry) != 0 || strstr(t->title, entry) != 0)
+		if ((opt->prepend && strstr(opt->prepend, entry) != 0) ||
+		    strstr(opt->map, entry) != 0 ||
+		    strstr(t->title, entry) != 0)
 			return 0;
 		entry = strtok(NULL, ",");
 	}
@@ -1680,7 +1688,7 @@ static int __test_selftests(int cg_fd, struct sockmap_options *opt)
 		if (check_blacklist(&t, opt) == 0)
 			continue;
 
-		test_start_subtest(t.title, opt->map);
+		test_start_subtest(&t, opt);
 		t.tester(cg_fd, opt);
 		test_end_subtest();
 	}
@@ -1700,11 +1708,21 @@ static void test_selftests_sockhash(int cg_fd, struct sockmap_options *opt)
 	__test_selftests(cg_fd, opt);
 }
 
+static void test_selftests_ktls(int cg_fd, struct sockmap_options *opt)
+{
+	opt->map = BPF_SOCKHASH_FILENAME;
+	opt->prepend = "ktls";
+	ktls = 1;
+	__test_selftests(cg_fd, opt);
+	ktls = 0;
+}
+
 static int test_selftest(int cg_fd, struct sockmap_options *opt)
 {
 
 	test_selftests_sockmap(cg_fd, opt);
 	test_selftests_sockhash(cg_fd, opt);
+	test_selftests_ktls(cg_fd, opt);
 	test_print_results();
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 6f42a29305659ca6491b45939d663a0de7f197b7 Mon Sep 17 00:00:00 2001
From: Kevin Lo <kevlo@kevlo.org>
Date: Sat, 16 May 2020 10:09:26 +0800
Subject: net: phy: broadcom: fix checkpatch complaints about tabs

This patch makes checkpatch happy for tabs

Signed-off-by: Kevin Lo <kevlo@kevlo.org>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/broadcom.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/broadcom.c b/drivers/net/phy/broadcom.c
index 8cd8d188542a..cd271de9609b 100644
--- a/drivers/net/phy/broadcom.c
+++ b/drivers/net/phy/broadcom.c
@@ -356,7 +356,7 @@ static int bcm54811_config_init(struct phy_device *phydev)
 					BCM54612E_LED4_CLK125OUT_EN | reg);
 		if (err < 0)
 			return err;
-        }
+	}
 
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 74a1c059168ba1388aa475847c05a02f5f971a46 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Wed, 13 May 2020 16:55:46 +0300
Subject: dpaa2-eth: add bulking to XDP_TX

Add driver level bulking to the XDP_TX action.

An array of frame descriptors is held for each Tx frame queue and
populated accordingly when the action returned by the XDP program is
XDP_TX. The frames will be actually enqueued only when the array is
filled. At the end of the NAPI cycle a flush on the queued frames is
performed in order to enqueue the remaining FDs.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 68 ++++++++++++++++--------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |  1 +
 2 files changed, 46 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 5fbaa51b38bc..fe3806d54630 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -273,13 +273,43 @@ static int dpaa2_eth_xdp_flush(struct dpaa2_eth_priv *priv,
 	return total_enqueued;
 }
 
-static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
-		       void *buf_start, u16 queue_id)
+static void xdp_tx_flush(struct dpaa2_eth_priv *priv,
+			 struct dpaa2_eth_channel *ch,
+			 struct dpaa2_eth_fq *fq)
+{
+	struct rtnl_link_stats64 *percpu_stats;
+	struct dpaa2_fd *fds;
+	int enqueued, i;
+
+	percpu_stats = this_cpu_ptr(priv->percpu_stats);
+
+	// enqueue the array of XDP_TX frames
+	enqueued = dpaa2_eth_xdp_flush(priv, fq, &fq->xdp_tx_fds);
+
+	/* update statistics */
+	percpu_stats->tx_packets += enqueued;
+	fds = fq->xdp_tx_fds.fds;
+	for (i = 0; i < enqueued; i++) {
+		percpu_stats->tx_bytes += dpaa2_fd_get_len(&fds[i]);
+		ch->stats.xdp_tx++;
+	}
+	for (i = enqueued; i < fq->xdp_tx_fds.num; i++) {
+		xdp_release_buf(priv, ch, dpaa2_fd_get_addr(&fds[i]));
+		percpu_stats->tx_errors++;
+		ch->stats.xdp_tx_err++;
+	}
+	fq->xdp_tx_fds.num = 0;
+}
+
+static void xdp_enqueue(struct dpaa2_eth_priv *priv,
+			struct dpaa2_eth_channel *ch,
+			struct dpaa2_fd *fd,
+			void *buf_start, u16 queue_id)
 {
-	struct dpaa2_eth_fq *fq;
 	struct dpaa2_faead *faead;
+	struct dpaa2_fd *dest_fd;
+	struct dpaa2_eth_fq *fq;
 	u32 ctrl, frc;
-	int i, err;
 
 	/* Mark the egress frame hardware annotation area as valid */
 	frc = dpaa2_fd_get_frc(fd);
@@ -296,13 +326,13 @@ static int xdp_enqueue(struct dpaa2_eth_priv *priv, struct dpaa2_fd *fd,
 	faead->conf_fqid = 0;
 
 	fq = &priv->fq[queue_id];
-	for (i = 0; i < DPAA2_ETH_ENQUEUE_RETRIES; i++) {
-		err = priv->enqueue(priv, fq, fd, 0, 1, NULL);
-		if (err != -EBUSY)
-			break;
-	}
+	dest_fd = &fq->xdp_tx_fds.fds[fq->xdp_tx_fds.num++];
+	memcpy(dest_fd, fd, sizeof(*dest_fd));
 
-	return err;
+	if (fq->xdp_tx_fds.num < DEV_MAP_BULK_SIZE)
+		return;
+
+	xdp_tx_flush(priv, ch, fq);
 }
 
 static u32 run_xdp(struct dpaa2_eth_priv *priv,
@@ -311,14 +341,11 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
 		   struct dpaa2_fd *fd, void *vaddr)
 {
 	dma_addr_t addr = dpaa2_fd_get_addr(fd);
-	struct rtnl_link_stats64 *percpu_stats;
 	struct bpf_prog *xdp_prog;
 	struct xdp_buff xdp;
 	u32 xdp_act = XDP_PASS;
 	int err;
 
-	percpu_stats = this_cpu_ptr(priv->percpu_stats);
-
 	rcu_read_lock();
 
 	xdp_prog = READ_ONCE(ch->xdp.prog);
@@ -344,16 +371,7 @@ static u32 run_xdp(struct dpaa2_eth_priv *priv,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		err = xdp_enqueue(priv, fd, vaddr, rx_fq->flowid);
-		if (err) {
-			xdp_release_buf(priv, ch, addr);
-			percpu_stats->tx_errors++;
-			ch->stats.xdp_tx_err++;
-		} else {
-			percpu_stats->tx_packets++;
-			percpu_stats->tx_bytes += dpaa2_fd_get_len(fd);
-			ch->stats.xdp_tx++;
-		}
+		xdp_enqueue(priv, ch, fd, vaddr, rx_fq->flowid);
 		break;
 	default:
 		bpf_warn_invalid_xdp_action(xdp_act);
@@ -1175,6 +1193,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 	int store_cleaned, work_done;
 	struct list_head rx_list;
 	int retries = 0;
+	u16 flowid;
 	int err;
 
 	ch = container_of(napi, struct dpaa2_eth_channel, napi);
@@ -1197,6 +1216,7 @@ static int dpaa2_eth_poll(struct napi_struct *napi, int budget)
 			break;
 		if (fq->type == DPAA2_RX_FQ) {
 			rx_cleaned += store_cleaned;
+			flowid = fq->flowid;
 		} else {
 			txconf_cleaned += store_cleaned;
 			/* We have a single Tx conf FQ on this channel */
@@ -1239,6 +1259,8 @@ out:
 
 	if (ch->xdp.res & XDP_REDIRECT)
 		xdp_do_flush_map();
+	else if (rx_cleaned && ch->xdp.res & XDP_TX)
+		xdp_tx_flush(priv, ch, &priv->fq[flowid]);
 
 	return work_done;
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 42f0a7a80afa..0581fbf1f98c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -334,6 +334,7 @@ struct dpaa2_eth_fq {
 	struct dpaa2_eth_fq_stats stats;
 
 	struct dpaa2_eth_xdp_fds xdp_redirect_fds;
+	struct dpaa2_eth_xdp_fds xdp_tx_fds;
 };
 
 struct dpaa2_eth_ch_xdp {
-- 
cgit v1.2.3-59-g8ed1b


From 9efd6a3cecdde984d67e63d17fe6af53c7c50968 Mon Sep 17 00:00:00 2001
From: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Date: Wed, 13 May 2020 15:58:43 +0200
Subject: netns: enable to inherit devconf from current netns

The goal is to be able to inherit the initial devconf parameters from the
current netns, i.e. the netns where this new netns has been created.

This is useful in a container environment where /proc/sys is read-only.
For example, if a pod is created with specific devconf parameters and has
the capability to create netns, the user expects to get the same parameters
as his 'init_net', which is not the real init_net in this case.

Signed-off-by: Nicolas Dichtel <nicolas.dichtel@6wind.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/admin-guide/sysctl/net.rst |  4 +++-
 net/core/sysctl_net_core.c               |  4 +++-
 net/ipv4/devinet.c                       | 23 ++++++++++++++++++-----
 net/ipv6/addrconf.c                      | 23 ++++++++++++++++++++---
 4 files changed, 44 insertions(+), 10 deletions(-)

diff --git a/Documentation/admin-guide/sysctl/net.rst b/Documentation/admin-guide/sysctl/net.rst
index 2ad1b77a7182..42cd04bca548 100644
--- a/Documentation/admin-guide/sysctl/net.rst
+++ b/Documentation/admin-guide/sysctl/net.rst
@@ -339,7 +339,9 @@ settings from init_net and for IPv6 we reset all settings to default.
 
 If set to 1, both IPv4 and IPv6 settings are forced to inherit from
 current ones in init_net. If set to 2, both IPv4 and IPv6 settings are
-forced to reset to their default values.
+forced to reset to their default values. If set to 3, both IPv4 and IPv6
+settings are forced to inherit from current ones in the netns where this
+new netns has been created.
 
 Default : 0  (for compatibility reasons)
 
diff --git a/net/core/sysctl_net_core.c b/net/core/sysctl_net_core.c
index 0ddb13a6282b..b109cc8a6dd8 100644
--- a/net/core/sysctl_net_core.c
+++ b/net/core/sysctl_net_core.c
@@ -23,6 +23,7 @@
 #include <net/pkt_sched.h>
 
 static int two __maybe_unused = 2;
+static int three = 3;
 static int min_sndbuf = SOCK_MIN_SNDBUF;
 static int min_rcvbuf = SOCK_MIN_RCVBUF;
 static int max_skb_frags = MAX_SKB_FRAGS;
@@ -39,6 +40,7 @@ EXPORT_SYMBOL(sysctl_fb_tunnels_only_for_init_net);
  *     IPv6: reset all settings to default
  * 1 - Both inherit all current settings from init_net
  * 2 - Both reset all settings to default
+ * 3 - Both inherit all settings from current netns
  */
 int sysctl_devconf_inherit_init_net __read_mostly;
 EXPORT_SYMBOL(sysctl_devconf_inherit_init_net);
@@ -553,7 +555,7 @@ static struct ctl_table net_core_table[] = {
 		.mode		= 0644,
 		.proc_handler	= proc_dointvec_minmax,
 		.extra1		= SYSCTL_ZERO,
-		.extra2		= &two,
+		.extra2		= &three,
 	},
 	{
 		.procname	= "high_order_alloc_disable",
diff --git a/net/ipv4/devinet.c b/net/ipv4/devinet.c
index fc94f82f82c7..f048d0a188b7 100644
--- a/net/ipv4/devinet.c
+++ b/net/ipv4/devinet.c
@@ -2666,11 +2666,24 @@ static __net_init int devinet_init_net(struct net *net)
 	tbl[0].extra2 = net;
 #endif
 
-	if ((!IS_ENABLED(CONFIG_SYSCTL) ||
-	     sysctl_devconf_inherit_init_net != 2) &&
-	    !net_eq(net, &init_net)) {
-		memcpy(all, init_net.ipv4.devconf_all, sizeof(ipv4_devconf));
-		memcpy(dflt, init_net.ipv4.devconf_dflt, sizeof(ipv4_devconf_dflt));
+	if (!net_eq(net, &init_net)) {
+		if (IS_ENABLED(CONFIG_SYSCTL) &&
+		    sysctl_devconf_inherit_init_net == 3) {
+			/* copy from the current netns */
+			memcpy(all, current->nsproxy->net_ns->ipv4.devconf_all,
+			       sizeof(ipv4_devconf));
+			memcpy(dflt,
+			       current->nsproxy->net_ns->ipv4.devconf_dflt,
+			       sizeof(ipv4_devconf_dflt));
+		} else if (!IS_ENABLED(CONFIG_SYSCTL) ||
+			   sysctl_devconf_inherit_init_net != 2) {
+			/* inherit == 0 or 1: copy from init_net */
+			memcpy(all, init_net.ipv4.devconf_all,
+			       sizeof(ipv4_devconf));
+			memcpy(dflt, init_net.ipv4.devconf_dflt,
+			       sizeof(ipv4_devconf_dflt));
+		}
+		/* else inherit == 2: use compiled values */
 	}
 
 #ifdef CONFIG_SYSCTL
diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index fd885f06c4ed..ab7e839753ae 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -6991,9 +6991,26 @@ static int __net_init addrconf_init_net(struct net *net)
 		goto err_alloc_dflt;
 
 	if (IS_ENABLED(CONFIG_SYSCTL) &&
-	    sysctl_devconf_inherit_init_net == 1 && !net_eq(net, &init_net)) {
-		memcpy(all, init_net.ipv6.devconf_all, sizeof(ipv6_devconf));
-		memcpy(dflt, init_net.ipv6.devconf_dflt, sizeof(ipv6_devconf_dflt));
+	    !net_eq(net, &init_net)) {
+		switch (sysctl_devconf_inherit_init_net) {
+		case 1:  /* copy from init_net */
+			memcpy(all, init_net.ipv6.devconf_all,
+			       sizeof(ipv6_devconf));
+			memcpy(dflt, init_net.ipv6.devconf_dflt,
+			       sizeof(ipv6_devconf_dflt));
+			break;
+		case 3: /* copy from the current netns */
+			memcpy(all, current->nsproxy->net_ns->ipv6.devconf_all,
+			       sizeof(ipv6_devconf));
+			memcpy(dflt,
+			       current->nsproxy->net_ns->ipv6.devconf_dflt,
+			       sizeof(ipv6_devconf_dflt));
+			break;
+		case 0:
+		case 2:
+			/* use compiled values */
+			break;
+		}
 	}
 
 	/* these will be inherited by all namespaces */
-- 
cgit v1.2.3-59-g8ed1b


From a0c1d0eafd1ef1ada3b588ea205e5bc37ae0d8d9 Mon Sep 17 00:00:00 2001
From: Christoph Paasch <cpaasch@apple.com>
Date: Thu, 14 May 2020 08:53:03 -0700
Subject: mptcp: Use 32-bit DATA_ACK when possible

RFC8684 allows sending 32-bit DATA_ACKs as long as the peer is not
sending 64-bit data-sequence numbers. The 64-bit DSN is only there for
extreme scenarios when a very high throughput subflow is combined with a
long-RTT subflow such that the high-throughput subflow wraps around the
32-bit sequence number space within an RTT of the high-RTT subflow.

It is thus a rare scenario and we should try to use the 32-bit DATA_ACK
instead as long as possible. This reduces the TCP-option overhead
by 4 bytes, which makes space for an additional SACK-block. It also makes
tcpdumps much easier to read when the DSN and DATA_ACK are both either
32-bit or 64-bit.

Signed-off-by: Christoph Paasch <cpaasch@apple.com>
Reviewed-by: Matthieu Baerts <matthieu.baerts@tessares.net>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/mptcp.h  |  5 ++++-
 net/mptcp/options.c  | 33 ++++++++++++++++++++++++---------
 net/mptcp/protocol.h |  1 +
 net/mptcp/subflow.c  |  2 ++
 4 files changed, 31 insertions(+), 10 deletions(-)

diff --git a/include/net/mptcp.h b/include/net/mptcp.h
index c4a6ef4ba35b..46d0487d2b22 100644
--- a/include/net/mptcp.h
+++ b/include/net/mptcp.h
@@ -16,7 +16,10 @@ struct seq_file;
 
 /* MPTCP sk_buff extension data */
 struct mptcp_ext {
-	u64		data_ack;
+	union {
+		u64	data_ack;
+		u32	data_ack32;
+	};
 	u64		data_seq;
 	u32		subflow_seq;
 	u16		data_len;
diff --git a/net/mptcp/options.c b/net/mptcp/options.c
index 45497af23906..ece6f92cf7d1 100644
--- a/net/mptcp/options.c
+++ b/net/mptcp/options.c
@@ -516,7 +516,16 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 		return ret;
 	}
 
-	ack_size = TCPOLEN_MPTCP_DSS_ACK64;
+	if (subflow->use_64bit_ack) {
+		ack_size = TCPOLEN_MPTCP_DSS_ACK64;
+		opts->ext_copy.data_ack = msk->ack_seq;
+		opts->ext_copy.ack64 = 1;
+	} else {
+		ack_size = TCPOLEN_MPTCP_DSS_ACK32;
+		opts->ext_copy.data_ack32 = (uint32_t)(msk->ack_seq);
+		opts->ext_copy.ack64 = 0;
+	}
+	opts->ext_copy.use_ack = 1;
 
 	/* Add kind/length/subtype/flag overhead if mapping is not populated */
 	if (dss_size == 0)
@@ -524,10 +533,6 @@ static bool mptcp_established_options_dss(struct sock *sk, struct sk_buff *skb,
 
 	dss_size += ack_size;
 
-	opts->ext_copy.data_ack = msk->ack_seq;
-	opts->ext_copy.ack64 = 1;
-	opts->ext_copy.use_ack = 1;
-
 	*size = ALIGN(dss_size, 4);
 	return true;
 }
@@ -986,8 +991,13 @@ mp_capable_done:
 		u8 flags = 0;
 
 		if (mpext->use_ack) {
-			len += TCPOLEN_MPTCP_DSS_ACK64;
-			flags = MPTCP_DSS_HAS_ACK | MPTCP_DSS_ACK64;
+			flags = MPTCP_DSS_HAS_ACK;
+			if (mpext->ack64) {
+				len += TCPOLEN_MPTCP_DSS_ACK64;
+				flags |= MPTCP_DSS_ACK64;
+			} else {
+				len += TCPOLEN_MPTCP_DSS_ACK32;
+			}
 		}
 
 		if (mpext->use_map) {
@@ -1004,8 +1014,13 @@ mp_capable_done:
 		*ptr++ = mptcp_option(MPTCPOPT_DSS, len, 0, flags);
 
 		if (mpext->use_ack) {
-			put_unaligned_be64(mpext->data_ack, ptr);
-			ptr += 2;
+			if (mpext->ack64) {
+				put_unaligned_be64(mpext->data_ack, ptr);
+				ptr += 2;
+			} else {
+				put_unaligned_be32(mpext->data_ack32, ptr);
+				ptr += 1;
+			}
 		}
 
 		if (mpext->use_map) {
diff --git a/net/mptcp/protocol.h b/net/mptcp/protocol.h
index e4ca6320ce76..f5adca93e8fb 100644
--- a/net/mptcp/protocol.h
+++ b/net/mptcp/protocol.h
@@ -290,6 +290,7 @@ struct mptcp_subflow_context {
 		data_avail : 1,
 		rx_eof : 1,
 		data_fin_tx_enable : 1,
+		use_64bit_ack : 1, /* Set when we received a 64-bit DSN */
 		can_ack : 1;	    /* only after processing the remote a key */
 	u64	data_fin_tx_seq;
 	u32	remote_nonce;
diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index 53c75b0e5dce..0020d356233d 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -667,9 +667,11 @@ static enum mapping_status get_mapping_status(struct sock *ssk)
 	if (!mpext->dsn64) {
 		map_seq = expand_seq(subflow->map_seq, subflow->map_data_len,
 				     mpext->data_seq);
+		subflow->use_64bit_ack = 0;
 		pr_debug("expanded seq=%llu", subflow->map_seq);
 	} else {
 		map_seq = mpext->data_seq;
+		subflow->use_64bit_ack = 1;
 	}
 
 	if (subflow->map_valid) {
-- 
cgit v1.2.3-59-g8ed1b


From 7be92514b99c15b89def6d72bbc84c354f89a025 Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 15 May 2020 12:49:00 -0700
Subject: ethtool: check if there is at least one channel for TX/RX in the core

Having a channel config with no ability to RX or TX traffic is
clearly wrong. Check for this in the core so the drivers don't
have to.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/channels.c | 20 ++++++++++++++++++--
 net/ethtool/ioctl.c    |  5 +++++
 2 files changed, 23 insertions(+), 2 deletions(-)

diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 389924b65d05..3aa4975919d7 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -129,13 +129,13 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *tb[ETHTOOL_A_CHANNELS_MAX + 1];
 	unsigned int from_channel, old_total, i;
+	bool mod = false, mod_combined = false;
 	struct ethtool_channels channels = {};
 	struct ethnl_req_info req_info = {};
 	const struct nlattr *err_attr;
 	const struct ethtool_ops *ops;
 	struct net_device *dev;
 	u32 max_rx_in_use = 0;
-	bool mod = false;
 	int ret;
 
 	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
@@ -170,7 +170,8 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 	ethnl_update_u32(&channels.other_count,
 			 tb[ETHTOOL_A_CHANNELS_OTHER_COUNT], &mod);
 	ethnl_update_u32(&channels.combined_count,
-			 tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod);
+			 tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT], &mod_combined);
+	mod |= mod_combined;
 	ret = 0;
 	if (!mod)
 		goto out_ops;
@@ -193,6 +194,21 @@ int ethnl_set_channels(struct sk_buff *skb, struct genl_info *info)
 		goto out_ops;
 	}
 
+	/* ensure there is at least one RX and one TX channel */
+	if (!channels.combined_count && !channels.rx_count)
+		err_attr = tb[ETHTOOL_A_CHANNELS_RX_COUNT];
+	else if (!channels.combined_count && !channels.tx_count)
+		err_attr = tb[ETHTOOL_A_CHANNELS_TX_COUNT];
+	else
+		err_attr = NULL;
+	if (err_attr) {
+		if (mod_combined)
+			err_attr = tb[ETHTOOL_A_CHANNELS_COMBINED_COUNT];
+		ret = -EINVAL;
+		NL_SET_ERR_MSG_ATTR(info->extack, err_attr, "requested channel counts would result in no RX or TX channel being configured");
+		goto out_ops;
+	}
+
 	/* ensure the new Rx count fits within the configured Rx flow
 	 * indirection table settings
 	 */
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 52102ab1709b..a574d60111fa 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1676,6 +1676,11 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 	    channels.other_count > curr.max_other)
 		return -EINVAL;
 
+	/* ensure there is at least one RX and one TX channel */
+	if (!channels.combined_count &&
+	    (!channels.rx_count || !channels.tx_count))
+		return -EINVAL;
+
 	/* ensure the new Rx count fits within the configured Rx flow
 	 * indirection table settings */
 	if (netif_is_rxfh_configured(dev) &&
-- 
cgit v1.2.3-59-g8ed1b


From 4df6ff2a99920254ce8c2a3c418ddabc69ff16fe Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 15 May 2020 12:49:01 -0700
Subject: nfp: don't check lack of RX/TX channels

Core will now perform this check.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
index a5aa3219d112..6eb9fb9a1814 100644
--- a/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
+++ b/drivers/net/ethernet/netronome/nfp/nfp_net_ethtool.c
@@ -1438,8 +1438,7 @@ static int nfp_net_set_channels(struct net_device *netdev,
 	unsigned int total_rx, total_tx;
 
 	/* Reject unsupported */
-	if (!channel->combined_count ||
-	    channel->other_count != NFP_NET_NON_Q_VECTORS ||
+	if (channel->other_count != NFP_NET_NON_Q_VECTORS ||
 	    (channel->rx_count && channel->tx_count))
 		return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From 75c36dbb1c3790eab909344b758decce6bb432da Mon Sep 17 00:00:00 2001
From: Jakub Kicinski <kuba@kernel.org>
Date: Fri, 15 May 2020 12:49:02 -0700
Subject: ethtool: don't call set_channels in drivers if config didn't change

Don't call drivers if nothing changed. Netlink code already
contains this logic.

Signed-off-by: Jakub Kicinski <kuba@kernel.org>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index a574d60111fa..eeb1137a3f23 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1669,6 +1669,12 @@ static noinline_for_stack int ethtool_set_channels(struct net_device *dev,
 
 	dev->ethtool_ops->get_channels(dev, &curr);
 
+	if (channels.rx_count == curr.rx_count &&
+	    channels.tx_count == curr.tx_count &&
+	    channels.combined_count == curr.combined_count &&
+	    channels.other_count == curr.other_count)
+		return 0;
+
 	/* ensure new counts are within the maximums */
 	if (channels.rx_count > curr.max_rx ||
 	    channels.tx_count > curr.max_tx ||
-- 
cgit v1.2.3-59-g8ed1b


From 13f15b59ad70549545272cc8f0bf4d68a8ef9c14 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sun, 17 May 2020 00:05:08 +0200
Subject: r8169: remove remaining call to mdiobus_unregister

After switching to devm_mdiobus_register(), this remaining call to
mdiobus_unregister() can also be removed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 97a7e27fff16..e35820c72264 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4990,7 +4990,6 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	netif_napi_del(&tp->napi);
 
 	unregister_netdev(dev);
-	mdiobus_unregister(tp->phydev->mdio.bus);
 
 	rtl_release_firmware(tp);
 
-- 
cgit v1.2.3-59-g8ed1b


From 85d4ec592510520a2beb11c7f35748fd09ffe089 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:05 +0300
Subject: mlxsw: spectrum_trap: Move struct definition out of header file

'struct mlxsw_sp_trap_policer_item' is only used in one file, so move it
there.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 6 ++++++
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index fbf714d027d8..634e695b89fa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -12,6 +12,12 @@
 #include "spectrum.h"
 #include "spectrum_trap.h"
 
+struct mlxsw_sp_trap_policer_item {
+	u16 hw_id;
+	u32 id;
+	struct list_head list; /* Member of policer_item_list */
+};
+
 /* All driver-specific traps must be documented in
  * Documentation/networking/devlink/mlxsw.rst
  */
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
index 8c54897ba173..8a11a2b973f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
@@ -15,10 +15,4 @@ struct mlxsw_sp_trap {
 	unsigned long policers_usage[]; /* Usage bitmap */
 };
 
-struct mlxsw_sp_trap_policer_item {
-	u16 hw_id;
-	u32 id;
-	struct list_head list; /* Member of policer_item_list */
-};
-
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From cc678f4dbc576f75e0465b21a18f8460858b07b7 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:06 +0300
Subject: mlxsw: spectrum_trap: Store all trap policer data in one array

Instead of maintaining an array of policers and a linked list, only
maintain an array.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 148 ++++++++++++---------
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.h    |   3 +-
 2 files changed, 89 insertions(+), 62 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 634e695b89fa..7b2ddc49a04d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -13,9 +13,8 @@
 #include "spectrum_trap.h"
 
 struct mlxsw_sp_trap_policer_item {
+	struct devlink_trap_policer policer;
 	u16 hw_id;
-	u32 id;
-	struct list_head list; /* Member of policer_item_list */
 };
 
 /* All driver-specific traps must be documented in
@@ -182,8 +181,11 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
 			     1 << MLXSW_REG_QPCR_LOWEST_CBS)
 
 /* Ordered by policer identifier */
-static const struct devlink_trap_policer mlxsw_sp_trap_policers_arr[] = {
-	MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128),
+static const struct mlxsw_sp_trap_policer_item
+mlxsw_sp_trap_policer_items_arr[] = {
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128),
+	},
 };
 
 static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = {
@@ -319,12 +321,12 @@ static const u16 mlxsw_sp_listener_devlink_map[] = {
 static struct mlxsw_sp_trap_policer_item *
 mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
 {
-	struct mlxsw_sp_trap_policer_item *policer_item;
 	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
 
-	list_for_each_entry(policer_item, &trap->policer_item_list, list) {
-		if (policer_item->id == id)
-			return policer_item;
+	for (i = 0; i < trap->policers_count; i++) {
+		if (trap->policer_items_arr[i].policer.id == id)
+			return &trap->policer_items_arr[i];
 	}
 
 	return NULL;
@@ -352,72 +354,102 @@ static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp)
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
 }
 
-static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp)
+static int mlxsw_sp_trap_policer_items_arr_init(struct mlxsw_sp *mlxsw_sp)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	size_t elem_size = sizeof(struct mlxsw_sp_trap_policer_item);
+	u64 arr_size = ARRAY_SIZE(mlxsw_sp_trap_policer_items_arr);
 	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
 	u64 free_policers = 0;
-	u32 last_id = 0;
-	int err, i;
+	u32 last_id;
+	int i;
 
 	for_each_clear_bit(i, trap->policers_usage, trap->max_policers)
 		free_policers++;
 
-	if (ARRAY_SIZE(mlxsw_sp_trap_policers_arr) > free_policers) {
+	if (arr_size > free_policers) {
 		dev_err(mlxsw_sp->bus_info->dev, "Exceeded number of supported packet trap policers\n");
 		return -ENOBUFS;
 	}
 
-	trap->policers_arr = kcalloc(free_policers,
-				     sizeof(struct devlink_trap_policer),
-				     GFP_KERNEL);
-	if (!trap->policers_arr)
+	trap->policer_items_arr = kcalloc(free_policers, elem_size, GFP_KERNEL);
+	if (!trap->policer_items_arr)
 		return -ENOMEM;
 
 	trap->policers_count = free_policers;
 
-	for (i = 0; i < free_policers; i++) {
-		const struct devlink_trap_policer *policer;
-
-		if (i < ARRAY_SIZE(mlxsw_sp_trap_policers_arr)) {
-			policer = &mlxsw_sp_trap_policers_arr[i];
-			trap->policers_arr[i] = *policer;
-			last_id = policer->id;
-		} else {
-			/* Use parameters set for first policer and override
-			 * relevant ones.
-			 */
-			policer = &mlxsw_sp_trap_policers_arr[0];
-			trap->policers_arr[i] = *policer;
-			trap->policers_arr[i].id = ++last_id;
-			trap->policers_arr[i].init_rate = 1;
-			trap->policers_arr[i].init_burst = 16;
-		}
+	/* Initialize policer items array with pre-defined policers. */
+	memcpy(trap->policer_items_arr, mlxsw_sp_trap_policer_items_arr,
+	       elem_size * arr_size);
+
+	/* Initialize policer items array with the rest of the available
+	 * policers.
+	 */
+	last_id = mlxsw_sp_trap_policer_items_arr[arr_size - 1].policer.id;
+	for (i = arr_size; i < trap->policers_count; i++) {
+		const struct mlxsw_sp_trap_policer_item *policer_item;
+
+		/* Use parameters set for first policer and override
+		 * relevant ones.
+		 */
+		policer_item = &mlxsw_sp_trap_policer_items_arr[0];
+		trap->policer_items_arr[i] = *policer_item;
+		trap->policer_items_arr[i].policer.id = ++last_id;
+		trap->policer_items_arr[i].policer.init_rate = 1;
+		trap->policer_items_arr[i].policer.init_burst = 16;
 	}
 
-	INIT_LIST_HEAD(&trap->policer_item_list);
+	return 0;
+}
+
+static void mlxsw_sp_trap_policer_items_arr_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	kfree(mlxsw_sp->trap->policer_items_arr);
+}
+
+static int mlxsw_sp_trap_policers_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	const struct mlxsw_sp_trap_policer_item *policer_item;
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int err, i;
 
-	err = devlink_trap_policers_register(devlink, trap->policers_arr,
-					     trap->policers_count);
+	err = mlxsw_sp_trap_policer_items_arr_init(mlxsw_sp);
 	if (err)
-		goto err_trap_policers_register;
+		return err;
+
+	for (i = 0; i < trap->policers_count; i++) {
+		policer_item = &trap->policer_items_arr[i];
+		err = devlink_trap_policers_register(devlink,
+						     &policer_item->policer, 1);
+		if (err)
+			goto err_trap_policer_register;
+	}
 
 	return 0;
 
-err_trap_policers_register:
-	kfree(trap->policers_arr);
+err_trap_policer_register:
+	for (i--; i >= 0; i--) {
+		policer_item = &trap->policer_items_arr[i];
+		devlink_trap_policers_unregister(devlink,
+						 &policer_item->policer, 1);
+	}
+	mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp);
 	return err;
 }
 
 static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	const struct mlxsw_sp_trap_policer_item *policer_item;
 	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
 
-	devlink_trap_policers_unregister(devlink, trap->policers_arr,
-					 trap->policers_count);
-	WARN_ON(!list_empty(&trap->policer_item_list));
-	kfree(trap->policers_arr);
+	for (i = trap->policers_count - 1; i >= 0; i--) {
+		policer_item = &trap->policer_items_arr[i];
+		devlink_trap_policers_unregister(devlink,
+						 &policer_item->policer, 1);
+	}
+	mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp);
 }
 
 int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
@@ -608,10 +640,10 @@ int mlxsw_sp_trap_group_set(struct mlxsw_core *mlxsw_core,
 	return __mlxsw_sp_trap_group_init(mlxsw_core, group, policer_id);
 }
 
-static struct mlxsw_sp_trap_policer_item *
-mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id)
+static int
+mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp,
+				struct mlxsw_sp_trap_policer_item *policer_item)
 {
-	struct mlxsw_sp_trap_policer_item *policer_item;
 	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
 	u16 hw_id;
 
@@ -621,27 +653,19 @@ mlxsw_sp_trap_policer_item_init(struct mlxsw_sp *mlxsw_sp, u32 id)
 	 */
 	hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers);
 	if (WARN_ON(hw_id == trap->max_policers))
-		return ERR_PTR(-ENOBUFS);
-
-	policer_item = kzalloc(sizeof(*policer_item), GFP_KERNEL);
-	if (!policer_item)
-		return ERR_PTR(-ENOMEM);
+		return -ENOBUFS;
 
 	__set_bit(hw_id, trap->policers_usage);
 	policer_item->hw_id = hw_id;
-	policer_item->id = id;
-	list_add_tail(&policer_item->list, &trap->policer_item_list);
 
-	return policer_item;
+	return 0;
 }
 
 static void
 mlxsw_sp_trap_policer_item_fini(struct mlxsw_sp *mlxsw_sp,
 				struct mlxsw_sp_trap_policer_item *policer_item)
 {
-	list_del(&policer_item->list);
 	__clear_bit(policer_item->hw_id, mlxsw_sp->trap->policers_usage);
-	kfree(policer_item);
 }
 
 static int mlxsw_sp_trap_policer_bs(u64 burst, u8 *p_burst_size,
@@ -684,9 +708,13 @@ int mlxsw_sp_trap_policer_init(struct mlxsw_core *mlxsw_core,
 	struct mlxsw_sp_trap_policer_item *policer_item;
 	int err;
 
-	policer_item = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer->id);
-	if (IS_ERR(policer_item))
-		return PTR_ERR(policer_item);
+	policer_item = mlxsw_sp_trap_policer_item_lookup(mlxsw_sp, policer->id);
+	if (WARN_ON(!policer_item))
+		return -EINVAL;
+
+	err = mlxsw_sp_trap_policer_item_init(mlxsw_sp, policer_item);
+	if (err)
+		return err;
 
 	err = __mlxsw_sp_trap_policer_set(mlxsw_sp, policer_item->hw_id,
 					  policer->init_rate,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
index 8a11a2b973f8..8be8482d82ac 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
@@ -8,9 +8,8 @@
 #include <net/devlink.h>
 
 struct mlxsw_sp_trap {
-	struct devlink_trap_policer *policers_arr; /* Registered policers */
+	struct mlxsw_sp_trap_policer_item *policer_items_arr;
 	u64 policers_count; /* Number of registered policers */
-	struct list_head policer_item_list;
 	u64 max_policers;
 	unsigned long policers_usage[]; /* Usage bitmap */
 };
-- 
cgit v1.2.3-59-g8ed1b


From b14a40dbdea5d0c4decc463dfd2b5dd8e4b11e46 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:07 +0300
Subject: mlxsw: spectrum_trap: Store all trap group data in one array

Use one array to store all the information about all the trap groups
instead of hard coding it in code. This will be used in future patches
to disable certain functionality (e.g., policer binding) on a trap group
basis.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 147 +++++++++++++++------
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.h    |   4 +
 2 files changed, 110 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 7b2ddc49a04d..f87135ee69ee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -17,6 +17,13 @@ struct mlxsw_sp_trap_policer_item {
 	u16 hw_id;
 };
 
+struct mlxsw_sp_trap_group_item {
+	struct devlink_trap_group group;
+	u16 hw_group_id;
+	u8 priority;
+	u8 tc;
+};
+
 /* All driver-specific traps must be documented in
  * Documentation/networking/devlink/mlxsw.rst
  */
@@ -188,11 +195,31 @@ mlxsw_sp_trap_policer_items_arr[] = {
 	},
 };
 
-static const struct devlink_trap_group mlxsw_sp_trap_groups_arr[] = {
-	DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1),
-	DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
-	DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1),
-	DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1),
+static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
+		.priority = 0,
+		.tc = 1,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
+		.priority = 0,
+		.tc = 1,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
+		.priority = 0,
+		.tc = 1,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
+		.priority = 0,
+		.tc = 1,
+	},
 };
 
 static const struct devlink_trap mlxsw_sp_traps_arr[] = {
@@ -332,6 +359,20 @@ mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
 	return NULL;
 }
 
+static struct mlxsw_sp_trap_group_item *
+mlxsw_sp_trap_group_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id)
+{
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
+
+	for (i = 0; i < trap->groups_count; i++) {
+		if (trap->group_items_arr[i].group.id == id)
+			return &trap->group_items_arr[i];
+	}
+
+	return NULL;
+}
+
 static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp)
 {
 	char qpcr_pl[MLXSW_REG_QPCR_LEN];
@@ -452,9 +493,57 @@ static void mlxsw_sp_trap_policers_fini(struct mlxsw_sp *mlxsw_sp)
 	mlxsw_sp_trap_policer_items_arr_fini(mlxsw_sp);
 }
 
+static int mlxsw_sp_trap_groups_init(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	const struct mlxsw_sp_trap_group_item *group_item;
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int err, i;
+
+	trap->group_items_arr = kmemdup(mlxsw_sp_trap_group_items_arr,
+					sizeof(mlxsw_sp_trap_group_items_arr),
+					GFP_KERNEL);
+	if (!trap->group_items_arr)
+		return -ENOMEM;
+
+	trap->groups_count = ARRAY_SIZE(mlxsw_sp_trap_group_items_arr);
+
+	for (i = 0; i < trap->groups_count; i++) {
+		group_item = &trap->group_items_arr[i];
+		err = devlink_trap_groups_register(devlink, &group_item->group,
+						   1);
+		if (err)
+			goto err_trap_group_register;
+	}
+
+	return 0;
+
+err_trap_group_register:
+	for (i--; i >= 0; i--) {
+		group_item = &trap->group_items_arr[i];
+		devlink_trap_groups_unregister(devlink, &group_item->group, 1);
+	}
+	kfree(trap->group_items_arr);
+	return err;
+}
+
+static void mlxsw_sp_trap_groups_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
+
+	for (i = trap->groups_count - 1; i >= 0; i--) {
+		const struct mlxsw_sp_trap_group_item *group_item;
+
+		group_item = &trap->group_items_arr[i];
+		devlink_trap_groups_unregister(devlink, &group_item->group, 1);
+	}
+	kfree(trap->group_items_arr);
+}
+
 int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
-	size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr);
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 	int err;
 
@@ -474,10 +563,9 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		return err;
 
-	err = devlink_trap_groups_register(devlink, mlxsw_sp_trap_groups_arr,
-					   groups_count);
+	err = mlxsw_sp_trap_groups_init(mlxsw_sp);
 	if (err)
-		goto err_trap_groups_register;
+		goto err_trap_groups_init;
 
 	err = devlink_traps_register(devlink, mlxsw_sp_traps_arr,
 				     ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp);
@@ -487,22 +575,19 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 	return 0;
 
 err_traps_register:
-	devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr,
-				       groups_count);
-err_trap_groups_register:
+	mlxsw_sp_trap_groups_fini(mlxsw_sp);
+err_trap_groups_init:
 	mlxsw_sp_trap_policers_fini(mlxsw_sp);
 	return err;
 }
 
 void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	size_t groups_count = ARRAY_SIZE(mlxsw_sp_trap_groups_arr);
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
 
 	devlink_traps_unregister(devlink, mlxsw_sp_traps_arr,
 				 ARRAY_SIZE(mlxsw_sp_traps_arr));
-	devlink_trap_groups_unregister(devlink, mlxsw_sp_trap_groups_arr,
-				       groups_count);
+	mlxsw_sp_trap_groups_fini(mlxsw_sp);
 	mlxsw_sp_trap_policers_fini(mlxsw_sp);
 }
 
@@ -582,33 +667,12 @@ __mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
 {
 	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
 	u16 hw_policer_id = MLXSW_REG_HTGT_INVALID_POLICER;
+	const struct mlxsw_sp_trap_group_item *group_item;
 	char htgt_pl[MLXSW_REG_HTGT_LEN];
-	u8 priority, tc, group_id;
-
-	switch (group->id) {
-	case DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS:
-		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS;
-		priority = 0;
-		tc = 1;
-		break;
-	case DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS:
-		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS;
-		priority = 0;
-		tc = 1;
-		break;
-	case DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS:
-		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS;
-		priority = 0;
-		tc = 1;
-		break;
-	case DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS:
-		group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS;
-		priority = 0;
-		tc = 1;
-		break;
-	default:
+
+	group_item = mlxsw_sp_trap_group_item_lookup(mlxsw_sp, group->id);
+	if (WARN_ON(!group_item))
 		return -EINVAL;
-	}
 
 	if (policer_id) {
 		struct mlxsw_sp_trap_policer_item *policer_item;
@@ -620,7 +684,8 @@ __mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
 		hw_policer_id = policer_item->hw_id;
 	}
 
-	mlxsw_reg_htgt_pack(htgt_pl, group_id, hw_policer_id, priority, tc);
+	mlxsw_reg_htgt_pack(htgt_pl, group_item->hw_group_id, hw_policer_id,
+			    group_item->priority, group_item->tc);
 	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
index 8be8482d82ac..1280f8bc617a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
@@ -10,6 +10,10 @@
 struct mlxsw_sp_trap {
 	struct mlxsw_sp_trap_policer_item *policer_items_arr;
 	u64 policers_count; /* Number of registered policers */
+
+	struct mlxsw_sp_trap_group_item *group_items_arr;
+	u64 groups_count; /* Number of registered groups */
+
 	u64 max_policers;
 	unsigned long policers_usage[]; /* Usage bitmap */
 };
-- 
cgit v1.2.3-59-g8ed1b


From 200b7cca0ba170d8a7e1bd228f0fb1f885de02c0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:08 +0300
Subject: mlxsw: spectrum_trap: Store all trap data in one array

Each trap registered with devlink is mapped to one or more Rx listeners.
These listeners allow the switch driver (e.g., mlxsw_spectrum) to
register a function that is called when a packet is received (trapped)
for a specific reason.

Currently, three arrays are used to describe the mapping between the
logical devlink traps and the Rx listeners.

Instead, get rid of these arrays and store all the information in one
array that is easier to validate and extend with more per-trap
information.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 466 ++++++++++++++-------
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.h    |   3 +
 2 files changed, 326 insertions(+), 143 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index f87135ee69ee..3a13b17cd1b8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -24,6 +24,13 @@ struct mlxsw_sp_trap_group_item {
 	u8 tc;
 };
 
+#define MLXSW_SP_TRAP_LISTENERS_MAX 3
+
+struct mlxsw_sp_trap_item {
+	struct devlink_trap trap;
+	struct mlxsw_listener listeners_arr[MLXSW_SP_TRAP_LISTENERS_MAX];
+};
+
 /* All driver-specific traps must be documented in
  * Documentation/networking/devlink/mlxsw.rst
  */
@@ -222,125 +229,221 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 	},
 };
 
-static const struct devlink_trap mlxsw_sp_traps_arr[] = {
-	MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
-	MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS, L3_DROPS),
-	MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS),
-	MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS),
-	MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS),
-	MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS),
-	MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS),
-	MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS,
-			       DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
-	MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS,
-			       DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
-};
-
-static const struct mlxsw_listener mlxsw_sp_listeners_arr[] = {
-	MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE, L3_DISCARDS),
-	MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS, TRAP_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS),
-	MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS),
-	MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR, TUNNEL_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS,
-			       TRAP_EXCEPTION_TO_CPU),
-	MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS),
-	MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS, DUMMY),
-	MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS, DUMMY),
-};
-
-/* Mapping between hardware trap and devlink trap. Multiple hardware traps can
- * be mapped to the same devlink trap. Order is according to
- * 'mlxsw_sp_listeners_arr'.
- */
-static const u16 mlxsw_sp_listener_devlink_map[] = {
-	DEVLINK_TRAP_GENERIC_ID_SMAC_MC,
-	DEVLINK_TRAP_GENERIC_ID_VLAN_TAG_MISMATCH,
-	DEVLINK_TRAP_GENERIC_ID_INGRESS_VLAN_FILTER,
-	DEVLINK_TRAP_GENERIC_ID_INGRESS_STP_FILTER,
-	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
-	DEVLINK_TRAP_GENERIC_ID_EMPTY_TX_LIST,
-	DEVLINK_TRAP_GENERIC_ID_PORT_LOOPBACK_FILTER,
-	DEVLINK_TRAP_GENERIC_ID_BLACKHOLE_ROUTE,
-	DEVLINK_TRAP_GENERIC_ID_NON_IP_PACKET,
-	DEVLINK_TRAP_GENERIC_ID_UC_DIP_MC_DMAC,
-	DEVLINK_TRAP_GENERIC_ID_DIP_LB,
-	DEVLINK_TRAP_GENERIC_ID_SIP_MC,
-	DEVLINK_TRAP_GENERIC_ID_SIP_LB,
-	DEVLINK_TRAP_GENERIC_ID_CORRUPTED_IP_HDR,
-	DEVLINK_TRAP_GENERIC_ID_IPV4_SIP_BC,
-	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_RESERVED_SCOPE,
-	DEVLINK_TRAP_GENERIC_ID_IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE,
-	DEVLINK_TRAP_GENERIC_ID_MTU_ERROR,
-	DEVLINK_TRAP_GENERIC_ID_TTL_ERROR,
-	DEVLINK_TRAP_GENERIC_ID_RPF,
-	DEVLINK_TRAP_GENERIC_ID_REJECT_ROUTE,
-	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
-	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
-	DEVLINK_TRAP_GENERIC_ID_UNRESOLVED_NEIGH,
-	DEVLINK_TRAP_GENERIC_ID_IPV4_LPM_UNICAST_MISS,
-	DEVLINK_TRAP_GENERIC_ID_IPV6_LPM_UNICAST_MISS,
-	DEVLINK_MLXSW_TRAP_ID_IRIF_DISABLED,
-	DEVLINK_MLXSW_TRAP_ID_ERIF_DISABLED,
-	DEVLINK_TRAP_GENERIC_ID_NON_ROUTABLE,
-	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
-	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
-	DEVLINK_TRAP_GENERIC_ID_DECAP_ERROR,
-	DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC,
-	DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP,
-	DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP,
+static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
+	{
+		.trap = MLXSW_SP_TRAP_DROP(SMAC_MC, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_PACKET_SMAC_MC, L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(VLAN_TAG_MISMATCH, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_SWITCH_VTAG_ALLOW,
+					     L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(INGRESS_VLAN_FILTER, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_SWITCH_VLAN, L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(INGRESS_STP_FILTER, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_SWITCH_STP, L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(EMPTY_TX_LIST, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_UC, L2_DISCARDS),
+			MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_MC_NULL, L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(LOOKUP_SWITCH_LB, L2_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ROUTER2, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(NON_IP_PACKET, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_NON_IP_PACKET,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(UC_DIP_MC_DMAC, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_UC_DIP_MC_DMAC,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(DIP_LB, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_DIP_LB, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(SIP_MC, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_MC, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(SIP_LB, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_SIP_LB, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(CORRUPTED_IP_HDR, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_CORRUPTED_IP_HDR,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(IPV4_SIP_BC, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ING_ROUTER_IPV4_SIP_BC,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_RESERVED_SCOPE,
+					   L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_RESERVED_SCOPE,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE,
+					   L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(IPV6_MC_DIP_INTERFACE_LOCAL_SCOPE,
+					     L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS,
+					       TRAP_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS,
+					       TRAP_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS,
+					       TRAP_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS,
+					       TRAP_TO_CPU),
+			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS,
+					       TRAP_TO_CPU),
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS,
+						L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS,
+						L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DRIVER_DROP(IRIF_DISABLED, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ROUTER_IRIF_EN, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DRIVER_DROP(ERIF_DISABLED, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(ROUTER_ERIF_EN, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(NON_ROUTABLE, L3_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(NON_ROUTABLE, L3_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_EXCEPTION(DECAP_ERROR, TUNNEL_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_EXCEPTION(DECAP_ECN0, TUNNEL_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+			MLXSW_SP_RXL_EXCEPTION(IPIP_DECAP_ERROR,
+					       TUNNEL_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_DEC_PKT, TUNNEL_DISCARDS,
+					       TRAP_EXCEPTION_TO_CPU),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP(OVERLAY_SMAC_MC, TUNNEL_DROPS),
+		.listeners_arr = {
+			MLXSW_SP_RXL_DISCARD(OVERLAY_SMAC_MC, TUNNEL_DISCARDS),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP,
+					       ACL_DROPS,
+					       DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
+		.listeners_arr = {
+			MLXSW_SP_RXL_ACL_DISCARD(INGRESS_ACL, ACL_DISCARDS,
+						 DUMMY),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP,
+					       ACL_DROPS,
+					       DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
+		.listeners_arr = {
+			MLXSW_SP_RXL_ACL_DISCARD(EGRESS_ACL, ACL_DISCARDS,
+						 DUMMY),
+		},
+	},
 };
 
 #define MLXSW_SP_THIN_POLICER_ID	(MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1)
@@ -373,6 +476,20 @@ mlxsw_sp_trap_group_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id)
 	return NULL;
 }
 
+static struct mlxsw_sp_trap_item *
+mlxsw_sp_trap_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id)
+{
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
+
+	for (i = 0; i < trap->traps_count; i++) {
+		if (trap->trap_items_arr[i].trap.id == id)
+			return &trap->trap_items_arr[i];
+	}
+
+	return NULL;
+}
+
 static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp)
 {
 	char qpcr_pl[MLXSW_REG_QPCR_LEN];
@@ -542,9 +659,63 @@ static void mlxsw_sp_trap_groups_fini(struct mlxsw_sp *mlxsw_sp)
 	kfree(trap->group_items_arr);
 }
 
-int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
+static bool
+mlxsw_sp_trap_listener_is_valid(const struct mlxsw_listener *listener)
+{
+	return listener->trap_id != 0;
+}
+
+static int mlxsw_sp_traps_init(struct mlxsw_sp *mlxsw_sp)
 {
 	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	const struct mlxsw_sp_trap_item *trap_item;
+	int err, i;
+
+	trap->trap_items_arr = kmemdup(mlxsw_sp_trap_items_arr,
+				       sizeof(mlxsw_sp_trap_items_arr),
+				       GFP_KERNEL);
+	if (!trap->trap_items_arr)
+		return -ENOMEM;
+
+	trap->traps_count = ARRAY_SIZE(mlxsw_sp_trap_items_arr);
+
+	for (i = 0; i < trap->traps_count; i++) {
+		trap_item = &trap->trap_items_arr[i];
+		err = devlink_traps_register(devlink, &trap_item->trap, 1,
+					     mlxsw_sp);
+		if (err)
+			goto err_trap_register;
+	}
+
+	return 0;
+
+err_trap_register:
+	for (i--; i >= 0; i--) {
+		trap_item = &trap->trap_items_arr[i];
+		devlink_traps_unregister(devlink, &trap_item->trap, 1);
+	}
+	kfree(trap->trap_items_arr);
+	return err;
+}
+
+static void mlxsw_sp_traps_fini(struct mlxsw_sp *mlxsw_sp)
+{
+	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
+	int i;
+
+	for (i = trap->traps_count - 1; i >= 0; i--) {
+		const struct mlxsw_sp_trap_item *trap_item;
+
+		trap_item = &trap->trap_items_arr[i];
+		devlink_traps_unregister(devlink, &trap_item->trap, 1);
+	}
+	kfree(trap->trap_items_arr);
+}
+
+int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
+{
 	int err;
 
 	err = mlxsw_sp_trap_cpu_policers_set(mlxsw_sp);
@@ -555,10 +726,6 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		return err;
 
-	if (WARN_ON(ARRAY_SIZE(mlxsw_sp_listener_devlink_map) !=
-		    ARRAY_SIZE(mlxsw_sp_listeners_arr)))
-		return -EINVAL;
-
 	err = mlxsw_sp_trap_policers_init(mlxsw_sp);
 	if (err)
 		return err;
@@ -567,14 +734,13 @@ int mlxsw_sp_devlink_traps_init(struct mlxsw_sp *mlxsw_sp)
 	if (err)
 		goto err_trap_groups_init;
 
-	err = devlink_traps_register(devlink, mlxsw_sp_traps_arr,
-				     ARRAY_SIZE(mlxsw_sp_traps_arr), mlxsw_sp);
+	err = mlxsw_sp_traps_init(mlxsw_sp);
 	if (err)
-		goto err_traps_register;
+		goto err_traps_init;
 
 	return 0;
 
-err_traps_register:
+err_traps_init:
 	mlxsw_sp_trap_groups_fini(mlxsw_sp);
 err_trap_groups_init:
 	mlxsw_sp_trap_policers_fini(mlxsw_sp);
@@ -583,10 +749,7 @@ err_trap_groups_init:
 
 void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp)
 {
-	struct devlink *devlink = priv_to_devlink(mlxsw_sp->core);
-
-	devlink_traps_unregister(devlink, mlxsw_sp_traps_arr,
-				 ARRAY_SIZE(mlxsw_sp_traps_arr));
+	mlxsw_sp_traps_fini(mlxsw_sp);
 	mlxsw_sp_trap_groups_fini(mlxsw_sp);
 	mlxsw_sp_trap_policers_fini(mlxsw_sp);
 }
@@ -594,16 +757,21 @@ void mlxsw_sp_devlink_traps_fini(struct mlxsw_sp *mlxsw_sp)
 int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core,
 		       const struct devlink_trap *trap, void *trap_ctx)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp_trap_item *trap_item;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+	trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id);
+	if (WARN_ON(!trap_item))
+		return -EINVAL;
+
+	for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) {
 		const struct mlxsw_listener *listener;
 		int err;
 
-		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+		listener = &trap_item->listeners_arr[i];
+		if (!mlxsw_sp_trap_listener_is_valid(listener))
 			continue;
-		listener = &mlxsw_sp_listeners_arr[i];
-
 		err = mlxsw_core_trap_register(mlxsw_core, listener, trap_ctx);
 		if (err)
 			return err;
@@ -615,15 +783,20 @@ int mlxsw_sp_trap_init(struct mlxsw_core *mlxsw_core,
 void mlxsw_sp_trap_fini(struct mlxsw_core *mlxsw_core,
 			const struct devlink_trap *trap, void *trap_ctx)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp_trap_item *trap_item;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+	trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id);
+	if (WARN_ON(!trap_item))
+		return;
+
+	for (i = MLXSW_SP_TRAP_LISTENERS_MAX - 1; i >= 0; i--) {
 		const struct mlxsw_listener *listener;
 
-		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+		listener = &trap_item->listeners_arr[i];
+		if (!mlxsw_sp_trap_listener_is_valid(listener))
 			continue;
-		listener = &mlxsw_sp_listeners_arr[i];
-
 		mlxsw_core_trap_unregister(mlxsw_core, listener, trap_ctx);
 	}
 }
@@ -632,16 +805,23 @@ int mlxsw_sp_trap_action_set(struct mlxsw_core *mlxsw_core,
 			     const struct devlink_trap *trap,
 			     enum devlink_trap_action action)
 {
+	struct mlxsw_sp *mlxsw_sp = mlxsw_core_driver_priv(mlxsw_core);
+	const struct mlxsw_sp_trap_item *trap_item;
 	int i;
 
-	for (i = 0; i < ARRAY_SIZE(mlxsw_sp_listener_devlink_map); i++) {
+	trap_item = mlxsw_sp_trap_item_lookup(mlxsw_sp, trap->id);
+	if (WARN_ON(!trap_item))
+		return -EINVAL;
+
+	for (i = 0; i < MLXSW_SP_TRAP_LISTENERS_MAX; i++) {
 		const struct mlxsw_listener *listener;
 		bool enabled;
 		int err;
 
-		if (mlxsw_sp_listener_devlink_map[i] != trap->id)
+		listener = &trap_item->listeners_arr[i];
+		if (!mlxsw_sp_trap_listener_is_valid(listener))
 			continue;
-		listener = &mlxsw_sp_listeners_arr[i];
+
 		switch (action) {
 		case DEVLINK_TRAP_ACTION_DROP:
 			enabled = false;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
index 1280f8bc617a..759146897b3a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
@@ -14,6 +14,9 @@ struct mlxsw_sp_trap {
 	struct mlxsw_sp_trap_group_item *group_items_arr;
 	u64 groups_count; /* Number of registered groups */
 
+	struct mlxsw_sp_trap_item *trap_items_arr;
+	u64 traps_count; /* Number of registered traps */
+
 	u64 max_policers;
 	unsigned long policers_usage[]; /* Usage bitmap */
 };
-- 
cgit v1.2.3-59-g8ed1b


From 84e0d83567df4597b1b624b495d689104227a551 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:09 +0300
Subject: selftests: devlink_lib: Remove double blank line

One blank line is enough.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/devlink_lib.sh | 1 -
 1 file changed, 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 155d48bd4d9e..7b6390aea50b 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -390,7 +390,6 @@ devlink_trap_drop_test()
 	devlink_trap_group_stats_idle_test $group_name
 	check_err $? "Trap group stats not idle with initial drop action"
 
-
 	devlink_trap_action_set $trap_name "trap"
 	devlink_trap_stats_idle_test $trap_name
 	check_fail $? "Trap stats idle after setting action to trap"
-- 
cgit v1.2.3-59-g8ed1b


From 04cc99d9bdb1119172e21c121950a0253f5c659f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Sun, 17 May 2020 01:43:10 +0300
Subject: selftests: mlxsw: Do not hard code trap group name

It can be derived dynamically from the trap's name, so drop it.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/devlink_trap_acl_drops.sh    |  4 +--
 .../drivers/net/mlxsw/devlink_trap_l2_drops.sh     | 33 ++++++++------------
 .../drivers/net/mlxsw/devlink_trap_l3_drops.sh     | 35 +++++++---------------
 .../net/mlxsw/devlink_trap_l3_exceptions.sh        | 20 ++++---------
 .../drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh  |  6 ++--
 .../drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh |  9 ++----
 .../selftests/net/forwarding/devlink_lib.sh        |  8 +++--
 7 files changed, 43 insertions(+), 72 deletions(-)

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
index 26044e397157..b32ba5fec59d 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_acl_drops.sh
@@ -107,7 +107,7 @@ ingress_flow_action_drop_test()
 
 	RET=0
 
-	devlink_trap_drop_test ingress_flow_action_drop acl_drops $swp2 101
+	devlink_trap_drop_test ingress_flow_action_drop $swp2 101
 
 	log_test "ingress_flow_action_drop"
 
@@ -132,7 +132,7 @@ egress_flow_action_drop_test()
 
 	RET=0
 
-	devlink_trap_drop_test egress_flow_action_drop acl_drops $swp2 102
+	devlink_trap_drop_test egress_flow_action_drop $swp2 102
 
 	log_test "egress_flow_action_drop"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
index e7aecb065409..a4c2812e9807 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l2_drops.sh
@@ -96,7 +96,6 @@ source_mac_is_multicast_test()
 {
 	local trap_name="source_mac_is_multicast"
 	local smac=01:02:03:04:05:06
-	local group_name="l2_drops"
 	local mz_pid
 
 	tc filter add dev $swp2 egress protocol ip pref 1 handle 101 \
@@ -107,7 +106,7 @@ source_mac_is_multicast_test()
 
 	RET=0
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	log_test "Source MAC is multicast"
 
@@ -118,7 +117,6 @@ __vlan_tag_mismatch_test()
 {
 	local trap_name="vlan_tag_mismatch"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local opt=$1; shift
 	local mz_pid
 
@@ -132,7 +130,7 @@ __vlan_tag_mismatch_test()
 	$MZ $h1 "$opt" -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Add PVID and make sure packets are no longer dropped.
 	bridge vlan add vid 1 dev $swp1 pvid untagged master
@@ -140,7 +138,7 @@ __vlan_tag_mismatch_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -179,7 +177,6 @@ ingress_vlan_filter_test()
 {
 	local trap_name="ingress_vlan_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 	local vid=10
 
@@ -193,7 +190,7 @@ ingress_vlan_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Add the VLAN on the bridge port and make sure packets are no longer
 	# dropped.
@@ -202,7 +199,7 @@ ingress_vlan_filter_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -222,7 +219,6 @@ __ingress_stp_filter_test()
 {
 	local trap_name="ingress_spanning_tree_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local state=$1; shift
 	local mz_pid
 	local vid=20
@@ -237,7 +233,7 @@ __ingress_stp_filter_test()
 	$MZ $h1 -Q $vid -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Change STP state to forwarding and make sure packets are no longer
 	# dropped.
@@ -246,7 +242,7 @@ __ingress_stp_filter_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -292,7 +288,6 @@ port_list_is_empty_uc_test()
 {
 	local trap_name="port_list_is_empty"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 
 	# Disable unicast flooding on both ports, so that packets cannot egress
@@ -308,7 +303,7 @@ port_list_is_empty_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -316,7 +311,7 @@ port_list_is_empty_uc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -335,7 +330,6 @@ port_list_is_empty_mc_test()
 {
 	local trap_name="port_list_is_empty"
 	local dmac=01:00:5e:00:00:01
-	local group_name="l2_drops"
 	local dip=239.0.0.1
 	local mz_pid
 
@@ -354,7 +348,7 @@ port_list_is_empty_mc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded to one port.
 	ip link set dev $swp2 type bridge_slave mcast_flood on
@@ -362,7 +356,7 @@ port_list_is_empty_mc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
@@ -387,7 +381,6 @@ port_loopback_filter_uc_test()
 {
 	local trap_name="port_loopback_filter"
 	local dmac=de:ad:be:ef:13:37
-	local group_name="l2_drops"
 	local mz_pid
 
 	# Make sure packets can only egress the input port.
@@ -401,7 +394,7 @@ port_loopback_filter_uc_test()
 	$MZ $h1 -c 0 -p 100 -a own -b $dmac -t ip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp2 101
+	devlink_trap_drop_test $trap_name $swp2 101
 
 	# Allow packets to be flooded.
 	ip link set dev $swp2 type bridge_slave flood on
@@ -409,7 +402,7 @@ port_loopback_filter_uc_test()
 
 	devlink_trap_stats_idle_test $trap_name
 	check_err $? "Trap stats not idle when packets should not be dropped"
-	devlink_trap_group_stats_idle_test $group_name
+	devlink_trap_group_stats_idle_test $(devlink_trap_group_get $trap_name)
 	check_err $? "Trap group stats not idle with when packets should not be dropped"
 
 	tc_check_packets "dev $swp2 egress" 101 0
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
index 616f47d86a61..f5abb1ebd392 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_drops.sh
@@ -161,7 +161,6 @@ ping_check()
 non_ip_test()
 {
 	local trap_name="non_ip"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -176,7 +175,7 @@ non_ip_test()
 		00:00 de:ad:be:ef" &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Non IP"
 
@@ -190,7 +189,6 @@ __uc_dip_over_mc_dmac_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="uc_dip_over_mc_dmac"
-	local group_name="l3_drops"
 	local dmac=01:02:03:04:05:06
 	local mz_pid
 
@@ -206,7 +204,7 @@ __uc_dip_over_mc_dmac_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Unicast destination IP over multicast destination MAC: $desc"
 
@@ -227,7 +225,6 @@ __sip_is_loopback_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="sip_is_loopback_address"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -242,7 +239,7 @@ __sip_is_loopback_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Source IP is loopback address: $desc"
 
@@ -262,7 +259,6 @@ __dip_is_loopback_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="dip_is_loopback_address"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -277,7 +273,7 @@ __dip_is_loopback_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Destination IP is loopback address: $desc"
 
@@ -298,7 +294,6 @@ __sip_is_mc_test()
 	local dip=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="sip_is_mc"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -313,7 +308,7 @@ __sip_is_mc_test()
 		-b $rp1mac -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "Source IP is multicast: $desc"
 
@@ -329,7 +324,6 @@ sip_is_mc_test()
 ipv4_sip_is_limited_bc_test()
 {
 	local trap_name="ipv4_sip_is_limited_bc"
-	local group_name="l3_drops"
 	local sip=255.255.255.255
 	local mz_pid
 
@@ -345,7 +339,7 @@ ipv4_sip_is_limited_bc_test()
 		-B $h2_ipv4 -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv4 source IP is limited broadcast"
 
@@ -382,7 +376,6 @@ __ipv4_header_corrupted_test()
 	local ihl=$1; shift
 	local checksum=$1; shift
 	local trap_name="ip_header_corrupted"
-	local group_name="l3_drops"
 	local payload
 	local mz_pid
 
@@ -399,7 +392,7 @@ __ipv4_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv4"
 
@@ -429,7 +422,6 @@ __ipv6_header_corrupted_test()
 	local desc=$1; shift
 	local ipver=$1; shift
 	local trap_name="ip_header_corrupted"
-	local group_name="l3_drops"
 	local payload
 	local mz_pid
 
@@ -446,7 +438,7 @@ __ipv6_header_corrupted_test()
 	$MZ $h1 -c 0 -d 1msec -a $h1mac -b $rp1mac -q p=$payload &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IP header corrupted: $desc: IPv6"
 
@@ -469,7 +461,6 @@ ip_header_corrupted_test()
 ipv6_mc_dip_reserved_scope_test()
 {
 	local trap_name="ipv6_mc_dip_reserved_scope"
-	local group_name="l3_drops"
 	local dip=FF00::
 	local mz_pid
 
@@ -485,7 +476,7 @@ ipv6_mc_dip_reserved_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv6 multicast destination IP reserved scope"
 
@@ -495,7 +486,6 @@ ipv6_mc_dip_reserved_scope_test()
 ipv6_mc_dip_interface_local_scope_test()
 {
 	local trap_name="ipv6_mc_dip_interface_local_scope"
-	local group_name="l3_drops"
 	local dip=FF01::
 	local mz_pid
 
@@ -511,7 +501,7 @@ ipv6_mc_dip_interface_local_scope_test()
 		"33:33:00:00:00:00" -B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 
 	log_test "IPv6 multicast destination IP interface-local scope"
 
@@ -526,7 +516,6 @@ __blackhole_route_test()
 	local dip=$1; shift
 	local ip_proto=${1:-"icmp"}; shift
 	local trap_name="blackhole_route"
-	local group_name="l3_drops"
 	local mz_pid
 
 	RET=0
@@ -542,7 +531,7 @@ __blackhole_route_test()
 		-B $dip -d 1msec -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $rp2 101
+	devlink_trap_drop_test $trap_name $rp2 101
 	log_test "Blackhole route: IPv$flags"
 
 	devlink_trap_drop_cleanup $mz_pid $rp2 $proto 1 101
@@ -558,7 +547,6 @@ blackhole_route_test()
 irif_disabled_test()
 {
 	local trap_name="irif_disabled"
-	local group_name="l3_drops"
 	local t0_packets t0_bytes
 	local t1_packets t1_bytes
 	local mz_pid
@@ -613,7 +601,6 @@ irif_disabled_test()
 erif_disabled_test()
 {
 	local trap_name="erif_disabled"
-	local group_name="l3_drops"
 	local t0_packets t0_bytes
 	local t1_packets t1_bytes
 	local mz_pid
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
index 2bc6df42d597..1fedfc9da434 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_l3_exceptions.sh
@@ -169,7 +169,6 @@ trap_action_check()
 mtu_value_is_too_small_test()
 {
 	local trap_name="mtu_value_is_too_small"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -191,7 +190,7 @@ mtu_value_is_too_small_test()
 		-B 198.51.100.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "Packets were not received to h1"
@@ -208,7 +207,6 @@ __ttl_value_is_too_small_test()
 {
 	local ttl_val=$1; shift
 	local trap_name="ttl_value_is_too_small"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -227,7 +225,7 @@ __ttl_value_is_too_small_test()
 		-b $rp1mac -B 198.51.100.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "Packets were not received to h1"
@@ -271,7 +269,6 @@ __mc_reverse_path_forwarding_test()
 	local proto=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="mc_reverse_path_forwarding"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -292,7 +289,7 @@ __mc_reverse_path_forwarding_test()
 
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $rp2 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -322,7 +319,6 @@ __reject_route_test()
 	local unreachable=$1; shift
 	local flags=${1:-""}; shift
 	local trap_name="reject_route"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -341,7 +337,7 @@ __reject_route_test()
 		-B $dst_ip -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets_hitting "dev $h1 ingress" 101
 	check_err $? "ICMP packet was not received to h1"
@@ -370,7 +366,6 @@ __host_miss_test()
 	local desc=$1; shift
 	local dip=$1; shift
 	local trap_name="unresolved_neigh"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -405,7 +400,6 @@ __invalid_nexthop_test()
 	local subnet=$1; shift
 	local via_add=$1; shift
 	local trap_name="unresolved_neigh"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -494,7 +488,6 @@ vrf_without_routes_destroy()
 ipv4_lpm_miss_test()
 {
 	local trap_name="ipv4_lpm_miss"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -511,7 +504,7 @@ ipv4_lpm_miss_test()
 		-B 203.0.113.1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	log_test "LPM miss: IPv4"
 
@@ -522,7 +515,6 @@ ipv4_lpm_miss_test()
 ipv6_lpm_miss_test()
 {
 	local trap_name="ipv6_lpm_miss"
-	local group_name="l3_drops"
 	local expected_action="trap"
 	local mz_pid
 
@@ -539,7 +531,7 @@ ipv6_lpm_miss_test()
 		-B 2001:db8::1 -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	log_test "LPM miss: IPv6"
 
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
index 039629bb92a3..8817851da7a9 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_ipip.sh
@@ -140,7 +140,6 @@ ecn_payload_get()
 ecn_decap_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local ecn_desc=$1; shift
 	local outer_tos=$1; shift
@@ -161,7 +160,7 @@ ecn_decap_test()
 
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -200,7 +199,6 @@ ipip_payload_get()
 no_matching_tunnel_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local sip=$1; shift
 	local mz_pid
@@ -218,7 +216,7 @@ no_matching_tunnel_test()
 		-A $sip -B 192.0.2.65 -t ip len=48,proto=47,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
index e11a416323cf..10e0f3dbc930 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_tunnel_vxlan.sh
@@ -159,7 +159,6 @@ ecn_payload_get()
 ecn_decap_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local ecn_desc=$1; shift
 	local outer_tos=$1; shift
@@ -177,7 +176,7 @@ ecn_decap_test()
 		-t udp sp=12345,dp=$VXPORT,tos=$outer_tos,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -228,7 +227,6 @@ short_payload_get()
 corrupted_packet_test()
 {
 	local trap_name="decap_error"
-	local group_name="tunnel_drops"
 	local desc=$1; shift
 	local payload_get=$1; shift
 	local mz_pid
@@ -246,7 +244,7 @@ corrupted_packet_test()
 		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_exception_test $trap_name $group_name
+	devlink_trap_exception_test $trap_name
 
 	tc_check_packets "dev $swp1 egress" 101 0
 	check_err $? "Packets were not dropped"
@@ -297,7 +295,6 @@ mc_smac_payload_get()
 overlay_smac_is_mc_test()
 {
 	local trap_name="overlay_smac_is_mc"
-	local group_name="tunnel_drops"
 	local mz_pid
 
 	RET=0
@@ -314,7 +311,7 @@ overlay_smac_is_mc_test()
 		-B 192.0.2.17 -t udp sp=12345,dp=$VXPORT,p=$payload -q &
 	mz_pid=$!
 
-	devlink_trap_drop_test $trap_name $group_name $swp1 101
+	devlink_trap_drop_test $trap_name $swp1 101
 
 	log_test "Overlay source MAC is multicast"
 
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index 7b6390aea50b..e27236109235 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -365,7 +365,9 @@ devlink_trap_group_stats_idle_test()
 devlink_trap_exception_test()
 {
 	local trap_name=$1; shift
-	local group_name=$1; shift
+	local group_name
+
+	group_name=$(devlink_trap_group_get $trap_name)
 
 	devlink_trap_stats_idle_test $trap_name
 	check_fail $? "Trap stats idle when packets should have been trapped"
@@ -377,9 +379,11 @@ devlink_trap_exception_test()
 devlink_trap_drop_test()
 {
 	local trap_name=$1; shift
-	local group_name=$1; shift
 	local dev=$1; shift
 	local handle=$1; shift
+	local group_name
+
+	group_name=$(devlink_trap_group_get $trap_name)
 
 	# This is the common part of all the tests. It checks that stats are
 	# initially idle, then non-idle after changing the trap action and
-- 
cgit v1.2.3-59-g8ed1b


From a4f48458ca1c02eeedf914ef6a892e047d6b65cc Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 15 May 2020 15:07:30 -0500
Subject: net: ipa: don't use noirq suspend/resume callbacks

Use the suspend and resume callbacks rather than suspend_noirq and
resume_noirq.  With IPA v4.2, we use the CHANNEL_STOP command to
implement a suspend, and without interrupts enabled, that command
won't complete.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_main.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_main.c b/drivers/net/ipa/ipa_main.c
index e0b1fe3c34f9..76d5108b8403 100644
--- a/drivers/net/ipa/ipa_main.c
+++ b/drivers/net/ipa/ipa_main.c
@@ -933,8 +933,8 @@ static int ipa_resume(struct device *dev)
 }
 
 static const struct dev_pm_ops ipa_pm_ops = {
-	.suspend_noirq	= ipa_suspend,
-	.resume_noirq	= ipa_resume,
+	.suspend	= ipa_suspend,
+	.resume		= ipa_resume,
 };
 
 static struct platform_driver ipa_driver = {
-- 
cgit v1.2.3-59-g8ed1b


From 195ef57f870070cb02f2f3b99a63d69e8e8f798e Mon Sep 17 00:00:00 2001
From: Alex Elder <elder@linaro.org>
Date: Fri, 15 May 2020 15:07:31 -0500
Subject: net: ipa: do not clear interrupt in gsi_channel_start()

In gsi_channel_start() there is a harmless-looking comment "Clear the
channel's event ring interrupt in case it's pending".  The intent
was to avoid getting spurious interrupts when first bringing up a
channel.

However, we now use channel stop/start to implement suspend and
resume, and an interrupt pending at the time we resume is actually
something we don't want to ignore.

The very first time we bring up the channel we do not expect an
interrupt to be pending, and even if it were, the effect would
simply be to schedule NAPI on that channel, which would find nothing
to do, which is not a problem.

Stop clearing any pending IEOB interrupt in gsi_channel_start().
That leaves one caller of the trivial function gsi_isr_ieob_clear().
Get rid of that function and just open-code it in gsi_isr_ieob()
instead.

This fixes a problem where IPA v4.2 would get stuck when resuming
after a suspend.

Signed-off-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 66570609c845..d343dc94cb48 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -238,11 +238,6 @@ static void gsi_irq_ieob_enable(struct gsi *gsi, u32 evt_ring_id)
 	iowrite32(val, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_MSK_OFFSET);
 }
 
-static void gsi_isr_ieob_clear(struct gsi *gsi, u32 mask)
-{
-	iowrite32(mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
-}
-
 static void gsi_irq_ieob_disable(struct gsi *gsi, u32 evt_ring_id)
 {
 	u32 val;
@@ -777,7 +772,6 @@ static void gsi_channel_deprogram(struct gsi_channel *channel)
 int gsi_channel_start(struct gsi *gsi, u32 channel_id)
 {
 	struct gsi_channel *channel = &gsi->channel[channel_id];
-	u32 evt_ring_id = channel->evt_ring_id;
 	int ret;
 
 	mutex_lock(&gsi->mutex);
@@ -786,9 +780,6 @@ int gsi_channel_start(struct gsi *gsi, u32 channel_id)
 
 	mutex_unlock(&gsi->mutex);
 
-	/* Clear the channel's event ring interrupt in case it's pending */
-	gsi_isr_ieob_clear(gsi, BIT(evt_ring_id));
-
 	gsi_channel_thaw(channel);
 
 	return ret;
@@ -1093,7 +1084,7 @@ static void gsi_isr_ieob(struct gsi *gsi)
 	u32 event_mask;
 
 	event_mask = ioread32(gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_OFFSET);
-	gsi_isr_ieob_clear(gsi, event_mask);
+	iowrite32(event_mask, gsi->virt + GSI_CNTXT_SRC_IEOB_IRQ_CLR_OFFSET);
 
 	while (event_mask) {
 		u32 evt_ring_id = __ffs(event_mask);
-- 
cgit v1.2.3-59-g8ed1b


From eb682677f59e809d8e06c218b565aeb9723a4ad3 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@gmail.com>
Date: Sun, 17 May 2020 12:00:33 -0600
Subject: selftests: Drop 'pref medium' in route checks

The 'pref medium' attribute was moved in iproute2 to be near the prefix
which is where it applies versus after the last nexthop. The nexthop
tests were updated to drop the string from route checking, but it crept
in again with the compat tests.

Fixes: 4dddb5be136a ("selftests: net: add new testcases for nexthop API compat mode sysctl")
Signed-off-by: David Ahern <dsahern@gmail.com>
Cc: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index dd0e5fec6367..50d822face36 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -965,7 +965,7 @@ ipv6_compat_mode()
 	log_test $? 0 "IPv6 compat mode on - route add notification"
 
 	# route dump should contain expanded nexthops
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 nexthop via 2001:db8:91::2 dev veth1 weight 1 nexthop via 2001:db8:91::3 dev veth1 weight 1"
 	log_test $? 0 "IPv6 compat mode on - route dump"
 
 	# change in nexthop group should generate route notification
@@ -992,7 +992,7 @@ ipv6_compat_mode()
 	log_test $? 0 "IPv6 compat mode off - route add notification"
 
 	# route dump should not contain expanded nexthops
-	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024 pref medium"
+	check_route6 "2001:db8:101::1" "2001:db8:101::1 nhid 122 metric 1024"
 	log_test $? 0 "IPv6 compat mode off - route dump"
 
 	# change in nexthop group should not generate route notification
-- 
cgit v1.2.3-59-g8ed1b


From a0e17064d43e445181bc004d949a4855ea8ccf9c Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:17 +0200
Subject: mptcp: move common nospace-pattern to a helper

Paolo noticed that ssk_check_wmem() has the same pattern, so add/use
common helper for both places.

Suggested-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 27 +++++++++++++--------------
 1 file changed, 13 insertions(+), 14 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1f52a0fa31ed..0413454fcdaf 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -653,6 +653,15 @@ out:
 	return ret;
 }
 
+static void mptcp_nospace(struct mptcp_sock *msk, struct socket *sock)
+{
+	clear_bit(MPTCP_SEND_SPACE, &msk->flags);
+	smp_mb__after_atomic(); /* msk->flags is changed by write_space cb */
+
+	/* enables sk->write_space() callbacks */
+	set_bit(SOCK_NOSPACE, &sock->flags);
+}
+
 static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 {
 	struct mptcp_subflow_context *subflow;
@@ -666,13 +675,8 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 		if (!sk_stream_memory_free(ssk)) {
 			struct socket *sock = ssk->sk_socket;
 
-			if (sock) {
-				clear_bit(MPTCP_SEND_SPACE, &msk->flags);
-				smp_mb__after_atomic();
-
-				/* enables sk->write_space() callbacks */
-				set_bit(SOCK_NOSPACE, &sock->flags);
-			}
+			if (sock)
+				mptcp_nospace(msk, sock);
 
 			return NULL;
 		}
@@ -698,13 +702,8 @@ static void ssk_check_wmem(struct mptcp_sock *msk, struct sock *ssk)
 		return;
 
 	sock = READ_ONCE(ssk->sk_socket);
-
-	if (sock) {
-		clear_bit(MPTCP_SEND_SPACE, &msk->flags);
-		smp_mb__after_atomic();
-		/* set NOSPACE only after clearing SEND_SPACE flag */
-		set_bit(SOCK_NOSPACE, &sock->flags);
-	}
+	if (sock)
+		mptcp_nospace(msk, sock);
 }
 
 static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
-- 
cgit v1.2.3-59-g8ed1b


From fb529e62d3f3e85001108213dc323c35f2765575 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:18 +0200
Subject: mptcp: break and restart in case mptcp sndbuf is full

It's not enough to check for available tcp send space.

We also hold on to transmitted data for mptcp-level retransmits.
Right now we will send more and more data if the peer can ack data
at the tcp level fast enough, since that frees up tcp send buffer space.

But we also need to check that data was acked and reclaimed at the mptcp
level.

Therefore add needed check in mptcp_sendmsg, flush tcp data and
wait until more mptcp snd space becomes available if we are over the
limit.  Before we wait for more data, also make sure we start the
retransmit timer if we ran out of sndbuf space.

Otherwise there is a very small chance that we wait forever:

 * receiver is waiting for data
 * sender is blocked because mptcp socket buffer is full
 * at tcp level, all data was acked
 * mptcp-level snd_una was not updated, because last ack
   that acknowledged the last data packet carried an older
   MPTCP-ack.

Restarting the retransmit timer avoids this problem: if TCP
subflow is idle, data is retransmitted from the RTX queue.

New data will make the peer send a new, updated MPTCP-Ack.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 36 ++++++++++++++++++++++++++++++++++++
 1 file changed, 36 insertions(+)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 0413454fcdaf..75be8d662ac5 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -739,9 +739,23 @@ fallback:
 
 	mptcp_clean_una(sk);
 
+wait_for_sndbuf:
 	__mptcp_flush_join_list(msk);
 	ssk = mptcp_subflow_get_send(msk);
 	while (!sk_stream_memory_free(sk) || !ssk) {
+		if (ssk) {
+			/* make sure retransmit timer is
+			 * running before we wait for memory.
+			 *
+			 * The retransmit timer might be needed
+			 * to make the peer send an up-to-date
+			 * MPTCP Ack.
+			 */
+			mptcp_set_timeout(sk, ssk);
+			if (!mptcp_timer_pending(sk))
+				mptcp_reset_timer(sk);
+		}
+
 		ret = sk_stream_wait_memory(sk, &timeo);
 		if (ret)
 			goto out;
@@ -776,6 +790,28 @@ fallback:
 		}
 
 		copied += ret;
+
+		/* memory is charged to mptcp level socket as well, i.e.
+		 * if msg is very large, mptcp socket may run out of buffer
+		 * space.  mptcp_clean_una() will release data that has
+		 * been acked at mptcp level in the mean time, so there is
+		 * a good chance we can continue sending data right away.
+		 */
+		if (unlikely(!sk_stream_memory_free(sk))) {
+			tcp_push(ssk, msg->msg_flags, mss_now,
+				 tcp_sk(ssk)->nonagle, size_goal);
+			mptcp_clean_una(sk);
+			if (!sk_stream_memory_free(sk)) {
+				/* can't send more for now, need to wait for
+				 * MPTCP-level ACKs from peer.
+				 *
+				 * Wakeup will happen via mptcp_clean_una().
+				 */
+				mptcp_set_timeout(sk, ssk);
+				release_sock(ssk);
+				goto wait_for_sndbuf;
+			}
+		}
 	}
 
 	mptcp_set_timeout(sk, ssk);
-- 
cgit v1.2.3-59-g8ed1b


From 72511aab95c94d7c0f03d0b7db5df47fdca059f6 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:19 +0200
Subject: mptcp: avoid blocking in tcp_sendpages

The transmit loop continues to xmit new data until an error is returned
or all data was transmitted.

For the blocking i/o case, this means that tcp_sendpages() may block on
the subflow until more space becomes available, i.e. we end up sleeping
with the mptcp socket lock held.

Instead we should check if a different subflow is ready to be used.

This restarts the subflow sk lookup when the tx operation succeeded
and the tcp subflow can't accept more data or if tcp_sendpages
indicates -EAGAIN on a blocking mptcp socket.

In that case we also need to set the NOSPACE bit to make sure we get
notified once memory becomes available.

In case all subflows are busy, the existing logic will wait until a
subflow is ready, releasing the mptcp socket lock while doing so.

The mptcp worker already sets DONTWAIT, so no need to make changes there.

v2:
 * set NOSPACE bit
 * add a comment to clarify that mptcp-sk sndbuf limits need to
   be checked as well.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 35 ++++++++++++++++++++++++++++++++---
 1 file changed, 32 insertions(+), 3 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 75be8d662ac5..e97357066b21 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -590,7 +590,7 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	 * access the skb after the sendpages call
 	 */
 	ret = do_tcp_sendpages(ssk, page, offset, psize,
-			       msg->msg_flags | MSG_SENDPAGE_NOTLAST);
+			       msg->msg_flags | MSG_SENDPAGE_NOTLAST | MSG_DONTWAIT);
 	if (ret <= 0)
 		return ret;
 
@@ -713,6 +713,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 	struct socket *ssock;
 	size_t copied = 0;
 	struct sock *ssk;
+	bool tx_ok;
 	long timeo;
 
 	if (msg->msg_flags & ~(MSG_MORE | MSG_DONTWAIT | MSG_NOSIGNAL))
@@ -737,6 +738,7 @@ fallback:
 		return ret >= 0 ? ret + copied : (copied ? copied : ret);
 	}
 
+restart:
 	mptcp_clean_una(sk);
 
 wait_for_sndbuf:
@@ -772,11 +774,18 @@ wait_for_sndbuf:
 	pr_debug("conn_list->subflow=%p", ssk);
 
 	lock_sock(ssk);
-	while (msg_data_left(msg)) {
+	tx_ok = msg_data_left(msg);
+	while (tx_ok) {
 		ret = mptcp_sendmsg_frag(sk, ssk, msg, NULL, &timeo, &mss_now,
 					 &size_goal);
-		if (ret < 0)
+		if (ret < 0) {
+			if (ret == -EAGAIN && timeo > 0) {
+				mptcp_set_timeout(sk, ssk);
+				release_sock(ssk);
+				goto restart;
+			}
 			break;
+		}
 		if (ret == 0 && unlikely(__mptcp_needs_tcp_fallback(msk))) {
 			/* Can happen for passive sockets:
 			 * 3WHS negotiated MPTCP, but first packet after is
@@ -791,11 +800,31 @@ wait_for_sndbuf:
 
 		copied += ret;
 
+		tx_ok = msg_data_left(msg);
+		if (!tx_ok)
+			break;
+
+		if (!sk_stream_memory_free(ssk)) {
+			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
+			tcp_push(ssk, msg->msg_flags, mss_now,
+				 tcp_sk(ssk)->nonagle, size_goal);
+			mptcp_set_timeout(sk, ssk);
+			release_sock(ssk);
+			goto restart;
+		}
+
 		/* memory is charged to mptcp level socket as well, i.e.
 		 * if msg is very large, mptcp socket may run out of buffer
 		 * space.  mptcp_clean_una() will release data that has
 		 * been acked at mptcp level in the mean time, so there is
 		 * a good chance we can continue sending data right away.
+		 *
+		 * Normally, when the tcp subflow can accept more data, then
+		 * so can the MPTCP socket.  However, we need to cope with
+		 * peers that might lag behind in their MPTCP-level
+		 * acknowledgements, i.e.  data might have been acked at
+		 * tcp level only.  So, we must also check the MPTCP socket
+		 * limits before we send more data.
 		 */
 		if (unlikely(!sk_stream_memory_free(sk))) {
 			tcp_push(ssk, msg->msg_flags, mss_now,
-- 
cgit v1.2.3-59-g8ed1b


From 149f7c71e2c710a8ced836421a631953c9f84aa3 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:20 +0200
Subject: mptcp: fill skb extension cache outside of mptcp_sendmsg_frag

The mptcp_sendmsg_frag helper contains a loop that will wait on the
subflow sk.

It seems preferable to only wait in mptcp_sendmsg() when blocking io is
requested.  mptcp_sendmsg already has such a wait loop that is used when
no subflow socket is available for transmission.

This is a preparation patch that makes sure we call
mptcp_sendmsg_frag only if a skb extension has been allocated.

Moreover, such allocation currently uses GFP_ATOMIC while it
could use sleeping allocation instead.

Followup patches will remove the wait loop from mptcp_sendmsg_frag()
and will allow a sleeping allocation to be used for the extension.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e97357066b21..1bdfbca1c23a 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -669,6 +669,9 @@ static struct sock *mptcp_subflow_get_send(struct mptcp_sock *msk)
 
 	sock_owned_by_me((const struct sock *)msk);
 
+	if (!mptcp_ext_cache_refill(msk))
+		return NULL;
+
 	mptcp_for_each_subflow(msk, subflow) {
 		struct sock *ssk = mptcp_subflow_tcp_sock(subflow);
 
@@ -804,7 +807,8 @@ wait_for_sndbuf:
 		if (!tx_ok)
 			break;
 
-		if (!sk_stream_memory_free(ssk)) {
+		if (!sk_stream_memory_free(ssk) ||
+		    !mptcp_ext_cache_refill(msk)) {
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 			tcp_push(ssk, msg->msg_flags, mss_now,
 				 tcp_sk(ssk)->nonagle, size_goal);
@@ -1158,7 +1162,7 @@ static void mptcp_worker(struct work_struct *work)
 {
 	struct mptcp_sock *msk = container_of(work, struct mptcp_sock, work);
 	struct sock *ssk, *sk = &msk->sk.icsk_inet.sk;
-	int orig_len, orig_offset, ret, mss_now = 0, size_goal = 0;
+	int orig_len, orig_offset, mss_now = 0, size_goal = 0;
 	struct mptcp_data_frag *dfrag;
 	u64 orig_write_seq;
 	size_t copied = 0;
@@ -1180,6 +1184,9 @@ static void mptcp_worker(struct work_struct *work)
 	if (!dfrag)
 		goto unlock;
 
+	if (!mptcp_ext_cache_refill(msk))
+		goto reset_unlock;
+
 	ssk = mptcp_subflow_get_retrans(msk);
 	if (!ssk)
 		goto reset_unlock;
@@ -1191,8 +1198,8 @@ static void mptcp_worker(struct work_struct *work)
 	orig_offset = dfrag->offset;
 	orig_write_seq = dfrag->data_seq;
 	while (dfrag->data_len > 0) {
-		ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo, &mss_now,
-					 &size_goal);
+		int ret = mptcp_sendmsg_frag(sk, ssk, &msg, dfrag, &timeo,
+					     &mss_now, &size_goal);
 		if (ret < 0)
 			break;
 
@@ -1200,6 +1207,9 @@ static void mptcp_worker(struct work_struct *work)
 		copied += ret;
 		dfrag->data_len -= ret;
 		dfrag->offset += ret;
+
+		if (!mptcp_ext_cache_refill(msk))
+			break;
 	}
 	if (copied)
 		tcp_push(ssk, msg.msg_flags, mss_now, tcp_sk(ssk)->nonagle,
-- 
cgit v1.2.3-59-g8ed1b


From 17091708d1e503383f20934631305ccb375b0eb1 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:21 +0200
Subject: mptcp: fill skb page frag cache outside of mptcp_sendmsg_frag

The mptcp_sendmsg_frag helper contains a loop that will wait on the
subflow sk.

It seems preferable to only wait in mptcp_sendmsg() when blocking io is
requested.  mptcp_sendmsg already has such a wait loop that is used when
no subflow socket is available for transmission.

This is another preparation patch that makes sure we call
mptcp_sendmsg_frag only if the page frag cache has been refilled.

Followup patch will remove the wait loop from mptcp_sendmsg_frag().

The retransmit worker doesn't need to do this refill as it won't
transmit new mptcp-level data.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index 1bdfbca1c23a..a11e51222e59 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -713,6 +713,7 @@ static int mptcp_sendmsg(struct sock *sk, struct msghdr *msg, size_t len)
 {
 	int mss_now = 0, size_goal = 0, ret = 0;
 	struct mptcp_sock *msk = mptcp_sk(sk);
+	struct page_frag *pfrag;
 	struct socket *ssock;
 	size_t copied = 0;
 	struct sock *ssk;
@@ -741,13 +742,16 @@ fallback:
 		return ret >= 0 ? ret + copied : (copied ? copied : ret);
 	}
 
+	pfrag = sk_page_frag(sk);
 restart:
 	mptcp_clean_una(sk);
 
 wait_for_sndbuf:
 	__mptcp_flush_join_list(msk);
 	ssk = mptcp_subflow_get_send(msk);
-	while (!sk_stream_memory_free(sk) || !ssk) {
+	while (!sk_stream_memory_free(sk) ||
+	       !ssk ||
+	       !mptcp_page_frag_refill(ssk, pfrag)) {
 		if (ssk) {
 			/* make sure retransmit timer is
 			 * running before we wait for memory.
@@ -808,6 +812,7 @@ wait_for_sndbuf:
 			break;
 
 		if (!sk_stream_memory_free(ssk) ||
+		    !mptcp_page_frag_refill(ssk, pfrag) ||
 		    !mptcp_ext_cache_refill(msk)) {
 			set_bit(SOCK_NOSPACE, &sk->sk_socket->flags);
 			tcp_push(ssk, msg->msg_flags, mss_now,
-- 
cgit v1.2.3-59-g8ed1b


From 5c8264435d4f6a056ac926989a827aba1961e3c8 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:22 +0200
Subject: mptcp: remove inner wait loop from mptcp_sendmsg_frag

Previous patches made sure we only call into this function
when these prerequisites are met, so no need to wait on the
subflow socket anymore.

Closes: https://github.com/multipath-tcp/mptcp_net-next/issues/7
Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index a11e51222e59..bc950cf818f7 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -510,20 +510,6 @@ static int mptcp_sendmsg_frag(struct sock *sk, struct sock *ssk,
 	 * fooled into a warning if we don't init here
 	 */
 	pfrag = sk_page_frag(sk);
-	while ((!retransmission && !mptcp_page_frag_refill(ssk, pfrag)) ||
-	       !mptcp_ext_cache_refill(msk)) {
-		ret = sk_stream_wait_memory(ssk, timeo);
-		if (ret)
-			return ret;
-
-		/* if sk_stream_wait_memory() sleeps snd_una can change
-		 * significantly, refresh the rtx queue
-		 */
-		mptcp_clean_una(sk);
-
-		if (unlikely(__mptcp_needs_tcp_fallback(msk)))
-			return 0;
-	}
 	if (!retransmission) {
 		write_seq = &msk->write_seq;
 		page = pfrag->page;
-- 
cgit v1.2.3-59-g8ed1b


From 4930f4831b1547b52c5968e9307fe3d840d7fba0 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Sat, 16 May 2020 10:46:23 +0200
Subject: net: allow __skb_ext_alloc to sleep

mptcp calls this from the transmit side, from process context.
Allow a sleeping allocation instead of unconditional GFP_ATOMIC.

Acked-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/skbuff.h | 2 +-
 net/core/skbuff.c      | 8 +++++---
 net/mptcp/protocol.c   | 4 +++-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 3000c526f552..531843952809 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -4165,7 +4165,7 @@ struct skb_ext {
 	char data[] __aligned(8);
 };
 
-struct skb_ext *__skb_ext_alloc(void);
+struct skb_ext *__skb_ext_alloc(gfp_t flags);
 void *__skb_ext_set(struct sk_buff *skb, enum skb_ext_id id,
 		    struct skb_ext *ext);
 void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id);
diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 1bf0c3d278e7..35a133c6d13b 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -6087,13 +6087,15 @@ static void *skb_ext_get_ptr(struct skb_ext *ext, enum skb_ext_id id)
 /**
  * __skb_ext_alloc - allocate a new skb extensions storage
  *
+ * @flags: See kmalloc().
+ *
  * Returns the newly allocated pointer. The pointer can later attached to a
  * skb via __skb_ext_set().
  * Note: caller must handle the skb_ext as an opaque data.
  */
-struct skb_ext *__skb_ext_alloc(void)
+struct skb_ext *__skb_ext_alloc(gfp_t flags)
 {
-	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, GFP_ATOMIC);
+	struct skb_ext *new = kmem_cache_alloc(skbuff_ext_cache, flags);
 
 	if (new) {
 		memset(new->offset, 0, sizeof(new->offset));
@@ -6188,7 +6190,7 @@ void *skb_ext_add(struct sk_buff *skb, enum skb_ext_id id)
 	} else {
 		newoff = SKB_EXT_CHUNKSIZEOF(*new);
 
-		new = __skb_ext_alloc();
+		new = __skb_ext_alloc(GFP_ATOMIC);
 		if (!new)
 			return NULL;
 	}
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index bc950cf818f7..e3a628bea2b8 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -367,8 +367,10 @@ static void mptcp_stop_timer(struct sock *sk)
 
 static bool mptcp_ext_cache_refill(struct mptcp_sock *msk)
 {
+	const struct sock *sk = (const struct sock *)msk;
+
 	if (!msk->cached_ext)
-		msk->cached_ext = __skb_ext_alloc();
+		msk->cached_ext = __skb_ext_alloc(sk->sk_allocation);
 
 	return !!msk->cached_ext;
 }
-- 
cgit v1.2.3-59-g8ed1b


From dbfe7d74376e187f3c6eaff822e85176bc2cd06e Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Sat, 16 May 2020 18:23:36 -0700
Subject: rds: convert get_user_pages() --> pin_user_pages()

This code was using get_user_pages_fast(), in a "Case 2" scenario
(DMA/RDMA), using the categorization from [1]. That means that it's
time to convert the get_user_pages_fast() + put_page() calls to
pin_user_pages_fast() + unpin_user_pages() calls.

There is some helpful background in [2]: basically, this is a small
part of fixing a long-standing disconnect between pinning pages, and
file systems' use of those pages.

[1] Documentation/core-api/pin_user_pages.rst

[2] "Explicit pinning of user-space pages":
    https://lwn.net/Articles/807108/

Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: rds-devel@oss.oracle.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/info.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/net/rds/info.c b/net/rds/info.c
index 03f6fd56d237..e1d63563e81c 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -162,7 +162,6 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
 	struct rds_info_lengths lens;
 	unsigned long nr_pages = 0;
 	unsigned long start;
-	unsigned long i;
 	rds_info_func func;
 	struct page **pages = NULL;
 	int ret;
@@ -193,7 +192,7 @@ int rds_info_getsockopt(struct socket *sock, int optname, char __user *optval,
 		ret = -ENOMEM;
 		goto out;
 	}
-	ret = get_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
+	ret = pin_user_pages_fast(start, nr_pages, FOLL_WRITE, pages);
 	if (ret != nr_pages) {
 		if (ret > 0)
 			nr_pages = ret;
@@ -235,8 +234,7 @@ call_func:
 		ret = -EFAULT;
 
 out:
-	for (i = 0; pages && i < nr_pages; i++)
-		put_page(pages[i]);
+	unpin_user_pages(pages, nr_pages);
 	kfree(pages);
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 631512f868a4a49a48bda753752d085621c68112 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Wed, 13 May 2020 12:15:56 -0500
Subject: Bluetooth: L2CAP: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/l2cap.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/include/net/bluetooth/l2cap.h b/include/net/bluetooth/l2cap.h
index dada14d0622c..8f1e6a7a2df8 100644
--- a/include/net/bluetooth/l2cap.h
+++ b/include/net/bluetooth/l2cap.h
@@ -499,7 +499,7 @@ struct l2cap_ecred_conn_req {
 	__le16 mtu;
 	__le16 mps;
 	__le16 credits;
-	__le16 scid[0];
+	__le16 scid[];
 } __packed;
 
 struct l2cap_ecred_conn_rsp {
@@ -507,13 +507,13 @@ struct l2cap_ecred_conn_rsp {
 	__le16 mps;
 	__le16 credits;
 	__le16 result;
-	__le16 dcid[0];
+	__le16 dcid[];
 };
 
 struct l2cap_ecred_reconf_req {
 	__le16 mtu;
 	__le16 mps;
-	__le16 scid[0];
+	__le16 scid[];
 } __packed;
 
 #define L2CAP_RECONF_SUCCESS		0x0000
-- 
cgit v1.2.3-59-g8ed1b


From 49c06c9eb14ba61725c1c82e5107f4e4bd6c1886 Mon Sep 17 00:00:00 2001
From: Łukasz Rymanowski <lukasz.rymanowski@codecoup.pl>
Date: Wed, 13 May 2020 10:18:53 +0200
Subject: Bluetooth: Fix for GAP/SEC/SEM/BI-10-C
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Security Mode 1 Level 4 forces us to use a key size 16 octets long.
This patch adds check for that.

This is required for the qualification test GAP/SEC/SEM/BI-10-C

Logs from test when ATT is configured with sec level BT_SECURITY_FIPS

< ACL Data TX: Handle 3585 flags 0x00 dlen 11                                                                       #28 [hci0] 3.785965
      SMP: Pairing Request (0x01) len 6
        IO capability: DisplayYesNo (0x01)
        OOB data: Authentication data not present (0x00)
        Authentication requirement: Bonding, MITM, SC, No Keypresses (0x0d)
        Max encryption key size: 16
        Initiator key distribution: EncKey Sign (0x05)
        Responder key distribution: EncKey IdKey Sign (0x07)
> ACL Data RX: Handle 3585 flags 0x02 dlen 11                                                                       #35 [hci0] 3.883020
      SMP: Pairing Response (0x02) len 6
        IO capability: DisplayYesNo (0x01)
        OOB data: Authentication data not present (0x00)
        Authentication requirement: Bonding, MITM, SC, No Keypresses (0x0d)
        Max encryption key size: 7
        Initiator key distribution: EncKey Sign (0x05)
        Responder key distribution: EncKey IdKey Sign (0x07)
< ACL Data TX: Handle 3585 flags 0x00 dlen 6                                                                        #36 [hci0] 3.883136
      SMP: Pairing Failed (0x05) len 1
        Reason: Encryption key size (0x06)

Signed-off-by: Łukasz Rymanowski <lukasz.rymanowski@codecoup.pl>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/smp.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bluetooth/smp.c b/net/bluetooth/smp.c
index 5510017cf9ff..6fd9ddb2d85c 100644
--- a/net/bluetooth/smp.c
+++ b/net/bluetooth/smp.c
@@ -730,6 +730,10 @@ static u8 check_enc_key_size(struct l2cap_conn *conn, __u8 max_key_size)
 	struct hci_dev *hdev = conn->hcon->hdev;
 	struct smp_chan *smp = chan->data;
 
+	if (conn->hcon->pending_sec_level == BT_SECURITY_FIPS &&
+	    max_key_size != SMP_MAX_ENC_KEY_SIZE)
+		return SMP_ENC_KEY_SIZE;
+
 	if (max_key_size > hdev->le_max_key_size ||
 	    max_key_size < SMP_MIN_ENC_KEY_SIZE)
 		return SMP_ENC_KEY_SIZE;
-- 
cgit v1.2.3-59-g8ed1b


From 56b5453a86203a44726f523b4133c1feca49ce7c Mon Sep 17 00:00:00 2001
From: Hsin-Yu Chao <hychao@chromium.org>
Date: Fri, 15 May 2020 17:27:04 +0800
Subject: Bluetooth: Add SCO fallback for invalid LMP parameters error

Bluetooth PTS test case HFP/AG/ACC/BI-12-I accepts SCO connection
with invalid parameter at the first SCO request expecting AG to
attempt another SCO request with the use of "safe settings" for
given codec, based on section 5.7.1.2 of the HFP 1.7 specification.

This patch addresses it by adding "Invalid LMP Parameters" (0x1e)
to the SCO fallback case. Verified with below log:

< HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17
        Handle: 256
        Transmit bandwidth: 8000
        Receive bandwidth: 8000
        Max latency: 13
        Setting: 0x0003
          Input Coding: Linear
          Input Data Format: 1's complement
          Input Sample Size: 8-bit
          # of bits padding at MSB: 0
          Air Coding Format: Transparent Data
        Retransmission effort: Optimize for link quality (0x02)
        Packet type: 0x0380
          3-EV3 may not be used
          2-EV5 may not be used
          3-EV5 may not be used
> HCI Event: Command Status (0x0f) plen 4
      Setup Synchronous Connection (0x01|0x0028) ncmd 1
        Status: Success (0x00)
> HCI Event: Number of Completed Packets (0x13) plen 5
        Num handles: 1
        Handle: 256
        Count: 1
> HCI Event: Max Slots Change (0x1b) plen 3
        Handle: 256
        Max slots: 1
> HCI Event: Synchronous Connect Complete (0x2c) plen 17
        Status: Invalid LMP Parameters / Invalid LL Parameters (0x1e)
        Handle: 0
        Address: 00:1B:DC:F2:21:59 (OUI 00-1B-DC)
        Link type: eSCO (0x02)
        Transmission interval: 0x00
        Retransmission window: 0x02
        RX packet length: 0
        TX packet length: 0
        Air mode: Transparent (0x03)
< HCI Command: Setup Synchronous Connection (0x01|0x0028) plen 17
        Handle: 256
        Transmit bandwidth: 8000
        Receive bandwidth: 8000
        Max latency: 8
        Setting: 0x0003
          Input Coding: Linear
          Input Data Format: 1's complement
          Input Sample Size: 8-bit
          # of bits padding at MSB: 0
          Air Coding Format: Transparent Data
        Retransmission effort: Optimize for link quality (0x02)
        Packet type: 0x03c8
          EV3 may be used
          2-EV3 may not be used
          3-EV3 may not be used
          2-EV5 may not be used
          3-EV5 may not be used
> HCI Event: Command Status (0x0f) plen 4
      Setup Synchronous Connection (0x01|0x0028) ncmd 1
        Status: Success (0x00)
> HCI Event: Max Slots Change (0x1b) plen 3
        Handle: 256
        Max slots: 5
> HCI Event: Max Slots Change (0x1b) plen 3
        Handle: 256
        Max slots: 1
> HCI Event: Synchronous Connect Complete (0x2c) plen 17
        Status: Success (0x00)
        Handle: 257
        Address: 00:1B:DC:F2:21:59 (OUI 00-1B-DC)
        Link type: eSCO (0x02)
        Transmission interval: 0x06
        Retransmission window: 0x04
        RX packet length: 30
        TX packet length: 30
        Air mode: Transparent (0x03)

Signed-off-by: Hsin-Yu Chao <hychao@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_event.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index 73aabca0064b..f024b3d57a1c 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -4337,6 +4337,7 @@ static void hci_sync_conn_complete_evt(struct hci_dev *hdev,
 	case 0x11:	/* Unsupported Feature or Parameter Value */
 	case 0x1c:	/* SCO interval rejected */
 	case 0x1a:	/* Unsupported Remote Feature */
+	case 0x1e:	/* Invalid LMP Parameters */
 	case 0x1f:	/* Unspecified error */
 	case 0x20:	/* Unsupported LMP Parameter value */
 		if (conn->out) {
-- 
cgit v1.2.3-59-g8ed1b


From a228f7a410290d836f3a9f9b1ed5aef1aab25cc7 Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Thu, 14 May 2020 13:14:04 -0700
Subject: Bluetooth: hci_qca: Enable WBS support for wcn3991

WCN3991 supports transparent WBS (host encoded mSBC). Add a flag to the
device match data to show WBS is supported.

This requires the matching firmware for WCN3991 in linux-firmware:
        1a8b0dc00f77 (qca: Enable transparent WBS for WCN3991)

Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 23 +++++++++++++++++------
 1 file changed, 17 insertions(+), 6 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index b3fd07a6f812..26efe822f6e5 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -75,6 +75,9 @@ enum qca_flags {
 	QCA_HW_ERROR_EVENT
 };
 
+enum qca_capabilities {
+	QCA_CAP_WIDEBAND_SPEECH = BIT(0),
+};
 
 /* HCI_IBS transmit side sleep protocol states */
 enum tx_ibs_states {
@@ -187,10 +190,11 @@ struct qca_vreg {
 	unsigned int load_uA;
 };
 
-struct qca_vreg_data {
+struct qca_device_data {
 	enum qca_btsoc_type soc_type;
 	struct qca_vreg *vregs;
 	size_t num_vregs;
+	uint32_t capabilities;
 };
 
 /*
@@ -1691,7 +1695,7 @@ static const struct hci_uart_proto qca_proto = {
 	.dequeue	= qca_dequeue,
 };
 
-static const struct qca_vreg_data qca_soc_data_wcn3990 = {
+static const struct qca_device_data qca_soc_data_wcn3990 = {
 	.soc_type = QCA_WCN3990,
 	.vregs = (struct qca_vreg []) {
 		{ "vddio", 15000  },
@@ -1702,7 +1706,7 @@ static const struct qca_vreg_data qca_soc_data_wcn3990 = {
 	.num_vregs = 4,
 };
 
-static const struct qca_vreg_data qca_soc_data_wcn3991 = {
+static const struct qca_device_data qca_soc_data_wcn3991 = {
 	.soc_type = QCA_WCN3991,
 	.vregs = (struct qca_vreg []) {
 		{ "vddio", 15000  },
@@ -1711,9 +1715,10 @@ static const struct qca_vreg_data qca_soc_data_wcn3991 = {
 		{ "vddch0", 450000 },
 	},
 	.num_vregs = 4,
+	.capabilities = QCA_CAP_WIDEBAND_SPEECH,
 };
 
-static const struct qca_vreg_data qca_soc_data_wcn3998 = {
+static const struct qca_device_data qca_soc_data_wcn3998 = {
 	.soc_type = QCA_WCN3998,
 	.vregs = (struct qca_vreg []) {
 		{ "vddio", 10000  },
@@ -1724,7 +1729,7 @@ static const struct qca_vreg_data qca_soc_data_wcn3998 = {
 	.num_vregs = 4,
 };
 
-static const struct qca_vreg_data qca_soc_data_qca6390 = {
+static const struct qca_device_data qca_soc_data_qca6390 = {
 	.soc_type = QCA_QCA6390,
 	.num_vregs = 0,
 };
@@ -1860,7 +1865,7 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 {
 	struct qca_serdev *qcadev;
 	struct hci_dev *hdev;
-	const struct qca_vreg_data *data;
+	const struct qca_device_data *data;
 	int err;
 	bool power_ctrl_enabled = true;
 
@@ -1948,6 +1953,12 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 		hdev->shutdown = qca_power_off;
 	}
 
+	/* Wideband speech support must be set per driver since it can't be
+	 * queried via hci.
+	 */
+	if (data && (data->capabilities & QCA_CAP_WIDEBAND_SPEECH))
+		set_bit(HCI_QUIRK_WIDEBAND_SPEECH_SUPPORTED, &hdev->quirks);
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From beb12813bc75d4a23de43b85ad1c7cb28d27631e Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 13 May 2020 12:39:51 +0300
Subject: rtlwifi: Fix a double free in _rtl_usb_tx_urb_setup()

Seven years ago we tried to fix a leak but actually introduced a double
free instead.  It was an understandable mistake because the code was a
bit confusing and the free was done in the wrong place.  The "skb"
pointer is freed in both _rtl_usb_tx_urb_setup() and _rtl_usb_transmit().
The free belongs in _rtl_usb_transmit() instead of _rtl_usb_tx_urb_setup()
and I've cleaned the code up a bit to hopefully make it more clear.

Fixes: 36ef0b473fbf ("rtlwifi: usb: add missing freeing of skbuff")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200513093951.GD347693@mwanda
---
 drivers/net/wireless/realtek/rtlwifi/usb.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/usb.c b/drivers/net/wireless/realtek/rtlwifi/usb.c
index 348b0072cdd6..c66c6dc00378 100644
--- a/drivers/net/wireless/realtek/rtlwifi/usb.c
+++ b/drivers/net/wireless/realtek/rtlwifi/usb.c
@@ -881,10 +881,8 @@ static struct urb *_rtl_usb_tx_urb_setup(struct ieee80211_hw *hw,
 
 	WARN_ON(NULL == skb);
 	_urb = usb_alloc_urb(0, GFP_ATOMIC);
-	if (!_urb) {
-		kfree_skb(skb);
+	if (!_urb)
 		return NULL;
-	}
 	_rtl_install_trx_info(rtlusb, skb, ep_num);
 	usb_fill_bulk_urb(_urb, rtlusb->udev, usb_sndbulkpipe(rtlusb->udev,
 			  ep_num), skb->data, skb->len, _rtl_tx_complete, skb);
@@ -898,7 +896,6 @@ static void _rtl_usb_transmit(struct ieee80211_hw *hw, struct sk_buff *skb,
 	struct rtl_usb *rtlusb = rtl_usbdev(rtl_usbpriv(hw));
 	u32 ep_num;
 	struct urb *_urb = NULL;
-	struct sk_buff *_skb = NULL;
 
 	WARN_ON(NULL == rtlusb->usb_tx_aggregate_hdl);
 	if (unlikely(IS_USB_STOP(rtlusb))) {
@@ -907,8 +904,7 @@ static void _rtl_usb_transmit(struct ieee80211_hw *hw, struct sk_buff *skb,
 		return;
 	}
 	ep_num = rtlusb->ep_map.ep_mapping[qnum];
-	_skb = skb;
-	_urb = _rtl_usb_tx_urb_setup(hw, _skb, ep_num);
+	_urb = _rtl_usb_tx_urb_setup(hw, skb, ep_num);
 	if (unlikely(!_urb)) {
 		pr_err("Can't allocate urb. Drop skb!\n");
 		kfree_skb(skb);
-- 
cgit v1.2.3-59-g8ed1b


From 449be86670f50284069cb0ddc4fe98f064ac6d87 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:21 +0800
Subject: rtw88: extract: export symbols used in chip functionalities

In the current design, various chip functions and tables
are built into rtw88 core. That causes the kernel to load
this functionality even if a chip isn't currently used. We
plan to make each chip's functionalities a separate
kernel module to reduce rtw88 core, so that the kernel
only loads what is necessary.

Before extracting chip functionalities, we export symbols
inside rtw88 core which will be used in chip modules.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/bf.c    |  7 +++++++
 drivers/net/wireless/realtek/rtw88/coex.c  |  3 +++
 drivers/net/wireless/realtek/rtw88/efuse.c |  1 +
 drivers/net/wireless/realtek/rtw88/fw.c    |  1 +
 drivers/net/wireless/realtek/rtw88/mac.c   |  1 +
 drivers/net/wireless/realtek/rtw88/phy.c   | 26 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rx.c    |  1 +
 drivers/net/wireless/realtek/rtw88/util.c  |  2 ++
 8 files changed, 42 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/bf.c b/drivers/net/wireless/realtek/rtw88/bf.c
index a5912da327e2..8a070d5d9174 100644
--- a/drivers/net/wireless/realtek/rtw88/bf.c
+++ b/drivers/net/wireless/realtek/rtw88/bf.c
@@ -220,6 +220,7 @@ void rtw_bf_enable_bfee_su(struct rtw_dev *rtwdev, struct rtw_vif *vif,
 	/* ndp rx standby timer */
 	rtw_write8(rtwdev, REG_SND_PTCL_CTRL + 3, RTW_NDP_RX_STANDBY_TIME);
 }
+EXPORT_SYMBOL(rtw_bf_enable_bfee_su);
 
 /* nc index: 1 2T2R 0 1T1R
  * nr index: 1 use Nsts 0 use reg setting
@@ -263,6 +264,7 @@ void rtw_bf_enable_bfee_mu(struct rtw_dev *rtwdev, struct rtw_vif *vif,
 	/* accept NDPA and BF report poll */
 	rtw_write16_set(rtwdev, REG_RXFLTMAP1, BIT_RXFLTMAP1_BF);
 }
+EXPORT_SYMBOL(rtw_bf_enable_bfee_mu);
 
 void rtw_bf_remove_bfee_su(struct rtw_dev *rtwdev,
 			   struct rtw_bfee *bfee)
@@ -288,6 +290,7 @@ void rtw_bf_remove_bfee_su(struct rtw_dev *rtwdev,
 	clear_bit(bfee->su_reg_index, bfinfo->bfer_su_reg_maping);
 	bfee->su_reg_index = 0xFF;
 }
+EXPORT_SYMBOL(rtw_bf_remove_bfee_su);
 
 void rtw_bf_remove_bfee_mu(struct rtw_dev *rtwdev,
 			   struct rtw_bfee *bfee)
@@ -301,6 +304,7 @@ void rtw_bf_remove_bfee_mu(struct rtw_dev *rtwdev,
 	if (bfinfo->bfer_su_cnt == 0 && bfinfo->bfer_mu_cnt == 0)
 		rtw_bf_del_sounding(rtwdev);
 }
+EXPORT_SYMBOL(rtw_bf_remove_bfee_mu);
 
 void rtw_bf_set_gid_table(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 			  struct ieee80211_bss_conf *conf)
@@ -329,6 +333,7 @@ void rtw_bf_set_gid_table(struct rtw_dev *rtwdev, struct ieee80211_vif *vif,
 
 	rtw_bf_cfg_mu_bfee(rtwdev, &param);
 }
+EXPORT_SYMBOL(rtw_bf_set_gid_table);
 
 void rtw_bf_phy_init(struct rtw_dev *rtwdev)
 {
@@ -365,6 +370,7 @@ void rtw_bf_phy_init(struct rtw_dev *rtwdev)
 	rtw_write32_mask(rtwdev, REG_BBPSF_CTRL, BIT_MASK_CSI_RATE,
 			 DESC_RATE6M);
 }
+EXPORT_SYMBOL(rtw_bf_phy_init);
 
 void rtw_bf_cfg_csi_rate(struct rtw_dev *rtwdev, u8 rssi, u8 cur_rate,
 			 u8 fixrate_en, u8 *new_rate)
@@ -395,3 +401,4 @@ void rtw_bf_cfg_csi_rate(struct rtw_dev *rtwdev, u8 rssi, u8 cur_rate,
 		*new_rate = DESC_RATE24M;
 	}
 }
+EXPORT_SYMBOL(rtw_bf_cfg_csi_rate);
diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index 567372fb4e12..924dccd5d146 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -283,6 +283,7 @@ void rtw_coex_write_scbd(struct rtw_dev *rtwdev, u16 bitpos, bool set)
 		rtw_write16(rtwdev, REG_WIFI_BT_INFO, val);
 	}
 }
+EXPORT_SYMBOL(rtw_coex_write_scbd);
 
 static u16 rtw_coex_read_scbd(struct rtw_dev *rtwdev)
 {
@@ -732,6 +733,7 @@ u32 rtw_coex_read_indirect_reg(struct rtw_dev *rtwdev, u16 addr)
 
 	return val;
 }
+EXPORT_SYMBOL(rtw_coex_read_indirect_reg);
 
 void rtw_coex_write_indirect_reg(struct rtw_dev *rtwdev, u16 addr,
 				 u32 mask, u32 val)
@@ -745,6 +747,7 @@ void rtw_coex_write_indirect_reg(struct rtw_dev *rtwdev, u16 addr,
 	if (!ltecoex_reg_write(rtwdev, addr, tmp))
 		rtw_err(rtwdev, "failed to write indirect register\n");
 }
+EXPORT_SYMBOL(rtw_coex_write_indirect_reg);
 
 static void rtw_coex_coex_ctrl_owner(struct rtw_dev *rtwdev, bool wifi_control)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/efuse.c b/drivers/net/wireless/realtek/rtw88/efuse.c
index 13d1c58d6de5..c266c84ef233 100644
--- a/drivers/net/wireless/realtek/rtw88/efuse.c
+++ b/drivers/net/wireless/realtek/rtw88/efuse.c
@@ -141,6 +141,7 @@ int rtw_read8_physical_efuse(struct rtw_dev *rtwdev, u16 addr, u8 *data)
 
 	return 0;
 }
+EXPORT_SYMBOL(rtw_read8_physical_efuse);
 
 int rtw_parse_efuse_map(struct rtw_dev *rtwdev)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/fw.c b/drivers/net/wireless/realtek/rtw88/fw.c
index 2c28afe525c7..6478fd7a78f6 100644
--- a/drivers/net/wireless/realtek/rtw88/fw.c
+++ b/drivers/net/wireless/realtek/rtw88/fw.c
@@ -330,6 +330,7 @@ void rtw_fw_do_iqk(struct rtw_dev *rtwdev, struct rtw_iqk_para *para)
 
 	rtw_fw_send_h2c_packet(rtwdev, h2c_pkt);
 }
+EXPORT_SYMBOL(rtw_fw_do_iqk);
 
 void rtw_fw_query_bt_info(struct rtw_dev *rtwdev)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/mac.c b/drivers/net/wireless/realtek/rtw88/mac.c
index 3a5cfebe5a62..19b9b7ab016b 100644
--- a/drivers/net/wireless/realtek/rtw88/mac.c
+++ b/drivers/net/wireless/realtek/rtw88/mac.c
@@ -56,6 +56,7 @@ void rtw_set_channel_mac(struct rtw_dev *rtwdev, u8 channel, u8 bw,
 		value8 |= BIT_CHECK_CCK_EN;
 	rtw_write8(rtwdev, REG_CCK_CHECK, value8);
 }
+EXPORT_SYMBOL(rtw_set_channel_mac);
 
 static int rtw_mac_pre_system_cfg(struct rtw_dev *rtwdev)
 {
diff --git a/drivers/net/wireless/realtek/rtw88/phy.c b/drivers/net/wireless/realtek/rtw88/phy.c
index 45181f602e3f..8d93f3159746 100644
--- a/drivers/net/wireless/realtek/rtw88/phy.c
+++ b/drivers/net/wireless/realtek/rtw88/phy.c
@@ -82,6 +82,8 @@ u8 *rtw_rate_section[RTW_RATE_SECTION_MAX] = {
 	rtw_ht_1s_rates, rtw_ht_2s_rates,
 	rtw_vht_1s_rates, rtw_vht_2s_rates
 };
+EXPORT_SYMBOL(rtw_rate_section);
+
 u8 rtw_rate_size[RTW_RATE_SECTION_MAX] = {
 	ARRAY_SIZE(rtw_cck_rates),
 	ARRAY_SIZE(rtw_ofdm_rates),
@@ -90,6 +92,8 @@ u8 rtw_rate_size[RTW_RATE_SECTION_MAX] = {
 	ARRAY_SIZE(rtw_vht_1s_rates),
 	ARRAY_SIZE(rtw_vht_2s_rates)
 };
+EXPORT_SYMBOL(rtw_rate_size);
+
 static const u8 rtw_cck_size = ARRAY_SIZE(rtw_cck_rates);
 static const u8 rtw_ofdm_size = ARRAY_SIZE(rtw_ofdm_rates);
 static const u8 rtw_ht_1s_size = ARRAY_SIZE(rtw_ht_1s_rates);
@@ -137,6 +141,7 @@ void rtw_phy_init(struct rtw_dev *rtwdev)
 
 	dm_info->iqk.done = false;
 }
+EXPORT_SYMBOL(rtw_phy_init);
 
 void rtw_phy_dig_write(struct rtw_dev *rtwdev, u8 igi)
 {
@@ -676,6 +681,7 @@ u8 rtw_phy_rf_power_2_rssi(s8 *rf_power, u8 path_num)
 
 	return rtw_phy_linear_2_db(sum);
 }
+EXPORT_SYMBOL(rtw_phy_rf_power_2_rssi);
 
 u32 rtw_phy_read_rf(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 		    u32 addr, u32 mask)
@@ -698,6 +704,7 @@ u32 rtw_phy_read_rf(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 
 	return val;
 }
+EXPORT_SYMBOL(rtw_phy_read_rf);
 
 u32 rtw_phy_read_rf_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 			 u32 addr, u32 mask)
@@ -746,6 +753,7 @@ u32 rtw_phy_read_rf_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 
 	return (val32 & mask) >> shift;
 }
+EXPORT_SYMBOL(rtw_phy_read_rf_sipi);
 
 bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 			       u32 addr, u32 mask, u32 data)
@@ -785,6 +793,7 @@ bool rtw_phy_write_rf_reg_sipi(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 
 	return true;
 }
+EXPORT_SYMBOL(rtw_phy_write_rf_reg_sipi);
 
 bool rtw_phy_write_rf_reg(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 			  u32 addr, u32 mask, u32 data)
@@ -818,6 +827,7 @@ bool rtw_phy_write_rf_reg_mix(struct rtw_dev *rtwdev, enum rtw_rf_path rf_path,
 
 	return rtw_phy_write_rf_reg_sipi(rtwdev, rf_path, addr, mask, data);
 }
+EXPORT_SYMBOL(rtw_phy_write_rf_reg_mix);
 
 void rtw_phy_setup_phy_cond(struct rtw_dev *rtwdev, u32 pkg)
 {
@@ -910,6 +920,7 @@ void rtw_parse_tbl_phy_cond(struct rtw_dev *rtwdev, const struct rtw_table *tbl)
 		}
 	}
 }
+EXPORT_SYMBOL(rtw_parse_tbl_phy_cond);
 
 #define bcd_to_dec_pwr_by_rate(val, i) bcd2bin(val >> (i * 8))
 
@@ -1273,6 +1284,7 @@ void rtw_parse_tbl_bb_pg(struct rtw_dev *rtwdev, const struct rtw_table *tbl)
 					       p->data);
 	}
 }
+EXPORT_SYMBOL(rtw_parse_tbl_bb_pg);
 
 static const u8 rtw_channel_idx_5g[RTW_MAX_CHANNEL_NUM_5G] = {
 	36,  38,  40,  42,  44,  46,  48, /* Band 1 */
@@ -1417,18 +1429,21 @@ void rtw_parse_tbl_txpwr_lmt(struct rtw_dev *rtwdev,
 
 	rtw_xref_txpwr_lmt(rtwdev);
 }
+EXPORT_SYMBOL(rtw_parse_tbl_txpwr_lmt);
 
 void rtw_phy_cfg_mac(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 		     u32 addr, u32 data)
 {
 	rtw_write8(rtwdev, addr, data);
 }
+EXPORT_SYMBOL(rtw_phy_cfg_mac);
 
 void rtw_phy_cfg_agc(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 		     u32 addr, u32 data)
 {
 	rtw_write32(rtwdev, addr, data);
 }
+EXPORT_SYMBOL(rtw_phy_cfg_agc);
 
 void rtw_phy_cfg_bb(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 		    u32 addr, u32 data)
@@ -1448,6 +1463,7 @@ void rtw_phy_cfg_bb(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 	else
 		rtw_write32(rtwdev, addr, data);
 }
+EXPORT_SYMBOL(rtw_phy_cfg_bb);
 
 void rtw_phy_cfg_rf(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 		    u32 addr, u32 data)
@@ -1461,6 +1477,7 @@ void rtw_phy_cfg_rf(struct rtw_dev *rtwdev, const struct rtw_table *tbl,
 		udelay(1);
 	}
 }
+EXPORT_SYMBOL(rtw_phy_cfg_rf);
 
 static void rtw_load_rfk_table(struct rtw_dev *rtwdev)
 {
@@ -1498,6 +1515,7 @@ void rtw_phy_load_tables(struct rtw_dev *rtwdev)
 		rtw_load_table(rtwdev, tbl);
 	}
 }
+EXPORT_SYMBOL(rtw_phy_load_tables);
 
 static u8 rtw_get_channel_group(u8 channel)
 {
@@ -1841,6 +1859,7 @@ rtw_phy_get_tx_power_index(struct rtw_dev *rtwdev, u8 rf_path, u8 rate,
 
 	return tx_power;
 }
+EXPORT_SYMBOL(rtw_phy_get_tx_power_index);
 
 static void rtw_phy_set_tx_power_index_by_rs(struct rtw_dev *rtwdev,
 					     u8 ch, u8 path, u8 rs)
@@ -1903,6 +1922,7 @@ void rtw_phy_set_tx_power_level(struct rtw_dev *rtwdev, u8 channel)
 	chip->ops->set_tx_power_index(rtwdev);
 	mutex_unlock(&hal->tx_power_mutex);
 }
+EXPORT_SYMBOL(rtw_phy_set_tx_power_level);
 
 static void
 rtw_phy_tx_power_by_rate_config_by_path(struct rtw_hal *hal, u8 path,
@@ -2060,6 +2080,7 @@ void rtw_phy_config_swing_table(struct rtw_dev *rtwdev,
 		swing_table->n[RF_PATH_B] = tbl->pwrtrk_2gb_n;
 	}
 }
+EXPORT_SYMBOL(rtw_phy_config_swing_table);
 
 void rtw_phy_pwrtrack_avg(struct rtw_dev *rtwdev, u8 thermal, u8 path)
 {
@@ -2069,6 +2090,7 @@ void rtw_phy_pwrtrack_avg(struct rtw_dev *rtwdev, u8 thermal, u8 path)
 	dm_info->thermal_avg[path] =
 		ewma_thermal_read(&dm_info->avg_thermal[path]);
 }
+EXPORT_SYMBOL(rtw_phy_pwrtrack_avg);
 
 bool rtw_phy_pwrtrack_thermal_changed(struct rtw_dev *rtwdev, u8 thermal,
 				      u8 path)
@@ -2081,6 +2103,7 @@ bool rtw_phy_pwrtrack_thermal_changed(struct rtw_dev *rtwdev, u8 thermal,
 
 	return true;
 }
+EXPORT_SYMBOL(rtw_phy_pwrtrack_thermal_changed);
 
 u8 rtw_phy_pwrtrack_get_delta(struct rtw_dev *rtwdev, u8 path)
 {
@@ -2093,6 +2116,7 @@ u8 rtw_phy_pwrtrack_get_delta(struct rtw_dev *rtwdev, u8 path)
 
 	return min_t(u8, therm_delta, RTW_PWR_TRK_TBL_SZ - 1);
 }
+EXPORT_SYMBOL(rtw_phy_pwrtrack_get_delta);
 
 s8 rtw_phy_pwrtrack_get_pwridx(struct rtw_dev *rtwdev,
 			       struct rtw_swing_table *swing_table,
@@ -2126,6 +2150,7 @@ s8 rtw_phy_pwrtrack_get_pwridx(struct rtw_dev *rtwdev,
 	else
 		return -delta_swing_table_idx_neg[delta];
 }
+EXPORT_SYMBOL(rtw_phy_pwrtrack_get_pwridx);
 
 bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev)
 {
@@ -2139,3 +2164,4 @@ bool rtw_phy_pwrtrack_need_iqk(struct rtw_dev *rtwdev)
 	}
 	return false;
 }
+EXPORT_SYMBOL(rtw_phy_pwrtrack_need_iqk);
diff --git a/drivers/net/wireless/realtek/rtw88/rx.c b/drivers/net/wireless/realtek/rtw88/rx.c
index 9b90339ab697..7087e385a9b3 100644
--- a/drivers/net/wireless/realtek/rtw88/rx.c
+++ b/drivers/net/wireless/realtek/rtw88/rx.c
@@ -191,3 +191,4 @@ void rtw_rx_fill_rx_status(struct rtw_dev *rtwdev,
 
 	rtw_rx_addr_match(rtwdev, pkt_stat, hdr);
 }
+EXPORT_SYMBOL(rtw_rx_fill_rx_status);
diff --git a/drivers/net/wireless/realtek/rtw88/util.c b/drivers/net/wireless/realtek/rtw88/util.c
index 42cf177cd445..2c515af214e7 100644
--- a/drivers/net/wireless/realtek/rtw88/util.c
+++ b/drivers/net/wireless/realtek/rtw88/util.c
@@ -19,6 +19,7 @@ bool check_hw_ready(struct rtw_dev *rtwdev, u32 addr, u32 mask, u32 target)
 
 	return false;
 }
+EXPORT_SYMBOL(check_hw_ready);
 
 bool ltecoex_read_reg(struct rtw_dev *rtwdev, u16 offset, u32 *val)
 {
@@ -76,6 +77,7 @@ void rtw_restore_reg(struct rtw_dev *rtwdev,
 		}
 	}
 }
+EXPORT_SYMBOL(rtw_restore_reg);
 
 void rtw_desc_to_mcsrate(u16 rate, u8 *mcs, u8 *nss)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 72f256c2b948622cc45ff8bc0456dd6039d8fe36 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:22 +0800
Subject: rtw88: extract: export symbols about pci interface

In the current design, chip entry points are built into
the pci module. That makes the pci module depend on chips.
Because of this dependency, once the pci module is loaded,
the kernel will load chip functionalities, including those
that may not be currently used.

We plan to split chip entry points from the pci module.
Hence we export pci symbols that will be used in chip
entry point modules.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index e5ea30c04ac5..fb5d28152775 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1360,7 +1360,8 @@ static int __maybe_unused rtw_pci_resume(struct device *dev)
 	return 0;
 }
 
-static SIMPLE_DEV_PM_OPS(rtw_pm_ops, rtw_pci_suspend, rtw_pci_resume);
+SIMPLE_DEV_PM_OPS(rtw_pm_ops, rtw_pci_suspend, rtw_pci_resume);
+EXPORT_SYMBOL(rtw_pm_ops);
 
 static int rtw_pci_claim(struct rtw_dev *rtwdev, struct pci_dev *pdev)
 {
@@ -1473,8 +1474,8 @@ static void rtw_pci_free_irq(struct rtw_dev *rtwdev, struct pci_dev *pdev)
 	pci_free_irq_vectors(pdev);
 }
 
-static int rtw_pci_probe(struct pci_dev *pdev,
-			 const struct pci_device_id *id)
+int rtw_pci_probe(struct pci_dev *pdev,
+		  const struct pci_device_id *id)
 {
 	struct ieee80211_hw *hw;
 	struct rtw_dev *rtwdev;
@@ -1551,8 +1552,9 @@ err_release_hw:
 
 	return ret;
 }
+EXPORT_SYMBOL(rtw_pci_probe);
 
-static void rtw_pci_remove(struct pci_dev *pdev)
+void rtw_pci_remove(struct pci_dev *pdev)
 {
 	struct ieee80211_hw *hw = pci_get_drvdata(pdev);
 	struct rtw_dev *rtwdev;
@@ -1572,8 +1574,9 @@ static void rtw_pci_remove(struct pci_dev *pdev)
 	rtw_core_deinit(rtwdev);
 	ieee80211_free_hw(hw);
 }
+EXPORT_SYMBOL(rtw_pci_remove);
 
-static void rtw_pci_shutdown(struct pci_dev *pdev)
+void rtw_pci_shutdown(struct pci_dev *pdev)
 {
 	struct ieee80211_hw *hw = pci_get_drvdata(pdev);
 	struct rtw_dev *rtwdev;
@@ -1588,6 +1591,7 @@ static void rtw_pci_shutdown(struct pci_dev *pdev)
 	if (chip->ops->shutdown)
 		chip->ops->shutdown(rtwdev);
 }
+EXPORT_SYMBOL(rtw_pci_shutdown);
 
 static const struct pci_device_id rtw_pci_id_table[] = {
 #ifdef CONFIG_RTW88_8822BE
-- 
cgit v1.2.3-59-g8ed1b


From ba0fbe236fb8a7b992e82d6eafb03a600f5eba43 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:23 +0800
Subject: rtw88: extract: make 8822c an individual kernel module

Build the objects for 8822c functions and 8822c tables,
i.e. rtw8822c.o and rtw8822c_table.o, as an individual
kernel module called rtw88_8822c.ko.

For 8822c pcie chip, i.e. 8822CE chip, add a chip
entry point module called rtw88_8822ce.ko which
will depend on rtw88_8822c.ko and rtwpci.ko.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/Kconfig     |  6 +++++-
 drivers/net/wireless/realtek/rtw88/Makefile    |  7 +++++-
 drivers/net/wireless/realtek/rtw88/main.h      |  1 -
 drivers/net/wireless/realtek/rtw88/pci.c       |  3 ---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c  |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8822ce.c | 30 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822ce.h | 14 ++++++++++++
 7 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8822ce.c
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8822ce.h

diff --git a/drivers/net/wireless/realtek/rtw88/Kconfig b/drivers/net/wireless/realtek/rtw88/Kconfig
index 7a5fa68945c4..f4dbb5914bde 100644
--- a/drivers/net/wireless/realtek/rtw88/Kconfig
+++ b/drivers/net/wireless/realtek/rtw88/Kconfig
@@ -16,6 +16,9 @@ config RTW88_CORE
 config RTW88_PCI
 	tristate
 
+config RTW88_8822C
+	tristate
+
 config RTW88_8822BE
 	bool "Realtek 8822BE PCI wireless network adapter"
 	depends on PCI
@@ -27,10 +30,11 @@ config RTW88_8822BE
 	  802.11ac PCIe wireless network adapter
 
 config RTW88_8822CE
-	bool "Realtek 8822CE PCI wireless network adapter"
+	tristate "Realtek 8822CE PCI wireless network adapter"
 	depends on PCI
 	select RTW88_CORE
 	select RTW88_PCI
+	select RTW88_8822C
 	help
 	  Select this option will enable support for 8822CE chipset
 
diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index 385facc0dd20..e45efd2deaa3 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -19,8 +19,13 @@ rtw88-y += main.o \
 	   regd.o
 
 rtw88-$(CONFIG_RTW88_8822BE)	+= rtw8822b.o rtw8822b_table.o
-rtw88-$(CONFIG_RTW88_8822CE)	+= rtw8822c.o rtw8822c_table.o
 rtw88-$(CONFIG_RTW88_8723DE)	+= rtw8723d.o rtw8723d_table.o
 
+obj-$(CONFIG_RTW88_8822C)	+= rtw88_8822c.o
+rtw88_8822c-objs		:= rtw8822c.o rtw8822c_table.o
+
+obj-$(CONFIG_RTW88_8822CE)	+= rtw88_8822ce.o
+rtw88_8822ce-objs		:= rtw8822ce.o
+
 obj-$(CONFIG_RTW88_PCI)		+= rtwpci.o
 rtwpci-objs			:= pci.o
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index c9156b9b532c..d6e25d4c5aed 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -41,7 +41,6 @@ extern unsigned int rtw_fw_lps_deep_mode;
 extern unsigned int rtw_debug_mask;
 extern const struct ieee80211_ops rtw_ops;
 extern struct rtw_chip_info rtw8822b_hw_spec;
-extern struct rtw_chip_info rtw8822c_hw_spec;
 extern struct rtw_chip_info rtw8723d_hw_spec;
 
 #define RTW_MAX_CHANNEL_NUM_2G 14
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index fb5d28152775..af9cd23a3273 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1597,9 +1597,6 @@ static const struct pci_device_id rtw_pci_id_table[] = {
 #ifdef CONFIG_RTW88_8822BE
 	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xB822, rtw8822b_hw_spec) },
 #endif
-#ifdef CONFIG_RTW88_8822CE
-	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xC822, rtw8822c_hw_spec) },
-#endif
 #ifdef CONFIG_RTW88_8723DE
 	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xD723, rtw8723d_hw_spec) },
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index d697d70170af..5e4cc57dbd7c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/module.h>
 #include "main.h"
 #include "coex.h"
 #include "fw.h"
@@ -4377,3 +4378,7 @@ EXPORT_SYMBOL(rtw8822c_hw_spec);
 
 MODULE_FIRMWARE("rtw88/rtw8822c_fw.bin");
 MODULE_FIRMWARE("rtw88/rtw8822c_wow_fw.bin");
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ac wireless 8822c driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822ce.c b/drivers/net/wireless/realtek/rtw88/rtw8822ce.c
new file mode 100644
index 000000000000..7b6bd990651e
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822ce.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "rtw8822ce.h"
+
+static const struct pci_device_id rtw_8822ce_id_table[] = {
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xC822),
+		.driver_data = (kernel_ulong_t)&rtw8822c_hw_spec
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(pci, rtw_8822ce_id_table);
+
+static struct pci_driver rtw_8822ce_driver = {
+	.name = "rtw_8822ce",
+	.id_table = rtw_8822ce_id_table,
+	.probe = rtw_pci_probe,
+	.remove = rtw_pci_remove,
+	.driver.pm = &rtw_pm_ops,
+	.shutdown = rtw_pci_shutdown,
+};
+module_pci_driver(rtw_8822ce_driver);
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ac wireless 8822ce driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822ce.h b/drivers/net/wireless/realtek/rtw88/rtw8822ce.h
new file mode 100644
index 000000000000..c2c0e8675d74
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822ce.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#ifndef __RTW_8822CE_H_
+#define __RTW_8822CE_H_
+
+extern const struct dev_pm_ops rtw_pm_ops;
+extern struct rtw_chip_info rtw8822c_hw_spec;
+int rtw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+void rtw_pci_remove(struct pci_dev *pdev);
+void rtw_pci_shutdown(struct pci_dev *pdev);
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 416e87fcc780cae8d72cb9370fa0f46007faa69a Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:24 +0800
Subject: rtw88: extract: make 8822b an individual kernel module

Build the objects for 8822b functions and 8822b tables,
i.e. rtw8822b.o and rtw8822b_table.o, as an individual
kernel module called rtw88_8822b.ko.

For 8822b pcie chip, i.e. 8822BE chip, add a chip
entry point module called rtw88_8822be.ko which
will depend on rtw88_8822b.ko and rtwpci.ko.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-5-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/Kconfig     |  6 +++++-
 drivers/net/wireless/realtek/rtw88/Makefile    |  7 +++++-
 drivers/net/wireless/realtek/rtw88/main.h      |  1 -
 drivers/net/wireless/realtek/rtw88/pci.c       |  3 ---
 drivers/net/wireless/realtek/rtw88/rtw8822b.c  |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8822be.c | 30 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822be.h | 14 ++++++++++++
 7 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8822be.c
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8822be.h

diff --git a/drivers/net/wireless/realtek/rtw88/Kconfig b/drivers/net/wireless/realtek/rtw88/Kconfig
index f4dbb5914bde..dd71407a06c3 100644
--- a/drivers/net/wireless/realtek/rtw88/Kconfig
+++ b/drivers/net/wireless/realtek/rtw88/Kconfig
@@ -16,14 +16,18 @@ config RTW88_CORE
 config RTW88_PCI
 	tristate
 
+config RTW88_8822B
+	tristate
+
 config RTW88_8822C
 	tristate
 
 config RTW88_8822BE
-	bool "Realtek 8822BE PCI wireless network adapter"
+	tristate "Realtek 8822BE PCI wireless network adapter"
 	depends on PCI
 	select RTW88_CORE
 	select RTW88_PCI
+	select RTW88_8822B
 	help
 	  Select this option will enable support for 8822BE chipset
 
diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index e45efd2deaa3..0b29f07e3661 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -18,9 +18,14 @@ rtw88-y += main.o \
 	   wow.o \
 	   regd.o
 
-rtw88-$(CONFIG_RTW88_8822BE)	+= rtw8822b.o rtw8822b_table.o
 rtw88-$(CONFIG_RTW88_8723DE)	+= rtw8723d.o rtw8723d_table.o
 
+obj-$(CONFIG_RTW88_8822B)	+= rtw88_8822b.o
+rtw88_8822b-objs		:= rtw8822b.o rtw8822b_table.o
+
+obj-$(CONFIG_RTW88_8822BE)	+= rtw88_8822be.o
+rtw88_8822be-objs		:= rtw8822be.o
+
 obj-$(CONFIG_RTW88_8822C)	+= rtw88_8822c.o
 rtw88_8822c-objs		:= rtw8822c.o rtw8822c_table.o
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index d6e25d4c5aed..a4c4ce511e6a 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -40,7 +40,6 @@ extern bool rtw_bf_support;
 extern unsigned int rtw_fw_lps_deep_mode;
 extern unsigned int rtw_debug_mask;
 extern const struct ieee80211_ops rtw_ops;
-extern struct rtw_chip_info rtw8822b_hw_spec;
 extern struct rtw_chip_info rtw8723d_hw_spec;
 
 #define RTW_MAX_CHANNEL_NUM_2G 14
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index af9cd23a3273..39dee8368718 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1594,9 +1594,6 @@ void rtw_pci_shutdown(struct pci_dev *pdev)
 EXPORT_SYMBOL(rtw_pci_shutdown);
 
 static const struct pci_device_id rtw_pci_id_table[] = {
-#ifdef CONFIG_RTW88_8822BE
-	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xB822, rtw8822b_hw_spec) },
-#endif
 #ifdef CONFIG_RTW88_8723DE
 	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xD723, rtw8723d_hw_spec) },
 #endif
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 18c5a5a96d90..e49bdd76ab9a 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/module.h>
 #include "main.h"
 #include "coex.h"
 #include "fw.h"
@@ -2506,3 +2507,7 @@ struct rtw_chip_info rtw8822b_hw_spec = {
 EXPORT_SYMBOL(rtw8822b_hw_spec);
 
 MODULE_FIRMWARE("rtw88/rtw8822b_fw.bin");
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ac wireless 8822b driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822be.c b/drivers/net/wireless/realtek/rtw88/rtw8822be.c
new file mode 100644
index 000000000000..921916ae15ca
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822be.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "rtw8822be.h"
+
+static const struct pci_device_id rtw_8822be_id_table[] = {
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xB822),
+		.driver_data = (kernel_ulong_t)&rtw8822b_hw_spec
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(pci, rtw_8822be_id_table);
+
+static struct pci_driver rtw_8822be_driver = {
+	.name = "rtw_8822be",
+	.id_table = rtw_8822be_id_table,
+	.probe = rtw_pci_probe,
+	.remove = rtw_pci_remove,
+	.driver.pm = &rtw_pm_ops,
+	.shutdown = rtw_pci_shutdown,
+};
+module_pci_driver(rtw_8822be_driver);
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11ac wireless 8822be driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822be.h b/drivers/net/wireless/realtek/rtw88/rtw8822be.h
new file mode 100644
index 000000000000..d823ca059f5c
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822be.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#ifndef __RTW_8822BE_H_
+#define __RTW_8822BE_H_
+
+extern const struct dev_pm_ops rtw_pm_ops;
+extern struct rtw_chip_info rtw8822b_hw_spec;
+int rtw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+void rtw_pci_remove(struct pci_dev *pdev);
+void rtw_pci_shutdown(struct pci_dev *pdev);
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From f56f08636ddac358b4e57494b569e5c2174c5fd3 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:25 +0800
Subject: rtw88: extract: make 8723d an individual kernel module

Build the objects for the 8723d functions and 8723d tables,
i.e. rtw8723d.o and rtw8723d_table.o, as an individual
kernel module called rtw88_8723d.ko.

For the 8723d PCIe chip, i.e. the 8723DE chip, add a chip
entry point module called rtw88_8723de.ko, which
depends on rtw88_8723d.ko and rtwpci.ko.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-6-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/Kconfig     |  6 +++++-
 drivers/net/wireless/realtek/rtw88/Makefile    |  7 +++++-
 drivers/net/wireless/realtek/rtw88/main.h      |  1 -
 drivers/net/wireless/realtek/rtw88/pci.c       |  3 ---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c  |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8723de.c | 30 ++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8723de.h | 14 ++++++++++++
 7 files changed, 60 insertions(+), 6 deletions(-)
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723de.c
 create mode 100644 drivers/net/wireless/realtek/rtw88/rtw8723de.h

diff --git a/drivers/net/wireless/realtek/rtw88/Kconfig b/drivers/net/wireless/realtek/rtw88/Kconfig
index dd71407a06c3..ca894c4f96ac 100644
--- a/drivers/net/wireless/realtek/rtw88/Kconfig
+++ b/drivers/net/wireless/realtek/rtw88/Kconfig
@@ -22,6 +22,9 @@ config RTW88_8822B
 config RTW88_8822C
 	tristate
 
+config RTW88_8723D
+	tristate
+
 config RTW88_8822BE
 	tristate "Realtek 8822BE PCI wireless network adapter"
 	depends on PCI
@@ -45,10 +48,11 @@ config RTW88_8822CE
 	  802.11ac PCIe wireless network adapter
 
 config RTW88_8723DE
-	bool "Realtek 8723DE PCI wireless network adapter"
+	tristate "Realtek 8723DE PCI wireless network adapter"
 	depends on PCI
 	select RTW88_CORE
 	select RTW88_PCI
+	select RTW88_8723D
 	help
 	  Select this option will enable support for 8723DE chipset
 
diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index 0b29f07e3661..9e3e95360089 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -18,7 +18,6 @@ rtw88-y += main.o \
 	   wow.o \
 	   regd.o
 
-rtw88-$(CONFIG_RTW88_8723DE)	+= rtw8723d.o rtw8723d_table.o
 
 obj-$(CONFIG_RTW88_8822B)	+= rtw88_8822b.o
 rtw88_8822b-objs		:= rtw8822b.o rtw8822b_table.o
@@ -32,5 +31,11 @@ rtw88_8822c-objs		:= rtw8822c.o rtw8822c_table.o
 obj-$(CONFIG_RTW88_8822CE)	+= rtw88_8822ce.o
 rtw88_8822ce-objs		:= rtw8822ce.o
 
+obj-$(CONFIG_RTW88_8723D)	+= rtw88_8723d.o
+rtw88_8723d-objs		:= rtw8723d.o rtw8723d_table.o
+
+obj-$(CONFIG_RTW88_8723DE)	+= rtw88_8723de.o
+rtw88_8723de-objs		:= rtw8723de.o
+
 obj-$(CONFIG_RTW88_PCI)		+= rtwpci.o
 rtwpci-objs			:= pci.o
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index a4c4ce511e6a..7ee09c008cd4 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -40,7 +40,6 @@ extern bool rtw_bf_support;
 extern unsigned int rtw_fw_lps_deep_mode;
 extern unsigned int rtw_debug_mask;
 extern const struct ieee80211_ops rtw_ops;
-extern struct rtw_chip_info rtw8723d_hw_spec;
 
 #define RTW_MAX_CHANNEL_NUM_2G 14
 #define RTW_MAX_CHANNEL_NUM_5G 49
diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 39dee8368718..2a188bbc6d5a 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1594,9 +1594,6 @@ void rtw_pci_shutdown(struct pci_dev *pdev)
 EXPORT_SYMBOL(rtw_pci_shutdown);
 
 static const struct pci_device_id rtw_pci_id_table[] = {
-#ifdef CONFIG_RTW88_8723DE
-	{ RTK_PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xD723, rtw8723d_hw_spec) },
-#endif
 	{},
 };
 MODULE_DEVICE_TABLE(pci, rtw_pci_id_table);
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index b517af417e0e..340c8c455802 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2,6 +2,7 @@
 /* Copyright(c) 2018-2019  Realtek Corporation
  */
 
+#include <linux/module.h>
 #include "main.h"
 #include "coex.h"
 #include "fw.h"
@@ -2740,3 +2741,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 EXPORT_SYMBOL(rtw8723d_hw_spec);
 
 MODULE_FIRMWARE("rtw88/rtw8723d_fw.bin");
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11n wireless 8723d driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723de.c b/drivers/net/wireless/realtek/rtw88/rtw8723de.c
new file mode 100644
index 000000000000..c81eb4c33642
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723de.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#include <linux/module.h>
+#include <linux/pci.h>
+#include "rtw8723de.h"
+
+static const struct pci_device_id rtw_8723de_id_table[] = {
+	{
+		PCI_DEVICE(PCI_VENDOR_ID_REALTEK, 0xD723),
+		.driver_data = (kernel_ulong_t)&rtw8723d_hw_spec
+	},
+	{}
+};
+MODULE_DEVICE_TABLE(pci, rtw_8723de_id_table);
+
+static struct pci_driver rtw_8723de_driver = {
+	.name = "rtw_8723de",
+	.id_table = rtw_8723de_id_table,
+	.probe = rtw_pci_probe,
+	.remove = rtw_pci_remove,
+	.driver.pm = &rtw_pm_ops,
+	.shutdown = rtw_pci_shutdown,
+};
+module_pci_driver(rtw_8723de_driver);
+
+MODULE_AUTHOR("Realtek Corporation");
+MODULE_DESCRIPTION("Realtek 802.11n wireless 8723de driver");
+MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723de.h b/drivers/net/wireless/realtek/rtw88/rtw8723de.h
new file mode 100644
index 000000000000..ba3842360c20
--- /dev/null
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723de.h
@@ -0,0 +1,14 @@
+/* SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause */
+/* Copyright(c) 2018-2019  Realtek Corporation
+ */
+
+#ifndef __RTW_8723DE_H_
+#define __RTW_8723DE_H_
+
+extern const struct dev_pm_ops rtw_pm_ops;
+extern struct rtw_chip_info rtw8723d_hw_spec;
+int rtw_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id);
+void rtw_pci_remove(struct pci_dev *pdev);
+void rtw_pci_shutdown(struct pci_dev *pdev);
+
+#endif
-- 
cgit v1.2.3-59-g8ed1b


From 51aab89a1808660d03559c9c33267973f6796a95 Mon Sep 17 00:00:00 2001
From: Zong-Zhe Yang <kevin_yang@realtek.com>
Date: Fri, 15 May 2020 13:23:26 +0800
Subject: rtw88: extract: remove the unused after extracting

Remove the PCI driver registration (rtw_pci_id_table, rtw_pci_driver)
and the RTK_PCI_DEVICE macro, which are unused after extracting the
chip modules.

Signed-off-by: Zong-Zhe Yang <kevin_yang@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-7-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/pci.c | 15 ---------------
 drivers/net/wireless/realtek/rtw88/pci.h |  4 ----
 2 files changed, 19 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/pci.c b/drivers/net/wireless/realtek/rtw88/pci.c
index 2a188bbc6d5a..8228db9a5fc8 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.c
+++ b/drivers/net/wireless/realtek/rtw88/pci.c
@@ -1593,21 +1593,6 @@ void rtw_pci_shutdown(struct pci_dev *pdev)
 }
 EXPORT_SYMBOL(rtw_pci_shutdown);
 
-static const struct pci_device_id rtw_pci_id_table[] = {
-	{},
-};
-MODULE_DEVICE_TABLE(pci, rtw_pci_id_table);
-
-static struct pci_driver rtw_pci_driver = {
-	.name = "rtw_pci",
-	.id_table = rtw_pci_id_table,
-	.probe = rtw_pci_probe,
-	.remove = rtw_pci_remove,
-	.driver.pm = &rtw_pm_ops,
-	.shutdown = rtw_pci_shutdown,
-};
-module_pci_driver(rtw_pci_driver);
-
 MODULE_AUTHOR("Realtek Corporation");
 MODULE_DESCRIPTION("Realtek 802.11ac wireless PCI driver");
 MODULE_LICENSE("Dual BSD/GPL");
diff --git a/drivers/net/wireless/realtek/rtw88/pci.h b/drivers/net/wireless/realtek/rtw88/pci.h
index 3ac4fb328d31..024c2bc275cb 100644
--- a/drivers/net/wireless/realtek/rtw88/pci.h
+++ b/drivers/net/wireless/realtek/rtw88/pci.h
@@ -5,10 +5,6 @@
 #ifndef __RTK_PCI_H_
 #define __RTK_PCI_H_
 
-#define RTK_PCI_DEVICE(vend, dev, hw_config)	\
-	PCI_DEVICE(vend, dev),			\
-	.driver_data = (kernel_ulong_t)&(hw_config),
-
 #define RTK_DEFAULT_TX_DESC_NUM 128
 #define RTK_BEQ_TX_DESC_NUM	256
 
-- 
cgit v1.2.3-59-g8ed1b


From 6b684282afcc0cdb6828a8d92566089f6529d49c Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 15 May 2020 13:23:27 +0800
Subject: rtw88: rename rtw88.ko/rtwpci.ko to rtw88_core.ko/rtw88_pci.ko

Rename the kernel modules for rtw88's core and PCI.
Add the proper prefix 'rtw88_' so they are easy to recognize and
to avoid confusion with other drivers (e.g. r8822be in staging).

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515052327.31874-8-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/Makefile | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/Makefile b/drivers/net/wireless/realtek/rtw88/Makefile
index 9e3e95360089..f31e78a6f146 100644
--- a/drivers/net/wireless/realtek/rtw88/Makefile
+++ b/drivers/net/wireless/realtek/rtw88/Makefile
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
 
-obj-$(CONFIG_RTW88_CORE)	+= rtw88.o
-rtw88-y += main.o \
+obj-$(CONFIG_RTW88_CORE)	+= rtw88_core.o
+rtw88_core-y += main.o \
 	   mac80211.o \
 	   util.o \
 	   debug.o \
@@ -37,5 +37,5 @@ rtw88_8723d-objs		:= rtw8723d.o rtw8723d_table.o
 obj-$(CONFIG_RTW88_8723DE)	+= rtw88_8723de.o
 rtw88_8723de-objs		:= rtw8723de.o
 
-obj-$(CONFIG_RTW88_PCI)		+= rtwpci.o
-rtwpci-objs			:= pci.o
+obj-$(CONFIG_RTW88_PCI)		+= rtw88_pci.o
+rtw88_pci-objs			:= pci.o
-- 
cgit v1.2.3-59-g8ed1b


From c5457559b626a10028ebf05419ac4d430cf2945a Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 14 May 2020 19:13:29 +0100
Subject: rtw88: 8723d: fix incorrect setting of ldo_pwr

Currently ldo_pwr has the LDO25 voltage bits set to zero and then
it is overwritten with the new voltage setting. The assignment
looks incorrect; it should bit-wise OR in the new voltage
setting rather than assign it directly.

Addresses-Coverity: ("Unused value")
Fixes: 1afb5eb7a00d ("rtw88: 8723d: Add cfg_ldo25 to control LDO25")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200514181329.16292-1-colin.king@canonical.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 340c8c455802..7422baf2d41b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -562,7 +562,7 @@ static void rtw8723d_cfg_ldo25(struct rtw_dev *rtwdev, bool enable)
 	ldo_pwr = rtw_read8(rtwdev, REG_LDO_EFUSE_CTRL + 3);
 	if (enable) {
 		ldo_pwr &= ~BIT_MASK_LDO25_VOLTAGE;
-		ldo_pwr = (BIT_LDO25_VOLTAGE_V25 << 4) | BIT_LDO25_EN;
+		ldo_pwr |= (BIT_LDO25_VOLTAGE_V25 << 4) | BIT_LDO25_EN;
 	} else {
 		ldo_pwr &= ~BIT_LDO25_EN;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 07d0f5534935e2daf63a4e1012af13d68e089fed Mon Sep 17 00:00:00 2001
From: Kevin Lo <kevlo@kevlo.org>
Date: Fri, 15 May 2020 14:11:53 +0800
Subject: rtw88: no need to set registers for SDIO

There's no need to set SDIO related registers when powering up/down the chip.

Signed-off-by: Kevin Lo <kevlo@kevlo.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515061153.GA15714@ns.kevlo.org
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 40 -----------
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 95 ---------------------------
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 20 ------
 3 files changed, 155 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 7422baf2d41b..8641ea645c4b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2093,16 +2093,6 @@ static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(7), 0},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), 0},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x004A,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK,
@@ -2113,11 +2103,6 @@ static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), 0},
-	{0x0023,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(4), 0},
 	{0x0301,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_PCI_MSK,
@@ -2325,11 +2310,6 @@ static const struct rtw_pwr_seq_cmd trans_act_to_lps_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(1), 0},
-	{0x0093,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
 	{0x0553,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -2409,11 +2389,6 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8723d[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8723d[] = {
-	{0x0007,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0x20},
 	{0x0005,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -2434,21 +2409,6 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8723d[] = {
 	 RTW_PWR_INTF_USB_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(0), 1},
-	{0x0023,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_POLLING, BIT(1), 0},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index e49bdd76ab9a..6abcdf4070a2 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -1551,16 +1551,6 @@ static void rtw8822b_bf_config_bfee(struct rtw_dev *rtwdev, struct rtw_vif *vif,
 }
 
 static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8822b[] = {
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), 0},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x004A,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK,
@@ -1689,11 +1679,6 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_act_8822b[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, 0xFF, 0x0c},
-	{0x0068,
-	 RTW_PWR_CUT_C_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
 	{0x0029,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1722,11 +1707,6 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_act_8822b[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822b[] = {
-	{0x0003,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(2), 0},
 	{0x0093,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1795,11 +1775,6 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822b[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
-	{0x0005,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
 	{0x0007,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -1820,46 +1795,6 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
 	 RTW_PWR_INTF_USB_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(0), 0},
-	{0x0067,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(5), 0},
-	{0x0067,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(4), 0},
-	{0x004F,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(0), 0},
-	{0x0067,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(1), 0},
-	{0x0046,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(6), BIT(6)},
-	{0x0067,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(2), 0},
-	{0x0046,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
-	{0x0062,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
 	{0x0081,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1870,41 +1805,11 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), BIT(3)},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_POLLING, BIT(1), 0},
 	{0x0090,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_PCI_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(1), 0},
-	{0x0044,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0},
-	{0x0040,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0x90},
-	{0x0041,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
-	{0x0042,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, 0xFF, 0x04},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 5e4cc57dbd7c..fe995bb4e43e 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -3563,16 +3563,6 @@ static void rtw8822c_pwr_track(struct rtw_dev *rtwdev)
 }
 
 static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8822c[] = {
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), 0},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x002E,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -3783,11 +3773,6 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822c[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822c[] = {
-	{0x0005,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_MAC,
-	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
 	{0x0007,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -3833,11 +3818,6 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822c[] = {
 	 RTW_PWR_INTF_PCI_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(2), BIT(2)},
-	{0x0086,
-	 RTW_PWR_CUT_ALL_MSK,
-	 RTW_PWR_INTF_SDIO_MSK,
-	 RTW_PWR_ADDR_SDIO,
-	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
-- 
cgit v1.2.3-59-g8ed1b


From 3aa42bae9c4d1641aeb36f1a8585cd1d506cf471 Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Fri, 15 May 2020 09:59:24 +0200
Subject: mwifiex: Fix memory corruption in dump_station
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mwifiex_cfg80211_dump_station() uses a static variable for iterating
over a linked list of all associated stations (when the driver is in UAP
role). This has a race condition if .dump_station is called in parallel
for multiple interfaces. This corruption can be triggered by registering
multiple SSIDs and calling the following in parallel for multiple interfaces:
    iw dev <iface> station dump

[16750.719775] Unable to handle kernel paging request at virtual address dead000000000110
...
[16750.899173] Call trace:
[16750.901696]  mwifiex_cfg80211_dump_station+0x94/0x100 [mwifiex]
[16750.907824]  nl80211_dump_station+0xbc/0x278 [cfg80211]
[16750.913160]  netlink_dump+0xe8/0x320
[16750.916827]  netlink_recvmsg+0x1b4/0x338
[16750.920861]  ____sys_recvmsg+0x7c/0x2b0
[16750.924801]  ___sys_recvmsg+0x70/0x98
[16750.928564]  __sys_recvmsg+0x58/0xa0
[16750.932238]  __arm64_sys_recvmsg+0x28/0x30
[16750.936453]  el0_svc_common.constprop.3+0x90/0x158
[16750.941378]  do_el0_svc+0x74/0x90
[16750.944784]  el0_sync_handler+0x12c/0x1a8
[16750.948903]  el0_sync+0x114/0x140
[16750.952312] Code: f9400003 f907f423 eb02007f 54fffd60 (b9401060)
[16750.958583] ---[ end trace c8ad181c2f4b8576 ]---

This patch drops the use of the static iterator, and instead every time
the function is called iterates to the idx-th position of the
linked-list.

It would be better to convert the code not to use linked list for
associated stations storage (since the chip has a limited number of
associated stations anyway - it could just be an array). Such a change
may be proposed in the future. In the meantime this patch can be backported
into stable kernels in this simple form.

Fixes: 8baca1a34d4c ("mwifiex: dump station support in uap mode")
Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Ganapathi Bhat <ganapathi.bhat@nxp.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515075924.13841-1-pali@kernel.org
---
 drivers/net/wireless/marvell/mwifiex/cfg80211.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 1566d2197906..12bfd653a405 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -1496,7 +1496,8 @@ mwifiex_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
 			      int idx, u8 *mac, struct station_info *sinfo)
 {
 	struct mwifiex_private *priv = mwifiex_netdev_get_priv(dev);
-	static struct mwifiex_sta_node *node;
+	struct mwifiex_sta_node *node;
+	int i;
 
 	if ((GET_BSS_ROLE(priv) == MWIFIEX_BSS_ROLE_STA) &&
 	    priv->media_connected && idx == 0) {
@@ -1506,13 +1507,10 @@ mwifiex_cfg80211_dump_station(struct wiphy *wiphy, struct net_device *dev,
 		mwifiex_send_cmd(priv, HOST_CMD_APCMD_STA_LIST,
 				 HostCmd_ACT_GEN_GET, 0, NULL, true);
 
-		if (node && (&node->list == &priv->sta_list)) {
-			node = NULL;
-			return -ENOENT;
-		}
-
-		node = list_prepare_entry(node, &priv->sta_list, list);
-		list_for_each_entry_continue(node, &priv->sta_list, list) {
+		i = 0;
+		list_for_each_entry(node, &priv->sta_list, list) {
+			if (i++ != idx)
+				continue;
 			ether_addr_copy(mac, node->mac_addr);
 			return mwifiex_dump_station_info(priv, node, sinfo);
 		}
-- 
cgit v1.2.3-59-g8ed1b


From b0a4bb7693be9cca2811ac468f7f7edaf975dbcf Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 15 May 2020 11:22:26 +0100
Subject: rtlwifi: rtl8192ee: remove redundant for-loop

The for-loop seems to be redundant, the assignments for indexes
0..2 are being over-written by the last index 3 in the loop. Remove
the loop and use index 3 instead.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200515102226.29819-1-colin.king@canonical.com
---
 drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
index 6dba576aa81e..bb291b951f4d 100644
--- a/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
+++ b/drivers/net/wireless/realtek/rtlwifi/rtl8192ee/phy.c
@@ -2866,14 +2866,12 @@ void rtl92ee_phy_iq_calibrate(struct ieee80211_hw *hw, bool b_recovery)
 		}
 	}
 
-	for (i = 0; i < 4; i++) {
-		reg_e94 = result[i][0];
-		reg_e9c = result[i][1];
-		reg_ea4 = result[i][2];
-		reg_eb4 = result[i][4];
-		reg_ebc = result[i][5];
-		reg_ec4 = result[i][6];
-	}
+	reg_e94 = result[3][0];
+	reg_e9c = result[3][1];
+	reg_ea4 = result[3][2];
+	reg_eb4 = result[3][4];
+	reg_ebc = result[3][5];
+	reg_ec4 = result[3][6];
 
 	if (final_candidate != 0xff) {
 		reg_e94 = result[final_candidate][0];
-- 
cgit v1.2.3-59-g8ed1b


From 356d411c26735bcc62718c4c9181014255dc302d Mon Sep 17 00:00:00 2001
From: Raed Salem <raeds@mellanox.com>
Date: Fri, 15 May 2020 15:16:52 -0700
Subject: net/mlx5: Cleanup mlx5_ifc_fte_match_set_misc2_bits

Remove the "metadata_reg_b" field and all uses of this field in code
to match the device specification. As this field is not in use in SW
steering it is safe to remove it.

Signed-off-by: Raed Salem <raeds@mellanox.com>
Reviewed-by: Alex Vesker <valex@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c   | 1 -
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h | 3 +--
 include/linux/mlx5/mlx5_ifc.h                               | 4 +---
 3 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index c0e3a1e7389d..78c884911ceb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -961,7 +961,6 @@ static void dr_ste_copy_mask_misc2(char *mask, struct mlx5dr_match_misc2 *spec)
 	spec->metadata_reg_c_1 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_1);
 	spec->metadata_reg_c_0 = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_c_0);
 	spec->metadata_reg_a = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_a);
-	spec->metadata_reg_b = MLX5_GET(fte_match_set_misc2, mask, metadata_reg_b);
 }
 
 static void dr_ste_copy_mask_misc3(char *mask, struct mlx5dr_match_misc3 *spec)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 984783238baa..71fa01ce348a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -554,8 +554,7 @@ struct mlx5dr_match_misc2 {
 	u32 metadata_reg_c_1;			/* metadata_reg_c_1 */
 	u32 metadata_reg_c_0;			/* metadata_reg_c_0 */
 	u32 metadata_reg_a;			/* metadata_reg_a */
-	u32 metadata_reg_b;			/* metadata_reg_b */
-	u8 reserved_auto2[8];
+	u8 reserved_auto2[12];
 };
 
 struct mlx5dr_match_misc3 {
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index c9dd6e99ad56..fd8da4875ea0 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -584,9 +584,7 @@ struct mlx5_ifc_fte_match_set_misc2_bits {
 
 	u8         metadata_reg_a[0x20];
 
-	u8         metadata_reg_b[0x20];
-
-	u8         reserved_at_1c0[0x40];
+	u8         reserved_at_1a0[0x60];
 };
 
 struct mlx5_ifc_fte_match_set_misc3_bits {
-- 
cgit v1.2.3-59-g8ed1b


From 555af0c3fa0b632be73c241cc932129af4b70d27 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Fri, 15 May 2020 15:16:53 -0700
Subject: net/mlx5: Move iseg access helper routines close to mlx5_core driver

Only mlx5_core driver handles fw initialization check and command
interface revision check.
Hence move them inside the mlx5_core driver where they are used.
This avoids exposing these helpers to all mlx5 drivers.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c  |  5 +++++
 drivers/net/ethernet/mellanox/mlx5/core/main.c |  5 +++++
 include/linux/mlx5/driver.h                    | 10 ----------
 3 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index 34cba97f7bf4..e6567d5570ba 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1890,6 +1890,11 @@ static void free_cmd_page(struct mlx5_core_dev *dev, struct mlx5_cmd *cmd)
 			  cmd->alloc_dma);
 }
 
+static u16 cmdif_rev(struct mlx5_core_dev *dev)
+{
+	return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
+}
+
 int mlx5_cmd_init(struct mlx5_core_dev *dev)
 {
 	int size = sizeof(struct mlx5_cmd_prot_block);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 061b69ea9cc4..8a375e3ed5c1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -177,6 +177,11 @@ static struct mlx5_profile profile[] = {
 #define FW_PRE_INIT_TIMEOUT_MILI	120000
 #define FW_INIT_WARN_MESSAGE_INTERVAL	20000
 
+static int fw_initializing(struct mlx5_core_dev *dev)
+{
+	return ioread32be(&dev->iseg->initializing) >> 31;
+}
+
 static int wait_fw_init(struct mlx5_core_dev *dev, u32 max_wait_mili,
 			u32 warn_time_mili)
 {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 24e04901f92e..a988eb405aa6 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -823,11 +823,6 @@ static inline u16 fw_rev_sub(struct mlx5_core_dev *dev)
 	return ioread32be(&dev->iseg->cmdif_rev_fw_sub) & 0xffff;
 }
 
-static inline u16 cmdif_rev(struct mlx5_core_dev *dev)
-{
-	return ioread32be(&dev->iseg->cmdif_rev_fw_sub) >> 16;
-}
-
 static inline u32 mlx5_base_mkey(const u32 key)
 {
 	return key & 0xffffff00u;
@@ -1012,11 +1007,6 @@ int mlx5_core_roce_gid_set(struct mlx5_core_dev *dev, unsigned int index,
 			   u8 roce_version, u8 roce_l3_type, const u8 *gid,
 			   const u8 *mac, bool vlan, u16 vlan_id, u8 port_num);
 
-static inline int fw_initializing(struct mlx5_core_dev *dev)
-{
-	return ioread32be(&dev->iseg->initializing) >> 31;
-}
-
 static inline u32 mlx5_mkey_to_idx(u32 mkey)
 {
 	return mkey >> 8;
-- 
cgit v1.2.3-59-g8ed1b


From ecf814e0e19b5616048391eac359a50c1e9d5174 Mon Sep 17 00:00:00 2001
From: Michael Guralnik <michaelgur@mellanox.com>
Date: Fri, 15 May 2020 15:16:54 -0700
Subject: net/mlx5: Add support for RDMA TX FT headers modifying

Support adding header modifying actions to the RDMA TX flow table.

Signed-off-by: Michael Guralnik <michaelgur@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/infiniband/hw/mlx5/flow.c                | 5 ++++-
 drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c | 4 ++++
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/drivers/infiniband/hw/mlx5/flow.c b/drivers/infiniband/hw/mlx5/flow.c
index 69cb7e6e8955..3a0601c2052c 100644
--- a/drivers/infiniband/hw/mlx5/flow.c
+++ b/drivers/infiniband/hw/mlx5/flow.c
@@ -404,7 +404,10 @@ static bool mlx5_ib_modify_header_supported(struct mlx5_ib_dev *dev)
 {
 	return MLX5_CAP_FLOWTABLE_NIC_RX(dev->mdev,
 					 max_modify_header_actions) ||
-	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev, max_modify_header_actions);
+	       MLX5_CAP_FLOWTABLE_NIC_TX(dev->mdev,
+					 max_modify_header_actions) ||
+	       MLX5_CAP_FLOWTABLE_RDMA_TX(dev->mdev,
+					 max_modify_header_actions);
 }
 
 static int UVERBS_HANDLER(MLX5_IB_METHOD_FLOW_ACTION_CREATE_MODIFY_HEADER)(
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
index 1a8e826ac86b..465a1076a477 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fs_cmd.c
@@ -781,6 +781,10 @@ static int mlx5_cmd_modify_header_alloc(struct mlx5_flow_root_namespace *ns,
 		max_actions = MLX5_CAP_ESW_INGRESS_ACL(dev, max_modify_header_actions);
 		table_type = FS_FT_ESW_INGRESS_ACL;
 		break;
+	case MLX5_FLOW_NAMESPACE_RDMA_TX:
+		max_actions = MLX5_CAP_FLOWTABLE_RDMA_TX(dev, max_modify_header_actions);
+		table_type = FS_FT_RDMA_TX;
+		break;
 	default:
 		return -EOPNOTSUPP;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 8e8204a4f3e32ffb1804196f2aa252684ee663e8 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 22 Mar 2020 09:58:46 +0200
Subject: igc: Add ECN support for TSO

Align with other Intel drivers and add ECN support for TSO.

Add NETIF_F_TSO_ECN flag

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9d5f8287c704..7556fcdf1fd7 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4838,6 +4838,7 @@ static int igc_probe(struct pci_dev *pdev,
 	netdev->features |= NETIF_F_SG;
 	netdev->features |= NETIF_F_TSO;
 	netdev->features |= NETIF_F_TSO6;
+	netdev->features |= NETIF_F_TSO_ECN;
 	netdev->features |= NETIF_F_RXCSUM;
 	netdev->features |= NETIF_F_HW_CSUM;
 	netdev->features |= NETIF_F_SCTP_CRC;
-- 
cgit v1.2.3-59-g8ed1b


From 25f06eff7582ae41c7fdaa30a4a2348122f7a907 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 24 Mar 2020 17:38:15 -0700
Subject: igc: Use netdev log helpers in igc_main.c

In igc_main.c we print log messages using both dev_* and netdev_*
helpers, generating inconsistent output. Since this is a network device
driver, we should preferably use netdev_* helpers because they include
the interface name in the message, helping to make sense of the logs.

This patch converts all dev_* calls to netdev_*. There are only two
exceptions:
  1) calls within igc_probe (net_device has not been registered yet)
  2) calls in igc_init_module (module initialization).

It also takes this opportunity to improve some messages.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 111 ++++++++++++++----------------
 1 file changed, 50 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7556fcdf1fd7..add31dc3881d 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -76,7 +76,7 @@ static void igc_power_down_link(struct igc_adapter *adapter)
 
 void igc_reset(struct igc_adapter *adapter)
 {
-	struct pci_dev *pdev = adapter->pdev;
+	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
 	struct igc_fc_info *fc = &hw->fc;
 	u32 pba, hwm;
@@ -103,7 +103,7 @@ void igc_reset(struct igc_adapter *adapter)
 	hw->mac.ops.reset_hw(hw);
 
 	if (hw->mac.ops.init_hw(hw))
-		dev_err(&pdev->dev, "Hardware Error\n");
+		netdev_err(dev, "Error on hardware initialization\n");
 
 	if (!netif_running(adapter->netdev))
 		igc_power_down_link(adapter);
@@ -288,6 +288,7 @@ static void igc_clean_all_tx_rings(struct igc_adapter *adapter)
  */
 int igc_setup_tx_resources(struct igc_ring *tx_ring)
 {
+	struct net_device *ndev = tx_ring->netdev;
 	struct device *dev = tx_ring->dev;
 	int size = 0;
 
@@ -313,8 +314,7 @@ int igc_setup_tx_resources(struct igc_ring *tx_ring)
 
 err:
 	vfree(tx_ring->tx_buffer_info);
-	dev_err(dev,
-		"Unable to allocate memory for the transmit descriptor ring\n");
+	netdev_err(ndev, "Unable to allocate memory for Tx descriptor ring\n");
 	return -ENOMEM;
 }
 
@@ -326,14 +326,13 @@ err:
  */
 static int igc_setup_all_tx_resources(struct igc_adapter *adapter)
 {
-	struct pci_dev *pdev = adapter->pdev;
+	struct net_device *dev = adapter->netdev;
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_tx_queues; i++) {
 		err = igc_setup_tx_resources(adapter->tx_ring[i]);
 		if (err) {
-			dev_err(&pdev->dev,
-				"Allocation for Tx Queue %u failed\n", i);
+			netdev_err(dev, "Error on Tx queue %u setup\n", i);
 			for (i--; i >= 0; i--)
 				igc_free_tx_resources(adapter->tx_ring[i]);
 			break;
@@ -444,6 +443,7 @@ static void igc_free_all_rx_resources(struct igc_adapter *adapter)
  */
 int igc_setup_rx_resources(struct igc_ring *rx_ring)
 {
+	struct net_device *ndev = rx_ring->netdev;
 	struct device *dev = rx_ring->dev;
 	int size, desc_len;
 
@@ -473,8 +473,7 @@ int igc_setup_rx_resources(struct igc_ring *rx_ring)
 err:
 	vfree(rx_ring->rx_buffer_info);
 	rx_ring->rx_buffer_info = NULL;
-	dev_err(dev,
-		"Unable to allocate memory for the receive descriptor ring\n");
+	netdev_err(ndev, "Unable to allocate memory for Rx descriptor ring\n");
 	return -ENOMEM;
 }
 
@@ -487,14 +486,13 @@ err:
  */
 static int igc_setup_all_rx_resources(struct igc_adapter *adapter)
 {
-	struct pci_dev *pdev = adapter->pdev;
+	struct net_device *dev = adapter->netdev;
 	int i, err = 0;
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		err = igc_setup_rx_resources(adapter->rx_ring[i]);
 		if (err) {
-			dev_err(&pdev->dev,
-				"Allocation for Rx Queue %u failed\n", i);
+			netdev_err(dev, "Error on Rx queue %u setup\n", i);
 			for (i--; i >= 0; i--)
 				igc_free_rx_resources(adapter->rx_ring[i]);
 			break;
@@ -1196,7 +1194,7 @@ static int igc_tx_map(struct igc_ring *tx_ring,
 
 	return 0;
 dma_error:
-	dev_err(tx_ring->dev, "TX DMA map failed\n");
+	netdev_err(tx_ring->netdev, "TX DMA map failed\n");
 	tx_buffer = &tx_ring->tx_buffer_info[i];
 
 	/* clear dma mappings for failed tx_buffer_info map */
@@ -1459,8 +1457,8 @@ static void igc_rx_checksum(struct igc_ring *ring,
 				      IGC_RXD_STAT_UDPCS))
 		skb->ip_summed = CHECKSUM_UNNECESSARY;
 
-	dev_dbg(ring->dev, "cksum success: bits %08X\n",
-		le32_to_cpu(rx_desc->wb.upper.status_error));
+	netdev_dbg(ring->netdev, "cksum success: bits %08X\n",
+		   le32_to_cpu(rx_desc->wb.upper.status_error));
 }
 
 static inline void igc_rx_hash(struct igc_ring *ring,
@@ -2122,27 +2120,27 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 		    (adapter->tx_timeout_factor * HZ)) &&
 		    !(rd32(IGC_STATUS) & IGC_STATUS_TXOFF)) {
 			/* detected Tx unit hang */
-			dev_err(tx_ring->dev,
-				"Detected Tx Unit Hang\n"
-				"  Tx Queue             <%d>\n"
-				"  TDH                  <%x>\n"
-				"  TDT                  <%x>\n"
-				"  next_to_use          <%x>\n"
-				"  next_to_clean        <%x>\n"
-				"buffer_info[next_to_clean]\n"
-				"  time_stamp           <%lx>\n"
-				"  next_to_watch        <%p>\n"
-				"  jiffies              <%lx>\n"
-				"  desc.status          <%x>\n",
-				tx_ring->queue_index,
-				rd32(IGC_TDH(tx_ring->reg_idx)),
-				readl(tx_ring->tail),
-				tx_ring->next_to_use,
-				tx_ring->next_to_clean,
-				tx_buffer->time_stamp,
-				tx_buffer->next_to_watch,
-				jiffies,
-				tx_buffer->next_to_watch->wb.status);
+			netdev_err(tx_ring->netdev,
+				   "Detected Tx Unit Hang\n"
+				   "  Tx Queue             <%d>\n"
+				   "  TDH                  <%x>\n"
+				   "  TDT                  <%x>\n"
+				   "  next_to_use          <%x>\n"
+				   "  next_to_clean        <%x>\n"
+				   "buffer_info[next_to_clean]\n"
+				   "  time_stamp           <%lx>\n"
+				   "  next_to_watch        <%p>\n"
+				   "  jiffies              <%lx>\n"
+				   "  desc.status          <%x>\n",
+				   tx_ring->queue_index,
+				   rd32(IGC_TDH(tx_ring->reg_idx)),
+				   readl(tx_ring->tail),
+				   tx_ring->next_to_use,
+				   tx_ring->next_to_clean,
+				   tx_buffer->time_stamp,
+				   tx_buffer->next_to_watch,
+				   jiffies,
+				   tx_buffer->next_to_watch->wb.status);
 			netif_stop_subqueue(tx_ring->netdev,
 					    tx_ring->queue_index);
 
@@ -3238,14 +3236,14 @@ err_out:
  */
 static int igc_init_interrupt_scheme(struct igc_adapter *adapter, bool msix)
 {
-	struct pci_dev *pdev = adapter->pdev;
+	struct net_device *dev = adapter->netdev;
 	int err = 0;
 
 	igc_set_interrupt_capability(adapter, msix);
 
 	err = igc_alloc_q_vectors(adapter);
 	if (err) {
-		dev_err(&pdev->dev, "Unable to allocate memory for vectors\n");
+		netdev_err(dev, "Unable to allocate memory for vectors\n");
 		goto err_alloc_q_vectors;
 	}
 
@@ -3305,7 +3303,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
 
 	/* This call may decrease the number of queues */
 	if (igc_init_interrupt_scheme(adapter, true)) {
-		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
 		return -ENOMEM;
 	}
 
@@ -3648,8 +3646,7 @@ static int igc_change_mtu(struct net_device *netdev, int new_mtu)
 	if (netif_running(netdev))
 		igc_down(adapter);
 
-	netdev_dbg(netdev, "changing MTU from %d to %d\n",
-		   netdev->mtu, new_mtu);
+	netdev_dbg(netdev, "changing MTU from %d to %d\n", netdev->mtu, new_mtu);
 	netdev->mtu = new_mtu;
 
 	if (netif_running(netdev))
@@ -4006,8 +4003,7 @@ static void igc_watchdog_task(struct work_struct *work)
 			ctrl = rd32(IGC_CTRL);
 			/* Link status message must follow this format */
 			netdev_info(netdev,
-				    "igc: %s NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
-				    netdev->name,
+				    "NIC Link is Up %d Mbps %s Duplex, Flow Control: %s\n",
 				    adapter->link_speed,
 				    adapter->link_duplex == FULL_DUPLEX ?
 				    "Full" : "Half",
@@ -4045,10 +4041,10 @@ retry_read_status:
 					retry_count--;
 					goto retry_read_status;
 				} else if (!retry_count) {
-					dev_err(&adapter->pdev->dev, "exceed max 2 second\n");
+					netdev_err(netdev, "exceed max 2 second\n");
 				}
 			} else {
-				dev_err(&adapter->pdev->dev, "read 1000Base-T Status Reg\n");
+				netdev_err(netdev, "read 1000Base-T Status Reg\n");
 			}
 no_wait:
 			netif_carrier_on(netdev);
@@ -4064,8 +4060,7 @@ no_wait:
 			adapter->link_duplex = 0;
 
 			/* Links status message must follow this format */
-			netdev_info(netdev, "igc: %s NIC Link is Down\n",
-				    netdev->name);
+			netdev_info(netdev, "NIC Link is Down\n");
 			netif_carrier_off(netdev);
 
 			/* link state has changed, schedule phy info update */
@@ -4283,8 +4278,7 @@ static int igc_request_irq(struct igc_adapter *adapter)
 			  netdev->name, adapter);
 
 	if (err)
-		dev_err(&pdev->dev, "Error %d getting interrupt\n",
-			err);
+		netdev_err(netdev, "Error %d getting interrupt\n", err);
 
 request_done:
 	return err;
@@ -4686,7 +4680,6 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
 
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 {
-	struct pci_dev *pdev = adapter->pdev;
 	struct igc_mac_info *mac = &adapter->hw.mac;
 
 	mac->autoneg = 0;
@@ -4731,7 +4724,7 @@ int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx)
 	return 0;
 
 err_inval:
-	dev_err(&pdev->dev, "Unsupported Speed/Duplex configuration\n");
+	netdev_err(adapter->netdev, "Unsupported Speed/Duplex configuration\n");
 	return -EINVAL;
 }
 
@@ -4877,8 +4870,7 @@ static int igc_probe(struct pci_dev *pdev,
 
 	if (igc_get_flash_presence_i225(hw)) {
 		if (hw->nvm.ops.validate(hw) < 0) {
-			dev_err(&pdev->dev,
-				"The NVM Checksum Is Not Valid\n");
+			dev_err(&pdev->dev, "The NVM Checksum Is Not Valid\n");
 			err = -EIO;
 			goto err_eeprom;
 		}
@@ -5141,8 +5133,7 @@ static int __maybe_unused igc_resume(struct device *dev)
 		return -ENODEV;
 	err = pci_enable_device_mem(pdev);
 	if (err) {
-		dev_err(&pdev->dev,
-			"igc: Cannot enable PCI device from suspend\n");
+		netdev_err(netdev, "Cannot enable PCI device from suspend\n");
 		return err;
 	}
 	pci_set_master(pdev);
@@ -5151,7 +5142,7 @@ static int __maybe_unused igc_resume(struct device *dev)
 	pci_enable_wake(pdev, PCI_D3cold, 0);
 
 	if (igc_init_interrupt_scheme(adapter, true)) {
-		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
 		return -ENOMEM;
 	}
 
@@ -5255,8 +5246,7 @@ static pci_ers_result_t igc_io_slot_reset(struct pci_dev *pdev)
 	pci_ers_result_t result;
 
 	if (pci_enable_device_mem(pdev)) {
-		dev_err(&pdev->dev,
-			"Could not re-enable PCI device after reset.\n");
+		netdev_err(netdev, "Could not re-enable PCI device after reset\n");
 		result = PCI_ERS_RESULT_DISCONNECT;
 	} else {
 		pci_set_master(pdev);
@@ -5295,7 +5285,7 @@ static void igc_io_resume(struct pci_dev *pdev)
 	rtnl_lock();
 	if (netif_running(netdev)) {
 		if (igc_open(netdev)) {
-			dev_err(&pdev->dev, "igc_open failed after reset\n");
+			netdev_err(netdev, "igc_open failed after reset\n");
 			return;
 		}
 	}
@@ -5342,7 +5332,6 @@ static struct pci_driver igc_driver = {
 int igc_reinit_queues(struct igc_adapter *adapter)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct pci_dev *pdev = adapter->pdev;
 	int err = 0;
 
 	if (netif_running(netdev))
@@ -5351,7 +5340,7 @@ int igc_reinit_queues(struct igc_adapter *adapter)
 	igc_reset_interrupt_capability(adapter);
 
 	if (igc_init_interrupt_scheme(adapter, true)) {
-		dev_err(&pdev->dev, "Unable to allocate memory for queues\n");
+		netdev_err(netdev, "Unable to allocate memory for queues\n");
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From f026d8ca2904622298c5387c384dce04f119e87a Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Tue, 5 May 2020 17:06:38 +0300
Subject: igc: add support to eeprom, registers and link self-tests

Introduce igc_diag.c and igc_diag.h, which contain the diagnostics
functionality of the igc driver. For the time being, these files are
used by the ethtool self-test callbacks, which means that the
eeprom, register and link self-tests for ethtool are now
implemented.

Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Reported-by: kbuild test robot <lkp@intel.com>
Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/Makefile      |   2 +-
 drivers/net/ethernet/intel/igc/igc.h         |   4 +
 drivers/net/ethernet/intel/igc/igc_diag.c    | 186 +++++++++++++++++++++++++++
 drivers/net/ethernet/intel/igc/igc_diag.h    |  30 +++++
 drivers/net/ethernet/intel/igc/igc_ethtool.c |  60 +++++++++
 drivers/net/ethernet/intel/igc/igc_main.c    |   4 +-
 drivers/net/ethernet/intel/igc/igc_regs.h    |   2 +
 7 files changed, 285 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/igc/igc_diag.c
 create mode 100644 drivers/net/ethernet/intel/igc/igc_diag.h

diff --git a/drivers/net/ethernet/intel/igc/Makefile b/drivers/net/ethernet/intel/igc/Makefile
index 3652f211f351..1c3051db9085 100644
--- a/drivers/net/ethernet/intel/igc/Makefile
+++ b/drivers/net/ethernet/intel/igc/Makefile
@@ -8,4 +8,4 @@
 obj-$(CONFIG_IGC) += igc.o
 
 igc-objs := igc_main.o igc_mac.o igc_i225.o igc_base.o igc_nvm.o igc_phy.o \
-igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o
+igc_diag.o igc_ethtool.o igc_ptp.o igc_dump.o igc_tsn.o
diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 8ddc39482a8e..661dc8875f3f 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -198,6 +198,8 @@ struct igc_adapter {
 	unsigned long link_check_timeout;
 	struct igc_info ei;
 
+	u32 test_icr;
+
 	struct ptp_clock *ptp_clock;
 	struct ptp_clock_info ptp_caps;
 	struct work_struct ptp_tx_work;
@@ -215,6 +217,8 @@ struct igc_adapter {
 
 void igc_up(struct igc_adapter *adapter);
 void igc_down(struct igc_adapter *adapter);
+int igc_open(struct net_device *netdev);
+int igc_close(struct net_device *netdev);
 int igc_setup_tx_resources(struct igc_ring *ring);
 int igc_setup_rx_resources(struct igc_ring *ring);
 void igc_free_tx_resources(struct igc_ring *ring);
diff --git a/drivers/net/ethernet/intel/igc/igc_diag.c b/drivers/net/ethernet/intel/igc/igc_diag.c
new file mode 100644
index 000000000000..cc621970c0cd
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_diag.c
@@ -0,0 +1,186 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c)  2020 Intel Corporation */
+
+#include "igc.h"
+#include "igc_diag.h"
+
+static struct igc_reg_test reg_test[] = {
+	{ IGC_FCAL,	1,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_FCAH,	1,	PATTERN_TEST,	0x0000FFFF,	0xFFFFFFFF },
+	{ IGC_FCT,	1,	PATTERN_TEST,	0x0000FFFF,	0xFFFFFFFF },
+	{ IGC_RDBAH(0), 4,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_RDBAL(0),	4,	PATTERN_TEST,	0xFFFFFF80,	0xFFFFFF80 },
+	{ IGC_RDLEN(0),	4,	PATTERN_TEST,	0x000FFF80,	0x000FFFFF },
+	{ IGC_RDT(0),	4,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_FCRTH,	1,	PATTERN_TEST,	0x0003FFF0,	0x0003FFF0 },
+	{ IGC_FCTTV,	1,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_TIPG,	1,	PATTERN_TEST,	0x3FFFFFFF,	0x3FFFFFFF },
+	{ IGC_TDBAH(0),	4,	PATTERN_TEST,	0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_TDBAL(0),	4,	PATTERN_TEST,	0xFFFFFF80,	0xFFFFFF80 },
+	{ IGC_TDLEN(0),	4,	PATTERN_TEST,	0x000FFF80,	0x000FFFFF },
+	{ IGC_TDT(0),	4,	PATTERN_TEST,	0x0000FFFF,	0x0000FFFF },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0xFFFFFFFF,	0x00000000 },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0x04CFB2FE,	0x003FFFFB },
+	{ IGC_RCTL,	1,	SET_READ_TEST,	0x04CFB2FE,	0xFFFFFFFF },
+	{ IGC_TCTL,	1,	SET_READ_TEST,	0xFFFFFFFF,	0x00000000 },
+	{ IGC_RA,	16,	TABLE64_TEST_LO,
+						0xFFFFFFFF,	0xFFFFFFFF },
+	{ IGC_RA,	16,	TABLE64_TEST_HI,
+						0x900FFFFF,	0xFFFFFFFF },
+	{ IGC_MTA,	128,	TABLE32_TEST,
+						0xFFFFFFFF,	0xFFFFFFFF },
+	{ 0, 0, 0, 0}
+};
+
+static bool reg_pattern_test(struct igc_adapter *adapter, u64 *data, int reg,
+			     u32 mask, u32 write)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 pat, val, before;
+	static const u32 test_pattern[] = {
+		0x5A5A5A5A, 0xA5A5A5A5, 0x00000000, 0xFFFFFFFF
+	};
+
+	for (pat = 0; pat < ARRAY_SIZE(test_pattern); pat++) {
+		before = rd32(reg);
+		wr32(reg, test_pattern[pat] & write);
+		val = rd32(reg);
+		if (val != (test_pattern[pat] & write & mask)) {
+			netdev_err(adapter->netdev,
+				   "pattern test reg %04X failed: got 0x%08X expected 0x%08X",
+				   reg, val, test_pattern[pat] & write & mask);
+			*data = reg;
+			wr32(reg, before);
+			return false;
+		}
+		wr32(reg, before);
+	}
+	return true;
+}
+
+static bool reg_set_and_check(struct igc_adapter *adapter, u64 *data, int reg,
+			      u32 mask, u32 write)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 val, before;
+
+	before = rd32(reg);
+	wr32(reg, write & mask);
+	val = rd32(reg);
+	if ((write & mask) != (val & mask)) {
+		netdev_err(adapter->netdev,
+			   "set/check reg %04X test failed: got 0x%08X expected 0x%08X",
+			   reg, (val & mask), (write & mask));
+		*data = reg;
+		wr32(reg, before);
+		return false;
+	}
+	wr32(reg, before);
+	return true;
+}
+
+bool igc_reg_test(struct igc_adapter *adapter, u64 *data)
+{
+	struct igc_reg_test *test = reg_test;
+	struct igc_hw *hw = &adapter->hw;
+	u32 value, before, after;
+	u32 i, toggle, b = false;
+
+	/* Because the status register is such a special case,
+	 * we handle it separately from the rest of the register
+	 * tests.  Some bits are read-only, some toggle, and some
+	 * are writeable.
+	 */
+	toggle = 0x6800D3;
+	before = rd32(IGC_STATUS);
+	value = before & toggle;
+	wr32(IGC_STATUS, toggle);
+	after = rd32(IGC_STATUS) & toggle;
+	if (value != after) {
+		netdev_err(adapter->netdev,
+			   "failed STATUS register test got: 0x%08X expected: 0x%08X",
+			   after, value);
+		*data = 1;
+		return false;
+	}
+	/* restore previous status */
+	wr32(IGC_STATUS, before);
+
+	/* Perform the remainder of the register test, looping through
+	 * the test table until we either fail or reach the null entry.
+	 */
+	while (test->reg) {
+		for (i = 0; i < test->array_len; i++) {
+			switch (test->test_type) {
+			case PATTERN_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 0x40),
+						     test->mask,
+						     test->write);
+				break;
+			case SET_READ_TEST:
+				b = reg_set_and_check(adapter, data,
+						      test->reg + (i * 0x40),
+						      test->mask,
+						      test->write);
+				break;
+			case TABLE64_TEST_LO:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE64_TEST_HI:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + 4 + (i * 8),
+						     test->mask,
+						     test->write);
+				break;
+			case TABLE32_TEST:
+				b = reg_pattern_test(adapter, data,
+						     test->reg + (i * 4),
+						     test->mask,
+						     test->write);
+				break;
+			}
+			if (!b)
+				return false;
+		}
+		test++;
+	}
+	*data = 0;
+	return true;
+}
+
+bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data)
+{
+	struct igc_hw *hw = &adapter->hw;
+
+	*data = 0;
+
+	if (hw->nvm.ops.validate(hw) != IGC_SUCCESS) {
+		*data = 1;
+		return false;
+	}
+
+	return true;
+}
+
+bool igc_link_test(struct igc_adapter *adapter, u64 *data)
+{
+	bool link_up;
+
+	*data = 0;
+
+	/* add delay to give enough time for autonegotiation to finish */
+	if (adapter->hw.mac.autoneg)
+		ssleep(5);
+
+	link_up = igc_has_link(adapter);
+	if (!link_up) {
+		*data = 1;
+		return false;
+	}
+
+	return true;
+}
diff --git a/drivers/net/ethernet/intel/igc/igc_diag.h b/drivers/net/ethernet/intel/igc/igc_diag.h
new file mode 100644
index 000000000000..600658e33bec
--- /dev/null
+++ b/drivers/net/ethernet/intel/igc/igc_diag.h
@@ -0,0 +1,30 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (c)  2020 Intel Corporation */
+
+bool igc_reg_test(struct igc_adapter *adapter, u64 *data);
+bool igc_eeprom_test(struct igc_adapter *adapter, u64 *data);
+bool igc_link_test(struct igc_adapter *adapter, u64 *data);
+
+struct igc_reg_test {
+	u16 reg;
+	u8 array_len;
+	u8 test_type;
+	u32 mask;
+	u32 write;
+};
+
+/* In the hardware, registers are laid out either singly, in arrays
+ * spaced 0x40 bytes apart, or in contiguous tables.  We assume
+ * most tests take place on arrays or single registers (handled
+ * as a single-element array) and special-case the tables.
+ * Table tests are always pattern tests.
+ *
+ * We also make provision for some required setup steps by specifying
+ * registers to be written without any read-back testing.
+ */
+
+#define PATTERN_TEST	1
+#define SET_READ_TEST	2
+#define TABLE32_TEST	3
+#define TABLE64_TEST_LO	4
+#define TABLE64_TEST_HI	5
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 0a8c4a7412a4..9fc250cdf88c 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -6,6 +6,7 @@
 #include <linux/pm_runtime.h>
 
 #include "igc.h"
+#include "igc_diag.h"
 
 /* forward declaration */
 struct igc_stats {
@@ -1896,6 +1897,64 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
+static void igc_diag_test(struct net_device *netdev,
+			  struct ethtool_test *eth_test, u64 *data)
+{
+	struct igc_adapter *adapter = netdev_priv(netdev);
+	bool if_running = netif_running(netdev);
+
+	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
+		netdev_info(adapter->netdev, "offline testing starting");
+		set_bit(__IGC_TESTING, &adapter->state);
+
+		/* Link test performed before hardware reset so autoneg doesn't
+		 * interfere with test result
+		 */
+		if (!igc_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		if (if_running)
+			igc_close(netdev);
+		else
+			igc_reset(adapter);
+
+		netdev_info(adapter->netdev, "register testing starting");
+		if (!igc_reg_test(adapter, &data[TEST_REG]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igc_reset(adapter);
+
+		netdev_info(adapter->netdev, "eeprom testing starting");
+		if (!igc_eeprom_test(adapter, &data[TEST_EEP]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+
+		igc_reset(adapter);
+
+		/* loopback and interrupt tests
+		 * will be implemented in the future
+		 */
+		data[TEST_LOOP] = 0;
+		data[TEST_IRQ] = 0;
+
+		clear_bit(__IGC_TESTING, &adapter->state);
+		if (if_running)
+			igc_open(netdev);
+	} else {
+		netdev_info(adapter->netdev, "online testing starting");
+
+		/* register, eeprom, intr and loopback tests not run online */
+		data[TEST_REG] = 0;
+		data[TEST_EEP] = 0;
+		data[TEST_IRQ] = 0;
+		data[TEST_LOOP] = 0;
+
+		if (!igc_link_test(adapter, &data[TEST_LINK]))
+			eth_test->flags |= ETH_TEST_FL_FAILED;
+	}
+
+	msleep_interruptible(4 * 1000);
+}
+
 static const struct ethtool_ops igc_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
 	.get_drvinfo		= igc_get_drvinfo,
@@ -1933,6 +1992,7 @@ static const struct ethtool_ops igc_ethtool_ops = {
 	.complete		= igc_ethtool_complete,
 	.get_link_ksettings	= igc_get_link_ksettings,
 	.set_link_ksettings	= igc_set_link_ksettings,
+	.self_test		= igc_diag_test,
 };
 
 void igc_set_ethtool_ops(struct net_device *netdev)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index add31dc3881d..0ae3590a50eb 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4380,7 +4380,7 @@ err_setup_tx:
 	return err;
 }
 
-static int igc_open(struct net_device *netdev)
+int igc_open(struct net_device *netdev)
 {
 	return __igc_open(netdev, false);
 }
@@ -4422,7 +4422,7 @@ static int __igc_close(struct net_device *netdev, bool suspending)
 	return 0;
 }
 
-static int igc_close(struct net_device *netdev)
+int igc_close(struct net_device *netdev)
 {
 	if (netif_device_present(netdev) || netdev->dismantle)
 		return __igc_close(netdev, false);
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 6093cde2351c..633545977a65 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -49,6 +49,7 @@
 #define IGC_FACTPS		0x05B30
 
 /* Interrupt Register Description */
+#define IGC_EICR		0x01580  /* Ext. Interrupt Cause read - W0 */
 #define IGC_EICS		0x01520  /* Ext. Interrupt Cause Set - W0 */
 #define IGC_EIMS		0x01524  /* Ext. Interrupt Mask Set/Read - RW */
 #define IGC_EIMC		0x01528  /* Ext. Interrupt Mask Clear - WO */
@@ -119,6 +120,7 @@
 #define IGC_RLPML		0x05004  /* Rx Long Packet Max Length */
 #define IGC_RFCTL		0x05008  /* Receive Filter Control*/
 #define IGC_MTA			0x05200  /* Multicast Table Array - RW Array */
+#define IGC_RA			0x05400  /* Receive Address - RW Array */
 #define IGC_UTA			0x0A000  /* Unicast Table Array - RW */
 #define IGC_RAL(_n)		(0x05400 + ((_n) * 0x08))
 #define IGC_RAH(_n)		(0x05404 + ((_n) * 0x08))
-- 
cgit v1.2.3-59-g8ed1b


From a307593a644443db12888f45eed0dafb5869e2cc Mon Sep 17 00:00:00 2001
From: Doug Berger <opendmb@gmail.com>
Date: Mon, 18 May 2020 15:23:59 -0700
Subject: net: phy: simplify phy_link_change arguments

This function was introduced to allow for different handling of
link up and link down events particularly with regard to the
netif_carrier. The third argument do_carrier allowed the flag to
be left unchanged.

Since then the phylink has introduced an implementation that
completely ignores the third parameter since it never wants to
change the flag and the phylib always sets the third parameter
to true so the flag is always changed.

Therefore the third argument (i.e. do_carrier) is no longer
necessary and can be removed. This also means that the phylib
phy_link_down() function no longer needs its second argument.

Signed-off-by: Doug Berger <opendmb@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/phy.c        | 12 ++++++------
 drivers/net/phy/phy_device.c | 12 +++++-------
 drivers/net/phy/phylink.c    |  3 +--
 include/linux/phy.h          |  2 +-
 4 files changed, 13 insertions(+), 16 deletions(-)

diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d4bbf79dab6c..d584701187db 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -58,13 +58,13 @@ static const char *phy_state_to_str(enum phy_state st)
 
 static void phy_link_up(struct phy_device *phydev)
 {
-	phydev->phy_link_change(phydev, true, true);
+	phydev->phy_link_change(phydev, true);
 	phy_led_trigger_change_speed(phydev);
 }
 
-static void phy_link_down(struct phy_device *phydev, bool do_carrier)
+static void phy_link_down(struct phy_device *phydev)
 {
-	phydev->phy_link_change(phydev, false, do_carrier);
+	phydev->phy_link_change(phydev, false);
 	phy_led_trigger_change_speed(phydev);
 }
 
@@ -524,7 +524,7 @@ int phy_start_cable_test(struct phy_device *phydev,
 		goto out;
 
 	/* Mark the carrier down until the test is complete */
-	phy_link_down(phydev, true);
+	phy_link_down(phydev);
 
 	netif_testing_on(dev);
 	err = phydev->drv->cable_test_start(phydev);
@@ -595,7 +595,7 @@ static int phy_check_link_status(struct phy_device *phydev)
 		phy_link_up(phydev);
 	} else if (!phydev->link && phydev->state != PHY_NOLINK) {
 		phydev->state = PHY_NOLINK;
-		phy_link_down(phydev, true);
+		phy_link_down(phydev);
 	}
 
 	return 0;
@@ -999,7 +999,7 @@ void phy_state_machine(struct work_struct *work)
 	case PHY_HALTED:
 		if (phydev->link) {
 			phydev->link = 0;
-			phy_link_down(phydev, true);
+			phy_link_down(phydev);
 		}
 		do_suspend = true;
 		break;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index c3a107cf578e..7481135d27ab 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -916,16 +916,14 @@ struct phy_device *phy_find_first(struct mii_bus *bus)
 }
 EXPORT_SYMBOL(phy_find_first);
 
-static void phy_link_change(struct phy_device *phydev, bool up, bool do_carrier)
+static void phy_link_change(struct phy_device *phydev, bool up)
 {
 	struct net_device *netdev = phydev->attached_dev;
 
-	if (do_carrier) {
-		if (up)
-			netif_carrier_on(netdev);
-		else
-			netif_carrier_off(netdev);
-	}
+	if (up)
+		netif_carrier_on(netdev);
+	else
+		netif_carrier_off(netdev);
 	phydev->adjust_link(netdev);
 	if (phydev->mii_ts && phydev->mii_ts->link_state)
 		phydev->mii_ts->link_state(phydev->mii_ts, phydev);
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index 0f23bec431c1..b6b1f77bba58 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -803,8 +803,7 @@ void phylink_destroy(struct phylink *pl)
 }
 EXPORT_SYMBOL_GPL(phylink_destroy);
 
-static void phylink_phy_change(struct phy_device *phydev, bool up,
-			       bool do_carrier)
+static void phylink_phy_change(struct phy_device *phydev, bool up)
 {
 	struct phylink *pl = phydev->phylink;
 	bool tx_pause, rx_pause;
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 5d8ff5428010..467aa8bf9f64 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -543,7 +543,7 @@ struct phy_device {
 	u8 mdix;
 	u8 mdix_ctrl;
 
-	void (*phy_link_change)(struct phy_device *, bool up, bool do_carrier);
+	void (*phy_link_change)(struct phy_device *phydev, bool up);
 	void (*adjust_link)(struct net_device *dev);
 
 #if IS_ENABLED(CONFIG_MACSEC)
-- 
cgit v1.2.3-59-g8ed1b


From 95f96a9f2d04b20c26fca405f640acea07cf8b53 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 24 Mar 2020 17:38:16 -0700
Subject: igc: Use netdev log helpers in igc_ethtool.c

In igc_ethtool.c we print log messages using dev_* helpers, generating
inconsistent output with the rest of the driver. Since this is a network
device driver, we should preferably use netdev_* helpers because they
append the interface name to the message, helping make sense of the
logs.

This patch converts all dev_* calls to netdev_*.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 41 ++++++++++++++--------------
 1 file changed, 21 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 9fc250cdf88c..a05d7abee524 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1155,8 +1155,8 @@ static int igc_set_rss_hash_opt(struct igc_adapter *adapter,
 
 		if ((flags & UDP_RSS_FLAGS) &&
 		    !(adapter->flags & UDP_RSS_FLAGS))
-			dev_err(&adapter->pdev->dev,
-				"enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
+			netdev_err(adapter->netdev,
+				   "Enabling UDP RSS: fragmented packets may arrive out of order to the stack above\n");
 
 		adapter->flags = flags;
 
@@ -1195,7 +1195,8 @@ static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter,
 			break;
 	}
 	if (i == MAX_ETYPE_FILTER) {
-		dev_err(&adapter->pdev->dev, "ethtool -N: etype filters are all used.\n");
+		netdev_err(adapter->netdev,
+			   "ethtool -N: etype filters are all used\n");
 		return -EINVAL;
 	}
 
@@ -1236,7 +1237,8 @@ static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter,
 	/* check whether this vlan prio is already set */
 	if (vlapqf & IGC_VLAPQF_P_VALID(vlan_priority) &&
 	    queue_index != input->action) {
-		dev_err(&adapter->pdev->dev, "ethtool rxnfc set vlan prio filter failed.\n");
+		netdev_err(adapter->netdev,
+			   "ethtool rxnfc set VLAN prio filter failed\n");
 		return -EEXIST;
 	}
 
@@ -1255,8 +1257,8 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 
 	if (hw->mac.type == igc_i225 &&
 	    !(input->filter.match_flags & ~IGC_FILTER_FLAG_SRC_MAC_ADDR)) {
-		dev_err(&adapter->pdev->dev,
-			"i225 doesn't support flow classification rules specifying only source addresses.\n");
+		netdev_err(adapter->netdev,
+			   "i225 doesn't support flow classification rules specifying only source addresses\n");
 		return -EOPNOTSUPP;
 	}
 
@@ -1404,13 +1406,14 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
 	 */
 	if (fsp->ring_cookie == RX_CLS_FLOW_DISC ||
 	    fsp->ring_cookie >= adapter->num_rx_queues) {
-		dev_err(&adapter->pdev->dev, "ethtool -N: The specified action is invalid\n");
+		netdev_err(netdev,
+			   "ethtool -N: The specified action is invalid\n");
 		return -EINVAL;
 	}
 
 	/* Don't allow indexes to exist outside of available space */
 	if (fsp->location >= IGC_MAX_RXNFC_FILTERS) {
-		dev_err(&adapter->pdev->dev, "Location out of range\n");
+		netdev_err(netdev, "Location out of range\n");
 		return -EINVAL;
 	}
 
@@ -1458,8 +1461,8 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
 		if (!memcmp(&input->filter, &rule->filter,
 			    sizeof(input->filter))) {
 			err = -EEXIST;
-			dev_err(&adapter->pdev->dev,
-				"ethtool: this filter is already set\n");
+			netdev_err(netdev,
+				   "ethtool: this filter is already set\n");
 			goto err_out_w_lock;
 		}
 	}
@@ -1832,6 +1835,7 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 				  const struct ethtool_link_ksettings *cmd)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
+	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
 	u32 advertising;
 
@@ -1839,8 +1843,7 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 	 * cannot be changed
 	 */
 	if (igc_check_reset_block(hw)) {
-		dev_err(&adapter->pdev->dev,
-			"Cannot change link characteristics when reset is active.\n");
+		netdev_err(dev, "Cannot change link characteristics when reset is active\n");
 		return -EINVAL;
 	}
 
@@ -1851,7 +1854,7 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 	if (cmd->base.eth_tp_mdix_ctrl) {
 		if (cmd->base.eth_tp_mdix_ctrl != ETH_TP_MDI_AUTO &&
 		    cmd->base.autoneg != AUTONEG_ENABLE) {
-			dev_err(&adapter->pdev->dev, "forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
+			netdev_err(dev, "Forcing MDI/MDI-X state is not supported when link speed and/or duplex are forced\n");
 			return -EINVAL;
 		}
 	}
@@ -1868,9 +1871,7 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 		if (adapter->fc_autoneg)
 			hw->fc.requested_mode = igc_fc_default;
 	} else {
-		/* calling this overrides forced MDI setting */
-		dev_info(&adapter->pdev->dev,
-			 "Force mode currently not supported\n");
+		netdev_info(dev, "Force mode currently not supported\n");
 	}
 
 	/* MDI-X => 2; MDI => 1; Auto => 3 */
@@ -1904,7 +1905,7 @@ static void igc_diag_test(struct net_device *netdev,
 	bool if_running = netif_running(netdev);
 
 	if (eth_test->flags == ETH_TEST_FL_OFFLINE) {
-		netdev_info(adapter->netdev, "offline testing starting");
+		netdev_info(adapter->netdev, "Offline testing starting");
 		set_bit(__IGC_TESTING, &adapter->state);
 
 		/* Link test performed before hardware reset so autoneg doesn't
@@ -1918,13 +1919,13 @@ static void igc_diag_test(struct net_device *netdev,
 		else
 			igc_reset(adapter);
 
-		netdev_info(adapter->netdev, "register testing starting");
+		netdev_info(adapter->netdev, "Register testing starting");
 		if (!igc_reg_test(adapter, &data[TEST_REG]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
 		igc_reset(adapter);
 
-		netdev_info(adapter->netdev, "eeprom testing starting");
+		netdev_info(adapter->netdev, "EEPROM testing starting");
 		if (!igc_eeprom_test(adapter, &data[TEST_EEP]))
 			eth_test->flags |= ETH_TEST_FL_FAILED;
 
@@ -1940,7 +1941,7 @@ static void igc_diag_test(struct net_device *netdev,
 		if (if_running)
 			igc_open(netdev);
 	} else {
-		netdev_info(adapter->netdev, "online testing starting");
+		netdev_info(adapter->netdev, "Online testing starting");
 
 		/* register, eeprom, intr and loopback tests not run online */
 		data[TEST_REG] = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 916a3c650763fbe1e1d37af105a82fee26da45a5 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 24 Mar 2020 17:38:17 -0700
Subject: igc: Use netdev log helpers in igc_ptp.c

In igc_ptp.c we print log messages using dev_* helpers, generating
inconsistent output with the rest of the driver. Since this is a network
device driver, we should preferably use netdev_* helpers because they
append the interface name to the message, helping make sense out of
the logs.

This patch converts all dev_* calls to netdev_*.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ptp.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index f99c514ad0f4..1bf016398b9d 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -466,7 +466,7 @@ void igc_ptp_tx_hang(struct igc_adapter *adapter)
 		 * interrupt
 		 */
 		rd32(IGC_TXSTMPH);
-		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+		netdev_warn(adapter->netdev, "Clearing Tx timestamp hang\n");
 	}
 }
 
@@ -529,7 +529,7 @@ static void igc_ptp_tx_work(struct work_struct *work)
 		 * interrupt
 		 */
 		rd32(IGC_TXSTMPH);
-		dev_warn(&adapter->pdev->dev, "clearing Tx timestamp hang\n");
+		netdev_warn(adapter->netdev, "Clearing Tx timestamp hang\n");
 		return;
 	}
 
@@ -626,10 +626,9 @@ void igc_ptp_init(struct igc_adapter *adapter)
 						&adapter->pdev->dev);
 	if (IS_ERR(adapter->ptp_clock)) {
 		adapter->ptp_clock = NULL;
-		dev_err(&adapter->pdev->dev, "ptp_clock_register failed\n");
+		netdev_err(netdev, "ptp_clock_register failed\n");
 	} else if (adapter->ptp_clock) {
-		dev_info(&adapter->pdev->dev, "added PHC on %s\n",
-			 adapter->netdev->name);
+		netdev_info(netdev, "PHC added\n");
 		adapter->ptp_flags |= IGC_PTP_ENABLED;
 	}
 }
@@ -666,8 +665,7 @@ void igc_ptp_stop(struct igc_adapter *adapter)
 
 	if (adapter->ptp_clock) {
 		ptp_clock_unregister(adapter->ptp_clock);
-		dev_info(&adapter->pdev->dev, "removed PHC on %s\n",
-			 adapter->netdev->name);
+		netdev_info(adapter->netdev, "PHC removed\n");
 		adapter->ptp_flags &= ~IGC_PTP_ENABLED;
 	}
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5c32bac98c705ce409ac26b5120c14ece8bd9209 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 24 Mar 2020 17:38:18 -0700
Subject: igc: Use netdev log helpers in igc_dump.c

In igc_dump.c we print log messages using dev_* and pr_* helpers,
generating inconsistent output with the rest of the driver. Since this
is a network device driver, we should preferably use netdev_* helpers
because they append the interface name to the message, helping make
sense out of the logs.

This patch converts all dev_* and pr_* calls to netdev_*.

Quick note about igc_rings_dump(): This function is always called with
a valid adapter->netdev so there is no need to check it.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_dump.c | 109 +++++++++++++++---------------
 1 file changed, 54 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c
index 657ab50ae296..4ad32d98f77f 100644
--- a/drivers/net/ethernet/intel/igc/igc_dump.c
+++ b/drivers/net/ethernet/intel/igc/igc_dump.c
@@ -47,6 +47,7 @@ static const struct igc_reg_info igc_reg_info_tbl[] = {
 /* igc_regdump - register printout routine */
 static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo)
 {
+	struct net_device *dev = igc_get_hw_dev(hw);
 	int n = 0;
 	char rname[16];
 	u32 regs[8];
@@ -101,13 +102,14 @@ static void igc_regdump(struct igc_hw *hw, struct igc_reg_info *reginfo)
 			regs[n] = rd32(IGC_TXDCTL(n));
 		break;
 	default:
-		pr_info("%-15s %08x\n", reginfo->name, rd32(reginfo->ofs));
+		netdev_info(dev, "%-15s %08x\n", reginfo->name,
+			    rd32(reginfo->ofs));
 		return;
 	}
 
 	snprintf(rname, 16, "%s%s", reginfo->name, "[0-3]");
-	pr_info("%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
-		regs[2], regs[3]);
+	netdev_info(dev, "%-15s %08x %08x %08x %08x\n", rname, regs[0], regs[1],
+		    regs[2], regs[3]);
 }
 
 /* igc_rings_dump - Tx-rings and Rx-rings */
@@ -125,39 +127,34 @@ void igc_rings_dump(struct igc_adapter *adapter)
 	if (!netif_msg_hw(adapter))
 		return;
 
-	/* Print netdevice Info */
-	if (netdev) {
-		dev_info(&adapter->pdev->dev, "Net device Info\n");
-		pr_info("Device Name     state            trans_start\n");
-		pr_info("%-15s %016lX %016lX\n", netdev->name,
-			netdev->state, dev_trans_start(netdev));
-	}
+	netdev_info(netdev, "Device info: state %016lX trans_start %016lX\n",
+		    netdev->state, dev_trans_start(netdev));
 
 	/* Print TX Ring Summary */
-	if (!netdev || !netif_running(netdev))
+	if (!netif_running(netdev))
 		goto exit;
 
-	dev_info(&adapter->pdev->dev, "TX Rings Summary\n");
-	pr_info("Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
+	netdev_info(netdev, "TX Rings Summary\n");
+	netdev_info(netdev, "Queue [NTU] [NTC] [bi(ntc)->dma  ] leng ntw timestamp\n");
 	for (n = 0; n < adapter->num_tx_queues; n++) {
 		struct igc_tx_buffer *buffer_info;
 
 		tx_ring = adapter->tx_ring[n];
 		buffer_info = &tx_ring->tx_buffer_info[tx_ring->next_to_clean];
 
-		pr_info(" %5d %5X %5X %016llX %04X %p %016llX\n",
-			n, tx_ring->next_to_use, tx_ring->next_to_clean,
-			(u64)dma_unmap_addr(buffer_info, dma),
-			dma_unmap_len(buffer_info, len),
-			buffer_info->next_to_watch,
-			(u64)buffer_info->time_stamp);
+		netdev_info(netdev, "%5d %5X %5X %016llX %04X %p %016llX\n",
+			    n, tx_ring->next_to_use, tx_ring->next_to_clean,
+			    (u64)dma_unmap_addr(buffer_info, dma),
+			    dma_unmap_len(buffer_info, len),
+			    buffer_info->next_to_watch,
+			    (u64)buffer_info->time_stamp);
 	}
 
 	/* Print TX Rings */
 	if (!netif_msg_tx_done(adapter))
 		goto rx_ring_summary;
 
-	dev_info(&adapter->pdev->dev, "TX Rings Dump\n");
+	netdev_info(netdev, "TX Rings Dump\n");
 
 	/* Transmit Descriptor Formats
 	 *
@@ -172,10 +169,11 @@ void igc_rings_dump(struct igc_adapter *adapter)
 
 	for (n = 0; n < adapter->num_tx_queues; n++) {
 		tx_ring = adapter->tx_ring[n];
-		pr_info("------------------------------------\n");
-		pr_info("TX QUEUE INDEX = %d\n", tx_ring->queue_index);
-		pr_info("------------------------------------\n");
-		pr_info("T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "TX QUEUE INDEX = %d\n",
+			    tx_ring->queue_index);
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "T [desc]     [address 63:0  ] [PlPOCIStDDM Ln] [bi->dma       ] leng  ntw timestamp        bi->skb\n");
 
 		for (i = 0; tx_ring->desc && (i < tx_ring->count); i++) {
 			const char *next_desc;
@@ -194,14 +192,14 @@ void igc_rings_dump(struct igc_adapter *adapter)
 			else
 				next_desc = "";
 
-			pr_info("T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
-				i, le64_to_cpu(u0->a),
-				le64_to_cpu(u0->b),
-				(u64)dma_unmap_addr(buffer_info, dma),
-				dma_unmap_len(buffer_info, len),
-				buffer_info->next_to_watch,
-				(u64)buffer_info->time_stamp,
-				buffer_info->skb, next_desc);
+			netdev_info(netdev, "T [0x%03X]    %016llX %016llX %016llX %04X  %p %016llX %p%s\n",
+				    i, le64_to_cpu(u0->a),
+				    le64_to_cpu(u0->b),
+				    (u64)dma_unmap_addr(buffer_info, dma),
+				    dma_unmap_len(buffer_info, len),
+				    buffer_info->next_to_watch,
+				    (u64)buffer_info->time_stamp,
+				    buffer_info->skb, next_desc);
 
 			if (netif_msg_pktdata(adapter) && buffer_info->skb)
 				print_hex_dump(KERN_INFO, "",
@@ -214,19 +212,19 @@ void igc_rings_dump(struct igc_adapter *adapter)
 
 	/* Print RX Rings Summary */
 rx_ring_summary:
-	dev_info(&adapter->pdev->dev, "RX Rings Summary\n");
-	pr_info("Queue [NTU] [NTC]\n");
+	netdev_info(netdev, "RX Rings Summary\n");
+	netdev_info(netdev, "Queue [NTU] [NTC]\n");
 	for (n = 0; n < adapter->num_rx_queues; n++) {
 		rx_ring = adapter->rx_ring[n];
-		pr_info(" %5d %5X %5X\n",
-			n, rx_ring->next_to_use, rx_ring->next_to_clean);
+		netdev_info(netdev, "%5d %5X %5X\n", n, rx_ring->next_to_use,
+			    rx_ring->next_to_clean);
 	}
 
 	/* Print RX Rings */
 	if (!netif_msg_rx_status(adapter))
 		goto exit;
 
-	dev_info(&adapter->pdev->dev, "RX Rings Dump\n");
+	netdev_info(netdev, "RX Rings Dump\n");
 
 	/* Advanced Receive Descriptor (Read) Format
 	 *    63                                           1        0
@@ -251,11 +249,12 @@ rx_ring_summary:
 
 	for (n = 0; n < adapter->num_rx_queues; n++) {
 		rx_ring = adapter->rx_ring[n];
-		pr_info("------------------------------------\n");
-		pr_info("RX QUEUE INDEX = %d\n", rx_ring->queue_index);
-		pr_info("------------------------------------\n");
-		pr_info("R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
-		pr_info("RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "RX QUEUE INDEX = %d\n",
+			    rx_ring->queue_index);
+		netdev_info(netdev, "------------------------------------\n");
+		netdev_info(netdev, "R  [desc]      [ PktBuf     A0] [  HeadBuf   DD] [bi->dma       ] [bi->skb] <-- Adv Rx Read format\n");
+		netdev_info(netdev, "RWB[desc]      [PcsmIpSHl PtRs] [vl er S cks ln] ---------------- [bi->skb] <-- Adv Rx Write-Back format\n");
 
 		for (i = 0; i < rx_ring->count; i++) {
 			const char *next_desc;
@@ -275,18 +274,18 @@ rx_ring_summary:
 
 			if (staterr & IGC_RXD_STAT_DD) {
 				/* Descriptor Done */
-				pr_info("%s[0x%03X]     %016llX %016llX ---------------- %s\n",
-					"RWB", i,
-					le64_to_cpu(u0->a),
-					le64_to_cpu(u0->b),
-					next_desc);
+				netdev_info(netdev, "%s[0x%03X]     %016llX %016llX ---------------- %s\n",
+					    "RWB", i,
+					    le64_to_cpu(u0->a),
+					    le64_to_cpu(u0->b),
+					    next_desc);
 			} else {
-				pr_info("%s[0x%03X]     %016llX %016llX %016llX %s\n",
-					"R  ", i,
-					le64_to_cpu(u0->a),
-					le64_to_cpu(u0->b),
-					(u64)buffer_info->dma,
-					next_desc);
+				netdev_info(netdev, "%s[0x%03X]     %016llX %016llX %016llX %s\n",
+					    "R  ", i,
+					    le64_to_cpu(u0->a),
+					    le64_to_cpu(u0->b),
+					    (u64)buffer_info->dma,
+					    next_desc);
 
 				if (netif_msg_pktdata(adapter) &&
 				    buffer_info->dma && buffer_info->page) {
@@ -314,8 +313,8 @@ void igc_regs_dump(struct igc_adapter *adapter)
 	struct igc_reg_info *reginfo;
 
 	/* Print Registers */
-	dev_info(&adapter->pdev->dev, "Register Dump\n");
-	pr_info(" Register Name   Value\n");
+	netdev_info(adapter->netdev, "Register Dump\n");
+	netdev_info(adapter->netdev, "Register Name   Value\n");
 	for (reginfo = (struct igc_reg_info *)igc_reg_info_tbl;
 	     reginfo->name; reginfo++) {
 		igc_regdump(hw, reginfo);
-- 
cgit v1.2.3-59-g8ed1b


From faf82d5bb1589fe4da36fc0e5b1119def97d887a Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 24 Mar 2020 17:38:19 -0700
Subject: igc: Use netdev log helpers in igc_base.c

This patch converts one pr_debug() call to hw_dbg() in order to keep log
output aligned with the rest of the driver. hw_dbg() is actually a macro
defined in igc_hw.h that expands to netdev_dbg().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_base.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_base.c b/drivers/net/ethernet/intel/igc/igc_base.c
index f7fb18d8d8f5..cc5a6cf531c7 100644
--- a/drivers/net/ethernet/intel/igc/igc_base.c
+++ b/drivers/net/ethernet/intel/igc/igc_base.c
@@ -26,7 +26,7 @@ static s32 igc_reset_hw_base(struct igc_hw *hw)
 	 */
 	ret_val = igc_disable_pcie_master(hw);
 	if (ret_val)
-		hw_dbg("PCI-E Master disable polling has failed.\n");
+		hw_dbg("PCI-E Master disable polling has failed\n");
 
 	hw_dbg("Masking off all interrupts\n");
 	wr32(IGC_IMC, 0xffffffff);
@@ -177,7 +177,7 @@ static s32 igc_init_phy_params_base(struct igc_hw *hw)
 	 */
 	ret_val = hw->phy.ops.reset(hw);
 	if (ret_val) {
-		hw_dbg("Error resetting the PHY.\n");
+		hw_dbg("Error resetting the PHY\n");
 		goto out;
 	}
 
@@ -367,7 +367,7 @@ void igc_rx_fifo_flush_base(struct igc_hw *hw)
 	}
 
 	if (ms_wait == 10)
-		pr_debug("Queue disable timed out after 10ms\n");
+		hw_dbg("Queue disable timed out after 10ms\n");
 
 	/* Clear RLPML, RCTL.SBP, RFCTL.LEF, and set RCTL.LPE so that all
 	 * incoming packets are rejected.  Set enable and wait 2ms so that
-- 
cgit v1.2.3-59-g8ed1b


From 3494480ad503068d2c14a65e45a37c785a73349f Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 29 Mar 2020 12:01:51 +0300
Subject: igc: Remove unneeded definition

The PHY_FORCE_LIMIT definition is not in use and can be removed.
i225 parts support the auto-negotiation mechanism.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index af0c03d77a39..54a7941bdb48 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -47,7 +47,6 @@
 /* Loop limit on how long we wait for auto-negotiation to complete */
 #define COPPER_LINK_UP_LIMIT		10
 #define PHY_AUTO_NEG_LIMIT		45
-#define PHY_FORCE_LIMIT			20
 
 /* Number of 100 microseconds we wait for PCI Express master disable */
 #define MASTER_DISABLE_TIMEOUT		800
-- 
cgit v1.2.3-59-g8ed1b


From 5ddb2747ae2e6141edd3b1dd2ed96d4468394cc2 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 29 Mar 2020 14:35:54 +0300
Subject: igc: Remove unneeded register

Flow control status register not applicable for i225 parts
so clean up the unneeded define.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 633545977a65..5a6110e211fd 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -35,7 +35,6 @@
 #define IGC_FCRTL		0x02160  /* FC Receive Threshold Low - RW */
 #define IGC_FCRTH		0x02168  /* FC Receive Threshold High - RW */
 #define IGC_FCRTV		0x02460  /* FC Refresh Timer Value - RW */
-#define IGC_FCSTS		0x02464  /* FC Status - RO */
 
 /* PCIe Register Description */
 #define IGC_GCR			0x05B00  /* PCIe control- RW */
-- 
cgit v1.2.3-59-g8ed1b


From 7c1552da900c159a13473a2106c7547746ebe4a1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 May 2020 08:28:05 +0200
Subject: ipv6: lift copy_from_user out of ipv6_route_ioctl

Prepare for better compat ioctl handling by moving the user copy out
of ipv6_route_ioctl.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_route.h |  3 ++-
 net/ipv6/af_inet6.c     | 16 ++++++++++------
 net/ipv6/route.c        | 44 ++++++++++++++++----------------------------
 3 files changed, 28 insertions(+), 35 deletions(-)

diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h
index e525f003e619..2a5277758379 100644
--- a/include/net/ip6_route.h
+++ b/include/net/ip6_route.h
@@ -118,7 +118,8 @@ void ip6_route_init_special_entries(void);
 int ip6_route_init(void);
 void ip6_route_cleanup(void);
 
-int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg);
+int ipv6_route_ioctl(struct net *net, unsigned int cmd,
+		struct in6_rtmsg *rtmsg);
 
 int ip6_route_add(struct fib6_config *cfg, gfp_t gfp_flags,
 		  struct netlink_ext_ack *extack);
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 771a462a8322..a618beb9b6d5 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -542,21 +542,25 @@ EXPORT_SYMBOL(inet6_getname);
 
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = (void __user *)arg;
 	struct sock *sk = sock->sk;
 	struct net *net = sock_net(sk);
 
 	switch (cmd) {
 	case SIOCADDRT:
-	case SIOCDELRT:
-
-		return ipv6_route_ioctl(net, cmd, (void __user *)arg);
+	case SIOCDELRT: {
+		struct in6_rtmsg rtmsg;
 
+		if (copy_from_user(&rtmsg, argp, sizeof(rtmsg)))
+			return -EFAULT;
+		return ipv6_route_ioctl(net, cmd, &rtmsg);
+	}
 	case SIOCSIFADDR:
-		return addrconf_add_ifaddr(net, (void __user *) arg);
+		return addrconf_add_ifaddr(net, argp);
 	case SIOCDIFADDR:
-		return addrconf_del_ifaddr(net, (void __user *) arg);
+		return addrconf_del_ifaddr(net, argp);
 	case SIOCSIFDSTADDR:
-		return addrconf_set_dstaddr(net, (void __user *) arg);
+		return addrconf_set_dstaddr(net, argp);
 	default:
 		if (!sk->sk_prot->ioctl)
 			return -ENOIOCTLCMD;
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a8b4add0b545..a52ec1b86432 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -4336,41 +4336,29 @@ static void rtmsg_to_fib6_config(struct net *net,
 	};
 }
 
-int ipv6_route_ioctl(struct net *net, unsigned int cmd, void __user *arg)
+int ipv6_route_ioctl(struct net *net, unsigned int cmd, struct in6_rtmsg *rtmsg)
 {
 	struct fib6_config cfg;
-	struct in6_rtmsg rtmsg;
 	int err;
 
-	switch (cmd) {
-	case SIOCADDRT:		/* Add a route */
-	case SIOCDELRT:		/* Delete a route */
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			return -EPERM;
-		err = copy_from_user(&rtmsg, arg,
-				     sizeof(struct in6_rtmsg));
-		if (err)
-			return -EFAULT;
+	if (cmd != SIOCADDRT && cmd != SIOCDELRT)
+		return -EINVAL;
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
 
-		rtmsg_to_fib6_config(net, &rtmsg, &cfg);
+	rtmsg_to_fib6_config(net, rtmsg, &cfg);
 
-		rtnl_lock();
-		switch (cmd) {
-		case SIOCADDRT:
-			err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
-			break;
-		case SIOCDELRT:
-			err = ip6_route_del(&cfg, NULL);
-			break;
-		default:
-			err = -EINVAL;
-		}
-		rtnl_unlock();
-
-		return err;
+	rtnl_lock();
+	switch (cmd) {
+	case SIOCADDRT:
+		err = ip6_route_add(&cfg, GFP_KERNEL, NULL);
+		break;
+	case SIOCDELRT:
+		err = ip6_route_del(&cfg, NULL);
+		break;
 	}
-
-	return -EINVAL;
+	rtnl_unlock();
+	return err;
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 3986912f6a9aae50945fc9d3513c621381eba1aa Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 May 2020 08:28:06 +0200
Subject: ipv6: move SIOCADDRT and SIOCDELRT handling into ->compat_ioctl

To prepare for removing the global routing_ioctl hack, start lifting the
code into a newly added ipv6 ->compat_ioctl handler.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h   |  2 ++
 net/dccp/ipv6.c      |  1 +
 net/ipv6/af_inet6.c  | 53 +++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/raw.c       |  1 +
 net/l2tp/l2tp_ip6.c  |  1 +
 net/mptcp/protocol.c |  1 +
 net/sctp/ipv6.c      |  1 +
 net/socket.c         | 63 ++++++++++++++--------------------------------------
 8 files changed, 77 insertions(+), 46 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 955badd1e8ff..5fc3a9d7b053 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1115,6 +1115,8 @@ int inet6_bind(struct socket *sock, struct sockaddr *uaddr, int addr_len);
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 		  int peer);
 int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg);
+int inet6_compat_ioctl(struct socket *sock, unsigned int cmd,
+		unsigned long arg);
 
 int inet6_hash_connect(struct inet_timewait_death_row *death_row,
 			      struct sock *sk);
diff --git a/net/dccp/ipv6.c b/net/dccp/ipv6.c
index 1e5e08cc0bfc..650187d68851 100644
--- a/net/dccp/ipv6.c
+++ b/net/dccp/ipv6.c
@@ -1082,6 +1082,7 @@ static const struct proto_ops inet6_dccp_ops = {
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index a618beb9b6d5..b69496eaf922 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -60,6 +60,7 @@
 #include <net/calipso.h>
 #include <net/seg6.h>
 #include <net/rpl.h>
+#include <net/compat.h>
 
 #include <linux/uaccess.h>
 #include <linux/mroute6.h>
@@ -571,6 +572,56 @@ int inet6_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 }
 EXPORT_SYMBOL(inet6_ioctl);
 
+#ifdef CONFIG_COMPAT
+struct compat_in6_rtmsg {
+	struct in6_addr		rtmsg_dst;
+	struct in6_addr		rtmsg_src;
+	struct in6_addr		rtmsg_gateway;
+	u32			rtmsg_type;
+	u16			rtmsg_dst_len;
+	u16			rtmsg_src_len;
+	u32			rtmsg_metric;
+	u32			rtmsg_info;
+	u32			rtmsg_flags;
+	s32			rtmsg_ifindex;
+};
+
+static int inet6_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+		struct compat_in6_rtmsg __user *ur)
+{
+	struct in6_rtmsg rt;
+
+	if (copy_from_user(&rt.rtmsg_dst, &ur->rtmsg_dst,
+			3 * sizeof(struct in6_addr)) ||
+	    get_user(rt.rtmsg_type, &ur->rtmsg_type) ||
+	    get_user(rt.rtmsg_dst_len, &ur->rtmsg_dst_len) ||
+	    get_user(rt.rtmsg_src_len, &ur->rtmsg_src_len) ||
+	    get_user(rt.rtmsg_metric, &ur->rtmsg_metric) ||
+	    get_user(rt.rtmsg_info, &ur->rtmsg_info) ||
+	    get_user(rt.rtmsg_flags, &ur->rtmsg_flags) ||
+	    get_user(rt.rtmsg_ifindex, &ur->rtmsg_ifindex))
+		return -EFAULT;
+
+
+	return ipv6_route_ioctl(sock_net(sk), cmd, &rt);
+}
+
+int inet6_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
+{
+	void __user *argp = compat_ptr(arg);
+	struct sock *sk = sock->sk;
+
+	switch (cmd) {
+	case SIOCADDRT:
+	case SIOCDELRT:
+		return inet6_compat_routing_ioctl(sk, cmd, argp);
+	default:
+		return -ENOIOCTLCMD;
+	}
+}
+EXPORT_SYMBOL_GPL(inet6_compat_ioctl);
+#endif /* CONFIG_COMPAT */
+
 INDIRECT_CALLABLE_DECLARE(int udpv6_sendmsg(struct sock *, struct msghdr *,
 					    size_t));
 int inet6_sendmsg(struct socket *sock, struct msghdr *msg, size_t size)
@@ -632,6 +683,7 @@ const struct proto_ops inet6_stream_ops = {
 	.read_sock	   = tcp_read_sock,
 	.peek_len	   = tcp_peek_len,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
@@ -660,6 +712,7 @@ const struct proto_ops inet6_dgram_ops = {
 	.sendpage	   = sock_no_sendpage,
 	.set_peek_off	   = sk_set_peek_off,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/ipv6/raw.c b/net/ipv6/raw.c
index 0028aa1d7869..8ef5a7b30524 100644
--- a/net/ipv6/raw.c
+++ b/net/ipv6/raw.c
@@ -1377,6 +1377,7 @@ const struct proto_ops inet6_sockraw_ops = {
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/l2tp/l2tp_ip6.c b/net/l2tp/l2tp_ip6.c
index d148766f40d1..fdfef926c591 100644
--- a/net/l2tp/l2tp_ip6.c
+++ b/net/l2tp/l2tp_ip6.c
@@ -758,6 +758,7 @@ static const struct proto_ops l2tp_ip6_ops = {
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = sock_no_sendpage,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index e3a628bea2b8..ba9d3d5c625f 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -2068,6 +2068,7 @@ static const struct proto_ops mptcp_v6_stream_ops = {
 	.mmap		   = sock_no_mmap,
 	.sendpage	   = inet_sendpage,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/sctp/ipv6.c b/net/sctp/ipv6.c
index c87af430107a..ccfa0ab3e7f4 100644
--- a/net/sctp/ipv6.c
+++ b/net/sctp/ipv6.c
@@ -1032,6 +1032,7 @@ static const struct proto_ops inet6_seqpacket_ops = {
 	.recvmsg	   = inet_recvmsg,
 	.mmap		   = sock_no_mmap,
 #ifdef CONFIG_COMPAT
+	.compat_ioctl	   = inet6_compat_ioctl,
 	.compat_setsockopt = compat_sock_common_setsockopt,
 	.compat_getsockopt = compat_sock_common_getsockopt,
 #endif
diff --git a/net/socket.c b/net/socket.c
index 1c9a7260a41d..682447075775 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3384,62 +3384,33 @@ struct rtentry32 {
 	unsigned short  rt_irtt;        /* Initial RTT                  */
 };
 
-struct in6_rtmsg32 {
-	struct in6_addr		rtmsg_dst;
-	struct in6_addr		rtmsg_src;
-	struct in6_addr		rtmsg_gateway;
-	u32			rtmsg_type;
-	u16			rtmsg_dst_len;
-	u16			rtmsg_src_len;
-	u32			rtmsg_metric;
-	u32			rtmsg_info;
-	u32			rtmsg_flags;
-	s32			rtmsg_ifindex;
-};
-
 static int routing_ioctl(struct net *net, struct socket *sock,
 			 unsigned int cmd, void __user *argp)
 {
+	struct rtentry32 __user *ur4 = argp;
 	int ret;
 	void *r = NULL;
-	struct in6_rtmsg r6;
 	struct rtentry r4;
 	char devname[16];
 	u32 rtdev;
 	mm_segment_t old_fs = get_fs();
 
-	if (sock && sock->sk && sock->sk->sk_family == AF_INET6) { /* ipv6 */
-		struct in6_rtmsg32 __user *ur6 = argp;
-		ret = copy_from_user(&r6.rtmsg_dst, &(ur6->rtmsg_dst),
-			3 * sizeof(struct in6_addr));
-		ret |= get_user(r6.rtmsg_type, &(ur6->rtmsg_type));
-		ret |= get_user(r6.rtmsg_dst_len, &(ur6->rtmsg_dst_len));
-		ret |= get_user(r6.rtmsg_src_len, &(ur6->rtmsg_src_len));
-		ret |= get_user(r6.rtmsg_metric, &(ur6->rtmsg_metric));
-		ret |= get_user(r6.rtmsg_info, &(ur6->rtmsg_info));
-		ret |= get_user(r6.rtmsg_flags, &(ur6->rtmsg_flags));
-		ret |= get_user(r6.rtmsg_ifindex, &(ur6->rtmsg_ifindex));
-
-		r = (void *) &r6;
-	} else { /* ipv4 */
-		struct rtentry32 __user *ur4 = argp;
-		ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
-					3 * sizeof(struct sockaddr));
-		ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
-		ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
-		ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
-		ret |= get_user(r4.rt_window, &(ur4->rt_window));
-		ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
-		ret |= get_user(rtdev, &(ur4->rt_dev));
-		if (rtdev) {
-			ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
-			r4.rt_dev = (char __user __force *)devname;
-			devname[15] = 0;
-		} else
-			r4.rt_dev = NULL;
-
-		r = (void *) &r4;
-	}
+	ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
+				3 * sizeof(struct sockaddr));
+	ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
+	ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
+	ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
+	ret |= get_user(r4.rt_window, &(ur4->rt_window));
+	ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
+	ret |= get_user(rtdev, &(ur4->rt_dev));
+	if (rtdev) {
+		ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
+		r4.rt_dev = (char __user __force *)devname;
+		devname[15] = 0;
+	} else
+		r4.rt_dev = NULL;
+
+	r = (void *) &r4;
 
 	if (ret) {
 		ret = -EFAULT;
-- 
cgit v1.2.3-59-g8ed1b


From a50049235483b0337d129e2878e99750e4da3ac2 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 May 2020 08:28:07 +0200
Subject: appletalk: factor out a atrtr_ioctl_addrt helper

Add a helper that can be shared with the upcoming compat ioctl handler.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/appletalk/ddp.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index b41375d4d295..4177a74f6543 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -867,6 +867,24 @@ static int atif_ioctl(int cmd, void __user *arg)
 	return copy_to_user(arg, &atreq, sizeof(atreq)) ? -EFAULT : 0;
 }
 
+static int atrtr_ioctl_addrt(struct rtentry *rt)
+{
+	struct net_device *dev = NULL;
+
+	if (rt->rt_dev) {
+		char name[IFNAMSIZ];
+
+		if (copy_from_user(name, rt->rt_dev, IFNAMSIZ-1))
+			return -EFAULT;
+		name[IFNAMSIZ-1] = '\0';
+
+		dev = __dev_get_by_name(&init_net, name);
+		if (!dev)
+			return -ENODEV;
+	}
+	return atrtr_create(rt, dev);
+}
+
 /* Routing ioctl() calls */
 static int atrtr_ioctl(unsigned int cmd, void __user *arg)
 {
@@ -882,19 +900,8 @@ static int atrtr_ioctl(unsigned int cmd, void __user *arg)
 		return atrtr_delete(&((struct sockaddr_at *)
 				      &rt.rt_dst)->sat_addr);
 
-	case SIOCADDRT: {
-		struct net_device *dev = NULL;
-		if (rt.rt_dev) {
-			char name[IFNAMSIZ];
-			if (copy_from_user(name, rt.rt_dev, IFNAMSIZ-1))
-				return -EFAULT;
-			name[IFNAMSIZ-1] = '\0';
-			dev = __dev_get_by_name(&init_net, name);
-			if (!dev)
-				return -ENODEV;
-		}
-		return atrtr_create(&rt, dev);
-	}
+	case SIOCADDRT:
+		return atrtr_ioctl_addrt(&rt);
 	}
 	return -EINVAL;
 }
-- 
cgit v1.2.3-59-g8ed1b


From dc13c8761c91c06acd3d98cd107f371cba9811b9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Mon, 18 May 2020 08:28:08 +0200
Subject: ipv4,appletalk: move SIOCADDRT and SIOCDELRT handling into
 ->compat_ioctl

To prepare for removing the global routing_ioctl hack, start lifting the code
into the ipv4 and appletalk ->compat_ioctl handlers.  Unlike the existing
handler we don't bother copying in the name - there are no compat issues for
char arrays.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/compat.h | 18 +++++++++++++++
 net/appletalk/ddp.c  | 49 ++++++++++++++++++++++++++++++++++++-----
 net/ipv4/af_inet.c   | 38 +++++++++++++++++++++++++++-----
 net/socket.c         | 62 ----------------------------------------------------
 4 files changed, 94 insertions(+), 73 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index e341260642fe..2b5e1f7ba153 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -30,6 +30,24 @@ struct compat_cmsghdr {
 	compat_int_t	cmsg_type;
 };
 
+struct compat_rtentry {
+	u32		rt_pad1;
+	struct sockaddr rt_dst;         /* target address               */
+	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
+	struct sockaddr rt_genmask;     /* target network mask (IP)     */
+	unsigned short	rt_flags;
+	short		rt_pad2;
+	u32		rt_pad3;
+	unsigned char	rt_tos;
+	unsigned char	rt_class;
+	short		rt_pad4;
+	short		rt_metric;      /* +1 for binary compatibility! */
+	compat_uptr_t	rt_dev;         /* forcing the device at add    */
+	u32		rt_mtu;         /* per route MTU/Window         */
+	u32		rt_window;      /* Window clamping              */
+	unsigned short  rt_irtt;        /* Initial RTT                  */
+};
+
 #else /* defined(CONFIG_COMPAT) */
 /*
  * To avoid compiler warnings:
diff --git a/net/appletalk/ddp.c b/net/appletalk/ddp.c
index 4177a74f6543..15787e8c0629 100644
--- a/net/appletalk/ddp.c
+++ b/net/appletalk/ddp.c
@@ -57,6 +57,7 @@
 #include <net/sock.h>
 #include <net/tcp_states.h>
 #include <net/route.h>
+#include <net/compat.h>
 #include <linux/atalk.h>
 #include <linux/highmem.h>
 
@@ -1839,20 +1840,58 @@ static int atalk_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 
 
 #ifdef CONFIG_COMPAT
+static int atalk_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+		struct compat_rtentry __user *ur)
+{
+	compat_uptr_t rtdev;
+	struct rtentry rt;
+
+	if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
+			3 * sizeof(struct sockaddr)) ||
+	    get_user(rt.rt_flags, &ur->rt_flags) ||
+	    get_user(rt.rt_metric, &ur->rt_metric) ||
+	    get_user(rt.rt_mtu, &ur->rt_mtu) ||
+	    get_user(rt.rt_window, &ur->rt_window) ||
+	    get_user(rt.rt_irtt, &ur->rt_irtt) ||
+	    get_user(rtdev, &ur->rt_dev))
+		return -EFAULT;
+
+	switch (cmd) {
+	case SIOCDELRT:
+		if (rt.rt_dst.sa_family != AF_APPLETALK)
+			return -EINVAL;
+		return atrtr_delete(&((struct sockaddr_at *)
+				      &rt.rt_dst)->sat_addr);
+
+	case SIOCADDRT:
+		rt.rt_dev = compat_ptr(rtdev);
+		return atrtr_ioctl_addrt(&rt);
+	default:
+		return -EINVAL;
+	}
+}
 static int atalk_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
+	struct sock *sk = sock->sk;
+
+	switch (cmd) {
+	case SIOCADDRT:
+	case SIOCDELRT:
+		return atalk_compat_routing_ioctl(sk, cmd, argp);
 	/*
 	 * SIOCATALKDIFADDR is a SIOCPROTOPRIVATE ioctl number, so we
 	 * cannot handle it in common code. The data we access if ifreq
 	 * here is compatible, so we can simply call the native
 	 * handler.
 	 */
-	if (cmd == SIOCATALKDIFADDR)
-		return atalk_ioctl(sock, cmd, (unsigned long)compat_ptr(arg));
-
-	return -ENOIOCTLCMD;
+	case SIOCATALKDIFADDR:
+		return atalk_ioctl(sock, cmd, (unsigned long)argp);
+	default:
+		return -ENOIOCTLCMD;
+	}
 }
-#endif
+#endif /* CONFIG_COMPAT */
 
 
 static const struct net_proto_family atalk_family_ops = {
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fcf0d12a407a..c35a8b2e0499 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -116,6 +116,7 @@
 #include <linux/mroute.h>
 #endif
 #include <net/l3mdev.h>
+#include <net/compat.h>
 
 #include <trace/events/sock.h>
 
@@ -970,17 +971,42 @@ int inet_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 EXPORT_SYMBOL(inet_ioctl);
 
 #ifdef CONFIG_COMPAT
+static int inet_compat_routing_ioctl(struct sock *sk, unsigned int cmd,
+		struct compat_rtentry __user *ur)
+{
+	compat_uptr_t rtdev;
+	struct rtentry rt;
+
+	if (copy_from_user(&rt.rt_dst, &ur->rt_dst,
+			3 * sizeof(struct sockaddr)) ||
+	    get_user(rt.rt_flags, &ur->rt_flags) ||
+	    get_user(rt.rt_metric, &ur->rt_metric) ||
+	    get_user(rt.rt_mtu, &ur->rt_mtu) ||
+	    get_user(rt.rt_window, &ur->rt_window) ||
+	    get_user(rt.rt_irtt, &ur->rt_irtt) ||
+	    get_user(rtdev, &ur->rt_dev))
+		return -EFAULT;
+
+	rt.rt_dev = compat_ptr(rtdev);
+	return ip_rt_ioctl(sock_net(sk), cmd, &rt);
+}
+
 static int inet_compat_ioctl(struct socket *sock, unsigned int cmd, unsigned long arg)
 {
+	void __user *argp = compat_ptr(arg);
 	struct sock *sk = sock->sk;
-	int err = -ENOIOCTLCMD;
-
-	if (sk->sk_prot->compat_ioctl)
-		err = sk->sk_prot->compat_ioctl(sk, cmd, arg);
 
-	return err;
+	switch (cmd) {
+	case SIOCADDRT:
+	case SIOCDELRT:
+		return inet_compat_routing_ioctl(sk, cmd, argp);
+	default:
+		if (!sk->sk_prot->compat_ioctl)
+			return -ENOIOCTLCMD;
+		return sk->sk_prot->compat_ioctl(sk, cmd, arg);
+	}
 }
-#endif
+#endif /* CONFIG_COMPAT */
 
 const struct proto_ops inet_stream_ops = {
 	.family		   = PF_INET,
diff --git a/net/socket.c b/net/socket.c
index 682447075775..80422fc3c836 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3366,65 +3366,6 @@ static int compat_sioc_ifmap(struct net *net, unsigned int cmd,
 	return err;
 }
 
-struct rtentry32 {
-	u32		rt_pad1;
-	struct sockaddr rt_dst;         /* target address               */
-	struct sockaddr rt_gateway;     /* gateway addr (RTF_GATEWAY)   */
-	struct sockaddr rt_genmask;     /* target network mask (IP)     */
-	unsigned short	rt_flags;
-	short		rt_pad2;
-	u32		rt_pad3;
-	unsigned char	rt_tos;
-	unsigned char	rt_class;
-	short		rt_pad4;
-	short		rt_metric;      /* +1 for binary compatibility! */
-	/* char * */ u32 rt_dev;        /* forcing the device at add    */
-	u32		rt_mtu;         /* per route MTU/Window         */
-	u32		rt_window;      /* Window clamping              */
-	unsigned short  rt_irtt;        /* Initial RTT                  */
-};
-
-static int routing_ioctl(struct net *net, struct socket *sock,
-			 unsigned int cmd, void __user *argp)
-{
-	struct rtentry32 __user *ur4 = argp;
-	int ret;
-	void *r = NULL;
-	struct rtentry r4;
-	char devname[16];
-	u32 rtdev;
-	mm_segment_t old_fs = get_fs();
-
-	ret = copy_from_user(&r4.rt_dst, &(ur4->rt_dst),
-				3 * sizeof(struct sockaddr));
-	ret |= get_user(r4.rt_flags, &(ur4->rt_flags));
-	ret |= get_user(r4.rt_metric, &(ur4->rt_metric));
-	ret |= get_user(r4.rt_mtu, &(ur4->rt_mtu));
-	ret |= get_user(r4.rt_window, &(ur4->rt_window));
-	ret |= get_user(r4.rt_irtt, &(ur4->rt_irtt));
-	ret |= get_user(rtdev, &(ur4->rt_dev));
-	if (rtdev) {
-		ret |= copy_from_user(devname, compat_ptr(rtdev), 15);
-		r4.rt_dev = (char __user __force *)devname;
-		devname[15] = 0;
-	} else
-		r4.rt_dev = NULL;
-
-	r = (void *) &r4;
-
-	if (ret) {
-		ret = -EFAULT;
-		goto out;
-	}
-
-	set_fs(KERNEL_DS);
-	ret = sock_do_ioctl(net, sock, cmd, (unsigned long) r);
-	set_fs(old_fs);
-
-out:
-	return ret;
-}
-
 /* Since old style bridge ioctl's endup using SIOCDEVPRIVATE
  * for some operations; this forces use of the newer bridge-utils that
  * use compatible ioctls
@@ -3463,9 +3404,6 @@ static int compat_sock_ioctl_trans(struct file *file, struct socket *sock,
 	case SIOCGIFMAP:
 	case SIOCSIFMAP:
 		return compat_sioc_ifmap(net, cmd, argp);
-	case SIOCADDRT:
-	case SIOCDELRT:
-		return routing_ioctl(net, sock, cmd, argp);
 	case SIOCGSTAMP_OLD:
 	case SIOCGSTAMPNS_OLD:
 		if (!sock->ops->gettstamp)
-- 
cgit v1.2.3-59-g8ed1b


From 0992b49023d0ae3c2786e81821619504c57e7629 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 18 May 2020 22:18:31 +0300
Subject: cxgb4: Use %pM format specifier for MAC addresses

Convert to %pM instead of using custom code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c | 20 ++++++--------------
 1 file changed, 6 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index 7818c392da50..c3dd50b45c48 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -1813,12 +1813,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v)
 			/* Inner header lookup */
 			if (lookup_type && (lookup_type != DATALKPTYPE_M)) {
 				seq_printf(seq,
-					   "%3u %02x:%02x:%02x:%02x:%02x:%02x "
-					   "%012llx %06x %06x    -    -   %3c"
-					   "      'I'  %4x   "
-					   "%3c   %#x%4u%4d", idx, addr[0],
-					   addr[1], addr[2], addr[3],
-					   addr[4], addr[5],
+					   "%3u %pM %012llx %06x %06x    -    -   %3c      'I'  %4x   %3c   %#x%4u%4d",
+					   idx, addr,
 					   (unsigned long long)mask,
 					   vniy, (vnix | vniy),
 					   dip_hit ? 'Y' : 'N',
@@ -1830,10 +1826,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v)
 					   T6_VF_G(cls_lo) : -1);
 			} else {
 				seq_printf(seq,
-					   "%3u %02x:%02x:%02x:%02x:%02x:%02x "
-					   "%012llx    -       -   ",
-					   idx, addr[0], addr[1], addr[2],
-					   addr[3], addr[4], addr[5],
+					   "%3u %pM %012llx    -       -   ",
+					   idx, addr,
 					   (unsigned long long)mask);
 
 				if (vlan_vld)
@@ -1851,10 +1845,8 @@ static int mps_tcam_show(struct seq_file *seq, void *v)
 					   T6_VF_G(cls_lo) : -1);
 			}
 		} else
-			seq_printf(seq, "%3u %02x:%02x:%02x:%02x:%02x:%02x "
-				   "%012llx%3c   %#x%4u%4d",
-				   idx, addr[0], addr[1], addr[2], addr[3],
-				   addr[4], addr[5], (unsigned long long)mask,
+			seq_printf(seq, "%3u %pM %012llx%3c   %#x%4u%4d",
+				   idx, addr, (unsigned long long)mask,
 				   (cls_lo & SRAM_VLD_F) ? 'Y' : 'N',
 				   PORTMAP_G(cls_hi),
 				   PF_G(cls_lo),
-- 
cgit v1.2.3-59-g8ed1b


From 35e43c392bf8bb38eb96ddada984bbdfd9e102a0 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Mon, 18 May 2020 22:21:28 +0300
Subject: net: seeq: Use %pM format specifier for MAC addresses

Convert to %pM instead of using custom code.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/seeq/ether3.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/seeq/ether3.c b/drivers/net/ethernet/seeq/ether3.c
index 128ee7cda1ed..65c98837ec45 100644
--- a/drivers/net/ethernet/seeq/ether3.c
+++ b/drivers/net/ethernet/seeq/ether3.c
@@ -610,12 +610,9 @@ static int ether3_rx(struct net_device *dev, unsigned int maxcnt)
 		ether3_readbuffer(dev, addrs+2, 12);
 
 if (next_ptr < RX_START || next_ptr >= RX_END) {
- int i;
  printk("%s: bad next pointer @%04X: ", dev->name, priv(dev)->rx_head);
  printk("%02X %02X %02X %02X ", next_ptr >> 8, next_ptr & 255, status & 255, status >> 8);
- for (i = 2; i < 14; i++)
-   printk("%02X ", addrs[i]);
- printk("\n");
+ printk("%pM %pM\n", addrs + 2, addrs + 8);
  next_ptr = priv(dev)->rx_head;
  break;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 588c7e5cc04805fae17cac51e77107841b0c62b0 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 18 May 2020 22:14:21 +0200
Subject: r8169: make rtl_rx better readable

Avoid the goto from the rx error handling branch into the else branch,
and in general avoid having the main rx work in the else branch.
In addition ensure proper reverse xmas tree order of variables in the
for loop.

No functional change intended.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 99 +++++++++++++++----------------
 1 file changed, 48 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e35820c72264..e887ee1e308f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4413,15 +4413,17 @@ static inline void rtl8169_rx_csum(struct sk_buff *skb, u32 opts1)
 
 static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget)
 {
-	unsigned int cur_rx, rx_left;
-	unsigned int count;
+	unsigned int cur_rx, rx_left, count;
+	struct device *d = tp_to_dev(tp);
 
 	cur_rx = tp->cur_rx;
 
 	for (rx_left = min(budget, NUM_RX_DESC); rx_left > 0; rx_left--, cur_rx++) {
-		unsigned int entry = cur_rx % NUM_RX_DESC;
-		const void *rx_buf = page_address(tp->Rx_databuff[entry]);
+		unsigned int pkt_size, entry = cur_rx % NUM_RX_DESC;
 		struct RxDesc *desc = tp->RxDescArray + entry;
+		struct sk_buff *skb;
+		const void *rx_buf;
+		dma_addr_t addr;
 		u32 status;
 
 		status = le32_to_cpu(desc->opts1);
@@ -4443,62 +4445,57 @@ static int rtl_rx(struct net_device *dev, struct rtl8169_private *tp, u32 budget
 				dev->stats.rx_length_errors++;
 			if (status & RxCRC)
 				dev->stats.rx_crc_errors++;
-			if (status & (RxRUNT | RxCRC) && !(status & RxRWT) &&
-			    dev->features & NETIF_F_RXALL) {
-				goto process_pkt;
-			}
-		} else {
-			unsigned int pkt_size;
-			struct sk_buff *skb;
-
-process_pkt:
-			pkt_size = status & GENMASK(13, 0);
-			if (likely(!(dev->features & NETIF_F_RXFCS)))
-				pkt_size -= ETH_FCS_LEN;
-			/*
-			 * The driver does not support incoming fragmented
-			 * frames. They are seen as a symptom of over-mtu
-			 * sized frames.
-			 */
-			if (unlikely(rtl8169_fragmented_frame(status))) {
-				dev->stats.rx_dropped++;
-				dev->stats.rx_length_errors++;
-				goto release_descriptor;
-			}
 
-			skb = napi_alloc_skb(&tp->napi, pkt_size);
-			if (unlikely(!skb)) {
-				dev->stats.rx_dropped++;
+			if (!(dev->features & NETIF_F_RXALL))
 				goto release_descriptor;
-			}
+			else if (status & RxRWT || !(status & (RxRUNT | RxCRC)))
+				goto release_descriptor;
+		}
 
-			dma_sync_single_for_cpu(tp_to_dev(tp),
-						le64_to_cpu(desc->addr),
-						pkt_size, DMA_FROM_DEVICE);
-			prefetch(rx_buf);
-			skb_copy_to_linear_data(skb, rx_buf, pkt_size);
-			skb->tail += pkt_size;
-			skb->len = pkt_size;
+		pkt_size = status & GENMASK(13, 0);
+		if (likely(!(dev->features & NETIF_F_RXFCS)))
+			pkt_size -= ETH_FCS_LEN;
 
-			dma_sync_single_for_device(tp_to_dev(tp),
-						   le64_to_cpu(desc->addr),
-						   pkt_size, DMA_FROM_DEVICE);
+		/* The driver does not support incoming fragmented frames.
+		 * They are seen as a symptom of over-mtu sized frames.
+		 */
+		if (unlikely(rtl8169_fragmented_frame(status))) {
+			dev->stats.rx_dropped++;
+			dev->stats.rx_length_errors++;
+			goto release_descriptor;
+		}
 
-			rtl8169_rx_csum(skb, status);
-			skb->protocol = eth_type_trans(skb, dev);
+		skb = napi_alloc_skb(&tp->napi, pkt_size);
+		if (unlikely(!skb)) {
+			dev->stats.rx_dropped++;
+			goto release_descriptor;
+		}
 
-			rtl8169_rx_vlan_tag(desc, skb);
+		addr = le64_to_cpu(desc->addr);
+		rx_buf = page_address(tp->Rx_databuff[entry]);
 
-			if (skb->pkt_type == PACKET_MULTICAST)
-				dev->stats.multicast++;
+		dma_sync_single_for_cpu(d, addr, pkt_size, DMA_FROM_DEVICE);
+		prefetch(rx_buf);
+		skb_copy_to_linear_data(skb, rx_buf, pkt_size);
+		skb->tail += pkt_size;
+		skb->len = pkt_size;
+		dma_sync_single_for_device(d, addr, pkt_size, DMA_FROM_DEVICE);
 
-			napi_gro_receive(&tp->napi, skb);
+		rtl8169_rx_csum(skb, status);
+		skb->protocol = eth_type_trans(skb, dev);
+
+		rtl8169_rx_vlan_tag(desc, skb);
+
+		if (skb->pkt_type == PACKET_MULTICAST)
+			dev->stats.multicast++;
+
+		napi_gro_receive(&tp->napi, skb);
+
+		u64_stats_update_begin(&tp->rx_stats.syncp);
+		tp->rx_stats.packets++;
+		tp->rx_stats.bytes += pkt_size;
+		u64_stats_update_end(&tp->rx_stats.syncp);
 
-			u64_stats_update_begin(&tp->rx_stats.syncp);
-			tp->rx_stats.packets++;
-			tp->rx_stats.bytes += pkt_size;
-			u64_stats_update_end(&tp->rx_stats.syncp);
-		}
 release_descriptor:
 		rtl8169_mark_to_asic(desc);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From e2e5fb8d2f7aedb66fea0722f0f712cd37592a5d Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 18 May 2020 22:22:09 +0200
Subject: r8169: improve rtl8169_mark_to_asic

Let the compiler decide about inlining, and as confirmed by Eric it's
better to use WRITE_ONCE here to ensure that the descriptor ownership
is transferred to NIC immediately.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index e887ee1e308f..23f1500928cb 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3824,15 +3824,14 @@ static int rtl8169_change_mtu(struct net_device *dev, int new_mtu)
 	return 0;
 }
 
-static inline void rtl8169_mark_to_asic(struct RxDesc *desc)
+static void rtl8169_mark_to_asic(struct RxDesc *desc)
 {
 	u32 eor = le32_to_cpu(desc->opts1) & RingEnd;
 
 	desc->opts2 = 0;
 	/* Force memory writes to complete before releasing descriptor */
 	dma_wmb();
-
-	desc->opts1 = cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE);
+	WRITE_ONCE(desc->opts1, cpu_to_le32(DescOwn | eor | R8169_RX_BUF_SIZE));
 }
 
 static struct page *rtl8169_alloc_rx_data(struct rtl8169_private *tp,
-- 
cgit v1.2.3-59-g8ed1b


From 5cdfe8306631b2224e3f81fc5a1e2721c7a1948b Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 18 May 2020 22:47:16 +0200
Subject: r8169: work around an irq coalescing related tx timeout

In [0] a user reported reproducible tx timeouts on RTL8168f except when
PktCntrDisable is set and irq coalescing is enabled.
Realtek told me that they are not aware of any related hw issue on
this chip version, therefore the root cause is still unknown. It's not
clear whether the issue affects one or more chip versions in general,
or whether the issue is specific to the reporter's system.
Due to this level of uncertainty, and due to the fact that I'm aware
of this one report only, let's apply the workaround on net-next only.
After this change setting irq coalescing via ethtool can reliably
avoid the issue on the affected system.

[0] https://bugzilla.kernel.org/show_bug.cgi?id=207205

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 23f1500928cb..79817d4ffa47 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1871,6 +1871,15 @@ static int rtl_set_coalesce(struct net_device *dev, struct ethtool_coalesce *ec)
 
 	RTL_W16(tp, IntrMitigate, w);
 
+	/* Meaning of PktCntrDisable bit changed from RTL8168e-vl */
+	if (rtl_is_8168evl_up(tp)) {
+		if (!rx_fr && !tx_fr)
+			/* disable packet counter */
+			tp->cp_cmd |= PktCntrDisable;
+		else
+			tp->cp_cmd &= ~PktCntrDisable;
+	}
+
 	tp->cp_cmd = (tp->cp_cmd & ~INTT_MASK) | cp01;
 	RTL_W16(tp, CPlusCmd, tp->cp_cmd);
 	rtl_pci_commit(tp);
-- 
cgit v1.2.3-59-g8ed1b


From 25ca180ad380a0c7286442a922e7fbcc6a9f6083 Mon Sep 17 00:00:00 2001
From: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Date: Wed, 13 May 2020 22:14:54 +0200
Subject: ath11k: Fix some resource leaks in error path in
 'ath11k_thermal_register()'

If 'thermal_cooling_device_register()' fails, we must undo what has been
allocated so far. So we must go to 'err_thermal_destroy' instead of
returning directly.

In case of error in 'ath11k_thermal_register()', the previous
'thermal_cooling_device_register()' call must also be undone. Move the
'ar->thermal.cdev = cdev' a few lines above in order for this to be done
in 'ath11k_thermal_unregister()' which is called in the error handling
path.

Fixes: 2a63bbca06b2 ("ath11k: add thermal cooling device support")
Signed-off-by: Christophe JAILLET <christophe.jaillet@wanadoo.fr>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200513201454.258111-1-christophe.jaillet@wanadoo.fr
---
 drivers/net/wireless/ath/ath11k/thermal.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ath/ath11k/thermal.c b/drivers/net/wireless/ath/ath11k/thermal.c
index 259dddbda2c7..5a7e150c621b 100644
--- a/drivers/net/wireless/ath/ath11k/thermal.c
+++ b/drivers/net/wireless/ath/ath11k/thermal.c
@@ -174,9 +174,12 @@ int ath11k_thermal_register(struct ath11k_base *sc)
 		if (IS_ERR(cdev)) {
 			ath11k_err(sc, "failed to setup thermal device result: %ld\n",
 				   PTR_ERR(cdev));
-			return -EINVAL;
+			ret = -EINVAL;
+			goto err_thermal_destroy;
 		}
 
+		ar->thermal.cdev = cdev;
+
 		ret = sysfs_create_link(&ar->hw->wiphy->dev.kobj, &cdev->device.kobj,
 					"cooling_device");
 		if (ret) {
@@ -184,7 +187,6 @@ int ath11k_thermal_register(struct ath11k_base *sc)
 			goto err_thermal_destroy;
 		}
 
-		ar->thermal.cdev = cdev;
 		if (!IS_REACHABLE(CONFIG_HWMON))
 			return 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 0efdcefb00d768442efef21560ea4c44d481fa5c Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sat, 16 May 2020 13:06:04 +0900
Subject: samples, bpf: Refactor pointer error check with libbpf

The current method of checking for pointer errors is not user friendly.
In particular, the __must_check define makes it less intuitive.

Since libbpf has an API, libbpf_get_error(), which checks for pointer errors,
this commit refactors the existing pointer error check logic to use libbpf.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200516040608.1377876-2-danieltimlee@gmail.com
---
 samples/bpf/sampleip_user.c         | 7 ++-----
 samples/bpf/trace_event_user.c      | 9 +++------
 samples/bpf/xdp_redirect_cpu_user.c | 5 +----
 3 files changed, 6 insertions(+), 15 deletions(-)

diff --git a/samples/bpf/sampleip_user.c b/samples/bpf/sampleip_user.c
index 4372d2da2f9e..921c505bb567 100644
--- a/samples/bpf/sampleip_user.c
+++ b/samples/bpf/sampleip_user.c
@@ -18,9 +18,6 @@
 #include "perf-sys.h"
 #include "trace_helpers.h"
 
-#define __must_check
-#include <linux/err.h>
-
 #define DEFAULT_FREQ	99
 #define DEFAULT_SECS	5
 #define MAX_IPS		8192
@@ -57,7 +54,7 @@ static int sampling_start(int freq, struct bpf_program *prog,
 			return 1;
 		}
 		links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
-		if (IS_ERR(links[i])) {
+		if (libbpf_get_error(links[i])) {
 			fprintf(stderr, "ERROR: Attach perf event\n");
 			links[i] = NULL;
 			close(pmu_fd);
@@ -182,7 +179,7 @@ int main(int argc, char **argv)
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	obj = bpf_object__open_file(filename, NULL);
-	if (IS_ERR(obj)) {
+	if (libbpf_get_error(obj)) {
 		fprintf(stderr, "ERROR: opening BPF object file failed\n");
 		obj = NULL;
 		goto cleanup;
diff --git a/samples/bpf/trace_event_user.c b/samples/bpf/trace_event_user.c
index b6cd358d0418..ac1ba368195c 100644
--- a/samples/bpf/trace_event_user.c
+++ b/samples/bpf/trace_event_user.c
@@ -16,9 +16,6 @@
 #include "perf-sys.h"
 #include "trace_helpers.h"
 
-#define __must_check
-#include <linux/err.h>
-
 #define SAMPLE_FREQ 50
 
 static int pid;
@@ -159,7 +156,7 @@ static void test_perf_event_all_cpu(struct perf_event_attr *attr)
 			goto all_cpu_err;
 		}
 		links[i] = bpf_program__attach_perf_event(prog, pmu_fd);
-		if (IS_ERR(links[i])) {
+		if (libbpf_get_error(links[i])) {
 			printf("bpf_program__attach_perf_event failed\n");
 			links[i] = NULL;
 			close(pmu_fd);
@@ -198,7 +195,7 @@ static void test_perf_event_task(struct perf_event_attr *attr)
 		goto err;
 	}
 	link = bpf_program__attach_perf_event(prog, pmu_fd);
-	if (IS_ERR(link)) {
+	if (libbpf_get_error(link)) {
 		printf("bpf_program__attach_perf_event failed\n");
 		link = NULL;
 		close(pmu_fd);
@@ -314,7 +311,7 @@ int main(int argc, char **argv)
 	}
 
 	obj = bpf_object__open_file(filename, NULL);
-	if (IS_ERR(obj)) {
+	if (libbpf_get_error(obj)) {
 		printf("opening BPF object file failed\n");
 		obj = NULL;
 		goto cleanup;
diff --git a/samples/bpf/xdp_redirect_cpu_user.c b/samples/bpf/xdp_redirect_cpu_user.c
index 9b8f21abeac4..f3468168982e 100644
--- a/samples/bpf/xdp_redirect_cpu_user.c
+++ b/samples/bpf/xdp_redirect_cpu_user.c
@@ -19,9 +19,6 @@ static const char *__doc__ =
 #include <time.h>
 #include <linux/limits.h>
 
-#define __must_check
-#include <linux/err.h>
-
 #include <arpa/inet.h>
 #include <linux/if_link.h>
 
@@ -622,7 +619,7 @@ static struct bpf_link * attach_tp(struct bpf_object *obj,
 	}
 
 	link = bpf_program__attach_tracepoint(prog, tp_category, tp_name);
-	if (IS_ERR(link))
+	if (libbpf_get_error(link))
 		exit(EXIT_FAIL_BPF);
 
 	return link;
-- 
cgit v1.2.3-59-g8ed1b


From 63841bc0833623ecd4f758ec055b543cf1bc56ba Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sat, 16 May 2020 13:06:05 +0900
Subject: samples, bpf: Refactor kprobe tracing user progs with libbpf

Currently, the kprobe BPF program attachment method for bpf_load is
quite old. The implementation of bpf_load "directly" controls and
manages (creates, deletes) the kprobe events of DEBUGFS. On the other
hand, using libbpf automatically manages the kprobe event (under the
bpf_link interface).

By calling bpf_program__attach(_kprobe) in libbpf, the corresponding
kprobe is created and the BPF program will be attached to this kprobe.
To remove it, simply invoking bpf_link__destroy will clean up the
event.

This commit refactors kprobe tracing programs (tracex{1~7}_user.c) with
libbpf using bpf_link interface and bpf_program__attach.

tracex2_kern.c, which tracks system calls (sys_*), has been modified to
prepend an architecture-dependent prefix to the syscall name.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200516040608.1377876-3-danieltimlee@gmail.com
---
 samples/bpf/Makefile       | 12 ++++-----
 samples/bpf/trace_common.h | 13 ++++++++++
 samples/bpf/tracex1_user.c | 37 ++++++++++++++++++++++------
 samples/bpf/tracex2_kern.c |  3 ++-
 samples/bpf/tracex2_user.c | 51 +++++++++++++++++++++++++++++++-------
 samples/bpf/tracex3_user.c | 61 ++++++++++++++++++++++++++++++++++------------
 samples/bpf/tracex4_user.c | 51 +++++++++++++++++++++++++++++---------
 samples/bpf/tracex6_user.c | 49 ++++++++++++++++++++++++++++++++-----
 samples/bpf/tracex7_user.c | 39 +++++++++++++++++++++++------
 9 files changed, 252 insertions(+), 64 deletions(-)
 create mode 100644 samples/bpf/trace_common.h

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 424f6fe7ce38..4c91e5914329 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -64,13 +64,13 @@ fds_example-objs := fds_example.o
 sockex1-objs := sockex1_user.o
 sockex2-objs := sockex2_user.o
 sockex3-objs := bpf_load.o sockex3_user.o
-tracex1-objs := bpf_load.o tracex1_user.o $(TRACE_HELPERS)
-tracex2-objs := bpf_load.o tracex2_user.o
-tracex3-objs := bpf_load.o tracex3_user.o
-tracex4-objs := bpf_load.o tracex4_user.o
+tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
+tracex2-objs := tracex2_user.o
+tracex3-objs := tracex3_user.o
+tracex4-objs := tracex4_user.o
 tracex5-objs := bpf_load.o tracex5_user.o $(TRACE_HELPERS)
-tracex6-objs := bpf_load.o tracex6_user.o
-tracex7-objs := bpf_load.o tracex7_user.o
+tracex6-objs := tracex6_user.o
+tracex7-objs := tracex7_user.o
 test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
 trace_output-objs := bpf_load.o trace_output_user.o $(TRACE_HELPERS)
 lathist-objs := bpf_load.o lathist_user.o
diff --git a/samples/bpf/trace_common.h b/samples/bpf/trace_common.h
new file mode 100644
index 000000000000..8cb5400aed1f
--- /dev/null
+++ b/samples/bpf/trace_common.h
@@ -0,0 +1,13 @@
+// SPDX-License-Identifier: GPL-2.0
+#ifndef __TRACE_COMMON_H
+#define __TRACE_COMMON_H
+
+#ifdef __x86_64__
+#define SYSCALL(SYS) "__x64_" __stringify(SYS)
+#elif defined(__s390x__)
+#define SYSCALL(SYS) "__s390x_" __stringify(SYS)
+#else
+#define SYSCALL(SYS)  __stringify(SYS)
+#endif
+
+#endif
diff --git a/samples/bpf/tracex1_user.c b/samples/bpf/tracex1_user.c
index 55fddbd08702..9d4adb7fd834 100644
--- a/samples/bpf/tracex1_user.c
+++ b/samples/bpf/tracex1_user.c
@@ -1,21 +1,41 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
-#include <linux/bpf.h>
 #include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "trace_helpers.h"
 
 int main(int ac, char **argv)
 {
-	FILE *f;
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
+	FILE *f;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (!prog) {
+		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	f = popen("taskset 1 ping -c5 localhost", "r");
@@ -23,5 +43,8 @@ int main(int ac, char **argv)
 
 	read_trace_pipe();
 
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index d865bb309bcb..cc5f94c098f8 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -10,6 +10,7 @@
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
+#include "trace_common.h"
 
 struct bpf_map_def SEC("maps") my_map = {
 	.type = BPF_MAP_TYPE_HASH,
@@ -77,7 +78,7 @@ struct bpf_map_def SEC("maps") my_hist_map = {
 	.max_entries = 1024,
 };
 
-SEC("kprobe/sys_write")
+SEC("kprobe/" SYSCALL(sys_write))
 int bpf_prog3(struct pt_regs *ctx)
 {
 	long write_size = PT_REGS_PARM3(ctx);
diff --git a/samples/bpf/tracex2_user.c b/samples/bpf/tracex2_user.c
index c9544a4ce61a..3e36b3e4e3ef 100644
--- a/samples/bpf/tracex2_user.c
+++ b/samples/bpf/tracex2_user.c
@@ -3,17 +3,19 @@
 #include <unistd.h>
 #include <stdlib.h>
 #include <signal.h>
-#include <linux/bpf.h>
 #include <string.h>
 #include <sys/resource.h>
 
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "bpf_util.h"
 
 #define MAX_INDEX	64
 #define MAX_STARS	38
 
+/* my_map, my_hist_map */
+static int map_fd[2];
+
 static void stars(char *str, long val, long max, int width)
 {
 	int i;
@@ -115,18 +117,39 @@ static void int_exit(int sig)
 int main(int ac, char **argv)
 {
 	struct rlimit r = {1024*1024, RLIM_INFINITY};
-	char filename[256];
 	long key, next_key, value;
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	char filename[256];
+	int i, j = 0;
 	FILE *f;
-	int i;
-
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 
 	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 		perror("setrlimit(RLIMIT_MEMLOCK)");
 		return 1;
 	}
 
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd[0] = bpf_object__find_map_fd_by_name(obj, "my_map");
+	map_fd[1] = bpf_object__find_map_fd_by_name(obj, "my_hist_map");
+	if (map_fd[0] < 0 || map_fd[1] < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
 	signal(SIGINT, int_exit);
 	signal(SIGTERM, int_exit);
 
@@ -138,9 +161,14 @@ int main(int ac, char **argv)
 	f = popen("dd if=/dev/zero of=/dev/null count=5000000", "r");
 	(void) f;
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	bpf_object__for_each_program(prog, obj) {
+		links[j] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[j])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[j] = NULL;
+			goto cleanup;
+		}
+		j++;
 	}
 
 	for (i = 0; i < 5; i++) {
@@ -156,5 +184,10 @@ int main(int ac, char **argv)
 	}
 	print_hist(map_fd[1]);
 
+cleanup:
+	for (j--; j >= 0; j--)
+		bpf_link__destroy(links[j]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex3_user.c b/samples/bpf/tracex3_user.c
index cf8fedc773f2..70e987775c15 100644
--- a/samples/bpf/tracex3_user.c
+++ b/samples/bpf/tracex3_user.c
@@ -7,11 +7,10 @@
 #include <unistd.h>
 #include <stdbool.h>
 #include <string.h>
-#include <linux/bpf.h>
 #include <sys/resource.h>
 
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "bpf_util.h"
 
 #define SLOTS 100
@@ -109,20 +108,11 @@ static void print_hist(int fd)
 int main(int ac, char **argv)
 {
 	struct rlimit r = {1024*1024, RLIM_INFINITY};
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
-	int i;
-
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
-
-	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
-		perror("setrlimit(RLIMIT_MEMLOCK)");
-		return 1;
-	}
-
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
-	}
+	int map_fd, i, j = 0;
 
 	for (i = 1; i < ac; i++) {
 		if (strcmp(argv[i], "-a") == 0) {
@@ -137,6 +127,40 @@ int main(int ac, char **argv)
 		}
 	}
 
+	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
+		perror("setrlimit(RLIMIT_MEMLOCK)");
+		return 1;
+	}
+
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "lat_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		links[j] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[j])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[j] = NULL;
+			goto cleanup;
+		}
+		j++;
+	}
+
 	printf("  heatmap of IO latency\n");
 	if (text_only)
 		printf("  %s", sym[num_colors - 1]);
@@ -153,9 +177,14 @@ int main(int ac, char **argv)
 	for (i = 0; ; i++) {
 		if (i % 20 == 0)
 			print_banner();
-		print_hist(map_fd[1]);
+		print_hist(map_fd);
 		sleep(2);
 	}
 
+cleanup:
+	for (j--; j >= 0; j--)
+		bpf_link__destroy(links[j]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex4_user.c b/samples/bpf/tracex4_user.c
index ec52203fce39..e8faf8f184ae 100644
--- a/samples/bpf/tracex4_user.c
+++ b/samples/bpf/tracex4_user.c
@@ -8,11 +8,10 @@
 #include <stdbool.h>
 #include <string.h>
 #include <time.h>
-#include <linux/bpf.h>
 #include <sys/resource.h>
 
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 
 struct pair {
 	long long val;
@@ -36,8 +35,8 @@ static void print_old_objects(int fd)
 	key = write(1, "\e[1;1H\e[2J", 12); /* clear screen */
 
 	key = -1;
-	while (bpf_map_get_next_key(map_fd[0], &key, &next_key) == 0) {
-		bpf_map_lookup_elem(map_fd[0], &next_key, &v);
+	while (bpf_map_get_next_key(fd, &key, &next_key) == 0) {
+		bpf_map_lookup_elem(fd, &next_key, &v);
 		key = next_key;
 		if (val - v.val < 1000000000ll)
 			/* object was allocated more then 1 sec ago */
@@ -50,25 +49,55 @@ static void print_old_objects(int fd)
 int main(int ac, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
-	int i;
-
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	int map_fd, i, j = 0;
 
 	if (setrlimit(RLIMIT_MEMLOCK, &r)) {
 		perror("setrlimit(RLIMIT_MEMLOCK, RLIM_INFINITY)");
 		return 1;
 	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd = bpf_object__find_map_fd_by_name(obj, "my_map");
+	if (map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		links[j] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[j])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[j] = NULL;
+			goto cleanup;
+		}
+		j++;
 	}
 
 	for (i = 0; ; i++) {
-		print_old_objects(map_fd[1]);
+		print_old_objects(map_fd);
 		sleep(1);
 	}
 
+cleanup:
+	for (j--; j >= 0; j--)
+		bpf_link__destroy(links[j]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex6_user.c b/samples/bpf/tracex6_user.c
index 4bb3c830adb2..33df9784775d 100644
--- a/samples/bpf/tracex6_user.c
+++ b/samples/bpf/tracex6_user.c
@@ -4,7 +4,6 @@
 #include <assert.h>
 #include <fcntl.h>
 #include <linux/perf_event.h>
-#include <linux/bpf.h>
 #include <sched.h>
 #include <stdio.h>
 #include <stdlib.h>
@@ -15,12 +14,15 @@
 #include <sys/wait.h>
 #include <unistd.h>
 
-#include "bpf_load.h"
 #include <bpf/bpf.h>
+#include <bpf/libbpf.h>
 #include "perf-sys.h"
 
 #define SAMPLE_PERIOD  0x7fffffffffffffffULL
 
+/* counters, values, values2 */
+static int map_fd[3];
+
 static void check_on_cpu(int cpu, struct perf_event_attr *attr)
 {
 	struct bpf_perf_event_value value2;
@@ -174,16 +176,51 @@ static void test_bpf_perf_event(void)
 int main(int argc, char **argv)
 {
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_link *links[2];
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
+	int i = 0;
+
+	setrlimit(RLIMIT_MEMLOCK, &r);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
 
-	setrlimit(RLIMIT_MEMLOCK, &r);
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	map_fd[0] = bpf_object__find_map_fd_by_name(obj, "counters");
+	map_fd[1] = bpf_object__find_map_fd_by_name(obj, "values");
+	map_fd[2] = bpf_object__find_map_fd_by_name(obj, "values2");
+	if (map_fd[0] < 0 || map_fd[1] < 0 || map_fd[2] < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		links[i] = bpf_program__attach(prog);
+		if (libbpf_get_error(links[i])) {
+			fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+			links[i] = NULL;
+			goto cleanup;
+		}
+		i++;
 	}
 
 	test_bpf_perf_event();
+
+cleanup:
+	for (i--; i >= 0; i--)
+		bpf_link__destroy(links[i]);
+
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex7_user.c b/samples/bpf/tracex7_user.c
index ea6dae78f0df..fdcd6580dd73 100644
--- a/samples/bpf/tracex7_user.c
+++ b/samples/bpf/tracex7_user.c
@@ -1,28 +1,51 @@
 #define _GNU_SOURCE
 
 #include <stdio.h>
-#include <linux/bpf.h>
 #include <unistd.h>
-#include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 
 int main(int argc, char **argv)
 {
-	FILE *f;
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
 	char command[256];
-	int ret;
+	int ret = 0;
+	FILE *f;
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (!prog) {
+		fprintf(stderr, "ERROR: finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
 	}
 
 	snprintf(command, 256, "mount %s tmpmnt/", argv[1]);
 	f = popen(command, "r");
 	ret = pclose(f);
 
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return ret ? 0 : 1;
 }
-- 
cgit v1.2.3-59-g8ed1b


From bc1a85977b950407d67cd1e5b74f261a4bee3284 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sat, 16 May 2020 13:06:06 +0900
Subject: samples, bpf: Refactor tail call user progs with libbpf

BPF tail call uses the BPF_MAP_TYPE_PROG_ARRAY type map for calling
into other BPF programs and this PROG_ARRAY should be filled prior to
use. Currently, samples with the PROG_ARRAY type MAP fill this program
array with bpf_load. For bpf_load to fill this map, kernel BPF program
must specify the section with specific format of <prog_type>/<array_idx>
(e.g. SEC("socket/0"))

But by using libbpf instead of bpf_load, user program can specify which
programs should be added to PROG_ARRAY. The advantage of this approach
is that you can selectively add only the programs you want, rather than
adding all of them to PROG_ARRAY, and it's much more intuitive than the
traditional approach.

This commit refactors user programs with the PROG_ARRAY type MAP with
libbpf instead of using bpf_load.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200516040608.1377876-4-danieltimlee@gmail.com
---
 samples/bpf/Makefile       |  4 +--
 samples/bpf/sockex3_user.c | 64 +++++++++++++++++++++++++++++---------------
 samples/bpf/tracex5_user.c | 66 ++++++++++++++++++++++++++++++++++++++++------
 3 files changed, 103 insertions(+), 31 deletions(-)

diff --git a/samples/bpf/Makefile b/samples/bpf/Makefile
index 4c91e5914329..8403e4762306 100644
--- a/samples/bpf/Makefile
+++ b/samples/bpf/Makefile
@@ -63,12 +63,12 @@ TRACE_HELPERS := ../../tools/testing/selftests/bpf/trace_helpers.o
 fds_example-objs := fds_example.o
 sockex1-objs := sockex1_user.o
 sockex2-objs := sockex2_user.o
-sockex3-objs := bpf_load.o sockex3_user.o
+sockex3-objs := sockex3_user.o
 tracex1-objs := tracex1_user.o $(TRACE_HELPERS)
 tracex2-objs := tracex2_user.o
 tracex3-objs := tracex3_user.o
 tracex4-objs := tracex4_user.o
-tracex5-objs := bpf_load.o tracex5_user.o $(TRACE_HELPERS)
+tracex5-objs := tracex5_user.o $(TRACE_HELPERS)
 tracex6-objs := tracex6_user.o
 tracex7-objs := tracex7_user.o
 test_probe_write_user-objs := bpf_load.o test_probe_write_user_user.o
diff --git a/samples/bpf/sockex3_user.c b/samples/bpf/sockex3_user.c
index bbb1cd0666a9..4dbee7427d47 100644
--- a/samples/bpf/sockex3_user.c
+++ b/samples/bpf/sockex3_user.c
@@ -1,18 +1,13 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
 #include <assert.h>
-#include <linux/bpf.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include "sock_example.h"
 #include <unistd.h>
 #include <arpa/inet.h>
 #include <sys/resource.h>
 
-#define PARSE_IP 3
-#define PARSE_IP_PROG_FD (prog_fd[0])
-#define PROG_ARRAY_FD (map_fd[0])
-
 struct flow_key_record {
 	__be32 src;
 	__be32 dst;
@@ -30,31 +25,55 @@ struct pair {
 
 int main(int argc, char **argv)
 {
+	int i, sock, key, fd, main_prog_fd, jmp_table_fd, hash_map_fd;
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_program *prog;
+	struct bpf_object *obj;
 	char filename[256];
+	const char *title;
 	FILE *f;
-	int i, sock, err, id, key = PARSE_IP;
-	struct bpf_prog_info info = {};
-	uint32_t info_len = sizeof(info);
 
 	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	jmp_table_fd = bpf_object__find_map_fd_by_name(obj, "jmp_table");
+	hash_map_fd = bpf_object__find_map_fd_by_name(obj, "hash_map");
+	if (jmp_table_fd < 0 || hash_map_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
 	}
 
-	/* Test fd array lookup which returns the id of the bpf_prog */
-	err = bpf_obj_get_info_by_fd(PARSE_IP_PROG_FD, &info, &info_len);
-	assert(!err);
-	err = bpf_map_lookup_elem(PROG_ARRAY_FD, &key, &id);
-	assert(!err);
-	assert(id == info.id);
+	bpf_object__for_each_program(prog, obj) {
+		fd = bpf_program__fd(prog);
+
+		title = bpf_program__title(prog, false);
+		if (sscanf(title, "socket/%d", &key) != 1) {
+			fprintf(stderr, "ERROR: finding prog failed\n");
+			goto cleanup;
+		}
+
+		if (key == 0)
+			main_prog_fd = fd;
+		else
+			bpf_map_update_elem(jmp_table_fd, &key, &fd, BPF_ANY);
+	}
 
 	sock = open_raw_sock("lo");
 
-	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &prog_fd[4],
+	/* attach BPF program to socket */
+	assert(setsockopt(sock, SOL_SOCKET, SO_ATTACH_BPF, &main_prog_fd,
 			  sizeof(__u32)) == 0);
 
 	if (argc > 1)
@@ -69,8 +88,8 @@ int main(int argc, char **argv)
 
 		sleep(1);
 		printf("IP     src.port -> dst.port               bytes      packets\n");
-		while (bpf_map_get_next_key(map_fd[2], &key, &next_key) == 0) {
-			bpf_map_lookup_elem(map_fd[2], &next_key, &value);
+		while (bpf_map_get_next_key(hash_map_fd, &key, &next_key) == 0) {
+			bpf_map_lookup_elem(hash_map_fd, &next_key, &value);
 			printf("%s.%05d -> %s.%05d %12lld %12lld\n",
 			       inet_ntoa((struct in_addr){htonl(next_key.src)}),
 			       next_key.port16[0],
@@ -80,5 +99,8 @@ int main(int argc, char **argv)
 			key = next_key;
 		}
 	}
+
+cleanup:
+	bpf_object__close(obj);
 	return 0;
 }
diff --git a/samples/bpf/tracex5_user.c b/samples/bpf/tracex5_user.c
index c2317b39e0d2..98dad57a96c4 100644
--- a/samples/bpf/tracex5_user.c
+++ b/samples/bpf/tracex5_user.c
@@ -1,15 +1,21 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <stdio.h>
-#include <linux/bpf.h>
+#include <stdlib.h>
 #include <unistd.h>
 #include <linux/filter.h>
 #include <linux/seccomp.h>
 #include <sys/prctl.h>
 #include <bpf/bpf.h>
-#include "bpf_load.h"
+#include <bpf/libbpf.h>
 #include <sys/resource.h>
 #include "trace_helpers.h"
 
+#ifdef __mips__
+#define	MAX_ENTRIES  6000 /* MIPS n64 syscalls start at 5000 */
+#else
+#define	MAX_ENTRIES  1024
+#endif
+
 /* install fake seccomp program to enable seccomp code path inside the kernel,
  * so that our kprobe attached to seccomp_phase1() can be triggered
  */
@@ -28,16 +34,57 @@ static void install_accept_all_seccomp(void)
 
 int main(int ac, char **argv)
 {
-	FILE *f;
-	char filename[256];
 	struct rlimit r = {RLIM_INFINITY, RLIM_INFINITY};
+	struct bpf_link *link = NULL;
+	struct bpf_program *prog;
+	struct bpf_object *obj;
+	int key, fd, progs_fd;
+	char filename[256];
+	const char *title;
+	FILE *f;
 
-	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
 	setrlimit(RLIMIT_MEMLOCK, &r);
 
-	if (load_bpf_file(filename)) {
-		printf("%s", bpf_log_buf);
-		return 1;
+	snprintf(filename, sizeof(filename), "%s_kern.o", argv[0]);
+	obj = bpf_object__open_file(filename, NULL);
+	if (libbpf_get_error(obj)) {
+		fprintf(stderr, "ERROR: opening BPF object file failed\n");
+		return 0;
+	}
+
+	prog = bpf_object__find_program_by_name(obj, "bpf_prog1");
+	if (!prog) {
+		printf("finding a prog in obj file failed\n");
+		goto cleanup;
+	}
+
+	/* load BPF program */
+	if (bpf_object__load(obj)) {
+		fprintf(stderr, "ERROR: loading BPF object file failed\n");
+		goto cleanup;
+	}
+
+	link = bpf_program__attach(prog);
+	if (libbpf_get_error(link)) {
+		fprintf(stderr, "ERROR: bpf_program__attach failed\n");
+		link = NULL;
+		goto cleanup;
+	}
+
+	progs_fd = bpf_object__find_map_fd_by_name(obj, "progs");
+	if (progs_fd < 0) {
+		fprintf(stderr, "ERROR: finding a map in obj file failed\n");
+		goto cleanup;
+	}
+
+	bpf_object__for_each_program(prog, obj) {
+		title = bpf_program__title(prog, false);
+		/* register only syscalls to PROG_ARRAY */
+		if (sscanf(title, "kprobe/%d", &key) != 1)
+			continue;
+
+		fd = bpf_program__fd(prog);
+		bpf_map_update_elem(progs_fd, &key, &fd, BPF_ANY);
 	}
 
 	install_accept_all_seccomp();
@@ -47,5 +94,8 @@ int main(int ac, char **argv)
 
 	read_trace_pipe();
 
+cleanup:
+	bpf_link__destroy(link);
+	bpf_object__close(obj);
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 14846dda634e28cc0430f1fbbfa6c758a2e5f873 Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sat, 16 May 2020 13:06:07 +0900
Subject: samples, bpf: Add tracex7 test file to .gitignore

This commit adds tracex7 test file (testfile.img) to .gitignore which
comes from test_override_return.sh.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200516040608.1377876-5-danieltimlee@gmail.com
---
 samples/bpf/.gitignore | 1 +
 1 file changed, 1 insertion(+)

diff --git a/samples/bpf/.gitignore b/samples/bpf/.gitignore
index 23837f2ed458..034800c4d1e6 100644
--- a/samples/bpf/.gitignore
+++ b/samples/bpf/.gitignore
@@ -50,3 +50,4 @@ xdp_rxq_info
 xdp_sample_pkts
 xdp_tx_iptunnel
 xdpsock
+testfile.img
-- 
cgit v1.2.3-59-g8ed1b


From 59929cd1fec508a48ea2a04d8f2e4fdef907a2cd Mon Sep 17 00:00:00 2001
From: "Daniel T. Lee" <danieltimlee@gmail.com>
Date: Sat, 16 May 2020 13:06:08 +0900
Subject: samples, bpf: Refactor kprobe, tail call kern progs map definition

Because the previous two commits replaced the bpf_load implementation of
the user program with libbpf, the corresponding kernel program's MAP
definition can be replaced with the new BTF-defined map syntax.

This commit only updates the samples which use the libbpf API, rather
than bpf_load, for loading BPF programs.

Signed-off-by: Daniel T. Lee <danieltimlee@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/20200516040608.1377876-6-danieltimlee@gmail.com
---
 samples/bpf/sampleip_kern.c    | 12 ++++++------
 samples/bpf/sockex3_kern.c     | 36 ++++++++++++++++++------------------
 samples/bpf/trace_event_kern.c | 24 ++++++++++++------------
 samples/bpf/tracex2_kern.c     | 24 ++++++++++++------------
 samples/bpf/tracex3_kern.c     | 24 ++++++++++++------------
 samples/bpf/tracex4_kern.c     | 12 ++++++------
 samples/bpf/tracex5_kern.c     | 14 +++++++-------
 samples/bpf/tracex6_kern.c     | 38 ++++++++++++++++++++------------------
 8 files changed, 93 insertions(+), 91 deletions(-)

diff --git a/samples/bpf/sampleip_kern.c b/samples/bpf/sampleip_kern.c
index e504dc308371..f24806ac24e7 100644
--- a/samples/bpf/sampleip_kern.c
+++ b/samples/bpf/sampleip_kern.c
@@ -13,12 +13,12 @@
 
 #define MAX_IPS		8192
 
-struct bpf_map_def SEC("maps") ip_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(u64),
-	.value_size = sizeof(u32),
-	.max_entries = MAX_IPS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, u64);
+	__type(value, u32);
+	__uint(max_entries, MAX_IPS);
+} ip_map SEC(".maps");
 
 SEC("perf_event")
 int do_sample(struct bpf_perf_event_data *ctx)
diff --git a/samples/bpf/sockex3_kern.c b/samples/bpf/sockex3_kern.c
index 779a5249c418..cab9cca0b8eb 100644
--- a/samples/bpf/sockex3_kern.c
+++ b/samples/bpf/sockex3_kern.c
@@ -19,12 +19,12 @@
 
 #define PROG(F) SEC("socket/"__stringify(F)) int bpf_func_##F
 
-struct bpf_map_def SEC("maps") jmp_table = {
-	.type = BPF_MAP_TYPE_PROG_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u32),
-	.max_entries = 8,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u32));
+	__uint(max_entries, 8);
+} jmp_table SEC(".maps");
 
 #define PARSE_VLAN 1
 #define PARSE_MPLS 2
@@ -92,12 +92,12 @@ struct globals {
 	struct flow_key_record flow;
 };
 
-struct bpf_map_def SEC("maps") percpu_map = {
-	.type = BPF_MAP_TYPE_ARRAY,
-	.key_size = sizeof(__u32),
-	.value_size = sizeof(struct globals),
-	.max_entries = 32,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY);
+	__type(key, __u32);
+	__type(value, struct globals);
+	__uint(max_entries, 32);
+} percpu_map SEC(".maps");
 
 /* user poor man's per_cpu until native support is ready */
 static struct globals *this_cpu_globals(void)
@@ -113,12 +113,12 @@ struct pair {
 	__u64 bytes;
 };
 
-struct bpf_map_def SEC("maps") hash_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct flow_key_record),
-	.value_size = sizeof(struct pair),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct flow_key_record);
+	__type(value, struct pair);
+	__uint(max_entries, 1024);
+} hash_map SEC(".maps");
 
 static void update_stats(struct __sk_buff *skb, struct globals *g)
 {
diff --git a/samples/bpf/trace_event_kern.c b/samples/bpf/trace_event_kern.c
index da1d69e20645..7d3c66fb3f88 100644
--- a/samples/bpf/trace_event_kern.c
+++ b/samples/bpf/trace_event_kern.c
@@ -18,19 +18,19 @@ struct key_t {
 	u32 userstack;
 };
 
-struct bpf_map_def SEC("maps") counts = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(struct key_t),
-	.value_size = sizeof(u64),
-	.max_entries = 10000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, struct key_t);
+	__type(value, u64);
+	__uint(max_entries, 10000);
+} counts SEC(".maps");
 
-struct bpf_map_def SEC("maps") stackmap = {
-	.type = BPF_MAP_TYPE_STACK_TRACE,
-	.key_size = sizeof(u32),
-	.value_size = PERF_MAX_STACK_DEPTH * sizeof(u64),
-	.max_entries = 10000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_STACK_TRACE);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, PERF_MAX_STACK_DEPTH * sizeof(u64));
+	__uint(max_entries, 10000);
+} stackmap SEC(".maps");
 
 #define KERN_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP)
 #define USER_STACKID_FLAGS (0 | BPF_F_FAST_STACK_CMP | BPF_F_USER_STACK)
diff --git a/samples/bpf/tracex2_kern.c b/samples/bpf/tracex2_kern.c
index cc5f94c098f8..5bc696bac27d 100644
--- a/samples/bpf/tracex2_kern.c
+++ b/samples/bpf/tracex2_kern.c
@@ -12,12 +12,12 @@
 #include <bpf/bpf_tracing.h>
 #include "trace_common.h"
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(long),
-	.value_size = sizeof(long),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, long);
+	__type(value, long);
+	__uint(max_entries, 1024);
+} my_map SEC(".maps");
 
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
@@ -71,12 +71,12 @@ struct hist_key {
 	u64 index;
 };
 
-struct bpf_map_def SEC("maps") my_hist_map = {
-	.type = BPF_MAP_TYPE_PERCPU_HASH,
-	.key_size = sizeof(struct hist_key),
-	.value_size = sizeof(long),
-	.max_entries = 1024,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_HASH);
+	__uint(key_size, sizeof(struct hist_key));
+	__uint(value_size, sizeof(long));
+	__uint(max_entries, 1024);
+} my_hist_map SEC(".maps");
 
 SEC("kprobe/" SYSCALL(sys_write))
 int bpf_prog3(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex3_kern.c b/samples/bpf/tracex3_kern.c
index fe21c14feb8d..659613c19a82 100644
--- a/samples/bpf/tracex3_kern.c
+++ b/samples/bpf/tracex3_kern.c
@@ -11,12 +11,12 @@
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(long),
-	.value_size = sizeof(u64),
-	.max_entries = 4096,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, long);
+	__type(value, u64);
+	__uint(max_entries, 4096);
+} my_map SEC(".maps");
 
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
@@ -42,12 +42,12 @@ static unsigned int log2l(unsigned long long n)
 
 #define SLOTS 100
 
-struct bpf_map_def SEC("maps") lat_map = {
-	.type = BPF_MAP_TYPE_PERCPU_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u64),
-	.max_entries = SLOTS,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERCPU_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u64));
+	__uint(max_entries, SLOTS);
+} lat_map SEC(".maps");
 
 SEC("kprobe/blk_account_io_completion")
 int bpf_prog2(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex4_kern.c b/samples/bpf/tracex4_kern.c
index b1bb9df88f8e..eb0f8fdd14bf 100644
--- a/samples/bpf/tracex4_kern.c
+++ b/samples/bpf/tracex4_kern.c
@@ -15,12 +15,12 @@ struct pair {
 	u64 ip;
 };
 
-struct bpf_map_def SEC("maps") my_map = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(long),
-	.value_size = sizeof(struct pair),
-	.max_entries = 1000000,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, long);
+	__type(value, struct pair);
+	__uint(max_entries, 1000000);
+} my_map SEC(".maps");
 
 /* kprobe is NOT a stable ABI. If kernel internals change this bpf+kprobe
  * example will no longer be meaningful
diff --git a/samples/bpf/tracex5_kern.c b/samples/bpf/tracex5_kern.c
index 481790fde864..32b49e8ab6bd 100644
--- a/samples/bpf/tracex5_kern.c
+++ b/samples/bpf/tracex5_kern.c
@@ -15,16 +15,16 @@
 
 #define PROG(F) SEC("kprobe/"__stringify(F)) int bpf_func_##F
 
-struct bpf_map_def SEC("maps") progs = {
-	.type = BPF_MAP_TYPE_PROG_ARRAY,
-	.key_size = sizeof(u32),
-	.value_size = sizeof(u32),
+struct {
+	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
+	__uint(key_size, sizeof(u32));
+	__uint(value_size, sizeof(u32));
 #ifdef __mips__
-	.max_entries = 6000, /* MIPS n64 syscalls start at 5000 */
+	__uint(max_entries, 6000); /* MIPS n64 syscalls start at 5000 */
 #else
-	.max_entries = 1024,
+	__uint(max_entries, 1024);
 #endif
-};
+} progs SEC(".maps");
 
 SEC("kprobe/__seccomp_filter")
 int bpf_prog1(struct pt_regs *ctx)
diff --git a/samples/bpf/tracex6_kern.c b/samples/bpf/tracex6_kern.c
index 96c234efa852..acad5712d8b4 100644
--- a/samples/bpf/tracex6_kern.c
+++ b/samples/bpf/tracex6_kern.c
@@ -3,24 +3,26 @@
 #include <uapi/linux/bpf.h>
 #include <bpf/bpf_helpers.h>
 
-struct bpf_map_def SEC("maps") counters = {
-	.type = BPF_MAP_TYPE_PERF_EVENT_ARRAY,
-	.key_size = sizeof(int),
-	.value_size = sizeof(u32),
-	.max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(u64),
-	.max_entries = 64,
-};
-struct bpf_map_def SEC("maps") values2 = {
-	.type = BPF_MAP_TYPE_HASH,
-	.key_size = sizeof(int),
-	.value_size = sizeof(struct bpf_perf_event_value),
-	.max_entries = 64,
-};
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(u32));
+	__uint(max_entries, 64);
+} counters SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, u64);
+	__uint(max_entries, 64);
+} values SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_HASH);
+	__type(key, int);
+	__type(value, struct bpf_perf_event_value);
+	__uint(max_entries, 64);
+} values2 SEC(".maps");
 
 SEC("kprobe/htab_map_get_next_key")
 int bpf_prog1(struct pt_regs *ctx)
-- 
cgit v1.2.3-59-g8ed1b


From d800bad67d4c21aaf11722f04e0f7547fb915ab5 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 18 May 2020 15:05:27 +0200
Subject: bpf: Fix too large copy from user in bpf_test_init

Commit bc56c919fce7 ("bpf: Add xdp.frame_sz in bpf_prog_test_run_xdp().")
recently changed bpf_prog_test_run_xdp() to use larger frames for XDP in
order to test tail growing frames (via bpf_xdp_adjust_tail) and to have
the memory backing the frame better resemble drivers.

The commit contains a bug, as it tries to copy the max data size from
userspace, instead of the size provided by userspace.  This causes XDP
unit tests to fail sporadically with EFAULT, an unfortunate behavior.
The fix is to only copy the size specified by userspace.

Fixes: bc56c919fce7 ("bpf: Add xdp.frame_sz in bpf_prog_test_run_xdp().")
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/158980712729.256597.6115007718472928659.stgit@firesoul
---
 net/bpf/test_run.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/bpf/test_run.c b/net/bpf/test_run.c
index 30ba7d38941d..bfd4ccd80847 100644
--- a/net/bpf/test_run.c
+++ b/net/bpf/test_run.c
@@ -160,16 +160,20 @@ static void *bpf_test_init(const union bpf_attr *kattr, u32 size,
 			   u32 headroom, u32 tailroom)
 {
 	void __user *data_in = u64_to_user_ptr(kattr->test.data_in);
+	u32 user_size = kattr->test.data_size_in;
 	void *data;
 
 	if (size < ETH_HLEN || size > PAGE_SIZE - headroom - tailroom)
 		return ERR_PTR(-EINVAL);
 
+	if (user_size > size)
+		return ERR_PTR(-EMSGSIZE);
+
 	data = kzalloc(size + headroom + tailroom, GFP_USER);
 	if (!data)
 		return ERR_PTR(-ENOMEM);
 
-	if (copy_from_user(data + headroom, data_in, size)) {
+	if (copy_from_user(data + headroom, data_in, user_size)) {
 		kfree(data);
 		return ERR_PTR(-EFAULT);
 	}
@@ -486,8 +490,6 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr,
 
 	/* XDP have extra tailroom as (most) drivers use full page */
 	max_data_sz = 4096 - headroom - tailroom;
-	if (size > max_data_sz)
-		return -EINVAL;
 
 	data = bpf_test_init(kattr, max_data_sz, headroom, tailroom);
 	if (IS_ERR(data))
-- 
cgit v1.2.3-59-g8ed1b


From 1b66d253610c7f8f257103808a9460223a087469 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 19 May 2020 00:45:45 +0200
Subject: bpf: Add get{peer, sock}name attach types for sock_addr

As stated in 983695fa6765 ("bpf: fix unconnected udp hooks"), the objective
for the existing cgroup connect/sendmsg/recvmsg/bind BPF hooks is to be
transparent to applications. In Cilium we make use of these hooks [0] in
order to enable E-W load balancing for existing Kubernetes service types
for all Cilium managed nodes in the cluster. Those backends can be local
or remote. The main advantage of this approach is that it operates as close
as possible to the socket, and therefore allows us to avoid packet-based NAT,
given that in connect/sendmsg/recvmsg hooks we only need to xlate sock addresses.

This also allows exposing NodePort services on loopback addresses in the
host namespace, for example. As another advantage, this also efficiently
blocks bind requests for applications in the host namespace for exposed
ports. However, one missing item is that we also need to perform reverse
xlation for inet{,6}_getname() hooks such that we can return the service
IP/port tuple back to the application instead of the remote peer address.

The vast majority of applications do not bother with getpeername(), but
on a few occasions we've seen breakage when validating the peer's address
since it unexpectedly returns the backend tuple instead of the service one.
Therefore, this trivial patch adds a getpeername() as well as a getsockname()
BPF cgroup hook for both IPv4 and IPv6, allowing the returned address to be
customised in order to address this situation.

Simple example:

  # ./cilium/cilium service list
  ID   Frontend     Service Type   Backend
  1    1.2.3.4:80   ClusterIP      1 => 10.0.0.10:80

Before; curl's verbose output example, no getpeername() reverse xlation:

  # curl --verbose 1.2.3.4
  * Rebuilt URL to: 1.2.3.4/
  *   Trying 1.2.3.4...
  * TCP_NODELAY set
  * Connected to 1.2.3.4 (10.0.0.10) port 80 (#0)
  > GET / HTTP/1.1
  > Host: 1.2.3.4
  > User-Agent: curl/7.58.0
  > Accept: */*
  [...]

After; with getpeername() reverse xlation:

  # curl --verbose 1.2.3.4
  * Rebuilt URL to: 1.2.3.4/
  *   Trying 1.2.3.4...
  * TCP_NODELAY set
  * Connected to 1.2.3.4 (1.2.3.4) port 80 (#0)
  > GET / HTTP/1.1
  > Host: 1.2.3.4
  > User-Agent: curl/7.58.0
  > Accept: */*
  [...]

Originally, I had both under a BPF_CGROUP_INET{4,6}_GETNAME type and exposed
peer to the context in a fashion similar to inet{,6}_getname(), but API-wise
this is suboptimal as it always forces programs to test for ctx->peer,
which can easily be missed, hence the BPF_CGROUP_INET{4,6}_GET{PEER,SOCK}NAME
split. Similarly, the checked return code is restricted to tnum_range(1, 1),
but if a use case comes up in the future, it can easily be changed to return
an error code instead.
Helper and ctx member access is the same as with connect/sendmsg/etc hooks.

  [0] https://github.com/cilium/cilium/blob/master/bpf/bpf_sock.c

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/61a479d759b2482ae3efb45546490bacd796a220.1589841594.git.daniel@iogearbox.net
---
 include/linux/bpf-cgroup.h     |  1 +
 include/uapi/linux/bpf.h       |  4 ++++
 kernel/bpf/syscall.c           | 12 ++++++++++++
 kernel/bpf/verifier.c          |  6 +++++-
 net/core/filter.c              |  4 ++++
 net/ipv4/af_inet.c             |  8 ++++++--
 net/ipv6/af_inet6.c            |  9 ++++++---
 tools/include/uapi/linux/bpf.h |  4 ++++
 8 files changed, 42 insertions(+), 6 deletions(-)

diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h
index 272626cc3fc9..c66c545e161a 100644
--- a/include/linux/bpf-cgroup.h
+++ b/include/linux/bpf-cgroup.h
@@ -396,6 +396,7 @@ static inline int bpf_percpu_cgroup_storage_update(struct bpf_map *map,
 }
 
 #define cgroup_bpf_enabled (0)
+#define BPF_CGROUP_RUN_SA_PROG_LOCK(sk, uaddr, type, t_ctx) ({ 0; })
 #define BPF_CGROUP_PRE_CONNECT_ENABLED(sk) (0)
 #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; })
 #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; })
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b9b8a0f63b91..97e1fd19ff58 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -220,6 +220,10 @@ enum bpf_attach_type {
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
 	BPF_TRACE_ITER,
+	BPF_CGROUP_INET4_GETPEERNAME,
+	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_INET4_GETSOCKNAME,
+	BPF_CGROUP_INET6_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 57dfc98289d5..431241c74614 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1978,6 +1978,10 @@ bpf_prog_load_check_attach(enum bpf_prog_type prog_type,
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_INET4_GETPEERNAME:
+		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_INET4_GETSOCKNAME:
+		case BPF_CGROUP_INET6_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP6_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
@@ -2767,6 +2771,10 @@ attach_type_to_prog_type(enum bpf_attach_type attach_type)
 	case BPF_CGROUP_INET6_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_INET4_GETPEERNAME:
+	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_INET4_GETSOCKNAME:
+	case BPF_CGROUP_INET6_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
@@ -2912,6 +2920,10 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_CGROUP_INET6_POST_BIND:
 	case BPF_CGROUP_INET4_CONNECT:
 	case BPF_CGROUP_INET6_CONNECT:
+	case BPF_CGROUP_INET4_GETPEERNAME:
+	case BPF_CGROUP_INET6_GETPEERNAME:
+	case BPF_CGROUP_INET4_GETSOCKNAME:
+	case BPF_CGROUP_INET6_GETSOCKNAME:
 	case BPF_CGROUP_UDP4_SENDMSG:
 	case BPF_CGROUP_UDP6_SENDMSG:
 	case BPF_CGROUP_UDP4_RECVMSG:
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 9c7d67d65d8c..2ed8351f47a4 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -7094,7 +7094,11 @@ static int check_return_code(struct bpf_verifier_env *env)
 	switch (env->prog->type) {
 	case BPF_PROG_TYPE_CGROUP_SOCK_ADDR:
 		if (env->prog->expected_attach_type == BPF_CGROUP_UDP4_RECVMSG ||
-		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG)
+		    env->prog->expected_attach_type == BPF_CGROUP_UDP6_RECVMSG ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETPEERNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET4_GETSOCKNAME ||
+		    env->prog->expected_attach_type == BPF_CGROUP_INET6_GETSOCKNAME)
 			range = tnum_range(1, 1);
 		break;
 	case BPF_PROG_TYPE_CGROUP_SKB:
diff --git a/net/core/filter.c b/net/core/filter.c
index 822d662f97ef..bd2853d23b50 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7049,6 +7049,8 @@ static bool sock_addr_is_valid_access(int off, int size,
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET4_BIND:
 		case BPF_CGROUP_INET4_CONNECT:
+		case BPF_CGROUP_INET4_GETPEERNAME:
+		case BPF_CGROUP_INET4_GETSOCKNAME:
 		case BPF_CGROUP_UDP4_SENDMSG:
 		case BPF_CGROUP_UDP4_RECVMSG:
 			break;
@@ -7060,6 +7062,8 @@ static bool sock_addr_is_valid_access(int off, int size,
 		switch (prog->expected_attach_type) {
 		case BPF_CGROUP_INET6_BIND:
 		case BPF_CGROUP_INET6_CONNECT:
+		case BPF_CGROUP_INET6_GETPEERNAME:
+		case BPF_CGROUP_INET6_GETSOCKNAME:
 		case BPF_CGROUP_UDP6_SENDMSG:
 		case BPF_CGROUP_UDP6_RECVMSG:
 			break;
diff --git a/net/ipv4/af_inet.c b/net/ipv4/af_inet.c
index fcf0d12a407a..8f5c8c9409d3 100644
--- a/net/ipv4/af_inet.c
+++ b/net/ipv4/af_inet.c
@@ -755,12 +755,11 @@ do_err:
 }
 EXPORT_SYMBOL(inet_accept);
 
-
 /*
  *	This does both peername and sockname.
  */
 int inet_getname(struct socket *sock, struct sockaddr *uaddr,
-			int peer)
+		 int peer)
 {
 	struct sock *sk		= sock->sk;
 	struct inet_sock *inet	= inet_sk(sk);
@@ -781,6 +780,11 @@ int inet_getname(struct socket *sock, struct sockaddr *uaddr,
 		sin->sin_port = inet->inet_sport;
 		sin->sin_addr.s_addr = addr;
 	}
+	if (cgroup_bpf_enabled)
+		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+					    peer ? BPF_CGROUP_INET4_GETPEERNAME :
+						   BPF_CGROUP_INET4_GETSOCKNAME,
+					    NULL);
 	memset(sin->sin_zero, 0, sizeof(sin->sin_zero));
 	return sizeof(*sin);
 }
diff --git a/net/ipv6/af_inet6.c b/net/ipv6/af_inet6.c
index 771a462a8322..3b6fcc0c321a 100644
--- a/net/ipv6/af_inet6.c
+++ b/net/ipv6/af_inet6.c
@@ -504,9 +504,8 @@ EXPORT_SYMBOL_GPL(inet6_destroy_sock);
 /*
  *	This does both peername and sockname.
  */
-
 int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
-		 int peer)
+		  int peer)
 {
 	struct sockaddr_in6 *sin = (struct sockaddr_in6 *)uaddr;
 	struct sock *sk = sock->sk;
@@ -531,9 +530,13 @@ int inet6_getname(struct socket *sock, struct sockaddr *uaddr,
 			sin->sin6_addr = np->saddr;
 		else
 			sin->sin6_addr = sk->sk_v6_rcv_saddr;
-
 		sin->sin6_port = inet->inet_sport;
 	}
+	if (cgroup_bpf_enabled)
+		BPF_CGROUP_RUN_SA_PROG_LOCK(sk, (struct sockaddr *)sin,
+					    peer ? BPF_CGROUP_INET6_GETPEERNAME :
+						   BPF_CGROUP_INET6_GETSOCKNAME,
+					    NULL);
 	sin->sin6_scope_id = ipv6_iface_scope_id(&sin->sin6_addr,
 						 sk->sk_bound_dev_if);
 	return sizeof(*sin);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 146c742f1d49..1cddc398404a 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -220,6 +220,10 @@ enum bpf_attach_type {
 	BPF_MODIFY_RETURN,
 	BPF_LSM_MAC,
 	BPF_TRACE_ITER,
+	BPF_CGROUP_INET4_GETPEERNAME,
+	BPF_CGROUP_INET6_GETPEERNAME,
+	BPF_CGROUP_INET4_GETSOCKNAME,
+	BPF_CGROUP_INET6_GETSOCKNAME,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From f15ed0185de7d471e907783739dffbe397a93142 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 19 May 2020 00:45:46 +0200
Subject: bpf, libbpf: Enable get{peer, sock}name attach types

Trivial patch to add the new get{peer,sock}name attach types to the section
definitions in order to hook them up to sock_addr cgroup program type.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/7fcd4b1e41a8ebb364754a5975c75a7795051bd2.1589841594.git.daniel@iogearbox.net
---
 tools/lib/bpf/libbpf.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 292257995487..fa04cbe547ed 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6705,6 +6705,14 @@ static const struct bpf_sec_def section_defs[] = {
 						BPF_CGROUP_UDP4_RECVMSG),
 	BPF_EAPROG_SEC("cgroup/recvmsg6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
 						BPF_CGROUP_UDP6_RECVMSG),
+	BPF_EAPROG_SEC("cgroup/getpeername4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_GETPEERNAME),
+	BPF_EAPROG_SEC("cgroup/getpeername6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_GETPEERNAME),
+	BPF_EAPROG_SEC("cgroup/getsockname4",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET4_GETSOCKNAME),
+	BPF_EAPROG_SEC("cgroup/getsockname6",	BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+						BPF_CGROUP_INET6_GETSOCKNAME),
 	BPF_EAPROG_SEC("cgroup/sysctl",		BPF_PROG_TYPE_CGROUP_SYSCTL,
 						BPF_CGROUP_SYSCTL),
 	BPF_EAPROG_SEC("cgroup/getsockopt",	BPF_PROG_TYPE_CGROUP_SOCKOPT,
-- 
cgit v1.2.3-59-g8ed1b


From 05ee19c18c2bb3dea69e29219017367c4a77e65a Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 19 May 2020 00:45:47 +0200
Subject: bpf, bpftool: Enable get{peer, sock}name attach types

Make bpftool aware of the new get{peer,sock}name attach types and add them
to its CLI, documentation and bash completion to allow attachment/detachment
of sock_addr programs there.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/9765b3d03e4c29210c4df56a9cc7e52f5f7bb5ef.1589841594.git.daniel@iogearbox.net
---
 tools/bpf/bpftool/Documentation/bpftool-cgroup.rst | 10 +++++++---
 tools/bpf/bpftool/Documentation/bpftool-prog.rst   |  3 ++-
 tools/bpf/bpftool/bash-completion/bpftool          | 15 +++++++++------
 tools/bpf/bpftool/cgroup.c                         |  7 ++++---
 tools/bpf/bpftool/main.h                           |  4 ++++
 tools/bpf/bpftool/prog.c                           |  6 ++++--
 6 files changed, 30 insertions(+), 15 deletions(-)

diff --git a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
index e4d9da654e84..a226aee3574f 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-cgroup.rst
@@ -29,8 +29,8 @@ CGROUP COMMANDS
 |	*PROG* := { **id** *PROG_ID* | **pinned** *FILE* | **tag** *PROG_TAG* }
 |	*ATTACH_TYPE* := { **ingress** | **egress** | **sock_create** | **sock_ops** | **device** |
 |		**bind4** | **bind6** | **post_bind4** | **post_bind6** | **connect4** | **connect6** |
-|		**sendmsg4** | **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** |
-|		**getsockopt** | **setsockopt** }
+|               **getpeername4** | **getpeername6** | **getsockname4** | **getsockname6** | **sendmsg4** |
+|               **sendmsg6** | **recvmsg4** | **recvmsg6** | **sysctl** | **getsockopt** | **setsockopt** }
 |	*ATTACH_FLAGS* := { **multi** | **override** }
 
 DESCRIPTION
@@ -101,7 +101,11 @@ DESCRIPTION
                   an unconnected udp6 socket (since 5.2);
 		  **sysctl** sysctl access (since 5.2);
 		  **getsockopt** call to getsockopt (since 5.3);
-		  **setsockopt** call to setsockopt (since 5.3).
+		  **setsockopt** call to setsockopt (since 5.3);
+		  **getpeername4** call to getpeername(2) for an inet4 socket (since 5.8);
+		  **getpeername6** call to getpeername(2) for an inet6 socket (since 5.8);
+		  **getsockname4** call to getsockname(2) for an inet4 socket (since 5.8);
+		  **getsockname6** call to getsockname(2) for an inet6 socket (since 5.8).
 
 	**bpftool cgroup detach** *CGROUP* *ATTACH_TYPE* *PROG*
 		  Detach *PROG* from the cgroup *CGROUP* and attach type
diff --git a/tools/bpf/bpftool/Documentation/bpftool-prog.rst b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
index 5948e9d89c8d..2b254959d488 100644
--- a/tools/bpf/bpftool/Documentation/bpftool-prog.rst
+++ b/tools/bpf/bpftool/Documentation/bpftool-prog.rst
@@ -41,7 +41,8 @@ PROG COMMANDS
 |		**cgroup/sock** | **cgroup/dev** | **lwt_in** | **lwt_out** | **lwt_xmit** |
 |		**lwt_seg6local** | **sockops** | **sk_skb** | **sk_msg** | **lirc_mode2** |
 |		**cgroup/bind4** | **cgroup/bind6** | **cgroup/post_bind4** | **cgroup/post_bind6** |
-|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
+|		**cgroup/connect4** | **cgroup/connect6** | **cgroup/getpeername4** | **cgroup/getpeername6** |
+|               **cgroup/getsockname4** | **cgroup/getsockname6** | **cgroup/sendmsg4** | **cgroup/sendmsg6** |
 |		**cgroup/recvmsg4** | **cgroup/recvmsg6** | **cgroup/sysctl** |
 |		**cgroup/getsockopt** | **cgroup/setsockopt** |
 |		**struct_ops** | **fentry** | **fexit** | **freplace**
diff --git a/tools/bpf/bpftool/bash-completion/bpftool b/tools/bpf/bpftool/bash-completion/bpftool
index 9f0f20e73b87..25b25aca1112 100644
--- a/tools/bpf/bpftool/bash-completion/bpftool
+++ b/tools/bpf/bpftool/bash-completion/bpftool
@@ -472,6 +472,8 @@ _bpftool()
                                 lwt_seg6local sockops sk_skb sk_msg \
                                 lirc_mode2 cgroup/bind4 cgroup/bind6 \
                                 cgroup/connect4 cgroup/connect6 \
+                                cgroup/getpeername4 cgroup/getpeername6 \
+                                cgroup/getsockname4 cgroup/getsockname6 \
                                 cgroup/sendmsg4 cgroup/sendmsg6 \
                                 cgroup/recvmsg4 cgroup/recvmsg6 \
                                 cgroup/post_bind4 cgroup/post_bind6 \
@@ -966,9 +968,10 @@ _bpftool()
                     ;;
                 attach|detach)
                     local ATTACH_TYPES='ingress egress sock_create sock_ops \
-                        device bind4 bind6 post_bind4 post_bind6 connect4 \
-                        connect6 sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl \
-                        getsockopt setsockopt'
+                        device bind4 bind6 post_bind4 post_bind6 connect4 connect6 \
+                        getpeername4 getpeername6 getsockname4 getsockname6 \
+                        sendmsg4 sendmsg6 recvmsg4 recvmsg6 sysctl getsockopt \
+                        setsockopt'
                     local ATTACH_FLAGS='multi override'
                     local PROG_TYPE='id pinned tag name'
                     case $prev in
@@ -977,9 +980,9 @@ _bpftool()
                             return 0
                             ;;
                         ingress|egress|sock_create|sock_ops|device|bind4|bind6|\
-                        post_bind4|post_bind6|connect4|connect6|sendmsg4|\
-                        sendmsg6|recvmsg4|recvmsg6|sysctl|getsockopt|\
-                        setsockopt)
+                        post_bind4|post_bind6|connect4|connect6|getpeername4|\
+                        getpeername6|getsockname4|getsockname6|sendmsg4|sendmsg6|\
+                        recvmsg4|recvmsg6|sysctl|getsockopt|setsockopt)
                             COMPREPLY=( $( compgen -W "$PROG_TYPE" -- \
                                 "$cur" ) )
                             return 0
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 1693c802bb20..27931db421d8 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -25,9 +25,10 @@
 	"       ATTACH_TYPE := { ingress | egress | sock_create |\n"	       \
 	"                        sock_ops | device | bind4 | bind6 |\n"	       \
 	"                        post_bind4 | post_bind6 | connect4 |\n"       \
-	"                        connect6 | sendmsg4 | sendmsg6 |\n"           \
-	"                        recvmsg4 | recvmsg6 | sysctl |\n"	       \
-	"                        getsockopt | setsockopt }"
+	"                        connect6 | getpeername4 | getpeername6 |\n"   \
+	"                        getsockname4 | getsockname6 | sendmsg4 |\n"   \
+	"                        sendmsg6 | recvmsg4 | recvmsg6 |\n"           \
+	"                        sysctl | getsockopt | setsockopt }"
 
 static unsigned int query_flags;
 
diff --git a/tools/bpf/bpftool/main.h b/tools/bpf/bpftool/main.h
index f89ac70ef973..5cdf0bc049bd 100644
--- a/tools/bpf/bpftool/main.h
+++ b/tools/bpf/bpftool/main.h
@@ -100,6 +100,10 @@ static const char * const attach_type_name[__MAX_BPF_ATTACH_TYPE] = {
 	[BPF_CGROUP_INET6_CONNECT] = "connect6",
 	[BPF_CGROUP_INET4_POST_BIND] = "post_bind4",
 	[BPF_CGROUP_INET6_POST_BIND] = "post_bind6",
+	[BPF_CGROUP_INET4_GETPEERNAME] = "getpeername4",
+	[BPF_CGROUP_INET6_GETPEERNAME] = "getpeername6",
+	[BPF_CGROUP_INET4_GETSOCKNAME] = "getsockname4",
+	[BPF_CGROUP_INET6_GETSOCKNAME] = "getsockname6",
 	[BPF_CGROUP_UDP4_SENDMSG] = "sendmsg4",
 	[BPF_CGROUP_UDP6_SENDMSG] = "sendmsg6",
 	[BPF_CGROUP_SYSCTL] = "sysctl",
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index b6e5ba568f98..245f941fdbcf 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -2012,8 +2012,10 @@ static int do_help(int argc, char **argv)
 		"                 sk_reuseport | flow_dissector | cgroup/sysctl |\n"
 		"                 cgroup/bind4 | cgroup/bind6 | cgroup/post_bind4 |\n"
 		"                 cgroup/post_bind6 | cgroup/connect4 | cgroup/connect6 |\n"
-		"                 cgroup/sendmsg4 | cgroup/sendmsg6 | cgroup/recvmsg4 |\n"
-		"                 cgroup/recvmsg6 | cgroup/getsockopt | cgroup/setsockopt |\n"
+		"                 cgroup/getpeername4 | cgroup/getpeername6 |\n"
+		"                 cgroup/getsockname4 | cgroup/getsockname6 | cgroup/sendmsg4 |\n"
+		"                 cgroup/sendmsg6 | cgroup/recvmsg4 | cgroup/recvmsg6 |\n"
+		"                 cgroup/getsockopt | cgroup/setsockopt |\n"
 		"                 struct_ops | fentry | fexit | freplace }\n"
 		"       ATTACH_TYPE := { msg_verdict | stream_verdict | stream_parser |\n"
 		"                        flow_dissector }\n"
-- 
cgit v1.2.3-59-g8ed1b


From 566fc3f5d1c641b510ec487cf274a047f8a1e849 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 19 May 2020 00:45:48 +0200
Subject: bpf, testing: Add get{peer, sock}name selftests to test_progs

Extend the existing connect_force_port test to assert get{peer,sock}name programs
as well. The workflow for e.g. IPv4 is as follows: i) the server binds to a
concrete port, ii) the client calls getsockname() on the server fd, which exposes
1.2.3.4:60000 to the client, iii) the client connects to service address
1.2.3.4:60000, which binds to a concrete local address (127.0.0.1:22222) and
remaps the service address to a concrete backend address (127.0.0.1:60123),
iv) the client then calls getsockname() on its own fd to verify the local address
(127.0.0.1:22222) and getpeername() on its own fd, which then publishes the
service address (1.2.3.4:60000) instead of the actual backend. The same workflow
is done for IPv6, just with different address/port tuples.

  # ./test_progs -t connect_force_port
  #14 connect_force_port:OK
  Summary: 1/0 PASSED, 0 SKIPPED, 0 FAILED

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Acked-by: Andrey Ignatov <rdna@fb.com>
Link: https://lore.kernel.org/bpf/3343da6ad08df81af715a95d61a84fb4a960f2bf.1589841594.git.daniel@iogearbox.net
---
 tools/testing/selftests/bpf/network_helpers.c      |  11 ++-
 tools/testing/selftests/bpf/network_helpers.h      |   1 +
 .../selftests/bpf/prog_tests/connect_force_port.c  | 107 +++++++++++++++------
 .../selftests/bpf/progs/connect_force_port4.c      |  59 +++++++++++-
 .../selftests/bpf/progs/connect_force_port6.c      |  70 +++++++++++++-
 5 files changed, 215 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/bpf/network_helpers.c b/tools/testing/selftests/bpf/network_helpers.c
index 999a775484c1..e36dd1a1780d 100644
--- a/tools/testing/selftests/bpf/network_helpers.c
+++ b/tools/testing/selftests/bpf/network_helpers.c
@@ -5,6 +5,8 @@
 #include <string.h>
 #include <unistd.h>
 
+#include <arpa/inet.h>
+
 #include <sys/epoll.h>
 
 #include <linux/err.h>
@@ -35,7 +37,7 @@ struct ipv6_packet pkt_v6 = {
 	.tcp.doff = 5,
 };
 
-int start_server(int family, int type)
+int start_server_with_port(int family, int type, __u16 port)
 {
 	struct sockaddr_storage addr = {};
 	socklen_t len;
@@ -45,11 +47,13 @@ int start_server(int family, int type)
 		struct sockaddr_in *sin = (void *)&addr;
 
 		sin->sin_family = AF_INET;
+		sin->sin_port = htons(port);
 		len = sizeof(*sin);
 	} else {
 		struct sockaddr_in6 *sin6 = (void *)&addr;
 
 		sin6->sin6_family = AF_INET6;
+		sin6->sin6_port = htons(port);
 		len = sizeof(*sin6);
 	}
 
@@ -76,6 +80,11 @@ int start_server(int family, int type)
 	return fd;
 }
 
+int start_server(int family, int type)
+{
+	return start_server_with_port(family, type, 0);
+}
+
 static const struct timeval timeo_sec = { .tv_sec = 3 };
 static const size_t timeo_optlen = sizeof(timeo_sec);
 
diff --git a/tools/testing/selftests/bpf/network_helpers.h b/tools/testing/selftests/bpf/network_helpers.h
index 86914e6e7b53..6a8009605670 100644
--- a/tools/testing/selftests/bpf/network_helpers.h
+++ b/tools/testing/selftests/bpf/network_helpers.h
@@ -34,6 +34,7 @@ struct ipv6_packet {
 extern struct ipv6_packet pkt_v6;
 
 int start_server(int family, int type);
+int start_server_with_port(int family, int type, __u16 port);
 int connect_to_fd(int family, int type, int server_fd);
 int connect_fd_to_fd(int client_fd, int server_fd);
 int connect_wait(int client_fd);
diff --git a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
index 47fbb20cb6a6..17bbf76812ca 100644
--- a/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
+++ b/tools/testing/selftests/bpf/prog_tests/connect_force_port.c
@@ -4,7 +4,8 @@
 #include "cgroup_helpers.h"
 #include "network_helpers.h"
 
-static int verify_port(int family, int fd, int expected)
+static int verify_ports(int family, int fd,
+			__u16 expected_local, __u16 expected_peer)
 {
 	struct sockaddr_storage addr;
 	socklen_t len = sizeof(addr);
@@ -20,9 +21,25 @@ static int verify_port(int family, int fd, int expected)
 	else
 		port = ((struct sockaddr_in6 *)&addr)->sin6_port;
 
-	if (ntohs(port) != expected) {
-		log_err("Unexpected port %d, expected %d", ntohs(port),
-			expected);
+	if (ntohs(port) != expected_local) {
+		log_err("Unexpected local port %d, expected %d", ntohs(port),
+			expected_local);
+		return -1;
+	}
+
+	if (getpeername(fd, (struct sockaddr *)&addr, &len)) {
+		log_err("Failed to get peer addr");
+		return -1;
+	}
+
+	if (family == AF_INET)
+		port = ((struct sockaddr_in *)&addr)->sin_port;
+	else
+		port = ((struct sockaddr_in6 *)&addr)->sin6_port;
+
+	if (ntohs(port) != expected_peer) {
+		log_err("Unexpected peer port %d, expected %d", ntohs(port),
+			expected_peer);
 		return -1;
 	}
 
@@ -31,33 +48,67 @@ static int verify_port(int family, int fd, int expected)
 
 static int run_test(int cgroup_fd, int server_fd, int family, int type)
 {
+	bool v4 = family == AF_INET;
+	__u16 expected_local_port = v4 ? 22222 : 22223;
+	__u16 expected_peer_port = 60000;
 	struct bpf_prog_load_attr attr = {
-		.prog_type = BPF_PROG_TYPE_CGROUP_SOCK_ADDR,
+		.file = v4 ? "./connect_force_port4.o" :
+			     "./connect_force_port6.o",
 	};
+	struct bpf_program *prog;
 	struct bpf_object *obj;
-	int expected_port;
-	int prog_fd;
-	int err;
-	int fd;
-
-	if (family == AF_INET) {
-		attr.file = "./connect_force_port4.o";
-		attr.expected_attach_type = BPF_CGROUP_INET4_CONNECT;
-		expected_port = 22222;
-	} else {
-		attr.file = "./connect_force_port6.o";
-		attr.expected_attach_type = BPF_CGROUP_INET6_CONNECT;
-		expected_port = 22223;
-	}
+	int xlate_fd, fd, err;
+	__u32 duration = 0;
 
-	err = bpf_prog_load_xattr(&attr, &obj, &prog_fd);
+	err = bpf_prog_load_xattr(&attr, &obj, &xlate_fd);
 	if (err) {
 		log_err("Failed to load BPF object");
 		return -1;
 	}
 
-	err = bpf_prog_attach(prog_fd, cgroup_fd, attr.expected_attach_type,
-			      0);
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/connect4" :
+						 "cgroup/connect6");
+	if (CHECK(!prog, "find_prog", "connect prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_CONNECT :
+			      BPF_CGROUP_INET6_CONNECT, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/getpeername4" :
+						 "cgroup/getpeername6");
+	if (CHECK(!prog, "find_prog", "getpeername prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_GETPEERNAME :
+			      BPF_CGROUP_INET6_GETPEERNAME, 0);
+	if (err) {
+		log_err("Failed to attach BPF program");
+		goto close_bpf_object;
+	}
+
+	prog = bpf_object__find_program_by_title(obj, v4 ?
+						 "cgroup/getsockname4" :
+						 "cgroup/getsockname6");
+	if (CHECK(!prog, "find_prog", "getsockname prog not found\n")) {
+		err = -EIO;
+		goto close_bpf_object;
+	}
+
+	err = bpf_prog_attach(bpf_program__fd(prog), cgroup_fd, v4 ?
+			      BPF_CGROUP_INET4_GETSOCKNAME :
+			      BPF_CGROUP_INET6_GETSOCKNAME, 0);
 	if (err) {
 		log_err("Failed to attach BPF program");
 		goto close_bpf_object;
@@ -69,8 +120,8 @@ static int run_test(int cgroup_fd, int server_fd, int family, int type)
 		goto close_bpf_object;
 	}
 
-	err = verify_port(family, fd, expected_port);
-
+	err = verify_ports(family, fd, expected_local_port,
+			   expected_peer_port);
 	close(fd);
 
 close_bpf_object:
@@ -86,25 +137,25 @@ void test_connect_force_port(void)
 	if (CHECK_FAIL(cgroup_fd < 0))
 		return;
 
-	server_fd = start_server(AF_INET, SOCK_STREAM);
+	server_fd = start_server_with_port(AF_INET, SOCK_STREAM, 60123);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server(AF_INET6, SOCK_STREAM);
+	server_fd = start_server_with_port(AF_INET6, SOCK_STREAM, 60124);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_STREAM));
 	close(server_fd);
 
-	server_fd = start_server(AF_INET, SOCK_DGRAM);
+	server_fd = start_server_with_port(AF_INET, SOCK_DGRAM, 60123);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET, SOCK_DGRAM));
 	close(server_fd);
 
-	server_fd = start_server(AF_INET6, SOCK_DGRAM);
+	server_fd = start_server_with_port(AF_INET6, SOCK_DGRAM, 60124);
 	if (CHECK_FAIL(server_fd < 0))
 		goto close_cgroup_fd;
 	CHECK_FAIL(run_test(cgroup_fd, server_fd, AF_INET6, SOCK_DGRAM));
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port4.c b/tools/testing/selftests/bpf/progs/connect_force_port4.c
index 1b8eb34b2db0..7396308677a3 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port4.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port4.c
@@ -1,5 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0
 #include <string.h>
+#include <stdbool.h>
 
 #include <linux/bpf.h>
 #include <linux/in.h>
@@ -12,17 +13,71 @@
 char _license[] SEC("license") = "GPL";
 int _version SEC("version") = 1;
 
+struct svc_addr {
+	__be32 addr;
+	__be16 port;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
 SEC("cgroup/connect4")
-int _connect4(struct bpf_sock_addr *ctx)
+int connect4(struct bpf_sock_addr *ctx)
 {
 	struct sockaddr_in sa = {};
+	struct svc_addr *orig;
 
+	/* Force local address to 127.0.0.1:22222. */
 	sa.sin_family = AF_INET;
 	sa.sin_port = bpf_htons(22222);
-	sa.sin_addr.s_addr = bpf_htonl(0x7f000001); /* 127.0.0.1 */
+	sa.sin_addr.s_addr = bpf_htonl(0x7f000001);
 
 	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
 		return 0;
 
+	/* Rewire service 1.2.3.4:60000 to backend 127.0.0.1:60123. */
+	if (ctx->user_port == bpf_htons(60000)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+		if (!orig)
+			return 0;
+
+		orig->addr = ctx->user_ip4;
+		orig->port = ctx->user_port;
+
+		ctx->user_ip4 = bpf_htonl(0x7f000001);
+		ctx->user_port = bpf_htons(60123);
+	}
+	return 1;
+}
+
+SEC("cgroup/getsockname4")
+int getsockname4(struct bpf_sock_addr *ctx)
+{
+	/* Expose local server as 1.2.3.4:60000 to client. */
+	if (ctx->user_port == bpf_htons(60123)) {
+		ctx->user_ip4 = bpf_htonl(0x01020304);
+		ctx->user_port = bpf_htons(60000);
+	}
+	return 1;
+}
+
+SEC("cgroup/getpeername4")
+int getpeername4(struct bpf_sock_addr *ctx)
+{
+	struct svc_addr *orig;
+
+	/* Expose service 1.2.3.4:60000 as peer instead of backend. */
+	if (ctx->user_port == bpf_htons(60123)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+		if (orig) {
+			ctx->user_ip4 = orig->addr;
+			ctx->user_port = orig->port;
+		}
+	}
 	return 1;
 }
diff --git a/tools/testing/selftests/bpf/progs/connect_force_port6.c b/tools/testing/selftests/bpf/progs/connect_force_port6.c
index ae6f7d750b4c..c1a2b555e9ad 100644
--- a/tools/testing/selftests/bpf/progs/connect_force_port6.c
+++ b/tools/testing/selftests/bpf/progs/connect_force_port6.c
@@ -12,17 +12,83 @@
 char _license[] SEC("license") = "GPL";
 int _version SEC("version") = 1;
 
+struct svc_addr {
+	__be32 addr[4];
+	__be16 port;
+};
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, int);
+	__type(value, struct svc_addr);
+} service_mapping SEC(".maps");
+
 SEC("cgroup/connect6")
-int _connect6(struct bpf_sock_addr *ctx)
+int connect6(struct bpf_sock_addr *ctx)
 {
 	struct sockaddr_in6 sa = {};
+	struct svc_addr *orig;
 
+	/* Force local address to [::1]:22223. */
 	sa.sin6_family = AF_INET6;
 	sa.sin6_port = bpf_htons(22223);
-	sa.sin6_addr.s6_addr32[3] = bpf_htonl(1); /* ::1 */
+	sa.sin6_addr.s6_addr32[3] = bpf_htonl(1);
 
 	if (bpf_bind(ctx, (struct sockaddr *)&sa, sizeof(sa)) != 0)
 		return 0;
 
+	/* Rewire service [fc00::1]:60000 to backend [::1]:60124. */
+	if (ctx->user_port == bpf_htons(60000)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0,
+					  BPF_SK_STORAGE_GET_F_CREATE);
+		if (!orig)
+			return 0;
+
+		orig->addr[0] = ctx->user_ip6[0];
+		orig->addr[1] = ctx->user_ip6[1];
+		orig->addr[2] = ctx->user_ip6[2];
+		orig->addr[3] = ctx->user_ip6[3];
+		orig->port = ctx->user_port;
+
+		ctx->user_ip6[0] = 0;
+		ctx->user_ip6[1] = 0;
+		ctx->user_ip6[2] = 0;
+		ctx->user_ip6[3] = bpf_htonl(1);
+		ctx->user_port = bpf_htons(60124);
+	}
+	return 1;
+}
+
+SEC("cgroup/getsockname6")
+int getsockname6(struct bpf_sock_addr *ctx)
+{
+	/* Expose local server as [fc00::1]:60000 to client. */
+	if (ctx->user_port == bpf_htons(60124)) {
+		ctx->user_ip6[0] = bpf_htonl(0xfc000000);
+		ctx->user_ip6[1] = 0;
+		ctx->user_ip6[2] = 0;
+		ctx->user_ip6[3] = bpf_htonl(1);
+		ctx->user_port = bpf_htons(60000);
+	}
+	return 1;
+}
+
+SEC("cgroup/getpeername6")
+int getpeername6(struct bpf_sock_addr *ctx)
+{
+	struct svc_addr *orig;
+
+	/* Expose service [fc00::1]:60000 as peer instead of backend. */
+	if (ctx->user_port == bpf_htons(60124)) {
+		orig = bpf_sk_storage_get(&service_mapping, ctx->sk, 0, 0);
+		if (orig) {
+			ctx->user_ip6[0] = orig->addr[0];
+			ctx->user_ip6[1] = orig->addr[1];
+			ctx->user_ip6[2] = orig->addr[2];
+			ctx->user_ip6[3] = orig->addr[3];
+			ctx->user_port = orig->port;
+		}
+	}
 	return 1;
 }
-- 
cgit v1.2.3-59-g8ed1b


From fb53d3b63743585ce918094d6109a3865fa66e5f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 19 May 2020 11:39:53 -0700
Subject: tools/bpf: sync bpf.h

Sync tools/include/uapi/linux/bpf.h from include/uapi.

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 1cddc398404a..97e1fd19ff58 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -73,7 +73,7 @@ struct bpf_insn {
 /* Key of an a BPF_MAP_TYPE_LPM_TRIE entry */
 struct bpf_lpm_trie_key {
 	__u32	prefixlen;	/* up to 32 for AF_INET, 128 for AF_INET6 */
-	__u8	data[];	/* Arbitrary size */
+	__u8	data[0];	/* Arbitrary size */
 };
 
 struct bpf_cgroup_storage_key {
@@ -2019,8 +2019,8 @@ union bpf_attr {
  * int bpf_xdp_adjust_tail(struct xdp_buff *xdp_md, int delta)
  * 	Description
  * 		Adjust (move) *xdp_md*\ **->data_end** by *delta* bytes. It is
- * 		only possible to shrink the packet as of this writing,
- * 		therefore *delta* must be a negative integer.
+ * 		possible to both shrink and grow the packet tail.
+ * 		Shrink done via *delta* being a negative integer.
  *
  * 		A call to this helper is susceptible to change the underlying
  * 		packet buffer. Therefore, at load time, all checks on pointers
-- 
cgit v1.2.3-59-g8ed1b


From b9f4c01f3e0b06579a8074dcc8638fae89a1ca67 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 18 May 2020 16:45:16 -0700
Subject: selftest/bpf: Make bpf_iter selftest compilable against old vmlinux.h

It's good to be able to compile bpf_iter selftest even on systems that don't
have the very latest vmlinux.h, e.g., for libbpf tests against older kernels in
Travis CI. To that end, re-define bpf_iter_meta and corresponding bpf_iter
context structs in each selftest. To avoid type clashes with vmlinux.h, rename
vmlinux.h's definitions to get them out of the way.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/20200518234516.3915052-1-andriin@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c   | 16 ++++++++++++++++
 .../testing/selftests/bpf/progs/bpf_iter_ipv6_route.c  | 16 ++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_netlink.c   | 16 ++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_task.c      | 16 ++++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_task_file.c | 18 ++++++++++++++++++
 .../selftests/bpf/progs/bpf_iter_test_kern_common.h    | 16 ++++++++++++++++
 6 files changed, 98 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
index 4867cd3445c8..b57bd6fef208 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_bpf_map.c
@@ -1,11 +1,27 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
 SEC("iter/bpf_map")
 int dump_bpf_map(struct bpf_iter__bpf_map *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
index ab9e2650e021..c8e9ca74c87b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_ipv6_route.c
@@ -1,9 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__ipv6_route bpf_iter__ipv6_route___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__ipv6_route
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__ipv6_route {
+	struct bpf_iter_meta *meta;
+	struct fib6_info *rt;
+} __attribute__((preserve_access_index));
+
 char _license[] SEC("license") = "GPL";
 
 extern bool CONFIG_IPV6_SUBTREES __kconfig __weak;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
index 6b40a233d4e0..e7b8753eac0b 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_netlink.c
@@ -1,6 +1,11 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__netlink bpf_iter__netlink___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__netlink
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
@@ -9,6 +14,17 @@ char _license[] SEC("license") = "GPL";
 #define sk_rmem_alloc	sk_backlog.rmem_alloc
 #define sk_refcnt	__sk_common.skc_refcnt
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__netlink {
+	struct bpf_iter_meta *meta;
+	struct netlink_sock *sk;
+} __attribute__((preserve_access_index));
+
 static inline struct inode *SOCK_INODE(struct socket *socket)
 {
 	return &container_of(socket, struct socket_alloc, socket)->vfs_inode;
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task.c b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
index 90f9011c57ca..ee754021f98e 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task.c
@@ -1,11 +1,27 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
index c6ced38f0880..0f0ec3db20ba 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_task_file.c
@@ -1,11 +1,29 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task_file bpf_iter__task_file___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task_file
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_tracing.h>
 
 char _license[] SEC("license") = "GPL";
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task_file {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+	__u32 fd;
+	struct file *file;
+} __attribute__((preserve_access_index));
+
 SEC("iter/task_file")
 int dump_task_file(struct bpf_iter__task_file *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
index bdd51cf14b54..dee1339e6905 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern_common.h
@@ -1,11 +1,27 @@
 /* SPDX-License-Identifier: GPL-2.0 */
 /* Copyright (c) 2020 Facebook */
+/* "undefine" structs in vmlinux.h, because we "override" them below */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 int count = 0;
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
-- 
cgit v1.2.3-59-g8ed1b


From fc4a4515acac072a69465d01ba45969323a9c50d Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:09:03 +0200
Subject: s390/net: remove pm ops from ccwgroup drivers

Commit 5e1fb45ec8e2 ("s390/ccwgroup: remove pm support") removed power
management support from the ccwgroup bus driver. So remove the
associated callbacks from all ccwgroup drivers.

CC: Vineeth Vijayan <vneethv@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/s390/include/asm/ccwgroup.h  | 10 -------
 drivers/s390/net/ctcm_main.c      | 40 --------------------------
 drivers/s390/net/lcs.c            | 59 ---------------------------------------
 drivers/s390/net/qeth_core_main.c | 31 --------------------
 4 files changed, 140 deletions(-)

diff --git a/arch/s390/include/asm/ccwgroup.h b/arch/s390/include/asm/ccwgroup.h
index 7293c139dd79..ad3acb1e882b 100644
--- a/arch/s390/include/asm/ccwgroup.h
+++ b/arch/s390/include/asm/ccwgroup.h
@@ -36,11 +36,6 @@ struct ccwgroup_device {
  * @set_online: function called when device is set online
  * @set_offline: function called when device is set offline
  * @shutdown: function called when device is shut down
- * @prepare: prepare for pm state transition
- * @complete: undo work done in @prepare
- * @freeze: callback for freezing during hibernation snapshotting
- * @thaw: undo work done in @freeze
- * @restore: callback for restoring after hibernation
  * @driver: embedded driver structure
  * @ccw_driver: supported ccw_driver (optional)
  */
@@ -50,11 +45,6 @@ struct ccwgroup_driver {
 	int (*set_online) (struct ccwgroup_device *);
 	int (*set_offline) (struct ccwgroup_device *);
 	void (*shutdown)(struct ccwgroup_device *);
-	int (*prepare) (struct ccwgroup_device *);
-	void (*complete) (struct ccwgroup_device *);
-	int (*freeze)(struct ccwgroup_device *);
-	int (*thaw) (struct ccwgroup_device *);
-	int (*restore)(struct ccwgroup_device *);
 
 	struct device_driver driver;
 	struct ccw_driver *ccw_driver;
diff --git a/drivers/s390/net/ctcm_main.c b/drivers/s390/net/ctcm_main.c
index 437a6d822105..d06809eac16d 100644
--- a/drivers/s390/net/ctcm_main.c
+++ b/drivers/s390/net/ctcm_main.c
@@ -1698,43 +1698,6 @@ static void ctcm_remove_device(struct ccwgroup_device *cgdev)
 	put_device(&cgdev->dev);
 }
 
-static int ctcm_pm_suspend(struct ccwgroup_device *gdev)
-{
-	struct ctcm_priv *priv = dev_get_drvdata(&gdev->dev);
-
-	if (gdev->state == CCWGROUP_OFFLINE)
-		return 0;
-	netif_device_detach(priv->channel[CTCM_READ]->netdev);
-	ctcm_close(priv->channel[CTCM_READ]->netdev);
-	if (!wait_event_timeout(priv->fsm->wait_q,
-	    fsm_getstate(priv->fsm) == DEV_STATE_STOPPED, CTCM_TIME_5_SEC)) {
-		netif_device_attach(priv->channel[CTCM_READ]->netdev);
-		return -EBUSY;
-	}
-	ccw_device_set_offline(gdev->cdev[1]);
-	ccw_device_set_offline(gdev->cdev[0]);
-	return 0;
-}
-
-static int ctcm_pm_resume(struct ccwgroup_device *gdev)
-{
-	struct ctcm_priv *priv = dev_get_drvdata(&gdev->dev);
-	int rc;
-
-	if (gdev->state == CCWGROUP_OFFLINE)
-		return 0;
-	rc = ccw_device_set_online(gdev->cdev[1]);
-	if (rc)
-		goto err_out;
-	rc = ccw_device_set_online(gdev->cdev[0]);
-	if (rc)
-		goto err_out;
-	ctcm_open(priv->channel[CTCM_READ]->netdev);
-err_out:
-	netif_device_attach(priv->channel[CTCM_READ]->netdev);
-	return rc;
-}
-
 static struct ccw_device_id ctcm_ids[] = {
 	{CCW_DEVICE(0x3088, 0x08), .driver_info = ctcm_channel_type_parallel},
 	{CCW_DEVICE(0x3088, 0x1e), .driver_info = ctcm_channel_type_ficon},
@@ -1764,9 +1727,6 @@ static struct ccwgroup_driver ctcm_group_driver = {
 	.remove      = ctcm_remove_device,
 	.set_online  = ctcm_new_device,
 	.set_offline = ctcm_shutdown_device,
-	.freeze	     = ctcm_pm_suspend,
-	.thaw	     = ctcm_pm_resume,
-	.restore     = ctcm_pm_resume,
 };
 
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
diff --git a/drivers/s390/net/lcs.c b/drivers/s390/net/lcs.c
index 8f08b0a2917c..440219bcaa2b 100644
--- a/drivers/s390/net/lcs.c
+++ b/drivers/s390/net/lcs.c
@@ -2296,60 +2296,6 @@ lcs_remove_device(struct ccwgroup_device *ccwgdev)
 	put_device(&ccwgdev->dev);
 }
 
-static int lcs_pm_suspend(struct lcs_card *card)
-{
-	if (card->dev)
-		netif_device_detach(card->dev);
-	lcs_set_allowed_threads(card, 0);
-	lcs_wait_for_threads(card, 0xffffffff);
-	if (card->state != DEV_STATE_DOWN)
-		__lcs_shutdown_device(card->gdev, 1);
-	return 0;
-}
-
-static int lcs_pm_resume(struct lcs_card *card)
-{
-	int rc = 0;
-
-	if (card->state == DEV_STATE_RECOVER)
-		rc = lcs_new_device(card->gdev);
-	if (card->dev)
-		netif_device_attach(card->dev);
-	if (rc) {
-		dev_warn(&card->gdev->dev, "The lcs device driver "
-			"failed to recover the device\n");
-	}
-	return rc;
-}
-
-static int lcs_prepare(struct ccwgroup_device *gdev)
-{
-	return 0;
-}
-
-static void lcs_complete(struct ccwgroup_device *gdev)
-{
-	return;
-}
-
-static int lcs_freeze(struct ccwgroup_device *gdev)
-{
-	struct lcs_card *card = dev_get_drvdata(&gdev->dev);
-	return lcs_pm_suspend(card);
-}
-
-static int lcs_thaw(struct ccwgroup_device *gdev)
-{
-	struct lcs_card *card = dev_get_drvdata(&gdev->dev);
-	return lcs_pm_resume(card);
-}
-
-static int lcs_restore(struct ccwgroup_device *gdev)
-{
-	struct lcs_card *card = dev_get_drvdata(&gdev->dev);
-	return lcs_pm_resume(card);
-}
-
 static struct ccw_device_id lcs_ids[] = {
 	{CCW_DEVICE(0x3088, 0x08), .driver_info = lcs_channel_type_parallel},
 	{CCW_DEVICE(0x3088, 0x1f), .driver_info = lcs_channel_type_2216},
@@ -2382,11 +2328,6 @@ static struct ccwgroup_driver lcs_group_driver = {
 	.remove      = lcs_remove_device,
 	.set_online  = lcs_new_device,
 	.set_offline = lcs_shutdown_device,
-	.prepare     = lcs_prepare,
-	.complete    = lcs_complete,
-	.freeze	     = lcs_freeze,
-	.thaw	     = lcs_thaw,
-	.restore     = lcs_restore,
 };
 
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
diff --git a/drivers/s390/net/qeth_core_main.c b/drivers/s390/net/qeth_core_main.c
index db8e069be3a0..18a0fb75a710 100644
--- a/drivers/s390/net/qeth_core_main.c
+++ b/drivers/s390/net/qeth_core_main.c
@@ -6434,32 +6434,6 @@ static void qeth_core_shutdown(struct ccwgroup_device *gdev)
 	qdio_free(CARD_DDEV(card));
 }
 
-static int qeth_suspend(struct ccwgroup_device *gdev)
-{
-	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-
-	qeth_set_allowed_threads(card, 0, 1);
-	wait_event(card->wait_q, qeth_threads_running(card, 0xffffffff) == 0);
-	if (gdev->state == CCWGROUP_OFFLINE)
-		return 0;
-
-	qeth_set_offline(card, false);
-	return 0;
-}
-
-static int qeth_resume(struct ccwgroup_device *gdev)
-{
-	struct qeth_card *card = dev_get_drvdata(&gdev->dev);
-	int rc;
-
-	rc = qeth_set_online(card);
-
-	qeth_set_allowed_threads(card, 0xffffffff, 0);
-	if (rc)
-		dev_warn(&card->gdev->dev, "The qeth device driver failed to recover an error on the device\n");
-	return rc;
-}
-
 static ssize_t group_store(struct device_driver *ddrv, const char *buf,
 			   size_t count)
 {
@@ -6496,11 +6470,6 @@ static struct ccwgroup_driver qeth_core_ccwgroup_driver = {
 	.set_online = qeth_core_set_online,
 	.set_offline = qeth_core_set_offline,
 	.shutdown = qeth_core_shutdown,
-	.prepare = NULL,
-	.complete = NULL,
-	.freeze = qeth_suspend,
-	.thaw = qeth_resume,
-	.restore = qeth_resume,
 };
 
 struct qeth_card *qeth_get_card_by_busid(char *bus_id)
-- 
cgit v1.2.3-59-g8ed1b


From 97ccf6f9ae6959ec473079977d9575f2b0ba7338 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:09:04 +0200
Subject: s390/net: remove pm support from iucv drivers

Commit 394216275c7d ("s390: remove broken hibernate / power management support")
removed support for ARCH_HIBERNATION_POSSIBLE on s390.
So drop the unused pm ops from the iucv drivers.

CC: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/s390/net/netiucv.c  | 104 +-------------------------------------------
 drivers/s390/net/smsgiucv.c |  65 ---------------------------
 2 files changed, 1 insertion(+), 168 deletions(-)

diff --git a/drivers/s390/net/netiucv.c b/drivers/s390/net/netiucv.c
index 5ce2424ca729..260860cf3aa1 100644
--- a/drivers/s390/net/netiucv.c
+++ b/drivers/s390/net/netiucv.c
@@ -112,27 +112,10 @@ DECLARE_PER_CPU(char[256], iucv_dbf_txt_buf);
  */
 #define PRINTK_HEADER " iucv: "       /* for debugging */
 
-/* dummy device to make sure netiucv_pm functions are called */
-static struct device *netiucv_dev;
-
-static int netiucv_pm_prepare(struct device *);
-static void netiucv_pm_complete(struct device *);
-static int netiucv_pm_freeze(struct device *);
-static int netiucv_pm_restore_thaw(struct device *);
-
-static const struct dev_pm_ops netiucv_pm_ops = {
-	.prepare = netiucv_pm_prepare,
-	.complete = netiucv_pm_complete,
-	.freeze = netiucv_pm_freeze,
-	.thaw = netiucv_pm_restore_thaw,
-	.restore = netiucv_pm_restore_thaw,
-};
-
 static struct device_driver netiucv_driver = {
 	.owner = THIS_MODULE,
 	.name = "netiucv",
 	.bus  = &iucv_bus,
-	.pm = &netiucv_pm_ops,
 };
 
 static int netiucv_callback_connreq(struct iucv_path *, u8 *, u8 *);
@@ -213,7 +196,6 @@ struct netiucv_priv {
 	fsm_instance            *fsm;
         struct iucv_connection  *conn;
 	struct device           *dev;
-	int			 pm_state;
 };
 
 /**
@@ -1275,72 +1257,6 @@ static int netiucv_close(struct net_device *dev)
 	return 0;
 }
 
-static int netiucv_pm_prepare(struct device *dev)
-{
-	IUCV_DBF_TEXT(trace, 3, __func__);
-	return 0;
-}
-
-static void netiucv_pm_complete(struct device *dev)
-{
-	IUCV_DBF_TEXT(trace, 3, __func__);
-	return;
-}
-
-/**
- * netiucv_pm_freeze() - Freeze PM callback
- * @dev:	netiucv device
- *
- * close open netiucv interfaces
- */
-static int netiucv_pm_freeze(struct device *dev)
-{
-	struct netiucv_priv *priv = dev_get_drvdata(dev);
-	struct net_device *ndev = NULL;
-	int rc = 0;
-
-	IUCV_DBF_TEXT(trace, 3, __func__);
-	if (priv && priv->conn)
-		ndev = priv->conn->netdev;
-	if (!ndev)
-		goto out;
-	netif_device_detach(ndev);
-	priv->pm_state = fsm_getstate(priv->fsm);
-	rc = netiucv_close(ndev);
-out:
-	return rc;
-}
-
-/**
- * netiucv_pm_restore_thaw() - Thaw and restore PM callback
- * @dev:	netiucv device
- *
- * re-open netiucv interfaces closed during freeze
- */
-static int netiucv_pm_restore_thaw(struct device *dev)
-{
-	struct netiucv_priv *priv = dev_get_drvdata(dev);
-	struct net_device *ndev = NULL;
-	int rc = 0;
-
-	IUCV_DBF_TEXT(trace, 3, __func__);
-	if (priv && priv->conn)
-		ndev = priv->conn->netdev;
-	if (!ndev)
-		goto out;
-	switch (priv->pm_state) {
-	case DEV_STATE_RUNNING:
-	case DEV_STATE_STARTWAIT:
-		rc = netiucv_open(ndev);
-		break;
-	default:
-		break;
-	}
-	netif_device_attach(ndev);
-out:
-	return rc;
-}
-
 /**
  * Start transmission of a packet.
  * Called from generic network device layer.
@@ -2156,7 +2072,6 @@ static void __exit netiucv_exit(void)
 		netiucv_unregister_device(dev);
 	}
 
-	device_unregister(netiucv_dev);
 	driver_unregister(&netiucv_driver);
 	iucv_unregister(&netiucv_handler, 1);
 	iucv_unregister_dbf_views();
@@ -2182,27 +2097,10 @@ static int __init netiucv_init(void)
 		IUCV_DBF_TEXT_(setup, 2, "ret %d from driver_register\n", rc);
 		goto out_iucv;
 	}
-	/* establish dummy device */
-	netiucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!netiucv_dev) {
-		rc = -ENOMEM;
-		goto out_driver;
-	}
-	dev_set_name(netiucv_dev, "netiucv");
-	netiucv_dev->bus = &iucv_bus;
-	netiucv_dev->parent = iucv_root;
-	netiucv_dev->release = (void (*)(struct device *))kfree;
-	netiucv_dev->driver = &netiucv_driver;
-	rc = device_register(netiucv_dev);
-	if (rc) {
-		put_device(netiucv_dev);
-		goto out_driver;
-	}
+
 	netiucv_banner();
 	return rc;
 
-out_driver:
-	driver_unregister(&netiucv_driver);
 out_iucv:
 	iucv_unregister(&netiucv_handler, 1);
 out_dbf:
diff --git a/drivers/s390/net/smsgiucv.c b/drivers/s390/net/smsgiucv.c
index 066b5c3aaae6..c84ec2fbf99b 100644
--- a/drivers/s390/net/smsgiucv.c
+++ b/drivers/s390/net/smsgiucv.c
@@ -29,12 +29,9 @@ MODULE_AUTHOR
 MODULE_DESCRIPTION ("Linux for S/390 IUCV special message driver");
 
 static struct iucv_path *smsg_path;
-/* dummy device used as trigger for PM functions */
-static struct device *smsg_dev;
 
 static DEFINE_SPINLOCK(smsg_list_lock);
 static LIST_HEAD(smsg_list);
-static int iucv_path_connected;
 
 static int smsg_path_pending(struct iucv_path *, u8 *, u8 *);
 static void smsg_message_pending(struct iucv_path *, struct iucv_message *);
@@ -124,60 +121,15 @@ void smsg_unregister_callback(const char *prefix,
 	kfree(cb);
 }
 
-static int smsg_pm_freeze(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "smsg_pm_freeze\n");
-#endif
-	if (smsg_path && iucv_path_connected) {
-		iucv_path_sever(smsg_path, NULL);
-		iucv_path_connected = 0;
-	}
-	return 0;
-}
-
-static int smsg_pm_restore_thaw(struct device *dev)
-{
-	int rc;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "smsg_pm_restore_thaw\n");
-#endif
-	if (smsg_path && !iucv_path_connected) {
-		memset(smsg_path, 0, sizeof(*smsg_path));
-		smsg_path->msglim = 255;
-		smsg_path->flags = 0;
-		rc = iucv_path_connect(smsg_path, &smsg_handler, "*MSG    ",
-				       NULL, NULL, NULL);
-#ifdef CONFIG_PM_DEBUG
-		if (rc)
-			printk(KERN_ERR
-			       "iucv_path_connect returned with rc %i\n", rc);
-#endif
-		if (!rc)
-			iucv_path_connected = 1;
-		cpcmd("SET SMSG IUCV", NULL, 0, NULL);
-	}
-	return 0;
-}
-
-static const struct dev_pm_ops smsg_pm_ops = {
-	.freeze = smsg_pm_freeze,
-	.thaw = smsg_pm_restore_thaw,
-	.restore = smsg_pm_restore_thaw,
-};
-
 static struct device_driver smsg_driver = {
 	.owner = THIS_MODULE,
 	.name = SMSGIUCV_DRV_NAME,
 	.bus  = &iucv_bus,
-	.pm = &smsg_pm_ops,
 };
 
 static void __exit smsg_exit(void)
 {
 	cpcmd("SET SMSG OFF", NULL, 0, NULL);
-	device_unregister(smsg_dev);
 	iucv_unregister(&smsg_handler, 1);
 	driver_unregister(&smsg_driver);
 }
@@ -205,27 +157,10 @@ static int __init smsg_init(void)
 			       NULL, NULL, NULL);
 	if (rc)
 		goto out_free_path;
-	else
-		iucv_path_connected = 1;
-	smsg_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!smsg_dev) {
-		rc = -ENOMEM;
-		goto out_free_path;
-	}
-	dev_set_name(smsg_dev, "smsg_iucv");
-	smsg_dev->bus = &iucv_bus;
-	smsg_dev->parent = iucv_root;
-	smsg_dev->release = (void (*)(struct device *))kfree;
-	smsg_dev->driver = &smsg_driver;
-	rc = device_register(smsg_dev);
-	if (rc)
-		goto out_put;
 
 	cpcmd("SET SMSG IUCV", NULL, 0, NULL);
 	return 0;
 
-out_put:
-	put_device(smsg_dev);
 out_free_path:
 	iucv_path_free(smsg_path);
 	smsg_path = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 4b32f86bf1673acb16441dd55d7b325609f54897 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:10:08 +0200
Subject: net/iucv: remove pm support

commit 394216275c7d ("s390: remove broken hibernate / power management support")
removed support for ARCH_HIBERNATION_POSSIBLE from s390.

So drop the unused pm ops from the s390-only iucv bus driver.

CC: Hendrik Brueckner <brueckner@linux.ibm.com>
Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/iucv.c | 188 --------------------------------------------------------
 1 file changed, 188 deletions(-)

diff --git a/net/iucv/iucv.c b/net/iucv/iucv.c
index 9a2d023842fe..19250a0c85d3 100644
--- a/net/iucv/iucv.c
+++ b/net/iucv/iucv.c
@@ -67,32 +67,9 @@ static int iucv_bus_match(struct device *dev, struct device_driver *drv)
 	return 0;
 }
 
-enum iucv_pm_states {
-	IUCV_PM_INITIAL = 0,
-	IUCV_PM_FREEZING = 1,
-	IUCV_PM_THAWING = 2,
-	IUCV_PM_RESTORING = 3,
-};
-static enum iucv_pm_states iucv_pm_state;
-
-static int iucv_pm_prepare(struct device *);
-static void iucv_pm_complete(struct device *);
-static int iucv_pm_freeze(struct device *);
-static int iucv_pm_thaw(struct device *);
-static int iucv_pm_restore(struct device *);
-
-static const struct dev_pm_ops iucv_pm_ops = {
-	.prepare = iucv_pm_prepare,
-	.complete = iucv_pm_complete,
-	.freeze = iucv_pm_freeze,
-	.thaw = iucv_pm_thaw,
-	.restore = iucv_pm_restore,
-};
-
 struct bus_type iucv_bus = {
 	.name = "iucv",
 	.match = iucv_bus_match,
-	.pm = &iucv_pm_ops,
 };
 EXPORT_SYMBOL(iucv_bus);
 
@@ -434,31 +411,6 @@ static void iucv_block_cpu(void *data)
 	cpumask_clear_cpu(cpu, &iucv_irq_cpumask);
 }
 
-/**
- * iucv_block_cpu_almost
- * @data: unused
- *
- * Allow connection-severed interrupts only on this cpu.
- */
-static void iucv_block_cpu_almost(void *data)
-{
-	int cpu = smp_processor_id();
-	union iucv_param *parm;
-
-	/* Allow iucv control interrupts only */
-	parm = iucv_param_irq[cpu];
-	memset(parm, 0, sizeof(union iucv_param));
-	parm->set_mask.ipmask = 0x08;
-	iucv_call_b2f0(IUCV_SETMASK, parm);
-	/* Allow iucv-severed interrupt only */
-	memset(parm, 0, sizeof(union iucv_param));
-	parm->set_mask.ipmask = 0x20;
-	iucv_call_b2f0(IUCV_SETCONTROLMASK, parm);
-
-	/* Clear indication that iucv interrupts are allowed for this cpu. */
-	cpumask_clear_cpu(cpu, &iucv_irq_cpumask);
-}
-
 /**
  * iucv_declare_cpu
  * @data: unused
@@ -1834,146 +1786,6 @@ static void iucv_external_interrupt(struct ext_code ext_code,
 	spin_unlock(&iucv_queue_lock);
 }
 
-static int iucv_pm_prepare(struct device *dev)
-{
-	int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_INFO "iucv_pm_prepare\n");
-#endif
-	if (dev->driver && dev->driver->pm && dev->driver->pm->prepare)
-		rc = dev->driver->pm->prepare(dev);
-	return rc;
-}
-
-static void iucv_pm_complete(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_INFO "iucv_pm_complete\n");
-#endif
-	if (dev->driver && dev->driver->pm && dev->driver->pm->complete)
-		dev->driver->pm->complete(dev);
-}
-
-/**
- * iucv_path_table_empty() - determine if iucv path table is empty
- *
- * Returns 0 if there are still iucv pathes defined
- *	   1 if there are no iucv pathes defined
- */
-static int iucv_path_table_empty(void)
-{
-	int i;
-
-	for (i = 0; i < iucv_max_pathid; i++) {
-		if (iucv_path_table[i])
-			return 0;
-	}
-	return 1;
-}
-
-/**
- * iucv_pm_freeze() - Freeze PM callback
- * @dev:	iucv-based device
- *
- * disable iucv interrupts
- * invoke callback function of the iucv-based driver
- * shut down iucv, if no iucv-pathes are established anymore
- */
-static int iucv_pm_freeze(struct device *dev)
-{
-	int cpu;
-	struct iucv_irq_list *p, *n;
-	int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "iucv_pm_freeze\n");
-#endif
-	if (iucv_pm_state != IUCV_PM_FREEZING) {
-		for_each_cpu(cpu, &iucv_irq_cpumask)
-			smp_call_function_single(cpu, iucv_block_cpu_almost,
-						 NULL, 1);
-		cancel_work_sync(&iucv_work);
-		list_for_each_entry_safe(p, n, &iucv_work_queue, list) {
-			list_del_init(&p->list);
-			iucv_sever_pathid(p->data.ippathid,
-					  iucv_error_no_listener);
-			kfree(p);
-		}
-	}
-	iucv_pm_state = IUCV_PM_FREEZING;
-	if (dev->driver && dev->driver->pm && dev->driver->pm->freeze)
-		rc = dev->driver->pm->freeze(dev);
-	if (iucv_path_table_empty())
-		iucv_disable();
-	return rc;
-}
-
-/**
- * iucv_pm_thaw() - Thaw PM callback
- * @dev:	iucv-based device
- *
- * make iucv ready for use again: allocate path table, declare interrupt buffers
- *				  and enable iucv interrupts
- * invoke callback function of the iucv-based driver
- */
-static int iucv_pm_thaw(struct device *dev)
-{
-	int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "iucv_pm_thaw\n");
-#endif
-	iucv_pm_state = IUCV_PM_THAWING;
-	if (!iucv_path_table) {
-		rc = iucv_enable();
-		if (rc)
-			goto out;
-	}
-	if (cpumask_empty(&iucv_irq_cpumask)) {
-		if (iucv_nonsmp_handler)
-			/* enable interrupts on one cpu */
-			iucv_allow_cpu(NULL);
-		else
-			/* enable interrupts on all cpus */
-			iucv_setmask_mp();
-	}
-	if (dev->driver && dev->driver->pm && dev->driver->pm->thaw)
-		rc = dev->driver->pm->thaw(dev);
-out:
-	return rc;
-}
-
-/**
- * iucv_pm_restore() - Restore PM callback
- * @dev:	iucv-based device
- *
- * make iucv ready for use again: allocate path table, declare interrupt buffers
- *				  and enable iucv interrupts
- * invoke callback function of the iucv-based driver
- */
-static int iucv_pm_restore(struct device *dev)
-{
-	int rc = 0;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "iucv_pm_restore %p\n", iucv_path_table);
-#endif
-	if ((iucv_pm_state != IUCV_PM_RESTORING) && iucv_path_table)
-		pr_warn("Suspending Linux did not completely close all IUCV connections\n");
-	iucv_pm_state = IUCV_PM_RESTORING;
-	if (cpumask_empty(&iucv_irq_cpumask)) {
-		rc = iucv_query_maxconn();
-		rc = iucv_enable();
-		if (rc)
-			goto out;
-	}
-	if (dev->driver && dev->driver->pm && dev->driver->pm->restore)
-		rc = dev->driver->pm->restore(dev);
-out:
-	return rc;
-}
-
 struct iucv_interface iucv_if = {
 	.message_receive = iucv_message_receive,
 	.__message_receive = __iucv_message_receive,
-- 
cgit v1.2.3-59-g8ed1b


From 585bc2209539501674b77de68f58e98039892501 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:10:09 +0200
Subject: net/af_iucv: remove pm support

commit 394216275c7d ("s390: remove broken hibernate / power management support")
removed support for ARCH_HIBERNATION_POSSIBLE from s390.

So drop the unused pm ops from the s390-only af_iucv socket code.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 141 +----------------------------------------------------
 1 file changed, 1 insertion(+), 140 deletions(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index c4bdcbc84b07..84bd18fea1d6 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -87,7 +87,6 @@ do {									\
 
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
-static void iucv_sever_path(struct sock *, int);
 
 static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
 	struct packet_type *pt, struct net_device *orig_dev);
@@ -127,110 +126,6 @@ static inline void low_nmcpy(unsigned char *dst, char *src)
        memcpy(&dst[8], src, 8);
 }
 
-static int afiucv_pm_prepare(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "afiucv_pm_prepare\n");
-#endif
-	return 0;
-}
-
-static void afiucv_pm_complete(struct device *dev)
-{
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "afiucv_pm_complete\n");
-#endif
-}
-
-/**
- * afiucv_pm_freeze() - Freeze PM callback
- * @dev:	AFIUCV dummy device
- *
- * Sever all established IUCV communication pathes
- */
-static int afiucv_pm_freeze(struct device *dev)
-{
-	struct iucv_sock *iucv;
-	struct sock *sk;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "afiucv_pm_freeze\n");
-#endif
-	read_lock(&iucv_sk_list.lock);
-	sk_for_each(sk, &iucv_sk_list.head) {
-		iucv = iucv_sk(sk);
-		switch (sk->sk_state) {
-		case IUCV_DISCONN:
-		case IUCV_CLOSING:
-		case IUCV_CONNECTED:
-			iucv_sever_path(sk, 0);
-			break;
-		case IUCV_OPEN:
-		case IUCV_BOUND:
-		case IUCV_LISTEN:
-		case IUCV_CLOSED:
-		default:
-			break;
-		}
-		skb_queue_purge(&iucv->send_skb_q);
-		skb_queue_purge(&iucv->backlog_skb_q);
-	}
-	read_unlock(&iucv_sk_list.lock);
-	return 0;
-}
-
-/**
- * afiucv_pm_restore_thaw() - Thaw and restore PM callback
- * @dev:	AFIUCV dummy device
- *
- * socket clean up after freeze
- */
-static int afiucv_pm_restore_thaw(struct device *dev)
-{
-	struct sock *sk;
-
-#ifdef CONFIG_PM_DEBUG
-	printk(KERN_WARNING "afiucv_pm_restore_thaw\n");
-#endif
-	read_lock(&iucv_sk_list.lock);
-	sk_for_each(sk, &iucv_sk_list.head) {
-		switch (sk->sk_state) {
-		case IUCV_CONNECTED:
-			sk->sk_err = EPIPE;
-			sk->sk_state = IUCV_DISCONN;
-			sk->sk_state_change(sk);
-			break;
-		case IUCV_DISCONN:
-		case IUCV_CLOSING:
-		case IUCV_LISTEN:
-		case IUCV_BOUND:
-		case IUCV_OPEN:
-		default:
-			break;
-		}
-	}
-	read_unlock(&iucv_sk_list.lock);
-	return 0;
-}
-
-static const struct dev_pm_ops afiucv_pm_ops = {
-	.prepare = afiucv_pm_prepare,
-	.complete = afiucv_pm_complete,
-	.freeze = afiucv_pm_freeze,
-	.thaw = afiucv_pm_restore_thaw,
-	.restore = afiucv_pm_restore_thaw,
-};
-
-static struct device_driver af_iucv_driver = {
-	.owner = THIS_MODULE,
-	.name = "afiucv",
-	.bus  = NULL,
-	.pm   = &afiucv_pm_ops,
-};
-
-/* dummy device used as trigger for PM functions */
-static struct device *af_iucv_dev;
-
 /**
  * iucv_msg_length() - Returns the length of an iucv message.
  * @msg:	Pointer to struct iucv_message, MUST NOT be NULL
@@ -2409,45 +2304,11 @@ static struct packet_type iucv_packet_type = {
 
 static int afiucv_iucv_init(void)
 {
-	int err;
-
-	err = pr_iucv->iucv_register(&af_iucv_handler, 0);
-	if (err)
-		goto out;
-	/* establish dummy device */
-	af_iucv_driver.bus = pr_iucv->bus;
-	err = driver_register(&af_iucv_driver);
-	if (err)
-		goto out_iucv;
-	af_iucv_dev = kzalloc(sizeof(struct device), GFP_KERNEL);
-	if (!af_iucv_dev) {
-		err = -ENOMEM;
-		goto out_driver;
-	}
-	dev_set_name(af_iucv_dev, "af_iucv");
-	af_iucv_dev->bus = pr_iucv->bus;
-	af_iucv_dev->parent = pr_iucv->root;
-	af_iucv_dev->release = (void (*)(struct device *))kfree;
-	af_iucv_dev->driver = &af_iucv_driver;
-	err = device_register(af_iucv_dev);
-	if (err)
-		goto out_iucv_dev;
-	return 0;
-
-out_iucv_dev:
-	put_device(af_iucv_dev);
-out_driver:
-	driver_unregister(&af_iucv_driver);
-out_iucv:
-	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
-out:
-	return err;
+	return pr_iucv->iucv_register(&af_iucv_handler, 0);
 }
 
 static void afiucv_iucv_exit(void)
 {
-	device_unregister(af_iucv_dev);
-	driver_unregister(&af_iucv_driver);
 	pr_iucv->iucv_unregister(&af_iucv_handler, 0);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 0d1c7664ed2044048d5ed790b943affa296bc3a4 Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:10:10 +0200
Subject: net/af_iucv: replace open-coded U16_MAX

Improve the readability of a range check.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 84bd18fea1d6..b02470a04c50 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -16,6 +16,7 @@
 #include <linux/module.h>
 #include <linux/netdevice.h>
 #include <linux/types.h>
+#include <linux/limits.h>
 #include <linux/list.h>
 #include <linux/errno.h>
 #include <linux/kernel.h>
@@ -1559,7 +1560,7 @@ static int iucv_sock_setsockopt(struct socket *sock, int level, int optname,
 		switch (sk->sk_state) {
 		case IUCV_OPEN:
 		case IUCV_BOUND:
-			if (val < 1 || val > (u16)(~0))
+			if (val < 1 || val > U16_MAX)
 				rc = -EINVAL;
 			else
 				iucv->msglimit = val;
-- 
cgit v1.2.3-59-g8ed1b


From dca1262f97499337489da7f5aa0bc6fec247a83f Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:10:11 +0200
Subject: net/af_iucv: remove a redundant zero initialization

txmsg is declared as {0}, no need to clear individual fields later on.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/iucv/af_iucv.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index b02470a04c50..799dcf5483de 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -996,7 +996,6 @@ static int iucv_sock_sendmsg(struct socket *sock, struct msghdr *msg,
 
 	/* initialize defaults */
 	cmsg_done   = 0;	/* check for duplicate headers */
-	txmsg.class = 0;
 
 	/* iterate over control messages */
 	for_each_cmsghdr(cmsg, msg) {
-- 
cgit v1.2.3-59-g8ed1b


From e9a36ca5f6f302675e7e36101ffa0ca7f9b8779b Mon Sep 17 00:00:00 2001
From: Julian Wiedmann <jwi@linux.ibm.com>
Date: Tue, 19 May 2020 21:10:12 +0200
Subject: net/af_iucv: clean up function prototypes

Remove a bunch of forward declarations (trivially shifting code around
where needed), and make a few functions static.

Signed-off-by: Julian Wiedmann <jwi@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/iucv/af_iucv.h |   8 ----
 net/iucv/af_iucv.c         | 108 +++++++++++++++++++++------------------------
 2 files changed, 51 insertions(+), 65 deletions(-)

diff --git a/include/net/iucv/af_iucv.h b/include/net/iucv/af_iucv.h
index 14a490246be9..9259ce2b22f3 100644
--- a/include/net/iucv/af_iucv.h
+++ b/include/net/iucv/af_iucv.h
@@ -158,12 +158,4 @@ struct iucv_sock_list {
 	atomic_t	  autobind_name;
 };
 
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait);
-void iucv_sock_link(struct iucv_sock_list *l, struct sock *s);
-void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *s);
-void iucv_accept_enqueue(struct sock *parent, struct sock *sk);
-void iucv_accept_unlink(struct sock *sk);
-struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock);
-
 #endif /* __IUCV_H */
diff --git a/net/iucv/af_iucv.c b/net/iucv/af_iucv.c
index 799dcf5483de..ee0add15497d 100644
--- a/net/iucv/af_iucv.c
+++ b/net/iucv/af_iucv.c
@@ -37,8 +37,6 @@
 
 static char iucv_userid[80];
 
-static const struct proto_ops iucv_sock_ops;
-
 static struct proto iucv_proto = {
 	.name		= "AF_IUCV",
 	.owner		= THIS_MODULE,
@@ -86,13 +84,11 @@ do {									\
 	__ret;								\
 })
 
+static struct sock *iucv_accept_dequeue(struct sock *parent,
+					struct socket *newsock);
 static void iucv_sock_kill(struct sock *sk);
 static void iucv_sock_close(struct sock *sk);
 
-static int afiucv_hs_rcv(struct sk_buff *skb, struct net_device *dev,
-	struct packet_type *pt, struct net_device *orig_dev);
-static int afiucv_hs_send(struct iucv_message *imsg, struct sock *sock,
-		   struct sk_buff *skb, u8 flags);
 static void afiucv_hs_callback_txnotify(struct sk_buff *, enum iucv_tx_notify);
 
 /* Call Back functions */
@@ -331,6 +327,20 @@ static void iucv_sock_cleanup_listen(struct sock *parent)
 	parent->sk_state = IUCV_CLOSED;
 }
 
+static void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_add_node(sk, &l->head);
+	write_unlock_bh(&l->lock);
+}
+
+static void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk)
+{
+	write_lock_bh(&l->lock);
+	sk_del_node_init(sk);
+	write_unlock_bh(&l->lock);
+}
+
 /* Kill socket (only if zapped and orphaned) */
 static void iucv_sock_kill(struct sock *sk)
 {
@@ -503,53 +513,7 @@ static struct sock *iucv_sock_alloc(struct socket *sock, int proto, gfp_t prio,
 	return sk;
 }
 
-/* Create an IUCV socket */
-static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
-			    int kern)
-{
-	struct sock *sk;
-
-	if (protocol && protocol != PF_IUCV)
-		return -EPROTONOSUPPORT;
-
-	sock->state = SS_UNCONNECTED;
-
-	switch (sock->type) {
-	case SOCK_STREAM:
-		sock->ops = &iucv_sock_ops;
-		break;
-	case SOCK_SEQPACKET:
-		/* currently, proto ops can handle both sk types */
-		sock->ops = &iucv_sock_ops;
-		break;
-	default:
-		return -ESOCKTNOSUPPORT;
-	}
-
-	sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
-	if (!sk)
-		return -ENOMEM;
-
-	iucv_sock_init(sk, NULL);
-
-	return 0;
-}
-
-void iucv_sock_link(struct iucv_sock_list *l, struct sock *sk)
-{
-	write_lock_bh(&l->lock);
-	sk_add_node(sk, &l->head);
-	write_unlock_bh(&l->lock);
-}
-
-void iucv_sock_unlink(struct iucv_sock_list *l, struct sock *sk)
-{
-	write_lock_bh(&l->lock);
-	sk_del_node_init(sk);
-	write_unlock_bh(&l->lock);
-}
-
-void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
+static void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
 {
 	unsigned long flags;
 	struct iucv_sock *par = iucv_sk(parent);
@@ -562,7 +526,7 @@ void iucv_accept_enqueue(struct sock *parent, struct sock *sk)
 	sk_acceptq_added(parent);
 }
 
-void iucv_accept_unlink(struct sock *sk)
+static void iucv_accept_unlink(struct sock *sk)
 {
 	unsigned long flags;
 	struct iucv_sock *par = iucv_sk(iucv_sk(sk)->parent);
@@ -575,7 +539,8 @@ void iucv_accept_unlink(struct sock *sk)
 	sock_put(sk);
 }
 
-struct sock *iucv_accept_dequeue(struct sock *parent, struct socket *newsock)
+static struct sock *iucv_accept_dequeue(struct sock *parent,
+					struct socket *newsock)
 {
 	struct iucv_sock *isk, *n;
 	struct sock *sk;
@@ -1406,8 +1371,8 @@ static inline __poll_t iucv_accept_poll(struct sock *parent)
 	return 0;
 }
 
-__poll_t iucv_sock_poll(struct file *file, struct socket *sock,
-			    poll_table *wait)
+static __poll_t iucv_sock_poll(struct file *file, struct socket *sock,
+			       poll_table *wait)
 {
 	struct sock *sk = sock->sk;
 	__poll_t mask = 0;
@@ -2291,6 +2256,35 @@ static const struct proto_ops iucv_sock_ops = {
 	.getsockopt	= iucv_sock_getsockopt,
 };
 
+static int iucv_sock_create(struct net *net, struct socket *sock, int protocol,
+			    int kern)
+{
+	struct sock *sk;
+
+	if (protocol && protocol != PF_IUCV)
+		return -EPROTONOSUPPORT;
+
+	sock->state = SS_UNCONNECTED;
+
+	switch (sock->type) {
+	case SOCK_STREAM:
+	case SOCK_SEQPACKET:
+		/* currently, proto ops can handle both sk types */
+		sock->ops = &iucv_sock_ops;
+		break;
+	default:
+		return -ESOCKTNOSUPPORT;
+	}
+
+	sk = iucv_sock_alloc(sock, protocol, GFP_KERNEL, kern);
+	if (!sk)
+		return -ENOMEM;
+
+	iucv_sock_init(sk, NULL);
+
+	return 0;
+}
+
 static const struct net_proto_family iucv_sock_family_ops = {
 	.family	= AF_IUCV,
 	.owner	= THIS_MODULE,
-- 
cgit v1.2.3-59-g8ed1b


From dda18a5c0b75461d1ed228f80b59c67434b8d601 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Tue, 19 May 2020 12:23:41 -0700
Subject: selftests/bpf: Convert bpf_iter_test_kern{3, 4}.c to define own
 bpf_iter_meta

b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest compilable against old vmlinux.h")
missed the fact that bpf_iter_test_kern{3,4}.c are not just including
bpf_iter_test_kern_common.h and need similar bpf_iter_meta re-definition
explicitly.

Fixes: b9f4c01f3e0b ("selftest/bpf: Make bpf_iter selftest compilable against old vmlinux.h")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200519192341.134360-1-andriin@fb.com
---
 tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c | 15 +++++++++++++++
 tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c | 15 +++++++++++++++
 2 files changed, 30 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
index 636a00fa074d..13c2c90c835f 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern3.c
@@ -1,10 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__task bpf_iter__task___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__task
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__task {
+	struct bpf_iter_meta *meta;
+	struct task_struct *task;
+} __attribute__((preserve_access_index));
+
 SEC("iter/task")
 int dump_task(struct bpf_iter__task *ctx)
 {
diff --git a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
index b18dc0471d07..0aa71b333cf3 100644
--- a/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
+++ b/tools/testing/selftests/bpf/progs/bpf_iter_test_kern4.c
@@ -1,10 +1,25 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2020 Facebook */
+#define bpf_iter_meta bpf_iter_meta___not_used
+#define bpf_iter__bpf_map bpf_iter__bpf_map___not_used
 #include "vmlinux.h"
+#undef bpf_iter_meta
+#undef bpf_iter__bpf_map
 #include <bpf/bpf_helpers.h>
 
 char _license[] SEC("license") = "GPL";
 
+struct bpf_iter_meta {
+	struct seq_file *seq;
+	__u64 session_id;
+	__u64 seq_num;
+} __attribute__((preserve_access_index));
+
+struct bpf_iter__bpf_map {
+	struct bpf_iter_meta *meta;
+	struct bpf_map *map;
+} __attribute__((preserve_access_index));
+
 __u32 map1_id = 0, map2_id = 0;
 __u32 map1_accessed = 0, map2_accessed = 0;
 __u64 map1_seqnum = 0, map2_seqnum1 = 0, map2_seqnum2 = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 472abd3240918278d2da817e064e9b6c8fb4faf2 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 1 Apr 2020 11:43:20 +0300
Subject: igc: Remove PCIe Control register

The GCR (PCIe Control) register is not in use and should be removed.
This patch cleans up this register.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 5a6110e211fd..0f94285ddc11 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -36,9 +36,6 @@
 #define IGC_FCRTH		0x02168  /* FC Receive Threshold High - RW */
 #define IGC_FCRTV		0x02460  /* FC Refresh Timer Value - RW */
 
-/* PCIe Register Description */
-#define IGC_GCR			0x05B00  /* PCIe control- RW */
-
 /* Semaphore registers */
 #define IGC_SW_FW_SYNC		0x05B5C  /* SW-FW Synchronization - RW */
 #define IGC_SWSM		0x05B50  /* SW Semaphore */
-- 
cgit v1.2.3-59-g8ed1b


From 3c215fb18e700344d96f3bde606b4492c70eaf56 Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Wed, 1 Apr 2020 12:16:44 +0300
Subject: igc: remove IGC_REMOVED function

The igc driver has leftovers from a previous device that supported
virtualization. One such leftover is the IGC_REMOVED macro, which has
become obsolete and can be removed.

Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.h  | 4 ----
 drivers/net/ethernet/intel/igc/igc_main.c | 3 ---
 drivers/net/ethernet/intel/igc/igc_regs.h | 3 +--
 3 files changed, 1 insertion(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.h b/drivers/net/ethernet/intel/igc/igc_mac.h
index 832cccec87cd..b5963f86defb 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.h
+++ b/drivers/net/ethernet/intel/igc/igc_mac.h
@@ -8,10 +8,6 @@
 #include "igc_phy.h"
 #include "igc_defines.h"
 
-#ifndef IGC_REMOVED
-#define IGC_REMOVED(a) (0)
-#endif /* IGC_REMOVED */
-
 /* forward declaration */
 s32 igc_disable_pcie_master(struct igc_hw *hw);
 s32 igc_check_for_copper_link(struct igc_hw *hw);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 0ae3590a50eb..125026d053eb 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4659,9 +4659,6 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg)
 	u8 __iomem *hw_addr = READ_ONCE(hw->hw_addr);
 	u32 value = 0;
 
-	if (IGC_REMOVED(hw_addr))
-		return ~value;
-
 	value = readl(&hw_addr[reg]);
 
 	/* reads should not return all F's */
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 0f94285ddc11..f101bfbf52e6 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -275,8 +275,7 @@ u32 igc_rd32(struct igc_hw *hw, u32 reg);
 #define wr32(reg, val) \
 do { \
 	u8 __iomem *hw_addr = READ_ONCE((hw)->hw_addr); \
-	if (!IGC_REMOVED(hw_addr)) \
-		writel((val), &hw_addr[(reg)]); \
+	writel((val), &hw_addr[(reg)]); \
 } while (0)
 
 #define rd32(reg) (igc_rd32(hw, reg))
-- 
cgit v1.2.3-59-g8ed1b


From 65b9ee1b928f2b679319c547c52986b8869855df Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 1 Apr 2020 14:38:44 +0300
Subject: igc: Clean up obsolete NVM defines

The packet buffer allocation, reserved word and pointer guard defines
are not applicable to i225 parts.
This patch cleans up these obsolete defines.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 54a7941bdb48..0ecc63d423b8 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -163,11 +163,6 @@
 
 /* For checksumming, the sum of all words in the NVM should equal 0xBABA. */
 #define NVM_SUM				0xBABA
-
-#define NVM_PBA_OFFSET_0		8
-#define NVM_PBA_OFFSET_1		9
-#define NVM_RESERVED_WORD		0xFFFF
-#define NVM_PBA_PTR_GUARD		0xFAFA
 #define NVM_WORD_SIZE_BASE_SHIFT	6
 
 /* Collision related configuration parameters */
-- 
cgit v1.2.3-59-g8ed1b


From bbfaa141d211c86441f7e91454e0a306fb5296de Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 3 Apr 2020 11:17:40 -0700
Subject: igc: Rename IGC_VLAPQF macro

This patch renames the IGC_VLAPQF macro to IGC_VLANPQF as well as
related macros so they match the register name and fields described in
the data sheet.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h |  6 +++---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 23 +++++++++++------------
 drivers/net/ethernet/intel/igc/igc_regs.h    |  2 +-
 3 files changed, 15 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 0ecc63d423b8..f1bb5414f99f 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -508,9 +508,9 @@
 #define IGC_MAX_MAC_HDR_LEN	127
 #define IGC_MAX_NETWORK_HDR_LEN	511
 
-#define IGC_VLAPQF_QUEUE_SEL(_n, q_idx) ((q_idx) << ((_n) * 4))
-#define IGC_VLAPQF_P_VALID(_n)	(0x1 << (3 + (_n) * 4))
-#define IGC_VLAPQF_QUEUE_MASK	0x03
+#define IGC_VLANPQF_QSEL(_n, q_idx) ((q_idx) << ((_n) * 4))
+#define IGC_VLANPQF_VALID(_n)	(0x1 << (3 + (_n) * 4))
+#define IGC_VLANPQF_QUEUE_MASK	0x03
 
 #define IGC_ADVTXD_MACLEN_SHIFT		9  /* Adv ctxt desc mac len shift */
 #define IGC_ADVTXD_TUCMD_IPV4		0x00000400  /* IP Packet Type:1=IPv4 */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index a05d7abee524..c21971b40cb2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1229,23 +1229,23 @@ static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter,
 	u16 queue_index;
 	u32 vlapqf;
 
-	vlapqf = rd32(IGC_VLAPQF);
+	vlapqf = rd32(IGC_VLANPQF);
 	vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
 				>> VLAN_PRIO_SHIFT;
-	queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLAPQF_QUEUE_MASK;
+	queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLANPQF_QUEUE_MASK;
 
-	/* check whether this vlan prio is already set */
-	if (vlapqf & IGC_VLAPQF_P_VALID(vlan_priority) &&
+	/* check whether this VLAN prio is already set */
+	if (vlapqf & IGC_VLANPQF_VALID(vlan_priority) &&
 	    queue_index != input->action) {
 		netdev_err(adapter->netdev,
 			   "ethtool rxnfc set VLAN prio filter failed\n");
 		return -EEXIST;
 	}
 
-	vlapqf |= IGC_VLAPQF_P_VALID(vlan_priority);
-	vlapqf |= IGC_VLAPQF_QUEUE_SEL(vlan_priority, input->action);
+	vlapqf |= IGC_VLANPQF_VALID(vlan_priority);
+	vlapqf |= IGC_VLANPQF_QSEL(vlan_priority, input->action);
 
-	wr32(IGC_VLAPQF, vlapqf);
+	wr32(IGC_VLANPQF, vlapqf);
 
 	return 0;
 }
@@ -1313,12 +1313,11 @@ static void igc_clear_vlan_prio_filter(struct igc_adapter *adapter,
 
 	vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
 
-	vlapqf = rd32(IGC_VLAPQF);
-	vlapqf &= ~IGC_VLAPQF_P_VALID(vlan_priority);
-	vlapqf &= ~IGC_VLAPQF_QUEUE_SEL(vlan_priority,
-						IGC_VLAPQF_QUEUE_MASK);
+	vlapqf = rd32(IGC_VLANPQF);
+	vlapqf &= ~IGC_VLANPQF_VALID(vlan_priority);
+	vlapqf &= ~IGC_VLANPQF_QSEL(vlan_priority, IGC_VLANPQF_QUEUE_MASK);
 
-	wr32(IGC_VLAPQF, vlapqf);
+	wr32(IGC_VLANPQF, vlapqf);
 }
 
 int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index f101bfbf52e6..851ff19af703 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -120,7 +120,7 @@
 #define IGC_UTA			0x0A000  /* Unicast Table Array - RW */
 #define IGC_RAL(_n)		(0x05400 + ((_n) * 0x08))
 #define IGC_RAH(_n)		(0x05404 + ((_n) * 0x08))
-#define IGC_VLAPQF		0x055B0  /* VLAN Priority Queue Filter VLAPQF */
+#define IGC_VLANPQF		0x055B0  /* VLAN Priority Queue Filter - RW */
 
 /* Transmit Register Descriptions */
 #define IGC_TCTL		0x00400  /* Tx Control - RW */
-- 
cgit v1.2.3-59-g8ed1b


From fbee4760ec4fc9f885ad0737f7b371cc6b73f573 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 3 Apr 2020 11:17:41 -0700
Subject: igc: Dump VLANPQF register

This patch adds the VLAN Priority Queue Filter Register (VLANPQF) to the
list of registers dumped by igc_get_regs().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index c21971b40cb2..19da9dc8dafb 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -316,6 +316,8 @@ static void igc_get_regs(struct net_device *netdev,
 		regs_buff[172 + i] = rd32(IGC_RAL(i));
 	for (i = 0; i < 16; i++)
 		regs_buff[188 + i] = rd32(IGC_RAH(i));
+
+	regs_buff[204] = rd32(IGC_VLANPQF);
 }
 
 static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-- 
cgit v1.2.3-59-g8ed1b


From 2e4f1716f31f3486fa454fd3174f3de982b1dcaa Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 3 Apr 2020 11:17:42 -0700
Subject: igc: Return -EOPNOTSUPP when VLAN mask doesn't match

The I225 controller supports Rx queue assignment based on VLAN priority
only. Other Tag Control Information (TCI) are valid, but not supported
by the driver. So this patch changes the return code of
igc_add_ethtool_nfc_entry() to -EOPNOTSUPP in order to provide more
meaningful information on why the function failed.

It also adds a debug message to give the user a hint about what went
wrong with the NFC setup.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 19da9dc8dafb..f28f7feb39a5 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1446,7 +1446,8 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
 
 	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
 		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
-			err = -EINVAL;
+			netdev_dbg(netdev, "VLAN mask not supported\n");
+			err = -EOPNOTSUPP;
 			goto err_out;
 		}
 		input->filter.vlan_tci = fsp->h_ext.vlan_tci;
-- 
cgit v1.2.3-59-g8ed1b


From e09303d3c4d9d9f71657550d61eb00bc84c13859 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@netronome.com>
Date: Tue, 19 May 2020 16:15:01 +0200
Subject: nfp: flower: renaming of feature bits

Clean up name aliasing. Some features get enabled using a slightly
different method, but the bitmap for these were stored in the same
field. Rename their #defines and move the bitmap to a new variable.

Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/action.c |  4 ++--
 drivers/net/ethernet/netronome/nfp/flower/cmsg.c   |  4 ++--
 drivers/net/ethernet/netronome/nfp/flower/main.c   | 14 +++++++-------
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  9 ++++++---
 4 files changed, 17 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/action.c b/drivers/net/ethernet/netronome/nfp/flower/action.c
index 1c76e1592ca2..ff844e5cc41f 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/action.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/action.c
@@ -209,7 +209,7 @@ nfp_fl_output(struct nfp_app *app, struct nfp_fl_output *output,
 					    NFP_FL_OUT_FLAGS_USE_TUN);
 		output->port = cpu_to_be32(NFP_FL_PORT_TYPE_TUN | tun_type);
 	} else if (netif_is_lag_master(out_dev) &&
-		   priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+		   priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
 		int gid;
 
 		output->flags = cpu_to_be16(tmp_flags);
@@ -956,7 +956,7 @@ nfp_flower_output_action(struct nfp_app *app,
 
 	*a_len += sizeof(struct nfp_fl_output);
 
-	if (priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+	if (priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
 		/* nfp_fl_pre_lag returns -err or size of prelag action added.
 		 * This will be 0 if it is not egressing to a lag dev.
 		 */
diff --git a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
index a595ddb92bff..a050cb898782 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/cmsg.c
@@ -264,7 +264,7 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
 		nfp_flower_cmsg_portmod_rx(app, skb);
 		break;
 	case NFP_FLOWER_CMSG_TYPE_MERGE_HINT:
-		if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE) {
+		if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE) {
 			nfp_flower_cmsg_merge_hint_rx(app, skb);
 			break;
 		}
@@ -285,7 +285,7 @@ nfp_flower_cmsg_process_one_rx(struct nfp_app *app, struct sk_buff *skb)
 		nfp_flower_stats_rlim_reply(app, skb);
 		break;
 	case NFP_FLOWER_CMSG_TYPE_LAG_CONFIG:
-		if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+		if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
 			skb_stored = nfp_flower_lag_unprocessed_msg(app, skb);
 			break;
 		}
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index d8ad9346a26a..62c202307940 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -759,7 +759,7 @@ static int nfp_flower_init(struct nfp_app *app)
 	err = nfp_rtsym_write_le(app->pf->rtbl,
 				 "_abi_flower_balance_sync_enable", 1);
 	if (!err) {
-		app_priv->flower_ext_feats |= NFP_FL_FEATS_LAG;
+		app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG;
 		nfp_flower_lag_init(&app_priv->nfp_lag);
 	} else if (err == -ENOENT) {
 		nfp_warn(app->cpp, "LAG not supported by FW.\n");
@@ -772,7 +772,7 @@ static int nfp_flower_init(struct nfp_app *app)
 		err = nfp_rtsym_write_le(app->pf->rtbl,
 					 "_abi_flower_merge_hint_enable", 1);
 		if (!err) {
-			app_priv->flower_ext_feats |= NFP_FL_FEATS_FLOW_MERGE;
+			app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE;
 			nfp_flower_internal_port_init(app_priv);
 		} else if (err == -ENOENT) {
 			nfp_warn(app->cpp, "Flow merge not supported by FW.\n");
@@ -793,7 +793,7 @@ static int nfp_flower_init(struct nfp_app *app)
 	return 0;
 
 err_lag_clean:
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+	if (app_priv->flower_ext_feats & NFP_FL_ENABLE_LAG)
 		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
 err_cleanup_metadata:
 	nfp_flower_metadata_cleanup(app);
@@ -813,10 +813,10 @@ static void nfp_flower_clean(struct nfp_app *app)
 	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
 		nfp_flower_qos_cleanup(app);
 
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG)
+	if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG)
 		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
 
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE)
+	if (app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE)
 		nfp_flower_internal_port_cleanup(app_priv);
 
 	nfp_flower_metadata_cleanup(app);
@@ -886,7 +886,7 @@ static int nfp_flower_start(struct nfp_app *app)
 	struct nfp_flower_priv *app_priv = app->priv;
 	int err;
 
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+	if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
 		err = nfp_flower_lag_reset(&app_priv->nfp_lag);
 		if (err)
 			return err;
@@ -907,7 +907,7 @@ nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev,
 	struct nfp_flower_priv *app_priv = app->priv;
 	int ret;
 
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_LAG) {
+	if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG) {
 		ret = nfp_flower_lag_netdev_event(app_priv, netdev, event, ptr);
 		if (ret & NOTIFY_STOP_MASK)
 			return ret;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index d55d0d33bc45..7db3be0b17e9 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -44,8 +44,9 @@ struct nfp_app;
 #define NFP_FL_FEATS_FLOW_MOD		BIT(5)
 #define NFP_FL_FEATS_PRE_TUN_RULES	BIT(6)
 #define NFP_FL_FEATS_IPV6_TUN		BIT(7)
-#define NFP_FL_FEATS_FLOW_MERGE		BIT(30)
-#define NFP_FL_FEATS_LAG		BIT(31)
+
+#define NFP_FL_ENABLE_FLOW_MERGE	BIT(0)
+#define NFP_FL_ENABLE_LAG		BIT(1)
 
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
@@ -145,6 +146,7 @@ struct nfp_fl_internal_ports {
  * @mask_id_seed:	Seed used for mask hash table
  * @flower_version:	HW version of flower
  * @flower_ext_feats:	Bitmap of extra features the HW supports
+ * @flower_en_feats:	Bitmap of features enabled by HW
  * @stats_ids:		List of free stats ids
  * @mask_ids:		List of free mask ids
  * @mask_table:		Hash table used to store masks
@@ -180,6 +182,7 @@ struct nfp_flower_priv {
 	u32 mask_id_seed;
 	u64 flower_version;
 	u64 flower_ext_feats;
+	u8 flower_en_feats;
 	struct nfp_fl_stats_id stats_ids;
 	struct nfp_fl_mask_id mask_ids;
 	DECLARE_HASHTABLE(mask_table, NFP_FLOWER_MASK_HASH_BITS);
@@ -346,7 +349,7 @@ nfp_flower_internal_port_can_offload(struct nfp_app *app,
 {
 	struct nfp_flower_priv *app_priv = app->priv;
 
-	if (!(app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MERGE))
+	if (!(app_priv->flower_en_feats & NFP_FL_ENABLE_FLOW_MERGE))
 		return false;
 	if (!netdev->rtnl_link_ops)
 		return false;
-- 
cgit v1.2.3-59-g8ed1b


From 465957c257f3083785fe8e954724cfac2e5d33e9 Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@netronome.com>
Date: Tue, 19 May 2020 16:15:02 +0200
Subject: nfp: flower: inform firmware of flower features

For backwards compatibility it may be required for the firmware to
disable certain features depending on the features supported by
the host. Combine the host feature bits and firmware feature bits
and write this back to the firmware.

Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/main.c | 106 ++++++++++++++++-------
 drivers/net/ethernet/netronome/nfp/flower/main.h |  11 +++
 2 files changed, 87 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index 62c202307940..d054553c75e0 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -665,6 +665,77 @@ err_clear_nn:
 	return err;
 }
 
+static void nfp_flower_wait_host_bit(struct nfp_app *app)
+{
+	unsigned long err_at;
+	u64 feat;
+	int err;
+
+	/* Wait for HOST_ACK flag bit to propagate */
+	err_at = jiffies + msecs_to_jiffies(100);
+	do {
+		feat = nfp_rtsym_read_le(app->pf->rtbl,
+					 "_abi_flower_combined_features_global",
+					 &err);
+		if (time_is_before_eq_jiffies(err_at)) {
+			nfp_warn(app->cpp,
+				 "HOST_ACK bit not propagated in FW.\n");
+			break;
+		}
+		usleep_range(1000, 2000);
+	} while (!err && !(feat & NFP_FL_FEATS_HOST_ACK));
+
+	if (err)
+		nfp_warn(app->cpp,
+			 "Could not read global features entry from FW\n");
+}
+
+static int nfp_flower_sync_feature_bits(struct nfp_app *app)
+{
+	struct nfp_flower_priv *app_priv = app->priv;
+	int err;
+
+	/* Tell the firmware of the host supported features. */
+	err = nfp_rtsym_write_le(app->pf->rtbl, "_abi_flower_host_mask",
+				 app_priv->flower_ext_feats |
+				 NFP_FL_FEATS_HOST_ACK);
+	if (!err)
+		nfp_flower_wait_host_bit(app);
+	else if (err != -ENOENT)
+		return err;
+
+	/* Tell the firmware that the driver supports lag. */
+	err = nfp_rtsym_write_le(app->pf->rtbl,
+				 "_abi_flower_balance_sync_enable", 1);
+	if (!err) {
+		app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG;
+		nfp_flower_lag_init(&app_priv->nfp_lag);
+	} else if (err == -ENOENT) {
+		nfp_warn(app->cpp, "LAG not supported by FW.\n");
+	} else {
+		return err;
+	}
+
+	if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) {
+		/* Tell the firmware that the driver supports flow merging. */
+		err = nfp_rtsym_write_le(app->pf->rtbl,
+					 "_abi_flower_merge_hint_enable", 1);
+		if (!err) {
+			app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE;
+			nfp_flower_internal_port_init(app_priv);
+		} else if (err == -ENOENT) {
+			nfp_warn(app->cpp,
+				 "Flow merge not supported by FW.\n");
+		} else {
+			return err;
+		}
+	} else {
+		nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n");
+	}
+
+	return 0;
+}
+
 static int nfp_flower_init(struct nfp_app *app)
 {
 	u64 version, features, ctx_count, num_mems;
@@ -753,35 +824,11 @@ static int nfp_flower_init(struct nfp_app *app)
 	if (err)
 		app_priv->flower_ext_feats = 0;
 	else
-		app_priv->flower_ext_feats = features;
+		app_priv->flower_ext_feats = features & NFP_FL_FEATS_HOST;
 
-	/* Tell the firmware that the driver supports lag. */
-	err = nfp_rtsym_write_le(app->pf->rtbl,
-				 "_abi_flower_balance_sync_enable", 1);
-	if (!err) {
-		app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG;
-		nfp_flower_lag_init(&app_priv->nfp_lag);
-	} else if (err == -ENOENT) {
-		nfp_warn(app->cpp, "LAG not supported by FW.\n");
-	} else {
-		goto err_cleanup_metadata;
-	}
-
-	if (app_priv->flower_ext_feats & NFP_FL_FEATS_FLOW_MOD) {
-		/* Tell the firmware that the driver supports flow merging. */
-		err = nfp_rtsym_write_le(app->pf->rtbl,
-					 "_abi_flower_merge_hint_enable", 1);
-		if (!err) {
-			app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE;
-			nfp_flower_internal_port_init(app_priv);
-		} else if (err == -ENOENT) {
-			nfp_warn(app->cpp, "Flow merge not supported by FW.\n");
-		} else {
-			goto err_lag_clean;
-		}
-	} else {
-		nfp_warn(app->cpp, "Flow mod/merge not supported by FW.\n");
-	}
+	err = nfp_flower_sync_feature_bits(app);
+	if (err)
+		goto err_cleanup;
 
 	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
 		nfp_flower_qos_init(app);
@@ -792,10 +839,9 @@ static int nfp_flower_init(struct nfp_app *app)
 
 	return 0;
 
-err_lag_clean:
+err_cleanup:
 	if (app_priv->flower_ext_feats & NFP_FL_ENABLE_LAG)
 		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
-err_cleanup_metadata:
 	nfp_flower_metadata_cleanup(app);
 err_free_app_priv:
 	vfree(app->priv);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 7db3be0b17e9..59abea2a39ad 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -44,10 +44,21 @@ struct nfp_app;
 #define NFP_FL_FEATS_FLOW_MOD		BIT(5)
 #define NFP_FL_FEATS_PRE_TUN_RULES	BIT(6)
 #define NFP_FL_FEATS_IPV6_TUN		BIT(7)
+#define NFP_FL_FEATS_HOST_ACK		BIT(31)
 
 #define NFP_FL_ENABLE_FLOW_MERGE	BIT(0)
 #define NFP_FL_ENABLE_LAG		BIT(1)
 
+#define NFP_FL_FEATS_HOST \
+	(NFP_FL_FEATS_GENEVE | \
+	NFP_FL_NBI_MTU_SETTING | \
+	NFP_FL_FEATS_GENEVE_OPT | \
+	NFP_FL_FEATS_VLAN_PCP | \
+	NFP_FL_FEATS_VF_RLIM | \
+	NFP_FL_FEATS_FLOW_MOD | \
+	NFP_FL_FEATS_PRE_TUN_RULES | \
+	NFP_FL_FEATS_IPV6_TUN)
+
 struct nfp_fl_mask_id {
 	struct circ_buf mask_id_free_list;
 	ktime_t *last_used;
-- 
cgit v1.2.3-59-g8ed1b


From 12ddee68d058792602d017ea940a69fe6969edbe Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Thu, 23 Apr 2020 18:11:19 -0700
Subject: igc: Refactor VLAN priority filtering code

The whole VLAN priority filtering code is implemented in igc_ethtool.c
and mixes logic from ethtool and core parts. This patch refactors it so
core logic is moved to igc_main.c, aligning the VLAN priority filtering
code organization with the MAC address filtering code.

This patch also takes the opportunity to add some log messages to ease
debugging.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  3 ++
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 64 ++++++----------------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 52 ++++++++++++++++++++++
 3 files changed, 68 insertions(+), 51 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 661dc8875f3f..5f1e1d31e832 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -235,6 +235,9 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const s8 queue, const u8 flags);
 int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		       const u8 flags);
+int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
+			     int queue);
+void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio);
 void igc_update_stats(struct igc_adapter *adapter);
 
 /* igc_dump declarations */
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f28f7feb39a5..c5be8b936963 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1223,35 +1223,6 @@ static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_rxnfc_write_vlan_prio_filter(struct igc_adapter *adapter,
-					    struct igc_nfc_filter *input)
-{
-	struct igc_hw *hw = &adapter->hw;
-	u8 vlan_priority;
-	u16 queue_index;
-	u32 vlapqf;
-
-	vlapqf = rd32(IGC_VLANPQF);
-	vlan_priority = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK)
-				>> VLAN_PRIO_SHIFT;
-	queue_index = (vlapqf >> (vlan_priority * 4)) & IGC_VLANPQF_QUEUE_MASK;
-
-	/* check whether this VLAN prio is already set */
-	if (vlapqf & IGC_VLANPQF_VALID(vlan_priority) &&
-	    queue_index != input->action) {
-		netdev_err(adapter->netdev,
-			   "ethtool rxnfc set VLAN prio filter failed\n");
-		return -EEXIST;
-	}
-
-	vlapqf |= IGC_VLANPQF_VALID(vlan_priority);
-	vlapqf |= IGC_VLANPQF_QSEL(vlan_priority, input->action);
-
-	wr32(IGC_VLANPQF, vlapqf);
-
-	return 0;
-}
-
 int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 {
 	struct igc_hw *hw = &adapter->hw;
@@ -1285,10 +1256,15 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 			return err;
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
-		err = igc_rxnfc_write_vlan_prio_filter(adapter, input);
+	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+		err = igc_add_vlan_prio_filter(adapter, prio, input->action);
+		if (err)
+			return err;
+	}
 
-	return err;
+	return 0;
 }
 
 static void igc_clear_etype_filter_regs(struct igc_adapter *adapter,
@@ -1306,31 +1282,17 @@ static void igc_clear_etype_filter_regs(struct igc_adapter *adapter,
 	adapter->etype_bitmap[reg_index] = false;
 }
 
-static void igc_clear_vlan_prio_filter(struct igc_adapter *adapter,
-				       u16 vlan_tci)
-{
-	struct igc_hw *hw = &adapter->hw;
-	u8 vlan_priority;
-	u32 vlapqf;
-
-	vlan_priority = (vlan_tci & VLAN_PRIO_MASK) >> VLAN_PRIO_SHIFT;
-
-	vlapqf = rd32(IGC_VLANPQF);
-	vlapqf &= ~IGC_VLANPQF_VALID(vlan_priority);
-	vlapqf &= ~IGC_VLANPQF_QSEL(vlan_priority, IGC_VLANPQF_QUEUE_MASK);
-
-	wr32(IGC_VLANPQF, vlapqf);
-}
-
 int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 {
 	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
 		igc_clear_etype_filter_regs(adapter,
 					    input->etype_reg_index);
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI)
-		igc_clear_vlan_prio_filter(adapter,
-					   ntohs(input->filter.vlan_tci));
+	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+		igc_del_vlan_prio_filter(adapter, prio);
+	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
 		igc_del_mac_filter(adapter, input->filter.src_addr,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 125026d053eb..7e59c0393dbc 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2314,6 +2314,58 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	return 0;
 }
 
+/**
+ * igc_add_vlan_prio_filter() - Add VLAN priority filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @prio: VLAN priority value
+ * @queue: Queue number which matching frames are assigned to
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, int queue)
+{
+	struct net_device *dev = adapter->netdev;
+	struct igc_hw *hw = &adapter->hw;
+	u32 vlanpqf;
+
+	vlanpqf = rd32(IGC_VLANPQF);
+
+	if (vlanpqf & IGC_VLANPQF_VALID(prio)) {
+		netdev_dbg(dev, "VLAN priority filter already in use\n");
+		return -EEXIST;
+	}
+
+	vlanpqf |= IGC_VLANPQF_QSEL(prio, queue);
+	vlanpqf |= IGC_VLANPQF_VALID(prio);
+
+	wr32(IGC_VLANPQF, vlanpqf);
+
+	netdev_dbg(dev, "Add VLAN priority filter: prio %d queue %d\n",
+		   prio, queue);
+	return 0;
+}
+
+/**
+ * igc_del_vlan_prio_filter() - Delete VLAN priority filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @prio: VLAN priority value
+ */
+void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
+{
+	struct igc_hw *hw = &adapter->hw;
+	u32 vlanpqf;
+
+	vlanpqf = rd32(IGC_VLANPQF);
+
+	vlanpqf &= ~IGC_VLANPQF_VALID(prio);
+	vlanpqf &= ~IGC_VLANPQF_QSEL(prio, IGC_VLANPQF_QUEUE_MASK);
+
+	wr32(IGC_VLANPQF, vlanpqf);
+
+	netdev_dbg(adapter->netdev, "Delete VLAN priority filter: prio %d\n",
+		   prio);
+}
+
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
-- 
cgit v1.2.3-59-g8ed1b


From 6ad70c7686d4ad14cbea4f9930037cabea0cdb7a Mon Sep 17 00:00:00 2001
From: Vaibhav Gupta <vaibhavgupta40@gmail.com>
Date: Mon, 18 May 2020 20:32:13 +0530
Subject: realtek/8139too: use generic power management

compile-tested only

With legacy PM hooks, it was the responsibility
of a driver to manage PCI states and also
device's power state. The generic approach is
to let PCI core handle the work.

PCI core passes "struct device*" as an argument
to the .suspend() and .resume() callbacks. As
these callbacks work with "struct net_device*",
extract it from "struct device*" using
dev_get_drvdata().

Signed-off-by: Vaibhav Gupta <vaibhavgupta40@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139too.c | 26 +++++++-------------------
 1 file changed, 7 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/realtek/8139too.c b/drivers/net/ethernet/realtek/8139too.c
index 5caeb8368eab..227139d42227 100644
--- a/drivers/net/ethernet/realtek/8139too.c
+++ b/drivers/net/ethernet/realtek/8139too.c
@@ -2603,17 +2603,13 @@ static void rtl8139_set_rx_mode (struct net_device *dev)
 	spin_unlock_irqrestore (&tp->lock, flags);
 }
 
-#ifdef CONFIG_PM
-
-static int rtl8139_suspend (struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused rtl8139_suspend(struct device *device)
 {
-	struct net_device *dev = pci_get_drvdata (pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct rtl8139_private *tp = netdev_priv(dev);
 	void __iomem *ioaddr = tp->mmio_addr;
 	unsigned long flags;
 
-	pci_save_state (pdev);
-
 	if (!netif_running (dev))
 		return 0;
 
@@ -2631,38 +2627,30 @@ static int rtl8139_suspend (struct pci_dev *pdev, pm_message_t state)
 
 	spin_unlock_irqrestore (&tp->lock, flags);
 
-	pci_set_power_state (pdev, PCI_D3hot);
-
 	return 0;
 }
 
-
-static int rtl8139_resume (struct pci_dev *pdev)
+static int __maybe_unused rtl8139_resume(struct device *device)
 {
-	struct net_device *dev = pci_get_drvdata (pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 
-	pci_restore_state (pdev);
 	if (!netif_running (dev))
 		return 0;
-	pci_set_power_state (pdev, PCI_D0);
+
 	rtl8139_init_ring (dev);
 	rtl8139_hw_start (dev);
 	netif_device_attach (dev);
 	return 0;
 }
 
-#endif /* CONFIG_PM */
-
+static SIMPLE_DEV_PM_OPS(rtl8139_pm_ops, rtl8139_suspend, rtl8139_resume);
 
 static struct pci_driver rtl8139_pci_driver = {
 	.name		= DRV_NAME,
 	.id_table	= rtl8139_pci_tbl,
 	.probe		= rtl8139_init_one,
 	.remove		= rtl8139_remove_one,
-#ifdef CONFIG_PM
-	.suspend	= rtl8139_suspend,
-	.resume		= rtl8139_resume,
-#endif /* CONFIG_PM */
+	.driver.pm	= &rtl8139_pm_ops,
 };
 
 
-- 
cgit v1.2.3-59-g8ed1b


From a1eae9f677a694c97c232f8c1b3b826ec68a0c11 Mon Sep 17 00:00:00 2001
From: Vaibhav Gupta <vaibhavgupta40@gmail.com>
Date: Mon, 18 May 2020 20:32:14 +0530
Subject: realtek/8139cp: use generic power management

compile-tested only

With legacy PM hooks, it was the responsibility
of a driver to manage PCI states and also
device's power state. The generic approach is
to let PCI core handle the work.

The suspend callback enables/disables PCI wake
on the basis of the "cp->wol_enabled" variable,
which is unknown to the PCI core. To preserve that
behaviour, call device_set_wakeup_enable().

Signed-off-by: Vaibhav Gupta <vaibhavgupta40@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/8139cp.c | 25 ++++++++-----------------
 1 file changed, 8 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/realtek/8139cp.c b/drivers/net/ethernet/realtek/8139cp.c
index 60d342f82fb3..e291e6ac40cb 100644
--- a/drivers/net/ethernet/realtek/8139cp.c
+++ b/drivers/net/ethernet/realtek/8139cp.c
@@ -2054,10 +2054,9 @@ static void cp_remove_one (struct pci_dev *pdev)
 	free_netdev(dev);
 }
 
-#ifdef CONFIG_PM
-static int cp_suspend (struct pci_dev *pdev, pm_message_t state)
+static int __maybe_unused cp_suspend(struct device *device)
 {
-	struct net_device *dev = pci_get_drvdata(pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct cp_private *cp = netdev_priv(dev);
 	unsigned long flags;
 
@@ -2075,16 +2074,14 @@ static int cp_suspend (struct pci_dev *pdev, pm_message_t state)
 
 	spin_unlock_irqrestore (&cp->lock, flags);
 
-	pci_save_state(pdev);
-	pci_enable_wake(pdev, pci_choose_state(pdev, state), cp->wol_enabled);
-	pci_set_power_state(pdev, pci_choose_state(pdev, state));
+	device_set_wakeup_enable(device, cp->wol_enabled);
 
 	return 0;
 }
 
-static int cp_resume (struct pci_dev *pdev)
+static int __maybe_unused cp_resume(struct device *device)
 {
-	struct net_device *dev = pci_get_drvdata (pdev);
+	struct net_device *dev = dev_get_drvdata(device);
 	struct cp_private *cp = netdev_priv(dev);
 	unsigned long flags;
 
@@ -2093,10 +2090,6 @@ static int cp_resume (struct pci_dev *pdev)
 
 	netif_device_attach (dev);
 
-	pci_set_power_state(pdev, PCI_D0);
-	pci_restore_state(pdev);
-	pci_enable_wake(pdev, PCI_D0, 0);
-
 	/* FIXME: sh*t may happen if the Rx ring buffer is depleted */
 	cp_init_rings_index (cp);
 	cp_init_hw (cp);
@@ -2111,7 +2104,6 @@ static int cp_resume (struct pci_dev *pdev)
 
 	return 0;
 }
-#endif /* CONFIG_PM */
 
 static const struct pci_device_id cp_pci_tbl[] = {
         { PCI_DEVICE(PCI_VENDOR_ID_REALTEK,     PCI_DEVICE_ID_REALTEK_8139), },
@@ -2120,15 +2112,14 @@ static const struct pci_device_id cp_pci_tbl[] = {
 };
 MODULE_DEVICE_TABLE(pci, cp_pci_tbl);
 
+static SIMPLE_DEV_PM_OPS(cp_pm_ops, cp_suspend, cp_resume);
+
 static struct pci_driver cp_driver = {
 	.name         = DRV_NAME,
 	.id_table     = cp_pci_tbl,
 	.probe        =	cp_init_one,
 	.remove       = cp_remove_one,
-#ifdef CONFIG_PM
-	.resume       = cp_resume,
-	.suspend      = cp_suspend,
-#endif
+	.driver.pm    = &cp_pm_ops,
 };
 
 module_pci_driver(cp_driver);
-- 
cgit v1.2.3-59-g8ed1b


From 09a2b50a49798611df55f85b0428278d58b089ad Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 7 Apr 2020 14:07:07 -0700
Subject: igc: Remove duplicated IGC_RXPBS macro

This patch removes the IGC_RXPBS macro defined on line 233 since it is
already defined on line 18 with exactly the same value.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 851ff19af703..763a24d52865 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -227,8 +227,6 @@
 
 #define IGC_FTQF(_n)	(0x059E0 + (4 * (_n)))  /* 5-tuple Queue Fltr */
 
-#define IGC_RXPBS	0x02404  /* Rx Packet Buffer Size - RW */
-
 /* Transmit Scheduling Registers */
 #define IGC_TQAVCTRL		0x3570
 #define IGC_TXQCTL(_n)		(0x3344 + 0x4 * (_n))
-- 
cgit v1.2.3-59-g8ed1b


From 1664ef3e62271dede7d51056b9ba36697b57d714 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 7 Apr 2020 14:07:08 -0700
Subject: igc: Remove ethertype filter in PTP code

The driver only supports hardware timestamping for all incoming
traffic (HWTSTAMP_FILTER_ALL) which is enabled via Rx Time Sync
Control (TSYNCRXCTL) register already. Therefore, the ethertype
filter set in igc_ptp_set_timestamp_mode() is useless, so this
patch removes it.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  2 +-
 drivers/net/ethernet/intel/igc/igc_defines.h |  3 ---
 drivers/net/ethernet/intel/igc/igc_ptp.c     | 12 ------------
 3 files changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 5f1e1d31e832..e4169fe955d8 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -26,7 +26,7 @@ void igc_set_ethtool_ops(struct net_device *);
 #define MAX_Q_VECTORS			8
 #define MAX_STD_JUMBO_FRAME_SIZE	9216
 
-#define MAX_ETYPE_FILTER		(4 - 1)
+#define MAX_ETYPE_FILTER		4
 #define IGC_RETA_SIZE			128
 
 struct igc_tx_queue_stats {
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index f1bb5414f99f..6909826bc747 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -384,9 +384,6 @@
 
 #define IGC_TSICR_INTERRUPTS	IGC_TSICR_TXTS
 
-/* PTP Queue Filter */
-#define IGC_ETQF_1588		BIT(30)
-
 #define IGC_FTQF_VF_BP		0x00008000
 #define IGC_FTQF_1588_TIME_STAMP	0x08000000
 #define IGC_FTQF_MASK			0xF0000000
diff --git a/drivers/net/ethernet/intel/igc/igc_ptp.c b/drivers/net/ethernet/intel/igc/igc_ptp.c
index 1bf016398b9d..0d746f8588c8 100644
--- a/drivers/net/ethernet/intel/igc/igc_ptp.c
+++ b/drivers/net/ethernet/intel/igc/igc_ptp.c
@@ -305,7 +305,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
 	struct igc_hw *hw = &adapter->hw;
 	u32 tsync_rx_cfg = 0;
 	bool is_l4 = false;
-	bool is_l2 = false;
 	u32 regval;
 
 	/* reserved for future extensions */
@@ -346,7 +345,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
 	case HWTSTAMP_FILTER_PTP_V2_L4_DELAY_REQ:
 		tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_EVENT_V2;
 		config->rx_filter = HWTSTAMP_FILTER_PTP_V2_EVENT;
-		is_l2 = true;
 		is_l4 = true;
 		break;
 	case HWTSTAMP_FILTER_PTP_V1_L4_EVENT:
@@ -370,7 +368,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
 		tsync_rx_ctl |= IGC_TSYNCRXCTL_TYPE_ALL;
 		tsync_rx_ctl |= IGC_TSYNCRXCTL_RXSYNSIG;
 		config->rx_filter = HWTSTAMP_FILTER_ALL;
-		is_l2 = true;
 		is_l4 = true;
 
 		if (hw->mac.type == igc_i225) {
@@ -405,15 +402,6 @@ static int igc_ptp_set_timestamp_mode(struct igc_adapter *adapter,
 	/* define which PTP packets are time stamped */
 	wr32(IGC_TSYNCRXCFG, tsync_rx_cfg);
 
-	/* define ethertype filter for timestamped packets */
-	if (is_l2)
-		wr32(IGC_ETQF(3),
-		     (IGC_ETQF_FILTER_ENABLE | /* enable filter */
-		     IGC_ETQF_1588 | /* enable timestamping */
-		     ETH_P_1588)); /* 1588 eth protocol type */
-	else
-		wr32(IGC_ETQF(3), 0);
-
 	/* L4 Queue Filter[3]: filter by destination port and protocol */
 	if (is_l4) {
 		u32 ftqf = (IPPROTO_UDP /* UDP */
-- 
cgit v1.2.3-59-g8ed1b


From b4d48d96eaec9bff4139dcbeb9590869fc765aa9 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 7 Apr 2020 14:07:09 -0700
Subject: igc: Fix MAX_ETYPE_FILTER value

The I225 controller has 8 ethertype filters, not 4. This patch fixes the
MAX_ETYPE_FILTER macro accordingly.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index e4169fe955d8..8389569aea8a 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -26,7 +26,7 @@ void igc_set_ethtool_ops(struct net_device *);
 #define MAX_Q_VECTORS			8
 #define MAX_STD_JUMBO_FRAME_SIZE	9216
 
-#define MAX_ETYPE_FILTER		4
+#define MAX_ETYPE_FILTER		8
 #define IGC_RETA_SIZE			128
 
 struct igc_tx_queue_stats {
-- 
cgit v1.2.3-59-g8ed1b


From c384b8a70c59d85f83a05c8963e71d35da2607b9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:11 +0200
Subject: ipv4: streamline ipmr_new_tunnel

Reduce a few levels of indentation to simplify the function.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 73 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 36 insertions(+), 37 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index 5c218db2dede..a1169b694113 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -471,50 +471,49 @@ static bool ipmr_init_vif_indev(const struct net_device *dev)
 
 static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 {
-	struct net_device  *dev;
+	struct net_device *tunnel_dev, *new_dev;
+	struct ip_tunnel_parm p = { };
+	mm_segment_t oldfs = get_fs();
+	struct ifreq ifr;
+	int err;
 
-	dev = __dev_get_by_name(net, "tunl0");
+	tunnel_dev = __dev_get_by_name(net, "tunl0");
+	if (!tunnel_dev)
+		goto out;
 
-	if (dev) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-		int err;
-		struct ifreq ifr;
-		struct ip_tunnel_parm p;
+	p.iph.daddr = v->vifc_rmt_addr.s_addr;
+	p.iph.saddr = v->vifc_lcl_addr.s_addr;
+	p.iph.version = 4;
+	p.iph.ihl = 5;
+	p.iph.protocol = IPPROTO_IPIP;
+	sprintf(p.name, "dvmrp%d", v->vifc_vifi);
+	ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 
-		memset(&p, 0, sizeof(p));
-		p.iph.daddr = v->vifc_rmt_addr.s_addr;
-		p.iph.saddr = v->vifc_lcl_addr.s_addr;
-		p.iph.version = 4;
-		p.iph.ihl = 5;
-		p.iph.protocol = IPPROTO_IPIP;
-		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
-		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+	if (!tunnel_dev->netdev_ops->ndo_do_ioctl)
+		goto out;
 
-		if (ops->ndo_do_ioctl) {
-			mm_segment_t oldfs = get_fs();
+	set_fs(KERNEL_DS);
+	err = tunnel_dev->netdev_ops->ndo_do_ioctl(tunnel_dev, &ifr,
+			SIOCADDTUNNEL);
+	set_fs(oldfs);
+	if (err)
+		goto out;
 
-			set_fs(KERNEL_DS);
-			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-			set_fs(oldfs);
-		} else {
-			err = -EOPNOTSUPP;
-		}
-		dev = NULL;
+	new_dev = __dev_get_by_name(net, p.name);
+	if (!new_dev)
+		goto out;
 
-		if (err == 0 &&
-		    (dev = __dev_get_by_name(net, p.name)) != NULL) {
-			dev->flags |= IFF_MULTICAST;
-			if (!ipmr_init_vif_indev(dev))
-				goto failure;
-			if (dev_open(dev, NULL))
-				goto failure;
-			dev_hold(dev);
-		}
-	}
-	return dev;
+	new_dev->flags |= IFF_MULTICAST;
+	if (!ipmr_init_vif_indev(new_dev))
+		goto out_unregister;
+	if (dev_open(new_dev, NULL))
+		goto out_unregister;
+	dev_hold(new_dev);
+	return new_dev;
 
-failure:
-	unregister_netdevice(dev);
+out_unregister:
+	unregister_netdevice(new_dev);
+out:
 	return NULL;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c1fd1182c43692022a7938d6b496fa21fcd49717 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:12 +0200
Subject: ipv4: consolidate the VIFF_TUNNEL handling in ipmr_new_tunnel

Also move the dev_set_allmulti call and the error handling into the
ioctl helper.  This allows reusing already looked up tunnel_dev pointer
and the set up argument structure for the deletion in the error handler.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 53 +++++++++++++----------------------------------------
 1 file changed, 13 insertions(+), 40 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index a1169b694113..cd1a3260a99a 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -421,37 +421,6 @@ static void ipmr_free_table(struct mr_table *mrt)
 
 /* Service routines creating virtual interfaces: DVMRP tunnels and PIMREG */
 
-static void ipmr_del_tunnel(struct net_device *dev, struct vifctl *v)
-{
-	struct net *net = dev_net(dev);
-
-	dev_close(dev);
-
-	dev = __dev_get_by_name(net, "tunl0");
-	if (dev) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-		struct ifreq ifr;
-		struct ip_tunnel_parm p;
-
-		memset(&p, 0, sizeof(p));
-		p.iph.daddr = v->vifc_rmt_addr.s_addr;
-		p.iph.saddr = v->vifc_lcl_addr.s_addr;
-		p.iph.version = 4;
-		p.iph.ihl = 5;
-		p.iph.protocol = IPPROTO_IPIP;
-		sprintf(p.name, "dvmrp%d", v->vifc_vifi);
-		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
-
-		if (ops->ndo_do_ioctl) {
-			mm_segment_t oldfs = get_fs();
-
-			set_fs(KERNEL_DS);
-			ops->ndo_do_ioctl(dev, &ifr, SIOCDELTUNNEL);
-			set_fs(oldfs);
-		}
-	}
-}
-
 /* Initialize ipmr pimreg/tunnel in_device */
 static bool ipmr_init_vif_indev(const struct net_device *dev)
 {
@@ -509,12 +478,22 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 	if (dev_open(new_dev, NULL))
 		goto out_unregister;
 	dev_hold(new_dev);
+	err = dev_set_allmulti(new_dev, 1);
+	if (err) {
+		dev_close(new_dev);
+		set_fs(KERNEL_DS);
+		tunnel_dev->netdev_ops->ndo_do_ioctl(tunnel_dev, &ifr,
+				SIOCDELTUNNEL);
+		set_fs(oldfs);
+		dev_put(new_dev);
+		new_dev = ERR_PTR(err);
+	}
 	return new_dev;
 
 out_unregister:
 	unregister_netdevice(new_dev);
 out:
-	return NULL;
+	return ERR_PTR(-ENOBUFS);
 }
 
 #if defined(CONFIG_IP_PIMSM_V1) || defined(CONFIG_IP_PIMSM_V2)
@@ -866,14 +845,8 @@ static int vif_add(struct net *net, struct mr_table *mrt,
 		break;
 	case VIFF_TUNNEL:
 		dev = ipmr_new_tunnel(net, vifc);
-		if (!dev)
-			return -ENOBUFS;
-		err = dev_set_allmulti(dev, 1);
-		if (err) {
-			ipmr_del_tunnel(dev, vifc);
-			dev_put(dev);
-			return err;
-		}
+		if (IS_ERR(dev))
+			return PTR_ERR(dev);
 		break;
 	case VIFF_USE_IFINDEX:
 	case 0:
-- 
cgit v1.2.3-59-g8ed1b


From 607259a695312cdfac2b52fb9d5b5890c834d573 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:13 +0200
Subject: net: add a new ndo_tunnel_ctl method

This method is used to properly allow kernel callers of the IPv4 route
management ioctls.  The existing ip_tunnel_ioctl helper is renamed to
ip_tunnel_ctl to better reflect that it doesn't directly implement ioctls
touching user memory, and is used for the guts of ndo_tunnel_ctl
implementations. A new ip_tunnel_ioctl helper is added that can be wired
up directly to the ndo_do_ioctl method and takes care of the copy to and
from userspace.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/netdevice.h |  6 ++++++
 include/net/ip_tunnels.h  |  3 ++-
 net/ipv4/ip_gre.c         | 35 ++++++++++++++---------------------
 net/ipv4/ip_tunnel.c      | 16 +++++++++++++++-
 net/ipv4/ip_vti.c         | 32 +++++++++++++-------------------
 net/ipv4/ipip.c           | 30 +++++++++---------------------
 6 files changed, 59 insertions(+), 63 deletions(-)

diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index 6a8f8daef09d..a18f8fdf4260 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -53,6 +53,7 @@ struct netpoll_info;
 struct device;
 struct phy_device;
 struct dsa_port;
+struct ip_tunnel_parm;
 struct macsec_context;
 struct macsec_ops;
 
@@ -1274,6 +1275,9 @@ struct netdev_net_notifier {
  *	Get devlink port instance associated with a given netdev.
  *	Called with a reference on the netdevice and devlink locks only,
  *	rtnl_lock is not held.
+ * int (*ndo_tunnel_ctl)(struct net_device *dev, struct ip_tunnel_parm *p,
+ *			 int cmd);
+ *	Add, change, delete or get information on an IPv4 tunnel.
  */
 struct net_device_ops {
 	int			(*ndo_init)(struct net_device *dev);
@@ -1479,6 +1483,8 @@ struct net_device_ops {
 	int			(*ndo_xsk_wakeup)(struct net_device *dev,
 						  u32 queue_id, u32 flags);
 	struct devlink_port *	(*ndo_get_devlink_port)(struct net_device *dev);
+	int			(*ndo_tunnel_ctl)(struct net_device *dev,
+						  struct ip_tunnel_parm *p, int cmd);
 };
 
 /**
diff --git a/include/net/ip_tunnels.h b/include/net/ip_tunnels.h
index 236503a50759..076e5d7db7d3 100644
--- a/include/net/ip_tunnels.h
+++ b/include/net/ip_tunnels.h
@@ -269,7 +269,8 @@ void ip_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		    const struct iphdr *tnl_params, const u8 protocol);
 void ip_md_tunnel_xmit(struct sk_buff *skb, struct net_device *dev,
 		       const u8 proto, int tunnel_hlen);
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd);
+int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd);
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict);
 int ip_tunnel_change_mtu(struct net_device *dev, int new_mtu);
 
diff --git a/net/ipv4/ip_gre.c b/net/ipv4/ip_gre.c
index 0ce9b91ff55c..4e31f23e4117 100644
--- a/net/ipv4/ip_gre.c
+++ b/net/ipv4/ip_gre.c
@@ -768,45 +768,37 @@ static void ipgre_link_update(struct net_device *dev, bool set_mtu)
 	}
 }
 
-static int ipgre_tunnel_ioctl(struct net_device *dev,
-			      struct ifreq *ifr, int cmd)
+static int ipgre_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p,
+			    int cmd)
 {
-	struct ip_tunnel_parm p;
 	int err;
 
-	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-		return -EFAULT;
-
 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
-		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_GRE ||
-		    p.iph.ihl != 5 || (p.iph.frag_off & htons(~IP_DF)) ||
-		    ((p.i_flags | p.o_flags) & (GRE_VERSION | GRE_ROUTING)))
+		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_GRE ||
+		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)) ||
+		    ((p->i_flags | p->o_flags) & (GRE_VERSION | GRE_ROUTING)))
 			return -EINVAL;
 	}
 
-	p.i_flags = gre_flags_to_tnl_flags(p.i_flags);
-	p.o_flags = gre_flags_to_tnl_flags(p.o_flags);
+	p->i_flags = gre_flags_to_tnl_flags(p->i_flags);
+	p->o_flags = gre_flags_to_tnl_flags(p->o_flags);
 
-	err = ip_tunnel_ioctl(dev, &p, cmd);
+	err = ip_tunnel_ctl(dev, p, cmd);
 	if (err)
 		return err;
 
 	if (cmd == SIOCCHGTUNNEL) {
 		struct ip_tunnel *t = netdev_priv(dev);
 
-		t->parms.i_flags = p.i_flags;
-		t->parms.o_flags = p.o_flags;
+		t->parms.i_flags = p->i_flags;
+		t->parms.o_flags = p->o_flags;
 
 		if (strcmp(dev->rtnl_link_ops->kind, "erspan"))
 			ipgre_link_update(dev, true);
 	}
 
-	p.i_flags = gre_tnl_flags_to_gre_flags(p.i_flags);
-	p.o_flags = gre_tnl_flags_to_gre_flags(p.o_flags);
-
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-		return -EFAULT;
-
+	p->i_flags = gre_tnl_flags_to_gre_flags(p->i_flags);
+	p->o_flags = gre_tnl_flags_to_gre_flags(p->o_flags);
 	return 0;
 }
 
@@ -924,10 +916,11 @@ static const struct net_device_ops ipgre_netdev_ops = {
 	.ndo_stop		= ipgre_close,
 #endif
 	.ndo_start_xmit		= ipgre_xmit,
-	.ndo_do_ioctl		= ipgre_tunnel_ioctl,
+	.ndo_do_ioctl		= ip_tunnel_ioctl,
 	.ndo_change_mtu		= ip_tunnel_change_mtu,
 	.ndo_get_stats64	= ip_tunnel_get_stats64,
 	.ndo_get_iflink		= ip_tunnel_get_iflink,
+	.ndo_tunnel_ctl		= ipgre_tunnel_ctl,
 };
 
 #define GRE_FEATURES (NETIF_F_SG |		\
diff --git a/net/ipv4/ip_tunnel.c b/net/ipv4/ip_tunnel.c
index cd4b84310d92..f4f1d11eab50 100644
--- a/net/ipv4/ip_tunnel.c
+++ b/net/ipv4/ip_tunnel.c
@@ -860,7 +860,7 @@ static void ip_tunnel_update(struct ip_tunnel_net *itn,
 	netdev_state_change(dev);
 }
 
-int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+int ip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 {
 	int err = 0;
 	struct ip_tunnel *t = netdev_priv(dev);
@@ -960,6 +960,20 @@ int ip_tunnel_ioctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 done:
 	return err;
 }
+EXPORT_SYMBOL_GPL(ip_tunnel_ctl);
+
+int ip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct ip_tunnel_parm p;
+	int err;
+
+	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+		return -EFAULT;
+	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, cmd);
+	if (!err && copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
+		return -EFAULT;
+	return err;
+}
 EXPORT_SYMBOL_GPL(ip_tunnel_ioctl);
 
 int __ip_tunnel_change_mtu(struct net_device *dev, int new_mtu, bool strict)
diff --git a/net/ipv4/ip_vti.c b/net/ipv4/ip_vti.c
index 1b4e6f298648..c8974360a99f 100644
--- a/net/ipv4/ip_vti.c
+++ b/net/ipv4/ip_vti.c
@@ -378,38 +378,31 @@ static int vti4_err(struct sk_buff *skb, u32 info)
 }
 
 static int
-vti_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+vti_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 {
 	int err = 0;
-	struct ip_tunnel_parm p;
-
-	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-		return -EFAULT;
 
 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
-		if (p.iph.version != 4 || p.iph.protocol != IPPROTO_IPIP ||
-		    p.iph.ihl != 5)
+		if (p->iph.version != 4 || p->iph.protocol != IPPROTO_IPIP ||
+		    p->iph.ihl != 5)
 			return -EINVAL;
 	}
 
-	if (!(p.i_flags & GRE_KEY))
-		p.i_key = 0;
-	if (!(p.o_flags & GRE_KEY))
-		p.o_key = 0;
+	if (!(p->i_flags & GRE_KEY))
+		p->i_key = 0;
+	if (!(p->o_flags & GRE_KEY))
+		p->o_key = 0;
 
-	p.i_flags = VTI_ISVTI;
+	p->i_flags = VTI_ISVTI;
 
-	err = ip_tunnel_ioctl(dev, &p, cmd);
+	err = ip_tunnel_ctl(dev, p, cmd);
 	if (err)
 		return err;
 
 	if (cmd != SIOCDELTUNNEL) {
-		p.i_flags |= GRE_KEY;
-		p.o_flags |= GRE_KEY;
+		p->i_flags |= GRE_KEY;
+		p->o_flags |= GRE_KEY;
 	}
-
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-		return -EFAULT;
 	return 0;
 }
 
@@ -417,10 +410,11 @@ static const struct net_device_ops vti_netdev_ops = {
 	.ndo_init	= vti_tunnel_init,
 	.ndo_uninit	= ip_tunnel_uninit,
 	.ndo_start_xmit	= vti_tunnel_xmit,
-	.ndo_do_ioctl	= vti_tunnel_ioctl,
+	.ndo_do_ioctl	= ip_tunnel_ioctl,
 	.ndo_change_mtu	= ip_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
 	.ndo_get_iflink = ip_tunnel_get_iflink,
+	.ndo_tunnel_ctl	= vti_tunnel_ctl,
 };
 
 static void vti_tunnel_setup(struct net_device *dev)
diff --git a/net/ipv4/ipip.c b/net/ipv4/ipip.c
index 2f01cf6fa0de..df663baf2516 100644
--- a/net/ipv4/ipip.c
+++ b/net/ipv4/ipip.c
@@ -327,41 +327,29 @@ static bool ipip_tunnel_ioctl_verify_protocol(u8 ipproto)
 }
 
 static int
-ipip_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+ipip_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
 {
-	int err = 0;
-	struct ip_tunnel_parm p;
-
-	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-		return -EFAULT;
-
 	if (cmd == SIOCADDTUNNEL || cmd == SIOCCHGTUNNEL) {
-		if (p.iph.version != 4 ||
-		    !ipip_tunnel_ioctl_verify_protocol(p.iph.protocol) ||
-		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
+		if (p->iph.version != 4 ||
+		    !ipip_tunnel_ioctl_verify_protocol(p->iph.protocol) ||
+		    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
 			return -EINVAL;
 	}
 
-	p.i_key = p.o_key = 0;
-	p.i_flags = p.o_flags = 0;
-	err = ip_tunnel_ioctl(dev, &p, cmd);
-	if (err)
-		return err;
-
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &p, sizeof(p)))
-		return -EFAULT;
-
-	return 0;
+	p->i_key = p->o_key = 0;
+	p->i_flags = p->o_flags = 0;
+	return ip_tunnel_ctl(dev, p, cmd);
 }
 
 static const struct net_device_ops ipip_netdev_ops = {
 	.ndo_init       = ipip_tunnel_init,
 	.ndo_uninit     = ip_tunnel_uninit,
 	.ndo_start_xmit	= ipip_tunnel_xmit,
-	.ndo_do_ioctl	= ipip_tunnel_ioctl,
+	.ndo_do_ioctl	= ip_tunnel_ioctl,
 	.ndo_change_mtu = ip_tunnel_change_mtu,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
 	.ndo_get_iflink = ip_tunnel_get_iflink,
+	.ndo_tunnel_ctl	= ipip_tunnel_ctl,
 };
 
 #define IPIP_FEATURES (NETIF_F_SG |		\
-- 
cgit v1.2.3-59-g8ed1b


From c7e3670516042bfd8147151d9008b5874a6eb73e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:14 +0200
Subject: ipmr: use ->ndo_tunnel_ctl in ipmr_new_tunnel

Use the new ->ndo_tunnel_ctl instead of overriding the address limit
and using ->ndo_do_ioctl just to do a pointless user copy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ipmr.c | 14 +++-----------
 1 file changed, 3 insertions(+), 11 deletions(-)

diff --git a/net/ipv4/ipmr.c b/net/ipv4/ipmr.c
index cd1a3260a99a..d3e9b80a57de 100644
--- a/net/ipv4/ipmr.c
+++ b/net/ipv4/ipmr.c
@@ -442,8 +442,6 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 {
 	struct net_device *tunnel_dev, *new_dev;
 	struct ip_tunnel_parm p = { };
-	mm_segment_t oldfs = get_fs();
-	struct ifreq ifr;
 	int err;
 
 	tunnel_dev = __dev_get_by_name(net, "tunl0");
@@ -456,15 +454,11 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 	p.iph.ihl = 5;
 	p.iph.protocol = IPPROTO_IPIP;
 	sprintf(p.name, "dvmrp%d", v->vifc_vifi);
-	ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 
-	if (!tunnel_dev->netdev_ops->ndo_do_ioctl)
+	if (!tunnel_dev->netdev_ops->ndo_tunnel_ctl)
 		goto out;
-
-	set_fs(KERNEL_DS);
-	err = tunnel_dev->netdev_ops->ndo_do_ioctl(tunnel_dev, &ifr,
+	err = tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
 			SIOCADDTUNNEL);
-	set_fs(oldfs);
 	if (err)
 		goto out;
 
@@ -481,10 +475,8 @@ static struct net_device *ipmr_new_tunnel(struct net *net, struct vifctl *v)
 	err = dev_set_allmulti(new_dev, 1);
 	if (err) {
 		dev_close(new_dev);
-		set_fs(KERNEL_DS);
-		tunnel_dev->netdev_ops->ndo_do_ioctl(tunnel_dev, &ifr,
+		tunnel_dev->netdev_ops->ndo_tunnel_ctl(tunnel_dev, &p,
 				SIOCDELTUNNEL);
-		set_fs(oldfs);
 		dev_put(new_dev);
 		new_dev = ERR_PTR(err);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From fd5d687b76b32543f6b2024ebe21f988ed1f8859 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:15 +0200
Subject: sit: refactor ipip6_tunnel_ioctl

Split the ioctl handler into one function per command instead of having
all the logic sit in one giant switch statement.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 368 ++++++++++++++++++++++++++++++++-------------------------
 1 file changed, 210 insertions(+), 158 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 98954830c40b..7c158fdc02da 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -83,6 +83,13 @@ struct sit_net {
 	struct net_device *fb_tunnel_dev;
 };
 
+static inline struct sit_net *dev_to_sit_net(struct net_device *dev)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+
+	return net_generic(t->net, sit_net_id);
+}
+
 /*
  * Must be invoked with rcu_read_lock
  */
@@ -291,14 +298,18 @@ __ipip6_tunnel_locate_prl(struct ip_tunnel *t, __be32 addr)
 
 }
 
-static int ipip6_tunnel_get_prl(struct ip_tunnel *t,
-				struct ip_tunnel_prl __user *a)
+static int ipip6_tunnel_get_prl(struct net_device *dev, struct ifreq *ifr)
 {
+	struct ip_tunnel_prl __user *a = ifr->ifr_ifru.ifru_data;
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_prl kprl, *kp;
 	struct ip_tunnel_prl_entry *prl;
 	unsigned int cmax, c = 0, ca, len;
 	int ret = 0;
 
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+		return -EINVAL;
+
 	if (copy_from_user(&kprl, a, sizeof(kprl)))
 		return -EFAULT;
 	cmax = kprl.datalen / sizeof(kprl);
@@ -441,6 +452,35 @@ out:
 	return err;
 }
 
+static int ipip6_tunnel_prl_ctl(struct net_device *dev, struct ifreq *ifr,
+		int cmd)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_prl prl;
+	int err;
+
+	if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+		return -EINVAL;
+
+	if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
+		return -EFAULT;
+
+	switch (cmd) {
+	case SIOCDELPRL:
+		err = ipip6_tunnel_del_prl(t, &prl);
+		break;
+	case SIOCADDPRL:
+	case SIOCCHGPRL:
+		err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
+		break;
+	}
+	dst_cache_reset(&t->dst_cache);
+	netdev_state_change(dev);
+	return err;
+}
+
 static int
 isatap_chksrc(struct sk_buff *skb, const struct iphdr *iph, struct ip_tunnel *t)
 {
@@ -1151,7 +1191,53 @@ static int ipip6_tunnel_update_6rd(struct ip_tunnel *t,
 	netdev_state_change(t->dev);
 	return 0;
 }
-#endif
+
+static int
+ipip6_tunnel_get6rd(struct net_device *dev, struct ifreq *ifr)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_6rd ip6rd;
+	struct ip_tunnel_parm p;
+
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			return -EFAULT;
+		t = ipip6_tunnel_locate(t->net, &p, 0);
+	}
+	if (!t)
+		t = netdev_priv(dev);
+
+	ip6rd.prefix = t->ip6rd.prefix;
+	ip6rd.relay_prefix = t->ip6rd.relay_prefix;
+	ip6rd.prefixlen = t->ip6rd.prefixlen;
+	ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
+	if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd, sizeof(ip6rd)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+ipip6_tunnel_6rdctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_6rd ip6rd;
+	int err;
+
+	if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+	if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data, sizeof(ip6rd)))
+		return -EFAULT;
+
+	if (cmd != SIOCDEL6RD) {
+		err = ipip6_tunnel_update_6rd(t, &ip6rd);
+		if (err < 0)
+			return err;
+	} else
+		ipip6_tunnel_clone_6rd(dev, dev_to_sit_net(dev));
+	return 0;
+}
+
+#endif /* CONFIG_IPV6_SIT_6RD */
 
 static bool ipip6_valid_ip_proto(u8 ipproto)
 {
@@ -1164,185 +1250,151 @@ static bool ipip6_valid_ip_proto(u8 ipproto)
 }
 
 static int
-ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+__ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
 {
-	int err = 0;
+	if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
+
+	if (!ipip6_valid_ip_proto(p->iph.protocol))
+		return -EINVAL;
+	if (p->iph.version != 4 ||
+	    p->iph.ihl != 5 || (p->iph.frag_off & htons(~IP_DF)))
+		return -EINVAL;
+
+	if (p->iph.ttl)
+		p->iph.frag_off |= htons(IP_DF);
+	return 0;
+}
+
+static int
+ipip6_tunnel_get(struct net_device *dev, struct ifreq *ifr)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
 	struct ip_tunnel_parm p;
-	struct ip_tunnel_prl prl;
+
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			return -EFAULT;
+		t = ipip6_tunnel_locate(t->net, &p, 0);
+	}
+	if (!t)
+		t = netdev_priv(dev);
+
+	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+ipip6_tunnel_add(struct net_device *dev, struct ifreq *ifr)
+{
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct net *net = t->net;
-	struct sit_net *sitn = net_generic(net, sit_net_id);
-#ifdef CONFIG_IPV6_SIT_6RD
-	struct ip_tunnel_6rd ip6rd;
-#endif
+	struct ip_tunnel_parm p;
+	int err;
 
-	switch (cmd) {
-	case SIOCGETTUNNEL:
-#ifdef CONFIG_IPV6_SIT_6RD
-	case SIOCGET6RD:
-#endif
-		if (dev == sitn->fb_tunnel_dev) {
-			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p))) {
-				err = -EFAULT;
-				break;
-			}
-			t = ipip6_tunnel_locate(net, &p, 0);
-			if (!t)
-				t = netdev_priv(dev);
-		}
+	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+		return -EFAULT;
+	err = __ipip6_tunnel_ioctl_validate(t->net, &p);
+	if (err)
+		return err;
 
-		err = -EFAULT;
-		if (cmd == SIOCGETTUNNEL) {
-			memcpy(&p, &t->parms, sizeof(p));
-			if (copy_to_user(ifr->ifr_ifru.ifru_data, &p,
-					 sizeof(p)))
-				goto done;
-#ifdef CONFIG_IPV6_SIT_6RD
+	t = ipip6_tunnel_locate(t->net, &p, 1);
+	if (!t)
+		return -ENOBUFS;
+
+	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+		return -EFAULT;
+	return 0;
+}
+
+static int
+ipip6_tunnel_change(struct net_device *dev, struct ifreq *ifr)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm p;
+	int err;
+
+	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+		return -EFAULT;
+	err = __ipip6_tunnel_ioctl_validate(t->net, &p);
+	if (err)
+		return err;
+
+	t = ipip6_tunnel_locate(t->net, &p, 0);
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+		if (!t)
+			return -ENOENT;
+	} else {
+		if (t) {
+			if (t->dev != dev)
+				return -EEXIST;
 		} else {
-			ip6rd.prefix = t->ip6rd.prefix;
-			ip6rd.relay_prefix = t->ip6rd.relay_prefix;
-			ip6rd.prefixlen = t->ip6rd.prefixlen;
-			ip6rd.relay_prefixlen = t->ip6rd.relay_prefixlen;
-			if (copy_to_user(ifr->ifr_ifru.ifru_data, &ip6rd,
-					 sizeof(ip6rd)))
-				goto done;
-#endif
+			if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
+			    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+				return -EINVAL;
+			t = netdev_priv(dev);
 		}
-		err = 0;
-		break;
 
-	case SIOCADDTUNNEL:
-	case SIOCCHGTUNNEL:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
+		ipip6_tunnel_update(t, &p, t->fwmark);
+	}
 
-		err = -EFAULT;
-		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-			goto done;
-
-		err = -EINVAL;
-		if (!ipip6_valid_ip_proto(p.iph.protocol))
-			goto done;
-		if (p.iph.version != 4 ||
-		    p.iph.ihl != 5 || (p.iph.frag_off&htons(~IP_DF)))
-			goto done;
-		if (p.iph.ttl)
-			p.iph.frag_off |= htons(IP_DF);
-
-		t = ipip6_tunnel_locate(net, &p, cmd == SIOCADDTUNNEL);
-
-		if (dev != sitn->fb_tunnel_dev && cmd == SIOCCHGTUNNEL) {
-			if (t) {
-				if (t->dev != dev) {
-					err = -EEXIST;
-					break;
-				}
-			} else {
-				if (((dev->flags&IFF_POINTOPOINT) && !p.iph.daddr) ||
-				    (!(dev->flags&IFF_POINTOPOINT) && p.iph.daddr)) {
-					err = -EINVAL;
-					break;
-				}
-				t = netdev_priv(dev);
-			}
+	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
+		return -EFAULT;
+	return 0;
+}
 
-			ipip6_tunnel_update(t, &p, t->fwmark);
-		}
+static int
+ipip6_tunnel_del(struct net_device *dev, struct ifreq *ifr)
+{
+	struct ip_tunnel *t = netdev_priv(dev);
+	struct ip_tunnel_parm p;
 
-		if (t) {
-			err = 0;
-			if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
-				err = -EFAULT;
-		} else
-			err = (cmd == SIOCADDTUNNEL ? -ENOBUFS : -ENOENT);
-		break;
+	if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
+		return -EPERM;
 
-	case SIOCDELTUNNEL:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
-
-		if (dev == sitn->fb_tunnel_dev) {
-			err = -EFAULT;
-			if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-				goto done;
-			err = -ENOENT;
-			t = ipip6_tunnel_locate(net, &p, 0);
-			if (!t)
-				goto done;
-			err = -EPERM;
-			if (t == netdev_priv(sitn->fb_tunnel_dev))
-				goto done;
-			dev = t->dev;
-		}
-		unregister_netdevice(dev);
-		err = 0;
-		break;
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
+		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
+			return -EFAULT;
+		t = ipip6_tunnel_locate(t->net, &p, 0);
+		if (!t)
+			return -ENOENT;
+		if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev))
+			return -EPERM;
+		dev = t->dev;
+	}
+	unregister_netdevice(dev);
+	return 0;
+}
 
+static int
+ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
+{
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		return ipip6_tunnel_get(dev, ifr);
+	case SIOCADDTUNNEL:
+		return ipip6_tunnel_add(dev, ifr);
+	case SIOCCHGTUNNEL:
+		return ipip6_tunnel_change(dev, ifr);
+	case SIOCDELTUNNEL:
+		return ipip6_tunnel_del(dev, ifr);
 	case SIOCGETPRL:
-		err = -EINVAL;
-		if (dev == sitn->fb_tunnel_dev)
-			goto done;
-		err = ipip6_tunnel_get_prl(t, ifr->ifr_ifru.ifru_data);
-		break;
-
+		return ipip6_tunnel_get_prl(dev, ifr);
 	case SIOCADDPRL:
 	case SIOCDELPRL:
 	case SIOCCHGPRL:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
-		err = -EINVAL;
-		if (dev == sitn->fb_tunnel_dev)
-			goto done;
-		err = -EFAULT;
-		if (copy_from_user(&prl, ifr->ifr_ifru.ifru_data, sizeof(prl)))
-			goto done;
-
-		switch (cmd) {
-		case SIOCDELPRL:
-			err = ipip6_tunnel_del_prl(t, &prl);
-			break;
-		case SIOCADDPRL:
-		case SIOCCHGPRL:
-			err = ipip6_tunnel_add_prl(t, &prl, cmd == SIOCCHGPRL);
-			break;
-		}
-		dst_cache_reset(&t->dst_cache);
-		netdev_state_change(dev);
-		break;
-
+		return ipip6_tunnel_prl_ctl(dev, ifr, cmd);
 #ifdef CONFIG_IPV6_SIT_6RD
+	case SIOCGET6RD:
+		return ipip6_tunnel_get6rd(dev, ifr);
 	case SIOCADD6RD:
 	case SIOCCHG6RD:
 	case SIOCDEL6RD:
-		err = -EPERM;
-		if (!ns_capable(net->user_ns, CAP_NET_ADMIN))
-			goto done;
-
-		err = -EFAULT;
-		if (copy_from_user(&ip6rd, ifr->ifr_ifru.ifru_data,
-				   sizeof(ip6rd)))
-			goto done;
-
-		if (cmd != SIOCDEL6RD) {
-			err = ipip6_tunnel_update_6rd(t, &ip6rd);
-			if (err < 0)
-				goto done;
-		} else
-			ipip6_tunnel_clone_6rd(dev, sitn);
-
-		err = 0;
-		break;
+		return ipip6_tunnel_6rdctl(dev, ifr, cmd);
 #endif
-
 	default:
-		err = -EINVAL;
+		return -EINVAL;
 	}
-
-done:
-	return err;
 }
 
 static const struct net_device_ops ipip6_netdev_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From f60fe2df931d6c0b41c3d4f8ec3f7a429f977a3f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:16 +0200
Subject: sit: implement ->ndo_tunnel_ctl

Implement the ->ndo_tunnel_ctl method, and use ip_tunnel_ioctl to
handle userspace requests for the SIOCGETTUNNEL, SIOCADDTUNNEL,
SIOCCHGTUNNEL and SIOCDELTUNNEL ioctls.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/sit.c | 73 +++++++++++++++++++++++++++-------------------------------
 1 file changed, 34 insertions(+), 39 deletions(-)

diff --git a/net/ipv6/sit.c b/net/ipv6/sit.c
index 7c158fdc02da..1fbb4dfbb191 100644
--- a/net/ipv6/sit.c
+++ b/net/ipv6/sit.c
@@ -1267,60 +1267,45 @@ __ipip6_tunnel_ioctl_validate(struct net *net, struct ip_tunnel_parm *p)
 }
 
 static int
-ipip6_tunnel_get(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_get(struct net_device *dev, struct ip_tunnel_parm *p)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct ip_tunnel_parm p;
 
-	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
-		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-			return -EFAULT;
-		t = ipip6_tunnel_locate(t->net, &p, 0);
-	}
+	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev)
+		t = ipip6_tunnel_locate(t->net, p, 0);
 	if (!t)
 		t = netdev_priv(dev);
-
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
-		return -EFAULT;
+	memcpy(p, &t->parms, sizeof(*p));
 	return 0;
 }
 
 static int
-ipip6_tunnel_add(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_add(struct net_device *dev, struct ip_tunnel_parm *p)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct ip_tunnel_parm p;
 	int err;
 
-	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-		return -EFAULT;
-	err = __ipip6_tunnel_ioctl_validate(t->net, &p);
+	err = __ipip6_tunnel_ioctl_validate(t->net, p);
 	if (err)
 		return err;
 
-	t = ipip6_tunnel_locate(t->net, &p, 1);
+	t = ipip6_tunnel_locate(t->net, p, 1);
 	if (!t)
 		return -ENOBUFS;
-
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
-		return -EFAULT;
 	return 0;
 }
 
 static int
-ipip6_tunnel_change(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_change(struct net_device *dev, struct ip_tunnel_parm *p)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct ip_tunnel_parm p;
 	int err;
 
-	if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-		return -EFAULT;
-	err = __ipip6_tunnel_ioctl_validate(t->net, &p);
+	err = __ipip6_tunnel_ioctl_validate(t->net, p);
 	if (err)
 		return err;
 
-	t = ipip6_tunnel_locate(t->net, &p, 0);
+	t = ipip6_tunnel_locate(t->net, p, 0);
 	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
 		if (!t)
 			return -ENOENT;
@@ -1329,33 +1314,28 @@ ipip6_tunnel_change(struct net_device *dev, struct ifreq *ifr)
 			if (t->dev != dev)
 				return -EEXIST;
 		} else {
-			if (((dev->flags & IFF_POINTOPOINT) && !p.iph.daddr) ||
-			    (!(dev->flags & IFF_POINTOPOINT) && p.iph.daddr))
+			if (((dev->flags & IFF_POINTOPOINT) && !p->iph.daddr) ||
+			    (!(dev->flags & IFF_POINTOPOINT) && p->iph.daddr))
 				return -EINVAL;
 			t = netdev_priv(dev);
 		}
 
-		ipip6_tunnel_update(t, &p, t->fwmark);
+		ipip6_tunnel_update(t, p, t->fwmark);
 	}
 
-	if (copy_to_user(ifr->ifr_ifru.ifru_data, &t->parms, sizeof(p)))
-		return -EFAULT;
 	return 0;
 }
 
 static int
-ipip6_tunnel_del(struct net_device *dev, struct ifreq *ifr)
+ipip6_tunnel_del(struct net_device *dev, struct ip_tunnel_parm *p)
 {
 	struct ip_tunnel *t = netdev_priv(dev);
-	struct ip_tunnel_parm p;
 
 	if (!ns_capable(t->net->user_ns, CAP_NET_ADMIN))
 		return -EPERM;
 
 	if (dev == dev_to_sit_net(dev)->fb_tunnel_dev) {
-		if (copy_from_user(&p, ifr->ifr_ifru.ifru_data, sizeof(p)))
-			return -EFAULT;
-		t = ipip6_tunnel_locate(t->net, &p, 0);
+		t = ipip6_tunnel_locate(t->net, p, 0);
 		if (!t)
 			return -ENOENT;
 		if (t == netdev_priv(dev_to_sit_net(dev)->fb_tunnel_dev))
@@ -1366,18 +1346,32 @@ ipip6_tunnel_del(struct net_device *dev, struct ifreq *ifr)
 	return 0;
 }
 
+static int
+ipip6_tunnel_ctl(struct net_device *dev, struct ip_tunnel_parm *p, int cmd)
+{
+	switch (cmd) {
+	case SIOCGETTUNNEL:
+		return ipip6_tunnel_get(dev, p);
+	case SIOCADDTUNNEL:
+		return ipip6_tunnel_add(dev, p);
+	case SIOCCHGTUNNEL:
+		return ipip6_tunnel_change(dev, p);
+	case SIOCDELTUNNEL:
+		return ipip6_tunnel_del(dev, p);
+	default:
+		return -EINVAL;
+	}
+}
+
 static int
 ipip6_tunnel_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 {
 	switch (cmd) {
 	case SIOCGETTUNNEL:
-		return ipip6_tunnel_get(dev, ifr);
 	case SIOCADDTUNNEL:
-		return ipip6_tunnel_add(dev, ifr);
 	case SIOCCHGTUNNEL:
-		return ipip6_tunnel_change(dev, ifr);
 	case SIOCDELTUNNEL:
-		return ipip6_tunnel_del(dev, ifr);
+		return ip_tunnel_ioctl(dev, ifr, cmd);
 	case SIOCGETPRL:
 		return ipip6_tunnel_get_prl(dev, ifr);
 	case SIOCADDPRL:
@@ -1404,6 +1398,7 @@ static const struct net_device_ops ipip6_netdev_ops = {
 	.ndo_do_ioctl	= ipip6_tunnel_ioctl,
 	.ndo_get_stats64 = ip_tunnel_get_stats64,
 	.ndo_get_iflink = ip_tunnel_get_iflink,
+	.ndo_tunnel_ctl = ipip6_tunnel_ctl,
 };
 
 static void ipip6_dev_free(struct net_device *dev)
-- 
cgit v1.2.3-59-g8ed1b


From f098846044c9edb359bf2dae4bcf0d537dda22fe Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:17 +0200
Subject: ipv6: stub out even more of addrconf_set_dstaddr if SIT is disabled

There is no point in copying the structure from userspace or looking up
a device if SIT support is disabled and we'll eventually return
-ENODEV anyway.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index ab7e839753ae..8300176f91e7 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2794,6 +2794,9 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
 	struct net_device *dev;
 	int err = -EINVAL;
 
+	if (!IS_ENABLED(CONFIG_IPV6_SIT))
+		return -ENODEV;
+
 	rtnl_lock();
 
 	err = -EFAULT;
@@ -2806,7 +2809,6 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
 	if (!dev)
 		goto err_exit;
 
-#if IS_ENABLED(CONFIG_IPV6_SIT)
 	if (dev->type == ARPHRD_SIT) {
 		const struct net_device_ops *ops = dev->netdev_ops;
 		struct ifreq ifr;
@@ -2842,7 +2844,6 @@ int addrconf_set_dstaddr(struct net *net, void __user *arg)
 			err = dev_open(dev, NULL);
 		}
 	}
-#endif
 
 err_exit:
 	rtnl_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 68ad6886dd3fb7d0b336363a90ace55b22f0dcb9 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:18 +0200
Subject: ipv6: streamline addrconf_set_dstaddr

Factor out an addrconf_set_sit_dstaddr helper for the actual work if we
found a SIT device, and only hold the rtnl lock around the device lookup
and that new helper, as there is no point in holding it over a
copy_from_user call.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 87 +++++++++++++++++++++++------------------------------
 1 file changed, 38 insertions(+), 49 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index 8300176f91e7..c827edf87741 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2783,6 +2783,38 @@ put:
 	in6_dev_put(in6_dev);
 }
 
+static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
+		struct in6_ifreq *ireq)
+{
+	struct ip_tunnel_parm p = { };
+	mm_segment_t oldfs = get_fs();
+	struct ifreq ifr;
+	int err;
+
+	if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
+		return -EADDRNOTAVAIL;
+
+	p.iph.daddr = ireq->ifr6_addr.s6_addr32[3];
+	p.iph.version = 4;
+	p.iph.ihl = 5;
+	p.iph.protocol = IPPROTO_IPV6;
+	p.iph.ttl = 64;
+	ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
+
+	if (!dev->netdev_ops->ndo_do_ioctl)
+		return -EOPNOTSUPP;
+	set_fs(KERNEL_DS);
+	err = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
+	set_fs(oldfs);
+	if (err)
+		return err;
+
+	dev = __dev_get_by_name(net, p.name);
+	if (!dev)
+		return -ENOBUFS;
+	return dev_open(dev, NULL);
+}
+
 /*
  *	Set destination address.
  *	Special case for SIT interfaces where we create a new "virtual"
@@ -2790,62 +2822,19 @@ put:
  */
 int addrconf_set_dstaddr(struct net *net, void __user *arg)
 {
-	struct in6_ifreq ireq;
 	struct net_device *dev;
-	int err = -EINVAL;
+	struct in6_ifreq ireq;
+	int err = -ENODEV;
 
 	if (!IS_ENABLED(CONFIG_IPV6_SIT))
 		return -ENODEV;
-
-	rtnl_lock();
-
-	err = -EFAULT;
 	if (copy_from_user(&ireq, arg, sizeof(struct in6_ifreq)))
-		goto err_exit;
+		return -EFAULT;
 
+	rtnl_lock();
 	dev = __dev_get_by_index(net, ireq.ifr6_ifindex);
-
-	err = -ENODEV;
-	if (!dev)
-		goto err_exit;
-
-	if (dev->type == ARPHRD_SIT) {
-		const struct net_device_ops *ops = dev->netdev_ops;
-		struct ifreq ifr;
-		struct ip_tunnel_parm p;
-
-		err = -EADDRNOTAVAIL;
-		if (!(ipv6_addr_type(&ireq.ifr6_addr) & IPV6_ADDR_COMPATv4))
-			goto err_exit;
-
-		memset(&p, 0, sizeof(p));
-		p.iph.daddr = ireq.ifr6_addr.s6_addr32[3];
-		p.iph.saddr = 0;
-		p.iph.version = 4;
-		p.iph.ihl = 5;
-		p.iph.protocol = IPPROTO_IPV6;
-		p.iph.ttl = 64;
-		ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
-
-		if (ops->ndo_do_ioctl) {
-			mm_segment_t oldfs = get_fs();
-
-			set_fs(KERNEL_DS);
-			err = ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-			set_fs(oldfs);
-		} else
-			err = -EOPNOTSUPP;
-
-		if (err == 0) {
-			err = -ENOBUFS;
-			dev = __dev_get_by_name(net, p.name);
-			if (!dev)
-				goto err_exit;
-			err = dev_open(dev, NULL);
-		}
-	}
-
-err_exit:
+	if (dev && dev->type == ARPHRD_SIT)
+		err = addrconf_set_sit_dstaddr(net, dev, &ireq);
 	rtnl_unlock();
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 8e3db0bbb29aa8d135341c5327bae738e93932a7 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Tue, 19 May 2020 15:03:19 +0200
Subject: ipv6: use ->ndo_tunnel_ctl in addrconf_set_dstaddr

Use the new ->ndo_tunnel_ctl instead of overriding the address limit
and using ->ndo_do_ioctl just to do a pointless user copy.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/addrconf.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/net/ipv6/addrconf.c b/net/ipv6/addrconf.c
index c827edf87741..09cfbf5dd7ce 100644
--- a/net/ipv6/addrconf.c
+++ b/net/ipv6/addrconf.c
@@ -2787,8 +2787,6 @@ static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
 		struct in6_ifreq *ireq)
 {
 	struct ip_tunnel_parm p = { };
-	mm_segment_t oldfs = get_fs();
-	struct ifreq ifr;
 	int err;
 
 	if (!(ipv6_addr_type(&ireq->ifr6_addr) & IPV6_ADDR_COMPATv4))
@@ -2799,13 +2797,10 @@ static int addrconf_set_sit_dstaddr(struct net *net, struct net_device *dev,
 	p.iph.ihl = 5;
 	p.iph.protocol = IPPROTO_IPV6;
 	p.iph.ttl = 64;
-	ifr.ifr_ifru.ifru_data = (__force void __user *)&p;
 
-	if (!dev->netdev_ops->ndo_do_ioctl)
+	if (!dev->netdev_ops->ndo_tunnel_ctl)
 		return -EOPNOTSUPP;
-	set_fs(KERNEL_DS);
-	err = dev->netdev_ops->ndo_do_ioctl(dev, &ifr, SIOCADDTUNNEL);
-	set_fs(oldfs);
+	err = dev->netdev_ops->ndo_tunnel_ctl(dev, &p, SIOCADDTUNNEL);
 	if (err)
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From aa7ca7266fc49966844ac1f868085a49092b9b5d Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 7 Apr 2020 14:07:10 -0700
Subject: igc: Refactor ethertype filtering code

The whole ethertype filtering code is implemented in igc_ethtool.c and
mixes logic from ethtool and core parts. This patch refactors it so core
logic is moved to igc_main.c, aligning the ethertype filtering code
organization with the rest of the filtering code from the driver (MAC
address and VLAN priority).

Besides moving code to igc_main.c, this patch also does some minor
improvements to the code. Below are some highlights.

In case all filters are already in use and the user tries to add another
filter, we return -ENOSPC instead of -EINVAL so a more meaningful error
code is provided. This also aligns with the behavior implemented in MAC
address filtering code.

With this code refactoring, 'etype_bitmap' array in struct igc_adapter
and 'etype_reg_index' in struct igc_nfc_filter are not needed anymore
and are removed.

Log messages are added to help debugging the ethertype filtering code.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  4 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 67 +++-----------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 93 ++++++++++++++++++++++++++++
 3 files changed, 103 insertions(+), 61 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 8389569aea8a..812e1cd695cf 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -189,7 +189,6 @@ struct igc_adapter {
 
 	/* lock for RX network flow classification filter */
 	spinlock_t nfc_lock;
-	bool etype_bitmap[MAX_ETYPE_FILTER];
 
 	struct igc_mac_addr *mac_table;
 
@@ -238,6 +237,8 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
 			     int queue);
 void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio);
+int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue);
+int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype);
 void igc_update_stats(struct igc_adapter *adapter);
 
 /* igc_dump declarations */
@@ -466,7 +467,6 @@ struct igc_nfc_filter {
 	struct hlist_node nfc_node;
 	struct igc_nfc_input filter;
 	unsigned long cookie;
-	u16 etype_reg_index;
 	u16 sw_idx;
 	u16 action;
 };
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index c5be8b936963..3cdb88a5eb01 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1183,46 +1183,6 @@ static int igc_set_rss_hash_opt(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_rxnfc_write_etype_filter(struct igc_adapter *adapter,
-					struct igc_nfc_filter *input)
-{
-	struct igc_hw *hw = &adapter->hw;
-	u8 i;
-	u32 etqf;
-	u16 etype;
-
-	/* find an empty etype filter register */
-	for (i = 0; i < MAX_ETYPE_FILTER; ++i) {
-		if (!adapter->etype_bitmap[i])
-			break;
-	}
-	if (i == MAX_ETYPE_FILTER) {
-		netdev_err(adapter->netdev,
-			   "ethtool -N: etype filters are all used\n");
-		return -EINVAL;
-	}
-
-	adapter->etype_bitmap[i] = true;
-
-	etqf = rd32(IGC_ETQF(i));
-	etype = ntohs(input->filter.etype & ETHER_TYPE_FULL_MASK);
-
-	etqf |= IGC_ETQF_FILTER_ENABLE;
-	etqf &= ~IGC_ETQF_ETYPE_MASK;
-	etqf |= (etype & IGC_ETQF_ETYPE_MASK);
-
-	etqf &= ~IGC_ETQF_QUEUE_MASK;
-	etqf |= ((input->action << IGC_ETQF_QUEUE_SHIFT)
-		& IGC_ETQF_QUEUE_MASK);
-	etqf |= IGC_ETQF_QUEUE_ENABLE;
-
-	wr32(IGC_ETQF(i), etqf);
-
-	input->etype_reg_index = i;
-
-	return 0;
-}
-
 int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 {
 	struct igc_hw *hw = &adapter->hw;
@@ -1236,7 +1196,9 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		err = igc_rxnfc_write_etype_filter(adapter, input);
+		u16 etype = ntohs(input->filter.etype);
+
+		err = igc_add_etype_filter(adapter, etype, input->action);
 		if (err)
 			return err;
 	}
@@ -1267,26 +1229,13 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	return 0;
 }
 
-static void igc_clear_etype_filter_regs(struct igc_adapter *adapter,
-					u16 reg_index)
-{
-	struct igc_hw *hw = &adapter->hw;
-	u32 etqf = rd32(IGC_ETQF(reg_index));
-
-	etqf &= ~IGC_ETQF_QUEUE_ENABLE;
-	etqf &= ~IGC_ETQF_QUEUE_MASK;
-	etqf &= ~IGC_ETQF_FILTER_ENABLE;
-
-	wr32(IGC_ETQF(reg_index), etqf);
-
-	adapter->etype_bitmap[reg_index] = false;
-}
-
 int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 {
-	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
-		igc_clear_etype_filter_regs(adapter,
-					    input->etype_reg_index);
+	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		u16 etype = ntohs(input->filter.etype);
+
+		igc_del_etype_filter(adapter, etype);
+	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
 		int prio = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) >>
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 7e59c0393dbc..0df5617eb9d0 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2366,6 +2366,99 @@ void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
 		   prio);
 }
 
+static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
+		u32 etqf = rd32(IGC_ETQF(i));
+
+		if (!(etqf & IGC_ETQF_FILTER_ENABLE))
+			return i;
+	}
+
+	return -1;
+}
+
+/**
+ * igc_add_etype_filter() - Add ethertype filter
+ * @adapter: Pointer to adapter where the filter should be added
+ * @etype: Ethertype value
+ * @queue: If non-negative, queue assignment feature is enabled and frames
+ *         matching the filter are enqueued onto 'queue'. Otherwise, queue
+ *         assignment is disabled.
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int index;
+	u32 etqf;
+
+	index = igc_get_avail_etype_filter_slot(adapter);
+	if (index < 0)
+		return -ENOSPC;
+
+	etqf = rd32(IGC_ETQF(index));
+
+	etqf &= ~IGC_ETQF_ETYPE_MASK;
+	etqf |= etype;
+
+	if (queue >= 0) {
+		etqf &= ~IGC_ETQF_QUEUE_MASK;
+		etqf |= (queue << IGC_ETQF_QUEUE_SHIFT);
+		etqf |= IGC_ETQF_QUEUE_ENABLE;
+	}
+
+	etqf |= IGC_ETQF_FILTER_ENABLE;
+
+	wr32(IGC_ETQF(index), etqf);
+
+	netdev_dbg(adapter->netdev, "Add ethertype filter: etype %04x queue %d\n",
+		   etype, queue);
+	return 0;
+}
+
+static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int i;
+
+	for (i = 0; i < MAX_ETYPE_FILTER; i++) {
+		u32 etqf = rd32(IGC_ETQF(i));
+
+		if ((etqf & IGC_ETQF_ETYPE_MASK) == etype)
+			return i;
+	}
+
+	return -1;
+}
+
+/**
+ * igc_del_etype_filter() - Delete ethertype filter
+ * @adapter: Pointer to adapter where the filter should be deleted from
+ * @etype: Ethertype value
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
+{
+	struct igc_hw *hw = &adapter->hw;
+	int index;
+
+	index = igc_find_etype_filter(adapter, etype);
+	if (index < 0)
+		return -ENOENT;
+
+	wr32(IGC_ETQF(index), 0);
+
+	netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
+		   etype);
+	return 0;
+}
+
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
-- 
cgit v1.2.3-59-g8ed1b


From 81e330619e45fac4abeb01be1eb88f18c53aad46 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 7 Apr 2020 14:07:11 -0700
Subject: igc: Dump ETQF registers

This patch adds the EType Queue Filter (ETQF) registers to the list of
registers dumped by igc_get_regs().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 3cdb88a5eb01..c6586e2be3a8 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -318,6 +318,9 @@ static void igc_get_regs(struct net_device *netdev,
 		regs_buff[188 + i] = rd32(IGC_RAH(i));
 
 	regs_buff[204] = rd32(IGC_VLANPQF);
+
+	for (i = 0; i < 8; i++)
+		regs_buff[205 + i] = rd32(IGC_ETQF(i));
 }
 
 static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
-- 
cgit v1.2.3-59-g8ed1b


From 551555a7614abda5db60221c012d6c19a7a2d016 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 12 Apr 2020 09:28:29 +0300
Subject: igc: Remove unused IGC_ICS_DRSTA define

The device reset asserted define for the interrupt cause register is not
in use for the i225 device.
This patch cleans up this define.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 6909826bc747..51d8a15e239c 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -244,7 +244,6 @@
 /* Interrupt Cause Set */
 #define IGC_ICS_LSC		IGC_ICR_LSC       /* Link Status Change */
 #define IGC_ICS_RXDMT0		IGC_ICR_RXDMT0    /* rx desc min. threshold */
-#define IGC_ICS_DRSTA		IGC_ICR_DRSTA     /* Device Reset Aserted */
 
 #define IGC_ICR_DOUTSYNC	0x10000000 /* NIC DMA out of sync */
 #define IGC_EITR_CNT_IGNR	0x80000000 /* Don't reset counters on write */
-- 
cgit v1.2.3-59-g8ed1b


From e5264212eb622fbc5704571c8e490c2b5fc718b9 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 12 Apr 2020 17:21:58 +0300
Subject: igc: Remove unused registers

The Tx data FIFO Head/Tail, Saved and Packet Count registers are
not applicable to the i225 LAN controller.
This patch cleans up these registers.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_dump.c | 4 ----
 drivers/net/ethernet/intel/igc/igc_regs.h | 5 -----
 2 files changed, 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_dump.c b/drivers/net/ethernet/intel/igc/igc_dump.c
index 4ad32d98f77f..4b9ec7d0b727 100644
--- a/drivers/net/ethernet/intel/igc/igc_dump.c
+++ b/drivers/net/ethernet/intel/igc/igc_dump.c
@@ -35,10 +35,6 @@ static const struct igc_reg_info igc_reg_info_tbl[] = {
 	{IGC_TDH(0), "TDH"},
 	{IGC_TDT(0), "TDT"},
 	{IGC_TXDCTL(0), "TXDCTL"},
-	{IGC_TDFH, "TDFH"},
-	{IGC_TDFT, "TDFT"},
-	{IGC_TDFHS, "TDFHS"},
-	{IGC_TDFPC, "TDFPC"},
 
 	/* List Terminator */
 	{}
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 763a24d52865..61db951f0947 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -17,11 +17,6 @@
 /* Internal Packet Buffer Size Registers */
 #define IGC_RXPBS		0x02404  /* Rx Packet Buffer Size - RW */
 #define IGC_TXPBS		0x03404  /* Tx Packet Buffer Size - RW */
-#define IGC_TDFH		0x03410  /* Tx Data FIFO Head - RW */
-#define IGC_TDFT		0x03418  /* Tx Data FIFO Tail - RW */
-#define IGC_TDFHS		0x03420  /* Tx Data FIFO Head Saved - RW */
-#define IGC_TDFTS		0x03428  /* Tx Data FIFO Tail Saved - RW */
-#define IGC_TDFPC		0x03430  /* Tx Data FIFO Packet Count - RW */
 
 /* NVM  Register Descriptions */
 #define IGC_EERD		0x12014  /* EEprom mode read - RW */
-- 
cgit v1.2.3-59-g8ed1b


From 4f65e2f483b6f764c15094d14dd53dda048a4048 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 19 May 2020 15:50:12 -0700
Subject: net: unexport skb_gro_receive()

skb_gro_receive() used to be used by SCTP, but this is no longer the case.

skb_gro_receive_list() is in the same category: never used from modules.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/core/skbuff.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/core/skbuff.c b/net/core/skbuff.c
index 35a133c6d13b..b8afefe6f6b6 100644
--- a/net/core/skbuff.c
+++ b/net/core/skbuff.c
@@ -3727,7 +3727,6 @@ int skb_gro_receive_list(struct sk_buff *p, struct sk_buff *skb)
 
 	return 0;
 }
-EXPORT_SYMBOL_GPL(skb_gro_receive_list);
 
 /**
  *	skb_segment - Perform protocol segmentation on skb.
@@ -4191,7 +4190,6 @@ done:
 	NAPI_GRO_CB(skb)->same_flow = 1;
 	return 0;
 }
-EXPORT_SYMBOL_GPL(skb_gro_receive);
 
 #ifdef CONFIG_SKB_EXTENSIONS
 #define SKB_EXT_ALIGN_VALUE	8
-- 
cgit v1.2.3-59-g8ed1b


From 3ca44c16b0dcc764b641ee4ac226909f5c421aa3 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 19 May 2020 13:25:19 -0700
Subject: Bluetooth: Consolidate encryption handling in hci_encrypt_cfm

This makes hci_encrypt_cfm call hci_connect_cfm in case the connection
state is BT_CONFIG so callers don't have to check the state.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 include/net/bluetooth/hci_core.h | 20 ++++++++++++++++++--
 net/bluetooth/hci_event.c        | 28 +++-------------------------
 2 files changed, 21 insertions(+), 27 deletions(-)

diff --git a/include/net/bluetooth/hci_core.h b/include/net/bluetooth/hci_core.h
index 5dcf85f186c6..cdd4f1db8670 100644
--- a/include/net/bluetooth/hci_core.h
+++ b/include/net/bluetooth/hci_core.h
@@ -1381,10 +1381,26 @@ static inline void hci_auth_cfm(struct hci_conn *conn, __u8 status)
 		conn->security_cfm_cb(conn, status);
 }
 
-static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status,
-								__u8 encrypt)
+static inline void hci_encrypt_cfm(struct hci_conn *conn, __u8 status)
 {
 	struct hci_cb *cb;
+	__u8 encrypt;
+
+	if (conn->state == BT_CONFIG) {
+		if (status)
+			conn->state = BT_CONNECTED;
+
+		hci_connect_cfm(conn, status);
+		hci_conn_drop(conn);
+		return;
+	}
+
+	if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
+		encrypt = 0x00;
+	else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
+		encrypt = 0x02;
+	else
+		encrypt = 0x01;
 
 	if (conn->sec_level == BT_SECURITY_SDP)
 		conn->sec_level = BT_SECURITY_LOW;
diff --git a/net/bluetooth/hci_event.c b/net/bluetooth/hci_event.c
index f024b3d57a1c..cfeaee347db3 100644
--- a/net/bluetooth/hci_event.c
+++ b/net/bluetooth/hci_event.c
@@ -2931,7 +2931,7 @@ static void hci_auth_complete_evt(struct hci_dev *hdev, struct sk_buff *skb)
 				     &cp);
 		} else {
 			clear_bit(HCI_CONN_ENCRYPT_PEND, &conn->flags);
-			hci_encrypt_cfm(conn, ev->status, 0x00);
+			hci_encrypt_cfm(conn, ev->status);
 		}
 	}
 
@@ -3016,22 +3016,7 @@ static void read_enc_key_size_complete(struct hci_dev *hdev, u8 status,
 		conn->enc_key_size = rp->key_size;
 	}
 
-	if (conn->state == BT_CONFIG) {
-		conn->state = BT_CONNECTED;
-		hci_connect_cfm(conn, 0);
-		hci_conn_drop(conn);
-	} else {
-		u8 encrypt;
-
-		if (!test_bit(HCI_CONN_ENCRYPT, &conn->flags))
-			encrypt = 0x00;
-		else if (test_bit(HCI_CONN_AES_CCM, &conn->flags))
-			encrypt = 0x02;
-		else
-			encrypt = 0x01;
-
-		hci_encrypt_cfm(conn, 0, encrypt);
-	}
+	hci_encrypt_cfm(conn, 0);
 
 unlock:
 	hci_dev_unlock(hdev);
@@ -3149,14 +3134,7 @@ static void hci_encrypt_change_evt(struct hci_dev *hdev, struct sk_buff *skb)
 	}
 
 notify:
-	if (conn->state == BT_CONFIG) {
-		if (!ev->status)
-			conn->state = BT_CONNECTED;
-
-		hci_connect_cfm(conn, ev->status);
-		hci_conn_drop(conn);
-	} else
-		hci_encrypt_cfm(conn, ev->status, ev->encrypt);
+	hci_encrypt_cfm(conn, ev->status);
 
 unlock:
 	hci_dev_unlock(hdev);
-- 
cgit v1.2.3-59-g8ed1b


From 755dfcbca83710fa967d0efa7c5bb601f871a747 Mon Sep 17 00:00:00 2001
From: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Date: Tue, 19 May 2020 13:25:17 -0700
Subject: Bluetooth: Fix assuming EIR flags can result in SSP authentication

EIR flags should only hint whether SSP may be supported; we shall verify
this using the actual features, as the SSP bits may be disabled in
the lower layers, which would result in legacy authentication being
used.

Signed-off-by: Luiz Augusto von Dentz <luiz.von.dentz@intel.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/hci_conn.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/net/bluetooth/hci_conn.c b/net/bluetooth/hci_conn.c
index 07c34c55fc50..307800fd18e6 100644
--- a/net/bluetooth/hci_conn.c
+++ b/net/bluetooth/hci_conn.c
@@ -225,8 +225,6 @@ static void hci_acl_create_connection(struct hci_conn *conn)
 		}
 
 		memcpy(conn->dev_class, ie->data.dev_class, 3);
-		if (ie->data.ssp_mode > 0)
-			set_bit(HCI_CONN_SSP_ENABLED, &conn->flags);
 	}
 
 	cp.pkt_type = cpu_to_le16(conn->pkt_type);
-- 
cgit v1.2.3-59-g8ed1b


From 85e90d9391f57436b6f7f00503de370a657420ba Mon Sep 17 00:00:00 2001
From: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Date: Wed, 20 May 2020 09:32:28 -0700
Subject: Bluetooth: hci_qca: Fix uninitialized access to hdev

hdev is always allocated and not only when power control is required.

Reported-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: Abhishek Pandit-Subedi <abhishekpandit@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index 26efe822f6e5..e4a68238fcb9 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1947,8 +1947,9 @@ static int qca_serdev_probe(struct serdev_device *serdev)
 		}
 	}
 
+	hdev = qcadev->serdev_hu.hdev;
+
 	if (power_ctrl_enabled) {
-		hdev = qcadev->serdev_hu.hdev;
 		set_bit(HCI_QUIRK_NON_PERSISTENT_SETUP, &hdev->quirks);
 		hdev->shutdown = qca_power_off;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f78cdbd75a57245ecc68f5a40e470933426a082b Mon Sep 17 00:00:00 2001
From: John Hubbard <jhubbard@nvidia.com>
Date: Wed, 20 May 2020 12:41:47 -0700
Subject: rds: fix crash in rds_info_getsockopt()

The conversion to pin_user_pages() had a bug: it overlooked
the case of allocation of pages failing. Fix that by restoring
an equivalent check.

Reported-by: syzbot+118ac0af4ac7f785a45b@syzkaller.appspotmail.com
Fixes: dbfe7d74376e ("rds: convert get_user_pages() --> pin_user_pages()")

Cc: David S. Miller <davem@davemloft.net>
Cc: Jakub Kicinski <kuba@kernel.org>
Cc: netdev@vger.kernel.org
Cc: linux-rdma@vger.kernel.org
Cc: rds-devel@oss.oracle.com
Signed-off-by: John Hubbard <jhubbard@nvidia.com>
Acked-by: Santosh Shilimkar <santosh.shilimkar@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/rds/info.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/rds/info.c b/net/rds/info.c
index e1d63563e81c..b6b46a8214a0 100644
--- a/net/rds/info.c
+++ b/net/rds/info.c
@@ -234,7 +234,8 @@ call_func:
 		ret = -EFAULT;
 
 out:
-	unpin_user_pages(pages, nr_pages);
+	if (pages)
+		unpin_user_pages(pages, nr_pages);
 	kfree(pages);
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 63287de66df11308d239483415d67fe94079f47b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 May 2020 20:58:17 -0400
Subject: lift compat definitions of mcast [sg]etsockopt requests into
 net/compat.h

We want to get rid of compat_mc_[sg]etsockopt() and to have that stuff
handled without compat_alloc_user_space(), extra copying through
userland, etc.  To do that we'll need ipv4 and ipv6 instances of
->compat_[sg]etsockopt() to manipulate the 32bit variants of mcast
requests, so we need to move the definitions of those out of net/compat.c
and into a public header.

This patch just does a mechanical move to include/net/compat.h

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/compat.h | 24 ++++++++++++++++++++++++
 net/compat.c         | 25 -------------------------
 2 files changed, 24 insertions(+), 25 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index 2b5e1f7ba153..69a8cd29c0ae 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -74,4 +74,28 @@ int compat_mc_getsockopt(struct sock *, int, int, char __user *, int __user *,
 			 int (*)(struct sock *, int, int, char __user *,
 				 int __user *));
 
+struct compat_group_req {
+	__u32				 gr_interface;
+	struct __kernel_sockaddr_storage gr_group
+		__aligned(4);
+} __packed;
+
+struct compat_group_source_req {
+	__u32				 gsr_interface;
+	struct __kernel_sockaddr_storage gsr_group
+		__aligned(4);
+	struct __kernel_sockaddr_storage gsr_source
+		__aligned(4);
+} __packed;
+
+struct compat_group_filter {
+	__u32				 gf_interface;
+	struct __kernel_sockaddr_storage gf_group
+		__aligned(4);
+	__u32				 gf_fmode;
+	__u32				 gf_numsrc;
+	struct __kernel_sockaddr_storage gf_slist[1]
+		__aligned(4);
+} __packed;
+
 #endif /* NET_COMPAT_H */
diff --git a/net/compat.c b/net/compat.c
index 69fc6d1e4e6e..032114de4fec 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -448,34 +448,9 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
 	return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
 }
 
-struct compat_group_req {
-	__u32				 gr_interface;
-	struct __kernel_sockaddr_storage gr_group
-		__aligned(4);
-} __packed;
-
-struct compat_group_source_req {
-	__u32				 gsr_interface;
-	struct __kernel_sockaddr_storage gsr_group
-		__aligned(4);
-	struct __kernel_sockaddr_storage gsr_source
-		__aligned(4);
-} __packed;
-
-struct compat_group_filter {
-	__u32				 gf_interface;
-	struct __kernel_sockaddr_storage gf_group
-		__aligned(4);
-	__u32				 gf_fmode;
-	__u32				 gf_numsrc;
-	struct __kernel_sockaddr_storage gf_slist[1]
-		__aligned(4);
-} __packed;
-
 #define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
 			sizeof(struct __kernel_sockaddr_storage))
 
-
 int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 	char __user *optval, unsigned int optlen,
 	int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
-- 
cgit v1.2.3-59-g8ed1b


From e9c375fb5edeb550786d1436784db909bf672e9f Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sat, 9 May 2020 21:16:36 -0400
Subject: compat_ip{,v6}_setsockopt(): enumerate MCAST_... options explicitly

We want to check if optname is among the MCAST_... ones; do that as
an explicit switch.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c   | 10 +++++++++-
 net/ipv6/ipv6_sockglue.c | 10 +++++++++-
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 8206047d70b6..3c2c6cd3933b 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1272,9 +1272,17 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
 	if (level != SOL_IP)
 		return -ENOPROTOOPT;
 
-	if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+	switch (optname) {
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	case MCAST_MSFILTER:
 		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
 			ip_setsockopt);
+	}
 
 	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
 #ifdef CONFIG_NETFILTER
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index a0e50cc57e54..96e3f603c8d8 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -973,9 +973,17 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	if (level != SOL_IPV6)
 		return -ENOPROTOOPT;
 
-	if (optname >= MCAST_JOIN_GROUP && optname <= MCAST_MSFILTER)
+	switch (optname) {
+	case MCAST_JOIN_GROUP:
+	case MCAST_LEAVE_GROUP:
+	case MCAST_JOIN_SOURCE_GROUP:
+	case MCAST_LEAVE_SOURCE_GROUP:
+	case MCAST_BLOCK_SOURCE:
+	case MCAST_UNBLOCK_SOURCE:
+	case MCAST_MSFILTER:
 		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
 			ipv6_setsockopt);
+	}
 
 	err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
 #ifdef CONFIG_NETFILTER
-- 
cgit v1.2.3-59-g8ed1b


From 931ca7ab7fe804d77bc6952f1512950c0d870f26 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Mar 2020 17:18:30 -0400
Subject: ip*_mc_gsfget(): lift copyout of struct group_filter into callers

Pass the userland pointer to the array in the tail of struct group_filter,
so that only that part gets copied out by ip_mc_gsfget() and
ip6_mc_msfget(); copyout of everything else is done in the callers.
Rationale: reuse for compat; the array is the same in native and compat,
while the layout of the parts before it is different for compat.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/linux/igmp.h     |  2 +-
 include/net/ipv6.h       |  2 +-
 net/ipv4/igmp.c          | 18 +++++-------------
 net/ipv4/ip_sockglue.c   | 19 ++++++++++++++-----
 net/ipv6/ipv6_sockglue.c | 18 ++++++++++++++----
 net/ipv6/mcast.c         | 10 +++-------
 6 files changed, 38 insertions(+), 31 deletions(-)

diff --git a/include/linux/igmp.h b/include/linux/igmp.h
index faa6586a5783..64ce8cd1cfaf 100644
--- a/include/linux/igmp.h
+++ b/include/linux/igmp.h
@@ -123,7 +123,7 @@ extern int ip_mc_msfilter(struct sock *sk, struct ip_msfilter *msf,int ifindex);
 extern int ip_mc_msfget(struct sock *sk, struct ip_msfilter *msf,
 		struct ip_msfilter __user *optval, int __user *optlen);
 extern int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
-		struct group_filter __user *optval, int __user *optlen);
+			struct sockaddr_storage __user *p);
 extern int ip_mc_sf_allow(struct sock *sk, __be32 local, __be32 rmt,
 			  int dif, int sdif);
 extern void ip_mc_init_dev(struct in_device *);
diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 5fc3a9d7b053..c45eb78d970f 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1138,7 +1138,7 @@ int ip6_mc_source(int add, int omode, struct sock *sk,
 		  struct group_source_req *pgsr);
 int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
-		  struct group_filter __user *optval, int __user *optlen);
+		  struct sockaddr_storage __user *p);
 
 #ifdef CONFIG_PROC_FS
 int ac6_proc_init(struct net *net);
diff --git a/net/ipv4/igmp.c b/net/ipv4/igmp.c
index 47f0502b2101..7b272bbed2b4 100644
--- a/net/ipv4/igmp.c
+++ b/net/ipv4/igmp.c
@@ -2565,9 +2565,9 @@ done:
 }
 
 int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
-	struct group_filter __user *optval, int __user *optlen)
+	struct sockaddr_storage __user *p)
 {
-	int err, i, count, copycount;
+	int i, count, copycount;
 	struct sockaddr_in *psin;
 	__be32 addr;
 	struct ip_mc_socklist *pmc;
@@ -2583,37 +2583,29 @@ int ip_mc_gsfget(struct sock *sk, struct group_filter *gsf,
 	if (!ipv4_is_multicast(addr))
 		return -EINVAL;
 
-	err = -EADDRNOTAVAIL;
-
 	for_each_pmc_rtnl(inet, pmc) {
 		if (pmc->multi.imr_multiaddr.s_addr == addr &&
 		    pmc->multi.imr_ifindex == gsf->gf_interface)
 			break;
 	}
 	if (!pmc)		/* must have a prior join */
-		goto done;
+		return -EADDRNOTAVAIL;
 	gsf->gf_fmode = pmc->sfmode;
 	psl = rtnl_dereference(pmc->sflist);
 	count = psl ? psl->sl_count : 0;
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
-	if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
-	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
-		return -EFAULT;
-	}
-	for (i = 0; i < copycount; i++) {
+	for (i = 0; i < copycount; i++, p++) {
 		struct sockaddr_storage ss;
 
 		psin = (struct sockaddr_in *)&ss;
 		memset(&ss, 0, sizeof(ss));
 		psin->sin_family = AF_INET;
 		psin->sin_addr.s_addr = psl->sl_addr[i];
-		if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+		if (copy_to_user(p, &ss, sizeof(ss)))
 			return -EFAULT;
 	}
 	return 0;
-done:
-	return err;
 }
 
 /*
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 3c2c6cd3933b..e3703a3e7ef4 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1473,19 +1473,28 @@ static int do_ip_getsockopt(struct sock *sk, int level, int optname,
 	}
 	case MCAST_MSFILTER:
 	{
+		struct group_filter __user *p = (void __user *)optval;
 		struct group_filter gsf;
+		const int size0 = offsetof(struct group_filter, gf_slist);
+		int num;
 
-		if (len < GROUP_FILTER_SIZE(0)) {
+		if (len < size0) {
 			err = -EINVAL;
 			goto out;
 		}
-		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0))) {
+		if (copy_from_user(&gsf, p, size0)) {
 			err = -EFAULT;
 			goto out;
 		}
-		err = ip_mc_gsfget(sk, &gsf,
-				   (struct group_filter __user *)optval,
-				   optlen);
+		num = gsf.gf_numsrc;
+		err = ip_mc_gsfget(sk, &gsf, p->gf_slist);
+		if (err)
+			goto out;
+		if (gsf.gf_numsrc < num)
+			num = gsf.gf_numsrc;
+		if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+		    copy_to_user(p, &gsf, size0))
+			err = -EFAULT;
 		goto out;
 	}
 	case IP_MULTICAST_ALL:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 96e3f603c8d8..e4a62ca1a3d0 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1056,18 +1056,28 @@ static int do_ipv6_getsockopt(struct sock *sk, int level, int optname,
 		break;
 	case MCAST_MSFILTER:
 	{
+		struct group_filter __user *p = (void __user *)optval;
 		struct group_filter gsf;
+		const int size0 = offsetof(struct group_filter, gf_slist);
+		int num;
 		int err;
 
-		if (len < GROUP_FILTER_SIZE(0))
+		if (len < size0)
 			return -EINVAL;
-		if (copy_from_user(&gsf, optval, GROUP_FILTER_SIZE(0)))
+		if (copy_from_user(&gsf, p, size0))
 			return -EFAULT;
 		if (gsf.gf_group.ss_family != AF_INET6)
 			return -EADDRNOTAVAIL;
+		num = gsf.gf_numsrc;
 		lock_sock(sk);
-		err = ip6_mc_msfget(sk, &gsf,
-			(struct group_filter __user *)optval, optlen);
+		err = ip6_mc_msfget(sk, &gsf, p->gf_slist);
+		if (!err) {
+			if (num > gsf.gf_numsrc)
+				num = gsf.gf_numsrc;
+			if (put_user(GROUP_FILTER_SIZE(num), optlen) ||
+			    copy_to_user(p, &gsf, size0))
+				err = -EFAULT;
+		}
 		release_sock(sk);
 		return err;
 	}
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index eaa4c2cc2fbb..97d796c7d6c0 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -547,7 +547,7 @@ done:
 }
 
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
-	struct group_filter __user *optval, int __user *optlen)
+	struct sockaddr_storage *p)
 {
 	int err, i, count, copycount;
 	const struct in6_addr *group;
@@ -592,14 +592,10 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 
 	copycount = count < gsf->gf_numsrc ? count : gsf->gf_numsrc;
 	gsf->gf_numsrc = count;
-	if (put_user(GROUP_FILTER_SIZE(copycount), optlen) ||
-	    copy_to_user(optval, gsf, GROUP_FILTER_SIZE(0))) {
-		return -EFAULT;
-	}
 	/* changes to psl require the socket lock, and a write lock
 	 * on pmc->sflock. We have the socket lock so reading here is safe.
 	 */
-	for (i = 0; i < copycount; i++) {
+	for (i = 0; i < copycount; i++, p++) {
 		struct sockaddr_in6 *psin6;
 		struct sockaddr_storage ss;
 
@@ -607,7 +603,7 @@ int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		memset(&ss, 0, sizeof(ss));
 		psin6->sin6_family = AF_INET6;
 		psin6->sin6_addr = psl->sl_addr[i];
-		if (copy_to_user(&optval->gf_slist[i], &ss, sizeof(ss)))
+		if (copy_to_user(p, &ss, sizeof(ss)))
 			return -EFAULT;
 	}
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 0dfe6581a7e35bafe5fc4d9b84edd0e66b4fd78a Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Mar 2020 22:08:59 -0400
Subject: get rid of compat_mc_getsockopt()

Now we can do MCAST_MSFILTER in compat ->getsockopt() without
copying things back and forth through compat_alloc_user_space().
We can form a native struct group_filter (sans the variable-length
tail) on stack, pass that + pointer to the tail of original request
to the helper doing the bulk of the work, then do the rest of
copyout - same as the native getsockopt() does.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/compat.h     |  3 --
 net/compat.c             | 79 ------------------------------------------------
 net/ipv4/ip_sockglue.c   | 44 +++++++++++++++++++++++++--
 net/ipv6/ipv6_sockglue.c | 41 +++++++++++++++++++++++--
 4 files changed, 79 insertions(+), 88 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index 69a8cd29c0ae..d714076d63d5 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -70,9 +70,6 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *,
 int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int,
 			 int (*)(struct sock *, int, int, char __user *,
 				 unsigned int));
-int compat_mc_getsockopt(struct sock *, int, int, char __user *, int __user *,
-			 int (*)(struct sock *, int, int, char __user *,
-				 int __user *));
 
 struct compat_group_req {
 	__u32				 gr_interface;
diff --git a/net/compat.c b/net/compat.c
index 032114de4fec..7bdfda2b382a 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -538,85 +538,6 @@ int compat_mc_setsockopt(struct sock *sock, int level, int optname,
 }
 EXPORT_SYMBOL(compat_mc_setsockopt);
 
-int compat_mc_getsockopt(struct sock *sock, int level, int optname,
-	char __user *optval, int __user *optlen,
-	int (*getsockopt)(struct sock *, int, int, char __user *, int __user *))
-{
-	struct compat_group_filter __user *gf32 = (void __user *)optval;
-	struct group_filter __user *kgf;
-	int __user	*koptlen;
-	u32 interface, fmode, numsrc;
-	int klen, ulen, err;
-
-	if (optname != MCAST_MSFILTER)
-		return getsockopt(sock, level, optname, optval, optlen);
-
-	koptlen = compat_alloc_user_space(sizeof(*koptlen));
-	if (!access_ok(optlen, sizeof(*optlen)) ||
-	    __get_user(ulen, optlen))
-		return -EFAULT;
-
-	/* adjust len for pad */
-	klen = ulen + sizeof(*kgf) - sizeof(*gf32);
-
-	if (klen < GROUP_FILTER_SIZE(0))
-		return -EINVAL;
-
-	if (!access_ok(koptlen, sizeof(*koptlen)) ||
-	    __put_user(klen, koptlen))
-		return -EFAULT;
-
-	/* have to allow space for previous compat_alloc_user_space, too */
-	kgf = compat_alloc_user_space(klen+sizeof(*optlen));
-
-	if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
-	    __get_user(interface, &gf32->gf_interface) ||
-	    __get_user(fmode, &gf32->gf_fmode) ||
-	    __get_user(numsrc, &gf32->gf_numsrc) ||
-	    __put_user(interface, &kgf->gf_interface) ||
-	    __put_user(fmode, &kgf->gf_fmode) ||
-	    __put_user(numsrc, &kgf->gf_numsrc) ||
-	    copy_in_user(&kgf->gf_group, &gf32->gf_group, sizeof(kgf->gf_group)))
-		return -EFAULT;
-
-	err = getsockopt(sock, level, optname, (char __user *)kgf, koptlen);
-	if (err)
-		return err;
-
-	if (!access_ok(koptlen, sizeof(*koptlen)) ||
-	    __get_user(klen, koptlen))
-		return -EFAULT;
-
-	ulen = klen - (sizeof(*kgf)-sizeof(*gf32));
-
-	if (!access_ok(optlen, sizeof(*optlen)) ||
-	    __put_user(ulen, optlen))
-		return -EFAULT;
-
-	if (!access_ok(kgf, klen) ||
-	    !access_ok(gf32, ulen) ||
-	    __get_user(interface, &kgf->gf_interface) ||
-	    __get_user(fmode, &kgf->gf_fmode) ||
-	    __get_user(numsrc, &kgf->gf_numsrc) ||
-	    __put_user(interface, &gf32->gf_interface) ||
-	    __put_user(fmode, &gf32->gf_fmode) ||
-	    __put_user(numsrc, &gf32->gf_numsrc))
-		return -EFAULT;
-	if (numsrc) {
-		int copylen;
-
-		klen -= GROUP_FILTER_SIZE(0);
-		copylen = numsrc * sizeof(gf32->gf_slist[0]);
-		if (copylen > klen)
-			copylen = klen;
-		if (copy_in_user(gf32->gf_slist, kgf->gf_slist, copylen))
-			return -EFAULT;
-	}
-	return err;
-}
-EXPORT_SYMBOL(compat_mc_getsockopt);
-
-
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
 static unsigned char nas[21] = {
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index e3703a3e7ef4..65a30e7672ff 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1607,9 +1607,47 @@ int compat_ip_getsockopt(struct sock *sk, int level, int optname,
 {
 	int err;
 
-	if (optname == MCAST_MSFILTER)
-		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
-			ip_getsockopt);
+	if (optname == MCAST_MSFILTER) {
+		const int size0 = offsetof(struct compat_group_filter, gf_slist);
+		struct compat_group_filter __user *p = (void __user *)optval;
+		struct compat_group_filter gf32;
+		struct group_filter gf;
+		int ulen, err;
+		int num;
+
+		if (level != SOL_IP)
+			return -EOPNOTSUPP;
+
+		if (get_user(ulen, optlen))
+			return -EFAULT;
+
+		if (ulen < size0)
+			return -EINVAL;
+
+		if (copy_from_user(&gf32, p, size0))
+			return -EFAULT;
+
+		gf.gf_interface = gf32.gf_interface;
+		gf.gf_fmode = gf32.gf_fmode;
+		num = gf.gf_numsrc = gf32.gf_numsrc;
+		gf.gf_group = gf32.gf_group;
+
+		rtnl_lock();
+		lock_sock(sk);
+		err = ip_mc_gsfget(sk, &gf, p->gf_slist);
+		release_sock(sk);
+		rtnl_unlock();
+		if (err)
+			return err;
+		if (gf.gf_numsrc < num)
+			num = gf.gf_numsrc;
+		ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf) - sizeof(gf32));
+		if (put_user(ulen, optlen) ||
+		    put_user(gf.gf_fmode, &p->gf_fmode) ||
+		    put_user(gf.gf_numsrc, &p->gf_numsrc))
+			return -EFAULT;
+		return 0;
+	}
 
 	err = do_ip_getsockopt(sk, level, optname, optval, optlen,
 		MSG_CMSG_COMPAT);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e4a62ca1a3d0..0bbafe73bdde 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1446,9 +1446,44 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname,
 	if (level != SOL_IPV6)
 		return -ENOPROTOOPT;
 
-	if (optname == MCAST_MSFILTER)
-		return compat_mc_getsockopt(sk, level, optname, optval, optlen,
-			ipv6_getsockopt);
+	if (optname == MCAST_MSFILTER) {
+		const int size0 = offsetof(struct compat_group_filter, gf_slist);
+		struct compat_group_filter __user *p = (void __user *)optval;
+		struct compat_group_filter gf32;
+		struct group_filter gf;
+		int ulen, err;
+		int num;
+
+		if (get_user(ulen, optlen))
+			return -EFAULT;
+
+		if (ulen < size0)
+			return -EINVAL;
+
+		if (copy_from_user(&gf32, p, size0))
+			return -EFAULT;
+
+		gf.gf_interface = gf32.gf_interface;
+		gf.gf_fmode = gf32.gf_fmode;
+		num = gf.gf_numsrc = gf32.gf_numsrc;
+		gf.gf_group = gf32.gf_group;
+
+		if (gf.gf_group.ss_family != AF_INET6)
+			return -EADDRNOTAVAIL;
+		lock_sock(sk);
+		err = ip6_mc_msfget(sk, &gf, p->gf_slist);
+		release_sock(sk);
+		if (err)
+			return err;
+		if (num > gf.gf_numsrc)
+			num = gf.gf_numsrc;
+		ulen = GROUP_FILTER_SIZE(num) - (sizeof(gf)-sizeof(gf32));
+		if (put_user(ulen, optlen) ||
+		    put_user(gf.gf_fmode, &p->gf_fmode) ||
+		    put_user(gf.gf_numsrc, &p->gf_numsrc))
+			return -EFAULT;
+		return 0;
+	}
 
 	err = do_ipv6_getsockopt(sk, level, optname, optval, optlen,
 				 MSG_CMSG_COMPAT);
-- 
cgit v1.2.3-59-g8ed1b


From e986d4dabcb8fab87b36b607bc710e42fe206baf Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 29 Mar 2020 22:37:56 -0400
Subject: set_mcast_msfilter(): take the guts of setsockopt(MCAST_MSFILTER)
 into a helper

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c | 73 +++++++++++++++++++++++++++-----------------------
 1 file changed, 40 insertions(+), 33 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 65a30e7672ff..cc0441157b02 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -587,6 +587,43 @@ static bool setsockopt_needs_rtnl(int optname)
 	return false;
 }
 
+static int set_mcast_msfilter(struct sock *sk, int ifindex,
+			      int numsrc, int fmode,
+			      struct sockaddr_storage *group,
+			      struct sockaddr_storage *list)
+{
+	int msize = IP_MSFILTER_SIZE(numsrc);
+	struct ip_msfilter *msf;
+	struct sockaddr_in *psin;
+	int err, i;
+
+	msf = kmalloc(msize, GFP_KERNEL);
+	if (!msf)
+		return -ENOBUFS;
+
+	psin = (struct sockaddr_in *)group;
+	if (psin->sin_family != AF_INET)
+		goto Eaddrnotavail;
+	msf->imsf_multiaddr = psin->sin_addr.s_addr;
+	msf->imsf_interface = 0;
+	msf->imsf_fmode = fmode;
+	msf->imsf_numsrc = numsrc;
+	for (i = 0; i < numsrc; ++i) {
+		psin = (struct sockaddr_in *)&list[i];
+
+		if (psin->sin_family != AF_INET)
+			goto Eaddrnotavail;
+		msf->imsf_slist[i] = psin->sin_addr.s_addr;
+	}
+	err = ip_mc_msfilter(sk, msf, ifindex);
+	kfree(msf);
+	return err;
+
+Eaddrnotavail:
+	kfree(msf);
+	return -EADDRNOTAVAIL;
+}
+
 static int do_ip_setsockopt(struct sock *sk, int level,
 			    int optname, char __user *optval, unsigned int optlen)
 {
@@ -1079,10 +1116,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	}
 	case MCAST_MSFILTER:
 	{
-		struct sockaddr_in *psin;
-		struct ip_msfilter *msf = NULL;
 		struct group_filter *gsf = NULL;
-		int msize, i, ifindex;
 
 		if (optlen < GROUP_FILTER_SIZE(0))
 			goto e_inval;
@@ -1095,7 +1129,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			err = PTR_ERR(gsf);
 			break;
 		}
-
 		/* numsrc >= (4G-140)/128 overflow in 32 bits */
 		if (gsf->gf_numsrc >= 0x1ffffff ||
 		    gsf->gf_numsrc > net->ipv4.sysctl_igmp_max_msf) {
@@ -1106,36 +1139,10 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			err = -EINVAL;
 			goto mc_msf_out;
 		}
-		msize = IP_MSFILTER_SIZE(gsf->gf_numsrc);
-		msf = kmalloc(msize, GFP_KERNEL);
-		if (!msf) {
-			err = -ENOBUFS;
-			goto mc_msf_out;
-		}
-		ifindex = gsf->gf_interface;
-		psin = (struct sockaddr_in *)&gsf->gf_group;
-		if (psin->sin_family != AF_INET) {
-			err = -EADDRNOTAVAIL;
-			goto mc_msf_out;
-		}
-		msf->imsf_multiaddr = psin->sin_addr.s_addr;
-		msf->imsf_interface = 0;
-		msf->imsf_fmode = gsf->gf_fmode;
-		msf->imsf_numsrc = gsf->gf_numsrc;
-		err = -EADDRNOTAVAIL;
-		for (i = 0; i < gsf->gf_numsrc; ++i) {
-			psin = (struct sockaddr_in *)&gsf->gf_slist[i];
-
-			if (psin->sin_family != AF_INET)
-				goto mc_msf_out;
-			msf->imsf_slist[i] = psin->sin_addr.s_addr;
-		}
-		kfree(gsf);
-		gsf = NULL;
-
-		err = ip_mc_msfilter(sk, msf, ifindex);
+		err = set_mcast_msfilter(sk, gsf->gf_interface,
+					 gsf->gf_numsrc, gsf->gf_fmode,
+					 &gsf->gf_group, gsf->gf_slist);
 mc_msf_out:
-		kfree(msf);
 		kfree(gsf);
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 2e04172875c9daf929659eb5c3ef4b98fdf34396 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 30 Mar 2020 15:39:43 -0400
Subject: ipv4: do compat setsockopt for MCAST_MSFILTER directly

Parallel to what the native setsockopt() does, except that unlike
the native setsockopt() we do not use memdup_user() - we want
the sockaddr_storage fields properly aligned, so we allocate
4 bytes more and copy compat_group_filter at the offset 4,
which yields the proper alignments.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index cc0441157b02..b6b889b5dacf 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1286,9 +1286,55 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
-	case MCAST_MSFILTER:
 		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
 			ip_setsockopt);
+	case MCAST_MSFILTER:
+	{
+		const int size0 = offsetof(struct compat_group_filter, gf_slist);
+		struct compat_group_filter *gf32;
+		void *p;
+		int n;
+
+		if (optlen < size0)
+			return -EINVAL;
+		if (optlen > sysctl_optmem_max - 4)
+			return -ENOBUFS;
+
+		p = kmalloc(optlen + 4, GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+		gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+		if (copy_from_user(gf32, optval, optlen)) {
+			err = -EFAULT;
+			goto mc_msf_out;
+		}
+
+		n = gf32->gf_numsrc;
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (n >= 0x1ffffff) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
+			err = -EINVAL;
+			goto mc_msf_out;
+		}
+
+		rtnl_lock();
+		lock_sock(sk);
+		/* enforce the per-netns igmp_max_msf source limit */
+		if (n > sock_net(sk)->ipv4.sysctl_igmp_max_msf)
+			err = -ENOBUFS;
+		else
+			err = set_mcast_msfilter(sk, gf32->gf_interface,
+						 n, gf32->gf_fmode,
+						 &gf32->gf_group, gf32->gf_slist);
+		release_sock(sk);
+		rtnl_unlock();
+mc_msf_out:
+		kfree(p);
+		return err;
+	}
 	}
 
 	err = do_ip_setsockopt(sk, level, optname, optval, optlen);
-- 
cgit v1.2.3-59-g8ed1b


From d59eb177c84f9572a6b51024c0b2611c3b5a27c5 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 30 Mar 2020 15:43:10 -0400
Subject: ip6_mc_msfilter(): pass the address list separately

That way we'll be able to reuse it for the compat case.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/ipv6.h       | 3 ++-
 net/ipv6/ipv6_sockglue.c | 2 +-
 net/ipv6/mcast.c         | 7 ++++---
 3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index c45eb78d970f..39a00d3ef5e2 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1136,7 +1136,8 @@ struct group_filter;
 
 int ip6_mc_source(int add, int omode, struct sock *sk,
 		  struct group_source_req *pgsr);
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf);
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+		  struct sockaddr_storage *list);
 int ip6_mc_msfget(struct sock *sk, struct group_filter *gsf,
 		  struct sockaddr_storage __user *p);
 
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 0bbafe73bdde..7d3ecc0e69d1 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -780,7 +780,7 @@ done:
 			retv = -EINVAL;
 			break;
 		}
-		retv = ip6_mc_msfilter(sk, gsf);
+		retv = ip6_mc_msfilter(sk, gsf, gsf->gf_slist);
 		kfree(gsf);
 
 		break;
diff --git a/net/ipv6/mcast.c b/net/ipv6/mcast.c
index 97d796c7d6c0..7e12d2114158 100644
--- a/net/ipv6/mcast.c
+++ b/net/ipv6/mcast.c
@@ -457,7 +457,8 @@ done:
 	return err;
 }
 
-int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
+int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf,
+		    struct sockaddr_storage *list)
 {
 	const struct in6_addr *group;
 	struct ipv6_mc_socklist *pmc;
@@ -509,10 +510,10 @@ int ip6_mc_msfilter(struct sock *sk, struct group_filter *gsf)
 			goto done;
 		}
 		newpsl->sl_max = newpsl->sl_count = gsf->gf_numsrc;
-		for (i = 0; i < newpsl->sl_count; ++i) {
+		for (i = 0; i < newpsl->sl_count; ++i, ++list) {
 			struct sockaddr_in6 *psin6;
 
-			psin6 = (struct sockaddr_in6 *)&gsf->gf_slist[i];
+			psin6 = (struct sockaddr_in6 *)list;
 			newpsl->sl_addr[i] = psin6->sin6_addr;
 		}
 		err = ip6_mc_add_src(idev, group, gsf->gf_fmode,
-- 
cgit v1.2.3-59-g8ed1b


From 168a2cca81438aef819e43feb161614488dee97b Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 30 Mar 2020 16:40:04 -0400
Subject: ipv6: do compat setsockopt for MCAST_MSFILTER directly

Similar to the ipv4 counterpart of this patch - the same
trick is used to align the tail array properly.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv6/ipv6_sockglue.c | 48 +++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 47 insertions(+), 1 deletion(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 7d3ecc0e69d1..2b5029df8f1e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -980,9 +980,55 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
-	case MCAST_MSFILTER:
 		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
 			ipv6_setsockopt);
+	case MCAST_MSFILTER:
+	{
+		const int size0 = offsetof(struct compat_group_filter, gf_slist);
+		struct compat_group_filter *gf32;
+		void *p;
+		int n;
+
+		if (optlen < size0)
+			return -EINVAL;
+		if (optlen > sysctl_optmem_max - 4)
+			return -ENOBUFS;
+
+		p = kmalloc(optlen + 4, GFP_KERNEL);
+		if (!p)
+			return -ENOMEM;
+
+		gf32 = p + 4; /* we want ->gf_group and ->gf_slist aligned */
+		if (copy_from_user(gf32, optval, optlen)) {
+			err = -EFAULT;
+			goto mc_msf_out;
+		}
+
+		n = gf32->gf_numsrc;
+		/* numsrc >= (4G-140)/128 overflow in 32 bits */
+		if (n >= 0x1ffffffU ||
+		    n > sysctl_mld_max_msf) {
+			err = -ENOBUFS;
+			goto mc_msf_out;
+		}
+		if (offsetof(struct compat_group_filter, gf_slist[n]) > optlen) {
+			err = -EINVAL;
+			goto mc_msf_out;
+		}
+
+		rtnl_lock();
+		lock_sock(sk);
+		err = ip6_mc_msfilter(sk, &(struct group_filter){
+				.gf_interface = gf32->gf_interface,
+				.gf_group = gf32->gf_group,
+				.gf_fmode = gf32->gf_fmode,
+				.gf_numsrc = gf32->gf_numsrc}, gf32->gf_slist);
+		release_sock(sk);
+		rtnl_unlock();
+mc_msf_out:
+		kfree(p);
+		return err;
+	}
 	}
 
 	err = do_ipv6_setsockopt(sk, level, optname, optval, optlen);
-- 
cgit v1.2.3-59-g8ed1b


From 2f984f11fdc06bcfd5bb528d07a93c20301dd068 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 26 Apr 2020 19:56:22 -0400
Subject: ipv[46]: do compat setsockopt for MCAST_{JOIN,LEAVE}_GROUP directly

This is a direct parallel to the way these two are handled in the
native ->setsockopt() instances - the helpers that do the real work
are already separated and can be reused as-is in this case.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c   | 31 +++++++++++++++++++++++++++++++
 net/ipv6/ipv6_sockglue.c | 28 ++++++++++++++++++++++++++++
 2 files changed, 59 insertions(+)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b6b889b5dacf..34c3a43a9c98 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1282,6 +1282,37 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
 	switch (optname) {
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
+	{
+		struct compat_group_req __user *gr32 = (void __user *)optval;
+		struct group_req greq;
+		struct sockaddr_in *psin = (struct sockaddr_in *)&greq.gr_group;
+		struct ip_mreqn mreq;
+
+		if (optlen < sizeof(struct compat_group_req))
+			return -EINVAL;
+
+		if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+		    copy_from_user(&greq.gr_group, &gr32->gr_group,
+				sizeof(greq.gr_group)))
+			return -EFAULT;
+
+		if (psin->sin_family != AF_INET)
+			return -EINVAL;
+
+		memset(&mreq, 0, sizeof(mreq));
+		mreq.imr_multiaddr = psin->sin_addr;
+		mreq.imr_ifindex = greq.gr_interface;
+
+		rtnl_lock();
+		lock_sock(sk);
+		if (optname == MCAST_JOIN_GROUP)
+			err = ip_mc_join_group(sk, &mreq);
+		else
+			err = ip_mc_leave_group(sk, &mreq);
+		release_sock(sk);
+		rtnl_unlock();
+		return err;
+	}
 	case MCAST_JOIN_SOURCE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 2b5029df8f1e..209d827950cc 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -976,6 +976,34 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	switch (optname) {
 	case MCAST_JOIN_GROUP:
 	case MCAST_LEAVE_GROUP:
+	{
+		struct compat_group_req __user *gr32 = (void __user *)optval;
+		struct group_req greq;
+		struct sockaddr_in6 *psin6 = (struct sockaddr_in6 *)&greq.gr_group;
+
+		if (optlen < sizeof(struct compat_group_req))
+			return -EINVAL;
+
+		if (get_user(greq.gr_interface, &gr32->gr_interface) ||
+		    copy_from_user(&greq.gr_group, &gr32->gr_group,
+				sizeof(greq.gr_group)))
+			return -EFAULT;
+
+		if (greq.gr_group.ss_family != AF_INET6)
+			return -EADDRNOTAVAIL;
+
+		rtnl_lock();
+		lock_sock(sk);
+		if (optname == MCAST_JOIN_GROUP)
+			err = ipv6_sock_mc_join(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
+		else
+			err = ipv6_sock_mc_drop(sk, greq.gr_interface,
+						 &psin6->sin6_addr);
+		release_sock(sk);
+		rtnl_unlock();
+		return err;
+	}
 	case MCAST_JOIN_SOURCE_GROUP:
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
-- 
cgit v1.2.3-59-g8ed1b


From 2bbf8c1ead651c65bd0f7b6ba6d57cb09e2d1a57 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Apr 2020 10:49:26 -0400
Subject: ipv4: take handling of group_source_req options into a helper

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c | 83 ++++++++++++++++++++++++++------------------------
 1 file changed, 44 insertions(+), 39 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 34c3a43a9c98..7f065a68664e 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -624,6 +624,49 @@ Eaddrnotavail:
 	return -EADDRNOTAVAIL;
 }
 
+static int do_mcast_group_source(struct sock *sk, int optname,
+				 struct group_source_req *greqs)
+{
+	struct ip_mreq_source mreqs;
+	struct sockaddr_in *psin;
+	int omode, add, err;
+
+	if (greqs->gsr_group.ss_family != AF_INET ||
+	    greqs->gsr_source.ss_family != AF_INET)
+		return -EADDRNOTAVAIL;
+
+	psin = (struct sockaddr_in *)&greqs->gsr_group;
+	mreqs.imr_multiaddr = psin->sin_addr.s_addr;
+	psin = (struct sockaddr_in *)&greqs->gsr_source;
+	mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
+	mreqs.imr_interface = 0; /* use index for mc_source */
+
+	if (optname == MCAST_BLOCK_SOURCE) {
+		omode = MCAST_EXCLUDE;
+		add = 1;
+	} else if (optname == MCAST_UNBLOCK_SOURCE) {
+		omode = MCAST_EXCLUDE;
+		add = 0;
+	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+		struct ip_mreqn mreq;
+
+		psin = (struct sockaddr_in *)&greqs->gsr_group;
+		mreq.imr_multiaddr = psin->sin_addr;
+		mreq.imr_address.s_addr = 0;
+		mreq.imr_ifindex = greqs->gsr_interface;
+		err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
+		if (err && err != -EADDRINUSE)
+			return err;
+		greqs->gsr_interface = mreq.imr_ifindex;
+		omode = MCAST_INCLUDE;
+		add = 1;
+	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+		omode = MCAST_INCLUDE;
+		add = 0;
+	}
+	return ip_mc_source(add, omode, sk, &mreqs, greqs->gsr_interface);
+}
+
 static int do_ip_setsockopt(struct sock *sk, int level,
 			    int optname, char __user *optval, unsigned int optlen)
 {
@@ -1066,9 +1109,6 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 	case MCAST_UNBLOCK_SOURCE:
 	{
 		struct group_source_req greqs;
-		struct ip_mreq_source mreqs;
-		struct sockaddr_in *psin;
-		int omode, add;
 
 		if (optlen != sizeof(struct group_source_req))
 			goto e_inval;
@@ -1076,42 +1116,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			err = -EFAULT;
 			break;
 		}
-		if (greqs.gsr_group.ss_family != AF_INET ||
-		    greqs.gsr_source.ss_family != AF_INET) {
-			err = -EADDRNOTAVAIL;
-			break;
-		}
-		psin = (struct sockaddr_in *)&greqs.gsr_group;
-		mreqs.imr_multiaddr = psin->sin_addr.s_addr;
-		psin = (struct sockaddr_in *)&greqs.gsr_source;
-		mreqs.imr_sourceaddr = psin->sin_addr.s_addr;
-		mreqs.imr_interface = 0; /* use index for mc_source */
-
-		if (optname == MCAST_BLOCK_SOURCE) {
-			omode = MCAST_EXCLUDE;
-			add = 1;
-		} else if (optname == MCAST_UNBLOCK_SOURCE) {
-			omode = MCAST_EXCLUDE;
-			add = 0;
-		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-			struct ip_mreqn mreq;
-
-			psin = (struct sockaddr_in *)&greqs.gsr_group;
-			mreq.imr_multiaddr = psin->sin_addr;
-			mreq.imr_address.s_addr = 0;
-			mreq.imr_ifindex = greqs.gsr_interface;
-			err = ip_mc_join_group_ssm(sk, &mreq, MCAST_INCLUDE);
-			if (err && err != -EADDRINUSE)
-				break;
-			greqs.gsr_interface = mreq.imr_ifindex;
-			omode = MCAST_INCLUDE;
-			add = 1;
-		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
-			omode = MCAST_INCLUDE;
-			add = 0;
-		}
-		err = ip_mc_source(add, omode, sk, &mreqs,
-				   greqs.gsr_interface);
+		err = do_mcast_group_source(sk, optname, &greqs);
 		break;
 	}
 	case MCAST_MSFILTER:
-- 
cgit v1.2.3-59-g8ed1b


From fcfa0b09d3f794af66cf8bcee03ddb7055934742 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Apr 2020 11:00:01 -0400
Subject: ipv6: take handling of group_source_req options into a helper

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv6/ipv6_sockglue.c | 65 +++++++++++++++++++++++++++---------------------
 1 file changed, 36 insertions(+), 29 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index 209d827950cc..bb049feeb787 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -136,6 +136,41 @@ static bool setsockopt_needs_rtnl(int optname)
 	return false;
 }
 
+static int do_ipv6_mcast_group_source(struct sock *sk, int optname,
+				      struct group_source_req *greqs)
+{
+	int omode, add;
+
+	if (greqs->gsr_group.ss_family != AF_INET6 ||
+	    greqs->gsr_source.ss_family != AF_INET6)
+		return -EADDRNOTAVAIL;
+
+	if (optname == MCAST_BLOCK_SOURCE) {
+		omode = MCAST_EXCLUDE;
+		add = 1;
+	} else if (optname == MCAST_UNBLOCK_SOURCE) {
+		omode = MCAST_EXCLUDE;
+		add = 0;
+	} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
+		struct sockaddr_in6 *psin6;
+		int retv;
+
+		psin6 = (struct sockaddr_in6 *)&greqs->gsr_group;
+		retv = ipv6_sock_mc_join_ssm(sk, greqs->gsr_interface,
+					     &psin6->sin6_addr,
+					     MCAST_INCLUDE);
+		/* prior join w/ different source is ok */
+		if (retv && retv != -EADDRINUSE)
+			return retv;
+		omode = MCAST_INCLUDE;
+		add = 1;
+	} else /* MCAST_LEAVE_SOURCE_GROUP */ {
+		omode = MCAST_INCLUDE;
+		add = 0;
+	}
+	return ip6_mc_source(add, omode, sk, greqs);
+}
+
 static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 		    char __user *optval, unsigned int optlen)
 {
@@ -715,7 +750,6 @@ done:
 	case MCAST_UNBLOCK_SOURCE:
 	{
 		struct group_source_req greqs;
-		int omode, add;
 
 		if (optlen < sizeof(struct group_source_req))
 			goto e_inval;
@@ -723,34 +757,7 @@ done:
 			retv = -EFAULT;
 			break;
 		}
-		if (greqs.gsr_group.ss_family != AF_INET6 ||
-		    greqs.gsr_source.ss_family != AF_INET6) {
-			retv = -EADDRNOTAVAIL;
-			break;
-		}
-		if (optname == MCAST_BLOCK_SOURCE) {
-			omode = MCAST_EXCLUDE;
-			add = 1;
-		} else if (optname == MCAST_UNBLOCK_SOURCE) {
-			omode = MCAST_EXCLUDE;
-			add = 0;
-		} else if (optname == MCAST_JOIN_SOURCE_GROUP) {
-			struct sockaddr_in6 *psin6;
-
-			psin6 = (struct sockaddr_in6 *)&greqs.gsr_group;
-			retv = ipv6_sock_mc_join_ssm(sk, greqs.gsr_interface,
-						     &psin6->sin6_addr,
-						     MCAST_INCLUDE);
-			/* prior join w/ different source is ok */
-			if (retv && retv != -EADDRINUSE)
-				break;
-			omode = MCAST_INCLUDE;
-			add = 1;
-		} else /* MCAST_LEAVE_SOURCE_GROUP */ {
-			omode = MCAST_INCLUDE;
-			add = 0;
-		}
-		retv = ip6_mc_source(add, omode, sk, &greqs);
+		retv = do_ipv6_mcast_group_source(sk, optname, &greqs);
 		break;
 	}
 	case MCAST_MSFILTER:
-- 
cgit v1.2.3-59-g8ed1b


From b212c322c8d73c48062340dc4cbe150c4ce97fb8 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Apr 2020 11:37:02 -0400
Subject: handle the group_source_req options directly

Native ->setsockopt() handling of these options (MCAST_..._SOURCE_GROUP
and MCAST_{,UN}BLOCK_SOURCE) consists of copyin + call of a helper that
does the actual work.  The only change needed for ->compat_setsockopt()
is a slightly different copyin - the helpers can be reused as-is.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/ipv4/ip_sockglue.c   | 23 +++++++++++++++++++++--
 net/ipv6/ipv6_sockglue.c | 23 +++++++++++++++++++++--
 2 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 7f065a68664e..a2469bc57cfe 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1322,8 +1322,27 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
-		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
-			ip_setsockopt);
+	{
+		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
+		struct group_source_req greqs;
+
+		if (optlen != sizeof(struct compat_group_source_req))
+			return -EINVAL;
+
+		if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+		    copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+				sizeof(greqs.gsr_group)) ||
+		    copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+				sizeof(greqs.gsr_source)))
+			return -EFAULT;
+
+		rtnl_lock();
+		lock_sock(sk);
+		err = do_mcast_group_source(sk, optname, &greqs);
+		release_sock(sk);
+		rtnl_unlock();
+		return err;
+	}
 	case MCAST_MSFILTER:
 	{
 		const int size0 = offsetof(struct compat_group_filter, gf_slist);
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index bb049feeb787..e10258c2210e 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -1015,8 +1015,27 @@ int compat_ipv6_setsockopt(struct sock *sk, int level, int optname,
 	case MCAST_LEAVE_SOURCE_GROUP:
 	case MCAST_BLOCK_SOURCE:
 	case MCAST_UNBLOCK_SOURCE:
-		return compat_mc_setsockopt(sk, level, optname, optval, optlen,
-			ipv6_setsockopt);
+	{
+		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
+		struct group_source_req greqs;
+
+		if (optlen < sizeof(struct compat_group_source_req))
+			return -EINVAL;
+
+		if (get_user(greqs.gsr_interface, &gsr32->gsr_interface) ||
+		    copy_from_user(&greqs.gsr_group, &gsr32->gsr_group,
+				sizeof(greqs.gsr_group)) ||
+		    copy_from_user(&greqs.gsr_source, &gsr32->gsr_source,
+				sizeof(greqs.gsr_source)))
+			return -EFAULT;
+
+		rtnl_lock();
+		lock_sock(sk);
+		err = do_ipv6_mcast_group_source(sk, optname, &greqs);
+		release_sock(sk);
+		rtnl_unlock();
+		return err;
+	}
 	case MCAST_MSFILTER:
 	{
 		const int size0 = offsetof(struct compat_group_filter, gf_slist);
-- 
cgit v1.2.3-59-g8ed1b


From bbced07d9952ca290e8de3957c75b8b401d7a867 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Mon, 27 Apr 2020 11:37:02 -0400
Subject: get rid of compat_mc_setsockopt()

not used anymore

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 include/net/compat.h |  4 ---
 net/compat.c         | 90 ----------------------------------------------------
 2 files changed, 94 deletions(-)

diff --git a/include/net/compat.h b/include/net/compat.h
index d714076d63d5..f241666117d8 100644
--- a/include/net/compat.h
+++ b/include/net/compat.h
@@ -67,10 +67,6 @@ int put_cmsg_compat(struct msghdr*, int, int, int, void *);
 int cmsghdr_from_user_compat_to_kern(struct msghdr *, struct sock *,
 				     unsigned char *, int);
 
-int compat_mc_setsockopt(struct sock *, int, int, char __user *, unsigned int,
-			 int (*)(struct sock *, int, int, char __user *,
-				 unsigned int));
-
 struct compat_group_req {
 	__u32				 gr_interface;
 	struct __kernel_sockaddr_storage gr_group
diff --git a/net/compat.c b/net/compat.c
index 7bdfda2b382a..afd7b444e0bf 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -448,96 +448,6 @@ COMPAT_SYSCALL_DEFINE5(getsockopt, int, fd, int, level, int, optname,
 	return __compat_sys_getsockopt(fd, level, optname, optval, optlen);
 }
 
-#define __COMPAT_GF0_SIZE (sizeof(struct compat_group_filter) - \
-			sizeof(struct __kernel_sockaddr_storage))
-
-int compat_mc_setsockopt(struct sock *sock, int level, int optname,
-	char __user *optval, unsigned int optlen,
-	int (*setsockopt)(struct sock *, int, int, char __user *, unsigned int))
-{
-	char __user	*koptval = optval;
-	int		koptlen = optlen;
-
-	switch (optname) {
-	case MCAST_JOIN_GROUP:
-	case MCAST_LEAVE_GROUP:
-	{
-		struct compat_group_req __user *gr32 = (void __user *)optval;
-		struct group_req __user *kgr =
-			compat_alloc_user_space(sizeof(struct group_req));
-		u32 interface;
-
-		if (!access_ok(gr32, sizeof(*gr32)) ||
-		    !access_ok(kgr, sizeof(struct group_req)) ||
-		    __get_user(interface, &gr32->gr_interface) ||
-		    __put_user(interface, &kgr->gr_interface) ||
-		    copy_in_user(&kgr->gr_group, &gr32->gr_group,
-				sizeof(kgr->gr_group)))
-			return -EFAULT;
-		koptval = (char __user *)kgr;
-		koptlen = sizeof(struct group_req);
-		break;
-	}
-	case MCAST_JOIN_SOURCE_GROUP:
-	case MCAST_LEAVE_SOURCE_GROUP:
-	case MCAST_BLOCK_SOURCE:
-	case MCAST_UNBLOCK_SOURCE:
-	{
-		struct compat_group_source_req __user *gsr32 = (void __user *)optval;
-		struct group_source_req __user *kgsr = compat_alloc_user_space(
-			sizeof(struct group_source_req));
-		u32 interface;
-
-		if (!access_ok(gsr32, sizeof(*gsr32)) ||
-		    !access_ok(kgsr,
-			sizeof(struct group_source_req)) ||
-		    __get_user(interface, &gsr32->gsr_interface) ||
-		    __put_user(interface, &kgsr->gsr_interface) ||
-		    copy_in_user(&kgsr->gsr_group, &gsr32->gsr_group,
-				sizeof(kgsr->gsr_group)) ||
-		    copy_in_user(&kgsr->gsr_source, &gsr32->gsr_source,
-				sizeof(kgsr->gsr_source)))
-			return -EFAULT;
-		koptval = (char __user *)kgsr;
-		koptlen = sizeof(struct group_source_req);
-		break;
-	}
-	case MCAST_MSFILTER:
-	{
-		struct compat_group_filter __user *gf32 = (void __user *)optval;
-		struct group_filter __user *kgf;
-		u32 interface, fmode, numsrc;
-
-		if (!access_ok(gf32, __COMPAT_GF0_SIZE) ||
-		    __get_user(interface, &gf32->gf_interface) ||
-		    __get_user(fmode, &gf32->gf_fmode) ||
-		    __get_user(numsrc, &gf32->gf_numsrc))
-			return -EFAULT;
-		koptlen = optlen + sizeof(struct group_filter) -
-				sizeof(struct compat_group_filter);
-		if (koptlen < GROUP_FILTER_SIZE(numsrc))
-			return -EINVAL;
-		kgf = compat_alloc_user_space(koptlen);
-		if (!access_ok(kgf, koptlen) ||
-		    __put_user(interface, &kgf->gf_interface) ||
-		    __put_user(fmode, &kgf->gf_fmode) ||
-		    __put_user(numsrc, &kgf->gf_numsrc) ||
-		    copy_in_user(&kgf->gf_group, &gf32->gf_group,
-				sizeof(kgf->gf_group)) ||
-		    (numsrc && copy_in_user(kgf->gf_slist, gf32->gf_slist,
-				numsrc * sizeof(kgf->gf_slist[0]))))
-			return -EFAULT;
-		koptval = (char __user *)kgf;
-		break;
-	}
-
-	default:
-		break;
-	}
-	return setsockopt(sock, level, optname, koptval, koptlen);
-}
-EXPORT_SYMBOL(compat_mc_setsockopt);
-
 /* Argument list sizes for compat_sys_socketcall */
 #define AL(x) ((x) * sizeof(u32))
 static unsigned char nas[21] = {
-- 
cgit v1.2.3-59-g8ed1b


From 38c53ca3c114fa2a7030f0d1f54feaf044957609 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Thu, 23 Apr 2020 10:19:04 -0400
Subject: batadv_socket_read(): get rid of pointless access_ok()

The address is passed only to copy_to_user(), which does its own
access_ok() check.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/batman-adv/icmp_socket.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/net/batman-adv/icmp_socket.c b/net/batman-adv/icmp_socket.c
index ccb535c77e5d..8bdabc03b0b2 100644
--- a/net/batman-adv/icmp_socket.c
+++ b/net/batman-adv/icmp_socket.c
@@ -135,9 +135,6 @@ static ssize_t batadv_socket_read(struct file *file, char __user *buf,
 	if (!buf || count < sizeof(struct batadv_icmp_packet))
 		return -EINVAL;
 
-	if (!access_ok(buf, count))
-		return -EFAULT;
-
 	error = wait_event_interruptible(socket_client->queue_wait,
 					 socket_client->queue_len);
 
-- 
cgit v1.2.3-59-g8ed1b


From 8c2348e36af0da79477b0726781da297263269a4 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 May 2020 17:20:49 -0400
Subject: atm: separate ATM_GETNAMES handling from the rest of atm_dev_ioctl()

atm_dev_ioctl() does copyin in two different ways - one for
ATM_GETNAMES, another for everything else.  Start with separating
the former into a new helper (atm_getnames()).  The next step
will be to lift the copyin into the callers.

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/atm/ioctl.c     |  6 +++-
 net/atm/resources.c | 88 +++++++++++++++++++++++++++--------------------------
 net/atm/resources.h |  1 +
 3 files changed, 51 insertions(+), 44 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index d955b683aa7c..0b4b07740fe4 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -162,7 +162,11 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 	if (error != -ENOIOCTLCMD)
 		goto done;
 
-	error = atm_dev_ioctl(cmd, argp, compat);
+	if (cmd == ATM_GETNAMES) {
+		error = atm_getnames(argp, compat);
+	} else {
+		error = atm_dev_ioctl(cmd, argp, compat);
+	}
 
 done:
 	return error;
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 889349c6d90d..a2ab75929eec 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -193,61 +193,63 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg,
 	return error ? -EFAULT : 0;
 }
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+int atm_getnames(void __user *arg, int compat)
 {
 	void __user *buf;
-	int error, len, number, size = 0;
+	int error, len, size = 0;
 	struct atm_dev *dev;
 	struct list_head *p;
 	int *tmp_buf, *tmp_p;
-	int __user *sioc_len;
 	int __user *iobuf_len;
 
-	switch (cmd) {
-	case ATM_GETNAMES:
-		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
-			struct compat_atm_iobuf __user *ciobuf = arg;
-			compat_uptr_t cbuf;
-			iobuf_len = &ciobuf->length;
-			if (get_user(cbuf, &ciobuf->buffer))
-				return -EFAULT;
-			buf = compat_ptr(cbuf);
+		struct compat_atm_iobuf __user *ciobuf = arg;
+		compat_uptr_t cbuf;
+		iobuf_len = &ciobuf->length;
+		if (get_user(cbuf, &ciobuf->buffer))
+			return -EFAULT;
+		buf = compat_ptr(cbuf);
 #endif
-		} else {
-			struct atm_iobuf __user *iobuf = arg;
-			iobuf_len = &iobuf->length;
-			if (get_user(buf, &iobuf->buffer))
-				return -EFAULT;
-		}
-		if (get_user(len, iobuf_len))
+	} else {
+		struct atm_iobuf __user *iobuf = arg;
+		iobuf_len = &iobuf->length;
+		if (get_user(buf, &iobuf->buffer))
 			return -EFAULT;
-		mutex_lock(&atm_dev_mutex);
-		list_for_each(p, &atm_devs)
-			size += sizeof(int);
-		if (size > len) {
-			mutex_unlock(&atm_dev_mutex);
-			return -E2BIG;
-		}
-		tmp_buf = kmalloc(size, GFP_ATOMIC);
-		if (!tmp_buf) {
-			mutex_unlock(&atm_dev_mutex);
-			return -ENOMEM;
-		}
-		tmp_p = tmp_buf;
-		list_for_each(p, &atm_devs) {
-			dev = list_entry(p, struct atm_dev, dev_list);
-			*tmp_p++ = dev->number;
-		}
+	}
+	if (get_user(len, iobuf_len))
+		return -EFAULT;
+	mutex_lock(&atm_dev_mutex);
+	list_for_each(p, &atm_devs)
+		size += sizeof(int);
+	if (size > len) {
 		mutex_unlock(&atm_dev_mutex);
-		error = ((copy_to_user(buf, tmp_buf, size)) ||
-			 put_user(size, iobuf_len))
-			? -EFAULT : 0;
-		kfree(tmp_buf);
-		return error;
-	default:
-		break;
+		return -E2BIG;
 	}
+	tmp_buf = kmalloc(size, GFP_ATOMIC);
+	if (!tmp_buf) {
+		mutex_unlock(&atm_dev_mutex);
+		return -ENOMEM;
+	}
+	tmp_p = tmp_buf;
+	list_for_each(p, &atm_devs) {
+		dev = list_entry(p, struct atm_dev, dev_list);
+		*tmp_p++ = dev->number;
+	}
+	mutex_unlock(&atm_dev_mutex);
+	error = ((copy_to_user(buf, tmp_buf, size)) ||
+		 put_user(size, iobuf_len))
+		? -EFAULT : 0;
+	kfree(tmp_buf);
+	return error;
+}
+
+int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+{
+	void __user *buf;
+	int error, len, number, size = 0;
+	struct atm_dev *dev;
+	int __user *sioc_len;
 
 	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
 #ifdef CONFIG_COMPAT
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 048232e4d4c6..18f8e5948ce4 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -14,6 +14,7 @@
 extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
+int atm_getnames(void __user *arg, int compat);
 int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
 
 
-- 
cgit v1.2.3-59-g8ed1b


From a3929484af75ee524419edbbc4e9ce012c3d67c9 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 May 2020 17:34:20 -0400
Subject: atm: move copyin from atm_getnames() into the caller

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/atm/ioctl.c     | 19 ++++++++++++++++++-
 net/atm/resources.c | 19 +------------------
 net/atm/resources.h |  2 +-
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 0b4b07740fe4..e239cebf48da 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -56,6 +56,8 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 	int error;
 	struct list_head *pos;
 	void __user *argp = (void __user *)arg;
+	void __user *buf;
+	int __user *len;
 
 	vcc = ATM_SD(sock);
 	switch (cmd) {
@@ -163,7 +165,22 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 		goto done;
 
 	if (cmd == ATM_GETNAMES) {
-		error = atm_getnames(argp, compat);
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+			struct compat_atm_iobuf __user *ciobuf = argp;
+			compat_uptr_t cbuf;
+			len = &ciobuf->length;
+			if (get_user(cbuf, &ciobuf->buffer))
+				return -EFAULT;
+			buf = compat_ptr(cbuf);
+#endif
+		} else {
+			struct atm_iobuf __user *iobuf = argp;
+			len = &iobuf->length;
+			if (get_user(buf, &iobuf->buffer))
+				return -EFAULT;
+		}
+		error = atm_getnames(buf, len);
 	} else {
 		error = atm_dev_ioctl(cmd, argp, compat);
 	}
diff --git a/net/atm/resources.c b/net/atm/resources.c
index a2ab75929eec..5507cc608969 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -193,30 +193,13 @@ static int fetch_stats(struct atm_dev *dev, struct atm_dev_stats __user *arg,
 	return error ? -EFAULT : 0;
 }
 
-int atm_getnames(void __user *arg, int compat)
+int atm_getnames(void __user *buf, int __user *iobuf_len)
 {
-	void __user *buf;
 	int error, len, size = 0;
 	struct atm_dev *dev;
 	struct list_head *p;
 	int *tmp_buf, *tmp_p;
-	int __user *iobuf_len;
 
-	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
-		struct compat_atm_iobuf __user *ciobuf = arg;
-		compat_uptr_t cbuf;
-		iobuf_len = &ciobuf->length;
-		if (get_user(cbuf, &ciobuf->buffer))
-			return -EFAULT;
-		buf = compat_ptr(cbuf);
-#endif
-	} else {
-		struct atm_iobuf __user *iobuf = arg;
-		iobuf_len = &iobuf->length;
-		if (get_user(buf, &iobuf->buffer))
-			return -EFAULT;
-	}
 	if (get_user(len, iobuf_len))
 		return -EFAULT;
 	mutex_lock(&atm_dev_mutex);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 18f8e5948ce4..5e2c68d37d63 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -14,7 +14,7 @@
 extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
-int atm_getnames(void __user *arg, int compat);
+int atm_getnames(void __user *buf, int __user *iobuf_len);
 int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
 
 
-- 
cgit v1.2.3-59-g8ed1b


From 36085049bc0acb6f2e784f430c2cc66944a2ef07 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 May 2020 17:41:51 -0400
Subject: atm: switch do_atm_iobuf() to direct use of atm_getnames()

... and sod the compat_alloc_user_space() with its complications

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/atm/ioctl.c | 25 +++----------------------
 1 file changed, 3 insertions(+), 22 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index e239cebf48da..fdd0e3434523 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -251,32 +251,13 @@ static struct {
 static int do_atm_iobuf(struct socket *sock, unsigned int cmd,
 			unsigned long arg)
 {
-	struct atm_iobuf __user *iobuf;
-	struct compat_atm_iobuf __user *iobuf32;
+	struct compat_atm_iobuf __user *iobuf32 = compat_ptr(arg);
 	u32 data;
-	void __user *datap;
-	int len, err;
-
-	iobuf = compat_alloc_user_space(sizeof(*iobuf));
-	iobuf32 = compat_ptr(arg);
 
-	if (get_user(len, &iobuf32->length) ||
-	    get_user(data, &iobuf32->buffer))
-		return -EFAULT;
-	datap = compat_ptr(data);
-	if (put_user(len, &iobuf->length) ||
-	    put_user(datap, &iobuf->buffer))
+	if (get_user(data, &iobuf32->buffer))
 		return -EFAULT;
 
-	err = do_vcc_ioctl(sock, cmd, (unsigned long) iobuf, 0);
-
-	if (!err) {
-		if (copy_in_user(&iobuf32->length, &iobuf->length,
-				 sizeof(int)))
-			err = -EFAULT;
-	}
-
-	return err;
+	return atm_getnames(compat_ptr(data), &iobuf32->length);
 }
 
 static int do_atmif_sioc(struct socket *sock, unsigned int cmd,
-- 
cgit v1.2.3-59-g8ed1b


From 8cacb4165985444c275a6f813f91f08479bdbfad Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 May 2020 17:53:35 -0400
Subject: atm: lift copyin from atm_dev_ioctl()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/atm/ioctl.c     | 25 ++++++++++++++++++++++++-
 net/atm/resources.c | 35 +++++------------------------------
 net/atm/resources.h |  4 ++--
 3 files changed, 31 insertions(+), 33 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index fdd0e3434523..52f2c77e656f 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -182,7 +182,30 @@ static int do_vcc_ioctl(struct socket *sock, unsigned int cmd,
 		}
 		error = atm_getnames(buf, len);
 	} else {
-		error = atm_dev_ioctl(cmd, argp, compat);
+		int number;
+
+		if (IS_ENABLED(CONFIG_COMPAT) && compat) {
+#ifdef CONFIG_COMPAT
+			struct compat_atmif_sioc __user *csioc = argp;
+			compat_uptr_t carg;
+
+			len = &csioc->length;
+			if (get_user(carg, &csioc->arg))
+				return -EFAULT;
+			buf = compat_ptr(carg);
+			if (get_user(number, &csioc->number))
+				return -EFAULT;
+#endif
+		} else {
+			struct atmif_sioc __user *sioc = argp;
+
+			len = &sioc->length;
+			if (get_user(buf, &sioc->arg))
+				return -EFAULT;
+			if (get_user(number, &sioc->number))
+				return -EFAULT;
+		}
+		error = atm_dev_ioctl(cmd, buf, len, number, compat);
 	}
 
 done:
diff --git a/net/atm/resources.c b/net/atm/resources.c
index 5507cc608969..94bdc6527ee8 100644
--- a/net/atm/resources.c
+++ b/net/atm/resources.c
@@ -227,39 +227,14 @@ int atm_getnames(void __user *buf, int __user *iobuf_len)
 	return error;
 }
 
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat)
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+		  int number, int compat)
 {
-	void __user *buf;
-	int error, len, number, size = 0;
+	int error, len, size = 0;
 	struct atm_dev *dev;
-	int __user *sioc_len;
 
-	if (IS_ENABLED(CONFIG_COMPAT) && compat) {
-#ifdef CONFIG_COMPAT
-		struct compat_atmif_sioc __user *csioc = arg;
-		compat_uptr_t carg;
-
-		sioc_len = &csioc->length;
-		if (get_user(carg, &csioc->arg))
-			return -EFAULT;
-		buf = compat_ptr(carg);
-
-		if (get_user(len, &csioc->length))
-			return -EFAULT;
-		if (get_user(number, &csioc->number))
-			return -EFAULT;
-#endif
-	} else {
-		struct atmif_sioc __user *sioc = arg;
-
-		sioc_len = &sioc->length;
-		if (get_user(buf, &sioc->arg))
-			return -EFAULT;
-		if (get_user(len, &sioc->length))
-			return -EFAULT;
-		if (get_user(number, &sioc->number))
-			return -EFAULT;
-	}
+	if (get_user(len, sioc_len))
+		return -EFAULT;
 
 	dev = try_then_request_module(atm_dev_lookup(number), "atm-device-%d",
 				      number);
diff --git a/net/atm/resources.h b/net/atm/resources.h
index 5e2c68d37d63..4a0839e92ff3 100644
--- a/net/atm/resources.h
+++ b/net/atm/resources.h
@@ -15,8 +15,8 @@ extern struct list_head atm_devs;
 extern struct mutex atm_dev_mutex;
 
 int atm_getnames(void __user *buf, int __user *iobuf_len);
-int atm_dev_ioctl(unsigned int cmd, void __user *arg, int compat);
-
+int atm_dev_ioctl(unsigned int cmd, void __user *buf, int __user *sioc_len,
+		  int number, int compat);
 
 #ifdef CONFIG_PROC_FS
 
-- 
cgit v1.2.3-59-g8ed1b


From 0edecc020b33f8e31d8baa80735b45e8e8434700 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 10 May 2020 18:13:56 -0400
Subject: atm: switch do_atmif_sioc() to direct use of atm_dev_ioctl()

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
---
 net/atm/ioctl.c | 25 ++++---------------------
 1 file changed, 4 insertions(+), 21 deletions(-)

diff --git a/net/atm/ioctl.c b/net/atm/ioctl.c
index 52f2c77e656f..838ebf0cabbf 100644
--- a/net/atm/ioctl.c
+++ b/net/atm/ioctl.c
@@ -286,30 +286,13 @@ static int do_atm_iobuf(struct socket *sock, unsigned int cmd,
 static int do_atmif_sioc(struct socket *sock, unsigned int cmd,
 			 unsigned long arg)
 {
-	struct atmif_sioc __user *sioc;
-	struct compat_atmif_sioc __user *sioc32;
+	struct compat_atmif_sioc __user *sioc32 = compat_ptr(arg);
+	int number;
 	u32 data;
-	void __user *datap;
-	int err;
 
-	sioc = compat_alloc_user_space(sizeof(*sioc));
-	sioc32 = compat_ptr(arg);
-
-	if (copy_in_user(&sioc->number, &sioc32->number, 2 * sizeof(int)) ||
-	    get_user(data, &sioc32->arg))
-		return -EFAULT;
-	datap = compat_ptr(data);
-	if (put_user(datap, &sioc->arg))
+	if (get_user(data, &sioc32->arg) || get_user(number, &sioc32->number))
 		return -EFAULT;
-
-	err = do_vcc_ioctl(sock, cmd, (unsigned long) sioc, 0);
-
-	if (!err) {
-		if (copy_in_user(&sioc32->length, &sioc->length,
-				 sizeof(int)))
-			err = -EFAULT;
-	}
-	return err;
+	return atm_dev_ioctl(cmd, compat_ptr(data), &sioc32->length, number, 0);
 }
 
 static int do_atm_ioctl(struct socket *sock, unsigned int cmd32,
-- 
cgit v1.2.3-59-g8ed1b


From 1c3739cb6e7ac5b5372438bf91b3d0404d18ecf9 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 10 Apr 2020 17:28:31 -0700
Subject: igc: Remove IGC_MAC_STATE_SRC_ADDR flag

MAC address filters based on source address are not currently supported
by the IGC driver. Despite that, the driver has some dangling code to
handle it, inherited from the IGB driver. This patch removes that code
to prepare for a follow-up patch that adds proper source MAC address
filter support.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  6 ++----
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 16 ++--------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 27 ++++++++-------------------
 3 files changed, 12 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 812e1cd695cf..885998d3f62e 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -231,9 +231,8 @@ bool igc_has_link(struct igc_adapter *adapter);
 void igc_reset(struct igc_adapter *adapter);
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
 int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const s8 queue, const u8 flags);
-int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const u8 flags);
+		       const s8 queue);
+int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr);
 int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
 			     int queue);
 void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio);
@@ -479,7 +478,6 @@ struct igc_mac_addr {
 
 #define IGC_MAC_STATE_DEFAULT		0x1
 #define IGC_MAC_STATE_IN_USE		0x2
-#define IGC_MAC_STATE_SRC_ADDR		0x4
 
 #define IGC_MAX_RXNFC_FILTERS		16
 
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index c6586e2be3a8..09d0305a5902 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1208,15 +1208,7 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
 		err = igc_add_mac_filter(adapter, input->filter.dst_addr,
-					 input->action, 0);
-		if (err)
-			return err;
-	}
-
-	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-		err = igc_add_mac_filter(adapter, input->filter.src_addr,
-					 input->action,
-					 IGC_MAC_STATE_SRC_ADDR);
+					 input->action);
 		if (err)
 			return err;
 	}
@@ -1246,12 +1238,8 @@ int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 		igc_del_vlan_prio_filter(adapter, prio);
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
-		igc_del_mac_filter(adapter, input->filter.src_addr,
-				   IGC_MAC_STATE_SRC_ADDR);
-
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
-		igc_del_mac_filter(adapter, input->filter.dst_addr, 0);
+		igc_del_mac_filter(adapter, input->filter.dst_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 0df5617eb9d0..3242136bb47a 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2184,8 +2184,7 @@ static void igc_nfc_filter_restore(struct igc_adapter *adapter)
 	spin_unlock(&adapter->nfc_lock);
 }
 
-static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-			       u8 flags)
+static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 {
 	int max_entries = adapter->hw.mac.rar_entry_count;
 	struct igc_mac_addr *entry;
@@ -2198,9 +2197,6 @@ static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 			continue;
 		if (!ether_addr_equal(addr, entry->addr))
 			continue;
-		if ((entry->state & IGC_MAC_STATE_SRC_ADDR) !=
-		    (flags & IGC_MAC_STATE_SRC_ADDR))
-			continue;
 
 		return i;
 	}
@@ -2231,23 +2227,19 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
  * @queue: If non-negative, queue assignment feature is enabled and frames
  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
  *         assignment is disabled.
- * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source
- *         address
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
 int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const s8 queue, const u8 flags)
+		       const s8 queue)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
 
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
-	if (flags & IGC_MAC_STATE_SRC_ADDR)
-		return -ENOTSUPP;
 
-	index = igc_find_mac_filter(adapter, addr, flags);
+	index = igc_find_mac_filter(adapter, addr);
 	if (index >= 0)
 		goto update_queue_assignment;
 
@@ -2259,7 +2251,7 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 		   index, addr, queue);
 
 	ether_addr_copy(adapter->mac_table[index].addr, addr);
-	adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE | flags;
+	adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE;
 update_queue_assignment:
 	adapter->mac_table[index].queue = queue;
 
@@ -2271,13 +2263,10 @@ update_queue_assignment:
  * igc_del_mac_filter() - Delete MAC address filter
  * @adapter: Pointer to adapter where the filter should be deleted from
  * @addr: MAC address
- * @flags: Set IGC_MAC_STATE_SRC_ADDR bit to indicate @address is a source
- *         address
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const u8 flags)
+int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 {
 	struct net_device *dev = adapter->netdev;
 	struct igc_mac_addr *entry;
@@ -2286,7 +2275,7 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	index = igc_find_mac_filter(adapter, addr, flags);
+	index = igc_find_mac_filter(adapter, addr);
 	if (index < 0)
 		return -ENOENT;
 
@@ -2463,14 +2452,14 @@ static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_add_mac_filter(adapter, addr, -1, 0);
+	return igc_add_mac_filter(adapter, addr, -1);
 }
 
 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_del_mac_filter(adapter, addr, 0);
+	return igc_del_mac_filter(adapter, addr);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From d66358cae25efbd35518d661991fce67af2945e7 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 10 Apr 2020 17:28:32 -0700
Subject: igc: Remove mac_table from igc_adapter

In igc_adapter we keep a sort of shadow copy of RAL and RAH registers.
There is not much benefit in keeping it, at the cost of maintainability,
since adding/removing MAC address filters is not hot path, and we
already keep filters information in adapter->nfc_filter_list for cleanup
and restoration purposes.

So in order to simplify the MAC address filtering code and prepare it
for source address support, this patch removes the mac_table from
igc_adapter.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         | 11 ------
 drivers/net/ethernet/intel/igc/igc_defines.h |  1 +
 drivers/net/ethernet/intel/igc/igc_main.c    | 57 ++++++++++------------------
 3 files changed, 21 insertions(+), 48 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 885998d3f62e..5ce859155396 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -190,8 +190,6 @@ struct igc_adapter {
 	/* lock for RX network flow classification filter */
 	spinlock_t nfc_lock;
 
-	struct igc_mac_addr *mac_table;
-
 	u8 rss_indir_tbl[IGC_RETA_SIZE];
 
 	unsigned long link_check_timeout;
@@ -470,15 +468,6 @@ struct igc_nfc_filter {
 	u16 action;
 };
 
-struct igc_mac_addr {
-	u8 addr[ETH_ALEN];
-	s8 queue;
-	u8 state; /* bitmask */
-};
-
-#define IGC_MAC_STATE_DEFAULT		0x1
-#define IGC_MAC_STATE_IN_USE		0x2
-
 #define IGC_MAX_RXNFC_FILTERS		16
 
 /* igc_desc_unused - calculate if we have unused descriptors */
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 51d8a15e239c..be152a93088a 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -62,6 +62,7 @@
  * (RAR[15]) for our directed address used by controllers with
  * manageability enabled, allowing us room for 15 multicast addresses.
  */
+#define IGC_RAH_RAH_MASK	0x0000FFFF
 #define IGC_RAH_QSEL_MASK	0x000C0000
 #define IGC_RAH_QSEL_SHIFT	18
 #define IGC_RAH_QSEL_ENABLE	BIT(28)
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 3242136bb47a..af3092813a06 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -820,17 +820,12 @@ static void igc_clear_mac_filter_hw(struct igc_adapter *adapter, int index)
 /* Set default MAC address for the PF in the first RAR entry */
 static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 {
-	struct igc_mac_addr *mac_table = &adapter->mac_table[0];
 	struct net_device *dev = adapter->netdev;
 	u8 *addr = adapter->hw.mac.addr;
 
 	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 
-	ether_addr_copy(mac_table->addr, addr);
-	mac_table->state = IGC_MAC_STATE_DEFAULT | IGC_MAC_STATE_IN_USE;
-	mac_table->queue = -1;
-
-	igc_set_mac_filter_hw(adapter, 0, addr, mac_table->queue);
+	igc_set_mac_filter_hw(adapter, 0, addr, -1);
 }
 
 /**
@@ -2186,16 +2181,21 @@ static void igc_nfc_filter_restore(struct igc_adapter *adapter)
 
 static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 {
-	int max_entries = adapter->hw.mac.rar_entry_count;
-	struct igc_mac_addr *entry;
+	struct igc_hw *hw = &adapter->hw;
+	int max_entries = hw->mac.rar_entry_count;
+	u32 ral, rah;
 	int i;
 
 	for (i = 0; i < max_entries; i++) {
-		entry = &adapter->mac_table[i];
+		ral = rd32(IGC_RAL(i));
+		rah = rd32(IGC_RAH(i));
 
-		if (!(entry->state & IGC_MAC_STATE_IN_USE))
+		if (!(rah & IGC_RAH_AV))
 			continue;
-		if (!ether_addr_equal(addr, entry->addr))
+		if ((rah & IGC_RAH_RAH_MASK) !=
+		    le16_to_cpup((__le16 *)(addr + 4)))
+			continue;
+		if (ral != le32_to_cpup((__le32 *)(addr)))
 			continue;
 
 		return i;
@@ -2206,14 +2206,15 @@ static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 
 static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
 {
-	int max_entries = adapter->hw.mac.rar_entry_count;
-	struct igc_mac_addr *entry;
+	struct igc_hw *hw = &adapter->hw;
+	int max_entries = hw->mac.rar_entry_count;
+	u32 rah;
 	int i;
 
 	for (i = 0; i < max_entries; i++) {
-		entry = &adapter->mac_table[i];
+		rah = rd32(IGC_RAH(i));
 
-		if (!(entry->state & IGC_MAC_STATE_IN_USE))
+		if (!(rah & IGC_RAH_AV))
 			return i;
 	}
 
@@ -2241,7 +2242,7 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 
 	index = igc_find_mac_filter(adapter, addr);
 	if (index >= 0)
-		goto update_queue_assignment;
+		goto update_filter;
 
 	index = igc_get_avail_mac_filter_slot(adapter);
 	if (index < 0)
@@ -2250,11 +2251,7 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	netdev_dbg(dev, "Add MAC address filter: index %d address %pM queue %d",
 		   index, addr, queue);
 
-	ether_addr_copy(adapter->mac_table[index].addr, addr);
-	adapter->mac_table[index].state |= IGC_MAC_STATE_IN_USE;
-update_queue_assignment:
-	adapter->mac_table[index].queue = queue;
-
+update_filter:
 	igc_set_mac_filter_hw(adapter, index, addr, queue);
 	return 0;
 }
@@ -2269,7 +2266,6 @@ update_queue_assignment:
 int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 {
 	struct net_device *dev = adapter->netdev;
-	struct igc_mac_addr *entry;
 	int index;
 
 	if (!is_valid_ether_addr(addr))
@@ -2279,24 +2275,18 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 	if (index < 0)
 		return -ENOENT;
 
-	entry = &adapter->mac_table[index];
-
-	if (entry->state & IGC_MAC_STATE_DEFAULT) {
+	if (index == 0) {
 		/* If this is the default filter, we don't actually delete it.
 		 * We just reset to its default value i.e. disable queue
 		 * assignment.
 		 */
 		netdev_dbg(dev, "Disable default MAC filter queue assignment");
 
-		entry->queue = -1;
-		igc_set_mac_filter_hw(adapter, 0, addr, entry->queue);
+		igc_set_mac_filter_hw(adapter, 0, addr, -1);
 	} else {
 		netdev_dbg(dev, "Delete MAC address filter: index %d address %pM",
 			   index, addr);
 
-		entry->state = 0;
-		entry->queue = -1;
-		memset(entry->addr, 0, ETH_ALEN);
 		igc_clear_mac_filter_hw(adapter, index);
 	}
 
@@ -3404,8 +3394,6 @@ static int igc_sw_init(struct igc_adapter *adapter)
 	struct pci_dev *pdev = adapter->pdev;
 	struct igc_hw *hw = &adapter->hw;
 
-	int size = sizeof(struct igc_mac_addr) * hw->mac.rar_entry_count;
-
 	pci_read_config_word(pdev, PCI_COMMAND, &hw->bus.pci_cmd_word);
 
 	/* set default ring sizes */
@@ -3429,10 +3417,6 @@ static int igc_sw_init(struct igc_adapter *adapter)
 	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
 	adapter->flags |= IGC_FLAG_HAS_MSIX;
 
-	adapter->mac_table = kzalloc(size, GFP_ATOMIC);
-	if (!adapter->mac_table)
-		return -ENOMEM;
-
 	igc_init_queue_configuration(adapter);
 
 	/* This call may decrease the number of queues */
@@ -5135,7 +5119,6 @@ static void igc_remove(struct pci_dev *pdev)
 	pci_iounmap(pdev, adapter->io_addr);
 	pci_release_mem_regions(pdev);
 
-	kfree(adapter->mac_table);
 	free_netdev(netdev);
 
 	pci_disable_pcie_error_reporting(pdev);
-- 
cgit v1.2.3-59-g8ed1b


From 750433d0aa097265432a2b30a8a984604bd76e7f Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 10 Apr 2020 17:28:33 -0700
Subject: igc: Add support for source address filters in core

This patch extends MAC address filter internal APIs igc_add_mac_filter()
and igc_del_mac_filter(), as well as local helpers, to support filters
based on source address.

A new parameter 'type' is added to the APIs to indicate if the filter
type is source or destination. In case it is source type, the RAH
register is configured accordingly in igc_set_mac_filter_hw().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         | 13 ++++++--
 drivers/net/ethernet/intel/igc/igc_defines.h |  2 ++
 drivers/net/ethernet/intel/igc/igc_ethtool.c |  6 ++--
 drivers/net/ethernet/intel/igc/igc_main.c    | 49 +++++++++++++++++++---------
 4 files changed, 49 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 5ce859155396..b501be243536 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -29,6 +29,11 @@ void igc_set_ethtool_ops(struct net_device *);
 #define MAX_ETYPE_FILTER		8
 #define IGC_RETA_SIZE			128
 
+enum igc_mac_filter_type {
+	IGC_MAC_FILTER_TYPE_DST = 0,
+	IGC_MAC_FILTER_TYPE_SRC
+};
+
 struct igc_tx_queue_stats {
 	u64 packets;
 	u64 bytes;
@@ -228,9 +233,11 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter);
 bool igc_has_link(struct igc_adapter *adapter);
 void igc_reset(struct igc_adapter *adapter);
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
-int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const s8 queue);
-int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr);
+int igc_add_mac_filter(struct igc_adapter *adapter,
+		       enum igc_mac_filter_type type, const u8 *addr,
+		       int queue);
+int igc_del_mac_filter(struct igc_adapter *adapter,
+		       enum igc_mac_filter_type type, const u8 *addr);
 int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
 			     int queue);
 void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio);
diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index be152a93088a..45b567587ca9 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -63,6 +63,8 @@
  * manageability enabled, allowing us room for 15 multicast addresses.
  */
 #define IGC_RAH_RAH_MASK	0x0000FFFF
+#define IGC_RAH_ASEL_MASK	0x00030000
+#define IGC_RAH_ASEL_SRC_ADDR	BIT(16)
 #define IGC_RAH_QSEL_MASK	0x000C0000
 #define IGC_RAH_QSEL_SHIFT	18
 #define IGC_RAH_QSEL_ENABLE	BIT(28)
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 09d0305a5902..6c27046a852d 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1207,7 +1207,8 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
-		err = igc_add_mac_filter(adapter, input->filter.dst_addr,
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+					 input->filter.dst_addr,
 					 input->action);
 		if (err)
 			return err;
@@ -1239,7 +1240,8 @@ int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	}
 
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
-		igc_del_mac_filter(adapter, input->filter.dst_addr);
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+				   input->filter.dst_addr);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index af3092813a06..398a9307af2b 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -766,12 +766,14 @@ static void igc_setup_tctl(struct igc_adapter *adapter)
  * igc_set_mac_filter_hw() - Set MAC address filter in hardware
  * @adapter: Pointer to adapter where the filter should be set
  * @index: Filter index
- * @addr: Destination MAC address
+ * @type: MAC address filter type (source or destination)
+ * @addr: MAC address
  * @queue: If non-negative, queue assignment feature is enabled and frames
  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
  *         assignment is disabled.
  */
 static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
+				  enum igc_mac_filter_type type,
 				  const u8 *addr, int queue)
 {
 	struct net_device *dev = adapter->netdev;
@@ -784,6 +786,11 @@ static void igc_set_mac_filter_hw(struct igc_adapter *adapter, int index,
 	ral = le32_to_cpup((__le32 *)(addr));
 	rah = le16_to_cpup((__le16 *)(addr + 4));
 
+	if (type == IGC_MAC_FILTER_TYPE_SRC) {
+		rah &= ~IGC_RAH_ASEL_MASK;
+		rah |= IGC_RAH_ASEL_SRC_ADDR;
+	}
+
 	if (queue >= 0) {
 		rah &= ~IGC_RAH_QSEL_MASK;
 		rah |= (queue << IGC_RAH_QSEL_SHIFT);
@@ -825,7 +832,7 @@ static void igc_set_default_mac_filter(struct igc_adapter *adapter)
 
 	netdev_dbg(dev, "Set default MAC address filter: address %pM", addr);
 
-	igc_set_mac_filter_hw(adapter, 0, addr, -1);
+	igc_set_mac_filter_hw(adapter, 0, IGC_MAC_FILTER_TYPE_DST, addr, -1);
 }
 
 /**
@@ -2179,7 +2186,8 @@ static void igc_nfc_filter_restore(struct igc_adapter *adapter)
 	spin_unlock(&adapter->nfc_lock);
 }
 
-static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr)
+static int igc_find_mac_filter(struct igc_adapter *adapter,
+			       enum igc_mac_filter_type type, const u8 *addr)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int max_entries = hw->mac.rar_entry_count;
@@ -2192,6 +2200,8 @@ static int igc_find_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 
 		if (!(rah & IGC_RAH_AV))
 			continue;
+		if (!!(rah & IGC_RAH_ASEL_SRC_ADDR) != type)
+			continue;
 		if ((rah & IGC_RAH_RAH_MASK) !=
 		    le16_to_cpup((__le16 *)(addr + 4)))
 			continue;
@@ -2224,6 +2234,7 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
 /**
  * igc_add_mac_filter() - Add MAC address filter
  * @adapter: Pointer to adapter where the filter should be added
+ * @type: MAC address filter type (source or destination)
  * @addr: MAC address
  * @queue: If non-negative, queue assignment feature is enabled and frames
  *         matching the filter are enqueued onto 'queue'. Otherwise, queue
@@ -2231,8 +2242,9 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
-		       const s8 queue)
+int igc_add_mac_filter(struct igc_adapter *adapter,
+		       enum igc_mac_filter_type type, const u8 *addr,
+		       int queue)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
@@ -2240,7 +2252,7 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	index = igc_find_mac_filter(adapter, addr);
+	index = igc_find_mac_filter(adapter, type, addr);
 	if (index >= 0)
 		goto update_filter;
 
@@ -2248,22 +2260,25 @@ int igc_add_mac_filter(struct igc_adapter *adapter, const u8 *addr,
 	if (index < 0)
 		return -ENOSPC;
 
-	netdev_dbg(dev, "Add MAC address filter: index %d address %pM queue %d",
-		   index, addr, queue);
+	netdev_dbg(dev, "Add MAC address filter: index %d type %s address %pM queue %d\n",
+		   index, type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
+		   addr, queue);
 
 update_filter:
-	igc_set_mac_filter_hw(adapter, index, addr, queue);
+	igc_set_mac_filter_hw(adapter, index, type, addr, queue);
 	return 0;
 }
 
 /**
  * igc_del_mac_filter() - Delete MAC address filter
  * @adapter: Pointer to adapter where the filter should be deleted from
+ * @type: MAC address filter type (source or destination)
  * @addr: MAC address
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
+int igc_del_mac_filter(struct igc_adapter *adapter,
+		       enum igc_mac_filter_type type, const u8 *addr)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
@@ -2271,7 +2286,7 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 	if (!is_valid_ether_addr(addr))
 		return -EINVAL;
 
-	index = igc_find_mac_filter(adapter, addr);
+	index = igc_find_mac_filter(adapter, type, addr);
 	if (index < 0)
 		return -ENOENT;
 
@@ -2282,10 +2297,12 @@ int igc_del_mac_filter(struct igc_adapter *adapter, const u8 *addr)
 		 */
 		netdev_dbg(dev, "Disable default MAC filter queue assignment");
 
-		igc_set_mac_filter_hw(adapter, 0, addr, -1);
+		igc_set_mac_filter_hw(adapter, 0, type, addr, -1);
 	} else {
-		netdev_dbg(dev, "Delete MAC address filter: index %d address %pM",
-			   index, addr);
+		netdev_dbg(dev, "Delete MAC address filter: index %d type %s address %pM\n",
+			   index,
+			   type == IGC_MAC_FILTER_TYPE_DST ? "dst" : "src",
+			   addr);
 
 		igc_clear_mac_filter_hw(adapter, index);
 	}
@@ -2442,14 +2459,14 @@ static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_add_mac_filter(adapter, addr, -1);
+	return igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr, -1);
 }
 
 static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_del_mac_filter(adapter, addr);
+	return igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 8eb2449d839079f979eded636dabcd6b815ff28f Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Mon, 13 Apr 2020 09:00:50 -0700
Subject: igc: Enable NFC rules based on source MAC address

This patch adds support for Network Flow Classification (NFC) rules
based on source MAC address. Note that the controller doesn't support
rules with both source and destination addresses set, so this special
case is checked in igc_add_ethtool_nfc_entry().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 32 +++++++++++++++++++---------
 1 file changed, 22 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 6c27046a852d..42ecb493c1a2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1188,16 +1188,8 @@ static int igc_set_rss_hash_opt(struct igc_adapter *adapter,
 
 int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 {
-	struct igc_hw *hw = &adapter->hw;
 	int err = -EINVAL;
 
-	if (hw->mac.type == igc_i225 &&
-	    !(input->filter.match_flags & ~IGC_FILTER_FLAG_SRC_MAC_ADDR)) {
-		netdev_err(adapter->netdev,
-			   "i225 doesn't support flow classification rules specifying only source addresses\n");
-		return -EOPNOTSUPP;
-	}
-
 	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
 		u16 etype = ntohs(input->filter.etype);
 
@@ -1206,6 +1198,14 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 			return err;
 	}
 
+	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+					 input->filter.src_addr,
+					 input->action);
+		if (err)
+			return err;
+	}
+
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
 		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
 					 input->filter.dst_addr,
@@ -1239,6 +1239,10 @@ int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 		igc_del_vlan_prio_filter(adapter, prio);
 	}
 
+	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+				   input->filter.src_addr);
+
 	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
 				   input->filter.dst_addr);
@@ -1334,20 +1338,28 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
 		input->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
 	}
 
-	/* Only support matching addresses by the full mask */
+	/* Both source and destination address filters only support the full
+	 * mask.
+	 */
 	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
 		input->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR;
 		ether_addr_copy(input->filter.src_addr,
 				fsp->h_u.ether_spec.h_source);
 	}
 
-	/* Only support matching addresses by the full mask */
 	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
 		input->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR;
 		ether_addr_copy(input->filter.dst_addr,
 				fsp->h_u.ether_spec.h_dest);
 	}
 
+	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
+	    input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		netdev_dbg(netdev, "Filters with both dst and src are not supported\n");
+		err = -EOPNOTSUPP;
+		goto err_out;
+	}
+
 	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
 		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
 			netdev_dbg(netdev, "VLAN mask not supported\n");
-- 
cgit v1.2.3-59-g8ed1b


From 49ee3c2ab5234757bfb56a0b3a3cb422f427e3a3 Mon Sep 17 00:00:00 2001
From: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Date: Fri, 17 Apr 2020 09:35:31 -0700
Subject: e1000: Do not perform reset in reset_task if we are already down

We are seeing a deadlock in e1000 down when NAPI is being disabled. Looking
over the kernel function trace of the system it appears that the interface
is being closed and then a reset is hitting which deadlocks the interface
as the NAPI interface is already disabled.

To prevent this from happening I am disabling the reset task when
__E1000_DOWN is already set. In addition code has been added so that we set
the __E1000_DOWN while holding the __E1000_RESETTING flag in e1000_close in
order to guarantee that the reset task will not run after we have started
the close call.

Signed-off-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Tested-by: Maxim Zhukov <mussitantesmortem@gmail.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_main.c | 18 ++++++++++++++----
 1 file changed, 14 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_main.c b/drivers/net/ethernet/intel/e1000/e1000_main.c
index 05bc6e216bca..d9fa4600f745 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_main.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_main.c
@@ -542,8 +542,13 @@ void e1000_reinit_locked(struct e1000_adapter *adapter)
 	WARN_ON(in_interrupt());
 	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags))
 		msleep(1);
-	e1000_down(adapter);
-	e1000_up(adapter);
+
+	/* only run the task if not already down */
+	if (!test_bit(__E1000_DOWN, &adapter->flags)) {
+		e1000_down(adapter);
+		e1000_up(adapter);
+	}
+
 	clear_bit(__E1000_RESETTING, &adapter->flags);
 }
 
@@ -1433,10 +1438,15 @@ int e1000_close(struct net_device *netdev)
 	struct e1000_hw *hw = &adapter->hw;
 	int count = E1000_CHECK_RESET_COUNT;
 
-	while (test_bit(__E1000_RESETTING, &adapter->flags) && count--)
+	while (test_and_set_bit(__E1000_RESETTING, &adapter->flags) && count--)
 		usleep_range(10000, 20000);
 
-	WARN_ON(test_bit(__E1000_RESETTING, &adapter->flags));
+	WARN_ON(count < 0);
+
+	/* signal that we're down so that the reset task will no longer run */
+	set_bit(__E1000_DOWN, &adapter->flags);
+	clear_bit(__E1000_RESETTING, &adapter->flags);
+
 	e1000_down(adapter);
 	e1000_power_down_phy(adapter);
 	e1000_free_irq(adapter);
-- 
cgit v1.2.3-59-g8ed1b


From 3b5fc88f78ea5bc6d2da06c6acff8fe6ba0e7e40 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 20 Apr 2020 14:38:53 +0300
Subject: igc: Remove obsolete circuit breaker registers

Some of the circuit breaker registers are obsolete
and not applicable to the i225 device.
This patch cleans up these registers.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.c  | 4 ----
 drivers/net/ethernet/intel/igc/igc_regs.h | 7 -------
 2 files changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 12aa6b5fcb5d..89445ab02a98 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -307,12 +307,8 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw)
 	rd32(IGC_ICTXQMTC);
 	rd32(IGC_ICRXDMTC);
 
-	rd32(IGC_CBTMPC);
-	rd32(IGC_HTDPMC);
-	rd32(IGC_CBRMPC);
 	rd32(IGC_RPTHC);
 	rd32(IGC_HGPTC);
-	rd32(IGC_HTCBDPC);
 	rd32(IGC_HGORCL);
 	rd32(IGC_HGORCH);
 	rd32(IGC_HGOTCL);
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 61db951f0947..f2654f379d88 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -68,13 +68,6 @@
 #define IGC_ICRXDMTC		0x04120  /* Rx Descriptor Min Threshold Count */
 #define IGC_ICRXOC		0x04124  /* Receiver Overrun Count */
 
-#define IGC_CBTMPC		0x0402C  /* Circuit Breaker TX Packet Count */
-#define IGC_HTDPMC		0x0403C  /* Host Transmit Discarded Packets */
-#define IGC_CBRMPC		0x040FC  /* Circuit Breaker RX Packet Count */
-#define IGC_RPTHC		0x04104  /* Rx Packets To Host */
-#define IGC_HGPTC		0x04118  /* Host Good Packets TX Count */
-#define IGC_HTCBDPC		0x04124  /* Host TX Circ.Breaker Drop Count */
-
 /* MSI-X Table Register Descriptions */
 #define IGC_PBACL		0x05B68  /* MSIx PBA Clear - R/W 1 to clear */
 
-- 
cgit v1.2.3-59-g8ed1b


From d1fe569f517e8ee402f5faac28e65c3a72790d0e Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 22 Apr 2020 11:41:55 +0300
Subject: igc: Remove header redirection register

The header redirection missed packet counter is not applicable to the i225
device. This patch cleans up this register.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index f2654f379d88..79bd104363ed 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -199,7 +199,6 @@
 #define IGC_HGOTCL	0x04130  /* Host Good Octets Transmit Count Low */
 #define IGC_HGOTCH	0x04134  /* Host Good Octets Transmit Count High */
 #define IGC_LENERRS	0x04138  /* Length Errors Count */
-#define IGC_HRMPC	0x0A018  /* Header Redirection Missed Packet Count */
 
 /* Time sync registers */
 #define IGC_TSICR	0x0B66C  /* Time Sync Interrupt Cause */
-- 
cgit v1.2.3-59-g8ed1b


From 281380a6fd8a202635c484c5ac978c468c006cbd Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Wed, 22 Apr 2020 13:46:47 +0300
Subject: igc: Remove per queue good transmitted counter register

The per-queue good transmitted packet counter is not applicable to the i225
device. This patch cleans up this register.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_regs.h | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 79bd104363ed..7f999cfc9b39 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -124,9 +124,6 @@
 #define IGC_MMDAC		13 /* MMD Access Control */
 #define IGC_MMDAAD		14 /* MMD Access Address/Data */
 
-/* Good transmitted packets counter registers */
-#define IGC_PQGPTC(_n)		(0x010014 + (0x100 * (_n)))
-
 /* Statistics Register Descriptions */
 #define IGC_CRCERRS	0x04000  /* CRC Error Count - R/clr */
 #define IGC_ALGNERRC	0x04004  /* Alignment Error Count - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From 8e34cad1677a45211d5112ea2bab770656139d20 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:05 -0700
Subject: igc: Remove unused field from igc_nfc_filter

The 'cookie' field is not used anywhere in the code so this patch
removes it from struct igc_nfc_filter.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index b501be243536..7c92fc7703be 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -470,7 +470,6 @@ struct igc_nfc_input {
 struct igc_nfc_filter {
 	struct hlist_node nfc_node;
 	struct igc_nfc_input filter;
-	unsigned long cookie;
 	u16 sw_idx;
 	u16 action;
 };
-- 
cgit v1.2.3-59-g8ed1b


From 4d0710c241dddf0db62b4be533347d3acd9e8c96 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:06 -0700
Subject: igc: Get rid of igc_max_channels()

The local function igc_max_channels() is a pointless wrapper around
igc_get_max_rss_queues(). This patch removes it and updates the callers
accordingly. It also does some cleanup on igc_get_max_rss_queues().

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 9 ++-------
 drivers/net/ethernet/intel/igc/igc_main.c    | 7 +------
 2 files changed, 3 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 42ecb493c1a2..9081f36ee1f7 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1508,18 +1508,13 @@ static int igc_set_rxfh(struct net_device *netdev, const u32 *indir,
 	return 0;
 }
 
-static unsigned int igc_max_channels(struct igc_adapter *adapter)
-{
-	return igc_get_max_rss_queues(adapter);
-}
-
 static void igc_get_channels(struct net_device *netdev,
 			     struct ethtool_channels *ch)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
 	/* Report maximum channels */
-	ch->max_combined = igc_max_channels(adapter);
+	ch->max_combined = igc_get_max_rss_queues(adapter);
 
 	/* Report info for other vector */
 	if (adapter->flags & IGC_FLAG_HAS_MSIX) {
@@ -1546,7 +1541,7 @@ static int igc_set_channels(struct net_device *netdev,
 		return -EINVAL;
 
 	/* Verify the number of channels doesn't exceed hw limits */
-	max_combined = igc_max_channels(adapter);
+	max_combined = igc_get_max_rss_queues(adapter);
 	if (count > max_combined)
 		return -EINVAL;
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 398a9307af2b..843e8a2aaf24 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2731,12 +2731,7 @@ void igc_set_flag_queue_pairs(struct igc_adapter *adapter,
 
 unsigned int igc_get_max_rss_queues(struct igc_adapter *adapter)
 {
-	unsigned int max_rss_queues;
-
-	/* Determine the maximum number of RSS queues supported. */
-	max_rss_queues = IGC_MAX_RX_QUEUES;
-
-	return max_rss_queues;
+	return IGC_MAX_RX_QUEUES;
 }
 
 static void igc_init_queue_configuration(struct igc_adapter *adapter)
-- 
cgit v1.2.3-59-g8ed1b


From 8b9c23cdf04f927d591f3fbc3b846d941557e886 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:07 -0700
Subject: igc: Cleanup _get|set_rxnfc ethtool ops

This patch makes a trivial change to igc_get_rxnfc() and igc_set_rxnfc()
to simplify their logic: each switch case now returns directly instead of
going through a local 'ret' variable.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 33 +++++++++-------------------
 1 file changed, 10 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 9081f36ee1f7..37fb5f0544ad 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1055,31 +1055,23 @@ static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 			 u32 *rule_locs)
 {
 	struct igc_adapter *adapter = netdev_priv(dev);
-	int ret = -EOPNOTSUPP;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
 		cmd->data = adapter->num_rx_queues;
-		ret = 0;
-		break;
+		return 0;
 	case ETHTOOL_GRXCLSRLCNT:
 		cmd->rule_cnt = adapter->nfc_filter_count;
-		ret = 0;
-		break;
+		return 0;
 	case ETHTOOL_GRXCLSRULE:
-		ret = igc_get_ethtool_nfc_entry(adapter, cmd);
-		break;
+		return igc_get_ethtool_nfc_entry(adapter, cmd);
 	case ETHTOOL_GRXCLSRLALL:
-		ret = igc_get_ethtool_nfc_all(adapter, cmd, rule_locs);
-		break;
+		return igc_get_ethtool_nfc_all(adapter, cmd, rule_locs);
 	case ETHTOOL_GRXFH:
-		ret = igc_get_rss_hash_opts(adapter, cmd);
-		break;
+		return igc_get_rss_hash_opts(adapter, cmd);
 	default:
-		break;
+		return -EOPNOTSUPP;
 	}
-
-	return ret;
 }
 
 #define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \
@@ -1418,22 +1410,17 @@ static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter,
 static int igc_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
 {
 	struct igc_adapter *adapter = netdev_priv(dev);
-	int ret = -EOPNOTSUPP;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXFH:
-		ret = igc_set_rss_hash_opt(adapter, cmd);
-		break;
+		return igc_set_rss_hash_opt(adapter, cmd);
 	case ETHTOOL_SRXCLSRLINS:
-		ret = igc_add_ethtool_nfc_entry(adapter, cmd);
-		break;
+		return igc_add_ethtool_nfc_entry(adapter, cmd);
 	case ETHTOOL_SRXCLSRLDEL:
-		ret = igc_del_ethtool_nfc_entry(adapter, cmd);
+		return igc_del_ethtool_nfc_entry(adapter, cmd);
 	default:
-		break;
+		return -EOPNOTSUPP;
 	}
-
-	return ret;
 }
 
 void igc_write_rss_indir_tbl(struct igc_adapter *adapter)
-- 
cgit v1.2.3-59-g8ed1b


From 876ea04db7675391baa8011795188e55d154aa4e Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:08 -0700
Subject: igc: Early return in igc_get_ethtool_nfc_entry()

This patch rewrites the second half of igc_get_ethtool_nfc_entry() to
follow the 'return early' pattern seen in other parts of the driver and
removes some duplicate comments.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 58 ++++++++++++++--------------
 1 file changed, 28 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 37fb5f0544ad..f03093d1f863 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -945,38 +945,36 @@ static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter,
 	if (!rule || fsp->location != rule->sw_idx)
 		return -EINVAL;
 
-	if (rule->filter.match_flags) {
-		fsp->flow_type = ETHER_FLOW;
-		fsp->ring_cookie = rule->action;
-		if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-			fsp->h_u.ether_spec.h_proto = rule->filter.etype;
-			fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
-		}
-		if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-			fsp->flow_type |= FLOW_EXT;
-			fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
-			fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
-		}
-		if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
-			ether_addr_copy(fsp->h_u.ether_spec.h_dest,
-					rule->filter.dst_addr);
-			/* As we only support matching by the full
-			 * mask, return the mask to userspace
-			 */
-			eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
-		}
-		if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-			ether_addr_copy(fsp->h_u.ether_spec.h_source,
-					rule->filter.src_addr);
-			/* As we only support matching by the full
-			 * mask, return the mask to userspace
-			 */
-			eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
-		}
+	if (!rule->filter.match_flags)
+		return -EINVAL;
 
-		return 0;
+	fsp->flow_type = ETHER_FLOW;
+	fsp->ring_cookie = rule->action;
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+		fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
 	}
-	return -EINVAL;
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		fsp->flow_type |= FLOW_EXT;
+		fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+		fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+		ether_addr_copy(fsp->h_u.ether_spec.h_dest,
+				rule->filter.dst_addr);
+		eth_broadcast_addr(fsp->m_u.ether_spec.h_dest);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		ether_addr_copy(fsp->h_u.ether_spec.h_source,
+				rule->filter.src_addr);
+		eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
+	}
+
+	return 0;
 }
 
 static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter,
-- 
cgit v1.2.3-59-g8ed1b


From 7df76bd1918188c80ee0bb2e849756d07ce71507 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:09 -0700
Subject: igc: Add 'igc_ethtool_' prefix to functions in igc_ethtool.c

This patch adds the prefix 'igc_ethtool_' to all functions defined in
igc_ethtool.c so they align with the naming convention already followed
by other parts of the driver (e.g. igc_tsn, igc_ptp). Also, this avoids
some name clashes with functions added to igc_main.c by upcoming
patches in this series. No functionality is changed by this patch, just
function renaming.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |   3 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 215 ++++++++++++++-------------
 drivers/net/ethernet/intel/igc/igc_main.c    |   2 +-
 3 files changed, 112 insertions(+), 108 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 7c92fc7703be..359ac40908f6 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -16,8 +16,7 @@
 
 #include "igc_hw.h"
 
-/* forward declaration */
-void igc_set_ethtool_ops(struct net_device *);
+void igc_ethtool_set_ops(struct net_device *);
 
 /* Transmit and receive queues */
 #define IGC_MAX_RX_QUEUES		4
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f03093d1f863..0399ca1d7d0c 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -124,8 +124,8 @@ static const char igc_priv_flags_strings[][ETH_GSTRING_LEN] = {
 
 #define IGC_PRIV_FLAGS_STR_LEN ARRAY_SIZE(igc_priv_flags_strings)
 
-static void igc_get_drvinfo(struct net_device *netdev,
-			    struct ethtool_drvinfo *drvinfo)
+static void igc_ethtool_get_drvinfo(struct net_device *netdev,
+				    struct ethtool_drvinfo *drvinfo)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -139,13 +139,13 @@ static void igc_get_drvinfo(struct net_device *netdev,
 	drvinfo->n_priv_flags = IGC_PRIV_FLAGS_STR_LEN;
 }
 
-static int igc_get_regs_len(struct net_device *netdev)
+static int igc_ethtool_get_regs_len(struct net_device *netdev)
 {
 	return IGC_REGS_LEN * sizeof(u32);
 }
 
-static void igc_get_regs(struct net_device *netdev,
-			 struct ethtool_regs *regs, void *p)
+static void igc_ethtool_get_regs(struct net_device *netdev,
+				 struct ethtool_regs *regs, void *p)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -323,7 +323,8 @@ static void igc_get_regs(struct net_device *netdev,
 		regs_buff[205 + i] = rd32(IGC_ETQF(i));
 }
 
-static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+static void igc_ethtool_get_wol(struct net_device *netdev,
+				struct ethtool_wolinfo *wol)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -354,7 +355,8 @@ static void igc_get_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 		wol->wolopts |= WAKE_PHY;
 }
 
-static int igc_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
+static int igc_ethtool_set_wol(struct net_device *netdev,
+			       struct ethtool_wolinfo *wol)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -382,21 +384,21 @@ static int igc_set_wol(struct net_device *netdev, struct ethtool_wolinfo *wol)
 	return 0;
 }
 
-static u32 igc_get_msglevel(struct net_device *netdev)
+static u32 igc_ethtool_get_msglevel(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
 	return adapter->msg_enable;
 }
 
-static void igc_set_msglevel(struct net_device *netdev, u32 data)
+static void igc_ethtool_set_msglevel(struct net_device *netdev, u32 data)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
 	adapter->msg_enable = data;
 }
 
-static int igc_nway_reset(struct net_device *netdev)
+static int igc_ethtool_nway_reset(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -405,7 +407,7 @@ static int igc_nway_reset(struct net_device *netdev)
 	return 0;
 }
 
-static u32 igc_get_link(struct net_device *netdev)
+static u32 igc_ethtool_get_link(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_mac_info *mac = &adapter->hw.mac;
@@ -422,15 +424,15 @@ static u32 igc_get_link(struct net_device *netdev)
 	return igc_has_link(adapter);
 }
 
-static int igc_get_eeprom_len(struct net_device *netdev)
+static int igc_ethtool_get_eeprom_len(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
 	return adapter->hw.nvm.word_size * 2;
 }
 
-static int igc_get_eeprom(struct net_device *netdev,
-			  struct ethtool_eeprom *eeprom, u8 *bytes)
+static int igc_ethtool_get_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *bytes)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -476,8 +478,8 @@ static int igc_get_eeprom(struct net_device *netdev,
 	return ret_val;
 }
 
-static int igc_set_eeprom(struct net_device *netdev,
-			  struct ethtool_eeprom *eeprom, u8 *bytes)
+static int igc_ethtool_set_eeprom(struct net_device *netdev,
+				  struct ethtool_eeprom *eeprom, u8 *bytes)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -544,8 +546,8 @@ static int igc_set_eeprom(struct net_device *netdev,
 	return ret_val;
 }
 
-static void igc_get_ringparam(struct net_device *netdev,
-			      struct ethtool_ringparam *ring)
+static void igc_ethtool_get_ringparam(struct net_device *netdev,
+				      struct ethtool_ringparam *ring)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -555,8 +557,8 @@ static void igc_get_ringparam(struct net_device *netdev,
 	ring->tx_pending = adapter->tx_ring_count;
 }
 
-static int igc_set_ringparam(struct net_device *netdev,
-			     struct ethtool_ringparam *ring)
+static int igc_ethtool_set_ringparam(struct net_device *netdev,
+				     struct ethtool_ringparam *ring)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_ring *temp_ring;
@@ -670,8 +672,8 @@ clear_reset:
 	return err;
 }
 
-static void igc_get_pauseparam(struct net_device *netdev,
-			       struct ethtool_pauseparam *pause)
+static void igc_ethtool_get_pauseparam(struct net_device *netdev,
+				       struct ethtool_pauseparam *pause)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -689,8 +691,8 @@ static void igc_get_pauseparam(struct net_device *netdev,
 	}
 }
 
-static int igc_set_pauseparam(struct net_device *netdev,
-			      struct ethtool_pauseparam *pause)
+static int igc_ethtool_set_pauseparam(struct net_device *netdev,
+				      struct ethtool_pauseparam *pause)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -729,7 +731,8 @@ static int igc_set_pauseparam(struct net_device *netdev,
 	return retval;
 }
 
-static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
+static void igc_ethtool_get_strings(struct net_device *netdev, u32 stringset,
+				    u8 *data)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	u8 *p = data;
@@ -780,7 +783,7 @@ static void igc_get_strings(struct net_device *netdev, u32 stringset, u8 *data)
 	}
 }
 
-static int igc_get_sset_count(struct net_device *netdev, int sset)
+static int igc_ethtool_get_sset_count(struct net_device *netdev, int sset)
 {
 	switch (sset) {
 	case ETH_SS_STATS:
@@ -794,7 +797,7 @@ static int igc_get_sset_count(struct net_device *netdev, int sset)
 	}
 }
 
-static void igc_get_ethtool_stats(struct net_device *netdev,
+static void igc_ethtool_get_stats(struct net_device *netdev,
 				  struct ethtool_stats *stats, u64 *data)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
@@ -850,8 +853,8 @@ static void igc_get_ethtool_stats(struct net_device *netdev,
 	spin_unlock(&adapter->stats64_lock);
 }
 
-static int igc_get_coalesce(struct net_device *netdev,
-			    struct ethtool_coalesce *ec)
+static int igc_ethtool_get_coalesce(struct net_device *netdev,
+				    struct ethtool_coalesce *ec)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -870,8 +873,8 @@ static int igc_get_coalesce(struct net_device *netdev,
 	return 0;
 }
 
-static int igc_set_coalesce(struct net_device *netdev,
-			    struct ethtool_coalesce *ec)
+static int igc_ethtool_set_coalesce(struct net_device *netdev,
+				    struct ethtool_coalesce *ec)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	int i;
@@ -928,7 +931,7 @@ static int igc_set_coalesce(struct net_device *netdev,
 }
 
 #define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
-static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter,
+static int igc_ethtool_get_nfc_entry(struct igc_adapter *adapter,
 				     struct ethtool_rxnfc *cmd)
 {
 	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
@@ -977,7 +980,7 @@ static int igc_get_ethtool_nfc_entry(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter,
+static int igc_ethtool_get_nfc_all(struct igc_adapter *adapter,
 				   struct ethtool_rxnfc *cmd,
 				   u32 *rule_locs)
 {
@@ -999,8 +1002,8 @@ static int igc_get_ethtool_nfc_all(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_get_rss_hash_opts(struct igc_adapter *adapter,
-				 struct ethtool_rxnfc *cmd)
+static int igc_ethtool_get_rss_hash_opts(struct igc_adapter *adapter,
+					 struct ethtool_rxnfc *cmd)
 {
 	cmd->data = 0;
 
@@ -1049,8 +1052,8 @@ static int igc_get_rss_hash_opts(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
-			 u32 *rule_locs)
+static int igc_ethtool_get_rxnfc(struct net_device *dev,
+				 struct ethtool_rxnfc *cmd, u32 *rule_locs)
 {
 	struct igc_adapter *adapter = netdev_priv(dev);
 
@@ -1062,11 +1065,11 @@ static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 		cmd->rule_cnt = adapter->nfc_filter_count;
 		return 0;
 	case ETHTOOL_GRXCLSRULE:
-		return igc_get_ethtool_nfc_entry(adapter, cmd);
+		return igc_ethtool_get_nfc_entry(adapter, cmd);
 	case ETHTOOL_GRXCLSRLALL:
-		return igc_get_ethtool_nfc_all(adapter, cmd, rule_locs);
+		return igc_ethtool_get_nfc_all(adapter, cmd, rule_locs);
 	case ETHTOOL_GRXFH:
-		return igc_get_rss_hash_opts(adapter, cmd);
+		return igc_ethtool_get_rss_hash_opts(adapter, cmd);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1074,8 +1077,8 @@ static int igc_get_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd,
 
 #define UDP_RSS_FLAGS (IGC_FLAG_RSS_FIELD_IPV4_UDP | \
 		       IGC_FLAG_RSS_FIELD_IPV6_UDP)
-static int igc_set_rss_hash_opt(struct igc_adapter *adapter,
-				struct ethtool_rxnfc *nfc)
+static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter,
+					struct ethtool_rxnfc *nfc)
 {
 	u32 flags = adapter->flags;
 
@@ -1240,7 +1243,7 @@ int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	return 0;
 }
 
-static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter,
+static int igc_ethtool_update_nfc_entry(struct igc_adapter *adapter,
 					struct igc_nfc_filter *input,
 					u16 sw_idx)
 {
@@ -1288,7 +1291,7 @@ static int igc_update_ethtool_nfc_entry(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
+static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
 				     struct ethtool_rxnfc *cmd)
 {
 	struct net_device *netdev = adapter->netdev;
@@ -1379,7 +1382,7 @@ static int igc_add_ethtool_nfc_entry(struct igc_adapter *adapter,
 	if (err)
 		goto err_out_w_lock;
 
-	igc_update_ethtool_nfc_entry(adapter, input, input->sw_idx);
+	igc_ethtool_update_nfc_entry(adapter, input, input->sw_idx);
 
 	spin_unlock(&adapter->nfc_lock);
 	return 0;
@@ -1391,7 +1394,7 @@ err_out:
 	return err;
 }
 
-static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter,
+static int igc_ethtool_del_nfc_entry(struct igc_adapter *adapter,
 				     struct ethtool_rxnfc *cmd)
 {
 	struct ethtool_rx_flow_spec *fsp =
@@ -1399,23 +1402,24 @@ static int igc_del_ethtool_nfc_entry(struct igc_adapter *adapter,
 	int err;
 
 	spin_lock(&adapter->nfc_lock);
-	err = igc_update_ethtool_nfc_entry(adapter, NULL, fsp->location);
+	err = igc_ethtool_update_nfc_entry(adapter, NULL, fsp->location);
 	spin_unlock(&adapter->nfc_lock);
 
 	return err;
 }
 
-static int igc_set_rxnfc(struct net_device *dev, struct ethtool_rxnfc *cmd)
+static int igc_ethtool_set_rxnfc(struct net_device *dev,
+				 struct ethtool_rxnfc *cmd)
 {
 	struct igc_adapter *adapter = netdev_priv(dev);
 
 	switch (cmd->cmd) {
 	case ETHTOOL_SRXFH:
-		return igc_set_rss_hash_opt(adapter, cmd);
+		return igc_ethtool_set_rss_hash_opt(adapter, cmd);
 	case ETHTOOL_SRXCLSRLINS:
-		return igc_add_ethtool_nfc_entry(adapter, cmd);
+		return igc_ethtool_add_nfc_entry(adapter, cmd);
 	case ETHTOOL_SRXCLSRLDEL:
-		return igc_del_ethtool_nfc_entry(adapter, cmd);
+		return igc_ethtool_del_nfc_entry(adapter, cmd);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -1443,13 +1447,13 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter)
 	}
 }
 
-static u32 igc_get_rxfh_indir_size(struct net_device *netdev)
+static u32 igc_ethtool_get_rxfh_indir_size(struct net_device *netdev)
 {
 	return IGC_RETA_SIZE;
 }
 
-static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
-			u8 *hfunc)
+static int igc_ethtool_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
+				u8 *hfunc)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	int i;
@@ -1464,8 +1468,8 @@ static int igc_get_rxfh(struct net_device *netdev, u32 *indir, u8 *key,
 	return 0;
 }
 
-static int igc_set_rxfh(struct net_device *netdev, const u32 *indir,
-			const u8 *key, const u8 hfunc)
+static int igc_ethtool_set_rxfh(struct net_device *netdev, const u32 *indir,
+				const u8 *key, const u8 hfunc)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	u32 num_queues;
@@ -1493,8 +1497,8 @@ static int igc_set_rxfh(struct net_device *netdev, const u32 *indir,
 	return 0;
 }
 
-static void igc_get_channels(struct net_device *netdev,
-			     struct ethtool_channels *ch)
+static void igc_ethtool_get_channels(struct net_device *netdev,
+				     struct ethtool_channels *ch)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
@@ -1510,8 +1514,8 @@ static void igc_get_channels(struct net_device *netdev,
 	ch->combined_count = adapter->rss_queues;
 }
 
-static int igc_set_channels(struct net_device *netdev,
-			    struct ethtool_channels *ch)
+static int igc_ethtool_set_channels(struct net_device *netdev,
+				    struct ethtool_channels *ch)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	unsigned int count = ch->combined_count;
@@ -1543,8 +1547,8 @@ static int igc_set_channels(struct net_device *netdev,
 	return 0;
 }
 
-static int igc_get_ts_info(struct net_device *dev,
-			   struct ethtool_ts_info *info)
+static int igc_ethtool_get_ts_info(struct net_device *dev,
+				   struct ethtool_ts_info *info)
 {
 	struct igc_adapter *adapter = netdev_priv(dev);
 
@@ -1576,7 +1580,7 @@ static int igc_get_ts_info(struct net_device *dev,
 	}
 }
 
-static u32 igc_get_priv_flags(struct net_device *netdev)
+static u32 igc_ethtool_get_priv_flags(struct net_device *netdev)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	u32 priv_flags = 0;
@@ -1587,7 +1591,7 @@ static u32 igc_get_priv_flags(struct net_device *netdev)
 	return priv_flags;
 }
 
-static int igc_set_priv_flags(struct net_device *netdev, u32 priv_flags)
+static int igc_ethtool_set_priv_flags(struct net_device *netdev, u32 priv_flags)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	unsigned int flags = adapter->flags;
@@ -1622,8 +1626,8 @@ static void igc_ethtool_complete(struct net_device *netdev)
 	pm_runtime_put(&adapter->pdev->dev);
 }
 
-static int igc_get_link_ksettings(struct net_device *netdev,
-				  struct ethtool_link_ksettings *cmd)
+static int igc_ethtool_get_link_ksettings(struct net_device *netdev,
+					  struct ethtool_link_ksettings *cmd)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct igc_hw *hw = &adapter->hw;
@@ -1729,8 +1733,9 @@ static int igc_get_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
-static int igc_set_link_ksettings(struct net_device *netdev,
-				  const struct ethtool_link_ksettings *cmd)
+static int
+igc_ethtool_set_link_ksettings(struct net_device *netdev,
+			       const struct ethtool_link_ksettings *cmd)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	struct net_device *dev = adapter->netdev;
@@ -1796,8 +1801,8 @@ static int igc_set_link_ksettings(struct net_device *netdev,
 	return 0;
 }
 
-static void igc_diag_test(struct net_device *netdev,
-			  struct ethtool_test *eth_test, u64 *data)
+static void igc_ethtool_diag_test(struct net_device *netdev,
+				  struct ethtool_test *eth_test, u64 *data)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 	bool if_running = netif_running(netdev);
@@ -1856,45 +1861,45 @@ static void igc_diag_test(struct net_device *netdev,
 
 static const struct ethtool_ops igc_ethtool_ops = {
 	.supported_coalesce_params = ETHTOOL_COALESCE_USECS,
-	.get_drvinfo		= igc_get_drvinfo,
-	.get_regs_len		= igc_get_regs_len,
-	.get_regs		= igc_get_regs,
-	.get_wol		= igc_get_wol,
-	.set_wol		= igc_set_wol,
-	.get_msglevel		= igc_get_msglevel,
-	.set_msglevel		= igc_set_msglevel,
-	.nway_reset		= igc_nway_reset,
-	.get_link		= igc_get_link,
-	.get_eeprom_len		= igc_get_eeprom_len,
-	.get_eeprom		= igc_get_eeprom,
-	.set_eeprom		= igc_set_eeprom,
-	.get_ringparam		= igc_get_ringparam,
-	.set_ringparam		= igc_set_ringparam,
-	.get_pauseparam		= igc_get_pauseparam,
-	.set_pauseparam		= igc_set_pauseparam,
-	.get_strings		= igc_get_strings,
-	.get_sset_count		= igc_get_sset_count,
-	.get_ethtool_stats	= igc_get_ethtool_stats,
-	.get_coalesce		= igc_get_coalesce,
-	.set_coalesce		= igc_set_coalesce,
-	.get_rxnfc		= igc_get_rxnfc,
-	.set_rxnfc		= igc_set_rxnfc,
-	.get_rxfh_indir_size	= igc_get_rxfh_indir_size,
-	.get_rxfh		= igc_get_rxfh,
-	.set_rxfh		= igc_set_rxfh,
-	.get_ts_info		= igc_get_ts_info,
-	.get_channels		= igc_get_channels,
-	.set_channels		= igc_set_channels,
-	.get_priv_flags		= igc_get_priv_flags,
-	.set_priv_flags		= igc_set_priv_flags,
+	.get_drvinfo		= igc_ethtool_get_drvinfo,
+	.get_regs_len		= igc_ethtool_get_regs_len,
+	.get_regs		= igc_ethtool_get_regs,
+	.get_wol		= igc_ethtool_get_wol,
+	.set_wol		= igc_ethtool_set_wol,
+	.get_msglevel		= igc_ethtool_get_msglevel,
+	.set_msglevel		= igc_ethtool_set_msglevel,
+	.nway_reset		= igc_ethtool_nway_reset,
+	.get_link		= igc_ethtool_get_link,
+	.get_eeprom_len		= igc_ethtool_get_eeprom_len,
+	.get_eeprom		= igc_ethtool_get_eeprom,
+	.set_eeprom		= igc_ethtool_set_eeprom,
+	.get_ringparam		= igc_ethtool_get_ringparam,
+	.set_ringparam		= igc_ethtool_set_ringparam,
+	.get_pauseparam		= igc_ethtool_get_pauseparam,
+	.set_pauseparam		= igc_ethtool_set_pauseparam,
+	.get_strings		= igc_ethtool_get_strings,
+	.get_sset_count		= igc_ethtool_get_sset_count,
+	.get_ethtool_stats	= igc_ethtool_get_stats,
+	.get_coalesce		= igc_ethtool_get_coalesce,
+	.set_coalesce		= igc_ethtool_set_coalesce,
+	.get_rxnfc		= igc_ethtool_get_rxnfc,
+	.set_rxnfc		= igc_ethtool_set_rxnfc,
+	.get_rxfh_indir_size	= igc_ethtool_get_rxfh_indir_size,
+	.get_rxfh		= igc_ethtool_get_rxfh,
+	.set_rxfh		= igc_ethtool_set_rxfh,
+	.get_ts_info		= igc_ethtool_get_ts_info,
+	.get_channels		= igc_ethtool_get_channels,
+	.set_channels		= igc_ethtool_set_channels,
+	.get_priv_flags		= igc_ethtool_get_priv_flags,
+	.set_priv_flags		= igc_ethtool_set_priv_flags,
 	.begin			= igc_ethtool_begin,
 	.complete		= igc_ethtool_complete,
-	.get_link_ksettings	= igc_get_link_ksettings,
-	.set_link_ksettings	= igc_set_link_ksettings,
-	.self_test		= igc_diag_test,
+	.get_link_ksettings	= igc_ethtool_get_link_ksettings,
+	.set_link_ksettings	= igc_ethtool_set_link_ksettings,
+	.self_test		= igc_ethtool_diag_test,
 };
 
-void igc_set_ethtool_ops(struct net_device *netdev)
+void igc_ethtool_set_ops(struct net_device *netdev)
 {
 	netdev->ethtool_ops = &igc_ethtool_ops;
 }
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 843e8a2aaf24..98356652a7d9 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -4932,7 +4932,7 @@ static int igc_probe(struct pci_dev *pdev,
 	hw->hw_addr = adapter->io_addr;
 
 	netdev->netdev_ops = &igc_netdev_ops;
-	igc_set_ethtool_ops(netdev);
+	igc_ethtool_set_ops(netdev);
 	netdev->watchdog_timeo = 5 * HZ;
 
 	netdev->mem_start = pci_resource_start(pdev, 0);
-- 
cgit v1.2.3-59-g8ed1b


From 97700bc86d068442ee19ca6d31fc0a600cdbd672 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:10 -0700
Subject: igc: Align terms used in NFC support code

The Network Flow Classification (NFC) support code in the IGC driver uses
terms such as 'rule', 'filter', 'entry', and 'input' interchangeably when
referring to NFC rules, making the code harder to follow. This patch
renames IGC's internal APIs, structs, and variables so we stick with the
term 'rule' since this is the term used in ethtool APIs. It also removes
some comments that no longer apply along the way. No functionality is
changed by this patch.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  29 +++--
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 160 +++++++++++++--------------
 drivers/net/ethernet/intel/igc/igc_main.c    |  42 +++----
 3 files changed, 114 insertions(+), 117 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 359ac40908f6..7124ba254b89 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -187,12 +187,12 @@ struct igc_adapter {
 	u32 rss_queues;
 	u32 rss_indir_tbl_init;
 
-	/* RX network flow classification support */
-	struct hlist_head nfc_filter_list;
-	unsigned int nfc_filter_count;
-
-	/* lock for RX network flow classification filter */
-	spinlock_t nfc_lock;
+	/* Any access to elements in nfc_rule_list is protected by the
+	 * nfc_rule_lock.
+	 */
+	spinlock_t nfc_rule_lock;
+	struct hlist_head nfc_rule_list;
+	unsigned int nfc_rule_count;
 
 	u8 rss_indir_tbl[IGC_RETA_SIZE];
 
@@ -453,7 +453,7 @@ enum igc_filter_match_flags {
 };
 
 /* RX network flow classification data structure */
-struct igc_nfc_input {
+struct igc_nfc_filter {
 	/* Byte layout in order, all values with MSB first:
 	 * match_flags - 1 byte
 	 * etype - 2 bytes
@@ -466,14 +466,14 @@ struct igc_nfc_input {
 	u8 dst_addr[ETH_ALEN];
 };
 
-struct igc_nfc_filter {
+struct igc_nfc_rule {
 	struct hlist_node nfc_node;
-	struct igc_nfc_input filter;
+	struct igc_nfc_filter filter;
 	u16 sw_idx;
 	u16 action;
 };
 
-#define IGC_MAX_RXNFC_FILTERS		16
+#define IGC_MAX_RXNFC_RULES		16
 
 /* igc_desc_unused - calculate if we have unused descriptors */
 static inline u16 igc_desc_unused(const struct igc_ring *ring)
@@ -549,12 +549,11 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
 	return 0;
 }
 
-/* forward declaration */
 void igc_reinit_locked(struct igc_adapter *);
-int igc_add_filter(struct igc_adapter *adapter,
-		   struct igc_nfc_filter *input);
-int igc_erase_filter(struct igc_adapter *adapter,
-		     struct igc_nfc_filter *input);
+int igc_enable_nfc_rule(struct igc_adapter *adapter,
+			const struct igc_nfc_rule *rule);
+int igc_disable_nfc_rule(struct igc_adapter *adapter,
+			 const struct igc_nfc_rule *rule);
 
 void igc_ptp_init(struct igc_adapter *adapter);
 void igc_ptp_reset(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 0399ca1d7d0c..408f4a9a199f 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -931,16 +931,15 @@ static int igc_ethtool_set_coalesce(struct net_device *netdev,
 }
 
 #define ETHER_TYPE_FULL_MASK ((__force __be16)~0)
-static int igc_ethtool_get_nfc_entry(struct igc_adapter *adapter,
-				     struct ethtool_rxnfc *cmd)
+static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
 {
 	struct ethtool_rx_flow_spec *fsp = &cmd->fs;
-	struct igc_nfc_filter *rule = NULL;
+	struct igc_nfc_rule *rule = NULL;
 
-	/* report total rule count */
-	cmd->data = IGC_MAX_RXNFC_FILTERS;
+	cmd->data = IGC_MAX_RXNFC_RULES;
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		if (fsp->location <= rule->sw_idx)
 			break;
 	}
@@ -980,17 +979,16 @@ static int igc_ethtool_get_nfc_entry(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_ethtool_get_nfc_all(struct igc_adapter *adapter,
-				   struct ethtool_rxnfc *cmd,
-				   u32 *rule_locs)
+static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
+				     struct ethtool_rxnfc *cmd,
+				     u32 *rule_locs)
 {
-	struct igc_nfc_filter *rule;
+	struct igc_nfc_rule *rule;
 	int cnt = 0;
 
-	/* report total rule count */
-	cmd->data = IGC_MAX_RXNFC_FILTERS;
+	cmd->data = IGC_MAX_RXNFC_RULES;
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		if (cnt == cmd->rule_cnt)
 			return -EMSGSIZE;
 		rule_locs[cnt] = rule->sw_idx;
@@ -1062,12 +1060,12 @@ static int igc_ethtool_get_rxnfc(struct net_device *dev,
 		cmd->data = adapter->num_rx_queues;
 		return 0;
 	case ETHTOOL_GRXCLSRLCNT:
-		cmd->rule_cnt = adapter->nfc_filter_count;
+		cmd->rule_cnt = adapter->nfc_rule_count;
 		return 0;
 	case ETHTOOL_GRXCLSRULE:
-		return igc_ethtool_get_nfc_entry(adapter, cmd);
+		return igc_ethtool_get_nfc_rule(adapter, cmd);
 	case ETHTOOL_GRXCLSRLALL:
-		return igc_ethtool_get_nfc_all(adapter, cmd, rule_locs);
+		return igc_ethtool_get_nfc_rules(adapter, cmd, rule_locs);
 	case ETHTOOL_GRXFH:
 		return igc_ethtool_get_rss_hash_opts(adapter, cmd);
 	default:
@@ -1179,38 +1177,37 @@ static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter,
 	return 0;
 }
 
-int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
+int igc_enable_nfc_rule(struct igc_adapter *adapter,
+			const struct igc_nfc_rule *rule)
 {
 	int err = -EINVAL;
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		u16 etype = ntohs(input->filter.etype);
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		u16 etype = ntohs(rule->filter.etype);
 
-		err = igc_add_etype_filter(adapter, etype, input->action);
+		err = igc_add_etype_filter(adapter, etype, rule->action);
 		if (err)
 			return err;
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
 		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
-					 input->filter.src_addr,
-					 input->action);
+					 rule->filter.src_addr, rule->action);
 		if (err)
 			return err;
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
 		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
-					 input->filter.dst_addr,
-					 input->action);
+					 rule->filter.dst_addr, rule->action);
 		if (err)
 			return err;
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (ntohs(rule->filter.vlan_tci) & VLAN_PRIO_MASK) >>
 			   VLAN_PRIO_SHIFT;
-		err = igc_add_vlan_prio_filter(adapter, prio, input->action);
+		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
 		if (err)
 			return err;
 	}
@@ -1218,42 +1215,43 @@ int igc_add_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
 	return 0;
 }
 
-int igc_erase_filter(struct igc_adapter *adapter, struct igc_nfc_filter *input)
+int igc_disable_nfc_rule(struct igc_adapter *adapter,
+			 const struct igc_nfc_rule *rule)
 {
-	if (input->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		u16 etype = ntohs(input->filter.etype);
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		u16 etype = ntohs(rule->filter.etype);
 
 		igc_del_etype_filter(adapter, etype);
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (ntohs(input->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (ntohs(rule->filter.vlan_tci) & VLAN_PRIO_MASK) >>
 			   VLAN_PRIO_SHIFT;
 		igc_del_vlan_prio_filter(adapter, prio);
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
-				   input->filter.src_addr);
+				   rule->filter.src_addr);
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
-				   input->filter.dst_addr);
+				   rule->filter.dst_addr);
 
 	return 0;
 }
 
-static int igc_ethtool_update_nfc_entry(struct igc_adapter *adapter,
-					struct igc_nfc_filter *input,
-					u16 sw_idx)
+static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
+				       struct igc_nfc_rule *input,
+				       u16 sw_idx)
 {
-	struct igc_nfc_filter *rule, *parent;
+	struct igc_nfc_rule *rule, *parent;
 	int err = -EINVAL;
 
 	parent = NULL;
 	rule = NULL;
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
+	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		/* hash found, or no matching entry */
 		if (rule->sw_idx >= sw_idx)
 			break;
@@ -1263,11 +1261,11 @@ static int igc_ethtool_update_nfc_entry(struct igc_adapter *adapter,
 	/* if there is an old rule occupying our place remove it */
 	if (rule && rule->sw_idx == sw_idx) {
 		if (!input)
-			err = igc_erase_filter(adapter, rule);
+			err = igc_disable_nfc_rule(adapter, rule);
 
 		hlist_del(&rule->nfc_node);
 		kfree(rule);
-		adapter->nfc_filter_count--;
+		adapter->nfc_rule_count--;
 	}
 
 	/* If no input this was a delete, err should be 0 if a rule was
@@ -1283,21 +1281,21 @@ static int igc_ethtool_update_nfc_entry(struct igc_adapter *adapter,
 	if (parent)
 		hlist_add_behind(&input->nfc_node, &parent->nfc_node);
 	else
-		hlist_add_head(&input->nfc_node, &adapter->nfc_filter_list);
+		hlist_add_head(&input->nfc_node, &adapter->nfc_rule_list);
 
 	/* update counts */
-	adapter->nfc_filter_count++;
+	adapter->nfc_rule_count++;
 
 	return 0;
 }
 
-static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
-				     struct ethtool_rxnfc *cmd)
+static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
 {
 	struct net_device *netdev = adapter->netdev;
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	struct igc_nfc_filter *input, *rule;
+	struct igc_nfc_rule *rule, *tmp;
 	int err = 0;
 
 	if (!(netdev->hw_features & NETIF_F_NTUPLE))
@@ -1314,7 +1312,7 @@ static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
 	}
 
 	/* Don't allow indexes to exist outside of available space */
-	if (fsp->location >= IGC_MAX_RXNFC_FILTERS) {
+	if (fsp->location >= IGC_MAX_RXNFC_RULES) {
 		netdev_err(netdev, "Location out of range\n");
 		return -EINVAL;
 	}
@@ -1322,32 +1320,32 @@ static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
 	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
 		return -EINVAL;
 
-	input = kzalloc(sizeof(*input), GFP_KERNEL);
-	if (!input)
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
 		return -ENOMEM;
 
 	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
-		input->filter.etype = fsp->h_u.ether_spec.h_proto;
-		input->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
+		rule->filter.etype = fsp->h_u.ether_spec.h_proto;
+		rule->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
 	}
 
 	/* Both source and destination address filters only support the full
 	 * mask.
 	 */
 	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_source)) {
-		input->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR;
-		ether_addr_copy(input->filter.src_addr,
+		rule->filter.match_flags |= IGC_FILTER_FLAG_SRC_MAC_ADDR;
+		ether_addr_copy(rule->filter.src_addr,
 				fsp->h_u.ether_spec.h_source);
 	}
 
 	if (is_broadcast_ether_addr(fsp->m_u.ether_spec.h_dest)) {
-		input->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR;
-		ether_addr_copy(input->filter.dst_addr,
+		rule->filter.match_flags |= IGC_FILTER_FLAG_DST_MAC_ADDR;
+		ether_addr_copy(rule->filter.dst_addr,
 				fsp->h_u.ether_spec.h_dest);
 	}
 
-	if (input->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
-	    input->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
+	    rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
 		netdev_dbg(netdev, "Filters with both dst and src are not supported\n");
 		err = -EOPNOTSUPP;
 		goto err_out;
@@ -1359,18 +1357,18 @@ static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
 			err = -EOPNOTSUPP;
 			goto err_out;
 		}
-		input->filter.vlan_tci = fsp->h_ext.vlan_tci;
-		input->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
+		rule->filter.vlan_tci = fsp->h_ext.vlan_tci;
+		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
 	}
 
-	input->action = fsp->ring_cookie;
-	input->sw_idx = fsp->location;
+	rule->action = fsp->ring_cookie;
+	rule->sw_idx = fsp->location;
 
-	spin_lock(&adapter->nfc_lock);
+	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node) {
-		if (!memcmp(&input->filter, &rule->filter,
-			    sizeof(input->filter))) {
+	hlist_for_each_entry(tmp, &adapter->nfc_rule_list, nfc_node) {
+		if (!memcmp(&rule->filter, &tmp->filter,
+			    sizeof(rule->filter))) {
 			err = -EEXIST;
 			netdev_err(netdev,
 				   "ethtool: this filter is already set\n");
@@ -1378,32 +1376,32 @@ static int igc_ethtool_add_nfc_entry(struct igc_adapter *adapter,
 		}
 	}
 
-	err = igc_add_filter(adapter, input);
+	err = igc_enable_nfc_rule(adapter, rule);
 	if (err)
 		goto err_out_w_lock;
 
-	igc_ethtool_update_nfc_entry(adapter, input, input->sw_idx);
+	igc_ethtool_update_nfc_rule(adapter, rule, rule->sw_idx);
 
-	spin_unlock(&adapter->nfc_lock);
+	spin_unlock(&adapter->nfc_rule_lock);
 	return 0;
 
 err_out_w_lock:
-	spin_unlock(&adapter->nfc_lock);
+	spin_unlock(&adapter->nfc_rule_lock);
 err_out:
-	kfree(input);
+	kfree(rule);
 	return err;
 }
 
-static int igc_ethtool_del_nfc_entry(struct igc_adapter *adapter,
-				     struct ethtool_rxnfc *cmd)
+static int igc_ethtool_del_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
 {
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
 	int err;
 
-	spin_lock(&adapter->nfc_lock);
-	err = igc_ethtool_update_nfc_entry(adapter, NULL, fsp->location);
-	spin_unlock(&adapter->nfc_lock);
+	spin_lock(&adapter->nfc_rule_lock);
+	err = igc_ethtool_update_nfc_rule(adapter, NULL, fsp->location);
+	spin_unlock(&adapter->nfc_rule_lock);
 
 	return err;
 }
@@ -1417,9 +1415,9 @@ static int igc_ethtool_set_rxnfc(struct net_device *dev,
 	case ETHTOOL_SRXFH:
 		return igc_ethtool_set_rss_hash_opt(adapter, cmd);
 	case ETHTOOL_SRXCLSRLINS:
-		return igc_ethtool_add_nfc_entry(adapter, cmd);
+		return igc_ethtool_add_nfc_rule(adapter, cmd);
 	case ETHTOOL_SRXCLSRLDEL:
-		return igc_ethtool_del_nfc_entry(adapter, cmd);
+		return igc_ethtool_del_nfc_rule(adapter, cmd);
 	default:
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 98356652a7d9..f48d6127a220 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2174,16 +2174,16 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 	return !!budget;
 }
 
-static void igc_nfc_filter_restore(struct igc_adapter *adapter)
+static void igc_restore_nfc_rules(struct igc_adapter *adapter)
 {
-	struct igc_nfc_filter *rule;
+	struct igc_nfc_rule *rule;
 
-	spin_lock(&adapter->nfc_lock);
+	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
-		igc_add_filter(adapter, rule);
+	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node)
+		igc_enable_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_lock);
+	spin_unlock(&adapter->nfc_rule_lock);
 }
 
 static int igc_find_mac_filter(struct igc_adapter *adapter,
@@ -2537,7 +2537,7 @@ static void igc_configure(struct igc_adapter *adapter)
 	igc_setup_rctl(adapter);
 
 	igc_set_default_mac_filter(adapter);
-	igc_nfc_filter_restore(adapter);
+	igc_restore_nfc_rules(adapter);
 
 	igc_configure_tx(adapter);
 	igc_configure_rx(adapter);
@@ -3424,7 +3424,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
 				VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
-	spin_lock_init(&adapter->nfc_lock);
+	spin_lock_init(&adapter->nfc_rule_lock);
 	spin_lock_init(&adapter->stats64_lock);
 	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
 	adapter->flags |= IGC_FLAG_HAS_MSIX;
@@ -3651,16 +3651,16 @@ void igc_update_stats(struct igc_adapter *adapter)
 	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
 }
 
-static void igc_nfc_filter_exit(struct igc_adapter *adapter)
+static void igc_nfc_rule_exit(struct igc_adapter *adapter)
 {
-	struct igc_nfc_filter *rule;
+	struct igc_nfc_rule *rule;
 
-	spin_lock(&adapter->nfc_lock);
+	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_filter_list, nfc_node)
-		igc_erase_filter(adapter, rule);
+	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node)
+		igc_disable_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_lock);
+	spin_unlock(&adapter->nfc_rule_lock);
 }
 
 /**
@@ -3681,7 +3681,7 @@ void igc_down(struct igc_adapter *adapter)
 	wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
 	/* flush and sleep below */
 
-	igc_nfc_filter_exit(adapter);
+	igc_nfc_rule_exit(adapter);
 
 	/* set trans_start so we don't get spurious watchdogs during reset */
 	netif_trans_update(netdev);
@@ -3833,17 +3833,17 @@ static int igc_set_features(struct net_device *netdev,
 
 	if (!(features & NETIF_F_NTUPLE)) {
 		struct hlist_node *node2;
-		struct igc_nfc_filter *rule;
+		struct igc_nfc_rule *rule;
 
-		spin_lock(&adapter->nfc_lock);
+		spin_lock(&adapter->nfc_rule_lock);
 		hlist_for_each_entry_safe(rule, node2,
-					  &adapter->nfc_filter_list, nfc_node) {
-			igc_erase_filter(adapter, rule);
+					  &adapter->nfc_rule_list, nfc_node) {
+			igc_disable_nfc_rule(adapter, rule);
 			hlist_del(&rule->nfc_node);
 			kfree(rule);
 		}
-		spin_unlock(&adapter->nfc_lock);
-		adapter->nfc_filter_count = 0;
+		spin_unlock(&adapter->nfc_rule_lock);
+		adapter->nfc_rule_count = 0;
 	}
 
 	netdev->features = features;
-- 
cgit v1.2.3-59-g8ed1b


From c983e3271923c96cd5f90de0b580f1b210f7f8b6 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:11 -0700
Subject: igc: Change byte order in struct igc_nfc_filter

Every time we access the 'etype' and 'vlan_tci' fields from struct
igc_nfc_filter to enable or disable filters in hardware we have to
convert them from big endian to host order so it makes more sense to
simply have these fields in host order.

The byte order conversion should take place in
igc_ethtool_get_nfc_rule() and igc_ethtool_add_nfc_rule(), which are
called by the .get_rxnfc and .set_rxnfc ethtool ops, since the ethtool
subsystem is the one that deals with these fields in big endian order.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         | 10 ++--------
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 25 +++++++++++--------------
 2 files changed, 13 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 7124ba254b89..fcc6261d7f67 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -452,16 +452,10 @@ enum igc_filter_match_flags {
 	IGC_FILTER_FLAG_DST_MAC_ADDR =	0x8,
 };
 
-/* RX network flow classification data structure */
 struct igc_nfc_filter {
-	/* Byte layout in order, all values with MSB first:
-	 * match_flags - 1 byte
-	 * etype - 2 bytes
-	 * vlan_tci - 2 bytes
-	 */
 	u8 match_flags;
-	__be16 etype;
-	__be16 vlan_tci;
+	u16 etype;
+	u16 vlan_tci;
 	u8 src_addr[ETH_ALEN];
 	u8 dst_addr[ETH_ALEN];
 };
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 408f4a9a199f..66e0760a8f9e 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -954,13 +954,13 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 	fsp->ring_cookie = rule->action;
 
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		fsp->h_u.ether_spec.h_proto = rule->filter.etype;
+		fsp->h_u.ether_spec.h_proto = htons(rule->filter.etype);
 		fsp->m_u.ether_spec.h_proto = ETHER_TYPE_FULL_MASK;
 	}
 
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
 		fsp->flow_type |= FLOW_EXT;
-		fsp->h_ext.vlan_tci = rule->filter.vlan_tci;
+		fsp->h_ext.vlan_tci = htons(rule->filter.vlan_tci);
 		fsp->m_ext.vlan_tci = htons(VLAN_PRIO_MASK);
 	}
 
@@ -1183,9 +1183,8 @@ int igc_enable_nfc_rule(struct igc_adapter *adapter,
 	int err = -EINVAL;
 
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		u16 etype = ntohs(rule->filter.etype);
-
-		err = igc_add_etype_filter(adapter, etype, rule->action);
+		err = igc_add_etype_filter(adapter, rule->filter.etype,
+					   rule->action);
 		if (err)
 			return err;
 	}
@@ -1205,8 +1204,9 @@ int igc_enable_nfc_rule(struct igc_adapter *adapter,
 	}
 
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (ntohs(rule->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
 			   VLAN_PRIO_SHIFT;
+
 		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
 		if (err)
 			return err;
@@ -1218,14 +1218,11 @@ int igc_enable_nfc_rule(struct igc_adapter *adapter,
 int igc_disable_nfc_rule(struct igc_adapter *adapter,
 			 const struct igc_nfc_rule *rule)
 {
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		u16 etype = ntohs(rule->filter.etype);
-
-		igc_del_etype_filter(adapter, etype);
-	}
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
+		igc_del_etype_filter(adapter, rule->filter.etype);
 
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (ntohs(rule->filter.vlan_tci) & VLAN_PRIO_MASK) >>
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
 			   VLAN_PRIO_SHIFT;
 		igc_del_vlan_prio_filter(adapter, prio);
 	}
@@ -1325,7 +1322,7 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 		return -ENOMEM;
 
 	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
-		rule->filter.etype = fsp->h_u.ether_spec.h_proto;
+		rule->filter.etype = ntohs(fsp->h_u.ether_spec.h_proto);
 		rule->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
 	}
 
@@ -1357,7 +1354,7 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 			err = -EOPNOTSUPP;
 			goto err_out;
 		}
-		rule->filter.vlan_tci = fsp->h_ext.vlan_tci;
+		rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
 		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d8bed686ab96169ac80b497d1cbed89300d97f83 Mon Sep 17 00:00:00 2001
From: Chris Mi <chrism@mellanox.com>
Date: Tue, 19 May 2020 22:45:20 +0800
Subject: net: psample: Add tunnel support

Currently, psample can only send the packet bits after decapsulation,
so the tunnel information is lost. Add tunnel support.

If the sampled packet has no tunnel info, the behavior is the same as
before. If it does, add a nested metadata field named PSAMPLE_ATTR_TUNNEL
and include the tunnel subfields if applicable.

Increase the metadata length for sampled packets that carry tunnel info.
If new tunnel info subfields are added later, update the metadata
length accordingly.

Signed-off-by: Chris Mi <chrism@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/psample.h |  22 ++++++
 net/psample/psample.c        | 157 +++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 179 insertions(+)

diff --git a/include/uapi/linux/psample.h b/include/uapi/linux/psample.h
index ce1116cff53d..aea26ab1431c 100644
--- a/include/uapi/linux/psample.h
+++ b/include/uapi/linux/psample.h
@@ -11,6 +11,7 @@ enum {
 	PSAMPLE_ATTR_GROUP_SEQ,
 	PSAMPLE_ATTR_SAMPLE_RATE,
 	PSAMPLE_ATTR_DATA,
+	PSAMPLE_ATTR_TUNNEL,
 
 	/* commands attributes */
 	PSAMPLE_ATTR_GROUP_REFCOUNT,
@@ -25,6 +26,27 @@ enum psample_command {
 	PSAMPLE_CMD_DEL_GROUP,
 };
 
+enum psample_tunnel_key_attr {
+	PSAMPLE_TUNNEL_KEY_ATTR_ID,                 /* be64 Tunnel ID */
+	PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,           /* be32 src IP address. */
+	PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,           /* be32 dst IP address. */
+	PSAMPLE_TUNNEL_KEY_ATTR_TOS,                /* u8 Tunnel IP ToS. */
+	PSAMPLE_TUNNEL_KEY_ATTR_TTL,                /* u8 Tunnel IP TTL. */
+	PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT,      /* No argument, set DF. */
+	PSAMPLE_TUNNEL_KEY_ATTR_CSUM,               /* No argument. CSUM packet. */
+	PSAMPLE_TUNNEL_KEY_ATTR_OAM,                /* No argument. OAM frame.  */
+	PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,        /* Array of Geneve options. */
+	PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC,	            /* be16 src Transport Port. */
+	PSAMPLE_TUNNEL_KEY_ATTR_TP_DST,		    /* be16 dst Transport Port. */
+	PSAMPLE_TUNNEL_KEY_ATTR_VXLAN_OPTS,	    /* Nested VXLAN opts* */
+	PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,           /* struct in6_addr src IPv6 address. */
+	PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,           /* struct in6_addr dst IPv6 address. */
+	PSAMPLE_TUNNEL_KEY_ATTR_PAD,
+	PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,        /* struct erspan_metadata */
+	PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE,   /* No argument. IPV4_INFO_BRIDGE mode.*/
+	__PSAMPLE_TUNNEL_KEY_ATTR_MAX
+};
+
 /* Can be overridden at runtime by module option */
 #define PSAMPLE_ATTR_MAX (__PSAMPLE_ATTR_MAX - 1)
 
diff --git a/net/psample/psample.c b/net/psample/psample.c
index 6f2fbc6b9eb2..34a74043840b 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -14,6 +14,8 @@
 #include <net/genetlink.h>
 #include <net/psample.h>
 #include <linux/spinlock.h>
+#include <net/ip_tunnels.h>
+#include <net/dst_metadata.h>
 
 #define PSAMPLE_MAX_PACKET_SIZE 0xffff
 
@@ -207,10 +209,155 @@ void psample_group_put(struct psample_group *group)
 }
 EXPORT_SYMBOL_GPL(psample_group_put);
 
+static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
+			      struct ip_tunnel_info *tun_info)
+{
+	unsigned short tun_proto = ip_tunnel_info_af(tun_info);
+	const void *tun_opts = ip_tunnel_info_opts(tun_info);
+	const struct ip_tunnel_key *tun_key = &tun_info->key;
+	int tun_opts_len = tun_info->options_len;
+
+	if (tun_key->tun_flags & TUNNEL_KEY &&
+	    nla_put_be64(skb, PSAMPLE_TUNNEL_KEY_ATTR_ID, tun_key->tun_id,
+			 PSAMPLE_TUNNEL_KEY_ATTR_PAD))
+		return -EMSGSIZE;
+
+	if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE &&
+	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_INFO_BRIDGE))
+		return -EMSGSIZE;
+
+	switch (tun_proto) {
+	case AF_INET:
+		if (tun_key->u.ipv4.src &&
+		    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_SRC,
+				    tun_key->u.ipv4.src))
+			return -EMSGSIZE;
+		if (tun_key->u.ipv4.dst &&
+		    nla_put_in_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV4_DST,
+				    tun_key->u.ipv4.dst))
+			return -EMSGSIZE;
+		break;
+	case AF_INET6:
+		if (!ipv6_addr_any(&tun_key->u.ipv6.src) &&
+		    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_SRC,
+				     &tun_key->u.ipv6.src))
+			return -EMSGSIZE;
+		if (!ipv6_addr_any(&tun_key->u.ipv6.dst) &&
+		    nla_put_in6_addr(skb, PSAMPLE_TUNNEL_KEY_ATTR_IPV6_DST,
+				     &tun_key->u.ipv6.dst))
+			return -EMSGSIZE;
+		break;
+	}
+	if (tun_key->tos &&
+	    nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TOS, tun_key->tos))
+		return -EMSGSIZE;
+	if (nla_put_u8(skb, PSAMPLE_TUNNEL_KEY_ATTR_TTL, tun_key->ttl))
+		return -EMSGSIZE;
+	if ((tun_key->tun_flags & TUNNEL_DONT_FRAGMENT) &&
+	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_DONT_FRAGMENT))
+		return -EMSGSIZE;
+	if ((tun_key->tun_flags & TUNNEL_CSUM) &&
+	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_CSUM))
+		return -EMSGSIZE;
+	if (tun_key->tp_src &&
+	    nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_SRC, tun_key->tp_src))
+		return -EMSGSIZE;
+	if (tun_key->tp_dst &&
+	    nla_put_be16(skb, PSAMPLE_TUNNEL_KEY_ATTR_TP_DST, tun_key->tp_dst))
+		return -EMSGSIZE;
+	if ((tun_key->tun_flags & TUNNEL_OAM) &&
+	    nla_put_flag(skb, PSAMPLE_TUNNEL_KEY_ATTR_OAM))
+		return -EMSGSIZE;
+	if (tun_opts_len) {
+		if (tun_key->tun_flags & TUNNEL_GENEVE_OPT &&
+		    nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_GENEVE_OPTS,
+			    tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+		else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT &&
+			 nla_put(skb, PSAMPLE_TUNNEL_KEY_ATTR_ERSPAN_OPTS,
+				 tun_opts_len, tun_opts))
+			return -EMSGSIZE;
+	}
+
+	return 0;
+}
+
+static int psample_ip_tun_to_nlattr(struct sk_buff *skb,
+			    struct ip_tunnel_info *tun_info)
+{
+	struct nlattr *nla;
+	int err;
+
+	nla = nla_nest_start_noflag(skb, PSAMPLE_ATTR_TUNNEL);
+	if (!nla)
+		return -EMSGSIZE;
+
+	err = __psample_ip_tun_to_nlattr(skb, tun_info);
+	if (err) {
+		nla_nest_cancel(skb, nla);
+		return err;
+	}
+
+	nla_nest_end(skb, nla);
+
+	return 0;
+}
+
+static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
+{
+	unsigned short tun_proto = ip_tunnel_info_af(tun_info);
+	const struct ip_tunnel_key *tun_key = &tun_info->key;
+	int tun_opts_len = tun_info->options_len;
+	int sum = 0;
+
+	if (tun_key->tun_flags & TUNNEL_KEY)
+		sum += nla_total_size(sizeof(u64));
+
+	if (tun_info->mode & IP_TUNNEL_INFO_BRIDGE)
+		sum += nla_total_size(0);
+
+	switch (tun_proto) {
+	case AF_INET:
+		if (tun_key->u.ipv4.src)
+			sum += nla_total_size(sizeof(u32));
+		if (tun_key->u.ipv4.dst)
+			sum += nla_total_size(sizeof(u32));
+		break;
+	case AF_INET6:
+		if (!ipv6_addr_any(&tun_key->u.ipv6.src))
+			sum += nla_total_size(sizeof(struct in6_addr));
+		if (!ipv6_addr_any(&tun_key->u.ipv6.dst))
+			sum += nla_total_size(sizeof(struct in6_addr));
+		break;
+	}
+	if (tun_key->tos)
+		sum += nla_total_size(sizeof(u8));
+	sum += nla_total_size(sizeof(u8));	/* TTL */
+	if (tun_key->tun_flags & TUNNEL_DONT_FRAGMENT)
+		sum += nla_total_size(0);
+	if (tun_key->tun_flags & TUNNEL_CSUM)
+		sum += nla_total_size(0);
+	if (tun_key->tp_src)
+		sum += nla_total_size(sizeof(u16));
+	if (tun_key->tp_dst)
+		sum += nla_total_size(sizeof(u16));
+	if (tun_key->tun_flags & TUNNEL_OAM)
+		sum += nla_total_size(0);
+	if (tun_opts_len) {
+		if (tun_key->tun_flags & TUNNEL_GENEVE_OPT)
+			sum += nla_total_size(tun_opts_len);
+		else if (tun_key->tun_flags & TUNNEL_ERSPAN_OPT)
+			sum += nla_total_size(tun_opts_len);
+	}
+
+	return sum;
+}
+
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 			   u32 trunc_size, int in_ifindex, int out_ifindex,
 			   u32 sample_rate)
 {
+	struct ip_tunnel_info *tun_info;
 	struct sk_buff *nl_skb;
 	int data_len;
 	int meta_len;
@@ -224,6 +371,10 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 		   nla_total_size(sizeof(u32)) +	/* group_num */
 		   nla_total_size(sizeof(u32));		/* seq */
 
+	tun_info = skb_tunnel_info(skb);
+	if (tun_info)
+		meta_len += psample_tunnel_meta_len(tun_info);
+
 	data_len = min(skb->len, trunc_size);
 	if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
 		data_len = PSAMPLE_MAX_PACKET_SIZE - meta_len - NLA_HDRLEN
@@ -278,6 +429,12 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 			goto error;
 	}
 
+	if (tun_info) {
+		ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
+		if (unlikely(ret < 0))
+			goto error;
+	}
+
 	genlmsg_end(nl_skb, data);
 	genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
 				PSAMPLE_NL_MCGRP_SAMPLE, GFP_ATOMIC);
-- 
cgit v1.2.3-59-g8ed1b


From b8204ad878ce7f49870669283de348e458a97ac4 Mon Sep 17 00:00:00 2001
From: Yuval Basson <ybason@marvell.com>
Date: Tue, 19 May 2020 23:51:25 +0300
Subject: qed: changes to ILT to support XRC

The first ILT page for the TSDM client is allocated for XRC-SRQs.
For regular SRQs, skip the first ILT page, which is reserved for
XRC-SRQs.

Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Yuval Bason <ybason@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_cxt.c  | 60 +++++++++++++++++++++++++-----
 drivers/net/ethernet/qlogic/qed/qed_cxt.h  | 10 ++++-
 drivers/net/ethernet/qlogic/qed/qed_dev.c  |  6 ++-
 drivers/net/ethernet/qlogic/qed/qed_rdma.c |  2 +-
 4 files changed, 64 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.c b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
index 1a636bad717d..7b76667acaba 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.c
@@ -110,6 +110,7 @@ struct src_ent {
 	ALIGNED_TYPE_SIZE(union conn_context, p_hwfn)
 
 #define SRQ_CXT_SIZE (sizeof(struct rdma_srq_context))
+#define XRC_SRQ_CXT_SIZE (sizeof(struct rdma_xrc_srq_context))
 
 #define TYPE0_TASK_CXT_SIZE(p_hwfn) \
 	ALIGNED_TYPE_SIZE(union type0_task_context, p_hwfn)
@@ -293,18 +294,40 @@ static struct qed_tid_seg *qed_cxt_tid_seg_info(struct qed_hwfn *p_hwfn,
 	return NULL;
 }
 
-static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn, u32 num_srqs)
+static void qed_cxt_set_srq_count(struct qed_hwfn *p_hwfn,
+				  u32 num_srqs, u32 num_xrc_srqs)
 {
 	struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
 
 	p_mgr->srq_count = num_srqs;
+	p_mgr->xrc_srq_count = num_xrc_srqs;
 }
 
-u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn)
+u32 qed_cxt_get_ilt_page_size(struct qed_hwfn *p_hwfn,
+			      enum ilt_clients ilt_client)
+{
+	struct qed_cxt_mngr *p_mngr = p_hwfn->p_cxt_mngr;
+	struct qed_ilt_client_cfg *p_cli = &p_mngr->clients[ilt_client];
+
+	return ILT_PAGE_IN_BYTES(p_cli->p_size.val);
+}
+
+static u32 qed_cxt_xrc_srqs_per_page(struct qed_hwfn *p_hwfn)
+{
+	u32 page_size;
+
+	page_size = qed_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM);
+	return page_size / XRC_SRQ_CXT_SIZE;
+}
+
+u32 qed_cxt_get_total_srq_count(struct qed_hwfn *p_hwfn)
 {
 	struct qed_cxt_mngr *p_mgr = p_hwfn->p_cxt_mngr;
+	u32 total_srqs;
+
+	total_srqs = p_mgr->srq_count + p_mgr->xrc_srq_count;
 
-	return p_mgr->srq_count;
+	return total_srqs;
 }
 
 /* set the iids count per protocol */
@@ -692,7 +715,7 @@ int qed_cxt_cfg_ilt_compute(struct qed_hwfn *p_hwfn, u32 *line_count)
 	}
 
 	/* TSDM (SRQ CONTEXT) */
-	total = qed_cxt_get_srq_count(p_hwfn);
+	total = qed_cxt_get_total_srq_count(p_hwfn);
 
 	if (total) {
 		p_cli = qed_cxt_set_cli(&p_mngr->clients[ILT_CLI_TSDM]);
@@ -1962,11 +1985,9 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
 				   struct qed_rdma_pf_params *p_params,
 				   u32 num_tasks)
 {
-	u32 num_cons, num_qps, num_srqs;
+	u32 num_cons, num_qps;
 	enum protocol_type proto;
 
-	num_srqs = min_t(u32, QED_RDMA_MAX_SRQS, p_params->num_srqs);
-
 	if (p_hwfn->mcp_info->func_info.protocol == QED_PCI_ETH_RDMA) {
 		DP_NOTICE(p_hwfn,
 			  "Current day drivers don't support RoCE & iWARP simultaneously on the same PF. Default to RoCE-only\n");
@@ -1989,6 +2010,8 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
 	}
 
 	if (num_cons && num_tasks) {
+		u32 num_srqs, num_xrc_srqs;
+
 		qed_cxt_set_proto_cid_count(p_hwfn, proto, num_cons, 0);
 
 		/* Deliberatly passing ROCE for tasks id. This is because
@@ -1997,7 +2020,13 @@ static void qed_rdma_set_pf_params(struct qed_hwfn *p_hwfn,
 		qed_cxt_set_proto_tid_count(p_hwfn, PROTOCOLID_ROCE,
 					    QED_CXT_ROCE_TID_SEG, 1,
 					    num_tasks, false);
-		qed_cxt_set_srq_count(p_hwfn, num_srqs);
+
+		num_srqs = min_t(u32, QED_RDMA_MAX_SRQS, p_params->num_srqs);
+
+		/* XRC SRQs populate a single ILT page */
+		num_xrc_srqs = qed_cxt_xrc_srqs_per_page(p_hwfn);
+
+		qed_cxt_set_srq_count(p_hwfn, num_srqs, num_xrc_srqs);
 	} else {
 		DP_INFO(p_hwfn->cdev,
 			"RDMA personality used without setting params!\n");
@@ -2163,10 +2192,17 @@ qed_cxt_dynamic_ilt_alloc(struct qed_hwfn *p_hwfn,
 		p_blk = &p_cli->pf_blks[CDUC_BLK];
 		break;
 	case QED_ELEM_SRQ:
+		/* The first ILT page is not used for regular SRQs. Skip it. */
+		iid += p_hwfn->p_cxt_mngr->xrc_srq_count;
 		p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_TSDM];
 		elem_size = SRQ_CXT_SIZE;
 		p_blk = &p_cli->pf_blks[SRQ_BLK];
 		break;
+	case QED_ELEM_XRC_SRQ:
+		p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_TSDM];
+		elem_size = XRC_SRQ_CXT_SIZE;
+		p_blk = &p_cli->pf_blks[SRQ_BLK];
+		break;
 	case QED_ELEM_TASK:
 		p_cli = &p_hwfn->p_cxt_mngr->clients[ILT_CLI_CDUT];
 		elem_size = TYPE1_TASK_CXT_SIZE(p_hwfn);
@@ -2386,8 +2422,12 @@ int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto)
 		return rc;
 
 	/* Free TSDM CXT */
-	rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_SRQ, 0,
-				    qed_cxt_get_srq_count(p_hwfn));
+	rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_XRC_SRQ, 0,
+				    p_hwfn->p_cxt_mngr->xrc_srq_count);
+
+	rc = qed_cxt_free_ilt_range(p_hwfn, QED_ELEM_SRQ,
+				    p_hwfn->p_cxt_mngr->xrc_srq_count,
+				    p_hwfn->p_cxt_mngr->srq_count);
 
 	return rc;
 }
diff --git a/drivers/net/ethernet/qlogic/qed/qed_cxt.h b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
index c4e815f6cabd..ce08ae8d8498 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_cxt.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_cxt.h
@@ -82,7 +82,8 @@ int qed_cxt_get_tid_mem_info(struct qed_hwfn *p_hwfn,
 enum qed_cxt_elem_type {
 	QED_ELEM_CXT,
 	QED_ELEM_SRQ,
-	QED_ELEM_TASK
+	QED_ELEM_TASK,
+	QED_ELEM_XRC_SRQ,
 };
 
 u32 qed_cxt_get_proto_cid_count(struct qed_hwfn *p_hwfn,
@@ -235,7 +236,6 @@ u32 qed_cxt_get_proto_tid_count(struct qed_hwfn *p_hwfn,
 				enum protocol_type type);
 u32 qed_cxt_get_proto_cid_start(struct qed_hwfn *p_hwfn,
 				enum protocol_type type);
-u32 qed_cxt_get_srq_count(struct qed_hwfn *p_hwfn);
 int qed_cxt_free_proto_ilt(struct qed_hwfn *p_hwfn, enum protocol_type proto);
 
 #define QED_CTX_WORKING_MEM 0
@@ -358,6 +358,7 @@ struct qed_cxt_mngr {
 
 	/* total number of SRQ's for this hwfn */
 	u32 srq_count;
+	u32 xrc_srq_count;
 
 	/* Maximal number of L2 steering filters */
 	u32 arfs_count;
@@ -372,4 +373,9 @@ u16 qed_get_cdut_num_vf_init_pages(struct qed_hwfn *p_hwfn);
 u16 qed_get_cdut_num_pf_work_pages(struct qed_hwfn *p_hwfn);
 u16 qed_get_cdut_num_vf_work_pages(struct qed_hwfn *p_hwfn);
 
+u32 qed_cxt_get_ilt_page_size(struct qed_hwfn *p_hwfn,
+			      enum ilt_clients ilt_client);
+
+u32 qed_cxt_get_total_srq_count(struct qed_hwfn *p_hwfn);
+
 #endif
diff --git a/drivers/net/ethernet/qlogic/qed/qed_dev.c b/drivers/net/ethernet/qlogic/qed/qed_dev.c
index 6e857468e993..1eebf30fa798 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_dev.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_dev.c
@@ -2269,6 +2269,7 @@ int qed_resc_alloc(struct qed_dev *cdev)
 		/* EQ */
 		n_eqes = qed_chain_get_capacity(&p_hwfn->p_spq->chain);
 		if (QED_IS_RDMA_PERSONALITY(p_hwfn)) {
+			u32 n_srq = qed_cxt_get_total_srq_count(p_hwfn);
 			enum protocol_type rdma_proto;
 
 			if (QED_IS_ROCE_PERSONALITY(p_hwfn))
@@ -2279,7 +2280,10 @@ int qed_resc_alloc(struct qed_dev *cdev)
 			num_cons = qed_cxt_get_proto_cid_count(p_hwfn,
 							       rdma_proto,
 							       NULL) * 2;
-			n_eqes += num_cons + 2 * MAX_NUM_VFS_BB;
+			/* EQ should be able to get events from all SRQ's
+			 * at the same time
+			 */
+			n_eqes += num_cons + 2 * MAX_NUM_VFS_BB + n_srq;
 		} else if (p_hwfn->hw_info.personality == QED_PCI_ISCSI) {
 			num_cons =
 			    qed_cxt_get_proto_cid_count(p_hwfn,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 38b1f402f7ed..415f3f3d2152 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -272,7 +272,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
 	}
 
 	/* Allocate bitmap for srqs */
-	p_rdma_info->num_srqs = qed_cxt_get_srq_count(p_hwfn);
+	p_rdma_info->num_srqs = p_hwfn->p_cxt_mngr->srq_count;
 	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->srq_map,
 				 p_rdma_info->num_srqs, "SRQ");
 	if (rc) {
-- 
cgit v1.2.3-59-g8ed1b


From 7bfb399eca460500f048098bf427c45b40e17cae Mon Sep 17 00:00:00 2001
From: Yuval Basson <ybason@marvell.com>
Date: Tue, 19 May 2020 23:51:26 +0300
Subject: qed: Add XRC to RoCE

Add support for XRC-SRQ's and XRC-QP's for upper layer driver.

We maintain separate bitmaps for resource management for srq and
xrc-srq. However, the range in FW is one: the xrc-srq's are first
and then the srq's follow. Therefore we maintain a srq-id offset.

v2: perform cleanups if XRC bitmaps allocation fails.

Signed-off-by: Michal Kalderon <mkalderon@marvell.com>
Signed-off-by: Yuval Bason <ybason@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_rdma.c | 145 +++++++++++++++++++++++++----
 drivers/net/ethernet/qlogic/qed/qed_rdma.h |  19 ++++
 drivers/net/ethernet/qlogic/qed/qed_roce.c |  29 ++++++
 include/linux/qed/qed_rdma_if.h            |  19 ++++
 4 files changed, 194 insertions(+), 18 deletions(-)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 415f3f3d2152..50985871cd3d 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -212,13 +212,22 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
 		goto free_rdma_port;
 	}
 
+	/* Allocate bit map for XRC Domains */
+	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->xrcd_map,
+				 QED_RDMA_MAX_XRCDS, "XRCD");
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate xrcd_map,rc = %d\n", rc);
+		goto free_pd_map;
+	}
+
 	/* Allocate DPI bitmap */
 	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->dpi_map,
 				 p_hwfn->dpi_count, "DPI");
 	if (rc) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 			   "Failed to allocate DPI bitmap, rc = %d\n", rc);
-		goto free_pd_map;
+		goto free_xrcd_map;
 	}
 
 	/* Allocate bitmap for cq's. The maximum number of CQs is bound to
@@ -271,6 +280,19 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
 		goto free_cid_map;
 	}
 
+	/* The first SRQ follows the last XRC SRQ. This means that the
+	 * SRQ IDs start from an offset equals to max_xrc_srqs.
+	 */
+	p_rdma_info->srq_id_offset = p_hwfn->p_cxt_mngr->xrc_srq_count;
+	rc = qed_rdma_bmap_alloc(p_hwfn,
+				 &p_rdma_info->xrc_srq_map,
+				 p_hwfn->p_cxt_mngr->xrc_srq_count, "XRC SRQ");
+	if (rc) {
+		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+			   "Failed to allocate xrc srq bitmap, rc = %d\n", rc);
+		goto free_real_cid_map;
+	}
+
 	/* Allocate bitmap for srqs */
 	p_rdma_info->num_srqs = p_hwfn->p_cxt_mngr->srq_count;
 	rc = qed_rdma_bmap_alloc(p_hwfn, &p_rdma_info->srq_map,
@@ -278,7 +300,7 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
 	if (rc) {
 		DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
 			   "Failed to allocate srq bitmap, rc = %d\n", rc);
-		goto free_real_cid_map;
+		goto free_xrc_srq_map;
 	}
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn))
@@ -292,6 +314,8 @@ static int qed_rdma_alloc(struct qed_hwfn *p_hwfn)
 
 free_srq_map:
 	kfree(p_rdma_info->srq_map.bitmap);
+free_xrc_srq_map:
+	kfree(p_rdma_info->xrc_srq_map.bitmap);
 free_real_cid_map:
 	kfree(p_rdma_info->real_cid_map.bitmap);
 free_cid_map:
@@ -304,6 +328,8 @@ free_cq_map:
 	kfree(p_rdma_info->cq_map.bitmap);
 free_dpi_map:
 	kfree(p_rdma_info->dpi_map.bitmap);
+free_xrcd_map:
+	kfree(p_rdma_info->xrcd_map.bitmap);
 free_pd_map:
 	kfree(p_rdma_info->pd_map.bitmap);
 free_rdma_port:
@@ -377,6 +403,7 @@ static void qed_rdma_resc_free(struct qed_hwfn *p_hwfn)
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->tid_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->srq_map, 1);
 	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->real_cid_map, 1);
+	qed_rdma_bmap_free(p_hwfn, &p_hwfn->p_rdma_info->xrc_srq_map, 1);
 
 	kfree(p_rdma_info->port);
 	kfree(p_rdma_info->dev);
@@ -612,7 +639,10 @@ static int qed_rdma_start_fw(struct qed_hwfn *p_hwfn,
 	p_params_header->cnq_start_offset = (u8)RESC_START(p_hwfn,
 							   QED_RDMA_CNQ_RAM);
 	p_params_header->num_cnqs = params->desired_cnq;
-
+	p_params_header->first_reg_srq_id =
+	    cpu_to_le16(p_hwfn->p_rdma_info->srq_id_offset);
+	p_params_header->reg_srq_base_addr =
+	    cpu_to_le32(qed_cxt_get_ilt_page_size(p_hwfn, ILT_CLI_TSDM));
 	if (params->cq_mode == QED_RDMA_CQ_MODE_16_BITS)
 		p_params_header->cq_ring_mode = 1;
 	else
@@ -983,6 +1013,41 @@ static void qed_rdma_free_pd(void *rdma_cxt, u16 pd)
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 }
 
+static int qed_rdma_alloc_xrcd(void *rdma_cxt, u16 *xrcd_id)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+	u32 returned_id;
+	int rc;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc XRCD\n");
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	rc = qed_rdma_bmap_alloc_id(p_hwfn,
+				    &p_hwfn->p_rdma_info->xrcd_map,
+				    &returned_id);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+	if (rc) {
+		DP_NOTICE(p_hwfn, "Failed in allocating xrcd id\n");
+		return rc;
+	}
+
+	*xrcd_id = (u16)returned_id;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "Alloc XRCD - done, rc = %d\n", rc);
+	return rc;
+}
+
+static void qed_rdma_free_xrcd(void *rdma_cxt, u16 xrcd_id)
+{
+	struct qed_hwfn *p_hwfn = (struct qed_hwfn *)rdma_cxt;
+
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "xrcd_id = %08x\n", xrcd_id);
+
+	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
+	qed_bmap_release_id(p_hwfn, &p_hwfn->p_rdma_info->xrcd_map, xrcd_id);
+	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
+}
+
 static enum qed_rdma_toggle_bit
 qed_rdma_toggle_bit_create_resize_cq(struct qed_hwfn *p_hwfn, u16 icid)
 {
@@ -1306,6 +1371,8 @@ qed_rdma_create_qp(void *rdma_cxt,
 	qp->resp_offloaded = false;
 	qp->e2e_flow_control_en = qp->use_srq ? false : true;
 	qp->stats_queue = in_params->stats_queue;
+	qp->qp_type = in_params->qp_type;
+	qp->xrcd_id = in_params->xrcd_id;
 
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
 		rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
@@ -1418,6 +1485,18 @@ static int qed_rdma_modify_qp(void *rdma_cxt,
 			   qp->cur_state);
 	}
 
+	switch (qp->qp_type) {
+	case QED_RDMA_QP_TYPE_XRC_INI:
+		qp->has_req = 1;
+		break;
+	case QED_RDMA_QP_TYPE_XRC_TGT:
+		qp->has_resp = 1;
+		break;
+	default:
+		qp->has_req = 1;
+		qp->has_resp = 1;
+	}
+
 	if (QED_IS_IWARP_PERSONALITY(p_hwfn)) {
 		enum qed_iwarp_qp_state new_state =
 		    qed_roce2iwarp_state(qp->cur_state);
@@ -1657,6 +1736,15 @@ static void *qed_rdma_get_rdma_ctx(struct qed_dev *cdev)
 	return QED_AFFIN_HWFN(cdev);
 }
 
+static struct qed_bmap *qed_rdma_get_srq_bmap(struct qed_hwfn *p_hwfn,
+					      bool is_xrc)
+{
+	if (is_xrc)
+		return &p_hwfn->p_rdma_info->xrc_srq_map;
+
+	return &p_hwfn->p_rdma_info->srq_map;
+}
+
 static int qed_rdma_modify_srq(void *rdma_cxt,
 			       struct qed_rdma_modify_srq_in_params *in_params)
 {
@@ -1686,8 +1774,8 @@ static int qed_rdma_modify_srq(void *rdma_cxt,
 	if (rc)
 		return rc;
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "modified SRQ id = %x",
-		   in_params->srq_id);
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "modified SRQ id = %x, is_xrc=%u\n",
+		   in_params->srq_id, in_params->is_xrc);
 
 	return rc;
 }
@@ -1702,6 +1790,7 @@ qed_rdma_destroy_srq(void *rdma_cxt,
 	struct qed_spq_entry *p_ent;
 	struct qed_bmap *bmap;
 	u16 opaque_fid;
+	u16 offset;
 	int rc;
 
 	opaque_fid = p_hwfn->hw_info.opaque_fid;
@@ -1723,14 +1812,16 @@ qed_rdma_destroy_srq(void *rdma_cxt,
 	if (rc)
 		return rc;
 
-	bmap = &p_hwfn->p_rdma_info->srq_map;
+	bmap = qed_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc);
+	offset = (in_params->is_xrc) ? 0 : p_hwfn->p_rdma_info->srq_id_offset;
 
 	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
-	qed_bmap_release_id(p_hwfn, bmap, in_params->srq_id);
+	qed_bmap_release_id(p_hwfn, bmap, in_params->srq_id - offset);
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "SRQ destroyed Id = %x",
-		   in_params->srq_id);
+	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
+		   "XRC/SRQ destroyed Id = %x, is_xrc=%u\n",
+		   in_params->srq_id, in_params->is_xrc);
 
 	return rc;
 }
@@ -1748,24 +1839,26 @@ qed_rdma_create_srq(void *rdma_cxt,
 	u16 opaque_fid, srq_id;
 	struct qed_bmap *bmap;
 	u32 returned_id;
+	u16 offset;
 	int rc;
 
-	bmap = &p_hwfn->p_rdma_info->srq_map;
+	bmap = qed_rdma_get_srq_bmap(p_hwfn, in_params->is_xrc);
 	spin_lock_bh(&p_hwfn->p_rdma_info->lock);
 	rc = qed_rdma_bmap_alloc_id(p_hwfn, bmap, &returned_id);
 	spin_unlock_bh(&p_hwfn->p_rdma_info->lock);
 
 	if (rc) {
-		DP_NOTICE(p_hwfn, "failed to allocate srq id\n");
+		DP_NOTICE(p_hwfn,
+			  "failed to allocate xrc/srq id (is_xrc=%u)\n",
+			  in_params->is_xrc);
 		return rc;
 	}
 
-	elem_type = QED_ELEM_SRQ;
+	elem_type = (in_params->is_xrc) ? (QED_ELEM_XRC_SRQ) : (QED_ELEM_SRQ);
 	rc = qed_cxt_dynamic_ilt_alloc(p_hwfn, elem_type, returned_id);
 	if (rc)
 		goto err;
-	/* returned id is no greater than u16 */
-	srq_id = (u16)returned_id;
+
 	opaque_fid = p_hwfn->hw_info.opaque_fid;
 
 	opaque_fid = p_hwfn->hw_info.opaque_fid;
@@ -1782,20 +1875,34 @@ qed_rdma_create_srq(void *rdma_cxt,
 	DMA_REGPAIR_LE(p_ramrod->pbl_base_addr, in_params->pbl_base_addr);
 	p_ramrod->pages_in_srq_pbl = cpu_to_le16(in_params->num_pages);
 	p_ramrod->pd_id = cpu_to_le16(in_params->pd_id);
-	p_ramrod->srq_id.srq_idx = cpu_to_le16(srq_id);
 	p_ramrod->srq_id.opaque_fid = cpu_to_le16(opaque_fid);
 	p_ramrod->page_size = cpu_to_le16(in_params->page_size);
 	DMA_REGPAIR_LE(p_ramrod->producers_addr, in_params->prod_pair_addr);
+	offset = (in_params->is_xrc) ? 0 : p_hwfn->p_rdma_info->srq_id_offset;
+	srq_id = (u16)returned_id + offset;
+	p_ramrod->srq_id.srq_idx = cpu_to_le16(srq_id);
 
+	if (in_params->is_xrc) {
+		SET_FIELD(p_ramrod->flags,
+			  RDMA_SRQ_CREATE_RAMROD_DATA_XRC_FLAG, 1);
+		SET_FIELD(p_ramrod->flags,
+			  RDMA_SRQ_CREATE_RAMROD_DATA_RESERVED_KEY_EN,
+			  in_params->reserved_key_en);
+		p_ramrod->xrc_srq_cq_cid =
+			cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
+				     in_params->cq_cid);
+		p_ramrod->xrc_domain = cpu_to_le16(in_params->xrcd_id);
+	}
 	rc = qed_spq_post(p_hwfn, p_ent, NULL);
 	if (rc)
 		goto err;
 
 	out_params->srq_id = srq_id;
 
-	DP_VERBOSE(p_hwfn, QED_MSG_RDMA,
-		   "SRQ created Id = %x\n", out_params->srq_id);
-
+	DP_VERBOSE(p_hwfn,
+		   QED_MSG_RDMA,
+		   "XRC/SRQ created Id = %x (is_xrc=%u)\n",
+		   out_params->srq_id, in_params->is_xrc);
 	return rc;
 
 err:
@@ -1961,6 +2068,8 @@ static const struct qed_rdma_ops qed_rdma_ops_pass = {
 	.rdma_cnq_prod_update = &qed_rdma_cnq_prod_update,
 	.rdma_alloc_pd = &qed_rdma_alloc_pd,
 	.rdma_dealloc_pd = &qed_rdma_free_pd,
+	.rdma_alloc_xrcd = &qed_rdma_alloc_xrcd,
+	.rdma_dealloc_xrcd = &qed_rdma_free_xrcd,
 	.rdma_create_cq = &qed_rdma_create_cq,
 	.rdma_destroy_cq = &qed_rdma_destroy_cq,
 	.rdma_create_qp = &qed_rdma_create_qp,
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 3689fe3e5935..5a7ebc764bb6 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -63,6 +63,11 @@
 #define QED_RDMA_MAX_CQE_32_BIT             (0x7FFFFFFF - 1)
 #define QED_RDMA_MAX_CQE_16_BIT             (0x7FFF - 1)
 
+/* Up to 2^16 XRC Domains are supported, but the actual number of supported XRC
+ * SRQs is much smaller so there's no need to have that many domains.
+ */
+#define QED_RDMA_MAX_XRCDS      (roundup_pow_of_two(RDMA_MAX_XRC_SRQS))
+
 enum qed_rdma_toggle_bit {
 	QED_RDMA_TOGGLE_BIT_CLEAR = 0,
 	QED_RDMA_TOGGLE_BIT_SET = 1
@@ -81,9 +86,11 @@ struct qed_rdma_info {
 
 	struct qed_bmap cq_map;
 	struct qed_bmap pd_map;
+	struct qed_bmap xrcd_map;
 	struct qed_bmap tid_map;
 	struct qed_bmap qp_map;
 	struct qed_bmap srq_map;
+	struct qed_bmap xrc_srq_map;
 	struct qed_bmap cid_map;
 	struct qed_bmap tcp_cid_map;
 	struct qed_bmap real_cid_map;
@@ -111,6 +118,7 @@ struct qed_rdma_qp {
 	u32 qpid;
 	u16 icid;
 	enum qed_roce_qp_state cur_state;
+	enum qed_rdma_qp_type qp_type;
 	enum qed_iwarp_qp_state iwarp_state;
 	bool use_srq;
 	bool signal_all;
@@ -153,18 +161,21 @@ struct qed_rdma_qp {
 	dma_addr_t orq_phys_addr;
 	u8 orq_num_pages;
 	bool req_offloaded;
+	bool has_req;
 
 	/* responder */
 	u8 max_rd_atomic_resp;
 	u32 rq_psn;
 	u16 rq_cq_id;
 	u16 rq_num_pages;
+	u16 xrcd_id;
 	dma_addr_t rq_pbl_ptr;
 	void *irq;
 	dma_addr_t irq_phys_addr;
 	u8 irq_num_pages;
 	bool resp_offloaded;
 	u32 cq_prod;
+	bool has_resp;
 
 	u8 remote_mac_addr[6];
 	u8 local_mac_addr[6];
@@ -174,6 +185,14 @@ struct qed_rdma_qp {
 	struct qed_iwarp_ep *ep;
 };
 
+static inline bool qed_rdma_is_xrc_qp(struct qed_rdma_qp *qp)
+{
+	if (qp->qp_type == QED_RDMA_QP_TYPE_XRC_TGT ||
+	    qp->qp_type == QED_RDMA_QP_TYPE_XRC_INI)
+		return true;
+
+	return false;
+}
 #if IS_ENABLED(CONFIG_QED_RDMA)
 void qed_rdma_dpm_bar(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
 void qed_rdma_dpm_conf(struct qed_hwfn *p_hwfn, struct qed_ptt *p_ptt);
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 475b89903f46..46a4d09eacef 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -254,6 +254,9 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
 	int rc;
 	u8 tc;
 
+	if (!qp->has_resp)
+		return 0;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
 	/* Allocate DMA-able memory for IRQ */
@@ -315,6 +318,10 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
 		  ROCE_CREATE_QP_RESP_RAMROD_DATA_MIN_RNR_NAK_TIMER,
 		  qp->min_rnr_nak_timer);
 
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_RESP_RAMROD_DATA_XRC_FLAG,
+		  qed_rdma_is_xrc_qp(qp));
+
 	p_ramrod->max_ird = qp->max_rd_atomic_resp;
 	p_ramrod->traffic_class = qp->traffic_class_tos;
 	p_ramrod->hop_limit = qp->hop_limit_ttl;
@@ -335,6 +342,7 @@ static int qed_roce_sp_create_responder(struct qed_hwfn *p_hwfn,
 	p_ramrod->qp_handle_for_cqe.lo = cpu_to_le32(qp->qp_handle.lo);
 	p_ramrod->cq_cid = cpu_to_le32((p_hwfn->hw_info.opaque_fid << 16) |
 				       qp->rq_cq_id);
+	p_ramrod->xrc_domain = cpu_to_le16(qp->xrcd_id);
 
 	tc = qed_roce_get_qp_tc(p_hwfn, qp);
 	regular_latency_queue = qed_get_cm_pq_idx_ofld_mtc(p_hwfn, tc);
@@ -395,6 +403,9 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
 	int rc;
 	u8 tc;
 
+	if (!qp->has_req)
+		return 0;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
 	/* Allocate DMA-able memory for ORQ */
@@ -444,6 +455,10 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
 		  ROCE_CREATE_QP_REQ_RAMROD_DATA_RNR_NAK_CNT,
 		  qp->rnr_retry_cnt);
 
+	SET_FIELD(p_ramrod->flags,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG,
+		  qed_rdma_is_xrc_qp(qp));
+
 	p_ramrod->max_ord = qp->max_rd_atomic_req;
 	p_ramrod->traffic_class = qp->traffic_class_tos;
 	p_ramrod->hop_limit = qp->hop_limit_ttl;
@@ -517,6 +532,9 @@ static int qed_roce_sp_modify_responder(struct qed_hwfn *p_hwfn,
 	struct qed_spq_entry *p_ent;
 	int rc;
 
+	if (!qp->has_resp)
+		return 0;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
 	if (move_to_err && !qp->resp_offloaded)
@@ -611,6 +629,9 @@ static int qed_roce_sp_modify_requester(struct qed_hwfn *p_hwfn,
 	struct qed_spq_entry *p_ent;
 	int rc;
 
+	if (!qp->has_req)
+		return 0;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
 	if (move_to_err && !(qp->req_offloaded))
@@ -705,6 +726,11 @@ static int qed_roce_sp_destroy_qp_responder(struct qed_hwfn *p_hwfn,
 	dma_addr_t ramrod_res_phys;
 	int rc;
 
+	if (!qp->has_resp) {
+		*cq_prod = 0;
+		return 0;
+	}
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 	*cq_prod = qp->cq_prod;
 
@@ -785,6 +811,9 @@ static int qed_roce_sp_destroy_qp_requester(struct qed_hwfn *p_hwfn,
 	dma_addr_t ramrod_res_phys;
 	int rc = -ENOMEM;
 
+	if (!qp->has_req)
+		return 0;
+
 	DP_VERBOSE(p_hwfn, QED_MSG_RDMA, "icid = %08x\n", qp->icid);
 
 	if (!qp->req_offloaded)
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index 74efca15fde7..f93edd5750a5 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -53,6 +53,13 @@ enum qed_roce_qp_state {
 	QED_ROCE_QP_STATE_SQE
 };
 
+enum qed_rdma_qp_type {
+	QED_RDMA_QP_TYPE_RC,
+	QED_RDMA_QP_TYPE_XRC_INI,
+	QED_RDMA_QP_TYPE_XRC_TGT,
+	QED_RDMA_QP_TYPE_INVAL = 0xffff,
+};
+
 enum qed_rdma_tid_type {
 	QED_RDMA_TID_REGISTERED_MR,
 	QED_RDMA_TID_FMR,
@@ -291,6 +298,12 @@ struct qed_rdma_create_srq_in_params {
 	u16 num_pages;
 	u16 pd_id;
 	u16 page_size;
+
+	/* XRC related only */
+	bool reserved_key_en;
+	bool is_xrc;
+	u32 cq_cid;
+	u16 xrcd_id;
 };
 
 struct qed_rdma_destroy_cq_in_params {
@@ -319,7 +332,9 @@ struct qed_rdma_create_qp_in_params {
 	u16 rq_num_pages;
 	u64 rq_pbl_ptr;
 	u16 srq_id;
+	u16 xrcd_id;
 	u8 stats_queue;
+	enum qed_rdma_qp_type qp_type;
 };
 
 struct qed_rdma_create_qp_out_params {
@@ -429,11 +444,13 @@ struct qed_rdma_create_srq_out_params {
 
 struct qed_rdma_destroy_srq_in_params {
 	u16 srq_id;
+	bool is_xrc;
 };
 
 struct qed_rdma_modify_srq_in_params {
 	u32 wqe_limit;
 	u16 srq_id;
+	bool is_xrc;
 };
 
 struct qed_rdma_stats_out_params {
@@ -611,6 +628,8 @@ struct qed_rdma_ops {
 	int (*rdma_set_rdma_int)(struct qed_dev *cdev, u16 cnt);
 	int (*rdma_alloc_pd)(void *rdma_cxt, u16 *pd);
 	void (*rdma_dealloc_pd)(void *rdma_cxt, u16 pd);
+	int (*rdma_alloc_xrcd)(void *rdma_cxt, u16 *xrcd);
+	void (*rdma_dealloc_xrcd)(void *rdma_cxt, u16 xrcd);
 	int (*rdma_create_cq)(void *rdma_cxt,
 			      struct qed_rdma_create_cq_in_params *params,
 			      u16 *icid);
-- 
cgit v1.2.3-59-g8ed1b


From 8066021915924f58ed338bf38208215f5a7355f6 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 20 May 2020 08:29:14 +0200
Subject: ethtool: provide UAPI for PHY Signal Quality Index (SQI)

Signal Quality Index is a mandatory value required by "OPEN Alliance
SIG" for the 100Base-T1 PHYs [1]. This indicator can be used for cable
integrity diagnostics and for investigating other noise sources, and is
implemented by at least two vendors: NXP[2] and TI[3].

[1] http://www.opensig.org/download/document/218/Advanced_PHY_features_for_automotive_Ethernet_V1.0.pdf
[2] https://www.nxp.com/docs/en/data-sheet/TJA1100.pdf
[3] https://www.ti.com/product/DP83TC811R-Q1

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Michal Kubecek <mkubecek@suse.cz>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst |  6 ++-
 include/linux/phy.h                          |  2 +
 include/uapi/linux/ethtool_netlink.h         |  2 +
 net/ethtool/linkstate.c                      | 75 +++++++++++++++++++++++++++-
 4 files changed, 82 insertions(+), 3 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index eed46b6aa07d..7e651ea33eab 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -454,10 +454,12 @@ Request contents:
 
 Kernel response contents:
 
-  ====================================  ======  ==========================
+  ====================================  ======  ============================
   ``ETHTOOL_A_LINKSTATE_HEADER``        nested  reply header
   ``ETHTOOL_A_LINKSTATE_LINK``          bool    link state (up/down)
-  ====================================  ======  ==========================
+  ``ETHTOOL_A_LINKSTATE_SQI``           u32     Current Signal Quality Index
+  ``ETHTOOL_A_LINKSTATE_SQI_MAX``       u32     Max support SQI value
+  ====================================  ======  ============================
 
 For most NIC drivers, the value of ``ETHTOOL_A_LINKSTATE_LINK`` returns
 carrier flag provided by ``netif_carrier_ok()`` but there are drivers which
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 467aa8bf9f64..2bcdf19ed3b4 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -723,6 +723,8 @@ struct phy_driver {
 			    struct ethtool_tunable *tuna,
 			    const void *data);
 	int (*set_loopback)(struct phy_device *dev, bool enable);
+	int (*get_sqi)(struct phy_device *dev);
+	int (*get_sqi_max)(struct phy_device *dev);
 };
 #define to_phy_driver(d) container_of(to_mdio_common_driver(d),		\
 				      struct phy_driver, mdiodrv)
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 2881af411f76..e6f109b76c9a 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -232,6 +232,8 @@ enum {
 	ETHTOOL_A_LINKSTATE_UNSPEC,
 	ETHTOOL_A_LINKSTATE_HEADER,		/* nest - _A_HEADER_* */
 	ETHTOOL_A_LINKSTATE_LINK,		/* u8 */
+	ETHTOOL_A_LINKSTATE_SQI,		/* u32 */
+	ETHTOOL_A_LINKSTATE_SQI_MAX,		/* u32 */
 
 	/* add new constants above here */
 	__ETHTOOL_A_LINKSTATE_CNT,
diff --git a/net/ethtool/linkstate.c b/net/ethtool/linkstate.c
index 2740cde0a182..7f47ba89054e 100644
--- a/net/ethtool/linkstate.c
+++ b/net/ethtool/linkstate.c
@@ -2,6 +2,7 @@
 
 #include "netlink.h"
 #include "common.h"
+#include <linux/phy.h>
 
 struct linkstate_req_info {
 	struct ethnl_req_info		base;
@@ -10,6 +11,8 @@ struct linkstate_req_info {
 struct linkstate_reply_data {
 	struct ethnl_reply_data		base;
 	int				link;
+	int				sqi;
+	int				sqi_max;
 };
 
 #define LINKSTATE_REPDATA(__reply_base) \
@@ -20,8 +23,46 @@ linkstate_get_policy[ETHTOOL_A_LINKSTATE_MAX + 1] = {
 	[ETHTOOL_A_LINKSTATE_UNSPEC]		= { .type = NLA_REJECT },
 	[ETHTOOL_A_LINKSTATE_HEADER]		= { .type = NLA_NESTED },
 	[ETHTOOL_A_LINKSTATE_LINK]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_LINKSTATE_SQI]		= { .type = NLA_REJECT },
+	[ETHTOOL_A_LINKSTATE_SQI_MAX]		= { .type = NLA_REJECT },
 };
 
+static int linkstate_get_sqi(struct net_device *dev)
+{
+	struct phy_device *phydev = dev->phydev;
+	int ret;
+
+	if (!phydev)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&phydev->lock);
+	if (!phydev->drv || !phydev->drv->get_sqi)
+		ret = -EOPNOTSUPP;
+	else
+		ret = phydev->drv->get_sqi(phydev);
+	mutex_unlock(&phydev->lock);
+
+	return ret;
+}
+
+static int linkstate_get_sqi_max(struct net_device *dev)
+{
+	struct phy_device *phydev = dev->phydev;
+	int ret;
+
+	if (!phydev)
+		return -EOPNOTSUPP;
+
+	mutex_lock(&phydev->lock);
+	if (!phydev->drv || !phydev->drv->get_sqi_max)
+		ret = -EOPNOTSUPP;
+	else
+		ret = phydev->drv->get_sqi_max(phydev);
+	mutex_unlock(&phydev->lock);
+
+	return ret;
+}
+
 static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
 				  struct ethnl_reply_data *reply_base,
 				  struct genl_info *info)
@@ -34,6 +75,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
 	if (ret < 0)
 		return ret;
 	data->link = __ethtool_get_link(dev);
+
+	ret = linkstate_get_sqi(dev);
+	if (ret < 0 && ret != -EOPNOTSUPP)
+		return ret;
+
+	data->sqi = ret;
+
+	ret = linkstate_get_sqi_max(dev);
+	if (ret < 0 && ret != -EOPNOTSUPP)
+		return ret;
+
+	data->sqi_max = ret;
+
 	ethnl_ops_complete(dev);
 
 	return 0;
@@ -42,8 +96,19 @@ static int linkstate_prepare_data(const struct ethnl_req_info *req_base,
 static int linkstate_reply_size(const struct ethnl_req_info *req_base,
 				const struct ethnl_reply_data *reply_base)
 {
-	return nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
+	struct linkstate_reply_data *data = LINKSTATE_REPDATA(reply_base);
+	int len;
+
+	len = nla_total_size(sizeof(u8)) /* LINKSTATE_LINK */
 		+ 0;
+
+	if (data->sqi != -EOPNOTSUPP)
+		len += nla_total_size(sizeof(u32));
+
+	if (data->sqi_max != -EOPNOTSUPP)
+		len += nla_total_size(sizeof(u32));
+
+	return len;
 }
 
 static int linkstate_fill_reply(struct sk_buff *skb,
@@ -56,6 +121,14 @@ static int linkstate_fill_reply(struct sk_buff *skb,
 	    nla_put_u8(skb, ETHTOOL_A_LINKSTATE_LINK, !!data->link))
 		return -EMSGSIZE;
 
+	if (data->sqi != -EOPNOTSUPP &&
+	    nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI, data->sqi))
+		return -EMSGSIZE;
+
+	if (data->sqi_max != -EOPNOTSUPP &&
+	    nla_put_u32(skb, ETHTOOL_A_LINKSTATE_SQI_MAX, data->sqi_max))
+		return -EMSGSIZE;
+
 	return 0;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 68ff5e14759e7ac1aac7bc75ac5b935e390fa2b3 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 20 May 2020 08:29:15 +0200
Subject: net: phy: tja11xx: add SQI support

This patch implements reading of the Signal Quality Index for better
cable/link troubleshooting.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 0d4f9067ca71..1e79c30ca81a 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -53,6 +53,8 @@
 
 #define MII_COMMSTAT			23
 #define MII_COMMSTAT_LINK_UP		BIT(15)
+#define MII_COMMSTAT_SQI_STATE		GENMASK(7, 5)
+#define MII_COMMSTAT_SQI_MAX		7
 
 #define MII_GENSTAT			24
 #define MII_GENSTAT_PLL_LOCKED		BIT(14)
@@ -329,6 +331,22 @@ static int tja11xx_read_status(struct phy_device *phydev)
 	return 0;
 }
 
+static int tja11xx_get_sqi(struct phy_device *phydev)
+{
+	int ret;
+
+	ret = phy_read(phydev, MII_COMMSTAT);
+	if (ret < 0)
+		return ret;
+
+	return FIELD_GET(MII_COMMSTAT_SQI_STATE, ret);
+}
+
+static int tja11xx_get_sqi_max(struct phy_device *phydev)
+{
+	return MII_COMMSTAT_SQI_MAX;
+}
+
 static int tja11xx_get_sset_count(struct phy_device *phydev)
 {
 	return ARRAY_SIZE(tja11xx_hw_stats);
@@ -683,6 +701,8 @@ static struct phy_driver tja11xx_driver[] = {
 		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
+		.get_sqi	= tja11xx_get_sqi,
+		.get_sqi_max	= tja11xx_get_sqi_max,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.set_loopback   = genphy_loopback,
@@ -699,6 +719,8 @@ static struct phy_driver tja11xx_driver[] = {
 		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
+		.get_sqi	= tja11xx_get_sqi,
+		.get_sqi_max	= tja11xx_get_sqi_max,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
 		.set_loopback   = genphy_loopback,
@@ -715,6 +737,8 @@ static struct phy_driver tja11xx_driver[] = {
 		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
+		.get_sqi	= tja11xx_get_sqi,
+		.get_sqi_max	= tja11xx_get_sqi_max,
 		.match_phy_device = tja1102_p0_match_phy_device,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
@@ -736,6 +760,8 @@ static struct phy_driver tja11xx_driver[] = {
 		.config_aneg	= tja11xx_config_aneg,
 		.config_init	= tja11xx_config_init,
 		.read_status	= tja11xx_read_status,
+		.get_sqi	= tja11xx_get_sqi,
+		.get_sqi_max	= tja11xx_get_sqi_max,
 		.match_phy_device = tja1102_p1_match_phy_device,
 		.suspend	= genphy_suspend,
 		.resume		= genphy_resume,
-- 
cgit v1.2.3-59-g8ed1b


From 44ac082b30dc2a05a7e23ed7e17b5f9513873386 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:49 +0200
Subject: xsk: Fix xsk_umem_xdp_frame_sz()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

When calculating the "data_hard_end" for an XDP buffer coming from
AF_XDP zero-copy mode, the return value of xsk_umem_xdp_frame_sz() is
added to "data_hard_start".

Currently, the chunk size of the UMEM is returned by
xsk_umem_xdp_frame_sz(). This is not correct if the fixed UMEM
headroom is non-zero. Fix this by returning the chunk_size without the
UMEM headroom.

Fixes: 2a637c5b1aaf ("xdp: For Intel AF_XDP drivers add XDP frame_sz")
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-2-bjorn.topel@gmail.com
---
 include/net/xdp_sock.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index abd72de25fa4..6b1137ce1692 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -239,7 +239,7 @@ static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
 
 static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
 {
-	return umem->chunk_size_nohr + umem->headroom;
+	return umem->chunk_size_nohr;
 }
 
 #else
-- 
cgit v1.2.3-59-g8ed1b


From d20a1676df7e4c3c23d73299159811a50e4854bc Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:50 +0200
Subject: xsk: Move xskmap.c to net/xdp/
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The XSKMAP is partly implemented by net/xdp/xsk.c. Move xskmap.c from
kernel/bpf/ to net/xdp/, which is the logical place for AF_XDP related
code. Also, move AF_XDP struct definitions, and function declarations
only used by AF_XDP internals into net/xdp/xsk.h.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-3-bjorn.topel@gmail.com
---
 include/net/xdp_sock.h |  20 ----
 kernel/bpf/Makefile    |   3 -
 kernel/bpf/xskmap.c    | 265 ------------------------------------------------
 net/xdp/Makefile       |   2 +-
 net/xdp/xsk.h          |  16 +++
 net/xdp/xskmap.c       | 267 +++++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 284 insertions(+), 289 deletions(-)
 delete mode 100644 kernel/bpf/xskmap.c
 create mode 100644 net/xdp/xskmap.c

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 6b1137ce1692..8f3f6f5b0dfe 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -65,22 +65,12 @@ struct xdp_umem {
 	struct list_head xsk_tx_list;
 };
 
-/* Nodes are linked in the struct xdp_sock map_list field, and used to
- * track which maps a certain socket reside in.
- */
-
 struct xsk_map {
 	struct bpf_map map;
 	spinlock_t lock; /* Synchronize map updates */
 	struct xdp_sock *xsk_map[];
 };
 
-struct xsk_map_node {
-	struct list_head node;
-	struct xsk_map *map;
-	struct xdp_sock **map_entry;
-};
-
 struct xdp_sock {
 	/* struct sock must be the first member of struct xdp_sock */
 	struct sock sk;
@@ -114,7 +104,6 @@ struct xdp_sock {
 struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
 /* Used from netdev driver */
 bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
 bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
@@ -133,10 +122,6 @@ void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
 void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
 bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
 
-void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
-			     struct xdp_sock **map_entry);
-int xsk_map_inc(struct xsk_map *map);
-void xsk_map_put(struct xsk_map *map);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
@@ -248,11 +233,6 @@ static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 	return -ENOTSUPP;
 }
 
-static inline bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
-{
-	return false;
-}
-
 static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
 {
 	return false;
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 37b2d8620153..375b933010dd 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -12,9 +12,6 @@ obj-$(CONFIG_BPF_JIT) += dispatcher.o
 ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
-ifeq ($(CONFIG_XDP_SOCKETS),y)
-obj-$(CONFIG_BPF_SYSCALL) += xskmap.o
-endif
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
 endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
diff --git a/kernel/bpf/xskmap.c b/kernel/bpf/xskmap.c
deleted file mode 100644
index 2cc5c8f4c800..000000000000
--- a/kernel/bpf/xskmap.c
+++ /dev/null
@@ -1,265 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0
-/* XSKMAP used for AF_XDP sockets
- * Copyright(c) 2018 Intel Corporation.
- */
-
-#include <linux/bpf.h>
-#include <linux/capability.h>
-#include <net/xdp_sock.h>
-#include <linux/slab.h>
-#include <linux/sched.h>
-
-int xsk_map_inc(struct xsk_map *map)
-{
-	bpf_map_inc(&map->map);
-	return 0;
-}
-
-void xsk_map_put(struct xsk_map *map)
-{
-	bpf_map_put(&map->map);
-}
-
-static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
-					       struct xdp_sock **map_entry)
-{
-	struct xsk_map_node *node;
-	int err;
-
-	node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
-	if (!node)
-		return ERR_PTR(-ENOMEM);
-
-	err = xsk_map_inc(map);
-	if (err) {
-		kfree(node);
-		return ERR_PTR(err);
-	}
-
-	node->map = map;
-	node->map_entry = map_entry;
-	return node;
-}
-
-static void xsk_map_node_free(struct xsk_map_node *node)
-{
-	xsk_map_put(node->map);
-	kfree(node);
-}
-
-static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
-{
-	spin_lock_bh(&xs->map_list_lock);
-	list_add_tail(&node->node, &xs->map_list);
-	spin_unlock_bh(&xs->map_list_lock);
-}
-
-static void xsk_map_sock_delete(struct xdp_sock *xs,
-				struct xdp_sock **map_entry)
-{
-	struct xsk_map_node *n, *tmp;
-
-	spin_lock_bh(&xs->map_list_lock);
-	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
-		if (map_entry == n->map_entry) {
-			list_del(&n->node);
-			xsk_map_node_free(n);
-		}
-	}
-	spin_unlock_bh(&xs->map_list_lock);
-}
-
-static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
-{
-	struct bpf_map_memory mem;
-	int err, numa_node;
-	struct xsk_map *m;
-	u64 size;
-
-	if (!capable(CAP_NET_ADMIN))
-		return ERR_PTR(-EPERM);
-
-	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 ||
-	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
-		return ERR_PTR(-EINVAL);
-
-	numa_node = bpf_map_attr_numa_node(attr);
-	size = struct_size(m, xsk_map, attr->max_entries);
-
-	err = bpf_map_charge_init(&mem, size);
-	if (err < 0)
-		return ERR_PTR(err);
-
-	m = bpf_map_area_alloc(size, numa_node);
-	if (!m) {
-		bpf_map_charge_finish(&mem);
-		return ERR_PTR(-ENOMEM);
-	}
-
-	bpf_map_init_from_attr(&m->map, attr);
-	bpf_map_charge_move(&m->map.memory, &mem);
-	spin_lock_init(&m->lock);
-
-	return &m->map;
-}
-
-static void xsk_map_free(struct bpf_map *map)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-
-	bpf_clear_redirect_map(map);
-	synchronize_net();
-	bpf_map_area_free(m);
-}
-
-static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	u32 index = key ? *(u32 *)key : U32_MAX;
-	u32 *next = next_key;
-
-	if (index >= m->map.max_entries) {
-		*next = 0;
-		return 0;
-	}
-
-	if (index == m->map.max_entries - 1)
-		return -ENOENT;
-	*next = index + 1;
-	return 0;
-}
-
-static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
-{
-	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
-	struct bpf_insn *insn = insn_buf;
-
-	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
-	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
-	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
-	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
-	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
-	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
-	*insn++ = BPF_MOV64_IMM(ret, 0);
-	return insn - insn_buf;
-}
-
-static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
-{
-	WARN_ON_ONCE(!rcu_read_lock_held());
-	return __xsk_map_lookup_elem(map, *(u32 *)key);
-}
-
-static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
-{
-	return ERR_PTR(-EOPNOTSUPP);
-}
-
-static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
-			       u64 map_flags)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *xs, *old_xs, **map_entry;
-	u32 i = *(u32 *)key, fd = *(u32 *)value;
-	struct xsk_map_node *node;
-	struct socket *sock;
-	int err;
-
-	if (unlikely(map_flags > BPF_EXIST))
-		return -EINVAL;
-	if (unlikely(i >= m->map.max_entries))
-		return -E2BIG;
-
-	sock = sockfd_lookup(fd, &err);
-	if (!sock)
-		return err;
-
-	if (sock->sk->sk_family != PF_XDP) {
-		sockfd_put(sock);
-		return -EOPNOTSUPP;
-	}
-
-	xs = (struct xdp_sock *)sock->sk;
-
-	if (!xsk_is_setup_for_bpf_map(xs)) {
-		sockfd_put(sock);
-		return -EOPNOTSUPP;
-	}
-
-	map_entry = &m->xsk_map[i];
-	node = xsk_map_node_alloc(m, map_entry);
-	if (IS_ERR(node)) {
-		sockfd_put(sock);
-		return PTR_ERR(node);
-	}
-
-	spin_lock_bh(&m->lock);
-	old_xs = READ_ONCE(*map_entry);
-	if (old_xs == xs) {
-		err = 0;
-		goto out;
-	} else if (old_xs && map_flags == BPF_NOEXIST) {
-		err = -EEXIST;
-		goto out;
-	} else if (!old_xs && map_flags == BPF_EXIST) {
-		err = -ENOENT;
-		goto out;
-	}
-	xsk_map_sock_add(xs, node);
-	WRITE_ONCE(*map_entry, xs);
-	if (old_xs)
-		xsk_map_sock_delete(old_xs, map_entry);
-	spin_unlock_bh(&m->lock);
-	sockfd_put(sock);
-	return 0;
-
-out:
-	spin_unlock_bh(&m->lock);
-	sockfd_put(sock);
-	xsk_map_node_free(node);
-	return err;
-}
-
-static int xsk_map_delete_elem(struct bpf_map *map, void *key)
-{
-	struct xsk_map *m = container_of(map, struct xsk_map, map);
-	struct xdp_sock *old_xs, **map_entry;
-	int k = *(u32 *)key;
-
-	if (k >= map->max_entries)
-		return -EINVAL;
-
-	spin_lock_bh(&m->lock);
-	map_entry = &m->xsk_map[k];
-	old_xs = xchg(map_entry, NULL);
-	if (old_xs)
-		xsk_map_sock_delete(old_xs, map_entry);
-	spin_unlock_bh(&m->lock);
-
-	return 0;
-}
-
-void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
-			     struct xdp_sock **map_entry)
-{
-	spin_lock_bh(&map->lock);
-	if (READ_ONCE(*map_entry) == xs) {
-		WRITE_ONCE(*map_entry, NULL);
-		xsk_map_sock_delete(xs, map_entry);
-	}
-	spin_unlock_bh(&map->lock);
-}
-
-const struct bpf_map_ops xsk_map_ops = {
-	.map_alloc = xsk_map_alloc,
-	.map_free = xsk_map_free,
-	.map_get_next_key = xsk_map_get_next_key,
-	.map_lookup_elem = xsk_map_lookup_elem,
-	.map_gen_lookup = xsk_map_gen_lookup,
-	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
-	.map_update_elem = xsk_map_update_elem,
-	.map_delete_elem = xsk_map_delete_elem,
-	.map_check_btf = map_check_no_btf,
-};
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 71e2bdafb2ce..90b5460d6166 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1,3 +1,3 @@
 # SPDX-License-Identifier: GPL-2.0-only
-obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o
+obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o
 obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index 4cfd106bdb53..d6a0979050e6 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -17,9 +17,25 @@ struct xdp_mmap_offsets_v1 {
 	struct xdp_ring_offset_v1 cr;
 };
 
+/* Nodes are linked in the struct xdp_sock map_list field, and used to
+ * track which maps a certain socket resides in.
+ */
+
+struct xsk_map_node {
+	struct list_head node;
+	struct xsk_map *map;
+	struct xdp_sock **map_entry;
+};
+
 static inline struct xdp_sock *xdp_sk(struct sock *sk)
 {
 	return (struct xdp_sock *)sk;
 }
 
+bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs);
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+			     struct xdp_sock **map_entry);
+int xsk_map_inc(struct xsk_map *map);
+void xsk_map_put(struct xsk_map *map);
+
 #endif /* XSK_H_ */
diff --git a/net/xdp/xskmap.c b/net/xdp/xskmap.c
new file mode 100644
index 000000000000..1dc7208c71ba
--- /dev/null
+++ b/net/xdp/xskmap.c
@@ -0,0 +1,267 @@
+// SPDX-License-Identifier: GPL-2.0
+/* XSKMAP used for AF_XDP sockets
+ * Copyright(c) 2018 Intel Corporation.
+ */
+
+#include <linux/bpf.h>
+#include <linux/capability.h>
+#include <net/xdp_sock.h>
+#include <linux/slab.h>
+#include <linux/sched.h>
+
+#include "xsk.h"
+
+int xsk_map_inc(struct xsk_map *map)
+{
+	bpf_map_inc(&map->map);
+	return 0;
+}
+
+void xsk_map_put(struct xsk_map *map)
+{
+	bpf_map_put(&map->map);
+}
+
+static struct xsk_map_node *xsk_map_node_alloc(struct xsk_map *map,
+					       struct xdp_sock **map_entry)
+{
+	struct xsk_map_node *node;
+	int err;
+
+	node = kzalloc(sizeof(*node), GFP_ATOMIC | __GFP_NOWARN);
+	if (!node)
+		return ERR_PTR(-ENOMEM);
+
+	err = xsk_map_inc(map);
+	if (err) {
+		kfree(node);
+		return ERR_PTR(err);
+	}
+
+	node->map = map;
+	node->map_entry = map_entry;
+	return node;
+}
+
+static void xsk_map_node_free(struct xsk_map_node *node)
+{
+	xsk_map_put(node->map);
+	kfree(node);
+}
+
+static void xsk_map_sock_add(struct xdp_sock *xs, struct xsk_map_node *node)
+{
+	spin_lock_bh(&xs->map_list_lock);
+	list_add_tail(&node->node, &xs->map_list);
+	spin_unlock_bh(&xs->map_list_lock);
+}
+
+static void xsk_map_sock_delete(struct xdp_sock *xs,
+				struct xdp_sock **map_entry)
+{
+	struct xsk_map_node *n, *tmp;
+
+	spin_lock_bh(&xs->map_list_lock);
+	list_for_each_entry_safe(n, tmp, &xs->map_list, node) {
+		if (map_entry == n->map_entry) {
+			list_del(&n->node);
+			xsk_map_node_free(n);
+		}
+	}
+	spin_unlock_bh(&xs->map_list_lock);
+}
+
+static struct bpf_map *xsk_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_map_memory mem;
+	int err, numa_node;
+	struct xsk_map *m;
+	u64 size;
+
+	if (!capable(CAP_NET_ADMIN))
+		return ERR_PTR(-EPERM);
+
+	if (attr->max_entries == 0 || attr->key_size != 4 ||
+	    attr->value_size != 4 ||
+	    attr->map_flags & ~(BPF_F_NUMA_NODE | BPF_F_RDONLY | BPF_F_WRONLY))
+		return ERR_PTR(-EINVAL);
+
+	numa_node = bpf_map_attr_numa_node(attr);
+	size = struct_size(m, xsk_map, attr->max_entries);
+
+	err = bpf_map_charge_init(&mem, size);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	m = bpf_map_area_alloc(size, numa_node);
+	if (!m) {
+		bpf_map_charge_finish(&mem);
+		return ERR_PTR(-ENOMEM);
+	}
+
+	bpf_map_init_from_attr(&m->map, attr);
+	bpf_map_charge_move(&m->map.memory, &mem);
+	spin_lock_init(&m->lock);
+
+	return &m->map;
+}
+
+static void xsk_map_free(struct bpf_map *map)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+
+	bpf_clear_redirect_map(map);
+	synchronize_net();
+	bpf_map_area_free(m);
+}
+
+static int xsk_map_get_next_key(struct bpf_map *map, void *key, void *next_key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	u32 index = key ? *(u32 *)key : U32_MAX;
+	u32 *next = next_key;
+
+	if (index >= m->map.max_entries) {
+		*next = 0;
+		return 0;
+	}
+
+	if (index == m->map.max_entries - 1)
+		return -ENOENT;
+	*next = index + 1;
+	return 0;
+}
+
+static u32 xsk_map_gen_lookup(struct bpf_map *map, struct bpf_insn *insn_buf)
+{
+	const int ret = BPF_REG_0, mp = BPF_REG_1, index = BPF_REG_2;
+	struct bpf_insn *insn = insn_buf;
+
+	*insn++ = BPF_LDX_MEM(BPF_W, ret, index, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JGE, ret, map->max_entries, 5);
+	*insn++ = BPF_ALU64_IMM(BPF_LSH, ret, ilog2(sizeof(struct xsk_sock *)));
+	*insn++ = BPF_ALU64_IMM(BPF_ADD, mp, offsetof(struct xsk_map, xsk_map));
+	*insn++ = BPF_ALU64_REG(BPF_ADD, ret, mp);
+	*insn++ = BPF_LDX_MEM(BPF_SIZEOF(struct xsk_sock *), ret, ret, 0);
+	*insn++ = BPF_JMP_IMM(BPF_JA, 0, 0, 1);
+	*insn++ = BPF_MOV64_IMM(ret, 0);
+	return insn - insn_buf;
+}
+
+static void *xsk_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	WARN_ON_ONCE(!rcu_read_lock_held());
+	return __xsk_map_lookup_elem(map, *(u32 *)key);
+}
+
+static void *xsk_map_lookup_elem_sys_only(struct bpf_map *map, void *key)
+{
+	return ERR_PTR(-EOPNOTSUPP);
+}
+
+static int xsk_map_update_elem(struct bpf_map *map, void *key, void *value,
+			       u64 map_flags)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *xs, *old_xs, **map_entry;
+	u32 i = *(u32 *)key, fd = *(u32 *)value;
+	struct xsk_map_node *node;
+	struct socket *sock;
+	int err;
+
+	if (unlikely(map_flags > BPF_EXIST))
+		return -EINVAL;
+	if (unlikely(i >= m->map.max_entries))
+		return -E2BIG;
+
+	sock = sockfd_lookup(fd, &err);
+	if (!sock)
+		return err;
+
+	if (sock->sk->sk_family != PF_XDP) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	xs = (struct xdp_sock *)sock->sk;
+
+	if (!xsk_is_setup_for_bpf_map(xs)) {
+		sockfd_put(sock);
+		return -EOPNOTSUPP;
+	}
+
+	map_entry = &m->xsk_map[i];
+	node = xsk_map_node_alloc(m, map_entry);
+	if (IS_ERR(node)) {
+		sockfd_put(sock);
+		return PTR_ERR(node);
+	}
+
+	spin_lock_bh(&m->lock);
+	old_xs = READ_ONCE(*map_entry);
+	if (old_xs == xs) {
+		err = 0;
+		goto out;
+	} else if (old_xs && map_flags == BPF_NOEXIST) {
+		err = -EEXIST;
+		goto out;
+	} else if (!old_xs && map_flags == BPF_EXIST) {
+		err = -ENOENT;
+		goto out;
+	}
+	xsk_map_sock_add(xs, node);
+	WRITE_ONCE(*map_entry, xs);
+	if (old_xs)
+		xsk_map_sock_delete(old_xs, map_entry);
+	spin_unlock_bh(&m->lock);
+	sockfd_put(sock);
+	return 0;
+
+out:
+	spin_unlock_bh(&m->lock);
+	sockfd_put(sock);
+	xsk_map_node_free(node);
+	return err;
+}
+
+static int xsk_map_delete_elem(struct bpf_map *map, void *key)
+{
+	struct xsk_map *m = container_of(map, struct xsk_map, map);
+	struct xdp_sock *old_xs, **map_entry;
+	int k = *(u32 *)key;
+
+	if (k >= map->max_entries)
+		return -EINVAL;
+
+	spin_lock_bh(&m->lock);
+	map_entry = &m->xsk_map[k];
+	old_xs = xchg(map_entry, NULL);
+	if (old_xs)
+		xsk_map_sock_delete(old_xs, map_entry);
+	spin_unlock_bh(&m->lock);
+
+	return 0;
+}
+
+void xsk_map_try_sock_delete(struct xsk_map *map, struct xdp_sock *xs,
+			     struct xdp_sock **map_entry)
+{
+	spin_lock_bh(&map->lock);
+	if (READ_ONCE(*map_entry) == xs) {
+		WRITE_ONCE(*map_entry, NULL);
+		xsk_map_sock_delete(xs, map_entry);
+	}
+	spin_unlock_bh(&map->lock);
+}
+
+const struct bpf_map_ops xsk_map_ops = {
+	.map_alloc = xsk_map_alloc,
+	.map_free = xsk_map_free,
+	.map_get_next_key = xsk_map_get_next_key,
+	.map_lookup_elem = xsk_map_lookup_elem,
+	.map_gen_lookup = xsk_map_gen_lookup,
+	.map_lookup_elem_sys_only = xsk_map_lookup_elem_sys_only,
+	.map_update_elem = xsk_map_update_elem,
+	.map_delete_elem = xsk_map_delete_elem,
+	.map_check_btf = map_check_no_btf,
+};
-- 
cgit v1.2.3-59-g8ed1b


From a71506a4fda92a39c8ece119876bc7ccde6d3c9d Mon Sep 17 00:00:00 2001
From: Magnus Karlsson <magnus.karlsson@intel.com>
Date: Wed, 20 May 2020 21:20:51 +0200
Subject: xsk: Move driver interface to xdp_sock_drv.h

Move the AF_XDP zero-copy driver interface to its own include file
called xdp_sock_drv.h. This should make it clearer to NIC driver
implementors which functions to use for zero-copy support.

v4->v5: Fix -Wmissing-prototypes by including the header file. (Jakub)

Signed-off-by: Magnus Karlsson <magnus.karlsson@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-4-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        |   2 +-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c         |   2 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c           |   2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c       |   2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.h    |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/xsk/umem.c  |   2 +-
 include/net/xdp_sock.h                             | 214 +-------------------
 include/net/xdp_sock_drv.h                         | 217 +++++++++++++++++++++
 net/ethtool/channels.c                             |   2 +-
 net/ethtool/ioctl.c                                |   2 +-
 net/xdp/xdp_umem.h                                 |   2 +-
 net/xdp/xsk.c                                      |   2 +-
 net/xdp/xsk_queue.c                                |   1 +
 15 files changed, 238 insertions(+), 218 deletions(-)
 create mode 100644 include/net/xdp_sock_drv.h

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 2a037ec244b9..d6b2db4f2c65 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -11,7 +11,7 @@
 #include "i40e_diag.h"
 #include "i40e_xsk.h"
 #include <net/udp_tunnel.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 /* All i40e tracepoints are defined by the include below, which
  * must be included exactly once across the whole kernel with
  * CREATE_TRACE_POINTS defined
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 2b9184aead5f..d8b0be29099a 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -2,7 +2,7 @@
 /* Copyright(c) 2018 Intel Corporation. */
 
 #include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/xdp.h>
 
 #include "i40e.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 23e5515d4527..70e204307a93 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -2,7 +2,7 @@
 /* Copyright (c) 2019, Intel Corporation. */
 
 #include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/xdp.h>
 #include "ice.h"
 #include "ice_base.h"
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index a656ee9a1fae..82e4effae704 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -2,7 +2,7 @@
 /* Copyright(c) 2018 Intel Corporation. */
 
 #include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/xdp.h>
 
 #include "ixgbe.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 761c8979bd41..3507d23f0eb8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -31,7 +31,7 @@
  */
 
 #include <linux/bpf_trace.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include "en/xdp.h"
 #include "en/params.h"
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index cab0e93497ae..a8e11adbf426 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -5,7 +5,7 @@
 #define __MLX5_EN_XSK_RX_H__
 
 #include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 /* RX data path */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
index 79b487d89757..39fa0a705856 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.h
@@ -5,7 +5,7 @@
 #define __MLX5_EN_XSK_TX_H__
 
 #include "en.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 /* TX data path */
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
index 4baaa5788320..5e49fdb564b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2019 Mellanox Technologies. */
 
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include "umem.h"
 #include "setup.h"
 #include "en/params.h"
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 8f3f6f5b0dfe..6a986dcbc336 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -15,6 +15,7 @@
 
 struct net_device;
 struct xsk_queue;
+struct xdp_buff;
 
 /* Masks for xdp_umem_page flags.
  * The low 12-bits of the addr will be 0 since this is the page address, so we
@@ -101,27 +102,9 @@ struct xdp_sock {
 	spinlock_t map_list_lock;
 };
 
-struct xdp_buff;
 #ifdef CONFIG_XDP_SOCKETS
-int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
-/* Used from netdev driver */
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_release_addr(struct xdp_umem *umem);
-void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
-bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
-void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
-struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
-					  struct xdp_umem_fq_reuse *newq);
-void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
-struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
-void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
-void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
-bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
 
+int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp);
 int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp);
 void __xsk_map_flush(void);
 
@@ -153,131 +136,24 @@ static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
 	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
 }
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	unsigned long page_addr;
-
-	addr = xsk_umem_add_offset_to_addr(addr);
-	page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
-
-	return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	addr = xsk_umem_add_offset_to_addr(addr);
-
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
-}
-
-/* Reuse-queue aware version of FILL queue helpers */
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (rq->length >= cnt)
-		return true;
-
-	return xsk_umem_has_addrs(umem, cnt - rq->length);
-}
-
-static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (!rq->length)
-		return xsk_umem_peek_addr(umem, addr);
-
-	*addr = rq->handles[rq->length - 1];
-	return addr;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (!rq->length)
-		xsk_umem_release_addr(umem);
-	else
-		rq->length--;
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	rq->handles[rq->length++] = addr;
-}
-
-/* Handle the offset appropriately depending on aligned or unaligned mode.
- * For unaligned mode, we store the offset in the upper 16-bits of the address.
- * For aligned mode, we simply add the offset to the address.
- */
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
-					 u64 offset)
-{
-	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
-		return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-	else
-		return address + offset;
-}
-
-static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
-{
-	return umem->chunk_size_nohr;
-}
-
 #else
+
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
 	return -ENOTSUPP;
 }
 
-static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
-	return false;
-}
-
-static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
-	return NULL;
-}
-
-static inline void xsk_umem_release_addr(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
-{
-}
-
-static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
-				       struct xdp_desc *desc)
-{
-	return false;
-}
-
-static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
-{
-}
-
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	return NULL;
+	return -EOPNOTSUPP;
 }
 
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
-	struct xdp_umem *umem,
-	struct xdp_umem_fq_reuse *newq)
-{
-	return NULL;
-}
-static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+static inline void __xsk_map_flush(void)
 {
 }
 
-static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
-						     u16 queue_id)
+static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
+						     u32 key)
 {
 	return NULL;
 }
@@ -297,80 +173,6 @@ static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
 	return 0;
 }
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	return NULL;
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	return 0;
-}
-
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
-	return false;
-}
-
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
-	return NULL;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
-}
-
-static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
-{
-}
-
-static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
-{
-	return false;
-}
-
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
-					 u64 offset)
-{
-	return 0;
-}
-
-static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
-{
-	return 0;
-}
-
-static inline int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline void __xsk_map_flush(void)
-{
-}
-
-static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
-						     u32 key)
-{
-	return NULL;
-}
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
new file mode 100644
index 000000000000..d67f2361937a
--- /dev/null
+++ b/include/net/xdp_sock_drv.h
@@ -0,0 +1,217 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Interface for implementing AF_XDP zero-copy support in drivers.
+ * Copyright(c) 2020 Intel Corporation.
+ */
+
+#ifndef _LINUX_XDP_SOCK_DRV_H
+#define _LINUX_XDP_SOCK_DRV_H
+
+#include <net/xdp_sock.h>
+
+#ifdef CONFIG_XDP_SOCKETS
+
+bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
+bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
+void xsk_umem_release_addr(struct xdp_umem *umem);
+void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
+bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
+void xsk_umem_consume_tx_done(struct xdp_umem *umem);
+struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
+struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
+					  struct xdp_umem_fq_reuse *newq);
+void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
+struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
+void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
+void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
+bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
+
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
+{
+	unsigned long page_addr;
+
+	addr = xsk_umem_add_offset_to_addr(addr);
+	page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
+
+	return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
+}
+
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	addr = xsk_umem_add_offset_to_addr(addr);
+
+	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
+}
+
+/* Reuse-queue aware version of FILL queue helpers */
+static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	if (rq->length >= cnt)
+		return true;
+
+	return xsk_umem_has_addrs(umem, cnt - rq->length);
+}
+
+static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	if (!rq->length)
+		return xsk_umem_peek_addr(umem, addr);
+
+	*addr = rq->handles[rq->length - 1];
+	return addr;
+}
+
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	if (!rq->length)
+		xsk_umem_release_addr(umem);
+	else
+		rq->length--;
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
+
+	rq->handles[rq->length++] = addr;
+}
+
+/* Handle the offset appropriately depending on aligned or unaligned mode.
+ * For unaligned mode, we store the offset in the upper 16-bits of the address.
+ * For aligned mode, we simply add the offset to the address.
+ */
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
+					 u64 offset)
+{
+	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
+		return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+	else
+		return address + offset;
+}
+
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+	return umem->chunk_size_nohr;
+}
+
+#else
+
+static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
+{
+	return false;
+}
+
+static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
+{
+	return NULL;
+}
+
+static inline void xsk_umem_release_addr(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
+{
+}
+
+static inline bool xsk_umem_consume_tx(struct xdp_umem *umem,
+				       struct xdp_desc *desc)
+{
+	return false;
+}
+
+static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
+{
+}
+
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
+{
+	return NULL;
+}
+
+static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
+	struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq)
+{
+	return NULL;
+}
+
+static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
+{
+}
+
+static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
+						     u16 queue_id)
+{
+	return NULL;
+}
+
+static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
+{
+	return NULL;
+}
+
+static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return 0;
+}
+
+static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
+{
+	return false;
+}
+
+static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
+{
+	return NULL;
+}
+
+static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
+{
+}
+
+static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_set_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_rx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline void xsk_clear_tx_need_wakeup(struct xdp_umem *umem)
+{
+}
+
+static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
+{
+	return false;
+}
+
+static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
+					 u64 offset)
+{
+	return 0;
+}
+
+static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
+{
+	return 0;
+}
+
+#endif /* CONFIG_XDP_SOCKETS */
+
+#endif /* _LINUX_XDP_SOCK_DRV_H */
diff --git a/net/ethtool/channels.c b/net/ethtool/channels.c
index 389924b65d05..658a8580b464 100644
--- a/net/ethtool/channels.c
+++ b/net/ethtool/channels.c
@@ -1,6 +1,6 @@
 // SPDX-License-Identifier: GPL-2.0-only
 
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 #include "netlink.h"
 #include "common.h"
diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 52102ab1709b..74892623bacd 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -24,7 +24,7 @@
 #include <linux/sched/signal.h>
 #include <linux/net.h>
 #include <net/devlink.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/flow_offload.h>
 #include <linux/ethtool_netlink.h>
 #include <generated/utsrelease.h>
diff --git a/net/xdp/xdp_umem.h b/net/xdp/xdp_umem.h
index a63a9fb251f5..32067fe98f65 100644
--- a/net/xdp/xdp_umem.h
+++ b/net/xdp/xdp_umem.h
@@ -6,7 +6,7 @@
 #ifndef XDP_UMEM_H_
 #define XDP_UMEM_H_
 
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 int xdp_umem_assign_dev(struct xdp_umem *umem, struct net_device *dev,
 			u16 queue_id, u16 flags);
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 45ffd67b367d..8bda654e82ec 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -22,7 +22,7 @@
 #include <linux/net.h>
 #include <linux/netdevice.h>
 #include <linux/rculist.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/xdp.h>
 
 #include "xsk_queue.h"
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 57fb81bd593c..554b1ebb4d02 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -6,6 +6,7 @@
 #include <linux/log2.h>
 #include <linux/slab.h>
 #include <linux/overflow.h>
+#include <net/xdp_sock_drv.h>
 
 #include "xsk_queue.h"
 
-- 
cgit v1.2.3-59-g8ed1b


From 89e4a376e3a3dab639a3947a6c7cf5d461d1aa4c Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:52 +0200
Subject: xsk: Move defines only used by AF_XDP internals to xsk.h
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the XSK_NEXT_PG_CONTIG_{MASK,SHIFT}, and
XDP_UMEM_USES_NEED_WAKEUP defines from xdp_sock.h to the AF_XDP
internal xsk.h file. Also, start using the BIT{,_ULL} macro instead of
explicit shifts.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-5-bjorn.topel@gmail.com
---
 include/net/xdp_sock.h | 14 --------------
 net/xdp/xsk.h          | 14 ++++++++++++++
 net/xdp/xsk_queue.h    |  2 ++
 3 files changed, 16 insertions(+), 14 deletions(-)

diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 6a986dcbc336..fb7fe3060175 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -17,13 +17,6 @@ struct net_device;
 struct xsk_queue;
 struct xdp_buff;
 
-/* Masks for xdp_umem_page flags.
- * The low 12-bits of the addr will be 0 since this is the page address, so we
- * can use them for flags.
- */
-#define XSK_NEXT_PG_CONTIG_SHIFT 0
-#define XSK_NEXT_PG_CONTIG_MASK (1ULL << XSK_NEXT_PG_CONTIG_SHIFT)
-
 struct xdp_umem_page {
 	void *addr;
 	dma_addr_t dma;
@@ -35,13 +28,6 @@ struct xdp_umem_fq_reuse {
 	u64 handles[];
 };
 
-/* Flags for the umem flags field.
- *
- * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
- * flags. See inlude/uapi/include/linux/if_xdp.h.
- */
-#define XDP_UMEM_USES_NEED_WAKEUP (1 << 1)
-
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
diff --git a/net/xdp/xsk.h b/net/xdp/xsk.h
index d6a0979050e6..455ddd480f3d 100644
--- a/net/xdp/xsk.h
+++ b/net/xdp/xsk.h
@@ -4,6 +4,20 @@
 #ifndef XSK_H_
 #define XSK_H_
 
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
+/* Flags for the umem flags field.
+ *
+ * The NEED_WAKEUP flag is 1 due to the reuse of the flags field for public
+ * flags. See include/uapi/linux/if_xdp.h.
+ */
+#define XDP_UMEM_USES_NEED_WAKEUP BIT(1)
+
 struct xdp_ring_offset_v1 {
 	__u64 producer;
 	__u64 consumer;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 648733ec24ac..a322a7dac58c 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -10,6 +10,8 @@
 #include <linux/if_xdp.h>
 #include <net/xdp_sock.h>
 
+#include "xsk.h"
+
 struct xdp_ring {
 	u32 producer ____cacheline_aligned_in_smp;
 	u32 consumer ____cacheline_aligned_in_smp;
-- 
cgit v1.2.3-59-g8ed1b


From 2b43470add8c8ff1e1ee28dffc5c5df97e955d09 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:53 +0200
Subject: xsk: Introduce AF_XDP buffer allocation API
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to simplify AF_XDP zero-copy enablement for NIC driver
developers, a new AF_XDP buffer allocation API is added. The
implementation is based on a single core (single producer/consumer)
buffer pool for the AF_XDP UMEM.

A buffer is allocated using the xsk_buff_alloc() function, and
returned using xsk_buff_free(). If a buffer is disassociated from the
pool, e.g. when it is passed to an AF_XDP socket, the buffer is said
to be released. Currently, the release function is only used by the
AF_XDP internals and is not visible to the driver.

Drivers using this API should register the XDP memory model with the
new MEM_TYPE_XSK_BUFF_POOL type.

The API is defined in net/xdp_sock_drv.h.

The buffer type is struct xdp_buff, and follows the lifetime of
regular xdp_buffs, i.e. the lifetime of an xdp_buff is restricted to
a NAPI context. In other words, the API is not replacing xdp_frames.

In addition to introducing the API and implementations, the AF_XDP
core is migrated to use the new APIs.

rfc->v1: Fixed build errors/warnings for m68k and riscv. (kbuild test
         robot)
         Added headroom/chunk size getter. (Maxim/Björn)

v1->v2: Swapped SoBs. (Maxim)

v2->v3: Initialize struct xdp_buff member frame_sz. (Björn)
        Add API to query the DMA address of a frame. (Maxim)
        Do DMA sync for CPU till the end of the frame to handle
        possible growth (frame_sz). (Maxim)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-6-bjorn.topel@gmail.com
---
 include/net/xdp.h           |   4 +-
 include/net/xdp_sock.h      |   2 +
 include/net/xdp_sock_drv.h  | 164 ++++++++++++++++
 include/net/xsk_buff_pool.h |  56 ++++++
 include/trace/events/xdp.h  |   3 +-
 net/core/xdp.c              |  14 +-
 net/xdp/Makefile            |   1 +
 net/xdp/xdp_umem.c          |  19 +-
 net/xdp/xsk.c               | 147 +++++---------
 net/xdp/xsk_buff_pool.c     | 467 ++++++++++++++++++++++++++++++++++++++++++++
 net/xdp/xsk_diag.c          |   2 +-
 net/xdp/xsk_queue.h         |  59 ++++--
 12 files changed, 819 insertions(+), 119 deletions(-)
 create mode 100644 include/net/xsk_buff_pool.h
 create mode 100644 net/xdp/xsk_buff_pool.c

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 3094fccf5a88..f432134c7c00 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -40,6 +40,7 @@ enum xdp_mem_type {
 	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
 	MEM_TYPE_PAGE_POOL,
 	MEM_TYPE_ZERO_COPY,
+	MEM_TYPE_XSK_BUFF_POOL,
 	MEM_TYPE_MAX,
 };
 
@@ -119,7 +120,8 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	int metasize;
 	int headroom;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY)
+	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY ||
+	    xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
 		return xdp_convert_zc_to_xdp_frame(xdp);
 
 	/* Assure headroom is available for storing info */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index fb7fe3060175..6e7265f63c04 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -31,11 +31,13 @@ struct xdp_umem_fq_reuse {
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
+	struct xsk_buff_pool *pool;
 	struct xdp_umem_page *pages;
 	u64 chunk_mask;
 	u64 size;
 	u32 headroom;
 	u32 chunk_size_nohr;
+	u32 chunk_size;
 	struct user_struct *user;
 	refcount_t users;
 	struct work_struct work;
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index d67f2361937a..7752c8663d1b 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -7,6 +7,7 @@
 #define _LINUX_XDP_SOCK_DRV_H
 
 #include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
 
 #ifdef CONFIG_XDP_SOCKETS
 
@@ -101,6 +102,94 @@ static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
 	return umem->chunk_size_nohr;
 }
 
+static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+{
+	return XDP_PACKET_HEADROOM + umem->headroom;
+}
+
+static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+{
+	return umem->chunk_size;
+}
+
+static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+{
+	return xsk_umem_get_chunk_size(umem) - xsk_umem_get_headroom(umem);
+}
+
+static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+					 struct xdp_rxq_info *rxq)
+{
+	xp_set_rxq_info(umem->pool, rxq);
+}
+
+static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+				      unsigned long attrs)
+{
+	xp_dma_unmap(umem->pool, attrs);
+}
+
+static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
+				   unsigned long attrs)
+{
+	return xp_dma_map(umem->pool, dev, attrs, umem->pgs, umem->npgs);
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
+{
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+	return xp_get_dma(xskb);
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
+{
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+	return xp_get_frame_dma(xskb);
+}
+
+static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+{
+	return xp_alloc(umem->pool);
+}
+
+static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+{
+	return xp_can_alloc(umem->pool, count);
+}
+
+static inline void xsk_buff_free(struct xdp_buff *xdp)
+{
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+	xp_free(xskb);
+}
+
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return xp_raw_get_dma(umem->pool, addr);
+}
+
+static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+{
+	return xp_raw_get_data(umem->pool, addr);
+}
+
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+{
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+
+	xp_dma_sync_for_cpu(xskb);
+}
+
+static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+						    dma_addr_t dma,
+						    size_t size)
+{
+	xp_dma_sync_for_device(umem->pool, dma, size);
+}
+
 #else
 
 static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
@@ -212,6 +301,81 @@ static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
 	return 0;
 }
 
+static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
+{
+	return 0;
+}
+
+static inline u32 xsk_umem_get_chunk_size(struct xdp_umem *umem)
+{
+	return 0;
+}
+
+static inline u32 xsk_umem_get_rx_frame_size(struct xdp_umem *umem)
+{
+	return 0;
+}
+
+static inline void xsk_buff_set_rxq_info(struct xdp_umem *umem,
+					 struct xdp_rxq_info *rxq)
+{
+}
+
+static inline void xsk_buff_dma_unmap(struct xdp_umem *umem,
+				      unsigned long attrs)
+{
+}
+
+static inline int xsk_buff_dma_map(struct xdp_umem *umem, struct device *dev,
+				   unsigned long attrs)
+{
+	return 0;
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_dma(struct xdp_buff *xdp)
+{
+	return 0;
+}
+
+static inline dma_addr_t xsk_buff_xdp_get_frame_dma(struct xdp_buff *xdp)
+{
+	return 0;
+}
+
+static inline struct xdp_buff *xsk_buff_alloc(struct xdp_umem *umem)
+{
+	return NULL;
+}
+
+static inline bool xsk_buff_can_alloc(struct xdp_umem *umem, u32 count)
+{
+	return false;
+}
+
+static inline void xsk_buff_free(struct xdp_buff *xdp)
+{
+}
+
+static inline dma_addr_t xsk_buff_raw_get_dma(struct xdp_umem *umem, u64 addr)
+{
+	return 0;
+}
+
+static inline void *xsk_buff_raw_get_data(struct xdp_umem *umem, u64 addr)
+{
+	return NULL;
+}
+
+static inline void xsk_buff_dma_sync_for_cpu(struct xdp_buff *xdp)
+{
+}
+
+static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
+						    dma_addr_t dma,
+						    size_t size)
+{
+}
+
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_DRV_H */
diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
new file mode 100644
index 000000000000..9f221b36e405
--- /dev/null
+++ b/include/net/xsk_buff_pool.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright(c) 2020 Intel Corporation. */
+
+#ifndef XSK_BUFF_POOL_H_
+#define XSK_BUFF_POOL_H_
+
+#include <linux/types.h>
+#include <linux/dma-mapping.h>
+#include <net/xdp.h>
+
+struct xsk_buff_pool;
+struct xdp_rxq_info;
+struct xsk_queue;
+struct xdp_desc;
+struct device;
+struct page;
+
+struct xdp_buff_xsk {
+	struct xdp_buff xdp;
+	dma_addr_t dma;
+	dma_addr_t frame_dma;
+	struct xsk_buff_pool *pool;
+	bool unaligned;
+	u64 orig_addr;
+	struct list_head free_list_node;
+};
+
+/* AF_XDP core. */
+struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
+				u32 chunk_size, u32 headroom, u64 size,
+				bool unaligned);
+void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
+void xp_destroy(struct xsk_buff_pool *pool);
+void xp_release(struct xdp_buff_xsk *xskb);
+u64 xp_get_handle(struct xdp_buff_xsk *xskb);
+bool xp_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
+
+/* AF_XDP, and XDP core. */
+void xp_free(struct xdp_buff_xsk *xskb);
+
+/* AF_XDP ZC drivers, via xdp_sock_drv.h */
+void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq);
+int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+	       unsigned long attrs, struct page **pages, u32 nr_pages);
+void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs);
+struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
+bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
+dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb);
+dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb);
+void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb);
+void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma,
+			    size_t size);
+
+#endif /* XSK_BUFF_POOL_H_ */
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index b95d65e8c628..48547a12fa27 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -287,7 +287,8 @@ TRACE_EVENT(xdp_devmap_xmit,
 	FN(PAGE_SHARED)		\
 	FN(PAGE_ORDER0)		\
 	FN(PAGE_POOL)		\
-	FN(ZERO_COPY)
+	FN(ZERO_COPY)		\
+	FN(XSK_BUFF_POOL)
 
 #define __MEM_TYPE_TP_FN(x)	\
 	TRACE_DEFINE_ENUM(MEM_TYPE_##x);
diff --git a/net/core/xdp.c b/net/core/xdp.c
index 490b8f5fa8ee..f0ce8b195193 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -17,6 +17,7 @@
 #include <net/xdp.h>
 #include <net/xdp_priv.h> /* struct xdp_mem_allocator */
 #include <trace/events/xdp.h>
+#include <net/xdp_sock_drv.h>
 
 #define REG_STATE_NEW		0x0
 #define REG_STATE_REGISTERED	0x1
@@ -361,7 +362,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * of xdp_frames/pages in those cases.
  */
 static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-			 unsigned long handle)
+			 unsigned long handle, struct xdp_buff *xdp)
 {
 	struct xdp_mem_allocator *xa;
 	struct page *page;
@@ -390,6 +391,11 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
 		xa->zc_alloc->free(xa->zc_alloc, handle);
 		rcu_read_unlock();
+		break;
+	case MEM_TYPE_XSK_BUFF_POOL:
+		/* NB! Only valid from an xdp_buff! */
+		xsk_buff_free(xdp);
+		break;
 	default:
 		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
 		break;
@@ -398,19 +404,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, false, 0);
+	__xdp_return(xdpf->data, &xdpf->mem, false, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, true, 0);
+	__xdp_return(xdpf->data, &xdpf->mem, true, 0, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle);
+	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle, xdp);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
diff --git a/net/xdp/Makefile b/net/xdp/Makefile
index 90b5460d6166..30cdc4315f42 100644
--- a/net/xdp/Makefile
+++ b/net/xdp/Makefile
@@ -1,3 +1,4 @@
 # SPDX-License-Identifier: GPL-2.0-only
 obj-$(CONFIG_XDP_SOCKETS) += xsk.o xdp_umem.o xsk_queue.o xskmap.o
+obj-$(CONFIG_XDP_SOCKETS) += xsk_buff_pool.o
 obj-$(CONFIG_XDP_SOCKETS_DIAG) += xsk_diag.o
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 37ace3bc0d48..7f04688045d5 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -245,7 +245,7 @@ static void xdp_umem_release(struct xdp_umem *umem)
 	}
 
 	xsk_reuseq_destroy(umem);
-
+	xp_destroy(umem->pool);
 	xdp_umem_unmap_pages(umem);
 	xdp_umem_unpin_pages(umem);
 
@@ -390,6 +390,7 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	umem->size = size;
 	umem->headroom = headroom;
 	umem->chunk_size_nohr = chunk_size - headroom;
+	umem->chunk_size = chunk_size;
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
 	umem->user = NULL;
@@ -415,11 +416,21 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	}
 
 	err = xdp_umem_map_pages(umem);
-	if (!err)
-		return 0;
+	if (err)
+		goto out_pages;
 
-	kvfree(umem->pages);
+	umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size,
+			       headroom, size, unaligned_chunks);
+	if (!umem->pool) {
+		err = -ENOMEM;
+		goto out_unmap;
+	}
+	return 0;
 
+out_unmap:
+	xdp_umem_unmap_pages(umem);
+out_pages:
+	kvfree(umem->pages);
 out_pin:
 	xdp_umem_unpin_pages(umem);
 out_account:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 8bda654e82ec..6933f0d494ba 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -117,76 +117,67 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 }
 EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
 
-/* If a buffer crosses a page boundary, we need to do 2 memcpy's, one for
- * each page. This is only required in copy mode.
- */
-static void __xsk_rcv_memcpy(struct xdp_umem *umem, u64 addr, void *from_buf,
-			     u32 len, u32 metalen)
+static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
-	void *to_buf = xdp_umem_get_data(umem, addr);
-
-	addr = xsk_umem_add_offset_to_addr(addr);
-	if (xskq_cons_crosses_non_contig_pg(umem, addr, len + metalen)) {
-		void *next_pg_addr = umem->pages[(addr >> PAGE_SHIFT) + 1].addr;
-		u64 page_start = addr & ~(PAGE_SIZE - 1);
-		u64 first_len = PAGE_SIZE - (addr - page_start);
-
-		memcpy(to_buf, from_buf, first_len);
-		memcpy(next_pg_addr, from_buf + first_len,
-		       len + metalen - first_len);
+	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
+	u64 addr;
+	int err;
 
-		return;
+	addr = xp_get_handle(xskb);
+	err = xskq_prod_reserve_desc(xs->rx, addr, len);
+	if (err) {
+		xs->rx_dropped++;
+		return err;
 	}
 
-	memcpy(to_buf, from_buf, len + metalen);
+	xp_release(xskb);
+	return 0;
 }
 
-static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static void xsk_copy_xdp(struct xdp_buff *to, struct xdp_buff *from, u32 len)
 {
-	u64 offset = xs->umem->headroom;
-	u64 addr, memcpy_addr;
-	void *from_buf;
+	void *from_buf, *to_buf;
 	u32 metalen;
-	int err;
-
-	if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
-	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
-		xs->rx_dropped++;
-		return -ENOSPC;
-	}
 
-	if (unlikely(xdp_data_meta_unsupported(xdp))) {
-		from_buf = xdp->data;
+	if (unlikely(xdp_data_meta_unsupported(from))) {
+		from_buf = from->data;
+		to_buf = to->data;
 		metalen = 0;
 	} else {
-		from_buf = xdp->data_meta;
-		metalen = xdp->data - xdp->data_meta;
+		from_buf = from->data_meta;
+		metalen = from->data - from->data_meta;
+		to_buf = to->data - metalen;
 	}
 
-	memcpy_addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
-	__xsk_rcv_memcpy(xs->umem, memcpy_addr, from_buf, len, metalen);
-
-	offset += metalen;
-	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
-	err = xskq_prod_reserve_desc(xs->rx, addr, len);
-	if (!err) {
-		xskq_cons_release(xs->umem->fq);
-		xdp_return_buff(xdp);
-		return 0;
-	}
-
-	xs->rx_dropped++;
-	return err;
+	memcpy(to_buf, from_buf, len + metalen);
 }
 
-static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
+static int __xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len,
+		     bool explicit_free)
 {
-	int err = xskq_prod_reserve_desc(xs->rx, xdp->handle, len);
+	struct xdp_buff *xsk_xdp;
+	int err;
 
-	if (err)
+	if (len > xsk_umem_get_rx_frame_size(xs->umem)) {
+		xs->rx_dropped++;
+		return -ENOSPC;
+	}
+
+	xsk_xdp = xsk_buff_alloc(xs->umem);
+	if (!xsk_xdp) {
 		xs->rx_dropped++;
+		return -ENOSPC;
+	}
 
-	return err;
+	xsk_copy_xdp(xsk_xdp, xdp, len);
+	err = __xsk_rcv_zc(xs, xsk_xdp, len);
+	if (err) {
+		xsk_buff_free(xsk_xdp);
+		return err;
+	}
+	if (explicit_free)
+		xdp_return_buff(xdp);
+	return 0;
 }
 
 static bool xsk_is_bound(struct xdp_sock *xs)
@@ -199,7 +190,8 @@ static bool xsk_is_bound(struct xdp_sock *xs)
 	return false;
 }
 
-static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
+static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
+		   bool explicit_free)
 {
 	u32 len;
 
@@ -211,8 +203,10 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 
 	len = xdp->data_end - xdp->data;
 
-	return (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) ?
-		__xsk_rcv_zc(xs, xdp, len) : __xsk_rcv(xs, xdp, len);
+	return xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY ||
+		xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
+		__xsk_rcv_zc(xs, xdp, len) :
+		__xsk_rcv(xs, xdp, len, explicit_free);
 }
 
 static void xsk_flush(struct xdp_sock *xs)
@@ -224,46 +218,11 @@ static void xsk_flush(struct xdp_sock *xs)
 
 int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
 {
-	u32 metalen = xdp->data - xdp->data_meta;
-	u32 len = xdp->data_end - xdp->data;
-	u64 offset = xs->umem->headroom;
-	void *buffer;
-	u64 addr;
 	int err;
 
 	spin_lock_bh(&xs->rx_lock);
-
-	if (xs->dev != xdp->rxq->dev || xs->queue_id != xdp->rxq->queue_index) {
-		err = -EINVAL;
-		goto out_unlock;
-	}
-
-	if (!xskq_cons_peek_addr(xs->umem->fq, &addr, xs->umem) ||
-	    len > xs->umem->chunk_size_nohr - XDP_PACKET_HEADROOM) {
-		err = -ENOSPC;
-		goto out_drop;
-	}
-
-	addr = xsk_umem_adjust_offset(xs->umem, addr, offset);
-	buffer = xdp_umem_get_data(xs->umem, addr);
-	memcpy(buffer, xdp->data_meta, len + metalen);
-
-	addr = xsk_umem_adjust_offset(xs->umem, addr, metalen);
-	err = xskq_prod_reserve_desc(xs->rx, addr, len);
-	if (err)
-		goto out_drop;
-
-	xskq_cons_release(xs->umem->fq);
-	xskq_prod_submit(xs->rx);
-
-	spin_unlock_bh(&xs->rx_lock);
-
-	xs->sk.sk_data_ready(&xs->sk);
-	return 0;
-
-out_drop:
-	xs->rx_dropped++;
-out_unlock:
+	err = xsk_rcv(xs, xdp, false);
+	xsk_flush(xs);
 	spin_unlock_bh(&xs->rx_lock);
 	return err;
 }
@@ -273,7 +232,7 @@ int __xsk_map_redirect(struct xdp_sock *xs, struct xdp_buff *xdp)
 	struct list_head *flush_list = this_cpu_ptr(&xskmap_flush_list);
 	int err;
 
-	err = xsk_rcv(xs, xdp);
+	err = xsk_rcv(xs, xdp, true);
 	if (err)
 		return err;
 
@@ -404,7 +363,7 @@ static int xsk_generic_xmit(struct sock *sk)
 
 		skb_put(skb, len);
 		addr = desc.addr;
-		buffer = xdp_umem_get_data(xs->umem, addr);
+		buffer = xsk_buff_raw_get_data(xs->umem, addr);
 		err = skb_store_bits(skb, 0, buffer, len);
 		/* This is the backpressure mechanism for the Tx path.
 		 * Reserve space in the completion queue and only proceed
@@ -860,6 +819,8 @@ static int xsk_setsockopt(struct socket *sock, int level, int optname,
 		q = (optname == XDP_UMEM_FILL_RING) ? &xs->umem->fq :
 			&xs->umem->cq;
 		err = xsk_init_queue(entries, q, true);
+		if (optname == XDP_UMEM_FILL_RING)
+			xp_set_fq(xs->umem->pool, *q);
 		mutex_unlock(&xs->mutex);
 		return err;
 	}
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
new file mode 100644
index 000000000000..e214a5795a62
--- /dev/null
+++ b/net/xdp/xsk_buff_pool.c
@@ -0,0 +1,467 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <net/xsk_buff_pool.h>
+#include <net/xdp_sock.h>
+#include <linux/dma-direct.h>
+#include <linux/dma-noncoherent.h>
+#include <linux/swiotlb.h>
+
+#include "xsk_queue.h"
+
+struct xsk_buff_pool {
+	struct xsk_queue *fq;
+	struct list_head free_list;
+	dma_addr_t *dma_pages;
+	struct xdp_buff_xsk *heads;
+	u64 chunk_mask;
+	u64 addrs_cnt;
+	u32 free_list_cnt;
+	u32 dma_pages_cnt;
+	u32 heads_cnt;
+	u32 free_heads_cnt;
+	u32 headroom;
+	u32 chunk_size;
+	u32 frame_len;
+	bool cheap_dma;
+	bool unaligned;
+	void *addrs;
+	struct device *dev;
+	struct xdp_buff_xsk *free_heads[];
+};
+
+static void xp_addr_unmap(struct xsk_buff_pool *pool)
+{
+	vunmap(pool->addrs);
+}
+
+static int xp_addr_map(struct xsk_buff_pool *pool,
+		       struct page **pages, u32 nr_pages)
+{
+	pool->addrs = vmap(pages, nr_pages, VM_MAP, PAGE_KERNEL);
+	if (!pool->addrs)
+		return -ENOMEM;
+	return 0;
+}
+
+void xp_destroy(struct xsk_buff_pool *pool)
+{
+	if (!pool)
+		return;
+
+	xp_addr_unmap(pool);
+	kvfree(pool->heads);
+	kvfree(pool);
+}
+
+struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
+				u32 chunk_size, u32 headroom, u64 size,
+				bool unaligned)
+{
+	struct xsk_buff_pool *pool;
+	struct xdp_buff_xsk *xskb;
+	int err;
+	u32 i;
+
+	pool = kvzalloc(struct_size(pool, free_heads, chunks), GFP_KERNEL);
+	if (!pool)
+		goto out;
+
+	pool->heads = kvcalloc(chunks, sizeof(*pool->heads), GFP_KERNEL);
+	if (!pool->heads)
+		goto out;
+
+	pool->chunk_mask = ~((u64)chunk_size - 1);
+	pool->addrs_cnt = size;
+	pool->heads_cnt = chunks;
+	pool->free_heads_cnt = chunks;
+	pool->headroom = headroom;
+	pool->chunk_size = chunk_size;
+	pool->cheap_dma = true;
+	pool->unaligned = unaligned;
+	pool->frame_len = chunk_size - headroom - XDP_PACKET_HEADROOM;
+	INIT_LIST_HEAD(&pool->free_list);
+
+	for (i = 0; i < pool->free_heads_cnt; i++) {
+		xskb = &pool->heads[i];
+		xskb->pool = pool;
+		xskb->xdp.frame_sz = chunk_size - headroom;
+		pool->free_heads[i] = xskb;
+	}
+
+	err = xp_addr_map(pool, pages, nr_pages);
+	if (!err)
+		return pool;
+
+out:
+	xp_destroy(pool);
+	return NULL;
+}
+
+void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq)
+{
+	pool->fq = fq;
+}
+
+void xp_set_rxq_info(struct xsk_buff_pool *pool, struct xdp_rxq_info *rxq)
+{
+	u32 i;
+
+	for (i = 0; i < pool->heads_cnt; i++)
+		pool->heads[i].xdp.rxq = rxq;
+}
+EXPORT_SYMBOL(xp_set_rxq_info);
+
+void xp_dma_unmap(struct xsk_buff_pool *pool, unsigned long attrs)
+{
+	dma_addr_t *dma;
+	u32 i;
+
+	if (pool->dma_pages_cnt == 0)
+		return;
+
+	for (i = 0; i < pool->dma_pages_cnt; i++) {
+		dma = &pool->dma_pages[i];
+		if (*dma) {
+			dma_unmap_page_attrs(pool->dev, *dma, PAGE_SIZE,
+					     DMA_BIDIRECTIONAL, attrs);
+			*dma = 0;
+		}
+	}
+
+	kvfree(pool->dma_pages);
+	pool->dma_pages_cnt = 0;
+	pool->dev = NULL;
+}
+EXPORT_SYMBOL(xp_dma_unmap);
+
+static void xp_check_dma_contiguity(struct xsk_buff_pool *pool)
+{
+	u32 i;
+
+	for (i = 0; i < pool->dma_pages_cnt - 1; i++) {
+		if (pool->dma_pages[i] + PAGE_SIZE == pool->dma_pages[i + 1])
+			pool->dma_pages[i] |= XSK_NEXT_PG_CONTIG_MASK;
+		else
+			pool->dma_pages[i] &= ~XSK_NEXT_PG_CONTIG_MASK;
+	}
+}
+
+static bool __maybe_unused xp_check_swiotlb_dma(struct xsk_buff_pool *pool)
+{
+#if defined(CONFIG_SWIOTLB)
+	phys_addr_t paddr;
+	u32 i;
+
+	for (i = 0; i < pool->dma_pages_cnt; i++) {
+		paddr = dma_to_phys(pool->dev, pool->dma_pages[i]);
+		if (is_swiotlb_buffer(paddr))
+			return false;
+	}
+#endif
+	return true;
+}
+
+static bool xp_check_cheap_dma(struct xsk_buff_pool *pool)
+{
+#if defined(CONFIG_HAS_DMA)
+	const struct dma_map_ops *ops = get_dma_ops(pool->dev);
+
+	if (ops) {
+		return !ops->sync_single_for_cpu &&
+			!ops->sync_single_for_device;
+	}
+
+	if (!dma_is_direct(ops))
+		return false;
+
+	if (!xp_check_swiotlb_dma(pool))
+		return false;
+
+	if (!dev_is_dma_coherent(pool->dev)) {
+#if defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU) ||		\
+	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_CPU_ALL) ||	\
+	defined(CONFIG_ARCH_HAS_SYNC_DMA_FOR_DEVICE)
+		return false;
+#endif
+	}
+#endif
+	return true;
+}
+
+int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
+	       unsigned long attrs, struct page **pages, u32 nr_pages)
+{
+	dma_addr_t dma;
+	u32 i;
+
+	pool->dma_pages = kvcalloc(nr_pages, sizeof(*pool->dma_pages),
+				   GFP_KERNEL);
+	if (!pool->dma_pages)
+		return -ENOMEM;
+
+	pool->dev = dev;
+	pool->dma_pages_cnt = nr_pages;
+
+	for (i = 0; i < pool->dma_pages_cnt; i++) {
+		dma = dma_map_page_attrs(dev, pages[i], 0, PAGE_SIZE,
+					 DMA_BIDIRECTIONAL, attrs);
+		if (dma_mapping_error(dev, dma)) {
+			xp_dma_unmap(pool, attrs);
+			return -ENOMEM;
+		}
+		pool->dma_pages[i] = dma;
+	}
+
+	if (pool->unaligned)
+		xp_check_dma_contiguity(pool);
+
+	pool->dev = dev;
+	pool->cheap_dma = xp_check_cheap_dma(pool);
+	return 0;
+}
+EXPORT_SYMBOL(xp_dma_map);
+
+static bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+					  u64 addr, u32 len)
+{
+	bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE;
+
+	if (pool->dma_pages_cnt && cross_pg) {
+		return !(pool->dma_pages[addr >> PAGE_SHIFT] &
+			 XSK_NEXT_PG_CONTIG_MASK);
+	}
+	return false;
+}
+
+static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+					  u64 addr)
+{
+	return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
+}
+
+void xp_release(struct xdp_buff_xsk *xskb)
+{
+	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
+
+static u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
+{
+	return addr & pool->chunk_mask;
+}
+
+static u64 xp_unaligned_extract_addr(u64 addr)
+{
+	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static u64 xp_unaligned_extract_offset(u64 addr)
+{
+	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static u64 xp_unaligned_add_offset_to_addr(u64 addr)
+{
+	return xp_unaligned_extract_addr(addr) +
+		xp_unaligned_extract_offset(addr);
+}
+
+static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
+{
+	*addr = xp_unaligned_extract_addr(*addr);
+	if (*addr >= pool->addrs_cnt ||
+	    *addr + pool->chunk_size > pool->addrs_cnt ||
+	    xp_addr_crosses_non_contig_pg(pool, *addr))
+		return false;
+	return true;
+}
+
+static bool xp_check_aligned(struct xsk_buff_pool *pool, u64 *addr)
+{
+	*addr = xp_aligned_extract_addr(pool, *addr);
+	return *addr < pool->addrs_cnt;
+}
+
+static struct xdp_buff_xsk *__xp_alloc(struct xsk_buff_pool *pool)
+{
+	struct xdp_buff_xsk *xskb;
+	u64 addr;
+	bool ok;
+
+	if (pool->free_heads_cnt == 0)
+		return NULL;
+
+	xskb = pool->free_heads[--pool->free_heads_cnt];
+
+	for (;;) {
+		if (!xskq_cons_peek_addr_unchecked(pool->fq, &addr)) {
+			xp_release(xskb);
+			return NULL;
+		}
+
+		ok = pool->unaligned ? xp_check_unaligned(pool, &addr) :
+		     xp_check_aligned(pool, &addr);
+		if (!ok) {
+			pool->fq->invalid_descs++;
+			xskq_cons_release(pool->fq);
+			continue;
+		}
+		break;
+	}
+	xskq_cons_release(pool->fq);
+
+	xskb->orig_addr = addr;
+	xskb->xdp.data_hard_start = pool->addrs + addr + pool->headroom;
+	if (pool->dma_pages_cnt) {
+		xskb->frame_dma = (pool->dma_pages[addr >> PAGE_SHIFT] &
+				   ~XSK_NEXT_PG_CONTIG_MASK) +
+				  (addr & ~PAGE_MASK);
+		xskb->dma = xskb->frame_dma + pool->headroom +
+			    XDP_PACKET_HEADROOM;
+	}
+	return xskb;
+}
+
+struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool)
+{
+	struct xdp_buff_xsk *xskb;
+
+	if (!pool->free_list_cnt) {
+		xskb = __xp_alloc(pool);
+		if (!xskb)
+			return NULL;
+	} else {
+		pool->free_list_cnt--;
+		xskb = list_first_entry(&pool->free_list, struct xdp_buff_xsk,
+					free_list_node);
+		list_del(&xskb->free_list_node);
+	}
+
+	xskb->xdp.data = xskb->xdp.data_hard_start + XDP_PACKET_HEADROOM;
+	xskb->xdp.data_meta = xskb->xdp.data;
+
+	if (!pool->cheap_dma) {
+		dma_sync_single_range_for_device(pool->dev, xskb->dma, 0,
+						 pool->frame_len,
+						 DMA_BIDIRECTIONAL);
+	}
+	return &xskb->xdp;
+}
+EXPORT_SYMBOL(xp_alloc);
+
+bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count)
+{
+	if (pool->free_list_cnt >= count)
+		return true;
+	return xskq_cons_has_entries(pool->fq, count - pool->free_list_cnt);
+}
+EXPORT_SYMBOL(xp_can_alloc);
+
+void xp_free(struct xdp_buff_xsk *xskb)
+{
+	xskb->pool->free_list_cnt++;
+	list_add(&xskb->free_list_node, &xskb->pool->free_list);
+}
+EXPORT_SYMBOL(xp_free);
+
+static bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
+				     struct xdp_desc *desc)
+{
+	u64 chunk, chunk_end;
+
+	chunk = xp_aligned_extract_addr(pool, desc->addr);
+	chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
+	if (chunk != chunk_end)
+		return false;
+
+	if (chunk >= pool->addrs_cnt)
+		return false;
+
+	if (desc->options)
+		return false;
+	return true;
+}
+
+static bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
+				       struct xdp_desc *desc)
+{
+	u64 addr, base_addr;
+
+	base_addr = xp_unaligned_extract_addr(desc->addr);
+	addr = xp_unaligned_add_offset_to_addr(desc->addr);
+
+	if (desc->len > pool->chunk_size)
+		return false;
+
+	if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
+	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+		return false;
+
+	if (desc->options)
+		return false;
+	return true;
+}
+
+bool xp_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
+{
+	return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
+		xp_aligned_validate_desc(pool, desc);
+}
+
+u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+
+	offset += xskb->pool->headroom;
+	if (!xskb->pool->unaligned)
+		return xskb->orig_addr + offset;
+	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+}
+
+void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
+{
+	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+	return pool->addrs + addr;
+}
+EXPORT_SYMBOL(xp_raw_get_data);
+
+dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
+{
+	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
+	return (pool->dma_pages[addr >> PAGE_SHIFT] &
+		~XSK_NEXT_PG_CONTIG_MASK) +
+		(addr & ~PAGE_MASK);
+}
+EXPORT_SYMBOL(xp_raw_get_dma);
+
+dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
+{
+	return xskb->dma;
+}
+EXPORT_SYMBOL(xp_get_dma);
+
+dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
+{
+	return xskb->frame_dma;
+}
+EXPORT_SYMBOL(xp_get_frame_dma);
+
+void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
+{
+	if (xskb->pool->cheap_dma)
+		return;
+
+	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
+				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL(xp_dma_sync_for_cpu);
+
+void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma,
+			    size_t size)
+{
+	if (pool->cheap_dma)
+		return;
+
+	dma_sync_single_range_for_device(pool->dev, dma, 0,
+					 size, DMA_BIDIRECTIONAL);
+}
+EXPORT_SYMBOL(xp_dma_sync_for_device);
diff --git a/net/xdp/xsk_diag.c b/net/xdp/xsk_diag.c
index f59791ba43a0..0163b26aaf63 100644
--- a/net/xdp/xsk_diag.c
+++ b/net/xdp/xsk_diag.c
@@ -56,7 +56,7 @@ static int xsk_diag_put_umem(const struct xdp_sock *xs, struct sk_buff *nlskb)
 	du.id = umem->id;
 	du.size = umem->size;
 	du.num_pages = umem->npgs;
-	du.chunk_size = umem->chunk_size_nohr + umem->headroom;
+	du.chunk_size = umem->chunk_size;
 	du.headroom = umem->headroom;
 	du.ifindex = umem->dev ? umem->dev->ifindex : 0;
 	du.queue_id = umem->queue_id;
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index a322a7dac58c..9151aef7dbca 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -9,6 +9,7 @@
 #include <linux/types.h>
 #include <linux/if_xdp.h>
 #include <net/xdp_sock.h>
+#include <net/xsk_buff_pool.h>
 
 #include "xsk.h"
 
@@ -172,31 +173,45 @@ out:
 	return false;
 }
 
-static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
-					   struct xdp_desc *d,
-					   struct xdp_umem *umem)
+static inline bool xskq_cons_read_addr_aligned(struct xsk_queue *q, u64 *addr)
 {
-	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
-		if (!xskq_cons_is_valid_unaligned(q, d->addr, d->len, umem))
-			return false;
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
 
-		if (d->len > umem->chunk_size_nohr || d->options) {
-			q->invalid_descs++;
-			return false;
-		}
+	while (q->cached_cons != q->cached_prod) {
+		u32 idx = q->cached_cons & q->ring_mask;
+
+		*addr = ring->desc[idx];
+		if (xskq_cons_is_valid_addr(q, *addr))
+			return true;
 
+		q->cached_cons++;
+	}
+
+	return false;
+}
+
+static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
+{
+	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
+
+	if (q->cached_cons != q->cached_prod) {
+		u32 idx = q->cached_cons & q->ring_mask;
+
+		*addr = ring->desc[idx];
 		return true;
 	}
 
-	if (!xskq_cons_is_valid_addr(q, d->addr))
-		return false;
+	return false;
+}
 
-	if (((d->addr + d->len) & q->chunk_mask) != (d->addr & q->chunk_mask) ||
-	    d->options) {
+static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
+					   struct xdp_desc *d,
+					   struct xdp_umem *umem)
+{
+	if (!xp_validate_desc(umem->pool, d)) {
 		q->invalid_descs++;
 		return false;
 	}
-
 	return true;
 }
 
@@ -260,6 +275,20 @@ static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
 	return xskq_cons_read_addr(q, addr, umem);
 }
 
+static inline bool xskq_cons_peek_addr_aligned(struct xsk_queue *q, u64 *addr)
+{
+	if (q->cached_prod == q->cached_cons)
+		xskq_cons_get_entries(q);
+	return xskq_cons_read_addr_aligned(q, addr);
+}
+
+static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
+{
+	if (q->cached_prod == q->cached_cons)
+		xskq_cons_get_entries(q);
+	return xskq_cons_read_addr_unchecked(q, addr);
+}
+
 static inline bool xskq_cons_peek_desc(struct xsk_queue *q,
 				       struct xdp_desc *desc,
 				       struct xdp_umem *umem)
-- 
cgit v1.2.3-59-g8ed1b


From e1675f97367bed74d4dcfe08de9ce9b5d6b288c1 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:54 +0200
Subject: i40e: Refactor rx_bi accesses
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

As a first step to migrate i40e to the new MEM_TYPE_XSK_BUFF_POOL
APIs, code that accesses the rx_bi (SW/shadow ring) is refactored to
use an accessor function.

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-7-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/i40e/i40e_txrx.c | 17 +++++++++++------
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 18 ++++++++++++------
 2 files changed, 23 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index a3772beffe02..9b9ef951f9ce 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -1195,6 +1195,11 @@ clear_counts:
 	rc->total_packets = 0;
 }
 
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->rx_bi[idx];
+}
+
 /**
  * i40e_reuse_rx_page - page flip buffer and store it back on the ring
  * @rx_ring: rx descriptor ring to store buffers on
@@ -1208,7 +1213,7 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
 	struct i40e_rx_buffer *new_buff;
 	u16 nta = rx_ring->next_to_alloc;
 
-	new_buff = &rx_ring->rx_bi[nta];
+	new_buff = i40e_rx_bi(rx_ring, nta);
 
 	/* update, and store next to alloc */
 	nta++;
@@ -1272,7 +1277,7 @@ struct i40e_rx_buffer *i40e_clean_programming_status(
 	ntc = rx_ring->next_to_clean;
 
 	/* fetch, update, and store next to clean */
-	rx_buffer = &rx_ring->rx_bi[ntc++];
+	rx_buffer = i40e_rx_bi(rx_ring, ntc++);
 	ntc = (ntc < rx_ring->count) ? ntc : 0;
 	rx_ring->next_to_clean = ntc;
 
@@ -1361,7 +1366,7 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 
 	/* Free all the Rx ring sk_buffs */
 	for (i = 0; i < rx_ring->count; i++) {
-		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+		struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
 
 		if (!rx_bi->page)
 			continue;
@@ -1592,7 +1597,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
 		return false;
 
 	rx_desc = I40E_RX_DESC(rx_ring, ntu);
-	bi = &rx_ring->rx_bi[ntu];
+	bi = i40e_rx_bi(rx_ring, ntu);
 
 	do {
 		if (!i40e_alloc_mapped_page(rx_ring, bi))
@@ -1614,7 +1619,7 @@ bool i40e_alloc_rx_buffers(struct i40e_ring *rx_ring, u16 cleaned_count)
 		ntu++;
 		if (unlikely(ntu == rx_ring->count)) {
 			rx_desc = I40E_RX_DESC(rx_ring, 0);
-			bi = rx_ring->rx_bi;
+			bi = i40e_rx_bi(rx_ring, 0);
 			ntu = 0;
 		}
 
@@ -1981,7 +1986,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer(struct i40e_ring *rx_ring,
 {
 	struct i40e_rx_buffer *rx_buffer;
 
-	rx_buffer = &rx_ring->rx_bi[rx_ring->next_to_clean];
+	rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 	prefetchw(rx_buffer->page);
 
 	/* we are reusing so sync this buffer for CPU use */
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index d8b0be29099a..d84ec92f8538 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -9,6 +9,11 @@
 #include "i40e_txrx_common.h"
 #include "i40e_xsk.h"
 
+static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->rx_bi[idx];
+}
+
 /**
  * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev
  * @vsi: Current VSI
@@ -321,7 +326,7 @@ __i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
 	bool ok = true;
 
 	rx_desc = I40E_RX_DESC(rx_ring, ntu);
-	bi = &rx_ring->rx_bi[ntu];
+	bi = i40e_rx_bi(rx_ring, ntu);
 	do {
 		if (!alloc(rx_ring, bi)) {
 			ok = false;
@@ -340,7 +345,7 @@ __i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
 
 		if (unlikely(ntu == rx_ring->count)) {
 			rx_desc = I40E_RX_DESC(rx_ring, 0);
-			bi = rx_ring->rx_bi;
+			bi = i40e_rx_bi(rx_ring, 0);
 			ntu = 0;
 		}
 
@@ -402,7 +407,7 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
 {
 	struct i40e_rx_buffer *bi;
 
-	bi = &rx_ring->rx_bi[rx_ring->next_to_clean];
+	bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 
 	/* we are reusing so sync this buffer for CPU use */
 	dma_sync_single_range_for_cpu(rx_ring->dev,
@@ -424,7 +429,8 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
 static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
 				    struct i40e_rx_buffer *old_bi)
 {
-	struct i40e_rx_buffer *new_bi = &rx_ring->rx_bi[rx_ring->next_to_alloc];
+	struct i40e_rx_buffer *new_bi = i40e_rx_bi(rx_ring,
+						   rx_ring->next_to_alloc);
 	u16 nta = rx_ring->next_to_alloc;
 
 	/* update, and store next to alloc */
@@ -456,7 +462,7 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
 	mask = rx_ring->xsk_umem->chunk_mask;
 
 	nta = rx_ring->next_to_alloc;
-	bi = &rx_ring->rx_bi[nta];
+	bi = i40e_rx_bi(rx_ring, nta);
 
 	nta++;
 	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
@@ -826,7 +832,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
 	u16 i;
 
 	for (i = 0; i < rx_ring->count; i++) {
-		struct i40e_rx_buffer *rx_bi = &rx_ring->rx_bi[i];
+		struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
 
 		if (!rx_bi->addr)
 			continue;
-- 
cgit v1.2.3-59-g8ed1b


From be1222b585fdc410b8c1dbcc57dd03a00f04eff5 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:55 +0200
Subject: i40e: Separate kernel allocated rx_bi rings from AF_XDP rings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Continuing the path to support MEM_TYPE_XSK_BUFF_POOL, the AF_XDP
zero-copy/sk_buff rx_bi rings are now separate. Functions to properly
allocate the different rings are added as well.

v3->v4: Made i40e_fd_handle_status() static. (kbuild test robot)
v4->v5: Fix kdoc for i40e_clean_programming_status(). (Jakub)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-8-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/i40e/i40e_main.c        |   7 ++
 drivers/net/ethernet/intel/i40e/i40e_txrx.c        | 119 ++++++++-------------
 drivers/net/ethernet/intel/i40e/i40e_txrx.h        |  22 ++--
 drivers/net/ethernet/intel/i40e/i40e_txrx_common.h |  40 +++++--
 drivers/net/ethernet/intel/i40e/i40e_type.h        |   5 +-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c         |  74 +++++++------
 drivers/net/ethernet/intel/i40e/i40e_xsk.h         |   2 +
 7 files changed, 142 insertions(+), 127 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index d6b2db4f2c65..3e1695bb8262 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3260,8 +3260,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	if (ring->vsi->type == I40E_VSI_MAIN)
 		xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 
+	kfree(ring->rx_bi);
 	ring->xsk_umem = i40e_xsk_umem(ring);
 	if (ring->xsk_umem) {
+		ret = i40e_alloc_rx_bi_zc(ring);
+		if (ret)
+			return ret;
 		ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
 				   XDP_PACKET_HEADROOM;
 		/* For AF_XDP ZC, we disallow packets to span on
@@ -3280,6 +3284,9 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 			 ring->queue_index);
 
 	} else {
+		ret = i40e_alloc_rx_bi(ring);
+		if (ret)
+			return ret;
 		ring->rx_buf_len = vsi->rx_buf_len;
 		if (ring->vsi->type == I40E_VSI_MAIN) {
 			ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index 9b9ef951f9ce..f613782f2f56 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -521,28 +521,29 @@ int i40e_add_del_fdir(struct i40e_vsi *vsi,
 /**
  * i40e_fd_handle_status - check the Programming Status for FD
  * @rx_ring: the Rx ring for this descriptor
- * @rx_desc: the Rx descriptor for programming Status, not a packet descriptor.
+ * @qword0_raw: qword0
+ * @qword1: qword1 after le_to_cpu
  * @prog_id: the id originally used for programming
  *
  * This is used to verify if the FD programming or invalidation
  * requested by SW to the HW is successful or not and take actions accordingly.
  **/
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-			   union i40e_rx_desc *rx_desc, u8 prog_id)
+static void i40e_fd_handle_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				  u64 qword1, u8 prog_id)
 {
 	struct i40e_pf *pf = rx_ring->vsi->back;
 	struct pci_dev *pdev = pf->pdev;
+	struct i40e_32b_rx_wb_qw0 *qw0;
 	u32 fcnt_prog, fcnt_avail;
 	u32 error;
-	u64 qw;
 
-	qw = le64_to_cpu(rx_desc->wb.qword1.status_error_len);
-	error = (qw & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
+	qw0 = (struct i40e_32b_rx_wb_qw0 *)&qword0_raw;
+	error = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_ERROR_MASK) >>
 		I40E_RX_PROG_STATUS_DESC_QW1_ERROR_SHIFT;
 
 	if (error == BIT(I40E_RX_PROG_STATUS_DESC_FD_TBL_FULL_SHIFT)) {
-		pf->fd_inv = le32_to_cpu(rx_desc->wb.qword0.hi_dword.fd_id);
-		if ((rx_desc->wb.qword0.hi_dword.fd_id != 0) ||
+		pf->fd_inv = le32_to_cpu(qw0->hi_dword.fd_id);
+		if (qw0->hi_dword.fd_id != 0 ||
 		    (I40E_DEBUG_FD & pf->hw.debug_mask))
 			dev_warn(&pdev->dev, "ntuple filter loc = %d, could not be added\n",
 				 pf->fd_inv);
@@ -560,7 +561,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 		/* store the current atr filter count */
 		pf->fd_atr_cnt = i40e_get_current_atr_cnt(pf);
 
-		if ((rx_desc->wb.qword0.hi_dword.fd_id == 0) &&
+		if (qw0->hi_dword.fd_id == 0 &&
 		    test_bit(__I40E_FD_SB_AUTO_DISABLED, pf->state)) {
 			/* These set_bit() calls aren't atomic with the
 			 * test_bit() here, but worse case we potentially
@@ -589,7 +590,7 @@ void i40e_fd_handle_status(struct i40e_ring *rx_ring,
 	} else if (error == BIT(I40E_RX_PROG_STATUS_DESC_NO_FD_ENTRY_SHIFT)) {
 		if (I40E_DEBUG_FD & pf->hw.debug_mask)
 			dev_info(&pdev->dev, "ntuple filter fd_id = %d, could not be removed\n",
-				 rx_desc->wb.qword0.hi_dword.fd_id);
+				 qw0->hi_dword.fd_id);
 	}
 }
 
@@ -1232,29 +1233,10 @@ static void i40e_reuse_rx_page(struct i40e_ring *rx_ring,
 }
 
 /**
- * i40e_rx_is_programming_status - check for programming status descriptor
- * @qw: qword representing status_error_len in CPU ordering
- *
- * The value of in the descriptor length field indicate if this
- * is a programming status descriptor for flow director or FCoE
- * by the value of I40E_RX_PROG_STATUS_DESC_LENGTH, otherwise
- * it is a packet descriptor.
- **/
-static inline bool i40e_rx_is_programming_status(u64 qw)
-{
-	/* The Rx filter programming status and SPH bit occupy the same
-	 * spot in the descriptor. Since we don't support packet split we
-	 * can just reuse the bit as an indication that this is a
-	 * programming status descriptor.
-	 */
-	return qw & I40E_RXD_QW1_LENGTH_SPH_MASK;
-}
-
-/**
- * i40e_clean_programming_status - try clean the programming status descriptor
+ * i40e_clean_programming_status - clean the programming status descriptor
  * @rx_ring: the rx ring that has this descriptor
- * @rx_desc: the rx descriptor written back by HW
- * @qw: qword representing status_error_len in CPU ordering
+ * @qword0_raw: qword0
+ * @qword1: qword1 representing status_error_len in CPU ordering
  *
  * Flow director should handle FD_FILTER_STATUS to check its filter programming
  * status being successful or not and take actions accordingly. FCoE should
@@ -1262,34 +1244,16 @@ static inline bool i40e_rx_is_programming_status(u64 qw)
  *
  * Returns an i40e_rx_buffer to reuse if the cleanup occurred, otherwise NULL.
  **/
-struct i40e_rx_buffer *i40e_clean_programming_status(
-	struct i40e_ring *rx_ring,
-	union i40e_rx_desc *rx_desc,
-	u64 qw)
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1)
 {
-	struct i40e_rx_buffer *rx_buffer;
-	u32 ntc;
 	u8 id;
 
-	if (!i40e_rx_is_programming_status(qw))
-		return NULL;
-
-	ntc = rx_ring->next_to_clean;
-
-	/* fetch, update, and store next to clean */
-	rx_buffer = i40e_rx_bi(rx_ring, ntc++);
-	ntc = (ntc < rx_ring->count) ? ntc : 0;
-	rx_ring->next_to_clean = ntc;
-
-	prefetch(I40E_RX_DESC(rx_ring, ntc));
-
-	id = (qw & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
+	id = (qword1 & I40E_RX_PROG_STATUS_DESC_QW1_PROGID_MASK) >>
 		  I40E_RX_PROG_STATUS_DESC_QW1_PROGID_SHIFT;
 
 	if (id == I40E_RX_PROG_STATUS_DESC_FD_FILTER_STATUS)
-		i40e_fd_handle_status(rx_ring, rx_desc, id);
-
-	return rx_buffer;
+		i40e_fd_handle_status(rx_ring, qword0_raw, qword1, id);
 }
 
 /**
@@ -1341,13 +1305,25 @@ err:
 	return -ENOMEM;
 }
 
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring)
+{
+	unsigned long sz = sizeof(*rx_ring->rx_bi) * rx_ring->count;
+
+	rx_ring->rx_bi = kzalloc(sz, GFP_KERNEL);
+	return rx_ring->rx_bi ? 0 : -ENOMEM;
+}
+
+static void i40e_clear_rx_bi(struct i40e_ring *rx_ring)
+{
+	memset(rx_ring->rx_bi, 0, sizeof(*rx_ring->rx_bi) * rx_ring->count);
+}
+
 /**
  * i40e_clean_rx_ring - Free Rx buffers
  * @rx_ring: ring to be cleaned
  **/
 void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 {
-	unsigned long bi_size;
 	u16 i;
 
 	/* ring already cleared, nothing to do */
@@ -1393,8 +1369,10 @@ void i40e_clean_rx_ring(struct i40e_ring *rx_ring)
 	}
 
 skip_free:
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
-	memset(rx_ring->rx_bi, 0, bi_size);
+	if (rx_ring->xsk_umem)
+		i40e_clear_rx_bi_zc(rx_ring);
+	else
+		i40e_clear_rx_bi(rx_ring);
 
 	/* Zero out the descriptor ring */
 	memset(rx_ring->desc, 0, rx_ring->size);
@@ -1435,15 +1413,7 @@ void i40e_free_rx_resources(struct i40e_ring *rx_ring)
 int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 {
 	struct device *dev = rx_ring->dev;
-	int err = -ENOMEM;
-	int bi_size;
-
-	/* warn if we are about to overwrite the pointer */
-	WARN_ON(rx_ring->rx_bi);
-	bi_size = sizeof(struct i40e_rx_buffer) * rx_ring->count;
-	rx_ring->rx_bi = kzalloc(bi_size, GFP_KERNEL);
-	if (!rx_ring->rx_bi)
-		goto err;
+	int err;
 
 	u64_stats_init(&rx_ring->syncp);
 
@@ -1456,7 +1426,7 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 	if (!rx_ring->desc) {
 		dev_info(dev, "Unable to allocate memory for the Rx descriptor ring, size=%d\n",
 			 rx_ring->size);
-		goto err;
+		return -ENOMEM;
 	}
 
 	rx_ring->next_to_alloc = 0;
@@ -1468,16 +1438,12 @@ int i40e_setup_rx_descriptors(struct i40e_ring *rx_ring)
 		err = xdp_rxq_info_reg(&rx_ring->xdp_rxq, rx_ring->netdev,
 				       rx_ring->queue_index);
 		if (err < 0)
-			goto err;
+			return err;
 	}
 
 	rx_ring->xdp_prog = rx_ring->vsi->xdp_prog;
 
 	return 0;
-err:
-	kfree(rx_ring->rx_bi);
-	rx_ring->rx_bi = NULL;
-	return err;
 }
 
 /**
@@ -2387,9 +2353,12 @@ static int i40e_clean_rx_irq(struct i40e_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
-		rx_buffer = i40e_clean_programming_status(rx_ring, rx_desc,
-							  qword);
-		if (unlikely(rx_buffer)) {
+		if (i40e_rx_is_programming_status(qword)) {
+			i40e_clean_programming_status(rx_ring,
+						      rx_desc->raw.qword[0],
+						      qword);
+			rx_buffer = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+			i40e_inc_ntc(rx_ring);
 			i40e_reuse_rx_page(rx_ring, rx_buffer);
 			cleaned_count++;
 			continue;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index 36d37f31a287..d343498e8de5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -296,17 +296,15 @@ struct i40e_tx_buffer {
 
 struct i40e_rx_buffer {
 	dma_addr_t dma;
-	union {
-		struct {
-			struct page *page;
-			__u32 page_offset;
-			__u16 pagecnt_bias;
-		};
-		struct {
-			void *addr;
-			u64 handle;
-		};
-	};
+	struct page *page;
+	__u32 page_offset;
+	__u16 pagecnt_bias;
+};
+
+struct i40e_rx_buffer_zc {
+	dma_addr_t dma;
+	void *addr;
+	u64 handle;
 };
 
 struct i40e_queue_stats {
@@ -358,6 +356,7 @@ struct i40e_ring {
 	union {
 		struct i40e_tx_buffer *tx_bi;
 		struct i40e_rx_buffer *rx_bi;
+		struct i40e_rx_buffer_zc *rx_bi_zc;
 	};
 	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
 	u16 queue_index;		/* Queue number of ring */
@@ -495,6 +494,7 @@ int __i40e_maybe_stop_tx(struct i40e_ring *tx_ring, int size);
 bool __i40e_chk_linearize(struct sk_buff *skb);
 int i40e_xdp_xmit(struct net_device *dev, int n, struct xdp_frame **frames,
 		  u32 flags);
+int i40e_alloc_rx_bi(struct i40e_ring *rx_ring);
 
 /**
  * i40e_get_head - Retrieve head from head writeback
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
index 8af0e99c6c0d..667c4dc4b39f 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx_common.h
@@ -4,13 +4,9 @@
 #ifndef I40E_TXRX_COMMON_
 #define I40E_TXRX_COMMON_
 
-void i40e_fd_handle_status(struct i40e_ring *rx_ring,
-			   union i40e_rx_desc *rx_desc, u8 prog_id);
 int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring);
-struct i40e_rx_buffer *i40e_clean_programming_status(
-	struct i40e_ring *rx_ring,
-	union i40e_rx_desc *rx_desc,
-	u64 qw);
+void i40e_clean_programming_status(struct i40e_ring *rx_ring, u64 qword0_raw,
+				   u64 qword1);
 void i40e_process_skb_fields(struct i40e_ring *rx_ring,
 			     union i40e_rx_desc *rx_desc, struct sk_buff *skb);
 void i40e_xdp_ring_update_tail(struct i40e_ring *xdp_ring);
@@ -84,6 +80,38 @@ static inline void i40e_arm_wb(struct i40e_ring *tx_ring,
 	}
 }
 
+/**
+ * i40e_rx_is_programming_status - check for programming status descriptor
+ * @qword1: qword1 representing status_error_len in CPU ordering
+ *
+ * The value of the descriptor length field indicates whether this
+ * is a programming status descriptor for flow director or FCoE
+ * (the field equals I40E_RX_PROG_STATUS_DESC_LENGTH); otherwise
+ * it is a packet descriptor.
+ **/
+static inline bool i40e_rx_is_programming_status(u64 qword1)
+{
+	/* The Rx filter programming status and SPH bit occupy the same
+	 * spot in the descriptor. Since we don't support packet split we
+	 * can just reuse the bit as an indication that this is a
+	 * programming status descriptor.
+	 */
+	return qword1 & I40E_RXD_QW1_LENGTH_SPH_MASK;
+}
+
+/**
+ * i40e_inc_ntc: Advance the next_to_clean index
+ * @rx_ring: Rx ring
+ **/
+static inline void i40e_inc_ntc(struct i40e_ring *rx_ring)
+{
+	u32 ntc = rx_ring->next_to_clean + 1;
+
+	ntc = (ntc < rx_ring->count) ? ntc : 0;
+	rx_ring->next_to_clean = ntc;
+	prefetch(I40E_RX_DESC(rx_ring, ntc));
+}
+
 void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring);
 void i40e_xsk_clean_tx_ring(struct i40e_ring *tx_ring);
 bool i40e_xsk_any_rx_ring_enabled(struct i40e_vsi *vsi);
diff --git a/drivers/net/ethernet/intel/i40e/i40e_type.h b/drivers/net/ethernet/intel/i40e/i40e_type.h
index 6ea2867ff60f..63e098f7cb63 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_type.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_type.h
@@ -689,7 +689,7 @@ union i40e_32byte_rx_desc {
 		__le64  rsvd2;
 	} read;
 	struct {
-		struct {
+		struct i40e_32b_rx_wb_qw0 {
 			struct {
 				union {
 					__le16 mirroring_status;
@@ -727,6 +727,9 @@ union i40e_32byte_rx_desc {
 			} hi_dword;
 		} qword3;
 	} wb;  /* writeback */
+	struct {
+		u64 qword[4];
+	} raw;
 };
 
 enum i40e_rx_desc_status_bits {
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index d84ec92f8538..4fca52a30ea4 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -9,9 +9,23 @@
 #include "i40e_txrx_common.h"
 #include "i40e_xsk.h"
 
-static struct i40e_rx_buffer *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring)
 {
-	return &rx_ring->rx_bi[idx];
+	unsigned long sz = sizeof(*rx_ring->rx_bi_zc) * rx_ring->count;
+
+	rx_ring->rx_bi_zc = kzalloc(sz, GFP_KERNEL);
+	return rx_ring->rx_bi_zc ? 0 : -ENOMEM;
+}
+
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
+{
+	memset(rx_ring->rx_bi_zc, 0,
+	       sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
+}
+
+static struct i40e_rx_buffer_zc *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+{
+	return &rx_ring->rx_bi_zc[idx];
 }
 
 /**
@@ -238,7 +252,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 }
 
 /**
- * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer
+ * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer_zc
  * @rx_ring: Rx ring
  * @bi: Rx buffer to populate
  *
@@ -248,7 +262,7 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
  * Returns true for a successful allocation, false otherwise
  **/
 static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
-				 struct i40e_rx_buffer *bi)
+				 struct i40e_rx_buffer_zc *bi)
 {
 	struct xdp_umem *umem = rx_ring->xsk_umem;
 	void *addr = bi->addr;
@@ -279,7 +293,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
 }
 
 /**
- * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer
+ * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer_zc
  * @rx_ring: Rx ring
  * @bi: Rx buffer to populate
  *
@@ -289,7 +303,7 @@ static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
  * Returns true for a successful allocation, false otherwise
  **/
 static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer *bi)
+				      struct i40e_rx_buffer_zc *bi)
 {
 	struct xdp_umem *umem = rx_ring->xsk_umem;
 	u64 handle, hr;
@@ -318,11 +332,11 @@ static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
 static __always_inline bool
 __i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
 			   bool alloc(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer *bi))
+				      struct i40e_rx_buffer_zc *bi))
 {
 	u16 ntu = rx_ring->next_to_use;
 	union i40e_rx_desc *rx_desc;
-	struct i40e_rx_buffer *bi;
+	struct i40e_rx_buffer_zc *bi;
 	bool ok = true;
 
 	rx_desc = I40E_RX_DESC(rx_ring, ntu);
@@ -402,10 +416,11 @@ static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
  *
  * Returns the received Rx buffer
  **/
-static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
-						    const unsigned int size)
+static struct i40e_rx_buffer_zc *i40e_get_rx_buffer_zc(
+	struct i40e_ring *rx_ring,
+	const unsigned int size)
 {
-	struct i40e_rx_buffer *bi;
+	struct i40e_rx_buffer_zc *bi;
 
 	bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 
@@ -427,10 +442,10 @@ static struct i40e_rx_buffer *i40e_get_rx_buffer_zc(struct i40e_ring *rx_ring,
  * recycle queue (next_to_alloc).
  **/
 static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
-				    struct i40e_rx_buffer *old_bi)
+				    struct i40e_rx_buffer_zc *old_bi)
 {
-	struct i40e_rx_buffer *new_bi = i40e_rx_bi(rx_ring,
-						   rx_ring->next_to_alloc);
+	struct i40e_rx_buffer_zc *new_bi = i40e_rx_bi(rx_ring,
+						      rx_ring->next_to_alloc);
 	u16 nta = rx_ring->next_to_alloc;
 
 	/* update, and store next to alloc */
@@ -452,7 +467,7 @@ static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
  **/
 void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
 {
-	struct i40e_rx_buffer *bi;
+	struct i40e_rx_buffer_zc *bi;
 	struct i40e_ring *rx_ring;
 	u64 hr, mask;
 	u16 nta;
@@ -490,7 +505,7 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
  * Returns the skb, or NULL on failure.
  **/
 static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
-					     struct i40e_rx_buffer *bi,
+					     struct i40e_rx_buffer_zc *bi,
 					     struct xdp_buff *xdp)
 {
 	unsigned int metasize = xdp->data - xdp->data_meta;
@@ -513,19 +528,6 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
 	return skb;
 }
 
-/**
- * i40e_inc_ntc: Advance the next_to_clean index
- * @rx_ring: Rx ring
- **/
-static void i40e_inc_ntc(struct i40e_ring *rx_ring)
-{
-	u32 ntc = rx_ring->next_to_clean + 1;
-
-	ntc = (ntc < rx_ring->count) ? ntc : 0;
-	rx_ring->next_to_clean = ntc;
-	prefetch(I40E_RX_DESC(rx_ring, ntc));
-}
-
 /**
  * i40e_clean_rx_irq_zc - Consumes Rx packets from the hardware ring
  * @rx_ring: Rx ring
@@ -547,7 +549,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
-		struct i40e_rx_buffer *bi;
+		struct i40e_rx_buffer_zc *bi;
 		union i40e_rx_desc *rx_desc;
 		unsigned int size;
 		u64 qword;
@@ -568,14 +570,18 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
-		bi = i40e_clean_programming_status(rx_ring, rx_desc,
-						   qword);
-		if (unlikely(bi)) {
+		if (i40e_rx_is_programming_status(qword)) {
+			i40e_clean_programming_status(rx_ring,
+						      rx_desc->raw.qword[0],
+						      qword);
+			bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+			i40e_inc_ntc(rx_ring);
 			i40e_reuse_rx_buffer_zc(rx_ring, bi);
 			cleaned_count++;
 			continue;
 		}
 
+		bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
 		size = (qword & I40E_RXD_QW1_LENGTH_PBUF_MASK) >>
 		       I40E_RXD_QW1_LENGTH_PBUF_SHIFT;
 		if (!size)
@@ -832,7 +838,7 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
 	u16 i;
 
 	for (i = 0; i < rx_ring->count; i++) {
-		struct i40e_rx_buffer *rx_bi = i40e_rx_bi(rx_ring, i);
+		struct i40e_rx_buffer_zc *rx_bi = i40e_rx_bi(rx_ring, i);
 
 		if (!rx_bi->addr)
 			continue;
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index 9ed59c14eb55..f5e292c218ee 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -19,5 +19,7 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 bool i40e_clean_xdp_tx_irq(struct i40e_vsi *vsi,
 			   struct i40e_ring *tx_ring, int napi_budget);
 int i40e_xsk_wakeup(struct net_device *dev, u32 queue_id, u32 flags);
+int i40e_alloc_rx_bi_zc(struct i40e_ring *rx_ring);
+void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring);
 
 #endif /* _I40E_XSK_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 3b4f0b66c2b3dceea01bd26efa8c4c6f01b4961f Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:56 +0200
Subject: i40e, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove MEM_TYPE_ZERO_COPY in favor of the new MEM_TYPE_XSK_BUFF_POOL
APIs. The AF_XDP zero-copy rx_bi ring is now simply an array of struct
xdp_buff pointers.

v4->v5: Fixed "warning: Excess function parameter 'bi' description in
        'i40e_construct_skb_zc'". (Jakub)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-9-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/i40e/i40e_main.c |  19 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.h |   9 +-
 drivers/net/ethernet/intel/i40e/i40e_xsk.c  | 353 +++-------------------------
 drivers/net/ethernet/intel/i40e/i40e_xsk.h  |   1 -
 4 files changed, 47 insertions(+), 335 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index 3e1695bb8262..ea7395b391e5 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -3266,21 +3266,19 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 		ret = i40e_alloc_rx_bi_zc(ring);
 		if (ret)
 			return ret;
-		ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
-				   XDP_PACKET_HEADROOM;
+		ring->rx_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
 		/* For AF_XDP ZC, we disallow packets to span on
 		 * multiple buffers, thus letting us skip that
 		 * handling in the fast-path.
 		 */
 		chain_len = 1;
-		ring->zca.free = i40e_zca_free;
 		ret = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-						 MEM_TYPE_ZERO_COPY,
-						 &ring->zca);
+						 MEM_TYPE_XSK_BUFF_POOL,
+						 NULL);
 		if (ret)
 			return ret;
 		dev_info(&vsi->back->pdev->dev,
-			 "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+			 "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 			 ring->queue_index);
 
 	} else {
@@ -3351,9 +3349,12 @@ static int i40e_configure_rx_ring(struct i40e_ring *ring)
 	ring->tail = hw->hw_addr + I40E_QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
-	ok = ring->xsk_umem ?
-	     i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring)) :
-	     !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	if (ring->xsk_umem) {
+		xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
+		ok = i40e_alloc_rx_buffers_zc(ring, I40E_DESC_UNUSED(ring));
+	} else {
+		ok = !i40e_alloc_rx_buffers(ring, I40E_DESC_UNUSED(ring));
+	}
 	if (!ok) {
 		/* Log this in case the user has forgotten to give the kernel
 		 * any buffers, even later in the application.
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.h b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
index d343498e8de5..5c255977fd58 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.h
@@ -301,12 +301,6 @@ struct i40e_rx_buffer {
 	__u16 pagecnt_bias;
 };
 
-struct i40e_rx_buffer_zc {
-	dma_addr_t dma;
-	void *addr;
-	u64 handle;
-};
-
 struct i40e_queue_stats {
 	u64 packets;
 	u64 bytes;
@@ -356,7 +350,7 @@ struct i40e_ring {
 	union {
 		struct i40e_tx_buffer *tx_bi;
 		struct i40e_rx_buffer *rx_bi;
-		struct i40e_rx_buffer_zc *rx_bi_zc;
+		struct xdp_buff **rx_bi_zc;
 	};
 	DECLARE_BITMAP(state, __I40E_RING_STATE_NBITS);
 	u16 queue_index;		/* Queue number of ring */
@@ -418,7 +412,6 @@ struct i40e_ring {
 	struct i40e_channel *ch;
 	struct xdp_rxq_info xdp_rxq;
 	struct xdp_umem *xsk_umem;
-	struct zero_copy_allocator zca; /* ZC allocator anchor */
 } ____cacheline_internodealigned_in_smp;
 
 static inline bool ring_uses_build_skb(struct i40e_ring *ring)
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index 4fca52a30ea4..f3953744c505 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -23,68 +23,11 @@ void i40e_clear_rx_bi_zc(struct i40e_ring *rx_ring)
 	       sizeof(*rx_ring->rx_bi_zc) * rx_ring->count);
 }
 
-static struct i40e_rx_buffer_zc *i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
+static struct xdp_buff **i40e_rx_bi(struct i40e_ring *rx_ring, u32 idx)
 {
 	return &rx_ring->rx_bi_zc[idx];
 }
 
-/**
- * i40e_xsk_umem_dma_map - DMA maps all UMEM memory for the netdev
- * @vsi: Current VSI
- * @umem: UMEM to DMA map
- *
- * Returns 0 on success, <0 on failure
- **/
-static int i40e_xsk_umem_dma_map(struct i40e_vsi *vsi, struct xdp_umem *umem)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct device *dev;
-	unsigned int i, j;
-	dma_addr_t dma;
-
-	dev = &pf->pdev->dev;
-	for (i = 0; i < umem->npgs; i++) {
-		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
-					 DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
-		if (dma_mapping_error(dev, dma))
-			goto out_unmap;
-
-		umem->pages[i].dma = dma;
-	}
-
-	return 0;
-
-out_unmap:
-	for (j = 0; j < i; j++) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
-		umem->pages[i].dma = 0;
-	}
-
-	return -1;
-}
-
-/**
- * i40e_xsk_umem_dma_unmap - DMA unmaps all UMEM memory for the netdev
- * @vsi: Current VSI
- * @umem: UMEM to DMA map
- **/
-static void i40e_xsk_umem_dma_unmap(struct i40e_vsi *vsi, struct xdp_umem *umem)
-{
-	struct i40e_pf *pf = vsi->back;
-	struct device *dev;
-	unsigned int i;
-
-	dev = &pf->pdev->dev;
-
-	for (i = 0; i < umem->npgs; i++) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, I40E_RX_DMA_ATTR);
-
-		umem->pages[i].dma = 0;
-	}
-}
-
 /**
  * i40e_xsk_umem_enable - Enable/associate a UMEM to a certain ring/qid
  * @vsi: Current VSI
@@ -97,7 +40,6 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 				u16 qid)
 {
 	struct net_device *netdev = vsi->netdev;
-	struct xdp_umem_fq_reuse *reuseq;
 	bool if_running;
 	int err;
 
@@ -111,13 +53,7 @@ static int i40e_xsk_umem_enable(struct i40e_vsi *vsi, struct xdp_umem *umem,
 	    qid >= netdev->real_num_tx_queues)
 		return -EINVAL;
 
-	reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
-	if (!reuseq)
-		return -ENOMEM;
-
-	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
-	err = i40e_xsk_umem_dma_map(vsi, umem);
+	err = xsk_buff_dma_map(umem, &vsi->back->pdev->dev, I40E_RX_DMA_ATTR);
 	if (err)
 		return err;
 
@@ -170,7 +106,7 @@ static int i40e_xsk_umem_disable(struct i40e_vsi *vsi, u16 qid)
 	}
 
 	clear_bit(qid, vsi->af_xdp_zc_qps);
-	i40e_xsk_umem_dma_unmap(vsi, umem);
+	xsk_buff_dma_unmap(umem, I40E_RX_DMA_ATTR);
 
 	if (if_running) {
 		err = i40e_queue_pair_enable(vsi, qid);
@@ -209,11 +145,9 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
  **/
 static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 {
-	struct xdp_umem *umem = rx_ring->xsk_umem;
 	int err, result = I40E_XDP_PASS;
 	struct i40e_ring *xdp_ring;
 	struct bpf_prog *xdp_prog;
-	u64 offset;
 	u32 act;
 
 	rcu_read_lock();
@@ -222,9 +156,6 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	 */
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	offset = xdp->data - xdp->data_hard_start;
-
-	xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
 
 	switch (act) {
 	case XDP_PASS:
@@ -251,107 +182,26 @@ static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
 	return result;
 }
 
-/**
- * i40e_alloc_buffer_zc - Allocates an i40e_rx_buffer_zc
- * @rx_ring: Rx ring
- * @bi: Rx buffer to populate
- *
- * This function allocates an Rx buffer. The buffer can come from fill
- * queue, or via the recycle queue (next_to_alloc).
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_buffer_zc(struct i40e_ring *rx_ring,
-				 struct i40e_rx_buffer_zc *bi)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	void *addr = bi->addr;
-	u64 handle, hr;
-
-	if (addr) {
-		rx_ring->rx_stats.page_reuse_count++;
-		return true;
-	}
-
-	if (!xsk_umem_peek_addr(umem, &handle)) {
-		rx_ring->rx_stats.alloc_page_failed++;
-		return false;
-	}
-
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	bi->dma = xdp_umem_get_dma(umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
-	xsk_umem_release_addr(umem);
-	return true;
-}
-
-/**
- * i40e_alloc_buffer_slow_zc - Allocates an i40e_rx_buffer_zc
- * @rx_ring: Rx ring
- * @bi: Rx buffer to populate
- *
- * This function allocates an Rx buffer. The buffer can come from fill
- * queue, or via the reuse queue.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_buffer_slow_zc(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer_zc *bi)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	u64 handle, hr;
-
-	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
-		rx_ring->rx_stats.alloc_page_failed++;
-		return false;
-	}
-
-	handle &= rx_ring->xsk_umem->chunk_mask;
-
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	bi->dma = xdp_umem_get_dma(umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
-	xsk_umem_release_addr_rq(umem);
-	return true;
-}
-
-static __always_inline bool
-__i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
-			   bool alloc(struct i40e_ring *rx_ring,
-				      struct i40e_rx_buffer_zc *bi))
+bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
 {
 	u16 ntu = rx_ring->next_to_use;
 	union i40e_rx_desc *rx_desc;
-	struct i40e_rx_buffer_zc *bi;
+	struct xdp_buff **bi, *xdp;
+	dma_addr_t dma;
 	bool ok = true;
 
 	rx_desc = I40E_RX_DESC(rx_ring, ntu);
 	bi = i40e_rx_bi(rx_ring, ntu);
 	do {
-		if (!alloc(rx_ring, bi)) {
+		xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+		if (!xdp) {
 			ok = false;
 			goto no_buffers;
 		}
-
-		dma_sync_single_range_for_device(rx_ring->dev, bi->dma, 0,
-						 rx_ring->rx_buf_len,
-						 DMA_BIDIRECTIONAL);
-
-		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+		*bi = xdp;
+		dma = xsk_buff_xdp_get_dma(xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
+		rx_desc->read.hdr_addr = 0;
 
 		rx_desc++;
 		bi++;
@@ -363,7 +213,6 @@ __i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count,
 			ntu = 0;
 		}
 
-		rx_desc->wb.qword1.status_error_len = 0;
 		count--;
 	} while (count);
 
@@ -374,130 +223,9 @@ no_buffers:
 	return ok;
 }
 
-/**
- * i40e_alloc_rx_buffers_zc - Allocates a number of Rx buffers
- * @rx_ring: Rx ring
- * @count: The number of buffers to allocate
- *
- * This function allocates a number of Rx buffers from the reuse queue
- * or fill ring and places them on the Rx ring.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 count)
-{
-	return __i40e_alloc_rx_buffers_zc(rx_ring, count,
-					  i40e_alloc_buffer_slow_zc);
-}
-
-/**
- * i40e_alloc_rx_buffers_fast_zc - Allocates a number of Rx buffers
- * @rx_ring: Rx ring
- * @count: The number of buffers to allocate
- *
- * This function allocates a number of Rx buffers from the fill ring
- * or the internal recycle mechanism and places them on the Rx ring.
- *
- * Returns true for a successful allocation, false otherwise
- **/
-static bool i40e_alloc_rx_buffers_fast_zc(struct i40e_ring *rx_ring, u16 count)
-{
-	return __i40e_alloc_rx_buffers_zc(rx_ring, count,
-					  i40e_alloc_buffer_zc);
-}
-
-/**
- * i40e_get_rx_buffer_zc - Return the current Rx buffer
- * @rx_ring: Rx ring
- * @size: The size of the rx buffer (read from descriptor)
- *
- * This function returns the current, received Rx buffer, and also
- * does DMA synchronization.  the Rx ring.
- *
- * Returns the received Rx buffer
- **/
-static struct i40e_rx_buffer_zc *i40e_get_rx_buffer_zc(
-	struct i40e_ring *rx_ring,
-	const unsigned int size)
-{
-	struct i40e_rx_buffer_zc *bi;
-
-	bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
-
-	/* we are reusing so sync this buffer for CPU use */
-	dma_sync_single_range_for_cpu(rx_ring->dev,
-				      bi->dma, 0,
-				      size,
-				      DMA_BIDIRECTIONAL);
-
-	return bi;
-}
-
-/**
- * i40e_reuse_rx_buffer_zc - Recycle an Rx buffer
- * @rx_ring: Rx ring
- * @old_bi: The Rx buffer to recycle
- *
- * This function recycles a finished Rx buffer, and places it on the
- * recycle queue (next_to_alloc).
- **/
-static void i40e_reuse_rx_buffer_zc(struct i40e_ring *rx_ring,
-				    struct i40e_rx_buffer_zc *old_bi)
-{
-	struct i40e_rx_buffer_zc *new_bi = i40e_rx_bi(rx_ring,
-						      rx_ring->next_to_alloc);
-	u16 nta = rx_ring->next_to_alloc;
-
-	/* update, and store next to alloc */
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	/* transfer page from old buffer to new buffer */
-	new_bi->dma = old_bi->dma;
-	new_bi->addr = old_bi->addr;
-	new_bi->handle = old_bi->handle;
-
-	old_bi->addr = NULL;
-}
-
-/**
- * i40e_zca_free - Free callback for MEM_TYPE_ZERO_COPY allocations
- * @alloc: Zero-copy allocator
- * @handle: Buffer handle
- **/
-void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
-{
-	struct i40e_rx_buffer_zc *bi;
-	struct i40e_ring *rx_ring;
-	u64 hr, mask;
-	u16 nta;
-
-	rx_ring = container_of(alloc, struct i40e_ring, zca);
-	hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
-	mask = rx_ring->xsk_umem->chunk_mask;
-
-	nta = rx_ring->next_to_alloc;
-	bi = i40e_rx_bi(rx_ring, nta);
-
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	handle &= mask;
-
-	bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
-					    rx_ring->xsk_umem->headroom);
-}
-
 /**
  * i40e_construct_skb_zc - Create skbufff from zero-copy Rx buffer
  * @rx_ring: Rx ring
- * @bi: Rx buffer
  * @xdp: xdp_buff
  *
  * This functions allocates a new skb from a zero-copy Rx buffer.
@@ -505,7 +233,6 @@ void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
  * Returns the skb, or NULL on failure.
  **/
 static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
-					     struct i40e_rx_buffer_zc *bi,
 					     struct xdp_buff *xdp)
 {
 	unsigned int metasize = xdp->data - xdp->data_meta;
@@ -524,7 +251,7 @@ static struct sk_buff *i40e_construct_skb_zc(struct i40e_ring *rx_ring,
 	if (metasize)
 		skb_metadata_set(skb, metasize);
 
-	i40e_reuse_rx_buffer_zc(rx_ring, bi);
+	xsk_buff_free(xdp);
 	return skb;
 }
 
@@ -539,25 +266,20 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 cleaned_count = I40E_DESC_UNUSED(rx_ring);
-	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_res, xdp_xmit = 0;
 	bool failure = false;
 	struct sk_buff *skb;
-	struct xdp_buff xdp;
-
-	xdp.rxq = &rx_ring->xdp_rxq;
-	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
-		struct i40e_rx_buffer_zc *bi;
 		union i40e_rx_desc *rx_desc;
+		struct xdp_buff **bi;
 		unsigned int size;
 		u64 qword;
 
 		if (cleaned_count >= I40E_RX_BUFFER_WRITE) {
 			failure = failure ||
-				  !i40e_alloc_rx_buffers_fast_zc(rx_ring,
-								 cleaned_count);
+				  !i40e_alloc_rx_buffers_zc(rx_ring,
+							    cleaned_count);
 			cleaned_count = 0;
 		}
 
@@ -575,9 +297,10 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 						      rx_desc->raw.qword[0],
 						      qword);
 			bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
-			i40e_inc_ntc(rx_ring);
-			i40e_reuse_rx_buffer_zc(rx_ring, bi);
+			xsk_buff_free(*bi);
+			*bi = NULL;
 			cleaned_count++;
+			i40e_inc_ntc(rx_ring);
 			continue;
 		}
 
@@ -587,22 +310,18 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		if (!size)
 			break;
 
-		bi = i40e_get_rx_buffer_zc(rx_ring, size);
-		xdp.data = bi->addr;
-		xdp.data_meta = xdp.data;
-		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
-		xdp.data_end = xdp.data + size;
-		xdp.handle = bi->handle;
+		bi = i40e_rx_bi(rx_ring, rx_ring->next_to_clean);
+		(*bi)->data_end = (*bi)->data + size;
+		xsk_buff_dma_sync_for_cpu(*bi);
 
-		xdp_res = i40e_run_xdp_zc(rx_ring, &xdp);
+		xdp_res = i40e_run_xdp_zc(rx_ring, *bi);
 		if (xdp_res) {
-			if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR)) {
+			if (xdp_res & (I40E_XDP_TX | I40E_XDP_REDIR))
 				xdp_xmit |= xdp_res;
-				bi->addr = NULL;
-			} else {
-				i40e_reuse_rx_buffer_zc(rx_ring, bi);
-			}
+			else
+				xsk_buff_free(*bi);
 
+			*bi = NULL;
 			total_rx_bytes += size;
 			total_rx_packets++;
 
@@ -618,7 +337,8 @@ int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget)
 		 * BIT(I40E_RXD_QW1_ERROR_SHIFT). This is due to that
 		 * SBP is *not* set in PRT_SBPVSI (default not set).
 		 */
-		skb = i40e_construct_skb_zc(rx_ring, bi, &xdp);
+		skb = i40e_construct_skb_zc(rx_ring, *bi);
+		*bi = NULL;
 		if (!skb) {
 			rx_ring->rx_stats.alloc_buff_failed++;
 			break;
@@ -676,10 +396,9 @@ static bool i40e_xmit_zc(struct i40e_ring *xdp_ring, unsigned int budget)
 		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
 			break;
 
-		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
-		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
-					   DMA_BIDIRECTIONAL);
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+						 desc.len);
 
 		tx_bi = &xdp_ring->tx_bi[xdp_ring->next_to_use];
 		tx_bi->bytecount = desc.len;
@@ -838,13 +557,13 @@ void i40e_xsk_clean_rx_ring(struct i40e_ring *rx_ring)
 	u16 i;
 
 	for (i = 0; i < rx_ring->count; i++) {
-		struct i40e_rx_buffer_zc *rx_bi = i40e_rx_bi(rx_ring, i);
+		struct xdp_buff **rx_bi = i40e_rx_bi(rx_ring, i);
 
-		if (!rx_bi->addr)
+		if (!*rx_bi)
 			continue;
 
-		xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_bi->handle);
-		rx_bi->addr = NULL;
+		xsk_buff_free(*rx_bi);
+		*rx_bi = NULL;	/* clear the ring slot, not a local copy */
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.h b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
index f5e292c218ee..ea919a7d60ec 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.h
@@ -12,7 +12,6 @@ int i40e_queue_pair_disable(struct i40e_vsi *vsi, int queue_pair);
 int i40e_queue_pair_enable(struct i40e_vsi *vsi, int queue_pair);
 int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
 			u16 qid);
-void i40e_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
 bool i40e_alloc_rx_buffers_zc(struct i40e_ring *rx_ring, u16 cleaned_count);
 int i40e_clean_rx_irq_zc(struct i40e_ring *rx_ring, int budget);
 
-- 
cgit v1.2.3-59-g8ed1b


From 175fc430670be92c00317b9aada8bf39b47b717e Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:57 +0200
Subject: ice, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove MEM_TYPE_ZERO_COPY in favor of the new MEM_TYPE_XSK_BUFF_POOL
APIs.

v4->v5: Fixed "warning: Excess function parameter 'alloc' description
        in 'ice_alloc_rx_bufs_zc'" and "warning: Excess function
        parameter 'xdp' description in
        'ice_construct_skb_zc'". (Jakub)

Signed-off-by: Maciej Fijalkowski <maciej.fijalkowski@intel.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-10-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/ice/ice_base.c |  16 +-
 drivers/net/ethernet/intel/ice/ice_txrx.h |   8 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c  | 376 ++++--------------------------
 drivers/net/ethernet/intel/ice/ice_xsk.h  |  13 +-
 4 files changed, 54 insertions(+), 359 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index a19cd6f5436b..433eb72b1c85 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -1,6 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0
 /* Copyright (c) 2019, Intel Corporation. */
 
+#include <net/xdp_sock_drv.h>
 #include "ice_base.h"
 #include "ice_dcb_lib.h"
 
@@ -308,24 +309,23 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 		if (ring->xsk_umem) {
 			xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 
-			ring->rx_buf_len = ring->xsk_umem->chunk_size_nohr -
-					   XDP_PACKET_HEADROOM;
+			ring->rx_buf_len =
+				xsk_umem_get_rx_frame_size(ring->xsk_umem);
 			/* For AF_XDP ZC, we disallow packets to span on
 			 * multiple buffers, thus letting us skip that
 			 * handling in the fast-path.
 			 */
 			chain_len = 1;
-			ring->zca.free = ice_zca_free;
 			err = xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-							 MEM_TYPE_ZERO_COPY,
-							 &ring->zca);
+							 MEM_TYPE_XSK_BUFF_POOL,
+							 NULL);
 			if (err)
 				return err;
+			xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
 
-			dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_ZERO_COPY on Rx ring %d\n",
+			dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 				 ring->q_index);
 		} else {
-			ring->zca.free = NULL;
 			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
 				/* coverity[check_return] */
 				xdp_rxq_info_reg(&ring->xdp_rxq,
@@ -426,7 +426,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	writel(0, ring->tail);
 
 	err = ring->xsk_umem ?
-	      ice_alloc_rx_bufs_slow_zc(ring, ICE_DESC_UNUSED(ring)) :
+	      ice_alloc_rx_bufs_zc(ring, ICE_DESC_UNUSED(ring)) :
 	      ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
 	if (err)
 		dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 7ee00a128663..d0fd2173854f 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -155,17 +155,16 @@ struct ice_tx_offload_params {
 };
 
 struct ice_rx_buf {
-	struct sk_buff *skb;
-	dma_addr_t dma;
 	union {
 		struct {
+			struct sk_buff *skb;
+			dma_addr_t dma;
 			struct page *page;
 			unsigned int page_offset;
 			u16 pagecnt_bias;
 		};
 		struct {
-			void *addr;
-			u64 handle;
+			struct xdp_buff *xdp;
 		};
 	};
 };
@@ -289,7 +288,6 @@ struct ice_ring {
 	struct rcu_head rcu;		/* to avoid race on free */
 	struct bpf_prog *xdp_prog;
 	struct xdp_umem *xsk_umem;
-	struct zero_copy_allocator zca;
 	/* CL3 - 3rd cacheline starts here */
 	struct xdp_rxq_info xdp_rxq;
 	/* CLX - the below items are only accessed infrequently and should be
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 70e204307a93..a73f6c3c70a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -279,28 +279,6 @@ static int ice_xsk_alloc_umems(struct ice_vsi *vsi)
 	return 0;
 }
 
-/**
- * ice_xsk_add_umem - add a UMEM region for XDP sockets
- * @vsi: VSI to which the UMEM will be added
- * @umem: pointer to a requested UMEM region
- * @qid: queue ID
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_add_umem(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
-{
-	int err;
-
-	err = ice_xsk_alloc_umems(vsi);
-	if (err)
-		return err;
-
-	vsi->xsk_umems[qid] = umem;
-	vsi->num_xsk_umems_used++;
-
-	return 0;
-}
-
 /**
  * ice_xsk_remove_umem - Remove an UMEM for a certain ring/qid
  * @vsi: VSI from which the VSI will be removed
@@ -318,65 +296,6 @@ static void ice_xsk_remove_umem(struct ice_vsi *vsi, u16 qid)
 	}
 }
 
-/**
- * ice_xsk_umem_dma_map - DMA map UMEM region for XDP sockets
- * @vsi: VSI to map the UMEM region
- * @umem: UMEM to map
- *
- * Returns 0 on success, negative on error
- */
-static int ice_xsk_umem_dma_map(struct ice_vsi *vsi, struct xdp_umem *umem)
-{
-	struct ice_pf *pf = vsi->back;
-	struct device *dev;
-	unsigned int i;
-
-	dev = ice_pf_to_dev(pf);
-	for (i = 0; i < umem->npgs; i++) {
-		dma_addr_t dma = dma_map_page_attrs(dev, umem->pgs[i], 0,
-						    PAGE_SIZE,
-						    DMA_BIDIRECTIONAL,
-						    ICE_RX_DMA_ATTR);
-		if (dma_mapping_error(dev, dma)) {
-			dev_dbg(dev, "XSK UMEM DMA mapping error on page num %d\n",
-				i);
-			goto out_unmap;
-		}
-
-		umem->pages[i].dma = dma;
-	}
-
-	return 0;
-
-out_unmap:
-	for (; i > 0; i--) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
-		umem->pages[i].dma = 0;
-	}
-
-	return -EFAULT;
-}
-
-/**
- * ice_xsk_umem_dma_unmap - DMA unmap UMEM region for XDP sockets
- * @vsi: VSI from which the UMEM will be unmapped
- * @umem: UMEM to unmap
- */
-static void ice_xsk_umem_dma_unmap(struct ice_vsi *vsi, struct xdp_umem *umem)
-{
-	struct ice_pf *pf = vsi->back;
-	struct device *dev;
-	unsigned int i;
-
-	dev = ice_pf_to_dev(pf);
-	for (i = 0; i < umem->npgs; i++) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, ICE_RX_DMA_ATTR);
-
-		umem->pages[i].dma = 0;
-	}
-}
 
 /**
  * ice_xsk_umem_disable - disable a UMEM region
@@ -391,7 +310,7 @@ static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
 	    !vsi->xsk_umems[qid])
 		return -EINVAL;
 
-	ice_xsk_umem_dma_unmap(vsi, vsi->xsk_umems[qid]);
+	xsk_buff_dma_unmap(vsi->xsk_umems[qid], ICE_RX_DMA_ATTR);
 	ice_xsk_remove_umem(vsi, qid);
 
 	return 0;
@@ -408,7 +327,6 @@ static int ice_xsk_umem_disable(struct ice_vsi *vsi, u16 qid)
 static int
 ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
 {
-	struct xdp_umem_fq_reuse *reuseq;
 	int err;
 
 	if (vsi->type != ICE_VSI_PF)
@@ -419,20 +337,18 @@ ice_xsk_umem_enable(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid)
 	if (qid >= vsi->num_xsk_umems)
 		return -EINVAL;
 
+	err = ice_xsk_alloc_umems(vsi);
+	if (err)
+		return err;
+
 	if (vsi->xsk_umems && vsi->xsk_umems[qid])
 		return -EBUSY;
 
-	reuseq = xsk_reuseq_prepare(vsi->rx_rings[0]->count);
-	if (!reuseq)
-		return -ENOMEM;
-
-	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
-	err = ice_xsk_umem_dma_map(vsi, umem);
-	if (err)
-		return err;
+	vsi->xsk_umems[qid] = umem;
+	vsi->num_xsk_umems_used++;
 
-	err = ice_xsk_add_umem(vsi, umem, qid);
+	err = xsk_buff_dma_map(vsi->xsk_umems[qid], ice_pf_to_dev(vsi->back),
+			       ICE_RX_DMA_ATTR);
 	if (err)
 		return err;
 
@@ -483,138 +399,23 @@ xsk_umem_if_up:
 	return ret;
 }
 
-/**
- * ice_zca_free - Callback for MEM_TYPE_ZERO_COPY allocations
- * @zca: zero-cpoy allocator
- * @handle: Buffer handle
- */
-void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
-	struct ice_rx_buf *rx_buf;
-	struct ice_ring *rx_ring;
-	struct xdp_umem *umem;
-	u64 hr, mask;
-	u16 nta;
-
-	rx_ring = container_of(zca, struct ice_ring, zca);
-	umem = rx_ring->xsk_umem;
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	mask = umem->chunk_mask;
-
-	nta = rx_ring->next_to_alloc;
-	rx_buf = &rx_ring->rx_buf[nta];
-
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	handle &= mask;
-
-	rx_buf->dma = xdp_umem_get_dma(umem, handle);
-	rx_buf->dma += hr;
-
-	rx_buf->addr = xdp_umem_get_data(umem, handle);
-	rx_buf->addr += hr;
-
-	rx_buf->handle = (u64)handle + umem->headroom;
-}
-
-/**
- * ice_alloc_buf_fast_zc - Retrieve buffer address from XDP umem
- * @rx_ring: ring with an xdp_umem bound to it
- * @rx_buf: buffer to which xsk page address will be assigned
- *
- * This function allocates an Rx buffer in the hot path.
- * The buffer can come from fill queue or recycle queue.
- *
- * Returns true if an assignment was successful, false if not.
- */
-static __always_inline bool
-ice_alloc_buf_fast_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	void *addr = rx_buf->addr;
-	u64 handle, hr;
-
-	if (addr) {
-		rx_ring->rx_stats.page_reuse_count++;
-		return true;
-	}
-
-	if (!xsk_umem_peek_addr(umem, &handle)) {
-		rx_ring->rx_stats.alloc_page_failed++;
-		return false;
-	}
-
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	rx_buf->dma = xdp_umem_get_dma(umem, handle);
-	rx_buf->dma += hr;
-
-	rx_buf->addr = xdp_umem_get_data(umem, handle);
-	rx_buf->addr += hr;
-
-	rx_buf->handle = handle + umem->headroom;
-
-	xsk_umem_release_addr(umem);
-	return true;
-}
-
-/**
- * ice_alloc_buf_slow_zc - Retrieve buffer address from XDP umem
- * @rx_ring: ring with an xdp_umem bound to it
- * @rx_buf: buffer to which xsk page address will be assigned
- *
- * This function allocates an Rx buffer in the slow path.
- * The buffer can come from fill queue or recycle queue.
- *
- * Returns true if an assignment was successful, false if not.
- */
-static __always_inline bool
-ice_alloc_buf_slow_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	u64 handle, headroom;
-
-	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
-		rx_ring->rx_stats.alloc_page_failed++;
-		return false;
-	}
-
-	handle &= umem->chunk_mask;
-	headroom = umem->headroom + XDP_PACKET_HEADROOM;
-
-	rx_buf->dma = xdp_umem_get_dma(umem, handle);
-	rx_buf->dma += headroom;
-
-	rx_buf->addr = xdp_umem_get_data(umem, handle);
-	rx_buf->addr += headroom;
-
-	rx_buf->handle = handle + umem->headroom;
-
-	xsk_umem_release_addr_rq(umem);
-	return true;
-}
-
 /**
  * ice_alloc_rx_bufs_zc - allocate a number of Rx buffers
  * @rx_ring: Rx ring
  * @count: The number of buffers to allocate
- * @alloc: the function pointer to call for allocation
  *
  * This function allocates a number of Rx buffers from the fill ring
  * or the internal recycle mechanism and places them on the Rx ring.
  *
  * Returns false if all allocations were successful, true if any fail.
  */
-static bool
-ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
-		     bool (*alloc)(struct ice_ring *, struct ice_rx_buf *))
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count)
 {
 	union ice_32b_rx_flex_desc *rx_desc;
 	u16 ntu = rx_ring->next_to_use;
 	struct ice_rx_buf *rx_buf;
 	bool ret = false;
+	dma_addr_t dma;
 
 	if (!count)
 		return false;
@@ -623,16 +424,14 @@ ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
 	rx_buf = &rx_ring->rx_buf[ntu];
 
 	do {
-		if (!alloc(rx_ring, rx_buf)) {
+		rx_buf->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+		if (!rx_buf->xdp) {
 			ret = true;
 			break;
 		}
 
-		dma_sync_single_range_for_device(rx_ring->dev, rx_buf->dma, 0,
-						 rx_ring->rx_buf_len,
-						 DMA_BIDIRECTIONAL);
-
-		rx_desc->read.pkt_addr = cpu_to_le64(rx_buf->dma);
+		dma = xsk_buff_xdp_get_dma(rx_buf->xdp);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
 		rx_desc->wb.status_error0 = 0;
 
 		rx_desc++;
@@ -652,32 +451,6 @@ ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, int count,
 	return ret;
 }
 
-/**
- * ice_alloc_rx_bufs_fast_zc - allocate zero copy bufs in the hot path
- * @rx_ring: Rx ring
- * @count: number of bufs to allocate
- *
- * Returns false on success, true on failure.
- */
-static bool ice_alloc_rx_bufs_fast_zc(struct ice_ring *rx_ring, u16 count)
-{
-	return ice_alloc_rx_bufs_zc(rx_ring, count,
-				    ice_alloc_buf_fast_zc);
-}
-
-/**
- * ice_alloc_rx_bufs_slow_zc - allocate zero copy bufs in the slow path
- * @rx_ring: Rx ring
- * @count: number of bufs to allocate
- *
- * Returns false on success, true on failure.
- */
-bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count)
-{
-	return ice_alloc_rx_bufs_zc(rx_ring, count,
-				    ice_alloc_buf_slow_zc);
-}
-
 /**
  * ice_bump_ntc - Bump the next_to_clean counter of an Rx ring
  * @rx_ring: Rx ring
@@ -691,77 +464,22 @@ static void ice_bump_ntc(struct ice_ring *rx_ring)
 	prefetch(ICE_RX_DESC(rx_ring, ntc));
 }
 
-/**
- * ice_get_rx_buf_zc - Fetch the current Rx buffer
- * @rx_ring: Rx ring
- * @size: size of a buffer
- *
- * This function returns the current, received Rx buffer and does
- * DMA synchronization.
- *
- * Returns a pointer to the received Rx buffer.
- */
-static struct ice_rx_buf *ice_get_rx_buf_zc(struct ice_ring *rx_ring, int size)
-{
-	struct ice_rx_buf *rx_buf;
-
-	rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
-
-	dma_sync_single_range_for_cpu(rx_ring->dev, rx_buf->dma, 0,
-				      size, DMA_BIDIRECTIONAL);
-
-	return rx_buf;
-}
-
-/**
- * ice_reuse_rx_buf_zc - reuse an Rx buffer
- * @rx_ring: Rx ring
- * @old_buf: The buffer to recycle
- *
- * This function recycles a finished Rx buffer, and places it on the recycle
- * queue (next_to_alloc).
- */
-static void
-ice_reuse_rx_buf_zc(struct ice_ring *rx_ring, struct ice_rx_buf *old_buf)
-{
-	unsigned long mask = (unsigned long)rx_ring->xsk_umem->chunk_mask;
-	u64 hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
-	u16 nta = rx_ring->next_to_alloc;
-	struct ice_rx_buf *new_buf;
-
-	new_buf = &rx_ring->rx_buf[nta++];
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	new_buf->dma = old_buf->dma & mask;
-	new_buf->dma += hr;
-
-	new_buf->addr = (void *)((unsigned long)old_buf->addr & mask);
-	new_buf->addr += hr;
-
-	new_buf->handle = old_buf->handle & mask;
-	new_buf->handle += rx_ring->xsk_umem->headroom;
-
-	old_buf->addr = NULL;
-}
-
 /**
  * ice_construct_skb_zc - Create an sk_buff from zero-copy buffer
  * @rx_ring: Rx ring
  * @rx_buf: zero-copy Rx buffer
- * @xdp: XDP buffer
  *
  * This function allocates a new skb from a zero-copy Rx buffer.
  *
  * Returns the skb on success, NULL on failure.
  */
 static struct sk_buff *
-ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
-		     struct xdp_buff *xdp)
+ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
-	unsigned int metasize = xdp->data - xdp->data_meta;
-	unsigned int datasize = xdp->data_end - xdp->data;
-	unsigned int datasize_hard = xdp->data_end -
-				     xdp->data_hard_start;
+	unsigned int metasize = rx_buf->xdp->data - rx_buf->xdp->data_meta;
+	unsigned int datasize = rx_buf->xdp->data_end - rx_buf->xdp->data;
+	unsigned int datasize_hard = rx_buf->xdp->data_end -
+				     rx_buf->xdp->data_hard_start;
 	struct sk_buff *skb;
 
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi, datasize_hard,
@@ -769,13 +487,13 @@ ice_construct_skb_zc(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	if (unlikely(!skb))
 		return NULL;
 
-	skb_reserve(skb, xdp->data - xdp->data_hard_start);
-	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	skb_reserve(skb, rx_buf->xdp->data - rx_buf->xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), rx_buf->xdp->data, datasize);
 	if (metasize)
 		skb_metadata_set(skb, metasize);
 
-	ice_reuse_rx_buf_zc(rx_ring, rx_buf);
-
+	xsk_buff_free(rx_buf->xdp);
+	rx_buf->xdp = NULL;
 	return skb;
 }
 
@@ -802,7 +520,6 @@ ice_run_xdp_zc(struct ice_ring *rx_ring, struct xdp_buff *xdp)
 	}
 
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	xdp->handle += xdp->data - xdp->data_hard_start;
 	switch (act) {
 	case XDP_PASS:
 		break;
@@ -840,13 +557,8 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
-	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_xmit = 0;
 	bool failure = false;
-	struct xdp_buff xdp;
-
-	xdp.rxq = &rx_ring->xdp_rxq;
-	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < (unsigned int)budget)) {
 		union ice_32b_rx_flex_desc *rx_desc;
@@ -858,8 +570,8 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 		u8 rx_ptype;
 
 		if (cleaned_count >= ICE_RX_BUF_WRITE) {
-			failure |= ice_alloc_rx_bufs_fast_zc(rx_ring,
-							     cleaned_count);
+			failure |= ice_alloc_rx_bufs_zc(rx_ring,
+							cleaned_count);
 			cleaned_count = 0;
 		}
 
@@ -880,25 +592,19 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 		if (!size)
 			break;
 
-		rx_buf = ice_get_rx_buf_zc(rx_ring, size);
-		if (!rx_buf->addr)
-			break;
 
-		xdp.data = rx_buf->addr;
-		xdp.data_meta = xdp.data;
-		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
-		xdp.data_end = xdp.data + size;
-		xdp.handle = rx_buf->handle;
+		rx_buf = &rx_ring->rx_buf[rx_ring->next_to_clean];
+		rx_buf->xdp->data_end = rx_buf->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(rx_buf->xdp);
 
-		xdp_res = ice_run_xdp_zc(rx_ring, &xdp);
+		xdp_res = ice_run_xdp_zc(rx_ring, rx_buf->xdp);
 		if (xdp_res) {
-			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR)) {
+			if (xdp_res & (ICE_XDP_TX | ICE_XDP_REDIR))
 				xdp_xmit |= xdp_res;
-				rx_buf->addr = NULL;
-			} else {
-				ice_reuse_rx_buf_zc(rx_ring, rx_buf);
-			}
+			else
+				xsk_buff_free(rx_buf->xdp);
 
+			rx_buf->xdp = NULL;
 			total_rx_bytes += size;
 			total_rx_packets++;
 			cleaned_count++;
@@ -908,7 +614,7 @@ int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget)
 		}
 
 		/* XDP_PASS path */
-		skb = ice_construct_skb_zc(rx_ring, rx_buf, &xdp);
+		skb = ice_construct_skb_zc(rx_ring, rx_buf);
 		if (!skb) {
 			rx_ring->rx_stats.alloc_buf_failed++;
 			break;
@@ -979,10 +685,9 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
 		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
 			break;
 
-		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
-		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
-					   DMA_BIDIRECTIONAL);
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+						 desc.len);
 
 		tx_buf->bytecount = desc.len;
 
@@ -1165,11 +870,10 @@ void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring)
 	for (i = 0; i < rx_ring->count; i++) {
 		struct ice_rx_buf *rx_buf = &rx_ring->rx_buf[i];
 
-		if (!rx_buf->addr)
+		if (!rx_buf->xdp)
 			continue;
 
-		xsk_umem_fq_reuse(rx_ring->xsk_umem, rx_buf->handle);
-		rx_buf->addr = NULL;
+		rx_buf->xdp = NULL;
 	}
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.h b/drivers/net/ethernet/intel/ice/ice_xsk.h
index 8a4ba7c6d549..fc1a06b4df36 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.h
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.h
@@ -10,11 +10,10 @@ struct ice_vsi;
 
 #ifdef CONFIG_XDP_SOCKETS
 int ice_xsk_umem_setup(struct ice_vsi *vsi, struct xdp_umem *umem, u16 qid);
-void ice_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
 int ice_clean_rx_irq_zc(struct ice_ring *rx_ring, int budget);
 bool ice_clean_tx_irq_zc(struct ice_ring *xdp_ring, int budget);
 int ice_xsk_wakeup(struct net_device *netdev, u32 queue_id, u32 flags);
-bool ice_alloc_rx_bufs_slow_zc(struct ice_ring *rx_ring, u16 count);
+bool ice_alloc_rx_bufs_zc(struct ice_ring *rx_ring, u16 count);
 bool ice_xsk_any_rx_ring_ena(struct ice_vsi *vsi);
 void ice_xsk_clean_rx_ring(struct ice_ring *rx_ring);
 void ice_xsk_clean_xdp_ring(struct ice_ring *xdp_ring);
@@ -27,12 +26,6 @@ ice_xsk_umem_setup(struct ice_vsi __always_unused *vsi,
 	return -EOPNOTSUPP;
 }
 
-static inline void
-ice_zca_free(struct zero_copy_allocator __always_unused *zca,
-	     unsigned long __always_unused handle)
-{
-}
-
 static inline int
 ice_clean_rx_irq_zc(struct ice_ring __always_unused *rx_ring,
 		    int __always_unused budget)
@@ -48,8 +41,8 @@ ice_clean_tx_irq_zc(struct ice_ring __always_unused *xdp_ring,
 }
 
 static inline bool
-ice_alloc_rx_bufs_slow_zc(struct ice_ring __always_unused *rx_ring,
-			  u16 __always_unused count)
+ice_alloc_rx_bufs_zc(struct ice_ring __always_unused *rx_ring,
+		     u16 __always_unused count)
 {
 	return false;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 7117132b22538d24728295ffafa183a0a782fe03 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:58 +0200
Subject: ixgbe, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove MEM_TYPE_ZERO_COPY in favor of the new MEM_TYPE_XSK_BUFF_POOL
APIs.

v1->v2: Fixed xdp_buff data_end update. (Björn)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: intel-wired-lan@lists.osuosl.org
Link: https://lore.kernel.org/bpf/20200520192103.355233-11-bjorn.topel@gmail.com
---
 drivers/net/ethernet/intel/ixgbe/ixgbe.h           |   9 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c      |  15 +-
 .../net/ethernet/intel/ixgbe/ixgbe_txrx_common.h   |   2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c       | 307 ++++-----------------
 4 files changed, 62 insertions(+), 271 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe.h b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
index 2833e4f041ce..5ddfc83a1e46 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe.h
@@ -224,17 +224,17 @@ struct ixgbe_tx_buffer {
 };
 
 struct ixgbe_rx_buffer {
-	struct sk_buff *skb;
-	dma_addr_t dma;
 	union {
 		struct {
+			struct sk_buff *skb;
+			dma_addr_t dma;
 			struct page *page;
 			__u32 page_offset;
 			__u16 pagecnt_bias;
 		};
 		struct {
-			void *addr;
-			u64 handle;
+			bool discard;
+			struct xdp_buff *xdp;
 		};
 	};
 };
@@ -351,7 +351,6 @@ struct ixgbe_ring {
 	};
 	struct xdp_rxq_info xdp_rxq;
 	struct xdp_umem *xsk_umem;
-	struct zero_copy_allocator zca; /* ZC allocator anchor */
 	u16 ring_idx;		/* {rx,tx,xdp}_ring back reference idx */
 	u16 rx_buf_len;
 } ____cacheline_internodealigned_in_smp;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index eab5934b04f5..45fc7ce1a543 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -35,7 +35,7 @@
 #include <net/tc_act/tc_mirred.h>
 #include <net/vxlan.h>
 #include <net/mpls.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include <net/xfrm.h>
 
 #include "ixgbe.h"
@@ -3745,8 +3745,7 @@ static void ixgbe_configure_srrctl(struct ixgbe_adapter *adapter,
 
 	/* configure the packet buffer length */
 	if (rx_ring->xsk_umem) {
-		u32 xsk_buf_len = rx_ring->xsk_umem->chunk_size_nohr -
-				  XDP_PACKET_HEADROOM;
+		u32 xsk_buf_len = xsk_umem_get_rx_frame_size(rx_ring->xsk_umem);
 
 		/* If the MAC support setting RXDCTL.RLPML, the
 		 * SRRCTL[n].BSIZEPKT is set to PAGE_SIZE and
@@ -4093,11 +4092,10 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
 	xdp_rxq_info_unreg_mem_model(&ring->xdp_rxq);
 	ring->xsk_umem = ixgbe_xsk_umem(adapter, ring);
 	if (ring->xsk_umem) {
-		ring->zca.free = ixgbe_zca_free;
 		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
-						   MEM_TYPE_ZERO_COPY,
-						   &ring->zca));
-
+						   MEM_TYPE_XSK_BUFF_POOL,
+						   NULL));
+		xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
 	} else {
 		WARN_ON(xdp_rxq_info_reg_mem_model(&ring->xdp_rxq,
 						   MEM_TYPE_PAGE_SHARED, NULL));
@@ -4153,8 +4151,7 @@ void ixgbe_configure_rx_ring(struct ixgbe_adapter *adapter,
 	}
 
 	if (ring->xsk_umem && hw->mac.type != ixgbe_mac_82599EB) {
-		u32 xsk_buf_len = ring->xsk_umem->chunk_size_nohr -
-				  XDP_PACKET_HEADROOM;
+		u32 xsk_buf_len = xsk_umem_get_rx_frame_size(ring->xsk_umem);
 
 		rxdctl &= ~(IXGBE_RXDCTL_RLPMLMASK |
 			    IXGBE_RXDCTL_RLPML_EN);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
index 6d01700b46bc..7887ae4aaf4f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_txrx_common.h
@@ -35,7 +35,7 @@ int ixgbe_xsk_umem_setup(struct ixgbe_adapter *adapter, struct xdp_umem *umem,
 
 void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle);
 
-void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count);
 int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 			  struct ixgbe_ring *rx_ring,
 			  const int budget);
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 82e4effae704..86add9fbd36c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -20,54 +20,11 @@ struct xdp_umem *ixgbe_xsk_umem(struct ixgbe_adapter *adapter,
 	return xdp_get_umem_from_qid(adapter->netdev, qid);
 }
 
-static int ixgbe_xsk_umem_dma_map(struct ixgbe_adapter *adapter,
-				  struct xdp_umem *umem)
-{
-	struct device *dev = &adapter->pdev->dev;
-	unsigned int i, j;
-	dma_addr_t dma;
-
-	for (i = 0; i < umem->npgs; i++) {
-		dma = dma_map_page_attrs(dev, umem->pgs[i], 0, PAGE_SIZE,
-					 DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
-		if (dma_mapping_error(dev, dma))
-			goto out_unmap;
-
-		umem->pages[i].dma = dma;
-	}
-
-	return 0;
-
-out_unmap:
-	for (j = 0; j < i; j++) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
-		umem->pages[i].dma = 0;
-	}
-
-	return -1;
-}
-
-static void ixgbe_xsk_umem_dma_unmap(struct ixgbe_adapter *adapter,
-				     struct xdp_umem *umem)
-{
-	struct device *dev = &adapter->pdev->dev;
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		dma_unmap_page_attrs(dev, umem->pages[i].dma, PAGE_SIZE,
-				     DMA_BIDIRECTIONAL, IXGBE_RX_DMA_ATTR);
-
-		umem->pages[i].dma = 0;
-	}
-}
-
 static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
 				 struct xdp_umem *umem,
 				 u16 qid)
 {
 	struct net_device *netdev = adapter->netdev;
-	struct xdp_umem_fq_reuse *reuseq;
 	bool if_running;
 	int err;
 
@@ -78,13 +35,7 @@ static int ixgbe_xsk_umem_enable(struct ixgbe_adapter *adapter,
 	    qid >= netdev->real_num_tx_queues)
 		return -EINVAL;
 
-	reuseq = xsk_reuseq_prepare(adapter->rx_ring[0]->count);
-	if (!reuseq)
-		return -ENOMEM;
-
-	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
-	err = ixgbe_xsk_umem_dma_map(adapter, umem);
+	err = xsk_buff_dma_map(umem, &adapter->pdev->dev, IXGBE_RX_DMA_ATTR);
 	if (err)
 		return err;
 
@@ -124,7 +75,7 @@ static int ixgbe_xsk_umem_disable(struct ixgbe_adapter *adapter, u16 qid)
 		ixgbe_txrx_ring_disable(adapter, qid);
 
 	clear_bit(qid, adapter->af_xdp_zc_qps);
-	ixgbe_xsk_umem_dma_unmap(adapter, umem);
+	xsk_buff_dma_unmap(umem, IXGBE_RX_DMA_ATTR);
 
 	if (if_running)
 		ixgbe_txrx_ring_enable(adapter, qid);
@@ -143,19 +94,14 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 			    struct ixgbe_ring *rx_ring,
 			    struct xdp_buff *xdp)
 {
-	struct xdp_umem *umem = rx_ring->xsk_umem;
 	int err, result = IXGBE_XDP_PASS;
 	struct bpf_prog *xdp_prog;
 	struct xdp_frame *xdpf;
-	u64 offset;
 	u32 act;
 
 	rcu_read_lock();
 	xdp_prog = READ_ONCE(rx_ring->xdp_prog);
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
-	offset = xdp->data - xdp->data_hard_start;
-
-	xdp->handle = xsk_umem_adjust_offset(umem, xdp->handle, offset);
 
 	switch (act) {
 	case XDP_PASS:
@@ -186,140 +132,16 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	return result;
 }
 
-static struct
-ixgbe_rx_buffer *ixgbe_get_rx_buffer_zc(struct ixgbe_ring *rx_ring,
-					unsigned int size)
-{
-	struct ixgbe_rx_buffer *bi;
-
-	bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-
-	/* we are reusing so sync this buffer for CPU use */
-	dma_sync_single_range_for_cpu(rx_ring->dev,
-				      bi->dma, 0,
-				      size,
-				      DMA_BIDIRECTIONAL);
-
-	return bi;
-}
-
-static void ixgbe_reuse_rx_buffer_zc(struct ixgbe_ring *rx_ring,
-				     struct ixgbe_rx_buffer *obi)
-{
-	u16 nta = rx_ring->next_to_alloc;
-	struct ixgbe_rx_buffer *nbi;
-
-	nbi = &rx_ring->rx_buffer_info[rx_ring->next_to_alloc];
-	/* update, and store next to alloc */
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	/* transfer page from old buffer to new buffer */
-	nbi->dma = obi->dma;
-	nbi->addr = obi->addr;
-	nbi->handle = obi->handle;
-
-	obi->addr = NULL;
-	obi->skb = NULL;
-}
-
-void ixgbe_zca_free(struct zero_copy_allocator *alloc, unsigned long handle)
-{
-	struct ixgbe_rx_buffer *bi;
-	struct ixgbe_ring *rx_ring;
-	u64 hr, mask;
-	u16 nta;
-
-	rx_ring = container_of(alloc, struct ixgbe_ring, zca);
-	hr = rx_ring->xsk_umem->headroom + XDP_PACKET_HEADROOM;
-	mask = rx_ring->xsk_umem->chunk_mask;
-
-	nta = rx_ring->next_to_alloc;
-	bi = rx_ring->rx_buffer_info;
-
-	nta++;
-	rx_ring->next_to_alloc = (nta < rx_ring->count) ? nta : 0;
-
-	handle &= mask;
-
-	bi->dma = xdp_umem_get_dma(rx_ring->xsk_umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(rx_ring->xsk_umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(rx_ring->xsk_umem, (u64)handle,
-					    rx_ring->xsk_umem->headroom);
-}
-
-static bool ixgbe_alloc_buffer_zc(struct ixgbe_ring *rx_ring,
-				  struct ixgbe_rx_buffer *bi)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	void *addr = bi->addr;
-	u64 handle, hr;
-
-	if (addr)
-		return true;
-
-	if (!xsk_umem_peek_addr(umem, &handle)) {
-		rx_ring->rx_stats.alloc_rx_page_failed++;
-		return false;
-	}
-
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	bi->dma = xdp_umem_get_dma(umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
-	xsk_umem_release_addr(umem);
-	return true;
-}
-
-static bool ixgbe_alloc_buffer_slow_zc(struct ixgbe_ring *rx_ring,
-				       struct ixgbe_rx_buffer *bi)
-{
-	struct xdp_umem *umem = rx_ring->xsk_umem;
-	u64 handle, hr;
-
-	if (!xsk_umem_peek_addr_rq(umem, &handle)) {
-		rx_ring->rx_stats.alloc_rx_page_failed++;
-		return false;
-	}
-
-	handle &= rx_ring->xsk_umem->chunk_mask;
-
-	hr = umem->headroom + XDP_PACKET_HEADROOM;
-
-	bi->dma = xdp_umem_get_dma(umem, handle);
-	bi->dma += hr;
-
-	bi->addr = xdp_umem_get_data(umem, handle);
-	bi->addr += hr;
-
-	bi->handle = xsk_umem_adjust_offset(umem, handle, umem->headroom);
-
-	xsk_umem_release_addr_rq(umem);
-	return true;
-}
-
-static __always_inline bool
-__ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
-			    bool alloc(struct ixgbe_ring *rx_ring,
-				       struct ixgbe_rx_buffer *bi))
+bool ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
 {
 	union ixgbe_adv_rx_desc *rx_desc;
 	struct ixgbe_rx_buffer *bi;
 	u16 i = rx_ring->next_to_use;
+	dma_addr_t dma;
 	bool ok = true;
 
 	/* nothing to do */
-	if (!cleaned_count)
+	if (!count)
 		return true;
 
 	rx_desc = IXGBE_RX_DESC(rx_ring, i);
@@ -327,21 +149,18 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
 	i -= rx_ring->count;
 
 	do {
-		if (!alloc(rx_ring, bi)) {
+		bi->xdp = xsk_buff_alloc(rx_ring->xsk_umem);
+		if (!bi->xdp) {
 			ok = false;
 			break;
 		}
 
-		/* sync the buffer for use by the device */
-		dma_sync_single_range_for_device(rx_ring->dev, bi->dma,
-						 bi->page_offset,
-						 rx_ring->rx_buf_len,
-						 DMA_BIDIRECTIONAL);
+		dma = xsk_buff_xdp_get_dma(bi->xdp);
 
 		/* Refresh the desc even if buffer_addrs didn't change
 		 * because each write-back erases this info.
 		 */
-		rx_desc->read.pkt_addr = cpu_to_le64(bi->dma);
+		rx_desc->read.pkt_addr = cpu_to_le64(dma);
 
 		rx_desc++;
 		bi++;
@@ -355,17 +174,14 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
 		/* clear the length for the next_to_use descriptor */
 		rx_desc->wb.upper.length = 0;
 
-		cleaned_count--;
-	} while (cleaned_count);
+		count--;
+	} while (count);
 
 	i += rx_ring->count;
 
 	if (rx_ring->next_to_use != i) {
 		rx_ring->next_to_use = i;
 
-		/* update next to alloc since we have filled the ring */
-		rx_ring->next_to_alloc = i;
-
 		/* Force memory writes to complete before letting h/w
 		 * know there are new descriptors to fetch.  (Only
 		 * applicable for weak-ordered memory model archs,
@@ -378,40 +194,27 @@ __ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 cleaned_count,
 	return ok;
 }
 
-void ixgbe_alloc_rx_buffers_zc(struct ixgbe_ring *rx_ring, u16 count)
-{
-	__ixgbe_alloc_rx_buffers_zc(rx_ring, count,
-				    ixgbe_alloc_buffer_slow_zc);
-}
-
-static bool ixgbe_alloc_rx_buffers_fast_zc(struct ixgbe_ring *rx_ring,
-					   u16 count)
-{
-	return __ixgbe_alloc_rx_buffers_zc(rx_ring, count,
-					   ixgbe_alloc_buffer_zc);
-}
-
 static struct sk_buff *ixgbe_construct_skb_zc(struct ixgbe_ring *rx_ring,
-					      struct ixgbe_rx_buffer *bi,
-					      struct xdp_buff *xdp)
+					      struct ixgbe_rx_buffer *bi)
 {
-	unsigned int metasize = xdp->data - xdp->data_meta;
-	unsigned int datasize = xdp->data_end - xdp->data;
+	unsigned int metasize = bi->xdp->data - bi->xdp->data_meta;
+	unsigned int datasize = bi->xdp->data_end - bi->xdp->data;
 	struct sk_buff *skb;
 
 	/* allocate a skb to store the frags */
 	skb = __napi_alloc_skb(&rx_ring->q_vector->napi,
-			       xdp->data_end - xdp->data_hard_start,
+			       bi->xdp->data_end - bi->xdp->data_hard_start,
 			       GFP_ATOMIC | __GFP_NOWARN);
 	if (unlikely(!skb))
 		return NULL;
 
-	skb_reserve(skb, xdp->data - xdp->data_hard_start);
-	memcpy(__skb_put(skb, datasize), xdp->data, datasize);
+	skb_reserve(skb, bi->xdp->data - bi->xdp->data_hard_start);
+	memcpy(__skb_put(skb, datasize), bi->xdp->data, datasize);
 	if (metasize)
 		skb_metadata_set(skb, metasize);
 
-	ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+	xsk_buff_free(bi->xdp);
+	bi->xdp = NULL;
 	return skb;
 }
 
@@ -431,14 +234,9 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 	unsigned int total_rx_bytes = 0, total_rx_packets = 0;
 	struct ixgbe_adapter *adapter = q_vector->adapter;
 	u16 cleaned_count = ixgbe_desc_unused(rx_ring);
-	struct xdp_umem *umem = rx_ring->xsk_umem;
 	unsigned int xdp_res, xdp_xmit = 0;
 	bool failure = false;
 	struct sk_buff *skb;
-	struct xdp_buff xdp;
-
-	xdp.rxq = &rx_ring->xdp_rxq;
-	xdp.frame_sz = xsk_umem_xdp_frame_sz(umem);
 
 	while (likely(total_rx_packets < budget)) {
 		union ixgbe_adv_rx_desc *rx_desc;
@@ -448,8 +246,8 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 		/* return some buffers to hardware, one at a time is too slow */
 		if (cleaned_count >= IXGBE_RX_BUFFER_WRITE) {
 			failure = failure ||
-				  !ixgbe_alloc_rx_buffers_fast_zc(rx_ring,
-								 cleaned_count);
+				  !ixgbe_alloc_rx_buffers_zc(rx_ring,
+							     cleaned_count);
 			cleaned_count = 0;
 		}
 
@@ -464,42 +262,40 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 		 */
 		dma_rmb();
 
-		bi = ixgbe_get_rx_buffer_zc(rx_ring, size);
+		bi = &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
 
 		if (unlikely(!ixgbe_test_staterr(rx_desc,
 						 IXGBE_RXD_STAT_EOP))) {
 			struct ixgbe_rx_buffer *next_bi;
 
-			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+			xsk_buff_free(bi->xdp);
+			bi->xdp = NULL;
 			ixgbe_inc_ntc(rx_ring);
 			next_bi =
 			       &rx_ring->rx_buffer_info[rx_ring->next_to_clean];
-			next_bi->skb = ERR_PTR(-EINVAL);
+			next_bi->discard = true;
 			continue;
 		}
 
-		if (unlikely(bi->skb)) {
-			ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
+		if (unlikely(bi->discard)) {
+			xsk_buff_free(bi->xdp);
+			bi->xdp = NULL;
+			bi->discard = false;
 			ixgbe_inc_ntc(rx_ring);
 			continue;
 		}
 
-		xdp.data = bi->addr;
-		xdp.data_meta = xdp.data;
-		xdp.data_hard_start = xdp.data - XDP_PACKET_HEADROOM;
-		xdp.data_end = xdp.data + size;
-		xdp.handle = bi->handle;
-
-		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, &xdp);
+		bi->xdp->data_end = bi->xdp->data + size;
+		xsk_buff_dma_sync_for_cpu(bi->xdp);
+		xdp_res = ixgbe_run_xdp_zc(adapter, rx_ring, bi->xdp);
 
 		if (xdp_res) {
-			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR)) {
+			if (xdp_res & (IXGBE_XDP_TX | IXGBE_XDP_REDIR))
 				xdp_xmit |= xdp_res;
-				bi->addr = NULL;
-				bi->skb = NULL;
-			} else {
-				ixgbe_reuse_rx_buffer_zc(rx_ring, bi);
-			}
+			else
+				xsk_buff_free(bi->xdp);
+
+			bi->xdp = NULL;
 			total_rx_packets++;
 			total_rx_bytes += size;
 
@@ -509,7 +305,7 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 		}
 
 		/* XDP_PASS path */
-		skb = ixgbe_construct_skb_zc(rx_ring, bi, &xdp);
+		skb = ixgbe_construct_skb_zc(rx_ring, bi);
 		if (!skb) {
 			rx_ring->rx_stats.alloc_rx_buff_failed++;
 			break;
@@ -561,17 +357,17 @@ int ixgbe_clean_rx_irq_zc(struct ixgbe_q_vector *q_vector,
 
 void ixgbe_xsk_clean_rx_ring(struct ixgbe_ring *rx_ring)
 {
-	u16 i = rx_ring->next_to_clean;
-	struct ixgbe_rx_buffer *bi = &rx_ring->rx_buffer_info[i];
+	struct ixgbe_rx_buffer *bi;
+	u16 i;
 
-	while (i != rx_ring->next_to_alloc) {
-		xsk_umem_fq_reuse(rx_ring->xsk_umem, bi->handle);
-		i++;
-		bi++;
-		if (i == rx_ring->count) {
-			i = 0;
-			bi = rx_ring->rx_buffer_info;
-		}
+	for (i = 0; i < rx_ring->count; i++) {
+		bi = &rx_ring->rx_buffer_info[i];
+
+		if (!bi->xdp)
+			continue;
+
+		xsk_buff_free(bi->xdp);
+		bi->xdp = NULL;
 	}
 }
 
@@ -594,10 +390,9 @@ static bool ixgbe_xmit_zc(struct ixgbe_ring *xdp_ring, unsigned int budget)
 		if (!xsk_umem_consume_tx(xdp_ring->xsk_umem, &desc))
 			break;
 
-		dma = xdp_umem_get_dma(xdp_ring->xsk_umem, desc.addr);
-
-		dma_sync_single_for_device(xdp_ring->dev, dma, desc.len,
-					   DMA_BIDIRECTIONAL);
+		dma = xsk_buff_raw_get_dma(xdp_ring->xsk_umem, desc.addr);
+		xsk_buff_raw_dma_sync_for_device(xdp_ring->xsk_umem, dma,
+						 desc.len);
 
 		tx_bi = &xdp_ring->tx_buffer_info[xdp_ring->next_to_use];
 		tx_bi->bytecount = desc.len;
-- 
cgit v1.2.3-59-g8ed1b


From 39d6443c8daf9fefcfcf89de7ae87240956a0b84 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:20:59 +0200
Subject: mlx5, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Use the new MEM_TYPE_XSK_BUFF_POOL API in lieu of MEM_TYPE_ZERO_COPY in
mlx5e. This allows dropping a lot of code from the driver (code that is
now common in the AF_XDP core and was related to XSK RX frame allocation,
DMA mapping, etc.) and slightly improves performance (RX +0.8 Mpps, TX
+0.4 Mpps).

rfc->v1: Put back the sanity check for XSK params, use XSK API to get
         the total headroom size. (Maxim)

v1->v2: Fix DMA address handling, set XDP metadata to invalid. (Maxim)

v2->v3: Handle frame_sz, use xsk_buff_xdp_get_frame_dma, use xsk_buff
        API for DMA sync on TX, add performance numbers. (Maxim)

v3->v4: Remove unused variable num_xsk_frames. (Jakub)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-12-bjorn.topel@gmail.com
---
 drivers/net/ethernet/mellanox/mlx5/core/en.h       |   7 +-
 .../net/ethernet/mellanox/mlx5/core/en/params.c    |  13 +--
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c   |  31 ++----
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.c    | 113 ++++-----------------
 .../net/ethernet/mellanox/mlx5/core/en/xsk/rx.h    |  23 +++--
 .../net/ethernet/mellanox/mlx5/core/en/xsk/tx.c    |   9 +-
 .../net/ethernet/mellanox/mlx5/core/en/xsk/umem.c  |  49 ++-------
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  25 +----
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    |  34 ++++++-
 10 files changed, 96 insertions(+), 210 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en.h b/drivers/net/ethernet/mellanox/mlx5/core/en.h
index 26911b15f8fe..0a02b804b2fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en.h
@@ -407,10 +407,7 @@ struct mlx5e_dma_info {
 	dma_addr_t addr;
 	union {
 		struct page *page;
-		struct {
-			u64 handle;
-			void *data;
-		} xsk;
+		struct xdp_buff *xsk;
 	};
 };
 
@@ -623,7 +620,6 @@ struct mlx5e_rq {
 		} mpwqe;
 	};
 	struct {
-		u16            umem_headroom;
 		u16            headroom;
 		u32            frame0_sz;
 		u8             map_dir;   /* dma map direction */
@@ -656,7 +652,6 @@ struct mlx5e_rq {
 	struct page_pool      *page_pool;
 
 	/* AF_XDP zero-copy */
-	struct zero_copy_allocator zca;
 	struct xdp_umem       *umem;
 
 	struct work_struct     recover_work;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
index eb2e1f2138e4..38e4f19d69f8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/params.c
@@ -12,15 +12,16 @@ static inline bool mlx5e_rx_is_xdp(struct mlx5e_params *params,
 u16 mlx5e_get_linear_rq_headroom(struct mlx5e_params *params,
 				 struct mlx5e_xsk_param *xsk)
 {
-	u16 headroom = NET_IP_ALIGN;
+	u16 headroom;
 
-	if (mlx5e_rx_is_xdp(params, xsk)) {
+	if (xsk)
+		return xsk->headroom;
+
+	headroom = NET_IP_ALIGN;
+	if (mlx5e_rx_is_xdp(params, xsk))
 		headroom += XDP_PACKET_HEADROOM;
-		if (xsk)
-			headroom += xsk->headroom;
-	} else {
+	else
 		headroom += MLX5_RX_HEADROOM;
-	}
 
 	return headroom;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 3507d23f0eb8..a2a194525b15 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -71,7 +71,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 	xdptxd.data = xdpf->data;
 	xdptxd.len  = xdpf->len;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY) {
+	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL) {
 		/* The xdp_buff was in the UMEM and was copied into a newly
 		 * allocated page. The UMEM page was returned via the ZCA, and
 		 * this new page has to be mapped at this point and has to be
@@ -119,50 +119,33 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 
 /* returns true if packet was consumed by xdp */
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len, bool xsk)
+		      u32 *len, struct xdp_buff *xdp)
 {
 	struct bpf_prog *prog = READ_ONCE(rq->xdp_prog);
-	struct xdp_umem *umem = rq->umem;
-	struct xdp_buff xdp;
 	u32 act;
 	int err;
 
 	if (!prog)
 		return false;
 
-	xdp.data = va + *rx_headroom;
-	xdp_set_data_meta_invalid(&xdp);
-	xdp.data_end = xdp.data + *len;
-	xdp.data_hard_start = va;
-	if (xsk)
-		xdp.handle = di->xsk.handle;
-	xdp.rxq = &rq->xdp_rxq;
-	xdp.frame_sz = rq->buff.frame0_sz;
-
-	act = bpf_prog_run_xdp(prog, &xdp);
-	if (xsk) {
-		u64 off = xdp.data - xdp.data_hard_start;
-
-		xdp.handle = xsk_umem_adjust_offset(umem, xdp.handle, off);
-	}
+	act = bpf_prog_run_xdp(prog, xdp);
 	switch (act) {
 	case XDP_PASS:
-		*rx_headroom = xdp.data - xdp.data_hard_start;
-		*len = xdp.data_end - xdp.data;
+		*len = xdp->data_end - xdp->data;
 		return false;
 	case XDP_TX:
-		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, &xdp)))
+		if (unlikely(!mlx5e_xmit_xdp_buff(rq->xdpsq, rq, di, xdp)))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags); /* non-atomic */
 		return true;
 	case XDP_REDIRECT:
 		/* When XDP enabled then page-refcnt==1 here */
-		err = xdp_do_redirect(rq->netdev, &xdp, prog);
+		err = xdp_do_redirect(rq->netdev, xdp, prog);
 		if (unlikely(err))
 			goto xdp_abort;
 		__set_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags);
 		__set_bit(MLX5E_RQ_FLAG_XDP_REDIRECT, rq->flags);
-		if (!xsk)
+		if (xdp->rxq->mem.type != MEM_TYPE_XSK_BUFF_POOL)
 			mlx5e_page_dma_unmap(rq, di);
 		rq->stats->xdp_redirect++;
 		return true;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
index e2e01f064c1e..2e4e117aeb49 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.h
@@ -63,7 +63,7 @@
 struct mlx5e_xsk_param;
 int mlx5e_xdp_max_mtu(struct mlx5e_params *params, struct mlx5e_xsk_param *xsk);
 bool mlx5e_xdp_handle(struct mlx5e_rq *rq, struct mlx5e_dma_info *di,
-		      void *va, u16 *rx_headroom, u32 *len, bool xsk);
+		      u32 *len, struct xdp_buff *xdp);
 void mlx5e_xdp_mpwqe_complete(struct mlx5e_xdpsq *sq);
 bool mlx5e_poll_xdpsq_cq(struct mlx5e_cq *cq);
 void mlx5e_free_xdpsq_descs(struct mlx5e_xdpsq *sq);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
index 62fc8a128a8d..a33a1f762c70 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.c
@@ -3,71 +3,10 @@
 
 #include "rx.h"
 #include "en/xdp.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 /* RX data path */
 
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count)
-{
-	/* Check in advance that we have enough frames, instead of allocating
-	 * one-by-one, failing and moving frames to the Reuse Ring.
-	 */
-	return xsk_umem_has_addrs_rq(rq->umem, count);
-}
-
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
-			      struct mlx5e_dma_info *dma_info)
-{
-	struct xdp_umem *umem = rq->umem;
-	u64 handle;
-
-	if (!xsk_umem_peek_addr_rq(umem, &handle))
-		return -ENOMEM;
-
-	dma_info->xsk.handle = xsk_umem_adjust_offset(umem, handle,
-						      rq->buff.umem_headroom);
-	dma_info->xsk.data = xdp_umem_get_data(umem, dma_info->xsk.handle);
-
-	/* No need to add headroom to the DMA address. In striding RQ case, we
-	 * just provide pages for UMR, and headroom is counted at the setup
-	 * stage when creating a WQE. In non-striding RQ case, headroom is
-	 * accounted in mlx5e_alloc_rx_wqe.
-	 */
-	dma_info->addr = xdp_umem_get_dma(umem, handle);
-
-	xsk_umem_release_addr_rq(umem);
-
-	dma_sync_single_for_device(rq->pdev, dma_info->addr, PAGE_SIZE,
-				   DMA_BIDIRECTIONAL);
-
-	return 0;
-}
-
-static inline void mlx5e_xsk_recycle_frame(struct mlx5e_rq *rq, u64 handle)
-{
-	xsk_umem_fq_reuse(rq->umem, handle & rq->umem->chunk_mask);
-}
-
-/* XSKRQ uses pages from UMEM, they must not be released. They are returned to
- * the userspace if possible, and if not, this function is called to reuse them
- * in the driver.
- */
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
-			    struct mlx5e_dma_info *dma_info)
-{
-	mlx5e_xsk_recycle_frame(rq, dma_info->xsk.handle);
-}
-
-/* Return a frame back to the hardware to fill in again. It is used by XDP when
- * the XDP program returns XDP_TX or XDP_REDIRECT not to an XSKMAP.
- */
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle)
-{
-	struct mlx5e_rq *rq = container_of(zca, struct mlx5e_rq, zca);
-
-	mlx5e_xsk_recycle_frame(rq, handle);
-}
-
 static struct sk_buff *mlx5e_xsk_construct_skb(struct mlx5e_rq *rq, void *data,
 					       u32 cqe_bcnt)
 {
@@ -90,11 +29,8 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 						    u32 head_offset,
 						    u32 page_idx)
 {
-	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
-	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
+	struct xdp_buff *xdp = wi->umr.dma_info[page_idx].xsk;
 	u32 cqe_bcnt32 = cqe_bcnt;
-	void *va, *data;
-	u32 frag_size;
 	bool consumed;
 
 	/* Check packet size. Note LRO doesn't use linear SKB */
@@ -103,22 +39,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 		return NULL;
 	}
 
-	/* head_offset is not used in this function, because di->xsk.data and
-	 * di->addr point directly to the necessary place. Furthermore, in the
-	 * current implementation, UMR pages are mapped to XSK frames, so
+	/* head_offset is not used in this function, because xdp->data and the
+	 * DMA address point directly to the necessary place. Furthermore, in
+	 * the current implementation, UMR pages are mapped to XSK frames, so
 	 * head_offset should always be 0.
 	 */
 	WARN_ON_ONCE(head_offset);
 
-	va             = di->xsk.data;
-	data           = va + rx_headroom;
-	frag_size      = rq->buff.headroom + cqe_bcnt32;
-
-	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
-	prefetch(data);
+	xdp->data_end = xdp->data + cqe_bcnt32;
+	xdp_set_data_meta_invalid(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp);
+	prefetch(xdp->data);
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, true);
+	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt32, xdp);
 	rcu_read_unlock();
 
 	/* Possible flows:
@@ -145,7 +79,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 	/* XDP_PASS: copy the data from the UMEM to a new SKB and reuse the
 	 * frame. On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt32);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt32);
 }
 
 struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
@@ -153,25 +87,20 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 					      struct mlx5e_wqe_frag_info *wi,
 					      u32 cqe_bcnt)
 {
-	struct mlx5e_dma_info *di = wi->di;
-	u16 rx_headroom = rq->buff.headroom - rq->buff.umem_headroom;
-	void *va, *data;
+	struct xdp_buff *xdp = wi->di->xsk;
 	bool consumed;
-	u32 frag_size;
 
-	/* wi->offset is not used in this function, because di->xsk.data and
-	 * di->addr point directly to the necessary place. Furthermore, in the
-	 * current implementation, one page = one packet = one frame, so
+	/* wi->offset is not used in this function, because xdp->data and the
+	 * DMA address point directly to the necessary place. Furthermore, the
+	 * XSK allocator allocates frames per packet, instead of pages, so
 	 * wi->offset should always be 0.
 	 */
 	WARN_ON_ONCE(wi->offset);
 
-	va             = di->xsk.data;
-	data           = va + rx_headroom;
-	frag_size      = rq->buff.headroom + cqe_bcnt;
-
-	dma_sync_single_for_cpu(rq->pdev, di->addr, frag_size, DMA_BIDIRECTIONAL);
-	prefetch(data);
+	xdp->data_end = xdp->data + cqe_bcnt;
+	xdp_set_data_meta_invalid(xdp);
+	xsk_buff_dma_sync_for_cpu(xdp);
+	prefetch(xdp->data);
 
 	if (unlikely(get_cqe_opcode(cqe) != MLX5_CQE_RESP_SEND)) {
 		rq->stats->wqe_err++;
@@ -179,7 +108,7 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	}
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, true);
+	consumed = mlx5e_xdp_handle(rq, NULL, &cqe_bcnt, xdp);
 	rcu_read_unlock();
 
 	if (likely(consumed))
@@ -189,5 +118,5 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 	 * will be handled by mlx5e_put_rx_frag.
 	 * On SKB allocation failure, NULL is returned.
 	 */
-	return mlx5e_xsk_construct_skb(rq, data, cqe_bcnt);
+	return mlx5e_xsk_construct_skb(rq, xdp->data, cqe_bcnt);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
index a8e11adbf426..d147b2f13b54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/rx.h
@@ -9,12 +9,6 @@
 
 /* RX data path */
 
-bool mlx5e_xsk_pages_enough_umem(struct mlx5e_rq *rq, int count);
-int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
-			      struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_page_release(struct mlx5e_rq *rq,
-			    struct mlx5e_dma_info *dma_info);
-void mlx5e_xsk_zca_free(struct zero_copy_allocator *zca, unsigned long handle);
 struct sk_buff *mlx5e_xsk_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq,
 						    struct mlx5e_mpw_info *wi,
 						    u16 cqe_bcnt,
@@ -25,6 +19,23 @@ struct sk_buff *mlx5e_xsk_skb_from_cqe_linear(struct mlx5e_rq *rq,
 					      struct mlx5e_wqe_frag_info *wi,
 					      u32 cqe_bcnt);
 
+static inline int mlx5e_xsk_page_alloc_umem(struct mlx5e_rq *rq,
+					    struct mlx5e_dma_info *dma_info)
+{
+	dma_info->xsk = xsk_buff_alloc(rq->umem);
+	if (!dma_info->xsk)
+		return -ENOMEM;
+
+	/* Store the DMA address without headroom. In striding RQ case, we just
+	 * provide pages for UMR, and headroom is counted at the setup stage
+	 * when creating a WQE. In non-striding RQ case, headroom is accounted
+	 * in mlx5e_alloc_rx_wqe.
+	 */
+	dma_info->addr = xsk_buff_xdp_get_frame_dma(dma_info->xsk);
+
+	return 0;
+}
+
 static inline bool mlx5e_xsk_update_rx_wakeup(struct mlx5e_rq *rq, bool alloc_err)
 {
 	if (!xsk_umem_uses_need_wakeup(rq->umem))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
index 3bcdb5b2fc20..83dce9cdb8c2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/tx.c
@@ -5,7 +5,7 @@
 #include "umem.h"
 #include "en/xdp.h"
 #include "en/params.h"
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 
 int mlx5e_xsk_wakeup(struct net_device *dev, u32 qid, u32 flags)
 {
@@ -92,12 +92,11 @@ bool mlx5e_xsk_tx(struct mlx5e_xdpsq *sq, unsigned int budget)
 			break;
 		}
 
-		xdptxd.dma_addr = xdp_umem_get_dma(umem, desc.addr);
-		xdptxd.data = xdp_umem_get_data(umem, desc.addr);
+		xdptxd.dma_addr = xsk_buff_raw_get_dma(umem, desc.addr);
+		xdptxd.data = xsk_buff_raw_get_data(umem, desc.addr);
 		xdptxd.len = desc.len;
 
-		dma_sync_single_for_device(sq->pdev, xdptxd.dma_addr,
-					   xdptxd.len, DMA_BIDIRECTIONAL);
+		xsk_buff_raw_dma_sync_for_device(umem, xdptxd.dma_addr, xdptxd.len);
 
 		if (unlikely(!sq->xmit_xdp_frame(sq, &xdptxd, &xdpi, check_result))) {
 			if (sq->mpwqe.wqe)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
index 5e49fdb564b3..7b17fcd0a56d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xsk/umem.c
@@ -10,40 +10,14 @@ static int mlx5e_xsk_map_umem(struct mlx5e_priv *priv,
 			      struct xdp_umem *umem)
 {
 	struct device *dev = priv->mdev->device;
-	u32 i;
 
-	for (i = 0; i < umem->npgs; i++) {
-		dma_addr_t dma = dma_map_page(dev, umem->pgs[i], 0, PAGE_SIZE,
-					      DMA_BIDIRECTIONAL);
-
-		if (unlikely(dma_mapping_error(dev, dma)))
-			goto err_unmap;
-		umem->pages[i].dma = dma;
-	}
-
-	return 0;
-
-err_unmap:
-	while (i--) {
-		dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
-		umem->pages[i].dma = 0;
-	}
-
-	return -ENOMEM;
+	return xsk_buff_dma_map(umem, dev, 0);
 }
 
 static void mlx5e_xsk_unmap_umem(struct mlx5e_priv *priv,
 				 struct xdp_umem *umem)
 {
-	struct device *dev = priv->mdev->device;
-	u32 i;
-
-	for (i = 0; i < umem->npgs; i++) {
-		dma_unmap_page(dev, umem->pages[i].dma, PAGE_SIZE,
-			       DMA_BIDIRECTIONAL);
-		umem->pages[i].dma = 0;
-	}
+	return xsk_buff_dma_unmap(umem, 0);
 }
 
 static int mlx5e_xsk_get_umems(struct mlx5e_xsk *xsk)
@@ -90,13 +64,14 @@ static void mlx5e_xsk_remove_umem(struct mlx5e_xsk *xsk, u16 ix)
 
 static bool mlx5e_xsk_is_umem_sane(struct xdp_umem *umem)
 {
-	return umem->headroom <= 0xffff && umem->chunk_size_nohr <= 0xffff;
+	return xsk_umem_get_headroom(umem) <= 0xffff &&
+		xsk_umem_get_chunk_size(umem) <= 0xffff;
 }
 
 void mlx5e_build_xsk_param(struct xdp_umem *umem, struct mlx5e_xsk_param *xsk)
 {
-	xsk->headroom = umem->headroom;
-	xsk->chunk_size = umem->chunk_size_nohr + umem->headroom;
+	xsk->headroom = xsk_umem_get_headroom(umem);
+	xsk->chunk_size = xsk_umem_get_chunk_size(umem);
 }
 
 static int mlx5e_xsk_enable_locked(struct mlx5e_priv *priv,
@@ -241,18 +216,6 @@ int mlx5e_xsk_setup_umem(struct net_device *dev, struct xdp_umem *umem, u16 qid)
 		      mlx5e_xsk_disable_umem(priv, ix);
 }
 
-int mlx5e_xsk_resize_reuseq(struct xdp_umem *umem, u32 nentries)
-{
-	struct xdp_umem_fq_reuse *reuseq;
-
-	reuseq = xsk_reuseq_prepare(nentries);
-	if (unlikely(!reuseq))
-		return -ENOMEM;
-	xsk_reuseq_free(xsk_reuseq_swap(umem, reuseq));
-
-	return 0;
-}
-
 u16 mlx5e_xsk_first_unused_channel(struct mlx5e_params *params, struct mlx5e_xsk *xsk)
 {
 	u16 res = xsk->refcnt ? params->num_channels : 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 0e4ca08ddca9..4041132723a3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -38,7 +38,7 @@
 #include <linux/bpf.h>
 #include <linux/if_bridge.h>
 #include <net/page_pool.h>
-#include <net/xdp_sock.h>
+#include <net/xdp_sock_drv.h>
 #include "eswitch.h"
 #include "en.h"
 #include "en/txrx.h"
@@ -374,7 +374,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	struct mlx5_core_dev *mdev = c->mdev;
 	void *rqc = rqp->rqc;
 	void *rqc_wq = MLX5_ADDR_OF(rqc, rqc, wq);
-	u32 num_xsk_frames = 0;
 	u32 rq_xdp_ix;
 	u32 pool_size;
 	int wq_sz;
@@ -414,7 +413,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 	rq->buff.map_dir = rq->xdp_prog ? DMA_BIDIRECTIONAL : DMA_FROM_DEVICE;
 	rq->buff.headroom = mlx5e_get_rq_headroom(mdev, params, xsk);
-	rq->buff.umem_headroom = xsk ? xsk->headroom : 0;
 	pool_size = 1 << params->log_rq_mtu_frames;
 
 	switch (rq->wq_type) {
@@ -428,10 +426,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 		wq_sz = mlx5_wq_ll_get_size(&rq->mpwqe.wq);
 
-		if (xsk)
-			num_xsk_frames = wq_sz <<
-				mlx5e_mpwqe_get_log_num_strides(mdev, params, xsk);
-
 		pool_size = MLX5_MPWRQ_PAGES_PER_WQE <<
 			mlx5e_mpwqe_get_log_rq_size(params, xsk);
 
@@ -483,9 +477,6 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 
 		wq_sz = mlx5_wq_cyc_get_size(&rq->wqe.wq);
 
-		if (xsk)
-			num_xsk_frames = wq_sz << rq->wqe.info.log_num_frags;
-
 		rq->wqe.info = rqp->frags_info;
 		rq->buff.frame0_sz = rq->wqe.info.arr[0].frag_stride;
 
@@ -526,19 +517,9 @@ static int mlx5e_alloc_rq(struct mlx5e_channel *c,
 	}
 
 	if (xsk) {
-		rq->buff.frame0_sz = xsk_umem_xdp_frame_sz(umem);
-
-		err = mlx5e_xsk_resize_reuseq(umem, num_xsk_frames);
-		if (unlikely(err)) {
-			mlx5_core_err(mdev, "Unable to allocate the Reuse Ring for %u frames\n",
-				      num_xsk_frames);
-			goto err_free;
-		}
-
-		rq->zca.free = mlx5e_xsk_zca_free;
 		err = xdp_rxq_info_reg_mem_model(&rq->xdp_rxq,
-						 MEM_TYPE_ZERO_COPY,
-						 &rq->zca);
+						 MEM_TYPE_XSK_BUFF_POOL, NULL);
+		xsk_buff_set_rxq_info(rq->umem, &rq->xdp_rxq);
 	} else {
 		/* Create a page_pool and register it with rxq */
 		pp_params.order     = 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 821f94beda7a..d7b24e8905f1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -300,7 +300,7 @@ static inline void mlx5e_page_release(struct mlx5e_rq *rq,
 		 * put into the Reuse Ring, because there is no way to return
 		 * the page to the userspace when the interface goes down.
 		 */
-		mlx5e_xsk_page_release(rq, dma_info);
+		xsk_buff_free(dma_info->xsk);
 	else
 		mlx5e_page_release_dynamic(rq, dma_info, recycle);
 }
@@ -385,7 +385,11 @@ static int mlx5e_alloc_rx_wqes(struct mlx5e_rq *rq, u16 ix, u8 wqe_bulk)
 	if (rq->umem) {
 		int pages_desired = wqe_bulk << rq->wqe.info.log_num_frags;
 
-		if (unlikely(!mlx5e_xsk_pages_enough_umem(rq, pages_desired)))
+		/* Check in advance that we have enough frames, instead of
+		 * allocating one-by-one, failing and moving frames to the
+		 * Reuse Ring.
+		 */
+		if (unlikely(!xsk_buff_can_alloc(rq->umem, pages_desired)))
 			return -ENOMEM;
 	}
 
@@ -480,8 +484,11 @@ static int mlx5e_alloc_rx_mpwqe(struct mlx5e_rq *rq, u16 ix)
 	int err;
 	int i;
 
+	/* Check in advance that we have enough frames, instead of allocating
+	 * one-by-one, failing and moving frames to the Reuse Ring.
+	 */
 	if (rq->umem &&
-	    unlikely(!mlx5e_xsk_pages_enough_umem(rq, MLX5_MPWRQ_PAGES_PER_WQE))) {
+	    unlikely(!xsk_buff_can_alloc(rq->umem, MLX5_MPWRQ_PAGES_PER_WQE))) {
 		err = -ENOMEM;
 		goto err;
 	}
@@ -1044,12 +1051,24 @@ struct sk_buff *mlx5e_build_linear_skb(struct mlx5e_rq *rq, void *va,
 	return skb;
 }
 
+static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
+				u32 len, struct xdp_buff *xdp)
+{
+	xdp->data_hard_start = va;
+	xdp_set_data_meta_invalid(xdp);
+	xdp->data = va + headroom;
+	xdp->data_end = xdp->data + len;
+	xdp->rxq = &rq->xdp_rxq;
+	xdp->frame_sz = rq->buff.frame0_sz;
+}
+
 struct sk_buff *
 mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 			  struct mlx5e_wqe_frag_info *wi, u32 cqe_bcnt)
 {
 	struct mlx5e_dma_info *di = wi->di;
 	u16 rx_headroom = rq->buff.headroom;
+	struct xdp_buff xdp;
 	struct sk_buff *skb;
 	void *va, *data;
 	bool consumed;
@@ -1065,11 +1084,13 @@ mlx5e_skb_from_cqe_linear(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe,
 	prefetch(data);
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt, false);
+	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt, &xdp);
+	consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt, &xdp);
 	rcu_read_unlock();
 	if (consumed)
 		return NULL; /* page/packet was consumed by XDP */
 
+	rx_headroom = xdp.data - xdp.data_hard_start;
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt);
 	if (unlikely(!skb))
@@ -1343,6 +1364,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	struct mlx5e_dma_info *di = &wi->umr.dma_info[page_idx];
 	u16 rx_headroom = rq->buff.headroom;
 	u32 cqe_bcnt32 = cqe_bcnt;
+	struct xdp_buff xdp;
 	struct sk_buff *skb;
 	void *va, *data;
 	u32 frag_size;
@@ -1364,7 +1386,8 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 	prefetch(data);
 
 	rcu_read_lock();
-	consumed = mlx5e_xdp_handle(rq, di, va, &rx_headroom, &cqe_bcnt32, false);
+	mlx5e_fill_xdp_buff(rq, va, rx_headroom, cqe_bcnt32, &xdp);
+	consumed = mlx5e_xdp_handle(rq, di, &cqe_bcnt32, &xdp);
 	rcu_read_unlock();
 	if (consumed) {
 		if (__test_and_clear_bit(MLX5E_RQ_FLAG_XDP_XMIT, rq->flags))
@@ -1372,6 +1395,7 @@ mlx5e_skb_from_cqe_mpwrq_linear(struct mlx5e_rq *rq, struct mlx5e_mpw_info *wi,
 		return NULL; /* page/packet was consumed by XDP */
 	}
 
+	rx_headroom = xdp.data - xdp.data_hard_start;
 	frag_size = MLX5_SKB_FRAG_SZ(rx_headroom + cqe_bcnt32);
 	skb = mlx5e_build_linear_skb(rq, va, frag_size, rx_headroom, cqe_bcnt32);
 	if (unlikely(!skb))
-- 
cgit v1.2.3-59-g8ed1b


From 0807892ecb35734b7ce6f7c29b078f1b60151c94 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:21:00 +0200
Subject: xsk: Remove MEM_TYPE_ZERO_COPY and corresponding code
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

There are no users of MEM_TYPE_ZERO_COPY. Remove all corresponding
code, including the "handle" member of struct xdp_buff.

rfc->v1: Fixed spelling in commit message. (Björn)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-13-bjorn.topel@gmail.com
---
 drivers/net/hyperv/netvsc_bpf.c |   1 -
 include/net/xdp.h               |   9 +--
 include/net/xdp_sock.h          |  45 ------------
 include/net/xdp_sock_drv.h      | 149 ----------------------------------------
 include/trace/events/xdp.h      |   1 -
 net/core/xdp.c                  |  42 ++---------
 net/xdp/xdp_umem.c              |  56 +--------------
 net/xdp/xsk.c                   |  48 +------------
 net/xdp/xsk_buff_pool.c         |   7 ++
 net/xdp/xsk_queue.c             |  62 -----------------
 net/xdp/xsk_queue.h             | 105 ----------------------------
 11 files changed, 15 insertions(+), 510 deletions(-)

diff --git a/drivers/net/hyperv/netvsc_bpf.c b/drivers/net/hyperv/netvsc_bpf.c
index 1e0c024b0a93..8e4141552423 100644
--- a/drivers/net/hyperv/netvsc_bpf.c
+++ b/drivers/net/hyperv/netvsc_bpf.c
@@ -50,7 +50,6 @@ u32 netvsc_run_xdp(struct net_device *ndev, struct netvsc_channel *nvchan,
 	xdp->data_end = xdp->data + len;
 	xdp->rxq = &nvchan->xdp_rxq;
 	xdp->frame_sz = PAGE_SIZE;
-	xdp->handle = 0;
 
 	memcpy(xdp->data, data, len);
 
diff --git a/include/net/xdp.h b/include/net/xdp.h
index f432134c7c00..90f11760bd12 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -39,7 +39,6 @@ enum xdp_mem_type {
 	MEM_TYPE_PAGE_SHARED = 0, /* Split-page refcnt based model */
 	MEM_TYPE_PAGE_ORDER0,     /* Orig XDP full page model */
 	MEM_TYPE_PAGE_POOL,
-	MEM_TYPE_ZERO_COPY,
 	MEM_TYPE_XSK_BUFF_POOL,
 	MEM_TYPE_MAX,
 };
@@ -55,10 +54,6 @@ struct xdp_mem_info {
 
 struct page_pool;
 
-struct zero_copy_allocator {
-	void (*free)(struct zero_copy_allocator *zca, unsigned long handle);
-};
-
 struct xdp_rxq_info {
 	struct net_device *dev;
 	u32 queue_index;
@@ -71,7 +66,6 @@ struct xdp_buff {
 	void *data_end;
 	void *data_meta;
 	void *data_hard_start;
-	unsigned long handle;
 	struct xdp_rxq_info *rxq;
 	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
 };
@@ -120,8 +114,7 @@ struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
 	int metasize;
 	int headroom;
 
-	if (xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY ||
-	    xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
+	if (xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL)
 		return xdp_convert_zc_to_xdp_frame(xdp);
 
 	/* Assure headroom is available for storing info */
diff --git a/include/net/xdp_sock.h b/include/net/xdp_sock.h
index 6e7265f63c04..96bfc5f5f24e 100644
--- a/include/net/xdp_sock.h
+++ b/include/net/xdp_sock.h
@@ -17,26 +17,12 @@ struct net_device;
 struct xsk_queue;
 struct xdp_buff;
 
-struct xdp_umem_page {
-	void *addr;
-	dma_addr_t dma;
-};
-
-struct xdp_umem_fq_reuse {
-	u32 nentries;
-	u32 length;
-	u64 handles[];
-};
-
 struct xdp_umem {
 	struct xsk_queue *fq;
 	struct xsk_queue *cq;
 	struct xsk_buff_pool *pool;
-	struct xdp_umem_page *pages;
-	u64 chunk_mask;
 	u64 size;
 	u32 headroom;
-	u32 chunk_size_nohr;
 	u32 chunk_size;
 	struct user_struct *user;
 	refcount_t users;
@@ -48,7 +34,6 @@ struct xdp_umem {
 	u8 flags;
 	int id;
 	struct net_device *dev;
-	struct xdp_umem_fq_reuse *fq_reuse;
 	bool zc;
 	spinlock_t xsk_tx_list_lock;
 	struct list_head xsk_tx_list;
@@ -109,21 +94,6 @@ static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 	return xs;
 }
 
-static inline u64 xsk_umem_extract_addr(u64 addr)
-{
-	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static inline u64 xsk_umem_extract_offset(u64 addr)
-{
-	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
-{
-	return xsk_umem_extract_addr(addr) + xsk_umem_extract_offset(addr);
-}
-
 #else
 
 static inline int xsk_generic_rcv(struct xdp_sock *xs, struct xdp_buff *xdp)
@@ -146,21 +116,6 @@ static inline struct xdp_sock *__xsk_map_lookup_elem(struct bpf_map *map,
 	return NULL;
 }
 
-static inline u64 xsk_umem_extract_addr(u64 addr)
-{
-	return 0;
-}
-
-static inline u64 xsk_umem_extract_offset(u64 addr)
-{
-	return 0;
-}
-
-static inline u64 xsk_umem_add_offset_to_addr(u64 addr)
-{
-	return 0;
-}
-
 #endif /* CONFIG_XDP_SOCKETS */
 
 #endif /* _LINUX_XDP_SOCK_H */
diff --git a/include/net/xdp_sock_drv.h b/include/net/xdp_sock_drv.h
index 7752c8663d1b..ccf848f7efa4 100644
--- a/include/net/xdp_sock_drv.h
+++ b/include/net/xdp_sock_drv.h
@@ -11,16 +11,9 @@
 
 #ifdef CONFIG_XDP_SOCKETS
 
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt);
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr);
-void xsk_umem_release_addr(struct xdp_umem *umem);
 void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries);
 bool xsk_umem_consume_tx(struct xdp_umem *umem, struct xdp_desc *desc);
 void xsk_umem_consume_tx_done(struct xdp_umem *umem);
-struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries);
-struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
-					  struct xdp_umem_fq_reuse *newq);
-void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq);
 struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev, u16 queue_id);
 void xsk_set_rx_need_wakeup(struct xdp_umem *umem);
 void xsk_set_tx_need_wakeup(struct xdp_umem *umem);
@@ -28,80 +21,6 @@ void xsk_clear_rx_need_wakeup(struct xdp_umem *umem);
 void xsk_clear_tx_need_wakeup(struct xdp_umem *umem);
 bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem);
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	unsigned long page_addr;
-
-	addr = xsk_umem_add_offset_to_addr(addr);
-	page_addr = (unsigned long)umem->pages[addr >> PAGE_SHIFT].addr;
-
-	return (char *)(page_addr & PAGE_MASK) + (addr & ~PAGE_MASK);
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	addr = xsk_umem_add_offset_to_addr(addr);
-
-	return umem->pages[addr >> PAGE_SHIFT].dma + (addr & ~PAGE_MASK);
-}
-
-/* Reuse-queue aware version of FILL queue helpers */
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (rq->length >= cnt)
-		return true;
-
-	return xsk_umem_has_addrs(umem, cnt - rq->length);
-}
-
-static inline bool xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (!rq->length)
-		return xsk_umem_peek_addr(umem, addr);
-
-	*addr = rq->handles[rq->length - 1];
-	return addr;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	if (!rq->length)
-		xsk_umem_release_addr(umem);
-	else
-		rq->length--;
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
-	struct xdp_umem_fq_reuse *rq = umem->fq_reuse;
-
-	rq->handles[rq->length++] = addr;
-}
-
-/* Handle the offset appropriately depending on aligned or unaligned mode.
- * For unaligned mode, we store the offset in the upper 16-bits of the address.
- * For aligned mode, we simply add the offset to the address.
- */
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 address,
-					 u64 offset)
-{
-	if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG)
-		return address + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-	else
-		return address + offset;
-}
-
-static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
-{
-	return umem->chunk_size_nohr;
-}
-
 static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
 {
 	return XDP_PACKET_HEADROOM + umem->headroom;
@@ -192,20 +111,6 @@ static inline void xsk_buff_raw_dma_sync_for_device(struct xdp_umem *umem,
 
 #else
 
-static inline bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
-	return false;
-}
-
-static inline u64 *xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
-	return NULL;
-}
-
-static inline void xsk_umem_release_addr(struct xdp_umem *umem)
-{
-}
-
 static inline void xsk_umem_complete_tx(struct xdp_umem *umem, u32 nb_entries)
 {
 }
@@ -220,55 +125,12 @@ static inline void xsk_umem_consume_tx_done(struct xdp_umem *umem)
 {
 }
 
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
-{
-	return NULL;
-}
-
-static inline struct xdp_umem_fq_reuse *xsk_reuseq_swap(
-	struct xdp_umem *umem, struct xdp_umem_fq_reuse *newq)
-{
-	return NULL;
-}
-
-static inline void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
-{
-}
-
 static inline struct xdp_umem *xdp_get_umem_from_qid(struct net_device *dev,
 						     u16 queue_id)
 {
 	return NULL;
 }
 
-static inline char *xdp_umem_get_data(struct xdp_umem *umem, u64 addr)
-{
-	return NULL;
-}
-
-static inline dma_addr_t xdp_umem_get_dma(struct xdp_umem *umem, u64 addr)
-{
-	return 0;
-}
-
-static inline bool xsk_umem_has_addrs_rq(struct xdp_umem *umem, u32 cnt)
-{
-	return false;
-}
-
-static inline u64 *xsk_umem_peek_addr_rq(struct xdp_umem *umem, u64 *addr)
-{
-	return NULL;
-}
-
-static inline void xsk_umem_release_addr_rq(struct xdp_umem *umem)
-{
-}
-
-static inline void xsk_umem_fq_reuse(struct xdp_umem *umem, u64 addr)
-{
-}
-
 static inline void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
 {
 }
@@ -290,17 +152,6 @@ static inline bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 	return false;
 }
 
-static inline u64 xsk_umem_adjust_offset(struct xdp_umem *umem, u64 handle,
-					 u64 offset)
-{
-	return 0;
-}
-
-static inline u32 xsk_umem_xdp_frame_sz(struct xdp_umem *umem)
-{
-	return 0;
-}
-
 static inline u32 xsk_umem_get_headroom(struct xdp_umem *umem)
 {
 	return 0;
diff --git a/include/trace/events/xdp.h b/include/trace/events/xdp.h
index 48547a12fa27..b73d3e141323 100644
--- a/include/trace/events/xdp.h
+++ b/include/trace/events/xdp.h
@@ -287,7 +287,6 @@ TRACE_EVENT(xdp_devmap_xmit,
 	FN(PAGE_SHARED)		\
 	FN(PAGE_ORDER0)		\
 	FN(PAGE_POOL)		\
-	FN(ZERO_COPY)		\
 	FN(XSK_BUFF_POOL)
 
 #define __MEM_TYPE_TP_FN(x)	\
diff --git a/net/core/xdp.c b/net/core/xdp.c
index f0ce8b195193..a8c2f243367d 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -110,27 +110,6 @@ static void mem_allocator_disconnect(void *allocator)
 	mutex_unlock(&mem_id_lock);
 }
 
-static void mem_id_disconnect(int id)
-{
-	struct xdp_mem_allocator *xa;
-
-	mutex_lock(&mem_id_lock);
-
-	xa = rhashtable_lookup_fast(mem_id_ht, &id, mem_id_rht_params);
-	if (!xa) {
-		mutex_unlock(&mem_id_lock);
-		WARN(1, "Request remove non-existing id(%d), driver bug?", id);
-		return;
-	}
-
-	trace_mem_disconnect(xa);
-
-	if (!rhashtable_remove_fast(mem_id_ht, &xa->node, mem_id_rht_params))
-		call_rcu(&xa->rcu, __xdp_mem_allocator_rcu_free);
-
-	mutex_unlock(&mem_id_lock);
-}
-
 void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 {
 	struct xdp_mem_allocator *xa;
@@ -144,9 +123,6 @@ void xdp_rxq_info_unreg_mem_model(struct xdp_rxq_info *xdp_rxq)
 	if (id == 0)
 		return;
 
-	if (xdp_rxq->mem.type == MEM_TYPE_ZERO_COPY)
-		return mem_id_disconnect(id);
-
 	if (xdp_rxq->mem.type == MEM_TYPE_PAGE_POOL) {
 		rcu_read_lock();
 		xa = rhashtable_lookup(mem_id_ht, &id, mem_id_rht_params);
@@ -302,7 +278,7 @@ int xdp_rxq_info_reg_mem_model(struct xdp_rxq_info *xdp_rxq,
 	xdp_rxq->mem.type = type;
 
 	if (!allocator) {
-		if (type == MEM_TYPE_PAGE_POOL || type == MEM_TYPE_ZERO_COPY)
+		if (type == MEM_TYPE_PAGE_POOL)
 			return -EINVAL; /* Setup time check page_pool req */
 		return 0;
 	}
@@ -362,7 +338,7 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * of xdp_frames/pages in those cases.
  */
 static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-			 unsigned long handle, struct xdp_buff *xdp)
+			 struct xdp_buff *xdp)
 {
 	struct xdp_mem_allocator *xa;
 	struct page *page;
@@ -384,14 +360,6 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		page = virt_to_page(data); /* Assumes order0 page*/
 		put_page(page);
 		break;
-	case MEM_TYPE_ZERO_COPY:
-		/* NB! Only valid from an xdp_buff! */
-		rcu_read_lock();
-		/* mem->id is valid, checked in xdp_rxq_info_reg_mem_model() */
-		xa = rhashtable_lookup(mem_id_ht, &mem->id, mem_id_rht_params);
-		xa->zc_alloc->free(xa->zc_alloc, handle);
-		rcu_read_unlock();
-		break;
 	case MEM_TYPE_XSK_BUFF_POOL:
 		/* NB! Only valid from an xdp_buff! */
 		xsk_buff_free(xdp);
@@ -404,19 +372,19 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, false, 0, NULL);
+	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, true, 0, NULL);
+	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp->handle, xdp);
+	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
 }
 EXPORT_SYMBOL_GPL(xdp_return_buff);
 
diff --git a/net/xdp/xdp_umem.c b/net/xdp/xdp_umem.c
index 7f04688045d5..19e59d1a5e9f 100644
--- a/net/xdp/xdp_umem.c
+++ b/net/xdp/xdp_umem.c
@@ -179,37 +179,6 @@ void xdp_umem_clear_dev(struct xdp_umem *umem)
 	umem->zc = false;
 }
 
-static void xdp_umem_unmap_pages(struct xdp_umem *umem)
-{
-	unsigned int i;
-
-	for (i = 0; i < umem->npgs; i++)
-		if (PageHighMem(umem->pgs[i]))
-			vunmap(umem->pages[i].addr);
-}
-
-static int xdp_umem_map_pages(struct xdp_umem *umem)
-{
-	unsigned int i;
-	void *addr;
-
-	for (i = 0; i < umem->npgs; i++) {
-		if (PageHighMem(umem->pgs[i]))
-			addr = vmap(&umem->pgs[i], 1, VM_MAP, PAGE_KERNEL);
-		else
-			addr = page_address(umem->pgs[i]);
-
-		if (!addr) {
-			xdp_umem_unmap_pages(umem);
-			return -ENOMEM;
-		}
-
-		umem->pages[i].addr = addr;
-	}
-
-	return 0;
-}
-
 static void xdp_umem_unpin_pages(struct xdp_umem *umem)
 {
 	unpin_user_pages_dirty_lock(umem->pgs, umem->npgs, true);
@@ -244,14 +213,9 @@ static void xdp_umem_release(struct xdp_umem *umem)
 		umem->cq = NULL;
 	}
 
-	xsk_reuseq_destroy(umem);
 	xp_destroy(umem->pool);
-	xdp_umem_unmap_pages(umem);
 	xdp_umem_unpin_pages(umem);
 
-	kvfree(umem->pages);
-	umem->pages = NULL;
-
 	xdp_umem_unaccount_pages(umem);
 	kfree(umem);
 }
@@ -385,11 +349,8 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (headroom >= chunk_size - XDP_PACKET_HEADROOM)
 		return -EINVAL;
 
-	umem->chunk_mask = unaligned_chunks ? XSK_UNALIGNED_BUF_ADDR_MASK
-					    : ~((u64)chunk_size - 1);
 	umem->size = size;
 	umem->headroom = headroom;
-	umem->chunk_size_nohr = chunk_size - headroom;
 	umem->chunk_size = chunk_size;
 	umem->npgs = size / PAGE_SIZE;
 	umem->pgs = NULL;
@@ -408,29 +369,14 @@ static int xdp_umem_reg(struct xdp_umem *umem, struct xdp_umem_reg *mr)
 	if (err)
 		goto out_account;
 
-	umem->pages = kvcalloc(umem->npgs, sizeof(*umem->pages),
-			       GFP_KERNEL_ACCOUNT);
-	if (!umem->pages) {
-		err = -ENOMEM;
-		goto out_pin;
-	}
-
-	err = xdp_umem_map_pages(umem);
-	if (err)
-		goto out_pages;
-
 	umem->pool = xp_create(umem->pgs, umem->npgs, chunks, chunk_size,
 			       headroom, size, unaligned_chunks);
 	if (!umem->pool) {
 		err = -ENOMEM;
-		goto out_unmap;
+		goto out_pin;
 	}
 	return 0;
 
-out_unmap:
-	xdp_umem_unmap_pages(umem);
-out_pages:
-	kvfree(umem->pages);
 out_pin:
 	xdp_umem_unpin_pages(umem);
 out_account:
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 6933f0d494ba..3f2ab732ab8b 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -39,24 +39,6 @@ bool xsk_is_setup_for_bpf_map(struct xdp_sock *xs)
 		READ_ONCE(xs->umem->fq);
 }
 
-bool xsk_umem_has_addrs(struct xdp_umem *umem, u32 cnt)
-{
-	return xskq_cons_has_entries(umem->fq, cnt);
-}
-EXPORT_SYMBOL(xsk_umem_has_addrs);
-
-bool xsk_umem_peek_addr(struct xdp_umem *umem, u64 *addr)
-{
-	return xskq_cons_peek_addr(umem->fq, addr, umem);
-}
-EXPORT_SYMBOL(xsk_umem_peek_addr);
-
-void xsk_umem_release_addr(struct xdp_umem *umem)
-{
-	xskq_cons_release(umem->fq);
-}
-EXPORT_SYMBOL(xsk_umem_release_addr);
-
 void xsk_set_rx_need_wakeup(struct xdp_umem *umem)
 {
 	if (umem->need_wakeup & XDP_WAKEUP_RX)
@@ -203,8 +185,7 @@ static int xsk_rcv(struct xdp_sock *xs, struct xdp_buff *xdp,
 
 	len = xdp->data_end - xdp->data;
 
-	return xdp->rxq->mem.type == MEM_TYPE_ZERO_COPY ||
-		xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
+	return xdp->rxq->mem.type == MEM_TYPE_XSK_BUFF_POOL ?
 		__xsk_rcv_zc(xs, xdp, len) :
 		__xsk_rcv(xs, xdp, len, explicit_free);
 }
@@ -588,24 +569,6 @@ static struct socket *xsk_lookup_xsk_from_fd(int fd)
 	return sock;
 }
 
-/* Check if umem pages are contiguous.
- * If zero-copy mode, use the DMA address to do the page contiguity check
- * For all other modes we use addr (kernel virtual address)
- * Store the result in the low bits of addr.
- */
-static void xsk_check_page_contiguity(struct xdp_umem *umem, u32 flags)
-{
-	struct xdp_umem_page *pgs = umem->pages;
-	int i, is_contig;
-
-	for (i = 0; i < umem->npgs - 1; i++) {
-		is_contig = (flags & XDP_ZEROCOPY) ?
-			(pgs[i].dma + PAGE_SIZE == pgs[i + 1].dma) :
-			(pgs[i].addr + PAGE_SIZE == pgs[i + 1].addr);
-		pgs[i].addr += is_contig << XSK_NEXT_PG_CONTIG_SHIFT;
-	}
-}
-
 static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 {
 	struct sockaddr_xdp *sxdp = (struct sockaddr_xdp *)addr;
@@ -688,23 +651,14 @@ static int xsk_bind(struct socket *sock, struct sockaddr *addr, int addr_len)
 		goto out_unlock;
 	} else {
 		/* This xsk has its own umem. */
-		xskq_set_umem(xs->umem->fq, xs->umem->size,
-			      xs->umem->chunk_mask);
-		xskq_set_umem(xs->umem->cq, xs->umem->size,
-			      xs->umem->chunk_mask);
-
 		err = xdp_umem_assign_dev(xs->umem, dev, qid, flags);
 		if (err)
 			goto out_unlock;
-
-		xsk_check_page_contiguity(xs->umem, flags);
 	}
 
 	xs->dev = dev;
 	xs->zc = xs->umem->zc;
 	xs->queue_id = qid;
-	xskq_set_umem(xs->rx, xs->umem->size, xs->umem->chunk_mask);
-	xskq_set_umem(xs->tx, xs->umem->size, xs->umem->chunk_mask);
 	xdp_add_sk_umem(xs->umem, xs);
 
 out_unlock:
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index e214a5795a62..89dae78865e7 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -8,6 +8,13 @@
 
 #include "xsk_queue.h"
 
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
 struct xsk_buff_pool {
 	struct xsk_queue *fq;
 	struct list_head free_list;
diff --git a/net/xdp/xsk_queue.c b/net/xdp/xsk_queue.c
index 554b1ebb4d02..6cf9586e5027 100644
--- a/net/xdp/xsk_queue.c
+++ b/net/xdp/xsk_queue.c
@@ -10,15 +10,6 @@
 
 #include "xsk_queue.h"
 
-void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask)
-{
-	if (!q)
-		return;
-
-	q->umem_size = umem_size;
-	q->chunk_mask = chunk_mask;
-}
-
 static size_t xskq_get_ring_size(struct xsk_queue *q, bool umem_queue)
 {
 	struct xdp_umem_ring *umem_ring;
@@ -64,56 +55,3 @@ void xskq_destroy(struct xsk_queue *q)
 	page_frag_free(q->ring);
 	kfree(q);
 }
-
-struct xdp_umem_fq_reuse *xsk_reuseq_prepare(u32 nentries)
-{
-	struct xdp_umem_fq_reuse *newq;
-
-	/* Check for overflow */
-	if (nentries > (u32)roundup_pow_of_two(nentries))
-		return NULL;
-	nentries = roundup_pow_of_two(nentries);
-
-	newq = kvmalloc(struct_size(newq, handles, nentries), GFP_KERNEL);
-	if (!newq)
-		return NULL;
-	memset(newq, 0, offsetof(typeof(*newq), handles));
-
-	newq->nentries = nentries;
-	return newq;
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_prepare);
-
-struct xdp_umem_fq_reuse *xsk_reuseq_swap(struct xdp_umem *umem,
-					  struct xdp_umem_fq_reuse *newq)
-{
-	struct xdp_umem_fq_reuse *oldq = umem->fq_reuse;
-
-	if (!oldq) {
-		umem->fq_reuse = newq;
-		return NULL;
-	}
-
-	if (newq->nentries < oldq->length)
-		return newq;
-
-	memcpy(newq->handles, oldq->handles,
-	       array_size(oldq->length, sizeof(u64)));
-	newq->length = oldq->length;
-
-	umem->fq_reuse = newq;
-	return oldq;
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_swap);
-
-void xsk_reuseq_free(struct xdp_umem_fq_reuse *rq)
-{
-	kvfree(rq);
-}
-EXPORT_SYMBOL_GPL(xsk_reuseq_free);
-
-void xsk_reuseq_destroy(struct xdp_umem *umem)
-{
-	xsk_reuseq_free(umem->fq_reuse);
-	umem->fq_reuse = NULL;
-}
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 9151aef7dbca..16bf15864788 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -32,8 +32,6 @@ struct xdp_umem_ring {
 };
 
 struct xsk_queue {
-	u64 chunk_mask;
-	u64 umem_size;
 	u32 ring_mask;
 	u32 nentries;
 	u32 cached_prod;
@@ -106,90 +104,6 @@ struct xsk_queue {
 
 /* Functions that read and validate content from consumer rings. */
 
-static inline bool xskq_cons_crosses_non_contig_pg(struct xdp_umem *umem,
-						   u64 addr,
-						   u64 length)
-{
-	bool cross_pg = (addr & (PAGE_SIZE - 1)) + length > PAGE_SIZE;
-	bool next_pg_contig =
-		(unsigned long)umem->pages[(addr >> PAGE_SHIFT)].addr &
-			XSK_NEXT_PG_CONTIG_MASK;
-
-	return cross_pg && !next_pg_contig;
-}
-
-static inline bool xskq_cons_is_valid_unaligned(struct xsk_queue *q,
-						u64 addr,
-						u64 length,
-						struct xdp_umem *umem)
-{
-	u64 base_addr = xsk_umem_extract_addr(addr);
-
-	addr = xsk_umem_add_offset_to_addr(addr);
-	if (base_addr >= q->umem_size || addr >= q->umem_size ||
-	    xskq_cons_crosses_non_contig_pg(umem, addr, length)) {
-		q->invalid_descs++;
-		return false;
-	}
-
-	return true;
-}
-
-static inline bool xskq_cons_is_valid_addr(struct xsk_queue *q, u64 addr)
-{
-	if (addr >= q->umem_size) {
-		q->invalid_descs++;
-		return false;
-	}
-
-	return true;
-}
-
-static inline bool xskq_cons_read_addr(struct xsk_queue *q, u64 *addr,
-				       struct xdp_umem *umem)
-{
-	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
-	while (q->cached_cons != q->cached_prod) {
-		u32 idx = q->cached_cons & q->ring_mask;
-
-		*addr = ring->desc[idx] & q->chunk_mask;
-
-		if (umem->flags & XDP_UMEM_UNALIGNED_CHUNK_FLAG) {
-			if (xskq_cons_is_valid_unaligned(q, *addr,
-							 umem->chunk_size_nohr,
-							 umem))
-				return true;
-			goto out;
-		}
-
-		if (xskq_cons_is_valid_addr(q, *addr))
-			return true;
-
-out:
-		q->cached_cons++;
-	}
-
-	return false;
-}
-
-static inline bool xskq_cons_read_addr_aligned(struct xsk_queue *q, u64 *addr)
-{
-	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
-
-	while (q->cached_cons != q->cached_prod) {
-		u32 idx = q->cached_cons & q->ring_mask;
-
-		*addr = ring->desc[idx];
-		if (xskq_cons_is_valid_addr(q, *addr))
-			return true;
-
-		q->cached_cons++;
-	}
-
-	return false;
-}
-
 static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
 {
 	struct xdp_umem_ring *ring = (struct xdp_umem_ring *)q->ring;
@@ -267,21 +181,6 @@ static inline bool xskq_cons_has_entries(struct xsk_queue *q, u32 cnt)
 	return entries >= cnt;
 }
 
-static inline bool xskq_cons_peek_addr(struct xsk_queue *q, u64 *addr,
-				       struct xdp_umem *umem)
-{
-	if (q->cached_prod == q->cached_cons)
-		xskq_cons_get_entries(q);
-	return xskq_cons_read_addr(q, addr, umem);
-}
-
-static inline bool xskq_cons_peek_addr_aligned(struct xsk_queue *q, u64 *addr)
-{
-	if (q->cached_prod == q->cached_cons)
-		xskq_cons_get_entries(q);
-	return xskq_cons_read_addr_aligned(q, addr);
-}
-
 static inline bool xskq_cons_peek_addr_unchecked(struct xsk_queue *q, u64 *addr)
 {
 	if (q->cached_prod == q->cached_cons)
@@ -410,11 +309,7 @@ static inline u64 xskq_nb_invalid_descs(struct xsk_queue *q)
 	return q ? q->invalid_descs : 0;
 }
 
-void xskq_set_umem(struct xsk_queue *q, u64 umem_size, u64 chunk_mask);
 struct xsk_queue *xskq_create(u32 nentries, bool umem_queue);
 void xskq_destroy(struct xsk_queue *q_ops);
 
-/* Executed by the core when the entire UMEM gets freed */
-void xsk_reuseq_destroy(struct xdp_umem *umem);
-
 #endif /* _LINUX_XSK_QUEUE_H */
-- 
cgit v1.2.3-59-g8ed1b


From 82c41671ca4f597b6ff05bd5d118161deec26e07 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:21:01 +0200
Subject: xdp: Simplify xdp_return_{frame, frame_rx_napi, buff}
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The xdp_return_{frame,frame_rx_napi,buff} functions are never used,
except in xdp_convert_zc_to_xdp_frame(), by the MEM_TYPE_XSK_BUFF_POOL
memory type.

To simplify and reduce code, change so that
xdp_convert_zc_to_xdp_frame() calls xsk_buff_free() directly since the
type is known, and remove MEM_TYPE_XSK_BUFF_POOL from the switch
statement in __xdp_return() function.

Suggested-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-14-bjorn.topel@gmail.com
---
 net/core/xdp.c | 21 +++++++++------------
 1 file changed, 9 insertions(+), 12 deletions(-)

diff --git a/net/core/xdp.c b/net/core/xdp.c
index a8c2f243367d..90f44f382115 100644
--- a/net/core/xdp.c
+++ b/net/core/xdp.c
@@ -335,10 +335,11 @@ EXPORT_SYMBOL_GPL(xdp_rxq_info_reg_mem_model);
  * scenarios (e.g. queue full), it is possible to return the xdp_frame
  * while still leveraging this protection.  The @napi_direct boolean
  * is used for those calls sites.  Thus, allowing for faster recycling
- * of xdp_frames/pages in those cases.
+ * of xdp_frames/pages in those cases. This path is never used by the
+ * MEM_TYPE_XSK_BUFF_POOL memory type, so it's explicitly not part of
+ * the switch-statement.
  */
-static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
-			 struct xdp_buff *xdp)
+static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct)
 {
 	struct xdp_mem_allocator *xa;
 	struct page *page;
@@ -360,33 +361,29 @@ static void __xdp_return(void *data, struct xdp_mem_info *mem, bool napi_direct,
 		page = virt_to_page(data); /* Assumes order0 page*/
 		put_page(page);
 		break;
-	case MEM_TYPE_XSK_BUFF_POOL:
-		/* NB! Only valid from an xdp_buff! */
-		xsk_buff_free(xdp);
-		break;
 	default:
 		/* Not possible, checked in xdp_rxq_info_reg_mem_model() */
+		WARN(1, "Incorrect XDP memory type (%d) usage", mem->type);
 		break;
 	}
 }
 
 void xdp_return_frame(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, false, NULL);
+	__xdp_return(xdpf->data, &xdpf->mem, false);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame);
 
 void xdp_return_frame_rx_napi(struct xdp_frame *xdpf)
 {
-	__xdp_return(xdpf->data, &xdpf->mem, true, NULL);
+	__xdp_return(xdpf->data, &xdpf->mem, true);
 }
 EXPORT_SYMBOL_GPL(xdp_return_frame_rx_napi);
 
 void xdp_return_buff(struct xdp_buff *xdp)
 {
-	__xdp_return(xdp->data, &xdp->rxq->mem, true, xdp);
+	__xdp_return(xdp->data, &xdp->rxq->mem, true);
 }
-EXPORT_SYMBOL_GPL(xdp_return_buff);
 
 /* Only called for MEM_TYPE_PAGE_POOL see xdp.h */
 void __xdp_release_frame(void *data, struct xdp_mem_info *mem)
@@ -467,7 +464,7 @@ struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp)
 	xdpf->metasize = metasize;
 	xdpf->mem.type = MEM_TYPE_PAGE_ORDER0;
 
-	xdp_return_buff(xdp);
+	xsk_buff_free(xdp);
 	return xdpf;
 }
 EXPORT_SYMBOL_GPL(xdp_convert_zc_to_xdp_frame);
-- 
cgit v1.2.3-59-g8ed1b


From 26062b185eee49142adc45f9aa187d909d02d961 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:21:02 +0200
Subject: xsk: Explicitly inline functions and move definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In order to reduce the number of function calls, the struct
xsk_buff_pool definition is moved to xsk_buff_pool.h. The functions
xp_get_dma(), xp_dma_sync_for_cpu(), xp_dma_sync_for_device(),
xp_validate_desc() and various helper functions are explicitly
inlined.

Further, move xp_get_handle() and xp_release() to xsk.c, to allow for
the compiler to perform inlining.

rfc->v1: Make sure xp_validate_desc() is inlined for Tx perf. (Maxim)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200520192103.355233-15-bjorn.topel@gmail.com
---
 include/net/xsk_buff_pool.h |  98 ++++++++++++++++++++++++++---
 net/xdp/xsk.c               |  15 +++++
 net/xdp/xsk_buff_pool.c     | 148 ++------------------------------------------
 net/xdp/xsk_queue.h         |  45 ++++++++++++++
 4 files changed, 156 insertions(+), 150 deletions(-)

diff --git a/include/net/xsk_buff_pool.h b/include/net/xsk_buff_pool.h
index 9f221b36e405..a4ff226505c9 100644
--- a/include/net/xsk_buff_pool.h
+++ b/include/net/xsk_buff_pool.h
@@ -4,6 +4,7 @@
 #ifndef XSK_BUFF_POOL_H_
 #define XSK_BUFF_POOL_H_
 
+#include <linux/if_xdp.h>
 #include <linux/types.h>
 #include <linux/dma-mapping.h>
 #include <net/xdp.h>
@@ -25,6 +26,27 @@ struct xdp_buff_xsk {
 	struct list_head free_list_node;
 };
 
+struct xsk_buff_pool {
+	struct xsk_queue *fq;
+	struct list_head free_list;
+	dma_addr_t *dma_pages;
+	struct xdp_buff_xsk *heads;
+	u64 chunk_mask;
+	u64 addrs_cnt;
+	u32 free_list_cnt;
+	u32 dma_pages_cnt;
+	u32 heads_cnt;
+	u32 free_heads_cnt;
+	u32 headroom;
+	u32 chunk_size;
+	u32 frame_len;
+	bool cheap_dma;
+	bool unaligned;
+	void *addrs;
+	struct device *dev;
+	struct xdp_buff_xsk *free_heads[];
+};
+
 /* AF_XDP core. */
 struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
 				u32 chunk_size, u32 headroom, u64 size,
@@ -32,8 +54,6 @@ struct xsk_buff_pool *xp_create(struct page **pages, u32 nr_pages, u32 chunks,
 void xp_set_fq(struct xsk_buff_pool *pool, struct xsk_queue *fq);
 void xp_destroy(struct xsk_buff_pool *pool);
 void xp_release(struct xdp_buff_xsk *xskb);
-u64 xp_get_handle(struct xdp_buff_xsk *xskb);
-bool xp_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc);
 
 /* AF_XDP, and XDP core. */
 void xp_free(struct xdp_buff_xsk *xskb);
@@ -47,10 +67,74 @@ struct xdp_buff *xp_alloc(struct xsk_buff_pool *pool);
 bool xp_can_alloc(struct xsk_buff_pool *pool, u32 count);
 void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr);
 dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr);
-dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb);
-dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb);
-void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb);
-void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma,
-			    size_t size);
+static inline dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
+{
+	return xskb->dma;
+}
+
+static inline dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
+{
+	return xskb->frame_dma;
+}
+
+void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb);
+static inline void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
+{
+	if (xskb->pool->cheap_dma)
+		return;
+
+	xp_dma_sync_for_cpu_slow(xskb);
+}
+
+void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
+				 size_t size);
+static inline void xp_dma_sync_for_device(struct xsk_buff_pool *pool,
+					  dma_addr_t dma, size_t size)
+{
+	if (pool->cheap_dma)
+		return;
+
+	xp_dma_sync_for_device_slow(pool, dma, size);
+}
+
+/* Masks for xdp_umem_page flags.
+ * The low 12-bits of the addr will be 0 since this is the page address, so we
+ * can use them for flags.
+ */
+#define XSK_NEXT_PG_CONTIG_SHIFT 0
+#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
+
+static inline bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
+						 u64 addr, u32 len)
+{
+	bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE;
+
+	if (pool->dma_pages_cnt && cross_pg) {
+		return !(pool->dma_pages[addr >> PAGE_SHIFT] &
+			 XSK_NEXT_PG_CONTIG_MASK);
+	}
+	return false;
+}
+
+static inline u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
+{
+	return addr & pool->chunk_mask;
+}
+
+static inline u64 xp_unaligned_extract_addr(u64 addr)
+{
+	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
+}
+
+static inline u64 xp_unaligned_extract_offset(u64 addr)
+{
+	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
+}
+
+static inline u64 xp_unaligned_add_offset_to_addr(u64 addr)
+{
+	return xp_unaligned_extract_addr(addr) +
+		xp_unaligned_extract_offset(addr);
+}
 
 #endif /* XSK_BUFF_POOL_H_ */
diff --git a/net/xdp/xsk.c b/net/xdp/xsk.c
index 3f2ab732ab8b..b6c0f08bd80d 100644
--- a/net/xdp/xsk.c
+++ b/net/xdp/xsk.c
@@ -99,6 +99,21 @@ bool xsk_umem_uses_need_wakeup(struct xdp_umem *umem)
 }
 EXPORT_SYMBOL(xsk_umem_uses_need_wakeup);
 
+void xp_release(struct xdp_buff_xsk *xskb)
+{
+	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
+}
+
+static u64 xp_get_handle(struct xdp_buff_xsk *xskb)
+{
+	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
+
+	offset += xskb->pool->headroom;
+	if (!xskb->pool->unaligned)
+		return xskb->orig_addr + offset;
+	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
+}
+
 static int __xsk_rcv_zc(struct xdp_sock *xs, struct xdp_buff *xdp, u32 len)
 {
 	struct xdp_buff_xsk *xskb = container_of(xdp, struct xdp_buff_xsk, xdp);
diff --git a/net/xdp/xsk_buff_pool.c b/net/xdp/xsk_buff_pool.c
index 89dae78865e7..540ed75e4482 100644
--- a/net/xdp/xsk_buff_pool.c
+++ b/net/xdp/xsk_buff_pool.c
@@ -8,34 +8,6 @@
 
 #include "xsk_queue.h"
 
-/* Masks for xdp_umem_page flags.
- * The low 12-bits of the addr will be 0 since this is the page address, so we
- * can use them for flags.
- */
-#define XSK_NEXT_PG_CONTIG_SHIFT 0
-#define XSK_NEXT_PG_CONTIG_MASK BIT_ULL(XSK_NEXT_PG_CONTIG_SHIFT)
-
-struct xsk_buff_pool {
-	struct xsk_queue *fq;
-	struct list_head free_list;
-	dma_addr_t *dma_pages;
-	struct xdp_buff_xsk *heads;
-	u64 chunk_mask;
-	u64 addrs_cnt;
-	u32 free_list_cnt;
-	u32 dma_pages_cnt;
-	u32 heads_cnt;
-	u32 free_heads_cnt;
-	u32 headroom;
-	u32 chunk_size;
-	u32 frame_len;
-	bool cheap_dma;
-	bool unaligned;
-	void *addrs;
-	struct device *dev;
-	struct xdp_buff_xsk *free_heads[];
-};
-
 static void xp_addr_unmap(struct xsk_buff_pool *pool)
 {
 	vunmap(pool->addrs);
@@ -228,50 +200,12 @@ int xp_dma_map(struct xsk_buff_pool *pool, struct device *dev,
 }
 EXPORT_SYMBOL(xp_dma_map);
 
-static bool xp_desc_crosses_non_contig_pg(struct xsk_buff_pool *pool,
-					  u64 addr, u32 len)
-{
-	bool cross_pg = (addr & (PAGE_SIZE - 1)) + len > PAGE_SIZE;
-
-	if (pool->dma_pages_cnt && cross_pg) {
-		return !(pool->dma_pages[addr >> PAGE_SHIFT] &
-			 XSK_NEXT_PG_CONTIG_MASK);
-	}
-	return false;
-}
-
 static bool xp_addr_crosses_non_contig_pg(struct xsk_buff_pool *pool,
 					  u64 addr)
 {
 	return xp_desc_crosses_non_contig_pg(pool, addr, pool->chunk_size);
 }
 
-void xp_release(struct xdp_buff_xsk *xskb)
-{
-	xskb->pool->free_heads[xskb->pool->free_heads_cnt++] = xskb;
-}
-
-static u64 xp_aligned_extract_addr(struct xsk_buff_pool *pool, u64 addr)
-{
-	return addr & pool->chunk_mask;
-}
-
-static u64 xp_unaligned_extract_addr(u64 addr)
-{
-	return addr & XSK_UNALIGNED_BUF_ADDR_MASK;
-}
-
-static u64 xp_unaligned_extract_offset(u64 addr)
-{
-	return addr >> XSK_UNALIGNED_BUF_OFFSET_SHIFT;
-}
-
-static u64 xp_unaligned_add_offset_to_addr(u64 addr)
-{
-	return xp_unaligned_extract_addr(addr) +
-		xp_unaligned_extract_offset(addr);
-}
-
 static bool xp_check_unaligned(struct xsk_buff_pool *pool, u64 *addr)
 {
 	*addr = xp_unaligned_extract_addr(*addr);
@@ -370,60 +304,6 @@ void xp_free(struct xdp_buff_xsk *xskb)
 }
 EXPORT_SYMBOL(xp_free);
 
-static bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
-				     struct xdp_desc *desc)
-{
-	u64 chunk, chunk_end;
-
-	chunk = xp_aligned_extract_addr(pool, desc->addr);
-	chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
-	if (chunk != chunk_end)
-		return false;
-
-	if (chunk >= pool->addrs_cnt)
-		return false;
-
-	if (desc->options)
-		return false;
-	return true;
-}
-
-static bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
-				       struct xdp_desc *desc)
-{
-	u64 addr, base_addr;
-
-	base_addr = xp_unaligned_extract_addr(desc->addr);
-	addr = xp_unaligned_add_offset_to_addr(desc->addr);
-
-	if (desc->len > pool->chunk_size)
-		return false;
-
-	if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
-	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
-		return false;
-
-	if (desc->options)
-		return false;
-	return true;
-}
-
-bool xp_validate_desc(struct xsk_buff_pool *pool, struct xdp_desc *desc)
-{
-	return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
-		xp_aligned_validate_desc(pool, desc);
-}
-
-u64 xp_get_handle(struct xdp_buff_xsk *xskb)
-{
-	u64 offset = xskb->xdp.data - xskb->xdp.data_hard_start;
-
-	offset += xskb->pool->headroom;
-	if (!xskb->pool->unaligned)
-		return xskb->orig_addr + offset;
-	return xskb->orig_addr + (offset << XSK_UNALIGNED_BUF_OFFSET_SHIFT);
-}
-
 void *xp_raw_get_data(struct xsk_buff_pool *pool, u64 addr)
 {
 	addr = pool->unaligned ? xp_unaligned_add_offset_to_addr(addr) : addr;
@@ -440,35 +320,17 @@ dma_addr_t xp_raw_get_dma(struct xsk_buff_pool *pool, u64 addr)
 }
 EXPORT_SYMBOL(xp_raw_get_dma);
 
-dma_addr_t xp_get_dma(struct xdp_buff_xsk *xskb)
-{
-	return xskb->dma;
-}
-EXPORT_SYMBOL(xp_get_dma);
-
-dma_addr_t xp_get_frame_dma(struct xdp_buff_xsk *xskb)
+void xp_dma_sync_for_cpu_slow(struct xdp_buff_xsk *xskb)
 {
-	return xskb->frame_dma;
-}
-EXPORT_SYMBOL(xp_get_frame_dma);
-
-void xp_dma_sync_for_cpu(struct xdp_buff_xsk *xskb)
-{
-	if (xskb->pool->cheap_dma)
-		return;
-
 	dma_sync_single_range_for_cpu(xskb->pool->dev, xskb->dma, 0,
 				      xskb->pool->frame_len, DMA_BIDIRECTIONAL);
 }
-EXPORT_SYMBOL(xp_dma_sync_for_cpu);
+EXPORT_SYMBOL(xp_dma_sync_for_cpu_slow);
 
-void xp_dma_sync_for_device(struct xsk_buff_pool *pool, dma_addr_t dma,
-			    size_t size)
+void xp_dma_sync_for_device_slow(struct xsk_buff_pool *pool, dma_addr_t dma,
+				 size_t size)
 {
-	if (pool->cheap_dma)
-		return;
-
 	dma_sync_single_range_for_device(pool->dev, dma, 0,
 					 size, DMA_BIDIRECTIONAL);
 }
-EXPORT_SYMBOL(xp_dma_sync_for_device);
+EXPORT_SYMBOL(xp_dma_sync_for_device_slow);
diff --git a/net/xdp/xsk_queue.h b/net/xdp/xsk_queue.h
index 16bf15864788..5b5d24d2dd37 100644
--- a/net/xdp/xsk_queue.h
+++ b/net/xdp/xsk_queue.h
@@ -118,6 +118,51 @@ static inline bool xskq_cons_read_addr_unchecked(struct xsk_queue *q, u64 *addr)
 	return false;
 }
 
+static inline bool xp_aligned_validate_desc(struct xsk_buff_pool *pool,
+					    struct xdp_desc *desc)
+{
+	u64 chunk, chunk_end;
+
+	chunk = xp_aligned_extract_addr(pool, desc->addr);
+	chunk_end = xp_aligned_extract_addr(pool, desc->addr + desc->len);
+	if (chunk != chunk_end)
+		return false;
+
+	if (chunk >= pool->addrs_cnt)
+		return false;
+
+	if (desc->options)
+		return false;
+	return true;
+}
+
+static inline bool xp_unaligned_validate_desc(struct xsk_buff_pool *pool,
+					      struct xdp_desc *desc)
+{
+	u64 addr, base_addr;
+
+	base_addr = xp_unaligned_extract_addr(desc->addr);
+	addr = xp_unaligned_add_offset_to_addr(desc->addr);
+
+	if (desc->len > pool->chunk_size)
+		return false;
+
+	if (base_addr >= pool->addrs_cnt || addr >= pool->addrs_cnt ||
+	    xp_desc_crosses_non_contig_pg(pool, addr, desc->len))
+		return false;
+
+	if (desc->options)
+		return false;
+	return true;
+}
+
+static inline bool xp_validate_desc(struct xsk_buff_pool *pool,
+				    struct xdp_desc *desc)
+{
+	return pool->unaligned ? xp_unaligned_validate_desc(pool, desc) :
+		xp_aligned_validate_desc(pool, desc);
+}
+
 static inline bool xskq_cons_is_valid_desc(struct xsk_queue *q,
 					   struct xdp_desc *d,
 					   struct xdp_umem *umem)
-- 
cgit v1.2.3-59-g8ed1b


From 28bee21dc04b39e587af3b68938e68caed02d552 Mon Sep 17 00:00:00 2001
From: Björn Töpel <bjorn.topel@intel.com>
Date: Wed, 20 May 2020 21:21:03 +0200
Subject: MAINTAINERS, xsk: Update AF_XDP section after moves/adds
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Update MAINTAINERS to correctly mirror the current AF_XDP socket file
layout. Also, add the AF_XDP files of libbpf.

rfc->v1: Sorted file entries. (Joe)

Signed-off-by: Björn Töpel <bjorn.topel@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Joe Perches <joe@perches.com>
Link: https://lore.kernel.org/bpf/20200520192103.355233-16-bjorn.topel@gmail.com
---
 MAINTAINERS | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index b7844f6cfa4a..087e68b21f9f 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18443,8 +18443,12 @@ R:	Jonathan Lemon <jonathan.lemon@gmail.com>
 L:	netdev@vger.kernel.org
 L:	bpf@vger.kernel.org
 S:	Maintained
-F:	kernel/bpf/xskmap.c
+F:	include/net/xdp_sock*
+F:	include/net/xsk_buffer_pool.h
+F:	include/uapi/linux/if_xdp.h
 F:	net/xdp/
+F:	samples/bpf/xdpsock*
+F:	tools/lib/bpf/xsk*
 
 XEN BLOCK SUBSYSTEM
 M:	Konrad Rzeszutek Wilk <konrad.wilk@oracle.com>
-- 
cgit v1.2.3-59-g8ed1b


From cac616db39c207dc63465a4e05c6ce0e60b2cce4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 21 May 2020 13:07:26 -0700
Subject: bpf: Verifier track null pointer branch_taken with JNE and JEQ

Currently, when considering the branches that may be taken for a jump
instruction, if the register being compared is a pointer the verifier
assumes both branches may be taken. But if the jump instruction is
checking whether a pointer is NULL, we have this information in the
verifier encoded in reg->type, so we can do better in these cases.
Specifically, these two common cases can be handled.

 * If the instruction is BPF_JEQ and we are comparing against a
   zero value. This test is 'if ptr == 0 goto +X' then using the
   type information in reg->type we can decide if the ptr is not
   null. This allows us to avoid pushing both branches onto the
   stack and instead only use the != 0 case. For example
   PTR_TO_SOCK and PTR_TO_SOCK_OR_NULL encode the null pointer.
   Note if the type is PTR_TO_SOCK_OR_NULL we can not learn anything.
   And also if the value is non-zero we learn nothing because it
   could be any arbitrary value, a different pointer for example.

 * If the instruction is BPF_JNE and we are comparing against a zero
   value then a similar analysis as above can be done. The test in
   asm looks like 'if ptr != 0 goto +X'. Again using the type
   information if the non null type is set (from above PTR_TO_SOCK)
   we know the jump is taken.

In this patch we extend is_branch_taken() to consider this extra
information and to return only the branch that will be taken. This
resolves a verifier issue reported with C code like the following.
See progs/test_sk_lookup_kern.c in selftests.

 sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
 bpf_printk("sk=%d\n", sk ? 1 : 0);
 if (sk)
   bpf_sk_release(sk);
 return sk ? TC_ACT_OK : TC_ACT_UNSPEC;

In the above the bpf_printk() will resolve the pointer from
PTR_TO_SOCK_OR_NULL to PTR_TO_SOCK. Then the second test guarding
the release will cause the verifier to walk both paths resulting
in an unreleased sock reference. See verifier/ref_tracking.c
in selftests for an assembly version of the above.

After the above additional logic is added the C code above passes
as expected.

Reported-by: Andrey Ignatov <rdna@fb.com>
Suggested-by: Alexei Starovoitov <ast@kernel.org>
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/159009164651.6313.380418298578070501.stgit@john-Precision-5820-Tower
---
 kernel/bpf/verifier.c | 36 +++++++++++++++++++++++++++++++++---
 1 file changed, 33 insertions(+), 3 deletions(-)

diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 2ed8351f47a4..d2e27dba4ac6 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -393,6 +393,15 @@ static bool type_is_sk_pointer(enum bpf_reg_type type)
 		type == PTR_TO_XDP_SOCK;
 }
 
+static bool reg_type_not_null(enum bpf_reg_type type)
+{
+	return type == PTR_TO_SOCKET ||
+		type == PTR_TO_TCP_SOCK ||
+		type == PTR_TO_MAP_VALUE ||
+		type == PTR_TO_SOCK_COMMON ||
+	        type == PTR_TO_BTF_ID;
+}
+
 static bool reg_type_may_be_null(enum bpf_reg_type type)
 {
 	return type == PTR_TO_MAP_VALUE_OR_NULL ||
@@ -6308,8 +6317,25 @@ static int is_branch64_taken(struct bpf_reg_state *reg, u64 val, u8 opcode)
 static int is_branch_taken(struct bpf_reg_state *reg, u64 val, u8 opcode,
 			   bool is_jmp32)
 {
-	if (__is_pointer_value(false, reg))
-		return -1;
+	if (__is_pointer_value(false, reg)) {
+		if (!reg_type_not_null(reg->type))
+			return -1;
+
+		/* If pointer is valid tests against zero will fail so we can
+		 * use this to direct branch taken.
+		 */
+		if (val != 0)
+			return -1;
+
+		switch (opcode) {
+		case BPF_JEQ:
+			return 0;
+		case BPF_JNE:
+			return 1;
+		default:
+			return -1;
+		}
+	}
 
 	if (is_jmp32)
 		return is_branch32_taken(reg, val, opcode);
@@ -6808,7 +6834,11 @@ static int check_cond_jmp_op(struct bpf_verifier_env *env,
 	}
 
 	if (pred >= 0) {
-		err = mark_chain_precision(env, insn->dst_reg);
+		/* If we get here with a dst_reg pointer type it is because
+		 * above is_branch_taken() special cased the 0 comparison.
+		 */
+		if (!__is_pointer_value(false, dst_reg))
+			err = mark_chain_precision(env, insn->dst_reg);
 		if (BPF_SRC(insn->code) == BPF_X && !err)
 			err = mark_chain_precision(env, insn->src_reg);
 		if (err)
-- 
cgit v1.2.3-59-g8ed1b


From c72b5cbb09bd76634b8d19695db2219964e24128 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 21 May 2020 13:07:46 -0700
Subject: bpf: Selftests, verifier case for non null pointer check branch taken

When we have a pointer type that is known to be non-null and compare it
against zero, we only follow the non-null branch. This adds tests to
cover this case for reference tracking. Also add the other case, a
comparison against a non-zero value, and ensure we still fail with an
unreleased reference.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/159009166599.6313.1593680633787453767.stgit@john-Precision-5820-Tower
---
 .../testing/selftests/bpf/verifier/ref_tracking.c  | 33 ++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/ref_tracking.c b/tools/testing/selftests/bpf/verifier/ref_tracking.c
index 604b46151736..056e0273bf12 100644
--- a/tools/testing/selftests/bpf/verifier/ref_tracking.c
+++ b/tools/testing/selftests/bpf/verifier/ref_tracking.c
@@ -821,3 +821,36 @@
 	.result = REJECT,
 	.errstr = "invalid mem access",
 },
+{
+	"reference tracking: branch tracking valid pointer null comparison",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
+{
+	"reference tracking: branch tracking valid pointer value comparison",
+	.insns = {
+	BPF_SK_LOOKUP(sk_lookup_tcp),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_MOV64_IMM(BPF_REG_3, 1),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 4),
+	BPF_MOV64_IMM(BPF_REG_3, 0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 1234, 2),
+	BPF_MOV64_REG(BPF_REG_1, BPF_REG_6),
+	BPF_EMIT_CALL(BPF_FUNC_sk_release),
+	BPF_EXIT_INSN(),
+	},
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.errstr = "Unreleased reference",
+	.result = REJECT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From f9b16ec0eeb75337aef38954a4066e6eecd7cfe5 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 21 May 2020 13:08:06 -0700
Subject: bpf: Selftests, verifier case for non null pointer map value branch

When we have a pointer type that is known to be non-null, we only follow
the non-null branch. This adds tests to cover the map_value pointer
returned from a map lookup. To force an error if both branches are
followed, we do an ALU op on R10.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159009168650.6313.7434084136067263554.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/verifier/value_or_null.c | 19 +++++++++++++++++++
 1 file changed, 19 insertions(+)

diff --git a/tools/testing/selftests/bpf/verifier/value_or_null.c b/tools/testing/selftests/bpf/verifier/value_or_null.c
index 860d4a71cd83..3ecb70a3d939 100644
--- a/tools/testing/selftests/bpf/verifier/value_or_null.c
+++ b/tools/testing/selftests/bpf/verifier/value_or_null.c
@@ -150,3 +150,22 @@
 	.result_unpriv = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
+{
+	"map lookup and null branch prediction",
+	.insns = {
+	BPF_MOV64_IMM(BPF_REG_1, 10),
+	BPF_STX_MEM(BPF_DW, BPF_REG_10, BPF_REG_1, -8),
+	BPF_MOV64_REG(BPF_REG_2, BPF_REG_10),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, -8),
+	BPF_LD_MAP_FD(BPF_REG_1, 0),
+	BPF_RAW_INSN(BPF_JMP | BPF_CALL, 0, 0, 0, BPF_FUNC_map_lookup_elem),
+	BPF_MOV64_REG(BPF_REG_6, BPF_REG_0),
+	BPF_JMP_IMM(BPF_JEQ, BPF_REG_6, 0, 2),
+	BPF_JMP_IMM(BPF_JNE, BPF_REG_6, 0, 1),
+	BPF_ALU64_IMM(BPF_ADD, BPF_REG_10, 10),
+	BPF_EXIT_INSN(),
+	},
+	.fixup_map_hash_8b = { 4 },
+	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
+	.result = ACCEPT,
+},
-- 
cgit v1.2.3-59-g8ed1b


From d844a71bff0fd899146e5981ec44b618afd17d83 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Thu, 21 May 2020 13:08:26 -0700
Subject: bpf: Selftests, add printk to test_sk_lookup_kern to encode null ptr
 check

Adding a printk to test_sk_lookup_kern created the reported failure
where a pointer type is checked twice for NULL. Let's add it to the
progs test test_sk_lookup_kern.c so we test the case from C all the
way into the verifier.

We already have printk's in selftests so it seems OK to add another one.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159009170603.6313.1715279795045285176.stgit@john-Precision-5820-Tower
---
 tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
index d2b38fa6a5b0..e83d0b48d80c 100644
--- a/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
+++ b/tools/testing/selftests/bpf/progs/test_sk_lookup_kern.c
@@ -73,6 +73,7 @@ int bpf_sk_lookup_test0(struct __sk_buff *skb)
 
 	tuple_len = ipv4 ? sizeof(tuple->ipv4) : sizeof(tuple->ipv6);
 	sk = bpf_sk_lookup_tcp(skb, tuple, tuple_len, BPF_F_CURRENT_NETNS, 0);
+	bpf_printk("sk=%d\n", sk ? 1 : 0);
 	if (sk)
 		bpf_sk_release(sk);
 	return sk ? TC_ACT_OK : TC_ACT_UNSPEC;
-- 
cgit v1.2.3-59-g8ed1b


From f45a645fa6af37abb5484a8d8be779283a38fb53 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Tue, 5 May 2020 15:55:37 -0700
Subject: ice: report netlist version in .info_get

The flash memory for the ice hardware contains a block of information
used for link management called the Netlist module.

As this essentially represents another section of firmware, add its
version information to the output of the driver's .info_get handler.

This includes both a version and the first few bytes of a hash of the
module contents.

  fw.netlist -> the version information extracted from the netlist module
  fw.netlist.build -> first 4 bytes of the hash of the contents, similar
                      to fw.mgmt.build

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 Documentation/networking/devlink/ice.rst        | 11 ++++
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 27 ++++++++
 drivers/net/ethernet/intel/ice/ice_devlink.c    | 23 +++++++
 drivers/net/ethernet/intel/ice/ice_nvm.c        | 86 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_type.h       | 11 ++++
 5 files changed, 158 insertions(+)

diff --git a/Documentation/networking/devlink/ice.rst b/Documentation/networking/devlink/ice.rst
index 4574352d6ff4..72ea8d295724 100644
--- a/Documentation/networking/devlink/ice.rst
+++ b/Documentation/networking/devlink/ice.rst
@@ -69,6 +69,17 @@ The ``ice`` driver reports the following versions
       - The version of the DDP package that is active in the device. Note
         that both the name (as reported by ``fw.app.name``) and version are
         required to uniquely identify the package.
+    * - ``fw.netlist``
+      - running
+      - 1.1.2000-6.7.0
+      - The version of the netlist module. This module defines the device's
+        Ethernet capabilities and default settings, and is used by the
+        management firmware as part of managing link and device
+        connectivity.
+    * - ``fw.netlist.build``
+      - running
+      - 0xee16ced7
+      - The first 4 bytes of the hash of the netlist module contents.
 
 Regions
 =======
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 2381b4014ed6..8767a78038e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1264,6 +1264,33 @@ struct ice_aqc_nvm_checksum {
 	u8 rsvd2[12];
 };
 
+/* The result of netlist NVM read comes in a TLV format. The actual data
+ * (netlist header) starts from word offset 1 (byte 2). The FW strips
+ * out the type field from the TLV header so all the netlist fields
+ * should adjust their offset value by 1 word (2 bytes) in order to map
+ * their correct location.
+ */
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID		0x11B
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET	1
+#define ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN		2 /* In bytes */
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET		2
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN		2 /* In bytes */
+#define ICE_AQC_NVM_NETLIST_NODE_COUNT_M		ICE_M(0x3FF, 0)
+#define ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET		5
+#define ICE_AQC_NVM_NETLIST_ID_BLK_LEN			0x30 /* In words */
+
+/* netlist ID block field offsets (word offsets) */
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW	2
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH	3
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW	4
+#define ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH	5
+#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW		6
+#define ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH		7
+#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW		8
+#define ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH		9
+#define ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH		0xA
+#define ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER		0x2F
+
 /**
  * Send to PF command (indirect 0x0801) ID is only used by PF
  *
diff --git a/drivers/net/ethernet/intel/ice/ice_devlink.c b/drivers/net/ethernet/intel/ice/ice_devlink.c
index c6833944b90a..a73d06e06b5d 100644
--- a/drivers/net/ethernet/intel/ice/ice_devlink.c
+++ b/drivers/net/ethernet/intel/ice/ice_devlink.c
@@ -105,6 +105,27 @@ static int ice_info_ddp_pkg_version(struct ice_pf *pf, char *buf, size_t len)
 	return 0;
 }
 
+static int ice_info_netlist_ver(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver;
+
+	/* The netlist version fields are BCD formatted */
+	snprintf(buf, len, "%x.%x.%x-%x.%x.%x", netlist->major, netlist->minor,
+		 netlist->type >> 16, netlist->type & 0xFFFF, netlist->rev,
+		 netlist->cust_ver);
+
+	return 0;
+}
+
+static int ice_info_netlist_build(struct ice_pf *pf, char *buf, size_t len)
+{
+	struct ice_netlist_ver_info *netlist = &pf->hw.netlist_ver;
+
+	snprintf(buf, len, "0x%08x", netlist->hash);
+
+	return 0;
+}
+
 #define fixed(key, getter) { ICE_VERSION_FIXED, key, getter }
 #define running(key, getter) { ICE_VERSION_RUNNING, key, getter }
 
@@ -128,6 +149,8 @@ static const struct ice_devlink_version {
 	running(DEVLINK_INFO_VERSION_GENERIC_FW_BUNDLE_ID, ice_info_eetrack),
 	running("fw.app.name", ice_info_ddp_pkg_name),
 	running(DEVLINK_INFO_VERSION_GENERIC_FW_APP, ice_info_ddp_pkg_version),
+	running("fw.netlist", ice_info_netlist_ver),
+	running("fw.netlist.build", ice_info_netlist_build),
 };
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 8beb675d676b..7c2a06892bbb 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -366,6 +366,87 @@ static enum ice_status ice_get_orom_ver_info(struct ice_hw *hw)
 	return 0;
 }
 
+/**
+ * ice_get_netlist_ver_info
+ * @hw: pointer to the HW struct
+ *
+ * Get the netlist version information
+ */
+static enum ice_status ice_get_netlist_ver_info(struct ice_hw *hw)
+{
+	struct ice_netlist_ver_info *ver = &hw->netlist_ver;
+	enum ice_status ret;
+	u32 id_blk_start;
+	__le16 raw_data;
+	u16 data, i;
+	u16 *buff;
+
+	ret = ice_acquire_nvm(hw, ICE_RES_READ);
+	if (ret)
+		return ret;
+	buff = kcalloc(ICE_AQC_NVM_NETLIST_ID_BLK_LEN, sizeof(*buff),
+		       GFP_KERNEL);
+	if (!buff) {
+		ret = ICE_ERR_NO_MEMORY;
+		goto exit_no_mem;
+	}
+
+	/* read module length */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN_OFFSET * 2,
+			      ICE_AQC_NVM_LINK_TOPO_NETLIST_LEN, &raw_data,
+			      false, false, NULL);
+	if (ret)
+		goto exit_error;
+
+	data = le16_to_cpu(raw_data);
+	/* exit if length is = 0 */
+	if (!data)
+		goto exit_error;
+
+	/* read node count */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      ICE_AQC_NVM_NETLIST_NODE_COUNT_OFFSET * 2,
+			      ICE_AQC_NVM_NETLIST_NODE_COUNT_LEN, &raw_data,
+			      false, false, NULL);
+	if (ret)
+		goto exit_error;
+	data = le16_to_cpu(raw_data) & ICE_AQC_NVM_NETLIST_NODE_COUNT_M;
+
+	/* netlist ID block starts from offset 4 + node count * 2 */
+	id_blk_start = ICE_AQC_NVM_NETLIST_ID_BLK_START_OFFSET + data * 2;
+
+	/* read the entire netlist ID block */
+	ret = ice_aq_read_nvm(hw, ICE_AQC_NVM_LINK_TOPO_NETLIST_MOD_ID,
+			      id_blk_start * 2,
+			      ICE_AQC_NVM_NETLIST_ID_BLK_LEN * 2, buff, false,
+			      false, NULL);
+	if (ret)
+		goto exit_error;
+
+	for (i = 0; i < ICE_AQC_NVM_NETLIST_ID_BLK_LEN; i++)
+		buff[i] = le16_to_cpu(((__force __le16 *)buff)[i]);
+
+	ver->major = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_MAJOR_VER_LOW];
+	ver->minor = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_MINOR_VER_LOW];
+	ver->type = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_TYPE_LOW];
+	ver->rev = (buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_HIGH] << 16) |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_REV_LOW];
+	ver->cust_ver = buff[ICE_AQC_NVM_NETLIST_ID_BLK_CUST_VER];
+	/* Read the left most 4 bytes of SHA */
+	ver->hash = buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 15] << 16 |
+		buff[ICE_AQC_NVM_NETLIST_ID_BLK_SHA_HASH + 14];
+
+exit_error:
+	kfree(buff);
+exit_no_mem:
+	ice_release_nvm(hw);
+	return ret;
+}
+
 /**
  * ice_discover_flash_size - Discover the available flash size.
  * @hw: pointer to the HW struct
@@ -515,6 +596,11 @@ enum ice_status ice_init_nvm(struct ice_hw *hw)
 		return status;
 	}
 
+	/* read the netlist version information */
+	status = ice_get_netlist_ver_info(hw);
+	if (status)
+		ice_debug(hw, ICE_DBG_INIT, "Failed to read netlist info.\n");
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 4ce5f92fca4a..35ea5adbb3e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -259,6 +259,16 @@ struct ice_nvm_info {
 
 #define ICE_NVM_VER_LEN	32
 
+/* netlist version information */
+struct ice_netlist_ver_info {
+	u32 major;			/* major high/low */
+	u32 minor;			/* minor high/low */
+	u32 type;			/* type high/low */
+	u32 rev;			/* revision high/low */
+	u32 hash;			/* SHA-1 hash word */
+	u16 cust_ver;			/* customer version */
+};
+
 /* Max number of port to queue branches w.r.t topology */
 #define ICE_MAX_TRAFFIC_CLASS 8
 #define ICE_TXSCHED_MAX_BRANCHES ICE_MAX_TRAFFIC_CLASS
@@ -506,6 +516,7 @@ struct ice_hw {
 	struct ice_nvm_info nvm;
 	struct ice_hw_dev_caps dev_caps;	/* device capabilities */
 	struct ice_hw_func_caps func_caps;	/* function capabilities */
+	struct ice_netlist_ver_info netlist_ver; /* netlist version info */
 
 	struct ice_switch_info *switch_info;	/* switch filter lists */
 
-- 
cgit v1.2.3-59-g8ed1b


From a4e82a81f57387803f950cc3d9d112bcc5553a3d Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Wed, 6 May 2020 09:32:30 -0700
Subject: ice: Add support for tunnel offloads

Create a boost TCAM entry for each tunnel port in order to get a tunnel
PTYPE. Update netdev feature flags and implement the appropriate logic to
get and set values for hardware offloads.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h               |   4 +
 drivers/net/ethernet/intel/ice/ice_common.c        |   2 +
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c     | 524 ++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_flex_pipe.h     |   5 +
 drivers/net/ethernet/intel/ice/ice_flex_type.h     |  33 ++
 drivers/net/ethernet/intel/ice/ice_flow.c          |  36 +-
 drivers/net/ethernet/intel/ice/ice_flow.h          |   3 +
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h     |  25 +
 drivers/net/ethernet/intel/ice/ice_main.c          |  94 +++-
 drivers/net/ethernet/intel/ice/ice_protocol_type.h |   1 +
 drivers/net/ethernet/intel/ice/ice_txrx.c          | 126 ++++-
 drivers/net/ethernet/intel/ice/ice_txrx.h          |   3 +
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c      |  21 +-
 drivers/net/ethernet/intel/ice/ice_type.h          |   4 +
 14 files changed, 867 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 5c11448bfbb3..43349eaa02b2 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -37,6 +37,10 @@
 #include <net/devlink.h>
 #include <net/ipv6.h>
 #include <net/xdp_sock.h>
+#include <net/geneve.h>
+#include <net/gre.h>
+#include <net/udp_tunnel.h>
+#include <net/vxlan.h>
 #include "ice_devids.h"
 #include "ice_type.h"
 #include "ice_txrx.h"
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 2c0d8fd3d5cd..1a613199d6cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -746,6 +746,7 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	status = ice_init_hw_tbls(hw);
 	if (status)
 		goto err_unroll_fltr_mgmt_struct;
+	mutex_init(&hw->tnl_lock);
 	return 0;
 
 err_unroll_fltr_mgmt_struct:
@@ -775,6 +776,7 @@ void ice_deinit_hw(struct ice_hw *hw)
 	ice_sched_clear_agg(hw);
 	ice_free_seg(hw);
 	ice_free_hw_tbls(hw);
+	mutex_destroy(&hw->tnl_lock);
 
 	if (hw->port_info) {
 		devm_kfree(ice_hw_to_dev(hw), hw->port_info);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index e7a2671222d2..62e305511c7e 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -5,6 +5,15 @@
 #include "ice_flex_pipe.h"
 #include "ice_flow.h"
 
+/* To support tunneling entries by PF, the package will append the PF number to
+ * the label; for example TNL_VXLAN_PF0, TNL_VXLAN_PF1, TNL_VXLAN_PF2, etc.
+ */
+static const struct ice_tunnel_type_scan tnls[] = {
+	{ TNL_VXLAN,		"TNL_VXLAN_PF" },
+	{ TNL_GENEVE,		"TNL_GENEVE_PF" },
+	{ TNL_LAST,		"" }
+};
+
 static const u32 ice_sect_lkup[ICE_BLK_COUNT][ICE_SECT_COUNT] = {
 	/* SWITCH */
 	{
@@ -239,6 +248,268 @@ ice_pkg_enum_section(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
 	return state->sect;
 }
 
+/**
+ * ice_pkg_enum_entry
+ * @ice_seg: pointer to the ice segment (or NULL on subsequent calls)
+ * @state: pointer to the enum state
+ * @sect_type: section type to enumerate
+ * @offset: pointer to variable that receives the offset in the table (optional)
+ * @handler: function that handles access to the entries into the section type
+ *
+ * This function will enumerate all the entries of a particular section type
+ * in the ice segment. The first call is made with the ice_seg parameter
+ * non-NULL; on subsequent calls, ice_seg is set to NULL which continues the
+ * enumeration. When the function returns a NULL pointer, then the end of the
+ * entries has been reached.
+ *
+ * Since each section may have a different header and entry size, the handler
+ * function is needed to determine the number and location of entries in each
+ * section.
+ *
+ * The offset parameter is optional, but should be used for sections that
+ * contain an offset for each section table. For such cases, the section handler
+ * function must return the appropriate offset + index to give the absolute
+ * offset for each entry. For example, if the base for a section's header
+ * indicates a base offset of 10, and the index for the entry is 2, then
+ * section handler function should set the offset to 10 + 2 = 12.
+ */
+static void *
+ice_pkg_enum_entry(struct ice_seg *ice_seg, struct ice_pkg_enum *state,
+		   u32 sect_type, u32 *offset,
+		   void *(*handler)(u32 sect_type, void *section,
+				    u32 index, u32 *offset))
+{
+	void *entry;
+
+	if (ice_seg) {
+		if (!handler)
+			return NULL;
+
+		if (!ice_pkg_enum_section(ice_seg, state, sect_type))
+			return NULL;
+
+		state->entry_idx = 0;
+		state->handler = handler;
+	} else {
+		state->entry_idx++;
+	}
+
+	if (!state->handler)
+		return NULL;
+
+	/* get entry */
+	entry = state->handler(state->sect_type, state->sect, state->entry_idx,
+			       offset);
+	if (!entry) {
+		/* end of a section, look for another section of this type */
+		if (!ice_pkg_enum_section(NULL, state, 0))
+			return NULL;
+
+		state->entry_idx = 0;
+		entry = state->handler(state->sect_type, state->sect,
+				       state->entry_idx, offset);
+	}
+
+	return entry;
+}
+
+/**
+ * ice_boost_tcam_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the boost TCAM entry to be returned
+ * @offset: pointer to receive absolute offset, always 0 for boost TCAM sections
+ *
+ * Callback for ice_pkg_enum_entry that walks the entries of one boost TCAM
+ * section. Returns entry @index, or NULL when the section has no such entry.
+ */
+static void *
+ice_boost_tcam_handler(u32 sect_type, void *section, u32 index, u32 *offset)
+{
+	struct ice_boost_tcam_section *boost;
+
+	if (!section)
+		return NULL;
+
+	if (sect_type != ICE_SID_RXPARSER_BOOST_TCAM)
+		return NULL;
+	/* zero-based index: the per-buffer entry limit itself is out of range */
+	if (index >= ICE_MAX_BST_TCAMS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	boost = section;
+	if (index >= le16_to_cpu(boost->count))
+		return NULL;
+
+	return boost->tcam + index;
+}
+
+/**
+ * ice_find_boost_entry
+ * @ice_seg: pointer to the ice segment (non-NULL)
+ * @addr: Boost TCAM address of entry to search for
+ * @entry: returns pointer to the entry
+ *
+ * Walks every Rx boost TCAM section of the segment looking for @addr. Returns
+ * 0 with @entry set on a match, ICE_ERR_CFG with @entry set to NULL if there
+ * is none, or ICE_ERR_PARAM (leaving @entry untouched) when @ice_seg is NULL.
+ */
+static enum ice_status
+ice_find_boost_entry(struct ice_seg *ice_seg, u16 addr,
+		     struct ice_boost_tcam_entry **entry)
+{
+	struct ice_boost_tcam_entry *tcam;
+	struct ice_pkg_enum state;
+
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return ICE_ERR_PARAM;
+
+	do {
+		tcam = ice_pkg_enum_entry(ice_seg, &state,
+					  ICE_SID_RXPARSER_BOOST_TCAM, NULL,
+					  ice_boost_tcam_handler);
+		if (tcam && le16_to_cpu(tcam->addr) == addr) {
+			*entry = tcam;
+			return 0;
+		}
+		/* NULL segment: continue the enumeration on the next call */
+		ice_seg = NULL;
+	} while (tcam);
+
+	*entry = NULL;
+	return ICE_ERR_CFG;
+}
+
+/**
+ * ice_label_enum_handler
+ * @sect_type: section type
+ * @section: pointer to section
+ * @index: index of the label entry to be returned
+ * @offset: pointer to receive absolute offset, always zero for label sections
+ *
+ * Callback for ice_pkg_enum_entry that walks the labels of one label section.
+ * Returns label @index, or NULL when the section has no such label.
+ */
+static void *
+ice_label_enum_handler(u32 __always_unused sect_type, void *section, u32 index,
+		       u32 *offset)
+{
+	struct ice_label_section *labels;
+
+	if (!section)
+		return NULL;
+	/* zero-based index: the per-buffer label limit itself is out of range */
+	if (index >= ICE_MAX_LABELS_IN_BUF)
+		return NULL;
+
+	if (offset)
+		*offset = 0;
+
+	labels = section;
+	if (index >= le16_to_cpu(labels->count))
+		return NULL;
+
+	return labels->label + index;
+}
+
+/**
+ * ice_enum_labels
+ * @ice_seg: pointer to the ice segment (NULL on subsequent calls)
+ * @type: the section type that will contain the label (0 on subsequent calls)
+ * @state: ice_pkg_enum structure that will hold the state of the enumeration
+ * @value: pointer to a value that will return the label's value if found
+ *
+ * Enumerates the labels in the package. Call ice_enum_labels(ice_seg, type,
+ * ...) to start and ice_enum_labels(NULL, 0, ...) to continue. Returns the
+ * label name and stores its value in @value; returns NULL, leaving @value
+ * untouched, at the end of the list or if @type is not a label section ID.
+ */
+static char *
+ice_enum_labels(struct ice_seg *ice_seg, u32 type, struct ice_pkg_enum *state,
+		u16 *value)
+{
+	struct ice_label *label;
+
+	/* Check for valid label section on first call */
+	if (type && !(type >= ICE_SID_LBL_FIRST && type <= ICE_SID_LBL_LAST))
+		return NULL;
+
+	label = ice_pkg_enum_entry(ice_seg, state, type, NULL,
+				   ice_label_enum_handler);
+	if (!label)
+		return NULL;
+
+	*value = le16_to_cpu(label->value);
+	return label->name;
+}
+
+/**
+ * ice_init_pkg_hints
+ * @hw: pointer to the HW structure
+ * @ice_seg: pointer to the segment of the package scan (non-NULL)
+ *
+ * Scans the ICE_SID_LBL_RXPARSER_TMEM labels for tunnel labels of this PF and
+ * records them in hw->tnl, then caches each entry's boost TCAM entry pointer
+ * and marks the entry valid if one was found. hw->tnl is zeroed first. ice_seg
+ * must not be NULL, as the first ice_enum_labels call needs a real segment.
+ */
+static void ice_init_pkg_hints(struct ice_hw *hw, struct ice_seg *ice_seg)
+{
+	struct ice_pkg_enum state;
+	char *label_name;
+	u16 val;
+	int i;
+
+	memset(&hw->tnl, 0, sizeof(hw->tnl));
+	memset(&state, 0, sizeof(state));
+
+	if (!ice_seg)
+		return;
+
+	label_name = ice_enum_labels(ice_seg, ICE_SID_LBL_RXPARSER_TMEM, &state,
+				     &val);
+	/* stop once the table is full or the labels are exhausted */
+	while (label_name && hw->tnl.count < ICE_TUNNEL_MAX_ENTRIES) {
+		for (i = 0; tnls[i].type != TNL_LAST; i++) {
+			size_t len = strlen(tnls[i].label_prefix);
+
+			/* Look for matching label start, before continuing */
+			if (strncmp(label_name, tnls[i].label_prefix, len))
+				continue;
+
+			/* Make sure this label matches our PF. Note that the PF
+			 * character ('0' - '7') will be located where our
+			 * prefix string's null terminator is located.
+			 */
+			if ((label_name[len] - '0') == hw->pf_id) {
+				hw->tnl.tbl[hw->tnl.count].type = tnls[i].type;
+				hw->tnl.tbl[hw->tnl.count].valid = false;
+				hw->tnl.tbl[hw->tnl.count].in_use = false;
+				hw->tnl.tbl[hw->tnl.count].marked = false;
+				hw->tnl.tbl[hw->tnl.count].boost_addr = val;
+				hw->tnl.tbl[hw->tnl.count].port = 0;
+				hw->tnl.count++;
+				break;
+			}
+		}
+
+		label_name = ice_enum_labels(NULL, 0, &state, &val);
+	}
+
+	/* Cache the appropriate boost TCAM entry pointers */
+	for (i = 0; i < hw->tnl.count; i++) {
+		ice_find_boost_entry(ice_seg, hw->tnl.tbl[i].boost_addr,
+				     &hw->tnl.tbl[i].boost_entry);
+		if (hw->tnl.tbl[i].boost_entry)
+			hw->tnl.tbl[i].valid = true;
+	}
+}
+
 /* Key creation */
 
 #define ICE_DC_KEY	0x1	/* don't care */
@@ -1050,7 +1321,8 @@ enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
 		return ICE_ERR_CFG;
 	}
 
-	/* download package */
+	/* initialize package hints and then download package */
+	ice_init_pkg_hints(hw, seg);
 	status = ice_download_pkg(hw, seg);
 	if (status == ICE_ERR_AQ_NO_WORK) {
 		ice_debug(hw, ICE_DBG_INIT,
@@ -1292,6 +1564,256 @@ static struct ice_buf *ice_pkg_buf(struct ice_buf_build *bld)
 	return &bld->buf;
 }
 
+/**
+ * ice_tunnel_port_in_use_hlpr - helper function to determine tunnel usage
+ * @hw: pointer to the HW structure
+ * @port: port to search for
+ * @index: optionally returns index
+ *
+ * Returns true and optionally the table index if @port is used by a tunnel.
+ * Takes no lock itself; the callers in this file hold hw->tnl_lock around it.
+ */
+static bool ice_tunnel_port_in_use_hlpr(struct ice_hw *hw, u16 port, u16 *index)
+{
+	u16 i;
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].in_use && hw->tnl.tbl[i].port == port) {
+			if (index)
+				*index = i;
+			return true;
+		}
+
+	return false;
+}
+
+/**
+ * ice_tunnel_port_in_use
+ * @hw: pointer to the HW structure
+ * @port: port to search for
+ * @index: optionally returns index
+ *
+ * Same lookup as ice_tunnel_port_in_use_hlpr, performed with hw->tnl_lock
+ * held for the duration of the search.
+ */
+bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index)
+{
+	bool res;
+
+	mutex_lock(&hw->tnl_lock);
+	res = ice_tunnel_port_in_use_hlpr(hw, port, index);
+	mutex_unlock(&hw->tnl_lock);
+
+	return res;
+}
+
+/**
+ * ice_find_free_tunnel_entry
+ * @hw: pointer to the HW structure
+ * @type: tunnel type
+ * @index: optionally returns index
+ *
+ * Returns true if a valid entry of @type is unused; @index receives its index
+ */
+static bool
+ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type,
+			   u16 *index)
+{
+	u16 i;
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && !hw->tnl.tbl[i].in_use &&
+		    hw->tnl.tbl[i].type == type) {
+			if (index)
+				*index = i;
+			return true;
+		}
+
+	return false;
+}
+
+/**
+ * ice_create_tunnel
+ * @hw: pointer to the HW structure
+ * @type: type of tunnel
+ * @port: port of tunnel to create
+ *
+ * Create a tunnel by writing a boost TCAM entry keyed on @port to the Rx and
+ * Tx parsers through an update package. A port that is already in use only
+ * gains a reference. Returns 0 on success or an ice_status error code.
+ */
+enum ice_status
+ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	struct ice_buf_build *bld;
+	u16 index;
+
+	mutex_lock(&hw->tnl_lock);
+	/* a port that is already offloaded only gains a reference */
+	if (ice_tunnel_port_in_use_hlpr(hw, port, &index)) {
+		hw->tnl.tbl[index].ref++;
+		status = 0;
+		goto ice_create_tunnel_end;
+	}
+	/* otherwise claim a valid, unused entry of the requested type */
+	if (!ice_find_free_tunnel_entry(hw, type, &index)) {
+		status = ICE_ERR_OUT_OF_RANGE;
+		goto ice_create_tunnel_end;
+	}
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_create_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_create_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    sizeof(*sect_rx));
+	if (!sect_rx)
+		goto ice_create_tunnel_err;
+	sect_rx->count = cpu_to_le16(1);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    sizeof(*sect_tx));
+	if (!sect_tx)
+		goto ice_create_tunnel_err;
+	sect_tx->count = cpu_to_le16(1);
+
+	/* copy original boost entry to update package buffer */
+	memcpy(sect_rx->tcam, hw->tnl.tbl[index].boost_entry,
+	       sizeof(*sect_rx->tcam));
+
+	/* over-write the never-match dest port key bits with the encoded port
+	 * bits
+	 */
+	ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
+		    (u8 *)&port, NULL, NULL, NULL,
+		    offsetof(struct ice_boost_key_value, hv_dst_port_key),
+		    sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key));
+
+	/* exact copy of entry to Tx section entry */
+	memcpy(sect_tx->tcam, sect_rx->tcam, sizeof(*sect_tx->tcam));
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status) {
+		hw->tnl.tbl[index].port = port;
+		hw->tnl.tbl[index].in_use = true;
+		hw->tnl.tbl[index].ref = 1;
+	}
+
+ice_create_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_create_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
+/**
+ * ice_destroy_tunnel
+ * @hw: pointer to the HW structure
+ * @port: port of tunnel to destroy (ignored if the all parameter is true)
+ * @all: flag that states to destroy all tunnels
+ *
+ * Restores the original boost TCAM entries of the selected tunnels through an
+ * update package. Unless @all is set, a port with more than one reference only
+ * drops a reference. Returns ICE_ERR_PARAM if no matching tunnel is in use.
+ */
+enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all)
+{
+	struct ice_boost_tcam_section *sect_rx, *sect_tx;
+	enum ice_status status = ICE_ERR_MAX_LIMIT;
+	struct ice_buf_build *bld;
+	u16 count = 0;
+	u16 index, size;
+	u16 i, j = 0;
+
+	mutex_lock(&hw->tnl_lock);
+
+	if (!all && ice_tunnel_port_in_use_hlpr(hw, port, &index))
+		if (hw->tnl.tbl[index].ref > 1) {
+			hw->tnl.tbl[index].ref--;
+			status = 0;
+			goto ice_destroy_tunnel_end;
+		}
+
+	/* count the in-use entries this request will tear down */
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use &&
+		    (all || hw->tnl.tbl[i].port == port))
+			count++;
+
+	if (!count) {
+		status = ICE_ERR_PARAM;
+		goto ice_destroy_tunnel_end;
+	}
+
+	/* size of section - there is at least one entry */
+	size = struct_size(sect_rx, tcam, count - 1);
+
+	bld = ice_pkg_buf_alloc(hw);
+	if (!bld) {
+		status = ICE_ERR_NO_MEMORY;
+		goto ice_destroy_tunnel_end;
+	}
+
+	/* allocate 2 sections, one for Rx parser, one for Tx parser */
+	if (ice_pkg_buf_reserve_section(bld, 2))
+		goto ice_destroy_tunnel_err;
+
+	sect_rx = ice_pkg_buf_alloc_section(bld, ICE_SID_RXPARSER_BOOST_TCAM,
+					    size);
+	if (!sect_rx)
+		goto ice_destroy_tunnel_err;
+	sect_rx->count = cpu_to_le16(count);
+
+	sect_tx = ice_pkg_buf_alloc_section(bld, ICE_SID_TXPARSER_BOOST_TCAM,
+					    size);
+	if (!sect_tx)
+		goto ice_destroy_tunnel_err;
+	sect_tx->count = cpu_to_le16(count);
+
+	/* copy the original boost entries back-to-back into both sections; the
+	 * sections hold exactly count entries, so index them with j and not i
+	 */
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use &&
+		    (all || hw->tnl.tbl[i].port == port)) {
+			memcpy(sect_rx->tcam + j, hw->tnl.tbl[i].boost_entry,
+			       sizeof(*sect_rx->tcam));
+			memcpy(sect_tx->tcam + j, hw->tnl.tbl[i].boost_entry,
+			       sizeof(*sect_tx->tcam));
+			hw->tnl.tbl[i].marked = true;
+			j++;
+		}
+
+	status = ice_update_pkg(hw, ice_pkg_buf(bld), 1);
+	if (!status)
+		for (i = 0; i < hw->tnl.count &&
+		     i < ICE_TUNNEL_MAX_ENTRIES; i++)
+			if (hw->tnl.tbl[i].marked) {
+				hw->tnl.tbl[i].ref = 0;
+				hw->tnl.tbl[i].port = 0;
+				hw->tnl.tbl[i].in_use = false;
+				hw->tnl.tbl[i].marked = false;
+			}
+
+ice_destroy_tunnel_err:
+	ice_pkg_buf_free(hw, bld);
+
+ice_destroy_tunnel_end:
+	mutex_unlock(&hw->tnl_lock);
+
+	return status;
+}
+
 /* PTG Management */
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index c7b5e1a6ea2b..70db213c9fe3 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -18,6 +18,11 @@
 
 #define ICE_PKG_CNT 4
 
+enum ice_status
+ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port);
+enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all);
+bool ice_tunnel_port_in_use(struct ice_hw *hw, u16 port, u16 *index);
+
 enum ice_status
 ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 	     struct ice_fv_word *es);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index 0fb3fe3ff3ea..249fb66fc230 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -149,6 +149,7 @@ struct ice_buf_hdr {
 #define ICE_SID_CDID_REDIR_RSS		48
 
 #define ICE_SID_RXPARSER_BOOST_TCAM	56
+#define ICE_SID_TXPARSER_BOOST_TCAM	66
 
 #define ICE_SID_XLT0_PE			80
 #define ICE_SID_XLT_KEY_BUILDER_PE	81
@@ -291,6 +292,38 @@ struct ice_pkg_enum {
 	void *(*handler)(u32 sect_type, void *section, u32 index, u32 *offset);
 };
 
+/* Tunnel enabling */
+
+enum ice_tunnel_type {
+	TNL_VXLAN = 0,
+	TNL_GENEVE,
+	TNL_LAST = 0xFF,
+	TNL_ALL = 0xFF,
+};
+
+struct ice_tunnel_type_scan {
+	enum ice_tunnel_type type;
+	const char *label_prefix;
+};
+
+struct ice_tunnel_entry {
+	enum ice_tunnel_type type;
+	u16 boost_addr;	/* boost TCAM address read from the package label */
+	u16 port;	/* tunnel port while in_use, otherwise 0 */
+	u16 ref;	/* references held on port */
+	struct ice_boost_tcam_entry *boost_entry;
+	u8 valid;	/* boost_entry was found in the package */
+	u8 in_use;	/* entry is programmed with port */
+	u8 marked;	/* picked for removal by ice_destroy_tunnel */
+};
+
+#define ICE_TUNNEL_MAX_ENTRIES	16
+
+struct ice_tunnel_table {
+	struct ice_tunnel_entry tbl[ICE_TUNNEL_MAX_ENTRIES];
+	u16 count;
+};
+
 struct ice_pkg_es {
 	__le16 count;
 	__le16 offset;
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 3de862a3c789..07875db08c3f 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -42,7 +42,10 @@ struct ice_flow_field_info ice_flds_info[ICE_FLOW_FIELD_IDX_MAX] = {
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 0, sizeof(__be16)),
 	/* ICE_FLOW_FIELD_IDX_SCTP_DST_PORT */
 	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_SCTP, 2, sizeof(__be16)),
-
+	/* GRE */
+	/* ICE_FLOW_FIELD_IDX_GRE_KEYID */
+	ICE_FLOW_FLD_INFO(ICE_FLOW_SEG_HDR_GRE, 12,
+			  sizeof_field(struct gre_full_hdr, key)),
 };
 
 /* Bitmaps indicating relevant packet types for a particular protocol header
@@ -134,6 +137,18 @@ static const u32 ice_ptypes_sctp_il[] = {
 	0x00000000, 0x00000000, 0x00000000, 0x00000000,
 };
 
+/* Packet types for packets with an Outermost/First GRE header */
+static const u32 ice_ptypes_gre_of[] = {
+	0x00000000, 0xBFBF7800, 0x000001DF, 0xFEFDE000,
+	0x0000017E, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+	0x00000000, 0x00000000, 0x00000000, 0x00000000,
+};
+
 /* Manage parameters and info. used during the creation of a flow profile */
 struct ice_flow_prof_params {
 	enum ice_block blk;
@@ -225,6 +240,12 @@ ice_flow_proc_seg_hdrs(struct ice_flow_prof_params *params)
 			src = (const unsigned long *)ice_ptypes_sctp_il;
 			bitmap_and(params->ptypes, params->ptypes, src,
 				   ICE_FLOW_PTYPE_MAX);
+		} else if (hdrs & ICE_FLOW_SEG_HDR_GRE) {
+			if (!i) {
+				src = (const unsigned long *)ice_ptypes_gre_of;
+				bitmap_and(params->ptypes, params->ptypes,
+					   src, ICE_FLOW_PTYPE_MAX);
+			}
 		}
 	}
 
@@ -275,6 +296,9 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	case ICE_FLOW_FIELD_IDX_SCTP_DST_PORT:
 		prot_id = ICE_PROT_SCTP_IL;
 		break;
+	case ICE_FLOW_FIELD_IDX_GRE_KEYID:
+		prot_id = ICE_PROT_GRE_OF;
+		break;
 	default:
 		return ICE_ERR_NOT_IMPL;
 	}
@@ -945,6 +969,7 @@ ice_add_rss_list(struct ice_hw *hw, u16 vsi_handle, struct ice_flow_prof *prof)
 #define ICE_FLOW_PROF_ENCAP_M	(BIT_ULL(ICE_FLOW_PROF_ENCAP_S))
 
 #define ICE_RSS_OUTER_HEADERS	1
+#define ICE_RSS_INNER_HEADERS	2
 
 /* Flow profile ID format:
  * [0:31] - Packet match fields
@@ -1085,6 +1110,9 @@ ice_add_rss_cfg(struct ice_hw *hw, u16 vsi_handle, u64 hashed_flds,
 	mutex_lock(&hw->rss_locks);
 	status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds, addl_hdrs,
 				      ICE_RSS_OUTER_HEADERS);
+	if (!status)
+		status = ice_add_rss_cfg_sync(hw, vsi_handle, hashed_flds,
+					      addl_hdrs, ICE_RSS_INNER_HEADERS);
 	mutex_unlock(&hw->rss_locks);
 
 	return status;
@@ -1238,6 +1266,12 @@ enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle)
 						      ICE_RSS_OUTER_HEADERS);
 			if (status)
 				break;
+			status = ice_add_rss_cfg_sync(hw, vsi_handle,
+						      r->hashed_flds,
+						      r->packet_hdr,
+						      ICE_RSS_INNER_HEADERS);
+			if (status)
+				break;
 		}
 	}
 	mutex_unlock(&hw->rss_locks);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 5558627bd5eb..00f2b7a9feed 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -43,6 +43,7 @@ enum ice_flow_seg_hdr {
 	ICE_FLOW_SEG_HDR_TCP		= 0x00000040,
 	ICE_FLOW_SEG_HDR_UDP		= 0x00000080,
 	ICE_FLOW_SEG_HDR_SCTP		= 0x00000100,
+	ICE_FLOW_SEG_HDR_GRE		= 0x00000200,
 };
 
 enum ice_flow_field {
@@ -58,6 +59,8 @@ enum ice_flow_field {
 	ICE_FLOW_FIELD_IDX_UDP_DST_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT,
 	ICE_FLOW_FIELD_IDX_SCTP_DST_PORT,
+	/* GRE */
+	ICE_FLOW_FIELD_IDX_GRE_KEYID,
 	/* The total number of enums must not exceed 64 */
 	ICE_FLOW_FIELD_IDX_MAX
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 878e125d8b42..5d61acdec7ed 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -262,6 +262,12 @@ enum ice_rx_flex_desc_status_error_0_bits {
 	ICE_RX_FLEX_DESC_STATUS0_LAST /* this entry must be last!!! */
 };
 
+enum ice_rx_flex_desc_status_error_1_bits {
+	/* Note: These are predefined bit offsets */
+	ICE_RX_FLEX_DESC_STATUS1_NAT_S = 4,
+	ICE_RX_FLEX_DESC_STATUS1_LAST /* this entry must be last!!! */
+};
+
 #define ICE_RXQ_CTX_SIZE_DWORDS		8
 #define ICE_RXQ_CTX_SZ			(ICE_RXQ_CTX_SIZE_DWORDS * sizeof(u32))
 #define ICE_TX_CMPLTNQ_CTX_SIZE_DWORDS	22
@@ -413,6 +419,25 @@ enum ice_tx_ctx_desc_cmd_bits {
 	ICE_TX_CTX_DESC_RESERVED	= 0x40
 };
 
+enum ice_tx_ctx_desc_eipt_offload {
+	ICE_TX_CTX_EIPT_NONE		= 0x0,
+	ICE_TX_CTX_EIPT_IPV6		= 0x1,
+	ICE_TX_CTX_EIPT_IPV4_NO_CSUM	= 0x2,
+	ICE_TX_CTX_EIPT_IPV4		= 0x3
+};
+
+#define ICE_TXD_CTX_QW0_EIPLEN_S	2
+
+#define ICE_TXD_CTX_QW0_L4TUNT_S	9
+
+#define ICE_TXD_CTX_UDP_TUNNELING	BIT_ULL(ICE_TXD_CTX_QW0_L4TUNT_S)
+#define ICE_TXD_CTX_GRE_TUNNELING	(0x2ULL << ICE_TXD_CTX_QW0_L4TUNT_S)
+
+#define ICE_TXD_CTX_QW0_NATLEN_S	12
+
+#define ICE_TXD_CTX_QW0_L4T_CS_S	23
+#define ICE_TXD_CTX_QW0_L4T_CS_M	BIT_ULL(ICE_TXD_CTX_QW0_L4T_CS_S)
+
 #define ICE_LAN_TXQ_MAX_QGRPS	127
 #define ICE_LAN_TXQ_MAX_QDIS	1023
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5b190c257124..44ff4fe45a56 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2342,13 +2342,27 @@ static void ice_set_netdev_features(struct net_device *netdev)
 			 NETIF_F_HW_VLAN_CTAG_TX     |
 			 NETIF_F_HW_VLAN_CTAG_RX;
 
-	tso_features = NETIF_F_TSO		|
+	tso_features = NETIF_F_TSO			|
+		       NETIF_F_TSO_ECN			|
+		       NETIF_F_TSO6			|
+		       NETIF_F_GSO_GRE			|
+		       NETIF_F_GSO_UDP_TUNNEL		|
+		       NETIF_F_GSO_GRE_CSUM		|
+		       NETIF_F_GSO_UDP_TUNNEL_CSUM	|
+		       NETIF_F_GSO_PARTIAL		|
+		       NETIF_F_GSO_IPXIP4		|
+		       NETIF_F_GSO_IPXIP6		|
 		       NETIF_F_GSO_UDP_L4;
 
+	netdev->gso_partial_features |= NETIF_F_GSO_UDP_TUNNEL_CSUM |
+					NETIF_F_GSO_GRE_CSUM;
 	/* set features that user can change */
 	netdev->hw_features = dflt_features | csumo_features |
 			      vlano_features | tso_features;
 
+	/* add support for HW_CSUM on packets with MPLS header */
+	netdev->mpls_features =  NETIF_F_HW_CSUM;
+
 	/* enable features */
 	netdev->features |= netdev->hw_features;
 	/* encap and VLAN devices inherit default, csumo and tso features */
@@ -5157,6 +5171,70 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 	pf->tx_timeout_recovery_level++;
 }
 
+/**
+ * ice_udp_tunnel_add - Get notifications about UDP tunnel ports that come up
+ * @netdev: This physical port's netdev
+ * @ti: Tunnel endpoint information
+ */
+static void
+ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	enum ice_tunnel_type tnl_type;
+	u16 port = ntohs(ti->port);
+	enum ice_status status;
+	/* map the stack's tunnel type to the driver's; reject anything else */
+	switch (ti->type) {
+	case UDP_TUNNEL_TYPE_VXLAN:
+		tnl_type = TNL_VXLAN;
+		break;
+	case UDP_TUNNEL_TYPE_GENEVE:
+		tnl_type = TNL_GENEVE;
+		break;
+	default:
+		netdev_err(netdev, "Unknown tunnel type\n");
+		return;
+	}
+	/* ICE_ERR_OUT_OF_RANGE means no free entry of this tunnel type */
+	status = ice_create_tunnel(&pf->hw, tnl_type, port);
+	if (status == ICE_ERR_OUT_OF_RANGE)
+		netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n",
+			    port);
+	else if (status)
+		netdev_err(netdev, "Error adding UDP tunnel - %d\n",
+			   status);
+}
+
+/**
+ * ice_udp_tunnel_del - Get notifications about UDP tunnel ports that go away
+ * @netdev: This physical port's netdev
+ * @ti: Tunnel endpoint information
+ */
+static void
+ice_udp_tunnel_del(struct net_device *netdev, struct udp_tunnel_info *ti)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
+	u16 port = ntohs(ti->port);
+	enum ice_status status;
+	bool retval;
+	/* only ports present in the tunnel table can be removed */
+	retval = ice_tunnel_port_in_use(&pf->hw, port, NULL);
+	if (!retval) {
+		netdev_info(netdev, "port %d not found in UDP tunnels list\n",
+			    port);
+		return;
+	}
+	/* releases one reference; the last release tears the tunnel down */
+	status = ice_destroy_tunnel(&pf->hw, port, false);
+	if (status)
+		netdev_err(netdev, "error deleting port %d from UDP tunnels list\n",
+			   port);
+}
+
 /**
  * ice_open - Called when a network interface becomes active
  * @netdev: network interface device structure
@@ -5213,6 +5291,10 @@ int ice_open(struct net_device *netdev)
 	if (err)
 		netdev_err(netdev, "Failed to open VSI 0x%04X on switch 0x%04X\n",
 			   vsi->vsi_num, vsi->vsw->sw_id);
+
+	/* Update existing tunnels information */
+	udp_tunnel_get_rx_info(netdev);
+
 	return err;
 }
 
@@ -5263,21 +5345,21 @@ ice_features_check(struct sk_buff *skb,
 		features &= ~NETIF_F_GSO_MASK;
 
 	len = skb_network_header(skb) - skb->data;
-	if (len & ~(ICE_TXD_MACLEN_MAX))
+	if (len > ICE_TXD_MACLEN_MAX || len & 0x1)
 		goto out_rm_features;
 
 	len = skb_transport_header(skb) - skb_network_header(skb);
-	if (len & ~(ICE_TXD_IPLEN_MAX))
+	if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
 		goto out_rm_features;
 
 	if (skb->encapsulation) {
 		len = skb_inner_network_header(skb) - skb_transport_header(skb);
-		if (len & ~(ICE_TXD_L4LEN_MAX))
+		if (len > ICE_TXD_L4LEN_MAX || len & 0x1)
 			goto out_rm_features;
 
 		len = skb_inner_transport_header(skb) -
 		      skb_inner_network_header(skb);
-		if (len & ~(ICE_TXD_IPLEN_MAX))
+		if (len > ICE_TXD_IPLEN_MAX || len & 0x1)
 			goto out_rm_features;
 	}
 
@@ -5326,4 +5408,6 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_bpf = ice_xdp,
 	.ndo_xdp_xmit = ice_xdp_xmit,
 	.ndo_xsk_wakeup = ice_xsk_wakeup,
+	.ndo_udp_tunnel_add = ice_udp_tunnel_add,
+	.ndo_udp_tunnel_del = ice_udp_tunnel_del,
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index 71647566964e..678db6bf7f57 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -18,6 +18,7 @@ enum ice_prot_id {
 	ICE_PROT_IPV6_IL	= 41,
 	ICE_PROT_TCP_IL		= 49,
 	ICE_PROT_UDP_IL_OR_S	= 53,
+	ICE_PROT_GRE_OF		= 64,
 	ICE_PROT_SCTP_IL	= 96,
 	ICE_PROT_META_ID	= 255, /* when offset == metadata */
 	ICE_PROT_INVALID	= 255  /* when offset == ICE_FV_OFFSET_INVAL */
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 69b21b436f9a..4ba1fc8261d9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1807,12 +1807,94 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 	l2_len = ip.hdr - skb->data;
 	offset = (l2_len / 2) << ICE_TX_DESC_LEN_MACLEN_S;
 
-	if (skb->encapsulation)
-		return -1;
+	protocol = vlan_get_protocol(skb);
+
+	if (protocol == htons(ETH_P_IP))
+		first->tx_flags |= ICE_TX_FLAGS_IPV4;
+	else if (protocol == htons(ETH_P_IPV6))
+		first->tx_flags |= ICE_TX_FLAGS_IPV6;
+
+	if (skb->encapsulation) {
+		bool gso_ena = false;
+		u32 tunnel = 0;
+
+		/* define outer network header type */
+		if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
+			tunnel |= (first->tx_flags & ICE_TX_FLAGS_TSO) ?
+				  ICE_TX_CTX_EIPT_IPV4 :
+				  ICE_TX_CTX_EIPT_IPV4_NO_CSUM;
+			l4_proto = ip.v4->protocol;
+		} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
+			tunnel |= ICE_TX_CTX_EIPT_IPV6;
+			exthdr = ip.hdr + sizeof(*ip.v6);
+			l4_proto = ip.v6->nexthdr;
+			if (l4.hdr != exthdr)
+				ipv6_skip_exthdr(skb, exthdr - skb->data,
+						 &l4_proto, &frag_off);
+		}
+
+		/* define outer transport */
+		switch (l4_proto) {
+		case IPPROTO_UDP:
+			tunnel |= ICE_TXD_CTX_UDP_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_GRE:
+			tunnel |= ICE_TXD_CTX_GRE_TUNNELING;
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			break;
+		case IPPROTO_IPIP:
+		case IPPROTO_IPV6:
+			first->tx_flags |= ICE_TX_FLAGS_TUNNEL;
+			l4.hdr = skb_inner_network_header(skb);
+			break;
+		default:
+			if (first->tx_flags & ICE_TX_FLAGS_TSO)
+				return -1;
+
+			skb_checksum_help(skb);
+			return 0;
+		}
+
+		/* compute outer L3 header size */
+		tunnel |= ((l4.hdr - ip.hdr) / 4) <<
+			  ICE_TXD_CTX_QW0_EIPLEN_S;
+
+		/* switch IP header pointer from outer to inner header */
+		ip.hdr = skb_inner_network_header(skb);
+
+		/* compute tunnel header size */
+		tunnel |= ((ip.hdr - l4.hdr) / 2) <<
+			   ICE_TXD_CTX_QW0_NATLEN_S;
+
+		gso_ena = skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL;
+		/* indicate if we need to offload outer UDP header */
+		if ((first->tx_flags & ICE_TX_FLAGS_TSO) && !gso_ena &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM))
+			tunnel |= ICE_TXD_CTX_QW0_L4T_CS_M;
+
+		/* record tunnel offload values */
+		off->cd_tunnel_params |= tunnel;
+
+		/* set DTYP=1 to indicate that it's a Tx context descriptor
+		 * in IPsec tunnel mode with Tx offloads in Quad word 1
+		 */
+		off->cd_qw1 |= (u64)ICE_TX_DESC_DTYPE_CTX;
+
+		/* switch L4 header pointer from outer to inner */
+		l4.hdr = skb_inner_transport_header(skb);
+		l4_proto = 0;
+
+		/* reset type as we transition from outer to inner headers */
+		first->tx_flags &= ~(ICE_TX_FLAGS_IPV4 | ICE_TX_FLAGS_IPV6);
+		if (ip.v4->version == 4)
+			first->tx_flags |= ICE_TX_FLAGS_IPV4;
+		if (ip.v6->version == 6)
+			first->tx_flags |= ICE_TX_FLAGS_IPV6;
+	}
 
 	/* Enable IP checksum offloads */
-	protocol = vlan_get_protocol(skb);
-	if (protocol == htons(ETH_P_IP)) {
+	if (first->tx_flags & ICE_TX_FLAGS_IPV4) {
 		l4_proto = ip.v4->protocol;
 		/* the stack computes the IP header already, the only time we
 		 * need the hardware to recompute it is in the case of TSO.
@@ -1822,7 +1904,7 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 		else
 			cmd |= ICE_TX_DESC_CMD_IIPT_IPV4;
 
-	} else if (protocol == htons(ETH_P_IPV6)) {
+	} else if (first->tx_flags & ICE_TX_FLAGS_IPV6) {
 		cmd |= ICE_TX_DESC_CMD_IIPT_IPV6;
 		exthdr = ip.hdr + sizeof(*ip.v6);
 		l4_proto = ip.v6->nexthdr;
@@ -1969,6 +2051,40 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 		ip.v6->payload_len = 0;
 	}
 
+	if (skb_shinfo(skb)->gso_type & (SKB_GSO_GRE |
+					 SKB_GSO_GRE_CSUM |
+					 SKB_GSO_IPXIP4 |
+					 SKB_GSO_IPXIP6 |
+					 SKB_GSO_UDP_TUNNEL |
+					 SKB_GSO_UDP_TUNNEL_CSUM)) {
+		if (!(skb_shinfo(skb)->gso_type & SKB_GSO_PARTIAL) &&
+		    (skb_shinfo(skb)->gso_type & SKB_GSO_UDP_TUNNEL_CSUM)) {
+			l4.udp->len = 0;
+
+			/* determine offset of outer transport header */
+			l4_start = l4.hdr - skb->data;
+
+			/* remove payload length from outer checksum */
+			paylen = skb->len - l4_start;
+			csum_replace_by_diff(&l4.udp->check,
+					     (__force __wsum)htonl(paylen));
+		}
+
+		/* reset pointers to inner headers */
+
+		/* cppcheck-suppress unreadVariable */
+		ip.hdr = skb_inner_network_header(skb);
+		l4.hdr = skb_inner_transport_header(skb);
+
+		/* initialize inner IP header fields */
+		if (ip.v4->version == 4) {
+			ip.v4->tot_len = 0;
+			ip.v4->check = 0;
+		} else {
+			ip.v6->payload_len = 0;
+		}
+	}
+
 	/* determine offset of transport header */
 	l4_start = l4.hdr - skb->data;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 7ee00a128663..025dd642cf28 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -113,6 +113,9 @@ static inline int ice_skb_pad(void)
 #define ICE_TX_FLAGS_TSO	BIT(0)
 #define ICE_TX_FLAGS_HW_VLAN	BIT(1)
 #define ICE_TX_FLAGS_SW_VLAN	BIT(2)
+#define ICE_TX_FLAGS_IPV4	BIT(5)
+#define ICE_TX_FLAGS_IPV6	BIT(6)
+#define ICE_TX_FLAGS_TUNNEL	BIT(7)
 #define ICE_TX_FLAGS_VLAN_M	0xffff0000
 #define ICE_TX_FLAGS_VLAN_PR_M	0xe0000000
 #define ICE_TX_FLAGS_VLAN_PR_S	29
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 6da048a6ca7c..1f9c3d24cde7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -84,12 +84,17 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
 {
 	struct ice_rx_ptype_decoded decoded;
-	u32 rx_error, rx_status;
+	u16 rx_error, rx_status;
+	u16 rx_stat_err1;
 	bool ipv4, ipv6;
 
 	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
-	rx_error = rx_status;
+	rx_error = rx_status & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
+				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |
+				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
 
+	rx_stat_err1 = le16_to_cpu(rx_desc->wb.status_error1);
 	decoded = ice_decode_rx_desc_ptype(ptype);
 
 	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
@@ -125,6 +130,18 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
 		goto checksum_fail;
 
+	/* check for outer UDP checksum error in tunneled packets */
+	if ((rx_stat_err1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
+	    (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+		goto checksum_fail;
+
+	/* If there is an outer header present that might contain a checksum
+	 * we need to bump the checksum level by 1 to reflect the fact that
+	 * we are indicating we validated the inner checksum.
+	 */
+	if (decoded.tunnel_type >= ICE_RX_PTYPE_TUNNEL_IP_GRENAT)
+		skb->csum_level = 1;
+
 	/* Only report checksum unnecessary for TCP, UDP, or SCTP */
 	switch (decoded.inner_prot) {
 	case ICE_RX_PTYPE_INNER_PROT_TCP:
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 35ea5adbb3e5..c56b2e77a48c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -579,6 +579,10 @@ struct ice_hw {
 	u8 *pkg_copy;
 	u32 pkg_size;
 
+	/* tunneling info */
+	struct mutex tnl_lock;
+	struct ice_tunnel_table tnl;
+
 	/* HW block tables */
 	struct ice_blk_info blk[ICE_BLK_COUNT];
 	struct mutex fl_profs_locks[ICE_BLK_COUNT];	/* lock fltr profiles */
-- 
cgit v1.2.3-59-g8ed1b


From 01b5e89aab498dad5a38d04a71beca2b562d9449 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Thu, 7 May 2020 17:40:59 -0700
Subject: ice: Add VF promiscuous support

Implement promiscuous support for VF VSIs. Behaviour of promiscuous support
is based on VF trust as well as the newly introduced
vf-true-promisc-support flag.

A trusted VF with vf-true-promisc-support disabled will be the default VSI,
which means that all traffic without a matching destination MAC address in
the device's internal switch will be forwarded to this VF VSI.

A trusted VF with vf-true-promisc-support enabled will go into "true
promiscuous mode". This amounts to the VF receiving all ingress and egress
traffic that hits the device's internal switch.

An untrusted VF will only receive traffic destined for that VF.

The vf-true-promisc-support flag cannot be toggled while any VF is in
promiscuous mode. This flag should be set prior to loading the iavf driver
or spawning VF(s).

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |   1 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c     |  12 ++
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 206 ++++++++++++++++++++++-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h |   6 +
 4 files changed, 223 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 43349eaa02b2..ce7172901428 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -345,6 +345,7 @@ enum ice_pf_flags {
 	ICE_FLAG_FW_LLDP_AGENT,
 	ICE_FLAG_ETHTOOL_CTXT,		/* set when ethtool holds RTNL lock */
 	ICE_FLAG_LEGACY_RX,
+	ICE_FLAG_VF_TRUE_PROMISC_ENA,
 	ICE_FLAG_MDD_AUTO_RESET_VF,
 	ICE_PF_FLAGS_NBITS		/* must be last */
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 593fb37bd59e..66d0bcc51ad9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -157,6 +157,8 @@ struct ice_priv_flag {
 static const struct ice_priv_flag ice_gstrings_priv_flags[] = {
 	ICE_PRIV_FLAG("link-down-on-close", ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA),
 	ICE_PRIV_FLAG("fw-lldp-agent", ICE_FLAG_FW_LLDP_AGENT),
+	ICE_PRIV_FLAG("vf-true-promisc-support",
+		      ICE_FLAG_VF_TRUE_PROMISC_ENA),
 	ICE_PRIV_FLAG("mdd-auto-reset-vf", ICE_FLAG_MDD_AUTO_RESET_VF),
 	ICE_PRIV_FLAG("legacy-rx", ICE_FLAG_LEGACY_RX),
 };
@@ -1308,6 +1310,16 @@ static int ice_set_priv_flags(struct net_device *netdev, u32 flags)
 		ice_down(vsi);
 		ice_up(vsi);
 	}
+	/* don't allow modification of this flag when a single VF is in
+	 * promiscuous mode because it's not supported
+	 */
+	if (test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, change_flags) &&
+	    ice_is_any_vf_in_promisc(pf)) {
+		dev_err(dev, "Changing vf-true-promisc-support flag while VF(s) are in promiscuous mode not supported\n");
+		/* toggle bit back to previous state */
+		change_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags);
+		ret = -EAGAIN;
+	}
 	clear_bit(ICE_FLAG_ETHTOOL_CTXT, pf->flags);
 	return ret;
 }
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 15191a325918..1389d0d6d3d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -34,6 +34,37 @@ static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
 	return 0;
 }
 
+/**
+ * ice_err_to_virt_err - translate errors for VF return code
+ * @ice_err: error return code
+ */
+static enum virtchnl_status_code ice_err_to_virt_err(enum ice_status ice_err)
+{
+	switch (ice_err) {
+	case ICE_SUCCESS:
+		return VIRTCHNL_STATUS_SUCCESS;
+	case ICE_ERR_BAD_PTR:
+	case ICE_ERR_INVAL_SIZE:
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+	case ICE_ERR_PARAM:
+	case ICE_ERR_CFG:
+		return VIRTCHNL_STATUS_ERR_PARAM;
+	case ICE_ERR_NO_MEMORY:
+		return VIRTCHNL_STATUS_ERR_NO_MEMORY;
+	case ICE_ERR_NOT_READY:
+	case ICE_ERR_RESET_FAILED:
+	case ICE_ERR_FW_API_VER:
+	case ICE_ERR_AQ_ERROR:
+	case ICE_ERR_AQ_TIMEOUT:
+	case ICE_ERR_AQ_FULL:
+	case ICE_ERR_AQ_NO_WORK:
+	case ICE_ERR_AQ_EMPTY:
+		return VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+	default:
+		return VIRTCHNL_STATUS_ERR_NOT_SUPPORTED;
+	}
+}
+
 /**
  * ice_vc_vf_broadcast - Broadcast a message to all VFs on PF
  * @pf: pointer to the PF structure
@@ -2059,6 +2090,173 @@ out:
 	return ret;
 }
 
+/**
+ * ice_is_any_vf_in_promisc - check if any VF(s) are in promiscuous mode
+ * @pf: PF structure for accessing VF(s)
+ *
+ * Return false if no VF(s) are in unicast and/or multicast promiscuous mode,
+ * else return true
+ */
+bool ice_is_any_vf_in_promisc(struct ice_pf *pf)
+{
+	int vf_idx;
+
+	ice_for_each_vf(pf, vf_idx) {
+		struct ice_vf *vf = &pf->vf[vf_idx];
+
+		/* found a VF that has promiscuous mode configured */
+		if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+		    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_vc_cfg_promiscuous_mode_msg
+ * @vf: pointer to the VF info
+ * @msg: pointer to the msg buffer
+ *
+ * called from the VF to configure VF VSIs promiscuous mode
+ */
+static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
+{
+	enum virtchnl_status_code v_ret = VIRTCHNL_STATUS_SUCCESS;
+	struct virtchnl_promisc_info *info =
+	    (struct virtchnl_promisc_info *)msg;
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	bool rm_promisc;
+	int ret = 0;
+
+	if (!test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	if (!ice_vc_isvalid_vsi_id(vf, info->vsi_id)) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	if (!vsi) {
+		v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		goto error_param;
+	}
+
+	dev = ice_pf_to_dev(pf);
+	if (!test_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps)) {
+		dev_err(dev, "Unprivileged VF %d is attempting to configure promiscuous mode\n",
+			vf->vf_id);
+		/* Leave v_ret alone, lie to the VF on purpose. */
+		goto error_param;
+	}
+
+	rm_promisc = !(info->flags & FLAG_VF_UNICAST_PROMISC) &&
+		!(info->flags & FLAG_VF_MULTICAST_PROMISC);
+
+	if (vsi->num_vlan || vf->port_vlan_info) {
+		struct ice_vsi *pf_vsi = ice_get_main_vsi(pf);
+		struct net_device *pf_netdev;
+
+		if (!pf_vsi) {
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+
+		pf_netdev = pf_vsi->netdev;
+
+		ret = ice_set_vf_spoofchk(pf_netdev, vf->vf_id, rm_promisc);
+		if (ret) {
+			dev_err(dev, "Failed to update spoofchk to %s for VF %d VSI %d when setting promiscuous mode\n",
+				rm_promisc ? "ON" : "OFF", vf->vf_id,
+				vsi->vsi_num);
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+		}
+
+		ret = ice_cfg_vlan_pruning(vsi, true, !rm_promisc);
+		if (ret) {
+			dev_err(dev, "Failed to configure VLAN pruning in promiscuous mode\n");
+			v_ret = VIRTCHNL_STATUS_ERR_PARAM;
+			goto error_param;
+		}
+	}
+
+	if (!test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags)) {
+		bool set_dflt_vsi = !!(info->flags & FLAG_VF_UNICAST_PROMISC);
+
+		if (set_dflt_vsi && !ice_is_dflt_vsi_in_use(pf->first_sw))
+			/* only attempt to set the default forwarding VSI if
+			 * it's not currently set
+			 */
+			ret = ice_set_dflt_vsi(pf->first_sw, vsi);
+		else if (!set_dflt_vsi &&
+			 ice_is_vsi_dflt_vsi(pf->first_sw, vsi))
+			/* only attempt to free the default forwarding VSI if we
+			 * are the owner
+			 */
+			ret = ice_clear_dflt_vsi(pf->first_sw);
+
+		if (ret) {
+			dev_err(dev, "%sable VF %d as the default VSI failed, error %d\n",
+				set_dflt_vsi ? "en" : "dis", vf->vf_id, ret);
+			v_ret = VIRTCHNL_STATUS_ERR_ADMIN_QUEUE_ERROR;
+			goto error_param;
+		}
+	} else {
+		enum ice_status status;
+		u8 promisc_m;
+
+		if (info->flags & FLAG_VF_UNICAST_PROMISC) {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_UCAST_PROMISC_BITS;
+		} else if (info->flags & FLAG_VF_MULTICAST_PROMISC) {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_MCAST_PROMISC_BITS;
+		} else {
+			if (vf->port_vlan_info || vsi->num_vlan)
+				promisc_m = ICE_UCAST_VLAN_PROMISC_BITS;
+			else
+				promisc_m = ICE_UCAST_PROMISC_BITS;
+		}
+
+		/* Configure multicast/unicast with or without VLAN promiscuous
+		 * mode
+		 */
+		status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
+		if (status) {
+			dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %d\n",
+				rm_promisc ? "dis" : "en", vf->vf_id, status);
+			v_ret = ice_err_to_virt_err(status);
+			goto error_param;
+		} else {
+			dev_dbg(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d succeeded\n",
+				rm_promisc ? "dis" : "en", vf->vf_id);
+		}
+	}
+
+	if (info->flags & FLAG_VF_MULTICAST_PROMISC)
+		set_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	else
+		clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+
+	if (info->flags & FLAG_VF_UNICAST_PROMISC)
+		set_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+	else
+		clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+
+error_param:
+	return ice_vc_send_msg_to_vf(vf, VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE,
+				     v_ret, NULL, 0);
+}
+
 /**
  * ice_vc_get_stats_msg
  * @vf: pointer to the VF info
@@ -2992,8 +3190,9 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 		goto error_param;
 	}
 
-	if (test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
-	    test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states))
+	if ((test_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states) ||
+	     test_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states)) &&
+	    test_bit(ICE_FLAG_VF_TRUE_PROMISC_ENA, pf->flags))
 		vlan_promisc = true;
 
 	if (add_v) {
@@ -3317,6 +3516,9 @@ error_handler:
 	case VIRTCHNL_OP_GET_STATS:
 		err = ice_vc_get_stats_msg(vf, msg);
 		break;
+	case VIRTCHNL_OP_CONFIG_PROMISCUOUS_MODE:
+		err = ice_vc_cfg_promiscuous_mode_msg(vf, msg);
+		break;
 	case VIRTCHNL_OP_ADD_VLAN:
 		err = ice_vc_add_vlan_msg(vf, msg);
 		break;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 3f9464269bd2..f7fd1188efa4 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -128,6 +128,7 @@ void ice_set_vf_state_qs_dis(struct ice_vf *vf);
 int
 ice_get_vf_stats(struct net_device *netdev, int vf_id,
 		 struct ifla_vf_stats *vf_stats);
+bool ice_is_any_vf_in_promisc(struct ice_pf *pf);
 void
 ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
 void ice_print_vfs_mdd_events(struct ice_pf *pf);
@@ -219,5 +220,10 @@ ice_get_vf_stats(struct net_device __always_unused *netdev,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline bool ice_is_any_vf_in_promisc(struct ice_pf __always_unused *pf)
+{
+	return false;
+}
 #endif /* CONFIG_PCI_IOV */
 #endif /* _ICE_VIRTCHNL_PF_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From 610ed0e93e7dd107afc2398d54de6b6fe9513f8d Mon Sep 17 00:00:00 2001
From: Avinash JD <avinash.dayanand@intel.com>
Date: Thu, 7 May 2020 17:41:00 -0700
Subject: ice: Don't reset and rebuild for Tx timeout on PFC enabled queue

When there's a Tx timeout for a queue which belongs to a PFC enabled TC,
then it's not because the queue is hung but because PFC is in action.

In PFC, the peer sends a pause frame for a specified period of time when its
buffer threshold is exceeded (due to congestion). Netdev, on the other
hand, checks if an ACK is received within a specified time for a Tx packet;
if not, it'll invoke the tx_timeout routine.

Signed-off-by: Avinash JD <avinash.dayanand@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c    | 58 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_dcb_lib.h    | 23 ++++++++++
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  1 +
 drivers/net/ethernet/intel/ice/ice_main.c       | 10 +++++
 drivers/net/ethernet/intel/ice/ice_type.h       |  2 +
 5 files changed, 94 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 7bea09363b42..448b7d2fb808 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -62,6 +62,64 @@ u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg)
 	return ena_tc;
 }
 
+/**
+ * ice_is_pfc_causing_hung_q
+ * @pf: pointer to PF structure
+ * @txqueue: Tx queue which is supposedly hung queue
+ *
+ * find if PFC is causing the hung queue, if yes return true else false
+ */
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue)
+{
+	u8 num_tcs = 0, i, tc, up_mapped_tc, up_in_tc = 0;
+	u64 ref_prio_xoff[ICE_MAX_UP];
+	struct ice_vsi *vsi;
+	u32 up2tc;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	ice_for_each_traffic_class(i)
+		if (vsi->tc_cfg.ena_tc & BIT(i))
+			num_tcs++;
+
+	/* first find out the TC to which the hung queue belongs to */
+	for (tc = 0; tc < num_tcs - 1; tc++)
+		if (ice_find_q_in_range(vsi->tc_cfg.tc_info[tc].qoffset,
+					vsi->tc_cfg.tc_info[tc + 1].qoffset,
+					txqueue))
+			break;
+
+	/* Build a bit map of all UPs associated to the suspect hung queue TC,
+	 * so that we check for its counter increment.
+	 */
+	up2tc = rd32(&pf->hw, PRTDCB_TUP2TC);
+	for (i = 0; i < ICE_MAX_UP; i++) {
+		up_mapped_tc = (up2tc >> (i * 3)) & 0x7;
+		if (up_mapped_tc == tc)
+			up_in_tc |= BIT(i);
+	}
+
+	/* Now that we figured out that hung queue is PFC enabled, still the
+	 * Tx timeout can be legitimate. So to make sure Tx timeout is
+	 * absolutely caused by PFC storm, check if the counters are
+	 * incrementing.
+	 */
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			ref_prio_xoff[i] = pf->stats.priority_xoff_rx[i];
+
+	ice_update_dcb_stats(pf);
+
+	for (i = 0; i < ICE_MAX_UP; i++)
+		if (up_in_tc & BIT(i))
+			if (pf->stats.priority_xoff_rx[i] > ref_prio_xoff[i])
+				return true;
+
+	return false;
+}
+
 /**
  * ice_dcb_get_mode - gets the DCB mode
  * @port_info: pointer to port info structure
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 37680e815b02..7c42324494d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -17,6 +17,8 @@
 void ice_dcb_rebuild(struct ice_pf *pf);
 u8 ice_dcb_get_ena_tc(struct ice_dcbx_cfg *dcbcfg);
 u8 ice_dcb_get_num_tc(struct ice_dcbx_cfg *dcbcfg);
+void ice_vsi_set_dcb_tc_cfg(struct ice_vsi *vsi);
+bool ice_is_pfc_causing_hung_q(struct ice_pf *pf, unsigned int txqueue);
 u8 ice_dcb_get_tc(struct ice_vsi *vsi, int queue_index);
 int
 ice_pf_dcb_cfg(struct ice_pf *pf, struct ice_dcbx_cfg *new_cfg, bool locked);
@@ -32,6 +34,20 @@ void
 ice_dcb_process_lldp_set_mib_change(struct ice_pf *pf,
 				    struct ice_rq_event_info *event);
 void ice_vsi_cfg_netdev_tc(struct ice_vsi *vsi, u8 ena_tc);
+
+/**
+ * ice_find_q_in_range
+ * @low: start of queue range for a TC i.e. offset of TC
+ * @high: start of queue for next TC
+ * @tx_q: hung_queue/tx_queue
+ *
+ * finds if queue 'tx_q' falls between the two offsets of any given TC
+ */
+static inline bool ice_find_q_in_range(u16 low, u16 high, unsigned int tx_q)
+{
+	return (tx_q >= low) && (tx_q < high);
+}
+
 static inline void
 ice_set_cgd_num(struct ice_tlan_ctx *tlan_ctx, struct ice_ring *ring)
 {
@@ -79,6 +95,13 @@ ice_tx_prepare_vlan_flags_dcb(struct ice_ring __always_unused *tx_ring,
 	return 0;
 }
 
+static inline bool
+ice_is_pfc_causing_hung_q(struct ice_pf __always_unused *pf,
+			  unsigned int __always_unused txqueue)
+{
+	return false;
+}
+
 #define ice_update_dcb_stats(pf) do {} while (0)
 #define ice_pf_dcb_recfg(pf) do {} while (0)
 #define ice_vsi_cfg_dcb_rings(vsi) do {} while (0)
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 1d37a9f02c1c..bc48eda67c81 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -58,6 +58,7 @@
 #define PRTDCB_GENS				0x00083020
 #define PRTDCB_GENS_DCBX_STATUS_S		0
 #define PRTDCB_GENS_DCBX_STATUS_M		ICE_M(0x7, 0)
+#define PRTDCB_TUP2TC				0x001D26C0 /* Reset Source: CORER */
 #define GL_PREEXT_L2_PMASK0(_i)			(0x0020F0FC + ((_i) * 4))
 #define GL_PREEXT_L2_PMASK1(_i)			(0x0020F108 + ((_i) * 4))
 #define GLFLXP_RXDID_FLX_WRD_0(_i)		(0x0045c800 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 44ff4fe45a56..8c792ecc6550 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5114,6 +5114,16 @@ static void ice_tx_timeout(struct net_device *netdev, unsigned int txqueue)
 
 	pf->tx_timeout_count++;
 
+	/* Check if PFC is enabled for the TC to which the queue belongs
+	 * to. If yes then Tx timeout is not caused by a hung queue, no
+	 * need to reset and rebuild
+	 */
+	if (ice_is_pfc_causing_hung_q(pf, txqueue)) {
+		dev_info(ice_pf_to_dev(pf), "Fake Tx hang detected on queue %u, timeout caused by PFC storm\n",
+			 txqueue);
+		return;
+	}
+
 	/* now that we have an index, find the tx_ring struct */
 	for (i = 0; i < vsi->num_txq; i++)
 		if (vsi->tx_rings[i] && vsi->tx_rings[i]->desc)
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index c56b2e77a48c..4f5345a7c15d 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -607,6 +607,8 @@ struct ice_eth_stats {
 	u64 tx_errors;			/* tepc */
 };
 
+#define ICE_MAX_UP	8
+
 /* Statistics collected by the MAC */
 struct ice_hw_port_stats {
 	/* eth stats collected by the port */
-- 
cgit v1.2.3-59-g8ed1b


From 891540024bb959c3ce8b71b851ba24cd3ad9423f Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 7 May 2020 17:41:01 -0700
Subject: ice: Fix check for contiguous TCs

The current implementation of the contiguous TC
check assumes that the UPs will be mapped to TCs in
a linearly progressing fashion.  This is obviously
not always true.

Change the check to allow for various UP2TC mapping
configurations.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 19 ++++++++++++-------
 1 file changed, 12 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 448b7d2fb808..3c7f604c0c49 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -584,16 +584,21 @@ static int ice_dcb_sw_dflt_cfg(struct ice_pf *pf, bool ets_willing, bool locked)
  */
 static bool ice_dcb_tc_contig(u8 *prio_table)
 {
-	u8 max_tc = 0;
+	bool found_empty = false;
+	u8 used_tc = 0;
 	int i;
 
-	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
-		u8 cur_tc = prio_table[i];
+	/* Create a bitmap of used TCs */
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++)
+		used_tc |= BIT(prio_table[i]);
 
-		if (cur_tc > max_tc)
-			return false;
-		else if (cur_tc == max_tc)
-			max_tc++;
+	for (i = 0; i < CEE_DCBX_MAX_PRIO; i++) {
+		if (used_tc & BIT(i)) {
+			if (found_empty)
+				return false;
+		} else {
+			found_empty = true;
+		}
 	}
 
 	return true;
-- 
cgit v1.2.3-59-g8ed1b


From 46a316500e060abd4c5745f45ffdc28d426dc99b Mon Sep 17 00:00:00 2001
From: Dave Ertman <david.m.ertman@intel.com>
Date: Thu, 7 May 2020 17:41:02 -0700
Subject: ice: only drop link once when setting pauseparams

Currently, the ice driver is setting a PHY configuration,
which causes a link drop, and then additionally it calls
for a nway_reset, which restarts auto-negotiation on the
link, which also causes a link drop.  These two link
events in such close timing are causing the FW to not be
able to generate a link interrupt for the driver to
respond to.

Remove the unnecessary auto-negotiation restart from the
set pauseparams flow.  Also remove the error path that
would have performed an ice_down/ice_up as that is
also unnecessary.

Signed-off-by: Dave Ertman <david.m.ertman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 66d0bcc51ad9..db547c0c7c6f 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2977,18 +2977,6 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 		err = -EAGAIN;
 	}
 
-	if (!test_bit(__ICE_DOWN, pf->state)) {
-		/* Give it a little more time to try to come back. If still
-		 * down, restart autoneg link or reinitialize the interface.
-		 */
-		msleep(75);
-		if (!test_bit(__ICE_DOWN, pf->state))
-			return ice_nway_reset(netdev);
-
-		ice_down(vsi);
-		ice_up(vsi);
-	}
-
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From de75135b5c04c96dc8bbb005511b7a79d952d7eb Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Thu, 7 May 2020 17:41:03 -0700
Subject: ice: Fix probe/open race condition

As soon as the driver registers the PF netdev, userspace utilities
like NetworkManager try to bring up the associated interface. When
this happens, the driver may not have finished initializing fully,
resulting in a bunch of errors in the interface up flow.

The driver already has a mechanism to indicate that it's not up yet
(setting the __ICE_DOWN bit in pf->state), but this bit gets
cleared too early in the current flow. So clear this bit only when
the driver is fully up. Also check for the same bit in the ice_open
flow, and return -EBUSY if the bit is set.

Also in ice_open, replace references of vsi->back with a local
variable.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 24 ++++++++++++++----------
 1 file changed, 14 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 8c792ecc6550..de81d9049b97 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2137,10 +2137,8 @@ static irqreturn_t ice_misc_intr(int __always_unused irq, void *data)
 	}
 	ret = IRQ_HANDLED;
 
-	if (!test_bit(__ICE_DOWN, pf->state)) {
-		ice_service_task_schedule(pf);
-		ice_irq_dynamic_ena(hw, NULL, NULL);
-	}
+	ice_service_task_schedule(pf);
+	ice_irq_dynamic_ena(hw, NULL, NULL);
 
 	return ret;
 }
@@ -3312,9 +3310,6 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 		goto err_init_interrupt_unroll;
 	}
 
-	/* Driver is mostly up */
-	clear_bit(__ICE_DOWN, pf->state);
-
 	/* In case of MSIX we are going to setup the misc vector right here
 	 * to handle admin queue events etc. In case of legacy and MSI
 	 * the misc functionality and queue processing is combined in
@@ -3370,9 +3365,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	ice_verify_cacheline_size(pf);
 
-	/* If no DDP driven features have to be setup, return here */
+	/* If no DDP driven features have to be setup, we are done with probe */
 	if (ice_is_safe_mode(pf))
-		return 0;
+		goto probe_done;
 
 	/* initialize DDP driven features */
 
@@ -3387,6 +3382,9 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	/* print PCI link speed and width */
 	pcie_print_link_status(pf->pdev);
 
+probe_done:
+	/* ready to go, so clear down state bit */
+	clear_bit(__ICE_DOWN, pf->state);
 	return 0;
 
 err_alloc_sw_unroll:
@@ -5261,14 +5259,20 @@ int ice_open(struct net_device *netdev)
 {
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
+	struct ice_pf *pf = vsi->back;
 	struct ice_port_info *pi;
 	int err;
 
-	if (test_bit(__ICE_NEEDS_RESTART, vsi->back->state)) {
+	if (test_bit(__ICE_NEEDS_RESTART, pf->state)) {
 		netdev_err(netdev, "driver needs to be unloaded and reloaded\n");
 		return -EIO;
 	}
 
+	if (test_bit(__ICE_DOWN, pf->state)) {
+		netdev_err(netdev, "device is not ready yet\n");
+		return -EBUSY;
+	}
+
 	netif_carrier_off(netdev);
 
 	pi = vsi->port_info;
-- 
cgit v1.2.3-59-g8ed1b


From 0fee35774d8f2277cfb101dd1d9d0d5e39015b93 Mon Sep 17 00:00:00 2001
From: Lihong Yang <lihong.yang@intel.com>
Date: Thu, 7 May 2020 17:41:04 -0700
Subject: ice: Provide more meaningful error message

When printing the ice status or AQ error codes, instead of printing out the
numerical value, provide the description of the error code. This provides
more info about the issue than a number.

Signed-off-by: Lihong Yang <lihong.yang@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |   2 +
 drivers/net/ethernet/intel/ice/ice_base.c        |   8 +-
 drivers/net/ethernet/intel/ice/ice_ethtool.c     |  42 +++---
 drivers/net/ethernet/intel/ice/ice_lib.c         | 103 ++++++-------
 drivers/net/ethernet/intel/ice/ice_main.c        | 177 +++++++++++++++++++----
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c |  28 ++--
 6 files changed, 247 insertions(+), 113 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index ce7172901428..be90337cabb8 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -528,6 +528,8 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size);
 void ice_fill_rss_lut(u8 *lut, u16 rss_table_size, u16 rss_size);
 int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
+const char *ice_stat_str(enum ice_status stat_err);
+const char *ice_aq_str(enum ice_aq_err aq_err);
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index a19cd6f5436b..ee1c698ff056 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -662,8 +662,8 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
 	status = ice_ena_vsi_txq(vsi->port_info, vsi->idx, tc, ring->q_handle,
 				 1, qg_buf, buf_len, NULL);
 	if (status) {
-		dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %d\n",
-			status);
+		dev_err(ice_pf_to_dev(pf), "Failed to set LAN Tx queue context, error: %s\n",
+			ice_stat_str(status));
 		return -ENODEV;
 	}
 
@@ -832,8 +832,8 @@ ice_vsi_stop_tx_ring(struct ice_vsi *vsi, enum ice_disq_rst_src rst_src,
 	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
 		dev_dbg(ice_pf_to_dev(vsi->back), "LAN Tx queues do not exist, nothing to disable\n");
 	} else if (status) {
-		dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %d\n",
-			status);
+		dev_err(ice_pf_to_dev(vsi->back), "Failed to disable LAN Tx queues, error: %s\n",
+			ice_stat_str(status));
 		return -ENODEV;
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index db547c0c7c6f..b814bc54f752 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -275,8 +275,9 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
 
 	status = ice_acquire_nvm(hw, ICE_RES_READ);
 	if (status) {
-		dev_err(dev, "ice_acquire_nvm failed, err %d aq_err %d\n",
-			status, hw->adminq.sq_last_status);
+		dev_err(dev, "ice_acquire_nvm failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto out;
 	}
@@ -284,8 +285,9 @@ ice_get_eeprom(struct net_device *netdev, struct ethtool_eeprom *eeprom,
 	status = ice_read_flat_nvm(hw, eeprom->offset, &eeprom->len, buf,
 				   false);
 	if (status) {
-		dev_err(dev, "ice_read_flat_nvm failed, err %d aq_err %d\n",
-			status, hw->adminq.sq_last_status);
+		dev_err(dev, "ice_read_flat_nvm failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto release;
 	}
@@ -334,7 +336,8 @@ static u64 ice_link_test(struct net_device *netdev)
 	netdev_info(netdev, "link test\n");
 	status = ice_get_link_status(np->vsi->port_info, &link_up);
 	if (status) {
-		netdev_err(netdev, "link query error, status = %d\n", status);
+		netdev_err(netdev, "link query error, status = %s\n",
+			   ice_stat_str(status));
 		return 1;
 	}
 
@@ -1160,8 +1163,9 @@ static int ice_nway_reset(struct net_device *netdev)
 		status = ice_aq_set_link_restart_an(pi, false, NULL);
 
 	if (status) {
-		netdev_info(netdev, "link restart failed, err %d aq_err %d\n",
-			    status, pi->hw->adminq.sq_last_status);
+		netdev_info(netdev, "link restart failed, err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(pi->hw->adminq.sq_last_status));
 		return -EIO;
 	}
 
@@ -2462,8 +2466,8 @@ ice_set_rss_hash_opt(struct ice_vsi *vsi, struct ethtool_rxnfc *nfc)
 
 	status = ice_add_rss_cfg(&pf->hw, vsi->idx, hashed_flds, hdrs);
 	if (status) {
-		dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %d\n",
-			vsi->vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed, vsi num = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 		return -EINVAL;
 	}
 
@@ -2964,16 +2968,19 @@ ice_set_pauseparam(struct net_device *netdev, struct ethtool_pauseparam *pause)
 	status = ice_set_fc(pi, &aq_failures, link_up);
 
 	if (aq_failures & ICE_SET_FC_AQ_FAIL_GET) {
-		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %d aq_err %d\n",
-			    status, hw->adminq.sq_last_status);
+		netdev_info(netdev, "Set fc failed on the get_phy_capabilities call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
 		err = -EAGAIN;
 	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_SET) {
-		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %d aq_err %d\n",
-			    status, hw->adminq.sq_last_status);
+		netdev_info(netdev, "Set fc failed on the set_phy_config call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
 		err = -EAGAIN;
 	} else if (aq_failures & ICE_SET_FC_AQ_FAIL_UPDATE) {
-		netdev_info(netdev, "Set fc failed on the get_link_info call with err %d aq_err %d\n",
-			    status, hw->adminq.sq_last_status);
+		netdev_info(netdev, "Set fc failed on the get_link_info call with err %s aq_err %s\n",
+			    ice_stat_str(status),
+			    ice_aq_str(hw->adminq.sq_last_status));
 		err = -EAGAIN;
 	}
 
@@ -3227,8 +3234,9 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 	status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type, lut,
 				    vsi->rss_table_size);
 	if (status) {
-		dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
-			status, hw->adminq.rq_last_status);
+		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.rq_last_status));
 		err = -EIO;
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 2f256bf45efc..bf4c538c94bb 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -248,8 +248,8 @@ void ice_vsi_delete(struct ice_vsi *vsi)
 
 	status = ice_free_vsi(&pf->hw, vsi->idx, ctxt, false, NULL);
 	if (status)
-		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %d\n",
-			vsi->vsi_num, status);
+		dev_err(ice_pf_to_dev(pf), "Failed to delete VSI %i in FW - error: %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 
 	kfree(ctxt);
 }
@@ -521,8 +521,8 @@ static void ice_vsi_clean_rss_flow_fld(struct ice_vsi *vsi)
 
 	status = ice_rem_vsi_rss_cfg(&pf->hw, vsi->idx);
 	if (status)
-		dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %d\n",
-			vsi->vsi_num, status);
+		dev_dbg(ice_pf_to_dev(pf), "ice_rem_vsi_rss_cfg failed for vsi = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 }
 
 /**
@@ -1193,7 +1193,8 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 				    vsi->rss_table_size);
 
 	if (status) {
-		dev_err(dev, "set_rss_lut failed, error %d\n", status);
+		dev_err(dev, "set_rss_lut failed, error %s\n",
+			ice_stat_str(status));
 		err = -EIO;
 		goto ice_vsi_cfg_rss_exit;
 	}
@@ -1215,7 +1216,8 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	status = ice_aq_set_rss_key(&pf->hw, vsi->idx, key);
 
 	if (status) {
-		dev_err(dev, "set_rss_key failed, error %d\n", status);
+		dev_err(dev, "set_rss_key failed, error %s\n",
+			ice_stat_str(status));
 		err = -EIO;
 	}
 
@@ -1248,8 +1250,8 @@ static void ice_vsi_set_vf_rss_flow_fld(struct ice_vsi *vsi)
 
 	status = ice_add_avf_rss_cfg(&pf->hw, vsi->idx, ICE_DEFAULT_RSS_HENA);
 	if (status)
-		dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %d\n",
-			vsi->vsi_num, status);
+		dev_dbg(dev, "ice_add_avf_rss_cfg failed for vsi = %d, error = %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 }
 
 /**
@@ -1281,57 +1283,57 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
 				 ICE_FLOW_SEG_HDR_IPV4);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for IPv6 with input set IPv6 src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
 				 ICE_FLOW_SEG_HDR_IPV6);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for ipv6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for tcp4 with input set IP src/dst, TCP src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV4,
 				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV4);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for udp4 with input set IP src/dst, UDP src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV4,
 				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV4);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for sctp4 with input set IP src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV4,
 				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV4);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp4 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for tcp6 with input set IPv6 src/dst, TCP src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_TCP_IPV6,
 				 ICE_FLOW_SEG_HDR_TCP | ICE_FLOW_SEG_HDR_IPV6);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for tcp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for udp6 with input set IPv6 src/dst, UDP src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_HASH_UDP_IPV6,
 				 ICE_FLOW_SEG_HDR_UDP | ICE_FLOW_SEG_HDR_IPV6);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for udp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 
 	/* configure RSS for sctp6 with input set IPv6 src/dst */
 	status = ice_add_rss_cfg(hw, vsi_handle, ICE_FLOW_HASH_IPV6,
 				 ICE_FLOW_SEG_HDR_SCTP | ICE_FLOW_SEG_HDR_IPV6);
 	if (status)
-		dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %d\n",
-			vsi_num, status);
+		dev_dbg(dev, "ice_add_rss_cfg failed for sctp6 flow, vsi = %d, error = %s\n",
+			vsi_num, ice_stat_str(status));
 }
 
 /**
@@ -1509,11 +1511,11 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 	if (!status) {
 		vsi->num_vlan--;
 	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
-		dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %d\n",
-			vid, vsi->vsi_num, status);
+		dev_dbg(dev, "Failed to remove VLAN %d on VSI %i, it does not exist, status: %s\n",
+			vid, vsi->vsi_num, ice_stat_str(status));
 	} else {
-		dev_err(dev, "Error removing VLAN %d on vsi %i error: %d\n",
-			vid, vsi->vsi_num, status);
+		dev_err(dev, "Error removing VLAN %d on vsi %i error: %s\n",
+			vid, vsi->vsi_num, ice_stat_str(status));
 		err = -EIO;
 	}
 
@@ -1737,8 +1739,9 @@ int ice_vsi_manage_vlan_insertion(struct ice_vsi *vsi)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %d aq_err %d\n",
-			status, hw->adminq.sq_last_status);
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN insert failed, err %s aq_err %s\n",
+			ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto out;
 	}
@@ -1783,8 +1786,9 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %d aq_err %d\n",
-			ena, status, hw->adminq.sq_last_status);
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for VLAN strip failed, ena = %d err %s aq_err %s\n",
+			ena, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto out;
 	}
@@ -1922,9 +1926,10 @@ int ice_cfg_vlan_pruning(struct ice_vsi *vsi, bool ena, bool vlan_promisc)
 
 	status = ice_update_vsi(&pf->hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %d, aq_err = %d\n",
-			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num, status,
-			   pf->hw.adminq.sq_last_status);
+		netdev_err(vsi->netdev, "%sabling VLAN pruning on VSI handle: %d, VSI HW ID: %d failed, err = %s, aq_err = %s\n",
+			   ena ? "En" : "Dis", vsi->idx, vsi->vsi_num,
+			   ice_stat_str(status),
+			   ice_aq_str(pf->hw.adminq.sq_last_status));
 		goto err_out;
 	}
 
@@ -2025,8 +2030,8 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
 		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
 
 	if (status)
-		dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %d\n",
-			vsi->vsi_num, status);
+		dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 
 	ice_free_fltr_list(dev, &tmp_add_list);
 }
@@ -2073,9 +2078,9 @@ void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
 		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
 
 	if (status)
-		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %d\n",
+		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
 			create ? "adding" : "removing", tx ? "TX" : "RX",
-			vsi->vsi_num, status);
+			vsi->vsi_num, ice_stat_str(status));
 
 	ice_free_fltr_list(dev, &tmp_add_list);
 }
@@ -2223,8 +2228,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
-		dev_err(dev, "VSI %d failed lan queue config, error %d\n",
-			vsi->vsi_num, status);
+		dev_err(dev, "VSI %d failed lan queue config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 		goto unroll_vector_base;
 	}
 
@@ -2814,8 +2819,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
-		dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %d\n",
-			vsi->vsi_num, status);
+		dev_err(ice_pf_to_dev(pf), "VSI %d failed lan queue config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 		if (init_vsi) {
 			ret = -EIO;
 			goto err_vectors;
@@ -2924,8 +2929,8 @@ int ice_vsi_cfg_tc(struct ice_vsi *vsi, u8 ena_tc)
 				 max_txqs);
 
 	if (status) {
-		dev_err(dev, "VSI %d failed TC config, error %d\n",
-			vsi->vsi_num, status);
+		dev_err(dev, "VSI %d failed TC config, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 		ret = -EIO;
 		goto out;
 	}
@@ -3079,8 +3084,8 @@ int ice_set_dflt_vsi(struct ice_sw *sw, struct ice_vsi *vsi)
 
 	status = ice_cfg_dflt_vsi(&vsi->back->hw, vsi->idx, true, ICE_FLTR_RX);
 	if (status) {
-		dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %d\n",
-			vsi->vsi_num, status);
+		dev_err(dev, "Failed to set VSI %d as the default forwarding VSI, error %s\n",
+			vsi->vsi_num, ice_stat_str(status));
 		return -EIO;
 	}
 
@@ -3118,8 +3123,8 @@ int ice_clear_dflt_vsi(struct ice_sw *sw)
 	status = ice_cfg_dflt_vsi(&dflt_vsi->back->hw, dflt_vsi->idx, false,
 				  ICE_FLTR_RX);
 	if (status) {
-		dev_err(dev, "Failed to clear the default forwarding VSI %d, error %d\n",
-			dflt_vsi->vsi_num, status);
+		dev_err(dev, "Failed to clear the default forwarding VSI %d, error %s\n",
+			dflt_vsi->vsi_num, ice_stat_str(status));
 		return -EIO;
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index de81d9049b97..c4dda1fa5853 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -163,8 +163,8 @@ unregister:
 	 * had an error
 	 */
 	if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
-		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %d. Unregistering device\n",
-			status);
+		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n",
+			ice_stat_str(status));
 		unregister_netdev(vsi->netdev);
 		free_netdev(vsi->netdev);
 		vsi->netdev = NULL;
@@ -1017,8 +1017,8 @@ static int __ice_clean_ctrlq(struct ice_pf *pf, enum ice_ctl_q q_type)
 		if (ret == ICE_ERR_AQ_NO_WORK)
 			break;
 		if (ret) {
-			dev_err(dev, "%s Receive Queue event error %d\n", qtype,
-				ret);
+			dev_err(dev, "%s Receive Queue event error %s\n", qtype,
+				ice_stat_str(ret));
 			break;
 		}
 
@@ -1809,8 +1809,8 @@ int ice_prepare_xdp_rings(struct ice_vsi *vsi, struct bpf_prog *prog)
 	status = ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 				 max_txqs);
 	if (status) {
-		dev_err(dev, "Failed VSI LAN queue config for XDP, error:%d\n",
-			status);
+		dev_err(dev, "Failed VSI LAN queue config for XDP, error: %s\n",
+			ice_stat_str(status));
 		goto clear_xdp_rings;
 	}
 	ice_vsi_assign_bpf_prog(vsi, prog);
@@ -3752,8 +3752,8 @@ err_update_filters:
 	flags = ICE_AQC_MAN_MAC_UPDATE_LAA_WOL;
 	status = ice_aq_manage_mac_write(hw, mac, flags, NULL);
 	if (status) {
-		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %d\n",
-			   mac, status);
+		netdev_err(netdev, "can't set MAC %pM. write to firmware failed error %s\n",
+			   mac, ice_stat_str(status));
 	}
 	return 0;
 }
@@ -3817,8 +3817,8 @@ ice_set_tx_maxrate(struct net_device *netdev, int queue_index, u32 maxrate)
 		status = ice_cfg_q_bw_lmt(vsi->port_info, vsi->idx, tc,
 					  q_handle, ICE_MAX_BW, maxrate * 1000);
 	if (status) {
-		netdev_err(netdev, "Unable to set Tx max rate, error %d\n",
-			   status);
+		netdev_err(netdev, "Unable to set Tx max rate, error %s\n",
+			   ice_stat_str(status));
 		return -EIO;
 	}
 
@@ -4616,8 +4616,9 @@ static int ice_vsi_rebuild_by_type(struct ice_pf *pf, enum ice_vsi_type type)
 		/* replay filters for the VSI */
 		status = ice_replay_vsi(&pf->hw, vsi->idx);
 		if (status) {
-			dev_err(dev, "replay VSI failed, status %d, VSI index %d, type %s\n",
-				status, vsi->idx, ice_vsi_type_str(type));
+			dev_err(dev, "replay VSI failed, status %s, VSI index %d, type %s\n",
+				ice_stat_str(status), vsi->idx,
+				ice_vsi_type_str(type));
 			return -EIO;
 		}
 
@@ -4686,7 +4687,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	ret = ice_init_all_ctrlq(hw);
 	if (ret) {
-		dev_err(dev, "control queues init failed %d\n", ret);
+		dev_err(dev, "control queues init failed %s\n",
+			ice_stat_str(ret));
 		goto err_init_ctrlq;
 	}
 
@@ -4702,7 +4704,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	ret = ice_clear_pf_cfg(hw);
 	if (ret) {
-		dev_err(dev, "clear PF configuration failed %d\n", ret);
+		dev_err(dev, "clear PF configuration failed %s\n",
+			ice_stat_str(ret));
 		goto err_init_ctrlq;
 	}
 
@@ -4716,7 +4719,7 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 	ret = ice_get_caps(hw);
 	if (ret) {
-		dev_err(dev, "ice_get_caps failed %d\n", ret);
+		dev_err(dev, "ice_get_caps failed %s\n", ice_stat_str(ret));
 		goto err_init_ctrlq;
 	}
 
@@ -4758,8 +4761,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 	/* tell the firmware we are up */
 	ret = ice_send_version(pf);
 	if (ret) {
-		dev_err(dev, "Rebuild failed due to error sending driver version: %d\n",
-			ret);
+		dev_err(dev, "Rebuild failed due to error sending driver version: %s\n",
+			ice_stat_str(ret));
 		goto err_vsi_rebuild;
 	}
 
@@ -4870,6 +4873,112 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	return 0;
 }
 
+/**
+ * ice_aq_str - convert AQ err code to a string
+ * @aq_err: the AQ error code to convert
+ */
+const char *ice_aq_str(enum ice_aq_err aq_err)
+{
+	switch (aq_err) {
+	case ICE_AQ_RC_OK:
+		return "OK";
+	case ICE_AQ_RC_EPERM:
+		return "ICE_AQ_RC_EPERM";
+	case ICE_AQ_RC_ENOENT:
+		return "ICE_AQ_RC_ENOENT";
+	case ICE_AQ_RC_ENOMEM:
+		return "ICE_AQ_RC_ENOMEM";
+	case ICE_AQ_RC_EBUSY:
+		return "ICE_AQ_RC_EBUSY";
+	case ICE_AQ_RC_EEXIST:
+		return "ICE_AQ_RC_EEXIST";
+	case ICE_AQ_RC_EINVAL:
+		return "ICE_AQ_RC_EINVAL";
+	case ICE_AQ_RC_ENOSPC:
+		return "ICE_AQ_RC_ENOSPC";
+	case ICE_AQ_RC_ENOSYS:
+		return "ICE_AQ_RC_ENOSYS";
+	case ICE_AQ_RC_ENOSEC:
+		return "ICE_AQ_RC_ENOSEC";
+	case ICE_AQ_RC_EBADSIG:
+		return "ICE_AQ_RC_EBADSIG";
+	case ICE_AQ_RC_ESVN:
+		return "ICE_AQ_RC_ESVN";
+	case ICE_AQ_RC_EBADMAN:
+		return "ICE_AQ_RC_EBADMAN";
+	case ICE_AQ_RC_EBADBUF:
+		return "ICE_AQ_RC_EBADBUF";
+	}
+
+	return "ICE_AQ_RC_UNKNOWN";
+}
+
+/**
+ * ice_stat_str - convert status err code to a string
+ * @stat_err: the status error code to convert
+ */
+const char *ice_stat_str(enum ice_status stat_err)
+{
+	switch (stat_err) {
+	case ICE_SUCCESS:
+		return "OK";
+	case ICE_ERR_PARAM:
+		return "ICE_ERR_PARAM";
+	case ICE_ERR_NOT_IMPL:
+		return "ICE_ERR_NOT_IMPL";
+	case ICE_ERR_NOT_READY:
+		return "ICE_ERR_NOT_READY";
+	case ICE_ERR_NOT_SUPPORTED:
+		return "ICE_ERR_NOT_SUPPORTED";
+	case ICE_ERR_BAD_PTR:
+		return "ICE_ERR_BAD_PTR";
+	case ICE_ERR_INVAL_SIZE:
+		return "ICE_ERR_INVAL_SIZE";
+	case ICE_ERR_DEVICE_NOT_SUPPORTED:
+		return "ICE_ERR_DEVICE_NOT_SUPPORTED";
+	case ICE_ERR_RESET_FAILED:
+		return "ICE_ERR_RESET_FAILED";
+	case ICE_ERR_FW_API_VER:
+		return "ICE_ERR_FW_API_VER";
+	case ICE_ERR_NO_MEMORY:
+		return "ICE_ERR_NO_MEMORY";
+	case ICE_ERR_CFG:
+		return "ICE_ERR_CFG";
+	case ICE_ERR_OUT_OF_RANGE:
+		return "ICE_ERR_OUT_OF_RANGE";
+	case ICE_ERR_ALREADY_EXISTS:
+		return "ICE_ERR_ALREADY_EXISTS";
+	case ICE_ERR_NVM_CHECKSUM:
+		return "ICE_ERR_NVM_CHECKSUM";
+	case ICE_ERR_BUF_TOO_SHORT:
+		return "ICE_ERR_BUF_TOO_SHORT";
+	case ICE_ERR_NVM_BLANK_MODE:
+		return "ICE_ERR_NVM_BLANK_MODE";
+	case ICE_ERR_IN_USE:
+		return "ICE_ERR_IN_USE";
+	case ICE_ERR_MAX_LIMIT:
+		return "ICE_ERR_MAX_LIMIT";
+	case ICE_ERR_RESET_ONGOING:
+		return "ICE_ERR_RESET_ONGOING";
+	case ICE_ERR_HW_TABLE:
+		return "ICE_ERR_HW_TABLE";
+	case ICE_ERR_DOES_NOT_EXIST:
+		return "ICE_ERR_DOES_NOT_EXIST";
+	case ICE_ERR_AQ_ERROR:
+		return "ICE_ERR_AQ_ERROR";
+	case ICE_ERR_AQ_TIMEOUT:
+		return "ICE_ERR_AQ_TIMEOUT";
+	case ICE_ERR_AQ_FULL:
+		return "ICE_ERR_AQ_FULL";
+	case ICE_ERR_AQ_NO_WORK:
+		return "ICE_ERR_AQ_NO_WORK";
+	case ICE_ERR_AQ_EMPTY:
+		return "ICE_ERR_AQ_EMPTY";
+	}
+
+	return "ICE_ERR_UNKNOWN";
+}
+
 /**
  * ice_set_rss - Set RSS keys and lut
  * @vsi: Pointer to VSI structure
@@ -4894,8 +5003,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_set_rss_key(hw, vsi->idx, buf);
 
 		if (status) {
-			dev_err(dev, "Cannot set RSS key, err %d aq_err %d\n",
-				status, hw->adminq.rq_last_status);
+			dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.rq_last_status));
 			return -EIO;
 		}
 	}
@@ -4904,8 +5014,9 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_set_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 					    lut, lut_size);
 		if (status) {
-			dev_err(dev, "Cannot set RSS lut, err %d aq_err %d\n",
-				status, hw->adminq.rq_last_status);
+			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.rq_last_status));
 			return -EIO;
 		}
 	}
@@ -4936,8 +5047,9 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 
 		status = ice_aq_get_rss_key(hw, vsi->idx, buf);
 		if (status) {
-			dev_err(dev, "Cannot get RSS key, err %d aq_err %d\n",
-				status, hw->adminq.rq_last_status);
+			dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.rq_last_status));
 			return -EIO;
 		}
 	}
@@ -4946,8 +5058,9 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		status = ice_aq_get_rss_lut(hw, vsi->idx, vsi->rss_lut_type,
 					    lut, lut_size);
 		if (status) {
-			dev_err(dev, "Cannot get RSS lut, err %d aq_err %d\n",
-				status, hw->adminq.rq_last_status);
+			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
+				ice_stat_str(status),
+				ice_aq_str(hw->adminq.rq_last_status));
 			return -EIO;
 		}
 	}
@@ -5014,8 +5127,9 @@ static int ice_vsi_update_bridge_mode(struct ice_vsi *vsi, u16 bmode)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %d aq_err %d\n",
-			bmode, status, hw->adminq.sq_last_status);
+		dev_err(ice_pf_to_dev(vsi->back), "update VSI for bridge mode failed, bmode = %d err %s aq_err %s\n",
+			bmode, ice_stat_str(status),
+			ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto out;
 	}
@@ -5084,8 +5198,9 @@ ice_bridge_setlink(struct net_device *dev, struct nlmsghdr *nlh,
 		 */
 		status = ice_update_sw_rule_bridge_mode(hw);
 		if (status) {
-			netdev_err(dev, "switch rule update failed, mode = %d err %d aq_err %d\n",
-				   mode, status, hw->adminq.sq_last_status);
+			netdev_err(dev, "switch rule update failed, mode = %d err %s aq_err %s\n",
+				   mode, ice_stat_str(status),
+				   ice_aq_str(hw->adminq.sq_last_status));
 			/* revert hw->evb_veb */
 			hw->evb_veb = (pf_sw->bridge_mode == BRIDGE_MODE_VEB);
 			return -EIO;
@@ -5211,8 +5326,8 @@ ice_udp_tunnel_add(struct net_device *netdev, struct udp_tunnel_info *ti)
 		netdev_info(netdev, "Max tunneled UDP ports reached, port %d not added\n",
 			    port);
 	else if (status)
-		netdev_err(netdev, "Error adding UDP tunnel - %d\n",
-			   status);
+		netdev_err(netdev, "Error adding UDP tunnel - %s\n",
+			   ice_stat_str(status));
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 1389d0d6d3d2..fc03b278370b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -491,8 +491,9 @@ static int ice_vsi_manage_pvid(struct ice_vsi *vsi, u16 pvid_info, bool enable)
 
 	status = ice_update_vsi(hw, vsi->idx, ctxt, NULL);
 	if (status) {
-		dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %d aq_err %d\n",
-			 status, hw->adminq.sq_last_status);
+		dev_info(ice_hw_to_dev(hw), "update VSI for port VLAN failed, err %s aq_err %s\n",
+			 ice_stat_str(status),
+			 ice_aq_str(hw->adminq.sq_last_status));
 		ret = -EIO;
 		goto out;
 	}
@@ -1659,8 +1660,9 @@ ice_vc_send_msg_to_vf(struct ice_vf *vf, u32 v_opcode,
 	aq_ret = ice_aq_send_msg_to_vf(&pf->hw, vf->vf_id, v_opcode, v_retval,
 				       msg, msglen, NULL);
 	if (aq_ret && pf->hw.mailboxq.sq_last_status != ICE_AQ_RC_ENOSYS) {
-		dev_info(dev, "Unable to send the message to VF %d ret %d aq_err %d\n",
-			 vf->vf_id, aq_ret, pf->hw.mailboxq.sq_last_status);
+		dev_info(dev, "Unable to send the message to VF %d ret %s aq_err %s\n",
+			 vf->vf_id, ice_stat_str(aq_ret),
+			 ice_aq_str(pf->hw.mailboxq.sq_last_status));
 		return -EIO;
 	}
 
@@ -2075,8 +2077,9 @@ int ice_set_vf_spoofchk(struct net_device *netdev, int vf_id, bool ena)
 
 	status = ice_update_vsi(&pf->hw, vf_vsi->idx, ctx, NULL);
 	if (status) {
-		dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %d\n",
-			ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num, status);
+		dev_err(dev, "Failed to %sable spoofchk on VF %d VSI %d\n error %s\n",
+			ena ? "en" : "dis", vf->vf_id, vf_vsi->vsi_num,
+			ice_stat_str(status));
 		ret = -EIO;
 		goto out;
 	}
@@ -2232,8 +2235,9 @@ static int ice_vc_cfg_promiscuous_mode_msg(struct ice_vf *vf, u8 *msg)
 		 */
 		status = ice_vf_set_vsi_promisc(vf, vsi, promisc_m, rm_promisc);
 		if (status) {
-			dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %d\n",
-				rm_promisc ? "dis" : "en", vf->vf_id, status);
+			dev_err(dev, "%sable Tx/Rx filter promiscuous mode on VF-%d failed, error: %s\n",
+				rm_promisc ? "dis" : "en", vf->vf_id,
+				ice_stat_str(status));
 			v_ret = ice_err_to_virt_err(status);
 			goto error_param;
 		} else {
@@ -2808,8 +2812,8 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 			vf->vf_id);
 		return -EEXIST;
 	} else if (status) {
-		dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %d\n",
-			mac_addr, vf->vf_id, status);
+		dev_err(dev, "Failed to add MAC %pM for VF %d\n, error %s\n",
+			mac_addr, vf->vf_id, ice_stat_str(status));
 		return -EIO;
 	}
 
@@ -2845,8 +2849,8 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 			vf->vf_id);
 		return -ENOENT;
 	} else if (status) {
-		dev_err(dev, "Failed to delete MAC %pM for VF %d, error %d\n",
-			mac_addr, vf->vf_id, status);
+		dev_err(dev, "Failed to delete MAC %pM for VF %d, error %s\n",
+			mac_addr, vf->vf_id, ice_stat_str(status));
 		return -EIO;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 88865fc4bbd61e48d04e2747d59b85d2014cca82 Mon Sep 17 00:00:00 2001
From: Karol Kolacinski <karol.kolacinski@intel.com>
Date: Thu, 7 May 2020 17:41:05 -0700
Subject: ice: Fix casting issues

Change min() macros to min_t() which has compare type specified and it
helps avoid precision loss.

In some cases there was precision loss during calls or assignments.
Some fields in structs were unnecessarily large and gave multiple
warnings.

There were also some minor type differences which are now fixed as well as
some cases where a simple cast was needed.

Callers were passing data that is a u16 to
ice_sched_cfg_node_bw_alloc() but the function was truncating that to a u8.
Fix that by changing the function to take a u16.

Signed-off-by: Karol Kolacinski <karol.kolacinski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h           | 10 ++++-----
 drivers/net/ethernet/intel/ice/ice_base.c      | 24 ++++++++++++---------
 drivers/net/ethernet/intel/ice/ice_controlq.c  |  2 +-
 drivers/net/ethernet/intel/ice/ice_ethtool.c   |  6 +++---
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 13 ++++++------
 drivers/net/ethernet/intel/ice/ice_lib.c       | 29 ++++++++++++++------------
 drivers/net/ethernet/intel/ice/ice_main.c      | 13 ++++++------
 drivers/net/ethernet/intel/ice/ice_sched.c     |  2 +-
 drivers/net/ethernet/intel/ice/ice_switch.c    | 12 +++++------
 drivers/net/ethernet/intel/ice/ice_txrx.c      | 19 +++++++++--------
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c  |  2 +-
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h  |  2 +-
 drivers/net/ethernet/intel/ice/ice_type.h      |  4 ++--
 13 files changed, 74 insertions(+), 64 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index be90337cabb8..cd2bf9b8e385 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -248,8 +248,8 @@ struct ice_vsi {
 	u32 tx_busy;
 	u32 rx_buf_failed;
 	u32 rx_page_failed;
-	int num_q_vectors;
-	int base_vector;		/* IRQ base for OS reserved vectors */
+	u16 num_q_vectors;
+	u16 base_vector;		/* IRQ base for OS reserved vectors */
 	enum ice_vsi_type type;
 	u16 vsi_num;			/* HW (absolute) index of this VSI */
 	u16 idx;			/* software index in pf->vsi[] */
@@ -390,11 +390,11 @@ struct ice_pf {
 	struct mutex tc_mutex;		/* lock to protect TC changes */
 	u32 msg_enable;
 	u32 hw_csum_rx_error;
-	u32 oicr_idx;		/* Other interrupt cause MSIX vector index */
-	u32 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
+	u16 oicr_idx;		/* Other interrupt cause MSIX vector index */
+	u16 num_avail_sw_msix;	/* remaining MSIX SW vectors left unclaimed */
 	u16 max_pf_txqs;	/* Total Tx queues PF wide */
 	u16 max_pf_rxqs;	/* Total Rx queues PF wide */
-	u32 num_lan_msix;	/* Total MSIX vectors for base driver */
+	u16 num_lan_msix;	/* Total MSIX vectors for base driver */
 	u16 num_lan_tx;		/* num LAN Tx queues setup */
 	u16 num_lan_rx;		/* num LAN Rx queues setup */
 	u16 next_vsi;		/* Next free slot in pf->vsi[] - 0-based! */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index ee1c698ff056..4c835c144907 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -24,7 +24,7 @@ static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
 
 	bitmap_set(qs_cfg->pf_map, offset, qs_cfg->q_count);
 	for (i = 0; i < qs_cfg->q_count; i++)
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = i + offset;
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)(i + offset);
 	mutex_unlock(qs_cfg->qs_mutex);
 
 	return 0;
@@ -47,7 +47,7 @@ static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
 		if (index >= qs_cfg->pf_map_size)
 			goto err_scatter;
 		set_bit(index, qs_cfg->pf_map);
-		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = index;
+		qs_cfg->vsi_map[i + qs_cfg->vsi_map_offset] = (u16)index;
 	}
 	mutex_unlock(qs_cfg->qs_mutex);
 
@@ -96,7 +96,7 @@ static int ice_pf_rxq_wait(struct ice_pf *pf, int pf_q, bool ena)
  * We allocate one q_vector and set default value for ITR setting associated
  * with this q_vector. If allocation fails we return -ENOMEM.
  */
-static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, int v_idx)
+static int ice_vsi_alloc_q_vector(struct ice_vsi *vsi, u16 v_idx)
 {
 	struct ice_pf *pf = vsi->back;
 	struct ice_q_vector *q_vector;
@@ -376,7 +376,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	/* Max packet size for this queue - must not be set to a larger value
 	 * than 5 x DBUF
 	 */
-	rlan_ctx.rxmax = min_t(u16, vsi->max_frame,
+	rlan_ctx.rxmax = min_t(u32, vsi->max_frame,
 			       chain_len * ring->rx_buf_len);
 
 	/* Rx queue threshold in units of 64 */
@@ -453,7 +453,7 @@ int __ice_vsi_get_qs(struct ice_qs_cfg *qs_cfg)
 	if (ret) {
 		/* contig failed, so try with scatter approach */
 		qs_cfg->mapping_mode = ICE_VSI_MAP_SCATTER;
-		qs_cfg->q_count = min_t(u16, qs_cfg->q_count,
+		qs_cfg->q_count = min_t(unsigned int, qs_cfg->q_count,
 					qs_cfg->scatter_count);
 		ret = __ice_vsi_get_qs_sc(qs_cfg);
 	}
@@ -526,7 +526,8 @@ int ice_vsi_wait_one_rx_ring(struct ice_vsi *vsi, bool ena, u16 rxq_idx)
 int ice_vsi_alloc_q_vectors(struct ice_vsi *vsi)
 {
 	struct device *dev = ice_pf_to_dev(vsi->back);
-	int v_idx, err;
+	u16 v_idx;
+	int err;
 
 	if (vsi->q_vectors[0]) {
 		dev_dbg(dev, "VSI %d has existing q_vectors\n", vsi->vsi_num);
@@ -562,7 +563,7 @@ err_out:
 void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
 {
 	int q_vectors = vsi->num_q_vectors;
-	int tx_rings_rem, rx_rings_rem;
+	u16 tx_rings_rem, rx_rings_rem;
 	int v_id;
 
 	/* initially assigning remaining rings count to VSIs num queue value */
@@ -571,10 +572,12 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
 
 	for (v_id = 0; v_id < q_vectors; v_id++) {
 		struct ice_q_vector *q_vector = vsi->q_vectors[v_id];
-		int tx_rings_per_v, rx_rings_per_v, q_id, q_base;
+		u8 tx_rings_per_v, rx_rings_per_v;
+		u16 q_id, q_base;
 
 		/* Tx rings mapping to vector */
-		tx_rings_per_v = DIV_ROUND_UP(tx_rings_rem, q_vectors - v_id);
+		tx_rings_per_v = (u8)DIV_ROUND_UP(tx_rings_rem,
+						  q_vectors - v_id);
 		q_vector->num_ring_tx = tx_rings_per_v;
 		q_vector->tx.ring = NULL;
 		q_vector->tx.itr_idx = ICE_TX_ITR;
@@ -590,7 +593,8 @@ void ice_vsi_map_rings_to_vectors(struct ice_vsi *vsi)
 		tx_rings_rem -= tx_rings_per_v;
 
 		/* Rx rings mapping to vector */
-		rx_rings_per_v = DIV_ROUND_UP(rx_rings_rem, q_vectors - v_id);
+		rx_rings_per_v = (u8)DIV_ROUND_UP(rx_rings_rem,
+						  q_vectors - v_id);
 		q_vector->num_ring_rx = rx_rings_per_v;
 		q_vector->rx.ring = NULL;
 		q_vector->rx.itr_idx = ICE_RX_ITR;
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index dd946866d7b8..9a865962296d 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -1128,7 +1128,7 @@ ice_clean_rq_elem(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 	}
 	memcpy(&e->desc, desc, sizeof(e->desc));
 	datalen = le16_to_cpu(desc->datalen);
-	e->msg_len = min(datalen, e->buf_len);
+	e->msg_len = min_t(u16, datalen, e->buf_len);
 	if (e->msg_buf && e->msg_len)
 		memcpy(e->msg_buf, cq->rq.r.rq_bi[desc_idx].va, e->msg_len);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index b814bc54f752..41a6aa4ebb02 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2609,7 +2609,7 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 	int i, timeout = 50, err = 0;
-	u32 new_rx_cnt, new_tx_cnt;
+	u16 new_rx_cnt, new_tx_cnt;
 
 	if (ring->tx_pending > ICE_MAX_NUM_DESC ||
 	    ring->tx_pending < ICE_MIN_NUM_DESC ||
@@ -2661,8 +2661,8 @@ ice_set_ringparam(struct net_device *netdev, struct ethtool_ringparam *ring)
 		if (ice_is_xdp_ena_vsi(vsi))
 			for (i = 0; i < vsi->num_xdp_txq; i++)
 				vsi->xdp_rings[i]->count = new_tx_cnt;
-		vsi->num_tx_desc = new_tx_cnt;
-		vsi->num_rx_desc = new_rx_cnt;
+		vsi->num_tx_desc = (u16)new_tx_cnt;
+		vsi->num_rx_desc = (u16)new_rx_cnt;
 		netdev_dbg(netdev, "Link is down, descriptor count change happens when link is brought up\n");
 		goto done;
 	}
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 62e305511c7e..4dc72aef5381 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -1694,7 +1694,7 @@ ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port)
 	 */
 	ice_set_key((u8 *)&sect_rx->tcam[0].key, sizeof(sect_rx->tcam[0].key),
 		    (u8 *)&port, NULL, NULL, NULL,
-		    offsetof(struct ice_boost_key_value, hv_dst_port_key),
+		    (u16)offsetof(struct ice_boost_key_value, hv_dst_port_key),
 		    sizeof(sect_rx->tcam[0].key.key.hv_dst_port_key));
 
 	/* exact copy of entry to Tx section entry */
@@ -2329,9 +2329,10 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
 		 struct ice_fv_word *fv, u8 *prof_id)
 {
 	struct ice_es *es = &hw->blk[blk].es;
-	u16 off, i;
+	u16 off;
+	u8 i;
 
-	for (i = 0; i < es->count; i++) {
+	for (i = 0; i < (u8)es->count; i++) {
 		off = i * es->fvw;
 
 		if (memcmp(&es->t[off], fv, es->fvw * sizeof(*fv)))
@@ -3461,7 +3462,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 	DECLARE_BITMAP(ptgs_used, ICE_XLT1_CNT);
 	struct ice_prof_map *prof;
 	enum ice_status status;
-	u32 byte = 0;
+	u8 byte = 0;
 	u8 prof_id;
 
 	bitmap_zero(ptgs_used, ICE_XLT1_CNT);
@@ -3496,7 +3497,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 
 	/* build list of ptgs */
 	while (bytes && prof->ptg_cnt < ICE_MAX_PTG_PER_PROFILE) {
-		u32 bit;
+		u8 bit;
 
 		if (!ptypes[byte]) {
 			bytes--;
@@ -3530,7 +3531,7 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 				break;
 
 			/* nothing left in byte, then exit */
-			m = ~((1 << (bit + 1)) - 1);
+			m = ~(u8)((1 << (bit + 1)) - 1);
 			if (!(ptypes[byte] & m))
 				break;
 		}
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index bf4c538c94bb..9330989ddb40 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -37,7 +37,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
  */
 static int ice_vsi_ctrl_all_rx_rings(struct ice_vsi *vsi, bool ena)
 {
-	int i, ret = 0;
+	int ret = 0;
+	u16 i;
 
 	for (i = 0; i < vsi->num_rxq; i++)
 		ice_vsi_ctrl_one_rx_ring(vsi, ena, i, false);
@@ -565,8 +566,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
 	switch (vsi->type) {
 	case ICE_VSI_PF:
 		/* PF VSI will inherit RSS instance of PF */
-		vsi->rss_table_size = cap->rss_table_size;
-		vsi->rss_size = min_t(int, num_online_cpus(),
+		vsi->rss_table_size = (u16)cap->rss_table_size;
+		vsi->rss_size = min_t(u16, num_online_cpus(),
 				      BIT(cap->rss_table_entry_width));
 		vsi->rss_lut_type = ICE_AQC_GSET_RSS_LUT_TABLE_TYPE_PF;
 		break;
@@ -684,15 +685,15 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 				max_rss = ICE_MAX_LG_RSS_QS;
 			else
 				max_rss = ICE_MAX_RSS_QS_PER_VF;
-			qcount_rx = min_t(int, rx_numq_tc, max_rss);
+			qcount_rx = min_t(u16, rx_numq_tc, max_rss);
 			if (!vsi->req_rxq)
-				qcount_rx = min_t(int, qcount_rx,
+				qcount_rx = min_t(u16, qcount_rx,
 						  vsi->rss_size);
 		}
 	}
 
 	/* find the (rounded up) power-of-2 of qcount */
-	pow = order_base_2(qcount_rx);
+	pow = (u16)order_base_2(qcount_rx);
 
 	ice_for_each_traffic_class(i) {
 		if (!(vsi->tc_cfg.ena_tc & BIT(i))) {
@@ -941,7 +942,7 @@ int ice_free_res(struct ice_res_tracker *res, u16 index, u16 id)
  */
 static int ice_search_res(struct ice_res_tracker *res, u16 needed, u16 id)
 {
-	int start = 0, end = 0;
+	u16 start = 0, end = 0;
 
 	if (needed > res->end)
 		return -ENOMEM;
@@ -1024,6 +1025,7 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 	struct ice_pf *pf = vsi->back;
 	struct device *dev;
 	u16 num_q_vectors;
+	int base;
 
 	dev = ice_pf_to_dev(pf);
 	/* SRIOV doesn't grab irq_tracker entries for each VSI */
@@ -1038,14 +1040,15 @@ static int ice_vsi_setup_vector_base(struct ice_vsi *vsi)
 
 	num_q_vectors = vsi->num_q_vectors;
 	/* reserve slots from OS requested IRQs */
-	vsi->base_vector = ice_get_res(pf, pf->irq_tracker, num_q_vectors,
-				       vsi->idx);
-	if (vsi->base_vector < 0) {
+	base = ice_get_res(pf, pf->irq_tracker, num_q_vectors, vsi->idx);
+
+	if (base < 0) {
 		dev_err(dev, "%d MSI-X interrupts available. %s %d failed to get %d MSI-X vectors\n",
 			ice_get_free_res_count(pf->irq_tracker),
 			ice_vsi_type_str(vsi->type), vsi->idx, num_q_vectors);
 		return -ENOENT;
 	}
+	vsi->base_vector = (u16)base;
 	pf->num_avail_sw_msix -= num_q_vectors;
 
 	return 0;
@@ -1085,7 +1088,7 @@ static int ice_vsi_alloc_rings(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
 	struct device *dev;
-	int i;
+	u16 i;
 
 	dev = ice_pf_to_dev(pf);
 	/* Allocate Tx rings */
@@ -1178,7 +1181,7 @@ static int ice_vsi_cfg_rss_lut_key(struct ice_vsi *vsi)
 	u8 *lut;
 
 	dev = ice_pf_to_dev(pf);
-	vsi->rss_size = min_t(int, vsi->rss_size, vsi->num_rxq);
+	vsi->rss_size = min_t(u16, vsi->rss_size, vsi->num_rxq);
 
 	lut = kzalloc(vsi->rss_table_size, GFP_KERNEL);
 	if (!lut)
@@ -1673,7 +1676,7 @@ void ice_vsi_cfg_msix(struct ice_vsi *vsi)
 {
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
-	u32 txq = 0, rxq = 0;
+	u16 txq = 0, rxq = 0;
 	int i, q;
 
 	for (i = 0; i < vsi->num_q_vectors; i++) {
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c4dda1fa5853..cbfd54867be1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2245,7 +2245,7 @@ static int ice_req_irq_msix_misc(struct ice_pf *pf)
 		return oicr_idx;
 
 	pf->num_avail_sw_msix -= 1;
-	pf->oicr_idx = oicr_idx;
+	pf->oicr_idx = (u16)oicr_idx;
 
 	err = devm_request_irq(dev, pf->msix_entries[pf->oicr_idx].vector,
 			       ice_misc_intr, 0, pf->int_name, pf);
@@ -2642,7 +2642,8 @@ unroll_vsi_setup:
 static u16
 ice_get_avail_q_count(unsigned long *pf_qmap, struct mutex *lock, u16 size)
 {
-	u16 count = 0, bit;
+	unsigned long bit;
+	u16 count = 0;
 
 	mutex_lock(lock);
 	for_each_clear_bit(bit, pf_qmap, size)
@@ -2881,8 +2882,8 @@ static int ice_init_interrupt_scheme(struct ice_pf *pf)
 	}
 
 	/* populate SW interrupts pool with number of OS granted IRQs. */
-	pf->num_avail_sw_msix = vectors;
-	pf->irq_tracker->num_entries = vectors;
+	pf->num_avail_sw_msix = (u16)vectors;
+	pf->irq_tracker->num_entries = (u16)vectors;
 	pf->irq_tracker->end = pf->irq_tracker->num_entries;
 
 	return 0;
@@ -2914,9 +2915,9 @@ int ice_vsi_recfg_qs(struct ice_vsi *vsi, int new_rx, int new_tx)
 	}
 
 	if (new_tx)
-		vsi->req_txq = new_tx;
+		vsi->req_txq = (u16)new_tx;
 	if (new_rx)
-		vsi->req_rxq = new_rx;
+		vsi->req_rxq = (u16)new_rx;
 
 	/* set for the next time the netdev is started */
 	if (!netif_running(vsi->netdev)) {
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index eae707ddf8e8..d63acd2fcf79 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1917,7 +1917,7 @@ ice_sched_update_elem(struct ice_hw *hw, struct ice_sched_node *node,
  */
 static enum ice_status
 ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
-			    enum ice_rl_type rl_type, u8 bw_alloc)
+			    enum ice_rl_type rl_type, u16 bw_alloc)
 {
 	struct ice_aqc_txsched_elem_data buf;
 	struct ice_aqc_txsched_elem *data;
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 51825a203e35..7d88944de31a 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -593,8 +593,8 @@ enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw)
 			    ICE_AQC_GET_SW_CONF_RESP_IS_VF)
 				is_vf = true;
 
-			res_type = le16_to_cpu(ele->vsi_port_num) >>
-				ICE_AQC_GET_SW_CONF_RESP_TYPE_S;
+			res_type = (u8)(le16_to_cpu(ele->vsi_port_num) >>
+					ICE_AQC_GET_SW_CONF_RESP_TYPE_S);
 
 			if (res_type == ICE_AQC_GET_SW_CONF_RESP_VSI) {
 				/* FW VSI is not needed. Just continue. */
@@ -1618,12 +1618,12 @@ ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
 	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
 	struct ice_fltr_list_entry *m_list_itr;
 	struct list_head *rule_head;
-	u16 elem_sent, total_elem_left;
+	u16 total_elem_left, s_rule_size;
 	struct ice_switch_info *sw;
 	struct mutex *rule_lock; /* Lock to protect filter rule list */
 	enum ice_status status = 0;
 	u16 num_unicast = 0;
-	u16 s_rule_size;
+	u8 elem_sent;
 
 	if (!m_list || !hw)
 		return ICE_ERR_PARAM;
@@ -1707,8 +1707,8 @@ ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
 	     total_elem_left -= elem_sent) {
 		struct ice_aqc_sw_rules_elem *entry = r_iter;
 
-		elem_sent = min(total_elem_left,
-				(u16)(ICE_AQ_MAX_BUF_LEN / s_rule_size));
+		elem_sent = min_t(u8, total_elem_left,
+				  (ICE_AQ_MAX_BUF_LEN / s_rule_size));
 		status = ice_aq_sw_rules(hw, entry, elem_sent * s_rule_size,
 					 elem_sent, ice_aqc_opc_add_sw_rules,
 					 NULL);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 4ba1fc8261d9..05d1077f80c3 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -819,7 +819,7 @@ static struct sk_buff *
 ice_build_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
 	      struct xdp_buff *xdp)
 {
-	unsigned int metasize = xdp->data - xdp->data_meta;
+	u8 metasize = xdp->data - xdp->data_meta;
 #if (PAGE_SIZE < 8192)
 	unsigned int truesize = ice_rx_pg_size(rx_ring) / 2;
 #else
@@ -934,7 +934,7 @@ ice_construct_skb(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf,
  */
 static void ice_put_rx_buf(struct ice_ring *rx_ring, struct ice_rx_buf *rx_buf)
 {
-	u32 ntc = rx_ring->next_to_clean + 1;
+	u16 ntc = rx_ring->next_to_clean + 1;
 
 	/* fetch, update, and store next to clean */
 	ntc = (ntc < rx_ring->count) ? ntc : 0;
@@ -1544,7 +1544,7 @@ int ice_napi_poll(struct napi_struct *napi, int budget)
 		 * don't allow the budget to go below 1 because that would exit
 		 * polling early.
 		 */
-		budget_per_ring = max(budget / q_vector->num_ring_rx, 1);
+		budget_per_ring = max_t(int, budget / q_vector->num_ring_rx, 1);
 	else
 		/* Max of 1 Rx ring in this q_vector so give it the budget */
 		budget_per_ring = budget;
@@ -2026,7 +2026,8 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 		unsigned char *hdr;
 	} l4;
 	u64 cd_mss, cd_tso_len;
-	u32 paylen, l4_start;
+	u32 paylen;
+	u8 l4_start;
 	int err;
 
 	if (skb->ip_summed != CHECKSUM_PARTIAL)
@@ -2062,7 +2063,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 			l4.udp->len = 0;
 
 			/* determine offset of outer transport header */
-			l4_start = l4.hdr - skb->data;
+			l4_start = (u8)(l4.hdr - skb->data);
 
 			/* remove payload length from outer checksum */
 			paylen = skb->len - l4_start;
@@ -2086,7 +2087,7 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 	}
 
 	/* determine offset of transport header */
-	l4_start = l4.hdr - skb->data;
+	l4_start = (u8)(l4.hdr - skb->data);
 
 	/* remove payload length from checksum */
 	paylen = skb->len - l4_start;
@@ -2095,12 +2096,12 @@ int ice_tso(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
 		csum_replace_by_diff(&l4.udp->check,
 				     (__force __wsum)htonl(paylen));
 		/* compute length of UDP segmentation header */
-		off->header_len = sizeof(l4.udp) + l4_start;
+		off->header_len = (u8)sizeof(l4.udp) + l4_start;
 	} else {
 		csum_replace_by_diff(&l4.tcp->check,
 				     (__force __wsum)htonl(paylen));
 		/* compute length of TCP segmentation header */
-		off->header_len = (l4.tcp->doff * 4) + l4_start;
+		off->header_len = (u8)((l4.tcp->doff * 4) + l4_start);
 	}
 
 	/* update gso_segs and bytecount */
@@ -2331,7 +2332,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 
 	if (offload.cd_qw1 & ICE_TX_DESC_DTYPE_CTX) {
 		struct ice_tx_ctx_desc *cdesc;
-		int i = tx_ring->next_to_use;
+		u16 i = tx_ring->next_to_use;
 
 		/* grab the next descriptor */
 		cdesc = ICE_TX_CTX_DESC(tx_ring, i);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 1f9c3d24cde7..9d6512f96b8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -8,7 +8,7 @@
  * @rx_ring: ring to bump
  * @val: new head index
  */
-void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val)
+void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val)
 {
 	u16 prev_ntu = rx_ring->next_to_use & ~0x7;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index ba9164dad9ae..af0fca5b91ff 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -49,7 +49,7 @@ static inline void ice_xdp_ring_update_tail(struct ice_ring *xdp_ring)
 void ice_finalize_xdp_rx(struct ice_ring *rx_ring, unsigned int xdp_res);
 int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring);
 int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring);
-void ice_release_rx_desc(struct ice_ring *rx_ring, u32 val);
+void ice_release_rx_desc(struct ice_ring *rx_ring, u16 val);
 void
 ice_process_skb_fields(struct ice_ring *rx_ring,
 		       union ice_32b_rx_flex_desc *rx_desc,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 4f5345a7c15d..9f6578eb4672 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -501,8 +501,8 @@ struct ice_hw {
 	u16 max_burst_size;	/* driver sets this value */
 
 	/* Tx Scheduler values */
-	u16 num_tx_sched_layers;
-	u16 num_tx_sched_phys_layers;
+	u8 num_tx_sched_layers;
+	u8 num_tx_sched_phys_layers;
 	u8 flattened_layers;
 	u8 max_cgds;
 	u8 sw_entry_point_layer;
-- 
cgit v1.2.3-59-g8ed1b


From 53bb66983f34d4ff0af179fe228e2c55e1e45921 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Thu, 7 May 2020 17:41:06 -0700
Subject: ice: cleanup vf_id signedness

The vf_id variable is handled with inconsistent signedness in the
code, preventing compilation with -Werror=sign-compare.
Fix this problem in the code by always treating vf_id as unsigned, since
there are no valid values of vf_id that are negative.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h             |  2 +-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 17 +++++++++--------
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h |  2 +-
 3 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index cd2bf9b8e385..58d0d6436c7f 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -371,7 +371,7 @@ struct ice_pf {
 	struct ice_sw *first_sw;	/* first switch created by firmware */
 	/* Virtchnl/SR-IOV config info */
 	struct ice_vf *vf;
-	int num_alloc_vfs;		/* actual number of VFs allocated */
+	u16 num_alloc_vfs;		/* actual number of VFs allocated */
 	u16 num_vfs_supported;		/* num VFs supported for this PF */
 	u16 num_qps_per_vf;
 	u16 num_msix_per_vf;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index fc03b278370b..9fb74a390b8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -10,10 +10,11 @@
  * @pf: pointer to the PF structure
  * @vf_id: the ID of the VF to check
  */
-static int ice_validate_vf_id(struct ice_pf *pf, int vf_id)
+static int ice_validate_vf_id(struct ice_pf *pf, u16 vf_id)
 {
+	/* vf_id range is only valid for 0-255, and should always be unsigned */
 	if (vf_id >= pf->num_alloc_vfs) {
-		dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %d\n", vf_id);
+		dev_err(ice_pf_to_dev(pf), "Invalid VF ID: %u\n", vf_id);
 		return -EINVAL;
 	}
 	return 0;
@@ -27,7 +28,7 @@ static int ice_validate_vf_id(struct ice_pf *pf, int vf_id)
 static int ice_check_vf_init(struct ice_pf *pf, struct ice_vf *vf)
 {
 	if (!test_bit(ICE_VF_STATE_INIT, vf->vf_states)) {
-		dev_err(ice_pf_to_dev(pf), "VF ID: %d in reset. Try again.\n",
+		dev_err(ice_pf_to_dev(pf), "VF ID: %u in reset. Try again.\n",
 			vf->vf_id);
 		return -EBUSY;
 	}
@@ -368,7 +369,7 @@ void ice_free_vfs(struct ice_pf *pf)
 	 * before this function ever gets called.
 	 */
 	if (!pci_vfs_assigned(pf->pdev)) {
-		int vf_id;
+		unsigned int vf_id;
 
 		/* Acknowledge VFLR for all VFs. Without this, VFs will fail to
 		 * work correctly when SR-IOV gets re-enabled.
@@ -399,9 +400,9 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 {
 	struct ice_pf *pf = vf->pf;
 	u32 reg, reg_idx, bit_idx;
+	unsigned int vf_abs_id, i;
 	struct device *dev;
 	struct ice_hw *hw;
-	int vf_abs_id, i;
 
 	dev = ice_pf_to_dev(pf);
 	hw = &pf->hw;
@@ -449,7 +450,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 		if ((reg & VF_TRANS_PENDING_M) == 0)
 			break;
 
-		dev_err(dev, "VF %d PCI transactions stuck\n", vf->vf_id);
+		dev_err(dev, "VF %u PCI transactions stuck\n", vf->vf_id);
 		udelay(ICE_PCI_CIAD_WAIT_DELAY_US);
 	}
 }
@@ -1515,7 +1516,7 @@ int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 void ice_process_vflr_event(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
-	int vf_id;
+	unsigned int vf_id;
 	u32 reg;
 
 	if (!test_and_clear_bit(__ICE_VFLR_EVENT_PENDING, pf->state) ||
@@ -1556,7 +1557,7 @@ static void ice_vc_reset_vf(struct ice_vf *vf)
  */
 static struct ice_vf *ice_get_vf_from_pfq(struct ice_pf *pf, u16 pfq)
 {
-	int vf_id;
+	unsigned int vf_id;
 
 	ice_for_each_vf(pf, vf_id) {
 		struct ice_vf *vf = &pf->vf[vf_id];
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index f7fd1188efa4..474293ff4fe5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -64,7 +64,7 @@ struct ice_mdd_vf_events {
 struct ice_vf {
 	struct ice_pf *pf;
 
-	s16 vf_id;			/* VF ID in the PF space */
+	u16 vf_id;			/* VF ID in the PF space */
 	u16 lan_vsi_idx;		/* index into PF struct */
 	/* first vector index of this VF in the PF space */
 	int first_vector_idx;
-- 
cgit v1.2.3-59-g8ed1b


From 857a4f0e9f4956fffc0cedcaa2ba187a2e987153 Mon Sep 17 00:00:00 2001
From: Eric Joyner <eric.joyner@intel.com>
Date: Thu, 7 May 2020 17:41:07 -0700
Subject: ice: Fix resource leak on early exit from function

Memory allocated in the ice_add_prof_id_vsig() function wasn't being
properly freed if an error occurred inside the for-loop in the function.

In particular, 'p' wasn't being freed if an error occurred before it was
added to the resource list at the end of the for-loop.

Signed-off-by: Eric Joyner <eric.joyner@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 4dc72aef5381..38c37f506257 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -4228,8 +4228,10 @@ ice_add_prof_id_vsig(struct ice_hw *hw, enum ice_block blk, u16 vsig, u64 hdl,
 					      t->tcam[i].prof_id,
 					      t->tcam[i].ptg, vsig, 0, 0,
 					      vl_msk, dc_msk, nm_msk);
-		if (status)
+		if (status) {
+			devm_kfree(ice_hw_to_dev(hw), p);
 			goto err_ice_add_prof_id_vsig;
+		}
 
 		/* log change */
 		list_add(&p->list_entry, chg);
-- 
cgit v1.2.3-59-g8ed1b


From 1b8f15b64a006b5fbbfbc898c612a48a86fff6de Mon Sep 17 00:00:00 2001
From: Michal Swiatkowski <michal.swiatkowski@intel.com>
Date: Thu, 7 May 2020 17:41:08 -0700
Subject: ice: refactor filter functions

Move filter functions to separate file.

Add functions that prepare a suitable ice_fltr_info struct
depending on the filter type and add this struct to a previously
created list:
- ice_fltr_add_mac_to_list
- ice_fltr_add_vlan_to_list
- ice_fltr_add_eth_to_list
These functions are used when adding and removing filters.

Create wrappers for the functions mentioned above that allocate a list,
add a suitable ice_fltr_info to it and call the add or remove function.
- ice_fltr_prepare_mac
- ice_fltr_prepare_mac_and_broadcast
- ice_fltr_prepare_vlan
- ice_fltr_prepare_eth

Signed-off-by: Michal Swiatkowski <michal.swiatkowski@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile          |   1 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c     |  13 +-
 drivers/net/ethernet/intel/ice/ice_fltr.c        | 397 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fltr.h        |  39 +++
 drivers/net/ethernet/intel/ice/ice_lib.c         | 212 ++----------
 drivers/net/ethernet/intel/ice/ice_lib.h         |   9 +-
 drivers/net/ethernet/intel/ice/ice_main.c        |  53 +--
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c |  34 +-
 8 files changed, 494 insertions(+), 264 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_fltr.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_fltr.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 29c6c6743450..2055e61eaf24 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -17,6 +17,7 @@ ice-y := ice_main.o	\
 	 ice_lib.o	\
 	 ice_txrx_lib.o	\
 	 ice_txrx.o	\
+	 ice_fltr.o	\
 	 ice_flex_pipe.o \
 	 ice_flow.o	\
 	 ice_devlink.o	\
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 41a6aa4ebb02..9fb82c993df9 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -5,6 +5,7 @@
 
 #include "ice.h"
 #include "ice_flow.h"
+#include "ice_fltr.h"
 #include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
@@ -676,7 +677,6 @@ static u64 ice_loopback_test(struct net_device *netdev)
 	struct ice_ring *tx_ring, *rx_ring;
 	u8 broadcast[ETH_ALEN], ret = 0;
 	int num_frames, valid_frames;
-	LIST_HEAD(tmp_list);
 	struct device *dev;
 	u8 *tx_frame;
 	int i;
@@ -712,16 +712,11 @@ static u64 ice_loopback_test(struct net_device *netdev)
 
 	/* Test VSI needs to receive broadcast packets */
 	eth_broadcast_addr(broadcast);
-	if (ice_add_mac_to_list(test_vsi, &tmp_list, broadcast)) {
+	if (ice_fltr_add_mac(test_vsi, broadcast, ICE_FWD_TO_VSI)) {
 		ret = 5;
 		goto lbtest_mac_dis;
 	}
 
-	if (ice_add_mac(&pf->hw, &tmp_list)) {
-		ret = 6;
-		goto free_mac_list;
-	}
-
 	if (ice_lbtest_create_frame(pf, &tx_frame, ICE_LB_FRAME_SIZE)) {
 		ret = 7;
 		goto remove_mac_filters;
@@ -744,10 +739,8 @@ static u64 ice_loopback_test(struct net_device *netdev)
 lbtest_free_frame:
 	devm_kfree(dev, tx_frame);
 remove_mac_filters:
-	if (ice_remove_mac(&pf->hw, &tmp_list))
+	if (ice_fltr_remove_mac(test_vsi, broadcast, ICE_FWD_TO_VSI))
 		netdev_err(netdev, "Could not remove MAC filter for the test VSI\n");
-free_mac_list:
-	ice_free_fltr_list(dev, &tmp_list);
 lbtest_mac_dis:
 	/* Disable MAC loopback after the test is completed. */
 	if (ice_aq_set_mac_loopback(&pf->hw, false, NULL))
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.c b/drivers/net/ethernet/intel/ice/ice_fltr.c
new file mode 100644
index 000000000000..2418d4fff037
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.c
@@ -0,0 +1,397 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+#include "ice_fltr.h"
+
+/**
+ * ice_fltr_free_list - free filter lists helper
+ * @dev: pointer to the device struct
+ * @h: pointer to the list head to be freed
+ *
+ * Helper function to free filter lists previously created using
+ * ice_fltr_add_mac_to_list
+ */
+void ice_fltr_free_list(struct device *dev, struct list_head *h)
+{
+	struct ice_fltr_list_entry *e, *tmp;
+
+	list_for_each_entry_safe(e, tmp, h, list_entry) {
+		list_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_fltr_add_entry_to_list - allocate and add filter entry to list
+ * @dev: pointer to device needed by alloc function
+ * @info: filter info struct that gets added to the passed in list
+ * @list: pointer to the list which contains MAC filters entry
+ */
+static int
+ice_fltr_add_entry_to_list(struct device *dev, struct ice_fltr_info *info,
+			   struct list_head *list)
+{
+	struct ice_fltr_list_entry *entry;
+
+	entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC);
+	if (!entry)
+		return -ENOMEM;
+
+	entry->fltr_info = *info;
+
+	INIT_LIST_HEAD(&entry->list_entry);
+	list_add(&entry->list_entry, list);
+
+	return 0;
+}
+
+/**
+ * ice_fltr_add_mac_list - add list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+enum ice_status
+ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_mac_list - remove list of MAC filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+enum ice_status
+ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_vlan_list - add list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_add_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_vlan_list - remove list of VLAN filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_remove_vlan_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_vlan(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_add_eth_list - add list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_add_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_add_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_eth_list - remove list of ethertype filters
+ * @vsi: pointer to VSI struct
+ * @list: list of filters
+ */
+static enum ice_status
+ice_fltr_remove_eth_list(struct ice_vsi *vsi, struct list_head *list)
+{
+	return ice_remove_eth_mac(&vsi->back->hw, list);
+}
+
+/**
+ * ice_fltr_remove_all - remove all filters associated with VSI
+ * @vsi: pointer to VSI struct
+ */
+void ice_fltr_remove_all(struct ice_vsi *vsi)
+{
+	ice_remove_vsi_fltr(&vsi->back->hw, vsi->idx);
+}
+
+/**
+ * ice_fltr_add_mac_to_list - add MAC filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @mac: MAC address to add
+ * @action: filter action
+ */
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_MAC;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+
+	ether_addr_copy(info.l_data.mac.mac_addr, mac);
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_vlan_to_list - add VLAN filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @vlan_id: VLAN ID to add
+ * @action: filter action
+ */
+static int
+ice_fltr_add_vlan_to_list(struct ice_vsi *vsi, struct list_head *list,
+			  u16 vlan_id, enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = ICE_FLTR_TX;
+	info.src_id = ICE_SRC_ID_VSI;
+	info.lkup_type = ICE_SW_LKUP_VLAN;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+	info.l_data.vlan.vlan_id = vlan_id;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_add_eth_to_list - add ethertype filter info to existing list
+ * @vsi: pointer to VSI struct
+ * @list: list to add filter info to
+ * @ethertype: ethertype of packet that matches filter
+ * @flag: filter direction, Tx or Rx
+ * @action: filter action
+ */
+static int
+ice_fltr_add_eth_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 u16 ethertype, u16 flag,
+			 enum ice_sw_fwd_act_type action)
+{
+	struct ice_fltr_info info = { 0 };
+
+	info.flag = flag;
+	info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
+	info.fltr_act = action;
+	info.vsi_handle = vsi->idx;
+	info.l_data.ethertype_mac.ethertype = ethertype;
+
+	if (flag == ICE_FLTR_TX)
+		info.src_id = ICE_SRC_ID_VSI;
+	else
+		info.src_id = ICE_SRC_ID_LPORT;
+
+	return ice_fltr_add_entry_to_list(ice_pf_to_dev(vsi->back), &info,
+					  list);
+}
+
+/**
+ * ice_fltr_prepare_mac - add or remove MAC rule
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add or remove
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static enum ice_status
+ice_fltr_prepare_mac(struct ice_vsi *vsi, const u8 *mac,
+		     enum ice_sw_fwd_act_type action,
+		     enum ice_status (*mac_action)(struct ice_vsi *,
+						   struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_mac_and_broadcast - add or remove MAC and broadcast filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC address to add or remove
+ * @action: action to be performed on filter match
+ * @mac_action: pointer to add or remove MAC function
+ */
+static enum ice_status
+ice_fltr_prepare_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+				   enum ice_sw_fwd_act_type action,
+				   enum ice_status(*mac_action)
+				   (struct ice_vsi *, struct list_head *))
+{
+	u8 broadcast[ETH_ALEN];
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	eth_broadcast_addr(broadcast);
+	if (ice_fltr_add_mac_to_list(vsi, &tmp_list, mac, action) ||
+	    ice_fltr_add_mac_to_list(vsi, &tmp_list, broadcast, action)) {
+		ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+		return ICE_ERR_NO_MEMORY;
+	}
+
+	result = mac_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_vlan - add or remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: VLAN ID to add or remove
+ * @action: action to be performed on filter match
+ * @vlan_action: pointer to add or remove VLAN function
+ */
+static enum ice_status
+ice_fltr_prepare_vlan(struct ice_vsi *vsi, u16 vlan_id,
+		      enum ice_sw_fwd_act_type action,
+		      enum ice_status (*vlan_action)(struct ice_vsi *,
+						     struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_vlan_to_list(vsi, &tmp_list, vlan_id, action))
+		return ICE_ERR_NO_MEMORY;
+
+	result = vlan_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_prepare_eth - add or remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of packet to be filtered
+ * @flag: direction of packet, Tx or Rx
+ * @action: action to be performed on filter match
+ * @eth_action: pointer to add or remove ethertype function
+ */
+static enum ice_status
+ice_fltr_prepare_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		     enum ice_sw_fwd_act_type action,
+		     enum ice_status (*eth_action)(struct ice_vsi *,
+						   struct list_head *))
+{
+	enum ice_status result;
+	LIST_HEAD(tmp_list);
+
+	if (ice_fltr_add_eth_to_list(vsi, &tmp_list, ethertype, flag, action))
+		return ICE_ERR_NO_MEMORY;
+
+	result = eth_action(vsi, &tmp_list);
+	ice_fltr_free_list(ice_pf_to_dev(vsi->back), &tmp_list);
+	return result;
+}
+
+/**
+ * ice_fltr_add_mac - add single MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+				 enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_add_mac_and_broadcast - add single MAC and broadcast
+ * @vsi: pointer to VSI struct
+ * @mac: MAC to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac_and_broadcast(vsi, mac, action,
+						  ice_fltr_add_mac_list);
+}
+
+/**
+ * ice_fltr_remove_mac - remove MAC filter
+ * @vsi: pointer to VSI struct
+ * @mac: filter MAC to remove
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+				    enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_mac(vsi, mac, action, ice_fltr_remove_mac_list);
+}
+
+/**
+ * ice_fltr_add_vlan - add single VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: VLAN ID to add
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vlan_id,
+				  enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan_id, action,
+				     ice_fltr_add_vlan_list);
+}
+
+/**
+ * ice_fltr_remove_vlan - remove VLAN filter
+ * @vsi: pointer to VSI struct
+ * @vlan_id: filter VLAN to remove
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vlan_id,
+				     enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_vlan(vsi, vlan_id, action,
+				     ice_fltr_remove_vlan_list);
+}
+
+/**
+ * ice_fltr_add_eth - add specific ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of packet to be filtered, Tx or Rx
+ * @action: action to be performed on filter match
+ */
+enum ice_status ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+				 enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_add_eth_list);
+}
+
+/**
+ * ice_fltr_remove_eth - remove ethertype filter
+ * @vsi: pointer to VSI struct
+ * @ethertype: ethertype of filter
+ * @flag: direction of filter
+ * @action: action to remove
+ */
+enum ice_status ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype,
+				    u16 flag, enum ice_sw_fwd_act_type action)
+{
+	return ice_fltr_prepare_eth(vsi, ethertype, flag, action,
+				    ice_fltr_remove_eth_list);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fltr.h b/drivers/net/ethernet/intel/ice/ice_fltr.h
new file mode 100644
index 000000000000..361cb4da9b43
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fltr.h
@@ -0,0 +1,39 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FLTR_H_
+#define _ICE_FLTR_H_
+
+void ice_fltr_free_list(struct device *dev, struct list_head *h);
+int
+ice_fltr_add_mac_to_list(struct ice_vsi *vsi, struct list_head *list,
+			 const u8 *mac, enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac(struct ice_vsi *vsi, const u8 *mac,
+		 enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac_and_broadcast(struct ice_vsi *vsi, const u8 *mac,
+			       enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_add_mac_list(struct ice_vsi *vsi, struct list_head *list);
+enum ice_status
+ice_fltr_remove_mac(struct ice_vsi *vsi, const u8 *mac,
+		    enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_mac_list(struct ice_vsi *vsi, struct list_head *list);
+
+enum ice_status
+ice_fltr_add_vlan(struct ice_vsi *vsi, u16 vid,
+		  enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_vlan(struct ice_vsi *vsi, u16 vid,
+		     enum ice_sw_fwd_act_type action);
+
+enum ice_status
+ice_fltr_add_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		 enum ice_sw_fwd_act_type action);
+enum ice_status
+ice_fltr_remove_eth(struct ice_vsi *vsi, u16 ethertype, u16 flag,
+		    enum ice_sw_fwd_act_type action);
+void ice_fltr_remove_all(struct ice_vsi *vsi);
+#endif
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 9330989ddb40..c73c977f6967 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -5,6 +5,7 @@
 #include "ice_base.h"
 #include "ice_flow.h"
 #include "ice_lib.h"
+#include "ice_fltr.h"
 #include "ice_dcb_lib.h"
 
 /**
@@ -1339,40 +1340,6 @@ static void ice_vsi_set_rss_flow_fld(struct ice_vsi *vsi)
 			vsi_num, ice_stat_str(status));
 }
 
-/**
- * ice_add_mac_to_list - Add a MAC address filter entry to the list
- * @vsi: the VSI to be forwarded to
- * @add_list: pointer to the list which contains MAC filter entries
- * @macaddr: the MAC address to be added.
- *
- * Adds MAC address filter entry to the temp list
- *
- * Returns 0 on success or ENOMEM on failure.
- */
-int
-ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
-		    const u8 *macaddr)
-{
-	struct ice_fltr_list_entry *tmp;
-	struct ice_pf *pf = vsi->back;
-
-	tmp = devm_kzalloc(ice_pf_to_dev(pf), sizeof(*tmp), GFP_ATOMIC);
-	if (!tmp)
-		return -ENOMEM;
-
-	tmp->fltr_info.flag = ICE_FLTR_TX;
-	tmp->fltr_info.src_id = ICE_SRC_ID_VSI;
-	tmp->fltr_info.lkup_type = ICE_SW_LKUP_MAC;
-	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	tmp->fltr_info.vsi_handle = vsi->idx;
-	ether_addr_copy(tmp->fltr_info.l_data.mac.mac_addr, macaddr);
-
-	INIT_LIST_HEAD(&tmp->list_entry);
-	list_add(&tmp->list_entry, add_list);
-
-	return 0;
-}
-
 /**
  * ice_update_eth_stats - Update VSI-specific ethernet statistics counters
  * @vsi: the VSI to be updated
@@ -1419,55 +1386,22 @@ void ice_update_eth_stats(struct ice_vsi *vsi)
 	vsi->stat_offsets_loaded = true;
 }
 
-/**
- * ice_free_fltr_list - free filter lists helper
- * @dev: pointer to the device struct
- * @h: pointer to the list head to be freed
- *
- * Helper function to free filter lists previously created using
- * ice_add_mac_to_list
- */
-void ice_free_fltr_list(struct device *dev, struct list_head *h)
-{
-	struct ice_fltr_list_entry *e, *tmp;
-
-	list_for_each_entry_safe(e, tmp, h, list_entry) {
-		list_del(&e->list_entry);
-		devm_kfree(dev, e);
-	}
-}
-
 /**
  * ice_vsi_add_vlan - Add VSI membership for given VLAN
  * @vsi: the VSI being configured
  * @vid: VLAN ID to be added
+ * @action: filter action to be performed on match
  */
-int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
+int
+ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action)
 {
-	struct ice_fltr_list_entry *tmp;
 	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
-	enum ice_status status;
 	struct device *dev;
 	int err = 0;
 
 	dev = ice_pf_to_dev(pf);
-	tmp = devm_kzalloc(dev, sizeof(*tmp), GFP_KERNEL);
-	if (!tmp)
-		return -ENOMEM;
-
-	tmp->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
-	tmp->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	tmp->fltr_info.flag = ICE_FLTR_TX;
-	tmp->fltr_info.src_id = ICE_SRC_ID_VSI;
-	tmp->fltr_info.vsi_handle = vsi->idx;
-	tmp->fltr_info.l_data.vlan.vlan_id = vid;
-
-	INIT_LIST_HEAD(&tmp->list_entry);
-	list_add(&tmp->list_entry, &tmp_add_list);
 
-	status = ice_add_vlan(&pf->hw, &tmp_add_list);
-	if (!status) {
+	if (!ice_fltr_add_vlan(vsi, vid, action)) {
 		vsi->num_vlan++;
 	} else {
 		err = -ENODEV;
@@ -1475,7 +1409,6 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
 			vsi->vsi_num);
 	}
 
-	ice_free_fltr_list(dev, &tmp_add_list);
 	return err;
 }
 
@@ -1488,29 +1421,14 @@ int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid)
  */
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 {
-	struct ice_fltr_list_entry *list;
 	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
 	struct device *dev;
 	int err = 0;
 
 	dev = ice_pf_to_dev(pf);
-	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
-	if (!list)
-		return -ENOMEM;
-
-	list->fltr_info.lkup_type = ICE_SW_LKUP_VLAN;
-	list->fltr_info.vsi_handle = vsi->idx;
-	list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-	list->fltr_info.l_data.vlan.vlan_id = vid;
-	list->fltr_info.flag = ICE_FLTR_TX;
-	list->fltr_info.src_id = ICE_SRC_ID_VSI;
 
-	INIT_LIST_HEAD(&list->list_entry);
-	list_add(&list->list_entry, &tmp_add_list);
-
-	status = ice_remove_vlan(&pf->hw, &tmp_add_list);
+	status = ice_fltr_remove_vlan(vsi, vid, ICE_FWD_TO_VSI);
 	if (!status) {
 		vsi->num_vlan--;
 	} else if (status == ICE_ERR_DOES_NOT_EXIST) {
@@ -1522,7 +1440,6 @@ int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid)
 		err = -EIO;
 	}
 
-	ice_free_fltr_list(dev, &tmp_add_list);
 	return err;
 }
 
@@ -1998,47 +1915,6 @@ clear_reg_idx:
 	return -EINVAL;
 }
 
-/**
- * ice_vsi_add_rem_eth_mac - Program VSI ethertype based filter with rule
- * @vsi: the VSI being configured
- * @add_rule: boolean value to add or remove ethertype filter rule
- */
-static void
-ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
-{
-	struct ice_fltr_list_entry *list;
-	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
-	enum ice_status status;
-	struct device *dev;
-
-	dev = ice_pf_to_dev(pf);
-	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
-	if (!list)
-		return;
-
-	list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
-	list->fltr_info.fltr_act = ICE_DROP_PACKET;
-	list->fltr_info.flag = ICE_FLTR_TX;
-	list->fltr_info.src_id = ICE_SRC_ID_VSI;
-	list->fltr_info.vsi_handle = vsi->idx;
-	list->fltr_info.l_data.ethertype_mac.ethertype = vsi->ethtype;
-
-	INIT_LIST_HEAD(&list->list_entry);
-	list_add(&list->list_entry, &tmp_add_list);
-
-	if (add_rule)
-		status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
-	else
-		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
-
-	if (status)
-		dev_err(dev, "Failure Adding or Removing Ethertype on VSI %i error: %s\n",
-			vsi->vsi_num, ice_stat_str(status));
-
-	ice_free_fltr_list(dev, &tmp_add_list);
-}
-
 /**
  * ice_cfg_sw_lldp - Config switch rules for LLDP packet handling
  * @vsi: the VSI being configured
@@ -2047,45 +1923,25 @@ ice_vsi_add_rem_eth_mac(struct ice_vsi *vsi, bool add_rule)
  */
 void ice_cfg_sw_lldp(struct ice_vsi *vsi, bool tx, bool create)
 {
-	struct ice_fltr_list_entry *list;
+	enum ice_status (*eth_fltr)(struct ice_vsi *v, u16 type, u16 flag,
+				    enum ice_sw_fwd_act_type act);
 	struct ice_pf *pf = vsi->back;
-	LIST_HEAD(tmp_add_list);
 	enum ice_status status;
 	struct device *dev;
 
 	dev = ice_pf_to_dev(pf);
-	list = devm_kzalloc(dev, sizeof(*list), GFP_KERNEL);
-	if (!list)
-		return;
-
-	list->fltr_info.lkup_type = ICE_SW_LKUP_ETHERTYPE;
-	list->fltr_info.vsi_handle = vsi->idx;
-	list->fltr_info.l_data.ethertype_mac.ethertype = ETH_P_LLDP;
+	eth_fltr = create ? ice_fltr_add_eth : ice_fltr_remove_eth;
 
-	if (tx) {
-		list->fltr_info.fltr_act = ICE_DROP_PACKET;
-		list->fltr_info.flag = ICE_FLTR_TX;
-		list->fltr_info.src_id = ICE_SRC_ID_VSI;
-	} else {
-		list->fltr_info.fltr_act = ICE_FWD_TO_VSI;
-		list->fltr_info.flag = ICE_FLTR_RX;
-		list->fltr_info.src_id = ICE_SRC_ID_LPORT;
-	}
-
-	INIT_LIST_HEAD(&list->list_entry);
-	list_add(&list->list_entry, &tmp_add_list);
-
-	if (create)
-		status = ice_add_eth_mac(&pf->hw, &tmp_add_list);
+	if (tx)
+		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_TX,
+				  ICE_DROP_PACKET);
 	else
-		status = ice_remove_eth_mac(&pf->hw, &tmp_add_list);
+		status = eth_fltr(vsi, ETH_P_LLDP, ICE_FLTR_RX, ICE_FWD_TO_VSI);
 
 	if (status)
 		dev_err(dev, "Fail %s %s LLDP rule on VSI %i error: %s\n",
 			create ? "adding" : "removing", tx ? "TX" : "RX",
 			vsi->vsi_num, ice_stat_str(status));
-
-	ice_free_fltr_list(dev, &tmp_add_list);
 }
 
 /**
@@ -2172,7 +2028,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 		 * so this handles those cases (i.e. adding the PF to a bridge
 		 * without the 8021q module loaded).
 		 */
-		ret = ice_vsi_add_vlan(vsi, 0);
+		ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
 		if (ret)
 			goto unroll_clear_rings;
 
@@ -2247,9 +2103,8 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	 */
 	if (!ice_is_safe_mode(pf))
 		if (vsi->type == ICE_VSI_PF) {
-			ice_vsi_add_rem_eth_mac(vsi, true);
-
-			/* Tx LLDP packets */
+			ice_fltr_add_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+					 ICE_DROP_PACKET);
 			ice_cfg_sw_lldp(vsi, true, true);
 		}
 
@@ -2566,7 +2421,8 @@ int ice_vsi_release(struct ice_vsi *vsi)
 
 	if (!ice_is_safe_mode(pf)) {
 		if (vsi->type == ICE_VSI_PF) {
-			ice_vsi_add_rem_eth_mac(vsi, false);
+			ice_fltr_remove_eth(vsi, ETH_P_PAUSE, ICE_FLTR_TX,
+					    ICE_DROP_PACKET);
 			ice_cfg_sw_lldp(vsi, true, false);
 			/* The Rx rule will only exist to remove if the LLDP FW
 			 * engine is currently stopped
@@ -2576,7 +2432,7 @@ int ice_vsi_release(struct ice_vsi *vsi)
 		}
 	}
 
-	ice_remove_vsi_fltr(&pf->hw, vsi->idx);
+	ice_fltr_remove_all(vsi);
 	ice_rm_vsi_lan_cfg(vsi->port_info, vsi->idx);
 	ice_vsi_delete(vsi);
 	ice_vsi_free_q_vectors(vsi);
@@ -2992,36 +2848,6 @@ void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
 	u64_stats_update_end(&rx_ring->syncp);
 }
 
-/**
- * ice_vsi_cfg_mac_fltr - Add or remove a MAC address filter for a VSI
- * @vsi: the VSI being configured MAC filter
- * @macaddr: the MAC address to be added.
- * @set: Add or delete a MAC filter
- *
- * Adds or removes MAC address filter entry for VF VSI
- */
-enum ice_status
-ice_vsi_cfg_mac_fltr(struct ice_vsi *vsi, const u8 *macaddr, bool set)
-{
-	LIST_HEAD(tmp_add_list);
-	enum ice_status status;
-
-	 /* Update MAC filter list to be added or removed for a VSI */
-	if (ice_add_mac_to_list(vsi, &tmp_add_list, macaddr)) {
-		status = ICE_ERR_NO_MEMORY;
-		goto cfg_mac_fltr_exit;
-	}
-
-	if (set)
-		status = ice_add_mac(&vsi->back->hw, &tmp_add_list);
-	else
-		status = ice_remove_mac(&vsi->back->hw, &tmp_add_list);
-
-cfg_mac_fltr_exit:
-	ice_free_fltr_list(ice_pf_to_dev(vsi->back), &tmp_add_list);
-	return status;
-}
-
 /**
  * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
  * @sw: switch to check if its default forwarding VSI is free
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 04ca00799364..9746de9b25fe 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -8,12 +8,6 @@
 
 const char *ice_vsi_type_str(enum ice_vsi_type vsi_type);
 
-int
-ice_add_mac_to_list(struct ice_vsi *vsi, struct list_head *add_list,
-		    const u8 *macaddr);
-
-void ice_free_fltr_list(struct device *dev, struct list_head *h);
-
 void ice_update_eth_stats(struct ice_vsi *vsi);
 
 int ice_vsi_cfg_rxqs(struct ice_vsi *vsi);
@@ -22,7 +16,8 @@ int ice_vsi_cfg_lan_txqs(struct ice_vsi *vsi);
 
 void ice_vsi_cfg_msix(struct ice_vsi *vsi);
 
-int ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid);
+int
+ice_vsi_add_vlan(struct ice_vsi *vsi, u16 vid, enum ice_sw_fwd_act_type action);
 
 int ice_vsi_kill_vlan(struct ice_vsi *vsi, u16 vid);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index cbfd54867be1..7fee3e4b39eb 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -8,6 +8,7 @@
 #include "ice.h"
 #include "ice_base.h"
 #include "ice_lib.h"
+#include "ice_fltr.h"
 #include "ice_dcb_lib.h"
 #include "ice_dcb_nl.h"
 #include "ice_devlink.h"
@@ -133,32 +134,18 @@ static void ice_check_for_hang_subtask(struct ice_pf *pf)
 static int ice_init_mac_fltr(struct ice_pf *pf)
 {
 	enum ice_status status;
-	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
+	u8 *perm_addr;
 
 	vsi = ice_get_main_vsi(pf);
 	if (!vsi)
 		return -EINVAL;
 
-	/* To add a MAC filter, first add the MAC to a list and then
-	 * pass the list to ice_add_mac.
-	 */
-
-	 /* Add a unicast MAC filter so the VSI can get its packets */
-	status = ice_vsi_cfg_mac_fltr(vsi, vsi->port_info->mac.perm_addr, true);
-	if (status)
-		goto unregister;
-
-	/* VSI needs to receive broadcast traffic, so add the broadcast
-	 * MAC address to the list as well.
-	 */
-	eth_broadcast_addr(broadcast);
-	status = ice_vsi_cfg_mac_fltr(vsi, broadcast, true);
-	if (status)
-		goto unregister;
+	perm_addr = vsi->port_info->mac.perm_addr;
+	status = ice_fltr_add_mac_and_broadcast(vsi, perm_addr, ICE_FWD_TO_VSI);
+	if (!status)
+		return 0;
 
-	return 0;
-unregister:
 	/* We aren't useful with no MAC filters, so unregister if we
 	 * had an error
 	 */
@@ -188,7 +175,8 @@ static int ice_add_mac_to_sync_list(struct net_device *netdev, const u8 *addr)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 
-	if (ice_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr))
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_sync_list, addr,
+				     ICE_FWD_TO_VSI))
 		return -EINVAL;
 
 	return 0;
@@ -209,7 +197,8 @@ static int ice_add_mac_to_unsync_list(struct net_device *netdev, const u8 *addr)
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 
-	if (ice_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr))
+	if (ice_fltr_add_mac_to_list(vsi, &vsi->tmp_unsync_list, addr,
+				     ICE_FWD_TO_VSI))
 		return -EINVAL;
 
 	return 0;
@@ -307,8 +296,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	}
 
 	/* Remove MAC addresses in the unsync list */
-	status = ice_remove_mac(hw, &vsi->tmp_unsync_list);
-	ice_free_fltr_list(dev, &vsi->tmp_unsync_list);
+	status = ice_fltr_remove_mac_list(vsi, &vsi->tmp_unsync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_unsync_list);
 	if (status) {
 		netdev_err(netdev, "Failed to delete MAC filters\n");
 		/* if we failed because of alloc failures, just bail */
@@ -319,8 +308,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 	}
 
 	/* Add MAC addresses in the sync list */
-	status = ice_add_mac(hw, &vsi->tmp_sync_list);
-	ice_free_fltr_list(dev, &vsi->tmp_sync_list);
+	status = ice_fltr_add_mac_list(vsi, &vsi->tmp_sync_list);
+	ice_fltr_free_list(dev, &vsi->tmp_sync_list);
 	/* If filter is added successfully or already exists, do not go into
 	 * 'if' condition and report it as error. Instead continue processing
 	 * rest of the function.
@@ -2521,7 +2510,7 @@ ice_vlan_rx_add_vid(struct net_device *netdev, __always_unused __be16 proto,
 	/* Add a switch rule for this VLAN ID so its corresponding VLAN tagged
 	 * packets aren't pruned by the device's internal switch on Rx
 	 */
-	ret = ice_vsi_add_vlan(vsi, vid);
+	ret = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
 	if (!ret) {
 		vsi->vlan_ena = true;
 		set_bit(ICE_VSI_FLAG_VLAN_FLTR_CHANGED, vsi->flags);
@@ -3718,20 +3707,14 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
 		return -EBUSY;
 	}
 
-	/* When we change the MAC address we also have to change the MAC address
-	 * based filter rules that were created previously for the old MAC
-	 * address. So first, we remove the old filter rule using ice_remove_mac
-	 * and then create a new filter rule using ice_add_mac via
-	 * ice_vsi_cfg_mac_fltr function call for both add and/or remove
-	 * filters.
-	 */
-	status = ice_vsi_cfg_mac_fltr(vsi, netdev->dev_addr, false);
+	/* Clean up old MAC filter before changing the MAC address */
+	status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI);
 	if (status) {
 		err = -EADDRNOTAVAIL;
 		goto err_update_filters;
 	}
 
-	status = ice_vsi_cfg_mac_fltr(vsi, mac, true);
+	status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
 	if (status) {
 		err = -EADDRNOTAVAIL;
 		goto err_update_filters;
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 9fb74a390b8c..62c100d47592 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -4,6 +4,7 @@
 #include "ice.h"
 #include "ice_base.h"
 #include "ice_lib.h"
+#include "ice_fltr.h"
 
 /**
  * ice_validate_vf_id - helper to check if VF ID is valid
@@ -548,7 +549,6 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
 static int ice_alloc_vsi_res(struct ice_vf *vf)
 {
 	struct ice_pf *pf = vf->pf;
-	LIST_HEAD(tmp_add_list);
 	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
 	struct device *dev;
@@ -570,7 +570,8 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	/* Check if port VLAN exist before, and restore it accordingly */
 	if (vf->port_vlan_info) {
 		ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
-		if (ice_vsi_add_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK))
+		if (ice_vsi_add_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK,
+				     ICE_FWD_TO_VSI))
 			dev_warn(ice_pf_to_dev(pf), "Failed to add Port VLAN %d filter for VF %d\n",
 				 vf->port_vlan_info & VLAN_VID_MASK, vf->vf_id);
 	} else {
@@ -579,27 +580,23 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 		 * untagged broadcast/multicast traffic seen on the VF
 		 * interface.
 		 */
-		if (ice_vsi_add_vlan(vsi, 0))
+		if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI))
 			dev_warn(ice_pf_to_dev(pf), "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest\n",
 				 vf->vf_id);
 	}
 
-	eth_broadcast_addr(broadcast);
-
-	status = ice_add_mac_to_list(vsi, &tmp_add_list, broadcast);
-	if (status)
-		goto ice_alloc_vsi_res_exit;
-
 	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
-		status = ice_add_mac_to_list(vsi, &tmp_add_list,
-					     vf->dflt_lan_addr.addr);
+		status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr,
+					  ICE_FWD_TO_VSI);
 		if (status)
 			goto ice_alloc_vsi_res_exit;
 	}
 
-	status = ice_add_mac(&pf->hw, &tmp_add_list);
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
 	if (status)
-		dev_err(dev, "could not add mac filters error %d\n", status);
+		dev_err(dev, "could not add mac filters error %d\n",
+			status);
 	else
 		vf->num_mac = 1;
 
@@ -610,7 +607,6 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	 * more vectors.
 	 */
 ice_alloc_vsi_res_exit:
-	ice_free_fltr_list(dev, &tmp_add_list);
 	return status;
 }
 
@@ -2807,7 +2803,7 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 		return -EPERM;
 	}
 
-	status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, true);
+	status = ice_fltr_add_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
 	if (status == ICE_ERR_ALREADY_EXISTS) {
 		dev_err(dev, "MAC %pM already exists for VF %d\n", mac_addr,
 			vf->vf_id);
@@ -2844,7 +2840,7 @@ ice_vc_del_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 	    ether_addr_equal(mac_addr, vf->dflt_lan_addr.addr))
 		return 0;
 
-	status = ice_vsi_cfg_mac_fltr(vsi, mac_addr, false);
+	status = ice_fltr_remove_mac(vsi, mac_addr, ICE_FWD_TO_VSI);
 	if (status == ICE_ERR_DOES_NOT_EXIST) {
 		dev_err(dev, "MAC %pM does not exist for VF %d\n", mac_addr,
 			vf->vf_id);
@@ -3088,7 +3084,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 		/* add VLAN 0 filter back when transitioning from port VLAN to
 		 * no port VLAN. No change to old port VLAN on failure.
 		 */
-		ret = ice_vsi_add_vlan(vsi, 0);
+		ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
 		if (ret)
 			return ret;
 		ret = ice_vsi_manage_pvid(vsi, 0, false);
@@ -3101,7 +3097,7 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 			 vlan_id, qos, vf_id);
 
 		/* add VLAN filter for the port VLAN */
-		ret = ice_vsi_add_vlan(vsi, vlan_id);
+		ret = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI);
 		if (ret)
 			return ret;
 	}
@@ -3222,7 +3218,7 @@ static int ice_vc_process_vlan_msg(struct ice_vf *vf, u8 *msg, bool add_v)
 			if (!vid)
 				continue;
 
-			status = ice_vsi_add_vlan(vsi, vid);
+			status = ice_vsi_add_vlan(vsi, vid, ICE_FWD_TO_VSI);
 			if (status) {
 				v_ret = VIRTCHNL_STATUS_ERR_PARAM;
 				goto error_param;
-- 
cgit v1.2.3-59-g8ed1b


From 757976ab16be0d07e316998182d82ea7fcf561e2 Mon Sep 17 00:00:00 2001
From: Lihong Yang <lihong.yang@intel.com>
Date: Thu, 7 May 2020 17:41:09 -0700
Subject: ice: Fix check for removing/adding mac filters

In function ice_set_mac_address, we will remove old dev_addr before
adding the new MAC. In the removing and adding process of the MAC,
there is no need to return error if the check finds the to-be-removed
dev_addr does not exist in the MAC filter list or the to-be-added mac
already exists; keep going or return success accordingly.

Signed-off-by: Lihong Yang <lihong.yang@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 15 ++++++++++-----
 1 file changed, 10 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 7fee3e4b39eb..6ac3e5540119 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3707,19 +3707,24 @@ static int ice_set_mac_address(struct net_device *netdev, void *pi)
 		return -EBUSY;
 	}
 
-	/* Clean up old MAC filter before changing the MAC address */
+	/* Clean up old MAC filter. Not an error if old filter doesn't exist */
 	status = ice_fltr_remove_mac(vsi, netdev->dev_addr, ICE_FWD_TO_VSI);
-	if (status) {
+	if (status && status != ICE_ERR_DOES_NOT_EXIST) {
 		err = -EADDRNOTAVAIL;
 		goto err_update_filters;
 	}
 
+	/* Add filter for new MAC. If filter exists, just return success */
 	status = ice_fltr_add_mac(vsi, mac, ICE_FWD_TO_VSI);
-	if (status) {
-		err = -EADDRNOTAVAIL;
-		goto err_update_filters;
+	if (status == ICE_ERR_ALREADY_EXISTS) {
+		netdev_dbg(netdev, "filter for MAC %pM already exists\n", mac);
+		return 0;
 	}
 
+	/* error if the new filter addition failed */
+	if (status)
+		err = -EADDRNOTAVAIL;
+
 err_update_filters:
 	if (err) {
 		netdev_err(netdev, "can't set MAC %pM. filter update failed\n",
-- 
cgit v1.2.3-59-g8ed1b


From 92ace4824c1cf0cf2094e51501e3c48176a4f2a2 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Thu, 7 May 2020 17:41:10 -0700
Subject: ice: remove unnecessary expression that is always true

The else conditional expression is always true due to the if conditional
expression; remove it and add a comment to make it obvious still.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6ac3e5540119..fffb3433969c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -346,7 +346,8 @@ static int ice_vsi_sync_fltr(struct ice_vsi *vsi)
 				vsi->current_netdev_flags &= ~IFF_ALLMULTI;
 				goto out_promisc;
 			}
-		} else if (!(vsi->current_netdev_flags & IFF_ALLMULTI)) {
+		} else {
+			/* !(vsi->current_netdev_flags & IFF_ALLMULTI) */
 			if (vsi->vlan_ena)
 				promisc_m = ICE_MCAST_VLAN_PROMISC_BITS;
 			else
-- 
cgit v1.2.3-59-g8ed1b


From 86a2e00d20bd4f7fd0efd1cccd8d5a5d7270b640 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Thu, 7 May 2020 17:41:11 -0700
Subject: ice: remove unnecessary check

The variable status cannot be zero due to a prior check of it; remove this
check.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index fffb3433969c..c3e5c4334e26 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -149,7 +149,7 @@ static int ice_init_mac_fltr(struct ice_pf *pf)
 	/* We aren't useful with no MAC filters, so unregister if we
 	 * had an error
 	 */
-	if (status && vsi->netdev->reg_state == NETREG_REGISTERED) {
+	if (vsi->netdev->reg_state == NETREG_REGISTERED) {
 		dev_err(ice_pf_to_dev(pf), "Could not add MAC filters error %s. Unregistering device\n",
 			ice_stat_str(status));
 		unregister_netdev(vsi->netdev);
-- 
cgit v1.2.3-59-g8ed1b


From c522d1f686752d9f85cedc3e3bb7423c5abd03e5 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Thu, 7 May 2020 17:41:12 -0700
Subject: ice: remove unnecessary backslash

Self-explanatory.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 8767a78038e0..979e9c6254af 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -541,7 +541,7 @@ struct ice_sw_rule_lkup_rx_tx {
 #define ICE_SINGLE_ACT_OTHER_ACTS		0x3
 #define ICE_SINGLE_OTHER_ACT_IDENTIFIER_S	17
 #define ICE_SINGLE_OTHER_ACT_IDENTIFIER_M	\
-				(0x3 << \ ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
+				(0x3 << ICE_SINGLE_OTHER_ACT_IDENTIFIER_S)
 
 	/* Bit 17:18 - Defines other actions */
 	/* Other action = 0 - Mirror VSI */
-- 
cgit v1.2.3-59-g8ed1b


From 5757cc7c8b71d5114aca0034cee42d22e6977d19 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Thu, 7 May 2020 17:41:13 -0700
Subject: ice: Rename build_ctob to ice_build_ctob

To make the function easier to identify as being part of the ice driver,
prepend ice to the function name.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx.c     | 11 ++++++-----
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c |  4 ++--
 drivers/net/ethernet/intel/ice/ice_txrx_lib.h |  2 +-
 drivers/net/ethernet/intel/ice/ice_xsk.c      |  4 ++--
 4 files changed, 11 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 05d1077f80c3..0d90e32efab9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -1680,7 +1680,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 		 */
 		while (unlikely(size > ICE_MAX_DATA_PER_TXD)) {
 			tx_desc->cmd_type_offset_bsz =
-				build_ctob(td_cmd, td_offset, max_data, td_tag);
+				ice_build_ctob(td_cmd, td_offset, max_data,
+					       td_tag);
 
 			tx_desc++;
 			i++;
@@ -1700,8 +1701,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 		if (likely(!data_len))
 			break;
 
-		tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset,
-							  size, td_tag);
+		tx_desc->cmd_type_offset_bsz = ice_build_ctob(td_cmd, td_offset,
+							      size, td_tag);
 
 		tx_desc++;
 		i++;
@@ -1732,8 +1733,8 @@ ice_tx_map(struct ice_ring *tx_ring, struct ice_tx_buf *first,
 
 	/* write last descriptor with RS and EOP bits */
 	td_cmd |= (u64)ICE_TXD_LAST_DESC_CMD;
-	tx_desc->cmd_type_offset_bsz = build_ctob(td_cmd, td_offset, size,
-						  td_tag);
+	tx_desc->cmd_type_offset_bsz =
+			ice_build_ctob(td_cmd, td_offset, size, td_tag);
 
 	/* Force memory writes to complete before letting h/w know there
 	 * are new descriptors to fetch.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 9d6512f96b8c..1ba97172d8d0 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -232,8 +232,8 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
 
 	tx_desc = ICE_TX_DESC(xdp_ring, i);
 	tx_desc->buf_addr = cpu_to_le64(dma);
-	tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
-						  size, 0);
+	tx_desc->cmd_type_offset_bsz = ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0,
+						      size, 0);
 
 	/* Make certain all of the status bits have been updated
 	 * before next_to_watch is written.
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
index af0fca5b91ff..58ff58f0f972 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.h
@@ -22,7 +22,7 @@ ice_test_staterr(union ice_32b_rx_flex_desc *rx_desc, const u16 stat_err_bits)
 }
 
 static inline __le64
-build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
+ice_build_ctob(u64 td_cmd, u64 td_offset, unsigned int size, u64 td_tag)
 {
 	return cpu_to_le64(ICE_TX_DESC_DTYPE_DATA |
 			   (td_cmd    << ICE_TXD_QW1_CMD_S) |
diff --git a/drivers/net/ethernet/intel/ice/ice_xsk.c b/drivers/net/ethernet/intel/ice/ice_xsk.c
index 23e5515d4527..20ac54e3156d 100644
--- a/drivers/net/ethernet/intel/ice/ice_xsk.c
+++ b/drivers/net/ethernet/intel/ice/ice_xsk.c
@@ -988,8 +988,8 @@ static bool ice_xmit_zc(struct ice_ring *xdp_ring, int budget)
 
 		tx_desc = ICE_TX_DESC(xdp_ring, xdp_ring->next_to_use);
 		tx_desc->buf_addr = cpu_to_le64(dma);
-		tx_desc->cmd_type_offset_bsz = build_ctob(ICE_TXD_LAST_DESC_CMD,
-							  0, desc.len, 0);
+		tx_desc->cmd_type_offset_bsz =
+			ice_build_ctob(ICE_TXD_LAST_DESC_CMD, 0, desc.len, 0);
 
 		xdp_ring->next_to_use++;
 		if (xdp_ring->next_to_use == xdp_ring->count)
-- 
cgit v1.2.3-59-g8ed1b


From 2092c910e2399c9e13b199c07421133681b9eaff Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Wed, 6 May 2020 22:13:30 +0200
Subject: batman-adv: Revert "Drop lockdep.h include for soft-interface.c"

The commit 1a33e10e4a95 ("net: partially revert dynamic lockdep key
changes") reverts the commit ab92d68fc22f ("net: core: add generic lockdep
keys"). But it forgot to also revert the commit 5759af0682b3 ("batman-adv:
Drop lockdep.h include for soft-interface.c") which depends on the latter.

Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/soft-interface.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/batman-adv/soft-interface.c b/net/batman-adv/soft-interface.c
index 822af540b854..0ddd80130ea3 100644
--- a/net/batman-adv/soft-interface.c
+++ b/net/batman-adv/soft-interface.c
@@ -22,6 +22,7 @@
 #include <linux/kernel.h>
 #include <linux/kref.h>
 #include <linux/list.h>
+#include <linux/lockdep.h>
 #include <linux/netdevice.h>
 #include <linux/netlink.h>
 #include <linux/percpu.h>
-- 
cgit v1.2.3-59-g8ed1b


From cf78bb0bbcef3fbe1abf118f14b81dad36eaa94e Mon Sep 17 00:00:00 2001
From: Antonio Quartulli <a@unstable.cc>
Date: Wed, 20 May 2020 10:41:40 +0200
Subject: batman-adv: use rcu_replace_pointer() where appropriate

In commit a63fc6b75cca ("rcu: Upgrade rcu_swap_protected() to
rcu_replace_pointer()") a new helper macro named rcu_replace_pointer() was
introduced to simplify code requiring to switch an rcu pointer to a new
value while extracting the old one.

Use rcu_replace_pointer() where appropriate to make code slimmer.

Signed-off-by: Antonio Quartulli <a@unstable.cc>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/gateway_client.c | 4 ++--
 net/batman-adv/hard-interface.c | 4 ++--
 net/batman-adv/routing.c        | 4 ++--
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/net/batman-adv/gateway_client.c b/net/batman-adv/gateway_client.c
index e22e49289677..a18dcc686dc3 100644
--- a/net/batman-adv/gateway_client.c
+++ b/net/batman-adv/gateway_client.c
@@ -146,8 +146,8 @@ static void batadv_gw_select(struct batadv_priv *bat_priv,
 	if (new_gw_node)
 		kref_get(&new_gw_node->refcount);
 
-	curr_gw_node = rcu_dereference_protected(bat_priv->gw.curr_gw, 1);
-	rcu_assign_pointer(bat_priv->gw.curr_gw, new_gw_node);
+	curr_gw_node = rcu_replace_pointer(bat_priv->gw.curr_gw, new_gw_node,
+					   true);
 
 	if (curr_gw_node)
 		batadv_gw_node_put(curr_gw_node);
diff --git a/net/batman-adv/hard-interface.c b/net/batman-adv/hard-interface.c
index c7e98a40dd33..3a256af92784 100644
--- a/net/batman-adv/hard-interface.c
+++ b/net/batman-adv/hard-interface.c
@@ -473,8 +473,8 @@ static void batadv_primary_if_select(struct batadv_priv *bat_priv,
 	if (new_hard_iface)
 		kref_get(&new_hard_iface->refcount);
 
-	curr_hard_iface = rcu_dereference_protected(bat_priv->primary_if, 1);
-	rcu_assign_pointer(bat_priv->primary_if, new_hard_iface);
+	curr_hard_iface = rcu_replace_pointer(bat_priv->primary_if,
+					      new_hard_iface, 1);
 
 	if (!new_hard_iface)
 		goto out;
diff --git a/net/batman-adv/routing.c b/net/batman-adv/routing.c
index 3632bd976c56..d343382e9664 100644
--- a/net/batman-adv/routing.c
+++ b/net/batman-adv/routing.c
@@ -71,13 +71,13 @@ static void _batadv_update_route(struct batadv_priv *bat_priv,
 	 * the code needs to ensure the curr_router variable contains a pointer
 	 * to the replaced best neighbor.
 	 */
-	curr_router = rcu_dereference_protected(orig_ifinfo->router, true);
 
 	/* increase refcount of new best neighbor */
 	if (neigh_node)
 		kref_get(&neigh_node->refcount);
 
-	rcu_assign_pointer(orig_ifinfo->router, neigh_node);
+	curr_router = rcu_replace_pointer(orig_ifinfo->router, neigh_node,
+					  true);
 	spin_unlock_bh(&orig_node->neigh_list_lock);
 	batadv_orig_ifinfo_put(orig_ifinfo);
 
-- 
cgit v1.2.3-59-g8ed1b


From 472f0a240250df443ffc4f39835e829916193ca1 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Fri, 22 May 2020 11:45:33 +0800
Subject: mt76: mt7915: Fix build error
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

In file included from ./include/linux/firmware.h:6:0,
                 from drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:4:
In function ‘__mt7915_mcu_msg_send’,
    inlined from ‘mt7915_mcu_send_message’ at drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:370:6:
./include/linux/compiler.h:396:38: error: call to ‘__compiletime_assert_545’ declared with attribute error: BUILD_BUG_ON failed: cmd == MCU_EXT_CMD_EFUSE_ACCESS && mcu_txd->set_query != MCU_Q_QUERY
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
                                      ^
./include/linux/compiler.h:377:4: note: in definition of macro ‘__compiletime_assert’
    prefix ## suffix();    \
    ^~~~~~
./include/linux/compiler.h:396:2: note: in expansion of macro ‘_compiletime_assert’
  _compiletime_assert(condition, msg, __compiletime_assert_, __COUNTER__)
  ^~~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:39:37: note: in expansion of macro ‘compiletime_assert’
 #define BUILD_BUG_ON_MSG(cond, msg) compiletime_assert(!(cond), msg)
                                     ^~~~~~~~~~~~~~~~~~
./include/linux/build_bug.h:50:2: note: in expansion of macro ‘BUILD_BUG_ON_MSG’
  BUILD_BUG_ON_MSG(condition, "BUILD_BUG_ON failed: " #condition)
  ^~~~~~~~~~~~~~~~
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:280:2: note: in expansion of macro ‘BUILD_BUG_ON’
  BUILD_BUG_ON(cmd == MCU_EXT_CMD_EFUSE_ACCESS &&
  ^~~~~~~~~~~~

BUILD_BUG_ON is meaningless here, change it to WARN_ON.

Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200522034533.61716-1-yuehaibing@huawei.com
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index f00ad2b66761..99eeea42478f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -277,8 +277,8 @@ static int __mt7915_mcu_msg_send(struct mt7915_dev *dev, struct sk_buff *skb,
 	}
 
 	mcu_txd->s2d_index = MCU_S2D_H2N;
-	BUILD_BUG_ON(cmd == MCU_EXT_CMD_EFUSE_ACCESS &&
-		     mcu_txd->set_query != MCU_Q_QUERY);
+	WARN_ON(cmd == MCU_EXT_CMD_EFUSE_ACCESS &&
+		mcu_txd->set_query != MCU_Q_QUERY);
 
 exit:
 	if (wait_seq)
-- 
cgit v1.2.3-59-g8ed1b


From 38428d68719c454d269cb03b776d8a4b0ad66111 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 21 May 2020 22:26:13 -0700
Subject: nexthop: support for fdb ecmp nexthops

This patch introduces ecmp nexthops and nexthop groups
for mac fdb entries. In subsequent patches this is used
by the vxlan driver fdb entries. The use case is
E-VPN multihoming [1,2,3] which requires bridged vxlan traffic
to be load balanced to remote switches (vteps) belonging to
the same multi-homed ethernet segment (This is analogous to
a multi-homed LAG but over vxlan).

Changes include new nexthop flag NHA_FDB for nexthops
referenced by fdb entries. These nexthops only have ip.
This patch includes appropriate checks to avoid routes
referencing such nexthops.

example:
$ip nexthop add id 12 via 172.16.1.2 fdb
$ip nexthop add id 13 via 172.16.1.3 fdb
$ip nexthop add id 102 group 12/13 fdb

$bridge fdb add 02:02:00:00:00:13 dev vxlan1000 nhid 102 self

[1] E-VPN https://tools.ietf.org/html/rfc7432
[2] E-VPN VxLAN: https://tools.ietf.org/html/rfc8365
[3] LPC talk with mention of nexthop groups for L2 ecmp
http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf

v4 - fixed uninitialized variable reported by kernel test robot
Reported-by: kernel test robot <rong.a.chen@intel.com>

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip6_fib.h        |   1 +
 include/net/nexthop.h        |  32 +++++++++++
 include/uapi/linux/nexthop.h |   3 +
 net/ipv4/nexthop.c           | 132 +++++++++++++++++++++++++++++++++++--------
 net/ipv6/route.c             |   5 ++
 5 files changed, 148 insertions(+), 25 deletions(-)

diff --git a/include/net/ip6_fib.h b/include/net/ip6_fib.h
index fdaf975e3331..3f615a29766e 100644
--- a/include/net/ip6_fib.h
+++ b/include/net/ip6_fib.h
@@ -65,6 +65,7 @@ struct fib6_config {
 	struct nl_info	fc_nlinfo;
 	struct nlattr	*fc_encap;
 	u16		fc_encap_type;
+	bool		fc_is_fdb;
 };
 
 struct fib6_node {
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index c440ccc861fc..d929c98931ad 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -26,6 +26,7 @@ struct nh_config {
 	u8		nh_family;
 	u8		nh_protocol;
 	u8		nh_blackhole;
+	u8		nh_fdb;
 	u32		nh_flags;
 
 	int		nh_ifindex;
@@ -52,6 +53,7 @@ struct nh_info {
 
 	u8			family;
 	bool			reject_nh;
+	bool			fdb_nh;
 
 	union {
 		struct fib_nh_common	fib_nhc;
@@ -80,6 +82,7 @@ struct nexthop {
 	struct rb_node		rb_node;    /* entry on netns rbtree */
 	struct list_head	fi_list;    /* v4 entries using nh */
 	struct list_head	f6i_list;   /* v6 entries using nh */
+	struct list_head        fdb_list;   /* fdb entries using this nh */
 	struct list_head	grp_list;   /* nh group entries using this nh */
 	struct net		*net;
 
@@ -88,6 +91,7 @@ struct nexthop {
 	u8			protocol;   /* app managing this nh */
 	u8			nh_flags;
 	bool			is_group;
+	bool			is_fdb_nh;
 
 	refcount_t		refcnt;
 	struct rcu_head		rcu;
@@ -304,4 +308,32 @@ static inline void nexthop_path_fib6_result(struct fib6_result *res, int hash)
 int nexthop_for_each_fib6_nh(struct nexthop *nh,
 			     int (*cb)(struct fib6_nh *nh, void *arg),
 			     void *arg);
+
+static inline int nexthop_get_family(struct nexthop *nh)
+{
+	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
+
+	return nhi->family;
+}
+
+static inline
+struct fib_nh_common *nexthop_fdb_nhc(struct nexthop *nh)
+{
+	struct nh_info *nhi = rcu_dereference_rtnl(nh->nh_info);
+
+	return &nhi->fib_nhc;
+}
+
+static inline struct fib_nh_common *nexthop_path_fdb_result(struct nexthop *nh,
+							    int hash)
+{
+	struct nh_info *nhi;
+	struct nexthop *nhp;
+
+	nhp = nexthop_select_path(nh, hash);
+	if (unlikely(!nhp))
+		return NULL;
+	nhi = rcu_dereference(nhp->nh_info);
+	return &nhi->fib_nhc;
+}
 #endif
diff --git a/include/uapi/linux/nexthop.h b/include/uapi/linux/nexthop.h
index 7b61867e9848..2d4a1e784cf0 100644
--- a/include/uapi/linux/nexthop.h
+++ b/include/uapi/linux/nexthop.h
@@ -49,6 +49,9 @@ enum {
 	NHA_GROUPS,	/* flag; only return nexthop groups in dump */
 	NHA_MASTER,	/* u32;  only return nexthops with given master dev */
 
+	NHA_FDB,	/* flag; nexthop belongs to a bridge fdb */
+	/* if NHA_FDB is added, OIF, BLACKHOLE, ENCAP cannot be set */
+
 	__NHA_MAX,
 };
 
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 3957364d556c..bf91edc04631 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -33,6 +33,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
 	[NHA_ENCAP]		= { .type = NLA_NESTED },
 	[NHA_GROUPS]		= { .type = NLA_FLAG },
 	[NHA_MASTER]		= { .type = NLA_U32 },
+	[NHA_FDB]		= { .type = NLA_FLAG },
 };
 
 static unsigned int nh_dev_hashfn(unsigned int val)
@@ -107,6 +108,7 @@ static struct nexthop *nexthop_alloc(void)
 		INIT_LIST_HEAD(&nh->fi_list);
 		INIT_LIST_HEAD(&nh->f6i_list);
 		INIT_LIST_HEAD(&nh->grp_list);
+		INIT_LIST_HEAD(&nh->fdb_list);
 	}
 	return nh;
 }
@@ -227,6 +229,9 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
 	if (nla_put_u32(skb, NHA_ID, nh->id))
 		goto nla_put_failure;
 
+	if (nh->is_fdb_nh && nla_put_flag(skb, NHA_FDB))
+		goto nla_put_failure;
+
 	if (nh->is_group) {
 		struct nh_group *nhg = rtnl_dereference(nh->nh_grp);
 
@@ -241,7 +246,7 @@ static int nh_fill_node(struct sk_buff *skb, struct nexthop *nh,
 		if (nla_put_flag(skb, NHA_BLACKHOLE))
 			goto nla_put_failure;
 		goto out;
-	} else {
+	} else if (!nh->is_fdb_nh) {
 		const struct net_device *dev;
 
 		dev = nhi->fib_nhc.nhc_dev;
@@ -387,12 +392,35 @@ static bool valid_group_nh(struct nexthop *nh, unsigned int npaths,
 	return true;
 }
 
+static int nh_check_attr_fdb_group(struct nexthop *nh, u8 *nh_family,
+				   struct netlink_ext_ack *extack)
+{
+	struct nh_info *nhi;
+
+	if (!nh->is_fdb_nh) {
+		NL_SET_ERR_MSG(extack, "FDB nexthop group can only have fdb nexthops");
+		return -EINVAL;
+	}
+
+	nhi = rtnl_dereference(nh->nh_info);
+	if (*nh_family == AF_UNSPEC) {
+		*nh_family = nhi->family;
+	} else if (*nh_family != nhi->family) {
+		NL_SET_ERR_MSG(extack, "FDB nexthop group cannot have mixed family nexthops");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
 			       struct netlink_ext_ack *extack)
 {
 	unsigned int len = nla_len(tb[NHA_GROUP]);
+	u8 nh_family = AF_UNSPEC;
 	struct nexthop_grp *nhg;
 	unsigned int i, j;
+	u8 nhg_fdb = 0;
 
 	if (len & (sizeof(struct nexthop_grp) - 1)) {
 		NL_SET_ERR_MSG(extack,
@@ -421,6 +449,8 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
 		}
 	}
 
+	if (tb[NHA_FDB])
+		nhg_fdb = 1;
 	nhg = nla_data(tb[NHA_GROUP]);
 	for (i = 0; i < len; ++i) {
 		struct nexthop *nh;
@@ -432,11 +462,20 @@ static int nh_check_attr_group(struct net *net, struct nlattr *tb[],
 		}
 		if (!valid_group_nh(nh, len, extack))
 			return -EINVAL;
+
+		if (nhg_fdb && nh_check_attr_fdb_group(nh, &nh_family, extack))
+			return -EINVAL;
+
+		if (!nhg_fdb && nh->is_fdb_nh) {
+			NL_SET_ERR_MSG(extack, "Non FDB nexthop group cannot have fdb nexthops");
+			return -EINVAL;
+		}
 	}
 	for (i = NHA_GROUP + 1; i < __NHA_MAX; ++i) {
 		if (!tb[i])
 			continue;
-
+		if (tb[NHA_FDB])
+			continue;
 		NL_SET_ERR_MSG(extack,
 			       "No other attributes can be set in nexthop groups");
 		return -EINVAL;
@@ -495,6 +534,9 @@ struct nexthop *nexthop_select_path(struct nexthop *nh, int hash)
 		if (hash > atomic_read(&nhge->upper_bound))
 			continue;
 
+		if (nhge->nh->is_fdb_nh)
+			return nhge->nh;
+
 		/* nexthops always check if it is good and does
 		 * not rely on a sysctl for this behavior
 		 */
@@ -564,6 +606,11 @@ int fib6_check_nexthop(struct nexthop *nh, struct fib6_config *cfg,
 {
 	struct nh_info *nhi;
 
+	if (nh->is_fdb_nh) {
+		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+		return -EINVAL;
+	}
+
 	/* fib6_src is unique to a fib6_info and limits the ability to cache
 	 * routes in fib6_nh within a nexthop that is potentially shared
 	 * across multiple fib entries. If the config wants to use source
@@ -640,6 +687,12 @@ int fib_check_nexthop(struct nexthop *nh, u8 scope,
 {
 	int err = 0;
 
+	if (nh->is_fdb_nh) {
+		NL_SET_ERR_MSG(extack, "Route cannot point to a fdb nexthop");
+		err = -EINVAL;
+		goto out;
+	}
+
 	if (nh->is_group) {
 		struct nh_group *nhg;
 
@@ -1125,6 +1178,9 @@ static struct nexthop *nexthop_create_group(struct net *net,
 		nh_group_rebalance(nhg);
 	}
 
+	if (cfg->nh_fdb)
+		nh->is_fdb_nh = 1;
+
 	rcu_assign_pointer(nh->nh_grp, nhg);
 
 	return nh;
@@ -1152,7 +1208,7 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
 		.fc_encap = cfg->nh_encap,
 		.fc_encap_type = cfg->nh_encap_type,
 	};
-	u32 tb_id = l3mdev_fib_table(cfg->dev);
+	u32 tb_id = (cfg->dev ? l3mdev_fib_table(cfg->dev) : RT_TABLE_MAIN);
 	int err;
 
 	err = fib_nh_init(net, fib_nh, &fib_cfg, 1, extack);
@@ -1161,6 +1217,9 @@ static int nh_create_ipv4(struct net *net, struct nexthop *nh,
 		goto out;
 	}
 
+	if (nh->is_fdb_nh)
+		goto out;
+
 	/* sets nh_dev if successful */
 	err = fib_check_nh(net, fib_nh, tb_id, 0, extack);
 	if (!err) {
@@ -1186,6 +1245,7 @@ static int nh_create_ipv6(struct net *net,  struct nexthop *nh,
 		.fc_flags = cfg->nh_flags,
 		.fc_encap = cfg->nh_encap,
 		.fc_encap_type = cfg->nh_encap_type,
+		.fc_is_fdb = cfg->nh_fdb,
 	};
 	int err;
 
@@ -1227,6 +1287,9 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
 	nhi->family = cfg->nh_family;
 	nhi->fib_nhc.nhc_scope = RT_SCOPE_LINK;
 
+	if (cfg->nh_fdb)
+		nh->is_fdb_nh = 1;
+
 	if (cfg->nh_blackhole) {
 		nhi->reject_nh = 1;
 		cfg->nh_ifindex = net->loopback_dev->ifindex;
@@ -1248,7 +1311,8 @@ static struct nexthop *nexthop_create(struct net *net, struct nh_config *cfg,
 	}
 
 	/* add the entry to the device based hash */
-	nexthop_devhash_add(net, nhi);
+	if (!nh->is_fdb_nh)
+		nexthop_devhash_add(net, nhi);
 
 	rcu_assign_pointer(nh->nh_info, nhi);
 
@@ -1352,6 +1416,19 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 	if (tb[NHA_ID])
 		cfg->nh_id = nla_get_u32(tb[NHA_ID]);
 
+	if (tb[NHA_FDB]) {
+		if (tb[NHA_OIF] || tb[NHA_BLACKHOLE] ||
+		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
+			NL_SET_ERR_MSG(extack, "Fdb attribute can not be used with encap, oif or blackhole");
+			goto out;
+		}
+		if (nhm->nh_flags) {
+			NL_SET_ERR_MSG(extack, "Unsupported nexthop flags in ancillary header");
+			goto out;
+		}
+		cfg->nh_fdb = nla_get_flag(tb[NHA_FDB]);
+	}
+
 	if (tb[NHA_GROUP]) {
 		if (nhm->nh_family != AF_UNSPEC) {
 			NL_SET_ERR_MSG(extack, "Invalid family for group");
@@ -1375,8 +1452,8 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 
 	if (tb[NHA_BLACKHOLE]) {
 		if (tb[NHA_GATEWAY] || tb[NHA_OIF] ||
-		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE]) {
-			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway or oif");
+		    tb[NHA_ENCAP]   || tb[NHA_ENCAP_TYPE] || tb[NHA_FDB]) {
+			NL_SET_ERR_MSG(extack, "Blackhole attribute can not be used with gateway, oif, encap or fdb");
 			goto out;
 		}
 
@@ -1385,26 +1462,28 @@ static int rtm_to_nh_config(struct net *net, struct sk_buff *skb,
 		goto out;
 	}
 
-	if (!tb[NHA_OIF]) {
-		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole nexthops");
+	if (!cfg->nh_fdb && !tb[NHA_OIF]) {
+		NL_SET_ERR_MSG(extack, "Device attribute required for non-blackhole and non-fdb nexthops");
 		goto out;
 	}
 
-	cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
-	if (cfg->nh_ifindex)
-		cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
+	if (!cfg->nh_fdb && tb[NHA_OIF]) {
+		cfg->nh_ifindex = nla_get_u32(tb[NHA_OIF]);
+		if (cfg->nh_ifindex)
+			cfg->dev = __dev_get_by_index(net, cfg->nh_ifindex);
 
-	if (!cfg->dev) {
-		NL_SET_ERR_MSG(extack, "Invalid device index");
-		goto out;
-	} else if (!(cfg->dev->flags & IFF_UP)) {
-		NL_SET_ERR_MSG(extack, "Nexthop device is not up");
-		err = -ENETDOWN;
-		goto out;
-	} else if (!netif_carrier_ok(cfg->dev)) {
-		NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
-		err = -ENETDOWN;
-		goto out;
+		if (!cfg->dev) {
+			NL_SET_ERR_MSG(extack, "Invalid device index");
+			goto out;
+		} else if (!(cfg->dev->flags & IFF_UP)) {
+			NL_SET_ERR_MSG(extack, "Nexthop device is not up");
+			err = -ENETDOWN;
+			goto out;
+		} else if (!netif_carrier_ok(cfg->dev)) {
+			NL_SET_ERR_MSG(extack, "Carrier for nexthop device is down");
+			err = -ENETDOWN;
+			goto out;
+		}
 	}
 
 	err = -EINVAL;
@@ -1633,7 +1712,7 @@ static bool nh_dump_filtered(struct nexthop *nh, int dev_idx, int master_idx,
 
 static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
 			     int *master_idx, bool *group_filter,
-			     struct netlink_callback *cb)
+			     bool *fdb_filter, struct netlink_callback *cb)
 {
 	struct netlink_ext_ack *extack = cb->extack;
 	struct nlattr *tb[NHA_MAX + 1];
@@ -1670,6 +1749,9 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
 		case NHA_GROUPS:
 			*group_filter = true;
 			break;
+		case NHA_FDB:
+			*fdb_filter = true;
+			break;
 		default:
 			NL_SET_ERR_MSG(extack, "Unsupported attribute in dump request");
 			return -EINVAL;
@@ -1688,17 +1770,17 @@ static int nh_valid_dump_req(const struct nlmsghdr *nlh, int *dev_idx,
 /* rtnl */
 static int rtm_dump_nexthop(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	bool group_filter = false, fdb_filter = false;
 	struct nhmsg *nhm = nlmsg_data(cb->nlh);
 	int dev_filter_idx = 0, master_idx = 0;
 	struct net *net = sock_net(skb->sk);
 	struct rb_root *root = &net->nexthop.rb_root;
-	bool group_filter = false;
 	struct rb_node *node;
 	int idx = 0, s_idx;
 	int err;
 
 	err = nh_valid_dump_req(cb->nlh, &dev_filter_idx, &master_idx,
-				&group_filter, cb);
+				&group_filter, &fdb_filter, cb);
 	if (err < 0)
 		return err;
 
diff --git a/net/ipv6/route.c b/net/ipv6/route.c
index a52ec1b86432..82cbb46a2a4f 100644
--- a/net/ipv6/route.c
+++ b/net/ipv6/route.c
@@ -3421,6 +3421,11 @@ int fib6_nh_init(struct net *net, struct fib6_nh *fib6_nh,
 #ifdef CONFIG_IPV6_ROUTER_PREF
 	fib6_nh->last_probe = jiffies;
 #endif
+	if (cfg->fc_is_fdb) {
+		fib6_nh->fib_nh_gw6 = cfg->fc_gateway;
+		fib6_nh->fib_nh_gw_family = AF_INET6;
+		return 0;
+	}
 
 	err = -ENODEV;
 	if (cfg->fc_ifindex) {
-- 
cgit v1.2.3-59-g8ed1b


From 1274e1cc42264d4e629841e4f182795cb0becfd2 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 21 May 2020 22:26:14 -0700
Subject: vxlan: ecmp support for mac fdb entries

Today's vxlan mac fdb entries can point to multiple remote
ips (rdsts) with the sole purpose of replicating
broadcast-multicast and unknown unicast packets to those remote ips.

E-VPN multihoming [1,2,3] requires bridged vxlan traffic to be
load balanced to remote switches (vteps) belonging to the
same multi-homed ethernet segment (E-VPN multihoming is analogous
to multi-homed LAG implementations, but with the inter-switch
peerlink replaced with a vxlan tunnel). In other words it needs
support for mac ecmp. Furthermore, for faster convergence, E-VPN
multihoming needs the ability to update fdb ecmp nexthops independent
of the fdb entries.

New route nexthop API is perfect for this usecase.
This patch extends the vxlan fdb code to take a nexthop id
pointing to an ecmp nexthop group.

Changes include:
- New NDA_NH_ID attribute for fdbs
- Use the newly added fdb nexthop groups
- Make vxlan rdsts and nexthop handling code mutually
  exclusive
- Since this is a new use case and the requirement is for ecmp
  nexthop groups, the fdb add and update paths check that the
  nexthop is really an ecmp nexthop group. This check can be relaxed
  in the future if we want to introduce replication fdb nexthop groups
  and allow their use in lieu of the current rdst lists.
- fdb update requests with nexthop ids are only allowed for existing
  fdbs that have nexthop ids
- Learning will not override an existing fdb entry that has a nexthop
  group
- The switchdev offload code is now conditional on the presence of an
  rdst

[1] E-VPN RFC https://tools.ietf.org/html/rfc7432
[2] E-VPN with vxlan https://tools.ietf.org/html/rfc8365
[3] http://vger.kernel.org/lpc_net2018_talks/scaling_bridge_fdb_database_slidesV3.pdf

Includes a null check fix in vxlan_xmit from Nikolay

v2 - Fixed build issue:
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c            | 306 +++++++++++++++++++++++++++++++++--------
 include/net/vxlan.h            |  25 ++++
 include/uapi/linux/neighbour.h |   1 +
 net/core/neighbour.c           |   2 +
 4 files changed, 275 insertions(+), 59 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a5b415fed11e..754e00240eea 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -26,6 +26,7 @@
 #include <net/netns/generic.h>
 #include <net/tun_proto.h>
 #include <net/vxlan.h>
+#include <net/nexthop.h>
 
 #if IS_ENABLED(CONFIG_IPV6)
 #include <net/ip6_tunnel.h>
@@ -78,6 +79,8 @@ struct vxlan_fdb {
 	u16		  state;	/* see ndm_state */
 	__be32		  vni;
 	u16		  flags;	/* see ndm_flags and below */
+	struct list_head  nh_list;
+	struct nexthop __rcu *nh;
 };
 
 #define NTF_VXLAN_ADDED_BY_USER 0x100
@@ -174,11 +177,15 @@ static inline struct hlist_head *vs_head(struct net *net, __be16 port)
  */
 static inline struct vxlan_rdst *first_remote_rcu(struct vxlan_fdb *fdb)
 {
+	if (rcu_access_pointer(fdb->nh))
+		return NULL;
 	return list_entry_rcu(fdb->remotes.next, struct vxlan_rdst, list);
 }
 
 static inline struct vxlan_rdst *first_remote_rtnl(struct vxlan_fdb *fdb)
 {
+	if (rcu_access_pointer(fdb->nh))
+		return NULL;
 	return list_first_entry(&fdb->remotes, struct vxlan_rdst, list);
 }
 
@@ -251,9 +258,10 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 {
 	unsigned long now = jiffies;
 	struct nda_cacheinfo ci;
+	bool send_ip, send_eth;
 	struct nlmsghdr *nlh;
+	struct nexthop *nh;
 	struct ndmsg *ndm;
-	bool send_ip, send_eth;
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
 	if (nlh == NULL)
@@ -264,16 +272,21 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
 	send_eth = send_ip = true;
 
+	nh = rcu_dereference_rtnl(fdb->nh);
 	if (type == RTM_GETNEIGH) {
-		send_ip = !vxlan_addr_any(&rdst->remote_ip);
+		if (rdst) {
+			send_ip = !vxlan_addr_any(&rdst->remote_ip);
+			ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
+		} else if (nh) {
+			ndm->ndm_family = nexthop_get_family(nh);
+		}
 		send_eth = !is_zero_ether_addr(fdb->eth_addr);
-		ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
 	} else
 		ndm->ndm_family	= AF_BRIDGE;
 	ndm->ndm_state = fdb->state;
 	ndm->ndm_ifindex = vxlan->dev->ifindex;
 	ndm->ndm_flags = fdb->flags;
-	if (rdst->offloaded)
+	if (rdst && rdst->offloaded)
 		ndm->ndm_flags |= NTF_OFFLOADED;
 	ndm->ndm_type = RTN_UNICAST;
 
@@ -284,23 +297,30 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
+	if (nh) {
+		if (nla_put_u32(skb, NDA_NH_ID, nh->id))
+			goto nla_put_failure;
+	} else if (rdst) {
+		if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
+						  &rdst->remote_ip))
+			goto nla_put_failure;
+
+		if (rdst->remote_port &&
+		    rdst->remote_port != vxlan->cfg.dst_port &&
+		    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
+			goto nla_put_failure;
+		if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
+		    nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
+			goto nla_put_failure;
+		if (rdst->remote_ifindex &&
+		    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
+			goto nla_put_failure;
+	}
 
-	if (send_ip && vxlan_nla_put_addr(skb, NDA_DST, &rdst->remote_ip))
-		goto nla_put_failure;
-
-	if (rdst->remote_port && rdst->remote_port != vxlan->cfg.dst_port &&
-	    nla_put_be16(skb, NDA_PORT, rdst->remote_port))
-		goto nla_put_failure;
-	if (rdst->remote_vni != vxlan->default_dst.remote_vni &&
-	    nla_put_u32(skb, NDA_VNI, be32_to_cpu(rdst->remote_vni)))
-		goto nla_put_failure;
 	if ((vxlan->cfg.flags & VXLAN_F_COLLECT_METADATA) && fdb->vni &&
 	    nla_put_u32(skb, NDA_SRC_VNI,
 			be32_to_cpu(fdb->vni)))
 		goto nla_put_failure;
-	if (rdst->remote_ifindex &&
-	    nla_put_u32(skb, NDA_IFINDEX, rdst->remote_ifindex))
-		goto nla_put_failure;
 
 	ci.ndm_used	 = jiffies_to_clock_t(now - fdb->used);
 	ci.ndm_confirmed = 0;
@@ -401,7 +421,7 @@ static int vxlan_fdb_notify(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
 {
 	int err;
 
-	if (swdev_notify) {
+	if (swdev_notify && rd) {
 		switch (type) {
 		case RTM_NEWNEIGH:
 			err = vxlan_fdb_switchdev_call_notifiers(vxlan, fdb, rd,
@@ -805,6 +825,8 @@ static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
 	f->flags = ndm_flags;
 	f->updated = f->used = jiffies;
 	f->vni = src_vni;
+	f->nh = NULL;
+	INIT_LIST_HEAD(&f->nh_list);
 	INIT_LIST_HEAD(&f->remotes);
 	memcpy(f->eth_addr, mac, ETH_ALEN);
 
@@ -819,11 +841,78 @@ static void vxlan_fdb_insert(struct vxlan_dev *vxlan, const u8 *mac,
 			   vxlan_fdb_head(vxlan, mac, src_vni));
 }
 
+/* Point @fdb at nexthop @nhid: returns 1 if changed, 0 if same, or -errno. */
+static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
+			       u32 nhid, struct netlink_ext_ack *extack)
+{
+	struct nexthop *old_nh = rtnl_dereference(fdb->nh);
+	struct nh_group *nhg;
+	struct nexthop *nh;
+	int err = -EINVAL;
+
+	if (old_nh && old_nh->id == nhid)
+		return 0;
+
+	nh = nexthop_find_by_id(vxlan->net, nhid);
+	if (!nh) {
+		NL_SET_ERR_MSG(extack, "Nexthop id does not exist");
+		goto err_inval;
+	}
+
+	if (!nexthop_get(nh)) {
+		NL_SET_ERR_MSG(extack, "Nexthop has been deleted");
+		nh = NULL;
+		goto err_inval;
+	}
+	if (!nh->is_fdb_nh) {
+		NL_SET_ERR_MSG(extack, "Nexthop is not a fdb nexthop");
+		goto err_inval;
+	}
+
+	/* nh_grp is read only for groups, and only through rtnl_dereference() */
+	nhg = nh->is_group ? rtnl_dereference(nh->nh_grp) : NULL;
+	if (!nhg || !nhg->mpath) {
+		NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
+		goto err_inval;
+	}
+
+	/* check nexthop group family */
+	switch (vxlan->default_dst.remote_ip.sa.sa_family) {
+	case AF_INET:
+		if (!nhg->has_v4) {
+			err = -EAFNOSUPPORT;
+			NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+			goto err_inval;
+		}
+		break;
+	case AF_INET6:
+		if (nhg->has_v4) {
+			err = -EAFNOSUPPORT;
+			NL_SET_ERR_MSG(extack, "Nexthop group family not supported");
+			goto err_inval;
+		}
+	}
+
+	if (old_nh) {
+		list_del_rcu(&fdb->nh_list);
+		nexthop_put(old_nh);
+	}
+	rcu_assign_pointer(fdb->nh, nh);
+	list_add_tail_rcu(&fdb->nh_list, &nh->fdb_list);
+	return 1;
+
+err_inval:
+	if (nh)
+		nexthop_put(nh);
+	return err;
+}
+
 static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 			    const u8 *mac, union vxlan_addr *ip,
 			    __u16 state, __be16 port, __be32 src_vni,
 			    __be32 vni, __u32 ifindex, __u16 ndm_flags,
-			    struct vxlan_fdb **fdb)
+			    u32 nhid, struct vxlan_fdb **fdb,
+			    struct netlink_ext_ack *extack)
 {
 	struct vxlan_rdst *rd = NULL;
 	struct vxlan_fdb *f;
@@ -838,20 +927,33 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 	if (!f)
 		return -ENOMEM;
 
-	rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
-	if (rc < 0) {
-		kfree(f);
-		return rc;
-	}
+	if (nhid)
+		rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+	else
+		rc = vxlan_fdb_append(f, ip, port, vni, ifindex, &rd);
+	if (rc < 0)
+		goto errout;
 
 	*fdb = f;
 
 	return 0;
+
+errout:
+	kfree(f);
+	return rc;
 }
 
 static void __vxlan_fdb_free(struct vxlan_fdb *f)
 {
 	struct vxlan_rdst *rd, *nd;
+	struct nexthop *nh;
+
+	nh = rcu_dereference_raw(f->nh);
+	if (nh) {
+		rcu_assign_pointer(f->nh, NULL);
+		list_del_rcu(&f->nh_list);
+		nexthop_put(nh);
+	}
 
 	list_for_each_entry_safe(rd, nd, &f->remotes, list) {
 		dst_cache_destroy(&rd->dst_cache);
@@ -875,10 +977,15 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 	netdev_dbg(vxlan->dev, "delete %pM\n", f->eth_addr);
 
 	--vxlan->addrcnt;
-	if (do_notify)
-		list_for_each_entry(rd, &f->remotes, list)
-			vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+	if (do_notify) {
+		if (rcu_access_pointer(f->nh))
+			vxlan_fdb_notify(vxlan, f, NULL, RTM_DELNEIGH,
 					 swdev_notify, NULL);
+		else
+			list_for_each_entry(rd, &f->remotes, list)
+				vxlan_fdb_notify(vxlan, f, rd, RTM_DELNEIGH,
+						 swdev_notify, NULL);
+	}
 
 	hlist_del_rcu(&f->hlist);
 	call_rcu(&f->rcu, vxlan_fdb_free);
@@ -897,7 +1004,7 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 				     __u16 state, __u16 flags,
 				     __be16 port, __be32 vni,
 				     __u32 ifindex, __u16 ndm_flags,
-				     struct vxlan_fdb *f,
+				     struct vxlan_fdb *f, u32 nhid,
 				     bool swdev_notify,
 				     struct netlink_ext_ack *extack)
 {
@@ -908,6 +1015,18 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 	int rc = 0;
 	int err;
 
+	if (nhid && !rcu_access_pointer(f->nh)) {
+		NL_SET_ERR_MSG(extack,
+			       "Cannot replace an existing non nexthop fdb with a nexthop");
+		return -EOPNOTSUPP;
+	}
+
+	if (nhid && (flags & NLM_F_APPEND)) {
+		NL_SET_ERR_MSG(extack,
+			       "Cannot append to a nexthop fdb");
+		return -EOPNOTSUPP;
+	}
+
 	/* Do not allow an externally learned entry to take over an entry added
 	 * by the user.
 	 */
@@ -929,10 +1048,17 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 		/* Only change unicasts */
 		if (!(is_multicast_ether_addr(f->eth_addr) ||
 		      is_zero_ether_addr(f->eth_addr))) {
-			rc = vxlan_fdb_replace(f, ip, port, vni,
-					       ifindex, &oldrd);
+			if (nhid) {
+				rc = vxlan_fdb_nh_update(vxlan, f, nhid, extack);
+				if (rc < 0)
+					return rc;
+			} else {
+				rc = vxlan_fdb_replace(f, ip, port, vni,
+						       ifindex, &oldrd);
+			}
 			notify |= rc;
 		} else {
+			NL_SET_ERR_MSG(extack, "Cannot replace non-unicast fdb entries");
 			return -EOPNOTSUPP;
 		}
 	}
@@ -962,6 +1088,8 @@ static int vxlan_fdb_update_existing(struct vxlan_dev *vxlan,
 	return 0;
 
 err_notify:
+	if (nhid)
+		return err;
 	if ((flags & NLM_F_REPLACE) && rc)
 		*rd = oldrd;
 	else if ((flags & NLM_F_APPEND) && rc) {
@@ -975,7 +1103,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
 				   const u8 *mac, union vxlan_addr *ip,
 				   __u16 state, __u16 flags,
 				   __be16 port, __be32 src_vni, __be32 vni,
-				   __u32 ifindex, __u16 ndm_flags,
+				   __u32 ifindex, __u16 ndm_flags, u32 nhid,
 				   bool swdev_notify,
 				   struct netlink_ext_ack *extack)
 {
@@ -990,7 +1118,7 @@ static int vxlan_fdb_update_create(struct vxlan_dev *vxlan,
 
 	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
 	rc = vxlan_fdb_create(vxlan, mac, ip, state, port, src_vni,
-			      vni, ifindex, fdb_flags, &f);
+			      vni, ifindex, fdb_flags, nhid, &f, extack);
 	if (rc < 0)
 		return rc;
 
@@ -1012,7 +1140,7 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
 			    const u8 *mac, union vxlan_addr *ip,
 			    __u16 state, __u16 flags,
 			    __be16 port, __be32 src_vni, __be32 vni,
-			    __u32 ifindex, __u16 ndm_flags,
+			    __u32 ifindex, __u16 ndm_flags, u32 nhid,
 			    bool swdev_notify,
 			    struct netlink_ext_ack *extack)
 {
@@ -1028,14 +1156,15 @@ static int vxlan_fdb_update(struct vxlan_dev *vxlan,
 
 		return vxlan_fdb_update_existing(vxlan, ip, state, flags, port,
 						 vni, ifindex, ndm_flags, f,
-						 swdev_notify, extack);
+						 nhid, swdev_notify, extack);
 	} else {
 		if (!(flags & NLM_F_CREATE))
 			return -ENOENT;
 
 		return vxlan_fdb_update_create(vxlan, mac, ip, state, flags,
 					       port, src_vni, vni, ifindex,
-					       ndm_flags, swdev_notify, extack);
+					       ndm_flags, nhid, swdev_notify,
+					       extack);
 	}
 }
 
@@ -1049,7 +1178,7 @@ static void vxlan_fdb_dst_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 
 static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 			   union vxlan_addr *ip, __be16 *port, __be32 *src_vni,
-			   __be32 *vni, u32 *ifindex)
+			   __be32 *vni, u32 *ifindex, u32 *nhid)
 {
 	struct net *net = dev_net(vxlan->dev);
 	int err;
@@ -1109,6 +1238,11 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 		*ifindex = 0;
 	}
 
+	if (tb[NDA_NH_ID])
+		*nhid = nla_get_u32(tb[NDA_NH_ID]);
+	else
+		*nhid = 0;
+
 	return 0;
 }
 
@@ -1123,7 +1257,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	union vxlan_addr ip;
 	__be16 port;
 	__be32 src_vni, vni;
-	u32 ifindex;
+	u32 ifindex, nhid;
 	u32 hash_index;
 	int err;
 
@@ -1133,10 +1267,11 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 		return -EINVAL;
 	}
 
-	if (tb[NDA_DST] == NULL)
+	if (!tb || (!tb[NDA_DST] && !tb[NDA_NH_ID]))
 		return -EINVAL;
 
-	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+			      &nhid);
 	if (err)
 		return err;
 
@@ -1148,7 +1283,7 @@ static int vxlan_fdb_add(struct ndmsg *ndm, struct nlattr *tb[],
 	err = vxlan_fdb_update(vxlan, addr, &ip, ndm->ndm_state, flags,
 			       port, src_vni, vni, ifindex,
 			       ndm->ndm_flags | NTF_VXLAN_ADDED_BY_USER,
-			       true, extack);
+			       nhid, true, extack);
 	spin_unlock_bh(&vxlan->hash_lock[hash_index]);
 
 	return err;
@@ -1159,8 +1294,8 @@ static int __vxlan_fdb_delete(struct vxlan_dev *vxlan,
 			      __be16 port, __be32 src_vni, __be32 vni,
 			      u32 ifindex, bool swdev_notify)
 {
-	struct vxlan_fdb *f;
 	struct vxlan_rdst *rd = NULL;
+	struct vxlan_fdb *f;
 	int err = -ENOENT;
 
 	f = vxlan_find_mac(vxlan, addr, src_vni);
@@ -1195,12 +1330,13 @@ static int vxlan_fdb_delete(struct ndmsg *ndm, struct nlattr *tb[],
 	struct vxlan_dev *vxlan = netdev_priv(dev);
 	union vxlan_addr ip;
 	__be32 src_vni, vni;
-	__be16 port;
-	u32 ifindex;
+	u32 ifindex, nhid;
 	u32 hash_index;
+	__be16 port;
 	int err;
 
-	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex);
+	err = vxlan_fdb_parse(tb, vxlan, &ip, &port, &src_vni, &vni, &ifindex,
+			      &nhid);
 	if (err)
 		return err;
 
@@ -1228,6 +1364,17 @@ static int vxlan_fdb_dump(struct sk_buff *skb, struct netlink_callback *cb,
 		hlist_for_each_entry_rcu(f, &vxlan->fdb_head[h], hlist) {
 			struct vxlan_rdst *rd;
 
+			if (rcu_access_pointer(f->nh)) {
+				err = vxlan_fdb_info(skb, vxlan, f,
+						     NETLINK_CB(cb->skb).portid,
+						     cb->nlh->nlmsg_seq,
+						     RTM_NEWNEIGH,
+						     NLM_F_MULTI, NULL);
+				if (err < 0)
+					goto out;
+				continue;
+			}
+
 			list_for_each_entry_rcu(rd, &f->remotes, list) {
 				if (*idx < cb->args[2])
 					goto skip;
@@ -1311,6 +1458,10 @@ static bool vxlan_snoop(struct net_device *dev,
 		if (f->state & (NUD_PERMANENT | NUD_NOARP))
 			return true;
 
+		/* Don't override an fdb with nexthop with a learnt entry */
+		if (rcu_access_pointer(f->nh))
+			return true;
+
 		if (net_ratelimit())
 			netdev_info(dev,
 				    "%pM migrated from %pIS to %pIS\n",
@@ -1333,7 +1484,7 @@ static bool vxlan_snoop(struct net_device *dev,
 					 vxlan->cfg.dst_port,
 					 vni,
 					 vxlan->default_dst.remote_vni,
-					 ifindex, NTF_SELF, true, NULL);
+					 ifindex, NTF_SELF, 0, true, NULL);
 		spin_unlock(&vxlan->hash_lock[hash_index]);
 	}
 
@@ -2616,6 +2767,38 @@ tx_error:
 	kfree_skb(skb);
 }
 
+static void vxlan_xmit_nh(struct sk_buff *skb, struct net_device *dev,
+			  struct vxlan_fdb *f, __be32 vni, bool did_rsc)
+{
+	struct vxlan_rdst nh_rdst;
+	struct nexthop *nh;
+	bool do_xmit;
+	u32 hash;
+
+	memset(&nh_rdst, 0, sizeof(struct vxlan_rdst));
+	hash = skb_get_hash(skb);
+
+	rcu_read_lock();
+	nh = rcu_dereference(f->nh);
+	if (!nh) {
+		rcu_read_unlock();
+		goto drop;
+	}
+	do_xmit = vxlan_fdb_nh_path_select(nh, hash, &nh_rdst);
+	rcu_read_unlock();
+
+	if (likely(do_xmit))
+		vxlan_xmit_one(skb, dev, vni, &nh_rdst, did_rsc);
+	else
+		goto drop;
+
+	return;
+
+drop:
+	dev->stats.tx_dropped++;
+	dev_kfree_skb(skb);
+}
+
 /* Transmit local packets over Vxlan
  *
  * Outer IP header inherits ECN and DF from inner header.
@@ -2692,22 +2875,27 @@ static netdev_tx_t vxlan_xmit(struct sk_buff *skb, struct net_device *dev)
 		}
 	}
 
-	list_for_each_entry_rcu(rdst, &f->remotes, list) {
-		struct sk_buff *skb1;
+	if (rcu_access_pointer(f->nh)) {
+		vxlan_xmit_nh(skb, dev, f,
+			      (vni ? : vxlan->default_dst.remote_vni), did_rsc);
+	} else {
+		list_for_each_entry_rcu(rdst, &f->remotes, list) {
+			struct sk_buff *skb1;
 
-		if (!fdst) {
-			fdst = rdst;
-			continue;
+			if (!fdst) {
+				fdst = rdst;
+				continue;
+			}
+			skb1 = skb_clone(skb, GFP_ATOMIC);
+			if (skb1)
+				vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
 		}
-		skb1 = skb_clone(skb, GFP_ATOMIC);
-		if (skb1)
-			vxlan_xmit_one(skb1, dev, vni, rdst, did_rsc);
+		if (fdst)
+			vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
+		else
+			kfree_skb(skb);
 	}
 
-	if (fdst)
-		vxlan_xmit_one(skb, dev, vni, fdst, did_rsc);
-	else
-		kfree_skb(skb);
 	return NETDEV_TX_OK;
 }
 
@@ -3615,7 +3803,7 @@ static int __vxlan_dev_create(struct net *net, struct net_device *dev,
 				       dst->remote_vni,
 				       dst->remote_vni,
 				       dst->remote_ifindex,
-				       NTF_SELF, &f);
+				       NTF_SELF, 0, &f, extack);
 		if (err)
 			return err;
 	}
@@ -4013,7 +4201,7 @@ static int vxlan_changelink(struct net_device *dev, struct nlattr *tb[],
 					       vxlan->cfg.dst_port,
 					       conf.vni, conf.vni,
 					       conf.remote_ifindex,
-					       NTF_SELF, true, extack);
+					       NTF_SELF, 0, true, extack);
 			if (err) {
 				spin_unlock_bh(&vxlan->hash_lock[hash_index]);
 				netdev_adjacent_change_abort(dst->remote_dev,
@@ -4335,7 +4523,7 @@ vxlan_fdb_external_learn_add(struct net_device *dev,
 			       fdb_info->remote_vni,
 			       fdb_info->remote_ifindex,
 			       NTF_USE | NTF_SELF | NTF_EXT_LEARNED,
-			       false, extack);
+			       0, false, extack);
 	spin_unlock_bh(&vxlan->hash_lock[hash_index]);
 
 	return err;
diff --git a/include/net/vxlan.h b/include/net/vxlan.h
index 373aadcfea21..3a41627cbdfe 100644
--- a/include/net/vxlan.h
+++ b/include/net/vxlan.h
@@ -7,6 +7,7 @@
 #include <net/dst_metadata.h>
 #include <net/rtnetlink.h>
 #include <net/switchdev.h>
+#include <net/nexthop.h>
 
 #define IANA_VXLAN_UDP_PORT     4789
 
@@ -487,4 +488,28 @@ static inline void vxlan_flag_attr_error(int attrtype,
 #undef VXLAN_FLAG
 }
 
+static inline bool vxlan_fdb_nh_path_select(struct nexthop *nh,
+					    int hash,
+					    struct vxlan_rdst *rdst)
+{
+	struct fib_nh_common *nhc;
+
+	nhc = nexthop_path_fdb_result(nh, hash);
+	if (unlikely(!nhc))
+		return false;
+
+	switch (nhc->nhc_gw_family) {
+	case AF_INET:
+		rdst->remote_ip.sin.sin_addr.s_addr = nhc->nhc_gw.ipv4;
+		rdst->remote_ip.sa.sa_family = AF_INET;
+		break;
+	case AF_INET6:
+		rdst->remote_ip.sin6.sin6_addr = nhc->nhc_gw.ipv6;
+		rdst->remote_ip.sa.sa_family = AF_INET6;
+		break;
+	}
+
+	return true;
+}
+
 #endif
diff --git a/include/uapi/linux/neighbour.h b/include/uapi/linux/neighbour.h
index cd144e3099a3..eefcda8ca44e 100644
--- a/include/uapi/linux/neighbour.h
+++ b/include/uapi/linux/neighbour.h
@@ -29,6 +29,7 @@ enum {
 	NDA_LINK_NETNSID,
 	NDA_SRC_VNI,
 	NDA_PROTOCOL,  /* Originator of entry */
+	NDA_NH_ID,
 	__NDA_MAX
 };
 
diff --git a/net/core/neighbour.c b/net/core/neighbour.c
index b607ea602774..37e4dba62460 100644
--- a/net/core/neighbour.c
+++ b/net/core/neighbour.c
@@ -1771,6 +1771,7 @@ static struct neigh_table *neigh_find_table(int family)
 }
 
 const struct nla_policy nda_policy[NDA_MAX+1] = {
+	[NDA_UNSPEC]		= { .strict_start_type = NDA_NH_ID },
 	[NDA_DST]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
 	[NDA_LLADDR]		= { .type = NLA_BINARY, .len = MAX_ADDR_LEN },
 	[NDA_CACHEINFO]		= { .len = sizeof(struct nda_cacheinfo) },
@@ -1781,6 +1782,7 @@ const struct nla_policy nda_policy[NDA_MAX+1] = {
 	[NDA_IFINDEX]		= { .type = NLA_U32 },
 	[NDA_MASTER]		= { .type = NLA_U32 },
 	[NDA_PROTOCOL]		= { .type = NLA_U8 },
+	[NDA_NH_ID]		= { .type = NLA_U32 },
 };
 
 static int neigh_delete(struct sk_buff *skb, struct nlmsghdr *nlh,
-- 
cgit v1.2.3-59-g8ed1b


From 8590ceedb70181ad9de5a3dc2cfe50ca33a9576a Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 21 May 2020 22:26:15 -0700
Subject: nexthop: add support for notifiers

This patch adds nexthop add/del notifiers. To be used by
vxlan driver in a later patch. Could possibly be used by
switchdev drivers in the future.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netns/nexthop.h |  1 +
 include/net/nexthop.h       | 12 ++++++++++++
 net/ipv4/nexthop.c          | 27 +++++++++++++++++++++++++++
 3 files changed, 40 insertions(+)

diff --git a/include/net/netns/nexthop.h b/include/net/netns/nexthop.h
index c712ee5eebd9..1937476c94a0 100644
--- a/include/net/netns/nexthop.h
+++ b/include/net/netns/nexthop.h
@@ -14,5 +14,6 @@ struct netns_nexthop {
 
 	unsigned int		seq;		/* protected by rtnl_mutex */
 	u32			last_id_allocated;
+	struct atomic_notifier_head notifier_chain;
 };
 #endif
diff --git a/include/net/nexthop.h b/include/net/nexthop.h
index d929c98931ad..4c951680f6f9 100644
--- a/include/net/nexthop.h
+++ b/include/net/nexthop.h
@@ -10,6 +10,7 @@
 #define __LINUX_NEXTHOP_H
 
 #include <linux/netdevice.h>
+#include <linux/notifier.h>
 #include <linux/route.h>
 #include <linux/types.h>
 #include <net/ip_fib.h>
@@ -102,6 +103,17 @@ struct nexthop {
 	};
 };
 
+enum nexthop_event_type {
+	NEXTHOP_EVENT_ADD,
+	NEXTHOP_EVENT_DEL
+};
+
+int call_nexthop_notifier(struct notifier_block *nb, struct net *net,
+			  enum nexthop_event_type event_type,
+			  struct nexthop *nh);
+int register_nexthop_notifier(struct net *net, struct notifier_block *nb);
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb);
+
 /* caller is holding rcu or rtnl; no reference taken to nexthop */
 struct nexthop *nexthop_find_by_id(struct net *net, u32 id);
 void nexthop_free_rcu(struct rcu_head *head);
diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index bf91edc04631..c337e73e02dd 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -36,6 +36,17 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
 	[NHA_FDB]		= { .type = NLA_FLAG },
 };
 
+/* Fire the netns nexthop notifier chain for @nh with a nexthop event type. */
+static int call_nexthop_notifiers(struct net *net,
+				  enum nexthop_event_type event_type,
+				  struct nexthop *nh)
+{
+	int err = atomic_notifier_call_chain(&net->nexthop.notifier_chain,
+					     event_type, nh);
+
+	return notifier_to_errno(err);
+}
+
 static unsigned int nh_dev_hashfn(unsigned int val)
 {
 	unsigned int mask = NH_DEV_HASHSIZE - 1;
@@ -826,6 +837,8 @@ static void __remove_nexthop_fib(struct net *net, struct nexthop *nh)
 	bool do_flush = false;
 	struct fib_info *fi;
 
+	call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
+
 	list_for_each_entry(fi, &nh->fi_list, nh_list) {
 		fi->fib_flags |= RTNH_F_DEAD;
 		do_flush = true;
@@ -1865,6 +1878,19 @@ static struct notifier_block nh_netdev_notifier = {
 	.notifier_call = nh_netdev_event,
 };
 
+int register_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+	return atomic_notifier_chain_register(&net->nexthop.notifier_chain, nb);
+}
+EXPORT_SYMBOL(register_nexthop_notifier);
+
+int unregister_nexthop_notifier(struct net *net, struct notifier_block *nb)
+{
+	return atomic_notifier_chain_unregister(&net->nexthop.notifier_chain,
+						nb);
+}
+EXPORT_SYMBOL(unregister_nexthop_notifier);
+
 static void __net_exit nexthop_net_exit(struct net *net)
 {
 	rtnl_lock();
@@ -1881,6 +1907,7 @@ static int __net_init nexthop_net_init(struct net *net)
 	net->nexthop.devhash = kzalloc(sz, GFP_KERNEL);
 	if (!net->nexthop.devhash)
 		return -ENOMEM;
+	ATOMIC_INIT_NOTIFIER_HEAD(&net->nexthop.notifier_chain);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From c7cdbe2efc40b366be748cd5786279ac395de181 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 21 May 2020 22:26:16 -0700
Subject: vxlan: support for nexthop notifiers

The vxlan driver registers for nexthop add/del notifiers to
clean up fdb entries pointing to such nexthops.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 33 +++++++++++++++++++++++++++++----
 1 file changed, 29 insertions(+), 4 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 754e00240eea..3e88fbef2d4a 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -81,6 +81,7 @@ struct vxlan_fdb {
 	u16		  flags;	/* see ndm_flags and below */
 	struct list_head  nh_list;
 	struct nexthop __rcu *nh;
+	struct vxlan_dev  *vdev;
 };
 
 #define NTF_VXLAN_ADDED_BY_USER 0x100
@@ -813,8 +814,9 @@ static int vxlan_gro_complete(struct sock *sk, struct sk_buff *skb, int nhoff)
 	return eth_gro_complete(skb, nhoff + sizeof(struct vxlanhdr));
 }
 
-static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
-					 __be32 src_vni, __u16 ndm_flags)
+static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
+					 __u16 state, __be32 src_vni,
+					 __u16 ndm_flags)
 {
 	struct vxlan_fdb *f;
 
@@ -826,6 +828,7 @@ static struct vxlan_fdb *vxlan_fdb_alloc(const u8 *mac, __u16 state,
 	f->updated = f->used = jiffies;
 	f->vni = src_vni;
 	f->nh = NULL;
+	f->vdev = vxlan;
 	INIT_LIST_HEAD(&f->nh_list);
 	INIT_LIST_HEAD(&f->remotes);
 	memcpy(f->eth_addr, mac, ETH_ALEN);
@@ -923,7 +926,7 @@ static int vxlan_fdb_create(struct vxlan_dev *vxlan,
 		return -ENOSPC;
 
 	netdev_dbg(vxlan->dev, "add %pM -> %pIS\n", mac, ip);
-	f = vxlan_fdb_alloc(mac, state, src_vni, ndm_flags);
+	f = vxlan_fdb_alloc(vxlan, mac, state, src_vni, ndm_flags);
 	if (!f)
 		return -ENOMEM;
 
@@ -988,6 +991,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 	}
 
 	hlist_del_rcu(&f->hlist);
+	f->vdev = NULL;
 	call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
@@ -4598,6 +4602,25 @@ static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
 	.notifier_call = vxlan_switchdev_event,
 };
 
+static int vxlan_nexthop_event(struct notifier_block *nb,
+			       unsigned long event, void *ptr)
+{
+	struct nexthop *nh = ptr;
+	struct vxlan_fdb *fdb, *tmp;
+
+	if (!nh || event != NEXTHOP_EVENT_DEL)
+		return NOTIFY_DONE;
+
+	list_for_each_entry_safe(fdb, tmp, &nh->fdb_list, nh_list)
+		vxlan_fdb_destroy(fdb->vdev, fdb, false, false);
+
+	return NOTIFY_DONE;
+}
+
+static struct notifier_block vxlan_nexthop_notifier_block __read_mostly = {
+	.notifier_call = vxlan_nexthop_event,
+};
+
 static __net_init int vxlan_init_net(struct net *net)
 {
 	struct vxlan_net *vn = net_generic(net, vxlan_net_id);
@@ -4609,7 +4632,7 @@ static __net_init int vxlan_init_net(struct net *net)
 	for (h = 0; h < PORT_HASH_SIZE; ++h)
 		INIT_HLIST_HEAD(&vn->sock_list[h]);
 
-	return 0;
+	return register_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
 }
 
 static void vxlan_destroy_tunnels(struct net *net, struct list_head *head)
@@ -4641,6 +4664,8 @@ static void __net_exit vxlan_exit_batch_net(struct list_head *net_list)
 	LIST_HEAD(list);
 
 	rtnl_lock();
+	list_for_each_entry(net, net_list, exit_list)
+		unregister_nexthop_notifier(net, &vxlan_nexthop_notifier_block);
 	list_for_each_entry(net, net_list, exit_list)
 		vxlan_destroy_tunnels(net, &list);
 
-- 
cgit v1.2.3-59-g8ed1b


From 0534c5489c11cbda0bd2d9719a121a0f90433905 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 21 May 2020 22:26:17 -0700
Subject: selftests: net: add fdb nexthop tests

This commit adds ipv4 and ipv6 fdb nexthop api tests to fib_nexthops.sh.

Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 160 +++++++++++++++++++++++++++-
 1 file changed, 158 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 50d822face36..51f8e9afe6ae 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode ipv4_fdb_grp_fcnal"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode ipv6_fdb_grp_fcnal"
 
 ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
 TESTS="${ALL_TESTS}"
@@ -146,6 +146,7 @@ setup()
 	create_ns remote
 
 	IP="ip -netns me"
+	BRIDGE="bridge -netns me"
 	set -e
 	$IP li add veth1 type veth peer name veth2
 	$IP li set veth1 up
@@ -280,6 +281,161 @@ stop_ip_monitor()
 	return $rc
 }
 
+check_nexthop_fdb_support()
+{
+	$IP nexthop help 2>&1 | grep -q fdb
+	if [ $? -ne 0 ]; then
+		echo "SKIP: iproute2 too old, missing fdb nexthop support"
+		return $ksft_skip
+	fi
+}
+
+ipv6_fdb_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv6 fdb groups functional"
+	echo "--------------------------"
+
+	check_nexthop_fdb_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 61 via 2001:db8:91::2 fdb"
+	run_cmd "$IP nexthop add id 62 via 2001:db8:91::3 fdb"
+	run_cmd "$IP nexthop add id 102 group 61/62 fdb"
+	check_nexthop "id 102" "id 102 group 61/62 fdb"
+	log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+	## get nexthop group
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" "id 102 group 61/62 fdb"
+	log_test $? 0 "Get Fdb nexthop group by id"
+
+	# fdb nexthop group can only contain fdb nexthops
+	run_cmd "$IP nexthop add id 63 via 2001:db8:91::4"
+	run_cmd "$IP nexthop add id 64 via 2001:db8:91::5"
+	run_cmd "$IP nexthop add id 103 group 63/64 fdb"
+	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+	# Non fdb nexthop group can not contain fdb nexthops
+	run_cmd "$IP nexthop add id 65 via 2001:db8:91::5 fdb"
+	run_cmd "$IP nexthop add id 66 via 2001:db8:91::6 fdb"
+	run_cmd "$IP nexthop add id 104 group 65/66"
+	log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+	# fdb nexthop cannot have blackhole
+	run_cmd "$IP nexthop add id 67 blackhole fdb"
+	log_test $? 2 "Fdb Nexthop with blackhole"
+
+	# fdb nexthop with oif
+	run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with oif"
+
+	# fdb nexthop with onlink
+	run_cmd "$IP nexthop add id 68 via 2001:db8:91::7 onlink fdb"
+	log_test $? 2 "Fdb Nexthop with onlink"
+
+	# fdb nexthop with encap
+	run_cmd "$IP nexthop add id 69 encap mpls 101 via 2001:db8:91::8 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with encap"
+
+	run_cmd "$IP link add name vx10 type vxlan id 1010 local 2001:db8:91::9 remote 2001:db8:91::10 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+	log_test $? 0 "Fdb mac add with nexthop group"
+
+	## fdb nexthops can only reference nexthop groups and not nexthops
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 61 self"
+	log_test $? 255 "Fdb mac add with nexthop"
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 66"
+	log_test $? 2 "Route add with fdb nexthop"
+
+	run_cmd "$IP -6 ro add 2001:db8:101::1/128 nhid 103"
+	log_test $? 2 "Route add with fdb nexthop group"
+
+	run_cmd "$IP nexthop del id 102"
+	log_test $? 0 "Fdb nexthop delete"
+
+	$IP link del dev vx10
+}
+
+ipv4_fdb_grp_fcnal()
+{
+	local rc
+
+	echo
+	echo "IPv4 fdb groups functional"
+	echo "--------------------------"
+
+	check_nexthop_fdb_support
+	if [ $? -eq $ksft_skip ]; then
+		return $ksft_skip
+	fi
+
+	# create group with multiple nexthops
+	run_cmd "$IP nexthop add id 12 via 172.16.1.2 fdb"
+	run_cmd "$IP nexthop add id 13 via 172.16.1.3 fdb"
+	run_cmd "$IP nexthop add id 102 group 12/13 fdb"
+	check_nexthop "id 102" "id 102 group 12/13 fdb"
+	log_test $? 0 "Fdb Nexthop group with multiple nexthops"
+
+	# get nexthop group
+	run_cmd "$IP nexthop get id 102"
+	check_nexthop "id 102" "id 102 group 12/13 fdb"
+	log_test $? 0 "Get Fdb nexthop group by id"
+
+	# fdb nexthop group can only contain fdb nexthops
+	run_cmd "$IP nexthop add id 14 via 172.16.1.2"
+	run_cmd "$IP nexthop add id 15 via 172.16.1.3"
+	run_cmd "$IP nexthop add id 103 group 14/15 fdb"
+	log_test $? 2 "Fdb Nexthop group with non-fdb nexthops"
+
+	# Non fdb nexthop group can not contain fdb nexthops
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 fdb"
+	run_cmd "$IP nexthop add id 17 via 172.16.1.3 fdb"
+	run_cmd "$IP nexthop add id 104 group 14/15"
+	log_test $? 2 "Non-Fdb Nexthop group with fdb nexthops"
+
+	# fdb nexthop cannot have blackhole
+	run_cmd "$IP nexthop add id 18 blackhole fdb"
+	log_test $? 2 "Fdb Nexthop with blackhole"
+
+	# fdb nexthop with oif
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with oif"
+
+	# fdb nexthop with onlink
+	run_cmd "$IP nexthop add id 16 via 172.16.1.2 onlink fdb"
+	log_test $? 2 "Fdb Nexthop with onlink"
+
+	# fdb nexthop with encap
+	run_cmd "$IP nexthop add id 17 encap mpls 101 via 172.16.1.2 dev veth1 fdb"
+	log_test $? 2 "Fdb Nexthop with encap"
+
+	run_cmd "$IP link add name vx10 type vxlan id 1010 local 10.0.0.1 remote 10.0.0.2 dstport 4789 nolearning noudpcsum tos inherit ttl 100"
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:13 dev vx10 nhid 102 self"
+	log_test $? 0 "Fdb mac add with nexthop group"
+
+	# fdb nexthops can only reference nexthop groups and not nexthops
+	run_cmd "$BRIDGE fdb add 02:02:00:00:00:14 dev vx10 nhid 12 self"
+	log_test $? 255 "Fdb mac add with nexthop"
+
+	run_cmd "$IP ro add 172.16.0.0/22 nhid 15"
+	log_test $? 2 "Route add with fdb nexthop"
+
+	run_cmd "$IP ro add 172.16.0.0/22 nhid 103"
+	log_test $? 2 "Route add with fdb nexthop group"
+
+	run_cmd "$IP nexthop del id 102"
+	log_test $? 0 "Fdb nexthop delete"
+
+	$IP link del dev vx10
+}
+
 ################################################################################
 # basic operations (add, delete, replace) on nexthops and nexthop groups
 #
-- 
cgit v1.2.3-59-g8ed1b


From 8ce84271697a2346e88582480b26b7e244a8603a Mon Sep 17 00:00:00 2001
From: Dmitry Bezrukov <dbezrukov@marvell.com>
Date: Fri, 22 May 2020 11:19:37 +0300
Subject: net: atlantic: changes for multi-TC support

This patch contains the following changes:
* access cfg via aq_nic_get_cfg() in aq_nic_start() and aq_nic_map_skb();
* call aq_nic_get_dev() just once in aq_nic_map_skb();
* move ring allocation/deallocation out of aq_vec_alloc()/aq_vec_free();
* add the missing aq_nic_deinit() in atl_resume_common();
* rename 'tcs' field to 'tcs_max' in aq_hw_caps_s to differentiate it from
  the 'tcs' field in aq_nic_cfg_s, which is used for the current number of
  TCs;
* update _TC_MAX defines to the actual number of supported TCs;
* move tx_tc_mode register defines slightly higher (just to keep the order
  of definitions);
* separate variables for TX/RX buff_size in hw_atl*_hw_qos_set();
* use AQ_HW_*_TC instead of hardcoded magic numbers;
* actually use the 'ret' value in aq_mdo_add_secy();

Signed-off-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Co-developed-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  4 +-
 drivers/net/ethernet/aquantia/atlantic/aq_macsec.c |  2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 43 +++++++++++++-------
 .../net/ethernet/aquantia/atlantic/aq_pci_func.c   |  3 ++
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c    | 47 ++++++++++++++--------
 drivers/net/ethernet/aquantia/atlantic/aq_vec.h    |  3 ++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c  |  2 +-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 34 +++++++---------
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h  |  2 +-
 .../aquantia/atlantic/hw_atl/hw_atl_llh_internal.h | 31 ++++++++------
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   |  4 +-
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |  2 +-
 12 files changed, 105 insertions(+), 72 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 03fea9469f01..703ef8d064a2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -46,7 +46,7 @@ struct aq_hw_caps_s {
 	u32 mac_regs_count;
 	u32 hw_alive_check_addr;
 	u8 msix_irqs;
-	u8 tcs;
+	u8 tcs_max;
 	u8 rxd_alignment;
 	u8 rxd_size;
 	u8 txd_alignment;
@@ -120,6 +120,8 @@ struct aq_stats_s {
 
 #define AQ_HW_MULTICAST_ADDRESS_MAX     32U
 
+#define AQ_HW_PTP_TC                    2U
+
 #define AQ_HW_LED_BLINK    0x2U
 #define AQ_HW_LED_DEFAULT  0x0U
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
index 91870ceaf3fe..4a6dfac857ca 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_macsec.c
@@ -478,7 +478,7 @@ static int aq_mdo_add_secy(struct macsec_context *ctx)
 
 	set_bit(txsc_idx, &cfg->txsc_idx_busy);
 
-	return 0;
+	return ret;
 }
 
 static int aq_mdo_upd_secy(struct macsec_context *ctx)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 1c6d12deb47a..b003f1035701 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -399,9 +399,15 @@ int aq_nic_init(struct aq_nic_s *self)
 		err = aq_phy_init(self->aq_hw);
 	}
 
-	for (i = 0U, aq_vec = self->aq_vec[0];
-		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
+	for (i = 0U; i < self->aq_vecs; i++) {
+		aq_vec = self->aq_vec[i];
+		err = aq_vec_ring_alloc(aq_vec, self, i,
+					aq_nic_get_cfg(self));
+		if (err)
+			goto err_exit;
+
 		aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw);
+	}
 
 	err = aq_ptp_init(self, self->irqvecs - 1);
 	if (err < 0)
@@ -424,9 +430,12 @@ err_exit:
 int aq_nic_start(struct aq_nic_s *self)
 {
 	struct aq_vec_s *aq_vec = NULL;
+	struct aq_nic_cfg_s *cfg;
 	unsigned int i = 0U;
 	int err = 0;
 
+	cfg = aq_nic_get_cfg(self);
+
 	err = self->aq_hw_ops->hw_multicast_list_set(self->aq_hw,
 						     self->mc_list.ar,
 						     self->mc_list.count);
@@ -464,7 +473,7 @@ int aq_nic_start(struct aq_nic_s *self)
 	timer_setup(&self->service_timer, aq_nic_service_timer_cb, 0);
 	aq_nic_service_timer_cb(&self->service_timer);
 
-	if (self->aq_nic_cfg.is_polling) {
+	if (cfg->is_polling) {
 		timer_setup(&self->polling_timer, aq_nic_polling_timer_cb, 0);
 		mod_timer(&self->polling_timer, jiffies +
 			  AQ_CFG_POLLING_TIMER_INTERVAL);
@@ -482,16 +491,16 @@ int aq_nic_start(struct aq_nic_s *self)
 		if (err < 0)
 			goto err_exit;
 
-		if (self->aq_nic_cfg.link_irq_vec) {
+		if (cfg->link_irq_vec) {
 			int irqvec = pci_irq_vector(self->pdev,
-						   self->aq_nic_cfg.link_irq_vec);
+						    cfg->link_irq_vec);
 			err = request_threaded_irq(irqvec, NULL,
 						   aq_linkstate_threaded_isr,
 						   IRQF_SHARED | IRQF_ONESHOT,
 						   self->ndev->name, self);
 			if (err < 0)
 				goto err_exit;
-			self->msix_entry_mask |= (1 << self->aq_nic_cfg.link_irq_vec);
+			self->msix_entry_mask |= (1 << cfg->link_irq_vec);
 		}
 
 		err = self->aq_hw_ops->hw_irq_enable(self->aq_hw,
@@ -518,6 +527,8 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 			    struct aq_ring_s *ring)
 {
 	unsigned int nr_frags = skb_shinfo(skb)->nr_frags;
+	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self);
+	struct device *dev = aq_nic_get_dev(self);
 	struct aq_ring_buff_s *first = NULL;
 	u8 ipver = ip_hdr(skb)->version;
 	struct aq_ring_buff_s *dx_buff;
@@ -559,7 +570,7 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 		need_context_tag = true;
 	}
 
-	if (self->aq_nic_cfg.is_vlan_tx_insert && skb_vlan_tag_present(skb)) {
+	if (cfg->is_vlan_tx_insert && skb_vlan_tag_present(skb)) {
 		dx_buff->vlan_tx_tag = skb_vlan_tag_get(skb);
 		dx_buff->len_pkt = skb->len;
 		dx_buff->is_vlan = 1U;
@@ -574,12 +585,12 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 	}
 
 	dx_buff->len = skb_headlen(skb);
-	dx_buff->pa = dma_map_single(aq_nic_get_dev(self),
+	dx_buff->pa = dma_map_single(dev,
 				     skb->data,
 				     dx_buff->len,
 				     DMA_TO_DEVICE);
 
-	if (unlikely(dma_mapping_error(aq_nic_get_dev(self), dx_buff->pa))) {
+	if (unlikely(dma_mapping_error(dev, dx_buff->pa))) {
 		ret = 0;
 		goto exit;
 	}
@@ -611,13 +622,13 @@ unsigned int aq_nic_map_skb(struct aq_nic_s *self, struct sk_buff *skb,
 			else
 				buff_size = frag_len;
 
-			frag_pa = skb_frag_dma_map(aq_nic_get_dev(self),
+			frag_pa = skb_frag_dma_map(dev,
 						   frag,
 						   buff_offset,
 						   buff_size,
 						   DMA_TO_DEVICE);
 
-			if (unlikely(dma_mapping_error(aq_nic_get_dev(self),
+			if (unlikely(dma_mapping_error(dev,
 						       frag_pa)))
 				goto mapping_error;
 
@@ -651,12 +662,12 @@ mapping_error:
 		if (!(dx_buff->is_gso_tcp || dx_buff->is_gso_udp) &&
 		    !dx_buff->is_vlan && dx_buff->pa) {
 			if (unlikely(dx_buff->is_sop)) {
-				dma_unmap_single(aq_nic_get_dev(self),
+				dma_unmap_single(dev,
 						 dx_buff->pa,
 						 dx_buff->len,
 						 DMA_TO_DEVICE);
 			} else {
-				dma_unmap_page(aq_nic_get_dev(self),
+				dma_unmap_page(dev,
 					       dx_buff->pa,
 					       dx_buff->len,
 					       DMA_TO_DEVICE);
@@ -1145,9 +1156,11 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down)
 	if (!self)
 		goto err_exit;
 
-	for (i = 0U, aq_vec = self->aq_vec[0];
-		self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i])
+	for (i = 0U; i < self->aq_vecs; i++) {
+		aq_vec = self->aq_vec[i];
 		aq_vec_deinit(aq_vec);
+		aq_vec_ring_free(aq_vec);
+	}
 
 	aq_ptp_unregister(self);
 	aq_ptp_ring_deinit(self);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
index d10fff8a8c71..41c0f560f95b 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_pci_func.c
@@ -431,6 +431,9 @@ static int atl_resume_common(struct device *dev, bool deep)
 	netif_tx_start_all_queues(nic->ndev);
 
 err_exit:
+	if (ret < 0)
+		aq_nic_deinit(nic, true);
+
 	rtnl_unlock();
 
 	return ret;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index f40a427970dc..d5650cd6e236 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -103,16 +103,11 @@ err_exit:
 struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx,
 			      struct aq_nic_cfg_s *aq_nic_cfg)
 {
-	struct aq_ring_s *ring = NULL;
 	struct aq_vec_s *self = NULL;
-	unsigned int i = 0U;
-	int err = 0;
 
 	self = kzalloc(sizeof(*self), GFP_KERNEL);
-	if (!self) {
-		err = -ENOMEM;
+	if (!self)
 		goto err_exit;
-	}
 
 	self->aq_nic = aq_nic;
 	self->aq_ring_param.vec_idx = idx;
@@ -128,10 +123,19 @@ struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx,
 	netif_napi_add(aq_nic_get_ndev(aq_nic), &self->napi,
 		       aq_vec_poll, AQ_CFG_NAPI_WEIGHT);
 
+err_exit:
+	return self;
+}
+
+int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic,
+		      unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg)
+{
+	struct aq_ring_s *ring = NULL;
+	unsigned int i = 0U;
+	int err = 0;
+
 	for (i = 0; i < aq_nic_cfg->tcs; ++i) {
-		unsigned int idx_ring = AQ_NIC_TCVEC2RING(self->nic,
-						self->tx_rings,
-						self->aq_ring_param.vec_idx);
+		unsigned int idx_ring = AQ_NIC_TCVEC2RING(aq_nic, i, idx);
 
 		ring = aq_ring_tx_alloc(&self->ring[i][AQ_VEC_TX_ID], aq_nic,
 					idx_ring, aq_nic_cfg);
@@ -156,11 +160,11 @@ struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx,
 
 err_exit:
 	if (err < 0) {
-		aq_vec_free(self);
+		aq_vec_ring_free(self);
 		self = NULL;
 	}
 
-	return self;
+	return err;
 }
 
 int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops,
@@ -269,6 +273,18 @@ err_exit:;
 }
 
 void aq_vec_free(struct aq_vec_s *self)
+{
+	if (!self)
+		goto err_exit;
+
+	netif_napi_del(&self->napi);
+
+	kfree(self);
+
+err_exit:;
+}
+
+void aq_vec_ring_free(struct aq_vec_s *self)
 {
 	struct aq_ring_s *ring = NULL;
 	unsigned int i = 0U;
@@ -279,13 +295,12 @@ void aq_vec_free(struct aq_vec_s *self)
 	for (i = 0U, ring = self->ring[0];
 		self->tx_rings > i; ++i, ring = self->ring[i]) {
 		aq_ring_free(&ring[AQ_VEC_TX_ID]);
-		aq_ring_free(&ring[AQ_VEC_RX_ID]);
+		if (i < self->rx_rings)
+			aq_ring_free(&ring[AQ_VEC_RX_ID]);
 	}
 
-	netif_napi_del(&self->napi);
-
-	kfree(self);
-
+	self->tx_rings = 0;
+	self->rx_rings = 0;
 err_exit:;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
index 0fe8e0904c7f..0ee86b26df8a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
@@ -25,10 +25,13 @@ irqreturn_t aq_vec_isr(int irq, void *private);
 irqreturn_t aq_vec_isr_legacy(int irq, void *private);
 struct aq_vec_s *aq_vec_alloc(struct aq_nic_s *aq_nic, unsigned int idx,
 			      struct aq_nic_cfg_s *aq_nic_cfg);
+int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic,
+		      unsigned int idx, struct aq_nic_cfg_s *aq_nic_cfg);
 int aq_vec_init(struct aq_vec_s *self, const struct aq_hw_ops *aq_hw_ops,
 		struct aq_hw_s *aq_hw);
 void aq_vec_deinit(struct aq_vec_s *self);
 void aq_vec_free(struct aq_vec_s *self);
+void aq_vec_ring_free(struct aq_vec_s *self);
 int aq_vec_start(struct aq_vec_s *self);
 void aq_vec_stop(struct aq_vec_s *self);
 cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 1b0670a8ae33..88b17cf77625 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -21,7 +21,7 @@
 	.msix_irqs = 4U,		  \
 	.irq_mask = ~0U,		  \
 	.vecs = HW_ATL_A0_RSS_MAX,	  \
-	.tcs = HW_ATL_A0_TC_MAX,	  \
+	.tcs_max = HW_ATL_A0_TC_MAX,	  \
 	.rxd_alignment = 1U,		  \
 	.rxd_size = HW_ATL_A0_RXD_SIZE,   \
 	.rxds_max = HW_ATL_A0_MAX_RXD,    \
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index fa3cd7e9954b..bee4fb3c8741 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -23,7 +23,7 @@
 	.msix_irqs = 8U,		  \
 	.irq_mask = ~0U,		  \
 	.vecs = HW_ATL_B0_RSS_MAX,	  \
-	.tcs = HW_ATL_B0_TC_MAX,	  \
+	.tcs_max = HW_ATL_B0_TC_MAX,	  \
 	.rxd_alignment = 1U,		  \
 	.rxd_size = HW_ATL_B0_RXD_SIZE,   \
 	.rxds_max = HW_ATL_B0_MAX_RXD,    \
@@ -116,8 +116,9 @@ static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc)
 
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
+	u32 tx_buff_size = HW_ATL_B0_TXBUF_MAX;
+	u32 rx_buff_size = HW_ATL_B0_RXBUF_MAX;
 	unsigned int i_priority = 0U;
-	u32 buff_size = 0U;
 	u32 tc = 0U;
 
 	/* TPS Descriptor rate init */
@@ -131,8 +132,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	tc = 0;
-
 	/* TX Packet Scheduler Data TC0 */
 	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
 	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
@@ -140,46 +139,41 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
 	/* Tx buf size TC0 */
-	buff_size = HW_ATL_B0_TXBUF_MAX - HW_ATL_B0_PTP_TXBUF_SIZE;
+	tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
 
-	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, buff_size, tc);
+	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
 	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
-						   (buff_size *
+						   (tx_buff_size *
 						   (1024 / 32U) * 66U) /
 						   100U, tc);
 	hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self,
-						   (buff_size *
+						   (tx_buff_size *
 						   (1024 / 32U) * 50U) /
 						   100U, tc);
 	/* Init TC2 for PTP_TX */
-	tc = 2;
-
 	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE,
-					       tc);
+					       AQ_HW_PTP_TC);
 
 	/* QoS Rx buf size per TC */
-	tc = 0;
-	buff_size = HW_ATL_B0_RXBUF_MAX - HW_ATL_B0_PTP_RXBUF_SIZE;
+	rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
 
-	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, buff_size, tc);
+	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
 	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
-						   (buff_size *
+						   (rx_buff_size *
 						   (1024U / 32U) * 66U) /
 						   100U, tc);
 	hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self,
-						   (buff_size *
+						   (rx_buff_size *
 						   (1024U / 32U) * 50U) /
 						   100U, tc);
 
 	hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
 
 	/* Init TC2 for PTP_RX */
-	tc = 2;
-
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE,
-					       tc);
+					       AQ_HW_PTP_TC);
 	/* No flow control for PTP */
-	hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, tc);
+	hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, AQ_HW_PTP_TC);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 7ab23a1751d3..4fba4e0928c7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -75,7 +75,7 @@
 #define HW_ATL_B0_RSS_HASHKEY_BITS 320U
 
 #define HW_ATL_B0_TCRSS_4_8  1
-#define HW_ATL_B0_TC_MAX 1U
+#define HW_ATL_B0_TC_MAX 8U
 #define HW_ATL_B0_RSS_MAX 8U
 
 #define HW_ATL_B0_LRO_RXD_MAX 16U
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 18de2f7b8959..5d86ffab4ece 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -2038,6 +2038,24 @@
 /* default value of bitfield lso_tcp_flag_mid[b:0] */
 #define HW_ATL_THM_LSO_TCP_FLAG_MID_DEFAULT 0x0
 
+/* tx tx_tc_mode bitfield definitions
+ * preprocessor definitions for the bitfield "tx_tc_mode".
+ * port="pif_tpb_tx_tc_mode_i,pif_tps_tx_tc_mode_i"
+ */
+
+/* register address for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900
+/* bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100
+/* inverted bitmask for bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF
+/* lower bit position of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8
+/* width of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1
+/* default value of bitfield tx_tc_mode */
+#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
+
 /* tx tx_buf_en bitfield definitions
  * preprocessor definitions for the bitfield "tx_buf_en".
  * port="pif_tpb_tx_buf_en_i"
@@ -2056,19 +2074,6 @@
 /* default value of bitfield tx_buf_en */
 #define HW_ATL_TPB_TX_BUF_EN_DEFAULT 0x0
 
-/* register address for bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_ADDR 0x00007900
-/* bitmask for bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_MSK 0x00000100
-/* inverted bitmask for bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_MSKN 0xFFFFFEFF
-/* lower bit position of bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_SHIFT 8
-/* width of bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_WIDTH 1
-/* default value of bitfield tx_tc_mode */
-#define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
-
 /* tx tx{b}_hi_thresh[c:0] bitfield definitions
  * preprocessor definitions for the bitfield "tx{b}_hi_thresh[c:0]".
  * parameter: buffer {b} | stride size 0x10 | range [0, 7]
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 6f2b33ae3d06..ccdb74562270 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -23,7 +23,7 @@ static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
 	.msix_irqs = 8U,		  \
 	.irq_mask = ~0U,		  \
 	.vecs = HW_ATL2_RSS_MAX,	  \
-	.tcs = HW_ATL2_TC_MAX,	  \
+	.tcs_max = HW_ATL2_TC_MAX,	  \
 	.rxd_alignment = 1U,		  \
 	.rxd_size = HW_ATL2_RXD_SIZE,   \
 	.rxds_max = HW_ATL2_MAX_RXD,    \
@@ -126,8 +126,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	tc = 0;
-
 	/* TX Packet Scheduler Data TC0 */
 	hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0, tc);
 	hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index e66b3583bfe9..be0c049ea582 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -31,7 +31,7 @@
 
 #define HW_ATL2_RSS_REDIRECTION_MAX 64U
 
-#define HW_ATL2_TC_MAX 1U
+#define HW_ATL2_TC_MAX 8U
 #define HW_ATL2_RSS_MAX 8U
 
 #define HW_ATL2_INTR_MODER_MAX  0x1FF
-- 
cgit v1.2.3-59-g8ed1b


From 593dd0fc202eed27de07c5df9ef24a3c00cf0c09 Mon Sep 17 00:00:00 2001
From: Dmitry Bezrukov <dbezrukov@marvell.com>
Date: Fri, 22 May 2020 11:19:38 +0300
Subject: net: atlantic: move PTP TC initialization to a separate function

This patch moves the PTP TC initialization into a separate function.

Signed-off-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Co-developed-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 31 +++++++++++++---------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index bee4fb3c8741..0ff3f6eea022 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -114,6 +114,21 @@ static int hw_atl_b0_set_fc(struct aq_hw_s *self, u32 fc, u32 tc)
 	return 0;
 }
 
+static int hw_atl_b0_tc_ptp_set(struct aq_hw_s *self)
+{
+	/* Init TC2 for PTP_TX */
+	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE,
+					       AQ_HW_PTP_TC);
+
+	/* Init TC2 for PTP_RX */
+	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE,
+					       AQ_HW_PTP_TC);
+	/* No flow control for PTP */
+	hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, AQ_HW_PTP_TC);
+
+	return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
 	u32 tx_buff_size = HW_ATL_B0_TXBUF_MAX;
@@ -121,6 +136,9 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	unsigned int i_priority = 0U;
 	u32 tc = 0U;
 
+	tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
+	rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
+
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
 	hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
@@ -139,8 +157,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
 	/* Tx buf size TC0 */
-	tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
-
 	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
 	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
 						   (tx_buff_size *
@@ -150,13 +166,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 						   (tx_buff_size *
 						   (1024 / 32U) * 50U) /
 						   100U, tc);
-	/* Init TC2 for PTP_TX */
-	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_TXBUF_SIZE,
-					       AQ_HW_PTP_TC);
 
 	/* QoS Rx buf size per TC */
-	rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
-
 	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
 	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
 						   (rx_buff_size *
@@ -169,11 +180,7 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 
 	hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
 
-	/* Init TC2 for PTP_RX */
-	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, HW_ATL_B0_PTP_RXBUF_SIZE,
-					       AQ_HW_PTP_TC);
-	/* No flow control for PTP */
-	hw_atl_rpb_rx_xoff_en_per_tc_set(self, 0U, AQ_HW_PTP_TC);
+	hw_atl_b0_tc_ptp_set(self);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
-- 
cgit v1.2.3-59-g8ed1b


From 0aa7bc3ee4652e0790f9b42c93c769b59b9f2308 Mon Sep 17 00:00:00 2001
From: Dmitry Bezrukov <dbezrukov@marvell.com>
Date: Fri, 22 May 2020 11:19:39 +0300
Subject: net: atlantic: changes for multi-TC support

This patch contains the following changes:
* add cfg->is_ptp (used for PTP enable/disable switch, which
  is described in more detail below);
* add cfg->tc_mode (A1 supports 2 HW modes only);
* setup queue to TC mapping based on TC mode on A2;
* remove hw_tx_tc_mode_get / hw_rx_tc_mode_get hw_ops.

In the first generation of our hardware (A1), a whole traffic class is
consumed for PTP handling in FW (FW uses it to send the ptp data and to
send back timestamps).
The 'is_ptp' flag introduced in this patch will be used to automatically
disable PTP when a conflicting configuration is detected, e.g. when
multiple TCs are enabled.

Signed-off-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Co-developed-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     | 10 +++---
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 26 +++++++++-----
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |  2 ++
 drivers/net/ethernet/aquantia/atlantic/aq_ptp.c    | 27 +++++++--------
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 25 ++++----------
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 40 ++++++++++++++++------
 6 files changed, 75 insertions(+), 55 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 703ef8d064a2..c3df9da6088c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -18,6 +18,12 @@
 #define AQ_HW_MAC_COUNTER_HZ   312500000ll
 #define AQ_HW_PHY_COUNTER_HZ   160000000ll
 
+enum aq_tc_mode {
+	AQ_TC_MODE_INVALID = -1,
+	AQ_TC_MODE_8TCS,
+	AQ_TC_MODE_4TCS,
+};
+
 #define AQ_RX_FIRST_LOC_FVLANID     0U
 #define AQ_RX_LAST_LOC_FVLANID	   15U
 #define AQ_RX_FIRST_LOC_FETHERT    16U
@@ -281,10 +287,6 @@ struct aq_hw_ops {
 	int (*hw_set_offload)(struct aq_hw_s *self,
 			      struct aq_nic_cfg_s *aq_nic_cfg);
 
-	int (*hw_tx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
-
-	int (*hw_rx_tc_mode_get)(struct aq_hw_s *self, u32 *tc_mode);
-
 	int (*hw_ring_hwts_rx_fill)(struct aq_hw_s *self,
 				    struct aq_ring_s *aq_ring);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index b003f1035701..3eeb652068e2 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -89,6 +89,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	cfg->is_autoneg = AQ_CFG_IS_AUTONEG_DEF;
 
 	cfg->is_lro = AQ_CFG_IS_LRO_DEF;
+	cfg->is_ptp = true;
 
 	/*descriptors */
 	cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF);
@@ -122,6 +123,11 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 		cfg->vecs = 1U;
 	}
 
+	if (cfg->vecs <= 4)
+		cfg->tc_mode = AQ_TC_MODE_8TCS;
+	else
+		cfg->tc_mode = AQ_TC_MODE_4TCS;
+
 	/* Check if we have enough vectors allocated for
 	 * link status IRQ. If no - we'll know link state from
 	 * slower service task.
@@ -409,17 +415,19 @@ int aq_nic_init(struct aq_nic_s *self)
 		aq_vec_init(aq_vec, self->aq_hw_ops, self->aq_hw);
 	}
 
-	err = aq_ptp_init(self, self->irqvecs - 1);
-	if (err < 0)
-		goto err_exit;
+	if (aq_nic_get_cfg(self)->is_ptp) {
+		err = aq_ptp_init(self, self->irqvecs - 1);
+		if (err < 0)
+			goto err_exit;
 
-	err = aq_ptp_ring_alloc(self);
-	if (err < 0)
-		goto err_exit;
+		err = aq_ptp_ring_alloc(self);
+		if (err < 0)
+			goto err_exit;
 
-	err = aq_ptp_ring_init(self);
-	if (err < 0)
-		goto err_exit;
+		err = aq_ptp_ring_init(self);
+		if (err < 0)
+			goto err_exit;
+	}
 
 	netif_carrier_off(self->ndev);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 0663b8d0220d..3434f8206823 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -59,6 +59,8 @@ struct aq_nic_cfg_s {
 	bool is_polling;
 	bool is_rss;
 	bool is_lro;
+	bool is_ptp;
+	enum aq_tc_mode tc_mode;
 	u32 priv_flags;
 	u8  tcs;
 	struct aq_rss_parameters aq_rss;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
index 58e8c641e8b3..599ced261b2a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ptp.c
@@ -945,26 +945,29 @@ void aq_ptp_ring_deinit(struct aq_nic_s *aq_nic)
 #define PTP_4TC_RING_IDX            16
 #define PTP_HWST_RING_IDX           31
 
+/* Index must be 8 (8 TCs) or 16 (4 TCs).
+ * It depends on Traffic Class mode.
+ */
+static unsigned int ptp_ring_idx(const enum aq_tc_mode tc_mode)
+{
+	if (tc_mode == AQ_TC_MODE_8TCS)
+		return PTP_8TC_RING_IDX;
+
+	return PTP_4TC_RING_IDX;
+}
+
 int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
 {
 	struct aq_ptp_s *aq_ptp = aq_nic->aq_ptp;
 	unsigned int tx_ring_idx, rx_ring_idx;
 	struct aq_ring_s *hwts;
-	u32 tx_tc_mode, rx_tc_mode;
 	struct aq_ring_s *ring;
 	int err;
 
 	if (!aq_ptp)
 		return 0;
 
-	/* Index must to be 8 (8 TCs) or 16 (4 TCs).
-	 * It depends from Traffic Class mode.
-	 */
-	aq_nic->aq_hw_ops->hw_tx_tc_mode_get(aq_nic->aq_hw, &tx_tc_mode);
-	if (tx_tc_mode == 0)
-		tx_ring_idx = PTP_8TC_RING_IDX;
-	else
-		tx_ring_idx = PTP_4TC_RING_IDX;
+	tx_ring_idx = ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode);
 
 	ring = aq_ring_tx_alloc(&aq_ptp->ptp_tx, aq_nic,
 				tx_ring_idx, &aq_nic->aq_nic_cfg);
@@ -973,11 +976,7 @@ int aq_ptp_ring_alloc(struct aq_nic_s *aq_nic)
 		goto err_exit;
 	}
 
-	aq_nic->aq_hw_ops->hw_rx_tc_mode_get(aq_nic->aq_hw, &rx_tc_mode);
-	if (rx_tc_mode == 0)
-		rx_ring_idx = PTP_8TC_RING_IDX;
-	else
-		rx_ring_idx = PTP_4TC_RING_IDX;
+	rx_ring_idx = ptp_ring_idx(aq_nic->aq_nic_cfg.tc_mode);
 
 	ring = aq_ring_rx_alloc(&aq_ptp->ptp_rx, aq_nic,
 				rx_ring_idx, &aq_nic->aq_nic_cfg);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 0ff3f6eea022..7caf586ea56c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -131,13 +131,16 @@ static int hw_atl_b0_tc_ptp_set(struct aq_hw_s *self)
 
 static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 {
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 	u32 tx_buff_size = HW_ATL_B0_TXBUF_MAX;
 	u32 rx_buff_size = HW_ATL_B0_RXBUF_MAX;
 	unsigned int i_priority = 0U;
 	u32 tc = 0U;
 
-	tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
-	rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
+	if (cfg->is_ptp) {
+		tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
+		rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
+	}
 
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
@@ -180,7 +183,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 
 	hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
 
-	hw_atl_b0_tc_ptp_set(self);
+	if (cfg->is_ptp)
+		hw_atl_b0_tc_ptp_set(self);
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (i_priority = 8U; i_priority--;)
@@ -1079,18 +1083,6 @@ int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring)
 	return aq_hw_err_from_flags(self);
 }
 
-static int hw_atl_b0_tx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
-{
-	*tc_mode = hw_atl_tpb_tps_tx_tc_mode_get(self);
-	return aq_hw_err_from_flags(self);
-}
-
-static int hw_atl_b0_rx_tc_mode_get(struct aq_hw_s *self, u32 *tc_mode)
-{
-	*tc_mode = hw_atl_rpb_rpf_rx_traf_class_mode_get(self);
-	return aq_hw_err_from_flags(self);
-}
-
 #define get_ptp_ts_val_u64(self, indx) \
 	((u64)(hw_atl_pcs_ptp_clock_get(self, indx) & 0xffff))
 
@@ -1508,9 +1500,6 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
 
-	.hw_tx_tc_mode_get       = hw_atl_b0_tx_tc_mode_get,
-	.hw_rx_tc_mode_get       = hw_atl_b0_rx_tc_mode_get,
-
 	.hw_ring_hwts_rx_fill        = hw_atl_b0_hw_ring_hwts_rx_fill,
 	.hw_ring_hwts_rx_receive     = hw_atl_b0_hw_ring_hwts_rx_receive,
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index ccdb74562270..a14118550882 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -91,16 +91,36 @@ static int hw_atl2_hw_reset(struct aq_hw_s *self)
 
 static int hw_atl2_hw_queue_to_tc_map_set(struct aq_hw_s *self)
 {
-	if (!hw_atl_rpb_rpf_rx_traf_class_mode_get(self)) {
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x11110000);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x33332222);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x55554444);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x77776666);
-	} else {
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(0), 0x00000000);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(8), 0x11111111);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(16), 0x22222222);
-		aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(24), 0x33333333);
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	unsigned int tcs, q_per_tc;
+	unsigned int tc, q;
+	u32 value = 0;
+
+	switch (cfg->tc_mode) {
+	case AQ_TC_MODE_8TCS:
+		tcs = 8;
+		q_per_tc = 4;
+		break;
+	case AQ_TC_MODE_4TCS:
+		tcs = 4;
+		q_per_tc = 8;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	for (tc = 0; tc != tcs; tc++) {
+		unsigned int tc_q_offset = tc * q_per_tc;
+
+		for (q = tc_q_offset; q != tc_q_offset + q_per_tc; q++)
+			value |= tc << HW_ATL2_RX_Q_TC_MAP_SHIFT(q);
+
+		if (HW_ATL2_RX_Q_TC_MAP_ADR(q) !=
+		    HW_ATL2_RX_Q_TC_MAP_ADR(q - 1)) {
+			aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(q - 1),
+					value);
+			value = 0;
+		}
 	}
 
 	return aq_hw_err_from_flags(self);
-- 
cgit v1.2.3-59-g8ed1b


From a83fe6b6ad6b10f6912025ae23bd5c2596a4e7f4 Mon Sep 17 00:00:00 2001
From: Dmitry Bezrukov <dbezrukov@marvell.com>
Date: Fri, 22 May 2020 11:19:40 +0300
Subject: net: atlantic: QoS implementation: multi-TC support

This patch adds multi-TC support.

PTP is automatically disabled when the user enables more than 2 TCs,
otherwise traffic on TC2 won't quite work, because it's reserved for PTP.

Signed-off-by: Dmitry Bezrukov <dbezrukov@marvell.com>
Co-developed-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Signed-off-by: Dmitry Bogdanov <dbogdanov@marvell.com>
Co-developed-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/aq_filters.c    |  11 +-
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |   1 +
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.c   |  26 +++++
 .../net/ethernet/aquantia/atlantic/aq_hw_utils.h   |   2 +
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   |  39 ++++++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  61 ++++++++++-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |  14 ++-
 drivers/net/ethernet/aquantia/atlantic/aq_ring.c   |  19 +++-
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  74 +++++++------
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c |   9 +-
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 122 ++++++++++++---------
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h   |   7 --
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |   8 ++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |   4 +
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        |  10 ++
 15 files changed, 293 insertions(+), 114 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
index 03ff92bc4a7f..1bc4d33a0ce5 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_filters.c
@@ -153,6 +153,8 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic,
 		       struct aq_hw_rx_fltrs_s *rx_fltrs,
 		       struct ethtool_rx_flow_spec *fsp)
 {
+	struct aq_nic_cfg_s *cfg = &aq_nic->aq_nic_cfg;
+
 	if (fsp->location < AQ_RX_FIRST_LOC_FVLANID ||
 	    fsp->location > AQ_RX_LAST_LOC_FVLANID) {
 		netdev_err(aq_nic->ndev,
@@ -170,10 +172,10 @@ aq_check_approve_fvlan(struct aq_nic_s *aq_nic,
 		return -EINVAL;
 	}
 
-	if (fsp->ring_cookie > aq_nic->aq_nic_cfg.num_rss_queues) {
+	if (fsp->ring_cookie > cfg->num_rss_queues * cfg->tcs) {
 		netdev_err(aq_nic->ndev,
 			   "ethtool: queue number must be in range [0, %d]",
-			   aq_nic->aq_nic_cfg.num_rss_queues - 1);
+			   cfg->num_rss_queues * cfg->tcs - 1);
 		return -EINVAL;
 	}
 	return 0;
@@ -262,6 +264,7 @@ static bool __must_check
 aq_rule_is_not_correct(struct aq_nic_s *aq_nic,
 		       struct ethtool_rx_flow_spec *fsp)
 {
+	struct aq_nic_cfg_s *cfg = &aq_nic->aq_nic_cfg;
 	bool rule_is_not_correct = false;
 
 	if (!aq_nic) {
@@ -274,11 +277,11 @@ aq_rule_is_not_correct(struct aq_nic_s *aq_nic,
 	} else if (aq_check_filter(aq_nic, fsp)) {
 		rule_is_not_correct = true;
 	} else if (fsp->ring_cookie != RX_CLS_FLOW_DISC) {
-		if (fsp->ring_cookie >= aq_nic->aq_nic_cfg.num_rss_queues) {
+		if (fsp->ring_cookie >= cfg->num_rss_queues * cfg->tcs) {
 			netdev_err(aq_nic->ndev,
 				   "ethtool: The specified action is invalid.\n"
 				   "Maximum allowable value action is %u.\n",
-				   aq_nic->aq_nic_cfg.num_rss_queues - 1);
+				   cfg->num_rss_queues * cfg->tcs - 1);
 			rule_is_not_correct = true;
 		}
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index c3df9da6088c..1dccaaee04b3 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -124,6 +124,7 @@ struct aq_stats_s {
 #define AQ_HW_TXD_MULTIPLE 8U
 #define AQ_HW_RXD_MULTIPLE 8U
 
+#define AQ_HW_QUEUES_MAX                32U
 #define AQ_HW_MULTICAST_ADDRESS_MAX     32U
 
 #define AQ_HW_PTP_TC                    2U
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
index 7dbf49adcea6..342c5179f846 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.c
@@ -79,3 +79,29 @@ int aq_hw_err_from_flags(struct aq_hw_s *hw)
 err_exit:
 	return err;
 }
+
+int aq_hw_num_tcs(struct aq_hw_s *hw)
+{
+	switch (hw->aq_nic_cfg->tc_mode) {
+	case AQ_TC_MODE_8TCS:
+		return 8;
+	case AQ_TC_MODE_4TCS:
+		return 4;
+	default:
+		break;
+	}
+
+	return 1;
+}
+
+int aq_hw_q_per_tc(struct aq_hw_s *hw)
+{
+	switch (hw->aq_nic_cfg->tc_mode) {
+	case AQ_TC_MODE_8TCS:
+		return 4;
+	case AQ_TC_MODE_4TCS:
+		return 8;
+	default:
+		return 4;
+	}
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
index 9ef82d487e01..32aa5f2fb840 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw_utils.h
@@ -34,5 +34,7 @@ u32 aq_hw_read_reg(struct aq_hw_s *hw, u32 reg);
 void aq_hw_write_reg(struct aq_hw_s *hw, u32 reg, u32 value);
 u64 aq_hw_read_reg64(struct aq_hw_s *hw, u32 reg);
 int aq_hw_err_from_flags(struct aq_hw_s *hw);
+int aq_hw_num_tcs(struct aq_hw_s *hw);
+int aq_hw_q_per_tc(struct aq_hw_s *hw);
 
 #endif /* AQ_HW_UTILS_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 9fcab646cbd5..ef9e969fbf7a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -12,11 +12,13 @@
 #include "aq_ethtool.h"
 #include "aq_ptp.h"
 #include "aq_filters.h"
+#include "aq_hw_utils.h"
 
 #include <linux/netdevice.h>
 #include <linux/module.h>
 #include <linux/ip.h>
 #include <linux/udp.h>
+#include <net/pkt_cls.h>
 
 MODULE_LICENSE("GPL v2");
 MODULE_AUTHOR(AQ_CFG_DRV_AUTHOR);
@@ -38,7 +40,7 @@ struct net_device *aq_ndev_alloc(void)
 	struct net_device *ndev = NULL;
 	struct aq_nic_s *aq_nic = NULL;
 
-	ndev = alloc_etherdev_mq(sizeof(struct aq_nic_s), AQ_CFG_VECS_MAX);
+	ndev = alloc_etherdev_mq(sizeof(struct aq_nic_s), AQ_HW_QUEUES_MAX);
 	if (!ndev)
 		return NULL;
 
@@ -330,6 +332,40 @@ static int aq_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto,
 	return 0;
 }
 
+static int aq_validate_mqprio_opt(struct aq_nic_s *self,
+				  const unsigned int num_tc)
+{
+	if (num_tc > aq_hw_num_tcs(self->aq_hw)) {
+		netdev_err(self->ndev, "Too many TCs requested\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (num_tc != 0 && !is_power_of_2(num_tc)) {
+		netdev_err(self->ndev, "TC count should be power of 2\n");
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int aq_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
+			   void *type_data)
+{
+	struct aq_nic_s *aq_nic = netdev_priv(dev);
+	struct tc_mqprio_qopt *mqprio = type_data;
+	int err;
+
+	if (type != TC_SETUP_QDISC_MQPRIO)
+		return -EOPNOTSUPP;
+
+	err = aq_validate_mqprio_opt(aq_nic, mqprio->num_tc);
+	if (err)
+		return err;
+
+	return aq_nic_setup_tc_mqprio(aq_nic, mqprio->num_tc,
+				      mqprio->prio_tc_map);
+}
+
 static const struct net_device_ops aq_ndev_ops = {
 	.ndo_open = aq_ndev_open,
 	.ndo_stop = aq_ndev_close,
@@ -341,6 +377,7 @@ static const struct net_device_ops aq_ndev_ops = {
 	.ndo_do_ioctl = aq_ndev_ioctl,
 	.ndo_vlan_rx_add_vid = aq_ndo_vlan_rx_add_vid,
 	.ndo_vlan_rx_kill_vid = aq_ndo_vlan_rx_kill_vid,
+	.ndo_setup_tc = aq_ndo_setup_tc,
 };
 
 static int __init aq_ndev_init_module(void)
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 3eeb652068e2..da781082be32 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -26,6 +26,7 @@
 #include <linux/ip.h>
 #include <linux/tcp.h>
 #include <net/ip.h>
+#include <net/pkt_cls.h>
 
 static unsigned int aq_itr = AQ_CFG_INTERRUPT_MODERATION_AUTO;
 module_param_named(aq_itr, aq_itr, uint, 0644);
@@ -68,6 +69,7 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 void aq_nic_cfg_start(struct aq_nic_s *self)
 {
 	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+	int i;
 
 	cfg->tcs = AQ_CFG_TCS_DEF;
 
@@ -142,6 +144,9 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	cfg->is_vlan_rx_strip = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_RX);
 	cfg->is_vlan_tx_insert = !!(cfg->features & NETIF_F_HW_VLAN_CTAG_TX);
 	cfg->is_vlan_force_promisc = true;
+
+	for (i = 0; i < sizeof(cfg->prio_tc_map); i++)
+		cfg->prio_tc_map[i] = cfg->tcs * i / 8;
 }
 
 static int aq_nic_update_link_status(struct aq_nic_s *self)
@@ -517,14 +522,21 @@ int aq_nic_start(struct aq_nic_s *self)
 			goto err_exit;
 	}
 
-	err = netif_set_real_num_tx_queues(self->ndev, self->aq_vecs);
+	err = netif_set_real_num_tx_queues(self->ndev,
+					   self->aq_vecs * cfg->tcs);
 	if (err < 0)
 		goto err_exit;
 
-	err = netif_set_real_num_rx_queues(self->ndev, self->aq_vecs);
+	err = netif_set_real_num_rx_queues(self->ndev,
+					   self->aq_vecs * cfg->tcs);
 	if (err < 0)
 		goto err_exit;
 
+	for (i = 0; i < cfg->tcs; i++) {
+		u16 offset = self->aq_vecs * i;
+
+		netdev_set_tc_queue(self->ndev, i, self->aq_vecs, offset);
+	}
 	netif_tx_start_all_queues(self->ndev);
 
 err_exit:
@@ -690,10 +702,10 @@ exit:
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 {
 	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
+	unsigned int tc = skb->queue_mapping / self->aq_nic_cfg.vecs;
 	struct aq_ring_s *ring = NULL;
 	unsigned int frags = 0U;
 	int err = NETDEV_TX_OK;
-	unsigned int tc = 0U;
 
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
@@ -712,7 +724,8 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 	}
 
 	/* Above status update may stop the queue. Check this. */
-	if (__netif_subqueue_stopped(self->ndev, ring->idx)) {
+	if (__netif_subqueue_stopped(self->ndev,
+				     AQ_NIC_RING2QMAP(self, ring->idx))) {
 		err = NETDEV_TX_BUSY;
 		goto err_exit;
 	}
@@ -1266,3 +1279,43 @@ void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
 		break;
 	}
 }
+
+int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map)
+{
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+	bool ndev_running;
+	int err = 0;
+	int i;
+
+	/* Nothing to do if the configuration is already the same, or if
+	 * this is a disable request (tcs is 0) and QoS is already disabled.
+	 */
+	if (tcs == cfg->tcs || (tcs == 0 && !cfg->is_qos))
+		return 0;
+
+	ndev_running = netif_running(self->ndev);
+	if (ndev_running)
+		dev_close(self->ndev);
+
+	cfg->tcs = tcs;
+	if (cfg->tcs == 0)
+		cfg->tcs = 1;
+	if (prio_tc_map)
+		memcpy(cfg->prio_tc_map, prio_tc_map, sizeof(cfg->prio_tc_map));
+	else
+		for (i = 0; i < sizeof(cfg->prio_tc_map); i++)
+			cfg->prio_tc_map[i] = cfg->tcs * i / 8;
+
+	cfg->is_qos = (tcs != 0 ? true : false);
+	cfg->is_ptp = (cfg->tcs <= AQ_HW_PTP_TC);
+	if (!cfg->is_ptp)
+		netdev_warn(self->ndev, "%s\n",
+			    "PTP is auto disabled due to requested TC count.");
+
+	netdev_set_num_tc(self->ndev, cfg->tcs);
+
+	if (ndev_running)
+		err = dev_open(self->ndev, NULL);
+
+	return err;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 3434f8206823..29e129411945 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -59,10 +59,12 @@ struct aq_nic_cfg_s {
 	bool is_polling;
 	bool is_rss;
 	bool is_lro;
+	bool is_qos;
 	bool is_ptp;
 	enum aq_tc_mode tc_mode;
 	u32 priv_flags;
 	u8  tcs;
+	u8 prio_tc_map[8];
 	struct aq_rss_parameters aq_rss;
 	u32 eee_speeds;
 };
@@ -79,8 +81,15 @@ struct aq_nic_cfg_s {
 #define AQ_NIC_WOL_MODES        (WAKE_MAGIC |\
 				 WAKE_PHY)
 
+#define AQ_NIC_RING_PER_TC(_NIC_) \
+	(((_NIC_)->aq_nic_cfg.tc_mode == AQ_TC_MODE_4TCS) ? 8 : 4)
+
 #define AQ_NIC_TCVEC2RING(_NIC_, _TC_, _VEC_) \
-	((_TC_) * AQ_CFG_TCS_MAX + (_VEC_))
+	((_TC_) * AQ_NIC_RING_PER_TC(_NIC_) + (_VEC_))
+
+#define AQ_NIC_RING2QMAP(_NIC_, _ID_) \
+	((_ID_) / AQ_NIC_RING_PER_TC(_NIC_) * (_NIC_)->aq_vecs + \
+	((_ID_) % AQ_NIC_RING_PER_TC(_NIC_)))
 
 struct aq_hw_rx_fl2 {
 	struct aq_rx_filter_vlan aq_vlans[AQ_VLAN_MAX_FILTERS];
@@ -106,7 +115,7 @@ struct aq_nic_s {
 	atomic_t flags;
 	u32 msg_enable;
 	struct aq_vec_s *aq_vec[AQ_CFG_VECS_MAX];
-	struct aq_ring_s *aq_ring_tx[AQ_CFG_VECS_MAX * AQ_CFG_TCS_MAX];
+	struct aq_ring_s *aq_ring_tx[AQ_HW_QUEUES_MAX];
 	struct aq_hw_s *aq_hw;
 	struct net_device *ndev;
 	unsigned int aq_vecs;
@@ -183,4 +192,5 @@ void aq_nic_shutdown(struct aq_nic_s *self);
 u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type);
 void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
 			   u32 location);
+int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map);
 #endif /* AQ_NIC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
index bae95a618560..68fdb3994088 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ring.c
@@ -232,8 +232,11 @@ void aq_ring_queue_wake(struct aq_ring_s *ring)
 {
 	struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
 
-	if (__netif_subqueue_stopped(ndev, ring->idx)) {
-		netif_wake_subqueue(ndev, ring->idx);
+	if (__netif_subqueue_stopped(ndev,
+				     AQ_NIC_RING2QMAP(ring->aq_nic,
+						      ring->idx))) {
+		netif_wake_subqueue(ndev,
+				    AQ_NIC_RING2QMAP(ring->aq_nic, ring->idx));
 		ring->stats.tx.queue_restarts++;
 	}
 }
@@ -242,8 +245,11 @@ void aq_ring_queue_stop(struct aq_ring_s *ring)
 {
 	struct net_device *ndev = aq_nic_get_ndev(ring->aq_nic);
 
-	if (!__netif_subqueue_stopped(ndev, ring->idx))
-		netif_stop_subqueue(ndev, ring->idx);
+	if (!__netif_subqueue_stopped(ndev,
+				      AQ_NIC_RING2QMAP(ring->aq_nic,
+						       ring->idx)))
+		netif_stop_subqueue(ndev,
+				    AQ_NIC_RING2QMAP(ring->aq_nic, ring->idx));
 }
 
 bool aq_ring_tx_clean(struct aq_ring_s *self)
@@ -466,7 +472,10 @@ int aq_ring_rx_clean(struct aq_ring_s *self,
 			     buff->is_hash_l4 ? PKT_HASH_TYPE_L4 :
 			     PKT_HASH_TYPE_NONE);
 		/* Send all PTP traffic to 0 queue */
-		skb_record_rx_queue(skb, is_ptp_ring ? 0 : self->idx);
+		skb_record_rx_queue(skb,
+				    is_ptp_ring ? 0
+						: AQ_NIC_RING2QMAP(self->aq_nic,
+								   self->idx));
 
 		++self->stats.rx.packets;
 		self->stats.rx.bytes += skb->len;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 7caf586ea56c..775382440b47 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -46,7 +46,8 @@
 			NETIF_F_HW_VLAN_CTAG_RX |     \
 			NETIF_F_HW_VLAN_CTAG_TX |     \
 			NETIF_F_GSO_UDP_L4      |     \
-			NETIF_F_GSO_PARTIAL,          \
+			NETIF_F_GSO_PARTIAL |         \
+			NETIF_F_HW_TC,                \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
 	.flow_control = true,		  \
 	.mtu = HW_ATL_B0_MTU_JUMBO,	  \
@@ -134,7 +135,7 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 	u32 tx_buff_size = HW_ATL_B0_TXBUF_MAX;
 	u32 rx_buff_size = HW_ATL_B0_RXBUF_MAX;
-	unsigned int i_priority = 0U;
+	unsigned int prio = 0U;
 	u32 tc = 0U;
 
 	if (cfg->is_ptp) {
@@ -153,42 +154,45 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	/* TX Packet Scheduler Data TC0 */
-	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
-	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
-	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
-	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
-
-	/* Tx buf size TC0 */
-	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
-	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self,
-						   (tx_buff_size *
-						   (1024 / 32U) * 66U) /
-						   100U, tc);
-	hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self,
-						   (tx_buff_size *
-						   (1024 / 32U) * 50U) /
-						   100U, tc);
-
-	/* QoS Rx buf size per TC */
-	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
-	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self,
-						   (rx_buff_size *
-						   (1024U / 32U) * 66U) /
-						   100U, tc);
-	hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self,
-						   (rx_buff_size *
-						   (1024U / 32U) * 50U) /
-						   100U, tc);
-
-	hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
+	tx_buff_size /= cfg->tcs;
+	rx_buff_size /= cfg->tcs;
+	for (tc = 0; tc < cfg->tcs; tc++) {
+		u32 threshold = 0U;
+
+		/* TX Packet Scheduler Data per TC */
+		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
+		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
+
+		/* Tx buf size per TC */
+		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
+
+		threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U;
+		hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+
+		threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U;
+		hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+
+		/* QoS Rx buf size per TC */
+		hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
+
+		threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U;
+		hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+
+		threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U;
+		hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+
+		hw_atl_b0_set_fc(self, self->aq_nic_cfg->fc.req, tc);
+	}
 
 	if (cfg->is_ptp)
 		hw_atl_b0_tc_ptp_set(self);
 
 	/* QoS 802.1p priority -> TC mapping */
-	for (i_priority = 8U; i_priority--;)
-		hw_atl_rpf_rpb_user_priority_tc_map_set(self, i_priority, 0U);
+	for (prio = 0; prio < 8; ++prio)
+		hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio,
+							cfg->prio_tc_map[prio]);
 
 	return aq_hw_err_from_flags(self);
 }
@@ -319,7 +323,7 @@ int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 {
 	/* Tx TC/Queue number config */
-	hw_atl_tpb_tps_tx_tc_mode_set(self, 1U);
+	hw_atl_tpb_tps_tx_tc_mode_set(self, self->aq_nic_cfg->tc_mode);
 
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
@@ -345,7 +349,7 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 	int i;
 
 	/* Rx TC/RSS number config */
-	hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U);
+	hw_atl_rpb_rpf_rx_traf_class_mode_set(self, cfg->tc_mode);
 
 	/* Rx flow control */
 	hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 9e2d01a6aac8..8cb6765a1398 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -754,7 +754,7 @@ void hw_atl_rpfl2_accept_all_mc_packets_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw,
-					     u32 user_priority_tc_map, u32 tc)
+					     u32 user_priority, u32 tc)
 {
 /* register address for bitfield rx_tc_up{t}[2:0] */
 	static u32 rpf_rpb_rx_tc_upt_adr[8] = {
@@ -773,10 +773,9 @@ void hw_atl_rpf_rpb_user_priority_tc_map_set(struct aq_hw_s *aq_hw,
 			0U, 4U, 8U, 12U, 16U, 20U, 24U, 28U
 		};
 
-	aq_hw_write_reg_bit(aq_hw, rpf_rpb_rx_tc_upt_adr[tc],
-			    rpf_rpb_rx_tc_upt_msk[tc],
-			    rpf_rpb_rx_tc_upt_shft[tc],
-			    user_priority_tc_map);
+	aq_hw_write_reg_bit(aq_hw, rpf_rpb_rx_tc_upt_adr[user_priority],
+			    rpf_rpb_rx_tc_upt_msk[user_priority],
+			    rpf_rpb_rx_tc_upt_shft[user_priority], tc);
 }
 
 void hw_atl_rpf_rss_key_addr_set(struct aq_hw_s *aq_hw, u32 rss_key_addr)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index a14118550882..05c049661b2e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -47,7 +47,8 @@ static int hw_atl2_act_rslvr_table_set(struct aq_hw_s *self, u8 location,
 			NETIF_F_HW_VLAN_CTAG_RX |     \
 			NETIF_F_HW_VLAN_CTAG_TX |     \
 			NETIF_F_GSO_UDP_L4      |     \
-			NETIF_F_GSO_PARTIAL,          \
+			NETIF_F_GSO_PARTIAL     |     \
+			NETIF_F_HW_TC,                \
 	.hw_priv_flags = IFF_UNICAST_FLT, \
 	.flow_control = true,		  \
 	.mtu = HW_ATL2_MTU_JUMBO,	  \
@@ -132,7 +133,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	u32 tx_buff_size = HW_ATL2_TXBUF_MAX;
 	u32 rx_buff_size = HW_ATL2_RXBUF_MAX;
 	unsigned int prio = 0U;
-	u32 threshold = 0U;
 	u32 tc = 0U;
 
 	/* TPS Descriptor rate init */
@@ -146,34 +146,41 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	/* TX Packet Scheduler Data TC0 */
-	hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0, tc);
-	hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
-	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
-	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
+	tx_buff_size /= cfg->tcs;
+	rx_buff_size /= cfg->tcs;
+	for (tc = 0; tc < cfg->tcs; tc++) {
+		u32 threshold = 0U;
 
-	/* Tx buf size TC0 */
-	hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
+		/* TX Packet Scheduler Data per TC */
+		hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0,
+							       tc);
+		hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
-	threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U;
-	hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+		/* Tx buf size per TC */
+		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
 
-	threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U;
-	hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+		threshold = (tx_buff_size * (1024 / 32U) * 66U) / 100U;
+		hw_atl_tpb_tx_buff_hi_threshold_per_tc_set(self, threshold, tc);
 
-	/* QoS Rx buf size per TC */
-	hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
+		threshold = (tx_buff_size * (1024 / 32U) * 50U) / 100U;
+		hw_atl_tpb_tx_buff_lo_threshold_per_tc_set(self, threshold, tc);
 
-	threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U;
-	hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+		/* QoS Rx buf size per TC */
+		hw_atl_rpb_rx_pkt_buff_size_per_tc_set(self, rx_buff_size, tc);
 
-	threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U;
-	hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+		threshold = (rx_buff_size * (1024U / 32U) * 66U) / 100U;
+		hw_atl_rpb_rx_buff_hi_threshold_per_tc_set(self, threshold, tc);
+
+		threshold = (rx_buff_size * (1024U / 32U) * 50U) / 100U;
+		hw_atl_rpb_rx_buff_lo_threshold_per_tc_set(self, threshold, tc);
+	}
 
 	/* QoS 802.1p priority -> TC mapping */
 	for (prio = 0; prio < 8; ++prio)
 		hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio,
-							cfg->tcs * prio / 8);
+							cfg->prio_tc_map[prio]);
 
 	/* ATL2 Apply legacy ring to TC mapping */
 	hw_atl2_hw_queue_to_tc_map_set(self);
@@ -184,11 +191,24 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 			      struct aq_rss_parameters *rss_params)
 {
-	u8 *indirection_table =	rss_params->indirection_table;
+	u8 *indirection_table = rss_params->indirection_table;
+	const u32 num_tcs = aq_hw_num_tcs(self);
+	u32 rpf_redir2_enable;
+	int tc;
 	int i;
 
-	for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;)
-		hw_atl2_new_rpf_rss_redir_set(self, 0, i, indirection_table[i]);
+	rpf_redir2_enable = num_tcs > 4 ? 1 : 0;
+
+	hw_atl2_rpf_redirection_table2_select_set(self, rpf_redir2_enable);
+
+	for (i = HW_ATL2_RSS_REDIRECTION_MAX; i--;) {
+		for (tc = 0; tc != num_tcs; tc++) {
+			hw_atl2_new_rpf_rss_redir_set(self, tc, i,
+						      tc *
+						      aq_hw_q_per_tc(self) +
+						      indirection_table[i]);
+		}
+	}
 
 	return aq_hw_err_from_flags(self);
 }
@@ -196,7 +216,7 @@ static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
 {
 	/* Tx TC/RSS number config */
-	hw_atl_tpb_tps_tx_tc_mode_set(self, 1U);
+	hw_atl_tpb_tps_tx_tc_mode_set(self, self->aq_nic_cfg->tc_mode);
 
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
@@ -219,13 +239,29 @@ static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
 static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self)
 {
 	struct hw_atl2_priv *priv = (struct hw_atl2_priv *)self->priv;
+	u8 *prio_tc_map = self->aq_nic_cfg->prio_tc_map;
+	u16 action;
 	u8 index;
+	int i;
 
+	/* Action Resolver Table (ART) is used by RPF to decide which action
+	 * to take with a packet based upon input tag and tag mask, where:
+	 *  - input tag is a combination of 3-bit VLan Prio (PCP) and
+	 *    29-bit concatenation of all tags from filter block;
+	 *  - tag mask is a mask used for matching against input tag.
+	 * The input_tag is compared with all the Requested_tags in the
+	 * Record table to find a match. Action field of the selected matched
+	 * REC entry is used for further processing. If multiple entries match,
+	 * the Action field of the lowest REC entry will be selected.
+	 */
 	hw_atl2_rpf_act_rslvr_section_en_set(self, 0xFFFF);
 	hw_atl2_rpfl2_uc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC,
 				     HW_ATL2_MAC_UC);
 	hw_atl2_rpfl2_bc_flr_tag_set(self, HW_ATL2_RPF_TAG_BASE_UC);
 
+	/* FW reserves the beginning of ART, thus all driver entries must
+	 * start from the offset specified in FW caps.
+	 */
 	index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_OFF_INDEX;
 	hw_atl2_act_rslvr_table_set(self, index, 0,
 				    HW_ATL2_RPF_TAG_UC_MASK |
@@ -238,33 +274,17 @@ static void hw_atl2_hw_init_new_rx_filters(struct aq_hw_s *self)
 					HW_ATL2_RPF_TAG_UNTAG_MASK,
 				    HW_ATL2_ACTION_DROP);
 
-	index = priv->art_base_index + HW_ATL2_RPF_VLAN_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_VLAN,
-				    HW_ATL2_RPF_TAG_VLAN_MASK,
-				    HW_ATL2_ACTION_ASSIGN_TC(0));
+	/* Configure ART to map given VLan Prio (PCP) to the TC index for
+	 * RSS redirection table.
+	 */
+	for (i = 0; i < 8; i++) {
+		action = HW_ATL2_ACTION_ASSIGN_TC(prio_tc_map[i]);
 
-	index = priv->art_base_index + HW_ATL2_RPF_MAC_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_UC,
-				    HW_ATL2_RPF_TAG_UC_MASK,
-				    HW_ATL2_ACTION_ASSIGN_TC(0));
-
-	index = priv->art_base_index + HW_ATL2_RPF_ALLMC_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_BASE_ALLMC,
-				    HW_ATL2_RPF_TAG_ALLMC_MASK,
-				    HW_ATL2_ACTION_ASSIGN_TC(0));
-
-	index = priv->art_base_index + HW_ATL2_RPF_UNTAG_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, HW_ATL2_RPF_TAG_UNTAG_MASK,
-				    HW_ATL2_RPF_TAG_UNTAG_MASK,
-				    HW_ATL2_ACTION_ASSIGN_TC(0));
-
-	index = priv->art_base_index + HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_VLAN_MASK,
-				    HW_ATL2_ACTION_DISABLE);
-
-	index = priv->art_base_index + HW_ATL2_RPF_L2_PROMISC_ON_INDEX;
-	hw_atl2_act_rslvr_table_set(self, index, 0, HW_ATL2_RPF_TAG_UC_MASK,
-				    HW_ATL2_ACTION_DISABLE);
+		index = priv->art_base_index + HW_ATL2_RPF_PCP_TO_TC_INDEX + i;
+		hw_atl2_act_rslvr_table_set(self, index,
+					    i << HW_ATL2_RPF_TAG_PCP_OFFSET,
+					    HW_ATL2_RPF_TAG_PCP_MASK, action);
+	}
 }
 
 static void hw_atl2_hw_new_rx_filter_vlan_promisc(struct aq_hw_s *self,
@@ -327,7 +347,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self)
 	int i;
 
 	/* Rx TC/RSS number config */
-	hw_atl_rpb_rpf_rx_traf_class_mode_set(self, 1U);
+	hw_atl_rpb_rpf_rx_traf_class_mode_set(self, cfg->tc_mode);
 
 	/* Rx flow control */
 	hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index be0c049ea582..9ac1979a4867 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -82,13 +82,6 @@ enum HW_ATL2_RPF_ART_INDEX {
 	HW_ATL2_RPF_VLAN_USER_INDEX	= HW_ATL2_RPF_ET_PCP_USER_INDEX + 16,
 	HW_ATL2_RPF_PCP_TO_TC_INDEX	= HW_ATL2_RPF_VLAN_USER_INDEX +
 					  HW_ATL_VLAN_MAX_FILTERS,
-	HW_ATL2_RPF_VLAN_INDEX		= HW_ATL2_RPF_PCP_TO_TC_INDEX +
-					  AQ_CFG_TCS_MAX,
-	HW_ATL2_RPF_MAC_INDEX,
-	HW_ATL2_RPF_ALLMC_INDEX,
-	HW_ATL2_RPF_UNTAG_INDEX,
-	HW_ATL2_RPF_VLAN_PROMISC_ON_INDEX,
-	HW_ATL2_RPF_L2_PROMISC_ON_INDEX,
 };
 
 #define HW_ATL2_ACTION(ACTION, RSS, INDEX, VALID) \
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index e779d70fde66..f096d0a6bda9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -7,6 +7,14 @@
 #include "hw_atl2_llh_internal.h"
 #include "aq_hw_utils.h"
 
+void hw_atl2_rpf_redirection_table2_select_set(struct aq_hw_s *aq_hw,
+					       u32 select)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_ADR,
+			    HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSK,
+			    HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_SHIFT, select);
+}
+
 void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index 8c6d78a64d42..5c1ae755ffae 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -15,6 +15,10 @@ void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
 					u32 tx_intr_moderation_ctl,
 					u32 queue);
 
+/* Set Redirection Table 2 Select */
+void hw_atl2_rpf_redirection_table2_select_set(struct aq_hw_s *aq_hw,
+					       u32 select);
+
 /** Set RSS HASH type */
 void hw_atl2_rpf_rss_hash_type_set(struct aq_hw_s *aq_hw, u32 rss_hash_type);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index cde9e9d2836d..b0ac8cd581d7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -6,6 +6,16 @@
 #ifndef HW_ATL2_LLH_INTERNAL_H
 #define HW_ATL2_LLH_INTERNAL_H
 
+/* RX pif_rpf_redir_2_en_i Bitfield Definitions
+ * PORT="pif_rpf_redir_2_en_i"
+ */
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_ADR 0x000054C8
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSK 0x00001000
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_MSKN 0xFFFFEFFF
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_SHIFT 12
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_WIDTH 1
+#define HW_ATL2_RPF_PIF_RPF_REDIR2_ENI_DEFAULT 0x0
+
 /* RX pif_rpf_rss_hash_type_i Bitfield Definitions
  */
 #define HW_ATL2_RPF_PIF_RPF_RSS_HASH_TYPEI_ADR 0x000054C8
-- 
cgit v1.2.3-59-g8ed1b


From 4272ba8b11f88be8daca5e2477bfe110145d559c Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:41 +0300
Subject: net: atlantic: per-TC queue statistics

This patch adds support for per-TC queue statistics.

By default (single TC), the output is the same as it used to be, e.g.:
     Queue[0] InPackets: 2
     Queue[0] OutPackets: 8
     Queue[0] Restarts: 0
     Queue[0] InJumboPackets: 0
     Queue[0] InLroPackets: 0
     Queue[0] InErrors: 0

If several TCs are enabled, then each queue statistics line is prefixed
with TC number, e.g.:
     TC0 Queue[0] InPackets: 6
     TC0 Queue[0] OutPackets: 11
Queue numbering is end-to-end, so:
     TC1 Queue[4] InPackets: 0
     TC1 Queue[4] OutPackets: 22

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 59 +++++++++++++---------
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 12 +++--
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c    | 24 +++++----
 drivers/net/ethernet/aquantia/atlantic/aq_vec.h    |  5 +-
 4 files changed, 59 insertions(+), 41 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 86fc77d85fda..440a7d129848 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -88,13 +88,13 @@ static const char aq_ethtool_stat_names[][ETH_GSTRING_LEN] = {
 	"InDroppedDma",
 };
 
-static const char aq_ethtool_queue_stat_names[][ETH_GSTRING_LEN] = {
-	"Queue[%d] InPackets",
-	"Queue[%d] OutPackets",
-	"Queue[%d] Restarts",
-	"Queue[%d] InJumboPackets",
-	"Queue[%d] InLroPackets",
-	"Queue[%d] InErrors",
+static const char * const aq_ethtool_queue_stat_names[] = {
+	"%sQueue[%d] InPackets",
+	"%sQueue[%d] OutPackets",
+	"%sQueue[%d] Restarts",
+	"%sQueue[%d] InJumboPackets",
+	"%sQueue[%d] InLroPackets",
+	"%sQueue[%d] InErrors",
 };
 
 #if IS_ENABLED(CONFIG_MACSEC)
@@ -166,7 +166,8 @@ static u32 aq_ethtool_n_stats(struct net_device *ndev)
 	struct aq_nic_s *nic = netdev_priv(ndev);
 	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(nic);
 	u32 n_stats = ARRAY_SIZE(aq_ethtool_stat_names) +
-		      ARRAY_SIZE(aq_ethtool_queue_stat_names) * cfg->vecs;
+		      ARRAY_SIZE(aq_ethtool_queue_stat_names) * cfg->vecs *
+			cfg->tcs;
 
 #if IS_ENABLED(CONFIG_MACSEC)
 	if (nic->macsec_cfg) {
@@ -223,7 +224,7 @@ static void aq_ethtool_get_drvinfo(struct net_device *ndev,
 static void aq_ethtool_get_strings(struct net_device *ndev,
 				   u32 stringset, u8 *data)
 {
-	struct aq_nic_s *aq_nic = netdev_priv(ndev);
+	struct aq_nic_s *nic = netdev_priv(ndev);
 	struct aq_nic_cfg_s *cfg;
 	u8 *p = data;
 	int i, si;
@@ -231,24 +232,35 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 	int sa;
 #endif
 
-	cfg = aq_nic_get_cfg(aq_nic);
+	cfg = aq_nic_get_cfg(nic);
 
 	switch (stringset) {
-	case ETH_SS_STATS:
+	case ETH_SS_STATS: {
+		const int stat_cnt = ARRAY_SIZE(aq_ethtool_queue_stat_names);
+		char tc_string[8];
+		int tc;
+
+		memset(tc_string, 0, sizeof(tc_string));
 		memcpy(p, aq_ethtool_stat_names,
 		       sizeof(aq_ethtool_stat_names));
 		p = p + sizeof(aq_ethtool_stat_names);
-		for (i = 0; i < cfg->vecs; i++) {
-			for (si = 0;
-				si < ARRAY_SIZE(aq_ethtool_queue_stat_names);
-				si++) {
-				snprintf(p, ETH_GSTRING_LEN,
-					 aq_ethtool_queue_stat_names[si], i);
-				p += ETH_GSTRING_LEN;
+
+		for (tc = 0; tc < cfg->tcs; tc++) {
+			if (cfg->is_qos)
+				snprintf(tc_string, 8, "TC%d ", tc);
+
+			for (i = 0; i < cfg->vecs; i++) {
+				for (si = 0; si < stat_cnt; si++) {
+					snprintf(p, ETH_GSTRING_LEN,
+					     aq_ethtool_queue_stat_names[si],
+					     tc_string,
+					     AQ_NIC_TCVEC2RING(nic, tc, i));
+					p += ETH_GSTRING_LEN;
+				}
 			}
 		}
 #if IS_ENABLED(CONFIG_MACSEC)
-		if (!aq_nic->macsec_cfg)
+		if (!nic->macsec_cfg)
 			break;
 
 		memcpy(p, aq_macsec_stat_names, sizeof(aq_macsec_stat_names));
@@ -256,7 +268,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 		for (i = 0; i < AQ_MACSEC_MAX_SC; i++) {
 			struct aq_macsec_txsc *aq_txsc;
 
-			if (!(test_bit(i, &aq_nic->macsec_cfg->txsc_idx_busy)))
+			if (!(test_bit(i, &nic->macsec_cfg->txsc_idx_busy)))
 				continue;
 
 			for (si = 0;
@@ -266,7 +278,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 					 aq_macsec_txsc_stat_names[si], i);
 				p += ETH_GSTRING_LEN;
 			}
-			aq_txsc = &aq_nic->macsec_cfg->aq_txsc[i];
+			aq_txsc = &nic->macsec_cfg->aq_txsc[i];
 			for (sa = 0; sa < MACSEC_NUM_AN; sa++) {
 				if (!(test_bit(sa, &aq_txsc->tx_sa_idx_busy)))
 					continue;
@@ -283,10 +295,10 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 		for (i = 0; i < AQ_MACSEC_MAX_SC; i++) {
 			struct aq_macsec_rxsc *aq_rxsc;
 
-			if (!(test_bit(i, &aq_nic->macsec_cfg->rxsc_idx_busy)))
+			if (!(test_bit(i, &nic->macsec_cfg->rxsc_idx_busy)))
 				continue;
 
-			aq_rxsc = &aq_nic->macsec_cfg->aq_rxsc[i];
+			aq_rxsc = &nic->macsec_cfg->aq_rxsc[i];
 			for (sa = 0; sa < MACSEC_NUM_AN; sa++) {
 				if (!(test_bit(sa, &aq_rxsc->rx_sa_idx_busy)))
 					continue;
@@ -302,6 +314,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 		}
 #endif
 		break;
+	}
 	case ETH_SS_PRIV_FLAGS:
 		memcpy(p, aq_ethtool_priv_flag_names,
 		       sizeof(aq_ethtool_priv_flag_names));
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index da781082be32..851f22aadea1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -855,6 +855,7 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 	struct aq_stats_s *stats;
 	unsigned int count = 0U;
 	unsigned int i = 0U;
+	unsigned int tc;
 
 	if (self->aq_fw_ops->update_stats) {
 		mutex_lock(&self->fwreq_mutex);
@@ -893,10 +894,13 @@ u64 *aq_nic_get_stats(struct aq_nic_s *self, u64 *data)
 
 	data += i;
 
-	for (i = 0U, aq_vec = self->aq_vec[0];
-		aq_vec && self->aq_vecs > i; ++i, aq_vec = self->aq_vec[i]) {
-		data += count;
-		aq_vec_get_sw_stats(aq_vec, data, &count);
+	for (tc = 0U; tc < self->aq_nic_cfg.tcs; tc++) {
+		for (i = 0U, aq_vec = self->aq_vec[0];
+		     aq_vec && self->aq_vecs > i;
+		     ++i, aq_vec = self->aq_vec[i]) {
+			data += count;
+			aq_vec_get_sw_stats(aq_vec, tc, data, &count);
+		}
 	}
 
 	data += count;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index d5650cd6e236..41826c10700f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -348,16 +348,14 @@ cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self)
 	return &self->aq_ring_param.affinity_mask;
 }
 
-void aq_vec_add_stats(struct aq_vec_s *self,
-		      struct aq_ring_stats_rx_s *stats_rx,
-		      struct aq_ring_stats_tx_s *stats_tx)
+static void aq_vec_add_stats(struct aq_vec_s *self,
+			     const unsigned int tc,
+			     struct aq_ring_stats_rx_s *stats_rx,
+			     struct aq_ring_stats_tx_s *stats_tx)
 {
-	struct aq_ring_s *ring = NULL;
-	unsigned int r = 0U;
+	struct aq_ring_s *ring = self->ring[tc];
 
-	for (r = 0U, ring = self->ring[0];
-		self->tx_rings > r; ++r, ring = self->ring[r]) {
-		struct aq_ring_stats_tx_s *tx = &ring[AQ_VEC_TX_ID].stats.tx;
+	if (tc < self->rx_rings) {
 		struct aq_ring_stats_rx_s *rx = &ring[AQ_VEC_RX_ID].stats.rx;
 
 		stats_rx->packets += rx->packets;
@@ -368,6 +366,10 @@ void aq_vec_add_stats(struct aq_vec_s *self,
 		stats_rx->pg_losts += rx->pg_losts;
 		stats_rx->pg_flips += rx->pg_flips;
 		stats_rx->pg_reuses += rx->pg_reuses;
+	}
+
+	if (tc < self->tx_rings) {
+		struct aq_ring_stats_tx_s *tx = &ring[AQ_VEC_TX_ID].stats.tx;
 
 		stats_tx->packets += tx->packets;
 		stats_tx->bytes += tx->bytes;
@@ -376,7 +378,8 @@ void aq_vec_add_stats(struct aq_vec_s *self,
 	}
 }
 
-int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count)
+int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u64 *data,
+			unsigned int *p_count)
 {
 	struct aq_ring_stats_rx_s stats_rx;
 	struct aq_ring_stats_tx_s stats_tx;
@@ -384,7 +387,8 @@ int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data, unsigned int *p_count)
 
 	memset(&stats_rx, 0U, sizeof(struct aq_ring_stats_rx_s));
 	memset(&stats_tx, 0U, sizeof(struct aq_ring_stats_tx_s));
-	aq_vec_add_stats(self, &stats_rx, &stats_tx);
+
+	aq_vec_add_stats(self, tc, &stats_rx, &stats_tx);
 
 	/* This data should mimic aq_ethtool_queue_stat_names structure
 	 */
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
index 0ee86b26df8a..541af85e6510 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.h
@@ -35,10 +35,7 @@ void aq_vec_ring_free(struct aq_vec_s *self);
 int aq_vec_start(struct aq_vec_s *self);
 void aq_vec_stop(struct aq_vec_s *self);
 cpumask_t *aq_vec_get_affinity_mask(struct aq_vec_s *self);
-int aq_vec_get_sw_stats(struct aq_vec_s *self, u64 *data,
+int aq_vec_get_sw_stats(struct aq_vec_s *self, const unsigned int tc, u64 *data,
 			unsigned int *p_count);
-void aq_vec_add_stats(struct aq_vec_s *self,
-		      struct aq_ring_stats_rx_s *stats_rx,
-		      struct aq_ring_stats_tx_s *stats_tx);
 
 #endif /* AQ_VEC_H */
-- 
cgit v1.2.3-59-g8ed1b


From b9e989262a3e2e9b795361a7e7978d64851eb98f Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:42 +0300
Subject: net: atlantic: make TCVEC2RING accept nic_cfg

This patch updates TCVEC2RING to accept nic_cfg, which is needed to be able
to use it from hw_atl.
The name is updated to reflect the changes.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c |  2 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c     |  9 +++++----
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h     | 13 +++++++------
 drivers/net/ethernet/aquantia/atlantic/aq_vec.c     |  3 ++-
 4 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 440a7d129848..90a52a4b2d48 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -254,7 +254,7 @@ static void aq_ethtool_get_strings(struct net_device *ndev,
 					snprintf(p, ETH_GSTRING_LEN,
 					     aq_ethtool_queue_stat_names[si],
 					     tc_string,
-					     AQ_NIC_TCVEC2RING(nic, tc, i));
+					     AQ_NIC_CFG_TCVEC2RING(cfg, tc, i));
 					p += ETH_GSTRING_LEN;
 				}
 			}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 851f22aadea1..b2ef0115c293 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -701,15 +701,16 @@ exit:
 
 int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 {
-	unsigned int vec = skb->queue_mapping % self->aq_nic_cfg.vecs;
-	unsigned int tc = skb->queue_mapping / self->aq_nic_cfg.vecs;
+	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self);
+	unsigned int vec = skb->queue_mapping % cfg->vecs;
+	unsigned int tc = skb->queue_mapping / cfg->vecs;
 	struct aq_ring_s *ring = NULL;
 	unsigned int frags = 0U;
 	int err = NETDEV_TX_OK;
 
 	frags = skb_shinfo(skb)->nr_frags + 1;
 
-	ring = self->aq_ring_tx[AQ_NIC_TCVEC2RING(self, tc, vec)];
+	ring = self->aq_ring_tx[AQ_NIC_CFG_TCVEC2RING(cfg, tc, vec)];
 
 	if (frags > AQ_CFG_SKB_FRAGS_MAX) {
 		dev_kfree_skb_any(skb);
@@ -718,7 +719,7 @@ int aq_nic_xmit(struct aq_nic_s *self, struct sk_buff *skb)
 
 	aq_ring_update_queue_state(ring);
 
-	if (self->aq_nic_cfg.priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
+	if (cfg->priv_flags & BIT(AQ_HW_LOOPBACK_DMA_NET)) {
 		err = NETDEV_TX_BUSY;
 		goto err_exit;
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 29e129411945..6cc2ebfe6a44 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -81,15 +81,16 @@ struct aq_nic_cfg_s {
 #define AQ_NIC_WOL_MODES        (WAKE_MAGIC |\
 				 WAKE_PHY)
 
-#define AQ_NIC_RING_PER_TC(_NIC_) \
-	(((_NIC_)->aq_nic_cfg.tc_mode == AQ_TC_MODE_4TCS) ? 8 : 4)
+#define AQ_NIC_CFG_RING_PER_TC(_NIC_CFG_) \
+	(((_NIC_CFG_)->tc_mode == AQ_TC_MODE_4TCS) ? 8 : 4)
 
-#define AQ_NIC_TCVEC2RING(_NIC_, _TC_, _VEC_) \
-	((_TC_) * AQ_NIC_RING_PER_TC(_NIC_) + (_VEC_))
+#define AQ_NIC_CFG_TCVEC2RING(_NIC_CFG_, _TC_, _VEC_) \
+	((_TC_) * AQ_NIC_CFG_RING_PER_TC(_NIC_CFG_) + (_VEC_))
 
 #define AQ_NIC_RING2QMAP(_NIC_, _ID_) \
-	((_ID_) / AQ_NIC_RING_PER_TC(_NIC_) * (_NIC_)->aq_vecs + \
-	((_ID_) % AQ_NIC_RING_PER_TC(_NIC_)))
+	((_ID_) / AQ_NIC_CFG_RING_PER_TC(&(_NIC_)->aq_nic_cfg) * \
+		(_NIC_)->aq_vecs + \
+	((_ID_) % AQ_NIC_CFG_RING_PER_TC(&(_NIC_)->aq_nic_cfg)))
 
 struct aq_hw_rx_fl2 {
 	struct aq_rx_filter_vlan aq_vlans[AQ_VLAN_MAX_FILTERS];
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
index 41826c10700f..d1d43c8ce400 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_vec.c
@@ -135,7 +135,8 @@ int aq_vec_ring_alloc(struct aq_vec_s *self, struct aq_nic_s *aq_nic,
 	int err = 0;
 
 	for (i = 0; i < aq_nic_cfg->tcs; ++i) {
-		unsigned int idx_ring = AQ_NIC_TCVEC2RING(aq_nic, i, idx);
+		const unsigned int idx_ring = AQ_NIC_CFG_TCVEC2RING(aq_nic_cfg,
+								    i, idx);
 
 		ring = aq_ring_tx_alloc(&self->ring[i][AQ_VEC_TX_ID], aq_nic,
 					idx_ring, aq_nic_cfg);
-- 
cgit v1.2.3-59-g8ed1b


From 7327699f35f8e90b32c03080b5cba4e9aa95e087 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:43 +0300
Subject: net: atlantic: QoS implementation: max_rate

This patch adds initial support for mqprio rate limiters (max_rate only).

Atlantic HW supports Rate-Shaping for time-sensitive traffic at per
Traffic Class (TC) granularity.
Target rate is defined by:
* nominal link rate (always 10G);
* rate factor (ratio between nominal rate and max allowed).

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |  3 +
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   | 30 ++++++++--
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 20 +++++++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |  3 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  | 58 ++++++++++++++++--
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |  2 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c | 36 +++++++++++
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h | 16 +++++
 .../aquantia/atlantic/hw_atl/hw_atl_llh_internal.h | 70 ++++++++++++++++++++++
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   |  9 +--
 10 files changed, 235 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index 1dccaaee04b3..d31e576f8b86 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -35,6 +35,9 @@ enum aq_tc_mode {
 			(AQ_RX_LAST_LOC_FVLANID - AQ_RX_FIRST_LOC_FVLANID + 1U)
 #define AQ_RX_QUEUE_NOT_ASSIGNED   0xFFU
 
+/* Used for rate to Mbps conversion */
+#define AQ_MBPS_DIVISOR         125000 /* 1000000 / 8 */
+
 /* NIC H/W capabilities */
 struct aq_hw_caps_s {
 	u64 hw_features;
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index ef9e969fbf7a..d8817047f4ef 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -333,8 +333,12 @@ static int aq_ndo_vlan_rx_kill_vid(struct net_device *ndev, __be16 proto,
 }
 
 static int aq_validate_mqprio_opt(struct aq_nic_s *self,
+				  struct tc_mqprio_qopt_offload *mqprio,
 				  const unsigned int num_tc)
 {
+	const bool has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE);
+	int i;
+
 	if (num_tc > aq_hw_num_tcs(self->aq_hw)) {
 		netdev_err(self->ndev, "Too many TCs requested\n");
 		return -EOPNOTSUPP;
@@ -345,25 +349,43 @@ static int aq_validate_mqprio_opt(struct aq_nic_s *self,
 		return -EOPNOTSUPP;
 	}
 
+	for (i = 0; i < num_tc; i++) {
+		if (has_min_rate && mqprio->min_rate[i]) {
+			netdev_err(self->ndev,
+				   "Min tx rate is not supported\n");
+			return -EOPNOTSUPP;
+		}
+	}
+
 	return 0;
 }
 
 static int aq_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
 			   void *type_data)
 {
+	struct tc_mqprio_qopt_offload *mqprio = type_data;
 	struct aq_nic_s *aq_nic = netdev_priv(dev);
-	struct tc_mqprio_qopt *mqprio = type_data;
 	int err;
+	int i;
 
 	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
-	err = aq_validate_mqprio_opt(aq_nic, mqprio->num_tc);
+	err = aq_validate_mqprio_opt(aq_nic, mqprio, mqprio->qopt.num_tc);
 	if (err)
 		return err;
 
-	return aq_nic_setup_tc_mqprio(aq_nic, mqprio->num_tc,
-				      mqprio->prio_tc_map);
+	if (mqprio->flags & TC_MQPRIO_F_MAX_RATE) {
+		for (i = 0; i < mqprio->qopt.num_tc; i++) {
+			u64 max_rate = mqprio->max_rate[i];
+
+			do_div(max_rate, AQ_MBPS_DIVISOR);
+			aq_nic_setup_tc_max_rate(aq_nic, i, (u32)max_rate);
+		}
+	}
+
+	return aq_nic_setup_tc_mqprio(aq_nic, mqprio->qopt.num_tc,
+				      mqprio->qopt.prio_tc_map);
 }
 
 static const struct net_device_ops aq_ndev_ops = {
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index b2ef0115c293..2e0e7d34fda0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -1324,3 +1324,23 @@ int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map)
 
 	return err;
 }
+
+int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc,
+			     const u32 max_rate)
+{
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+
+	if (tc >= AQ_CFG_TCS_MAX)
+		return -EINVAL;
+
+	if (max_rate && max_rate < 10) {
+		netdev_warn(self->ndev,
+			"Setting %s to the minimum usable value of %dMbps.\n",
+			"max rate", 10);
+		cfg->tc_max_rate[tc] = 10;
+	} else {
+		cfg->tc_max_rate[tc] = max_rate;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 6cc2ebfe6a44..351c4e68f40d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -65,6 +65,7 @@ struct aq_nic_cfg_s {
 	u32 priv_flags;
 	u8  tcs;
 	u8 prio_tc_map[8];
+	u32 tc_max_rate[AQ_CFG_TCS_MAX];
 	struct aq_rss_parameters aq_rss;
 	u32 eee_speeds;
 };
@@ -194,4 +195,6 @@ u8 aq_nic_reserve_filter(struct aq_nic_s *self, enum aq_rx_filter_type type);
 void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
 			   u32 location);
 int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map);
+int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc,
+			     const u32 max_rate);
 #endif /* AQ_NIC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 775382440b47..abc86eb4f525 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -138,6 +138,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	unsigned int prio = 0U;
 	u32 tc = 0U;
 
+	hw_atl_b0_hw_init_tx_tc_rate_limit(self);
+
 	if (cfg->is_ptp) {
 		tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
 		rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
@@ -151,7 +153,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
 
 	/* TPS TC credits init */
-	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
 	tx_buff_size /= cfg->tcs;
@@ -162,8 +163,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 		/* TX Packet Scheduler Data TC0 */
 		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
 		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
-		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
-		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
 		/* Tx buf size TC0 */
 		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
@@ -320,10 +319,61 @@ int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
+int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self)
+{
+	/* Scale factor is based on the number of bits in fractional portion */
+	static const u32 scale = BIT(HW_ATL_TPS_DESC_RATE_Y_WIDTH);
+	static const u32 frac_msk = HW_ATL_TPS_DESC_RATE_Y_MSK >>
+				    HW_ATL_TPS_DESC_RATE_Y_SHIFT;
+	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
+	int tc;
+
+	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+	hw_atl_tps_tx_desc_rate_mode_set(self, nic_cfg->is_qos ? 1U : 0U);
+	for (tc = 0; tc != nic_cfg->tcs; tc++) {
+		const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 1U : 0U;
+		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
+
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
+
+		hw_atl_tps_tx_desc_rate_en_set(self, desc, en);
+
+		if (en) {
+			/* Nominal rate is always 10G */
+			const u32 rate = 10000U * scale /
+					 nic_cfg->tc_max_rate[tc];
+			const u32 rate_int = rate >>
+					     HW_ATL_TPS_DESC_RATE_Y_WIDTH;
+			const u32 rate_frac = rate & frac_msk;
+
+			hw_atl_tps_tx_desc_rate_x_set(self, desc, rate_int);
+			hw_atl_tps_tx_desc_rate_y_set(self, desc, rate_frac);
+		} else {
+			/* A value of 1 indicates the queue is not
+			 * rate controlled.
+			 */
+			hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U);
+			hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U);
+		}
+	}
+	for (tc = nic_cfg->tcs; tc != AQ_CFG_TCS_MAX; tc++) {
+		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
+
+		hw_atl_tps_tx_desc_rate_en_set(self, desc, 0U);
+		hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U);
+		hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 {
+	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
+
 	/* Tx TC/Queue number config */
-	hw_atl_tpb_tps_tx_tc_mode_set(self, self->aq_nic_cfg->tc_mode);
+	hw_atl_tpb_tps_tx_tc_mode_set(self, nic_cfg->tc_mode);
 
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index b855459272ca..992ee4ed37cc 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -62,6 +62,8 @@ int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr);
 
 int hw_atl_b0_hw_start(struct aq_hw_s *self);
 
+int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self);
+
 int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask);
 int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask);
 int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 8cb6765a1398..0ea791a9c100 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1511,6 +1511,42 @@ void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
 			    tx_pkt_shed_tc_data_weight);
 }
 
+void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw,
+				      const u32 rate_mode)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_TX_DESC_RATE_MODE_ADR,
+			    HW_ATL_TPS_TX_DESC_RATE_MODE_MSK,
+			    HW_ATL_TPS_TX_DESC_RATE_MODE_SHIFT,
+			    rate_mode);
+}
+
+void hw_atl_tps_tx_desc_rate_en_set(struct aq_hw_s *aq_hw, const u32 desc,
+				    const u32 enable)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_EN_ADR(desc),
+			    HW_ATL_TPS_DESC_RATE_EN_MSK,
+			    HW_ATL_TPS_DESC_RATE_EN_SHIFT,
+			    enable);
+}
+
+void hw_atl_tps_tx_desc_rate_x_set(struct aq_hw_s *aq_hw, const u32 desc,
+				   const u32 rate_int)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_X_ADR(desc),
+			    HW_ATL_TPS_DESC_RATE_X_MSK,
+			    HW_ATL_TPS_DESC_RATE_X_SHIFT,
+			    rate_int);
+}
+
+void hw_atl_tps_tx_desc_rate_y_set(struct aq_hw_s *aq_hw, const u32 desc,
+				   const u32 rate_frac)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_RATE_Y_ADR(desc),
+			    HW_ATL_TPS_DESC_RATE_Y_MSK,
+			    HW_ATL_TPS_DESC_RATE_Y_SHIFT,
+			    rate_frac);
+}
+
 /* tx */
 void hw_atl_tx_tx_reg_res_dis_set(struct aq_hw_s *aq_hw, u32 tx_reg_res_dis)
 {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index b88cb84805d5..c56cc4e8e13c 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -710,6 +710,22 @@ void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
 					       u32 tx_pkt_shed_tc_data_weight,
 					u32 tc);
 
+/* set tx descriptor rate mode */
+void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw,
+				      const u32 rate_mode);
+
+/* set tx packet scheduler descriptor rate enable */
+void hw_atl_tps_tx_desc_rate_en_set(struct aq_hw_s *aq_hw, const u32 desc,
+				    const u32 enable);
+
+/* set tx packet scheduler descriptor rate integral value */
+void hw_atl_tps_tx_desc_rate_x_set(struct aq_hw_s *aq_hw, const u32 desc,
+				   const u32 rate_int);
+
+/* set tx packet scheduler descriptor rate fractional value */
+void hw_atl_tps_tx_desc_rate_y_set(struct aq_hw_s *aq_hw, const u32 desc,
+				   const u32 rate_frac);
+
 /* tx */
 
 /* set tx register reset disable */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
index 5d86ffab4ece..06220792daf1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh_internal.h
@@ -2056,6 +2056,24 @@
 /* default value of bitfield tx_tc_mode */
 #define HW_ATL_TPB_TX_TC_MODE_DEFAULT 0x0
 
+/* tx tx_desc_rate_mode bitfield definitions
+ * preprocessor definitions for the bitfield "tx_desc_rate_mode".
+ * port="pif_tps_desc_rate_mode_i"
+ */
+
+/* register address for bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_ADR 0x00007900
+/* bitmask for bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_MSK 0x00000080
+/* inverted bitmask for bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_MSKN 0xFFFFFF7F
+/* lower bit position of bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_SHIFT 7
+/* width of bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_WIDTH 1
+/* default value of bitfield tx_desc_rate_mode */
+#define HW_ATL_TPS_TX_DESC_RATE_MODE_DEFAULT 0x0
+
 /* tx tx_buf_en bitfield definitions
  * preprocessor definitions for the bitfield "tx_buf_en".
  * port="pif_tpb_tx_buf_en_i"
@@ -2275,6 +2293,58 @@
 /* default value of bitfield data_tc_arb_mode */
 #define HW_ATL_TPS_DATA_TC_ARB_MODE_DEFAULT 0x0
 
+/* tx desc{r}_rate_en bitfield definitions
+ * preprocessor definitions for the bitfield "desc{r}_rate_en".
+ * port="pif_tps_desc_rate_en_i[0]"
+ */
+
+/* register address for bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_ADR(desc) (0x00007408 + (desc) * 0x10)
+/* bitmask for bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_MSK 0x80000000
+/* inverted bitmask for bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_MSKN 0x7FFFFFFF
+/* lower bit position of bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_SHIFT 31
+/* width of bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_WIDTH 1
+/* default value of bitfield desc{r}_rate_en */
+#define HW_ATL_TPS_DESC_RATE_EN_DEFAULT 0x0
+
+/* tx desc{r}_rate_x bitfield definitions
+ * preprocessor definitions for the bitfield "desc{r}_rate_x".
+ * port="pif_tps_desc0_rate_x"
+ */
+/* register address for bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_ADR(desc) (0x00007408 + (desc) * 0x10)
+/* bitmask for bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_MSK 0x03FF0000
+/* inverted bitmask for bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_MSKN 0xFC00FFFF
+/* lower bit position of bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_SHIFT 16
+/* width of bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_WIDTH 10
+/* default value of bitfield desc{r}_rate_x */
+#define HW_ATL_TPS_DESC_RATE_X_DEFAULT 0x0
+
+/* tx desc{r}_rate_y bitfield definitions
+ * preprocessor definitions for the bitfield "desc{r}_rate_y".
+ * port="pif_tps_desc0_rate_y"
+ */
+/* register address for bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_ADR(desc) (0x00007408 + (desc) * 0x10)
+/* bitmask for bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_MSK 0x00003FFF
+/* inverted bitmask for bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_MSKN 0xFFFFC000
+/* lower bit position of bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_SHIFT 0
+/* width of bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_WIDTH 14
+/* default value of bitfield desc{r}_rate_y */
+#define HW_ATL_TPS_DESC_RATE_Y_DEFAULT 0x0
+
 /* tx desc_rate_ta_rst bitfield definitions
  * preprocessor definitions for the bitfield "desc_rate_ta_rst".
  * port="pif_tps_desc_rate_ta_rst_i"
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index 05c049661b2e..b42ff81adfeb 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -135,6 +135,8 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	unsigned int prio = 0U;
 	u32 tc = 0U;
 
+	hw_atl_b0_hw_init_tx_tc_rate_limit(self);
+
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
 	hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
@@ -143,7 +145,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
 
 	/* TPS TC credits init */
-	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
 	tx_buff_size /= cfg->tcs;
@@ -155,8 +156,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 		hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0,
 							       tc);
 		hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
-		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
-		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
 
 		/* Tx buf size TC0 */
 		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
@@ -215,8 +214,10 @@ static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 
 static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
 {
+	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
+
 	/* Tx TC/RSS number config */
-	hw_atl_tpb_tps_tx_tc_mode_set(self, self->aq_nic_cfg->tc_mode);
+	hw_atl_tpb_tps_tx_tc_mode_set(self, nic_cfg->tc_mode);
 
 	hw_atl_thm_lso_tcp_flag_of_first_pkt_set(self, 0x0FF6U);
 	hw_atl_thm_lso_tcp_flag_of_middle_pkt_set(self, 0x0FF6U);
-- 
cgit v1.2.3-59-g8ed1b


From 14ef766b13822001087d468aa41f22caa2a42022 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:44 +0300
Subject: net: atlantic: automatically downgrade the number of queues if
 necessary

This patch adds support for automatic queue number downgrade.

On A2 this is a must-have, because only TC0/TC1 support more than 4
queues; all other TCs support at most 4 queues.
Thus, on A2 we must downgrade the number of queues per TC to 4 if more
than 2 TCs are requested.

On A1 this allows using 8 TCs even on systems with a CPU count >= 8,
where we have 8 queues by default.
In this case we simply switch to 8TCx4Q mode automatically.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/aq_ethtool.c    | 15 ++---
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   |  5 +-
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    | 78 +++++++++++++++-------
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |  1 +
 4 files changed, 63 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
index 90a52a4b2d48..743d3b13b39d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_ethtool.c
@@ -793,8 +793,6 @@ static int aq_set_ringparam(struct net_device *ndev,
 		dev_close(ndev);
 	}
 
-	aq_nic_free_vectors(aq_nic);
-
 	cfg->rxds = max(ring->rx_pending, hw_caps->rxds_min);
 	cfg->rxds = min(cfg->rxds, hw_caps->rxds_max);
 	cfg->rxds = ALIGN(cfg->rxds, AQ_HW_RXD_MULTIPLE);
@@ -803,15 +801,10 @@ static int aq_set_ringparam(struct net_device *ndev,
 	cfg->txds = min(cfg->txds, hw_caps->txds_max);
 	cfg->txds = ALIGN(cfg->txds, AQ_HW_TXD_MULTIPLE);
 
-	for (aq_nic->aq_vecs = 0; aq_nic->aq_vecs < cfg->vecs;
-	     aq_nic->aq_vecs++) {
-		aq_nic->aq_vec[aq_nic->aq_vecs] =
-		    aq_vec_alloc(aq_nic, aq_nic->aq_vecs, cfg);
-		if (unlikely(!aq_nic->aq_vec[aq_nic->aq_vecs])) {
-			err = -ENOMEM;
-			goto err_exit;
-		}
-	}
+	err = aq_nic_realloc_vectors(aq_nic);
+	if (err)
+		goto err_exit;
+
 	if (ndev_running)
 		err = dev_open(ndev, NULL);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index d8817047f4ef..57a116ccad55 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -337,9 +337,12 @@ static int aq_validate_mqprio_opt(struct aq_nic_s *self,
 				  const unsigned int num_tc)
 {
 	const bool has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE);
+	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(self);
+	const unsigned int tcs_max = min_t(u8, aq_nic_cfg->aq_hw_caps->tcs_max,
+					   AQ_CFG_TCS_MAX);
 	int i;
 
-	if (num_tc > aq_hw_num_tcs(self->aq_hw)) {
+	if (num_tc > tcs_max) {
 		netdev_err(self->ndev, "Too many TCs requested\n");
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 2e0e7d34fda0..0973214db20f 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -65,6 +65,33 @@ static void aq_nic_rss_init(struct aq_nic_s *self, unsigned int num_rss_queues)
 		rss_params->indirection_table[i] = i & (num_rss_queues - 1);
 }
 
+/* Recalculate the number of vectors */
+static void aq_nic_cfg_update_num_vecs(struct aq_nic_s *self)
+{
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+
+	cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
+	cfg->vecs = min(cfg->vecs, num_online_cpus());
+	if (self->irqvecs > AQ_HW_SERVICE_IRQS)
+		cfg->vecs = min(cfg->vecs, self->irqvecs - AQ_HW_SERVICE_IRQS);
+	/* cfg->vecs should be power of 2 for RSS */
+	cfg->vecs = rounddown_pow_of_two(cfg->vecs);
+
+	if (ATL_HW_IS_CHIP_FEATURE(self->aq_hw, ANTIGUA)) {
+		if (cfg->tcs > 2)
+			cfg->vecs = min(cfg->vecs, 4U);
+	}
+
+	if (cfg->vecs <= 4)
+		cfg->tc_mode = AQ_TC_MODE_8TCS;
+	else
+		cfg->tc_mode = AQ_TC_MODE_4TCS;
+
+	/*rss rings */
+	cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF);
+	aq_nic_rss_init(self, cfg->num_rss_queues);
+}
+
 /* Checks hw_caps and 'corrects' aq_nic_cfg in runtime */
 void aq_nic_cfg_start(struct aq_nic_s *self)
 {
@@ -81,7 +108,6 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 
 	cfg->rxpageorder = AQ_CFG_RX_PAGEORDER;
 	cfg->is_rss = AQ_CFG_IS_RSS_DEF;
-	cfg->num_rss_queues = AQ_CFG_NUM_RSS_QUEUES_DEF;
 	cfg->aq_rss.base_cpu_number = AQ_CFG_RSS_BASE_CPU_NUM_DEF;
 	cfg->fc.req = AQ_CFG_FC_MODE;
 	cfg->wol = AQ_CFG_WOL_MODES;
@@ -97,24 +123,7 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 	cfg->rxds = min(cfg->aq_hw_caps->rxds_max, AQ_CFG_RXDS_DEF);
 	cfg->txds = min(cfg->aq_hw_caps->txds_max, AQ_CFG_TXDS_DEF);
 
-	/*rss rings */
-	cfg->vecs = min(cfg->aq_hw_caps->vecs, AQ_CFG_VECS_DEF);
-	cfg->vecs = min(cfg->vecs, num_online_cpus());
-	if (self->irqvecs > AQ_HW_SERVICE_IRQS)
-		cfg->vecs = min(cfg->vecs, self->irqvecs - AQ_HW_SERVICE_IRQS);
-	/* cfg->vecs should be power of 2 for RSS */
-	if (cfg->vecs >= 8U)
-		cfg->vecs = 8U;
-	else if (cfg->vecs >= 4U)
-		cfg->vecs = 4U;
-	else if (cfg->vecs >= 2U)
-		cfg->vecs = 2U;
-	else
-		cfg->vecs = 1U;
-
-	cfg->num_rss_queues = min(cfg->vecs, AQ_CFG_NUM_RSS_QUEUES_DEF);
-
-	aq_nic_rss_init(self, cfg->num_rss_queues);
+	aq_nic_cfg_update_num_vecs(self);
 
 	cfg->irq_type = aq_pci_func_get_irq_type(self);
 
@@ -125,11 +134,6 @@ void aq_nic_cfg_start(struct aq_nic_s *self)
 		cfg->vecs = 1U;
 	}
 
-	if (cfg->vecs <= 4)
-		cfg->tc_mode = AQ_TC_MODE_8TCS;
-	else
-		cfg->tc_mode = AQ_TC_MODE_4TCS;
-
 	/* Check if we have enough vectors allocated for
 	 * link status IRQ. If no - we'll know link state from
 	 * slower service task.
@@ -1219,6 +1223,22 @@ void aq_nic_free_vectors(struct aq_nic_s *self)
 err_exit:;
 }
 
+int aq_nic_realloc_vectors(struct aq_nic_s *self)
+{
+	struct aq_nic_cfg_s *cfg = aq_nic_get_cfg(self);
+
+	aq_nic_free_vectors(self);
+
+	for (self->aq_vecs = 0; self->aq_vecs < cfg->vecs; self->aq_vecs++) {
+		self->aq_vec[self->aq_vecs] = aq_vec_alloc(self, self->aq_vecs,
+							   cfg);
+		if (unlikely(!self->aq_vec[self->aq_vecs]))
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
 void aq_nic_shutdown(struct aq_nic_s *self)
 {
 	int err = 0;
@@ -1288,6 +1308,7 @@ void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
 int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map)
 {
 	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+	const unsigned int prev_vecs = cfg->vecs;
 	bool ndev_running;
 	int err = 0;
 	int i;
@@ -1319,9 +1340,18 @@ int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map)
 
 	netdev_set_num_tc(self->ndev, cfg->tcs);
 
+	/* Changing the number of TCs might change the number of vectors */
+	aq_nic_cfg_update_num_vecs(self);
+	if (prev_vecs != cfg->vecs) {
+		err = aq_nic_realloc_vectors(self);
+		if (err)
+			goto err_exit;
+	}
+
 	if (ndev_running)
 		err = dev_open(self->ndev, NULL);
 
+err_exit:
 	return err;
 }
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 351c4e68f40d..7a1d799b1e0d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -177,6 +177,7 @@ void aq_nic_deinit(struct aq_nic_s *self, bool link_down);
 void aq_nic_set_power(struct aq_nic_s *self);
 void aq_nic_free_hot_resources(struct aq_nic_s *self);
 void aq_nic_free_vectors(struct aq_nic_s *self);
+int aq_nic_realloc_vectors(struct aq_nic_s *self);
 int aq_nic_set_mtu(struct aq_nic_s *self, int new_mtu);
 int aq_nic_set_mac(struct aq_nic_s *self, struct net_device *ndev);
 int aq_nic_set_packet_filter(struct aq_nic_s *self, unsigned int flags);
-- 
cgit v1.2.3-59-g8ed1b


From 5479e8436f32cdbe76d63119459a7d651c1c39ed Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:45 +0300
Subject: net: atlantic: always use random TC-queue mapping for TX on A2.

This patch changes the TC-queue mapping mechanism used on A2.
Configure the A2 HW in such a way that we can keep queue index mapping
exactly as it was on A1.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 31 ++++++++++++-----
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |  9 +++++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |  4 +++
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        | 39 ++++++++++++++++++++--
 4 files changed, 72 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index b42ff81adfeb..a5bffadde6df 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -95,7 +95,10 @@ static int hw_atl2_hw_queue_to_tc_map_set(struct aq_hw_s *self)
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
 	unsigned int tcs, q_per_tc;
 	unsigned int tc, q;
-	u32 value = 0;
+	u32 rx_map = 0;
+	u32 tx_map = 0;
+
+	hw_atl2_tpb_tx_tc_q_rand_map_en_set(self, 1U);
 
 	switch (cfg->tc_mode) {
 	case AQ_TC_MODE_8TCS:
@@ -113,14 +116,24 @@ static int hw_atl2_hw_queue_to_tc_map_set(struct aq_hw_s *self)
 	for (tc = 0; tc != tcs; tc++) {
 		unsigned int tc_q_offset = tc * q_per_tc;
 
-		for (q = tc_q_offset; q != tc_q_offset + q_per_tc; q++)
-			value |= tc << HW_ATL2_RX_Q_TC_MAP_SHIFT(q);
+		for (q = tc_q_offset; q != tc_q_offset + q_per_tc; q++) {
+			rx_map |= tc << HW_ATL2_RX_Q_TC_MAP_SHIFT(q);
+			if (HW_ATL2_RX_Q_TC_MAP_ADR(q) !=
+			    HW_ATL2_RX_Q_TC_MAP_ADR(q + 1)) {
+				aq_hw_write_reg(self,
+						HW_ATL2_RX_Q_TC_MAP_ADR(q),
+						rx_map);
+				rx_map = 0;
+			}
 
-		if (HW_ATL2_RX_Q_TC_MAP_ADR(q) !=
-		    HW_ATL2_RX_Q_TC_MAP_ADR(q - 1)) {
-			aq_hw_write_reg(self, HW_ATL2_RX_Q_TC_MAP_ADR(q - 1),
-					value);
-			value = 0;
+			tx_map |= tc << HW_ATL2_TX_Q_TC_MAP_SHIFT(q);
+			if (HW_ATL2_TX_Q_TC_MAP_ADR(q) !=
+			    HW_ATL2_TX_Q_TC_MAP_ADR(q + 1)) {
+				aq_hw_write_reg(self,
+						HW_ATL2_TX_Q_TC_MAP_ADR(q),
+						tx_map);
+				tx_map = 0;
+			}
 		}
 	}
 
@@ -181,7 +194,7 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 		hw_atl_rpf_rpb_user_priority_tc_map_set(self, prio,
 							cfg->prio_tc_map[prio]);
 
-	/* ATL2 Apply legacy ring to TC mapping */
+	/* ATL2 Apply ring to TC mapping */
 	hw_atl2_hw_queue_to_tc_map_set(self);
 
 	return aq_hw_err_from_flags(self);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index f096d0a6bda9..6817fa57cc83 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -68,6 +68,15 @@ void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter)
 
 /* TX */
 
+void hw_atl2_tpb_tx_tc_q_rand_map_en_set(struct aq_hw_s *aq_hw,
+					 const u32 tc_q_rand_map_en)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_ADR,
+			    HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSK,
+			    HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_SHIFT,
+			    tc_q_rand_map_en);
+}
+
 void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_ADR,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index 5c1ae755ffae..d4b087d1dec1 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -38,6 +38,10 @@ void hw_atl2_new_rpf_rss_redir_set(struct aq_hw_s *aq_hw, u32 tc, u32 index,
 /* Set VLAN filter tag */
 void hw_atl2_rpf_vlan_flr_tag_set(struct aq_hw_s *aq_hw, u32 tag, u32 filter);
 
+/* set tx random TC-queue mapping enable bit */
+void hw_atl2_tpb_tx_tc_q_rand_map_en_set(struct aq_hw_s *aq_hw,
+					 const u32 tc_q_rand_map_en);
+
 /* set tx buffer clock gate enable */
 void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index b0ac8cd581d7..bf0198ca4e85 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -132,6 +132,24 @@
 /* Default value of bitfield rx_q{Q}_tc_map[2:0] */
 #define HW_ATL2_RX_Q_TC_MAP_DEFAULT 0x0
 
+/* tx tx_tc_q_rand_map_en bitfield definitions
+ * preprocessor definitions for the bitfield "tx_tc_q_rand_map_en".
+ * port="pif_tpb_tx_tc_q_rand_map_en_i"
+ */
+
+/* register address for bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_ADR 0x00007900
+/* bitmask for bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSK 0x00000200
+/* inverted bitmask for bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_MSKN 0xFFFFFDFF
+/* lower bit position of bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_SHIFT 9
+/* width of bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_WIDTH 1
+/* default value of bitfield tx_tc_q_rand_map_en */
+#define HW_ATL2_TPB_TX_TC_Q_RAND_MAP_EN_DEFAULT 0x0
+
 /* tx tx_buffer_clk_gate_en bitfield definitions
  * preprocessor definitions for the bitfield "tx_buffer_clk_gate_en".
  * port="pif_tpb_tx_buffer_clk_gate_en_i"
@@ -150,8 +168,25 @@
 /* default value of bitfield tx_buffer_clk_gate_en */
 #define HW_ATL2_TPB_TX_BUF_CLK_GATE_EN_DEFAULT 0x0
 
-/* tx data_tc{t}_credit_max[b:0] bitfield definitions
- * preprocessor definitions for the bitfield "data_tc{t}_credit_max[b:0]".
+/* tx tx_q_tc_map{q} bitfield definitions
+ * preprocessor definitions for the bitfield "tx_q_tc_map{q}".
+ * parameter: queue {q} | bit-level stride | range [0, 31]
+ * port="pif_tpb_tx_q_tc_map0_i[2:0]"
+ */
+
+/* register address for bitfield tx_q_tc_map{q} */
+#define HW_ATL2_TX_Q_TC_MAP_ADR(queue) \
+	(((queue) < 32) ? 0x0000799C + ((queue) / 4) * 4 : 0)
+/* lower bit position of bitfield tx_q_tc_map{q} */
+#define HW_ATL2_TX_Q_TC_MAP_SHIFT(queue) \
+	(((queue) < 32) ? ((queue) * 8) % 32 : 0)
+/* width of bitfield tx_q_tc_map{q} */
+#define HW_ATL2_TX_Q_TC_MAP_WIDTH 3
+/* default value of bitfield tx_q_tc_map{q} */
+#define HW_ATL2_TX_Q_TC_MAP_DEFAULT 0x0
+
+/* tx data_tc{t}_credit_max[f:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_credit_max[f:0]".
  * parameter: tc {t} | stride size 0x4 | range [0, 7]
  * port="pif_tps_data_tc0_credit_max_i[11:0]"
  */
-- 
cgit v1.2.3-59-g8ed1b


From b64f2ac9955bcd3547329c30d8f7a55f84297df8 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:46 +0300
Subject: net: atlantic: change the order of arguments for TC weight/credit
 setters

This patch changes the order of arguments for TC weight/credit setter
functions.
Having the "value to be set" as the last argument is slightly more
robust, in the sense that it reads more naturally to humans and is
therefore less error-prone.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c    |  8 ++++----
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c    |  8 ++++----
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c   | 20 ++++++++++----------
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h   | 16 ++++++++--------
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c | 10 +++++-----
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h |  8 ++++----
 6 files changed, 35 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
index 88b17cf77625..a312864969af 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_a0.c
@@ -136,10 +136,10 @@ static int hw_atl_a0_hw_qos_set(struct aq_hw_s *self)
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
 	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
 
-	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, 0U);
-	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, 0U);
-	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, 0U);
-	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, 0U);
+	hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0U, 0xFFF);
+	hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0U, 0x64);
+	hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0U, 0x50);
+	hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0U, 0x1E);
 
 	/* Tx buf size */
 	buff_size = HW_ATL_A0_TXBUF_MAX;
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index abc86eb4f525..2448a09ef7b9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -161,8 +161,8 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 		u32 threshold = 0U;
 
 		/* TX Packet Scheduler Data TC0 */
-		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF, tc);
-		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, 0x64, tc);
+		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc, 0xFFF);
+		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, tc, 0x64);
 
 		/* Tx buf size TC0 */
 		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
@@ -334,8 +334,8 @@ int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self)
 		const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 1U : 0U;
 		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
 
-		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, 0x50, tc);
-		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, 0x1E, tc);
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc, 0x50);
+		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, tc, 0x1E);
 
 		hw_atl_tps_tx_desc_rate_en_set(self, desc, en);
 
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
index 0ea791a9c100..3c8e8047ea1e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.c
@@ -1463,8 +1463,8 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw,
-						   u32 max_credit,
-						   u32 tc)
+						   const u32 tc,
+						   const u32 max_credit)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTCREDIT_MAX_ADR(tc),
 			    HW_ATL_TPS_DESC_TCTCREDIT_MAX_MSK,
@@ -1473,13 +1473,13 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw,
-					       u32 tx_pkt_shed_desc_tc_weight,
-					       u32 tc)
+					       const u32 tc,
+					       const u32 weight)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DESC_TCTWEIGHT_ADR(tc),
 			    HW_ATL_TPS_DESC_TCTWEIGHT_MSK,
 			    HW_ATL_TPS_DESC_TCTWEIGHT_SHIFT,
-			    tx_pkt_shed_desc_tc_weight);
+			    weight);
 }
 
 void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
@@ -1492,8 +1492,8 @@ void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
-						   u32 max_credit,
-						   u32 tc)
+						   const u32 tc,
+						   const u32 max_credit)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTCREDIT_MAX_ADR(tc),
 			    HW_ATL_TPS_DATA_TCTCREDIT_MAX_MSK,
@@ -1502,13 +1502,13 @@ void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
-					       u32 tx_pkt_shed_tc_data_weight,
-					       u32 tc)
+					       const u32 tc,
+					       const u32 weight)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL_TPS_DATA_TCTWEIGHT_ADR(tc),
 			    HW_ATL_TPS_DATA_TCTWEIGHT_MSK,
 			    HW_ATL_TPS_DATA_TCTWEIGHT_SHIFT,
-			    tx_pkt_shed_tc_data_weight);
+			    weight);
 }
 
 void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
index c56cc4e8e13c..61a6f70c51cd 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_llh.h
@@ -688,13 +688,13 @@ void hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(struct aq_hw_s *aq_hw,
 
 /* set tx packet scheduler descriptor tc max credit */
 void hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(struct aq_hw_s *aq_hw,
-						   u32 max_credit,
-					    u32 tc);
+						   const u32 tc,
+						   const u32 max_credit);
 
 /* set tx packet scheduler descriptor tc weight */
 void hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(struct aq_hw_s *aq_hw,
-					       u32 tx_pkt_shed_desc_tc_weight,
-					u32 tc);
+					       const u32 tc,
+					       const u32 weight);
 
 /* set tx packet scheduler descriptor vm arbitration mode */
 void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
@@ -702,13 +702,13 @@ void hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(struct aq_hw_s *aq_hw,
 
 /* set tx packet scheduler tc data max credit */
 void hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
-						   u32 max_credit,
-					    u32 tc);
+						   const u32 tc,
+						   const u32 max_credit);
 
 /* set tx packet scheduler tc data weight */
 void hw_atl_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
-					       u32 tx_pkt_shed_tc_data_weight,
-					u32 tc);
+					       const u32 tc,
+					       const u32 weight);
 
 /* set tx descriptor rate mode */
 void hw_atl_tps_tx_desc_rate_mode_set(struct aq_hw_s *aq_hw,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index 6817fa57cc83..c6a6ba66eb05 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -94,8 +94,8 @@ void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
-						    u32 max_credit,
-						    u32 tc)
+						    const u32 tc,
+						    const u32 max_credit)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc),
 			    HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK,
@@ -104,13 +104,13 @@ void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
 }
 
 void hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
-						u32 tx_pkt_shed_tc_data_weight,
-						u32 tc)
+						const u32 tc,
+						const u32 weight)
 {
 	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc),
 			    HW_ATL2_TPS_DATA_TCTWEIGHT_MSK,
 			    HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT,
-			    tx_pkt_shed_tc_data_weight);
+			    weight);
 }
 
 u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index d4b087d1dec1..883fa009bc0e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -47,13 +47,13 @@ void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en);
 
 /* set tx packet scheduler tc data max credit */
 void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
-						    u32 max_credit,
-						    u32 tc);
+						    const u32 tc,
+						    const u32 max_credit);
 
 /* set tx packet scheduler tc data weight */
 void hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(struct aq_hw_s *aq_hw,
-						u32 tx_pkt_shed_tc_data_weight,
-						u32 tc);
+						const u32 tc,
+						const u32 weight);
 
 u32 hw_atl2_get_hw_version(struct aq_hw_s *aq_hw);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2deac71ac492a6025b163701436e0aa39435a05f Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:47 +0300
Subject: net: atlantic: QoS implementation: min_rate

This patch adds support for mqprio min_rate limiters.

A2 HW supports Weighted Strict Priority (WSP) arbitration for Tx Descriptor
Queue scheduling among TCs, which can be used for min_rate shaping.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/aquantia/atlantic/aq_hw.h     |   2 +
 drivers/net/ethernet/aquantia/atlantic/aq_main.c   |  26 +++--
 drivers/net/ethernet/aquantia/atlantic/aq_nic.c    |  28 +++++
 drivers/net/ethernet/aquantia/atlantic/aq_nic.h    |   4 +
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c  |  77 +++++++++++--
 .../ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h  |   2 -
 .../ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c   | 127 +++++++++++++++++++--
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.c        |   9 ++
 .../aquantia/atlantic/hw_atl2/hw_atl2_llh.h        |   3 +
 .../atlantic/hw_atl2/hw_atl2_llh_internal.h        |  62 ++++++----
 10 files changed, 286 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
index d31e576f8b86..ed5b465bc664 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_hw.h
@@ -280,6 +280,8 @@ struct aq_hw_ops {
 	int (*hw_rss_hash_set)(struct aq_hw_s *self,
 			       struct aq_rss_parameters *rss_params);
 
+	int (*hw_tc_rate_limit_set)(struct aq_hw_s *self);
+
 	int (*hw_get_regs)(struct aq_hw_s *self,
 			   const struct aq_hw_caps_s *aq_hw_caps,
 			   u32 *regs_buff);
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_main.c b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
index 57a116ccad55..8a1da044e908 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_main.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_main.c
@@ -340,7 +340,6 @@ static int aq_validate_mqprio_opt(struct aq_nic_s *self,
 	struct aq_nic_cfg_s *aq_nic_cfg = aq_nic_get_cfg(self);
 	const unsigned int tcs_max = min_t(u8, aq_nic_cfg->aq_hw_caps->tcs_max,
 					   AQ_CFG_TCS_MAX);
-	int i;
 
 	if (num_tc > tcs_max) {
 		netdev_err(self->ndev, "Too many TCs requested\n");
@@ -352,12 +351,9 @@ static int aq_validate_mqprio_opt(struct aq_nic_s *self,
 		return -EOPNOTSUPP;
 	}
 
-	for (i = 0; i < num_tc; i++) {
-		if (has_min_rate && mqprio->min_rate[i]) {
-			netdev_err(self->ndev,
-				   "Min tx rate is not supported\n");
-			return -EOPNOTSUPP;
-		}
+	if (has_min_rate && !ATL_HW_IS_CHIP_FEATURE(self->aq_hw, ANTIGUA)) {
+		netdev_err(self->ndev, "Min tx rate is not supported\n");
+		return -EOPNOTSUPP;
 	}
 
 	return 0;
@@ -368,23 +364,35 @@ static int aq_ndo_setup_tc(struct net_device *dev, enum tc_setup_type type,
 {
 	struct tc_mqprio_qopt_offload *mqprio = type_data;
 	struct aq_nic_s *aq_nic = netdev_priv(dev);
+	bool has_min_rate;
+	bool has_max_rate;
 	int err;
 	int i;
 
 	if (type != TC_SETUP_QDISC_MQPRIO)
 		return -EOPNOTSUPP;
 
+	has_min_rate = !!(mqprio->flags & TC_MQPRIO_F_MIN_RATE);
+	has_max_rate = !!(mqprio->flags & TC_MQPRIO_F_MAX_RATE);
+
 	err = aq_validate_mqprio_opt(aq_nic, mqprio, mqprio->qopt.num_tc);
 	if (err)
 		return err;
 
-	if (mqprio->flags & TC_MQPRIO_F_MAX_RATE) {
-		for (i = 0; i < mqprio->qopt.num_tc; i++) {
+	for (i = 0; i < mqprio->qopt.num_tc; i++) {
+		if (has_max_rate) {
 			u64 max_rate = mqprio->max_rate[i];
 
 			do_div(max_rate, AQ_MBPS_DIVISOR);
 			aq_nic_setup_tc_max_rate(aq_nic, i, (u32)max_rate);
 		}
+
+		if (has_min_rate) {
+			u64 min_rate = mqprio->min_rate[i];
+
+			do_div(min_rate, AQ_MBPS_DIVISOR);
+			aq_nic_setup_tc_min_rate(aq_nic, i, (u32)min_rate);
+		}
 	}
 
 	return aq_nic_setup_tc_mqprio(aq_nic, mqprio->qopt.num_tc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
index 0973214db20f..4435c6374f7e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.c
@@ -196,6 +196,9 @@ static int aq_nic_update_link_status(struct aq_nic_s *self)
 #if IS_ENABLED(CONFIG_MACSEC)
 		aq_macsec_enable(self);
 #endif
+		if (self->aq_hw_ops->hw_tc_rate_limit_set)
+			self->aq_hw_ops->hw_tc_rate_limit_set(self->aq_hw);
+
 		netif_tx_wake_all_queues(self->ndev);
 	}
 	if (netif_carrier_ok(self->ndev) && !self->link_status.mbps) {
@@ -1374,3 +1377,28 @@ int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc,
 
 	return 0;
 }
+
+int aq_nic_setup_tc_min_rate(struct aq_nic_s *self, const unsigned int tc,
+			     const u32 min_rate)
+{
+	struct aq_nic_cfg_s *cfg = &self->aq_nic_cfg;
+
+	if (tc >= AQ_CFG_TCS_MAX)
+		return -EINVAL;
+
+	if (min_rate)
+		set_bit(tc, &cfg->tc_min_rate_msk);
+	else
+		clear_bit(tc, &cfg->tc_min_rate_msk);
+
+	if (min_rate && min_rate < 20) {
+		netdev_warn(self->ndev,
+			"Setting %s to the minimum usable value of %dMbps.\n",
+			"min rate", 20);
+		cfg->tc_min_rate[tc] = 20;
+	} else {
+		cfg->tc_min_rate[tc] = min_rate;
+	}
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
index 7a1d799b1e0d..2ab003065e62 100644
--- a/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
+++ b/drivers/net/ethernet/aquantia/atlantic/aq_nic.h
@@ -66,6 +66,8 @@ struct aq_nic_cfg_s {
 	u8  tcs;
 	u8 prio_tc_map[8];
 	u32 tc_max_rate[AQ_CFG_TCS_MAX];
+	unsigned long tc_min_rate_msk;
+	u32 tc_min_rate[AQ_CFG_TCS_MAX];
 	struct aq_rss_parameters aq_rss;
 	u32 eee_speeds;
 };
@@ -198,4 +200,6 @@ void aq_nic_release_filter(struct aq_nic_s *self, enum aq_rx_filter_type type,
 int aq_nic_setup_tc_mqprio(struct aq_nic_s *self, u32 tcs, u8 *prio_tc_map);
 int aq_nic_setup_tc_max_rate(struct aq_nic_s *self, const unsigned int tc,
 			     const u32 max_rate);
+int aq_nic_setup_tc_min_rate(struct aq_nic_s *self, const unsigned int tc,
+			     const u32 min_rate);
 #endif /* AQ_NIC_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 2448a09ef7b9..320f3669305d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -138,8 +138,6 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	unsigned int prio = 0U;
 	u32 tc = 0U;
 
-	hw_atl_b0_hw_init_tx_tc_rate_limit(self);
-
 	if (cfg->is_ptp) {
 		tx_buff_size -= HW_ATL_B0_PTP_TXBUF_SIZE;
 		rx_buff_size -= HW_ATL_B0_PTP_RXBUF_SIZE;
@@ -152,18 +150,11 @@ static int hw_atl_b0_hw_qos_set(struct aq_hw_s *self)
 	/* TPS VM init */
 	hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
 
-	/* TPS TC credits init */
-	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
-
 	tx_buff_size /= cfg->tcs;
 	rx_buff_size /= cfg->tcs;
 	for (tc = 0; tc < cfg->tcs; tc++) {
 		u32 threshold = 0U;
 
-		/* TX Packet Scheduler Data TC0 */
-		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc, 0xFFF);
-		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, tc, 0x64);
-
 		/* Tx buf size TC0 */
 		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
 
@@ -319,24 +310,87 @@ int hw_atl_b0_hw_offload_set(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
-int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self)
+static int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self)
 {
+	static const u32 max_weight = BIT(HW_ATL_TPS_DATA_TCTWEIGHT_WIDTH) - 1;
 	/* Scale factor is based on the number of bits in fractional portion */
 	static const u32 scale = BIT(HW_ATL_TPS_DESC_RATE_Y_WIDTH);
 	static const u32 frac_msk = HW_ATL_TPS_DESC_RATE_Y_MSK >>
 				    HW_ATL_TPS_DESC_RATE_Y_SHIFT;
+	const u32 link_speed = self->aq_link_status.mbps;
 	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
+	unsigned long num_min_rated_tcs = 0;
+	u32 tc_weight[AQ_CFG_TCS_MAX];
+	u32 fixed_max_credit;
+	u8 min_rate_msk = 0;
+	u32 sum_weight = 0;
 	int tc;
 
+	/* By default max_credit is based upon MTU (in unit of 64b) */
+	fixed_max_credit = nic_cfg->aq_hw_caps->mtu / 64;
+
+	if (link_speed) {
+		min_rate_msk = nic_cfg->tc_min_rate_msk &
+			       (BIT(nic_cfg->tcs) - 1);
+		num_min_rated_tcs = hweight8(min_rate_msk);
+	}
+
+	/* First, calculate weights where min_rate is specified */
+	if (num_min_rated_tcs) {
+		for (tc = 0; tc != nic_cfg->tcs; tc++) {
+			if (!nic_cfg->tc_min_rate[tc]) {
+				tc_weight[tc] = 0;
+				continue;
+			}
+
+			tc_weight[tc] = (-1L + link_speed +
+					 nic_cfg->tc_min_rate[tc] *
+					 max_weight) /
+					link_speed;
+			tc_weight[tc] = min(tc_weight[tc], max_weight);
+			sum_weight += tc_weight[tc];
+		}
+	}
+
+	/* WSP, if min_rate is set for at least one TC.
+	 * RR otherwise.
+	 */
+	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U);
+	/* Data TC Arbiter takes precedence over Descriptor TC Arbiter,
+	 * leave Descriptor TC Arbiter as RR.
+	 */
 	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+
 	hw_atl_tps_tx_desc_rate_mode_set(self, nic_cfg->is_qos ? 1U : 0U);
+
 	for (tc = 0; tc != nic_cfg->tcs; tc++) {
 		const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 1U : 0U;
 		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
+		u32 weight, max_credit;
 
-		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc, 0x50);
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc,
+							      fixed_max_credit);
 		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, tc, 0x1E);
 
+		if (num_min_rated_tcs) {
+			weight = tc_weight[tc];
+
+			if (!weight && sum_weight < max_weight)
+				weight = (max_weight - sum_weight) /
+					 (nic_cfg->tcs - num_min_rated_tcs);
+			else if (!weight)
+				weight = 0x64;
+
+			max_credit = max(8 * weight, fixed_max_credit);
+		} else {
+			weight = 0x64;
+			max_credit = 0xFFF;
+		}
+
+		hw_atl_tps_tx_pkt_shed_tc_data_weight_set(self, tc, weight);
+		hw_atl_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc,
+							      max_credit);
+
 		hw_atl_tps_tx_desc_rate_en_set(self, desc, en);
 
 		if (en) {
@@ -1550,6 +1604,7 @@ const struct aq_hw_ops hw_atl_ops_b0 = {
 	.hw_interrupt_moderation_set = hw_atl_b0_hw_interrupt_moderation_set,
 	.hw_rss_set                  = hw_atl_b0_hw_rss_set,
 	.hw_rss_hash_set             = hw_atl_b0_hw_rss_hash_set,
+	.hw_tc_rate_limit_set        = hw_atl_b0_hw_init_tx_tc_rate_limit,
 	.hw_get_regs                 = hw_atl_utils_hw_get_regs,
 	.hw_get_hw_stats             = hw_atl_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl_utils_get_fw_version,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index 992ee4ed37cc..b855459272ca 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -62,8 +62,6 @@ int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr);
 
 int hw_atl_b0_hw_start(struct aq_hw_s *self);
 
-int hw_atl_b0_hw_init_tx_tc_rate_limit(struct aq_hw_s *self);
-
 int hw_atl_b0_hw_irq_enable(struct aq_hw_s *self, u64 mask);
 int hw_atl_b0_hw_irq_disable(struct aq_hw_s *self, u64 mask);
 int hw_atl_b0_hw_irq_read(struct aq_hw_s *self, u64 *mask);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index a5bffadde6df..f941773b3e20 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -10,6 +10,7 @@
 #include "hw_atl/hw_atl_b0.h"
 #include "hw_atl/hw_atl_utils.h"
 #include "hw_atl/hw_atl_llh.h"
+#include "hw_atl/hw_atl_llh_internal.h"
 #include "hw_atl2_utils.h"
 #include "hw_atl2_llh.h"
 #include "hw_atl2_internal.h"
@@ -148,8 +149,6 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	unsigned int prio = 0U;
 	u32 tc = 0U;
 
-	hw_atl_b0_hw_init_tx_tc_rate_limit(self);
-
 	/* TPS Descriptor rate init */
 	hw_atl_tps_tx_pkt_shed_desc_rate_curr_time_res_set(self, 0x0U);
 	hw_atl_tps_tx_pkt_shed_desc_rate_lim_set(self, 0xA);
@@ -157,19 +156,11 @@ static int hw_atl2_hw_qos_set(struct aq_hw_s *self)
 	/* TPS VM init */
 	hw_atl_tps_tx_pkt_shed_desc_vm_arb_mode_set(self, 0U);
 
-	/* TPS TC credits init */
-	hw_atl_tps_tx_pkt_shed_data_arb_mode_set(self, 0U);
-
 	tx_buff_size /= cfg->tcs;
 	rx_buff_size /= cfg->tcs;
 	for (tc = 0; tc < cfg->tcs; tc++) {
 		u32 threshold = 0U;
 
-		/* TX Packet Scheduler Data TC0 */
-		hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, 0xFFF0,
-							       tc);
-		hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, 0x640, tc);
-
 		/* Tx buf size TC0 */
 		hw_atl_tpb_tx_pkt_buff_size_per_tc_set(self, tx_buff_size, tc);
 
@@ -225,6 +216,121 @@ static int hw_atl2_hw_rss_set(struct aq_hw_s *self,
 	return aq_hw_err_from_flags(self);
 }
 
+static int hw_atl2_hw_init_tx_tc_rate_limit(struct aq_hw_s *self)
+{
+	static const u32 max_weight = BIT(HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH) - 1;
+	/* Scale factor is based on the number of bits in fractional portion */
+	static const u32 scale = BIT(HW_ATL_TPS_DESC_RATE_Y_WIDTH);
+	static const u32 frac_msk = HW_ATL_TPS_DESC_RATE_Y_MSK >>
+				    HW_ATL_TPS_DESC_RATE_Y_SHIFT;
+	const u32 link_speed = self->aq_link_status.mbps;
+	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
+	unsigned long num_min_rated_tcs = 0;
+	u32 tc_weight[AQ_CFG_TCS_MAX];
+	u32 fixed_max_credit_4b;
+	u32 fixed_max_credit;
+	u8 min_rate_msk = 0;
+	u32 sum_weight = 0;
+	int tc;
+
+	/* By default max_credit is based upon MTU (in unit of 64b) */
+	fixed_max_credit = nic_cfg->aq_hw_caps->mtu / 64;
+	/* in unit of 4b */
+	fixed_max_credit_4b = nic_cfg->aq_hw_caps->mtu / 4;
+
+	if (link_speed) {
+		min_rate_msk = nic_cfg->tc_min_rate_msk &
+			       (BIT(nic_cfg->tcs) - 1);
+		num_min_rated_tcs = hweight8(min_rate_msk);
+	}
+
+	/* First, calculate weights where min_rate is specified */
+	if (num_min_rated_tcs) {
+		for (tc = 0; tc != nic_cfg->tcs; tc++) {
+			if (!nic_cfg->tc_min_rate[tc]) {
+				tc_weight[tc] = 0;
+				continue;
+			}
+
+			tc_weight[tc] = (-1L + link_speed +
+					 nic_cfg->tc_min_rate[tc] *
+					 max_weight) /
+					link_speed;
+			tc_weight[tc] = min(tc_weight[tc], max_weight);
+			sum_weight += tc_weight[tc];
+		}
+	}
+
+	/* WSP, if min_rate is set for at least one TC.
+	 * RR otherwise.
+	 */
+	hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(self, min_rate_msk ? 1U : 0U);
+	/* Data TC Arbiter takes precedence over Descriptor TC Arbiter,
+	 * leave Descriptor TC Arbiter as RR.
+	 */
+	hw_atl_tps_tx_pkt_shed_desc_tc_arb_mode_set(self, 0U);
+
+	hw_atl_tps_tx_desc_rate_mode_set(self, nic_cfg->is_qos ? 1U : 0U);
+
+	for (tc = 0; tc != nic_cfg->tcs; tc++) {
+		const u32 en = (nic_cfg->tc_max_rate[tc] != 0) ? 1U : 0U;
+		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
+		u32 weight, max_credit;
+
+		hw_atl_tps_tx_pkt_shed_desc_tc_max_credit_set(self, tc,
+							      fixed_max_credit);
+		hw_atl_tps_tx_pkt_shed_desc_tc_weight_set(self, tc, 0x1E);
+
+		if (num_min_rated_tcs) {
+			weight = tc_weight[tc];
+
+			if (!weight && sum_weight < max_weight)
+				weight = (max_weight - sum_weight) /
+					 (nic_cfg->tcs - num_min_rated_tcs);
+			else if (!weight)
+				weight = 0x640;
+
+			max_credit = max(2 * weight, fixed_max_credit_4b);
+		} else {
+			weight = 0x640;
+			max_credit = 0xFFF0;
+		}
+
+		hw_atl2_tps_tx_pkt_shed_tc_data_weight_set(self, tc, weight);
+		hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(self, tc,
+							       max_credit);
+
+		hw_atl_tps_tx_desc_rate_en_set(self, desc, en);
+
+		if (en) {
+			/* Nominal rate is always 10G */
+			const u32 rate = 10000U * scale /
+					 nic_cfg->tc_max_rate[tc];
+			const u32 rate_int = rate >>
+					     HW_ATL_TPS_DESC_RATE_Y_WIDTH;
+			const u32 rate_frac = rate & frac_msk;
+
+			hw_atl_tps_tx_desc_rate_x_set(self, desc, rate_int);
+			hw_atl_tps_tx_desc_rate_y_set(self, desc, rate_frac);
+		} else {
+			/* A value of 1 indicates the queue is not
+			 * rate controlled.
+			 */
+			hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U);
+			hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U);
+		}
+	}
+	for (tc = nic_cfg->tcs; tc != AQ_CFG_TCS_MAX; tc++) {
+		const u32 desc = AQ_NIC_CFG_TCVEC2RING(nic_cfg, tc, 0);
+
+		hw_atl_tps_tx_desc_rate_en_set(self, desc, 0U);
+		hw_atl_tps_tx_desc_rate_x_set(self, desc, 1U);
+		hw_atl_tps_tx_desc_rate_y_set(self, desc, 0U);
+	}
+
+	return aq_hw_err_from_flags(self);
+}
+
 static int hw_atl2_hw_init_tx_path(struct aq_hw_s *self)
 {
 	struct aq_nic_cfg_s *nic_cfg = self->aq_nic_cfg;
@@ -730,6 +836,7 @@ const struct aq_hw_ops hw_atl2_ops = {
 	.hw_interrupt_moderation_set = hw_atl2_hw_interrupt_moderation_set,
 	.hw_rss_set                  = hw_atl2_hw_rss_set,
 	.hw_rss_hash_set             = hw_atl_b0_hw_rss_hash_set,
+	.hw_tc_rate_limit_set        = hw_atl2_hw_init_tx_tc_rate_limit,
 	.hw_get_hw_stats             = hw_atl2_utils_get_hw_stats,
 	.hw_get_fw_version           = hw_atl2_utils_get_fw_version,
 	.hw_set_offload              = hw_atl_b0_hw_offload_set,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
index c6a6ba66eb05..cd954b11d24a 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.c
@@ -93,6 +93,15 @@ void hw_atl2_reg_tx_intr_moder_ctrl_set(struct aq_hw_s *aq_hw,
 			tx_intr_moderation_ctl);
 }
 
+void hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw,
+					       const u32 data_arb_mode)
+{
+	aq_hw_write_reg_bit(aq_hw, HW_ATL2_TPS_DATA_TC_ARB_MODE_ADR,
+			    HW_ATL2_TPS_DATA_TC_ARB_MODE_MSK,
+			    HW_ATL2_TPS_DATA_TC_ARB_MODE_SHIFT,
+			    data_arb_mode);
+}
+
 void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
 						    const u32 tc,
 						    const u32 max_credit)
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
index 883fa009bc0e..98c7a4621297 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh.h
@@ -45,6 +45,9 @@ void hw_atl2_tpb_tx_tc_q_rand_map_en_set(struct aq_hw_s *aq_hw,
 /* set tx buffer clock gate enable */
 void hw_atl2_tpb_tx_buf_clk_gate_en_set(struct aq_hw_s *aq_hw, u32 clk_gate_en);
 
+void hw_atl2_tps_tx_pkt_shed_data_arb_mode_set(struct aq_hw_s *aq_hw,
+					       const u32 data_arb_mode);
+
 /* set tx packet scheduler tc data max credit */
 void hw_atl2_tps_tx_pkt_shed_tc_data_max_credit_set(struct aq_hw_s *aq_hw,
 						    const u32 tc,
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
index bf0198ca4e85..e34c5cda061e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_llh_internal.h
@@ -185,42 +185,60 @@
 /* default value of bitfield tx_q_tc_map{q} */
 #define HW_ATL2_TX_Q_TC_MAP_DEFAULT 0x0
 
+/* tx data_tc_arb_mode bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc_arb_mode".
+ * port="pif_tps_data_tc_arb_mode_i"
+ */
+
+/* register address for bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_ADR 0x00007100
+/* bitmask for bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_MSK 0x00000003
+/* inverted bitmask for bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_MSKN 0xfffffffc
+/* lower bit position of bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_SHIFT 0
+/* width of bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_WIDTH 2
+/* default value of bitfield data_tc_arb_mode */
+#define HW_ATL2_TPS_DATA_TC_ARB_MODE_DEFAULT 0x0
+
 /* tx data_tc{t}_credit_max[f:0] bitfield definitions
  * preprocessor definitions for the bitfield "data_tc{t}_credit_max[f:0]".
  * parameter: tc {t} | stride size 0x4 | range [0, 7]
- * port="pif_tps_data_tc0_credit_max_i[11:0]"
+ * port="pif_tps_data_tc0_credit_max_i[15:0]"
  */
 
-/* register address for bitfield data_tc{t}_credit_max[b:0] */
+/* register address for bitfield data_tc{t}_credit_max[f:0] */
 #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_ADR(tc) (0x00007110 + (tc) * 0x4)
-/* bitmask for bitfield data_tc{t}_credit_max[b:0] */
-#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK 0x0fff0000
-/* inverted bitmask for bitfield data_tc{t}_credit_max[b:0] */
-#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSKN 0xf000ffff
-/* lower bit position of bitfield data_tc{t}_credit_max[b:0] */
+/* bitmask for bitfield data_tc{t}_credit_max[f:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSK 0xffff0000
+/* inverted bitmask for bitfield data_tc{t}_credit_max[f:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_MSKN 0x0000ffff
+/* lower bit position of bitfield data_tc{t}_credit_max[f:0] */
 #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_SHIFT 16
-/* width of bitfield data_tc{t}_credit_max[b:0] */
-#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_WIDTH 12
-/* default value of bitfield data_tc{t}_credit_max[b:0] */
+/* width of bitfield data_tc{t}_credit_max[f:0] */
+#define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_WIDTH 16
+/* default value of bitfield data_tc{t}_credit_max[f:0] */
 #define HW_ATL2_TPS_DATA_TCTCREDIT_MAX_DEFAULT 0x0
 
-/* tx data_tc{t}_weight[8:0] bitfield definitions
- * preprocessor definitions for the bitfield "data_tc{t}_weight[8:0]".
+/* tx data_tc{t}_weight[e:0] bitfield definitions
+ * preprocessor definitions for the bitfield "data_tc{t}_weight[e:0]".
  * parameter: tc {t} | stride size 0x4 | range [0, 7]
- * port="pif_tps_data_tc0_weight_i[8:0]"
+ * port="pif_tps_data_tc0_weight_i[14:0]"
  */
 
-/* register address for bitfield data_tc{t}_weight[8:0] */
+/* register address for bitfield data_tc{t}_weight[e:0] */
 #define HW_ATL2_TPS_DATA_TCTWEIGHT_ADR(tc) (0x00007110 + (tc) * 0x4)
-/* bitmask for bitfield data_tc{t}_weight[8:0] */
-#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSK 0x000001ff
-/* inverted bitmask for bitfield data_tc{t}_weight[8:0] */
-#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSKN 0xfffffe00
-/* lower bit position of bitfield data_tc{t}_weight[8:0] */
+/* bitmask for bitfield data_tc{t}_weight[e:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSK 0x00007fff
+/* inverted bitmask for bitfield data_tc{t}_weight[e:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_MSKN 0xffff8000
+/* lower bit position of bitfield data_tc{t}_weight[e:0] */
 #define HW_ATL2_TPS_DATA_TCTWEIGHT_SHIFT 0
-/* width of bitfield data_tc{t}_weight[8:0] */
-#define HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH 9
-/* default value of bitfield data_tc{t}_weight[8:0] */
+/* width of bitfield data_tc{t}_weight[e:0] */
+#define HW_ATL2_TPS_DATA_TCTWEIGHT_WIDTH 15
+/* default value of bitfield data_tc{t}_weight[e:0] */
 #define HW_ATL2_TPS_DATA_TCTWEIGHT_DEFAULT 0x0
 
 /* tx interrupt moderation control register definitions
-- 
cgit v1.2.3-59-g8ed1b


From 40f05e5b0d0e6ed5cc868cd09fa976495716b8f9 Mon Sep 17 00:00:00 2001
From: Mark Starovoytov <mstarovoitov@marvell.com>
Date: Fri, 22 May 2020 11:19:48 +0300
Subject: net: atlantic: proper rss_ctrl1 (54c0) initialization

This patch fixes an inconsistency between code and spec, which
was found while working on the QoS implementation.

When 8TCs are used, 2 is the maximum supported number of index bits.
In a 4TC mode, we do support 3, but we shouldn't really use the bytes,
which are intended for the 8TC mode.

Signed-off-by: Mark Starovoytov <mstarovoitov@marvell.com>
Signed-off-by: Igor Russkikh <irusskikh@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c    | 16 ++++++++++++++--
 .../net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h    |  2 ++
 .../aquantia/atlantic/hw_atl/hw_atl_b0_internal.h        |  4 ++++
 drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c |  4 +---
 .../aquantia/atlantic/hw_atl2/hw_atl2_internal.h         |  3 ---
 5 files changed, 21 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
index 320f3669305d..14d79f70cad7 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.c
@@ -447,6 +447,19 @@ static int hw_atl_b0_hw_init_tx_path(struct aq_hw_s *self)
 	return aq_hw_err_from_flags(self);
 }
 
+void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self)
+{
+	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
+	u32 rss_ctrl1 = HW_ATL_RSS_DISABLED;
+
+	if (cfg->is_rss)
+		rss_ctrl1 = (cfg->tc_mode == AQ_TC_MODE_8TCS) ?
+			    HW_ATL_RSS_ENABLED_8TCS_2INDEX_BITS :
+			    HW_ATL_RSS_ENABLED_4TCS_3INDEX_BITS;
+
+	hw_atl_reg_rx_flr_rss_control1set(self, rss_ctrl1);
+}
+
 static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 {
 	struct aq_nic_cfg_s *cfg = self->aq_nic_cfg;
@@ -459,8 +472,7 @@ static int hw_atl_b0_hw_init_rx_path(struct aq_hw_s *self)
 	hw_atl_rpb_rx_flow_ctl_mode_set(self, 1U);
 
 	/* RSS Ring selection */
-	hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ?
-					0xB3333333U : 0x00000000U);
+	hw_atl_b0_hw_init_rx_rss_ctrl1(self);
 
 	/* Multicast filters */
 	for (i = HW_ATL_B0_MAC_MAX; i--;) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
index b855459272ca..30f468f2084d 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0.h
@@ -58,6 +58,8 @@ int hw_atl_b0_hw_ring_tx_head_update(struct aq_hw_s *self,
 int hw_atl_b0_hw_ring_tx_stop(struct aq_hw_s *self, struct aq_ring_s *ring);
 int hw_atl_b0_hw_ring_rx_stop(struct aq_hw_s *self, struct aq_ring_s *ring);
 
+void hw_atl_b0_hw_init_rx_rss_ctrl1(struct aq_hw_s *self);
+
 int hw_atl_b0_hw_mac_addr_set(struct aq_hw_s *self, u8 *mac_addr);
 
 int hw_atl_b0_hw_start(struct aq_hw_s *self);
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
index 4fba4e0928c7..cf460d61a45e 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl/hw_atl_b0_internal.h
@@ -151,6 +151,10 @@
 #define HW_ATL_B0_MAX_RXD 8184U
 #define HW_ATL_B0_MAX_TXD 8184U
 
+#define HW_ATL_RSS_DISABLED 0x00000000U
+#define HW_ATL_RSS_ENABLED_8TCS_2INDEX_BITS 0xA2222222U
+#define HW_ATL_RSS_ENABLED_4TCS_3INDEX_BITS 0x80003333U
+
 /* HW layer capabilities */
 
 #endif /* HW_ATL_B0_INTERNAL_H */
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
index f941773b3e20..8df9d4ef36f0 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2.c
@@ -475,9 +475,7 @@ static int hw_atl2_hw_init_rx_path(struct aq_hw_s *self)
 	hw_atl2_rpf_rss_hash_type_set(self, HW_ATL2_RPF_RSS_HASH_TYPE_ALL);
 
 	/* RSS Ring selection */
-	hw_atl_reg_rx_flr_rss_control1set(self, cfg->is_rss ?
-						HW_ATL_RSS_ENABLED_3INDEX_BITS :
-						HW_ATL_RSS_DISABLED);
+	hw_atl_b0_hw_init_rx_rss_ctrl1(self);
 
 	/* Multicast filters */
 	for (i = HW_ATL2_MAC_MAX; i--;) {
diff --git a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
index 9ac1979a4867..5a89bb8722f9 100644
--- a/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
+++ b/drivers/net/ethernet/aquantia/atlantic/hw_atl2/hw_atl2_internal.h
@@ -117,9 +117,6 @@ enum HW_ATL2_RPF_RSS_HASH_TYPE {
 					HW_ATL2_RPF_RSS_HASH_TYPE_IPV6_EX_UDP,
 };
 
-#define HW_ATL_RSS_DISABLED 0x00000000U
-#define HW_ATL_RSS_ENABLED_3INDEX_BITS 0xB3333333U
-
 #define HW_ATL_MCAST_FLT_ANY_TO_HOST 0x00010FFFU
 
 struct hw_atl2_priv {
-- 
cgit v1.2.3-59-g8ed1b


From 68f236df93a986379e61191157848b4e7d303285 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:52 +0300
Subject: net: ena: add support for the rx offset feature

Newer ENA devices can write data to rx buffers with an offset
from the beginning of the buffer.

This commit adds support for this feature in the driver.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h  |  5 ++++-
 drivers/net/ethernet/amazon/ena/ena_eth_com.c     | 18 ++++++++++++------
 drivers/net/ethernet/amazon/ena/ena_eth_com.h     |  1 +
 drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h |  4 +++-
 drivers/net/ethernet/amazon/ena/ena_netdev.c      |  8 ++++++++
 5 files changed, 28 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 7be3dcbf3d16..727836f638ad 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -813,7 +813,8 @@ struct ena_admin_host_info {
 
 	u16 reserved;
 
-	/* 1 :0 : reserved
+	/* 0 : reserved
+	 * 1 : rx_offset
 	 * 2 : interrupt_moderation
 	 * 31:3 : reserved
 	 */
@@ -1124,6 +1125,8 @@ struct ena_admin_ena_mmio_req_read_less_resp {
 #define ENA_ADMIN_HOST_INFO_DEVICE_MASK                     GENMASK(7, 3)
 #define ENA_ADMIN_HOST_INFO_BUS_SHIFT                       8
 #define ENA_ADMIN_HOST_INFO_BUS_MASK                        GENMASK(15, 8)
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_SHIFT                 1
+#define ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK                  BIT(1)
 #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_SHIFT      2
 #define ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK       BIT(2)
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index 2845ac277724..a014f514c069 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -519,7 +519,7 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
 	struct ena_eth_io_rx_cdesc_base *cdesc = NULL;
 	u16 cdesc_idx = 0;
 	u16 nb_hw_desc;
-	u16 i;
+	u16 i = 0;
 
 	WARN(io_cq->direction != ENA_COM_IO_QUEUE_DIRECTION_RX, "wrong Q type");
 
@@ -538,13 +538,19 @@ int ena_com_rx_pkt(struct ena_com_io_cq *io_cq,
 		return -ENOSPC;
 	}
 
-	for (i = 0; i < nb_hw_desc; i++) {
+	cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx);
+	ena_rx_ctx->pkt_offset = cdesc->offset;
+
+	do {
+		ena_buf[i].len = cdesc->length;
+		ena_buf[i].req_id = cdesc->req_id;
+
+		if (++i >= nb_hw_desc)
+			break;
+
 		cdesc = ena_com_rx_cdesc_idx_to_ptr(io_cq, cdesc_idx + i);
 
-		ena_buf->len = cdesc->length;
-		ena_buf->req_id = cdesc->req_id;
-		ena_buf++;
-	}
+	} while (1);
 
 	/* Update SQ head ptr */
 	io_sq->next_to_comp += nb_hw_desc;
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 77986c0ea52c..9834b5cdb655 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -73,6 +73,7 @@ struct ena_com_rx_ctx {
 	u32 hash;
 	u16 descs;
 	int max_bufs;
+	u8 pkt_offset;
 };
 
 int ena_com_prepare_tx(struct ena_com_io_sq *io_sq,
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
index 00e0f056a741..ee28fb067d8c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -264,7 +264,9 @@ struct ena_eth_io_rx_cdesc_base {
 
 	u16 sub_qid;
 
-	u16 reserved;
+	u8 offset;
+
+	u8 reserved;
 };
 
 /* 8-word format */
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 85b87ed02dd5..33578297dc56 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1435,6 +1435,8 @@ static struct sk_buff *ena_rx_skb(struct ena_ring *rx_ring,
 
 		skb_add_rx_frag(skb, skb_shinfo(skb)->nr_frags, rx_info->page,
 				rx_info->page_offset, len, ENA_PAGE_SIZE);
+		/* The offset is non zero only for the first buffer */
+		rx_info->page_offset = 0;
 
 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 			  "rx skb updated. len %d. data_len %d\n",
@@ -1590,6 +1592,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 {
 	u16 next_to_clean = rx_ring->next_to_clean;
 	struct ena_com_rx_ctx ena_rx_ctx;
+	struct ena_rx_buffer *rx_info;
 	struct ena_adapter *adapter;
 	u32 res_budget, work_done;
 	int rx_copybreak_pkt = 0;
@@ -1614,6 +1617,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 		ena_rx_ctx.ena_bufs = rx_ring->ena_bufs;
 		ena_rx_ctx.max_bufs = rx_ring->sgl_size;
 		ena_rx_ctx.descs = 0;
+		ena_rx_ctx.pkt_offset = 0;
 		rc = ena_com_rx_pkt(rx_ring->ena_com_io_cq,
 				    rx_ring->ena_com_io_sq,
 				    &ena_rx_ctx);
@@ -1623,6 +1627,9 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 		if (unlikely(ena_rx_ctx.descs == 0))
 			break;
 
+		rx_info = &rx_ring->rx_buffer_info[rx_ring->ena_bufs[0].req_id];
+		rx_info->page_offset = ena_rx_ctx.pkt_offset;
+
 		netif_dbg(rx_ring->adapter, rx_status, rx_ring->netdev,
 			  "rx_poll: q %d got packet from ena. descs #: %d l3 proto %d l4 proto %d hash: %x\n",
 			  rx_ring->qid, ena_rx_ctx.descs, ena_rx_ctx.l3_proto,
@@ -3111,6 +3118,7 @@ static void ena_config_host_info(struct ena_com_dev *ena_dev,
 	host_info->num_cpus = num_online_cpus();
 
 	host_info->driver_supported_features =
+		ENA_ADMIN_HOST_INFO_RX_OFFSET_MASK |
 		ENA_ADMIN_HOST_INFO_INTERRUPT_MODERATION_MASK;
 
 	rc = ena_com_set_host_attributes(ena_dev);
-- 
cgit v1.2.3-59-g8ed1b


From 7cfe9a55934627deb95da87e35c552bd6241133e Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:53 +0300
Subject: net: ena: rename ena_com_free_desc to make API more uniform

Rename ena_com_free_desc to ena_com_free_q_entries to match
the LLQ mode.

In non-LLQ mode, an entry in an IO ring corresponds to
a descriptor. In LLQ mode, an entry may correspond to several
descriptors (per LLQ definition).

Signed-off-by: Igor Chauskin <igorch@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_eth_com.h | 6 +++---
 drivers/net/ethernet/amazon/ena/ena_netdev.c  | 5 ++---
 2 files changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.h b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
index 9834b5cdb655..8b1afd3b32f2 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.h
@@ -96,7 +96,7 @@ static inline void ena_com_unmask_intr(struct ena_com_io_cq *io_cq,
 	writel(intr_reg->intr_control, io_cq->unmask_reg);
 }
 
-static inline int ena_com_free_desc(struct ena_com_io_sq *io_sq)
+static inline int ena_com_free_q_entries(struct ena_com_io_sq *io_sq)
 {
 	u16 tail, next_to_comp, cnt;
 
@@ -114,7 +114,7 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
 	int temp;
 
 	if (io_sq->mem_queue_type == ENA_ADMIN_PLACEMENT_POLICY_HOST)
-		return ena_com_free_desc(io_sq) >= required_buffers;
+		return ena_com_free_q_entries(io_sq) >= required_buffers;
 
 	/* This calculation doesn't need to be 100% accurate. So to reduce
 	 * the calculation overhead just Subtract 2 lines from the free descs
@@ -123,7 +123,7 @@ static inline bool ena_com_sq_have_enough_space(struct ena_com_io_sq *io_sq,
 	 */
 	temp = required_buffers / io_sq->llq_info.descs_per_entry + 2;
 
-	return ena_com_free_desc(io_sq) > temp;
+	return ena_com_free_q_entries(io_sq) > temp;
 }
 
 static inline bool ena_com_meta_desc_changed(struct ena_com_io_sq *io_sq,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 33578297dc56..c3cbe48b353e 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -1691,7 +1691,7 @@ static int ena_clean_rx_irq(struct ena_ring *rx_ring, struct napi_struct *napi,
 
 	rx_ring->next_to_clean = next_to_clean;
 
-	refill_required = ena_com_free_desc(rx_ring->ena_com_io_sq);
+	refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
 	refill_threshold =
 		min_t(int, rx_ring->ring_size / ENA_RX_REFILL_THRESH_DIVIDER,
 		      ENA_RX_REFILL_THRESH_PACKET);
@@ -3694,8 +3694,7 @@ static void check_for_empty_rx_ring(struct ena_adapter *adapter)
 	for (i = 0; i < adapter->num_io_queues; i++) {
 		rx_ring = &adapter->rx_ring[i];
 
-		refill_required =
-			ena_com_free_desc(rx_ring->ena_com_io_sq);
+		refill_required = ena_com_free_q_entries(rx_ring->ena_com_io_sq);
 		if (unlikely(refill_required == (rx_ring->ring_size - 1))) {
 			rx_ring->empty_rx_queue++;
 
-- 
cgit v1.2.3-59-g8ed1b


From f391503b7a11c6ca033a6e916cee23b85e21a1f1 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:54 +0300
Subject: net: ena: use explicit variable size for clarity

Use u64 instead of unsigned long long for clarity

Signed-off-by: Shai Brandes <shaibran@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 7 +++----
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index b51bf62af11b..02780f9fa586 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2003,7 +2003,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 	struct ena_admin_aenq_entry *aenq_e;
 	struct ena_admin_aenq_common_desc *aenq_common;
 	struct ena_com_aenq *aenq  = &dev->aenq;
-	unsigned long long timestamp;
+	u64 timestamp;
 	ena_aenq_handler handler_cb;
 	u16 masked_head, processed = 0;
 	u8 phase;
@@ -2021,9 +2021,8 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 		 */
 		dma_rmb();
 
-		timestamp =
-			(unsigned long long)aenq_common->timestamp_low |
-			((unsigned long long)aenq_common->timestamp_high << 32);
+		timestamp = (u64)aenq_common->timestamp_low |
+			    ((u64)aenq_common->timestamp_high << 32);
 		pr_debug("AENQ! Group[%x] Syndrom[%x] timestamp: [%llus]\n",
 			 aenq_common->group, aenq_common->syndrom, timestamp);
 
-- 
cgit v1.2.3-59-g8ed1b


From adb3fb388933e03c61e941bce4a0dac4a6ae98c0 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:55 +0300
Subject: net: ena: fix ena_com_comp_status_to_errno() return value

Default return value should be -EINVAL since the input
in this case was unexpected.
Also remove the now redundant check in the beginning
of the function.

Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 02780f9fa586..921945dace22 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -523,9 +523,6 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 	if (unlikely(comp_status != 0))
 		pr_err("admin command failed[%u]\n", comp_status);
 
-	if (unlikely(comp_status > ENA_ADMIN_UNKNOWN_ERROR))
-		return -EINVAL;
-
 	switch (comp_status) {
 	case ENA_ADMIN_SUCCESS:
 		return 0;
@@ -540,7 +537,7 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 		return -EINVAL;
 	}
 
-	return 0;
+	return -EINVAL;
 }
 
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
-- 
cgit v1.2.3-59-g8ed1b


From da447b3b542f2d89e368c8991203ae6aa025ddeb Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:56 +0300
Subject: net: ena: simplify ena_com_update_intr_delay_resolution()

Initialize prev_intr_delay_resolution with ena_dev->intr_delay_resolution
unconditionally, since it is initialized with
ENA_DEFAULT_INTR_DELAY_RESOLUTION in ena_probe(). This approach makes much
more sense than handling errors of not initializing it.

Also added unlikely to if condition.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 921945dace22..e2025eb86984 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -1281,13 +1281,9 @@ static int ena_com_ind_tbl_convert_to_device(struct ena_com_dev *ena_dev)
 static void ena_com_update_intr_delay_resolution(struct ena_com_dev *ena_dev,
 						 u16 intr_delay_resolution)
 {
-	/* Initial value of intr_delay_resolution might be 0 */
-	u16 prev_intr_delay_resolution =
-		ena_dev->intr_delay_resolution ?
-		ena_dev->intr_delay_resolution :
-		ENA_DEFAULT_INTR_DELAY_RESOLUTION;
+	u16 prev_intr_delay_resolution = ena_dev->intr_delay_resolution;
 
-	if (!intr_delay_resolution) {
+	if (unlikely(!intr_delay_resolution)) {
 		pr_err("Illegal intr_delay_resolution provided. Going to use default 1 usec resolution\n");
 		intr_delay_resolution = ENA_DEFAULT_INTR_DELAY_RESOLUTION;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 95d0fcb5704aa179c1aa8e5ea54f7715f0252383 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:57 +0300
Subject: net: ena: cosmetic: rename ena_update_tx/rx_rings_intr_moderation()

Rename ena_update_tx/rx_rings_intr_moderation() to
ena_update_tx/rx_rings_nonadaptive_intr_moderation()
to distinguish between adaptive and non-adaptive interrupt moderation.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index 830d3711d6ee..ca13efa13b63 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -326,7 +326,7 @@ static int ena_get_coalesce(struct net_device *net_dev,
 	return 0;
 }
 
-static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
+static void ena_update_tx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter)
 {
 	unsigned int val;
 	int i;
@@ -337,7 +337,7 @@ static void ena_update_tx_rings_intr_moderation(struct ena_adapter *adapter)
 		adapter->tx_ring[i].smoothed_interval = val;
 }
 
-static void ena_update_rx_rings_intr_moderation(struct ena_adapter *adapter)
+static void ena_update_rx_rings_nonadaptive_intr_moderation(struct ena_adapter *adapter)
 {
 	unsigned int val;
 	int i;
@@ -365,14 +365,14 @@ static int ena_set_coalesce(struct net_device *net_dev,
 	if (rc)
 		return rc;
 
-	ena_update_tx_rings_intr_moderation(adapter);
+	ena_update_tx_rings_nonadaptive_intr_moderation(adapter);
 
 	rc = ena_com_update_nonadaptive_moderation_interval_rx(ena_dev,
 							       coalesce->rx_coalesce_usecs);
 	if (rc)
 		return rc;
 
-	ena_update_rx_rings_intr_moderation(adapter);
+	ena_update_rx_rings_nonadaptive_intr_moderation(adapter);
 
 	if (coalesce->use_adaptive_rx_coalesce &&
 	    !ena_com_get_adaptive_moderation_enabled(ena_dev))
-- 
cgit v1.2.3-59-g8ed1b


From ba6f6b4191f850ddf9363a4aa946f2432a28fb30 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:58 +0300
Subject: net: ena: cosmetic: set queue sizes to u32 for consistency

Make all types of variables that convey the number and size of queues to
be u32, for consistency with the API between the driver and device via
ena_admin_defs.h:ena_admin_get_feat_resp.max_queue_ext fields. Current
code sometimes uses int and there are multiple assignments between these
variables with different types.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index c3cbe48b353e..0999fe3310fb 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -3832,11 +3832,11 @@ static void ena_timer_service(struct timer_list *t)
 	mod_timer(&adapter->timer_service, round_jiffies(jiffies + HZ));
 }
 
-static int ena_calc_max_io_queue_num(struct pci_dev *pdev,
+static u32 ena_calc_max_io_queue_num(struct pci_dev *pdev,
 				     struct ena_com_dev *ena_dev,
 				     struct ena_com_dev_get_features_ctx *get_feat_ctx)
 {
-	int io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
+	u32 io_tx_sq_num, io_tx_cq_num, io_rx_num, max_num_io_queues;
 
 	if (ena_dev->supported_features & BIT(ENA_ADMIN_MAX_QUEUES_EXT)) {
 		struct ena_admin_queue_ext_feature_fields *max_queue_ext =
-- 
cgit v1.2.3-59-g8ed1b


From 13830937ccf68519ec3a40d8a8675af136cea8be Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:08:59 +0300
Subject: net: ena: cosmetic: fix spelling and grammar mistakes in comments

Fix spelling and grammar mistakes in comments in ena_com.h,
ena_com.c and ena_netdev.c.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    |  2 +-
 drivers/net/ethernet/amazon/ena/ena_com.h    | 30 ++++++++++++++--------------
 drivers/net/ethernet/amazon/ena/ena_netdev.c |  2 +-
 3 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index e2025eb86984..d47821655d61 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -772,7 +772,7 @@ static int ena_com_wait_and_process_admin_cq_interrupts(struct ena_comp_ctx *com
 			if (admin_queue->auto_polling)
 				admin_queue->polling = true;
 		} else {
-			pr_err("The ena device doesn't send a completion for the admin cmd %d status %d\n",
+			pr_err("The ena device didn't send a completion for the admin cmd %d status %d\n",
 			       comp_ctx->cmd_opcode, comp_ctx->status);
 		}
 		/* Check if shifted to polling mode.
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 13a1b7812c46..bd65ae205f8d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -393,7 +393,7 @@ struct ena_aenq_handlers {
  */
 int ena_com_mmio_reg_read_request_init(struct ena_com_dev *ena_dev);
 
-/* ena_com_set_mmio_read_mode - Enable/disable the mmio reg read mechanism
+/* ena_com_set_mmio_read_mode - Enable/disable the indirect mmio reg read mechanism
  * @ena_dev: ENA communication layer struct
  * @readless_supported: readless mode (enable/disable)
  */
@@ -515,7 +515,7 @@ void ena_com_set_admin_auto_polling_mode(struct ena_com_dev *ena_dev,
 /* ena_com_admin_q_comp_intr_handler - admin queue interrupt handler
  * @ena_dev: ENA communication layer struct
  *
- * This method go over the admin completion queue and wake up all the pending
+ * This method goes over the admin completion queue and wakes up all the pending
  * threads that wait on the commands wait event.
  *
  * @note: Should be called after MSI-X interrupt.
@@ -525,7 +525,7 @@ void ena_com_admin_q_comp_intr_handler(struct ena_com_dev *ena_dev);
 /* ena_com_aenq_intr_handler - AENQ interrupt handler
  * @ena_dev: ENA communication layer struct
  *
- * This method go over the async event notification queue and call the proper
+ * This method goes over the async event notification queue and calls the proper
  * aenq handler.
  */
 void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data);
@@ -542,14 +542,14 @@ void ena_com_abort_admin_commands(struct ena_com_dev *ena_dev);
 /* ena_com_wait_for_abort_completion - Wait for admin commands abort.
  * @ena_dev: ENA communication layer struct
  *
- * This method wait until all the outstanding admin commands will be completed.
+ * This method waits until all the outstanding admin commands are completed.
  */
 void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev);
 
 /* ena_com_validate_version - Validate the device parameters
  * @ena_dev: ENA communication layer struct
  *
- * This method validate the device parameters are the same as the saved
+ * This method verifies the device parameters are the same as the saved
  * parameters in ena_dev.
  * This method is useful after device reset, to validate the device mac address
  * and the device offloads are the same as before the reset.
@@ -689,7 +689,7 @@ int ena_com_set_hash_function(struct ena_com_dev *ena_dev);
  *
  * Retrieve the hash function from the device.
  *
- * @note: If the caller called ena_com_fill_hash_function but didn't flash
+ * @note: If the caller called ena_com_fill_hash_function but didn't flush
  * it to the device, the new configuration will be lost.
  *
  * @return: 0 on Success and negative value otherwise.
@@ -703,7 +703,7 @@ int ena_com_get_hash_function(struct ena_com_dev *ena_dev,
  *
  * Retrieve the hash key.
  *
- * @note: If the caller called ena_com_fill_hash_key but didn't flash
+ * @note: If the caller called ena_com_fill_hash_key but didn't flush
  * it to the device, the new configuration will be lost.
  *
  * @return: 0 on Success and negative value otherwise.
@@ -743,7 +743,7 @@ int ena_com_set_hash_ctrl(struct ena_com_dev *ena_dev);
  *
  * Retrieve the hash control from the device.
  *
- * @note, If the caller called ena_com_fill_hash_ctrl but didn't flash
+ * @note: If the caller called ena_com_fill_hash_ctrl but didn't flush
  * it to the device, the new configuration will be lost.
  *
  * @return: 0 on Success and negative value otherwise.
@@ -795,7 +795,7 @@ int ena_com_indirect_table_set(struct ena_com_dev *ena_dev);
  *
  * Retrieve the RSS indirection table from the device.
  *
- * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flash
+ * @note: If the caller called ena_com_indirect_table_fill_entry but didn't flush
  * it to the device, the new configuration will be lost.
  *
  * @return: 0 on Success and negative value otherwise.
@@ -821,14 +821,14 @@ int ena_com_allocate_debug_area(struct ena_com_dev *ena_dev,
 /* ena_com_delete_debug_area - Free the debug area resources.
  * @ena_dev: ENA communication layer struct
  *
- * Free the allocate debug area.
+ * Free the allocated debug area.
  */
 void ena_com_delete_debug_area(struct ena_com_dev *ena_dev);
 
 /* ena_com_delete_host_info - Free the host info resources.
  * @ena_dev: ENA communication layer struct
  *
- * Free the allocate host info.
+ * Free the allocated host info.
  */
 void ena_com_delete_host_info(struct ena_com_dev *ena_dev);
 
@@ -869,9 +869,9 @@ int ena_com_destroy_io_cq(struct ena_com_dev *ena_dev,
  * @cmd_completion: command completion return value.
  * @cmd_comp_size: command completion size.
 
- * Submit an admin command and then wait until the device will return a
+ * Submit an admin command and then wait until the device returns a
  * completion.
- * The completion will be copyed into cmd_comp.
+ * The completion will be copied into cmd_comp.
  *
  * @return - 0 on success, negative value on failure.
  */
@@ -934,7 +934,7 @@ unsigned int ena_com_get_nonadaptive_moderation_interval_rx(struct ena_com_dev *
 /* ena_com_config_dev_mode - Configure the placement policy of the device.
  * @ena_dev: ENA communication layer struct
  * @llq_features: LLQ feature descriptor, retrieve via
- *                ena_com_get_dev_attr_feat.
+ *		   ena_com_get_dev_attr_feat.
  * @ena_llq_config: The default driver LLQ parameters configurations
  */
 int ena_com_config_dev_mode(struct ena_com_dev *ena_dev,
@@ -960,7 +960,7 @@ static inline void ena_com_disable_adaptive_moderation(struct ena_com_dev *ena_d
  * @intr_reg: interrupt register to update.
  * @rx_delay_interval: Rx interval in usecs
  * @tx_delay_interval: Tx interval in usecs
- * @unmask: unask enable/disable
+ * @unmask: unmask enable/disable
  *
  * Prepare interrupt update register with the supplied parameters.
  */
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 0999fe3310fb..0349e0305608 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -4190,7 +4190,7 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 	calc_queue_ctx.get_feat_ctx = &get_feat_ctx;
 	calc_queue_ctx.pdev = pdev;
 
-	/* Initial Tx and RX interrupt delay. Assumes 1 usec granularity.
+	/* Initial TX and RX interrupt delay. Assumes 1 usec granularity.
 	 * Updated during device initialization with the real granularity
 	 */
 	ena_dev->intr_moder_tx_interval = ENA_INTR_INITIAL_TX_INTERVAL_USECS;
-- 
cgit v1.2.3-59-g8ed1b


From 46143e58884025292bd977ca8bc0fdd9dac47c4c Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:00 +0300
Subject: net: ena: cosmetic: fix line break issues

1. Join unnecessarily broken short lines in ena_com.c and ena_netdev.c
2. Fix indentation of broken lines

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    |  8 +++-----
 drivers/net/ethernet/amazon/ena/ena_netdev.c | 13 ++++++-------
 2 files changed, 9 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index d47821655d61..a513d71576bd 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -375,7 +375,7 @@ static int ena_com_init_io_sq(struct ena_com_dev *ena_dev,
 		io_sq->bounce_buf_ctrl.next_to_use = 0;
 
 		size = io_sq->bounce_buf_ctrl.buffer_size *
-			 io_sq->bounce_buf_ctrl.buffers_num;
+			io_sq->bounce_buf_ctrl.buffers_num;
 
 		dev_node = dev_to_node(ena_dev->dmadev);
 		set_dev_node(ena_dev->dmadev, ctx->numa_node);
@@ -699,8 +699,7 @@ static int ena_com_config_llq_info(struct ena_com_dev *ena_dev,
 		/* The desc list entry size should be whole multiply of 8
 		 * This requirement comes from __iowrite64_copy()
 		 */
-		pr_err("illegal entry size %d\n",
-		       llq_info->desc_list_entry_size);
+		pr_err("illegal entry size %d\n", llq_info->desc_list_entry_size);
 		return -EINVAL;
 	}
 
@@ -2045,8 +2044,7 @@ void ena_com_aenq_intr_handler(struct ena_com_dev *dev, void *data)
 
 	/* write the aenq doorbell after all AENQ descriptors were read */
 	mb();
-	writel_relaxed((u32)aenq->head,
-		       dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
+	writel_relaxed((u32)aenq->head, dev->reg_bar + ENA_REGS_AENQ_HEAD_DB_OFF);
 }
 
 int ena_com_dev_reset(struct ena_com_dev *ena_dev,
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 0349e0305608..148d13cdd1bf 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -2242,7 +2242,7 @@ static int ena_rss_configure(struct ena_adapter *adapter)
 		rc = ena_rss_init_default(adapter);
 		if (rc && (rc != -EOPNOTSUPP)) {
 			netif_err(adapter, ifup, adapter->netdev,
-					"Failed to init RSS rc: %d\n", rc);
+				  "Failed to init RSS rc: %d\n", rc);
 			return rc;
 		}
 	}
@@ -2315,7 +2315,7 @@ static int ena_create_io_tx_queue(struct ena_adapter *adapter, int qid)
 	if (rc) {
 		netif_err(adapter, ifup, adapter->netdev,
 			  "Failed to create I/O TX queue num %d rc: %d\n",
-			   qid, rc);
+			  qid, rc);
 		return rc;
 	}
 
@@ -2464,7 +2464,7 @@ static int create_queues_with_size_backoff(struct ena_adapter *adapter)
 	 * ones due to past queue allocation failures.
 	 */
 	set_io_rings_size(adapter, adapter->requested_tx_ring_size,
-			adapter->requested_rx_ring_size);
+			  adapter->requested_rx_ring_size);
 
 	while (1) {
 		if (ena_xdp_present(adapter)) {
@@ -2505,7 +2505,7 @@ err_setup_tx:
 		if (rc != -ENOMEM) {
 			netif_err(adapter, ifup, adapter->netdev,
 				  "Queue creation failed with error code %d\n",
-				   rc);
+				  rc);
 			return rc;
 		}
 
@@ -2528,7 +2528,7 @@ err_setup_tx:
 			new_rx_ring_size = cur_rx_ring_size / 2;
 
 		if (new_tx_ring_size < ENA_MIN_RING_SIZE ||
-				new_rx_ring_size < ENA_MIN_RING_SIZE) {
+		    new_rx_ring_size < ENA_MIN_RING_SIZE) {
 			netif_err(adapter, ifup, adapter->netdev,
 				  "Queue creation failed with the smallest possible queue size of %d for both queues. Not retrying with smaller queues\n",
 				  ENA_MIN_RING_SIZE);
@@ -3087,8 +3087,7 @@ static u16 ena_select_queue(struct net_device *dev, struct sk_buff *skb,
 	return qid;
 }
 
-static void ena_config_host_info(struct ena_com_dev *ena_dev,
-				 struct pci_dev *pdev)
+static void ena_config_host_info(struct ena_com_dev *ena_dev, struct pci_dev *pdev)
 {
 	struct ena_admin_host_info *host_info;
 	int rc;
-- 
cgit v1.2.3-59-g8ed1b


From f3020447471c40024fbb4e5d720861b25ce46717 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:01 +0300
Subject: net: ena: cosmetic: remove unnecessary code

1. Remove unused definition of DRV_MODULE_VERSION
2. Remove {} from single line-of-code ifs
3. Remove unnecessary comments from ena_get/set_coalesce()
4. Remove unnecessary extra spaces and newlines

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_ethtool.c | 11 +++--------
 drivers/net/ethernet/amazon/ena/ena_netdev.h  |  6 ------
 2 files changed, 3 insertions(+), 14 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_ethtool.c b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
index ca13efa13b63..e340b65af08c 100644
--- a/drivers/net/ethernet/amazon/ena/ena_ethtool.c
+++ b/drivers/net/ethernet/amazon/ena/ena_ethtool.c
@@ -206,7 +206,7 @@ int ena_get_sset_count(struct net_device *netdev, int sset)
 	if (sset != ETH_SS_STATS)
 		return -EOPNOTSUPP;
 
-	return  adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
+	return adapter->num_io_queues * (ENA_STATS_ARRAY_TX + ENA_STATS_ARRAY_RX)
 		+ ENA_STATS_ARRAY_GLOBAL + ENA_STATS_ARRAY_ENA_COM;
 }
 
@@ -260,7 +260,6 @@ static void ena_get_strings(struct net_device *netdev, u32 sset, u8 *data)
 
 	for (i = 0; i < ENA_STATS_ARRAY_GLOBAL; i++) {
 		ena_stats = &ena_stats_global_strings[i];
-
 		memcpy(data, ena_stats->name, ETH_GSTRING_LEN);
 		data += ETH_GSTRING_LEN;
 	}
@@ -307,10 +306,8 @@ static int ena_get_coalesce(struct net_device *net_dev,
 	struct ena_adapter *adapter = netdev_priv(net_dev);
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 
-	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
-		/* the devie doesn't support interrupt moderation */
+	if (!ena_com_interrupt_moderation_supported(ena_dev))
 		return -EOPNOTSUPP;
-	}
 
 	coalesce->tx_coalesce_usecs =
 		ena_com_get_nonadaptive_moderation_interval_tx(ena_dev) *
@@ -355,10 +352,8 @@ static int ena_set_coalesce(struct net_device *net_dev,
 	struct ena_com_dev *ena_dev = adapter->ena_dev;
 	int rc;
 
-	if (!ena_com_interrupt_moderation_supported(ena_dev)) {
-		/* the devie doesn't support interrupt moderation */
+	if (!ena_com_interrupt_moderation_supported(ena_dev))
 		return -EOPNOTSUPP;
-	}
 
 	rc = ena_com_update_nonadaptive_moderation_interval_tx(ena_dev,
 							       coalesce->tx_coalesce_usecs);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 680099afcccf..5320b916a36b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -50,12 +50,6 @@
 #define DRV_MODULE_GEN_SUBMINOR 0
 
 #define DRV_MODULE_NAME		"ena"
-#ifndef DRV_MODULE_GENERATION
-#define DRV_MODULE_GENERATION \
-	__stringify(DRV_MODULE_GEN_MAJOR) "."	\
-	__stringify(DRV_MODULE_GEN_MINOR) "."	\
-	__stringify(DRV_MODULE_GEN_SUBMINOR) "K"
-#endif
 
 #define DEVICE_NAME	"Elastic Network Adapter (ENA)"
 
-- 
cgit v1.2.3-59-g8ed1b


From 0a39a35f3f27f833282a2a87c26894167f336ba2 Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:02 +0300
Subject: net: ena: cosmetic: code reorderings

1. Reorder sanity checks in get_comp_ctxt() to make more sense
2. Reorder variables in ena_com_fill_hash_function() and
   ena_calc_io_queue_size() in reverse christmas tree.
3. Move around member initializations.

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    | 17 +++++++++--------
 drivers/net/ethernet/amazon/ena/ena_netdev.c |  5 ++---
 2 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index a513d71576bd..bf3465e5a2e7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -200,17 +200,17 @@ static void comp_ctxt_release(struct ena_com_admin_queue *queue,
 static struct ena_comp_ctx *get_comp_ctxt(struct ena_com_admin_queue *queue,
 					  u16 command_id, bool capture)
 {
-	if (unlikely(!queue->comp_ctx)) {
-		pr_err("Completion context is NULL\n");
-		return NULL;
-	}
-
 	if (unlikely(command_id >= queue->q_depth)) {
 		pr_err("command id is larger than the queue size. cmd_id: %u queue size %d\n",
 		       command_id, queue->q_depth);
 		return NULL;
 	}
 
+	if (unlikely(!queue->comp_ctx)) {
+		pr_err("Completion context is NULL\n");
+		return NULL;
+	}
+
 	if (unlikely(queue->comp_ctx[command_id].occupied && capture)) {
 		pr_err("Completion context is occupied\n");
 		return NULL;
@@ -2266,13 +2266,14 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 			       enum ena_admin_hash_functions func,
 			       const u8 *key, u16 key_len, u32 init_val)
 {
-	struct ena_rss *rss = &ena_dev->rss;
+	struct ena_admin_feature_rss_flow_hash_control *hash_key;
 	struct ena_admin_get_feat_resp get_resp;
-	struct ena_admin_feature_rss_flow_hash_control *hash_key =
-		rss->hash_key;
 	enum ena_admin_hash_functions old_func;
+	struct ena_rss *rss = &ena_dev->rss;
 	int rc;
 
+	hash_key = rss->hash_key;
+
 	/* Make sure size is a mult of DWs */
 	if (unlikely(key_len & 0x3))
 		return -EINVAL;
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 148d13cdd1bf..313e65b17492 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -4121,8 +4121,8 @@ static int ena_calc_io_queue_size(struct ena_calc_queue_size_ctx *ctx)
  */
 static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
-	struct ena_com_dev_get_features_ctx get_feat_ctx;
 	struct ena_calc_queue_size_ctx calc_queue_ctx = { 0 };
+	struct ena_com_dev_get_features_ctx get_feat_ctx;
 	struct ena_llq_configurations llq_config;
 	struct ena_com_dev *ena_dev = NULL;
 	struct ena_adapter *adapter;
@@ -4233,12 +4233,11 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 
 	adapter->num_io_queues = max_num_io_queues;
 	adapter->max_num_io_queues = max_num_io_queues;
+	adapter->last_monitored_tx_qid = 0;
 
 	adapter->xdp_first_ring = 0;
 	adapter->xdp_num_queues = 0;
 
-	adapter->last_monitored_tx_qid = 0;
-
 	adapter->rx_copybreak = ENA_DEFAULT_RX_COPYBREAK;
 	adapter->wd_state = wd_state;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6d0862e0ec92d05e0b5bb9e46133e7460c0c675a Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:03 +0300
Subject: net: ena: cosmetic: fix spacing issues

1. Add leading and trailing spaces to several comments for better
   readability
2. Make tabs and spaces uniform in enum defines in ena_admin_defs.h

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_admin_defs.h  | 6 +++---
 drivers/net/ethernet/amazon/ena/ena_common_defs.h | 2 +-
 drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h | 2 +-
 drivers/net/ethernet/amazon/ena/ena_regs_defs.h   | 2 +-
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
index 727836f638ad..336742f6e3c3 100644
--- a/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_admin_defs.h
@@ -768,8 +768,8 @@ enum ena_admin_os_type {
 	ENA_ADMIN_OS_DPDK                           = 3,
 	ENA_ADMIN_OS_FREEBSD                        = 4,
 	ENA_ADMIN_OS_IPXE                           = 5,
-	ENA_ADMIN_OS_ESXI			    = 6,
-	ENA_ADMIN_OS_GROUPS_NUM			    = 6,
+	ENA_ADMIN_OS_ESXI                           = 6,
+	ENA_ADMIN_OS_GROUPS_NUM                     = 6,
 };
 
 struct ena_admin_host_info {
@@ -1136,4 +1136,4 @@ struct ena_admin_ena_mmio_req_read_less_resp {
 /* aenq_link_change_desc */
 #define ENA_ADMIN_AENQ_LINK_CHANGE_DESC_LINK_STATUS_MASK    BIT(0)
 
-#endif /*_ENA_ADMIN_H_ */
+#endif /* _ENA_ADMIN_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_common_defs.h b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
index 23beb7e7ed7b..8a8ded0de9ac 100644
--- a/drivers/net/ethernet/amazon/ena/ena_common_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_common_defs.h
@@ -45,4 +45,4 @@ struct ena_common_mem_addr {
 	u16 reserved16;
 };
 
-#endif /*_ENA_COMMON_H_ */
+#endif /* _ENA_COMMON_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
index ee28fb067d8c..d105c9c56192 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_io_defs.h
@@ -414,4 +414,4 @@ struct ena_eth_io_numa_node_cfg_reg {
 #define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_SHIFT          31
 #define ENA_ETH_IO_NUMA_NODE_CFG_REG_ENABLED_MASK           BIT(31)
 
-#endif /*_ENA_ETH_IO_H_ */
+#endif /* _ENA_ETH_IO_H_ */
diff --git a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
index 04fcafcc059c..b514bb1b855d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
+++ b/drivers/net/ethernet/amazon/ena/ena_regs_defs.h
@@ -154,4 +154,4 @@ enum ena_regs_reset_reason_types {
 #define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_SHIFT          16
 #define ENA_REGS_RSS_IND_ENTRY_UPDATE_CQ_IDX_MASK           0xffff0000
 
-#endif /*_ENA_REGS_H_ */
+#endif /* _ENA_REGS_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From b0ae3ac4847c6009e7b1963899cd121e84e5082a Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:04 +0300
Subject: net: ena: cosmetic: minor code changes

1. Use BIT macro instead of shift operator for code clarity
2. Replace multiple flag assignments with a single assignment of multiple
   flags in ena_com_add_single_rx_desc()
3. Move ENA_HASH_KEY_SIZE from ena_netdev.h to ena_com.h

Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c     | 2 +-
 drivers/net/ethernet/amazon/ena/ena_com.h     | 2 ++
 drivers/net/ethernet/amazon/ena/ena_eth_com.c | 8 ++++----
 drivers/net/ethernet/amazon/ena/ena_netdev.h  | 2 --
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index bf3465e5a2e7..4b1dbedbe921 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -2285,7 +2285,7 @@ int ena_com_fill_hash_function(struct ena_com_dev *ena_dev,
 	if (unlikely(rc))
 		return rc;
 
-	if (!((1 << func) & get_resp.u.flow_hash_func.supported_func)) {
+	if (!(BIT(func) & get_resp.u.flow_hash_func.supported_func)) {
 		pr_err("Flow hash function %d isn't supported\n", func);
 		return -EOPNOTSUPP;
 	}
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index bd65ae205f8d..325c9a5f677b 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -77,6 +77,8 @@
 #define ENA_INTR_INITIAL_RX_INTERVAL_USECS 0
 #define ENA_DEFAULT_INTR_DELAY_RESOLUTION 1
 
+#define ENA_HASH_KEY_SIZE 40
+
 #define ENA_HW_HINTS_NO_TIMEOUT	0xFFFF
 
 #define ENA_FEATURE_MAX_QUEUE_EXT_VER 1
diff --git a/drivers/net/ethernet/amazon/ena/ena_eth_com.c b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
index a014f514c069..ec8ea25e988d 100644
--- a/drivers/net/ethernet/amazon/ena/ena_eth_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_eth_com.c
@@ -584,10 +584,10 @@ int ena_com_add_single_rx_desc(struct ena_com_io_sq *io_sq,
 
 	desc->length = ena_buf->len;
 
-	desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK;
-	desc->ctrl |= ENA_ETH_IO_RX_DESC_LAST_MASK;
-	desc->ctrl |= io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK;
-	desc->ctrl |= ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
+	desc->ctrl = ENA_ETH_IO_RX_DESC_FIRST_MASK |
+		ENA_ETH_IO_RX_DESC_LAST_MASK |
+		(io_sq->phase & ENA_ETH_IO_RX_DESC_PHASE_MASK) |
+		ENA_ETH_IO_RX_DESC_COMP_REQ_MASK;
 
 	desc->req_id = req_id;
 
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 5320b916a36b..9b3948c7e8a0 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -98,8 +98,6 @@
 #define ENA_RX_RSS_TABLE_LOG_SIZE  7
 #define ENA_RX_RSS_TABLE_SIZE	(1 << ENA_RX_RSS_TABLE_LOG_SIZE)
 
-#define ENA_HASH_KEY_SIZE	40
-
 /* The number of tx packet completions that will be handled each NAPI poll
  * cycle is ring_size / ENA_TX_POLL_BUDGET_DIVIDER.
  */
-- 
cgit v1.2.3-59-g8ed1b


From 4bb7f4cf60e38a00965d22aa5979ab143193d41f Mon Sep 17 00:00:00 2001
From: Arthur Kiyanovski <akiyano@amazon.com>
Date: Fri, 22 May 2020 12:09:05 +0300
Subject: net: ena: reduce driver load time

This commit reduces the driver load time by using usec resolution
instead of msec when polling for hardware state change.

Also add a back-off mechanism to handle cases where the minimal sleep
time is not enough.

Signed-off-by: Josh Triplett <josh@joshtriplett.org>
Signed-off-by: Sameeh Jubran <sameehj@amazon.com>
Signed-off-by: Arthur Kiyanovski <akiyano@amazon.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/amazon/ena/ena_com.c    | 36 ++++++++++++++++++++--------
 drivers/net/ethernet/amazon/ena/ena_com.h    |  3 +++
 drivers/net/ethernet/amazon/ena/ena_netdev.c |  2 ++
 drivers/net/ethernet/amazon/ena/ena_netdev.h |  2 ++
 4 files changed, 33 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_com.c b/drivers/net/ethernet/amazon/ena/ena_com.c
index 4b1dbedbe921..432f143559a1 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.c
+++ b/drivers/net/ethernet/amazon/ena/ena_com.c
@@ -62,7 +62,9 @@
 
 #define ENA_REGS_ADMIN_INTR_MASK 1
 
-#define ENA_POLL_MS	5
+#define ENA_MIN_ADMIN_POLL_US 100
+
+#define ENA_MAX_ADMIN_POLL_US 5000
 
 /*****************************************************************************/
 /*****************************************************************************/
@@ -540,12 +542,20 @@ static int ena_com_comp_status_to_errno(u8 comp_status)
 	return -EINVAL;
 }
 
+static void ena_delay_exponential_backoff_us(u32 exp, u32 delay_us)
+{
+	delay_us = max_t(u32, ENA_MIN_ADMIN_POLL_US, delay_us);
+	delay_us = min_t(u32, delay_us * (1U << exp), ENA_MAX_ADMIN_POLL_US);
+	usleep_range(delay_us, 2 * delay_us);
+}
+
 static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_ctx,
 						     struct ena_com_admin_queue *admin_queue)
 {
 	unsigned long flags = 0;
 	unsigned long timeout;
 	int ret;
+	u32 exp = 0;
 
 	timeout = jiffies + usecs_to_jiffies(admin_queue->completion_timeout);
 
@@ -569,7 +579,8 @@ static int ena_com_wait_and_process_admin_cq_polling(struct ena_comp_ctx *comp_c
 			goto err;
 		}
 
-		msleep(ENA_POLL_MS);
+		ena_delay_exponential_backoff_us(exp++,
+						 admin_queue->ena_dev->ena_min_poll_delay_us);
 	}
 
 	if (unlikely(comp_ctx->status == ENA_CMD_ABORTED)) {
@@ -939,12 +950,13 @@ static void ena_com_io_queue_free(struct ena_com_dev *ena_dev,
 static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
 				u16 exp_state)
 {
-	u32 val, i;
+	u32 val, exp = 0;
+	unsigned long timeout_stamp;
 
-	/* Convert timeout from resolution of 100ms to ENA_POLL_MS */
-	timeout = (timeout * 100) / ENA_POLL_MS;
+	/* Convert timeout from resolution of 100ms to us resolution. */
+	timeout_stamp = jiffies + usecs_to_jiffies(100 * 1000 * timeout);
 
-	for (i = 0; i < timeout; i++) {
+	while (1) {
 		val = ena_com_reg_bar_read32(ena_dev, ENA_REGS_DEV_STS_OFF);
 
 		if (unlikely(val == ENA_MMIO_READ_TIMEOUT)) {
@@ -956,10 +968,11 @@ static int wait_for_reset_state(struct ena_com_dev *ena_dev, u32 timeout,
 			exp_state)
 			return 0;
 
-		msleep(ENA_POLL_MS);
-	}
+		if (time_is_before_jiffies(timeout_stamp))
+			return -ETIME;
 
-	return -ETIME;
+		ena_delay_exponential_backoff_us(exp++, ena_dev->ena_min_poll_delay_us);
+	}
 }
 
 static bool ena_com_check_supported_feature_id(struct ena_com_dev *ena_dev,
@@ -1436,11 +1449,13 @@ void ena_com_wait_for_abort_completion(struct ena_com_dev *ena_dev)
 {
 	struct ena_com_admin_queue *admin_queue = &ena_dev->admin_queue;
 	unsigned long flags = 0;
+	u32 exp = 0;
 
 	spin_lock_irqsave(&admin_queue->q_lock, flags);
 	while (atomic_read(&admin_queue->outstanding_cmds) != 0) {
 		spin_unlock_irqrestore(&admin_queue->q_lock, flags);
-		msleep(ENA_POLL_MS);
+		ena_delay_exponential_backoff_us(exp++,
+						 ena_dev->ena_min_poll_delay_us);
 		spin_lock_irqsave(&admin_queue->q_lock, flags);
 	}
 	spin_unlock_irqrestore(&admin_queue->q_lock, flags);
@@ -1788,6 +1803,7 @@ int ena_com_admin_init(struct ena_com_dev *ena_dev,
 	if (ret)
 		goto error;
 
+	admin_queue->ena_dev = ena_dev;
 	admin_queue->running_state = true;
 
 	return 0;
diff --git a/drivers/net/ethernet/amazon/ena/ena_com.h b/drivers/net/ethernet/amazon/ena/ena_com.h
index 325c9a5f677b..bc187adf54e4 100644
--- a/drivers/net/ethernet/amazon/ena/ena_com.h
+++ b/drivers/net/ethernet/amazon/ena/ena_com.h
@@ -239,6 +239,7 @@ struct ena_com_stats_admin {
 
 struct ena_com_admin_queue {
 	void *q_dmadev;
+	struct ena_com_dev *ena_dev;
 	spinlock_t q_lock; /* spinlock for the admin queue */
 
 	struct ena_comp_ctx *comp_ctx;
@@ -351,6 +352,8 @@ struct ena_com_dev {
 	struct ena_intr_moder_entry *intr_moder_tbl;
 
 	struct ena_com_llq_info llq_info;
+
+	u32 ena_min_poll_delay_us;
 };
 
 struct ena_com_dev_get_features_ctx {
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 313e65b17492..46865d5bd7e7 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -4166,6 +4166,8 @@ static int ena_probe(struct pci_dev *pdev, const struct pci_device_id *ent)
 		goto err_free_region;
 	}
 
+	ena_dev->ena_min_poll_delay_us = ENA_ADMIN_POLL_DELAY_US;
+
 	ena_dev->dmadev = &pdev->dev;
 
 	rc = ena_device_init(ena_dev, pdev, &get_feat_ctx, &wd_state);
diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.h b/drivers/net/ethernet/amazon/ena/ena_netdev.h
index 9b3948c7e8a0..ba030d260940 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.h
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.h
@@ -129,6 +129,8 @@
 #define ENA_IO_IRQ_FIRST_IDX		1
 #define ENA_IO_IRQ_IDX(q)		(ENA_IO_IRQ_FIRST_IDX + (q))
 
+#define ENA_ADMIN_POLL_DELAY_US 100
+
 /* ENA device should send keep alive msg every 1 sec.
  * We wait for 6 sec just to be on the safe side.
  */
-- 
cgit v1.2.3-59-g8ed1b


From 9f9d1e63dc551bf2799fab2f3925cb836410c6d5 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:50 +0200
Subject: dt-bindings: convert the binding document for mediatek PERICFG to
 yaml

Convert the DT binding .txt file for MediaTek's peripheral configuration
controller to YAML. There's one special case where the compatible has
three positions. Otherwise, it's a pretty normal syscon.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../bindings/arm/mediatek/mediatek,pericfg.txt     | 36 -------------
 .../bindings/arm/mediatek/mediatek,pericfg.yaml    | 63 ++++++++++++++++++++++
 2 files changed, 63 insertions(+), 36 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
 create mode 100644 Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
deleted file mode 100644
index ecf027a9003a..000000000000
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.txt
+++ /dev/null
@@ -1,36 +0,0 @@
-Mediatek pericfg controller
-===========================
-
-The Mediatek pericfg controller provides various clocks and reset
-outputs to the system.
-
-Required Properties:
-
-- compatible: Should be one of:
-	- "mediatek,mt2701-pericfg", "syscon"
-	- "mediatek,mt2712-pericfg", "syscon"
-	- "mediatek,mt7622-pericfg", "syscon"
-	- "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon"
-	- "mediatek,mt7629-pericfg", "syscon"
-	- "mediatek,mt8135-pericfg", "syscon"
-	- "mediatek,mt8173-pericfg", "syscon"
-	- "mediatek,mt8183-pericfg", "syscon"
-- #clock-cells: Must be 1
-- #reset-cells: Must be 1
-
-The pericfg controller uses the common clk binding from
-Documentation/devicetree/bindings/clock/clock-bindings.txt
-The available clocks are defined in dt-bindings/clock/mt*-clk.h.
-Also it uses the common reset controller binding from
-Documentation/devicetree/bindings/reset/reset.txt.
-The available reset outputs are defined in
-dt-bindings/reset/mt*-resets.h
-
-Example:
-
-pericfg: power-controller@10003000 {
-	compatible = "mediatek,mt8173-pericfg", "syscon";
-	reg = <0 0x10003000 0 0x1000>;
-	#clock-cells = <1>;
-	#reset-cells = <1>;
-};
diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
new file mode 100644
index 000000000000..1340c6288024
--- /dev/null
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
@@ -0,0 +1,63 @@
+# SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: "http://devicetree.org/schemas/arm/mediatek/mediatek,pericfg.yaml#"
+$schema: "http://devicetree.org/meta-schemas/core.yaml#"
+
+title: MediaTek Peripheral Configuration Controller
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description:
+  The Mediatek pericfg controller provides various clocks and reset outputs
+  to the system.
+
+properties:
+  compatible:
+    oneOf:
+      - items:
+        - enum:
+          - mediatek,mt2701-pericfg
+          - mediatek,mt2712-pericfg
+          - mediatek,mt7622-pericfg
+          - mediatek,mt7629-pericfg
+          - mediatek,mt8135-pericfg
+          - mediatek,mt8173-pericfg
+          - mediatek,mt8183-pericfg
+        - const: syscon
+      - items:
+        # Special case for mt7623 for backward compatibility
+        - const: mediatek,mt7623-pericfg
+        - const: mediatek,mt2701-pericfg
+        - const: syscon
+
+  reg:
+    maxItems: 1
+
+  '#clock-cells':
+    const: 1
+
+  '#reset-cells':
+    const: 1
+
+required:
+  - compatible
+  - reg
+
+examples:
+  - |
+    pericfg@10003000 {
+        compatible = "mediatek,mt8173-pericfg", "syscon";
+        reg = <0x10003000 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
+
+  - |
+    pericfg@10003000 {
+        compatible =  "mediatek,mt7623-pericfg", "mediatek,mt2701-pericfg", "syscon";
+        reg = <0x10003000 0x1000>;
+        #clock-cells = <1>;
+        #reset-cells = <1>;
+    };
-- 
cgit v1.2.3-59-g8ed1b


From fbc5d5ed96551df1700acebc5c31c8de300ccc50 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:51 +0200
Subject: dt-bindings: add new compatible to mediatek,pericfg

The PERICFG controller is present on the MT8516 SoC. Add an appropriate
compatible variant.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
index 1340c6288024..55209a2baedc 100644
--- a/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
+++ b/Documentation/devicetree/bindings/arm/mediatek/mediatek,pericfg.yaml
@@ -25,6 +25,7 @@ properties:
           - mediatek,mt8135-pericfg
           - mediatek,mt8173-pericfg
           - mediatek,mt8183-pericfg
+          - mediatek,mt8516-pericfg
         - const: syscon
       - items:
         # Special case for mt7623 for backward compatibility
-- 
cgit v1.2.3-59-g8ed1b


From 1d1ba1469243012ba961b887da9c12b196049b77 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:52 +0200
Subject: dt-bindings: net: add a binding document for MediaTek STAR Ethernet
 MAC

This adds yaml DT bindings for the MediaTek STAR Ethernet MAC present
on the mt8* family of SoCs.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/mediatek,eth-mac.yaml  | 89 ++++++++++++++++++++++
 1 file changed, 89 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml

diff --git a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
new file mode 100644
index 000000000000..f85d91a9d6e5
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/mediatek,eth-mac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek STAR Ethernet MAC Controller
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description:
+  This Ethernet MAC is used on the MT8* family of SoCs from MediaTek.
+  It's compliant with 802.3 standards and supports half- and full-duplex
+  modes with flow-control as well as CRC offloading and VLAN tags.
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt8516-eth
+      - mediatek,mt8518-eth
+      - mediatek,mt8175-eth
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    maxItems: 3
+
+  clock-names:
+    additionalItems: false
+    items:
+      - const: core
+      - const: reg
+      - const: trans
+
+  mediatek,pericfg:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Phandle to the device containing the PERICFG register range. This is used
+      to control the MII mode.
+
+  mdio:
+    type: object
+    description:
+      Creates and registers an MDIO bus.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - mediatek,pericfg
+  - phy-handle
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mt8516-clk.h>
+
+    ethernet: ethernet@11180000 {
+        compatible = "mediatek,mt8516-eth";
+        reg = <0x11180000 0x1000>;
+        mediatek,pericfg = <&pericfg>;
+        interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&topckgen CLK_TOP_RG_ETH>,
+                 <&topckgen CLK_TOP_66M_ETH>,
+                 <&topckgen CLK_TOP_133M_ETH>;
+        clock-names = "core", "reg", "trans";
+        phy-handle = <&eth_phy>;
+        phy-mode = "rmii";
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            eth_phy: ethernet-phy@0 {
+                reg = <0>;
+            };
+        };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From d3d6974bc539d2a4311a20230892a5e4b2fd5e8d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:53 +0200
Subject: net: ethernet: mediatek: rename Kconfig prompt

We'll soon be adding a second MediaTek Ethernet driver so modify the
Kconfig prompt.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 4968352ba188..5079b8090f16 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -1,6 +1,6 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config NET_VENDOR_MEDIATEK
-	bool "MediaTek ethernet driver"
+	bool "MediaTek devices"
 	depends on ARCH_MEDIATEK || SOC_MT7621 || SOC_MT7620
 	---help---
 	  If you have a Mediatek SoC with ethernet, say Y.
-- 
cgit v1.2.3-59-g8ed1b


From 22f076a279ecf78f4dc5697396a075651942ae5b Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:54 +0200
Subject: net: ethernet: mediatek: remove unnecessary spaces from Makefile

The Makefile formatting in the kernel tree usually doesn't use tabs,
so remove them before we add a second driver.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 2d8362f9341b..3362fb7ef859 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -3,5 +3,5 @@
 # Makefile for the Mediatek SoCs built-in ethernet macs
 #
 
-obj-$(CONFIG_NET_MEDIATEK_SOC)                 += mtk_eth.o
+obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
 mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
-- 
cgit v1.2.3-59-g8ed1b


From 8c7bd5a454ffc2b0518d1499c4af95f00291d2af Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:55 +0200
Subject: net: ethernet: mtk-star-emac: new driver

This adds the driver for the MediaTek STAR Ethernet MAC currently used
on the MT8* SoC family. For now we only support full-duplex.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/Kconfig         |    7 +
 drivers/net/ethernet/mediatek/Makefile        |    1 +
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 1678 +++++++++++++++++++++++++
 3 files changed, 1686 insertions(+)
 create mode 100644 drivers/net/ethernet/mediatek/mtk_star_emac.c

diff --git a/drivers/net/ethernet/mediatek/Kconfig b/drivers/net/ethernet/mediatek/Kconfig
index 5079b8090f16..500c15e7ea4a 100644
--- a/drivers/net/ethernet/mediatek/Kconfig
+++ b/drivers/net/ethernet/mediatek/Kconfig
@@ -14,4 +14,11 @@ config NET_MEDIATEK_SOC
 	  This driver supports the gigabit ethernet MACs in the
 	  MediaTek SoC family.
 
+config NET_MEDIATEK_STAR_EMAC
+	tristate "MediaTek STAR Ethernet MAC support"
+	select PHYLIB
+	help
+	  This driver supports the ethernet MAC IP first used on
+	  MediaTek MT85** SoCs.
+
 endif #NET_VENDOR_MEDIATEK
diff --git a/drivers/net/ethernet/mediatek/Makefile b/drivers/net/ethernet/mediatek/Makefile
index 3362fb7ef859..3a777b4a6cd3 100644
--- a/drivers/net/ethernet/mediatek/Makefile
+++ b/drivers/net/ethernet/mediatek/Makefile
@@ -5,3 +5,4 @@
 
 obj-$(CONFIG_NET_MEDIATEK_SOC) += mtk_eth.o
 mtk_eth-y := mtk_eth_soc.o mtk_sgmii.o mtk_eth_path.o
+obj-$(CONFIG_NET_MEDIATEK_STAR_EMAC) += mtk_star_emac.o
diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
new file mode 100644
index 000000000000..789c77af501f
--- /dev/null
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -0,0 +1,1678 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/*
+ * Copyright (c) 2020 MediaTek Corporation
+ * Copyright (c) 2020 BayLibre SAS
+ *
+ * Author: Bartosz Golaszewski <bgolaszewski@baylibre.com>
+ */
+
+#include <linux/bits.h>
+#include <linux/clk.h>
+#include <linux/compiler.h>
+#include <linux/dma-mapping.h>
+#include <linux/etherdevice.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/mii.h>
+#include <linux/module.h>
+#include <linux/netdevice.h>
+#include <linux/of.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
+#include <linux/platform_device.h>
+#include <linux/pm.h>
+#include <linux/regmap.h>
+#include <linux/skbuff.h>
+#include <linux/spinlock.h>
+#include <linux/workqueue.h>
+
+#define MTK_STAR_DRVNAME			"mtk_star_emac"
+
+#define MTK_STAR_WAIT_TIMEOUT			300
+#define MTK_STAR_MAX_FRAME_SIZE			1514
+#define MTK_STAR_SKB_ALIGNMENT			16
+#define MTK_STAR_NAPI_WEIGHT			64
+#define MTK_STAR_HASHTABLE_MC_LIMIT		256
+#define MTK_STAR_HASHTABLE_SIZE_MAX		512
+
+/* Normally we'd use NET_IP_ALIGN but on arm64 its value is 0 and it doesn't
+ * work for this controller.
+ */
+#define MTK_STAR_IP_ALIGN			2
+
+static const char *const mtk_star_clk_names[] = { "core", "reg", "trans" };
+#define MTK_STAR_NCLKS ARRAY_SIZE(mtk_star_clk_names)
+
+/* PHY Control Register 0 */
+#define MTK_STAR_REG_PHY_CTRL0			0x0000
+#define MTK_STAR_BIT_PHY_CTRL0_WTCMD		BIT(13)
+#define MTK_STAR_BIT_PHY_CTRL0_RDCMD		BIT(14)
+#define MTK_STAR_BIT_PHY_CTRL0_RWOK		BIT(15)
+#define MTK_STAR_MSK_PHY_CTRL0_PREG		GENMASK(12, 8)
+#define MTK_STAR_OFF_PHY_CTRL0_PREG		8
+#define MTK_STAR_MSK_PHY_CTRL0_RWDATA		GENMASK(31, 16)
+#define MTK_STAR_OFF_PHY_CTRL0_RWDATA		16
+
+/* PHY Control Register 1 */
+#define MTK_STAR_REG_PHY_CTRL1			0x0004
+#define MTK_STAR_BIT_PHY_CTRL1_LINK_ST		BIT(0)
+#define MTK_STAR_BIT_PHY_CTRL1_AN_EN		BIT(8)
+#define MTK_STAR_OFF_PHY_CTRL1_FORCE_SPD	9
+#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_10M	0x00
+#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_100M	0x01
+#define MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_1000M	0x02
+#define MTK_STAR_BIT_PHY_CTRL1_FORCE_DPX	BIT(11)
+#define MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_RX	BIT(12)
+#define MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_TX	BIT(13)
+
+/* MAC Configuration Register */
+#define MTK_STAR_REG_MAC_CFG			0x0008
+#define MTK_STAR_OFF_MAC_CFG_IPG		10
+#define MTK_STAR_VAL_MAC_CFG_IPG_96BIT		GENMASK(4, 0)
+#define MTK_STAR_BIT_MAC_CFG_MAXLEN_1522	BIT(16)
+#define MTK_STAR_BIT_MAC_CFG_AUTO_PAD		BIT(19)
+#define MTK_STAR_BIT_MAC_CFG_CRC_STRIP		BIT(20)
+#define MTK_STAR_BIT_MAC_CFG_VLAN_STRIP		BIT(22)
+#define MTK_STAR_BIT_MAC_CFG_NIC_PD		BIT(31)
+
+/* Flow-Control Configuration Register */
+#define MTK_STAR_REG_FC_CFG			0x000c
+#define MTK_STAR_BIT_FC_CFG_BP_EN		BIT(7)
+#define MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR	BIT(8)
+#define MTK_STAR_OFF_FC_CFG_SEND_PAUSE_TH	16
+#define MTK_STAR_MSK_FC_CFG_SEND_PAUSE_TH	GENMASK(27, 16)
+#define MTK_STAR_VAL_FC_CFG_SEND_PAUSE_TH_2K	0x800
+
+/* ARL Configuration Register */
+#define MTK_STAR_REG_ARL_CFG			0x0010
+#define MTK_STAR_BIT_ARL_CFG_HASH_ALG		BIT(0)
+#define MTK_STAR_BIT_ARL_CFG_MISC_MODE		BIT(4)
+
+/* MAC High and Low Bytes Registers */
+#define MTK_STAR_REG_MY_MAC_H			0x0014
+#define MTK_STAR_REG_MY_MAC_L			0x0018
+
+/* Hash Table Control Register */
+#define MTK_STAR_REG_HASH_CTRL			0x001c
+#define MTK_STAR_MSK_HASH_CTRL_HASH_BIT_ADDR	GENMASK(8, 0)
+#define MTK_STAR_BIT_HASH_CTRL_HASH_BIT_DATA	BIT(12)
+#define MTK_STAR_BIT_HASH_CTRL_ACC_CMD		BIT(13)
+#define MTK_STAR_BIT_HASH_CTRL_CMD_START	BIT(14)
+#define MTK_STAR_BIT_HASH_CTRL_BIST_OK		BIT(16)
+#define MTK_STAR_BIT_HASH_CTRL_BIST_DONE	BIT(17)
+#define MTK_STAR_BIT_HASH_CTRL_BIST_EN		BIT(31)
+
+/* TX DMA Control Register */
+#define MTK_STAR_REG_TX_DMA_CTRL		0x0034
+#define MTK_STAR_BIT_TX_DMA_CTRL_START		BIT(0)
+#define MTK_STAR_BIT_TX_DMA_CTRL_STOP		BIT(1)
+#define MTK_STAR_BIT_TX_DMA_CTRL_RESUME		BIT(2)
+
+/* RX DMA Control Register */
+#define MTK_STAR_REG_RX_DMA_CTRL		0x0038
+#define MTK_STAR_BIT_RX_DMA_CTRL_START		BIT(0)
+#define MTK_STAR_BIT_RX_DMA_CTRL_STOP		BIT(1)
+#define MTK_STAR_BIT_RX_DMA_CTRL_RESUME		BIT(2)
+
+/* DMA Address Registers */
+#define MTK_STAR_REG_TX_DPTR			0x003c
+#define MTK_STAR_REG_RX_DPTR			0x0040
+#define MTK_STAR_REG_TX_BASE_ADDR		0x0044
+#define MTK_STAR_REG_RX_BASE_ADDR		0x0048
+
+/* Interrupt Status Register */
+#define MTK_STAR_REG_INT_STS			0x0050
+#define MTK_STAR_REG_INT_STS_PORT_STS_CHG	BIT(2)
+#define MTK_STAR_REG_INT_STS_MIB_CNT_TH		BIT(3)
+#define MTK_STAR_BIT_INT_STS_FNRC		BIT(6)
+#define MTK_STAR_BIT_INT_STS_TNTC		BIT(8)
+
+/* Interrupt Mask Register */
+#define MTK_STAR_REG_INT_MASK			0x0054
+#define MTK_STAR_BIT_INT_MASK_FNRC		BIT(6)
+
+/* Misc. Config Register */
+#define MTK_STAR_REG_TEST1			0x005c
+#define MTK_STAR_BIT_TEST1_RST_HASH_MBIST	BIT(31)
+
+/* Extended Configuration Register */
+#define MTK_STAR_REG_EXT_CFG			0x0060
+#define MTK_STAR_OFF_EXT_CFG_SND_PAUSE_RLS	16
+#define MTK_STAR_MSK_EXT_CFG_SND_PAUSE_RLS	GENMASK(26, 16)
+#define MTK_STAR_VAL_EXT_CFG_SND_PAUSE_RLS_1K	0x400
+
+/* EthSys Configuration Register */
+#define MTK_STAR_REG_SYS_CONF			0x0094
+#define MTK_STAR_BIT_MII_PAD_OUT_ENABLE		BIT(0)
+#define MTK_STAR_BIT_EXT_MDC_MODE		BIT(1)
+#define MTK_STAR_BIT_SWC_MII_MODE		BIT(2)
+
+/* MAC Clock Configuration Register */
+#define MTK_STAR_REG_MAC_CLK_CONF		0x00ac
+#define MTK_STAR_MSK_MAC_CLK_CONF		GENMASK(7, 0)
+#define MTK_STAR_BIT_CLK_DIV_10			0x0a
+
+/* Counter registers. */
+#define MTK_STAR_REG_C_RXOKPKT			0x0100
+#define MTK_STAR_REG_C_RXOKBYTE			0x0104
+#define MTK_STAR_REG_C_RXRUNT			0x0108
+#define MTK_STAR_REG_C_RXLONG			0x010c
+#define MTK_STAR_REG_C_RXDROP			0x0110
+#define MTK_STAR_REG_C_RXCRC			0x0114
+#define MTK_STAR_REG_C_RXARLDROP		0x0118
+#define MTK_STAR_REG_C_RXVLANDROP		0x011c
+#define MTK_STAR_REG_C_RXCSERR			0x0120
+#define MTK_STAR_REG_C_RXPAUSE			0x0124
+#define MTK_STAR_REG_C_TXOKPKT			0x0128
+#define MTK_STAR_REG_C_TXOKBYTE			0x012c
+#define MTK_STAR_REG_C_TXPAUSECOL		0x0130
+#define MTK_STAR_REG_C_TXRTY			0x0134
+#define MTK_STAR_REG_C_TXSKIP			0x0138
+#define MTK_STAR_REG_C_TX_ARP			0x013c
+#define MTK_STAR_REG_C_RX_RERR			0x01d8
+#define MTK_STAR_REG_C_RX_UNI			0x01dc
+#define MTK_STAR_REG_C_RX_MULTI			0x01e0
+#define MTK_STAR_REG_C_RX_BROAD			0x01e4
+#define MTK_STAR_REG_C_RX_ALIGNERR		0x01e8
+#define MTK_STAR_REG_C_TX_UNI			0x01ec
+#define MTK_STAR_REG_C_TX_MULTI			0x01f0
+#define MTK_STAR_REG_C_TX_BROAD			0x01f4
+#define MTK_STAR_REG_C_TX_TIMEOUT		0x01f8
+#define MTK_STAR_REG_C_TX_LATECOL		0x01fc
+#define MTK_STAR_REG_C_RX_LENGTHERR		0x0214
+#define MTK_STAR_REG_C_RX_TWIST			0x0218
+
+/* Ethernet CFG Control */
+#define MTK_PERICFG_REG_NIC_CFG_CON		0x03c4
+#define MTK_PERICFG_MSK_NIC_CFG_CON_CFG_MII	GENMASK(3, 0)
+#define MTK_PERICFG_BIT_NIC_CFG_CON_RMII	BIT(0)
+
+/* Represents the actual structure of descriptors used by the MAC. We can
+ * reuse the same structure for both TX and RX - the layout is the same, only
+ * the flags differ slightly.
+ */
+struct mtk_star_ring_desc {
+	/* Contains both the status flags as well as packet length. */
+	u32 status;
+	u32 data_ptr;
+	u32 vtag;
+	u32 reserved;
+};
+
+#define MTK_STAR_DESC_MSK_LEN			GENMASK(15, 0)
+#define MTK_STAR_DESC_BIT_RX_CRCE		BIT(24)
+#define MTK_STAR_DESC_BIT_RX_OSIZE		BIT(25)
+#define MTK_STAR_DESC_BIT_INT			BIT(27)
+#define MTK_STAR_DESC_BIT_LS			BIT(28)
+#define MTK_STAR_DESC_BIT_FS			BIT(29)
+#define MTK_STAR_DESC_BIT_EOR			BIT(30)
+#define MTK_STAR_DESC_BIT_COWN			BIT(31)
+
+/* Helper structure for storing data read from/written to descriptors in order
+ * to limit reads from/writes to DMA memory.
+ */
+struct mtk_star_ring_desc_data {
+	unsigned int len;
+	unsigned int flags;
+	dma_addr_t dma_addr;
+	struct sk_buff *skb;
+};
+
+#define MTK_STAR_RING_NUM_DESCS			128
+#define MTK_STAR_NUM_TX_DESCS			MTK_STAR_RING_NUM_DESCS
+#define MTK_STAR_NUM_RX_DESCS			MTK_STAR_RING_NUM_DESCS
+#define MTK_STAR_NUM_DESCS_TOTAL		(MTK_STAR_RING_NUM_DESCS * 2)
+#define MTK_STAR_DMA_SIZE \
+		(MTK_STAR_NUM_DESCS_TOTAL * sizeof(struct mtk_star_ring_desc))
+
+/* Software state of one descriptor ring (used for both TX and RX). */
+struct mtk_star_ring {
+	/* First hardware descriptor of this ring. */
+	struct mtk_star_ring_desc *descs;
+	/* Per-slot skb and DMA mapping; cleared when a slot is popped. */
+	struct sk_buff *skbs[MTK_STAR_RING_NUM_DESCS];
+	dma_addr_t dma_addrs[MTK_STAR_RING_NUM_DESCS];
+	/* Index of the next slot to hand over to the DMA engine. */
+	unsigned int head;
+	/* Index of the next slot to reclaim from the DMA engine. */
+	unsigned int tail;
+};
+
+/* Driver private data, stored in the net_device private area. */
+struct mtk_star_priv {
+	struct net_device *ndev;
+
+	/* MAC registers and the pericfg registers (MII mode selection). */
+	struct regmap *regs;
+	struct regmap *pericfg;
+
+	struct clk_bulk_data clks[MTK_STAR_NCLKS];
+
+	/* CPU address of the descriptor area; descs_base is the same area
+	 * typed as a descriptor array and dma_addr is the address programmed
+	 * into the DMA base registers.
+	 */
+	void *ring_base;
+	struct mtk_star_ring_desc *descs_base;
+	dma_addr_t dma_addr;
+	struct mtk_star_ring tx_ring;
+	struct mtk_star_ring rx_ring;
+
+	struct mii_bus *mii;
+	struct napi_struct napi;
+
+	struct device_node *phy_node;
+	phy_interface_t phy_intf;
+	struct phy_device *phydev;
+	/* Last link parameters seen by mtk_star_adjust_link(). */
+	unsigned int link;
+	int speed;
+	int duplex;
+	int pause;
+
+	/* Protects against concurrent descriptor access. */
+	spinlock_t lock;
+
+	/* Accumulated hardware counters and the work item that collects them
+	 * when the MIB counter threshold interrupt fires.
+	 */
+	struct rtnl_link_stats64 stats;
+	struct work_struct stats_work;
+};
+
+/* Return the parent device of the net_device that @priv belongs to. */
+static struct device *mtk_star_get_dev(struct mtk_star_priv *priv)
+{
+	struct net_device *ndev = priv->ndev;
+
+	return ndev->dev.parent;
+}
+
+/* 32-bit registers at 4-byte strides. regmap's internal locking is turned
+ * off, so callers are responsible for any serialization they need.
+ */
+static const struct regmap_config mtk_star_regmap_config = {
+	.reg_bits		= 32,
+	.val_bits		= 32,
+	.reg_stride		= 4,
+	.disable_locking	= true,
+};
+
+/* Reset the software state of @ring and attach it to the hardware
+ * descriptors starting at @descs.
+ */
+static void mtk_star_ring_init(struct mtk_star_ring *ring,
+			       struct mtk_star_ring_desc *descs)
+{
+	/* Zeroing the whole structure also leaves head and tail at 0 and
+	 * clears the per-slot skb and DMA address tables.
+	 */
+	memset(ring, 0, sizeof(*ring));
+	ring->descs = descs;
+}
+
+/* Reclaim the descriptor at the tail of @ring if it is owned by the CPU.
+ *
+ * On success the length, the remaining status flags, the DMA address and the
+ * skb recorded for the slot are stored in @desc_data, the slot's bookkeeping
+ * is cleared, the descriptor is reset (only the COWN and EOR bits are kept,
+ * the data pointer is zeroed) and the tail index advances, wrapping at the
+ * ring size.
+ *
+ * Returns 0 on success or -1 if the COWN bit is not set in the descriptor.
+ */
+static int mtk_star_ring_pop_tail(struct mtk_star_ring *ring,
+				  struct mtk_star_ring_desc_data *desc_data)
+{
+	struct mtk_star_ring_desc *desc = &ring->descs[ring->tail];
+	unsigned int status;
+
+	status = READ_ONCE(desc->status);
+	dma_rmb(); /* Make sure we read the status bits before checking it. */
+
+	if (!(status & MTK_STAR_DESC_BIT_COWN))
+		return -1;
+
+	desc_data->len = status & MTK_STAR_DESC_MSK_LEN;
+	desc_data->flags = status & ~MTK_STAR_DESC_MSK_LEN;
+	desc_data->dma_addr = ring->dma_addrs[ring->tail];
+	desc_data->skb = ring->skbs[ring->tail];
+
+	ring->dma_addrs[ring->tail] = 0;
+	ring->skbs[ring->tail] = NULL;
+
+	/* Drop the length and all flags except ownership and end-of-ring. */
+	status &= MTK_STAR_DESC_BIT_COWN | MTK_STAR_DESC_BIT_EOR;
+
+	WRITE_ONCE(desc->data_ptr, 0);
+	WRITE_ONCE(desc->status, status);
+
+	ring->tail = (ring->tail + 1) % MTK_STAR_RING_NUM_DESCS;
+
+	return 0;
+}
+
+/* Attach the buffer described by @desc_data to the descriptor at the head of
+ * @ring and hand that descriptor over to the DMA engine.
+ *
+ * The skb and DMA address are recorded in the ring's per-slot tables, the
+ * length and the extra @flags are OR-ed into the current status word, and the
+ * COWN bit is cleared in a separate write issued after a DMA write barrier.
+ * The head index then advances, wrapping at the ring size.
+ *
+ * This function does not check whether the slot is free.
+ */
+static void mtk_star_ring_push_head(struct mtk_star_ring *ring,
+				    struct mtk_star_ring_desc_data *desc_data,
+				    unsigned int flags)
+{
+	struct mtk_star_ring_desc *desc = &ring->descs[ring->head];
+	unsigned int status;
+
+	status = READ_ONCE(desc->status);
+
+	ring->skbs[ring->head] = desc_data->skb;
+	ring->dma_addrs[ring->head] = desc_data->dma_addr;
+
+	status |= desc_data->len;
+	if (flags)
+		status |= flags;
+
+	/* First publish pointer, length and flags with COWN still set... */
+	WRITE_ONCE(desc->data_ptr, desc_data->dma_addr);
+	WRITE_ONCE(desc->status, status);
+	status &= ~MTK_STAR_DESC_BIT_COWN;
+	/* Flush previous modifications before ownership change. */
+	dma_wmb();
+	/* ...then give the descriptor away. */
+	WRITE_ONCE(desc->status, status);
+
+	ring->head = (ring->head + 1) % MTK_STAR_RING_NUM_DESCS;
+}
+
+/* Queue an RX buffer: same as mtk_star_ring_push_head() without any extra
+ * descriptor flags.
+ */
+static void
+mtk_star_ring_push_head_rx(struct mtk_star_ring *ring,
+			   struct mtk_star_ring_desc_data *desc_data)
+{
+	const unsigned int no_flags = 0;
+
+	mtk_star_ring_push_head(ring, desc_data, no_flags);
+}
+
+/* Queue a TX buffer: every packet occupies exactly one descriptor, so the
+ * FS, LS and INT bits are set on each of them.
+ */
+static void
+mtk_star_ring_push_head_tx(struct mtk_star_ring *ring,
+			   struct mtk_star_ring_desc_data *desc_data)
+{
+	mtk_star_ring_push_head(ring, desc_data,
+				MTK_STAR_DESC_BIT_FS |
+				MTK_STAR_DESC_BIT_LS |
+				MTK_STAR_DESC_BIT_INT);
+}
+
+/* Return the number of descriptors between tail and head, i.e. the ones that
+ * were pushed but not yet popped. The distance is computed modulo the ring
+ * size so that it stays correct after the head index has wrapped around.
+ * The result is in the range [0, MTK_STAR_RING_NUM_DESCS - 1].
+ */
+static unsigned int mtk_star_ring_num_used_descs(struct mtk_star_ring *ring)
+{
+	return (ring->head + MTK_STAR_RING_NUM_DESCS - ring->tail) %
+	       MTK_STAR_RING_NUM_DESCS;
+}
+
+/* With only head and tail indices, head == tail must mean "empty", so one
+ * slot is always left unused: the ring is reported full as soon as all but
+ * one descriptor are in flight. This keeps the head from ever catching up
+ * with the tail and overwriting a slot that has not been reclaimed yet.
+ */
+static bool mtk_star_ring_full(struct mtk_star_ring *ring)
+{
+	return mtk_star_ring_num_used_descs(ring) >=
+	       MTK_STAR_RING_NUM_DESCS - 1;
+}
+
+/* Return true if at least one pushed descriptor has not been popped yet. */
+static bool mtk_star_ring_descs_available(struct mtk_star_ring *ring)
+{
+	return mtk_star_ring_num_used_descs(ring) > 0;
+}
+
+/* Map the tail room of @skb for reception. The caller must check the
+ * returned address with dma_mapping_error().
+ */
+static dma_addr_t mtk_star_dma_map_rx(struct mtk_star_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *parent = mtk_star_get_dev(priv);
+	unsigned char *buf;
+
+	/* Data pointer for the RX DMA descriptor must be aligned to 4N + 2. */
+	buf = skb_tail_pointer(skb) - 2;
+
+	return dma_map_single(parent, buf, skb_tailroom(skb),
+			      DMA_FROM_DEVICE);
+}
+
+/* Undo mtk_star_dma_map_rx() for the buffer described by @desc_data. The
+ * mapping length is recomputed from the skb's current tail room, so this must
+ * be called before any data is appended to the skb.
+ */
+static void mtk_star_dma_unmap_rx(struct mtk_star_priv *priv,
+				  struct mtk_star_ring_desc_data *desc_data)
+{
+	struct device *parent = mtk_star_get_dev(priv);
+	struct sk_buff *skb = desc_data->skb;
+
+	dma_unmap_single(parent, desc_data->dma_addr, skb_tailroom(skb),
+			 DMA_FROM_DEVICE);
+}
+
+/* Map the linear part of @skb for transmission. The caller must check the
+ * returned address with dma_mapping_error().
+ */
+static dma_addr_t mtk_star_dma_map_tx(struct mtk_star_priv *priv,
+				      struct sk_buff *skb)
+{
+	struct device *parent = mtk_star_get_dev(priv);
+	unsigned int len = skb_headlen(skb);
+
+	return dma_map_single(parent, skb->data, len, DMA_TO_DEVICE);
+}
+
+/* Undo mtk_star_dma_map_tx() for the buffer described by @desc_data. */
+static void mtk_star_dma_unmap_tx(struct mtk_star_priv *priv,
+				  struct mtk_star_ring_desc_data *desc_data)
+{
+	struct device *dev = mtk_star_get_dev(priv);
+
+	/* This is a void function: do not "return" the void expression. */
+	dma_unmap_single(dev, desc_data->dma_addr,
+			 skb_headlen(desc_data->skb), DMA_TO_DEVICE);
+}
+
+/* Clear the NIC_PD bit in the MAC configuration register. */
+static void mtk_star_nic_disable_pd(struct mtk_star_priv *priv)
+{
+	const unsigned int pd_bit = MTK_STAR_BIT_MAC_CFG_NIC_PD;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG, pd_bit, 0);
+}
+
+/* Program the interrupt mask so that only the TNTC, FNRC and MIB counter
+ * threshold interrupts stay unmasked; every other source is masked (a set
+ * bit in the mask register disables the interrupt).
+ */
+static void mtk_star_intr_enable(struct mtk_star_priv *priv)
+{
+	const unsigned int unmasked = MTK_STAR_BIT_INT_STS_TNTC |
+				      MTK_STAR_BIT_INT_STS_FNRC |
+				      MTK_STAR_REG_INT_STS_MIB_CNT_TH;
+
+	regmap_write(priv->regs, MTK_STAR_REG_INT_MASK, ~unmasked);
+}
+
+/* Mask every interrupt source by setting all bits of the mask register. */
+static void mtk_star_intr_disable(struct mtk_star_priv *priv)
+{
+	const int all_masked = ~0;
+
+	regmap_write(priv->regs, MTK_STAR_REG_INT_MASK, all_masked);
+}
+
+/* Unmask the TX (TNTC) interrupt, leaving the other mask bits alone. */
+static void mtk_star_intr_enable_tx(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_BIT_INT_STS_TNTC;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, 0);
+}
+
+/* Unmask the RX (FNRC) interrupt, leaving the other mask bits alone. */
+static void mtk_star_intr_enable_rx(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_BIT_INT_STS_FNRC;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, 0);
+}
+
+/* Unmask the MIB counter threshold interrupt, leaving the other mask bits
+ * alone.
+ */
+static void mtk_star_intr_enable_stats(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_REG_INT_STS_MIB_CNT_TH;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, 0);
+}
+
+/* Mask the TX (TNTC) interrupt, leaving the other mask bits alone. */
+static void mtk_star_intr_disable_tx(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_BIT_INT_STS_TNTC;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, bit);
+}
+
+/* Mask the RX (FNRC) interrupt, leaving the other mask bits alone. */
+static void mtk_star_intr_disable_rx(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_BIT_INT_STS_FNRC;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, bit);
+}
+
+/* Mask the MIB counter threshold interrupt, leaving the other mask bits
+ * alone.
+ */
+static void mtk_star_intr_disable_stats(struct mtk_star_priv *priv)
+{
+	const unsigned int bit = MTK_STAR_REG_INT_STS_MIB_CNT_TH;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK, bit, bit);
+}
+
+/* Return the current contents of the interrupt status register. The result
+ * of regmap_read() itself is not checked.
+ */
+static unsigned int mtk_star_intr_read(struct mtk_star_priv *priv)
+{
+	unsigned int status;
+
+	regmap_read(priv->regs, MTK_STAR_REG_INT_STS, &status);
+
+	return status;
+}
+
+/* Acknowledge every pending interrupt by writing the status register's
+ * current value back to it. Returns the value that was read.
+ */
+static unsigned int mtk_star_intr_ack_all(struct mtk_star_priv *priv)
+{
+	unsigned int pending = mtk_star_intr_read(priv);
+
+	regmap_write(priv->regs, MTK_STAR_REG_INT_STS, pending);
+
+	return pending;
+}
+
+/* Initialize the descriptor area and point the hardware at it.
+ *
+ * All descriptors are zeroed and marked as CPU-owned, the last descriptor of
+ * each ring gets the EOR bit, the software rings are reset (TX ring first,
+ * RX ring right behind it) and the TX/RX base and current descriptor
+ * pointer registers are programmed with the matching DMA addresses.
+ */
+static void mtk_star_dma_init(struct mtk_star_priv *priv)
+{
+	struct mtk_star_ring_desc *desc;
+	unsigned int val;
+	int i;
+
+	priv->descs_base = (struct mtk_star_ring_desc *)priv->ring_base;
+
+	for (i = 0; i < MTK_STAR_NUM_DESCS_TOTAL; i++) {
+		desc = &priv->descs_base[i];
+
+		memset(desc, 0, sizeof(*desc));
+		desc->status = MTK_STAR_DESC_BIT_COWN;
+		/* Mark the end of the TX ring and the end of the RX ring. */
+		if ((i == MTK_STAR_NUM_TX_DESCS - 1) ||
+		    (i == MTK_STAR_NUM_DESCS_TOTAL - 1))
+			desc->status |= MTK_STAR_DESC_BIT_EOR;
+	}
+
+	mtk_star_ring_init(&priv->tx_ring, priv->descs_base);
+	mtk_star_ring_init(&priv->rx_ring,
+			   priv->descs_base + MTK_STAR_NUM_TX_DESCS);
+
+	/* Set DMA pointers. */
+	val = (unsigned int)priv->dma_addr;
+	regmap_write(priv->regs, MTK_STAR_REG_TX_BASE_ADDR, val);
+	regmap_write(priv->regs, MTK_STAR_REG_TX_DPTR, val);
+
+	/* The RX descriptors start right after the TX descriptors. */
+	val += sizeof(struct mtk_star_ring_desc) * MTK_STAR_NUM_TX_DESCS;
+	regmap_write(priv->regs, MTK_STAR_REG_RX_BASE_ADDR, val);
+	regmap_write(priv->regs, MTK_STAR_REG_RX_DPTR, val);
+}
+
+/* Set the START bit in the TX and then the RX DMA control register. */
+static void mtk_star_dma_start(struct mtk_star_priv *priv)
+{
+	const unsigned int tx_start = MTK_STAR_BIT_TX_DMA_CTRL_START;
+	const unsigned int rx_start = MTK_STAR_BIT_RX_DMA_CTRL_START;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
+			   tx_start, tx_start);
+	regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
+			   rx_start, rx_start);
+}
+
+/* Write the STOP command to the TX and RX DMA control registers. Unlike the
+ * start/resume helpers this overwrites the whole register instead of
+ * updating a single bit.
+ */
+static void mtk_star_dma_stop(struct mtk_star_priv *priv)
+{
+	regmap_write(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
+		     MTK_STAR_BIT_TX_DMA_CTRL_STOP);
+	regmap_write(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
+		     MTK_STAR_BIT_RX_DMA_CTRL_STOP);
+}
+
+/* Stop both DMA engines and mark every descriptor of both rings as owned by
+ * the CPU again.
+ */
+static void mtk_star_dma_disable(struct mtk_star_priv *priv)
+{
+	struct mtk_star_ring_desc *desc, *end;
+
+	mtk_star_dma_stop(priv);
+
+	/* Take back all descriptors. */
+	end = priv->descs_base + MTK_STAR_NUM_DESCS_TOTAL;
+	for (desc = priv->descs_base; desc < end; desc++)
+		desc->status |= MTK_STAR_DESC_BIT_COWN;
+}
+
+/* Set the RESUME bit in the RX DMA control register. */
+static void mtk_star_dma_resume_rx(struct mtk_star_priv *priv)
+{
+	const unsigned int resume = MTK_STAR_BIT_RX_DMA_CTRL_RESUME;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
+			   resume, resume);
+}
+
+/* Set the RESUME bit in the TX DMA control register. */
+static void mtk_star_dma_resume_tx(struct mtk_star_priv *priv)
+{
+	const unsigned int resume = MTK_STAR_BIT_TX_DMA_CTRL_RESUME;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
+			   resume, resume);
+}
+
+/* Program the MAC address of @ndev into the MY_MAC_H/MY_MAC_L registers.
+ * The two most significant bytes go into the high register, the remaining
+ * four into the low one.
+ */
+static void mtk_star_set_mac_addr(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	const u8 *mac_addr = ndev->dev_addr;
+	unsigned int high, low;
+
+	/* Widen each byte before shifting: a plain u8 is promoted to a signed
+	 * int and shifting a value >= 0x80 left by 24 would overflow it.
+	 */
+	high = (u32)mac_addr[0] << 8 | (u32)mac_addr[1];
+	low = (u32)mac_addr[2] << 24 | (u32)mac_addr[3] << 16 |
+	      (u32)mac_addr[4] << 8 | (u32)mac_addr[5];
+
+	regmap_write(priv->regs, MTK_STAR_REG_MY_MAC_H, high);
+	regmap_write(priv->regs, MTK_STAR_REG_MY_MAC_L, low);
+}
+
+/* Read every hardware statistics counter once and discard the value.
+ * NOTE(review): this relies on the counters being cleared by a read - the
+ * code only shows the reads, confirm against the datasheet.
+ */
+static void mtk_star_reset_counters(struct mtk_star_priv *priv)
+{
+	static const unsigned int counter_regs[] = {
+		MTK_STAR_REG_C_RXOKPKT,
+		MTK_STAR_REG_C_RXOKBYTE,
+		MTK_STAR_REG_C_RXRUNT,
+		MTK_STAR_REG_C_RXLONG,
+		MTK_STAR_REG_C_RXDROP,
+		MTK_STAR_REG_C_RXCRC,
+		MTK_STAR_REG_C_RXARLDROP,
+		MTK_STAR_REG_C_RXVLANDROP,
+		MTK_STAR_REG_C_RXCSERR,
+		MTK_STAR_REG_C_RXPAUSE,
+		MTK_STAR_REG_C_TXOKPKT,
+		MTK_STAR_REG_C_TXOKBYTE,
+		MTK_STAR_REG_C_TXPAUSECOL,
+		MTK_STAR_REG_C_TXRTY,
+		MTK_STAR_REG_C_TXSKIP,
+		MTK_STAR_REG_C_TX_ARP,
+		MTK_STAR_REG_C_RX_RERR,
+		MTK_STAR_REG_C_RX_UNI,
+		MTK_STAR_REG_C_RX_MULTI,
+		MTK_STAR_REG_C_RX_BROAD,
+		MTK_STAR_REG_C_RX_ALIGNERR,
+		MTK_STAR_REG_C_TX_UNI,
+		MTK_STAR_REG_C_TX_MULTI,
+		MTK_STAR_REG_C_TX_BROAD,
+		MTK_STAR_REG_C_TX_TIMEOUT,
+		MTK_STAR_REG_C_TX_LATECOL,
+		MTK_STAR_REG_C_RX_LENGTHERR,
+		MTK_STAR_REG_C_RX_TWIST,
+	};
+
+	unsigned int i, val;
+
+	/* The value read is intentionally unused. */
+	for (i = 0; i < ARRAY_SIZE(counter_regs); i++)
+		regmap_read(priv->regs, counter_regs[i], &val);
+}
+
+/* Read the counter register @reg and add its value to *@stat. */
+static void mtk_star_update_stat(struct mtk_star_priv *priv,
+				 unsigned int reg, u64 *stat)
+{
+	unsigned int delta;
+
+	regmap_read(priv->regs, reg, &delta);
+	*stat += delta;
+}
+
+/* Try to get as many stats as possible from the internal registers instead
+ * of tracking them ourselves.
+ *
+ * Every hardware counter is added to the matching field of priv->stats.
+ * rx_errors is the sum of the general RX error counter and of the individual
+ * RX error counters; only the amount by which those individual counters grew
+ * during this call is added, so that repeated calls do not count the same
+ * errors again.
+ */
+static void mtk_star_update_stats(struct mtk_star_priv *priv)
+{
+	struct rtnl_link_stats64 *stats = &priv->stats;
+	u64 rx_sub_errors;
+
+	/* OK packets and bytes. */
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXOKPKT, &stats->rx_packets);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_TXOKPKT, &stats->tx_packets);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXOKBYTE, &stats->rx_bytes);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_TXOKBYTE, &stats->tx_bytes);
+
+	/* RX & TX multicast. */
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_MULTI, &stats->multicast);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_TX_MULTI, &stats->multicast);
+
+	/* Collisions. */
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_TXPAUSECOL,
+			     &stats->collisions);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_TX_LATECOL,
+			     &stats->collisions);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXRUNT, &stats->collisions);
+
+	/* Remember the individual RX error totals before this update. */
+	rx_sub_errors = stats->rx_length_errors + stats->rx_over_errors +
+			stats->rx_crc_errors + stats->rx_frame_errors +
+			stats->rx_fifo_errors;
+
+	/* RX Errors. */
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_LENGTHERR,
+			     &stats->rx_length_errors);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXLONG,
+			     &stats->rx_over_errors);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXCRC, &stats->rx_crc_errors);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_ALIGNERR,
+			     &stats->rx_frame_errors);
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RXDROP,
+			     &stats->rx_fifo_errors);
+
+	/* Sum of the general RX error counter + the growth of all of the
+	 * above since the previous update.
+	 */
+	mtk_star_update_stat(priv, MTK_STAR_REG_C_RX_RERR, &stats->rx_errors);
+	stats->rx_errors += stats->rx_length_errors + stats->rx_over_errors +
+			    stats->rx_crc_errors + stats->rx_frame_errors +
+			    stats->rx_fifo_errors - rx_sub_errors;
+}
+
+/* This runs in process context and parallel TX and RX paths executing in
+ * napi context may result in losing some stats data but this should happen
+ * seldom enough to be acceptable.
+ *
+ * Scheduled by the interrupt handler after it masked the MIB counter
+ * threshold interrupt: collect the counters, reset them and unmask the
+ * interrupt again.
+ */
+static void mtk_star_update_stats_work(struct work_struct *work)
+{
+	struct mtk_star_priv *priv = container_of(work, struct mtk_star_priv,
+						 stats_work);
+
+	mtk_star_update_stats(priv);
+	mtk_star_reset_counters(priv);
+	mtk_star_intr_enable_stats(priv);
+}
+
+/* Allocate an RX skb of MTK_STAR_MAX_FRAME_SIZE bytes whose tail pointer is
+ * moved to an aligned address plus MTK_STAR_IP_ALIGN bytes.
+ *
+ * Returns the skb or NULL if the allocation failed.
+ */
+static struct sk_buff *mtk_star_alloc_skb(struct net_device *ndev)
+{
+	struct sk_buff *skb = dev_alloc_skb(MTK_STAR_MAX_FRAME_SIZE);
+	uintptr_t misalign;
+
+	if (!skb)
+		return NULL;
+
+	/* Align to 16 bytes. */
+	misalign = (uintptr_t)skb_tail_pointer(skb) &
+		   (MTK_STAR_SKB_ALIGNMENT - 1);
+	if (misalign)
+		skb_reserve(skb, MTK_STAR_SKB_ALIGNMENT - misalign);
+
+	/* Ensure 16-byte alignment of the skb pointer: eth_type_trans() will
+	 * extract the Ethernet header (14 bytes) so we need two more bytes.
+	 */
+	skb_reserve(skb, MTK_STAR_IP_ALIGN);
+
+	return skb;
+}
+
+/* Allocate and DMA-map one skb for every RX descriptor and hand all RX
+ * descriptors over to the DMA engine.
+ *
+ * Returns 0 on success or -ENOMEM if an allocation or a mapping failed. On
+ * failure the skbs set up in earlier iterations stay recorded in the RX ring;
+ * this function does not release them, the caller has to.
+ */
+static int mtk_star_prepare_rx_skbs(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	struct mtk_star_ring *ring = &priv->rx_ring;
+	struct device *dev = mtk_star_get_dev(priv);
+	struct mtk_star_ring_desc *desc;
+	struct sk_buff *skb;
+	dma_addr_t dma_addr;
+	int i;
+
+	for (i = 0; i < MTK_STAR_NUM_RX_DESCS; i++) {
+		skb = mtk_star_alloc_skb(ndev);
+		if (!skb)
+			return -ENOMEM;
+
+		dma_addr = mtk_star_dma_map_rx(priv, skb);
+		if (dma_mapping_error(dev, dma_addr)) {
+			dev_kfree_skb(skb);
+			return -ENOMEM;
+		}
+
+		desc = &ring->descs[i];
+		desc->data_ptr = dma_addr;
+		/* The buffer length is the skb's tail room. */
+		desc->status |= skb_tailroom(skb) & MTK_STAR_DESC_MSK_LEN;
+		/* Clearing COWN gives the descriptor to the DMA engine. */
+		desc->status &= ~MTK_STAR_DESC_BIT_COWN;
+		ring->skbs[i] = skb;
+		ring->dma_addrs[i] = dma_addr;
+	}
+
+	return 0;
+}
+
+/* Unmap and free every skb still recorded in @ring.
+ *
+ * @unmap_func is the direction-specific unmap helper
+ * (mtk_star_dma_unmap_rx() or mtk_star_dma_unmap_tx()); only the dma_addr and
+ * skb members of the descriptor data passed to it are filled in. Slots
+ * without a recorded DMA address are skipped. Each released slot is cleared
+ * so that a repeated call cannot unmap or free the same skb twice.
+ */
+static void
+mtk_star_ring_free_skbs(struct mtk_star_priv *priv, struct mtk_star_ring *ring,
+			void (*unmap_func)(struct mtk_star_priv *,
+					   struct mtk_star_ring_desc_data *))
+{
+	struct mtk_star_ring_desc_data desc_data;
+	int i;
+
+	for (i = 0; i < MTK_STAR_RING_NUM_DESCS; i++) {
+		if (!ring->dma_addrs[i])
+			continue;
+
+		desc_data.dma_addr = ring->dma_addrs[i];
+		desc_data.skb = ring->skbs[i];
+
+		unmap_func(priv, &desc_data);
+		dev_kfree_skb(desc_data.skb);
+
+		ring->dma_addrs[i] = 0;
+		ring->skbs[i] = NULL;
+	}
+}
+
+/* Release every skb still attached to the RX ring. */
+static void mtk_star_free_rx_skbs(struct mtk_star_priv *priv)
+{
+	mtk_star_ring_free_skbs(priv, &priv->rx_ring, mtk_star_dma_unmap_rx);
+}
+
+/* Release every skb still attached to the TX ring. */
+static void mtk_star_free_tx_skbs(struct mtk_star_priv *priv)
+{
+	mtk_star_ring_free_skbs(priv, &priv->tx_ring, mtk_star_dma_unmap_tx);
+}
+
+/* All processing for TX and RX happens in the napi poll callback.
+ *
+ * The handler only looks at the status register while the interface is
+ * running. A pending TX or RX interrupt is masked and NAPI is scheduled; a
+ * pending MIB counter threshold interrupt is masked and the statistics work
+ * is scheduled. Finally all pending interrupts are acknowledged.
+ *
+ * Always returns IRQ_HANDLED.
+ */
+static irqreturn_t mtk_star_handle_irq(int irq, void *data)
+{
+	struct mtk_star_priv *priv;
+	struct net_device *ndev;
+	bool need_napi = false;
+	unsigned int status;
+
+	ndev = data;
+	priv = netdev_priv(ndev);
+
+	if (netif_running(ndev)) {
+		status = mtk_star_intr_read(priv);
+
+		if (status & MTK_STAR_BIT_INT_STS_TNTC) {
+			mtk_star_intr_disable_tx(priv);
+			need_napi = true;
+		}
+
+		if (status & MTK_STAR_BIT_INT_STS_FNRC) {
+			mtk_star_intr_disable_rx(priv);
+			need_napi = true;
+		}
+
+		if (need_napi)
+			napi_schedule(&priv->napi);
+
+		/* One of the counters reached 0x8000000 - update stats and
+		 * reset all counters.
+		 */
+		if (unlikely(status & MTK_STAR_REG_INT_STS_MIB_CNT_TH)) {
+			mtk_star_intr_disable_stats(priv);
+			schedule_work(&priv->stats_work);
+		}
+
+		mtk_star_intr_ack_all(priv);
+	}
+
+	return IRQ_HANDLED;
+}
+
+/* Wait for the completion of any previous command - CMD_START bit must be
+ * cleared by hardware.
+ *
+ * Polls the hash control register atomically every 10 us for up to
+ * MTK_STAR_WAIT_TIMEOUT us. Returns 0 once the bit is clear or the error
+ * reported by the poll helper (-ETIMEDOUT on timeout).
+ */
+static int mtk_star_hash_wait_cmd_start(struct mtk_star_priv *priv)
+{
+	unsigned int val;
+
+	return regmap_read_poll_timeout_atomic(priv->regs,
+				MTK_STAR_REG_HASH_CTRL, val,
+				!(val & MTK_STAR_BIT_HASH_CTRL_CMD_START),
+				10, MTK_STAR_WAIT_TIMEOUT);
+}
+
+/* Wait for a hash table operation to finish and check its result.
+ *
+ * Returns 0 if BIST_DONE was seen and BIST_OK is set, the poll helper's
+ * error if BIST_DONE did not show up in time, or -EIO if BIST_OK is clear.
+ */
+static int mtk_star_hash_wait_ok(struct mtk_star_priv *priv)
+{
+	unsigned int val;
+	int ret;
+
+	/* Wait for BIST_DONE bit. */
+	ret = regmap_read_poll_timeout_atomic(priv->regs,
+					MTK_STAR_REG_HASH_CTRL, val,
+					val & MTK_STAR_BIT_HASH_CTRL_BIST_DONE,
+					10, MTK_STAR_WAIT_TIMEOUT);
+	if (ret)
+		return ret;
+
+	/* Check the BIST_OK bit. */
+	regmap_read(priv->regs, MTK_STAR_REG_HASH_CTRL, &val);
+	if (!(val & MTK_STAR_BIT_HASH_CTRL_BIST_OK))
+		return -EIO;
+
+	return 0;
+}
+
+/* Set the hash table bit at index @hash_addr.
+ *
+ * Waits for any previous command to finish, issues the command and waits
+ * for its result. Returns 0 on success or the error from either wait.
+ */
+static int mtk_star_set_hashbit(struct mtk_star_priv *priv,
+				unsigned int hash_addr)
+{
+	unsigned int cmd;
+	int err;
+
+	err = mtk_star_hash_wait_cmd_start(priv);
+	if (err)
+		return err;
+
+	cmd = (hash_addr & MTK_STAR_MSK_HASH_CTRL_HASH_BIT_ADDR) |
+	      MTK_STAR_BIT_HASH_CTRL_ACC_CMD |
+	      MTK_STAR_BIT_HASH_CTRL_CMD_START |
+	      MTK_STAR_BIT_HASH_CTRL_BIST_EN |
+	      MTK_STAR_BIT_HASH_CTRL_HASH_BIT_DATA;
+	regmap_write(priv->regs, MTK_STAR_REG_HASH_CTRL, cmd);
+
+	return mtk_star_hash_wait_ok(priv);
+}
+
+/* Reset the hash table: wait for any previous command to finish, enable the
+ * hash table BIST, trigger the reset through the TEST1 register and wait for
+ * the result.
+ *
+ * Returns 0 on success or the error from either wait.
+ */
+static int mtk_star_reset_hash_table(struct mtk_star_priv *priv)
+{
+	int ret;
+
+	ret = mtk_star_hash_wait_cmd_start(priv);
+	if (ret)
+		return ret;
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_HASH_CTRL,
+			   MTK_STAR_BIT_HASH_CTRL_BIST_EN,
+			   MTK_STAR_BIT_HASH_CTRL_BIST_EN);
+	regmap_update_bits(priv->regs, MTK_STAR_REG_TEST1,
+			   MTK_STAR_BIT_TEST1_RST_HASH_MBIST,
+			   MTK_STAR_BIT_TEST1_RST_HASH_MBIST);
+
+	return mtk_star_hash_wait_ok(priv);
+}
+
+/* Apply the cached link parameters (priv->speed, priv->pause) to the MAC:
+ * force the speed, full duplex and flow control in PHY_CTRL1 and program or
+ * clear the pause thresholds in FC_CFG and EXT_CFG.
+ */
+static void mtk_star_phy_config(struct mtk_star_priv *priv)
+{
+	unsigned int ctrl, fc_cfg = 0, ext_cfg = 0;
+
+	switch (priv->speed) {
+	case SPEED_1000:
+		ctrl = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_1000M;
+		break;
+	case SPEED_100:
+		ctrl = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_100M;
+		break;
+	default:
+		ctrl = MTK_STAR_VAL_PHY_CTRL1_FORCE_SPD_10M;
+		break;
+	}
+	ctrl <<= MTK_STAR_OFF_PHY_CTRL1_FORCE_SPD;
+
+	ctrl |= MTK_STAR_BIT_PHY_CTRL1_AN_EN;
+	ctrl |= MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_RX;
+	ctrl |= MTK_STAR_BIT_PHY_CTRL1_FORCE_FC_TX;
+	/* Only full-duplex supported for now. */
+	ctrl |= MTK_STAR_BIT_PHY_CTRL1_FORCE_DPX;
+
+	regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL1, ctrl);
+
+	/* Without pause both threshold fields are simply cleared. */
+	if (priv->pause) {
+		fc_cfg = MTK_STAR_VAL_FC_CFG_SEND_PAUSE_TH_2K;
+		fc_cfg <<= MTK_STAR_OFF_FC_CFG_SEND_PAUSE_TH;
+		fc_cfg |= MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR;
+	}
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_FC_CFG,
+			   MTK_STAR_MSK_FC_CFG_SEND_PAUSE_TH |
+			   MTK_STAR_BIT_FC_CFG_UC_PAUSE_DIR, fc_cfg);
+
+	if (priv->pause) {
+		ext_cfg = MTK_STAR_VAL_EXT_CFG_SND_PAUSE_RLS_1K;
+		ext_cfg <<= MTK_STAR_OFF_EXT_CFG_SND_PAUSE_RLS;
+	}
+
+	regmap_update_bits(priv->regs, MTK_STAR_REG_EXT_CFG,
+			   MTK_STAR_MSK_EXT_CFG_SND_PAUSE_RLS, ext_cfg);
+}
+
+/* Link change callback passed to of_phy_connect().
+ *
+ * Compares the PHY's link, speed and pause settings with the values cached
+ * in the private data and updates the cache. If anything changed, the MAC is
+ * reconfigured (only while the link is up) and the new status is printed.
+ * The duplex setting is not tracked here.
+ */
+static void mtk_star_adjust_link(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	struct phy_device *phydev = priv->phydev;
+	bool new_state = false;
+
+	if (phydev->link) {
+		if (!priv->link) {
+			priv->link = phydev->link;
+			new_state = true;
+		}
+
+		if (priv->speed != phydev->speed) {
+			priv->speed = phydev->speed;
+			new_state = true;
+		}
+
+		if (priv->pause != phydev->pause) {
+			priv->pause = phydev->pause;
+			new_state = true;
+		}
+	} else {
+		if (priv->link) {
+			priv->link = phydev->link;
+			new_state = true;
+		}
+	}
+
+	if (new_state) {
+		if (phydev->link)
+			mtk_star_phy_config(priv);
+
+		phy_print_status(ndev->phydev);
+	}
+}
+
+/* Write the initial system configuration (MII pad output, external MDC mode
+ * and SWC MII mode bits) and select the CLK_DIV_10 MAC clock setting.
+ */
+static void mtk_star_init_config(struct mtk_star_priv *priv)
+{
+	const unsigned int sys_conf = MTK_STAR_BIT_MII_PAD_OUT_ENABLE |
+				      MTK_STAR_BIT_EXT_MDC_MODE |
+				      MTK_STAR_BIT_SWC_MII_MODE;
+
+	regmap_write(priv->regs, MTK_STAR_REG_SYS_CONF, sys_conf);
+	regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CLK_CONF,
+			   MTK_STAR_MSK_MAC_CLK_CONF,
+			   MTK_STAR_BIT_CLK_DIV_10);
+}
+
+/* Select RMII in the MII mode field of the pericfg NIC_CFG_CON register. */
+static void mtk_star_set_mode_rmii(struct mtk_star_priv *priv)
+{
+	regmap_update_bits(priv->pericfg, MTK_PERICFG_REG_NIC_CFG_CON,
+			   MTK_PERICFG_MSK_NIC_CFG_CON_CFG_MII,
+			   MTK_PERICFG_BIT_NIC_CFG_CON_RMII);
+}
+
+/* Bring the MAC up: configure it, set up the DMA rings and RX buffers,
+ * request the interrupt, enable NAPI, connect to and start the PHY and start
+ * the TX queue.
+ *
+ * Returns 0 on success or a negative error code. On failure everything that
+ * was set up by this function (RX skbs, interrupt, NAPI, interrupt mask) is
+ * undone again.
+ */
+static int mtk_star_enable(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	unsigned int val;
+	int ret;
+
+	mtk_star_nic_disable_pd(priv);
+	mtk_star_intr_disable(priv);
+	mtk_star_dma_stop(priv);
+
+	mtk_star_set_mac_addr(ndev);
+
+	/* Configure the MAC */
+	val = MTK_STAR_VAL_MAC_CFG_IPG_96BIT;
+	val <<= MTK_STAR_OFF_MAC_CFG_IPG;
+	val |= MTK_STAR_BIT_MAC_CFG_MAXLEN_1522;
+	val |= MTK_STAR_BIT_MAC_CFG_AUTO_PAD;
+	val |= MTK_STAR_BIT_MAC_CFG_CRC_STRIP;
+	regmap_write(priv->regs, MTK_STAR_REG_MAC_CFG, val);
+
+	/* Enable Hash Table BIST and reset it */
+	ret = mtk_star_reset_hash_table(priv);
+	if (ret)
+		return ret;
+
+	/* Setup the hashing algorithm */
+	regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
+			   MTK_STAR_BIT_ARL_CFG_HASH_ALG |
+			   MTK_STAR_BIT_ARL_CFG_MISC_MODE, 0);
+
+	/* Don't strip VLAN tags */
+	regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG,
+			   MTK_STAR_BIT_MAC_CFG_VLAN_STRIP, 0);
+
+	/* Setup DMA */
+	mtk_star_dma_init(priv);
+
+	/* A failure part-way through leaves the skbs prepared so far in the
+	 * RX ring, so they must be released as well.
+	 */
+	ret = mtk_star_prepare_rx_skbs(ndev);
+	if (ret)
+		goto err_free_skbs;
+
+	/* Request the interrupt */
+	ret = request_irq(ndev->irq, mtk_star_handle_irq,
+			  IRQF_TRIGGER_FALLING, ndev->name, ndev);
+	if (ret)
+		goto err_free_skbs;
+
+	napi_enable(&priv->napi);
+
+	mtk_star_intr_ack_all(priv);
+	mtk_star_intr_enable(priv);
+
+	/* Connect to and start PHY */
+	priv->phydev = of_phy_connect(ndev, priv->phy_node,
+				      mtk_star_adjust_link, 0, priv->phy_intf);
+	if (!priv->phydev) {
+		netdev_err(ndev, "failed to connect to PHY\n");
+		/* ret still holds 0 from request_irq() at this point. */
+		ret = -ENODEV;
+		goto err_disable_napi;
+	}
+
+	mtk_star_dma_start(priv);
+	phy_start(priv->phydev);
+	netif_start_queue(ndev);
+
+	return 0;
+
+err_disable_napi:
+	mtk_star_intr_disable(priv);
+	napi_disable(&priv->napi);
+	free_irq(ndev->irq, ndev);
+err_free_skbs:
+	mtk_star_free_rx_skbs(priv);
+	return ret;
+}
+
+/* Shut the MAC down: stop the TX queue and NAPI, mask and acknowledge all
+ * interrupts, stop the DMA engines and take back all descriptors, stop and
+ * disconnect the PHY, release the interrupt and free every skb still held by
+ * the RX and TX rings.
+ */
+static void mtk_star_disable(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+
+	netif_stop_queue(ndev);
+	napi_disable(&priv->napi);
+	mtk_star_intr_disable(priv);
+	mtk_star_dma_disable(priv);
+	mtk_star_intr_ack_all(priv);
+	phy_stop(priv->phydev);
+	phy_disconnect(priv->phydev);
+	free_irq(ndev->irq, ndev);
+	mtk_star_free_rx_skbs(priv);
+	mtk_star_free_tx_skbs(priv);
+}
+
+/* .ndo_open callback: bring the MAC up, see mtk_star_enable(). */
+static int mtk_star_netdev_open(struct net_device *ndev)
+{
+	return mtk_star_enable(ndev);
+}
+
+/* .ndo_stop callback: shut the MAC down, see mtk_star_disable(). Always
+ * returns 0.
+ */
+static int mtk_star_netdev_stop(struct net_device *ndev)
+{
+	mtk_star_disable(ndev);
+
+	return 0;
+}
+
+/* .ndo_do_ioctl callback: forward the request to the PHY layer while the
+ * interface is running, otherwise fail with -EINVAL.
+ */
+static int mtk_star_netdev_ioctl(struct net_device *ndev,
+				 struct ifreq *req, int cmd)
+{
+	if (netif_running(ndev))
+		return phy_mii_ioctl(ndev->phydev, req, cmd);
+
+	return -EINVAL;
+}
+
+/* .ndo_start_xmit callback: map @skb, queue it on the TX ring and kick the
+ * TX DMA engine. The queue is stopped when the ring reports being full.
+ *
+ * Always returns NETDEV_TX_OK: if the DMA mapping fails the packet is dropped
+ * and freed here, so the stack must not be told to retry it.
+ */
+static int mtk_star_netdev_start_xmit(struct sk_buff *skb,
+				      struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	struct mtk_star_ring *ring = &priv->tx_ring;
+	struct device *dev = mtk_star_get_dev(priv);
+	struct mtk_star_ring_desc_data desc_data;
+
+	desc_data.dma_addr = mtk_star_dma_map_tx(priv, skb);
+	if (dma_mapping_error(dev, desc_data.dma_addr))
+		goto err_drop_packet;
+
+	desc_data.skb = skb;
+	desc_data.len = skb->len;
+
+	spin_lock_bh(&priv->lock);
+
+	mtk_star_ring_push_head_tx(ring, &desc_data);
+
+	netdev_sent_queue(ndev, skb->len);
+
+	if (mtk_star_ring_full(ring))
+		netif_stop_queue(ndev);
+
+	spin_unlock_bh(&priv->lock);
+
+	mtk_star_dma_resume_tx(priv);
+
+	return NETDEV_TX_OK;
+
+err_drop_packet:
+	dev_kfree_skb(skb);
+	ndev->stats.tx_dropped++;
+	/* The skb is gone: returning NETDEV_TX_BUSY would make the core
+	 * requeue and later touch freed memory.
+	 */
+	return NETDEV_TX_OK;
+}
+
+/* Reclaim one transmitted packet from the tail of the TX ring: unmap its
+ * buffer and free the skb.
+ *
+ * Returns the length of the freed skb, or a negative number if the tail
+ * descriptor is still owned by the DMA engine.
+ */
+static int mtk_star_tx_complete_one(struct mtk_star_priv *priv)
+{
+	struct mtk_star_ring_desc_data done;
+	int bytes;
+	int err;
+
+	err = mtk_star_ring_pop_tail(&priv->tx_ring, &done);
+	if (err)
+		return err;
+
+	mtk_star_dma_unmap_tx(priv, &done);
+	/* Read the length before the skb is released. */
+	bytes = done.skb->len;
+	dev_kfree_skb_irq(done.skb);
+
+	return bytes;
+}
+
+/* Reclaim every completed TX descriptor under the descriptor lock, report
+ * the completed packets and bytes to the stack, wake the queue if it was
+ * stopped and at least one packet completed, and unmask the TX interrupt.
+ */
+static void mtk_star_tx_complete_all(struct mtk_star_priv *priv)
+{
+	struct mtk_star_ring *ring = &priv->tx_ring;
+	struct net_device *ndev = priv->ndev;
+	int pkts_compl = 0, bytes_compl = 0;
+	bool wake = false;
+	int ret;
+
+	spin_lock(&priv->lock);
+
+	while (mtk_star_ring_descs_available(ring)) {
+		ret = mtk_star_tx_complete_one(priv);
+		if (ret < 0)
+			break;
+
+		pkts_compl++;
+		bytes_compl += ret;
+		wake = true;
+	}
+
+	netdev_completed_queue(ndev, pkts_compl, bytes_compl);
+
+	if (wake && netif_queue_stopped(ndev))
+		netif_wake_queue(ndev);
+
+	mtk_star_intr_enable_tx(priv);
+
+	spin_unlock(&priv->lock);
+}
+
+/* .ndo_get_stats64 callback: fold the current hardware counters into the
+ * driver's accumulated statistics and copy the result to @stats.
+ */
+static void mtk_star_netdev_get_stats64(struct net_device *ndev,
+					struct rtnl_link_stats64 *stats)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+
+	mtk_star_update_stats(priv);
+
+	memcpy(stats, &priv->stats, sizeof(*stats));
+}
+
+/* .ndo_set_rx_mode callback.
+ *
+ * - IFF_PROMISC: set the MISC_MODE bit in the ARL configuration.
+ * - IFF_ALLMULTI or more than MTK_STAR_HASHTABLE_MC_LIMIT multicast
+ *   addresses: set every bit of the hash table.
+ * - otherwise: reset the hash table and set one bit per multicast address.
+ *   The bit index is built from the lowest bit of the first address byte
+ *   (as bit 8) and the last address byte.
+ *
+ * Hash table failures are only logged; the function stops at the first one.
+ */
+static void mtk_star_set_rx_mode(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	struct netdev_hw_addr *hw_addr;
+	unsigned int hash_addr, i;
+	int ret;
+
+	if (ndev->flags & IFF_PROMISC) {
+		regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
+				   MTK_STAR_BIT_ARL_CFG_MISC_MODE,
+				   MTK_STAR_BIT_ARL_CFG_MISC_MODE);
+	} else if (netdev_mc_count(ndev) > MTK_STAR_HASHTABLE_MC_LIMIT ||
+		   ndev->flags & IFF_ALLMULTI) {
+		for (i = 0; i < MTK_STAR_HASHTABLE_SIZE_MAX; i++) {
+			ret = mtk_star_set_hashbit(priv, i);
+			if (ret)
+				goto hash_fail;
+		}
+	} else {
+		/* Clear previous settings. */
+		ret = mtk_star_reset_hash_table(priv);
+		if (ret)
+			goto hash_fail;
+
+		netdev_for_each_mc_addr(hw_addr, ndev) {
+			hash_addr = (hw_addr->addr[0] & 0x01) << 8;
+			hash_addr += hw_addr->addr[5];
+			ret = mtk_star_set_hashbit(priv, hash_addr);
+			if (ret)
+				goto hash_fail;
+		}
+	}
+
+	return;
+
+hash_fail:
+	if (ret == -ETIMEDOUT)
+		netdev_err(ndev, "setting hash bit timed out\n");
+	else
+		/* Should be -EIO */
+		netdev_err(ndev, "unable to set hash bit\n");
+}
+
+/* net_device callbacks. MAC address changes and address validation use the
+ * generic Ethernet helpers.
+ */
+static const struct net_device_ops mtk_star_netdev_ops = {
+	.ndo_open		= mtk_star_netdev_open,
+	.ndo_stop		= mtk_star_netdev_stop,
+	.ndo_start_xmit		= mtk_star_netdev_start_xmit,
+	.ndo_get_stats64	= mtk_star_netdev_get_stats64,
+	.ndo_set_rx_mode	= mtk_star_set_rx_mode,
+	.ndo_do_ioctl		= mtk_star_netdev_ioctl,
+	.ndo_set_mac_address	= eth_mac_addr,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+/* ethtool .get_drvinfo callback: only the driver name is filled in. */
+static void mtk_star_get_drvinfo(struct net_device *dev,
+				 struct ethtool_drvinfo *info)
+{
+	strlcpy(info->driver, MTK_STAR_DRVNAME, sizeof(info->driver));
+}
+
+/* ethtool callbacks: driver info plus the generic link state and PHY link
+ * settings helpers.
+ *
+ * TODO Add ethtool stats.
+ */
+static const struct ethtool_ops mtk_star_ethtool_ops = {
+	.get_drvinfo		= mtk_star_get_drvinfo,
+	.get_link		= ethtool_op_get_link,
+	.get_link_ksettings	= phy_ethtool_get_link_ksettings,
+	.set_link_ksettings	= phy_ethtool_set_link_ksettings,
+};
+
+/* Receive one packet from the tail of the RX ring and give the descriptor
+ * back to the DMA engine with a buffer attached.
+ *
+ * A good packet is passed up the stack and replaced by a freshly allocated
+ * and mapped skb. If the packet is flagged as bad (CRC error or oversized),
+ * or if the replacement skb cannot be allocated or mapped, the packet is
+ * dropped and the current skb is queued again together with its existing DMA
+ * mapping.
+ *
+ * Returns 0 if a descriptor was processed or -1 if the tail descriptor is
+ * still owned by the DMA engine.
+ */
+static int mtk_star_receive_packet(struct mtk_star_priv *priv)
+{
+	struct mtk_star_ring *ring = &priv->rx_ring;
+	struct device *dev = mtk_star_get_dev(priv);
+	struct mtk_star_ring_desc_data desc_data;
+	struct net_device *ndev = priv->ndev;
+	struct sk_buff *curr_skb, *new_skb;
+	dma_addr_t new_dma_addr;
+	int ret;
+
+	spin_lock(&priv->lock);
+	ret = mtk_star_ring_pop_tail(ring, &desc_data);
+	spin_unlock(&priv->lock);
+	if (ret)
+		return -1;
+
+	curr_skb = desc_data.skb;
+
+	if ((desc_data.flags & MTK_STAR_DESC_BIT_RX_CRCE) ||
+	    (desc_data.flags & MTK_STAR_DESC_BIT_RX_OSIZE)) {
+		/* Error packet -> drop and reuse skb. */
+		new_skb = curr_skb;
+		goto push_new_skb;
+	}
+
+	/* Prepare new skb before receiving the current one. Reuse the current
+	 * skb if we fail at any point.
+	 */
+	new_skb = mtk_star_alloc_skb(ndev);
+	if (!new_skb) {
+		ndev->stats.rx_dropped++;
+		new_skb = curr_skb;
+		goto push_new_skb;
+	}
+
+	new_dma_addr = mtk_star_dma_map_rx(priv, new_skb);
+	if (dma_mapping_error(dev, new_dma_addr)) {
+		ndev->stats.rx_dropped++;
+		dev_kfree_skb(new_skb);
+		new_skb = curr_skb;
+		netdev_err(ndev, "DMA mapping error of RX descriptor\n");
+		goto push_new_skb;
+	}
+
+	/* We can't fail anymore at this point: it's safe to unmap the skb. */
+	mtk_star_dma_unmap_rx(priv, &desc_data);
+
+	skb_put(desc_data.skb, desc_data.len);
+	desc_data.skb->ip_summed = CHECKSUM_NONE;
+	desc_data.skb->protocol = eth_type_trans(desc_data.skb, ndev);
+	desc_data.skb->dev = ndev;
+	netif_receive_skb(desc_data.skb);
+
+	/* Only a replacement skb comes with a new mapping. When the current
+	 * skb is reused, desc_data.dma_addr still holds its (still valid)
+	 * mapping and must be left alone.
+	 */
+	desc_data.dma_addr = new_dma_addr;
+
+push_new_skb:
+	desc_data.len = skb_tailroom(new_skb);
+	desc_data.skb = new_skb;
+
+	spin_lock(&priv->lock);
+	mtk_star_ring_push_head_rx(ring, &desc_data);
+	spin_unlock(&priv->lock);
+
+	return 0;
+}
+
+/* Process up to @budget RX descriptors, then resume the RX DMA engine.
+ *
+ * Returns the number of descriptors that were actually processed. The
+ * attempt that finds the ring empty is not counted, so an idle ring yields
+ * 0 and NAPI can complete even with a budget of 1.
+ */
+static int mtk_star_process_rx(struct mtk_star_priv *priv, int budget)
+{
+	int received;
+
+	for (received = 0; received < budget; received++) {
+		if (mtk_star_receive_packet(priv))
+			break;
+	}
+
+	mtk_star_dma_resume_rx(priv);
+
+	return received;
+}
+
+/* NAPI poll callback: reclaim all completed TX descriptors, then receive up
+ * to @budget packets. If the budget was not used up, NAPI is completed and
+ * the RX interrupt is unmasked again.
+ *
+ * Returns the RX count reported by mtk_star_process_rx().
+ */
+static int mtk_star_poll(struct napi_struct *napi, int budget)
+{
+	struct mtk_star_priv *priv = container_of(napi, struct mtk_star_priv,
+						 napi);
+	int done;
+
+	/* Clean-up all TX descriptors. */
+	mtk_star_tx_complete_all(priv);
+	/* Receive up to $budget packets. */
+	done = mtk_star_process_rx(priv, budget);
+	if (done >= budget)
+		return done;
+
+	napi_complete_done(napi, done);
+	mtk_star_intr_enable_rx(priv);
+
+	return done;
+}
+
+/* Write the RWOK bit to PHY_CTRL0 before starting a new MDIO transaction.
+ * NOTE(review): presumably the bit is write-one-to-clear - confirm against
+ * the datasheet.
+ */
+static void mtk_star_mdio_rwok_clear(struct mtk_star_priv *priv)
+{
+	regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0,
+		     MTK_STAR_BIT_PHY_CTRL0_RWOK);
+}
+
+/* Wait for the RWOK bit to be set in PHY_CTRL0, polling every 10 us for up
+ * to MTK_STAR_WAIT_TIMEOUT us. This uses the sleeping poll helper.
+ *
+ * Returns 0 on success or the error reported by the poll helper.
+ */
+static int mtk_star_mdio_rwok_wait(struct mtk_star_priv *priv)
+{
+	unsigned int val;
+
+	return regmap_read_poll_timeout(priv->regs, MTK_STAR_REG_PHY_CTRL0,
+					val, val & MTK_STAR_BIT_PHY_CTRL0_RWOK,
+					10, MTK_STAR_WAIT_TIMEOUT);
+}
+
+/* mii_bus read callback. Clause 45 accesses are rejected with -EOPNOTSUPP.
+ *
+ * Note that @phy_id is not used: only the register number is written to the
+ * command register.
+ *
+ * Returns the register value on success or a negative error code if the
+ * transaction did not complete in time.
+ */
+static int mtk_star_mdio_read(struct mii_bus *mii, int phy_id, int regnum)
+{
+	struct mtk_star_priv *priv = mii->priv;
+	unsigned int val, data;
+	int ret;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	mtk_star_mdio_rwok_clear(priv);
+
+	/* Build and issue the read command for the requested register. */
+	val = (regnum << MTK_STAR_OFF_PHY_CTRL0_PREG);
+	val &= MTK_STAR_MSK_PHY_CTRL0_PREG;
+	val |= MTK_STAR_BIT_PHY_CTRL0_RDCMD;
+
+	regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0, val);
+
+	ret = mtk_star_mdio_rwok_wait(priv);
+	if (ret)
+		return ret;
+
+	/* The result is returned in the data field of the same register. */
+	regmap_read(priv->regs, MTK_STAR_REG_PHY_CTRL0, &data);
+
+	data &= MTK_STAR_MSK_PHY_CTRL0_RWDATA;
+	data >>= MTK_STAR_OFF_PHY_CTRL0_RWDATA;
+
+	return data;
+}
+
+/* mii_bus write callback. Clause 45 accesses are rejected with -EOPNOTSUPP.
+ *
+ * Note that @phy_id is not used: only the register number and the data are
+ * written to the command register.
+ *
+ * Returns 0 on success or a negative error code if the transaction did not
+ * complete in time.
+ */
+static int mtk_star_mdio_write(struct mii_bus *mii, int phy_id,
+			       int regnum, u16 data)
+{
+	struct mtk_star_priv *priv = mii->priv;
+	unsigned int cmd;
+	int preg;
+
+	if (regnum & MII_ADDR_C45)
+		return -EOPNOTSUPP;
+
+	mtk_star_mdio_rwok_clear(priv);
+
+	/* Data field. */
+	cmd = data;
+	cmd <<= MTK_STAR_OFF_PHY_CTRL0_RWDATA;
+	cmd &= MTK_STAR_MSK_PHY_CTRL0_RWDATA;
+
+	/* Register number field. */
+	preg = regnum << MTK_STAR_OFF_PHY_CTRL0_PREG;
+	preg &= MTK_STAR_MSK_PHY_CTRL0_PREG;
+
+	cmd |= preg;
+	cmd |= MTK_STAR_BIT_PHY_CTRL0_WTCMD;
+
+	regmap_write(priv->regs, MTK_STAR_REG_PHY_CTRL0, cmd);
+
+	return mtk_star_mdio_rwok_wait(priv);
+}
+
+/* Allocate and register the MDIO bus described by the "mdio" child node of
+ * the device's OF node.
+ *
+ * Returns 0 on success, -ENODEV if the node is missing or not available,
+ * -ENOMEM if the bus cannot be allocated, or the error returned by
+ * of_mdiobus_register(). The reference taken on the "mdio" node is dropped
+ * on every path once the node was found.
+ */
+static int mtk_star_mdio_init(struct net_device *ndev)
+{
+	struct mtk_star_priv *priv = netdev_priv(ndev);
+	struct device *dev = mtk_star_get_dev(priv);
+	struct device_node *of_node, *mdio_node;
+	int ret;
+
+	of_node = dev->of_node;
+
+	mdio_node = of_get_child_by_name(of_node, "mdio");
+	if (!mdio_node)
+		return -ENODEV;
+
+	if (!of_device_is_available(mdio_node)) {
+		ret = -ENODEV;
+		goto out_put_node;
+	}
+
+	/* Device-managed allocation: no explicit free on the error paths. */
+	priv->mii = devm_mdiobus_alloc(dev);
+	if (!priv->mii) {
+		ret = -ENOMEM;
+		goto out_put_node;
+	}
+
+	snprintf(priv->mii->id, MII_BUS_ID_SIZE, "%s", dev_name(dev));
+	priv->mii->name = "mtk-mac-mdio";
+	priv->mii->parent = dev;
+	priv->mii->read = mtk_star_mdio_read;
+	priv->mii->write = mtk_star_mdio_write;
+	priv->mii->priv = priv;
+
+	ret = of_mdiobus_register(priv->mii, mdio_node);
+
+out_put_node:
+	of_node_put(mdio_node);
+	return ret;
+}
+
+static int mtk_star_suspend(struct device *dev)
+{
+	struct mtk_star_priv *priv;
+	struct net_device *ndev;
+
+	ndev = dev_get_drvdata(dev);
+	priv = netdev_priv(ndev);
+
+	if (netif_running(ndev))
+		mtk_star_disable(ndev);
+
+	clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
+
+	return 0;
+}
+
+static int mtk_star_resume(struct device *dev)
+{
+	struct mtk_star_priv *priv;
+	struct net_device *ndev;
+	int ret;
+
+	ndev = dev_get_drvdata(dev);
+	priv = netdev_priv(ndev);
+
+	ret = clk_bulk_prepare_enable(MTK_STAR_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	if (netif_running(ndev)) {
+		ret = mtk_star_enable(ndev);
+		if (ret)
+			clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
+	}
+
+	return ret;
+}
+
+static void mtk_star_clk_disable_unprepare(void *data)
+{
+	struct mtk_star_priv *priv = data;
+
+	clk_bulk_disable_unprepare(MTK_STAR_NCLKS, priv->clks);
+}
+
+static void mtk_star_mdiobus_unregister(void *data)
+{
+	struct mtk_star_priv *priv = data;
+
+	mdiobus_unregister(priv->mii);
+}
+
+static void mtk_star_unregister_netdev(void *data)
+{
+	struct net_device *ndev = data;
+
+	unregister_netdev(ndev);
+}
+
+static int mtk_star_probe(struct platform_device *pdev)
+{
+	struct device_node *of_node;
+	struct mtk_star_priv *priv;
+	struct net_device *ndev;
+	struct device *dev;
+	void __iomem *base;
+	int ret, i;
+
+	dev = &pdev->dev;
+	of_node = dev->of_node;
+
+	ndev = devm_alloc_etherdev(dev, sizeof(*priv));
+	if (!ndev)
+		return -ENOMEM;
+
+	priv = netdev_priv(ndev);
+	priv->ndev = ndev;
+	SET_NETDEV_DEV(ndev, dev);
+	platform_set_drvdata(pdev, ndev);
+
+	ndev->min_mtu = ETH_ZLEN;
+	ndev->max_mtu = MTK_STAR_MAX_FRAME_SIZE;
+
+	spin_lock_init(&priv->lock);
+	INIT_WORK(&priv->stats_work, mtk_star_update_stats_work);
+
+	base = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(base))
+		return PTR_ERR(base);
+
+	/* We won't be checking the return values of regmap read & write
+	 * functions. They can only fail for mmio if there's a clock attached
+	 * to regmap which is not the case here.
+	 */
+	priv->regs = devm_regmap_init_mmio(dev, base,
+					   &mtk_star_regmap_config);
+	if (IS_ERR(priv->regs))
+		return PTR_ERR(priv->regs);
+
+	priv->pericfg = syscon_regmap_lookup_by_phandle(of_node,
+							"mediatek,pericfg");
+	if (IS_ERR(priv->pericfg)) {
+		dev_err(dev, "Failed to lookup the PERICFG syscon\n");
+		return PTR_ERR(priv->pericfg);
+	}
+
+	ndev->irq = platform_get_irq(pdev, 0);
+	if (ndev->irq < 0)
+		return ndev->irq;
+
+	for (i = 0; i < MTK_STAR_NCLKS; i++)
+		priv->clks[i].id = mtk_star_clk_names[i];
+	ret = devm_clk_bulk_get(dev, MTK_STAR_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = clk_bulk_prepare_enable(MTK_STAR_NCLKS, priv->clks);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev,
+				       mtk_star_clk_disable_unprepare, priv);
+	if (ret)
+		return ret;
+
+	ret = of_get_phy_mode(of_node, &priv->phy_intf);
+	if (ret) {
+		return ret;
+	} else if (priv->phy_intf != PHY_INTERFACE_MODE_RMII) {
+		dev_err(dev, "unsupported phy mode: %s\n",
+			phy_modes(priv->phy_intf));
+		return -EINVAL;
+	}
+
+	priv->phy_node = of_parse_phandle(of_node, "phy-handle", 0);
+	if (!priv->phy_node) {
+		dev_err(dev, "failed to retrieve the phy handle from device tree\n");
+		return -ENODEV;
+	}
+
+	mtk_star_set_mode_rmii(priv);
+
+	ret = dma_set_mask_and_coherent(dev, DMA_BIT_MASK(32));
+	if (ret) {
+		dev_err(dev, "unsupported DMA mask\n");
+		return ret;
+	}
+
+	priv->ring_base = dmam_alloc_coherent(dev, MTK_STAR_DMA_SIZE,
+					      &priv->dma_addr,
+					      GFP_KERNEL | GFP_DMA);
+	if (!priv->ring_base)
+		return -ENOMEM;
+
+	mtk_star_nic_disable_pd(priv);
+	mtk_star_init_config(priv);
+
+	ret = mtk_star_mdio_init(ndev);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev, mtk_star_mdiobus_unregister, priv);
+	if (ret)
+		return ret;
+
+	ret = eth_platform_get_mac_address(dev, ndev->dev_addr);
+	if (ret || !is_valid_ether_addr(ndev->dev_addr))
+		eth_hw_addr_random(ndev);
+
+	ndev->netdev_ops = &mtk_star_netdev_ops;
+	ndev->ethtool_ops = &mtk_star_ethtool_ops;
+
+	netif_napi_add(ndev, &priv->napi, mtk_star_poll, MTK_STAR_NAPI_WEIGHT);
+
+	ret = register_netdev(ndev);
+	if (ret)
+		return ret;
+
+	ret = devm_add_action_or_reset(dev, mtk_star_unregister_netdev, ndev);
+	if (ret)
+		return ret;
+
+	return 0;
+}
+
+static const struct of_device_id mtk_star_of_match[] = {
+	{ .compatible = "mediatek,mt8516-eth", },
+	{ .compatible = "mediatek,mt8518-eth", },
+	{ .compatible = "mediatek,mt8175-eth", },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, mtk_star_of_match);
+
+static SIMPLE_DEV_PM_OPS(mtk_star_pm_ops,
+			 mtk_star_suspend, mtk_star_resume);
+
+static struct platform_driver mtk_star_driver = {
+	.driver = {
+		.name = MTK_STAR_DRVNAME,
+		.pm = &mtk_star_pm_ops,
+		.of_match_table = of_match_ptr(mtk_star_of_match),
+	},
+	.probe = mtk_star_probe,
+};
+module_platform_driver(mtk_star_driver);
+
+MODULE_AUTHOR("Bartosz Golaszewski <bgolaszewski@baylibre.com>");
+MODULE_DESCRIPTION("Mediatek STAR Ethernet MAC Driver");
+MODULE_LICENSE("GPL");
-- 
cgit v1.2.3-59-g8ed1b


From e56e82da172b4b17675c56dc156938d8f5f1b3c2 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:56 +0200
Subject: ARM64: dts: mediatek: add pericfg syscon to mt8516.dtsi

This adds support for the PERICFG register range as a syscon. This will
soon be used by the MediaTek Ethernet MAC driver for NIC configuration.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/mediatek/mt8516.dtsi | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8516.dtsi b/arch/arm64/boot/dts/mediatek/mt8516.dtsi
index 2f8adf042195..8cedaf74ae86 100644
--- a/arch/arm64/boot/dts/mediatek/mt8516.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8516.dtsi
@@ -191,6 +191,11 @@
 			#clock-cells = <1>;
 		};
 
+		pericfg: pericfg@10003050 {
+			compatible = "mediatek,mt8516-pericfg", "syscon";
+			reg = <0 0x10003050 0 0x1000>;
+		};
+
 		apmixedsys: apmixedsys@10018000 {
 			compatible = "mediatek,mt8516-apmixedsys", "syscon";
 			reg = <0 0x10018000 0 0x710>;
-- 
cgit v1.2.3-59-g8ed1b


From fe3b2107ae91f1c63047bafb4303c67fef1780ae Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:57 +0200
Subject: ARM64: dts: mediatek: add the ethernet node to mt8516.dtsi

Add the Ethernet MAC node to mt8516.dtsi. This defines parameters common
to all the boards based on this SoC.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/mediatek/mt8516.dtsi | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/mt8516.dtsi b/arch/arm64/boot/dts/mediatek/mt8516.dtsi
index 8cedaf74ae86..89af661e7f63 100644
--- a/arch/arm64/boot/dts/mediatek/mt8516.dtsi
+++ b/arch/arm64/boot/dts/mediatek/mt8516.dtsi
@@ -406,6 +406,18 @@
 			status = "disabled";
 		};
 
+		ethernet: ethernet@11180000 {
+			compatible = "mediatek,mt8516-eth";
+			reg = <0 0x11180000 0 0x1000>;
+			mediatek,pericfg = <&pericfg>;
+			interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>;
+			clocks = <&topckgen CLK_TOP_RG_ETH>,
+				 <&topckgen CLK_TOP_66M_ETH>,
+				 <&topckgen CLK_TOP_133M_ETH>;
+			clock-names = "core", "reg", "trans";
+			status = "disabled";
+		};
+
 		rng: rng@1020c000 {
 			compatible = "mediatek,mt8516-rng",
 				     "mediatek,mt7623-rng";
-- 
cgit v1.2.3-59-g8ed1b


From b3f5758f0d59b51a95d13724c9627bce77eff139 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:58 +0200
Subject: ARM64: dts: mediatek: add an alias for ethernet0 for pumpkin boards

Add the ethernet0 alias for ethernet so that u-boot can find this node
and fill in the MAC address.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
index a31093d7142b..97d9b000c37e 100644
--- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
+++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
@@ -9,6 +9,7 @@
 / {
 	aliases {
 		serial0 = &uart0;
+		ethernet0 = &ethernet;
 	};
 
 	chosen {
-- 
cgit v1.2.3-59-g8ed1b


From 7c20f7f36e6a3a93dcdb9cb5d790494cd7f22a1b Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:06:59 +0200
Subject: ARM64: dts: mediatek: add ethernet pins for pumpkin boards

Set up the pin control for the Ethernet MAC.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
index 97d9b000c37e..4b1d5f69aba6 100644
--- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
+++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
@@ -219,4 +219,19 @@
 			bias-pull-up;
 		};
 	};
+
+	ethernet_pins_default: ethernet {
+		pins_ethernet {
+			pinmux = <MT8516_PIN_0_EINT0__FUNC_EXT_TXD0>,
+				 <MT8516_PIN_1_EINT1__FUNC_EXT_TXD1>,
+				 <MT8516_PIN_5_EINT5__FUNC_EXT_RXER>,
+				 <MT8516_PIN_6_EINT6__FUNC_EXT_RXC>,
+				 <MT8516_PIN_7_EINT7__FUNC_EXT_RXDV>,
+				 <MT8516_PIN_8_EINT8__FUNC_EXT_RXD0>,
+				 <MT8516_PIN_9_EINT9__FUNC_EXT_RXD1>,
+				 <MT8516_PIN_12_EINT12__FUNC_EXT_TXEN>,
+				 <MT8516_PIN_38_MRG_DI__FUNC_EXT_MDIO>,
+				 <MT8516_PIN_39_MRG_DO__FUNC_EXT_MDC>;
+		};
+	};
 };
-- 
cgit v1.2.3-59-g8ed1b


From 4e4ad6862de5dda0174f3a60c98c8cded0beb64d Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 22 May 2020 14:07:00 +0200
Subject: ARM64: dts: mediatek: enable ethernet on pumpkin boards

Add remaining properties to the ethernet node and enable it.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
index 4b1d5f69aba6..dfceffe6950a 100644
--- a/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
+++ b/arch/arm64/boot/dts/mediatek/pumpkin-common.dtsi
@@ -167,6 +167,24 @@
 	status = "okay";
 };
 
+&ethernet {
+	pinctrl-names = "default";
+	pinctrl-0 = <&ethernet_pins_default>;
+	phy-handle = <&eth_phy>;
+	phy-mode = "rmii";
+	mac-address = [00 00 00 00 00 00];
+	status = "okay";
+
+	mdio {
+		#address-cells = <1>;
+		#size-cells = <0>;
+
+		eth_phy: ethernet-phy@0 {
+			reg = <0>;
+		};
+	};
+};
+
 &usb0 {
 	status = "okay";
 	dr_mode = "peripheral";
-- 
cgit v1.2.3-59-g8ed1b


From e7bb18e6c8b7ed35746af8c7c708447a66ee385c Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Wed, 20 May 2020 18:21:35 +0300
Subject: ip6_tunnel: simplify transmit path

Merge ip{4,6}ip6_tnl_xmit functions into one universal
ipxip6_tnl_xmit in preparation for adding MPLS support.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 182 ++++++++++++++++++++++----------------------------
 1 file changed, 79 insertions(+), 103 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 4703b09808d0..dae6f7146b49 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1253,22 +1253,22 @@ tx_err_dst_release:
 EXPORT_SYMBOL(ip6_tnl_xmit);
 
 static inline int
-ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
+ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
+		u8 protocol)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
+	struct ipv6hdr *ipv6h;
 	const struct iphdr  *iph;
 	int encap_limit = -1;
+	__u16 offset;
 	struct flowi6 fl6;
-	__u8 dsfield;
+	__u8 dsfield, orig_dsfield;
 	__u32 mtu;
 	u8 tproto;
 	int err;
 
-	iph = ip_hdr(skb);
-	memset(&(IPCB(skb)->opt), 0, sizeof(IPCB(skb)->opt));
-
 	tproto = READ_ONCE(t->parms.proto);
-	if (tproto != IPPROTO_IPIP && tproto != 0)
+	if (tproto != protocol && tproto != 0)
 		return -1;
 
 	if (t->parms.collect_md) {
@@ -1281,129 +1281,101 @@ ip4ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
 			return -1;
 		key = &tun_info->key;
 		memset(&fl6, 0, sizeof(fl6));
-		fl6.flowi6_proto = IPPROTO_IPIP;
+		fl6.flowi6_proto = protocol;
 		fl6.saddr = key->u.ipv6.src;
 		fl6.daddr = key->u.ipv6.dst;
 		fl6.flowlabel = key->label;
 		dsfield =  key->tos;
+		switch (protocol) {
+		case IPPROTO_IPIP:
+			iph = ip_hdr(skb);
+			orig_dsfield = ipv4_get_dsfield(iph);
+			break;
+		case IPPROTO_IPV6:
+			ipv6h = ipv6_hdr(skb);
+			orig_dsfield = ipv6_get_dsfield(ipv6h);
+			break;
+		default:
+			orig_dsfield = dsfield;
+			break;
+		}
 	} else {
 		if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT))
 			encap_limit = t->parms.encap_limit;
+		if (protocol == IPPROTO_IPV6) {
+			offset = ip6_tnl_parse_tlv_enc_lim(skb,
+						skb_network_header(skb));
+			/* ip6_tnl_parse_tlv_enc_lim() might have
+			 * reallocated skb->head
+			 */
+			if (offset > 0) {
+				struct ipv6_tlv_tnl_enc_lim *tel;
 
-		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-		fl6.flowi6_proto = IPPROTO_IPIP;
-
-		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			dsfield = ipv4_get_dsfield(iph);
-		else
-			dsfield = ip6_tclass(t->parms.flowinfo);
-		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
-			fl6.flowi6_mark = skb->mark;
-		else
-			fl6.flowi6_mark = t->parms.fwmark;
-	}
-
-	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-	dsfield = INET_ECN_encapsulate(dsfield, ipv4_get_dsfield(iph));
-
-	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
-		return -1;
-
-	skb_set_inner_ipproto(skb, IPPROTO_IPIP);
-
-	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
-			   IPPROTO_IPIP);
-	if (err != 0) {
-		/* XXX: send ICMP error even if DF is not set. */
-		if (err == -EMSGSIZE)
-			icmp_send(skb, ICMP_DEST_UNREACH, ICMP_FRAG_NEEDED,
-				  htonl(mtu));
-		return -1;
-	}
-
-	return 0;
-}
-
-static inline int
-ip6ip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev)
-{
-	struct ip6_tnl *t = netdev_priv(dev);
-	struct ipv6hdr *ipv6h;
-	int encap_limit = -1;
-	__u16 offset;
-	struct flowi6 fl6;
-	__u8 dsfield;
-	__u32 mtu;
-	u8 tproto;
-	int err;
-
-	ipv6h = ipv6_hdr(skb);
-	tproto = READ_ONCE(t->parms.proto);
-	if ((tproto != IPPROTO_IPV6 && tproto != 0) ||
-	    ip6_tnl_addr_conflict(t, ipv6h))
-		return -1;
-
-	if (t->parms.collect_md) {
-		struct ip_tunnel_info *tun_info;
-		const struct ip_tunnel_key *key;
-
-		tun_info = skb_tunnel_info(skb);
-		if (unlikely(!tun_info || !(tun_info->mode & IP_TUNNEL_INFO_TX) ||
-			     ip_tunnel_info_af(tun_info) != AF_INET6))
-			return -1;
-		key = &tun_info->key;
-		memset(&fl6, 0, sizeof(fl6));
-		fl6.flowi6_proto = IPPROTO_IPV6;
-		fl6.saddr = key->u.ipv6.src;
-		fl6.daddr = key->u.ipv6.dst;
-		fl6.flowlabel = key->label;
-		dsfield = key->tos;
-	} else {
-		offset = ip6_tnl_parse_tlv_enc_lim(skb, skb_network_header(skb));
-		/* ip6_tnl_parse_tlv_enc_lim() might have reallocated skb->head */
-		ipv6h = ipv6_hdr(skb);
-		if (offset > 0) {
-			struct ipv6_tlv_tnl_enc_lim *tel;
-
-			tel = (void *)&skb_network_header(skb)[offset];
-			if (tel->encap_limit == 0) {
-				icmpv6_send(skb, ICMPV6_PARAMPROB,
-					    ICMPV6_HDR_FIELD, offset + 2);
-				return -1;
+				tel = (void *)&skb_network_header(skb)[offset];
+				if (tel->encap_limit == 0) {
+					icmpv6_send(skb, ICMPV6_PARAMPROB,
+						ICMPV6_HDR_FIELD, offset + 2);
+					return -1;
+				}
+				encap_limit = tel->encap_limit - 1;
 			}
-			encap_limit = tel->encap_limit - 1;
-		} else if (!(t->parms.flags & IP6_TNL_F_IGN_ENCAP_LIMIT)) {
-			encap_limit = t->parms.encap_limit;
 		}
 
 		memcpy(&fl6, &t->fl.u.ip6, sizeof(fl6));
-		fl6.flowi6_proto = IPPROTO_IPV6;
+		fl6.flowi6_proto = protocol;
 
-		if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
-			dsfield = ipv6_get_dsfield(ipv6h);
-		else
-			dsfield = ip6_tclass(t->parms.flowinfo);
-		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
-			fl6.flowlabel |= ip6_flowlabel(ipv6h);
 		if (t->parms.flags & IP6_TNL_F_USE_ORIG_FWMARK)
 			fl6.flowi6_mark = skb->mark;
 		else
 			fl6.flowi6_mark = t->parms.fwmark;
+		switch (protocol) {
+		case IPPROTO_IPIP:
+			iph = ip_hdr(skb);
+			orig_dsfield = ipv4_get_dsfield(iph);
+			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+				dsfield = orig_dsfield;
+			else
+				dsfield = ip6_tclass(t->parms.flowinfo);
+			break;
+		case IPPROTO_IPV6:
+			ipv6h = ipv6_hdr(skb);
+			orig_dsfield = ipv6_get_dsfield(ipv6h);
+			if (t->parms.flags & IP6_TNL_F_USE_ORIG_TCLASS)
+				dsfield = orig_dsfield;
+			else
+				dsfield = ip6_tclass(t->parms.flowinfo);
+			if (t->parms.flags & IP6_TNL_F_USE_ORIG_FLOWLABEL)
+				fl6.flowlabel |= ip6_flowlabel(ipv6h);
+			break;
+		default:
+			break;
+		}
 	}
 
 	fl6.flowi6_uid = sock_net_uid(dev_net(dev), NULL);
-	dsfield = INET_ECN_encapsulate(dsfield, ipv6_get_dsfield(ipv6h));
+	dsfield = INET_ECN_encapsulate(dsfield, orig_dsfield);
 
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
-	skb_set_inner_ipproto(skb, IPPROTO_IPV6);
+	skb_set_inner_ipproto(skb, protocol);
 
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
-			   IPPROTO_IPV6);
+			   protocol);
 	if (err != 0) {
+		/* XXX: send ICMP error even if DF is not set. */
 		if (err == -EMSGSIZE)
-			icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+			switch (protocol) {
+			case IPPROTO_IPIP:
+				icmp_send(skb, ICMP_DEST_UNREACH,
+					  ICMP_FRAG_NEEDED, htonl(mtu));
+				break;
+			case IPPROTO_IPV6:
+				icmpv6_send(skb, ICMPV6_PKT_TOOBIG, 0, mtu);
+				break;
+			default:
+				break;
+			}
 		return -1;
 	}
 
@@ -1415,6 +1387,7 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
 {
 	struct ip6_tnl *t = netdev_priv(dev);
 	struct net_device_stats *stats = &t->dev->stats;
+	u8 ipproto;
 	int ret;
 
 	if (!pskb_inet_may_pull(skb))
@@ -1422,15 +1395,18 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
 
 	switch (skb->protocol) {
 	case htons(ETH_P_IP):
-		ret = ip4ip6_tnl_xmit(skb, dev);
+		ipproto = IPPROTO_IPIP;
 		break;
 	case htons(ETH_P_IPV6):
-		ret = ip6ip6_tnl_xmit(skb, dev);
+		if (ip6_tnl_addr_conflict(t, ipv6_hdr(skb)))
+			goto tx_err;
+		ipproto = IPPROTO_IPV6;
 		break;
 	default:
 		goto tx_err;
 	}
 
+	ret = ipxip6_tnl_xmit(skb, dev, ipproto);
 	if (ret < 0)
 		goto tx_err;
 
-- 
cgit v1.2.3-59-g8ed1b


From 6c11fbf97e69d2164406fc634758f90d34501ece Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Wed, 20 May 2020 18:21:36 +0300
Subject: ip6_tunnel: add MPLS transmit support

Add ETH_P_MPLS_UC as a supported protocol.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index dae6f7146b49..6b94c870693c 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -1232,6 +1232,8 @@ route_lookup:
 		ipv6_push_frag_opts(skb, &opt.ops, &proto);
 	}
 
+	skb_set_inner_ipproto(skb, proto);
+
 	skb_push(skb, sizeof(struct ipv6hdr));
 	skb_reset_network_header(skb);
 	ipv6h = ipv6_hdr(skb);
@@ -1348,6 +1350,7 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 				fl6.flowlabel |= ip6_flowlabel(ipv6h);
 			break;
 		default:
+			orig_dsfield = dsfield = ip6_tclass(t->parms.flowinfo);
 			break;
 		}
 	}
@@ -1358,8 +1361,6 @@ ipxip6_tnl_xmit(struct sk_buff *skb, struct net_device *dev,
 	if (iptunnel_handle_offloads(skb, SKB_GSO_IPXIP6))
 		return -1;
 
-	skb_set_inner_ipproto(skb, protocol);
-
 	err = ip6_tnl_xmit(skb, dev, dsfield, &fl6, encap_limit, &mtu,
 			   protocol);
 	if (err != 0) {
@@ -1402,6 +1403,9 @@ ip6_tnl_start_xmit(struct sk_buff *skb, struct net_device *dev)
 			goto tx_err;
 		ipproto = IPPROTO_IPV6;
 		break;
+	case htons(ETH_P_MPLS_UC):
+		ipproto = IPPROTO_MPLS;
+		break;
 	default:
 		goto tx_err;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f234efac2c6220f32cbc446e75dc1d27b04166c3 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Wed, 20 May 2020 18:21:37 +0300
Subject: tunnel6: support for IPPROTO_MPLS

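This patch is just preparation for MPLS support in ip6_tunnel: it adds a
separate AF_MPLS handler list to tunnel6 and hooks it up to IPPROTO_MPLS
when CONFIG_MPLS is enabled.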

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/tunnel6.c | 87 +++++++++++++++++++++++++++++++++++++++++++++++++++---
 1 file changed, 83 insertions(+), 4 deletions(-)

diff --git a/net/ipv6/tunnel6.c b/net/ipv6/tunnel6.c
index 21e7b95ddbfa..06c02ebe6b9b 100644
--- a/net/ipv6/tunnel6.c
+++ b/net/ipv6/tunnel6.c
@@ -21,8 +21,14 @@
 
 static struct xfrm6_tunnel __rcu *tunnel6_handlers __read_mostly;
 static struct xfrm6_tunnel __rcu *tunnel46_handlers __read_mostly;
+static struct xfrm6_tunnel __rcu *tunnelmpls6_handlers __read_mostly;
 static DEFINE_MUTEX(tunnel6_mutex);
 
+static inline int xfrm6_tunnel_mpls_supported(void)
+{
+	return IS_ENABLED(CONFIG_MPLS);
+}
+
 int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
 {
 	struct xfrm6_tunnel __rcu **pprev;
@@ -32,8 +38,21 @@ int xfrm6_tunnel_register(struct xfrm6_tunnel *handler, unsigned short family)
 
 	mutex_lock(&tunnel6_mutex);
 
-	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
-	     (t = rcu_dereference_protected(*pprev,
+	switch (family) {
+	case AF_INET6:
+		pprev = &tunnel6_handlers;
+		break;
+	case AF_INET:
+		pprev = &tunnel46_handlers;
+		break;
+	case AF_MPLS:
+		pprev = &tunnelmpls6_handlers;
+		break;
+	default:
+		goto err;
+	}
+
+	for (; (t = rcu_dereference_protected(*pprev,
 			lockdep_is_held(&tunnel6_mutex))) != NULL;
 	     pprev = &t->next) {
 		if (t->priority > priority)
@@ -62,8 +81,21 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
 
 	mutex_lock(&tunnel6_mutex);
 
-	for (pprev = (family == AF_INET6) ? &tunnel6_handlers : &tunnel46_handlers;
-	     (t = rcu_dereference_protected(*pprev,
+	switch (family) {
+	case AF_INET6:
+		pprev = &tunnel6_handlers;
+		break;
+	case AF_INET:
+		pprev = &tunnel46_handlers;
+		break;
+	case AF_MPLS:
+		pprev = &tunnelmpls6_handlers;
+		break;
+	default:
+		goto err;
+	}
+
+	for (; (t = rcu_dereference_protected(*pprev,
 			lockdep_is_held(&tunnel6_mutex))) != NULL;
 	     pprev = &t->next) {
 		if (t == handler) {
@@ -73,6 +105,7 @@ int xfrm6_tunnel_deregister(struct xfrm6_tunnel *handler, unsigned short family)
 		}
 	}
 
+err:
 	mutex_unlock(&tunnel6_mutex);
 
 	synchronize_net();
@@ -86,6 +119,24 @@ EXPORT_SYMBOL(xfrm6_tunnel_deregister);
 	     handler != NULL;				\
 	     handler = rcu_dereference(handler->next))	\
 
+static int tunnelmpls6_rcv(struct sk_buff *skb)
+{
+	struct xfrm6_tunnel *handler;
+
+	if (!pskb_may_pull(skb, sizeof(struct ipv6hdr)))
+		goto drop;
+
+	for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
+		if (!handler->handler(skb))
+			return 0;
+
+	icmpv6_send(skb, ICMPV6_DEST_UNREACH, ICMPV6_PORT_UNREACH, 0);
+
+drop:
+	kfree_skb(skb);
+	return 0;
+}
+
 static int tunnel6_rcv(struct sk_buff *skb)
 {
 	struct xfrm6_tunnel *handler;
@@ -146,6 +197,18 @@ static int tunnel46_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return -ENOENT;
 }
 
+static int tunnelmpls6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+			   u8 type, u8 code, int offset, __be32 info)
+{
+	struct xfrm6_tunnel *handler;
+
+	for_each_tunnel_rcu(tunnelmpls6_handlers, handler)
+		if (!handler->err_handler(skb, opt, type, code, offset, info))
+			return 0;
+
+	return -ENOENT;
+}
+
 static const struct inet6_protocol tunnel6_protocol = {
 	.handler	= tunnel6_rcv,
 	.err_handler	= tunnel6_err,
@@ -158,6 +221,12 @@ static const struct inet6_protocol tunnel46_protocol = {
 	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
 };
 
+static const struct inet6_protocol tunnelmpls6_protocol = {
+	.handler	= tunnelmpls6_rcv,
+	.err_handler	= tunnelmpls6_err,
+	.flags          = INET6_PROTO_NOPOLICY|INET6_PROTO_FINAL,
+};
+
 static int __init tunnel6_init(void)
 {
 	if (inet6_add_protocol(&tunnel6_protocol, IPPROTO_IPV6)) {
@@ -169,6 +238,13 @@ static int __init tunnel6_init(void)
 		inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
 		return -EAGAIN;
 	}
+	if (xfrm6_tunnel_mpls_supported() &&
+	    inet6_add_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS)) {
+		pr_err("%s: can't add protocol\n", __func__);
+		inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6);
+		inet6_del_protocol(&tunnel46_protocol, IPPROTO_IPIP);
+		return -EAGAIN;
+	}
 	return 0;
 }
 
@@ -178,6 +254,9 @@ static void __exit tunnel6_fini(void)
 		pr_err("%s: can't remove protocol\n", __func__);
 	if (inet6_del_protocol(&tunnel6_protocol, IPPROTO_IPV6))
 		pr_err("%s: can't remove protocol\n", __func__);
+	if (xfrm6_tunnel_mpls_supported() &&
+	    inet6_del_protocol(&tunnelmpls6_protocol, IPPROTO_MPLS))
+		pr_err("%s: can't remove protocol\n", __func__);
 }
 
 module_init(tunnel6_init);
-- 
cgit v1.2.3-59-g8ed1b


From f200e98d9716ce52464838dbaa2856e5ecc52194 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Wed, 20 May 2020 18:21:38 +0300
Subject: ip6_tunnel: add generic MPLS receive support

Add support for MPLS on the receive side.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ip6_tunnel.c | 59 +++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 59 insertions(+)

diff --git a/net/ipv6/ip6_tunnel.c b/net/ipv6/ip6_tunnel.c
index 6b94c870693c..821d96c720b9 100644
--- a/net/ipv6/ip6_tunnel.c
+++ b/net/ipv6/ip6_tunnel.c
@@ -89,6 +89,11 @@ struct ip6_tnl_net {
 	struct ip6_tnl __rcu *collect_md_tun;
 };
 
+static inline int ip6_tnl_mpls_supported(void)
+{
+	return IS_ENABLED(CONFIG_MPLS);
+}
+
 static struct net_device_stats *ip6_get_stats(struct net_device *dev)
 {
 	struct pcpu_sw_netstats tmp, sum = { 0 };
@@ -718,6 +723,20 @@ ip6ip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 	return 0;
 }
 
+static int
+mplsip6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
+	    u8 type, u8 code, int offset, __be32 info)
+{
+	__u32 rel_info = ntohl(info);
+	int err, rel_msg = 0;
+	u8 rel_type = type;
+	u8 rel_code = code;
+
+	err = ip6_tnl_err(skb, IPPROTO_MPLS, opt, &rel_type, &rel_code,
+			  &rel_msg, &rel_info, offset);
+	return err;
+}
+
 static int ip4ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 				       const struct ipv6hdr *ipv6h,
 				       struct sk_buff *skb)
@@ -740,6 +759,14 @@ static int ip6ip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
 	return IP6_ECN_decapsulate(ipv6h, skb);
 }
 
+static inline int mplsip6_dscp_ecn_decapsulate(const struct ip6_tnl *t,
+					       const struct ipv6hdr *ipv6h,
+					       struct sk_buff *skb)
+{
+	/* ECN is not supported in AF_MPLS */
+	return 0;
+}
+
 __u32 ip6_tnl_get_cap(struct ip6_tnl *t,
 			     const struct in6_addr *laddr,
 			     const struct in6_addr *raddr)
@@ -901,6 +928,11 @@ static const struct tnl_ptk_info tpi_v4 = {
 	.proto = htons(ETH_P_IP),
 };
 
+static const struct tnl_ptk_info tpi_mpls = {
+	/* no tunnel info required for mplsip6. */
+	.proto = htons(ETH_P_MPLS_UC),
+};
+
 static int ipxip6_rcv(struct sk_buff *skb, u8 ipproto,
 		      const struct tnl_ptk_info *tpi,
 		      int (*dscp_ecn_decapsulate)(const struct ip6_tnl *t,
@@ -958,6 +990,12 @@ static int ip6ip6_rcv(struct sk_buff *skb)
 			  ip6ip6_dscp_ecn_decapsulate);
 }
 
+static int mplsip6_rcv(struct sk_buff *skb)
+{
+	return ipxip6_rcv(skb, IPPROTO_MPLS, &tpi_mpls,
+			  mplsip6_dscp_ecn_decapsulate);
+}
+
 struct ipv6_tel_txoption {
 	struct ipv6_txoptions ops;
 	__u8 dst_opt[8];
@@ -2198,6 +2236,12 @@ static struct xfrm6_tunnel ip6ip6_handler __read_mostly = {
 	.priority	=	1,
 };
 
+static struct xfrm6_tunnel mplsip6_handler __read_mostly = {
+	.handler	= mplsip6_rcv,
+	.err_handler	= mplsip6_err,
+	.priority	=	1,
+};
+
 static void __net_exit ip6_tnl_destroy_tunnels(struct net *net, struct list_head *list)
 {
 	struct ip6_tnl_net *ip6n = net_generic(net, ip6_tnl_net_id);
@@ -2312,6 +2356,15 @@ static int __init ip6_tunnel_init(void)
 		pr_err("%s: can't register ip6ip6\n", __func__);
 		goto out_ip6ip6;
 	}
+
+	if (ip6_tnl_mpls_supported()) {
+		err = xfrm6_tunnel_register(&mplsip6_handler, AF_MPLS);
+		if (err < 0) {
+			pr_err("%s: can't register mplsip6\n", __func__);
+			goto out_mplsip6;
+		}
+	}
+
 	err = rtnl_link_register(&ip6_link_ops);
 	if (err < 0)
 		goto rtnl_link_failed;
@@ -2319,6 +2372,9 @@ static int __init ip6_tunnel_init(void)
 	return 0;
 
 rtnl_link_failed:
+	if (ip6_tnl_mpls_supported())
+		xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS);
+out_mplsip6:
 	xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6);
 out_ip6ip6:
 	xfrm6_tunnel_deregister(&ip4ip6_handler, AF_INET);
@@ -2341,6 +2397,9 @@ static void __exit ip6_tunnel_cleanup(void)
 	if (xfrm6_tunnel_deregister(&ip6ip6_handler, AF_INET6))
 		pr_info("%s: can't deregister ip6ip6\n", __func__);
 
+	if (ip6_tnl_mpls_supported() &&
+	    xfrm6_tunnel_deregister(&mplsip6_handler, AF_MPLS))
+		pr_info("%s: can't deregister mplsip6\n", __func__);
 	unregister_pernet_device(&ip6_tnl_net_ops);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 1515aa70c04151676a7dfefecfcf1d36ff52bc67 Mon Sep 17 00:00:00 2001
From: Vadim Fedorenko <vfedorenko@novek.ru>
Date: Wed, 20 May 2020 18:21:39 +0300
Subject: mpls: Add support for IPv6 tunnels

Add support for IPv6 tunnel devices in AF_MPLS.

Signed-off-by: Vadim Fedorenko <vfedorenko@novek.ru>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mpls/af_mpls.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/net/mpls/af_mpls.c b/net/mpls/af_mpls.c
index a42e4ed5ab0e..fd30ea61336e 100644
--- a/net/mpls/af_mpls.c
+++ b/net/mpls/af_mpls.c
@@ -1593,7 +1593,8 @@ static int mpls_dev_notify(struct notifier_block *this, unsigned long event,
 		    dev->type == ARPHRD_IPGRE ||
 		    dev->type == ARPHRD_IP6GRE ||
 		    dev->type == ARPHRD_SIT ||
-		    dev->type == ARPHRD_TUNNEL) {
+		    dev->type == ARPHRD_TUNNEL ||
+		    dev->type == ARPHRD_TUNNEL6) {
 			mdev = mpls_add_dev(dev);
 			if (IS_ERR(mdev))
 				return notifier_from_errno(PTR_ERR(mdev));
-- 
cgit v1.2.3-59-g8ed1b


From 060b6381efe58478e1d7dfff7a1e76a73a6377db Mon Sep 17 00:00:00 2001
From: Edward Cree <ecree@solarflare.com>
Date: Wed, 20 May 2020 19:18:10 +0100
Subject: net: flow_offload: simplify hw stats check handling

Make FLOW_ACTION_HW_STATS_DONT_CARE be all bits, rather than none, so that
 drivers and __flow_action_hw_stats_check can use simple bitwise checks.

Pre-fill all actions with DONT_CARE in flow_rule_alloc(), rather than
 relying on implicit semantics of zero from kzalloc, so that callers which
 don't configure action stats themselves (i.e. netfilter) get the correct
 behaviour by default.

Only the kernel's internal API semantics change; the TC uAPI is unaffected.

v4: move DONT_CARE setting to flow_rule_alloc() for robustness and simplicity.

v3: set DONT_CARE in nft and ct offload.

v2: rebased on net-next, removed RFC tags.

Signed-off-by: Edward Cree <ecree@solarflare.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c |  8 ++++----
 include/net/flow_offload.h                            | 11 +++++++----
 net/core/flow_offload.c                               |  6 ++++++
 3 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
index b286fe158820..51e1b3930c56 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_flower.c
@@ -30,14 +30,14 @@ static int mlxsw_sp_flower_parse_actions(struct mlxsw_sp *mlxsw_sp,
 		return -EOPNOTSUPP;
 
 	act = flow_action_first_entry_get(flow_action);
-	if (act->hw_stats == FLOW_ACTION_HW_STATS_ANY ||
-	    act->hw_stats == FLOW_ACTION_HW_STATS_IMMEDIATE) {
+	if (act->hw_stats & FLOW_ACTION_HW_STATS_DISABLED) {
+		/* Nothing to do */
+	} else if (act->hw_stats & FLOW_ACTION_HW_STATS_IMMEDIATE) {
 		/* Count action is inserted first */
 		err = mlxsw_sp_acl_rulei_act_count(mlxsw_sp, rulei, extack);
 		if (err)
 			return err;
-	} else if (act->hw_stats != FLOW_ACTION_HW_STATS_DISABLED &&
-		   act->hw_stats != FLOW_ACTION_HW_STATS_DONT_CARE) {
+	} else {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported action HW stats type");
 		return -EOPNOTSUPP;
 	}
diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 4001ffb04f0d..95d633785ef9 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -168,10 +168,11 @@ enum flow_action_hw_stats_bit {
 	FLOW_ACTION_HW_STATS_IMMEDIATE_BIT,
 	FLOW_ACTION_HW_STATS_DELAYED_BIT,
 	FLOW_ACTION_HW_STATS_DISABLED_BIT,
+
+	FLOW_ACTION_HW_STATS_NUM_BITS
 };
 
 enum flow_action_hw_stats {
-	FLOW_ACTION_HW_STATS_DONT_CARE = 0,
 	FLOW_ACTION_HW_STATS_IMMEDIATE =
 		BIT(FLOW_ACTION_HW_STATS_IMMEDIATE_BIT),
 	FLOW_ACTION_HW_STATS_DELAYED = BIT(FLOW_ACTION_HW_STATS_DELAYED_BIT),
@@ -179,6 +180,7 @@ enum flow_action_hw_stats {
 				   FLOW_ACTION_HW_STATS_DELAYED,
 	FLOW_ACTION_HW_STATS_DISABLED =
 		BIT(FLOW_ACTION_HW_STATS_DISABLED_BIT),
+	FLOW_ACTION_HW_STATS_DONT_CARE = BIT(FLOW_ACTION_HW_STATS_NUM_BITS) - 1,
 };
 
 typedef void (*action_destr)(void *priv);
@@ -340,11 +342,12 @@ __flow_action_hw_stats_check(const struct flow_action *action,
 		return false;
 
 	action_entry = flow_action_first_entry_get(action);
-	if (action_entry->hw_stats == FLOW_ACTION_HW_STATS_DONT_CARE)
-		return true;
+
+	/* Zero is not a legal value for hw_stats, catch anyone passing it */
+	WARN_ON_ONCE(!action_entry->hw_stats);
 
 	if (!check_allow_bit &&
-	    action_entry->hw_stats != FLOW_ACTION_HW_STATS_ANY) {
+	    ~action_entry->hw_stats & FLOW_ACTION_HW_STATS_ANY) {
 		NL_SET_ERR_MSG_MOD(extack, "Driver supports only default HW stats type \"any\"");
 		return false;
 	} else if (check_allow_bit &&
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index e951b743bed3..e64941c526b1 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -8,6 +8,7 @@
 struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 {
 	struct flow_rule *rule;
+	int i;
 
 	rule = kzalloc(struct_size(rule, action.entries, num_actions),
 		       GFP_KERNEL);
@@ -15,6 +16,11 @@ struct flow_rule *flow_rule_alloc(unsigned int num_actions)
 		return NULL;
 
 	rule->action.num_entries = num_actions;
+	/* Pre-fill each action hw_stats with DONT_CARE.
+	 * Caller can override this if it wants stats for a given action.
+	 */
+	for (i = 0; i < num_actions; i++)
+		rule->action.entries[i].hw_stats = FLOW_ACTION_HW_STATS_DONT_CARE;
 
 	return rule;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 93a09e74574b2b75319938ef4155fe841335e436 Mon Sep 17 00:00:00 2001
From: Potnuri Bharat Teja <bharat@chelsio.com>
Date: Thu, 21 May 2020 16:04:29 +0530
Subject: cxgb4: add adapter hotplug support for ULDs

Upon adapter hotplug, cxgb4 registers ULD devices for all the ULDs that
are already loaded, ensuring that ULDs can enumerate the hotplugged
adapter without reloading the ULD.

Signed-off-by: Potnuri Bharat Teja <bharat@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h      |   8 ++
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c |   8 +-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c  | 138 +++++++++++++++---------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h  |   2 +
 4 files changed, 100 insertions(+), 56 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index fc1405a8ed74..5a41801acb6a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -60,6 +60,7 @@
 
 #define CH_WARN(adap, fmt, ...) dev_warn(adap->pdev_dev, fmt, ## __VA_ARGS__)
 extern struct list_head adapter_list;
+extern struct list_head uld_list;
 extern struct mutex uld_mutex;
 
 /* Suspend an Ethernet Tx queue with fewer available descriptors than this.
@@ -822,6 +823,13 @@ struct sge_uld_txq_info {
 	u16 ntxq;		/* # of egress uld queues */
 };
 
+/* struct to maintain ULD list to reallocate ULD resources on hotplug */
+struct cxgb4_uld_list {
+	struct cxgb4_uld_info uld_info;
+	struct list_head list_node;
+	enum cxgb4_uld uld_type;
+};
+
 enum sge_eosw_state {
 	CXGB4_EO_STATE_CLOSED = 0, /* Not ready to accept traffic */
 	CXGB4_EO_STATE_FLOWC_OPEN_SEND, /* Send FLOWC open request */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index d05c2371d8c7..7a0414f379be 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -180,6 +180,7 @@ static struct dentry *cxgb4_debugfs_root;
 
 LIST_HEAD(adapter_list);
 DEFINE_MUTEX(uld_mutex);
+LIST_HEAD(uld_list);
 
 static int cfg_queues(struct adapter *adap);
 
@@ -6519,11 +6520,8 @@ fw_attach_fail:
 	/* PCIe EEH recovery on powerpc platforms needs fundamental reset */
 	pdev->needs_freset = 1;
 
-	if (is_uld(adapter)) {
-		mutex_lock(&uld_mutex);
-		list_add_tail(&adapter->list_node, &adapter_list);
-		mutex_unlock(&uld_mutex);
-	}
+	if (is_uld(adapter))
+		cxgb4_uld_enable(adapter);
 
 	if (!is_t4(adapter->params.chip))
 		cxgb4_ptp_init(adapter);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index e65b52375dd8..6b1d3df4b9ba 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -681,6 +681,74 @@ static void cxgb4_set_ktls_feature(struct adapter *adap, bool enable)
 }
 #endif
 
+static void cxgb4_uld_alloc_resources(struct adapter *adap,
+				      enum cxgb4_uld type,
+				      const struct cxgb4_uld_info *p)
+{
+	int ret = 0;
+
+	if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
+	    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
+		return;
+	if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
+		return;
+	ret = cfg_queues_uld(adap, type, p);
+	if (ret)
+		goto out;
+	ret = setup_sge_queues_uld(adap, type, p->lro);
+	if (ret)
+		goto free_queues;
+	if (adap->flags & CXGB4_USING_MSIX) {
+		ret = request_msix_queue_irqs_uld(adap, type);
+		if (ret)
+			goto free_rxq;
+	}
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
+		enable_rx_uld(adap, type);
+#ifdef CONFIG_CHELSIO_TLS_DEVICE
+	/* send mbox to enable ktls related settings. */
+	if (type == CXGB4_ULD_CRYPTO &&
+	    (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW))
+		cxgb4_set_ktls_feature(adap, 1);
+#endif
+	if (adap->uld[type].add)
+		goto free_irq;
+	ret = setup_sge_txq_uld(adap, type, p);
+	if (ret)
+		goto free_irq;
+	adap->uld[type] = *p;
+	ret = uld_attach(adap, type);
+	if (ret)
+		goto free_txq;
+	return;
+free_txq:
+	release_sge_txq_uld(adap, type);
+free_irq:
+	if (adap->flags & CXGB4_FULL_INIT_DONE)
+		quiesce_rx_uld(adap, type);
+	if (adap->flags & CXGB4_USING_MSIX)
+		free_msix_queue_irqs_uld(adap, type);
+free_rxq:
+	free_sge_queues_uld(adap, type);
+free_queues:
+	free_queues_uld(adap, type);
+out:
+	dev_warn(adap->pdev_dev,
+		 "ULD registration failed for uld type %d\n", type);
+}
+
+void cxgb4_uld_enable(struct adapter *adap)
+{
+	struct cxgb4_uld_list *uld_entry;
+
+	mutex_lock(&uld_mutex);
+	list_add_tail(&adap->list_node, &adapter_list);
+	list_for_each_entry(uld_entry, &uld_list, list_node)
+		cxgb4_uld_alloc_resources(adap, uld_entry->uld_type,
+					  &uld_entry->uld_info);
+	mutex_unlock(&uld_mutex);
+}
+
 /* cxgb4_register_uld - register an upper-layer driver
  * @type: the ULD type
  * @p: the ULD methods
@@ -691,63 +759,23 @@ static void cxgb4_set_ktls_feature(struct adapter *adap, bool enable)
 void cxgb4_register_uld(enum cxgb4_uld type,
 			const struct cxgb4_uld_info *p)
 {
+	struct cxgb4_uld_list *uld_entry;
 	struct adapter *adap;
-	int ret = 0;
 
 	if (type >= CXGB4_ULD_MAX)
 		return;
 
+	uld_entry = kzalloc(sizeof(*uld_entry), GFP_KERNEL);
+	if (!uld_entry)
+		return;
+
+	memcpy(&uld_entry->uld_info, p, sizeof(struct cxgb4_uld_info));
 	mutex_lock(&uld_mutex);
-	list_for_each_entry(adap, &adapter_list, list_node) {
-		if ((type == CXGB4_ULD_CRYPTO && !is_pci_uld(adap)) ||
-		    (type != CXGB4_ULD_CRYPTO && !is_offload(adap)))
-			continue;
-		if (type == CXGB4_ULD_ISCSIT && is_t4(adap->params.chip))
-			continue;
-		ret = cfg_queues_uld(adap, type, p);
-		if (ret)
-			goto out;
-		ret = setup_sge_queues_uld(adap, type, p->lro);
-		if (ret)
-			goto free_queues;
-		if (adap->flags & CXGB4_USING_MSIX) {
-			ret = request_msix_queue_irqs_uld(adap, type);
-			if (ret)
-				goto free_rxq;
-		}
-		if (adap->flags & CXGB4_FULL_INIT_DONE)
-			enable_rx_uld(adap, type);
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-		/* send mbox to enable ktls related settings. */
-		if (type == CXGB4_ULD_CRYPTO &&
-		    (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW))
-			cxgb4_set_ktls_feature(adap, 1);
-#endif
-		if (adap->uld[type].add)
-			goto free_irq;
-		ret = setup_sge_txq_uld(adap, type, p);
-		if (ret)
-			goto free_irq;
-		adap->uld[type] = *p;
-		ret = uld_attach(adap, type);
-		if (ret)
-			goto free_txq;
-		continue;
-free_txq:
-		release_sge_txq_uld(adap, type);
-free_irq:
-		if (adap->flags & CXGB4_FULL_INIT_DONE)
-			quiesce_rx_uld(adap, type);
-		if (adap->flags & CXGB4_USING_MSIX)
-			free_msix_queue_irqs_uld(adap, type);
-free_rxq:
-		free_sge_queues_uld(adap, type);
-free_queues:
-		free_queues_uld(adap, type);
-out:
-		dev_warn(adap->pdev_dev,
-			 "ULD registration failed for uld type %d\n", type);
-	}
+	list_for_each_entry(adap, &adapter_list, list_node)
+		cxgb4_uld_alloc_resources(adap, type, p);
+
+	uld_entry->uld_type = type;
+	list_add_tail(&uld_entry->list_node, &uld_list);
 	mutex_unlock(&uld_mutex);
 	return;
 }
@@ -761,6 +789,7 @@ EXPORT_SYMBOL(cxgb4_register_uld);
  */
 int cxgb4_unregister_uld(enum cxgb4_uld type)
 {
+	struct cxgb4_uld_list *uld_entry, *tmp;
 	struct adapter *adap;
 
 	if (type >= CXGB4_ULD_MAX)
@@ -783,6 +812,13 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 			cxgb4_set_ktls_feature(adap, 0);
 #endif
 	}
+
+	list_for_each_entry_safe(uld_entry, tmp, &uld_list, list_node) {
+		if (uld_entry->uld_type == type) {
+			list_del(&uld_entry->list_node);
+			kfree(uld_entry);
+		}
+	}
 	mutex_unlock(&uld_mutex);
 
 	return 0;
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 16796785eea3..085fa1424f9a 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -327,6 +327,7 @@ enum cxgb4_control {
 	CXGB4_CONTROL_DB_DROP,
 };
 
+struct adapter;
 struct pci_dev;
 struct l2t_data;
 struct net_device;
@@ -465,6 +466,7 @@ struct cxgb4_uld_info {
 	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
 };
 
+void cxgb4_uld_enable(struct adapter *adap);
 void cxgb4_register_uld(enum cxgb4_uld type, const struct cxgb4_uld_info *p);
 int cxgb4_unregister_uld(enum cxgb4_uld type);
 int cxgb4_ofld_send(struct net_device *dev, struct sk_buff *skb);
-- 
cgit v1.2.3-59-g8ed1b


From 593532668f635d19d207510e0fbb5c2250f56b6f Mon Sep 17 00:00:00 2001
From: "David S. Miller" <davem@davemloft.net>
Date: Fri, 22 May 2020 16:09:42 -0700
Subject: Revert "net: mvneta: speed down the PHY, if WoL used, to save energy"

This reverts commit 5e3768a436bb70c9c3e27aaba6b73f8ef8f5dcf3.

Reverted on request from Russell King; this is a layering violation.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 41d2a0eac5fa..37947949345c 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -3566,10 +3566,6 @@ static void mvneta_start_dev(struct mvneta_port *pp)
 		    MVNETA_CAUSE_LINK_CHANGE);
 
 	phylink_start(pp->phylink);
-
-	/* We may have called phy_speed_down before */
-	phy_speed_up(pp->dev->phydev);
-
 	netif_tx_start_all_queues(pp->dev);
 }
 
@@ -3577,9 +3573,6 @@ static void mvneta_stop_dev(struct mvneta_port *pp)
 {
 	unsigned int cpu;
 
-	if (device_may_wakeup(&pp->dev->dev))
-		phy_speed_down(pp->dev->phydev, false);
-
 	phylink_stop(pp->phylink);
 
 	if (!pp->neta_armada3700) {
@@ -4052,10 +4045,6 @@ static int mvneta_mdio_probe(struct mvneta_port *pp)
 	phylink_ethtool_get_wol(pp->phylink, &wol);
 	device_set_wakeup_capable(&pp->dev->dev, !!wol.supported);
 
-	/* PHY WoL may be enabled but device wakeup disabled */
-	if (wol.supported)
-		device_set_wakeup_enable(&pp->dev->dev, !!wol.wolopts);
-
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 6736aa793c2b5fb6c64884d2623c66aa1b9bfa92 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 22 May 2020 12:24:34 +0100
Subject: selftests/bpf: Add general instructions for test execution

Getting a clean BPF selftests run involves ensuring latest trunk LLVM/clang
are used, pahole is recent (>=1.16) and config matches the specified
config file as closely as possible.  Add to bpf_devel_QA.rst and point
tools/testing/selftests/bpf/README.rst to it.

Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/1590146674-25485-1-git-send-email-alan.maguire@oracle.com
---
 Documentation/bpf/bpf_devel_QA.rst     | 15 +++++++++++++++
 tools/testing/selftests/bpf/README.rst |  2 ++
 2 files changed, 17 insertions(+)

diff --git a/Documentation/bpf/bpf_devel_QA.rst b/Documentation/bpf/bpf_devel_QA.rst
index 38c15c6fcb14..0b3db91dc100 100644
--- a/Documentation/bpf/bpf_devel_QA.rst
+++ b/Documentation/bpf/bpf_devel_QA.rst
@@ -437,6 +437,21 @@ needed::
 See the kernels selftest `Documentation/dev-tools/kselftest.rst`_
 document for further documentation.
 
+To maximize the number of tests passing, the .config of the kernel
+under test should match the config file fragment in
+tools/testing/selftests/bpf as closely as possible.
+
+Finally to ensure support for latest BPF Type Format features -
+discussed in `Documentation/bpf/btf.rst`_ - pahole version 1.16
+is required for kernels built with CONFIG_DEBUG_INFO_BTF=y.
+pahole is delivered in the dwarves package or can be built
+from source at
+
+https://github.com/acmel/dwarves
+
+Some distros have pahole version 1.16 packaged already, e.g.
+Fedora, Gentoo.
+
 Q: Which BPF kernel selftests version should I run my kernel against?
 ---------------------------------------------------------------------
 A: If you run a kernel ``xyz``, then always run the BPF kernel selftests
diff --git a/tools/testing/selftests/bpf/README.rst b/tools/testing/selftests/bpf/README.rst
index 0f67f1b470b0..e885d351595f 100644
--- a/tools/testing/selftests/bpf/README.rst
+++ b/tools/testing/selftests/bpf/README.rst
@@ -1,6 +1,8 @@
 ==================
 BPF Selftest Notes
 ==================
+General instructions on running selftests can be found in
+`Documentation/bpf/bpf_devel_QA.rst`_.
 
 Additional information about selftest failures are
 documented here.
-- 
cgit v1.2.3-59-g8ed1b


From 3c8e8cf4b18b3a7034fab4c4504fc4b54e4b6195 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 22 May 2020 12:36:28 +0100
Subject: selftests/bpf: CONFIG_IPV6_SEG6_BPF required for test_seg6_loop.o

test_seg6_loop.o uses the helper bpf_lwt_seg6_adjust_srh();
it will not be present if CONFIG_IPV6_SEG6_BPF is not specified.

Fixes: b061017f8b4d ("selftests/bpf: add realistic loop tests")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1590147389-26482-2-git-send-email-alan.maguire@oracle.com
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 60e3ae5d4e48..48e058552eb7 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -25,6 +25,7 @@ CONFIG_XDP_SOCKETS=y
 CONFIG_FTRACE_SYSCALLS=y
 CONFIG_IPV6_TUNNEL=y
 CONFIG_IPV6_GRE=y
+CONFIG_IPV6_SEG6_BPF=y
 CONFIG_NET_FOU=m
 CONFIG_NET_FOU_IP_TUNNELS=y
 CONFIG_IPV6_FOU=m
-- 
cgit v1.2.3-59-g8ed1b


From a5dfaa2ab94057dd75c7911143482a0a85593c14 Mon Sep 17 00:00:00 2001
From: Alan Maguire <alan.maguire@oracle.com>
Date: Fri, 22 May 2020 12:36:29 +0100
Subject: selftests/bpf: CONFIG_LIRC required for test_lirc_mode2.sh

test_lirc_mode2.sh assumes presence of /sys/class/rc/rc0/lirc*/uevent
which will not be present unless CONFIG_LIRC=y

Fixes: 6bdd533cee9a ("bpf: add selftest for lirc_mode2 type program")
Signed-off-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/1590147389-26482-3-git-send-email-alan.maguire@oracle.com
---
 tools/testing/selftests/bpf/config | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/config b/tools/testing/selftests/bpf/config
index 48e058552eb7..2118e23ac07a 100644
--- a/tools/testing/selftests/bpf/config
+++ b/tools/testing/selftests/bpf/config
@@ -38,3 +38,4 @@ CONFIG_IPV6_SIT=m
 CONFIG_BPF_JIT=y
 CONFIG_BPF_LSM=y
 CONFIG_SECURITY=y
+CONFIG_LIRC=y
-- 
cgit v1.2.3-59-g8ed1b


From c4566aec6e8088b28687d006a48469498b9f2af8 Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 21 May 2020 12:47:37 -0500
Subject: net: phy: dp83869: Update port-mirroring to read straps

The device tree may not have the property set for port mirroring
because the hardware may have it strapped. If the property is not in the
DT then check the straps and set the port mirroring bit appropriately.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83869.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index 7996a4aea8d2..073a0f7754a5 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -66,6 +66,7 @@
 
 /* STRAP_STS1 bits */
 #define DP83869_STRAP_STS1_RESERVED		BIT(11)
+#define DP83869_STRAP_MIRROR_ENABLED           BIT(12)
 
 /* PHYCTRL bits */
 #define DP83869_RX_FIFO_SHIFT	12
@@ -191,10 +192,18 @@ static int dp83869_of_init(struct phy_device *phydev)
 	else if (of_property_read_bool(of_node, "ti,min-output-impedance"))
 		dp83869->io_impedance = DP83869_IO_MUX_CFG_IO_IMPEDANCE_MIN;
 
-	if (of_property_read_bool(of_node, "enet-phy-lane-swap"))
+	if (of_property_read_bool(of_node, "enet-phy-lane-swap")) {
 		dp83869->port_mirroring = DP83869_PORT_MIRRORING_EN;
-	else
-		dp83869->port_mirroring = DP83869_PORT_MIRRORING_DIS;
+	} else {
+		/* If the lane swap is not in the DT then check the straps */
+		ret = phy_read_mmd(phydev, DP83869_DEVADDR, DP83869_STRAP_STS1);
+		if (ret < 0)
+			return ret;
+		if (ret & DP83869_STRAP_MIRROR_ENABLED)
+			dp83869->port_mirroring = DP83869_PORT_MIRRORING_EN;
+		else
+			dp83869->port_mirroring = DP83869_PORT_MIRRORING_DIS;
+	}
 
 	if (of_property_read_u32(of_node, "rx-fifo-depth",
 				 &dp83869->rx_fifo_depth))
-- 
cgit v1.2.3-59-g8ed1b


From 0eaf8ccf2047d60f8725d3cfa005ee6170f1fdce Mon Sep 17 00:00:00 2001
From: Dan Murphy <dmurphy@ti.com>
Date: Thu, 21 May 2020 12:47:38 -0500
Subject: net: phy: dp83869: Set opmode from straps

If the op-mode for the device is not set in the device tree then set
the strapped op-mode and store it for later configuration.

Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: Dan Murphy <dmurphy@ti.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/dp83869.c | 21 ++++++++++++++++++++-
 1 file changed, 20 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/dp83869.c b/drivers/net/phy/dp83869.c
index 073a0f7754a5..cfb22a21a2e6 100644
--- a/drivers/net/phy/dp83869.c
+++ b/drivers/net/phy/dp83869.c
@@ -65,6 +65,7 @@
 #define DP83869_RGMII_RX_CLK_DELAY_EN		BIT(0)
 
 /* STRAP_STS1 bits */
+#define DP83869_STRAP_OP_MODE_MASK		GENMASK(2, 0)
 #define DP83869_STRAP_STS1_RESERVED		BIT(11)
 #define DP83869_STRAP_MIRROR_ENABLED           BIT(12)
 
@@ -161,6 +162,20 @@ static int dp83869_config_port_mirroring(struct phy_device *phydev)
 					  DP83869_CFG3_PORT_MIRROR_EN);
 }
 
+static int dp83869_set_strapped_mode(struct phy_device *phydev)
+{
+	struct dp83869_private *dp83869 = phydev->priv;
+	int val;
+
+	val = phy_read_mmd(phydev, DP83869_DEVADDR, DP83869_STRAP_STS1);
+	if (val < 0)
+		return val;
+
+	dp83869->mode = val & DP83869_STRAP_OP_MODE_MASK;
+
+	return 0;
+}
+
 #ifdef CONFIG_OF_MDIO
 static int dp83869_of_init(struct phy_device *phydev)
 {
@@ -185,6 +200,10 @@ static int dp83869_of_init(struct phy_device *phydev)
 		if (dp83869->mode < DP83869_RGMII_COPPER_ETHERNET ||
 		    dp83869->mode > DP83869_SGMII_COPPER_ETHERNET)
 			return -EINVAL;
+	} else {
+		ret = dp83869_set_strapped_mode(phydev);
+		if (ret)
+			return ret;
 	}
 
 	if (of_property_read_bool(of_node, "ti,max-output-impedance"))
@@ -218,7 +237,7 @@ static int dp83869_of_init(struct phy_device *phydev)
 #else
 static int dp83869_of_init(struct phy_device *phydev)
 {
-	return 0;
+	return dp83869_set_strapped_mode(phydev);
 }
 #endif /* CONFIG_OF_MDIO */
 
-- 
cgit v1.2.3-59-g8ed1b


From 7aa38018be1fa4cff3e631f26bc821086ba90d29 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Thu, 21 May 2020 23:19:05 +0000
Subject: bridge: mrp: Add br_mrp_unique_ifindex function

It is not allowed to have the same net bridge port be part of multiple
MRP rings. Therefore add a check for whether the port is already used in
a different MRP instance, and return failure in that case.

Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp.c | 25 +++++++++++++++++++++++++
 1 file changed, 25 insertions(+)

diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index d7bc09de4c13..854e31bf0151 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -37,6 +37,26 @@ static struct br_mrp *br_mrp_find_id(struct net_bridge *br, u32 ring_id)
 	return res;
 }
 
+static bool br_mrp_unique_ifindex(struct net_bridge *br, u32 ifindex)
+{
+	struct br_mrp *mrp;
+
+	list_for_each_entry_rcu(mrp, &br->mrp_list, list,
+				lockdep_rtnl_is_held()) {
+		struct net_bridge_port *p;
+
+		p = rtnl_dereference(mrp->p_port);
+		if (p && p->dev->ifindex == ifindex)
+			return false;
+
+		p = rtnl_dereference(mrp->s_port);
+		if (p && p->dev->ifindex == ifindex)
+			return false;
+	}
+
+	return true;
+}
+
 static struct br_mrp *br_mrp_find_port(struct net_bridge *br,
 				       struct net_bridge_port *p)
 {
@@ -255,6 +275,11 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
 	    !br_mrp_get_port(br, instance->s_ifindex))
 		return -EINVAL;
 
+	/* It is not possible to have the same port part of multiple rings */
+	if (!br_mrp_unique_ifindex(br, instance->p_ifindex) ||
+	    !br_mrp_unique_ifindex(br, instance->s_ifindex))
+		return -EINVAL;
+
 	mrp = kzalloc(sizeof(*mrp), GFP_KERNEL);
 	if (!mrp)
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 89c1e111cb4860b51efef50474cd259c2702edc6 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Thu, 21 May 2020 23:19:06 +0000
Subject: switchdev: mrp: Remove the variable mrp_ring_state

Remove the variable mrp_ring_state from switchdev_attr because it is not
used anywhere.
The ring state is set using SWITCHDEV_OBJ_ID_RING_STATE_MRP.

Fixes: c284b5459008 ("switchdev: mrp: Extend switchdev API to offload MRP")
Acked-by: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index ae7aeb0d1f9c..db519957e134 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -62,7 +62,6 @@ struct switchdev_attr {
 #if IS_ENABLED(CONFIG_BRIDGE_MRP)
 		u8 mrp_port_state;			/* MRP_PORT_STATE */
 		u8 mrp_port_role;			/* MRP_PORT_ROLE */
-		u8 mrp_ring_state;			/* MRP_RING_STATE */
 #endif
 	} u;
 };
-- 
cgit v1.2.3-59-g8ed1b


From 4fb13499d3a0cc74cf9820c052481f0ccda2bb23 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Thu, 21 May 2020 23:19:07 +0000
Subject: bridge: mrp: Restore port state when deleting MRP instance

When an MRP instance is deleted, restore the port state according to the
bridge state. If the bridge is up, the ports are put in the forwarding
state; otherwise they are put in the disabled state.

Fixes: 9a9f26e8f7ea ("bridge: mrp: Connect MRP API with the switchdev API")
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp.c | 13 +++++++++----
 1 file changed, 9 insertions(+), 4 deletions(-)

diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 854e31bf0151..528d767eb026 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -223,6 +223,7 @@ out:
 static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
 {
 	struct net_bridge_port *p;
+	u8 state;
 
 	/* Stop sending MRP_Test frames */
 	cancel_delayed_work_sync(&mrp->test_work);
@@ -234,20 +235,24 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
 	p = rtnl_dereference(mrp->p_port);
 	if (p) {
 		spin_lock_bh(&br->lock);
-		p->state = BR_STATE_FORWARDING;
+		state = netif_running(br->dev) ?
+				BR_STATE_FORWARDING : BR_STATE_DISABLED;
+		p->state = state;
 		p->flags &= ~BR_MRP_AWARE;
 		spin_unlock_bh(&br->lock);
-		br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+		br_mrp_port_switchdev_set_state(p, state);
 		rcu_assign_pointer(mrp->p_port, NULL);
 	}
 
 	p = rtnl_dereference(mrp->s_port);
 	if (p) {
 		spin_lock_bh(&br->lock);
-		p->state = BR_STATE_FORWARDING;
+		state = netif_running(br->dev) ?
+				BR_STATE_FORWARDING : BR_STATE_DISABLED;
+		p->state = state;
 		p->flags &= ~BR_MRP_AWARE;
 		spin_unlock_bh(&br->lock);
-		br_mrp_port_switchdev_set_state(p, BR_STATE_FORWARDING);
+		br_mrp_port_switchdev_set_state(p, state);
 		rcu_assign_pointer(mrp->s_port, NULL);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 2639324a8fb69e1db88ef985e7def18a6bc106d1 Mon Sep 17 00:00:00 2001
From: Tang Bin <tangbin@cmss.chinamobile.com>
Date: Sat, 16 May 2020 07:06:33 +0800
Subject: net/mlx5e: Use IS_ERR() to check and simplify code

Use IS_ERR() and PTR_ERR() instead of PTR_ERR_OR_ZERO() to
simplify the code and avoid a redundant check.

Signed-off-by: Zhang Shengju <zhangshengju@cmss.chinamobile.com>
Signed-off-by: Tang Bin <tangbin@cmss.chinamobile.com>
Reviewed-by: Leon Romanovsky <leonro@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index b45c3f46570b..9f50a1d3c5cc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -96,9 +96,8 @@ static int mlx5e_route_lookup_ipv4(struct mlx5e_priv *priv,
 	}
 
 	rt = ip_route_output_key(dev_net(mirred_dev), fl4);
-	ret = PTR_ERR_OR_ZERO(rt);
-	if (ret)
-		return ret;
+	if (IS_ERR(rt))
+		return PTR_ERR(rt);
 
 	if (mlx5_lag_is_multipath(mdev) && rt->rt_gw_family != AF_INET) {
 		ip_rt_put(rt);
-- 
cgit v1.2.3-59-g8ed1b


From 768c3667e6f36bc9db0dac854aa198651b27412f Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 12 May 2020 16:41:41 +0300
Subject: net/mlx5e: Extract TC-specific code from en_rep.c to rep/tc.c

As preparation for introducing a new kconfig option that controls
compilation of all TC offloads code in mlx5, extract TC-specific code from
en_rep.c to a standalone file. This allows easily compiling out the code by
only including the new source in the makefile when the corresponding kconfig
is enabled, instead of adding multiple ifdef blocks to en_rep.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +-
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    | 710 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.h    |  45 ++
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 543 +---------------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  13 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c    |   9 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 170 +----
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  35 +-
 9 files changed, 821 insertions(+), 708 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index d3c7dbd7f1d5..c21453970dbb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -33,7 +33,8 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/tc_tun.o lib/port_tun.o lag_mp.o \
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/rep/tc.o en/tc_tun.o lib/port_tun.o \
+					lag_mp.o \
 					lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
 					en/tc_tun_geneve.o diag/en_tc_tracepoint.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
new file mode 100644
index 000000000000..edc574582135
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -0,0 +1,710 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <net/dst_metadata.h>
+#include <linux/netdevice.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/spinlock.h>
+#include "tc.h"
+#include "en_rep.h"
+#include "eswitch.h"
+#include "esw/chains.h"
+#include "en/tc_ct.h"
+#include "en/mapping.h"
+#include "en/tc_tun.h"
+#include "lib/port_tun.h"
+
+/* State of one indirect block binding, keyed by the bound netdev. */
+struct mlx5e_rep_indr_block_priv {
+	struct net_device *netdev; /* device the block was bound for */
+	struct mlx5e_rep_priv *rpriv; /* representor that registered the callback */
+	struct list_head list; /* node on uplink_priv.tc_indr_block_priv_list */
+};
+
+/* Take a tunnel-entropy reference for @e and link @e to the neigh hash entry
+ * for e->m_neigh, creating that entry when the lookup finds none.
+ */
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_tun_entropy *entropy = &rpriv->uplink_priv.tun_entropy;
+	struct mutex *encap_lock = &rpriv->neigh_update.encap_lock;
+	struct mlx5e_neigh_hash_entry *nhe;
+	int err;
+
+	err = mlx5_tun_entropy_refcount_inc(entropy, e->reformat_type);
+	if (err)
+		return err;
+
+	mutex_lock(encap_lock);
+	nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
+	if (!nhe)
+		err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
+	if (err) {
+		mutex_unlock(encap_lock);
+		mlx5_tun_entropy_refcount_dec(entropy, e->reformat_type);
+		return err;
+	}
+
+	e->nhe = nhe;
+	spin_lock(&nhe->encap_list_lock);
+	list_add_rcu(&e->encap_list, &nhe->encap_list);
+	spin_unlock(&nhe->encap_list_lock);
+	mutex_unlock(encap_lock);
+
+	return 0;
+}
+
+/* Reverse mlx5e_rep_encap_entry_attach(); a no-op when @e has no nhe. */
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+				  struct mlx5e_encap_entry *e)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5_tun_entropy *entropy = &rpriv->uplink_priv.tun_entropy;
+	struct mlx5e_neigh_hash_entry *nhe = e->nhe;
+
+	if (!nhe)
+		return;
+
+	spin_lock(&nhe->encap_list_lock);
+	list_del_rcu(&e->encap_list);
+	spin_unlock(&nhe->encap_list_lock);
+	mlx5e_rep_neigh_entry_release(nhe);
+	e->nhe = NULL;
+	mlx5_tun_entropy_refcount_dec(entropy, e->reformat_type);
+}
+
+/* Bring the offloaded flows of encap entry @e in line with the neighbour
+ * state (@neigh_connected, @ha). Caller must hold RTNL.
+ */
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+			    struct mlx5e_encap_entry *e,
+			    bool neigh_connected,
+			    unsigned char ha[ETH_ALEN])
+{
+	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	bool encap_connected;
+	LIST_HEAD(flow_list);
+
+	ASSERT_RTNL();
+	/* wait for encap to be fully initialized */
+	wait_for_completion(&e->res_ready);
+
+	mutex_lock(&esw->offloads.encap_tbl_lock);
+	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
+	if (e->compl_result < 0 || (encap_connected == neigh_connected &&
+				    ether_addr_equal(e->h_dest, ha)))
+		goto unlock; /* negative compl_result, or state already matches */
+
+	mlx5e_take_all_encap_flows(e, &flow_list);
+	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
+	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
+		mlx5e_tc_encap_flows_del(priv, e, &flow_list);
+
+	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
+		ether_addr_copy(e->h_dest, ha);
+		ether_addr_copy(eth->h_dest, ha);
+		/* Update the encap source mac, in case that we delete
+		 * the flows when encap source mac changed.
+		 */
+		ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
+		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
+	}
+unlock:
+	mutex_unlock(&esw->offloads.encap_tbl_lock);
+	mlx5e_put_encap_flow_list(priv, &flow_list);
+}
+
+/* Dispatch a flower classifier command issued on the representor itself. */
+static int
+mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
+			      struct flow_cls_offload *cls_flower, int flags)
+{
+	struct net_device *dev = priv->netdev;
+
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return mlx5e_configure_flower(dev, priv, cls_flower, flags);
+	case FLOW_CLS_DESTROY:
+		return mlx5e_delete_flower(dev, priv, cls_flower, flags);
+	case FLOW_CLS_STATS:
+		return mlx5e_stats_flower(dev, priv, cls_flower, flags);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Dispatch a matchall classifier command; a stats request always returns 0. */
+static
+int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
+				    struct tc_cls_matchall_offload *ma)
+{
+	if (ma->command == TC_CLSMATCHALL_REPLACE)
+		return mlx5e_tc_configure_matchall(priv, ma);
+	if (ma->command == TC_CLSMATCHALL_DESTROY)
+		return mlx5e_tc_delete_matchall(priv, ma);
+	if (ma->command != TC_CLSMATCHALL_STATS)
+		return -EOPNOTSUPP;
+
+	/* nothing is propagated from the stats call */
+	mlx5e_tc_stats_matchall(priv, ma);
+	return 0;
+}
+
+/* Flow block callback for tc rules on the representor itself: dispatches
+ * flower and matchall requests with the ingress + eswitch-offload flags.
+ */
+static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+	struct mlx5e_priv *priv = cb_priv;
+
+	if (type == TC_SETUP_CLSFLOWER)
+		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
+	if (type == TC_SETUP_CLSMATCHALL)
+		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
+	return -EOPNOTSUPP;
+}
+
+/* Flow block callback for FT offload on the representor: the request is
+ * remapped onto the reserved ft chain and sent down the flower path.
+ */
+static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
+				 void *cb_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD) |
+			      MLX5_TC_FLAG(FT_OFFLOAD);
+	struct flow_cls_offload tmp, *f = type_data;
+	struct mlx5e_priv *priv = cb_priv;
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	int err;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	/* edit a private copy of the request, leaving @f itself intact */
+	memcpy(&tmp, f, sizeof(*f));
+
+	if (!mlx5_esw_chains_prios_supported(esw))
+		return -EOPNOTSUPP;
+
+	/* Re-use tc offload path by moving the ft flow to the
+	 * reserved ft chain.
+	 *
+	 * FT offload can use prio range [0, INT_MAX], so we normalize
+	 * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+	 * as with tc, where prio 0 isn't supported.
+	 *
+	 * We only support chain 0 of FT offload.
+	 */
+	if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+		return -EOPNOTSUPP;
+	if (tmp.common.chain_index != 0)
+		return -EOPNOTSUPP;
+
+	tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+	tmp.common.prio++;
+	err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
+	/* only the stats travel back to the caller's request */
+	memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+
+	return err;
+}
+
+static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
+static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
+/* Handle a tc setup request on a representor netdev: set up the tc or ft
+ * flow block. Other setup types are rejected without writing to @type_data.
+ */
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		       void *type_data)
+{
+	struct mlx5e_priv *priv = netdev_priv(dev);
+	struct flow_block_offload *f = type_data;
+
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		/* @type_data is only treated as a block offload for BLOCK/FT */
+		f->unlocked_driver_cb = true;
+		return flow_block_cb_setup_simple(f, &mlx5e_rep_block_tc_cb_list,
+						  mlx5e_rep_setup_tc_cb, priv, priv, true);
+	case TC_SETUP_FT:
+		f->unlocked_driver_cb = true;
+		return flow_block_cb_setup_simple(f, &mlx5e_rep_block_ft_cb_list,
+						  mlx5e_rep_setup_ft_cb, priv, priv, true);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* Set up the unready-flows list and its lock, then the shared tc flow table.
+ * Returns the result of mlx5e_tc_esw_init().
+ */
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+	mutex_init(&uplink_priv->unready_flows_lock);
+	INIT_LIST_HEAD(&uplink_priv->unready_flows);
+
+	return mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+}
+
+/* Delete the shared tc flow table, then destroy the unready-flows lock. */
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
+	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+}
+
+/* Initialise the work item that runs mlx5e_tc_reoffload_flows_work(). */
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work, mlx5e_tc_reoffload_flows_work);
+}
+
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	/* cancel the reoffload work and wait for a running instance to finish */
+	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+}
+
+/* Queue the reoffload-flows work on priv->wq; always returns NOTIFY_OK. */
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
+	return NOTIFY_OK;
+}
+
+/* Find the indirect block private recorded for @netdev, or NULL if none. */
+static struct mlx5e_rep_indr_block_priv *
+mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
+				 struct net_device *netdev)
+{
+	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+	struct mlx5e_rep_indr_block_priv *entry;
+
+	/* All callback list access should be protected by RTNL. */
+	ASSERT_RTNL();
+
+	list_for_each_entry(entry, head, list) {
+		if (entry->netdev == netdev)
+			return entry;
+	}
+	return NULL;
+}
+
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+					    struct net_device *netdev);
+
+/* Unregister the indirect block callback of every netdev still on the list. */
+void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
+	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
+	list_for_each_entry_safe(cb_priv, temp, head, list) {
+		mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
+		kfree(cb_priv); /* NOTE(review): freed without list_del() - confirm the list is dead afterwards */
+	}
+}
+
+/* Run a flower classifier command coming from an indirectly bound device.
+ * @netdev is the indirectly bound device; the rule is programmed through
+ * the mlx5e priv of the representor stored in @indr_priv.
+ * Commands other than replace/destroy/stats return -EOPNOTSUPP.
+ */
+static int
+mlx5e_rep_indr_offload(struct net_device *netdev,
+		       struct flow_cls_offload *flower,
+		       struct mlx5e_rep_indr_block_priv *indr_priv,
+		       unsigned long flags)
+{
+	/* the offload is driven by the representor's priv, not @netdev's */
+	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
+
+	switch (flower->command) {
+	case FLOW_CLS_REPLACE:
+		return mlx5e_configure_flower(netdev, priv, flower, flags);
+	case FLOW_CLS_DESTROY:
+		return mlx5e_delete_flower(netdev, priv, flower, flags);
+	case FLOW_CLS_STATS:
+		return mlx5e_stats_flower(netdev, priv, flower, flags);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+/* tc flow block callback for an indirectly bound device: flower requests
+ * are offloaded with the egress + eswitch-offload flags.
+ */
+static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
+				      void *type_data, void *indr_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
+	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	return mlx5e_rep_indr_offload(priv->netdev, type_data, priv, flags);
+}
+
+/* FT flow block callback for an indirectly bound device: the request is
+ * remapped onto the reserved ft chain and sent down the indirect flower
+ * offload path with the egress flags.
+ */
+static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
+				      void *type_data, void *indr_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD) |
+			      MLX5_TC_FLAG(FT_OFFLOAD);
+	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
+	struct mlx5e_priv *mpriv = netdev_priv(priv->rpriv->netdev);
+	struct mlx5_eswitch *esw = mpriv->mdev->priv.eswitch;
+	struct flow_cls_offload *f = type_data;
+	struct flow_cls_offload tmp;
+	int err;
+
+	if (type != TC_SETUP_CLSFLOWER)
+		return -EOPNOTSUPP;
+
+	/* edit a private copy of the request, leaving @f itself intact */
+	memcpy(&tmp, f, sizeof(*f));
+
+	/* Re-use tc offload path by moving the ft flow to the
+	 * reserved ft chain.
+	 *
+	 * FT offload can use prio range [0, INT_MAX], so we normalize
+	 * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
+	 * as with tc, where prio 0 isn't supported.
+	 *
+	 * We only support chain 0 of FT offload.
+	 */
+	if (!mlx5_esw_chains_prios_supported(esw))
+		return -EOPNOTSUPP;
+	if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
+		return -EOPNOTSUPP;
+	if (tmp.common.chain_index)
+		return -EOPNOTSUPP;
+
+	tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
+	tmp.common.prio++;
+	err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
+	memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
+
+	return err;
+}
+
+/* Release hook handed to flow_block_cb_alloc(): unlink and free the private. */
+static void mlx5e_rep_indr_block_unbind(void *cb_priv)
+{
+	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
+	list_del(&indr_priv->list);
+	kfree(indr_priv);
+}
+
+static LIST_HEAD(mlx5e_block_cb_list);
+
+/* Bind or unbind an ingress flow block of @netdev, an indirectly offloaded
+ * device, with @setup_cb as the per-filter callback.
+ */
+static int
+mlx5e_rep_indr_setup_block(struct net_device *netdev,
+			   struct mlx5e_rep_priv *rpriv,
+			   struct flow_block_offload *f,
+			   flow_setup_cb_t *setup_cb)
+{
+	struct list_head *privs = &rpriv->uplink_priv.tc_indr_block_priv_list;
+	struct mlx5e_rep_indr_block_priv *indr_priv;
+	struct flow_block_cb *block_cb;
+
+	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
+		return -EOPNOTSUPP;
+
+	f->unlocked_driver_cb = true;
+	f->driver_block_list = &mlx5e_block_cb_list;
+
+	switch (f->command) {
+	case FLOW_BLOCK_BIND:
+		if (mlx5e_rep_indr_block_priv_lookup(rpriv, netdev))
+			return -EEXIST;
+		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
+		if (!indr_priv)
+			return -ENOMEM;
+
+		indr_priv->netdev = netdev;
+		indr_priv->rpriv = rpriv;
+		list_add(&indr_priv->list, privs);
+
+		block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
+					       mlx5e_rep_indr_block_unbind);
+		if (IS_ERR(block_cb)) {
+			list_del(&indr_priv->list);
+			kfree(indr_priv);
+			return PTR_ERR(block_cb);
+		}
+		flow_block_cb_add(block_cb, f);
+		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
+		break;
+	case FLOW_BLOCK_UNBIND:
+		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
+		if (!indr_priv)
+			return -ENOENT;
+		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
+		if (!block_cb)
+			return -ENOENT;
+
+		flow_block_cb_remove(block_cb, f);
+		list_del(&block_cb->driver_list);
+		break;
+	default:
+		return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+/* Indirect block callback: choose the per-filter callback by setup type. */
+static
+int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
+			    enum tc_setup_type type, void *type_data)
+{
+	flow_setup_cb_t *cb;
+
+	if (type == TC_SETUP_BLOCK)
+		cb = mlx5e_rep_indr_setup_tc_cb;
+	else if (type == TC_SETUP_FT)
+		cb = mlx5e_rep_indr_setup_ft_cb;
+	else
+		return -EOPNOTSUPP;
+	return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data, cb);
+}
+
+/* Register the indirect block callback for @netdev; failures are logged. */
+static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
+					 struct net_device *netdev)
+{
+	struct mlx5e_priv *priv;
+	int err;
+
+	err = __flow_indr_block_cb_register(netdev, rpriv,
+					    mlx5e_rep_indr_setup_cb, rpriv);
+	if (!err)
+		return 0;
+	priv = netdev_priv(rpriv->netdev);
+	mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
+		      netdev_name(netdev), err);
+	return err;
+}
+
+/* Drop the indirect block callback registered for @netdev. */
+static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
+					    struct net_device *netdev)
+{
+	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb, rpriv);
+}
+
+/* Netdevice notifier: (un)register the indirect block callback for tunnel
+ * devices we can offload and for VLAN devices whose real device is ours.
+ */
+static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
+					 unsigned long event, void *ptr)
+{
+	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+						     uplink_priv.netdevice_nb);
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+	if (!(mlx5e_tc_tun_device_to_offload(priv, netdev) ||
+	      (is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev)))
+		return NOTIFY_OK;
+
+	if (event == NETDEV_REGISTER)
+		mlx5e_rep_indr_register_block(rpriv, netdev);
+	else if (event == NETDEV_UNREGISTER)
+		mlx5e_rep_indr_unregister_block(rpriv, netdev);
+
+	return NOTIFY_OK;
+}
+
+/* Initialise the indirect block private list and register the netdevice
+ * notifier for rpriv->netdev. Returns the registration result.
+ */
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
+
+	uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
+	return register_netdevice_notifier_dev_net(rpriv->netdev,
+						   &uplink_priv->netdevice_nb,
+						   &uplink_priv->netdevice_nn);
+}
+
+/* Unregister the netdevice notifier used for indirect TC block notifications. */
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+
+	unregister_netdevice_notifier_dev_net(rpriv->netdev,
+					      &uplink_priv->netdevice_nb,
+					      &uplink_priv->netdevice_nn);
+}
+
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+/* Restore tunnel metadata on @skb from @tunnel_id: attach a metadata dst built
+ * from the mapped tunnel key/options and point skb->dev at the tunnel device.
+ * Returns false when a lookup or tun_rx_dst() fails, true otherwise.
+ */
+static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
+				 struct mlx5e_tc_update_priv *tc_priv,
+				 u32 tunnel_id)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct tunnel_match_enc_opts enc_opts = {};
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct metadata_dst *tun_dst;
+	struct tunnel_match_key key;
+	u32 tun_id, enc_opts_id;
+	struct net_device *dev;
+	int err;
+
+	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
+	tun_id = tunnel_id >> ENC_OPTS_BITS;
+	if (!tun_id)
+		return true;
+
+	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	uplink_priv = &uplink_rpriv->uplink_priv;
+
+	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
+	if (err) {
+		WARN_ON_ONCE(true);
+		netdev_dbg(priv->netdev,
+			   "Couldn't find tunnel for tun_id: %u, err: %d\n",
+			   tun_id, err);
+		return false;
+	}
+
+	if (enc_opts_id) {
+		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
+				   enc_opts_id, &enc_opts);
+		if (err) {
+			netdev_dbg(priv->netdev,
+				   "Couldn't find tunnel (opts) for tun_id: %u, err: %d\n",
+				   enc_opts_id, err);
+			return false;
+		}
+	}
+
+	tun_dst = tun_rx_dst(enc_opts.key.len);
+	if (!tun_dst) {
+		WARN_ON_ONCE(true);
+		return false;
+	}
+
+	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
+			   key.enc_ipv4.src, key.enc_ipv4.dst,
+			   key.enc_ip.tos, key.enc_ip.ttl,
+			   0, /* label */
+			   key.enc_tp.src, key.enc_tp.dst,
+			   key32_to_tunnel_id(key.enc_key_id.keyid),
+			   TUNNEL_KEY);
+	if (enc_opts.key.len)
+		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
+					enc_opts.key.data,
+					enc_opts.key.len,
+					enc_opts.key.dst_opt_type);
+
+	skb_dst_set(skb, (struct dst_entry *)tun_dst);
+	dev = dev_get_by_index(&init_net, key.filter_ifindex);
+	if (!dev) {
+		netdev_dbg(priv->netdev,
+			   "Couldn't find tunnel device with ifindex: %d\n",
+			   key.filter_ifindex);
+		return false;
+	}
+
+	/* Set tun_dev so we do dev_put() after datapath */
+	tc_priv->tun_dev = dev;
+	skb->dev = dev;
+	return true;
+}
+#endif /* CONFIG_NET_TC_SKB_EXT */
+
+/* Restore tc state on a received @skb from the registers carried in @cqe: chain
+ * (as a tc skb extension), CT state and tunnel metadata. Returns false if a
+ * step fails; always true when CONFIG_NET_TC_SKB_EXT is disabled.
+ */
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+			     struct sk_buff *skb,
+			     struct mlx5e_tc_update_priv *tc_priv)
+{
+#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
+	u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
+	struct mlx5_rep_uplink_priv *uplink_priv;
+	struct mlx5e_rep_priv *uplink_rpriv;
+	struct tc_skb_ext *tc_skb_ext;
+	struct mlx5_eswitch *esw;
+	struct mlx5e_priv *priv;
+	int tunnel_moffset;
+	int err;
+
+	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
+	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
+		reg_c0 = 0;
+	reg_c1 = be32_to_cpu(cqe->ft_metadata);
+	if (!reg_c0)
+		return true;
+
+	priv = netdev_priv(skb->dev);
+	esw = priv->mdev->priv.eswitch;
+
+	err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
+	if (err) {
+		netdev_dbg(priv->netdev,
+			   "Couldn't find chain for chain tag: %u, err: %d\n",
+			   reg_c0, err);
+		return false;
+	}
+
+	if (chain) {
+		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
+		if (!tc_skb_ext) {
+			WARN_ON(1);
+			return false;
+		}
+		tc_skb_ext->chain = chain;
+		tuple_id = reg_c1 & TUPLE_ID_MAX;
+
+		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+		uplink_priv = &uplink_rpriv->uplink_priv;
+		if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
+			return false;
+	}
+
+	tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
+	tunnel_id = reg_c1 >> (8 * tunnel_moffset);
+	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
+#endif /* CONFIG_NET_TC_SKB_EXT */
+	return true;
+}
+
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
+{
+	if (tc_priv->tun_dev)
+		dev_put(tc_priv->tun_dev); /* drops the dev_get_by_index() ref from mlx5e_restore_tunnel() */
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
new file mode 100644
index 000000000000..90da00626b97
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -0,0 +1,45 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_TC_H__
+#define __MLX5_EN_REP_TC_H__
+
+#include <linux/skbuff.h>
+#include "en.h"
+#include "en_tc.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_priv;
+int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
+/* netdevice notifier used for indirect block (un)registration */
+int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv);
+/* init / cancel the reoffload-flows work item */
+void mlx5e_rep_tc_enable(struct mlx5e_priv *priv);
+void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
+/* queues the reoffload-flows work; returns NOTIFY_OK */
+int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
+
+struct mlx5e_encap_entry;
+void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
+			    struct mlx5e_encap_entry *e,
+			    bool neigh_connected,
+			    unsigned char ha[ETH_ALEN]);
+/* link / unlink an encap entry to its neigh hash entry */
+int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e);
+void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
+				  struct mlx5e_encap_entry *e);
+
+int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		       void *type_data);
+void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv);
+/* rx path: restore skb state from the CQE, then release the tunnel dev */
+struct mlx5e_tc_update_priv;
+bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+			     struct sk_buff *skb,
+			     struct mlx5e_tc_update_priv *tc_priv);
+void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
+
+#endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 9f50a1d3c5cc..9fdd79afa6e4 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -6,6 +6,7 @@
 #include <net/geneve.h>
 #include "en/tc_tun.h"
 #include "en_tc.h"
+#include "rep/tc.h"
 
 struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 52351c105627..c84f0d9b516e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -45,9 +45,8 @@
 #include "en.h"
 #include "en_rep.h"
 #include "en_tc.h"
-#include "en/tc_tun.h"
+#include "en/rep/tc.h"
 #include "fs_core.h"
-#include "lib/port_tun.h"
 #include "lib/mlx5.h"
 #define CREATE_TRACE_POINTS
 #include "diag/en_rep_tracepoint.h"
@@ -58,16 +57,6 @@
 
 static const char mlx5e_rep_driver_name[] = "mlx5e_rep";
 
-struct mlx5e_rep_indr_block_priv {
-	struct net_device *netdev;
-	struct mlx5e_rep_priv *rpriv;
-
-	struct list_head list;
-};
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
-					    struct net_device *netdev);
-
 static void mlx5e_rep_get_drvinfo(struct net_device *dev,
 				  struct ethtool_drvinfo *drvinfo)
 {
@@ -521,7 +510,7 @@ static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
 
 static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
 
-static void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
 {
 	if (refcount_dec_and_test(&nhe->refcnt)) {
 		mlx5e_rep_neigh_entry_remove(nhe);
@@ -579,48 +568,6 @@ static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
 	rtnl_unlock();
 }
 
-static void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
-				   struct mlx5e_encap_entry *e,
-				   bool neigh_connected,
-				   unsigned char ha[ETH_ALEN])
-{
-	struct ethhdr *eth = (struct ethhdr *)e->encap_header;
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	bool encap_connected;
-	LIST_HEAD(flow_list);
-
-	ASSERT_RTNL();
-
-	/* wait for encap to be fully initialized */
-	wait_for_completion(&e->res_ready);
-
-	mutex_lock(&esw->offloads.encap_tbl_lock);
-	encap_connected = !!(e->flags & MLX5_ENCAP_ENTRY_VALID);
-	if (e->compl_result < 0 || (encap_connected == neigh_connected &&
-				    ether_addr_equal(e->h_dest, ha)))
-		goto unlock;
-
-	mlx5e_take_all_encap_flows(e, &flow_list);
-
-	if ((e->flags & MLX5_ENCAP_ENTRY_VALID) &&
-	    (!neigh_connected || !ether_addr_equal(e->h_dest, ha)))
-		mlx5e_tc_encap_flows_del(priv, e, &flow_list);
-
-	if (neigh_connected && !(e->flags & MLX5_ENCAP_ENTRY_VALID)) {
-		ether_addr_copy(e->h_dest, ha);
-		ether_addr_copy(eth->h_dest, ha);
-		/* Update the encap source mac, in case that we delete
-		 * the flows when encap source mac changed.
-		 */
-		ether_addr_copy(eth->h_source, e->route_dev->dev_addr);
-
-		mlx5e_tc_encap_flows_add(priv, e, &flow_list);
-	}
-unlock:
-	mutex_unlock(&esw->offloads.encap_tbl_lock);
-	mlx5e_put_encap_flow_list(priv, &flow_list);
-}
-
 static void mlx5e_rep_neigh_update(struct work_struct *work)
 {
 	struct mlx5e_neigh_hash_entry *nhe =
@@ -662,254 +609,6 @@ static void mlx5e_rep_neigh_update(struct work_struct *work)
 	neigh_release(n);
 }
 
-static struct mlx5e_rep_indr_block_priv *
-mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
-				 struct net_device *netdev)
-{
-	struct mlx5e_rep_indr_block_priv *cb_priv;
-
-	/* All callback list access should be protected by RTNL. */
-	ASSERT_RTNL();
-
-	list_for_each_entry(cb_priv,
-			    &rpriv->uplink_priv.tc_indr_block_priv_list,
-			    list)
-		if (cb_priv->netdev == netdev)
-			return cb_priv;
-
-	return NULL;
-}
-
-static void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
-{
-	struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
-	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
-
-	list_for_each_entry_safe(cb_priv, temp, head, list) {
-		mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
-		kfree(cb_priv);
-	}
-}
-
-static int
-mlx5e_rep_indr_offload(struct net_device *netdev,
-		       struct flow_cls_offload *flower,
-		       struct mlx5e_rep_indr_block_priv *indr_priv,
-		       unsigned long flags)
-{
-	struct mlx5e_priv *priv = netdev_priv(indr_priv->rpriv->netdev);
-	int err = 0;
-
-	switch (flower->command) {
-	case FLOW_CLS_REPLACE:
-		err = mlx5e_configure_flower(netdev, priv, flower, flags);
-		break;
-	case FLOW_CLS_DESTROY:
-		err = mlx5e_delete_flower(netdev, priv, flower, flags);
-		break;
-	case FLOW_CLS_STATS:
-		err = mlx5e_stats_flower(netdev, priv, flower, flags);
-		break;
-	default:
-		err = -EOPNOTSUPP;
-	}
-
-	return err;
-}
-
-static int mlx5e_rep_indr_setup_tc_cb(enum tc_setup_type type,
-				      void *type_data, void *indr_priv)
-{
-	unsigned long flags = MLX5_TC_FLAG(EGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
-	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_indr_offload(priv->netdev, type_data, priv,
-					      flags);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_rep_indr_setup_ft_cb(enum tc_setup_type type,
-				      void *type_data, void *indr_priv)
-{
-	struct mlx5e_rep_indr_block_priv *priv = indr_priv;
-	struct flow_cls_offload *f = type_data;
-	struct flow_cls_offload tmp;
-	struct mlx5e_priv *mpriv;
-	struct mlx5_eswitch *esw;
-	unsigned long flags;
-	int err;
-
-	mpriv = netdev_priv(priv->rpriv->netdev);
-	esw = mpriv->mdev->priv.eswitch;
-
-	flags = MLX5_TC_FLAG(EGRESS) |
-		MLX5_TC_FLAG(ESW_OFFLOAD) |
-		MLX5_TC_FLAG(FT_OFFLOAD);
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		memcpy(&tmp, f, sizeof(*f));
-
-		/* Re-use tc offload path by moving the ft flow to the
-		 * reserved ft chain.
-		 *
-		 * FT offload can use prio range [0, INT_MAX], so we normalize
-		 * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
-		 * as with tc, where prio 0 isn't supported.
-		 *
-		 * We only support chain 0 of FT offload.
-		 */
-		if (!mlx5_esw_chains_prios_supported(esw) ||
-		    tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw) ||
-		    tmp.common.chain_index)
-			return -EOPNOTSUPP;
-
-		tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
-		tmp.common.prio++;
-		err = mlx5e_rep_indr_offload(priv->netdev, &tmp, priv, flags);
-		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
-		return err;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static void mlx5e_rep_indr_block_unbind(void *cb_priv)
-{
-	struct mlx5e_rep_indr_block_priv *indr_priv = cb_priv;
-
-	list_del(&indr_priv->list);
-	kfree(indr_priv);
-}
-
-static LIST_HEAD(mlx5e_block_cb_list);
-
-static int
-mlx5e_rep_indr_setup_block(struct net_device *netdev,
-			   struct mlx5e_rep_priv *rpriv,
-			   struct flow_block_offload *f,
-			   flow_setup_cb_t *setup_cb)
-{
-	struct mlx5e_rep_indr_block_priv *indr_priv;
-	struct flow_block_cb *block_cb;
-
-	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
-		return -EOPNOTSUPP;
-
-	f->unlocked_driver_cb = true;
-	f->driver_block_list = &mlx5e_block_cb_list;
-
-	switch (f->command) {
-	case FLOW_BLOCK_BIND:
-		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
-		if (indr_priv)
-			return -EEXIST;
-
-		indr_priv = kmalloc(sizeof(*indr_priv), GFP_KERNEL);
-		if (!indr_priv)
-			return -ENOMEM;
-
-		indr_priv->netdev = netdev;
-		indr_priv->rpriv = rpriv;
-		list_add(&indr_priv->list,
-			 &rpriv->uplink_priv.tc_indr_block_priv_list);
-
-		block_cb = flow_block_cb_alloc(setup_cb, indr_priv, indr_priv,
-					       mlx5e_rep_indr_block_unbind);
-		if (IS_ERR(block_cb)) {
-			list_del(&indr_priv->list);
-			kfree(indr_priv);
-			return PTR_ERR(block_cb);
-		}
-		flow_block_cb_add(block_cb, f);
-		list_add_tail(&block_cb->driver_list, &mlx5e_block_cb_list);
-
-		return 0;
-	case FLOW_BLOCK_UNBIND:
-		indr_priv = mlx5e_rep_indr_block_priv_lookup(rpriv, netdev);
-		if (!indr_priv)
-			return -ENOENT;
-
-		block_cb = flow_block_cb_lookup(f->block, setup_cb, indr_priv);
-		if (!block_cb)
-			return -ENOENT;
-
-		flow_block_cb_remove(block_cb, f);
-		list_del(&block_cb->driver_list);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-	return 0;
-}
-
-static
-int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
-			    enum tc_setup_type type, void *type_data)
-{
-	switch (type) {
-	case TC_SETUP_BLOCK:
-		return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
-						  mlx5e_rep_indr_setup_tc_cb);
-	case TC_SETUP_FT:
-		return mlx5e_rep_indr_setup_block(netdev, cb_priv, type_data,
-						  mlx5e_rep_indr_setup_ft_cb);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
-					 struct net_device *netdev)
-{
-	int err;
-
-	err = __flow_indr_block_cb_register(netdev, rpriv,
-					    mlx5e_rep_indr_setup_cb,
-					    rpriv);
-	if (err) {
-		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
-		mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
-			      netdev_name(netdev), err);
-	}
-	return err;
-}
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
-					    struct net_device *netdev)
-{
-	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb,
-					rpriv);
-}
-
-static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
-					 unsigned long event, void *ptr)
-{
-	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
-						     uplink_priv.netdevice_nb);
-	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-
-	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
-	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
-		return NOTIFY_OK;
-
-	switch (event) {
-	case NETDEV_REGISTER:
-		mlx5e_rep_indr_register_block(rpriv, netdev);
-		break;
-	case NETDEV_UNREGISTER:
-		mlx5e_rep_indr_unregister_block(rpriv, netdev);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
 static void
 mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
 				  struct mlx5e_neigh_hash_entry *nhe,
@@ -932,10 +631,6 @@ mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
 	}
 }
 
-static struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
-			     struct mlx5e_neigh *m_neigh);
-
 static int mlx5e_rep_netevent_event(struct notifier_block *nb,
 				    unsigned long event, void *ptr)
 {
@@ -1091,7 +786,7 @@ static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
 /* This function must only be called under the representor's encap_lock or
  * inside rcu read lock section.
  */
-static struct mlx5e_neigh_hash_entry *
+struct mlx5e_neigh_hash_entry *
 mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
 			     struct mlx5e_neigh *m_neigh)
 {
@@ -1104,9 +799,9 @@ mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
 	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
 }
 
-static int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
-					struct mlx5e_encap_entry *e,
-					struct mlx5e_neigh_hash_entry **nhe)
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct mlx5e_neigh_hash_entry **nhe)
 {
 	int err;
 
@@ -1131,60 +826,6 @@ out_free:
 	return err;
 }
 
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
-				 struct mlx5e_encap_entry *e)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
-	struct mlx5e_neigh_hash_entry *nhe;
-	int err;
-
-	err = mlx5_tun_entropy_refcount_inc(tun_entropy, e->reformat_type);
-	if (err)
-		return err;
-
-	mutex_lock(&rpriv->neigh_update.encap_lock);
-	nhe = mlx5e_rep_neigh_entry_lookup(priv, &e->m_neigh);
-	if (!nhe) {
-		err = mlx5e_rep_neigh_entry_create(priv, e, &nhe);
-		if (err) {
-			mutex_unlock(&rpriv->neigh_update.encap_lock);
-			mlx5_tun_entropy_refcount_dec(tun_entropy,
-						      e->reformat_type);
-			return err;
-		}
-	}
-
-	e->nhe = nhe;
-	spin_lock(&nhe->encap_list_lock);
-	list_add_rcu(&e->encap_list, &nhe->encap_list);
-	spin_unlock(&nhe->encap_list_lock);
-
-	mutex_unlock(&rpriv->neigh_update.encap_lock);
-
-	return 0;
-}
-
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
-				  struct mlx5e_encap_entry *e)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-	struct mlx5_tun_entropy *tun_entropy = &uplink_priv->tun_entropy;
-
-	if (!e->nhe)
-		return;
-
-	spin_lock(&e->nhe->encap_list_lock);
-	list_del_rcu(&e->encap_list);
-	spin_unlock(&e->nhe->encap_list_lock);
-
-	mlx5e_rep_neigh_entry_release(e->nhe);
-	e->nhe = NULL;
-	mlx5_tun_entropy_refcount_dec(tun_entropy, e->reformat_type);
-}
-
 static int mlx5e_rep_open(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
@@ -1225,129 +866,6 @@ static int mlx5e_rep_close(struct net_device *dev)
 	return ret;
 }
 
-static int
-mlx5e_rep_setup_tc_cls_flower(struct mlx5e_priv *priv,
-			      struct flow_cls_offload *cls_flower, int flags)
-{
-	switch (cls_flower->command) {
-	case FLOW_CLS_REPLACE:
-		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
-					      flags);
-	case FLOW_CLS_DESTROY:
-		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
-					   flags);
-	case FLOW_CLS_STATS:
-		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
-					  flags);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static
-int mlx5e_rep_setup_tc_cls_matchall(struct mlx5e_priv *priv,
-				    struct tc_cls_matchall_offload *ma)
-{
-	switch (ma->command) {
-	case TC_CLSMATCHALL_REPLACE:
-		return mlx5e_tc_configure_matchall(priv, ma);
-	case TC_CLSMATCHALL_DESTROY:
-		return mlx5e_tc_delete_matchall(priv, ma);
-	case TC_CLSMATCHALL_STATS:
-		mlx5e_tc_stats_matchall(priv, ma);
-		return 0;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_rep_setup_tc_cb(enum tc_setup_type type, void *type_data,
-				 void *cb_priv)
-{
-	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(ESW_OFFLOAD);
-	struct mlx5e_priv *priv = cb_priv;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_rep_setup_tc_cls_flower(priv, type_data, flags);
-	case TC_SETUP_CLSMATCHALL:
-		return mlx5e_rep_setup_tc_cls_matchall(priv, type_data);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_rep_setup_ft_cb(enum tc_setup_type type, void *type_data,
-				 void *cb_priv)
-{
-	struct flow_cls_offload tmp, *f = type_data;
-	struct mlx5e_priv *priv = cb_priv;
-	struct mlx5_eswitch *esw;
-	unsigned long flags;
-	int err;
-
-	flags = MLX5_TC_FLAG(INGRESS) |
-		MLX5_TC_FLAG(ESW_OFFLOAD) |
-		MLX5_TC_FLAG(FT_OFFLOAD);
-	esw = priv->mdev->priv.eswitch;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		memcpy(&tmp, f, sizeof(*f));
-
-		if (!mlx5_esw_chains_prios_supported(esw))
-			return -EOPNOTSUPP;
-
-		/* Re-use tc offload path by moving the ft flow to the
-		 * reserved ft chain.
-		 *
-		 * FT offload can use prio range [0, INT_MAX], so we normalize
-		 * it to range [1, mlx5_esw_chains_get_prio_range(esw)]
-		 * as with tc, where prio 0 isn't supported.
-		 *
-		 * We only support chain 0 of FT offload.
-		 */
-		if (tmp.common.prio >= mlx5_esw_chains_get_prio_range(esw))
-			return -EOPNOTSUPP;
-		if (tmp.common.chain_index != 0)
-			return -EOPNOTSUPP;
-
-		tmp.common.chain_index = mlx5_esw_chains_get_ft_chain(esw);
-		tmp.common.prio++;
-		err = mlx5e_rep_setup_tc_cls_flower(priv, &tmp, flags);
-		memcpy(&f->stats, &tmp.stats, sizeof(f->stats));
-		return err;
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static LIST_HEAD(mlx5e_rep_block_tc_cb_list);
-static LIST_HEAD(mlx5e_rep_block_ft_cb_list);
-static int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
-			      void *type_data)
-{
-	struct mlx5e_priv *priv = netdev_priv(dev);
-	struct flow_block_offload *f = type_data;
-
-	f->unlocked_driver_cb = true;
-
-	switch (type) {
-	case TC_SETUP_BLOCK:
-		return flow_block_cb_setup_simple(type_data,
-						  &mlx5e_rep_block_tc_cb_list,
-						  mlx5e_rep_setup_tc_cb,
-						  priv, priv, true);
-	case TC_SETUP_FT:
-		return flow_block_cb_setup_simple(type_data,
-						  &mlx5e_rep_block_ft_cb_list,
-						  mlx5e_rep_setup_ft_cb,
-						  priv, priv, true);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv)
 {
 	struct mlx5e_rep_priv *rpriv = priv->ppriv;
@@ -1791,31 +1309,23 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 	priv = netdev_priv(netdev);
 	uplink_priv = &rpriv->uplink_priv;
 
-	mutex_init(&uplink_priv->unready_flows_lock);
-	INIT_LIST_HEAD(&uplink_priv->unready_flows);
-
-	/* init shared tc flow table */
-	err = mlx5e_tc_esw_init(&uplink_priv->tc_ht);
+	err = mlx5e_rep_tc_init(rpriv);
 	if (err)
 		return err;
 
 	mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
 
-	/* init indirect block notifications */
-	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
-	uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
-	err = register_netdevice_notifier_dev_net(rpriv->netdev,
-						  &uplink_priv->netdevice_nb,
-						  &uplink_priv->netdevice_nn);
+	err = mlx5e_rep_tc_netdevice_event_register(rpriv);
 	if (err) {
-		mlx5_core_err(priv->mdev, "Failed to register netdev notifier\n");
-		goto tc_esw_cleanup;
+		mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
+			      err);
+		goto tc_rep_cleanup;
 	}
 
 	return 0;
 
-tc_esw_cleanup:
-	mlx5e_tc_esw_cleanup(&uplink_priv->tc_ht);
+tc_rep_cleanup:
+	mlx5e_rep_tc_cleanup(rpriv);
 	return err;
 }
 
@@ -1845,17 +1355,10 @@ destroy_tises:
 
 static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 {
-	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-
-	/* clean indirect TC block notifications */
-	unregister_netdevice_notifier_dev_net(rpriv->netdev,
-					      &uplink_priv->netdevice_nb,
-					      &uplink_priv->netdevice_nn);
+	mlx5e_rep_tc_netdevice_event_unregister(rpriv);
 	mlx5e_rep_indr_clean_block_privs(rpriv);
 
-	/* delete shared tc flow table */
-	mlx5e_tc_esw_cleanup(&rpriv->uplink_priv.tc_ht);
-	mutex_destroy(&rpriv->uplink_priv.unready_flows_lock);
+	mlx5e_rep_tc_cleanup(rpriv);
 }
 
 static void mlx5e_cleanup_rep_tx(struct mlx5e_priv *priv)
@@ -1897,13 +1400,8 @@ static int uplink_rep_async_event(struct notifier_block *nb, unsigned long event
 		return NOTIFY_OK;
 	}
 
-	if (event == MLX5_DEV_EVENT_PORT_AFFINITY) {
-		struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
-		queue_work(priv->wq, &rpriv->uplink_priv.reoffload_flows_work);
-
-		return NOTIFY_OK;
-	}
+	if (event == MLX5_DEV_EVENT_PORT_AFFINITY)
+		return mlx5e_rep_tc_event_port_affinity(priv);
 
 	return NOTIFY_DONE;
 }
@@ -1912,7 +1410,6 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 {
 	struct net_device *netdev = priv->netdev;
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	u16 max_mtu;
 
 	netdev->min_mtu = ETH_MIN_MTU;
@@ -1920,8 +1417,7 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 	netdev->max_mtu = MLX5E_HW2SW_MTU(&priv->channels.params, max_mtu);
 	mlx5e_set_dev_port_mtu(priv);
 
-	INIT_WORK(&rpriv->uplink_priv.reoffload_flows_work,
-		  mlx5e_tc_reoffload_flows_work);
+	mlx5e_rep_tc_enable(priv);
 
 	mlx5_lag_add(mdev, netdev);
 	priv->events_nb.notifier_call = uplink_rep_async_event;
@@ -1933,11 +1429,10 @@ static void mlx5e_uplink_rep_enable(struct mlx5e_priv *priv)
 static void mlx5e_uplink_rep_disable(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 
 	mlx5e_dcbnl_delete_app(priv);
 	mlx5_notifier_unregister(mdev, &priv->events_nb);
-	cancel_work_sync(&rpriv->uplink_priv.reoffload_flows_work);
+	mlx5e_rep_tc_disable(priv);
 	mlx5_lag_remove(mdev);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 6a2337900420..74d46e9a201a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -203,16 +203,19 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe);
 void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
 				   struct mlx5_cqe64 *cqe);
 
-int mlx5e_rep_encap_entry_attach(struct mlx5e_priv *priv,
-				 struct mlx5e_encap_entry *e);
-void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
-				  struct mlx5e_encap_entry *e);
-
 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
 
 bool mlx5e_eswitch_rep(struct net_device *netdev);
 bool mlx5e_eswitch_uplink_rep(struct net_device *netdev);
 
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+			     struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
 #else /* CONFIG_MLX5_ESWITCH */
 static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
 static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index a514685fb560..1b60aeebab48 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -42,6 +42,7 @@
 #include "en_tc.h"
 #include "eswitch.h"
 #include "en_rep.h"
+#include "en/rep/tc.h"
 #include "ipoib/ipoib.h"
 #include "en_accel/ipsec_rxtx.h"
 #include "en_accel/tls_rxtx.h"
@@ -1216,12 +1217,12 @@ void mlx5e_handle_rx_cqe_rep(struct mlx5e_rq *rq, struct mlx5_cqe64 *cqe)
 	if (rep->vlan && skb_vlan_tag_present(skb))
 		skb_vlan_pop(skb);
 
-	if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
+	if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))
 		goto free_wqe;
 
 	napi_gro_receive(rq->cq.napi, skb);
 
-	mlx5_tc_rep_post_napi_receive(&tc_priv);
+	mlx5_rep_tc_post_napi_receive(&tc_priv);
 
 free_wqe:
 	mlx5e_free_rx_wqe(rq, wi, true);
@@ -1272,12 +1273,12 @@ void mlx5e_handle_rx_cqe_mpwrq_rep(struct mlx5e_rq *rq,
 
 	mlx5e_complete_rx_cqe(rq, cqe, cqe_bcnt, skb);
 
-	if (!mlx5e_tc_rep_update_skb(cqe, skb, &tc_priv))
+	if (!mlx5e_rep_tc_update_skb(cqe, skb, &tc_priv))
 		goto mpwrq_cqe_out;
 
 	napi_gro_receive(rq->cq.napi, skb);
 
-	mlx5_tc_rep_post_napi_receive(&tc_priv);
+	mlx5_rep_tc_post_napi_receive(&tc_priv);
 
 mpwrq_cqe_out:
 	if (likely(wi->consumed_strides < rq->mpwqe.num_strides))
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a050808f2128..251975ccbdf7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -49,6 +49,7 @@
 #include <net/ipv6_stubs.h>
 #include "en.h"
 #include "en_rep.h"
+#include "en/rep/tc.h"
 #include "en_tc.h"
 #include "eswitch.h"
 #include "esw/chains.h"
@@ -158,35 +159,6 @@ struct mlx5e_tc_flow_parse_attr {
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
 #define MLX5E_TC_TABLE_MAX_GROUP_SIZE BIT(16)
 
-struct tunnel_match_key {
-	struct flow_dissector_key_control enc_control;
-	struct flow_dissector_key_keyid enc_key_id;
-	struct flow_dissector_key_ports enc_tp;
-	struct flow_dissector_key_ip enc_ip;
-	union {
-		struct flow_dissector_key_ipv4_addrs enc_ipv4;
-		struct flow_dissector_key_ipv6_addrs enc_ipv6;
-	};
-
-	int filter_ifindex;
-};
-
-struct tunnel_match_enc_opts {
-	struct flow_dissector_key_enc_opts key;
-	struct flow_dissector_key_enc_opts mask;
-};
-
-/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
- * Upper TUNNEL_INFO_BITS for general tunnel info.
- * Lower ENC_OPTS_BITS bits for enc_opts.
- */
-#define TUNNEL_INFO_BITS 6
-#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
-#define ENC_OPTS_BITS 2
-#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
-#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
-#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
-
 struct mlx5e_tc_attr_to_reg_mapping mlx5e_tc_attr_to_reg_mappings[] = {
 	[CHAIN_TO_REG] = {
 		.mfield = MLX5_ACTION_IN_FIELD_METADATA_REG_C_0,
@@ -4806,146 +4778,6 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
 	mutex_unlock(&rpriv->unready_flows_lock);
 }
 
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-static bool mlx5e_restore_tunnel(struct mlx5e_priv *priv, struct sk_buff *skb,
-				 struct mlx5e_tc_update_priv *tc_priv,
-				 u32 tunnel_id)
-{
-	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
-	struct tunnel_match_enc_opts enc_opts = {};
-	struct mlx5_rep_uplink_priv *uplink_priv;
-	struct mlx5e_rep_priv *uplink_rpriv;
-	struct metadata_dst *tun_dst;
-	struct tunnel_match_key key;
-	u32 tun_id, enc_opts_id;
-	struct net_device *dev;
-	int err;
-
-	enc_opts_id = tunnel_id & ENC_OPTS_BITS_MASK;
-	tun_id = tunnel_id >> ENC_OPTS_BITS;
-
-	if (!tun_id)
-		return true;
-
-	uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-	uplink_priv = &uplink_rpriv->uplink_priv;
-
-	err = mapping_find(uplink_priv->tunnel_mapping, tun_id, &key);
-	if (err) {
-		WARN_ON_ONCE(true);
-		netdev_dbg(priv->netdev,
-			   "Couldn't find tunnel for tun_id: %d, err: %d\n",
-			   tun_id, err);
-		return false;
-	}
-
-	if (enc_opts_id) {
-		err = mapping_find(uplink_priv->tunnel_enc_opts_mapping,
-				   enc_opts_id, &enc_opts);
-		if (err) {
-			netdev_dbg(priv->netdev,
-				   "Couldn't find tunnel (opts) for tun_id: %d, err: %d\n",
-				   enc_opts_id, err);
-			return false;
-		}
-	}
-
-	tun_dst = tun_rx_dst(enc_opts.key.len);
-	if (!tun_dst) {
-		WARN_ON_ONCE(true);
-		return false;
-	}
-
-	ip_tunnel_key_init(&tun_dst->u.tun_info.key,
-			   key.enc_ipv4.src, key.enc_ipv4.dst,
-			   key.enc_ip.tos, key.enc_ip.ttl,
-			   0, /* label */
-			   key.enc_tp.src, key.enc_tp.dst,
-			   key32_to_tunnel_id(key.enc_key_id.keyid),
-			   TUNNEL_KEY);
-
-	if (enc_opts.key.len)
-		ip_tunnel_info_opts_set(&tun_dst->u.tun_info,
-					enc_opts.key.data,
-					enc_opts.key.len,
-					enc_opts.key.dst_opt_type);
-
-	skb_dst_set(skb, (struct dst_entry *)tun_dst);
-	dev = dev_get_by_index(&init_net, key.filter_ifindex);
-	if (!dev) {
-		netdev_dbg(priv->netdev,
-			   "Couldn't find tunnel device with ifindex: %d\n",
-			   key.filter_ifindex);
-		return false;
-	}
-
-	/* Set tun_dev so we do dev_put() after datapath */
-	tc_priv->tun_dev = dev;
-
-	skb->dev = dev;
-
-	return true;
-}
-#endif /* CONFIG_NET_TC_SKB_EXT */
-
-bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe,
-			     struct sk_buff *skb,
-			     struct mlx5e_tc_update_priv *tc_priv)
-{
-#if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
-	u32 chain = 0, reg_c0, reg_c1, tunnel_id, tuple_id;
-	struct mlx5_rep_uplink_priv *uplink_priv;
-	struct mlx5e_rep_priv *uplink_rpriv;
-	struct tc_skb_ext *tc_skb_ext;
-	struct mlx5_eswitch *esw;
-	struct mlx5e_priv *priv;
-	int tunnel_moffset;
-	int err;
-
-	reg_c0 = (be32_to_cpu(cqe->sop_drop_qpn) & MLX5E_TC_FLOW_ID_MASK);
-	if (reg_c0 == MLX5_FS_DEFAULT_FLOW_TAG)
-		reg_c0 = 0;
-	reg_c1 = be32_to_cpu(cqe->ft_metadata);
-
-	if (!reg_c0)
-		return true;
-
-	priv = netdev_priv(skb->dev);
-	esw = priv->mdev->priv.eswitch;
-
-	err = mlx5_eswitch_get_chain_for_tag(esw, reg_c0, &chain);
-	if (err) {
-		netdev_dbg(priv->netdev,
-			   "Couldn't find chain for chain tag: %d, err: %d\n",
-			   reg_c0, err);
-		return false;
-	}
-
-	if (chain) {
-		tc_skb_ext = skb_ext_add(skb, TC_SKB_EXT);
-		if (!tc_skb_ext) {
-			WARN_ON(1);
-			return false;
-		}
-
-		tc_skb_ext->chain = chain;
-
-		tuple_id = reg_c1 & TUPLE_ID_MAX;
-
-		uplink_rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
-		uplink_priv = &uplink_rpriv->uplink_priv;
-		if (!mlx5e_tc_ct_restore_flow(uplink_priv, skb, tuple_id))
-			return false;
-	}
-
-	tunnel_moffset = mlx5e_tc_attr_to_reg_mappings[TUNNEL_TO_REG].moffset;
-	tunnel_id = reg_c1 >> (8 * tunnel_moffset);
-	return mlx5e_restore_tunnel(priv, skb, tc_priv, tunnel_id);
-#endif /* CONFIG_NET_TC_SKB_EXT */
-
-	return true;
-}
-
 void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
 {
 	if (tc_priv->tun_dev)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index abdcfa4c4e0e..1d8d85b842fe 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -34,11 +34,41 @@
 #define __MLX5_EN_TC_H__
 
 #include <net/pkt_cls.h>
+#include "en.h"
 
 #define MLX5E_TC_FLOW_ID_MASK 0x0000ffff
 
 #ifdef CONFIG_MLX5_ESWITCH
 
+struct tunnel_match_key {
+	struct flow_dissector_key_control enc_control;
+	struct flow_dissector_key_keyid enc_key_id;
+	struct flow_dissector_key_ports enc_tp;
+	struct flow_dissector_key_ip enc_ip;
+	union {
+		struct flow_dissector_key_ipv4_addrs enc_ipv4;
+		struct flow_dissector_key_ipv6_addrs enc_ipv6;
+	};
+
+	int filter_ifindex;
+};
+
+struct tunnel_match_enc_opts {
+	struct flow_dissector_key_enc_opts key;
+	struct flow_dissector_key_enc_opts mask;
+};
+
+/* Tunnel_id mapping is TUNNEL_INFO_BITS + ENC_OPTS_BITS.
+ * Upper TUNNEL_INFO_BITS for general tunnel info.
+ * Lower ENC_OPTS_BITS bits for enc_opts.
+ */
+#define TUNNEL_INFO_BITS 6
+#define TUNNEL_INFO_BITS_MASK GENMASK(TUNNEL_INFO_BITS - 1, 0)
+#define ENC_OPTS_BITS 2
+#define ENC_OPTS_BITS_MASK GENMASK(ENC_OPTS_BITS - 1, 0)
+#define TUNNEL_ID_BITS (TUNNEL_INFO_BITS + ENC_OPTS_BITS)
+#define TUNNEL_ID_MASK GENMASK(TUNNEL_ID_BITS - 1, 0)
+
 enum {
 	MLX5E_TC_FLAG_INGRESS_BIT,
 	MLX5E_TC_FLAG_EGRESS_BIT,
@@ -119,11 +149,6 @@ struct mlx5e_tc_update_priv {
 	struct net_device *tun_dev;
 };
 
-bool mlx5e_tc_rep_update_skb(struct mlx5_cqe64 *cqe, struct sk_buff *skb,
-			     struct mlx5e_tc_update_priv *tc_priv);
-
-void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
-
 struct mlx5e_tc_mod_hdr_acts {
 	int num_actions;
 	int max_actions;
-- 
cgit v1.2.3-59-g8ed1b


From 549c243e4e010067a075e248f4d72e8dda844e12 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 12 May 2020 17:29:22 +0300
Subject: net/mlx5e: Extract neigh-specific code from en_rep.c to rep/neigh.c

As preparation for introducing a new kconfig option that controls
compilation of all TC offload code in mlx5, extract the neigh-specific
code from en_rep.c into a standalone file. This allows the code to be
compiled out simply by including the new source in the Makefile only when
the corresponding kconfig option is enabled, instead of adding multiple
ifdef blocks to en_rep.c.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   2 +-
 .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.c | 368 +++++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.h |  23 ++
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    |   1 +
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |   1 +
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 354 +-------------------
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |   8 -
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    |   6 -
 8 files changed, 395 insertions(+), 368 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index c21453970dbb..3c9d78e6695c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -33,7 +33,7 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/rep/tc.o en/tc_tun.o lib/port_tun.o \
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/rep/tc.o en/rep/neigh.o en/tc_tun.o lib/port_tun.o \
 					lag_mp.o \
 					lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
 					en/tc_tun_geneve.o diag/en_tc_tracepoint.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
new file mode 100644
index 000000000000..baa162432e75
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.c
@@ -0,0 +1,368 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#include <linux/refcount.h>
+#include <linux/list.h>
+#include <linux/rculist.h>
+#include <linux/rtnetlink.h>
+#include <linux/workqueue.h>
+#include <linux/rwlock.h>
+#include <linux/spinlock.h>
+#include <linux/notifier.h>
+#include <net/netevent.h>
+#include "neigh.h"
+#include "tc.h"
+#include "en_rep.h"
+#include "fs_core.h"
+#include "diag/en_rep_tracepoint.h"
+
+static unsigned long mlx5e_rep_ipv6_interval(void)
+{
+	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
+		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
+
+	return ~0UL;
+}
+
+static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
+{
+	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
+	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
+	struct net_device *netdev = rpriv->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+
+	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
+	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
+}
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+
+	mlx5_fc_queue_stats_work(priv->mdev,
+				 &neigh_update->neigh_stats_work,
+				 neigh_update->min_interval);
+}
+
+static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
+{
+	return refcount_inc_not_zero(&nhe->refcnt);
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
+{
+	if (refcount_dec_and_test(&nhe->refcnt)) {
+		mlx5e_rep_neigh_entry_remove(nhe);
+		kfree_rcu(nhe, rcu);
+	}
+}
+
+static struct mlx5e_neigh_hash_entry *
+mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
+		   struct mlx5e_neigh_hash_entry *nhe)
+{
+	struct mlx5e_neigh_hash_entry *next = NULL;
+
+	rcu_read_lock();
+
+	for (next = nhe ?
+		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					   &nhe->neigh_list,
+					   struct mlx5e_neigh_hash_entry,
+					   neigh_list) :
+		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					    struct mlx5e_neigh_hash_entry,
+					    neigh_list);
+	     next;
+	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
+					  &next->neigh_list,
+					  struct mlx5e_neigh_hash_entry,
+					  neigh_list))
+		if (mlx5e_rep_neigh_entry_hold(next))
+			break;
+
+	rcu_read_unlock();
+
+	if (nhe)
+		mlx5e_rep_neigh_entry_release(nhe);
+
+	return next;
+}
+
+static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
+{
+	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
+						    neigh_update.neigh_stats_work.work);
+	struct net_device *netdev = rpriv->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_neigh_hash_entry *nhe = NULL;
+
+	rtnl_lock();
+	if (!list_empty(&rpriv->neigh_update.neigh_list))
+		mlx5e_rep_queue_neigh_stats_work(priv);
+
+	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
+		mlx5e_tc_update_neigh_used_value(nhe);
+
+	rtnl_unlock();
+}
+
+static void mlx5e_rep_neigh_update(struct work_struct *work)
+{
+	struct mlx5e_neigh_hash_entry *nhe =
+		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
+	struct neighbour *n = nhe->n;
+	struct mlx5e_encap_entry *e;
+	unsigned char ha[ETH_ALEN];
+	struct mlx5e_priv *priv;
+	bool neigh_connected;
+	u8 nud_state, dead;
+
+	rtnl_lock();
+
+	/* If these parameters are changed after we release the lock,
+	 * we'll receive another event letting us know about it.
+	 * We use this lock to avoid inconsistency between the neigh validity
+	 * and its hw address.
+	 */
+	read_lock_bh(&n->lock);
+	memcpy(ha, n->ha, ETH_ALEN);
+	nud_state = n->nud_state;
+	dead = n->dead;
+	read_unlock_bh(&n->lock);
+
+	neigh_connected = (nud_state & NUD_VALID) && !dead;
+
+	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
+
+	list_for_each_entry(e, &nhe->encap_list, encap_list) {
+		if (!mlx5e_encap_take(e))
+			continue;
+
+		priv = netdev_priv(e->out_dev);
+		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
+		mlx5e_encap_put(priv, e);
+	}
+	mlx5e_rep_neigh_entry_release(nhe);
+	rtnl_unlock();
+	neigh_release(n);
+}
+
+static void mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
+					      struct mlx5e_neigh_hash_entry *nhe,
+					      struct neighbour *n)
+{
+	/* Take a reference to ensure the neighbour and mlx5 encap
+	 * entry won't be destructed until we drop the reference in
+	 * delayed work.
+	 */
+	neigh_hold(n);
+
+	/* This assignment is valid as long as the neigh reference
+	 * is taken
+	 */
+	nhe->n = n;
+
+	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
+		mlx5e_rep_neigh_entry_release(nhe);
+		neigh_release(n);
+	}
+}
+
+static int mlx5e_rep_netevent_event(struct notifier_block *nb,
+				    unsigned long event, void *ptr)
+{
+	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
+						    neigh_update.netevent_nb);
+	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+	struct net_device *netdev = rpriv->netdev;
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_neigh_hash_entry *nhe = NULL;
+	struct mlx5e_neigh m_neigh = {};
+	struct neigh_parms *p;
+	struct neighbour *n;
+	bool found = false;
+
+	switch (event) {
+	case NETEVENT_NEIGH_UPDATE:
+		n = ptr;
+#if IS_ENABLED(CONFIG_IPV6)
+		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
+#else
+		if (n->tbl != &arp_tbl)
+#endif
+			return NOTIFY_DONE;
+
+		m_neigh.dev = n->dev;
+		m_neigh.family = n->ops->family;
+		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
+
+		rcu_read_lock();
+		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
+		rcu_read_unlock();
+		if (!nhe)
+			return NOTIFY_DONE;
+
+		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
+		break;
+
+	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
+		p = ptr;
+
+		/* We check the device is present since we don't care about
+		 * changes in the default table, we only care about changes
+		 * done to the per-device delay probe time parameter.
+		 */
+#if IS_ENABLED(CONFIG_IPV6)
+		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
+#else
+		if (!p->dev || p->tbl != &arp_tbl)
+#endif
+			return NOTIFY_DONE;
+
+		rcu_read_lock();
+		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
+					neigh_list) {
+			if (p->dev == nhe->m_neigh.dev) {
+				found = true;
+				break;
+			}
+		}
+		rcu_read_unlock();
+		if (!found)
+			return NOTIFY_DONE;
+
+		neigh_update->min_interval = min_t(unsigned long,
+						   NEIGH_VAR(p, DELAY_PROBE_TIME),
+						   neigh_update->min_interval);
+		mlx5_fc_update_sampling_interval(priv->mdev,
+						 neigh_update->min_interval);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+static const struct rhashtable_params mlx5e_neigh_ht_params = {
+	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
+	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
+	.key_len = sizeof(struct mlx5e_neigh),
+	.automatic_shrinking = true,
+};
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+	int err;
+
+	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
+	if (err)
+		return err;
+
+	INIT_LIST_HEAD(&neigh_update->neigh_list);
+	mutex_init(&neigh_update->encap_lock);
+	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
+			  mlx5e_rep_neigh_stats_work);
+	mlx5e_rep_neigh_update_init_interval(rpriv);
+
+	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
+	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
+	if (err)
+		goto out_err;
+	return 0;
+
+out_err:
+	rhashtable_destroy(&neigh_update->neigh_ht);
+	return err;
+}
+
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+	unregister_netevent_notifier(&neigh_update->netevent_nb);
+
+	flush_workqueue(priv->wq); /* flush neigh update works */
+
+	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
+
+	mutex_destroy(&neigh_update->encap_lock);
+	rhashtable_destroy(&neigh_update->neigh_ht);
+}
+
+static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
+					struct mlx5e_neigh_hash_entry *nhe)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	int err;
+
+	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
+				     &nhe->rhash_node,
+				     mlx5e_neigh_ht_params);
+	if (err)
+		return err;
+
+	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
+
+	return err;
+}
+
+static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
+{
+	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
+
+	mutex_lock(&rpriv->neigh_update.encap_lock);
+
+	list_del_rcu(&nhe->neigh_list);
+
+	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
+			       &nhe->rhash_node,
+			       mlx5e_neigh_ht_params);
+	mutex_unlock(&rpriv->neigh_update.encap_lock);
+}
+
+/* This function must only be called under the representor's encap_lock or
+ * inside rcu read lock section.
+ */
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+			     struct mlx5e_neigh *m_neigh)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
+	struct mlx5e_neigh_hash_entry *nhe;
+
+	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
+				     mlx5e_neigh_ht_params);
+	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
+}
+
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct mlx5e_neigh_hash_entry **nhe)
+{
+	int err;
+
+	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
+	if (!*nhe)
+		return -ENOMEM;
+
+	(*nhe)->priv = priv;
+	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
+	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
+	spin_lock_init(&(*nhe)->encap_list_lock);
+	INIT_LIST_HEAD(&(*nhe)->encap_list);
+	refcount_set(&(*nhe)->refcnt, 1);
+
+	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
+	if (err)
+		goto out_free;
+	return 0;
+
+out_free:
+	kfree(*nhe);
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
new file mode 100644
index 000000000000..8eddb3ac0d74
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -0,0 +1,23 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies. */
+
+#ifndef __MLX5_EN_REP_NEIGH__
+#define __MLX5_EN_REP_NEIGH__
+
+#include "en.h"
+#include "en_rep.h"
+
+int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
+
+struct mlx5e_neigh_hash_entry *
+mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
+			     struct mlx5e_neigh *m_neigh);
+int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
+				 struct mlx5e_encap_entry *e,
+				 struct mlx5e_neigh_hash_entry **nhe);
+void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
+
+void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
+
+#endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index edc574582135..c609a5e50ebc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -9,6 +9,7 @@
 #include <linux/workqueue.h>
 #include <linux/spinlock.h>
 #include "tc.h"
+#include "neigh.h"
 #include "en_rep.h"
 #include "eswitch.h"
 #include "esw/chains.h"
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 9fdd79afa6e4..9be1fcc269b2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -7,6 +7,7 @@
 #include "en/tc_tun.h"
 #include "en_tc.h"
 #include "rep/tc.h"
+#include "rep/neigh.h"
 
 struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
 {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index c84f0d9b516e..a46405c6d560 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -35,7 +35,6 @@
 #include <net/switchdev.h>
 #include <net/pkt_cls.h>
 #include <net/act_api.h>
-#include <net/netevent.h>
 #include <net/arp.h>
 #include <net/devlink.h>
 #include <net/ipv6_stubs.h>
@@ -46,6 +45,7 @@
 #include "en_rep.h"
 #include "en_tc.h"
 #include "en/rep/tc.h"
+#include "en/rep/neigh.h"
 #include "fs_core.h"
 #include "lib/mlx5.h"
 #define CREATE_TRACE_POINTS
@@ -474,358 +474,6 @@ void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv)
 	mlx5e_sqs2vport_stop(esw, rep);
 }
 
-static unsigned long mlx5e_rep_ipv6_interval(void)
-{
-	if (IS_ENABLED(CONFIG_IPV6) && ipv6_stub->nd_tbl)
-		return NEIGH_VAR(&ipv6_stub->nd_tbl->parms, DELAY_PROBE_TIME);
-
-	return ~0UL;
-}
-
-static void mlx5e_rep_neigh_update_init_interval(struct mlx5e_rep_priv *rpriv)
-{
-	unsigned long ipv4_interval = NEIGH_VAR(&arp_tbl.parms, DELAY_PROBE_TIME);
-	unsigned long ipv6_interval = mlx5e_rep_ipv6_interval();
-	struct net_device *netdev = rpriv->netdev;
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-
-	rpriv->neigh_update.min_interval = min_t(unsigned long, ipv6_interval, ipv4_interval);
-	mlx5_fc_update_sampling_interval(priv->mdev, rpriv->neigh_update.min_interval);
-}
-
-void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-
-	mlx5_fc_queue_stats_work(priv->mdev,
-				 &neigh_update->neigh_stats_work,
-				 neigh_update->min_interval);
-}
-
-static bool mlx5e_rep_neigh_entry_hold(struct mlx5e_neigh_hash_entry *nhe)
-{
-	return refcount_inc_not_zero(&nhe->refcnt);
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe);
-
-void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe)
-{
-	if (refcount_dec_and_test(&nhe->refcnt)) {
-		mlx5e_rep_neigh_entry_remove(nhe);
-		kfree_rcu(nhe, rcu);
-	}
-}
-
-static struct mlx5e_neigh_hash_entry *
-mlx5e_get_next_nhe(struct mlx5e_rep_priv *rpriv,
-		   struct mlx5e_neigh_hash_entry *nhe)
-{
-	struct mlx5e_neigh_hash_entry *next = NULL;
-
-	rcu_read_lock();
-
-	for (next = nhe ?
-		     list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
-					   &nhe->neigh_list,
-					   struct mlx5e_neigh_hash_entry,
-					   neigh_list) :
-		     list_first_or_null_rcu(&rpriv->neigh_update.neigh_list,
-					    struct mlx5e_neigh_hash_entry,
-					    neigh_list);
-	     next;
-	     next = list_next_or_null_rcu(&rpriv->neigh_update.neigh_list,
-					  &next->neigh_list,
-					  struct mlx5e_neigh_hash_entry,
-					  neigh_list))
-		if (mlx5e_rep_neigh_entry_hold(next))
-			break;
-
-	rcu_read_unlock();
-
-	if (nhe)
-		mlx5e_rep_neigh_entry_release(nhe);
-
-	return next;
-}
-
-static void mlx5e_rep_neigh_stats_work(struct work_struct *work)
-{
-	struct mlx5e_rep_priv *rpriv = container_of(work, struct mlx5e_rep_priv,
-						    neigh_update.neigh_stats_work.work);
-	struct net_device *netdev = rpriv->netdev;
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_neigh_hash_entry *nhe = NULL;
-
-	rtnl_lock();
-	if (!list_empty(&rpriv->neigh_update.neigh_list))
-		mlx5e_rep_queue_neigh_stats_work(priv);
-
-	while ((nhe = mlx5e_get_next_nhe(rpriv, nhe)) != NULL)
-		mlx5e_tc_update_neigh_used_value(nhe);
-
-	rtnl_unlock();
-}
-
-static void mlx5e_rep_neigh_update(struct work_struct *work)
-{
-	struct mlx5e_neigh_hash_entry *nhe =
-		container_of(work, struct mlx5e_neigh_hash_entry, neigh_update_work);
-	struct neighbour *n = nhe->n;
-	struct mlx5e_encap_entry *e;
-	unsigned char ha[ETH_ALEN];
-	struct mlx5e_priv *priv;
-	bool neigh_connected;
-	u8 nud_state, dead;
-
-	rtnl_lock();
-
-	/* If these parameters are changed after we release the lock,
-	 * we'll receive another event letting us know about it.
-	 * We use this lock to avoid inconsistency between the neigh validity
-	 * and it's hw address.
-	 */
-	read_lock_bh(&n->lock);
-	memcpy(ha, n->ha, ETH_ALEN);
-	nud_state = n->nud_state;
-	dead = n->dead;
-	read_unlock_bh(&n->lock);
-
-	neigh_connected = (nud_state & NUD_VALID) && !dead;
-
-	trace_mlx5e_rep_neigh_update(nhe, ha, neigh_connected);
-
-	list_for_each_entry(e, &nhe->encap_list, encap_list) {
-		if (!mlx5e_encap_take(e))
-			continue;
-
-		priv = netdev_priv(e->out_dev);
-		mlx5e_rep_update_flows(priv, e, neigh_connected, ha);
-		mlx5e_encap_put(priv, e);
-	}
-	mlx5e_rep_neigh_entry_release(nhe);
-	rtnl_unlock();
-	neigh_release(n);
-}
-
-static void
-mlx5e_rep_queue_neigh_update_work(struct mlx5e_priv *priv,
-				  struct mlx5e_neigh_hash_entry *nhe,
-				  struct neighbour *n)
-{
-	/* Take a reference to ensure the neighbour and mlx5 encap
-	 * entry won't be destructed until we drop the reference in
-	 * delayed work.
-	 */
-	neigh_hold(n);
-
-	/* This assignment is valid as long as the the neigh reference
-	 * is taken
-	 */
-	nhe->n = n;
-
-	if (!queue_work(priv->wq, &nhe->neigh_update_work)) {
-		mlx5e_rep_neigh_entry_release(nhe);
-		neigh_release(n);
-	}
-}
-
-static int mlx5e_rep_netevent_event(struct notifier_block *nb,
-				    unsigned long event, void *ptr)
-{
-	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
-						    neigh_update.netevent_nb);
-	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	struct net_device *netdev = rpriv->netdev;
-	struct mlx5e_priv *priv = netdev_priv(netdev);
-	struct mlx5e_neigh_hash_entry *nhe = NULL;
-	struct mlx5e_neigh m_neigh = {};
-	struct neigh_parms *p;
-	struct neighbour *n;
-	bool found = false;
-
-	switch (event) {
-	case NETEVENT_NEIGH_UPDATE:
-		n = ptr;
-#if IS_ENABLED(CONFIG_IPV6)
-		if (n->tbl != ipv6_stub->nd_tbl && n->tbl != &arp_tbl)
-#else
-		if (n->tbl != &arp_tbl)
-#endif
-			return NOTIFY_DONE;
-
-		m_neigh.dev = n->dev;
-		m_neigh.family = n->ops->family;
-		memcpy(&m_neigh.dst_ip, n->primary_key, n->tbl->key_len);
-
-		rcu_read_lock();
-		nhe = mlx5e_rep_neigh_entry_lookup(priv, &m_neigh);
-		rcu_read_unlock();
-		if (!nhe)
-			return NOTIFY_DONE;
-
-		mlx5e_rep_queue_neigh_update_work(priv, nhe, n);
-		break;
-
-	case NETEVENT_DELAY_PROBE_TIME_UPDATE:
-		p = ptr;
-
-		/* We check the device is present since we don't care about
-		 * changes in the default table, we only care about changes
-		 * done per device delay prob time parameter.
-		 */
-#if IS_ENABLED(CONFIG_IPV6)
-		if (!p->dev || (p->tbl != ipv6_stub->nd_tbl && p->tbl != &arp_tbl))
-#else
-		if (!p->dev || p->tbl != &arp_tbl)
-#endif
-			return NOTIFY_DONE;
-
-		rcu_read_lock();
-		list_for_each_entry_rcu(nhe, &neigh_update->neigh_list,
-					neigh_list) {
-			if (p->dev == nhe->m_neigh.dev) {
-				found = true;
-				break;
-			}
-		}
-		rcu_read_unlock();
-		if (!found)
-			return NOTIFY_DONE;
-
-		neigh_update->min_interval = min_t(unsigned long,
-						   NEIGH_VAR(p, DELAY_PROBE_TIME),
-						   neigh_update->min_interval);
-		mlx5_fc_update_sampling_interval(priv->mdev,
-						 neigh_update->min_interval);
-		break;
-	}
-	return NOTIFY_DONE;
-}
-
-static const struct rhashtable_params mlx5e_neigh_ht_params = {
-	.head_offset = offsetof(struct mlx5e_neigh_hash_entry, rhash_node),
-	.key_offset = offsetof(struct mlx5e_neigh_hash_entry, m_neigh),
-	.key_len = sizeof(struct mlx5e_neigh),
-	.automatic_shrinking = true,
-};
-
-static int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv)
-{
-	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	int err;
-
-	err = rhashtable_init(&neigh_update->neigh_ht, &mlx5e_neigh_ht_params);
-	if (err)
-		return err;
-
-	INIT_LIST_HEAD(&neigh_update->neigh_list);
-	mutex_init(&neigh_update->encap_lock);
-	INIT_DELAYED_WORK(&neigh_update->neigh_stats_work,
-			  mlx5e_rep_neigh_stats_work);
-	mlx5e_rep_neigh_update_init_interval(rpriv);
-
-	rpriv->neigh_update.netevent_nb.notifier_call = mlx5e_rep_netevent_event;
-	err = register_netevent_notifier(&rpriv->neigh_update.netevent_nb);
-	if (err)
-		goto out_err;
-	return 0;
-
-out_err:
-	rhashtable_destroy(&neigh_update->neigh_ht);
-	return err;
-}
-
-static void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv)
-{
-	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
-	unregister_netevent_notifier(&neigh_update->netevent_nb);
-
-	flush_workqueue(priv->wq); /* flush neigh update works */
-
-	cancel_delayed_work_sync(&rpriv->neigh_update.neigh_stats_work);
-
-	mutex_destroy(&neigh_update->encap_lock);
-	rhashtable_destroy(&neigh_update->neigh_ht);
-}
-
-static int mlx5e_rep_neigh_entry_insert(struct mlx5e_priv *priv,
-					struct mlx5e_neigh_hash_entry *nhe)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	int err;
-
-	err = rhashtable_insert_fast(&rpriv->neigh_update.neigh_ht,
-				     &nhe->rhash_node,
-				     mlx5e_neigh_ht_params);
-	if (err)
-		return err;
-
-	list_add_rcu(&nhe->neigh_list, &rpriv->neigh_update.neigh_list);
-
-	return err;
-}
-
-static void mlx5e_rep_neigh_entry_remove(struct mlx5e_neigh_hash_entry *nhe)
-{
-	struct mlx5e_rep_priv *rpriv = nhe->priv->ppriv;
-
-	mutex_lock(&rpriv->neigh_update.encap_lock);
-
-	list_del_rcu(&nhe->neigh_list);
-
-	rhashtable_remove_fast(&rpriv->neigh_update.neigh_ht,
-			       &nhe->rhash_node,
-			       mlx5e_neigh_ht_params);
-	mutex_unlock(&rpriv->neigh_update.encap_lock);
-}
-
-/* This function must only be called under the representor's encap_lock or
- * inside rcu read lock section.
- */
-struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
-			     struct mlx5e_neigh *m_neigh)
-{
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-	struct mlx5e_neigh_update_table *neigh_update = &rpriv->neigh_update;
-	struct mlx5e_neigh_hash_entry *nhe;
-
-	nhe = rhashtable_lookup_fast(&neigh_update->neigh_ht, m_neigh,
-				     mlx5e_neigh_ht_params);
-	return nhe && mlx5e_rep_neigh_entry_hold(nhe) ? nhe : NULL;
-}
-
-int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
-				 struct mlx5e_encap_entry *e,
-				 struct mlx5e_neigh_hash_entry **nhe)
-{
-	int err;
-
-	*nhe = kzalloc(sizeof(**nhe), GFP_KERNEL);
-	if (!*nhe)
-		return -ENOMEM;
-
-	(*nhe)->priv = priv;
-	memcpy(&(*nhe)->m_neigh, &e->m_neigh, sizeof(e->m_neigh));
-	INIT_WORK(&(*nhe)->neigh_update_work, mlx5e_rep_neigh_update);
-	spin_lock_init(&(*nhe)->encap_list_lock);
-	INIT_LIST_HEAD(&(*nhe)->encap_list);
-	refcount_set(&(*nhe)->refcnt, 1);
-
-	err = mlx5e_rep_neigh_entry_insert(priv, *nhe);
-	if (err)
-		goto out_free;
-	return 0;
-
-out_free:
-	kfree(*nhe);
-	return err;
-}
-
 static int mlx5e_rep_open(struct net_device *dev)
 {
 	struct mlx5e_priv *priv = netdev_priv(dev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 74d46e9a201a..81ed06e58fea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -208,14 +208,6 @@ void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
 bool mlx5e_eswitch_rep(struct net_device *netdev);
 bool mlx5e_eswitch_uplink_rep(struct net_device *netdev);
 
-struct mlx5e_neigh_hash_entry *
-mlx5e_rep_neigh_entry_lookup(struct mlx5e_priv *priv,
-			     struct mlx5e_neigh *m_neigh);
-int mlx5e_rep_neigh_entry_create(struct mlx5e_priv *priv,
-				 struct mlx5e_encap_entry *e,
-				 struct mlx5e_neigh_hash_entry **nhe);
-void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
-
 #else /* CONFIG_MLX5_ESWITCH */
 static inline bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv) { return false; }
 static inline int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv) { return 0; }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 251975ccbdf7..749390dc7aaa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -4777,9 +4777,3 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
 	}
 	mutex_unlock(&rpriv->unready_flows_lock);
 }
-
-void mlx5_tc_rep_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv)
-{
-	if (tc_priv->tun_dev)
-		dev_put(tc_priv->tun_dev);
-}
-- 
cgit v1.2.3-59-g8ed1b


From e2394a61d243d74ec7d0bfd4c6e25171fba54caa Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 12 May 2020 18:08:41 +0300
Subject: net/mlx5e: Move TC-specific code from en_main.c to en_tc.c

As preparation for introducing a new kconfig option that controls
compilation of all TC offload code in mlx5, extract TC-specific code from
en_main.c to en_tc.c. This allows the code to be compiled out easily by
including the new source file in the Makefile only when the corresponding
kconfig option is enabled, instead of adding multiple ifdef blocks to
en_main.c.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c | 37 -----------------------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c   | 35 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h   |  7 +++++
 3 files changed, 42 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 07823abe5557..3829dfd39800 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3539,41 +3539,6 @@ out:
 	return err;
 }
 
-#ifdef CONFIG_MLX5_ESWITCH
-static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
-				     struct flow_cls_offload *cls_flower,
-				     unsigned long flags)
-{
-	switch (cls_flower->command) {
-	case FLOW_CLS_REPLACE:
-		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
-					      flags);
-	case FLOW_CLS_DESTROY:
-		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
-					   flags);
-	case FLOW_CLS_STATS:
-		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
-					  flags);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
-static int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
-				   void *cb_priv)
-{
-	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
-	struct mlx5e_priv *priv = cb_priv;
-
-	switch (type) {
-	case TC_SETUP_CLSFLOWER:
-		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-#endif
-
 static LIST_HEAD(mlx5e_block_cb_list);
 
 static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
@@ -3582,7 +3547,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 	struct mlx5e_priv *priv = netdev_priv(dev);
 
 	switch (type) {
-#ifdef CONFIG_MLX5_ESWITCH
 	case TC_SETUP_BLOCK: {
 		struct flow_block_offload *f = type_data;
 
@@ -3592,7 +3556,6 @@ static int mlx5e_setup_tc(struct net_device *dev, enum tc_setup_type type,
 						  mlx5e_setup_tc_block_cb,
 						  priv, priv, true);
 	}
-#endif
 	case TC_SETUP_QDISC_MQPRIO:
 		return mlx5e_setup_tc_mqprio(priv, type_data);
 	default:
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 749390dc7aaa..1614b077a477 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -31,6 +31,7 @@
  */
 
 #include <net/flow_dissector.h>
+#include <net/flow_offload.h>
 #include <net/sch_generic.h>
 #include <net/pkt_cls.h>
 #include <net/tc_act/tc_gact.h>
@@ -50,6 +51,7 @@
 #include "en.h"
 #include "en_rep.h"
 #include "en/rep/tc.h"
+#include "en/rep/neigh.h"
 #include "en_tc.h"
 #include "eswitch.h"
 #include "esw/chains.h"
@@ -4777,3 +4779,36 @@ void mlx5e_tc_reoffload_flows_work(struct work_struct *work)
 	}
 	mutex_unlock(&rpriv->unready_flows_lock);
 }
+
+static int mlx5e_setup_tc_cls_flower(struct mlx5e_priv *priv,
+				     struct flow_cls_offload *cls_flower,
+				     unsigned long flags)
+{
+	switch (cls_flower->command) {
+	case FLOW_CLS_REPLACE:
+		return mlx5e_configure_flower(priv->netdev, priv, cls_flower,
+					      flags);
+	case FLOW_CLS_DESTROY:
+		return mlx5e_delete_flower(priv->netdev, priv, cls_flower,
+					   flags);
+	case FLOW_CLS_STATS:
+		return mlx5e_stats_flower(priv->netdev, priv, cls_flower,
+					  flags);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
+
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv)
+{
+	unsigned long flags = MLX5_TC_FLAG(INGRESS) | MLX5_TC_FLAG(NIC_OFFLOAD);
+	struct mlx5e_priv *priv = cb_priv;
+
+	switch (type) {
+	case TC_SETUP_CLSFLOWER:
+		return mlx5e_setup_tc_cls_flower(priv, type_data, flags);
+	default:
+		return -EOPNOTSUPP;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 1d8d85b842fe..9c59b7fe258a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -173,6 +173,9 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
 struct mlx5e_tc_flow;
 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
 
+int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
+			    void *cb_priv);
+
 #else /* CONFIG_MLX5_ESWITCH */
 static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
@@ -181,6 +184,10 @@ static inline int  mlx5e_tc_num_filters(struct mlx5e_priv *priv,
 {
 	return 0;
 }
+
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
 #endif
 
 #endif /* __MLX5_EN_TC_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From d956873f908cff46dd55976dbd9442a027e9d9d1 Mon Sep 17 00:00:00 2001
From: Vlad Buslov <vladbu@mellanox.com>
Date: Tue, 12 May 2020 18:24:11 +0300
Subject: net/mlx5e: Introduce kconfig var for TC support

In order to improve code maintainability and readability, introduce a new
CONFIG_MLX5_CLS_ACT kconfig variable to control compilation of the TC
hardware offloads implementation. This allows distinguishing between
features that require TC support (MPLSoUDP, etc.) and features that just
rely on representor functionality (rep_bond for live migration, etc.).

Modify en/rep/tc.h, en/rep/neigh.h, en_tc.h and esw/chains.h to provide
stubs for functions that are called from generic code.

Signed-off-by: Vlad Buslov <vladbu@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig    | 17 ++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   | 11 +++---
 .../net/ethernet/mellanox/mlx5/core/en/rep/neigh.h | 11 ++++++
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.h    | 44 ++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_main.c  |  4 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    | 16 ++++++--
 .../net/ethernet/mellanox/mlx5/core/esw/chains.h   | 19 ++++++++++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  2 +
 8 files changed, 109 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 7d69a3061f17..4256d59eca2b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -78,9 +78,24 @@ config MLX5_ESWITCH
 	        Legacy SRIOV mode (L2 mac vlan steering based).
 	        Switchdev mode (eswitch offloads).
 
+config MLX5_CLS_ACT
+	bool "MLX5 TC classifier action support"
+	depends on MLX5_ESWITCH && NET_CLS_ACT
+	default y
+	help
+	  mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
+	  works in both native NIC mode and Switchdev SRIOV mode.
+	  Actions get attached to hardware offloaded classifiers and are
+	  invoked after a successful classification. Actions are used to
+	  overwrite the classification result, instantly drop or redirect and/or
+	  reformat packets in wire speeds without involving the host cpu.
+
+	  If set to N, TC offloads in both NIC and switchdev modes will be disabled.
+	  If unsure, set to Y
+
 config MLX5_TC_CT
 	bool "MLX5 TC connection tracking offload support"
-	depends on MLX5_CORE_EN && NET_SWITCHDEV && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
+	depends on MLX5_CLS_ACT && NF_FLOW_TABLE && NET_ACT_CT && NET_TC_SKB_EXT
 	default y
 	help
 	  Say Y here if you want to support offloading connection tracking rules
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 3c9d78e6695c..3c1f12c7175f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -33,18 +33,19 @@ mlx5_core-$(CONFIG_MLX5_CORE_EN) += en_main.o en_common.o en_fs.o en_ethtool.o \
 mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o en_tc.o en/rep/tc.o en/rep/neigh.o en/tc_tun.o lib/port_tun.o \
-					lag_mp.o \
-					lib/geneve.o en/mapping.o en/tc_tun_vxlan.o en/tc_tun_gre.o \
-					en/tc_tun_geneve.o diag/en_tc_tracepoint.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
+					en/mapping.o esw/chains.o en/tc_tun.o \
+					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
+					diag/en_tc_tracepoint.o
 mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 
 #
 # Core extra
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
-				      ecpf.o rdma.o esw/chains.o
+				      ecpf.o rdma.o
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
index 8eddb3ac0d74..32b239189c95 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/neigh.h
@@ -7,6 +7,8 @@
 #include "en.h"
 #include "en_rep.h"
 
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
 int mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv);
 void mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv);
 
@@ -20,4 +22,13 @@ void mlx5e_rep_neigh_entry_release(struct mlx5e_neigh_hash_entry *nhe);
 
 void mlx5e_rep_queue_neigh_stats_work(struct mlx5e_priv *priv);
 
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline int
+mlx5e_rep_neigh_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_neigh_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
 #endif /* __MLX5_EN_REP_NEIGH__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index 90da00626b97..86f92abf2fdd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -5,11 +5,11 @@
 #define __MLX5_EN_REP_TC_H__
 
 #include <linux/skbuff.h>
-#include "en.h"
 #include "en_tc.h"
 #include "en_rep.h"
 
-struct mlx5e_rep_priv;
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
 int mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv);
 void mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv);
 
@@ -21,7 +21,6 @@ void mlx5e_rep_tc_disable(struct mlx5e_priv *priv);
 
 int mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv);
 
-struct mlx5e_encap_entry;
 void mlx5e_rep_update_flows(struct mlx5e_priv *priv,
 			    struct mlx5e_encap_entry *e,
 			    bool neigh_connected,
@@ -36,10 +35,47 @@ int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		       void *type_data);
 void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv);
 
-struct mlx5e_tc_update_priv;
 bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 			     struct sk_buff *skb,
 			     struct mlx5e_tc_update_priv *tc_priv);
 void mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv);
 
+#else /* CONFIG_MLX5_CLS_ACT */
+
+struct mlx5e_rep_priv;
+static inline int
+mlx5e_rep_tc_init(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_cleanup(struct mlx5e_rep_priv *rpriv) {}
+
+static inline int
+mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv) { return 0; }
+static inline void
+mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv) {}
+
+static inline void
+mlx5e_rep_tc_enable(struct mlx5e_priv *priv) {}
+static inline void
+mlx5e_rep_tc_disable(struct mlx5e_priv *priv) {}
+
+static inline int
+mlx5e_rep_tc_event_port_affinity(struct mlx5e_priv *priv) { return NOTIFY_DONE; }
+
+static inline int
+mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
+		   void *type_data) { return -EOPNOTSUPP; }
+
+static inline void
+mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) {}
+
+struct mlx5e_tc_update_priv;
+static inline bool
+mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
+			struct sk_buff *skb,
+			struct mlx5e_tc_update_priv *tc_priv) { return true; }
+static inline void
+mlx5_rep_tc_post_napi_receive(struct mlx5e_tc_update_priv *tc_priv) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
 #endif /* __MLX5_EN_REP_TC_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
index 3829dfd39800..803f1066ac08 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_main.c
@@ -3728,7 +3728,7 @@ static int set_feature_cvlan_filter(struct net_device *netdev, bool enable)
 	return 0;
 }
 
-#ifdef CONFIG_MLX5_ESWITCH
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 static int set_feature_tc_num_filters(struct net_device *netdev, bool enable)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -3839,7 +3839,7 @@ int mlx5e_set_features(struct net_device *netdev, netdev_features_t features)
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_LRO, set_feature_lro);
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_VLAN_CTAG_FILTER,
 				    set_feature_cvlan_filter);
-#ifdef CONFIG_MLX5_ESWITCH
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_HW_TC, set_feature_tc_num_filters);
 #endif
 	err |= MLX5E_HANDLE_FEATURE(NETIF_F_RXALL, set_feature_rx_all);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 9c59b7fe258a..037aa73bf9ab 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -80,9 +80,6 @@ enum {
 
 #define MLX5_TC_FLAG(flag) BIT(MLX5E_TC_FLAG_##flag##_BIT)
 
-int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
-void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
-
 int mlx5e_tc_esw_init(struct rhashtable *tc_ht);
 void mlx5e_tc_esw_cleanup(struct rhashtable *tc_ht);
 
@@ -173,9 +170,22 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
 struct mlx5e_tc_flow;
 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
 
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
+int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
+void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv);
+
 int mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data,
 			    void *cb_priv);
 
+#else /* CONFIG_MLX5_CLS_ACT */
+static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
+static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
+static inline int
+mlx5e_setup_tc_block_cb(enum tc_setup_type type, void *type_data, void *cb_priv)
+{ return -EOPNOTSUPP; }
+#endif /* CONFIG_MLX5_CLS_ACT */
+
 #else /* CONFIG_MLX5_ESWITCH */
 static inline int  mlx5e_tc_nic_init(struct mlx5e_priv *priv) { return 0; }
 static inline void mlx5e_tc_nic_cleanup(struct mlx5e_priv *priv) {}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
index f8c4239846ea..7679ac359e31 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/chains.h
@@ -6,6 +6,8 @@
 
 #include "eswitch.h"
 
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
+
 bool
 mlx5_esw_chains_prios_supported(struct mlx5_eswitch *esw);
 bool
@@ -46,4 +48,21 @@ void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw);
 int
 mlx5_eswitch_get_chain_for_tag(struct mlx5_eswitch *esw, u32 tag, u32 *chain);
 
+#else /* CONFIG_MLX5_CLS_ACT */
+
+static inline struct mlx5_flow_table *
+mlx5_esw_chains_get_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
+			  u32 level) { return ERR_PTR(-EOPNOTSUPP); }
+static inline void
+mlx5_esw_chains_put_table(struct mlx5_eswitch *esw, u32 chain, u32 prio,
+			  u32 level) {}
+
+static inline struct mlx5_flow_table *
+mlx5_esw_chains_get_tc_end_ft(struct mlx5_eswitch *esw) { return ERR_PTR(-EOPNOTSUPP); }
+
+static inline int mlx5_esw_chains_create(struct mlx5_eswitch *esw) { return 0; }
+static inline void mlx5_esw_chains_destroy(struct mlx5_eswitch *esw) {}
+
+#endif /* CONFIG_MLX5_CLS_ACT */
+
 #endif /* __ML5_ESW_CHAINS_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 57ac2ef52e80..1c9be19ee025 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -1727,7 +1727,9 @@ static int mlx5_esw_offloads_pair(struct mlx5_eswitch *esw,
 
 static void mlx5_esw_offloads_unpair(struct mlx5_eswitch *esw)
 {
+#if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 	mlx5e_tc_clean_fdb_peer_flows(esw);
+#endif
 	esw_del_fdb_peer_miss_rules(esw);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d43600c01c4c69f9d0ac4626f03599196b845ce1 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 17 Nov 2019 14:35:42 +0200
Subject: net: Add netif_is_bareudp() API to identify bareudp devices

Add netif_is_bareudp() so the device can be identified as a bareudp one.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/bareudp.h | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/include/net/bareudp.h b/include/net/bareudp.h
index cb03f6f15956..dc65a0d71d9b 100644
--- a/include/net/bareudp.h
+++ b/include/net/bareudp.h
@@ -5,6 +5,7 @@
 
 #include <linux/types.h>
 #include <linux/skbuff.h>
+#include <net/rtnetlink.h>
 
 struct bareudp_conf {
 	__be16 ethertype;
@@ -17,4 +18,10 @@ struct net_device *bareudp_dev_create(struct net *net, const char *name,
 				      u8 name_assign_type,
 				      struct bareudp_conf *info);
 
+static inline bool netif_is_bareudp(const struct net_device *dev)
+{
+	return dev->rtnl_link_ops &&
+	       !strcmp(dev->rtnl_link_ops->kind, "bareudp");
+}
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From f828ca6a2fb6f6c861e5113d3c91d3bca68db28e Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Sun, 17 Nov 2019 15:32:24 +0200
Subject: net/mlx5e: Add support for hw encapsulation of MPLS over UDP

MPLS over UDP is supported by adding a rule on a representor net device
which does tunnel_key set, push mpls and forward to a bareudp device. At
the hardware level we use a packet_reformat_context object to do the
encapsulation of the packet.

The resulting packet looks as follows (left side transmitted first):
outer L2 | outer IP | UDP | MPLS | inner L3 and data |

Example usage:
  tc filter add dev $rep0 protocol ip prio 1 root flower skip_sw  \
     action tunnel_key set src_ip 8.8.8.21 dst_ip 8.8.8.24 id 555 \
     dst_port 6635 tos 4 ttl 6 csum action mpls push protocol 0x8847 \
     label 555 tc 3 action mirred egress redirect dev bareudp0

This is how the filter is shown with tc filter show:
tc filter show dev enp59s0f0_0 ingress
filter protocol ip pref 1 flower chain 0
filter protocol ip pref 1 flower chain 0 handle 0x1
  eth_type ipv4
  skip_sw
  in_hw in_hw_count 1
        action order 1: tunnel_key  set
        src_ip 8.8.8.21
        dst_ip 8.8.8.24
        key_id 555
        dst_port 6635
        csum
        tos 0x4
        ttl 6 pipe
         index 1 ref 1 bind 1

        action order 2: mpls  push protocol mpls_uc label 555 tc 3 ttl 255 pipe
         index 1 ref 1 bind 1

        action order 3: mirred (Egress Redirect to device bareudp0) stolen
        index 1 ref 1 bind 1

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |  2 +-
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |  3 +
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.h    |  2 +
 .../mellanox/mlx5/core/en/tc_tun_mplsoudp.c        | 88 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 18 +++++
 .../net/ethernet/mellanox/mlx5/core/lib/port_tun.c |  4 +-
 6 files changed, 114 insertions(+), 3 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 3c1f12c7175f..e5ee9103fefb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -38,7 +38,7 @@ mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o lib/geneve.o lib/port_tun.o lag
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
 					en/mapping.o esw/chains.o en/tc_tun.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
-					diag/en_tc_tracepoint.o
+					en/tc_tun_mplsoudp.o diag/en_tc_tracepoint.o
 mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 
 #
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 9be1fcc269b2..e99382f58807 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -4,6 +4,7 @@
 #include <net/vxlan.h>
 #include <net/gre.h>
 #include <net/geneve.h>
+#include <net/bareudp.h>
 #include "en/tc_tun.h"
 #include "en_tc.h"
 #include "rep/tc.h"
@@ -18,6 +19,8 @@ struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev)
 	else if (netif_is_gretap(tunnel_dev) ||
 		 netif_is_ip6gretap(tunnel_dev))
 		return &gre_tunnel;
+	else if (netif_is_bareudp(tunnel_dev))
+		return &mplsoudp_tunnel;
 	else
 		return NULL;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
index 1630f0ec3ad7..704359df6095 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.h
@@ -16,6 +16,7 @@ enum {
 	MLX5E_TC_TUNNEL_TYPE_VXLAN,
 	MLX5E_TC_TUNNEL_TYPE_GENEVE,
 	MLX5E_TC_TUNNEL_TYPE_GRETAP,
+	MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
 };
 
 struct mlx5e_tc_tunnel {
@@ -46,6 +47,7 @@ struct mlx5e_tc_tunnel {
 extern struct mlx5e_tc_tunnel vxlan_tunnel;
 extern struct mlx5e_tc_tunnel geneve_tunnel;
 extern struct mlx5e_tc_tunnel gre_tunnel;
+extern struct mlx5e_tc_tunnel mplsoudp_tunnel;
 
 struct mlx5e_tc_tunnel *mlx5e_get_tc_tun(struct net_device *tunnel_dev);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
new file mode 100644
index 000000000000..ff296c0a32c4
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2018 Mellanox Technologies. */
+
+#include <net/bareudp.h>
+#include <net/mpls.h>
+#include "en/tc_tun.h"
+
+static bool can_offload(struct mlx5e_priv *priv)
+{
+	return MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev, reformat_l3_tunnel_to_l2);
+}
+
+static int calc_hlen(struct mlx5e_encap_entry *e)
+{
+	return sizeof(struct udphdr) + MPLS_HLEN;
+}
+
+static int init_encap_attr(struct net_device *tunnel_dev,
+			   struct mlx5e_priv *priv,
+			   struct mlx5e_encap_entry *e,
+			   struct netlink_ext_ack *extack)
+{
+	e->tunnel = &mplsoudp_tunnel;
+	e->reformat_type = MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL;
+	return 0;
+}
+
+static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl)
+{
+	u32 res;
+
+	/* Build the 32-bit MPLS label stack entry, constructed as follows:
+	 * 20 bits label (bits 31-12)
+	 * 3 bits traffic class, the low 3 bits of tos (bits 11-9)
+	 * 1 bit bottom of stack (bit 8). Since we support only one label,
+	 *       this bit is always set.
+	 * 8 bits TTL (bits 7-0)
+	 */
+	res = be32_to_cpu(label) << 12 | 1 << 8 | (tos & 7) <<  9 | ttl;
+	return cpu_to_be32(res);
+}
+
+static int generate_ip_tun_hdr(char buf[],
+			       __u8 *ip_proto,
+			       struct mlx5e_encap_entry *r)
+{
+	const struct ip_tunnel_key *tun_key = &r->tun_info->key;
+	__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
+	struct udphdr *udp = (struct udphdr *)(buf);
+	struct mpls_shim_hdr *mpls;
+
+	mpls = (struct mpls_shim_hdr *)(udp + 1);
+	*ip_proto = IPPROTO_UDP;
+
+	udp->dest = tun_key->tp_dst;
+	mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl);
+
+	return 0;
+}
+
+static int parse_udp_ports(struct mlx5e_priv *priv,
+			   struct mlx5_flow_spec *spec,
+			   struct flow_cls_offload *f,
+			   void *headers_c,
+			   void *headers_v)
+{
+	return mlx5e_tc_tun_parse_udp_ports(priv, spec, f, headers_c, headers_v);
+}
+
+static int parse_tunnel(struct mlx5e_priv *priv,
+			struct mlx5_flow_spec *spec,
+			struct flow_cls_offload *f,
+			void *headers_c,
+			void *headers_v)
+{
+	return 0;
+}
+
+struct mlx5e_tc_tunnel mplsoudp_tunnel = {
+	.tunnel_type          = MLX5E_TC_TUNNEL_TYPE_MPLSOUDP,
+	.match_level          = MLX5_MATCH_L4,
+	.can_offload          = can_offload,
+	.calc_hlen            = calc_hlen,
+	.init_encap_attr      = init_encap_attr,
+	.generate_ip_tun_hdr  = generate_ip_tun_hdr,
+	.parse_udp_ports      = parse_udp_ports,
+	.parse_tunnel         = parse_tunnel,
+};
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 1614b077a477..2cebbd03bc57 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -48,6 +48,7 @@
 #include <net/tc_act/tc_csum.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
+#include <net/bareudp.h>
 #include "en.h"
 #include "en_rep.h"
 #include "en/rep/tc.h"
@@ -3685,6 +3686,7 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 	bool encap = false, decap = false;
 	u32 action = attr->action;
 	int err, i, if_count = 0;
+	bool mpls_push = false;
 
 	if (!flow_action_has_entries(flow_action))
 		return -EINVAL;
@@ -3699,6 +3701,16 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			action |= MLX5_FLOW_CONTEXT_ACTION_DROP |
 				  MLX5_FLOW_CONTEXT_ACTION_COUNT;
 			break;
+		case FLOW_ACTION_MPLS_PUSH:
+			if (!MLX5_CAP_ESW_FLOWTABLE_FDB(priv->mdev,
+							reformat_l2_to_l3_tunnel) ||
+			    act->mpls_push.proto != htons(ETH_P_MPLS_UC)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "mpls push is supported only for mpls_uc protocol");
+				return -EOPNOTSUPP;
+			}
+			mpls_push = true;
+			break;
 		case FLOW_ACTION_MANGLE:
 		case FLOW_ACTION_ADD:
 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
@@ -3729,6 +3741,12 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				return -EINVAL;
 			}
 
+			if (mpls_push && !netif_is_bareudp(out_dev)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "mpls is supported only through a bareudp device");
+				return -EOPNOTSUPP;
+			}
+
 			if (ft_flow && out_dev == priv->netdev) {
 				/* Ignore forward to self rules generated
 				 * by adding both mlx5 devs to the flow table
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
index 8809a65ecefb..e042e0924079 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/lib/port_tun.c
@@ -144,11 +144,11 @@ static int mlx5_set_entropy(struct mlx5_tun_entropy *tun_entropy,
 int mlx5_tun_entropy_refcount_inc(struct mlx5_tun_entropy *tun_entropy,
 				  int reformat_type)
 {
-	/* the default is error for unknown (non VXLAN/GRE tunnel types) */
 	int err = -EOPNOTSUPP;
 
 	mutex_lock(&tun_entropy->lock);
-	if (reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN &&
+	if ((reformat_type == MLX5_REFORMAT_TYPE_L2_TO_VXLAN ||
+	     reformat_type == MLX5_REFORMAT_TYPE_L2_TO_L3_TUNNEL) &&
 	    tun_entropy->enabled) {
 		/* in case entropy calculation is enabled for all tunneling
 		 * types, it is ok for VXLAN, so approve.
-- 
cgit v1.2.3-59-g8ed1b


From 72046a91d13469ef35885323a84938dce35ade94 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 29 Jan 2020 16:21:16 +0200
Subject: net/mlx5e: Allow to match on mpls parameters

Support matching on MPLS over UDP parameters using misc2 section of
match parameters.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/en/tc_tun_mplsoudp.c        | 49 ++++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 20 ++++++++-
 2 files changed, 67 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index ff296c0a32c4..98ee62e427d2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -73,6 +73,55 @@ static int parse_tunnel(struct mlx5e_priv *priv,
 			void *headers_c,
 			void *headers_v)
 {
+	struct flow_rule *rule = flow_cls_offload_flow_rule(f);
+	struct flow_match_enc_keyid enc_keyid;
+	struct flow_match_mpls match;
+	void *misc2_c;
+	void *misc2_v;
+
+	misc2_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
+			       misc_parameters_2);
+	misc2_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+			       misc_parameters_2);
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_MPLS))
+		return 0;
+
+	if (!flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_KEYID))
+		return 0;
+
+	flow_rule_match_enc_keyid(rule, &enc_keyid);
+
+	if (!enc_keyid.mask->keyid)
+		return 0;
+
+	if (!(MLX5_CAP_GEN(priv->mdev, flex_parser_protocols) &
+	      MLX5_FLEX_PROTO_CW_MPLS_UDP))
+		return -EOPNOTSUPP;
+
+	flow_rule_match_mpls(rule, &match);
+
+	MLX5_SET(fte_match_set_misc2, misc2_c,
+		 outer_first_mpls_over_udp.mpls_label, match.mask->mpls_label);
+	MLX5_SET(fte_match_set_misc2, misc2_v,
+		 outer_first_mpls_over_udp.mpls_label, match.key->mpls_label);
+
+	MLX5_SET(fte_match_set_misc2, misc2_c,
+		 outer_first_mpls_over_udp.mpls_exp, match.mask->mpls_tc);
+	MLX5_SET(fte_match_set_misc2, misc2_v,
+		 outer_first_mpls_over_udp.mpls_exp, match.key->mpls_tc);
+
+	MLX5_SET(fte_match_set_misc2, misc2_c,
+		 outer_first_mpls_over_udp.mpls_s_bos, match.mask->mpls_bos);
+	MLX5_SET(fte_match_set_misc2, misc2_v,
+		 outer_first_mpls_over_udp.mpls_s_bos, match.key->mpls_bos);
+
+	MLX5_SET(fte_match_set_misc2, misc2_c,
+		 outer_first_mpls_over_udp.mpls_ttl, match.mask->mpls_ttl);
+	MLX5_SET(fte_match_set_misc2, misc2_v,
+		 outer_first_mpls_over_udp.mpls_ttl, match.key->mpls_ttl);
+	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 2cebbd03bc57..801fcd1b5f85 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2073,6 +2073,20 @@ static int mlx5e_flower_parse_meta(struct net_device *filter_dev,
 	return 0;
 }
 
+static bool skip_key_basic(struct net_device *filter_dev,
+			   struct flow_cls_offload *f)
+{
+	/* When doing mpls over udp decap, the user needs to provide
+	 * MPLS_UC as the protocol in order to be able to match on mpls
+	 * label fields.  However, the actual ethertype is IP so we want to
+	 * avoid matching on this, otherwise we'll fail the match.
+	 */
+	if (netif_is_bareudp(filter_dev) && f->common.chain_index == 0)
+		return true;
+
+	return false;
+}
+
 static int __parse_cls_flower(struct mlx5e_priv *priv,
 			      struct mlx5e_tc_flow *flow,
 			      struct mlx5_flow_spec *spec,
@@ -2117,7 +2131,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 	      BIT(FLOW_DISSECTOR_KEY_IP)  |
 	      BIT(FLOW_DISSECTOR_KEY_CT) |
 	      BIT(FLOW_DISSECTOR_KEY_ENC_IP) |
-	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS))) {
+	      BIT(FLOW_DISSECTOR_KEY_ENC_OPTS) |
+	      BIT(FLOW_DISSECTOR_KEY_MPLS))) {
 		NL_SET_ERR_MSG_MOD(extack, "Unsupported key");
 		netdev_warn(priv->netdev, "Unsupported key used: 0x%x\n",
 			    dissector->used_keys);
@@ -2147,7 +2162,8 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 	if (err)
 		return err;
 
-	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
+	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC) &&
+	    !skip_key_basic(filter_dev, f)) {
 		struct flow_match_basic match;
 
 		flow_rule_match_basic(rule, &match);
-- 
cgit v1.2.3-59-g8ed1b


From 14e6b038afa014ac2288a2f3d692697f708ba344 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Mon, 3 Feb 2020 13:44:14 +0200
Subject: net/mlx5e: Add support for hw decapsulation of MPLS over UDP

MPLS over UDP is supported in hardware by using a packet reformat object
with reformat type equal L3_TUNNEL_TO_L2 which both decapsulates the
outer L3, L4 and MPLS headers, and allows for setting the L2 headers of
the resulting decapsulated packet. For the hardware to operate
correctly, the configuration of the firmware must have
FLEX_PARSER_PROFILE_ENABLE = 1.

Example tc rule:
  tc filter add dev bareudp0 protocol all prio 1 root flower enc_dst_port \
      6635 enc_src_ip 8.8.8.23 action mpls pop protocol ip pipe \
      action pedit ex munge eth dst set 00:11:22:33:44:21 pipe action \
      mirred egress redirect dev enp59s0f0_0

We use pedit to set the correct destination MAC.

For MPLS over UDP decapsulation to take place, the driver logic requires
the following:

1. flower filter added on bareudp device.
2. action mpls pop
3. zero or more pedit munge actions
4. one redirect action

Current implementation supports only IPv4 and no VLAN.

tc filter show output looks like this:
   filter protocol all pref 1 flower chain 0
   filter protocol all pref 1 flower chain 0 handle 0x1
     enc_src_ip 8.8.8.24
     enc_dst_port 6635
     in_hw in_hw_count 1
            action order 1: mpls  pop protocol ip pipe
             index 2 ref 1 bind 1

            action order 2:  pedit action pipe keys 2
             index 1 ref 1 bind 1
             key #0  at eth+0: val 00112233 mask 00000000
             key #1  at eth+4: val 44210000 mask 0000ffff

            action order 3: mirred (Egress Redirect to device enp59s0f0_0) stolen
            index 2 ref 1 bind 1

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Vlad Buslov <vladbu@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  16 ++
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 217 ++++++++++++++++++++-
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |   3 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   3 +
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |   4 +
 5 files changed, 238 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 81ed06e58fea..93e911baacad 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -158,6 +158,22 @@ struct mlx5e_neigh_hash_entry {
 enum {
 	/* set when the encap entry is successfully offloaded into HW */
 	MLX5_ENCAP_ENTRY_VALID     = BIT(0),
+	MLX5_REFORMAT_DECAP        = BIT(1),
+};
+
+struct mlx5e_decap_key {
+	struct ethhdr key;
+};
+
+struct mlx5e_decap_entry {
+	struct mlx5e_decap_key key;
+	struct list_head flows;
+	struct hlist_node hlist;
+	refcount_t refcnt;
+	struct completion res_ready;
+	int compl_result;
+	struct mlx5_pkt_reformat *pkt_reformat;
+	struct rcu_head rcu;
 };
 
 struct mlx5e_encap_entry {
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 801fcd1b5f85..a6b18f0444e7 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -46,6 +46,7 @@
 #include <net/tc_act/tc_tunnel_key.h>
 #include <net/tc_act/tc_pedit.h>
 #include <net/tc_act/tc_csum.h>
+#include <net/tc_act/tc_mpls.h>
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
 #include <net/bareudp.h>
@@ -93,6 +94,7 @@ enum {
 	MLX5E_TC_FLOW_FLAG_NOT_READY	= MLX5E_TC_FLOW_BASE + 5,
 	MLX5E_TC_FLOW_FLAG_DELETED	= MLX5E_TC_FLOW_BASE + 6,
 	MLX5E_TC_FLOW_FLAG_CT		= MLX5E_TC_FLOW_BASE + 7,
+	MLX5E_TC_FLOW_FLAG_L3_TO_L2_DECAP = MLX5E_TC_FLOW_BASE + 8,
 };
 
 #define MLX5E_TC_MAX_SPLITS 1
@@ -126,6 +128,11 @@ struct mlx5e_tc_flow {
 	u64			cookie;
 	unsigned long		flags;
 	struct mlx5_flow_handle *rule[MLX5E_TC_MAX_SPLITS + 1];
+
+	/* flows sharing the same reformat object - currently mpls decap */
+	struct list_head l3_to_l2_reformat;
+	struct mlx5e_decap_entry *decap_reformat;
+
 	/* Flow can be associated with multiple encap IDs.
 	 * The number of encaps is bounded by the number of supported
 	 * destinations.
@@ -157,6 +164,7 @@ struct mlx5e_tc_flow_parse_attr {
 	struct mlx5_flow_spec spec;
 	struct mlx5e_tc_mod_hdr_acts mod_hdr_acts;
 	int mirred_ifindex[MLX5_MAX_FLOW_FWD_VPORTS];
+	struct ethhdr eth;
 };
 
 #define MLX5E_TC_TABLE_NUM_GROUPS 4
@@ -1124,6 +1132,11 @@ static int mlx5e_attach_encap(struct mlx5e_priv *priv,
 			      struct netlink_ext_ack *extack,
 			      struct net_device **encap_dev,
 			      bool *encap_valid);
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow,
+			      struct netlink_ext_ack *extack);
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+			       struct mlx5e_tc_flow *flow);
 
 static struct mlx5_flow_handle *
 mlx5e_tc_offload_fdb_rules(struct mlx5_eswitch *esw,
@@ -1299,6 +1312,12 @@ mlx5e_tc_add_fdb_flow(struct mlx5e_priv *priv,
 		return -EOPNOTSUPP;
 	}
 
+	if (flow_flag_test(flow, L3_TO_L2_DECAP)) {
+		err = mlx5e_attach_decap(priv, flow, extack);
+		if (err)
+			return err;
+	}
+
 	for (out_index = 0; out_index < MLX5_MAX_FLOW_FWD_VPORTS; out_index++) {
 		int mirred_ifindex;
 
@@ -1408,6 +1427,9 @@ static void mlx5e_tc_del_fdb_flow(struct mlx5e_priv *priv,
 
 	if (attr->action & MLX5_FLOW_CONTEXT_ACTION_COUNT)
 		mlx5_fc_destroy(attr->counter_dev, attr->counter);
+
+	if (flow_flag_test(flow, L3_TO_L2_DECAP))
+		mlx5e_detach_decap(priv, flow);
 }
 
 void mlx5e_tc_encap_flows_add(struct mlx5e_priv *priv,
@@ -1684,6 +1706,17 @@ static void mlx5e_encap_dealloc(struct mlx5e_priv *priv, struct mlx5e_encap_entr
 	kfree_rcu(e, rcu);
 }
 
+static void mlx5e_decap_dealloc(struct mlx5e_priv *priv,
+				struct mlx5e_decap_entry *d)
+{
+	WARN_ON(!list_empty(&d->flows));
+
+	if (!d->compl_result)
+		mlx5_packet_reformat_dealloc(priv->mdev, d->pkt_reformat);
+
+	kfree_rcu(d, rcu);
+}
+
 void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 {
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -1696,6 +1729,18 @@ void mlx5e_encap_put(struct mlx5e_priv *priv, struct mlx5e_encap_entry *e)
 	mlx5e_encap_dealloc(priv, e);
 }
 
+static void mlx5e_decap_put(struct mlx5e_priv *priv, struct mlx5e_decap_entry *d)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+
+	if (!refcount_dec_and_mutex_lock(&d->refcnt, &esw->offloads.decap_tbl_lock))
+		return;
+	hash_del_rcu(&d->hlist);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+	mlx5e_decap_dealloc(priv, d);
+}
+
 static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 			       struct mlx5e_tc_flow *flow, int out_index)
 {
@@ -1719,6 +1764,29 @@ static void mlx5e_detach_encap(struct mlx5e_priv *priv,
 	mlx5e_encap_dealloc(priv, e);
 }
 
+static void mlx5e_detach_decap(struct mlx5e_priv *priv,
+			       struct mlx5e_tc_flow *flow)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_decap_entry *d = flow->decap_reformat;
+
+	if (!d)
+		return;
+
+	mutex_lock(&esw->offloads.decap_tbl_lock);
+	list_del(&flow->l3_to_l2_reformat);
+	flow->decap_reformat = NULL;
+
+	if (!refcount_dec_and_test(&d->refcnt)) {
+		mutex_unlock(&esw->offloads.decap_tbl_lock);
+		return;
+	}
+	hash_del_rcu(&d->hlist);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+	mlx5e_decap_dealloc(priv, d);
+}
+
 static void __mlx5e_tc_del_fdb_peer_flow(struct mlx5e_tc_flow *flow)
 {
 	struct mlx5_eswitch *esw = flow->priv->mdev->priv.eswitch;
@@ -1990,7 +2058,11 @@ static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			return err;
 		}
 
-		flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
+		/* With mpls over udp we decapsulate using packet reformat
+		 * object
+		 */
+		if (!netif_is_bareudp(filter_dev))
+			flow->esw_attr->action |= MLX5_FLOW_CONTEXT_ACTION_DECAP;
 	}
 
 	if (!needs_mapping && !sets_mapping)
@@ -3285,12 +3357,22 @@ static inline int cmp_encap_info(struct encap_key *a,
 	       a->tc_tunnel->tunnel_type != b->tc_tunnel->tunnel_type;
 }
 
+static inline int cmp_decap_info(struct mlx5e_decap_key *a,
+				 struct mlx5e_decap_key *b)
+{
+	return memcmp(&a->key, &b->key, sizeof(b->key));
+}
+
 static inline int hash_encap_info(struct encap_key *key)
 {
 	return jhash(key->ip_tun_key, sizeof(*key->ip_tun_key),
 		     key->tc_tunnel->tunnel_type);
 }
 
+static inline int hash_decap_info(struct mlx5e_decap_key *key)
+{
+	return jhash(&key->key, sizeof(key->key), 0);
+}
 
 static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
 				  struct net_device *peer_netdev)
@@ -3305,13 +3387,16 @@ static bool is_merged_eswitch_dev(struct mlx5e_priv *priv,
 		same_hw_devs(priv, peer_priv));
 }
 
-
-
 bool mlx5e_encap_take(struct mlx5e_encap_entry *e)
 {
 	return refcount_inc_not_zero(&e->refcnt);
 }
 
+static bool mlx5e_decap_take(struct mlx5e_decap_entry *e)
+{
+	return refcount_inc_not_zero(&e->refcnt);
+}
+
 static struct mlx5e_encap_entry *
 mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
 		uintptr_t hash_key)
@@ -3332,6 +3417,24 @@ mlx5e_encap_get(struct mlx5e_priv *priv, struct encap_key *key,
 	return NULL;
 }
 
+static struct mlx5e_decap_entry *
+mlx5e_decap_get(struct mlx5e_priv *priv, struct mlx5e_decap_key *key,
+		uintptr_t hash_key)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5e_decap_key r_key;
+	struct mlx5e_decap_entry *e;
+
+	hash_for_each_possible_rcu(esw->offloads.decap_tbl, e,
+				   hlist, hash_key) {
+		r_key = e->key;
+		if (!cmp_decap_info(&r_key, key) &&
+		    mlx5e_decap_take(e))
+			return e;
+	}
+	return NULL;
+}
+
 static struct ip_tunnel_info *dup_tun_info(const struct ip_tunnel_info *tun_info)
 {
 	size_t tun_size = sizeof(*tun_info) + tun_info->options_len;
@@ -3477,6 +3580,84 @@ out_err_init:
 	return err;
 }
 
+/* Bind @flow to the decap reformat entry matching parse_attr->eth, creating it if absent. */
+static int mlx5e_attach_decap(struct mlx5e_priv *priv,
+			      struct mlx5e_tc_flow *flow,
+			      struct netlink_ext_ack *extack)
+{
+	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
+	struct mlx5_esw_flow_attr *attr = flow->esw_attr;
+	struct mlx5e_tc_flow_parse_attr *parse_attr;
+	struct mlx5e_decap_entry *d;
+	struct mlx5e_decap_key key;
+	uintptr_t hash_key;
+	int err = 0; /* must read as 0 when the reformat allocation below succeeds */
+
+	parse_attr = attr->parse_attr;
+	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "encap header larger than max supported");
+		return -EOPNOTSUPP;
+	}
+
+	key.key = parse_attr->eth;
+	hash_key = hash_decap_info(&key);
+	mutex_lock(&esw->offloads.decap_tbl_lock);
+	d = mlx5e_decap_get(priv, &key, hash_key);
+	if (d) {
+		mutex_unlock(&esw->offloads.decap_tbl_lock);
+		wait_for_completion(&d->res_ready); /* entry's creator may still be allocating */
+		mutex_lock(&esw->offloads.decap_tbl_lock);
+		if (d->compl_result) {
+			err = -EREMOTEIO;
+			goto out_free;
+		}
+		goto found;
+	}
+
+	d = kzalloc(sizeof(*d), GFP_KERNEL);
+	if (!d) {
+		err = -ENOMEM;
+		goto out_err;
+	}
+
+	d->key = key;
+	refcount_set(&d->refcnt, 1);
+	init_completion(&d->res_ready);
+	INIT_LIST_HEAD(&d->flows);
+	hash_add_rcu(esw->offloads.decap_tbl, &d->hlist, hash_key);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+
+	d->pkt_reformat = mlx5_packet_reformat_alloc(priv->mdev,
+						     MLX5_REFORMAT_TYPE_L3_TUNNEL_TO_L2,
+						     sizeof(parse_attr->eth), &parse_attr->eth,
+						     MLX5_FLOW_NAMESPACE_FDB);
+	if (IS_ERR(d->pkt_reformat)) {
+		err = PTR_ERR(d->pkt_reformat);
+		d->compl_result = err;
+	}
+	mutex_lock(&esw->offloads.decap_tbl_lock);
+	complete_all(&d->res_ready); /* release waiters on both success and failure */
+	if (err)
+		goto out_free;
+
+found:
+	flow->decap_reformat = d;
+	attr->decap_pkt_reformat = d->pkt_reformat;
+	list_add(&flow->l3_to_l2_reformat, &d->flows);
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	return 0;
+
+out_free:
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	mlx5e_decap_put(priv, d);
+	return err;
+
+out_err:
+	mutex_unlock(&esw->offloads.decap_tbl_lock);
+	return err;
+}
+
 static int parse_tc_vlan_action(struct mlx5e_priv *priv,
 				const struct flow_action_entry *act,
 				struct mlx5_esw_flow_attr *attr,
@@ -3688,7 +3869,8 @@ static int verify_uplink_forwarding(struct mlx5e_priv *priv,
 static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				struct flow_action *flow_action,
 				struct mlx5e_tc_flow *flow,
-				struct netlink_ext_ack *extack)
+				struct netlink_ext_ack *extack,
+				struct net_device *filter_dev)
 {
 	struct pedit_headers_action hdrs[2] = {};
 	struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
@@ -3727,8 +3909,32 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			}
 			mpls_push = true;
 			break;
+		case FLOW_ACTION_MPLS_POP:
+			/* we only support mpls pop if it is the first action
+			 * and the filter net device is bareudp. Subsequent
+			 * actions can be pedit and the last can be mirred
+			 * egress redirect.
+			 */
+			if (i) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "mpls pop supported only as first action");
+				return -EOPNOTSUPP;
+			}
+			if (!netif_is_bareudp(filter_dev)) {
+				NL_SET_ERR_MSG_MOD(extack,
+						   "mpls pop supported only on bareudp devices");
+				return -EOPNOTSUPP;
+			}
+
+			parse_attr->eth.h_proto = act->mpls_pop.proto;
+			action |= MLX5_FLOW_CONTEXT_ACTION_PACKET_REFORMAT;
+			flow_flag_set(flow, L3_TO_L2_DECAP);
+			break;
 		case FLOW_ACTION_MANGLE:
 		case FLOW_ACTION_ADD:
+			if (flow_flag_test(flow, L3_TO_L2_DECAP))
+				return -EOPNOTSUPP;
+
 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
 						    hdrs, extack);
 			if (err)
@@ -4093,6 +4299,7 @@ mlx5e_alloc_flow(struct mlx5e_priv *priv, int attr_size,
 		INIT_LIST_HEAD(&flow->encaps[out_index].list);
 	INIT_LIST_HEAD(&flow->mod_hdr);
 	INIT_LIST_HEAD(&flow->hairpin);
+	INIT_LIST_HEAD(&flow->l3_to_l2_reformat);
 	refcount_set(&flow->refcnt, 1);
 	init_completion(&flow->init_done);
 
@@ -4162,7 +4369,7 @@ __mlx5e_add_fdb_flow(struct mlx5e_priv *priv,
 	if (err)
 		goto err_free;
 
-	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack);
+	err = parse_tc_fdb_actions(priv, &rule->action, flow, extack, filter_dev);
 	if (err)
 		goto err_free;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index c5eb4e7754a9..ac79b7c9aeb3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -2262,6 +2262,8 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	hash_init(esw->offloads.encap_tbl);
 	mutex_init(&esw->offloads.mod_hdr.lock);
 	hash_init(esw->offloads.mod_hdr.hlist);
+	mutex_init(&esw->offloads.decap_tbl_lock);
+	hash_init(esw->offloads.decap_tbl);
 	atomic64_set(&esw->offloads.num_flows, 0);
 	mutex_init(&esw->state_lock);
 	mutex_init(&esw->mode_lock);
@@ -2303,6 +2305,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	mutex_destroy(&esw->state_lock);
 	mutex_destroy(&esw->offloads.mod_hdr.lock);
 	mutex_destroy(&esw->offloads.encap_tbl_lock);
+	mutex_destroy(&esw->offloads.decap_tbl_lock);
 	kfree(esw->vports);
 	kfree(esw);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 4a1c6c78bb14..ccbbea3e0505 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -209,6 +209,8 @@ struct mlx5_esw_offload {
 	struct mutex peer_mutex;
 	struct mutex encap_tbl_lock; /* protects encap_tbl */
 	DECLARE_HASHTABLE(encap_tbl, 8);
+	struct mutex decap_tbl_lock; /* protects decap_tbl */
+	DECLARE_HASHTABLE(decap_tbl, 8);
 	struct mod_hdr_tbl mod_hdr;
 	DECLARE_HASHTABLE(termtbl_tbl, 8);
 	struct mutex termtbl_mutex; /* protects termtbl hash */
@@ -432,6 +434,7 @@ struct mlx5_esw_flow_attr {
 	struct mlx5_flow_table *fdb;
 	struct mlx5_flow_table *dest_ft;
 	struct mlx5_ct_attr ct_attr;
+	struct mlx5_pkt_reformat *decap_pkt_reformat;
 	struct mlx5e_tc_flow_parse_attr *parse_attr;
 };
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 1c9be19ee025..554fc64d8ef6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -366,6 +366,10 @@ mlx5_eswitch_add_offloaded_rule(struct mlx5_eswitch *esw,
 			}
 		}
 	}
+
+	if (attr->decap_pkt_reformat)
+		flow_act.pkt_reformat = attr->decap_pkt_reformat;
+
 	if (flow_act.action & MLX5_FLOW_CONTEXT_ACTION_COUNT) {
 		dest[i].type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
 		dest[i].counter_id = mlx5_fc_id(attr->counter);
-- 
cgit v1.2.3-59-g8ed1b


From 582234b465edfa12835b20477c0aa2bc91a02e18 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 8 Apr 2020 09:01:33 +0300
Subject: net/mlx5e: Support pedit on mpls over UDP decap

Allow to modify ethernet headers while decapsulating mpls over UDP
packets. This is implemented using the same reformat object used for
decapsulation.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Paul Blakey <paulb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 65 ++++++++++++++++++++-----
 1 file changed, 53 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index a6b18f0444e7..cc669ea450ae 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2900,10 +2900,12 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts)
 
 static const struct pedit_headers zero_masks = {};
 
-static int parse_tc_pedit_action(struct mlx5e_priv *priv,
-				 const struct flow_action_entry *act, int namespace,
-				 struct pedit_headers_action *hdrs,
-				 struct netlink_ext_ack *extack)
+static int
+parse_pedit_to_modify_hdr(struct mlx5e_priv *priv,
+			  const struct flow_action_entry *act, int namespace,
+			  struct mlx5e_tc_flow_parse_attr *parse_attr,
+			  struct pedit_headers_action *hdrs,
+			  struct netlink_ext_ack *extack)
 {
 	u8 cmd = (act->id == FLOW_ACTION_MANGLE) ? 0 : 1;
 	int err = -EOPNOTSUPP;
@@ -2939,6 +2941,46 @@ out_err:
 	return err;
 }
 
+static int
+parse_pedit_to_reformat(struct mlx5e_priv *priv,
+			const struct flow_action_entry *act,
+			struct mlx5e_tc_flow_parse_attr *parse_attr,
+			struct netlink_ext_ack *extack)
+{
+	u32 mask, val, offset;
+	u32 *p;
+
+	if (act->id != FLOW_ACTION_MANGLE)
+		return -EOPNOTSUPP;
+
+	if (act->mangle.htype != FLOW_ACT_MANGLE_HDR_TYPE_ETH) {
+		NL_SET_ERR_MSG_MOD(extack, "Only Ethernet modification is supported");
+		return -EOPNOTSUPP;
+	}
+
+	mask = ~act->mangle.mask;
+	val = act->mangle.val;
+	offset = act->mangle.offset;
+	p = (u32 *)&parse_attr->eth;
+	*(p + (offset >> 2)) |= (val & mask);
+
+	return 0;
+}
+
+static int parse_tc_pedit_action(struct mlx5e_priv *priv,
+				 const struct flow_action_entry *act, int namespace,
+				 struct mlx5e_tc_flow_parse_attr *parse_attr,
+				 struct pedit_headers_action *hdrs,
+				 struct mlx5e_tc_flow *flow,
+				 struct netlink_ext_ack *extack)
+{
+	if (flow && flow_flag_test(flow, L3_TO_L2_DECAP))
+		return parse_pedit_to_reformat(priv, act, parse_attr, extack);
+
+	return parse_pedit_to_modify_hdr(priv, act, namespace,
+					 parse_attr, hdrs, extack);
+}
+
 static int alloc_tc_pedit_action(struct mlx5e_priv *priv, int namespace,
 				 struct mlx5e_tc_flow_parse_attr *parse_attr,
 				 struct pedit_headers_action *hdrs,
@@ -3197,7 +3239,7 @@ static int add_vlan_rewrite_action(struct mlx5e_priv *priv, int namespace,
 		return -EOPNOTSUPP;
 	}
 
-	err = parse_tc_pedit_action(priv, &pedit_act, namespace, hdrs, NULL);
+	err = parse_tc_pedit_action(priv, &pedit_act, namespace, parse_attr, hdrs, NULL, extack);
 	*action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
 
 	return err;
@@ -3263,7 +3305,7 @@ static int parse_tc_nic_actions(struct mlx5e_priv *priv,
 		case FLOW_ACTION_MANGLE:
 		case FLOW_ACTION_ADD:
 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_KERNEL,
-						    hdrs, extack);
+						    parse_attr, hdrs, NULL, extack);
 			if (err)
 				return err;
 
@@ -3932,16 +3974,15 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			break;
 		case FLOW_ACTION_MANGLE:
 		case FLOW_ACTION_ADD:
-			if (flow_flag_test(flow, L3_TO_L2_DECAP))
-				return -EOPNOTSUPP;
-
 			err = parse_tc_pedit_action(priv, act, MLX5_FLOW_NAMESPACE_FDB,
-						    hdrs, extack);
+						    parse_attr, hdrs, flow, extack);
 			if (err)
 				return err;
 
-			action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-			attr->split_count = attr->out_count;
+			if (!flow_flag_test(flow, L3_TO_L2_DECAP)) {
+				action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+				attr->split_count = attr->out_count;
+			}
 			break;
 		case FLOW_ACTION_CSUM:
 			if (csum_offload_supported(priv, action,
-- 
cgit v1.2.3-59-g8ed1b


From 16fdc16c6bff6764fc8c9a5f10640dfdb7ce201a Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:12 -0700
Subject: igc: Refactor igc_ethtool_add_nfc_rule()

Current implementation of igc_ethtool_add_nfc_rule() is quite long and a
bit convoluted so this patch does a code refactoring to improve the
code.

Code related to NFC rule object initialization is refactored out to the
local helper function igc_ethtool_init_nfc_rule(). Likewise, code
related to NFC rule validation is refactored out to another local
helper, igc_ethtool_check_nfc_rule().

RX_CLS_FLOW_DISC check is removed since it is redundant. The macro is
defined as the max value fsp->ring_cookie can have, so checking if
fsp->ring_cookie >= adapter->num_rx_queues is already sufficient.

Finally, some log messages are improved or added, and obvious comments
are removed.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 150 ++++++++++++++++-----------
 1 file changed, 92 insertions(+), 58 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 66e0760a8f9e..1145c88a8e44 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1271,9 +1271,6 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 	if (!input)
 		return err;
 
-	/* initialize node */
-	INIT_HLIST_NODE(&input->nfc_node);
-
 	/* add filter to the list */
 	if (parent)
 		hlist_add_behind(&input->nfc_node, &parent->nfc_node);
@@ -1286,41 +1283,19 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
-				    struct ethtool_rxnfc *cmd)
+static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
+				      const struct ethtool_rx_flow_spec *fsp)
 {
-	struct net_device *netdev = adapter->netdev;
-	struct ethtool_rx_flow_spec *fsp =
-		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	struct igc_nfc_rule *rule, *tmp;
-	int err = 0;
-
-	if (!(netdev->hw_features & NETIF_F_NTUPLE))
-		return -EOPNOTSUPP;
+	INIT_HLIST_NODE(&rule->nfc_node);
 
-	/* Don't allow programming if the action is a queue greater than
-	 * the number of online Rx queues.
-	 */
-	if (fsp->ring_cookie == RX_CLS_FLOW_DISC ||
-	    fsp->ring_cookie >= adapter->num_rx_queues) {
-		netdev_err(netdev,
-			   "ethtool -N: The specified action is invalid\n");
-		return -EINVAL;
-	}
+	rule->action = fsp->ring_cookie;
+	rule->sw_idx = fsp->location;
 
-	/* Don't allow indexes to exist outside of available space */
-	if (fsp->location >= IGC_MAX_RXNFC_RULES) {
-		netdev_err(netdev, "Location out of range\n");
-		return -EINVAL;
+	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
+		rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
+		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
 	}
 
-	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW)
-		return -EINVAL;
-
-	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
-	if (!rule)
-		return -ENOMEM;
-
 	if (fsp->m_u.ether_spec.h_proto == ETHER_TYPE_FULL_MASK) {
 		rule->filter.etype = ntohs(fsp->h_u.ether_spec.h_proto);
 		rule->filter.match_flags = IGC_FILTER_FLAG_ETHER_TYPE;
@@ -1340,51 +1315,110 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 		ether_addr_copy(rule->filter.dst_addr,
 				fsp->h_u.ether_spec.h_dest);
 	}
+}
 
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
-	    rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-		netdev_dbg(netdev, "Filters with both dst and src are not supported\n");
-		err = -EOPNOTSUPP;
-		goto err_out;
-	}
+/**
+ * igc_ethtool_check_nfc_rule() - Check if NFC rule is valid
+ * @adapter: Pointer to adapter
+ * @rule: Rule under evaluation
+ *
+ * Rules with both destination and source MAC addresses are considered invalid
+ * since the driver doesn't support them.
+ *
+ * Also, if there is already another rule with the same filter, @rule is
+ * considered invalid.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: 0 in case of success, negative errno code otherwise.
+ */
+static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
+				      struct igc_nfc_rule *rule)
+{
+	struct net_device *dev = adapter->netdev;
+	u8 flags = rule->filter.match_flags;
+	struct igc_nfc_rule *tmp;
 
-	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
-		if (fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
-			netdev_dbg(netdev, "VLAN mask not supported\n");
-			err = -EOPNOTSUPP;
-			goto err_out;
-		}
-		rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
-		rule->filter.match_flags |= IGC_FILTER_FLAG_VLAN_TCI;
+	if (!flags) {
+		netdev_dbg(dev, "Rule with no match\n");
+		return -EINVAL;
 	}
 
-	rule->action = fsp->ring_cookie;
-	rule->sw_idx = fsp->location;
-
-	spin_lock(&adapter->nfc_rule_lock);
+	if (flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
+	    flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		netdev_dbg(dev, "Filters with both dst and src are not supported\n");
+		return -EOPNOTSUPP;
+	}
 
 	hlist_for_each_entry(tmp, &adapter->nfc_rule_list, nfc_node) {
 		if (!memcmp(&rule->filter, &tmp->filter,
 			    sizeof(rule->filter))) {
-			err = -EEXIST;
-			netdev_err(netdev,
-				   "ethtool: this filter is already set\n");
-			goto err_out_w_lock;
+			netdev_dbg(dev, "Rule already exists\n");
+			return -EEXIST;
 		}
 	}
 
+	return 0;
+}
+
+static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
+				    struct ethtool_rxnfc *cmd)
+{
+	struct net_device *netdev = adapter->netdev;
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct igc_nfc_rule *rule;
+	int err;
+
+	if (!(netdev->hw_features & NETIF_F_NTUPLE)) {
+		netdev_dbg(netdev, "N-tuple filters disabled\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((fsp->flow_type & ~FLOW_EXT) != ETHER_FLOW) {
+		netdev_dbg(netdev, "Only ethernet flow type is supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if ((fsp->flow_type & FLOW_EXT) &&
+	    fsp->m_ext.vlan_tci != htons(VLAN_PRIO_MASK)) {
+		netdev_dbg(netdev, "VLAN mask not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (fsp->ring_cookie >= adapter->num_rx_queues) {
+		netdev_dbg(netdev, "Invalid action\n");
+		return -EINVAL;
+	}
+
+	if (fsp->location >= IGC_MAX_RXNFC_RULES) {
+		netdev_dbg(netdev, "Invalid location\n");
+		return -EINVAL;
+	}
+
+	rule = kzalloc(sizeof(*rule), GFP_KERNEL);
+	if (!rule)
+		return -ENOMEM;
+
+	igc_ethtool_init_nfc_rule(rule, fsp);
+
+	spin_lock(&adapter->nfc_rule_lock);
+
+	err = igc_ethtool_check_nfc_rule(adapter, rule);
+	if (err)
+		goto err;
+
 	err = igc_enable_nfc_rule(adapter, rule);
 	if (err)
-		goto err_out_w_lock;
+		goto err;
 
 	igc_ethtool_update_nfc_rule(adapter, rule, rule->sw_idx);
 
 	spin_unlock(&adapter->nfc_rule_lock);
 	return 0;
 
-err_out_w_lock:
+err:
 	spin_unlock(&adapter->nfc_rule_lock);
-err_out:
 	kfree(rule);
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From d3ba9e6f6157e6fa047d853936dabb981e315080 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:13 -0700
Subject: igc: Fix 'sw_idx' type in struct igc_nfc_rule

The 'sw_idx' field of 'struct igc_nfc_rule' is of type u16 but it is
assigned a u32 value in igc_ethtool_init_nfc_rule(). This patch changes
'sw_idx' type to u32 so they match. Also, it makes more sense to call
this field 'location' since it holds the NFC rule location.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  2 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 16 ++++++++--------
 2 files changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index fcc6261d7f67..ae7d48070ee2 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -463,7 +463,7 @@ struct igc_nfc_filter {
 struct igc_nfc_rule {
 	struct hlist_node nfc_node;
 	struct igc_nfc_filter filter;
-	u16 sw_idx;
+	u32 location;
 	u16 action;
 };
 
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 1145c88a8e44..24aa321f64b5 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -940,11 +940,11 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 	cmd->data = IGC_MAX_RXNFC_RULES;
 
 	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
-		if (fsp->location <= rule->sw_idx)
+		if (fsp->location <= rule->location)
 			break;
 	}
 
-	if (!rule || fsp->location != rule->sw_idx)
+	if (!rule || fsp->location != rule->location)
 		return -EINVAL;
 
 	if (!rule->filter.match_flags)
@@ -991,7 +991,7 @@ static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
 	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		if (cnt == cmd->rule_cnt)
 			return -EMSGSIZE;
-		rule_locs[cnt] = rule->sw_idx;
+		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
 
@@ -1240,7 +1240,7 @@ int igc_disable_nfc_rule(struct igc_adapter *adapter,
 
 static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 				       struct igc_nfc_rule *input,
-				       u16 sw_idx)
+				       u32 location)
 {
 	struct igc_nfc_rule *rule, *parent;
 	int err = -EINVAL;
@@ -1250,13 +1250,13 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 
 	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		/* hash found, or no matching entry */
-		if (rule->sw_idx >= sw_idx)
+		if (rule->location >= location)
 			break;
 		parent = rule;
 	}
 
 	/* if there is an old rule occupying our place remove it */
-	if (rule && rule->sw_idx == sw_idx) {
+	if (rule && rule->location == location) {
 		if (!input)
 			err = igc_disable_nfc_rule(adapter, rule);
 
@@ -1289,7 +1289,7 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
 	INIT_HLIST_NODE(&rule->nfc_node);
 
 	rule->action = fsp->ring_cookie;
-	rule->sw_idx = fsp->location;
+	rule->location = fsp->location;
 
 	if ((fsp->flow_type & FLOW_EXT) && fsp->m_ext.vlan_tci) {
 		rule->filter.vlan_tci = ntohs(fsp->h_ext.vlan_tci);
@@ -1412,7 +1412,7 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 	if (err)
 		goto err;
 
-	igc_ethtool_update_nfc_rule(adapter, rule, rule->sw_idx);
+	igc_ethtool_update_nfc_rule(adapter, rule, rule->location);
 
 	spin_unlock(&adapter->nfc_rule_lock);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From b500350a36ae6e83a03931723b787b771f411817 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:14 -0700
Subject: igc: Fix locking issue when retrieving NFC rules

Access to NFC rules stored in adapter->nfc_rule_list is protected by
adapter->nfc_rule_lock. The functions igc_ethtool_get_nfc_rule()
and igc_ethtool_get_nfc_rules() fail to hold the lock while
accessing rule objects.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 24aa321f64b5..decd29fbfbe2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -939,16 +939,18 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 
 	cmd->data = IGC_MAX_RXNFC_RULES;
 
+	spin_lock(&adapter->nfc_rule_lock);
+
 	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
 		if (fsp->location <= rule->location)
 			break;
 	}
 
 	if (!rule || fsp->location != rule->location)
-		return -EINVAL;
+		goto out;
 
 	if (!rule->filter.match_flags)
-		return -EINVAL;
+		goto out;
 
 	fsp->flow_type = ETHER_FLOW;
 	fsp->ring_cookie = rule->action;
@@ -976,7 +978,12 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 		eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
 	}
 
+	spin_unlock(&adapter->nfc_rule_lock);
 	return 0;
+
+out:
+	spin_unlock(&adapter->nfc_rule_lock);
+	return -EINVAL;
 }
 
 static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
@@ -988,13 +995,19 @@ static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
 
 	cmd->data = IGC_MAX_RXNFC_RULES;
 
+	spin_lock(&adapter->nfc_rule_lock);
+
 	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
-		if (cnt == cmd->rule_cnt)
+		if (cnt == cmd->rule_cnt) {
+			spin_unlock(&adapter->nfc_rule_lock);
 			return -EMSGSIZE;
+		}
 		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
 
+	spin_unlock(&adapter->nfc_rule_lock);
+
 	cmd->rule_cnt = cnt;
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 4bdf89e85ed3881e8c40510ebad918dded9e5831 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:15 -0700
Subject: igc: Fix NFC rule overwrite cases

When the 'loc' argument is passed in ethtool, the input rule overwrites
any rule present in that location. In this situation we must disable the
old rule otherwise it is left enabled in hardware. This patch fixes
the issue by always calling igc_disable_nfc_rule() when deleting the
old rule, no matter the value of 'input' argument.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index decd29fbfbe2..f01a7ec0c1c2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1270,8 +1270,7 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 
 	/* if there is an old rule occupying our place remove it */
 	if (rule && rule->location == location) {
-		if (!input)
-			err = igc_disable_nfc_rule(adapter, rule);
+		err = igc_disable_nfc_rule(adapter, rule);
 
 		hlist_del(&rule->nfc_node);
 		kfree(rule);
-- 
cgit v1.2.3-59-g8ed1b


From 39707c16e6b34fb70b3f170d5733245256d768e5 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:16 -0700
Subject: igc: Fix NFC rules with multicast addresses

Multicast MAC addresses are valid addresses for NFC rules but
igc_add_mac_filter() is currently rejecting them. In fact, the I225
controller doesn't impose any constraint on the address value so this
patch gets rid of the address validation check in MAC filter APIs.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index f48d6127a220..acb8dfdf275f 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2249,9 +2249,6 @@ int igc_add_mac_filter(struct igc_adapter *adapter,
 	struct net_device *dev = adapter->netdev;
 	int index;
 
-	if (!is_valid_ether_addr(addr))
-		return -EINVAL;
-
 	index = igc_find_mac_filter(adapter, type, addr);
 	if (index >= 0)
 		goto update_filter;
@@ -2283,9 +2280,6 @@ int igc_del_mac_filter(struct igc_adapter *adapter,
 	struct net_device *dev = adapter->netdev;
 	int index;
 
-	if (!is_valid_ether_addr(addr))
-		return -EINVAL;
-
 	index = igc_find_mac_filter(adapter, type, addr);
 	if (index < 0)
 		return -ENOENT;
-- 
cgit v1.2.3-59-g8ed1b


From d957c6010a907d86d41d1bee024a9827e385c4fa Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:17 -0700
Subject: igc: Fix NFC rules restoration

When the network interface is brought up, the driver re-enables the NFC
rules previously configured. However, this is done in the reverse of the
order in which the rules were added, so the hardware filters end up
configured differently.

For example, consider the following rules:

$ ethtool -N eth0 flow-type ether dst 00:00:00:00:00:AA queue 0
$ ethtool -N eth0 flow-type ether dst 00:00:00:00:00:BB queue 1
$ ethtool -N eth0 flow-type ether dst 00:00:00:00:00:CC queue 2
$ ethtool -N eth0 flow-type ether dst 00:00:00:00:00:DD queue 3

RAL/RAH registers are configured so filter index 1 has address ending
in AA, filter index 2 has address ending in BB, and so on.

If we bring the interface down and up again, RAL/RAH registers are
configured so filter index 1 has address ending in DD, filter index 2
has CC, and so on. IOW, in the reverse of the order we had before
bringing the interface down.

This issue can be fixed by traversing adapter->nfc_rule_list
backwards when restoring the rules. Since hlist doesn't support
backwards traversal, this patch replaces it by list_head and fixes
igc_restore_nfc_rules() accordingly.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  4 ++--
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 19 ++++++++-----------
 drivers/net/ethernet/intel/igc/igc_main.c    | 16 +++++++++-------
 3 files changed, 19 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index ae7d48070ee2..76bc3a51ad70 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -191,7 +191,7 @@ struct igc_adapter {
 	 * nfc_rule_lock.
 	 */
 	spinlock_t nfc_rule_lock;
-	struct hlist_head nfc_rule_list;
+	struct list_head nfc_rule_list;
 	unsigned int nfc_rule_count;
 
 	u8 rss_indir_tbl[IGC_RETA_SIZE];
@@ -461,7 +461,7 @@ struct igc_nfc_filter {
 };
 
 struct igc_nfc_rule {
-	struct hlist_node nfc_node;
+	struct list_head list;
 	struct igc_nfc_filter filter;
 	u32 location;
 	u16 action;
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index f01a7ec0c1c2..a90493fee0d2 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -941,7 +941,7 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 
 	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
 		if (fsp->location <= rule->location)
 			break;
 	}
@@ -997,7 +997,7 @@ static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
 
 	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
 		if (cnt == cmd->rule_cnt) {
 			spin_unlock(&adapter->nfc_rule_lock);
 			return -EMSGSIZE;
@@ -1261,7 +1261,7 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 	parent = NULL;
 	rule = NULL;
 
-	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node) {
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
 		/* hash found, or no matching entry */
 		if (rule->location >= location)
 			break;
@@ -1272,7 +1272,7 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 	if (rule && rule->location == location) {
 		err = igc_disable_nfc_rule(adapter, rule);
 
-		hlist_del(&rule->nfc_node);
+		list_del(&rule->list);
 		kfree(rule);
 		adapter->nfc_rule_count--;
 	}
@@ -1283,11 +1283,8 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 	if (!input)
 		return err;
 
-	/* add filter to the list */
-	if (parent)
-		hlist_add_behind(&input->nfc_node, &parent->nfc_node);
-	else
-		hlist_add_head(&input->nfc_node, &adapter->nfc_rule_list);
+	list_add(&input->list, parent ? &parent->list :
+					&adapter->nfc_rule_list);
 
 	/* update counts */
 	adapter->nfc_rule_count++;
@@ -1298,7 +1295,7 @@ static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
 static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
 				      const struct ethtool_rx_flow_spec *fsp)
 {
-	INIT_HLIST_NODE(&rule->nfc_node);
+	INIT_LIST_HEAD(&rule->list);
 
 	rule->action = fsp->ring_cookie;
 	rule->location = fsp->location;
@@ -1362,7 +1359,7 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
 		return -EOPNOTSUPP;
 	}
 
-	hlist_for_each_entry(tmp, &adapter->nfc_rule_list, nfc_node) {
+	list_for_each_entry(tmp, &adapter->nfc_rule_list, list) {
 		if (!memcmp(&rule->filter, &tmp->filter,
 			    sizeof(rule->filter))) {
 			netdev_dbg(dev, "Rule already exists\n");
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index acb8dfdf275f..cf76e2c1f9b1 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2180,7 +2180,7 @@ static void igc_restore_nfc_rules(struct igc_adapter *adapter)
 
 	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node)
+	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
 		igc_enable_nfc_rule(adapter, rule);
 
 	spin_unlock(&adapter->nfc_rule_lock);
@@ -3419,6 +3419,9 @@ static int igc_sw_init(struct igc_adapter *adapter)
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
 	spin_lock_init(&adapter->nfc_rule_lock);
+	INIT_LIST_HEAD(&adapter->nfc_rule_list);
+	adapter->nfc_rule_count = 0;
+
 	spin_lock_init(&adapter->stats64_lock);
 	/* Assume MSI-X interrupts, will be checked during IRQ allocation */
 	adapter->flags |= IGC_FLAG_HAS_MSIX;
@@ -3651,7 +3654,7 @@ static void igc_nfc_rule_exit(struct igc_adapter *adapter)
 
 	spin_lock(&adapter->nfc_rule_lock);
 
-	hlist_for_each_entry(rule, &adapter->nfc_rule_list, nfc_node)
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list)
 		igc_disable_nfc_rule(adapter, rule);
 
 	spin_unlock(&adapter->nfc_rule_lock);
@@ -3826,14 +3829,13 @@ static int igc_set_features(struct net_device *netdev,
 		return 0;
 
 	if (!(features & NETIF_F_NTUPLE)) {
-		struct hlist_node *node2;
-		struct igc_nfc_rule *rule;
+		struct igc_nfc_rule *rule, *tmp;
 
 		spin_lock(&adapter->nfc_rule_lock);
-		hlist_for_each_entry_safe(rule, node2,
-					  &adapter->nfc_rule_list, nfc_node) {
+		list_for_each_entry_safe(rule, tmp,
+					 &adapter->nfc_rule_list, list) {
 			igc_disable_nfc_rule(adapter, rule);
-			hlist_del(&rule->nfc_node);
+			list_del(&rule->list);
 			kfree(rule);
 		}
 		spin_unlock(&adapter->nfc_rule_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 36fa21520f33317fe51bf80bc154873e922c2a26 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:18 -0700
Subject: igc: Refactor igc_ethtool_update_nfc_rule()

Current implementation of igc_ethtool_update_nfc_rule() is a bit
convoluted since it handles too many things: rule lookup, deletion
and addition. This patch breaks it into three functions so we simplify
the code and improve code reuse.

Code related to rule lookup is refactored out to a new function called
igc_get_nfc_rule().

Code related to rule addition is refactored out to a new function called
igc_add_nfc_rule(). This function enables the rule in hardware and adds
it to the adapter's list.

Code related to rule deletion is refactored out to a new function called
igc_del_nfc_rule(). This function disables the rule in hardware, removes
it from adapter's list, and deletes it.

As a byproduct of this refactoring, igc_enable_nfc_rule() and
igc_disable_nfc_rule() are moved to igc_main.c since they are not used
in igc_ethtool.c anymore, and igc_restore_nfc_rules() and
igc_nfc_rule_exit() are moved around to avoid forward declaration.

Also, since this patch already touches igc_ethtool_get_nfc_rule(), it
takes the opportunity to remove the 'match_flags' check. Empty flags
are not allowed to be added so no need to check that.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  18 +--
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 138 +++---------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 205 ++++++++++++++++++++++-----
 3 files changed, 195 insertions(+), 166 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 76bc3a51ad70..a484b328268b 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -232,16 +232,6 @@ void igc_write_rss_indir_tbl(struct igc_adapter *adapter);
 bool igc_has_link(struct igc_adapter *adapter);
 void igc_reset(struct igc_adapter *adapter);
 int igc_set_spd_dplx(struct igc_adapter *adapter, u32 spd, u8 dplx);
-int igc_add_mac_filter(struct igc_adapter *adapter,
-		       enum igc_mac_filter_type type, const u8 *addr,
-		       int queue);
-int igc_del_mac_filter(struct igc_adapter *adapter,
-		       enum igc_mac_filter_type type, const u8 *addr);
-int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
-			     int queue);
-void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio);
-int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue);
-int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype);
 void igc_update_stats(struct igc_adapter *adapter);
 
 /* igc_dump declarations */
@@ -544,10 +534,10 @@ static inline s32 igc_read_phy_reg(struct igc_hw *hw, u32 offset, u16 *data)
 }
 
 void igc_reinit_locked(struct igc_adapter *);
-int igc_enable_nfc_rule(struct igc_adapter *adapter,
-			const struct igc_nfc_rule *rule);
-int igc_disable_nfc_rule(struct igc_adapter *adapter,
-			 const struct igc_nfc_rule *rule);
+struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
+				      u32 location);
+int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
+void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule);
 
 void igc_ptp_init(struct igc_adapter *adapter);
 void igc_ptp_reset(struct igc_adapter *adapter);
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index a90493fee0d2..43dff09a8f86 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -941,15 +941,8 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 
 	spin_lock(&adapter->nfc_rule_lock);
 
-	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
-		if (fsp->location <= rule->location)
-			break;
-	}
-
-	if (!rule || fsp->location != rule->location)
-		goto out;
-
-	if (!rule->filter.match_flags)
+	rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (!rule)
 		goto out;
 
 	fsp->flow_type = ETHER_FLOW;
@@ -1190,108 +1183,6 @@ static int igc_ethtool_set_rss_hash_opt(struct igc_adapter *adapter,
 	return 0;
 }
 
-int igc_enable_nfc_rule(struct igc_adapter *adapter,
-			const struct igc_nfc_rule *rule)
-{
-	int err = -EINVAL;
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
-		err = igc_add_etype_filter(adapter, rule->filter.etype,
-					   rule->action);
-		if (err)
-			return err;
-	}
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
-					 rule->filter.src_addr, rule->action);
-		if (err)
-			return err;
-	}
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
-		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
-					 rule->filter.dst_addr, rule->action);
-		if (err)
-			return err;
-	}
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
-			   VLAN_PRIO_SHIFT;
-
-		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
-		if (err)
-			return err;
-	}
-
-	return 0;
-}
-
-int igc_disable_nfc_rule(struct igc_adapter *adapter,
-			 const struct igc_nfc_rule *rule)
-{
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
-		igc_del_etype_filter(adapter, rule->filter.etype);
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
-		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
-			   VLAN_PRIO_SHIFT;
-		igc_del_vlan_prio_filter(adapter, prio);
-	}
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
-		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
-				   rule->filter.src_addr);
-
-	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
-		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
-				   rule->filter.dst_addr);
-
-	return 0;
-}
-
-static int igc_ethtool_update_nfc_rule(struct igc_adapter *adapter,
-				       struct igc_nfc_rule *input,
-				       u32 location)
-{
-	struct igc_nfc_rule *rule, *parent;
-	int err = -EINVAL;
-
-	parent = NULL;
-	rule = NULL;
-
-	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
-		/* hash found, or no matching entry */
-		if (rule->location >= location)
-			break;
-		parent = rule;
-	}
-
-	/* if there is an old rule occupying our place remove it */
-	if (rule && rule->location == location) {
-		err = igc_disable_nfc_rule(adapter, rule);
-
-		list_del(&rule->list);
-		kfree(rule);
-		adapter->nfc_rule_count--;
-	}
-
-	/* If no input this was a delete, err should be 0 if a rule was
-	 * successfully found and removed from the list else -EINVAL
-	 */
-	if (!input)
-		return err;
-
-	list_add(&input->list, parent ? &parent->list :
-					&adapter->nfc_rule_list);
-
-	/* update counts */
-	adapter->nfc_rule_count++;
-
-	return 0;
-}
-
 static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
 				      const struct ethtool_rx_flow_spec *fsp)
 {
@@ -1376,7 +1267,7 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 	struct net_device *netdev = adapter->netdev;
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	struct igc_nfc_rule *rule;
+	struct igc_nfc_rule *rule, *old_rule;
 	int err;
 
 	if (!(netdev->hw_features & NETIF_F_NTUPLE)) {
@@ -1417,12 +1308,14 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 	if (err)
 		goto err;
 
-	err = igc_enable_nfc_rule(adapter, rule);
+	old_rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (old_rule)
+		igc_del_nfc_rule(adapter, old_rule);
+
+	err = igc_add_nfc_rule(adapter, rule);
 	if (err)
 		goto err;
 
-	igc_ethtool_update_nfc_rule(adapter, rule, rule->location);
-
 	spin_unlock(&adapter->nfc_rule_lock);
 	return 0;
 
@@ -1437,13 +1330,20 @@ static int igc_ethtool_del_nfc_rule(struct igc_adapter *adapter,
 {
 	struct ethtool_rx_flow_spec *fsp =
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
-	int err;
+	struct igc_nfc_rule *rule;
 
 	spin_lock(&adapter->nfc_rule_lock);
-	err = igc_ethtool_update_nfc_rule(adapter, NULL, fsp->location);
-	spin_unlock(&adapter->nfc_rule_lock);
 
-	return err;
+	rule = igc_get_nfc_rule(adapter, fsp->location);
+	if (!rule) {
+		spin_unlock(&adapter->nfc_rule_lock);
+		return -EINVAL;
+	}
+
+	igc_del_nfc_rule(adapter, rule);
+
+	spin_unlock(&adapter->nfc_rule_lock);
+	return 0;
 }
 
 static int igc_ethtool_set_rxnfc(struct net_device *dev,
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index cf76e2c1f9b1..ad9217335a64 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2174,18 +2174,6 @@ static bool igc_clean_tx_irq(struct igc_q_vector *q_vector, int napi_budget)
 	return !!budget;
 }
 
-static void igc_restore_nfc_rules(struct igc_adapter *adapter)
-{
-	struct igc_nfc_rule *rule;
-
-	spin_lock(&adapter->nfc_rule_lock);
-
-	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
-		igc_enable_nfc_rule(adapter, rule);
-
-	spin_unlock(&adapter->nfc_rule_lock);
-}
-
 static int igc_find_mac_filter(struct igc_adapter *adapter,
 			       enum igc_mac_filter_type type, const u8 *addr)
 {
@@ -2242,9 +2230,9 @@ static int igc_get_avail_mac_filter_slot(struct igc_adapter *adapter)
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_add_mac_filter(struct igc_adapter *adapter,
-		       enum igc_mac_filter_type type, const u8 *addr,
-		       int queue)
+static int igc_add_mac_filter(struct igc_adapter *adapter,
+			      enum igc_mac_filter_type type, const u8 *addr,
+			      int queue)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
@@ -2274,8 +2262,8 @@ update_filter:
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_del_mac_filter(struct igc_adapter *adapter,
-		       enum igc_mac_filter_type type, const u8 *addr)
+static int igc_del_mac_filter(struct igc_adapter *adapter,
+			      enum igc_mac_filter_type type, const u8 *addr)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
@@ -2312,7 +2300,8 @@ int igc_del_mac_filter(struct igc_adapter *adapter,
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, int queue)
+static int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio,
+				    int queue)
 {
 	struct net_device *dev = adapter->netdev;
 	struct igc_hw *hw = &adapter->hw;
@@ -2340,7 +2329,7 @@ int igc_add_vlan_prio_filter(struct igc_adapter *adapter, int prio, int queue)
  * @adapter: Pointer to adapter where the filter should be deleted from
  * @prio: VLAN priority value
  */
-void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
+static void igc_del_vlan_prio_filter(struct igc_adapter *adapter, int prio)
 {
 	struct igc_hw *hw = &adapter->hw;
 	u32 vlanpqf;
@@ -2381,7 +2370,8 @@ static int igc_get_avail_etype_filter_slot(struct igc_adapter *adapter)
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype, int queue)
+static int igc_add_etype_filter(struct igc_adapter *adapter, u16 etype,
+				int queue)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int index;
@@ -2433,7 +2423,7 @@ static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
  *
  * Return: 0 in case of success, negative errno code otherwise.
  */
-int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
+static int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int index;
@@ -2449,6 +2439,167 @@ int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
 	return 0;
 }
 
+static int igc_enable_nfc_rule(struct igc_adapter *adapter,
+			       const struct igc_nfc_rule *rule)
+{
+	int err;
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE) {
+		err = igc_add_etype_filter(adapter, rule->filter.etype,
+					   rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+					 rule->filter.src_addr, rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR) {
+		err = igc_add_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+					 rule->filter.dst_addr, rule->action);
+		if (err)
+			return err;
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+
+		err = igc_add_vlan_prio_filter(adapter, prio, rule->action);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int igc_disable_nfc_rule(struct igc_adapter *adapter,
+				const struct igc_nfc_rule *rule)
+{
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
+		igc_del_etype_filter(adapter, rule->filter.etype);
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_VLAN_TCI) {
+		int prio = (rule->filter.vlan_tci & VLAN_PRIO_MASK) >>
+			   VLAN_PRIO_SHIFT;
+
+		igc_del_vlan_prio_filter(adapter, prio);
+	}
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_SRC_MAC_ADDR)
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_SRC,
+				   rule->filter.src_addr);
+
+	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
+		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
+				   rule->filter.dst_addr);
+
+	return 0;
+}
+
+/**
+ * igc_get_nfc_rule() - Get NFC rule
+ * @adapter: Pointer to adapter
+ * @location: Rule location
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: Pointer to NFC rule at @location. If not found, NULL.
+ */
+struct igc_nfc_rule *igc_get_nfc_rule(struct igc_adapter *adapter,
+				      u32 location)
+{
+	struct igc_nfc_rule *rule;
+
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
+		if (rule->location == location)
+			return rule;
+		if (rule->location > location)
+			break;
+	}
+
+	return NULL;
+}
+
+/**
+ * igc_del_nfc_rule() - Delete NFC rule
+ * @adapter: Pointer to adapter
+ * @rule: Pointer to rule to be deleted
+ *
+ * Disable NFC rule in hardware and delete it from adapter.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ */
+void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
+{
+	igc_disable_nfc_rule(adapter, rule);
+
+	list_del(&rule->list);
+	adapter->nfc_rule_count--;
+
+	kfree(rule);
+}
+
+/**
+ * igc_add_nfc_rule() - Add NFC rule
+ * @adapter: Pointer to adapter
+ * @rule: Pointer to rule to be added
+ *
+ * Enable NFC rule in hardware and add it to adapter.
+ *
+ * Context: Expects adapter->nfc_rule_lock to be held by caller.
+ *
+ * Return: 0 on success, negative errno on failure.
+ */
+int igc_add_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
+{
+	struct igc_nfc_rule *pred, *cur;
+	int err;
+
+	err = igc_enable_nfc_rule(adapter, rule);
+	if (err)
+		return err;
+
+	pred = NULL;
+	list_for_each_entry(cur, &adapter->nfc_rule_list, list) {
+		if (cur->location >= rule->location)
+			break;
+		pred = cur;
+	}
+
+	list_add(&rule->list, pred ? &pred->list : &adapter->nfc_rule_list);
+	adapter->nfc_rule_count++;
+	return 0;
+}
+
+static void igc_restore_nfc_rules(struct igc_adapter *adapter)
+{
+	struct igc_nfc_rule *rule;
+
+	spin_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
+		igc_enable_nfc_rule(adapter, rule);
+
+	spin_unlock(&adapter->nfc_rule_lock);
+}
+
+static void igc_nfc_rule_exit(struct igc_adapter *adapter)
+{
+	struct igc_nfc_rule *rule;
+
+	spin_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry(rule, &adapter->nfc_rule_list, list)
+		igc_disable_nfc_rule(adapter, rule);
+
+	spin_unlock(&adapter->nfc_rule_lock);
+}
+
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
@@ -3648,18 +3799,6 @@ void igc_update_stats(struct igc_adapter *adapter)
 	adapter->stats.mgpdc += rd32(IGC_MGTPDC);
 }
 
-static void igc_nfc_rule_exit(struct igc_adapter *adapter)
-{
-	struct igc_nfc_rule *rule;
-
-	spin_lock(&adapter->nfc_rule_lock);
-
-	list_for_each_entry(rule, &adapter->nfc_rule_list, list)
-		igc_disable_nfc_rule(adapter, rule);
-
-	spin_unlock(&adapter->nfc_rule_lock);
-}
-
 /**
  * igc_down - Close the interface
  * @adapter: board private structure
-- 
cgit v1.2.3-59-g8ed1b


From e256ec83fabd5803a576c46e03289e519d087fda Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:19 -0700
Subject: igc: Fix NFC rules leak when driver is unloaded

If we have NFC rules in adapter->nfc_rule_list when the IGC driver
is unloaded, all rules are leaked. This patch fixes the issue by
introducing the helper igc_flush_nfc_rules() and calling it in
igc_remove(). It also updates igc_set_features() so it reuses the
new helper instead of re-implementing it.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 29 ++++++++++++++++-------------
 1 file changed, 16 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index ad9217335a64..0a6f880e3538 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2544,6 +2544,18 @@ void igc_del_nfc_rule(struct igc_adapter *adapter, struct igc_nfc_rule *rule)
 	kfree(rule);
 }
 
+static void igc_flush_nfc_rules(struct igc_adapter *adapter)
+{
+	struct igc_nfc_rule *rule, *tmp;
+
+	spin_lock(&adapter->nfc_rule_lock);
+
+	list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
+		igc_del_nfc_rule(adapter, rule);
+
+	spin_unlock(&adapter->nfc_rule_lock);
+}
+
 /**
  * igc_add_nfc_rule() - Add NFC rule
  * @adapter: Pointer to adapter
@@ -3967,19 +3979,8 @@ static int igc_set_features(struct net_device *netdev,
 	if (!(changed & (NETIF_F_RXALL | NETIF_F_NTUPLE)))
 		return 0;
 
-	if (!(features & NETIF_F_NTUPLE)) {
-		struct igc_nfc_rule *rule, *tmp;
-
-		spin_lock(&adapter->nfc_rule_lock);
-		list_for_each_entry_safe(rule, tmp,
-					 &adapter->nfc_rule_list, list) {
-			igc_disable_nfc_rule(adapter, rule);
-			list_del(&rule->list);
-			kfree(rule);
-		}
-		spin_unlock(&adapter->nfc_rule_lock);
-		adapter->nfc_rule_count = 0;
-	}
+	if (!(features & NETIF_F_NTUPLE))
+		igc_flush_nfc_rules(adapter);
 
 	netdev->features = features;
 
@@ -5246,6 +5247,8 @@ static void igc_remove(struct pci_dev *pdev)
 
 	pm_runtime_get_noresume(&pdev->dev);
 
+	igc_flush_nfc_rules(adapter);
+
 	igc_ptp_stop(adapter);
 
 	set_bit(__IGC_DOWN, &adapter->state);
-- 
cgit v1.2.3-59-g8ed1b


From 1894df0ccb6ac7ba8b2c799e7d74b5db1180c518 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:20 -0700
Subject: igc: Fix NFC rule validation

If we try to overwrite an existing rule with the same filter but
different action, we get EEXIST error as shown below.

$ ethtool -N eth0 flow-type ether dst <MACADDR> action 1 loc 10
$ ethtool -N eth0 flow-type ether dst <MACADDR> action 2 loc 10
rmgr: Cannot insert RX class rule: File exists

The second command is expected to overwrite the previous rule in location
10 and succeed.

This patch fixes igc_ethtool_check_nfc_rule() so it also checks the
rules location. In case they match, the rule under evaluation should not
be considered invalid.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 43dff09a8f86..d14c46dce053 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1225,8 +1225,8 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
  * Rules with both destination and source MAC addresses are considered invalid
  * since the driver doesn't support them.
  *
- * Also, if there is already another rule with the same filter, @rule is
- * considered invalid.
+ * Also, if there is already another rule with the same filter in a different
+ * location, @rule is considered invalid.
  *
  * Context: Expects adapter->nfc_rule_lock to be held by caller.
  *
@@ -1252,7 +1252,8 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
 
 	list_for_each_entry(tmp, &adapter->nfc_rule_list, list) {
 		if (!memcmp(&rule->filter, &tmp->filter,
-			    sizeof(rule->filter))) {
+			    sizeof(rule->filter)) &&
+		    tmp->location != rule->location) {
 			netdev_dbg(dev, "Rule already exists\n");
 			return -EEXIST;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From acda576f72b8a2eed44aa3840561daa0ce837744 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:21 -0700
Subject: igc: Change return type from igc_disable_nfc_rule()

None of the igc_disable_nfc_rule() callers actually check its return
value. A closer look at why this function would fail shows that the
only situation is when we try to delete an Ethertype or MAC filter that
doesn't exist.

That situation is very unlikely so we can change igc_del_etype_filter()
and igc_del_mac_filter() logic to "if the filter doesn't exist, we are
done", and keep the logic in igc_disable_nfc_rule() callers simple.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 26 +++++++++-----------------
 1 file changed, 9 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 0a6f880e3538..9338209cedf2 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2259,18 +2259,16 @@ update_filter:
  * @adapter: Pointer to adapter where the filter should be deleted from
  * @type: MAC address filter type (source or destination)
  * @addr: MAC address
- *
- * Return: 0 in case of success, negative errno code otherwise.
  */
-static int igc_del_mac_filter(struct igc_adapter *adapter,
-			      enum igc_mac_filter_type type, const u8 *addr)
+static void igc_del_mac_filter(struct igc_adapter *adapter,
+			       enum igc_mac_filter_type type, const u8 *addr)
 {
 	struct net_device *dev = adapter->netdev;
 	int index;
 
 	index = igc_find_mac_filter(adapter, type, addr);
 	if (index < 0)
-		return -ENOENT;
+		return;
 
 	if (index == 0) {
 		/* If this is the default filter, we don't actually delete it.
@@ -2288,8 +2286,6 @@ static int igc_del_mac_filter(struct igc_adapter *adapter,
 
 		igc_clear_mac_filter_hw(adapter, index);
 	}
-
-	return 0;
 }
 
 /**
@@ -2420,23 +2416,20 @@ static int igc_find_etype_filter(struct igc_adapter *adapter, u16 etype)
  * igc_del_etype_filter() - Delete ethertype filter
  * @adapter: Pointer to adapter where the filter should be deleted from
  * @etype: Ethertype value
- *
- * Return: 0 in case of success, negative errno code otherwise.
  */
-static int igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
+static void igc_del_etype_filter(struct igc_adapter *adapter, u16 etype)
 {
 	struct igc_hw *hw = &adapter->hw;
 	int index;
 
 	index = igc_find_etype_filter(adapter, etype);
 	if (index < 0)
-		return -ENOENT;
+		return;
 
 	wr32(IGC_ETQF(index), 0);
 
 	netdev_dbg(adapter->netdev, "Delete ethertype filter: etype %04x\n",
 		   etype);
-	return 0;
 }
 
 static int igc_enable_nfc_rule(struct igc_adapter *adapter,
@@ -2477,8 +2470,8 @@ static int igc_enable_nfc_rule(struct igc_adapter *adapter,
 	return 0;
 }
 
-static int igc_disable_nfc_rule(struct igc_adapter *adapter,
-				const struct igc_nfc_rule *rule)
+static void igc_disable_nfc_rule(struct igc_adapter *adapter,
+				 const struct igc_nfc_rule *rule)
 {
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_ETHER_TYPE)
 		igc_del_etype_filter(adapter, rule->filter.etype);
@@ -2497,8 +2490,6 @@ static int igc_disable_nfc_rule(struct igc_adapter *adapter,
 	if (rule->filter.match_flags & IGC_FILTER_FLAG_DST_MAC_ADDR)
 		igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST,
 				   rule->filter.dst_addr);
-
-	return 0;
 }
 
 /**
@@ -2623,7 +2614,8 @@ static int igc_uc_unsync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
 
-	return igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
+	igc_del_mac_filter(adapter, IGC_MAC_FILTER_TYPE_DST, addr);
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 42fc5dc042796a825d9e2db8ee4cd977b12f73d1 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:22 -0700
Subject: igc: Change adapter->nfc_rule_lock to mutex

This patch changes adapter->nfc_rule_lock type from spin_lock to mutex
so we avoid unnecessary busy waiting on lock contention.

A closer look at the execution context of NFC rule API users shows that
all of them run in process context. The API users are: ethtool ops;
igc_configure(), called when the interface is brought up by the user or
by the reset workqueue thread; igc_down(), called when the interface is
brought down; and igc_remove(), called when the driver is unloaded.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h         |  2 +-
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 24 ++++++++++++------------
 drivers/net/ethernet/intel/igc/igc_main.c    | 14 +++++++-------
 3 files changed, 20 insertions(+), 20 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index a484b328268b..14f9edaaaf83 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -190,7 +190,7 @@ struct igc_adapter {
 	/* Any access to elements in nfc_rule_list is protected by the
 	 * nfc_rule_lock.
 	 */
-	spinlock_t nfc_rule_lock;
+	struct mutex nfc_rule_lock;
 	struct list_head nfc_rule_list;
 	unsigned int nfc_rule_count;
 
diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index d14c46dce053..946e775e34ae 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -939,7 +939,7 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 
 	cmd->data = IGC_MAX_RXNFC_RULES;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	rule = igc_get_nfc_rule(adapter, fsp->location);
 	if (!rule)
@@ -971,11 +971,11 @@ static int igc_ethtool_get_nfc_rule(struct igc_adapter *adapter,
 		eth_broadcast_addr(fsp->m_u.ether_spec.h_source);
 	}
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 	return 0;
 
 out:
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 	return -EINVAL;
 }
 
@@ -988,18 +988,18 @@ static int igc_ethtool_get_nfc_rules(struct igc_adapter *adapter,
 
 	cmd->data = IGC_MAX_RXNFC_RULES;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	list_for_each_entry(rule, &adapter->nfc_rule_list, list) {
 		if (cnt == cmd->rule_cnt) {
-			spin_unlock(&adapter->nfc_rule_lock);
+			mutex_unlock(&adapter->nfc_rule_lock);
 			return -EMSGSIZE;
 		}
 		rule_locs[cnt] = rule->location;
 		cnt++;
 	}
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 
 	cmd->rule_cnt = cnt;
 
@@ -1303,7 +1303,7 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 
 	igc_ethtool_init_nfc_rule(rule, fsp);
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	err = igc_ethtool_check_nfc_rule(adapter, rule);
 	if (err)
@@ -1317,11 +1317,11 @@ static int igc_ethtool_add_nfc_rule(struct igc_adapter *adapter,
 	if (err)
 		goto err;
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 	return 0;
 
 err:
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 	kfree(rule);
 	return err;
 }
@@ -1333,17 +1333,17 @@ static int igc_ethtool_del_nfc_rule(struct igc_adapter *adapter,
 		(struct ethtool_rx_flow_spec *)&cmd->fs;
 	struct igc_nfc_rule *rule;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	rule = igc_get_nfc_rule(adapter, fsp->location);
 	if (!rule) {
-		spin_unlock(&adapter->nfc_rule_lock);
+		mutex_unlock(&adapter->nfc_rule_lock);
 		return -EINVAL;
 	}
 
 	igc_del_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 9338209cedf2..165263ae8add 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2539,12 +2539,12 @@ static void igc_flush_nfc_rules(struct igc_adapter *adapter)
 {
 	struct igc_nfc_rule *rule, *tmp;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	list_for_each_entry_safe(rule, tmp, &adapter->nfc_rule_list, list)
 		igc_del_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 }
 
 /**
@@ -2583,24 +2583,24 @@ static void igc_restore_nfc_rules(struct igc_adapter *adapter)
 {
 	struct igc_nfc_rule *rule;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	list_for_each_entry_reverse(rule, &adapter->nfc_rule_list, list)
 		igc_enable_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 }
 
 static void igc_nfc_rule_exit(struct igc_adapter *adapter)
 {
 	struct igc_nfc_rule *rule;
 
-	spin_lock(&adapter->nfc_rule_lock);
+	mutex_lock(&adapter->nfc_rule_lock);
 
 	list_for_each_entry(rule, &adapter->nfc_rule_list, list)
 		igc_disable_nfc_rule(adapter, rule);
 
-	spin_unlock(&adapter->nfc_rule_lock);
+	mutex_unlock(&adapter->nfc_rule_lock);
 }
 
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
@@ -3573,7 +3573,7 @@ static int igc_sw_init(struct igc_adapter *adapter)
 				VLAN_HLEN;
 	adapter->min_frame_size = ETH_ZLEN + ETH_FCS_LEN;
 
-	spin_lock_init(&adapter->nfc_rule_lock);
+	mutex_init(&adapter->nfc_rule_lock);
 	INIT_LIST_HEAD(&adapter->nfc_rule_list);
 	adapter->nfc_rule_count = 0;
 
-- 
cgit v1.2.3-59-g8ed1b


From 5c739e77ca338a37765370290e02cb270651380b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Fri, 24 Apr 2020 13:16:23 -0700
Subject: igc: Remove igc_nfc_rule_exit()

During igc_down(), we call igc_nfc_rule_exit(), which traverses the NFC
rule list disabling filters one by one. Later on in the igc_down() flow
we issue a hardware reset, which also clears all filters. Since we
already reset the hardware, we don't actually need to disable each
filter manually. In order to simplify the code, this patch removes
igc_nfc_rule_exit() altogether.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_main.c | 14 --------------
 1 file changed, 14 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 165263ae8add..97d26991c87e 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -2591,18 +2591,6 @@ static void igc_restore_nfc_rules(struct igc_adapter *adapter)
 	mutex_unlock(&adapter->nfc_rule_lock);
 }
 
-static void igc_nfc_rule_exit(struct igc_adapter *adapter)
-{
-	struct igc_nfc_rule *rule;
-
-	mutex_lock(&adapter->nfc_rule_lock);
-
-	list_for_each_entry(rule, &adapter->nfc_rule_list, list)
-		igc_disable_nfc_rule(adapter, rule);
-
-	mutex_unlock(&adapter->nfc_rule_lock);
-}
-
 static int igc_uc_sync(struct net_device *netdev, const unsigned char *addr)
 {
 	struct igc_adapter *adapter = netdev_priv(netdev);
@@ -3821,8 +3809,6 @@ void igc_down(struct igc_adapter *adapter)
 	wr32(IGC_RCTL, rctl & ~IGC_RCTL_EN);
 	/* flush and sleep below */
 
-	igc_nfc_rule_exit(adapter);
-
 	/* set trans_start so we don't get spurious watchdogs during reset */
 	netif_trans_update(netdev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 14ec06b02e260b2e78785741d0e734f4b04db1fe Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Mon, 4 May 2020 09:29:25 +0300
Subject: igc: Remove unused descriptor's flags

The Enable Tidv register, Report Packet Sent, Report Status and
Ethernet CRC flags are not in use.
This patch removes these unused flags.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 45b567587ca9..3d8d40d6fa3f 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -265,13 +265,9 @@
 #define IGC_TXD_POPTS_IXSM	0x01       /* Insert IP checksum */
 #define IGC_TXD_POPTS_TXSM	0x02       /* Insert TCP/UDP checksum */
 #define IGC_TXD_CMD_EOP		0x01000000 /* End of Packet */
-#define IGC_TXD_CMD_IFCS	0x02000000 /* Insert FCS (Ethernet CRC) */
 #define IGC_TXD_CMD_IC		0x04000000 /* Insert Checksum */
-#define IGC_TXD_CMD_RS		0x08000000 /* Report Status */
-#define IGC_TXD_CMD_RPS		0x10000000 /* Report Packet Sent */
 #define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
 #define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
-#define IGC_TXD_CMD_IDE		0x80000000 /* Enable Tidv register */
 #define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
 #define IGC_TXD_STAT_EC		0x00000002 /* Excess Collisions */
 #define IGC_TXD_STAT_LC		0x00000004 /* Late Collisions */
-- 
cgit v1.2.3-59-g8ed1b


From 165ae7a8feb53dc47fb041357e4b253bfc927cf9 Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Tue, 5 May 2020 12:01:54 +0800
Subject: igb: Report speed and duplex as unknown when device is runtime
 suspended

An igb device gets runtime suspended when there's no link partner. We
can't get the correct speed in that state:
$ cat /sys/class/net/enp3s0/speed
1000

In addition to that, an error can also be spotted in dmesg:
[  385.991957] igb 0000:03:00.0 enp3s0: PCIe link lost

Since device can only be runtime suspended when there's no link partner,
we can skip reading register and let the following logic set speed and
duplex with correct status.

A more generic approach would be to wrap get_link_ksettings() with
begin() and complete() callbacks. However, for this particular issue,
begin() calls igb_runtime_resume(), which tries to take rtnl_lock()
while the lock is already held by the upper ethtool layer.

So let's take this approach until igb_runtime_resume() no longer
needs to hold rtnl_lock.

CC: stable <stable@vger.kernel.org>
Suggested-by: Alexander Duyck <alexander.duyck@gmail.com>
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/igb_ethtool.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igb/igb_ethtool.c b/drivers/net/ethernet/intel/igb/igb_ethtool.c
index 39d3b76a6f5d..2cd003c5ad43 100644
--- a/drivers/net/ethernet/intel/igb/igb_ethtool.c
+++ b/drivers/net/ethernet/intel/igb/igb_ethtool.c
@@ -143,7 +143,8 @@ static int igb_get_link_ksettings(struct net_device *netdev,
 	u32 speed;
 	u32 supported, advertising;
 
-	status = rd32(E1000_STATUS);
+	status = pm_runtime_suspended(&adapter->pdev->dev) ?
+		 0 : rd32(E1000_STATUS);
 	if (hw->phy.media_type == e1000_media_type_copper) {
 
 		supported = (SUPPORTED_10baseT_Half |
-- 
cgit v1.2.3-59-g8ed1b


From 0c80cdbf33207c7aea1aec9027ad8011bd6178ad Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Thu, 7 May 2020 14:25:45 +0800
Subject: e1000e: Warn if disabling ULP failed

The hardware may stop working if the driver fails to disable ULP mode.

Take the return value of e1000_disable_ulp_lpt_lp() into account, and
pass up the error if it fails.

Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/ich8lan.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000e/ich8lan.c b/drivers/net/ethernet/intel/e1000e/ich8lan.c
index 735bf25952fc..f999cca37a8a 100644
--- a/drivers/net/ethernet/intel/e1000e/ich8lan.c
+++ b/drivers/net/ethernet/intel/e1000e/ich8lan.c
@@ -300,7 +300,11 @@ static s32 e1000_init_phy_workarounds_pchlan(struct e1000_hw *hw)
 	 * so forcibly disable it.
 	 */
 	hw->dev_spec.ich8lan.ulp_state = e1000_ulp_state_unknown;
-	e1000_disable_ulp_lpt_lp(hw, true);
+	ret_val = e1000_disable_ulp_lpt_lp(hw, true);
+	if (ret_val) {
+		e_warn("Failed to disable ULP\n");
+		goto out;
+	}
 
 	ret_val = hw->phy.ops.acquire(hw);
 	if (ret_val) {
-- 
cgit v1.2.3-59-g8ed1b


From f29801030ac67bf98b7a65d3aea67b30769d4f7c Mon Sep 17 00:00:00 2001
From: Kai-Heng Feng <kai.heng.feng@canonical.com>
Date: Thu, 7 May 2020 22:21:07 +0800
Subject: e1000e: Disable TSO for buffer overrun workaround

Commit b10effb92e27 ("e1000e: fix buffer overrun while the I219 is
processing DMA transactions") imposes a roughly 30% performance penalty.

The commit log states that "Disabling TSO eliminates performance loss
for TCP traffic without a noticeable impact on CPU performance", so
let's disable TSO by default to regain the loss.

CC: stable <stable@vger.kernel.org>
Fixes: b10effb92e27 ("e1000e: fix buffer overrun while the I219 is processing DMA transactions")
BugLink: https://bugs.launchpad.net/bugs/1802691
Signed-off-by: Kai-Heng Feng <kai.heng.feng@canonical.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index e0b074820b47..66609cf689de 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -5294,6 +5294,10 @@ static void e1000_watchdog_task(struct work_struct *work)
 					/* oops */
 					break;
 				}
+				if (hw->mac.type == e1000_pch_spt) {
+					netdev->features &= ~NETIF_F_TSO;
+					netdev->features &= ~NETIF_F_TSO6;
+				}
 			}
 
 			/* enable transmits in the hardware, need to do this
-- 
cgit v1.2.3-59-g8ed1b


From e086ba2fccda4c196b84a167493f67f089d0ebdc Mon Sep 17 00:00:00 2001
From: Vitaly Lifshits <vitaly.lifshits@intel.com>
Date: Thu, 7 May 2020 20:14:06 +0300
Subject: e1000e: disable s0ix entry and exit flows for ME systems

Since ME systems do not support SLP_S0 in S0ix state, and S0ix entry
and exit flows may cause errors on them, it is best to avoid using the
e1000e_s0ix_entry_flow and e1000e_s0ix_exit_flow functions.

This is done by creating a table of all devices that come with ME
and by checking if the current device has ME.

Signed-off-by: Vitaly Lifshits <vitaly.lifshits@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/netdev.c | 45 ++++++++++++++++++++++++++++--
 1 file changed, 43 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 66609cf689de..32f23a15ff64 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -107,6 +107,45 @@ static const struct e1000_reg_info e1000_reg_info_tbl[] = {
 	{0, NULL}
 };
 
+struct e1000e_me_supported {
+	u16 device_id;		/* supported device ID */
+};
+
+static const struct e1000e_me_supported me_supported[] = {
+	{E1000_DEV_ID_PCH_LPT_I217_LM},
+	{E1000_DEV_ID_PCH_LPTLP_I218_LM},
+	{E1000_DEV_ID_PCH_I218_LM2},
+	{E1000_DEV_ID_PCH_I218_LM3},
+	{E1000_DEV_ID_PCH_SPT_I219_LM},
+	{E1000_DEV_ID_PCH_SPT_I219_LM2},
+	{E1000_DEV_ID_PCH_LBG_I219_LM3},
+	{E1000_DEV_ID_PCH_SPT_I219_LM4},
+	{E1000_DEV_ID_PCH_SPT_I219_LM5},
+	{E1000_DEV_ID_PCH_CNP_I219_LM6},
+	{E1000_DEV_ID_PCH_CNP_I219_LM7},
+	{E1000_DEV_ID_PCH_ICP_I219_LM8},
+	{E1000_DEV_ID_PCH_ICP_I219_LM9},
+	{E1000_DEV_ID_PCH_CMP_I219_LM10},
+	{E1000_DEV_ID_PCH_CMP_I219_LM11},
+	{E1000_DEV_ID_PCH_CMP_I219_LM12},
+	{E1000_DEV_ID_PCH_TGP_I219_LM13},
+	{E1000_DEV_ID_PCH_TGP_I219_LM14},
+	{E1000_DEV_ID_PCH_TGP_I219_LM15},
+	{0}
+};
+
+static bool e1000e_check_me(u16 device_id)
+{
+	struct e1000e_me_supported *id;
+
+	for (id = (struct e1000e_me_supported *)me_supported;
+	     id->device_id; id++)
+		if (device_id == id->device_id)
+			return true;
+
+	return false;
+}
+
 /**
  * __ew32_prepare - prepare to write to MAC CSR register on certain parts
  * @hw: pointer to the HW structure
@@ -6916,7 +6955,8 @@ static int e1000e_pm_suspend(struct device *dev)
 		e1000e_pm_thaw(dev);
 
 	/* Introduce S0ix implementation */
-	if (hw->mac.type >= e1000_pch_cnp)
+	if (hw->mac.type >= e1000_pch_cnp &&
+	    !e1000e_check_me(hw->adapter->pdev->device))
 		e1000e_s0ix_entry_flow(adapter);
 
 	return rc;
@@ -6931,7 +6971,8 @@ static int e1000e_pm_resume(struct device *dev)
 	int rc;
 
 	/* Introduce S0ix implementation */
-	if (hw->mac.type >= e1000_pch_cnp)
+	if (hw->mac.type >= e1000_pch_cnp &&
+	    !e1000e_check_me(hw->adapter->pdev->device))
 		e1000e_s0ix_exit_flow(adapter);
 
 	rc = __e1000_resume(pdev);
-- 
cgit v1.2.3-59-g8ed1b


From 65ece6de0114fc84fbc0487bf68cae91d535dd78 Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert@linux-m68k.org>
Date: Thu, 16 Apr 2020 11:50:49 +0200
Subject: virtchnl: Add missing explicit padding to structures
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

On e.g. m68k, the alignment of 32-bit values is only 2 bytes, leading
to the following:

    ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero]
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
					^
    ./include/linux/avf/virtchnl.h:577:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
     VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
     ^~~~~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:577:32: error: enumerator value for ‘virtchnl_static_assert_virtchnl_filter’ is not an integer constant
     VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
				    ^~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
							 ^
    ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero]
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
					^
    ./include/linux/avf/virtchnl.h:619:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
     VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event);
     ^~~~~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:619:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_pf_event’ is not an integer constant
     VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_pf_event);
				   ^~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
							 ^
    ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero]
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
					^
    ./include/linux/avf/virtchnl.h:640:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
     VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info);
     ^~~~~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:640:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_iwarp_qv_info’ is not an integer constant
     VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info);
				   ^~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
							 ^
    ./include/linux/avf/virtchnl.h:147:36: warning: division by zero [-Wdiv-by-zero]
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
					^
    ./include/linux/avf/virtchnl.h:647:1: note: in expansion of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
     VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info);
     ^~~~~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:647:31: error: enumerator value for ‘virtchnl_static_assert_virtchnl_iwarp_qvlist_info’ is not an integer constant
     VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_iwarp_qvlist_info);
				   ^~~~~~~~~~~~~~~~~~~~~~~~~~
    ./include/linux/avf/virtchnl.h:147:53: note: in definition of macro ‘VIRTCHNL_CHECK_STRUCT_LEN’
      { virtchnl_static_assert_##X = (n)/((sizeof(struct X) == (n)) ? 1 : 0) }
							 ^

Fix this by adding explicit padding to structures with holes.

Reported-by: <noreply@ellerman.id.au>
Signed-off-by: Geert Uytterhoeven <geert@linux-m68k.org>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 include/linux/avf/virtchnl.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h
index ca956b672ac0..40bad71865ea 100644
--- a/include/linux/avf/virtchnl.h
+++ b/include/linux/avf/virtchnl.h
@@ -476,6 +476,7 @@ struct virtchnl_rss_key {
 	u16 vsi_id;
 	u16 key_len;
 	u8 key[1];         /* RSS hash key, packed bytes */
+	u8 pad[1];
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_key);
@@ -484,6 +485,7 @@ struct virtchnl_rss_lut {
 	u16 vsi_id;
 	u16 lut_entries;
 	u8 lut[1];        /* RSS lookup table */
+	u8 pad[1];
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(6, virtchnl_rss_lut);
@@ -572,6 +574,7 @@ struct virtchnl_filter {
 	enum	virtchnl_action action;
 	u32	action_meta;
 	u8	field_flags;
+	u8	pad[3];
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(272, virtchnl_filter);
@@ -610,6 +613,7 @@ struct virtchnl_pf_event {
 			/* link_speed provided in Mbps */
 			u32 link_speed;
 			u8 link_status;
+			u8 pad[3];
 		} link_event_adv;
 	} event_data;
 
@@ -635,6 +639,7 @@ struct virtchnl_iwarp_qv_info {
 	u16 ceq_idx;
 	u16 aeq_idx;
 	u8 itr_idx;
+	u8 pad[3];
 };
 
 VIRTCHNL_CHECK_STRUCT_LEN(12, virtchnl_iwarp_qv_info);
-- 
cgit v1.2.3-59-g8ed1b


From 148beb612031255156d68b342170140524afb36e Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:40 -0700
Subject: ice: Initialize Flow Director resources

Flow Director allows for redirection based on ntuple rules. Rules are
programmed using the ethtool set-ntuple interface. Supported actions are
redirect to queue and drop.

Setup the initial framework to process Flow Director filters. Create and
allocate resources to manage and program filters to the hardware. Filters
are processed via a sideband interface; a control VSI is created to manage
communication and process requests through the sideband. Upon allocation of
resources, update the hardware tables to accept perfect filters.

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile            |   2 +
 drivers/net/ethernet/intel/ice/ice.h               |  24 ++
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h    |   9 +
 drivers/net/ethernet/intel/ice/ice_base.c          |   1 +
 drivers/net/ethernet/intel/ice/ice_common.c        |  36 ++
 drivers/net/ethernet/intel/ice/ice_ethtool.c       |  11 +-
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c  | 399 +++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.c          |  54 +++
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  12 +
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c     | 224 ++++++++++++
 drivers/net/ethernet/intel/ice/ice_flow.c          | 169 ++++++++-
 drivers/net/ethernet/intel/ice/ice_flow.h          |  32 +-
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h    |  11 +
 drivers/net/ethernet/intel/ice/ice_lib.c           | 210 +++++++++--
 drivers/net/ethernet/intel/ice/ice_lib.h           |   2 +
 drivers/net/ethernet/intel/ice/ice_main.c          | 160 ++++++++-
 drivers/net/ethernet/intel/ice/ice_protocol_type.h |   1 +
 drivers/net/ethernet/intel/ice/ice_switch.c        |  75 ++++
 drivers/net/ethernet/intel/ice/ice_switch.h        |   7 +
 drivers/net/ethernet/intel/ice/ice_txrx.c          | 100 +++++-
 drivers/net/ethernet/intel/ice/ice_txrx.h          |   7 +-
 drivers/net/ethernet/intel/ice/ice_type.h          |  45 ++-
 22 files changed, 1553 insertions(+), 38 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_fdir.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_fdir.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 2055e61eaf24..9ffa2e366766 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -18,6 +18,8 @@ ice-y := ice_main.o	\
 	 ice_txrx_lib.o	\
 	 ice_txrx.o	\
 	 ice_fltr.o	\
+	 ice_fdir.o	\
+	 ice_ethtool_fdir.o \
 	 ice_flex_pipe.o \
 	 ice_flow.o	\
 	 ice_devlink.o	\
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 58d0d6436c7f..ffd11bc2e5f0 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -50,6 +50,7 @@
 #include "ice_sched.h"
 #include "ice_virtchnl_pf.h"
 #include "ice_sriov.h"
+#include "ice_fdir.h"
 #include "ice_xsk.h"
 
 extern const char ice_drv_ver[];
@@ -66,6 +67,7 @@ extern const char ice_drv_ver[];
 #define ICE_AQ_LEN		64
 #define ICE_MBXSQ_LEN		64
 #define ICE_MIN_MSIX		2
+#define ICE_FDIR_MSIX		1
 #define ICE_NO_VSI		0xffff
 #define ICE_VSI_MAP_CONTIG	0
 #define ICE_VSI_MAP_SCATTER	1
@@ -257,6 +259,8 @@ struct ice_vsi {
 	s16 vf_id;			/* VF ID for SR-IOV VSIs */
 
 	u16 ethtype;			/* Ethernet protocol for pause frame */
+	u16 num_gfltr;
+	u16 num_bfltr;
 
 	/* RSS config */
 	u16 rss_table_size;	/* HW RSS table size */
@@ -339,6 +343,7 @@ enum ice_pf_flags {
 	ICE_FLAG_SRIOV_CAPABLE,
 	ICE_FLAG_DCB_CAPABLE,
 	ICE_FLAG_DCB_ENA,
+	ICE_FLAG_FD_ENA,
 	ICE_FLAG_ADV_FEATURES,
 	ICE_FLAG_LINK_DOWN_ON_CLOSE_ENA,
 	ICE_FLAG_NO_MEDIA,
@@ -367,6 +372,8 @@ struct ice_pf {
 	 */
 	u16 sriov_base_vector;
 
+	u16 ctrl_vsi_idx;		/* control VSI index in pf->vsi array */
+
 	struct ice_vsi **vsi;		/* VSIs created by the driver */
 	struct ice_sw *first_sw;	/* first switch created by firmware */
 	/* Virtchnl/SR-IOV config info */
@@ -505,8 +512,22 @@ static inline struct ice_vsi *ice_get_main_vsi(struct ice_pf *pf)
 	return NULL;
 }
 
+/**
+ * ice_get_ctrl_vsi - Get the control VSI
+ * @pf: PF instance
+ */
+static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
+{
+	/* if pf->ctrl_vsi_idx is ICE_NO_VSI, control VSI was not set up */
+	if (!pf->vsi || pf->ctrl_vsi_idx == ICE_NO_VSI)
+		return NULL;
+
+	return pf->vsi[pf->ctrl_vsi_idx];
+}
+
 int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
 int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
+int ice_vsi_open_ctrl(struct ice_vsi *vsi);
 void ice_set_ethtool_ops(struct net_device *netdev);
 void ice_set_ethtool_safe_mode_ops(struct net_device *netdev);
 u16 ice_get_avail_txq_count(struct ice_pf *pf);
@@ -530,6 +551,9 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+void ice_fdir_release_flows(struct ice_hw *hw);
+int ice_fdir_create_dflt_rules(struct ice_pf *pf);
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 979e9c6254af..deada2e3d7c0 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -107,6 +107,7 @@ struct ice_aqc_list_caps_elem {
 #define ICE_AQC_CAPS_RXQS				0x0041
 #define ICE_AQC_CAPS_TXQS				0x0042
 #define ICE_AQC_CAPS_MSIX				0x0043
+#define ICE_AQC_CAPS_FD					0x0045
 #define ICE_AQC_CAPS_MAX_MTU				0x0047
 
 	u8 major_ver;
@@ -232,6 +233,11 @@ struct ice_aqc_get_sw_cfg_resp {
  */
 #define ICE_AQC_RES_TYPE_VSI_LIST_REP			0x03
 #define ICE_AQC_RES_TYPE_VSI_LIST_PRUNE			0x04
+#define ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK		0x21
+#define ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES	0x22
+#define ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES		0x23
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID		0x58
+#define ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM		0x59
 #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID		0x60
 #define ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM		0x61
 
@@ -240,6 +246,9 @@ struct ice_aqc_get_sw_cfg_resp {
 
 #define ICE_AQC_RES_TYPE_FLAG_DEDICATED			0x00
 
+#define ICE_AQC_RES_TYPE_S	0
+#define ICE_AQC_RES_TYPE_M	(0x07F << ICE_AQC_RES_TYPE_S)
+
 /* Allocate Resources command (indirect 0x0208)
  * Free Resources command (indirect 0x0209)
  */
diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 4c835c144907..00c072f61a32 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -246,6 +246,7 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
 	 */
 	switch (vsi->type) {
 	case ICE_VSI_LB:
+	case ICE_VSI_CTRL:
 	case ICE_VSI_PF:
 		tlan_ctx->vmvf_type = ICE_TLAN_CTX_VMVF_TYPE_PF;
 		break;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 1a613199d6cb..3a4c14150107 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -653,6 +653,10 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	if (status)
 		goto err_unroll_cqinit;
 
+	/* Set bit to enable Flow Director filters */
+	wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+	INIT_LIST_HEAD(&hw->fdir_list_head);
+
 	ice_clear_pxe_mode(hw);
 
 	status = ice_init_nvm(hw);
@@ -741,6 +745,10 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
 	devm_kfree(ice_hw_to_dev(hw), mac_buf);
 
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* Obtain counter base index which would be used by flow director */
+	status = ice_alloc_fd_res_cntr(hw, &hw->fd_ctr_base);
 	if (status)
 		goto err_unroll_fltr_mgmt_struct;
 	status = ice_init_hw_tbls(hw);
@@ -770,6 +778,7 @@ err_unroll_cqinit:
  */
 void ice_deinit_hw(struct ice_hw *hw)
 {
+	ice_free_fd_res_cntr(hw, hw->fd_ctr_base);
 	ice_cleanup_fltr_mgmt_struct(hw);
 
 	ice_sched_cleanup_all(hw);
@@ -1680,6 +1689,33 @@ ice_parse_caps(struct ice_hw *hw, void *buf, u32 cap_count,
 				  "%s: msix_vector_first_id = %d\n", prefix,
 				  caps->msix_vector_first_id);
 			break;
+		case ICE_AQC_CAPS_FD:
+			if (dev_p) {
+				dev_p->num_flow_director_fltr = number;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: num_flow_director_fltr = %d\n",
+					  prefix,
+					  dev_p->num_flow_director_fltr);
+			}
+			if (func_p) {
+				u32 reg_val, val;
+
+				reg_val = rd32(hw, GLQF_FD_SIZE);
+				val = (reg_val & GLQF_FD_SIZE_FD_GSIZE_M) >>
+				      GLQF_FD_SIZE_FD_GSIZE_S;
+				func_p->fd_fltr_guar =
+				      ice_get_num_per_func(hw, val);
+				val = (reg_val & GLQF_FD_SIZE_FD_BSIZE_M) >>
+				      GLQF_FD_SIZE_FD_BSIZE_S;
+				func_p->fd_fltr_best_effort = val;
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: fd_fltr_guar = %d\n",
+					  prefix, func_p->fd_fltr_guar);
+				ice_debug(hw, ICE_DBG_INIT,
+					  "%s: fd_fltr_best_effort = %d\n",
+					  prefix, func_p->fd_fltr_best_effort);
+			}
+			break;
 		case ICE_AQC_CAPS_MAX_MTU:
 			caps->max_mtu = number;
 			ice_debug(hw, ICE_DBG_INIT, "%s: max_mtu = %d\n",
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 9fb82c993df9..d11960b21474 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3184,6 +3184,10 @@ ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
 	ch->combined_count = ice_get_combined_cnt(vsi);
 	ch->rx_count = vsi->num_rxq - ch->combined_count;
 	ch->tx_count = vsi->num_txq - ch->combined_count;
+
+	/* report other queues */
+	ch->other_count = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+	ch->max_other = ch->other_count;
 }
 
 /**
@@ -3256,9 +3260,14 @@ static int ice_set_channels(struct net_device *dev, struct ethtool_channels *ch)
 		return -EOPNOTSUPP;
 	}
 	/* do not support changing other_count */
-	if (ch->other_count)
+	if (ch->other_count != (test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1U : 0U))
 		return -EINVAL;
 
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags) && pf->hw.fdir_active_fltr) {
+		netdev_err(dev, "Cannot set channels when Flow Director filters are active\n");
+		return -EOPNOTSUPP;
+	}
+
 	curr_combined = ice_get_combined_cnt(vsi);
 
 	/* these checks are for cases where user didn't specify a particular
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
new file mode 100644
index 000000000000..425bf6f00db1
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -0,0 +1,399 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+/* flow director ethtool support for ice */
+
+#include "ice.h"
+#include "ice_lib.h"
+#include "ice_flow.h"
+
+/* calls to ice_flow_add_prof require the number of segments in the array
+ * for segs_cnt. In this code that is one more than the index.
+ */
+#define TNL_SEG_CNT(_TNL_) ((_TNL_) + 1)
+
+/**
+ * ice_fdir_get_hw_prof - return the ice_fd_hw_prof associated with a flow
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to look up
+ */
+static struct ice_fd_hw_prof *
+ice_fdir_get_hw_prof(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	if (blk == ICE_BLK_FD && hw->fdir_prof)
+		return hw->fdir_prof[flow];
+
+	return NULL;
+}
+
+/**
+ * ice_fdir_erase_flow_from_hw - remove a flow from the HW profile tables
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow: FDir flow type to release
+ */
+static void
+ice_fdir_erase_flow_from_hw(struct ice_hw *hw, enum ice_block blk, int flow)
+{
+	struct ice_fd_hw_prof *prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	int tun;
+
+	if (!prof)
+		return;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		u64 prof_id;
+		int j;
+
+		prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+		for (j = 0; j < prof->cnt; j++) {
+			u16 vsi_num;
+
+			if (!prof->entry_h[j][tun] || !prof->vsi_h[j])
+				continue;
+			vsi_num = ice_get_hw_vsi_num(hw, prof->vsi_h[j]);
+			ice_rem_prof_id_flow(hw, blk, vsi_num, prof_id);
+			ice_flow_rem_entry(hw, blk, prof->entry_h[j][tun]);
+			prof->entry_h[j][tun] = 0;
+		}
+		ice_flow_rem_prof(hw, blk, prof_id);
+	}
+}
+
+/**
+ * ice_fdir_rem_flow - release the ice_flow structures for a filter type
+ * @hw: hardware structure containing the filter list
+ * @blk: hardware block
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_rem_flow(struct ice_hw *hw, enum ice_block blk,
+		  enum ice_fltr_ptype flow_type)
+{
+	int flow = (int)flow_type & ~FLOW_EXT;
+	struct ice_fd_hw_prof *prof;
+	int tun, i;
+
+	prof = ice_fdir_get_hw_prof(hw, blk, flow);
+	if (!prof)
+		return;
+
+	ice_fdir_erase_flow_from_hw(hw, blk, flow);
+	for (i = 0; i < prof->cnt; i++)
+		prof->vsi_h[i] = 0;
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		if (!prof->fdir_seg[tun])
+			continue;
+		devm_kfree(ice_hw_to_dev(hw), prof->fdir_seg[tun]);
+		prof->fdir_seg[tun] = NULL;
+	}
+	prof->cnt = 0;
+}
+
+/**
+ * ice_fdir_release_flows - release all flows in use for later replay
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_release_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	/* release Flow Director HW table entries */
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++)
+		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
+}
+
+/**
+ * ice_fdir_alloc_flow_prof - allocate FDir flow profile structure(s)
+ * @hw: HW structure containing the FDir flow profile structure(s)
+ * @flow: flow type to allocate the flow profile for
+ *
+ * Allocate the fdir_prof and fdir_prof[flow] if not already created. Return 0
+ * on success and negative on error.
+ */
+static int
+ice_fdir_alloc_flow_prof(struct ice_hw *hw, enum ice_fltr_ptype flow)
+{
+	if (!hw)
+		return -EINVAL;
+
+	if (!hw->fdir_prof) {
+		hw->fdir_prof = devm_kcalloc(ice_hw_to_dev(hw),
+					     ICE_FLTR_PTYPE_MAX,
+					     sizeof(*hw->fdir_prof),
+					     GFP_KERNEL);
+		if (!hw->fdir_prof)
+			return -ENOMEM;
+	}
+
+	if (!hw->fdir_prof[flow]) {
+		hw->fdir_prof[flow] = devm_kzalloc(ice_hw_to_dev(hw),
+						   sizeof(**hw->fdir_prof),
+						   GFP_KERNEL);
+		if (!hw->fdir_prof[flow])
+			return -ENOMEM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fdir_set_hw_fltr_rule - Configure HW tables to generate a FDir rule
+ * @pf: pointer to the PF structure
+ * @seg: protocol header description pointer
+ * @flow: filter enum
+ * @tun: FDir segment to program
+ */
+static int
+ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
+			  enum ice_fltr_ptype flow, enum ice_fd_hw_seg tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *main_vsi, *ctrl_vsi;
+	struct ice_flow_seg_info *old_seg;
+	struct ice_flow_prof *prof = NULL;
+	struct ice_fd_hw_prof *hw_prof;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_status status;
+	u64 entry1_h = 0;
+	u64 entry2_h = 0;
+	u64 prof_id;
+	int err;
+
+	main_vsi = ice_get_main_vsi(pf);
+	if (!main_vsi)
+		return -EINVAL;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	err = ice_fdir_alloc_flow_prof(hw, flow);
+	if (err)
+		return err;
+
+	hw_prof = hw->fdir_prof[flow];
+	old_seg = hw_prof->fdir_seg[tun];
+	if (old_seg) {
+		/* This flow_type already has a changed input set.
+		 * If it matches the requested input set then we are
+		 * done. Or, if it's different then it's an error.
+		 */
+		if (!memcmp(old_seg, seg, sizeof(*seg)))
+			return -EEXIST;
+
+		/* remove HW filter definition */
+		ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+	}
+
+	/* Adding a profile, but there is only one header supported.
+	 * That is the final parameters are 1 header (segment), no
+	 * actions (NULL) and zero actions 0.
+	 */
+	prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+	status = ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id, seg,
+				   TNL_SEG_CNT(tun), &prof);
+	if (status)
+		return ice_status_to_errno(status);
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				    main_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry1_h);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_prof;
+	}
+	status = ice_flow_add_entry(hw, ICE_BLK_FD, prof_id, main_vsi->idx,
+				    ctrl_vsi->idx, ICE_FLOW_PRIO_NORMAL,
+				    seg, &entry2_h);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_entry;
+	}
+
+	hw_prof->fdir_seg[tun] = seg;
+	hw_prof->entry_h[0][tun] = entry1_h;
+	hw_prof->entry_h[1][tun] = entry2_h;
+	hw_prof->vsi_h[0] = main_vsi->idx;
+	hw_prof->vsi_h[1] = ctrl_vsi->idx;
+	if (!hw_prof->cnt)
+		hw_prof->cnt = 2;
+
+	return 0;
+
+err_entry:
+	ice_rem_prof_id_flow(hw, ICE_BLK_FD,
+			     ice_get_hw_vsi_num(hw, main_vsi->idx), prof_id);
+	ice_flow_rem_entry(hw, ICE_BLK_FD, entry1_h);
+err_prof:
+	ice_flow_rem_prof(hw, ICE_BLK_FD, prof_id);
+	dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+
+	return err;
+}
+
+/**
+ * ice_set_init_fdir_seg
+ * @seg: flow segment for programming
+ * @l4_proto: ICE_FLOW_SEG_HDR_TCP or ICE_FLOW_SEG_HDR_UDP
+ *
+ * Set the configuration for perfect filters to the provided flow segment for
+ * programming the HW filter. This is to be called only when initializing
+ * filters as this function assumes no filters exist.
+ */
+static int
+ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
+		      enum ice_flow_seg_hdr l4_proto)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	if (!seg)
+		return -EINVAL;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+
+	/* IP source address */
+	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* IP destination address */
+	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 source port */
+	ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	/* Layer 4 destination port */
+	ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
+
+	return 0;
+}
+
+/**
+ * ice_create_init_fdir_rule
+ * @pf: PF structure
+ * @flow: filter enum
+ *
+ * Return error value or 0 on success.
+ */
+static int
+ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	int ret;
+
+	/* if a filter rule already exists for this flow, return -EINVAL */
+	if (hw->fdir_prof && hw->fdir_prof[flow] &&
+	    hw->fdir_prof[flow]->fdir_seg[0])
+		return -EINVAL;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV4_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_UDP);
+	else
+		ret = -EINVAL;
+	if (ret)
+		goto err_exit;
+
+	/* add filter for outer headers */
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, flow, ICE_FD_HW_SEG_NON_TUN);
+	if (ret)
+		/* could not write filter, free memory */
+		goto err_exit;
+
+	/* make tunneled filter HW entries if possible */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, flow, ICE_FD_HW_SEG_TUN);
+	if (ret)
+		/* could not write tunnel filter, but outer header filter
+		 * exists
+		 */
+		devm_kfree(dev, tun_seg);
+
+	set_bit(flow, hw->fdir_perfect_fltr);
+	return ret;
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_fdir_create_dflt_rules - create default perfect filters
+ * @pf: PF data structure
+ *
+ * Returns 0 for success or error.
+ */
+int ice_fdir_create_dflt_rules(struct ice_pf *pf)
+{
+	int err;
+
+	/* Create perfect TCP and UDP rules in hardware. */
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
+
+	return err;
+}
+
+/**
+ * ice_vsi_manage_fdir - turn on/off flow director
+ * @vsi: the VSI being changed
+ * @ena: boolean value indicating if this is an enable or disable request
+ */
+void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
+{
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	enum ice_fltr_ptype flow;
+
+	if (ena) {
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		ice_fdir_create_dflt_rules(pf);
+		return;
+	}
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (!test_and_clear_bit(ICE_FLAG_FD_ENA, pf->flags))
+		goto release_lock;
+
+	if (hw->fdir_prof)
+		for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX;
+		     flow++)
+			if (hw->fdir_prof[flow])
+				ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
new file mode 100644
index 000000000000..878fa4df9453
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -0,0 +1,54 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice_common.h"
+
+/**
+ * ice_alloc_fd_res_cntr - allocate one dedicated FD counter block resource
+ * @hw: pointer to the hardware structure
+ * @cntr_id: output location for the allocated counter index
+ */
+enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_free_fd_res_cntr - free one dedicated FD counter block resource
+ * @hw: pointer to the hardware structure
+ * @cntr_id: index of the counter to be freed
+ */
+enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id)
+{
+	return ice_free_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_COUNTER_BLOCK,
+				 ICE_AQC_RES_TYPE_FLAG_DEDICATED, 1, cntr_id);
+}
+
+/**
+ * ice_alloc_fd_guar_item - allocate resource for FD guaranteed entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: output location handed to ice_alloc_res_cntr() for the index
+ * @num_fltr: number of filter entries to be allocated
+ */
+enum ice_status
+ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_GUARANTEED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
+
+/**
+ * ice_alloc_fd_shrd_item - allocate resource for flow director shared entries
+ * @hw: pointer to the hardware structure
+ * @cntr_id: output location handed to ice_alloc_res_cntr() for the index
+ * @num_fltr: number of filter entries to be allocated
+ */
+enum ice_status
+ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
+{
+	return ice_alloc_res_cntr(hw, ICE_AQC_RES_TYPE_FDIR_SHARED_ENTRIES,
+				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
+				  cntr_id);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
new file mode 100644
index 000000000000..feac47adde6e
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -0,0 +1,12 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_FDIR_H_
+#define _ICE_FDIR_H_
+enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
+enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
+enum ice_status
+ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+enum ice_status
+ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+#endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 38c37f506257..fe2f04f706e7 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -2353,6 +2353,9 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
 static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type)
 {
 	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_PROFID;
+		break;
 	case ICE_BLK_RSS:
 		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_PROFID;
 		break;
@@ -2370,6 +2373,9 @@ static bool ice_prof_id_rsrc_type(enum ice_block blk, u16 *rsrc_type)
 static bool ice_tcam_ent_rsrc_type(enum ice_block blk, u16 *rsrc_type)
 {
 	switch (blk) {
+	case ICE_BLK_FD:
+		*rsrc_type = ICE_AQC_RES_TYPE_FD_PROF_BLDR_TCAM;
+		break;
 	case ICE_BLK_RSS:
 		*rsrc_type = ICE_AQC_RES_TYPE_HASH_PROF_BLDR_TCAM;
 		break;
@@ -2813,6 +2819,12 @@ static void ice_free_flow_profs(struct ice_hw *hw, u8 blk_idx)
 
 	mutex_lock(&hw->fl_profs_locks[blk_idx]);
 	list_for_each_entry_safe(p, tmp, &hw->fl_profs[blk_idx], l_entry) {
+		struct ice_flow_entry *e, *t;
+
+		list_for_each_entry_safe(e, t, &p->entries, l_entry)
+			ice_flow_rem_entry(hw, (enum ice_block)blk_idx,
+					   ICE_FLOW_ENTRY_HNDL(e));
+
 		list_del(&p->l_entry);
 		devm_kfree(ice_hw_to_dev(hw), p);
 	}
@@ -3441,6 +3453,206 @@ error_tmp:
 	return status;
 }
 
+/**
+ * ice_update_fd_mask - set Flow Director Field Vector mask for a profile
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @mask_sel: mask select
+ *
+ * Writes @mask_sel to the profile's GLQF_FDMASK_SEL register, enabling the
+ * masks it selects, and logs the write at ICE_DBG_INIT level.
+ */
+static void ice_update_fd_mask(struct ice_hw *hw, u16 prof_id, u32 mask_sel)
+{
+	wr32(hw, GLQF_FDMASK_SEL(prof_id), mask_sel);
+
+	ice_debug(hw, ICE_DBG_INIT, "fd mask(%d): %x = %x\n", prof_id,
+		  GLQF_FDMASK_SEL(prof_id), mask_sel);
+}
+
+struct ice_fd_src_dst_pair {
+	u8 prot_id;	/* protocol ID compared with es[].prot_id */
+	u8 count;	/* number of es[] words the field occupies */
+	u16 off;	/* offset compared with es[].off */
+};
+
+static const struct ice_fd_src_dst_pair ice_fd_pairs[] = {
+	/* Adjacent entries (2n, 2n + 1) form one swappable pair */
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 12 },
+	{ ICE_PROT_IPV4_OF_OR_S, 2, 16 },
+
+	{ ICE_PROT_IPV4_IL, 2, 12 },
+	{ ICE_PROT_IPV4_IL, 2, 16 },
+
+	{ ICE_PROT_TCP_IL, 1, 0 },
+	{ ICE_PROT_TCP_IL, 1, 2 },
+
+	{ ICE_PROT_UDP_OF, 1, 0 },
+	{ ICE_PROT_UDP_OF, 1, 2 },
+
+	{ ICE_PROT_UDP_IL_OR_S, 1, 0 },
+	{ ICE_PROT_UDP_IL_OR_S, 1, 2 },
+
+	{ ICE_PROT_SCTP_IL, 1, 0 },
+	{ ICE_PROT_SCTP_IL, 1, 2 }
+};
+
+#define ICE_FD_SRC_DST_PAIR_COUNT	ARRAY_SIZE(ice_fd_pairs)
+
+/**
+ * ice_update_fd_swap - set register appropriately for a FD FV extraction
+ * @hw: pointer to the HW struct
+ * @prof_id: profile ID
+ * @es: extraction sequence, updated in place (length is set by the block)
+ *
+ * Return: 0, or ICE_ERR_MAX_LIMIT/ICE_ERR_OUT_OF_RANGE if a pair cannot fit
+ */
+static enum ice_status
+ice_update_fd_swap(struct ice_hw *hw, u16 prof_id, struct ice_fv_word *es)
+{
+	DECLARE_BITMAP(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+	u8 pair_start[ICE_FD_SRC_DST_PAIR_COUNT] = { 0 };
+#define ICE_FD_FV_NOT_FOUND (-2)
+	s8 first_free = ICE_FD_FV_NOT_FOUND;
+	u8 used[ICE_MAX_FV_WORDS] = { 0 };
+	s8 orig_free, si;
+	u32 mask_sel = 0;
+	u8 i, j, k;
+
+	bitmap_zero(pair_list, ICE_FD_SRC_DST_PAIR_COUNT);
+
+	/* This code assumes that the Flow Director field vectors are assigned
+	 * from the end of the FV indexes working towards the zero index, that
+	 * only complete fields will be included and will be consecutive, and
+	 * that there are no gaps between valid indexes.
+	 */
+
+	/* Determine swap fields present */
+	for (i = 0; i < hw->blk[ICE_BLK_FD].es.fvw; i++) {
+		/* Find the first free entry, assuming right to left population.
+		 * This is where we can start adding additional pairs if needed.
+		 */
+		if (first_free == ICE_FD_FV_NOT_FOUND && es[i].prot_id !=
+		    ICE_PROT_INVALID)
+			first_free = i - 1;
+
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[i].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[i].off == ice_fd_pairs[j].off) {
+				set_bit(j, pair_list);
+				pair_start[j] = i;
+			}
+	}
+
+	orig_free = first_free;
+
+	/* determine missing swap fields that need to be added */
+	for (i = 0; i < ICE_FD_SRC_DST_PAIR_COUNT; i += 2) {
+		u8 bit1 = test_bit(i + 1, pair_list);
+		u8 bit0 = test_bit(i, pair_list);
+
+		if (bit0 ^ bit1) {
+			u8 index;
+
+			/* add the appropriate 'paired' entry */
+			if (!bit0)
+				index = i;
+			else
+				index = i + 1;
+
+			/* check for room */
+			if (first_free + 1 < (s8)ice_fd_pairs[index].count)
+				return ICE_ERR_MAX_LIMIT;
+
+			/* place in extraction sequence */
+			for (k = 0; k < ice_fd_pairs[index].count; k++) {
+				/* validate the index before writing es[] */
+				if (k > first_free)
+					return ICE_ERR_OUT_OF_RANGE;
+
+				es[first_free - k].prot_id =
+					ice_fd_pairs[index].prot_id;
+				es[first_free - k].off =
+					ice_fd_pairs[index].off + (k * 2);
+				/* keep track of non-relevant fields */
+				mask_sel |= BIT(first_free - k);
+			}
+
+			pair_start[index] = first_free;
+			first_free -= ice_fd_pairs[index].count;
+		}
+	}
+
+	/* fill in the swap array */
+	si = hw->blk[ICE_BLK_FD].es.fvw - 1;
+	while (si >= 0) {
+		u8 indexes_used = 1;
+
+		/* assume flat at this index */
+#define ICE_SWAP_VALID	0x80
+		used[si] = si | ICE_SWAP_VALID;
+
+		if (orig_free == ICE_FD_FV_NOT_FOUND || si <= orig_free) {
+			si -= indexes_used;
+			continue;
+		}
+
+		/* check for a swap location */
+		for (j = 0; j < ICE_FD_SRC_DST_PAIR_COUNT; j++)
+			if (es[si].prot_id == ice_fd_pairs[j].prot_id &&
+			    es[si].off == ice_fd_pairs[j].off) {
+				u8 idx;
+
+				/* determine the appropriate matching field */
+				idx = j + ((j % 2) ? -1 : 1);
+
+				indexes_used = ice_fd_pairs[idx].count;
+				for (k = 0; k < indexes_used; k++) {
+					used[si - k] = (pair_start[idx] - k) |
+						ICE_SWAP_VALID;
+				}
+
+				break;
+			}
+
+		si -= indexes_used;
+	}
+
+	/* write one swap and one inset register per group of 4 FV indexes */
+	for (j = 0; j < hw->blk[ICE_BLK_FD].es.fvw / 4; j++) {
+		u32 raw_swap = 0;
+		u32 raw_in = 0;
+
+		for (k = 0; k < 4; k++) {
+			u8 idx;
+
+			idx = (j * 4) + k;
+			if (used[idx] && !(mask_sel & BIT(idx))) {
+				raw_swap |= used[idx] << (k * BITS_PER_BYTE);
+#define ICE_INSET_DFLT 0x9f
+				raw_in |= ICE_INSET_DFLT << (k * BITS_PER_BYTE);
+			}
+		}
+
+		/* write the appropriate swap register set */
+		wr32(hw, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		ice_debug(hw, ICE_DBG_INIT, "swap wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDSWAP(prof_id, j), raw_swap);
+
+		/* write the appropriate inset register set */
+		wr32(hw, GLQF_FDINSET(prof_id, j), raw_in);
+
+		ice_debug(hw, ICE_DBG_INIT, "inset wr(%d, %d): %x = %08x\n",
+			  prof_id, j, GLQF_FDINSET(prof_id, j), raw_in);
+	}
+
+	/* initially clear the mask select for this profile */
+	ice_update_fd_mask(hw, prof_id, 0);
+
+	return 0;
+}
+
 /**
  * ice_add_prof - add profile
  * @hw: pointer to the HW struct
@@ -3476,6 +3688,18 @@ ice_add_prof(struct ice_hw *hw, enum ice_block blk, u64 id, u8 ptypes[],
 		status = ice_alloc_prof_id(hw, blk, &prof_id);
 		if (status)
 			goto err_ice_add_prof;
+		if (blk == ICE_BLK_FD) {
+			/* For Flow Director block, the extraction sequence may
+			 * need to be altered in the case where there are paired
+			 * fields that have no match. This is necessary because
+			 * for Flow Director, src and dest fields need to be
+			 * paired for filter programming and these values are
+			 * swapped during Tx.
+			 */
+			status = ice_update_fd_swap(hw, prof_id, es);
+			if (status)
+				goto err_ice_add_prof;
+		}
 
 		/* and write new es */
 		ice_write_es(hw, blk, prof_id, es);
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index 07875db08c3f..f4b6c3933564 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -397,10 +397,8 @@ ice_flow_proc_segs(struct ice_hw *hw, struct ice_flow_prof_params *params)
 		return status;
 
 	switch (params->blk) {
+	case ICE_BLK_FD:
 	case ICE_BLK_RSS:
-		/* Only header information is provided for RSS configuration.
-		 * No further processing is needed.
-		 */
 		status = 0;
 		break;
 	default:
@@ -481,6 +479,43 @@ ice_flow_find_prof_id(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
 	return NULL;
 }
 
+/**
+ * ice_dealloc_flow_entry - free a flow entry and its content buffer
+ * @hw: pointer to the HW struct
+ * @entry: flow entry to free; NULL is ignored
+ */
+static void
+ice_dealloc_flow_entry(struct ice_hw *hw, struct ice_flow_entry *entry)
+{
+	if (!entry)
+		return;
+
+	if (entry->entry)
+		devm_kfree(ice_hw_to_dev(hw), entry->entry);
+
+	devm_kfree(ice_hw_to_dev(hw), entry);
+}
+
+/**
+ * ice_flow_rem_entry_sync - unlink a flow entry from its list and free it
+ * @hw: pointer to the HW struct
+ * @blk: classification stage (unused)
+ * @entry: flow entry to be removed
+ */
+static enum ice_status
+ice_flow_rem_entry_sync(struct ice_hw *hw, enum ice_block __always_unused blk,
+			struct ice_flow_entry *entry)
+{
+	if (!entry)
+		return ICE_ERR_BAD_PTR;
+
+	list_del(&entry->l_entry); /* visible callers hold entries_lock */
+
+	ice_dealloc_flow_entry(hw, entry);
+
+	return 0;
+}
+
 /**
  * ice_flow_add_prof_sync - Add a flow profile for packet segments and fields
  * @hw: pointer to the HW struct
@@ -568,6 +603,21 @@ ice_flow_rem_prof_sync(struct ice_hw *hw, enum ice_block blk,
 {
 	enum ice_status status;
 
+	/* Remove all remaining flow entries before removing the flow profile */
+	if (!list_empty(&prof->entries)) {
+		struct ice_flow_entry *e, *t;
+
+		mutex_lock(&prof->entries_lock);
+
+		list_for_each_entry_safe(e, t, &prof->entries, l_entry) {
+			status = ice_flow_rem_entry_sync(hw, blk, e);
+			if (status)
+				break;
+		}
+
+		mutex_unlock(&prof->entries_lock);
+	}
+
 	/* Remove all hardware profiles associated with this flow profile */
 	status = ice_rem_prof(hw, blk, prof->id);
 	if (!status) {
@@ -653,7 +703,7 @@ ice_flow_disassoc_prof(struct ice_hw *hw, enum ice_block blk,
  * @segs_cnt: number of packet segments provided
  * @prof: stores the returned flow profile added
  */
-static enum ice_status
+enum ice_status
 ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
 		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
 		  struct ice_flow_prof **prof)
@@ -691,7 +741,7 @@ ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
  * @blk: the block for which the flow profile is to be removed
  * @prof_id: unique ID of the flow profile to be removed
  */
-static enum ice_status
+enum ice_status
 ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id)
 {
 	struct ice_flow_prof *prof;
@@ -714,6 +764,113 @@ out:
 	return status;
 }
 
+/**
+ * ice_flow_add_entry - Add a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @prof_id: ID of the profile to add a new flow entry to
+ * @entry_id: unique ID to identify this flow entry
+ * @vsi_handle: software VSI handle for the flow entry
+ * @prio: priority of the flow entry
+ * @data: match values/masks buffer; only NULL-checked here (optional for RSS)
+ * @entry_h: pointer to buffer that receives the new flow entry's handle
+ */
+enum ice_status
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi_handle, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h)
+{
+	struct ice_flow_entry *e = NULL;
+	struct ice_flow_prof *prof;
+	enum ice_status status;
+
+	/* entry_h is mandatory; data may be NULL only for RSS */
+	if (!entry_h || (!data && blk != ICE_BLK_RSS))
+		return ICE_ERR_BAD_PTR;
+
+	if (!ice_is_vsi_valid(hw, vsi_handle))
+		return ICE_ERR_PARAM;
+
+	mutex_lock(&hw->fl_profs_locks[blk]);
+
+	prof = ice_flow_find_prof_id(hw, blk, prof_id);
+	if (!prof) {
+		status = ICE_ERR_DOES_NOT_EXIST;
+	} else {
+		/* Allocate memory for the entry being added and associate
+		 * the VSI to the found flow profile
+		 */
+		e = devm_kzalloc(ice_hw_to_dev(hw), sizeof(*e), GFP_KERNEL);
+		if (!e)
+			status = ICE_ERR_NO_MEMORY;
+		else
+			status = ice_flow_assoc_prof(hw, blk, prof, vsi_handle);
+	}
+
+	mutex_unlock(&hw->fl_profs_locks[blk]);
+	if (status)
+		goto out;
+
+	e->id = entry_id;
+	e->vsi_handle = vsi_handle;
+	e->prof = prof;
+	e->priority = prio;
+
+	switch (blk) {
+	case ICE_BLK_FD:
+	case ICE_BLK_RSS:
+		break; /* no block-specific entry setup for FD or RSS */
+	default:
+		status = ICE_ERR_NOT_IMPL;
+		goto out;
+	}
+
+	mutex_lock(&prof->entries_lock);
+	list_add(&e->l_entry, &prof->entries);
+	mutex_unlock(&prof->entries_lock);
+
+	*entry_h = ICE_FLOW_ENTRY_HNDL(e);
+
+out:
+	if (status && e) { /* failed after allocating: free the entry */
+		if (e->entry)
+			devm_kfree(ice_hw_to_dev(hw), e->entry);
+		devm_kfree(ice_hw_to_dev(hw), e);
+	}
+
+	return status;
+}
+
+/**
+ * ice_flow_rem_entry - Remove a flow entry
+ * @hw: pointer to the HW struct
+ * @blk: classification stage
+ * @entry_h: handle to the flow entry to be removed
+ */
+enum ice_status ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk,
+				   u64 entry_h)
+{
+	struct ice_flow_entry *entry;
+	struct ice_flow_prof *prof;
+	enum ice_status status = 0;
+
+	if (entry_h == ICE_FLOW_ENTRY_HANDLE_INVAL)
+		return ICE_ERR_PARAM;
+
+	entry = ICE_FLOW_ENTRY_PTR(entry_h);
+
+	/* Keep the profile pointer: entry is freed before the unlock below */
+	prof = entry->prof;
+
+	if (prof) { /* NOTE(review): entry without a profile is not freed */
+		mutex_lock(&prof->entries_lock);
+		status = ice_flow_rem_entry_sync(hw, blk, entry);
+		mutex_unlock(&prof->entries_lock);
+	}
+
+	return status;
+}
+
 /**
  * ice_flow_set_fld_ext - specifies locations of field from entry's input buffer
  * @seg: packet segment the field being set belongs to
@@ -776,7 +933,7 @@ ice_flow_set_fld_ext(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
  * create the content of a match entry. This function should only be used for
  * fixed-size data structures.
  */
-static void
+void
 ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
 		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 00f2b7a9feed..3c784c3b5db2 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -172,6 +172,22 @@ struct ice_flow_seg_info {
 	struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX];
 };
 
+/* Describes one flow entry; linked into its profile's entries list */
+struct ice_flow_entry {
+	struct list_head l_entry;
+
+	u64 id;
+	struct ice_flow_prof *prof;
+	/* Flow entry's content */
+	void *entry;
+	enum ice_flow_priority priority;
+	u16 vsi_handle;
+	u16 entry_sz;
+};
+
+#define ICE_FLOW_ENTRY_HNDL(e)	((u64)(uintptr_t)(e))
+#define ICE_FLOW_ENTRY_PTR(h)	((struct ice_flow_entry *)(uintptr_t)(h))
+
 struct ice_flow_prof {
 	struct list_head l_entry;
 
@@ -197,7 +213,21 @@ struct ice_rss_cfg {
 	u32 packet_hdr;
 };
 
-enum ice_status ice_flow_rem_entry(struct ice_hw *hw, u64 entry_h);
+enum ice_status
+ice_flow_add_prof(struct ice_hw *hw, enum ice_block blk, enum ice_flow_dir dir,
+		  u64 prof_id, struct ice_flow_seg_info *segs, u8 segs_cnt,
+		  struct ice_flow_prof **prof);
+enum ice_status
+ice_flow_rem_prof(struct ice_hw *hw, enum ice_block blk, u64 prof_id);
+enum ice_status
+ice_flow_add_entry(struct ice_hw *hw, enum ice_block blk, u64 prof_id,
+		   u64 entry_id, u16 vsi, enum ice_flow_priority prio,
+		   void *data, u64 *entry_h);
+enum ice_status
+ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h);
+void
+ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
+		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range);
 void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index bc48eda67c81..3c61b2a04fc4 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -290,6 +290,17 @@
 #define GL_PWR_MODE_CTL				0x000B820C
 #define GL_PWR_MODE_CTL_CAR_MAX_BW_S		30
 #define GL_PWR_MODE_CTL_CAR_MAX_BW_M		ICE_M(0x3, 30)
+#define GLQF_FD_SIZE				0x00460010
+#define GLQF_FD_SIZE_FD_GSIZE_S			0
+#define GLQF_FD_SIZE_FD_GSIZE_M			ICE_M(0x7FFF, 0)
+#define GLQF_FD_SIZE_FD_BSIZE_S			16
+#define GLQF_FD_SIZE_FD_BSIZE_M			ICE_M(0x7FFF, 16)
+#define GLQF_FDINSET(_i, _j)			(0x00412000 + ((_i) * 4 + (_j) * 512))
+#define GLQF_FDMASK_SEL(_i)			(0x00410400 + ((_i) * 4))
+#define GLQF_FDSWAP(_i, _j)			(0x00413000 + ((_i) * 4 + (_j) * 512))
+#define PFQF_FD_ENA				0x0043A000
+#define PFQF_FD_ENA_FD_ENA_M			BIT(0)
+#define PFQF_FD_SIZE				0x00460100
 #define GLDCB_RTCTQ_RXQNUM_S			0
 #define GLDCB_RTCTQ_RXQNUM_M			ICE_M(0x7FF, 0)
 #define GLPRT_BPRCL(_i)				(0x00381380 + ((_i) * 8))
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index c73c977f6967..43c949e0a760 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -19,6 +19,8 @@ const char *ice_vsi_type_str(enum ice_vsi_type vsi_type)
 		return "ICE_VSI_PF";
 	case ICE_VSI_VF:
 		return "ICE_VSI_VF";
+	case ICE_VSI_CTRL:
+		return "ICE_VSI_CTRL";
 	case ICE_VSI_LB:
 		return "ICE_VSI_LB";
 	default:
@@ -123,6 +125,7 @@ static void ice_vsi_set_num_desc(struct ice_vsi *vsi)
 {
 	switch (vsi->type) {
 	case ICE_VSI_PF:
+	case ICE_VSI_CTRL:
 	case ICE_VSI_LB:
 		vsi->num_rx_desc = ICE_DFLT_NUM_RX_DESC;
 		vsi->num_tx_desc = ICE_DFLT_NUM_TX_DESC;
@@ -187,6 +190,11 @@ static void ice_vsi_set_num_qs(struct ice_vsi *vsi, u16 vf_id)
 		 */
 		vsi->num_q_vectors = pf->num_msix_per_vf - ICE_NONQ_VECS_VF;
 		break;
+	case ICE_VSI_CTRL:
+		vsi->alloc_txq = 1;
+		vsi->alloc_rxq = 1;
+		vsi->num_q_vectors = 1;
+		break;
 	case ICE_VSI_LB:
 		vsi->alloc_txq = 1;
 		vsi->alloc_rxq = 1;
@@ -322,7 +330,7 @@ int ice_vsi_clear(struct ice_vsi *vsi)
 	/* updates the PF for this cleared VSI */
 
 	pf->vsi[vsi->idx] = NULL;
-	if (vsi->idx < pf->next_vsi)
+	if (vsi->idx < pf->next_vsi && vsi->type != ICE_VSI_CTRL)
 		pf->next_vsi = vsi->idx;
 
 	ice_vsi_free_arrays(vsi);
@@ -332,6 +340,25 @@ int ice_vsi_clear(struct ice_vsi *vsi)
 	return 0;
 }
 
+/**
+ * ice_msix_clean_ctrl_vsi - MSIX mode interrupt handler for ctrl VSI
+ * @irq: interrupt number (unused)
+ * @data: pointer to a q_vector
+ */
+static irqreturn_t ice_msix_clean_ctrl_vsi(int __always_unused irq, void *data)
+{
+	struct ice_q_vector *q_vector = (struct ice_q_vector *)data;
+
+	if (!q_vector->tx.ring) /* NOTE(review): rx.ring is not checked */
+		return IRQ_HANDLED;
+
+#define FDIR_RX_DESC_CLEAN_BUDGET 64	/* budget passed to ice_clean_rx_irq */
+	ice_clean_rx_irq(q_vector->rx.ring, FDIR_RX_DESC_CLEAN_BUDGET);
+	ice_clean_ctrl_tx_irq(q_vector->tx.ring);
+
+	return IRQ_HANDLED;
+}
+
 /**
  * ice_msix_clean_rings - MSIX mode Interrupt Handler
  * @irq: interrupt number
@@ -383,8 +410,6 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 	vsi->back = pf;
 	set_bit(__ICE_DOWN, vsi->state);
 
-	vsi->idx = pf->next_vsi;
-
 	if (vsi_type == ICE_VSI_VF)
 		ice_vsi_set_num_qs(vsi, vf_id);
 	else
@@ -398,6 +423,13 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		/* Setup default MSIX irq handler for VSI */
 		vsi->irq_handler = ice_msix_clean_rings;
 		break;
+	case ICE_VSI_CTRL:
+		if (ice_vsi_alloc_arrays(vsi))
+			goto err_rings;
+
+		/* Setup ctrl VSI MSIX irq handler */
+		vsi->irq_handler = ice_msix_clean_ctrl_vsi;
+		break;
 	case ICE_VSI_VF:
 		if (ice_vsi_alloc_arrays(vsi))
 			goto err_rings;
@@ -411,12 +443,20 @@ ice_vsi_alloc(struct ice_pf *pf, enum ice_vsi_type vsi_type, u16 vf_id)
 		goto unlock_pf;
 	}
 
-	/* fill VSI slot in the PF struct */
-	pf->vsi[pf->next_vsi] = vsi;
+	if (vsi->type == ICE_VSI_CTRL) {
+		/* Use the last VSI slot as the index for the control VSI */
+		vsi->idx = pf->num_alloc_vsi - 1;
+		pf->ctrl_vsi_idx = vsi->idx;
+		pf->vsi[vsi->idx] = vsi;
+	} else {
+		/* fill slot and make note of the index */
+		vsi->idx = pf->next_vsi;
+		pf->vsi[pf->next_vsi] = vsi;
 
-	/* prepare pf->next_vsi for next use */
-	pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
-					 pf->next_vsi);
+		/* prepare pf->next_vsi for next use */
+		pf->next_vsi = ice_get_free_slot(pf->vsi, pf->num_alloc_vsi,
+						 pf->next_vsi);
+	}
 	goto unlock_pf;
 
 err_rings:
@@ -427,6 +467,48 @@ unlock_pf:
 	return vsi;
 }
 
+/**
+ * ice_alloc_fd_res - Allocate FD resource for a VSI
+ * @vsi: pointer to the ice_vsi
+ *
+ * This sets the VSI's guaranteed and best-effort FD filter counts
+ *
+ * Returns 0 on success, -EPERM when the VSI gets no FD resources
+ */
+static int ice_alloc_fd_res(struct ice_vsi *vsi)
+{
+	struct ice_pf *pf = vsi->back;
+	u32 g_val, b_val;
+
+	/* Flow Director filters are only allocated/assigned to the PF VSI which
+	 * passes the traffic. The CTRL VSI is only used to add/delete filters
+	 * so we don't allocate resources to it
+	 */
+
+	/* FD filters from guaranteed pool per VSI */
+	g_val = pf->hw.func_caps.fd_fltr_guar;
+	if (!g_val)
+		return -EPERM;
+
+	/* FD filters from best effort pool */
+	b_val = pf->hw.func_caps.fd_fltr_best_effort;
+	if (!b_val)
+		return -EPERM;
+
+	if (vsi->type != ICE_VSI_PF)
+		return -EPERM;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EPERM;
+
+	vsi->num_gfltr = g_val / pf->num_alloc_vsi;
+
+	/* each VSI gets same "best_effort" quota */
+	vsi->num_bfltr = b_val;
+
+	return 0;
+}
+
 /**
  * ice_vsi_get_qs - Assign queues from PF to VSI
  * @vsi: the VSI to assign queues to
@@ -583,8 +665,8 @@ static void ice_vsi_set_rss_params(struct ice_vsi *vsi)
 	case ICE_VSI_LB:
 		break;
 	default:
-		dev_warn(ice_pf_to_dev(pf), "Unknown VSI type %d\n",
-			 vsi->type);
+		dev_dbg(ice_pf_to_dev(pf), "Unsupported VSI type %s\n",
+			ice_vsi_type_str(vsi->type));
 		break;
 	}
 }
@@ -753,6 +835,51 @@ static void ice_vsi_setup_q_map(struct ice_vsi *vsi, struct ice_vsi_ctx *ctxt)
 	ctxt->info.q_mapping[1] = cpu_to_le16(vsi->num_rxq);
 }
 
+/**
+ * ice_set_fd_vsi_ctx - Set FD VSI context before adding a VSI
+ * @ctxt: the VSI context being set
+ * @vsi: the VSI being configured; only PF and CTRL VSIs are touched
+ */
+static void ice_set_fd_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
+{
+	u8 dflt_q_group, dflt_q_prio;
+	u16 dflt_q, report_q, val;
+
+	if (vsi->type != ICE_VSI_PF && vsi->type != ICE_VSI_CTRL)
+		return;
+
+	val = ICE_AQ_VSI_PROP_FLOW_DIR_VALID;
+	ctxt->info.valid_sections |= cpu_to_le16(val);
+	dflt_q = 0;
+	dflt_q_group = 0;
+	report_q = 0;
+	dflt_q_prio = 0;
+
+	/* enable flow director filtering/programming */
+	val = ICE_AQ_VSI_FD_ENABLE | ICE_AQ_VSI_FD_PROG_ENABLE;
+	ctxt->info.fd_options = cpu_to_le16(val);
+	/* max of allocated flow director filters */
+	ctxt->info.max_fd_fltr_dedicated =
+			cpu_to_le16(vsi->num_gfltr);
+	/* max of shared flow director filters any VSI may program */
+	ctxt->info.max_fd_fltr_shared =
+			cpu_to_le16(vsi->num_bfltr);
+	/* default queue index within the VSI of the default FD */
+	val = ((dflt_q << ICE_AQ_VSI_FD_DEF_Q_S) &
+	       ICE_AQ_VSI_FD_DEF_Q_M);
+	/* target queue or queue group to the FD filter */
+	val |= ((dflt_q_group << ICE_AQ_VSI_FD_DEF_GRP_S) &
+		ICE_AQ_VSI_FD_DEF_GRP_M);
+	ctxt->info.fd_def_q = cpu_to_le16(val);
+	/* queue index on which FD filter completion is reported */
+	val = ((report_q << ICE_AQ_VSI_FD_REPORT_Q_S) &
+	       ICE_AQ_VSI_FD_REPORT_Q_M);
+	/* priority of the default qindex action */
+	val |= ((dflt_q_prio << ICE_AQ_VSI_FD_DEF_PRIORITY_S) &
+		ICE_AQ_VSI_FD_DEF_PRIORITY_M);
+	ctxt->info.fd_report_opt = cpu_to_le16(val);
+}
+
 /**
  * ice_set_rss_vsi_ctx - Set RSS VSI context before adding a VSI
  * @ctxt: the VSI context being set
@@ -778,13 +905,10 @@ static void ice_set_rss_vsi_ctx(struct ice_vsi_ctx *ctxt, struct ice_vsi *vsi)
 		lut_type = ICE_AQ_VSI_Q_OPT_RSS_LUT_VSI;
 		hash_type = ICE_AQ_VSI_Q_OPT_RSS_TPLZ;
 		break;
-	case ICE_VSI_LB:
+	default:
 		dev_dbg(dev, "Unsupported VSI type %s\n",
 			ice_vsi_type_str(vsi->type));
 		return;
-	default:
-		dev_warn(dev, "Unknown VSI type %d\n", vsi->type);
-		return;
 	}
 
 	ctxt->info.q_opt_rss = ((lut_type << ICE_AQ_VSI_Q_OPT_RSS_LUT_S) &
@@ -816,6 +940,7 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
 
 	ctxt->info = vsi->info;
 	switch (vsi->type) {
+	case ICE_VSI_CTRL:
 	case ICE_VSI_LB:
 	case ICE_VSI_PF:
 		ctxt->flags = ICE_AQ_VSI_TYPE_PF;
@@ -831,12 +956,15 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
 	}
 
 	ice_set_dflt_vsi_ctx(ctxt);
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		ice_set_fd_vsi_ctx(ctxt, vsi);
 	/* if the switch is in VEB mode, allow VSI loopback */
 	if (vsi->vsw->bridge_mode == BRIDGE_MODE_VEB)
 		ctxt->info.sw_flags |= ICE_AQ_VSI_SW_FLAG_ALLOW_LB;
 
 	/* Set LUT type and HASH type if RSS is enabled */
-	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+	if (test_bit(ICE_FLAG_RSS_ENA, pf->flags) &&
+	    vsi->type != ICE_VSI_CTRL) {
 		ice_set_rss_vsi_ctx(ctxt, vsi);
 		/* if updating VSI context, make sure to set valid_section:
 		 * to indicate which section of VSI context being updated
@@ -1986,10 +2114,12 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 	if (vsi->type == ICE_VSI_VF)
 		vsi->vf_id = vf_id;
 
+	ice_alloc_fd_res(vsi);
+
 	if (ice_vsi_get_qs(vsi)) {
 		dev_err(dev, "Failed to allocate queues. vsi->idx = %d\n",
 			vsi->idx);
-		goto unroll_get_qs;
+		goto unroll_vsi_alloc;
 	}
 
 	/* set RSS capabilities */
@@ -2004,6 +2134,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 		goto unroll_get_qs;
 
 	switch (vsi->type) {
+	case ICE_VSI_CTRL:
 	case ICE_VSI_PF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
 		if (ret)
@@ -2034,14 +2165,16 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 
 		ice_vsi_map_rings_to_vectors(vsi);
 
-		/* Do not exit if configuring RSS had an issue, at least
-		 * receive traffic on first queue. Hence no need to capture
-		 * return value
-		 */
-		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
-			ice_vsi_cfg_rss_lut_key(vsi);
-			ice_vsi_set_rss_flow_fld(vsi);
-		}
+		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+		if (vsi->type != ICE_VSI_CTRL)
+			/* Do not exit if configuring RSS had an issue, at
+			 * least receive traffic on first queue. Hence no
+			 * need to capture return value
+			 */
+			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags)) {
+				ice_vsi_cfg_rss_lut_key(vsi);
+				ice_vsi_set_rss_flow_fld(vsi);
+			}
 		break;
 	case ICE_VSI_VF:
 		/* VF driver will take care of creating netdev for this type and
@@ -2122,6 +2255,7 @@ unroll_vsi_init:
 	ice_vsi_delete(vsi);
 unroll_get_qs:
 	ice_vsi_put_qs(vsi);
+unroll_vsi_alloc:
 	ice_vsi_clear(vsi);
 
 	return NULL;
@@ -2274,6 +2408,8 @@ int ice_ena_vsi(struct ice_vsi *vsi, bool locked)
 			if (!locked)
 				rtnl_unlock();
 		}
+	} else if (vsi->type == ICE_VSI_CTRL) {
+		err = ice_vsi_open_ctrl(vsi);
 	}
 
 	return err;
@@ -2303,6 +2439,8 @@ void ice_dis_vsi(struct ice_vsi *vsi, bool locked)
 		} else {
 			ice_vsi_close(vsi);
 		}
+	} else if (vsi->type == ICE_VSI_CTRL) {
+		ice_vsi_close(vsi);
 	}
 }
 
@@ -2848,6 +2986,30 @@ void ice_update_rx_ring_stats(struct ice_ring *rx_ring, u64 pkts, u64 bytes)
 	u64_stats_update_end(&rx_ring->syncp);
 }
 
+/**
+ * ice_status_to_errno - convert from enum ice_status to Linux errno
+ * @err: ice_status value to convert
+ *
+ * Returns 0 for ICE_SUCCESS, otherwise a negative errno; a status with no
+ * dedicated mapping is reported as -EINVAL.
+ */
+int ice_status_to_errno(enum ice_status err)
+{
+	if (err == ICE_SUCCESS)
+		return 0;
+	if (err == ICE_ERR_DOES_NOT_EXIST)
+		return -ENOENT;
+	if (err == ICE_ERR_OUT_OF_RANGE)
+		return -ENOTTY;
+	if (err == ICE_ERR_NO_MEMORY)
+		return -ENOMEM;
+	if (err == ICE_ERR_MAX_LIMIT)
+		return -EAGAIN;
+
+	/* ICE_ERR_PARAM and every status not listed above */
+	return -EINVAL;
+}
+
 /**
  * ice_is_dflt_vsi_in_use - check if the default forwarding VSI is being used
  * @sw: switch to check if its default forwarding VSI is free
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 9746de9b25fe..076e635e0c9f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -92,6 +92,8 @@ void ice_update_rx_ring_stats(struct ice_ring *ring, u64 pkts, u64 bytes);
 
 void ice_vsi_cfg_frame_size(struct ice_vsi *vsi);
 
+int ice_status_to_errno(enum ice_status err);
+
 u32 ice_intrl_usec_to_reg(u8 intrl, u8 gran);
 
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c3e5c4334e26..2cc1f345c845 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2319,6 +2319,7 @@ static void ice_set_netdev_features(struct net_device *netdev)
 
 	dflt_features = NETIF_F_SG	|
 			NETIF_F_HIGHDMA	|
+			NETIF_F_NTUPLE	|
 			NETIF_F_RXHASH;
 
 	csumo_features = NETIF_F_RXCSUM	  |
@@ -2458,6 +2459,20 @@ ice_pf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
 	return ice_vsi_setup(pf, pi, ICE_VSI_PF, ICE_INVAL_VFID);
 }
 
+/**
+ * ice_ctrl_vsi_setup - Set up a control VSI
+ * @pf: board private structure
+ * @pi: pointer to the port_info instance
+ *
+ * Calls ice_vsi_setup() for an ICE_VSI_CTRL VSI with no VF ID and returns
+ * its result: the VSI software struct on success, NULL on failure.
+ */
+static struct ice_vsi *
+ice_ctrl_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi)
+{
+	return ice_vsi_setup(pf, pi, ICE_VSI_CTRL, ICE_INVAL_VFID);
+}
+
 /**
  * ice_lb_vsi_setup - Set up a loopback VSI
  * @pf: board private structure
@@ -2706,6 +2721,23 @@ static void ice_set_pf_caps(struct ice_pf *pf)
 	if (func_caps->common_cap.rss_table_size)
 		set_bit(ICE_FLAG_RSS_ENA, pf->flags);
 
+	clear_bit(ICE_FLAG_FD_ENA, pf->flags);
+	if (func_caps->fd_fltr_guar > 0 || func_caps->fd_fltr_best_effort > 0) {
+		u16 unused;
+
+		/* ctrl_vsi_idx will be set to a valid value when flow director
+		 * is setup by ice_init_fdir
+		 */
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+		set_bit(ICE_FLAG_FD_ENA, pf->flags);
+		/* force guaranteed filter pool for PF */
+		ice_alloc_fd_guar_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_guar);
+		/* force shared filter pool for PF */
+		ice_alloc_fd_shrd_item(&pf->hw, &unused,
+				       func_caps->fd_fltr_best_effort);
+	}
+
 	pf->max_pf_txqs = func_caps->common_cap.num_txq;
 	pf->max_pf_rxqs = func_caps->common_cap.num_rxq;
 }
@@ -2772,6 +2804,15 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	v_budget += needed;
 	v_left -= needed;
 
+	/* reserve one vector for flow director */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		needed = ICE_FDIR_MSIX;
+		if (v_left < needed)
+			goto no_hw_vecs_left_err;
+		v_budget += needed;
+		v_left -= needed;
+	}
+
 	pf->msix_entries = devm_kcalloc(dev, v_budget,
 					sizeof(*pf->msix_entries), GFP_KERNEL);
 
@@ -2796,8 +2837,10 @@ static int ice_ena_msix_range(struct ice_pf *pf)
 	if (v_actual < v_budget) {
 		dev_warn(dev, "not enough OS MSI-X vectors. requested = %d, obtained = %d\n",
 			 v_budget, v_actual);
-/* 2 vectors for LAN (traffic + OICR) */
+/* 2 vectors each for LAN and RDMA (traffic + OICR), one for flow director */
 #define ICE_MIN_LAN_VECS 2
+#define ICE_MIN_RDMA_VECS 2
+#define ICE_MIN_VECS (ICE_MIN_LAN_VECS + ICE_MIN_RDMA_VECS + 1)
 
 		if (v_actual < ICE_MIN_LAN_VECS) {
 			/* error if we can't get minimum vectors */
@@ -3102,6 +3145,53 @@ static enum ice_status ice_send_version(struct ice_pf *pf)
 	return ice_aq_send_driver_ver(&pf->hw, &dv, NULL);
 }
 
+/**
+ * ice_init_fdir - Initialize flow director VSI and configuration
+ * @pf: pointer to the PF instance
+ *
+ * Returns 0 on success, negative on error
+ */
+static int ice_init_fdir(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_vsi *ctrl_vsi;
+	int err;
+
+	/* Side Band Flow Director needs to have a control VSI.
+	 * Allocate it and store it in the PF.
+	 */
+	ctrl_vsi = ice_ctrl_vsi_setup(pf, pf->hw.port_info);
+	if (!ctrl_vsi) {
+		dev_dbg(dev, "could not create control VSI\n");
+		return -ENOMEM;
+	}
+
+	err = ice_vsi_open_ctrl(ctrl_vsi);
+	if (err) {
+		dev_dbg(dev, "could not open control VSI\n");
+		goto err_vsi_open;
+	}
+
+	mutex_init(&pf->hw.fdir_fltr_lock);
+
+	err = ice_fdir_create_dflt_rules(pf);
+	if (err)
+		goto err_fdir_rule;
+
+	return 0;
+
+err_fdir_rule:
+	ice_fdir_release_flows(&pf->hw);
+	ice_vsi_close(ctrl_vsi);
+err_vsi_open:
+	ice_vsi_release(ctrl_vsi);
+	if (pf->ctrl_vsi_idx != ICE_NO_VSI) {
+		pf->vsi[pf->ctrl_vsi_idx] = NULL;
+		pf->ctrl_vsi_idx = ICE_NO_VSI;
+	}
+	return err;
+}
+
 /**
  * ice_get_opt_fw_name - return optional firmware file name or NULL
  * @pf: pointer to the PF instance
@@ -3362,6 +3452,10 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 
 	/* initialize DDP driven features */
 
+	/* Note: Flow director init failure is non-fatal to load */
+	if (ice_init_fdir(pf))
+		dev_err(dev, "could not initialize flow director\n");
+
 	/* Note: DCB init failure is non-fatal to load */
 	if (ice_init_pf_dcb(pf, false)) {
 		clear_bit(ICE_FLAG_DCB_CAPABLE, pf->flags);
@@ -3424,6 +3518,7 @@ static void ice_remove(struct pci_dev *pdev)
 	set_bit(__ICE_DOWN, pf->state);
 	ice_service_task_stop(pf);
 
+	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
 	ice_devlink_destroy_port(pf);
 	ice_vsi_release_all(pf);
 	ice_free_irq_msix_misc(pf);
@@ -3940,6 +4035,13 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 		 (netdev->features & NETIF_F_HW_VLAN_CTAG_FILTER))
 		ret = ice_cfg_vlan_pruning(vsi, false, false);
 
+	if ((features & NETIF_F_NTUPLE) &&
+	    !(netdev->features & NETIF_F_NTUPLE))
+		ice_vsi_manage_fdir(vsi, true);
+	else if (!(features & NETIF_F_NTUPLE) &&
+		 (netdev->features & NETIF_F_NTUPLE))
+		ice_vsi_manage_fdir(vsi, false);
+
 	return ret;
 }
 
@@ -4494,6 +4596,62 @@ int ice_vsi_setup_rx_rings(struct ice_vsi *vsi)
 	return err;
 }
 
+/**
+ * ice_vsi_open_ctrl - open control VSI for use
+ * @vsi: the VSI to open
+ *
+ * Initialization of the Control VSI
+ *
+ * Returns 0 on success, negative value on error
+ */
+int ice_vsi_open_ctrl(struct ice_vsi *vsi)
+{
+	char int_name[ICE_INT_NAME_STR_LEN];
+	struct ice_pf *pf = vsi->back;
+	struct device *dev;
+	int err;
+
+	dev = ice_pf_to_dev(pf);
+	/* allocate descriptors */
+	err = ice_vsi_setup_tx_rings(vsi);
+	if (err)
+		goto err_setup_tx;
+
+	err = ice_vsi_setup_rx_rings(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	err = ice_vsi_cfg(vsi);
+	if (err)
+		goto err_setup_rx;
+
+	snprintf(int_name, sizeof(int_name) - 1, "%s-%s:ctrl",
+		 dev_driver_string(dev), dev_name(dev));
+	err = ice_vsi_req_irq_msix(vsi, int_name);
+	if (err)
+		goto err_setup_rx;
+
+	ice_vsi_cfg_msix(vsi);
+
+	err = ice_vsi_start_all_rx_rings(vsi);
+	if (err)
+		goto err_up_complete;
+
+	clear_bit(__ICE_DOWN, vsi->state);
+	ice_vsi_ena_irq(vsi);
+
+	return 0;
+
+err_up_complete:
+	ice_down(vsi);
+err_setup_rx:
+	ice_vsi_free_rx_rings(vsi);
+err_setup_tx:
+	ice_vsi_free_tx_rings(vsi);
+
+	return err;
+}
+
 /**
  * ice_vsi_open - Called when a network interface is made active
  * @vsi: the VSI to open
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index 678db6bf7f57..babe4a485fd6 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -17,6 +17,7 @@ enum ice_prot_id {
 	ICE_PROT_IPV6_OF_OR_S	= 40,
 	ICE_PROT_IPV6_IL	= 41,
 	ICE_PROT_TCP_IL		= 49,
+	ICE_PROT_UDP_OF		= 52,
 	ICE_PROT_UDP_IL_OR_S	= 53,
 	ICE_PROT_GRE_OF		= 64,
 	ICE_PROT_SCTP_IL	= 96,
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 7d88944de31a..0156b73df1b1 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -2677,6 +2677,81 @@ void ice_remove_vsi_fltr(struct ice_hw *hw, u16 vsi_handle)
 	ice_remove_vsi_lkup_fltr(hw, vsi_handle, ICE_SW_LKUP_PROMISC_VLAN);
 }
 
+/**
+ * ice_alloc_res_cntr - allocate a resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries requested for FD resource type
+ * @counter_id: counter index returned by AQ call
+ */
+enum ice_status
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	/* Allocate resource */
+	buf_len = sizeof(*buf);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+
+	status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+				       ice_aqc_opc_alloc_res, NULL);
+	if (status)
+		goto exit;
+
+	*counter_id = le16_to_cpu(buf->elem[0].e.sw_resp);
+
+exit:
+	kfree(buf);
+	return status;
+}
+
+/**
+ * ice_free_res_cntr - free resource counter
+ * @hw: pointer to the hardware structure
+ * @type: type of resource
+ * @alloc_shared: if set it is shared else dedicated
+ * @num_items: number of entries to be freed for FD resource type
+ * @counter_id: counter ID resource which needs to be freed
+ */
+enum ice_status
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id)
+{
+	struct ice_aqc_alloc_free_res_elem *buf;
+	enum ice_status status;
+	u16 buf_len;
+
+	/* Free resource */
+	buf_len = sizeof(*buf);
+	buf = kzalloc(buf_len, GFP_KERNEL);
+	if (!buf)
+		return ICE_ERR_NO_MEMORY;
+
+	buf->num_elems = cpu_to_le16(num_items);
+	buf->res_type = cpu_to_le16(((type << ICE_AQC_RES_TYPE_S) &
+				      ICE_AQC_RES_TYPE_M) | alloc_shared);
+	buf->elem[0].e.sw_resp = cpu_to_le16(counter_id);
+
+	status = ice_aq_alloc_free_res(hw, 1, buf, buf_len,
+				       ice_aqc_opc_free_res, NULL);
+	if (status)
+		ice_debug(hw, ICE_DBG_SW,
+			  "counter resource could not be freed\n");
+
+	kfree(buf);
+	return status;
+}
+
 /**
  * ice_replay_vsi_fltr - Replay filters for requested VSI
  * @hw: pointer to the hardware structure
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.h b/drivers/net/ethernet/intel/ice/ice_switch.h
index fa14b9545dab..8b4f9d35c860 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.h
+++ b/drivers/net/ethernet/intel/ice/ice_switch.h
@@ -208,6 +208,13 @@ void ice_clear_all_vsi_ctx(struct ice_hw *hw);
 /* Switch config */
 enum ice_status ice_get_initial_sw_cfg(struct ice_hw *hw);
 
+enum ice_status
+ice_alloc_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		   u16 *counter_id);
+enum ice_status
+ice_free_res_cntr(struct ice_hw *hw, u8 type, u8 alloc_shared, u16 num_items,
+		  u16 counter_id);
+
 /* Switch/bridge related commands */
 enum ice_status ice_update_sw_rule_bridge_mode(struct ice_hw *hw);
 enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_lst);
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 0d90e32efab9..173a167c96d9 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -15,6 +15,8 @@
 
 #define ICE_RX_HDR_SIZE		256
 
+#define FDIR_DESC_RXDID 0x40
+
 /**
  * ice_unmap_and_free_tx_buf - Release a Tx buffer
  * @ring: the ring that owns the buffer
@@ -24,7 +26,9 @@ static void
 ice_unmap_and_free_tx_buf(struct ice_ring *ring, struct ice_tx_buf *tx_buf)
 {
 	if (tx_buf->skb) {
-		if (ice_ring_is_xdp(ring))
+		if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+			devm_kfree(ring->dev, tx_buf->raw_buf);
+		else if (ice_ring_is_xdp(ring))
 			page_frag_free(tx_buf->raw_buf);
 		else
 			dev_kfree_skb_any(tx_buf->skb);
@@ -599,7 +603,8 @@ bool ice_alloc_rx_bufs(struct ice_ring *rx_ring, u16 cleaned_count)
 	struct ice_rx_buf *bi;
 
 	/* do nothing if no valid netdev defined */
-	if (!rx_ring->netdev || !cleaned_count)
+	if ((!rx_ring->netdev && rx_ring->vsi->type != ICE_VSI_CTRL) ||
+	    !cleaned_count)
 		return false;
 
 	/* get the Rx descriptor and buffer based on next_to_use */
@@ -997,7 +1002,7 @@ ice_is_non_eop(struct ice_ring *rx_ring, union ice_32b_rx_flex_desc *rx_desc,
  *
  * Returns amount of work completed
  */
-static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
+int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 {
 	unsigned int total_rx_bytes = 0, total_rx_pkts = 0;
 	u16 cleaned_count = ICE_DESC_UNUSED(rx_ring);
@@ -1040,6 +1045,12 @@ static int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget)
 		 */
 		dma_rmb();
 
+		if (rx_desc->wb.rxdid == FDIR_DESC_RXDID || !rx_ring->netdev) {
+			ice_put_rx_buf(rx_ring, NULL);
+			cleaned_count++;
+			continue;
+		}
+
 		size = le16_to_cpu(rx_desc->wb.pkt_len) &
 			ICE_RX_FLX_DESC_PKT_LEN_M;
 
@@ -2378,3 +2389,86 @@ netdev_tx_t ice_start_xmit(struct sk_buff *skb, struct net_device *netdev)
 
 	return ice_xmit_frame_ring(skb, tx_ring);
 }
+
+/**
+ * ice_clean_ctrl_tx_irq - interrupt handler for flow director Tx queue
+ * @tx_ring: tx_ring to clean
+ */
+void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring)
+{
+	struct ice_vsi *vsi = tx_ring->vsi;
+	s16 i = tx_ring->next_to_clean;
+	int budget = ICE_DFLT_IRQ_WORK;
+	struct ice_tx_desc *tx_desc;
+	struct ice_tx_buf *tx_buf;
+
+	tx_buf = &tx_ring->tx_buf[i];
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	i -= tx_ring->count;
+
+	do {
+		struct ice_tx_desc *eop_desc = tx_buf->next_to_watch;
+
+		/* if next_to_watch is not set then there is no pending work */
+		if (!eop_desc)
+			break;
+
+		/* prevent any other reads prior to eop_desc */
+		smp_rmb();
+
+		/* if the descriptor isn't done, no work to do */
+		if (!(eop_desc->cmd_type_offset_bsz &
+		      cpu_to_le64(ICE_TX_DESC_DTYPE_DESC_DONE)))
+			break;
+
+		/* clear next_to_watch to prevent false hangs */
+		tx_buf->next_to_watch = NULL;
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past filter desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		/* unmap the data header */
+		if (dma_unmap_len(tx_buf, len))
+			dma_unmap_single(tx_ring->dev,
+					 dma_unmap_addr(tx_buf, dma),
+					 dma_unmap_len(tx_buf, len),
+					 DMA_TO_DEVICE);
+		if (tx_buf->tx_flags & ICE_TX_FLAGS_DUMMY_PKT)
+			devm_kfree(tx_ring->dev, tx_buf->raw_buf);
+
+		/* reset the data buffer entry and clear its descriptor */
+		tx_buf->raw_buf = NULL;
+		tx_buf->tx_flags = 0;
+		tx_buf->next_to_watch = NULL;
+		dma_unmap_len_set(tx_buf, len, 0);
+		tx_desc->buf_addr = 0;
+		tx_desc->cmd_type_offset_bsz = 0;
+
+		/* move past eop_desc for start of next FD desc */
+		tx_buf++;
+		tx_desc++;
+		i++;
+		if (unlikely(!i)) {
+			i -= tx_ring->count;
+			tx_buf = tx_ring->tx_buf;
+			tx_desc = ICE_TX_DESC(tx_ring, 0);
+		}
+
+		budget--;
+	} while (likely(budget));
+
+	i += tx_ring->count;
+	tx_ring->next_to_clean = i;
+
+	/* re-enable interrupt if needed */
+	ice_irq_dynamic_ena(&vsi->back->hw, vsi, vsi->q_vectors[0]);
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 025dd642cf28..2209583c993e 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -113,6 +113,10 @@ static inline int ice_skb_pad(void)
 #define ICE_TX_FLAGS_TSO	BIT(0)
 #define ICE_TX_FLAGS_HW_VLAN	BIT(1)
 #define ICE_TX_FLAGS_SW_VLAN	BIT(2)
+/* ICE_TX_FLAGS_DUMMY_PKT is used to mark dummy packets that should be
+ * freed instead of returned like skb packets.
+ */
+#define ICE_TX_FLAGS_DUMMY_PKT	BIT(3)
 #define ICE_TX_FLAGS_IPV4	BIT(5)
 #define ICE_TX_FLAGS_IPV6	BIT(6)
 #define ICE_TX_FLAGS_TUNNEL	BIT(7)
@@ -376,5 +380,6 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring);
 void ice_free_tx_ring(struct ice_ring *tx_ring);
 void ice_free_rx_ring(struct ice_ring *rx_ring);
 int ice_napi_poll(struct napi_struct *napi, int budget);
-
+int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget);
+void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring);
 #endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 9f6578eb4672..74bdc00e869c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -118,7 +118,8 @@ enum ice_media_type {
 
 enum ice_vsi_type {
 	ICE_VSI_PF = 0,
-	ICE_VSI_VF,
+	ICE_VSI_VF = 1,
+	ICE_VSI_CTRL = 3,	/* equates to ICE_VSI_PF with 1 queue pair */
 	ICE_VSI_LB = 6,
 };
 
@@ -161,6 +162,34 @@ struct ice_phy_info {
 	u8 get_link_info;
 };
 
+/* protocol enumeration for filters */
+enum ice_fltr_ptype {
+	/* NONE - used for undef/error */
+	ICE_FLTR_PTYPE_NONF_NONE = 0,
+	ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+	ICE_FLTR_PTYPE_FRAG_IPV4,
+	ICE_FLTR_PTYPE_MAX,
+};
+
+enum ice_fd_hw_seg {
+	ICE_FD_HW_SEG_NON_TUN = 0,
+	ICE_FD_HW_SEG_TUN,
+	ICE_FD_HW_SEG_MAX,
+};
+
+/* 2 VSI = 1 ICE_VSI_PF + 1 ICE_VSI_CTRL */
+#define ICE_MAX_FDIR_VSI_PER_FILTER	2
+
+struct ice_fd_hw_prof {
+	struct ice_flow_seg_info *fdir_seg[ICE_FD_HW_SEG_MAX];
+	int cnt;
+	u64 entry_h[ICE_MAX_FDIR_VSI_PER_FILTER][ICE_FD_HW_SEG_MAX];
+	u16 vsi_h[ICE_MAX_FDIR_VSI_PER_FILTER];
+};
+
 /* Common HW capabilities for SW use */
 struct ice_hw_common_caps {
 	u32 valid_functions;
@@ -197,6 +226,8 @@ struct ice_hw_func_caps {
 	u32 num_allocd_vfs;		/* Number of allocated VFs */
 	u32 vf_base_id;			/* Logical ID of the first VF */
 	u32 guar_num_vsi;
+	u32 fd_fltr_guar;		/* Number of filters guaranteed */
+	u32 fd_fltr_best_effort;	/* Number of best effort filters */
 };
 
 /* Device wide capabilities */
@@ -204,6 +235,7 @@ struct ice_hw_dev_caps {
 	struct ice_hw_common_caps common_cap;
 	u32 num_vfs_exposed;		/* Total number of VFs exposed */
 	u32 num_vsi_allocd_to_host;	/* Excluding EMP VSI */
+	u32 num_flow_director_fltr;	/* Number of FD filters available */
 	u32 num_funcs;
 };
 
@@ -489,6 +521,8 @@ struct ice_hw {
 	u64 debug_mask;		/* bitmap for debug mask */
 	enum ice_mac_type mac_type;
 
+	u16 fd_ctr_base;	/* FD counter base index */
+
 	/* pci info */
 	u16 device_id;
 	u16 vendor_id;
@@ -587,6 +621,15 @@ struct ice_hw {
 	struct ice_blk_info blk[ICE_BLK_COUNT];
 	struct mutex fl_profs_locks[ICE_BLK_COUNT];	/* lock fltr profiles */
 	struct list_head fl_profs[ICE_BLK_COUNT];
+
+	/* Flow Director filter info */
+	int fdir_active_fltr;
+
+	struct mutex fdir_fltr_lock;	/* protect Flow Director */
+	struct list_head fdir_list_head;
+
+	struct ice_fd_hw_prof **fdir_prof;
+	DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
 	struct mutex rss_locks;	/* protect RSS configuration */
 	struct list_head rss_list_head;
 };
-- 
cgit v1.2.3-59-g8ed1b


From 4ab956462f67763be4049b03a414aa7c9b2d4c96 Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:41 -0700
Subject: ice: Support displaying ntuple rules

Add functionality for ethtool --show-ntuple, allowing for filters to be
displayed when set functionality is added. Add statistics related to
Flow Director matches and status.

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h              |   9 ++
 drivers/net/ethernet/intel/ice/ice_ethtool.c      |  17 +++
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 161 ++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.c         |  33 +++++
 drivers/net/ethernet/intel/ice/ice_fdir.h         |  47 +++++++
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h   |   1 +
 drivers/net/ethernet/intel/ice/ice_main.c         |   9 ++
 drivers/net/ethernet/intel/ice/ice_type.h         |   3 +
 8 files changed, 280 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index ffd11bc2e5f0..e0c9e4a30d82 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -525,6 +525,11 @@ static inline struct ice_vsi *ice_get_ctrl_vsi(struct ice_pf *pf)
 	return pf->vsi[pf->ctrl_vsi_idx];
 }
 
+#define ICE_FD_STAT_CTR_BLOCK_COUNT	256
+#define ICE_FD_STAT_PF_IDX(base_idx) \
+			((base_idx) * ICE_FD_STAT_CTR_BLOCK_COUNT)
+#define ICE_FD_SB_STAT_IDX(base_idx) ICE_FD_STAT_PF_IDX(base_idx)
+
 int ice_vsi_setup_tx_rings(struct ice_vsi *vsi);
 int ice_vsi_setup_rx_rings(struct ice_vsi *vsi);
 int ice_vsi_open_ctrl(struct ice_vsi *vsi);
@@ -552,6 +557,10 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
 void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs);
 void ice_fdir_release_flows(struct ice_hw *hw);
 int ice_fdir_create_dflt_rules(struct ice_pf *pf);
 int ice_open(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index d11960b21474..f77db28e1e4c 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -130,6 +130,8 @@ static const struct ice_stats ice_gstrings_pf_stats[] = {
 	ICE_PF_STAT("illegal_bytes.nic", stats.illegal_bytes),
 	ICE_PF_STAT("mac_local_faults.nic", stats.mac_local_faults),
 	ICE_PF_STAT("mac_remote_faults.nic", stats.mac_remote_faults),
+	ICE_PF_STAT("fdir_sb_match.nic", stats.fd_sb_match),
+	ICE_PF_STAT("fdir_sb_status.nic", stats.fd_sb_status),
 };
 
 static const u32 ice_regs_dump_list[] = {
@@ -2558,12 +2560,27 @@ ice_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd,
 	struct ice_netdev_priv *np = netdev_priv(netdev);
 	struct ice_vsi *vsi = np->vsi;
 	int ret = -EOPNOTSUPP;
+	struct ice_hw *hw;
+
+	hw = &vsi->back->hw;
 
 	switch (cmd->cmd) {
 	case ETHTOOL_GRXRINGS:
 		cmd->data = vsi->rss_size;
 		ret = 0;
 		break;
+	case ETHTOOL_GRXCLSRLCNT:
+		cmd->rule_cnt = hw->fdir_active_fltr;
+		/* report total rule count */
+		cmd->data = ice_get_fdir_cnt_all(hw);
+		ret = 0;
+		break;
+	case ETHTOOL_GRXCLSRULE:
+		ret = ice_get_ethtool_fdir_entry(hw, cmd);
+		break;
+	case ETHTOOL_GRXCLSRLALL:
+		ret = ice_get_fdir_fltr_ids(hw, cmd, (u32 *)rule_locs);
+		break;
 	case ETHTOOL_GRXFH:
 		ice_get_rss_hash_opt(vsi, cmd);
 		ret = 0;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 425bf6f00db1..9276ebf96d28 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -12,6 +12,167 @@
  */
 #define TNL_SEG_CNT(_TNL_) ((_TNL_) + 1)
 
+/**
+ * ice_fltr_to_ethtool_flow - convert filter type values to ethtool
+ * flow type values
+ * @flow: filter type to be converted
+ *
+ * Returns the corresponding ethtool flow type.
+ */
+static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow)
+{
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return TCP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return UDP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		return SCTP_V4_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		return IPV4_USER_FLOW;
+	default:
+		/* 0 is undefined ethtool flow */
+		return 0;
+	}
+}
+
+/**
+ * ice_ethtool_flow_to_fltr - convert ethtool flow type to filter enum
+ * @eth: Ethtool flow type to be converted
+ *
+ * Returns flow enum
+ */
+static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
+{
+	switch (eth) {
+	case TCP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+	case UDP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+	case SCTP_V4_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+	case IPV4_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+	default:
+		return ICE_FLTR_PTYPE_NONF_NONE;
+	}
+}
+
+/**
+ * ice_get_ethtool_fdir_entry - fill ethtool structure with fdir filter data
+ * @hw: hardware structure that contains filter list
+ * @cmd: ethtool command data structure to receive the filter data
+ *
+ * Returns 0 on success and -EINVAL on failure
+ */
+int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *rule;
+	int ret = 0;
+	u16 idx;
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	rule = ice_fdir_find_fltr_by_idx(hw, fsp->location);
+
+	if (!rule || fsp->location != rule->fltr_id) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	fsp->flow_type = ice_fltr_to_ethtool_flow(rule->flow_type);
+
+	memset(&fsp->m_u, 0, sizeof(fsp->m_u));
+	memset(&fsp->m_ext, 0, sizeof(fsp->m_ext));
+
+	switch (fsp->flow_type) {
+	case IPV4_USER_FLOW:
+		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
+		fsp->h_u.usr_ip4_spec.proto = 0;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip.tos;
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->mask.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
+		fsp->m_u.usr_ip4_spec.proto = 0;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->mask.tos;
+		break;
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip.dst_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->mask.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->mask.dst_port;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.dst_ip;
+		break;
+	default:
+		break;
+	}
+
+	if (rule->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT)
+		fsp->ring_cookie = RX_CLS_FLOW_DISC;
+	else
+		fsp->ring_cookie = rule->q_index;
+
+	idx = ice_ethtool_flow_to_fltr(fsp->flow_type);
+	if (idx == ICE_FLTR_PTYPE_NONF_NONE) {
+		dev_err(ice_hw_to_dev(hw), "Missing input index for flow_type %d\n",
+			rule->flow_type);
+		ret = -EINVAL;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	return ret;
+}
+
+/**
+ * ice_get_fdir_fltr_ids - fill buffer with filter IDs of active filters
+ * @hw: hardware structure containing the filter list
+ * @cmd: ethtool command data structure
+ * @rule_locs: ethtool array passed in from OS to receive filter IDs
+ *
+ * Returns 0 on success, -EMSGSIZE if filters exceed cmd->rule_cnt
+ */
+int
+ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
+		      u32 *rule_locs)
+{
+	struct ice_fdir_fltr *f_rule;
+	unsigned int cnt = 0;
+	int val = 0;
+
+	/* report total rule count */
+	cmd->data = ice_get_fdir_cnt_all(hw);
+
+	mutex_lock(&hw->fdir_fltr_lock);
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		if (cnt == cmd->rule_cnt) {
+			val = -EMSGSIZE;
+			goto release_lock;
+		}
+		rule_locs[cnt] = f_rule->fltr_id;
+		cnt++;
+	}
+
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+	if (!val)
+		cmd->rule_cnt = cnt;
+	return val;
+}
+
 /**
  * ice_fdir_get_hw_prof - return the ice_fd_hw_proc associated with a flow
  * @hw: hardware structure containing the filter list
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 878fa4df9453..1f423e50182c 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -52,3 +52,36 @@ ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr)
 				  ICE_AQC_RES_TYPE_FLAG_DEDICATED, num_fltr,
 				  cntr_id);
 }
+
+/**
+ * ice_get_fdir_cnt_all - get the number of Flow Director filters
+ * @hw: hardware data structure
+ *
+ * Returns the number of filters available on device
+ */
+int ice_get_fdir_cnt_all(struct ice_hw *hw)
+{
+	return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
+}
+
+/**
+ * ice_fdir_find_fltr_by_idx - find filter with idx
+ * @hw: pointer to hardware structure
+ * @fltr_idx: index to find.
+ *
+ * Returns pointer to filter if found or null
+ */
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx)
+{
+	struct ice_fdir_fltr *rule;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found in the list */
+		if (fltr_idx == rule->fltr_id)
+			return rule;
+		if (fltr_idx < rule->fltr_id)
+			break;
+	}
+	return NULL;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index feac47adde6e..1b69249b40c1 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -3,10 +3,57 @@
 
 #ifndef _ICE_FDIR_H_
 #define _ICE_FDIR_H_
+
+enum ice_fltr_prgm_desc_dest {
+	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
+	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
+};
+
+struct ice_fdir_v4 {
+	__be32 dst_ip;
+	__be32 src_ip;
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header;
+	__be32 sec_parm_idx;	/* security parameter index */
+	u8 tos;
+	u8 ip_ver;
+	u8 proto;
+};
+
+struct ice_fdir_extra {
+	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
+	u32 usr_def[2];		/* user data */
+	__be16 vlan_type;	/* VLAN ethertype */
+	__be16 vlan_tag;	/* VLAN tag info */
+};
+
+struct ice_fdir_fltr {
+	struct list_head fltr_node;
+	enum ice_fltr_ptype flow_type;
+
+	struct ice_fdir_v4 ip;
+	struct ice_fdir_v4 mask;
+
+	struct ice_fdir_extra ext_data;
+	struct ice_fdir_extra ext_mask;
+
+	/* filter control */
+	u16 q_index;
+	u16 dest_vsi;
+	u8 dest_ctl;
+	u8 fltr_status;
+	u16 cnt_index;
+	u32 fltr_id;
+};
+
 enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
 enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
 enum ice_status
 ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
 enum ice_status
 ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+int ice_get_fdir_cnt_all(struct ice_hw *hw);
+struct ice_fdir_fltr *
+ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx);
 #endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 3c61b2a04fc4..3376cdf5667f 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -344,6 +344,7 @@
 #define GLPRT_TDOLD(_i)				(0x00381280 + ((_i) * 8))
 #define GLPRT_UPRCL(_i)				(0x00381300 + ((_i) * 8))
 #define GLPRT_UPTCL(_i)				(0x003811C0 + ((_i) * 8))
+#define GLSTAT_FD_CNT0L(_i)			(0x003A0000 + ((_i) * 8))
 #define GLV_BPRCL(_i)				(0x003B6000 + ((_i) * 8))
 #define GLV_BPTCL(_i)				(0x0030E000 + ((_i) * 8))
 #define GLV_GORCL(_i)				(0x003B0000 + ((_i) * 8))
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 2cc1f345c845..fecc3b29a4de 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4281,6 +4281,7 @@ void ice_update_pf_stats(struct ice_pf *pf)
 {
 	struct ice_hw_port_stats *prev_ps, *cur_ps;
 	struct ice_hw *hw = &pf->hw;
+	u16 fd_ctr_base;
 	u8 port;
 
 	port = hw->port_info->lport;
@@ -4369,6 +4370,12 @@ void ice_update_pf_stats(struct ice_pf *pf)
 	ice_stat_update40(hw, GLPRT_PTC9522L(port), pf->stat_prev_loaded,
 			  &prev_ps->tx_size_big, &cur_ps->tx_size_big);
 
+	fd_ctr_base = hw->fd_ctr_base;
+
+	ice_stat_update40(hw,
+			  GLSTAT_FD_CNT0L(ICE_FD_SB_STAT_IDX(fd_ctr_base)),
+			  pf->stat_prev_loaded, &prev_ps->fd_sb_match,
+			  &cur_ps->fd_sb_match);
 	ice_stat_update32(hw, GLPRT_LXONRXC(port), pf->stat_prev_loaded,
 			  &prev_ps->link_xon_rx, &cur_ps->link_xon_rx);
 
@@ -4412,6 +4419,8 @@ void ice_update_pf_stats(struct ice_pf *pf)
 	ice_stat_update32(hw, GLPRT_RJC(port), pf->stat_prev_loaded,
 			  &prev_ps->rx_jabber, &cur_ps->rx_jabber);
 
+	cur_ps->fd_sb_status = test_bit(ICE_FLAG_FD_ENA, pf->flags) ? 1 : 0;
+
 	pf->stat_prev_loaded = true;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 74bdc00e869c..0c14d89f7be9 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -691,6 +691,9 @@ struct ice_hw_port_stats {
 	u64 tx_size_1023;		/* ptc1023 */
 	u64 tx_size_1522;		/* ptc1522 */
 	u64 tx_size_big;		/* ptc9522 */
+	/* flow director stats */
+	u32 fd_sb_status;
+	u64 fd_sb_match;
 };
 
 /* Checksum and Shadow RAM pointers */
-- 
cgit v1.2.3-59-g8ed1b


From cac2a27cd9ab1638e21df11ec583d2bc919a3ae3 Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:42 -0700
Subject: ice: Support IPv4 Flow Director filters

Support the addition and deletion of IPv4 filters.

Supported fields are: src-ip, dst-ip, src-port, and dst-port
Supported flow-types are: tcp4, udp4, sctp4, ip4

Example usage:

ethtool -N eth0 flow-type tcp4 src-ip 192.168.0.55 dst-ip 172.16.0.55 \
src-port 16 dst-port 12 action 32

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h              |   4 +
 drivers/net/ethernet/intel/ice/ice_ethtool.c      |   4 +
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 658 ++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.c         | 513 +++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_fdir.h         |  79 +++
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c    |  34 ++
 drivers/net/ethernet/intel/ice/ice_flex_pipe.h    |   3 +
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h   |   6 +
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h    | 101 ++++
 drivers/net/ethernet/intel/ice/ice_txrx.c         |  82 +++
 drivers/net/ethernet/intel/ice/ice_txrx.h         |   3 +
 drivers/net/ethernet/intel/ice/ice_type.h         |   6 +
 12 files changed, 1493 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index e0c9e4a30d82..298a65a3799c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -96,6 +96,7 @@ extern const char ice_drv_ver[];
 #define ICE_TX_DESC(R, i) (&(((struct ice_tx_desc *)((R)->desc))[i]))
 #define ICE_RX_DESC(R, i) (&(((union ice_32b_rx_flex_desc *)((R)->desc))[i]))
 #define ICE_TX_CTX_DESC(R, i) (&(((struct ice_tx_ctx_desc *)((R)->desc))[i]))
+#define ICE_TX_FDIRDESC(R, i) (&(((struct ice_fltr_desc *)((R)->desc))[i]))
 
 /* Macro for each VSI in a PF */
 #define ice_for_each_vsi(pf, i) \
@@ -216,6 +217,7 @@ enum ice_state {
 	__ICE_CFG_BUSY,
 	__ICE_SERVICE_SCHED,
 	__ICE_SERVICE_DIS,
+	__ICE_FD_FLUSH_REQ,
 	__ICE_OICR_INTR_DIS,		/* Global OICR interrupt disabled */
 	__ICE_MDD_VF_PRINT_PENDING,	/* set when MDD event handle */
 	__ICE_VF_RESETS_DISABLED,	/* disable resets during ice_remove */
@@ -557,6 +559,8 @@ void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
 void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
 int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd);
 int
 ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index f77db28e1e4c..72105d70cead 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -2537,6 +2537,10 @@ static int ice_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *cmd)
 	struct ice_vsi *vsi = np->vsi;
 
 	switch (cmd->cmd) {
+	case ETHTOOL_SRXCLSRLINS:
+		return ice_add_fdir_ethtool(vsi, cmd);
+	case ETHTOOL_SRXCLSRLDEL:
+		return ice_del_fdir_ethtool(vsi, cmd);
 	case ETHTOOL_SRXFH:
 		return ice_set_rss_hash_opt(vsi, cmd);
 	default:
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 9276ebf96d28..6badf2ef2255 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -265,6 +265,43 @@ void ice_fdir_release_flows(struct ice_hw *hw)
 		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
 }
 
+/**
+ * ice_fdir_num_avail_fltr - return the number of unused flow director filters
+ * @hw: pointer to hardware structure
+ * @vsi: software VSI structure
+ *
+ * There are 2 filter pools: guaranteed and best effort (shared). Each VSI can
+ * use filters from either pool. The guaranteed pool is divided between VSIs.
+ * The best effort pool is a device-wide resource shared by all VSIs. The
+ * number of filters available to this VSI is its unused guaranteed filters
+ * plus the unused global best effort filters, both derived from HW counters.
+ * NOTE(review): the u16 subtractions wrap if a counter exceeds its pool size.
+ *
+ * Returns the number of flow director filters available to this VSI
+ */
+static int ice_fdir_num_avail_fltr(struct ice_hw *hw, struct ice_vsi *vsi)
+{
+	u16 vsi_num = ice_get_hw_vsi_num(hw, vsi->idx);
+	u16 num_guar;
+	u16 num_be;
+
+	/* total guaranteed filters assigned to this VSI */
+	num_guar = vsi->num_gfltr;
+
+	/* minus the guaranteed filters programmed by this VSI */
+	num_guar -= (rd32(hw, VSIQF_FD_CNT(vsi_num)) &
+		     VSIQF_FD_CNT_FD_GCNT_M) >> VSIQF_FD_CNT_FD_GCNT_S;
+
+	/* total global best effort filters */
+	num_be = hw->func_caps.fd_fltr_best_effort;
+
+	/* minus the global best effort filters programmed */
+	num_be -= (rd32(hw, GLQF_FD_CNT) & GLQF_FD_CNT_FD_BCNT_M) >>
+		   GLQF_FD_CNT_FD_BCNT_S;
+
+	return num_guar + num_be;
+}
+
 /**
  * ice_fdir_alloc_flow_prof - allocate FDir flow profile structure(s)
  * @hw: HW structure containing the FDir flow profile structure(s)
@@ -344,6 +381,14 @@ ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
 		if (!memcmp(old_seg, seg, sizeof(*seg)))
 			return -EEXIST;
 
+		/* if there are FDir filters using this flow,
+		 * then return error.
+		 */
+		if (hw->fdir_fltr_cnt[flow]) {
+			dev_err(dev, "Failed to add filter.  Flow director filters on each port must have the same input set.\n");
+			return -EINVAL;
+		}
+
 		/* remove HW filter definition */
 		ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
 	}
@@ -508,6 +553,347 @@ err_exit:
 	return -EOPNOTSUPP;
 }
 
+/**
+ * ice_set_fdir_ip4_seg - fill an IPv4 + L4 flow segment from ethtool masks
+ * @seg: flow segment for programming
+ * @tcp_ip4_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Each mask must be all ones (field is matched) or zero (field is ignored).
+ * Returns 0, -EINVAL for an empty rule, -EOPNOTSUPP for unsupported input.
+ */
+static int
+ice_set_fdir_ip4_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip4_spec *tcp_ip4_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!tcp_ip4_spec->psrc && !tcp_ip4_spec->ip4src &&
+	    !tcp_ip4_spec->pdst && !tcp_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	/* filtering on TOS not supported */
+	if (tcp_ip4_spec->tos)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EOPNOTSUPP;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+
+	/* IP source address: all-ones mask matches it, zero mask ignores it */
+	if (tcp_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (tcp_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!tcp_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip4_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip4_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip4_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip4_usr_seg - fill an IPv4-only flow segment from ethtool masks
+ * @seg: flow segment for programming
+ * @usr_ip4_spec: mask data from ethtool (IPv4 user flow spec)
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Only all-ones or zero source/destination address masks are accepted; any
+ * other masked field is rejected with -EINVAL or -EOPNOTSUPP.
+ */
+static int
+ice_set_fdir_ip4_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip4_spec *usr_ip4_spec,
+			 bool *perfect_fltr)
+{
+	/* masking the first 4 bytes of the Layer 4 header is rejected */
+	if (usr_ip4_spec->l4_4_bytes)
+		return -EINVAL;
+	if (usr_ip4_spec->tos)
+		return -EINVAL;
+	if (usr_ip4_spec->ip_ver)
+		return -EINVAL;
+	/* Filtering on Layer 4 protocol not supported */
+	if (usr_ip4_spec->proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!usr_ip4_spec->ip4src && !usr_ip4_spec->ip4dst)
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4);
+
+	/* IP source address: all-ones mask matches it, zero mask ignores it */
+	if (usr_ip4_spec->ip4src == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4src)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* IP destination address */
+	if (usr_ip4_spec->ip4dst == htonl(0xFFFFFFFF))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!usr_ip4_spec->ip4dst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
+ * @pf: PF structure
+ * @fsp: pointer to ethtool Rx flow specification
+ *
+ * Returns 0 on success, negative errno on failure.
+ */
+static int
+ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp)
+{
+	struct ice_flow_seg_info *seg, *tun_seg;
+	struct device *dev = ice_pf_to_dev(pf);
+	enum ice_fltr_ptype fltr_idx;
+	struct ice_hw *hw = &pf->hw;
+	bool perfect_filter;
+	int ret;
+
+	seg = devm_kzalloc(dev, sizeof(*seg), GFP_KERNEL);
+	if (!seg)
+		return -ENOMEM;
+
+	tun_seg = devm_kzalloc(dev, sizeof(*seg) * ICE_FD_HW_SEG_MAX,
+			       GFP_KERNEL);
+	if (!tun_seg) {
+		devm_kfree(dev, seg);
+		return -ENOMEM;
+	}
+
+	switch (fsp->flow_type & ~FLOW_EXT) {
+	case TCP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V4_FLOW:
+		ret = ice_set_fdir_ip4_seg(seg, &fsp->m_u.tcp_ip4_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV4_USER_FLOW:
+		ret = ice_set_fdir_ip4_usr_seg(seg, &fsp->m_u.usr_ip4_spec,
+					       &perfect_filter);
+		break;
+	default:
+		ret = -EINVAL;
+	}
+	if (ret)
+		goto err_exit;
+
+	/* tunnel segments are shifted up one. */
+	memcpy(&tun_seg[1], seg, sizeof(*seg));
+
+	/* add filter for outer headers */
+	fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
+	ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
+					ICE_FD_HW_SEG_NON_TUN);
+	if (ret == -EEXIST)
+		/* Rule already exists, free memory and continue */
+		devm_kfree(dev, seg);
+	else if (ret)
+		/* could not write filter, free memory */
+		goto err_exit;
+
+	/* seg may be freed by now; tun_seg[1] already holds its copy */
+	/* make tunneled filter HW entries if possible */
+	ret = ice_fdir_set_hw_fltr_rule(pf, tun_seg, fltr_idx,
+					ICE_FD_HW_SEG_TUN);
+	if (ret == -EEXIST) {
+		/* Rule already exists, free memory and count as success */
+		devm_kfree(dev, tun_seg);
+		ret = 0;
+	} else if (ret) {
+		/* could not write tunnel filter, but outer filter exists */
+		devm_kfree(dev, tun_seg);
+	}
+
+	if (perfect_filter)
+		set_bit(fltr_idx, hw->fdir_perfect_fltr);
+	else
+		clear_bit(fltr_idx, hw->fdir_perfect_fltr);
+
+	return ret;
+
+err_exit:
+	devm_kfree(dev, tun_seg);
+	devm_kfree(dev, seg);
+
+	return -EOPNOTSUPP;
+}
+
+/**
+ * ice_fdir_write_fltr - send a flow director filter to the hardware
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removes filter
+ * @is_tun: true adds inner filter on tunnel and false outer headers
+ *
+ * returns 0 on success and negative value on error
+ */
+static int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+	struct ice_hw *hw = &pf->hw;
+	struct ice_fltr_desc desc;
+	struct ice_vsi *ctrl_vsi;
+	enum ice_status status;
+	u8 *pkt, *frag_pkt;
+	bool has_frag;
+	int err;
+
+	ctrl_vsi = ice_get_ctrl_vsi(pf);
+	if (!ctrl_vsi)
+		return -EINVAL;
+
+	pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!pkt)
+		return -ENOMEM;
+	frag_pkt = devm_kzalloc(dev, ICE_FDIR_MAX_RAW_PKT_SIZE, GFP_KERNEL);
+	if (!frag_pkt) {
+		err = -ENOMEM;
+		goto err_free;
+	}
+
+	ice_fdir_get_prgm_desc(hw, input, &desc, add);
+	status = ice_fdir_get_gen_prgm_pkt(hw, input, pkt, false, is_tun);
+	if (status) {
+		err = ice_status_to_errno(status);
+		goto err_free_all;
+	}
+	err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, pkt);
+	if (err)
+		goto err_free_all;
+
+	/* repeat for fragment packet; from here on pkt is never freed locally */
+	has_frag = ice_fdir_has_frag(input->flow_type);
+	if (has_frag) {
+		/* does not return error */
+		ice_fdir_get_prgm_desc(hw, input, &desc, add);
+		status = ice_fdir_get_gen_prgm_pkt(hw, input, frag_pkt, true,
+						   is_tun);
+		if (status) {
+			err = ice_status_to_errno(status);
+			goto err_frag;
+		}
+		err = ice_prgm_fdir_fltr(ctrl_vsi, &desc, frag_pkt);
+		if (err)
+			goto err_frag;
+	} else {
+		devm_kfree(dev, frag_pkt);
+	}
+
+	return 0;
+
+err_free_all:
+	devm_kfree(dev, frag_pkt);
+err_free:
+	devm_kfree(dev, pkt);
+	return err;
+
+err_frag:
+	devm_kfree(dev, frag_pkt);
+	return err;
+}
+
+/**
+ * ice_fdir_write_all_fltr - write a filter for outer and tunneled headers
+ * @pf: PF data structure
+ * @input: filter structure
+ * @add: true adds filter and false removes filter
+ *
+ * returns 0 on success and negative value on error
+ */
+static int
+ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			bool add)
+{
+	u16 port_num;
+	int tun;
+
+	for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+		bool is_tun = tun == ICE_FD_HW_SEG_TUN;
+		int err;
+
+		if (is_tun && !ice_get_open_tunnel_port(&pf->hw, TNL_ALL,
+							&port_num))
+			continue;
+		err = ice_fdir_write_fltr(pf, input, add, is_tun);
+		if (err)
+			return err;
+	}
+	return 0;
+}
+
 /**
  * ice_fdir_create_dflt_rules - create default perfect filters
  * @pf: PF data structure
@@ -535,6 +921,7 @@ int ice_fdir_create_dflt_rules(struct ice_pf *pf)
  */
 void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
 {
+	struct ice_fdir_fltr *f_rule, *tmp;
 	struct ice_pf *pf = vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	enum ice_fltr_ptype flow;
@@ -548,6 +935,13 @@ void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
 	mutex_lock(&hw->fdir_fltr_lock);
 	if (!test_and_clear_bit(ICE_FLAG_FD_ENA, pf->flags))
 		goto release_lock;
+	list_for_each_entry_safe(f_rule, tmp, &hw->fdir_list_head, fltr_node) {
+		/* ignore return value */
+		ice_fdir_write_all_fltr(pf, f_rule, false);
+		ice_fdir_update_cntrs(hw, f_rule->flow_type, false);
+		list_del(&f_rule->fltr_node);
+		devm_kfree(ice_hw_to_dev(hw), f_rule);
+	}
 
 	if (hw->fdir_prof)
 		for (flow = ICE_FLTR_PTYPE_NONF_NONE; flow < ICE_FLTR_PTYPE_MAX;
@@ -558,3 +952,267 @@ void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena)
 release_lock:
 	mutex_unlock(&hw->fdir_fltr_lock);
 }
+
+/**
+ * ice_fdir_update_list_entry - add or delete a filter from the filter list
+ * @pf: PF structure
+ * @input: filter to add, or NULL to only delete the filter at @fltr_idx
+ * @fltr_idx: ethtool index of filter to modify
+ *
+ * returns 0 on success, -ENOENT if nothing to delete, other negative on errors
+ */
+static int
+ice_fdir_update_list_entry(struct ice_pf *pf, struct ice_fdir_fltr *input,
+			   int fltr_idx)
+{
+	struct ice_fdir_fltr *old_fltr;
+	struct ice_hw *hw = &pf->hw;
+	int err = -ENOENT;
+
+	/* Do not update filters during reset */
+	if (ice_is_reset_in_progress(pf->state))
+		return -EBUSY;
+
+	old_fltr = ice_fdir_find_fltr_by_idx(hw, fltr_idx);
+	if (old_fltr) {
+		err = ice_fdir_write_all_fltr(pf, old_fltr, false);
+		if (err)
+			return err;
+		ice_fdir_update_cntrs(hw, old_fltr->flow_type, false);
+		if (!input && !hw->fdir_fltr_cnt[old_fltr->flow_type])
+			/* we just deleted the last filter of flow_type so we
+			 * should also delete the HW filter info.
+			 */
+			ice_fdir_rem_flow(hw, ICE_BLK_FD, old_fltr->flow_type);
+		list_del(&old_fltr->fltr_node);
+		devm_kfree(ice_hw_to_dev(hw), old_fltr);
+	}
+	if (!input)
+		return err;
+	ice_fdir_list_add_fltr(hw, input);
+	ice_fdir_update_cntrs(hw, input->flow_type, true);
+	return 0;
+}
+
+/**
+ * ice_del_fdir_ethtool - delete Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: ethtool command; fs.location selects the filter to delete
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp =
+		(struct ethtool_rx_flow_spec *)&cmd->fs;
+	struct ice_pf *pf = vsi->back;
+	struct ice_hw *hw = &pf->hw;
+	int val;
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not delete filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(ice_pf_to_dev(pf), "Device is resetting - deleting Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	if (test_bit(__ICE_FD_FLUSH_REQ, pf->state))
+		return -EBUSY;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	val = ice_fdir_update_list_entry(pf, NULL, fsp->location);
+	mutex_unlock(&hw->fdir_fltr_lock);
+
+	return val;
+}
+
+/**
+ * ice_set_fdir_input_set - fill a filter from an ethtool Rx flow spec
+ * @vsi: pointer to target VSI
+ * @fsp: pointer to ethtool Rx flow specification
+ * @input: filter structure to populate; returns 0 on success or -EINVAL
+ */
+static int
+ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_fdir_fltr *input)
+{
+	u16 dest_vsi, q_index = 0;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int flow_type;
+	u8 dest_ctl;
+
+	if (!vsi || !fsp || !input)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+
+	dest_vsi = vsi->idx;
+	if (fsp->ring_cookie == RX_CLS_FLOW_DISC) {
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DROP_PKT;
+	} else {
+		u32 ring = ethtool_get_flow_spec_ring(fsp->ring_cookie);
+		u8 vf = ethtool_get_flow_spec_ring_vf(fsp->ring_cookie);
+
+		if (vf) {
+			dev_err(ice_pf_to_dev(pf), "Failed to add filter. Flow director filters are not supported on VF queues.\n");
+			return -EINVAL;
+		}
+
+		if (ring >= vsi->num_rxq)
+			return -EINVAL;
+
+		dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+		q_index = ring;
+	}
+
+	input->fltr_id = fsp->location;
+	input->q_index = q_index;
+	flow_type = fsp->flow_type & ~FLOW_EXT;
+
+	input->dest_vsi = dest_vsi;
+	input->dest_ctl = dest_ctl;
+	input->fltr_status = ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID;
+	input->cnt_index = ICE_FD_SB_STAT_IDX(hw->fd_ctr_base);
+	input->flow_type = ice_ethtool_flow_to_fltr(flow_type);
+
+	if (fsp->flow_type & FLOW_EXT) {
+		memcpy(input->ext_data.usr_def, fsp->h_ext.data,
+		       sizeof(input->ext_data.usr_def));
+		input->ext_data.vlan_type = fsp->h_ext.vlan_etype;
+		input->ext_data.vlan_tag = fsp->h_ext.vlan_tci;
+		memcpy(input->ext_mask.usr_def, fsp->m_ext.data,
+		       sizeof(input->ext_mask.usr_def));
+		input->ext_mask.vlan_type = fsp->m_ext.vlan_etype;
+		input->ext_mask.vlan_tag = fsp->m_ext.vlan_tci;
+	}
+
+	switch (flow_type) {
+	case TCP_V4_FLOW:
+	case UDP_V4_FLOW:
+	case SCTP_V4_FLOW:
+		input->ip.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		input->ip.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		input->ip.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		input->ip.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		input->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		input->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		input->mask.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		input->mask.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		break;
+	case IPV4_USER_FLOW:
+		input->ip.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		input->ip.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		input->ip.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		input->ip.proto = fsp->h_u.usr_ip4_spec.proto;
+		input->ip.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
+		input->ip.tos = fsp->h_u.usr_ip4_spec.tos;
+		input->mask.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		input->mask.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		input->mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		input->mask.proto = fsp->m_u.usr_ip4_spec.proto;
+		input->mask.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
+		input->mask.tos = fsp->m_u.usr_ip4_spec.tos;
+		break;
+	default:
+		/* not doing un-parsed flow types */
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_add_fdir_ethtool - Add or replace a Flow Director filter
+ * @vsi: pointer to target VSI
+ * @cmd: ethtool command carrying the flow spec of the filter to add
+ *
+ * Returns 0 on success and negative values for failure
+ */
+int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
+{
+	struct ethtool_rx_flow_spec *fsp;
+	struct ice_fdir_fltr *input;
+	struct device *dev;
+	struct ice_pf *pf;
+	struct ice_hw *hw;
+	int fltrs_needed;
+	u16 tunnel_port;
+	int ret;
+
+	if (!vsi)
+		return -EINVAL;
+
+	pf = vsi->back;
+	hw = &pf->hw;
+	dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_FD_ENA, pf->flags))
+		return -EOPNOTSUPP;
+
+	/* Do not program filters during reset */
+	if (ice_is_reset_in_progress(pf->state)) {
+		dev_err(dev, "Device is resetting - adding Flow Director filters not supported during reset\n");
+		return -EBUSY;
+	}
+
+	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
+
+	if (fsp->flow_type & FLOW_MAC_EXT)
+		return -EINVAL;
+
+	ret = ice_cfg_fdir_xtrct_seq(pf, fsp);
+	if (ret)
+		return ret;
+
+	if (fsp->location >= ice_get_fdir_cnt_all(hw)) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	/* return error if not an update and no available filters */
+	fltrs_needed = ice_get_open_tunnel_port(hw, TNL_ALL, &tunnel_port) ?
+		2 : 1;
+	if (!ice_fdir_find_fltr_by_idx(hw, fsp->location) &&
+	    ice_fdir_num_avail_fltr(hw, pf->vsi[vsi->idx]) < fltrs_needed) {
+		dev_err(dev, "Failed to add filter.  The maximum number of flow director filters has been reached.\n");
+		return -ENOSPC;
+	}
+
+	input = devm_kzalloc(dev, sizeof(*input), GFP_KERNEL);
+	if (!input)
+		return -ENOMEM;
+
+	ret = ice_set_fdir_input_set(vsi, fsp, input);
+	if (ret)
+		goto free_input;
+
+	mutex_lock(&hw->fdir_fltr_lock);
+	if (ice_fdir_is_dup_fltr(hw, input)) {
+		ret = -EINVAL;
+		goto release_lock;
+	}
+
+	/* link input into the SW list; on failure it was never linked */
+	ret = ice_fdir_update_list_entry(pf, input, fsp->location);
+	if (ret)
+		goto release_lock;
+	ret = ice_fdir_write_all_fltr(pf, input, true);
+	if (ret)
+		goto remove_sw_rule;
+	goto release_lock;
+
+remove_sw_rule:
+	ice_fdir_update_cntrs(hw, input->flow_type, false);
+	list_del(&input->fltr_node);
+release_lock:
+	mutex_unlock(&hw->fdir_fltr_lock);
+free_input:
+	if (ret)
+		devm_kfree(dev, input);
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 1f423e50182c..60a824363f06 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -3,6 +3,261 @@
 
 #include "ice_common.h"
 
+/* These are training packet headers used to program flow director filters. */
+static const u8 ice_fdir_tcpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x28, 0x00, 0x01, 0x00, 0x00, 0x40, 0x06,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x50, 0x00,
+	0x20, 0x00, 0x00, 0x00, 0x00, 0x00
+};
+
+static const u8 ice_fdir_udpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x1C, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x20, 0x00, 0x00, 0x40, 0x00, 0x40, 0x84,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv4_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x14, 0x00, 0x00, 0x40, 0x00, 0x40, 0x10,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00
+};
+
+static const u8 ice_fdir_tcp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x28, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x06, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x50, 0x00, 0x20, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x4e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x1c, 0x00, 0x00, 0x40, 0x00,
+	0x40, 0x11, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x52, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x20, 0x00, 0x01, 0x00, 0x00,
+	0x40, 0x84, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip4_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x46, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x08, 0x00,
+	0x45, 0x00, 0x00, 0x14, 0x00, 0x00, 0x00, 0x00,
+	0x40, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+/* Flow Director no-op training packet table */
+static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_TCP,
+		sizeof(ice_fdir_tcpv4_pkt), ice_fdir_tcpv4_pkt,
+		sizeof(ice_fdir_tcp4_tun_pkt), ice_fdir_tcp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_UDP,
+		sizeof(ice_fdir_udpv4_pkt), ice_fdir_udpv4_pkt,
+		sizeof(ice_fdir_udp4_tun_pkt), ice_fdir_udp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
+		sizeof(ice_fdir_sctpv4_pkt), ice_fdir_sctpv4_pkt,
+		sizeof(ice_fdir_sctp4_tun_pkt), ice_fdir_sctp4_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
+		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
+		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
+	},
+};
+
+#define ICE_FDIR_NUM_PKT ARRAY_SIZE(ice_fdir_pkt)
+
+/**
+ * ice_set_dflt_val_fd_desc - load default FD descriptor context values
+ * @fd_fltr_ctx: pointer to fd filter descriptor context to initialize
+ */
+static void ice_set_dflt_val_fd_desc(struct ice_fd_fltr_desc_ctx *fd_fltr_ctx)
+{
+	fd_fltr_ctx->comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fd_fltr_ctx->comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	fd_fltr_ctx->fd_space = ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST;
+	fd_fltr_ctx->cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fd_fltr_ctx->evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE;
+	fd_fltr_ctx->toq = ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX;
+	fd_fltr_ctx->toq_prio = ICE_FXD_FLTR_QW0_TO_Q_PRIO1;
+	fd_fltr_ctx->dpu_recipe = ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT;
+	fd_fltr_ctx->drop = ICE_FXD_FLTR_QW0_DROP_NO;
+	fd_fltr_ctx->flex_prio = ICE_FXD_FLTR_QW0_FLEX_PRI_NONE;
+	fd_fltr_ctx->flex_mdid = ICE_FXD_FLTR_QW0_FLEX_MDID0;
+	fd_fltr_ctx->flex_val = ICE_FXD_FLTR_QW0_FLEX_VAL0;
+	fd_fltr_ctx->dtype = ICE_TX_DESC_DTYPE_FLTR_PROG;
+	fd_fltr_ctx->desc_prof_prio = ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO;
+	fd_fltr_ctx->desc_prof = ICE_FXD_FLTR_QW1_PROF_ZERO;
+	fd_fltr_ctx->swap = ICE_FXD_FLTR_QW1_SWAP_SET;
+	fd_fltr_ctx->fdid_prio = ICE_FXD_FLTR_QW1_FDID_PRI_ONE;
+	fd_fltr_ctx->fdid_mdid = ICE_FXD_FLTR_QW1_FDID_MDID_FD;
+	fd_fltr_ctx->fdid = ICE_FXD_FLTR_QW1_FDID_ZERO;
+}
+
+/**
+ * ice_set_fd_desc_val - pack a descriptor context into a filter descriptor
+ * @ctx: pointer to fd filter descriptor context
+ * @fdir_desc: populated with fd filter descriptor values
+ */
+static void
+ice_set_fd_desc_val(struct ice_fd_fltr_desc_ctx *ctx,
+		    struct ice_fltr_desc *fdir_desc)
+{
+	u64 qword;
+
+	/* prep QW0 of FD filter programming desc: shift and mask each field */
+	qword = ((u64)ctx->qindex << ICE_FXD_FLTR_QW0_QINDEX_S) &
+		ICE_FXD_FLTR_QW0_QINDEX_M;
+	qword |= ((u64)ctx->comp_q << ICE_FXD_FLTR_QW0_COMP_Q_S) &
+		 ICE_FXD_FLTR_QW0_COMP_Q_M;
+	qword |= ((u64)ctx->comp_report << ICE_FXD_FLTR_QW0_COMP_REPORT_S) &
+		 ICE_FXD_FLTR_QW0_COMP_REPORT_M;
+	qword |= ((u64)ctx->fd_space << ICE_FXD_FLTR_QW0_FD_SPACE_S) &
+		 ICE_FXD_FLTR_QW0_FD_SPACE_M;
+	qword |= ((u64)ctx->cnt_index << ICE_FXD_FLTR_QW0_STAT_CNT_S) &
+		 ICE_FXD_FLTR_QW0_STAT_CNT_M;
+	qword |= ((u64)ctx->cnt_ena << ICE_FXD_FLTR_QW0_STAT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_STAT_ENA_M;
+	qword |= ((u64)ctx->evict_ena << ICE_FXD_FLTR_QW0_EVICT_ENA_S) &
+		 ICE_FXD_FLTR_QW0_EVICT_ENA_M;
+	qword |= ((u64)ctx->toq << ICE_FXD_FLTR_QW0_TO_Q_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_M;
+	qword |= ((u64)ctx->toq_prio << ICE_FXD_FLTR_QW0_TO_Q_PRI_S) &
+		 ICE_FXD_FLTR_QW0_TO_Q_PRI_M;
+	qword |= ((u64)ctx->dpu_recipe << ICE_FXD_FLTR_QW0_DPU_RECIPE_S) &
+		 ICE_FXD_FLTR_QW0_DPU_RECIPE_M;
+	qword |= ((u64)ctx->drop << ICE_FXD_FLTR_QW0_DROP_S) &
+		 ICE_FXD_FLTR_QW0_DROP_M;
+	qword |= ((u64)ctx->flex_prio << ICE_FXD_FLTR_QW0_FLEX_PRI_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_PRI_M;
+	qword |= ((u64)ctx->flex_mdid << ICE_FXD_FLTR_QW0_FLEX_MDID_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_MDID_M;
+	qword |= ((u64)ctx->flex_val << ICE_FXD_FLTR_QW0_FLEX_VAL_S) &
+		 ICE_FXD_FLTR_QW0_FLEX_VAL_M;
+	fdir_desc->qidx_compq_space_stat = cpu_to_le64(qword);
+
+	/* prep QW1 of FD filter programming desc */
+	qword = ((u64)ctx->dtype << ICE_FXD_FLTR_QW1_DTYPE_S) &
+		ICE_FXD_FLTR_QW1_DTYPE_M;
+	qword |= ((u64)ctx->pcmd << ICE_FXD_FLTR_QW1_PCMD_S) &
+		 ICE_FXD_FLTR_QW1_PCMD_M;
+	qword |= ((u64)ctx->desc_prof_prio << ICE_FXD_FLTR_QW1_PROF_PRI_S) &
+		 ICE_FXD_FLTR_QW1_PROF_PRI_M;
+	qword |= ((u64)ctx->desc_prof << ICE_FXD_FLTR_QW1_PROF_S) &
+		 ICE_FXD_FLTR_QW1_PROF_M;
+	qword |= ((u64)ctx->fd_vsi << ICE_FXD_FLTR_QW1_FD_VSI_S) &
+		 ICE_FXD_FLTR_QW1_FD_VSI_M;
+	qword |= ((u64)ctx->swap << ICE_FXD_FLTR_QW1_SWAP_S) &
+		 ICE_FXD_FLTR_QW1_SWAP_M;
+	qword |= ((u64)ctx->fdid_prio << ICE_FXD_FLTR_QW1_FDID_PRI_S) &
+		 ICE_FXD_FLTR_QW1_FDID_PRI_M;
+	qword |= ((u64)ctx->fdid_mdid << ICE_FXD_FLTR_QW1_FDID_MDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_MDID_M;
+	qword |= ((u64)ctx->fdid << ICE_FXD_FLTR_QW1_FDID_S) &
+		 ICE_FXD_FLTR_QW1_FDID_M;
+	fdir_desc->dtype_cmd_vsi_fdid = cpu_to_le64(qword);
+}
+
+/**
+ * ice_fdir_get_prgm_desc - set a fdir descriptor from a fdir filter struct
+ * @hw: pointer to the hardware structure
+ * @input: filter supplying id, destination, queue and counter index
+ * @fdesc: filter descriptor to fill in
+ * @add: if add is true, this is an add operation, false implies delete
+ */
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add)
+{
+	struct ice_fd_fltr_desc_ctx fdir_fltr_ctx = { 0 };
+
+	/* set default context info */
+	ice_set_dflt_val_fd_desc(&fdir_fltr_ctx);
+
+	/* change sideband filtering values, overriding the defaults above */
+	fdir_fltr_ctx.fdid = input->fltr_id;
+	if (input->dest_ctl == ICE_FLTR_PRGM_DESC_DEST_DROP_PKT) {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_YES;
+		fdir_fltr_ctx.qindex = 0;
+	} else {
+		fdir_fltr_ctx.drop = ICE_FXD_FLTR_QW0_DROP_NO;
+		fdir_fltr_ctx.qindex = input->q_index;
+	}
+	fdir_fltr_ctx.cnt_ena = ICE_FXD_FLTR_QW0_STAT_ENA_PKTS;
+	fdir_fltr_ctx.cnt_index = input->cnt_index;
+	fdir_fltr_ctx.fd_vsi = ice_get_hw_vsi_num(hw, input->dest_vsi);
+	fdir_fltr_ctx.evict_ena = ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE;
+	fdir_fltr_ctx.toq_prio = 3;
+	fdir_fltr_ctx.pcmd = add ? ICE_FXD_FLTR_QW1_PCMD_ADD :
+		ICE_FXD_FLTR_QW1_PCMD_REMOVE;
+	fdir_fltr_ctx.swap = ICE_FXD_FLTR_QW1_SWAP_NOT_SET;
+	fdir_fltr_ctx.comp_q = ICE_FXD_FLTR_QW0_COMP_Q_ZERO;
+	fdir_fltr_ctx.comp_report = ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL;
+	fdir_fltr_ctx.fdid_prio = 3;
+	fdir_fltr_ctx.desc_prof = 1;
+	fdir_fltr_ctx.desc_prof_prio = 3;
+	ice_set_fd_desc_val(&fdir_fltr_ctx, fdesc);
+}
+
 /**
  * ice_alloc_fd_res_cntr - obtain counter resource for FD type
  * @hw: pointer to the hardware structure
@@ -64,6 +319,150 @@ int ice_get_fdir_cnt_all(struct ice_hw *hw)
 	return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
 }
 
+/**
+ * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 16 bit big-endian value to insert into pkt at offset
+ */
+static void ice_pkt_insert_u16(u8 *pkt, int offset, __be16 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_pkt_insert_u32 - insert a be32 value into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @data: 32 bit big-endian value to insert into pkt at offset
+ */
+static void ice_pkt_insert_u32(u8 *pkt, int offset, __be32 data)
+{
+	memcpy(pkt + offset, &data, sizeof(data));
+}
+
+/**
+ * ice_fdir_get_gen_prgm_pkt - generate a training packet
+ * @hw: pointer to the hardware structure
+ * @input: flow director filter data structure
+ * @pkt: pointer to return filter packet
+ * @frag: generate a fragment packet
+ * @tun: true implies generate a tunnel packet
+ */
+enum ice_status
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun)
+{
+	enum ice_fltr_ptype flow;
+	u16 tnl_port;
+	u8 *loc;
+	u16 idx;
+
+	if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+		switch (input->ip.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
+			break;
+		case IPPROTO_IP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+			break;
+		default:
+			return ICE_ERR_PARAM;
+		}
+	} else {
+		flow = input->flow_type;
+	}
+
+	for (idx = 0; idx < ICE_FDIR_NUM_PKT; idx++)
+		if (ice_fdir_pkt[idx].flow == flow)
+			break;
+	if (idx == ICE_FDIR_NUM_PKT)
+		return ICE_ERR_PARAM;
+	if (!tun) {
+		memcpy(pkt, ice_fdir_pkt[idx].pkt, ice_fdir_pkt[idx].pkt_len);
+		loc = pkt;
+	} else {
+		if (!ice_get_open_tunnel_port(hw, TNL_ALL, &tnl_port))
+			return ICE_ERR_DOES_NOT_EXIST;
+		if (!ice_fdir_pkt[idx].tun_pkt)
+			return ICE_ERR_PARAM;
+		memcpy(pkt, ice_fdir_pkt[idx].tun_pkt,
+		       ice_fdir_pkt[idx].tun_pkt_len);
+		ice_pkt_insert_u16(pkt, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   htons(tnl_port));
+		loc = &pkt[ICE_FDIR_TUN_PKT_OFF];
+	}
+
+	/* Reverse the src and dst, since the HW expects them to be from Tx
+	 * perspective. The input from user is from Rx filter perspective.
+	 */
+	switch (flow) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_DST_PORT_OFFSET,
+				   input->ip.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
+				   input->ip.dst_port);
+		if (frag)
+			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_DST_PORT_OFFSET,
+				   input->ip.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
+				   input->ip.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.src_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_DST_PORT_OFFSET,
+				   input->ip.src_port);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
+				   input->ip.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
+		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
+				   input->ip.src_ip);
+		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
+				   input->ip.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
+		break;
+	default:
+		return ICE_ERR_PARAM;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_fdir_has_frag - does flow type have 2 ptypes
+ * @flow: flow ptype
+ *
+ * Returns true if there is a fragment packet for this ptype
+ */
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow)
+{
+	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+		return true;
+	else
+		return false;
+}
+
 /**
  * ice_fdir_find_by_idx - find filter with idx
  * @hw: pointer to hardware structure
@@ -85,3 +484,117 @@ ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx)
 	}
 	return NULL;
 }
+
+/**
+ * ice_fdir_list_add_fltr - add a new node to the flow director filter list
+ * @hw: hardware structure
+ * @fltr: filter node to add to structure
+ */
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *fltr)
+{
+	struct ice_fdir_fltr *rule, *parent = NULL;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		/* rule ID found or its spot in the list has been passed */
+		if (rule->fltr_id >= fltr->fltr_id)
+			break;
+		parent = rule;
+	}
+
+	if (parent)
+		list_add(&fltr->fltr_node, &parent->fltr_node);
+	else
+		list_add(&fltr->fltr_node, &hw->fdir_list_head);
+}
+
+/**
+ * ice_fdir_update_cntrs - increment / decrement filter counter
+ * @hw: pointer to hardware structure
+ * @flow: filter flow type
+ * @add: true implies filters added
+ */
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add)
+{
+	int incr;
+
+	incr = add ? 1 : -1;
+	hw->fdir_active_fltr += incr;
+
+	if (flow == ICE_FLTR_PTYPE_NONF_NONE || flow >= ICE_FLTR_PTYPE_MAX)
+		ice_debug(hw, ICE_DBG_SW, "Unknown filter type %d\n", flow);
+	else
+		hw->fdir_fltr_cnt[flow] += incr;
+}
+
+/**
+ * ice_fdir_comp_rules - compare 2 filters
+ * @a: a Flow Director filter data structure
+ * @b: a Flow Director filter data structure
+ *
+ * Returns true if the filters match
+ */
+static bool
+ice_fdir_comp_rules(struct ice_fdir_fltr *a,  struct ice_fdir_fltr *b)
+{
+	enum ice_fltr_ptype flow_type = a->flow_type;
+
+	/* The calling function already checks that the two filters have the
+	 * same flow_type.
+	 */
+	if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
+		if (a->ip.dst_ip == b->ip.dst_ip &&
+		    a->ip.src_ip == b->ip.src_ip &&
+		    a->ip.dst_port == b->ip.dst_port &&
+		    a->ip.src_port == b->ip.src_port)
+			return true;
+	} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+		if (a->ip.dst_ip == b->ip.dst_ip &&
+		    a->ip.src_ip == b->ip.src_ip &&
+		    a->ip.l4_header == b->ip.l4_header &&
+		    a->ip.proto == b->ip.proto &&
+		    a->ip.ip_ver == b->ip.ip_ver &&
+		    a->ip.tos == b->ip.tos)
+			return true;
+	}
+
+	return false;
+}
+
+/**
+ * ice_fdir_is_dup_fltr - test if filter is already in list for PF
+ * @hw: hardware data structure
+ * @input: Flow Director filter data structure
+ *
+ * Returns true if the filter is found in the list
+ */
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input)
+{
+	struct ice_fdir_fltr *rule;
+	bool ret = false;
+
+	list_for_each_entry(rule, &hw->fdir_list_head, fltr_node) {
+		enum ice_fltr_ptype flow_type;
+
+		if (rule->flow_type != input->flow_type)
+			continue;
+
+		flow_type = input->flow_type;
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
+			ret = ice_fdir_comp_rules(rule, input);
+		if (ret) {
+			if (rule->fltr_id == input->fltr_id &&
+			    rule->q_index != input->q_index)
+				ret = false;
+			else
+				break;
+		}
+	}
+
+	return ret;
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 1b69249b40c1..1a13c80e1eac 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -4,11 +4,70 @@
 #ifndef _ICE_FDIR_H_
 #define _ICE_FDIR_H_
 
+#define ICE_FDIR_TUN_PKT_OFF		50
+#define ICE_FDIR_MAX_RAW_PKT_SIZE	(512 + ICE_FDIR_TUN_PKT_OFF)
+
+/* macros for offsets into packets for flow director programming */
+#define ICE_IPV4_SRC_ADDR_OFFSET	26
+#define ICE_IPV4_DST_ADDR_OFFSET	30
+#define ICE_IPV4_TCP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_TCP_DST_PORT_OFFSET	36
+#define ICE_IPV4_UDP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_UDP_DST_PORT_OFFSET	36
+#define ICE_IPV4_SCTP_SRC_PORT_OFFSET	34
+#define ICE_IPV4_SCTP_DST_PORT_OFFSET	36
+#define ICE_IPV4_PROTO_OFFSET		23
+#define ICE_IPV6_SRC_ADDR_OFFSET	22
+#define ICE_IPV6_DST_ADDR_OFFSET	38
+#define ICE_IPV6_TCP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_TCP_DST_PORT_OFFSET	56
+#define ICE_IPV6_UDP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_UDP_DST_PORT_OFFSET	56
+#define ICE_IPV6_SCTP_SRC_PORT_OFFSET	54
+#define ICE_IPV6_SCTP_DST_PORT_OFFSET	56
+/* IPv4 has 2 flag bits that control fragment processing: DF and MF. DF
+ * requests that the packet not be fragmented; MF indicates that more
+ * fragments follow. NOTE(review): in the flags byte 0x20 is MF (RFC 791).
+ */
+#define ICE_FDIR_IPV4_PKT_FLAG_DF		0x20
+
 enum ice_fltr_prgm_desc_dest {
 	ICE_FLTR_PRGM_DESC_DEST_DROP_PKT,
 	ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX,
 };
 
+enum ice_fltr_prgm_desc_fd_status {
+	ICE_FLTR_PRGM_DESC_FD_STATUS_NONE,
+	ICE_FLTR_PRGM_DESC_FD_STATUS_FD_ID,
+};
+
+/* Flow Director (FD) Filter Programming descriptor */
+struct ice_fd_fltr_desc_ctx {
+	u32 fdid;
+	u16 qindex;
+	u16 cnt_index;
+	u16 fd_vsi;
+	u16 flex_val;
+	u8 comp_q;
+	u8 comp_report;
+	u8 fd_space;
+	u8 cnt_ena;
+	u8 evict_ena;
+	u8 toq;
+	u8 toq_prio;
+	u8 dpu_recipe;
+	u8 drop;
+	u8 flex_prio;
+	u8 flex_mdid;
+	u8 dtype;
+	u8 pcmd;
+	u8 desc_prof_prio;
+	u8 desc_prof;
+	u8 swap;
+	u8 fdid_prio;
+	u8 fdid_mdid;
+};
+
 struct ice_fdir_v4 {
 	__be32 dst_ip;
 	__be32 src_ip;
@@ -47,13 +106,33 @@ struct ice_fdir_fltr {
 	u32 fltr_id;
 };
 
+/* Dummy packet filter definition structure */
+struct ice_fdir_base_pkt {
+	enum ice_fltr_ptype flow;
+	u16 pkt_len;
+	const u8 *pkt;
+	u16 tun_pkt_len;
+	const u8 *tun_pkt;
+};
+
 enum ice_status ice_alloc_fd_res_cntr(struct ice_hw *hw, u16 *cntr_id);
 enum ice_status ice_free_fd_res_cntr(struct ice_hw *hw, u16 cntr_id);
 enum ice_status
 ice_alloc_fd_guar_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
 enum ice_status
 ice_alloc_fd_shrd_item(struct ice_hw *hw, u16 *cntr_id, u16 num_fltr);
+void
+ice_fdir_get_prgm_desc(struct ice_hw *hw, struct ice_fdir_fltr *input,
+		       struct ice_fltr_desc *fdesc, bool add);
+enum ice_status
+ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
+			  u8 *pkt, bool frag, bool tun);
 int ice_get_fdir_cnt_all(struct ice_hw *hw);
+bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
+bool ice_fdir_has_frag(enum ice_fltr_ptype flow);
 struct ice_fdir_fltr *
 ice_fdir_find_fltr_by_idx(struct ice_hw *hw, u32 fltr_idx);
+void
+ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add);
+void ice_fdir_list_add_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input);
 #endif /* _ICE_FDIR_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index fe2f04f706e7..16d2f599bd70 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -1632,6 +1632,34 @@ ice_find_free_tunnel_entry(struct ice_hw *hw, enum ice_tunnel_type type,
 	return false;
 }
 
+/**
+ * ice_get_open_tunnel_port - retrieve an open tunnel port
+ * @hw: pointer to the HW structure
+ * @type: tunnel type (TNL_ALL will return any open port)
+ * @port: returns open port
+ */
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type,
+			 u16 *port)
+{
+	bool res = false;
+	u16 i;
+
+	mutex_lock(&hw->tnl_lock);
+
+	for (i = 0; i < hw->tnl.count && i < ICE_TUNNEL_MAX_ENTRIES; i++)
+		if (hw->tnl.tbl[i].valid && hw->tnl.tbl[i].in_use &&
+		    (type == TNL_ALL || hw->tnl.tbl[i].type == type)) {
+			*port = hw->tnl.tbl[i].port;
+			res = true;
+			break;
+		}
+
+	mutex_unlock(&hw->tnl_lock);
+
+	return res;
+}
+
 /**
  * ice_create_tunnel
  * @hw: pointer to the HW structure
@@ -2332,6 +2360,12 @@ ice_find_prof_id(struct ice_hw *hw, enum ice_block blk,
 	u16 off;
 	u8 i;
 
+	/* For FD, we don't want to re-use an existing profile with the same
+	 * field vector and mask, as doing so would cause rule interference.
+	 */
+	if (blk == ICE_BLK_FD)
+		return ICE_ERR_DOES_NOT_EXIST;
+
 	for (i = 0; i < (u8)es->count; i++) {
 		off = i * es->fvw;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
index 70db213c9fe3..568ea519af51 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.h
@@ -18,6 +18,9 @@
 
 #define ICE_PKG_CNT 4
 
+bool
+ice_get_open_tunnel_port(struct ice_hw *hw, enum ice_tunnel_type type,
+			 u16 *port);
 enum ice_status
 ice_create_tunnel(struct ice_hw *hw, enum ice_tunnel_type type, u16 port);
 enum ice_status ice_destroy_tunnel(struct ice_hw *hw, u16 port, bool all);
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 3376cdf5667f..c8b037d25053 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -290,6 +290,9 @@
 #define GL_PWR_MODE_CTL				0x000B820C
 #define GL_PWR_MODE_CTL_CAR_MAX_BW_S		30
 #define GL_PWR_MODE_CTL_CAR_MAX_BW_M		ICE_M(0x3, 30)
+#define GLQF_FD_CNT				0x00460018
+#define GLQF_FD_CNT_FD_BCNT_S			16
+#define GLQF_FD_CNT_FD_BCNT_M			ICE_M(0x7FFF, 16)
 #define GLQF_FD_SIZE				0x00460010
 #define GLQF_FD_SIZE_FD_GSIZE_S			0
 #define GLQF_FD_SIZE_FD_GSIZE_M			ICE_M(0x7FFF, 0)
@@ -355,6 +358,9 @@
 #define GLV_TEPC(_VSI)				(0x00312000 + ((_VSI) * 4))
 #define GLV_UPRCL(_i)				(0x003B2000 + ((_i) * 8))
 #define GLV_UPTCL(_i)				(0x0030A000 + ((_i) * 8))
+#define VSIQF_FD_CNT(_VSI)			(0x00464000 + ((_VSI) * 4))
+#define VSIQF_FD_CNT_FD_GCNT_S			0
+#define VSIQF_FD_CNT_FD_GCNT_M			ICE_M(0x3FFF, 0)
 #define VSIQF_HKEY_MAX_INDEX			12
 #define VSIQF_HLUT_MAX_INDEX			15
 #define VFINT_DYN_CTLN(_i)			(0x00003800 + ((_i) * 4))
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index 5d61acdec7ed..bd2cd3435768 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -40,6 +40,104 @@ union ice_32byte_rx_desc {
 	} wb; /* writeback */
 };
 
+struct ice_fltr_desc {
+	__le64 qidx_compq_space_stat;
+	__le64 dtype_cmd_vsi_fdid;
+};
+
+#define ICE_FXD_FLTR_QW0_QINDEX_S	0
+#define ICE_FXD_FLTR_QW0_QINDEX_M	(0x7FFULL << ICE_FXD_FLTR_QW0_QINDEX_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_S	11
+#define ICE_FXD_FLTR_QW0_COMP_Q_M	BIT_ULL(ICE_FXD_FLTR_QW0_COMP_Q_S)
+#define ICE_FXD_FLTR_QW0_COMP_Q_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_S	12
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_M	\
+				(0x3ULL << ICE_FXD_FLTR_QW0_COMP_REPORT_S)
+#define ICE_FXD_FLTR_QW0_COMP_REPORT_SW_FAIL	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FD_SPACE_S	14
+#define ICE_FXD_FLTR_QW0_FD_SPACE_M	(0x3ULL << ICE_FXD_FLTR_QW0_FD_SPACE_S)
+#define ICE_FXD_FLTR_QW0_FD_SPACE_GUAR_BEST		0x2ULL
+
+#define ICE_FXD_FLTR_QW0_STAT_CNT_S	16
+#define ICE_FXD_FLTR_QW0_STAT_CNT_M	\
+				(0x1FFFULL << ICE_FXD_FLTR_QW0_STAT_CNT_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_S	29
+#define ICE_FXD_FLTR_QW0_STAT_ENA_M	(0x3ULL << ICE_FXD_FLTR_QW0_STAT_ENA_S)
+#define ICE_FXD_FLTR_QW0_STAT_ENA_PKTS		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_S	31
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_M	BIT_ULL(ICE_FXD_FLTR_QW0_EVICT_ENA_S)
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_FALSE	0x0ULL
+#define ICE_FXD_FLTR_QW0_EVICT_ENA_TRUE		0x1ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_S		32
+#define ICE_FXD_FLTR_QW0_TO_Q_M		(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_EQUALS_QINDEX	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_S	35
+#define ICE_FXD_FLTR_QW0_TO_Q_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_TO_Q_PRI_S)
+#define ICE_FXD_FLTR_QW0_TO_Q_PRIO1	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_S	38
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_M	\
+			(0x3ULL << ICE_FXD_FLTR_QW0_DPU_RECIPE_S)
+#define ICE_FXD_FLTR_QW0_DPU_RECIPE_DFLT	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_DROP_S		40
+#define ICE_FXD_FLTR_QW0_DROP_M		BIT_ULL(ICE_FXD_FLTR_QW0_DROP_S)
+#define ICE_FXD_FLTR_QW0_DROP_NO	0x0ULL
+#define ICE_FXD_FLTR_QW0_DROP_YES	0x1ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_S	41
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW0_FLEX_PRI_S)
+#define ICE_FXD_FLTR_QW0_FLEX_PRI_NONE	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_S	44
+#define ICE_FXD_FLTR_QW0_FLEX_MDID_M	(0xFULL << ICE_FXD_FLTR_QW0_FLEX_MDID_S)
+#define ICE_FXD_FLTR_QW0_FLEX_MDID0	0x0ULL
+
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_S	48
+#define ICE_FXD_FLTR_QW0_FLEX_VAL_M	\
+				(0xFFFFULL << ICE_FXD_FLTR_QW0_FLEX_VAL_S)
+#define ICE_FXD_FLTR_QW0_FLEX_VAL0	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_DTYPE_S	0
+#define ICE_FXD_FLTR_QW1_DTYPE_M	(0xFULL << ICE_FXD_FLTR_QW1_DTYPE_S)
+#define ICE_FXD_FLTR_QW1_PCMD_S		4
+#define ICE_FXD_FLTR_QW1_PCMD_M		BIT_ULL(ICE_FXD_FLTR_QW1_PCMD_S)
+#define ICE_FXD_FLTR_QW1_PCMD_ADD	0x0ULL
+#define ICE_FXD_FLTR_QW1_PCMD_REMOVE	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_PRI_S	5
+#define ICE_FXD_FLTR_QW1_PROF_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_PROF_PRI_S)
+#define ICE_FXD_FLTR_QW1_PROF_PRIO_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_PROF_S		8
+#define ICE_FXD_FLTR_QW1_PROF_M		(0x3FULL << ICE_FXD_FLTR_QW1_PROF_S)
+#define ICE_FXD_FLTR_QW1_PROF_ZERO	0x0ULL
+
+#define ICE_FXD_FLTR_QW1_FD_VSI_S	14
+#define ICE_FXD_FLTR_QW1_FD_VSI_M	(0x3FFULL << ICE_FXD_FLTR_QW1_FD_VSI_S)
+#define ICE_FXD_FLTR_QW1_SWAP_S		24
+#define ICE_FXD_FLTR_QW1_SWAP_M		BIT_ULL(ICE_FXD_FLTR_QW1_SWAP_S)
+#define ICE_FXD_FLTR_QW1_SWAP_NOT_SET	0x0ULL
+#define ICE_FXD_FLTR_QW1_SWAP_SET	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_PRI_S	25
+#define ICE_FXD_FLTR_QW1_FDID_PRI_M	(0x7ULL << ICE_FXD_FLTR_QW1_FDID_PRI_S)
+#define ICE_FXD_FLTR_QW1_FDID_PRI_ONE	0x1ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_MDID_S	28
+#define ICE_FXD_FLTR_QW1_FDID_MDID_M	(0xFULL << ICE_FXD_FLTR_QW1_FDID_MDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_MDID_FD	0x05ULL
+
+#define ICE_FXD_FLTR_QW1_FDID_S		32
+#define ICE_FXD_FLTR_QW1_FDID_M		\
+			(0xFFFFFFFFULL << ICE_FXD_FLTR_QW1_FDID_S)
+#define ICE_FXD_FLTR_QW1_FDID_ZERO	0x0ULL
+
 struct ice_rx_ptype_decoded {
 	u32 ptype:10;
 	u32 known:1;
@@ -346,6 +444,7 @@ struct ice_tx_desc {
 enum ice_tx_desc_dtype_value {
 	ICE_TX_DESC_DTYPE_DATA		= 0x0,
 	ICE_TX_DESC_DTYPE_CTX		= 0x1,
+	ICE_TX_DESC_DTYPE_FLTR_PROG	= 0x8,
 	/* DESC_DONE - HW has completed write-back of descriptor */
 	ICE_TX_DESC_DTYPE_DESC_DONE	= 0xF,
 };
@@ -357,12 +456,14 @@ enum ice_tx_desc_cmd_bits {
 	ICE_TX_DESC_CMD_EOP			= 0x0001,
 	ICE_TX_DESC_CMD_RS			= 0x0002,
 	ICE_TX_DESC_CMD_IL2TAG1			= 0x0008,
+	ICE_TX_DESC_CMD_DUMMY			= 0x0010,
 	ICE_TX_DESC_CMD_IIPT_IPV6		= 0x0020,
 	ICE_TX_DESC_CMD_IIPT_IPV4		= 0x0040,
 	ICE_TX_DESC_CMD_IIPT_IPV4_CSUM		= 0x0060,
 	ICE_TX_DESC_CMD_L4T_EOFT_TCP		= 0x0100,
 	ICE_TX_DESC_CMD_L4T_EOFT_SCTP		= 0x0200,
 	ICE_TX_DESC_CMD_L4T_EOFT_UDP		= 0x0300,
+	ICE_TX_DESC_CMD_RE			= 0x0400,
 };
 
 #define ICE_TXD_QW1_OFFSET_S	16
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index 173a167c96d9..cda7e05bd8ae 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -16,6 +16,88 @@
 #define ICE_RX_HDR_SIZE		256
 
 #define FDIR_DESC_RXDID 0x40
+#define ICE_FDIR_CLEAN_DELAY 10
+
+/**
+ * ice_prgm_fdir_fltr - Program a Flow Director filter
+ * @vsi: VSI to send dummy packet
+ * @fdir_desc: flow director descriptor
+ * @raw_packet: allocated buffer for flow director
+ */
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet)
+{
+	struct ice_tx_buf *tx_buf, *first;
+	struct ice_fltr_desc *f_desc;
+	struct ice_tx_desc *tx_desc;
+	struct ice_ring *tx_ring;
+	struct device *dev;
+	dma_addr_t dma;
+	u32 td_cmd;
+	u16 i;
+
+	/* VSI and Tx ring */
+	if (!vsi)
+		return -ENOENT;
+	tx_ring = vsi->tx_rings[0];
+	if (!tx_ring || !tx_ring->desc)
+		return -ENOENT;
+	dev = tx_ring->dev;
+
+	/* we are using two descriptors to add/del a filter and we can wait */
+	for (i = ICE_FDIR_CLEAN_DELAY; ICE_DESC_UNUSED(tx_ring) < 2; i--) {
+		if (!i)
+			return -EAGAIN;
+		msleep_interruptible(1);
+	}
+
+	dma = dma_map_single(dev, raw_packet, ICE_FDIR_MAX_RAW_PKT_SIZE,
+			     DMA_TO_DEVICE);
+
+	if (dma_mapping_error(dev, dma))
+		return -EINVAL;
+
+	/* grab the next descriptor */
+	i = tx_ring->next_to_use;
+	first = &tx_ring->tx_buf[i];
+	f_desc = ICE_TX_FDIRDESC(tx_ring, i);
+	memcpy(f_desc, fdir_desc, sizeof(*f_desc));
+
+	i++;
+	i = (i < tx_ring->count) ? i : 0;
+	tx_desc = ICE_TX_DESC(tx_ring, i);
+	tx_buf = &tx_ring->tx_buf[i];
+
+	i++;
+	tx_ring->next_to_use = (i < tx_ring->count) ? i : 0;
+
+	memset(tx_buf, 0, sizeof(*tx_buf));
+	dma_unmap_len_set(tx_buf, len, ICE_FDIR_MAX_RAW_PKT_SIZE);
+	dma_unmap_addr_set(tx_buf, dma, dma);
+
+	tx_desc->buf_addr = cpu_to_le64(dma);
+	td_cmd = ICE_TXD_LAST_DESC_CMD | ICE_TX_DESC_CMD_DUMMY |
+		 ICE_TX_DESC_CMD_RE;
+
+	tx_buf->tx_flags = ICE_TX_FLAGS_DUMMY_PKT;
+	tx_buf->raw_buf = raw_packet;
+
+	tx_desc->cmd_type_offset_bsz =
+		ice_build_ctob(td_cmd, 0, ICE_FDIR_MAX_RAW_PKT_SIZE, 0);
+
+	/* Force memory write to complete before letting h/w know
+	 * there are new descriptors to fetch.
+	 */
+	wmb();
+
+	/* mark the data descriptor to be watched */
+	first->next_to_watch = tx_desc;
+
+	writel(tx_ring->next_to_use, tx_ring->tail);
+
+	return 0;
+}
 
 /**
  * ice_unmap_and_free_tx_buf - Release a Tx buffer
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index 2209583c993e..7c4030caeea4 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -380,6 +380,9 @@ int ice_setup_rx_ring(struct ice_ring *rx_ring);
 void ice_free_tx_ring(struct ice_ring *tx_ring);
 void ice_free_rx_ring(struct ice_ring *rx_ring);
 int ice_napi_poll(struct napi_struct *napi, int budget);
+int
+ice_prgm_fdir_fltr(struct ice_vsi *vsi, struct ice_fltr_desc *fdir_desc,
+		   u8 *raw_packet);
 int ice_clean_rx_irq(struct ice_ring *rx_ring, int budget);
 void ice_clean_ctrl_tx_irq(struct ice_ring *tx_ring);
 #endif /* _ICE_TXRX_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index 0c14d89f7be9..fcf1f7853a41 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -628,6 +628,12 @@ struct ice_hw {
 	struct mutex fdir_fltr_lock;	/* protect Flow Director */
 	struct list_head fdir_list_head;
 
+	/* Book-keeping of side-band filter count per flow-type.
+	 * This is used to detect and handle input set changes for
+	 * respective flow-type.
+	 */
+	u16 fdir_fltr_cnt[ICE_FLTR_PTYPE_MAX];
+
 	struct ice_fd_hw_prof **fdir_prof;
 	DECLARE_BITMAP(fdir_perfect_fltr, ICE_FLTR_PTYPE_MAX);
 	struct mutex rss_locks;	/* protect RSS configuration */
-- 
cgit v1.2.3-59-g8ed1b


From 165d80d6adab51b6a2f9c40ad0c8d3dec18d7bef Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:43 -0700
Subject: ice: Support IPv6 Flow Director filters

Extend supported filters to allow for IPv6 filters.

Supported fields are: src-ip, dst-ip, src-port, and dst-port
Supported flow-types are: tcp6, udp6, sctp6, ip6

Example usage:

ethtool -N eth0 flow-type tcp6 src-port 12 dst-port 13 \
src-ip fce0::1:34 dst-ip fce0::1:35 action 32

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 391 +++++++++++++++++++---
 drivers/net/ethernet/intel/ice/ice_fdir.c         | 303 +++++++++++++++--
 drivers/net/ethernet/intel/ice/ice_fdir.h         |  19 +-
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c    |   6 +
 drivers/net/ethernet/intel/ice/ice_type.h         |   4 +
 5 files changed, 642 insertions(+), 81 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 6badf2ef2255..aa85d5ad2477 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -7,6 +7,24 @@
 #include "ice_lib.h"
 #include "ice_flow.h"
 
+static struct in6_addr full_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+			0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
+		}
+	}
+};
+
+static struct in6_addr zero_ipv6_addr_mask = {
+	.in6_u = {
+		.u6_addr8 = {
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+			0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+		}
+	}
+};
+
 /* calls to ice_flow_add_prof require the number of segments in the array
  * for segs_cnt. In this code that is one more than the index.
  */
@@ -30,6 +48,14 @@ static int ice_fltr_to_ethtool_flow(enum ice_fltr_ptype flow)
 		return SCTP_V4_FLOW;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		return IPV4_USER_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return TCP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return UDP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		return SCTP_V6_FLOW;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		return IPV6_USER_FLOW;
 	default:
 		/* 0 is undefined ethtool flow */
 		return 0;
@@ -53,6 +79,14 @@ static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
 		return ICE_FLTR_PTYPE_NONF_IPV4_SCTP;
 	case IPV4_USER_FLOW:
 		return ICE_FLTR_PTYPE_NONF_IPV4_OTHER;
+	case TCP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+	case UDP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+	case SCTP_V6_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+	case IPV6_USER_FLOW:
+		return ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
 	default:
 		return ICE_FLTR_PTYPE_NONF_NONE;
 	}
@@ -92,28 +126,64 @@ int ice_get_ethtool_fdir_entry(struct ice_hw *hw, struct ethtool_rxnfc *cmd)
 	case IPV4_USER_FLOW:
 		fsp->h_u.usr_ip4_spec.ip_ver = ETH_RX_NFC_IP4;
 		fsp->h_u.usr_ip4_spec.proto = 0;
-		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.l4_header;
-		fsp->h_u.usr_ip4_spec.tos = rule->ip.tos;
-		fsp->h_u.usr_ip4_spec.ip4src = rule->ip.src_ip;
-		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.dst_ip;
-		fsp->m_u.usr_ip4_spec.ip4src = rule->mask.src_ip;
-		fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.dst_ip;
+		fsp->h_u.usr_ip4_spec.l4_4_bytes = rule->ip.v4.l4_header;
+		fsp->h_u.usr_ip4_spec.tos = rule->ip.v4.tos;
+		fsp->h_u.usr_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.usr_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.usr_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.usr_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
 		fsp->m_u.usr_ip4_spec.ip_ver = 0xFF;
 		fsp->m_u.usr_ip4_spec.proto = 0;
-		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.l4_header;
-		fsp->m_u.usr_ip4_spec.tos = rule->mask.tos;
+		fsp->m_u.usr_ip4_spec.l4_4_bytes = rule->mask.v4.l4_header;
+		fsp->m_u.usr_ip4_spec.tos = rule->mask.v4.tos;
 		break;
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
 	case SCTP_V4_FLOW:
-		fsp->h_u.tcp_ip4_spec.psrc = rule->ip.src_port;
-		fsp->h_u.tcp_ip4_spec.pdst = rule->ip.dst_port;
-		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.src_ip;
-		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.dst_ip;
-		fsp->m_u.tcp_ip4_spec.psrc = rule->mask.src_port;
-		fsp->m_u.tcp_ip4_spec.pdst = rule->mask.dst_port;
-		fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.src_ip;
-		fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.dst_ip;
+		fsp->h_u.tcp_ip4_spec.psrc = rule->ip.v4.src_port;
+		fsp->h_u.tcp_ip4_spec.pdst = rule->ip.v4.dst_port;
+		fsp->h_u.tcp_ip4_spec.ip4src = rule->ip.v4.src_ip;
+		fsp->h_u.tcp_ip4_spec.ip4dst = rule->ip.v4.dst_ip;
+		fsp->m_u.tcp_ip4_spec.psrc = rule->mask.v4.src_port;
+		fsp->m_u.tcp_ip4_spec.pdst = rule->mask.v4.dst_port;
+		fsp->m_u.tcp_ip4_spec.ip4src = rule->mask.v4.src_ip;
+		fsp->m_u.tcp_ip4_spec.ip4dst = rule->mask.v4.dst_ip;
+		break;
+	case IPV6_USER_FLOW:
+		fsp->h_u.usr_ip6_spec.l4_4_bytes = rule->ip.v6.l4_header;
+		fsp->h_u.usr_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->h_u.usr_ip6_spec.l4_proto = rule->ip.v6.proto;
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src, rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst, rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.usr_ip6_spec.l4_4_bytes = rule->mask.v6.l4_header;
+		fsp->m_u.usr_ip6_spec.tclass = rule->mask.v6.tc;
+		fsp->m_u.usr_ip6_spec.l4_proto = rule->mask.v6.proto;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6src, rule->ip.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->h_u.tcp_ip6_spec.ip6dst, rule->ip.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->h_u.tcp_ip6_spec.psrc = rule->ip.v6.src_port;
+		fsp->h_u.tcp_ip6_spec.pdst = rule->ip.v6.dst_port;
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6src,
+		       rule->mask.v6.src_ip,
+		       sizeof(struct in6_addr));
+		memcpy(fsp->m_u.tcp_ip6_spec.ip6dst,
+		       rule->mask.v6.dst_ip,
+		       sizeof(struct in6_addr));
+		fsp->m_u.tcp_ip6_spec.psrc = rule->mask.v6.src_port;
+		fsp->m_u.tcp_ip6_spec.pdst = rule->mask.v6.dst_port;
+		fsp->h_u.tcp_ip6_spec.tclass = rule->ip.v6.tc;
+		fsp->m_u.tcp_ip6_spec.tclass = rule->mask.v6.tc;
 		break;
 	default:
 		break;
@@ -441,6 +511,7 @@ err_prof:
 /**
  * ice_set_init_fdir_seg
  * @seg: flow segment for programming
+ * @l3_proto: ICE_FLOW_SEG_HDR_IPV4 or ICE_FLOW_SEG_HDR_IPV6
  * @l4_proto: ICE_FLOW_SEG_HDR_TCP or ICE_FLOW_SEG_HDR_UDP
  *
  * Set the configuration for perfect filters to the provided flow segment for
@@ -449,13 +520,24 @@ err_prof:
  */
 static int
 ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
+		      enum ice_flow_seg_hdr l3_proto,
 		      enum ice_flow_seg_hdr l4_proto)
 {
-	enum ice_flow_field src_port, dst_port;
+	enum ice_flow_field src_addr, dst_addr, src_port, dst_port;
 
 	if (!seg)
 		return -EINVAL;
 
+	if (l3_proto == ICE_FLOW_SEG_HDR_IPV4) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV4_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV4_DA;
+	} else if (l3_proto == ICE_FLOW_SEG_HDR_IPV6) {
+		src_addr = ICE_FLOW_FIELD_IDX_IPV6_SA;
+		dst_addr = ICE_FLOW_FIELD_IDX_IPV6_DA;
+	} else {
+		return -EINVAL;
+	}
+
 	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
 		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
 		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
@@ -466,17 +548,15 @@ ice_set_init_fdir_seg(struct ice_flow_seg_info *seg,
 		return -EINVAL;
 	}
 
-	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV4 | l4_proto);
+	ICE_FLOW_SET_HDRS(seg, l3_proto | l4_proto);
 
 	/* IP source address */
-	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_SA,
-			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
-			 ICE_FLOW_FLD_OFF_INVAL, false);
+	ice_flow_set_fld(seg, src_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
 
 	/* IP destination address */
-	ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV4_DA,
-			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
-			 ICE_FLOW_FLD_OFF_INVAL, false);
+	ice_flow_set_fld(seg, dst_addr, ICE_FLOW_FLD_OFF_INVAL,
+			 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL, false);
 
 	/* Layer 4 source port */
 	ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
@@ -521,9 +601,17 @@ ice_create_init_fdir_rule(struct ice_pf *pf, enum ice_fltr_ptype flow)
 	}
 
 	if (flow == ICE_FLTR_PTYPE_NONF_IPV4_TCP)
-		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_TCP);
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_TCP);
 	else if (flow == ICE_FLTR_PTYPE_NONF_IPV4_UDP)
-		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_UDP);
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV4,
+					    ICE_FLOW_SEG_HDR_UDP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_TCP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_TCP);
+	else if (flow == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		ret = ice_set_init_fdir_seg(seg, ICE_FLOW_SEG_HDR_IPV6,
+					    ICE_FLOW_SEG_HDR_UDP);
 	else
 		ret = -EINVAL;
 	if (ret)
@@ -694,6 +782,156 @@ ice_set_fdir_ip4_usr_seg(struct ice_flow_seg_info *seg,
 	return 0;
 }
 
+/**
+ * ice_set_fdir_ip6_seg
+ * @seg: flow segment for programming
+ * @tcp_ip6_spec: mask data from ethtool
+ * @l4_proto: Layer 4 protocol to program
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table based on provided L4 protocol for IPv6
+ */
+static int
+ice_set_fdir_ip6_seg(struct ice_flow_seg_info *seg,
+		     struct ethtool_tcpip6_spec *tcp_ip6_spec,
+		     enum ice_flow_seg_hdr l4_proto, bool *perfect_fltr)
+{
+	enum ice_flow_field src_port, dst_port;
+
+	/* make sure we don't have any empty rule */
+	if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !tcp_ip6_spec->psrc && !tcp_ip6_spec->pdst)
+		return -EINVAL;
+
+	/* filtering on TC not supported */
+	if (tcp_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+
+	if (l4_proto == ICE_FLOW_SEG_HDR_TCP) {
+		src_port = ICE_FLOW_FIELD_IDX_TCP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_TCP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_UDP) {
+		src_port = ICE_FLOW_FIELD_IDX_UDP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_UDP_DST_PORT;
+	} else if (l4_proto == ICE_FLOW_SEG_HDR_SCTP) {
+		src_port = ICE_FLOW_FIELD_IDX_SCTP_SRC_PORT;
+		dst_port = ICE_FLOW_FIELD_IDX_SCTP_DST_PORT;
+	} else {
+		return -EINVAL;
+	}
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6 | l4_proto);
+
+	if (!memcmp(tcp_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(tcp_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(tcp_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 source port */
+	if (tcp_ip6_spec->psrc == htons(0xFFFF))
+		ice_flow_set_fld(seg, src_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->psrc)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	/* Layer 4 destination port */
+	if (tcp_ip6_spec->pdst == htons(0xFFFF))
+		ice_flow_set_fld(seg, dst_port, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 false);
+	else if (!tcp_ip6_spec->pdst)
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
+/**
+ * ice_set_fdir_ip6_usr_seg
+ * @seg: flow segment for programming
+ * @usr_ip6_spec: mask data from ethtool
+ * @perfect_fltr: only valid on success; returns true if perfect filter,
+ *		  false if not
+ *
+ * Set the mask data into the flow segment to be used to program HW
+ * table for IPv6
+ */
+static int
+ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg,
+			 struct ethtool_usrip6_spec *usr_ip6_spec,
+			 bool *perfect_fltr)
+{
+	/* filtering on Layer 4 bytes not supported */
+	if (usr_ip6_spec->l4_4_bytes)
+		return -EOPNOTSUPP;
+	/* filtering on TC not supported */
+	if (usr_ip6_spec->tclass)
+		return -EOPNOTSUPP;
+	/* filtering on Layer 4 protocol not supported */
+	if (usr_ip6_spec->l4_proto)
+		return -EOPNOTSUPP;
+	/* empty rules are not valid */
+	if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)) &&
+	    !memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		return -EINVAL;
+
+	*perfect_fltr = true;
+	ICE_FLOW_SET_HDRS(seg, ICE_FLOW_SEG_HDR_IPV6);
+
+	if (!memcmp(usr_ip6_spec->ip6src, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_SA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6src, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	if (!memcmp(usr_ip6_spec->ip6dst, &full_ipv6_addr_mask,
+		    sizeof(struct in6_addr)))
+		ice_flow_set_fld(seg, ICE_FLOW_FIELD_IDX_IPV6_DA,
+				 ICE_FLOW_FLD_OFF_INVAL, ICE_FLOW_FLD_OFF_INVAL,
+				 ICE_FLOW_FLD_OFF_INVAL, false);
+	else if (!memcmp(usr_ip6_spec->ip6dst, &zero_ipv6_addr_mask,
+			 sizeof(struct in6_addr)))
+		*perfect_fltr = false;
+	else
+		return -EOPNOTSUPP;
+
+	return 0;
+}
+
 /**
  * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
  * @pf: PF structure
@@ -742,6 +980,25 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp)
 		ret = ice_set_fdir_ip4_usr_seg(seg, &fsp->m_u.usr_ip4_spec,
 					       &perfect_filter);
 		break;
+	case TCP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_TCP,
+					   &perfect_filter);
+		break;
+	case UDP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_UDP,
+					   &perfect_filter);
+		break;
+	case SCTP_V6_FLOW:
+		ret = ice_set_fdir_ip6_seg(seg, &fsp->m_u.tcp_ip6_spec,
+					   ICE_FLOW_SEG_HDR_SCTP,
+					   &perfect_filter);
+		break;
+	case IPV6_USER_FLOW:
+		ret = ice_set_fdir_ip6_usr_seg(seg, &fsp->m_u.usr_ip6_spec,
+					       &perfect_filter);
+		break;
 	default:
 		ret = -EINVAL;
 	}
@@ -910,6 +1167,14 @@ int ice_fdir_create_dflt_rules(struct ice_pf *pf)
 		return err;
 
 	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV4_UDP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_TCP);
+	if (err)
+		return err;
+
+	err = ice_create_init_fdir_rule(pf, ICE_FLTR_PTYPE_NONF_IPV6_UDP);
 
 	return err;
 }
@@ -1094,28 +1359,62 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
 	case TCP_V4_FLOW:
 	case UDP_V4_FLOW:
 	case SCTP_V4_FLOW:
-		input->ip.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
-		input->ip.src_port = fsp->h_u.tcp_ip4_spec.psrc;
-		input->ip.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
-		input->ip.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
-		input->mask.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
-		input->mask.src_port = fsp->m_u.tcp_ip4_spec.psrc;
-		input->mask.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
-		input->mask.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
+		input->ip.v4.dst_port = fsp->h_u.tcp_ip4_spec.pdst;
+		input->ip.v4.src_port = fsp->h_u.tcp_ip4_spec.psrc;
+		input->ip.v4.dst_ip = fsp->h_u.tcp_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.tcp_ip4_spec.ip4src;
+		input->mask.v4.dst_port = fsp->m_u.tcp_ip4_spec.pdst;
+		input->mask.v4.src_port = fsp->m_u.tcp_ip4_spec.psrc;
+		input->mask.v4.dst_ip = fsp->m_u.tcp_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.tcp_ip4_spec.ip4src;
 		break;
 	case IPV4_USER_FLOW:
-		input->ip.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
-		input->ip.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
-		input->ip.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
-		input->ip.proto = fsp->h_u.usr_ip4_spec.proto;
-		input->ip.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
-		input->ip.tos = fsp->h_u.usr_ip4_spec.tos;
-		input->mask.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
-		input->mask.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
-		input->mask.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
-		input->mask.proto = fsp->m_u.usr_ip4_spec.proto;
-		input->mask.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
-		input->mask.tos = fsp->m_u.usr_ip4_spec.tos;
+		input->ip.v4.dst_ip = fsp->h_u.usr_ip4_spec.ip4dst;
+		input->ip.v4.src_ip = fsp->h_u.usr_ip4_spec.ip4src;
+		input->ip.v4.l4_header = fsp->h_u.usr_ip4_spec.l4_4_bytes;
+		input->ip.v4.proto = fsp->h_u.usr_ip4_spec.proto;
+		input->ip.v4.ip_ver = fsp->h_u.usr_ip4_spec.ip_ver;
+		input->ip.v4.tos = fsp->h_u.usr_ip4_spec.tos;
+		input->mask.v4.dst_ip = fsp->m_u.usr_ip4_spec.ip4dst;
+		input->mask.v4.src_ip = fsp->m_u.usr_ip4_spec.ip4src;
+		input->mask.v4.l4_header = fsp->m_u.usr_ip4_spec.l4_4_bytes;
+		input->mask.v4.proto = fsp->m_u.usr_ip4_spec.proto;
+		input->mask.v4.ip_ver = fsp->m_u.usr_ip4_spec.ip_ver;
+		input->mask.v4.tos = fsp->m_u.usr_ip4_spec.tos;
+		break;
+	case TCP_V6_FLOW:
+	case UDP_V6_FLOW:
+	case SCTP_V6_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.dst_port = fsp->h_u.tcp_ip6_spec.pdst;
+		input->ip.v6.src_port = fsp->h_u.tcp_ip6_spec.psrc;
+		input->ip.v6.tc = fsp->h_u.tcp_ip6_spec.tclass;
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.tcp_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.tcp_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.dst_port = fsp->m_u.tcp_ip6_spec.pdst;
+		input->mask.v6.src_port = fsp->m_u.tcp_ip6_spec.psrc;
+		input->mask.v6.tc = fsp->m_u.tcp_ip6_spec.tclass;
+		break;
+	case IPV6_USER_FLOW:
+		memcpy(input->ip.v6.dst_ip, fsp->h_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->ip.v6.src_ip, fsp->h_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->ip.v6.l4_header = fsp->h_u.usr_ip6_spec.l4_4_bytes;
+		input->ip.v6.tc = fsp->h_u.usr_ip6_spec.tclass;
+		input->ip.v6.proto = fsp->h_u.usr_ip6_spec.l4_proto;
+		memcpy(input->mask.v6.dst_ip, fsp->m_u.usr_ip6_spec.ip6dst,
+		       sizeof(struct in6_addr));
+		memcpy(input->mask.v6.src_ip, fsp->m_u.usr_ip6_spec.ip6src,
+		       sizeof(struct in6_addr));
+		input->mask.v6.l4_header = fsp->m_u.usr_ip6_spec.l4_4_bytes;
+		input->mask.v6.tc = fsp->m_u.usr_ip6_spec.tclass;
+		input->mask.v6.proto = fsp->m_u.usr_ip6_spec.l4_proto;
 		break;
 	default:
 		/* not doing un-parsed flow types */
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index 60a824363f06..d50cc6e9086e 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -40,6 +40,52 @@ static const u8 ice_fdir_ipv4_pkt[] = {
 	0x00, 0x00
 };
 
+static const u8 ice_fdir_tcpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x14, 0x06, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x50, 0x00, 0x20, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_udpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x11, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x08, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctpv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x0C, 0x84, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00,
+};
+
+static const u8 ice_fdir_ipv6_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x86, 0xDD, 0x60, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x3B, 0x40, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 static const u8 ice_fdir_tcp4_tun_pkt[] = {
 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
 	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
@@ -100,6 +146,76 @@ static const u8 ice_fdir_ip4_tun_pkt[] = {
 	0x00, 0x00, 0x00, 0x00,
 };
 
+static const u8 ice_fdir_tcp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x6e, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x14, 0x06, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x50, 0x00, 0x20, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_udp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x62, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x08, 0x11, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_sctp6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x66, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x0c, 0x84, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00,
+};
+
+static const u8 ice_fdir_ip6_tun_pkt[] = {
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x08, 0x00, 0x45, 0x00,
+	0x00, 0x5a, 0x00, 0x00, 0x40, 0x00, 0x40, 0x11,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x04, 0x00, 0x00, 0x03, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x86, 0xdd,
+	0x60, 0x00, 0x00, 0x00, 0x00, 0x00, 0x3b, 0x40,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
+};
+
 /* Flow Director no-op training packet table */
 static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 	{
@@ -122,6 +238,26 @@ static const struct ice_fdir_base_pkt ice_fdir_pkt[] = {
 		sizeof(ice_fdir_ipv4_pkt), ice_fdir_ipv4_pkt,
 		sizeof(ice_fdir_ip4_tun_pkt), ice_fdir_ip4_tun_pkt,
 	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+		sizeof(ice_fdir_tcpv6_pkt), ice_fdir_tcpv6_pkt,
+		sizeof(ice_fdir_tcp6_tun_pkt), ice_fdir_tcp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+		sizeof(ice_fdir_udpv6_pkt), ice_fdir_udpv6_pkt,
+		sizeof(ice_fdir_udp6_tun_pkt), ice_fdir_udp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+		sizeof(ice_fdir_sctpv6_pkt), ice_fdir_sctpv6_pkt,
+		sizeof(ice_fdir_sctp6_tun_pkt), ice_fdir_sctp6_tun_pkt,
+	},
+	{
+		ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
+		sizeof(ice_fdir_ipv6_pkt), ice_fdir_ipv6_pkt,
+		sizeof(ice_fdir_ip6_tun_pkt), ice_fdir_ip6_tun_pkt,
+	},
 };
 
 #define ICE_FDIR_NUM_PKT ARRAY_SIZE(ice_fdir_pkt)
@@ -319,6 +455,21 @@ int ice_get_fdir_cnt_all(struct ice_hw *hw)
 	return hw->func_caps.fd_fltr_guar + hw->func_caps.fd_fltr_best_effort;
 }
 
+/**
+ * ice_pkt_insert_ipv6_addr - insert a be32 IPv6 address into a memory buffer
+ * @pkt: packet buffer
+ * @offset: offset into buffer
+ * @addr: IPv6 address (array of __be32 words) to copy into pkt at offset
+ */
+static void ice_pkt_insert_ipv6_addr(u8 *pkt, int offset, __be32 *addr)
+{
+	int idx;
+
+	for (idx = 0; idx < ICE_IPV6_ADDR_LEN_AS_U32; idx++)
+		memcpy(pkt + offset + idx * sizeof(*addr), &addr[idx],
+		       sizeof(*addr));
+}
+
 /**
  * ice_pkt_insert_u16 - insert a be16 value into a memory buffer
  * @pkt: packet buffer
@@ -359,7 +510,7 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 	u16 idx;
 
 	if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
-		switch (input->ip.proto) {
+		switch (input->ip.v4.proto) {
 		case IPPROTO_TCP:
 			flow = ICE_FLTR_PTYPE_NONF_IPV4_TCP;
 			break;
@@ -375,6 +526,23 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		default:
 			return ICE_ERR_PARAM;
 		}
+	} else if (input->flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+		switch (input->ip.v6.proto) {
+		case IPPROTO_TCP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_TCP;
+			break;
+		case IPPROTO_UDP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+			break;
+		case IPPROTO_SCTP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_SCTP;
+			break;
+		case IPPROTO_IP:
+			flow = ICE_FLTR_PTYPE_NONF_IPV6_OTHER;
+			break;
+		default:
+			return ICE_ERR_PARAM;
+		}
 	} else {
 		flow = input->flow_type;
 	}
@@ -405,43 +573,79 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 	switch (flow) {
 	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
-				   input->ip.src_ip);
+				   input->ip.v4.src_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_DST_PORT_OFFSET,
-				   input->ip.src_port);
+				   input->ip.v4.src_port);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
-				   input->ip.dst_ip);
+				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_TCP_SRC_PORT_OFFSET,
-				   input->ip.dst_port);
+				   input->ip.v4.dst_port);
 		if (frag)
 			loc[20] = ICE_FDIR_IPV4_PKT_FLAG_DF;
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
-				   input->ip.src_ip);
+				   input->ip.v4.src_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_DST_PORT_OFFSET,
-				   input->ip.src_port);
+				   input->ip.v4.src_port);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
-				   input->ip.dst_ip);
+				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_UDP_SRC_PORT_OFFSET,
-				   input->ip.dst_port);
+				   input->ip.v4.dst_port);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_SCTP:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
-				   input->ip.src_ip);
+				   input->ip.v4.src_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_DST_PORT_OFFSET,
-				   input->ip.src_port);
+				   input->ip.v4.src_port);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
-				   input->ip.dst_ip);
+				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_SCTP_SRC_PORT_OFFSET,
-				   input->ip.dst_port);
+				   input->ip.v4.dst_port);
 		break;
 	case ICE_FLTR_PTYPE_NONF_IPV4_OTHER:
 		ice_pkt_insert_u32(loc, ICE_IPV4_DST_ADDR_OFFSET,
-				   input->ip.src_ip);
+				   input->ip.v4.src_ip);
 		ice_pkt_insert_u32(loc, ICE_IPV4_SRC_ADDR_OFFSET,
-				   input->ip.dst_ip);
+				   input->ip.v4.dst_ip);
 		ice_pkt_insert_u16(loc, ICE_IPV4_PROTO_OFFSET, 0);
 		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_TCP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_UDP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_SCTP:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_DST_PORT_OFFSET,
+				   input->ip.v6.src_port);
+		ice_pkt_insert_u16(loc, ICE_IPV6_SCTP_SRC_PORT_OFFSET,
+				   input->ip.v6.dst_port);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_OTHER:
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_DST_ADDR_OFFSET,
+					 input->ip.v6.src_ip);
+		ice_pkt_insert_ipv6_addr(loc, ICE_IPV6_SRC_ADDR_OFFSET,
+					 input->ip.v6.dst_ip);
+		break;
 	default:
 		return ICE_ERR_PARAM;
 	}
@@ -527,37 +731,68 @@ ice_fdir_update_cntrs(struct ice_hw *hw, enum ice_fltr_ptype flow, bool add)
 		hw->fdir_fltr_cnt[flow] += incr;
 }
 
+/**
+ * ice_cmp_ipv6_addr - compare 2 IP v6 addresses
+ * @a: IP v6 address
+ * @b: IP v6 address
+ *
+ * Returns 0 on equal, returns non-0 if different
+ */
+static int ice_cmp_ipv6_addr(__be32 *a, __be32 *b)
+{
+	return memcmp(a, b, 4 * sizeof(__be32));
+}
+
 /**
  * ice_fdir_comp_rules - compare 2 filters
  * @a: a Flow Director filter data structure
  * @b: a Flow Director filter data structure
+ * @v6: bool true if v6 filter
  *
  * Returns true if the filters match
  */
 static bool
-ice_fdir_comp_rules(struct ice_fdir_fltr *a,  struct ice_fdir_fltr *b)
+ice_fdir_comp_rules(struct ice_fdir_fltr *a,  struct ice_fdir_fltr *b, bool v6)
 {
 	enum ice_fltr_ptype flow_type = a->flow_type;
 
 	/* The calling function already checks that the two filters have the
 	 * same flow_type.
 	 */
-	if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
-	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
-	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
-		if (a->ip.dst_ip == b->ip.dst_ip &&
-		    a->ip.src_ip == b->ip.src_ip &&
-		    a->ip.dst_port == b->ip.dst_port &&
-		    a->ip.src_port == b->ip.src_port)
-			return true;
-	} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
-		if (a->ip.dst_ip == b->ip.dst_ip &&
-		    a->ip.src_ip == b->ip.src_ip &&
-		    a->ip.l4_header == b->ip.l4_header &&
-		    a->ip.proto == b->ip.proto &&
-		    a->ip.ip_ver == b->ip.ip_ver &&
-		    a->ip.tos == b->ip.tos)
-			return true;
+	if (!v6) {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.dst_port == b->ip.v4.dst_port &&
+			    a->ip.v4.src_port == b->ip.v4.src_port)
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER) {
+			if (a->ip.v4.dst_ip == b->ip.v4.dst_ip &&
+			    a->ip.v4.src_ip == b->ip.v4.src_ip &&
+			    a->ip.v4.l4_header == b->ip.v4.l4_header &&
+			    a->ip.v4.proto == b->ip.v4.proto &&
+			    a->ip.v4.ip_ver == b->ip.v4.ip_ver &&
+			    a->ip.v4.tos == b->ip.v4.tos)
+				return true;
+		}
+	} else {
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_SCTP) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.dst_ip,
+					       b->ip.v6.dst_ip) &&
+			    !ice_cmp_ipv6_addr(a->ip.v6.src_ip,
+					       b->ip.v6.src_ip))
+				return true;
+		} else if (flow_type == ICE_FLTR_PTYPE_NONF_IPV6_OTHER) {
+			if (a->ip.v6.dst_port == b->ip.v6.dst_port &&
+			    a->ip.v6.src_port == b->ip.v6.src_port)
+				return true;
+		}
 	}
 
 	return false;
@@ -586,7 +821,9 @@ bool ice_fdir_is_dup_fltr(struct ice_hw *hw, struct ice_fdir_fltr *input)
 		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
 		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_SCTP ||
 		    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_OTHER)
-			ret = ice_fdir_comp_rules(rule, input);
+			ret = ice_fdir_comp_rules(rule, input, false);
+		else
+			ret = ice_fdir_comp_rules(rule, input, true);
 		if (ret) {
 			if (rule->fltr_id == input->fltr_id &&
 			    rule->q_index != input->q_index)
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 1a13c80e1eac..977dcbc1400d 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -80,6 +80,19 @@ struct ice_fdir_v4 {
 	u8 proto;
 };
 
+#define ICE_IPV6_ADDR_LEN_AS_U32		4
+
+struct ice_fdir_v6 {
+	__be32 dst_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be32 src_ip[ICE_IPV6_ADDR_LEN_AS_U32];
+	__be16 dst_port;
+	__be16 src_port;
+	__be32 l4_header; /* next header */
+	__be32 sec_parm_idx; /* security parameter index */
+	u8 tc;
+	u8 proto;
+};
+
 struct ice_fdir_extra {
 	u8 dst_mac[ETH_ALEN];	/* dest MAC address */
 	u32 usr_def[2];		/* user data */
@@ -91,8 +104,10 @@ struct ice_fdir_fltr {
 	struct list_head fltr_node;
 	enum ice_fltr_ptype flow_type;
 
-	struct ice_fdir_v4 ip;
-	struct ice_fdir_v4 mask;
+	union {
+		struct ice_fdir_v4 v4;
+		struct ice_fdir_v6 v6;
+	} ip, mask;
 
 	struct ice_fdir_extra ext_data;
 	struct ice_fdir_extra ext_mask;
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index 16d2f599bd70..da82783d1571 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -3518,6 +3518,12 @@ static const struct ice_fd_src_dst_pair ice_fd_pairs[] = {
 	{ ICE_PROT_IPV4_IL, 2, 12 },
 	{ ICE_PROT_IPV4_IL, 2, 16 },
 
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 8 },
+	{ ICE_PROT_IPV6_OF_OR_S, 8, 24 },
+
+	{ ICE_PROT_IPV6_IL, 8, 8 },
+	{ ICE_PROT_IPV6_IL, 8, 24 },
+
 	{ ICE_PROT_TCP_IL, 1, 0 },
 	{ ICE_PROT_TCP_IL, 1, 2 },
 
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index fcf1f7853a41..f1bfe8c94f1f 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -171,6 +171,10 @@ enum ice_fltr_ptype {
 	ICE_FLTR_PTYPE_NONF_IPV4_SCTP,
 	ICE_FLTR_PTYPE_NONF_IPV4_OTHER,
 	ICE_FLTR_PTYPE_FRAG_IPV4,
+	ICE_FLTR_PTYPE_NONF_IPV6_UDP,
+	ICE_FLTR_PTYPE_NONF_IPV6_TCP,
+	ICE_FLTR_PTYPE_NONF_IPV6_SCTP,
+	ICE_FLTR_PTYPE_NONF_IPV6_OTHER,
 	ICE_FLTR_PTYPE_MAX,
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 2c57ffcb199004825184f96081fbf036d3b2426a Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:44 -0700
Subject: ice: Enable flex-bytes support

Flex-bytes allows for packet matching based on an offset and value. This
is supported via the ethtool user-def option.  It is specified by providing
an offset followed by a 2 byte match value. Offset is measured from the
start of the MAC address.

The following restrictions apply to flex-bytes. The specified offset must
be an even number and must not be larger than 0x1fe.

Example usage:

ethtool -N eth0 flow-type tcp4 src-ip 192.168.0.55 dst-ip 172.16.0.55 \
src-port 12 dst-port 13 user-def 0x10ffff action 32

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c  |  88 +++++++++++-
 drivers/net/ethernet/intel/ice/ice_fdir.c          |   3 +
 drivers/net/ethernet/intel/ice/ice_fdir.h          |  13 ++
 drivers/net/ethernet/intel/ice/ice_flow.c          | 150 +++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_flow.h          |  12 ++
 drivers/net/ethernet/intel/ice/ice_protocol_type.h |   1 +
 6 files changed, 265 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index aa85d5ad2477..f240c062860b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -92,6 +92,19 @@ static enum ice_fltr_ptype ice_ethtool_flow_to_fltr(int eth)
 	}
 }
 
+/**
+ * ice_is_mask_valid - check mask field set
+ * @mask: full mask to check
+ * @field: field for which mask should be valid
+ *
+ * Return true if every bit set in @field is also set in @mask, false
+ * otherwise.
+ */
+static bool ice_is_mask_valid(u64 mask, u64 field)
+{
+	return (mask & field) == field;
+}
+
 /**
  * ice_get_ethtool_fdir_entry - fill ethtool structure with fdir filter data
  * @hw: hardware structure that contains filter list
@@ -335,6 +348,53 @@ void ice_fdir_release_flows(struct ice_hw *hw)
 		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
 }
 
+/**
+ * ice_parse_rx_flow_user_data - deconstruct user-defined data
+ * @fsp: pointer to ethtool Rx flow specification
+ * @data: pointer to userdef data structure for storage
+ *
+ * Returns 0 on success, negative error value on failure
+ */
+static int
+ice_parse_rx_flow_user_data(struct ethtool_rx_flow_spec *fsp,
+			    struct ice_rx_flow_userdef *data)
+{
+	u64 value, mask;
+
+	memset(data, 0, sizeof(*data));
+	if (!(fsp->flow_type & FLOW_EXT))
+		return 0;
+
+	value = be64_to_cpu(*((__force __be64 *)fsp->h_ext.data));
+	mask = be64_to_cpu(*((__force __be64 *)fsp->m_ext.data));
+	if (!mask)
+		return 0;
+
+#define ICE_USERDEF_FLEX_WORD_M	GENMASK_ULL(15, 0)
+#define ICE_USERDEF_FLEX_OFFS_S	16
+#define ICE_USERDEF_FLEX_OFFS_M	GENMASK_ULL(31, ICE_USERDEF_FLEX_OFFS_S)
+#define ICE_USERDEF_FLEX_FLTR_M	GENMASK_ULL(31, 0)
+
+	/* 0x1fe is the maximum value for offsets stored in the internal
+	 * filtering tables.
+	 */
+#define ICE_USERDEF_FLEX_MAX_OFFS_VAL 0x1fe
+
+	if (!ice_is_mask_valid(mask, ICE_USERDEF_FLEX_FLTR_M) ||
+	    value > ICE_USERDEF_FLEX_FLTR_M)
+		return -EINVAL;
+
+	data->flex_word = value & ICE_USERDEF_FLEX_WORD_M;
+	data->flex_offset = (value & ICE_USERDEF_FLEX_OFFS_M) >>
+			     ICE_USERDEF_FLEX_OFFS_S;
+	if (data->flex_offset > ICE_USERDEF_FLEX_MAX_OFFS_VAL)
+		return -EINVAL;
+
+	data->flex_fltr = true;
+
+	return 0;
+}
+
 /**
  * ice_fdir_num_avail_fltr - return the number of unused flow director filters
  * @hw: pointer to hardware structure
@@ -936,11 +996,13 @@ ice_set_fdir_ip6_usr_seg(struct ice_flow_seg_info *seg,
  * ice_cfg_fdir_xtrct_seq - Configure extraction sequence for the given filter
  * @pf: PF structure
  * @fsp: pointer to ethtool Rx flow specification
+ * @user: user defined data from flow specification
  *
  * Returns 0 on success.
  */
 static int
-ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp)
+ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp,
+		       struct ice_rx_flow_userdef *user)
 {
 	struct ice_flow_seg_info *seg, *tun_seg;
 	struct device *dev = ice_pf_to_dev(pf);
@@ -1008,6 +1070,18 @@ ice_cfg_fdir_xtrct_seq(struct ice_pf *pf, struct ethtool_rx_flow_spec *fsp)
 	/* tunnel segments are shifted up one. */
 	memcpy(&tun_seg[1], seg, sizeof(*seg));
 
+	if (user && user->flex_fltr) {
+		perfect_filter = false;
+		ice_flow_add_fld_raw(seg, user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+		ice_flow_add_fld_raw(&tun_seg[1], user->flex_offset,
+				     ICE_FLTR_PRGM_FLEX_WORD_SIZE,
+				     ICE_FLOW_FLD_OFF_INVAL,
+				     ICE_FLOW_FLD_OFF_INVAL);
+	}
+
 	/* add filter for outer headers */
 	fltr_idx = ice_ethtool_flow_to_fltr(fsp->flow_type & ~FLOW_EXT);
 	ret = ice_fdir_set_hw_fltr_rule(pf, seg, fltr_idx,
@@ -1433,6 +1507,7 @@ ice_set_fdir_input_set(struct ice_vsi *vsi, struct ethtool_rx_flow_spec *fsp,
  */
 int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 {
+	struct ice_rx_flow_userdef userdata;
 	struct ethtool_rx_flow_spec *fsp;
 	struct ice_fdir_fltr *input;
 	struct device *dev;
@@ -1460,10 +1535,13 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 
 	fsp = (struct ethtool_rx_flow_spec *)&cmd->fs;
 
+	if (ice_parse_rx_flow_user_data(fsp, &userdata))
+		return -EINVAL;
+
 	if (fsp->flow_type & FLOW_MAC_EXT)
 		return -EINVAL;
 
-	ret = ice_cfg_fdir_xtrct_seq(pf, fsp);
+	ret = ice_cfg_fdir_xtrct_seq(pf, fsp, &userdata);
 	if (ret)
 		return ret;
 
@@ -1495,6 +1573,12 @@ int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd)
 		goto release_lock;
 	}
 
+	if (userdata.flex_fltr) {
+		input->flex_fltr = true;
+		input->flex_word = cpu_to_be16(userdata.flex_word);
+		input->flex_offset = userdata.flex_offset;
+	}
+
 	/* input struct is added to the HW filter list */
 	ice_fdir_update_list_entry(pf, input, fsp->location);
 
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.c b/drivers/net/ethernet/intel/ice/ice_fdir.c
index d50cc6e9086e..6834df14332f 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.c
@@ -650,6 +650,9 @@ ice_fdir_get_gen_prgm_pkt(struct ice_hw *hw, struct ice_fdir_fltr *input,
 		return ICE_ERR_PARAM;
 	}
 
+	if (input->flex_fltr)
+		ice_pkt_insert_u16(loc, input->flex_offset, input->flex_word);
+
 	return 0;
 }
 
diff --git a/drivers/net/ethernet/intel/ice/ice_fdir.h b/drivers/net/ethernet/intel/ice/ice_fdir.h
index 977dcbc1400d..1c587766daab 100644
--- a/drivers/net/ethernet/intel/ice/ice_fdir.h
+++ b/drivers/net/ethernet/intel/ice/ice_fdir.h
@@ -68,6 +68,14 @@ struct ice_fd_fltr_desc_ctx {
 	u8 fdid_mdid;
 };
 
+#define ICE_FLTR_PRGM_FLEX_WORD_SIZE	sizeof(__be16)
+
+struct ice_rx_flow_userdef {
+	u16 flex_word;
+	u16 flex_offset;
+	u16 flex_fltr;
+};
+
 struct ice_fdir_v4 {
 	__be32 dst_ip;
 	__be32 src_ip;
@@ -112,6 +120,11 @@ struct ice_fdir_fltr {
 	struct ice_fdir_extra ext_data;
 	struct ice_fdir_extra ext_mask;
 
+	/* flex byte filter data */
+	__be16 flex_word;
+	u16 flex_offset;
+	u16 flex_fltr;
+
 	/* filter control */
 	u16 q_index;
 	u16 dest_vsi;
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.c b/drivers/net/ethernet/intel/ice/ice_flow.c
index f4b6c3933564..d74e5290677f 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.c
+++ b/drivers/net/ethernet/intel/ice/ice_flow.c
@@ -193,6 +193,40 @@ ice_flow_val_hdrs(struct ice_flow_seg_info *segs, u8 segs_cnt)
 	return 0;
 }
 
+/* Sizes of fixed known protocol headers without header options */
+#define ICE_FLOW_PROT_HDR_SZ_MAC	14
+#define ICE_FLOW_PROT_HDR_SZ_IPV4	20
+#define ICE_FLOW_PROT_HDR_SZ_IPV6	40
+#define ICE_FLOW_PROT_HDR_SZ_TCP	20
+#define ICE_FLOW_PROT_HDR_SZ_UDP	8
+#define ICE_FLOW_PROT_HDR_SZ_SCTP	12
+
+/**
+ * ice_flow_calc_seg_sz - calculates size of a packet segment based on headers
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose header size is to be determined
+ */
+static u16 ice_flow_calc_seg_sz(struct ice_flow_prof_params *params, u8 seg)
+{
+	u16 sz = ICE_FLOW_PROT_HDR_SZ_MAC;
+
+	/* L3 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV4)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV4;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_IPV6)
+		sz += ICE_FLOW_PROT_HDR_SZ_IPV6;
+
+	/* L4 headers */
+	if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_TCP)
+		sz += ICE_FLOW_PROT_HDR_SZ_TCP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_UDP)
+		sz += ICE_FLOW_PROT_HDR_SZ_UDP;
+	else if (params->prof->segs[seg].hdrs & ICE_FLOW_SEG_HDR_SCTP)
+		sz += ICE_FLOW_PROT_HDR_SZ_SCTP;
+
+	return sz;
+}
+
 /**
  * ice_flow_proc_seg_hdrs - process protocol headers present in pkt segments
  * @params: information about the flow to be processed
@@ -347,6 +381,81 @@ ice_flow_xtract_fld(struct ice_hw *hw, struct ice_flow_prof_params *params,
 	return 0;
 }
 
+/**
+ * ice_flow_xtract_raws - Create extract sequence entries for raw bytes
+ * @hw: pointer to the HW struct
+ * @params: information about the flow to be processed
+ * @seg: index of packet segment whose raw fields are to be extracted
+ */
+static enum ice_status
+ice_flow_xtract_raws(struct ice_hw *hw, struct ice_flow_prof_params *params,
+		     u8 seg)
+{
+	u16 fv_words;
+	u16 hdrs_sz;
+	u8 i;
+
+	if (!params->prof->segs[seg].raws_cnt)
+		return 0;
+
+	if (params->prof->segs[seg].raws_cnt >
+	    ARRAY_SIZE(params->prof->segs[seg].raws))
+		return ICE_ERR_MAX_LIMIT;
+
+	/* Offsets within the segment headers are not supported */
+	hdrs_sz = ice_flow_calc_seg_sz(params, seg);
+	if (!hdrs_sz)
+		return ICE_ERR_PARAM;
+
+	fv_words = hw->blk[params->blk].es.fvw;
+
+	for (i = 0; i < params->prof->segs[seg].raws_cnt; i++) {
+		struct ice_flow_seg_fld_raw *raw;
+		u16 off, cnt, j;
+
+		raw = &params->prof->segs[seg].raws[i];
+
+		/* Storing extraction information */
+		raw->info.xtrct.prot_id = ICE_PROT_MAC_OF_OR_S;
+		raw->info.xtrct.off = (raw->off / ICE_FLOW_FV_EXTRACT_SZ) *
+			ICE_FLOW_FV_EXTRACT_SZ;
+		raw->info.xtrct.disp = (raw->off % ICE_FLOW_FV_EXTRACT_SZ) *
+			BITS_PER_BYTE;
+		raw->info.xtrct.idx = params->es_cnt;
+
+		/* Determine the number of field vector entries this raw field
+		 * consumes.
+		 */
+		cnt = DIV_ROUND_UP(raw->info.xtrct.disp +
+				   (raw->info.src.last * BITS_PER_BYTE),
+				   (ICE_FLOW_FV_EXTRACT_SZ * BITS_PER_BYTE));
+		off = raw->info.xtrct.off;
+		for (j = 0; j < cnt; j++) {
+			u16 idx;
+
+			/* Make sure the number of extraction sequence required
+			 * does not exceed the block's capability
+			 */
+			if (params->es_cnt >= hw->blk[params->blk].es.count ||
+			    params->es_cnt >= ICE_MAX_FV_WORDS)
+				return ICE_ERR_MAX_LIMIT;
+
+			/* some blocks require a reversed field vector layout */
+			if (hw->blk[params->blk].es.reverse)
+				idx = fv_words - params->es_cnt - 1;
+			else
+				idx = params->es_cnt;
+
+			params->es[idx].prot_id = raw->info.xtrct.prot_id;
+			params->es[idx].off = off;
+			params->es_cnt++;
+			off += ICE_FLOW_FV_EXTRACT_SZ;
+		}
+	}
+
+	return 0;
+}
+
 /**
  * ice_flow_create_xtrct_seq - Create an extraction sequence for given segments
  * @hw: pointer to the HW struct
@@ -373,6 +482,11 @@ ice_flow_create_xtrct_seq(struct ice_hw *hw,
 			if (status)
 				return status;
 		}
+
+		/* Process raw matching bytes */
+		status = ice_flow_xtract_raws(hw, params, i);
+		if (status)
+			return status;
 	}
 
 	return status;
@@ -943,6 +1057,42 @@ ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
 	ice_flow_set_fld_ext(seg, fld, t, val_loc, mask_loc, last_loc);
 }
 
+/**
+ * ice_flow_add_fld_raw - sets locations of a raw field from entry's input buf
+ * @seg: packet segment the field being set belongs to
+ * @off: offset of the raw field from the beginning of the segment in bytes
+ * @len: length of the raw pattern to be matched
+ * @val_loc: location of the value to match from entry's input buffer
+ * @mask_loc: location of mask value from entry's input buffer
+ *
+ * This function specifies the offset of the raw field to be matched from the
+ * beginning of the specified packet segment, and the locations, in the form of
+ * byte offsets from the start of the input buffer for a flow entry, from where
+ * the value to match and the mask value are extracted. These locations are
+ * then stored in the flow profile. When adding flow entries to the associated
+ * flow profile, these locations can be used to quickly extract the values to
+ * create the content of a match entry. This function should only be used for
+ * fixed-size data structures.
+ */
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc)
+{
+	if (seg->raws_cnt < ICE_FLOW_SEG_RAW_FLD_MAX) {
+		seg->raws[seg->raws_cnt].off = off;
+		seg->raws[seg->raws_cnt].info.type = ICE_FLOW_FLD_TYPE_SIZE;
+		seg->raws[seg->raws_cnt].info.src.val = val_loc;
+		seg->raws[seg->raws_cnt].info.src.mask = mask_loc;
+		/* The "last" field is used to store the length of the field */
+		seg->raws[seg->raws_cnt].info.src.last = len;
+	}
+
+	/* Overflows of "raws" will be handled as an error condition later in
+	 * the flow when this information is processed.
+	 */
+	seg->raws_cnt++;
+}
+
 #define ICE_FLOW_RSS_SEG_HDR_L3_MASKS \
 	(ICE_FLOW_SEG_HDR_IPV4 | ICE_FLOW_SEG_HDR_IPV6)
 
diff --git a/drivers/net/ethernet/intel/ice/ice_flow.h b/drivers/net/ethernet/intel/ice/ice_flow.h
index 3c784c3b5db2..3913da2116d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_flow.h
+++ b/drivers/net/ethernet/intel/ice/ice_flow.h
@@ -128,6 +128,7 @@ enum ice_flow_priority {
 };
 
 #define ICE_FLOW_SEG_MAX		2
+#define ICE_FLOW_SEG_RAW_FLD_MAX	2
 #define ICE_FLOW_FV_EXTRACT_SZ		2
 
 #define ICE_FLOW_SET_HDRS(seg, val)	((seg)->hdrs |= (u32)(val))
@@ -164,12 +165,20 @@ struct ice_flow_fld_info {
 	struct ice_flow_seg_xtrct xtrct;
 };
 
+struct ice_flow_seg_fld_raw {
+	struct ice_flow_fld_info info;
+	u16 off;	/* Offset from the start of the segment */
+};
+
 struct ice_flow_seg_info {
 	u32 hdrs;	/* Bitmask indicating protocol headers present */
 	u64 match;	/* Bitmask indicating header fields to be matched */
 	u64 range;	/* Bitmask indicating header fields matched as ranges */
 
 	struct ice_flow_fld_info fields[ICE_FLOW_FIELD_IDX_MAX];
+
+	u8 raws_cnt;	/* Number of raw fields to be matched */
+	struct ice_flow_seg_fld_raw raws[ICE_FLOW_SEG_RAW_FLD_MAX];
 };
 
 /* This structure describes a flow entry, and is tracked only in this file */
@@ -228,6 +237,9 @@ ice_flow_rem_entry(struct ice_hw *hw, enum ice_block blk, u64 entry_h);
 void
 ice_flow_set_fld(struct ice_flow_seg_info *seg, enum ice_flow_field fld,
 		 u16 val_loc, u16 mask_loc, u16 last_loc, bool range);
+void
+ice_flow_add_fld_raw(struct ice_flow_seg_info *seg, u16 off, u8 len,
+		     u16 val_loc, u16 mask_loc);
 void ice_rem_vsi_rss_list(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status ice_replay_rss_cfg(struct ice_hw *hw, u16 vsi_handle);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_protocol_type.h b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
index babe4a485fd6..7f4c1ec1eff2 100644
--- a/drivers/net/ethernet/intel/ice/ice_protocol_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_protocol_type.h
@@ -12,6 +12,7 @@
  */
 enum ice_prot_id {
 	ICE_PROT_ID_INVAL	= 0,
+	ICE_PROT_MAC_OF_OR_S	= 1,
 	ICE_PROT_IPV4_OF_OR_S	= 32,
 	ICE_PROT_IPV4_IL	= 33,
 	ICE_PROT_IPV6_OF_OR_S	= 40,
-- 
cgit v1.2.3-59-g8ed1b


From 83af00395190bc2db05a67a417a2ea0d5967b74d Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Mon, 11 May 2020 18:01:45 -0700
Subject: ice: Restore filters following reset

Following a reset, Flow Director filters are cleared from the hardware.
Rebuild the filters using the software structures containing the filter
rules.

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice.h              |  2 +
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 65 +++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.c          | 17 +++---
 drivers/net/ethernet/intel/ice/ice_main.c         | 31 +++++++++++
 4 files changed, 109 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 298a65a3799c..38739ee8cd94 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -566,6 +566,8 @@ int
 ice_get_fdir_fltr_ids(struct ice_hw *hw, struct ethtool_rxnfc *cmd,
 		      u32 *rule_locs);
 void ice_fdir_release_flows(struct ice_hw *hw);
+void ice_fdir_replay_flows(struct ice_hw *hw);
+void ice_fdir_replay_fltrs(struct ice_pf *pf);
 int ice_fdir_create_dflt_rules(struct ice_pf *pf);
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index f240c062860b..a0002032be61 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -348,6 +348,53 @@ void ice_fdir_release_flows(struct ice_hw *hw)
 		ice_fdir_erase_flow_from_hw(hw, ICE_BLK_FD, flow);
 }
 
+/**
+ * ice_fdir_replay_flows - replay HW Flow Director filter info
+ * @hw: pointer to HW instance
+ */
+void ice_fdir_replay_flows(struct ice_hw *hw)
+{
+	int flow;
+
+	for (flow = 0; flow < ICE_FLTR_PTYPE_MAX; flow++) {
+		int tun;
+
+		if (!hw->fdir_prof[flow] || !hw->fdir_prof[flow]->cnt)
+			continue;
+		for (tun = 0; tun < ICE_FD_HW_SEG_MAX; tun++) {
+			struct ice_flow_prof *hw_prof;
+			struct ice_fd_hw_prof *prof;
+			u64 prof_id;
+			int j;
+
+			prof = hw->fdir_prof[flow];
+			prof_id = flow + tun * ICE_FLTR_PTYPE_MAX;
+			ice_flow_add_prof(hw, ICE_BLK_FD, ICE_FLOW_RX, prof_id,
+					  prof->fdir_seg[tun], TNL_SEG_CNT(tun),
+					  &hw_prof);
+			for (j = 0; j < prof->cnt; j++) {
+				enum ice_flow_priority prio;
+				u64 entry_h = 0;
+				int err;
+
+				prio = ICE_FLOW_PRIO_NORMAL;
+				err = ice_flow_add_entry(hw, ICE_BLK_FD,
+							 prof_id,
+							 prof->vsi_h[0],
+							 prof->vsi_h[j],
+							 prio, prof->fdir_seg,
+							 &entry_h);
+				if (err) {
+					dev_err(ice_hw_to_dev(hw), "Could not replay Flow Director, flow type %d\n",
+						flow);
+					continue;
+				}
+				prof->entry_h[j][tun] = entry_h;
+			}
+		}
+	}
+}
+
 /**
  * ice_parse_rx_flow_user_data - deconstruct user-defined data
  * @fsp: pointer to ethtool Rx flow specification
@@ -1225,6 +1272,24 @@ ice_fdir_write_all_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input,
 	return 0;
 }
 
+/**
+ * ice_fdir_replay_fltrs - replay filters from the HW filter list
+ * @pf: board private structure
+ */
+void ice_fdir_replay_fltrs(struct ice_pf *pf)
+{
+	struct ice_fdir_fltr *f_rule;
+	struct ice_hw *hw = &pf->hw;
+
+	list_for_each_entry(f_rule, &hw->fdir_list_head, fltr_node) {
+		int err = ice_fdir_write_all_fltr(pf, f_rule, true);
+
+		if (err)
+			dev_dbg(ice_pf_to_dev(pf), "Flow Director error %d, could not reprogram filter %d\n",
+				err, f_rule->fltr_id);
+	}
+}
+
 /**
  * ice_fdir_create_dflt_rules - create default perfect filters
  * @pf: PF data structure
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 43c949e0a760..ff77fc3f633e 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2748,6 +2748,8 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 		goto err_vsi;
 
 	ice_vsi_get_qs(vsi);
+
+	ice_alloc_fd_res(vsi);
 	ice_vsi_set_tc_cfg(vsi);
 
 	/* Initialize VSI struct elements and create VSI in FW */
@@ -2756,6 +2758,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 		goto err_vsi;
 
 	switch (vsi->type) {
+	case ICE_VSI_CTRL:
 	case ICE_VSI_PF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
 		if (ret)
@@ -2780,12 +2783,14 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 			if (ret)
 				goto err_vectors;
 		}
-		/* Do not exit if configuring RSS had an issue, at least
-		 * receive traffic on first queue. Hence no need to capture
-		 * return value
-		 */
-		if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
-			ice_vsi_cfg_rss_lut_key(vsi);
+		/* ICE_VSI_CTRL does not need RSS so skip RSS processing */
+		if (vsi->type != ICE_VSI_CTRL)
+			/* Do not exit if configuring RSS had an issue, at
+			 * least receive traffic on first queue. Hence no
+			 * need to capture return value
+			 */
+			if (test_bit(ICE_FLAG_RSS_ENA, pf->flags))
+				ice_vsi_cfg_rss_lut_key(vsi);
 		break;
 	case ICE_VSI_VF:
 		ret = ice_vsi_alloc_q_vectors(vsi);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index fecc3b29a4de..d06a3311a2dc 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4895,6 +4895,21 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 		goto err_sched_init_port;
 	}
 
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		wr32(hw, PFQF_FD_ENA, PFQF_FD_ENA_FD_ENA_M);
+		if (!rd32(hw, PFQF_FD_SIZE)) {
+			u16 unused, guar, b_effort;
+
+			guar = hw->func_caps.fd_fltr_guar;
+			b_effort = hw->func_caps.fd_fltr_best_effort;
+
+			/* force guaranteed filter pool for PF */
+			ice_alloc_fd_guar_item(hw, &unused, guar);
+			/* force shared filter pool for PF */
+			ice_alloc_fd_shrd_item(hw, &unused, b_effort);
+		}
+	}
+
 	if (test_bit(ICE_FLAG_DCB_ENA, pf->flags))
 		ice_dcb_rebuild(pf);
 
@@ -4913,6 +4928,22 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 		}
 	}
 
+	/* If Flow Director is active */
+	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
+		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
+		if (err) {
+			dev_err(dev, "control VSI rebuild failed: %d\n", err);
+			goto err_vsi_rebuild;
+		}
+
+		/* replay HW Flow Director recipes */
+		if (hw->fdir_prof)
+			ice_fdir_replay_flows(hw);
+
+		/* replay Flow Director filters */
+		ice_fdir_replay_fltrs(pf);
+	}
+
 	ice_update_pf_netdev_link(pf);
 
 	/* tell the firmware we are up */
-- 
cgit v1.2.3-59-g8ed1b


From 28bf26724fdb0e02267d19e280d6717ee810a10d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Mon, 11 May 2020 18:01:46 -0700
Subject: ice: Implement aRFS

Enable accelerated Receive Flow Steering (aRFS). It is used to steer Rx
flows to a specific queue. This functionality is triggered by the network
stack through ndo_rx_flow_steer and requires Flow Director (ntuple on) to
function.

The fltr_info is used to add/remove/update flow rules in the HW, the
fltr_state is used to determine what to do with the filter with respect
to HW and/or SW, and the flow_id is used in co-ordination with the
network stack.

The work for aRFS is split into two paths: the ndo_rx_flow_steer
operation and the ice_service_task. The former is where the kernel hands
us an Rx SKB among other items to setup aRFS and the latter is where
the driver adds/updates/removes filter rules from HW and updates filter
state.

In the Rx path the following things can happen:
        1. New aRFS entries are added to the hash table and the state is
           set to ICE_ARFS_INACTIVE so the filter can be updated in HW
           by the ice_service_task path.
        2. aRFS entries have their Rx Queue updated if we receive a
           pre-existing flow_id and the filter state is ICE_ARFS_ACTIVE.
           The state is set to ICE_ARFS_INACTIVE so the filter can be
           updated in HW by the ice_service_task path.
        3. aRFS entries marked as ICE_ARFS_TODEL are deleted

In the ice_service_task path the following things can happen:
        1. New aRFS entries marked as ICE_ARFS_INACTIVE are added or
           updated in HW
           and their state is updated to ICE_ARFS_ACTIVE.
        2. aRFS entries are deleted from HW and their state is updated
           to ICE_ARFS_TODEL.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Madhu Chittim <madhu.chittim@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/Makefile           |   1 +
 drivers/net/ethernet/intel/ice/ice.h              |  14 +
 drivers/net/ethernet/intel/ice/ice_arfs.c         | 663 ++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_arfs.h         |  82 +++
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c |   8 +-
 drivers/net/ethernet/intel/ice/ice_lib.c          |   1 +
 drivers/net/ethernet/intel/ice/ice_main.c         |  48 +-
 7 files changed, 805 insertions(+), 12 deletions(-)
 create mode 100644 drivers/net/ethernet/intel/ice/ice_arfs.c
 create mode 100644 drivers/net/ethernet/intel/ice/ice_arfs.h

diff --git a/drivers/net/ethernet/intel/ice/Makefile b/drivers/net/ethernet/intel/ice/Makefile
index 9ffa2e366766..980bbcc64b4b 100644
--- a/drivers/net/ethernet/intel/ice/Makefile
+++ b/drivers/net/ethernet/intel/ice/Makefile
@@ -26,4 +26,5 @@ ice-y := ice_main.o	\
 	 ice_ethtool.o
 ice-$(CONFIG_PCI_IOV) += ice_virtchnl_pf.o ice_sriov.o
 ice-$(CONFIG_DCB) += ice_dcb.o ice_dcb_nl.o ice_dcb_lib.o
+ice-$(CONFIG_RFS_ACCEL) += ice_arfs.o
 ice-$(CONFIG_XDP_SOCKETS) += ice_xsk.o
diff --git a/drivers/net/ethernet/intel/ice/ice.h b/drivers/net/ethernet/intel/ice/ice.h
index 38739ee8cd94..5792ee616b5c 100644
--- a/drivers/net/ethernet/intel/ice/ice.h
+++ b/drivers/net/ethernet/intel/ice/ice.h
@@ -34,6 +34,7 @@
 #include <linux/ctype.h>
 #include <linux/bpf.h>
 #include <linux/avf/virtchnl.h>
+#include <linux/cpu_rmap.h>
 #include <net/devlink.h>
 #include <net/ipv6.h>
 #include <net/xdp_sock.h>
@@ -52,6 +53,7 @@
 #include "ice_sriov.h"
 #include "ice_fdir.h"
 #include "ice_xsk.h"
+#include "ice_arfs.h"
 
 extern const char ice_drv_ver[];
 #define ICE_BAR0		0
@@ -271,6 +273,14 @@ struct ice_vsi {
 	u8 *rss_lut_user;	/* User configured lookup table entries */
 	u8 rss_lut_type;	/* used to configure Get/Set RSS LUT AQ call */
 
+	/* aRFS members only allocated for the PF VSI */
+#define ICE_MAX_ARFS_LIST	1024
+#define ICE_ARFS_LST_MASK	(ICE_MAX_ARFS_LIST - 1)
+	struct hlist_head *arfs_fltr_list;
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	spinlock_t arfs_lock;	/* protects aRFS hash table and filter state */
+	atomic_t *arfs_last_fltr_id;
+
 	u16 max_frame;
 	u16 rx_buf_len;
 
@@ -558,6 +568,9 @@ int ice_schedule_reset(struct ice_pf *pf, enum ice_reset_req reset);
 void ice_print_link_msg(struct ice_vsi *vsi, bool isup);
 const char *ice_stat_str(enum ice_status stat_err);
 const char *ice_aq_str(enum ice_aq_err aq_err);
+int
+ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
+		    bool is_tun);
 void ice_vsi_manage_fdir(struct ice_vsi *vsi, bool ena);
 int ice_add_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
 int ice_del_fdir_ethtool(struct ice_vsi *vsi, struct ethtool_rxnfc *cmd);
@@ -571,5 +584,6 @@ void ice_fdir_replay_fltrs(struct ice_pf *pf);
 int ice_fdir_create_dflt_rules(struct ice_pf *pf);
 int ice_open(struct net_device *netdev);
 int ice_stop(struct net_device *netdev);
+void ice_service_task_schedule(struct ice_pf *pf);
 
 #endif /* _ICE_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.c b/drivers/net/ethernet/intel/ice/ice_arfs.c
new file mode 100644
index 000000000000..6560acd76c94
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.c
@@ -0,0 +1,663 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#include "ice.h"
+
+/**
+ * ice_is_arfs_active - helper to check if aRFS is active
+ * @vsi: VSI to check
+ */
+static bool ice_is_arfs_active(struct ice_vsi *vsi)
+{
+	return !!vsi->arfs_fltr_list;
+}
+
+/**
+ * ice_is_arfs_using_perfect_flow - check if aRFS has active perfect filters
+ * @hw: pointer to the HW structure
+ * @flow_type: flow type as Flow Director understands it
+ *
+ * Flow Director will query this function to see if aRFS is currently using
+ * the specified flow_type for perfect (4-tuple) filters.
+ */
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw, enum ice_fltr_ptype flow_type)
+{
+	struct ice_arfs_active_fltr_cntrs *arfs_fltr_cntrs;
+	struct ice_pf *pf = hw->back;
+	struct ice_vsi *vsi;
+
+	vsi = ice_get_main_vsi(pf);
+	if (!vsi)
+		return false;
+
+	arfs_fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	/* active counters can be updated by multiple CPUs */
+	smp_mb__before_atomic();
+	switch (flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		return atomic_read(&arfs_fltr_cntrs->active_udpv6_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv4_cnt) > 0;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		return atomic_read(&arfs_fltr_cntrs->active_tcpv6_cnt) > 0;
+	default:
+		return false;
+	}
+}
+
+/**
+ * ice_arfs_update_active_fltr_cntrs - update active filter counters for aRFS
+ * @vsi: VSI that aRFS is active on
+ * @entry: aRFS entry used to change counters
+ * @add: true to increment counter, false to decrement
+ */
+static void
+ice_arfs_update_active_fltr_cntrs(struct ice_vsi *vsi,
+				  struct ice_arfs_entry *entry, bool add)
+{
+	struct ice_arfs_active_fltr_cntrs *fltr_cntrs = vsi->arfs_fltr_cntrs;
+
+	switch (entry->fltr_info.flow_type) {
+	case ICE_FLTR_PTYPE_NONF_IPV4_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_TCP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_tcpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_tcpv6_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV4_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv4_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv4_cnt);
+		break;
+	case ICE_FLTR_PTYPE_NONF_IPV6_UDP:
+		if (add)
+			atomic_inc(&fltr_cntrs->active_udpv6_cnt);
+		else
+			atomic_dec(&fltr_cntrs->active_udpv6_cnt);
+		break;
+	default:
+		dev_err(ice_pf_to_dev(vsi->back), "aRFS: Failed to update filter counters, invalid filter type %d\n",
+			entry->fltr_info.flow_type);
+	}
+}
+
+/**
+ * ice_arfs_del_flow_rules - delete the rules passed in from HW
+ * @vsi: VSI for the flow rules that need to be deleted
+ * @del_list_head: head of the list of ice_arfs_entry(s) for rule deletion
+ *
+ * Loop through the delete list passed in and remove the rules from HW. After
+ * each rule is deleted, disconnect and free the ice_arfs_entry because it is no
+ * longer being referenced by the aRFS hash table.
+ */
+static void
+ice_arfs_del_flow_rules(struct ice_vsi *vsi, struct hlist_head *del_list_head)
+{
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(e, n, del_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back, &e->fltr_info, false,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, e, false);
+		else
+			dev_dbg(dev, "Unable to delete aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, e->fltr_state, e->fltr_info.fltr_id,
+				e->flow_id, e->fltr_info.q_index);
+
+		/* The aRFS hash table is no longer referencing this entry */
+		hlist_del(&e->list_entry);
+		devm_kfree(dev, e);
+	}
+}
+
+/**
+ * ice_arfs_add_flow_rules - add the rules passed in to HW
+ * @vsi: VSI for the flow rules that need to be added
+ * @add_list_head: head of the list of ice_arfs_entry_ptr(s) for rule addition
+ *
+ * Loop through the add list passed in and add the rules to HW. After each
+ * rule is added, disconnect and free the ice_arfs_entry_ptr node. Don't free
+ * the ice_arfs_entry(s) because they are still being referenced in the aRFS
+ * hash table.
+ */
+static void
+ice_arfs_add_flow_rules(struct ice_vsi *vsi, struct hlist_head *add_list_head)
+{
+	struct ice_arfs_entry_ptr *ep;
+	struct hlist_node *n;
+	struct device *dev;
+
+	dev = ice_pf_to_dev(vsi->back);
+
+	hlist_for_each_entry_safe(ep, n, add_list_head, list_entry) {
+		int result;
+
+		result = ice_fdir_write_fltr(vsi->back,
+					     &ep->arfs_entry->fltr_info, true,
+					     false);
+		if (!result)
+			ice_arfs_update_active_fltr_cntrs(vsi, ep->arfs_entry,
+							  true);
+		else
+			dev_dbg(dev, "Unable to add aRFS entry, err %d fltr_state %d fltr_id %d flow_id %d Q %d\n",
+				result, ep->arfs_entry->fltr_state,
+				ep->arfs_entry->fltr_info.fltr_id,
+				ep->arfs_entry->flow_id,
+				ep->arfs_entry->fltr_info.q_index);
+
+		hlist_del(&ep->list_entry);
+		devm_kfree(dev, ep);
+	}
+}
+
+/**
+ * ice_arfs_is_flow_expired - check if the aRFS entry has expired
+ * @vsi: VSI containing the aRFS entry
+ * @arfs_entry: aRFS entry that's being checked for expiration
+ *
+ * Return true if the flow has expired, else false. This function should be used
+ * to determine whether or not an aRFS entry should be removed from the hardware
+ * and software structures.
+ */
+static bool
+ice_arfs_is_flow_expired(struct ice_vsi *vsi, struct ice_arfs_entry *arfs_entry)
+{
+#define ICE_ARFS_TIME_DELTA_EXPIRATION	msecs_to_jiffies(5000)
+	if (rps_may_expire_flow(vsi->netdev, arfs_entry->fltr_info.q_index,
+				arfs_entry->flow_id,
+				arfs_entry->fltr_info.fltr_id))
+		return true;
+
+	/* expiration timer only used for UDP filters */
+	if (arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV4_UDP &&
+	    arfs_entry->fltr_info.flow_type != ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		return false;
+
+	return time_in_range64(arfs_entry->time_activated +
+			       ICE_ARFS_TIME_DELTA_EXPIRATION,
+			       arfs_entry->time_activated, get_jiffies_64());
+}
+
+/**
+ * ice_arfs_update_flow_rules - sort one aRFS bucket into add/delete work
+ * @vsi: VSI that owns the aRFS hash table
+ * @idx: bucket of @vsi->arfs_fltr_list to walk
+ * @add_list: receives an ice_arfs_entry_ptr for every filter to program
+ * @del_list: receives every ice_arfs_entry whose filter must be removed
+ *
+ * An ICE_ARFS_INACTIVE entry is marked ICE_ARFS_ACTIVE and a reference to it
+ * is queued on @add_list; for UDP flows the activation time is recorded too.
+ * An ICE_ARFS_ACTIVE entry whose flow has expired is unlinked from the hash
+ * table, marked ICE_ARFS_TODEL and moved to @del_list. Nothing is written to
+ * HW here; the caller is expected to add/delete rules on the add_list/del_list
+ * respectively.
+ */
+static void
+ice_arfs_update_flow_rules(struct ice_vsi *vsi, u16 idx,
+			   struct hlist_head *add_list,
+			   struct hlist_head *del_list)
+{
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_arfs_entry *e;
+	struct hlist_node *n;
+
+	/* _safe iteration: expired entries are unlinked while walking */
+	hlist_for_each_entry_safe(e, n, &vsi->arfs_fltr_list[idx], list_entry) {
+		struct ice_arfs_entry_ptr *ep;
+		enum ice_fltr_ptype flow_type;
+
+		if (e->fltr_state == ICE_ARFS_ACTIVE) {
+			/* already queued for HW: keep it until the flow expires */
+			if (!ice_arfs_is_flow_expired(vsi, e))
+				continue;
+			/* unlink from the hash table so the next pass over
+			 * this bucket cannot see it, then queue for delete
+			 */
+			hlist_del(&e->list_entry);
+			e->fltr_state = ICE_ARFS_TODEL;
+			hlist_add_head(&e->list_entry, del_list);
+			continue;
+		}
+		if (e->fltr_state != ICE_ARFS_INACTIVE)
+			continue;
+
+		/* not in HW yet: queue a reference to it for programming */
+		flow_type = e->fltr_info.flow_type;
+		ep = devm_kzalloc(dev, sizeof(*ep), GFP_ATOMIC);
+		if (!ep)
+			continue;
+		INIT_HLIST_NODE(&ep->list_entry);
+		ep->arfs_entry = e;
+		hlist_add_head(&ep->list_entry, add_list);
+		e->fltr_state = ICE_ARFS_ACTIVE;
+		/* expiration timer only used for UDP flows */
+		if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+		    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+			e->time_activated = get_jiffies_64();
+	}
+}
+
+/**
+ * ice_sync_arfs_fltrs - apply pending aRFS filter additions and deletions
+ * @pf: board private structure
+ */
+void ice_sync_arfs_fltrs(struct ice_pf *pf)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	HLIST_HEAD(to_add);
+	HLIST_HEAD(to_del);
+	unsigned int bucket;
+
+	/* nothing to sync without a main VSI that has aRFS active */
+	if (!main_vsi || !ice_is_arfs_active(main_vsi))
+		return;
+
+	/* sort every bucket's entries while holding the table lock */
+	spin_lock_bh(&main_vsi->arfs_lock);
+	for (bucket = 0; bucket < ICE_MAX_ARFS_LIST; bucket++) {
+		ice_arfs_update_flow_rules(main_vsi, bucket, &to_add,
+					   &to_del);
+	}
+	spin_unlock_bh(&main_vsi->arfs_lock);
+
+	/* both lists are processed after the lock is released */
+
+	/* to_del carries ice_arfs_entry(s) */
+	ice_arfs_del_flow_rules(main_vsi, &to_del);
+
+	/* to_add carries ice_arfs_entry_ptr(s) */
+	ice_arfs_add_flow_rules(main_vsi, &to_add);
+}
+
+/**
+ * ice_arfs_build_entry - allocate an aRFS entry and fill in its 4-tuple
+ * @vsi: VSI the flow is steered to; also supplies the filter ID counter
+ * @fk: dissected flow keys providing addresses, ports and protocols
+ * @rxq_idx: Rx queue index stored as the filter's destination
+ * @flow_id: stack-provided flow identifier, stored in the entry
+ *
+ * Return: the new entry, or NULL if the allocation fails
+ */
+static struct ice_arfs_entry *
+ice_arfs_build_entry(struct ice_vsi *vsi, const struct flow_keys *fk,
+		     u16 rxq_idx, u32 flow_id)
+{
+	bool is_tcp = fk->basic.ip_proto == IPPROTO_TCP;
+	struct device *dev = ice_pf_to_dev(vsi->back);
+	struct ice_arfs_entry *entry;
+	struct ice_fdir_fltr *fltr;
+
+	entry = devm_kzalloc(dev, sizeof(*entry), GFP_ATOMIC | __GFP_NOWARN);
+	if (!entry)
+		return NULL;
+
+	entry->flow_id = flow_id;
+
+	/* destination: queue @rxq_idx of this VSI */
+	fltr = &entry->fltr_info;
+	fltr->dest_ctl = ICE_FLTR_PRGM_DESC_DEST_DIRECT_PKT_QINDEX;
+	fltr->dest_vsi = vsi->idx;
+	fltr->q_index = rxq_idx;
+
+	/* anything that is not TCP is programmed as a UDP flow type */
+	if (fk->basic.n_proto != htons(ETH_P_IP)) { /* ETH_P_IPV6 */
+		fltr->flow_type = is_tcp ? ICE_FLTR_PTYPE_NONF_IPV6_TCP :
+					   ICE_FLTR_PTYPE_NONF_IPV6_UDP;
+		fltr->ip.v6.proto = fk->basic.ip_proto;
+		memcpy(&fltr->ip.v6.src_ip, &fk->addrs.v6addrs.src,
+		       sizeof(struct in6_addr));
+		memcpy(&fltr->ip.v6.dst_ip, &fk->addrs.v6addrs.dst,
+		       sizeof(struct in6_addr));
+		fltr->ip.v6.src_port = fk->ports.src;
+		fltr->ip.v6.dst_port = fk->ports.dst;
+	} else {
+		fltr->flow_type = is_tcp ? ICE_FLTR_PTYPE_NONF_IPV4_TCP :
+					   ICE_FLTR_PTYPE_NONF_IPV4_UDP;
+		fltr->ip.v4.proto = fk->basic.ip_proto;
+		fltr->ip.v4.src_ip = fk->addrs.v4addrs.src;
+		fltr->ip.v4.dst_ip = fk->addrs.v4addrs.dst;
+		fltr->ip.v4.src_port = fk->ports.src;
+		fltr->ip.v4.dst_port = fk->ports.dst;
+	}
+
+	/* IDs come from the per-VSI counter, taken modulo RPS_NO_FILTER */
+	fltr->fltr_id =
+		atomic_inc_return(vsi->arfs_last_fltr_id) % RPS_NO_FILTER;
+
+	return entry;
+}
+
+/**
+ * ice_arfs_is_perfect_flow_set - is the perfect filter bit set for a flow type
+ * @hw: pointer to HW structure; supplies the fdir_perfect_fltr bitmap
+ * @l3_proto: ETH_P_IP or ETH_P_IPV6 in network order
+ * @l4_proto: IPPROTO_UDP or IPPROTO_TCP
+ *
+ * aRFS only uses perfect (4-tuple) filters. Returns true when no bitmap is
+ * allocated, otherwise the bitmap bit of the matching IPv4/IPv6 TCP/UDP flow
+ * type; any other protocol combination yields false.
+ */
+static bool
+ice_arfs_is_perfect_flow_set(struct ice_hw *hw, __be16 l3_proto, u8 l4_proto)
+{
+	unsigned long *bitmap = hw->fdir_perfect_fltr;
+	bool tcp = l4_proto == IPPROTO_TCP;
+
+	/* advanced Flow Director disabled, perfect filters always supported */
+	if (!bitmap)
+		return true;
+
+	if (!tcp && l4_proto != IPPROTO_UDP)
+		return false;
+	if (l3_proto == htons(ETH_P_IP))
+		return test_bit(tcp ? ICE_FLTR_PTYPE_NONF_IPV4_TCP :
+				      ICE_FLTR_PTYPE_NONF_IPV4_UDP, bitmap);
+	if (l3_proto == htons(ETH_P_IPV6))
+		return test_bit(tcp ? ICE_FLTR_PTYPE_NONF_IPV6_TCP :
+				      ICE_FLTR_PTYPE_NONF_IPV6_UDP, bitmap);
+	return false;
+}
+
+/**
+ * ice_rx_flow_steer - steer the Rx flow to where application is being run
+ * @netdev: ptr to the netdev being adjusted
+ * @skb: buffer with required header information
+ * @rxq_idx: queue to which the flow needs to move
+ * @flow_id: flow identifier provided by the netdev
+ *
+ * Only non-encapsulated, non-fragmented IPv4 or IPv6 TCP/UDP flows whose
+ * perfect filter is set are accepted. The bucket is chosen from the skb hash
+ * and searched for @flow_id under arfs_lock. A matching ICE_ARFS_ACTIVE entry
+ * bound to another queue gets the new @rxq_idx and is marked
+ * ICE_ARFS_INACTIVE so it can get updated in HW; any other match is left
+ * untouched. With no match a new entry is built and added to the bucket. The
+ * service task is scheduled whenever an entry was added or changed.
+ * Return: the entry's filter ID on success, negative errno otherwise.
+ */
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id)
+{
+	struct ice_netdev_priv *np = netdev_priv(netdev);
+	struct ice_arfs_entry *arfs_entry;
+	struct ice_vsi *vsi = np->vsi;
+	struct flow_keys fk;
+	struct ice_pf *pf;
+	__be16 n_proto;
+	u8 ip_proto;
+	u16 idx;
+	int ret;
+
+	/* no aRFS table (never allocated or already cleared), don't crash */
+	if (unlikely(!vsi->arfs_fltr_list))
+		return -ENODEV;
+
+	pf = vsi->back;
+
+	if (skb->encapsulation)
+		return -EPROTONOSUPPORT;
+
+	if (!skb_flow_dissect_flow_keys(skb, &fk, 0))
+		return -EPROTONOSUPPORT;
+
+	n_proto = fk.basic.n_proto;
+	/* Support only IPv6 and non-fragmented IPv4 */
+	if ((n_proto == htons(ETH_P_IP) && !ip_is_fragment(ip_hdr(skb))) ||
+	    n_proto == htons(ETH_P_IPV6))
+		ip_proto = fk.basic.ip_proto;
+	else
+		return -EPROTONOSUPPORT;
+
+	/* Support only TCP and UDP */
+	if (ip_proto != IPPROTO_TCP && ip_proto != IPPROTO_UDP)
+		return -EPROTONOSUPPORT;
+
+	/* only support 4-tuple filters for aRFS */
+	if (!ice_arfs_is_perfect_flow_set(&pf->hw, n_proto, ip_proto))
+		return -EOPNOTSUPP;
+
+	/* choose the aRFS list bucket based on skb hash */
+	idx = skb_get_hash_raw(skb) & ICE_ARFS_LST_MASK;
+	/* search for entry in the bucket */
+	spin_lock_bh(&vsi->arfs_lock);
+	hlist_for_each_entry(arfs_entry, &vsi->arfs_fltr_list[idx],
+			     list_entry) {
+		struct ice_fdir_fltr *fltr_info;
+
+		/* entries are matched on flow_id only; skip all others */
+		if (arfs_entry->flow_id != flow_id)
+			continue;
+
+		fltr_info = &arfs_entry->fltr_info;
+		ret = fltr_info->fltr_id;
+
+		if (fltr_info->q_index == rxq_idx ||
+		    arfs_entry->fltr_state != ICE_ARFS_ACTIVE)
+			goto out;
+
+		/* update the queue to forward to on an already existing flow */
+		fltr_info->q_index = rxq_idx;
+		arfs_entry->fltr_state = ICE_ARFS_INACTIVE;
+		ice_arfs_update_active_fltr_cntrs(vsi, arfs_entry, false);
+		goto out_schedule_service_task;
+	}
+
+	arfs_entry = ice_arfs_build_entry(vsi, &fk, rxq_idx, flow_id);
+	if (!arfs_entry) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	ret = arfs_entry->fltr_info.fltr_id;
+	INIT_HLIST_NODE(&arfs_entry->list_entry);
+	hlist_add_head(&arfs_entry->list_entry, &vsi->arfs_fltr_list[idx]);
+out_schedule_service_task:
+	ice_service_task_schedule(pf);
+out:
+	spin_unlock_bh(&vsi->arfs_lock);
+	return ret;
+}
+
+/**
+ * ice_init_arfs_cntrs - allocate the zeroed aRFS counters and filter ID source
+ * @vsi: PF VSI to attach them to; 0 on success, -EINVAL or -ENOMEM otherwise
+ */
+static int ice_init_arfs_cntrs(struct ice_vsi *vsi)
+{
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	vsi->arfs_fltr_cntrs = kzalloc(sizeof(*vsi->arfs_fltr_cntrs),
+				       GFP_KERNEL);
+	if (!vsi->arfs_fltr_cntrs)
+		return -ENOMEM;
+
+	vsi->arfs_last_fltr_id = kzalloc(sizeof(*vsi->arfs_last_fltr_id),
+					 GFP_KERNEL);
+	if (vsi->arfs_last_fltr_id)
+		return 0;
+
+	/* second allocation failed: undo the first one */
+	kfree(vsi->arfs_fltr_cntrs);
+	vsi->arfs_fltr_cntrs = NULL;
+	return -ENOMEM;
+}
+
+/**
+ * ice_init_arfs - allocate the aRFS hash table, counters and lock (once)
+ * @vsi: PF VSI to set up; no-op for other VSIs or if already initialized
+ */
+void ice_init_arfs(struct ice_vsi *vsi)
+{
+	struct hlist_head *arfs_fltr_list;
+	unsigned int i;
+
+	/* a second init would leak the live table and counters */
+	if (!vsi || vsi->type != ICE_VSI_PF || vsi->arfs_fltr_list)
+		return;
+
+	arfs_fltr_list = kcalloc(ICE_MAX_ARFS_LIST, sizeof(*arfs_fltr_list),
+				 GFP_KERNEL);
+	if (!arfs_fltr_list)
+		return;
+
+	if (ice_init_arfs_cntrs(vsi))
+		goto free_arfs_fltr_list;
+
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++)
+		INIT_HLIST_HEAD(&arfs_fltr_list[i]);
+
+	spin_lock_init(&vsi->arfs_lock);
+	/* publish last: a non-NULL arfs_fltr_list marks aRFS as usable */
+	vsi->arfs_fltr_list = arfs_fltr_list;
+	return;
+
+free_arfs_fltr_list:
+	kfree(arfs_fltr_list);
+}
+
+/**
+ * ice_clear_arfs - free every aRFS entry, then the table, ID source, counters
+ * @vsi: PF VSI to tear down; no-op if it has no aRFS table
+ */
+void ice_clear_arfs(struct ice_vsi *vsi)
+{
+	struct ice_arfs_entry *entry;
+	struct hlist_node *next;
+	struct device *dev;
+	unsigned int i;
+
+	if (!vsi || !vsi->back || !vsi->arfs_fltr_list ||
+	    vsi->type != ICE_VSI_PF)
+		return;
+
+	dev = ice_pf_to_dev(vsi->back);
+	for (i = 0; i < ICE_MAX_ARFS_LIST; i++) {
+		/* one lock hold per bucket; unlink each entry, then free it */
+		spin_lock_bh(&vsi->arfs_lock);
+		hlist_for_each_entry_safe(entry, next, &vsi->arfs_fltr_list[i],
+					  list_entry) {
+			hlist_del(&entry->list_entry);
+			devm_kfree(dev, entry);
+		}
+		spin_unlock_bh(&vsi->arfs_lock);
+	}
+
+	kfree(vsi->arfs_fltr_list);
+	vsi->arfs_fltr_list = NULL;
+	kfree(vsi->arfs_last_fltr_id);
+	vsi->arfs_last_fltr_id = NULL;
+	kfree(vsi->arfs_fltr_cntrs);
+	vsi->arfs_fltr_cntrs = NULL;
+}
+
+/**
+ * ice_free_cpu_rx_rmap - free the netdev's CPU reverse map, if one is set up
+ * @vsi: PF VSI whose registered netdev owns the reverse map
+ */
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+
+	/* the rmap does not depend on the aRFS table, so do not require it */
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return;
+	netdev = vsi->netdev;
+	if (!netdev || !netdev->rx_cpu_rmap ||
+	    netdev->reg_state != NETREG_REGISTERED)
+		return;
+
+	free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+	netdev->rx_cpu_rmap = NULL;
+}
+
+/**
+ * ice_set_cpu_rx_rmap - setup CPU reverse map for each queue
+ * @vsi: PF VSI with a registered netdev; 0 on success, negative errno if not
+ */
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi)
+{
+	struct net_device *netdev;
+	struct ice_pf *pf;
+	int base_idx, i;
+
+	if (!vsi || vsi->type != ICE_VSI_PF)
+		return -EINVAL;
+
+	pf = vsi->back;
+	netdev = vsi->netdev;
+	if (!pf || !netdev || !vsi->num_q_vectors ||
+	    vsi->netdev->reg_state != NETREG_REGISTERED)
+		return -EINVAL;
+
+	netdev_dbg(netdev, "Setup CPU RMAP: vsi type 0x%x, ifname %s, q_vectors %d\n",
+		   vsi->type, netdev->name, vsi->num_q_vectors);
+	netdev->rx_cpu_rmap = alloc_irq_cpu_rmap(vsi->num_q_vectors);
+	if (unlikely(!netdev->rx_cpu_rmap))
+		return -ENOMEM;
+	base_idx = vsi->base_vector;
+	for (i = 0; i < vsi->num_q_vectors; i++)
+		if (irq_cpu_rmap_add(netdev->rx_cpu_rmap,
+				     pf->msix_entries[base_idx + i].vector)) {
+			/* release directly so the cleanup cannot be skipped */
+			free_irq_cpu_rmap(netdev->rx_cpu_rmap);
+			netdev->rx_cpu_rmap = NULL;
+			return -EINVAL;
+		}
+
+	return 0;
+}
+
+/**
+ * ice_remove_arfs - release the CPU reverse map and all aRFS state
+ * @pf: device private structure
+ */
+void ice_remove_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+
+	/* nothing to tear down without a main VSI */
+	if (!main_vsi)
+		return;
+	/* reverse map first, then the hash table and counters */
+	ice_free_cpu_rx_rmap(main_vsi);
+	ice_clear_arfs(main_vsi);
+}
+
+/**
+ * ice_rebuild_arfs - tear down all aRFS resources and set them up again
+ * @pf: device private structure
+ */
+void ice_rebuild_arfs(struct ice_pf *pf)
+{
+	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
+	int err;
+
+	if (!main_vsi)
+		return;
+	/* remove the old state, then recreate the rmap before the table */
+	ice_remove_arfs(pf);
+	err = ice_set_cpu_rx_rmap(main_vsi);
+	if (!err)
+		ice_init_arfs(main_vsi);
+	else
+		dev_err(ice_pf_to_dev(pf), "Failed to rebuild aRFS\n");
+}
diff --git a/drivers/net/ethernet/intel/ice/ice_arfs.h b/drivers/net/ethernet/intel/ice/ice_arfs.h
new file mode 100644
index 000000000000..f39cd16403ed
--- /dev/null
+++ b/drivers/net/ethernet/intel/ice/ice_arfs.h
@@ -0,0 +1,82 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/* Copyright (C) 2018-2020, Intel Corporation. */
+
+#ifndef _ICE_ARFS_H_
+#define _ICE_ARFS_H_
+enum ice_arfs_fltr_state {
+	ICE_ARFS_INACTIVE,	/* 0: also the state of a freshly zeroed entry */
+	ICE_ARFS_ACTIVE,
+	ICE_ARFS_TODEL,
+};
+
+struct ice_arfs_entry {
+	struct ice_fdir_fltr fltr_info;
+	struct hlist_node list_entry;	/* hash bucket link, later delete list */
+	u64 time_activated;	/* jiffies64 at activation, UDP flows only */
+	u32 flow_id;		/* flow identifier passed in by the stack */
+	/* fltr_state = 0 - ICE_ARFS_INACTIVE:
+	 *	filter needs to be updated or programmed in HW.
+	 * fltr_state = 1 - ICE_ARFS_ACTIVE:
+	 *	filter is active and programmed in HW.
+	 * fltr_state = 2 - ICE_ARFS_TODEL:
+	 *	flow expired; entry was unlinked from the aRFS hash table and
+	 *	queued so that its filter can be deleted from HW.
+	 */
+	u8 fltr_state;
+};
+
+struct ice_arfs_entry_ptr {
+	struct ice_arfs_entry *arfs_entry;	/* entry to program in HW */
+	struct hlist_node list_entry;		/* link on the add list */
+};
+
+struct ice_arfs_active_fltr_cntrs {
+	atomic_t active_tcpv4_cnt;
+	atomic_t active_tcpv6_cnt;
+	atomic_t active_udpv4_cnt;
+	atomic_t active_udpv6_cnt;
+};
+
+#ifdef CONFIG_RFS_ACCEL
+int
+ice_rx_flow_steer(struct net_device *netdev, const struct sk_buff *skb,
+		  u16 rxq_idx, u32 flow_id);
+void ice_clear_arfs(struct ice_vsi *vsi);
+void ice_free_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_init_arfs(struct ice_vsi *vsi);
+void ice_sync_arfs_fltrs(struct ice_pf *pf);
+int ice_set_cpu_rx_rmap(struct ice_vsi *vsi);
+void ice_remove_arfs(struct ice_pf *pf);
+void ice_rebuild_arfs(struct ice_pf *pf);
+bool
+ice_is_arfs_using_perfect_flow(struct ice_hw *hw,
+			       enum ice_fltr_ptype flow_type);
+#else /* !CONFIG_RFS_ACCEL: the aRFS entry points become no-op stubs */
+#define ice_sync_arfs_fltrs(pf) do {} while (0)
+#define ice_init_arfs(vsi) do {} while (0)
+#define ice_clear_arfs(vsi) do {} while (0)
+#define ice_remove_arfs(pf) do {} while (0)
+#define ice_free_cpu_rx_rmap(vsi) do {} while (0)
+#define ice_rebuild_arfs(pf) do {} while (0)
+
+static inline int ice_set_cpu_rx_rmap(struct ice_vsi __always_unused *vsi)
+{
+	return 0;	/* nothing to set up, report success */
+}
+
+static inline int
+ice_rx_flow_steer(struct net_device __always_unused *netdev,
+		  const struct sk_buff __always_unused *skb,
+		  u16 __always_unused rxq_idx, u32 __always_unused flow_id)
+{
+	return -EOPNOTSUPP;	/* flow steering is compiled out */
+}
+
+static inline bool
+ice_is_arfs_using_perfect_flow(struct ice_hw __always_unused *hw,
+			       enum ice_fltr_ptype __always_unused flow_type)
+{
+	return false;	/* without aRFS no flow type is ever in use by it */
+}
+#endif /* CONFIG_RFS_ACCEL */
+#endif /* _ICE_ARFS_H_ */
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index a0002032be61..42803fc0ed18 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -566,6 +566,12 @@ ice_fdir_set_hw_fltr_rule(struct ice_pf *pf, struct ice_flow_seg_info *seg,
 			return -EINVAL;
 		}
 
+		if (ice_is_arfs_using_perfect_flow(hw, flow)) {
+			dev_err(dev, "aRFS using perfect flow type %d, cannot change input set\n",
+				flow);
+			return -EINVAL;
+		}
+
 		/* remove HW filter definition */
 		ice_fdir_rem_flow(hw, ICE_BLK_FD, flow);
 	}
@@ -1176,7 +1182,7 @@ err_exit:
  *
  * returns 0 on success and negative value on error
  */
-static int
+int
 ice_fdir_write_fltr(struct ice_pf *pf, struct ice_fdir_fltr *input, bool add,
 		    bool is_tun)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ff77fc3f633e..f81bd4c30bbc 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2175,6 +2175,7 @@ ice_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi,
 				ice_vsi_cfg_rss_lut_key(vsi);
 				ice_vsi_set_rss_flow_fld(vsi);
 			}
+		ice_init_arfs(vsi);
 		break;
 	case ICE_VSI_VF:
 		/* VF driver will take care of creating netdev for this type and
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index d06a3311a2dc..c69567210584 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1113,7 +1113,7 @@ static void ice_clean_mailboxq_subtask(struct ice_pf *pf)
  *
  * If not already scheduled, this puts the task into the work queue.
  */
-static void ice_service_task_schedule(struct ice_pf *pf)
+void ice_service_task_schedule(struct ice_pf *pf)
 {
 	if (!test_bit(__ICE_SERVICE_DIS, pf->state) &&
 	    !test_and_set_bit(__ICE_SERVICE_SCHED, pf->state) &&
@@ -1483,7 +1483,7 @@ static void ice_service_task(struct work_struct *work)
 
 	ice_process_vflr_event(pf);
 	ice_clean_mailboxq_subtask(pf);
-
+	ice_sync_arfs_fltrs(pf);
 	/* Clear __ICE_SERVICE_SCHED flag to allow scheduling next event */
 	ice_service_task_complete(pf);
 
@@ -1642,9 +1642,14 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 		}
 
 		/* register for affinity change notifications */
-		q_vector->affinity_notify.notify = ice_irq_affinity_notify;
-		q_vector->affinity_notify.release = ice_irq_affinity_release;
-		irq_set_affinity_notifier(irq_num, &q_vector->affinity_notify);
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL)) {
+			struct irq_affinity_notify *affinity_notify;
+
+			affinity_notify = &q_vector->affinity_notify;
+			affinity_notify->notify = ice_irq_affinity_notify;
+			affinity_notify->release = ice_irq_affinity_release;
+			irq_set_affinity_notifier(irq_num, affinity_notify);
+		}
 
 		/* assign the mask for this irq */
 		irq_set_affinity_hint(irq_num, &q_vector->affinity_mask);
@@ -1656,8 +1661,9 @@ static int ice_vsi_req_irq_msix(struct ice_vsi *vsi, char *basename)
 free_q_irqs:
 	while (vector) {
 		vector--;
-		irq_num = pf->msix_entries[base + vector].vector,
-		irq_set_affinity_notifier(irq_num, NULL);
+		irq_num = pf->msix_entries[base + vector].vector;
+		if (!IS_ENABLED(CONFIG_RFS_ACCEL))
+			irq_set_affinity_notifier(irq_num, NULL);
 		irq_set_affinity_hint(irq_num, NULL);
 		devm_free_irq(dev, irq_num, &vsi->q_vectors[vector]);
 	}
@@ -2611,12 +2617,22 @@ static int ice_setup_pf_sw(struct ice_pf *pf)
 	 */
 	ice_napi_add(vsi);
 
+	status = ice_set_cpu_rx_rmap(vsi);
+	if (status) {
+		dev_err(ice_pf_to_dev(pf), "Failed to set CPU Rx map VSI %d error %d\n",
+			vsi->vsi_num, status);
+		status = -EINVAL;
+		goto unroll_napi_add;
+	}
 	status = ice_init_mac_fltr(pf);
 	if (status)
-		goto unroll_napi_add;
+		goto free_cpu_rx_map;
 
 	return status;
 
+free_cpu_rx_map:
+	ice_free_cpu_rx_rmap(vsi);
+
 unroll_napi_add:
 	if (vsi) {
 		ice_napi_del(vsi);
@@ -3519,6 +3535,8 @@ static void ice_remove(struct pci_dev *pdev)
 	ice_service_task_stop(pf);
 
 	mutex_destroy(&(&pf->hw)->fdir_fltr_lock);
+	if (!ice_is_safe_mode(pf))
+		ice_remove_arfs(pf);
 	ice_devlink_destroy_port(pf);
 	ice_vsi_release_all(pf);
 	ice_free_irq_msix_misc(pf);
@@ -4036,11 +4054,14 @@ ice_set_features(struct net_device *netdev, netdev_features_t features)
 		ret = ice_cfg_vlan_pruning(vsi, false, false);
 
 	if ((features & NETIF_F_NTUPLE) &&
-	    !(netdev->features & NETIF_F_NTUPLE))
+	    !(netdev->features & NETIF_F_NTUPLE)) {
 		ice_vsi_manage_fdir(vsi, true);
-	else if (!(features & NETIF_F_NTUPLE) &&
-		 (netdev->features & NETIF_F_NTUPLE))
+		ice_init_arfs(vsi);
+	} else if (!(features & NETIF_F_NTUPLE) &&
+		 (netdev->features & NETIF_F_NTUPLE)) {
 		ice_vsi_manage_fdir(vsi, false);
+		ice_clear_arfs(vsi);
+	}
 
 	return ret;
 }
@@ -4942,6 +4963,8 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 
 		/* replay Flow Director filters */
 		ice_fdir_replay_fltrs(pf);
+
+		ice_rebuild_arfs(pf);
 	}
 
 	ice_update_pf_netdev_link(pf);
@@ -5721,6 +5744,9 @@ static const struct net_device_ops ice_netdev_ops = {
 	.ndo_bridge_setlink = ice_bridge_setlink,
 	.ndo_fdb_add = ice_fdb_add,
 	.ndo_fdb_del = ice_fdb_del,
+#ifdef CONFIG_RFS_ACCEL
+	.ndo_rx_flow_steer = ice_rx_flow_steer,
+#endif
 	.ndo_tx_timeout = ice_tx_timeout,
 	.ndo_bpf = ice_xdp,
 	.ndo_xdp_xmit = ice_xdp_xmit,
-- 
cgit v1.2.3-59-g8ed1b


From 4244910568ed2fed3532d86aa7fc910d2583fa41 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 15 May 2020 17:36:30 -0700
Subject: ice: Call ice_aq_set_mac_cfg

As per the specification, the driver needs to call set_mac_cfg
(opcode 0x0603) to be able to exercise jumbo frames. Call the
function during initialization and the post reset rebuild flow.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 21 ++++++++
 drivers/net/ethernet/intel/ice/ice_common.c     | 69 +++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_common.h     |  2 +
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |  5 ++
 drivers/net/ethernet/intel/ice/ice_main.c       |  6 +++
 5 files changed, 103 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index deada2e3d7c0..f80fb6570f8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1068,6 +1068,25 @@ struct ice_aqc_set_phy_cfg_data {
 	u8 rsvd1;
 };
 
+/* Set MAC Config command data structure (direct 0x0603) */
+struct ice_aqc_set_mac_cfg {
+	__le16 max_frame_size;	/* written by ice_aq_set_mac_cfg() */
+	u8 params;
+#define ICE_AQ_SET_MAC_PACE_S		3
+#define ICE_AQ_SET_MAC_PACE_M		(0xF << ICE_AQ_SET_MAC_PACE_S)
+#define ICE_AQ_SET_MAC_PACE_TYPE_M	BIT(7)
+#define ICE_AQ_SET_MAC_PACE_TYPE_RATE	0
+#define ICE_AQ_SET_MAC_PACE_TYPE_FIXED	ICE_AQ_SET_MAC_PACE_TYPE_M
+	u8 tx_tmr_priority;	/* no bit set when the LFC values are used */
+	__le16 tx_tmr_value;	/* Tx timer read back from HW */
+	__le16 fc_refresh_threshold;	/* FC threshold read back from HW */
+	u8 drop_opts;
+#define ICE_AQ_SET_MAC_AUTO_DROP_MASK		BIT(0)
+#define ICE_AQ_SET_MAC_AUTO_DROP_NONE		0
+#define ICE_AQ_SET_MAC_AUTO_DROP_BLOCKING_PKTS	BIT(0)
+	u8 reserved[7];
+};
+
 /* Restart AN command data structure (direct 0x0605)
  * Also used for response, with only the lport_num field present.
  */
@@ -1774,6 +1793,7 @@ struct ice_aq_desc {
 		struct ice_aqc_download_pkg download_pkg;
 		struct ice_aqc_set_mac_lb set_mac_lb;
 		struct ice_aqc_alloc_free_res_cmd sw_res_ctrl;
+		struct ice_aqc_set_mac_cfg set_mac_cfg;
 		struct ice_aqc_set_event_mask set_event_mask;
 		struct ice_aqc_get_link_status get_link_status;
 		struct ice_aqc_event_lan_overflow lan_overflow;
@@ -1870,6 +1890,7 @@ enum ice_adminq_opc {
 	/* PHY commands */
 	ice_aqc_opc_get_phy_caps			= 0x0600,
 	ice_aqc_opc_set_phy_cfg				= 0x0601,
+	ice_aqc_opc_set_mac_cfg				= 0x0603,
 	ice_aqc_opc_restart_an				= 0x0605,
 	ice_aqc_opc_get_link_status			= 0x0607,
 	ice_aqc_opc_set_event_mask			= 0x0613,
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 3a4c14150107..0a0b00fffaf7 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -315,6 +315,71 @@ ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 	return 0;
 }
 
+/**
+ * ice_fill_tx_timer_and_fc_thresh - copy the LFC pause settings into @cmd
+ * @hw: pointer to the HW struct whose port MAC registers are read
+ * @cmd: Set MAC Config data; tx_tmr_value/fc_refresh_threshold are set
+ *
+ * Reads the current Tx timer and FC refresh threshold back from HW and
+ * stores them, little-endian, in the Set MAC Config AQ command descriptor.
+ */
+static void
+ice_fill_tx_timer_and_fc_thresh(struct ice_hw *hw,
+				struct ice_aqc_set_mac_cfg *cmd)
+{
+	u16 tx_timer, fc_thresh;
+	u32 reg;
+
+	/* Both values are read back for LFC, which is why the register
+	 * index used below is
+	 * PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX.
+	 *
+	 * For the same reason no bit is turned on in tx_tmr_priority:
+	 * the timer and threshold handled here are the LFC ones.
+	 */
+#define IDX_OF_LFC PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX
+
+	/* transmit timer, taken from the pause quanta register */
+	reg = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(IDX_OF_LFC));
+	tx_timer = reg &
+		   PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M;
+
+	/* FC threshold, taken from the pause refresh timer register */
+	reg = rd32(hw, PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(IDX_OF_LFC));
+	fc_thresh = reg & PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M;
+
+	cmd->tx_tmr_value = cpu_to_le16(tx_timer);
+	cmd->fc_refresh_threshold = cpu_to_le16(fc_thresh);
+}
+
+/**
+ * ice_aq_set_mac_cfg - send the Set MAC Config admin queue command (0x0603)
+ * @hw: pointer to the HW struct
+ * @max_frame_size: Maximum Frame Size to be supported; must not be 0
+ * @cd: pointer to command details structure or NULL
+ *
+ * Return: ICE_ERR_PARAM for a zero size, else the ice_aq_send_cmd() status
+ */
+enum ice_status
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd)
+{
+	struct ice_aqc_set_mac_cfg *mac_cfg;
+	struct ice_aq_desc desc;
+
+	/* reject a zero frame size before touching the descriptor */
+	if (!max_frame_size)
+		return ICE_ERR_PARAM;
+
+	/* prepare the descriptor for the opcode, then fill its parameters */
+	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_set_mac_cfg);
+
+	mac_cfg = &desc.params.set_mac_cfg;
+	mac_cfg->max_frame_size = cpu_to_le16(max_frame_size);
+	ice_fill_tx_timer_and_fc_thresh(hw, mac_cfg);
+
+	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
+}
+
 /**
  * ice_init_fltr_mgmt_struct - initializes filter management list and locks
  * @hw: pointer to the HW struct
@@ -745,6 +810,10 @@ enum ice_status ice_init_hw(struct ice_hw *hw)
 	status = ice_aq_manage_mac_read(hw, mac_buf, mac_buf_len, NULL);
 	devm_kfree(ice_hw_to_dev(hw), mac_buf);
 
+	if (status)
+		goto err_unroll_fltr_mgmt_struct;
+	/* enable jumbo frame support at MAC level */
+	status = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
 	if (status)
 		goto err_unroll_fltr_mgmt_struct;
 	/* Obtain counter base index which would be used by flow director */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index 8104f3d64d96..bea755a658eb 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -108,6 +108,8 @@ enum ice_status
 ice_aq_set_link_restart_an(struct ice_port_info *pi, bool ena_link,
 			   struct ice_sq_cd *cd);
 enum ice_status
+ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd);
+enum ice_status
 ice_aq_get_link_info(struct ice_port_info *pi, bool ena_lse,
 		     struct ice_link_status *link, struct ice_sq_cd *cd);
 enum ice_status
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index c8b037d25053..1f9b427a35fa 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -219,6 +219,11 @@
 #define VPLAN_TX_QBASE_VFNUMQ_M			ICE_M(0xFF, 16)
 #define VPLAN_TXQ_MAPENA(_VF)			(0x00073800 + ((_VF) * 4))
 #define VPLAN_TXQ_MAPENA_TX_ENA_M		BIT(0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA(_i)	(0x001E36E0 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_MAX_INDEX 8
+#define PRTMAC_HSEC_CTL_TX_PAUSE_QUANTA_HSEC_CTL_TX_PAUSE_QUANTA_M ICE_M(0xFFFF, 0)
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER(_i) (0x001E3800 + ((_i) * 32))
+#define PRTMAC_HSEC_CTL_TX_PAUSE_REFRESH_TIMER_M ICE_M(0xFFFF, 0)
 #define GL_MDCK_TX_TDPU				0x00049348
 #define GL_MDCK_TX_TDPU_RCU_ANTISPOOF_ITR_DIS_M BIT(1)
 #define GL_MDET_RX				0x00294C00
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c69567210584..220f1bfc6376 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4901,6 +4901,12 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 		goto err_init_ctrlq;
 	}
 
+	ret = ice_aq_set_mac_cfg(hw, ICE_AQ_SET_MAC_FRAME_SIZE_MAX, NULL);
+	if (ret) {
+		dev_err(dev, "set_mac_cfg failed %s\n", ice_stat_str(ret));
+		goto err_init_ctrlq;
+	}
+
 	err = ice_sched_init_port(hw->port_info);
 	if (err)
 		goto err_sched_init_port;
-- 
cgit v1.2.3-59-g8ed1b


From 7438a3b0947a2ef43c16bfb7b5b19048d6d92b14 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Fri, 15 May 2020 17:36:31 -0700
Subject: ice: print Rx MDD auto reset message before VF reset

Rx MDD auto reset message was not being logged because logging occurred
after the VF reset and the VF MDD data was reinitialized.

Log the Rx MDD auto reset message before triggering the VF reset.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c        |  7 ++++++-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 25 ++++++++++++++++++------
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h |  2 ++
 3 files changed, 27 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 220f1bfc6376..bac5a0857c8c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1322,8 +1322,13 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 			 * PF can be configured to reset the VF through ethtool
 			 * private flag mdd-auto-reset-vf.
 			 */
-			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags))
+			if (test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)) {
+				/* VF MDD event counters will be cleared by
+				 * reset, so print the event prior to reset.
+				 */
+				ice_print_vf_rx_mdd_event(vf);
 				ice_reset_vf(&pf->vf[i], false);
+			}
 		}
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 62c100d47592..e9c14d460731 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -3756,6 +3756,24 @@ int ice_get_vf_stats(struct net_device *netdev, int vf_id,
 	return 0;
 }
 
+/**
+ * ice_print_vf_rx_mdd_event - log a VF's Rx Malicious Driver Detection count
+ * @vf: pointer to the VF structure
+ */
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	const char *auto_reset;
+
+	/* the message also reports whether the auto-reset flag is set */
+	auto_reset = test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags) ?
+		     "on" : "off";
+
+	dev_info(ice_pf_to_dev(pf), "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
+		 vf->mdd_rx_events.count, pf->hw.pf_id, vf->vf_id,
+		 vf->dflt_lan_addr.addr, auto_reset);
+}
+
 /**
  * ice_print_vfs_mdd_event - print VFs malicious driver detect event
  * @pf: pointer to the PF structure
@@ -3785,12 +3803,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf)
 		if (vf->mdd_rx_events.count != vf->mdd_rx_events.last_printed) {
 			vf->mdd_rx_events.last_printed =
 							vf->mdd_rx_events.count;
-
-			dev_info(dev, "%d Rx Malicious Driver Detection events detected on PF %d VF %d MAC %pM. mdd-auto-reset-vfs=%s\n",
-				 vf->mdd_rx_events.count, hw->pf_id, i,
-				 vf->dflt_lan_addr.addr,
-				 test_bit(ICE_FLAG_MDD_AUTO_RESET_VF, pf->flags)
-					  ? "on" : "off");
+			ice_print_vf_rx_mdd_event(vf);
 		}
 
 		/* only print Tx MDD event message if there are new events */
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 474293ff4fe5..0adff89a6749 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -132,6 +132,7 @@ bool ice_is_any_vf_in_promisc(struct ice_pf *pf);
 void
 ice_vf_lan_overflow_event(struct ice_pf *pf, struct ice_rq_event_info *event);
 void ice_print_vfs_mdd_events(struct ice_pf *pf);
+void ice_print_vf_rx_mdd_event(struct ice_vf *vf);
 #else /* CONFIG_PCI_IOV */
 #define ice_process_vflr_event(pf) do {} while (0)
 #define ice_free_vfs(pf) do {} while (0)
@@ -141,6 +142,7 @@ void ice_print_vfs_mdd_events(struct ice_pf *pf);
 #define ice_set_vf_state_qs_dis(vf) do {} while (0)
 #define ice_vf_lan_overflow_event(pf, event) do {} while (0)
 #define ice_print_vfs_mdd_events(pf) do {} while (0)
+#define ice_print_vf_rx_mdd_event(vf) do {} while (0)
 
 static inline bool
 ice_reset_all_vfs(struct ice_pf __always_unused *pf,
-- 
cgit v1.2.3-59-g8ed1b


From 4dc926d3a59e73b8c4adf51b261f1a1bbd48a989 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:36:32 -0700
Subject: ice: Fix Tx timeout when link is toggled on a VF's interface

Currently if the iavf is loaded and a VF link transitions from up to
down to up again a Tx timeout will be triggered. This happens because
Tx/Rx queue interrupts are only enabled when receiving the
VIRTCHNL_OP_CONFIG_IRQ_MAP message, which happens on reset or initial
iavf driver load, but not when bringing link up. This is problematic
because they are disabled on the VIRTCHNL_OP_DISABLE_QUEUES message,
which is part of bringing a VF's link down. However, they are not
enabled on the VIRTCHNL_OP_ENABLE_QUEUES message, which is part of
bringing a VF's link up.

Fix this by re-enabling the VF's Rx and Tx queue interrupts when they
were previously configured. This is done by first checking to make
sure the previous value in QINT_[R|T]QCTL.MSIX_INDX is not 0, which
is used to represent the OICR in the VF's interrupt space. If the
MSIX_INDX is non-zero then enable the interrupt by setting the
QINT_[R|T]QCTL.CAUSE_ENA bit to 1.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 48 ++++++++++++++++++++++++
 1 file changed, 48 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index e9c14d460731..a12fce73efbc 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2316,6 +2316,52 @@ static bool ice_vc_validate_vqs_bitmaps(struct virtchnl_queue_select *vqs)
 	return true;
 }
 
+/**
+ * ice_vf_ena_txq_interrupt - enable Tx queue interrupt via QINT_TQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_txq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->txq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_TQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_TQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_TQCTL(pfq), reg | QINT_TQCTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_vf_ena_rxq_interrupt - enable Rx queue interrupt via QINT_RQCTL
+ * @vsi: VSI of the VF to configure
+ * @q_idx: VF queue index used to determine the queue in the PF's space
+ */
+static void ice_vf_ena_rxq_interrupt(struct ice_vsi *vsi, u32 q_idx)
+{
+	struct ice_hw *hw = &vsi->back->hw;
+	u32 pfq = vsi->rxq_map[q_idx];
+	u32 reg;
+
+	reg = rd32(hw, QINT_RQCTL(pfq));
+
+	/* MSI-X index 0 in the VF's space is always for the OICR, which means
+	 * this is most likely a poll mode VF driver, so don't enable an
+	 * interrupt that was never configured via VIRTCHNL_OP_CONFIG_IRQ_MAP
+	 */
+	if (!(reg & QINT_RQCTL_MSIX_INDX_M))
+		return;
+
+	wr32(hw, QINT_RQCTL(pfq), reg | QINT_RQCTL_CAUSE_ENA_M);
+}
+
 /**
  * ice_vc_ena_qs_msg
  * @vf: pointer to the VF info
@@ -2376,6 +2422,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 			goto error_param;
 		}
 
+		ice_vf_ena_rxq_interrupt(vsi, vf_q_id);
 		set_bit(vf_q_id, vf->rxq_ena);
 	}
 
@@ -2391,6 +2438,7 @@ static int ice_vc_ena_qs_msg(struct ice_vf *vf, u8 *msg)
 		if (test_bit(vf_q_id, vf->txq_ena))
 			continue;
 
+		ice_vf_ena_txq_interrupt(vsi, vf_q_id);
 		set_bit(vf_q_id, vf->txq_ena);
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 47ebc7b02485aa0c0e18f96368e6f6958735fcea Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:36:33 -0700
Subject: ice: Check if unicast MAC exists before setting VF MAC

Currently if a unicast MAC is set via ndo_set_vf_mac, the PF driver will
set the VF's dflt_lan_addr.addr once some basic checks have passed. The
VF is then reset. During reset the PF driver will attempt to program the
VF's MAC from the dflt_lan_addr.addr field. This fails when the MAC
already exists on the PF's switch.

This is causing the VF to be completely disabled until removing/enabling
any VFs via sysfs.

Fix this by checking if the unicast MAC exists before triggering a VF
reset directly in ndo_set_vf_mac. Also, add a check if the unicast MAC
is set to the same value as before and return 0 if that is the case.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 43 ++++++++++++++++++++++++
 1 file changed, 43 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a12fce73efbc..95e8bca562e5 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -3640,6 +3640,39 @@ ice_get_vf_cfg(struct net_device *netdev, int vf_id, struct ifla_vf_info *ivi)
 	return 0;
 }
 
+/**
+ * ice_unicast_mac_exists - check if the unicast MAC exists on the PF's switch
+ * @pf: PF used to reference the switch's rules
+ * @umac: unicast MAC to compare against existing switch rules
+ *
+ * Return true on the first/any match, else return false
+ */
+static bool ice_unicast_mac_exists(struct ice_pf *pf, u8 *umac)
+{
+	struct ice_sw_recipe *mac_recipe_list =
+		&pf->hw.switch_info->recp_list[ICE_SW_LKUP_MAC];
+	struct ice_fltr_mgmt_list_entry *list_itr;
+	struct list_head *rule_head;
+	struct mutex *rule_lock; /* protect MAC filter list access */
+
+	rule_head = &mac_recipe_list->filt_rules;
+	rule_lock = &mac_recipe_list->filt_rule_lock;
+
+	mutex_lock(rule_lock);
+	list_for_each_entry(list_itr, rule_head, list_entry) {
+		u8 *existing_mac = &list_itr->fltr_info.l_data.mac.mac_addr[0];
+
+		if (ether_addr_equal(existing_mac, umac)) {
+			mutex_unlock(rule_lock);
+			return true;
+		}
+	}
+
+	mutex_unlock(rule_lock);
+
+	return false;
+}
+
 /**
  * ice_set_vf_mac
  * @netdev: network interface device structure
@@ -3663,10 +3696,20 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	}
 
 	vf = &pf->vf[vf_id];
+	/* nothing left to do, unicast MAC already set */
+	if (ether_addr_equal(vf->dflt_lan_addr.addr, mac))
+		return 0;
+
 	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		return ret;
 
+	if (ice_unicast_mac_exists(pf, mac)) {
+		netdev_err(netdev, "Unicast MAC %pM already exists on this PF. Preventing setting VF %u unicast MAC address to %pM\n",
+			   mac, vf_id, mac);
+		return -EINVAL;
+	}
+
 	/* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
 	 * flow will use the updated dflt_lan_addr and add a MAC filter
 	 * using ice_add_mac. Also set pf_set_mac to indicate that the PF has
-- 
cgit v1.2.3-59-g8ed1b


From b82729195892f5fc1dd69a8cfb23731d3ef55578 Mon Sep 17 00:00:00 2001
From: Victor Raj <victor.raj@intel.com>
Date: Fri, 15 May 2020 17:36:34 -0700
Subject: ice: check for compatibility between DDP package and firmware

Require the Dynamic Device Personalization (DDP) file to have the same
major version number and the same or older minor number than the firmware
version major and minor, respectively.

Check the OS and NVM package versions before downloading the package.
If the OS package version is not compatible with NVM then return an
appropriate error.

Split the 32-byte segment name into a 28-byte segment name and
a 4-byte Track-ID. Older packages will still work with this change
because no package has a name that will take up more than 28 bytes;
in this case the Track-ID will be 0.

Note that the driver will store the segment name as 32-bytes in the
ice_hw structure, in order to normalize the length of the various
package name strings that it uses.

Also add section ID and structure for the segment metadata section.

Signed-off-by: Victor Raj <victor.raj@intel.com>
Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h |   4 +-
 drivers/net/ethernet/intel/ice/ice_flex_pipe.c  | 112 ++++++++++++++++++------
 drivers/net/ethernet/intel/ice/ice_flex_type.h  |   8 +-
 drivers/net/ethernet/intel/ice/ice_main.c       |   5 ++
 drivers/net/ethernet/intel/ice/ice_status.h     |   2 +
 drivers/net/ethernet/intel/ice/ice_type.h       |   1 +
 6 files changed, 102 insertions(+), 30 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index f80fb6570f8f..586d69491268 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1703,10 +1703,12 @@ struct ice_pkg_ver {
 };
 
 #define ICE_PKG_NAME_SIZE	32
+#define ICE_SEG_NAME_SIZE	28
 
 struct ice_aqc_get_pkg_info {
 	struct ice_pkg_ver ver;
-	char name[ICE_PKG_NAME_SIZE];
+	char name[ICE_SEG_NAME_SIZE];
+	__le32 track_id;
 	u8 is_in_nvm;
 	u8 is_active;
 	u8 is_active_at_boot;
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
index da82783d1571..4420fc02f7e7 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
+++ b/drivers/net/ethernet/intel/ice/ice_flex_pipe.c
@@ -864,8 +864,9 @@ ice_find_seg_in_pkg(struct ice_hw *hw, u32 seg_type,
 	u32 i;
 
 	ice_debug(hw, ICE_DBG_PKG, "Package format version: %d.%d.%d.%d\n",
-		  pkg_hdr->format_ver.major, pkg_hdr->format_ver.minor,
-		  pkg_hdr->format_ver.update, pkg_hdr->format_ver.draft);
+		  pkg_hdr->pkg_format_ver.major, pkg_hdr->pkg_format_ver.minor,
+		  pkg_hdr->pkg_format_ver.update,
+		  pkg_hdr->pkg_format_ver.draft);
 
 	/* Search all package segments for the requested segment type */
 	for (i = 0; i < le32_to_cpu(pkg_hdr->seg_count); i++) {
@@ -1035,13 +1036,15 @@ ice_download_pkg(struct ice_hw *hw, struct ice_seg *ice_seg)
 {
 	struct ice_buf_table *ice_buf_tbl;
 
-	ice_debug(hw, ICE_DBG_PKG, "Segment version: %d.%d.%d.%d\n",
-		  ice_seg->hdr.seg_ver.major, ice_seg->hdr.seg_ver.minor,
-		  ice_seg->hdr.seg_ver.update, ice_seg->hdr.seg_ver.draft);
+	ice_debug(hw, ICE_DBG_PKG, "Segment format version: %d.%d.%d.%d\n",
+		  ice_seg->hdr.seg_format_ver.major,
+		  ice_seg->hdr.seg_format_ver.minor,
+		  ice_seg->hdr.seg_format_ver.update,
+		  ice_seg->hdr.seg_format_ver.draft);
 
 	ice_debug(hw, ICE_DBG_PKG, "Seg: type 0x%X, size %d, name %s\n",
 		  le32_to_cpu(ice_seg->hdr.seg_type),
-		  le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_name);
+		  le32_to_cpu(ice_seg->hdr.seg_size), ice_seg->hdr.seg_id);
 
 	ice_buf_tbl = ice_find_buf_table(ice_seg);
 
@@ -1086,14 +1089,16 @@ ice_init_pkg_info(struct ice_hw *hw, struct ice_pkg_hdr *pkg_hdr)
 
 	seg_hdr = ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg_hdr);
 	if (seg_hdr) {
-		hw->ice_pkg_ver = seg_hdr->seg_ver;
-		memcpy(hw->ice_pkg_name, seg_hdr->seg_name,
+		hw->ice_pkg_ver = seg_hdr->seg_format_ver;
+		memcpy(hw->ice_pkg_name, seg_hdr->seg_id,
 		       sizeof(hw->ice_pkg_name));
 
-		ice_debug(hw, ICE_DBG_PKG, "Ice Pkg: %d.%d.%d.%d, %s\n",
-			  seg_hdr->seg_ver.major, seg_hdr->seg_ver.minor,
-			  seg_hdr->seg_ver.update, seg_hdr->seg_ver.draft,
-			  seg_hdr->seg_name);
+		ice_debug(hw, ICE_DBG_PKG, "Ice Seg: %d.%d.%d.%d, %s\n",
+			  seg_hdr->seg_format_ver.major,
+			  seg_hdr->seg_format_ver.minor,
+			  seg_hdr->seg_format_ver.update,
+			  seg_hdr->seg_format_ver.draft,
+			  seg_hdr->seg_id);
 	} else {
 		ice_debug(hw, ICE_DBG_INIT,
 			  "Did not find ice segment in driver package\n");
@@ -1134,9 +1139,11 @@ static enum ice_status ice_get_pkg_info(struct ice_hw *hw)
 		if (pkg_info->pkg_info[i].is_active) {
 			flags[place++] = 'A';
 			hw->active_pkg_ver = pkg_info->pkg_info[i].ver;
+			hw->active_track_id =
+				le32_to_cpu(pkg_info->pkg_info[i].track_id);
 			memcpy(hw->active_pkg_name,
 			       pkg_info->pkg_info[i].name,
-			       sizeof(hw->active_pkg_name));
+			       sizeof(pkg_info->pkg_info[i].name));
 			hw->active_pkg_in_nvm = pkg_info->pkg_info[i].is_in_nvm;
 		}
 		if (pkg_info->pkg_info[i].is_active_at_boot)
@@ -1176,10 +1183,10 @@ static enum ice_status ice_verify_pkg(struct ice_pkg_hdr *pkg, u32 len)
 	if (len < sizeof(*pkg))
 		return ICE_ERR_BUF_TOO_SHORT;
 
-	if (pkg->format_ver.major != ICE_PKG_FMT_VER_MAJ ||
-	    pkg->format_ver.minor != ICE_PKG_FMT_VER_MNR ||
-	    pkg->format_ver.update != ICE_PKG_FMT_VER_UPD ||
-	    pkg->format_ver.draft != ICE_PKG_FMT_VER_DFT)
+	if (pkg->pkg_format_ver.major != ICE_PKG_FMT_VER_MAJ ||
+	    pkg->pkg_format_ver.minor != ICE_PKG_FMT_VER_MNR ||
+	    pkg->pkg_format_ver.update != ICE_PKG_FMT_VER_UPD ||
+	    pkg->pkg_format_ver.draft != ICE_PKG_FMT_VER_DFT)
 		return ICE_ERR_CFG;
 
 	/* pkg must have at least one segment */
@@ -1260,6 +1267,68 @@ static enum ice_status ice_chk_pkg_version(struct ice_pkg_ver *pkg_ver)
 	return 0;
 }
 
+/**
+ * ice_chk_pkg_compat
+ * @hw: pointer to the hardware structure
+ * @ospkg: pointer to the package hdr
+ * @seg: pointer to the package segment hdr
+ *
+ * This function checks the package version compatibility with driver and NVM
+ */
+static enum ice_status
+ice_chk_pkg_compat(struct ice_hw *hw, struct ice_pkg_hdr *ospkg,
+		   struct ice_seg **seg)
+{
+	struct ice_aqc_get_pkg_info_resp *pkg;
+	enum ice_status status;
+	u16 size;
+	u32 i;
+
+	/* Check package version compatibility */
+	status = ice_chk_pkg_version(&hw->pkg_ver);
+	if (status) {
+		ice_debug(hw, ICE_DBG_INIT, "Package version check failed.\n");
+		return status;
+	}
+
+	/* find ICE segment in given package */
+	*seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE,
+						     ospkg);
+	if (!*seg) {
+		ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
+		return ICE_ERR_CFG;
+	}
+
+	/* Check if FW is compatible with the OS package */
+	size = struct_size(pkg, pkg_info, ICE_PKG_CNT - 1);
+	pkg = kzalloc(size, GFP_KERNEL);
+	if (!pkg)
+		return ICE_ERR_NO_MEMORY;
+
+	status = ice_aq_get_pkg_info_list(hw, pkg, size, NULL);
+	if (status)
+		goto fw_ddp_compat_free_alloc;
+
+	for (i = 0; i < le32_to_cpu(pkg->count); i++) {
+		/* loop till we find the NVM package */
+		if (!pkg->pkg_info[i].is_in_nvm)
+			continue;
+		if ((*seg)->hdr.seg_format_ver.major !=
+			pkg->pkg_info[i].ver.major ||
+		    (*seg)->hdr.seg_format_ver.minor >
+			pkg->pkg_info[i].ver.minor) {
+			status = ICE_ERR_FW_DDP_MISMATCH;
+			ice_debug(hw, ICE_DBG_INIT,
+				  "OS package is not compatible with NVM.\n");
+		}
+		/* done processing NVM package so break */
+		break;
+	}
+fw_ddp_compat_free_alloc:
+	kfree(pkg);
+	return status;
+}
+
 /**
  * ice_init_pkg - initialize/download package
  * @hw: pointer to the hardware structure
@@ -1310,17 +1379,10 @@ enum ice_status ice_init_pkg(struct ice_hw *hw, u8 *buf, u32 len)
 	/* before downloading the package, check package version for
 	 * compatibility with driver
 	 */
-	status = ice_chk_pkg_version(&hw->pkg_ver);
+	status = ice_chk_pkg_compat(hw, pkg, &seg);
 	if (status)
 		return status;
 
-	/* find segment in given package */
-	seg = (struct ice_seg *)ice_find_seg_in_pkg(hw, SEGMENT_TYPE_ICE, pkg);
-	if (!seg) {
-		ice_debug(hw, ICE_DBG_INIT, "no ice segment in package.\n");
-		return ICE_ERR_CFG;
-	}
-
 	/* initialize package hints and then download package */
 	ice_init_pkg_hints(hw, seg);
 	status = ice_download_pkg(hw, seg);
diff --git a/drivers/net/ethernet/intel/ice/ice_flex_type.h b/drivers/net/ethernet/intel/ice/ice_flex_type.h
index 249fb66fc230..a6f391eac8ff 100644
--- a/drivers/net/ethernet/intel/ice/ice_flex_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_flex_type.h
@@ -20,7 +20,7 @@ struct ice_fv {
 
 /* Package and segment headers and tables */
 struct ice_pkg_hdr {
-	struct ice_pkg_ver format_ver;
+	struct ice_pkg_ver pkg_format_ver;
 	__le32 seg_count;
 	__le32 seg_offset[1];
 };
@@ -30,9 +30,9 @@ struct ice_generic_seg_hdr {
 #define SEGMENT_TYPE_METADATA	0x00000001
 #define SEGMENT_TYPE_ICE	0x00000010
 	__le32 seg_type;
-	struct ice_pkg_ver seg_ver;
+	struct ice_pkg_ver seg_format_ver;
 	__le32 seg_size;
-	char seg_name[ICE_PKG_NAME_SIZE];
+	char seg_id[ICE_PKG_NAME_SIZE];
 };
 
 /* ice specific segment */
@@ -75,7 +75,7 @@ struct ice_buf_table {
 struct ice_global_metadata_seg {
 	struct ice_generic_seg_hdr hdr;
 	struct ice_pkg_ver pkg_ver;
-	__le32 track_id;
+	__le32 rsvd;
 	char pkg_name[ICE_PKG_NAME_SIZE];
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index bac5a0857c8c..5adf6c92872d 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3052,6 +3052,9 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
 			*status = ICE_ERR_NOT_SUPPORTED;
 		}
 		break;
+	case ICE_ERR_FW_DDP_MISMATCH:
+		dev_err(dev, "The firmware loaded on the device is not compatible with the DDP package.  Please update the device's NVM.  Entering safe mode.\n");
+		break;
 	case ICE_ERR_BUF_TOO_SHORT:
 	case ICE_ERR_CFG:
 		dev_err(dev, "The DDP package file is invalid. Entering Safe Mode.\n");
@@ -5186,6 +5189,8 @@ const char *ice_stat_str(enum ice_status stat_err)
 		return "ICE_ERR_HW_TABLE";
 	case ICE_ERR_DOES_NOT_EXIST:
 		return "ICE_ERR_DOES_NOT_EXIST";
+	case ICE_ERR_FW_DDP_MISMATCH:
+		return "ICE_ERR_FW_DDP_MISMATCH";
 	case ICE_ERR_AQ_ERROR:
 		return "ICE_ERR_AQ_ERROR";
 	case ICE_ERR_AQ_TIMEOUT:
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index a9a8bc3aca42..546a02856d09 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -27,6 +27,8 @@ enum ice_status {
 	ICE_ERR_MAX_LIMIT			= -17,
 	ICE_ERR_RESET_ONGOING			= -18,
 	ICE_ERR_HW_TABLE			= -19,
+	ICE_ERR_FW_DDP_MISMATCH			= -20,
+
 	ICE_ERR_NVM_CHECKSUM			= -51,
 	ICE_ERR_BUF_TOO_SHORT			= -52,
 	ICE_ERR_NVM_BLANK_MODE			= -53,
diff --git a/drivers/net/ethernet/intel/ice/ice_type.h b/drivers/net/ethernet/intel/ice/ice_type.h
index f1bfe8c94f1f..c1ad8622e65c 100644
--- a/drivers/net/ethernet/intel/ice/ice_type.h
+++ b/drivers/net/ethernet/intel/ice/ice_type.h
@@ -597,6 +597,7 @@ struct ice_hw {
 
 	/* Active package version (currently active) */
 	struct ice_pkg_ver active_pkg_ver;
+	u32 active_track_id;
 	u8 active_pkg_name[ICE_PKG_NAME_SIZE];
 	u8 active_pkg_in_nvm;
 
-- 
cgit v1.2.3-59-g8ed1b


From 1fba4a8a92706c89716449b1aab1b6879f438d34 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 15 May 2020 17:36:35 -0700
Subject: ice: Fix bad register reads

The "ethtool -d" handler reads registers in the ice_regs_dump_list array
and returns read values back to the userspace.

The register offsets PFINT0_ITR* are not valid as per the specification
and reading these causes a "unable to handle kernel paging request" bug
in the driver. Remove these registers from ice_regs_dump_list.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c    | 3 ---
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h | 3 ---
 2 files changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 72105d70cead..477ad33e0403 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -142,9 +142,6 @@ static const u32 ice_regs_dump_list[] = {
 	QINT_RQCTL(0),
 	PFINT_OICR_ENA,
 	QRX_ITR(0),
-	PF0INT_ITR_0(0),
-	PF0INT_ITR_1(0),
-	PF0INT_ITR_2(0),
 };
 
 struct ice_priv_flag {
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 1f9b427a35fa..2f1c776747a4 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -6,9 +6,6 @@
 #ifndef _ICE_HW_AUTOGEN_H_
 #define _ICE_HW_AUTOGEN_H_
 
-#define PF0INT_ITR_0(_i)			(0x03000004 + ((_i) * 4096))
-#define PF0INT_ITR_1(_i)			(0x03000008 + ((_i) * 4096))
-#define PF0INT_ITR_2(_i)			(0x0300000C + ((_i) * 4096))
 #define QTX_COMM_DBELL(_DBQM)			(0x002C0000 + ((_DBQM) * 4))
 #define QTX_COMM_HEAD(_DBQM)			(0x000E0000 + ((_DBQM) * 4))
 #define QTX_COMM_HEAD_HEAD_S			0
-- 
cgit v1.2.3-59-g8ed1b


From 9d68a79c3b8fe88c1444c401aaf3496f7ed2a05d Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:36:36 -0700
Subject: ice: fix usage of incorrect variable

The driver was using rq_last_status where it should have been
using sq_last_status. Fix the string to be using the correct
error reporting variable.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 2 +-
 drivers/net/ethernet/intel/ice/ice_main.c    | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index 477ad33e0403..f39d4eb7fd8b 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3251,7 +3251,7 @@ static int ice_vsi_set_dflt_rss_lut(struct ice_vsi *vsi, int req_rss_size)
 	if (status) {
 		dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 			ice_stat_str(status),
-			ice_aq_str(hw->adminq.rq_last_status));
+			ice_aq_str(hw->adminq.sq_last_status));
 		err = -EIO;
 	}
 
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5adf6c92872d..6e6df4d690cc 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5232,7 +5232,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		if (status) {
 			dev_err(dev, "Cannot set RSS key, err %s aq_err %s\n",
 				ice_stat_str(status),
-				ice_aq_str(hw->adminq.rq_last_status));
+				ice_aq_str(hw->adminq.sq_last_status));
 			return -EIO;
 		}
 	}
@@ -5243,7 +5243,7 @@ int ice_set_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		if (status) {
 			dev_err(dev, "Cannot set RSS lut, err %s aq_err %s\n",
 				ice_stat_str(status),
-				ice_aq_str(hw->adminq.rq_last_status));
+				ice_aq_str(hw->adminq.sq_last_status));
 			return -EIO;
 		}
 	}
@@ -5276,7 +5276,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		if (status) {
 			dev_err(dev, "Cannot get RSS key, err %s aq_err %s\n",
 				ice_stat_str(status),
-				ice_aq_str(hw->adminq.rq_last_status));
+				ice_aq_str(hw->adminq.sq_last_status));
 			return -EIO;
 		}
 	}
@@ -5287,7 +5287,7 @@ int ice_get_rss(struct ice_vsi *vsi, u8 *seed, u8 *lut, u16 lut_size)
 		if (status) {
 			dev_err(dev, "Cannot get RSS lut, err %s aq_err %s\n",
 				ice_stat_str(status),
-				ice_aq_str(hw->adminq.rq_last_status));
+				ice_aq_str(hw->adminq.sq_last_status));
 			return -EIO;
 		}
 	}
-- 
cgit v1.2.3-59-g8ed1b


From c1e0883012a75a04180a6ebf23a869172187c506 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:36:37 -0700
Subject: ice: cleanup unsigned loops

Fix loop variables that are comparing or assigning signed against
unsigned values, mostly by declaring loop counters as unsigned.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_nl.c      |  9 +++++----
 drivers/net/ethernet/intel/ice/ice_ethtool.c     | 10 +++++-----
 drivers/net/ethernet/intel/ice/ice_main.c        |  4 ++--
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c |  4 ++--
 4 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
index c4c12414083a..93cf70d06fe5 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -671,7 +671,7 @@ static bool
 ice_dcbnl_find_app(struct ice_dcbx_cfg *cfg,
 		   struct ice_dcb_app_priority_table *app)
 {
-	int i;
+	unsigned int i;
 
 	for (i = 0; i < cfg->numapps; i++) {
 		if (app->selector == cfg->app[i].selector &&
@@ -746,7 +746,8 @@ static int ice_dcbnl_delapp(struct net_device *netdev, struct dcb_app *app)
 {
 	struct ice_pf *pf = ice_netdev_to_pf(netdev);
 	struct ice_dcbx_cfg *old_cfg, *new_cfg;
-	int i, j, ret = 0;
+	unsigned int i, j;
+	int ret = 0;
 
 	if (pf->dcbx_cap & DCB_CAP_DCBX_LLD_MANAGED)
 		return -EINVAL;
@@ -869,7 +870,7 @@ void ice_dcbnl_set_all(struct ice_vsi *vsi)
 	struct ice_port_info *pi;
 	struct dcb_app sapp;
 	struct ice_pf *pf;
-	int i;
+	unsigned int i;
 
 	if (!netdev)
 		return;
@@ -941,7 +942,7 @@ ice_dcbnl_flush_apps(struct ice_pf *pf, struct ice_dcbx_cfg *old_cfg,
 		     struct ice_dcbx_cfg *new_cfg)
 {
 	struct ice_vsi *main_vsi = ice_get_main_vsi(pf);
-	int i;
+	unsigned int i;
 
 	if (!main_vsi)
 		return;
diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index f39d4eb7fd8b..fd1849155d85 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -205,7 +205,7 @@ ice_get_regs(struct net_device *netdev, struct ethtool_regs *regs, void *p)
 	struct ice_pf *pf = np->vsi->back;
 	struct ice_hw *hw = &pf->hw;
 	u32 *regs_buf = (u32 *)p;
-	int i;
+	unsigned int i;
 
 	regs->version = 1;
 
@@ -308,7 +308,7 @@ out:
  */
 static bool ice_active_vfs(struct ice_pf *pf)
 {
-	int i;
+	unsigned int i;
 
 	ice_for_each_vf(pf, i) {
 		struct ice_vf *vf = &pf->vf[i];
@@ -378,7 +378,7 @@ static int ice_reg_pattern_test(struct ice_hw *hw, u32 reg, u32 mask)
 		0x00000000, 0xFFFFFFFF
 	};
 	u32 val, orig_val;
-	int i;
+	unsigned int i;
 
 	orig_val = rd32(hw, reg);
 	for (i = 0; i < ARRAY_SIZE(patterns); ++i) {
@@ -431,7 +431,7 @@ static u64 ice_reg_test(struct net_device *netdev)
 			GLINT_ITR(2, 1) - GLINT_ITR(2, 0)},
 		{GLINT_CTL, 0xffff0001, 1, 0}
 	};
-	int i;
+	unsigned int i;
 
 	netdev_dbg(netdev, "Register test\n");
 	for (i = 0; i < ARRAY_SIZE(ice_reg_list); ++i) {
@@ -3759,10 +3759,10 @@ ice_get_module_eeprom(struct net_device *netdev,
 	struct ice_hw *hw = &pf->hw;
 	enum ice_status status;
 	bool is_sfp = false;
+	unsigned int i;
 	u16 offset = 0;
 	u8 value = 0;
 	u8 page = 0;
-	int i;
 
 	status = ice_aq_sff_eeprom(hw, 0, addr, offset, page, 0,
 				   &value, 1, 0, NULL);
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6e6df4d690cc..1c255b27244c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -452,7 +452,7 @@ static void
 ice_prepare_for_reset(struct ice_pf *pf)
 {
 	struct ice_hw *hw = &pf->hw;
-	int i;
+	unsigned int i;
 
 	/* already prepared for reset */
 	if (test_bit(__ICE_PREPARED_FOR_RESET, pf->state))
@@ -1188,8 +1188,8 @@ static void ice_handle_mdd_event(struct ice_pf *pf)
 {
 	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
+	unsigned int i;
 	u32 reg;
-	int i;
 
 	if (!test_and_clear_bit(__ICE_MDD_EVENT_PENDING, pf->state)) {
 		/* Since the VF MDD event logging is rate limited, check if
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 95e8bca562e5..9b09a111321c 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -80,7 +80,7 @@ ice_vc_vf_broadcast(struct ice_pf *pf, enum virtchnl_ops v_opcode,
 		    enum virtchnl_status_code v_retval, u8 *msg, u16 msglen)
 {
 	struct ice_hw *hw = &pf->hw;
-	int i;
+	unsigned int i;
 
 	ice_for_each_vf(pf, i) {
 		struct ice_vf *vf = &pf->vf[i];
@@ -325,7 +325,7 @@ void ice_free_vfs(struct ice_pf *pf)
 {
 	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
-	int tmp, i;
+	unsigned int tmp, i;
 
 	if (!pf->vf)
 		return;
-- 
cgit v1.2.3-59-g8ed1b


From 0465d8f830dc40714db6c02e1db28b30206a82f7 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Fri, 22 May 2020 11:53:31 +0200
Subject: net: phy: at803x: fix PHY ID masks

Ever since its first commit 0ca7111a38f05 ("phy: add AT803x driver") the
PHY ID mask was set to 0xffffffef. It is unclear to me why this mask was
chosen in the first place. Both the AR8031/AR8033 and the AR8035
datasheets mention it is always the given value:
 - for AR8031/AR8033 it's 0x004d/0xd074
 - for AR8035 it's 0x004d/0xd072

Unfortunately, I don't have a datasheet for the AR8030. Therefore, we
leave its PHY ID mask untouched. For the PHYs mentioned before use the
handy PHY_ID_MATCH_EXACT() macro.

I've tried to contact the author of the initial commit, but received no
answer so far.

Cc: Matus Ujhelyi <ujhelyi.m@gmail.com>
Signed-off-by: Michael Walle <michael@walle.cc>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 16 +++++++---------
 1 file changed, 7 insertions(+), 9 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index acd51b29a476..822b3acf6be7 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -126,7 +126,7 @@
 #define ATH8031_PHY_ID 0x004dd074
 #define ATH8032_PHY_ID 0x004dd023
 #define ATH8035_PHY_ID 0x004dd072
-#define AT803X_PHY_ID_MASK			0xffffffef
+#define AT8030_PHY_ID_MASK			0xffffffef
 
 MODULE_DESCRIPTION("Qualcomm Atheros AR803x PHY driver");
 MODULE_AUTHOR("Matus Ujhelyi");
@@ -967,9 +967,8 @@ static int at803x_cable_test_start(struct phy_device *phydev)
 static struct phy_driver at803x_driver[] = {
 {
 	/* Qualcomm Atheros AR8035 */
-	.phy_id			= ATH8035_PHY_ID,
+	PHY_ID_MATCH_EXACT(ATH8035_PHY_ID),
 	.name			= "Qualcomm Atheros AR8035",
-	.phy_id_mask		= AT803X_PHY_ID_MASK,
 	.flags			= PHY_POLL_CABLE_TEST,
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
@@ -991,7 +990,7 @@ static struct phy_driver at803x_driver[] = {
 	/* Qualcomm Atheros AR8030 */
 	.phy_id			= ATH8030_PHY_ID,
 	.name			= "Qualcomm Atheros AR8030",
-	.phy_id_mask		= AT803X_PHY_ID_MASK,
+	.phy_id_mask		= AT8030_PHY_ID_MASK,
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
 	.config_init		= at803x_config_init,
@@ -1005,9 +1004,8 @@ static struct phy_driver at803x_driver[] = {
 	.config_intr		= at803x_config_intr,
 }, {
 	/* Qualcomm Atheros AR8031/AR8033 */
-	.phy_id			= ATH8031_PHY_ID,
+	PHY_ID_MATCH_EXACT(ATH8031_PHY_ID),
 	.name			= "Qualcomm Atheros AR8031/AR8033",
-	.phy_id_mask		= AT803X_PHY_ID_MASK,
 	.flags			= PHY_POLL_CABLE_TEST,
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
@@ -1055,10 +1053,10 @@ static struct phy_driver at803x_driver[] = {
 module_phy_driver(at803x_driver);
 
 static struct mdio_device_id __maybe_unused atheros_tbl[] = {
-	{ ATH8030_PHY_ID, AT803X_PHY_ID_MASK },
-	{ ATH8031_PHY_ID, AT803X_PHY_ID_MASK },
+	{ ATH8030_PHY_ID, AT8030_PHY_ID_MASK },
+	{ PHY_ID_MATCH_EXACT(ATH8031_PHY_ID) },
 	{ PHY_ID_MATCH_EXACT(ATH8032_PHY_ID) },
-	{ ATH8035_PHY_ID, AT803X_PHY_ID_MASK },
+	{ PHY_ID_MATCH_EXACT(ATH8035_PHY_ID) },
 	{ PHY_ID_MATCH_EXACT(ATH9331_PHY_ID) },
 	{ }
 };
-- 
cgit v1.2.3-59-g8ed1b


From 07a7f30819475a6f058df6bba5150c50e7942cfb Mon Sep 17 00:00:00 2001
From: Randy Dunlap <rdunlap@infradead.org>
Date: Fri, 22 May 2020 13:05:26 -0700
Subject: net: psample: fix build error when CONFIG_INET is not enabled
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Fix psample build error when CONFIG_INET is not set/enabled by
bracketing the tunnel code in #ifdef CONFIG_INET / #endif.

../net/psample/psample.c: In function ‘__psample_ip_tun_to_nlattr’:
../net/psample/psample.c:216:25: error: implicit declaration of function ‘ip_tunnel_info_opts’; did you mean ‘ip_tunnel_info_opts_set’? [-Werror=implicit-function-declaration]

Signed-off-by: Randy Dunlap <rdunlap@infradead.org>
Cc: Yotam Gigi <yotam.gi@gmail.com>
Cc: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/psample/psample.c | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/net/psample/psample.c b/net/psample/psample.c
index 34a74043840b..a042261a45c5 100644
--- a/net/psample/psample.c
+++ b/net/psample/psample.c
@@ -209,6 +209,7 @@ void psample_group_put(struct psample_group *group)
 }
 EXPORT_SYMBOL_GPL(psample_group_put);
 
+#ifdef CONFIG_INET
 static int __psample_ip_tun_to_nlattr(struct sk_buff *skb,
 			      struct ip_tunnel_info *tun_info)
 {
@@ -352,12 +353,15 @@ static int psample_tunnel_meta_len(struct ip_tunnel_info *tun_info)
 
 	return sum;
 }
+#endif
 
 void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 			   u32 trunc_size, int in_ifindex, int out_ifindex,
 			   u32 sample_rate)
 {
+#ifdef CONFIG_INET
 	struct ip_tunnel_info *tun_info;
+#endif
 	struct sk_buff *nl_skb;
 	int data_len;
 	int meta_len;
@@ -371,9 +375,11 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 		   nla_total_size(sizeof(u32)) +	/* group_num */
 		   nla_total_size(sizeof(u32));		/* seq */
 
+#ifdef CONFIG_INET
 	tun_info = skb_tunnel_info(skb);
 	if (tun_info)
 		meta_len += psample_tunnel_meta_len(tun_info);
+#endif
 
 	data_len = min(skb->len, trunc_size);
 	if (meta_len + nla_total_size(data_len) > PSAMPLE_MAX_PACKET_SIZE)
@@ -429,11 +435,13 @@ void psample_sample_packet(struct psample_group *group, struct sk_buff *skb,
 			goto error;
 	}
 
+#ifdef CONFIG_INET
 	if (tun_info) {
 		ret = psample_ip_tun_to_nlattr(nl_skb, tun_info);
 		if (unlikely(ret < 0))
 			goto error;
 	}
+#endif
 
 	genlmsg_end(nl_skb, data);
 	genlmsg_multicast_netns(&psample_nl_family, group->net, nl_skb, 0,
-- 
cgit v1.2.3-59-g8ed1b


From 54113ded67126a7f81245de7c9cd73f49c6b2d0c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 23 May 2020 13:21:36 +0200
Subject: r8169: remove mask argument from rtl_w0w1_eri

rtl_eri_read() returns the full 32bit value, therefore there's no
benefit in writing back parts of it only. Handle it like the vendor
driver and always write the full 32 bits. Omitting the mask argument
avoids some overhead and makes the code more readable.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 66 ++++++++++++++-----------------
 1 file changed, 30 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 79817d4ffa47..42178a0bc23e 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1061,25 +1061,21 @@ static u32 rtl_eri_read(struct rtl8169_private *tp, int addr)
 	return _rtl_eri_read(tp, addr, ERIAR_EXGMAC);
 }
 
-static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 mask, u32 p,
-			 u32 m)
+static void rtl_w0w1_eri(struct rtl8169_private *tp, int addr, u32 p, u32 m)
 {
-	u32 val;
+	u32 val = rtl_eri_read(tp, addr);
 
-	val = rtl_eri_read(tp, addr);
-	rtl_eri_write(tp, addr, mask, (val & ~m) | p);
+	rtl_eri_write(tp, addr, ERIAR_MASK_1111, (val & ~m) | p);
 }
 
-static void rtl_eri_set_bits(struct rtl8169_private *tp, int addr, u32 mask,
-			     u32 p)
+static void rtl_eri_set_bits(struct rtl8169_private *tp, int addr, u32 p)
 {
-	rtl_w0w1_eri(tp, addr, mask, p, 0);
+	rtl_w0w1_eri(tp, addr, p, 0);
 }
 
-static void rtl_eri_clear_bits(struct rtl8169_private *tp, int addr, u32 mask,
-			       u32 m)
+static void rtl_eri_clear_bits(struct rtl8169_private *tp, int addr, u32 m)
 {
-	rtl_w0w1_eri(tp, addr, mask, 0, m);
+	rtl_w0w1_eri(tp, addr, 0, m);
 }
 
 static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
@@ -1243,8 +1239,8 @@ static bool r8168_check_dash(struct rtl8169_private *tp)
 
 static void rtl_reset_packet_filter(struct rtl8169_private *tp)
 {
-	rtl_eri_clear_bits(tp, 0xdc, ERIAR_MASK_0001, BIT(0));
-	rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_0001, BIT(0));
+	rtl_eri_clear_bits(tp, 0xdc, BIT(0));
+	rtl_eri_set_bits(tp, 0xdc, BIT(0));
 }
 
 DECLARE_RTL_COND(rtl_efusear_cond)
@@ -1371,11 +1367,9 @@ static void __rtl8169_set_wol(struct rtl8169_private *tp, u32 wolopts)
 	if (rtl_is_8168evl_up(tp)) {
 		tmp--;
 		if (wolopts & WAKE_MAGIC)
-			rtl_eri_set_bits(tp, 0x0dc, ERIAR_MASK_0100,
-					 MagicPacket_v2);
+			rtl_eri_set_bits(tp, 0x0dc, MagicPacket_v2);
 		else
-			rtl_eri_clear_bits(tp, 0x0dc, ERIAR_MASK_0100,
-					   MagicPacket_v2);
+			rtl_eri_clear_bits(tp, 0x0dc, MagicPacket_v2);
 	} else if (rtl_is_8125(tp)) {
 		tmp--;
 		if (wolopts & WAKE_MAGIC)
@@ -2119,7 +2113,7 @@ static void rtl8168_config_eee_mac(struct rtl8169_private *tp)
 	if (tp->mac_version != RTL_GIGA_MAC_VER_38)
 		RTL_W8(tp, EEE_LED, RTL_R8(tp, EEE_LED) & ~0x07);
 
-	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_1111, 0x0003);
+	rtl_eri_set_bits(tp, 0x1b0, 0x0003);
 }
 
 static void rtl8125_config_eee_mac(struct rtl8169_private *tp)
@@ -2283,7 +2277,7 @@ static void rtl_pll_power_down(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_40:
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
-		rtl_eri_clear_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
+		rtl_eri_clear_bits(tp, 0x1a8, 0xfc000000);
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) & ~0x80);
 		break;
 	default:
@@ -2316,7 +2310,7 @@ static void rtl_pll_power_up(struct rtl8169_private *tp)
 	case RTL_GIGA_MAC_VER_41:
 	case RTL_GIGA_MAC_VER_49:
 		RTL_W8(tp, PMCH, RTL_R8(tp, PMCH) | 0xc0);
-		rtl_eri_set_bits(tp, 0x1a8, ERIAR_MASK_1111, 0xfc000000);
+		rtl_eri_set_bits(tp, 0x1a8, 0xfc000000);
 		break;
 	default:
 		break;
@@ -2925,8 +2919,8 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x07ff0060);
-	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4));
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00);
+	rtl_eri_set_bits(tp, 0x1b0, BIT(4));
+	rtl_w0w1_eri(tp, 0x0d4, 0x0c00, 0xff00);
 
 	rtl_disable_clock_request(tp);
 
@@ -2949,8 +2943,8 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
 	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
 	rtl_reset_packet_filter(tp);
-	rtl_eri_set_bits(tp, 0x1b0, ERIAR_MASK_0001, BIT(4));
-	rtl_eri_set_bits(tp, 0x1d0, ERIAR_MASK_0001, BIT(4));
+	rtl_eri_set_bits(tp, 0x1b0, BIT(4));
+	rtl_eri_set_bits(tp, 0x1d0, BIT(4));
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060);
 
@@ -2979,7 +2973,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8168f_1);
 
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00, 0xff00);
+	rtl_w0w1_eri(tp, 0x0d4, 0x0c00, 0xff00);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
@@ -2997,7 +2991,7 @@ static void rtl_hw_start_8411(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8168f_1);
 
-	rtl_eri_set_bits(tp, 0x0d4, ERIAR_MASK_0011, 0x0c00);
+	rtl_eri_set_bits(tp, 0x0d4, 0x0c00);
 }
 
 static void rtl_hw_start_8168g(struct rtl8169_private *tp)
@@ -3017,8 +3011,8 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 	rtl8168_config_eee_mac(tp);
 
-	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06);
-	rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12));
+	rtl_w0w1_eri(tp, 0x2fc, 0x01, 0x06);
+	rtl_eri_clear_bits(tp, 0x1b0, BIT(12));
 
 	rtl_pcie_state_l2l3_disable(tp);
 }
@@ -3244,9 +3238,9 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_reset_packet_filter(tp);
 
-	rtl_eri_set_bits(tp, 0xdc, ERIAR_MASK_1111, BIT(4));
+	rtl_eri_set_bits(tp, 0xdc, BIT(4));
 
-	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f00);
+	rtl_eri_set_bits(tp, 0xd4, 0x1f00);
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
@@ -3262,7 +3256,7 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
-	rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12));
+	rtl_eri_clear_bits(tp, 0x1b0, BIT(12));
 
 	rtl_pcie_state_l2l3_disable(tp);
 
@@ -3299,7 +3293,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl_reset_packet_filter(tp);
 
-	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f80);
+	rtl_eri_set_bits(tp, 0xd4, 0x1f80);
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
@@ -3310,7 +3304,7 @@ static void rtl_hw_start_8168ep(struct rtl8169_private *tp)
 
 	rtl8168_config_eee_mac(tp);
 
-	rtl_w0w1_eri(tp, 0x2fc, ERIAR_MASK_0001, 0x01, 0x06);
+	rtl_w0w1_eri(tp, 0x2fc, 0x01, 0x06);
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
@@ -3402,7 +3396,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp)
 
 	rtl_reset_packet_filter(tp);
 
-	rtl_eri_set_bits(tp, 0xd4, ERIAR_MASK_1111, 0x1f90);
+	rtl_eri_set_bits(tp, 0xd4, 0x1f90);
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
@@ -3418,7 +3412,7 @@ static void rtl_hw_start_8117(struct rtl8169_private *tp)
 
 	RTL_W8(tp, DLLPR, RTL_R8(tp, DLLPR) & ~TX_10M_PS_EN);
 
-	rtl_eri_clear_bits(tp, 0x1b0, ERIAR_MASK_0011, BIT(12));
+	rtl_eri_clear_bits(tp, 0x1b0, BIT(12));
 
 	rtl_pcie_state_l2l3_disable(tp);
 
@@ -3543,7 +3537,7 @@ static void rtl_hw_start_8402(struct rtl8169_private *tp)
 	rtl_reset_packet_filter(tp);
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
-	rtl_w0w1_eri(tp, 0x0d4, ERIAR_MASK_0011, 0x0e00, 0xff00);
+	rtl_w0w1_eri(tp, 0x0d4, 0x0e00, 0xff00);
 
 	/* disable EEE */
 	rtl_eri_write(tp, 0x1b0, ERIAR_MASK_0011, 0x0000);
-- 
cgit v1.2.3-59-g8ed1b


From a15aaa038b8ed213da7547f31c98345e2fe06d64 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 23 May 2020 13:22:22 +0200
Subject: r8169: remove mask argument from r8168dp_ocp_read

All callers read the full 32bit value, therefore the mask argument can
be removed.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 42178a0bc23e..391b77710a7d 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1078,9 +1078,9 @@ static void rtl_eri_clear_bits(struct rtl8169_private *tp, int addr, u32 m)
 	rtl_w0w1_eri(tp, addr, 0, m);
 }
 
-static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
+static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u16 reg)
 {
-	RTL_W32(tp, OCPAR, ((u32)mask & 0x0f) << 12 | (reg & 0x0fff));
+	RTL_W32(tp, OCPAR, 0x0fu << 12 | (reg & 0x0fff));
 	return rtl_loop_wait_high(tp, &rtl_ocpar_cond, 100, 20) ?
 		RTL_R32(tp, OCPDR) : ~0;
 }
@@ -1127,7 +1127,7 @@ DECLARE_RTL_COND(rtl_dp_ocp_read_cond)
 
 	reg = rtl8168_get_ocp_reg(tp);
 
-	return r8168dp_ocp_read(tp, 0x0f, reg) & 0x00000800;
+	return r8168dp_ocp_read(tp, reg) & 0x00000800;
 }
 
 DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
@@ -1215,7 +1215,7 @@ static bool r8168dp_check_dash(struct rtl8169_private *tp)
 {
 	u16 reg = rtl8168_get_ocp_reg(tp);
 
-	return !!(r8168dp_ocp_read(tp, 0x0f, reg) & 0x00008000);
+	return !!(r8168dp_ocp_read(tp, reg) & 0x00008000);
 }
 
 static bool r8168ep_check_dash(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From 787c0c04f4c28d10a1c49bcbd625c24a3e9155d1 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 23 May 2020 13:23:21 +0200
Subject: r8169: remove mask argument from r8168ep_ocp_read

Remove the mask argument as it's not used by r8168ep_ocp_read().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 391b77710a7d..1a69ec92caa5 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -1085,7 +1085,7 @@ static u32 r8168dp_ocp_read(struct rtl8169_private *tp, u16 reg)
 		RTL_R32(tp, OCPDR) : ~0;
 }
 
-static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u8 mask, u16 reg)
+static u32 r8168ep_ocp_read(struct rtl8169_private *tp, u16 reg)
 {
 	return _rtl_eri_read(tp, reg, ERIAR_OOB);
 }
@@ -1132,7 +1132,7 @@ DECLARE_RTL_COND(rtl_dp_ocp_read_cond)
 
 DECLARE_RTL_COND(rtl_ep_ocp_read_cond)
 {
-	return r8168ep_ocp_read(tp, 0x0f, 0x124) & 0x00000001;
+	return r8168ep_ocp_read(tp, 0x124) & 0x00000001;
 }
 
 DECLARE_RTL_COND(rtl_ocp_tx_cond)
@@ -1157,8 +1157,7 @@ static void rtl8168dp_driver_start(struct rtl8169_private *tp)
 static void rtl8168ep_driver_start(struct rtl8169_private *tp)
 {
 	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_START);
-	r8168ep_ocp_write(tp, 0x01, 0x30,
-			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
+	r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
 	rtl_loop_wait_high(tp, &rtl_ep_ocp_read_cond, 10000, 10);
 }
 
@@ -1189,8 +1188,7 @@ static void rtl8168ep_driver_stop(struct rtl8169_private *tp)
 {
 	rtl8168ep_stop_cmac(tp);
 	r8168ep_ocp_write(tp, 0x01, 0x180, OOB_CMD_DRIVER_STOP);
-	r8168ep_ocp_write(tp, 0x01, 0x30,
-			  r8168ep_ocp_read(tp, 0x01, 0x30) | 0x01);
+	r8168ep_ocp_write(tp, 0x01, 0x30, r8168ep_ocp_read(tp, 0x30) | 0x01);
 	rtl_loop_wait_low(tp, &rtl_ep_ocp_read_cond, 10000, 10);
 }
 
@@ -1220,7 +1218,7 @@ static bool r8168dp_check_dash(struct rtl8169_private *tp)
 
 static bool r8168ep_check_dash(struct rtl8169_private *tp)
 {
-	return !!(r8168ep_ocp_read(tp, 0x0f, 0x128) & 0x00000001);
+	return r8168ep_ocp_read(tp, 0x128) & 0x00000001;
 }
 
 static bool r8168_check_dash(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From 7eef3d095abe11fd7f99fa86c0be7c14b8852fae Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Sat, 23 May 2020 15:27:07 +0200
Subject: Documentation: devres: add a missing section for networking helpers

Add a new section for networking devres helpers to devres.rst and list
the two existing devm functions.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/driver-api/driver-model/devres.rst | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 46c13780994c..50df28d20fa7 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -372,6 +372,10 @@ MUX
   devm_mux_chip_register()
   devm_mux_control_get()
 
+NET
+  devm_alloc_etherdev()
+  devm_alloc_etherdev_mqs()
+
 PER-CPU MEM
   devm_alloc_percpu()
   devm_free_percpu()
-- 
cgit v1.2.3-59-g8ed1b


From cb8a14b205699fee1053a406e1e3fce330b6bdc3 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Sat, 23 May 2020 15:27:08 +0200
Subject: net: move devres helpers into a separate source file

There's currently only a single devres helper in net/ - devm variant
of alloc_etherdev. Let's move it to net/devres.c with the intention of
adding a second one: devm_register_netdev(). This new routine will need
to know the address of the release function of devm_alloc_etherdev() so
that it can verify (using devres_find()) that the struct net_device
that's being passed to it is also resource managed.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/Makefile       |  2 +-
 net/devres.c       | 36 ++++++++++++++++++++++++++++++++++++
 net/ethernet/eth.c | 28 ----------------------------
 3 files changed, 37 insertions(+), 29 deletions(-)
 create mode 100644 net/devres.c

diff --git a/net/Makefile b/net/Makefile
index 07ea48160874..5744bf1997fd 100644
--- a/net/Makefile
+++ b/net/Makefile
@@ -6,7 +6,7 @@
 # Rewritten to use lists instead of if-statements.
 #
 
-obj-$(CONFIG_NET)		:= socket.o core/
+obj-$(CONFIG_NET)		:= devres.o socket.o core/
 
 tmp-$(CONFIG_COMPAT) 		:= compat.o
 obj-$(CONFIG_NET)		+= $(tmp-y)
diff --git a/net/devres.c b/net/devres.c
new file mode 100644
index 000000000000..c1465d9f9019
--- /dev/null
+++ b/net/devres.c
@@ -0,0 +1,36 @@
+// SPDX-License-Identifier: GPL-2.0-or-later
+/*
+ * This file contains all networking devres helpers.
+ */
+
+#include <linux/device.h>
+#include <linux/etherdevice.h>
+#include <linux/netdevice.h>
+
+static void devm_free_netdev(struct device *dev, void *res)
+{
+	free_netdev(*(struct net_device **)res);
+}
+
+struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
+					   unsigned int txqs, unsigned int rxqs)
+{
+	struct net_device **dr;
+	struct net_device *netdev;
+
+	dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return NULL;
+
+	netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
+	if (!netdev) {
+		devres_free(dr);
+		return NULL;
+	}
+
+	*dr = netdev;
+	devres_add(dev, dr);
+
+	return netdev;
+}
+EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
diff --git a/net/ethernet/eth.c b/net/ethernet/eth.c
index c8b903302ff2..dac65180c4ef 100644
--- a/net/ethernet/eth.c
+++ b/net/ethernet/eth.c
@@ -400,34 +400,6 @@ struct net_device *alloc_etherdev_mqs(int sizeof_priv, unsigned int txqs,
 }
 EXPORT_SYMBOL(alloc_etherdev_mqs);
 
-static void devm_free_netdev(struct device *dev, void *res)
-{
-	free_netdev(*(struct net_device **)res);
-}
-
-struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
-					   unsigned int txqs, unsigned int rxqs)
-{
-	struct net_device **dr;
-	struct net_device *netdev;
-
-	dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
-	if (!dr)
-		return NULL;
-
-	netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
-	if (!netdev) {
-		devres_free(dr);
-		return NULL;
-	}
-
-	*dr = netdev;
-	devres_add(dev, dr);
-
-	return netdev;
-}
-EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
-
 ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len)
 {
 	return scnprintf(buf, PAGE_SIZE, "%*phC\n", len, addr);
-- 
cgit v1.2.3-59-g8ed1b


From f75063abc39441585a13fcc5b9ef3af993e9ac40 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Sat, 23 May 2020 15:27:09 +0200
Subject: net: devres: define a separate devres structure for
 devm_alloc_etherdev()

Not using a proxy structure to store struct net_device doesn't save
anything in terms of compiled code size or memory usage but significantly
decreases the readability of the code with all the pointer casting.

Define struct net_device_devres and use it in devm_alloc_etherdev_mqs().

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/devres.c | 20 ++++++++++++--------
 1 file changed, 12 insertions(+), 8 deletions(-)

diff --git a/net/devres.c b/net/devres.c
index c1465d9f9019..b97b0c5a8216 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -7,30 +7,34 @@
 #include <linux/etherdevice.h>
 #include <linux/netdevice.h>
 
-static void devm_free_netdev(struct device *dev, void *res)
+struct net_device_devres {
+	struct net_device *ndev;
+};
+
+static void devm_free_netdev(struct device *dev, void *this)
 {
-	free_netdev(*(struct net_device **)res);
+	struct net_device_devres *res = this;
+
+	free_netdev(res->ndev);
 }
 
 struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
 					   unsigned int txqs, unsigned int rxqs)
 {
-	struct net_device **dr;
-	struct net_device *netdev;
+	struct net_device_devres *dr;
 
 	dr = devres_alloc(devm_free_netdev, sizeof(*dr), GFP_KERNEL);
 	if (!dr)
 		return NULL;
 
-	netdev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
-	if (!netdev) {
+	dr->ndev = alloc_etherdev_mqs(sizeof_priv, txqs, rxqs);
+	if (!dr->ndev) {
 		devres_free(dr);
 		return NULL;
 	}
 
-	*dr = netdev;
 	devres_add(dev, dr);
 
-	return netdev;
+	return dr->ndev;
 }
 EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
-- 
cgit v1.2.3-59-g8ed1b


From cd16627fc0468564fdd60f20ad52420b87195127 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Sat, 23 May 2020 15:27:10 +0200
Subject: net: devres: provide devm_register_netdev()

Provide devm_register_netdev() - a device resource managed variant
of register_netdev(). This new helper will only work for net_device
structs that are also already managed by devres.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/driver-api/driver-model/devres.rst |  1 +
 include/linux/netdevice.h                        |  2 +
 net/devres.c                                     | 55 ++++++++++++++++++++++++
 3 files changed, 58 insertions(+)

diff --git a/Documentation/driver-api/driver-model/devres.rst b/Documentation/driver-api/driver-model/devres.rst
index 50df28d20fa7..fc242ed4bde5 100644
--- a/Documentation/driver-api/driver-model/devres.rst
+++ b/Documentation/driver-api/driver-model/devres.rst
@@ -375,6 +375,7 @@ MUX
 NET
   devm_alloc_etherdev()
   devm_alloc_etherdev_mqs()
+  devm_register_netdev()
 
 PER-CPU MEM
   devm_alloc_percpu()
diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h
index a18f8fdf4260..1a96e9c4ec36 100644
--- a/include/linux/netdevice.h
+++ b/include/linux/netdevice.h
@@ -4280,6 +4280,8 @@ struct net_device *alloc_netdev_mqs(int sizeof_priv, const char *name,
 int register_netdev(struct net_device *dev);
 void unregister_netdev(struct net_device *dev);
 
+int devm_register_netdev(struct device *dev, struct net_device *ndev);
+
 /* General hardware address lists handling functions */
 int __hw_addr_sync(struct netdev_hw_addr_list *to_list,
 		   struct netdev_hw_addr_list *from_list, int addr_len);
diff --git a/net/devres.c b/net/devres.c
index b97b0c5a8216..57a6a88d11f6 100644
--- a/net/devres.c
+++ b/net/devres.c
@@ -38,3 +38,58 @@ struct net_device *devm_alloc_etherdev_mqs(struct device *dev, int sizeof_priv,
 	return dr->ndev;
 }
 EXPORT_SYMBOL(devm_alloc_etherdev_mqs);
+
+static void devm_netdev_release(struct device *dev, void *this)
+{
+	struct net_device_devres *res = this;
+
+	unregister_netdev(res->ndev);
+}
+
+static int netdev_devres_match(struct device *dev, void *this, void *match_data)
+{
+	struct net_device_devres *res = this;
+	struct net_device *ndev = match_data;
+
+	return ndev == res->ndev;
+}
+
+/**
+ *	devm_register_netdev - resource managed variant of register_netdev()
+ *	@dev: managing device for this netdev - usually the parent device
+ *	@ndev: device to register
+ *
+ *	This is a devres variant of register_netdev() for which the unregister
+ *	function will be call automatically when the managing device is
+ *	detached. Note: the net_device used must also be resource managed by
+ *	the same struct device.
+ */
+int devm_register_netdev(struct device *dev, struct net_device *ndev)
+{
+	struct net_device_devres *dr;
+	int ret;
+
+	/* struct net_device must itself be managed. For now a managed netdev
+	 * can only be allocated by devm_alloc_etherdev_mqs() so the check is
+	 * straightforward.
+	 */
+	if (WARN_ON(!devres_find(dev, devm_free_netdev,
+				 netdev_devres_match, ndev)))
+		return -EINVAL;
+
+	dr = devres_alloc(devm_netdev_release, sizeof(*dr), GFP_KERNEL);
+	if (!dr)
+		return -ENOMEM;
+
+	ret = register_netdev(ndev);
+	if (ret) {
+		devres_free(dr);
+		return ret;
+	}
+
+	dr->ndev = ndev;
+	devres_add(ndev->dev.parent, dr);
+
+	return 0;
+}
+EXPORT_SYMBOL(devm_register_netdev);
-- 
cgit v1.2.3-59-g8ed1b


From 9250dccc119e72aecb0dfa786c94b4d8eb67ea29 Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Sat, 23 May 2020 15:27:11 +0200
Subject: net: ethernet: mtk_star_emac: use devm_register_netdev()

Use the new devres variant of register_netdev() in the mtk-star-emac
driver and shrink the code by a couple lines.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 789c77af501f..b74349cede28 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1519,13 +1519,6 @@ static void mtk_star_mdiobus_unregister(void *data)
 	mdiobus_unregister(priv->mii);
 }
 
-static void mtk_star_unregister_netdev(void *data)
-{
-	struct net_device *ndev = data;
-
-	unregister_netdev(ndev);
-}
-
 static int mtk_star_probe(struct platform_device *pdev)
 {
 	struct device_node *of_node;
@@ -1641,15 +1634,7 @@ static int mtk_star_probe(struct platform_device *pdev)
 
 	netif_napi_add(ndev, &priv->napi, mtk_star_poll, MTK_STAR_NAPI_WEIGHT);
 
-	ret = register_netdev(ndev);
-	if (ret)
-		return ret;
-
-	ret = devm_add_action_or_reset(dev, mtk_star_unregister_netdev, ndev);
-	if (ret)
-		return ret;
-
-	return 0;
+	return devm_register_netdev(dev, ndev);
 }
 
 static const struct of_device_id mtk_star_of_match[] = {
-- 
cgit v1.2.3-59-g8ed1b


From 316107119f473e764cf5e50437333c8b83bec0da Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 23 May 2020 17:40:25 +0200
Subject: ethtool: propagate get_coalesce return value

get_coalesce returns 0 or ERRNO, but the return value isn't checked.
The returned coalesce data may be invalid if an ERRNO is set,
therefore better check and propagate the return value.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/ioctl.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/net/ethtool/ioctl.c b/net/ethtool/ioctl.c
index 31e0b4e88a9d..b5df90c981c2 100644
--- a/net/ethtool/ioctl.c
+++ b/net/ethtool/ioctl.c
@@ -1510,11 +1510,14 @@ static noinline_for_stack int ethtool_get_coalesce(struct net_device *dev,
 						   void __user *useraddr)
 {
 	struct ethtool_coalesce coalesce = { .cmd = ETHTOOL_GCOALESCE };
+	int ret;
 
 	if (!dev->ethtool_ops->get_coalesce)
 		return -EOPNOTSUPP;
 
-	dev->ethtool_ops->get_coalesce(dev, &coalesce);
+	ret = dev->ethtool_ops->get_coalesce(dev, &coalesce);
+	if (ret)
+		return ret;
 
 	if (copy_to_user(useraddr, &coalesce, sizeof(coalesce)))
 		return -EFAULT;
-- 
cgit v1.2.3-59-g8ed1b


From debb7af6867f1331f72745732b4849c748e929a6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:50:57 +0300
Subject: mlxsw: spectrum: Rename IGMP trap group

The IGMP trap group will be used for MLD traps in the next patch, so
rename it to "MC_SNOOPING" which is more appropriate.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 3c3db1c874b6..602f9fdfd7ea 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5531,7 +5531,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index dfafd30c57b9..3457a3058eee 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4046,11 +4046,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
 		  false, SP_LLDP, DISCARD),
 	MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
-	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, IGMP, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, IGMP, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, IGMP, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, IGMP, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, IGMP, false),
+	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
@@ -4155,7 +4155,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			rate = 128;
 			burst_size = 7;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			rate = 16 * 1024;
 			burst_size = 10;
@@ -4235,7 +4235,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 4;
 			tc = 4;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IGMP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			priority = 3;
-- 
cgit v1.2.3-59-g8ed1b


From 99129069b7123c22ca14529972946216757077ee Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:50:58 +0300
Subject: mlxsw: spectrum: Use same trap group for MLD and IGMP packets

Both packet types are needed for the same reason (multicast snooping),
so associate them with the same trap group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      |  1 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 16 +++++++---------
 2 files changed, 7 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 602f9fdfd7ea..d51a4c4665d0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5542,7 +5542,6 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 3457a3058eee..bab51dfb6e13 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4054,14 +4054,14 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
-	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU, IPV6_MLD,
-			  false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
-			     false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, IPV6_MLD,
-			     false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU, IPV6_MLD,
+	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU,
+			  MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU,
+			     MC_SNOOPING, false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, MC_SNOOPING,
 			     false),
+	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU,
+			     MC_SNOOPING, false),
 	/* L3 traps */
 	MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
 	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
@@ -4156,7 +4156,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			burst_size = 7;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			rate = 16 * 1024;
 			burst_size = 10;
 			break;
@@ -4237,7 +4236,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_MLD:
 			priority = 3;
 			tc = 3;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 0ecb947412d7e95128a007f791b020a37ce5e9be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:50:59 +0300
Subject: mlxsw: spectrum: Trap IPv4 DHCP packets in router

Currently, IPv4 DHCP packets are trapped during L2 forwarding, which
means that packets might be trapped unnecessarily. Instead, only trap
the DHCP packets that reach the router, either because they were flooded
to the router port or forwarded to it by the FDB. This is consistent
with the corresponding IPv6 trap.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/trap.h     | 1 +
 2 files changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index bab51dfb6e13..fa6e630abb6e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4045,7 +4045,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
 	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
 		  false, SP_LLDP, DISCARD),
-	MLXSW_SP_RXL_MARK(DHCP, MIRROR_TO_CPU, DHCP, false),
 	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
@@ -4074,6 +4073,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 			  false),
 	MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
 	MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
+	MLXSW_SP_RXL_MARK(IPV4_DHCP, TRAP_TO_CPU, DHCP, false),
 	MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
 	MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
 	MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index eaa521b7561b..fac05433c488 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -59,6 +59,7 @@ enum {
 	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
 	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
 	MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
+	MLXSW_TRAP_ID_IPV4_DHCP = 0x8F,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
 	MLXSW_TRAP_ID_IPV6_MC_LINK_LOCAL_DEST = 0x91,
 	MLXSW_TRAP_ID_HOST_MISS_IPV6 = 0x92,
-- 
cgit v1.2.3-59-g8ed1b


From 938e6d0b7672317e54836610d9b9535dda19e90b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:00 +0300
Subject: mlxsw: spectrum: Change default rate and priority of DHCP packets

Reduce the default acceptable rate of DHCP packets to 128 packets per
second and reduce their priority. This is reasonable given the Spectrum
ASICs are limited to 128 ports at the moment.

These are only the default values. Users will be able to modify them via
devlink-trap.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index fa6e630abb6e..c2d6890803da 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4152,6 +4152,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 			rate = 128;
 			burst_size = 7;
 			break;
@@ -4161,7 +4162,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
@@ -4230,7 +4230,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			tc = 5;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 			priority = 4;
 			tc = 4;
 			break;
@@ -4242,6 +4241,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 			priority = 2;
 			tc = 2;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From e0d848477a68ab940caf8a880d3181b5ef9e4e0e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:01 +0300
Subject: mlxsw: spectrum_buffers: Assign non-zero quotas to TC 0 of the CPU
 port

As explained in commit 9ffcc3725f09 ("mlxsw: spectrum: Allow packets to
be trapped from any PG"), incoming packets can be admitted to the shared
buffer and forwarded / trapped, if:

(Ingress{Port}.Usage < Thres && Ingress{Port,PG}.Usage < Thres &&
 Egress{Port}.Usage < Thres && Egress{Port,TC}.Usage < Thres)
||
(Ingress{Port}.Usage < Min || Ingress{Port,PG} < Min ||
 Egress{Port}.Usage < Min || Egress{Port,TC}.Usage < Min)

Trapped packets are scheduled for transmission through the CPU port.
Currently, the minimum and maximum quotas of traffic class (TC) 0 of the
CPU port are 0, which means it is not usable.

Assign non-zero quotas to TC 0 of the CPU port, so that it could be
utilized by subsequent patches.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
index 968f0902e4fe..21bfb2f6a6f0 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_buffers.c
@@ -614,7 +614,7 @@ static const struct mlxsw_sp_sb_cm mlxsw_sp2_sb_cms_egress[] = {
 #define MLXSW_SP_CPU_PORT_SB_CM MLXSW_SP_SB_CM(0, 0, MLXSW_SP_SB_POOL_EGR_CPU)
 
 static const struct mlxsw_sp_sb_cm mlxsw_sp_cpu_port_sb_cms[] = {
-	MLXSW_SP_CPU_PORT_SB_CM,
+	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
 	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
 	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
 	MLXSW_SP_SB_CM(1000, 8, MLXSW_SP_SB_POOL_EGR_CPU),
-- 
cgit v1.2.3-59-g8ed1b


From 5047d819f5b1ad244675ba69b36af1ac44c76e34 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:02 +0300
Subject: mlxsw: spectrum: Align TC and trap priority

The traffic class (TC) attribute of a packet trap determines the TC through
which trapped packets are scheduled for transmission via the CPU port.

The priority attribute determines which trap will be triggered in case
several packet traps match a packet.

We try to configure these attributes to the same value for all packet
traps as there is little reason not to.

Some packet traps did not use the same value, so rectify that now.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c      | 2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 8 ++++----
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c2d6890803da..978f6d98e8c4 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4253,7 +4253,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
 			priority = 0;
-			tc = 1;
+			tc = 0;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT:
 			priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 3a13b17cd1b8..1d414d0e5431 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -207,25 +207,25 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.group = DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 		.priority = 0,
-		.tc = 1,
+		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
 		.priority = 0,
-		.tc = 1,
+		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
 		.priority = 0,
-		.tc = 1,
+		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
 		.priority = 0,
-		.tc = 1,
+		.tc = 0,
 	},
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From d88f8cc158b9c3812172431a3761e86b92f2d53d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:03 +0300
Subject: mlxsw: spectrum_trap: Remove unnecessary field

Now that traffic class (TC) and priority are set to the same value,
there is no need to store both. Remove the first.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 1d414d0e5431..78f983c1a056 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -21,7 +21,6 @@ struct mlxsw_sp_trap_group_item {
 	struct devlink_trap_group group;
 	u16 hw_group_id;
 	u8 priority;
-	u8 tc;
 };
 
 #define MLXSW_SP_TRAP_LISTENERS_MAX 3
@@ -207,25 +206,21 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.group = DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 		.priority = 0,
-		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
 		.priority = 0,
-		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
 		.priority = 0,
-		.tc = 0,
 	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
 		.priority = 0,
-		.tc = 0,
 	},
 };
 
@@ -865,7 +860,7 @@ __mlxsw_sp_trap_group_init(struct mlxsw_core *mlxsw_core,
 	}
 
 	mlxsw_reg_htgt_pack(htgt_pl, group_item->hw_group_id, hw_policer_id,
-			    group_item->priority, group_item->tc);
+			    group_item->priority, group_item->priority);
 	return mlxsw_reg_write(mlxsw_core, MLXSW_REG(htgt), htgt_pl);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 32446438cc5bb09aa5ca43c18b0d13dc88569850 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:04 +0300
Subject: mlxsw: spectrum: Rename ARP trap group

The ARP trap group will be used for IPv6 ND traps in the next patch, so
rename it to "NEIGH_DISCOVERY" which is more appropriate.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 13 +++++++------
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index d51a4c4665d0..4d61c414348f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5536,7 +5536,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 978f6d98e8c4..6ef8222cc0ae 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4050,8 +4050,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
-	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, ARP, false),
-	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, ARP, false),
+	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
+	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU,
 			  MC_SNOOPING, false),
@@ -4112,8 +4112,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
 	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
 	/* NVE traps */
-	MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, ARP, false),
-	MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, ARP, false),
+	MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false),
+	MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY,
+			     false),
 	/* PTP traps */
 	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU,
 		  false, SP_PTP0, DISCARD),
@@ -4161,7 +4162,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			burst_size = 10;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
@@ -4238,7 +4239,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 3;
 			tc = 3;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ARP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
-- 
cgit v1.2.3-59-g8ed1b


From b33f5d9fb71a65dde1477417b0e11205a2d21adc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:05 +0300
Subject: mlxsw: spectrum: Use same trap group for IPv6 ND and ARP packets

Both packet types are needed for the same reason (neighbour discovery),
so associate them with the same trap group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 6ef8222cc0ae..c4fdfe8fd5a3 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4082,10 +4082,10 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 			  false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
 			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
-			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
-			  false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU,
+			  NEIGH_DISCOVERY, false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU,
+			  NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-- 
cgit v1.2.3-59-g8ed1b


From ce3c3bf0bfad4bb8652f3c3f5770d38a10ce2894 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:06 +0300
Subject: mlxsw: spectrum: Use dedicated trap group for sampled packets

The rate with which packets are sampled is determined by user space, so
there is no need to associate such packets with a policer.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 ++-
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 4d61c414348f..9b27a129b0a6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5547,6 +5547,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
 
 	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c4fdfe8fd5a3..d275887bba28 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4103,7 +4103,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 			     ROUTER_EXP, false),
 	/* PKT Sample trap */
 	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
-		  false, SP_IP2ME, DISCARD),
+		  false, SP_PKT_SAMPLE, DISCARD),
 	/* ACL trap */
 	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
 	/* Multicast Router Traps */
@@ -4252,6 +4252,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 1;
 			tc = 1;
 			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
 			priority = 0;
 			tc = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 154388e11255dbbcf68906fe8058fe72af346634 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:51:07 +0300
Subject: mlxsw: spectrum: Fix spelling mistake in trap's name

Fix incorrect spelling of "advertisement".

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 drivers/net/ethernet/mellanox/mlxsw/trap.h     | 4 ++--
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index d275887bba28..943a24975799 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4080,11 +4080,11 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
 			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISMENT, TRAP_TO_CPU, IPV6_ND,
+	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, TRAP_TO_CPU, IPV6_ND,
 			  false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU,
 			  NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISMENT, TRAP_TO_CPU,
+	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISEMENT, TRAP_TO_CPU,
 			  NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index fac05433c488..1b89472a0908 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -55,9 +55,9 @@ enum {
 	MLXSW_TRAP_ID_IPV4_BGP = 0x88,
 	MLXSW_TRAP_ID_IPV6_BGP = 0x89,
 	MLXSW_TRAP_ID_L3_IPV6_ROUTER_SOLICITATION = 0x8A,
-	MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISMENT = 0x8B,
+	MLXSW_TRAP_ID_L3_IPV6_ROUTER_ADVERTISEMENT = 0x8B,
 	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_SOLICITATION = 0x8C,
-	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISMENT = 0x8D,
+	MLXSW_TRAP_ID_L3_IPV6_NEIGHBOR_ADVERTISEMENT = 0x8D,
 	MLXSW_TRAP_ID_L3_IPV6_REDIRECTION = 0x8E,
 	MLXSW_TRAP_ID_IPV4_DHCP = 0x8F,
 	MLXSW_TRAP_ID_HOST_MISS_IPV4 = 0x90,
-- 
cgit v1.2.3-59-g8ed1b


From 06ec313eea4f7dcda209b7136618b802c5b30ef8 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 25 May 2020 00:38:56 +0300
Subject: vxlan: Do not assume RTNL is held in vxlan_fdb_info()

vxlan_fdb_info() is not always called with RTNL held or from an RCU
read-side critical section. For example, in the following call path:

vxlan_cleanup()
  vxlan_fdb_destroy()
    vxlan_fdb_notify()
      __vxlan_fdb_notify()
        vxlan_fdb_info()

The use of rcu_dereference_rtnl() can therefore result in the following
splat [1].

Fix this by dereferencing the nexthop under an RCU read-side critical
section.

[1]
[May24 22:56] =============================
[  +0.004676] WARNING: suspicious RCU usage
[  +0.004614] 5.7.0-rc5-custom-16219-g201392003491 #2772 Not tainted
[  +0.007116] -----------------------------
[  +0.004657] drivers/net/vxlan.c:276 suspicious rcu_dereference_check() usage!
[  +0.008164]
              other info that might help us debug this:

[  +0.009126]
              rcu_scheduler_active = 2, debug_locks = 1
[  +0.007504] 5 locks held by bash/6892:
[  +0.004392]  #0: ffff8881d47e3410 (&sig->cred_guard_mutex){+.+.}-{3:3}, at: __do_execve_file.isra.27+0x392/0x23c0
[  +0.011795]  #1: ffff8881d47e34b0 (&sig->exec_update_mutex){+.+.}-{3:3}, at: flush_old_exec+0x510/0x2030
[  +0.010947]  #2: ffff8881a141b0b0 (ptlock_ptr(page)#2){+.+.}-{2:2}, at: unmap_page_range+0x9c0/0x2590
[  +0.010585]  #3: ffff888230009d50 ((&vxlan->age_timer)){+.-.}-{0:0}, at: call_timer_fn+0xe8/0x800
[  +0.010192]  #4: ffff888183729bc8 (&vxlan->hash_lock[h]){+.-.}-{2:2}, at: vxlan_cleanup+0x133/0x4a0
[  +0.010382]
              stack backtrace:
[  +0.005103] CPU: 1 PID: 6892 Comm: bash Not tainted 5.7.0-rc5-custom-16219-g201392003491 #2772
[  +0.009675] Hardware name: Mellanox Technologies Ltd. MSN2100-CB2FO/SA001017, BIOS 5.6.5 06/07/2016
[  +0.010155] Call Trace:
[  +0.002775]  <IRQ>
[  +0.002313]  dump_stack+0xfd/0x178
[  +0.003895]  lockdep_rcu_suspicious+0x14a/0x153
[  +0.005157]  vxlan_fdb_info+0xe39/0x12a0
[  +0.004775]  __vxlan_fdb_notify+0xb8/0x160
[  +0.004672]  vxlan_fdb_notify+0x8e/0xe0
[  +0.004370]  vxlan_fdb_destroy+0x117/0x330
[  +0.004662]  vxlan_cleanup+0x1aa/0x4a0
[  +0.004329]  call_timer_fn+0x1c4/0x800
[  +0.004357]  run_timer_softirq+0x129d/0x17e0
[  +0.004762]  __do_softirq+0x24c/0xaef
[  +0.004232]  irq_exit+0x167/0x190
[  +0.003767]  smp_apic_timer_interrupt+0x1dd/0x6a0
[  +0.005340]  apic_timer_interrupt+0xf/0x20
[  +0.004620]  </IRQ>

Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Amit Cohen <amitc@mellanox.com>
Acked-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 15 ++++++++++++---
 1 file changed, 12 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 3e88fbef2d4a..a0015cdedfaf 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -263,6 +263,8 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	struct nlmsghdr *nlh;
 	struct nexthop *nh;
 	struct ndmsg *ndm;
+	int nh_family;
+	u32 nh_id;
 
 	nlh = nlmsg_put(skb, portid, seq, type, sizeof(*ndm), flags);
 	if (nlh == NULL)
@@ -273,13 +275,20 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 
 	send_eth = send_ip = true;
 
-	nh = rcu_dereference_rtnl(fdb->nh);
+	rcu_read_lock();
+	nh = rcu_dereference(fdb->nh);
+	if (nh) {
+		nh_family = nexthop_get_family(nh);
+		nh_id = nh->id;
+	}
+	rcu_read_unlock();
+
 	if (type == RTM_GETNEIGH) {
 		if (rdst) {
 			send_ip = !vxlan_addr_any(&rdst->remote_ip);
 			ndm->ndm_family = send_ip ? rdst->remote_ip.sa.sa_family : AF_INET;
 		} else if (nh) {
-			ndm->ndm_family = nexthop_get_family(nh);
+			ndm->ndm_family = nh_family;
 		}
 		send_eth = !is_zero_ether_addr(fdb->eth_addr);
 	} else
@@ -299,7 +308,7 @@ static int vxlan_fdb_info(struct sk_buff *skb, struct vxlan_dev *vxlan,
 	if (send_eth && nla_put(skb, NDA_LLADDR, ETH_ALEN, &fdb->eth_addr))
 		goto nla_put_failure;
 	if (nh) {
-		if (nla_put_u32(skb, NDA_NH_ID, nh->id))
+		if (nla_put_u32(skb, NDA_NH_ID, nh_id))
 			goto nla_put_failure;
 	} else if (rdst) {
 		if (send_ip && vxlan_nla_put_addr(skb, NDA_DST,
-- 
cgit v1.2.3-59-g8ed1b


From ca23cb0bc50faae0d48786b2f9f702dbb528b925 Mon Sep 17 00:00:00 2001
From: Sven Auhagen <sven.auhagen@voleatech.de>
Date: Sat, 23 May 2020 12:14:08 +0200
Subject: mvneta: MVNETA_SKB_HEADROOM set last 3 bits to zero

For XDP the MVNETA_SKB_HEADROOM is used as an offset for
the received data.
The MVNETA manual states that the last 3 bits are assumed to be 0.

This is currently the case but let's make it explicit in the definition
to prevent future problems.

Signed-off-by: Sven Auhagen <sven.auhagen@voleatech.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/marvell/mvneta.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 37947949345c..15e42a7f8a86 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -324,7 +324,8 @@
 	      ETH_HLEN + ETH_FCS_LEN,			     \
 	      cache_line_size())
 
-#define MVNETA_SKB_HEADROOM	max(XDP_PACKET_HEADROOM, NET_SKB_PAD)
+/* Driver assumes that the last 3 bits are 0 */
+#define MVNETA_SKB_HEADROOM	(max(XDP_PACKET_HEADROOM, NET_SKB_PAD) & ~0x7)
 #define MVNETA_SKB_PAD	(SKB_DATA_ALIGN(sizeof(struct skb_shared_info) + \
 			 MVNETA_SKB_HEADROOM))
 #define MVNETA_SKB_SIZE(len)	(SKB_DATA_ALIGN(len) + MVNETA_SKB_PAD)
-- 
cgit v1.2.3-59-g8ed1b


From 6a1015b0b4b1f3a0de9e40d2ba86877d13f50918 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Sat, 23 May 2020 20:46:48 +0300
Subject: ipv4: potential underflow in compat_ip_setsockopt()

The value of "n" is capped at 0x1ffffff but it is not checked for negative
values.  I don't think this causes a problem but I'm not certain and
it's harmless to prevent it.

Fixes: 2e04172875c9 ("ipv4: do compat setsockopt for MCAST_MSFILTER directly")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/ip_sockglue.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index a2469bc57cfe..f43d5f12aa86 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -1347,8 +1347,8 @@ int compat_ip_setsockopt(struct sock *sk, int level, int optname,
 	{
 		const int size0 = offsetof(struct compat_group_filter, gf_slist);
 		struct compat_group_filter *gf32;
+		unsigned int n;
 		void *p;
-		int n;
 
 		if (optlen < size0)
 			return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From 880f8f99d12ca89d3ec76f688e0d92612054cbb1 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sat, 23 May 2020 12:46:49 -0700
Subject: bnx2x: allow bnx2x_bsc_read() to schedule

bnx2x_warpcore_read_sfp_module_eeprom() can call bnx2x_bsc_read()
three times before giving up.

This causes latency blips of at least 31 ms (58 ms being reported
by our teams)

Convert the long-lasting loops of udelay() to usleep_range() ones,
and break out of the loops based on precise time tracking.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Ariel Elior <aelior@marvell.com>
Cc: Sudarsana Kalluru <skalluru@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c | 26 ++++++++++++++----------
 1 file changed, 15 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
index 517caedc0a87..1426c691c7c4 100644
--- a/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
+++ b/drivers/net/ethernet/broadcom/bnx2x/bnx2x_link.c
@@ -3085,6 +3085,7 @@ static int bnx2x_bsc_read(struct link_params *params,
 			  u8 xfer_cnt,
 			  u32 *data_array)
 {
+	u64 t0, delta;
 	u32 val, i;
 	int rc = 0;
 
@@ -3114,17 +3115,18 @@ static int bnx2x_bsc_read(struct link_params *params,
 	REG_WR(bp, MCP_REG_MCPR_IMC_COMMAND, val);
 
 	/* Poll for completion */
-	i = 0;
+	t0 = ktime_get_ns();
 	val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	while (((val >> MCPR_IMC_COMMAND_IMC_STATUS_BITSHIFT) & 0x3) != 1) {
-		udelay(10);
-		val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
-		if (i++ > 1000) {
-			DP(NETIF_MSG_LINK, "wr 0 byte timed out after %d try\n",
-								i);
+		delta = ktime_get_ns() - t0;
+		if (delta > 10 * NSEC_PER_MSEC) {
+			DP(NETIF_MSG_LINK, "wr 0 byte timed out after %Lu ns\n",
+					   delta);
 			rc = -EFAULT;
 			break;
 		}
+		usleep_range(10, 20);
+		val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	}
 	if (rc == -EFAULT)
 		return rc;
@@ -3138,16 +3140,18 @@ static int bnx2x_bsc_read(struct link_params *params,
 	REG_WR(bp, MCP_REG_MCPR_IMC_COMMAND, val);
 
 	/* Poll for completion */
-	i = 0;
+	t0 = ktime_get_ns();
 	val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	while (((val >> MCPR_IMC_COMMAND_IMC_STATUS_BITSHIFT) & 0x3) != 1) {
-		udelay(10);
-		val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
-		if (i++ > 1000) {
-			DP(NETIF_MSG_LINK, "rd op timed out after %d try\n", i);
+		delta = ktime_get_ns() - t0;
+		if (delta > 10 * NSEC_PER_MSEC) {
+			DP(NETIF_MSG_LINK, "rd op timed out after %Lu ns\n",
+					   delta);
 			rc = -EFAULT;
 			break;
 		}
+		usleep_range(10, 20);
+		val = REG_RD(bp, MCP_REG_MCPR_IMC_COMMAND);
 	}
 	if (rc == -EFAULT)
 		return rc;
-- 
cgit v1.2.3-59-g8ed1b


From 45af29ca761c275e350cca659856bc56f1035ef9 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Sun, 24 May 2020 11:00:02 -0700
Subject: tcp: allow traceroute -Mtcp for unpriv users
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Unpriv users can use traceroute over plain UDP sockets, but not TCP ones.

$ traceroute -Mtcp 8.8.8.8
You do not have enough privileges to use this traceroute method.

$ traceroute -n -Mudp 8.8.8.8
traceroute to 8.8.8.8 (8.8.8.8), 30 hops max, 60 byte packets
 1  192.168.86.1  3.631 ms  3.512 ms  3.405 ms
 2  10.1.10.1  4.183 ms  4.125 ms  4.072 ms
 3  96.120.88.125  20.621 ms  19.462 ms  20.553 ms
 4  96.110.177.65  24.271 ms  25.351 ms  25.250 ms
 5  69.139.199.197  44.492 ms  43.075 ms  44.346 ms
 6  68.86.143.93  27.969 ms  25.184 ms  25.092 ms
 7  96.112.146.18  25.323 ms 96.112.146.22  25.583 ms 96.112.146.26  24.502 ms
 8  72.14.239.204  24.405 ms 74.125.37.224  16.326 ms  17.194 ms
 9  209.85.251.9  18.154 ms 209.85.247.55  14.449 ms 209.85.251.9  26.296 ms^C

We can easily support traceroute over TCP, by queueing an error message
into socket error queue.

Note that applications need to set IP_RECVERR/IPV6_RECVERR option to
enable this feature, and that the error message is only queued
while in SYN_SENT state.

socket(AF_INET6, SOCK_STREAM, IPPROTO_IP) = 3
setsockopt(3, SOL_IPV6, IPV6_RECVERR, [1], 4) = 0
setsockopt(3, SOL_SOCKET, SO_TIMESTAMP_OLD, [1], 4) = 0
setsockopt(3, SOL_IPV6, IPV6_UNICAST_HOPS, [5], 4) = 0
connect(3, {sa_family=AF_INET6, sin6_port=htons(8787), sin6_flowinfo=htonl(0),
        inet_pton(AF_INET6, "2002:a05:6608:297::", &sin6_addr), sin6_scope_id=0}, 28) = -1 EHOSTUNREACH (No route to host)
recvmsg(3, {msg_name={sa_family=AF_INET6, sin6_port=htons(8787), sin6_flowinfo=htonl(0),
        inet_pton(AF_INET6, "2002:a05:6608:297::", &sin6_addr), sin6_scope_id=0},
        msg_namelen=1024->28, msg_iov=[{iov_base="`\r\337\320\0004\6\1&\7\370\260\200\231\16\27\0\0\0\0\0\0\0\0 \2\n\5f\10\2\227"..., iov_len=1024}],
        msg_iovlen=1, msg_control=[{cmsg_len=32, cmsg_level=SOL_SOCKET, cmsg_type=SO_TIMESTAMP_OLD, cmsg_data={tv_sec=1590340680, tv_usec=272424}},
                                   {cmsg_len=60, cmsg_level=SOL_IPV6, cmsg_type=IPV6_RECVERR}],
        msg_controllen=96, msg_flags=MSG_ERRQUEUE}, MSG_ERRQUEUE) = 144

Suggested-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: Eric Dumazet <edumazet@google.com>
Cc: Willem de Bruijn <willemb@google.com>
Reviewed-by: Maciej Żenczykowski <maze@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 ++
 net/ipv6/tcp_ipv6.c | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6c05f1ceb538..900c6d154cbc 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -573,6 +573,8 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (fastopen && !fastopen->sk)
 			break;
 
+		ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 413b3425ac66..01a6f5111a77 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -463,6 +463,8 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		if (fastopen && !fastopen->sk)
 			break;
 
+		ipv6_icmp_error(sk, skb, err, th->dest, ntohl(info), (u8 *)th);
+
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
 			sk->sk_error_report(sk);		/* Wake people up to see the error (see connect in sock.c) */
-- 
cgit v1.2.3-59-g8ed1b


From eabd5c9dd0c0b8d471d144801c8302a4eff6eb27 Mon Sep 17 00:00:00 2001
From: Richard Cochran <richardcochran@gmail.com>
Date: Sun, 24 May 2020 11:27:10 -0700
Subject: ptp_clock: Let the ADJ_OFFSET interface respect the ADJ_NANO flag for
 PHC devices.

In commit 184ecc9eb260d5a3bcdddc5bebd18f285ac004e9 ("ptp: Add adjphase
function to support phase offset control.") the PTP Hardware Clock
interface expanded to support the ADJ_OFFSET offset mode.  However,
the implementation did not respect the traditional yet pedantic
distinction between units of microseconds and nanoseconds signaled by
the ADJ_NANO flag.  This patch fixes the issue by adding logic to
handle that flag.

Signed-off-by: Richard Cochran <richardcochran@gmail.com>
Reviewed-by: Vincent Cheng <vincent.cheng.xh@renesas.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/ptp/ptp_clock.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/ptp/ptp_clock.c b/drivers/ptp/ptp_clock.c
index fc984a8828fb..03a246e60fd9 100644
--- a/drivers/ptp/ptp_clock.c
+++ b/drivers/ptp/ptp_clock.c
@@ -147,8 +147,14 @@ static int ptp_clock_adjtime(struct posix_clock *pc, struct __kernel_timex *tx)
 			err = ops->adjfreq(ops, ppb);
 		ptp->dialed_frequency = tx->freq;
 	} else if (tx->modes & ADJ_OFFSET) {
-		if (ops->adjphase)
-			err = ops->adjphase(ops, tx->offset);
+		if (ops->adjphase) {
+			s32 offset = tx->offset;
+
+			if (!(tx->modes & ADJ_NANO))
+				offset *= NSEC_PER_USEC;
+
+			err = ops->adjphase(ops, offset);
+		}
 	} else if (tx->modes == 0) {
 		tx->freq = ptp->dialed_frequency;
 		err = 0;
-- 
cgit v1.2.3-59-g8ed1b


From 07153961f82878dee73717f64761d67dff9546c1 Mon Sep 17 00:00:00 2001
From: Wang Wenhu <wenhu.wang@vivo.com>
Date: Sun, 24 May 2020 23:29:51 -0700
Subject: drivers: ipa: print dev_err info accurately

Print certain name string instead of hard-coded "memory" for dev_err
output, which would be more accurate and helpful for debugging.

Signed-off-by: Wang Wenhu <wenhu.wang@vivo.com>
Cc: Alex Elder <elder@kernel.org>
Reviewed-by: Alex Elder <elder@linaro.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_clock.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/ipa_clock.c b/drivers/net/ipa/ipa_clock.c
index 374491ea11cf..c5204fd58ac4 100644
--- a/drivers/net/ipa/ipa_clock.c
+++ b/drivers/net/ipa/ipa_clock.c
@@ -66,8 +66,8 @@ ipa_interconnect_init_one(struct device *dev, const char *name)
 
 	path = of_icc_get(dev, name);
 	if (IS_ERR(path))
-		dev_err(dev, "error %ld getting memory interconnect\n",
-			PTR_ERR(path));
+		dev_err(dev, "error %ld getting %s interconnect\n",
+			PTR_ERR(path), name);
 
 	return path;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 617504c67e01d30310558442777a4112ea6d587d Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Mon, 25 May 2020 09:55:41 +0000
Subject: bridge: mrp: Fix out-of-bounds read in br_mrp_parse

The issue was reported by syzbot. When the function br_mrp_parse was
called with a valid net_bridge_port, the net_bridge was an invalid
pointer. Therefore the check br->stp_enabled could pass/fail
depending where it was pointing in memory.
The fix consists of setting the net_bridge pointer if the port is a
valid pointer.

Reported-by: syzbot+9c6f0f1f8e32223df9a4@syzkaller.appspotmail.com
Fixes: 6536993371fa ("bridge: mrp: Integrate MRP into the bridge")
Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_mrp_netlink.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 397e7f710772..4a08a99519b0 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -27,6 +27,12 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
 	struct nlattr *tb[IFLA_BRIDGE_MRP_MAX + 1];
 	int err;
 
+	/* When this function is called for a port then the br pointer is
+	 * invalid, therefor set the br to point correctly
+	 */
+	if (p)
+		br = p->br;
+
 	if (br->stp_enabled != BR_NO_STP) {
 		NL_SET_ERR_MSG_MOD(extack, "MRP can't be enabled if STP is already enabled");
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From d29d5ff9daee41a2553843574257e5a6724d5453 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 25 May 2020 19:49:15 +0200
Subject: r8169: sync RTL8168g hw config with vendor driver

Sync hw config for RTL8168g with r8168 vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 79090aefac69..d034a57a0e1c 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3019,6 +3019,7 @@ static void rtl_hw_start_8168g(struct rtl8169_private *tp)
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
 	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_set_bits(tp, 0x0d4, 0x1f80);
 
 	rtl8168_config_eee_mac(tp);
 
-- 
cgit v1.2.3-59-g8ed1b


From ee1350f94e5c7885142d736370e5fa7dc2c7747a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 25 May 2020 19:49:55 +0200
Subject: r8169: sync RTL8168h hw config with vendor driver

Sync hw config for RTL8168h with r8168 vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index d034a57a0e1c..173a4c41c4af 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3250,9 +3250,8 @@ static void rtl_hw_start_8168h_1(struct rtl8169_private *tp)
 
 	rtl_reset_packet_filter(tp);
 
-	rtl_eri_set_bits(tp, 0xdc, BIT(4));
-
 	rtl_eri_set_bits(tp, 0xd4, 0x1f00);
+	rtl_eri_set_bits(tp, 0xdc, 0x001c);
 
 	rtl_eri_write(tp, 0x5f0, ERIAR_MASK_0011, 0x4f87);
 
-- 
cgit v1.2.3-59-g8ed1b


From 33b00ca1da49969da068f672e7c2430f16f1f85f Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 25 May 2020 19:50:38 +0200
Subject: r8169: sync RTL8168evl hw config with vendor driver

Sync hw config for RTL8168evl with r8168 vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 173a4c41c4af..dfb07df47e0e 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2926,12 +2926,14 @@ static void rtl_hw_start_8168e_2(struct rtl8169_private *tp)
 	rtl_ephy_init(tp, e_info_8168e_2);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_1111, 0x0000);
 	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
+	rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
+	rtl_eri_set_bits(tp, 0x1d0, BIT(1));
+	rtl_reset_packet_filter(tp);
+	rtl_eri_set_bits(tp, 0x1b0, BIT(4));
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x07ff0060);
-	rtl_eri_set_bits(tp, 0x1b0, BIT(4));
-	rtl_w0w1_eri(tp, 0x0d4, 0x0c00, 0xff00);
 
 	rtl_disable_clock_request(tp);
 
-- 
cgit v1.2.3-59-g8ed1b


From d05890c5aed0dd4709f27525862765f1c6ddec1a Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 25 May 2020 19:52:27 +0200
Subject: r8169: sync RTL8168f/RTL8411 hw config with vendor driver

Sync hw config for RTL8168f/RTL8411 with r8168 vendor driver.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index dfb07df47e0e..17c564457e52 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2953,11 +2953,11 @@ static void rtl_hw_start_8168f(struct rtl8169_private *tp)
 	rtl_set_def_aspm_entry_latency(tp);
 
 	rtl_eri_write(tp, 0xc0, ERIAR_MASK_0011, 0x0000);
-	rtl_eri_write(tp, 0xb8, ERIAR_MASK_0011, 0x0000);
+	rtl_eri_write(tp, 0xb8, ERIAR_MASK_1111, 0x0000);
 	rtl_set_fifo_size(tp, 0x10, 0x10, 0x02, 0x06);
 	rtl_reset_packet_filter(tp);
 	rtl_eri_set_bits(tp, 0x1b0, BIT(4));
-	rtl_eri_set_bits(tp, 0x1d0, BIT(4));
+	rtl_eri_set_bits(tp, 0x1d0, BIT(4) | BIT(1));
 	rtl_eri_write(tp, 0xcc, ERIAR_MASK_1111, 0x00000050);
 	rtl_eri_write(tp, 0xd0, ERIAR_MASK_1111, 0x00000060);
 
@@ -2986,7 +2986,7 @@ static void rtl_hw_start_8168f_1(struct rtl8169_private *tp)
 
 	rtl_ephy_init(tp, e_info_8168f_1);
 
-	rtl_w0w1_eri(tp, 0x0d4, 0x0c00, 0xff00);
+	rtl_eri_set_bits(tp, 0x0d4, 0x1f00);
 }
 
 static void rtl_hw_start_8411(struct rtl8169_private *tp)
-- 
cgit v1.2.3-59-g8ed1b


From 9ad346c90509ebd983f60da7d082f261ad329507 Mon Sep 17 00:00:00 2001
From: Sven Eckelmann <sven@narfation.org>
Date: Mon, 25 Nov 2019 10:46:50 +0100
Subject: batman-adv: Revert "disable ethtool link speed detection when auto
 negotiation off"

The commit 8c46fcd78308 ("batman-adv: disable ethtool link speed detection
when auto negotiation off") disabled the usage of ethtool's link_ksetting
when auto negotiation was disabled due to invalid values when used with
tun/tap virtual net_devices. According to the patch, automatic measurements
should be used for these kinds of interfaces.

But there are major flaws with this argumentation:

* automatic measurements are not implemented
* auto negotiation has nothing to do with the validity of the retrieved
  values

The first point has to be fixed by a longer patch series. The "validity"
part of the second point must be addressed in the same patch series by
dropping the usage of ethtool's link_ksetting (thus always doing automatic
measurements over ethernet).

Drop the patch again to have more default values for various net_device
types/configurations. The user can still overwrite them using the
batadv_hardif's BATADV_ATTR_THROUGHPUT_OVERRIDE.

Reported-by: Matthias Schiffer <mschiffer@universe-factory.net>
Signed-off-by: Sven Eckelmann <sven@narfation.org>
Signed-off-by: Simon Wunderlich <sw@simonwunderlich.de>
---
 net/batman-adv/bat_v_elp.c | 15 +--------------
 1 file changed, 1 insertion(+), 14 deletions(-)

diff --git a/net/batman-adv/bat_v_elp.c b/net/batman-adv/bat_v_elp.c
index 353e49c40e7f..0bdefa35da98 100644
--- a/net/batman-adv/bat_v_elp.c
+++ b/net/batman-adv/bat_v_elp.c
@@ -127,20 +127,7 @@ static u32 batadv_v_elp_get_throughput(struct batadv_hardif_neigh_node *neigh)
 	rtnl_lock();
 	ret = __ethtool_get_link_ksettings(hard_iface->net_dev, &link_settings);
 	rtnl_unlock();
-
-	/* Virtual interface drivers such as tun / tap interfaces, VLAN, etc
-	 * tend to initialize the interface throughput with some value for the
-	 * sake of having a throughput number to export via ethtool. This
-	 * exported throughput leaves batman-adv to conclude the interface
-	 * throughput is genuine (reflecting reality), thus no measurements
-	 * are necessary.
-	 *
-	 * Based on the observation that those interface types also tend to set
-	 * the link auto-negotiation to 'off', batman-adv shall check this
-	 * setting to differentiate between genuine link throughput information
-	 * and placeholders installed by virtual interfaces.
-	 */
-	if (ret == 0 && link_settings.base.autoneg == AUTONEG_ENABLE) {
+	if (ret == 0) {
 		/* link characteristics might change over time */
 		if (link_settings.base.duplex == DUPLEX_FULL)
 			hard_iface->bat_v.flags |= BATADV_FULL_DUPLEX;
-- 
cgit v1.2.3-59-g8ed1b


From 239174945dac8cb9613db7755103d5fb6c32241d Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Mon, 25 May 2020 20:15:24 -0700
Subject: tcp: tcp_v4_err() icmp skb is named icmp_skb

I missed the fact that tcp_v4_err() differs from tcp_v6_err().

After commit 4d1a2d9ec1c1 ("Rename skb to icmp_skb in tcp_v4_err()")
the skb argument has been renamed to icmp_skb only in one function.

In a future patch I will reconcile these functions to avoid
this kind of confusion.

Fixes: 45af29ca761c ("tcp: allow traceroute -Mtcp for unpriv users")
Signed-off-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 900c6d154cbc..6789671f0f5a 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -573,7 +573,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (fastopen && !fastopen->sk)
 			break;
 
-		ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
+		ip_icmp_error(sk, icmp_skb, err, th->dest, info, (u8 *)th);
 
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
-- 
cgit v1.2.3-59-g8ed1b


From ff937b916eb6316fe4644564a572ed3b5867bc1f Mon Sep 17 00:00:00 2001
From: Yuval Basson <yuval.bason@marvell.com>
Date: Tue, 26 May 2020 09:41:20 +0300
Subject: qed: Add EDPM mode type for user-fw compatibility

In older FW versions the completion flag was treated as the ack flag in
edpm messages. Expose the FW option of setting which mode the QP is in
by adding a flag to the qedr <-> qed API.

Flag is added for backward compatibility with libqedr.
This flag will be set by qedr after determining whether the libqedr is
using the updated version.

Fixes: f10939403352 ("qed: Add support for QP verbs")
Signed-off-by: Yuval Basson <yuval.bason@marvell.com>
Signed-off-by: Michal Kalderon <michal.kalderon@marvell.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/qlogic/qed/qed_rdma.c | 1 +
 drivers/net/ethernet/qlogic/qed/qed_rdma.h | 1 +
 drivers/net/ethernet/qlogic/qed/qed_roce.c | 3 +++
 include/linux/qed/qed_rdma_if.h            | 3 +++
 4 files changed, 8 insertions(+)

diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.c b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
index 50985871cd3d..98455f698f53 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.c
@@ -1378,6 +1378,7 @@ qed_rdma_create_qp(void *rdma_cxt,
 		rc = qed_iwarp_create_qp(p_hwfn, qp, out_params);
 		qp->qpid = qp->icid;
 	} else {
+		qp->edpm_mode = GET_FIELD(in_params->flags, QED_ROCE_EDPM_MODE);
 		rc = qed_roce_alloc_cid(p_hwfn, &qp->icid);
 		qp->qpid = ((0xFF << 16) | qp->icid);
 	}
diff --git a/drivers/net/ethernet/qlogic/qed/qed_rdma.h b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
index 5a7ebc764bb6..3898cae61e7a 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_rdma.h
+++ b/drivers/net/ethernet/qlogic/qed/qed_rdma.h
@@ -183,6 +183,7 @@ struct qed_rdma_qp {
 	void *shared_queue;
 	dma_addr_t shared_queue_phys_addr;
 	struct qed_iwarp_ep *ep;
+	u8 edpm_mode;
 };
 
 static inline bool qed_rdma_is_xrc_qp(struct qed_rdma_qp *qp)
diff --git a/drivers/net/ethernet/qlogic/qed/qed_roce.c b/drivers/net/ethernet/qlogic/qed/qed_roce.c
index 46a4d09eacef..4566815f7b87 100644
--- a/drivers/net/ethernet/qlogic/qed/qed_roce.c
+++ b/drivers/net/ethernet/qlogic/qed/qed_roce.c
@@ -459,6 +459,9 @@ static int qed_roce_sp_create_requester(struct qed_hwfn *p_hwfn,
 		  ROCE_CREATE_QP_REQ_RAMROD_DATA_XRC_FLAG,
 		  qed_rdma_is_xrc_qp(qp));
 
+	SET_FIELD(p_ramrod->flags2,
+		  ROCE_CREATE_QP_REQ_RAMROD_DATA_EDPM_MODE, qp->edpm_mode);
+
 	p_ramrod->max_ord = qp->max_rd_atomic_req;
 	p_ramrod->traffic_class = qp->traffic_class_tos;
 	p_ramrod->hop_limit = qp->hop_limit_ttl;
diff --git a/include/linux/qed/qed_rdma_if.h b/include/linux/qed/qed_rdma_if.h
index f93edd5750a5..584077565f12 100644
--- a/include/linux/qed/qed_rdma_if.h
+++ b/include/linux/qed/qed_rdma_if.h
@@ -335,6 +335,9 @@ struct qed_rdma_create_qp_in_params {
 	u16 xrcd_id;
 	u8 stats_queue;
 	enum qed_rdma_qp_type qp_type;
+	u8 flags;
+#define QED_ROCE_EDPM_MODE_MASK      0x1
+#define QED_ROCE_EDPM_MODE_SHIFT     0
 };
 
 struct qed_rdma_create_qp_out_params {
-- 
cgit v1.2.3-59-g8ed1b


From d7626b5acff9227e2a65da636a53e09bdafdc0aa Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 26 May 2020 16:38:34 +0700
Subject: tipc: introduce Gap ACK blocks for broadcast link

As achieved through commit 9195948fbf34 ("tipc: improve TIPC throughput
by Gap ACK blocks"), we apply the same mechanism for the broadcast link
as well. The 'Gap ACK blocks' data field in a 'PROTOCOL/STATE_MSG' will
consist of two parts built for both the broadcast and unicast types:

 31                       16 15                        0
+-------------+-------------+-------------+-------------+
|  bgack_cnt  |  ugack_cnt  |            len            |
+-------------+-------------+-------------+-------------+  -
|            gap            |            ack            |   |
+-------------+-------------+-------------+-------------+    > bc gacks
:                           :                           :   |
+-------------+-------------+-------------+-------------+  -
|            gap            |            ack            |   |
+-------------+-------------+-------------+-------------+    > uc gacks
:                           :                           :   |
+-------------+-------------+-------------+-------------+  -

which is "automatically" backward-compatible.

We also increase the max number of Gap ACK blocks to 128, allowing up to
64 blocks per type (total buffer size = 516 bytes).

Besides, the 'tipc_link_advance_transmq()' function is refactored which
is applicable for both the unicast and broadcast cases now, so some old
functions can be removed and the code is optimized.

With the patch, TIPC broadcast is more robust regardless of packet loss
or disorder, latency, ... in the underlying network. Its performance is
boosted significantly.
For example, experiment with a 5% packet loss rate results:

$ time tipc-pipe --mc --rdm --data_size 123 --data_num 1500000
real    0m 42.46s
user    0m 1.16s
sys     0m 17.67s

Without the patch:

$ time tipc-pipe --mc --rdm --data_size 123 --data_num 1500000
real    8m 27.94s
user    0m 0.55s
sys     0m 2.38s

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c |   9 +-
 net/tipc/link.c  | 425 ++++++++++++++++++++++++++++++++-----------------------
 net/tipc/link.h  |   7 +-
 net/tipc/msg.h   |  27 +++-
 net/tipc/node.c  |  10 +-
 5 files changed, 293 insertions(+), 185 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 4c20be08b9c4..3ce690a96ee9 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -474,7 +474,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
 	__skb_queue_head_init(&xmitq);
 
 	tipc_bcast_lock(net);
-	tipc_link_bc_ack_rcv(l, acked, &xmitq);
+	tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq);
 	tipc_bcast_unlock(net);
 
 	tipc_bcbase_xmit(net, &xmitq);
@@ -492,6 +492,7 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
 			struct tipc_msg *hdr)
 {
 	struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
+	struct tipc_gap_ack_blks *ga;
 	struct sk_buff_head xmitq;
 	int rc = 0;
 
@@ -501,8 +502,10 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
 	if (msg_type(hdr) != STATE_MSG) {
 		tipc_link_bc_init_rcv(l, hdr);
 	} else if (!msg_bc_ack_invalid(hdr)) {
-		tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr), &xmitq);
-		rc = tipc_link_bc_sync_rcv(l, hdr, &xmitq);
+		tipc_get_gap_ack_blks(&ga, l, hdr, false);
+		rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
+					  msg_bc_gap(hdr), ga, &xmitq);
+		rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
 	}
 	tipc_bcast_unlock(net);
 
diff --git a/net/tipc/link.c b/net/tipc/link.c
index d4675e922a8f..d29b9c531171 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -188,6 +188,8 @@ struct tipc_link {
 	/* Broadcast */
 	u16 ackers;
 	u16 acked;
+	u16 last_gap;
+	struct tipc_gap_ack_blks *last_ga;
 	struct tipc_link *bc_rcvlink;
 	struct tipc_link *bc_sndlink;
 	u8 nack_state;
@@ -249,11 +251,14 @@ static int tipc_link_build_nack_msg(struct tipc_link *l,
 				    struct sk_buff_head *xmitq);
 static void tipc_link_build_bc_init_msg(struct tipc_link *l,
 					struct sk_buff_head *xmitq);
-static int tipc_link_release_pkts(struct tipc_link *l, u16 to);
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap);
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+				    struct tipc_link *l, u8 start_index);
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr);
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+				     u16 acked, u16 gap,
 				     struct tipc_gap_ack_blks *ga,
-				     struct sk_buff_head *xmitq);
+				     struct sk_buff_head *xmitq,
+				     bool *retransmitted, int *rc);
 static void tipc_link_update_cwin(struct tipc_link *l, int released,
 				  bool retransmitted);
 /*
@@ -370,7 +375,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
 	snd_l->ackers--;
 	rcv_l->bc_peer_is_up = true;
 	rcv_l->state = LINK_ESTABLISHED;
-	tipc_link_bc_ack_rcv(rcv_l, ack, xmitq);
+	tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq);
 	trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
 	tipc_link_reset(rcv_l);
 	rcv_l->state = LINK_RESET;
@@ -784,8 +789,6 @@ bool tipc_link_too_silent(struct tipc_link *l)
 	return (l->silent_intv_cnt + 2 > l->abort_limit);
 }
 
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
-				u16 from, u16 to, struct sk_buff_head *xmitq);
 /* tipc_link_timeout - perform periodic task as instructed from node timeout
  */
 int tipc_link_timeout(struct tipc_link *l, struct sk_buff_head *xmitq)
@@ -948,6 +951,9 @@ void tipc_link_reset(struct tipc_link *l)
 	l->snd_nxt_state = 1;
 	l->rcv_nxt_state = 1;
 	l->acked = 0;
+	l->last_gap = 0;
+	kfree(l->last_ga);
+	l->last_ga = NULL;
 	l->silent_intv_cnt = 0;
 	l->rst_cnt = 0;
 	l->bc_peer_is_up = false;
@@ -1183,68 +1189,14 @@ static bool link_retransmit_failure(struct tipc_link *l, struct tipc_link *r,
 
 	if (link_is_bc_sndlink(l)) {
 		r->state = LINK_RESET;
-		*rc = TIPC_LINK_DOWN_EVT;
+		*rc |= TIPC_LINK_DOWN_EVT;
 	} else {
-		*rc = tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
+		*rc |= tipc_link_fsm_evt(l, LINK_FAILURE_EVT);
 	}
 
 	return true;
 }
 
-/* tipc_link_bc_retrans() - retransmit zero or more packets
- * @l: the link to transmit on
- * @r: the receiving link ordering the retransmit. Same as l if unicast
- * @from: retransmit from (inclusive) this sequence number
- * @to: retransmit to (inclusive) this sequence number
- * xmitq: queue for accumulating the retransmitted packets
- */
-static int tipc_link_bc_retrans(struct tipc_link *l, struct tipc_link *r,
-				u16 from, u16 to, struct sk_buff_head *xmitq)
-{
-	struct sk_buff *_skb, *skb = skb_peek(&l->transmq);
-	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
-	u16 ack = l->rcv_nxt - 1;
-	int retransmitted = 0;
-	struct tipc_msg *hdr;
-	int rc = 0;
-
-	if (!skb)
-		return 0;
-	if (less(to, from))
-		return 0;
-
-	trace_tipc_link_retrans(r, from, to, &l->transmq);
-
-	if (link_retransmit_failure(l, r, &rc))
-		return rc;
-
-	skb_queue_walk(&l->transmq, skb) {
-		hdr = buf_msg(skb);
-		if (less(msg_seqno(hdr), from))
-			continue;
-		if (more(msg_seqno(hdr), to))
-			break;
-		if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
-			continue;
-		TIPC_SKB_CB(skb)->nxt_retr = TIPC_BC_RETR_LIM;
-		_skb = pskb_copy(skb, GFP_ATOMIC);
-		if (!_skb)
-			return 0;
-		hdr = buf_msg(_skb);
-		msg_set_ack(hdr, ack);
-		msg_set_bcast_ack(hdr, bc_ack);
-		_skb->priority = TC_PRIO_CONTROL;
-		__skb_queue_tail(xmitq, _skb);
-		l->stats.retransmitted++;
-		retransmitted++;
-		/* Increase actual retrans counter & mark first time */
-		if (!TIPC_SKB_CB(skb)->retr_cnt++)
-			TIPC_SKB_CB(skb)->retr_stamp = jiffies;
-	}
-	tipc_link_update_cwin(l, 0, retransmitted);
-	return 0;
-}
-
 /* tipc_data_input - deliver data and name distr msgs to upper layer
  *
  * Consumes buffer if message is of right type
@@ -1402,46 +1354,71 @@ static int tipc_link_tnl_rcv(struct tipc_link *l, struct sk_buff *skb,
 	return rc;
 }
 
-static int tipc_link_release_pkts(struct tipc_link *l, u16 acked)
-{
-	int released = 0;
-	struct sk_buff *skb, *tmp;
-
-	skb_queue_walk_safe(&l->transmq, skb, tmp) {
-		if (more(buf_seqno(skb), acked))
-			break;
-		__skb_unlink(skb, &l->transmq);
-		kfree_skb(skb);
-		released++;
+/**
+ * tipc_get_gap_ack_blks - get Gap ACK blocks from PROTOCOL/STATE_MSG
+ * @ga: returned pointer to the Gap ACK blocks if any
+ * @l: the tipc link
+ * @hdr: the PROTOCOL/STATE_MSG header
+ * @uc: desired Gap ACK blocks type, i.e. unicast (= 1) or broadcast (= 0)
+ *
+ * Return: the total Gap ACK blocks size
+ */
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+			  struct tipc_msg *hdr, bool uc)
+{
+	struct tipc_gap_ack_blks *p;
+	u16 sz = 0;
+
+	/* Does peer support the Gap ACK blocks feature? */
+	if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
+		p = (struct tipc_gap_ack_blks *)msg_data(hdr);
+		sz = ntohs(p->len);
+		/* Sanity check */
+		if (sz == tipc_gap_ack_blks_sz(p->ugack_cnt + p->bgack_cnt)) {
+			/* Good, check if the desired type exists */
+			if ((uc && p->ugack_cnt) || (!uc && p->bgack_cnt))
+				goto ok;
+		/* Backward compatible: peer might not support bc, but uc? */
+		} else if (uc && sz == tipc_gap_ack_blks_sz(p->ugack_cnt)) {
+			if (p->ugack_cnt) {
+				p->bgack_cnt = 0;
+				goto ok;
+			}
+		}
 	}
-	return released;
+	/* Other cases: ignore! */
+	p = NULL;
+
+ok:
+	*ga = p;
+	return sz;
 }
 
-/* tipc_build_gap_ack_blks - build Gap ACK blocks
- * @l: tipc link that data have come with gaps in sequence if any
- * @data: data buffer to store the Gap ACK blocks after built
- *
- * returns the actual allocated memory size
- */
-static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
+static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
+				    struct tipc_link *l, u8 start_index)
 {
+	struct tipc_gap_ack *gacks = &ga->gacks[start_index];
 	struct sk_buff *skb = skb_peek(&l->deferdq);
-	struct tipc_gap_ack_blks *ga = data;
-	u16 len, expect, seqno = 0;
+	u16 expect, seqno = 0;
 	u8 n = 0;
 
-	if (!skb || !gap)
-		goto exit;
+	if (!skb)
+		return 0;
 
 	expect = buf_seqno(skb);
 	skb_queue_walk(&l->deferdq, skb) {
 		seqno = buf_seqno(skb);
 		if (unlikely(more(seqno, expect))) {
-			ga->gacks[n].ack = htons(expect - 1);
-			ga->gacks[n].gap = htons(seqno - expect);
-			if (++n >= MAX_GAP_ACK_BLKS) {
-				pr_info_ratelimited("Too few Gap ACK blocks!\n");
-				goto exit;
+			gacks[n].ack = htons(expect - 1);
+			gacks[n].gap = htons(seqno - expect);
+			if (++n >= MAX_GAP_ACK_BLKS / 2) {
+				char buf[TIPC_MAX_LINK_NAME];
+
+				pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
+						    tipc_link_name_ext(l, buf),
+						    n,
+						    skb_queue_len(&l->deferdq));
+				return n;
 			}
 		} else if (unlikely(less(seqno, expect))) {
 			pr_warn("Unexpected skb in deferdq!\n");
@@ -1451,14 +1428,44 @@ static u16 tipc_build_gap_ack_blks(struct tipc_link *l, void *data, u16 gap)
 	}
 
 	/* last block */
-	ga->gacks[n].ack = htons(seqno);
-	ga->gacks[n].gap = 0;
+	gacks[n].ack = htons(seqno);
+	gacks[n].gap = 0;
 	n++;
+	return n;
+}
 
-exit:
-	len = tipc_gap_ack_blks_sz(n);
+/* tipc_build_gap_ack_blks - build Gap ACK blocks
+ * @l: tipc unicast link
+ * @hdr: the tipc message buffer to store the Gap ACK blocks after built
+ *
+ * The function builds Gap ACK blocks for both the unicast & broadcast receiver
+ * links of a certain peer, the buffer after built has the network data format
+ * as found at the struct tipc_gap_ack_blks definition.
+ *
+ * returns the actual allocated memory size
+ */
+static u16 tipc_build_gap_ack_blks(struct tipc_link *l, struct tipc_msg *hdr)
+{
+	struct tipc_link *bcl = l->bc_rcvlink;
+	struct tipc_gap_ack_blks *ga;
+	u16 len;
+
+	ga = (struct tipc_gap_ack_blks *)msg_data(hdr);
+
+	/* Start with broadcast link first */
+	tipc_bcast_lock(bcl->net);
+	msg_set_bcast_ack(hdr, bcl->rcv_nxt - 1);
+	msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+	ga->bgack_cnt = __tipc_build_gap_ack_blks(ga, bcl, 0);
+	tipc_bcast_unlock(bcl->net);
+
+	/* Now for unicast link, but an explicit NACK only (???) */
+	ga->ugack_cnt = (msg_seq_gap(hdr)) ?
+			__tipc_build_gap_ack_blks(ga, l, ga->bgack_cnt) : 0;
+
+	/* Total len */
+	len = tipc_gap_ack_blks_sz(ga->bgack_cnt + ga->ugack_cnt);
 	ga->len = htons(len);
-	ga->gack_cnt = n;
 	return len;
 }
 
@@ -1466,47 +1473,109 @@ exit:
  *			       acked packets, also doing retransmissions if
  *			       gaps found
  * @l: tipc link with transmq queue to be advanced
+ * @r: tipc link "receiver" i.e. in case of broadcast (= "l" if unicast)
  * @acked: seqno of last packet acked by peer without any gaps before
  * @gap: # of gap packets
  * @ga: buffer pointer to Gap ACK blocks from peer
  * @xmitq: queue for accumulating the retransmitted packets if any
+ * @retransmitted: returned boolean value if a retransmission is really issued
+ * @rc: returned code e.g. TIPC_LINK_DOWN_EVT if a repeated retransmit failures
+ *      happens (- unlikely case)
  *
- * In case of a repeated retransmit failures, the call will return shortly
- * with a returned code (e.g. TIPC_LINK_DOWN_EVT)
+ * Return: the number of packets released from the link transmq
  */
-static int tipc_link_advance_transmq(struct tipc_link *l, u16 acked, u16 gap,
+static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
+				     u16 acked, u16 gap,
 				     struct tipc_gap_ack_blks *ga,
-				     struct sk_buff_head *xmitq)
+				     struct sk_buff_head *xmitq,
+				     bool *retransmitted, int *rc)
 {
+	struct tipc_gap_ack_blks *last_ga = r->last_ga, *this_ga = NULL;
+	struct tipc_gap_ack *gacks = NULL;
 	struct sk_buff *skb, *_skb, *tmp;
 	struct tipc_msg *hdr;
+	u32 qlen = skb_queue_len(&l->transmq);
+	u16 nacked = acked, ngap = gap, gack_cnt = 0;
 	u16 bc_ack = l->bc_rcvlink->rcv_nxt - 1;
-	bool retransmitted = false;
 	u16 ack = l->rcv_nxt - 1;
-	bool passed = false;
-	u16 released = 0;
 	u16 seqno, n = 0;
-	int rc = 0;
+	u16 end = r->acked, start = end, offset = r->last_gap;
+	u16 si = (last_ga) ? last_ga->start_index : 0;
+	bool is_uc = !link_is_bc_sndlink(l);
+	bool bc_has_acked = false;
+
+	/* Determine Gap ACK blocks if any for the particular link */
+	if (ga && is_uc) {
+		/* Get the Gap ACKs, uc part */
+		gack_cnt = ga->ugack_cnt;
+		gacks = &ga->gacks[ga->bgack_cnt];
+	} else if (ga) {
+		/* Copy the Gap ACKs, bc part, for later renewal if needed */
+		this_ga = kmemdup(ga, tipc_gap_ack_blks_sz(ga->bgack_cnt),
+				  GFP_ATOMIC);
+		if (likely(this_ga)) {
+			this_ga->start_index = 0;
+			/* Start with the bc Gap ACKs */
+			gack_cnt = this_ga->bgack_cnt;
+			gacks = &this_ga->gacks[0];
+		} else {
+			/* Hmm, we can get in trouble..., simply ignore it */
+			pr_warn_ratelimited("Ignoring bc Gap ACKs, no memory\n");
+		}
+	}
 
+	/* Advance the link transmq */
 	skb_queue_walk_safe(&l->transmq, skb, tmp) {
 		seqno = buf_seqno(skb);
 
 next_gap_ack:
-		if (less_eq(seqno, acked)) {
+		if (less_eq(seqno, nacked)) {
+			if (is_uc)
+				goto release;
+			/* Skip packets peer has already acked */
+			if (!more(seqno, r->acked))
+				continue;
+			/* Get the next of last Gap ACK blocks */
+			while (more(seqno, end)) {
+				if (!last_ga || si >= last_ga->bgack_cnt)
+					break;
+				start = end + offset + 1;
+				end = ntohs(last_ga->gacks[si].ack);
+				offset = ntohs(last_ga->gacks[si].gap);
+				si++;
+				WARN_ONCE(more(start, end) ||
+					  (!offset &&
+					   si < last_ga->bgack_cnt) ||
+					  si > MAX_GAP_ACK_BLKS,
+					  "Corrupted Gap ACK: %d %d %d %d %d\n",
+					  start, end, offset, si,
+					  last_ga->bgack_cnt);
+			}
+			/* Check against the last Gap ACK block */
+			if (in_range(seqno, start, end))
+				continue;
+			/* Update/release the packet peer is acking */
+			bc_has_acked = true;
+			if (--TIPC_SKB_CB(skb)->ackers)
+				continue;
+release:
 			/* release skb */
 			__skb_unlink(skb, &l->transmq);
 			kfree_skb(skb);
-			released++;
-		} else if (less_eq(seqno, acked + gap)) {
-			/* First, check if repeated retrans failures occurs? */
-			if (!passed && link_retransmit_failure(l, l, &rc))
-				return rc;
-			passed = true;
-
+		} else if (less_eq(seqno, nacked + ngap)) {
+			/* First gap: check if repeated retrans failures? */
+			if (unlikely(seqno == acked + 1 &&
+				     link_retransmit_failure(l, r, rc))) {
+				/* Ignore this bc Gap ACKs if any */
+				kfree(this_ga);
+				this_ga = NULL;
+				break;
+			}
 			/* retransmit skb if unrestricted*/
 			if (time_before(jiffies, TIPC_SKB_CB(skb)->nxt_retr))
 				continue;
-			TIPC_SKB_CB(skb)->nxt_retr = TIPC_UC_RETR_TIME;
+			TIPC_SKB_CB(skb)->nxt_retr = (is_uc) ?
+					TIPC_UC_RETR_TIME : TIPC_BC_RETR_LIM;
 			_skb = pskb_copy(skb, GFP_ATOMIC);
 			if (!_skb)
 				continue;
@@ -1516,25 +1585,51 @@ next_gap_ack:
 			_skb->priority = TC_PRIO_CONTROL;
 			__skb_queue_tail(xmitq, _skb);
 			l->stats.retransmitted++;
-			retransmitted = true;
+			*retransmitted = true;
 			/* Increase actual retrans counter & mark first time */
 			if (!TIPC_SKB_CB(skb)->retr_cnt++)
 				TIPC_SKB_CB(skb)->retr_stamp = jiffies;
 		} else {
 			/* retry with Gap ACK blocks if any */
-			if (!ga || n >= ga->gack_cnt)
+			if (n >= gack_cnt)
 				break;
-			acked = ntohs(ga->gacks[n].ack);
-			gap = ntohs(ga->gacks[n].gap);
+			nacked = ntohs(gacks[n].ack);
+			ngap = ntohs(gacks[n].gap);
 			n++;
 			goto next_gap_ack;
 		}
 	}
-	if (released || retransmitted)
-		tipc_link_update_cwin(l, released, retransmitted);
-	if (released)
-		tipc_link_advance_backlog(l, xmitq);
-	return 0;
+
+	/* Renew last Gap ACK blocks for bc if needed */
+	if (bc_has_acked) {
+		if (this_ga) {
+			kfree(last_ga);
+			r->last_ga = this_ga;
+			r->last_gap = gap;
+		} else if (last_ga) {
+			if (less(acked, start)) {
+				si--;
+				offset = start - acked - 1;
+			} else if (less(acked, end)) {
+				acked = end;
+			}
+			if (si < last_ga->bgack_cnt) {
+				last_ga->start_index = si;
+				r->last_gap = offset;
+			} else {
+				kfree(last_ga);
+				r->last_ga = NULL;
+				r->last_gap = 0;
+			}
+		} else {
+			r->last_gap = 0;
+		}
+		r->acked = acked;
+	} else {
+		kfree(this_ga);
+	}
+
+	return qlen - skb_queue_len(&l->transmq);
 }
 
 /* tipc_link_build_state_msg: prepare link state message for transmission
@@ -1651,7 +1746,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 			kfree_skb(skb);
 			break;
 		}
-		released += tipc_link_release_pkts(l, msg_ack(hdr));
+		released += tipc_link_advance_transmq(l, l, msg_ack(hdr), 0,
+						      NULL, NULL, NULL, NULL);
 
 		/* Defer delivery if sequence gap */
 		if (unlikely(seqno != rcv_nxt)) {
@@ -1739,7 +1835,7 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 		msg_set_probe(hdr, probe);
 		msg_set_is_keepalive(hdr, probe || probe_reply);
 		if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
-			glen = tipc_build_gap_ack_blks(l, data, rcvgap);
+			glen = tipc_build_gap_ack_blks(l, hdr);
 		tipc_mon_prep(l->net, data + glen, &dlen, mstate, l->bearer_id);
 		msg_set_size(hdr, INT_H_SIZE + glen + dlen);
 		skb_trim(skb, INT_H_SIZE + glen + dlen);
@@ -2027,20 +2123,19 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 {
 	struct tipc_msg *hdr = buf_msg(skb);
 	struct tipc_gap_ack_blks *ga = NULL;
-	u16 rcvgap = 0;
-	u16 ack = msg_ack(hdr);
-	u16 gap = msg_seq_gap(hdr);
+	bool reply = msg_probe(hdr), retransmitted = false;
+	u16 dlen = msg_data_sz(hdr), glen = 0;
 	u16 peers_snd_nxt =  msg_next_sent(hdr);
 	u16 peers_tol = msg_link_tolerance(hdr);
 	u16 peers_prio = msg_linkprio(hdr);
+	u16 gap = msg_seq_gap(hdr);
+	u16 ack = msg_ack(hdr);
 	u16 rcv_nxt = l->rcv_nxt;
-	u16 dlen = msg_data_sz(hdr);
+	u16 rcvgap = 0;
 	int mtyp = msg_type(hdr);
-	bool reply = msg_probe(hdr);
-	u16 glen = 0;
-	void *data;
+	int rc = 0, released;
 	char *if_name;
-	int rc = 0;
+	void *data;
 
 	trace_tipc_proto_rcv(skb, false, l->name);
 	if (tipc_link_is_blocked(l) || !xmitq)
@@ -2137,13 +2232,7 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 		}
 
 		/* Receive Gap ACK blocks from peer if any */
-		if (l->peer_caps & TIPC_GAP_ACK_BLOCK) {
-			ga = (struct tipc_gap_ack_blks *)data;
-			glen = ntohs(ga->len);
-			/* sanity check: if failed, ignore Gap ACK blocks */
-			if (glen != tipc_gap_ack_blks_sz(ga->gack_cnt))
-				ga = NULL;
-		}
+		glen = tipc_get_gap_ack_blks(&ga, l, hdr, true);
 
 		tipc_mon_rcv(l->net, data + glen, dlen - glen, l->addr,
 			     &l->mon_state, l->bearer_id);
@@ -2158,9 +2247,14 @@ static int tipc_link_proto_rcv(struct tipc_link *l, struct sk_buff *skb,
 			tipc_link_build_proto_msg(l, STATE_MSG, 0, reply,
 						  rcvgap, 0, 0, xmitq);
 
-		rc |= tipc_link_advance_transmq(l, ack, gap, ga, xmitq);
+		released = tipc_link_advance_transmq(l, l, ack, gap, ga, xmitq,
+						     &retransmitted, &rc);
 		if (gap)
 			l->stats.recv_nacks++;
+		if (released || retransmitted)
+			tipc_link_update_cwin(l, released, retransmitted);
+		if (released)
+			tipc_link_advance_backlog(l, xmitq);
 		if (unlikely(!skb_queue_empty(&l->wakeupq)))
 			link_prepare_wakeup(l);
 	}
@@ -2246,10 +2340,7 @@ void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr)
 int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 			  struct sk_buff_head *xmitq)
 {
-	struct tipc_link *snd_l = l->bc_sndlink;
 	u16 peers_snd_nxt = msg_bc_snd_nxt(hdr);
-	u16 from = msg_bcast_ack(hdr) + 1;
-	u16 to = from + msg_bc_gap(hdr) - 1;
 	int rc = 0;
 
 	if (!link_is_up(l))
@@ -2271,8 +2362,6 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 	if (more(peers_snd_nxt, l->rcv_nxt + l->window))
 		return rc;
 
-	rc = tipc_link_bc_retrans(snd_l, l, from, to, xmitq);
-
 	l->snd_nxt = peers_snd_nxt;
 	if (link_bc_rcv_gap(l))
 		rc |= TIPC_LINK_SND_STATE;
@@ -2307,38 +2396,27 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 	return 0;
 }
 
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
-			  struct sk_buff_head *xmitq)
+int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
+			 struct tipc_gap_ack_blks *ga,
+			 struct sk_buff_head *xmitq)
 {
-	struct sk_buff *skb, *tmp;
-	struct tipc_link *snd_l = l->bc_sndlink;
+	struct tipc_link *l = r->bc_sndlink;
+	bool unused = false;
+	int rc = 0;
 
-	if (!link_is_up(l) || !l->bc_peer_is_up)
-		return;
+	if (!link_is_up(r) || !r->bc_peer_is_up)
+		return 0;
 
-	if (!more(acked, l->acked))
-		return;
+	if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
+		return 0;
 
-	trace_tipc_link_bc_ack(l, l->acked, acked, &snd_l->transmq);
-	/* Skip over packets peer has already acked */
-	skb_queue_walk(&snd_l->transmq, skb) {
-		if (more(buf_seqno(skb), l->acked))
-			break;
-	}
+	tipc_link_advance_transmq(l, r, acked, gap, ga, xmitq, &unused, &rc);
 
-	/* Update/release the packets peer is acking now */
-	skb_queue_walk_from_safe(&snd_l->transmq, skb, tmp) {
-		if (more(buf_seqno(skb), acked))
-			break;
-		if (!--TIPC_SKB_CB(skb)->ackers) {
-			__skb_unlink(skb, &snd_l->transmq);
-			kfree_skb(skb);
-		}
-	}
-	l->acked = acked;
-	tipc_link_advance_backlog(snd_l, xmitq);
-	if (unlikely(!skb_queue_empty(&snd_l->wakeupq)))
-		link_prepare_wakeup(snd_l);
+	tipc_link_advance_backlog(l, xmitq);
+	if (unlikely(!skb_queue_empty(&l->wakeupq)))
+		link_prepare_wakeup(l);
+
+	return rc;
 }
 
 /* tipc_link_bc_nack_rcv(): receive broadcast nack message
@@ -2366,8 +2444,7 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 		return 0;
 
 	if (dnode == tipc_own_addr(l->net)) {
-		tipc_link_bc_ack_rcv(l, acked, xmitq);
-		rc = tipc_link_bc_retrans(l->bc_sndlink, l, from, to, xmitq);
+		rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq);
 		l->stats.recv_nacks++;
 		return rc;
 	}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index d3c1c3fc1659..0a0fa7350722 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -143,8 +143,11 @@ int tipc_link_bc_peers(struct tipc_link *l);
 void tipc_link_set_mtu(struct tipc_link *l, int mtu);
 int tipc_link_mtu(struct tipc_link *l);
 int tipc_link_mss(struct tipc_link *l);
-void tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked,
-			  struct sk_buff_head *xmitq);
+u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
+			  struct tipc_msg *hdr, bool uc);
+int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
+			 struct tipc_gap_ack_blks *ga,
+			 struct sk_buff_head *xmitq);
 void tipc_link_build_bc_sync_msg(struct tipc_link *l,
 				 struct sk_buff_head *xmitq);
 void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index 871feadbbc19..ca5f8689a33b 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -160,20 +160,39 @@ struct tipc_gap_ack {
 
 /* struct tipc_gap_ack_blks
  * @len: actual length of the record
- * @gack_cnt: number of Gap ACK blocks in the record
+ * @ugack_cnt: number of Gap ACK blocks for unicast (following the broadcast
+ *             ones)
+ * @start_index: starting index for "valid" broadcast Gap ACK blocks
+ * @bgack_cnt: number of Gap ACK blocks for broadcast in the record
  * @gacks: array of Gap ACK blocks
+ *
+ *  31                       16 15                        0
+ * +-------------+-------------+-------------+-------------+
+ * |  bgack_cnt  |  ugack_cnt  |            len            |
+ * +-------------+-------------+-------------+-------------+  -
+ * |            gap            |            ack            |   |
+ * +-------------+-------------+-------------+-------------+    > bc gacks
+ * :                           :                           :   |
+ * +-------------+-------------+-------------+-------------+  -
+ * |            gap            |            ack            |   |
+ * +-------------+-------------+-------------+-------------+    > uc gacks
+ * :                           :                           :   |
+ * +-------------+-------------+-------------+-------------+  -
  */
 struct tipc_gap_ack_blks {
 	__be16 len;
-	u8 gack_cnt;
-	u8 reserved;
+	union {
+		u8 ugack_cnt;
+		u8 start_index;
+	};
+	u8 bgack_cnt;
 	struct tipc_gap_ack gacks[];
 };
 
 #define tipc_gap_ack_blks_sz(n) (sizeof(struct tipc_gap_ack_blks) + \
 				 sizeof(struct tipc_gap_ack) * (n))
 
-#define MAX_GAP_ACK_BLKS	32
+#define MAX_GAP_ACK_BLKS	128
 #define MAX_GAP_ACK_BLKS_SZ	tipc_gap_ack_blks_sz(MAX_GAP_ACK_BLKS)
 
 static inline struct tipc_msg *buf_msg(struct sk_buff *skb)
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 803a3a6d0f50..6a49b3eeaae9 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2071,10 +2071,16 @@ rcv:
 	le = &n->links[bearer_id];
 
 	/* Ensure broadcast reception is in synch with peer's send state */
-	if (unlikely(usr == LINK_PROTOCOL))
+	if (unlikely(usr == LINK_PROTOCOL)) {
+		if (unlikely(skb_linearize(skb))) {
+			tipc_node_put(n);
+			goto discard;
+		}
+		hdr = buf_msg(skb);
 		tipc_node_bc_sync_rcv(n, hdr, bearer_id, &xmitq);
-	else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack))
+	} else if (unlikely(tipc_link_acked(n->bc_entry.link) != bc_ack)) {
 		tipc_bcast_ack_rcv(net, n->bc_entry.link, hdr);
+	}
 
 	/* Receive packet directly if conditions permit */
 	tipc_node_read_lock(n);
-- 
cgit v1.2.3-59-g8ed1b


From c6ed7a5cc2d68c36287c09260dc211173e0447d7 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 26 May 2020 16:38:35 +0700
Subject: tipc: add back link trace events

In the previous commit ("tipc: add Gap ACK blocks support for broadcast
link"), we have removed the following link trace events due to the code
changes:

- tipc_link_bc_ack
- tipc_link_retrans

This commit adds them back along with some minor changes to adapt to
the new code.

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/link.c  |  3 +++
 net/tipc/trace.h | 13 ++++++++-----
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/tipc/link.c b/net/tipc/link.c
index d29b9c531171..288c5670cfa5 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -1504,6 +1504,8 @@ static int tipc_link_advance_transmq(struct tipc_link *l, struct tipc_link *r,
 	bool is_uc = !link_is_bc_sndlink(l);
 	bool bc_has_acked = false;
 
+	trace_tipc_link_retrans(r, acked + 1, acked + gap, &l->transmq);
+
 	/* Determine Gap ACK blocks if any for the particular link */
 	if (ga && is_uc) {
 		/* Get the Gap ACKs, uc part */
@@ -2410,6 +2412,7 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
 	if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
 		return 0;
 
+	trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
 	tipc_link_advance_transmq(l, r, acked, gap, ga, xmitq, &unused, &rc);
 
 	tipc_link_advance_backlog(l, xmitq);
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index 4d8e00483afc..e7535ab75255 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -299,8 +299,10 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
 		__entry->from = f;
 		__entry->to = t;
 		__entry->len = skb_queue_len(tq);
-		__entry->fseqno = msg_seqno(buf_msg(skb_peek(tq)));
-		__entry->lseqno = msg_seqno(buf_msg(skb_peek_tail(tq)));
+		__entry->fseqno = __entry->len ?
+				  msg_seqno(buf_msg(skb_peek(tq))) : 0;
+		__entry->lseqno = __entry->len ?
+				  msg_seqno(buf_msg(skb_peek_tail(tq))) : 0;
 	),
 
 	TP_printk("<%s> retrans req: [%u-%u] transmq: %u [%u-%u]\n",
@@ -308,15 +310,16 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
 		  __entry->len, __entry->fseqno, __entry->lseqno)
 );
 
-DEFINE_EVENT(tipc_link_transmq_class, tipc_link_retrans,
+DEFINE_EVENT_CONDITION(tipc_link_transmq_class, tipc_link_retrans,
 	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
-	TP_ARGS(r, f, t, tq)
+	TP_ARGS(r, f, t, tq),
+	TP_CONDITION(less_eq(f, t))
 );
 
 DEFINE_EVENT_PRINT(tipc_link_transmq_class, tipc_link_bc_ack,
 	TP_PROTO(struct tipc_link *r, u16 f, u16 t, struct sk_buff_head *tq),
 	TP_ARGS(r, f, t, tq),
-	TP_printk("<%s> acked: [%u-%u] transmq: %u [%u-%u]\n",
+	TP_printk("<%s> acked: %u gap: %u transmq: %u [%u-%u]\n",
 		  __entry->name, __entry->from, __entry->to,
 		  __entry->len, __entry->fseqno, __entry->lseqno)
 );
-- 
cgit v1.2.3-59-g8ed1b


From a91d55d162b86fb983b88f44296149752db7efbd Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 26 May 2020 16:38:36 +0700
Subject: tipc: enable broadcast retrans via unicast

In some environments, broadcast traffic is suppressed at high rates
(i.e. by a kind of bandwidth limit setting). When such a limit is
applied, TIPC broadcast can still run successfully. However, under high
load, some packets will be dropped first and TIPC tries to retransmit
them, but the packet retransmission is intentionally broadcast too,
which makes things worse and does not help at all.

This commit enables broadcast retransmission via unicast, which only
retransmits packets to the specific peer that has actually reported a
gap, i.e. it does not broadcast to all nodes in the cluster. This
prevents the retransmissions from being suppressed, reduces overhead
on the other peers due to duplicates, and ultimately improves the
overall TIPC broadcast performance.

Note: the functionality can be turned on/off via the sysctl file:

echo 1 > /proc/sys/net/tipc/bc_retruni
echo 0 > /proc/sys/net/tipc/bc_retruni

Default is '0', i.e. the broadcast retransmission still works as usual.

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c  | 11 ++++++++---
 net/tipc/bcast.h  |  4 +++-
 net/tipc/link.c   | 10 ++++++----
 net/tipc/link.h   |  3 ++-
 net/tipc/node.c   |  2 +-
 net/tipc/sysctl.c |  9 ++++++++-
 6 files changed, 28 insertions(+), 11 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 3ce690a96ee9..50a16f8bebd9 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -46,6 +46,7 @@
 #define BCLINK_WIN_MIN      32	/* bcast minimum link window size */
 
 const char tipc_bclink_name[] = "broadcast-link";
+unsigned long sysctl_tipc_bc_retruni __read_mostly;
 
 /**
  * struct tipc_bc_base - base structure for keeping broadcast send state
@@ -474,7 +475,7 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
 	__skb_queue_head_init(&xmitq);
 
 	tipc_bcast_lock(net);
-	tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq);
+	tipc_link_bc_ack_rcv(l, acked, 0, NULL, &xmitq, NULL);
 	tipc_bcast_unlock(net);
 
 	tipc_bcbase_xmit(net, &xmitq);
@@ -489,7 +490,8 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
  * RCU is locked, no other locks set
  */
 int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-			struct tipc_msg *hdr)
+			struct tipc_msg *hdr,
+			struct sk_buff_head *retrq)
 {
 	struct sk_buff_head *inputq = &tipc_bc_base(net)->inputq;
 	struct tipc_gap_ack_blks *ga;
@@ -503,8 +505,11 @@ int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
 		tipc_link_bc_init_rcv(l, hdr);
 	} else if (!msg_bc_ack_invalid(hdr)) {
 		tipc_get_gap_ack_blks(&ga, l, hdr, false);
+		if (!sysctl_tipc_bc_retruni)
+			retrq = &xmitq;
 		rc = tipc_link_bc_ack_rcv(l, msg_bcast_ack(hdr),
-					  msg_bc_gap(hdr), ga, &xmitq);
+					  msg_bc_gap(hdr), ga, &xmitq,
+					  retrq);
 		rc |= tipc_link_bc_sync_rcv(l, hdr, &xmitq);
 	}
 	tipc_bcast_unlock(net);
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 9e847d9617d3..97d3cf9d3e4d 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -45,6 +45,7 @@ struct tipc_nl_msg;
 struct tipc_nlist;
 struct tipc_nitem;
 extern const char tipc_bclink_name[];
+extern unsigned long sysctl_tipc_bc_retruni;
 
 #define TIPC_METHOD_EXPIRE msecs_to_jiffies(5000)
 
@@ -93,7 +94,8 @@ int tipc_bcast_rcv(struct net *net, struct tipc_link *l, struct sk_buff *skb);
 void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
 			struct tipc_msg *hdr);
 int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
-			struct tipc_msg *hdr);
+			struct tipc_msg *hdr,
+			struct sk_buff_head *retrq);
 int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
 int tipc_bclink_reset_stats(struct net *net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index 288c5670cfa5..af352391e2ab 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -375,7 +375,7 @@ void tipc_link_remove_bc_peer(struct tipc_link *snd_l,
 	snd_l->ackers--;
 	rcv_l->bc_peer_is_up = true;
 	rcv_l->state = LINK_ESTABLISHED;
-	tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq);
+	tipc_link_bc_ack_rcv(rcv_l, ack, 0, NULL, xmitq, NULL);
 	trace_tipc_link_reset(rcv_l, TIPC_DUMP_ALL, "bclink removed!");
 	tipc_link_reset(rcv_l);
 	rcv_l->state = LINK_RESET;
@@ -2400,7 +2400,8 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 
 int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
 			 struct tipc_gap_ack_blks *ga,
-			 struct sk_buff_head *xmitq)
+			 struct sk_buff_head *xmitq,
+			 struct sk_buff_head *retrq)
 {
 	struct tipc_link *l = r->bc_sndlink;
 	bool unused = false;
@@ -2413,7 +2414,7 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
 		return 0;
 
 	trace_tipc_link_bc_ack(r, acked, gap, &l->transmq);
-	tipc_link_advance_transmq(l, r, acked, gap, ga, xmitq, &unused, &rc);
+	tipc_link_advance_transmq(l, r, acked, gap, ga, retrq, &unused, &rc);
 
 	tipc_link_advance_backlog(l, xmitq);
 	if (unlikely(!skb_queue_empty(&l->wakeupq)))
@@ -2447,7 +2448,8 @@ int tipc_link_bc_nack_rcv(struct tipc_link *l, struct sk_buff *skb,
 		return 0;
 
 	if (dnode == tipc_own_addr(l->net)) {
-		rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq);
+		rc = tipc_link_bc_ack_rcv(l, acked, to - acked, NULL, xmitq,
+					  xmitq);
 		l->stats.recv_nacks++;
 		return rc;
 	}
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 0a0fa7350722..4d0768cf91d5 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -147,7 +147,8 @@ u16 tipc_get_gap_ack_blks(struct tipc_gap_ack_blks **ga, struct tipc_link *l,
 			  struct tipc_msg *hdr, bool uc);
 int tipc_link_bc_ack_rcv(struct tipc_link *l, u16 acked, u16 gap,
 			 struct tipc_gap_ack_blks *ga,
-			 struct sk_buff_head *xmitq);
+			 struct sk_buff_head *xmitq,
+			 struct sk_buff_head *retrq);
 void tipc_link_build_bc_sync_msg(struct tipc_link *l,
 				 struct sk_buff_head *xmitq);
 void tipc_link_bc_init_rcv(struct tipc_link *l, struct tipc_msg *hdr);
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 6a49b3eeaae9..548207fdec15 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1772,7 +1772,7 @@ static void tipc_node_bc_sync_rcv(struct tipc_node *n, struct tipc_msg *hdr,
 	struct tipc_link *ucl;
 	int rc;
 
-	rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr);
+	rc = tipc_bcast_sync_rcv(n->net, n->bc_entry.link, hdr, xmitq);
 
 	if (rc & TIPC_LINK_DOWN_EVT) {
 		tipc_node_reset_links(n);
diff --git a/net/tipc/sysctl.c b/net/tipc/sysctl.c
index 58ab3d6dcdce..97a6264a2993 100644
--- a/net/tipc/sysctl.c
+++ b/net/tipc/sysctl.c
@@ -36,7 +36,7 @@
 #include "core.h"
 #include "trace.h"
 #include "crypto.h"
-
+#include "bcast.h"
 #include <linux/sysctl.h>
 
 static struct ctl_table_header *tipc_ctl_hdr;
@@ -75,6 +75,13 @@ static struct ctl_table tipc_table[] = {
 		.extra1         = SYSCTL_ONE,
 	},
 #endif
+	{
+		.procname	= "bc_retruni",
+		.data		= &sysctl_tipc_bc_retruni,
+		.maxlen		= sizeof(sysctl_tipc_bc_retruni),
+		.mode		= 0644,
+		.proc_handler	= proc_doulongvec_minmax,
+	},
 	{}
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 03b6fefd9bb4844c75faeb10df8496794e2fd5da Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 26 May 2020 16:38:37 +0700
Subject: tipc: add support for broadcast rcv stats dumping

This commit enables dumping the statistics of a broadcast-receiver link,
just like the traditional 'broadcast-link' one (which is for the
broadcast-sender). The link dumping can be triggered via netlink (e.g.
the iproute2/tipc tool) by using the link flag
'TIPC_NLA_LINK_BROADCAST' as the indicator.

The name of a broadcast-receiver link of a specific peer will be in the
format: 'broadcast-link:<peer-id>'.

For example:

Link <broadcast-link:1001002>
  Window:50 packets
  RX packets:7841 fragments:2408/440 bundles:0/0
  TX packets:0 fragments:0/0 bundles:0/0
  RX naks:0 defs:124 dups:0
  TX naks:21 acks:0 retrans:0
  Congestion link:0  Send queue max:0 avg:0

In addition, the broadcast-receiver link statistics can be reset in the
usual way via netlink by specifying that link name in the command.

Note: 'tipc_link_name_ext()' is removed because the link name can now
be retrieved simply via 'l->name'.

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/bcast.c   |  6 ++---
 net/tipc/bcast.h   |  5 +++--
 net/tipc/link.c    | 65 +++++++++++++++++++++++++++---------------------------
 net/tipc/link.h    |  3 +--
 net/tipc/msg.c     |  9 ++++----
 net/tipc/msg.h     |  2 +-
 net/tipc/netlink.c |  2 +-
 net/tipc/node.c    | 61 +++++++++++++++++++++++++++++++++++++++++++-------
 net/tipc/trace.h   |  4 ++--
 9 files changed, 101 insertions(+), 56 deletions(-)

diff --git a/net/tipc/bcast.c b/net/tipc/bcast.c
index 50a16f8bebd9..383f87bc1061 100644
--- a/net/tipc/bcast.c
+++ b/net/tipc/bcast.c
@@ -563,10 +563,8 @@ void tipc_bcast_remove_peer(struct net *net, struct tipc_link *rcv_l)
 		tipc_sk_rcv(net, inputq);
 }
 
-int tipc_bclink_reset_stats(struct net *net)
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l)
 {
-	struct tipc_link *l = tipc_bc_sndlink(net);
-
 	if (!l)
 		return -ENOPROTOOPT;
 
@@ -694,7 +692,7 @@ int tipc_bcast_init(struct net *net)
 	tn->bcbase = bb;
 	spin_lock_init(&tipc_net(net)->bclock);
 
-	if (!tipc_link_bc_create(net, 0, 0,
+	if (!tipc_link_bc_create(net, 0, 0, NULL,
 				 FB_MTU,
 				 BCLINK_WIN_DEFAULT,
 				 BCLINK_WIN_DEFAULT,
diff --git a/net/tipc/bcast.h b/net/tipc/bcast.h
index 97d3cf9d3e4d..4240c95188b1 100644
--- a/net/tipc/bcast.h
+++ b/net/tipc/bcast.h
@@ -96,9 +96,10 @@ void tipc_bcast_ack_rcv(struct net *net, struct tipc_link *l,
 int tipc_bcast_sync_rcv(struct net *net, struct tipc_link *l,
 			struct tipc_msg *hdr,
 			struct sk_buff_head *retrq);
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg);
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+			struct tipc_link *bcl);
 int tipc_nl_bc_link_set(struct net *net, struct nlattr *attrs[]);
-int tipc_bclink_reset_stats(struct net *net);
+int tipc_bclink_reset_stats(struct net *net, struct tipc_link *l);
 
 u32 tipc_bcast_get_broadcast_mode(struct net *net);
 u32 tipc_bcast_get_broadcast_ratio(struct net *net);
diff --git a/net/tipc/link.c b/net/tipc/link.c
index af352391e2ab..ee3b8d0576b8 100644
--- a/net/tipc/link.c
+++ b/net/tipc/link.c
@@ -539,7 +539,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
  *
  * Returns true if link was created, otherwise false
  */
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
 			 int mtu, u32 min_win, u32 max_win, u16 peer_caps,
 			 struct sk_buff_head *inputq,
 			 struct sk_buff_head *namedq,
@@ -554,7 +554,18 @@ bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
 		return false;
 
 	l = *link;
-	strcpy(l->name, tipc_bclink_name);
+	if (peer_id) {
+		char peer_str[NODE_ID_STR_LEN] = {0,};
+
+		tipc_nodeid2string(peer_str, peer_id);
+		if (strlen(peer_str) > 16)
+			sprintf(peer_str, "%x", peer);
+		/* Broadcast receiver link name: "broadcast-link:<peer>" */
+		snprintf(l->name, sizeof(l->name), "%s:%s", tipc_bclink_name,
+			 peer_str);
+	} else {
+		strcpy(l->name, tipc_bclink_name);
+	}
 	trace_tipc_link_reset(l, TIPC_DUMP_ALL, "bclink created!");
 	tipc_link_reset(l);
 	l->state = LINK_RESET;
@@ -1412,11 +1423,8 @@ static u8 __tipc_build_gap_ack_blks(struct tipc_gap_ack_blks *ga,
 			gacks[n].ack = htons(expect - 1);
 			gacks[n].gap = htons(seqno - expect);
 			if (++n >= MAX_GAP_ACK_BLKS / 2) {
-				char buf[TIPC_MAX_LINK_NAME];
-
 				pr_info_ratelimited("Gacks on %s: %d, ql: %d!\n",
-						    tipc_link_name_ext(l, buf),
-						    n,
+						    l->name, n,
 						    skb_queue_len(&l->deferdq));
 				return n;
 			}
@@ -1587,6 +1595,8 @@ release:
 			_skb->priority = TC_PRIO_CONTROL;
 			__skb_queue_tail(xmitq, _skb);
 			l->stats.retransmitted++;
+			if (!is_uc)
+				r->stats.retransmitted++;
 			*retransmitted = true;
 			/* Increase actual retrans counter & mark first time */
 			if (!TIPC_SKB_CB(skb)->retr_cnt++)
@@ -1753,7 +1763,8 @@ int tipc_link_rcv(struct tipc_link *l, struct sk_buff *skb,
 
 		/* Defer delivery if sequence gap */
 		if (unlikely(seqno != rcv_nxt)) {
-			__tipc_skb_queue_sorted(defq, seqno, skb);
+			if (!__tipc_skb_queue_sorted(defq, seqno, skb))
+				l->stats.duplicates++;
 			rc |= tipc_link_build_nack_msg(l, xmitq);
 			break;
 		}
@@ -1787,15 +1798,15 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 				      int tolerance, int priority,
 				      struct sk_buff_head *xmitq)
 {
+	struct tipc_mon_state *mstate = &l->mon_state;
+	struct sk_buff_head *dfq = &l->deferdq;
 	struct tipc_link *bcl = l->bc_rcvlink;
-	struct sk_buff *skb;
 	struct tipc_msg *hdr;
-	struct sk_buff_head *dfq = &l->deferdq;
+	struct sk_buff *skb;
 	bool node_up = link_is_up(bcl);
-	struct tipc_mon_state *mstate = &l->mon_state;
+	u16 glen = 0, bc_rcvgap = 0;
 	int dlen = 0;
 	void *data;
-	u16 glen = 0;
 
 	/* Don't send protocol message during reset or link failover */
 	if (tipc_link_is_blocked(l))
@@ -1833,7 +1844,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 		if (l->peer_caps & TIPC_LINK_PROTO_SEQNO)
 			msg_set_seqno(hdr, l->snd_nxt_state++);
 		msg_set_seq_gap(hdr, rcvgap);
-		msg_set_bc_gap(hdr, link_bc_rcv_gap(bcl));
+		bc_rcvgap = link_bc_rcv_gap(bcl);
+		msg_set_bc_gap(hdr, bc_rcvgap);
 		msg_set_probe(hdr, probe);
 		msg_set_is_keepalive(hdr, probe || probe_reply);
 		if (l->peer_caps & TIPC_GAP_ACK_BLOCK)
@@ -1858,6 +1870,8 @@ static void tipc_link_build_proto_msg(struct tipc_link *l, int mtyp, bool probe,
 		l->stats.sent_probes++;
 	if (rcvgap)
 		l->stats.sent_nacks++;
+	if (bc_rcvgap)
+		bcl->stats.sent_nacks++;
 	skb->priority = TC_PRIO_CONTROL;
 	__skb_queue_tail(xmitq, skb);
 	trace_tipc_proto_build(skb, false, l->name);
@@ -2358,8 +2372,6 @@ int tipc_link_bc_sync_rcv(struct tipc_link *l, struct tipc_msg *hdr,
 	if (!l->bc_peer_is_up)
 		return rc;
 
-	l->stats.recv_nacks++;
-
 	/* Ignore if peers_snd_nxt goes beyond receive window */
 	if (more(peers_snd_nxt, l->rcv_nxt + l->window))
 		return rc;
@@ -2410,6 +2422,11 @@ int tipc_link_bc_ack_rcv(struct tipc_link *r, u16 acked, u16 gap,
 	if (!link_is_up(r) || !r->bc_peer_is_up)
 		return 0;
 
+	if (gap) {
+		l->stats.recv_nacks++;
+		r->stats.recv_nacks++;
+	}
+
 	if (less(acked, r->acked) || (acked == r->acked && !gap && !ga))
 		return 0;
 
@@ -2721,16 +2738,15 @@ msg_full:
 	return -EMSGSIZE;
 }
 
-int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg)
+int tipc_nl_add_bc_link(struct net *net, struct tipc_nl_msg *msg,
+			struct tipc_link *bcl)
 {
 	int err;
 	void *hdr;
 	struct nlattr *attrs;
 	struct nlattr *prop;
-	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	u32 bc_mode = tipc_bcast_get_broadcast_mode(net);
 	u32 bc_ratio = tipc_bcast_get_broadcast_ratio(net);
-	struct tipc_link *bcl = tn->bcl;
 
 	if (!bcl)
 		return 0;
@@ -2817,21 +2833,6 @@ void tipc_link_set_abort_limit(struct tipc_link *l, u32 limit)
 	l->abort_limit = limit;
 }
 
-char *tipc_link_name_ext(struct tipc_link *l, char *buf)
-{
-	if (!l)
-		scnprintf(buf, TIPC_MAX_LINK_NAME, "null");
-	else if (link_is_bc_sndlink(l))
-		scnprintf(buf, TIPC_MAX_LINK_NAME, "broadcast-sender");
-	else if (link_is_bc_rcvlink(l))
-		scnprintf(buf, TIPC_MAX_LINK_NAME,
-			  "broadcast-receiver, peer %x", l->addr);
-	else
-		memcpy(buf, l->name, TIPC_MAX_LINK_NAME);
-
-	return buf;
-}
-
 /**
  * tipc_link_dump - dump TIPC link data
  * @l: tipc link to be dumped
diff --git a/net/tipc/link.h b/net/tipc/link.h
index 4d0768cf91d5..fc07232c9a12 100644
--- a/net/tipc/link.h
+++ b/net/tipc/link.h
@@ -80,7 +80,7 @@ bool tipc_link_create(struct net *net, char *if_name, int bearer_id,
 		      struct sk_buff_head *inputq,
 		      struct sk_buff_head *namedq,
 		      struct tipc_link **link);
-bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer,
+bool tipc_link_bc_create(struct net *net, u32 ownnode, u32 peer, u8 *peer_id,
 			 int mtu, u32 min_win, u32 max_win, u16 peer_caps,
 			 struct sk_buff_head *inputq,
 			 struct sk_buff_head *namedq,
@@ -111,7 +111,6 @@ u16 tipc_link_rcv_nxt(struct tipc_link *l);
 u16 tipc_link_acked(struct tipc_link *l);
 u32 tipc_link_id(struct tipc_link *l);
 char *tipc_link_name(struct tipc_link *l);
-char *tipc_link_name_ext(struct tipc_link *l, char *buf);
 u32 tipc_link_state(struct tipc_link *l);
 char tipc_link_plane(struct tipc_link *l);
 int tipc_link_prio(struct tipc_link *l);
diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 4d0e0bdd997b..c69fb99163fc 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -825,19 +825,19 @@ bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
  * @seqno: sequence number of buffer to add
  * @skb: buffer to add
  */
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 			     struct sk_buff *skb)
 {
 	struct sk_buff *_skb, *tmp;
 
 	if (skb_queue_empty(list) || less(seqno, buf_seqno(skb_peek(list)))) {
 		__skb_queue_head(list, skb);
-		return;
+		return true;
 	}
 
 	if (more(seqno, buf_seqno(skb_peek_tail(list)))) {
 		__skb_queue_tail(list, skb);
-		return;
+		return true;
 	}
 
 	skb_queue_walk_safe(list, _skb, tmp) {
@@ -846,9 +846,10 @@ void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 		if (seqno == buf_seqno(_skb))
 			break;
 		__skb_queue_before(list, _skb, skb);
-		return;
+		return true;
 	}
 	kfree_skb(skb);
+	return false;
 }
 
 void tipc_skb_reject(struct net *net, int err, struct sk_buff *skb,
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index ca5f8689a33b..cd4281779468 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -1145,7 +1145,7 @@ bool tipc_msg_assemble(struct sk_buff_head *list);
 bool tipc_msg_reassemble(struct sk_buff_head *list, struct sk_buff_head *rcvq);
 bool tipc_msg_pskb_copy(u32 dst, struct sk_buff_head *msg,
 			struct sk_buff_head *cpy);
-void __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
+bool __tipc_skb_queue_sorted(struct sk_buff_head *list, u16 seqno,
 			     struct sk_buff *skb);
 bool tipc_msg_skb_clone(struct sk_buff_head *msg, struct sk_buff_head *cpy);
 
diff --git a/net/tipc/netlink.c b/net/tipc/netlink.c
index bb9862410e68..c4aee6247d55 100644
--- a/net/tipc/netlink.c
+++ b/net/tipc/netlink.c
@@ -188,7 +188,7 @@ static const struct genl_ops tipc_genl_v2_ops[] = {
 	},
 	{
 		.cmd	= TIPC_NL_LINK_GET,
-		.validate = GENL_DONT_VALIDATE_STRICT | GENL_DONT_VALIDATE_DUMP,
+		.validate = GENL_DONT_VALIDATE_STRICT,
 		.doit   = tipc_nl_node_get_link,
 		.dumpit	= tipc_nl_node_dump_link,
 	},
diff --git a/net/tipc/node.c b/net/tipc/node.c
index 548207fdec15..0312fb181d94 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -1138,7 +1138,7 @@ void tipc_node_check_dest(struct net *net, u32 addr,
 	if (unlikely(!n->bc_entry.link)) {
 		snd_l = tipc_bc_sndlink(net);
 		if (!tipc_link_bc_create(net, tipc_own_addr(net),
-					 addr, U16_MAX,
+					 addr, peer_id, U16_MAX,
 					 tipc_link_min_win(snd_l),
 					 tipc_link_max_win(snd_l),
 					 n->capabilities,
@@ -2435,7 +2435,7 @@ int tipc_nl_node_get_link(struct sk_buff *skb, struct genl_info *info)
 		return -ENOMEM;
 
 	if (strcmp(name, tipc_bclink_name) == 0) {
-		err = tipc_nl_add_bc_link(net, &msg);
+		err = tipc_nl_add_bc_link(net, &msg, tipc_net(net)->bcl);
 		if (err)
 			goto err_free;
 	} else {
@@ -2479,6 +2479,7 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
 	struct tipc_node *node;
 	struct nlattr *attrs[TIPC_NLA_LINK_MAX + 1];
 	struct net *net = sock_net(skb->sk);
+	struct tipc_net *tn = tipc_net(net);
 	struct tipc_link_entry *le;
 
 	if (!info->attrs[TIPC_NLA_LINK])
@@ -2495,11 +2496,26 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
 
 	link_name = nla_data(attrs[TIPC_NLA_LINK_NAME]);
 
-	if (strcmp(link_name, tipc_bclink_name) == 0) {
-		err = tipc_bclink_reset_stats(net);
+	err = -EINVAL;
+	if (!strcmp(link_name, tipc_bclink_name)) {
+		err = tipc_bclink_reset_stats(net, tipc_bc_sndlink(net));
 		if (err)
 			return err;
 		return 0;
+	} else if (strstr(link_name, tipc_bclink_name)) {
+		rcu_read_lock();
+		list_for_each_entry_rcu(node, &tn->node_list, list) {
+			tipc_node_read_lock(node);
+			link = node->bc_entry.link;
+			if (link && !strcmp(link_name, tipc_link_name(link))) {
+				err = tipc_bclink_reset_stats(net, link);
+				tipc_node_read_unlock(node);
+				break;
+			}
+			tipc_node_read_unlock(node);
+		}
+		rcu_read_unlock();
+		return err;
 	}
 
 	node = tipc_node_find_by_name(net, link_name, &bearer_id);
@@ -2523,7 +2539,8 @@ int tipc_nl_node_reset_link_stats(struct sk_buff *skb, struct genl_info *info)
 
 /* Caller should hold node lock  */
 static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
-				    struct tipc_node *node, u32 *prev_link)
+				    struct tipc_node *node, u32 *prev_link,
+				    bool bc_link)
 {
 	u32 i;
 	int err;
@@ -2539,6 +2556,14 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
 		if (err)
 			return err;
 	}
+
+	if (bc_link) {
+		*prev_link = i;
+		err = tipc_nl_add_bc_link(net, msg, node->bc_entry.link);
+		if (err)
+			return err;
+	}
+
 	*prev_link = 0;
 
 	return 0;
@@ -2547,17 +2572,36 @@ static int __tipc_nl_add_node_links(struct net *net, struct tipc_nl_msg *msg,
 int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
 {
 	struct net *net = sock_net(skb->sk);
+	struct nlattr **attrs = genl_dumpit_info(cb)->attrs;
+	struct nlattr *link[TIPC_NLA_LINK_MAX + 1];
 	struct tipc_net *tn = net_generic(net, tipc_net_id);
 	struct tipc_node *node;
 	struct tipc_nl_msg msg;
 	u32 prev_node = cb->args[0];
 	u32 prev_link = cb->args[1];
 	int done = cb->args[2];
+	bool bc_link = cb->args[3];
 	int err;
 
 	if (done)
 		return 0;
 
+	if (!prev_node) {
+		/* Check if broadcast-receiver links dumping is needed */
+		if (attrs && attrs[TIPC_NLA_LINK]) {
+			err = nla_parse_nested_deprecated(link,
+							  TIPC_NLA_LINK_MAX,
+							  attrs[TIPC_NLA_LINK],
+							  tipc_nl_link_policy,
+							  NULL);
+			if (unlikely(err))
+				return err;
+			if (unlikely(!link[TIPC_NLA_LINK_BROADCAST]))
+				return -EINVAL;
+			bc_link = true;
+		}
+	}
+
 	msg.skb = skb;
 	msg.portid = NETLINK_CB(cb->skb).portid;
 	msg.seq = cb->nlh->nlmsg_seq;
@@ -2581,7 +2625,7 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
 						 list) {
 			tipc_node_read_lock(node);
 			err = __tipc_nl_add_node_links(net, &msg, node,
-						       &prev_link);
+						       &prev_link, bc_link);
 			tipc_node_read_unlock(node);
 			if (err)
 				goto out;
@@ -2589,14 +2633,14 @@ int tipc_nl_node_dump_link(struct sk_buff *skb, struct netlink_callback *cb)
 			prev_node = node->addr;
 		}
 	} else {
-		err = tipc_nl_add_bc_link(net, &msg);
+		err = tipc_nl_add_bc_link(net, &msg, tn->bcl);
 		if (err)
 			goto out;
 
 		list_for_each_entry_rcu(node, &tn->node_list, list) {
 			tipc_node_read_lock(node);
 			err = __tipc_nl_add_node_links(net, &msg, node,
-						       &prev_link);
+						       &prev_link, bc_link);
 			tipc_node_read_unlock(node);
 			if (err)
 				goto out;
@@ -2611,6 +2655,7 @@ out:
 	cb->args[0] = prev_node;
 	cb->args[1] = prev_link;
 	cb->args[2] = done;
+	cb->args[3] = bc_link;
 
 	return skb->len;
 }
diff --git a/net/tipc/trace.h b/net/tipc/trace.h
index e7535ab75255..04af83f0500c 100644
--- a/net/tipc/trace.h
+++ b/net/tipc/trace.h
@@ -255,7 +255,7 @@ DECLARE_EVENT_CLASS(tipc_link_class,
 
 	TP_fast_assign(
 		__assign_str(header, header);
-		tipc_link_name_ext(l, __entry->name);
+		memcpy(__entry->name, tipc_link_name(l), TIPC_MAX_LINK_NAME);
 		tipc_link_dump(l, dqueues, __get_str(buf));
 	),
 
@@ -295,7 +295,7 @@ DECLARE_EVENT_CLASS(tipc_link_transmq_class,
 	),
 
 	TP_fast_assign(
-		tipc_link_name_ext(r, __entry->name);
+		memcpy(__entry->name, tipc_link_name(r), TIPC_MAX_LINK_NAME);
 		__entry->from = f;
 		__entry->to = t;
 		__entry->len = skb_queue_len(tq);
-- 
cgit v1.2.3-59-g8ed1b


From 0a3e060f340dbe232ffa290c40f879b7f7db595b Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 26 May 2020 16:38:38 +0700
Subject: tipc: add test for Nagle algorithm effectiveness

When streaming in Nagle mode, we try to bundle as many small user
messages as possible if there is one outstanding buffer, i.e. one not
yet ACK-ed by the receiving side, which helps boost the overall
throughput. So, the algorithm's effectiveness really depends on when the
Nagle ACK comes or what the specific network latency (RTT) is, compared
to the user's message sending rate.

In a bad case, where the user's sending rate is low or the network
latency is small, there will not be many bundles, so making a Nagle ACK
or waiting for it is not meaningful.
For example, if a user sends a message every 100ms and the RTT is 50ms,
then for each message we require one Nagle ACK, but only one user
message is sent, without any bundling.

In a better case, we may have a few bundles (e.g. with RTT = 300ms),
but if the user sends medium-sized messages, there will not be any
difference at all: e.g. 3 x 1000-byte data messages, if bundled, will
still result in 3 bundles with MTU = 1500.

When Nagle is ineffective, the delay added to user message sending is
clearly wasted compared to sending the messages directly.

Besides, adding Nagle ACKs will consume some processor load on both the
sending and receiving sides.

This commit adds a test on the effectiveness of the Nagle algorithm for
an individual connection in the network on which it actually runs.
Particularly, upon receipt of a Nagle ACK we will compare the number of
bundles in the backlog queue to the number of user messages which would
be sent directly without Nagle. If the ratio is good (e.g. >= 2), Nagle
mode will be kept for further message sending. Otherwise, we will leave
Nagle and put a 'penalty' on the connection, so it will have to spend
more 'one-way' messages before being able to re-enter Nagle.

In addition, the 'ack-required' bit is only set when really needed, so
that the number of Nagle ACKs will be reduced during Nagle mode.

Testing with a benchmark showed that with the patch, there was not much
difference in throughput for small messages since the tool continuously
sends messages without a break, so Nagle would still take effect.

Acked-by: Ying Xue <ying.xue@windriver.com>
Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c    |  3 ---
 net/tipc/msg.h    | 14 ++++++++++--
 net/tipc/socket.c | 64 ++++++++++++++++++++++++++++++++++++++++++++-----------
 3 files changed, 64 insertions(+), 17 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index c69fb99163fc..23809039dda1 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -235,9 +235,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
 			msg_set_size(hdr, MIN_H_SIZE);
 			__skb_queue_tail(txq, skb);
 			total += 1;
-			if (prev)
-				msg_set_ack_required(buf_msg(prev), 0);
-			msg_set_ack_required(hdr, 1);
 		}
 		hdr = buf_msg(skb);
 		curr = msg_blocks(hdr);
diff --git a/net/tipc/msg.h b/net/tipc/msg.h
index cd4281779468..58660d56bc83 100644
--- a/net/tipc/msg.h
+++ b/net/tipc/msg.h
@@ -340,9 +340,19 @@ static inline int msg_ack_required(struct tipc_msg *m)
 	return msg_bits(m, 0, 18, 1);
 }
 
-static inline void msg_set_ack_required(struct tipc_msg *m, u32 d)
+static inline void msg_set_ack_required(struct tipc_msg *m)
 {
-	msg_set_bits(m, 0, 18, 1, d);
+	msg_set_bits(m, 0, 18, 1, 1);
+}
+
+static inline int msg_nagle_ack(struct tipc_msg *m)
+{
+	return msg_bits(m, 0, 18, 1);
+}
+
+static inline void msg_set_nagle_ack(struct tipc_msg *m)
+{
+	msg_set_bits(m, 0, 18, 1, 1);
 }
 
 static inline bool msg_is_rcast(struct tipc_msg *m)
diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index e370ad0edd76..d6b67d07d22e 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -48,6 +48,8 @@
 #include "group.h"
 #include "trace.h"
 
+#define NAGLE_START_INIT	4
+#define NAGLE_START_MAX		1024
 #define CONN_TIMEOUT_DEFAULT    8000    /* default connect timeout = 8s */
 #define CONN_PROBING_INTV	msecs_to_jiffies(3600000)  /* [ms] => 1 h */
 #define TIPC_FWD_MSG		1
@@ -119,7 +121,10 @@ struct tipc_sock {
 	struct rcu_head rcu;
 	struct tipc_group *group;
 	u32 oneway;
+	u32 nagle_start;
 	u16 snd_backlog;
+	u16 msg_acc;
+	u16 pkt_cnt;
 	bool expect_ack;
 	bool nodelay;
 	bool group_is_open;
@@ -143,7 +148,7 @@ static int tipc_sk_insert(struct tipc_sock *tsk);
 static void tipc_sk_remove(struct tipc_sock *tsk);
 static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dsz);
 static int __tipc_sendmsg(struct socket *sock, struct msghdr *m, size_t dsz);
-static void tipc_sk_push_backlog(struct tipc_sock *tsk);
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack);
 
 static const struct proto_ops packet_ops;
 static const struct proto_ops stream_ops;
@@ -474,6 +479,7 @@ static int tipc_sk_create(struct net *net, struct socket *sock,
 	tsk = tipc_sk(sk);
 	tsk->max_pkt = MAX_PKT_DEFAULT;
 	tsk->maxnagle = 0;
+	tsk->nagle_start = NAGLE_START_INIT;
 	INIT_LIST_HEAD(&tsk->publications);
 	INIT_LIST_HEAD(&tsk->cong_links);
 	msg = &tsk->phdr;
@@ -541,7 +547,7 @@ static void __tipc_shutdown(struct socket *sock, int error)
 					    !tsk_conn_cong(tsk)));
 
 	/* Push out delayed messages if in Nagle mode */
-	tipc_sk_push_backlog(tsk);
+	tipc_sk_push_backlog(tsk, false);
 	/* Remove pending SYN */
 	__skb_queue_purge(&sk->sk_write_queue);
 
@@ -1252,14 +1258,37 @@ void tipc_sk_mcast_rcv(struct net *net, struct sk_buff_head *arrvq,
 /* tipc_sk_push_backlog(): send accumulated buffers in socket write queue
  *                         when socket is in Nagle mode
  */
-static void tipc_sk_push_backlog(struct tipc_sock *tsk)
+static void tipc_sk_push_backlog(struct tipc_sock *tsk, bool nagle_ack)
 {
 	struct sk_buff_head *txq = &tsk->sk.sk_write_queue;
+	struct sk_buff *skb = skb_peek_tail(txq);
 	struct net *net = sock_net(&tsk->sk);
 	u32 dnode = tsk_peer_node(tsk);
-	struct sk_buff *skb = skb_peek(txq);
 	int rc;
 
+	if (nagle_ack) {
+		tsk->pkt_cnt += skb_queue_len(txq);
+		if (!tsk->pkt_cnt || tsk->msg_acc / tsk->pkt_cnt < 2) {
+			tsk->oneway = 0;
+			if (tsk->nagle_start < NAGLE_START_MAX)
+				tsk->nagle_start *= 2;
+			tsk->expect_ack = false;
+			pr_debug("tsk %10u: bad nagle %u -> %u, next start %u!\n",
+				 tsk->portid, tsk->msg_acc, tsk->pkt_cnt,
+				 tsk->nagle_start);
+		} else {
+			tsk->nagle_start = NAGLE_START_INIT;
+			if (skb) {
+				msg_set_ack_required(buf_msg(skb));
+				tsk->expect_ack = true;
+			} else {
+				tsk->expect_ack = false;
+			}
+		}
+		tsk->msg_acc = 0;
+		tsk->pkt_cnt = 0;
+	}
+
 	if (!skb || tsk->cong_link_cnt)
 		return;
 
@@ -1267,9 +1296,10 @@ static void tipc_sk_push_backlog(struct tipc_sock *tsk)
 	if (msg_is_syn(buf_msg(skb)))
 		return;
 
+	if (tsk->msg_acc)
+		tsk->pkt_cnt += skb_queue_len(txq);
 	tsk->snt_unacked += tsk->snd_backlog;
 	tsk->snd_backlog = 0;
-	tsk->expect_ack = true;
 	rc = tipc_node_xmit(net, txq, dnode, tsk->portid);
 	if (rc == -ELINKCONG)
 		tsk->cong_link_cnt = 1;
@@ -1322,8 +1352,7 @@ static void tipc_sk_conn_proto_rcv(struct tipc_sock *tsk, struct sk_buff *skb,
 		return;
 	} else if (mtyp == CONN_ACK) {
 		was_cong = tsk_conn_cong(tsk);
-		tsk->expect_ack = false;
-		tipc_sk_push_backlog(tsk);
+		tipc_sk_push_backlog(tsk, msg_nagle_ack(hdr));
 		tsk->snt_unacked -= msg_conn_ack(hdr);
 		if (tsk->peer_caps & TIPC_BLOCK_FLOWCTL)
 			tsk->snd_win = msg_adv_win(hdr);
@@ -1516,6 +1545,7 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 	struct tipc_sock *tsk = tipc_sk(sk);
 	struct tipc_msg *hdr = &tsk->phdr;
 	struct net *net = sock_net(sk);
+	struct sk_buff *skb;
 	u32 dnode = tsk_peer_node(tsk);
 	int maxnagle = tsk->maxnagle;
 	int maxpkt = tsk->max_pkt;
@@ -1544,17 +1574,25 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 			break;
 		send = min_t(size_t, dlen - sent, TIPC_MAX_USER_MSG_SIZE);
 		blocks = tsk->snd_backlog;
-		if (tsk->oneway++ >= 4 && send <= maxnagle) {
+		if (tsk->oneway++ >= tsk->nagle_start && send <= maxnagle) {
 			rc = tipc_msg_append(hdr, m, send, maxnagle, txq);
 			if (unlikely(rc < 0))
 				break;
 			blocks += rc;
+			tsk->msg_acc++;
 			if (blocks <= 64 && tsk->expect_ack) {
 				tsk->snd_backlog = blocks;
 				sent += send;
 				break;
+			} else if (blocks > 64) {
+				tsk->pkt_cnt += skb_queue_len(txq);
+			} else {
+				skb = skb_peek_tail(txq);
+				msg_set_ack_required(buf_msg(skb));
+				tsk->expect_ack = true;
+				tsk->msg_acc = 0;
+				tsk->pkt_cnt = 0;
 			}
-			tsk->expect_ack = true;
 		} else {
 			rc = tipc_msg_build(hdr, m, sent, send, maxpkt, txq);
 			if (unlikely(rc != send))
@@ -2091,7 +2129,7 @@ static void tipc_sk_proto_rcv(struct sock *sk,
 		smp_wmb();
 		tsk->cong_link_cnt--;
 		wakeup = true;
-		tipc_sk_push_backlog(tsk);
+		tipc_sk_push_backlog(tsk, false);
 		break;
 	case GROUP_PROTOCOL:
 		tipc_group_proto_rcv(grp, &wakeup, hdr, inputq, xmitq);
@@ -2180,7 +2218,7 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
 		return false;
 	case TIPC_ESTABLISHED:
 		if (!skb_queue_empty(&sk->sk_write_queue))
-			tipc_sk_push_backlog(tsk);
+			tipc_sk_push_backlog(tsk, false);
 		/* Accept only connection-based messages sent by peer */
 		if (likely(con_msg && !err && pport == oport &&
 			   pnode == onode)) {
@@ -2188,8 +2226,10 @@ static bool tipc_sk_filter_connect(struct tipc_sock *tsk, struct sk_buff *skb,
 				struct sk_buff *skb;
 
 				skb = tipc_sk_build_ack(tsk);
-				if (skb)
+				if (skb) {
+					msg_set_nagle_ack(buf_msg(skb));
 					__skb_queue_tail(xmitq, skb);
+				}
 			}
 			return true;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 58cff782cc55eb755826c649976aea9f5f8b3086 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Tue, 26 May 2020 14:29:00 +0200
Subject: flow_dissector: Parse multiple MPLS Label Stack Entries

The current MPLS dissector only parses the first MPLS Label Stack
Entry (second LSE can be parsed too, but only to set a key_id).

This patch adds the possibility to parse several LSEs by making
__skb_flow_dissect_mpls() return FLOW_DISSECT_RET_PROTO_AGAIN as long
as the Bottom Of Stack bit hasn't been seen, up to a maximum of
FLOW_DIS_MPLS_MAX entries.

FLOW_DIS_MPLS_MAX is arbitrarily set to 7. This should be enough for
many practical purposes, without wasting too much space.

To record the parsed values, flow_dissector_key_mpls is modified to
store an array of stack entries, instead of just the values of the
first one. A bit field, "used_lses", is also added to keep track of
the LSEs that have been set. The objective is to avoid defining a
new FLOW_DISSECTOR_KEY_MPLS_XX for each level of the MPLS stack.

TC flower is adapted for the new struct flow_dissector_key_mpls layout.
Matching on several MPLS Label Stack Entries will be added in the next
patch.

The NFP and MLX5 drivers are also adapted: nfp_flower_compile_mac() and
mlx5's parse_tunnel() now verify that the rule only uses the first LSE
and fail if it doesn't.

Finally, the behaviour of the FLOW_DISSECTOR_KEY_MPLS_ENTROPY key is
slightly modified. Instead of recording the first Entropy Label, it
now records the last one. This shouldn't have any consequences since
there doesn't seem to be any user of FLOW_DISSECTOR_KEY_MPLS_ENTROPY
in the tree. We'd probably better do a hash of all parsed MPLS labels
instead (excluding reserved labels) anyway. That'd give better entropy
and would probably also simplify the code. But that's not the purpose
of this patch, so I'm keeping that as a future possible improvement.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../mellanox/mlx5/core/en/tc_tun_mplsoudp.c        | 27 +++++++----
 drivers/net/ethernet/netronome/nfp/flower/match.c  | 42 ++++++++++++-----
 include/net/flow_dissector.h                       | 14 +++++-
 net/core/flow_dissector.c                          | 49 +++++++++++++-------
 net/sched/cls_flower.c                             | 52 +++++++++++++++-------
 5 files changed, 132 insertions(+), 52 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index 98ee62e427d2..b4a3c96d34fd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -101,25 +101,36 @@ static int parse_tunnel(struct mlx5e_priv *priv,
 
 	flow_rule_match_mpls(rule, &match);
 
+	/* Only support matching the first LSE */
+	if (match.mask->used_lses != 1)
+		return -EOPNOTSUPP;
+
 	MLX5_SET(fte_match_set_misc2, misc2_c,
-		 outer_first_mpls_over_udp.mpls_label, match.mask->mpls_label);
+		 outer_first_mpls_over_udp.mpls_label,
+		 match.mask->ls[0].mpls_label);
 	MLX5_SET(fte_match_set_misc2, misc2_v,
-		 outer_first_mpls_over_udp.mpls_label, match.key->mpls_label);
+		 outer_first_mpls_over_udp.mpls_label,
+		 match.key->ls[0].mpls_label);
 
 	MLX5_SET(fte_match_set_misc2, misc2_c,
-		 outer_first_mpls_over_udp.mpls_exp, match.mask->mpls_tc);
+		 outer_first_mpls_over_udp.mpls_exp,
+		 match.mask->ls[0].mpls_tc);
 	MLX5_SET(fte_match_set_misc2, misc2_v,
-		 outer_first_mpls_over_udp.mpls_exp, match.key->mpls_tc);
+		 outer_first_mpls_over_udp.mpls_exp, match.key->ls[0].mpls_tc);
 
 	MLX5_SET(fte_match_set_misc2, misc2_c,
-		 outer_first_mpls_over_udp.mpls_s_bos, match.mask->mpls_bos);
+		 outer_first_mpls_over_udp.mpls_s_bos,
+		 match.mask->ls[0].mpls_bos);
 	MLX5_SET(fte_match_set_misc2, misc2_v,
-		 outer_first_mpls_over_udp.mpls_s_bos, match.key->mpls_bos);
+		 outer_first_mpls_over_udp.mpls_s_bos,
+		 match.key->ls[0].mpls_bos);
 
 	MLX5_SET(fte_match_set_misc2, misc2_c,
-		 outer_first_mpls_over_udp.mpls_ttl, match.mask->mpls_ttl);
+		 outer_first_mpls_over_udp.mpls_ttl,
+		 match.mask->ls[0].mpls_ttl);
 	MLX5_SET(fte_match_set_misc2, misc2_v,
-		 outer_first_mpls_over_udp.mpls_ttl, match.key->mpls_ttl);
+		 outer_first_mpls_over_udp.mpls_ttl,
+		 match.key->ls[0].mpls_ttl);
 	spec->match_criteria_enable |= MLX5_MATCH_MISC_PARAMETERS_2;
 
 	return 0;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/match.c b/drivers/net/ethernet/netronome/nfp/flower/match.c
index 546bc01d507d..f7f01e2e3dce 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/match.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/match.c
@@ -74,9 +74,10 @@ nfp_flower_compile_port(struct nfp_flower_in_port *frame, u32 cmsg_port,
 	return 0;
 }
 
-static void
+static int
 nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
-		       struct nfp_flower_mac_mpls *msk, struct flow_rule *rule)
+		       struct nfp_flower_mac_mpls *msk, struct flow_rule *rule,
+		       struct netlink_ext_ack *extack)
 {
 	memset(ext, 0, sizeof(struct nfp_flower_mac_mpls));
 	memset(msk, 0, sizeof(struct nfp_flower_mac_mpls));
@@ -97,14 +98,28 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
 		u32 t_mpls;
 
 		flow_rule_match_mpls(rule, &match);
-		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.key->mpls_label) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.key->mpls_tc) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.key->mpls_bos) |
+
+		/* Only support matching the first LSE */
+		if (match.mask->used_lses != 1) {
+			NL_SET_ERR_MSG_MOD(extack,
+					   "unsupported offload: invalid LSE depth for MPLS match offload");
+			return -EOPNOTSUPP;
+		}
+
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+				    match.key->ls[0].mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+				    match.key->ls[0].mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+				    match.key->ls[0].mpls_bos) |
 			 NFP_FLOWER_MASK_MPLS_Q;
 		ext->mpls_lse = cpu_to_be32(t_mpls);
-		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB, match.mask->mpls_label) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC, match.mask->mpls_tc) |
-			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS, match.mask->mpls_bos) |
+		t_mpls = FIELD_PREP(NFP_FLOWER_MASK_MPLS_LB,
+				    match.mask->ls[0].mpls_label) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_TC,
+				    match.mask->ls[0].mpls_tc) |
+			 FIELD_PREP(NFP_FLOWER_MASK_MPLS_BOS,
+				    match.mask->ls[0].mpls_bos) |
 			 NFP_FLOWER_MASK_MPLS_Q;
 		msk->mpls_lse = cpu_to_be32(t_mpls);
 	} else if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_BASIC)) {
@@ -121,6 +136,8 @@ nfp_flower_compile_mac(struct nfp_flower_mac_mpls *ext,
 			msk->mpls_lse = cpu_to_be32(NFP_FLOWER_MASK_MPLS_Q);
 		}
 	}
+
+	return 0;
 }
 
 static void
@@ -461,9 +478,12 @@ int nfp_flower_compile_flow_match(struct nfp_app *app,
 	msk += sizeof(struct nfp_flower_in_port);
 
 	if (NFP_FLOWER_LAYER_MAC & key_ls->key_layer) {
-		nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
-				       (struct nfp_flower_mac_mpls *)msk,
-				       rule);
+		err = nfp_flower_compile_mac((struct nfp_flower_mac_mpls *)ext,
+					     (struct nfp_flower_mac_mpls *)msk,
+					     rule, extack);
+		if (err)
+			return err;
+
 		ext += sizeof(struct nfp_flower_mac_mpls);
 		msk += sizeof(struct nfp_flower_mac_mpls);
 	}
diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 628383915827..4fb1a69c6ecf 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -59,13 +59,25 @@ struct flow_dissector_key_vlan {
 	__be16	vlan_tpid;
 };
 
-struct flow_dissector_key_mpls {
+struct flow_dissector_mpls_lse {
 	u32	mpls_ttl:8,
 		mpls_bos:1,
 		mpls_tc:3,
 		mpls_label:20;
 };
 
+#define FLOW_DIS_MPLS_MAX 7
+struct flow_dissector_key_mpls {
+	struct flow_dissector_mpls_lse ls[FLOW_DIS_MPLS_MAX]; /* Label Stack */
+	u8 used_lses; /* One bit set for each Label Stack Entry in use */
+};
+
+static inline void dissector_set_mpls_lse(struct flow_dissector_key_mpls *mpls,
+					  int lse_index)
+{
+	mpls->used_lses |= 1 << lse_index;
+}
+
 #define FLOW_DIS_TUN_OPTS_MAX 255
 /**
  * struct flow_dissector_key_enc_opts:
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 5dceed467f64..0aeb33572feb 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -480,47 +480,59 @@ EXPORT_SYMBOL(skb_flow_dissect_tunnel_info);
 static enum flow_dissect_ret
 __skb_flow_dissect_mpls(const struct sk_buff *skb,
 			struct flow_dissector *flow_dissector,
-			void *target_container, void *data, int nhoff, int hlen)
+			void *target_container, void *data, int nhoff, int hlen,
+			int lse_index, bool *entropy_label)
 {
-	struct flow_dissector_key_keyid *key_keyid;
-	struct mpls_label *hdr, _hdr[2];
-	u32 entry, label;
+	struct mpls_label *hdr, _hdr;
+	u32 entry, label, bos;
 
 	if (!dissector_uses_key(flow_dissector,
 				FLOW_DISSECTOR_KEY_MPLS_ENTROPY) &&
 	    !dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS))
 		return FLOW_DISSECT_RET_OUT_GOOD;
 
+	if (lse_index >= FLOW_DIS_MPLS_MAX)
+		return FLOW_DISSECT_RET_OUT_GOOD;
+
 	hdr = __skb_header_pointer(skb, nhoff, sizeof(_hdr), data,
 				   hlen, &_hdr);
 	if (!hdr)
 		return FLOW_DISSECT_RET_OUT_BAD;
 
-	entry = ntohl(hdr[0].entry);
+	entry = ntohl(hdr->entry);
 	label = (entry & MPLS_LS_LABEL_MASK) >> MPLS_LS_LABEL_SHIFT;
+	bos = (entry & MPLS_LS_S_MASK) >> MPLS_LS_S_SHIFT;
 
 	if (dissector_uses_key(flow_dissector, FLOW_DISSECTOR_KEY_MPLS)) {
 		struct flow_dissector_key_mpls *key_mpls;
+		struct flow_dissector_mpls_lse *lse;
 
 		key_mpls = skb_flow_dissector_target(flow_dissector,
 						     FLOW_DISSECTOR_KEY_MPLS,
 						     target_container);
-		key_mpls->mpls_label = label;
-		key_mpls->mpls_ttl = (entry & MPLS_LS_TTL_MASK)
-					>> MPLS_LS_TTL_SHIFT;
-		key_mpls->mpls_tc = (entry & MPLS_LS_TC_MASK)
-					>> MPLS_LS_TC_SHIFT;
-		key_mpls->mpls_bos = (entry & MPLS_LS_S_MASK)
-					>> MPLS_LS_S_SHIFT;
+		lse = &key_mpls->ls[lse_index];
+
+		lse->mpls_ttl = (entry & MPLS_LS_TTL_MASK) >> MPLS_LS_TTL_SHIFT;
+		lse->mpls_bos = bos;
+		lse->mpls_tc = (entry & MPLS_LS_TC_MASK) >> MPLS_LS_TC_SHIFT;
+		lse->mpls_label = label;
+		dissector_set_mpls_lse(key_mpls, lse_index);
 	}
 
-	if (label == MPLS_LABEL_ENTROPY) {
+	if (*entropy_label &&
+	    dissector_uses_key(flow_dissector,
+			       FLOW_DISSECTOR_KEY_MPLS_ENTROPY)) {
+		struct flow_dissector_key_keyid *key_keyid;
+
 		key_keyid = skb_flow_dissector_target(flow_dissector,
 						      FLOW_DISSECTOR_KEY_MPLS_ENTROPY,
 						      target_container);
-		key_keyid->keyid = hdr[1].entry & htonl(MPLS_LS_LABEL_MASK);
+		key_keyid->keyid = cpu_to_be32(label);
 	}
-	return FLOW_DISSECT_RET_OUT_GOOD;
+
+	*entropy_label = label == MPLS_LABEL_ENTROPY;
+
+	return bos ? FLOW_DISSECT_RET_OUT_GOOD : FLOW_DISSECT_RET_PROTO_AGAIN;
 }
 
 static enum flow_dissect_ret
@@ -979,6 +991,8 @@ bool __skb_flow_dissect(const struct net *net,
 	struct bpf_prog *attached = NULL;
 	enum flow_dissect_ret fdret;
 	enum flow_dissector_key_id dissector_vlan = FLOW_DISSECTOR_KEY_MAX;
+	bool mpls_el = false;
+	int mpls_lse = 0;
 	int num_hdrs = 0;
 	u8 ip_proto = 0;
 	bool ret;
@@ -1278,7 +1292,10 @@ proto_again:
 	case htons(ETH_P_MPLS_MC):
 		fdret = __skb_flow_dissect_mpls(skb, flow_dissector,
 						target_container, data,
-						nhoff, hlen);
+						nhoff, hlen, mpls_lse,
+						&mpls_el);
+		nhoff += sizeof(struct mpls_label);
+		mpls_lse++;
 		break;
 	case htons(ETH_P_FCOE):
 		if ((hlen - nhoff) < FCOE_HEADER_LEN) {
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 0c574700da75..f524afe0b7f5 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -781,9 +781,17 @@ static int fl_set_key_mpls(struct nlattr **tb,
 			   struct flow_dissector_key_mpls *key_mask,
 			   struct netlink_ext_ack *extack)
 {
+	struct flow_dissector_mpls_lse *lse_mask;
+	struct flow_dissector_mpls_lse *lse_val;
+
+	lse_val = &key_val->ls[0];
+	lse_mask = &key_mask->ls[0];
+
 	if (tb[TCA_FLOWER_KEY_MPLS_TTL]) {
-		key_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
-		key_mask->mpls_ttl = MPLS_TTL_MASK;
+		lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TTL]);
+		lse_mask->mpls_ttl = MPLS_TTL_MASK;
+		dissector_set_mpls_lse(key_val, 0);
+		dissector_set_mpls_lse(key_mask, 0);
 	}
 	if (tb[TCA_FLOWER_KEY_MPLS_BOS]) {
 		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_BOS]);
@@ -794,8 +802,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
 					    "Bottom Of Stack (BOS) must be 0 or 1");
 			return -EINVAL;
 		}
-		key_val->mpls_bos = bos;
-		key_mask->mpls_bos = MPLS_BOS_MASK;
+		lse_val->mpls_bos = bos;
+		lse_mask->mpls_bos = MPLS_BOS_MASK;
+		dissector_set_mpls_lse(key_val, 0);
+		dissector_set_mpls_lse(key_mask, 0);
 	}
 	if (tb[TCA_FLOWER_KEY_MPLS_TC]) {
 		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_TC]);
@@ -806,8 +816,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
 					    "Traffic Class (TC) must be between 0 and 7");
 			return -EINVAL;
 		}
-		key_val->mpls_tc = tc;
-		key_mask->mpls_tc = MPLS_TC_MASK;
+		lse_val->mpls_tc = tc;
+		lse_mask->mpls_tc = MPLS_TC_MASK;
+		dissector_set_mpls_lse(key_val, 0);
+		dissector_set_mpls_lse(key_mask, 0);
 	}
 	if (tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
 		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_LABEL]);
@@ -818,8 +830,10 @@ static int fl_set_key_mpls(struct nlattr **tb,
 					    "Label must be between 0 and 1048575");
 			return -EINVAL;
 		}
-		key_val->mpls_label = label;
-		key_mask->mpls_label = MPLS_LABEL_MASK;
+		lse_val->mpls_label = label;
+		lse_mask->mpls_label = MPLS_LABEL_MASK;
+		dissector_set_mpls_lse(key_val, 0);
+		dissector_set_mpls_lse(key_mask, 0);
 	}
 	return 0;
 }
@@ -2222,31 +2236,37 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
 			    struct flow_dissector_key_mpls *mpls_key,
 			    struct flow_dissector_key_mpls *mpls_mask)
 {
+	struct flow_dissector_mpls_lse *lse_mask;
+	struct flow_dissector_mpls_lse *lse_key;
 	int err;
 
 	if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
 		return 0;
-	if (mpls_mask->mpls_ttl) {
+
+	lse_mask = &mpls_mask->ls[0];
+	lse_key = &mpls_key->ls[0];
+
+	if (lse_mask->mpls_ttl) {
 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
-				 mpls_key->mpls_ttl);
+				 lse_key->mpls_ttl);
 		if (err)
 			return err;
 	}
-	if (mpls_mask->mpls_tc) {
+	if (lse_mask->mpls_tc) {
 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TC,
-				 mpls_key->mpls_tc);
+				 lse_key->mpls_tc);
 		if (err)
 			return err;
 	}
-	if (mpls_mask->mpls_label) {
+	if (lse_mask->mpls_label) {
 		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_LABEL,
-				  mpls_key->mpls_label);
+				  lse_key->mpls_label);
 		if (err)
 			return err;
 	}
-	if (mpls_mask->mpls_bos) {
+	if (lse_mask->mpls_bos) {
 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_BOS,
-				 mpls_key->mpls_bos);
+				 lse_key->mpls_bos);
 		if (err)
 			return err;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 61aec25a6db5d0c2e8ab5da6d2d152269d0d9d69 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Tue, 26 May 2020 14:29:04 +0200
Subject: cls_flower: Support filtering on multiple MPLS Label Stack Entries

With struct flow_dissector_key_mpls now recording the first
FLOW_DIS_MPLS_MAX labels, we can extend Flower to filter on any of
these LSEs independently.

In order to avoid creating new netlink attributes for every possible
depth, let's define a new TCA_FLOWER_KEY_MPLS_OPTS nested attribute
that contains the list of LSEs to match. Each LSE is represented by
another attribute, TCA_FLOWER_KEY_MPLS_OPTS_LSE, which then contains
the attributes representing the depth and the MPLS fields to match at
this depth (label, TTL, etc.).

For each MPLS field, the mask is always set to all-ones, as this is
what the original API did. We could allow user configurable masks in
the future if there is demand for more flexibility.

The new API also allows specifying only an LSE depth. In that case,
Flower only verifies that the MPLS label stack depth is greater than or
equal to the provided depth (that is, an LSE exists at this depth).

Filters that only match on one (or more) fields of the first LSE are
dumped using the old netlink attributes, to avoid confusing user space
programs that don't understand the new API.

Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/pkt_cls.h |  23 ++++
 net/sched/cls_flower.c       | 243 ++++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 265 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/pkt_cls.h b/include/uapi/linux/pkt_cls.h
index fc672b232437..7576209d96f9 100644
--- a/include/uapi/linux/pkt_cls.h
+++ b/include/uapi/linux/pkt_cls.h
@@ -576,6 +576,8 @@ enum {
 	TCA_FLOWER_KEY_CT_LABELS,	/* u128 */
 	TCA_FLOWER_KEY_CT_LABELS_MASK,	/* u128 */
 
+	TCA_FLOWER_KEY_MPLS_OPTS,
+
 	__TCA_FLOWER_MAX,
 };
 
@@ -640,6 +642,27 @@ enum {
 #define TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX \
 		(__TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX - 1)
 
+enum {
+	TCA_FLOWER_KEY_MPLS_OPTS_UNSPEC,
+	TCA_FLOWER_KEY_MPLS_OPTS_LSE,
+	__TCA_FLOWER_KEY_MPLS_OPTS_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPTS_MAX (__TCA_FLOWER_KEY_MPLS_OPTS_MAX - 1)
+
+enum {
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_UNSPEC,
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+	TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+	__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX,
+};
+
+#define TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX \
+		(__TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX - 1)
+
 enum {
 	TCA_FLOWER_KEY_FLAGS_IS_FRAGMENT = (1 << 0),
 	TCA_FLOWER_KEY_FLAGS_FRAG_IS_FIRST = (1 << 1),
diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index f524afe0b7f5..96f5999281e0 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -668,6 +668,7 @@ static const struct nla_policy fl_policy[TCA_FLOWER_MAX + 1] = {
 	[TCA_FLOWER_KEY_MPLS_BOS]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_MPLS_TC]	= { .type = NLA_U8 },
 	[TCA_FLOWER_KEY_MPLS_LABEL]	= { .type = NLA_U32 },
+	[TCA_FLOWER_KEY_MPLS_OPTS]	= { .type = NLA_NESTED },
 	[TCA_FLOWER_KEY_TCP_FLAGS]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_TCP_FLAGS_MASK]	= { .type = NLA_U16 },
 	[TCA_FLOWER_KEY_IP_TOS]		= { .type = NLA_U8 },
@@ -726,6 +727,20 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]       = { .type = NLA_U8 },
 };
 
+static const struct nla_policy
+mpls_opts_policy[TCA_FLOWER_KEY_MPLS_OPTS_MAX + 1] = {
+	[TCA_FLOWER_KEY_MPLS_OPTS_LSE]    = { .type = NLA_NESTED },
+};
+
+static const struct nla_policy
+mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]    = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]      = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]      = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]       = { .type = NLA_U8 },
+	[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]    = { .type = NLA_U32 },
+};
+
 static void fl_set_key_val(struct nlattr **tb,
 			   void *val, int val_type,
 			   void *mask, int mask_type, int len)
@@ -776,6 +791,126 @@ static int fl_set_key_port_range(struct nlattr **tb, struct fl_flow_key *key,
 	return 0;
 }
 
+static int fl_set_key_mpls_lse(const struct nlattr *nla_lse,
+			       struct flow_dissector_key_mpls *key_val,
+			       struct flow_dissector_key_mpls *key_mask,
+			       struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1];
+	struct flow_dissector_mpls_lse *lse_mask;
+	struct flow_dissector_mpls_lse *lse_val;
+	u8 lse_index;
+	u8 depth;
+	int err;
+
+	err = nla_parse_nested(tb, TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX, nla_lse,
+			       mpls_stack_entry_policy, extack);
+	if (err < 0)
+		return err;
+
+	if (!tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]) {
+		NL_SET_ERR_MSG(extack, "Missing MPLS option \"depth\"");
+		return -EINVAL;
+	}
+
+	depth = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]);
+
+	/* LSE depth starts at 1, for consistency with terminology used by
+	 * RFC 3031 (section 3.9), where depth 0 refers to unlabeled packets.
+	 */
+	if (depth < 1 || depth > FLOW_DIS_MPLS_MAX) {
+		NL_SET_ERR_MSG_ATTR(extack,
+				    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH],
+				    "Invalid MPLS depth");
+		return -EINVAL;
+	}
+	lse_index = depth - 1;
+
+	dissector_set_mpls_lse(key_val, lse_index);
+	dissector_set_mpls_lse(key_mask, lse_index);
+
+	lse_val = &key_val->ls[lse_index];
+	lse_mask = &key_mask->ls[lse_index];
+
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]) {
+		lse_val->mpls_ttl = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL]);
+		lse_mask->mpls_ttl = MPLS_TTL_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]) {
+		u8 bos = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS]);
+
+		if (bos & ~MPLS_BOS_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS],
+					    "Bottom Of Stack (BOS) must be 0 or 1");
+			return -EINVAL;
+		}
+		lse_val->mpls_bos = bos;
+		lse_mask->mpls_bos = MPLS_BOS_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]) {
+		u8 tc = nla_get_u8(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC]);
+
+		if (tc & ~MPLS_TC_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_TC],
+					    "Traffic Class (TC) must be between 0 and 7");
+			return -EINVAL;
+		}
+		lse_val->mpls_tc = tc;
+		lse_mask->mpls_tc = MPLS_TC_MASK;
+	}
+	if (tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]) {
+		u32 label = nla_get_u32(tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL]);
+
+		if (label & ~MPLS_LABEL_MASK) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL],
+					    "Label must be between 0 and 1048575");
+			return -EINVAL;
+		}
+		lse_val->mpls_label = label;
+		lse_mask->mpls_label = MPLS_LABEL_MASK;
+	}
+
+	return 0;
+}
+
+static int fl_set_key_mpls_opts(const struct nlattr *nla_mpls_opts,
+				struct flow_dissector_key_mpls *key_val,
+				struct flow_dissector_key_mpls *key_mask,
+				struct netlink_ext_ack *extack)
+{
+	struct nlattr *nla_lse;
+	int rem;
+	int err;
+
+	if (!(nla_mpls_opts->nla_type & NLA_F_NESTED)) {
+		NL_SET_ERR_MSG_ATTR(extack, nla_mpls_opts,
+				    "NLA_F_NESTED is missing");
+		return -EINVAL;
+	}
+
+	nla_for_each_nested(nla_lse, nla_mpls_opts, rem) {
+		if (nla_type(nla_lse) != TCA_FLOWER_KEY_MPLS_OPTS_LSE) {
+			NL_SET_ERR_MSG_ATTR(extack, nla_lse,
+					    "Invalid MPLS option type");
+			return -EINVAL;
+		}
+
+		err = fl_set_key_mpls_lse(nla_lse, key_val, key_mask, extack);
+		if (err < 0)
+			return err;
+	}
+	if (rem) {
+		NL_SET_ERR_MSG(extack,
+			       "Bytes leftover after parsing MPLS options");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 static int fl_set_key_mpls(struct nlattr **tb,
 			   struct flow_dissector_key_mpls *key_val,
 			   struct flow_dissector_key_mpls *key_mask,
@@ -784,6 +919,21 @@ static int fl_set_key_mpls(struct nlattr **tb,
 	struct flow_dissector_mpls_lse *lse_mask;
 	struct flow_dissector_mpls_lse *lse_val;
 
+	if (tb[TCA_FLOWER_KEY_MPLS_OPTS]) {
+		if (tb[TCA_FLOWER_KEY_MPLS_TTL] ||
+		    tb[TCA_FLOWER_KEY_MPLS_BOS] ||
+		    tb[TCA_FLOWER_KEY_MPLS_TC] ||
+		    tb[TCA_FLOWER_KEY_MPLS_LABEL]) {
+			NL_SET_ERR_MSG_ATTR(extack,
+					    tb[TCA_FLOWER_KEY_MPLS_OPTS],
+					    "MPLS label, Traffic Class, Bottom Of Stack and Time To Live must be encapsulated in the MPLS options attribute");
+			return -EBADMSG;
+		}
+
+		return fl_set_key_mpls_opts(tb[TCA_FLOWER_KEY_MPLS_OPTS],
+					    key_val, key_mask, extack);
+	}
+
 	lse_val = &key_val->ls[0];
 	lse_mask = &key_mask->ls[0];
 
@@ -2232,6 +2382,89 @@ static int fl_dump_key_port_range(struct sk_buff *skb, struct fl_flow_key *key,
 	return 0;
 }
 
+static int fl_dump_key_mpls_opt_lse(struct sk_buff *skb,
+				    struct flow_dissector_key_mpls *mpls_key,
+				    struct flow_dissector_key_mpls *mpls_mask,
+				    u8 lse_index)
+{
+	struct flow_dissector_mpls_lse *lse_mask = &mpls_mask->ls[lse_index];
+	struct flow_dissector_mpls_lse *lse_key = &mpls_key->ls[lse_index];
+	int err;
+
+	err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH,
+			 lse_index + 1);
+	if (err)
+		return err;
+
+	if (lse_mask->mpls_ttl) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TTL,
+				 lse_key->mpls_ttl);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_bos) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_BOS,
+				 lse_key->mpls_bos);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_tc) {
+		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_TC,
+				 lse_key->mpls_tc);
+		if (err)
+			return err;
+	}
+	if (lse_mask->mpls_label) {	/* 20-bit label, NLA_U32 in the policy */
+		err = nla_put_u32(skb, TCA_FLOWER_KEY_MPLS_OPT_LSE_LABEL,
+				  lse_key->mpls_label);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int fl_dump_key_mpls_opts(struct sk_buff *skb,
+				 struct flow_dissector_key_mpls *mpls_key,
+				 struct flow_dissector_key_mpls *mpls_mask)
+{
+	struct nlattr *opts;
+	struct nlattr *lse;
+	u8 lse_index;
+	int err;
+
+	opts = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS);
+	if (!opts)
+		return -EMSGSIZE;
+
+	for (lse_index = 0; lse_index < FLOW_DIS_MPLS_MAX; lse_index++) {
+		if (!(mpls_mask->used_lses & 1 << lse_index))
+			continue;
+
+		lse = nla_nest_start(skb, TCA_FLOWER_KEY_MPLS_OPTS_LSE);
+		if (!lse) {
+			err = -EMSGSIZE;
+			goto err_opts;
+		}
+
+		err = fl_dump_key_mpls_opt_lse(skb, mpls_key, mpls_mask,
+					       lse_index);
+		if (err)
+			goto err_opts_lse;
+		nla_nest_end(skb, lse);
+	}
+	nla_nest_end(skb, opts);
+
+	return 0;
+
+err_opts_lse:
+	nla_nest_cancel(skb, lse);
+err_opts:
+	nla_nest_cancel(skb, opts);
+
+	return err;
+}
+
 static int fl_dump_key_mpls(struct sk_buff *skb,
 			    struct flow_dissector_key_mpls *mpls_key,
 			    struct flow_dissector_key_mpls *mpls_mask)
@@ -2240,12 +2473,20 @@ static int fl_dump_key_mpls(struct sk_buff *skb,
 	struct flow_dissector_mpls_lse *lse_key;
 	int err;
 
-	if (!memchr_inv(mpls_mask, 0, sizeof(*mpls_mask)))
+	if (!mpls_mask->used_lses)
 		return 0;
 
 	lse_mask = &mpls_mask->ls[0];
 	lse_key = &mpls_key->ls[0];
 
+	/* For backward compatibility, don't use the MPLS nested attributes if
+	 * the rule can be expressed using the old attributes.
+	 */
+	if (mpls_mask->used_lses & ~1 ||
+	    (!lse_mask->mpls_ttl && !lse_mask->mpls_bos &&
+	     !lse_mask->mpls_tc && !lse_mask->mpls_label))
+		return fl_dump_key_mpls_opts(skb, mpls_key, mpls_mask);
+
 	if (lse_mask->mpls_ttl) {
 		err = nla_put_u8(skb, TCA_FLOWER_KEY_MPLS_TTL,
 				 lse_key->mpls_ttl);
-- 
cgit v1.2.3-59-g8ed1b


From 90ce665c6a40dc1be771bf5f86e624c0acf3a76f Mon Sep 17 00:00:00 2001
From: Russell King <rmk+kernel@armlinux.org.uk>
Date: Tue, 26 May 2020 16:29:36 +0100
Subject: net: mdiobus: add clause 45 mdiobus accessors

There is a recurring pattern throughout some of the PHY code converting
a devad and regnum to our packed clause 45 representation. Rather than
having this scattered around the code, let's put a common translation
function in mdio.h, and provide some register accessors.

Convert the phylib core, phylink, bcm87xx and cortina to use these.

Signed-off-by: Russell King <rmk+kernel@armlinux.org.uk>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm87xx.c    |  2 +-
 drivers/net/phy/cortina.c    |  3 +--
 drivers/net/phy/phy-core.c   | 11 ++++-------
 drivers/net/phy/phy.c        |  4 ++--
 drivers/net/phy/phy_device.c | 20 ++++++++------------
 drivers/net/phy/phylink.c    | 11 +++++------
 include/linux/mdio.h         | 31 +++++++++++++++++++++++++++++++
 include/linux/phy.h          |  6 ------
 8 files changed, 52 insertions(+), 36 deletions(-)

diff --git a/drivers/net/phy/bcm87xx.c b/drivers/net/phy/bcm87xx.c
index f6dce6850850..df360e1c5069 100644
--- a/drivers/net/phy/bcm87xx.c
+++ b/drivers/net/phy/bcm87xx.c
@@ -55,7 +55,7 @@ static int bcm87xx_of_reg_init(struct phy_device *phydev)
 		u16 mask	= be32_to_cpup(paddr++);
 		u16 val_bits	= be32_to_cpup(paddr++);
 		int val;
-		u32 regnum = MII_ADDR_C45 | (devid << 16) | reg;
+		u32 regnum = mdiobus_c45_addr(devid, reg);
 		val = 0;
 		if (mask) {
 			val = phy_read(phydev, regnum);
diff --git a/drivers/net/phy/cortina.c b/drivers/net/phy/cortina.c
index aac51362c0fe..40514a94e6ff 100644
--- a/drivers/net/phy/cortina.c
+++ b/drivers/net/phy/cortina.c
@@ -17,8 +17,7 @@
 
 static int cortina_read_reg(struct phy_device *phydev, u16 regnum)
 {
-	return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr,
-			    MII_ADDR_C45 | regnum);
+	return mdiobus_c45_read(phydev->mdio.bus, phydev->mdio.addr, 0, regnum);
 }
 
 static int cortina_read_status(struct phy_device *phydev)
diff --git a/drivers/net/phy/phy-core.c b/drivers/net/phy/phy-core.c
index 66b8c61ca74c..46bd68e9ecfa 100644
--- a/drivers/net/phy/phy-core.c
+++ b/drivers/net/phy/phy-core.c
@@ -428,9 +428,8 @@ int __phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum)
 	if (phydev->drv && phydev->drv->read_mmd) {
 		val = phydev->drv->read_mmd(phydev, devad, regnum);
 	} else if (phydev->is_c45) {
-		u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
-
-		val = __mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, addr);
+		val = __mdiobus_c45_read(phydev->mdio.bus, phydev->mdio.addr,
+					 devad, regnum);
 	} else {
 		struct mii_bus *bus = phydev->mdio.bus;
 		int phy_addr = phydev->mdio.addr;
@@ -485,10 +484,8 @@ int __phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val)
 	if (phydev->drv && phydev->drv->write_mmd) {
 		ret = phydev->drv->write_mmd(phydev, devad, regnum, val);
 	} else if (phydev->is_c45) {
-		u32 addr = MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff);
-
-		ret = __mdiobus_write(phydev->mdio.bus, phydev->mdio.addr,
-				      addr, val);
+		ret = __mdiobus_c45_write(phydev->mdio.bus, phydev->mdio.addr,
+					  devad, regnum, val);
 	} else {
 		struct mii_bus *bus = phydev->mdio.bus;
 		int phy_addr = phydev->mdio.addr;
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index d584701187db..27da0c94818f 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -361,7 +361,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 		if (mdio_phy_id_is_c45(mii_data->phy_id)) {
 			prtad = mdio_phy_id_prtad(mii_data->phy_id);
 			devad = mdio_phy_id_devad(mii_data->phy_id);
-			devad = MII_ADDR_C45 | devad << 16 | mii_data->reg_num;
+			devad = mdiobus_c45_addr(devad, mii_data->reg_num);
 		} else {
 			prtad = mii_data->phy_id;
 			devad = mii_data->reg_num;
@@ -374,7 +374,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd)
 		if (mdio_phy_id_is_c45(mii_data->phy_id)) {
 			prtad = mdio_phy_id_prtad(mii_data->phy_id);
 			devad = mdio_phy_id_devad(mii_data->phy_id);
-			devad = MII_ADDR_C45 | devad << 16 | mii_data->reg_num;
+			devad = mdiobus_c45_addr(devad, mii_data->reg_num);
 		} else {
 			prtad = mii_data->phy_id;
 			devad = mii_data->reg_num;
diff --git a/drivers/net/phy/phy_device.c b/drivers/net/phy/phy_device.c
index 6b30d205642f..04946de74fa0 100644
--- a/drivers/net/phy/phy_device.c
+++ b/drivers/net/phy/phy_device.c
@@ -675,16 +675,14 @@ EXPORT_SYMBOL(phy_device_create);
 static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr,
 				   u32 *devices_in_package)
 {
-	int phy_reg, reg_addr;
+	int phy_reg;
 
-	reg_addr = MII_ADDR_C45 | dev_addr << 16 | MDIO_DEVS2;
-	phy_reg = mdiobus_read(bus, addr, reg_addr);
+	phy_reg = mdiobus_c45_read(bus, addr, dev_addr, MDIO_DEVS2);
 	if (phy_reg < 0)
 		return -EIO;
 	*devices_in_package = phy_reg << 16;
 
-	reg_addr = MII_ADDR_C45 | dev_addr << 16 | MDIO_DEVS1;
-	phy_reg = mdiobus_read(bus, addr, reg_addr);
+	phy_reg = mdiobus_c45_read(bus, addr, dev_addr, MDIO_DEVS1);
 	if (phy_reg < 0)
 		return -EIO;
 	*devices_in_package |= phy_reg;
@@ -709,11 +707,11 @@ static int get_phy_c45_devs_in_pkg(struct mii_bus *bus, int addr, int dev_addr,
  *
  */
 static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
-			   struct phy_c45_device_ids *c45_ids) {
-	int phy_reg;
-	int i, reg_addr;
+			   struct phy_c45_device_ids *c45_ids)
+{
 	const int num_ids = ARRAY_SIZE(c45_ids->device_ids);
 	u32 *devs = &c45_ids->devices_in_package;
+	int i, phy_reg;
 
 	/* Find first non-zero Devices In package. Device zero is reserved
 	 * for 802.3 c45 complied PHYs, so don't probe it at first.
@@ -747,14 +745,12 @@ static int get_phy_c45_ids(struct mii_bus *bus, int addr, u32 *phy_id,
 		if (!(c45_ids->devices_in_package & (1 << i)))
 			continue;
 
-		reg_addr = MII_ADDR_C45 | i << 16 | MII_PHYSID1;
-		phy_reg = mdiobus_read(bus, addr, reg_addr);
+		phy_reg = mdiobus_c45_read(bus, addr, i, MII_PHYSID1);
 		if (phy_reg < 0)
 			return -EIO;
 		c45_ids->device_ids[i] = phy_reg << 16;
 
-		reg_addr = MII_ADDR_C45 | i << 16 | MII_PHYSID2;
-		phy_reg = mdiobus_read(bus, addr, reg_addr);
+		phy_reg = mdiobus_c45_read(bus, addr, i, MII_PHYSID2);
 		if (phy_reg < 0)
 			return -EIO;
 		c45_ids->device_ids[i] |= phy_reg;
diff --git a/drivers/net/phy/phylink.c b/drivers/net/phy/phylink.c
index b6b1f77bba58..0ab65fb75258 100644
--- a/drivers/net/phy/phylink.c
+++ b/drivers/net/phy/phylink.c
@@ -1631,7 +1631,7 @@ static int phylink_phy_read(struct phylink *pl, unsigned int phy_id,
 	if (mdio_phy_id_is_c45(phy_id)) {
 		prtad = mdio_phy_id_prtad(phy_id);
 		devad = mdio_phy_id_devad(phy_id);
-		devad = MII_ADDR_C45 | devad << 16 | reg;
+		devad = mdiobus_c45_addr(devad, reg);
 	} else if (phydev->is_c45) {
 		switch (reg) {
 		case MII_BMCR:
@@ -1654,7 +1654,7 @@ static int phylink_phy_read(struct phylink *pl, unsigned int phy_id,
 			return -EINVAL;
 		}
 		prtad = phy_id;
-		devad = MII_ADDR_C45 | devad << 16 | reg;
+		devad = mdiobus_c45_addr(devad, reg);
 	} else {
 		prtad = phy_id;
 		devad = reg;
@@ -1671,7 +1671,7 @@ static int phylink_phy_write(struct phylink *pl, unsigned int phy_id,
 	if (mdio_phy_id_is_c45(phy_id)) {
 		prtad = mdio_phy_id_prtad(phy_id);
 		devad = mdio_phy_id_devad(phy_id);
-		devad = MII_ADDR_C45 | devad << 16 | reg;
+		devad = mdiobus_c45_addr(devad, reg);
 	} else if (phydev->is_c45) {
 		switch (reg) {
 		case MII_BMCR:
@@ -1694,7 +1694,7 @@ static int phylink_phy_write(struct phylink *pl, unsigned int phy_id,
 			return -EINVAL;
 		}
 		prtad = phy_id;
-		devad = MII_ADDR_C45 | devad << 16 | reg;
+		devad = mdiobus_c45_addr(devad, reg);
 	} else {
 		prtad = phy_id;
 		devad = reg;
@@ -2292,7 +2292,6 @@ void phylink_mii_c22_pcs_an_restart(struct mdio_device *pcs)
 }
 EXPORT_SYMBOL_GPL(phylink_mii_c22_pcs_an_restart);
 
-#define C45_ADDR(d,a)	(MII_ADDR_C45 | (d) << 16 | (a))
 void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
 				   struct phylink_link_state *state)
 {
@@ -2300,7 +2299,7 @@ void phylink_mii_c45_pcs_get_state(struct mdio_device *pcs,
 	int addr = pcs->addr;
 	int stat;
 
-	stat = mdiobus_read(bus, addr, C45_ADDR(MDIO_MMD_PCS, MDIO_STAT1));
+	stat = mdiobus_c45_read(bus, addr, MDIO_MMD_PCS, MDIO_STAT1);
 	if (stat < 0) {
 		state->link = false;
 		return;
diff --git a/include/linux/mdio.h b/include/linux/mdio.h
index 917e4bb2ed71..36d2e0673d03 100644
--- a/include/linux/mdio.h
+++ b/include/linux/mdio.h
@@ -9,6 +9,13 @@
 #include <uapi/linux/mdio.h>
 #include <linux/mod_devicetable.h>
 
+/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
+ * IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips.
+ */
+#define MII_ADDR_C45		(1<<30)
+#define MII_DEVADDR_C45_SHIFT	16
+#define MII_REGADDR_C45_MASK	GENMASK(15, 0)
+
 struct gpio_desc;
 struct mii_bus;
 
@@ -326,6 +333,30 @@ int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val);
 int mdiobus_modify(struct mii_bus *bus, int addr, u32 regnum, u16 mask,
 		   u16 set);
 
+static inline u32 mdiobus_c45_addr(int devad, u16 regnum)
+{
+	return MII_ADDR_C45 | devad << MII_DEVADDR_C45_SHIFT | regnum;
+}
+
+static inline int __mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad,
+				     u16 regnum)
+{
+	return __mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum));
+}
+
+static inline int __mdiobus_c45_write(struct mii_bus *bus, int prtad, int devad,
+				      u16 regnum, u16 val)
+{
+	return __mdiobus_write(bus, prtad, mdiobus_c45_addr(devad, regnum),
+			       val);
+}
+
+static inline int mdiobus_c45_read(struct mii_bus *bus, int prtad, int devad,
+				   u16 regnum)
+{
+	return mdiobus_read(bus, prtad, mdiobus_c45_addr(devad, regnum));
+}
+
 int mdiobus_register_device(struct mdio_device *mdiodev);
 int mdiobus_unregister_device(struct mdio_device *mdiodev);
 bool mdiobus_is_registered_device(struct mii_bus *bus, int addr);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 2bcdf19ed3b4..6d256e720a66 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -209,12 +209,6 @@ static inline const char *phy_modes(phy_interface_t interface)
 
 #define MII_BUS_ID_SIZE	61
 
-/* Or MII_ADDR_C45 into regnum for read/write on mii_bus to enable the 21 bit
-   IEEE 802.3ae clause 45 addressing mode used by 10GIGE phy chips. */
-#define MII_ADDR_C45 (1<<30)
-#define MII_DEVADDR_C45_SHIFT	16
-#define MII_REGADDR_C45_MASK	GENMASK(15, 0)
-
 struct device;
 struct phylink;
 struct sfp_bus;
-- 
cgit v1.2.3-59-g8ed1b


From 9513167e6c3343f4ec8e04eb89e9b130eb90e58a Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 18:22:53 +0200
Subject: net: phy: mscc-miim: use more reasonable delays

The MSCC MIIM MDIO driver uses delays to poll a status register. I
made multiple tests on an Ocelot PCS120 platform which led me to reduce
those delays. The interval the polling function is allowed to sleep
between reads is reduced from 100us to 50us, which in almost all cases
is a good value to succeed at the first retry. The overall timeout is
also lowered as the prior value was really way too high; 10000us is
large enough.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mscc-miim.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/phy/mdio-mscc-miim.c
index badbc99bedd3..0b7544f593fb 100644
--- a/drivers/net/phy/mdio-mscc-miim.c
+++ b/drivers/net/phy/mdio-mscc-miim.c
@@ -44,7 +44,7 @@ static int mscc_miim_wait_ready(struct mii_bus *bus)
 	u32 val;
 
 	readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
-			   !(val & MSCC_MIIM_STATUS_STAT_BUSY), 100, 250000);
+			   !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50, 10000);
 	if (val & MSCC_MIIM_STATUS_STAT_BUSY)
 		return -ETIMEDOUT;
 
-- 
cgit v1.2.3-59-g8ed1b


From f5112c8ae22f8d5796b10d9f5db0014b3546dd00 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 18:22:54 +0200
Subject: net: phy: mscc-miim: remove redundant timeout check

readl_poll_timeout already returns -ETIMEDOUT if the condition isn't
satisfied, so there's no need to check the condition again after calling
it. Remove the redundant timeout check.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mscc-miim.c | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/phy/mdio-mscc-miim.c
index 0b7544f593fb..42119f661452 100644
--- a/drivers/net/phy/mdio-mscc-miim.c
+++ b/drivers/net/phy/mdio-mscc-miim.c
@@ -43,12 +43,8 @@ static int mscc_miim_wait_ready(struct mii_bus *bus)
 	struct mscc_miim_dev *miim = bus->priv;
 	u32 val;
 
-	readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
-			   !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50, 10000);
-	if (val & MSCC_MIIM_STATUS_STAT_BUSY)
-		return -ETIMEDOUT;
-
-	return 0;
+	return readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
+				  !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50, 10000);
 }
 
 static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
-- 
cgit v1.2.3-59-g8ed1b


From d9c6de35e051c17474ec8a1fe2fdb8cd2b6f1a87 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 18:22:55 +0200
Subject: net: phy: mscc-miim: improve waiting logic

The MSCC MIIM MDIO driver uses a waiting logic to wait for the MDIO bus
to be ready to accept next commands. It does so by polling the BUSY
status bit which indicates the MDIO bus has completed all pending
operations. This can take time, and the controller supports writing the
next command as soon as there are no pending commands (which happens
while the MDIO bus is busy completing its current command).

This patch implements this improved logic by adding a helper to poll
the PENDING status bit, and by adjusting where we should wait for the
bus to not be busy or to not be pending.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Alexandre Belloni <alexandre.belloni@bootlin.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mdio-mscc-miim.c | 15 +++++++++++++--
 1 file changed, 13 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/phy/mdio-mscc-miim.c
index 42119f661452..aed9afa1e8f1 100644
--- a/drivers/net/phy/mdio-mscc-miim.c
+++ b/drivers/net/phy/mdio-mscc-miim.c
@@ -16,6 +16,7 @@
 #include <linux/of_mdio.h>
 
 #define MSCC_MIIM_REG_STATUS		0x0
+#define		MSCC_MIIM_STATUS_STAT_PENDING	BIT(2)
 #define		MSCC_MIIM_STATUS_STAT_BUSY	BIT(3)
 #define MSCC_MIIM_REG_CMD		0x8
 #define		MSCC_MIIM_CMD_OPR_WRITE		BIT(1)
@@ -47,13 +48,23 @@ static int mscc_miim_wait_ready(struct mii_bus *bus)
 				  !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50, 10000);
 }
 
+static int mscc_miim_wait_pending(struct mii_bus *bus)
+{
+	struct mscc_miim_dev *miim = bus->priv;
+	u32 val;
+
+	return readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
+				  !(val & MSCC_MIIM_STATUS_STAT_PENDING),
+				  50, 10000);
+}
+
 static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
 {
 	struct mscc_miim_dev *miim = bus->priv;
 	u32 val;
 	int ret;
 
-	ret = mscc_miim_wait_ready(bus);
+	ret = mscc_miim_wait_pending(bus);
 	if (ret)
 		goto out;
 
@@ -82,7 +93,7 @@ static int mscc_miim_write(struct mii_bus *bus, int mii_id,
 	struct mscc_miim_dev *miim = bus->priv;
 	int ret;
 
-	ret = mscc_miim_wait_ready(bus);
+	ret = mscc_miim_wait_pending(bus);
 	if (ret < 0)
 		goto out;
 
-- 
cgit v1.2.3-59-g8ed1b


From a021ada2b7a3a79394ed2f476ec7615a184bb488 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 18:22:56 +0200
Subject: net: phy: mscc-miim: read poll when high resolution timers are
 disabled

The driver uses a read polling mechanism to check the status of the MDIO
bus, to know if it is ready to accept next commands. This polling
mechanism uses usleep_range() under the hood between reads which is fine
as long as high resolution timers are enabled. Otherwise the delays will
end up being much longer than expected.

This patch fixes this by using udelay() under the hood when
CONFIG_HIGH_RES_TIMERS isn't enabled. This increases CPU usage.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/Kconfig          |  3 ++-
 drivers/net/phy/mdio-mscc-miim.c | 22 +++++++++++++++++-----
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/drivers/net/phy/Kconfig b/drivers/net/phy/Kconfig
index 2a32f26ead0b..047c27087b10 100644
--- a/drivers/net/phy/Kconfig
+++ b/drivers/net/phy/Kconfig
@@ -184,7 +184,8 @@ config MDIO_MSCC_MIIM
 	depends on HAS_IOMEM
 	help
 	  This driver supports the MIIM (MDIO) interface found in the network
-	  switches of the Microsemi SoCs
+	  switches of the Microsemi SoCs; it is recommended to switch on
+	  CONFIG_HIGH_RES_TIMERS
 
 config MDIO_MVUSB
 	tristate "Marvell USB to MDIO Adapter"
diff --git a/drivers/net/phy/mdio-mscc-miim.c b/drivers/net/phy/mdio-mscc-miim.c
index aed9afa1e8f1..11f583fd4611 100644
--- a/drivers/net/phy/mdio-mscc-miim.c
+++ b/drivers/net/phy/mdio-mscc-miim.c
@@ -39,13 +39,25 @@ struct mscc_miim_dev {
 	void __iomem *phy_regs;
 };
 
+/* When high resolution timers aren't built-in: we can't use usleep_range() as
+ * we would sleep way too long. Use udelay() instead.
+ */
+#define mscc_readl_poll_timeout(addr, val, cond, delay_us, timeout_us)	\
+({									\
+	if (!IS_ENABLED(CONFIG_HIGH_RES_TIMERS))			\
+		readl_poll_timeout_atomic(addr, val, cond, delay_us,	\
+					  timeout_us);			\
+	readl_poll_timeout(addr, val, cond, delay_us, timeout_us);	\
+})
+
 static int mscc_miim_wait_ready(struct mii_bus *bus)
 {
 	struct mscc_miim_dev *miim = bus->priv;
 	u32 val;
 
-	return readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
-				  !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50, 10000);
+	return mscc_readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
+				       !(val & MSCC_MIIM_STATUS_STAT_BUSY), 50,
+				       10000);
 }
 
 static int mscc_miim_wait_pending(struct mii_bus *bus)
@@ -53,9 +65,9 @@ static int mscc_miim_wait_pending(struct mii_bus *bus)
 	struct mscc_miim_dev *miim = bus->priv;
 	u32 val;
 
-	return readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
-				  !(val & MSCC_MIIM_STATUS_STAT_PENDING),
-				  50, 10000);
+	return mscc_readl_poll_timeout(miim->regs + MSCC_MIIM_REG_STATUS, val,
+				       !(val & MSCC_MIIM_STATUS_STAT_PENDING),
+				       50, 10000);
 }
 
 static int mscc_miim_read(struct mii_bus *bus, int mii_id, int regnum)
-- 
cgit v1.2.3-59-g8ed1b


From 09d0310f07672b7e3e60cf719b96c803f4830e5c Mon Sep 17 00:00:00 2001
From: Dmitry Vyukov <dvyukov@google.com>
Date: Mon, 25 May 2020 17:31:58 +0200
Subject: net/smc: mark smc_pnet_policy as const

Netlink policies are generally declared as const.
This is safer and prevents potential bugs.

Signed-off-by: Dmitry Vyukov <dvyukov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_pnet.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/smc/smc_pnet.c b/net/smc/smc_pnet.c
index be03f1260d59..014d91b9778e 100644
--- a/net/smc/smc_pnet.c
+++ b/net/smc/smc_pnet.c
@@ -32,7 +32,7 @@
 
 static struct net_device *pnet_find_base_ndev(struct net_device *ndev);
 
-static struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
+static const struct nla_policy smc_pnet_policy[SMC_PNETID_MAX + 1] = {
 	[SMC_PNETID_NAME] = {
 		.type = NLA_NUL_STRING,
 		.len = SMC_MAX_PNETID_LEN
-- 
cgit v1.2.3-59-g8ed1b


From 8a448bf832af537d26aa557d183a16943dce4510 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Tue, 26 May 2020 00:27:10 +0800
Subject: net: ethernet: fec: move GPR register offset and bit into DT

The commit da722186f654 (net: fec: set GPR bit on suspend by DT
configuration) set the GPR register offset and bit in driver for
wake on lan feature.

But it introduces two issues here:
- one SOC has two instances, and they use different bits
- different SOCs may have different offsets and bits

So to support wake-on-lan feature on other i.MX platforms, it should
configure the GPR register offset and bit from DT.

So the patch is to improve the commit da722186f654 (net: fec: set GPR
bit on suspend by DT configuration) to support multiple ethernet
instances on i.MX series.

v2:
 * switch back to store the quirks bitmask in driver_data
v3:
 * suggested by Sascha Hauer, use a struct fec_devinfo for
   abstracting differences between different hardware variants,
   it can give more freedom to describe the differences.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 2e209142f2d1..4acb91dce5fc 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -88,8 +88,6 @@ static void fec_enet_itr_coal_init(struct net_device *ndev);
 
 struct fec_devinfo {
 	u32 quirks;
-	u8 stop_gpr_reg;
-	u8 stop_gpr_bit;
 };
 
 static const struct fec_devinfo fec_imx25_info = {
@@ -112,8 +110,6 @@ static const struct fec_devinfo fec_imx6q_info = {
 		  FEC_QUIRK_HAS_BUFDESC_EX | FEC_QUIRK_HAS_CSUM |
 		  FEC_QUIRK_HAS_VLAN | FEC_QUIRK_ERR006358 |
 		  FEC_QUIRK_HAS_RACC,
-	.stop_gpr_reg = 0x34,
-	.stop_gpr_bit = 27,
 };
 
 static const struct fec_devinfo fec_mvf600_info = {
@@ -3476,19 +3472,23 @@ static int fec_enet_get_irq_cnt(struct platform_device *pdev)
 }
 
 static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
-				   struct fec_devinfo *dev_info,
 				   struct device_node *np)
 {
 	struct device_node *gpr_np;
+	u32 out_val[3];
 	int ret = 0;
 
-	if (!dev_info)
-		return 0;
-
-	gpr_np = of_parse_phandle(np, "gpr", 0);
+	gpr_np = of_parse_phandle(np, "fsl,stop-mode", 0);
 	if (!gpr_np)
 		return 0;
 
+	ret = of_property_read_u32_array(np, "fsl,stop-mode", out_val,
+					 ARRAY_SIZE(out_val));
+	if (ret) {
+		dev_dbg(&fep->pdev->dev, "no stop mode property\n");
+		return ret;
+	}
+
 	fep->stop_gpr.gpr = syscon_node_to_regmap(gpr_np);
 	if (IS_ERR(fep->stop_gpr.gpr)) {
 		dev_err(&fep->pdev->dev, "could not find gpr regmap\n");
@@ -3497,8 +3497,8 @@ static int fec_enet_init_stop_mode(struct fec_enet_private *fep,
 		goto out;
 	}
 
-	fep->stop_gpr.reg = dev_info->stop_gpr_reg;
-	fep->stop_gpr.bit = dev_info->stop_gpr_bit;
+	fep->stop_gpr.reg = out_val[1];
+	fep->stop_gpr.bit = out_val[2];
 
 out:
 	of_node_put(gpr_np);
@@ -3575,7 +3575,7 @@ fec_probe(struct platform_device *pdev)
 	if (of_get_property(np, "fsl,magic-packet", NULL))
 		fep->wol_flag |= FEC_WOL_HAS_MAGIC_PACKET;
 
-	ret = fec_enet_init_stop_mode(fep, dev_info, np);
+	ret = fec_enet_init_stop_mode(fep, np);
 	if (ret)
 		goto failed_stop_mode;
 
-- 
cgit v1.2.3-59-g8ed1b


From 998ec26b66c0fafdeec07288ef8973726c297478 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Tue, 26 May 2020 00:27:11 +0800
Subject: dt-bindings: fec: update the gpr property

- rename the 'gpr' property string to 'fsl,stop-mode'.
- Update the property to define gpr register offset and
bit in DT, since different instances have different gpr bits.

v2:
 * rename 'gpr' property string to 'fsl,stop-mode'.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/devicetree/bindings/net/fsl-fec.txt | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/Documentation/devicetree/bindings/net/fsl-fec.txt b/Documentation/devicetree/bindings/net/fsl-fec.txt
index 26c492a2e0e1..9b543789cd52 100644
--- a/Documentation/devicetree/bindings/net/fsl-fec.txt
+++ b/Documentation/devicetree/bindings/net/fsl-fec.txt
@@ -22,8 +22,11 @@ Optional properties:
 - fsl,err006687-workaround-present: If present indicates that the system has
   the hardware workaround for ERR006687 applied and does not need a software
   workaround.
-- gpr: phandle of SoC general purpose register mode. Required for wake on LAN
-  on some SoCs
+- fsl,stop-mode: register bits of stop mode control, the format is
+		 <&gpr req_gpr req_bit>.
+		 gpr is the phandle to general purpose register node.
+		 req_gpr is the gpr register offset for ENET stop request.
+		 req_bit is the gpr bit offset for ENET stop request.
  -interrupt-names:  names of the interrupts listed in interrupts property in
   the same order. The defaults if not specified are
   __Number of interrupts__   __Default__
-- 
cgit v1.2.3-59-g8ed1b


From d009a621b92d9aea851a6d8df1c4142f423c41e6 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Tue, 26 May 2020 00:27:12 +0800
Subject: ARM: dts: imx: add ethernet stop mode property

- Update the imx6qdl gpr property to define gpr register
  offset and bit in DT.
- Add imx6sx/imx6ul/imx7d ethernet stop mode property.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/imx6qdl.dtsi | 2 +-
 arch/arm/boot/dts/imx6sx.dtsi  | 2 ++
 arch/arm/boot/dts/imx6ul.dtsi  | 2 ++
 arch/arm/boot/dts/imx7d.dtsi   | 1 +
 arch/arm/boot/dts/imx7s.dtsi   | 1 +
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/arch/arm/boot/dts/imx6qdl.dtsi b/arch/arm/boot/dts/imx6qdl.dtsi
index 98da446aa0f2..48f50161ea21 100644
--- a/arch/arm/boot/dts/imx6qdl.dtsi
+++ b/arch/arm/boot/dts/imx6qdl.dtsi
@@ -1045,7 +1045,7 @@
 					 <&clks IMX6QDL_CLK_ENET>,
 					 <&clks IMX6QDL_CLK_ENET_REF>;
 				clock-names = "ipg", "ahb", "ptp";
-				gpr = <&gpr>;
+				fsl,stop-mode = <&gpr 0x34 27>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/imx6sx.dtsi b/arch/arm/boot/dts/imx6sx.dtsi
index d6f831731460..09f21aaee936 100644
--- a/arch/arm/boot/dts/imx6sx.dtsi
+++ b/arch/arm/boot/dts/imx6sx.dtsi
@@ -930,6 +930,7 @@
 					      "enet_clk_ref", "enet_out";
 				fsl,num-tx-queues = <3>;
 				fsl,num-rx-queues = <3>;
+				fsl,stop-mode = <&gpr 0x10 3>;
 				status = "disabled";
 			};
 
@@ -1039,6 +1040,7 @@
 					 <&clks IMX6SX_CLK_ENET_PTP>;
 				clock-names = "ipg", "ahb", "ptp",
 					      "enet_clk_ref", "enet_out";
+				fsl,stop-mode = <&gpr 0x10 4>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/imx6ul.dtsi b/arch/arm/boot/dts/imx6ul.dtsi
index 2ccf67c4ac1a..345ae9b0db37 100644
--- a/arch/arm/boot/dts/imx6ul.dtsi
+++ b/arch/arm/boot/dts/imx6ul.dtsi
@@ -520,6 +520,7 @@
 					      "enet_clk_ref", "enet_out";
 				fsl,num-tx-queues = <1>;
 				fsl,num-rx-queues = <1>;
+				fsl,stop-mode = <&gpr 0x10 4>;
 				status = "disabled";
 			};
 
@@ -856,6 +857,7 @@
 					      "enet_clk_ref", "enet_out";
 				fsl,num-tx-queues = <1>;
 				fsl,num-rx-queues = <1>;
+				fsl,stop-mode = <&gpr 0x10 3>;
 				status = "disabled";
 			};
 
diff --git a/arch/arm/boot/dts/imx7d.dtsi b/arch/arm/boot/dts/imx7d.dtsi
index 4c22828df55f..cff875b80b60 100644
--- a/arch/arm/boot/dts/imx7d.dtsi
+++ b/arch/arm/boot/dts/imx7d.dtsi
@@ -153,6 +153,7 @@
 			"enet_clk_ref", "enet_out";
 		fsl,num-tx-queues = <3>;
 		fsl,num-rx-queues = <3>;
+		fsl,stop-mode = <&gpr 0x10 4>;
 		status = "disabled";
 	};
 
diff --git a/arch/arm/boot/dts/imx7s.dtsi b/arch/arm/boot/dts/imx7s.dtsi
index 76e3ffbbbfbf..5bf0b39fa99b 100644
--- a/arch/arm/boot/dts/imx7s.dtsi
+++ b/arch/arm/boot/dts/imx7s.dtsi
@@ -1190,6 +1190,7 @@
 					"enet_clk_ref", "enet_out";
 				fsl,num-tx-queues = <3>;
 				fsl,num-rx-queues = <3>;
+				fsl,stop-mode = <&gpr 0x10 3>;
 				status = "disabled";
 			};
 		};
-- 
cgit v1.2.3-59-g8ed1b


From f099b8b7531f86d28dbbb8e43534fd79131b62e9 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Tue, 26 May 2020 00:27:13 +0800
Subject: ARM: dts: imx6qdl-sabresd: enable fec wake-on-lan

Enable ethernet wake-on-lan feature for imx6q/dl/qp sabresd
boards since the PHY clock is supplied by external osc.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 arch/arm/boot/dts/imx6qdl-sabresd.dtsi | 1 +
 1 file changed, 1 insertion(+)

diff --git a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
index fe59dde41b64..28b35ccb3757 100644
--- a/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
+++ b/arch/arm/boot/dts/imx6qdl-sabresd.dtsi
@@ -204,6 +204,7 @@
 	pinctrl-0 = <&pinctrl_enet>;
 	phy-mode = "rgmii-id";
 	phy-reset-gpios = <&gpio1 25 GPIO_ACTIVE_LOW>;
+	fsl,magic-packet;
 	status = "okay";
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 12b1bc75cd460c30652db0d4a3bd3fc59c16742c Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Mon, 25 May 2020 21:54:00 +0200
Subject: r8169: improve rtl_remove_one

Don't call netif_napi_del() manually, free_netdev() does this for us.
In addition reorder calls to match reverse order of calls in probe().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 17c564457e52..d672ae77c644 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4996,17 +4996,15 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	struct net_device *dev = pci_get_drvdata(pdev);
 	struct rtl8169_private *tp = netdev_priv(dev);
 
-	if (r8168_check_dash(tp))
-		rtl8168_driver_stop(tp);
-
-	netif_napi_del(&tp->napi);
+	if (pci_dev_run_wake(pdev))
+		pm_runtime_get_noresume(&pdev->dev);
 
 	unregister_netdev(dev);
 
-	rtl_release_firmware(tp);
+	if (r8168_check_dash(tp))
+		rtl8168_driver_stop(tp);
 
-	if (pci_dev_run_wake(pdev))
-		pm_runtime_get_noresume(&pdev->dev);
+	rtl_release_firmware(tp);
 
 	/* restore original MAC address */
 	rtl_rar_set(tp, dev->perm_addr);
-- 
cgit v1.2.3-59-g8ed1b


From 4e637c70b503b686aae45716a25a94dc3a434f3a Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Mon, 25 May 2020 23:41:13 +0200
Subject: mptcp: attempt coalescing when moving skbs to mptcp rx queue

We can try to coalesce skbs we take from the subflows rx queue with the
tail of the mptcp rx queue.

If successful, the skb head can be discarded early.

We can also free the skb extensions, we do not access them after this.

Signed-off-by: Florian Westphal <fw@strlen.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/protocol.c | 21 +++++++++++++++++++--
 1 file changed, 19 insertions(+), 2 deletions(-)

diff --git a/net/mptcp/protocol.c b/net/mptcp/protocol.c
index ba9d3d5c625f..b2c8b57e7942 100644
--- a/net/mptcp/protocol.c
+++ b/net/mptcp/protocol.c
@@ -144,12 +144,29 @@ static void __mptcp_move_skb(struct mptcp_sock *msk, struct sock *ssk,
 			     unsigned int offset, size_t copy_len)
 {
 	struct sock *sk = (struct sock *)msk;
+	struct sk_buff *tail;
 
 	__skb_unlink(skb, &ssk->sk_receive_queue);
-	skb_set_owner_r(skb, sk);
-	__skb_queue_tail(&sk->sk_receive_queue, skb);
 
+	skb_ext_reset(skb);
+	skb_orphan(skb);
 	msk->ack_seq += copy_len;
+
+	tail = skb_peek_tail(&sk->sk_receive_queue);
+	if (offset == 0 && tail) {
+		bool fragstolen;
+		int delta;
+
+		if (skb_try_coalesce(tail, skb, &fragstolen, &delta)) {
+			kfree_skb_partial(skb, fragstolen);
+			atomic_add(delta, &sk->sk_rmem_alloc);
+			sk_mem_charge(sk, delta);
+			return;
+		}
+	}
+
+	skb_set_owner_r(skb, sk);
+	__skb_queue_tail(&sk->sk_receive_queue, skb);
 	MPTCP_SKB_CB(skb)->offset = offset;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 3c2d8a046a4cd0bc835f2e6cfbc12f41d7f4323e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:43 +0300
Subject: mlxsw: spectrum: Use dedicated trap group for ACL trap

Packets that are trapped via tc's trap action are currently subject to
the same policer as packets hitting local routes. The latter are
critical to the correct functioning of the control plane, while the
former are mainly used for traffic inspection.

Split the ACL trap to a separate group with its own policer. Use a
higher priority for these traps than for traps using mirror action
(e.g., ARP, IGMP). Otherwise, packets matching both traps will not be
forwarded in hardware (because of trap action) and also not forwarded in
software because they will be marked with 'offload_fwd_mark'.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 9b27a129b0a6..5b5b96a82a34 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5548,6 +5548,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
 
 	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 943a24975799..e0811a7e13b9 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4105,7 +4105,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
 		  false, SP_PKT_SAMPLE, DISCARD),
 	/* ACL trap */
-	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, FLOW_LOGGING, false),
 	/* Multicast Router Traps */
 	MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
 	MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
@@ -4167,6 +4167,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
 			rate = 1024;
 			burst_size = 7;
 			break;
@@ -4230,6 +4231,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 5;
 			tc = 5;
 			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 			priority = 4;
 			tc = 4;
-- 
cgit v1.2.3-59-g8ed1b


From 761bc42fbecff8cfb8e529451fd0f13800d050c4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:44 +0300
Subject: mlxsw: spectrum: Use same switch case for identical groups

Trap groups that use the same policer settings can share the same switch
case.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index e0811a7e13b9..e8c9fc4cb6fb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4168,9 +4168,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
-			rate = 1024;
-			burst_size = 7;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 			rate = 1024;
 			burst_size = 7;
-- 
cgit v1.2.3-59-g8ed1b


From 412df3d1bb03e1fb97fee5ec9a9dc39e7ea459be Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:45 +0300
Subject: mlxsw: spectrum: Rename IPv6 ND trap group

The IPv6 Neighbour Discovery (ND) group will be used for various IPv6
packets, not all of which fall under the definition of ND, so rename it
to "IPV6" which is more appropriate.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      |  2 +-
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 10 +++++-----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 5b5b96a82a34..6df7cc8a69f1 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5542,7 +5542,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index e8c9fc4cb6fb..b420aa292d7c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4078,15 +4078,15 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
 	MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
 	MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6_ND,
+	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6,
 			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, TRAP_TO_CPU, IPV6_ND,
+	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, TRAP_TO_CPU, IPV6,
 			  false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU,
 			  NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISEMENT, TRAP_TO_CPU,
 			  NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6_ND, false),
+	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
@@ -4165,7 +4165,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
@@ -4239,7 +4239,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			tc = 3;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6_ND:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
 			priority = 2;
-- 
cgit v1.2.3-59-g8ed1b


From 954eef26778cb878327bb38b937a0fcf51b8445b Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:46 +0300
Subject: mlxsw: spectrum: Use same trap group for various IPv6 packets

Group these various IPv6 packets (e.g., router solicitations, router
advertisement) together and subject them to the same policer.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b420aa292d7c..141a605582c6 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4067,9 +4067,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
 	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, ROUTER_EXP,
+	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, IPV6, false),
+	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, IPV6, false),
+	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, IPV6,
 			  false),
 	MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
 	MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
-- 
cgit v1.2.3-59-g8ed1b


From d322309d726b77226cc4880ee6c38c26c1dac999 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:47 +0300
Subject: mlxsw: spectrum: Use separate trap group for FID miss

When a packet enters the device it is classified to a filtering
identifier (FID) based on the ingress port and VLAN. The FID miss trap
is used to trap packets for which a FID could not be found.

In mlxsw this trap should only be triggered when a port is enslaved to
an OVS bridge and a matching ACL rule could not be found, so as to
trigger learning.

These packets are therefore completely unrelated to packets hitting
local routes and should be in a different group. Move them.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 +++-
 2 files changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 6df7cc8a69f1..b55a833a5d17 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5549,6 +5549,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS,
 
 	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 141a605582c6..ac71d67457aa 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4052,7 +4052,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, FID_MISS, false),
 	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU,
 			  MC_SNOOPING, false),
 	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU,
@@ -4169,6 +4169,7 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			rate = 1024;
 			burst_size = 7;
 			break;
@@ -4248,6 +4249,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			priority = 1;
 			tc = 1;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 1e3cd589421af8fed677c017f12a5f78f59aa427 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:48 +0300
Subject: mlxsw: spectrum: Use same trap group for local routes and link-local
 destination

Packets with an IPv6 link-local destination (i.e., fe80::/10) should not
be forwarded and are therefore trapped to the CPU for local delivery.
Since these packets are trapped for the same logical reason as packets
hitting local routes, associate both traps with the same group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index ac71d67457aa..5fe51ee8a206 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4066,7 +4066,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, IPV6,
-- 
cgit v1.2.3-59-g8ed1b


From 025b7de7f4e9b26c31c511e84a7cef14605e70ef Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:49 +0300
Subject: mlxsw: spectrum: Reduce priority of locally delivered packets

To align with recent recommended values. Will be configurable by future
patches.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c            | 2 +-
 tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5fe51ee8a206..b10e5aeaedef 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4235,11 +4235,11 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			tc = 4;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 			priority = 3;
 			tc = 3;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
diff --git a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
index 58f3a05f08af..7d9e73a43a49 100755
--- a/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
+++ b/tools/testing/selftests/drivers/net/mlxsw/sharedbuffer.sh
@@ -15,7 +15,7 @@ source mlxsw_lib.sh
 SB_POOL_ING=0
 SB_POOL_EGR_CPU=10
 
-SB_ITC_CPU_IP=3
+SB_ITC_CPU_IP=2
 SB_ITC_CPU_ARP=2
 SB_ITC=0
 
-- 
cgit v1.2.3-59-g8ed1b


From 03cb0ce0ddc8c00b7c4c5becff7fb7cf44b00a91 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:50 +0300
Subject: mlxsw: switchx2: Move SwitchX-2 trap groups out of main enum

The number of Spectrum trap groups is not infinite, but two identifiers
are occupied by SwitchX-2 specific trap groups. Free these identifiers
by moving them out of the main enum.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 2 --
 drivers/net/ethernet/mellanox/mlxsw/switchx2.c | 5 +++++
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index b55a833a5d17..fd5e18b71114 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5526,8 +5526,6 @@ MLXSW_ITEM32(reg, htgt, type, 0x00, 8, 4);
 
 enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_EMAD,
-	MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX,
-	MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
index 2503f61db5fb..b438f5576e18 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/switchx2.c
@@ -1404,6 +1404,11 @@ err_port_module_info_get:
 	return err;
 }
 
+enum {
+	MLXSW_REG_HTGT_TRAP_GROUP_SX2_RX = 1,
+	MLXSW_REG_HTGT_TRAP_GROUP_SX2_CTRL = 2,
+};
+
 #define MLXSW_SX_RXL(_trap_id) \
 	MLXSW_RXL(mlxsw_sx_rx_listener_func, _trap_id, TRAP_TO_CPU,	\
 		  false, SX2_RX, FORWARD)
-- 
cgit v1.2.3-59-g8ed1b


From b87bde80daf77649efba90ef0b1079b84be92d39 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:51 +0300
Subject: mlxsw: spectrum_trap: Do not hard code "thin" policer identifier

As explained in commit e612523041ab ("mlxsw: spectrum_trap: Introduce
dummy group with thin policer"), the purpose of the "thin" policer is to
pass as few packets as possible to the CPU.

The identifier of this policer is currently set according to the maximum
number of used trap groups, but this is fragile: On Spectrum-1 the
maximum number of policers is less than the maximum number of trap
groups, which might result in an invalid policer identifier in case the
number of used trap groups grows beyond the policer limit.

Solve this by dynamically allocating the policer identifier.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c | 17 +++++++++++------
 drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h |  2 ++
 2 files changed, 13 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 78f983c1a056..f4b812276a5a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -441,8 +441,6 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 	},
 };
 
-#define MLXSW_SP_THIN_POLICER_ID	(MLXSW_REG_HTGT_TRAP_GROUP_MAX + 1)
-
 static struct mlxsw_sp_trap_policer_item *
 mlxsw_sp_trap_policer_item_lookup(struct mlxsw_sp *mlxsw_sp, u32 id)
 {
@@ -487,14 +485,21 @@ mlxsw_sp_trap_item_lookup(struct mlxsw_sp *mlxsw_sp, u16 id)
 
 static int mlxsw_sp_trap_cpu_policers_set(struct mlxsw_sp *mlxsw_sp)
 {
+	struct mlxsw_sp_trap *trap = mlxsw_sp->trap;
 	char qpcr_pl[MLXSW_REG_QPCR_LEN];
+	u16 hw_id;
 
 	/* The purpose of "thin" policer is to drop as many packets
 	 * as possible. The dummy group is using it.
 	 */
-	__set_bit(MLXSW_SP_THIN_POLICER_ID, mlxsw_sp->trap->policers_usage);
-	mlxsw_reg_qpcr_pack(qpcr_pl, MLXSW_SP_THIN_POLICER_ID,
-			    MLXSW_REG_QPCR_IR_UNITS_M, false, 1, 4);
+	hw_id = find_first_zero_bit(trap->policers_usage, trap->max_policers);
+	if (WARN_ON(hw_id == trap->max_policers))
+		return -ENOBUFS;
+
+	__set_bit(hw_id, trap->policers_usage);
+	trap->thin_policer_hw_id = hw_id;
+	mlxsw_reg_qpcr_pack(qpcr_pl, hw_id, MLXSW_REG_QPCR_IR_UNITS_M,
+			    false, 1, 4);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(qpcr), qpcr_pl);
 }
 
@@ -503,7 +508,7 @@ static int mlxsw_sp_trap_dummy_group_init(struct mlxsw_sp *mlxsw_sp)
 	char htgt_pl[MLXSW_REG_HTGT_LEN];
 
 	mlxsw_reg_htgt_pack(htgt_pl, MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY,
-			    MLXSW_SP_THIN_POLICER_ID, 0, 1);
+			    mlxsw_sp->trap->thin_policer_hw_id, 0, 1);
 	return mlxsw_reg_write(mlxsw_sp->core, MLXSW_REG(htgt), htgt_pl);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
index 759146897b3a..13ac412f4d53 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.h
@@ -17,6 +17,8 @@ struct mlxsw_sp_trap {
 	struct mlxsw_sp_trap_item *trap_items_arr;
 	u64 traps_count; /* Number of registered traps */
 
+	u16 thin_policer_hw_id;
+
 	u64 max_policers;
 	unsigned long policers_usage[]; /* Usage bitmap */
 };
-- 
cgit v1.2.3-59-g8ed1b


From 500769bebef7a3b1a60c3f6028690677332446c6 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:52 +0300
Subject: mlxsw: reg: Move all trap groups under the same enum

After the previous patch the split is no longer necessary and all the
trap groups can be moved under the same enum.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h | 10 +++-------
 1 file changed, 3 insertions(+), 7 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index fd5e18b71114..586a2f37fd12 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5548,18 +5548,14 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS,
-
-	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
-	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
-};
-
-enum mlxsw_reg_htgt_discard_trap_group {
-	MLXSW_REG_HTGT_DISCARD_TRAP_GROUP_BASE = MLXSW_REG_HTGT_TRAP_GROUP_MAX,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
+
+	__MLXSW_REG_HTGT_TRAP_GROUP_MAX,
+	MLXSW_REG_HTGT_TRAP_GROUP_MAX = __MLXSW_REG_HTGT_TRAP_GROUP_MAX - 1
 };
 
 /* reg_htgt_trap_group
-- 
cgit v1.2.3-59-g8ed1b


From 1260e083d4cff4dc223bb0913c35ba5e92d1b379 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:53 +0300
Subject: mlxsw: spectrum: Share one group for all locally delivered packets

Routed IP packets with the Router Alert option need to be trapped to
the CPU as they might need to be locally delivered to raw sockets with
the IP_ROUTER_ALERT / IPV6_ROUTER_ALERT socket option.

Move them to the same group with other packets that might need to be
trapped following route lookup.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index b10e5aeaedef..016df2c14f0e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4089,8 +4089,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, ROUTER_EXP, false),
+	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, IP2ME, false),
+	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
 	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD,
-- 
cgit v1.2.3-59-g8ed1b


From dacc4e3acf542ce4c8a6dcc1a350decb443cf7f3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:54 +0300
Subject: mlxsw: spectrum: Treat IPv6 link-local SIP as an exception

IPv6 packets that need to be forwarded and have a link-local source IP are
dropped by the kernel and an ICMPv6 "Destination unreachable" is sent to
the sending host.

As such, change the trap group of such packets so that they do not
interfere with IPv6 management packets. In the future this trap will be
exposed as an exception via devlink-trap.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 016df2c14f0e..5cb7fd650156 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4067,7 +4067,7 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
 	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, IP2ME, false),
-	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, IPV6, false),
+	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
 	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, IPV6,
 			  false),
-- 
cgit v1.2.3-59-g8ed1b


From 9785b92b4443f2862495c9aa0ee8caed6f43523d Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:55 +0300
Subject: mlxsw: spectrum: Add packet traps for BFD packets

Bidirectional Forwarding Detection (BFD) provides "low-overhead,
short-duration detection of failures in the path between adjacent
forwarding engines" (RFC 5880).

This is accomplished by exchanging BFD packets between the two
forwarding engines. Up until now these packets were trapped via the
general local delivery (i.e., IP2ME) trap which also traps a lot of
other packets that are not as time-sensitive as BFD packets.

Expose dedicated traps for BFD packets so that user space could
configure a dedicated policer for them.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h      | 1 +
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c | 7 +++++++
 drivers/net/ethernet/mellanox/mlxsw/trap.h     | 2 ++
 3 files changed, 10 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 586a2f37fd12..38fa7304af0c 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5548,6 +5548,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 5cb7fd650156..c598ae9ed106 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4093,6 +4093,8 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
 	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
+	MLXSW_SP_RXL_MARK(IPV4_BFD, TRAP_TO_CPU, BFD, false),
+	MLXSW_SP_RXL_MARK(IPV6_BFD, TRAP_TO_CPU, BFD, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD,
 			     ROUTER_EXP, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD,
@@ -4185,6 +4187,10 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			rate = 360;
 			burst_size = 7;
 			break;
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD:
+			rate = 20 * 1024;
+			burst_size = 10;
+			break;
 		default:
 			continue;
 		}
@@ -4226,6 +4232,7 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
+		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD:
 			priority = 5;
 			tc = 5;
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/trap.h b/drivers/net/ethernet/mellanox/mlxsw/trap.h
index 1b89472a0908..28e60697d14e 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/trap.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/trap.h
@@ -66,6 +66,8 @@ enum {
 	MLXSW_TRAP_ID_IPIP_DECAP_ERROR = 0xB1,
 	MLXSW_TRAP_ID_NVE_DECAP_ARP = 0xB8,
 	MLXSW_TRAP_ID_NVE_ENCAP_ARP = 0xBD,
+	MLXSW_TRAP_ID_IPV4_BFD = 0xD0,
+	MLXSW_TRAP_ID_IPV6_BFD = 0xD1,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV4 = 0xD6,
 	MLXSW_TRAP_ID_ROUTER_ALERT_IPV6 = 0xD7,
 	MLXSW_TRAP_ID_DISCARD_NON_ROUTABLE = 0x11A,
-- 
cgit v1.2.3-59-g8ed1b


From 10d3757fcb07a83101b6578a33bd800e81d27eea Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Tue, 26 May 2020 02:05:56 +0300
Subject: mlxsw: spectrum_router: Allow programming link-local prefix routes

The device has a trap for IPv6 packets that need to be routed and have a
unicast link-local destination IP (i.e., fe80::/10). This allows mlxsw
to ignore link-local routes, as the packets will be trapped to the CPU
in any case.

However, since link-local routes are not programmed, it is possible for
routed packets to hit the default route which might also be programmed
to trap packets. This means that packets with a link-local destination
IP might be trapped for the wrong reason.

To overcome this, allow programming link-local prefix routes (usually
one fe80::/64 per-table), so that the packets will be forwarded until
reaching the link-local trap.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index 71aee4914619..c939b3596566 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -5003,9 +5003,11 @@ static bool mlxsw_sp_fib6_rt_should_ignore(const struct fib6_info *rt)
 {
 	/* Packets with link-local destination IP arriving to the router
 	 * are trapped to the CPU, so no need to program specific routes
-	 * for them.
+	 * for them. Only allow prefix routes (usually one fe80::/64) so
+	 * that packets are trapped for the right reason.
 	 */
-	if (ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL)
+	if ((ipv6_addr_type(&rt->fib6_dst.addr) & IPV6_ADDR_LINKLOCAL) &&
+	    (rt->fib6_flags & (RTF_LOCAL | RTF_ANYCAST)))
 		return true;
 
 	/* Multicast routes aren't supported, so ignore them. Neighbour
-- 
cgit v1.2.3-59-g8ed1b


From 81f3dc9349ce0bf7b8447f147f45e70f0a5b36a6 Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Date: Tue, 26 May 2020 14:27:51 +0200
Subject: macvlan: Skip loopback packets in RX handler

Ignore loopback-originating packets early on and don't try to process an L2
header where it doesn't exist. The very similar br_handle_frame() in bridge
code performs exactly the same check.

This is an example of such ICMPv6 packet:

skb len=96 headroom=40 headlen=96 tailroom=56
mac=(40,0) net=(40,40) trans=80
shinfo(txflags=0 nr_frags=0 gso(size=0 type=0 segs=0))
csum(0xae2e9a2f ip_summed=1 complete_sw=0 valid=0 level=0)
hash(0xc97ebd88 sw=1 l4=1) proto=0x86dd pkttype=5 iif=24
dev name=etha01.212 feat=0x0x0000000040005000
skb headroom: 00000000: 00 7c 86 52 84 88 ff ff 00 00 00 00 00 00 08 00
skb headroom: 00000010: 45 00 00 9e 5d 5c 40 00 40 11 33 33 00 00 00 01
skb headroom: 00000020: 02 40 43 80 00 00 86 dd
skb linear:   00000000: 60 09 88 bd 00 38 3a ff fe 80 00 00 00 00 00 00
skb linear:   00000010: 00 40 43 ff fe 80 00 00 ff 02 00 00 00 00 00 00
skb linear:   00000020: 00 00 00 00 00 00 00 01 86 00 61 00 40 00 00 2d
skb linear:   00000030: 00 00 00 00 00 00 00 00 03 04 40 e0 00 00 01 2c
skb linear:   00000040: 00 00 00 78 00 00 00 00 fd 5f 42 68 23 87 a8 81
skb linear:   00000050: 00 00 00 00 00 00 00 00 01 01 02 40 43 80 00 00
skb tailroom: 00000000: ...
skb tailroom: 00000010: ...
skb tailroom: 00000020: ...
skb tailroom: 00000030: ...

Call Trace, how it happens exactly:
 ...
 macvlan_handle_frame+0x321/0x425 [macvlan]
 ? macvlan_forward_source+0x110/0x110 [macvlan]
 __netif_receive_skb_core+0x545/0xda0
 ? enqueue_task_fair+0xe5/0x8e0
 ? __netif_receive_skb_one_core+0x36/0x70
 __netif_receive_skb_one_core+0x36/0x70
 process_backlog+0x97/0x140
 net_rx_action+0x1eb/0x350
 ? __hrtimer_run_queues+0x136/0x2e0
 __do_softirq+0xe3/0x383
 do_softirq_own_stack+0x2a/0x40
 </IRQ>
 do_softirq.part.4+0x4e/0x50
 netif_rx_ni+0x60/0xd0
 dev_loopback_xmit+0x83/0xf0
 ip6_finish_output2+0x575/0x590 [ipv6]
 ? ip6_cork_release.isra.1+0x64/0x90 [ipv6]
 ? __ip6_make_skb+0x38d/0x680 [ipv6]
 ? ip6_output+0x6c/0x140 [ipv6]
 ip6_output+0x6c/0x140 [ipv6]
 ip6_send_skb+0x1e/0x60 [ipv6]
 rawv6_sendmsg+0xc4b/0xe10 [ipv6]
 ? proc_put_long+0xd0/0xd0
 ? rw_copy_check_uvector+0x4e/0x110
 ? sock_sendmsg+0x36/0x40
 sock_sendmsg+0x36/0x40
 ___sys_sendmsg+0x2b6/0x2d0
 ? proc_dointvec+0x23/0x30
 ? addrconf_sysctl_forward+0x8d/0x250 [ipv6]
 ? dev_forward_change+0x130/0x130 [ipv6]
 ? _raw_spin_unlock+0x12/0x30
 ? proc_sys_call_handler.isra.14+0x9f/0x110
 ? __call_rcu+0x213/0x510
 ? get_max_files+0x10/0x10
 ? trace_hardirqs_on+0x2c/0xe0
 ? __sys_sendmsg+0x63/0xa0
 __sys_sendmsg+0x63/0xa0
 do_syscall_64+0x6c/0x1e0
 entry_SYSCALL_64_after_hwframe+0x49/0xbe

Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/macvlan.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/macvlan.c b/drivers/net/macvlan.c
index 9a419d5102ce..563aed5b3d9f 100644
--- a/drivers/net/macvlan.c
+++ b/drivers/net/macvlan.c
@@ -448,6 +448,10 @@ static rx_handler_result_t macvlan_handle_frame(struct sk_buff **pskb)
 	int ret;
 	rx_handler_result_t handle_res;
 
+	/* Packets from dev_loopback_xmit() do not have L2 header, bail out */
+	if (unlikely(skb->pkt_type == PACKET_LOOPBACK))
+		return RX_HANDLER_PASS;
+
 	port = macvlan_port_get_rcu(skb->dev);
 	if (is_multicast_ether_addr(eth->h_dest)) {
 		unsigned int hash;
-- 
cgit v1.2.3-59-g8ed1b


From 53c0ec4f4db19d430570bbbfc80ce899419d29f4 Mon Sep 17 00:00:00 2001
From: Armin Wolf <W_Armin@gmx.de>
Date: Tue, 26 May 2020 20:03:02 +0200
Subject: ne2k-pci: Fix various coding-style issues and improve printk() usage

Fix a ton of minor checkpatch errors/warnings, remove the version
printing at module init/when a device is found, and use MODULE_VERSION
instead. Also modify the RTL8029 PCI string to include the compatible
RTL8029AS NIC.
The only major issue remaining is the missing SPDX tag, but since the
exact version of the GPL is not stated anywhere inside the file, it's
impossible to add such a tag at the moment.
But maybe it is possible, since 8390.h states Donald Becker's 8390
drivers are licensed under GPL 2.2 only (= GPL-2.0-only ?).
The kernel module containing this patch compiles and runs without
problems on an RTL8029AS-based NE2000 clone card with kernel 5.7.0-rc6.

Signed-off-by: Armin Wolf <W_Armin@gmx.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/8390/ne2k-pci.c | 345 ++++++++++++++++++-----------------
 1 file changed, 182 insertions(+), 163 deletions(-)

diff --git a/drivers/net/ethernet/8390/ne2k-pci.c b/drivers/net/ethernet/8390/ne2k-pci.c
index 42985a82321a..77d78b4c59c4 100644
--- a/drivers/net/ethernet/8390/ne2k-pci.c
+++ b/drivers/net/ethernet/8390/ne2k-pci.c
@@ -1,39 +1,43 @@
-/* ne2k-pci.c: A NE2000 clone on PCI bus driver for Linux. */
-/*
-	A Linux device driver for PCI NE2000 clones.
-
-	Authors and other copyright holders:
-	1992-2000 by Donald Becker, NE2000 core and various modifications.
-	1995-1998 by Paul Gortmaker, core modifications and PCI support.
-	Copyright 1993 assigned to the United States Government as represented
-	by the Director, National Security Agency.
-
-	This software may be used and distributed according to the terms of
-	the GNU General Public License (GPL), incorporated herein by reference.
-	Drivers based on or derived from this code fall under the GPL and must
-	retain the authorship, copyright and license notice.  This file is not
-	a complete program and may only be used when the entire operating
-	system is licensed under the GPL.
-
-	The author may be reached as becker@scyld.com, or C/O
-	Scyld Computing Corporation
-	410 Severn Ave., Suite 210
-	Annapolis MD 21403
-
-	Issues remaining:
-	People are making PCI ne2000 clones! Oh the horror, the horror...
-	Limited full-duplex support.
-*/
+/* A Linux device driver for PCI NE2000 clones.
+ *
+ * Authors and other copyright holders:
+ * 1992-2000 by Donald Becker, NE2000 core and various modifications.
+ * 1995-1998 by Paul Gortmaker, core modifications and PCI support.
+ * Copyright 1993 assigned to the United States Government as represented
+ * by the Director, National Security Agency.
+ *
+ * This software may be used and distributed according to the terms of
+ * the GNU General Public License (GPL), incorporated herein by reference.
+ * Drivers based on or derived from this code fall under the GPL and must
+ * retain the authorship, copyright and license notice.  This file is not
+ * a complete program and may only be used when the entire operating
+ * system is licensed under the GPL.
+ *
+ * The author may be reached as becker@scyld.com, or C/O
+ * Scyld Computing Corporation
+ * 410 Severn Ave., Suite 210
+ * Annapolis MD 21403
+ *
+ * Issues remaining:
+ * People are making PCI NE2000 clones! Oh the horror, the horror...
+ * Limited full-duplex support.
+ */
 
 #define DRV_NAME	"ne2k-pci"
+#define DRV_DESCRIPTION	"PCI NE2000 clone driver"
+#define DRV_AUTHOR	"Donald Becker / Paul Gortmaker"
 #define DRV_VERSION	"1.03"
 #define DRV_RELDATE	"9/22/2003"
 
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
 
 /* The user-configurable values.
-   These may be modified when a driver module is loaded.*/
+ * These may be modified when a driver module is loaded.
+ */
+
+/* More are supported, limit only on options */
+#define MAX_UNITS 8
 
-#define MAX_UNITS 8				/* More are supported, limit only on options */
 /* Used to pass the full-duplex flag, etc. */
 static int full_duplex[MAX_UNITS];
 static int options[MAX_UNITS];
@@ -52,7 +56,7 @@ static int options[MAX_UNITS];
 #include <linux/netdevice.h>
 #include <linux/etherdevice.h>
 
-#include <asm/io.h>
+#include <linux/io.h>
 #include <asm/irq.h>
 #include <linux/uaccess.h>
 
@@ -60,20 +64,14 @@ static int options[MAX_UNITS];
 
 static u32 ne2k_msg_enable;
 
-/* These identify the driver base version and may not be removed. */
-static const char version[] =
-	KERN_INFO DRV_NAME ".c:v" DRV_VERSION " " DRV_RELDATE
-	" D. Becker/P. Gortmaker\n";
-
 #if defined(__powerpc__)
 #define inl_le(addr)  le32_to_cpu(inl(addr))
 #define inw_le(addr)  le16_to_cpu(inw(addr))
 #endif
 
-#define PFX DRV_NAME ": "
-
-MODULE_AUTHOR("Donald Becker / Paul Gortmaker");
-MODULE_DESCRIPTION("PCI NE2000 clone driver");
+MODULE_AUTHOR(DRV_AUTHOR);
+MODULE_DESCRIPTION(DRV_DESCRIPTION);
+MODULE_VERSION(DRV_VERSION);
 MODULE_LICENSE("GPL");
 
 module_param_named(msg_enable, ne2k_msg_enable, uint, 0444);
@@ -83,7 +81,8 @@ MODULE_PARM_DESC(msg_enable, "Debug message level (see linux/netdevice.h for bit
 MODULE_PARM_DESC(options, "Bit 5: full duplex");
 MODULE_PARM_DESC(full_duplex, "full duplex setting(s) (1)");
 
-/* Some defines that people can play with if so inclined. */
+/* Some defines that people can play with if so inclined.
+ */
 
 /* Use 32 bit data-movement operations instead of 16 bit. */
 #define USE_LONGIO
@@ -91,14 +90,18 @@ MODULE_PARM_DESC(full_duplex, "full duplex setting(s) (1)");
 /* Do we implement the read before write bugfix ? */
 /* #define NE_RW_BUGFIX */
 
-/* Flags.  We rename an existing ei_status field to store flags! */
-/* Thus only the low 8 bits are usable for non-init-time flags. */
+/* Flags.  We rename an existing ei_status field to store flags!
+ * Thus only the low 8 bits are usable for non-init-time flags.
+ */
 #define ne2k_flags reg0
+
 enum {
-	ONLY_16BIT_IO=8, ONLY_32BIT_IO=4,	/* Chip can do only 16/32-bit xfers. */
-	FORCE_FDX=0x20,						/* User override. */
-	REALTEK_FDX=0x40, HOLTEK_FDX=0x80,
-	STOP_PG_0x60=0x100,
+	/* Chip can do only 16/32-bit xfers. */
+	ONLY_16BIT_IO = 8, ONLY_32BIT_IO = 4,
+	/* User override. */
+	FORCE_FDX = 0x20,
+	REALTEK_FDX = 0x40, HOLTEK_FDX = 0x80,
+	STOP_PG_0x60 = 0x100,
 };
 
 enum ne2k_pci_chipsets {
@@ -120,7 +123,7 @@ static struct {
 	char *name;
 	int flags;
 } pci_clone_list[] = {
-	{"RealTek RTL-8029", REALTEK_FDX},
+	{"RealTek RTL-8029(AS)", REALTEK_FDX},
 	{"Winbond 89C940", 0},
 	{"Compex RL2000", 0},
 	{"KTI ET32P2", 0},
@@ -149,13 +152,14 @@ static const struct pci_device_id ne2k_pci_tbl[] = {
 	{ 0x8c4a, 0x1980, PCI_ANY_ID, PCI_ANY_ID, 0, 0, CH_Winbond_89C940_8c4a },
 	{ 0, }
 };
+
 MODULE_DEVICE_TABLE(pci, ne2k_pci_tbl);
 
 
 /* ---- No user-serviceable parts below ---- */
 
 #define NE_BASE	 (dev->base_addr)
-#define NE_CMD	 	0x00
+#define NE_CMD		0x00
 #define NE_DATAPORT	0x10	/* NatSemi-defined port window offset. */
 #define NE_RESET	0x1f	/* Issue a read to reset, a write to clear. */
 #define NE_IO_EXTENT	0x20
@@ -168,18 +172,20 @@ static int ne2k_pci_open(struct net_device *dev);
 static int ne2k_pci_close(struct net_device *dev);
 
 static void ne2k_pci_reset_8390(struct net_device *dev);
-static void ne2k_pci_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr,
-			  int ring_page);
+static void ne2k_pci_get_8390_hdr(struct net_device *dev,
+				  struct e8390_pkt_hdr *hdr, int ring_page);
 static void ne2k_pci_block_input(struct net_device *dev, int count,
-			  struct sk_buff *skb, int ring_offset);
+				 struct sk_buff *skb, int ring_offset);
 static void ne2k_pci_block_output(struct net_device *dev, const int count,
-		const unsigned char *buf, const int start_page);
+				  const unsigned char *buf,
+				  const int start_page);
 static const struct ethtool_ops ne2k_pci_ethtool_ops;
 
 
 /* There is no room in the standard 8390 structure for extra info we need,
-   so we build a meta/outer-wrapper structure.. */
+ * so we build a meta/outer-wrapper structure..
+ */
 struct ne2k_pci_card {
 	struct net_device *dev;
 	struct pci_dev *pci_dev;
@@ -187,18 +193,17 @@ struct ne2k_pci_card {
 
 
-/*
-  NEx000-clone boards have a Station Address (SA) PROM (SAPROM) in the packet
-  buffer memory space.  By-the-spec NE2000 clones have 0x57,0x57 in bytes
-  0x0e,0x0f of the SAPROM, while other supposed NE2000 clones must be
-  detected by their SA prefix.
-
-  Reading the SAPROM from a word-wide card with the 8390 set in byte-wide
-  mode results in doubled values, which can be detected and compensated for.
-
-  The probe is also responsible for initializing the card and filling
-  in the 'dev' and 'ei_status' structures.
-*/
+/* NEx000-clone boards have a Station Address (SA) PROM (SAPROM) in the packet
+ * buffer memory space.  By-the-spec NE2000 clones have 0x57,0x57 in bytes
+ * 0x0e,0x0f of the SAPROM, while other supposed NE2000 clones must be
+ * detected by their SA prefix.
+ *
+ * Reading the SAPROM from a word-wide card with the 8390 set in byte-wide
+ * mode results in doubled values, which can be detected and compensated for.
+ *
+ * The probe is also responsible for initializing the card and filling
+ * in the 'dev' and 'ei_status' structures.
+ */
 
 static const struct net_device_ops ne2k_netdev_ops = {
 	.ndo_open		= ne2k_pci_open,
@@ -208,7 +213,7 @@ static const struct net_device_ops ne2k_netdev_ops = {
 	.ndo_get_stats		= ei_get_stats,
 	.ndo_set_rx_mode	= ei_set_multicast_list,
 	.ndo_validate_addr	= eth_validate_addr,
-	.ndo_set_mac_address 	= eth_mac_addr,
+	.ndo_set_mac_address	= eth_mac_addr,
 #ifdef CONFIG_NET_POLL_CONTROLLER
 	.ndo_poll_controller = ei_poll,
 #endif
@@ -227,28 +232,21 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 	int flags = pci_clone_list[chip_idx].flags;
 	struct ei_device *ei_local;
 
-/* when built into the kernel, we only print version if device is found */
-#ifndef MODULE
-	static int printed_version;
-	if (!printed_version++)
-		printk(version);
-#endif
-
 	fnd_cnt++;
 
-	i = pci_enable_device (pdev);
+	i = pci_enable_device(pdev);
 	if (i)
 		return i;
 
-	ioaddr = pci_resource_start (pdev, 0);
+	ioaddr = pci_resource_start(pdev, 0);
 	irq = pdev->irq;
 
-	if (!ioaddr || ((pci_resource_flags (pdev, 0) & IORESOURCE_IO) == 0)) {
+	if (!ioaddr || ((pci_resource_flags(pdev, 0) & IORESOURCE_IO) == 0)) {
 		dev_err(&pdev->dev, "no I/O resource at PCI BAR #0\n");
 		goto err_out;
 	}
 
-	if (request_region (ioaddr, NE_IO_EXTENT, DRV_NAME) == NULL) {
+	if (!request_region(ioaddr, NE_IO_EXTENT, DRV_NAME)) {
 		dev_err(&pdev->dev, "I/O resource 0x%x @ 0x%lx busy\n",
 			NE_IO_EXTENT, ioaddr);
 		goto err_out;
@@ -261,14 +259,17 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 	/* Do a preliminary verification that we have a 8390. */
 	{
 		int regd;
-		outb(E8390_NODMA+E8390_PAGE1+E8390_STOP, ioaddr + E8390_CMD);
+
+		outb(E8390_NODMA + E8390_PAGE1 + E8390_STOP, ioaddr + E8390_CMD);
 		regd = inb(ioaddr + 0x0d);
 		outb(0xff, ioaddr + 0x0d);
-		outb(E8390_NODMA+E8390_PAGE0, ioaddr + E8390_CMD);
-		inb(ioaddr + EN0_COUNTER0); /* Clear the counter by reading. */
+		outb(E8390_NODMA + E8390_PAGE0, ioaddr + E8390_CMD);
+		/* Clear the counter by reading. */
+		inb(ioaddr + EN0_COUNTER0);
 		if (inb(ioaddr + EN0_COUNTER0) != 0) {
 			outb(reg0, ioaddr);
-			outb(regd, ioaddr + 0x0d);	/* Restore the old values. */
+			/*  Restore the old values. */
+			outb(regd, ioaddr + 0x0d);
 			goto err_out_free_res;
 		}
 	}
@@ -291,9 +292,9 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 
 		outb(inb(ioaddr + NE_RESET), ioaddr + NE_RESET);
 
-		/* This looks like a horrible timing loop, but it should never take
-		   more than a few cycles.
-		*/
+		/* This looks like a horrible timing loop, but it should never
+		 * take more than a few cycles.
+		 */
 		while ((inb(ioaddr + EN0_ISR) & ENISR_RESET) == 0)
 			/* Limit wait: '2' avoids jiffy roll-over. */
 			if (jiffies - reset_start_time > 2) {
@@ -301,42 +302,53 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 					"Card failure (no reset ack).\n");
 				goto err_out_free_netdev;
 			}
-
-		outb(0xff, ioaddr + EN0_ISR);		/* Ack all intr. */
+		/* Ack all intr. */
+		outb(0xff, ioaddr + EN0_ISR);
 	}
 
 	/* Read the 16 bytes of station address PROM.
-	   We must first initialize registers, similar to NS8390_init(eifdev, 0).
-	   We can't reliably read the SAPROM address without this.
-	   (I learned the hard way!). */
+	 * We must first initialize registers, similar
+	 * to NS8390_init(eifdev, 0).
+	 * We can't reliably read the SAPROM address without this.
+	 * (I learned the hard way!).
+	 */
 	{
 		struct {unsigned char value, offset; } program_seq[] = {
-			{E8390_NODMA+E8390_PAGE0+E8390_STOP, E8390_CMD}, /* Select page 0*/
-			{0x49,	EN0_DCFG},	/* Set word-wide access. */
-			{0x00,	EN0_RCNTLO},	/* Clear the count regs. */
+			/* Select page 0 */
+			{E8390_NODMA + E8390_PAGE0 + E8390_STOP, E8390_CMD},
+			/* Set word-wide access */
+			{0x49,	EN0_DCFG},
+			/* Clear the count regs. */
+			{0x00,	EN0_RCNTLO},
 			{0x00,	EN0_RCNTHI},
-			{0x00,	EN0_IMR},	/* Mask completion irq. */
+			/* Mask completion IRQ */
+			{0x00,	EN0_IMR},
 			{0xFF,	EN0_ISR},
-			{E8390_RXOFF, EN0_RXCR},	/* 0x20  Set to monitor */
-			{E8390_TXOFF, EN0_TXCR},	/* 0x02  and loopback mode. */
+			/* 0x20 Set to monitor */
+			{E8390_RXOFF, EN0_RXCR},
+			/* 0x02 and loopback mode */
+			{E8390_TXOFF, EN0_TXCR},
 			{32,	EN0_RCNTLO},
 			{0x00,	EN0_RCNTHI},
-			{0x00,	EN0_RSARLO},	/* DMA starting at 0x0000. */
+			/* DMA starting at 0x0000 */
+			{0x00,	EN0_RSARLO},
 			{0x00,	EN0_RSARHI},
 			{E8390_RREAD+E8390_START, E8390_CMD},
 		};
 		for (i = 0; i < ARRAY_SIZE(program_seq); i++)
-			outb(program_seq[i].value, ioaddr + program_seq[i].offset);
+			outb(program_seq[i].value,
+			     ioaddr + program_seq[i].offset);
 
 	}
 
 	/* Note: all PCI cards have at least 16 bit access, so we don't have
-	   to check for 8 bit cards.  Most cards permit 32 bit access. */
+	 * to check for 8 bit cards.  Most cards permit 32 bit access.
+	 */
 	if (flags & ONLY_32BIT_IO) {
 		for (i = 0; i < 4 ; i++)
 			((u32 *)SA_prom)[i] = le32_to_cpu(inl(ioaddr + NE_DATAPORT));
 	} else
-		for(i = 0; i < 32 /*sizeof(SA_prom)*/; i++)
+		for (i = 0; i < 32 /* sizeof(SA_prom) */; i++)
 			SA_prom[i] = inb(ioaddr + NE_DATAPORT);
 
 	/* We always set the 8390 registers for word mode. */
@@ -356,7 +368,7 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 	ei_status.word16 = 1;
 	ei_status.ne2k_flags = flags;
 	if (fnd_cnt < MAX_UNITS) {
-		if (full_duplex[fnd_cnt] > 0  ||  (options[fnd_cnt] & FORCE_FDX))
+		if (full_duplex[fnd_cnt] > 0 || (options[fnd_cnt] & FORCE_FDX))
 			ei_status.ne2k_flags |= FORCE_FDX;
 	}
 
@@ -388,16 +400,15 @@ static int ne2k_pci_init_one(struct pci_dev *pdev,
 	return 0;
 
 err_out_free_netdev:
-	free_netdev (dev);
+	free_netdev(dev);
 err_out_free_res:
-	release_region (ioaddr, NE_IO_EXTENT);
+	release_region(ioaddr, NE_IO_EXTENT);
 err_out:
 	pci_disable_device(pdev);
 	return -ENODEV;
 }
 
-/*
- * Magic incantation sequence for full duplex on the supported cards.
+/* Magic incantation sequence for full duplex on the supported cards.
  */
 static inline int set_realtek_fdx(struct net_device *dev)
 {
@@ -431,7 +442,9 @@ static int ne2k_pci_set_fdx(struct net_device *dev)
 
 static int ne2k_pci_open(struct net_device *dev)
 {
-	int ret = request_irq(dev->irq, ei_interrupt, IRQF_SHARED, dev->name, dev);
+	int ret = request_irq(dev->irq, ei_interrupt, IRQF_SHARED,
+			      dev->name, dev);
+
 	if (ret)
 		return ret;
 
@@ -450,7 +463,8 @@ static int ne2k_pci_close(struct net_device *dev)
 }
 
 /* Hard reset the card.  This used to pause for the same period that a
-   8390 reset command required, but that shouldn't be necessary. */
+ * 8390 reset command required, but that shouldn't be necessary.
+ */
 static void ne2k_pci_reset_8390(struct net_device *dev)
 {
 	unsigned long reset_start_time = jiffies;
@@ -467,31 +481,34 @@ static void ne2k_pci_reset_8390(struct net_device *dev)
 	/* This check _should_not_ be necessary, omit eventually. */
 	while ((inb(NE_BASE+EN0_ISR) & ENISR_RESET) == 0)
 		if (jiffies - reset_start_time > 2) {
-			netdev_err(dev, "ne2k_pci_reset_8390() did not complete.\n");
+			netdev_err(dev, "%s did not complete.\n", __func__);
 			break;
 		}
-	outb(ENISR_RESET, NE_BASE + EN0_ISR);	/* Ack intr. */
+	/* Ack intr. */
+	outb(ENISR_RESET, NE_BASE + EN0_ISR);
 }
 
 /* Grab the 8390 specific header. Similar to the block_input routine, but
-   we don't need to be concerned with ring wrap as the header will be at
-   the start of a page, so we optimize accordingly. */
+ * we don't need to be concerned with ring wrap as the header will be at
+ * the start of a page, so we optimize accordingly.
+ */
 
-static void ne2k_pci_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *hdr, int ring_page)
+static void ne2k_pci_get_8390_hdr(struct net_device *dev,
+				  struct e8390_pkt_hdr *hdr, int ring_page)
 {
 
 	long nic_base = dev->base_addr;
 
-	/* This *shouldn't* happen. If it does, it's the last thing you'll see */
+	/* This *shouldn't* happen. If it does, it's the last thing you'll see
+	 */
 	if (ei_status.dmaing) {
-		netdev_err(dev, "DMAing conflict in ne2k_pci_get_8390_hdr "
-			   "[DMAstat:%d][irqlock:%d].\n",
-			   ei_status.dmaing, ei_status.irqlock);
+		netdev_err(dev, "DMAing conflict in %s [DMAstat:%d][irqlock:%d].\n",
+			   __func__, ei_status.dmaing, ei_status.irqlock);
 		return;
 	}
 
 	ei_status.dmaing |= 0x01;
-	outb(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+	outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD);
 	outb(sizeof(struct e8390_pkt_hdr), nic_base + EN0_RCNTLO);
 	outb(0, nic_base + EN0_RCNTHI);
 	outb(0, nic_base + EN0_RSARLO);		/* On page boundary */
@@ -499,20 +516,22 @@ static void ne2k_pci_get_8390_hdr(struct net_device *dev, struct e8390_pkt_hdr *
 	outb(E8390_RREAD+E8390_START, nic_base + NE_CMD);
 
 	if (ei_status.ne2k_flags & ONLY_16BIT_IO) {
-		insw(NE_BASE + NE_DATAPORT, hdr, sizeof(struct e8390_pkt_hdr)>>1);
+		insw(NE_BASE + NE_DATAPORT, hdr,
+		     sizeof(struct e8390_pkt_hdr) >> 1);
 	} else {
-		*(u32*)hdr = le32_to_cpu(inl(NE_BASE + NE_DATAPORT));
+		*(u32 *)hdr = le32_to_cpu(inl(NE_BASE + NE_DATAPORT));
 		le16_to_cpus(&hdr->count);
 	}
-
-	outb(ENISR_RDC, nic_base + EN0_ISR);	/* Ack intr. */
+	/* Ack intr. */
+	outb(ENISR_RDC, nic_base + EN0_ISR);
 	ei_status.dmaing &= ~0x01;
 }
 
 /* Block input and output, similar to the Crynwr packet driver.  If you
-   are porting to a new ethercard, look at the packet driver source for hints.
-   The NEx000 doesn't share the on-board packet memory -- you have to put
-   the packet out through the "remote DMA" dataport using outb. */
+ * are porting to a new ethercard, look at the packet driver source for hints.
+ * The NEx000 doesn't share the on-board packet memory -- you have to put
+ * the packet out through the "remote DMA" dataport using outb.
+ */
 
 static void ne2k_pci_block_input(struct net_device *dev, int count,
 				 struct sk_buff *skb, int ring_offset)
@@ -520,30 +539,30 @@ static void ne2k_pci_block_input(struct net_device *dev, int count,
 	long nic_base = dev->base_addr;
 	char *buf = skb->data;
 
-	/* This *shouldn't* happen. If it does, it's the last thing you'll see */
+	/* This *shouldn't* happen.
+	 * If it does, it's the last thing you'll see.
+	 */
 	if (ei_status.dmaing) {
-		netdev_err(dev, "DMAing conflict in ne2k_pci_block_input "
-			   "[DMAstat:%d][irqlock:%d].\n",
-			   ei_status.dmaing, ei_status.irqlock);
+		netdev_err(dev, "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__, ei_status.dmaing, ei_status.irqlock);
 		return;
 	}
 	ei_status.dmaing |= 0x01;
 	if (ei_status.ne2k_flags & ONLY_32BIT_IO)
 		count = (count + 3) & 0xFFFC;
-	outb(E8390_NODMA+E8390_PAGE0+E8390_START, nic_base+ NE_CMD);
+	outb(E8390_NODMA + E8390_PAGE0 + E8390_START, nic_base + NE_CMD);
 	outb(count & 0xff, nic_base + EN0_RCNTLO);
 	outb(count >> 8, nic_base + EN0_RCNTHI);
 	outb(ring_offset & 0xff, nic_base + EN0_RSARLO);
 	outb(ring_offset >> 8, nic_base + EN0_RSARHI);
-	outb(E8390_RREAD+E8390_START, nic_base + NE_CMD);
+	outb(E8390_RREAD + E8390_START, nic_base + NE_CMD);
 
 	if (ei_status.ne2k_flags & ONLY_16BIT_IO) {
-		insw(NE_BASE + NE_DATAPORT,buf,count>>1);
-		if (count & 0x01) {
+		insw(NE_BASE + NE_DATAPORT, buf, count >> 1);
+		if (count & 0x01)
 			buf[count-1] = inb(NE_BASE + NE_DATAPORT);
-		}
 	} else {
-		insl(NE_BASE + NE_DATAPORT, buf, count>>2);
+		insl(NE_BASE + NE_DATAPORT, buf, count >> 2);
 		if (count & 3) {
 			buf += count & ~3;
 			if (count & 2) {
@@ -556,30 +575,32 @@ static void ne2k_pci_block_input(struct net_device *dev, int count,
 				*buf = inb(NE_BASE + NE_DATAPORT);
 		}
 	}
-
-	outb(ENISR_RDC, nic_base + EN0_ISR);	/* Ack intr. */
+	/* Ack intr. */
+	outb(ENISR_RDC, nic_base + EN0_ISR);
 	ei_status.dmaing &= ~0x01;
 }
 
 static void ne2k_pci_block_output(struct net_device *dev, int count,
-				  const unsigned char *buf, const int start_page)
+		const unsigned char *buf, const int start_page)
 {
 	long nic_base = NE_BASE;
 	unsigned long dma_start;
 
 	/* On little-endian it's always safe to round the count up for
-	   word writes. */
+	 * word writes.
+	 */
 	if (ei_status.ne2k_flags & ONLY_32BIT_IO)
 		count = (count + 3) & 0xFFFC;
 	else
 		if (count & 0x01)
 			count++;
 
-	/* This *shouldn't* happen. If it does, it's the last thing you'll see */
+	/* This *shouldn't* happen.
+	 * If it does, it's the last thing you'll see.
+	 */
 	if (ei_status.dmaing) {
-		netdev_err(dev, "DMAing conflict in ne2k_pci_block_output."
-			   "[DMAstat:%d][irqlock:%d]\n",
-			   ei_status.dmaing, ei_status.irqlock);
+		netdev_err(dev, "DMAing conflict in %s [DMAstat:%d][irqlock:%d]\n",
+			   __func__, ei_status.dmaing, ei_status.irqlock);
 		return;
 	}
 	ei_status.dmaing |= 0x01;
@@ -588,9 +609,10 @@ static void ne2k_pci_block_output(struct net_device *dev, int count,
 
 #ifdef NE8390_RW_BUGFIX
 	/* Handle the read-before-write bug the same way as the
-	   Crynwr packet driver -- the NatSemi method doesn't work.
-	   Actually this doesn't always work either, but if you have
-	   problems with your NEx000 this is better than nothing! */
+	 * Crynwr packet driver -- the NatSemi method doesn't work.
+	 * Actually this doesn't always work either, but if you have
+	 * problems with your NEx000 this is better than nothing!
+	 */
 	outb(0x42, nic_base + EN0_RCNTLO);
 	outb(0x00, nic_base + EN0_RCNTHI);
 	outb(0x42, nic_base + EN0_RSARLO);
@@ -599,16 +621,16 @@ static void ne2k_pci_block_output(struct net_device *dev, int count,
 #endif
 	outb(ENISR_RDC, nic_base + EN0_ISR);
 
-   /* Now the normal output. */
+	/* Now the normal output. */
 	outb(count & 0xff, nic_base + EN0_RCNTLO);
 	outb(count >> 8,   nic_base + EN0_RCNTHI);
 	outb(0x00, nic_base + EN0_RSARLO);
 	outb(start_page, nic_base + EN0_RSARHI);
 	outb(E8390_RWRITE+E8390_START, nic_base + NE_CMD);
 	if (ei_status.ne2k_flags & ONLY_16BIT_IO) {
-		outsw(NE_BASE + NE_DATAPORT, buf, count>>1);
+		outsw(NE_BASE + NE_DATAPORT, buf, count >> 1);
 	} else {
-		outsl(NE_BASE + NE_DATAPORT, buf, count>>2);
+		outsl(NE_BASE + NE_DATAPORT, buf, count >> 2);
 		if (count & 3) {
 			buf += count & ~3;
 			if (count & 2) {
@@ -623,14 +645,15 @@ static void ne2k_pci_block_output(struct net_device *dev, int count,
 	dma_start = jiffies;
 
 	while ((inb(nic_base + EN0_ISR) & ENISR_RDC) == 0)
-		if (jiffies - dma_start > 2) {			/* Avoid clock roll-over. */
+		/* Avoid clock roll-over. */
+		if (jiffies - dma_start > 2) {
 			netdev_warn(dev, "timeout waiting for Tx RDC.\n");
 			ne2k_pci_reset_8390(dev);
-			NS8390_init(dev,1);
+			NS8390_init(dev, 1);
 			break;
 		}
-
-	outb(ENISR_RDC, nic_base + EN0_ISR);	/* Ack intr. */
+	/* Ack intr. */
+	outb(ENISR_RDC, nic_base + EN0_ISR);
 	ei_status.dmaing &= ~0x01;
 }
 
@@ -640,9 +663,9 @@ static void ne2k_pci_get_drvinfo(struct net_device *dev,
 	struct ei_device *ei = netdev_priv(dev);
 	struct pci_dev *pci_dev = (struct pci_dev *) ei->priv;
 
-	strlcpy(info->driver, DRV_NAME, sizeof(info->driver));
-	strlcpy(info->version, DRV_VERSION, sizeof(info->version));
-	strlcpy(info->bus_info, pci_name(pci_dev), sizeof(info->bus_info));
+	strscpy(info->driver, DRV_NAME, sizeof(info->driver));
+	strscpy(info->version, DRV_VERSION, sizeof(info->version));
+	strscpy(info->bus_info, pci_name(pci_dev), sizeof(info->bus_info));
 }
 
 static u32 ne2k_pci_get_msglevel(struct net_device *dev)
@@ -677,9 +700,9 @@ static void ne2k_pci_remove_one(struct pci_dev *pdev)
 }
 
 #ifdef CONFIG_PM
-static int ne2k_pci_suspend (struct pci_dev *pdev, pm_message_t state)
+static int ne2k_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 {
-	struct net_device *dev = pci_get_drvdata (pdev);
+	struct net_device *dev = pci_get_drvdata(pdev);
 
 	netif_device_detach(dev);
 	pci_save_state(pdev);
@@ -689,9 +712,9 @@ static int ne2k_pci_suspend (struct pci_dev *pdev, pm_message_t state)
 	return 0;
 }
 
-static int ne2k_pci_resume (struct pci_dev *pdev)
+static int ne2k_pci_resume(struct pci_dev *pdev)
 {
-	struct net_device *dev = pci_get_drvdata (pdev);
+	struct net_device *dev = pci_get_drvdata(pdev);
 	int rc;
 
 	pci_set_power_state(pdev, PCI_D0);
@@ -718,24 +741,20 @@ static struct pci_driver ne2k_driver = {
 #ifdef CONFIG_PM
 	.suspend	= ne2k_pci_suspend,
 	.resume		= ne2k_pci_resume,
-#endif /* CONFIG_PM */
+#endif
 
 };
 
 
 static int __init ne2k_pci_init(void)
 {
-/* when a module, this is printed whether or not devices are found in probe */
-#ifdef MODULE
-	printk(version);
-#endif
 	return pci_register_driver(&ne2k_driver);
 }
 
 
 static void __exit ne2k_pci_cleanup(void)
 {
-	pci_unregister_driver (&ne2k_driver);
+	pci_unregister_driver(&ne2k_driver);
 }
 
 module_init(ne2k_pci_init);
-- 
cgit v1.2.3-59-g8ed1b


From a331172b156b23e83dfb556ade0ca23426c3f149 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:37 +0200
Subject: net: ethtool: Add attributes for cable test TDR data

Some Ethernet PHYs can return the raw time domain reflectometry data.
Add the attributes to allow this data to be requested and returned via
netlink ethtool.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>

v2:
m -> cm
Report what the PHY actually used for start/stop/step.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst | 81 ++++++++++++++++++++++++++++
 include/uapi/linux/ethtool_netlink.h         | 63 ++++++++++++++++++++++
 2 files changed, 144 insertions(+)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index 7e651ea33eab..dae36227d590 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -205,6 +205,7 @@ Userspace to kernel:
   ``ETHTOOL_MSG_EEE_SET``               set EEE settings
   ``ETHTOOL_MSG_TSINFO_GET``		get timestamping info
   ``ETHTOOL_MSG_CABLE_TEST_ACT``        action start cable test
+  ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``    action start raw TDR cable test
   ===================================== ================================
 
 Kernel to userspace:
@@ -237,6 +238,7 @@ Kernel to userspace:
   ``ETHTOOL_MSG_EEE_NTF``               EEE settings
   ``ETHTOOL_MSG_TSINFO_GET_REPLY``	timestamping info
   ``ETHTOOL_MSG_CABLE_TEST_NTF``        Cable test results
+  ``ETHTOOL_MSG_CABLE_TEST_TDR_NTF``    Cable test TDR results
   ===================================== =================================
 
 ``GET`` requests are sent by userspace applications to retrieve device
@@ -1014,6 +1016,84 @@ information.
  | | | ``ETHTOOL_A_CABLE_FAULT_LENGTH_CM``     | u32    | length in cm        |
  +-+-+-----------------------------------------+--------+---------------------+
 
+CABLE_TEST TDR
+==============
+
+Start a cable test and report raw TDR data
+
+Request contents:
+
+  ====================================  ======  ==========================
+  ``ETHTOOL_A_CABLE_TEST_TDR_HEADER``   nested  request header
+  ====================================  ======  ==========================
+
+Notification contents:
+
+Raw TDR data is gathered by sending a pulse down the cable and
+recording the amplitude of the reflected pulse for a given distance.
+
+It can take a number of seconds to collect TDR data, especially if the
+full 100 meters is probed at 1 meter intervals. When the test is
+started a notification will be sent containing just
+ETHTOOL_A_CABLE_TEST_TDR_STATUS with the value
+ETHTOOL_A_CABLE_TEST_NTF_STATUS_STARTED.
+
+When the test has completed a second notification will be sent
+containing ETHTOOL_A_CABLE_TEST_TDR_STATUS with the value
+ETHTOOL_A_CABLE_TEST_NTF_STATUS_COMPLETED and the TDR data.
+
+The message may optionally contain the amplitude of the pulse sent
+down the cable. This is measured in mV. A reflection should not be
+bigger than the transmitted pulse.
+
+Before the raw TDR data should be an ETHTOOL_A_CABLE_TDR_NEST_STEP
+nest containing information about the distance along the cable for the
+first reading, the last reading, and the step between each
+reading. Distances are measured in centimeters. These should be the
+exact values the PHY used. These may be different to what the user
+requested, if the native measurement resolution is greater than 1 cm.
+
+For each step along the cable, an ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE is
+used to report the amplitude of the reflection for a given pair.
+
+ +---------------------------------------------+--------+----------------------+
+ | ``ETHTOOL_A_CABLE_TEST_TDR_HEADER``         | nested | reply header         |
+ +---------------------------------------------+--------+----------------------+
+ | ``ETHTOOL_A_CABLE_TEST_TDR_STATUS``         | u8     | completed            |
+ +---------------------------------------------+--------+----------------------+
+ | ``ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST``       | nested | all the results      |
+ +-+-------------------------------------------+--------+----------------------+
+ | | ``ETHTOOL_A_CABLE_TDR_NEST_PULSE``        | nested | TX Pulse amplitude   |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_PULSE_mV``            | s16    | Pulse amplitude      |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | ``ETHTOOL_A_CABLE_TDR_NEST_STEP``         | nested | TDR step info        |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE`` | u32    | First data distance  |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_STEP_LAST_DISTANCE``  | u32    | Last data distance   |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_STEP_STEP_DISTANCE``  | u32    | distance of each step|
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE``    | nested | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR``        | u8     | pair number          |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV``        | s16    | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE``    | nested | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR``        | u8     | pair number          |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV``        | s16    | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | ``ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE``    | nested | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_RESULTS_PAIR``        | u8     | pair number          |
+ +-+-+-----------------------------------------+--------+----------------------+
+ | | | ``ETHTOOL_A_CABLE_AMPLITUDE_mV``        | s16    | Reflection amplitude |
+ +-+-+-----------------------------------------+--------+----------------------+
+
 Request translation
 ===================
 
@@ -1110,4 +1190,5 @@ are netlink only.
   ``ETHTOOL_GFECPARAM``               n/a
   ``ETHTOOL_SFECPARAM``               n/a
   n/a                                 ''ETHTOOL_MSG_CABLE_TEST_ACT''
+  n/a                                 ``ETHTOOL_MSG_CABLE_TEST_TDR_ACT``
   =================================== =====================================
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index e6f109b76c9a..739faa7070c6 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -40,6 +40,7 @@ enum {
 	ETHTOOL_MSG_EEE_SET,
 	ETHTOOL_MSG_TSINFO_GET,
 	ETHTOOL_MSG_CABLE_TEST_ACT,
+	ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_USER_CNT,
@@ -76,6 +77,7 @@ enum {
 	ETHTOOL_MSG_EEE_NTF,
 	ETHTOOL_MSG_TSINFO_GET_REPLY,
 	ETHTOOL_MSG_CABLE_TEST_NTF,
+	ETHTOOL_MSG_CABLE_TEST_TDR_NTF,
 
 	/* add new constants above here */
 	__ETHTOOL_MSG_KERNEL_CNT,
@@ -478,6 +480,67 @@ enum {
 	ETHTOOL_A_CABLE_TEST_NTF_MAX = (__ETHTOOL_A_CABLE_TEST_NTF_CNT - 1)
 };
 
+/* CABLE TEST TDR */
+
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_HEADER,	/* nest - _A_HEADER_* */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CNT - 1
+};
+
+/* CABLE TEST TDR NOTIFY */
+
+enum {
+	ETHTOOL_A_CABLE_AMPLITUDE_UNSPEC,
+	ETHTOOL_A_CABLE_AMPLITUDE_PAIR,         /* u8 */
+	ETHTOOL_A_CABLE_AMPLITUDE_mV,           /* s16 */
+
+	__ETHTOOL_A_CABLE_AMPLITUDE_CNT,
+	ETHTOOL_A_CABLE_AMPLITUDE_MAX = (__ETHTOOL_A_CABLE_AMPLITUDE_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_PULSE_UNSPEC,
+	ETHTOOL_A_CABLE_PULSE_mV,		/* s16 */
+
+	__ETHTOOL_A_CABLE_PULSE_CNT,
+	ETHTOOL_A_CABLE_PULSE_MAX = (__ETHTOOL_A_CABLE_PULSE_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_STEP_UNSPEC,
+	ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE,	/* u32 */
+	ETHTOOL_A_CABLE_STEP_LAST_DISTANCE,	/* u32 */
+	ETHTOOL_A_CABLE_STEP_STEP_DISTANCE,	/* u32 */
+
+	__ETHTOOL_A_CABLE_STEP_CNT,
+	ETHTOOL_A_CABLE_STEP_MAX = (__ETHTOOL_A_CABLE_STEP_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_TDR_NEST_UNSPEC,
+	ETHTOOL_A_CABLE_TDR_NEST_STEP,		/* nest - ETHTOOL_A_CABLE_STEP */
+	ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE,	/* nest - ETHTOOL_A_CABLE_AMPLITUDE */
+	ETHTOOL_A_CABLE_TDR_NEST_PULSE,		/* nest - ETHTOOL_A_CABLE_PULSE */
+
+	__ETHTOOL_A_CABLE_TDR_NEST_CNT,
+	ETHTOOL_A_CABLE_TDR_NEST_MAX = (__ETHTOOL_A_CABLE_TDR_NEST_CNT - 1)
+};
+
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_HEADER,	/* nest - ETHTOOL_A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_STATUS,	/* u8 - _STARTED/_COMPLETE */
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_NEST,	/* nest - of results: */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_NTF_MAX = __ETHTOOL_A_CABLE_TEST_TDR_NTF_CNT - 1
+};
+
 /* generic netlink info */
 #define ETHTOOL_GENL_NAME "ethtool"
 #define ETHTOOL_GENL_VERSION 1
-- 
cgit v1.2.3-59-g8ed1b


From 1a644de29f712771c2ec00e52caa391544eb6141 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:38 +0200
Subject: net: ethtool: Add generic parts of cable test TDR

Add the generic parts of the code used to trigger a cable test and
return raw TDR data. Any PHY driver which supports this must implement
the new driver op.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>

v2
Update nxp-tja11xx for API change.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/nxp-tja11xx.c   |  2 +-
 drivers/net/phy/phy.c           | 65 ++++++++++++++++++++++++++++++++++++++++-
 include/linux/ethtool_netlink.h |  4 +--
 include/linux/phy.h             | 13 +++++++++
 net/ethtool/cabletest.c         | 64 ++++++++++++++++++++++++++++++++++++----
 net/ethtool/netlink.c           |  5 ++++
 net/ethtool/netlink.h           |  1 +
 7 files changed, 144 insertions(+), 10 deletions(-)

diff --git a/drivers/net/phy/nxp-tja11xx.c b/drivers/net/phy/nxp-tja11xx.c
index 1e79c30ca81a..a72fa0d2e7c7 100644
--- a/drivers/net/phy/nxp-tja11xx.c
+++ b/drivers/net/phy/nxp-tja11xx.c
@@ -194,7 +194,7 @@ static int tja11xx_config_aneg_cable_test(struct phy_device *phydev)
 	    !phydev->drv->cable_test_get_status)
 		return 0;
 
-	ret = ethnl_cable_test_alloc(phydev);
+	ret = ethnl_cable_test_alloc(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);
 	if (ret)
 		return ret;
 
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 27da0c94818f..495d9ba3d5bf 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -519,7 +519,7 @@ int phy_start_cable_test(struct phy_device *phydev,
 		goto out;
 	}
 
-	err = ethnl_cable_test_alloc(phydev);
+	err = ethnl_cable_test_alloc(phydev, ETHTOOL_MSG_CABLE_TEST_NTF);
 	if (err)
 		goto out;
 
@@ -552,6 +552,69 @@ out:
 }
 EXPORT_SYMBOL(phy_start_cable_test);
 
+int phy_start_cable_test_tdr(struct phy_device *phydev,
+			     struct netlink_ext_ack *extack)
+{
+	struct net_device *dev = phydev->attached_dev;
+	int err = -ENOMEM;
+
+	if (!(phydev->drv &&
+	      phydev->drv->cable_test_tdr_start &&
+	      phydev->drv->cable_test_get_status)) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY driver does not support cable test TDR");
+		return -EOPNOTSUPP;
+	}
+
+	mutex_lock(&phydev->lock);
+	if (phydev->state == PHY_CABLETEST) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY already performing a test");
+		err = -EBUSY;
+		goto out;
+	}
+
+	if (phydev->state < PHY_UP ||
+	    phydev->state > PHY_CABLETEST) {
+		NL_SET_ERR_MSG(extack,
+			       "PHY not configured. Try setting interface up");
+		err = -EBUSY;
+		goto out;
+	}
+
+	err = ethnl_cable_test_alloc(phydev, ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
+	if (err)
+		goto out;
+
+	/* Mark the carrier down until the test is complete */
+	phy_link_down(phydev);
+
+	netif_testing_on(dev);
+	err = phydev->drv->cable_test_tdr_start(phydev);
+	if (err) {
+		netif_testing_off(dev);
+		phy_link_up(phydev);
+		goto out_free;
+	}
+
+	phydev->state = PHY_CABLETEST;
+
+	if (phy_polling_mode(phydev))
+		phy_trigger_machine(phydev);
+
+	mutex_unlock(&phydev->lock);
+
+	return 0;
+
+out_free:
+	ethnl_cable_test_free(phydev);
+out:
+	mutex_unlock(&phydev->lock);
+
+	return err;
+}
+EXPORT_SYMBOL(phy_start_cable_test_tdr);
+
 static int phy_config_aneg(struct phy_device *phydev)
 {
 	if (phydev->drv->config_aneg)
diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index e317fc99565e..24817ba252a0 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -17,13 +17,13 @@ enum ethtool_multicast_groups {
 struct phy_device;
 
 #if IS_ENABLED(CONFIG_ETHTOOL_NETLINK)
-int ethnl_cable_test_alloc(struct phy_device *phydev);
+int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd);
 void ethnl_cable_test_free(struct phy_device *phydev);
 void ethnl_cable_test_finished(struct phy_device *phydev);
 int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result);
 int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm);
 #else
-static inline int ethnl_cable_test_alloc(struct phy_device *phydev)
+static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
 {
 	return -EOPNOTSUPP;
 }
diff --git a/include/linux/phy.h b/include/linux/phy.h
index 6d256e720a66..d3c384f353ca 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -699,6 +699,10 @@ struct phy_driver {
 
 	/* Start a cable test */
 	int (*cable_test_start)(struct phy_device *dev);
+
+	/* Start a raw TDR cable test */
+	int (*cable_test_tdr_start)(struct phy_device *dev);
+
 	/* Once per second, or on interrupt, request the status of the
 	 * test.
 	 */
@@ -1251,6 +1255,8 @@ int phy_reset_after_clk_enable(struct phy_device *phydev);
 #if IS_ENABLED(CONFIG_PHYLIB)
 int phy_start_cable_test(struct phy_device *phydev,
 			 struct netlink_ext_ack *extack);
+int phy_start_cable_test_tdr(struct phy_device *phydev,
+			     struct netlink_ext_ack *extack);
 #else
 static inline
 int phy_start_cable_test(struct phy_device *phydev,
@@ -1259,6 +1265,13 @@ int phy_start_cable_test(struct phy_device *phydev,
 	NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support");
 	return -EOPNOTSUPP;
 }
+static inline
+int phy_start_cable_test_tdr(struct phy_device *phydev,
+			     struct netlink_ext_ack *extack)
+{
+	NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support");
+	return -EOPNOTSUPP;
+}
 #endif
 
 int phy_cable_test_result(struct phy_device *phydev, u8 pair, u16 result);
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 5ba06eabe8c2..94e9d5f04353 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -13,7 +13,7 @@ cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
 	[ETHTOOL_A_CABLE_TEST_HEADER]		= { .type = NLA_NESTED },
 };
 
-static int ethnl_cable_test_started(struct phy_device *phydev)
+static int ethnl_cable_test_started(struct phy_device *phydev, u8 cmd)
 {
 	struct sk_buff *skb;
 	int err = -ENOMEM;
@@ -23,7 +23,7 @@ static int ethnl_cable_test_started(struct phy_device *phydev)
 	if (!skb)
 		goto out;
 
-	ehdr = ethnl_bcastmsg_put(skb, ETHTOOL_MSG_CABLE_TEST_NTF);
+	ehdr = ethnl_bcastmsg_put(skb, cmd);
 	if (!ehdr) {
 		err = -EMSGSIZE;
 		goto out;
@@ -86,7 +86,8 @@ int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info)
 	ethnl_ops_complete(dev);
 
 	if (!ret)
-		ethnl_cable_test_started(dev->phydev);
+		ethnl_cable_test_started(dev->phydev,
+					 ETHTOOL_MSG_CABLE_TEST_NTF);
 
 out_rtnl:
 	rtnl_unlock();
@@ -95,7 +96,7 @@ out_dev_put:
 	return ret;
 }
 
-int ethnl_cable_test_alloc(struct phy_device *phydev)
+int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
 {
 	int err = -ENOMEM;
 
@@ -103,8 +104,7 @@ int ethnl_cable_test_alloc(struct phy_device *phydev)
 	if (!phydev->skb)
 		goto out;
 
-	phydev->ehdr = ethnl_bcastmsg_put(phydev->skb,
-					  ETHTOOL_MSG_CABLE_TEST_NTF);
+	phydev->ehdr = ethnl_bcastmsg_put(phydev->skb, cmd);
 	if (!phydev->ehdr) {
 		err = -EMSGSIZE;
 		goto out;
@@ -199,3 +199,55 @@ err:
 	return ret;
 }
 EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
+
+static const struct nla_policy
+cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = {
+	[ETHTOOL_A_CABLE_TEST_TDR_UNSPEC]	= { .type = NLA_REJECT },
+	[ETHTOOL_A_CABLE_TEST_TDR_HEADER]	= { .type = NLA_NESTED },
+};
+
+int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
+{
+	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1];
+	struct ethnl_req_info req_info = {};
+	struct net_device *dev;
+	int ret;
+
+	ret = nlmsg_parse(info->nlhdr, GENL_HDRLEN, tb,
+			  ETHTOOL_A_CABLE_TEST_TDR_MAX,
+			  cable_test_tdr_act_policy, info->extack);
+	if (ret < 0)
+		return ret;
+
+	ret = ethnl_parse_header_dev_get(&req_info,
+					 tb[ETHTOOL_A_CABLE_TEST_TDR_HEADER],
+					 genl_info_net(info), info->extack,
+					 true);
+	if (ret < 0)
+		return ret;
+
+	dev = req_info.dev;
+	if (!dev->phydev) {
+		ret = -EOPNOTSUPP;
+		goto out_dev_put;
+	}
+
+	rtnl_lock();
+	ret = ethnl_ops_begin(dev);
+	if (ret < 0)
+		goto out_rtnl;
+
+	ret = phy_start_cable_test_tdr(dev->phydev, info->extack);
+
+	ethnl_ops_complete(dev);
+
+	if (!ret)
+		ethnl_cable_test_started(dev->phydev,
+					 ETHTOOL_MSG_CABLE_TEST_TDR_NTF);
+
+out_rtnl:
+	rtnl_unlock();
+out_dev_put:
+	dev_put(dev);
+	return ret;
+}
diff --git a/net/ethtool/netlink.c b/net/ethtool/netlink.c
index 0f2f4754dcf9..88fd07f47040 100644
--- a/net/ethtool/netlink.c
+++ b/net/ethtool/netlink.c
@@ -844,6 +844,11 @@ static const struct genl_ops ethtool_genl_ops[] = {
 		.flags	= GENL_UNS_ADMIN_PERM,
 		.doit	= ethnl_act_cable_test,
 	},
+	{
+		.cmd	= ETHTOOL_MSG_CABLE_TEST_TDR_ACT,
+		.flags	= GENL_UNS_ADMIN_PERM,
+		.doit	= ethnl_act_cable_test_tdr,
+	},
 };
 
 static const struct genl_multicast_group ethtool_nl_mcgrps[] = {
diff --git a/net/ethtool/netlink.h b/net/ethtool/netlink.h
index b0eb5d920099..9a96b6e90dc2 100644
--- a/net/ethtool/netlink.h
+++ b/net/ethtool/netlink.h
@@ -360,5 +360,6 @@ int ethnl_set_coalesce(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_pause(struct sk_buff *skb, struct genl_info *info);
 int ethnl_set_eee(struct sk_buff *skb, struct genl_info *info);
 int ethnl_act_cable_test(struct sk_buff *skb, struct genl_info *info);
+int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info);
 
 #endif /* _NET_ETHTOOL_NETLINK_H */
-- 
cgit v1.2.3-59-g8ed1b


From 6b4a0fc106521e480c00b55a7ef38c89f02dc4e8 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:39 +0200
Subject: net: ethtool: Add helpers for cable test TDR data

Add helpers for returning raw TDR data in netlink messages.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/ethtool_netlink.h | 21 +++++++++++
 net/ethtool/cabletest.c         | 80 ++++++++++++++++++++++++++++++++++++++++-
 2 files changed, 100 insertions(+), 1 deletion(-)

diff --git a/include/linux/ethtool_netlink.h b/include/linux/ethtool_netlink.h
index 24817ba252a0..8fbe4f97ffad 100644
--- a/include/linux/ethtool_netlink.h
+++ b/include/linux/ethtool_netlink.h
@@ -22,6 +22,10 @@ void ethnl_cable_test_free(struct phy_device *phydev);
 void ethnl_cable_test_finished(struct phy_device *phydev);
 int ethnl_cable_test_result(struct phy_device *phydev, u8 pair, u8 result);
 int ethnl_cable_test_fault_length(struct phy_device *phydev, u8 pair, u32 cm);
+int ethnl_cable_test_amplitude(struct phy_device *phydev, u8 pair, s16 mV);
+int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV);
+int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last,
+			  u32 step);
 #else
 static inline int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
 {
@@ -46,5 +50,22 @@ static inline int ethnl_cable_test_fault_length(struct phy_device *phydev,
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int ethnl_cable_test_amplitude(struct phy_device *phydev,
+					     u8 pair, s16 mV)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int ethnl_cable_test_step(struct phy_device *phydev, u32 first,
+					u32 last, u32 step)
+{
+	return -EOPNOTSUPP;
+}
 #endif /* IS_ENABLED(ETHTOOL_NETLINK) */
 #endif /* _LINUX_ETHTOOL_NETLINK_H_ */
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 94e9d5f04353..390d0673ff01 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -100,7 +100,10 @@ int ethnl_cable_test_alloc(struct phy_device *phydev, u8 cmd)
 {
 	int err = -ENOMEM;
 
-	phydev->skb = genlmsg_new(NLMSG_GOODSIZE, GFP_KERNEL);
+	/* One TDR sample occupies 20 bytes. For a 150 meter cable,
+	 * with four pairs, around 12K is needed.
+	 */
+	phydev->skb = genlmsg_new(SZ_16K, GFP_KERNEL);
 	if (!phydev->skb)
 		goto out;
 
@@ -251,3 +254,78 @@ out_dev_put:
 	dev_put(dev);
 	return ret;
 }
+ 
+int ethnl_cable_test_amplitude(struct phy_device *phydev,
+			       u8 pair, s16 mV)
+{
+	struct nlattr *nest;
+	int ret = -EMSGSIZE;
+
+	nest = nla_nest_start(phydev->skb,
+			      ETHTOOL_A_CABLE_TDR_NEST_AMPLITUDE);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u8(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_PAIR, pair))
+		goto err;
+	if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_AMPLITUDE_mV, mV))
+		goto err;
+
+	nla_nest_end(phydev->skb, nest);
+	return 0;
+
+err:
+	nla_nest_cancel(phydev->skb, nest);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_amplitude);
+
+int ethnl_cable_test_pulse(struct phy_device *phydev, u16 mV)
+{
+	struct nlattr *nest;
+	int ret = -EMSGSIZE;
+
+	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_PULSE);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u16(phydev->skb, ETHTOOL_A_CABLE_PULSE_mV, mV))
+		goto err;
+
+	nla_nest_end(phydev->skb, nest);
+	return 0;
+
+err:
+	nla_nest_cancel(phydev->skb, nest);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_pulse);
+
+int ethnl_cable_test_step(struct phy_device *phydev, u32 first, u32 last,
+			  u32 step)
+{
+	struct nlattr *nest;
+	int ret = -EMSGSIZE;
+
+	nest = nla_nest_start(phydev->skb, ETHTOOL_A_CABLE_TDR_NEST_STEP);
+	if (!nest)
+		return -EMSGSIZE;
+
+	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_FIRST_DISTANCE,
+			first))
+		goto err;
+
+	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_LAST_DISTANCE, last))
+		goto err;
+
+	if (nla_put_u32(phydev->skb, ETHTOOL_A_CABLE_STEP_STEP_DISTANCE, step))
+		goto err;
+
+	nla_nest_end(phydev->skb, nest);
+	return 0;
+
+err:
+	nla_nest_cancel(phydev->skb, nest);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(ethnl_cable_test_step);
-- 
cgit v1.2.3-59-g8ed1b


From 0c9bcc1d2394acef2c6e89e652d984cc845c7bea Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:40 +0200
Subject: net: phy: marvell: Add support for amplitude graph

The Marvell PHYs can measure the amplitude of the returned signal for
a given distance. Implement this option of the cable test
infrastructure. When reporting the step, convert the distance into cm.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>

v2:
Step based on the measurement resolution, and convert this to cm.

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 232 +++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 231 insertions(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 4bc7febf9248..e597bee2e966 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -42,6 +42,7 @@
 #define MII_MARVELL_FIBER_PAGE		0x01
 #define MII_MARVELL_MSCR_PAGE		0x02
 #define MII_MARVELL_LED_PAGE		0x03
+#define MII_MARVELL_VCT5_PAGE		0x05
 #define MII_MARVELL_MISC_TEST_PAGE	0x06
 #define MII_MARVELL_VCT7_PAGE		0x07
 #define MII_MARVELL_WOL_PAGE		0x11
@@ -164,6 +165,54 @@
 #define MII_88E1510_GEN_CTRL_REG_1_MODE_SGMII	0x1	/* SGMII to copper */
 #define MII_88E1510_GEN_CTRL_REG_1_RESET	0x8000	/* Soft reset */
 
+#define MII_VCT5_TX_RX_MDI0_COUPLING	0x10
+#define MII_VCT5_TX_RX_MDI1_COUPLING	0x11
+#define MII_VCT5_TX_RX_MDI2_COUPLING	0x12
+#define MII_VCT5_TX_RX_MDI3_COUPLING	0x13
+#define MII_VCT5_TX_RX_AMPLITUDE_MASK	0x7f00
+#define MII_VCT5_TX_RX_AMPLITUDE_SHIFT	8
+#define MII_VCT5_TX_RX_COUPLING_POSITIVE_REFLECTION	BIT(15)
+
+#define MII_VCT5_CTRL				0x17
+#define MII_VCT5_CTRL_ENABLE				BIT(15)
+#define MII_VCT5_CTRL_COMPLETE				BIT(14)
+#define MII_VCT5_CTRL_TX_SAME_CHANNEL			(0x0 << 11)
+#define MII_VCT5_CTRL_TX0_CHANNEL			(0x4 << 11)
+#define MII_VCT5_CTRL_TX1_CHANNEL			(0x5 << 11)
+#define MII_VCT5_CTRL_TX2_CHANNEL			(0x6 << 11)
+#define MII_VCT5_CTRL_TX3_CHANNEL			(0x7 << 11)
+#define MII_VCT5_CTRL_SAMPLES_2				(0x0 << 8)
+#define MII_VCT5_CTRL_SAMPLES_4				(0x1 << 8)
+#define MII_VCT5_CTRL_SAMPLES_8				(0x2 << 8)
+#define MII_VCT5_CTRL_SAMPLES_16			(0x3 << 8)
+#define MII_VCT5_CTRL_SAMPLES_32			(0x4 << 8)
+#define MII_VCT5_CTRL_SAMPLES_64			(0x5 << 8)
+#define MII_VCT5_CTRL_SAMPLES_128			(0x6 << 8)
+#define MII_VCT5_CTRL_SAMPLES_DEFAULT			(0x6 << 8)
+#define MII_VCT5_CTRL_SAMPLES_256			(0x7 << 8)
+#define MII_VCT5_CTRL_SAMPLES_SHIFT			8
+#define MII_VCT5_CTRL_MODE_MAXIMUM_PEEK			(0x0 << 6)
+#define MII_VCT5_CTRL_MODE_FIRST_LAST_PEEK		(0x1 << 6)
+#define MII_VCT5_CTRL_MODE_OFFSET			(0x2 << 6)
+#define MII_VCT5_CTRL_SAMPLE_POINT			(0x3 << 6)
+#define MII_VCT5_CTRL_PEEK_HYST_DEFAULT			3
+
+#define MII_VCT5_SAMPLE_POINT_DISTANCE		0x18
+#define MII_VCT5_TX_PULSE_CTRL			0x1c
+#define MII_VCT5_TX_PULSE_CTRL_DONT_WAIT_LINK_DOWN	BIT(12)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_128nS	(0x0 << 10)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_96nS		(0x1 << 10)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_64nS		(0x2 << 10)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_32nS		(0x3 << 10)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_SHIFT	10
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_AMPLITUDE_1000mV	(0x0 << 8)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_AMPLITUDE_750mV	(0x1 << 8)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_AMPLITUDE_500mV	(0x2 << 8)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_AMPLITUDE_250mV	(0x3 << 8)
+#define MII_VCT5_TX_PULSE_CTRL_PULSE_AMPLITUDE_SHIFT	8
+#define MII_VCT5_TX_PULSE_CTRL_MAX_AMP			BIT(7)
+#define MII_VCT5_TX_PULSE_CTRL_GT_140m_46_86mV		(0x6 << 0)
+
 #define MII_VCT7_PAIR_0_DISTANCE	0x10
 #define MII_VCT7_PAIR_1_DISTANCE	0x11
 #define MII_VCT7_PAIR_2_DISTANCE	0x12
@@ -220,6 +269,7 @@ struct marvell_priv {
 	u64 stats[ARRAY_SIZE(marvell_hw_stats)];
 	char *hwmon_name;
 	struct device *hwmon_dev;
+	bool cable_test_tdr;
 };
 
 static int marvell_read_page(struct phy_device *phydev)
@@ -1690,7 +1740,119 @@ static void marvell_get_stats(struct phy_device *phydev,
 		data[i] = marvell_get_stat(phydev, i);
 }
 
-static int marvell_vct7_cable_test_start(struct phy_device *phydev)
+static int marvell_vct5_wait_complete(struct phy_device *phydev)
+{
+	int i;
+	int val;
+
+	for (i = 0; i < 32; i++) {
+		val = phy_read_paged(phydev, MII_MARVELL_VCT5_PAGE,
+				     MII_VCT5_CTRL);
+		if (val < 0)
+			return val;
+
+		if (val & MII_VCT5_CTRL_COMPLETE)
+			return 0;
+
+		usleep_range(1000, 2000);
+	}
+
+	phydev_err(phydev, "Timeout while waiting for cable test to finish\n");
+	return -ETIMEDOUT;
+}
+
+static int marvell_vct5_amplitude(struct phy_device *phydev, int pair)
+{
+	int amplitude;
+	int val;
+	int reg;
+
+	reg = MII_VCT5_TX_RX_MDI0_COUPLING + pair;
+	val = phy_read_paged(phydev, MII_MARVELL_VCT5_PAGE, reg);
+
+	if (val < 0)
+		return 0;
+
+	amplitude = (val & MII_VCT5_TX_RX_AMPLITUDE_MASK) >>
+		MII_VCT5_TX_RX_AMPLITUDE_SHIFT;
+
+	if (!(val & MII_VCT5_TX_RX_COUPLING_POSITIVE_REFLECTION))
+		amplitude = -amplitude;
+
+	return 1000 * amplitude / 128;
+}
+
+static u32 marvell_vct5_distance2cm(int distance)
+{
+	return distance * 805 / 10;
+}
+
+static int marvell_vct5_amplitude_distance(struct phy_device *phydev,
+					   int distance)
+{
+	int mV_pair0, mV_pair1, mV_pair2, mV_pair3;
+	u16 reg;
+	int err;
+
+	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
+			      MII_VCT5_SAMPLE_POINT_DISTANCE,
+			      distance);
+	if (err)
+		return err;
+
+	reg = MII_VCT5_CTRL_ENABLE |
+		MII_VCT5_CTRL_TX_SAME_CHANNEL |
+		MII_VCT5_CTRL_SAMPLES_DEFAULT |
+		MII_VCT5_CTRL_SAMPLE_POINT |
+		MII_VCT5_CTRL_PEEK_HYST_DEFAULT;
+	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
+			      MII_VCT5_CTRL, reg);
+	if (err)
+		return err;
+
+	err = marvell_vct5_wait_complete(phydev);
+	if (err)
+		return err;
+
+	mV_pair0 = marvell_vct5_amplitude(phydev, 0);
+	mV_pair1 = marvell_vct5_amplitude(phydev, 1);
+	mV_pair2 = marvell_vct5_amplitude(phydev, 2);
+	mV_pair3 = marvell_vct5_amplitude(phydev, 3);
+
+	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_A, mV_pair0);
+	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_B, mV_pair1);
+	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_C, mV_pair2);
+	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_D, mV_pair3);
+
+	return 0;
+}
+
+static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
+{
+	int distance;
+	int err;
+	u16 reg;
+
+	reg = MII_VCT5_TX_PULSE_CTRL_GT_140m_46_86mV |
+		MII_VCT5_TX_PULSE_CTRL_DONT_WAIT_LINK_DOWN |
+		MII_VCT5_TX_PULSE_CTRL_MAX_AMP |
+		MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_32nS;
+
+	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
+			      MII_VCT5_TX_PULSE_CTRL, reg);
+	if (err)
+		return err;
+
+	for (distance = 0; distance <= 100; distance++) {
+		err = marvell_vct5_amplitude_distance(phydev, distance);
+		if (err)
+			return err;
+	}
+
+	return 0;
+}
+
+static int marvell_cable_test_start_common(struct phy_device *phydev)
 {
 	int bmcr, bmsr, ret;
 
@@ -1719,12 +1881,69 @@ static int marvell_vct7_cable_test_start(struct phy_device *phydev)
 	if (bmsr & BMSR_LSTATUS)
 		msleep(1500);
 
+	return 0;
+}
+
+static int marvell_vct7_cable_test_start(struct phy_device *phydev)
+{
+	struct marvell_priv *priv = phydev->priv;
+	int ret;
+
+	ret = marvell_cable_test_start_common(phydev);
+	if (ret)
+		return ret;
+
+	priv->cable_test_tdr = false;
+
+	/* Reset the VCT5 API control to defaults, otherwise
+	 * VCT7 does not work correctly.
+	 */
+	ret = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
+			      MII_VCT5_CTRL,
+			      MII_VCT5_CTRL_TX_SAME_CHANNEL |
+			      MII_VCT5_CTRL_SAMPLES_DEFAULT |
+			      MII_VCT5_CTRL_MODE_MAXIMUM_PEEK |
+			      MII_VCT5_CTRL_PEEK_HYST_DEFAULT);
+	if (ret)
+		return ret;
+
+	ret = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
+			      MII_VCT5_SAMPLE_POINT_DISTANCE, 0);
+	if (ret)
+		return ret;
+
 	return phy_write_paged(phydev, MII_MARVELL_VCT7_PAGE,
 			       MII_VCT7_CTRL,
 			       MII_VCT7_CTRL_RUN_NOW |
 			       MII_VCT7_CTRL_CENTIMETERS);
 }
 
+static int marvell_vct5_cable_test_tdr_start(struct phy_device *phydev)
+{
+	struct marvell_priv *priv = phydev->priv;
+	int ret;
+
+	/* Disable  VCT7 */
+	ret = phy_write_paged(phydev, MII_MARVELL_VCT7_PAGE,
+			      MII_VCT7_CTRL, 0);
+	if (ret)
+		return ret;
+
+	ret = marvell_cable_test_start_common(phydev);
+	if (ret)
+		return ret;
+
+	priv->cable_test_tdr = true;
+	ret = ethnl_cable_test_pulse(phydev, 1000);
+	if (ret)
+		return ret;
+
+	return ethnl_cable_test_step(phydev,
+				     marvell_vct5_distance2cm(0),
+				     marvell_vct5_distance2cm(100),
+				     marvell_vct5_distance2cm(1));
+}
+
 static int marvell_vct7_distance_to_length(int distance, bool meter)
 {
 	if (meter)
@@ -1828,8 +2047,15 @@ static int marvell_vct7_cable_test_report(struct phy_device *phydev)
 static int marvell_vct7_cable_test_get_status(struct phy_device *phydev,
 					      bool *finished)
 {
+	struct marvell_priv *priv = phydev->priv;
 	int ret;
 
+	if (priv->cable_test_tdr) {
+		ret = marvell_vct5_amplitude_graph(phydev);
+		*finished = true;
+		return ret;
+	}
+
 	*finished = false;
 
 	ret = phy_read_paged(phydev, MII_MARVELL_VCT7_PAGE,
@@ -2563,6 +2789,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_tunable = m88e1011_get_tunable,
 		.set_tunable = m88e1011_set_tunable,
 		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
 		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
@@ -2588,6 +2815,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
 		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
 		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
@@ -2613,6 +2841,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
 		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
 		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 	{
@@ -2658,6 +2887,7 @@ static struct phy_driver marvell_drivers[] = {
 		.get_tunable = m88e1540_get_tunable,
 		.set_tunable = m88e1540_set_tunable,
 		.cable_test_start = marvell_vct7_cable_test_start,
+		.cable_test_tdr_start = marvell_vct5_cable_test_tdr_start,
 		.cable_test_get_status = marvell_vct7_cable_test_get_status,
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From f2bc8ad31a7f814237bc6301d59296d76505a688 Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:41 +0200
Subject: net: ethtool: Allow PHY cable test TDR data to be configured

Allow the user to configure where on the cable the TDR data should be
retrieved, in terms of first and last sample, and the step between
samples. Also add the ability to ask for TDR data for just one pair.

If this configuration is not provided, it defaults to 1-150m at 1m
intervals for all pairs.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>

v3:
Move the TDR configuration into a structure
Add a range check on step
Use NL_SET_ERR_MSG_ATTR() when appropriate
Move TDR configuration into a nest
Document attributes in the request

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/ethtool-netlink.rst |  22 +++++-
 drivers/net/phy/marvell.c                    |  59 ++++++++++-----
 drivers/net/phy/phy.c                        |   5 +-
 include/linux/phy.h                          |  21 +++++-
 include/uapi/linux/ethtool_netlink.h         |  13 ++++
 net/ethtool/cabletest.c                      | 104 ++++++++++++++++++++++++++-
 6 files changed, 197 insertions(+), 27 deletions(-)

diff --git a/Documentation/networking/ethtool-netlink.rst b/Documentation/networking/ethtool-netlink.rst
index dae36227d590..d42661b91128 100644
--- a/Documentation/networking/ethtool-netlink.rst
+++ b/Documentation/networking/ethtool-netlink.rst
@@ -1023,9 +1023,25 @@ Start a cable test and report raw TDR data
 
 Request contents:
 
-  ====================================  ======  ==========================
-  ``ETHTOOL_A_CABLE_TEST_TDR_HEADER``   nested  request header
-  ====================================  ======  ==========================
+ +--------------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_CABLE_TEST_TDR_HEADER``        | nested | request header        |
+ +--------------------------------------------+--------+-----------------------+
+ | ``ETHTOOL_A_CABLE_TEST_TDR_CFG``           | nested | test configuration    |
+ +-+------------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST``   | u32    | first data distance   |
+ +-+-+----------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST``    | u32    | last data distance    |
+ +-+-+----------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP``    | u32    | distance of each step |
+ +-+-+----------------------------------------+--------+-----------------------+
+ | | ``ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR``    | u8     | pair to test          |
+ +-+-+----------------------------------------+--------+-----------------------+
+
+The ETHTOOL_A_CABLE_TEST_TDR_CFG is optional, as well as all members
+of the nest. All distances are expressed in centimeters. The PHY takes
+the distances as a guide, and rounds to the nearest distance it
+actually supports. If a pair is passed, only that one pair will be
+tested. Otherwise all pairs are tested.
 
 Notification contents:
 
diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e597bee2e966..335e51d6f138 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -198,6 +198,7 @@
 #define MII_VCT5_CTRL_PEEK_HYST_DEFAULT			3
 
 #define MII_VCT5_SAMPLE_POINT_DISTANCE		0x18
+#define MII_VCT5_SAMPLE_POINT_DISTANCE_MAX	511
 #define MII_VCT5_TX_PULSE_CTRL			0x1c
 #define MII_VCT5_TX_PULSE_CTRL_DONT_WAIT_LINK_DOWN	BIT(12)
 #define MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_128nS	(0x0 << 10)
@@ -270,6 +271,10 @@ struct marvell_priv {
 	char *hwmon_name;
 	struct device *hwmon_dev;
 	bool cable_test_tdr;
+	u32 first;
+	u32 last;
+	u32 step;
+	s8 pair;
 };
 
 static int marvell_read_page(struct phy_device *phydev)
@@ -1787,12 +1792,18 @@ static u32 marvell_vct5_distance2cm(int distance)
 	return distance * 805 / 10;
 }
 
+static u32 marvell_vct5_cm2distance(int cm)
+{
+	return cm * 10 / 805;
+}
+
 static int marvell_vct5_amplitude_distance(struct phy_device *phydev,
-					   int distance)
+					   int distance, int pair)
 {
-	int mV_pair0, mV_pair1, mV_pair2, mV_pair3;
 	u16 reg;
 	int err;
+	int mV;
+	int i;
 
 	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
 			      MII_VCT5_SAMPLE_POINT_DISTANCE,
@@ -1814,21 +1825,20 @@ static int marvell_vct5_amplitude_distance(struct phy_device *phydev,
 	if (err)
 		return err;
 
-	mV_pair0 = marvell_vct5_amplitude(phydev, 0);
-	mV_pair1 = marvell_vct5_amplitude(phydev, 1);
-	mV_pair2 = marvell_vct5_amplitude(phydev, 2);
-	mV_pair3 = marvell_vct5_amplitude(phydev, 3);
+	for (i = 0; i < 4; i++) {
+		if (pair != PHY_PAIR_ALL && i != pair)
+			continue;
 
-	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_A, mV_pair0);
-	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_B, mV_pair1);
-	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_C, mV_pair2);
-	ethnl_cable_test_amplitude(phydev, ETHTOOL_A_CABLE_PAIR_D, mV_pair3);
+		mV = marvell_vct5_amplitude(phydev, i);
+		ethnl_cable_test_amplitude(phydev, i, mV);
+	}
 
 	return 0;
 }
 
 static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 {
+	struct marvell_priv *priv = phydev->priv;
 	int distance;
 	int err;
 	u16 reg;
@@ -1843,8 +1853,11 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 	if (err)
 		return err;
 
-	for (distance = 0; distance <= 100; distance++) {
-		err = marvell_vct5_amplitude_distance(phydev, distance);
+	for (distance = priv->first;
+	     distance <= priv->last;
+	     distance += priv->step) {
+		err = marvell_vct5_amplitude_distance(phydev, distance,
+						      priv->pair);
 		if (err)
 			return err;
 	}
@@ -1918,11 +1931,24 @@ static int marvell_vct7_cable_test_start(struct phy_device *phydev)
 			       MII_VCT7_CTRL_CENTIMETERS);
 }
 
-static int marvell_vct5_cable_test_tdr_start(struct phy_device *phydev)
+static int marvell_vct5_cable_test_tdr_start(struct phy_device *phydev,
+					     const struct phy_tdr_config *cfg)
 {
 	struct marvell_priv *priv = phydev->priv;
 	int ret;
 
+	priv->cable_test_tdr = true;
+	priv->first = marvell_vct5_cm2distance(cfg->first);
+	priv->last = marvell_vct5_cm2distance(cfg->last);
+	priv->step = marvell_vct5_cm2distance(cfg->step);
+	priv->pair = cfg->pair;
+
+	if (priv->first > MII_VCT5_SAMPLE_POINT_DISTANCE_MAX)
+		return -EINVAL;
+
+	if (priv->last > MII_VCT5_SAMPLE_POINT_DISTANCE_MAX)
+		return -EINVAL;
+
 	/* Disable  VCT7 */
 	ret = phy_write_paged(phydev, MII_MARVELL_VCT7_PAGE,
 			      MII_VCT7_CTRL, 0);
@@ -1933,15 +1959,14 @@ static int marvell_vct5_cable_test_tdr_start(struct phy_device *phydev)
 	if (ret)
 		return ret;
 
-	priv->cable_test_tdr = true;
 	ret = ethnl_cable_test_pulse(phydev, 1000);
 	if (ret)
 		return ret;
 
 	return ethnl_cable_test_step(phydev,
-				     marvell_vct5_distance2cm(0),
-				     marvell_vct5_distance2cm(100),
-				     marvell_vct5_distance2cm(1));
+				     marvell_vct5_distance2cm(priv->first),
+				     marvell_vct5_distance2cm(priv->last),
+				     marvell_vct5_distance2cm(priv->step));
 }
 
 static int marvell_vct7_distance_to_length(int distance, bool meter)
diff --git a/drivers/net/phy/phy.c b/drivers/net/phy/phy.c
index 495d9ba3d5bf..1de3938628f4 100644
--- a/drivers/net/phy/phy.c
+++ b/drivers/net/phy/phy.c
@@ -553,7 +553,8 @@ out:
 EXPORT_SYMBOL(phy_start_cable_test);
 
 int phy_start_cable_test_tdr(struct phy_device *phydev,
-			     struct netlink_ext_ack *extack)
+			     struct netlink_ext_ack *extack,
+			     const struct phy_tdr_config *config)
 {
 	struct net_device *dev = phydev->attached_dev;
 	int err = -ENOMEM;
@@ -590,7 +591,7 @@ int phy_start_cable_test_tdr(struct phy_device *phydev,
 	phy_link_down(phydev);
 
 	netif_testing_on(dev);
-	err = phydev->drv->cable_test_tdr_start(phydev);
+	err = phydev->drv->cable_test_tdr_start(phydev, config);
 	if (err) {
 		netif_testing_off(dev);
 		phy_link_up(phydev);
diff --git a/include/linux/phy.h b/include/linux/phy.h
index d3c384f353ca..8c05d0fb5c00 100644
--- a/include/linux/phy.h
+++ b/include/linux/phy.h
@@ -548,6 +548,18 @@ struct phy_device {
 #define to_phy_device(d) container_of(to_mdio_device(d), \
 				      struct phy_device, mdio)
 
+/* A structure containing possible configuration parameters
+ * for a TDR cable test. The driver does not need to implement
+ * all the parameters, but should report what is actually used.
+ */
+struct phy_tdr_config {
+	u32 first;
+	u32 last;
+	u32 step;
+	s8 pair;
+};
+#define PHY_PAIR_ALL -1
+
 /* struct phy_driver: Driver structure for a particular PHY type
  *
  * driver_data: static driver data
@@ -701,7 +713,8 @@ struct phy_driver {
 	int (*cable_test_start)(struct phy_device *dev);
 
 	/* Start a raw TDR cable test */
-	int (*cable_test_tdr_start)(struct phy_device *dev);
+	int (*cable_test_tdr_start)(struct phy_device *dev,
+				    const struct phy_tdr_config *config);
 
 	/* Once per second, or on interrupt, request the status of the
 	 * test.
@@ -1256,7 +1269,8 @@ int phy_reset_after_clk_enable(struct phy_device *phydev);
 int phy_start_cable_test(struct phy_device *phydev,
 			 struct netlink_ext_ack *extack);
 int phy_start_cable_test_tdr(struct phy_device *phydev,
-			     struct netlink_ext_ack *extack);
+			     struct netlink_ext_ack *extack,
+			     const struct phy_tdr_config *config);
 #else
 static inline
 int phy_start_cable_test(struct phy_device *phydev,
@@ -1267,7 +1281,8 @@ int phy_start_cable_test(struct phy_device *phydev,
 }
 static inline
 int phy_start_cable_test_tdr(struct phy_device *phydev,
-			     struct netlink_ext_ack *extack)
+			     struct netlink_ext_ack *extack,
+			     const struct phy_tdr_config *config)
 {
 	NL_SET_ERR_MSG(extack, "Kernel not compiled with PHYLIB support");
 	return -EOPNOTSUPP;
diff --git a/include/uapi/linux/ethtool_netlink.h b/include/uapi/linux/ethtool_netlink.h
index 739faa7070c6..fc9051f2eeac 100644
--- a/include/uapi/linux/ethtool_netlink.h
+++ b/include/uapi/linux/ethtool_netlink.h
@@ -482,9 +482,22 @@ enum {
 
 /* CABLE TEST TDR */
 
+enum {
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_UNSPEC,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP,		/* u32 */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR,		/* u8 */
+
+	/* add new constants above here */
+	__ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT,
+	ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX = __ETHTOOL_A_CABLE_TEST_TDR_CFG_CNT - 1
+};
+
 enum {
 	ETHTOOL_A_CABLE_TEST_TDR_UNSPEC,
 	ETHTOOL_A_CABLE_TEST_TDR_HEADER,	/* nest - _A_HEADER_* */
+	ETHTOOL_A_CABLE_TEST_TDR_CFG,		/* nest - *_TDR_CFG_* */
 
 	/* add new constants above here */
 	__ETHTOOL_A_CABLE_TEST_TDR_CNT,
diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 390d0673ff01..9991688d7d1d 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -5,7 +5,11 @@
 #include "netlink.h"
 #include "common.h"
 
-/* CABLE_TEST_ACT */
+/* 802.3 standard allows 100 meters for BaseT cables. However longer
+ * cables might work, depending on the quality of the cables and the
+ * PHY. So allow testing for up to 150 meters.
+ */
+#define MAX_CABLE_LENGTH_CM (150 * 100)
 
 static const struct nla_policy
 cable_test_act_policy[ETHTOOL_A_CABLE_TEST_MAX + 1] = {
@@ -203,16 +207,107 @@ err:
 }
 EXPORT_SYMBOL_GPL(ethnl_cable_test_fault_length);
 
+struct cable_test_tdr_req_info {
+	struct ethnl_req_info		base;
+};
+
+static const struct nla_policy
+cable_test_tdr_act_cfg_policy[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1] = {
+	[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]	= { .type = NLA_U32 },
+	[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]	= { .type = NLA_U32 },
+	[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]	= { .type = NLA_U32 },
+	[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]	= { .type = NLA_U8 },
+};
+
 static const struct nla_policy
 cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = {
 	[ETHTOOL_A_CABLE_TEST_TDR_UNSPEC]	= { .type = NLA_REJECT },
 	[ETHTOOL_A_CABLE_TEST_TDR_HEADER]	= { .type = NLA_NESTED },
+	[ETHTOOL_A_CABLE_TEST_TDR_CFG]		= { .type = NLA_NESTED },
 };
 
+/* CABLE_TEST_TDR_ACT */
+int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
+				 struct genl_info *info,
+				 struct phy_tdr_config *cfg)
+{
+	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1];
+	int ret;
+
+	ret = nla_parse_nested(tb, ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX, nest,
+			       cable_test_tdr_act_cfg_policy, info->extack);
+	if (ret < 0)
+		return ret;
+
+	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST])
+		cfg->first = nla_get_u32(
+			tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST]);
+	else
+		cfg->first = 100;
+	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST])
+		cfg->last = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST]);
+	else
+		cfg->last = MAX_CABLE_LENGTH_CM;
+
+	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP])
+		cfg->step = nla_get_u32(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP]);
+	else
+		cfg->step = 100;
+
+	if (tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]) {
+		cfg->pair = nla_get_u8(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR]);
+		if (cfg->pair > ETHTOOL_A_CABLE_PAIR_D) {
+			NL_SET_ERR_MSG_ATTR(
+				info->extack,
+				tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_PAIR],
+				"invalid pair parameter");
+			return -EINVAL;
+		}
+	} else {
+		cfg->pair = PHY_PAIR_ALL;
+	}
+
+	if (cfg->first > MAX_CABLE_LENGTH_CM) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_FIRST],
+				    "invalid first parameter");
+		return -EINVAL;
+	}
+
+	if (cfg->last > MAX_CABLE_LENGTH_CM) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_LAST],
+				    "invalid last parameter");
+		return -EINVAL;
+	}
+
+	if (cfg->first > cfg->last) {
+		NL_SET_ERR_MSG(info->extack, "invalid first/last parameter");
+		return -EINVAL;
+	}
+
+	if (!cfg->step) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
+				    "invalid step parameter");
+		return -EINVAL;
+	}
+
+	if (cfg->step > (cfg->last - cfg->first)) {
+		NL_SET_ERR_MSG_ATTR(info->extack,
+				    tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_STEP],
+				    "step parameter too big");
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
 int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
 {
 	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1];
 	struct ethnl_req_info req_info = {};
+	struct phy_tdr_config cfg;
 	struct net_device *dev;
 	int ret;
 
@@ -235,12 +330,17 @@ int ethnl_act_cable_test_tdr(struct sk_buff *skb, struct genl_info *info)
 		goto out_dev_put;
 	}
 
+	ret = ethnl_act_cable_test_tdr_cfg(tb[ETHTOOL_A_CABLE_TEST_TDR_CFG],
+					   info, &cfg);
+	if (ret)
+		goto out_dev_put;
+
 	rtnl_lock();
 	ret = ethnl_ops_begin(dev);
 	if (ret < 0)
 		goto out_rtnl;
 
-	ret = phy_start_cable_test_tdr(dev->phydev, info->extack);
+	ret = phy_start_cable_test_tdr(dev->phydev, info->extack, &cfg);
 
 	ethnl_ops_complete(dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From a618e86da91d259374caff065cc557899dc181ce Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:42 +0200
Subject: net: phy: marvell: Speed up TDR data retrieval by only changing page
 once

Getting the TDR data requires a large number of MDIO bus
transactions. The number can however be reduced if the page is only
changed once. Add the needed locking to allow this, and make use of
unlocked read/write methods where needed.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 31 +++++++++++++++++++------------
 1 file changed, 19 insertions(+), 12 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 335e51d6f138..e9deedea5f19 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1751,15 +1751,12 @@ static int marvell_vct5_wait_complete(struct phy_device *phydev)
 	int val;
 
 	for (i = 0; i < 32; i++) {
-		val = phy_read_paged(phydev, MII_MARVELL_VCT5_PAGE,
-				     MII_VCT5_CTRL);
+		val = __phy_read(phydev, MII_VCT5_CTRL);
 		if (val < 0)
 			return val;
 
 		if (val & MII_VCT5_CTRL_COMPLETE)
 			return 0;
-
-		usleep_range(1000, 2000);
 	}
 
 	phydev_err(phydev, "Timeout while waiting for cable test to finish\n");
@@ -1773,7 +1770,7 @@ static int marvell_vct5_amplitude(struct phy_device *phydev, int pair)
 	int reg;
 
 	reg = MII_VCT5_TX_RX_MDI0_COUPLING + pair;
-	val = phy_read_paged(phydev, MII_MARVELL_VCT5_PAGE, reg);
+	val = __phy_read(phydev, reg);
 
 	if (val < 0)
 		return 0;
@@ -1805,9 +1802,8 @@ static int marvell_vct5_amplitude_distance(struct phy_device *phydev,
 	int mV;
 	int i;
 
-	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
-			      MII_VCT5_SAMPLE_POINT_DISTANCE,
-			      distance);
+	err = __phy_write(phydev, MII_VCT5_SAMPLE_POINT_DISTANCE,
+			  distance);
 	if (err)
 		return err;
 
@@ -1816,8 +1812,7 @@ static int marvell_vct5_amplitude_distance(struct phy_device *phydev,
 		MII_VCT5_CTRL_SAMPLES_DEFAULT |
 		MII_VCT5_CTRL_SAMPLE_POINT |
 		MII_VCT5_CTRL_PEEK_HYST_DEFAULT;
-	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
-			      MII_VCT5_CTRL, reg);
+	err = __phy_write(phydev, MII_VCT5_CTRL, reg);
 	if (err)
 		return err;
 
@@ -1840,6 +1835,7 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 {
 	struct marvell_priv *priv = phydev->priv;
 	int distance;
+	int page;
 	int err;
 	u16 reg;
 
@@ -1853,16 +1849,27 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 	if (err)
 		return err;
 
+	/* Reading the TDR data is very MDIO heavy. We need to optimize
+	 * access to keep the time to a minimum. So lock the bus once,
+	 * and don't release it until complete. We can then avoid having
+	 * to change the page for every access, greatly speeding things
+	 * up.
+	 */
+	page = phy_select_page(phydev, MII_MARVELL_VCT5_PAGE);
+	if (page < 0)
+		return page;
+
 	for (distance = priv->first;
 	     distance <= priv->last;
 	     distance += priv->step) {
 		err = marvell_vct5_amplitude_distance(phydev, distance,
 						      priv->pair);
 		if (err)
-			return err;
+			goto restore_page;
 	}
 
-	return 0;
+restore_page:
+	return phy_restore_page(phydev, page, err);
 }
 
 static int marvell_cable_test_start_common(struct phy_device *phydev)
-- 
cgit v1.2.3-59-g8ed1b


From db8668a1951954156c039b9f8fe2881d428a522c Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Wed, 27 May 2020 00:21:43 +0200
Subject: net: phy: marvell: Configure TDR pulse based on measurement length

When performing a TDR measurement for a short distance, the pulse
width should be low, to help differentiate between the outgoing pulse
and any reflection. For longer distances, the pulse should be wider,
to help with attenuation.

Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 25 +++++++++++++++++++++++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index e9deedea5f19..2c04e3b2b285 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -214,6 +214,11 @@
 #define MII_VCT5_TX_PULSE_CTRL_MAX_AMP			BIT(7)
 #define MII_VCT5_TX_PULSE_CTRL_GT_140m_46_86mV		(0x6 << 0)
 
+/* For TDR measurements less than 11 meters, a short pulse should be
+ * used.
+ */
+#define TDR_SHORT_CABLE_LENGTH	11
+
 #define MII_VCT7_PAIR_0_DISTANCE	0x10
 #define MII_VCT7_PAIR_1_DISTANCE	0x11
 #define MII_VCT7_PAIR_2_DISTANCE	0x12
@@ -1835,14 +1840,19 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 {
 	struct marvell_priv *priv = phydev->priv;
 	int distance;
+	u16 width;
 	int page;
 	int err;
 	u16 reg;
 
+	if (priv->first <= TDR_SHORT_CABLE_LENGTH)
+		width = MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_32nS;
+	else
+		width = MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_128nS;
+
 	reg = MII_VCT5_TX_PULSE_CTRL_GT_140m_46_86mV |
 		MII_VCT5_TX_PULSE_CTRL_DONT_WAIT_LINK_DOWN |
-		MII_VCT5_TX_PULSE_CTRL_MAX_AMP |
-		MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_32nS;
+		MII_VCT5_TX_PULSE_CTRL_MAX_AMP | width;
 
 	err = phy_write_paged(phydev, MII_MARVELL_VCT5_PAGE,
 			      MII_VCT5_TX_PULSE_CTRL, reg);
@@ -1866,6 +1876,17 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 						      priv->pair);
 		if (err)
 			goto restore_page;
+
+		if (distance > TDR_SHORT_CABLE_LENGTH &&
+		    width == MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_32nS) {
+			width = MII_VCT5_TX_PULSE_CTRL_PULSE_WIDTH_128nS;
+			reg = MII_VCT5_TX_PULSE_CTRL_GT_140m_46_86mV |
+				MII_VCT5_TX_PULSE_CTRL_DONT_WAIT_LINK_DOWN |
+				MII_VCT5_TX_PULSE_CTRL_MAX_AMP | width;
+			err = __phy_write(phydev, MII_VCT5_TX_PULSE_CTRL, reg);
+			if (err)
+				goto restore_page;
+		}
 	}
 
 restore_page:
-- 
cgit v1.2.3-59-g8ed1b


From dc0f3ed1973f101508957b59e529e03da1349e09 Mon Sep 17 00:00:00 2001
From: Oleksij Rempel <o.rempel@pengutronix.de>
Date: Wed, 27 May 2020 07:08:43 +0200
Subject: net: phy: at803x: add cable diagnostics support for ATH9331 and
 ATH8032

Add support for Atheros 100Base-T PHYs. The only difference seems to be
the ability to test 2 pairs instead of 4 and the lack of the 1000Base-T
specific register.

Only the ATH9331 was tested with this patch.

Signed-off-by: Oleksij Rempel <o.rempel@pengutronix.de>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/at803x.c | 18 ++++++++++++++++--
 1 file changed, 16 insertions(+), 2 deletions(-)

diff --git a/drivers/net/phy/at803x.c b/drivers/net/phy/at803x.c
index 822b3acf6be7..97cbe593f0ea 100644
--- a/drivers/net/phy/at803x.c
+++ b/drivers/net/phy/at803x.c
@@ -920,10 +920,16 @@ static int at803x_cable_test_one_pair(struct phy_device *phydev, int pair)
 static int at803x_cable_test_get_status(struct phy_device *phydev,
 					bool *finished)
 {
-	unsigned long pair_mask = 0xf;
+	unsigned long pair_mask;
 	int retries = 20;
 	int pair, ret;
 
+	if (phydev->phy_id == ATH9331_PHY_ID ||
+	    phydev->phy_id == ATH8032_PHY_ID)
+		pair_mask = 0x3;
+	else
+		pair_mask = 0xf;
+
 	*finished = false;
 
 	/* According to the datasheet the CDT can be performed when
@@ -958,7 +964,9 @@ static int at803x_cable_test_start(struct phy_device *phydev)
 	 */
 	phy_write(phydev, MII_BMCR, BMCR_ANENABLE);
 	phy_write(phydev, MII_ADVERTISE, ADVERTISE_CSMA);
-	phy_write(phydev, MII_CTRL1000, 0);
+	if (phydev->phy_id != ATH9331_PHY_ID &&
+	    phydev->phy_id != ATH8032_PHY_ID)
+		phy_write(phydev, MII_CTRL1000, 0);
 
 	/* we do all the (time consuming) work later */
 	return 0;
@@ -1030,6 +1038,7 @@ static struct phy_driver at803x_driver[] = {
 	.name			= "Qualcomm Atheros AR8032",
 	.probe			= at803x_probe,
 	.remove			= at803x_remove,
+	.flags			= PHY_POLL_CABLE_TEST,
 	.config_init		= at803x_config_init,
 	.link_change_notify	= at803x_link_change_notify,
 	.set_wol		= at803x_set_wol,
@@ -1039,15 +1048,20 @@ static struct phy_driver at803x_driver[] = {
 	/* PHY_BASIC_FEATURES */
 	.ack_interrupt		= at803x_ack_interrupt,
 	.config_intr		= at803x_config_intr,
+	.cable_test_start	= at803x_cable_test_start,
+	.cable_test_get_status	= at803x_cable_test_get_status,
 }, {
 	/* ATHEROS AR9331 */
 	PHY_ID_MATCH_EXACT(ATH9331_PHY_ID),
 	.name			= "Qualcomm Atheros AR9331 built-in PHY",
 	.suspend		= at803x_suspend,
 	.resume			= at803x_resume,
+	.flags			= PHY_POLL_CABLE_TEST,
 	/* PHY_BASIC_FEATURES */
 	.ack_interrupt		= &at803x_ack_interrupt,
 	.config_intr		= &at803x_config_intr,
+	.cable_test_start	= at803x_cable_test_start,
+	.cable_test_get_status	= at803x_cable_test_get_status,
 } };
 
 module_phy_driver(at803x_driver);
-- 
cgit v1.2.3-59-g8ed1b


From 2d5d9b7ff49f52ab3ec66ffdc841471a1353ea1b Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Fri, 24 Apr 2020 14:29:01 +0300
Subject: cfg80211: fix mask type in cfg80211_tid_cfg structure

TIDs mask type is u64 in wiphy settings and nl80211 processing, see:
- wiphy TIDs mask sizes in tid_config_support structure
- prepare driver command in parse_tid_conf

Use the same type for TIDs mask in cfg80211_tid_cfg.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200424112905.26770-2-sergey.matyukevich.os@quantenna.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 8b6d5c5184d1..e71d4f690ef1 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -646,7 +646,7 @@ struct cfg80211_chan_def {
 struct cfg80211_tid_cfg {
 	bool config_override;
 	u8 tids;
-	u32 mask;
+	u64 mask;
 	enum nl80211_tid_config noack;
 	u8 retry_long, retry_short;
 	enum nl80211_tid_config ampdu;
-- 
cgit v1.2.3-59-g8ed1b


From 60c2ef0ef07f319504763eaaed8cb003af879008 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Fri, 24 Apr 2020 14:29:02 +0300
Subject: mac80211: fix variable names in TID config methods

Rename all variables from 'tid' to 'tids' to avoid confusion:
the value is now a TID mask, not a TID number.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200424112905.26770-3-sergey.matyukevich.os@quantenna.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c        | 6 +++---
 net/mac80211/driver-ops.h | 4 ++--
 2 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 548a384b0509..06a2b7640a9d 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -3957,7 +3957,7 @@ static int ieee80211_set_tid_config(struct wiphy *wiphy,
 
 static int ieee80211_reset_tid_config(struct wiphy *wiphy,
 				      struct net_device *dev,
-				      const u8 *peer, u8 tid)
+				      const u8 *peer, u8 tids)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct sta_info *sta;
@@ -3967,7 +3967,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
 		return -EOPNOTSUPP;
 
 	if (!peer)
-		return drv_reset_tid_config(sdata->local, sdata, NULL, tid);
+		return drv_reset_tid_config(sdata->local, sdata, NULL, tids);
 
 	mutex_lock(&sdata->local->sta_mtx);
 	sta = sta_info_get_bss(sdata, peer);
@@ -3976,7 +3976,7 @@ static int ieee80211_reset_tid_config(struct wiphy *wiphy,
 		return -ENOENT;
 	}
 
-	ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tid);
+	ret = drv_reset_tid_config(sdata->local, sdata, &sta->sta, tids);
 	mutex_unlock(&sdata->local->sta_mtx);
 
 	return ret;
diff --git a/net/mac80211/driver-ops.h b/net/mac80211/driver-ops.h
index 3877710e3b48..de69fc9c4f07 100644
--- a/net/mac80211/driver-ops.h
+++ b/net/mac80211/driver-ops.h
@@ -1375,12 +1375,12 @@ static inline int drv_set_tid_config(struct ieee80211_local *local,
 
 static inline int drv_reset_tid_config(struct ieee80211_local *local,
 				       struct ieee80211_sub_if_data *sdata,
-				       struct ieee80211_sta *sta, u8 tid)
+				       struct ieee80211_sta *sta, u8 tids)
 {
 	int ret;
 
 	might_sleep();
-	ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tid);
+	ret = local->ops->reset_tid_config(&local->hw, &sdata->vif, sta, tids);
 	trace_drv_return_int(local, ret);
 
 	return ret;
-- 
cgit v1.2.3-59-g8ed1b


From 33462e68231bccfe563a87614f4c4dd5d333837c Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Fri, 24 Apr 2020 14:29:03 +0300
Subject: cfg80211: add support for TID specific AMSDU configuration

This patch adds support to control per TID MSDU aggregation
using the NL80211_TID_CONFIG_ATTR_AMSDU_CTRL attribute.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200424112905.26770-4-sergey.matyukevich.os@quantenna.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  4 +++-
 include/uapi/linux/nl80211.h | 10 +++++++---
 net/wireless/nl80211.c       |  8 ++++++++
 3 files changed, 18 insertions(+), 4 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e71d4f690ef1..5cacf24cc9f0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -640,8 +640,9 @@ struct cfg80211_chan_def {
  * @noack: noack configuration value for the TID
  * @retry_long: retry count value
  * @retry_short: retry count value
- * @ampdu: Enable/Disable aggregation
+ * @ampdu: Enable/Disable MPDU aggregation
  * @rtscts: Enable/Disable RTS/CTS
+ * @amsdu: Enable/Disable MSDU aggregation
  */
 struct cfg80211_tid_cfg {
 	bool config_override;
@@ -651,6 +652,7 @@ struct cfg80211_tid_cfg {
 	u8 retry_long, retry_short;
 	enum nl80211_tid_config ampdu;
 	enum nl80211_tid_config rtscts;
+	enum nl80211_tid_config amsdu;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 9679d561f7d0..1ccb0bf657ec 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4844,12 +4844,15 @@ enum nl80211_tid_config {
  *	&NL80211_CMD_SET_TID_CONFIG. Its type is u8, min value is 1 and
  *	the max value is advertised by the driver in this attribute on
  *	output in wiphy capabilities.
- * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable aggregation for the TIDs
- *	specified in %NL80211_TID_CONFIG_ATTR_TIDS. Its type is u8, using
- *	the values from &nl80211_tid_config.
+ * @NL80211_TID_CONFIG_ATTR_AMPDU_CTRL: Enable/Disable MPDU aggregation
+ *	for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS.
+ *	Its type is u8, using the values from &nl80211_tid_config.
  * @NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL: Enable/Disable RTS_CTS for the TIDs
  *	specified in %NL80211_TID_CONFIG_ATTR_TIDS. It is u8 type, using
  *	the values from &nl80211_tid_config.
+ * @NL80211_TID_CONFIG_ATTR_AMSDU_CTRL: Enable/Disable MSDU aggregation
+ *	for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS.
+ *	Its type is u8, using the values from &nl80211_tid_config.
  */
 enum nl80211_tid_config_attr {
 	__NL80211_TID_CONFIG_ATTR_INVALID,
@@ -4863,6 +4866,7 @@ enum nl80211_tid_config_attr {
 	NL80211_TID_CONFIG_ATTR_RETRY_LONG,
 	NL80211_TID_CONFIG_ATTR_AMPDU_CTRL,
 	NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL,
+	NL80211_TID_CONFIG_ATTR_AMSDU_CTRL,
 
 	/* keep last */
 	__NL80211_TID_CONFIG_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index fa66d5b6f557..482a80b78844 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -343,6 +343,8 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
 			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
 	[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL] =
 			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
+	[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] =
+			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
 };
 
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
@@ -14080,6 +14082,12 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
 			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL]);
 	}
 
+	if (attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]) {
+		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_AMSDU_CTRL);
+		tid_conf->amsdu =
+			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]);
+	}
+
 	if (peer)
 		mask = rdev->wiphy.tid_config_support.peer;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From c03369558c435f7e82f7c06b0173fa73c1ed15c0 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Fri, 24 Apr 2020 14:29:04 +0300
Subject: nl80211: simplify peer specific TID configuration

The current rule for applying TID configuration to a specific peer looks
overly complicated. There is no need to reject a new TID configuration
when the override flag is specified: another call with the same TID
configuration, but without the override flag, allows the new
configuration to be applied anyway.

Use the same approach as for the 'all peers' case: if override flag is
specified, then reset existing TID configuration and immediately
apply a new one.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Link: https://lore.kernel.org/r/20200424112905.26770-5-sergey.matyukevich.os@quantenna.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 10 ++++------
 net/wireless/nl80211.c       |  5 +----
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 1ccb0bf657ec..d1b1d9e49887 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4823,12 +4823,10 @@ enum nl80211_tid_config {
  *	(%NL80211_TID_CONFIG_ATTR_TIDS, %NL80211_TID_CONFIG_ATTR_OVERRIDE).
  * @NL80211_TID_CONFIG_ATTR_PEER_SUPP: same as the previous per-vif one, but
  *	per peer instead.
- * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if no peer
- *	is selected, if set indicates that the new configuration overrides
- *	all previous peer configurations, otherwise previous peer specific
- *	configurations should be left untouched. If peer is selected then
- *	it will reset particular TID configuration of that peer and it will
- *	not accept other TID config attributes along with peer.
+ * @NL80211_TID_CONFIG_ATTR_OVERRIDE: flag attribue, if set indicates
+ *	that the new configuration overrides all previous peer
+ *	configurations, otherwise previous peer specific configurations
+ *	should be left untouched.
  * @NL80211_TID_CONFIG_ATTR_TIDS: a bitmask value of TIDs (bit 0 to 7)
  *	Its type is u16.
  * @NL80211_TID_CONFIG_ATTR_NOACK: Configure ack policy for the TID.
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 482a80b78844..258c621f651c 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -14036,10 +14036,7 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
 		if (rdev->ops->reset_tid_config) {
 			err = rdev_reset_tid_config(rdev, dev, peer,
 						    tid_conf->tids);
-			/* If peer is there no other configuration will be
-			 * allowed
-			 */
-			if (err || peer)
+			if (err)
 				return err;
 		} else {
 			return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From e76fede8bf7c90d92c799d9ceb092dec48346e2c Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Thu, 30 Apr 2020 10:25:50 -0700
Subject: cfg80211: add KHz variants of frame RX API

Drivers may wish to report the RX frequency in units of
KHz. Provide cfg80211_rx_mgmt_khz() and wrap it with
cfg80211_rx_mgmt() so existing drivers which can't report
KHz anyway don't need to change. Add a similar wrapper for
cfg80211_report_obss_beacon() so the frequency units stay
somewhat consistent.

This doesn't actually change the nl80211 API yet.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200430172554.18383-2-thomas@adapt-ip.com
[fix mac80211 calling the non-khz version of obss beacon report,
 drop trace point name changes]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  2 ++
 include/net/cfg80211.h    | 54 ++++++++++++++++++++++++++++++++++++++++++-----
 net/mac80211/rx.c         | 12 ++++++-----
 net/wireless/mlme.c       |  6 +++---
 net/wireless/nl80211.c    | 12 +++++------
 net/wireless/trace.h      |  8 +++----
 6 files changed, 71 insertions(+), 23 deletions(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index a561db435a4b..41d5f000c0d9 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3333,6 +3333,8 @@ static inline int ieee80211_get_tdls_action(struct sk_buff *skb, u32 hdr_size)
 /* convert frequencies */
 #define MHZ_TO_KHZ(freq) ((freq) * 1000)
 #define KHZ_TO_MHZ(freq) ((freq) / 1000)
+#define PR_KHZ(f) KHZ_TO_MHZ(f), f % 1000
+#define KHZ_F "%d.%03d"
 
 /* convert powers */
 #define DBI_TO_MBI(gain) ((gain) * 100)
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 5cacf24cc9f0..7415f77d99ca 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -6988,6 +6988,26 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
 			  enum nl80211_connect_failed_reason reason,
 			  gfp_t gfp);
 
+/**
+ * cfg80211_rx_mgmt_khz - notification of received, unprocessed management frame
+ * @wdev: wireless device receiving the frame
+ * @freq: Frequency on which the frame was received in KHz
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
+ * @buf: Management frame (header + body)
+ * @len: length of the frame data
+ * @flags: flags, as defined in enum nl80211_rxmgmt_flags
+ *
+ * This function is called whenever an Action frame is received for a station
+ * mode interface, but is not processed in kernel.
+ *
+ * Return: %true if a user space application has registered for this frame.
+ * For action frames, that makes it responsible for rejecting unrecognized
+ * action frames; %false otherwise, in which case for action frames the
+ * driver is responsible for rejecting the frame.
+ */
+bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
+			  const u8 *buf, size_t len, u32 flags);
+
 /**
  * cfg80211_rx_mgmt - notification of received, unprocessed management frame
  * @wdev: wireless device receiving the frame
@@ -7005,8 +7025,13 @@ void cfg80211_conn_failed(struct net_device *dev, const u8 *mac_addr,
  * action frames; %false otherwise, in which case for action frames the
  * driver is responsible for rejecting the frame.
  */
-bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
-		      const u8 *buf, size_t len, u32 flags);
+static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq,
+				    int sig_dbm, const u8 *buf, size_t len,
+				    u32 flags)
+{
+	return cfg80211_rx_mgmt_khz(wdev, MHZ_TO_KHZ(freq), sig_dbm, buf, len,
+				    flags);
+}
 
 /**
  * cfg80211_mgmt_tx_status - notification of TX status for management frame
@@ -7204,6 +7229,21 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 			   u64 cookie, bool acked, s32 ack_signal,
 			   bool is_valid_ack_signal, gfp_t gfp);
 
+/**
+ * cfg80211_report_obss_beacon_khz - report beacon from other APs
+ * @wiphy: The wiphy that received the beacon
+ * @frame: the frame
+ * @len: length of the frame
+ * @freq: frequency the frame was received on in KHz
+ * @sig_dbm: signal strength in dBm, or 0 if unknown
+ *
+ * Use this function to report to userspace when a beacon was
+ * received. It is not useful to call this when there is no
+ * netdev that is in AP/GO mode.
+ */
+void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
+				     size_t len, int freq, int sig_dbm);
+
 /**
  * cfg80211_report_obss_beacon - report beacon from other APs
  * @wiphy: The wiphy that received the beacon
@@ -7216,9 +7256,13 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
  * received. It is not useful to call this when there is no
  * netdev that is in AP/GO mode.
  */
-void cfg80211_report_obss_beacon(struct wiphy *wiphy,
-				 const u8 *frame, size_t len,
-				 int freq, int sig_dbm);
+static inline void cfg80211_report_obss_beacon(struct wiphy *wiphy,
+					       const u8 *frame, size_t len,
+					       int freq, int sig_dbm)
+{
+	cfg80211_report_obss_beacon_khz(wiphy, frame, len, MHZ_TO_KHZ(freq),
+					sig_dbm);
+}
 
 /**
  * cfg80211_reg_can_beacon - check if beaconing is allowed
diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index eaf8931e4627..8e47b0d31051 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3095,9 +3095,10 @@ ieee80211_rx_h_mgmt_check(struct ieee80211_rx_data *rx)
 		    !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
 			sig = status->signal;
 
-		cfg80211_report_obss_beacon(rx->local->hw.wiphy,
-					    rx->skb->data, rx->skb->len,
-					    status->freq, sig);
+		cfg80211_report_obss_beacon_khz(rx->local->hw.wiphy,
+						rx->skb->data, rx->skb->len,
+						ieee80211_rx_status_to_khz(status),
+						sig);
 		rx->flags |= IEEE80211_RX_BEACON_REPORTED;
 	}
 
@@ -3443,8 +3444,9 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
 	    !(status->flag & RX_FLAG_NO_SIGNAL_VAL))
 		sig = status->signal;
 
-	if (cfg80211_rx_mgmt(&rx->sdata->wdev, status->freq, sig,
-			     rx->skb->data, rx->skb->len, 0)) {
+	if (cfg80211_rx_mgmt_khz(&rx->sdata->wdev,
+				 ieee80211_rx_status_to_khz(status), sig,
+				 rx->skb->data, rx->skb->len, 0)) {
 		if (rx->sta)
 			rx->sta->rx_stats.packets++;
 		dev_kfree_skb(rx->skb);
diff --git a/net/wireless/mlme.c b/net/wireless/mlme.c
index 409497a3527d..189334314cba 100644
--- a/net/wireless/mlme.c
+++ b/net/wireless/mlme.c
@@ -729,8 +729,8 @@ int cfg80211_mlme_mgmt_tx(struct cfg80211_registered_device *rdev,
 	return rdev_mgmt_tx(rdev, wdev, params, cookie);
 }
 
-bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
-		      const u8 *buf, size_t len, u32 flags)
+bool cfg80211_rx_mgmt_khz(struct wireless_dev *wdev, int freq, int sig_dbm,
+			  const u8 *buf, size_t len, u32 flags)
 {
 	struct wiphy *wiphy = wdev->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -785,7 +785,7 @@ bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq, int sig_dbm,
 	trace_cfg80211_return_bool(result);
 	return result;
 }
-EXPORT_SYMBOL(cfg80211_rx_mgmt);
+EXPORT_SYMBOL(cfg80211_rx_mgmt_khz);
 
 void cfg80211_sched_dfs_chan_update(struct cfg80211_registered_device *rdev)
 {
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 258c621f651c..f6523f1485a3 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -16214,7 +16214,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 					netdev->ifindex)) ||
 	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
 			      NL80211_ATTR_PAD) ||
-	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq) ||
+	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(freq)) ||
 	    (sig_dbm &&
 	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
 	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
@@ -16840,9 +16840,8 @@ void cfg80211_probe_status(struct net_device *dev, const u8 *addr,
 }
 EXPORT_SYMBOL(cfg80211_probe_status);
 
-void cfg80211_report_obss_beacon(struct wiphy *wiphy,
-				 const u8 *frame, size_t len,
-				 int freq, int sig_dbm)
+void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
+				     size_t len, int freq, int sig_dbm)
 {
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
 	struct sk_buff *msg;
@@ -16865,7 +16864,8 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 
 		if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 		    (freq &&
-		     nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, freq)) ||
+		     nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
+				 KHZ_TO_MHZ(freq))) ||
 		    (sig_dbm &&
 		     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
 		    nla_put(msg, NL80211_ATTR_FRAME, len, frame))
@@ -16882,7 +16882,7 @@ void cfg80211_report_obss_beacon(struct wiphy *wiphy,
 	spin_unlock_bh(&rdev->beacon_registrations_lock);
 	nlmsg_free(msg);
 }
-EXPORT_SYMBOL(cfg80211_report_obss_beacon);
+EXPORT_SYMBOL(cfg80211_report_obss_beacon_khz);
 
 #ifdef CONFIG_PM
 static int cfg80211_net_detect_results(struct sk_buff *msg,
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index 53c887ea67c7..f2ab44a2a3e4 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2840,8 +2840,8 @@ TRACE_EVENT(cfg80211_rx_mgmt,
 		__entry->freq = freq;
 		__entry->sig_dbm = sig_dbm;
 	),
-	TP_printk(WDEV_PR_FMT ", freq: %d, sig dbm: %d",
-		  WDEV_PR_ARG, __entry->freq, __entry->sig_dbm)
+	TP_printk(WDEV_PR_FMT ", freq: "KHZ_F", sig dbm: %d",
+		  WDEV_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
 );
 
 TRACE_EVENT(cfg80211_mgmt_tx_status,
@@ -3121,8 +3121,8 @@ TRACE_EVENT(cfg80211_report_obss_beacon,
 		__entry->freq = freq;
 		__entry->sig_dbm = sig_dbm;
 	),
-	TP_printk(WIPHY_PR_FMT ", freq: %d, sig_dbm: %d",
-		  WIPHY_PR_ARG, __entry->freq, __entry->sig_dbm)
+	TP_printk(WIPHY_PR_FMT ", freq: "KHZ_F", sig_dbm: %d",
+		  WIPHY_PR_ARG, PR_KHZ(__entry->freq), __entry->sig_dbm)
 );
 
 TRACE_EVENT(cfg80211_tdls_oper_request,
-- 
cgit v1.2.3-59-g8ed1b


From 942ba88ba9c87f5e225574f1f0d6548f0105ed73 Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Thu, 30 Apr 2020 10:25:51 -0700
Subject: nl80211: add KHz frequency offset for most wifi commands

cfg80211 recently gained the ability to understand a
frequency offset component in KHz. Expose this in nl80211
through the new attributes NL80211_ATTR_WIPHY_FREQ_OFFSET,
NL80211_FREQUENCY_ATTR_OFFSET,
NL80211_ATTR_CENTER_FREQ1_OFFSET, and
NL80211_BSS_FREQUENCY_OFFSET.

These add support to send and receive a KHz offset
component with the following NL80211 commands:

- NL80211_CMD_FRAME
- NL80211_CMD_GET_SCAN
- NL80211_CMD_AUTHENTICATE
- NL80211_CMD_ASSOCIATE
- NL80211_CMD_CONNECT

Along with any other command which takes a chandef, ie:

- NL80211_CMD_SET_CHANNEL
- NL80211_CMD_SET_WIPHY
- NL80211_CMD_START_AP
- NL80211_CMD_RADAR_DETECT
- NL80211_CMD_NOTIFY_RADAR
- NL80211_CMD_CHANNEL_SWITCH
- NL80211_JOIN_IBSS
- NL80211_CMD_REMAIN_ON_CHANNEL
- NL80211_CMD_JOIN_OCB
- NL80211_CMD_JOIN_MESH
- NL80211_CMD_TDLS_CHANNEL_SWITCH

If the driver advertises a band containing channels with
frequency offset, it must also verify support for
frequency offset channels in its cfg80211 ops, or return
an error.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200430172554.18383-3-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 50 ++++++++++++++++++----------
 net/wireless/nl80211.c       | 78 ++++++++++++++++++++++++++++++++------------
 2 files changed, 91 insertions(+), 37 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index d1b1d9e49887..b1cd132c1d27 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -296,13 +296,14 @@
  *	to get a list of all present wiphys.
  * @NL80211_CMD_SET_WIPHY: set wiphy parameters, needs %NL80211_ATTR_WIPHY or
  *	%NL80211_ATTR_IFINDEX; can be used to set %NL80211_ATTR_WIPHY_NAME,
- *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ (and the
- *	attributes determining the channel width; this is used for setting
- *	monitor mode channel),  %NL80211_ATTR_WIPHY_RETRY_SHORT,
- *	%NL80211_ATTR_WIPHY_RETRY_LONG, %NL80211_ATTR_WIPHY_FRAG_THRESHOLD,
- *	and/or %NL80211_ATTR_WIPHY_RTS_THRESHOLD.
- *	However, for setting the channel, see %NL80211_CMD_SET_CHANNEL
- *	instead, the support here is for backward compatibility only.
+ *	%NL80211_ATTR_WIPHY_TXQ_PARAMS, %NL80211_ATTR_WIPHY_FREQ,
+ *	%NL80211_ATTR_WIPHY_FREQ_OFFSET (and the attributes determining the
+ *	channel width; this is used for setting monitor mode channel),
+ *	%NL80211_ATTR_WIPHY_RETRY_SHORT, %NL80211_ATTR_WIPHY_RETRY_LONG,
+ *	%NL80211_ATTR_WIPHY_FRAG_THRESHOLD, and/or
+ *	%NL80211_ATTR_WIPHY_RTS_THRESHOLD.  However, for setting the channel,
+ *	see %NL80211_CMD_SET_CHANNEL instead, the support here is for backward
+ *	compatibility only.
  * @NL80211_CMD_NEW_WIPHY: Newly created wiphy, response to get request
  *	or rename notification. Has attributes %NL80211_ATTR_WIPHY and
  *	%NL80211_ATTR_WIPHY_NAME.
@@ -351,7 +352,8 @@
  *	%NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_INACTIVITY_TIMEOUT,
  *	%NL80211_ATTR_ACL_POLICY and %NL80211_ATTR_MAC_ADDRS.
  *	The channel to use can be set on the interface or be given using the
- *	%NL80211_ATTR_WIPHY_FREQ and the attributes determining channel width.
+ *	%NL80211_ATTR_WIPHY_FREQ and %NL80211_ATTR_WIPHY_FREQ_OFFSET, and the
+ *	attributes determining channel width.
  * @NL80211_CMD_NEW_BEACON: old alias for %NL80211_CMD_START_AP
  * @NL80211_CMD_STOP_AP: Stop AP operation on the given interface
  * @NL80211_CMD_DEL_BEACON: old alias for %NL80211_CMD_STOP_AP
@@ -536,11 +538,12 @@
  *	interface. %NL80211_ATTR_MAC is used to specify PeerSTAAddress (and
  *	BSSID in case of station mode). %NL80211_ATTR_SSID is used to specify
  *	the SSID (mainly for association, but is included in authentication
- *	request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ is used
- *	to specify the frequence of the channel in MHz. %NL80211_ATTR_AUTH_TYPE
- *	is used to specify the authentication type. %NL80211_ATTR_IE is used to
- *	define IEs (VendorSpecificInfo, but also including RSN IE and FT IEs)
- *	to be added to the frame.
+ *	request, too, to help BSS selection. %NL80211_ATTR_WIPHY_FREQ +
+ *	%NL80211_ATTR_WIPHY_FREQ_OFFSET is used to specify the frequence of the
+ *	channel in MHz. %NL80211_ATTR_AUTH_TYPE is used to specify the
+ *	authentication type. %NL80211_ATTR_IE is used to define IEs
+ *	(VendorSpecificInfo, but also including RSN IE and FT IEs) to be added
+ *	to the frame.
  *	When used as an event, this reports reception of an Authentication
  *	frame in station and IBSS modes when the local MLME processed the
  *	frame, i.e., it was for the local STA and was received in correct
@@ -595,8 +598,9 @@
  *	requests to connect to a specified network but without separating
  *	auth and assoc steps. For this, you need to specify the SSID in a
  *	%NL80211_ATTR_SSID attribute, and can optionally specify the association
- *	IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE, %NL80211_ATTR_USE_MFP,
- *	%NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ, %NL80211_ATTR_CONTROL_PORT,
+ *	IEs in %NL80211_ATTR_IE, %NL80211_ATTR_AUTH_TYPE,
+ *	%NL80211_ATTR_USE_MFP, %NL80211_ATTR_MAC, %NL80211_ATTR_WIPHY_FREQ,
+ *	%NL80211_ATTR_WIPHY_FREQ_OFFSET, %NL80211_ATTR_CONTROL_PORT,
  *	%NL80211_ATTR_CONTROL_PORT_ETHERTYPE,
  *	%NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT,
  *	%NL80211_ATTR_CONTROL_PORT_OVER_NL80211, %NL80211_ATTR_MAC_HINT, and
@@ -1433,7 +1437,8 @@ enum nl80211_commands {
  *	of &enum nl80211_chan_width, describing the channel width. See the
  *	documentation of the enum for more information.
  * @NL80211_ATTR_CENTER_FREQ1: Center frequency of the first part of the
- *	channel, used for anything but 20 MHz bandwidth
+ *	channel, used for anything but 20 MHz bandwidth. In S1G this is the
+ *	operating channel center frequency.
  * @NL80211_ATTR_CENTER_FREQ2: Center frequency of the second part of the
  *	channel, used only for 80+80 MHz bandwidth
  * @NL80211_ATTR_WIPHY_CHANNEL_TYPE: included with NL80211_ATTR_WIPHY_FREQ
@@ -2480,9 +2485,14 @@ enum nl80211_commands {
  *	entry without having to force a disconnection after the PMK timeout. If
  *	no roaming occurs between the reauth threshold and PMK expiration,
  *	disassociation is still forced.
- *
  * @NL80211_ATTR_RECEIVE_MULTICAST: multicast flag for the
  *	%NL80211_CMD_REGISTER_FRAME command, see the description there.
+ * @NL80211_ATTR_WIPHY_FREQ_OFFSET: offset of the associated
+ *	%NL80211_ATTR_WIPHY_FREQ in positive KHz. Only valid when supplied with
+ *	an %NL80211_ATTR_WIPHY_FREQ_OFFSET.
+ * @NL80211_ATTR_CENTER_FREQ1_OFFSET: Center frequency offset in KHz for the
+ *	first channel segment specified in %NL80211_ATTR_CENTER_FREQ1.
+ *
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2960,6 +2970,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_PMK_REAUTH_THRESHOLD,
 
 	NL80211_ATTR_RECEIVE_MULTICAST,
+	NL80211_ATTR_WIPHY_FREQ_OFFSET,
+	NL80211_ATTR_CENTER_FREQ1_OFFSET,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -3682,6 +3694,7 @@ enum nl80211_wmm_rule {
  *	(see &enum nl80211_wmm_rule)
  * @NL80211_FREQUENCY_ATTR_NO_HE: HE operation is not allowed on this channel
  *	in current regulatory domain.
+ * @NL80211_FREQUENCY_ATTR_OFFSET: frequency offset in KHz
  * @NL80211_FREQUENCY_ATTR_MAX: highest frequency attribute number
  *	currently defined
  * @__NL80211_FREQUENCY_ATTR_AFTER_LAST: internal use
@@ -3712,6 +3725,7 @@ enum nl80211_frequency_attr {
 	NL80211_FREQUENCY_ATTR_NO_10MHZ,
 	NL80211_FREQUENCY_ATTR_WMM,
 	NL80211_FREQUENCY_ATTR_NO_HE,
+	NL80211_FREQUENCY_ATTR_OFFSET,
 
 	/* keep last */
 	__NL80211_FREQUENCY_ATTR_AFTER_LAST,
@@ -4482,6 +4496,7 @@ enum nl80211_bss_scan_width {
  * @NL80211_BSS_CHAIN_SIGNAL: per-chain signal strength of last BSS update.
  *	Contains a nested array of signal strength attributes (u8, dBm),
  *	using the nesting index as the antenna number.
+ * @NL80211_BSS_FREQUENCY_OFFSET: frequency offset in KHz
  * @__NL80211_BSS_AFTER_LAST: internal
  * @NL80211_BSS_MAX: highest BSS attribute
  */
@@ -4506,6 +4521,7 @@ enum nl80211_bss {
 	NL80211_BSS_PARENT_TSF,
 	NL80211_BSS_PARENT_BSSID,
 	NL80211_BSS_CHAIN_SIGNAL,
+	NL80211_BSS_FREQUENCY_OFFSET,
 
 	/* keep last */
 	__NL80211_BSS_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index f6523f1485a3..87d7efd186d0 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -365,6 +365,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 
 	[NL80211_ATTR_CHANNEL_WIDTH] = { .type = NLA_U32 },
 	[NL80211_ATTR_CENTER_FREQ1] = { .type = NLA_U32 },
+	[NL80211_ATTR_CENTER_FREQ1_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
 	[NL80211_ATTR_CENTER_FREQ2] = { .type = NLA_U32 },
 
 	[NL80211_ATTR_WIPHY_RETRY_SHORT] = NLA_POLICY_MIN(NLA_U8, 1),
@@ -638,6 +639,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_PMK_LIFETIME] = NLA_POLICY_MIN(NLA_U32, 1),
 	[NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100),
 	[NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
+	[NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
 };
 
 /* policy for the key attributes */
@@ -904,6 +906,9 @@ static int nl80211_msg_put_channel(struct sk_buff *msg, struct wiphy *wiphy,
 			chan->center_freq))
 		goto nla_put_failure;
 
+	if (nla_put_u32(msg, NL80211_FREQUENCY_ATTR_OFFSET, chan->freq_offset))
+		goto nla_put_failure;
+
 	if ((chan->flags & IEEE80211_CHAN_DISABLED) &&
 	    nla_put_flag(msg, NL80211_FREQUENCY_ATTR_DISABLED))
 		goto nla_put_failure;
@@ -1309,13 +1314,11 @@ static int nl80211_key_allowed(struct wireless_dev *wdev)
 }
 
 static struct ieee80211_channel *nl80211_get_valid_chan(struct wiphy *wiphy,
-							struct nlattr *tb)
+							u32 freq)
 {
 	struct ieee80211_channel *chan;
 
-	if (tb == NULL)
-		return NULL;
-	chan = ieee80211_get_channel(wiphy, nla_get_u32(tb));
+	chan = ieee80211_get_channel_khz(wiphy, freq);
 	if (!chan || chan->flags & IEEE80211_CHAN_DISABLED)
 		return NULL;
 	return chan;
@@ -2770,13 +2773,17 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 	if (!attrs[NL80211_ATTR_WIPHY_FREQ])
 		return -EINVAL;
 
-	control_freq = nla_get_u32(attrs[NL80211_ATTR_WIPHY_FREQ]);
+	control_freq = MHZ_TO_KHZ(
+			nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+		control_freq +=
+		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
 
 	memset(chandef, 0, sizeof(*chandef));
-
-	chandef->chan = ieee80211_get_channel(&rdev->wiphy, control_freq);
+	chandef->chan = ieee80211_get_channel_khz(&rdev->wiphy, control_freq);
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
-	chandef->center_freq1 = control_freq;
+	chandef->center_freq1 = KHZ_TO_MHZ(control_freq);
+	chandef->freq1_offset = control_freq % 1000;
 	chandef->center_freq2 = 0;
 
 	/* Primary channel not allowed */
@@ -2824,9 +2831,15 @@ int nl80211_parse_chandef(struct cfg80211_registered_device *rdev,
 	} else if (attrs[NL80211_ATTR_CHANNEL_WIDTH]) {
 		chandef->width =
 			nla_get_u32(attrs[NL80211_ATTR_CHANNEL_WIDTH]);
-		if (attrs[NL80211_ATTR_CENTER_FREQ1])
+		if (attrs[NL80211_ATTR_CENTER_FREQ1]) {
 			chandef->center_freq1 =
 				nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ1]);
+			if (attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET])
+				chandef->freq1_offset = nla_get_u32(
+				      attrs[NL80211_ATTR_CENTER_FREQ1_OFFSET]);
+			else
+				chandef->freq1_offset = 0;
+		}
 		if (attrs[NL80211_ATTR_CENTER_FREQ2])
 			chandef->center_freq2 =
 				nla_get_u32(attrs[NL80211_ATTR_CENTER_FREQ2]);
@@ -3259,6 +3272,9 @@ static int nl80211_send_chandef(struct sk_buff *msg,
 	if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
 			chandef->chan->center_freq))
 		return -ENOBUFS;
+	if (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
+			chandef->chan->freq_offset))
+		return -ENOBUFS;
 	switch (chandef->width) {
 	case NL80211_CHAN_WIDTH_20_NOHT:
 	case NL80211_CHAN_WIDTH_20:
@@ -8873,6 +8889,8 @@ static int nl80211_send_bss(struct sk_buff *msg, struct netlink_callback *cb,
 		goto nla_put_failure;
 	if (nla_put_u16(msg, NL80211_BSS_CAPABILITY, res->capability) ||
 	    nla_put_u32(msg, NL80211_BSS_FREQUENCY, res->channel->center_freq) ||
+	    nla_put_u32(msg, NL80211_BSS_FREQUENCY_OFFSET,
+			res->channel->freq_offset) ||
 	    nla_put_u32(msg, NL80211_BSS_CHAN_WIDTH, res->scan_width) ||
 	    nla_put_u32(msg, NL80211_BSS_SEEN_MS_AGO,
 			jiffies_to_msecs(jiffies - intbss->ts)))
@@ -9141,6 +9159,7 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 	enum nl80211_auth_type auth_type;
 	struct key_parse key;
 	bool local_state_change;
+	u32 freq;
 
 	if (!info->attrs[NL80211_ATTR_MAC])
 		return -EINVAL;
@@ -9197,8 +9216,12 @@ static int nl80211_authenticate(struct sk_buff *skb, struct genl_info *info)
 		return -EOPNOTSUPP;
 
 	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
-	chan = nl80211_get_valid_chan(&rdev->wiphy,
-				      info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+		freq +=
+		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+
+	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
 	if (!chan)
 		return -EINVAL;
 
@@ -9388,6 +9411,7 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_assoc_request req = {};
 	const u8 *bssid, *ssid;
 	int err, ssid_len = 0;
+	u32 freq;
 
 	if (dev->ieee80211_ptr->conn_owner_nlportid &&
 	    dev->ieee80211_ptr->conn_owner_nlportid != info->snd_portid)
@@ -9407,8 +9431,11 @@ static int nl80211_associate(struct sk_buff *skb, struct genl_info *info)
 
 	bssid = nla_data(info->attrs[NL80211_ATTR_MAC]);
 
-	chan = nl80211_get_valid_chan(&rdev->wiphy,
-				      info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	freq = MHZ_TO_KHZ(nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+		freq +=
+		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+	chan = nl80211_get_valid_chan(&rdev->wiphy, freq);
 	if (!chan)
 		return -EINVAL;
 
@@ -10088,6 +10115,7 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_connect_params connect;
 	struct wiphy *wiphy;
 	struct cfg80211_cached_keys *connkeys = NULL;
+	u32 freq = 0;
 	int err;
 
 	memset(&connect, 0, sizeof(connect));
@@ -10158,14 +10186,21 @@ static int nl80211_connect(struct sk_buff *skb, struct genl_info *info)
 		connect.prev_bssid =
 			nla_data(info->attrs[NL80211_ATTR_PREV_BSSID]);
 
-	if (info->attrs[NL80211_ATTR_WIPHY_FREQ]) {
-		connect.channel = nl80211_get_valid_chan(
-			wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ]);
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ])
+		freq = MHZ_TO_KHZ(nla_get_u32(
+					info->attrs[NL80211_ATTR_WIPHY_FREQ]));
+	if (info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET])
+		freq +=
+		    nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_OFFSET]);
+
+	if (freq) {
+		connect.channel = nl80211_get_valid_chan(wiphy, freq);
 		if (!connect.channel)
 			return -EINVAL;
 	} else if (info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]) {
-		connect.channel_hint = nl80211_get_valid_chan(
-			wiphy, info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
+		freq = nla_get_u32(info->attrs[NL80211_ATTR_WIPHY_FREQ_HINT]);
+		freq = MHZ_TO_KHZ(freq);
+		connect.channel_hint = nl80211_get_valid_chan(wiphy, freq);
 		if (!connect.channel_hint)
 			return -EINVAL;
 	}
@@ -16215,6 +16250,7 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 	    nla_put_u64_64bit(msg, NL80211_ATTR_WDEV, wdev_id(wdev),
 			      NL80211_ATTR_PAD) ||
 	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ, KHZ_TO_MHZ(freq)) ||
+	    nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET, freq % 1000) ||
 	    (sig_dbm &&
 	     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
 	    nla_put(msg, NL80211_ATTR_FRAME, len, buf) ||
@@ -16864,8 +16900,10 @@ void cfg80211_report_obss_beacon_khz(struct wiphy *wiphy, const u8 *frame,
 
 		if (nla_put_u32(msg, NL80211_ATTR_WIPHY, rdev->wiphy_idx) ||
 		    (freq &&
-		     nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
-				 KHZ_TO_MHZ(freq))) ||
+		     (nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ,
+				  KHZ_TO_MHZ(freq)) ||
+		      nla_put_u32(msg, NL80211_ATTR_WIPHY_FREQ_OFFSET,
+				  freq % 1000))) ||
 		    (sig_dbm &&
 		     nla_put_u32(msg, NL80211_ATTR_RX_SIGNAL_DBM, sig_dbm)) ||
 		    nla_put(msg, NL80211_ATTR_FRAME, len, frame))
-- 
cgit v1.2.3-59-g8ed1b


From 2032f3b2f943256ff40df23182913dfc7e73ec6a Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Thu, 30 Apr 2020 10:25:52 -0700
Subject: nl80211: support scan frequencies in KHz

If the driver advertises NL80211_EXT_FEATURE_SCAN_FREQ_KHZ,
userspace can omit NL80211_ATTR_SCAN_FREQUENCIES in favor
of NL80211_ATTR_SCAN_FREQ_KHZ. To get scan results in
KHz, userspace must also set the NL80211_SCAN_FLAG_FREQ_KHZ
flag.

This lets nl80211 remain compatible with older userspaces
while not requiring and sending redundant (and potentially
incorrect) scan frequency sets.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200430172554.18383-4-thomas@adapt-ip.com
[use just nla_nest_start() (not _noflag) for NL80211_ATTR_SCAN_FREQ_KHZ]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/nl80211.h | 13 ++++++++++-
 net/mac80211/main.c          |  2 ++
 net/wireless/nl80211.c       | 51 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 53 insertions(+), 13 deletions(-)

diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index b1cd132c1d27..47d39b6a073d 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2492,7 +2492,7 @@ enum nl80211_commands {
  *	an %NL80211_ATTR_WIPHY_FREQ_OFFSET.
  * @NL80211_ATTR_CENTER_FREQ1_OFFSET: Center frequency offset in KHz for the
  *	first channel segment specified in %NL80211_ATTR_CENTER_FREQ1.
- *
+ * @NL80211_ATTR_SCAN_FREQ_KHZ: nested attribute with KHz frequencies
  *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
@@ -2972,6 +2972,7 @@ enum nl80211_attrs {
 	NL80211_ATTR_RECEIVE_MULTICAST,
 	NL80211_ATTR_WIPHY_FREQ_OFFSET,
 	NL80211_ATTR_CENTER_FREQ1_OFFSET,
+	NL80211_ATTR_SCAN_FREQ_KHZ,
 
 	/* add attributes here, update the policy in nl80211.c */
 
@@ -5723,6 +5724,11 @@ enum nl80211_feature_flags {
  * @NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS: management frame registrations
  *	are possible for multicast frames and those will be reported properly.
  *
+ * @NL80211_EXT_FEATURE_SCAN_FREQ_KHZ: This driver supports receiving and
+ *	reporting scan request with %NL80211_ATTR_SCAN_FREQ_KHZ. In order to
+ *	report %NL80211_ATTR_SCAN_FREQ_KHZ, %NL80211_SCAN_FLAG_FREQ_KHZ must be
+ *	included in the scan request.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5776,6 +5782,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_DEL_IBSS_STA,
 	NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS,
 	NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT,
+	NL80211_EXT_FEATURE_SCAN_FREQ_KHZ,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
@@ -5887,6 +5894,9 @@ enum nl80211_timeout_reason {
  * @NL80211_SCAN_FLAG_MIN_PREQ_CONTENT: minimize probe request content to
  *	only have supported rates and no additional capabilities (unless
  *	added by userspace explicitly.)
+ * @NL80211_SCAN_FLAG_FREQ_KHZ: report scan results with
+ *	%NL80211_ATTR_SCAN_FREQ_KHZ. This also means
+ *	%NL80211_ATTR_SCAN_FREQUENCIES will not be included.
  */
 enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_LOW_PRIORITY				= 1<<0,
@@ -5902,6 +5912,7 @@ enum nl80211_scan_flags {
 	NL80211_SCAN_FLAG_HIGH_ACCURACY				= 1<<10,
 	NL80211_SCAN_FLAG_RANDOM_SN				= 1<<11,
 	NL80211_SCAN_FLAG_MIN_PREQ_CONTENT			= 1<<12,
+	NL80211_SCAN_FLAG_FREQ_KHZ				= 1<<13,
 };
 
 /**
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index 06c90d360633..ac74bd780b42 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -596,6 +596,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 			      NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
 	wiphy_ext_feature_set(wiphy,
 			      NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH);
+	wiphy_ext_feature_set(wiphy,
+			      NL80211_EXT_FEATURE_SCAN_FREQ_KHZ);
 
 	if (!ops->hw_scan) {
 		wiphy->features |= NL80211_FEATURE_LOW_PRIORITY_SCAN |
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 87d7efd186d0..84bfa147769a 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -640,6 +640,7 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_PMK_REAUTH_THRESHOLD] = NLA_POLICY_RANGE(NLA_U8, 1, 100),
 	[NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
 	[NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
+	[NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED },
 };
 
 /* policy for the key attributes */
@@ -7719,6 +7720,8 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct wireless_dev *wdev = info->user_ptr[1];
 	struct cfg80211_scan_request *request;
+	struct nlattr *scan_freqs = NULL;
+	bool scan_freqs_khz = false;
 	struct nlattr *attr;
 	struct wiphy *wiphy;
 	int err, tmp, n_ssids = 0, n_channels, i;
@@ -7737,9 +7740,17 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 		goto unlock;
 	}
 
-	if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
-		n_channels = validate_scan_freqs(
-				info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]);
+	if (info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ]) {
+		if (!wiphy_ext_feature_isset(wiphy,
+					     NL80211_EXT_FEATURE_SCAN_FREQ_KHZ))
+			return -EOPNOTSUPP;
+		scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQ_KHZ];
+		scan_freqs_khz = true;
+	} else if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES])
+		scan_freqs = info->attrs[NL80211_ATTR_SCAN_FREQUENCIES];
+
+	if (scan_freqs) {
+		n_channels = validate_scan_freqs(scan_freqs);
 		if (!n_channels) {
 			err = -EINVAL;
 			goto unlock;
@@ -7787,13 +7798,16 @@ static int nl80211_trigger_scan(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	i = 0;
-	if (info->attrs[NL80211_ATTR_SCAN_FREQUENCIES]) {
+	if (scan_freqs) {
 		/* user specified, bail out if channel not found */
-		nla_for_each_nested(attr, info->attrs[NL80211_ATTR_SCAN_FREQUENCIES], tmp) {
+		nla_for_each_nested(attr, scan_freqs, tmp) {
 			struct ieee80211_channel *chan;
+			int freq = nla_get_u32(attr);
 
-			chan = ieee80211_get_channel(wiphy, nla_get_u32(attr));
+			if (!scan_freqs_khz)
+				freq = MHZ_TO_KHZ(freq);
 
+			chan = ieee80211_get_channel_khz(wiphy, freq);
 			if (!chan) {
 				err = -EINVAL;
 				goto out_free;
@@ -15231,14 +15245,27 @@ static int nl80211_add_scan_req(struct sk_buff *msg,
 	}
 	nla_nest_end(msg, nest);
 
-	nest = nla_nest_start_noflag(msg, NL80211_ATTR_SCAN_FREQUENCIES);
-	if (!nest)
-		goto nla_put_failure;
-	for (i = 0; i < req->n_channels; i++) {
-		if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+	if (req->flags & NL80211_SCAN_FLAG_FREQ_KHZ) {
+		nest = nla_nest_start(msg, NL80211_ATTR_SCAN_FREQ_KHZ);
+		if (!nest)
+			goto nla_put_failure;
+		for (i = 0; i < req->n_channels; i++) {
+			if (nla_put_u32(msg, i,
+				   ieee80211_channel_to_khz(req->channels[i])))
+				goto nla_put_failure;
+		}
+		nla_nest_end(msg, nest);
+	} else {
+		nest = nla_nest_start_noflag(msg,
+					     NL80211_ATTR_SCAN_FREQUENCIES);
+		if (!nest)
 			goto nla_put_failure;
+		for (i = 0; i < req->n_channels; i++) {
+			if (nla_put_u32(msg, i, req->channels[i]->center_freq))
+				goto nla_put_failure;
+		}
+		nla_nest_end(msg, nest);
 	}
-	nla_nest_end(msg, nest);
 
 	if (req->ie &&
 	    nla_put(msg, NL80211_ATTR_IE, req->ie_len, req->ie))
-- 
cgit v1.2.3-59-g8ed1b


From d6fb67ff86bb991d5ac18471e5f739bc32e5090e Mon Sep 17 00:00:00 2001
From: Thomas Pedersen <thomas@adapt-ip.com>
Date: Thu, 30 Apr 2020 10:25:53 -0700
Subject: ieee80211: S1G defines

These are found in IEEE-802.11ah-2016.

Signed-off-by: Thomas Pedersen <thomas@adapt-ip.com>
Link: https://lore.kernel.org/r/20200430172554.18383-5-thomas@adapt-ip.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 221 ++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 221 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 41d5f000c0d9..f630b8978a43 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -105,6 +105,51 @@
 
 /* extension, added by 802.11ad */
 #define IEEE80211_STYPE_DMG_BEACON		0x0000
+#define IEEE80211_STYPE_S1G_BEACON		0x0010
+
+/* bits unique to S1G beacon */
+#define IEEE80211_S1G_BCN_NEXT_TBTT	0x100
+
+/* see 802.11ah-2016 9.9 NDP CMAC frames */
+#define IEEE80211_S1G_1MHZ_NDP_BITS	25
+#define IEEE80211_S1G_1MHZ_NDP_BYTES	4
+#define IEEE80211_S1G_2MHZ_NDP_BITS	37
+#define IEEE80211_S1G_2MHZ_NDP_BYTES	5
+
+#define IEEE80211_NDP_FTYPE_CTS			0
+#define IEEE80211_NDP_FTYPE_CF_END		0
+#define IEEE80211_NDP_FTYPE_PS_POLL		1
+#define IEEE80211_NDP_FTYPE_ACK			2
+#define IEEE80211_NDP_FTYPE_PS_POLL_ACK		3
+#define IEEE80211_NDP_FTYPE_BA			4
+#define IEEE80211_NDP_FTYPE_BF_REPORT_POLL	5
+#define IEEE80211_NDP_FTYPE_PAGING		6
+#define IEEE80211_NDP_FTYPE_PREQ		7
+
+#define SM64(f, v)	((((u64)v) << f##_S) & f)
+
+/* NDP CMAC frame fields */
+#define IEEE80211_NDP_FTYPE                    0x0000000000000007
+#define IEEE80211_NDP_FTYPE_S                  0x0000000000000000
+
+/* 1M Probe Request 11ah 9.9.3.1.1 */
+#define IEEE80211_NDP_1M_PREQ_ANO      0x0000000000000008
+#define IEEE80211_NDP_1M_PREQ_ANO_S                     3
+#define IEEE80211_NDP_1M_PREQ_CSSID    0x00000000000FFFF0
+#define IEEE80211_NDP_1M_PREQ_CSSID_S                   4
+#define IEEE80211_NDP_1M_PREQ_RTYPE    0x0000000000100000
+#define IEEE80211_NDP_1M_PREQ_RTYPE_S                  20
+#define IEEE80211_NDP_1M_PREQ_RSV      0x0000000001E00000
+#define IEEE80211_NDP_1M_PREQ_RSV      0x0000000001E00000
+/* 2M Probe Request 11ah 9.9.3.1.2 */
+#define IEEE80211_NDP_2M_PREQ_ANO      0x0000000000000008
+#define IEEE80211_NDP_2M_PREQ_ANO_S                     3
+#define IEEE80211_NDP_2M_PREQ_CSSID    0x0000000FFFFFFFF0
+#define IEEE80211_NDP_2M_PREQ_CSSID_S                   4
+#define IEEE80211_NDP_2M_PREQ_RTYPE    0x0000001000000000
+#define IEEE80211_NDP_2M_PREQ_RTYPE_S                  36
+
+#define IEEE80211_ANO_NETTYPE_WILD              15
 
 /* control extension - for IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTL_EXT */
 #define IEEE80211_CTL_EXT_POLL		0x2000
@@ -121,6 +166,21 @@
 #define IEEE80211_MAX_SN		IEEE80211_SN_MASK
 #define IEEE80211_SN_MODULO		(IEEE80211_MAX_SN + 1)
 
+
+/* PV1 Layout 11ah 9.8.3.1 */
+#define IEEE80211_PV1_FCTL_VERS		0x0003
+#define IEEE80211_PV1_FCTL_FTYPE	0x001c
+#define IEEE80211_PV1_FCTL_STYPE	0x00e0
+#define IEEE80211_PV1_FCTL_TODS		0x0100
+#define IEEE80211_PV1_FCTL_MOREFRAGS	0x0200
+#define IEEE80211_PV1_FCTL_PM		0x0400
+#define IEEE80211_PV1_FCTL_MOREDATA	0x0800
+#define IEEE80211_PV1_FCTL_PROTECTED	0x1000
+#define IEEE80211_PV1_FCTL_END_SP       0x2000
+#define IEEE80211_PV1_FCTL_RELAYED      0x4000
+#define IEEE80211_PV1_FCTL_ACK_POLICY   0x8000
+#define IEEE80211_PV1_FCTL_CTL_EXT	0x0f00
+
 static inline bool ieee80211_sn_less(u16 sn1, u16 sn2)
 {
 	return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1);
@@ -148,6 +208,7 @@ static inline u16 ieee80211_sn_sub(u16 sn1, u16 sn2)
 #define IEEE80211_MAX_FRAG_THRESHOLD	2352
 #define IEEE80211_MAX_RTS_THRESHOLD	2353
 #define IEEE80211_MAX_AID		2007
+#define IEEE80211_MAX_AID_S1G		8191
 #define IEEE80211_MAX_TIM_LEN		251
 #define IEEE80211_MAX_MESH_PEERINGS	63
 /* Maximum size for the MA-UNITDATA primitive, 802.11 standard section
@@ -371,6 +432,17 @@ static inline bool ieee80211_is_data(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_DATA);
 }
 
+/**
+ * ieee80211_is_ext - check if type is IEEE80211_FTYPE_EXT
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline bool ieee80211_is_ext(__le16 fc)
+{
+	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) ==
+	       cpu_to_le16(IEEE80211_FTYPE_EXT);
+}
+
+
 /**
  * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set
  * @fc: frame control bytes in little-endian byteorder
@@ -469,6 +541,18 @@ static inline bool ieee80211_is_beacon(__le16 fc)
 	       cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON);
 }
 
+/**
+ * ieee80211_is_s1g_beacon - check if IEEE80211_FTYPE_EXT &&
+ * IEEE80211_STYPE_S1G_BEACON
+ * @fc: frame control bytes in little-endian byteorder
+ */
+static inline bool ieee80211_is_s1g_beacon(__le16 fc)
+{
+	return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE |
+				 IEEE80211_FCTL_STYPE)) ==
+	       cpu_to_le16(IEEE80211_FTYPE_EXT | IEEE80211_STYPE_S1G_BEACON);
+}
+
 /**
  * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM
  * @fc: frame control bytes in little-endian byteorder
@@ -900,6 +984,59 @@ struct ieee80211_addba_ext_ie {
 	u8 data;
 } __packed;
 
+/**
+ * struct ieee80211_s1g_bcn_compat_ie
+ *
+ * S1G Beacon Compatibility element
+ */
+struct ieee80211_s1g_bcn_compat_ie {
+	__le16 compat_info;
+	__le16 beacon_int;
+	__le32 tsf_completion;
+} __packed;
+
+/**
+ * struct ieee80211_s1g_oper_ie
+ *
+ * S1G Operation element
+ */
+struct ieee80211_s1g_oper_ie {
+	u8 ch_width;
+	u8 oper_class;
+	u8 primary_ch;
+	u8 oper_ch;
+	__le16 basic_mcs_nss;
+} __packed;
+
+/**
+ * struct ieee80211_aid_response_ie
+ *
+ * AID Response element
+ */
+struct ieee80211_aid_response_ie {
+	__le16 aid;
+	u8 switch_count;
+	__le16 response_int;
+} __packed;
+
+struct ieee80211_s1g_cap {
+	u8 capab_info[10];
+	u8 supp_mcs_nss[5];
+} __packed;
+
+struct ieee80211_ext {
+	__le16 frame_control;
+	__le16 duration;
+	union {
+		struct {
+			u8 sa[ETH_ALEN];
+			__le32 timestamp;
+			u8 change_seq;
+			u8 variable[0];
+		} __packed s1g_beacon;
+	} u;
+} __packed __aligned(2);
+
 struct ieee80211_mgmt {
 	__le16 frame_control;
 	__le16 duration;
@@ -2137,6 +2274,86 @@ ieee80211_he_spr_size(const u8 *he_spr_ie)
 	return spr_len;
 }
 
+/* S1G Capabilities Information field */
+#define S1G_CAPAB_B0_S1G_LONG BIT(0)
+#define S1G_CAPAB_B0_SGI_1MHZ BIT(1)
+#define S1G_CAPAB_B0_SGI_2MHZ BIT(2)
+#define S1G_CAPAB_B0_SGI_4MHZ BIT(3)
+#define S1G_CAPAB_B0_SGI_8MHZ BIT(4)
+#define S1G_CAPAB_B0_SGI_16MHZ BIT(5)
+#define S1G_CAPAB_B0_SUPP_CH_WIDTH_MASK (BIT(6) | BIT(7))
+#define S1G_CAPAB_B0_SUPP_CH_WIDTH_SHIFT 6
+
+#define S1G_CAPAB_B1_RX_LDPC BIT(0)
+#define S1G_CAPAB_B1_TX_STBC BIT(1)
+#define S1G_CAPAB_B1_RX_STBC BIT(2)
+#define S1G_CAPAB_B1_SU_BFER BIT(3)
+#define S1G_CAPAB_B1_SU_BFEE BIT(4)
+#define S1G_CAPAB_B1_BFEE_STS_MASK (BIT(5) | BIT(6) | BIT(7))
+#define S1G_CAPAB_B1_BFEE_STS_SHIFT 5
+
+#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_MASK (BIT(0) | BIT(1) | BIT(2))
+#define S1G_CAPAB_B2_SOUNDING_DIMENSIONS_SHIFT 0
+#define S1G_CAPAB_B2_MU_BFER BIT(3)
+#define S1G_CAPAB_B2_MU_BFEE BIT(4)
+#define S1G_CAPAB_B2_PLUS_HTC_VHT BIT(5)
+#define S1G_CAPAB_B2_TRAVELING_PILOT_MASK (BIT(6) | BIT(7))
+#define S1G_CAPAB_B2_TRAVELING_PILOT_SHIFT 6
+
+#define S1G_CAPAB_B3_RD_RESPONDER BIT(0)
+#define S1G_CAPAB_B3_HT_DELAYED_BA BIT(1)
+#define S1G_CAPAB_B3_MAX_MPDU_LEN BIT(2)
+#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_MASK (BIT(3) | BIT(4))
+#define S1G_CAPAB_B3_MAX_AMPDU_LEN_EXP_SHIFT 3
+#define S1G_CAPAB_B3_MIN_MPDU_START_MASK (BIT(5) | BIT(6) | BIT(7))
+#define S1G_CAPAB_B3_MIN_MPDU_START_SHIFT 5
+
+#define S1G_CAPAB_B4_UPLINK_SYNC BIT(0)
+#define S1G_CAPAB_B4_DYNAMIC_AID BIT(1)
+#define S1G_CAPAB_B4_BAT BIT(2)
+#define S1G_CAPAB_B4_TIME_ADE BIT(3)
+#define S1G_CAPAB_B4_NON_TIM BIT(4)
+#define S1G_CAPAB_B4_GROUP_AID BIT(5)
+#define S1G_CAPAB_B4_STA_TYPE_MASK (BIT(6) | BIT(7))
+#define S1G_CAPAB_B4_STA_TYPE_SHIFT 6
+
+#define S1G_CAPAB_B5_CENT_AUTH_CONTROL BIT(0)
+#define S1G_CAPAB_B5_DIST_AUTH_CONTROL BIT(1)
+#define S1G_CAPAB_B5_AMSDU BIT(2)
+#define S1G_CAPAB_B5_AMPDU BIT(3)
+#define S1G_CAPAB_B5_ASYMMETRIC_BA BIT(4)
+#define S1G_CAPAB_B5_FLOW_CONTROL BIT(5)
+#define S1G_CAPAB_B5_SECTORIZED_BEAM_MASK (BIT(6) | BIT(7))
+#define S1G_CAPAB_B5_SECTORIZED_BEAM_SHIFT 6
+
+#define S1G_CAPAB_B6_OBSS_MITIGATION BIT(0)
+#define S1G_CAPAB_B6_FRAGMENT_BA BIT(1)
+#define S1G_CAPAB_B6_NDP_PS_POLL BIT(2)
+#define S1G_CAPAB_B6_RAW_OPERATION BIT(3)
+#define S1G_CAPAB_B6_PAGE_SLICING BIT(4)
+#define S1G_CAPAB_B6_TXOP_SHARING_IMP_ACK BIT(5)
+#define S1G_CAPAB_B6_VHT_LINK_ADAPT_MASK (BIT(6) | BIT(7))
+#define S1G_CAPAB_B6_VHT_LINK_ADAPT_SHIFT 6
+
+#define S1G_CAPAB_B7_TACK_AS_PS_POLL BIT(0)
+#define S1G_CAPAB_B7_DUP_1MHZ BIT(1)
+#define S1G_CAPAB_B7_MCS_NEGOTIATION BIT(2)
+#define S1G_CAPAB_B7_1MHZ_CTL_RESPONSE_PREAMBLE BIT(3)
+#define S1G_CAPAB_B7_NDP_BFING_REPORT_POLL BIT(4)
+#define S1G_CAPAB_B7_UNSOLICITED_DYN_AID BIT(5)
+#define S1G_CAPAB_B7_SECTOR_TRAINING_OPERATION BIT(6)
+#define S1G_CAPAB_B7_TEMP_PS_MODE_SWITCH BIT(7)
+
+#define S1G_CAPAB_B8_TWT_GROUPING BIT(0)
+#define S1G_CAPAB_B8_BDT BIT(1)
+#define S1G_CAPAB_B8_COLOR_MASK (BIT(2) | BIT(3) | BIT(4))
+#define S1G_CAPAB_B8_COLOR_SHIFT 2
+#define S1G_CAPAB_B8_TWT_REQUEST BIT(5)
+#define S1G_CAPAB_B8_TWT_RESPOND BIT(6)
+#define S1G_CAPAB_B8_PV1_FRAME BIT(7)
+
+#define S1G_CAPAB_B9_LINK_ADAPT_PER_CONTROL_RESPONSE BIT(0)
+
 /* Authentication algorithms */
 #define WLAN_AUTH_OPEN 0
 #define WLAN_AUTH_SHARED_KEY 1
@@ -2532,8 +2749,12 @@ enum ieee80211_eid {
 	WLAN_EID_QUIET_CHANNEL = 198,
 	WLAN_EID_OPMODE_NOTIF = 199,
 
+	WLAN_EID_S1G_BCN_COMPAT = 213,
+	WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
+	WLAN_EID_S1G_CAPABILITIES = 217,
 	WLAN_EID_VENDOR_SPECIFIC = 221,
 	WLAN_EID_QOS_PARAMETER = 222,
+	WLAN_EID_S1G_OPERATION = 232,
 	WLAN_EID_CAG_NUMBER = 237,
 	WLAN_EID_AP_CSN = 239,
 	WLAN_EID_FILS_INDICATION = 240,
-- 
cgit v1.2.3-59-g8ed1b


From fedd0fe4e89b009f31eb53ec36dbdf1e457616c0 Mon Sep 17 00:00:00 2001
From: Tamizh Chelvam <tamizhr@codeaurora.org>
Date: Mon, 4 May 2020 22:34:59 +0530
Subject: mac80211: Add new AMPDU factor macro for HE peer caps

Add IEEE80211_HE_VHT_MAX_AMPDU_FACTOR and IEEE80211_HE_HT_MAX_AMPDU_FACTOR
as per spec to use for peer max ampdu factor.

Signed-off-by: Tamizh Chelvam <tamizhr@codeaurora.org>
Link: https://lore.kernel.org/r/1588611900-21185-1-git-send-email-tamizhr@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index f630b8978a43..2153d465d752 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -1958,6 +1958,8 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP3_FLEX_TWT_SCHED			0x40
 #define IEEE80211_HE_MAC_CAP3_RX_CTRL_FRAME_TO_MULTIBSS		0x80
 
+#define IEEE80211_HE_MAC_CAP3_MAX_AMPDU_LEN_EXP_SHIFT		3
+
 #define IEEE80211_HE_MAC_CAP4_BSRP_BQRP_A_MPDU_AGG		0x01
 #define IEEE80211_HE_MAC_CAP4_QTP				0x02
 #define IEEE80211_HE_MAC_CAP4_BQR				0x04
@@ -1979,6 +1981,9 @@ int ieee80211_get_vht_max_nss(struct ieee80211_vht_cap *cap,
 #define IEEE80211_HE_MAC_CAP5_PUNCTURED_SOUNDING		0x40
 #define IEEE80211_HE_MAC_CAP5_HT_VHT_TRIG_FRAME_RX		0x80
 
+#define IEEE80211_HE_VHT_MAX_AMPDU_FACTOR	20
+#define IEEE80211_HE_HT_MAX_AMPDU_FACTOR	16
+
 /* 802.11ax HE PHY capabilities */
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_IN_2G		0x02
 #define IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_40MHZ_80MHZ_IN_5G	0x04
-- 
cgit v1.2.3-59-g8ed1b


From 396fba0a59f3c94d6fd6443fbeabd8bd9e3956eb Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:39:09 -0500
Subject: cfg80211: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20200507183909.GA12993@embeddedor
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 6 +++---
 include/net/cfg80211.h    | 8 ++++----
 net/wireless/core.h       | 2 +-
 3 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 2153d465d752..0320ca4c7d28 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -800,7 +800,7 @@ struct ieee80211_msrment_ie {
 	u8 token;
 	u8 mode;
 	u8 type;
-	u8 request[0];
+	u8 request[];
 } __packed;
 
 /**
@@ -1781,7 +1781,7 @@ struct ieee80211_he_operation {
 	__le32 he_oper_params;
 	__le16 he_mcs_nss_set;
 	/* Optional 0,1,3,4,5,7 or 8 bytes: depends on @he_oper_params */
-	u8 optional[0];
+	u8 optional[];
 } __packed;
 
 /**
@@ -1793,7 +1793,7 @@ struct ieee80211_he_operation {
 struct ieee80211_he_spr {
 	u8 he_sr_control;
 	/* Optional 0 to 19 bytes: depends on @he_sr_control */
-	u8 optional[0];
+	u8 optional[];
 } __packed;
 
 /**
diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 7415f77d99ca..021366cfb2b0 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2037,7 +2037,7 @@ struct cfg80211_scan_request {
 	bool no_cck;
 
 	/* keep last */
-	struct ieee80211_channel *channels[0];
+	struct ieee80211_channel *channels[];
 };
 
 static inline void get_random_mask_addr(u8 *buf, const u8 *addr, const u8 *mask)
@@ -2183,7 +2183,7 @@ struct cfg80211_sched_scan_request {
 	struct list_head list;
 
 	/* keep last */
-	struct ieee80211_channel *channels[0];
+	struct ieee80211_channel *channels[];
 };
 
 /**
@@ -2305,7 +2305,7 @@ struct cfg80211_bss {
 	u8 bssid_index;
 	u8 max_bssid_indicator;
 
-	u8 priv[0] __aligned(sizeof(void *));
+	u8 priv[] __aligned(sizeof(void *));
 };
 
 /**
@@ -4852,7 +4852,7 @@ struct wiphy {
 
 	u8 max_data_retry_count;
 
-	char priv[0] __aligned(NETDEV_ALIGN);
+	char priv[] __aligned(NETDEV_ALIGN);
 };
 
 static inline struct net *wiphy_net(struct wiphy *wiphy)
diff --git a/net/wireless/core.h b/net/wireless/core.h
index 639d41896573..e0e5b3ee9699 100644
--- a/net/wireless/core.h
+++ b/net/wireless/core.h
@@ -286,7 +286,7 @@ struct cfg80211_cqm_config {
 	u32 rssi_hyst;
 	s32 last_rssi_event_value;
 	int n_rssi_thresholds;
-	s32 rssi_thresholds[0];
+	s32 rssi_thresholds[];
 };
 
 void cfg80211_destroy_ifaces(struct cfg80211_registered_device *rdev);
-- 
cgit v1.2.3-59-g8ed1b


From 3c23215ba8c70c0e9b16beffb7f700a401391e38 Mon Sep 17 00:00:00 2001
From: "Gustavo A. R. Silva" <gustavoars@kernel.org>
Date: Thu, 7 May 2020 13:59:07 -0500
Subject: mac80211: Replace zero-length array with flexible-array

The current codebase makes use of the zero-length array language
extension to the C90 standard, but the preferred mechanism to declare
variable-length types such as these ones is a flexible array member[1][2],
introduced in C99:

struct foo {
        int stuff;
        struct boo array[];
};

By making use of the mechanism above, we will get a compiler warning
in case the flexible array does not occur last in the structure, which
will help us prevent some kind of undefined behavior bugs from being
inadvertently introduced[3] to the codebase from now on.

Also, notice that, dynamic memory allocations won't be affected by
this change:

"Flexible array members have incomplete type, and so the sizeof operator
may not be applied. As a quirk of the original implementation of
zero-length arrays, sizeof evaluates to zero."[1]

sizeof(flexible-array-member) triggers a warning because flexible array
members have incomplete type[1]. There are some instances of code in
which the sizeof operator is being incorrectly/erroneously applied to
zero-length arrays and the result is zero. Such instances may be hiding
some bugs. So, this work (flexible-array member conversions) will also
help to get completely rid of those sorts of issues.

This issue was found with the help of Coccinelle.

[1] https://gcc.gnu.org/onlinedocs/gcc/Zero-Length.html
[2] https://github.com/KSPP/linux/issues/21
[3] commit 76497732932f ("cxgb3/l2t: Fix undefined behaviour")

Signed-off-by: Gustavo A. R. Silva <gustavoars@kernel.org>
Link: https://lore.kernel.org/r/20200507185907.GA15102@embeddedor
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     | 10 +++++-----
 net/mac80211/ieee80211_i.h |  2 +-
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 0d48e679efb0..7cb712427df1 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -230,7 +230,7 @@ struct ieee80211_chanctx_conf {
 
 	bool radar_enabled;
 
-	u8 drv_priv[0] __aligned(sizeof(void *));
+	u8 drv_priv[] __aligned(sizeof(void *));
 };
 
 /**
@@ -1670,7 +1670,7 @@ struct ieee80211_vif {
 	bool txqs_stopped[IEEE80211_NUM_ACS];
 
 	/* must be last */
-	u8 drv_priv[0] __aligned(sizeof(void *));
+	u8 drv_priv[] __aligned(sizeof(void *));
 };
 
 static inline bool ieee80211_vif_is_mesh(struct ieee80211_vif *vif)
@@ -1798,7 +1798,7 @@ struct ieee80211_key_conf {
 	s8 keyidx;
 	u16 flags;
 	u8 keylen;
-	u8 key[0];
+	u8 key[];
 };
 
 #define IEEE80211_MAX_PN_LEN	16
@@ -2053,7 +2053,7 @@ struct ieee80211_sta {
 	struct ieee80211_txq *txq[IEEE80211_NUM_TIDS + 1];
 
 	/* must be last */
-	u8 drv_priv[0] __aligned(sizeof(void *));
+	u8 drv_priv[] __aligned(sizeof(void *));
 };
 
 /**
@@ -2099,7 +2099,7 @@ struct ieee80211_txq {
 	u8 ac;
 
 	/* must be last */
-	u8 drv_priv[0] __aligned(sizeof(void *));
+	u8 drv_priv[] __aligned(sizeof(void *));
 };
 
 /**
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 8cbae66b5cdb..2d1b6cb75497 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -267,7 +267,7 @@ struct probe_resp {
 	struct rcu_head rcu_head;
 	int len;
 	u16 csa_counter_offsets[IEEE80211_MAX_CSA_COUNTERS_NUM];
-	u8 data[0];
+	u8 data[];
 };
 
 struct ps_data {
-- 
cgit v1.2.3-59-g8ed1b


From dca9ca2d588bd2c0989c671f048540b82e57cf1e Mon Sep 17 00:00:00 2001
From: Markus Theil <markus.theil@tu-ilmenau.de>
Date: Fri, 8 May 2020 16:42:00 +0200
Subject: nl80211: add ability to report TX status for control port TX

This adds the necessary capabilities in nl80211 to allow drivers to
assign a cookie to control port TX frames (returned via extack in
the netlink ACK message of the command) and then later report the
frame's status.

Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
Link: https://lore.kernel.org/r/20200508144202.7678-2-markus.theil@tu-ilmenau.de
[use extack cookie instead of explicit message, recombine patches]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 20 +++++++++++++++++++-
 include/uapi/linux/nl80211.h | 12 ++++++++++++
 net/mac80211/ieee80211_i.h   |  3 ++-
 net/mac80211/tx.c            |  3 ++-
 net/wireless/nl80211.c       | 41 +++++++++++++++++++++++++++++++++--------
 net/wireless/rdev-ops.h      |  9 ++++++---
 net/wireless/trace.h         | 17 +++++++++++++++++
 7 files changed, 91 insertions(+), 14 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 021366cfb2b0..f842f3652026 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -4069,7 +4069,8 @@ struct cfg80211_ops {
 				   struct net_device *dev,
 				   const u8 *buf, size_t len,
 				   const u8 *dest, const __be16 proto,
-				   const bool noencrypt);
+				   const bool noencrypt,
+				   u64 *cookie);
 
 	int	(*get_ftm_responder_stats)(struct wiphy *wiphy,
 				struct net_device *dev,
@@ -7049,6 +7050,23 @@ static inline bool cfg80211_rx_mgmt(struct wireless_dev *wdev, int freq,
 void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 			     const u8 *buf, size_t len, bool ack, gfp_t gfp);
 
+/**
+ * cfg80211_control_port_tx_status - notification of TX status for control
+ *                                   port frames
+ * @wdev: wireless device receiving the frame
+ * @cookie: Cookie returned by cfg80211_ops::tx_control_port()
+ * @buf: Data frame (header + body)
+ * @len: length of the frame data
+ * @ack: Whether frame was acknowledged
+ * @gfp: context flags
+ *
+ * This function is called whenever a control port frame was requested to be
+ * transmitted with cfg80211_ops::tx_control_port() to report the TX status of
+ * the transmission attempt.
+ */
+void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
+				     const u8 *buf, size_t len, bool ack,
+				     gfp_t gfp);
 
 /**
  * cfg80211_rx_control_port - notification about a received control port frame
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 47d39b6a073d..0f324b6b81cc 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -1164,6 +1164,12 @@
  *	dropped because it did not include a valid MME MIC while beacon
  *	protection was enabled (BIGTK configured in station mode).
  *
+ * @NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS: Report TX status of a control
+ *	port frame transmitted with %NL80211_CMD_CONTROL_PORT_FRAME.
+ *	%NL80211_ATTR_COOKIE identifies the TX command and %NL80211_ATTR_FRAME
+ *	includes the contents of the frame. %NL80211_ATTR_ACK flag is included
+ *	if the recipient acknowledged the frame.
+ *
  * @NL80211_CMD_MAX: highest used command number
  * @__NL80211_CMD_AFTER_LAST: internal use
  */
@@ -1392,6 +1398,8 @@ enum nl80211_commands {
 
 	NL80211_CMD_UNPROT_BEACON,
 
+	NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS,
+
 	/* add new commands above here */
 
 	/* used to define NL80211_CMD_MAX below */
@@ -5729,6 +5737,9 @@ enum nl80211_feature_flags {
  *	report %NL80211_ATTR_SCAN_FREQ_KHZ, %NL80211_SCAN_FLAG_FREQ_KHZ must be
  *	included in the scan request.
  *
+ * @NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS: The driver
+ *	can report tx status for control port over nl80211 tx operations.
+ *
  * @NUM_NL80211_EXT_FEATURES: number of extended features.
  * @MAX_NL80211_EXT_FEATURES: highest extended feature index.
  */
@@ -5783,6 +5794,7 @@ enum nl80211_ext_feature_index {
 	NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS,
 	NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT,
 	NL80211_EXT_FEATURE_SCAN_FREQ_KHZ,
+	NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS,
 
 	/* add new features before the definition below */
 	NUM_NL80211_EXT_FEATURES,
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 2d1b6cb75497..b87dc873825b 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1800,7 +1800,8 @@ void ieee80211_check_fast_xmit_iface(struct ieee80211_sub_if_data *sdata);
 void ieee80211_clear_fast_xmit(struct sta_info *sta);
 int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len,
-			      const u8 *dest, __be16 proto, bool unencrypted);
+			      const u8 *dest, __be16 proto, bool unencrypted,
+			      u64 *cookie);
 int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len);
 
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 47f460c8bd74..5931128e1855 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -5339,7 +5339,8 @@ void __ieee80211_tx_skb_tid_band(struct ieee80211_sub_if_data *sdata,
 
 int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 			      const u8 *buf, size_t len,
-			      const u8 *dest, __be16 proto, bool unencrypted)
+			      const u8 *dest, __be16 proto, bool unencrypted,
+			      u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 84bfa147769a..7ea764865546 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -13866,6 +13866,7 @@ static int nl80211_external_auth(struct sk_buff *skb, struct genl_info *info)
 
 static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
 {
+	bool dont_wait_for_ack = info->attrs[NL80211_ATTR_DONT_WAIT_FOR_ACK];
 	struct cfg80211_registered_device *rdev = info->user_ptr[0];
 	struct net_device *dev = info->user_ptr[1];
 	struct wireless_dev *wdev = dev->ieee80211_ptr;
@@ -13874,6 +13875,7 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
 	u8 *dest;
 	u16 proto;
 	bool noencrypt;
+	u64 cookie = 0;
 	int err;
 
 	if (!wiphy_ext_feature_isset(&rdev->wiphy,
@@ -13918,9 +13920,12 @@ static int nl80211_tx_control_port(struct sk_buff *skb, struct genl_info *info)
 	noencrypt =
 		nla_get_flag(info->attrs[NL80211_ATTR_CONTROL_PORT_NO_ENCRYPT]);
 
-	return rdev_tx_control_port(rdev, dev, buf, len,
-				    dest, cpu_to_be16(proto), noencrypt);
-
+	err = rdev_tx_control_port(rdev, dev, buf, len,
+				   dest, cpu_to_be16(proto), noencrypt,
+				   dont_wait_for_ack ? NULL : &cookie);
+	if (!err && !dont_wait_for_ack)
+		nl_set_extack_cookie_u64(info->extack, cookie);
+	return err;
  out:
 	wdev_unlock(wdev);
 	return err;
@@ -16294,8 +16299,9 @@ int nl80211_send_mgmt(struct cfg80211_registered_device *rdev,
 	return -ENOBUFS;
 }
 
-void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
-			     const u8 *buf, size_t len, bool ack, gfp_t gfp)
+static void nl80211_frame_tx_status(struct wireless_dev *wdev, u64 cookie,
+				    const u8 *buf, size_t len, bool ack,
+				    gfp_t gfp, enum nl80211_commands command)
 {
 	struct wiphy *wiphy = wdev->wiphy;
 	struct cfg80211_registered_device *rdev = wiphy_to_rdev(wiphy);
@@ -16303,13 +16309,16 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 	struct sk_buff *msg;
 	void *hdr;
 
-	trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+	if (command == NL80211_CMD_FRAME_TX_STATUS)
+		trace_cfg80211_mgmt_tx_status(wdev, cookie, ack);
+	else
+		trace_cfg80211_control_port_tx_status(wdev, cookie, ack);
 
 	msg = nlmsg_new(100 + len, gfp);
 	if (!msg)
 		return;
 
-	hdr = nl80211hdr_put(msg, 0, 0, 0, NL80211_CMD_FRAME_TX_STATUS);
+	hdr = nl80211hdr_put(msg, 0, 0, 0, command);
 	if (!hdr) {
 		nlmsg_free(msg);
 		return;
@@ -16332,9 +16341,25 @@ void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
 				NL80211_MCGRP_MLME, gfp);
 	return;
 
- nla_put_failure:
+nla_put_failure:
 	nlmsg_free(msg);
 }
+
+void cfg80211_control_port_tx_status(struct wireless_dev *wdev, u64 cookie,
+				     const u8 *buf, size_t len, bool ack,
+				     gfp_t gfp)
+{
+	nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
+				NL80211_CMD_CONTROL_PORT_FRAME_TX_STATUS);
+}
+EXPORT_SYMBOL(cfg80211_control_port_tx_status);
+
+void cfg80211_mgmt_tx_status(struct wireless_dev *wdev, u64 cookie,
+			     const u8 *buf, size_t len, bool ack, gfp_t gfp)
+{
+	nl80211_frame_tx_status(wdev, cookie, buf, len, ack, gfp,
+				NL80211_CMD_FRAME_TX_STATUS);
+}
 EXPORT_SYMBOL(cfg80211_mgmt_tx_status);
 
 static int __nl80211_rx_control_port(struct net_device *dev,
diff --git a/net/wireless/rdev-ops.h b/net/wireless/rdev-ops.h
index df5142e86c4f..950d57494168 100644
--- a/net/wireless/rdev-ops.h
+++ b/net/wireless/rdev-ops.h
@@ -748,14 +748,17 @@ static inline int rdev_tx_control_port(struct cfg80211_registered_device *rdev,
 				       struct net_device *dev,
 				       const void *buf, size_t len,
 				       const u8 *dest, __be16 proto,
-				       const bool noencrypt)
+				       const bool noencrypt, u64 *cookie)
 {
 	int ret;
 	trace_rdev_tx_control_port(&rdev->wiphy, dev, buf, len,
 				   dest, proto, noencrypt);
 	ret = rdev->ops->tx_control_port(&rdev->wiphy, dev, buf, len,
-					 dest, proto, noencrypt);
-	trace_rdev_return_int(&rdev->wiphy, ret);
+					 dest, proto, noencrypt, cookie);
+	if (cookie)
+		trace_rdev_return_int_cookie(&rdev->wiphy, ret, *cookie);
+	else
+		trace_rdev_return_int(&rdev->wiphy, ret);
 	return ret;
 }
 
diff --git a/net/wireless/trace.h b/net/wireless/trace.h
index f2ab44a2a3e4..b23cab016521 100644
--- a/net/wireless/trace.h
+++ b/net/wireless/trace.h
@@ -2861,6 +2861,23 @@ TRACE_EVENT(cfg80211_mgmt_tx_status,
 		  WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
 );
 
+TRACE_EVENT(cfg80211_control_port_tx_status,
+	TP_PROTO(struct wireless_dev *wdev, u64 cookie, bool ack),
+	TP_ARGS(wdev, cookie, ack),
+	TP_STRUCT__entry(
+		WDEV_ENTRY
+		__field(u64, cookie)
+		__field(bool, ack)
+	),
+	TP_fast_assign(
+		WDEV_ASSIGN;
+		__entry->cookie = cookie;
+		__entry->ack = ack;
+	),
+	TP_printk(WDEV_PR_FMT", cookie: %llu, ack: %s",
+		  WDEV_PR_ARG, __entry->cookie, BOOL_TO_STR(__entry->ack))
+);
+
 TRACE_EVENT(cfg80211_rx_control_port,
 	TP_PROTO(struct net_device *netdev, struct sk_buff *skb,
 		 bool unencrypted),
-- 
cgit v1.2.3-59-g8ed1b


From 1ea02224afc29431880a67b8c3198146cc01d33e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 May 2020 10:31:33 +0200
Subject: mac80211: allow SA-QUERY processing in userspace

As discussed with Mathy almost two years ago in
http://lore.kernel.org/r/20180806224857.14853-1-Mathy.Vanhoef@cs.kuleuven.be
we should let userspace process SA-QUERY frames if it
wants to, so that it can handle OCV (operating channel
validation) which mac80211 doesn't know how to.

Evidently I had been expecting Mathy to (re)send such a
patch, but he never did, perhaps expecting me to do it
after our discussion.

In any case, this came up now with OCV getting more
attention, so move the code around as discussed there
to let userspace handle it, and do it properly.

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200526103131.1f9cf7e5b6db.Iae5b42b09ad2b1cbcbe13492002c43f0d1d51dfc@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 49 ++++++++++++++++++++++++++++++++++++-------------
 1 file changed, 36 insertions(+), 13 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8e47b0d31051..8b0fa5e345f4 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -3354,19 +3354,6 @@ ieee80211_rx_h_action(struct ieee80211_rx_data *rx)
 			}
 		}
 		break;
-	case WLAN_CATEGORY_SA_QUERY:
-		if (len < (IEEE80211_MIN_ACTION_SIZE +
-			   sizeof(mgmt->u.action.u.sa_query)))
-			break;
-
-		switch (mgmt->u.action.u.sa_query.action) {
-		case WLAN_ACTION_SA_QUERY_REQUEST:
-			if (sdata->vif.type != NL80211_IFTYPE_STATION)
-				break;
-			ieee80211_process_sa_query_req(sdata, mgmt, len);
-			goto handled;
-		}
-		break;
 	case WLAN_CATEGORY_SELF_PROTECTED:
 		if (len < (IEEE80211_MIN_ACTION_SIZE +
 			   sizeof(mgmt->u.action.u.self_prot.action_code)))
@@ -3456,6 +3443,41 @@ ieee80211_rx_h_userspace_mgmt(struct ieee80211_rx_data *rx)
 	return RX_CONTINUE;
 }
 
+static ieee80211_rx_result debug_noinline
+ieee80211_rx_h_action_post_userspace(struct ieee80211_rx_data *rx)
+{
+	struct ieee80211_sub_if_data *sdata = rx->sdata;
+	struct ieee80211_mgmt *mgmt = (struct ieee80211_mgmt *) rx->skb->data;
+	int len = rx->skb->len;
+
+	if (!ieee80211_is_action(mgmt->frame_control))
+		return RX_CONTINUE;
+
+	switch (mgmt->u.action.category) {
+	case WLAN_CATEGORY_SA_QUERY:
+		if (len < (IEEE80211_MIN_ACTION_SIZE +
+			   sizeof(mgmt->u.action.u.sa_query)))
+			break;
+
+		switch (mgmt->u.action.u.sa_query.action) {
+		case WLAN_ACTION_SA_QUERY_REQUEST:
+			if (sdata->vif.type != NL80211_IFTYPE_STATION)
+				break;
+			ieee80211_process_sa_query_req(sdata, mgmt, len);
+			goto handled;
+		}
+		break;
+	}
+
+	return RX_CONTINUE;
+
+ handled:
+	if (rx->sta)
+		rx->sta->rx_stats.packets++;
+	dev_kfree_skb(rx->skb);
+	return RX_QUEUED;
+}
+
 static ieee80211_rx_result debug_noinline
 ieee80211_rx_h_action_return(struct ieee80211_rx_data *rx)
 {
@@ -3736,6 +3758,7 @@ static void ieee80211_rx_handlers(struct ieee80211_rx_data *rx,
 		CALL_RXH(ieee80211_rx_h_mgmt_check);
 		CALL_RXH(ieee80211_rx_h_action);
 		CALL_RXH(ieee80211_rx_h_userspace_mgmt);
+		CALL_RXH(ieee80211_rx_h_action_post_userspace);
 		CALL_RXH(ieee80211_rx_h_action_return);
 		CALL_RXH(ieee80211_rx_h_mgmt);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9a5f6488623730dc16cca0836ade23869761adee Mon Sep 17 00:00:00 2001
From: Tamizh Chelvam <tamizhr@codeaurora.org>
Date: Wed, 13 May 2020 13:41:44 +0530
Subject: nl80211: Add support to configure TID specific Tx rate configuration

This patch adds support to configure per TID Tx Rate configuration
through NL80211_TID_CONFIG_ATTR_TX_RATE* attributes. And it uses
nl80211_parse_tx_bitrate_mask api to validate the Tx rate mask.

Signed-off-by: Tamizh Chelvam <tamizhr@codeaurora.org>
Link: https://lore.kernel.org/r/1589357504-10175-1-git-send-email-tamizhr@codeaurora.org
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 29 +++++++++++++----------
 include/uapi/linux/nl80211.h | 21 +++++++++++++++++
 net/wireless/nl80211.c       | 56 +++++++++++++++++++++++++++++++++-----------
 3 files changed, 80 insertions(+), 26 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index f842f3652026..e2dbc9c02ef3 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -630,6 +630,19 @@ struct cfg80211_chan_def {
 	u16 freq1_offset;
 };
 
+/*
+ * cfg80211_bitrate_mask - masks for bitrate control
+ */
+struct cfg80211_bitrate_mask {
+	struct {
+		u32 legacy;
+		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
+		u16 vht_mcs[NL80211_VHT_NSS_MAX];
+		enum nl80211_txrate_gi gi;
+	} control[NUM_NL80211_BANDS];
+};
+
+
 /**
  * struct cfg80211_tid_cfg - TID specific configuration
  * @config_override: Flag to notify driver to reset TID configuration
@@ -643,6 +656,8 @@ struct cfg80211_chan_def {
  * @ampdu: Enable/Disable MPDU aggregation
  * @rtscts: Enable/Disable RTS/CTS
  * @amsdu: Enable/Disable MSDU aggregation
+ * @txrate_type: Tx bitrate mask type
+ * @txrate_mask: Tx bitrate to be applied for the TID
  */
 struct cfg80211_tid_cfg {
 	bool config_override;
@@ -653,6 +668,8 @@ struct cfg80211_tid_cfg {
 	enum nl80211_tid_config ampdu;
 	enum nl80211_tid_config rtscts;
 	enum nl80211_tid_config amsdu;
+	enum nl80211_tx_rate_setting txrate_type;
+	struct cfg80211_bitrate_mask txrate_mask;
 };
 
 /**
@@ -1007,18 +1024,6 @@ struct cfg80211_acl_data {
 	struct mac_address mac_addrs[];
 };
 
-/*
- * cfg80211_bitrate_mask - masks for bitrate control
- */
-struct cfg80211_bitrate_mask {
-	struct {
-		u32 legacy;
-		u8 ht_mcs[IEEE80211_HT_MCS_MASK_LEN];
-		u16 vht_mcs[NL80211_VHT_NSS_MAX];
-		enum nl80211_txrate_gi gi;
-	} control[NUM_NL80211_BANDS];
-};
-
 /**
  * enum cfg80211_ap_settings_flags - AP settings flags
  *
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 0f324b6b81cc..c14666b75e57 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -4841,6 +4841,17 @@ enum nl80211_tid_config {
 	NL80211_TID_CONFIG_DISABLE,
 };
 
+/* enum nl80211_tx_rate_setting - TX rate configuration type
+ * @NL80211_TX_RATE_AUTOMATIC: automatically determine TX rate
+ * @NL80211_TX_RATE_LIMITED: limit the TX rate by the TX rate parameter
+ * @NL80211_TX_RATE_FIXED: fix TX rate to the TX rate parameter
+ */
+enum nl80211_tx_rate_setting {
+	NL80211_TX_RATE_AUTOMATIC,
+	NL80211_TX_RATE_LIMITED,
+	NL80211_TX_RATE_FIXED,
+};
+
 /* enum nl80211_tid_config_attr - TID specific configuration.
  * @NL80211_TID_CONFIG_ATTR_PAD: pad attribute for 64-bit values
  * @NL80211_TID_CONFIG_ATTR_VIF_SUPP: a bitmap (u64) of attributes supported
@@ -4876,6 +4887,14 @@ enum nl80211_tid_config {
  * @NL80211_TID_CONFIG_ATTR_AMSDU_CTRL: Enable/Disable MSDU aggregation
  *	for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS.
  *	Its type is u8, using the values from &nl80211_tid_config.
+ * @NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE: This attribute will be useful
+ *	to notify the driver what type of txrate should be used
+ *	for the TIDs specified in %NL80211_TID_CONFIG_ATTR_TIDS, using
+ *	the values from &nl80211_tx_rate_setting.
+ * @NL80211_TID_CONFIG_ATTR_TX_RATE: Data frame TX rate mask should be applied
+ *	with the parameters passed through %NL80211_ATTR_TX_RATES.
+ *	configuration is applied to the data frame for the tid to that connected
+ *	station.
  */
 enum nl80211_tid_config_attr {
 	__NL80211_TID_CONFIG_ATTR_INVALID,
@@ -4890,6 +4909,8 @@ enum nl80211_tid_config_attr {
 	NL80211_TID_CONFIG_ATTR_AMPDU_CTRL,
 	NL80211_TID_CONFIG_ATTR_RTSCTS_CTRL,
 	NL80211_TID_CONFIG_ATTR_AMSDU_CTRL,
+	NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE,
+	NL80211_TID_CONFIG_ATTR_TX_RATE,
 
 	/* keep last */
 	__NL80211_TID_CONFIG_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 7ea764865546..22c4d13e28cb 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -329,6 +329,15 @@ he_bss_color_policy[NL80211_HE_BSS_COLOR_ATTR_MAX + 1] = {
 	[NL80211_HE_BSS_COLOR_ATTR_PARTIAL] = { .type = NLA_FLAG },
 };
 
+static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
+	[NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
+				    .len = NL80211_MAX_SUPP_RATES },
+	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
+				.len = NL80211_MAX_SUPP_HT_RATES },
+	[NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
+	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
+};
+
 static const struct nla_policy
 nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
 	[NL80211_TID_CONFIG_ATTR_VIF_SUPP] = { .type = NLA_U64 },
@@ -345,6 +354,10 @@ nl80211_tid_config_attr_policy[NL80211_TID_CONFIG_ATTR_MAX + 1] = {
 			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
 	[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL] =
 			NLA_POLICY_MAX(NLA_U8, NL80211_TID_CONFIG_DISABLE),
+	[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE] =
+			NLA_POLICY_MAX(NLA_U8, NL80211_TX_RATE_FIXED),
+	[NL80211_TID_CONFIG_ATTR_TX_RATE] =
+			NLA_POLICY_NESTED(nl80211_txattr_policy),
 };
 
 static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
@@ -4388,16 +4401,9 @@ static bool vht_set_mcs_mask(struct ieee80211_supported_band *sband,
 	return true;
 }
 
-static const struct nla_policy nl80211_txattr_policy[NL80211_TXRATE_MAX + 1] = {
-	[NL80211_TXRATE_LEGACY] = { .type = NLA_BINARY,
-				    .len = NL80211_MAX_SUPP_RATES },
-	[NL80211_TXRATE_HT] = { .type = NLA_BINARY,
-				.len = NL80211_MAX_SUPP_HT_RATES },
-	[NL80211_TXRATE_VHT] = NLA_POLICY_EXACT_LEN_WARN(sizeof(struct nl80211_txrate_vht)),
-	[NL80211_TXRATE_GI] = { .type = NLA_U8 },
-};
-
 static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
+					 struct nlattr *attrs[],
+					 enum nl80211_attrs attr,
 					 struct cfg80211_bitrate_mask *mask)
 {
 	struct nlattr *tb[NL80211_TXRATE_MAX + 1];
@@ -4428,14 +4434,14 @@ static int nl80211_parse_tx_bitrate_mask(struct genl_info *info,
 	}
 
 	/* if no rates are given set it back to the defaults */
-	if (!info->attrs[NL80211_ATTR_TX_RATES])
+	if (!attrs[attr])
 		goto out;
 
 	/* The nested attribute uses enum nl80211_band as the index. This maps
 	 * directly to the enum nl80211_band values used in cfg80211.
 	 */
 	BUILD_BUG_ON(NL80211_MAX_SUPP_HT_RATES > IEEE80211_HT_MCS_MASK_LEN * 8);
-	nla_for_each_nested(tx_rates, info->attrs[NL80211_ATTR_TX_RATES], rem) {
+	nla_for_each_nested(tx_rates, attrs[attr], rem) {
 		enum nl80211_band band = nla_type(tx_rates);
 		int err;
 
@@ -4940,7 +4946,9 @@ static int nl80211_start_ap(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 
 	if (info->attrs[NL80211_ATTR_TX_RATES]) {
-		err = nl80211_parse_tx_bitrate_mask(info, &params.beacon_rate);
+		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+						    NL80211_ATTR_TX_RATES,
+						    &params.beacon_rate);
 		if (err)
 			return err;
 
@@ -10753,7 +10761,8 @@ static int nl80211_set_tx_bitrate_mask(struct sk_buff *skb,
 	if (!rdev->ops->set_bitrate_mask)
 		return -EOPNOTSUPP;
 
-	err = nl80211_parse_tx_bitrate_mask(info, &mask);
+	err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+					    NL80211_ATTR_TX_RATES, &mask);
 	if (err)
 		return err;
 
@@ -11359,7 +11368,9 @@ static int nl80211_join_mesh(struct sk_buff *skb, struct genl_info *info)
 	}
 
 	if (info->attrs[NL80211_ATTR_TX_RATES]) {
-		err = nl80211_parse_tx_bitrate_mask(info, &setup.beacon_rate);
+		err = nl80211_parse_tx_bitrate_mask(info, info->attrs,
+						    NL80211_ATTR_TX_RATES,
+						    &setup.beacon_rate);
 		if (err)
 			return err;
 
@@ -14139,6 +14150,23 @@ static int parse_tid_conf(struct cfg80211_registered_device *rdev,
 			nla_get_u8(attrs[NL80211_TID_CONFIG_ATTR_AMSDU_CTRL]);
 	}
 
+	if (attrs[NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE]) {
+		u32 idx = NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE, attr;
+
+		tid_conf->txrate_type = nla_get_u8(attrs[idx]);
+
+		if (tid_conf->txrate_type != NL80211_TX_RATE_AUTOMATIC) {
+			attr = NL80211_TID_CONFIG_ATTR_TX_RATE;
+			err = nl80211_parse_tx_bitrate_mask(info, attrs, attr,
+						    &tid_conf->txrate_mask);
+			if (err)
+				return err;
+
+			tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE);
+		}
+		tid_conf->mask |= BIT(NL80211_TID_CONFIG_ATTR_TX_RATE_TYPE);
+	}
+
 	if (peer)
 		mask = rdev->wiphy.tid_config_support.peer;
 	else
-- 
cgit v1.2.3-59-g8ed1b


From a3b018febccd3686c39e86e98b5081bde014fc66 Mon Sep 17 00:00:00 2001
From: Patrick Steinhardt <ps@pks.im>
Date: Sun, 17 May 2020 18:30:19 +0200
Subject: cfg80211: fix CFG82011_CRDA_SUPPORT still mentioning internal regdb

Back with commit c8c240e284b3 (cfg80211: reg: remove support for
built-in regdb, 2015-10-15), support for using CFG80211_INTERNAL_REGDB
was removed in favor of loading the regulatory database as firmware
file. The documentation of CFG80211_CRDA_SUPPORT was not adjusted,
though, which is why it still mentions the old way of loading
via the internal regulatory database.

Remove it so that the kernel option only mentions using the firmware
file.

Signed-off-by: Patrick Steinhardt <ps@pks.im>
Link: https://lore.kernel.org/r/c56e60207fbd0512029de8c6276ee00f73491924.1589732954.git.ps@pks.im
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/Kconfig | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/wireless/Kconfig b/net/wireless/Kconfig
index 63cf7131f601..813e93644ae7 100644
--- a/net/wireless/Kconfig
+++ b/net/wireless/Kconfig
@@ -181,8 +181,8 @@ config CFG80211_CRDA_SUPPORT
 	default y
 	help
 	  You should enable this option unless you know for sure you have no
-	  need for it, for example when using internal regdb (above) or the
-	  database loaded as a firmware file.
+	  need for it, for example when using the regulatory database loaded as
+	  a firmware file.
 
 	  If unsure, say Y.
 
-- 
cgit v1.2.3-59-g8ed1b


From 1b9ae0c92925ac40489be526d67d0010d0724ce0 Mon Sep 17 00:00:00 2001
From: Hauke Mehrtens <hauke@hauke-m.de>
Date: Thu, 21 May 2020 22:14:22 +0200
Subject: wireless: Use linux/stddef.h instead of stddef.h

When compiling inside the kernel, include linux/stddef.h instead of
stddef.h. When I compile this header file in backports for PowerPC, I
run into a conflict with ptrdiff_t. I was unable to reproduce this in
the mainline kernel, but I would still like to fix this problem in the
kernel.

Fixes: 6989310f5d43 ("wireless: Use offsetof instead of custom macro.")
Signed-off-by: Hauke Mehrtens <hauke@hauke-m.de>
Link: https://lore.kernel.org/r/20200521201422.16493-1-hauke@hauke-m.de
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/uapi/linux/wireless.h | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

diff --git a/include/uapi/linux/wireless.h b/include/uapi/linux/wireless.h
index a2c006a364e0..24f3371ad826 100644
--- a/include/uapi/linux/wireless.h
+++ b/include/uapi/linux/wireless.h
@@ -74,7 +74,11 @@
 #include <linux/socket.h>		/* for "struct sockaddr" et al	*/
 #include <linux/if.h>			/* for IFNAMSIZ and co... */
 
-#include <stddef.h>                     /* for offsetof */
+#ifdef __KERNEL__
+#	include <linux/stddef.h>	/* for offsetof */
+#else
+#	include <stddef.h>		/* for offsetof */
+#endif
 
 /***************************** VERSION *****************************/
 /*
-- 
cgit v1.2.3-59-g8ed1b


From c11299243370580832c27882dcedf2604f9f48f8 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Tue, 26 May 2020 14:33:48 +0200
Subject: mac80211: fix HT-Control field reception for management frames

If we receive management frames with an HT-Control field, we cannot
parse them properly, as we assume a fixed length management header.

Since we don't even need the HTC field (for these frames, or really
at all), just remove it at the beginning of RX.

Reported-by: Haggai Abramovsky <haggai.abramovsky@intel.com>
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Link: https://lore.kernel.org/r/20200526143346.cf5ce70521c5.I333251a084ec4cfe67b7ef7efe2d2f1a33883931@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/rx.c | 44 +++++++++++++++++++++++++++++++++++++-------
 1 file changed, 37 insertions(+), 7 deletions(-)

diff --git a/net/mac80211/rx.c b/net/mac80211/rx.c
index 8b0fa5e345f4..21854a61a2b7 100644
--- a/net/mac80211/rx.c
+++ b/net/mac80211/rx.c
@@ -93,13 +93,44 @@ static u8 *ieee80211_get_bssid(struct ieee80211_hdr *hdr, size_t len,
  * This function cleans up the SKB, i.e. it removes all the stuff
  * only useful for monitoring.
  */
-static void remove_monitor_info(struct sk_buff *skb,
-				unsigned int present_fcs_len,
-				unsigned int rtap_space)
+static struct sk_buff *ieee80211_clean_skb(struct sk_buff *skb,
+					   unsigned int present_fcs_len,
+					   unsigned int rtap_space)
 {
+	struct ieee80211_hdr *hdr;
+	unsigned int hdrlen;
+	__le16 fc;
+
 	if (present_fcs_len)
 		__pskb_trim(skb, skb->len - present_fcs_len);
 	__pskb_pull(skb, rtap_space);
+
+	hdr = (void *)skb->data;
+	fc = hdr->frame_control;
+
+	/*
+	 * Remove the HT-Control field (if present) on management
+	 * frames after we've sent the frame to monitoring. We
+	 * (currently) don't need it, and don't properly parse
+	 * frames with it present, due to the assumption of a
+	 * fixed management header length.
+	 */
+	if (likely(!ieee80211_is_mgmt(fc) || !ieee80211_has_order(fc)))
+		return skb;
+
+	hdrlen = ieee80211_hdrlen(fc);
+	hdr->frame_control &= ~cpu_to_le16(IEEE80211_FCTL_ORDER);
+
+	if (!pskb_may_pull(skb, hdrlen)) {
+		dev_kfree_skb(skb);
+		return NULL;
+	}
+
+	memmove(skb->data + IEEE80211_HT_CTL_LEN, skb->data,
+		hdrlen - IEEE80211_HT_CTL_LEN);
+	__pskb_pull(skb, IEEE80211_HT_CTL_LEN);
+
+	return skb;
 }
 
 static inline bool should_drop_frame(struct sk_buff *skb, int present_fcs_len,
@@ -827,8 +858,8 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 			return NULL;
 		}
 
-		remove_monitor_info(origskb, present_fcs_len, rtap_space);
-		return origskb;
+		return ieee80211_clean_skb(origskb, present_fcs_len,
+					   rtap_space);
 	}
 
 	ieee80211_handle_mu_mimo_mon(monitor_sdata, origskb, rtap_space);
@@ -871,8 +902,7 @@ ieee80211_rx_monitor(struct ieee80211_local *local, struct sk_buff *origskb,
 	if (!origskb)
 		return NULL;
 
-	remove_monitor_info(origskb, present_fcs_len, rtap_space);
-	return origskb;
+	return ieee80211_clean_skb(origskb, present_fcs_len, rtap_space);
 }
 
 static void ieee80211_parse_qos(struct ieee80211_rx_data *rx)
-- 
cgit v1.2.3-59-g8ed1b


From 119aadf816f5373dc82ca4109d6d5b777e00475b Mon Sep 17 00:00:00 2001
From: Ramon Fontes <ramonreisfontes@gmail.com>
Date: Fri, 15 May 2020 13:46:40 -0300
Subject: mac80211_hwsim: report the WIPHY_FLAG_SUPPORTS_5_10_MHZ capability

Signed-off-by: Ramon Fontes <ramonreisfontes@gmail.com>
Link: https://lore.kernel.org/r/20200515164640.97276-1-ramonreisfontes@gmail.com
[fix indentation to use tabs]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 drivers/net/wireless/mac80211_hwsim.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/mac80211_hwsim.c b/drivers/net/wireless/mac80211_hwsim.c
index f4ded2f2ee3b..1356e8cbe617 100644
--- a/drivers/net/wireless/mac80211_hwsim.c
+++ b/drivers/net/wireless/mac80211_hwsim.c
@@ -3054,6 +3054,7 @@ static int mac80211_hwsim_new_radio(struct genl_info *info,
 	hw->wiphy->flags |= WIPHY_FLAG_SUPPORTS_TDLS |
 			    WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL |
 			    WIPHY_FLAG_AP_UAPSD |
+			    WIPHY_FLAG_SUPPORTS_5_10_MHZ |
 			    WIPHY_FLAG_HAS_CHANNEL_SWITCH;
 	hw->wiphy->features |= NL80211_FEATURE_ACTIVE_MONITOR |
 			       NL80211_FEATURE_AP_MODE_CHAN_WIDTH_CHANGE |
-- 
cgit v1.2.3-59-g8ed1b


From 6b646a7e4af69814dd1a3340fca0f02d4977420d Mon Sep 17 00:00:00 2001
From: Leon Romanovsky <leonro@mellanox.com>
Date: Mon, 9 Mar 2020 16:44:25 +0200
Subject: net/mlx5: Add ability to read and write ECE options

The end result of the RDMA-CM ECE handshake is a set of ECE options,
which must be used when configuring data QPs. Such options can
arrive in any QP state, so add in/out fields to set and query
ECE options.

OUT fields:
* create_qp() - default ECE options for that type of QP.
* modify_qp() - enabled ECE options after QP state transition.

IN fields:
* create_qp() - create QP with this ECE option.
* modify_qp() - requested options. For unconnected QPs, the FW
will return an error if ECE is already configured with any options
that are not equal to those previously set.

Reviewed-by: Mark Zhang <markz@mellanox.com>
Reviewed-by: Maor Gottlieb <maorg@mellanox.com>
Signed-off-by: Leon Romanovsky <leonro@mellanox.com>
---
 include/linux/mlx5/mlx5_ifc.h | 26 ++++++++++++++++----------
 1 file changed, 16 insertions(+), 10 deletions(-)

diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h
index fd8da4875ea0..1a56dc079c32 100644
--- a/include/linux/mlx5/mlx5_ifc.h
+++ b/include/linux/mlx5/mlx5_ifc.h
@@ -1208,7 +1208,9 @@ struct mlx5_ifc_cmd_hca_cap_bits {
 	u8         reserved_at_99[0x2];
 	u8         log_max_qp[0x5];
 
-	u8         reserved_at_a0[0xb];
+	u8         reserved_at_a0[0x3];
+	u8	   ece_support[0x1];
+	u8	   reserved_at_a4[0x7];
 	u8         log_max_srq[0x5];
 	u8         reserved_at_b0[0x10];
 
@@ -4216,7 +4218,8 @@ struct mlx5_ifc_rts2rts_qp_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x20];
+	u8         ece[0x20];
 };
 
 struct mlx5_ifc_rts2rts_qp_in_bits {
@@ -4233,7 +4236,7 @@ struct mlx5_ifc_rts2rts_qp_in_bits {
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_at_a0[0x20];
+	u8         ece[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
@@ -4246,7 +4249,8 @@ struct mlx5_ifc_rtr2rts_qp_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x20];
+	u8         ece[0x20];
 };
 
 struct mlx5_ifc_rtr2rts_qp_in_bits {
@@ -4263,7 +4267,7 @@ struct mlx5_ifc_rtr2rts_qp_in_bits {
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_at_a0[0x20];
+	u8         ece[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
@@ -4815,7 +4819,8 @@ struct mlx5_ifc_query_qp_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x20];
+	u8         ece[0x20];
 
 	u8         opt_param_mask[0x20];
 
@@ -6580,7 +6585,8 @@ struct mlx5_ifc_init2rtr_qp_out_bits {
 
 	u8         syndrome[0x20];
 
-	u8         reserved_at_40[0x40];
+	u8         reserved_at_40[0x20];
+	u8         ece[0x20];
 };
 
 struct mlx5_ifc_init2rtr_qp_in_bits {
@@ -6597,7 +6603,7 @@ struct mlx5_ifc_init2rtr_qp_in_bits {
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_at_a0[0x20];
+	u8         ece[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
@@ -7693,7 +7699,7 @@ struct mlx5_ifc_create_qp_out_bits {
 	u8         reserved_at_40[0x8];
 	u8         qpn[0x18];
 
-	u8         reserved_at_60[0x20];
+	u8         ece[0x20];
 };
 
 struct mlx5_ifc_create_qp_in_bits {
@@ -7707,7 +7713,7 @@ struct mlx5_ifc_create_qp_in_bits {
 
 	u8         opt_param_mask[0x20];
 
-	u8         reserved_at_a0[0x20];
+	u8         ece[0x20];
 
 	struct mlx5_ifc_qpc_bits qpc;
 
-- 
cgit v1.2.3-59-g8ed1b


From d8e79f1dbcee8032667c0718a654c749d64f6304 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Wed, 27 May 2020 01:00:20 -0700
Subject: nexthop: Fix type of event_type in call_nexthop_notifiers

Clang warns:

net/ipv4/nexthop.c:841:30: warning: implicit conversion from enumeration
type 'enum nexthop_event_type' to different enumeration type 'enum
fib_event_type' [-Wenum-conversion]
        call_nexthop_notifiers(net, NEXTHOP_EVENT_DEL, nh);
        ~~~~~~~~~~~~~~~~~~~~~~      ^~~~~~~~~~~~~~~~~
1 warning generated.

Use the right type for event_type so that clang does not warn.

Fixes: 8590ceedb701 ("nexthop: add support for notifiers")
Link: https://github.com/ClangBuiltLinux/linux/issues/1038
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index 143011f9b580..ec1282858cb7 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -37,7 +37,7 @@ static const struct nla_policy rtm_nh_policy[NHA_MAX + 1] = {
 };
 
 static int call_nexthop_notifiers(struct net *net,
-				  enum fib_event_type event_type,
+				  enum nexthop_event_type event_type,
 				  struct nexthop *nh)
 {
 	int err;
-- 
cgit v1.2.3-59-g8ed1b


From 7cf4eda481b235cbc7c210715cce19fde3d23d55 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 27 May 2020 09:15:55 +0100
Subject: mlxsw: spectrum_router: remove redundant initialization of pointer
 br_dev

The pointer br_dev is being initialized with a value that is never read
and is being updated with a new value later on. The initialization
is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Reviewed-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
index c939b3596566..770de0222e7b 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_router.c
@@ -7574,7 +7574,7 @@ static struct mlxsw_sp_fid *
 mlxsw_sp_rif_vlan_fid_get(struct mlxsw_sp_rif *rif,
 			  struct netlink_ext_ack *extack)
 {
-	struct net_device *br_dev = rif->dev;
+	struct net_device *br_dev;
 	u16 vid;
 	int err;
 
-- 
cgit v1.2.3-59-g8ed1b


From f96e9641e92b54de27b93d1af03d74b8304ce00a Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Wed, 27 May 2020 11:24:04 +0200
Subject: net: ethernet: mtk-star-emac: fix error path in RX handling

The dma_addr field in desc_data must not be overwritten until after the
new skb is mapped. Currently we replace it with an uninitialized value
in the error path. This change fixes it by moving the assignment before
the label to which we jump after mapping or allocation errors.

Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver")
Reported-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Tested-by: Nathan Chancellor <natechancellor@gmail.com> # build
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index b74349cede28..72bb624a6a68 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1308,6 +1308,8 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
 		goto push_new_skb;
 	}
 
+	desc_data.dma_addr = new_dma_addr;
+
 	/* We can't fail anymore at this point: it's safe to unmap the skb. */
 	mtk_star_dma_unmap_rx(priv, &desc_data);
 
@@ -1318,7 +1320,6 @@ static int mtk_star_receive_packet(struct mtk_star_priv *priv)
 	netif_receive_skb(desc_data.skb);
 
 push_new_skb:
-	desc_data.dma_addr = new_dma_addr;
 	desc_data.len = skb_tailroom(new_skb);
 	desc_data.skb = new_skb;
 
-- 
cgit v1.2.3-59-g8ed1b


From 9f01a71c5cbec10b851588457089d17c20dc5a40 Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Wed, 27 May 2020 13:01:29 +0100
Subject: net: dsa: b53: remove redundant premature assignment to new_pvid

Variable new_pvid is being assigned with a value that is never read,
the following if statement updates new_pvid with a new value in both
of the if paths. The assignment is redundant and can be removed.

Addresses-Coverity: ("Unused value")
Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/b53/b53_common.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/dsa/b53/b53_common.c b/drivers/net/dsa/b53/b53_common.c
index ceb8be653182..1df05841ab6b 100644
--- a/drivers/net/dsa/b53/b53_common.c
+++ b/drivers/net/dsa/b53/b53_common.c
@@ -1325,7 +1325,6 @@ int b53_vlan_filtering(struct dsa_switch *ds, int port, bool vlan_filtering)
 	u16 pvid, new_pvid;
 
 	b53_read16(dev, B53_VLAN_PAGE, B53_VLAN_PORT_DEF_TAG(port), &pvid);
-	new_pvid = pvid;
 	if (!vlan_filtering) {
 		/* Filtering is currently enabled, use the default PVID since
 		 * the bridge does not expect tagging anymore
-- 
cgit v1.2.3-59-g8ed1b


From 20f6a05ef63594feb0c6dfbd629da0448b43124d Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Wed, 27 May 2020 12:34:30 +0000
Subject: bridge: mrp: Rework the MRP netlink interface

This patch reworks the MRP netlink interface. Before, each attribute
represented a binary structure, which made it hard to extend.
Therefore, update the MRP netlink interface so that each existing
attribute becomes a nested attribute which contains the fields of the
binary structures.
In this way the MRP netlink interface can be extended without breaking
backwards compatibility. It also uses strict checking for attributes
under the MRP top attribute.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/if_bridge.h |  64 ++++++++--
 net/bridge/br_mrp.c            |   8 +-
 net/bridge/br_mrp_netlink.c    | 266 +++++++++++++++++++++++++++++++++++------
 net/bridge/br_private_mrp.h    |   2 +-
 4 files changed, 290 insertions(+), 50 deletions(-)

diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index bd8c95488f16..5a43eb86c93b 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -169,17 +169,69 @@ enum {
 	__IFLA_BRIDGE_MRP_MAX,
 };
 
+#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_INSTANCE_UNSPEC,
+	IFLA_BRIDGE_MRP_INSTANCE_RING_ID,
+	IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX,
+	IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX,
+	__IFLA_BRIDGE_MRP_INSTANCE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_INSTANCE_MAX (__IFLA_BRIDGE_MRP_INSTANCE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC,
+	IFLA_BRIDGE_MRP_PORT_STATE_STATE,
+	__IFLA_BRIDGE_MRP_PORT_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_STATE_MAX (__IFLA_BRIDGE_MRP_PORT_STATE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC,
+	IFLA_BRIDGE_MRP_PORT_ROLE_ROLE,
+	__IFLA_BRIDGE_MRP_PORT_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_PORT_ROLE_MAX (__IFLA_BRIDGE_MRP_PORT_ROLE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_RING_STATE_UNSPEC,
+	IFLA_BRIDGE_MRP_RING_STATE_RING_ID,
+	IFLA_BRIDGE_MRP_RING_STATE_STATE,
+	__IFLA_BRIDGE_MRP_RING_STATE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_STATE_MAX (__IFLA_BRIDGE_MRP_RING_STATE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC,
+	IFLA_BRIDGE_MRP_RING_ROLE_RING_ID,
+	IFLA_BRIDGE_MRP_RING_ROLE_ROLE,
+	__IFLA_BRIDGE_MRP_RING_ROLE_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_RING_ROLE_MAX (__IFLA_BRIDGE_MRP_RING_ROLE_MAX - 1)
+
+enum {
+	IFLA_BRIDGE_MRP_START_TEST_UNSPEC,
+	IFLA_BRIDGE_MRP_START_TEST_RING_ID,
+	IFLA_BRIDGE_MRP_START_TEST_INTERVAL,
+	IFLA_BRIDGE_MRP_START_TEST_MAX_MISS,
+	IFLA_BRIDGE_MRP_START_TEST_PERIOD,
+	__IFLA_BRIDGE_MRP_START_TEST_MAX,
+};
+
+#define IFLA_BRIDGE_MRP_START_TEST_MAX (__IFLA_BRIDGE_MRP_START_TEST_MAX - 1)
+
 struct br_mrp_instance {
 	__u32 ring_id;
 	__u32 p_ifindex;
 	__u32 s_ifindex;
 };
 
-struct br_mrp_port_role {
-	__u32 ring_id;
-	__u32 role;
-};
-
 struct br_mrp_ring_state {
 	__u32 ring_id;
 	__u32 ring_state;
@@ -197,8 +249,6 @@ struct br_mrp_start_test {
 	__u32 period;
 };
 
-#define IFLA_BRIDGE_MRP_MAX (__IFLA_BRIDGE_MRP_MAX - 1)
-
 struct bridge_stp_xstats {
 	__u64 transition_blk;
 	__u64 transition_fwd;
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 528d767eb026..8ea59504ef47 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -376,24 +376,24 @@ int br_mrp_set_port_state(struct net_bridge_port *p,
  * note: already called with rtnl_lock
  */
 int br_mrp_set_port_role(struct net_bridge_port *p,
-			 struct br_mrp_port_role *role)
+			 enum br_mrp_port_role_type role)
 {
 	struct br_mrp *mrp;
 
 	if (!p || !(p->flags & BR_MRP_AWARE))
 		return -EINVAL;
 
-	mrp = br_mrp_find_id(p->br, role->ring_id);
+	mrp = br_mrp_find_port(p->br, p);
 
 	if (!mrp)
 		return -EINVAL;
 
-	if (role->role == BR_MRP_PORT_ROLE_PRIMARY)
+	if (role == BR_MRP_PORT_ROLE_PRIMARY)
 		rcu_assign_pointer(mrp->p_port, p);
 	else
 		rcu_assign_pointer(mrp->s_port, p);
 
-	br_mrp_port_switchdev_set_role(p, role->role);
+	br_mrp_port_switchdev_set_role(p, role);
 
 	return 0;
 }
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 4a08a99519b0..d9de780d2ce0 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -8,19 +8,222 @@
 
 static const struct nla_policy br_mrp_policy[IFLA_BRIDGE_MRP_MAX + 1] = {
 	[IFLA_BRIDGE_MRP_UNSPEC]	= { .type = NLA_REJECT },
-	[IFLA_BRIDGE_MRP_INSTANCE]	= { .type = NLA_EXACT_LEN,
-				    .len = sizeof(struct br_mrp_instance)},
-	[IFLA_BRIDGE_MRP_PORT_STATE]	= { .type = NLA_U32 },
-	[IFLA_BRIDGE_MRP_PORT_ROLE]	= { .type = NLA_EXACT_LEN,
-				    .len = sizeof(struct br_mrp_port_role)},
-	[IFLA_BRIDGE_MRP_RING_STATE]	= { .type = NLA_EXACT_LEN,
-				    .len = sizeof(struct br_mrp_ring_state)},
-	[IFLA_BRIDGE_MRP_RING_ROLE]	= { .type = NLA_EXACT_LEN,
-				    .len = sizeof(struct br_mrp_ring_role)},
-	[IFLA_BRIDGE_MRP_START_TEST]	= { .type = NLA_EXACT_LEN,
-				    .len = sizeof(struct br_mrp_start_test)},
+	[IFLA_BRIDGE_MRP_INSTANCE]	= { .type = NLA_NESTED },
+	[IFLA_BRIDGE_MRP_PORT_STATE]	= { .type = NLA_NESTED },
+	[IFLA_BRIDGE_MRP_PORT_ROLE]	= { .type = NLA_NESTED },
+	[IFLA_BRIDGE_MRP_RING_STATE]	= { .type = NLA_NESTED },
+	[IFLA_BRIDGE_MRP_RING_ROLE]	= { .type = NLA_NESTED },
+	[IFLA_BRIDGE_MRP_START_TEST]	= { .type = NLA_NESTED },
 };
 
+static const struct nla_policy
+br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_INSTANCE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr,
+				 int cmd, struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1];
+	struct br_mrp_instance inst;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_INSTANCE_MAX, attr,
+			       br_mrp_instance_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID] ||
+	    !tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX] ||
+	    !tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing attribute: RING_ID or P_IFINDEX or S_IFINDEX");
+		return -EINVAL;
+	}
+
+	memset(&inst, 0, sizeof(inst));
+
+	inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]);
+	inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]);
+	inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]);
+
+	if (cmd == RTM_SETLINK)
+		return br_mrp_add(br, &inst);
+	else
+		return br_mrp_del(br, &inst);
+
+	return 0;
+}
+
+static const struct nla_policy
+br_mrp_port_state_policy[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_PORT_STATE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_PORT_STATE_STATE]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_port_state_parse(struct net_bridge_port *p,
+				   struct nlattr *attr,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_STATE_MAX + 1];
+	enum br_mrp_port_state_type state;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_STATE_MAX, attr,
+			       br_mrp_port_state_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing attribute: STATE");
+		return -EINVAL;
+	}
+
+	state = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE_STATE]);
+
+	return br_mrp_set_port_state(p, state);
+}
+
+static const struct nla_policy
+br_mrp_port_role_policy[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_PORT_ROLE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_port_role_parse(struct net_bridge_port *p,
+				  struct nlattr *attr,
+				  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_PORT_ROLE_MAX + 1];
+	enum br_mrp_port_role_type role;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_PORT_ROLE_MAX, attr,
+			       br_mrp_port_role_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]) {
+		NL_SET_ERR_MSG_MOD(extack, "Missing attribute: ROLE");
+		return -EINVAL;
+	}
+
+	role = nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_ROLE_ROLE]);
+
+	return br_mrp_set_port_role(p, role);
+}
+
+static const struct nla_policy
+br_mrp_ring_state_policy[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_RING_STATE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_RING_STATE_STATE]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_ring_state_parse(struct net_bridge *br, struct nlattr *attr,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_RING_STATE_MAX + 1];
+	struct br_mrp_ring_state state;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_STATE_MAX, attr,
+			       br_mrp_ring_state_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID] ||
+	    !tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing attribute: RING_ID or STATE");
+		return -EINVAL;
+	}
+
+	memset(&state, 0x0, sizeof(state));
+
+	state.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_RING_ID]);
+	state.ring_state = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_STATE_STATE]);
+
+	return br_mrp_set_ring_state(br, &state);
+}
+
+static const struct nla_policy
+br_mrp_ring_role_policy[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_RING_ROLE_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_ring_role_parse(struct net_bridge *br, struct nlattr *attr,
+				  struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_RING_ROLE_MAX + 1];
+	struct br_mrp_ring_role role;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_RING_ROLE_MAX, attr,
+			       br_mrp_ring_role_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID] ||
+	    !tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing attribute: RING_ID or ROLE");
+		return -EINVAL;
+	}
+
+	memset(&role, 0x0, sizeof(role));
+
+	role.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_RING_ID]);
+	role.ring_role = nla_get_u32(tb[IFLA_BRIDGE_MRP_RING_ROLE_ROLE]);
+
+	return br_mrp_set_ring_role(br, &role);
+}
+
+static const struct nla_policy
+br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = {
+	[IFLA_BRIDGE_MRP_START_TEST_UNSPEC]	= { .type = NLA_REJECT },
+	[IFLA_BRIDGE_MRP_START_TEST_RING_ID]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_START_TEST_PERIOD]	= { .type = NLA_U32 },
+};
+
+static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr,
+				   struct netlink_ext_ack *extack)
+{
+	struct nlattr *tb[IFLA_BRIDGE_MRP_START_TEST_MAX + 1];
+	struct br_mrp_start_test test;
+	int err;
+
+	err = nla_parse_nested(tb, IFLA_BRIDGE_MRP_START_TEST_MAX, attr,
+			       br_mrp_start_test_policy, extack);
+	if (err)
+		return err;
+
+	if (!tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID] ||
+	    !tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL] ||
+	    !tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS] ||
+	    !tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]) {
+		NL_SET_ERR_MSG_MOD(extack,
+				   "Missing attribute: RING_ID or INTERVAL or MAX_MISS or PERIOD");
+		return -EINVAL;
+	}
+
+	memset(&test, 0x0, sizeof(test));
+
+	test.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_RING_ID]);
+	test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]);
+	test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]);
+	test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]);
+
+	return br_mrp_start_test(br, &test);
+}
+
 int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
 		 struct nlattr *attr, int cmd, struct netlink_ext_ack *extack)
 {
@@ -44,58 +247,45 @@ int br_mrp_parse(struct net_bridge *br, struct net_bridge_port *p,
 		return err;
 
 	if (tb[IFLA_BRIDGE_MRP_INSTANCE]) {
-		struct br_mrp_instance *instance =
-			nla_data(tb[IFLA_BRIDGE_MRP_INSTANCE]);
-
-		if (cmd == RTM_SETLINK)
-			err = br_mrp_add(br, instance);
-		else
-			err = br_mrp_del(br, instance);
+		err = br_mrp_instance_parse(br, tb[IFLA_BRIDGE_MRP_INSTANCE],
+					    cmd, extack);
 		if (err)
 			return err;
 	}
 
 	if (tb[IFLA_BRIDGE_MRP_PORT_STATE]) {
-		enum br_mrp_port_state_type state =
-			nla_get_u32(tb[IFLA_BRIDGE_MRP_PORT_STATE]);
-
-		err = br_mrp_set_port_state(p, state);
+		err = br_mrp_port_state_parse(p, tb[IFLA_BRIDGE_MRP_PORT_STATE],
+					      extack);
 		if (err)
 			return err;
 	}
 
 	if (tb[IFLA_BRIDGE_MRP_PORT_ROLE]) {
-		struct br_mrp_port_role *role =
-			nla_data(tb[IFLA_BRIDGE_MRP_PORT_ROLE]);
-
-		err = br_mrp_set_port_role(p, role);
+		err = br_mrp_port_role_parse(p, tb[IFLA_BRIDGE_MRP_PORT_ROLE],
+					     extack);
 		if (err)
 			return err;
 	}
 
 	if (tb[IFLA_BRIDGE_MRP_RING_STATE]) {
-		struct br_mrp_ring_state *state =
-			nla_data(tb[IFLA_BRIDGE_MRP_RING_STATE]);
-
-		err = br_mrp_set_ring_state(br, state);
+		err = br_mrp_ring_state_parse(br,
+					      tb[IFLA_BRIDGE_MRP_RING_STATE],
+					      extack);
 		if (err)
 			return err;
 	}
 
 	if (tb[IFLA_BRIDGE_MRP_RING_ROLE]) {
-		struct br_mrp_ring_role *role =
-			nla_data(tb[IFLA_BRIDGE_MRP_RING_ROLE]);
-
-		err = br_mrp_set_ring_role(br, role);
+		err = br_mrp_ring_role_parse(br, tb[IFLA_BRIDGE_MRP_RING_ROLE],
+					     extack);
 		if (err)
 			return err;
 	}
 
 	if (tb[IFLA_BRIDGE_MRP_START_TEST]) {
-		struct br_mrp_start_test *test =
-			nla_data(tb[IFLA_BRIDGE_MRP_START_TEST]);
-
-		err = br_mrp_start_test(br, test);
+		err = br_mrp_start_test_parse(br,
+					      tb[IFLA_BRIDGE_MRP_START_TEST],
+					      extack);
 		if (err)
 			return err;
 	}
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 2921a4b59f8e..a0f53cc3ab85 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -37,7 +37,7 @@ int br_mrp_del(struct net_bridge *br, struct br_mrp_instance *instance);
 int br_mrp_set_port_state(struct net_bridge_port *p,
 			  enum br_mrp_port_state_type state);
 int br_mrp_set_port_role(struct net_bridge_port *p,
-			 struct br_mrp_port_role *role);
+			 enum br_mrp_port_role_type role);
 int br_mrp_set_ring_state(struct net_bridge *br,
 			  struct br_mrp_ring_state *state);
 int br_mrp_set_ring_role(struct net_bridge *br, struct br_mrp_ring_role *role);
-- 
cgit v1.2.3-59-g8ed1b


From f99c0646ef83076dba88255c42482d1b4325f890 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 May 2020 15:34:45 +0200
Subject: mtk-star-emac: mark PM functions as __maybe_unused

Without CONFIG_PM, the compiler warns about two unused functions:

drivers/net/ethernet/mediatek/mtk_star_emac.c:1472:12: error: unused function 'mtk_star_suspend' [-Werror,-Wunused-function]
drivers/net/ethernet/mediatek/mtk_star_emac.c:1488:12: error: unused function 'mtk_star_resume' [-Werror,-Wunused-function]

Mark these as __maybe_unused.

Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 72bb624a6a68..8596ca0e60eb 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -1468,7 +1468,7 @@ out_put_node:
 	return ret;
 }
 
-static int mtk_star_suspend(struct device *dev)
+static __maybe_unused int mtk_star_suspend(struct device *dev)
 {
 	struct mtk_star_priv *priv;
 	struct net_device *ndev;
@@ -1484,7 +1484,7 @@ static int mtk_star_suspend(struct device *dev)
 	return 0;
 }
 
-static int mtk_star_resume(struct device *dev)
+static __maybe_unused int mtk_star_resume(struct device *dev)
 {
 	struct mtk_star_priv *priv;
 	struct net_device *ndev;
-- 
cgit v1.2.3-59-g8ed1b


From 5a1b72cebc774ec68854dab59c6838d795ee9370 Mon Sep 17 00:00:00 2001
From: Stephen Worley <sworley@cumulusnetworks.com>
Date: Wed, 27 May 2020 12:41:42 -0400
Subject: net: add large ecmp group nexthop tests

Add a couple of large ecmp group nexthop selftests to cover
the data remnant bug fixed by d69100b8eee27c2d60ee52df76e0b80a8d492d34.

The tests create 100 x32 ecmp groups of ipv4 and ipv6 and then
dump them. On kernels without the fix, they will fail due
to a data remnant during the dump.

Signed-off-by: Stephen Worley <sworley@cumulusnetworks.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Reviewed-by: David Ahern <dsahern@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 84 ++++++++++++++++++++++++++++-
 1 file changed, 82 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 51f8e9afe6ae..1e2f61262e4e 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_compat_mode ipv4_fdb_grp_fcnal"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_compat_mode ipv6_fdb_grp_fcnal"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal"
 
 ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
 TESTS="${ALL_TESTS}"
@@ -254,6 +254,60 @@ check_route6()
 	check_output "${out}" "${expected}"
 }
 
+check_large_grp()
+{
+	local ipv=$1
+	local ecmp=$2
+	local grpnum=100
+	local nhidstart=100
+	local grpidstart=1000
+	local iter=0
+	local nhidstr=""
+	local grpidstr=""
+	local grpstr=""
+	local ipstr=""
+
+	if [ $ipv -eq 4 ]; then
+		ipstr="172.16.1."
+	else
+		ipstr="2001:db8:91::"
+	fi
+
+	#
+	# Create $grpnum groups with specified $ecmp and dump them
+	#
+
+	# create nexthops with different gateways
+	iter=2
+	while [ $iter -le $(($ecmp + 1)) ]
+	do
+		nhidstr="$(($nhidstart + $iter))"
+		run_cmd "$IP nexthop add id $nhidstr via $ipstr$iter dev veth1"
+		check_nexthop "id $nhidstr" "id $nhidstr via $ipstr$iter dev veth1 scope link"
+
+		if [ $iter -le $ecmp ]; then
+			grpstr+="$nhidstr/"
+		else
+			grpstr+="$nhidstr"
+		fi
+		((iter++))
+	done
+
+	# create duplicate large ecmp groups
+	iter=0
+	while [ $iter -le $grpnum ]
+	do
+		grpidstr="$(($grpidstart + $iter))"
+		run_cmd "$IP nexthop add id $grpidstr group $grpstr"
+		check_nexthop "id $grpidstr" "id $grpidstr group $grpstr"
+		((iter++))
+	done
+
+	# dump large groups
+	run_cmd "$IP nexthop list"
+	log_test $? 0 "Dump large (x$ecmp) ecmp groups"
+}
+
 start_ip_monitor()
 {
 	local mtype=$1
@@ -700,6 +754,19 @@ ipv6_fcnal_runtime()
 	# route with src address and using nexthop - not allowed
 }
 
+ipv6_large_grp()
+{
+	local ecmp=32
+
+	echo
+	echo "IPv6 large groups (x$ecmp)"
+	echo "---------------------"
+
+	check_large_grp 6 $ecmp
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 ipv4_fcnal()
 {
 	local rc
@@ -1066,6 +1133,19 @@ ipv4_fcnal_runtime()
 	log_test $? 0 "IPv4 route with MPLS encap, v6 gw - check"
 }
 
+ipv4_large_grp()
+{
+	local ecmp=32
+
+	echo
+	echo "IPv4 large groups (x$ecmp)"
+	echo "---------------------"
+
+	check_large_grp 4 $ecmp
+
+	$IP nexthop flush >/dev/null 2>&1
+}
+
 sysctl_nexthop_compat_mode_check()
 {
 	local sysctlname="net.ipv4.nexthop_compat_mode"
-- 
cgit v1.2.3-59-g8ed1b


From 626a83238e6a63d88a5b5291febe797b244b5f18 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 27 May 2020 19:45:38 +0300
Subject: net: dsa: felix: accept VLAN config regardless of bridge VLAN
 awareness state

The ocelot core library is written with the idea in mind that the VLAN
table is populated by the bridge. Otherwise, not even a sane default
pvid is provided: in standalone mode, the default pvid is 0, and the
core expects the bridge layer to change it to 1.

So without this patch, the VLAN table is completely empty at the end of
the commands below, and traffic is broken as a result:

ip link add dev br0 type bridge vlan_filtering 0 && ip link set dev br0 up
for eth in $(ls /sys/bus/pci/devices/0000\:00\:00.5/net/); do
	ip link set dev $eth master br0
	ip link set dev $eth up
done
ip link set dev br0 type bridge vlan_filtering 1

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/ocelot/felix.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/dsa/ocelot/felix.c b/drivers/net/dsa/ocelot/felix.c
index 18c23ffd6b40..a6e272d2110d 100644
--- a/drivers/net/dsa/ocelot/felix.c
+++ b/drivers/net/dsa/ocelot/felix.c
@@ -594,6 +594,7 @@ static int felix_setup(struct dsa_switch *ds)
 				 ANA_FLOODING, tc);
 
 	ds->mtu_enforcement_ingress = true;
+	ds->configure_vlan_while_not_filtering = true;
 	/* It looks like the MAC/PCS interrupt register - PM0_IEVENT (0x8040)
 	 * isn't instantiated for the Felix PF.
 	 * In-band AN may take a few ms to complete, so we need to poll.
-- 
cgit v1.2.3-59-g8ed1b


From cb8aa9a3affb7d23b11b11fbed41e2feaabc4b0a Mon Sep 17 00:00:00 2001
From: Romain Bellan <romain.bellan@wifirst.fr>
Date: Mon, 4 May 2020 21:34:29 +0200
Subject: netfilter: ctnetlink: add kernel side filtering for dump

Conntrack dump does not support kernel-side filtering (only get exists,
but it returns a single entry, and the user has to give a full valid tuple).

This means that userspace has to implement filtering after receiving many
irrelevant entries, consuming resources (the conntrack table can be
huge, much larger than a routing table for example).

This patch adds filtering on the kernel side. To achieve this goal, we:

 * Add a new CTA_FILTER netlink attribute, a list of flags used to
   parameterize filtering
 * Convert some *nlattr_to_tuple() functions to allow partial parsing
   of CTA_TUPLE_ORIG and CTA_TUPLE_REPLY (so nf_conntrack_tuple is not
   fully set)

Filtering is now possible on:
 * IP SRC/DST values
 * Ports for TCP and UDP flows
 * ICMP(v6) codes, types and IDs

Filters are combined with a logical "AND". For example, when flags
PROTO_SRC_PORT, PROTO_NUM and IP_SRC are set, only entries matching all
values are dumped.

Changes since v1:
  Set NLM_F_DUMP_FILTERED in nlm flags if entries are filtered

Changes since v2:
  Move several constants to nf_internals.h
  Move a fix on netlink values check in a separate patch
  Add a check on not-supported flags
  Return EOPNOTSUPP if CTA_FILTER is set in ctnetlink_flush_conntrack
  (not yet implemented)
  Code style issues

Changes since v3:
  Fix compilation warning reported by kbuild test robot

Changes since v4:
  Fix a regression introduced in v3 (returned EINVAL for valid netlink
  messages without CTA_MARK)

Changes since v5:
  Change definition of CTA_FILTER_F_ALL
  Fix a regression when CTA_TUPLE_ZONE is not set

Signed-off-by: Romain Bellan <romain.bellan@wifirst.fr>
Signed-off-by: Florent Fourcot <florent.fourcot@wifirst.fr>
Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_conntrack_l4proto.h       |   6 +-
 include/uapi/linux/netfilter/nfnetlink_conntrack.h |   9 +
 net/netfilter/nf_conntrack_core.c                  |  19 +-
 net/netfilter/nf_conntrack_netlink.c               | 334 ++++++++++++++++++---
 net/netfilter/nf_conntrack_proto_icmp.c            |  40 ++-
 net/netfilter/nf_conntrack_proto_icmpv6.c          |  42 ++-
 net/netfilter/nf_internals.h                       |  17 ++
 7 files changed, 394 insertions(+), 73 deletions(-)

diff --git a/include/net/netfilter/nf_conntrack_l4proto.h b/include/net/netfilter/nf_conntrack_l4proto.h
index 4cad1f0a327a..88186b95b3c2 100644
--- a/include/net/netfilter/nf_conntrack_l4proto.h
+++ b/include/net/netfilter/nf_conntrack_l4proto.h
@@ -42,7 +42,8 @@ struct nf_conntrack_l4proto {
 	/* Calculate tuple nlattr size */
 	unsigned int (*nlattr_tuple_size)(void);
 	int (*nlattr_to_tuple)(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags);
 	const struct nla_policy *nla_policy;
 
 	struct {
@@ -152,7 +153,8 @@ const struct nf_conntrack_l4proto *nf_ct_l4proto_find(u8 l4proto);
 int nf_ct_port_tuple_to_nlattr(struct sk_buff *skb,
 			       const struct nf_conntrack_tuple *tuple);
 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t);
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags);
 unsigned int nf_ct_port_nlattr_tuple_size(void);
 extern const struct nla_policy nf_ct_port_nla_policy[];
 
diff --git a/include/uapi/linux/netfilter/nfnetlink_conntrack.h b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
index 1d41810d17e2..262881792671 100644
--- a/include/uapi/linux/netfilter/nfnetlink_conntrack.h
+++ b/include/uapi/linux/netfilter/nfnetlink_conntrack.h
@@ -55,6 +55,7 @@ enum ctattr_type {
 	CTA_LABELS,
 	CTA_LABELS_MASK,
 	CTA_SYNPROXY,
+	CTA_FILTER,
 	__CTA_MAX
 };
 #define CTA_MAX (__CTA_MAX - 1)
@@ -276,4 +277,12 @@ enum ctattr_expect_stats {
 };
 #define CTA_STATS_EXP_MAX (__CTA_STATS_EXP_MAX - 1)
 
+enum ctattr_filter {
+	CTA_FILTER_UNSPEC,
+	CTA_FILTER_ORIG_FLAGS,
+	CTA_FILTER_REPLY_FLAGS,
+	__CTA_FILTER_MAX
+};
+#define CTA_FILTER_MAX (__CTA_FILTER_MAX - 1)
+
 #endif /* _IPCONNTRACK_NETLINK_H */
diff --git a/net/netfilter/nf_conntrack_core.c b/net/netfilter/nf_conntrack_core.c
index 1d57b95d3481..8abb1727bcc4 100644
--- a/net/netfilter/nf_conntrack_core.c
+++ b/net/netfilter/nf_conntrack_core.c
@@ -1974,13 +1974,22 @@ const struct nla_policy nf_ct_port_nla_policy[CTA_PROTO_MAX+1] = {
 EXPORT_SYMBOL_GPL(nf_ct_port_nla_policy);
 
 int nf_ct_port_nlattr_to_tuple(struct nlattr *tb[],
-			       struct nf_conntrack_tuple *t)
+			       struct nf_conntrack_tuple *t,
+			       u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_SRC_PORT] || !tb[CTA_PROTO_DST_PORT])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) {
+		if (!tb[CTA_PROTO_SRC_PORT])
+			return -EINVAL;
+
+		t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
+	}
 
-	t->src.u.tcp.port = nla_get_be16(tb[CTA_PROTO_SRC_PORT]);
-	t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) {
+		if (!tb[CTA_PROTO_DST_PORT])
+			return -EINVAL;
+
+		t->dst.u.tcp.port = nla_get_be16(tb[CTA_PROTO_DST_PORT]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_netlink.c b/net/netfilter/nf_conntrack_netlink.c
index 9ddfcd002d3b..d7bd8b1f27d5 100644
--- a/net/netfilter/nf_conntrack_netlink.c
+++ b/net/netfilter/nf_conntrack_netlink.c
@@ -54,6 +54,8 @@
 #include <linux/netfilter/nfnetlink.h>
 #include <linux/netfilter/nfnetlink_conntrack.h>
 
+#include "nf_internals.h"
+
 MODULE_LICENSE("GPL");
 
 static int ctnetlink_dump_tuples_proto(struct sk_buff *skb,
@@ -544,14 +546,16 @@ static int ctnetlink_dump_info(struct sk_buff *skb, struct nf_conn *ct)
 
 static int
 ctnetlink_fill_info(struct sk_buff *skb, u32 portid, u32 seq, u32 type,
-		    struct nf_conn *ct, bool extinfo)
+		    struct nf_conn *ct, bool extinfo, unsigned int flags)
 {
 	const struct nf_conntrack_zone *zone;
 	struct nlmsghdr *nlh;
 	struct nfgenmsg *nfmsg;
 	struct nlattr *nest_parms;
-	unsigned int flags = portid ? NLM_F_MULTI : 0, event;
+	unsigned int event;
 
+	if (portid)
+		flags |= NLM_F_MULTI;
 	event = nfnl_msg_type(NFNL_SUBSYS_CTNETLINK, IPCTNL_MSG_CT_NEW);
 	nlh = nlmsg_put(skb, portid, seq, event, sizeof(*nfmsg), flags);
 	if (nlh == NULL)
@@ -847,17 +851,70 @@ static int ctnetlink_done(struct netlink_callback *cb)
 }
 
 struct ctnetlink_filter {
+	u_int32_t cta_flags;
 	u8 family;
+
+	u_int32_t orig_flags;
+	u_int32_t reply_flags;
+
+	struct nf_conntrack_tuple orig;
+	struct nf_conntrack_tuple reply;
+	struct nf_conntrack_zone zone;
+
 	struct {
 		u_int32_t val;
 		u_int32_t mask;
 	} mark;
 };
 
+static const struct nla_policy cta_filter_nla_policy[CTA_FILTER_MAX + 1] = {
+	[CTA_FILTER_ORIG_FLAGS]		= { .type = NLA_U32 },
+	[CTA_FILTER_REPLY_FLAGS]	= { .type = NLA_U32 },
+};
+
+static int ctnetlink_parse_filter(const struct nlattr *attr,
+				  struct ctnetlink_filter *filter)
+{
+	struct nlattr *tb[CTA_FILTER_MAX + 1];
+	int ret = 0;
+
+	ret = nla_parse_nested(tb, CTA_FILTER_MAX, attr, cta_filter_nla_policy,
+			       NULL);
+	if (ret)
+		return ret;
+
+	if (tb[CTA_FILTER_ORIG_FLAGS]) {
+		filter->orig_flags = nla_get_u32(tb[CTA_FILTER_ORIG_FLAGS]);
+		if (filter->orig_flags & ~CTA_FILTER_F_ALL)
+			return -EOPNOTSUPP;
+	}
+
+	if (tb[CTA_FILTER_REPLY_FLAGS]) {
+		filter->reply_flags = nla_get_u32(tb[CTA_FILTER_REPLY_FLAGS]);
+		if (filter->reply_flags & ~CTA_FILTER_F_ALL)
+			return -EOPNOTSUPP;
+	}
+
+	return 0;
+}
+
+static int ctnetlink_parse_zone(const struct nlattr *attr,
+				struct nf_conntrack_zone *zone);
+static int ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+					 struct nf_conntrack_tuple *tuple,
+					 u32 type, u_int8_t l3num,
+					 struct nf_conntrack_zone *zone,
+					 u_int32_t flags);
+
+/* applied on filters */
+#define CTA_FILTER_F_CTA_MARK			(1 << 0)
+#define CTA_FILTER_F_CTA_MARK_MASK		(1 << 1)
+
 static struct ctnetlink_filter *
 ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 {
 	struct ctnetlink_filter *filter;
+	int err;
 
 #ifndef CONFIG_NF_CONNTRACK_MARK
 	if (cda[CTA_MARK] || cda[CTA_MARK_MASK])
@@ -871,14 +928,65 @@ ctnetlink_alloc_filter(const struct nlattr * const cda[], u8 family)
 	filter->family = family;
 
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if (cda[CTA_MARK] && cda[CTA_MARK_MASK]) {
+	if (cda[CTA_MARK]) {
 		filter->mark.val = ntohl(nla_get_be32(cda[CTA_MARK]));
-		filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+		filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK);
+
+		if (cda[CTA_MARK_MASK]) {
+			filter->mark.mask = ntohl(nla_get_be32(cda[CTA_MARK_MASK]));
+			filter->cta_flags |= CTA_FILTER_FLAG(CTA_MARK_MASK);
+		} else {
+			filter->mark.mask = 0xffffffff;
+		}
+	} else if (cda[CTA_MARK_MASK]) {
+		return ERR_PTR(-EINVAL);
 	}
 #endif
+	if (!cda[CTA_FILTER])
+		return filter;
+
+	err = ctnetlink_parse_zone(cda[CTA_ZONE], &filter->zone);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	err = ctnetlink_parse_filter(cda[CTA_FILTER], filter);
+	if (err < 0)
+		return ERR_PTR(err);
+
+	if (filter->orig_flags) {
+		if (!cda[CTA_TUPLE_ORIG])
+			return ERR_PTR(-EINVAL);
+
+		err = ctnetlink_parse_tuple_filter(cda, &filter->orig,
+						   CTA_TUPLE_ORIG,
+						   filter->family,
+						   &filter->zone,
+						   filter->orig_flags);
+		if (err < 0)
+			return ERR_PTR(err);
+	}
+
+	if (filter->reply_flags) {
+		if (!cda[CTA_TUPLE_REPLY])
+			return ERR_PTR(-EINVAL);
+
+		err = ctnetlink_parse_tuple_filter(cda, &filter->reply,
+						   CTA_TUPLE_REPLY,
+						   filter->family,
+						   &filter->zone,
+						   filter->orig_flags);
+		if (err < 0)
+			return ERR_PTR(err);
+	}
+
 	return filter;
 }
 
+static bool ctnetlink_needs_filter(u8 family, const struct nlattr * const *cda)
+{
+	return family || cda[CTA_MARK] || cda[CTA_FILTER];
+}
+
 static int ctnetlink_start(struct netlink_callback *cb)
 {
 	const struct nlattr * const *cda = cb->data;
@@ -886,7 +994,7 @@ static int ctnetlink_start(struct netlink_callback *cb)
 	struct nfgenmsg *nfmsg = nlmsg_data(cb->nlh);
 	u8 family = nfmsg->nfgen_family;
 
-	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+	if (ctnetlink_needs_filter(family, cda)) {
 		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
@@ -896,9 +1004,79 @@ static int ctnetlink_start(struct netlink_callback *cb)
 	return 0;
 }
 
+static int ctnetlink_filter_match_tuple(struct nf_conntrack_tuple *filter_tuple,
+					struct nf_conntrack_tuple *ct_tuple,
+					u_int32_t flags, int family)
+{
+	/* Return 1 if all tuple fields selected by @flags match, else 0 */
+	switch (family) {
+	case NFPROTO_IPV4:
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+		    filter_tuple->src.u3.ip != ct_tuple->src.u3.ip)
+			return  0;
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+		    filter_tuple->dst.u3.ip != ct_tuple->dst.u3.ip)
+			return  0;
+		break;
+	case NFPROTO_IPV6:
+		/* ipv6_addr_cmp() is non-zero when the addresses differ */
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_SRC)) &&
+		    ipv6_addr_cmp(&filter_tuple->src.u3.in6,
+				  &ct_tuple->src.u3.in6))
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_IP_DST)) &&
+		    ipv6_addr_cmp(&filter_tuple->dst.u3.in6,
+				  &ct_tuple->dst.u3.in6))
+			return 0;
+		break;
+	}
+
+	if ((flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) &&
+	    filter_tuple->dst.protonum != ct_tuple->dst.protonum)
+		return 0;
+
+	switch (ct_tuple->dst.protonum) {
+	case IPPROTO_TCP:
+	case IPPROTO_UDP:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_SRC_PORT)) &&
+		    filter_tuple->src.u.tcp.port != ct_tuple->src.u.tcp.port)
+			return 0;
+
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_DST_PORT)) &&
+		    filter_tuple->dst.u.tcp.port != ct_tuple->dst.u.tcp.port)
+			return 0;
+		break;
+	case IPPROTO_ICMP:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) &&
+		    filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) &&
+		    filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) &&
+		    filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+			return 0;
+		break;
+	case IPPROTO_ICMPV6:
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) &&
+		    filter_tuple->dst.u.icmp.type != ct_tuple->dst.u.icmp.type)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) &&
+		    filter_tuple->dst.u.icmp.code != ct_tuple->dst.u.icmp.code)
+			return 0;
+		if ((flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) &&
+		    filter_tuple->src.u.icmp.id != ct_tuple->src.u.icmp.id)
+			return 0;
+		break;
+	}
+
+	return 1;
+}
+
 static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 {
 	struct ctnetlink_filter *filter = data;
+	struct nf_conntrack_tuple *tuple;
 
 	if (filter == NULL)
 		goto out;
@@ -910,8 +1088,28 @@ static int ctnetlink_filter_match(struct nf_conn *ct, void *data)
 	if (filter->family && nf_ct_l3num(ct) != filter->family)
 		goto ignore_entry;
 
+	if (filter->orig_flags) {
+		tuple = nf_ct_tuple(ct, IP_CT_DIR_ORIGINAL);
+		if (!ctnetlink_filter_match_tuple(&filter->orig, tuple,
+						  filter->orig_flags,
+						  filter->family))
+			goto ignore_entry;
+	}
+
+	if (filter->reply_flags) {
+		tuple = nf_ct_tuple(ct, IP_CT_DIR_REPLY);
+		if (!ctnetlink_filter_match_tuple(&filter->reply, tuple,
+						  filter->reply_flags,
+						  filter->family))
+			goto ignore_entry;
+	}
+
 #ifdef CONFIG_NF_CONNTRACK_MARK
-	if ((ct->mark & filter->mark.mask) != filter->mark.val)
+	if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK_MASK)) &&
+	    (ct->mark & filter->mark.mask) != filter->mark.val)
+		goto ignore_entry;
+	else if ((filter->cta_flags & CTA_FILTER_FLAG(CTA_MARK)) &&
+		 ct->mark != filter->mark.val)
 		goto ignore_entry;
 #endif
 
@@ -925,6 +1123,7 @@ ignore_entry:
 static int
 ctnetlink_dump_table(struct sk_buff *skb, struct netlink_callback *cb)
 {
+	unsigned int flags = cb->data ? NLM_F_DUMP_FILTERED : 0;
 	struct net *net = sock_net(skb->sk);
 	struct nf_conn *ct, *last;
 	struct nf_conntrack_tuple_hash *h;
@@ -979,7 +1178,7 @@ restart:
 			ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
 					    cb->nlh->nlmsg_seq,
 					    NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-					    ct, true);
+					    ct, true, flags);
 			if (res < 0) {
 				nf_conntrack_get(&ct->ct_general);
 				cb->args[1] = (unsigned long)ct;
@@ -1014,31 +1213,50 @@ out:
 }
 
 static int ipv4_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
+				struct nf_conntrack_tuple *t,
+				u_int32_t flags)
 {
-	if (!tb[CTA_IP_V4_SRC] || !tb[CTA_IP_V4_DST])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_IP_V4_SRC])
+			return -EINVAL;
+
+		t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+		if (!tb[CTA_IP_V4_DST])
+			return -EINVAL;
 
-	t->src.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_SRC]);
-	t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+		t->dst.u3.ip = nla_get_in_addr(tb[CTA_IP_V4_DST]);
+	}
 
 	return 0;
 }
 
 static int ipv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *t)
+				struct nf_conntrack_tuple *t,
+				u_int32_t flags)
 {
-	if (!tb[CTA_IP_V6_SRC] || !tb[CTA_IP_V6_DST])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_IP_V6_SRC])
+			return -EINVAL;
 
-	t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
-	t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+		t->src.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_SRC]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST)) {
+		if (!tb[CTA_IP_V6_DST])
+			return -EINVAL;
+
+		t->dst.u3.in6 = nla_get_in6_addr(tb[CTA_IP_V6_DST]);
+	}
 
 	return 0;
 }
 
 static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
-				    struct nf_conntrack_tuple *tuple)
+				    struct nf_conntrack_tuple *tuple,
+				    u_int32_t flags)
 {
 	struct nlattr *tb[CTA_IP_MAX+1];
 	int ret = 0;
@@ -1054,10 +1272,10 @@ static int ctnetlink_parse_tuple_ip(struct nlattr *attr,
 
 	switch (tuple->src.l3num) {
 	case NFPROTO_IPV4:
-		ret = ipv4_nlattr_to_tuple(tb, tuple);
+		ret = ipv4_nlattr_to_tuple(tb, tuple, flags);
 		break;
 	case NFPROTO_IPV6:
-		ret = ipv6_nlattr_to_tuple(tb, tuple);
+		ret = ipv6_nlattr_to_tuple(tb, tuple, flags);
 		break;
 	}
 
@@ -1069,7 +1287,8 @@ static const struct nla_policy proto_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
-				       struct nf_conntrack_tuple *tuple)
+				       struct nf_conntrack_tuple *tuple,
+				       u_int32_t flags)
 {
 	const struct nf_conntrack_l4proto *l4proto;
 	struct nlattr *tb[CTA_PROTO_MAX+1];
@@ -1080,8 +1299,12 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 	if (ret < 0)
 		return ret;
 
+	if (!(flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)))
+		return 0;
+
 	if (!tb[CTA_PROTO_NUM])
 		return -EINVAL;
+
 	tuple->dst.protonum = nla_get_u8(tb[CTA_PROTO_NUM]);
 
 	rcu_read_lock();
@@ -1092,7 +1315,7 @@ static int ctnetlink_parse_tuple_proto(struct nlattr *attr,
 						     l4proto->nla_policy,
 						     NULL);
 		if (ret == 0)
-			ret = l4proto->nlattr_to_tuple(tb, tuple);
+			ret = l4proto->nlattr_to_tuple(tb, tuple, flags);
 	}
 
 	rcu_read_unlock();
@@ -1143,10 +1366,21 @@ static const struct nla_policy tuple_nla_policy[CTA_TUPLE_MAX+1] = {
 	[CTA_TUPLE_ZONE]	= { .type = NLA_U16 },
 };
 
+#define CTA_FILTER_F_ALL_CTA_PROTO \
+  (CTA_FILTER_F_CTA_PROTO_SRC_PORT | \
+   CTA_FILTER_F_CTA_PROTO_DST_PORT | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_TYPE | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_CODE | \
+   CTA_FILTER_F_CTA_PROTO_ICMP_ID | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE | \
+   CTA_FILTER_F_CTA_PROTO_ICMPV6_ID)
+
 static int
-ctnetlink_parse_tuple(const struct nlattr * const cda[],
-		      struct nf_conntrack_tuple *tuple, u32 type,
-		      u_int8_t l3num, struct nf_conntrack_zone *zone)
+ctnetlink_parse_tuple_filter(const struct nlattr * const cda[],
+			      struct nf_conntrack_tuple *tuple, u32 type,
+			      u_int8_t l3num, struct nf_conntrack_zone *zone,
+			      u_int32_t flags)
 {
 	struct nlattr *tb[CTA_TUPLE_MAX+1];
 	int err;
@@ -1158,23 +1392,32 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	if (err < 0)
 		return err;
 
-	if (!tb[CTA_TUPLE_IP])
-		return -EINVAL;
 
 	tuple->src.l3num = l3num;
 
-	err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple);
-	if (err < 0)
-		return err;
+	if (flags & CTA_FILTER_FLAG(CTA_IP_DST) ||
+	    flags & CTA_FILTER_FLAG(CTA_IP_SRC)) {
+		if (!tb[CTA_TUPLE_IP])
+			return -EINVAL;
 
-	if (!tb[CTA_TUPLE_PROTO])
-		return -EINVAL;
+		err = ctnetlink_parse_tuple_ip(tb[CTA_TUPLE_IP], tuple, flags);
+		if (err < 0)
+			return err;
+	}
 
-	err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple);
-	if (err < 0)
-		return err;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_NUM)) {
+		if (!tb[CTA_TUPLE_PROTO])
+			return -EINVAL;
 
-	if (tb[CTA_TUPLE_ZONE]) {
+		err = ctnetlink_parse_tuple_proto(tb[CTA_TUPLE_PROTO], tuple, flags);
+		if (err < 0)
+			return err;
+	} else if (flags & CTA_FILTER_FLAG(ALL_CTA_PROTO)) {
+		/* Can't manage proto flags without a protonum  */
+		return -EINVAL;
+	}
+
+	if ((flags & CTA_FILTER_FLAG(CTA_TUPLE_ZONE)) && tb[CTA_TUPLE_ZONE]) {
 		if (!zone)
 			return -EINVAL;
 
@@ -1193,6 +1436,15 @@ ctnetlink_parse_tuple(const struct nlattr * const cda[],
 	return 0;
 }
 
+static int
+ctnetlink_parse_tuple(const struct nlattr * const cda[],
+		      struct nf_conntrack_tuple *tuple, u32 type,
+		      u_int8_t l3num, struct nf_conntrack_zone *zone)
+{
+	return ctnetlink_parse_tuple_filter(cda, tuple, type, l3num, zone,
+					    CTA_FILTER_FLAG(ALL));
+}
+
 static const struct nla_policy help_nla_policy[CTA_HELP_MAX+1] = {
 	[CTA_HELP_NAME]		= { .type = NLA_NUL_STRING,
 				    .len = NF_CT_HELPER_NAME_LEN - 1 },
@@ -1240,6 +1492,7 @@ static const struct nla_policy ct_nla_policy[CTA_MAX+1] = {
 				    .len = NF_CT_LABELS_MAX_SIZE },
 	[CTA_LABELS_MASK]	= { .type = NLA_BINARY,
 				    .len = NF_CT_LABELS_MAX_SIZE },
+	[CTA_FILTER]		= { .type = NLA_NESTED },
 };
 
 static int ctnetlink_flush_iterate(struct nf_conn *ct, void *data)
@@ -1256,7 +1509,10 @@ static int ctnetlink_flush_conntrack(struct net *net,
 {
 	struct ctnetlink_filter *filter = NULL;
 
-	if (family || (cda[CTA_MARK] && cda[CTA_MARK_MASK])) {
+	if (ctnetlink_needs_filter(family, cda)) {
+		if (cda[CTA_FILTER])
+			return -EOPNOTSUPP;
+
 		filter = ctnetlink_alloc_filter(cda, family);
 		if (IS_ERR(filter))
 			return PTR_ERR(filter);
@@ -1385,7 +1641,7 @@ static int ctnetlink_get_conntrack(struct net *net, struct sock *ctnl,
 	}
 
 	err = ctnetlink_fill_info(skb2, NETLINK_CB(skb).portid, nlh->nlmsg_seq,
-				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true);
+				  NFNL_MSG_TYPE(nlh->nlmsg_type), ct, true, 0);
 	nf_ct_put(ct);
 	if (err <= 0)
 		goto free;
@@ -1458,7 +1714,7 @@ restart:
 			res = ctnetlink_fill_info(skb, NETLINK_CB(cb->skb).portid,
 						  cb->nlh->nlmsg_seq,
 						  NFNL_MSG_TYPE(cb->nlh->nlmsg_type),
-						  ct, dying ? true : false);
+						  ct, dying ? true : false, 0);
 			if (res < 0) {
 				if (!atomic_inc_not_zero(&ct->ct_general.use))
 					continue;
diff --git a/net/netfilter/nf_conntrack_proto_icmp.c b/net/netfilter/nf_conntrack_proto_icmp.c
index c2e3dff773bc..4efd8741c105 100644
--- a/net/netfilter/nf_conntrack_proto_icmp.c
+++ b/net/netfilter/nf_conntrack_proto_icmp.c
@@ -20,6 +20,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
+#include "nf_internals.h"
+
 static const unsigned int nf_ct_icmp_timeout = 30*HZ;
 
 bool icmp_pkt_to_tuple(const struct sk_buff *skb, unsigned int dataoff,
@@ -271,20 +273,32 @@ static const struct nla_policy icmp_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int icmp_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
+				struct nf_conntrack_tuple *tuple,
+				u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_ICMP_TYPE] ||
-	    !tb[CTA_PROTO_ICMP_CODE] ||
-	    !tb[CTA_PROTO_ICMP_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
-
-	if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_TYPE)) {
+		if (!tb[CTA_PROTO_ICMP_TYPE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMP_TYPE]);
+		if (tuple->dst.u.icmp.type >= sizeof(invmap) ||
+		    !invmap[tuple->dst.u.icmp.type])
+			return -EINVAL;
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_CODE)) {
+		if (!tb[CTA_PROTO_ICMP_CODE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMP_CODE]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMP_ID)) {
+		if (!tb[CTA_PROTO_ICMP_ID])
+			return -EINVAL;
+
+		tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMP_ID]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_conntrack_proto_icmpv6.c b/net/netfilter/nf_conntrack_proto_icmpv6.c
index 6f9144e1f1c1..facd8c64ec4e 100644
--- a/net/netfilter/nf_conntrack_proto_icmpv6.c
+++ b/net/netfilter/nf_conntrack_proto_icmpv6.c
@@ -24,6 +24,8 @@
 #include <net/netfilter/nf_conntrack_zones.h>
 #include <net/netfilter/nf_log.h>
 
+#include "nf_internals.h"
+
 static const unsigned int nf_ct_icmpv6_timeout = 30*HZ;
 
 bool icmpv6_pkt_to_tuple(const struct sk_buff *skb,
@@ -193,21 +195,33 @@ static const struct nla_policy icmpv6_nla_policy[CTA_PROTO_MAX+1] = {
 };
 
 static int icmpv6_nlattr_to_tuple(struct nlattr *tb[],
-				struct nf_conntrack_tuple *tuple)
+				struct nf_conntrack_tuple *tuple,
+				u_int32_t flags)
 {
-	if (!tb[CTA_PROTO_ICMPV6_TYPE] ||
-	    !tb[CTA_PROTO_ICMPV6_CODE] ||
-	    !tb[CTA_PROTO_ICMPV6_ID])
-		return -EINVAL;
-
-	tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
-	tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
-	tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
-
-	if (tuple->dst.u.icmp.type < 128 ||
-	    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
-	    !invmap[tuple->dst.u.icmp.type - 128])
-		return -EINVAL;
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_TYPE)) {
+		if (!tb[CTA_PROTO_ICMPV6_TYPE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.type = nla_get_u8(tb[CTA_PROTO_ICMPV6_TYPE]);
+		if (tuple->dst.u.icmp.type < 128 ||
+		    tuple->dst.u.icmp.type - 128 >= sizeof(invmap) ||
+		    !invmap[tuple->dst.u.icmp.type - 128])
+			return -EINVAL;
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_CODE)) {
+		if (!tb[CTA_PROTO_ICMPV6_CODE])
+			return -EINVAL;
+
+		tuple->dst.u.icmp.code = nla_get_u8(tb[CTA_PROTO_ICMPV6_CODE]);
+	}
+
+	if (flags & CTA_FILTER_FLAG(CTA_PROTO_ICMPV6_ID)) {
+		if (!tb[CTA_PROTO_ICMPV6_ID])
+			return -EINVAL;
+
+		tuple->src.u.icmp.id = nla_get_be16(tb[CTA_PROTO_ICMPV6_ID]);
+	}
 
 	return 0;
 }
diff --git a/net/netfilter/nf_internals.h b/net/netfilter/nf_internals.h
index d6c43902ebd7..832ae64179f0 100644
--- a/net/netfilter/nf_internals.h
+++ b/net/netfilter/nf_internals.h
@@ -6,6 +6,23 @@
 #include <linux/skbuff.h>
 #include <linux/netdevice.h>
 
+/* nf_conntrack_netlink.c: applied on tuple filters */
+#define CTA_FILTER_F_CTA_IP_SRC			(1 << 0)
+#define CTA_FILTER_F_CTA_IP_DST			(1 << 1)
+#define CTA_FILTER_F_CTA_TUPLE_ZONE		(1 << 2)
+#define CTA_FILTER_F_CTA_PROTO_NUM		(1 << 3)
+#define CTA_FILTER_F_CTA_PROTO_SRC_PORT		(1 << 4)
+#define CTA_FILTER_F_CTA_PROTO_DST_PORT		(1 << 5)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_TYPE	(1 << 6)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_CODE	(1 << 7)
+#define CTA_FILTER_F_CTA_PROTO_ICMP_ID		(1 << 8)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_TYPE	(1 << 9)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_CODE	(1 << 10)
+#define CTA_FILTER_F_CTA_PROTO_ICMPV6_ID	(1 << 11)
+#define CTA_FILTER_F_MAX			(1 << 12)
+#define CTA_FILTER_F_ALL			(CTA_FILTER_F_MAX-1)
+#define CTA_FILTER_FLAG(ctattr) CTA_FILTER_F_ ## ctattr
+
 /* nf_queue.c */
 void nf_queue_nf_hook_drop(struct net *net);
 
-- 
cgit v1.2.3-59-g8ed1b


From d9246a53752fdb777ed176d5f091c4ac0e482bba Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 13:42:44 +0200
Subject: netfilter: nf_tables: generalise flowtable hook parsing

Update nft_flowtable_parse_hook() to take the flowtable hook list as a
parameter. This allows this function to be reused to update the hooks.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 36 +++++++++++++++++++++++++-----------
 1 file changed, 25 insertions(+), 11 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 3558e76e2733..87945b4a6789 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6178,21 +6178,30 @@ nft_flowtable_lookup_byhandle(const struct nft_table *table,
        return ERR_PTR(-ENOENT);
 }
 
+struct nft_flowtable_hook {
+	u32			num;
+	int			priority;
+	struct list_head	list;
+};
+
 static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX + 1] = {
 	[NFTA_FLOWTABLE_HOOK_NUM]	= { .type = NLA_U32 },
 	[NFTA_FLOWTABLE_HOOK_PRIORITY]	= { .type = NLA_U32 },
 	[NFTA_FLOWTABLE_HOOK_DEVS]	= { .type = NLA_NESTED },
 };
 
-static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
-					  const struct nlattr *attr,
-					  struct nft_flowtable *flowtable)
+static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
+				    const struct nlattr *attr,
+				    struct nft_flowtable_hook *flowtable_hook,
+				    struct nf_flowtable *ft)
 {
 	struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
 	struct nft_hook *hook;
 	int hooknum, priority;
 	int err;
 
+	INIT_LIST_HEAD(&flowtable_hook->list);
+
 	err = nla_parse_nested_deprecated(tb, NFTA_FLOWTABLE_HOOK_MAX, attr,
 					  nft_flowtable_hook_policy, NULL);
 	if (err < 0)
@@ -6211,19 +6220,19 @@ static int nf_tables_flowtable_parse_hook(const struct nft_ctx *ctx,
 
 	err = nf_tables_parse_netdev_hooks(ctx->net,
 					   tb[NFTA_FLOWTABLE_HOOK_DEVS],
-					   &flowtable->hook_list);
+					   &flowtable_hook->list);
 	if (err < 0)
 		return err;
 
-	flowtable->hooknum		= hooknum;
-	flowtable->data.priority	= priority;
+	flowtable_hook->priority	= priority;
+	flowtable_hook->num		= hooknum;
 
-	list_for_each_entry(hook, &flowtable->hook_list, list) {
+	list_for_each_entry(hook, &flowtable_hook->list, list) {
 		hook->ops.pf		= NFPROTO_NETDEV;
 		hook->ops.hooknum	= hooknum;
 		hook->ops.priority	= priority;
-		hook->ops.priv		= &flowtable->data;
-		hook->ops.hook		= flowtable->data.type->hook;
+		hook->ops.priv		= ft;
+		hook->ops.hook		= ft->type->hook;
 	}
 
 	return err;
@@ -6336,6 +6345,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 				  struct netlink_ext_ack *extack)
 {
 	const struct nfgenmsg *nfmsg = nlmsg_data(nlh);
+	struct nft_flowtable_hook flowtable_hook;
 	const struct nf_flowtable_type *type;
 	u8 genmask = nft_genmask_next(net);
 	int family = nfmsg->nfgen_family;
@@ -6409,11 +6419,15 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	if (err < 0)
 		goto err3;
 
-	err = nf_tables_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
-					     flowtable);
+	err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
+				       &flowtable_hook, &flowtable->data);
 	if (err < 0)
 		goto err4;
 
+	list_splice(&flowtable_hook.list, &flowtable->hook_list);
+	flowtable->data.priority = flowtable_hook.priority;
+	flowtable->hooknum = flowtable_hook.num;
+
 	err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
 	if (err < 0) {
 		list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
-- 
cgit v1.2.3-59-g8ed1b


From f9382669cf5e75ebc7636bd78e637facf27d53f7 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Tue, 19 May 2020 01:00:07 +0200
Subject: netfilter: nf_tables: pass hook list to
 nft_{un,}register_flowtable_net_hooks()

This patch prepares for incremental flowtable hook updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 87945b4a6789..1505552aaa74 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6279,23 +6279,24 @@ static void nft_unregister_flowtable_hook(struct net *net,
 }
 
 static void nft_unregister_flowtable_net_hooks(struct net *net,
-					       struct nft_flowtable *flowtable)
+					       struct list_head *hook_list)
 {
 	struct nft_hook *hook;
 
-	list_for_each_entry(hook, &flowtable->hook_list, list)
+	list_for_each_entry(hook, hook_list, list)
 		nf_unregister_net_hook(net, &hook->ops);
 }
 
 static int nft_register_flowtable_net_hooks(struct net *net,
 					    struct nft_table *table,
+					    struct list_head *hook_list,
 					    struct nft_flowtable *flowtable)
 {
 	struct nft_hook *hook, *hook2, *next;
 	struct nft_flowtable *ft;
 	int err, i = 0;
 
-	list_for_each_entry(hook, &flowtable->hook_list, list) {
+	list_for_each_entry(hook, hook_list, list) {
 		list_for_each_entry(ft, &table->flowtables, list) {
 			list_for_each_entry(hook2, &ft->hook_list, list) {
 				if (hook->ops.dev == hook2->ops.dev &&
@@ -6326,7 +6327,7 @@ static int nft_register_flowtable_net_hooks(struct net *net,
 	return 0;
 
 err_unregister_net_hooks:
-	list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+	list_for_each_entry_safe(hook, next, hook_list, list) {
 		if (i-- <= 0)
 			break;
 
@@ -6428,7 +6429,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 	flowtable->data.priority = flowtable_hook.priority;
 	flowtable->hooknum = flowtable_hook.num;
 
-	err = nft_register_flowtable_net_hooks(ctx.net, table, flowtable);
+	err = nft_register_flowtable_net_hooks(ctx.net, table,
+					       &flowtable->hook_list,
+					       flowtable);
 	if (err < 0) {
 		list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
 			list_del_rcu(&hook->list);
@@ -7493,7 +7496,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 						   nft_trans_flowtable(trans),
 						   NFT_MSG_DELFLOWTABLE);
 			nft_unregister_flowtable_net_hooks(net,
-					nft_trans_flowtable(trans));
+					&nft_trans_flowtable(trans)->hook_list);
 			break;
 		}
 	}
@@ -7652,7 +7655,7 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 			trans->ctx.table->use--;
 			list_del_rcu(&nft_trans_flowtable(trans)->list);
 			nft_unregister_flowtable_net_hooks(net,
-					nft_trans_flowtable(trans));
+					&nft_trans_flowtable(trans)->hook_list);
 			break;
 		case NFT_MSG_DELFLOWTABLE:
 			trans->ctx.table->use++;
-- 
cgit v1.2.3-59-g8ed1b


From 389a2cbcb7f15e2af9babdc0c63cec318537e7ed Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 13:43:43 +0200
Subject: netfilter: nf_tables: add nft_flowtable_hooks_destroy()

This patch adds a helper function to destroy the flowtable hooks.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 15 +++++++++++----
 1 file changed, 11 insertions(+), 4 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1505552aaa74..f5d100787ccb 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6339,6 +6339,16 @@ err_unregister_net_hooks:
 	return err;
 }
 
+static void nft_flowtable_hooks_destroy(struct list_head *hook_list)
+{
+	struct nft_hook *hook, *next;
+
+	list_for_each_entry_safe(hook, next, hook_list, list) {
+		list_del_rcu(&hook->list);
+		kfree_rcu(hook, rcu);
+	}
+}
+
 static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
@@ -6433,10 +6443,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 					       &flowtable->hook_list,
 					       flowtable);
 	if (err < 0) {
-		list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
-			list_del_rcu(&hook->list);
-			kfree_rcu(hook, rcu);
-		}
+		nft_flowtable_hooks_destroy(&flowtable->hook_list);
 		goto err4;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From c42d8bda69e291c5551497a02db71b50d95510d4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 13:44:18 +0200
Subject: netfilter: nf_tables: pass hook list to flowtable event notifier

Update the flowtable netlink notifier to take the list of hooks as input.
This allows reusing this function in incremental flowtable hook updates.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 16 +++++++++++-----
 1 file changed, 11 insertions(+), 5 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index f5d100787ccb..4db70e68d7f4 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6523,7 +6523,8 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 					 u32 portid, u32 seq, int event,
 					 u32 flags, int family,
-					 struct nft_flowtable *flowtable)
+					 struct nft_flowtable *flowtable,
+					 struct list_head *hook_list)
 {
 	struct nlattr *nest, *nest_devs;
 	struct nfgenmsg *nfmsg;
@@ -6559,7 +6560,7 @@ static int nf_tables_fill_flowtable_info(struct sk_buff *skb, struct net *net,
 	if (!nest_devs)
 		goto nla_put_failure;
 
-	list_for_each_entry_rcu(hook, &flowtable->hook_list, list) {
+	list_for_each_entry_rcu(hook, hook_list, list) {
 		if (nla_put_string(skb, NFTA_DEVICE_NAME, hook->ops.dev->name))
 			goto nla_put_failure;
 	}
@@ -6612,7 +6613,9 @@ static int nf_tables_dump_flowtable(struct sk_buff *skb,
 							  cb->nlh->nlmsg_seq,
 							  NFT_MSG_NEWFLOWTABLE,
 							  NLM_F_MULTI | NLM_F_APPEND,
-							  table->family, flowtable) < 0)
+							  table->family,
+							  flowtable,
+							  &flowtable->hook_list) < 0)
 				goto done;
 
 			nl_dump_check_consistent(cb, nlmsg_hdr(skb));
@@ -6709,7 +6712,7 @@ static int nf_tables_getflowtable(struct net *net, struct sock *nlsk,
 	err = nf_tables_fill_flowtable_info(skb2, net, NETLINK_CB(skb).portid,
 					    nlh->nlmsg_seq,
 					    NFT_MSG_NEWFLOWTABLE, 0, family,
-					    flowtable);
+					    flowtable, &flowtable->hook_list);
 	if (err < 0)
 		goto err;
 
@@ -6721,6 +6724,7 @@ err:
 
 static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 				       struct nft_flowtable *flowtable,
+				       struct list_head *hook_list,
 				       int event)
 {
 	struct sk_buff *skb;
@@ -6736,7 +6740,7 @@ static void nf_tables_flowtable_notify(struct nft_ctx *ctx,
 
 	err = nf_tables_fill_flowtable_info(skb, ctx->net, ctx->portid,
 					    ctx->seq, event, 0,
-					    ctx->family, flowtable);
+					    ctx->family, flowtable, hook_list);
 	if (err < 0) {
 		kfree_skb(skb);
 		goto err;
@@ -7494,6 +7498,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nft_clear(net, nft_trans_flowtable(trans));
 			nf_tables_flowtable_notify(&trans->ctx,
 						   nft_trans_flowtable(trans),
+						   &nft_trans_flowtable(trans)->hook_list,
 						   NFT_MSG_NEWFLOWTABLE);
 			nft_trans_destroy(trans);
 			break;
@@ -7501,6 +7506,7 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			list_del_rcu(&nft_trans_flowtable(trans)->list);
 			nf_tables_flowtable_notify(&trans->ctx,
 						   nft_trans_flowtable(trans),
+						   &nft_trans_flowtable(trans)->hook_list,
 						   NFT_MSG_DELFLOWTABLE);
 			nft_unregister_flowtable_net_hooks(net,
 					&nft_trans_flowtable(trans)->hook_list);
-- 
cgit v1.2.3-59-g8ed1b


From 78d9f48f7f44431a25da2b46b3a8812f6ff2b981 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 13:46:47 +0200
Subject: netfilter: nf_tables: add devices to existing flowtable

This patch allows users to add devices to an existing flowtable.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |  6 +++
 net/netfilter/nf_tables_api.c     | 97 ++++++++++++++++++++++++++++++++++-----
 2 files changed, 92 insertions(+), 11 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index d4e29c952c40..4f58c4411bb4 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1481,10 +1481,16 @@ struct nft_trans_obj {
 
 struct nft_trans_flowtable {
 	struct nft_flowtable		*flowtable;
+	bool				update;
+	struct list_head		hook_list;
 };
 
 #define nft_trans_flowtable(trans)	\
 	(((struct nft_trans_flowtable *)trans->data)->flowtable)
+#define nft_trans_flowtable_update(trans)	\
+	(((struct nft_trans_flowtable *)trans->data)->update)
+#define nft_trans_flowtable_hooks(trans)	\
+	(((struct nft_trans_flowtable *)trans->data)->hook_list)
 
 int __init nft_chain_filter_init(void);
 void nft_chain_filter_fini(void);
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 4db70e68d7f4..98f2cbb97e39 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6349,6 +6349,62 @@ static void nft_flowtable_hooks_destroy(struct list_head *hook_list)
 	}
 }
 
+static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
+				struct nft_flowtable *flowtable)
+{
+	const struct nlattr * const *nla = ctx->nla;
+	struct nft_flowtable_hook flowtable_hook;
+	struct nft_hook *hook, *next;
+	struct nft_trans *trans;
+	bool unregister = false;
+	int err;
+
+	err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
+				       &flowtable_hook, &flowtable->data);
+	if (err < 0)
+		return err;
+
+	list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
+		if (nft_hook_list_find(&flowtable->hook_list, hook)) {
+			list_del(&hook->list);
+			kfree(hook);
+		}
+	}
+
+	err = nft_register_flowtable_net_hooks(ctx->net, ctx->table,
+					       &flowtable_hook.list, flowtable);
+	if (err < 0)
+		goto err_flowtable_update_hook;
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_NEWFLOWTABLE,
+				sizeof(struct nft_trans_flowtable));
+	if (!trans) {
+		unregister = true;
+		err = -ENOMEM;
+		goto err_flowtable_update_hook;
+	}
+
+	nft_trans_flowtable(trans) = flowtable;
+	nft_trans_flowtable_update(trans) = true;
+	INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+	list_splice(&flowtable_hook.list, &nft_trans_flowtable_hooks(trans));
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+
+err_flowtable_update_hook:
+	list_for_each_entry_safe(hook, next, &flowtable_hook.list, list) {
+		if (unregister)
+			nft_unregister_flowtable_hook(ctx->net, flowtable, hook);
+		list_del_rcu(&hook->list);
+		kfree_rcu(hook, rcu);
+	}
+
+	return err;
+
+}
+
 static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
@@ -6392,7 +6448,9 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 			return -EEXIST;
 		}
 
-		return 0;
+		nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+		return nft_flowtable_update(&ctx, nlh, flowtable);
 	}
 
 	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
@@ -7495,11 +7553,20 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 					     NFT_MSG_DELOBJ);
 			break;
 		case NFT_MSG_NEWFLOWTABLE:
-			nft_clear(net, nft_trans_flowtable(trans));
-			nf_tables_flowtable_notify(&trans->ctx,
-						   nft_trans_flowtable(trans),
-						   &nft_trans_flowtable(trans)->hook_list,
-						   NFT_MSG_NEWFLOWTABLE);
+			if (nft_trans_flowtable_update(trans)) {
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable_hooks(trans),
+							   NFT_MSG_NEWFLOWTABLE);
+				list_splice(&nft_trans_flowtable_hooks(trans),
+					    &nft_trans_flowtable(trans)->hook_list);
+			} else {
+				nft_clear(net, nft_trans_flowtable(trans));
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable(trans)->hook_list,
+							   NFT_MSG_NEWFLOWTABLE);
+			}
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELFLOWTABLE:
@@ -7558,7 +7625,10 @@ static void nf_tables_abort_release(struct nft_trans *trans)
 		nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
 		break;
 	case NFT_MSG_NEWFLOWTABLE:
-		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+		if (nft_trans_flowtable_update(trans))
+			nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+		else
+			nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
 		break;
 	}
 	kfree(trans);
@@ -7665,10 +7735,15 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_NEWFLOWTABLE:
-			trans->ctx.table->use--;
-			list_del_rcu(&nft_trans_flowtable(trans)->list);
-			nft_unregister_flowtable_net_hooks(net,
-					&nft_trans_flowtable(trans)->hook_list);
+			if (nft_trans_flowtable_update(trans)) {
+				nft_unregister_flowtable_net_hooks(net,
+						&nft_trans_flowtable_hooks(trans));
+			} else {
+				trans->ctx.table->use--;
+				list_del_rcu(&nft_trans_flowtable(trans)->list);
+				nft_unregister_flowtable_net_hooks(net,
+						&nft_trans_flowtable(trans)->hook_list);
+			}
 			break;
 		case NFT_MSG_DELFLOWTABLE:
 			trans->ctx.table->use++;
-- 
cgit v1.2.3-59-g8ed1b


From abadb2f865d72a223d691fc68e006943ecadf0d9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 13:46:51 +0200
Subject: netfilter: nf_tables: delete devices from flowtable

This patch allows users to delete devices from existing flowtables.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 include/net/netfilter/nf_tables.h |   1 +
 net/netfilter/nf_tables_api.c     | 113 ++++++++++++++++++++++++++++++++------
 2 files changed, 98 insertions(+), 16 deletions(-)

diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h
index 4f58c4411bb4..6f0f6fca9ac3 100644
--- a/include/net/netfilter/nf_tables.h
+++ b/include/net/netfilter/nf_tables.h
@@ -1002,6 +1002,7 @@ struct nft_stats {
 
 struct nft_hook {
 	struct list_head	list;
+	bool			inactive;
 	struct nf_hook_ops	ops;
 	struct rcu_head		rcu;
 };
diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 98f2cbb97e39..1c2c3bb78fa0 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1669,6 +1669,7 @@ static struct nft_hook *nft_netdev_hook_alloc(struct net *net,
 		goto err_hook_dev;
 	}
 	hook->ops.dev = dev;
+	hook->inactive = false;
 
 	return hook;
 
@@ -1678,17 +1679,17 @@ err_hook_alloc:
 	return ERR_PTR(err);
 }
 
-static bool nft_hook_list_find(struct list_head *hook_list,
-			       const struct nft_hook *this)
+static struct nft_hook *nft_hook_list_find(struct list_head *hook_list,
+					   const struct nft_hook *this)
 {
 	struct nft_hook *hook;
 
 	list_for_each_entry(hook, hook_list, list) {
 		if (this->ops.dev == hook->ops.dev)
-			return true;
+			return hook;
 	}
 
-	return false;
+	return NULL;
 }
 
 static int nf_tables_parse_netdev_hooks(struct net *net,
@@ -6530,6 +6531,51 @@ err1:
 	return err;
 }
 
+static int nft_delflowtable_hook(struct nft_ctx *ctx,
+				 struct nft_flowtable *flowtable)
+{
+	const struct nlattr * const *nla = ctx->nla;
+	struct nft_flowtable_hook flowtable_hook;
+	struct nft_hook *this, *next, *hook;
+	struct nft_trans *trans;
+	int err;
+
+	err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
+				       &flowtable_hook, &flowtable->data);
+	if (err < 0)
+		return err;
+
+	list_for_each_entry_safe(this, next, &flowtable_hook.list, list) {
+		hook = nft_hook_list_find(&flowtable->hook_list, this);
+		if (!hook) {
+			err = -ENOENT;
+			goto err_flowtable_del_hook;
+		}
+		hook->inactive = true;
+		list_del(&this->list);
+		kfree(this);
+	}
+
+	trans = nft_trans_alloc(ctx, NFT_MSG_DELFLOWTABLE,
+				sizeof(struct nft_trans_flowtable));
+	if (!trans)
+		return -ENOMEM;
+
+	nft_trans_flowtable(trans) = flowtable;
+	nft_trans_flowtable_update(trans) = true;
+	INIT_LIST_HEAD(&nft_trans_flowtable_hooks(trans));
+
+	list_add_tail(&trans->list, &ctx->net->nft.commit_list);
+
+	return 0;
+
+err_flowtable_del_hook:
+	list_for_each_entry(hook, &flowtable_hook.list, list)
+		hook->inactive = false;
+
+	return err;
+}
+
 static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 				  struct sk_buff *skb,
 				  const struct nlmsghdr *nlh,
@@ -6568,13 +6614,17 @@ static int nf_tables_delflowtable(struct net *net, struct sock *nlsk,
 		NL_SET_BAD_ATTR(extack, attr);
 		return PTR_ERR(flowtable);
 	}
+
+	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
+
+	if (nla[NFTA_FLOWTABLE_HOOK])
+		return nft_delflowtable_hook(&ctx, flowtable);
+
 	if (flowtable->use > 0) {
 		NL_SET_BAD_ATTR(extack, attr);
 		return -EBUSY;
 	}
 
-	nft_ctx_init(&ctx, net, skb, nlh, family, table, NULL, nla);
-
 	return nft_delflowtable(&ctx, flowtable);
 }
 
@@ -7184,7 +7234,10 @@ static void nft_commit_release(struct nft_trans *trans)
 		nft_obj_destroy(&trans->ctx, nft_trans_obj(trans));
 		break;
 	case NFT_MSG_DELFLOWTABLE:
-		nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
+		if (nft_trans_flowtable_update(trans))
+			nft_flowtable_hooks_destroy(&nft_trans_flowtable_hooks(trans));
+		else
+			nf_tables_flowtable_destroy(nft_trans_flowtable(trans));
 		break;
 	}
 
@@ -7345,6 +7398,17 @@ static void nft_chain_del(struct nft_chain *chain)
 	list_del_rcu(&chain->list);
 }
 
+static void nft_flowtable_hooks_del(struct nft_flowtable *flowtable,
+				    struct list_head *hook_list)
+{
+	struct nft_hook *hook, *next;
+
+	list_for_each_entry_safe(hook, next, &flowtable->hook_list, list) {
+		if (hook->inactive)
+			list_move(&hook->list, hook_list);
+	}
+}
+
 static void nf_tables_module_autoload_cleanup(struct net *net)
 {
 	struct nft_module_request *req, *next;
@@ -7570,13 +7634,24 @@ static int nf_tables_commit(struct net *net, struct sk_buff *skb)
 			nft_trans_destroy(trans);
 			break;
 		case NFT_MSG_DELFLOWTABLE:
-			list_del_rcu(&nft_trans_flowtable(trans)->list);
-			nf_tables_flowtable_notify(&trans->ctx,
-						   nft_trans_flowtable(trans),
-						   &nft_trans_flowtable(trans)->hook_list,
-						   NFT_MSG_DELFLOWTABLE);
-			nft_unregister_flowtable_net_hooks(net,
-					&nft_trans_flowtable(trans)->hook_list);
+			if (nft_trans_flowtable_update(trans)) {
+				nft_flowtable_hooks_del(nft_trans_flowtable(trans),
+							&nft_trans_flowtable_hooks(trans));
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable_hooks(trans),
+							   NFT_MSG_DELFLOWTABLE);
+				nft_unregister_flowtable_net_hooks(net,
+								   &nft_trans_flowtable_hooks(trans));
+			} else {
+				list_del_rcu(&nft_trans_flowtable(trans)->list);
+				nf_tables_flowtable_notify(&trans->ctx,
+							   nft_trans_flowtable(trans),
+							   &nft_trans_flowtable(trans)->hook_list,
+							   NFT_MSG_DELFLOWTABLE);
+				nft_unregister_flowtable_net_hooks(net,
+						&nft_trans_flowtable(trans)->hook_list);
+			}
 			break;
 		}
 	}
@@ -7638,6 +7713,7 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 {
 	struct nft_trans *trans, *next;
 	struct nft_trans_elem *te;
+	struct nft_hook *hook;
 
 	list_for_each_entry_safe_reverse(trans, next, &net->nft.commit_list,
 					 list) {
@@ -7746,8 +7822,13 @@ static int __nf_tables_abort(struct net *net, bool autoload)
 			}
 			break;
 		case NFT_MSG_DELFLOWTABLE:
-			trans->ctx.table->use++;
-			nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+			if (nft_trans_flowtable_update(trans)) {
+				list_for_each_entry(hook, &nft_trans_flowtable(trans)->hook_list, list)
+					hook->inactive = false;
+			} else {
+				trans->ctx.table->use++;
+				nft_clear(trans->ctx.net, nft_trans_flowtable(trans));
+			}
 			nft_trans_destroy(trans);
 			break;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 05abe4456fa376040f6cc3cc6830d2e328723478 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Wed, 20 May 2020 15:44:37 +0200
Subject: netfilter: nf_tables: allow to register flowtable with no devices

A flowtable might be composed of dynamic interfaces only. Such dynamic
interfaces might show up at a later stage. This patch allows users to
register a flowtable with no devices. Once the dynamic interface becomes
available, the user adds the dynamic devices to the flowtable.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 20 +++++++++++---------
 1 file changed, 11 insertions(+), 9 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 1c2c3bb78fa0..897ac5fbe079 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -1724,8 +1724,6 @@ static int nf_tables_parse_netdev_hooks(struct net *net,
 			goto err_hook;
 		}
 	}
-	if (!n)
-		return -EINVAL;
 
 	return 0;
 
@@ -1762,6 +1760,9 @@ static int nft_chain_parse_netdev(struct net *net,
 						   hook_list);
 		if (err < 0)
 			return err;
+
+		if (list_empty(hook_list))
+			return -EINVAL;
 	} else {
 		return -EINVAL;
 	}
@@ -6209,8 +6210,7 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
 		return err;
 
 	if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
-	    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY] ||
-	    !tb[NFTA_FLOWTABLE_HOOK_DEVS])
+	    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY])
 		return -EINVAL;
 
 	hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
@@ -6219,11 +6219,13 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
 
 	priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
 
-	err = nf_tables_parse_netdev_hooks(ctx->net,
-					   tb[NFTA_FLOWTABLE_HOOK_DEVS],
-					   &flowtable_hook->list);
-	if (err < 0)
-		return err;
+	if (tb[NFTA_FLOWTABLE_HOOK_DEVS]) {
+		err = nf_tables_parse_netdev_hooks(ctx->net,
+						   tb[NFTA_FLOWTABLE_HOOK_DEVS],
+						   &flowtable_hook->list);
+		if (err < 0)
+			return err;
+	}
 
 	flowtable_hook->priority	= priority;
 	flowtable_hook->num		= hooknum;
-- 
cgit v1.2.3-59-g8ed1b


From 5b6743fb2c2a1fcb31c8b227558f537095dbece4 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Sat, 23 May 2020 12:05:22 +0200
Subject: netfilter: nf_tables: skip flowtable hooknum and priority on device
 updates

On device updates, the hooknum and priority attributes are not required.
This patch makes these two netlink attributes optional.

Moreover, bail out with EOPNOTSUPP if userspace tries to update the
hooknum and priority for existing flowtables.

While at it, turn EINVAL into EOPNOTSUPP in case the hooknum is not
ingress. EINVAL is reserved for missing netlink attribute / malformed
netlink messages.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
---
 net/netfilter/nf_tables_api.c | 53 ++++++++++++++++++++++++++++---------------
 1 file changed, 35 insertions(+), 18 deletions(-)

diff --git a/net/netfilter/nf_tables_api.c b/net/netfilter/nf_tables_api.c
index 897ac5fbe079..073aa1051d43 100644
--- a/net/netfilter/nf_tables_api.c
+++ b/net/netfilter/nf_tables_api.c
@@ -6195,7 +6195,7 @@ static const struct nla_policy nft_flowtable_hook_policy[NFTA_FLOWTABLE_HOOK_MAX
 static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
 				    const struct nlattr *attr,
 				    struct nft_flowtable_hook *flowtable_hook,
-				    struct nf_flowtable *ft)
+				    struct nft_flowtable *flowtable, bool add)
 {
 	struct nlattr *tb[NFTA_FLOWTABLE_HOOK_MAX + 1];
 	struct nft_hook *hook;
@@ -6209,15 +6209,35 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
 	if (err < 0)
 		return err;
 
-	if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
-	    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY])
-		return -EINVAL;
+	if (add) {
+		if (!tb[NFTA_FLOWTABLE_HOOK_NUM] ||
+		    !tb[NFTA_FLOWTABLE_HOOK_PRIORITY])
+			return -EINVAL;
 
-	hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
-	if (hooknum != NF_NETDEV_INGRESS)
-		return -EINVAL;
+		hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+		if (hooknum != NF_NETDEV_INGRESS)
+			return -EOPNOTSUPP;
+
+		priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+
+		flowtable_hook->priority	= priority;
+		flowtable_hook->num		= hooknum;
+	} else {
+		if (tb[NFTA_FLOWTABLE_HOOK_NUM]) {
+			hooknum = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_NUM]));
+			if (hooknum != flowtable->hooknum)
+				return -EOPNOTSUPP;
+		}
+
+		if (tb[NFTA_FLOWTABLE_HOOK_PRIORITY]) {
+			priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+			if (priority != flowtable->data.priority)
+				return -EOPNOTSUPP;
+		}
 
-	priority = ntohl(nla_get_be32(tb[NFTA_FLOWTABLE_HOOK_PRIORITY]));
+		flowtable_hook->priority	= flowtable->data.priority;
+		flowtable_hook->num		= flowtable->hooknum;
+	}
 
 	if (tb[NFTA_FLOWTABLE_HOOK_DEVS]) {
 		err = nf_tables_parse_netdev_hooks(ctx->net,
@@ -6227,15 +6247,12 @@ static int nft_flowtable_parse_hook(const struct nft_ctx *ctx,
 			return err;
 	}
 
-	flowtable_hook->priority	= priority;
-	flowtable_hook->num		= hooknum;
-
 	list_for_each_entry(hook, &flowtable_hook->list, list) {
 		hook->ops.pf		= NFPROTO_NETDEV;
-		hook->ops.hooknum	= hooknum;
-		hook->ops.priority	= priority;
-		hook->ops.priv		= ft;
-		hook->ops.hook		= ft->type->hook;
+		hook->ops.hooknum	= flowtable_hook->num;
+		hook->ops.priority	= flowtable_hook->priority;
+		hook->ops.priv		= &flowtable->data;
+		hook->ops.hook		= flowtable->data.type->hook;
 	}
 
 	return err;
@@ -6363,7 +6380,7 @@ static int nft_flowtable_update(struct nft_ctx *ctx, const struct nlmsghdr *nlh,
 	int err;
 
 	err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
-				       &flowtable_hook, &flowtable->data);
+				       &flowtable_hook, flowtable, false);
 	if (err < 0)
 		return err;
 
@@ -6492,7 +6509,7 @@ static int nf_tables_newflowtable(struct net *net, struct sock *nlsk,
 		goto err3;
 
 	err = nft_flowtable_parse_hook(&ctx, nla[NFTA_FLOWTABLE_HOOK],
-				       &flowtable_hook, &flowtable->data);
+				       &flowtable_hook, flowtable, true);
 	if (err < 0)
 		goto err4;
 
@@ -6543,7 +6560,7 @@ static int nft_delflowtable_hook(struct nft_ctx *ctx,
 	int err;
 
 	err = nft_flowtable_parse_hook(ctx, nla[NFTA_FLOWTABLE_HOOK],
-				       &flowtable_hook, &flowtable->data);
+				       &flowtable_hook, flowtable, false);
 	if (err < 0)
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From 7ff4f3f315db361e35c1d61a6fdbfddbe345b633 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 17:01:48 +0200
Subject: net: mscc: use the PHY MII ioctl interface when possible

Allow ioctl to be implemented by the PHY, when a PHY is attached to the
Ocelot switch. In case the ioctl is a request to set or get the hardware
timestamp, use the Ocelot switch implementation for now.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index e621c4c3ee86..2151c08a57c7 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1204,18 +1204,16 @@ static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ocelot *ocelot = priv->port.ocelot;
 	int port = priv->chip_port;
 
-	/* The function is only used for PTP operations for now */
-	if (!ocelot->ptp)
-		return -EOPNOTSUPP;
-
-	switch (cmd) {
-	case SIOCSHWTSTAMP:
-		return ocelot_hwstamp_set(ocelot, port, ifr);
-	case SIOCGHWTSTAMP:
-		return ocelot_hwstamp_get(ocelot, port, ifr);
-	default:
-		return -EOPNOTSUPP;
+	if (ocelot->ptp) {
+		switch (cmd) {
+		case SIOCSHWTSTAMP:
+			return ocelot_hwstamp_set(ocelot, port, ifr);
+		case SIOCGHWTSTAMP:
+			return ocelot_hwstamp_get(ocelot, port, ifr);
+		}
 	}
+
+	return phy_mii_ioctl(dev->phydev, ifr, cmd);
 }
 
 static const struct net_device_ops ocelot_port_netdev_ops = {
-- 
cgit v1.2.3-59-g8ed1b


From b2e118f638fb8984e430624a8cf27483cc23cf8d Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Tue, 26 May 2020 17:01:49 +0200
Subject: net: mscc: allow offloading timestamping operations to the PHY

This patch adds support for offloading timestamping operations not only
to the Ocelot switch (as already supported) but to compatible PHYs.
When both the PHY and the Ocelot switch support timestamping operations,
the PHY implementation is chosen as the timestamp will happen closer to
the medium.

Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mscc/ocelot.c       | 5 ++++-
 drivers/net/ethernet/mscc/ocelot_board.c | 3 ++-
 2 files changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mscc/ocelot.c b/drivers/net/ethernet/mscc/ocelot.c
index 2151c08a57c7..9cfe1fd98c30 100644
--- a/drivers/net/ethernet/mscc/ocelot.c
+++ b/drivers/net/ethernet/mscc/ocelot.c
@@ -1204,7 +1204,10 @@ static int ocelot_ioctl(struct net_device *dev, struct ifreq *ifr, int cmd)
 	struct ocelot *ocelot = priv->port.ocelot;
 	int port = priv->chip_port;
 
-	if (ocelot->ptp) {
+	/* If the attached PHY device isn't capable of timestamping operations,
+	 * use our own (when possible).
+	 */
+	if (!phy_has_hwtstamp(dev->phydev) && ocelot->ptp) {
 		switch (cmd) {
 		case SIOCSHWTSTAMP:
 			return ocelot_hwstamp_set(ocelot, port, ifr);
diff --git a/drivers/net/ethernet/mscc/ocelot_board.c b/drivers/net/ethernet/mscc/ocelot_board.c
index 67a8d61c926a..4a15d2ff8b70 100644
--- a/drivers/net/ethernet/mscc/ocelot_board.c
+++ b/drivers/net/ethernet/mscc/ocelot_board.c
@@ -189,7 +189,8 @@ static irqreturn_t ocelot_xtr_irq_handler(int irq, void *arg)
 			skb->offload_fwd_mark = 1;
 
 		skb->protocol = eth_type_trans(skb, dev);
-		netif_rx(skb);
+		if (!skb_defer_rx_timestamp(skb))
+			netif_rx(skb);
 		dev->stats.rx_bytes += len;
 		dev->stats.rx_packets++;
 	} while (ocelot_read(ocelot, QS_XTR_DATA_PRESENT) & BIT(grp));
-- 
cgit v1.2.3-59-g8ed1b


From 4cd5beaa89a95b331e5586d55469f5569faa18f6 Mon Sep 17 00:00:00 2001
From: Guangbin Huang <huangguangbin2@huawei.com>
Date: Wed, 27 May 2020 08:59:14 +0800
Subject: net: hns3: add a resetting check in
 hclgevf_init_nic_client_instance()

To prevent initializing the VF NIC client in the reset handling state,
this patch adds a resetting check in hclgevf_init_nic_client_instance().

Signed-off-by: Guangbin Huang <huangguangbin2@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 32341dcaa6c1..59fcb80671c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2713,6 +2713,7 @@ static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
 					    struct hnae3_client *client)
 {
 	struct hclgevf_dev *hdev = ae_dev->priv;
+	int rst_cnt = hdev->rst_stats.rst_cnt;
 	int ret;
 
 	ret = client->ops->init_instance(&hdev->nic);
@@ -2720,6 +2721,14 @@ static int hclgevf_init_nic_client_instance(struct hnae3_ae_dev *ae_dev,
 		return ret;
 
 	set_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+	if (test_bit(HCLGEVF_STATE_RST_HANDLING, &hdev->state) ||
+	    rst_cnt != hdev->rst_stats.rst_cnt) {
+		clear_bit(HCLGEVF_STATE_NIC_REGISTERED, &hdev->state);
+
+		client->ops->uninit_instance(&hdev->nic, 0);
+		return -EBUSY;
+	}
+
 	hnae3_set_client_init_flag(client, ae_dev, 1);
 
 	if (netif_msg_drv(&hdev->nic))
-- 
cgit v1.2.3-59-g8ed1b


From 60c800c64d8fdd2fc5b8c62686fb08c6a6fb1045 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Wed, 27 May 2020 08:59:15 +0800
Subject: net: hns3: change the order of reinitializing RoCE and NIC client
 during reset

The HNS RDMA driver will support VF devices later, whose
re-initialization should be done after the PF's. This patch
changes the order of hclge_reset_prepare_up() and
hclge_notify_roce_client(), so that PF's RoCE client
will be reinitialized before VF's.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index b796d3fb5b0b..6e1e2cf385c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -3770,11 +3770,6 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev)
 
 	hclge_clear_reset_cause(hdev);
 
-	ret = hclge_reset_prepare_up(hdev);
-	if (ret)
-		return ret;
-
-
 	ret = hclge_notify_roce_client(hdev, HNAE3_INIT_CLIENT);
 	/* ignore RoCE notify error if it fails HCLGE_RESET_MAX_FAIL_CNT - 1
 	 * times
@@ -3783,6 +3778,10 @@ static int hclge_reset_rebuild(struct hclge_dev *hdev)
 	    hdev->rst_stats.reset_fail_cnt < HCLGE_RESET_MAX_FAIL_CNT - 1)
 		return ret;
 
+	ret = hclge_reset_prepare_up(hdev);
+	if (ret)
+		return ret;
+
 	rtnl_lock();
 	ret = hclge_notify_client(hdev, HNAE3_UP_CLIENT);
 	rtnl_unlock();
-- 
cgit v1.2.3-59-g8ed1b


From 01952206e17ee34b5fe32f211619ac59ec043990 Mon Sep 17 00:00:00 2001
From: Yufeng Mo <moyufeng@huawei.com>
Date: Wed, 27 May 2020 08:59:16 +0800
Subject: net: hns3: remove unnecessary MAC enable in app loopback

Packets will not pass through MAC during app loopback.
Therefore, it is meaningless to enable MAC while doing
app loopback. This patch removes this unnecessary action.

Signed-off-by: Yufeng Mo <moyufeng@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 6e1e2cf385c8..7c9f2ba1f272 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -6583,8 +6583,6 @@ static int hclge_set_app_loopback(struct hclge_dev *hdev, bool en)
 	/* 2 Then setup the loopback flag */
 	loop_en = le32_to_cpu(req->txrx_pad_fcs_loop_en);
 	hnae3_set_bit(loop_en, HCLGE_MAC_APP_LP_B, en ? 1 : 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_TX_EN_B, en ? 1 : 0);
-	hnae3_set_bit(loop_en, HCLGE_MAC_RX_EN_B, en ? 1 : 0);
 
 	req->txrx_pad_fcs_loop_en = cpu_to_le32(loop_en);
 
-- 
cgit v1.2.3-59-g8ed1b


From 6f45a9bdd2aee6cc1e4223fdc9e4f548a3f54595 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Wed, 27 May 2020 08:59:17 +0800
Subject: net: hns3: add a print for initializing CMDQ when reset pending

When initializing the CMDQ fails because a reset is pending,
there is no hint for debugging, so add a log for it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 7f509eff562e..64a1d0bdd7d1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -426,6 +426,9 @@ int hclge_cmd_init(struct hclge_dev *hdev)
 	 * reset may happen when lower level reset is being processed.
 	 */
 	if ((hclge_is_reset_pending(hdev))) {
+		dev_err(&hdev->pdev->dev,
+			"failed to init cmd since reset %#lx pending\n",
+			hdev->reset_pending);
 		ret = -EBUSY;
 		goto err_cmd_init;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f745664257b62f6ba29f45fd21fbe7193b4da57b Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 May 2020 19:48:49 -0700
Subject: tcp: add tcp_ld_RTO_revert() helper

RFC 6069 logic has been implemented for IPv4 only so far,
right in the middle of tcp_v4_err() and was error prone.

Move this code to one helper, to make tcp_v4_err() more
readable and to eventually expand RFC 6069 to IPv6 in
the future.

Also perform sock_owned_by_user() check a bit sooner.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Tested-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 85 ++++++++++++++++++++++++++++-------------------------
 1 file changed, 45 insertions(+), 40 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 6789671f0f5a..f32dcadc91b7 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -403,6 +403,45 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 }
 EXPORT_SYMBOL(tcp_req_err);
 
+/* TCP-LD (RFC 6069) logic */
+static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
+{
+	struct inet_connection_sock *icsk = inet_csk(sk);
+	struct tcp_sock *tp = tcp_sk(sk);
+	struct sk_buff *skb;
+	s32 remaining;
+	u32 delta_us;
+
+	if (sock_owned_by_user(sk))
+		return;
+
+	if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
+	    !icsk->icsk_backoff)
+		return;
+
+	skb = tcp_rtx_queue_head(sk);
+	if (WARN_ON_ONCE(!skb))
+		return;
+
+	icsk->icsk_backoff--;
+	icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) : TCP_TIMEOUT_INIT;
+	icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
+
+	tcp_mstamp_refresh(tp);
+	delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
+	remaining = icsk->icsk_rto - usecs_to_jiffies(delta_us);
+
+	if (remaining > 0) {
+		inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
+					  remaining, TCP_RTO_MAX);
+	} else {
+		/* RTO revert clocked out retransmission.
+		 * Will retransmit now.
+		 */
+		tcp_retransmit_timer(sk);
+	}
+}
+
 /*
  * This routine is called by the ICMP module when it gets some
  * sort of error condition.  If err < 0 then the socket should
@@ -423,17 +462,13 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 {
 	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
 	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
-	struct inet_connection_sock *icsk;
 	struct tcp_sock *tp;
 	struct inet_sock *inet;
 	const int type = icmp_hdr(icmp_skb)->type;
 	const int code = icmp_hdr(icmp_skb)->code;
 	struct sock *sk;
-	struct sk_buff *skb;
 	struct request_sock *fastopen;
 	u32 seq, snd_una;
-	s32 remaining;
-	u32 delta_us;
 	int err;
 	struct net *net = dev_net(icmp_skb->dev);
 
@@ -476,7 +511,6 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		goto out;
 	}
 
-	icsk = inet_csk(sk);
 	tp = tcp_sk(sk);
 	/* XXX (TFO) - tp->snd_una should be ISN (tcp_create_openreq_child() */
 	fastopen = rcu_dereference(tp->fastopen_rsk);
@@ -521,41 +555,12 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		}
 
 		err = icmp_err_convert[code].errno;
-		/* check if icmp_skb allows revert of backoff
-		 * (see draft-zimmermann-tcp-lcd) */
-		if (code != ICMP_NET_UNREACH && code != ICMP_HOST_UNREACH)
-			break;
-		if (seq != tp->snd_una  || !icsk->icsk_retransmits ||
-		    !icsk->icsk_backoff || fastopen)
-			break;
-
-		if (sock_owned_by_user(sk))
-			break;
-
-		skb = tcp_rtx_queue_head(sk);
-		if (WARN_ON_ONCE(!skb))
-			break;
-
-		icsk->icsk_backoff--;
-		icsk->icsk_rto = tp->srtt_us ? __tcp_set_rto(tp) :
-					       TCP_TIMEOUT_INIT;
-		icsk->icsk_rto = inet_csk_rto_backoff(icsk, TCP_RTO_MAX);
-
-
-		tcp_mstamp_refresh(tp);
-		delta_us = (u32)(tp->tcp_mstamp - tcp_skb_timestamp_us(skb));
-		remaining = icsk->icsk_rto -
-			    usecs_to_jiffies(delta_us);
-
-		if (remaining > 0) {
-			inet_csk_reset_xmit_timer(sk, ICSK_TIME_RETRANS,
-						  remaining, TCP_RTO_MAX);
-		} else {
-			/* RTO revert clocked out retransmission.
-			 * Will retransmit now */
-			tcp_retransmit_timer(sk);
-		}
-
+		/* check if this ICMP message allows revert of backoff.
+		 * (see RFC 6069)
+		 */
+		if (!fastopen &&
+		    (code == ICMP_NET_UNREACH || code == ICMP_HOST_UNREACH))
+			tcp_ld_RTO_revert(sk, seq);
 		break;
 	case ICMP_TIME_EXCEEDED:
 		err = EHOSTUNREACH;
-- 
cgit v1.2.3-59-g8ed1b


From a12daf13a4492bb7fb26231e52afb381927f938e Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Tue, 26 May 2020 19:48:50 -0700
Subject: tcp: rename tcp_v4_err() skb parameter

This essentially reverts 4d1a2d9ec1c1 ("Revert Backoff [v3]:
Rename skb to icmp_skb in tcp_v4_err()")

Now we have tcp_ld_RTO_revert() helper, we can use the usual
name for sk_buff parameter, so that tcp_v4_err() and
tcp_v6_err() use similar names.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/tcp_ipv4.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index f32dcadc91b7..4eef5b84fff1 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -458,23 +458,23 @@ static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
  *
  */
 
-int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
+int tcp_v4_err(struct sk_buff *skb, u32 info)
 {
-	const struct iphdr *iph = (const struct iphdr *)icmp_skb->data;
-	struct tcphdr *th = (struct tcphdr *)(icmp_skb->data + (iph->ihl << 2));
+	const struct iphdr *iph = (const struct iphdr *)skb->data;
+	struct tcphdr *th = (struct tcphdr *)(skb->data + (iph->ihl << 2));
 	struct tcp_sock *tp;
 	struct inet_sock *inet;
-	const int type = icmp_hdr(icmp_skb)->type;
-	const int code = icmp_hdr(icmp_skb)->code;
+	const int type = icmp_hdr(skb)->type;
+	const int code = icmp_hdr(skb)->code;
 	struct sock *sk;
 	struct request_sock *fastopen;
 	u32 seq, snd_una;
 	int err;
-	struct net *net = dev_net(icmp_skb->dev);
+	struct net *net = dev_net(skb->dev);
 
 	sk = __inet_lookup_established(net, &tcp_hashinfo, iph->daddr,
 				       th->dest, iph->saddr, ntohs(th->source),
-				       inet_iif(icmp_skb), 0);
+				       inet_iif(skb), 0);
 	if (!sk) {
 		__ICMP_INC_STATS(net, ICMP_MIB_INERRORS);
 		return -ENOENT;
@@ -524,7 +524,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 	switch (type) {
 	case ICMP_REDIRECT:
 		if (!sock_owned_by_user(sk))
-			do_redirect(icmp_skb, sk);
+			do_redirect(skb, sk);
 		goto out;
 	case ICMP_SOURCE_QUENCH:
 		/* Just silently ignore these. */
@@ -578,7 +578,7 @@ int tcp_v4_err(struct sk_buff *icmp_skb, u32 info)
 		if (fastopen && !fastopen->sk)
 			break;
 
-		ip_icmp_error(sk, icmp_skb, err, th->dest, info, (u8 *)th);
+		ip_icmp_error(sk, skb, err, th->dest, info, (u8 *)th);
 
 		if (!sock_owned_by_user(sk)) {
 			sk->sk_err = err;
-- 
cgit v1.2.3-59-g8ed1b


From 8fa54b1160721e5d94a039cd736aa0c63044d58a Mon Sep 17 00:00:00 2001
From: Wang Wenhu <wenhu.wang@vivo.com>
Date: Tue, 26 May 2020 20:19:24 -0700
Subject: drivers: ipa: fix typos for ipa

Change "transactio" -> "transaction". Also an alignment correction.

Signed-off-by: Wang Wenhu <wenhu.wang@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/gsi.c          | 2 +-
 drivers/net/ipa/ipa_endpoint.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ipa/gsi.c b/drivers/net/ipa/gsi.c
index 012304ddaed2..55226b264e3c 100644
--- a/drivers/net/ipa/gsi.c
+++ b/drivers/net/ipa/gsi.c
@@ -1358,7 +1358,7 @@ static void gsi_channel_update(struct gsi_channel *channel)
  * gsi_channel_poll_one() - Return a single completed transaction on a channel
  * @channel:	Channel to be polled
  *
- * @Return:	 Transaction pointer, or null if none are available
+ * @Return:	Transaction pointer, or null if none are available
  *
  * This function returns the first entry on a channel's completed transaction
  * list.  If that list is empty, the hardware is consulted to determine
diff --git a/drivers/net/ipa/ipa_endpoint.c b/drivers/net/ipa/ipa_endpoint.c
index 82066a223a67..66649a806dd1 100644
--- a/drivers/net/ipa/ipa_endpoint.c
+++ b/drivers/net/ipa/ipa_endpoint.c
@@ -364,7 +364,7 @@ int ipa_endpoint_modem_exception_reset_all(struct ipa *ipa)
 	/* We need one command per modem TX endpoint.  We can get an upper
 	 * bound on that by assuming all initialized endpoints are modem->IPA.
 	 * That won't happen, and we could be more precise, but this is fine
-	 * for now.  We need to end the transactio with a "tag process."
+	 * for now.  We need to end the transaction with a "tag process."
 	 */
 	count = hweight32(initialized) + ipa_cmd_tag_process_count();
 	trans = ipa_cmd_trans_alloc(ipa, count);
-- 
cgit v1.2.3-59-g8ed1b


From b3037ac50130a169c3980e63f8df2f0b599db411 Mon Sep 17 00:00:00 2001
From: Wang Wenhu <wenhu.wang@vivo.com>
Date: Tue, 26 May 2020 20:32:22 -0700
Subject: drivers: ipa: remove description of nonexistent element

No element named "client" exists within "struct ipa_endpoint".
It is probably a leftover that was never removed. Delete it now.

Signed-off-by: Wang Wenhu <wenhu.wang@vivo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ipa/ipa_endpoint.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ipa/ipa_endpoint.h b/drivers/net/ipa/ipa_endpoint.h
index 3b297d65828e..58a245de488e 100644
--- a/drivers/net/ipa/ipa_endpoint.h
+++ b/drivers/net/ipa/ipa_endpoint.h
@@ -41,7 +41,6 @@ enum ipa_endpoint_name {
 
 /**
  * struct ipa_endpoint - IPA endpoint information
- * @client:	Client associated with the endpoint
  * @channel_id:	EP's GSI channel
  * @evt_ring_id: EP's GSI channel event ring
  */
-- 
cgit v1.2.3-59-g8ed1b


From 4909daba37846317ec7dcba16fba009636f7fe21 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 26 May 2020 21:35:23 -0700
Subject: net_sched: use qdisc_reset() in qdisc_destroy()

qdisc_destroy() calls ops->reset() and cleans up qdisc->gso_skb
and qdisc->skb_bad_txq, which is nearly the same as what qdisc_reset()
does, so just call it directly and consolidate the code for the next
patch.

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 16 +++-------------
 1 file changed, 3 insertions(+), 13 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index ebc55d884247..7a0b06001e48 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -949,7 +949,6 @@ static void qdisc_free_cb(struct rcu_head *head)
 static void qdisc_destroy(struct Qdisc *qdisc)
 {
 	const struct Qdisc_ops  *ops = qdisc->ops;
-	struct sk_buff *skb, *tmp;
 
 #ifdef CONFIG_NET_SCHED
 	qdisc_hash_del(qdisc);
@@ -957,24 +956,15 @@ static void qdisc_destroy(struct Qdisc *qdisc)
 	qdisc_put_stab(rtnl_dereference(qdisc->stab));
 #endif
 	gen_kill_estimator(&qdisc->rate_est);
-	if (ops->reset)
-		ops->reset(qdisc);
+
+	qdisc_reset(qdisc);
+
 	if (ops->destroy)
 		ops->destroy(qdisc);
 
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
-	skb_queue_walk_safe(&qdisc->gso_skb, skb, tmp) {
-		__skb_unlink(skb, &qdisc->gso_skb);
-		kfree_skb_list(skb);
-	}
-
-	skb_queue_walk_safe(&qdisc->skb_bad_txq, skb, tmp) {
-		__skb_unlink(skb, &qdisc->skb_bad_txq);
-		kfree_skb_list(skb);
-	}
-
 	call_rcu(&qdisc->rcu, qdisc_free_cb);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a34dac0b9055202cf9c64e08d8d8dc5e23029d3a Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 26 May 2020 21:35:24 -0700
Subject: net_sched: add tracepoints for qdisc_reset() and qdisc_destroy()

Add two tracepoints for qdisc_reset() and qdisc_destroy() to track
qdisc resetting and destroying.

Sample output:

  tc-756   [000] ...3   138.355662: qdisc_reset: dev=ens3 kind=pfifo_fast parent=ffff:ffff handle=0:0
  tc-756   [000] ...1   138.355720: qdisc_reset: dev=ens3 kind=pfifo_fast parent=ffff:ffff handle=0:0
  tc-756   [000] ...1   138.355867: qdisc_reset: dev=ens3 kind=pfifo_fast parent=ffff:ffff handle=0:0
  tc-756   [000] ...1   138.355930: qdisc_destroy: dev=ens3 kind=pfifo_fast parent=ffff:ffff handle=0:0
  tc-757   [000] ...2   143.073780: qdisc_reset: dev=ens3 kind=fq_codel parent=ffff:ffff handle=8001:0
  tc-757   [000] ...1   143.073878: qdisc_reset: dev=ens3 kind=fq_codel parent=ffff:ffff handle=8001:0
  tc-757   [000] ...1   143.074114: qdisc_reset: dev=ens3 kind=fq_codel parent=ffff:ffff handle=8001:0
  tc-757   [000] ...1   143.074228: qdisc_destroy: dev=ens3 kind=fq_codel parent=ffff:ffff handle=8001:0

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/qdisc.h | 52 ++++++++++++++++++++++++++++++++++++++++++++
 net/sched/sch_generic.c      |  4 ++++
 2 files changed, 56 insertions(+)

diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index 0d1a9ebf55ba..2b948801afa3 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -8,6 +8,8 @@
 #include <linux/netdevice.h>
 #include <linux/tracepoint.h>
 #include <linux/ftrace.h>
+#include <linux/pkt_sched.h>
+#include <net/sch_generic.h>
 
 TRACE_EVENT(qdisc_dequeue,
 
@@ -44,6 +46,56 @@ TRACE_EVENT(qdisc_dequeue,
 		  __entry->txq_state, __entry->packets, __entry->skbaddr )
 );
 
+TRACE_EVENT(qdisc_reset,
+
+	TP_PROTO(struct Qdisc *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__string(	dev,		qdisc_dev(q)	)
+		__string(	kind,		q->ops->id	)
+		__field(	u32,		parent		)
+		__field(	u32,		handle		)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev, qdisc_dev(q));
+		__assign_str(kind, q->ops->id);
+		__entry->parent = q->parent;
+		__entry->handle = q->handle;
+	),
+
+	TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
+		  __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
+		  TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
+);
+
+TRACE_EVENT(qdisc_destroy,
+
+	TP_PROTO(struct Qdisc *q),
+
+	TP_ARGS(q),
+
+	TP_STRUCT__entry(
+		__string(	dev,		qdisc_dev(q)	)
+		__string(	kind,		q->ops->id	)
+		__field(	u32,		parent		)
+		__field(	u32,		handle		)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev, qdisc_dev(q));
+		__assign_str(kind, q->ops->id);
+		__entry->parent = q->parent;
+		__entry->handle = q->handle;
+	),
+
+	TP_printk("dev=%s kind=%s parent=%x:%x handle=%x:%x", __get_str(dev),
+		  __get_str(kind), TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent),
+		  TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
+);
+
 #endif /* _TRACE_QDISC_H */
 
 /* This part must be outside protection */
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index 7a0b06001e48..abaa446ed01a 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -911,6 +911,8 @@ void qdisc_reset(struct Qdisc *qdisc)
 	const struct Qdisc_ops *ops = qdisc->ops;
 	struct sk_buff *skb, *tmp;
 
+	trace_qdisc_reset(qdisc);
+
 	if (ops->reset)
 		ops->reset(qdisc);
 
@@ -965,6 +967,8 @@ static void qdisc_destroy(struct Qdisc *qdisc)
 	module_put(ops->owner);
 	dev_put(qdisc_dev(qdisc));
 
+	trace_qdisc_destroy(qdisc);
+
 	call_rcu(&qdisc->rcu, qdisc_free_cb);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From f5a7833e83628f18c1ee94e6ffcb1d232f029be9 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 26 May 2020 21:35:25 -0700
Subject: net_sched: add a tracepoint for qdisc creation

With this tracepoint, we could know when qdisc's are created,
especially those default qdisc's.

Sample output:

  tc-736   [001] ...1    56.230107: qdisc_create: dev=ens3 kind=pfifo parent=1:0
  tc-736   [001] ...1    56.230113: qdisc_create: dev=ens3 kind=hfsc parent=ffff:ffff
  tc-738   [001] ...1    56.256816: qdisc_create: dev=ens3 kind=pfifo parent=1:100
  tc-739   [001] ...1    56.267584: qdisc_create: dev=ens3 kind=pfifo parent=1:200
  tc-740   [001] ...1    56.279649: qdisc_create: dev=ens3 kind=fq_codel parent=1:100
  tc-741   [001] ...1    56.289996: qdisc_create: dev=ens3 kind=pfifo_fast parent=1:200
  tc-745   [000] .N.1   111.687483: qdisc_create: dev=ens3 kind=ingress parent=ffff:fff1

Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/trace/events/qdisc.h | 23 +++++++++++++++++++++++
 net/sched/sch_api.c          |  3 +++
 net/sched/sch_generic.c      |  4 +++-
 3 files changed, 29 insertions(+), 1 deletion(-)

diff --git a/include/trace/events/qdisc.h b/include/trace/events/qdisc.h
index 2b948801afa3..330d32d84485 100644
--- a/include/trace/events/qdisc.h
+++ b/include/trace/events/qdisc.h
@@ -96,6 +96,29 @@ TRACE_EVENT(qdisc_destroy,
 		  TC_H_MAJ(__entry->handle) >> 16, TC_H_MIN(__entry->handle))
 );
 
+TRACE_EVENT(qdisc_create,
+
+	TP_PROTO(const struct Qdisc_ops *ops, struct net_device *dev, u32 parent),
+
+	TP_ARGS(ops, dev, parent),
+
+	TP_STRUCT__entry(
+		__string(	dev,		dev->name	)
+		__string(	kind,		ops->id		)
+		__field(	u32,		parent		)
+	),
+
+	TP_fast_assign(
+		__assign_str(dev, dev->name);
+		__assign_str(kind, ops->id);
+		__entry->parent = parent;
+	),
+
+	TP_printk("dev=%s kind=%s parent=%x:%x",
+		  __get_str(dev), __get_str(kind),
+		  TC_H_MAJ(__entry->parent) >> 16, TC_H_MIN(__entry->parent))
+);
+
 #endif /* _TRACE_QDISC_H */
 
 /* This part must be outside protection */
diff --git a/net/sched/sch_api.c b/net/sched/sch_api.c
index 0d99df1e764d..9a3449b56bd6 100644
--- a/net/sched/sch_api.c
+++ b/net/sched/sch_api.c
@@ -32,6 +32,8 @@
 #include <net/pkt_sched.h>
 #include <net/pkt_cls.h>
 
+#include <trace/events/qdisc.h>
+
 /*
 
    Short review.
@@ -1283,6 +1285,7 @@ static struct Qdisc *qdisc_create(struct net_device *dev,
 	}
 
 	qdisc_hash_add(sch, false);
+	trace_qdisc_create(ops, dev, parent);
 
 	return sch;
 
diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index abaa446ed01a..a4271e47f220 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -896,8 +896,10 @@ struct Qdisc *qdisc_create_dflt(struct netdev_queue *dev_queue,
 	}
 	sch->parent = parentid;
 
-	if (!ops->init || ops->init(sch, NULL, extack) == 0)
+	if (!ops->init || ops->init(sch, NULL, extack) == 0) {
+		trace_qdisc_create(ops, dev_queue->dev, parentid);
 		return sch;
+	}
 
 	qdisc_put(sch);
 	return NULL;
-- 
cgit v1.2.3-59-g8ed1b


From 70f50965338a12e17454c31ad5ece27069719358 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 26 May 2020 21:35:26 -0700
Subject: net_sched: avoid resetting active qdisc for multiple times
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Except for sch_mq and sch_mqprio, each dev queue points to the
same root qdisc, so when we reset the dev queues with
netdev_for_each_tx_queue() we end up resetting the same instance
of the root qdisc multiple times.

Avoid this by checking the __QDISC_STATE_DEACTIVATED bit in
each iteration, so for sch_mq/sch_mqprio, we still reset all
of them like before; for the rest, we only reset it once.

Reported-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Tested-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 37 +++++++++++++++++++++++--------------
 1 file changed, 23 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index a4271e47f220..d13e27467470 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1128,6 +1128,28 @@ void dev_activate(struct net_device *dev)
 }
 EXPORT_SYMBOL(dev_activate);
 
+static void qdisc_deactivate(struct Qdisc *qdisc)
+{
+	bool nolock = qdisc->flags & TCQ_F_NOLOCK;
+
+	if (qdisc->flags & TCQ_F_BUILTIN)
+		return;
+	if (test_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state))
+		return;
+
+	if (nolock)
+		spin_lock_bh(&qdisc->seqlock);
+	spin_lock_bh(qdisc_lock(qdisc));
+
+	set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
+
+	qdisc_reset(qdisc);
+
+	spin_unlock_bh(qdisc_lock(qdisc));
+	if (nolock)
+		spin_unlock_bh(&qdisc->seqlock);
+}
+
 static void dev_deactivate_queue(struct net_device *dev,
 				 struct netdev_queue *dev_queue,
 				 void *_qdisc_default)
@@ -1137,21 +1159,8 @@ static void dev_deactivate_queue(struct net_device *dev,
 
 	qdisc = rtnl_dereference(dev_queue->qdisc);
 	if (qdisc) {
-		bool nolock = qdisc->flags & TCQ_F_NOLOCK;
-
-		if (nolock)
-			spin_lock_bh(&qdisc->seqlock);
-		spin_lock_bh(qdisc_lock(qdisc));
-
-		if (!(qdisc->flags & TCQ_F_BUILTIN))
-			set_bit(__QDISC_STATE_DEACTIVATED, &qdisc->state);
-
+		qdisc_deactivate(qdisc);
 		rcu_assign_pointer(dev_queue->qdisc, qdisc_default);
-		qdisc_reset(qdisc);
-
-		spin_unlock_bh(qdisc_lock(qdisc));
-		if (nolock)
-			spin_unlock_bh(&qdisc->seqlock);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 759ae57f1b7bf5dfea1816f786f568d69efe34e2 Mon Sep 17 00:00:00 2001
From: Cong Wang <xiyou.wangcong@gmail.com>
Date: Tue, 26 May 2020 21:35:27 -0700
Subject: net_sched: get rid of unnecessary dev_qdisc_reset()
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Resetting the old qdisc on dev_queue->qdisc_sleeping in
dev_qdisc_reset() is redundant, because this qdisc,
even if it is not the same as dev_queue->qdisc, is reset via
qdisc_put() right after calling dev_graft_qdisc() when
its refcnt hits 0.

This is very easy to observe with qdisc_reset() tracepoint
and stack traces.

Reported-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Tested-by: Václav Zindulka <vaclav.zindulka@tlapnet.cz>
Cc: Jamal Hadi Salim <jhs@mojatatu.com>
Cc: Jiri Pirko <jiri@resnulli.us>
Signed-off-by: Cong Wang <xiyou.wangcong@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_generic.c | 16 ----------------
 1 file changed, 16 deletions(-)

diff --git a/net/sched/sch_generic.c b/net/sched/sch_generic.c
index d13e27467470..b19a0021a0bd 100644
--- a/net/sched/sch_generic.c
+++ b/net/sched/sch_generic.c
@@ -1191,16 +1191,6 @@ static bool some_qdisc_is_busy(struct net_device *dev)
 	return false;
 }
 
-static void dev_qdisc_reset(struct net_device *dev,
-			    struct netdev_queue *dev_queue,
-			    void *none)
-{
-	struct Qdisc *qdisc = dev_queue->qdisc_sleeping;
-
-	if (qdisc)
-		qdisc_reset(qdisc);
-}
-
 /**
  * 	dev_deactivate_many - deactivate transmissions on several devices
  * 	@head: list of devices to deactivate
@@ -1237,12 +1227,6 @@ void dev_deactivate_many(struct list_head *head)
 			 */
 			schedule_timeout_uninterruptible(1);
 		}
-		/* The new qdisc is assigned at this point so we can safely
-		 * unwind stale skb lists and qdisc statistics
-		 */
-		netdev_for_each_tx_queue(dev, dev_qdisc_reset, NULL);
-		if (dev_ingress_queue(dev))
-			dev_qdisc_reset(dev, dev_ingress_queue(dev), NULL);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From b3ae2459f89773adcbf16fef4b68deaaa3be1929 Mon Sep 17 00:00:00 2001
From: Tariq Toukan <tariqt@mellanox.com>
Date: Wed, 27 May 2020 12:25:26 +0300
Subject: net/tls: Add force_resync for driver resync

This patch adds a field to the tls rx offload context which enables
drivers to force a send_resync call.

This field can be used by drivers to request a resync at the next
possible tls record. It is beneficial for hardware that provides the
resync sequence number asynchronously. In such cases, the packet that
triggered the resync does not contain the information required for a
resync. Instead, the driver requests resync for all the following
TLS records until the asynchronous notification with the resync request
TCP sequence arrives.

A following series for mlx5e ConnectX-6DX TLS RX offload support will
use this mechanism.

Signed-off-by: Boris Pismenny <borisp@mellanox.com>
Signed-off-by: Tariq Toukan <tariqt@mellanox.com>
Reviewed-by: Maxim Mikityanskiy <maximmi@mellanox.com>
Reviewed-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tls.h    | 12 +++++++++++-
 net/tls/tls_device.c |  9 ++++++---
 2 files changed, 17 insertions(+), 4 deletions(-)

diff --git a/include/net/tls.h b/include/net/tls.h
index bf9eb4823933..cf9ec152fbb7 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -594,12 +594,22 @@ tls_driver_ctx(const struct sock *sk, enum tls_offload_ctx_dir direction)
 #endif
 
 /* The TLS context is valid until sk_destruct is called */
+#define RESYNC_REQ (1 << 0)
+#define RESYNC_REQ_FORCE (1 << 1)
 static inline void tls_offload_rx_resync_request(struct sock *sk, __be32 seq)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
 
-	atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | 1);
+	atomic64_set(&rx_ctx->resync_req, ((u64)ntohl(seq) << 32) | RESYNC_REQ);
+}
+
+static inline void tls_offload_rx_force_resync_request(struct sock *sk)
+{
+	struct tls_context *tls_ctx = tls_get_ctx(sk);
+	struct tls_offload_context_rx *rx_ctx = tls_offload_ctx_rx(tls_ctx);
+
+	atomic64_set(&rx_ctx->resync_req, RESYNC_REQ | RESYNC_REQ_FORCE);
 }
 
 static inline void
diff --git a/net/tls/tls_device.c b/net/tls/tls_device.c
index a562ebaaa33c..0e55f8365ce2 100644
--- a/net/tls/tls_device.c
+++ b/net/tls/tls_device.c
@@ -694,10 +694,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct tls_offload_context_rx *rx_ctx;
+	bool is_req_pending, is_force_resync;
 	u8 rcd_sn[TLS_MAX_REC_SEQ_SIZE];
-	u32 sock_data, is_req_pending;
 	struct tls_prot_info *prot;
 	s64 resync_req;
+	u32 sock_data;
 	u32 req_seq;
 
 	if (tls_ctx->rx_conf != TLS_HW)
@@ -712,9 +713,11 @@ void tls_device_rx_resync_new_rec(struct sock *sk, u32 rcd_len, u32 seq)
 		resync_req = atomic64_read(&rx_ctx->resync_req);
 		req_seq = resync_req >> 32;
 		seq += TLS_HEADER_SIZE - 1;
-		is_req_pending = resync_req;
+		is_req_pending = resync_req & RESYNC_REQ;
+		is_force_resync = resync_req & RESYNC_REQ_FORCE;
 
-		if (likely(!is_req_pending) || req_seq != seq ||
+		if (likely(!is_req_pending) ||
+		    (!is_force_resync && req_seq != seq) ||
 		    !atomic64_try_cmpxchg(&rx_ctx->resync_req, &resync_req, 0))
 			return;
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 50ce4c099bebf56be86c9448f7f4bcd34f33663c Mon Sep 17 00:00:00 2001
From: Jonas Falkevik <jonas.falkevik@gmail.com>
Date: Wed, 27 May 2020 11:59:43 +0200
Subject: sctp: fix typo sctp_ulpevent_nofity_peer_addr_change

Fix the typo in the function name: "nofity" should be "notify".
sctp_ulpevent_nofity_peer_addr_change ->
sctp_ulpevent_notify_peer_addr_change

Signed-off-by: Jonas Falkevik <jonas.falkevik@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sctp/ulpevent.h | 2 +-
 net/sctp/associola.c        | 8 ++++----
 net/sctp/ulpevent.c         | 2 +-
 3 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/include/net/sctp/ulpevent.h b/include/net/sctp/ulpevent.h
index 0b032b92da0b..994e984eef32 100644
--- a/include/net/sctp/ulpevent.h
+++ b/include/net/sctp/ulpevent.h
@@ -80,7 +80,7 @@ struct sctp_ulpevent *sctp_ulpevent_make_assoc_change(
 	struct sctp_chunk *chunk,
 	gfp_t gfp);
 
-void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+void sctp_ulpevent_notify_peer_addr_change(struct sctp_transport *transport,
 					   int state, int error);
 
 struct sctp_ulpevent *sctp_ulpevent_make_remote_error(
diff --git a/net/sctp/associola.c b/net/sctp/associola.c
index 437079a4883d..72315137d7e7 100644
--- a/net/sctp/associola.c
+++ b/net/sctp/associola.c
@@ -432,7 +432,7 @@ void sctp_assoc_set_primary(struct sctp_association *asoc,
 		changeover = 1 ;
 
 	asoc->peer.primary_path = transport;
-	sctp_ulpevent_nofity_peer_addr_change(transport,
+	sctp_ulpevent_notify_peer_addr_change(transport,
 					      SCTP_ADDR_MADE_PRIM, 0);
 
 	/* Set a default msg_name for events. */
@@ -574,7 +574,7 @@ void sctp_assoc_rm_peer(struct sctp_association *asoc,
 
 	asoc->peer.transport_count--;
 
-	sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
+	sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_REMOVED, 0);
 	sctp_transport_free(peer);
 }
 
@@ -714,7 +714,7 @@ struct sctp_transport *sctp_assoc_add_peer(struct sctp_association *asoc,
 	list_add_tail_rcu(&peer->transports, &asoc->peer.transport_addr_list);
 	asoc->peer.transport_count++;
 
-	sctp_ulpevent_nofity_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
+	sctp_ulpevent_notify_peer_addr_change(peer, SCTP_ADDR_ADDED, 0);
 
 	/* If we do not yet have a primary path, set one.  */
 	if (!asoc->peer.primary_path) {
@@ -840,7 +840,7 @@ void sctp_assoc_control_transport(struct sctp_association *asoc,
 	 * to the user.
 	 */
 	if (ulp_notify)
-		sctp_ulpevent_nofity_peer_addr_change(transport,
+		sctp_ulpevent_notify_peer_addr_change(transport,
 						      spc_state, error);
 
 	/* Select new active and retran paths. */
diff --git a/net/sctp/ulpevent.c b/net/sctp/ulpevent.c
index c82dbdcf13f2..f0640306e77f 100644
--- a/net/sctp/ulpevent.c
+++ b/net/sctp/ulpevent.c
@@ -336,7 +336,7 @@ fail:
 	return NULL;
 }
 
-void sctp_ulpevent_nofity_peer_addr_change(struct sctp_transport *transport,
+void sctp_ulpevent_notify_peer_addr_change(struct sctp_transport *transport,
 					   int state, int error)
 {
 	struct sctp_association *asoc = transport->asoc;
-- 
cgit v1.2.3-59-g8ed1b


From 0774dc7643db525f0bb9d0aa212cbfad3a412fc6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 27 May 2020 20:22:28 +0200
Subject: dlm: use the tcp version of accept_from_sock for sctp as well

The only difference, besides a few fixes missing from the SCTP one, is
that TCP uses ->getpeername to get the remote address, while
SCTP uses kernel_getsockopt(.. SCTP_PRIMARY_ADDR).  But given that
getpeername is defined to return the primary address for sctp, there
doesn't seem to be any reason for the different way of querying the
peername, or all the code duplication.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c | 123 ++----------------------------------------------------
 1 file changed, 3 insertions(+), 120 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index cdfaf4f0e11a..f13dad0fd9ef 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -724,7 +724,7 @@ out_close:
 }
 
 /* Listening socket is busy, accept a connection */
-static int tcp_accept_from_sock(struct connection *con)
+static int accept_from_sock(struct connection *con)
 {
 	int result;
 	struct sockaddr_storage peeraddr;
@@ -852,123 +852,6 @@ accept_err:
 	return result;
 }
 
-static int sctp_accept_from_sock(struct connection *con)
-{
-	/* Check that the new node is in the lockspace */
-	struct sctp_prim prim;
-	int nodeid;
-	int prim_len, ret;
-	int addr_len;
-	struct connection *newcon;
-	struct connection *addcon;
-	struct socket *newsock;
-
-	mutex_lock(&connections_lock);
-	if (!dlm_allow_conn) {
-		mutex_unlock(&connections_lock);
-		return -1;
-	}
-	mutex_unlock(&connections_lock);
-
-	mutex_lock_nested(&con->sock_mutex, 0);
-
-	ret = kernel_accept(con->sock, &newsock, O_NONBLOCK);
-	if (ret < 0)
-		goto accept_err;
-
-	memset(&prim, 0, sizeof(struct sctp_prim));
-	prim_len = sizeof(struct sctp_prim);
-
-	ret = kernel_getsockopt(newsock, IPPROTO_SCTP, SCTP_PRIMARY_ADDR,
-				(char *)&prim, &prim_len);
-	if (ret < 0) {
-		log_print("getsockopt/sctp_primary_addr failed: %d", ret);
-		goto accept_err;
-	}
-
-	make_sockaddr(&prim.ssp_addr, 0, &addr_len);
-	ret = addr_to_nodeid(&prim.ssp_addr, &nodeid);
-	if (ret) {
-		unsigned char *b = (unsigned char *)&prim.ssp_addr;
-
-		log_print("reject connect from unknown addr");
-		print_hex_dump_bytes("ss: ", DUMP_PREFIX_NONE,
-				     b, sizeof(struct sockaddr_storage));
-		goto accept_err;
-	}
-
-	newcon = nodeid2con(nodeid, GFP_NOFS);
-	if (!newcon) {
-		ret = -ENOMEM;
-		goto accept_err;
-	}
-
-	mutex_lock_nested(&newcon->sock_mutex, 1);
-
-	if (newcon->sock) {
-		struct connection *othercon = newcon->othercon;
-
-		if (!othercon) {
-			othercon = kmem_cache_zalloc(con_cache, GFP_NOFS);
-			if (!othercon) {
-				log_print("failed to allocate incoming socket");
-				mutex_unlock(&newcon->sock_mutex);
-				ret = -ENOMEM;
-				goto accept_err;
-			}
-			othercon->nodeid = nodeid;
-			othercon->rx_action = receive_from_sock;
-			mutex_init(&othercon->sock_mutex);
-			INIT_LIST_HEAD(&othercon->writequeue);
-			spin_lock_init(&othercon->writequeue_lock);
-			INIT_WORK(&othercon->swork, process_send_sockets);
-			INIT_WORK(&othercon->rwork, process_recv_sockets);
-			set_bit(CF_IS_OTHERCON, &othercon->flags);
-		}
-		mutex_lock_nested(&othercon->sock_mutex, 2);
-		if (!othercon->sock) {
-			newcon->othercon = othercon;
-			add_sock(newsock, othercon);
-			addcon = othercon;
-			mutex_unlock(&othercon->sock_mutex);
-		} else {
-			printk("Extra connection from node %d attempted\n", nodeid);
-			ret = -EAGAIN;
-			mutex_unlock(&othercon->sock_mutex);
-			mutex_unlock(&newcon->sock_mutex);
-			goto accept_err;
-		}
-	} else {
-		newcon->rx_action = receive_from_sock;
-		add_sock(newsock, newcon);
-		addcon = newcon;
-	}
-
-	log_print("connected to %d", nodeid);
-
-	mutex_unlock(&newcon->sock_mutex);
-
-	/*
-	 * Add it to the active queue in case we got data
-	 * between processing the accept adding the socket
-	 * to the read_sockets list
-	 */
-	if (!test_and_set_bit(CF_READ_PENDING, &addcon->flags))
-		queue_work(recv_workqueue, &addcon->rwork);
-	mutex_unlock(&con->sock_mutex);
-
-	return 0;
-
-accept_err:
-	mutex_unlock(&con->sock_mutex);
-	if (newsock)
-		sock_release(newsock);
-	if (ret != -EAGAIN)
-		log_print("error accepting connection from node: %d", ret);
-
-	return ret;
-}
-
 static void free_entry(struct writequeue_entry *e)
 {
 	__free_page(e->page);
@@ -1253,7 +1136,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	sock->sk->sk_user_data = con;
 	save_listen_callbacks(sock);
-	con->rx_action = tcp_accept_from_sock;
+	con->rx_action = accept_from_sock;
 	con->connect_action = tcp_connect_to_sock;
 	write_unlock_bh(&sock->sk->sk_callback_lock);
 
@@ -1340,7 +1223,7 @@ static int sctp_listen_for_all(void)
 	save_listen_callbacks(sock);
 	con->sock = sock;
 	con->sock->sk->sk_data_ready = lowcomms_data_ready;
-	con->rx_action = sctp_accept_from_sock;
+	con->rx_action = accept_from_sock;
 	con->connect_action = sctp_connect_to_sock;
 
 	write_unlock_bh(&sock->sk->sk_callback_lock);
-- 
cgit v1.2.3-59-g8ed1b


From 7a15b2e013f535a125ad7351ffc808c79bc6de35 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Wed, 27 May 2020 20:22:29 +0200
Subject: net: remove kernel_getsockopt

No users left.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  2 --
 net/socket.c        | 34 ----------------------------------
 2 files changed, 36 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 6451425e828f..74ef5d7315f7 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags);
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
-int kernel_getsockopt(struct socket *sock, int level, int optname, char *optval,
-		      int *optlen);
 int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
 		      unsigned int optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
diff --git a/net/socket.c b/net/socket.c
index 80422fc3c836..81a98b6cbd08 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3624,40 +3624,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
-/**
- *	kernel_getsockopt - get a socket option (kernel space)
- *	@sock: socket
- *	@level: API level (SOL_SOCKET, ...)
- *	@optname: option tag
- *	@optval: option value
- *	@optlen: option length
- *
- *	Assigns the option length to @optlen.
- *	Returns 0 or an error.
- */
-
-int kernel_getsockopt(struct socket *sock, int level, int optname,
-			char *optval, int *optlen)
-{
-	mm_segment_t oldfs = get_fs();
-	char __user *uoptval;
-	int __user *uoptlen;
-	int err;
-
-	uoptval = (char __user __force *) optval;
-	uoptlen = (int __user __force *) optlen;
-
-	set_fs(KERNEL_DS);
-	if (level == SOL_SOCKET)
-		err = sock_getsockopt(sock, level, optname, uoptval, uoptlen);
-	else
-		err = sock->ops->getsockopt(sock, level, optname, uoptval,
-					    uoptlen);
-	set_fs(oldfs);
-	return err;
-}
-EXPORT_SYMBOL(kernel_getsockopt);
-
 /**
  *	kernel_setsockopt - set a socket option (kernel space)
  *	@sock: socket
-- 
cgit v1.2.3-59-g8ed1b


From 22bef5e78f1193b664f59834361704cb22f9d5d7 Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:36:38 -0700
Subject: ice: fix signed vs unsigned comparisons

Fix the remaining signed vs unsigned issues, which appear
when compiling with -Werror=sign-compare.

Many of these are because there is an external interface that is passing
an int to us (which we can't change) but that we (rightfully) store
and compare against as an unsigned in our data structures.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c | 4 ++--
 drivers/net/ethernet/intel/ice/ice_main.c | 8 ++++----
 drivers/net/ethernet/intel/ice/ice_txrx.h | 7 ++++---
 3 files changed, 10 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 94d833b4e745..9452c0eb70b0 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -13,7 +13,7 @@
  */
 static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
 {
-	int offset, i;
+	unsigned int offset, i;
 
 	mutex_lock(qs_cfg->qs_mutex);
 	offset = bitmap_find_next_zero_area(qs_cfg->pf_map, qs_cfg->pf_map_size,
@@ -39,7 +39,7 @@ static int __ice_vsi_get_qs_contig(struct ice_qs_cfg *qs_cfg)
  */
 static int __ice_vsi_get_qs_sc(struct ice_qs_cfg *qs_cfg)
 {
-	int i, index = 0;
+	unsigned int i, index = 0;
 
 	mutex_lock(qs_cfg->qs_mutex);
 	for (i = 0; i < qs_cfg->q_count; i++) {
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 1c255b27244c..c2da3e1a2e17 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5035,7 +5035,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 	struct ice_pf *pf = vsi->back;
 	u8 count = 0;
 
-	if (new_mtu == netdev->mtu) {
+	if (new_mtu == (int)netdev->mtu) {
 		netdev_warn(netdev, "MTU is already %u\n", netdev->mtu);
 		return 0;
 	}
@@ -5050,11 +5050,11 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		}
 	}
 
-	if (new_mtu < netdev->min_mtu) {
+	if (new_mtu < (int)netdev->min_mtu) {
 		netdev_err(netdev, "new MTU invalid. min_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
-	} else if (new_mtu > netdev->max_mtu) {
+	} else if (new_mtu > (int)netdev->max_mtu) {
 		netdev_err(netdev, "new MTU invalid. max_mtu is %d\n",
 			   netdev->min_mtu);
 		return -EINVAL;
@@ -5075,7 +5075,7 @@ static int ice_change_mtu(struct net_device *netdev, int new_mtu)
 		return -EBUSY;
 	}
 
-	netdev->mtu = new_mtu;
+	netdev->mtu = (unsigned int)new_mtu;
 
 	/* if VSI is up, bring it down and then back up */
 	if (!test_and_set_bit(__ICE_DOWN, vsi->state)) {
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.h b/drivers/net/ethernet/intel/ice/ice_txrx.h
index cf21b4fe928a..e70c4619edc3 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.h
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.h
@@ -38,7 +38,8 @@
  */
 #if (PAGE_SIZE < 8192)
 #define ICE_2K_TOO_SMALL_WITH_PADDING \
-((NET_SKB_PAD + ICE_RXBUF_1536) > SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
+	((unsigned int)(NET_SKB_PAD + ICE_RXBUF_1536) > \
+			SKB_WITH_OVERHEAD(ICE_RXBUF_2048))
 
 /**
  * ice_compute_pad - compute the padding
@@ -107,8 +108,8 @@ static inline int ice_skb_pad(void)
 #define DESC_NEEDED (MAX_SKB_FRAGS + ICE_DESCS_FOR_CTX_DESC + \
 		     ICE_DESCS_PER_CACHE_LINE + ICE_DESCS_FOR_SKB_DATA_PTR)
 #define ICE_DESC_UNUSED(R)	\
-	((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
-	(R)->next_to_clean - (R)->next_to_use - 1)
+	(u16)((((R)->next_to_clean > (R)->next_to_use) ? 0 : (R)->count) + \
+	      (R)->next_to_clean - (R)->next_to_use - 1)
 
 #define ICE_TX_FLAGS_TSO	BIT(0)
 #define ICE_TX_FLAGS_HW_VLAN	BIT(1)
-- 
cgit v1.2.3-59-g8ed1b


From f0cbbb9c6e06532fc839770de20e1e94e0d999dd Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:36:39 -0700
Subject: ice: remove unused macro

The driver had an unused define that can be removed.  Found by
compiler -Werror=unused-macros check.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_nl.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
index 93cf70d06fe5..87f91b750d59 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_nl.c
@@ -7,8 +7,6 @@
 #include "ice_dcb_nl.h"
 #include <net/dcbnl.h>
 
-#define ICE_APP_PROT_ID_ROCE	0x8915
-
 /**
  * ice_dcbnl_devreset - perform enough of a ifdown/ifup to sync DCBNL info
  * @netdev: device associated with interface that needs reset
-- 
cgit v1.2.3-59-g8ed1b


From bf8987df8aa57da884276b07a64307eac577eaaf Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Fri, 15 May 2020 17:36:40 -0700
Subject: ice: set VF default LAN address

Remove is_zero_ether_addr() check when setting the VF default LAN address.
This check assumed that the address had been deleted and zeroed before
calling ice_vc_add_mac_addr(). Now the default LAN address will be set
to the last unicast MAC address added by the VF.

The default LAN address is reported by the PF via ndo_get_vf_config.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 9b09a111321c..efd54299a220 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2862,9 +2862,11 @@ ice_vc_add_mac_addr(struct ice_vf *vf, struct ice_vsi *vsi, u8 *mac_addr)
 		return -EIO;
 	}
 
-	/* only set dflt_lan_addr once */
-	if (is_zero_ether_addr(vf->dflt_lan_addr.addr) &&
-	    is_unicast_ether_addr(mac_addr))
+	/* Set the default LAN address to the latest unicast MAC address added
+	 * by the VF. The default LAN address is reported by the PF via
+	 * ndo_get_vf_config.
+	 */
+	if (is_unicast_ether_addr(mac_addr))
 		ether_addr_copy(vf->dflt_lan_addr.addr, mac_addr);
 
 	vf->num_mac++;
-- 
cgit v1.2.3-59-g8ed1b


From 5df42c8267418bfb8da54cc4772b397ea4c88aea Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:36:41 -0700
Subject: ice: fix MAC write command

The manage MAC write command was implemented in an overly complex way
that actually didn't work, as it wasn't symmetric to the manage MAC
read command, and was feeding bytes out of order to the firmware. Fix
the implementation by just using a simple array to represent the MAC
address when it is being written via firmware command.

Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 10 ++++------
 drivers/net/ethernet/intel/ice/ice_common.c     |  5 +----
 2 files changed, 5 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 586d69491268..f04c338fb6e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -156,13 +156,11 @@ struct ice_aqc_manage_mac_write {
 #define ICE_AQC_MAN_MAC_WR_MC_MAG_EN		BIT(0)
 #define ICE_AQC_MAN_MAC_WR_WOL_LAA_PFR_KEEP	BIT(1)
 #define ICE_AQC_MAN_MAC_WR_S		6
-#define ICE_AQC_MAN_MAC_WR_M		(3 << ICE_AQC_MAN_MAC_WR_S)
+#define ICE_AQC_MAN_MAC_WR_M		ICE_M(3, ICE_AQC_MAN_MAC_WR_S)
 #define ICE_AQC_MAN_MAC_UPDATE_LAA	0
-#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL	(BIT(0) << ICE_AQC_MAN_MAC_WR_S)
-	/* High 16 bits of MAC address in big endian order */
-	__be16 sah;
-	/* Low 32 bits of MAC address in big endian order */
-	__be32 sal;
+#define ICE_AQC_MAN_MAC_UPDATE_LAA_WOL	BIT(ICE_AQC_MAN_MAC_WR_S)
+	/* byte stream in network order */
+	u8 mac_addr[ETH_ALEN];
 	__le32 addr_high;
 	__le32 addr_low;
 };
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 0a0b00fffaf7..5da369ae33e0 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1994,10 +1994,7 @@ ice_aq_manage_mac_write(struct ice_hw *hw, const u8 *mac_addr, u8 flags,
 	ice_fill_dflt_direct_cmd_desc(&desc, ice_aqc_opc_manage_mac_write);
 
 	cmd->flags = flags;
-
-	/* Prep values for flags, sah, sal */
-	cmd->sah = htons(*((const u16 *)mac_addr));
-	cmd->sal = htonl(*((const u32 *)(mac_addr + 2)));
+	ether_addr_copy(cmd->mac_addr, mac_addr);
 
 	return ice_aq_send_cmd(hw, &desc, NULL, 0, cd);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 1aaef2bc4e0a5ce9e4dd86359e6a0bf52c6aa64f Mon Sep 17 00:00:00 2001
From: Surabhi Boob <surabhi.boob@intel.com>
Date: Fri, 15 May 2020 17:36:42 -0700
Subject: ice: Fix memory leak

Handle memory leak on filter management initialization failure.

Signed-off-by: Surabhi Boob <surabhi.boob@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 5da369ae33e0..ee62cfa3a69e 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -387,6 +387,7 @@ ice_aq_set_mac_cfg(struct ice_hw *hw, u16 max_frame_size, struct ice_sq_cd *cd)
 static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
 {
 	struct ice_switch_info *sw;
+	enum ice_status status;
 
 	hw->switch_info = devm_kzalloc(ice_hw_to_dev(hw),
 				       sizeof(*hw->switch_info), GFP_KERNEL);
@@ -397,7 +398,12 @@ static enum ice_status ice_init_fltr_mgmt_struct(struct ice_hw *hw)
 
 	INIT_LIST_HEAD(&sw->vsi_list_map_head);
 
-	return ice_init_def_sw_recp(hw);
+	status = ice_init_def_sw_recp(hw);
+	if (status) {
+		devm_kfree(ice_hw_to_dev(hw), hw->switch_info);
+		return status;
+	}
+	return 0;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 68d270783742783f96e89ef92ac24ab3c7fb1d31 Mon Sep 17 00:00:00 2001
From: Surabhi Boob <surabhi.boob@intel.com>
Date: Fri, 15 May 2020 17:36:43 -0700
Subject: ice: Fix for memory leaks and modify ICE_FREE_CQ_BUFS

Handle memory leaks during control queue initialization and
buffer allocation failures. The macro ICE_FREE_CQ_BUFS is modified so
that it can be reused for this fix.

Signed-off-by: Surabhi Boob <surabhi.boob@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_controlq.c | 49 +++++++++++++++------------
 1 file changed, 28 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 9a865962296d..62c2c1e621d2 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -199,7 +199,9 @@ unwind_alloc_rq_bufs:
 		cq->rq.r.rq_bi[i].pa = 0;
 		cq->rq.r.rq_bi[i].size = 0;
 	}
+	cq->rq.r.rq_bi = NULL;
 	devm_kfree(ice_hw_to_dev(hw), cq->rq.dma_head);
+	cq->rq.dma_head = NULL;
 
 	return ICE_ERR_NO_MEMORY;
 }
@@ -245,7 +247,9 @@ unwind_alloc_sq_bufs:
 		cq->sq.r.sq_bi[i].pa = 0;
 		cq->sq.r.sq_bi[i].size = 0;
 	}
+	cq->sq.r.sq_bi = NULL;
 	devm_kfree(ice_hw_to_dev(hw), cq->sq.dma_head);
+	cq->sq.dma_head = NULL;
 
 	return ICE_ERR_NO_MEMORY;
 }
@@ -304,6 +308,28 @@ ice_cfg_rq_regs(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 	return 0;
 }
 
+#define ICE_FREE_CQ_BUFS(hw, qi, ring)					\
+do {									\
+	int i;								\
+	/* free descriptors */						\
+	if ((qi)->ring.r.ring##_bi)					\
+		for (i = 0; i < (qi)->num_##ring##_entries; i++)	\
+			if ((qi)->ring.r.ring##_bi[i].pa) {		\
+				dmam_free_coherent(ice_hw_to_dev(hw),	\
+					(qi)->ring.r.ring##_bi[i].size,	\
+					(qi)->ring.r.ring##_bi[i].va,	\
+					(qi)->ring.r.ring##_bi[i].pa);	\
+					(qi)->ring.r.ring##_bi[i].va = NULL;\
+					(qi)->ring.r.ring##_bi[i].pa = 0;\
+					(qi)->ring.r.ring##_bi[i].size = 0;\
+		}							\
+	/* free the buffer info list */					\
+	if ((qi)->ring.cmd_buf)						\
+		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
+	/* free DMA head */						\
+	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
+} while (0)
+
 /**
  * ice_init_sq - main initialization routine for Control ATQ
  * @hw: pointer to the hardware structure
@@ -357,6 +383,7 @@ static enum ice_status ice_init_sq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 	goto init_ctrlq_exit;
 
 init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, sq);
 	ice_free_cq_ring(hw, &cq->sq);
 
 init_ctrlq_exit:
@@ -416,33 +443,13 @@ static enum ice_status ice_init_rq(struct ice_hw *hw, struct ice_ctl_q_info *cq)
 	goto init_ctrlq_exit;
 
 init_ctrlq_free_rings:
+	ICE_FREE_CQ_BUFS(hw, cq, rq);
 	ice_free_cq_ring(hw, &cq->rq);
 
 init_ctrlq_exit:
 	return ret_code;
 }
 
-#define ICE_FREE_CQ_BUFS(hw, qi, ring)					\
-do {									\
-	int i;								\
-	/* free descriptors */						\
-	for (i = 0; i < (qi)->num_##ring##_entries; i++)		\
-		if ((qi)->ring.r.ring##_bi[i].pa) {			\
-			dmam_free_coherent(ice_hw_to_dev(hw),		\
-					   (qi)->ring.r.ring##_bi[i].size,\
-					   (qi)->ring.r.ring##_bi[i].va,\
-					   (qi)->ring.r.ring##_bi[i].pa);\
-			(qi)->ring.r.ring##_bi[i].va = NULL;		\
-			(qi)->ring.r.ring##_bi[i].pa = 0;		\
-			(qi)->ring.r.ring##_bi[i].size = 0;		\
-		}							\
-	/* free the buffer info list */					\
-	if ((qi)->ring.cmd_buf)						\
-		devm_kfree(ice_hw_to_dev(hw), (qi)->ring.cmd_buf);	\
-	/* free DMA head */						\
-	devm_kfree(ice_hw_to_dev(hw), (qi)->ring.dma_head);		\
-} while (0)
-
 /**
  * ice_shutdown_sq - shutdown the Control ATQ
  * @hw: pointer to the hardware structure
-- 
cgit v1.2.3-59-g8ed1b


From 4f1fe43c920b92ac41c34f151fe452d46936b79d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:36:44 -0700
Subject: ice: Add more Rx errors to netdev's rx_error counter

Currently we are only including illegal_bytes and rx_crc_errors in the
PF netdev's rx_error counter. There are many more causes of Rx errors
that the device supports and reports via Ethtool. Accumulate all Rx
errors in the PF netdev's rx_error counter.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index c2da3e1a2e17..93a42ff7496b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4295,7 +4295,13 @@ void ice_update_vsi_stats(struct ice_vsi *vsi)
 	if (vsi->type == ICE_VSI_PF) {
 		cur_ns->rx_crc_errors = pf->stats.crc_errors;
 		cur_ns->rx_errors = pf->stats.crc_errors +
-				    pf->stats.illegal_bytes;
+				    pf->stats.illegal_bytes +
+				    pf->stats.rx_len_errors +
+				    pf->stats.rx_undersize +
+				    pf->hw_csum_rx_error +
+				    pf->stats.rx_jabber +
+				    pf->stats.rx_fragments +
+				    pf->stats.rx_oversize;
 		cur_ns->rx_length_errors = pf->stats.rx_len_errors;
 		/* record drops from the port level */
 		cur_ns->rx_missed_errors = pf->stats.eth.rx_discards;
-- 
cgit v1.2.3-59-g8ed1b


From 1960827570c7ed83fb0725debf856b06f46e1a77 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:42:13 -0700
Subject: ice: Don't allow VLAN stripping change when pvid set

Currently, if the PVID is set in the VLAN handling section of the VSI
context the driver still allows VLAN stripping to be enabled/disabled.
VLAN stripping should only be modifiable when the PVID is not set. Fix
this by preventing VLAN stripping modification when PVID is set.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index f81bd4c30bbc..89962c14e31f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1812,6 +1812,12 @@ int ice_vsi_manage_vlan_stripping(struct ice_vsi *vsi, bool ena)
 	enum ice_status status;
 	int ret = 0;
 
+	/* do not allow modifying VLAN stripping when a port VLAN is configured
+	 * on this VSI
+	 */
+	if (vsi->info.pvid)
+		return 0;
+
 	ctxt = kzalloc(sizeof(*ctxt), GFP_KERNEL);
 	if (!ctxt)
 		return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From b5c7f857e5c98c21271678992fb2634df38292f5 Mon Sep 17 00:00:00 2001
From: Evan Swanson <evan.swanson@intel.com>
Date: Fri, 15 May 2020 17:42:14 -0700
Subject: ice: Handle critical FW error during admin queue initialization

A race condition between FW and SW can occur between admin queue setup and
the first command sent. A link event may occur and FW attempts to notify a
non-existent queue. FW will set the critical error bit and disable the
queue. When this happens retry queue setup.

Signed-off-by: Evan Swanson <evan.swanson@intel.com>
Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_controlq.c   | 126 ++++++++++++++----------
 drivers/net/ethernet/intel/ice/ice_controlq.h   |   3 +
 drivers/net/ethernet/intel/ice/ice_hw_autogen.h |   2 +
 drivers/net/ethernet/intel/ice/ice_main.c       |   2 +
 drivers/net/ethernet/intel/ice/ice_status.h     |   1 +
 5 files changed, 80 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 62c2c1e621d2..479a74efc536 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -12,6 +12,7 @@ do {								\
 	(qinfo)->sq.bal = prefix##_ATQBAL;			\
 	(qinfo)->sq.len_mask = prefix##_ATQLEN_ATQLEN_M;	\
 	(qinfo)->sq.len_ena_mask = prefix##_ATQLEN_ATQENABLE_M;	\
+	(qinfo)->sq.len_crit_mask = prefix##_ATQLEN_ATQCRIT_M;	\
 	(qinfo)->sq.head_mask = prefix##_ATQH_ATQH_M;		\
 	(qinfo)->rq.head = prefix##_ARQH;			\
 	(qinfo)->rq.tail = prefix##_ARQT;			\
@@ -20,6 +21,7 @@ do {								\
 	(qinfo)->rq.bal = prefix##_ARQBAL;			\
 	(qinfo)->rq.len_mask = prefix##_ARQLEN_ARQLEN_M;	\
 	(qinfo)->rq.len_ena_mask = prefix##_ARQLEN_ARQENABLE_M;	\
+	(qinfo)->rq.len_crit_mask = prefix##_ARQLEN_ARQCRIT_M;	\
 	(qinfo)->rq.head_mask = prefix##_ARQH_ARQH_M;		\
 } while (0)
 
@@ -641,6 +643,50 @@ init_ctrlq_free_sq:
 	return ret_code;
 }
 
+/**
+ * ice_shutdown_ctrlq - shutdown routine for any control queue
+ * @hw: pointer to the hardware structure
+ * @q_type: specific Control queue type
+ *
+ * NOTE: this function does not destroy the control queue locks.
+ */
+static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
+{
+	struct ice_ctl_q_info *cq;
+
+	switch (q_type) {
+	case ICE_CTL_Q_ADMIN:
+		cq = &hw->adminq;
+		if (ice_check_sq_alive(hw, cq))
+			ice_aq_q_shutdown(hw, true);
+		break;
+	case ICE_CTL_Q_MAILBOX:
+		cq = &hw->mailboxq;
+		break;
+	default:
+		return;
+	}
+
+	ice_shutdown_sq(hw, cq);
+	ice_shutdown_rq(hw, cq);
+}
+
+/**
+ * ice_shutdown_all_ctrlq - shutdown routine for all control queues
+ * @hw: pointer to the hardware structure
+ *
+ * NOTE: this function does not destroy the control queue locks. The driver
+ * may call this at runtime to shutdown and later restart control queues, such
+ * as in response to a reset event.
+ */
+void ice_shutdown_all_ctrlq(struct ice_hw *hw)
+{
+	/* Shutdown FW admin queue */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+	/* Shutdown PF-VF Mailbox */
+	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
+}
+
 /**
  * ice_init_all_ctrlq - main initialization routine for all control queues
  * @hw: pointer to the hardware structure
@@ -656,17 +702,27 @@ init_ctrlq_free_sq:
  */
 enum ice_status ice_init_all_ctrlq(struct ice_hw *hw)
 {
-	enum ice_status ret_code;
+	enum ice_status status;
+	u32 retry = 0;
 
 	/* Init FW admin queue */
-	ret_code = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
-	if (ret_code)
-		return ret_code;
+	do {
+		status = ice_init_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		if (status)
+			return status;
 
-	ret_code = ice_init_check_adminq(hw);
-	if (ret_code)
-		return ret_code;
+		status = ice_init_check_adminq(hw);
+		if (status != ICE_ERR_AQ_FW_CRITICAL)
+			break;
 
+		ice_debug(hw, ICE_DBG_AQ_MSG,
+			  "Retry Admin Queue init due to FW critical error\n");
+		ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
+		msleep(ICE_CTL_Q_ADMIN_INIT_MSEC);
+	} while (retry++ < ICE_CTL_Q_ADMIN_INIT_TIMEOUT);
+
+	if (status)
+		return status;
 	/* Init Mailbox queue */
 	return ice_init_ctrlq(hw, ICE_CTL_Q_MAILBOX);
 }
@@ -707,50 +763,6 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
 	return ice_init_all_ctrlq(hw);
 }
 
-/**
- * ice_shutdown_ctrlq - shutdown routine for any control queue
- * @hw: pointer to the hardware structure
- * @q_type: specific Control queue type
- *
- * NOTE: this function does not destroy the control queue locks.
- */
-static void ice_shutdown_ctrlq(struct ice_hw *hw, enum ice_ctl_q q_type)
-{
-	struct ice_ctl_q_info *cq;
-
-	switch (q_type) {
-	case ICE_CTL_Q_ADMIN:
-		cq = &hw->adminq;
-		if (ice_check_sq_alive(hw, cq))
-			ice_aq_q_shutdown(hw, true);
-		break;
-	case ICE_CTL_Q_MAILBOX:
-		cq = &hw->mailboxq;
-		break;
-	default:
-		return;
-	}
-
-	ice_shutdown_sq(hw, cq);
-	ice_shutdown_rq(hw, cq);
-}
-
-/**
- * ice_shutdown_all_ctrlq - shutdown routine for all control queues
- * @hw: pointer to the hardware structure
- *
- * NOTE: this function does not destroy the control queue locks. The driver
- * may call this at runtime to shutdown and later restart control queues, such
- * as in response to a reset event.
- */
-void ice_shutdown_all_ctrlq(struct ice_hw *hw)
-{
-	/* Shutdown FW admin queue */
-	ice_shutdown_ctrlq(hw, ICE_CTL_Q_ADMIN);
-	/* Shutdown PF-VF Mailbox */
-	ice_shutdown_ctrlq(hw, ICE_CTL_Q_MAILBOX);
-}
-
 /**
  * ice_destroy_ctrlq_locks - Destroy locks for a control queue
  * @cq: pointer to the control queue
@@ -1049,9 +1061,15 @@ ice_sq_send_cmd(struct ice_hw *hw, struct ice_ctl_q_info *cq,
 
 	/* update the error if time out occurred */
 	if (!cmd_completed) {
-		ice_debug(hw, ICE_DBG_AQ_MSG,
-			  "Control Send Queue Writeback timeout.\n");
-		status = ICE_ERR_AQ_TIMEOUT;
+		if (rd32(hw, cq->rq.len) & cq->rq.len_crit_mask ||
+		    rd32(hw, cq->sq.len) & cq->sq.len_crit_mask) {
+			ice_debug(hw, ICE_DBG_AQ_MSG, "Critical FW error.\n");
+			status = ICE_ERR_AQ_FW_CRITICAL;
+		} else {
+			ice_debug(hw, ICE_DBG_AQ_MSG,
+				  "Control Send Queue Writeback timeout.\n");
+			status = ICE_ERR_AQ_TIMEOUT;
+		}
 	}
 
 sq_send_command_error:
diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.h b/drivers/net/ethernet/intel/ice/ice_controlq.h
index bf0ebe6149e8..faaa08e8171b 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.h
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.h
@@ -34,6 +34,8 @@ enum ice_ctl_q {
 /* Control Queue timeout settings - max delay 250ms */
 #define ICE_CTL_Q_SQ_CMD_TIMEOUT	2500  /* Count 2500 times */
 #define ICE_CTL_Q_SQ_CMD_USEC		100   /* Check every 100usec */
+#define ICE_CTL_Q_ADMIN_INIT_TIMEOUT	10    /* Count 10 times */
+#define ICE_CTL_Q_ADMIN_INIT_MSEC	100   /* Check every 100msec */
 
 struct ice_ctl_q_ring {
 	void *dma_head;			/* Virtual address to DMA head */
@@ -59,6 +61,7 @@ struct ice_ctl_q_ring {
 	u32 bal;
 	u32 len_mask;
 	u32 len_ena_mask;
+	u32 len_crit_mask;
 	u32 head_mask;
 };
 
diff --git a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
index 2f1c776747a4..1086c9f778b4 100644
--- a/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
+++ b/drivers/net/ethernet/intel/ice/ice_hw_autogen.h
@@ -39,6 +39,7 @@
 #define PF_MBX_ARQH_ARQH_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ARQLEN				0x0022E480
 #define PF_MBX_ARQLEN_ARQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ARQLEN_ARQCRIT_M			BIT(30)
 #define PF_MBX_ARQLEN_ARQENABLE_M		BIT(31)
 #define PF_MBX_ARQT				0x0022E580
 #define PF_MBX_ATQBAH				0x0022E180
@@ -47,6 +48,7 @@
 #define PF_MBX_ATQH_ATQH_M			ICE_M(0x3FF, 0)
 #define PF_MBX_ATQLEN				0x0022E200
 #define PF_MBX_ATQLEN_ATQLEN_M			ICE_M(0x3FF, 0)
+#define PF_MBX_ATQLEN_ATQCRIT_M			BIT(30)
 #define PF_MBX_ATQLEN_ATQENABLE_M		BIT(31)
 #define PF_MBX_ATQT				0x0022E300
 #define PRTDCB_GENC				0x00083000
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 93a42ff7496b..247e7b186b3c 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5207,6 +5207,8 @@ const char *ice_stat_str(enum ice_status stat_err)
 		return "ICE_ERR_AQ_NO_WORK";
 	case ICE_ERR_AQ_EMPTY:
 		return "ICE_ERR_AQ_EMPTY";
+	case ICE_ERR_AQ_FW_CRITICAL:
+		return "ICE_ERR_AQ_FW_CRITICAL";
 	}
 
 	return "ICE_ERR_UNKNOWN";
diff --git a/drivers/net/ethernet/intel/ice/ice_status.h b/drivers/net/ethernet/intel/ice/ice_status.h
index 546a02856d09..4028c6365172 100644
--- a/drivers/net/ethernet/intel/ice/ice_status.h
+++ b/drivers/net/ethernet/intel/ice/ice_status.h
@@ -37,6 +37,7 @@ enum ice_status {
 	ICE_ERR_AQ_FULL				= -102,
 	ICE_ERR_AQ_NO_WORK			= -103,
 	ICE_ERR_AQ_EMPTY			= -104,
+	ICE_ERR_AQ_FW_CRITICAL			= -105,
 };
 
 #endif /* _ICE_STATUS_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From c8f135c6ee7851ad72bd4d877216950fcbd45fb6 Mon Sep 17 00:00:00 2001
From: Marta Plantykow <marta.a.plantykow@intel.com>
Date: Fri, 15 May 2020 17:42:15 -0700
Subject: ice: Change number of XDP TxQ to 0 when destroying rings

When XDP Tx rings are destroyed, the number of XDP Tx queues is not
reset. This patch sets that number to 0.

Signed-off-by: Marta Plantykow <marta.a.plantykow@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 247e7b186b3c..081fec3131cd 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1899,6 +1899,9 @@ free_qmap:
 	for (i = 0; i < vsi->tc_cfg.numtc; i++)
 		max_txqs[i] = vsi->num_txq;
 
+	/* change number of XDP Tx queues to 0 */
+	vsi->num_xdp_txq = 0;
+
 	return ice_cfg_vsi_lan(vsi->port_info, vsi->idx, vsi->tc_cfg.ena_tc,
 			       max_txqs);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 49d358e0e746dc24bfb1b1cf98c17064e5177424 Mon Sep 17 00:00:00 2001
From: Marta Plantykow <marta.a.plantykow@intel.com>
Date: Fri, 15 May 2020 17:42:16 -0700
Subject: ice: Add XDP Tx to VSI ring stats

When an XDP Tx program is loaded and packets are sent from the
interface, VSI statistics are not updated. This patch adds packets
sent on the XDP Tx rings to the VSI ring stats.

Signed-off-by: Marta Plantykow <marta.a.plantykow@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 42 ++++++++++++++++++++++++-------
 1 file changed, 33 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 081fec3131cd..81c5f0ce5b8f 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4219,6 +4219,33 @@ ice_fetch_u64_stats_per_ring(struct ice_ring *ring, u64 *pkts, u64 *bytes)
 	} while (u64_stats_fetch_retry_irq(&ring->syncp, start));
 }
 
+/**
+ * ice_update_vsi_tx_ring_stats - Update VSI Tx ring stats counters
+ * @vsi: the VSI to be updated
+ * @rings: rings to work on
+ * @count: number of rings
+ */
+static void
+ice_update_vsi_tx_ring_stats(struct ice_vsi *vsi, struct ice_ring **rings,
+			     u16 count)
+{
+	struct rtnl_link_stats64 *vsi_stats = &vsi->net_stats;
+	u16 i;
+
+	for (i = 0; i < count; i++) {
+		struct ice_ring *ring;
+		u64 pkts, bytes;
+
+		ring = READ_ONCE(rings[i]);
+		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
+		vsi_stats->tx_packets += pkts;
+		vsi_stats->tx_bytes += bytes;
+		vsi->tx_restart += ring->tx_stats.restart_q;
+		vsi->tx_busy += ring->tx_stats.tx_busy;
+		vsi->tx_linearize += ring->tx_stats.tx_linearize;
+	}
+}
+
 /**
  * ice_update_vsi_ring_stats - Update VSI stats counters
  * @vsi: the VSI to be updated
@@ -4246,15 +4273,7 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 	rcu_read_lock();
 
 	/* update Tx rings counters */
-	ice_for_each_txq(vsi, i) {
-		ring = READ_ONCE(vsi->tx_rings[i]);
-		ice_fetch_u64_stats_per_ring(ring, &pkts, &bytes);
-		vsi_stats->tx_packets += pkts;
-		vsi_stats->tx_bytes += bytes;
-		vsi->tx_restart += ring->tx_stats.restart_q;
-		vsi->tx_busy += ring->tx_stats.tx_busy;
-		vsi->tx_linearize += ring->tx_stats.tx_linearize;
-	}
+	ice_update_vsi_tx_ring_stats(vsi, vsi->tx_rings, vsi->num_txq);
 
 	/* update Rx rings counters */
 	ice_for_each_rxq(vsi, i) {
@@ -4266,6 +4285,11 @@ static void ice_update_vsi_ring_stats(struct ice_vsi *vsi)
 		vsi->rx_page_failed += ring->rx_stats.alloc_page_failed;
 	}
 
+	/* update XDP Tx rings counters */
+	if (ice_is_xdp_ena_vsi(vsi))
+		ice_update_vsi_tx_ring_stats(vsi, vsi->xdp_rings,
+					     vsi->num_xdp_txq);
+
 	rcu_read_unlock();
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ae15e0ba1b333f391ab0d678abb752cb6a7f2782 Mon Sep 17 00:00:00 2001
From: Marta Plantykow <marta.a.plantykow@intel.com>
Date: Fri, 15 May 2020 17:42:17 -0700
Subject: ice: Change number of XDP Tx queues to match number of Rx queues

In the current implementation, the number of XDP Tx queues is set to
the number of transmit queues, which is not always correct. This
patch changes it to match the number of receive queues instead.
XDP programs run on Rx rings, so what is actually needed is one XDP
Tx ring per Rx ring for the whole XDP ecosystem to be functional,
e.g. if the result of the XDP prog is XDP_TX, then the XDP Tx ring
has to be accessible.

Signed-off-by: Marta Plantykow <marta.a.plantykow@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c  | 2 +-
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 89962c14e31f..6f3ee8ac11ce 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2785,7 +2785,7 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi)
 
 		ice_vsi_map_rings_to_vectors(vsi);
 		if (ice_is_xdp_ena_vsi(vsi)) {
-			vsi->num_xdp_txq = vsi->alloc_txq;
+			vsi->num_xdp_txq = vsi->alloc_rxq;
 			ret = ice_prepare_xdp_rings(vsi, vsi->xdp_prog);
 			if (ret)
 				goto err_vectors;
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 81c5f0ce5b8f..b64c4e796636 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -1935,7 +1935,7 @@ ice_xdp_setup_prog(struct ice_vsi *vsi, struct bpf_prog *prog,
 	}
 
 	if (!ice_is_xdp_ena_vsi(vsi) && prog) {
-		vsi->num_xdp_txq = vsi->alloc_txq;
+		vsi->num_xdp_txq = vsi->alloc_rxq;
 		xdp_ring_err = ice_prepare_xdp_rings(vsi, prog);
 		if (xdp_ring_err)
 			NL_SET_ERR_MSG_MOD(extack, "Setting up XDP Tx resources failed");
-- 
cgit v1.2.3-59-g8ed1b


From 7e34786a74e14038faadabb24b0d7f4436961c6b Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Fri, 15 May 2020 17:42:18 -0700
Subject: ice: avoid undefined behavior

When writing the driver's struct ice_tlan_ctx structure, do not write the
8-bit element int_q_state to the associated internal-to-hardware field,
which is 122 bits wide; otherwise the helper function ice_write_byte()
invokes undefined behavior when setting the mask used for that write.
This should not cause any functional change and will avoid the undefined
behavior. Also, update a comment to highlight that this structure element
is not written.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c      |  5 +++--
 drivers/net/ethernet/intel/ice/ice_common.c    | 12 ++++++++++--
 drivers/net/ethernet/intel/ice/ice_common.h    |  3 ++-
 drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h |  2 +-
 4 files changed, 16 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 9452c0eb70b0..18076e0d12d0 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -638,6 +638,7 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
 	struct ice_aqc_add_txqs_perq *txq;
 	struct ice_pf *pf = vsi->back;
 	u8 buf_len = sizeof(*qg_buf);
+	struct ice_hw *hw = &pf->hw;
 	enum ice_status status;
 	u16 pf_q;
 	u8 tc;
@@ -646,13 +647,13 @@ ice_vsi_cfg_txq(struct ice_vsi *vsi, struct ice_ring *ring,
 	ice_setup_tx_ctx(ring, &tlan_ctx, pf_q);
 	/* copy context contents into the qg_buf */
 	qg_buf->txqs[0].txq_id = cpu_to_le16(pf_q);
-	ice_set_ctx((u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
+	ice_set_ctx(hw, (u8 *)&tlan_ctx, qg_buf->txqs[0].txq_ctx,
 		    ice_tlan_ctx_info);
 
 	/* init queue specific tail reg. It is referred as
 	 * transmit comm scheduler queue doorbell.
 	 */
-	ring->tail = pf->hw.hw_addr + QTX_COMM_DBELL(pf_q);
+	ring->tail = hw->hw_addr + QTX_COMM_DBELL(pf_q);
 
 	if (IS_ENABLED(CONFIG_DCB))
 		tc = ring->dcb_tc;
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index ee62cfa3a69e..8c73e161829d 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -1098,7 +1098,7 @@ ice_write_rxq_ctx(struct ice_hw *hw, struct ice_rlan_ctx *rlan_ctx,
 
 	rlan_ctx->prefena = 1;
 
-	ice_set_ctx((u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
+	ice_set_ctx(hw, (u8 *)rlan_ctx, ctx_buf, ice_rlan_ctx_info);
 	return ice_copy_rxq_ctx_to_hw(hw, ctx_buf, rxq_index);
 }
 
@@ -3199,12 +3199,14 @@ ice_write_qword(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 
 /**
  * ice_set_ctx - set context bits in packed structure
+ * @hw: pointer to the hardware structure
  * @src_ctx:  pointer to a generic non-packed context structure
  * @dest_ctx: pointer to memory for the packed structure
  * @ce_info:  a description of the structure to be transformed
  */
 enum ice_status
-ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info)
 {
 	int f;
 
@@ -3213,6 +3215,12 @@ ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info)
 		 * using the correct size so that we are correct regardless
 		 * of the endianness of the machine.
 		 */
+		if (ce_info[f].width > (ce_info[f].size_of * BITS_PER_BYTE)) {
+			ice_debug(hw, ICE_DBG_QCTX,
+				  "Field %d width of %d bits larger than size of %d byte(s) ... skipping write\n",
+				  f, ce_info[f].width, ce_info[f].size_of);
+			continue;
+		}
 		switch (ce_info[f].size_of) {
 		case sizeof(u8):
 			ice_write_byte(src_ctx, dest_ctx, &ce_info[f]);
diff --git a/drivers/net/ethernet/intel/ice/ice_common.h b/drivers/net/ethernet/intel/ice/ice_common.h
index bea755a658eb..9b9e50d2398b 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.h
+++ b/drivers/net/ethernet/intel/ice/ice_common.h
@@ -70,7 +70,8 @@ enum ice_status ice_aq_q_shutdown(struct ice_hw *hw, bool unloading);
 void ice_fill_dflt_direct_cmd_desc(struct ice_aq_desc *desc, u16 opcode);
 extern const struct ice_ctx_ele ice_tlan_ctx_info[];
 enum ice_status
-ice_set_ctx(u8 *src_ctx, u8 *dest_ctx, const struct ice_ctx_ele *ce_info);
+ice_set_ctx(struct ice_hw *hw, u8 *src_ctx, u8 *dest_ctx,
+	    const struct ice_ctx_ele *ce_info);
 
 extern struct mutex ice_global_cfg_lock_sw;
 
diff --git a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
index bd2cd3435768..14dfbbc1b2cf 100644
--- a/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
+++ b/drivers/net/ethernet/intel/ice/ice_lan_tx_rx.h
@@ -581,7 +581,7 @@ struct ice_tlan_ctx {
 	u8 drop_ena;
 	u8 cache_prof_idx;
 	u8 pkt_shaper_prof_idx;
-	u8 int_q_state;	/* width not needed - internal do not write */
+	u8 int_q_state;	/* width not needed - internal - DO NOT WRITE!!! */
 };
 
 /* macro to make the table lines short */
-- 
cgit v1.2.3-59-g8ed1b


From 13f90b393f7338e6a2b2646fa1b677cc8b50cd23 Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 15 May 2020 17:42:19 -0700
Subject: ice: Refactor Rx checksum checks

We don't need both the rx_status and rx_error local variables, as the
latter is a subset of the former. Remove rx_error completely and check
the right bit in rx_status.

Rename rx_status to rx_status0, and rx_stat_err1 to
rx_status1. This naming more closely reflects the specification.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c | 27 +++++++++++----------------
 1 file changed, 11 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index 1ba97172d8d0..ab2031b1c635 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -84,17 +84,12 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	    union ice_32b_rx_flex_desc *rx_desc, u8 ptype)
 {
 	struct ice_rx_ptype_decoded decoded;
-	u16 rx_error, rx_status;
-	u16 rx_stat_err1;
+	u16 rx_status0, rx_status1;
 	bool ipv4, ipv6;
 
-	rx_status = le16_to_cpu(rx_desc->wb.status_error0);
-	rx_error = rx_status & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
-				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S) |
-				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S) |
-				BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S));
+	rx_status0 = le16_to_cpu(rx_desc->wb.status_error0);
+	rx_status1 = le16_to_cpu(rx_desc->wb.status_error1);
 
-	rx_stat_err1 = le16_to_cpu(rx_desc->wb.status_error1);
 	decoded = ice_decode_rx_desc_ptype(ptype);
 
 	/* Start with CHECKSUM_NONE and by default csum_level = 0 */
@@ -106,7 +101,7 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 		return;
 
 	/* check if HW has decoded the packet and checksum */
-	if (!(rx_status & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
+	if (!(rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_L3L4P_S)))
 		return;
 
 	if (!(decoded.known && decoded.outer_ip))
@@ -117,22 +112,22 @@ ice_rx_csum(struct ice_ring *ring, struct sk_buff *skb,
 	ipv6 = (decoded.outer_ip == ICE_RX_PTYPE_OUTER_IP) &&
 	       (decoded.outer_ip_ver == ICE_RX_PTYPE_OUTER_IPV6);
 
-	if (ipv4 && (rx_error & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
-				 BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
+	if (ipv4 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_IPE_S) |
+				   BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EIPE_S))))
 		goto checksum_fail;
-	else if (ipv6 && (rx_status &
-		 (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
+
+	if (ipv6 && (rx_status0 & (BIT(ICE_RX_FLEX_DESC_STATUS0_IPV6EXADD_S))))
 		goto checksum_fail;
 
 	/* check for L4 errors and handle packets that were not able to be
 	 * checksummed due to arrival speed
 	 */
-	if (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
+	if (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_L4E_S))
 		goto checksum_fail;
 
 	/* check for outer UDP checksum error in tunneled packets */
-	if ((rx_stat_err1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
-	    (rx_error & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
+	if ((rx_status1 & BIT(ICE_RX_FLEX_DESC_STATUS1_NAT_S)) &&
+	    (rx_status0 & BIT(ICE_RX_FLEX_DESC_STATUS0_XSUM_EUDPE_S)))
 		goto checksum_fail;
 
 	/* If there is an outer header present that might contain a checksum
-- 
cgit v1.2.3-59-g8ed1b


From ea651a86d46895a8b342664db66c3dee3412ad34 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Wed, 6 Nov 2019 09:57:12 -0800
Subject: net/mlx5: E-Switch, Refactor eswitch egress acl codes

Refactor the egress ACL code so that offloads and legacy modes can
each configure the egress ACL table, groups and rules according to
their own needs. While at it, move the eswitch egress ACL code into
the esw/acl directory and split it into separate files:
. ACL egress helper functions to esw/acl/helper.c/h
. ACL egress functions used in offloads mode to esw/acl/egress_ofld.c
. ACL egress functions used in legacy mode to esw/acl/egress_lgcy.c

This patch does not change any functionality.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +
 .../mellanox/mlx5/core/esw/acl/egress_lgcy.c       | 170 +++++++++++++++
 .../mellanox/mlx5/core/esw/acl/egress_ofld.c       |  88 ++++++++
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.c   | 142 +++++++++++++
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.h   |  22 ++
 .../net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h |  13 ++
 .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h |  13 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 235 +--------------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  15 +-
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c |  36 +---
 10 files changed, 462 insertions(+), 275 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index e5ee9103fefb..ad046b2ea4f9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -46,6 +46,9 @@ mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 #
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
+				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o
+
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
 mlx5_core-$(CONFIG_PTP_1588_CLOCK) += lib/clock.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
new file mode 100644
index 000000000000..d46f8b225ebe
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_lgcy.c
@@ -0,0 +1,170 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_egress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+	esw_acl_egress_vlan_destroy(vport);
+	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
+		mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
+		vport->egress.legacy.drop_rule = NULL;
+	}
+}
+
+static int esw_acl_egress_lgcy_groups_create(struct mlx5_eswitch *esw,
+					     struct mlx5_vport *vport)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_group *drop_grp;
+	u32 *flow_group_in;
+	int err = 0;
+
+	err = esw_acl_egress_vlan_grp_create(esw, vport);
+	if (err)
+		return err;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in) {
+		err = -ENOMEM;
+		goto alloc_err;
+	}
+
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+	drop_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+	if (IS_ERR(drop_grp)) {
+		err = PTR_ERR(drop_grp);
+		esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
+			 vport->vport, err);
+		goto drop_grp_err;
+	}
+
+	vport->egress.legacy.drop_grp = drop_grp;
+	kvfree(flow_group_in);
+	return 0;
+
+drop_grp_err:
+	kvfree(flow_group_in);
+alloc_err:
+	esw_acl_egress_vlan_grp_destroy(vport);
+	return err;
+}
+
+static void esw_acl_egress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_grp)) {
+		mlx5_destroy_flow_group(vport->egress.legacy.drop_grp);
+		vport->egress.legacy.drop_grp = NULL;
+	}
+	esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw,
+			      struct mlx5_vport *vport)
+{
+	struct mlx5_flow_destination drop_ctr_dst = {};
+	struct mlx5_flow_destination *dst = NULL;
+	struct mlx5_fc *drop_counter = NULL;
+	struct mlx5_flow_act flow_act = {};
+	/* The egress acl table contains 2 rules:
+	 * 1)Allow traffic with vlan_tag=vst_vlan_id
+	 * 2)Drop all other traffic.
+	 */
+	int table_size = 2;
+	int dest_num = 0;
+	int err = 0;
+
+	if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
+		drop_counter = mlx5_fc_create(esw->dev, false);
+		if (IS_ERR(drop_counter))
+			esw_warn(esw->dev,
+				 "vport[%d] configure egress drop rule counter err(%ld)\n",
+				 vport->vport, PTR_ERR(drop_counter));
+		vport->egress.legacy.drop_counter = drop_counter;
+	}
+
+	esw_acl_egress_lgcy_rules_destroy(vport);
+
+	if (!vport->info.vlan && !vport->info.qos) {
+		esw_acl_egress_lgcy_cleanup(esw, vport);
+		return 0;
+	}
+
+	if (!IS_ERR_OR_NULL(vport->egress.acl))
+		return 0;
+
+	vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+						 MLX5_FLOW_NAMESPACE_ESW_EGRESS,
+						 table_size);
+	if (IS_ERR_OR_NULL(vport->egress.acl)) {
+		err = PTR_ERR(vport->egress.acl);
+		vport->egress.acl = NULL;
+		goto out;
+	}
+
+	err = esw_acl_egress_lgcy_groups_create(esw, vport);
+	if (err)
+		goto out;
+
+	esw_debug(esw->dev,
+		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
+		  vport->vport, vport->info.vlan, vport->info.qos);
+
+	/* Allowed vlan rule */
+	err = esw_egress_acl_vlan_create(esw, vport, NULL, vport->info.vlan,
+					 MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+	if (err)
+		goto out;
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+
+	/* Attach egress drop flow counter */
+	if (!IS_ERR_OR_NULL(drop_counter)) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		drop_ctr_dst.counter_id = mlx5_fc_id(drop_counter);
+		dst = &drop_ctr_dst;
+		dest_num++;
+	}
+	vport->egress.legacy.drop_rule =
+		mlx5_add_flow_rules(vport->egress.acl, NULL,
+				    &flow_act, dst, dest_num);
+	if (IS_ERR(vport->egress.legacy.drop_rule)) {
+		err = PTR_ERR(vport->egress.legacy.drop_rule);
+		esw_warn(esw->dev,
+			 "vport[%d] configure egress drop rule failed, err(%d)\n",
+			 vport->vport, err);
+		vport->egress.legacy.drop_rule = NULL;
+		goto out;
+	}
+
+	return err;
+
+out:
+	esw_acl_egress_lgcy_cleanup(esw, vport);
+	return err;
+}
+
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw,
+				 struct mlx5_vport *vport)
+{
+	if (IS_ERR_OR_NULL(vport->egress.acl))
+		goto clean_drop_counter;
+
+	esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
+
+	esw_acl_egress_lgcy_rules_destroy(vport);
+	esw_acl_egress_lgcy_groups_destroy(vport);
+	esw_acl_egress_table_destroy(vport);
+
+clean_drop_counter:
+	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter)) {
+		mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
+		vport->egress.legacy.drop_counter = NULL;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
new file mode 100644
index 000000000000..49a53ebf56dd
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -0,0 +1,88 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw,
+					    struct mlx5_vport *vport)
+{
+	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	/* For prio tag mode, there is only 1 FTEs:
+	 * 1) prio tag packets - pop the prio tag VLAN, allow
+	 * Unmatched traffic is allowed by default
+	 */
+	esw_debug(esw->dev,
+		  "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+	/* prio tag vlan rule - pop it so vport receives untagged packets */
+	return esw_egress_acl_vlan_create(esw, vport, NULL, 0,
+					  MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
+					  MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+}
+
+static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
+{
+	esw_acl_egress_vlan_destroy(vport);
+}
+
+static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
+					     struct mlx5_vport *vport)
+{
+	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	return esw_acl_egress_vlan_grp_create(esw, vport);
+}
+
+static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+	esw_acl_egress_vlan_grp_destroy(vport);
+}
+
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	int err;
+
+	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		return 0;
+
+	esw_acl_egress_ofld_rules_destroy(vport);
+
+	vport->egress.acl = esw_acl_table_create(esw, vport->vport,
+						 MLX5_FLOW_NAMESPACE_ESW_EGRESS, 0);
+	if (IS_ERR_OR_NULL(vport->egress.acl)) {
+		err = PTR_ERR(vport->egress.acl);
+		vport->egress.acl = NULL;
+		return err;
+	}
+
+	err = esw_acl_egress_ofld_groups_create(esw, vport);
+	if (err)
+		goto group_err;
+
+	esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport);
+
+	err = esw_acl_egress_ofld_rules_create(esw, vport);
+	if (err)
+		goto rules_err;
+
+	return 0;
+
+rules_err:
+	esw_acl_egress_ofld_groups_destroy(vport);
+group_err:
+	esw_acl_egress_table_destroy(vport);
+	return err;
+}
+
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport)
+{
+	esw_acl_egress_ofld_rules_destroy(vport);
+	esw_acl_egress_ofld_groups_destroy(vport);
+	esw_acl_egress_table_destroy(vport);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
new file mode 100644
index 000000000000..8b7996721a7c
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -0,0 +1,142 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size)
+{
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_namespace *root_ns;
+	struct mlx5_flow_table *acl;
+	int acl_supported;
+	int vport_index;
+	int err;
+
+	acl_supported = (ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS) ?
+			MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support) :
+			MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support);
+
+	if (!acl_supported)
+		return ERR_PTR(-EOPNOTSUPP);
+
+	esw_debug(dev, "Create vport[%d] %s ACL table\n", vport_num,
+		  ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress");
+
+	vport_index = mlx5_eswitch_vport_num_to_index(esw, vport_num);
+	root_ns = mlx5_get_flow_vport_acl_namespace(dev, ns, vport_index);
+	if (!root_ns) {
+		esw_warn(dev, "Failed to get E-Switch root namespace for vport (%d)\n",
+			 vport_num);
+		return ERR_PTR(-EOPNOTSUPP);
+	}
+
+	acl = mlx5_create_vport_flow_table(root_ns, 0, size, 0, vport_num);
+	if (IS_ERR(acl)) {
+		err = PTR_ERR(acl);
+		esw_warn(dev, "vport[%d] create %s ACL table, err(%d)\n", vport_num,
+			 ns == MLX5_FLOW_NAMESPACE_ESW_INGRESS ? "ingress" : "egress", err);
+	}
+	return acl;
+}
+
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw,
+			       struct mlx5_vport *vport,
+			       struct mlx5_flow_destination *fwd_dest,
+			       u16 vlan_id, u32 flow_action)
+{
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	if (vport->egress.allowed_vlan)
+		return -EEXIST;
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
+
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = flow_action;
+	vport->egress.allowed_vlan =
+		mlx5_add_flow_rules(vport->egress.acl, spec,
+				    &flow_act, fwd_dest, 0);
+	if (IS_ERR(vport->egress.allowed_vlan)) {
+		err = PTR_ERR(vport->egress.allowed_vlan);
+		esw_warn(esw->dev,
+			 "vport[%d] configure egress vlan rule failed, err(%d)\n",
+			 vport->vport, err);
+		vport->egress.allowed_vlan = NULL;
+	}
+
+	kvfree(spec);
+	return err;
+}
+
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport)
+{
+	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
+		mlx5_del_flow_rules(vport->egress.allowed_vlan);
+		vport->egress.allowed_vlan = NULL;
+	}
+}
+
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *vlan_grp;
+	void *match_criteria;
+	u32 *flow_group_in;
+	int ret = 0;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	MLX5_SET(create_flow_group_in, flow_group_in,
+		 match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+				      flow_group_in, match_criteria);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+	vlan_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+	if (IS_ERR(vlan_grp)) {
+		ret = PTR_ERR(vlan_grp);
+		esw_warn(esw->dev,
+			 "Failed to create E-Switch vport[%d] egress pop vlans flow group, err(%d)\n",
+			 vport->vport, ret);
+		goto out;
+	}
+	vport->egress.vlan_grp = vlan_grp;
+
+out:
+	kvfree(flow_group_in);
+	return ret;
+}
+
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport)
+{
+	if (!IS_ERR_OR_NULL(vport->egress.vlan_grp)) {
+		mlx5_destroy_flow_group(vport->egress.vlan_grp);
+		vport->egress.vlan_grp = NULL;
+	}
+}
+
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport)
+{
+	if (IS_ERR_OR_NULL(vport->egress.acl))
+		return;
+
+	mlx5_destroy_flow_table(vport->egress.acl);
+	vport->egress.acl = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
new file mode 100644
index 000000000000..543372df6196
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -0,0 +1,22 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_HELPER_H__
+#define __MLX5_ESWITCH_ACL_HELPER_H__
+
+#include "eswitch.h"
+
+/* General acl helper functions */
+struct mlx5_flow_table *
+esw_acl_table_create(struct mlx5_eswitch *esw, u16 vport_num, int ns, int size);
+
+/* Egress acl helper functions */
+void esw_acl_egress_table_destroy(struct mlx5_vport *vport);
+int esw_egress_acl_vlan_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport,
+			       struct mlx5_flow_destination *fwd_dest,
+			       u16 vlan_id, u32 flow_action);
+void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport);
+int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
new file mode 100644
index 000000000000..6b05a3af4462
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_LGCY_H__
+#define __MLX5_ESWITCH_ACL_LGCY_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
new file mode 100644
index 000000000000..fc912b254226
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -0,0 +1,13 @@
+/* SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB */
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#ifndef __MLX5_ESWITCH_ACL_OFLD_H__
+#define __MLX5_ESWITCH_ACL_OFLD_H__
+
+#include "eswitch.h"
+
+/* Eswitch acl egress external APIs */
+int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+
+#endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ac79b7c9aeb3..ae74486b9c9e 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -35,6 +35,7 @@
 #include <linux/mlx5/mlx5_ifc.h>
 #include <linux/mlx5/vport.h>
 #include <linux/mlx5/fs.h>
+#include "esw/acl/lgcy.h"
 #include "mlx5_core.h"
 #include "lib/eq.h"
 #include "eswitch.h"
@@ -936,121 +937,6 @@ static void esw_vport_change_handler(struct work_struct *work)
 	mutex_unlock(&esw->state_lock);
 }
 
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
-				struct mlx5_vport *vport)
-{
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_group *vlan_grp = NULL;
-	struct mlx5_flow_group *drop_grp = NULL;
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *root_ns;
-	struct mlx5_flow_table *acl;
-	void *match_criteria;
-	u32 *flow_group_in;
-	/* The egress acl table contains 2 rules:
-	 * 1)Allow traffic with vlan_tag=vst_vlan_id
-	 * 2)Drop all other traffic.
-	 */
-	int table_size = 2;
-	int err = 0;
-
-	if (!MLX5_CAP_ESW_EGRESS_ACL(dev, ft_support))
-		return -EOPNOTSUPP;
-
-	if (!IS_ERR_OR_NULL(vport->egress.acl))
-		return 0;
-
-	esw_debug(dev, "Create vport[%d] egress ACL log_max_size(%d)\n",
-		  vport->vport, MLX5_CAP_ESW_EGRESS_ACL(dev, log_max_ft_size));
-
-	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_EGRESS,
-			mlx5_eswitch_vport_num_to_index(esw, vport->vport));
-	if (!root_ns) {
-		esw_warn(dev, "Failed to get E-Switch egress flow namespace for vport (%d)\n", vport->vport);
-		return -EOPNOTSUPP;
-	}
-
-	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!flow_group_in)
-		return -ENOMEM;
-
-	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR(acl)) {
-		err = PTR_ERR(acl);
-		esw_warn(dev, "Failed to create E-Switch vport[%d] egress flow Table, err(%d)\n",
-			 vport->vport, err);
-		goto out;
-	}
-
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.first_vid);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
-	vlan_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR(vlan_grp)) {
-		err = PTR_ERR(vlan_grp);
-		esw_warn(dev, "Failed to create E-Switch vport[%d] egress allowed vlans flow group, err(%d)\n",
-			 vport->vport, err);
-		goto out;
-	}
-
-	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
-	drop_grp = mlx5_create_flow_group(acl, flow_group_in);
-	if (IS_ERR(drop_grp)) {
-		err = PTR_ERR(drop_grp);
-		esw_warn(dev, "Failed to create E-Switch vport[%d] egress drop flow group, err(%d)\n",
-			 vport->vport, err);
-		goto out;
-	}
-
-	vport->egress.acl = acl;
-	vport->egress.drop_grp = drop_grp;
-	vport->egress.allowed_vlans_grp = vlan_grp;
-out:
-	kvfree(flow_group_in);
-	if (err && !IS_ERR_OR_NULL(vlan_grp))
-		mlx5_destroy_flow_group(vlan_grp);
-	if (err && !IS_ERR_OR_NULL(acl))
-		mlx5_destroy_flow_table(acl);
-	return err;
-}
-
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport)
-{
-	if (!IS_ERR_OR_NULL(vport->egress.allowed_vlan)) {
-		mlx5_del_flow_rules(vport->egress.allowed_vlan);
-		vport->egress.allowed_vlan = NULL;
-	}
-
-	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_rule)) {
-		mlx5_del_flow_rules(vport->egress.legacy.drop_rule);
-		vport->egress.legacy.drop_rule = NULL;
-	}
-}
-
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
-				  struct mlx5_vport *vport)
-{
-	if (IS_ERR_OR_NULL(vport->egress.acl))
-		return;
-
-	esw_debug(esw->dev, "Destroy vport[%d] E-Switch egress ACL\n", vport->vport);
-
-	esw_vport_cleanup_egress_rules(esw, vport);
-	mlx5_destroy_flow_group(vport->egress.allowed_vlans_grp);
-	mlx5_destroy_flow_group(vport->egress.drop_grp);
-	mlx5_destroy_flow_table(vport->egress.acl);
-	vport->egress.allowed_vlans_grp = NULL;
-	vport->egress.drop_grp = NULL;
-	vport->egress.acl = NULL;
-}
-
 static int
 esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw,
 					   struct mlx5_vport *vport)
@@ -1346,102 +1232,6 @@ out:
 	return err;
 }
 
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
-					  struct mlx5_vport *vport,
-					  u16 vlan_id, u32 flow_action)
-{
-	struct mlx5_flow_act flow_act = {};
-	struct mlx5_flow_spec *spec;
-	int err = 0;
-
-	if (vport->egress.allowed_vlan)
-		return -EEXIST;
-
-	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-	if (!spec)
-		return -ENOMEM;
-
-	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, spec->match_value, outer_headers.cvlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.first_vid);
-	MLX5_SET(fte_match_param, spec->match_value, outer_headers.first_vid, vlan_id);
-
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	flow_act.action = flow_action;
-	vport->egress.allowed_vlan =
-		mlx5_add_flow_rules(vport->egress.acl, spec,
-				    &flow_act, NULL, 0);
-	if (IS_ERR(vport->egress.allowed_vlan)) {
-		err = PTR_ERR(vport->egress.allowed_vlan);
-		esw_warn(esw->dev,
-			 "vport[%d] configure egress vlan rule failed, err(%d)\n",
-			 vport->vport, err);
-		vport->egress.allowed_vlan = NULL;
-	}
-
-	kvfree(spec);
-	return err;
-}
-
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
-				   struct mlx5_vport *vport)
-{
-	struct mlx5_fc *counter = vport->egress.legacy.drop_counter;
-	struct mlx5_flow_destination drop_ctr_dst = {0};
-	struct mlx5_flow_destination *dst = NULL;
-	struct mlx5_flow_act flow_act = {0};
-	int dest_num = 0;
-	int err = 0;
-
-	esw_vport_cleanup_egress_rules(esw, vport);
-
-	if (!vport->info.vlan && !vport->info.qos) {
-		esw_vport_disable_egress_acl(esw, vport);
-		return 0;
-	}
-
-	err = esw_vport_enable_egress_acl(esw, vport);
-	if (err) {
-		mlx5_core_warn(esw->dev,
-			       "failed to enable egress acl (%d) on vport[%d]\n",
-			       err, vport->vport);
-		return err;
-	}
-
-	esw_debug(esw->dev,
-		  "vport[%d] configure egress rules, vlan(%d) qos(%d)\n",
-		  vport->vport, vport->info.vlan, vport->info.qos);
-
-	/* Allowed vlan rule */
-	err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, vport->info.vlan,
-						    MLX5_FLOW_CONTEXT_ACTION_ALLOW);
-	if (err)
-		return err;
-
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
-	/* Attach egress drop flow counter */
-	if (counter) {
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
-		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
-		dst = &drop_ctr_dst;
-		dest_num++;
-	}
-	vport->egress.legacy.drop_rule =
-		mlx5_add_flow_rules(vport->egress.acl, NULL,
-				    &flow_act, dst, dest_num);
-	if (IS_ERR(vport->egress.legacy.drop_rule)) {
-		err = PTR_ERR(vport->egress.legacy.drop_rule);
-		esw_warn(esw->dev,
-			 "vport[%d] configure egress drop rule failed, err(%d)\n",
-			 vport->vport, err);
-		vport->egress.legacy.drop_rule = NULL;
-	}
-
-	return err;
-}
-
 static bool element_type_supported(struct mlx5_eswitch *esw, int type)
 {
 	const struct mlx5_core_dev *dev = esw->dev;
@@ -1667,17 +1457,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
 	if (ret)
 		goto ingress_err;
 
-	if (MLX5_CAP_ESW_EGRESS_ACL(esw->dev, flow_counter)) {
-		vport->egress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
-		if (IS_ERR(vport->egress.legacy.drop_counter)) {
-			esw_warn(esw->dev,
-				 "vport[%d] configure egress drop rule counter failed\n",
-				 vport->vport);
-			vport->egress.legacy.drop_counter = NULL;
-		}
-	}
-
-	ret = esw_vport_egress_config(esw, vport);
+	ret = esw_acl_egress_lgcy_setup(esw, vport);
 	if (ret)
 		goto egress_err;
 
@@ -1685,9 +1465,6 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
 
 egress_err:
 	esw_vport_disable_legacy_ingress_acl(esw, vport);
-	mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
-	vport->egress.legacy.drop_counter = NULL;
-
 ingress_err:
 	mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
 	vport->ingress.legacy.drop_counter = NULL;
@@ -1710,9 +1487,7 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw,
 	if (mlx5_esw_is_manager_vport(esw, vport->vport))
 		return;
 
-	esw_vport_disable_egress_acl(esw, vport);
-	mlx5_fc_destroy(esw->dev, vport->egress.legacy.drop_counter);
-	vport->egress.legacy.drop_counter = NULL;
+	esw_acl_egress_lgcy_cleanup(esw, vport);
 
 	esw_vport_disable_legacy_ingress_acl(esw, vport);
 	mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
@@ -2433,7 +2208,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 		err = esw_vport_ingress_config(esw, evport);
 		if (err)
 			return err;
-		err = esw_vport_egress_config(esw, evport);
+		err = esw_acl_egress_lgcy_setup(esw, evport);
 	}
 
 	return err;
@@ -2734,7 +2509,7 @@ static int mlx5_eswitch_query_vport_drop_stats(struct mlx5_core_dev *dev,
 	if (!vport->enabled)
 		goto unlock;
 
-	if (vport->egress.legacy.drop_counter)
+	if (!IS_ERR_OR_NULL(vport->egress.legacy.drop_counter))
 		mlx5_fc_query(dev, vport->egress.legacy.drop_counter,
 			      &stats->rx_dropped, &bytes);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ccbbea3e0505..490410401631 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -99,10 +99,10 @@ struct vport_ingress {
 
 struct vport_egress {
 	struct mlx5_flow_table *acl;
-	struct mlx5_flow_group *allowed_vlans_grp;
-	struct mlx5_flow_group *drop_grp;
 	struct mlx5_flow_handle  *allowed_vlan;
+	struct mlx5_flow_group *vlan_grp;
 	struct {
+		struct mlx5_flow_group *drop_grp;
 		struct mlx5_flow_handle *drop_rule;
 		struct mlx5_fc *drop_counter;
 	} legacy;
@@ -291,12 +291,7 @@ int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
 				       struct mlx5_vport *vport,
 				       int table_size);
 void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport);
-void esw_vport_cleanup_egress_rules(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport);
-int esw_vport_enable_egress_acl(struct mlx5_eswitch *esw,
-				struct mlx5_vport *vport);
-void esw_vport_disable_egress_acl(struct mlx5_eswitch *esw,
-				  struct mlx5_vport *vport);
+
 int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 			       u32 rate_mbps);
 
@@ -458,10 +453,6 @@ int mlx5_eswitch_del_vlan_action(struct mlx5_eswitch *esw,
 int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 				  u16 vport, u16 vlan, u8 qos, u8 set_flags);
 
-int mlx5_esw_create_vport_egress_acl_vlan(struct mlx5_eswitch *esw,
-					  struct mlx5_vport *vport,
-					  u16 vlan_id, u32 flow_action);
-
 static inline bool mlx5_esw_qos_enabled(struct mlx5_eswitch *esw)
 {
 	return esw->qos.enabled;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 554fc64d8ef6..0b00b30187ce 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -37,6 +37,7 @@
 #include <linux/mlx5/fs.h>
 #include "mlx5_core.h"
 #include "eswitch.h"
+#include "esw/acl/ofld.h"
 #include "esw/chains.h"
 #include "rdma.h"
 #include "en.h"
@@ -2093,37 +2094,6 @@ group_err:
 	return err;
 }
 
-static int esw_vport_egress_config(struct mlx5_eswitch *esw,
-				   struct mlx5_vport *vport)
-{
-	int err;
-
-	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
-		return 0;
-
-	esw_vport_cleanup_egress_rules(esw, vport);
-
-	err = esw_vport_enable_egress_acl(esw, vport);
-	if (err)
-		return err;
-
-	/* For prio tag mode, there is only 1 FTEs:
-	 * 1) prio tag packets - pop the prio tag VLAN, allow
-	 * Unmatched traffic is allowed by default
-	 */
-	esw_debug(esw->dev,
-		  "vport[%d] configure prio tag egress rules\n", vport->vport);
-
-	/* prio tag vlan rule - pop it so VF receives untagged packets */
-	err = mlx5_esw_create_vport_egress_acl_vlan(esw, vport, 0,
-						    MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
-						    MLX5_FLOW_CONTEXT_ACTION_ALLOW);
-	if (err)
-		esw_vport_disable_egress_acl(esw, vport);
-
-	return err;
-}
-
 static bool
 esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
 {
@@ -2167,7 +2137,7 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
 		return err;
 
 	if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
-		err = esw_vport_egress_config(esw, vport);
+		err = esw_acl_egress_ofld_setup(esw, vport);
 		if (err) {
 			esw_vport_cleanup_ingress_rules(esw, vport);
 			esw_vport_del_ingress_acl_modify_metadata(esw, vport);
@@ -2182,7 +2152,7 @@ void
 esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
 				      struct mlx5_vport *vport)
 {
-	esw_vport_disable_egress_acl(esw, vport);
+	esw_acl_egress_ofld_cleanup(vport);
 	esw_vport_cleanup_ingress_rules(esw, vport);
 	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
 	esw_vport_destroy_ingress_acl_group(vport);
-- 
cgit v1.2.3-59-g8ed1b


From 07bab9502641dff9c3c864162270d12c6dd0e834 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Fri, 27 Mar 2020 23:12:22 -0700
Subject: net/mlx5: E-Switch, Refactor eswitch ingress acl codes

Restructure the eswitch ingress ACL code into the esw/acl directory,
split across different files:
. ACL ingress helper functions go to esw/acl/helper.c/h
. ACL ingress functions used in offloads mode go to esw/acl/ingress_ofld.c
. ACL ingress functions used in legacy mode go to esw/acl/ingress_lgcy.c

This patch does not change any functionality.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +-
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.c   |  18 ++
 .../ethernet/mellanox/mlx5/core/esw/acl/helper.h   |   4 +
 .../mellanox/mlx5/core/esw/acl/ingress_lgcy.c      | 279 ++++++++++++++++++
 .../mellanox/mlx5/core/esw/acl/ingress_ofld.c      | 293 +++++++++++++++++++
 .../net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h |   4 +
 .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h |   4 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  | 322 +--------------------
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |   6 -
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 269 +----------------
 10 files changed, 619 insertions(+), 583 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index ad046b2ea4f9..3934dc258041 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -47,7 +47,8 @@ mlx5_core-$(CONFIG_MLX5_TC_CT)	     += en/tc_ct.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += eswitch.o eswitch_offloads.o eswitch_offloads_termtbl.o \
 				      ecpf.o rdma.o
 mlx5_core-$(CONFIG_MLX5_ESWITCH)   += esw/acl/helper.o \
-				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o
+				      esw/acl/egress_lgcy.o esw/acl/egress_ofld.o \
+				      esw/acl/ingress_lgcy.o esw/acl/ingress_ofld.o
 
 mlx5_core-$(CONFIG_MLX5_MPFS)      += lib/mpfs.o
 mlx5_core-$(CONFIG_VXLAN)          += lib/vxlan.o
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
index 8b7996721a7c..22f4c1c28006 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.c
@@ -140,3 +140,21 @@ void esw_acl_egress_table_destroy(struct mlx5_vport *vport)
 	mlx5_destroy_flow_table(vport->egress.acl);
 	vport->egress.acl = NULL;
 }
+
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport)
+{
+	if (!vport->ingress.acl)
+		return;
+
+	mlx5_destroy_flow_table(vport->ingress.acl);
+	vport->ingress.acl = NULL;
+}
+
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport)
+{
+	if (!vport->ingress.allow_rule)
+		return;
+
+	mlx5_del_flow_rules(vport->ingress.allow_rule);
+	vport->ingress.allow_rule = NULL;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
index 543372df6196..8dc4cab66a71 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/helper.h
@@ -19,4 +19,8 @@ void esw_acl_egress_vlan_destroy(struct mlx5_vport *vport);
 int esw_acl_egress_vlan_grp_create(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_vlan_grp_destroy(struct mlx5_vport *vport);
 
+/* Ingress acl helper functions */
+void esw_acl_ingress_table_destroy(struct mlx5_vport *vport);
+void esw_acl_ingress_allow_rule_destroy(struct mlx5_vport *vport);
+
 #endif /* __MLX5_ESWITCH_ACL_HELPER_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
new file mode 100644
index 000000000000..9bda4fe2eafa
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_lgcy.c
@@ -0,0 +1,279 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "lgcy.h"
+
+static void esw_acl_ingress_lgcy_rules_destroy(struct mlx5_vport *vport)
+{
+	if (vport->ingress.legacy.drop_rule) {
+		mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
+		vport->ingress.legacy.drop_rule = NULL;
+	}
+	esw_acl_ingress_allow_rule_destroy(vport);
+}
+
+static int esw_acl_ingress_lgcy_groups_create(struct mlx5_eswitch *esw,
+					      struct mlx5_vport *vport)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_core_dev *dev = esw->dev;
+	struct mlx5_flow_group *g;
+	void *match_criteria;
+	u32 *flow_group_in;
+	int err;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
+
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
+
+	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
+			 vport->vport, err);
+		goto spoof_err;
+	}
+	vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
+
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
+
+	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+			 vport->vport, err);
+		goto untagged_err;
+	}
+	vport->ingress.legacy.allow_untagged_only_grp = g;
+
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable,
+		 MLX5_MATCH_OUTER_HEADERS);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
+	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
+
+	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
+			 vport->vport, err);
+		goto allow_spoof_err;
+	}
+	vport->ingress.legacy.allow_spoofchk_only_grp = g;
+
+	memset(flow_group_in, 0, inlen);
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
+
+	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+	if (IS_ERR(g)) {
+		err = PTR_ERR(g);
+		esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+			 vport->vport, err);
+		goto drop_err;
+	}
+	vport->ingress.legacy.drop_grp = g;
+	kvfree(flow_group_in);
+	return 0;
+
+drop_err:
+	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+		vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+	}
+allow_spoof_err:
+	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+		vport->ingress.legacy.allow_untagged_only_grp = NULL;
+	}
+untagged_err:
+	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+		vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+	}
+spoof_err:
+	kvfree(flow_group_in);
+	return err;
+}
+
+static void esw_acl_ingress_lgcy_groups_destroy(struct mlx5_vport *vport)
+{
+	if (vport->ingress.legacy.allow_spoofchk_only_grp) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
+		vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
+	}
+	if (vport->ingress.legacy.allow_untagged_only_grp) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
+		vport->ingress.legacy.allow_untagged_only_grp = NULL;
+	}
+	if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
+		vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
+	}
+	if (vport->ingress.legacy.drop_grp) {
+		mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
+		vport->ingress.legacy.drop_grp = NULL;
+	}
+}
+
+/* Rebuild @vport's legacy ingress ACL from vport->info: old rules go first, and
+ * with no vlan/qos/spoofchk left the ACL is torn down. Returns 0 or -errno.
+ */
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
+{
+	struct mlx5_flow_destination drop_ctr_dst = {};
+	struct mlx5_flow_destination *dst = NULL;
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_spec *spec = NULL;
+	struct mlx5_fc *counter = NULL;
+	/* The ingress acl table contains 4 groups
+	 * (2 active rules at the same time -
+	 *      1 allow rule from one of the first 3 groups.
+	 *      1 drop rule from the last group):
+	 * 1)Allow untagged traffic with smac=original mac.
+	 * 2)Allow untagged traffic.
+	 * 3)Allow traffic with smac=original mac.
+	 * 4)Drop all other traffic.
+	 */
+	int table_size = 4;
+	int dest_num = 0;
+	int err = 0;
+	u8 *smac_v;
+
+	esw_acl_ingress_lgcy_rules_destroy(vport);
+
+	/* Reuse the counter of an earlier setup; if creation fails run without one */
+	if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) {
+		counter = vport->ingress.legacy.drop_counter;
+	} else if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
+		counter = mlx5_fc_create(esw->dev, false);
+		if (IS_ERR(counter)) {
+			esw_warn(esw->dev,
+				 "vport[%d] configure ingress drop rule counter failed\n",
+				 vport->vport);
+			counter = NULL;
+		}
+		vport->ingress.legacy.drop_counter = counter;
+	}
+
+	if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
+		esw_acl_ingress_lgcy_cleanup(esw, vport);
+		return 0;
+	}
+
+	if (!vport->ingress.acl) {
+		vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+							  MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+							  table_size);
+		if (IS_ERR_OR_NULL(vport->ingress.acl)) {
+			err = PTR_ERR(vport->ingress.acl);
+			vport->ingress.acl = NULL;
+			return err;
+		}
+
+		err = esw_acl_ingress_lgcy_groups_create(esw, vport);
+		if (err)
+			goto out;
+	}
+
+	esw_debug(esw->dev, "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
+		  vport->vport, vport->info.vlan, vport->info.qos);
+
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec) {
+		err = -ENOMEM;
+		goto out;
+	}
+
+	if (vport->info.vlan || vport->info.qos)
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+
+	if (vport->info.spoofchk) {
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
+		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
+		smac_v = MLX5_ADDR_OF(fte_match_param, spec->match_value,
+				      outer_headers.smac_47_16);
+		ether_addr_copy(smac_v, vport->info.mac);
+	}
+
+	/* Create ingress allow rule. spec holds the match built above and must
+	 * not be cleared here, or the allow rule loses its vlan/smac criteria.
+	 */
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+							&flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.allow_rule)) {
+		err = PTR_ERR(vport->ingress.allow_rule);
+		esw_warn(esw->dev, "vport[%d] configure ingress allow rule, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.allow_rule = NULL;
+		goto out;
+	}
+
+	memset(&flow_act, 0, sizeof(flow_act));
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
+	/* Attach drop flow counter */
+	if (counter) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
+		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
+		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
+		dst = &drop_ctr_dst;
+		dest_num++;
+	}
+	vport->ingress.legacy.drop_rule = mlx5_add_flow_rules(vport->ingress.acl, NULL,
+							      &flow_act, dst, dest_num);
+	if (IS_ERR(vport->ingress.legacy.drop_rule)) {
+		err = PTR_ERR(vport->ingress.legacy.drop_rule);
+		esw_warn(esw->dev, "vport[%d] configure ingress drop rule, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.legacy.drop_rule = NULL;
+		goto out;
+	}
+	kvfree(spec);
+	return 0;
+
+out:
+	esw_acl_ingress_lgcy_cleanup(esw, vport);
+	kvfree(spec);
+	return err;
+}
+
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport)
+{
+	if (IS_ERR_OR_NULL(vport->ingress.acl))
+		goto clean_drop_counter;
+
+	esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
+
+	esw_acl_ingress_lgcy_rules_destroy(vport);
+	esw_acl_ingress_lgcy_groups_destroy(vport);
+	esw_acl_ingress_table_destroy(vport);
+
+clean_drop_counter:
+	if (!IS_ERR_OR_NULL(vport->ingress.legacy.drop_counter)) {
+		mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
+		vport->ingress.legacy.drop_counter = NULL;
+	}
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
new file mode 100644
index 000000000000..1bae549f3fa7
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -0,0 +1,293 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "helper.h"
+#include "ofld.h"
+
+static bool
+esw_acl_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
+				 const struct mlx5_vport *vport)
+{
+	return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
+		mlx5_eswitch_is_vf_vport(esw, vport->vport));
+}
+
+static int esw_acl_ingress_prio_tag_create(struct mlx5_eswitch *esw,
+					   struct mlx5_vport *vport)
+{
+	struct mlx5_flow_act flow_act = {};
+	struct mlx5_flow_spec *spec;
+	int err = 0;
+
+	/* For prio tag mode, there is only 1 FTE:
+	 * 1) Untagged packets - push prio tag VLAN and modify metadata if
+	 * required, allow
+	 * Unmatched traffic is allowed by default
+	 */
+	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
+	if (!spec)
+		return -ENOMEM;
+
+	/* Untagged packets - push prio tag VLAN, allow */
+	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
+	MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
+	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
+			  MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_act.vlan[0].ethtype = ETH_P_8021Q;
+	flow_act.vlan[0].vid = 0;
+	flow_act.vlan[0].prio = 0;
+
+	if (vport->ingress.offloads.modify_metadata_rule) {
+		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
+		flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+	}
+
+	vport->ingress.allow_rule = mlx5_add_flow_rules(vport->ingress.acl, spec,
+							&flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.allow_rule)) {
+		err = PTR_ERR(vport->ingress.allow_rule);
+		esw_warn(esw->dev,
+			 "vport[%d] configure ingress untagged allow rule, err(%d)\n",
+			 vport->vport, err);
+		vport->ingress.allow_rule = NULL;
+	}
+
+	kvfree(spec);
+	return err;
+}
+
+static int esw_acl_ingress_mod_metadata_create(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport)
+{
+	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
+	struct mlx5_flow_act flow_act = {};
+	int err = 0;
+	u32 key;
+
+	key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
+	key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
+
+	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
+	MLX5_SET(set_action_in, action, field,
+		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
+	MLX5_SET(set_action_in, action, data, key);
+	MLX5_SET(set_action_in, action, offset,
+		 ESW_SOURCE_PORT_METADATA_OFFSET);
+	MLX5_SET(set_action_in, action, length,
+		 ESW_SOURCE_PORT_METADATA_BITS);
+
+	vport->ingress.offloads.modify_metadata =
+		mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+					 1, action);
+	if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
+		err = PTR_ERR(vport->ingress.offloads.modify_metadata);
+		esw_warn(esw->dev,
+			 "failed to alloc modify header for vport %d ingress acl (%d)\n",
+			 vport->vport, err);
+		return err;
+	}
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+	flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
+	vport->ingress.offloads.modify_metadata_rule =
+				mlx5_add_flow_rules(vport->ingress.acl,
+						    NULL, &flow_act, NULL, 0);
+	if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
+		err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
+		esw_warn(esw->dev,
+			 "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
+			 vport->vport, err);
+		mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+		vport->ingress.offloads.modify_metadata_rule = NULL;
+	}
+	return err;
+}
+
+static void esw_acl_ingress_mod_metadata_destroy(struct mlx5_eswitch *esw,
+						 struct mlx5_vport *vport)
+{
+	if (!vport->ingress.offloads.modify_metadata_rule)
+		return;
+
+	mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
+	mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
+	vport->ingress.offloads.modify_metadata_rule = NULL;
+}
+
+static int esw_acl_ingress_ofld_rules_create(struct mlx5_eswitch *esw,
+					     struct mlx5_vport *vport)
+{
+	int err;
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		err = esw_acl_ingress_mod_metadata_create(esw, vport);
+		if (err) {
+			esw_warn(esw->dev,
+				 "vport(%d) create ingress modify metadata, err(%d)\n",
+				 vport->vport, err);
+			return err;
+		}
+	}
+
+	if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+		err = esw_acl_ingress_prio_tag_create(esw, vport);
+		if (err) {
+			esw_warn(esw->dev,
+				 "vport(%d) create ingress prio tag rule, err(%d)\n",
+				 vport->vport, err);
+			goto prio_tag_err;
+		}
+	}
+
+	return 0;
+
+prio_tag_err:
+	esw_acl_ingress_mod_metadata_destroy(esw, vport);
+	return err;
+}
+
+static void esw_acl_ingress_ofld_rules_destroy(struct mlx5_eswitch *esw,
+					       struct mlx5_vport *vport)
+{
+	esw_acl_ingress_allow_rule_destroy(vport);
+	esw_acl_ingress_mod_metadata_destroy(esw, vport);
+}
+
+static int esw_acl_ingress_ofld_groups_create(struct mlx5_eswitch *esw,
+					      struct mlx5_vport *vport)
+{
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *g;
+	void *match_criteria;
+	u32 *flow_group_in;
+	u32 flow_index = 0;
+	int ret = 0;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in)
+		return -ENOMEM;
+
+	if (esw_acl_ingress_prio_tag_enabled(esw, vport)) {
+		/* This group is to hold FTE to match untagged packets when prio_tag
+		 * is enabled.
+		 */
+		match_criteria = MLX5_ADDR_OF(create_flow_group_in,
+					      flow_group_in, match_criteria);
+		MLX5_SET(create_flow_group_in, flow_group_in,
+			 match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
+		MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
+		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+		g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+		if (IS_ERR(g)) {
+			ret = PTR_ERR(g);
+			esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
+				 vport->vport, ret);
+			goto prio_tag_err;
+		}
+		vport->ingress.offloads.metadata_prio_tag_grp = g;
+		flow_index++;
+	}
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
+		/* This group holds an FTE with no match criteria; it adds
+		 * metadata to tagged packets if prio-tag is enabled, or to
+		 * all traffic in case prio-tag is disabled.
+		 */
+		memset(flow_group_in, 0, inlen);
+		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+
+		g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
+		if (IS_ERR(g)) {
+			ret = PTR_ERR(g);
+			esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
+				 vport->vport, ret);
+			goto metadata_err;
+		}
+		vport->ingress.offloads.metadata_allmatch_grp = g;
+	}
+
+	kvfree(flow_group_in);
+	return 0;
+
+metadata_err:
+	if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
+		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+		vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+	}
+prio_tag_err:
+	kvfree(flow_group_in);
+	return ret;
+}
+
+static void esw_acl_ingress_ofld_groups_destroy(struct mlx5_vport *vport)
+{
+	if (vport->ingress.offloads.metadata_allmatch_grp) {
+		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
+		vport->ingress.offloads.metadata_allmatch_grp = NULL;
+	}
+
+	if (vport->ingress.offloads.metadata_prio_tag_grp) {
+		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
+		vport->ingress.offloads.metadata_prio_tag_grp = NULL;
+	}
+}
+
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw,
+			       struct mlx5_vport *vport)
+{
+	int num_ftes = 0;
+	int err;
+
+	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+	    !esw_acl_ingress_prio_tag_enabled(esw, vport))
+		return 0;
+
+	esw_acl_ingress_allow_rule_destroy(vport);
+
+	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
+		num_ftes++;
+	if (esw_acl_ingress_prio_tag_enabled(esw, vport))
+		num_ftes++;
+
+	vport->ingress.acl = esw_acl_table_create(esw, vport->vport,
+						  MLX5_FLOW_NAMESPACE_ESW_INGRESS,
+						  num_ftes);
+	if (IS_ERR_OR_NULL(vport->ingress.acl)) {
+		err = PTR_ERR(vport->ingress.acl);
+		vport->ingress.acl = NULL;
+		return err;
+	}
+
+	err = esw_acl_ingress_ofld_groups_create(esw, vport);
+	if (err)
+		goto group_err;
+
+	esw_debug(esw->dev,
+		  "vport[%d] configure ingress rules\n", vport->vport);
+
+	err = esw_acl_ingress_ofld_rules_create(esw, vport);
+	if (err)
+		goto rules_err;
+
+	return 0;
+
+rules_err:
+	esw_acl_ingress_ofld_groups_destroy(vport);
+group_err:
+	esw_acl_ingress_table_destroy(vport);
+	return err;
+}
+
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
+				  struct mlx5_vport *vport)
+{
+	esw_acl_ingress_ofld_rules_destroy(esw, vport);
+	esw_acl_ingress_ofld_groups_destroy(vport);
+	esw_acl_ingress_table_destroy(vport);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
index 6b05a3af4462..44c152da3d83 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/lgcy.h
@@ -10,4 +10,8 @@
 int esw_acl_egress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_lgcy_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_lgcy_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
 #endif /* __MLX5_ESWITCH_ACL_LGCY_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
index fc912b254226..9e5e0fac29ef 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -10,4 +10,8 @@
 int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
 
+/* Eswitch acl ingress external APIs */
+int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+
 #endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index ae74486b9c9e..20ab13ff2303 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -937,301 +937,6 @@ static void esw_vport_change_handler(struct work_struct *work)
 	mutex_unlock(&esw->state_lock);
 }
 
-static int
-esw_vport_create_legacy_ingress_acl_groups(struct mlx5_eswitch *esw,
-					   struct mlx5_vport *vport)
-{
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_group *g;
-	void *match_criteria;
-	u32 *flow_group_in;
-	int err;
-
-	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!flow_group_in)
-		return -ENOMEM;
-
-	match_criteria = MLX5_ADDR_OF(create_flow_group_in, flow_group_in, match_criteria);
-
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 0);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 0);
-
-	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "vport[%d] ingress create untagged spoofchk flow group, err(%d)\n",
-			 vport->vport, err);
-		goto spoof_err;
-	}
-	vport->ingress.legacy.allow_untagged_spoofchk_grp = g;
-
-	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 1);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 1);
-
-	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
-			 vport->vport, err);
-		goto untagged_err;
-	}
-	vport->ingress.legacy.allow_untagged_only_grp = g;
-
-	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_47_16);
-	MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.smac_15_0);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 2);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 2);
-
-	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "vport[%d] ingress create spoofchk flow group, err(%d)\n",
-			 vport->vport, err);
-		goto allow_spoof_err;
-	}
-	vport->ingress.legacy.allow_spoofchk_only_grp = g;
-
-	memset(flow_group_in, 0, inlen);
-	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, 3);
-	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, 3);
-
-	g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-	if (IS_ERR(g)) {
-		err = PTR_ERR(g);
-		esw_warn(dev, "vport[%d] ingress create drop flow group, err(%d)\n",
-			 vport->vport, err);
-		goto drop_err;
-	}
-	vport->ingress.legacy.drop_grp = g;
-	kvfree(flow_group_in);
-	return 0;
-
-drop_err:
-	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_spoofchk_only_grp)) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
-		vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
-	}
-allow_spoof_err:
-	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_only_grp)) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
-		vport->ingress.legacy.allow_untagged_only_grp = NULL;
-	}
-untagged_err:
-	if (!IS_ERR_OR_NULL(vport->ingress.legacy.allow_untagged_spoofchk_grp)) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
-		vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
-	}
-spoof_err:
-	kvfree(flow_group_in);
-	return err;
-}
-
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
-				       struct mlx5_vport *vport, int table_size)
-{
-	struct mlx5_core_dev *dev = esw->dev;
-	struct mlx5_flow_namespace *root_ns;
-	struct mlx5_flow_table *acl;
-	int vport_index;
-	int err;
-
-	if (!MLX5_CAP_ESW_INGRESS_ACL(dev, ft_support))
-		return -EOPNOTSUPP;
-
-	esw_debug(dev, "Create vport[%d] ingress ACL log_max_size(%d)\n",
-		  vport->vport, MLX5_CAP_ESW_INGRESS_ACL(dev, log_max_ft_size));
-
-	vport_index = mlx5_eswitch_vport_num_to_index(esw, vport->vport);
-	root_ns = mlx5_get_flow_vport_acl_namespace(dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-						    vport_index);
-	if (!root_ns) {
-		esw_warn(dev, "Failed to get E-Switch ingress flow namespace for vport (%d)\n",
-			 vport->vport);
-		return -EOPNOTSUPP;
-	}
-
-	acl = mlx5_create_vport_flow_table(root_ns, 0, table_size, 0, vport->vport);
-	if (IS_ERR(acl)) {
-		err = PTR_ERR(acl);
-		esw_warn(dev, "vport[%d] ingress create flow Table, err(%d)\n",
-			 vport->vport, err);
-		return err;
-	}
-	vport->ingress.acl = acl;
-	return 0;
-}
-
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport)
-{
-	if (!vport->ingress.acl)
-		return;
-
-	mlx5_destroy_flow_table(vport->ingress.acl);
-	vport->ingress.acl = NULL;
-}
-
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
-				     struct mlx5_vport *vport)
-{
-	if (vport->ingress.legacy.drop_rule) {
-		mlx5_del_flow_rules(vport->ingress.legacy.drop_rule);
-		vport->ingress.legacy.drop_rule = NULL;
-	}
-
-	if (vport->ingress.allow_rule) {
-		mlx5_del_flow_rules(vport->ingress.allow_rule);
-		vport->ingress.allow_rule = NULL;
-	}
-}
-
-static void esw_vport_disable_legacy_ingress_acl(struct mlx5_eswitch *esw,
-						 struct mlx5_vport *vport)
-{
-	if (!vport->ingress.acl)
-		return;
-
-	esw_debug(esw->dev, "Destroy vport[%d] E-Switch ingress ACL\n", vport->vport);
-
-	esw_vport_cleanup_ingress_rules(esw, vport);
-	if (vport->ingress.legacy.allow_spoofchk_only_grp) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_spoofchk_only_grp);
-		vport->ingress.legacy.allow_spoofchk_only_grp = NULL;
-	}
-	if (vport->ingress.legacy.allow_untagged_only_grp) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_only_grp);
-		vport->ingress.legacy.allow_untagged_only_grp = NULL;
-	}
-	if (vport->ingress.legacy.allow_untagged_spoofchk_grp) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.allow_untagged_spoofchk_grp);
-		vport->ingress.legacy.allow_untagged_spoofchk_grp = NULL;
-	}
-	if (vport->ingress.legacy.drop_grp) {
-		mlx5_destroy_flow_group(vport->ingress.legacy.drop_grp);
-		vport->ingress.legacy.drop_grp = NULL;
-	}
-	esw_vport_destroy_ingress_acl_table(vport);
-}
-
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport)
-{
-	struct mlx5_fc *counter = vport->ingress.legacy.drop_counter;
-	struct mlx5_flow_destination drop_ctr_dst = {0};
-	struct mlx5_flow_destination *dst = NULL;
-	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_spec *spec = NULL;
-	int dest_num = 0;
-	int err = 0;
-	u8 *smac_v;
-
-	/* The ingress acl table contains 4 groups
-	 * (2 active rules at the same time -
-	 *      1 allow rule from one of the first 3 groups.
-	 *      1 drop rule from the last group):
-	 * 1)Allow untagged traffic with smac=original mac.
-	 * 2)Allow untagged traffic.
-	 * 3)Allow traffic with smac=original mac.
-	 * 4)Drop all other traffic.
-	 */
-	int table_size = 4;
-
-	esw_vport_cleanup_ingress_rules(esw, vport);
-
-	if (!vport->info.vlan && !vport->info.qos && !vport->info.spoofchk) {
-		esw_vport_disable_legacy_ingress_acl(esw, vport);
-		return 0;
-	}
-
-	if (!vport->ingress.acl) {
-		err = esw_vport_create_ingress_acl_table(esw, vport, table_size);
-		if (err) {
-			esw_warn(esw->dev,
-				 "vport[%d] enable ingress acl err (%d)\n",
-				 err, vport->vport);
-			return err;
-		}
-
-		err = esw_vport_create_legacy_ingress_acl_groups(esw, vport);
-		if (err)
-			goto out;
-	}
-
-	esw_debug(esw->dev,
-		  "vport[%d] configure ingress rules, vlan(%d) qos(%d)\n",
-		  vport->vport, vport->info.vlan, vport->info.qos);
-
-	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-	if (!spec) {
-		err = -ENOMEM;
-		goto out;
-	}
-
-	if (vport->info.vlan || vport->info.qos)
-		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
-
-	if (vport->info.spoofchk) {
-		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_47_16);
-		MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.smac_15_0);
-		smac_v = MLX5_ADDR_OF(fte_match_param,
-				      spec->match_value,
-				      outer_headers.smac_47_16);
-		ether_addr_copy(smac_v, vport->info.mac);
-	}
-
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-	vport->ingress.allow_rule =
-		mlx5_add_flow_rules(vport->ingress.acl, spec,
-				    &flow_act, NULL, 0);
-	if (IS_ERR(vport->ingress.allow_rule)) {
-		err = PTR_ERR(vport->ingress.allow_rule);
-		esw_warn(esw->dev,
-			 "vport[%d] configure ingress allow rule, err(%d)\n",
-			 vport->vport, err);
-		vport->ingress.allow_rule = NULL;
-		goto out;
-	}
-
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_DROP;
-
-	/* Attach drop flow counter */
-	if (counter) {
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_COUNT;
-		drop_ctr_dst.type = MLX5_FLOW_DESTINATION_TYPE_COUNTER;
-		drop_ctr_dst.counter_id = mlx5_fc_id(counter);
-		dst = &drop_ctr_dst;
-		dest_num++;
-	}
-	vport->ingress.legacy.drop_rule =
-		mlx5_add_flow_rules(vport->ingress.acl, NULL,
-				    &flow_act, dst, dest_num);
-	if (IS_ERR(vport->ingress.legacy.drop_rule)) {
-		err = PTR_ERR(vport->ingress.legacy.drop_rule);
-		esw_warn(esw->dev,
-			 "vport[%d] configure ingress drop rule, err(%d)\n",
-			 vport->vport, err);
-		vport->ingress.legacy.drop_rule = NULL;
-		goto out;
-	}
-	kvfree(spec);
-	return 0;
-
-out:
-	esw_vport_disable_legacy_ingress_acl(esw, vport);
-	kvfree(spec);
-	return err;
-}
-
 static bool element_type_supported(struct mlx5_eswitch *esw, int type)
 {
 	const struct mlx5_core_dev *dev = esw->dev;
@@ -1443,17 +1148,7 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
 	if (mlx5_esw_is_manager_vport(esw, vport->vport))
 		return 0;
 
-	if (MLX5_CAP_ESW_INGRESS_ACL(esw->dev, flow_counter)) {
-		vport->ingress.legacy.drop_counter = mlx5_fc_create(esw->dev, false);
-		if (IS_ERR(vport->ingress.legacy.drop_counter)) {
-			esw_warn(esw->dev,
-				 "vport[%d] configure ingress drop rule counter failed\n",
-				 vport->vport);
-			vport->ingress.legacy.drop_counter = NULL;
-		}
-	}
-
-	ret = esw_vport_ingress_config(esw, vport);
+	ret = esw_acl_ingress_lgcy_setup(esw, vport);
 	if (ret)
 		goto ingress_err;
 
@@ -1464,10 +1159,8 @@ static int esw_vport_create_legacy_acl_tables(struct mlx5_eswitch *esw,
 	return 0;
 
 egress_err:
-	esw_vport_disable_legacy_ingress_acl(esw, vport);
+	esw_acl_ingress_lgcy_cleanup(esw, vport);
 ingress_err:
-	mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
-	vport->ingress.legacy.drop_counter = NULL;
 	return ret;
 }
 
@@ -1488,10 +1181,7 @@ static void esw_vport_destroy_legacy_acl_tables(struct mlx5_eswitch *esw,
 		return;
 
 	esw_acl_egress_lgcy_cleanup(esw, vport);
-
-	esw_vport_disable_legacy_ingress_acl(esw, vport);
-	mlx5_fc_destroy(esw->dev, vport->ingress.legacy.drop_counter);
-	vport->ingress.legacy.drop_counter = NULL;
+	esw_acl_ingress_lgcy_cleanup(esw, vport);
 }
 
 static void esw_vport_cleanup_acl(struct mlx5_eswitch *esw,
@@ -2123,7 +1813,7 @@ int mlx5_eswitch_set_vport_mac(struct mlx5_eswitch *esw,
 	ether_addr_copy(evport->info.mac, mac);
 	evport->info.node_guid = node_guid;
 	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
-		err = esw_vport_ingress_config(esw, evport);
+		err = esw_acl_ingress_lgcy_setup(esw, evport);
 
 unlock:
 	mutex_unlock(&esw->state_lock);
@@ -2205,7 +1895,7 @@ int __mlx5_eswitch_set_vport_vlan(struct mlx5_eswitch *esw,
 	evport->info.vlan = vlan;
 	evport->info.qos = qos;
 	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY) {
-		err = esw_vport_ingress_config(esw, evport);
+		err = esw_acl_ingress_lgcy_setup(esw, evport);
 		if (err)
 			return err;
 		err = esw_acl_egress_lgcy_setup(esw, evport);
@@ -2250,7 +1940,7 @@ int mlx5_eswitch_set_vport_spoofchk(struct mlx5_eswitch *esw,
 			       "Spoofchk in set while MAC is invalid, vport(%d)\n",
 			       evport->vport);
 	if (evport->enabled && esw->mode == MLX5_ESWITCH_LEGACY)
-		err = esw_vport_ingress_config(esw, evport);
+		err = esw_acl_ingress_lgcy_setup(esw, evport);
 	if (err)
 		evport->info.spoofchk = pschk;
 	mutex_unlock(&esw->state_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 490410401631..ca7b7961c295 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -285,12 +285,6 @@ void esw_offloads_disable(struct mlx5_eswitch *esw);
 int esw_offloads_enable(struct mlx5_eswitch *esw);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
-void esw_vport_cleanup_ingress_rules(struct mlx5_eswitch *esw,
-				     struct mlx5_vport *vport);
-int esw_vport_create_ingress_acl_table(struct mlx5_eswitch *esw,
-				       struct mlx5_vport *vport,
-				       int table_size);
-void esw_vport_destroy_ingress_acl_table(struct mlx5_vport *vport);
 
 int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 			       u32 rate_mbps);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 0b00b30187ce..11bc9cc1d5f0 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -235,13 +235,6 @@ static struct mlx5_eswitch_rep *mlx5_eswitch_get_rep(struct mlx5_eswitch *esw,
 	return &esw->offloads.vport_reps[idx];
 }
 
-static bool
-esw_check_ingress_prio_tag_enabled(const struct mlx5_eswitch *esw,
-				   const struct mlx5_vport *vport)
-{
-	return (MLX5_CAP_GEN(esw->dev, prio_tag_required) &&
-		mlx5_eswitch_is_vf_vport(esw, vport->vport));
-}
 
 static void
 mlx5_eswitch_set_rule_source_port(struct mlx5_eswitch *esw,
@@ -1852,248 +1845,6 @@ static void esw_offloads_devcom_cleanup(struct mlx5_eswitch *esw)
 	mlx5_devcom_unregister_component(devcom, MLX5_DEVCOM_ESW_OFFLOADS);
 }
 
-static int esw_vport_ingress_prio_tag_config(struct mlx5_eswitch *esw,
-					     struct mlx5_vport *vport)
-{
-	struct mlx5_flow_act flow_act = {0};
-	struct mlx5_flow_spec *spec;
-	int err = 0;
-
-	/* For prio tag mode, there is only 1 FTEs:
-	 * 1) Untagged packets - push prio tag VLAN and modify metadata if
-	 * required, allow
-	 * Unmatched traffic is allowed by default
-	 */
-	spec = kvzalloc(sizeof(*spec), GFP_KERNEL);
-	if (!spec)
-		return -ENOMEM;
-
-	/* Untagged packets - push prio tag VLAN, allow */
-	MLX5_SET_TO_ONES(fte_match_param, spec->match_criteria, outer_headers.cvlan_tag);
-	MLX5_SET(fte_match_param, spec->match_value, outer_headers.cvlan_tag, 0);
-	spec->match_criteria_enable = MLX5_MATCH_OUTER_HEADERS;
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_VLAN_PUSH |
-			  MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-	flow_act.vlan[0].ethtype = ETH_P_8021Q;
-	flow_act.vlan[0].vid = 0;
-	flow_act.vlan[0].prio = 0;
-
-	if (vport->ingress.offloads.modify_metadata_rule) {
-		flow_act.action |= MLX5_FLOW_CONTEXT_ACTION_MOD_HDR;
-		flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
-	}
-
-	vport->ingress.allow_rule =
-		mlx5_add_flow_rules(vport->ingress.acl, spec,
-				    &flow_act, NULL, 0);
-	if (IS_ERR(vport->ingress.allow_rule)) {
-		err = PTR_ERR(vport->ingress.allow_rule);
-		esw_warn(esw->dev,
-			 "vport[%d] configure ingress untagged allow rule, err(%d)\n",
-			 vport->vport, err);
-		vport->ingress.allow_rule = NULL;
-	}
-
-	kvfree(spec);
-	return err;
-}
-
-static int esw_vport_add_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
-						     struct mlx5_vport *vport)
-{
-	u8 action[MLX5_UN_SZ_BYTES(set_add_copy_action_in_auto)] = {};
-	struct mlx5_flow_act flow_act = {};
-	int err = 0;
-	u32 key;
-
-	key = mlx5_eswitch_get_vport_metadata_for_match(esw, vport->vport);
-	key >>= ESW_SOURCE_PORT_METADATA_OFFSET;
-
-	MLX5_SET(set_action_in, action, action_type, MLX5_ACTION_TYPE_SET);
-	MLX5_SET(set_action_in, action, field,
-		 MLX5_ACTION_IN_FIELD_METADATA_REG_C_0);
-	MLX5_SET(set_action_in, action, data, key);
-	MLX5_SET(set_action_in, action, offset,
-		 ESW_SOURCE_PORT_METADATA_OFFSET);
-	MLX5_SET(set_action_in, action, length,
-		 ESW_SOURCE_PORT_METADATA_BITS);
-
-	vport->ingress.offloads.modify_metadata =
-		mlx5_modify_header_alloc(esw->dev, MLX5_FLOW_NAMESPACE_ESW_INGRESS,
-					 1, action);
-	if (IS_ERR(vport->ingress.offloads.modify_metadata)) {
-		err = PTR_ERR(vport->ingress.offloads.modify_metadata);
-		esw_warn(esw->dev,
-			 "failed to alloc modify header for vport %d ingress acl (%d)\n",
-			 vport->vport, err);
-		return err;
-	}
-
-	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_MOD_HDR | MLX5_FLOW_CONTEXT_ACTION_ALLOW;
-	flow_act.modify_hdr = vport->ingress.offloads.modify_metadata;
-	vport->ingress.offloads.modify_metadata_rule =
-				mlx5_add_flow_rules(vport->ingress.acl,
-						    NULL, &flow_act, NULL, 0);
-	if (IS_ERR(vport->ingress.offloads.modify_metadata_rule)) {
-		err = PTR_ERR(vport->ingress.offloads.modify_metadata_rule);
-		esw_warn(esw->dev,
-			 "failed to add setting metadata rule for vport %d ingress acl, err(%d)\n",
-			 vport->vport, err);
-		mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
-		vport->ingress.offloads.modify_metadata_rule = NULL;
-	}
-	return err;
-}
-
-static void esw_vport_del_ingress_acl_modify_metadata(struct mlx5_eswitch *esw,
-						      struct mlx5_vport *vport)
-{
-	if (vport->ingress.offloads.modify_metadata_rule) {
-		mlx5_del_flow_rules(vport->ingress.offloads.modify_metadata_rule);
-		mlx5_modify_header_dealloc(esw->dev, vport->ingress.offloads.modify_metadata);
-
-		vport->ingress.offloads.modify_metadata_rule = NULL;
-	}
-}
-
-static int esw_vport_create_ingress_acl_group(struct mlx5_eswitch *esw,
-					      struct mlx5_vport *vport)
-{
-	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
-	struct mlx5_flow_group *g;
-	void *match_criteria;
-	u32 *flow_group_in;
-	u32 flow_index = 0;
-	int ret = 0;
-
-	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
-	if (!flow_group_in)
-		return -ENOMEM;
-
-	if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
-		/* This group is to hold FTE to match untagged packets when prio_tag
-		 * is enabled.
-		 */
-		memset(flow_group_in, 0, inlen);
-
-		match_criteria = MLX5_ADDR_OF(create_flow_group_in,
-					      flow_group_in, match_criteria);
-		MLX5_SET(create_flow_group_in, flow_group_in,
-			 match_criteria_enable, MLX5_MATCH_OUTER_HEADERS);
-		MLX5_SET_TO_ONES(fte_match_param, match_criteria, outer_headers.cvlan_tag);
-		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
-		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
-		g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-		if (IS_ERR(g)) {
-			ret = PTR_ERR(g);
-			esw_warn(esw->dev, "vport[%d] ingress create untagged flow group, err(%d)\n",
-				 vport->vport, ret);
-			goto prio_tag_err;
-		}
-		vport->ingress.offloads.metadata_prio_tag_grp = g;
-		flow_index++;
-	}
-
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
-		/* This group holds an FTE with no matches for add metadata for
-		 * tagged packets, if prio-tag is enabled (as a fallthrough),
-		 * or all traffic in case prio-tag is disabled.
-		 */
-		memset(flow_group_in, 0, inlen);
-		MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
-		MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
-
-		g = mlx5_create_flow_group(vport->ingress.acl, flow_group_in);
-		if (IS_ERR(g)) {
-			ret = PTR_ERR(g);
-			esw_warn(esw->dev, "vport[%d] ingress create drop flow group, err(%d)\n",
-				 vport->vport, ret);
-			goto metadata_err;
-		}
-		vport->ingress.offloads.metadata_allmatch_grp = g;
-	}
-
-	kvfree(flow_group_in);
-	return 0;
-
-metadata_err:
-	if (!IS_ERR_OR_NULL(vport->ingress.offloads.metadata_prio_tag_grp)) {
-		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
-		vport->ingress.offloads.metadata_prio_tag_grp = NULL;
-	}
-prio_tag_err:
-	kvfree(flow_group_in);
-	return ret;
-}
-
-static void esw_vport_destroy_ingress_acl_group(struct mlx5_vport *vport)
-{
-	if (vport->ingress.offloads.metadata_allmatch_grp) {
-		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_allmatch_grp);
-		vport->ingress.offloads.metadata_allmatch_grp = NULL;
-	}
-
-	if (vport->ingress.offloads.metadata_prio_tag_grp) {
-		mlx5_destroy_flow_group(vport->ingress.offloads.metadata_prio_tag_grp);
-		vport->ingress.offloads.metadata_prio_tag_grp = NULL;
-	}
-}
-
-static int esw_vport_ingress_config(struct mlx5_eswitch *esw,
-				    struct mlx5_vport *vport)
-{
-	int num_ftes = 0;
-	int err;
-
-	if (!mlx5_eswitch_vport_match_metadata_enabled(esw) &&
-	    !esw_check_ingress_prio_tag_enabled(esw, vport))
-		return 0;
-
-	esw_vport_cleanup_ingress_rules(esw, vport);
-
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw))
-		num_ftes++;
-	if (esw_check_ingress_prio_tag_enabled(esw, vport))
-		num_ftes++;
-
-	err = esw_vport_create_ingress_acl_table(esw, vport, num_ftes);
-	if (err) {
-		esw_warn(esw->dev,
-			 "failed to enable ingress acl (%d) on vport[%d]\n",
-			 err, vport->vport);
-		return err;
-	}
-
-	err = esw_vport_create_ingress_acl_group(esw, vport);
-	if (err)
-		goto group_err;
-
-	esw_debug(esw->dev,
-		  "vport[%d] configure ingress rules\n", vport->vport);
-
-	if (mlx5_eswitch_vport_match_metadata_enabled(esw)) {
-		err = esw_vport_add_ingress_acl_modify_metadata(esw, vport);
-		if (err)
-			goto metadata_err;
-	}
-
-	if (esw_check_ingress_prio_tag_enabled(esw, vport)) {
-		err = esw_vport_ingress_prio_tag_config(esw, vport);
-		if (err)
-			goto prio_tag_err;
-	}
-	return 0;
-
-prio_tag_err:
-	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
-metadata_err:
-	esw_vport_destroy_ingress_acl_group(vport);
-group_err:
-	esw_vport_destroy_ingress_acl_table(vport);
-	return err;
-}
-
 static bool
 esw_check_vport_match_metadata_supported(const struct mlx5_eswitch *esw)
 {
@@ -2132,19 +1883,20 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
 {
 	int err;
 
-	err = esw_vport_ingress_config(esw, vport);
+	err = esw_acl_ingress_ofld_setup(esw, vport);
 	if (err)
 		return err;
 
 	if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
 		err = esw_acl_egress_ofld_setup(esw, vport);
-		if (err) {
-			esw_vport_cleanup_ingress_rules(esw, vport);
-			esw_vport_del_ingress_acl_modify_metadata(esw, vport);
-			esw_vport_destroy_ingress_acl_group(vport);
-			esw_vport_destroy_ingress_acl_table(vport);
-		}
+		if (err)
+			goto egress_err;
 	}
+
+	return 0;
+
+egress_err:
+	esw_acl_ingress_ofld_cleanup(esw, vport);
 	return err;
 }
 
@@ -2153,10 +1905,7 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
 				      struct mlx5_vport *vport)
 {
 	esw_acl_egress_ofld_cleanup(vport);
-	esw_vport_cleanup_ingress_rules(esw, vport);
-	esw_vport_del_ingress_acl_modify_metadata(esw, vport);
-	esw_vport_destroy_ingress_acl_group(vport);
-	esw_vport_destroy_ingress_acl_table(vport);
+	esw_acl_ingress_ofld_cleanup(esw, vport);
 }
 
 static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
-- 
cgit v1.2.3-59-g8ed1b


From bf773dc0e6d55a828a9111124b1d7836f2d4492c Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Mon, 16 Mar 2020 17:32:50 -0700
Subject: net/mlx5: E-Switch, Introduce APIs to enable egress acl
 forward-to-vport rule

By default, an e-switch vport's egress acl just forwards packets to its
counterpart NIC vport using the existing egress acl table.

During port failover in a bonding scenario where two VF representors
are bonded, the egress acl forward-to-vport rule is added to the
existing egress acl table of the e-switch vport of the passive/inactive
slave representor. The rule forwards packets to the other NIC vport,
i.e. the active slave representor's NIC vport, to handle egress
"failover" traffic.

Enable egress acl and have APIs to create and destroy egress acl
forward-to-vport rule and group.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/esw/acl/egress_ofld.c       | 185 ++++++++++++++++++---
 .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h |  10 ++
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  16 +-
 3 files changed, 187 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
index 49a53ebf56dd..07b2acd7e6b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/egress_ofld.c
@@ -6,55 +6,165 @@
 #include "helper.h"
 #include "ofld.h"
 
+static void esw_acl_egress_ofld_fwd2vport_destroy(struct mlx5_vport *vport)
+{
+	if (!vport->egress.offloads.fwd_rule)
+		return;
+
+	mlx5_del_flow_rules(vport->egress.offloads.fwd_rule);
+	vport->egress.offloads.fwd_rule = NULL;
+}
+
+static int esw_acl_egress_ofld_fwd2vport_create(struct mlx5_eswitch *esw,
+						struct mlx5_vport *vport,
+						struct mlx5_flow_destination *fwd_dest)
+{
+	struct mlx5_flow_act flow_act = {};
+	int err = 0;
+
+	esw_debug(esw->dev, "vport(%d) configure egress acl rule fwd2vport(%d)\n",
+		  vport->vport, fwd_dest->vport.num);
+
+	/* Delete the old egress forward-to-vport rule if any */
+	esw_acl_egress_ofld_fwd2vport_destroy(vport);
+
+	flow_act.action = MLX5_FLOW_CONTEXT_ACTION_FWD_DEST;
+
+	vport->egress.offloads.fwd_rule =
+		mlx5_add_flow_rules(vport->egress.acl, NULL,
+				    &flow_act, fwd_dest, 1);
+	if (IS_ERR(vport->egress.offloads.fwd_rule)) {
+		err = PTR_ERR(vport->egress.offloads.fwd_rule);
+		esw_warn(esw->dev,
+			 "vport(%d) failed to add fwd2vport acl rule err(%d)\n",
+			 vport->vport, err);
+		vport->egress.offloads.fwd_rule = NULL;
+	}
+
+	return err;
+}
+
 static int esw_acl_egress_ofld_rules_create(struct mlx5_eswitch *esw,
-					    struct mlx5_vport *vport)
+					    struct mlx5_vport *vport,
+					    struct mlx5_flow_destination *fwd_dest)
 {
-	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
-		return 0;
+	int err = 0;
+	int action;
+
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+		/* For prio tag mode, there is only 1 FTEs:
+		 * 1) prio tag packets - pop the prio tag VLAN, allow
+		 * Unmatched traffic is allowed by default
+		 */
+		esw_debug(esw->dev,
+			  "vport[%d] configure prio tag egress rules\n", vport->vport);
+
+		action = MLX5_FLOW_CONTEXT_ACTION_VLAN_POP;
+		action |= fwd_dest ? MLX5_FLOW_CONTEXT_ACTION_FWD_DEST :
+			  MLX5_FLOW_CONTEXT_ACTION_ALLOW;
+
+		/* prio tag vlan rule - pop it so vport receives untagged packets */
+		err = esw_egress_acl_vlan_create(esw, vport, fwd_dest, 0, action);
+		if (err)
+			goto prio_err;
+	}
 
-	/* For prio tag mode, there is only 1 FTEs:
-	 * 1) prio tag packets - pop the prio tag VLAN, allow
-	 * Unmatched traffic is allowed by default
-	 */
-	esw_debug(esw->dev,
-		  "vport[%d] configure prio tag egress rules\n", vport->vport);
+	if (fwd_dest) {
+		err = esw_acl_egress_ofld_fwd2vport_create(esw, vport, fwd_dest);
+		if (err)
+			goto fwd_err;
+	}
 
-	/* prio tag vlan rule - pop it so vport receives untagged packets */
-	return esw_egress_acl_vlan_create(esw, vport, NULL, 0,
-					  MLX5_FLOW_CONTEXT_ACTION_VLAN_POP |
-					  MLX5_FLOW_CONTEXT_ACTION_ALLOW);
+	return 0;
+
+fwd_err:
+	esw_acl_egress_vlan_destroy(vport);
+prio_err:
+	return err;
 }
 
 static void esw_acl_egress_ofld_rules_destroy(struct mlx5_vport *vport)
 {
 	esw_acl_egress_vlan_destroy(vport);
+	esw_acl_egress_ofld_fwd2vport_destroy(vport);
 }
 
 static int esw_acl_egress_ofld_groups_create(struct mlx5_eswitch *esw,
 					     struct mlx5_vport *vport)
 {
-	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
-		return 0;
+	int inlen = MLX5_ST_SZ_BYTES(create_flow_group_in);
+	struct mlx5_flow_group *fwd_grp;
+	u32 *flow_group_in;
+	u32 flow_index = 0;
+	int ret = 0;
+
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required)) {
+		ret = esw_acl_egress_vlan_grp_create(esw, vport);
+		if (ret)
+			return ret;
+
+		flow_index++;
+	}
+
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(esw))
+		goto out;
+
+	flow_group_in = kvzalloc(inlen, GFP_KERNEL);
+	if (!flow_group_in) {
+		ret = -ENOMEM;
+		goto fwd_grp_err;
+	}
+
+	/* This group holds 1 FTE to forward all packets to other vport
+	 * when bond vports is supported.
+	 */
+	MLX5_SET(create_flow_group_in, flow_group_in, start_flow_index, flow_index);
+	MLX5_SET(create_flow_group_in, flow_group_in, end_flow_index, flow_index);
+	fwd_grp = mlx5_create_flow_group(vport->egress.acl, flow_group_in);
+	if (IS_ERR(fwd_grp)) {
+		ret = PTR_ERR(fwd_grp);
+		esw_warn(esw->dev,
+			 "Failed to create vport[%d] egress fwd2vport flow group, err(%d)\n",
+			 vport->vport, ret);
+		kvfree(flow_group_in);
+		goto fwd_grp_err;
+	}
+	vport->egress.offloads.fwd_grp = fwd_grp;
+	kvfree(flow_group_in);
+	return 0;
 
-	return esw_acl_egress_vlan_grp_create(esw, vport);
+fwd_grp_err:
+	esw_acl_egress_vlan_grp_destroy(vport);
+out:
+	return ret;
 }
 
 static void esw_acl_egress_ofld_groups_destroy(struct mlx5_vport *vport)
 {
+	if (!IS_ERR_OR_NULL(vport->egress.offloads.fwd_grp)) {
+		mlx5_destroy_flow_group(vport->egress.offloads.fwd_grp);
+		vport->egress.offloads.fwd_grp = NULL;
+	}
 	esw_acl_egress_vlan_grp_destroy(vport);
 }
 
 int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport)
 {
+	int table_size = 0;
 	int err;
 
-	if (!MLX5_CAP_GEN(esw->dev, prio_tag_required))
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(esw) &&
+	    !MLX5_CAP_GEN(esw->dev, prio_tag_required))
 		return 0;
 
 	esw_acl_egress_ofld_rules_destroy(vport);
 
+	if (mlx5_esw_acl_egress_fwd2vport_supported(esw))
+		table_size++;
+	if (MLX5_CAP_GEN(esw->dev, prio_tag_required))
+		table_size++;
 	vport->egress.acl = esw_acl_table_create(esw, vport->vport,
-						 MLX5_FLOW_NAMESPACE_ESW_EGRESS, 0);
+						 MLX5_FLOW_NAMESPACE_ESW_EGRESS, table_size);
 	if (IS_ERR_OR_NULL(vport->egress.acl)) {
 		err = PTR_ERR(vport->egress.acl);
 		vport->egress.acl = NULL;
@@ -67,7 +177,7 @@ int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport
 
 	esw_debug(esw->dev, "vport[%d] configure egress rules\n", vport->vport);
 
-	err = esw_acl_egress_ofld_rules_create(esw, vport);
+	err = esw_acl_egress_ofld_rules_create(esw, vport, NULL);
 	if (err)
 		goto rules_err;
 
@@ -86,3 +196,40 @@ void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport)
 	esw_acl_egress_ofld_groups_destroy(vport);
 	esw_acl_egress_table_destroy(vport);
 }
+
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+				   u16 passive_vport_num)
+{
+	struct mlx5_vport *passive_vport = mlx5_eswitch_get_vport(esw, passive_vport_num);
+	struct mlx5_vport *active_vport = mlx5_eswitch_get_vport(esw, active_vport_num);
+	struct mlx5_flow_destination fwd_dest = {};
+
+	if (IS_ERR(active_vport))
+		return PTR_ERR(active_vport);
+	if (IS_ERR(passive_vport))
+		return PTR_ERR(passive_vport);
+
+	/* Cleanup and recreate rules WITHOUT fwd2vport of active vport */
+	esw_acl_egress_ofld_rules_destroy(active_vport);
+	esw_acl_egress_ofld_rules_create(esw, active_vport, NULL);
+
+	/* Cleanup and recreate all rules + fwd2vport rule of passive vport to forward */
+	esw_acl_egress_ofld_rules_destroy(passive_vport);
+	fwd_dest.type = MLX5_FLOW_DESTINATION_TYPE_VPORT;
+	fwd_dest.vport.num = active_vport_num;
+	fwd_dest.vport.vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+	fwd_dest.vport.flags = MLX5_FLOW_DEST_VPORT_VHCA_ID;
+
+	return esw_acl_egress_ofld_rules_create(esw, passive_vport, &fwd_dest);
+}
+
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+
+	if (IS_ERR(vport))
+		return PTR_ERR(vport);
+
+	esw_acl_egress_ofld_rules_destroy(vport);
+	return esw_acl_egress_ofld_rules_create(esw, vport, NULL);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
index 9e5e0fac29ef..90ddc5d7da46 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -9,6 +9,16 @@
 /* Eswitch acl egress external APIs */
 int esw_acl_egress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_egress_ofld_cleanup(struct mlx5_vport *vport);
+int mlx5_esw_acl_egress_vport_bond(struct mlx5_eswitch *esw, u16 active_vport_num,
+				   u16 passive_vport_num);
+int mlx5_esw_acl_egress_vport_unbond(struct mlx5_eswitch *esw, u16 vport_num);
+
+static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *esw)
+{
+	return esw && esw->mode == MLX5_ESWITCH_OFFLOADS &&
+		mlx5_eswitch_vport_match_metadata_enabled(esw) &&
+		MLX5_CAP_ESW_FLOWTABLE(esw->dev, egress_acl_forward_to_vport);
+}
 
 /* Eswitch acl ingress external APIs */
 int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index ca7b7961c295..7b6b3686b666 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -101,11 +101,17 @@ struct vport_egress {
 	struct mlx5_flow_table *acl;
 	struct mlx5_flow_handle  *allowed_vlan;
 	struct mlx5_flow_group *vlan_grp;
-	struct {
-		struct mlx5_flow_group *drop_grp;
-		struct mlx5_flow_handle *drop_rule;
-		struct mlx5_fc *drop_counter;
-	} legacy;
+	union {
+		struct {
+			struct mlx5_flow_group *drop_grp;
+			struct mlx5_flow_handle *drop_rule;
+			struct mlx5_fc *drop_counter;
+		} legacy;
+		struct {
+			struct mlx5_flow_group *fwd_grp;
+			struct mlx5_flow_handle *fwd_rule;
+		} offloads;
+	};
 };
 
 struct mlx5_vport_drop_stats {
-- 
cgit v1.2.3-59-g8ed1b


From 7e51891a237f9ea319f53f9beb83afb0077d88e6 Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Fri, 21 Jun 2019 13:23:44 -0700
Subject: net/mlx5e: Use netdev events to set/del egress acl forward-to-vport
 rule

Register a notifier block to handle netdev events for bond device
of non-uplink representors to support eswitch vports bonding.

When a non-uplink representor is a lower dev (slave) of a bond and
becomes active, add an egress acl forward-to-vport rule to all slave
netdevs (active + standby) to forward to this representor's vport. Use
the change lower state netdev event to do this.

Use the change upper event to detect a slave representor being unslaved
from the lag device, and delete its vport egress acl forward rule, if any.

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Makefile   |   3 +-
 .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c  | 161 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |   8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |   7 +
 4 files changed, 175 insertions(+), 4 deletions(-)
 create mode 100644 drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Makefile b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
index 3934dc258041..b61e47bc16e8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Makefile
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Makefile
@@ -34,7 +34,8 @@ mlx5_core-$(CONFIG_MLX5_EN_ARFS)     += en_arfs.o
 mlx5_core-$(CONFIG_MLX5_EN_RXNFC)    += en_fs_ethtool.o
 mlx5_core-$(CONFIG_MLX5_CORE_EN_DCB) += en_dcbnl.o en/port_buffer.o
 mlx5_core-$(CONFIG_PCI_HYPERV_INTERFACE) += en/hv_vhca_stats.o
-mlx5_core-$(CONFIG_MLX5_ESWITCH)     += en_rep.o lib/geneve.o lib/port_tun.o lag_mp.o
+mlx5_core-$(CONFIG_MLX5_ESWITCH)     += lag_mp.o lib/geneve.o lib/port_tun.o \
+					en_rep.o en/rep/bond.o
 mlx5_core-$(CONFIG_MLX5_CLS_ACT)     += en_tc.o en/rep/tc.o en/rep/neigh.o \
 					en/mapping.o esw/chains.o en/tc_tun.o \
 					en/tc_tun_vxlan.o en/tc_tun_gre.o en/tc_tun_geneve.o \
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
new file mode 100644
index 000000000000..d0aab36f1947
--- /dev/null
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -0,0 +1,161 @@
+// SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
+/* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
+
+#include <net/lag.h>
+
+#include "mlx5_core.h"
+#include "eswitch.h"
+#include "esw/acl/ofld.h"
+#include "en_rep.h"
+
+struct mlx5e_rep_bond {
+	struct notifier_block nb;
+	struct netdev_net_notifier nn;
+};
+
+static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
+{
+	struct mlx5e_priv *priv = netdev_priv(netdev);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	/* A given netdev is not a representor or not a slave of LAG configuration */
+	if (!mlx5e_eswitch_rep(netdev) || !bond_slave_get_rtnl(netdev))
+		return false;
+
+	/* Egress acl forward to vport is supported only non-uplink representor */
+	return rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
+static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *ptr)
+{
+	struct netdev_notifier_changelowerstate_info *info;
+	struct netdev_lag_lower_state_info *lag_info;
+	struct mlx5e_rep_priv *rpriv;
+	struct net_device *lag_dev;
+	struct mlx5e_priv *priv;
+	struct list_head *iter;
+	struct net_device *dev;
+	u16 acl_vport_num;
+	u16 fwd_vport_num;
+
+	if (!mlx5e_rep_is_lag_netdev(netdev))
+		return;
+
+	info = ptr;
+	lag_info = info->lower_state_info;
+	/* This is not an event of a representor becoming active slave */
+	if (!lag_info->tx_enabled)
+		return;
+
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+	fwd_vport_num = rpriv->rep->vport;
+	lag_dev = netdev_master_upper_dev_get(netdev);
+
+	netdev_dbg(netdev, "lag_dev(%s)'s slave vport(%d) is txable(%d)\n",
+		   lag_dev->name, fwd_vport_num, net_lag_port_dev_txable(netdev));
+
+	/* Point everyone's egress acl to the vport of the active representor */
+	netdev_for_each_lower_dev(lag_dev, dev, iter) {
+		priv = netdev_priv(dev);
+		rpriv = priv->ppriv;
+		acl_vport_num = rpriv->rep->vport;
+		if (acl_vport_num != fwd_vport_num) {
+			mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+						       fwd_vport_num,
+						       acl_vport_num);
+		}
+	}
+}
+
+static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
+{
+	struct netdev_notifier_changeupper_info *info = ptr;
+	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *priv;
+
+	if (!mlx5e_rep_is_lag_netdev(netdev))
+		return;
+
+	/* Nothing to setup for new enslaved representor */
+	if (info->linking)
+		return;
+
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+	netdev_dbg(netdev, "Unslave, reset vport(%d) egress acl\n", rpriv->rep->vport);
+
+	/* Reset all egress acl rules of unslave representor's vport */
+	mlx5_esw_acl_egress_vport_unbond(priv->mdev->priv.eswitch,
+					 rpriv->rep->vport);
+}
+
+/* Bond device of representors and netdev events are used here in specific way
+ * to support eswitch vports bonding and to perform failover of eswitch vport
+ * by modifying the vport's egress acl of lower dev representors. Thus this
+ * also change the traditional behavior of lower dev under bond device.
+ * All non-representor netdevs or representors of other vendors as lower dev
+ * of bond device are not supported.
+ */
+static int mlx5e_rep_esw_bond_netevent(struct notifier_block *nb,
+				       unsigned long event, void *ptr)
+{
+	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
+
+	switch (event) {
+	case NETDEV_CHANGELOWERSTATE:
+		mlx5e_rep_changelowerstate_event(netdev, ptr);
+		break;
+	case NETDEV_CHANGEUPPER:
+		mlx5e_rep_changeupper_event(netdev, ptr);
+		break;
+	}
+	return NOTIFY_DONE;
+}
+
+/* If HW support eswitch vports bonding, register a specific notifier to
+ * handle it when two or more representors are bonded
+ */
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
+	struct net_device *netdev = rpriv->netdev;
+	struct mlx5e_priv *priv;
+	int ret = 0;
+
+	priv = netdev_priv(netdev);
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch))
+		goto out;
+
+	uplink_priv->bond = kvzalloc(sizeof(*uplink_priv->bond), GFP_KERNEL);
+	if (!uplink_priv->bond) {
+		ret = -ENOMEM;
+		goto out;
+	}
+
+	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
+	ret = register_netdevice_notifier_dev_net(netdev,
+						  &uplink_priv->bond->nb,
+						  &uplink_priv->bond->nn);
+	if (ret) {
+		netdev_err(netdev, "register bonding netevent notifier, err(%d)\n", ret);
+		kvfree(uplink_priv->bond);
+		uplink_priv->bond = NULL;
+	}
+out:
+	return ret;
+}
+
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv)
+{
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
+
+	if (!mlx5_esw_acl_egress_fwd2vport_supported(priv->mdev->priv.eswitch) ||
+	    !rpriv->uplink_priv.bond)
+		return;
+
+	unregister_netdevice_notifier_dev_net(rpriv->netdev,
+					      &rpriv->uplink_priv.bond->nb,
+					      &rpriv->uplink_priv.bond->nn);
+	kvfree(rpriv->uplink_priv.bond);
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 4e13e37a9ecd..12593d75e885 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -959,16 +959,18 @@ static int mlx5e_init_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 
 	mlx5_init_port_tun_entropy(&uplink_priv->tun_entropy, priv->mdev);
 
+	mlx5e_rep_bond_init(rpriv);
 	err = mlx5e_rep_tc_netdevice_event_register(rpriv);
 	if (err) {
 		mlx5_core_err(priv->mdev, "Failed to register netdev notifier, err: %d\n",
 			      err);
-		goto tc_rep_cleanup;
+		goto err_event_reg;
 	}
 
 	return 0;
 
-tc_rep_cleanup:
+err_event_reg:
+	mlx5e_rep_bond_cleanup(rpriv);
 	mlx5e_rep_tc_cleanup(rpriv);
 	return err;
 }
@@ -1001,7 +1003,7 @@ static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 {
 	mlx5e_rep_tc_netdevice_event_unregister(rpriv);
 	mlx5e_rep_indr_clean_block_privs(rpriv);
-
+	mlx5e_rep_bond_cleanup(rpriv);
 	mlx5e_rep_tc_cleanup(rpriv);
 }
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 1c4af8522467..7e56787aa224 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -56,6 +56,7 @@ struct mlx5e_neigh_update_table {
 };
 
 struct mlx5_tc_ct_priv;
+struct mlx5e_rep_bond;
 struct mlx5_rep_uplink_priv {
 	/* Filters DB - instantiated by the uplink representor and shared by
 	 * the uplink's VFs
@@ -89,6 +90,9 @@ struct mlx5_rep_uplink_priv {
 	struct mapping_ctx *tunnel_enc_opts_mapping;
 
 	struct mlx5_tc_ct_priv *ct_priv;
+
+	/* support eswitch vports bonding */
+	struct mlx5e_rep_bond *bond;
 };
 
 struct mlx5e_rep_priv {
@@ -211,6 +215,9 @@ struct mlx5e_rep_sq {
 
 void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
+int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
+void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
 void mlx5e_remove_sqs_fwd_rules(struct mlx5e_priv *priv);
-- 
cgit v1.2.3-59-g8ed1b


From 553f9328385d954644d74dedb655f85b687a9470 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Fri, 2 Aug 2019 16:13:10 -0700
Subject: net/mlx5e: Support tc block sharing for representors

Currently offloading a rule over a tc block shared by multiple
representors fails because an e-switch global hashtable to keep
the mapping from tc cookies to mlx5e flow instances is used, and
tc block sharing offloads the same rule/cookie multiple times,
each time for different representor sharing the tc block.

Change the implementation and behavior: acknowledge and return success
if the same rule/cookie is offloaded again to another slave representor
sharing the tc block. This is done by recording the netdev that added
the rule first and comparing against it when the cookie is seen again.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 23 +++++++++++++++++++++++
 1 file changed, 23 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 571da14809fe..f3e65a15c950 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -145,6 +145,7 @@ struct mlx5e_tc_flow {
 	struct list_head	hairpin; /* flows sharing the same hairpin */
 	struct list_head	peer;    /* flows with peer flow */
 	struct list_head	unready; /* flows not ready to be offloaded (e.g due to missing route) */
+	struct net_device	*orig_dev; /* netdev adding flow first */
 	int			tmp_efi_index;
 	struct list_head	tmp_list; /* temporary flow list used by neigh update */
 	refcount_t		refcnt;
@@ -4624,11 +4625,21 @@ mlx5e_tc_add_flow(struct mlx5e_priv *priv,
 	return err;
 }
 
+static bool is_flow_rule_duplicate_allowed(struct net_device *dev,
+					   struct mlx5e_rep_priv *rpriv)
+{
+	/* Offloaded flow rule is allowed to duplicate on non-uplink representor
+	 * sharing tc block with other slaves of a lag device.
+	 */
+	return netif_is_lag_port(dev) && rpriv->rep->vport != MLX5_VPORT_UPLINK;
+}
+
 int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 			   struct flow_cls_offload *f, unsigned long flags)
 {
 	struct netlink_ext_ack *extack = f->common.extack;
 	struct rhashtable *tc_ht = get_tc_ht(priv, flags);
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
 	struct mlx5e_tc_flow *flow;
 	int err = 0;
 
@@ -4636,6 +4647,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	flow = rhashtable_lookup(tc_ht, &f->cookie, tc_ht_params);
 	rcu_read_unlock();
 	if (flow) {
+		/* Same flow rule offloaded to non-uplink representor sharing tc block,
+		 * just return 0.
+		 */
+		if (is_flow_rule_duplicate_allowed(dev, rpriv) && flow->orig_dev != dev)
+			goto out;
+
 		NL_SET_ERR_MSG_MOD(extack,
 				   "flow cookie already exists, ignoring");
 		netdev_warn_once(priv->netdev,
@@ -4650,6 +4667,12 @@ int mlx5e_configure_flower(struct net_device *dev, struct mlx5e_priv *priv,
 	if (err)
 		goto out;
 
+	/* Flow rule offloaded to non-uplink representor sharing tc block,
+	 * set the flow's owner dev.
+	 */
+	if (is_flow_rule_duplicate_allowed(dev, rpriv))
+		flow->orig_dev = dev;
+
 	err = rhashtable_lookup_insert_fast(tc_ht, &flow->node, tc_ht_params);
 	if (err)
 		goto err_free;
-- 
cgit v1.2.3-59-g8ed1b


From d34eb2fcd00472323d9e26ee0aec498c2c6f5b6f Mon Sep 17 00:00:00 2001
From: Or Gerlitz <ogerlitz@mellanox.com>
Date: Tue, 5 Mar 2019 21:11:14 +0200
Subject: net/mlx5e: Offload flow rules to active lower representor

When a bond device is created over one or more non uplink representors,
and when a flow rule is offloaded to such bond device, offload a rule
to the active lower device.

Assuming that this is active-backup lag, the rules should be offloaded
to the active lower device which is the representor of the direct
path (not the failover).

Signed-off-by: Or Gerlitz <ogerlitz@mellanox.com>
Signed-off-by: Parav Pandit <parav@mellanox.com>
Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 35 ++++++++++++++++++-------
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index f3e65a15c950..58f797da4d8d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -50,6 +50,7 @@
 #include <net/arp.h>
 #include <net/ipv6_stubs.h>
 #include <net/bareudp.h>
+#include <net/bonding.h>
 #include "en.h"
 #include "en_rep.h"
 #include "en/rep/tc.h"
@@ -3759,6 +3760,28 @@ static int parse_tc_vlan_action(struct mlx5e_priv *priv,
 	return 0;
 }
 
+static struct net_device *get_fdb_out_dev(struct net_device *uplink_dev,
+					  struct net_device *out_dev)
+{
+	struct net_device *fdb_out_dev = out_dev;
+	struct net_device *uplink_upper;
+
+	rcu_read_lock();
+	uplink_upper = netdev_master_upper_dev_get_rcu(uplink_dev);
+	if (uplink_upper && netif_is_lag_master(uplink_upper) &&
+	    uplink_upper == out_dev) {
+		fdb_out_dev = uplink_dev;
+	} else if (netif_is_lag_master(out_dev)) {
+		fdb_out_dev = bond_option_active_slave_get_rcu(netdev_priv(out_dev));
+		if (fdb_out_dev &&
+		    (!mlx5e_eswitch_rep(fdb_out_dev) ||
+		     !netdev_port_same_parent_id(fdb_out_dev, uplink_dev)))
+			fdb_out_dev = NULL;
+	}
+	rcu_read_unlock();
+	return fdb_out_dev;
+}
+
 static int add_vlan_push_action(struct mlx5e_priv *priv,
 				struct mlx5_esw_flow_attr *attr,
 				struct net_device **out_dev,
@@ -4074,7 +4097,6 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 			} else if (netdev_port_same_parent_id(priv->netdev, out_dev)) {
 				struct mlx5_eswitch *esw = priv->mdev->priv.eswitch;
 				struct net_device *uplink_dev = mlx5_eswitch_uplink_get_proto_dev(esw, REP_ETH);
-				struct net_device *uplink_upper;
 
 				if (is_duplicated_output_device(priv->netdev,
 								out_dev,
@@ -4086,14 +4108,9 @@ static int parse_tc_fdb_actions(struct mlx5e_priv *priv,
 				ifindexes[if_count] = out_dev->ifindex;
 				if_count++;
 
-				rcu_read_lock();
-				uplink_upper =
-					netdev_master_upper_dev_get_rcu(uplink_dev);
-				if (uplink_upper &&
-				    netif_is_lag_master(uplink_upper) &&
-				    uplink_upper == out_dev)
-					out_dev = uplink_dev;
-				rcu_read_unlock();
+				out_dev = get_fdb_out_dev(uplink_dev, out_dev);
+				if (!out_dev)
+					return -ENODEV;
 
 				if (is_vlan_dev(out_dev)) {
 					err = add_vlan_push_action(priv, attr,
-- 
cgit v1.2.3-59-g8ed1b


From d97555e1452943264295cd3c1f066474bc3660dd Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Fri, 28 Feb 2020 14:28:27 -0800
Subject: net/mlx5e: Add bond_metadata and its slave entries

Add bond_metadata and its slave entries to represent a lag device
and its slave VF representors. The bond_metadata structure includes a
unique metadata value shared by the slave VF representors, and a list
of slave entries, one per slave representor.

On enslaving event, create a bond_metadata structure representing
the upper lag device of this slave representor if it has not been
created yet. Create and add entry for the slave representor to the
slaves list.

On unslaving event, free the slave entry of the slave representor.
On the last unslave event, free the bond_metadata structure and its
resources.

Introduce APIs to create and remove bond_metadata and its resources,
enslave and unslave VF representor slave entries.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c  | 128 +++++++++++++++++++++
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |   5 +
 2 files changed, 133 insertions(+)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index d0aab36f1947..932e94362ceb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -1,6 +1,8 @@
 // SPDX-License-Identifier: GPL-2.0 OR Linux-OpenIB
 /* Copyright (c) 2020 Mellanox Technologies Inc. All rights reserved. */
 
+#include <linux/netdevice.h>
+#include <linux/list.h>
 #include <net/lag.h>
 
 #include "mlx5_core.h"
@@ -11,8 +13,132 @@
 struct mlx5e_rep_bond {
 	struct notifier_block nb;
 	struct netdev_net_notifier nn;
+	struct list_head metadata_list;
 };
 
+struct mlx5e_rep_bond_slave_entry {
+	struct list_head list;
+	struct net_device *netdev;
+};
+
+struct mlx5e_rep_bond_metadata {
+	struct list_head list; /* link to global list of rep_bond_metadata */
+	struct mlx5_eswitch *esw;
+	 /* private of uplink holding rep bond metadata list */
+	struct net_device *lag_dev;
+	u32 metadata_reg_c_0;
+
+	struct list_head slaves_list; /* slaves list */
+	int slaves;
+};
+
+static struct mlx5e_rep_bond_metadata *
+mlx5e_lookup_rep_bond_metadata(struct mlx5_rep_uplink_priv *uplink_priv,
+			       const struct net_device *lag_dev)
+{
+	struct mlx5e_rep_bond_metadata *found = NULL;
+	struct mlx5e_rep_bond_metadata *cur;
+
+	list_for_each_entry(cur, &uplink_priv->bond->metadata_list, list) {
+		if (cur->lag_dev == lag_dev) {
+			found = cur;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static struct mlx5e_rep_bond_slave_entry *
+mlx5e_lookup_rep_bond_slave_entry(struct mlx5e_rep_bond_metadata *mdata,
+				  const struct net_device *netdev)
+{
+	struct mlx5e_rep_bond_slave_entry *found = NULL;
+	struct mlx5e_rep_bond_slave_entry *cur;
+
+	list_for_each_entry(cur, &mdata->slaves_list, list) {
+		if (cur->netdev == netdev) {
+			found = cur;
+			break;
+		}
+	}
+
+	return found;
+}
+
+static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdata)
+{
+	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
+		   mdata->metadata_reg_c_0);
+	list_del(&mdata->list);
+	WARN_ON(!list_empty(&mdata->slaves_list));
+	kfree(mdata);
+}
+
+/* This must be called under rtnl_lock */
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+			   struct net_device *lag_dev)
+{
+	struct mlx5e_rep_bond_slave_entry *s_entry;
+	struct mlx5e_rep_bond_metadata *mdata;
+	struct mlx5e_rep_priv *rpriv;
+
+	ASSERT_RTNL();
+
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+	if (!mdata) {
+		/* First netdev becomes slave, no metadata presents the lag_dev. Create one */
+		mdata = kzalloc(sizeof(*mdata), GFP_KERNEL);
+		if (!mdata)
+			return -ENOMEM;
+
+		mdata->lag_dev = lag_dev;
+		mdata->esw = esw;
+		INIT_LIST_HEAD(&mdata->slaves_list);
+		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
+
+		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
+			   mdata->metadata_reg_c_0);
+	}
+
+	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
+	if (!s_entry)
+		return -ENOMEM;
+
+	s_entry->netdev = netdev;
+	mdata->slaves++;
+	list_add_tail(&s_entry->list, &mdata->slaves_list);
+
+	return 0;
+}
+
+/* This must be called under rtnl_lock */
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+			    const struct net_device *netdev,
+			    const struct net_device *lag_dev)
+{
+	struct mlx5e_rep_bond_slave_entry *s_entry;
+	struct mlx5e_rep_bond_metadata *mdata;
+	struct mlx5e_rep_priv *rpriv;
+
+	ASSERT_RTNL();
+
+	rpriv = mlx5_eswitch_get_uplink_priv(esw, REP_ETH);
+	mdata = mlx5e_lookup_rep_bond_metadata(&rpriv->uplink_priv, lag_dev);
+	if (!mdata)
+		return;
+
+	s_entry = mlx5e_lookup_rep_bond_slave_entry(mdata, netdev);
+	if (!s_entry)
+		return;
+
+	list_del(&s_entry->list);
+	if (--mdata->slaves == 0)
+		mlx5e_rep_bond_metadata_release(mdata);
+	kfree(s_entry);
+}
+
 static bool mlx5e_rep_is_lag_netdev(struct net_device *netdev)
 {
 	struct mlx5e_priv *priv = netdev_priv(netdev);
@@ -133,6 +259,7 @@ int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
 		goto out;
 	}
 
+	INIT_LIST_HEAD(&uplink_priv->bond->metadata_list);
 	uplink_priv->bond->nb.notifier_call = mlx5e_rep_esw_bond_netevent;
 	ret = register_netdevice_notifier_dev_net(netdev,
 						  &uplink_priv->bond->nb,
@@ -142,6 +269,7 @@ int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv)
 		kvfree(uplink_priv->bond);
 		uplink_priv->bond = NULL;
 	}
+
 out:
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index 7e56787aa224..ed741b6e6af2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -217,6 +217,11 @@ void mlx5e_rep_register_vport_reps(struct mlx5_core_dev *mdev);
 void mlx5e_rep_unregister_vport_reps(struct mlx5_core_dev *mdev);
 int mlx5e_rep_bond_init(struct mlx5e_rep_priv *rpriv);
 void mlx5e_rep_bond_cleanup(struct mlx5e_rep_priv *rpriv);
+int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
+			   struct net_device *lag_dev);
+void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
+			    const struct net_device *netdev,
+			    const struct net_device *lag_dev);
 
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-- 
cgit v1.2.3-59-g8ed1b


From 133dcfc577eaec6538db4ebd8b9205b361f59018 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Fri, 28 Feb 2020 16:10:34 -0800
Subject: net/mlx5: E-Switch, Alloc and free unique metadata for match

Introduce infrastructure to create unique metadata for match
for vport without depending on vport_num. Vport uses its
default metadata for match in standalone configuration but
will share a different unique "bond_metadata" for match with
other vports in bond configuration.

Use ida to generate unique metadata for match for vports
in default and bond configurations.

Introduce APIs to generate and free metadata for match.
Introduce APIs to set vport's bond_metadata and replace its
ingress acl rules with bond_metadata.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/esw/acl/ingress_ofld.c      | 29 +++++++
 .../net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.c  |  2 +
 drivers/net/ethernet/mellanox/mlx5/core/eswitch.h  |  6 ++
 .../ethernet/mellanox/mlx5/core/eswitch_offloads.c | 96 ++++++++++++++--------
 5 files changed, 103 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
index 1bae549f3fa7..4e55d7225a26 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ingress_ofld.c
@@ -291,3 +291,32 @@ void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw,
 	esw_acl_ingress_ofld_groups_destroy(vport);
 	esw_acl_ingress_table_destroy(vport);
 }
+
+/* Caller must hold rtnl_lock */
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+					   u32 metadata)
+{
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
+	int err;
+
+	if (WARN_ON_ONCE(IS_ERR(vport))) {
+		esw_warn(esw->dev, "vport(%d) invalid!\n", vport_num);
+		err = PTR_ERR(vport);
+		goto out;
+	}
+
+	esw_acl_ingress_ofld_rules_destroy(esw, vport);
+
+	vport->metadata = metadata ? metadata : vport->default_metadata;
+
+	/* Recreate ingress acl rules with vport->metadata */
+	err = esw_acl_ingress_ofld_rules_create(esw, vport);
+	if (err)
+		goto out;
+
+	return 0;
+
+out:
+	vport->metadata = vport->default_metadata;
+	return err;
+}
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
index 90ddc5d7da46..c57869b93d60 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/esw/acl/ofld.h
@@ -23,5 +23,7 @@ static inline bool mlx5_esw_acl_egress_fwd2vport_supported(struct mlx5_eswitch *
 /* Eswitch acl ingress external APIs */
 int esw_acl_ingress_ofld_setup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
 void esw_acl_ingress_ofld_cleanup(struct mlx5_eswitch *esw, struct mlx5_vport *vport);
+int mlx5_esw_acl_ingress_vport_bond_update(struct mlx5_eswitch *esw, u16 vport_num,
+					   u32 metadata);
 
 #endif /* __MLX5_ESWITCH_ACL_OFLD_H__ */
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
index 20ab13ff2303..1116ab9bea6c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.c
@@ -1730,6 +1730,7 @@ int mlx5_eswitch_init(struct mlx5_core_dev *dev)
 	mutex_init(&esw->offloads.decap_tbl_lock);
 	hash_init(esw->offloads.decap_tbl);
 	atomic64_set(&esw->offloads.num_flows, 0);
+	ida_init(&esw->offloads.vport_metadata_ida);
 	mutex_init(&esw->state_lock);
 	mutex_init(&esw->mode_lock);
 
@@ -1768,6 +1769,7 @@ void mlx5_eswitch_cleanup(struct mlx5_eswitch *esw)
 	esw_offloads_cleanup_reps(esw);
 	mutex_destroy(&esw->mode_lock);
 	mutex_destroy(&esw->state_lock);
+	ida_destroy(&esw->offloads.vport_metadata_ida);
 	mutex_destroy(&esw->offloads.mod_hdr.lock);
 	mutex_destroy(&esw->offloads.encap_tbl_lock);
 	mutex_destroy(&esw->offloads.decap_tbl_lock);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
index 7b6b3686b666..a5175e98c0b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch.h
@@ -149,6 +149,8 @@ struct mlx5_vport {
 
 	struct vport_ingress    ingress;
 	struct vport_egress     egress;
+	u32                     default_metadata;
+	u32                     metadata;
 
 	struct mlx5_vport_info  info;
 
@@ -224,6 +226,7 @@ struct mlx5_esw_offload {
 	u8 inline_mode;
 	atomic64_t num_flows;
 	enum devlink_eswitch_encap_mode encap;
+	struct ida vport_metadata_ida;
 };
 
 /* E-Switch MC FDB table hash node */
@@ -292,6 +295,9 @@ int esw_offloads_enable(struct mlx5_eswitch *esw);
 void esw_offloads_cleanup_reps(struct mlx5_eswitch *esw);
 int esw_offloads_init_reps(struct mlx5_eswitch *esw);
 
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw);
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata);
+
 int mlx5_esw_modify_vport_rate(struct mlx5_eswitch *esw, u16 vport_num,
 			       u32 rate_mbps);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
index 11bc9cc1d5f0..060354bb211a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/eswitch_offloads.c
@@ -31,6 +31,7 @@
  */
 
 #include <linux/etherdevice.h>
+#include <linux/idr.h>
 #include <linux/mlx5/driver.h>
 #include <linux/mlx5/mlx5_ifc.h>
 #include <linux/mlx5/vport.h>
@@ -1877,15 +1878,69 @@ static bool esw_use_vport_metadata(const struct mlx5_eswitch *esw)
 	       esw_check_vport_match_metadata_supported(esw);
 }
 
+u32 mlx5_esw_match_metadata_alloc(struct mlx5_eswitch *esw)
+{
+	u32 num_vports = GENMASK(ESW_VPORT_BITS - 1, 0) - 1;
+	u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
+	u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
+	u32 start;
+	u32 end;
+	int id;
+
+	/* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
+	WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
+
+	/* Trim vhca_id to ESW_VHCA_ID_BITS */
+	vhca_id &= vhca_id_mask;
+
+	start = (vhca_id << ESW_VPORT_BITS);
+	end = start + num_vports;
+	if (!vhca_id)
+		start += 1; /* zero is reserved/invalid metadata */
+	id = ida_alloc_range(&esw->offloads.vport_metadata_ida, start, end, GFP_KERNEL);
+
+	return (id < 0) ? 0 : id;
+}
+
+void mlx5_esw_match_metadata_free(struct mlx5_eswitch *esw, u32 metadata)
+{
+	ida_free(&esw->offloads.vport_metadata_ida, metadata);
+}
+
+static int esw_offloads_vport_metadata_setup(struct mlx5_eswitch *esw,
+					     struct mlx5_vport *vport)
+{
+	if (vport->vport == MLX5_VPORT_UPLINK)
+		return 0;
+
+	vport->default_metadata = mlx5_esw_match_metadata_alloc(esw);
+	vport->metadata = vport->default_metadata;
+	return vport->metadata ? 0 : -ENOSPC;
+}
+
+static void esw_offloads_vport_metadata_cleanup(struct mlx5_eswitch *esw,
+						struct mlx5_vport *vport)
+{
+	if (vport->vport == MLX5_VPORT_UPLINK || !vport->default_metadata)
+		return;
+
+	WARN_ON(vport->metadata != vport->default_metadata);
+	mlx5_esw_match_metadata_free(esw, vport->default_metadata);
+}
+
 int
 esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
 				     struct mlx5_vport *vport)
 {
 	int err;
 
+	err = esw_offloads_vport_metadata_setup(esw, vport);
+	if (err)
+		goto metadata_err;
+
 	err = esw_acl_ingress_ofld_setup(esw, vport);
 	if (err)
-		return err;
+		goto ingress_err;
 
 	if (mlx5_eswitch_is_vf_vport(esw, vport->vport)) {
 		err = esw_acl_egress_ofld_setup(esw, vport);
@@ -1897,6 +1952,9 @@ esw_vport_create_offloads_acl_tables(struct mlx5_eswitch *esw,
 
 egress_err:
 	esw_acl_ingress_ofld_cleanup(esw, vport);
+ingress_err:
+	esw_offloads_vport_metadata_cleanup(esw, vport);
+metadata_err:
 	return err;
 }
 
@@ -1906,6 +1964,7 @@ esw_vport_destroy_offloads_acl_tables(struct mlx5_eswitch *esw,
 {
 	esw_acl_egress_ofld_cleanup(vport);
 	esw_acl_ingress_ofld_cleanup(esw, vport);
+	esw_offloads_vport_metadata_cleanup(esw, vport);
 }
 
 static int esw_create_uplink_offloads_acl_tables(struct mlx5_eswitch *esw)
@@ -2571,38 +2630,11 @@ EXPORT_SYMBOL(mlx5_eswitch_vport_match_metadata_enabled);
 u32 mlx5_eswitch_get_vport_metadata_for_match(struct mlx5_eswitch *esw,
 					      u16 vport_num)
 {
-	u32 vport_num_mask = GENMASK(ESW_VPORT_BITS - 1, 0);
-	u32 vhca_id_mask = GENMASK(ESW_VHCA_ID_BITS - 1, 0);
-	u32 vhca_id = MLX5_CAP_GEN(esw->dev, vhca_id);
-	u32 val;
+	struct mlx5_vport *vport = mlx5_eswitch_get_vport(esw, vport_num);
 
-	/* Make sure the vhca_id fits the ESW_VHCA_ID_BITS */
-	WARN_ON_ONCE(vhca_id >= BIT(ESW_VHCA_ID_BITS));
-
-	/* Trim vhca_id to ESW_VHCA_ID_BITS */
-	vhca_id &= vhca_id_mask;
-
-	/* Make sure pf and ecpf map to end of ESW_VPORT_BITS range so they
-	 * don't overlap with VF numbers, and themselves, after trimming.
-	 */
-	WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) <
-		     vport_num_mask - 1);
-	WARN_ON_ONCE((MLX5_VPORT_ECPF & vport_num_mask) <
-		     vport_num_mask - 1);
-	WARN_ON_ONCE((MLX5_VPORT_UPLINK & vport_num_mask) ==
-		     (MLX5_VPORT_ECPF & vport_num_mask));
-
-	/* Make sure that the VF vport_num fits ESW_VPORT_BITS and don't
-	 * overlap with pf and ecpf.
-	 */
-	if (vport_num != MLX5_VPORT_UPLINK &&
-	    vport_num != MLX5_VPORT_ECPF)
-		WARN_ON_ONCE(vport_num >= vport_num_mask - 1);
-
-	/* We can now trim vport_num to ESW_VPORT_BITS */
-	vport_num &= vport_num_mask;
+	if (WARN_ON_ONCE(IS_ERR(vport)))
+		return 0;
 
-	val = (vhca_id << ESW_VPORT_BITS) | vport_num;
-	return val << (32 - ESW_SOURCE_PORT_METADATA_BITS);
+	return vport->metadata << (32 - ESW_SOURCE_PORT_METADATA_BITS);
 }
 EXPORT_SYMBOL(mlx5_eswitch_get_vport_metadata_for_match);
-- 
cgit v1.2.3-59-g8ed1b


From 88e96e533cfa11e996c59a44bbb6b0e0b9891970 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Mon, 2 Mar 2020 10:33:49 -0800
Subject: net/mlx5e: Slave representors sharing unique metadata for match

Bonded slave representors' vports must share a unique metadata
for match.

On enslaving event of slave representor to lag device, allocate
new unique "bond_metadata" for match if this is the first slave.
The subsequent enslaved representors will share the same unique
"bond_metadata".

On unslaving event of slave representor, reset the slave
representor's vport to use its own default metadata.

Replace ingress acl and rx rules of the slave representors' vports
using new vport->bond_metadata.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c  | 65 ++++++++++++++++++++--
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   | 22 +++++++-
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  1 +
 3 files changed, 80 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 932e94362ceb..13500f60bef6 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -71,6 +71,7 @@ static void mlx5e_rep_bond_metadata_release(struct mlx5e_rep_bond_metadata *mdat
 	netdev_dbg(mdata->lag_dev, "destroy rep_bond_metadata(%d)\n",
 		   mdata->metadata_reg_c_0);
 	list_del(&mdata->list);
+	mlx5_esw_match_metadata_free(mdata->esw, mdata->metadata_reg_c_0);
 	WARN_ON(!list_empty(&mdata->slaves_list));
 	kfree(mdata);
 }
@@ -82,6 +83,8 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
 	struct mlx5e_rep_bond_slave_entry *s_entry;
 	struct mlx5e_rep_bond_metadata *mdata;
 	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *priv;
+	int err;
 
 	ASSERT_RTNL();
 
@@ -96,6 +99,11 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
 		mdata->lag_dev = lag_dev;
 		mdata->esw = esw;
 		INIT_LIST_HEAD(&mdata->slaves_list);
+		mdata->metadata_reg_c_0 = mlx5_esw_match_metadata_alloc(esw);
+		if (!mdata->metadata_reg_c_0) {
+			kfree(mdata);
+			return -ENOSPC;
+		}
 		list_add(&mdata->list, &rpriv->uplink_priv.bond->metadata_list);
 
 		netdev_dbg(lag_dev, "create rep_bond_metadata(%d)\n",
@@ -103,14 +111,33 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
 	}
 
 	s_entry = kzalloc(sizeof(*s_entry), GFP_KERNEL);
-	if (!s_entry)
-		return -ENOMEM;
+	if (!s_entry) {
+		err = -ENOMEM;
+		goto entry_alloc_err;
+	}
 
 	s_entry->netdev = netdev;
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+
+	err = mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport,
+						     mdata->metadata_reg_c_0);
+	if (err)
+		goto ingress_err;
+
 	mdata->slaves++;
 	list_add_tail(&s_entry->list, &mdata->slaves_list);
+	netdev_dbg(netdev, "enslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
 
 	return 0;
+
+ingress_err:
+	kfree(s_entry);
+entry_alloc_err:
+	if (!mdata->slaves)
+		mlx5e_rep_bond_metadata_release(mdata);
+	return err;
 }
 
 /* This must be called under rtnl_lock */
@@ -121,6 +148,7 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 	struct mlx5e_rep_bond_slave_entry *s_entry;
 	struct mlx5e_rep_bond_metadata *mdata;
 	struct mlx5e_rep_priv *rpriv;
+	struct mlx5e_priv *priv;
 
 	ASSERT_RTNL();
 
@@ -133,7 +161,16 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 	if (!s_entry)
 		return;
 
+	priv = netdev_priv(netdev);
+	rpriv = priv->ppriv;
+
+	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+	mlx5e_rep_bond_update(priv, false);
 	list_del(&s_entry->list);
+
+	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
+		   rpriv->rep->vport, lag_dev->name, mdata->metadata_reg_c_0);
+
 	if (--mdata->slaves == 0)
 		mlx5e_rep_bond_metadata_release(mdata);
 	kfree(s_entry);
@@ -163,6 +200,7 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
 	struct net_device *dev;
 	u16 acl_vport_num;
 	u16 fwd_vport_num;
+	int err;
 
 	if (!mlx5e_rep_is_lag_netdev(netdev))
 		return;
@@ -187,11 +225,28 @@ static void mlx5e_rep_changelowerstate_event(struct net_device *netdev, void *pt
 		rpriv = priv->ppriv;
 		acl_vport_num = rpriv->rep->vport;
 		if (acl_vport_num != fwd_vport_num) {
-			mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
-						       fwd_vport_num,
-						       acl_vport_num);
+			/* Only single rx_rule for unique bond_metadata should be
+			 * present, delete it if it's saved as passive vport's
+			 * rx_rule with destination as passive vport's root_ft
+			 */
+			mlx5e_rep_bond_update(priv, true);
+			err = mlx5_esw_acl_egress_vport_bond(priv->mdev->priv.eswitch,
+							     fwd_vport_num,
+							     acl_vport_num);
+			if (err)
+				netdev_warn(dev,
+					    "configure slave vport(%d) egress fwd, err(%d)",
+					    acl_vport_num, err);
 		}
 	}
+
+	/* Insert new rx_rule for unique bond_metadata, save it as active vport's
+	 * rx_rule with new destination as active vport's root_ft
+	 */
+	err = mlx5e_rep_bond_update(netdev_priv(netdev), false);
+	if (err)
+		netdev_warn(netdev, "configure active slave vport(%d) rx_rule, err(%d)",
+			    fwd_vport_num, err);
 }
 
 static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index 12593d75e885..af89a4803c7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -854,6 +854,24 @@ static int mlx5e_create_rep_vport_rx_rule(struct mlx5e_priv *priv)
 	return 0;
 }
 
+static void rep_vport_rx_rule_destroy(struct mlx5e_priv *priv)
+{
+	struct mlx5e_rep_priv *rpriv = priv->ppriv;
+
+	if (!rpriv->vport_rx_rule)
+		return;
+
+	mlx5_del_flow_rules(rpriv->vport_rx_rule);
+	rpriv->vport_rx_rule = NULL;
+}
+
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup)
+{
+	rep_vport_rx_rule_destroy(priv);
+
+	return cleanup ? 0 : mlx5e_create_rep_vport_rx_rule(priv);
+}
+
 static int mlx5e_init_rep_rx(struct mlx5e_priv *priv)
 {
 	struct mlx5_core_dev *mdev = priv->mdev;
@@ -918,9 +936,7 @@ err_close_drop_rq:
 
 static void mlx5e_cleanup_rep_rx(struct mlx5e_priv *priv)
 {
-	struct mlx5e_rep_priv *rpriv = priv->ppriv;
-
-	mlx5_del_flow_rules(rpriv->vport_rx_rule);
+	rep_vport_rx_rule_destroy(priv);
 	mlx5e_destroy_rep_root_ft(priv);
 	mlx5e_destroy_ttc_table(priv, &priv->fs.ttc);
 	mlx5e_destroy_direct_tirs(priv, priv->direct_tir);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index ed741b6e6af2..da9f1686d525 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -222,6 +222,7 @@ int mlx5e_rep_bond_enslave(struct mlx5_eswitch *esw, struct net_device *netdev,
 void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 			    const struct net_device *netdev,
 			    const struct net_device *lag_dev);
+int mlx5e_rep_bond_update(struct mlx5e_priv *priv, bool cleanup);
 
 bool mlx5e_is_uplink_rep(struct mlx5e_priv *priv);
 int mlx5e_add_sqs_fwd_rules(struct mlx5e_priv *priv);
-- 
cgit v1.2.3-59-g8ed1b


From 9728366f53d283be943ee99d5989f155e55fc077 Mon Sep 17 00:00:00 2001
From: Vu Pham <vuhuong@mellanox.com>
Date: Thu, 12 Mar 2020 10:26:25 -0700
Subject: net/mlx5e: Use change upper event to setup representors'
 bond_metadata

Use the change upper event to detect a slave representor being
enslaved to or unslaved from a lag device.

On enslaving event, call the mlx5e_rep_bond_enslave() API to create and
add this slave representor shadow entry to the slaves list of the
bond_metadata structure representing the master lag device, and use
its metadata to set up the ingress acl metadata header.

On unslaving event, reset the vport of the unslaved representor
to use its default ingress/egress acls and rx rules with its
default_metadata.

The last slave will free the shared bond_metadata and its
unique metadata.

Signed-off-by: Vu Pham <vuhuong@mellanox.com>
Reviewed-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/bond.c  | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
index 13500f60bef6..bdb71332cbf2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/bond.c
@@ -164,8 +164,13 @@ void mlx5e_rep_bond_unslave(struct mlx5_eswitch *esw,
 	priv = netdev_priv(netdev);
 	rpriv = priv->ppriv;
 
+	/* Reset bond_metadata to zero first then reset all ingress/egress
+	 * acls and rx rules of unslave representor's vport
+	 */
 	mlx5_esw_acl_ingress_vport_bond_update(esw, rpriv->rep->vport, 0);
+	mlx5_esw_acl_egress_vport_unbond(esw, rpriv->rep->vport);
 	mlx5e_rep_bond_update(priv, false);
+
 	list_del(&s_entry->list);
 
 	netdev_dbg(netdev, "unslave rep vport(%d) lag_dev(%s) metadata(0x%x)\n",
@@ -253,22 +258,23 @@ static void mlx5e_rep_changeupper_event(struct net_device *netdev, void *ptr)
 {
 	struct netdev_notifier_changeupper_info *info = ptr;
 	struct mlx5e_rep_priv *rpriv;
+	struct net_device *lag_dev;
 	struct mlx5e_priv *priv;
 
 	if (!mlx5e_rep_is_lag_netdev(netdev))
 		return;
 
-	/* Nothing to setup for new enslaved representor */
-	if (info->linking)
-		return;
-
 	priv = netdev_priv(netdev);
 	rpriv = priv->ppriv;
-	netdev_dbg(netdev, "Unslave, reset vport(%d) egress acl\n", rpriv->rep->vport);
+	lag_dev = info->upper_dev;
 
-	/* Reset all egress acl rules of unslave representor's vport */
-	mlx5_esw_acl_egress_vport_unbond(priv->mdev->priv.eswitch,
-					 rpriv->rep->vport);
+	netdev_dbg(netdev, "%sslave vport(%d) lag(%s)\n",
+		   info->linking ? "en" : "un", rpriv->rep->vport, lag_dev->name);
+
+	if (info->linking)
+		mlx5e_rep_bond_enslave(priv->mdev->priv.eswitch, netdev, lag_dev);
+	else
+		mlx5e_rep_bond_unslave(priv->mdev->priv.eswitch, netdev, lag_dev);
 }
 
 /* Bond device of representors and netdev events are used here in specific way
-- 
cgit v1.2.3-59-g8ed1b


From 810cbb25549b81f0c0848320f8a1614106d3a0e1 Mon Sep 17 00:00:00 2001
From: Parav Pandit <parav@mellanox.com>
Date: Thu, 14 May 2020 23:42:45 -0500
Subject: net/mlx5: Add missing mutex destroy

Add mutex destroy calls to balance with mutex_init() done in the init
path.

Signed-off-by: Parav Pandit <parav@mellanox.com>
Reviewed-by: Moshe Shemesh <moshe@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/main.c | 16 ++++++++++++++--
 1 file changed, 14 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/main.c b/drivers/net/ethernet/mellanox/mlx5/core/main.c
index 92f2395dd31a..30de3bf35c6d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/main.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/main.c
@@ -1272,7 +1272,7 @@ static int mlx5_mdev_init(struct mlx5_core_dev *dev, int profile_idx)
 					    mlx5_debugfs_root);
 	if (!priv->dbg_root) {
 		dev_err(dev->device, "mlx5_core: error, Cannot create debugfs dir, aborting\n");
-		return -ENOMEM;
+		goto err_dbg_root;
 	}
 
 	err = mlx5_health_init(dev);
@@ -1289,15 +1289,27 @@ err_pagealloc_init:
 	mlx5_health_cleanup(dev);
 err_health_init:
 	debugfs_remove(dev->priv.dbg_root);
-
+err_dbg_root:
+	mutex_destroy(&priv->pgdir_mutex);
+	mutex_destroy(&priv->alloc_mutex);
+	mutex_destroy(&priv->bfregs.wc_head.lock);
+	mutex_destroy(&priv->bfregs.reg_head.lock);
+	mutex_destroy(&dev->intf_state_mutex);
 	return err;
 }
 
 static void mlx5_mdev_uninit(struct mlx5_core_dev *dev)
 {
+	struct mlx5_priv *priv = &dev->priv;
+
 	mlx5_pagealloc_cleanup(dev);
 	mlx5_health_cleanup(dev);
 	debugfs_remove_recursive(dev->priv.dbg_root);
+	mutex_destroy(&priv->pgdir_mutex);
+	mutex_destroy(&priv->alloc_mutex);
+	mutex_destroy(&priv->bfregs.wc_head.lock);
+	mutex_destroy(&priv->bfregs.reg_head.lock);
+	mutex_destroy(&dev->intf_state_mutex);
 }
 
 #define MLX5_IB_MOD "mlx5_ib"
-- 
cgit v1.2.3-59-g8ed1b


From 4a5d5d7392106a48c7db345a3843e854b66ea0ff Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Mon, 11 May 2020 19:20:29 +0000
Subject: net/mlx5e: Helper function to set ethertype

Set ethertype match in a helper function as a pre-step towards
optimizing it.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c  |  5 +----
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c | 21 +++++++++++++--------
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c     | 14 ++++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h     |  3 +++
 4 files changed, 27 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index 995b2ef1fb3b..ba72410c55fa 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -134,10 +134,7 @@ mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
 
 		flow_rule_match_basic(rule, &match);
 
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-			 ntohs(match.mask->n_proto));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-			 ntohs(match.key->n_proto));
+		mlx5e_tc_set_ethertype(headers_c, headers_v, &match);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 match.mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index e99382f58807..6d7fded75264 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -512,6 +512,13 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 	}
 
 	if (flow_rule_match_key(rule, FLOW_DISSECTOR_KEY_ENC_CONTROL)) {
+		struct flow_dissector_key_basic key_basic = {};
+		struct flow_dissector_key_basic mask_basic = {
+			.n_proto = htons(0xFFFF),
+		};
+		struct flow_match_basic match_basic = {
+			.key = &key_basic, .mask = &mask_basic,
+		};
 		struct flow_match_control match;
 		u16 addr_type;
 
@@ -537,10 +544,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 				 dst_ipv4_dst_ipv6.ipv4_layout.ipv4,
 				 ntohl(match.key->dst));
 
-			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
-					 ethertype);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-				 ETH_P_IP);
+			key_basic.n_proto = htons(ETH_P_IP);
+			mlx5e_tc_set_ethertype(headers_c, headers_v,
+					       &match_basic);
 		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 			struct flow_match_ipv6_addrs match;
 
@@ -563,10 +569,9 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 			       &match.key->dst, MLX5_FLD_SZ_BYTES(ipv6_layout,
 								  ipv6));
 
-			MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c,
-					 ethertype);
-			MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-				 ETH_P_IPV6);
+			key_basic.n_proto = htons(ETH_P_IPV6);
+			mlx5e_tc_set_ethertype(headers_c, headers_v,
+					       &match_basic);
 		}
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 58f797da4d8d..680b9e090057 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2020,6 +2020,15 @@ u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
 	return flow->tunnel_id;
 }
 
+void mlx5e_tc_set_ethertype(void *headers_c, void *headers_v,
+			    struct flow_match_basic *match)
+{
+	MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+		 ntohs(match->mask->n_proto));
+	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+		 ntohs(match->key->n_proto));
+}
+
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
 			     struct mlx5e_tc_flow *flow,
 			     struct mlx5_flow_spec *spec,
@@ -2241,10 +2250,7 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		struct flow_match_basic match;
 
 		flow_rule_match_basic(rule, &match);
-		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-			 ntohs(match.mask->n_proto));
-		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-			 ntohs(match.key->n_proto));
+		mlx5e_tc_set_ethertype(headers_c, headers_v, &match);
 
 		if (match.mask->n_proto)
 			*match_level = MLX5_MATCH_L2;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 037aa73bf9ab..144b71f571ea 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -170,6 +170,9 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
 struct mlx5e_tc_flow;
 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
 
+void mlx5e_tc_set_ethertype(void *headers_c, void *headers_v,
+			    struct flow_match_basic *match);
+
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 
 int mlx5e_tc_nic_init(struct mlx5e_priv *priv);
-- 
cgit v1.2.3-59-g8ed1b


From fca533041aac0426f5b5618a564aeb588fc125e9 Mon Sep 17 00:00:00 2001
From: Eli Britstein <elibr@mellanox.com>
Date: Tue, 19 May 2020 05:55:59 +0000
Subject: net/mlx5e: Optimize performance for IPv4/IPv6 ethertype

The HW is optimized for IPv4/IPv6. For such cases, when the device
capability is present, avoid matching on ethertype and match on the
ip_version field instead.

Signed-off-by: Eli Britstein <elibr@mellanox.com>
Reviewed-by: Roi Dayan <roid@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/diag/fs_tracepoint.c        | 85 +++++++++++-----------
 drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c |  7 +-
 .../net/ethernet/mellanox/mlx5/core/en/tc_tun.c    |  8 +-
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c    | 40 +++++++---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.h    |  5 +-
 5 files changed, 85 insertions(+), 60 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
index 8ecac81a385d..a700f3c86899 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/diag/fs_tracepoint.c
@@ -76,58 +76,59 @@ static void print_lyr_2_4_hdrs(struct trace_seq *p,
 		.v = MLX5_GET(fte_match_set_lyr_2_4, value, dmac_47_16) << 16 |
 		     MLX5_GET(fte_match_set_lyr_2_4, value, dmac_15_0)};
 	MASK_VAL_L2(u16, ethertype, ethertype);
+	MASK_VAL_L2(u8, ip_version, ip_version);
 
 	PRINT_MASKED_VALP(smac, u8 *, p, "%pM");
 	PRINT_MASKED_VALP(dmac, u8 *, p, "%pM");
 	PRINT_MASKED_VAL(ethertype, p, "%04x");
 
-	if (ethertype.m == 0xffff) {
-		if (ethertype.v == ETH_P_IP) {
+	if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IP) ||
+	    (ip_version.m == 0xf && ip_version.v == 4)) {
 #define MASK_VAL_L2_BE(type, name, fld) \
 	MASK_VAL_BE(type, fte_match_set_lyr_2_4, name, mask, value, fld)
-			MASK_VAL_L2_BE(u32, src_ipv4,
-				       src_ipv4_src_ipv6.ipv4_layout.ipv4);
-			MASK_VAL_L2_BE(u32, dst_ipv4,
-				       dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
+		MASK_VAL_L2_BE(u32, src_ipv4,
+			       src_ipv4_src_ipv6.ipv4_layout.ipv4);
+		MASK_VAL_L2_BE(u32, dst_ipv4,
+			       dst_ipv4_dst_ipv6.ipv4_layout.ipv4);
 
-			PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
-					  "%pI4");
-			PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
-					  "%pI4");
-		} else if (ethertype.v == ETH_P_IPV6) {
-			static const struct in6_addr full_ones = {
-				.in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
-						    __constant_htonl(0xffffffff),
-						    __constant_htonl(0xffffffff),
-						    __constant_htonl(0xffffffff)},
-			};
-			DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
-			DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
+		PRINT_MASKED_VALP(src_ipv4, typeof(&src_ipv4.v), p,
+				  "%pI4");
+		PRINT_MASKED_VALP(dst_ipv4, typeof(&dst_ipv4.v), p,
+				  "%pI4");
+	} else if ((ethertype.m == 0xffff && ethertype.v == ETH_P_IPV6) ||
+		   (ip_version.m == 0xf && ip_version.v == 6)) {
+		static const struct in6_addr full_ones = {
+			.in6_u.u6_addr32 = {__constant_htonl(0xffffffff),
+					    __constant_htonl(0xffffffff),
+					    __constant_htonl(0xffffffff),
+					    __constant_htonl(0xffffffff)},
+		};
+		DECLARE_MASK_VAL(struct in6_addr, src_ipv6);
+		DECLARE_MASK_VAL(struct in6_addr, dst_ipv6);
 
-			memcpy(src_ipv6.m.in6_u.u6_addr8,
-			       MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
-					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-			       sizeof(src_ipv6.m));
-			memcpy(dst_ipv6.m.in6_u.u6_addr8,
-			       MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
-					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-			       sizeof(dst_ipv6.m));
-			memcpy(src_ipv6.v.in6_u.u6_addr8,
-			       MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
-					    src_ipv4_src_ipv6.ipv6_layout.ipv6),
-			       sizeof(src_ipv6.v));
-			memcpy(dst_ipv6.v.in6_u.u6_addr8,
-			       MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
-					    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
-			       sizeof(dst_ipv6.v));
+		memcpy(src_ipv6.m.in6_u.u6_addr8,
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       sizeof(src_ipv6.m));
+		memcpy(dst_ipv6.m.in6_u.u6_addr8,
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4, mask,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       sizeof(dst_ipv6.m));
+		memcpy(src_ipv6.v.in6_u.u6_addr8,
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+				    src_ipv4_src_ipv6.ipv6_layout.ipv6),
+		       sizeof(src_ipv6.v));
+		memcpy(dst_ipv6.v.in6_u.u6_addr8,
+		       MLX5_ADDR_OF(fte_match_set_lyr_2_4, value,
+				    dst_ipv4_dst_ipv6.ipv6_layout.ipv6),
+		       sizeof(dst_ipv6.v));
 
-			if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
-				trace_seq_printf(p, "src_ipv6=%pI6 ",
-						 &src_ipv6.v);
-			if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
-				trace_seq_printf(p, "dst_ipv6=%pI6 ",
-						 &dst_ipv6.v);
-		}
+		if (!memcmp(&src_ipv6.m, &full_ones, sizeof(full_ones)))
+			trace_seq_printf(p, "src_ipv6=%pI6 ",
+					 &src_ipv6.v);
+		if (!memcmp(&dst_ipv6.m, &full_ones, sizeof(full_ones)))
+			trace_seq_printf(p, "dst_ipv6=%pI6 ",
+					 &dst_ipv6.v);
 	}
 
 #define PRINT_MASKED_VAL_L2(type, name, fld, p, format) {\
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
index ba72410c55fa..afc19dca1f5f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_ct.c
@@ -119,7 +119,7 @@ mlx5_tc_ct_get_ct_priv(struct mlx5e_priv *priv)
 }
 
 static int
-mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
+mlx5_tc_ct_set_tuple_match(struct mlx5e_priv *priv, struct mlx5_flow_spec *spec,
 			   struct flow_rule *rule)
 {
 	void *headers_c = MLX5_ADDR_OF(fte_match_param, spec->match_criteria,
@@ -134,7 +134,8 @@ mlx5_tc_ct_set_tuple_match(struct mlx5_flow_spec *spec,
 
 		flow_rule_match_basic(rule, &match);
 
-		mlx5e_tc_set_ethertype(headers_c, headers_v, &match);
+		mlx5e_tc_set_ethertype(priv->mdev, &match, true, headers_c,
+				       headers_v);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ip_protocol,
 			 match.mask->ip_proto);
 		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_protocol,
@@ -530,7 +531,7 @@ mlx5_tc_ct_entry_add_rule(struct mlx5_tc_ct_priv *ct_priv,
 	attr->counter = entry->counter;
 	attr->flags |= MLX5_ESW_ATTR_FLAG_NO_IN_PORT;
 
-	mlx5_tc_ct_set_tuple_match(spec, flow_rule);
+	mlx5_tc_ct_set_tuple_match(netdev_priv(ct_priv->netdev), spec, flow_rule);
 	mlx5e_tc_match_to_reg_match(spec, ZONE_TO_REG,
 				    entry->zone & MLX5_CT_ZONE_MASK,
 				    MLX5_CT_ZONE_MASK);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
index 6d7fded75264..7cce85faa16f 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun.c
@@ -545,8 +545,8 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 				 ntohl(match.key->dst));
 
 			key_basic.n_proto = htons(ETH_P_IP);
-			mlx5e_tc_set_ethertype(headers_c, headers_v,
-					       &match_basic);
+			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+					       headers_c, headers_v);
 		} else if (addr_type == FLOW_DISSECTOR_KEY_IPV6_ADDRS) {
 			struct flow_match_ipv6_addrs match;
 
@@ -570,8 +570,8 @@ int mlx5e_tc_tun_parse(struct net_device *filter_dev,
 								  ipv6));
 
 			key_basic.n_proto = htons(ETH_P_IPV6);
-			mlx5e_tc_set_ethertype(headers_c, headers_v,
-					       &match_basic);
+			mlx5e_tc_set_ethertype(priv->mdev, &match_basic, true,
+					       headers_c, headers_v);
 		}
 	}
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 680b9e090057..0f119c08b835 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -2020,13 +2020,30 @@ u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow)
 	return flow->tunnel_id;
 }
 
-void mlx5e_tc_set_ethertype(void *headers_c, void *headers_v,
-			    struct flow_match_basic *match)
-{
-	MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
-		 ntohs(match->mask->n_proto));
-	MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
-		 ntohs(match->key->n_proto));
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+			    struct flow_match_basic *match, bool outer,
+			    void *headers_c, void *headers_v)
+{
+	bool ip_version_cap;
+
+	ip_version_cap = outer ?
+		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+					  ft_field_support.outer_ip_version) :
+		MLX5_CAP_FLOWTABLE_NIC_RX(mdev,
+					  ft_field_support.inner_ip_version);
+
+	if (ip_version_cap && match->mask->n_proto == htons(0xFFFF) &&
+	    (match->key->n_proto == htons(ETH_P_IP) ||
+	     match->key->n_proto == htons(ETH_P_IPV6))) {
+		MLX5_SET_TO_ONES(fte_match_set_lyr_2_4, headers_c, ip_version);
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ip_version,
+			 match->key->n_proto == htons(ETH_P_IP) ? 4 : 6);
+	} else {
+		MLX5_SET(fte_match_set_lyr_2_4, headers_c, ethertype,
+			 ntohs(match->mask->n_proto));
+		MLX5_SET(fte_match_set_lyr_2_4, headers_v, ethertype,
+			 ntohs(match->key->n_proto));
+	}
 }
 
 static int parse_tunnel_attr(struct mlx5e_priv *priv,
@@ -2250,7 +2267,9 @@ static int __parse_cls_flower(struct mlx5e_priv *priv,
 		struct flow_match_basic match;
 
 		flow_rule_match_basic(rule, &match);
-		mlx5e_tc_set_ethertype(headers_c, headers_v, &match);
+		mlx5e_tc_set_ethertype(priv->mdev, &match,
+				       match_level == outer_match_level,
+				       headers_c, headers_v);
 
 		if (match.mask->n_proto)
 			*match_level = MLX5_MATCH_L2;
@@ -3126,16 +3145,19 @@ static bool modify_header_match_supported(struct mlx5_flow_spec *spec,
 {
 	const struct flow_action_entry *act;
 	bool modify_ip_header;
+	void *headers_c;
 	void *headers_v;
 	u16 ethertype;
 	u8 ip_proto;
 	int i, err;
 
+	headers_c = get_match_headers_criteria(actions, spec);
 	headers_v = get_match_headers_value(actions, spec);
 	ethertype = MLX5_GET(fte_match_set_lyr_2_4, headers_v, ethertype);
 
 	/* for non-IP we only re-write MACs, so we're okay */
-	if (ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
+	if (MLX5_GET(fte_match_set_lyr_2_4, headers_c, ip_version) == 0 &&
+	    ethertype != ETH_P_IP && ethertype != ETH_P_IPV6)
 		goto out_ok;
 
 	modify_ip_header = false;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
index 144b71f571ea..5c330b0cae21 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.h
@@ -170,8 +170,9 @@ void dealloc_mod_hdr_actions(struct mlx5e_tc_mod_hdr_acts *mod_hdr_acts);
 struct mlx5e_tc_flow;
 u32 mlx5e_tc_get_flow_tun_id(struct mlx5e_tc_flow *flow);
 
-void mlx5e_tc_set_ethertype(void *headers_c, void *headers_v,
-			    struct flow_match_basic *match);
+void mlx5e_tc_set_ethertype(struct mlx5_core_dev *mdev,
+			    struct flow_match_basic *match, bool outer,
+			    void *headers_c, void *headers_v);
 
 #if IS_ENABLED(CONFIG_MLX5_CLS_ACT)
 
-- 
cgit v1.2.3-59-g8ed1b


From cedb28191fdfb4fc1da0a7612465624998de7da2 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Wed, 20 May 2020 18:09:14 +0300
Subject: net/mlx5: DR, Add a spinlock to protect the send ring

Adding this lock will allow writing steering entries without
locking the dr_domain and allow parallel insertion.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c  | 13 +++++++++----
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h |  1 +
 2 files changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
index b8d97d44be7b..f421013b0b54 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_send.c
@@ -357,9 +357,11 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 	u32 buff_offset;
 	int ret;
 
+	spin_lock(&send_ring->lock);
+
 	ret = dr_handle_pending_wc(dmn, send_ring);
 	if (ret)
-		return ret;
+		goto out_unlock;
 
 	if (send_info->write.length > dmn->info.max_inline_size) {
 		buff_offset = (send_ring->tx_head &
@@ -377,7 +379,9 @@ static int dr_postsend_icm_data(struct mlx5dr_domain *dmn,
 	dr_fill_data_segs(send_ring, send_info);
 	dr_post_send(send_ring->qp, send_info);
 
-	return 0;
+out_unlock:
+	spin_unlock(&send_ring->lock);
+	return ret;
 }
 
 static int dr_get_tbl_copy_details(struct mlx5dr_domain *dmn,
@@ -563,9 +567,7 @@ int mlx5dr_send_postsend_action(struct mlx5dr_domain *dmn,
 	send_info.remote_addr = action->rewrite.chunk->mr_addr;
 	send_info.rkey = action->rewrite.chunk->rkey;
 
-	mutex_lock(&dmn->mutex);
 	ret = dr_postsend_icm_data(dmn, &send_info);
-	mutex_unlock(&dmn->mutex);
 
 	return ret;
 }
@@ -886,6 +888,7 @@ int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn)
 	init_attr.pdn = dmn->pdn;
 	init_attr.uar = dmn->uar;
 	init_attr.max_send_wr = QUEUE_SIZE;
+	spin_lock_init(&dmn->send_ring->lock);
 
 	dmn->send_ring->qp = dr_create_rc_qp(dmn->mdev, &init_attr);
 	if (!dmn->send_ring->qp)  {
@@ -990,7 +993,9 @@ int mlx5dr_send_ring_force_drain(struct mlx5dr_domain *dmn)
 			return ret;
 	}
 
+	spin_lock(&send_ring->lock);
 	ret = dr_handle_pending_wc(dmn, send_ring);
+	spin_unlock(&send_ring->lock);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index 984783238baa..b6061c639cb1 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -1043,6 +1043,7 @@ struct mlx5dr_send_ring {
 	struct ib_wc wc[MAX_SEND_CQE];
 	u8 sync_buff[MIN_READ_SYNC];
 	struct mlx5dr_mr *sync_mr;
+	spinlock_t lock; /* Protect the data path of the send ring */
 };
 
 int mlx5dr_send_ring_alloc(struct mlx5dr_domain *dmn);
-- 
cgit v1.2.3-59-g8ed1b


From ed03a418abe8e5a3ba541a805314bbf8a9eadda3 Mon Sep 17 00:00:00 2001
From: Alex Vesker <valex@mellanox.com>
Date: Wed, 20 May 2020 18:09:35 +0300
Subject: net/mlx5: DR, Split RX and TX lock for parallel insertion

Change the locking flow to support RX and TX locks. Splitting
the single lock into two will allow inserting rules in parallel
for the RX and TX parts of the FDB.

Locking the dr_domain will be done by taking both the RX domain
and the TX domain locks; this is mostly used for control operations
on the dr_domain. When inserting rules for RX or TX, only the single
nic_domain RX or TX lock will be used. Splitting the lock is safe since
RX and TX domains are logically separated from each other, and shared
objects such as the send-ring and memory pool are protected by locks.

Signed-off-by: Alex Vesker <valex@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
Reviewed-by: Erez Shitrit <erezsh@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../mellanox/mlx5/core/steering/dr_domain.c        | 14 +++++-----
 .../mellanox/mlx5/core/steering/dr_matcher.c       | 10 +++----
 .../ethernet/mellanox/mlx5/core/steering/dr_rule.c | 31 ++++++++++------------
 .../mellanox/mlx5/core/steering/dr_table.c         | 12 ++++-----
 .../mellanox/mlx5/core/steering/dr_types.h         | 24 ++++++++++++++++-
 5 files changed, 56 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
index 48b6358b6845..890767a2a7cb 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_domain.c
@@ -297,7 +297,8 @@ mlx5dr_domain_create(struct mlx5_core_dev *mdev, enum mlx5dr_domain_type type)
 	dmn->mdev = mdev;
 	dmn->type = type;
 	refcount_set(&dmn->refcount, 1);
-	mutex_init(&dmn->mutex);
+	mutex_init(&dmn->info.rx.mutex);
+	mutex_init(&dmn->info.tx.mutex);
 
 	if (dr_domain_caps_init(mdev, dmn)) {
 		mlx5dr_err(dmn, "Failed init domain, no caps\n");
@@ -345,9 +346,9 @@ int mlx5dr_domain_sync(struct mlx5dr_domain *dmn, u32 flags)
 	int ret = 0;
 
 	if (flags & MLX5DR_DOMAIN_SYNC_FLAGS_SW) {
-		mutex_lock(&dmn->mutex);
+		mlx5dr_domain_lock(dmn);
 		ret = mlx5dr_send_ring_force_drain(dmn);
-		mutex_unlock(&dmn->mutex);
+		mlx5dr_domain_unlock(dmn);
 		if (ret) {
 			mlx5dr_err(dmn, "Force drain failed flags: %d, ret: %d\n",
 				   flags, ret);
@@ -371,7 +372,8 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
 	dr_domain_uninit_cache(dmn);
 	dr_domain_uninit_resources(dmn);
 	dr_domain_caps_uninit(dmn);
-	mutex_destroy(&dmn->mutex);
+	mutex_destroy(&dmn->info.tx.mutex);
+	mutex_destroy(&dmn->info.rx.mutex);
 	kfree(dmn);
 	return 0;
 }
@@ -379,7 +381,7 @@ int mlx5dr_domain_destroy(struct mlx5dr_domain *dmn)
 void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
 			    struct mlx5dr_domain *peer_dmn)
 {
-	mutex_lock(&dmn->mutex);
+	mlx5dr_domain_lock(dmn);
 
 	if (dmn->peer_dmn)
 		refcount_dec(&dmn->peer_dmn->refcount);
@@ -389,5 +391,5 @@ void mlx5dr_domain_set_peer(struct mlx5dr_domain *dmn,
 	if (dmn->peer_dmn)
 		refcount_inc(&dmn->peer_dmn->refcount);
 
-	mutex_unlock(&dmn->mutex);
+	mlx5dr_domain_unlock(dmn);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
index a95938874798..31abcbb95ca2 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_matcher.c
@@ -690,7 +690,7 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
 	refcount_set(&matcher->refcount, 1);
 	INIT_LIST_HEAD(&matcher->matcher_list);
 
-	mutex_lock(&tbl->dmn->mutex);
+	mlx5dr_domain_lock(tbl->dmn);
 
 	ret = dr_matcher_init(matcher, mask);
 	if (ret)
@@ -700,14 +700,14 @@ mlx5dr_matcher_create(struct mlx5dr_table *tbl,
 	if (ret)
 		goto matcher_uninit;
 
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 
 	return matcher;
 
 matcher_uninit:
 	dr_matcher_uninit(matcher);
 free_matcher:
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 	kfree(matcher);
 dec_ref:
 	refcount_dec(&tbl->refcount);
@@ -791,13 +791,13 @@ int mlx5dr_matcher_destroy(struct mlx5dr_matcher *matcher)
 	if (refcount_read(&matcher->refcount) > 1)
 		return -EBUSY;
 
-	mutex_lock(&tbl->dmn->mutex);
+	mlx5dr_domain_lock(tbl->dmn);
 
 	dr_matcher_remove_from_tbl(matcher);
 	dr_matcher_uninit(matcher);
 	refcount_dec(&matcher->tbl->refcount);
 
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 	kfree(matcher);
 
 	return 0;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
index cce3ee7a6614..cd708dcc2e3a 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_rule.c
@@ -938,7 +938,10 @@ static bool dr_rule_verify(struct mlx5dr_matcher *matcher,
 static int dr_rule_destroy_rule_nic(struct mlx5dr_rule *rule,
 				    struct mlx5dr_rule_rx_tx *nic_rule)
 {
+	mlx5dr_domain_nic_lock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
 	dr_rule_clean_rule_members(rule, nic_rule);
+	mlx5dr_domain_nic_unlock(nic_rule->nic_matcher->nic_tbl->nic_dmn);
+
 	return 0;
 }
 
@@ -1039,18 +1042,18 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 	if (dr_rule_skip(dmn->type, nic_dmn->ste_type, &matcher->mask, param))
 		return 0;
 
+	hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
+	if (!hw_ste_arr)
+		return -ENOMEM;
+
+	mlx5dr_domain_nic_lock(nic_dmn);
+
 	ret = mlx5dr_matcher_select_builders(matcher,
 					     nic_matcher,
 					     dr_rule_get_ipv(&param->outer),
 					     dr_rule_get_ipv(&param->inner));
 	if (ret)
-		goto out_err;
-
-	hw_ste_arr = kzalloc(DR_RULE_MAX_STE_CHAIN * DR_STE_SIZE, GFP_KERNEL);
-	if (!hw_ste_arr) {
-		ret = -ENOMEM;
-		goto out_err;
-	}
+		goto free_hw_ste;
 
 	/* Set the tag values inside the ste array */
 	ret = mlx5dr_ste_build_ste_arr(matcher, nic_matcher, param, hw_ste_arr);
@@ -1115,6 +1118,8 @@ dr_rule_create_rule_nic(struct mlx5dr_rule *rule,
 	if (htbl)
 		mlx5dr_htbl_put(htbl);
 
+	mlx5dr_domain_nic_unlock(nic_dmn);
+
 	kfree(hw_ste_arr);
 
 	return 0;
@@ -1129,8 +1134,8 @@ free_rule:
 		kfree(ste_info);
 	}
 free_hw_ste:
+	mlx5dr_domain_nic_unlock(nic_dmn);
 	kfree(hw_ste_arr);
-out_err:
 	return ret;
 }
 
@@ -1232,31 +1237,23 @@ struct mlx5dr_rule *mlx5dr_rule_create(struct mlx5dr_matcher *matcher,
 {
 	struct mlx5dr_rule *rule;
 
-	mutex_lock(&matcher->tbl->dmn->mutex);
 	refcount_inc(&matcher->refcount);
 
 	rule = dr_rule_create_rule(matcher, value, num_actions, actions);
 	if (!rule)
 		refcount_dec(&matcher->refcount);
 
-	mutex_unlock(&matcher->tbl->dmn->mutex);
-
 	return rule;
 }
 
 int mlx5dr_rule_destroy(struct mlx5dr_rule *rule)
 {
 	struct mlx5dr_matcher *matcher = rule->matcher;
-	struct mlx5dr_table *tbl = rule->matcher->tbl;
 	int ret;
 
-	mutex_lock(&tbl->dmn->mutex);
-
 	ret = dr_rule_destroy_rule(rule);
-
-	mutex_unlock(&tbl->dmn->mutex);
-
 	if (!ret)
 		refcount_dec(&matcher->refcount);
+
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
index c2fe48d7b75a..b599b6beb5b9 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_table.c
@@ -14,7 +14,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
 	if (action && action->action_type != DR_ACTION_TYP_FT)
 		return -EOPNOTSUPP;
 
-	mutex_lock(&tbl->dmn->mutex);
+	mlx5dr_domain_lock(tbl->dmn);
 
 	if (!list_empty(&tbl->matcher_list))
 		last_matcher = list_last_entry(&tbl->matcher_list,
@@ -78,7 +78,7 @@ int mlx5dr_table_set_miss_action(struct mlx5dr_table *tbl,
 		refcount_inc(&action->refcount);
 
 out:
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 	return ret;
 }
 
@@ -95,7 +95,7 @@ static void dr_table_uninit_fdb(struct mlx5dr_table *tbl)
 
 static void dr_table_uninit(struct mlx5dr_table *tbl)
 {
-	mutex_lock(&tbl->dmn->mutex);
+	mlx5dr_domain_lock(tbl->dmn);
 
 	switch (tbl->dmn->type) {
 	case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -112,7 +112,7 @@ static void dr_table_uninit(struct mlx5dr_table *tbl)
 		break;
 	}
 
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 }
 
 static int dr_table_init_nic(struct mlx5dr_domain *dmn,
@@ -177,7 +177,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
 
 	INIT_LIST_HEAD(&tbl->matcher_list);
 
-	mutex_lock(&tbl->dmn->mutex);
+	mlx5dr_domain_lock(tbl->dmn);
 
 	switch (tbl->dmn->type) {
 	case MLX5DR_DOMAIN_TYPE_NIC_RX:
@@ -201,7 +201,7 @@ static int dr_table_init(struct mlx5dr_table *tbl)
 		break;
 	}
 
-	mutex_unlock(&tbl->dmn->mutex);
+	mlx5dr_domain_unlock(tbl->dmn);
 
 	return ret;
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
index b6061c639cb1..c6d5a81d138b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_types.h
@@ -636,6 +636,7 @@ struct mlx5dr_domain_rx_tx {
 	u64 drop_icm_addr;
 	u64 default_icm_addr;
 	enum mlx5dr_ste_entry_type ste_type;
+	struct mutex mutex; /* protect rx/tx domain */
 };
 
 struct mlx5dr_domain_info {
@@ -660,7 +661,6 @@ struct mlx5dr_domain {
 	struct mlx5_uars_page *uar;
 	enum mlx5dr_domain_type type;
 	refcount_t refcount;
-	struct mutex mutex; /* protect domain */
 	struct mlx5dr_icm_pool *ste_icm_pool;
 	struct mlx5dr_icm_pool *action_icm_pool;
 	struct mlx5dr_send_ring *send_ring;
@@ -814,6 +814,28 @@ struct mlx5dr_icm_chunk {
 	struct list_head *miss_list;
 };
 
+static inline void mlx5dr_domain_nic_lock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+	mutex_lock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_nic_unlock(struct mlx5dr_domain_rx_tx *nic_dmn)
+{
+	mutex_unlock(&nic_dmn->mutex);
+}
+
+static inline void mlx5dr_domain_lock(struct mlx5dr_domain *dmn)
+{
+	mlx5dr_domain_nic_lock(&dmn->info.rx);
+	mlx5dr_domain_nic_lock(&dmn->info.tx);
+}
+
+static inline void mlx5dr_domain_unlock(struct mlx5dr_domain *dmn)
+{
+	mlx5dr_domain_nic_unlock(&dmn->info.tx);
+	mlx5dr_domain_nic_unlock(&dmn->info.rx);
+}
+
 static inline int
 mlx5dr_matcher_supp_flex_parser_icmp_v4(struct mlx5dr_cmd_caps *caps)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 3f0d97cdfe6e900a5c71817b0dfb77247afee36d Mon Sep 17 00:00:00 2001
From: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Date: Fri, 15 May 2020 17:42:20 -0700
Subject: ice: Check UMEM FQ size when allocating bufs

If a UMEM is present on a queue when an interface/queue pair is being
enabled, the driver will try to prepare the Rx buffers in advance to
improve performance. However, if the fill queue is shorter than the HW
Rx ring, the driver will report failure after getting the last address
from the fill queue.

This still lets the driver process the packets correctly during the NAPI
poll, but leads to a constant NAPI rescheduling. Not allocating the
buffers in advance would result in a potential performance decrease.

Commit d57d76428ae9 ("xsk: Add API to check for available entries in FQ")
provides an API that lets drivers check the number of addresses that the
fill queue holds.

Notify the user if fill queue is not long enough to prepare all buffers
before packet processing starts, and allocate the buffers during the
NAPI poll. If the fill queue size is sufficient, prepare Rx buffers in
advance.

Signed-off-by: Krzysztof Kazimierczak <krzysztof.kazimierczak@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c | 30 +++++++++++++++++++++---------
 1 file changed, 21 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index 18076e0d12d0..a174911d8994 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -281,7 +281,9 @@ ice_setup_tx_ctx(struct ice_ring *ring, struct ice_tlan_ctx *tlan_ctx, u16 pf_q)
  */
 int ice_setup_rx_ctx(struct ice_ring *ring)
 {
+	struct device *dev = ice_pf_to_dev(ring->vsi->back);
 	int chain_len = ICE_MAX_CHAINED_RX_BUFS;
+	u16 num_bufs = ICE_DESC_UNUSED(ring);
 	struct ice_vsi *vsi = ring->vsi;
 	u32 rxdid = ICE_RXDID_FLEX_NIC;
 	struct ice_rlan_ctx rlan_ctx;
@@ -324,7 +326,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 				return err;
 			xsk_buff_set_rxq_info(ring->xsk_umem, &ring->xdp_rxq);
 
-			dev_info(ice_pf_to_dev(vsi->back), "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
+			dev_info(dev, "Registered XDP mem model MEM_TYPE_XSK_BUFF_POOL on Rx ring %d\n",
 				 ring->q_index);
 		} else {
 			if (!xdp_rxq_info_is_reg(&ring->xdp_rxq))
@@ -408,7 +410,7 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	/* Absolute queue number out of 2K needs to be passed */
 	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
 	if (err) {
-		dev_err(ice_pf_to_dev(vsi->back), "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
+		dev_err(dev, "Failed to set LAN Rx queue context for absolute Rx queue %d error: %d\n",
 			pf_q, err);
 		return -EIO;
 	}
@@ -426,13 +428,23 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	ring->tail = hw->hw_addr + QRX_TAIL(pf_q);
 	writel(0, ring->tail);
 
-	err = ring->xsk_umem ?
-	      ice_alloc_rx_bufs_zc(ring, ICE_DESC_UNUSED(ring)) :
-	      ice_alloc_rx_bufs(ring, ICE_DESC_UNUSED(ring));
-	if (err)
-		dev_info(ice_pf_to_dev(vsi->back), "Failed allocate some buffers on %sRx ring %d (pf_q %d)\n",
-			 ring->xsk_umem ? "UMEM enabled " : "",
-			 ring->q_index, pf_q);
+	if (ring->xsk_umem) {
+		if (!xsk_buff_can_alloc(ring->xsk_umem, num_bufs)) {
+			dev_warn(dev, "UMEM does not provide enough addresses to fill %d buffers on Rx ring %d\n",
+				 num_bufs, ring->q_index);
+			dev_warn(dev, "Change Rx ring/fill queue size to avoid performance issues\n");
+
+			return 0;
+		}
+
+		err = ice_alloc_rx_bufs_zc(ring, num_bufs);
+		if (err)
+			dev_info(dev, "Failed to allocate some buffers on UMEM enabled Rx ring %d (pf_q %d)\n",
+				 ring->q_index, pf_q);
+		return 0;
+	}
+
+	ice_alloc_rx_bufs(ring, num_bufs);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From a7528198add88a6f51b8ade17a5cf86804b8f7ee Mon Sep 17 00:00:00 2001
From: Markus Theil <markus.theil@tu-ilmenau.de>
Date: Wed, 27 May 2020 18:03:34 +0200
Subject: mac80211: support control port TX status reporting

Add support for TX status reporting for the control port
TX API; this will be used by hostapd when it moves to the
control port TX API.

Signed-off-by: Markus Theil <markus.theil@tu-ilmenau.de>
Link: https://lore.kernel.org/r/20200527160334.19224-1-markus.theil@tu-ilmenau.de
[fix commit message, it was referring to nl80211]
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  3 ++-
 net/mac80211/main.c        |  2 ++
 net/mac80211/status.c      |  9 ++++++-
 net/mac80211/tdls.c        |  2 +-
 net/mac80211/tx.c          | 62 ++++++++++++++++++++++++++++++++--------------
 5 files changed, 56 insertions(+), 22 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b87dc873825b..b7935f3d000d 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1783,7 +1783,8 @@ netdev_tx_t ieee80211_subif_start_xmit_8023(struct sk_buff *skb,
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags,
-				  u32 ctrl_flags);
+				  u32 ctrl_flags,
+				  u64 *cookie);
 void ieee80211_purge_tx_queue(struct ieee80211_hw *hw,
 			      struct sk_buff_head *skbs);
 struct sk_buff *
diff --git a/net/mac80211/main.c b/net/mac80211/main.c
index ac74bd780b42..b4a2efe8e83a 100644
--- a/net/mac80211/main.c
+++ b/net/mac80211/main.c
@@ -596,6 +596,8 @@ struct ieee80211_hw *ieee80211_alloc_hw_nm(size_t priv_data_len,
 			      NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211);
 	wiphy_ext_feature_set(wiphy,
 			      NL80211_EXT_FEATURE_CONTROL_PORT_NO_PREAUTH);
+	wiphy_ext_feature_set(wiphy,
+			      NL80211_EXT_FEATURE_CONTROL_PORT_OVER_NL80211_TX_STATUS);
 	wiphy_ext_feature_set(wiphy,
 			      NL80211_EXT_FEATURE_SCAN_FREQ_KHZ);
 
diff --git a/net/mac80211/status.c b/net/mac80211/status.c
index 22512805eafb..7b1bacac39c6 100644
--- a/net/mac80211/status.c
+++ b/net/mac80211/status.c
@@ -649,10 +649,17 @@ static void ieee80211_report_ack_skb(struct ieee80211_local *local,
 						      info->status.ack_signal,
 						      info->status.is_valid_ack_signal,
 						      GFP_ATOMIC);
-			else
+			else if (ieee80211_is_mgmt(hdr->frame_control))
 				cfg80211_mgmt_tx_status(&sdata->wdev, cookie,
 							skb->data, skb->len,
 							acked, GFP_ATOMIC);
+			else
+				cfg80211_control_port_tx_status(&sdata->wdev,
+								cookie,
+								skb->data,
+								skb->len,
+								acked,
+								GFP_ATOMIC);
 		}
 		rcu_read_unlock();
 
diff --git a/net/mac80211/tdls.c b/net/mac80211/tdls.c
index 8ad420db3766..4b0cff4a07bd 100644
--- a/net/mac80211/tdls.c
+++ b/net/mac80211/tdls.c
@@ -1054,7 +1054,7 @@ ieee80211_tdls_prep_mgmt_packet(struct wiphy *wiphy, struct net_device *dev,
 
 	/* disable bottom halves when entering the Tx path */
 	local_bh_disable();
-	__ieee80211_subif_start_xmit(skb, dev, flags, 0);
+	__ieee80211_subif_start_xmit(skb, dev, flags, 0, NULL);
 	local_bh_enable();
 
 	return ret;
diff --git a/net/mac80211/tx.c b/net/mac80211/tx.c
index 5931128e1855..e9ce658141f5 100644
--- a/net/mac80211/tx.c
+++ b/net/mac80211/tx.c
@@ -2436,13 +2436,19 @@ int ieee80211_lookup_ra_sta(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
-static int ieee80211_store_ack_skb(struct ieee80211_local *local,
+static u16 ieee80211_store_ack_skb(struct ieee80211_local *local,
 				   struct sk_buff *skb,
-				   u32 *info_flags)
+				   u32 *info_flags,
+				   u64 *cookie)
 {
-	struct sk_buff *ack_skb = skb_clone_sk(skb);
+	struct sk_buff *ack_skb;
 	u16 info_id = 0;
 
+	if (skb->sk)
+		ack_skb = skb_clone_sk(skb);
+	else
+		ack_skb = skb_clone(skb, GFP_ATOMIC);
+
 	if (ack_skb) {
 		unsigned long flags;
 		int id;
@@ -2455,6 +2461,10 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
 		if (id >= 0) {
 			info_id = id;
 			*info_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+			if (cookie) {
+				*cookie = ieee80211_mgmt_tx_cookie(local);
+				IEEE80211_SKB_CB(ack_skb)->ack.cookie = *cookie;
+			}
 		} else {
 			kfree_skb(ack_skb);
 		}
@@ -2484,7 +2494,8 @@ static int ieee80211_store_ack_skb(struct ieee80211_local *local,
  */
 static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 					   struct sk_buff *skb, u32 info_flags,
-					   struct sta_info *sta, u32 ctrl_flags)
+					   struct sta_info *sta, u32 ctrl_flags,
+					   u64 *cookie)
 {
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_tx_info *info;
@@ -2755,9 +2766,11 @@ static struct sk_buff *ieee80211_build_hdr(struct ieee80211_sub_if_data *sdata,
 		goto free;
 	}
 
-	if (unlikely(!multicast && skb->sk &&
-		     skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
-		info_id = ieee80211_store_ack_skb(local, skb, &info_flags);
+	if (unlikely(!multicast && ((skb->sk &&
+		     skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS) ||
+		     ctrl_flags & IEEE80211_TX_CTL_REQ_TX_STATUS)))
+		info_id = ieee80211_store_ack_skb(local, skb, &info_flags,
+						  cookie);
 
 	/*
 	 * If the skb is shared we need to obtain our own copy.
@@ -3913,7 +3926,8 @@ EXPORT_SYMBOL(ieee80211_txq_schedule_start);
 void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 				  struct net_device *dev,
 				  u32 info_flags,
-				  u32 ctrl_flags)
+				  u32 ctrl_flags,
+				  u64 *cookie)
 {
 	struct ieee80211_sub_if_data *sdata = IEEE80211_DEV_TO_SUB_IF(dev);
 	struct ieee80211_local *local = sdata->local;
@@ -3983,7 +3997,7 @@ void __ieee80211_subif_start_xmit(struct sk_buff *skb,
 		skb_mark_not_on_list(skb);
 
 		skb = ieee80211_build_hdr(sdata, skb, info_flags,
-					  sta, ctrl_flags);
+					  sta, ctrl_flags, cookie);
 		if (IS_ERR(skb)) {
 			kfree_skb_list(next);
 			goto out;
@@ -4125,9 +4139,9 @@ netdev_tx_t ieee80211_subif_start_xmit(struct sk_buff *skb,
 		__skb_queue_head_init(&queue);
 		ieee80211_convert_to_unicast(skb, dev, &queue);
 		while ((skb = __skb_dequeue(&queue)))
-			__ieee80211_subif_start_xmit(skb, dev, 0, 0);
+			__ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
 	} else {
-		__ieee80211_subif_start_xmit(skb, dev, 0, 0);
+		__ieee80211_subif_start_xmit(skb, dev, 0, 0, NULL);
 	}
 
 	return NETDEV_TX_OK;
@@ -4215,7 +4229,7 @@ static void ieee80211_8023_xmit(struct ieee80211_sub_if_data *sdata,
 
 	if (unlikely(!multicast && skb->sk &&
 		     skb_shinfo(skb)->tx_flags & SKBTX_WIFI_STATUS))
-		ieee80211_store_ack_skb(local, skb, &info->flags);
+		ieee80211_store_ack_skb(local, skb, &info->flags, NULL);
 
 	memset(info, 0, sizeof(*info));
 
@@ -4299,7 +4313,7 @@ ieee80211_build_data_template(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
-	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0);
+	skb = ieee80211_build_hdr(sdata, skb, info_flags, sta, 0, NULL);
 	if (IS_ERR(skb))
 		goto out;
 
@@ -5347,7 +5361,7 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 	struct sk_buff *skb;
 	struct ethhdr *ehdr;
 	u32 ctrl_flags = 0;
-	u32 flags;
+	u32 flags = 0;
 
 	/* Only accept CONTROL_PORT_PROTOCOL configured in CONNECT/ASSOCIATE
 	 * or Pre-Authentication
@@ -5360,9 +5374,13 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 		ctrl_flags |= IEEE80211_TX_CTRL_PORT_CTRL_PROTO;
 
 	if (unencrypted)
-		flags = IEEE80211_TX_INTFL_DONT_ENCRYPT;
-	else
-		flags = 0;
+		flags |= IEEE80211_TX_INTFL_DONT_ENCRYPT;
+
+	if (cookie)
+		ctrl_flags |= IEEE80211_TX_CTL_REQ_TX_STATUS;
+
+	flags |= IEEE80211_TX_INTFL_NL80211_FRAME_TX |
+		 IEEE80211_TX_CTL_INJECTED;
 
 	skb = dev_alloc_skb(local->hw.extra_tx_headroom +
 			    sizeof(struct ethhdr) + len);
@@ -5383,10 +5401,15 @@ int ieee80211_tx_control_port(struct wiphy *wiphy, struct net_device *dev,
 	skb_reset_network_header(skb);
 	skb_reset_mac_header(skb);
 
+	/* mutex lock is only needed for incrementing the cookie counter */
+	mutex_lock(&local->mtx);
+
 	local_bh_disable();
-	__ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags);
+	__ieee80211_subif_start_xmit(skb, skb->dev, flags, ctrl_flags, cookie);
 	local_bh_enable();
 
+	mutex_unlock(&local->mtx);
+
 	return 0;
 }
 
@@ -5413,7 +5436,8 @@ int ieee80211_probe_mesh_link(struct wiphy *wiphy, struct net_device *dev,
 
 	local_bh_disable();
 	__ieee80211_subif_start_xmit(skb, skb->dev, 0,
-				     IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP);
+				     IEEE80211_TX_CTRL_SKIP_MPATH_LOOKUP,
+				     NULL);
 	local_bh_enable();
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From bf0ddd104167bfc08a5a169b3669f06c9052c1b0 Mon Sep 17 00:00:00 2001
From: "Azamat H. Hackimov" <azamat.hackimov@gmail.com>
Date: Sun, 24 May 2020 20:41:29 +0300
Subject: Bluetooth: btbcm: Added 003.006.007, changed 001.003.015

Added new Broadcom device BCM4350C5, changed BCM4354A2 to BCM4356A2.

Based on the Broadcom Windows drivers, 001.003.015 should be BCM4356A2. I
have a user report that the firmware name is misplaced
(https://github.com/winterheart/broadcom-bt-firmware/issues/3).

Signed-off-by: Azamat H. Hackimov <azamat.hackimov@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btbcm.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/bluetooth/btbcm.c b/drivers/bluetooth/btbcm.c
index df7a8a22e53c..1b9743b7f2ef 100644
--- a/drivers/bluetooth/btbcm.c
+++ b/drivers/bluetooth/btbcm.c
@@ -414,11 +414,12 @@ static const struct bcm_subver_table bcm_usb_subver_table[] = {
 	{ 0x2118, "BCM20702A0"	},	/* 001.001.024 */
 	{ 0x2126, "BCM4335A0"	},	/* 001.001.038 */
 	{ 0x220e, "BCM20702A1"	},	/* 001.002.014 */
-	{ 0x230f, "BCM4354A2"	},	/* 001.003.015 */
+	{ 0x230f, "BCM4356A2"	},	/* 001.003.015 */
 	{ 0x4106, "BCM4335B0"	},	/* 002.001.006 */
 	{ 0x410e, "BCM20702B0"	},	/* 002.001.014 */
 	{ 0x6109, "BCM4335C0"	},	/* 003.001.009 */
 	{ 0x610c, "BCM4354"	},	/* 003.001.012 */
+	{ 0x6607, "BCM4350C5"	},	/* 003.006.007 */
 	{ }
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 7307f29687fda5486fa3bf2f9a5abe7a352bbce3 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 15 May 2020 19:03:42 +0200
Subject: mt76: mt7615: introduce remain_on_channel support

Introduce remain_on_channel support to the mt7615 driver if the device is
running offload firmware.

Co-developed-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |  1 +
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   | 10 +++
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c    | 10 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 79 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 51 ++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h    | 19 ++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  9 +++
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c    |  2 +
 8 files changed, 179 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index e6de4a1b8f26..e2926e091c0f 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -290,6 +290,7 @@ enum {
 	MT76_STATE_POWER_OFF,
 	MT76_STATE_PS,
 	MT76_STATE_SUSPEND,
+	MT76_STATE_ROC,
 };
 
 struct mt76_hw_cap {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 1d8fdc7e062b..18a570ffb815 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -139,8 +139,10 @@ void mt7615_check_offload_capability(struct mt7615_dev *dev)
 		ieee80211_hw_set(hw, SUPPORTS_PS);
 		ieee80211_hw_set(hw, SUPPORTS_DYNAMIC_PS);
 
+		wiphy->max_remain_on_channel_duration = 5000;
 		wiphy->features |= NL80211_FEATURE_SCHED_SCAN_RANDOM_MAC_ADDR |
 				   NL80211_FEATURE_SCAN_RANDOM_MAC_ADDR |
+				   WIPHY_FLAG_HAS_REMAIN_ON_CHANNEL |
 				   NL80211_FEATURE_P2P_GO_CTWIN |
 				   NL80211_FEATURE_P2P_GO_OPPPS;
 	} else {
@@ -149,6 +151,8 @@ void mt7615_check_offload_capability(struct mt7615_dev *dev)
 		dev->ops->sched_scan_start = NULL;
 		dev->ops->sched_scan_stop = NULL;
 		dev->ops->set_rekey_data = NULL;
+		dev->ops->remain_on_channel = NULL;
+		dev->ops->cancel_remain_on_channel = NULL;
 
 		wiphy->max_sched_scan_plan_interval = 0;
 		wiphy->max_sched_scan_ie_len = 0;
@@ -373,6 +377,9 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	skb_queue_head_init(&phy->scan_event_list);
 
 	INIT_WORK(&phy->ps_work, mt7615_ps_work);
+	INIT_WORK(&phy->roc_work, mt7615_roc_work);
+	timer_setup(&phy->roc_timer, mt7615_roc_timer, 0);
+	init_waitqueue_head(&phy->roc_wait);
 
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
@@ -437,9 +444,12 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	INIT_LIST_HEAD(&dev->sta_poll_list);
 	spin_lock_init(&dev->sta_poll_lock);
 	init_waitqueue_head(&dev->reset_wait);
+	init_waitqueue_head(&dev->phy.roc_wait);
 
 	INIT_WORK(&dev->reset_work, mt7615_mac_reset_work);
 	INIT_WORK(&dev->phy.ps_work, mt7615_ps_work);
+	INIT_WORK(&dev->phy.roc_work, mt7615_roc_work);
+	timer_setup(&dev->phy.roc_timer, mt7615_roc_timer, 0);
 
 	mt7615_init_wiphy(hw);
 	dev->mphy.sband_2g.sband.ht_cap.cap |= IEEE80211_HT_CAP_LDPC_CODING;
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 7d65a3fb0c23..6b5c38ab9f5d 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -175,7 +175,8 @@ mt7615_get_status_freq_info(struct mt7615_dev *dev, struct mt76_phy *mphy,
 			    struct mt76_rx_status *status, u8 chfreq)
 {
 	if (!test_bit(MT76_HW_SCANNING, &mphy->state) &&
-	    !test_bit(MT76_HW_SCHED_SCANNING, &mphy->state)) {
+	    !test_bit(MT76_HW_SCHED_SCANNING, &mphy->state) &&
+	    !test_bit(MT76_STATE_ROC, &mphy->state)) {
 		status->freq = mphy->chandef.chan->center_freq;
 		status->band = mphy->chandef.chan->band;
 		return;
@@ -1849,8 +1850,13 @@ void mt7615_mac_reset_work(struct work_struct *work)
 	set_bit(MT76_MCU_RESET, &dev->mphy.state);
 	wake_up(&dev->mt76.mcu.wait);
 	cancel_delayed_work_sync(&dev->phy.mac_work);
-	if (phy2)
+	del_timer_sync(&dev->phy.roc_timer);
+	cancel_work_sync(&dev->phy.roc_work);
+	if (phy2) {
 		cancel_delayed_work_sync(&phy2->mac_work);
+		del_timer_sync(&phy2->roc_timer);
+		cancel_work_sync(&phy2->roc_work);
+	}
 
 	/* lock/unlock all queues to ensure that no tx is pending */
 	mt76_txq_schedule_all(&dev->mphy);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 2e9e9d3519d7..f8cbee1770ce 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -72,6 +72,8 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 
 	cancel_delayed_work_sync(&phy->mac_work);
 	cancel_work_sync(&phy->ps_work);
+	del_timer_sync(&phy->roc_timer);
+	cancel_work_sync(&phy->roc_work);
 
 	mutex_lock(&dev->mt76.mutex);
 
@@ -791,6 +793,37 @@ mt7615_set_antenna(struct ieee80211_hw *hw, u32 tx_ant, u32 rx_ant)
 	return 0;
 }
 
+static void mt7615_roc_iter(void *priv, u8 *mac,
+			    struct ieee80211_vif *vif)
+{
+	struct mt7615_phy *phy = priv;
+
+	mt7615_mcu_set_roc(phy, vif, NULL, 0);
+}
+
+void mt7615_roc_work(struct work_struct *work)
+{
+	struct mt7615_phy *phy;
+
+	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
+						roc_work);
+
+	if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
+		return;
+
+	ieee80211_iterate_active_interfaces(phy->mt76->hw,
+					    IEEE80211_IFACE_ITER_RESUME_ALL,
+					    mt7615_roc_iter, phy);
+	ieee80211_remain_on_channel_expired(phy->mt76->hw);
+}
+
+void mt7615_roc_timer(struct timer_list *timer)
+{
+	struct mt7615_phy *phy = from_timer(phy, timer, roc_timer);
+
+	ieee80211_queue_work(phy->mt76->hw, &phy->roc_work);
+}
+
 void mt7615_scan_work(struct work_struct *work)
 {
 	struct mt7615_phy *phy;
@@ -864,6 +897,50 @@ mt7615_stop_sched_scan(struct ieee80211_hw *hw, struct ieee80211_vif *vif)
 	return mt7615_mcu_sched_scan_enable(mphy->priv, vif, false);
 }
 
+static int mt7615_remain_on_channel(struct ieee80211_hw *hw,
+				    struct ieee80211_vif *vif,
+				    struct ieee80211_channel *chan,
+				    int duration,
+				    enum ieee80211_roc_type type)
+{
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+	int err;
+
+	if (test_and_set_bit(MT76_STATE_ROC, &phy->mt76->state))
+		return 0;
+
+	err = mt7615_mcu_set_roc(phy, vif, chan, duration);
+	if (err < 0) {
+		clear_bit(MT76_STATE_ROC, &phy->mt76->state);
+		return err;
+	}
+
+	if (!wait_event_timeout(phy->roc_wait, phy->roc_grant, HZ)) {
+		mt7615_mcu_set_roc(phy, vif, NULL, 0);
+		clear_bit(MT76_STATE_ROC, &phy->mt76->state);
+
+		return -ETIMEDOUT;
+	}
+
+	return 0;
+}
+
+static int mt7615_cancel_remain_on_channel(struct ieee80211_hw *hw,
+					   struct ieee80211_vif *vif)
+{
+	struct mt7615_phy *phy = mt7615_hw_phy(hw);
+
+	if (!test_and_clear_bit(MT76_STATE_ROC, &phy->mt76->state))
+		return 0;
+
+	del_timer_sync(&phy->roc_timer);
+	cancel_work_sync(&phy->roc_work);
+
+	mt7615_mcu_set_roc(phy, vif, NULL, 0);
+
+	return 0;
+}
+
 #ifdef CONFIG_PM
 static int mt7615_suspend(struct ieee80211_hw *hw,
 			  struct cfg80211_wowlan *wowlan)
@@ -978,6 +1055,8 @@ const struct ieee80211_ops mt7615_ops = {
 	.cancel_hw_scan = mt7615_cancel_hw_scan,
 	.sched_scan_start = mt7615_start_sched_scan,
 	.sched_scan_stop = mt7615_stop_sched_scan,
+	.remain_on_channel = mt7615_remain_on_channel,
+	.cancel_remain_on_channel = mt7615_cancel_remain_on_channel,
 #ifdef CONFIG_PM
 	.suspend = mt7615_suspend,
 	.resume = mt7615_resume,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index b944f372738a..7eb99bde3394 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -359,6 +359,33 @@ mt7615_mcu_scan_event(struct mt7615_dev *dev, struct sk_buff *skb)
 				     MT7615_HW_SCAN_TIMEOUT);
 }
 
+static void
+mt7615_mcu_roc_event(struct mt7615_dev *dev, struct sk_buff *skb)
+{
+	struct mt7615_roc_tlv *event;
+	struct mt7615_phy *phy;
+	struct mt76_phy *mphy;
+	int duration;
+
+	skb_pull(skb, sizeof(struct mt7615_mcu_rxd));
+	event = (struct mt7615_roc_tlv *)skb->data;
+
+	if (event->dbdc_band && dev->mt76.phy2)
+		mphy = dev->mt76.phy2;
+	else
+		mphy = &dev->mt76.phy;
+
+	ieee80211_ready_on_channel(mphy->hw);
+
+	phy = (struct mt7615_phy *)mphy->priv;
+	phy->roc_grant = true;
+	wake_up(&phy->roc_wait);
+
+	duration = le32_to_cpu(event->max_interval);
+	mod_timer(&phy->roc_timer,
+		  round_jiffies_up(jiffies + msecs_to_jiffies(duration)));
+}
+
 static void
 mt7615_mcu_beacon_loss_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 {
@@ -426,6 +453,9 @@ mt7615_mcu_rx_unsolicited_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	case MCU_EVENT_BSS_BEACON_LOSS:
 		mt7615_mcu_beacon_loss_event(dev, skb);
 		break;
+	case MCU_EVENT_ROC:
+		mt7615_mcu_roc_event(dev, skb);
+		break;
 	case MCU_EVENT_SCHED_SCAN_DONE:
 	case MCU_EVENT_SCAN_DONE:
 		mt7615_mcu_scan_event(dev, skb);
@@ -451,6 +481,7 @@ void mt7615_mcu_rx_event(struct mt7615_dev *dev, struct sk_buff *skb)
 	    rxd->eid == MCU_EVENT_SCHED_SCAN_DONE ||
 	    rxd->eid == MCU_EVENT_BSS_ABSENCE ||
 	    rxd->eid == MCU_EVENT_SCAN_DONE ||
+	    rxd->eid == MCU_EVENT_ROC ||
 	    !rxd->seq)
 		mt7615_mcu_rx_unsolicited_event(dev, skb);
 	else
@@ -3601,6 +3632,26 @@ int mt7615_mcu_update_gtk_rekey(struct ieee80211_hw *hw,
 }
 #endif /* CONFIG_PM */
 
+int mt7615_mcu_set_roc(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+		       struct ieee80211_channel *chan, int duration)
+{
+	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
+	struct mt7615_dev *dev = phy->dev;
+	struct mt7615_roc_tlv req = {
+		.bss_idx = mvif->idx,
+		.active = !chan,
+		.max_interval = cpu_to_le32(duration),
+		.primary_chan = chan ? chan->hw_value : 0,
+		.band = chan ? chan->band : 0,
+		.req_type = 2,
+	};
+
+	phy->roc_grant = false;
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_CMD_SET_ROC, &req,
+				   sizeof(req), false);
+}
+
 int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif)
 {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index 348521b0d44c..fd40d99f5a23 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -82,6 +82,7 @@ enum {
 	MCU_EVENT_ACCESS_REG = 0x02,
 	MCU_EVENT_MT_PATCH_SEM = 0x04,
 	MCU_EVENT_SCAN_DONE = 0x0d,
+	MCU_EVENT_ROC = 0x10,
 	MCU_EVENT_BSS_ABSENCE  = 0x11,
 	MCU_EVENT_BSS_BEACON_LOSS = 0x13,
 	MCU_EVENT_CH_PRIVILEGE = 0x18,
@@ -525,6 +526,23 @@ struct mt7615_gtk_rekey_tlv {
 	u8 reserverd[3];
 } __packed;
 
+struct mt7615_roc_tlv {
+	u8 bss_idx;
+	u8 token;
+	u8 active;
+	u8 primary_chan;
+	u8 sco;
+	u8 band;
+	u8 width;	/* To support 80/160MHz bandwidth */
+	u8 freq_seg1;	/* To support 80/160MHz bandwidth */
+	u8 freq_seg2;	/* To support 80/160MHz bandwidth */
+	u8 req_type;
+	u8 dbdc_band;
+	u8 rsv0;
+	__le32 max_interval;	/* ms */
+	u8 rsv1[8];
+} __packed;
+
 /* offload mcu commands */
 enum {
 	MCU_CMD_START_HW_SCAN = MCU_CE_PREFIX | 0x03,
@@ -533,6 +551,7 @@ enum {
 	MCU_CMD_SET_BSS_CONNECTED = MCU_CE_PREFIX | 0x16,
 	MCU_CMD_SET_BSS_ABORT = MCU_CE_PREFIX | 0x17,
 	MCU_CMD_CANCEL_HW_SCAN = MCU_CE_PREFIX | 0x1b,
+	MCU_CMD_SET_ROC = MCU_CE_PREFIX | 0x1c,
 	MCU_CMD_SET_P2P_OPPPS = MCU_CE_PREFIX | 0x33,
 	MCU_CMD_SCHED_SCAN_ENABLE = MCU_CE_PREFIX | 0x61,
 	MCU_CMD_SCHED_SCAN_REQ = MCU_CE_PREFIX | 0x62,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index ebdfca64b079..71d5d5973116 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -201,6 +201,11 @@ struct mt7615_phy {
 	struct sk_buff_head scan_event_list;
 	struct delayed_work scan_work;
 
+	struct work_struct roc_work;
+	struct timer_list roc_timer;
+	wait_queue_head_t roc_wait;
+	bool roc_grant;
+
 	struct work_struct ps_work;
 };
 
@@ -441,6 +446,8 @@ static inline u16 mt7615_wtbl_size(struct mt7615_dev *dev)
 
 void mt7615_dma_reset(struct mt7615_dev *dev);
 void mt7615_scan_work(struct work_struct *work);
+void mt7615_roc_work(struct work_struct *work);
+void mt7615_roc_timer(struct timer_list *timer);
 void mt7615_ps_work(struct work_struct *work);
 void mt7615_init_txpower(struct mt7615_dev *dev,
 			 struct ieee80211_supported_band *sband);
@@ -532,6 +539,8 @@ int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
 			     struct ieee80211_vif *vif);
+int mt7615_mcu_set_roc(struct mt7615_phy *phy, struct ieee80211_vif *vif,
+		       struct ieee80211_channel *chan, int duration);
 int mt7615_firmware_own(struct mt7615_dev *dev);
 int mt7615_driver_own(struct mt7615_dev *dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index d74253319622..c292b41c76e3 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -52,6 +52,8 @@ static void mt7663u_stop(struct ieee80211_hw *hw)
 
 	clear_bit(MT76_STATE_RUNNING, &dev->mphy.state);
 	cancel_work_sync(&phy->ps_work);
+	del_timer_sync(&phy->roc_timer);
+	cancel_work_sync(&phy->roc_work);
 	cancel_delayed_work_sync(&phy->scan_work);
 	cancel_delayed_work_sync(&phy->mac_work);
 	mt76u_stop_tx(&dev->mt76);
-- 
cgit v1.2.3-59-g8ed1b


From 802b836a01cf4a4c8a0ac67f2567a8f743b50701 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 15 May 2020 19:05:59 +0200
Subject: mt76: mt76x02: remove check in mt76x02_mcu_msg_send

mt76x02_mcu_msg_send is only run by mmio code, so get rid of the
mt76_is_mmio() check.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
index 89a8992d84fa..267058086a90 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mcu.c
@@ -20,7 +20,7 @@ int mt76x02_mcu_msg_send(struct mt76_dev *mdev, int cmd, const void *data,
 	int ret;
 	u8 seq;
 
-	if (mt76_is_mmio(&dev->mt76) && dev->mcu_timeout)
+	if (dev->mcu_timeout)
 		return -EIO;
 
 	skb = mt76_mcu_msg_alloc(mdev, data, len);
-- 
cgit v1.2.3-59-g8ed1b


From 06acdd380a7d3893a1115c6a6ef83961cee21f98 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 16 May 2020 03:33:28 +0800
Subject: mt76: mt7915: add spatial reuse support

Enable or disable OBSS PD when the bss config changes or we
assoc to an AP that broadcasts the IE.

With this patch, we can get ~20% gain in OBSS OTA environment.

Tested-by: Evelyn Tsai <evelyn.tsai@mediatek.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/main.c   |  7 +++++-
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c    | 25 ++++++++++++++++++++++
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.h    |  1 +
 drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h |  2 ++
 4 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index 98567374c2c9..e045dc234100 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -437,8 +437,10 @@ static void mt7915_bss_info_changed(struct ieee80211_hw *hw,
 		mt7915_mcu_add_sta(dev, vif, NULL, join);
 	}
 
-	if (changed & BSS_CHANGED_ASSOC)
+	if (changed & BSS_CHANGED_ASSOC) {
 		mt7915_mcu_add_bss_info(phy, vif, info->assoc);
+		mt7915_mcu_add_obss_spr(dev, vif, info->he_obss_pd.enable);
+	}
 
 	if (changed & BSS_CHANGED_ERP_SLOT) {
 		int slottime = info->use_short_slot ? 9 : 20;
@@ -458,6 +460,9 @@ static void mt7915_bss_info_changed(struct ieee80211_hw *hw,
 	if (changed & (BSS_CHANGED_QOS | BSS_CHANGED_BEACON_ENABLED))
 		mt7915_mcu_set_tx(dev, vif);
 
+	if (changed & BSS_CHANGED_HE_OBSS_PD)
+		mt7915_mcu_add_obss_spr(dev, vif, info->he_obss_pd.enable);
+
 	if (changed & (BSS_CHANGED_BEACON |
 		       BSS_CHANGED_BEACON_ENABLED))
 		mt7915_mcu_add_beacon(hw, vif, info->enable_beacon);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 99eeea42478f..2edff868b7c9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -3159,3 +3159,28 @@ int mt7915_mcu_set_txbf_sounding(struct mt7915_dev *dev)
 	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_TXBF_ACTION,
 				   &req, sizeof(req), true);
 }
+
+int mt7915_mcu_add_obss_spr(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+			    bool enable)
+{
+#define MT_SPR_ENABLE		1
+	struct mt7915_vif *mvif = (struct mt7915_vif *)vif->drv_priv;
+	struct {
+		u8 action;
+		u8 arg_num;
+		u8 band_idx;
+		u8 status;
+		u8 drop_tx_idx;
+		u8 sta_idx;	/* 256 sta */
+		u8 rsv[2];
+		u32 val;
+	} __packed req = {
+		.action = MT_SPR_ENABLE,
+		.arg_num = 1,
+		.band_idx = mvif->band_idx,
+		.val = enable,
+	};
+
+	return __mt76_mcu_send_msg(&dev->mt76, MCU_EXT_CMD_SET_SPR,
+				   &req, sizeof(req), true);
+}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
index 34ace6e672d0..c241dd7c4c36 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.h
@@ -212,6 +212,7 @@ enum {
 	MCU_EXT_CMD_RATE_CTRL = 0x87,
 	MCU_EXT_CMD_FW_DBG_CTRL = 0x95,
 	MCU_EXT_CMD_SET_RDD_TH = 0x9d,
+	MCU_EXT_CMD_SET_SPR = 0xa8,
 };
 
 enum {
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
index 5392292a838e..85d74ecd0351 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mt7915.h
@@ -301,6 +301,8 @@ int mt7915_mcu_add_key(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 		       enum set_key_cmd cmd);
 int mt7915_mcu_add_beacon(struct ieee80211_hw *hw, struct ieee80211_vif *vif,
 			  int enable);
+int mt7915_mcu_add_obss_spr(struct mt7915_dev *dev, struct ieee80211_vif *vif,
+                            bool enable);
 int mt7915_mcu_add_rate_ctrl(struct mt7915_dev *dev, struct ieee80211_vif *vif,
 			     struct ieee80211_sta *sta);
 int mt7915_mcu_add_smps(struct mt7915_dev *dev, struct ieee80211_vif *vif,
-- 
cgit v1.2.3-59-g8ed1b


From f9a5c0561029c856afe5860495a9dfe8e9004b02 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 16 May 2020 17:05:18 +0800
Subject: mt76: mt7915: fix some sparse warnings

This fixes the following sparse warning:

drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:253:16: sparse: sparse: mixing different enum types:
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:253:16: sparse:    unsigned int enum mt7915_txq_id
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:253:16: sparse:    unsigned int enum mt76_txq_id
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:758:63: sparse: sparse: incorrect type in argument 2 (different address spaces)
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:758:63: sparse:    expected unsigned char const [usertype] *ies
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:758:63: sparse:    got unsigned char const [noderef] <asn:4> *
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1390:23: sparse: sparse: incorrect type in argument 1 (different base types)
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1390:23: sparse:    expected unsigned int w
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1390:23: sparse:    got restricted __le32 [usertype] supp_ht_mcs
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1390:23: sparse: sparse: restricted __le32 degrades to integer
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1429:60: sparse: sparse: bad assignment (>>=) to restricted __le16
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1773:16: sparse: sparse: restricted __le32 degrades to integer

Fixes: 6094f86fb371 ("mt76: mt7915: add HE bss_conf support for interfaces")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 2edff868b7c9..695364d35eb2 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -220,7 +220,7 @@ static int __mt7915_mcu_msg_send(struct mt7915_dev *dev, struct sk_buff *skb,
 {
 	struct mt7915_mcu_txd *mcu_txd;
 	u8 seq, pkt_fmt, qidx;
-	enum mt7915_txq_id txq;
+	enum mt76_txq_id txq;
 	__le32 *txd;
 	u32 val;
 
@@ -815,8 +815,7 @@ static void mt7915_check_he_obss_narrow_bw_ru_iter(struct wiphy *wiphy,
 	struct mt7915_he_obss_narrow_bw_ru_data *data = _data;
 	const struct element *elem;
 
-	elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY, bss->ies->data,
-				  bss->ies->len);
+	elem = ieee80211_bss_get_elem(bss, WLAN_EID_EXT_CAPABILITY);
 
 	if (!elem || elem->datalen < 10 ||
 	    !(elem->data[10] &
@@ -1954,7 +1953,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 
 		ra->supp_ht_mcs = *(__le32 *)ra->ht_mcs;
 		ra->supp_mode |= MODE_HT;
-		mcs = hweight32(ra->supp_ht_mcs) - 1;
+		mcs = hweight32(le32_to_cpu(ra->supp_ht_mcs)) - 1;
 		ra->af = sta->ht_cap.ampdu_factor;
 		ra->ht_gf = !!(sta->ht_cap.cap & IEEE80211_HT_CAP_GRN_FLD);
 
@@ -1972,7 +1971,7 @@ mt7915_mcu_sta_rate_ctrl_tlv(struct sk_buff *skb, struct mt7915_dev *dev,
 	}
 
 	if (sta->vht_cap.vht_supported) {
-		__le16 mcs_map = sta->vht_cap.vht_mcs.rx_mcs_map;
+		u16 mcs_map = le16_to_cpu(sta->vht_cap.vht_mcs.rx_mcs_map);
 		u16 vht_mcs;
 		u8 af, mcs_prev;
 
@@ -2399,7 +2398,7 @@ static int mt7915_mcu_init_download(struct mt7915_dev *dev, u32 addr,
 	};
 	int attr;
 
-	if (req.addr == MCU_PATCH_ADDRESS)
+	if (req.addr == cpu_to_le32(MCU_PATCH_ADDRESS))
 		attr = -MCU_CMD_PATCH_START_REQ;
 	else
 		attr = -MCU_CMD_TARGET_ADDRESS_LEN_REQ;
-- 
cgit v1.2.3-59-g8ed1b


From 19e29c69cc4760c0d340ac1fa7f8c423fcd70a08 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Sat, 16 May 2020 17:05:19 +0800
Subject: mt76: mt7915: fix sparse warnings: incorrect type initializer

drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:2317:31: sparse: sparse:
incorrect type in initializer (different base types)

Fixes: 5517f78b0063 ("mt76: mt7915: enable firmware module debug support")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 695364d35eb2..8460cd453213 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -2671,7 +2671,7 @@ int mt7915_mcu_fw_dbg_ctrl(struct mt7915_dev *dev, u32 module, u8 level)
 		u16 len;
 		u8 level;
 		u8 rsv[3];
-		u32 module_idx;
+		__le32 module_idx;
 	} data = {
 		.module_idx = cpu_to_le32(module),
 		.level = level,
-- 
cgit v1.2.3-59-g8ed1b


From 4c04f25dd449a825fbcd7610c7f20be1e51b088d Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Sat, 16 May 2020 14:56:33 +0200
Subject: mt76: mt7615: fix NULL pointer deref in mt7615_register_ext_phy

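Fix the following NULL pointer dereference in the mt7615_register_ext_phy
routine by initializing the phy work items, scan event list, timer and
wait queue only after the phy has been allocated by mt76_alloc_phy: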

[   27.648860] Unable to handle kernel NULL pointer dereference at virtual address 0000000000000060
[   27.657697] Mem abort info:
[   27.660495]   ESR = 0x96000046
[   27.663549]   EC = 0x25: DABT (current EL), IL = 32 bits
[   27.668857]   SET = 0, FnV = 0
[   27.671910]   EA = 0, S1PTW = 0
[   27.675040] Data abort info:
[   27.677918]   ISV = 0, ISS = 0x00000046
[   27.681751]   CM = 0, WnR = 1
[   27.684717] user pgtable: 4k pages, 39-bit VAs, pgdp=000000007d8cc000
[   27.691156] [0000000000000060] pgd=000000007d281003, pud=000000007d281003, pmd=0000000000000000
[   27.699857] Internal error: Oops: 96000046 [#1] SMP
[   27.774939] CPU: 1 PID: 701 Comm: ash Not tainted 5.4.41 #0
[   27.780500] Hardware name: Bananapi BPI-R64 (DT)
[   27.785108] pstate: 60000005 (nZCv daif -PAN -UAO)
[   27.789897] pc : mt7615_register_ext_phy+0x60/0x2c8 [mt7615_common]
[   27.796156] lr : mt7615_init_debugfs+0x99c/0x18e0 [mt7615_common]
[   27.802237] sp : ffffffc0115dbcb0
[   27.805541] x29: ffffffc0115dbcb0 x28: ffffff803e309600
[   27.810843] x27: 0000000000000000 x26: 0000000000000000
[   27.816144] x25: ffffff803d936928 x24: ffffff803d936950
[   27.821447] x23: 0000000000000000 x22: 0000000fffffffe0
[   27.826749] x21: 0000000000000002 x20: ffffff8001e82620
[   27.832050] x19: 0000000000000000 x18: 0000000000000000
[   27.837352] x17: 0000000000000000 x16: 0000000000000000
[   27.842653] x15: 0000000000000000 x14: 0000000000000000
[   27.847955] x13: 0000000000000000 x12: 0000000000000000
[   27.853256] x11: 0000000000000000 x10: 0000000000000040
[   27.858558] x9 : ffffffc0112b3eb0 x8 : ffffffc0112b3ea8
[   27.863859] x7 : ffffff803e400048 x6 : 0000000000000000
[   27.869161] x5 : ffffff803e400000 x4 : 0000000000000000
[   27.874462] x3 : 0000000000000001 x2 : 0000000000007615
[   27.879764] x1 : 0000000000000068 x0 : ffffffc0088ccc58
[   27.885066] Call trace:
[   27.887505]  mt7615_register_ext_phy+0x60/0x2c8 [mt7615_common]
[   27.893416]  mt7615_init_debugfs+0x99c/0x18e0 [mt7615_common]
[   27.899156]  simple_attr_write+0xf0/0x178
[   27.903158]  debugfs_attr_write+0x4c/0x70
[   27.907159]  full_proxy_write+0x60/0x90
[   27.910987]  __vfs_write+0x18/0x40
[   27.914379]  vfs_write+0xb0/0x1b8
[   27.917685]  ksys_write+0x4c/0xc8
[   27.920989]  __arm64_sys_write+0x18/0x20

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/init.c | 18 +++++++++---------
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 18a570ffb815..0d105e4abdfd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -372,15 +372,6 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	if (phy)
 		return 0;
 
-	INIT_DELAYED_WORK(&phy->mac_work, mt7615_mac_work);
-	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
-	skb_queue_head_init(&phy->scan_event_list);
-
-	INIT_WORK(&phy->ps_work, mt7615_ps_work);
-	INIT_WORK(&phy->roc_work, mt7615_roc_work);
-	timer_setup(&phy->roc_timer, mt7615_roc_timer, 0);
-	init_waitqueue_head(&phy->roc_wait);
-
 	mt7615_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7615_ops);
 	if (!mphy)
@@ -393,6 +384,15 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	mphy->antenna_mask = BIT(hweight8(phy->chainmask)) - 1;
 	mt7615_init_wiphy(mphy->hw);
 
+	INIT_DELAYED_WORK(&phy->mac_work, mt7615_mac_work);
+	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
+	skb_queue_head_init(&phy->scan_event_list);
+
+	INIT_WORK(&phy->ps_work, mt7615_ps_work);
+	INIT_WORK(&phy->roc_work, mt7615_roc_work);
+	timer_setup(&phy->roc_timer, mt7615_roc_timer, 0);
+	init_waitqueue_head(&phy->roc_wait);
+
 	mt7615_mac_set_scs(phy, true);
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From ae4027a798988ba001584d77cc57e6b4cd77ddec Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Wed, 13 May 2020 18:50:55 +0800
Subject: mt76: mt7915: fix decoded radiotap HE flags

Move assignment of .data1 and .data2 to a single place and fix overwriting
of values from the template

Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index 7ad7c2b7afdc..bf96b389c813 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -235,9 +235,14 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb,
 		.data1 = HE_BITS(DATA1_DATA_MCS_KNOWN) |
 			 HE_BITS(DATA1_DATA_DCM_KNOWN) |
 			 HE_BITS(DATA1_STBC_KNOWN) |
-			 HE_BITS(DATA1_CODING_KNOWN),
+			 HE_BITS(DATA1_CODING_KNOWN) |
+			 HE_BITS(DATA1_LDPC_XSYMSEG_KNOWN) |
+			 HE_BITS(DATA1_DOPPLER_KNOWN) |
+			 HE_BITS(DATA1_BSS_COLOR_KNOWN),
 		.data2 = HE_BITS(DATA2_GI_KNOWN) |
-			 HE_BITS(DATA2_TXBF_KNOWN),
+			 HE_BITS(DATA2_TXBF_KNOWN) |
+			 HE_BITS(DATA2_PE_DISAMBIG_KNOWN) |
+			 HE_BITS(DATA2_TXOP_KNOWN),
 	};
 	struct ieee80211_radiotap_he *he = NULL;
 	__le32 v2 = rxv->v[2];
@@ -248,12 +253,6 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb,
 	he = skb_push(skb, sizeof(known));
 	memcpy(he, &known, sizeof(known));
 
-	he->data1 = HE_BITS(DATA1_LDPC_XSYMSEG_KNOWN) |
-		    HE_BITS(DATA1_DOPPLER_KNOWN) |
-		    HE_BITS(DATA1_BSS_COLOR_KNOWN);
-	he->data2 = HE_BITS(DATA2_PE_DISAMBIG_KNOWN) |
-		    HE_BITS(DATA2_TXOP_KNOWN);
-
 	he->data3 = HE_PREP(DATA3_BSS_COLOR, BSS_COLOR, v14) |
 		    HE_PREP(DATA3_LDPC_XSYMSEG, LDPC_EXT_SYM, v2);
 	he->data5 = HE_PREP(DATA5_PE_DISAMBIG, PE_DISAMBIG, v2) |
@@ -296,10 +295,10 @@ mt7915_mac_decode_he_radiotap(struct sk_buff *skb,
 			     HE_BITS(DATA1_SPTL_REUSE3_KNOWN) |
 			     HE_BITS(DATA1_SPTL_REUSE4_KNOWN);
 
-		he->data4 = HE_PREP(DATA4_TB_SPTL_REUSE1, SR_MASK, v11) |
-			    HE_PREP(DATA4_TB_SPTL_REUSE2, SR1_MASK, v11) |
-			    HE_PREP(DATA4_TB_SPTL_REUSE3, SR2_MASK, v11) |
-			    HE_PREP(DATA4_TB_SPTL_REUSE4, SR3_MASK, v11);
+		he->data4 |= HE_PREP(DATA4_TB_SPTL_REUSE1, SR_MASK, v11) |
+			     HE_PREP(DATA4_TB_SPTL_REUSE2, SR1_MASK, v11) |
+			     HE_PREP(DATA4_TB_SPTL_REUSE3, SR2_MASK, v11) |
+			     HE_PREP(DATA4_TB_SPTL_REUSE4, SR3_MASK, v11);
 
 		mt7915_mac_decode_he_radiotap_ru(status, rxv, he);
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From 238f5d6fc0285053a1684cbb676b9f507080633d Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Wed, 20 May 2020 08:04:47 +0200
Subject: mt76: fix per-driver wcid range checks after wcid array size bump

All drivers before MT7915 have a limit of 128 WCID entries. Stop relying
on ARRAY_SIZE(dev->mt76.wcid), since it no longer reflects that limit.

Fixes: 49e649c3e0a6 ("mt76: adjust wcid size to support new 802.11ax generation")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c   | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c   | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt76x02.h      | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt76x02_mac.c  | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 2 +-
 drivers/net/wireless/mediatek/mt76/mt76x02_util.c | 2 +-
 6 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index f8c0c957ca01..0f205ffe4905 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -473,7 +473,7 @@ mt7603_rx_get_wcid(struct mt7603_dev *dev, u8 idx, bool unicast)
 	struct mt7603_sta *sta;
 	struct mt76_wcid *wcid;
 
-	if (idx >= ARRAY_SIZE(dev->mt76.wcid))
+	if (idx >= MT7603_WTBL_SIZE)
 		return NULL;
 
 	wcid = rcu_dereference(dev->mt76.wcid[idx]);
@@ -1238,7 +1238,7 @@ void mt7603_mac_add_txs(struct mt7603_dev *dev, void *data)
 	if (pid == MT_PACKET_ID_NO_ACK)
 		return;
 
-	if (wcidx >= ARRAY_SIZE(dev->mt76.wcid))
+	if (wcidx >= MT7603_WTBL_SIZE)
 		return;
 
 	rcu_read_lock();
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index 6b5c38ab9f5d..f1009c92ec1b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -61,7 +61,7 @@ static struct mt76_wcid *mt7615_rx_get_wcid(struct mt7615_dev *dev,
 	struct mt7615_sta *sta;
 	struct mt76_wcid *wcid;
 
-	if (idx >= ARRAY_SIZE(dev->mt76.wcid))
+	if (idx >= MT7615_WTBL_SIZE)
 		return NULL;
 
 	wcid = rcu_dereference(dev->mt76.wcid[idx]);
@@ -1303,7 +1303,7 @@ static void mt7615_mac_add_txs(struct mt7615_dev *dev, void *data)
 	if (pid == MT_PACKET_ID_NO_ACK)
 		return;
 
-	if (wcidx >= ARRAY_SIZE(dev->mt76.wcid))
+	if (wcidx >= MT7615_WTBL_SIZE)
 		return;
 
 	rcu_read_lock();
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02.h b/drivers/net/wireless/mediatek/mt76/mt76x02.h
index 6ea210bd3f07..4c9bbc7ce023 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02.h
@@ -15,6 +15,7 @@
 #include "mt76x02_dfs.h"
 #include "mt76x02_dma.h"
 
+#define MT76x02_N_WCIDS 128
 #define MT_CALIBRATE_INTERVAL	HZ
 #define MT_MAC_WORK_INTERVAL	(HZ / 10)
 
@@ -246,7 +247,7 @@ mt76x02_rx_get_sta(struct mt76_dev *dev, u8 idx)
 {
 	struct mt76_wcid *wcid;
 
-	if (idx >= ARRAY_SIZE(dev->wcid))
+	if (idx >= MT76x02_N_WCIDS)
 		return NULL;
 
 	wcid = rcu_dereference(dev->wcid[idx]);
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
index a5a3bcd30d6f..e4e03beabe43 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mac.c
@@ -561,7 +561,7 @@ void mt76x02_send_tx_status(struct mt76x02_dev *dev,
 
 	rcu_read_lock();
 
-	if (stat->wcid < ARRAY_SIZE(dev->mt76.wcid))
+	if (stat->wcid < MT76x02_N_WCIDS)
 		wcid = rcu_dereference(dev->mt76.wcid[stat->wcid]);
 
 	if (wcid && wcid->sta) {
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 7e389dbccfeb..18adedfbbb8e 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -415,7 +415,7 @@ static void mt76x02_reset_state(struct mt76x02_dev *dev)
 	ieee80211_iter_keys_rcu(dev->mt76.hw, NULL, mt76x02_key_sync, NULL);
 	rcu_read_unlock();
 
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.wcid); i++) {
+	for (i = 0; i < MT76x02_N_WCIDS; i++) {
 		struct ieee80211_sta *sta;
 		struct ieee80211_vif *vif;
 		struct mt76x02_sta *msta;
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
index 9a2c9afa2fb5..44822a849eb1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_util.c
@@ -249,7 +249,7 @@ int mt76x02_sta_add(struct mt76_dev *mdev, struct ieee80211_vif *vif,
 
 	memset(msta, 0, sizeof(*msta));
 
-	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, ARRAY_SIZE(dev->mt76.wcid));
+	idx = mt76_wcid_alloc(dev->mt76.wcid_mask, MT76x02_N_WCIDS);
 	if (idx < 0)
 		return -ENOSPC;
 
-- 
cgit v1.2.3-59-g8ed1b


From b62db09aa81c148328843afac9fcbaefdaba6913 Mon Sep 17 00:00:00 2001
From: Ryder Lee <ryder.lee@mediatek.com>
Date: Tue, 19 May 2020 02:07:38 +0800
Subject: mt76: mt7915: fix some sparse warnings

drivers/net/wireless/mediatek/mt76/mt7915/main.c:694:1: sparse:
sparse: context imbalance in 'mt7915_sta_rc_update' - wrong count at exit
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:303:43: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:304:43: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:305:43: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:319:35: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:327:35: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:345:41: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:355:33: sparse: sparse: cast to restricted __le32
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:451:21: sparse: sparse: invalid assignment: |=
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:451:21: sparse:    left side has type unsigned int
drivers/net/wireless/mediatek/mt76/mt7915/mac.c:451:21: sparse:    right side has type restricted __le32

Fixes: e57b7901469f ("mt76: add mac80211 driver for MT7915 PCIe-based chipsets")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c  | 32 ++++++++++++++----------
 drivers/net/wireless/mediatek/mt76/mt7915/main.c |  2 +-
 2 files changed, 20 insertions(+), 14 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index bf96b389c813..ab20dfde94af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -425,20 +425,26 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 	/* RXD Group 3 - P-RXV */
 	if (rxd1 & MT_RXD1_NORMAL_GROUP_3) {
+		u32 v0, v1, v2;
+
 		memcpy(rxv.v, rxd, sizeof(rxv.v));
 
 		rxd += 2;
 		if ((u8 *)rxd - skb->data >= skb->len)
 			return -EINVAL;
 
-		if (rxv.v[0] & MT_PRXV_HT_AD_CODE)
+		v0 = le32_to_cpu(rxv.v[0]);
+		v1 = le32_to_cpu(rxv.v[1]);
+		v2 = le32_to_cpu(rxv.v[2]);
+
+		if (v0 & MT_PRXV_HT_AD_CODE)
 			status->enc_flags |= RX_ENC_FLAG_LDPC;
 
 		status->chains = mphy->antenna_mask;
-		status->chain_signal[0] = to_rssi(MT_PRXV_RCPI0, rxv.v[1]);
-		status->chain_signal[1] = to_rssi(MT_PRXV_RCPI1, rxv.v[1]);
-		status->chain_signal[2] = to_rssi(MT_PRXV_RCPI2, rxv.v[1]);
-		status->chain_signal[3] = to_rssi(MT_PRXV_RCPI3, rxv.v[1]);
+		status->chain_signal[0] = to_rssi(MT_PRXV_RCPI0, v1);
+		status->chain_signal[1] = to_rssi(MT_PRXV_RCPI1, v1);
+		status->chain_signal[2] = to_rssi(MT_PRXV_RCPI2, v1);
+		status->chain_signal[3] = to_rssi(MT_PRXV_RCPI3, v1);
 		status->signal = status->chain_signal[0];
 
 		for (i = 1; i < hweight8(mphy->antenna_mask); i++) {
@@ -451,16 +457,16 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 
 		/* RXD Group 5 - C-RXV */
 		if (rxd1 & MT_RXD1_NORMAL_GROUP_5) {
-			u8 stbc = FIELD_GET(MT_CRXV_HT_STBC, rxv.v[2]);
-			u8 gi = FIELD_GET(MT_CRXV_HT_SHORT_GI, rxv.v[2]);
+			u8 stbc = FIELD_GET(MT_CRXV_HT_STBC, v2);
+			u8 gi = FIELD_GET(MT_CRXV_HT_SHORT_GI, v2);
 			bool cck = false;
 
 			rxd += 18;
 			if ((u8 *)rxd - skb->data >= skb->len)
 				return -EINVAL;
 
-			idx = i = FIELD_GET(MT_PRXV_TX_RATE, rxv.v[0]);
-			rxv.phy = FIELD_GET(MT_CRXV_TX_MODE, rxv.v[2]);
+			idx = i = FIELD_GET(MT_PRXV_TX_RATE, v0);
+			rxv.phy = FIELD_GET(MT_CRXV_TX_MODE, v2);
 
 			switch (rxv.phy) {
 			case MT_PHY_TYPE_CCK:
@@ -477,7 +483,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 				break;
 			case MT_PHY_TYPE_VHT:
 				status->nss =
-					FIELD_GET(MT_PRXV_NSTS, rxv.v[0]) + 1;
+					FIELD_GET(MT_PRXV_NSTS, v0) + 1;
 				status->encoding = RX_ENC_VHT;
 				if (i > 9)
 					return -EINVAL;
@@ -489,7 +495,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 			case MT_PHY_TYPE_HE_EXT_SU:
 			case MT_PHY_TYPE_HE_TB:
 				status->nss =
-					FIELD_GET(MT_PRXV_NSTS, rxv.v[0]) + 1;
+					FIELD_GET(MT_PRXV_NSTS, v0) + 1;
 				status->encoding = RX_ENC_HE;
 				status->flag |= RX_FLAG_RADIOTAP_HE;
 				i &= GENMASK(3, 0);
@@ -505,7 +511,7 @@ int mt7915_mac_fill_rx(struct mt7915_dev *dev, struct sk_buff *skb)
 			}
 			status->rate_idx = i;
 
-			switch (FIELD_GET(MT_CRXV_FRAME_MODE, rxv.v[2])) {
+			switch (FIELD_GET(MT_CRXV_FRAME_MODE, v2)) {
 			case IEEE80211_STA_RX_BW_20:
 				break;
 			case IEEE80211_STA_RX_BW_40:
@@ -611,7 +617,7 @@ void mt7915_mac_write_txwi(struct mt7915_dev *dev, __le32 *txwi,
 			 skb->priority & IEEE80211_QOS_CTL_TID_MASK) |
 	      FIELD_PREP(MT_TXD1_OWN_MAC, omac_idx);
 	if (ext_phy && q_idx >= MT_LMAC_ALTX0 && q_idx <= MT_LMAC_BCN0)
-		val |= cpu_to_le32(MT_TXD1_TGID);
+		val |= MT_TXD1_TGID;
 
 	txwi[1] = cpu_to_le32(val);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/main.c b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
index e045dc234100..0575c259f245 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/main.c
@@ -795,7 +795,7 @@ mt7915_sta_rc_update(struct ieee80211_hw *hw,
 		rcu_read_unlock();
 		return;
 	}
-	rcu_read_lock();
+	rcu_read_unlock();
 
 	set_bit(changed, &msta->stats.changed);
 	ieee80211_queue_work(hw, &msta->stats_work);
-- 
cgit v1.2.3-59-g8ed1b


From a5e0aa78f5c44e30562a33662eeb4a594a920ce9 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Tue, 19 May 2020 10:42:11 +0200
Subject: mt76: mt7615: switch to per-vif power_save support

Switch to per-vif ps support since the mt7615 offload firmware can handle
it properly. This patch allows enabling/disabling power-save support on
the p2p interface.

Tested-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h          |  1 -
 drivers/net/wireless/mediatek/mt76/mt7615/init.c   |  2 --
 drivers/net/wireless/mediatek/mt76/mt7615/main.c   | 26 +++-------------------
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c    | 14 ++++++------
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h |  5 +----
 drivers/net/wireless/mediatek/mt76/mt7615/usb.c    |  1 -
 6 files changed, 11 insertions(+), 38 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index e2926e091c0f..5c9195f59ae1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -288,7 +288,6 @@ enum {
 	MT76_REMOVED,
 	MT76_READING_STATS,
 	MT76_STATE_POWER_OFF,
-	MT76_STATE_PS,
 	MT76_STATE_SUSPEND,
 	MT76_STATE_ROC,
 };
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/init.c b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
index 0d105e4abdfd..e2d80518e5af 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/init.c
@@ -388,7 +388,6 @@ int mt7615_register_ext_phy(struct mt7615_dev *dev)
 	INIT_DELAYED_WORK(&phy->scan_work, mt7615_scan_work);
 	skb_queue_head_init(&phy->scan_event_list);
 
-	INIT_WORK(&phy->ps_work, mt7615_ps_work);
 	INIT_WORK(&phy->roc_work, mt7615_roc_work);
 	timer_setup(&phy->roc_timer, mt7615_roc_timer, 0);
 	init_waitqueue_head(&phy->roc_wait);
@@ -447,7 +446,6 @@ void mt7615_init_device(struct mt7615_dev *dev)
 	init_waitqueue_head(&dev->phy.roc_wait);
 
 	INIT_WORK(&dev->reset_work, mt7615_mac_reset_work);
-	INIT_WORK(&dev->phy.ps_work, mt7615_ps_work);
 	INIT_WORK(&dev->phy.roc_work, mt7615_roc_work);
 	timer_setup(&dev->phy.roc_timer, mt7615_roc_timer, 0);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index f8cbee1770ce..320dfda6b4e5 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -71,7 +71,6 @@ static void mt7615_stop(struct ieee80211_hw *hw)
 	struct mt7615_phy *phy = mt7615_hw_phy(hw);
 
 	cancel_delayed_work_sync(&phy->mac_work);
-	cancel_work_sync(&phy->ps_work);
 	del_timer_sync(&phy->roc_timer);
 	cancel_work_sync(&phy->roc_work);
 
@@ -362,20 +361,6 @@ static int mt7615_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	return mt7615_mac_wtbl_set_key(dev, wcid, key, cmd);
 }
 
-void mt7615_ps_work(struct work_struct *work)
-{
-	struct mt7615_phy *phy;
-
-	phy = (struct mt7615_phy *)container_of(work, struct mt7615_phy,
-						ps_work);
-
-	mutex_lock(&phy->dev->mt76.mutex);
-	ieee80211_iterate_active_interfaces(phy->mt76->hw,
-					    IEEE80211_IFACE_ITER_RESUME_ALL,
-					    m7615_mcu_set_ps_iter, phy);
-	mutex_unlock(&phy->dev->mt76.mutex);
-}
-
 static int mt7615_config(struct ieee80211_hw *hw, u32 changed)
 {
 	struct mt7615_dev *dev = mt7615_hw_dev(hw);
@@ -401,14 +386,6 @@ static int mt7615_config(struct ieee80211_hw *hw, u32 changed)
 		mt76_wr(dev, MT_WF_RFCR(band), phy->rxfilter);
 	}
 
-	if (changed & IEEE80211_CONF_CHANGE_PS) {
-		if (hw->conf.flags & IEEE80211_CONF_PS)
-			set_bit(MT76_STATE_PS, &phy->mt76->state);
-		else
-			clear_bit(MT76_STATE_PS, &phy->mt76->state);
-		ieee80211_queue_work(hw, &phy->ps_work);
-	}
-
 	mutex_unlock(&dev->mt76.mutex);
 
 	return ret;
@@ -511,6 +488,9 @@ static void mt7615_bss_info_changed(struct ieee80211_hw *hw,
 		       BSS_CHANGED_BEACON_ENABLED))
 		mt7615_mcu_add_beacon(dev, hw, vif, info->enable_beacon);
 
+	if (changed & BSS_CHANGED_PS)
+		mt7615_mcu_set_vif_ps(dev, vif);
+
 	mutex_unlock(&dev->mt76.mutex);
 }
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 7eb99bde3394..14c2b5d7dbbd 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2772,11 +2772,9 @@ int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable)
 				   sizeof(req), true);
 }
 
-void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
+int mt7615_mcu_set_vif_ps(struct mt7615_dev *dev, struct ieee80211_vif *vif)
 {
 	struct mt7615_vif *mvif = (struct mt7615_vif *)vif->drv_priv;
-	struct mt7615_phy *phy = priv;
-	struct mt76_phy *mphy = phy->mt76;
 	struct {
 		u8 bss_idx;
 		u8 ps_state; /* 0: device awake
@@ -2785,12 +2783,14 @@ void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif)
 			      */
 	} req = {
 		.bss_idx = mvif->idx,
-		.ps_state = test_bit(MT76_STATE_PS, &mphy->state) ? 2 : 0,
+		.ps_state = vif->bss_conf.ps ? 2 : 0,
 	};
 
-	if (vif->type == NL80211_IFTYPE_STATION)
-		__mt76_mcu_send_msg(&phy->dev->mt76,  MCU_CMD_SET_PS_PROFILE,
-				    &req, sizeof(req), false);
+	if (vif->type != NL80211_IFTYPE_STATION)
+		return -ENOTSUPP;
+
+	return __mt76_mcu_send_msg(&dev->mt76,  MCU_CMD_SET_PS_PROFILE,
+				   &req, sizeof(req), false);
 }
 
 int mt7615_mcu_set_channel_domain(struct mt7615_phy *phy)
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 71d5d5973116..170d3c2bbbb4 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -205,8 +205,6 @@ struct mt7615_phy {
 	struct timer_list roc_timer;
 	wait_queue_head_t roc_wait;
 	bool roc_grant;
-
-	struct work_struct ps_work;
 };
 
 #define mt7615_mcu_add_tx_ba(dev, ...)	(dev)->mcu_ops->add_tx_ba((dev), __VA_ARGS__)
@@ -448,7 +446,6 @@ void mt7615_dma_reset(struct mt7615_dev *dev);
 void mt7615_scan_work(struct work_struct *work);
 void mt7615_roc_work(struct work_struct *work);
 void mt7615_roc_timer(struct timer_list *timer);
-void mt7615_ps_work(struct work_struct *work);
 void mt7615_init_txpower(struct mt7615_dev *dev,
 			 struct ieee80211_supported_band *sband);
 void mt7615_phy_init(struct mt7615_dev *dev);
@@ -534,7 +531,7 @@ int mt7615_mcu_set_radar_th(struct mt7615_dev *dev, int index,
 int mt7615_mcu_set_sku_en(struct mt7615_phy *phy, bool enable);
 int mt7615_mcu_apply_rx_dcoc(struct mt7615_phy *phy);
 int mt7615_mcu_apply_tx_dpd(struct mt7615_phy *phy);
-void m7615_mcu_set_ps_iter(void *priv, u8 *mac, struct ieee80211_vif *vif);
+int mt7615_mcu_set_vif_ps(struct mt7615_dev *dev, struct ieee80211_vif *vif);
 int mt7615_dfs_init_radar_detector(struct mt7615_phy *phy);
 
 int mt7615_mcu_set_p2p_oppps(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
index c292b41c76e3..a50077eb24d7 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/usb.c
@@ -51,7 +51,6 @@ static void mt7663u_stop(struct ieee80211_hw *hw)
 	struct mt7615_dev *dev = hw->priv;
 
 	clear_bit(MT76_STATE_RUNNING, &dev->mphy.state);
-	cancel_work_sync(&phy->ps_work);
 	del_timer_sync(&phy->roc_timer);
 	cancel_work_sync(&phy->roc_work);
 	cancel_delayed_work_sync(&phy->scan_work);
-- 
cgit v1.2.3-59-g8ed1b


From 6f4bd8528c36e9abecddf698600696c7a5578f2e Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Tue, 19 May 2020 23:48:20 +0100
Subject: mt76: mt7915: fix a handful of spelling mistakes

There are some spelling mistakes in some literal strings. Fix these.

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
index ee0066fedd04..5278bee812f1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/debugfs.c
@@ -173,14 +173,14 @@ mt7915_txbf_stat_read_phy(struct mt7915_phy *phy, struct seq_file *s)
 
 	/* Tx Beamformee Rx NDPA & Tx feedback report */
 	cnt = mt76_rr(dev, MT_ETBF_TX_NDP_BFRP(ext_phy));
-	seq_printf(s, "Tx Beamformee sucessful feedback frames: %ld\n",
+	seq_printf(s, "Tx Beamformee successful feedback frames: %ld\n",
 		   FIELD_GET(MT_ETBF_TX_FB_CPL, cnt));
-	seq_printf(s, "Tx Beamformee feedback triggerd counts: %ld\n",
+	seq_printf(s, "Tx Beamformee feedback triggered counts: %ld\n",
 		   FIELD_GET(MT_ETBF_TX_FB_TRI, cnt));
 
 	/* Tx SU counters */
 	cnt = mt76_rr(dev, MT_MIB_DR11(ext_phy));
-	seq_printf(s, "Tx single-user sucessful MPDU counts: %d\n", cnt);
+	seq_printf(s, "Tx single-user successful MPDU counts: %d\n", cnt);
 
 	seq_puts(s, "\n");
 }
-- 
cgit v1.2.3-59-g8ed1b


From eca026555f01e16011eeb6a7f63ceabd8da4f4a8 Mon Sep 17 00:00:00 2001
From: Sean Wang <sean.wang@mediatek.com>
Date: Fri, 22 May 2020 09:10:24 +0200
Subject: mt76: mt7615: fix hw_scan with ssid_type for specified SSID only

Fix hw_scan with ssid_type for specified SSID only

In the current firmware, setting ssid_type BIT(2) alone actually selects
specified SSID + wildcard SSID, whereas setting both ssid_type BIT(2) and
ssid_type_ext BIT(0) selects specified SSID only.

Signed-off-by: Sean Wang <sean.wang@mediatek.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.c | 1 +
 drivers/net/wireless/mediatek/mt76/mt7615/mcu.h | 6 ++++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
index 14c2b5d7dbbd..6e869b8c5e26 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.c
@@ -2898,6 +2898,7 @@ int mt7615_mcu_hw_scan(struct mt7615_phy *phy, struct ieee80211_vif *vif,
 		n_ssids++;
 	}
 	req->ssid_type = n_ssids ? BIT(2) : BIT(0);
+	req->ssid_type_ext = n_ssids ? BIT(0) : 0;
 	req->ssids_num = n_ssids;
 
 	/* increase channel time for passive scan */
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
index fd40d99f5a23..2314d0b23af1 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mcu.h
@@ -327,7 +327,8 @@ struct mt7615_hw_scan_req {
 		       */
 	u8 ssid_type; /* BIT(0) wildcard SSID
 		       * BIT(1) P2P wildcard SSID
-		       * BIT(2) specified SSID
+		       * BIT(2) specified SSID + wildcard SSID
+		       * BIT(2) + ssid_type_ext BIT(0) specified SSID only
 		       */
 	u8 ssids_num;
 	u8 probe_req_num; /* Number of probe request for each SSID */
@@ -362,7 +363,8 @@ struct mt7615_hw_scan_req {
 	struct mt7615_mcu_scan_ssid ext_ssids[6];
 	u8 bssid[ETH_ALEN];
 	u8 random_mac[ETH_ALEN]; /* valid when BIT(1) in scan_func is set. */
-	u8 pad[64];
+	u8 pad[63];
+	u8 ssid_type_ext;
 } __packed;
 
 #define SCAN_DONE_EVENT_MAX_CHANNEL_NUM	64
-- 
cgit v1.2.3-59-g8ed1b


From ec2bb3a570ec5bfafb71113b3617929434de5ff0 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Fri, 22 May 2020 09:26:06 +0200
Subject: mt76: mt7915: fix possible NULL pointer dereference in
 mt7915_register_ext_phy

Fix a NULL pointer dereference in mt7915_register_ext_phy, since the phy
data structure is allocated by the mt76_alloc_phy routine.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/init.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/init.c b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
index 6f200ab3ac28..aadf56e80bae 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/init.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/init.c
@@ -592,7 +592,6 @@ int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	if (phy)
 		return 0;
 
-	INIT_DELAYED_WORK(&phy->mac_work, mt7915_mac_work);
 	mt7915_cap_dbdc_enable(dev);
 	mphy = mt76_alloc_phy(&dev->mt76, sizeof(*phy), &mt7915_ops);
 	if (!mphy)
@@ -605,6 +604,8 @@ int mt7915_register_ext_phy(struct mt7915_dev *dev)
 	mphy->antenna_mask = BIT(hweight8(phy->chainmask)) - 1;
 	mt7915_init_wiphy(mphy->hw);
 
+	INIT_DELAYED_WORK(&phy->mac_work, mt7915_mac_work);
+
 	/*
 	 * Make the secondary PHY MAC address local without overlapping with
 	 * the usual MAC address allocation scheme on multiple virtual interfaces
-- 
cgit v1.2.3-59-g8ed1b


From 5e616ad216ef560b2a856c858137c772351eee9f Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sat, 23 May 2020 14:40:57 +0200
Subject: mt76: fix wcid allocation issues

mt76 core uses ffs() to find the next free bit. This works well for 32 bit
architectures where BITS_PER_LONG is 32. ffs only checks 32 bit values, so
allocation fails on 64 bit architectures.
Additionally, the wcid mask array was too small in cases where the array
was not a multiple of BITS_PER_LONG.
Fix this by making the wcid mask array u32 instead and use DIV_ROUND_UP
for the size, just in case we ever bump it to a value that's not a multiple
of 32.

Reported-by: Ryder Lee <ryder.lee@mediatek.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt76.h |  4 ++--
 drivers/net/wireless/mediatek/mt76/util.c | 12 ++++++------
 drivers/net/wireless/mediatek/mt76/util.h | 14 +++++++-------
 3 files changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index 5c9195f59ae1..afb1ccf61b74 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -537,8 +537,8 @@ struct mt76_dev {
 	wait_queue_head_t tx_wait;
 	struct sk_buff_head status_list;
 
-	unsigned long wcid_mask[MT76_N_WCIDS / BITS_PER_LONG];
-	unsigned long wcid_phy_mask[MT76_N_WCIDS / BITS_PER_LONG];
+	u32 wcid_mask[DIV_ROUND_UP(MT76_N_WCIDS, 32)];
+	u32 wcid_phy_mask[DIV_ROUND_UP(MT76_N_WCIDS, 32)];
 
 	struct mt76_wcid global_wcid;
 	struct mt76_wcid __rcu *wcid[MT76_N_WCIDS];
diff --git a/drivers/net/wireless/mediatek/mt76/util.c b/drivers/net/wireless/mediatek/mt76/util.c
index 07cf71242d9e..ecde87465bf6 100644
--- a/drivers/net/wireless/mediatek/mt76/util.c
+++ b/drivers/net/wireless/mediatek/mt76/util.c
@@ -42,17 +42,17 @@ bool __mt76_poll_msec(struct mt76_dev *dev, u32 offset, u32 mask, u32 val,
 }
 EXPORT_SYMBOL_GPL(__mt76_poll_msec);
 
-int mt76_wcid_alloc(unsigned long *mask, int size)
+int mt76_wcid_alloc(u32 *mask, int size)
 {
 	int i, idx = 0, cur;
 
-	for (i = 0; i < DIV_ROUND_UP(size, BITS_PER_LONG); i++) {
+	for (i = 0; i < DIV_ROUND_UP(size, 32); i++) {
 		idx = ffs(~mask[i]);
 		if (!idx)
 			continue;
 
 		idx--;
-		cur = i * BITS_PER_LONG + idx;
+		cur = i * 32 + idx;
 		if (cur >= size)
 			break;
 
@@ -74,13 +74,13 @@ int mt76_get_min_avg_rssi(struct mt76_dev *dev, bool ext_phy)
 	rcu_read_lock();
 
 	for (i = 0; i < ARRAY_SIZE(dev->wcid_mask); i++) {
-		unsigned long mask = dev->wcid_mask[i];
-		unsigned long phy_mask = dev->wcid_phy_mask[i];
+		u32 mask = dev->wcid_mask[i];
+		u32 phy_mask = dev->wcid_phy_mask[i];
 
 		if (!mask)
 			continue;
 
-		for (j = i * BITS_PER_LONG; mask; j++, mask >>= 1, phy_mask >>= 1) {
+		for (j = i * 32; mask; j++, mask >>= 1, phy_mask >>= 1) {
 			if (!(mask & 1))
 				continue;
 
diff --git a/drivers/net/wireless/mediatek/mt76/util.h b/drivers/net/wireless/mediatek/mt76/util.h
index 48a71e7479e5..fd1a68820e0a 100644
--- a/drivers/net/wireless/mediatek/mt76/util.h
+++ b/drivers/net/wireless/mediatek/mt76/util.h
@@ -14,24 +14,24 @@
 #define MT76_INCR(_var, _size) \
 	(_var = (((_var) + 1) % (_size)))
 
-int mt76_wcid_alloc(unsigned long *mask, int size);
+int mt76_wcid_alloc(u32 *mask, int size);
 
 static inline bool
-mt76_wcid_mask_test(unsigned long *mask, int idx)
+mt76_wcid_mask_test(u32 *mask, int idx)
 {
-	return mask[idx / BITS_PER_LONG] & BIT(idx % BITS_PER_LONG);
+	return mask[idx / 32] & BIT(idx % 32);
 }
 
 static inline void
-mt76_wcid_mask_set(unsigned long *mask, int idx)
+mt76_wcid_mask_set(u32 *mask, int idx)
 {
-	mask[idx / BITS_PER_LONG] |= BIT(idx % BITS_PER_LONG);
+	mask[idx / 32] |= BIT(idx % 32);
 }
 
 static inline void
-mt76_wcid_mask_clear(unsigned long *mask, int idx)
+mt76_wcid_mask_clear(u32 *mask, int idx)
 {
-	mask[idx / BITS_PER_LONG] &= ~BIT(idx % BITS_PER_LONG);
+	mask[idx / 32] &= ~BIT(idx % 32);
 }
 
 static inline void
-- 
cgit v1.2.3-59-g8ed1b


From e47f2245375feef8f72ff119a939865fe5e830fd Mon Sep 17 00:00:00 2001
From: DENG Qingfang <dqfext@gmail.com>
Date: Sun, 24 May 2020 11:41:10 +0800
Subject: mt76: mt7615: add support for MT7611N

MT7611N is basically the same as MT7615N, except that it only supports
5GHz. It is used by some TP-Link and Mercury wireless routers.

Signed-off-by: DENG Qingfang <dqfext@gmail.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c | 7 +++++++
 drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h | 7 ++++++-
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c    | 1 +
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
index 6a5ae047c63b..edac37e7847b 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/eeprom.c
@@ -111,6 +111,12 @@ mt7615_eeprom_parse_hw_band_cap(struct mt7615_dev *dev)
 		return;
 	}
 
+	if (is_mt7611(&dev->mt76)) {
+		/* 5GHz only */
+		dev->mt76.cap.has_5ghz = true;
+		return;
+	}
+
 	val = FIELD_GET(MT_EE_NIC_WIFI_CONF_BAND_SEL,
 			eeprom[MT_EE_WIFI_CONF]);
 	switch (val) {
@@ -310,6 +316,7 @@ static void mt7615_cal_free_data(struct mt7615_dev *dev)
 		mt7622_apply_cal_free_data(dev);
 		break;
 	case 0x7615:
+	case 0x7611:
 		mt7615_apply_cal_free_data(dev);
 		break;
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
index 170d3c2bbbb4..d6176d316bee 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mt7615.h
@@ -414,7 +414,7 @@ static inline bool is_mt7622(struct mt76_dev *dev)
 
 static inline bool is_mt7615(struct mt76_dev *dev)
 {
-	return mt76_chip(dev) == 0x7615;
+	return mt76_chip(dev) == 0x7615 || mt76_chip(dev) == 0x7611;
 }
 
 static inline bool is_mt7663(struct mt76_dev *dev)
@@ -422,6 +422,11 @@ static inline bool is_mt7663(struct mt76_dev *dev)
 	return mt76_chip(dev) == 0x7663;
 }
 
+static inline bool is_mt7611(struct mt76_dev *dev)
+{
+	return mt76_chip(dev) == 0x7611;
+}
+
 static inline void mt7615_irq_enable(struct mt7615_dev *dev, u32 mask)
 {
 	mt76_set_irq_mask(&dev->mt76, 0, 0, mask);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index 88ff14564521..b09d08d0dac9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -14,6 +14,7 @@
 static const struct pci_device_id mt7615_pci_device_table[] = {
 	{ PCI_DEVICE(0x14c3, 0x7615) },
 	{ PCI_DEVICE(0x14c3, 0x7663) },
+	{ PCI_DEVICE(0x14c3, 0x7611) },
 	{ },
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From f473b42ac516befcb3ba6b0a5ef16f865f7579c9 Mon Sep 17 00:00:00 2001
From: Felix Fietkau <nbd@nbd.name>
Date: Sun, 24 May 2020 14:44:52 +0200
Subject: mt76: only iterate over initialized rx queues

Fixes the following reported crash:

[    2.361127] BUG: spinlock bad magic on CPU#0, modprobe/456
[    2.361583]  lock: 0xffffa1287525b3b8, .magic: 00000000, .owner: <none>/-1, .owner_cpu: 0
[    2.362250] CPU: 0 PID: 456 Comm: modprobe Not tainted 4.14.177 #5
[    2.362751] Hardware name: HP Meep/Meep, BIOS Google_Meep.11297.75.0 06/17/2019
[    2.363343] Call Trace:
[    2.363552]  dump_stack+0x97/0xdb
[    2.363826]  ? spin_bug+0xa6/0xb3
[    2.364096]  do_raw_spin_lock+0x6a/0x9a
[    2.364417]  mt76_dma_rx_fill+0x44/0x1de [mt76]
[    2.364787]  ? mt76_dma_kick_queue+0x18/0x18 [mt76]
[    2.365184]  mt76_dma_init+0x53/0x85 [mt76]
[    2.365532]  mt7615_dma_init+0x3d7/0x546 [mt7615e]
[    2.365928]  mt7615_register_device+0xe6/0x1a0 [mt7615e]
[    2.366364]  mt7615_mmio_probe+0x14b/0x171 [mt7615e]
[    2.366771]  mt7615_pci_probe+0x118/0x13b [mt7615e]
[    2.367169]  pci_device_probe+0xaf/0x13d
[    2.367491]  driver_probe_device+0x284/0x2ca
[    2.367840]  __driver_attach+0x7a/0x9e
[    2.368146]  ? driver_attach+0x1f/0x1f
[    2.368451]  bus_for_each_dev+0xa0/0xdb
[    2.368765]  bus_add_driver+0x132/0x204
[    2.369078]  driver_register+0x8e/0xcd
[    2.369384]  do_one_initcall+0x160/0x257
[    2.369706]  ? 0xffffffffc0240000
[    2.369980]  do_init_module+0x60/0x1bb
[    2.370286]  load_module+0x18c2/0x1a2b
[    2.370596]  ? kernel_read_file+0x141/0x1b9
[    2.370937]  ? kernel_read_file_from_fd+0x46/0x71
[    2.371320]  SyS_finit_module+0xcc/0xf0
[    2.371636]  do_syscall_64+0x6b/0xf7
[    2.371930]  entry_SYSCALL_64_after_hwframe+0x3d/0xa2
[    2.372344] RIP: 0033:0x7da218ae4199
[    2.372637] RSP: 002b:00007fffd0608398 EFLAGS: 00000246 ORIG_RAX: 0000000000000139
[    2.373252] RAX: ffffffffffffffda RBX: 00005a705449df90 RCX: 00007da218ae4199
[    2.373833] RDX: 0000000000000000 RSI: 00005a7052e73bd8 RDI: 0000000000000006
[    2.374411] RBP: 00007fffd06083e0 R08: 0000000000000000 R09: 00005a705449d540
[    2.374989] R10: 0000000000000006 R11: 0000000000000246 R12: 0000000000000000
[    2.375569] R13: 00005a705449def0 R14: 00005a7052e73bd8 R15: 0000000000000000

Reported-by: Sean Wang <sean.wang@mediatek.com>
Fixes: d3377b78cec6 ("mt76: add HE phy modes and hardware queue")
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/debugfs.c      | 2 +-
 drivers/net/wireless/mediatek/mt76/dma.c          | 4 ++--
 drivers/net/wireless/mediatek/mt76/mt76.h         | 4 ++++
 drivers/net/wireless/mediatek/mt76/mt7603/mac.c   | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/mac.c   | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7615/pci.c   | 8 +++++---
 drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c | 3 ++-
 drivers/net/wireless/mediatek/mt76/mt7915/mac.c   | 3 ++-
 8 files changed, 20 insertions(+), 10 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/debugfs.c b/drivers/net/wireless/mediatek/mt76/debugfs.c
index 0278e1b44576..3a5de1d1b121 100644
--- a/drivers/net/wireless/mediatek/mt76/debugfs.c
+++ b/drivers/net/wireless/mediatek/mt76/debugfs.c
@@ -51,7 +51,7 @@ static int mt76_rx_queues_read(struct seq_file *s, void *data)
 	struct mt76_dev *dev = dev_get_drvdata(s->private);
 	int i, queued;
 
-	for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
+	mt76_for_each_q_rx(dev, i) {
 		struct mt76_queue *q = &dev->q_rx[i];
 
 		if (!q->ndesc)
diff --git a/drivers/net/wireless/mediatek/mt76/dma.c b/drivers/net/wireless/mediatek/mt76/dma.c
index 75e659774e07..f4d6074fe32a 100644
--- a/drivers/net/wireless/mediatek/mt76/dma.c
+++ b/drivers/net/wireless/mediatek/mt76/dma.c
@@ -576,7 +576,7 @@ mt76_dma_init(struct mt76_dev *dev)
 
 	init_dummy_netdev(&dev->napi_dev);
 
-	for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
+	mt76_for_each_q_rx(dev, i) {
 		netif_napi_add(&dev->napi_dev, &dev->napi[i], mt76_dma_rx_poll,
 			       64);
 		mt76_dma_rx_fill(dev, &dev->q_rx[i]);
@@ -610,7 +610,7 @@ void mt76_dma_cleanup(struct mt76_dev *dev)
 	for (i = 0; i < ARRAY_SIZE(dev->q_tx); i++)
 		mt76_dma_tx_cleanup(dev, i, true);
 
-	for (i = 0; i < ARRAY_SIZE(dev->q_rx); i++) {
+	mt76_for_each_q_rx(dev, i) {
 		netif_napi_del(&dev->napi[i]);
 		mt76_dma_rx_cleanup(dev, &dev->q_rx[i]);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76.h b/drivers/net/wireless/mediatek/mt76/mt76.h
index afb1ccf61b74..dfe625a53c63 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76.h
+++ b/drivers/net/wireless/mediatek/mt76/mt76.h
@@ -671,6 +671,10 @@ static inline u16 mt76_rev(struct mt76_dev *dev)
 #define mt76_queue_tx_cleanup(dev, ...)	(dev)->mt76.queue_ops->tx_cleanup(&((dev)->mt76), __VA_ARGS__)
 #define mt76_queue_kick(dev, ...)	(dev)->mt76.queue_ops->kick(&((dev)->mt76), __VA_ARGS__)
 
+#define mt76_for_each_q_rx(dev, i)	\
+	for (i = 0; i < ARRAY_SIZE((dev)->q_rx) && \
+		    (dev)->q_rx[i].ndesc; i++)
+
 struct mt76_dev *mt76_alloc_device(struct device *pdev, unsigned int size,
 				   const struct ieee80211_ops *ops,
 				   const struct mt76_driver_ops *drv_ops);
diff --git a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
index 0f205ffe4905..8060c1514396 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7603/mac.c
@@ -1438,8 +1438,9 @@ static void mt7603_mac_watchdog_reset(struct mt7603_dev *dev)
 	for (i = 0; i < __MT_TXQ_MAX; i++)
 		mt76_queue_tx_cleanup(dev, i, true);
 
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
+	mt76_for_each_q_rx(&dev->mt76, i) {
 		mt76_queue_rx_reset(dev, i);
+	}
 
 	mt7603_dma_sched_reset(dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
index f1009c92ec1b..9f1c6ca7a665 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/mac.c
@@ -1820,8 +1820,9 @@ void mt7615_dma_reset(struct mt7615_dev *dev)
 	for (i = 0; i < __MT_TXQ_MAX; i++)
 		mt76_queue_tx_cleanup(dev, i, true);
 
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
+	mt76_for_each_q_rx(&dev->mt76, i) {
 		mt76_queue_rx_reset(dev, i);
+	}
 
 	mt76_set(dev, MT_WPDMA_GLO_CFG,
 		 MT_WPDMA_GLO_CFG_RX_DMA_EN | MT_WPDMA_GLO_CFG_TX_DMA_EN |
diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
index b09d08d0dac9..ba12f199bce0 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/pci.c
@@ -86,8 +86,9 @@ static int mt7615_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	napi_disable(&mdev->tx_napi);
 	tasklet_kill(&mdev->tx_tasklet);
 
-	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++)
+	mt76_for_each_q_rx(mdev, i) {
 		napi_disable(&mdev->napi[i]);
+	}
 	tasklet_kill(&dev->irq_tasklet);
 
 	mt7615_dma_reset(dev);
@@ -120,8 +121,9 @@ static int mt7615_pci_suspend(struct pci_dev *pdev, pm_message_t state)
 	return 0;
 
 restore:
-	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++)
+	mt76_for_each_q_rx(mdev, i) {
 		napi_enable(&mdev->napi[i]);
+	}
 	napi_enable(&mdev->tx_napi);
 	if (hif_suspend)
 		mt7615_mcu_set_hif_suspend(dev, false);
@@ -156,7 +158,7 @@ static int mt7615_pci_resume(struct pci_dev *pdev)
 	if (pdma_reset)
 		dev_err(mdev->dev, "PDMA engine must be reinitialized\n");
 
-	for (i = 0; i < ARRAY_SIZE(mdev->q_rx); i++) {
+	mt76_for_each_q_rx(mdev, i) {
 		napi_enable(&mdev->napi[i]);
 		napi_schedule(&mdev->napi[i]);
 	}
diff --git a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
index 18adedfbbb8e..cbbe986655fe 100644
--- a/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
+++ b/drivers/net/wireless/mediatek/mt76/mt76x02_mmio.c
@@ -489,8 +489,9 @@ static void mt76x02_watchdog_reset(struct mt76x02_dev *dev)
 	for (i = 0; i < __MT_TXQ_MAX; i++)
 		mt76_queue_tx_cleanup(dev, i, true);
 
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
+	mt76_for_each_q_rx(&dev->mt76, i) {
 		mt76_queue_rx_reset(dev, i);
+	}
 
 	mt76x02_mac_start(dev);
 
diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
index ab20dfde94af..a264e304a3df 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mac.c
@@ -1146,8 +1146,9 @@ mt7915_dma_reset(struct mt7915_dev *dev)
 	for (i = 0; i < __MT_TXQ_MAX; i++)
 		mt76_queue_tx_cleanup(dev, i, true);
 
-	for (i = 0; i < ARRAY_SIZE(dev->mt76.q_rx); i++)
+	mt76_for_each_q_rx(&dev->mt76, i) {
 		mt76_queue_rx_reset(dev, i);
+	}
 
 	/* re-init prefetch settings after reset */
 	mt7915_dma_prefetch(dev);
-- 
cgit v1.2.3-59-g8ed1b


From 194a1508e082582159e312f818e11ab0f8e96e50 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 28 May 2020 07:48:56 +0000
Subject: mt76: mt7615: Use kmemdup in mt7615_queue_key_update()

Use kmemdup() rather than open-coding it with kzalloc() and memcpy().

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7615/main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7615/main.c b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
index 320dfda6b4e5..c26f99b368d9 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7615/main.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7615/main.c
@@ -290,12 +290,11 @@ mt7615_queue_key_update(struct mt7615_dev *dev, enum set_key_cmd cmd,
 	wd->type = MT7615_WTBL_KEY_DESC;
 	wd->sta = msta;
 
-	wd->key.key = kzalloc(key->keylen, GFP_KERNEL);
+	wd->key.key = kmemdup(key->key, key->keylen, GFP_KERNEL);
 	if (!wd->key.key) {
 		kfree(wd);
 		return -ENOMEM;
 	}
-	memcpy(wd->key.key, key->key, key->keylen);
 	wd->key.cipher = key->cipher;
 	wd->key.keyidx = key->keyidx;
 	wd->key.keylen = key->keylen;
-- 
cgit v1.2.3-59-g8ed1b


From d9045b18cd445e0d0a53903ffd5d79793d9df59e Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 28 May 2020 07:48:29 +0000
Subject: mt76: mt7915: remove set but not used variable 'msta'

Cc: linux-wireless@vger.kernel.org,
    linux-arm-kernel@lists.infradead.org,
    linux-mediatek@lists.infradead.org,
    netdev@vger.kernel.org,
    kernel-janitors@vger.kernel.org

Fixes gcc '-Wunused-but-set-variable' warning:

drivers/net/wireless/mediatek/mt76/mt7915/mcu.c: In function 'mt7915_mcu_sta_txbf_type':
drivers/net/wireless/mediatek/mt76/mt7915/mcu.c:1805:21: warning:
 variable 'msta' set but not used [-Wunused-but-set-variable]

It is never used, so it can be removed.

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: Felix Fietkau <nbd@nbd.name>
---
 drivers/net/wireless/mediatek/mt76/mt7915/mcu.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
index 8460cd453213..c8c12c740c1a 100644
--- a/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
+++ b/drivers/net/wireless/mediatek/mt76/mt7915/mcu.c
@@ -1801,15 +1801,12 @@ static u8
 mt7915_mcu_sta_txbf_type(struct mt7915_phy *phy, struct ieee80211_vif *vif,
 			 struct ieee80211_sta *sta)
 {
-	struct mt7915_sta *msta;
 	u8 type = 0;
 
 	if (vif->type != NL80211_IFTYPE_STATION &&
 	    vif->type != NL80211_IFTYPE_AP)
 		return 0;
 
-	msta = (struct mt7915_sta *)sta->drv_priv;
-
 	if (sta->he_cap.has_he) {
 		struct ieee80211_he_cap_elem *pe;
 		const struct ieee80211_he_cap_elem *ve;
-- 
cgit v1.2.3-59-g8ed1b


From 7c741868ceab825bb99cf6c72859e9364d54a07c Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Wed, 27 May 2020 18:03:44 -0600
Subject: selftests: Add torture tests to nexthop tests

Add Nik's torture tests as a new set to stress the replace and cleanup
paths.

The torture test was created by Nikolay Aleksandrov; I adapted it to the
selftest framework and added an IPv6 version.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/fib_nexthops.sh | 115 +++++++++++++++++++++++++++-
 1 file changed, 113 insertions(+), 2 deletions(-)

diff --git a/tools/testing/selftests/net/fib_nexthops.sh b/tools/testing/selftests/net/fib_nexthops.sh
index 1e2f61262e4e..dee567f7576a 100755
--- a/tools/testing/selftests/net/fib_nexthops.sh
+++ b/tools/testing/selftests/net/fib_nexthops.sh
@@ -19,8 +19,8 @@ ret=0
 ksft_skip=4
 
 # all tests in this script. Can be overridden with -t option
-IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal"
-IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal"
+IPV4_TESTS="ipv4_fcnal ipv4_grp_fcnal ipv4_withv6_fcnal ipv4_fcnal_runtime ipv4_large_grp ipv4_compat_mode ipv4_fdb_grp_fcnal ipv4_torture"
+IPV6_TESTS="ipv6_fcnal ipv6_grp_fcnal ipv6_fcnal_runtime ipv6_large_grp ipv6_compat_mode ipv6_fdb_grp_fcnal ipv6_torture"
 
 ALL_TESTS="basic ${IPV4_TESTS} ${IPV6_TESTS}"
 TESTS="${ALL_TESTS}"
@@ -767,6 +767,62 @@ ipv6_large_grp()
 	$IP nexthop flush >/dev/null 2>&1
 }
 
+ipv6_del_add_loop1()
+{
+	while :; do
+		$IP nexthop del id 100
+		$IP nexthop add id 100 via 2001:db8:91::2 dev veth1
+	done >/dev/null 2>&1
+}
+
+ipv6_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101
+	done >/dev/null 2>&1
+}
+
+ipv6_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv6 runtime torture"
+	echo "--------------------"
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 2001:db8:91::2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 2001:db8:92::2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101"
+	run_cmd "$IP route add 2001:db8:101::1 nhid 102"
+	run_cmd "$IP route add 2001:db8:101::2 nhid 102"
+
+	ipv6_del_add_loop1 &
+	pid1=$!
+	ipv6_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 2001:db8:101::1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 2001:db8:101::2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 -B 2001:db8:101::2 -A 2001:db8:91::1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+	# if we did not crash, success
+	log_test 0 0 "IPv6 torture test"
+}
+
+
 ipv4_fcnal()
 {
 	local rc
@@ -1313,6 +1369,61 @@ ipv4_compat_mode()
 	sysctl_nexthop_compat_mode_set 1 "IPv4"
 }
 
+ipv4_del_add_loop1()
+{
+	while :; do
+		$IP nexthop del id 100
+		$IP nexthop add id 100 via 172.16.1.2 dev veth1
+	done >/dev/null 2>&1
+}
+
+ipv4_grp_replace_loop()
+{
+	while :; do
+		$IP nexthop replace id 102 group 100/101
+	done >/dev/null 2>&1
+}
+
+ipv4_torture()
+{
+	local pid1
+	local pid2
+	local pid3
+	local pid4
+	local pid5
+
+	echo
+	echo "IPv4 runtime torture"
+	echo "--------------------"
+	if [ ! -x "$(command -v mausezahn)" ]; then
+		echo "SKIP: Could not run test; need mausezahn tool"
+		return
+	fi
+
+	run_cmd "$IP nexthop add id 100 via 172.16.1.2 dev veth1"
+	run_cmd "$IP nexthop add id 101 via 172.16.2.2 dev veth3"
+	run_cmd "$IP nexthop add id 102 group 100/101"
+	run_cmd "$IP route add 172.16.101.1 nhid 102"
+	run_cmd "$IP route add 172.16.101.2 nhid 102"
+
+	ipv4_del_add_loop1 &
+	pid1=$!
+	ipv4_grp_replace_loop &
+	pid2=$!
+	ip netns exec me ping -f 172.16.101.1 >/dev/null 2>&1 &
+	pid3=$!
+	ip netns exec me ping -f 172.16.101.2 >/dev/null 2>&1 &
+	pid4=$!
+	ip netns exec me mausezahn veth1 -B 172.16.101.2 -A 172.16.1.1 -c 0 -t tcp "dp=1-1023, flags=syn" >/dev/null 2>&1 &
+	pid5=$!
+
+	sleep 300
+	kill -9 $pid1 $pid2 $pid3 $pid4 $pid5
+
+	# if we did not crash, success
+	log_test 0 0 "IPv4 torture test"
+}
+
 basic()
 {
 	echo
-- 
cgit v1.2.3-59-g8ed1b


From 4d7525085a9ba86b9d78561d379b2ff8c0b30468 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Thu, 28 May 2020 03:27:58 +0300
Subject: net: dsa: sja1105: offload the Credit-Based Shaper qdisc

The SJA1105 switches, being AVB/TSN switches, provide hardware assist for
the Credit-Based Shaper as described in the IEEE 802.1Q-2018 document.

First generation has 10 shapers, freely assignable to any of the 4
external ports and 8 traffic classes, and second generation has 16
shapers.

The Credit-Based Shaper tables are accessed through the dynamic
reconfiguration interface, so we have to restore them manually after a
switch reset. The tables are backed up by the static config only on
P/Q/R/S, and we don't want to add custom code only for that family,
since the procedure that is in place now works for both.

Tested with the following commands:

data_rate_kbps=67000
port_transmit_rate_kbps=1000000
idleslope=$data_rate_kbps
sendslope=$(($idleslope - $port_transmit_rate_kbps))
locredit=$((-0x80000000))
hicredit=$((0x7fffffff))
tc qdisc add dev swp2 root handle 1: mqprio hw 0 num_tc 8 \
        map 0 1 2 3 4 5 6 7 \
        queues 1@0 1@1 1@2 1@3 1@4 1@5 1@6 1@7
tc qdisc replace dev swp2 parent 1:1 cbs \
        idleslope $idleslope \
        sendslope $sendslope \
        hicredit $hicredit \
        locredit $locredit \
        offload 1

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h                |   2 +
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c |  76 +++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_main.c           | 100 +++++++++++++++++++++++
 drivers/net/dsa/sja1105/sja1105_spi.c            |   6 ++
 drivers/net/dsa/sja1105/sja1105_static_config.h  |  15 ++++
 5 files changed, 199 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index 198d2a7d7f95..cb3c81a49fbc 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -84,6 +84,7 @@ struct sja1105_info {
 	 * the egress timestamps.
 	 */
 	int ptpegr_ts_bytes;
+	int num_cbs_shapers;
 	const struct sja1105_dynamic_table_ops *dyn_ops;
 	const struct sja1105_table_ops *static_ops;
 	const struct sja1105_regs *regs;
@@ -218,6 +219,7 @@ struct sja1105_private {
 	struct mutex mgmt_lock;
 	bool expect_dsa_8021q;
 	enum sja1105_vlan_state vlan_state;
+	struct sja1105_cbs_entry *cbs;
 	struct sja1105_tagger_data tagger_data;
 	struct sja1105_ptp_data ptp_data;
 	struct sja1105_tas_data tas_data;
diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 2a8fbd7fdedc..7516f2ffdd4e 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -136,6 +136,12 @@
 #define SJA1105_SIZE_RETAGGING_DYN_CMD				\
 	(SJA1105_SIZE_DYN_CMD + SJA1105_SIZE_RETAGGING_ENTRY)
 
+#define SJA1105ET_SIZE_CBS_DYN_CMD				\
+	(SJA1105_SIZE_DYN_CMD + SJA1105ET_SIZE_CBS_ENTRY)
+
+#define SJA1105PQRS_SIZE_CBS_DYN_CMD				\
+	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_CBS_ENTRY)
+
 #define SJA1105_MAX_DYN_CMD_SIZE				\
 	SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD
 
@@ -542,6 +548,60 @@ sja1105_retagging_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 	sja1105_packing(p, &cmd->index,     5,  0, size, op);
 }
 
+static void sja1105et_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				      enum packing_op op)
+{
+	u8 *p = buf + SJA1105ET_SIZE_CBS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid, 31, 31, size, op);
+	sja1105_packing(p, &cmd->index, 19, 16, size, op);
+}
+
+static size_t sja1105et_cbs_entry_packing(void *buf, void *entry_ptr,
+					  enum packing_op op)
+{
+	const size_t size = SJA1105ET_SIZE_CBS_ENTRY;
+	struct sja1105_cbs_entry *entry = entry_ptr;
+	u8 *cmd = buf + size;
+	u32 *p = buf;
+
+	sja1105_packing(cmd, &entry->port, 5, 3, SJA1105_SIZE_DYN_CMD, op);
+	sja1105_packing(cmd, &entry->prio, 2, 0, SJA1105_SIZE_DYN_CMD, op);
+	sja1105_packing(p + 3, &entry->credit_lo,  31, 0, size, op);
+	sja1105_packing(p + 2, &entry->credit_hi,  31, 0, size, op);
+	sja1105_packing(p + 1, &entry->send_slope, 31, 0, size, op);
+	sja1105_packing(p + 0, &entry->idle_slope, 31, 0, size, op);
+	return size;
+}
+
+static void sja1105pqrs_cbs_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+					enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_CBS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+	sja1105_packing(p, &cmd->errors,  29, 29, size, op);
+	sja1105_packing(p, &cmd->index,    3,  0, size, op);
+}
+
+static size_t sja1105pqrs_cbs_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op)
+{
+	const size_t size = SJA1105PQRS_SIZE_CBS_ENTRY;
+	struct sja1105_cbs_entry *entry = entry_ptr;
+
+	sja1105_packing(buf, &entry->port,      159, 157, size, op);
+	sja1105_packing(buf, &entry->prio,      156, 154, size, op);
+	sja1105_packing(buf, &entry->credit_lo, 153, 122, size, op);
+	sja1105_packing(buf, &entry->credit_hi, 121,  90, size, op);
+	sja1105_packing(buf, &entry->send_slope, 89,  58, size, op);
+	sja1105_packing(buf, &entry->idle_slope, 57,  26, size, op);
+	return size;
+}
+
 #define OP_READ		BIT(0)
 #define OP_WRITE	BIT(1)
 #define OP_DEL		BIT(2)
@@ -631,6 +691,14 @@ struct sja1105_dynamic_table_ops sja1105et_dyn_ops[BLK_IDX_MAX_DYN] = {
 		.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
 		.addr = 0x31,
 	},
+	[BLK_IDX_CBS] = {
+		.entry_packing = sja1105et_cbs_entry_packing,
+		.cmd_packing = sja1105et_cbs_cmd_packing,
+		.max_entry_count = SJA1105ET_MAX_CBS_COUNT,
+		.access = OP_WRITE,
+		.packed_size = SJA1105ET_SIZE_CBS_DYN_CMD,
+		.addr = 0x2c,
+	},
 	[BLK_IDX_XMII_PARAMS] = {0},
 };
 
@@ -725,6 +793,14 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 		.packed_size = SJA1105_SIZE_RETAGGING_DYN_CMD,
 		.addr = 0x38,
 	},
+	[BLK_IDX_CBS] = {
+		.entry_packing = sja1105pqrs_cbs_entry_packing,
+		.cmd_packing = sja1105pqrs_cbs_cmd_packing,
+		.max_entry_count = SJA1105PQRS_MAX_CBS_COUNT,
+		.access = OP_WRITE,
+		.packed_size = SJA1105PQRS_SIZE_CBS_DYN_CMD,
+		.addr = 0x32,
+	},
 	[BLK_IDX_XMII_PARAMS] = {0},
 };
 
diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 44ce7882dfb1..36ab527449e6 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -1640,6 +1640,92 @@ static void sja1105_bridge_leave(struct dsa_switch *ds, int port,
 	sja1105_bridge_member(ds, port, br, false);
 }
 
+#define BYTES_PER_KBIT (1000LL / 8)
+
+static int sja1105_find_unused_cbs_shaper(struct sja1105_private *priv)
+{
+	int i;
+
+	for (i = 0; i < priv->info->num_cbs_shapers; i++)
+		if (!priv->cbs[i].idle_slope && !priv->cbs[i].send_slope)
+			return i;
+
+	return -1;
+}
+
+static int sja1105_delete_cbs_shaper(struct sja1105_private *priv, int port,
+				     int prio)
+{
+	int i;
+
+	for (i = 0; i < priv->info->num_cbs_shapers; i++) {
+		struct sja1105_cbs_entry *cbs = &priv->cbs[i];
+
+		if (cbs->port == port && cbs->prio == prio) {
+			memset(cbs, 0, sizeof(*cbs));
+			return sja1105_dynamic_config_write(priv, BLK_IDX_CBS,
+							    i, cbs, true);
+		}
+	}
+
+	return 0;
+}
+
+static int sja1105_setup_tc_cbs(struct dsa_switch *ds, int port,
+				struct tc_cbs_qopt_offload *offload)
+{
+	struct sja1105_private *priv = ds->priv;
+	struct sja1105_cbs_entry *cbs;
+	int index;
+
+	if (!offload->enable)
+		return sja1105_delete_cbs_shaper(priv, port, offload->queue);
+
+	index = sja1105_find_unused_cbs_shaper(priv);
+	if (index < 0)
+		return -ENOSPC;
+
+	cbs = &priv->cbs[index];
+	cbs->port = port;
+	cbs->prio = offload->queue;
+	/* locredit and sendslope are negative by definition. In hardware,
+	 * positive values must be provided, and the negative sign is implicit.
+	 */
+	cbs->credit_hi = offload->hicredit;
+	cbs->credit_lo = abs(offload->locredit);
+	/* User space is in kbits/sec, hardware in bytes/sec */
+	cbs->idle_slope = offload->idleslope * BYTES_PER_KBIT;
+	cbs->send_slope = abs(offload->sendslope * BYTES_PER_KBIT);
+	/* Convert the negative values from 64-bit 2's complement
+	 * to 32-bit 2's complement (for the case of 0x80000000 whose
+	 * negative is still negative).
+	 */
+	cbs->credit_lo &= GENMASK_ULL(31, 0);
+	cbs->send_slope &= GENMASK_ULL(31, 0);
+
+	return sja1105_dynamic_config_write(priv, BLK_IDX_CBS, index, cbs,
+					    true);
+}
+
+static int sja1105_reload_cbs(struct sja1105_private *priv)
+{
+	int rc = 0, i;
+
+	for (i = 0; i < priv->info->num_cbs_shapers; i++) {
+		struct sja1105_cbs_entry *cbs = &priv->cbs[i];
+
+		if (!cbs->idle_slope && !cbs->send_slope)
+			continue;
+
+		rc = sja1105_dynamic_config_write(priv, BLK_IDX_CBS, i, cbs,
+						  true);
+		if (rc)
+			break;
+	}
+
+	return rc;
+}
+
 static const char * const sja1105_reset_reasons[] = {
 	[SJA1105_VLAN_FILTERING] = "VLAN filtering",
 	[SJA1105_RX_HWTSTAMPING] = "RX timestamping",
@@ -1754,6 +1840,10 @@ out_unlock_ptp:
 			sja1105_sgmii_pcs_force_speed(priv, speed);
 		}
 	}
+
+	rc = sja1105_reload_cbs(priv);
+	if (rc < 0)
+		goto out;
 out:
 	mutex_unlock(&priv->mgmt_lock);
 
@@ -3131,6 +3221,8 @@ static int sja1105_port_setup_tc(struct dsa_switch *ds, int port,
 	switch (type) {
 	case TC_SETUP_QDISC_TAPRIO:
 		return sja1105_setup_tc_taprio(ds, port, type_data);
+	case TC_SETUP_QDISC_CBS:
+		return sja1105_setup_tc_cbs(ds, port, type_data);
 	default:
 		return -EOPNOTSUPP;
 	}
@@ -3408,6 +3500,14 @@ static int sja1105_probe(struct spi_device *spi)
 	if (rc)
 		return rc;
 
+	if (IS_ENABLED(CONFIG_NET_SCH_CBS)) {
+		priv->cbs = devm_kcalloc(dev, priv->info->num_cbs_shapers,
+					 sizeof(struct sja1105_cbs_entry),
+					 GFP_KERNEL);
+		if (!priv->cbs)
+			return -ENOMEM;
+	}
+
 	/* Connections between dsa_port and sja1105_port */
 	for (port = 0; port < SJA1105_NUM_PORTS; port++) {
 		struct sja1105_port *sp = &priv->ports[port];
diff --git a/drivers/net/dsa/sja1105/sja1105_spi.c b/drivers/net/dsa/sja1105/sja1105_spi.c
index a0dacae803cc..bb52b9c841b2 100644
--- a/drivers/net/dsa/sja1105/sja1105_spi.c
+++ b/drivers/net/dsa/sja1105/sja1105_spi.c
@@ -515,6 +515,7 @@ struct sja1105_info sja1105e_info = {
 	.qinq_tpid		= ETH_P_8021Q,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
+	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
@@ -530,6 +531,7 @@ struct sja1105_info sja1105t_info = {
 	.qinq_tpid		= ETH_P_8021Q,
 	.ptp_ts_bits		= 24,
 	.ptpegr_ts_bytes	= 4,
+	.num_cbs_shapers	= SJA1105ET_MAX_CBS_COUNT,
 	.reset_cmd		= sja1105et_reset_cmd,
 	.fdb_add_cmd		= sja1105et_fdb_add,
 	.fdb_del_cmd		= sja1105et_fdb_del,
@@ -545,6 +547,7 @@ struct sja1105_info sja1105p_info = {
 	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
@@ -561,6 +564,7 @@ struct sja1105_info sja1105q_info = {
 	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
@@ -577,6 +581,7 @@ struct sja1105_info sja1105r_info = {
 	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
@@ -594,6 +599,7 @@ struct sja1105_info sja1105s_info = {
 	.qinq_tpid		= ETH_P_8021AD,
 	.ptp_ts_bits		= 32,
 	.ptpegr_ts_bytes	= 8,
+	.num_cbs_shapers	= SJA1105PQRS_MAX_CBS_COUNT,
 	.setup_rgmii_delay	= sja1105pqrs_setup_rgmii_delay,
 	.reset_cmd		= sja1105pqrs_reset_cmd,
 	.fdb_add_cmd		= sja1105pqrs_fdb_add,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 5946847bb5b9..9b62b9b5549d 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -30,11 +30,13 @@
 #define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_ENTRY		4
 #define SJA1105ET_SIZE_GENERAL_PARAMS_ENTRY		40
 #define SJA1105ET_SIZE_AVB_PARAMS_ENTRY			12
+#define SJA1105ET_SIZE_CBS_ENTRY			16
 #define SJA1105PQRS_SIZE_L2_LOOKUP_ENTRY		20
 #define SJA1105PQRS_SIZE_MAC_CONFIG_ENTRY		32
 #define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY		16
 #define SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY		44
 #define SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY		16
+#define SJA1105PQRS_SIZE_CBS_ENTRY			20
 
 /* UM10944.pdf Page 11, Table 2. Configuration Blocks */
 enum {
@@ -56,6 +58,7 @@ enum {
 	BLKID_AVB_PARAMS				= 0x10,
 	BLKID_GENERAL_PARAMS				= 0x11,
 	BLKID_RETAGGING					= 0x12,
+	BLKID_CBS					= 0x13,
 	BLKID_XMII_PARAMS				= 0x4E,
 };
 
@@ -78,6 +81,7 @@ enum sja1105_blk_idx {
 	BLK_IDX_AVB_PARAMS,
 	BLK_IDX_GENERAL_PARAMS,
 	BLK_IDX_RETAGGING,
+	BLK_IDX_CBS,
 	BLK_IDX_XMII_PARAMS,
 	BLK_IDX_MAX,
 	/* Fake block indices that are only valid for dynamic access */
@@ -105,6 +109,8 @@ enum sja1105_blk_idx {
 #define SJA1105_MAX_RETAGGING_COUNT			32
 #define SJA1105_MAX_XMII_PARAMS_COUNT			1
 #define SJA1105_MAX_AVB_PARAMS_COUNT			1
+#define SJA1105ET_MAX_CBS_COUNT				10
+#define SJA1105PQRS_MAX_CBS_COUNT			16
 
 #define SJA1105_MAX_FRAME_MEMORY			929
 #define SJA1105_MAX_FRAME_MEMORY_RETAGGING		910
@@ -289,6 +295,15 @@ struct sja1105_retagging_entry {
 	u64 destports;
 };
 
+struct sja1105_cbs_entry {
+	u64 port;
+	u64 prio;
+	u64 credit_hi;
+	u64 credit_lo;
+	u64 send_slope;
+	u64 idle_slope;
+};
+
 struct sja1105_xmii_params_entry {
 	u64 phy_mac[5];
 	u64 xmii_mode[5];
-- 
cgit v1.2.3-59-g8ed1b


From d29245692a44d71d5e2e0770463184a693696232 Mon Sep 17 00:00:00 2001
From: Eric Dumazet <edumazet@google.com>
Date: Wed, 27 May 2020 17:34:58 -0700
Subject: tcp: ipv6: support RFC 6069 (TCP-LD)

Make the tcp_ld_RTO_revert() helper available to IPv6, and
implement RFC 6069 for IPv6.

Quoting this RFC:

3. Connectivity Disruption Indication

   For Internet Protocol version 6 (IPv6) [RFC2460], the counterpart of
   the ICMP destination unreachable message of code 0 (net unreachable)
   and of code 1 (host unreachable) is the ICMPv6 destination
   unreachable message of code 0 (no route to destination) [RFC4443].
   As with IPv4, a router should generate an ICMPv6 destination
   unreachable message of code 0 in response to a packet that cannot be
   delivered to its destination address because it lacks a matching
   entry in its routing table.

Signed-off-by: Eric Dumazet <edumazet@google.com>
Acked-by: Yuchung Cheng <ycheng@google.com>
Acked-by: Neal Cardwell <ncardwell@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h   | 1 +
 net/ipv4/tcp_ipv4.c | 3 ++-
 net/ipv6/tcp_ipv6.c | 9 +++++++++
 3 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index b681338a8320..66e4b8331850 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -437,6 +437,7 @@ u16 tcp_get_syncookie_mss(struct request_sock_ops *rsk_ops,
 void tcp_v4_send_check(struct sock *sk, struct sk_buff *skb);
 void tcp_v4_mtu_reduced(struct sock *sk);
 void tcp_req_err(struct sock *sk, u32 seq, bool abort);
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq);
 int tcp_v4_conn_request(struct sock *sk, struct sk_buff *skb);
 struct sock *tcp_create_openreq_child(const struct sock *sk,
 				      struct request_sock *req,
diff --git a/net/ipv4/tcp_ipv4.c b/net/ipv4/tcp_ipv4.c
index 4eef5b84fff1..ad6435ba6d72 100644
--- a/net/ipv4/tcp_ipv4.c
+++ b/net/ipv4/tcp_ipv4.c
@@ -404,7 +404,7 @@ void tcp_req_err(struct sock *sk, u32 seq, bool abort)
 EXPORT_SYMBOL(tcp_req_err);
 
 /* TCP-LD (RFC 6069) logic */
-static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
+void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
 {
 	struct inet_connection_sock *icsk = inet_csk(sk);
 	struct tcp_sock *tp = tcp_sk(sk);
@@ -441,6 +441,7 @@ static void tcp_ld_RTO_revert(struct sock *sk, u32 seq)
 		tcp_retransmit_timer(sk);
 	}
 }
+EXPORT_SYMBOL(tcp_ld_RTO_revert);
 
 /*
  * This routine is called by the ICMP module when it gets some
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index 01a6f5111a77..b7415ca75c2d 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -473,6 +473,15 @@ static int tcp_v6_err(struct sk_buff *skb, struct inet6_skb_parm *opt,
 		} else
 			sk->sk_err_soft = err;
 		goto out;
+	case TCP_LISTEN:
+		break;
+	default:
+		/* check if this ICMP message allows revert of backoff.
+		 * (see RFC 6069)
+		 */
+		if (!fastopen && type == ICMPV6_DEST_UNREACH &&
+		    code == ICMPV6_NOROUTE)
+			tcp_ld_RTO_revert(sk, seq);
 	}
 
 	if (!sock_owned_by_user(sk) && np->recverr) {
-- 
cgit v1.2.3-59-g8ed1b


From b58f0e8f38c0a44afa59601a115bd231f23471e1 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:09 +0200
Subject: net: add sock_set_reuseaddr

Add a helper to directly set the SO_REUSEADDR sockopt from kernel space
without going through a fake uaccess.

For this, the iSCSI target now has to formally depend on INET to avoid
a mostly theoretical compile failure.  For actual operation it already
depended on having IPv4 or IPv6 support.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/infiniband/sw/siw/siw_cm.c        | 18 +++++-------------
 drivers/nvme/target/tcp.c                 |  8 +-------
 drivers/target/iscsi/Kconfig              |  2 +-
 drivers/target/iscsi/iscsi_target_login.c |  9 +--------
 fs/dlm/lowcomms.c                         |  6 +-----
 include/net/sock.h                        |  2 ++
 net/core/sock.c                           |  8 ++++++++
 7 files changed, 19 insertions(+), 34 deletions(-)

diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index 559e5fd3bad8..d1860f3e8740 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -1312,17 +1312,14 @@ static void siw_cm_llp_state_change(struct sock *sk)
 static int kernel_bindconnect(struct socket *s, struct sockaddr *laddr,
 			      struct sockaddr *raddr)
 {
-	int rv, flags = 0, s_val = 1;
+	int rv, flags = 0;
 	size_t size = laddr->sa_family == AF_INET ?
 		sizeof(struct sockaddr_in) : sizeof(struct sockaddr_in6);
 
 	/*
 	 * Make address available again asap.
 	 */
-	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
-			       sizeof(s_val));
-	if (rv < 0)
-		return rv;
+	sock_set_reuseaddr(s->sk);
 
 	rv = s->ops->bind(s, laddr, size);
 	if (rv < 0)
@@ -1781,7 +1778,7 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 	struct siw_cep *cep = NULL;
 	struct siw_device *sdev = to_siw_dev(id->device);
 	int addr_family = id->local_addr.ss_family;
-	int rv = 0, s_val;
+	int rv = 0;
 
 	if (addr_family != AF_INET && addr_family != AF_INET6)
 		return -EAFNOSUPPORT;
@@ -1793,13 +1790,8 @@ int siw_create_listen(struct iw_cm_id *id, int backlog)
 	/*
 	 * Allow binding local port when still in TIME_WAIT from last close.
 	 */
-	s_val = 1;
-	rv = kernel_setsockopt(s, SOL_SOCKET, SO_REUSEADDR, (char *)&s_val,
-			       sizeof(s_val));
-	if (rv) {
-		siw_dbg(id->device, "setsockopt error: %d\n", rv);
-		goto error;
-	}
+	sock_set_reuseaddr(s->sk);
+
 	if (addr_family == AF_INET) {
 		struct sockaddr_in *laddr = &to_sockaddr_in(id->local_addr);
 
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f0da04e960f4..40757a63f455 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1632,6 +1632,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 	port->sock->sk->sk_user_data = port;
 	port->data_ready = port->sock->sk->sk_data_ready;
 	port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
+	sock_set_reuseaddr(port->sock->sk);
 
 	opt = 1;
 	ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
@@ -1641,13 +1642,6 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 		goto err_sock;
 	}
 
-	ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_REUSEADDR,
-			(char *)&opt, sizeof(opt));
-	if (ret) {
-		pr_err("failed to set SO_REUSEADDR sock opt %d\n", ret);
-		goto err_sock;
-	}
-
 	if (so_priority > 0) {
 		ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
 				(char *)&so_priority, sizeof(so_priority));
diff --git a/drivers/target/iscsi/Kconfig b/drivers/target/iscsi/Kconfig
index 1f93ea381353..922484ea4e30 100644
--- a/drivers/target/iscsi/Kconfig
+++ b/drivers/target/iscsi/Kconfig
@@ -1,7 +1,7 @@
 # SPDX-License-Identifier: GPL-2.0-only
 config ISCSI_TARGET
 	tristate "Linux-iSCSI.org iSCSI Target Mode Stack"
-	depends on NET
+	depends on INET
 	select CRYPTO
 	select CRYPTO_CRC32C
 	select CRYPTO_CRC32C_INTEL if X86
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 731ee67fe914..91acb3f07b4c 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -909,14 +909,7 @@ int iscsit_setup_np(
 		}
 	}
 
-	/* FIXME: Someone please explain why this is endian-safe */
-	ret = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-			(char *)&opt, sizeof(opt));
-	if (ret < 0) {
-		pr_err("kernel_setsockopt() for SO_REUSEADDR"
-			" failed\n");
-		goto fail;
-	}
+	sock_set_reuseaddr(sock->sk);
 
 	ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
 			(char *)&opt, sizeof(opt));
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index f13dad0fd9ef..88f2574ca63a 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1127,12 +1127,8 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
 			  sizeof(one));
 
-	result = kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEADDR,
-				   (char *)&one, sizeof(one));
+	sock_set_reuseaddr(sock->sk);
 
-	if (result < 0) {
-		log_print("Failed to set SO_REUSEADDR on socket: %d", result);
-	}
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	sock->sk->sk_user_data = con;
 	save_listen_callbacks(sock);
diff --git a/include/net/sock.h b/include/net/sock.h
index 3e8c6d4b4b59..2ec085044790 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,4 +2688,6 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 
 void sock_def_readable(struct sock *sk);
 
+void sock_set_reuseaddr(struct sock *sk);
+
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index fd85e651ce28..18eb84fdf5fb 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -712,6 +712,14 @@ bool sk_mc_loop(struct sock *sk)
 }
 EXPORT_SYMBOL(sk_mc_loop);
 
+void sock_set_reuseaddr(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuse = SK_CAN_REUSE;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseaddr);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
-- 
cgit v1.2.3-59-g8ed1b


From c433594c07457d2b2e41a87014bfad9bec279abf Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:10 +0200
Subject: net: add sock_no_linger

Add a helper to directly set the SO_LINGER sockopt from kernel space
with onoff set to true and a linger time of 0 without going through a
fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp.c   |  9 +--------
 drivers/nvme/target/tcp.c |  6 +-----
 include/net/sock.h        |  1 +
 net/core/sock.c           |  9 +++++++++
 net/rds/tcp.h             |  1 -
 net/rds/tcp_connect.c     |  2 +-
 net/rds/tcp_listen.c      | 13 +------------
 net/sunrpc/svcsock.c      | 12 ++----------
 8 files changed, 16 insertions(+), 37 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index c15a92163c1f..e72d87482eb7 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,6 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
-	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
 	int ret, opt, rcv_pdu_size;
 
 	queue->ctrl = ctrl;
@@ -1361,13 +1360,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	 * close. This is done to prevent stale data from being sent should
 	 * the network connection be restored before TCP times out.
 	 */
-	ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_LINGER,
-			(char *)&sol, sizeof(sol));
-	if (ret) {
-		dev_err(nctrl->device,
-			"failed to set SO_LINGER sock opt %d\n", ret);
-		goto err_sock;
-	}
+	sock_no_linger(queue->sock->sk);
 
 	if (so_priority > 0) {
 		ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 40757a63f455..e0801494b097 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1429,7 +1429,6 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 {
 	struct socket *sock = queue->sock;
 	struct inet_sock *inet = inet_sk(sock->sk);
-	struct linger sol = { .l_onoff = 1, .l_linger = 0 };
 	int ret;
 
 	ret = kernel_getsockname(sock,
@@ -1447,10 +1446,7 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 	 * close. This is done to prevent stale data from being sent should
 	 * the network connection be restored before TCP times out.
 	 */
-	ret = kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-			(char *)&sol, sizeof(sol));
-	if (ret)
-		return ret;
+	sock_no_linger(sock->sk);
 
 	if (so_priority > 0) {
 		ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
diff --git a/include/net/sock.h b/include/net/sock.h
index 2ec085044790..6ed00bf009bb 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,6 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 
 void sock_def_readable(struct sock *sk);
 
+void sock_no_linger(struct sock *sk);
 void sock_set_reuseaddr(struct sock *sk);
 
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 18eb84fdf5fb..f0f09524911c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -720,6 +720,15 @@ void sock_set_reuseaddr(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_set_reuseaddr);
 
+void sock_no_linger(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_lingertime = 0;
+	sock_set_flag(sk, SOCK_LINGER);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_no_linger);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index 3c69361d21c7..d640e210b97b 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -73,7 +73,6 @@ void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
 int rds_tcp_keepalive(struct socket *sock);
 void *rds_tcp_listen_sock_def_readable(struct net *net);
-void rds_tcp_set_linger(struct socket *sock);
 
 /* tcp_recv.c */
 int rds_tcp_recv_init(void);
diff --git a/net/rds/tcp_connect.c b/net/rds/tcp_connect.c
index 008f50fb25dd..4e64598176b0 100644
--- a/net/rds/tcp_connect.c
+++ b/net/rds/tcp_connect.c
@@ -207,7 +207,7 @@ void rds_tcp_conn_path_shutdown(struct rds_conn_path *cp)
 
 	if (sock) {
 		if (rds_destroy_pending(cp->cp_conn))
-			rds_tcp_set_linger(sock);
+			sock_no_linger(sock->sk);
 		sock->ops->shutdown(sock, RCV_SHUTDOWN | SEND_SHUTDOWN);
 		lock_sock(sock->sk);
 		rds_tcp_restore_callbacks(sock, tc); /* tc->tc_sock = NULL */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 810a3a49e947..bbb31b9c0b39 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -111,17 +111,6 @@ struct rds_tcp_connection *rds_tcp_accept_one_path(struct rds_connection *conn)
 	return NULL;
 }
 
-void rds_tcp_set_linger(struct socket *sock)
-{
-	struct linger no_linger = {
-		.l_onoff = 1,
-		.l_linger = 0,
-	};
-
-	kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-			  (char *)&no_linger, sizeof(no_linger));
-}
-
 int rds_tcp_accept_one(struct socket *sock)
 {
 	struct socket *new_sock = NULL;
@@ -241,7 +230,7 @@ rst_nsk:
 	 * be pending on it. By setting linger, we achieve the side-effect
 	 * of avoiding TIME_WAIT state on new_sock.
 	 */
-	rds_tcp_set_linger(new_sock);
+	sock_no_linger(new_sock->sk);
 	kernel_sock_shutdown(new_sock, SHUT_RDWR);
 	ret = 0;
 out:
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 023514e392b3..6773dacc64d8 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -323,17 +323,9 @@ static int svc_tcp_has_wspace(struct svc_xprt *xprt)
 
 static void svc_tcp_kill_temp_xprt(struct svc_xprt *xprt)
 {
-	struct svc_sock *svsk;
-	struct socket *sock;
-	struct linger no_linger = {
-		.l_onoff = 1,
-		.l_linger = 0,
-	};
+	struct svc_sock *svsk = container_of(xprt, struct svc_sock, sk_xprt);
 
-	svsk = container_of(xprt, struct svc_sock, sk_xprt);
-	sock = svsk->sk_sock;
-	kernel_setsockopt(sock, SOL_SOCKET, SO_LINGER,
-			  (char *)&no_linger, sizeof(no_linger));
+	sock_no_linger(svsk->sk_sock->sk);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 6e43496745e75ac49d644df984d2f4ee5b5b6b4e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:11 +0200
Subject: net: add sock_set_priority

Add a helper to directly set the SO_PRIORITY sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp.c   | 12 ++----------
 drivers/nvme/target/tcp.c | 18 ++++--------------
 include/net/sock.h        |  1 +
 net/core/sock.c           |  8 ++++++++
 4 files changed, 15 insertions(+), 24 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index e72d87482eb7..a307972d33a0 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1362,16 +1362,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	 */
 	sock_no_linger(queue->sock->sk);
 
-	if (so_priority > 0) {
-		ret = kernel_setsockopt(queue->sock, SOL_SOCKET, SO_PRIORITY,
-				(char *)&so_priority, sizeof(so_priority));
-		if (ret) {
-			dev_err(ctrl->ctrl.device,
-				"failed to set SO_PRIORITY sock opt, ret %d\n",
-				ret);
-			goto err_sock;
-		}
-	}
+	if (so_priority > 0)
+		sock_set_priority(queue->sock->sk, so_priority);
 
 	/* Set socket type of service */
 	if (nctrl->opts->tos >= 0) {
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index e0801494b097..f3088156d01d 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1448,12 +1448,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 	 */
 	sock_no_linger(sock->sk);
 
-	if (so_priority > 0) {
-		ret = kernel_setsockopt(sock, SOL_SOCKET, SO_PRIORITY,
-				(char *)&so_priority, sizeof(so_priority));
-		if (ret)
-			return ret;
-	}
+	if (so_priority > 0)
+		sock_set_priority(sock->sk, so_priority);
 
 	/* Set socket type of service */
 	if (inet->rcv_tos > 0) {
@@ -1638,14 +1634,8 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 		goto err_sock;
 	}
 
-	if (so_priority > 0) {
-		ret = kernel_setsockopt(port->sock, SOL_SOCKET, SO_PRIORITY,
-				(char *)&so_priority, sizeof(so_priority));
-		if (ret) {
-			pr_err("failed to set SO_PRIORITY sock opt %d\n", ret);
-			goto err_sock;
-		}
-	}
+	if (so_priority > 0)
+		sock_set_priority(port->sock->sk, so_priority);
 
 	ret = kernel_bind(port->sock, (struct sockaddr *)&port->addr,
 			sizeof(port->addr));
diff --git a/include/net/sock.h b/include/net/sock.h
index 6ed00bf009bb..a3a43141a4be 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2689,6 +2689,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 void sock_def_readable(struct sock *sk);
 
 void sock_no_linger(struct sock *sk);
+void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_reuseaddr(struct sock *sk);
 
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index f0f09524911c..ceda1a9248b3 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -729,6 +729,14 @@ void sock_no_linger(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_no_linger);
 
+void sock_set_priority(struct sock *sk, u32 priority)
+{
+	lock_sock(sk);
+	sk->sk_priority = priority;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_priority);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
-- 
cgit v1.2.3-59-g8ed1b


From 76ee0785f42afbc0418072b7179d95f450d3c9a8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:12 +0200
Subject: net: add sock_set_sndtimeo

Add a helper to directly set the SO_SNDTIMEO_NEW sockopt from kernel
space without going through a fake uaccess.  The interface is
simplified to only pass the seconds value, as that is the only
thing needed at the moment.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c  |  8 ++------
 include/net/sock.h |  1 +
 net/core/sock.c    | 11 +++++++++++
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 88f2574ca63a..b79711d0aac7 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -918,7 +918,6 @@ static void sctp_connect_to_sock(struct connection *con)
 	int result;
 	int addr_len;
 	struct socket *sock;
-	struct __kernel_sock_timeval tv = { .tv_sec = 5, .tv_usec = 0 };
 
 	if (con->nodeid == 0) {
 		log_print("attempt to connect sock 0 foiled");
@@ -970,13 +969,10 @@ static void sctp_connect_to_sock(struct connection *con)
 	 * since O_NONBLOCK argument in connect() function does not work here,
 	 * then, we should restore the default value of this attribute.
 	 */
-	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
-			  sizeof(tv));
+	sock_set_sndtimeo(sock->sk, 5);
 	result = sock->ops->connect(sock, (struct sockaddr *)&daddr, addr_len,
 				   0);
-	memset(&tv, 0, sizeof(tv));
-	kernel_setsockopt(sock, SOL_SOCKET, SO_SNDTIMEO_NEW, (char *)&tv,
-			  sizeof(tv));
+	sock_set_sndtimeo(sock->sk, 0);
 
 	if (result == -EINPROGRESS)
 		result = 0;
diff --git a/include/net/sock.h b/include/net/sock.h
index a3a43141a4be..9a7b9e98685a 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,5 +2691,6 @@ void sock_def_readable(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_reuseaddr(struct sock *sk);
+void sock_set_sndtimeo(struct sock *sk, s64 secs);
 
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index ceda1a9248b3..d3b1d61e4f76 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -737,6 +737,17 @@ void sock_set_priority(struct sock *sk, u32 priority)
 }
 EXPORT_SYMBOL(sock_set_priority);
 
+void sock_set_sndtimeo(struct sock *sk, s64 secs)
+{
+	lock_sock(sk);
+	if (secs && secs < MAX_SCHEDULE_TIMEOUT / HZ - 1)
+		sk->sk_sndtimeo = secs * HZ;
+	else
+		sk->sk_sndtimeo = MAX_SCHEDULE_TIMEOUT;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_sndtimeo);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
-- 
cgit v1.2.3-59-g8ed1b


From 7594888c782e735f8a7b110094307a4dbe7b3f03 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:13 +0200
Subject: net: add sock_bindtoindex

Add a helper to directly set the SO_BINDTOIFINDEX sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h        |  1 +
 net/core/sock.c           | 21 +++++++++++++++------
 net/ipv4/udp_tunnel.c     |  4 +---
 net/ipv6/ip6_udp_tunnel.c |  4 +---
 4 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 9a7b9e98685a..cdec7bc055d5 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2688,6 +2688,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 
 void sock_def_readable(struct sock *sk);
 
+int sock_bindtoindex(struct sock *sk, int ifindex);
 void sock_no_linger(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_reuseaddr(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index d3b1d61e4f76..23f80880fbb2 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -566,7 +566,7 @@ struct dst_entry *sk_dst_check(struct sock *sk, u32 cookie)
 }
 EXPORT_SYMBOL(sk_dst_check);
 
-static int sock_setbindtodevice_locked(struct sock *sk, int ifindex)
+static int sock_bindtoindex_locked(struct sock *sk, int ifindex)
 {
 	int ret = -ENOPROTOOPT;
 #ifdef CONFIG_NETDEVICES
@@ -594,6 +594,18 @@ out:
 	return ret;
 }
 
+int sock_bindtoindex(struct sock *sk, int ifindex)
+{
+	int ret;
+
+	lock_sock(sk);
+	ret = sock_bindtoindex_locked(sk, ifindex);
+	release_sock(sk);
+
+	return ret;
+}
+EXPORT_SYMBOL(sock_bindtoindex);
+
 static int sock_setbindtodevice(struct sock *sk, char __user *optval,
 				int optlen)
 {
@@ -634,10 +646,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
 			goto out;
 	}
 
-	lock_sock(sk);
-	ret = sock_setbindtodevice_locked(sk, index);
-	release_sock(sk);
-
+	return sock_bindtoindex(sk, index);
 out:
 #endif
 
@@ -1216,7 +1225,7 @@ set_rcvbuf:
 		break;
 
 	case SO_BINDTOIFINDEX:
-		ret = sock_setbindtodevice_locked(sk, val);
+		ret = sock_bindtoindex_locked(sk, val);
 		break;
 
 	default:
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 150e6f0fdbf5..2158e8bddf41 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,9 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 		goto error;
 
 	if (cfg->bind_ifindex) {
-		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
-					(void *)&cfg->bind_ifindex,
-					sizeof(cfg->bind_ifindex));
+		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
 		if (err < 0)
 			goto error;
 	}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 58956a6b66a2..6523609516d2 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -33,9 +33,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 			goto error;
 	}
 	if (cfg->bind_ifindex) {
-		err = kernel_setsockopt(sock, SOL_SOCKET, SO_BINDTOIFINDEX,
-					(void *)&cfg->bind_ifindex,
-					sizeof(cfg->bind_ifindex));
+		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
 		if (err < 0)
 			goto error;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 783da70e83967efeacf3c02c9dcfdc2b17bd62eb Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:14 +0200
Subject: net: add sock_enable_timestamps

Add a helper to directly enable timestamps instead of setting the
SO_TIMESTAMP* sockopts from kernel space and going through a fake
uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h       |  1 +
 net/core/sock.c          | 47 +++++++++++++++++++++++++++++------------------
 net/rxrpc/local_object.c |  8 +-------
 3 files changed, 31 insertions(+), 25 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index cdec7bc055d5..99ef43508d2b 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2689,6 +2689,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 void sock_def_readable(struct sock *sk);
 
 int sock_bindtoindex(struct sock *sk, int ifindex);
+void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_reuseaddr(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 23f80880fbb2..e4a4dd2b3d8b 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -757,6 +757,28 @@ void sock_set_sndtimeo(struct sock *sk, s64 secs)
 }
 EXPORT_SYMBOL(sock_set_sndtimeo);
 
+static void __sock_set_timestamps(struct sock *sk, bool val, bool new, bool ns)
+{
+	if (val)  {
+		sock_valbool_flag(sk, SOCK_TSTAMP_NEW, new);
+		sock_valbool_flag(sk, SOCK_RCVTSTAMPNS, ns);
+		sock_set_flag(sk, SOCK_RCVTSTAMP);
+		sock_enable_timestamp(sk, SOCK_TIMESTAMP);
+	} else {
+		sock_reset_flag(sk, SOCK_RCVTSTAMP);
+		sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
+		sock_reset_flag(sk, SOCK_TSTAMP_NEW);
+	}
+}
+
+void sock_enable_timestamps(struct sock *sk)
+{
+	lock_sock(sk);
+	__sock_set_timestamps(sk, true, false, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_enable_timestamps);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -948,28 +970,17 @@ set_rcvbuf:
 		break;
 
 	case SO_TIMESTAMP_OLD:
+		__sock_set_timestamps(sk, valbool, false, false);
+		break;
 	case SO_TIMESTAMP_NEW:
+		__sock_set_timestamps(sk, valbool, true, false);
+		break;
 	case SO_TIMESTAMPNS_OLD:
+		__sock_set_timestamps(sk, valbool, false, true);
+		break;
 	case SO_TIMESTAMPNS_NEW:
-		if (valbool)  {
-			if (optname == SO_TIMESTAMP_NEW || optname == SO_TIMESTAMPNS_NEW)
-				sock_set_flag(sk, SOCK_TSTAMP_NEW);
-			else
-				sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-
-			if (optname == SO_TIMESTAMP_OLD || optname == SO_TIMESTAMP_NEW)
-				sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			else
-				sock_set_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_set_flag(sk, SOCK_RCVTSTAMP);
-			sock_enable_timestamp(sk, SOCK_TIMESTAMP);
-		} else {
-			sock_reset_flag(sk, SOCK_RCVTSTAMP);
-			sock_reset_flag(sk, SOCK_RCVTSTAMPNS);
-			sock_reset_flag(sk, SOCK_TSTAMP_NEW);
-		}
+		__sock_set_timestamps(sk, valbool, true, true);
 		break;
-
 	case SO_TIMESTAMPING_NEW:
 		sock_set_flag(sk, SOCK_TSTAMP_NEW);
 		/* fall through */
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 01135e54d95d..5ea2bd01fdd5 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -189,13 +189,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		}
 
 		/* We want receive timestamps. */
-		opt = 1;
-		ret = kernel_setsockopt(local->socket, SOL_SOCKET, SO_TIMESTAMPNS_OLD,
-					(char *)&opt, sizeof(opt));
-		if (ret < 0) {
-			_debug("setsockopt failed");
-			goto error;
-		}
+		sock_enable_timestamps(local->socket->sk);
 		break;
 
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From ce3d9544cecacd40389c399d2b7ca31acc533b70 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:15 +0200
Subject: net: add sock_set_keepalive

Add a helper to directly set the SO_KEEPALIVE sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c     |  6 +-----
 include/net/sock.h    |  1 +
 net/core/sock.c       | 10 ++++++++++
 net/rds/tcp_listen.c  |  6 +-----
 net/sunrpc/xprtsock.c |  4 +---
 5 files changed, 14 insertions(+), 13 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b79711d0aac7..b6e6dba28154 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1142,11 +1142,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 		con->sock = NULL;
 		goto create_out;
 	}
-	result = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-				 (char *)&one, sizeof(one));
-	if (result < 0) {
-		log_print("Set keepalive failed: %d", result);
-	}
+	sock_set_keepalive(sock->sk);
 
 	result = sock->ops->listen(sock, 5);
 	if (result < 0) {
diff --git a/include/net/sock.h b/include/net/sock.h
index 99ef43508d2b..dc08c176238f 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2691,6 +2691,7 @@ void sock_def_readable(struct sock *sk);
 int sock_bindtoindex(struct sock *sk, int ifindex);
 void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
+void sock_set_keepalive(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_reuseaddr(struct sock *sk);
 void sock_set_sndtimeo(struct sock *sk, s64 secs);
diff --git a/net/core/sock.c b/net/core/sock.c
index e4a4dd2b3d8b..728f5fb156a0 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -779,6 +779,16 @@ void sock_enable_timestamps(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_enable_timestamps);
 
+void sock_set_keepalive(struct sock *sk)
+{
+	lock_sock(sk);
+	if (sk->sk_prot->keepalive)
+		sk->sk_prot->keepalive(sk, true);
+	sock_valbool_flag(sk, SOCK_KEEPOPEN, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_keepalive);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index bbb31b9c0b39..d8bd13276959 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -43,13 +43,9 @@ int rds_tcp_keepalive(struct socket *sock)
 	/* values below based on xs_udp_default_timeout */
 	int keepidle = 5; /* send a probe 'keepidle' secs after last data */
 	int keepcnt = 5; /* number of unack'ed probes before declaring dead */
-	int keepalive = 1;
 	int ret = 0;
 
-	ret = kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-				(char *)&keepalive, sizeof(keepalive));
-	if (ret < 0)
-		goto bail;
+	sock_set_keepalive(sock->sk);
 
 	ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
 				(char *)&keepcnt, sizeof(keepcnt));
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 845d0be805ec..30082cd03996 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2110,7 +2110,6 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 	struct sock_xprt *transport = container_of(xprt, struct sock_xprt, xprt);
 	unsigned int keepidle;
 	unsigned int keepcnt;
-	unsigned int opt_on = 1;
 	unsigned int timeo;
 
 	spin_lock(&xprt->transport_lock);
@@ -2122,8 +2121,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 	spin_unlock(&xprt->transport_lock);
 
 	/* TCP Keepalive options */
-	kernel_setsockopt(sock, SOL_SOCKET, SO_KEEPALIVE,
-			(char *)&opt_on, sizeof(opt_on));
+	sock_set_keepalive(sock->sk);
 	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
 			(char *)&keepidle, sizeof(keepidle));
 	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-- 
cgit v1.2.3-59-g8ed1b


From 26cfabf9cdd273650126d84a48a7f8dedbcded48 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:16 +0200
Subject: net: add sock_set_rcvbuf

Add a helper to directly set the SO_RCVBUFFORCE sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c  |  7 +------
 include/net/sock.h |  1 +
 net/core/sock.c    | 59 +++++++++++++++++++++++++++++-------------------------
 3 files changed, 34 insertions(+), 33 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index b6e6dba28154..2822a430a2b4 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1180,7 +1180,6 @@ static int sctp_listen_for_all(void)
 	struct socket *sock = NULL;
 	int result = -EINVAL;
 	struct connection *con = nodeid2con(0, GFP_NOFS);
-	int bufsize = NEEDED_RMEM;
 	int one = 1;
 
 	if (!con)
@@ -1195,11 +1194,7 @@ static int sctp_listen_for_all(void)
 		goto out;
 	}
 
-	result = kernel_setsockopt(sock, SOL_SOCKET, SO_RCVBUFFORCE,
-				 (char *)&bufsize, sizeof(bufsize));
-	if (result)
-		log_print("Error increasing buffer space on socket %d", result);
-
+	sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
 	result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
 				   sizeof(one));
 	if (result < 0)
diff --git a/include/net/sock.h b/include/net/sock.h
index dc08c176238f..c997289aabbf 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2693,6 +2693,7 @@ void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_keepalive(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
+void sock_set_rcvbuf(struct sock *sk, int val);
 void sock_set_reuseaddr(struct sock *sk);
 void sock_set_sndtimeo(struct sock *sk, s64 secs);
 
diff --git a/net/core/sock.c b/net/core/sock.c
index 728f5fb156a0..3c6ebf952e9a 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -789,6 +789,35 @@ void sock_set_keepalive(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_set_keepalive);
 
+static void __sock_set_rcvbuf(struct sock *sk, int val)
+{
+	/* Ensure val * 2 fits into an int, to prevent max_t() from treating it
+	 * as a negative value.
+	 */
+	val = min_t(int, val, INT_MAX / 2);
+	sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
+
+	/* We double it on the way in to account for "struct sk_buff" etc.
+	 * overhead.   Applications assume that the SO_RCVBUF setting they make
+	 * will allow that much actual data to be received on that socket.
+	 *
+	 * Applications are unaware that "struct sk_buff" and other overheads
+	 * allocate from the receive buffer during socket buffer allocation.
+	 *
+	 * And after considering the possible alternatives, returning the value
+	 * we actually used in getsockopt is the most desirable behavior.
+	 */
+	WRITE_ONCE(sk->sk_rcvbuf, max_t(int, val * 2, SOCK_MIN_RCVBUF));
+}
+
+void sock_set_rcvbuf(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__sock_set_rcvbuf(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_rcvbuf);
+
 /*
  *	This is meant for all protocols to use and covers goings on
  *	at the socket level. Everything here is generic.
@@ -885,30 +914,7 @@ set_sndbuf:
 		 * play 'guess the biggest size' games. RCVBUF/SNDBUF
 		 * are treated in BSD as hints
 		 */
-		val = min_t(u32, val, sysctl_rmem_max);
-set_rcvbuf:
-		/* Ensure val * 2 fits into an int, to prevent max_t()
-		 * from treating it as a negative value.
-		 */
-		val = min_t(int, val, INT_MAX / 2);
-		sk->sk_userlocks |= SOCK_RCVBUF_LOCK;
-		/*
-		 * We double it on the way in to account for
-		 * "struct sk_buff" etc. overhead.   Applications
-		 * assume that the SO_RCVBUF setting they make will
-		 * allow that much actual data to be received on that
-		 * socket.
-		 *
-		 * Applications are unaware that "struct sk_buff" and
-		 * other overheads allocate from the receive buffer
-		 * during socket buffer allocation.
-		 *
-		 * And after considering the possible alternatives,
-		 * returning the value we actually used in getsockopt
-		 * is the most desirable behavior.
-		 */
-		WRITE_ONCE(sk->sk_rcvbuf,
-			   max_t(int, val * 2, SOCK_MIN_RCVBUF));
+		__sock_set_rcvbuf(sk, min_t(u32, val, sysctl_rmem_max));
 		break;
 
 	case SO_RCVBUFFORCE:
@@ -920,9 +926,8 @@ set_rcvbuf:
 		/* No negative values (to prevent underflow, as val will be
 		 * multiplied by 2).
 		 */
-		if (val < 0)
-			val = 0;
-		goto set_rcvbuf;
+		__sock_set_rcvbuf(sk, max(val, 0));
+		break;
 
 	case SO_KEEPALIVE:
 		if (sk->sk_prot->keepalive)
-- 
cgit v1.2.3-59-g8ed1b


From fe31a326a4aadb4a3ba2b21deacc380d06802737 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:17 +0200
Subject: net: add sock_set_reuseport

Add a helper to directly set the SO_REUSEPORT sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/sock.h    |  1 +
 net/core/sock.c       |  8 ++++++++
 net/sunrpc/xprtsock.c | 17 +----------------
 3 files changed, 10 insertions(+), 16 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index c997289aabbf..d994daa418ec 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2695,6 +2695,7 @@ void sock_set_keepalive(struct sock *sk);
 void sock_set_priority(struct sock *sk, u32 priority);
 void sock_set_rcvbuf(struct sock *sk, int val);
 void sock_set_reuseaddr(struct sock *sk);
+void sock_set_reuseport(struct sock *sk);
 void sock_set_sndtimeo(struct sock *sk, s64 secs);
 
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 3c6ebf952e9a..2ca3425b519c 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -729,6 +729,14 @@ void sock_set_reuseaddr(struct sock *sk)
 }
 EXPORT_SYMBOL(sock_set_reuseaddr);
 
+void sock_set_reuseport(struct sock *sk)
+{
+	lock_sock(sk);
+	sk->sk_reuseport = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(sock_set_reuseport);
+
 void sock_no_linger(struct sock *sk)
 {
 	lock_sock(sk);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 30082cd03996..399848c2bcb2 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -1594,21 +1594,6 @@ static int xs_get_random_port(void)
 	return rand + min;
 }
 
-/**
- * xs_set_reuseaddr_port - set the socket's port and address reuse options
- * @sock: socket
- *
- * Note that this function has to be called on all sockets that share the
- * same port, and it must be called before binding.
- */
-static void xs_sock_set_reuseport(struct socket *sock)
-{
-	int opt = 1;
-
-	kernel_setsockopt(sock, SOL_SOCKET, SO_REUSEPORT,
-			(char *)&opt, sizeof(opt));
-}
-
 static unsigned short xs_sock_getport(struct socket *sock)
 {
 	struct sockaddr_storage buf;
@@ -1801,7 +1786,7 @@ static struct socket *xs_create_sock(struct rpc_xprt *xprt,
 	xs_reclassify_socket(family, sock);
 
 	if (reuseport)
-		xs_sock_set_reuseport(sock);
+		sock_set_reuseport(sock->sk);
 
 	err = xs_bind(transport, sock);
 	if (err) {
-- 
cgit v1.2.3-59-g8ed1b


From db10538a4b997a77a1fd561adaaa58afc7dcfa2f Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:18 +0200
Subject: tcp: add tcp_sock_set_cork

Add a helper to directly set the TCP_CORK sockopt from kernel space
without going through a fake uaccess.  Clean up the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/drbd/drbd_int.h      | 14 -----------
 drivers/block/drbd/drbd_receiver.c |  4 +--
 drivers/block/drbd/drbd_worker.c   |  6 ++---
 fs/cifs/transport.c                |  8 ++----
 include/linux/tcp.h                |  2 ++
 net/ipv4/tcp.c                     | 51 ++++++++++++++++++++++++--------------
 net/rds/tcp_send.c                 |  9 ++-----
 7 files changed, 43 insertions(+), 51 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index aae99a2d7bd4..3550adc93c68 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,20 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_cork(struct socket *sock)
-{
-	int val = 1;
-	(void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
-			(char*)&val, sizeof(val));
-}
-
-static inline void drbd_tcp_uncork(struct socket *sock)
-{
-	int val = 0;
-	(void) kernel_setsockopt(sock, SOL_TCP, TCP_CORK,
-			(char*)&val, sizeof(val));
-}
-
 static inline void drbd_tcp_nodelay(struct socket *sock)
 {
 	int val = 1;
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index c15e7083b13a..55ea907ad33c 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -6162,7 +6162,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
 	rcu_read_unlock();
 
 	if (tcp_cork)
-		drbd_tcp_cork(connection->meta.socket);
+		tcp_sock_set_cork(connection->meta.socket->sk, true);
 
 	err = drbd_finish_peer_reqs(device);
 	kref_put(&device->kref, drbd_destroy_device);
@@ -6175,7 +6175,7 @@ void drbd_send_acks_wf(struct work_struct *ws)
 	}
 
 	if (tcp_cork)
-		drbd_tcp_uncork(connection->meta.socket);
+		tcp_sock_set_cork(connection->meta.socket->sk, false);
 
 	return;
 }
diff --git a/drivers/block/drbd/drbd_worker.c b/drivers/block/drbd/drbd_worker.c
index 0dc019da1f8d..2b89c9f2ca70 100644
--- a/drivers/block/drbd/drbd_worker.c
+++ b/drivers/block/drbd/drbd_worker.c
@@ -2098,7 +2098,7 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 	if (uncork) {
 		mutex_lock(&connection->data.mutex);
 		if (connection->data.socket)
-			drbd_tcp_uncork(connection->data.socket);
+			tcp_sock_set_cork(connection->data.socket->sk, false);
 		mutex_unlock(&connection->data.mutex);
 	}
 
@@ -2153,9 +2153,9 @@ static void wait_for_work(struct drbd_connection *connection, struct list_head *
 	mutex_lock(&connection->data.mutex);
 	if (connection->data.socket) {
 		if (cork)
-			drbd_tcp_cork(connection->data.socket);
+			tcp_sock_set_cork(connection->data.socket->sk, true);
 		else if (!uncork)
-			drbd_tcp_uncork(connection->data.socket);
+			tcp_sock_set_cork(connection->data.socket->sk, false);
 	}
 	mutex_unlock(&connection->data.mutex);
 }
diff --git a/fs/cifs/transport.c b/fs/cifs/transport.c
index c97570eb2c18..99760063e000 100644
--- a/fs/cifs/transport.c
+++ b/fs/cifs/transport.c
@@ -325,7 +325,6 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 	size_t total_len = 0, sent, size;
 	struct socket *ssocket = server->ssocket;
 	struct msghdr smb_msg;
-	int val = 1;
 	__be32 rfc1002_marker;
 
 	if (cifs_rdma_enabled(server)) {
@@ -345,8 +344,7 @@ __smb_send_rqst(struct TCP_Server_Info *server, int num_rqst,
 	}
 
 	/* cork the socket */
-	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
-				(char *)&val, sizeof(val));
+	tcp_sock_set_cork(ssocket->sk, true);
 
 	for (j = 0; j < num_rqst; j++)
 		send_length += smb_rqst_len(server, &rqst[j]);
@@ -435,9 +433,7 @@ unmask:
 	}
 
 	/* uncork it */
-	val = 0;
-	kernel_setsockopt(ssocket, SOL_TCP, TCP_CORK,
-				(char *)&val, sizeof(val));
+	tcp_sock_set_cork(ssocket->sk, false);
 
 	if ((total_len > 0) && (total_len != send_length)) {
 		cifs_dbg(FYI, "partial send (wanted=%u sent=%zu): terminating session\n",
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index bf44e85d709d..889eeb2256c2 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -497,4 +497,6 @@ static inline u16 tcp_mss_clamp(const struct tcp_sock *tp, u16 mss)
 int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 		  int shiftlen);
 
+void tcp_sock_set_cork(struct sock *sk, bool on);
+
 #endif	/* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 970064996377..e6cf702e16d6 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2801,6 +2801,37 @@ static void tcp_enable_tx_delay(void)
 	}
 }
 
+/* When set indicates to always queue non-full frames.  Later the user clears
+ * this option and we transmit any pending partial frames in the queue.  This is
+ * meant to be used alongside sendfile() to get properly filled frames when the
+ * user (for example) must write out headers with a write() call first and then
+ * use sendfile to send out the data parts.
+ *
+ * TCP_CORK can be set together with TCP_NODELAY and it is stronger than
+ * TCP_NODELAY.
+ */
+static void __tcp_sock_set_cork(struct sock *sk, bool on)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (on) {
+		tp->nonagle |= TCP_NAGLE_CORK;
+	} else {
+		tp->nonagle &= ~TCP_NAGLE_CORK;
+		if (tp->nonagle & TCP_NAGLE_OFF)
+			tp->nonagle |= TCP_NAGLE_PUSH;
+		tcp_push_pending_frames(sk);
+	}
+}
+
+void tcp_sock_set_cork(struct sock *sk, bool on)
+{
+	lock_sock(sk);
+	__tcp_sock_set_cork(sk, on);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_cork);
+
 /*
  *	Socket option code for TCP.
  */
@@ -2979,25 +3010,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_CORK:
-		/* When set indicates to always queue non-full frames.
-		 * Later the user clears this option and we transmit
-		 * any pending partial frames in the queue.  This is
-		 * meant to be used alongside sendfile() to get properly
-		 * filled frames when the user (for example) must write
-		 * out headers with a write() call first and then use
-		 * sendfile to send out the data parts.
-		 *
-		 * TCP_CORK can be set together with TCP_NODELAY and it is
-		 * stronger than TCP_NODELAY.
-		 */
-		if (val) {
-			tp->nonagle |= TCP_NAGLE_CORK;
-		} else {
-			tp->nonagle &= ~TCP_NAGLE_CORK;
-			if (tp->nonagle&TCP_NAGLE_OFF)
-				tp->nonagle |= TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk);
-		}
+		__tcp_sock_set_cork(sk, val);
 		break;
 
 	case TCP_KEEPIDLE:
diff --git a/net/rds/tcp_send.c b/net/rds/tcp_send.c
index 78a2554a4497..8c4d1d6e9249 100644
--- a/net/rds/tcp_send.c
+++ b/net/rds/tcp_send.c
@@ -38,23 +38,18 @@
 #include "rds.h"
 #include "tcp.h"
 
-static void rds_tcp_cork(struct socket *sock, int val)
-{
-	kernel_setsockopt(sock, SOL_TCP, TCP_CORK, (void *)&val, sizeof(val));
-}
-
 void rds_tcp_xmit_path_prepare(struct rds_conn_path *cp)
 {
 	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
-	rds_tcp_cork(tc->t_sock, 1);
+	tcp_sock_set_cork(tc->t_sock->sk, true);
 }
 
 void rds_tcp_xmit_path_complete(struct rds_conn_path *cp)
 {
 	struct rds_tcp_connection *tc = cp->cp_transport_data;
 
-	rds_tcp_cork(tc->t_sock, 0);
+	tcp_sock_set_cork(tc->t_sock->sk, false);
 }
 
 /* the core send_sem serializes this with other xmit and shutdown */
-- 
cgit v1.2.3-59-g8ed1b


From 12abc5ee7873a085cc280240822b8ac53c86fecd Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:19 +0200
Subject: tcp: add tcp_sock_set_nodelay

Add a helper to directly set the TCP_NODELAY sockopt from kernel space
without going through a fake uaccess.  Clean up the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Acked-by: Jason Gunthorpe <jgg@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/drbd/drbd_int.h             |  7 ------
 drivers/block/drbd/drbd_main.c            |  2 +-
 drivers/block/drbd/drbd_receiver.c        |  4 ++--
 drivers/infiniband/sw/siw/siw_cm.c        | 24 ++++---------------
 drivers/nvme/host/tcp.c                   |  9 +------
 drivers/nvme/target/tcp.c                 | 12 ++--------
 drivers/target/iscsi/iscsi_target_login.c | 15 +++---------
 fs/cifs/connect.c                         | 10 ++------
 fs/dlm/lowcomms.c                         |  8 ++-----
 fs/ocfs2/cluster/tcp.c                    | 20 ++--------------
 include/linux/tcp.h                       |  1 +
 net/ceph/messenger.c                      | 11 ++-------
 net/ipv4/tcp.c                            | 39 ++++++++++++++++++++-----------
 net/rds/tcp.c                             | 11 +--------
 net/rds/tcp.h                             |  1 -
 net/rds/tcp_listen.c                      |  2 +-
 16 files changed, 49 insertions(+), 127 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index 3550adc93c68..e24bba87c8e0 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_nodelay(struct socket *sock)
-{
-	int val = 1;
-	(void) kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-			(char*)&val, sizeof(val));
-}
-
 static inline void drbd_tcp_quickack(struct socket *sock)
 {
 	int val = 2;
diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index c094c3c2c5d4..45fbd526c453 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -660,7 +660,7 @@ static int __send_command(struct drbd_connection *connection, int vnr,
 	/* DRBD protocol "pings" are latency critical.
 	 * This is supposed to trigger tcp_push_pending_frames() */
 	if (!err && (cmd == P_PING || cmd == P_PING_ACK))
-		drbd_tcp_nodelay(sock->socket);
+		tcp_sock_set_nodelay(sock->socket->sk);
 
 	return err;
 }
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 55ea907ad33c..20a5e94494ac 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1051,8 +1051,8 @@ randomize:
 
 	/* we don't want delays.
 	 * we use TCP_CORK where appropriate, though */
-	drbd_tcp_nodelay(sock.socket);
-	drbd_tcp_nodelay(msock.socket);
+	tcp_sock_set_nodelay(sock.socket->sk);
+	tcp_sock_set_nodelay(msock.socket->sk);
 
 	connection->data.socket = sock.socket;
 	connection->meta.socket = msock.socket;
diff --git a/drivers/infiniband/sw/siw/siw_cm.c b/drivers/infiniband/sw/siw/siw_cm.c
index d1860f3e8740..1662216be66d 100644
--- a/drivers/infiniband/sw/siw/siw_cm.c
+++ b/drivers/infiniband/sw/siw/siw_cm.c
@@ -947,16 +947,8 @@ static void siw_accept_newconn(struct siw_cep *cep)
 	siw_cep_get(new_cep);
 	new_s->sk->sk_user_data = new_cep;
 
-	if (siw_tcp_nagle == false) {
-		int val = 1;
-
-		rv = kernel_setsockopt(new_s, SOL_TCP, TCP_NODELAY,
-				       (char *)&val, sizeof(val));
-		if (rv) {
-			siw_dbg_cep(cep, "setsockopt NODELAY error: %d\n", rv);
-			goto error;
-		}
-	}
+	if (siw_tcp_nagle == false)
+		tcp_sock_set_nodelay(new_s->sk);
 	new_cep->state = SIW_EPSTATE_AWAIT_MPAREQ;
 
 	rv = siw_cm_queue_work(new_cep, SIW_CM_WORK_MPATIMEOUT);
@@ -1386,16 +1378,8 @@ int siw_connect(struct iw_cm_id *id, struct iw_cm_conn_param *params)
 		siw_dbg_qp(qp, "kernel_bindconnect: error %d\n", rv);
 		goto error;
 	}
-	if (siw_tcp_nagle == false) {
-		int val = 1;
-
-		rv = kernel_setsockopt(s, SOL_TCP, TCP_NODELAY, (char *)&val,
-				       sizeof(val));
-		if (rv) {
-			siw_dbg_qp(qp, "setsockopt NODELAY error: %d\n", rv);
-			goto error;
-		}
-	}
+	if (siw_tcp_nagle == false)
+		tcp_sock_set_nodelay(s->sk);
 	cep = siw_cep_alloc(sdev);
 	if (!cep) {
 		rv = -ENOMEM;
diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index a307972d33a0..4e4a750ecdb9 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1346,14 +1346,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	}
 
 	/* Set TCP no delay */
-	opt = 1;
-	ret = kernel_setsockopt(queue->sock, IPPROTO_TCP,
-			TCP_NODELAY, (char *)&opt, sizeof(opt));
-	if (ret) {
-		dev_err(nctrl->device,
-			"failed to set TCP_NODELAY sock opt %d\n", ret);
-		goto err_sock;
-	}
+	tcp_sock_set_nodelay(queue->sock->sk);
 
 	/*
 	 * Cleanup whatever is sitting in the TCP transmit queue on socket
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index f3088156d01d..55bc4c3c0a74 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1580,7 +1580,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 {
 	struct nvmet_tcp_port *port;
 	__kernel_sa_family_t af;
-	int opt, ret;
+	int ret;
 
 	port = kzalloc(sizeof(*port), GFP_KERNEL);
 	if (!port)
@@ -1625,15 +1625,7 @@ static int nvmet_tcp_add_port(struct nvmet_port *nport)
 	port->data_ready = port->sock->sk->sk_data_ready;
 	port->sock->sk->sk_data_ready = nvmet_tcp_listen_data_ready;
 	sock_set_reuseaddr(port->sock->sk);
-
-	opt = 1;
-	ret = kernel_setsockopt(port->sock, IPPROTO_TCP,
-			TCP_NODELAY, (char *)&opt, sizeof(opt));
-	if (ret) {
-		pr_err("failed to set TCP_NODELAY sock opt %d\n", ret);
-		goto err_sock;
-	}
-
+	tcp_sock_set_nodelay(port->sock->sk);
 	if (so_priority > 0)
 		sock_set_priority(port->sock->sk, so_priority);
 
diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index 91acb3f07b4c..b561b07a869a 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -897,20 +897,11 @@ int iscsit_setup_np(
 	/*
 	 * Set SO_REUSEADDR, and disable Nagel Algorithm with TCP_NODELAY.
 	 */
-	/* FIXME: Someone please explain why this is endian-safe */
-	opt = 1;
-	if (np->np_network_transport == ISCSI_TCP) {
-		ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_NODELAY,
-				(char *)&opt, sizeof(opt));
-		if (ret < 0) {
-			pr_err("kernel_setsockopt() for TCP_NODELAY"
-				" failed: %d\n", ret);
-			goto fail;
-		}
-	}
-
+	if (np->np_network_transport == ISCSI_TCP)
+		tcp_sock_set_nodelay(sock->sk);
 	sock_set_reuseaddr(sock->sk);
 
+	opt = 1;
 	ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
 			(char *)&opt, sizeof(opt));
 	if (ret < 0) {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 28268ed461b8..ad8fb53b3682 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -3929,14 +3929,8 @@ generic_ip_connect(struct TCP_Server_Info *server)
 			socket->sk->sk_rcvbuf = 140 * 1024;
 	}
 
-	if (server->tcp_nodelay) {
-		int val = 1;
-		rc = kernel_setsockopt(socket, SOL_TCP, TCP_NODELAY,
-				(char *)&val, sizeof(val));
-		if (rc)
-			cifs_dbg(FYI, "set TCP_NODELAY socket option error %d\n",
-				 rc);
-	}
+	if (server->tcp_nodelay)
+		tcp_sock_set_nodelay(socket->sk);
 
 	cifs_dbg(FYI, "sndbuf %d rcvbuf %d rcvtimeo 0x%lx\n",
 		 socket->sk->sk_sndbuf,
diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 2822a430a2b4..69333728d871 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -1011,7 +1011,6 @@ static void tcp_connect_to_sock(struct connection *con)
 	struct sockaddr_storage saddr, src_addr;
 	int addr_len;
 	struct socket *sock = NULL;
-	int one = 1;
 	int result;
 
 	if (con->nodeid == 0) {
@@ -1060,8 +1059,7 @@ static void tcp_connect_to_sock(struct connection *con)
 	log_print("connecting to %d", con->nodeid);
 
 	/* Turn off Nagle's algorithm */
-	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
-			  sizeof(one));
+	tcp_sock_set_nodelay(sock->sk);
 
 	result = sock->ops->connect(sock, (struct sockaddr *)&saddr, addr_len,
 				   O_NONBLOCK);
@@ -1103,7 +1101,6 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 {
 	struct socket *sock = NULL;
 	int result = 0;
-	int one = 1;
 	int addr_len;
 
 	if (dlm_local_addr[0]->ss_family == AF_INET)
@@ -1120,8 +1117,7 @@ static struct socket *tcp_create_listen_sock(struct connection *con,
 	}
 
 	/* Turn off Nagle's algorithm */
-	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (char *)&one,
-			  sizeof(one));
+	tcp_sock_set_nodelay(sock->sk);
 
 	sock_set_reuseaddr(sock->sk);
 
diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 2c512b40a940..4c70fe9d19ab 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
 	sc_put(sc);
 }
 
-static int o2net_set_nodelay(struct socket *sock)
-{
-	int val = 1;
-
-	return kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-				    (void *)&val, sizeof(val));
-}
-
 static int o2net_set_usertimeout(struct socket *sock)
 {
 	int user_timeout = O2NET_TCP_USER_TIMEOUT;
@@ -1636,11 +1628,7 @@ static void o2net_start_connect(struct work_struct *work)
 		goto out;
 	}
 
-	ret = o2net_set_nodelay(sc->sc_sock);
-	if (ret) {
-		mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
-		goto out;
-	}
+	tcp_sock_set_nodelay(sc->sc_sock->sk);
 
 	ret = o2net_set_usertimeout(sock);
 	if (ret) {
@@ -1832,11 +1820,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
 	*more = 1;
 	new_sock->sk->sk_allocation = GFP_ATOMIC;
 
-	ret = o2net_set_nodelay(new_sock);
-	if (ret) {
-		mlog(ML_ERROR, "setting TCP_NODELAY failed with %d\n", ret);
-		goto out;
-	}
+	tcp_sock_set_nodelay(new_sock->sk);
 
 	ret = o2net_set_usertimeout(new_sock);
 	if (ret) {
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 889eeb2256c2..9e42c7fe50a8 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,5 +498,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 		  int shiftlen);
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
+void tcp_sock_set_nodelay(struct sock *sk);
 
 #endif	/* _LINUX_TCP_H */
diff --git a/net/ceph/messenger.c b/net/ceph/messenger.c
index f8ca5edc5f2c..27d6ab11f9ee 100644
--- a/net/ceph/messenger.c
+++ b/net/ceph/messenger.c
@@ -490,15 +490,8 @@ static int ceph_tcp_connect(struct ceph_connection *con)
 		return ret;
 	}
 
-	if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY)) {
-		int optval = 1;
-
-		ret = kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY,
-					(char *)&optval, sizeof(optval));
-		if (ret)
-			pr_err("kernel_setsockopt(TCP_NODELAY) failed: %d",
-			       ret);
-	}
+	if (ceph_test_opt(from_msgr(con->msgr), TCP_NODELAY))
+		tcp_sock_set_nodelay(sock->sk);
 
 	con->sock = sock;
 	return 0;
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index e6cf702e16d6..a65f293a19fa 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2832,6 +2832,30 @@ void tcp_sock_set_cork(struct sock *sk, bool on)
 }
 EXPORT_SYMBOL(tcp_sock_set_cork);
 
+/* TCP_NODELAY is weaker than TCP_CORK, so that this option on corked socket is
+ * remembered, but it is not activated until cork is cleared.
+ *
+ * However, when TCP_NODELAY is set we make an explicit push, which overrides
+ * even TCP_CORK for currently queued segments.
+ */
+static void __tcp_sock_set_nodelay(struct sock *sk, bool on)
+{
+	if (on) {
+		tcp_sk(sk)->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
+		tcp_push_pending_frames(sk);
+	} else {
+		tcp_sk(sk)->nonagle &= ~TCP_NAGLE_OFF;
+	}
+}
+
+void tcp_sock_set_nodelay(struct sock *sk)
+{
+	lock_sock(sk);
+	__tcp_sock_set_nodelay(sk, true);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_nodelay);
+
 /*
  *	Socket option code for TCP.
  */
@@ -2929,20 +2953,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_NODELAY:
-		if (val) {
-			/* TCP_NODELAY is weaker than TCP_CORK, so that
-			 * this option on corked socket is remembered, but
-			 * it is not activated until cork is cleared.
-			 *
-			 * However, when TCP_NODELAY is set we make
-			 * an explicit push, which overrides even TCP_CORK
-			 * for currently queued segments.
-			 */
-			tp->nonagle |= TCP_NAGLE_OFF|TCP_NAGLE_PUSH;
-			tcp_push_pending_frames(sk);
-		} else {
-			tp->nonagle &= ~TCP_NAGLE_OFF;
-		}
+		__tcp_sock_set_nodelay(sk, val);
 		break;
 
 	case TCP_THIN_LINEAR_TIMEOUTS:
diff --git a/net/rds/tcp.c b/net/rds/tcp.c
index 46782fac4c16..43db0eca911f 100644
--- a/net/rds/tcp.c
+++ b/net/rds/tcp.c
@@ -89,15 +89,6 @@ static struct ctl_table rds_tcp_sysctl_table[] = {
 	{ }
 };
 
-/* doing it this way avoids calling tcp_sk() */
-void rds_tcp_nonagle(struct socket *sock)
-{
-	int val = 1;
-
-	kernel_setsockopt(sock, SOL_TCP, TCP_NODELAY, (void *)&val,
-			      sizeof(val));
-}
-
 u32 rds_tcp_write_seq(struct rds_tcp_connection *tc)
 {
 	/* seq# of the last byte of data in tcp send buffer */
@@ -502,7 +493,7 @@ void rds_tcp_tune(struct socket *sock)
 	struct net *net = sock_net(sk);
 	struct rds_tcp_net *rtn = net_generic(net, rds_tcp_netid);
 
-	rds_tcp_nonagle(sock);
+	tcp_sock_set_nodelay(sock->sk);
 	lock_sock(sk);
 	if (rtn->sndbuf_size > 0) {
 		sk->sk_sndbuf = rtn->sndbuf_size;
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index d640e210b97b..f6d75d8cb167 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -50,7 +50,6 @@ struct rds_tcp_statistics {
 
 /* tcp.c */
 void rds_tcp_tune(struct socket *sock);
-void rds_tcp_nonagle(struct socket *sock);
 void rds_tcp_set_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_reset_callbacks(struct socket *sock, struct rds_conn_path *cp);
 void rds_tcp_restore_callbacks(struct socket *sock,
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index d8bd13276959..6f90ea077adc 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -288,7 +288,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6)
 	}
 
 	sock->sk->sk_reuse = SK_CAN_REUSE;
-	rds_tcp_nonagle(sock);
+	tcp_sock_set_nodelay(sock->sk);
 
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	sock->sk->sk_user_data = sock->sk->sk_data_ready;
-- 
cgit v1.2.3-59-g8ed1b


From ddd061b8daed3ce0c01109a69c9a2a9f9669f01a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:20 +0200
Subject: tcp: add tcp_sock_set_quickack

Add a helper to directly set the TCP_QUICKACK sockopt from kernel space
without going through a fake uaccess.  Clean up the callers to avoid
pointless wrappers now that this is a simple function call.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/block/drbd/drbd_int.h      |  7 -------
 drivers/block/drbd/drbd_receiver.c |  5 ++---
 include/linux/tcp.h                |  1 +
 net/ipv4/tcp.c                     | 39 +++++++++++++++++++++++++-------------
 4 files changed, 29 insertions(+), 23 deletions(-)

diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h
index e24bba87c8e0..14345a87c7cc 100644
--- a/drivers/block/drbd/drbd_int.h
+++ b/drivers/block/drbd/drbd_int.h
@@ -1570,13 +1570,6 @@ extern void drbd_set_recv_tcq(struct drbd_device *device, int tcq_enabled);
 extern void _drbd_clear_done_ee(struct drbd_device *device, struct list_head *to_be_freed);
 extern int drbd_connected(struct drbd_peer_device *);
 
-static inline void drbd_tcp_quickack(struct socket *sock)
-{
-	int val = 2;
-	(void) kernel_setsockopt(sock, SOL_TCP, TCP_QUICKACK,
-			(char*)&val, sizeof(val));
-}
-
 /* sets the number of 512 byte sectors of our virtual device */
 void drbd_set_my_capacity(struct drbd_device *device, sector_t size);
 
diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c
index 20a5e94494ac..3a3f2b6a821f 100644
--- a/drivers/block/drbd/drbd_receiver.c
+++ b/drivers/block/drbd/drbd_receiver.c
@@ -1223,7 +1223,7 @@ static int drbd_recv_header_maybe_unplug(struct drbd_connection *connection, str
 		 * quickly as possible, and let remote TCP know what we have
 		 * received so far. */
 		if (err == -EAGAIN) {
-			drbd_tcp_quickack(connection->data.socket);
+			tcp_sock_set_quickack(connection->data.socket->sk, 2);
 			drbd_unplug_all_devices(connection);
 		}
 		if (err > 0) {
@@ -4959,8 +4959,7 @@ static int receive_UnplugRemote(struct drbd_connection *connection, struct packe
 {
 	/* Make sure we've acked all the TCP data associated
 	 * with the data requests being unplugged */
-	drbd_tcp_quickack(connection->data.socket);
-
+	tcp_sock_set_quickack(connection->data.socket->sk, 2);
 	return 0;
 }
 
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 9e42c7fe50a8..2eaf8320b9db 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -499,5 +499,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk);
+void tcp_sock_set_quickack(struct sock *sk, int val);
 
 #endif	/* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index a65f293a19fa..27b5e7a4e2ef 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2856,6 +2856,31 @@ void tcp_sock_set_nodelay(struct sock *sk)
 }
 EXPORT_SYMBOL(tcp_sock_set_nodelay);
 
+static void __tcp_sock_set_quickack(struct sock *sk, int val)
+{
+	if (!val) {
+		inet_csk_enter_pingpong_mode(sk);
+		return;
+	}
+
+	inet_csk_exit_pingpong_mode(sk);
+	if ((1 << sk->sk_state) & (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
+	    inet_csk_ack_scheduled(sk)) {
+		inet_csk(sk)->icsk_ack.pending |= ICSK_ACK_PUSHED;
+		tcp_cleanup_rbuf(sk, 1);
+		if (!(val & 1))
+			inet_csk_enter_pingpong_mode(sk);
+	}
+}
+
+void tcp_sock_set_quickack(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__tcp_sock_set_quickack(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_quickack);
+
 /*
  *	Socket option code for TCP.
  */
@@ -3096,19 +3121,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_QUICKACK:
-		if (!val) {
-			inet_csk_enter_pingpong_mode(sk);
-		} else {
-			inet_csk_exit_pingpong_mode(sk);
-			if ((1 << sk->sk_state) &
-			    (TCPF_ESTABLISHED | TCPF_CLOSE_WAIT) &&
-			    inet_csk_ack_scheduled(sk)) {
-				icsk->icsk_ack.pending |= ICSK_ACK_PUSHED;
-				tcp_cleanup_rbuf(sk, 1);
-				if (!(val & 1))
-					inet_csk_enter_pingpong_mode(sk);
-			}
-		}
+		__tcp_sock_set_quickack(sk, val);
 		break;
 
 #ifdef CONFIG_TCP_MD5SIG
-- 
cgit v1.2.3-59-g8ed1b


From 557eadfcc5ee8f8fa98a795e05ed21db58a65db5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:21 +0200
Subject: tcp: add tcp_sock_set_syncnt

Add a helper to directly set the TCP_SYNCNT sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp.c |  9 +--------
 include/linux/tcp.h     |  1 +
 net/ipv4/tcp.c          | 12 ++++++++++++
 3 files changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 4e4a750ecdb9..2872584f52f6 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1336,14 +1336,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 	}
 
 	/* Single syn retry */
-	opt = 1;
-	ret = kernel_setsockopt(queue->sock, IPPROTO_TCP, TCP_SYNCNT,
-			(char *)&opt, sizeof(opt));
-	if (ret) {
-		dev_err(nctrl->device,
-			"failed to set TCP_SYNCNT sock opt %d\n", ret);
-		goto err_sock;
-	}
+	tcp_sock_set_syncnt(queue->sock->sk, 1);
 
 	/* Set TCP no delay */
 	tcp_sock_set_nodelay(queue->sock->sk);
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 2eaf8320b9db..6aa4ae5ebf3d 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -500,5 +500,6 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
+int tcp_sock_set_syncnt(struct sock *sk, int val);
 
 #endif	/* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 27b5e7a4e2ef..d2c67ae1da07 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2881,6 +2881,18 @@ void tcp_sock_set_quickack(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_quickack);
 
+int tcp_sock_set_syncnt(struct sock *sk, int val)
+{
+	if (val < 1 || val > MAX_TCP_SYNCNT)
+		return -EINVAL;
+
+	lock_sock(sk);
+	inet_csk(sk)->icsk_syn_retries = val;
+	release_sock(sk);
+	return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_syncnt);
+
 /*
  *	Socket option code for TCP.
  */
-- 
cgit v1.2.3-59-g8ed1b


From c488aeadcbd002a992593e6090d54e8ac27c4310 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:22 +0200
Subject: tcp: add tcp_sock_set_user_timeout

Add a helper to directly set the TCP_USER_TIMEOUT sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/ocfs2/cluster/tcp.c | 22 ++--------------------
 include/linux/tcp.h    |  1 +
 net/ipv4/tcp.c         |  8 ++++++++
 net/sunrpc/xprtsock.c  |  3 +--
 4 files changed, 12 insertions(+), 22 deletions(-)

diff --git a/fs/ocfs2/cluster/tcp.c b/fs/ocfs2/cluster/tcp.c
index 4c70fe9d19ab..79a231719460 100644
--- a/fs/ocfs2/cluster/tcp.c
+++ b/fs/ocfs2/cluster/tcp.c
@@ -1441,14 +1441,6 @@ static void o2net_rx_until_empty(struct work_struct *work)
 	sc_put(sc);
 }
 
-static int o2net_set_usertimeout(struct socket *sock)
-{
-	int user_timeout = O2NET_TCP_USER_TIMEOUT;
-
-	return kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-				(void *)&user_timeout, sizeof(user_timeout));
-}
-
 static void o2net_initialize_handshake(void)
 {
 	o2net_hand->o2hb_heartbeat_timeout_ms = cpu_to_be32(
@@ -1629,12 +1621,7 @@ static void o2net_start_connect(struct work_struct *work)
 	}
 
 	tcp_sock_set_nodelay(sc->sc_sock->sk);
-
-	ret = o2net_set_usertimeout(sock);
-	if (ret) {
-		mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
-		goto out;
-	}
+	tcp_sock_set_user_timeout(sock->sk, O2NET_TCP_USER_TIMEOUT);
 
 	o2net_register_callbacks(sc->sc_sock->sk, sc);
 
@@ -1821,12 +1808,7 @@ static int o2net_accept_one(struct socket *sock, int *more)
 	new_sock->sk->sk_allocation = GFP_ATOMIC;
 
 	tcp_sock_set_nodelay(new_sock->sk);
-
-	ret = o2net_set_usertimeout(new_sock);
-	if (ret) {
-		mlog(ML_ERROR, "set TCP_USER_TIMEOUT failed with %d\n", ret);
-		goto out;
-	}
+	tcp_sock_set_user_timeout(new_sock->sk, O2NET_TCP_USER_TIMEOUT);
 
 	ret = new_sock->ops->getname(new_sock, (struct sockaddr *) &sin, 1);
 	if (ret < 0)
diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 6aa4ae5ebf3d..de682143efe4 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -501,5 +501,6 @@ void tcp_sock_set_cork(struct sock *sk, bool on);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val);
 
 #endif	/* _LINUX_TCP_H */
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index d2c67ae1da07..0004bd9ae7b0 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2893,6 +2893,14 @@ int tcp_sock_set_syncnt(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_syncnt);
 
+void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
+{
+	lock_sock(sk);
+	inet_csk(sk)->icsk_user_timeout = val;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(tcp_sock_set_user_timeout);
+
 /*
  *	Socket option code for TCP.
  */
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 399848c2bcb2..231fd6162f68 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2115,8 +2115,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 			(char *)&keepcnt, sizeof(keepcnt));
 
 	/* TCP user timeout (see RFC5482) */
-	kernel_setsockopt(sock, SOL_TCP, TCP_USER_TIMEOUT,
-			(char *)&timeo, sizeof(timeo));
+	tcp_sock_set_user_timeout(sock->sk, timeo);
 }
 
 static void xs_tcp_set_connect_timeout(struct rpc_xprt *xprt,
-- 
cgit v1.2.3-59-g8ed1b


From 71c48eb81c9ecb6fed49dc33e7c9b621fdcb7bf8 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:23 +0200
Subject: tcp: add tcp_sock_set_keepidle

Add a helper to directly set the TCP_KEEPIDLE sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp.c        | 49 ++++++++++++++++++++++++++++++++++---------------
 net/rds/tcp_listen.c  |  5 +----
 net/sunrpc/xprtsock.c |  3 +--
 4 files changed, 37 insertions(+), 21 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index de682143efe4..5724dd84a85e 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 		  int shiftlen);
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
+int tcp_sock_set_keepidle(struct sock *sk, int val);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 0004bd9ae7b0..bdf0ff933351 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2901,6 +2901,39 @@ void tcp_sock_set_user_timeout(struct sock *sk, u32 val)
 }
 EXPORT_SYMBOL(tcp_sock_set_user_timeout);
 
+static int __tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+	struct tcp_sock *tp = tcp_sk(sk);
+
+	if (val < 1 || val > MAX_TCP_KEEPIDLE)
+		return -EINVAL;
+
+	tp->keepalive_time = val * HZ;
+	if (sock_flag(sk, SOCK_KEEPOPEN) &&
+	    !((1 << sk->sk_state) & (TCPF_CLOSE | TCPF_LISTEN))) {
+		u32 elapsed = keepalive_time_elapsed(tp);
+
+		if (tp->keepalive_time > elapsed)
+			elapsed = tp->keepalive_time - elapsed;
+		else
+			elapsed = 0;
+		inet_csk_reset_keepalive_timer(sk, elapsed);
+	}
+
+	return 0;
+}
+
+int tcp_sock_set_keepidle(struct sock *sk, int val)
+{
+	int err;
+
+	lock_sock(sk);
+	err = __tcp_sock_set_keepidle(sk, val);
+	release_sock(sk);
+	return err;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepidle);
+
 /*
  *	Socket option code for TCP.
  */
@@ -3070,21 +3103,7 @@ static int do_tcp_setsockopt(struct sock *sk, int level,
 		break;
 
 	case TCP_KEEPIDLE:
-		if (val < 1 || val > MAX_TCP_KEEPIDLE)
-			err = -EINVAL;
-		else {
-			tp->keepalive_time = val * HZ;
-			if (sock_flag(sk, SOCK_KEEPOPEN) &&
-			    !((1 << sk->sk_state) &
-			      (TCPF_CLOSE | TCPF_LISTEN))) {
-				u32 elapsed = keepalive_time_elapsed(tp);
-				if (tp->keepalive_time > elapsed)
-					elapsed = tp->keepalive_time - elapsed;
-				else
-					elapsed = 0;
-				inet_csk_reset_keepalive_timer(sk, elapsed);
-			}
-		}
+		err = __tcp_sock_set_keepidle(sk, val);
 		break;
 	case TCP_KEEPINTVL:
 		if (val < 1 || val > MAX_TCP_KEEPINTVL)
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 6f90ea077adc..79f9adc00811 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -52,10 +52,7 @@ int rds_tcp_keepalive(struct socket *sock)
 	if (ret < 0)
 		goto bail;
 
-	ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPIDLE,
-				(char *)&keepidle, sizeof(keepidle));
-	if (ret < 0)
-		goto bail;
+	tcp_sock_set_keepidle(sock->sk, keepidle);
 
 	/* KEEPINTVL is the interval between successive probes. We follow
 	 * the model in xs_tcp_finish_connecting() and re-use keepidle.
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 231fd6162f68..473290f7c5c0 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2107,8 +2107,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 
 	/* TCP Keepalive options */
 	sock_set_keepalive(sock->sk);
-	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPIDLE,
-			(char *)&keepidle, sizeof(keepidle));
+	tcp_sock_set_keepidle(sock->sk, keepidle);
 	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
 			(char *)&keepidle, sizeof(keepidle));
 	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-- 
cgit v1.2.3-59-g8ed1b


From d41ecaac903c9f4658a71d4e7a708673cfb5abba Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:24 +0200
Subject: tcp: add tcp_sock_set_keepintvl

Add a helper to directly set the TCP_KEEPINTVL sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp.c        | 12 ++++++++++++
 net/rds/tcp_listen.c  |  4 +---
 net/sunrpc/xprtsock.c |  3 +--
 4 files changed, 15 insertions(+), 5 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 5724dd84a85e..1f9bada00faa 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -499,6 +499,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
 int tcp_sock_set_keepidle(struct sock *sk, int val);
+int tcp_sock_set_keepintvl(struct sock *sk, int val);
 void tcp_sock_set_nodelay(struct sock *sk);
 void tcp_sock_set_quickack(struct sock *sk, int val);
 int tcp_sock_set_syncnt(struct sock *sk, int val);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index bdf0ff933351..7eb083e09786 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2934,6 +2934,18 @@ int tcp_sock_set_keepidle(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_keepidle);
 
+int tcp_sock_set_keepintvl(struct sock *sk, int val)
+{
+	if (val < 1 || val > MAX_TCP_KEEPINTVL)
+		return -EINVAL;
+
+	lock_sock(sk);
+	tcp_sk(sk)->keepalive_intvl = val * HZ;
+	release_sock(sk);
+	return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepintvl);
+
 /*
  *	Socket option code for TCP.
  */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 79f9adc00811..9ad555c48d15 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -53,12 +53,10 @@ int rds_tcp_keepalive(struct socket *sock)
 		goto bail;
 
 	tcp_sock_set_keepidle(sock->sk, keepidle);
-
 	/* KEEPINTVL is the interval between successive probes. We follow
 	 * the model in xs_tcp_finish_connecting() and re-use keepidle.
 	 */
-	ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPINTVL,
-				(char *)&keepidle, sizeof(keepidle));
+	tcp_sock_set_keepintvl(sock->sk, keepidle);
 bail:
 	return ret;
 }
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 473290f7c5c0..5ca64e12af0c 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2108,8 +2108,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 	/* TCP Keepalive options */
 	sock_set_keepalive(sock->sk);
 	tcp_sock_set_keepidle(sock->sk, keepidle);
-	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPINTVL,
-			(char *)&keepidle, sizeof(keepidle));
+	tcp_sock_set_keepintvl(sock->sk, keepidle);
 	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
 			(char *)&keepcnt, sizeof(keepcnt));
 
-- 
cgit v1.2.3-59-g8ed1b


From 480aeb9639d6a077c611b303a22f9b1e5937d081 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:25 +0200
Subject: tcp: add tcp_sock_set_keepcnt

Add a helper to directly set the TCP_KEEPCNT sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/tcp.h   |  1 +
 net/ipv4/tcp.c        | 12 ++++++++++++
 net/rds/tcp.h         |  2 +-
 net/rds/tcp_listen.c  | 17 +++--------------
 net/sunrpc/xprtsock.c |  3 +--
 5 files changed, 18 insertions(+), 17 deletions(-)

diff --git a/include/linux/tcp.h b/include/linux/tcp.h
index 1f9bada00faa..9aac824c523c 100644
--- a/include/linux/tcp.h
+++ b/include/linux/tcp.h
@@ -498,6 +498,7 @@ int tcp_skb_shift(struct sk_buff *to, struct sk_buff *from, int pcount,
 		  int shiftlen);
 
 void tcp_sock_set_cork(struct sock *sk, bool on);
+int tcp_sock_set_keepcnt(struct sock *sk, int val);
 int tcp_sock_set_keepidle(struct sock *sk, int val);
 int tcp_sock_set_keepintvl(struct sock *sk, int val);
 void tcp_sock_set_nodelay(struct sock *sk);
diff --git a/net/ipv4/tcp.c b/net/ipv4/tcp.c
index 7eb083e09786..15d47d5e7951 100644
--- a/net/ipv4/tcp.c
+++ b/net/ipv4/tcp.c
@@ -2946,6 +2946,18 @@ int tcp_sock_set_keepintvl(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(tcp_sock_set_keepintvl);
 
+int tcp_sock_set_keepcnt(struct sock *sk, int val)
+{
+	if (val < 1 || val > MAX_TCP_KEEPCNT)
+		return -EINVAL;
+
+	lock_sock(sk);
+	tcp_sk(sk)->keepalive_probes = val;
+	release_sock(sk);
+	return 0;
+}
+EXPORT_SYMBOL(tcp_sock_set_keepcnt);
+
 /*
  *	Socket option code for TCP.
  */
diff --git a/net/rds/tcp.h b/net/rds/tcp.h
index f6d75d8cb167..bad9cf49d565 100644
--- a/net/rds/tcp.h
+++ b/net/rds/tcp.h
@@ -70,7 +70,7 @@ struct socket *rds_tcp_listen_init(struct net *net, bool isv6);
 void rds_tcp_listen_stop(struct socket *sock, struct work_struct *acceptor);
 void rds_tcp_listen_data_ready(struct sock *sk);
 int rds_tcp_accept_one(struct socket *sock);
-int rds_tcp_keepalive(struct socket *sock);
+void rds_tcp_keepalive(struct socket *sock);
 void *rds_tcp_listen_sock_def_readable(struct net *net);
 
 /* tcp_recv.c */
diff --git a/net/rds/tcp_listen.c b/net/rds/tcp_listen.c
index 9ad555c48d15..101cf14215a0 100644
--- a/net/rds/tcp_listen.c
+++ b/net/rds/tcp_listen.c
@@ -38,27 +38,19 @@
 #include "rds.h"
 #include "tcp.h"
 
-int rds_tcp_keepalive(struct socket *sock)
+void rds_tcp_keepalive(struct socket *sock)
 {
 	/* values below based on xs_udp_default_timeout */
 	int keepidle = 5; /* send a probe 'keepidle' secs after last data */
 	int keepcnt = 5; /* number of unack'ed probes before declaring dead */
-	int ret = 0;
 
 	sock_set_keepalive(sock->sk);
-
-	ret = kernel_setsockopt(sock, IPPROTO_TCP, TCP_KEEPCNT,
-				(char *)&keepcnt, sizeof(keepcnt));
-	if (ret < 0)
-		goto bail;
-
+	tcp_sock_set_keepcnt(sock->sk, keepcnt);
 	tcp_sock_set_keepidle(sock->sk, keepidle);
 	/* KEEPINTVL is the interval between successive probes. We follow
 	 * the model in xs_tcp_finish_connecting() and re-use keepidle.
 	 */
 	tcp_sock_set_keepintvl(sock->sk, keepidle);
-bail:
-	return ret;
 }
 
 /* rds_tcp_accept_one_path(): if accepting on cp_index > 0, make sure the
@@ -140,10 +132,7 @@ int rds_tcp_accept_one(struct socket *sock)
 	new_sock->ops = sock->ops;
 	__module_get(new_sock->ops->owner);
 
-	ret = rds_tcp_keepalive(new_sock);
-	if (ret < 0)
-		goto out;
-
+	rds_tcp_keepalive(new_sock);
 	rds_tcp_tune(new_sock);
 
 	inet = inet_sk(new_sock->sk);
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 5ca64e12af0c..0d3ec055bc12 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2109,8 +2109,7 @@ static void xs_tcp_set_socket_timeouts(struct rpc_xprt *xprt,
 	sock_set_keepalive(sock->sk);
 	tcp_sock_set_keepidle(sock->sk, keepidle);
 	tcp_sock_set_keepintvl(sock->sk, keepidle);
-	kernel_setsockopt(sock, SOL_TCP, TCP_KEEPCNT,
-			(char *)&keepcnt, sizeof(keepcnt));
+	tcp_sock_set_keepcnt(sock->sk, keepcnt);
 
 	/* TCP user timeout (see RFC5482) */
 	tcp_sock_set_user_timeout(sock->sk, timeo);
-- 
cgit v1.2.3-59-g8ed1b


From 6ebf71bab9fb476fc8132be4c12b88201278f0ca Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:26 +0200
Subject: ipv4: add ip_sock_set_tos

Add a helper to directly set the IP_TOS sockopt from kernel space without
going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Sagi Grimberg <sagi@grimberg.me>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/nvme/host/tcp.c   | 14 +++-----------
 drivers/nvme/target/tcp.c | 10 ++--------
 include/net/ip.h          |  2 ++
 net/ipv4/ip_sockglue.c    | 30 +++++++++++++++++++++---------
 4 files changed, 28 insertions(+), 28 deletions(-)

diff --git a/drivers/nvme/host/tcp.c b/drivers/nvme/host/tcp.c
index 2872584f52f6..4c972d8abf31 100644
--- a/drivers/nvme/host/tcp.c
+++ b/drivers/nvme/host/tcp.c
@@ -1313,7 +1313,7 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 {
 	struct nvme_tcp_ctrl *ctrl = to_tcp_ctrl(nctrl);
 	struct nvme_tcp_queue *queue = &ctrl->queues[qid];
-	int ret, opt, rcv_pdu_size;
+	int ret, rcv_pdu_size;
 
 	queue->ctrl = ctrl;
 	INIT_LIST_HEAD(&queue->send_list);
@@ -1352,16 +1352,8 @@ static int nvme_tcp_alloc_queue(struct nvme_ctrl *nctrl,
 		sock_set_priority(queue->sock->sk, so_priority);
 
 	/* Set socket type of service */
-	if (nctrl->opts->tos >= 0) {
-		opt = nctrl->opts->tos;
-		ret = kernel_setsockopt(queue->sock, SOL_IP, IP_TOS,
-				(char *)&opt, sizeof(opt));
-		if (ret) {
-			dev_err(nctrl->device,
-				"failed to set IP_TOS sock opt %d\n", ret);
-			goto err_sock;
-		}
-	}
+	if (nctrl->opts->tos >= 0)
+		ip_sock_set_tos(queue->sock->sk, nctrl->opts->tos);
 
 	queue->sock->sk->sk_allocation = GFP_ATOMIC;
 	nvme_tcp_set_queue_io_cpu(queue);
diff --git a/drivers/nvme/target/tcp.c b/drivers/nvme/target/tcp.c
index 55bc4c3c0a74..4546049a96b3 100644
--- a/drivers/nvme/target/tcp.c
+++ b/drivers/nvme/target/tcp.c
@@ -1452,14 +1452,8 @@ static int nvmet_tcp_set_queue_sock(struct nvmet_tcp_queue *queue)
 		sock_set_priority(sock->sk, so_priority);
 
 	/* Set socket type of service */
-	if (inet->rcv_tos > 0) {
-		int tos = inet->rcv_tos;
-
-		ret = kernel_setsockopt(sock, SOL_IP, IP_TOS,
-				(char *)&tos, sizeof(tos));
-		if (ret)
-			return ret;
-	}
+	if (inet->rcv_tos > 0)
+		ip_sock_set_tos(sock->sk, inet->rcv_tos);
 
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	sock->sk->sk_user_data = queue;
diff --git a/include/net/ip.h b/include/net/ip.h
index 5b317c9f4470..2fc52e26fa88 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,4 +765,6 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 	return likely(mtu >= IPV4_MIN_MTU);
 }
 
+void ip_sock_set_tos(struct sock *sk, int val);
+
 #endif	/* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index f43d5f12aa86..b43a29e11f4a 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -560,6 +560,26 @@ out:
 	return err;
 }
 
+static void __ip_sock_set_tos(struct sock *sk, int val)
+{
+	if (sk->sk_type == SOCK_STREAM) {
+		val &= ~INET_ECN_MASK;
+		val |= inet_sk(sk)->tos & INET_ECN_MASK;
+	}
+	if (inet_sk(sk)->tos != val) {
+		inet_sk(sk)->tos = val;
+		sk->sk_priority = rt_tos2priority(val);
+		sk_dst_reset(sk);
+	}
+}
+
+void ip_sock_set_tos(struct sock *sk, int val)
+{
+	lock_sock(sk);
+	__ip_sock_set_tos(sk, val);
+	release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_tos);
 
 /*
  *	Socket option code for IP. This is the end of the line after any
@@ -823,15 +843,7 @@ static int do_ip_setsockopt(struct sock *sk, int level,
 			inet->cmsg_flags &= ~IP_CMSG_RECVFRAGSIZE;
 		break;
 	case IP_TOS:	/* This sets both TOS and Precedence */
-		if (sk->sk_type == SOCK_STREAM) {
-			val &= ~INET_ECN_MASK;
-			val |= inet->tos & INET_ECN_MASK;
-		}
-		if (inet->tos != val) {
-			inet->tos = val;
-			sk->sk_priority = rt_tos2priority(val);
-			sk_dst_reset(sk);
-		}
+		__ip_sock_set_tos(sk, val);
 		break;
 	case IP_TTL:
 		if (optlen < 1)
-- 
cgit v1.2.3-59-g8ed1b


From c4e446bf5a06a1db24b4f0115a89f0380a495c62 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:27 +0200
Subject: ipv4: add ip_sock_set_freebind

Add a helper to directly set the IP_FREEBIND sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/target/iscsi/iscsi_target_login.c | 13 +++----------
 include/net/ip.h                          |  1 +
 net/ipv4/ip_sockglue.c                    |  8 ++++++++
 3 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/drivers/target/iscsi/iscsi_target_login.c b/drivers/target/iscsi/iscsi_target_login.c
index b561b07a869a..85748e338858 100644
--- a/drivers/target/iscsi/iscsi_target_login.c
+++ b/drivers/target/iscsi/iscsi_target_login.c
@@ -15,6 +15,7 @@
 #include <linux/sched/signal.h>
 #include <linux/idr.h>
 #include <linux/tcp.h>        /* TCP_NODELAY */
+#include <net/ip.h>
 #include <net/ipv6.h>         /* ipv6_addr_v4mapped() */
 #include <scsi/iscsi_proto.h>
 #include <target/target_core_base.h>
@@ -855,7 +856,7 @@ int iscsit_setup_np(
 	struct sockaddr_storage *sockaddr)
 {
 	struct socket *sock = NULL;
-	int backlog = ISCSIT_TCP_BACKLOG, ret, opt = 0, len;
+	int backlog = ISCSIT_TCP_BACKLOG, ret, len;
 
 	switch (np->np_network_transport) {
 	case ISCSI_TCP:
@@ -900,15 +901,7 @@ int iscsit_setup_np(
 	if (np->np_network_transport == ISCSI_TCP)
 		tcp_sock_set_nodelay(sock->sk);
 	sock_set_reuseaddr(sock->sk);
-
-	opt = 1;
-	ret = kernel_setsockopt(sock, IPPROTO_IP, IP_FREEBIND,
-			(char *)&opt, sizeof(opt));
-	if (ret < 0) {
-		pr_err("kernel_setsockopt() for IP_FREEBIND"
-			" failed\n");
-		goto fail;
-	}
+	ip_sock_set_freebind(sock->sk);
 
 	ret = kernel_bind(sock, (struct sockaddr *)&np->np_sockaddr, len);
 	if (ret < 0) {
diff --git a/include/net/ip.h b/include/net/ip.h
index 2fc52e26fa88..5f5d8226b6ab 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -765,6 +765,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 	return likely(mtu >= IPV4_MIN_MTU);
 }
 
+void ip_sock_set_freebind(struct sock *sk);
 void ip_sock_set_tos(struct sock *sk, int val);
 
 #endif	/* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index b43a29e11f4a..767838d2030d 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -581,6 +581,14 @@ void ip_sock_set_tos(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(ip_sock_set_tos);
 
+void ip_sock_set_freebind(struct sock *sk)
+{
+	lock_sock(sk);
+	inet_sk(sk)->freebind = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_freebind);
+
 /*
  *	Socket option code for IP. This is the end of the line after any
  *	TCP,UDP etc options on an IP socket.
-- 
cgit v1.2.3-59-g8ed1b


From db45c0ef258ef6c7ef3c1b8ea9e06e133e083c27 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:28 +0200
Subject: ipv4: add ip_sock_set_recverr

Add a helper to directly set the IP_RECVERR sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h         | 1 +
 net/ipv4/ip_sockglue.c   | 8 ++++++++
 net/rxrpc/local_object.c | 8 +-------
 3 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index 5f5d8226b6ab..f063a491b906 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -766,6 +766,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 }
 
 void ip_sock_set_freebind(struct sock *sk);
+void ip_sock_set_recverr(struct sock *sk);
 void ip_sock_set_tos(struct sock *sk, int val);
 
 #endif	/* _IP_H */
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index 767838d2030d..aca6b81da9ba 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -589,6 +589,14 @@ void ip_sock_set_freebind(struct sock *sk)
 }
 EXPORT_SYMBOL(ip_sock_set_freebind);
 
+void ip_sock_set_recverr(struct sock *sk)
+{
+	lock_sock(sk);
+	inet_sk(sk)->recverr = true;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_recverr);
+
 /*
  *	Socket option code for IP. This is the end of the line after any
  *	TCP,UDP etc options on an IP socket.
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 5ea2bd01fdd5..4c0e8fe5ec1f 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -171,13 +171,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		/* Fall through */
 	case AF_INET:
 		/* we want to receive ICMP errors */
-		opt = 1;
-		ret = kernel_setsockopt(local->socket, SOL_IP, IP_RECVERR,
-					(char *) &opt, sizeof(opt));
-		if (ret < 0) {
-			_debug("setsockopt failed");
-			goto error;
-		}
+		ip_sock_set_recverr(local->socket->sk);
 
 		/* we want to set the don't fragment bit */
 		opt = IP_PMTUDISC_DO;
-- 
cgit v1.2.3-59-g8ed1b


From 2de569bda2a66d1308ad3f205bb29cf4f95f5636 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:29 +0200
Subject: ipv4: add ip_sock_set_mtu_discover

Add a helper to directly set the IP_MTU_DISCOVER sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com> [rxrpc bits]
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h         |  1 +
 net/ipv4/ip_sockglue.c   | 11 +++++++++++
 net/rxrpc/local_object.c |  8 +-------
 net/rxrpc/output.c       | 14 +++++---------
 4 files changed, 18 insertions(+), 16 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index f063a491b906..d3649c49dd33 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -766,6 +766,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 }
 
 void ip_sock_set_freebind(struct sock *sk);
+int ip_sock_set_mtu_discover(struct sock *sk, int val);
 void ip_sock_set_recverr(struct sock *sk);
 void ip_sock_set_tos(struct sock *sk, int val);
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aca6b81da9ba..aa115be11dcf 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -597,6 +597,17 @@ void ip_sock_set_recverr(struct sock *sk)
 }
 EXPORT_SYMBOL(ip_sock_set_recverr);
 
+int ip_sock_set_mtu_discover(struct sock *sk, int val)
+{
+	if (val < IP_PMTUDISC_DONT || val > IP_PMTUDISC_OMIT)
+		return -EINVAL;
+	lock_sock(sk);
+	inet_sk(sk)->pmtudisc = val;
+	release_sock(sk);
+	return 0;
+}
+EXPORT_SYMBOL(ip_sock_set_mtu_discover);
+
 /*
  *	Socket option code for IP. This is the end of the line after any
  *	TCP,UDP etc options on an IP socket.
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 4c0e8fe5ec1f..6f4e6b4817cf 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -174,13 +174,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 		ip_sock_set_recverr(local->socket->sk);
 
 		/* we want to set the don't fragment bit */
-		opt = IP_PMTUDISC_DO;
-		ret = kernel_setsockopt(local->socket, SOL_IP, IP_MTU_DISCOVER,
-					(char *) &opt, sizeof(opt));
-		if (ret < 0) {
-			_debug("setsockopt failed");
-			goto error;
-		}
+		ip_sock_set_mtu_discover(local->socket->sk, IP_PMTUDISC_DO);
 
 		/* We want receive timestamps. */
 		sock_enable_timestamps(local->socket->sk);
diff --git a/net/rxrpc/output.c b/net/rxrpc/output.c
index f8b632a5c619..1ba43c3df4ad 100644
--- a/net/rxrpc/output.c
+++ b/net/rxrpc/output.c
@@ -321,7 +321,7 @@ int rxrpc_send_data_packet(struct rxrpc_call *call, struct sk_buff *skb,
 	struct kvec iov[2];
 	rxrpc_serial_t serial;
 	size_t len;
-	int ret, opt;
+	int ret;
 
 	_enter(",{%d}", skb->len);
 
@@ -473,18 +473,14 @@ send_fragmentable:
 	switch (conn->params.local->srx.transport.family) {
 	case AF_INET6:
 	case AF_INET:
-		opt = IP_PMTUDISC_DONT;
-		kernel_setsockopt(conn->params.local->socket,
-				  SOL_IP, IP_MTU_DISCOVER,
-				  (char *)&opt, sizeof(opt));
+		ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+				IP_PMTUDISC_DONT);
 		ret = kernel_sendmsg(conn->params.local->socket, &msg,
 				     iov, 2, len);
 		conn->params.peer->last_tx_at = ktime_get_seconds();
 
-		opt = IP_PMTUDISC_DO;
-		kernel_setsockopt(conn->params.local->socket,
-				  SOL_IP, IP_MTU_DISCOVER,
-				  (char *)&opt, sizeof(opt));
+		ip_sock_set_mtu_discover(conn->params.local->socket->sk,
+				IP_PMTUDISC_DO);
 		break;
 
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From c1f9ec5776dd05eaf62cf6788ecdfc905dc8ec2b Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:30 +0200
Subject: ipv4: add ip_sock_set_pktinfo

Add a helper to directly set the IP_PKTINFO sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ip.h       | 1 +
 net/ipv4/ip_sockglue.c | 8 ++++++++
 net/sunrpc/svcsock.c   | 5 ++---
 3 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/include/net/ip.h b/include/net/ip.h
index d3649c49dd33..04ebe7bf54c6 100644
--- a/include/net/ip.h
+++ b/include/net/ip.h
@@ -767,6 +767,7 @@ static inline bool inetdev_valid_mtu(unsigned int mtu)
 
 void ip_sock_set_freebind(struct sock *sk);
 int ip_sock_set_mtu_discover(struct sock *sk, int val);
+void ip_sock_set_pktinfo(struct sock *sk);
 void ip_sock_set_recverr(struct sock *sk);
 void ip_sock_set_tos(struct sock *sk, int val);
 
diff --git a/net/ipv4/ip_sockglue.c b/net/ipv4/ip_sockglue.c
index aa115be11dcf..84ec3703c909 100644
--- a/net/ipv4/ip_sockglue.c
+++ b/net/ipv4/ip_sockglue.c
@@ -608,6 +608,14 @@ int ip_sock_set_mtu_discover(struct sock *sk, int val)
 }
 EXPORT_SYMBOL(ip_sock_set_mtu_discover);
 
+void ip_sock_set_pktinfo(struct sock *sk)
+{
+	lock_sock(sk);
+	inet_sk(sk)->cmsg_flags |= IP_CMSG_PKTINFO;
+	release_sock(sk);
+}
+EXPORT_SYMBOL(ip_sock_set_pktinfo);
+
 /*
  *	Socket option code for IP. This is the end of the line after any
  *	TCP,UDP etc options on an IP socket.
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 6773dacc64d8..7a805d165689 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -616,9 +616,8 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 	/* make sure we get destination address info */
 	switch (svsk->sk_sk->sk_family) {
 	case AF_INET:
-		level = SOL_IP;
-		optname = IP_PKTINFO;
-		break;
+		ip_sock_set_pktinfo(svsk->sk_sock->sk);
+		return;
 	case AF_INET6:
 		level = SOL_IPV6;
 		optname = IPV6_RECVPKTINFO;
-- 
cgit v1.2.3-59-g8ed1b


From 9b115749acb24d11083ded4fe947ddd654a940e3 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:31 +0200
Subject: ipv6: add ip6_sock_set_v6only

Add a helper to directly set the IPV6_V6ONLY sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h        | 11 +++++++++++
 net/ipv6/ip6_udp_tunnel.c |  5 +----
 net/sunrpc/svcsock.c      |  6 +-----
 3 files changed, 13 insertions(+), 9 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 39a00d3ef5e2..9b91188c9a74 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1177,4 +1177,15 @@ int ipv6_sock_mc_join_ssm(struct sock *sk, int ifindex,
 			  const struct in6_addr *addr, unsigned int mode);
 int ipv6_sock_mc_drop(struct sock *sk, int ifindex,
 		      const struct in6_addr *addr);
+
+static inline int ip6_sock_set_v6only(struct sock *sk)
+{
+	if (inet_sk(sk)->inet_num)
+		return -EINVAL;
+	lock_sock(sk);
+	sk->sk_ipv6only = true;
+	release_sock(sk);
+	return 0;
+}
+
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 6523609516d2..2e0ad1bc84a8 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -25,10 +25,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 		goto error;
 
 	if (cfg->ipv6_v6only) {
-		int val = 1;
-
-		err = kernel_setsockopt(sock, IPPROTO_IPV6, IPV6_V6ONLY,
-					(char *) &val, sizeof(val));
+		err = ip6_sock_set_v6only(sock->sk);
 		if (err < 0)
 			goto error;
 	}
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index 7a805d165689..a391892977cd 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -1328,7 +1328,6 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
 	struct sockaddr *newsin = (struct sockaddr *)&addr;
 	int		newlen;
 	int		family;
-	int		val;
 	RPC_IFDEBUG(char buf[RPC_MAX_ADDRBUFLEN]);
 
 	dprintk("svc: svc_create_socket(%s, %d, %s)\n",
@@ -1364,11 +1363,8 @@ static struct svc_xprt *svc_create_socket(struct svc_serv *serv,
 	 * getting requests from IPv4 remotes.  Those should
 	 * be shunted to a PF_INET listener via rpcbind.
 	 */
-	val = 1;
 	if (family == PF_INET6)
-		kernel_setsockopt(sock, SOL_IPV6, IPV6_V6ONLY,
-					(char *)&val, sizeof(val));
-
+		ip6_sock_set_v6only(sock->sk);
 	if (type == SOCK_STREAM)
 		sock->sk->sk_reuse = SK_CAN_REUSE; /* allow address reuse */
 	error = kernel_bind(sock, sin, len);
-- 
cgit v1.2.3-59-g8ed1b


From fce934949c0f0003c1777fbf8c0706ba82a8cf7e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:32 +0200
Subject: ipv6: add ip6_sock_set_recverr

Add a helper to directly set the IPV6_RECVERR sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       |  7 +++++++
 net/rxrpc/local_object.c | 10 ++--------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9b91188c9a74..49c4abf99148 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1188,4 +1188,11 @@ static inline int ip6_sock_set_v6only(struct sock *sk)
 	return 0;
 }
 
+static inline void ip6_sock_set_recverr(struct sock *sk)
+{
+	lock_sock(sk);
+	inet6_sk(sk)->recverr = true;
+	release_sock(sk);
+}
+
 #endif /* _NET_IPV6_H */
diff --git a/net/rxrpc/local_object.c b/net/rxrpc/local_object.c
index 6f4e6b4817cf..c8b2097f499c 100644
--- a/net/rxrpc/local_object.c
+++ b/net/rxrpc/local_object.c
@@ -107,7 +107,7 @@ static struct rxrpc_local *rxrpc_alloc_local(struct rxrpc_net *rxnet,
 static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 {
 	struct sock *usk;
-	int ret, opt;
+	int ret;
 
 	_enter("%p{%d,%d}",
 	       local, local->srx.transport_type, local->srx.transport.family);
@@ -157,13 +157,7 @@ static int rxrpc_open_socket(struct rxrpc_local *local, struct net *net)
 	switch (local->srx.transport.family) {
 	case AF_INET6:
 		/* we want to receive ICMPv6 errors */
-		opt = 1;
-		ret = kernel_setsockopt(local->socket, SOL_IPV6, IPV6_RECVERR,
-					(char *) &opt, sizeof(opt));
-		if (ret < 0) {
-			_debug("setsockopt failed");
-			goto error;
-		}
+		ip6_sock_set_recverr(local->socket->sk);
 
 		/* Fall through and set IPv4 options too otherwise we don't get
 		 * errors from IPv4 packets sent through the IPv6 socket.
-- 
cgit v1.2.3-59-g8ed1b


From 18d5ad62327576cbb1e5b9938a59d63ac0c15832 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:33 +0200
Subject: ipv6: add ip6_sock_set_addr_preferences

Add a helper to directly set the IPV6_ADDR_PREFERENCES sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h       | 67 ++++++++++++++++++++++++++++++++++++++++++++++++
 net/ipv6/ipv6_sockglue.c | 59 +-----------------------------------------
 net/sunrpc/xprtsock.c    |  7 ++---
 3 files changed, 72 insertions(+), 61 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 49c4abf99148..9a9075983016 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1195,4 +1195,71 @@ static inline void ip6_sock_set_recverr(struct sock *sk)
 	release_sock(sk);
 }
 
+static inline int __ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+	unsigned int pref = 0;
+	unsigned int prefmask = ~0;
+
+	/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
+	switch (val & (IPV6_PREFER_SRC_PUBLIC |
+		       IPV6_PREFER_SRC_TMP |
+		       IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
+	case IPV6_PREFER_SRC_PUBLIC:
+		pref |= IPV6_PREFER_SRC_PUBLIC;
+		prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+			      IPV6_PREFER_SRC_TMP);
+		break;
+	case IPV6_PREFER_SRC_TMP:
+		pref |= IPV6_PREFER_SRC_TMP;
+		prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+			      IPV6_PREFER_SRC_TMP);
+		break;
+	case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
+		prefmask &= ~(IPV6_PREFER_SRC_PUBLIC |
+			      IPV6_PREFER_SRC_TMP);
+		break;
+	case 0:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check HOME/COA conflicts */
+	switch (val & (IPV6_PREFER_SRC_HOME | IPV6_PREFER_SRC_COA)) {
+	case IPV6_PREFER_SRC_HOME:
+		prefmask &= ~IPV6_PREFER_SRC_COA;
+		break;
+	case IPV6_PREFER_SRC_COA:
+		pref |= IPV6_PREFER_SRC_COA;
+		break;
+	case 0:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* check CGA/NONCGA conflicts */
+	switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
+	case IPV6_PREFER_SRC_CGA:
+	case IPV6_PREFER_SRC_NONCGA:
+	case 0:
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	inet6_sk(sk)->srcprefs = (inet6_sk(sk)->srcprefs & prefmask) | pref;
+	return 0;
+}
+
+static inline int ip6_sock_set_addr_preferences(struct sock *sk, int val)
+{
+	int ret;
+	/* val is an IPV6_PREFER_SRC_* bitmask; a bool would squash it to 0/1 */
+	lock_sock(sk);
+	ret = __ip6_sock_set_addr_preferences(sk, val);
+	release_sock(sk);
+	return ret;
+}
+
 #endif /* _NET_IPV6_H */
diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index e10258c2210e..adbfed6adf11 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -845,67 +845,10 @@ done:
 		break;
 
 	case IPV6_ADDR_PREFERENCES:
-	    {
-		unsigned int pref = 0;
-		unsigned int prefmask = ~0;
-
 		if (optlen < sizeof(int))
 			goto e_inval;
-
-		retv = -EINVAL;
-
-		/* check PUBLIC/TMP/PUBTMP_DEFAULT conflicts */
-		switch (val & (IPV6_PREFER_SRC_PUBLIC|
-			       IPV6_PREFER_SRC_TMP|
-			       IPV6_PREFER_SRC_PUBTMP_DEFAULT)) {
-		case IPV6_PREFER_SRC_PUBLIC:
-			pref |= IPV6_PREFER_SRC_PUBLIC;
-			break;
-		case IPV6_PREFER_SRC_TMP:
-			pref |= IPV6_PREFER_SRC_TMP;
-			break;
-		case IPV6_PREFER_SRC_PUBTMP_DEFAULT:
-			break;
-		case 0:
-			goto pref_skip_pubtmp;
-		default:
-			goto e_inval;
-		}
-
-		prefmask &= ~(IPV6_PREFER_SRC_PUBLIC|
-			      IPV6_PREFER_SRC_TMP);
-pref_skip_pubtmp:
-
-		/* check HOME/COA conflicts */
-		switch (val & (IPV6_PREFER_SRC_HOME|IPV6_PREFER_SRC_COA)) {
-		case IPV6_PREFER_SRC_HOME:
-			break;
-		case IPV6_PREFER_SRC_COA:
-			pref |= IPV6_PREFER_SRC_COA;
-		case 0:
-			goto pref_skip_coa;
-		default:
-			goto e_inval;
-		}
-
-		prefmask &= ~IPV6_PREFER_SRC_COA;
-pref_skip_coa:
-
-		/* check CGA/NONCGA conflicts */
-		switch (val & (IPV6_PREFER_SRC_CGA|IPV6_PREFER_SRC_NONCGA)) {
-		case IPV6_PREFER_SRC_CGA:
-		case IPV6_PREFER_SRC_NONCGA:
-		case 0:
-			break;
-		default:
-			goto e_inval;
-		}
-
-		np->srcprefs = (np->srcprefs & prefmask) | pref;
-		retv = 0;
-
+		retv = __ip6_sock_set_addr_preferences(sk, val);
 		break;
-	    }
 	case IPV6_MINHOPCOUNT:
 		if (optlen < sizeof(int))
 			goto e_inval;
diff --git a/net/sunrpc/xprtsock.c b/net/sunrpc/xprtsock.c
index 0d3ec055bc12..3a143e250b9a 100644
--- a/net/sunrpc/xprtsock.c
+++ b/net/sunrpc/xprtsock.c
@@ -2150,7 +2150,6 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 
 	if (!transport->inet) {
 		struct sock *sk = sock->sk;
-		unsigned int addr_pref = IPV6_PREFER_SRC_PUBLIC;
 
 		/* Avoid temporary address, they are bad for long-lived
 		 * connections such as NFS mounts.
@@ -2159,8 +2158,10 @@ static int xs_tcp_finish_connecting(struct rpc_xprt *xprt, struct socket *sock)
 		 *    knowledge about the normal duration of connections,
 		 *    MAY override this as appropriate.
 		 */
-		kernel_setsockopt(sock, SOL_IPV6, IPV6_ADDR_PREFERENCES,
-				(char *)&addr_pref, sizeof(addr_pref));
+		if (xs_addr(xprt)->sa_family == PF_INET6) {
+			ip6_sock_set_addr_preferences(sk,
+				IPV6_PREFER_SRC_PUBLIC);
+		}
 
 		xs_tcp_set_socket_timeouts(xprt, sock);
 
-- 
cgit v1.2.3-59-g8ed1b


From 7d7207c2d57080af93fc323dc6a85bd79207b4c6 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:34 +0200
Subject: ipv6: add ip6_sock_set_recvpktinfo

Add a helper to directly set the IPV6_RECVPKTINFO sockopt from kernel
space without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/ipv6.h   |  7 +++++++
 net/sunrpc/svcsock.c | 10 ++--------
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/include/net/ipv6.h b/include/net/ipv6.h
index 9a9075983016..5e65bf2fd32d 100644
--- a/include/net/ipv6.h
+++ b/include/net/ipv6.h
@@ -1262,4 +1262,11 @@ static inline int ip6_sock_set_addr_preferences(struct sock *sk, bool val)
 	return ret;
 }
 
+static inline void ip6_sock_set_recvpktinfo(struct sock *sk)
+{
+	lock_sock(sk);
+	inet6_sk(sk)->rxopt.bits.rxinfo = true;
+	release_sock(sk);
+}
+
 #endif /* _NET_IPV6_H */
diff --git a/net/sunrpc/svcsock.c b/net/sunrpc/svcsock.c
index a391892977cd..e7a0037d9b56 100644
--- a/net/sunrpc/svcsock.c
+++ b/net/sunrpc/svcsock.c
@@ -595,8 +595,6 @@ static struct svc_xprt_class svc_udp_class = {
 
 static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 {
-	int err, level, optname, one = 1;
-
 	svc_xprt_init(sock_net(svsk->sk_sock->sk), &svc_udp_class,
 		      &svsk->sk_xprt, serv);
 	clear_bit(XPT_CACHE_AUTH, &svsk->sk_xprt.xpt_flags);
@@ -617,17 +615,13 @@ static void svc_udp_init(struct svc_sock *svsk, struct svc_serv *serv)
 	switch (svsk->sk_sk->sk_family) {
 	case AF_INET:
 		ip_sock_set_pktinfo(svsk->sk_sock->sk);
-		return;
+		break;
 	case AF_INET6:
-		level = SOL_IPV6;
-		optname = IPV6_RECVPKTINFO;
+		ip6_sock_set_recvpktinfo(svsk->sk_sock->sk);
 		break;
 	default:
 		BUG();
 	}
-	err = kernel_setsockopt(svsk->sk_sock, level, optname,
-					(char *)&one, sizeof(one));
-	dprintk("svc: kernel_setsockopt returned %d\n", err);
 }
 
 /*
-- 
cgit v1.2.3-59-g8ed1b


From 298cd88a66a02c899772ffafbf648786ceb5ab95 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:35 +0200
Subject: rxrpc: add rxrpc_sock_set_min_security_level

Add a helper to directly set the RXRPC_MIN_SECURITY_LEVEL sockopt from
kernel space without going through a fake uaccess.

Thanks to David Howells for the documentation updates.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: David Howells <dhowells@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/rxrpc.rst | 13 +++++++++++--
 fs/afs/rxrpc.c                     |  6 ++----
 include/net/af_rxrpc.h             |  2 ++
 net/rxrpc/af_rxrpc.c               | 13 +++++++++++++
 4 files changed, 28 insertions(+), 6 deletions(-)

diff --git a/Documentation/networking/rxrpc.rst b/Documentation/networking/rxrpc.rst
index 5ad35113d0f4..68552b92dc44 100644
--- a/Documentation/networking/rxrpc.rst
+++ b/Documentation/networking/rxrpc.rst
@@ -477,7 +477,7 @@ AF_RXRPC sockets support a few socket options at the SOL_RXRPC level:
 	 Encrypted checksum plus packet padded and first eight bytes of packet
 	 encrypted - which includes the actual packet length.
 
-     (c) RXRPC_SECURITY_ENCRYPTED
+     (c) RXRPC_SECURITY_ENCRYPT
 
 	 Encrypted checksum plus entire packet padded and encrypted, including
 	 actual packet length.
@@ -578,7 +578,7 @@ A client would issue an operation by:
      This issues a request_key() to get the key representing the security
      context.  The minimum security level can be set::
 
-	unsigned int sec = RXRPC_SECURITY_ENCRYPTED;
+	unsigned int sec = RXRPC_SECURITY_ENCRYPT;
 	setsockopt(client, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
 		   &sec, sizeof(sec));
 
@@ -1090,6 +1090,15 @@ The kernel interface functions are as follows:
      jiffies).  In the event of the timeout occurring, the call will be
      aborted and -ETIME or -ETIMEDOUT will be returned.
 
+ (#) Apply the RXRPC_MIN_SECURITY_LEVEL sockopt to a socket from within the
+     kernel::
+
+       int rxrpc_sock_set_min_security_level(struct sock *sk,
+					     unsigned int val);
+
+     This specifies the minimum security level required for calls on this
+     socket.
+
 
 Configurable Parameters
 =======================
diff --git a/fs/afs/rxrpc.c b/fs/afs/rxrpc.c
index 1ecc67da6c1a..e313dae01674 100644
--- a/fs/afs/rxrpc.c
+++ b/fs/afs/rxrpc.c
@@ -37,7 +37,6 @@ int afs_open_socket(struct afs_net *net)
 {
 	struct sockaddr_rxrpc srx;
 	struct socket *socket;
-	unsigned int min_level;
 	int ret;
 
 	_enter("");
@@ -57,9 +56,8 @@ int afs_open_socket(struct afs_net *net)
 	srx.transport.sin6.sin6_family	= AF_INET6;
 	srx.transport.sin6.sin6_port	= htons(AFS_CM_PORT);
 
-	min_level = RXRPC_SECURITY_ENCRYPT;
-	ret = kernel_setsockopt(socket, SOL_RXRPC, RXRPC_MIN_SECURITY_LEVEL,
-				(void *)&min_level, sizeof(min_level));
+	ret = rxrpc_sock_set_min_security_level(socket->sk,
+						RXRPC_SECURITY_ENCRYPT);
 	if (ret < 0)
 		goto error_2;
 
diff --git a/include/net/af_rxrpc.h b/include/net/af_rxrpc.h
index ab988940bf04..91eacbdcf33d 100644
--- a/include/net/af_rxrpc.h
+++ b/include/net/af_rxrpc.h
@@ -72,4 +72,6 @@ bool rxrpc_kernel_call_is_complete(struct rxrpc_call *);
 void rxrpc_kernel_set_max_life(struct socket *, struct rxrpc_call *,
 			       unsigned long);
 
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val);
+
 #endif /* _NET_RXRPC_H */
diff --git a/net/rxrpc/af_rxrpc.c b/net/rxrpc/af_rxrpc.c
index 15ee92d79581..394189b81849 100644
--- a/net/rxrpc/af_rxrpc.c
+++ b/net/rxrpc/af_rxrpc.c
@@ -571,6 +571,19 @@ out:
 	return ret;
 }
 
+int rxrpc_sock_set_min_security_level(struct sock *sk, unsigned int val)
+{
+	if (sk->sk_state != RXRPC_UNBOUND)
+		return -EISCONN;
+	if (val > RXRPC_SECURITY_MAX)
+		return -EINVAL;
+	lock_sock(sk);
+	rxrpc_sk(sk)->min_sec_level = val;
+	release_sock(sk);
+	return 0;
+}
+EXPORT_SYMBOL(rxrpc_sock_set_min_security_level);
+
 /*
  * set RxRPC socket options
  */
-- 
cgit v1.2.3-59-g8ed1b


From 095ae612530c9465df6d372d688cb30c6abfc5f5 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Thu, 28 May 2020 07:12:36 +0200
Subject: tipc: call tsk_set_importance from tipc_topsrv_create_listener

Avoid using kernel_setsockopt for the TIPC_IMPORTANCE option when we can
just use the internal helper.  The only change needed is to pass a struct
sock instead of tipc_sock, which is private to socket.c

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 18 +++++++++---------
 net/tipc/socket.h |  2 ++
 net/tipc/topsrv.c |  6 +++---
 3 files changed, 14 insertions(+), 12 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index d6b67d07d22e..3734cdbedc9c 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -196,17 +196,17 @@ static int tsk_importance(struct tipc_sock *tsk)
 	return msg_importance(&tsk->phdr);
 }
 
-static int tsk_set_importance(struct tipc_sock *tsk, int imp)
+static struct tipc_sock *tipc_sk(const struct sock *sk)
 {
-	if (imp > TIPC_CRITICAL_IMPORTANCE)
-		return -EINVAL;
-	msg_set_importance(&tsk->phdr, (u32)imp);
-	return 0;
+	return container_of(sk, struct tipc_sock, sk);
 }
 
-static struct tipc_sock *tipc_sk(const struct sock *sk)
+int tsk_set_importance(struct sock *sk, int imp)
 {
-	return container_of(sk, struct tipc_sock, sk);
+	if (imp > TIPC_CRITICAL_IMPORTANCE)
+		return -EINVAL;
+	msg_set_importance(&tipc_sk(sk)->phdr, (u32)imp);
+	return 0;
 }
 
 static bool tsk_conn_cong(struct tipc_sock *tsk)
@@ -2721,7 +2721,7 @@ static int tipc_accept(struct socket *sock, struct socket *new_sock, int flags,
 	/* Connect new socket to it's peer */
 	tipc_sk_finish_conn(new_tsock, msg_origport(msg), msg_orignode(msg));
 
-	tsk_set_importance(new_tsock, msg_importance(msg));
+	tsk_set_importance(new_sk, msg_importance(msg));
 	if (msg_named(msg)) {
 		new_tsock->conn_type = msg_nametype(msg);
 		new_tsock->conn_instance = msg_nameinst(msg);
@@ -3139,7 +3139,7 @@ static int tipc_setsockopt(struct socket *sock, int lvl, int opt,
 
 	switch (opt) {
 	case TIPC_IMPORTANCE:
-		res = tsk_set_importance(tsk, value);
+		res = tsk_set_importance(sk, value);
 		break;
 	case TIPC_SRC_DROPPABLE:
 		if (sock->type != SOCK_STREAM)
diff --git a/net/tipc/socket.h b/net/tipc/socket.h
index 235b9679acee..b11575afc66f 100644
--- a/net/tipc/socket.h
+++ b/net/tipc/socket.h
@@ -75,4 +75,6 @@ u32 tipc_sock_get_portid(struct sock *sk);
 bool tipc_sk_overlimit1(struct sock *sk, struct sk_buff *skb);
 bool tipc_sk_overlimit2(struct sock *sk, struct sk_buff *skb);
 
+int tsk_set_importance(struct sock *sk, int imp);
+
 #endif
diff --git a/net/tipc/topsrv.c b/net/tipc/topsrv.c
index 446af7bbd13e..1489cfb941d8 100644
--- a/net/tipc/topsrv.c
+++ b/net/tipc/topsrv.c
@@ -497,7 +497,6 @@ static void tipc_topsrv_listener_data_ready(struct sock *sk)
 
 static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
 {
-	int imp = TIPC_CRITICAL_IMPORTANCE;
 	struct socket *lsock = NULL;
 	struct sockaddr_tipc saddr;
 	struct sock *sk;
@@ -514,8 +513,9 @@ static int tipc_topsrv_create_listener(struct tipc_topsrv *srv)
 	sk->sk_user_data = srv;
 	write_unlock_bh(&sk->sk_callback_lock);
 
-	rc = kernel_setsockopt(lsock, SOL_TIPC, TIPC_IMPORTANCE,
-			       (char *)&imp, sizeof(imp));
+	lock_sock(sk);
+	rc = tsk_set_importance(sk, TIPC_CRITICAL_IMPORTANCE);
+	release_sock(sk);
 	if (rc < 0)
 		goto err;
 
-- 
cgit v1.2.3-59-g8ed1b


From b113cabd4378ddd98dccdd7748a16f9f1f094ef0 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Wed, 27 May 2020 15:41:06 +0200
Subject: sfc: avoid an unused-variable warning

'nic_data' is no longer used outside of the #ifdef block
in efx_ef10_set_mac_address:

drivers/net/ethernet/sfc/ef10.c:3231:28: error: unused variable 'nic_data' [-Werror,-Wunused-variable]
        struct efx_ef10_nic_data *nic_data = efx->nic_data;

Move the variable into a local scope.

Fixes: dfcabb078847 ("sfc: move vport_id to struct efx_nic")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index e634e8110585..964c5e842cec 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -3228,7 +3228,6 @@ reset_nic:
 static int efx_ef10_set_mac_address(struct efx_nic *efx)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_VADAPTOR_SET_MAC_IN_LEN);
-	struct efx_ef10_nic_data *nic_data = efx->nic_data;
 	bool was_enabled = efx->port_enabled;
 	int rc;
 
@@ -3256,6 +3255,7 @@ static int efx_ef10_set_mac_address(struct efx_nic *efx)
 
 #ifdef CONFIG_SFC_SRIOV
 	if (efx->pci_dev->is_virtfn && efx->pci_dev->physfn) {
+		struct efx_ef10_nic_data *nic_data = efx->nic_data;
 		struct pci_dev *pci_dev_pf = efx->pci_dev->physfn;
 
 		if (rc == -EPERM) {
-- 
cgit v1.2.3-59-g8ed1b


From 9918f2d22fd3ff1e76693512d29e743eba3dc8cb Mon Sep 17 00:00:00 2001
From: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Date: Fri, 15 May 2020 17:42:22 -0700
Subject: ice: Poll for reset completion when DDP load fails

There are certain cases where the DDP load fails and the FW issues a
core reset. For these cases, wait for reset to complete before
proceeding with the rest of the driver init.

Signed-off-by: Anirudh Venkataramanan <anirudh.venkataramanan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index b64c4e796636..6583acf32575 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3086,6 +3086,9 @@ ice_log_pkg_init(struct ice_hw *hw, enum ice_status *status)
 		case ICE_AQ_RC_EBADMAN:
 		case ICE_AQ_RC_EBADBUF:
 			dev_err(dev, "An error occurred on the device while loading the DDP package.  The device will be reset.\n");
+			/* poll for reset to complete */
+			if (ice_check_reset(hw))
+				dev_err(dev, "Error resetting device. Please reload the driver\n");
 			return;
 		default:
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 072064a43ef38fab8559edfbf12f918f8acdd85b Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 15 May 2020 17:42:23 -0700
Subject: ice: cleanup VSI context initialization

Remove an unnecessary copy of vsi->info into ctxt->info in ice_vsi_init.
This line is essentially a no-op because ice_set_dflt_vsi_ctx performs
a memset to clear the info from the context structure.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 6f3ee8ac11ce..89e8e4f7f56f 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -938,7 +938,6 @@ static int ice_vsi_init(struct ice_vsi *vsi, bool init_vsi)
 	if (!ctxt)
 		return -ENOMEM;
 
-	ctxt->info = vsi->info;
 	switch (vsi->type) {
 	case ICE_VSI_CTRL:
 	case ICE_VSI_LB:
-- 
cgit v1.2.3-59-g8ed1b


From bc3a024101ca497bea4c69be4054c32a5c349f1d Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 15 May 2020 17:42:24 -0700
Subject: ice: fix potential double free in probe unrolling

If ice_init_interrupt_scheme fails, ice_probe will jump to clearing up
the interrupts. This can lead to some static analysis tools such as the
compiler sanitizers complaining about double free problems.

Since ice_init_interrupt_scheme already unrolls internally on failure,
there is no need to call ice_clear_interrupt_scheme when it fails. Add
a new unroll label and use that instead.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 6583acf32575..5cffaf360cb0 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3418,7 +3418,7 @@ ice_probe(struct pci_dev *pdev, const struct pci_device_id __always_unused *ent)
 	if (err) {
 		dev_err(dev, "ice_init_interrupt_scheme failed: %d\n", err);
 		err = -EIO;
-		goto err_init_interrupt_unroll;
+		goto err_init_vsi_unroll;
 	}
 
 	/* In case of MSIX we are going to setup the misc vector right here
@@ -3511,6 +3511,7 @@ err_msix_misc_unroll:
 	ice_free_irq_msix_misc(pf);
 err_init_interrupt_unroll:
 	ice_clear_interrupt_scheme(pf);
+err_init_vsi_unroll:
 	devm_kfree(dev, pf->vsi);
 err_init_pf_unroll:
 	ice_deinit_pf(pf);
-- 
cgit v1.2.3-59-g8ed1b


From c2b313b783e0441dab2441cc1ee216eb4b9447a6 Mon Sep 17 00:00:00 2001
From: Jacob Keller <jacob.e.keller@intel.com>
Date: Fri, 15 May 2020 17:42:25 -0700
Subject: ice: fix kernel BUG if register_netdev fails

If register_netdev() fails, the driver will attempt to cleanup the
q_vectors and inadvertently trigger a kernel BUG due to a NULL pointer
dereference.

This occurs because cleaning up q_vectors attempts to call
netif_napi_del on napi_structs which were never initialized.

Resolve this by releasing the netdev in ice_cfg_netdev and setting
vsi->netdev to NULL. This ensures that after ice_cfg_netdev fails the
state is rewound to match as if ice_cfg_netdev was never called.

Signed-off-by: Jacob Keller <jacob.e.keller@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 5cffaf360cb0..69854b8644a6 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -2428,7 +2428,7 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 
 	err = register_netdev(vsi->netdev);
 	if (err)
-		goto err_destroy_devlink_port;
+		goto err_free_netdev;
 
 	devlink_port_type_eth_set(&pf->devlink_port, vsi->netdev);
 
@@ -2439,9 +2439,11 @@ static int ice_cfg_netdev(struct ice_vsi *vsi)
 
 	return 0;
 
+err_free_netdev:
+	free_netdev(vsi->netdev);
+	vsi->netdev = NULL;
 err_destroy_devlink_port:
 	ice_devlink_destroy_port(pf);
-
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From d3112cd1abec7a28bbe885c2151875bcff4e9092 Mon Sep 17 00:00:00 2001
From: Tony Nguyen <anthony.l.nguyen@intel.com>
Date: Fri, 15 May 2020 17:42:26 -0700
Subject: ice: Declare functions static

ice_get_pfa_module_tlv() and ice_read_sr_word() are not being called
outside of their file. Declare them as static.

Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_nvm.c | 5 +++--
 drivers/net/ethernet/intel/ice/ice_nvm.h | 4 ----
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.c b/drivers/net/ethernet/intel/ice/ice_nvm.c
index 7c2a06892bbb..b049c1c30c88 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.c
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.c
@@ -172,7 +172,8 @@ void ice_release_nvm(struct ice_hw *hw)
  *
  * Reads one 16 bit word from the Shadow RAM using the ice_read_sr_word_aq.
  */
-enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
+static enum ice_status
+ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
 {
 	enum ice_status status;
 
@@ -196,7 +197,7 @@ enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data)
  * Area (PFA) and returns the TLV pointer and length. The caller can
  * use these to read the variable length TLV value.
  */
-enum ice_status
+static enum ice_status
 ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
 		       u16 module_type)
 {
diff --git a/drivers/net/ethernet/intel/ice/ice_nvm.h b/drivers/net/ethernet/intel/ice/ice_nvm.h
index 999f273ba6ad..165eda07b93d 100644
--- a/drivers/net/ethernet/intel/ice/ice_nvm.h
+++ b/drivers/net/ethernet/intel/ice/ice_nvm.h
@@ -11,10 +11,6 @@ enum ice_status
 ice_read_flat_nvm(struct ice_hw *hw, u32 offset, u32 *length, u8 *data,
 		  bool read_shadow_ram);
 enum ice_status
-ice_get_pfa_module_tlv(struct ice_hw *hw, u16 *module_tlv, u16 *module_tlv_len,
-		       u16 module_type);
-enum ice_status
 ice_read_pba_string(struct ice_hw *hw, u8 *pba_num, u32 pba_num_size);
 enum ice_status ice_init_nvm(struct ice_hw *hw);
-enum ice_status ice_read_sr_word(struct ice_hw *hw, u16 offset, u16 *data);
 #endif /* _ICE_NVM_H_ */
-- 
cgit v1.2.3-59-g8ed1b


From ac3716134a40b04e18a2dda78800797129138005 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:07 -0700
Subject: ice: Refactor ice_ena_vf_mappings to split MSIX and queue mappings

Currently ice_ena_vf_mappings() does all of the VF's MSIX and queue
mapping in one function. This makes it hard to digest. Fix this by
creating a new function for enabling MSIX mappings and one for enabling
queue mappings.

Also, rename some variables in the functions for clarity.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 91 +++++++++++++++---------
 1 file changed, 59 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index efd54299a220..621ec0cc6fff 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -651,55 +651,70 @@ ice_alloc_vf_res_exit:
 }
 
 /**
- * ice_ena_vf_mappings
- * @vf: pointer to the VF structure
+ * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
+ * @vf: VF to enable MSIX mappings for
  *
- * Enable VF vectors and queues allocation by writing the details into
- * respective registers.
+ * Some of the registers need to be indexed/configured using hardware global
+ * device values and other registers need 0-based values, which represent PF
+ * based values.
  */
-static void ice_ena_vf_mappings(struct ice_vf *vf)
+static void ice_ena_vf_msix_mappings(struct ice_vf *vf)
 {
-	int abs_vf_id, abs_first, abs_last;
+	int device_based_first_msix, device_based_last_msix;
+	int pf_based_first_msix, pf_based_last_msix, v;
 	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi;
-	struct device *dev;
-	int first, last, v;
+	int device_based_vf_id;
 	struct ice_hw *hw;
 	u32 reg;
 
-	dev = ice_pf_to_dev(pf);
 	hw = &pf->hw;
-	vsi = pf->vsi[vf->lan_vsi_idx];
-	first = vf->first_vector_idx;
-	last = (first + pf->num_msix_per_vf) - 1;
-	abs_first = first + pf->hw.func_caps.common_cap.msix_vector_first_id;
-	abs_last = (abs_first + pf->num_msix_per_vf) - 1;
-	abs_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
-
-	/* VF Vector allocation */
-	reg = (((abs_first << VPINT_ALLOC_FIRST_S) & VPINT_ALLOC_FIRST_M) |
-	       ((abs_last << VPINT_ALLOC_LAST_S) & VPINT_ALLOC_LAST_M) |
-	       VPINT_ALLOC_VALID_M);
+	pf_based_first_msix = vf->first_vector_idx;
+	pf_based_last_msix = (pf_based_first_msix + pf->num_msix_per_vf) - 1;
+
+	device_based_first_msix = pf_based_first_msix +
+		pf->hw.func_caps.common_cap.msix_vector_first_id;
+	device_based_last_msix =
+		(device_based_first_msix + pf->num_msix_per_vf) - 1;
+	device_based_vf_id = vf->vf_id + hw->func_caps.vf_base_id;
+
+	reg = (((device_based_first_msix << VPINT_ALLOC_FIRST_S) &
+		VPINT_ALLOC_FIRST_M) |
+	       ((device_based_last_msix << VPINT_ALLOC_LAST_S) &
+		VPINT_ALLOC_LAST_M) | VPINT_ALLOC_VALID_M);
 	wr32(hw, VPINT_ALLOC(vf->vf_id), reg);
 
-	reg = (((abs_first << VPINT_ALLOC_PCI_FIRST_S)
+	reg = (((device_based_first_msix << VPINT_ALLOC_PCI_FIRST_S)
 		 & VPINT_ALLOC_PCI_FIRST_M) |
-	       ((abs_last << VPINT_ALLOC_PCI_LAST_S) & VPINT_ALLOC_PCI_LAST_M) |
-	       VPINT_ALLOC_PCI_VALID_M);
+	       ((device_based_last_msix << VPINT_ALLOC_PCI_LAST_S) &
+		VPINT_ALLOC_PCI_LAST_M) | VPINT_ALLOC_PCI_VALID_M);
 	wr32(hw, VPINT_ALLOC_PCI(vf->vf_id), reg);
+
 	/* map the interrupts to its functions */
-	for (v = first; v <= last; v++) {
-		reg = (((abs_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
+	for (v = pf_based_first_msix; v <= pf_based_last_msix; v++) {
+		reg = (((device_based_vf_id << GLINT_VECT2FUNC_VF_NUM_S) &
 			GLINT_VECT2FUNC_VF_NUM_M) |
 		       ((hw->pf_id << GLINT_VECT2FUNC_PF_NUM_S) &
 			GLINT_VECT2FUNC_PF_NUM_M));
 		wr32(hw, GLINT_VECT2FUNC(v), reg);
 	}
 
-	/* Map mailbox interrupt. We put an explicit 0 here to remind us that
-	 * VF admin queue interrupts will go to VF MSI-X vector 0.
-	 */
-	wr32(hw, VPINT_MBX_CTL(abs_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M | 0);
+	/* Map mailbox interrupt to VF MSI-X vector 0 */
+	wr32(hw, VPINT_MBX_CTL(device_based_vf_id), VPINT_MBX_CTL_CAUSE_ENA_M);
+}
+
+/**
+ * ice_ena_vf_q_mappings - enable Rx/Tx queue mappings for a VF
+ * @vf: VF to enable the mappings for
+ * @max_txq: max Tx queues allowed on the VF's VSI
+ * @max_rxq: max Rx queues allowed on the VF's VSI
+ */
+static void ice_ena_vf_q_mappings(struct ice_vf *vf, u16 max_txq, u16 max_rxq)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
 	/* set regardless of mapping mode */
 	wr32(hw, VPLAN_TXQ_MAPENA(vf->vf_id), VPLAN_TXQ_MAPENA_TX_ENA_M);
 
@@ -711,7 +726,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 		 */
 		reg = (((vsi->txq_map[0] << VPLAN_TX_QBASE_VFFIRSTQ_S) &
 			VPLAN_TX_QBASE_VFFIRSTQ_M) |
-		       (((vsi->alloc_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
+		       (((max_txq - 1) << VPLAN_TX_QBASE_VFNUMQ_S) &
 			VPLAN_TX_QBASE_VFNUMQ_M));
 		wr32(hw, VPLAN_TX_QBASE(vf->vf_id), reg);
 	} else {
@@ -729,7 +744,7 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 		 */
 		reg = (((vsi->rxq_map[0] << VPLAN_RX_QBASE_VFFIRSTQ_S) &
 			VPLAN_RX_QBASE_VFFIRSTQ_M) |
-		       (((vsi->alloc_txq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
+		       (((max_rxq - 1) << VPLAN_RX_QBASE_VFNUMQ_S) &
 			VPLAN_RX_QBASE_VFNUMQ_M));
 		wr32(hw, VPLAN_RX_QBASE(vf->vf_id), reg);
 	} else {
@@ -737,6 +752,18 @@ static void ice_ena_vf_mappings(struct ice_vf *vf)
 	}
 }
 
+/**
+ * ice_ena_vf_mappings - enable VF MSIX and queue mapping
+ * @vf: pointer to the VF structure
+ */
+static void ice_ena_vf_mappings(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	ice_ena_vf_msix_mappings(vf);
+	ice_ena_vf_q_mappings(vf, vsi->alloc_txq, vsi->alloc_rxq);
+}
+
 /**
  * ice_determine_res
  * @pf: pointer to the PF structure
-- 
cgit v1.2.3-59-g8ed1b


From 02337f1f59148ad2d361feae0a11f6596b04632b Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:08 -0700
Subject: ice: Simplify ice_sriov_configure

Add a new function for checking if SR-IOV can be configured based on
the PF and/or device's state/capabilities. Also, simplify the flow in
ice_sriov_configure().

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 72 ++++++++++++++++--------
 1 file changed, 48 insertions(+), 24 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 621ec0cc6fff..b699ca81d8c4 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1460,6 +1460,8 @@ static bool ice_pf_state_is_nominal(struct ice_pf *pf)
  * ice_pci_sriov_ena - Enable or change number of VFs
  * @pf: pointer to the PF structure
  * @num_vfs: number of VFs to allocate
+ *
+ * Returns 0 on success and negative on failure
  */
 static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 {
@@ -1467,20 +1469,10 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 	struct device *dev = ice_pf_to_dev(pf);
 	int err;
 
-	if (!ice_pf_state_is_nominal(pf)) {
-		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
-		return -EBUSY;
-	}
-
-	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
-		dev_err(dev, "This device is not capable of SR-IOV\n");
-		return -EOPNOTSUPP;
-	}
-
 	if (pre_existing_vfs && pre_existing_vfs != num_vfs)
 		ice_free_vfs(pf);
 	else if (pre_existing_vfs && pre_existing_vfs == num_vfs)
-		return num_vfs;
+		return 0;
 
 	if (num_vfs > pf->num_vfs_supported) {
 		dev_err(dev, "Can't enable %d VFs, max VFs supported is %d\n",
@@ -1496,37 +1488,69 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 	}
 
 	set_bit(ICE_FLAG_SRIOV_ENA, pf->flags);
-	return num_vfs;
+	return 0;
+}
+
+/**
+ * ice_check_sriov_allowed - check if SR-IOV is allowed based on various checks
+ * @pf: PF to enabled SR-IOV on
+ */
+static int ice_check_sriov_allowed(struct ice_pf *pf)
+{
+	struct device *dev = ice_pf_to_dev(pf);
+
+	if (!test_bit(ICE_FLAG_SRIOV_CAPABLE, pf->flags)) {
+		dev_err(dev, "This device is not capable of SR-IOV\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (ice_is_safe_mode(pf)) {
+		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
+		return -EOPNOTSUPP;
+	}
+
+	if (!ice_pf_state_is_nominal(pf)) {
+		dev_err(dev, "Cannot enable SR-IOV, device not ready\n");
+		return -EBUSY;
+	}
+
+	return 0;
 }
 
 /**
  * ice_sriov_configure - Enable or change number of VFs via sysfs
  * @pdev: pointer to a pci_dev structure
- * @num_vfs: number of VFs to allocate
+ * @num_vfs: number of VFs to allocate or 0 to free VFs
  *
- * This function is called when the user updates the number of VFs in sysfs.
+ * This function is called when the user updates the number of VFs in sysfs. On
+ * success return whatever num_vfs was set to by the caller. Return negative on
+ * failure.
  */
 int ice_sriov_configure(struct pci_dev *pdev, int num_vfs)
 {
 	struct ice_pf *pf = pci_get_drvdata(pdev);
 	struct device *dev = ice_pf_to_dev(pf);
+	int err;
 
-	if (ice_is_safe_mode(pf)) {
-		dev_err(dev, "SR-IOV cannot be configured - Device is in Safe Mode\n");
-		return -EOPNOTSUPP;
-	}
+	err = ice_check_sriov_allowed(pf);
+	if (err)
+		return err;
 
-	if (num_vfs)
-		return ice_pci_sriov_ena(pf, num_vfs);
+	if (!num_vfs) {
+		if (!pci_vfs_assigned(pdev)) {
+			ice_free_vfs(pf);
+			return 0;
+		}
 
-	if (!pci_vfs_assigned(pdev)) {
-		ice_free_vfs(pf);
-	} else {
 		dev_err(dev, "can't free VFs because some are assigned to VMs.\n");
 		return -EBUSY;
 	}
 
-	return 0;
+	err = ice_pci_sriov_ena(pf, num_vfs);
+	if (err)
+		return err;
+
+	return num_vfs;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From cfcee02b6c15e8866d03cae3b80edc4e9ad8cc7d Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:09 -0700
Subject: ice: Add helper function for clearing VPGEN_VFRTRIG

Create a helper function for clearing VPGEN_VFRTRIG as this needs to be
done on reset to notify the VF that we are done resetting it. Also, it
needs to be done on SR-IOV initialization/creation in case it was left
in a bad state after SR-IOV tear down.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 31 +++++++++++++++---------
 1 file changed, 20 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index b699ca81d8c4..039f0b057603 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -961,6 +961,21 @@ static int ice_set_per_vf_res(struct ice_pf *pf)
 	return 0;
 }
 
+/**
+ * ice_clear_vf_reset_trigger - enable VF to access hardware
+ * @vf: VF to enabled hardware access for
+ */
+static void ice_clear_vf_reset_trigger(struct ice_vf *vf)
+{
+	struct ice_hw *hw = &vf->pf->hw;
+	u32 reg;
+
+	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
+	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
+	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	ice_flush(hw);
+}
+
 /**
  * ice_cleanup_and_realloc_vf - Clean up VF and reallocate resources after reset
  * @vf: pointer to the VF structure
@@ -974,26 +989,20 @@ static void ice_cleanup_and_realloc_vf(struct ice_vf *vf)
 {
 	struct ice_pf *pf = vf->pf;
 	struct ice_hw *hw;
-	u32 reg;
 
 	hw = &pf->hw;
 
-	/* PF software completes the flow by notifying VF that reset flow is
-	 * completed. This is done by enabling hardware by clearing the reset
-	 * bit in the VPGEN_VFRTRIG reg and setting VFR_STATE in the VFGEN_RSTAT
-	 * register to VFR completed (done at the end of this function)
-	 * By doing this we allow HW to access VF memory at any point. If we
-	 * did it any sooner, HW could access memory while it was being freed
-	 * in ice_free_vf_res(), causing an IOMMU fault.
+	/* Allow HW to access VF memory after calling
+	 * ice_clear_vf_reset_trigger(). If we did it any sooner, HW could
+	 * access memory while it was being freed in ice_free_vf_res(), causing
+	 * an IOMMU fault.
 	 *
 	 * On the other hand, this needs to be done ASAP, because the VF driver
 	 * is waiting for this to happen and may report a timeout. It's
 	 * harmless, but it gets logged into Guest OS kernel log, so best avoid
 	 * it.
 	 */
-	reg = rd32(hw, VPGEN_VFRTRIG(vf->vf_id));
-	reg &= ~VPGEN_VFRTRIG_VFSWR_M;
-	wr32(hw, VPGEN_VFRTRIG(vf->vf_id), reg);
+	ice_clear_vf_reset_trigger(vf);
 
 	/* reallocate VF resources to finish resetting the VSI state */
 	if (!ice_alloc_vf_res(vf)) {
-- 
cgit v1.2.3-59-g8ed1b


From 916c7fdf5e938052a869f78b35a0ca2214d25b12 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:10 -0700
Subject: ice: Separate VF VSI initialization/creation from reset flow

Currently the same flow is used for VF VSI initialization/creation and VF
VSI reset. This makes the initialization/creation flow unnecessarily
complicated. Fix this by separating the initialization/creation of the
VF VSI from the reset flow.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 110 ++++++++++++++++++++++-
 1 file changed, 106 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 039f0b057603..72a9da3164d9 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1375,6 +1375,99 @@ static void ice_vc_notify_vf_reset(struct ice_vf *vf)
 			      NULL);
 }
 
+/**
+ * ice_init_vf_vsi_res - initialize/setup VF VSI resources
+ * @vf: VF to initialize/setup the VSI for
+ *
+ * This function creates a VSI for the VF, adds a VLAN 0 filter, and sets up the
+ * VF VSI's broadcast filter and is only used during initial VF creation.
+ */
+static int ice_init_vf_vsi_res(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	u8 broadcast[ETH_ALEN];
+	enum ice_status status;
+	struct ice_vsi *vsi;
+	struct device *dev;
+	int err;
+
+	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
+
+	dev = ice_pf_to_dev(pf);
+	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
+	if (!vsi) {
+		dev_err(dev, "Failed to create VF VSI\n");
+		return -ENOMEM;
+	}
+
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
+	if (err) {
+		dev_warn(dev, "Failed to add VLAN 0 filter for VF %d\n",
+			 vf->vf_id);
+		goto release_vsi;
+	}
+
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (status) {
+		dev_err(dev, "Failed to add broadcast MAC filter for VF %d, status %s\n",
+			vf->vf_id, ice_stat_str(status));
+		err = ice_status_to_errno(status);
+		goto release_vsi;
+	}
+
+	vf->num_mac = 1;
+
+	return 0;
+
+release_vsi:
+	ice_vsi_release(vsi);
+	return err;
+}
+
+/**
+ * ice_start_vfs - start VFs so they are ready to be used by SR-IOV
+ * @pf: PF the VFs are associated with
+ */
+static int ice_start_vfs(struct ice_pf *pf)
+{
+	struct ice_hw *hw = &pf->hw;
+	int retval, i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		ice_clear_vf_reset_trigger(vf);
+
+		retval = ice_init_vf_vsi_res(vf);
+		if (retval) {
+			dev_err(ice_pf_to_dev(pf), "Failed to initialize VSI resources for VF %d, error %d\n",
+				vf->vf_id, retval);
+			goto teardown;
+		}
+
+		set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+		ice_ena_vf_mappings(vf);
+		wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
+	}
+
+	ice_flush(hw);
+	return 0;
+
+teardown:
+	for (i = i - 1; i >= 0; i--) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		ice_dis_vf_mappings(vf);
+		ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
+	}
+
+	return retval;
+}
+
 /**
  * ice_alloc_vfs - Allocate and set up VFs resources
  * @pf: pointer to the PF structure
@@ -1407,6 +1500,13 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 	pf->vf = vfs;
 	pf->num_alloc_vfs = num_alloc_vfs;
 
+	if (ice_set_per_vf_res(pf)) {
+		dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n",
+			num_alloc_vfs);
+		ret = -ENOSPC;
+		goto err_unroll_sriov;
+	}
+
 	/* apply default profile */
 	ice_for_each_vf(pf, i) {
 		vfs[i].pf = pf;
@@ -1416,15 +1516,17 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 		/* assign default capabilities */
 		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
 		vfs[i].spoofchk = true;
+		vfs[i].num_vf_qs = pf->num_qps_per_vf;
 	}
 
-	/* VF resources get allocated with initialization */
-	if (!ice_config_res_vfs(pf)) {
-		ret = -EIO;
+	if (ice_start_vfs(pf)) {
+		dev_err(dev, "Failed to start VF(s)\n");
+		ret = -EAGAIN;
 		goto err_unroll_sriov;
 	}
 
-	return ret;
+	clear_bit(__ICE_VF_DIS, pf->state);
+	return 0;
 
 err_unroll_sriov:
 	pf->vf = NULL;
-- 
cgit v1.2.3-59-g8ed1b


From a06325a0901a887009d28e9fa06168d05c35c941 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:11 -0700
Subject: ice: Renaming and simplification in VF init path

Some function names weren't very clear and some portions of VF creation
could be moved into functions for clarity. Fix this by renaming some
functions and moving pieces of code into clearly named functions.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 83 +++++++++++++++---------
 1 file changed, 54 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 72a9da3164d9..92a442ec7314 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1469,16 +1469,56 @@ teardown:
 }
 
 /**
- * ice_alloc_vfs - Allocate and set up VFs resources
+ * ice_set_dflt_settings - set VF defaults during initialization/creation
+ * @pf: PF holding reference to all VFs for default configuration
+ */
+static void ice_set_dflt_settings_vfs(struct ice_pf *pf)
+{
+	int i;
+
+	ice_for_each_vf(pf, i) {
+		struct ice_vf *vf = &pf->vf[i];
+
+		vf->pf = pf;
+		vf->vf_id = i;
+		vf->vf_sw_id = pf->first_sw;
+		/* assign default capabilities */
+		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vf->vf_caps);
+		vf->spoofchk = true;
+		vf->num_vf_qs = pf->num_qps_per_vf;
+	}
+}
+
+/**
+ * ice_alloc_vfs - allocate num_vfs in the PF structure
+ * @pf: PF to store the allocated VFs in
+ * @num_vfs: number of VFs to allocate
+ */
+static int ice_alloc_vfs(struct ice_pf *pf, int num_vfs)
+{
+	struct ice_vf *vfs;
+
+	vfs = devm_kcalloc(ice_pf_to_dev(pf), num_vfs, sizeof(*vfs),
+			   GFP_KERNEL);
+	if (!vfs)
+		return -ENOMEM;
+
+	pf->vf = vfs;
+	pf->num_alloc_vfs = num_vfs;
+
+	return 0;
+}
+
+/**
+ * ice_ena_vfs - enable VFs so they are ready to be used
  * @pf: pointer to the PF structure
- * @num_alloc_vfs: number of VFs to allocate
+ * @num_vfs: number of VFs to enable
  */
-static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
+static int ice_ena_vfs(struct ice_pf *pf, u16 num_vfs)
 {
 	struct device *dev = ice_pf_to_dev(pf);
 	struct ice_hw *hw = &pf->hw;
-	struct ice_vf *vfs;
-	int i, ret;
+	int ret;
 
 	/* Disable global interrupt 0 so we don't try to handle the VFLR. */
 	wr32(hw, GLINT_DYN_CTL(pf->oicr_idx),
@@ -1486,38 +1526,24 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 	set_bit(__ICE_OICR_INTR_DIS, pf->state);
 	ice_flush(hw);
 
-	ret = pci_enable_sriov(pf->pdev, num_alloc_vfs);
+	ret = pci_enable_sriov(pf->pdev, num_vfs);
 	if (ret) {
 		pf->num_alloc_vfs = 0;
 		goto err_unroll_intr;
 	}
-	/* allocate memory */
-	vfs = devm_kcalloc(dev, num_alloc_vfs, sizeof(*vfs), GFP_KERNEL);
-	if (!vfs) {
-		ret = -ENOMEM;
+
+	ret = ice_alloc_vfs(pf, num_vfs);
+	if (ret)
 		goto err_pci_disable_sriov;
-	}
-	pf->vf = vfs;
-	pf->num_alloc_vfs = num_alloc_vfs;
 
 	if (ice_set_per_vf_res(pf)) {
 		dev_err(dev, "Not enough resources for %d VFs, try with fewer number of VFs\n",
-			num_alloc_vfs);
+			num_vfs);
 		ret = -ENOSPC;
 		goto err_unroll_sriov;
 	}
 
-	/* apply default profile */
-	ice_for_each_vf(pf, i) {
-		vfs[i].pf = pf;
-		vfs[i].vf_sw_id = pf->first_sw;
-		vfs[i].vf_id = i;
-
-		/* assign default capabilities */
-		set_bit(ICE_VIRTCHNL_VF_CAP_L2, &vfs[i].vf_caps);
-		vfs[i].spoofchk = true;
-		vfs[i].num_vf_qs = pf->num_qps_per_vf;
-	}
+	ice_set_dflt_settings_vfs(pf);
 
 	if (ice_start_vfs(pf)) {
 		dev_err(dev, "Failed to start VF(s)\n");
@@ -1529,9 +1555,8 @@ static int ice_alloc_vfs(struct ice_pf *pf, u16 num_alloc_vfs)
 	return 0;
 
 err_unroll_sriov:
+	devm_kfree(dev, pf->vf);
 	pf->vf = NULL;
-	devm_kfree(dev, vfs);
-	vfs = NULL;
 	pf->num_alloc_vfs = 0;
 err_pci_disable_sriov:
 	pci_disable_sriov(pf->pdev);
@@ -1591,8 +1616,8 @@ static int ice_pci_sriov_ena(struct ice_pf *pf, int num_vfs)
 		return -EOPNOTSUPP;
 	}
 
-	dev_info(dev, "Allocating %d VFs\n", num_vfs);
-	err = ice_alloc_vfs(pf, num_vfs);
+	dev_info(dev, "Enabling %d VFs\n", num_vfs);
+	err = ice_ena_vfs(pf, num_vfs);
 	if (err) {
 		dev_err(dev, "Failed to enable SR-IOV: %d\n", err);
 		return err;
-- 
cgit v1.2.3-59-g8ed1b


From eb2af3ee94de7f493745adc34bb9d170ced8c5a9 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:12 -0700
Subject: ice: Add function to set trust mode bit on reset

As the title says, use a function to set trust mode bit on reset.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 17 +++++++++++++----
 1 file changed, 13 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 92a442ec7314..4005a4caf2f0 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -610,6 +610,18 @@ ice_alloc_vsi_res_exit:
 	return status;
 }
 
+/**
+ * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
+ * @vf: VF to configure trust setting for
+ */
+static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
+{
+	if (vf->trusted)
+		set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+	else
+		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+}
+
 /**
  * ice_alloc_vf_res - Allocate VF resources
  * @vf: pointer to the VF structure
@@ -635,10 +647,7 @@ static int ice_alloc_vf_res(struct ice_vf *vf)
 	if (status)
 		goto ice_alloc_vf_res_exit;
 
-	if (vf->trusted)
-		set_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
-	else
-		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
+	ice_vf_set_host_trust_cfg(vf);
 
 	/* VF is now completely initialized */
 	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
-- 
cgit v1.2.3-59-g8ed1b


From 350e822cd54ff331bb421a59b6c096bb1739d22b Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:13 -0700
Subject: ice: Add functions to rebuild host VLAN/MAC config for a VF

When resetting a VF the VLAN and MAC filter configurations need to be
replayed. Add helper functions for this purpose.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 121 +++++++++++++++++------
 1 file changed, 89 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 4005a4caf2f0..3a714c81b5b2 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -540,6 +540,82 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
 	return pf->sriov_base_vector + vf->vf_id * pf->num_msix_per_vf;
 }
 
+/**
+ * ice_vf_rebuild_host_vlan_cfg - add VLAN 0 filter or rebuild the Port VLAN
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds either a VLAN 0 or port VLAN based filter after reset.
+ */
+static int ice_vf_rebuild_host_vlan_cfg(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	u16 vlan_id = 0;
+	int err;
+
+	if (vf->port_vlan_info) {
+		err = ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
+		if (err) {
+			dev_err(dev, "failed to configure port VLAN via VSI parameters for VF %u, error %d\n",
+				vf->vf_id, err);
+			return err;
+		}
+
+		vlan_id = vf->port_vlan_info & VLAN_VID_MASK;
+	}
+
+	/* vlan_id will either be 0 or the port VLAN number */
+	err = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI);
+	if (err) {
+		dev_err(dev, "failed to add %s VLAN %u filter for VF %u, error %d\n",
+			vf->port_vlan_info ? "port" : "", vlan_id, vf->vf_id,
+			err);
+		return err;
+	}
+
+	return 0;
+}
+
+/**
+ * ice_vf_rebuild_host_mac_cfg - add broadcast and the VF's perm_addr/LAA
+ * @vf: VF to add MAC filters for
+ *
+ * Called after a VF VSI has been re-added/rebuilt during reset. The PF driver
+ * always re-adds a broadcast filter and the VF's perm_addr/LAA after reset.
+ */
+static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+	struct device *dev = ice_pf_to_dev(vf->pf);
+	enum ice_status status;
+	u8 broadcast[ETH_ALEN];
+
+	eth_broadcast_addr(broadcast);
+	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
+	if (status) {
+		dev_err(dev, "failed to add broadcast MAC filter for VF %u, error %s\n",
+			vf->vf_id, ice_stat_str(status));
+		return ice_status_to_errno(status);
+	}
+
+	vf->num_mac++;
+
+	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
+		status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr,
+					  ICE_FWD_TO_VSI);
+		if (status) {
+			dev_err(dev, "failed to add default unicast MAC filter %pM for VF %u, error %s\n",
+				&vf->dflt_lan_addr.addr[0], vf->vf_id,
+				ice_stat_str(status));
+			return ice_status_to_errno(status);
+		}
+		vf->num_mac++;
+	}
+
+	return 0;
+}
+
 /**
  * ice_alloc_vsi_res - Setup VF VSI and its resources
  * @vf: pointer to the VF structure
@@ -549,10 +625,9 @@ static int ice_calc_vf_first_vector_idx(struct ice_pf *pf, struct ice_vf *vf)
 static int ice_alloc_vsi_res(struct ice_vf *vf)
 {
 	struct ice_pf *pf = vf->pf;
-	u8 broadcast[ETH_ALEN];
 	struct ice_vsi *vsi;
 	struct device *dev;
-	int status = 0;
+	int ret;
 
 	dev = ice_pf_to_dev(pf);
 	/* first vector index is the VFs OICR index */
@@ -567,38 +642,20 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	vf->lan_vsi_idx = vsi->idx;
 	vf->lan_vsi_num = vsi->vsi_num;
 
-	/* Check if port VLAN exist before, and restore it accordingly */
-	if (vf->port_vlan_info) {
-		ice_vsi_manage_pvid(vsi, vf->port_vlan_info, true);
-		if (ice_vsi_add_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK,
-				     ICE_FWD_TO_VSI))
-			dev_warn(ice_pf_to_dev(pf), "Failed to add Port VLAN %d filter for VF %d\n",
-				 vf->port_vlan_info & VLAN_VID_MASK, vf->vf_id);
-	} else {
-		/* set VLAN 0 filter by default when no port VLAN is
-		 * enabled. If a port VLAN is enabled we don't want
-		 * untagged broadcast/multicast traffic seen on the VF
-		 * interface.
-		 */
-		if (ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI))
-			dev_warn(ice_pf_to_dev(pf), "Failed to add VLAN 0 filter for VF %d, MDD events will trigger. Reset the VF, disable spoofchk, or enable 8021q module on the guest\n",
-				 vf->vf_id);
+	ret = ice_vf_rebuild_host_vlan_cfg(vf);
+	if (ret) {
+		dev_err(dev, "failed to rebuild default MAC configuration for VF %d, error %d\n",
+			vf->vf_id, ret);
+		goto ice_alloc_vsi_res_exit;
 	}
 
-	if (is_valid_ether_addr(vf->dflt_lan_addr.addr)) {
-		status = ice_fltr_add_mac(vsi, vf->dflt_lan_addr.addr,
-					  ICE_FWD_TO_VSI);
-		if (status)
-			goto ice_alloc_vsi_res_exit;
-	}
 
-	eth_broadcast_addr(broadcast);
-	status = ice_fltr_add_mac(vsi, broadcast, ICE_FWD_TO_VSI);
-	if (status)
-		dev_err(dev, "could not add mac filters error %d\n",
-			status);
-	else
-		vf->num_mac = 1;
+	ret = ice_vf_rebuild_host_mac_cfg(vf);
+	if (ret) {
+		dev_err(dev, "failed to rebuild default MAC configuration for VF %d, error %d\n",
+			vf->vf_id, ret);
+		goto ice_alloc_vsi_res_exit;
+	}
 
 	/* Clear this bit after VF initialization since we shouldn't reclaim
 	 * and reassign interrupts for synchronous or asynchronous VFR events.
@@ -607,7 +664,7 @@ static int ice_alloc_vsi_res(struct ice_vf *vf)
 	 * more vectors.
 	 */
 ice_alloc_vsi_res_exit:
-	return status;
+	return ret;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From a58e1d817475eb45471869190ff72dd1b493a936 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Fri, 15 May 2020 17:51:14 -0700
Subject: ice: remove VM/VF disable command on CORER/GLOBR reset

Remove VM/VF disable AQC (opcode 0x0C31) when resetting all VFs.
This is not required for CORER/GLOBR reset.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 3a714c81b5b2..245310a52e1b 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -1196,17 +1196,6 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v)
 		ice_trigger_vf_reset(&pf->vf[v], is_vflr, true);
 
-	ice_for_each_vf(pf, v) {
-		struct ice_vsi *vsi;
-
-		vf = &pf->vf[v];
-		vsi = pf->vsi[vf->lan_vsi_idx];
-		if (test_bit(ICE_VF_STATE_QS_ENA, vf->vf_states))
-			ice_dis_vf_qs(vf);
-		ice_dis_vsi_txq(vsi->port_info, vsi->idx, 0, 0, NULL, NULL,
-				NULL, ICE_VF_RESET, vf->vf_id, NULL);
-	}
-
 	/* HW requires some time to make sure it can flush the FIFO for a VF
 	 * when it resets it. Poll the VPGEN_VFRSTAT register for each VF in
 	 * sequence to make sure that it has completed. We'll keep track of
-- 
cgit v1.2.3-59-g8ed1b


From 12bb018c538c3b9a050f69f62fa09fa6c9160bca Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:15 -0700
Subject: ice: Refactor VF reset

Currently VF VSI are being reset twice during a PFR or greater. This is
causing reset, specifically resetting all VFs, to take too long. This is
causing various issues with VF drivers not being able to gracefully
handle the VF reset timeout. Fix this by refactoring how VF reset is
handled for the case mentioned previously and for the VFR/VFLR case.

The refactor was done by doing the following:

1. Removing the call to ice_vsi_rebuild_by_type for
   ICE_VSI_VF VSI, which was causing the initial VSI rebuild.

2. Adding pre/post VSI rebuild functions that can be called
   in both the reset all VFs case and reset individual VF case.

3. Adding VSI rebuild functions that are specific for the reset all VFs
   case and adding functions that are specific for the reset individual
   VF case.

4. Calling the pre-rebuild function, then the specific VSI rebuild
   function based on the reset type, and then calling the post-rebuild
   function to handle VF resets.

This patch series makes some assumptions about how VSI are handled by
FW during reset:

1. During a PFR or greater all VSI in FW will be cleared.
2. During a VFR/VFLR the VSI rebuild responsibility is in the hands of
   the PF software.
3. There is code in the ice_reset_all_vfs() case to amortize operations
   if possible. This was left intact.
4. PF software should not be replaying VSI based filters that were added
   other than host configured, PF software configured, or the VF's
   default/LAA MAC. This is the VF driver's job after it has been reset.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c        |  13 +-
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 304 ++++++++++-------------
 2 files changed, 130 insertions(+), 187 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index 69854b8644a6..bbf92d2f1ac1 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -4897,6 +4897,11 @@ static void ice_update_pf_netdev_link(struct ice_pf *pf)
  * ice_rebuild - rebuild after reset
  * @pf: PF to rebuild
  * @reset_type: type of reset
+ *
+ * Do not rebuild VF VSI in this flow because that is already handled via
+ * ice_reset_all_vfs(). This is because requirements for resetting a VF after a
+ * PFR/CORER/GLOBER/etc. are different than the normal flow. Also, we don't want
+ * to reset/rebuild all the VF VSI twice.
  */
 static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 {
@@ -4994,14 +4999,6 @@ static void ice_rebuild(struct ice_pf *pf, enum ice_reset_req reset_type)
 		goto err_vsi_rebuild;
 	}
 
-	if (test_bit(ICE_FLAG_SRIOV_ENA, pf->flags)) {
-		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_VF);
-		if (err) {
-			dev_err(dev, "VF VSI rebuild failed: %d\n", err);
-			goto err_vsi_rebuild;
-		}
-	}
-
 	/* If Flow Director is active */
 	if (test_bit(ICE_FLAG_FD_ENA, pf->flags)) {
 		err = ice_vsi_rebuild_by_type(pf, ICE_VSI_CTRL);
diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 245310a52e1b..727f371db465 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -413,10 +413,7 @@ static void ice_trigger_vf_reset(struct ice_vf *vf, bool is_vflr, bool is_pfr)
 	clear_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
 
 	/* Disable VF's configuration API during reset. The flag is re-enabled
-	 * in ice_alloc_vf_res(), when it's safe again to access VF's VSI.
-	 * It's normally disabled in ice_free_vf_res(), but it's safer
-	 * to do it earlier to give some time to finish to any VF config
-	 * functions that may still be running at this point.
+	 * when it's safe again to access VF's VSI.
 	 */
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
 
@@ -616,57 +613,6 @@ static int ice_vf_rebuild_host_mac_cfg(struct ice_vf *vf)
 	return 0;
 }
 
-/**
- * ice_alloc_vsi_res - Setup VF VSI and its resources
- * @vf: pointer to the VF structure
- *
- * Returns 0 on success, negative value on failure
- */
-static int ice_alloc_vsi_res(struct ice_vf *vf)
-{
-	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi;
-	struct device *dev;
-	int ret;
-
-	dev = ice_pf_to_dev(pf);
-	/* first vector index is the VFs OICR index */
-	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
-
-	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
-	if (!vsi) {
-		dev_err(dev, "Failed to create VF VSI\n");
-		return -ENOMEM;
-	}
-
-	vf->lan_vsi_idx = vsi->idx;
-	vf->lan_vsi_num = vsi->vsi_num;
-
-	ret = ice_vf_rebuild_host_vlan_cfg(vf);
-	if (ret) {
-		dev_err(dev, "failed to rebuild default MAC configuration for VF %d, error %d\n",
-			vf->vf_id, ret);
-		goto ice_alloc_vsi_res_exit;
-	}
-
-
-	ret = ice_vf_rebuild_host_mac_cfg(vf);
-	if (ret) {
-		dev_err(dev, "failed to rebuild default MAC configuration for VF %d, error %d\n",
-			vf->vf_id, ret);
-		goto ice_alloc_vsi_res_exit;
-	}
-
-	/* Clear this bit after VF initialization since we shouldn't reclaim
-	 * and reassign interrupts for synchronous or asynchronous VFR events.
-	 * We don't want to reconfigure interrupts since AVF driver doesn't
-	 * expect vector assignment to be changed unless there is a request for
-	 * more vectors.
-	 */
-ice_alloc_vsi_res_exit:
-	return ret;
-}
-
 /**
  * ice_vf_set_host_trust_cfg - set trust setting based on pre-reset value
  * @vf: VF to configure trust setting for
@@ -679,43 +625,6 @@ static void ice_vf_set_host_trust_cfg(struct ice_vf *vf)
 		clear_bit(ICE_VIRTCHNL_VF_CAP_PRIVILEGE, &vf->vf_caps);
 }
 
-/**
- * ice_alloc_vf_res - Allocate VF resources
- * @vf: pointer to the VF structure
- */
-static int ice_alloc_vf_res(struct ice_vf *vf)
-{
-	struct ice_pf *pf = vf->pf;
-	int tx_rx_queue_left;
-	int status;
-
-	/* Update number of VF queues, in case VF had requested for queue
-	 * changes
-	 */
-	tx_rx_queue_left = min_t(int, ice_get_avail_txq_count(pf),
-				 ice_get_avail_rxq_count(pf));
-	tx_rx_queue_left += pf->num_qps_per_vf;
-	if (vf->num_req_qs && vf->num_req_qs <= tx_rx_queue_left &&
-	    vf->num_req_qs != vf->num_vf_qs)
-		vf->num_vf_qs = vf->num_req_qs;
-
-	/* setup VF VSI and necessary resources */
-	status = ice_alloc_vsi_res(vf);
-	if (status)
-		goto ice_alloc_vf_res_exit;
-
-	ice_vf_set_host_trust_cfg(vf);
-
-	/* VF is now completely initialized */
-	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
-
-	return status;
-
-ice_alloc_vf_res_exit:
-	ice_free_vf_res(vf);
-	return status;
-}
-
 /**
  * ice_ena_vf_msix_mappings - enable VF MSIX mappings in hardware
  * @vf: VF to enable MSIX mappings for
@@ -1042,48 +951,6 @@ static void ice_clear_vf_reset_trigger(struct ice_vf *vf)
 	ice_flush(hw);
 }
 
-/**
- * ice_cleanup_and_realloc_vf - Clean up VF and reallocate resources after reset
- * @vf: pointer to the VF structure
- *
- * Cleanup a VF after the hardware reset is finished. Expects the caller to
- * have verified whether the reset is finished properly, and ensure the
- * minimum amount of wait time has passed. Reallocate VF resources back to make
- * VF state active
- */
-static void ice_cleanup_and_realloc_vf(struct ice_vf *vf)
-{
-	struct ice_pf *pf = vf->pf;
-	struct ice_hw *hw;
-
-	hw = &pf->hw;
-
-	/* Allow HW to access VF memory after calling
-	 * ice_clear_vf_reset_trigger(). If we did it any sooner, HW could
-	 * access memory while it was being freed in ice_free_vf_res(), causing
-	 * an IOMMU fault.
-	 *
-	 * On the other hand, this needs to be done ASAP, because the VF driver
-	 * is waiting for this to happen and may report a timeout. It's
-	 * harmless, but it gets logged into Guest OS kernel log, so best avoid
-	 * it.
-	 */
-	ice_clear_vf_reset_trigger(vf);
-
-	/* reallocate VF resources to finish resetting the VSI state */
-	if (!ice_alloc_vf_res(vf)) {
-		ice_ena_vf_mappings(vf);
-		set_bit(ICE_VF_STATE_ACTIVE, vf->vf_states);
-		clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
-	}
-
-	/* Tell the VF driver the reset is done. This needs to be done only
-	 * after VF has been fully initialized, because the VF driver may
-	 * request resources immediately after setting this flag.
-	 */
-	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
-}
-
 /**
  * ice_vf_set_vsi_promisc - set given VF VSI to given promiscuous mode(s)
  * @vf: pointer to the VF info
@@ -1125,44 +992,134 @@ ice_vf_set_vsi_promisc(struct ice_vf *vf, struct ice_vsi *vsi, u8 promisc_m,
 	return status;
 }
 
+static void ice_vf_clear_counters(struct ice_vf *vf)
+{
+	struct ice_vsi *vsi = vf->pf->vsi[vf->lan_vsi_idx];
+
+	vf->num_mac = 0;
+	vsi->num_vlan = 0;
+	memset(&vf->mdd_tx_events, 0, sizeof(vf->mdd_tx_events));
+	memset(&vf->mdd_rx_events, 0, sizeof(vf->mdd_rx_events));
+}
+
 /**
- * ice_config_res_vfs - Finalize allocation of VFs resources in one go
- * @pf: pointer to the PF structure
+ * ice_vf_pre_vsi_rebuild - tasks to be done prior to VSI rebuild
+ * @vf: VF to perform pre VSI rebuild tasks
  *
- * This function is being called as last part of resetting all VFs, or when
- * configuring VFs for the first time, where there is no resource to be freed
- * Returns true if resources were properly allocated for all VFs, and false
- * otherwise.
+ * These tasks are items that don't need to be amortized since they are most
+ * likely called in a for loop with all VF(s) in the reset_all_vfs() case.
  */
-static bool ice_config_res_vfs(struct ice_pf *pf)
+static void ice_vf_pre_vsi_rebuild(struct ice_vf *vf)
 {
-	struct device *dev = ice_pf_to_dev(pf);
-	struct ice_hw *hw = &pf->hw;
-	int v;
+	ice_vf_clear_counters(vf);
+	ice_clear_vf_reset_trigger(vf);
+}
 
-	if (ice_set_per_vf_res(pf)) {
-		dev_err(dev, "Cannot allocate VF resources, try with fewer number of VFs\n");
-		return false;
+/**
+ * ice_vf_rebuild_host_cfg - host admin configuration is persistent across reset
+ * @vf: VF to rebuild host configuration on
+ */
+static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
+{
+	struct device *dev = ice_pf_to_dev(vf->pf);
+
+	ice_vf_set_host_trust_cfg(vf);
+
+	if (ice_vf_rebuild_host_mac_cfg(vf))
+		dev_err(dev, "failed to rebuild default MAC configuration for VF %d\n",
+			vf->vf_id);
+
+	if (ice_vf_rebuild_host_vlan_cfg(vf))
+		dev_err(dev, "failed to rebuild VLAN configuration for VF %u\n",
+			vf->vf_id);
+}
+
+/**
+ * ice_vf_rebuild_vsi_with_release - release and setup the VF's VSI
+ * @vf: VF to release and setup the VSI for
+ *
+ * This is only called when a single VF is being reset (i.e. VFR, VFLR, host VF
+ * configuration change, etc.).
+ */
+static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+	ice_vsi_release(vsi);
+	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+		return -ENOMEM;
 	}
 
-	/* rearm global interrupts */
-	if (test_and_clear_bit(__ICE_OICR_INTR_DIS, pf->state))
-		ice_irq_dynamic_ena(hw, NULL, NULL);
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
 
-	/* Finish resetting each VF and allocate resources */
-	ice_for_each_vf(pf, v) {
-		struct ice_vf *vf = &pf->vf[v];
+	return 0;
+}
 
-		vf->num_vf_qs = pf->num_qps_per_vf;
-		dev_dbg(dev, "VF-id %d has %d queues configured\n", vf->vf_id,
-			vf->num_vf_qs);
-		ice_cleanup_and_realloc_vf(vf);
+/**
+ * ice_vf_rebuild_vsi - rebuild the VF's VSI
+ * @vf: VF to rebuild the VSI for
+ *
+ * This is only called when all VF(s) are being reset (i.e. PCIe Reset on the
+ * host, PFR, CORER, etc.).
+ */
+static int ice_vf_rebuild_vsi(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = pf->vsi[vf->lan_vsi_idx];
+
+	if (ice_vsi_rebuild(vsi, true)) {
+		dev_err(ice_pf_to_dev(pf), "failed to rebuild VF %d VSI\n",
+			vf->vf_id);
+		return -EIO;
 	}
+	/* vsi->idx will remain the same in this case so don't update
+	 * vf->lan_vsi_idx
+	 */
+	vsi->vsi_num = ice_get_hw_vsi_num(&pf->hw, vsi->idx);
+	vf->lan_vsi_num = vsi->vsi_num;
 
-	ice_flush(hw);
-	clear_bit(__ICE_VF_DIS, pf->state);
+	return 0;
+}
 
-	return true;
+/**
+ * ice_vf_set_initialized - VF is ready for VIRTCHNL communication
+ * @vf: VF to set in initialized state
+ *
+ * After this function the VF will be ready to receive/handle the
+ * VIRTCHNL_OP_GET_VF_RESOURCES message
+ */
+static void ice_vf_set_initialized(struct ice_vf *vf)
+{
+	ice_set_vf_state_qs_dis(vf);
+	clear_bit(ICE_VF_STATE_MC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_UC_PROMISC, vf->vf_states);
+	clear_bit(ICE_VF_STATE_DIS, vf->vf_states);
+	set_bit(ICE_VF_STATE_INIT, vf->vf_states);
+}
+
+/**
+ * ice_vf_post_vsi_rebuild - tasks to do after the VF's VSI have been rebuilt
+ * @vf: VF to perform tasks on
+ */
+static void ice_vf_post_vsi_rebuild(struct ice_vf *vf)
+{
+	struct ice_pf *pf = vf->pf;
+	struct ice_hw *hw;
+
+	hw = &pf->hw;
+
+	ice_vf_rebuild_host_cfg(vf);
+
+	ice_vf_set_initialized(vf);
+	ice_ena_vf_mappings(vf);
+	wr32(hw, VFGEN_RSTAT(vf->vf_id), VIRTCHNL_VFR_VFACTIVE);
 }
 
 /**
@@ -1232,21 +1189,13 @@ bool ice_reset_all_vfs(struct ice_pf *pf, bool is_vflr)
 	ice_for_each_vf(pf, v) {
 		vf = &pf->vf[v];
 
-		ice_free_vf_res(vf);
-
-		/* Free VF queues as well, and reallocate later.
-		 * If a given VF has different number of queues
-		 * configured, the request for update will come
-		 * via mailbox communication.
-		 */
-		vf->num_vf_qs = 0;
+		ice_vf_pre_vsi_rebuild(vf);
+		ice_vf_rebuild_vsi(vf);
+		ice_vf_post_vsi_rebuild(vf);
 	}
 
-	if (ice_sriov_free_msix_res(pf))
-		dev_err(dev, "Failed to free MSIX resources used by SR-IOV\n");
-
-	if (!ice_config_res_vfs(pf))
-		return false;
+	ice_flush(hw);
+	clear_bit(__ICE_VF_DIS, pf->state);
 
 	return true;
 }
@@ -1358,12 +1307,9 @@ bool ice_reset_vf(struct ice_vf *vf, bool is_vflr)
 			dev_err(dev, "disabling promiscuous mode failed\n");
 	}
 
-	/* free VF resources to begin resetting the VSI state */
-	ice_free_vf_res(vf);
-
-	ice_cleanup_and_realloc_vf(vf);
-
-	ice_flush(hw);
+	ice_vf_pre_vsi_rebuild(vf);
+	ice_vf_rebuild_vsi_with_release(vf);
+	ice_vf_post_vsi_rebuild(vf);
 
 	return true;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 3726cce258908ed6e30d52e2d4dfffe96ad2f962 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:16 -0700
Subject: ice: Refactor VF VSI release and setup functions

Currently when a VF VSI calls ice_vsi_release() and ice_vsi_setup() it
subsequently clears/sets the VF cached variables for lan_vsi_idx and
lan_vsi_num. This works fine, but can be improved by handling this in
the VF specific VSI release and setup functions.

Also, when a VF VSI is set up, too many parameters are passed that can be
derived from the VF. Fix this by only calling VF VSI setup with the bare
minimum parameters.

Also, add functionality to invalidate a VF's VSI when it's released
and/or setup fails. This will make it so a VF VSI cannot be accessed via
its cached vsi_idx/vsi_num in these cases.

Finally when a VF's VSI is invalidated set the lan_vsi_idx and
lan_vsi_num to ICE_NO_VSI to clearly show that there is no valid VSI
associated with this VF.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 86 +++++++++++++++---------
 1 file changed, 55 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 727f371db465..a126e7c7663d 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -181,6 +181,26 @@ static void ice_vc_notify_vf_link_state(struct ice_vf *vf)
 			      sizeof(pfe), NULL);
 }
 
+/**
+ * ice_vf_invalidate_vsi - invalidate vsi_idx/vsi_num to remove VSI access
+ * @vf: VF to remove access to VSI for
+ */
+static void ice_vf_invalidate_vsi(struct ice_vf *vf)
+{
+	vf->lan_vsi_idx = ICE_NO_VSI;
+	vf->lan_vsi_num = ICE_NO_VSI;
+}
+
+/**
+ * ice_vf_vsi_release - invalidate the VF's VSI after freeing it
+ * @vf: invalidate this VF's VSI after freeing it
+ */
+static void ice_vf_vsi_release(struct ice_vf *vf)
+{
+	ice_vsi_release(vf->pf->vsi[vf->lan_vsi_idx]);
+	ice_vf_invalidate_vsi(vf);
+}
+
 /**
  * ice_free_vf_res - Free a VF's resources
  * @vf: pointer to the VF info
@@ -196,10 +216,8 @@ static void ice_free_vf_res(struct ice_vf *vf)
 	clear_bit(ICE_VF_STATE_INIT, vf->vf_states);
 
 	/* free VSI and disconnect it from the parent uplink */
-	if (vf->lan_vsi_idx) {
-		ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
-		vf->lan_vsi_idx = 0;
-		vf->lan_vsi_num = 0;
+	if (vf->lan_vsi_idx != ICE_NO_VSI) {
+		ice_vf_vsi_release(vf);
 		vf->num_mac = 0;
 	}
 
@@ -505,19 +523,40 @@ out:
 	return ret;
 }
 
+/**
+ * ice_vf_get_port_info - Get the VF's port info structure
+ * @vf: VF used to get the port info structure for
+ */
+static struct ice_port_info *ice_vf_get_port_info(struct ice_vf *vf)
+{
+	return vf->pf->hw.port_info;
+}
+
 /**
  * ice_vf_vsi_setup - Set up a VF VSI
- * @pf: board private structure
- * @pi: pointer to the port_info instance
- * @vf_id: defines VF ID to which this VSI connects.
+ * @vf: VF to setup VSI for
  *
  * Returns pointer to the successfully allocated VSI struct on success,
  * otherwise returns NULL on failure.
  */
-static struct ice_vsi *
-ice_vf_vsi_setup(struct ice_pf *pf, struct ice_port_info *pi, u16 vf_id)
+static struct ice_vsi *ice_vf_vsi_setup(struct ice_vf *vf)
 {
-	return ice_vsi_setup(pf, pi, ICE_VSI_VF, vf_id);
+	struct ice_port_info *pi = ice_vf_get_port_info(vf);
+	struct ice_pf *pf = vf->pf;
+	struct ice_vsi *vsi;
+
+	vsi = ice_vsi_setup(pf, pi, ICE_VSI_VF, vf->vf_id);
+
+	if (!vsi) {
+		dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+		ice_vf_invalidate_vsi(vf);
+		return NULL;
+	}
+
+	vf->lan_vsi_idx = vsi->idx;
+	vf->lan_vsi_num = vsi->vsi_num;
+
+	return vsi;
 }
 
 /**
@@ -1043,19 +1082,9 @@ static void ice_vf_rebuild_host_cfg(struct ice_vf *vf)
  */
 static int ice_vf_rebuild_vsi_with_release(struct ice_vf *vf)
 {
-	struct ice_pf *pf = vf->pf;
-	struct ice_vsi *vsi;
-
-	vsi = pf->vsi[vf->lan_vsi_idx];
-	ice_vsi_release(vsi);
-	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
-	if (!vsi) {
-		dev_err(ice_pf_to_dev(pf), "Failed to create VF VSI\n");
+	ice_vf_vsi_release(vf);
+	if (!ice_vf_vsi_setup(vf))
 		return -ENOMEM;
-	}
-
-	vf->lan_vsi_idx = vsi->idx;
-	vf->lan_vsi_num = vsi->vsi_num;
 
 	return 0;
 }
@@ -1395,14 +1424,9 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
 	vf->first_vector_idx = ice_calc_vf_first_vector_idx(pf, vf);
 
 	dev = ice_pf_to_dev(pf);
-	vsi = ice_vf_vsi_setup(pf, pf->hw.port_info, vf->vf_id);
-	if (!vsi) {
-		dev_err(dev, "Failed to create VF VSI\n");
+	vsi = ice_vf_vsi_setup(vf);
+	if (!vsi)
 		return -ENOMEM;
-	}
-
-	vf->lan_vsi_idx = vsi->idx;
-	vf->lan_vsi_num = vsi->vsi_num;
 
 	err = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
 	if (err) {
@@ -1425,7 +1449,7 @@ static int ice_init_vf_vsi_res(struct ice_vf *vf)
 	return 0;
 
 release_vsi:
-	ice_vsi_release(vsi);
+	ice_vf_vsi_release(vf);
 	return err;
 }
 
@@ -1463,7 +1487,7 @@ teardown:
 		struct ice_vf *vf = &pf->vf[i];
 
 		ice_dis_vf_mappings(vf);
-		ice_vsi_release(pf->vsi[vf->lan_vsi_idx]);
+		ice_vf_vsi_release(vf);
 	}
 
 	return retval;
-- 
cgit v1.2.3-59-g8ed1b


From 123db31d01219a4f794f3769e7bca6649d65ecb1 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 28 May 2020 14:53:19 -0700
Subject: vmxnet3: prepare for version 4 changes

vmxnet3 is currently at version 3 and this patch initiates the
preparation to accommodate changes for version 4. Introduced utility
macros for vmxnet3 version 4 comparison and update Copyright
information.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/Makefile          | 2 +-
 drivers/net/vmxnet3/upt1_defs.h       | 2 +-
 drivers/net/vmxnet3/vmxnet3_defs.h    | 2 +-
 drivers/net/vmxnet3/vmxnet3_drv.c     | 2 +-
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 +-
 drivers/net/vmxnet3/vmxnet3_int.h     | 5 ++++-
 6 files changed, 9 insertions(+), 6 deletions(-)

diff --git a/drivers/net/vmxnet3/Makefile b/drivers/net/vmxnet3/Makefile
index 8cdbb63d1bb0..c5a167a1c85c 100644
--- a/drivers/net/vmxnet3/Makefile
+++ b/drivers/net/vmxnet3/Makefile
@@ -2,7 +2,7 @@
 #
 # Linux driver for VMware's vmxnet3 ethernet NIC.
 #
-# Copyright (C) 2007-2016, VMware, Inc. All Rights Reserved.
+# Copyright (C) 2007-2020, VMware, Inc. All Rights Reserved.
 #
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h
index db9f1fde3aac..65a203c842b2 100644
--- a/drivers/net/vmxnet3/upt1_defs.h
+++ b/drivers/net/vmxnet3/upt1_defs.h
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index c3a31646189f..c77274228a3e 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 722cb054a5cd..ec2878f8c1f6 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 6528940ce5f3..1163eca7aba5 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 1cc1cd4aaa59..e803ffad75d6 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -1,7 +1,7 @@
 /*
  * Linux driver for VMware's vmxnet3 ethernet NIC.
  *
- * Copyright (C) 2008-2016, VMware, Inc. All Rights Reserved.
+ * Copyright (C) 2008-2020, VMware, Inc. All Rights Reserved.
  *
  * This program is free software; you can redistribute it and/or modify it
  * under the terms of the GNU General Public License as published by the
@@ -81,6 +81,7 @@
 	#define VMXNET3_RSS
 #endif
 
+#define VMXNET3_REV_4		3	/* Vmxnet3 Rev. 4 */
 #define VMXNET3_REV_3		2	/* Vmxnet3 Rev. 3 */
 #define VMXNET3_REV_2		1	/* Vmxnet3 Rev. 2 */
 #define VMXNET3_REV_1		0	/* Vmxnet3 Rev. 1 */
@@ -412,6 +413,8 @@ struct vmxnet3_adapter {
 	(adapter->version >= VMXNET3_REV_2 + 1)
 #define VMXNET3_VERSION_GE_3(adapter) \
 	(adapter->version >= VMXNET3_REV_3 + 1)
+#define VMXNET3_VERSION_GE_4(adapter) \
+	(adapter->version >= VMXNET3_REV_4 + 1)
 
 /* must be a multiple of VMXNET3_RING_SIZE_ALIGN */
 #define VMXNET3_DEF_TX_RING_SIZE    512
-- 
cgit v1.2.3-59-g8ed1b


From d3a8a9e5c3b334d443e97daa59bb95c0b69f4794 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 28 May 2020 14:53:20 -0700
Subject: vmxnet3: add support to get/set rx flow hash

With vmxnet3 version 4, the emulation supports multiqueue (RSS) for
UDP and ESP traffic. A guest can enable/disable RSS for UDP/ESP over
IPv4/IPv6 by issuing commands introduced in this patch. ESP over IPv6 is
not yet supported in this patch.

This patch implements get_rss_hash_opts and set_rss_hash_opts
methods to allow querying and configuring different Rx flow hash
configurations.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_defs.h    |  12 ++
 drivers/net/vmxnet3/vmxnet3_drv.c     |  39 ++++++
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 219 +++++++++++++++++++++++++++++++++-
 drivers/net/vmxnet3/vmxnet3_int.h     |   4 +
 4 files changed, 272 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index c77274228a3e..aac97fac1186 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h
@@ -82,6 +82,7 @@ enum {
 	VMXNET3_CMD_RESERVED3,
 	VMXNET3_CMD_SET_COALESCE,
 	VMXNET3_CMD_REGISTER_MEMREGS,
+	VMXNET3_CMD_SET_RSS_FIELDS,
 
 	VMXNET3_CMD_FIRST_GET = 0xF00D0000,
 	VMXNET3_CMD_GET_QUEUE_STATUS = VMXNET3_CMD_FIRST_GET,
@@ -96,6 +97,7 @@ enum {
 	VMXNET3_CMD_GET_RESERVED1,
 	VMXNET3_CMD_GET_TXDATA_DESC_SIZE,
 	VMXNET3_CMD_GET_COALESCE,
+	VMXNET3_CMD_GET_RSS_FIELDS,
 };
 
 /*
@@ -685,12 +687,22 @@ struct Vmxnet3_MemRegs {
 	struct Vmxnet3_MemoryRegion		memRegs[1];
 };
 
+enum Vmxnet3_RSSField {
+	VMXNET3_RSS_FIELDS_TCPIP4 = 0x0001,
+	VMXNET3_RSS_FIELDS_TCPIP6 = 0x0002,
+	VMXNET3_RSS_FIELDS_UDPIP4 = 0x0004,
+	VMXNET3_RSS_FIELDS_UDPIP6 = 0x0008,
+	VMXNET3_RSS_FIELDS_ESPIP4 = 0x0010,
+	VMXNET3_RSS_FIELDS_ESPIP6 = 0x0020,
+};
+
 /* If the command data <= 16 bytes, use the shared memory directly.
  * otherwise, use variable length configuration descriptor.
  */
 union Vmxnet3_CmdInfo {
 	struct Vmxnet3_VariableLenConfDesc	varConf;
 	struct Vmxnet3_SetPolling		setPolling;
+	enum   Vmxnet3_RSSField                 setRssFields;
 	__le64					data[2];
 };
 
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index ec2878f8c1f6..4ea7a40ada88 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -2554,6 +2554,39 @@ vmxnet3_init_coalesce(struct vmxnet3_adapter *adapter)
 	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
 }
 
+static void
+vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter)
+{
+	struct Vmxnet3_DriverShared *shared = adapter->shared;
+	union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
+	unsigned long flags;
+
+		if (!VMXNET3_VERSION_GE_4(adapter))
+			return;
+
+	spin_lock_irqsave(&adapter->cmd_lock, flags);
+
+	if (adapter->default_rss_fields) {
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_GET_RSS_FIELDS);
+		adapter->rss_fields =
+			VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+	} else {
+		cmdInfo->setRssFields = adapter->rss_fields;
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_SET_RSS_FIELDS);
+		/* Not all requested RSS may get applied, so get and
+		 * cache what was actually applied.
+		 */
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_GET_RSS_FIELDS);
+		adapter->rss_fields =
+			VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+	}
+
+	spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+}
+
 int
 vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 {
@@ -2603,6 +2636,7 @@ vmxnet3_activate_dev(struct vmxnet3_adapter *adapter)
 	}
 
 	vmxnet3_init_coalesce(adapter);
+	vmxnet3_init_rssfields(adapter);
 
 	for (i = 0; i < adapter->num_rx_queues; i++) {
 		VMXNET3_WRITE_BAR0_REG(adapter,
@@ -3430,6 +3464,11 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		adapter->default_coal_mode = true;
 	}
 
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		adapter->default_rss_fields = true;
+		adapter->rss_fields = VMXNET3_RSS_FIELDS_DEFAULT;
+	}
+
 	SET_NETDEV_DEV(netdev, &pdev->dev);
 	vmxnet3_declare_features(adapter, dma64);
 
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 1163eca7aba5..57460cf1967f 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -665,18 +665,232 @@ out:
 	return err;
 }
 
+static int
+vmxnet3_get_rss_hash_opts(struct vmxnet3_adapter *adapter,
+			  struct ethtool_rxnfc *info)
+{
+	enum Vmxnet3_RSSField rss_fields;
+
+	if (netif_running(adapter->netdev)) {
+		unsigned long flags;
+
+		spin_lock_irqsave(&adapter->cmd_lock, flags);
+
+		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+				       VMXNET3_CMD_GET_RSS_FIELDS);
+		rss_fields = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+		spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+	} else {
+		rss_fields = adapter->rss_fields;
+	}
+
+	info->data = 0;
+
+	/* Report default options for RSS on vmxnet3 */
+	switch (info->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3 |
+			      RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case UDP_V4_FLOW:
+		if (rss_fields & VMXNET3_RSS_FIELDS_UDPIP4)
+			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		info->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case AH_ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case ESP_V4_FLOW:
+		if (rss_fields & VMXNET3_RSS_FIELDS_ESPIP4)
+			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+			/* fallthrough */
+	case SCTP_V4_FLOW:
+	case IPV4_FLOW:
+		info->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case UDP_V6_FLOW:
+		if (rss_fields & VMXNET3_RSS_FIELDS_UDPIP6)
+			info->data |= RXH_L4_B_0_1 | RXH_L4_B_2_3;
+		info->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	case AH_ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case ESP_V6_FLOW:
+	case SCTP_V6_FLOW:
+	case IPV6_FLOW:
+		info->data |= RXH_IP_SRC | RXH_IP_DST;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static int
+vmxnet3_set_rss_hash_opt(struct net_device *netdev,
+			 struct vmxnet3_adapter *adapter,
+			 struct ethtool_rxnfc *nfc)
+{
+	enum Vmxnet3_RSSField rss_fields = adapter->rss_fields;
+
+	/* RSS does not support anything other than hashing
+	 * to queues on src and dst IPs and ports
+	 */
+	if (nfc->data & ~(RXH_IP_SRC | RXH_IP_DST |
+			  RXH_L4_B_0_1 | RXH_L4_B_2_3))
+		return -EINVAL;
+
+	switch (nfc->flow_type) {
+	case TCP_V4_FLOW:
+	case TCP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    !(nfc->data & RXH_L4_B_0_1) ||
+		    !(nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	case UDP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			rss_fields &= ~VMXNET3_RSS_FIELDS_UDPIP4;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			rss_fields |= VMXNET3_RSS_FIELDS_UDPIP4;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case UDP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			rss_fields &= ~VMXNET3_RSS_FIELDS_UDPIP6;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			rss_fields |= VMXNET3_RSS_FIELDS_UDPIP6;
+			break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case ESP_V4_FLOW:
+	case AH_V4_FLOW:
+	case AH_ESP_V4_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST))
+			return -EINVAL;
+		switch (nfc->data & (RXH_L4_B_0_1 | RXH_L4_B_2_3)) {
+		case 0:
+			rss_fields &= ~VMXNET3_RSS_FIELDS_ESPIP4;
+			break;
+		case (RXH_L4_B_0_1 | RXH_L4_B_2_3):
+			rss_fields |= VMXNET3_RSS_FIELDS_ESPIP4;
+		break;
+		default:
+			return -EINVAL;
+		}
+		break;
+	case ESP_V6_FLOW:
+	case AH_V6_FLOW:
+	case AH_ESP_V6_FLOW:
+	case SCTP_V4_FLOW:
+	case SCTP_V6_FLOW:
+		if (!(nfc->data & RXH_IP_SRC) ||
+		    !(nfc->data & RXH_IP_DST) ||
+		    (nfc->data & RXH_L4_B_0_1) ||
+		    (nfc->data & RXH_L4_B_2_3))
+			return -EINVAL;
+		break;
+	default:
+		return -EINVAL;
+	}
+
+	/* if we changed something we need to update flags */
+	if (rss_fields != adapter->rss_fields) {
+		adapter->default_rss_fields = false;
+		if (netif_running(netdev)) {
+			struct Vmxnet3_DriverShared *shared = adapter->shared;
+			union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
+			unsigned long flags;
+
+			spin_lock_irqsave(&adapter->cmd_lock, flags);
+			cmdInfo->setRssFields = rss_fields;
+			VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+					       VMXNET3_CMD_SET_RSS_FIELDS);
+
+			/* Not all requested RSS may get applied, so get and
+			 * cache what was actually applied.
+			 */
+			VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
+					       VMXNET3_CMD_GET_RSS_FIELDS);
+			adapter->rss_fields =
+				VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_CMD);
+			spin_unlock_irqrestore(&adapter->cmd_lock, flags);
+		} else {
+			/* When the device is activated, we will try to apply
+			 * these rules and cache the applied value later.
+			 */
+			adapter->rss_fields = rss_fields;
+		}
+	}
+	return 0;
+}
 
 static int
 vmxnet3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 		  u32 *rules)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	int err = 0;
+
 	switch (info->cmd) {
 	case ETHTOOL_GRXRINGS:
 		info->data = adapter->num_rx_queues;
-		return 0;
+		break;
+	case ETHTOOL_GRXFH:
+		if (!VMXNET3_VERSION_GE_4(adapter)) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+		err = vmxnet3_get_rss_hash_opts(adapter, info);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
 	}
-	return -EOPNOTSUPP;
+
+	return err;
+}
+
+static int
+vmxnet3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+	int err = 0;
+
+	if (!VMXNET3_VERSION_GE_4(adapter)) {
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+
+	switch (info->cmd) {
+	case ETHTOOL_SRXFH:
+		err = vmxnet3_set_rss_hash_opt(netdev, adapter, info);
+		break;
+	default:
+		err = -EOPNOTSUPP;
+		break;
+	}
+
+done:
+	return err;
 }
 
 #ifdef VMXNET3_RSS
@@ -887,6 +1101,7 @@ static const struct ethtool_ops vmxnet3_ethtool_ops = {
 	.get_ringparam     = vmxnet3_get_ringparam,
 	.set_ringparam     = vmxnet3_set_ringparam,
 	.get_rxnfc         = vmxnet3_get_rxnfc,
+	.set_rxnfc         = vmxnet3_set_rxnfc,
 #ifdef VMXNET3_RSS
 	.get_rxfh_indir_size = vmxnet3_get_rss_indir_size,
 	.get_rxfh          = vmxnet3_get_rss,
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index e803ffad75d6..d52ccc3eeba2 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -377,6 +377,8 @@ struct vmxnet3_adapter {
 	u16 rxdata_desc_size;
 
 	bool rxdataring_enabled;
+	bool default_rss_fields;
+	enum Vmxnet3_RSSField rss_fields;
 
 	struct work_struct work;
 
@@ -438,6 +440,8 @@ struct vmxnet3_adapter {
 
 #define VMXNET3_COAL_RBC_RATE(usecs) (1000000 / usecs)
 #define VMXNET3_COAL_RBC_USECS(rbc_rate) (1000000 / rbc_rate)
+#define VMXNET3_RSS_FIELDS_DEFAULT (VMXNET3_RSS_FIELDS_TCPIP4 | \
+				    VMXNET3_RSS_FIELDS_TCPIP6)
 
 int
 vmxnet3_quiesce_dev(struct vmxnet3_adapter *adapter);
-- 
cgit v1.2.3-59-g8ed1b


From dacce2be33124df3c71f979ac47e3d6354a41125 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 28 May 2020 14:53:21 -0700
Subject: vmxnet3: add geneve and vxlan tunnel offload support

Vmxnet3 version 3 device supports checksum/TSO offload. Thus, vNIC to
pNIC traffic can leverage hardware checksum/TSO offloads. However,
vmxnet3 does not support checksum/TSO offload for Geneve/VXLAN
encapsulated packets. Thus, for a vNIC configured with an overlay, the
guest stack must first segment the inner packet, compute the inner
checksum for each segment and encapsulate each segment before
transmitting the packet via the vNIC. This results in a significant
performance penalty.

This patch will enhance vmxnet3 to support Geneve/VXLAN TSO as well as
checksum offload.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/upt1_defs.h       |   3 +
 drivers/net/vmxnet3/vmxnet3_defs.h    |  17 +++--
 drivers/net/vmxnet3/vmxnet3_drv.c     | 120 +++++++++++++++++++++++++++-------
 drivers/net/vmxnet3/vmxnet3_ethtool.c |  42 +++++++++++-
 drivers/net/vmxnet3/vmxnet3_int.h     |  12 +++-
 5 files changed, 161 insertions(+), 33 deletions(-)

diff --git a/drivers/net/vmxnet3/upt1_defs.h b/drivers/net/vmxnet3/upt1_defs.h
index 65a203c842b2..8c014c98471c 100644
--- a/drivers/net/vmxnet3/upt1_defs.h
+++ b/drivers/net/vmxnet3/upt1_defs.h
@@ -92,5 +92,8 @@ enum {
 	UPT1_F_RSS		= cpu_to_le64(0x0002),
 	UPT1_F_RXVLAN		= cpu_to_le64(0x0004),   /* VLAN tag stripping */
 	UPT1_F_LRO		= cpu_to_le64(0x0008),
+	UPT1_F_RXINNEROFLD      = cpu_to_le64(0x00010),  /* Geneve/Vxlan rx csum
+							  * offloading
+							  */
 };
 #endif
diff --git a/drivers/net/vmxnet3/vmxnet3_defs.h b/drivers/net/vmxnet3/vmxnet3_defs.h
index aac97fac1186..a8d5ebd47c71 100644
--- a/drivers/net/vmxnet3/vmxnet3_defs.h
+++ b/drivers/net/vmxnet3/vmxnet3_defs.h
@@ -103,14 +103,14 @@ enum {
 /*
  *	Little Endian layout of bitfields -
  *	Byte 0 :	7.....len.....0
- *	Byte 1 :	rsvd gen 13.len.8
+ *	Byte 1 :	oco gen 13.len.8
  *	Byte 2 : 	5.msscof.0 ext1  dtype
  *	Byte 3 : 	13...msscof...6
  *
  *	Big Endian layout of bitfields -
  *	Byte 0:		13...msscof...6
  *	Byte 1 : 	5.msscof.0 ext1  dtype
- *	Byte 2 :	rsvd gen 13.len.8
+ *	Byte 2 :	oco gen 13.len.8
  *	Byte 3 :	7.....len.....0
  *
  *	Thus, le32_to_cpu on the dword will allow the big endian driver to read
@@ -125,13 +125,13 @@ struct Vmxnet3_TxDesc {
 	u32 msscof:14;  /* MSS, checksum offset, flags */
 	u32 ext1:1;
 	u32 dtype:1;    /* descriptor type */
-	u32 rsvd:1;
+	u32 oco:1;
 	u32 gen:1;      /* generation bit */
 	u32 len:14;
 #else
 	u32 len:14;
 	u32 gen:1;      /* generation bit */
-	u32 rsvd:1;
+	u32 oco:1;
 	u32 dtype:1;    /* descriptor type */
 	u32 ext1:1;
 	u32 msscof:14;  /* MSS, checksum offset, flags */
@@ -157,9 +157,10 @@ struct Vmxnet3_TxDesc {
 };
 
 /* TxDesc.OM values */
-#define VMXNET3_OM_NONE		0
-#define VMXNET3_OM_CSUM		2
-#define VMXNET3_OM_TSO		3
+#define VMXNET3_OM_NONE         0
+#define VMXNET3_OM_ENCAP        1
+#define VMXNET3_OM_CSUM         2
+#define VMXNET3_OM_TSO          3
 
 /* fields in TxDesc we access w/o using bit fields */
 #define VMXNET3_TXD_EOP_SHIFT	12
@@ -226,6 +227,8 @@ struct Vmxnet3_RxDesc {
 #define VMXNET3_RXD_BTYPE_SHIFT  14
 #define VMXNET3_RXD_GEN_SHIFT    31
 
+#define VMXNET3_RCD_HDR_INNER_SHIFT  13
+
 struct Vmxnet3_RxCompDesc {
 #ifdef __BIG_ENDIAN_BITFIELD
 	u32		ext2:1;
diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 4ea7a40ada88..171d4b1d1d04 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -842,12 +842,22 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	u8 protocol = 0;
 
 	if (ctx->mss) {	/* TSO */
-		ctx->eth_ip_hdr_size = skb_transport_offset(skb);
-		ctx->l4_hdr_size = tcp_hdrlen(skb);
-		ctx->copy_size = ctx->eth_ip_hdr_size + ctx->l4_hdr_size;
+		if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
+			ctx->l4_offset = skb_inner_transport_offset(skb);
+			ctx->l4_hdr_size = inner_tcp_hdrlen(skb);
+			ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
+		} else {
+			ctx->l4_offset = skb_transport_offset(skb);
+			ctx->l4_hdr_size = tcp_hdrlen(skb);
+			ctx->copy_size = ctx->l4_offset + ctx->l4_hdr_size;
+		}
 	} else {
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			ctx->eth_ip_hdr_size = skb_checksum_start_offset(skb);
+			/* For encap packets, skb_checksum_start_offset refers
+			 * to inner L4 offset. Thus, below works for encap as
+			 * well as non-encap case
+			 */
+			ctx->l4_offset = skb_checksum_start_offset(skb);
 
 			if (ctx->ipv4) {
 				const struct iphdr *iph = ip_hdr(skb);
@@ -871,10 +881,10 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 				break;
 			}
 
-			ctx->copy_size = min(ctx->eth_ip_hdr_size +
+			ctx->copy_size = min(ctx->l4_offset +
 					 ctx->l4_hdr_size, skb->len);
 		} else {
-			ctx->eth_ip_hdr_size = 0;
+			ctx->l4_offset = 0;
 			ctx->l4_hdr_size = 0;
 			/* copy as much as allowed */
 			ctx->copy_size = min_t(unsigned int,
@@ -929,6 +939,25 @@ vmxnet3_copy_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 }
 
 
+static void
+vmxnet3_prepare_inner_tso(struct sk_buff *skb,
+			  struct vmxnet3_tx_ctx *ctx)
+{
+	struct tcphdr *tcph = inner_tcp_hdr(skb);
+	struct iphdr *iph = inner_ip_hdr(skb);
+
+	if (ctx->ipv4) {
+		iph->check = 0;
+		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
+						 IPPROTO_TCP, 0);
+	} else if (ctx->ipv6) {
+		struct ipv6hdr *iph = inner_ipv6_hdr(skb);
+
+		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
+					       IPPROTO_TCP, 0);
+	}
+}
+
 static void
 vmxnet3_prepare_tso(struct sk_buff *skb,
 		    struct vmxnet3_tx_ctx *ctx)
@@ -987,6 +1016,7 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 	/* Use temporary descriptor to avoid touching bits multiple times */
 	union Vmxnet3_GenericDesc tempTxDesc;
 #endif
+	struct udphdr *udph;
 
 	count = txd_estimate(skb);
 
@@ -1003,7 +1033,11 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 			}
 			tq->stats.copy_skb_header++;
 		}
-		vmxnet3_prepare_tso(skb, &ctx);
+		if (skb->encapsulation) {
+			vmxnet3_prepare_inner_tso(skb, &ctx);
+		} else {
+			vmxnet3_prepare_tso(skb, &ctx);
+		}
 	} else {
 		if (unlikely(count > VMXNET3_MAX_TXD_PER_PKT)) {
 
@@ -1026,14 +1060,14 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 		BUG_ON(ret <= 0 && ctx.copy_size != 0);
 		/* hdrs parsed, check against other limits */
 		if (ctx.mss) {
-			if (unlikely(ctx.eth_ip_hdr_size + ctx.l4_hdr_size >
+			if (unlikely(ctx.l4_offset + ctx.l4_hdr_size >
 				     VMXNET3_MAX_TX_BUF_SIZE)) {
 				tq->stats.drop_oversized_hdr++;
 				goto drop_pkt;
 			}
 		} else {
 			if (skb->ip_summed == CHECKSUM_PARTIAL) {
-				if (unlikely(ctx.eth_ip_hdr_size +
+				if (unlikely(ctx.l4_offset +
 					     skb->csum_offset >
 					     VMXNET3_MAX_CSUM_OFFSET)) {
 					tq->stats.drop_oversized_hdr++;
@@ -1080,16 +1114,34 @@ vmxnet3_tq_xmit(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 #endif
 	tx_num_deferred = le32_to_cpu(tq->shared->txNumDeferred);
 	if (ctx.mss) {
-		gdesc->txd.hlen = ctx.eth_ip_hdr_size + ctx.l4_hdr_size;
-		gdesc->txd.om = VMXNET3_OM_TSO;
-		gdesc->txd.msscof = ctx.mss;
+		if (VMXNET3_VERSION_GE_4(adapter) && skb->encapsulation) {
+			gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
+			gdesc->txd.om = VMXNET3_OM_ENCAP;
+			gdesc->txd.msscof = ctx.mss;
+
+			udph = udp_hdr(skb);
+			if (udph->check)
+				gdesc->txd.oco = 1;
+		} else {
+			gdesc->txd.hlen = ctx.l4_offset + ctx.l4_hdr_size;
+			gdesc->txd.om = VMXNET3_OM_TSO;
+			gdesc->txd.msscof = ctx.mss;
+		}
 		num_pkts = (skb->len - gdesc->txd.hlen + ctx.mss - 1) / ctx.mss;
 	} else {
 		if (skb->ip_summed == CHECKSUM_PARTIAL) {
-			gdesc->txd.hlen = ctx.eth_ip_hdr_size;
-			gdesc->txd.om = VMXNET3_OM_CSUM;
-			gdesc->txd.msscof = ctx.eth_ip_hdr_size +
-					    skb->csum_offset;
+			if (VMXNET3_VERSION_GE_4(adapter) &&
+			    skb->encapsulation) {
+				gdesc->txd.hlen = ctx.l4_offset +
+						  ctx.l4_hdr_size;
+				gdesc->txd.om = VMXNET3_OM_ENCAP;
+				gdesc->txd.msscof = 0;		/* Reserved */
+			} else {
+				gdesc->txd.hlen = ctx.l4_offset;
+				gdesc->txd.om = VMXNET3_OM_CSUM;
+				gdesc->txd.msscof = ctx.l4_offset +
+						    skb->csum_offset;
+			}
 		} else {
 			gdesc->txd.om = 0;
 			gdesc->txd.msscof = 0;
@@ -1168,13 +1220,21 @@ vmxnet3_rx_csum(struct vmxnet3_adapter *adapter,
 		    (le32_to_cpu(gdesc->dword[3]) &
 		     VMXNET3_RCD_CSUM_OK) == VMXNET3_RCD_CSUM_OK) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
-			BUG_ON(gdesc->rcd.frg);
+			WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+			WARN_ON_ONCE(gdesc->rcd.frg &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
 		} else if (gdesc->rcd.v6 && (le32_to_cpu(gdesc->dword[3]) &
 					     (1 << VMXNET3_RCD_TUC_SHIFT))) {
 			skb->ip_summed = CHECKSUM_UNNECESSARY;
-			BUG_ON(!(gdesc->rcd.tcp || gdesc->rcd.udp));
-			BUG_ON(gdesc->rcd.frg);
+			WARN_ON_ONCE(!(gdesc->rcd.tcp || gdesc->rcd.udp) &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
+			WARN_ON_ONCE(gdesc->rcd.frg &&
+				     !(le32_to_cpu(gdesc->dword[0]) &
+				     (1UL << VMXNET3_RCD_HDR_INNER_SHIFT)));
 		} else {
 			if (gdesc->rcd.csum) {
 				skb->csum = htons(gdesc->rcd.csum);
@@ -2429,6 +2489,10 @@ vmxnet3_setup_driver_shared(struct vmxnet3_adapter *adapter)
 	if (adapter->netdev->features & NETIF_F_HW_VLAN_CTAG_RX)
 		devRead->misc.uptFeatures |= UPT1_F_RXVLAN;
 
+	if (adapter->netdev->features & (NETIF_F_GSO_UDP_TUNNEL |
+					 NETIF_F_GSO_UDP_TUNNEL_CSUM))
+		devRead->misc.uptFeatures |= UPT1_F_RXINNEROFLD;
+
 	devRead->misc.mtu = cpu_to_le32(adapter->netdev->mtu);
 	devRead->misc.queueDescPA = cpu_to_le64(adapter->queue_desc_pa);
 	devRead->misc.queueDescLen = cpu_to_le32(
@@ -2561,8 +2625,8 @@ vmxnet3_init_rssfields(struct vmxnet3_adapter *adapter)
 	union Vmxnet3_CmdInfo *cmdInfo = &shared->cu.cmdInfo;
 	unsigned long flags;
 
-		if (!VMXNET3_VERSION_GE_4(adapter))
-			return;
+	if (!VMXNET3_VERSION_GE_4(adapter))
+		return;
 
 	spin_lock_irqsave(&adapter->cmd_lock, flags);
 
@@ -3073,6 +3137,18 @@ vmxnet3_declare_features(struct vmxnet3_adapter *adapter, bool dma64)
 		NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
 		NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
 		NETIF_F_LRO;
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_features |= NETIF_F_GSO_UDP_TUNNEL |
+				NETIF_F_GSO_UDP_TUNNEL_CSUM;
+
+		netdev->hw_enc_features = NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	}
+
 	if (dma64)
 		netdev->hw_features |= NETIF_F_HIGHDMA;
 	netdev->vlan_features = netdev->hw_features &
diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 57460cf1967f..bfdda0f34b97 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -267,14 +267,43 @@ netdev_features_t vmxnet3_fix_features(struct net_device *netdev,
 	return features;
 }
 
+static void vmxnet3_enable_encap_offloads(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_enc_features |= NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	}
+}
+
+static void vmxnet3_disable_encap_offloads(struct net_device *netdev)
+{
+	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
+
+	if (VMXNET3_VERSION_GE_4(adapter)) {
+		netdev->hw_enc_features &= ~(NETIF_F_SG | NETIF_F_RXCSUM |
+			NETIF_F_HW_CSUM | NETIF_F_HW_VLAN_CTAG_TX |
+			NETIF_F_HW_VLAN_CTAG_RX | NETIF_F_TSO | NETIF_F_TSO6 |
+			NETIF_F_LRO | NETIF_F_GSO_UDP_TUNNEL |
+			NETIF_F_GSO_UDP_TUNNEL_CSUM);
+	}
+}
+
 int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 {
 	struct vmxnet3_adapter *adapter = netdev_priv(netdev);
 	unsigned long flags;
 	netdev_features_t changed = features ^ netdev->features;
+	netdev_features_t tun_offload_mask = NETIF_F_GSO_UDP_TUNNEL |
+					     NETIF_F_GSO_UDP_TUNNEL_CSUM;
+	u8 udp_tun_enabled = (netdev->features & tun_offload_mask) != 0;
 
 	if (changed & (NETIF_F_RXCSUM | NETIF_F_LRO |
-		       NETIF_F_HW_VLAN_CTAG_RX)) {
+		       NETIF_F_HW_VLAN_CTAG_RX | tun_offload_mask)) {
 		if (features & NETIF_F_RXCSUM)
 			adapter->shared->devRead.misc.uptFeatures |=
 			UPT1_F_RXCSUM;
@@ -297,6 +326,17 @@ int vmxnet3_set_features(struct net_device *netdev, netdev_features_t features)
 			adapter->shared->devRead.misc.uptFeatures &=
 			~UPT1_F_RXVLAN;
 
+		if ((features & tun_offload_mask) != 0 && !udp_tun_enabled) {
+			vmxnet3_enable_encap_offloads(netdev);
+			adapter->shared->devRead.misc.uptFeatures |=
+			UPT1_F_RXINNEROFLD;
+		} else if ((features & tun_offload_mask) == 0 &&
+			   udp_tun_enabled) {
+			vmxnet3_disable_encap_offloads(netdev);
+			adapter->shared->devRead.misc.uptFeatures &=
+			~UPT1_F_RXINNEROFLD;
+		}
+
 		spin_lock_irqsave(&adapter->cmd_lock, flags);
 		VMXNET3_WRITE_BAR1_REG(adapter, VMXNET3_REG_CMD,
 				       VMXNET3_CMD_UPDATE_FEATURE);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index d52ccc3eeba2..86db809c7592 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -219,10 +219,16 @@ struct vmxnet3_tx_ctx {
 	bool   ipv4;
 	bool   ipv6;
 	u16 mss;
-	u32 eth_ip_hdr_size; /* only valid for pkts requesting tso or csum
-				 * offloading
+	u32    l4_offset;	/* only valid for pkts requesting tso or csum
+				 * offloading. For encap offload, it refers to
+				 * inner L4 offset i.e. it includes outer header
+				 * encap header and inner eth and ip header size
+				 */
+
+	u32	l4_hdr_size;	/* only valid if mss != 0
+				 * Refers to inner L4 hdr size for encap
+				 * offload
 				 */
-	u32 l4_hdr_size;     /* only valid if mss != 0 */
 	u32 copy_size;       /* # of bytes copied into the data ring */
 	union Vmxnet3_GenericDesc *sop_txd;
 	union Vmxnet3_GenericDesc *eop_txd;
-- 
cgit v1.2.3-59-g8ed1b


From a31135e36eccd0d16e500d3041f23c3ece62096f Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 28 May 2020 14:53:22 -0700
Subject: vmxnet3: update to version 4

With all vmxnet3 version 4 changes incorporated in the vmxnet3 driver,
the driver can configure emulation to run at vmxnet3 version 4, provided
the emulation advertises support for version 4.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 7 ++++++-
 drivers/net/vmxnet3/vmxnet3_int.h | 4 ++--
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 171d4b1d1d04..3d07ce6cb706 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -3492,7 +3492,12 @@ vmxnet3_probe_device(struct pci_dev *pdev,
 		goto err_alloc_pci;
 
 	ver = VMXNET3_READ_BAR1_REG(adapter, VMXNET3_REG_VRRS);
-	if (ver & (1 << VMXNET3_REV_3)) {
+	if (ver & (1 << VMXNET3_REV_4)) {
+		VMXNET3_WRITE_BAR1_REG(adapter,
+				       VMXNET3_REG_VRRS,
+				       1 << VMXNET3_REV_4);
+		adapter->version = VMXNET3_REV_4 + 1;
+	} else if (ver & (1 << VMXNET3_REV_3)) {
 		VMXNET3_WRITE_BAR1_REG(adapter,
 				       VMXNET3_REG_VRRS,
 				       1 << VMXNET3_REV_3);
diff --git a/drivers/net/vmxnet3/vmxnet3_int.h b/drivers/net/vmxnet3/vmxnet3_int.h
index 86db809c7592..5d2b062215a2 100644
--- a/drivers/net/vmxnet3/vmxnet3_int.h
+++ b/drivers/net/vmxnet3/vmxnet3_int.h
@@ -69,12 +69,12 @@
 /*
  * Version numbers
  */
-#define VMXNET3_DRIVER_VERSION_STRING   "1.4.17.0-k"
+#define VMXNET3_DRIVER_VERSION_STRING   "1.5.0.0-k"
 
 /* Each byte of this 32-bit integer encodes a version number in
  * VMXNET3_DRIVER_VERSION_STRING.
  */
-#define VMXNET3_DRIVER_VERSION_NUM      0x01041100
+#define VMXNET3_DRIVER_VERSION_NUM      0x01050000
 
 #if defined(CONFIG_PCI_MSI)
 	/* RSS only makes sense if MSI-X is supported. */
-- 
cgit v1.2.3-59-g8ed1b


From d320692d9f8502e819e511ea5294552e0a8d3d9b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:28 +0200
Subject: net: ks8851: Factor out spi->dev in probe()/remove()

Pull out the spi->dev into one common place in the function instead of
having it repeated over and over again. This is done in preparation for
unifying ks8851 and ks8851-mll drivers. No functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 29 ++++++++++++++---------------
 1 file changed, 14 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 33305c9c5a62..e32ef9403803 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1413,6 +1413,7 @@ static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume);
 
 static int ks8851_probe(struct spi_device *spi)
 {
+	struct device *dev = &spi->dev;
 	struct net_device *ndev;
 	struct ks8851_net *ks;
 	int ret;
@@ -1431,8 +1432,7 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->spidev = spi;
 	ks->tx_space = 6144;
 
-	gpio = of_get_named_gpio_flags(spi->dev.of_node, "reset-gpios",
-				       0, NULL);
+	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
 	if (gpio == -EPROBE_DEFER) {
 		ret = gpio;
 		goto err_gpio;
@@ -1440,15 +1440,15 @@ static int ks8851_probe(struct spi_device *spi)
 
 	ks->gpio = gpio;
 	if (gpio_is_valid(gpio)) {
-		ret = devm_gpio_request_one(&spi->dev, gpio,
+		ret = devm_gpio_request_one(dev, gpio,
 					    GPIOF_OUT_INIT_LOW, "ks8851_rst_n");
 		if (ret) {
-			dev_err(&spi->dev, "reset gpio request failed\n");
+			dev_err(dev, "reset gpio request failed\n");
 			goto err_gpio;
 		}
 	}
 
-	ks->vdd_io = devm_regulator_get(&spi->dev, "vdd-io");
+	ks->vdd_io = devm_regulator_get(dev, "vdd-io");
 	if (IS_ERR(ks->vdd_io)) {
 		ret = PTR_ERR(ks->vdd_io);
 		goto err_reg_io;
@@ -1456,12 +1456,11 @@ static int ks8851_probe(struct spi_device *spi)
 
 	ret = regulator_enable(ks->vdd_io);
 	if (ret) {
-		dev_err(&spi->dev, "regulator vdd_io enable fail: %d\n",
-			ret);
+		dev_err(dev, "regulator vdd_io enable fail: %d\n", ret);
 		goto err_reg_io;
 	}
 
-	ks->vdd_reg = devm_regulator_get(&spi->dev, "vdd");
+	ks->vdd_reg = devm_regulator_get(dev, "vdd");
 	if (IS_ERR(ks->vdd_reg)) {
 		ret = PTR_ERR(ks->vdd_reg);
 		goto err_reg;
@@ -1469,8 +1468,7 @@ static int ks8851_probe(struct spi_device *spi)
 
 	ret = regulator_enable(ks->vdd_reg);
 	if (ret) {
-		dev_err(&spi->dev, "regulator vdd enable fail: %d\n",
-			ret);
+		dev_err(dev, "regulator vdd enable fail: %d\n", ret);
 		goto err_reg;
 	}
 
@@ -1509,7 +1507,7 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->mii.mdio_read	= ks8851_phy_read;
 	ks->mii.mdio_write	= ks8851_phy_write;
 
-	dev_info(&spi->dev, "message enable is %d\n", msg_enable);
+	dev_info(dev, "message enable is %d\n", msg_enable);
 
 	/* set the default message enable */
 	ks->msg_enable = netif_msg_init(msg_enable, (NETIF_MSG_DRV |
@@ -1519,7 +1517,7 @@ static int ks8851_probe(struct spi_device *spi)
 	skb_queue_head_init(&ks->txq);
 
 	ndev->ethtool_ops = &ks8851_ethtool_ops;
-	SET_NETDEV_DEV(ndev, &spi->dev);
+	SET_NETDEV_DEV(ndev, dev);
 
 	spi_set_drvdata(spi, ks);
 
@@ -1534,7 +1532,7 @@ static int ks8851_probe(struct spi_device *spi)
 	/* simple check for a valid chip being connected to the bus */
 	cider = ks8851_rdreg16(ks, KS_CIDER);
 	if ((cider & ~CIDER_REV_MASK) != CIDER_ID) {
-		dev_err(&spi->dev, "failed to read device ID\n");
+		dev_err(dev, "failed to read device ID\n");
 		ret = -ENODEV;
 		goto err_id;
 	}
@@ -1547,7 +1545,7 @@ static int ks8851_probe(struct spi_device *spi)
 
 	ret = register_netdev(ndev);
 	if (ret) {
-		dev_err(&spi->dev, "failed to register network device\n");
+		dev_err(dev, "failed to register network device\n");
 		goto err_netdev;
 	}
 
@@ -1573,9 +1571,10 @@ err_gpio:
 static int ks8851_remove(struct spi_device *spi)
 {
 	struct ks8851_net *priv = spi_get_drvdata(spi);
+	struct device *dev = &spi->dev;
 
 	if (netif_msg_drv(priv))
-		dev_info(&spi->dev, "remove\n");
+		dev_info(dev, "remove\n");
 
 	unregister_netdev(priv->netdev);
 	if (gpio_is_valid(priv->gpio))
-- 
cgit v1.2.3-59-g8ed1b


From bfd1e0eb08f6ac25659e3d9bfd80b8ddffe57c94 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:29 +0200
Subject: net: ks8851: Rename ndev to netdev in probe

Rename ndev variable to netdev for the sake of consistency.

No functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 30 +++++++++++++++---------------
 1 file changed, 15 insertions(+), 15 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index e32ef9403803..2b85072993c5 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1414,21 +1414,21 @@ static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume);
 static int ks8851_probe(struct spi_device *spi)
 {
 	struct device *dev = &spi->dev;
-	struct net_device *ndev;
+	struct net_device *netdev;
 	struct ks8851_net *ks;
 	int ret;
 	unsigned cider;
 	int gpio;
 
-	ndev = alloc_etherdev(sizeof(struct ks8851_net));
-	if (!ndev)
+	netdev = alloc_etherdev(sizeof(struct ks8851_net));
+	if (!netdev)
 		return -ENOMEM;
 
 	spi->bits_per_word = 8;
 
-	ks = netdev_priv(ndev);
+	ks = netdev_priv(netdev);
 
-	ks->netdev = ndev;
+	ks->netdev = netdev;
 	ks->spidev = spi;
 	ks->tx_space = 6144;
 
@@ -1500,7 +1500,7 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->eeprom.register_write = ks8851_eeprom_regwrite;
 
 	/* setup mii state */
-	ks->mii.dev		= ndev;
+	ks->mii.dev		= netdev;
 	ks->mii.phy_id		= 1,
 	ks->mii.phy_id_mask	= 1;
 	ks->mii.reg_num_mask	= 0xf;
@@ -1516,15 +1516,15 @@ static int ks8851_probe(struct spi_device *spi)
 
 	skb_queue_head_init(&ks->txq);
 
-	ndev->ethtool_ops = &ks8851_ethtool_ops;
-	SET_NETDEV_DEV(ndev, dev);
+	netdev->ethtool_ops = &ks8851_ethtool_ops;
+	SET_NETDEV_DEV(netdev, dev);
 
 	spi_set_drvdata(spi, ks);
 
 	netif_carrier_off(ks->netdev);
-	ndev->if_port = IF_PORT_100BASET;
-	ndev->netdev_ops = &ks8851_netdev_ops;
-	ndev->irq = spi->irq;
+	netdev->if_port = IF_PORT_100BASET;
+	netdev->netdev_ops = &ks8851_netdev_ops;
+	netdev->irq = spi->irq;
 
 	/* issue a global soft reset to reset the device. */
 	ks8851_soft_reset(ks, GRR_GSR);
@@ -1543,14 +1543,14 @@ static int ks8851_probe(struct spi_device *spi)
 	ks8851_read_selftest(ks);
 	ks8851_init_mac(ks);
 
-	ret = register_netdev(ndev);
+	ret = register_netdev(netdev);
 	if (ret) {
 		dev_err(dev, "failed to register network device\n");
 		goto err_netdev;
 	}
 
-	netdev_info(ndev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n",
-		    CIDER_REV_GET(cider), ndev->dev_addr, ndev->irq,
+	netdev_info(netdev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n",
+		    CIDER_REV_GET(cider), netdev->dev_addr, netdev->irq,
 		    ks->rc_ccr & CCR_EEPROM ? "has" : "no");
 
 	return 0;
@@ -1564,7 +1564,7 @@ err_reg:
 	regulator_disable(ks->vdd_io);
 err_reg_io:
 err_gpio:
-	free_netdev(ndev);
+	free_netdev(netdev);
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2f3271c952bfa9486aa3746f591788c983fc280d Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:30 +0200
Subject: net: ks8851: Replace dev_err() with netdev_err() in IRQ handler

Use netdev_err() instead of dev_err() to avoid accessing the spidev->dev
in the interrupt handler. This is the only place which uses the spidev
in this function, so replace it with netdev_err() to get rid of it. This
is done in preparation for unifying the KS8851 SPI and parallel drivers.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 2b85072993c5..0088df970ad6 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -631,7 +631,7 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
 		handled |= IRQ_RXI;
 
 	if (status & IRQ_SPIBEI) {
-		dev_err(&ks->spidev->dev, "%s: spi bus error\n", __func__);
+		netdev_err(ks->netdev, "%s: spi bus error\n", __func__);
 		handled |= IRQ_SPIBEI;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 848fc0ce6cb84430c4b1dd0a749015ad9af1b766 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:31 +0200
Subject: net: ks8851: Pass device node into ks8851_init_mac()

Since the driver probe function already has a struct device *dev pointer
and can easily derive of_node pointer from it, pass the of_node pointer as
a parameter to ks8851_init_mac() to avoid fishing it out from ks->spidev.
This is the only reference to spidev in the function, so get rid of it.
This is done in preparation for unifying the KS8851 SPI and parallel bus
drivers.

No functional change.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 0088df970ad6..582092a95afc 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -409,6 +409,7 @@ static void ks8851_read_mac_addr(struct net_device *dev)
 /**
  * ks8851_init_mac - initialise the mac address
  * @ks: The device structure
+ * @np: The device node pointer
  *
  * Get or create the initial mac address for the device and then set that
  * into the station address register. A mac address supplied in the device
@@ -416,12 +417,12 @@ static void ks8851_read_mac_addr(struct net_device *dev)
  * we try that. If no valid mac address is found we use eth_random_addr()
  * to create a new one.
  */
-static void ks8851_init_mac(struct ks8851_net *ks)
+static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
 {
 	struct net_device *dev = ks->netdev;
 	const u8 *mac_addr;
 
-	mac_addr = of_get_mac_address(ks->spidev->dev.of_node);
+	mac_addr = of_get_mac_address(np);
 	if (!IS_ERR(mac_addr)) {
 		ether_addr_copy(dev->dev_addr, mac_addr);
 		ks8851_write_mac_addr(dev);
@@ -1541,7 +1542,7 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR);
 
 	ks8851_read_selftest(ks);
-	ks8851_init_mac(ks);
+	ks8851_init_mac(ks, dev->of_node);
 
 	ret = register_netdev(netdev);
 	if (ret) {
-- 
cgit v1.2.3-59-g8ed1b


From b6948e1b7b094ccc86b1ae768beab734e5355ced Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:32 +0200
Subject: net: ks8851: Use devm_alloc_etherdev()

Use device managed version of alloc_etherdev() to simplify the code.
No functional change intended.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 13 ++++---------
 1 file changed, 4 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 582092a95afc..86bfe55f346d 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1421,7 +1421,7 @@ static int ks8851_probe(struct spi_device *spi)
 	unsigned cider;
 	int gpio;
 
-	netdev = alloc_etherdev(sizeof(struct ks8851_net));
+	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net));
 	if (!netdev)
 		return -ENOMEM;
 
@@ -1434,10 +1434,8 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->tx_space = 6144;
 
 	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
-	if (gpio == -EPROBE_DEFER) {
-		ret = gpio;
-		goto err_gpio;
-	}
+	if (gpio == -EPROBE_DEFER)
+		return gpio;
 
 	ks->gpio = gpio;
 	if (gpio_is_valid(gpio)) {
@@ -1445,7 +1443,7 @@ static int ks8851_probe(struct spi_device *spi)
 					    GPIOF_OUT_INIT_LOW, "ks8851_rst_n");
 		if (ret) {
 			dev_err(dev, "reset gpio request failed\n");
-			goto err_gpio;
+			return ret;
 		}
 	}
 
@@ -1564,8 +1562,6 @@ err_id:
 err_reg:
 	regulator_disable(ks->vdd_io);
 err_reg_io:
-err_gpio:
-	free_netdev(netdev);
 	return ret;
 }
 
@@ -1582,7 +1578,6 @@ static int ks8851_remove(struct spi_device *spi)
 		gpio_set_value(priv->gpio, 0);
 	regulator_disable(priv->vdd_reg);
 	regulator_disable(priv->vdd_io);
-	free_netdev(priv->netdev);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 2c5b0a86ac54dae5bcf854a93dbb086191368014 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:33 +0200
Subject: net: ks8851: Use dev_{get,set}_drvdata()

Replace spi_{get,set}_drvdata() with dev_{get,set}_drvdata(), which
works for both SPI and platform drivers. This is done in preparation
for unifying the KS8851 SPI and parallel bus drivers.

There should be no functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 86bfe55f346d..fe2037e166dc 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1518,7 +1518,7 @@ static int ks8851_probe(struct spi_device *spi)
 	netdev->ethtool_ops = &ks8851_ethtool_ops;
 	SET_NETDEV_DEV(netdev, dev);
 
-	spi_set_drvdata(spi, ks);
+	dev_set_drvdata(dev, ks);
 
 	netif_carrier_off(ks->netdev);
 	netdev->if_port = IF_PORT_100BASET;
@@ -1567,8 +1567,10 @@ err_reg_io:
 
 static int ks8851_remove(struct spi_device *spi)
 {
-	struct ks8851_net *priv = spi_get_drvdata(spi);
 	struct device *dev = &spi->dev;
+	struct ks8851_net *priv;
+
+	priv = dev_get_drvdata(dev);
 
 	if (netif_msg_drv(priv))
 		dev_info(dev, "remove\n");
-- 
cgit v1.2.3-59-g8ed1b


From 806f66495e791eb349cd3ea286ee411bcf1df4d0 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:34 +0200
Subject: net: ks8851: Remove ks8851_rdreg32()

The ks8851_rdreg32() is used only in one place, to read two registers
using a single read. To make it easier to support 16-bit accesses via
parallel bus later on, replace this single read with two 16-bit reads
from each of the registers and drop the ks8851_rdreg32() altogether.

If this has a noticeable performance impact on the SPI variant of KS8851,
then we should consider using regmap to abstract the SPI and parallel
bus options and, in the case of SPI, permit regmap to merge reads of
neighboring registers into a single, longer read.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 25 ++-----------------------
 1 file changed, 2 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index fe2037e166dc..8df130efbde1 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -296,25 +296,6 @@ static unsigned ks8851_rdreg16(struct ks8851_net *ks, unsigned reg)
 	return le16_to_cpu(rx);
 }
 
-/**
- * ks8851_rdreg32 - read 32 bit register from device
- * @ks: The chip information
- * @reg: The register address
- *
- * Read a 32bit register from the chip.
- *
- * Note, this read requires the address be aligned to 4 bytes.
-*/
-static unsigned ks8851_rdreg32(struct ks8851_net *ks, unsigned reg)
-{
-	__le32 rx = 0;
-
-	WARN_ON(reg & 3);
-
-	ks8851_rdreg(ks, MK_OP(0xf, reg), (u8 *)&rx, 4);
-	return le32_to_cpu(rx);
-}
-
 /**
  * ks8851_soft_reset - issue one of the soft reset to the device
  * @ks: The device state.
@@ -508,7 +489,6 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 	unsigned rxfc;
 	unsigned rxlen;
 	unsigned rxstat;
-	u32 rxh;
 	u8 *rxpkt;
 
 	rxfc = ks8851_rdreg8(ks, KS_RXFC);
@@ -527,9 +507,8 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 	 */
 
 	for (; rxfc != 0; rxfc--) {
-		rxh = ks8851_rdreg32(ks, KS_RXFHSR);
-		rxstat = rxh & 0xffff;
-		rxlen = (rxh >> 16) & 0xfff;
+		rxstat = ks8851_rdreg16(ks, KS_RXFHSR);
+		rxlen = ks8851_rdreg16(ks, KS_RXFHBCR) & RXFHBCR_CNT_MASK;
 
 		netif_dbg(ks, rx_status, ks->netdev,
 			  "rx: stat 0x%04x, len 0x%04x\n", rxstat, rxlen);
-- 
cgit v1.2.3-59-g8ed1b


From 88cfedd0d7ab5ff047ac7eb8243fb9a69526fee7 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:35 +0200
Subject: net: ks8851: Use 16-bit writes to program MAC address

On the SPI variant of KS8851, the MAC address can be programmed with
8-, 16-, or 32-bit writes. To make it easier to support the 16-bit
parallel option of KS8851 too, switch both the MAC address programming
and readout to 16-bit operations.

Remove ks8851_wrreg8() as it is not used anywhere anymore.

There should be no functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 47 ++++++++++--------------------------
 drivers/net/ethernet/micrel/ks8851.h |  2 +-
 2 files changed, 14 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 8df130efbde1..1b81340e811f 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -185,36 +185,6 @@ static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 		netdev_err(ks->netdev, "spi_sync() failed\n");
 }
 
-/**
- * ks8851_wrreg8 - write 8bit register value to chip
- * @ks: The chip state
- * @reg: The register address
- * @val: The value to write
- *
- * Issue a write to put the value @val into the register specified in @reg.
- */
-static void ks8851_wrreg8(struct ks8851_net *ks, unsigned reg, unsigned val)
-{
-	struct spi_transfer *xfer = &ks->spi_xfer1;
-	struct spi_message *msg = &ks->spi_msg1;
-	__le16 txb[2];
-	int ret;
-	int bit;
-
-	bit = 1 << (reg & 3);
-
-	txb[0] = cpu_to_le16(MK_OP(bit, reg) | KS_SPIOP_WR);
-	txb[1] = val;
-
-	xfer->tx_buf = txb;
-	xfer->rx_buf = NULL;
-	xfer->len = 3;
-
-	ret = spi_sync(ks->spidev, msg);
-	if (ret < 0)
-		netdev_err(ks->netdev, "spi_sync() failed\n");
-}
-
 /**
  * ks8851_rdreg - issue read register command and return the data
  * @ks: The device state
@@ -349,6 +319,7 @@ static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode)
 static int ks8851_write_mac_addr(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	u16 val;
 	int i;
 
 	mutex_lock(&ks->lock);
@@ -358,8 +329,12 @@ static int ks8851_write_mac_addr(struct net_device *dev)
 	 * the first write to the MAC address does not take effect.
 	 */
 	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
-	for (i = 0; i < ETH_ALEN; i++)
-		ks8851_wrreg8(ks, KS_MAR(i), dev->dev_addr[i]);
+
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		val = (dev->dev_addr[i] << 8) | dev->dev_addr[i + 1];
+		ks8851_wrreg16(ks, KS_MAR(i), val);
+	}
+
 	if (!netif_running(dev))
 		ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
 
@@ -377,12 +352,16 @@ static int ks8851_write_mac_addr(struct net_device *dev)
 static void ks8851_read_mac_addr(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	u16 reg;
 	int i;
 
 	mutex_lock(&ks->lock);
 
-	for (i = 0; i < ETH_ALEN; i++)
-		dev->dev_addr[i] = ks8851_rdreg8(ks, KS_MAR(i));
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		reg = ks8851_rdreg16(ks, KS_MAR(i));
+		dev->dev_addr[i] = reg >> 8;
+		dev->dev_addr[i + 1] = reg & 0xff;
+	}
 
 	mutex_unlock(&ks->lock);
 }
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index 8f834aef8e32..f210d18a10b5 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -19,7 +19,7 @@
 #define CCR_32PIN				(1 << 0)    /* KSZ8851SNL    */
 
 /* MAC address registers */
-#define KS_MAR(_m)				(0x15 - (_m))
+#define KS_MAR(_m)				(0x14 - (_m))
 #define KS_MARL					0x10
 #define KS_MARM					0x12
 #define KS_MARH					0x14
-- 
cgit v1.2.3-59-g8ed1b


From aa39bf6730b72e495f711b0f06ca92e9673d648c Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:36 +0200
Subject: net: ks8851: Use 16-bit read of RXFC register

The RXFC register is the only one being read using 8-bit accessors.
To make it easier to support the 16-bit accesses used by the parallel
bus variant of KS8851, use 16-bit accessor to read RXFC register as
well as neighboring RXFCTR register.

Remove ks8851_rdreg8() as it is not used anywhere anymore.

There should be no functional change.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 17 +----------------
 1 file changed, 1 insertion(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 1b81340e811f..e2e75041e931 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -236,21 +236,6 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 		memcpy(rxb, trx + 2, rxl);
 }
 
-/**
- * ks8851_rdreg8 - read 8 bit register from device
- * @ks: The chip information
- * @reg: The register address
- *
- * Read a 8bit register from the chip, returning the result
-*/
-static unsigned ks8851_rdreg8(struct ks8851_net *ks, unsigned reg)
-{
-	u8 rxb[1];
-
-	ks8851_rdreg(ks, MK_OP(1 << (reg & 3), reg), rxb, 1);
-	return rxb[0];
-}
-
 /**
  * ks8851_rdreg16 - read 16 bit register from device
  * @ks: The chip information
@@ -470,7 +455,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 	unsigned rxstat;
 	u8 *rxpkt;
 
-	rxfc = ks8851_rdreg8(ks, KS_RXFC);
+	rxfc = (ks8851_rdreg16(ks, KS_RXFCTR) >> 8) & 0xff;
 
 	netif_dbg(ks, rx_status, ks->netdev,
 		  "%s: %d packets\n", __func__, rxfc);
-- 
cgit v1.2.3-59-g8ed1b


From 22726020050beacc7a164eaaafef0048d302cf41 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:37 +0200
Subject: net: ks8851: Factor out bus lock handling

Pull out the bus access locking code into separate functions. This is
done in preparation for unifying the driver with the parallel bus one.
The parallel bus driver does not need heavy mutex locking of the bus and
works better with spinlocks, so prepare these locking functions to be
overridden later.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 97 +++++++++++++++++++++++++-----------
 1 file changed, 68 insertions(+), 29 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index e2e75041e931..053d6d085539 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -151,6 +151,30 @@ static int msg_enable;
 /* turn register number and byte-enable mask into data for start of packet */
 #define MK_OP(_byteen, _reg) (BYTE_EN(_byteen) | (_reg)  << (8+2) | (_reg) >> 6)
 
+/**
+ * ks8851_lock - register access lock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Claim chip register access lock
+ */
+static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
+{
+	mutex_lock(&ks->lock);
+}
+
+/**
+ * ks8851_unlock - register access unlock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Release chip register access lock
+ */
+static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
+{
+	mutex_unlock(&ks->lock);
+}
+
 /* SPI register read/write calls.
  *
  * All these calls issue SPI transactions to access the chip's registers. They
@@ -304,10 +328,11 @@ static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode)
 static int ks8851_write_mac_addr(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 	u16 val;
 	int i;
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	/*
 	 * Wake up chip in case it was powered off when stopped; otherwise,
@@ -323,7 +348,7 @@ static int ks8851_write_mac_addr(struct net_device *dev)
 	if (!netif_running(dev))
 		ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 
 	return 0;
 }
@@ -337,10 +362,11 @@ static int ks8851_write_mac_addr(struct net_device *dev)
 static void ks8851_read_mac_addr(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 	u16 reg;
 	int i;
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	for (i = 0; i < ETH_ALEN; i += 2) {
 		reg = ks8851_rdreg16(ks, KS_MAR(i));
@@ -348,7 +374,7 @@ static void ks8851_read_mac_addr(struct net_device *dev)
 		dev->dev_addr[i + 1] = reg & 0xff;
 	}
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 }
 
 /**
@@ -534,10 +560,11 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 static irqreturn_t ks8851_irq(int irq, void *_ks)
 {
 	struct ks8851_net *ks = _ks;
-	unsigned status;
 	unsigned handled = 0;
+	unsigned long flags;
+	unsigned int status;
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	status = ks8851_rdreg16(ks, KS_ISR);
 
@@ -606,7 +633,7 @@ static irqreturn_t ks8851_irq(int irq, void *_ks)
 		ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1);
 	}
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 
 	if (status & IRQ_LCI)
 		mii_check_link(&ks->mii);
@@ -700,10 +727,11 @@ static void ks8851_done_tx(struct ks8851_net *ks, struct sk_buff *txb)
 static void ks8851_tx_work(struct work_struct *work)
 {
 	struct ks8851_net *ks = container_of(work, struct ks8851_net, tx_work);
+	unsigned long flags;
 	struct sk_buff *txb;
 	bool last = skb_queue_empty(&ks->txq);
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	while (!last) {
 		txb = skb_dequeue(&ks->txq);
@@ -719,7 +747,7 @@ static void ks8851_tx_work(struct work_struct *work)
 		}
 	}
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 }
 
 /**
@@ -732,6 +760,7 @@ static void ks8851_tx_work(struct work_struct *work)
 static int ks8851_net_open(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 	int ret;
 
 	ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
@@ -744,7 +773,7 @@ static int ks8851_net_open(struct net_device *dev)
 
 	/* lock the card, even if we may not actually be doing anything
 	 * else at the moment */
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	netif_dbg(ks, ifup, ks->netdev, "opening\n");
 
@@ -804,7 +833,7 @@ static int ks8851_net_open(struct net_device *dev)
 
 	netif_dbg(ks, ifup, ks->netdev, "network device up\n");
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 	mii_check_link(&ks->mii);
 	return 0;
 }
@@ -820,22 +849,23 @@ static int ks8851_net_open(struct net_device *dev)
 static int ks8851_net_stop(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 
 	netif_info(ks, ifdown, dev, "shutting down\n");
 
 	netif_stop_queue(dev);
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 	/* turn off the IRQs and ack any outstanding */
 	ks8851_wrreg16(ks, KS_IER, 0x0000);
 	ks8851_wrreg16(ks, KS_ISR, 0xffff);
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 
 	/* stop any outstanding work */
 	flush_work(&ks->tx_work);
 	flush_work(&ks->rxctrl_work);
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 	/* shutdown RX process */
 	ks8851_wrreg16(ks, KS_RXCR1, 0x0000);
 
@@ -844,7 +874,7 @@ static int ks8851_net_stop(struct net_device *dev)
 
 	/* set powermode to soft power down to save power */
 	ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 
 	/* ensure any queued tx buffers are dumped */
 	while (!skb_queue_empty(&ks->txq)) {
@@ -916,13 +946,14 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
 static void ks8851_rxctrl_work(struct work_struct *work)
 {
 	struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work);
+	unsigned long flags;
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 
 	/* need to shutdown RXQ before modifying filter parameters */
 	ks8851_wrreg16(ks, KS_RXCR1, 0x00);
 
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 }
 
 static void ks8851_set_rx_mode(struct net_device *dev)
@@ -1104,11 +1135,6 @@ static void ks8851_eeprom_regwrite(struct eeprom_93cx6 *ee)
  */
 static int ks8851_eeprom_claim(struct ks8851_net *ks)
 {
-	if (!(ks->rc_ccr & CCR_EEPROM))
-		return -ENOENT;
-
-	mutex_lock(&ks->lock);
-
 	/* start with clock low, cs high */
 	ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS);
 	return 0;
@@ -1125,7 +1151,6 @@ static void ks8851_eeprom_release(struct ks8851_net *ks)
 	unsigned val = ks8851_rdreg16(ks, KS_EEPCR);
 
 	ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA);
-	mutex_unlock(&ks->lock);
 }
 
 #define KS_EEPROM_MAGIC (0x00008851)
@@ -1135,6 +1160,7 @@ static int ks8851_set_eeprom(struct net_device *dev,
 {
 	struct ks8851_net *ks = netdev_priv(dev);
 	int offset = ee->offset;
+	unsigned long flags;
 	int len = ee->len;
 	u16 tmp;
 
@@ -1145,9 +1171,13 @@ static int ks8851_set_eeprom(struct net_device *dev,
 	if (ee->magic != KS_EEPROM_MAGIC)
 		return -EINVAL;
 
-	if (ks8851_eeprom_claim(ks))
+	if (!(ks->rc_ccr & CCR_EEPROM))
 		return -ENOENT;
 
+	ks8851_lock(ks, &flags);
+
+	ks8851_eeprom_claim(ks);
+
 	eeprom_93cx6_wren(&ks->eeprom, true);
 
 	/* ethtool currently only supports writing bytes, which means
@@ -1167,6 +1197,7 @@ static int ks8851_set_eeprom(struct net_device *dev,
 	eeprom_93cx6_wren(&ks->eeprom, false);
 
 	ks8851_eeprom_release(ks);
+	ks8851_unlock(ks, &flags);
 
 	return 0;
 }
@@ -1176,19 +1207,25 @@ static int ks8851_get_eeprom(struct net_device *dev,
 {
 	struct ks8851_net *ks = netdev_priv(dev);
 	int offset = ee->offset;
+	unsigned long flags;
 	int len = ee->len;
 
 	/* must be 2 byte aligned */
 	if (len & 1 || offset & 1)
 		return -EINVAL;
 
-	if (ks8851_eeprom_claim(ks))
+	if (!(ks->rc_ccr & CCR_EEPROM))
 		return -ENOENT;
 
+	ks8851_lock(ks, &flags);
+
+	ks8851_eeprom_claim(ks);
+
 	ee->magic = KS_EEPROM_MAGIC;
 
 	eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2);
 	ks8851_eeprom_release(ks);
+	ks8851_unlock(ks, &flags);
 
 	return 0;
 }
@@ -1262,6 +1299,7 @@ static int ks8851_phy_reg(int reg)
 static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 	int ksreg;
 	int result;
 
@@ -1269,9 +1307,9 @@ static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg)
 	if (!ksreg)
 		return 0x0;	/* no error return allowed, so use zero */
 
-	mutex_lock(&ks->lock);
+	ks8851_lock(ks, &flags);
 	result = ks8851_rdreg16(ks, ksreg);
-	mutex_unlock(&ks->lock);
+	ks8851_unlock(ks, &flags);
 
 	return result;
 }
@@ -1280,13 +1318,14 @@ static void ks8851_phy_write(struct net_device *dev,
 			     int phy, int reg, int value)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
 	int ksreg;
 
 	ksreg = ks8851_phy_reg(reg);
 	if (ksreg) {
-		mutex_lock(&ks->lock);
+		ks8851_lock(ks, &flags);
 		ks8851_wrreg16(ks, ksreg, value);
-		mutex_unlock(&ks->lock);
+		ks8851_unlock(ks, &flags);
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 18a3df73093287ed5ccf650e5d700b6afc3e0663 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:38 +0200
Subject: net: ks8851: Factor out SKB receive function

Factor out the netif_rx_ni() call into ks8851_rx_skb(), so that it can be
overridden by the parallel bus variant of the KS8851 driver.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 053d6d085539..087d2a39cdce 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -465,6 +465,15 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
 		   rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
 }
 
+/**
+ * ks8851_rx_skb - receive skbuff
+ * @skb: The skbuff
+ */
+static void ks8851_rx_skb(struct sk_buff *skb)
+{
+	netif_rx_ni(skb);
+}
+
 /**
  * ks8851_rx_pkts - receive packets from the host
  * @ks: The device information.
@@ -533,7 +542,7 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 					ks8851_dbg_dumpkkt(ks, rxpkt);
 
 				skb->protocol = eth_type_trans(skb, ks->netdev);
-				netif_rx_ni(skb);
+				ks8851_rx_skb(skb);
 
 				ks->netdev->stats.rx_packets++;
 				ks->netdev->stats.rx_bytes += rxlen;
-- 
cgit v1.2.3-59-g8ed1b


From d48b7634c692bc3df1aec413b4b4c823d55d7899 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:39 +0200
Subject: net: ks8851: Split out SPI specific entries in struct ks8851_net

Add a new struct ks8851_net_spi, which embeds the original
struct ks8851_net and contains the entries specific only to
the SPI variant of KS8851.

There should be no functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 128 +++++++++++++++++++++--------------
 1 file changed, 79 insertions(+), 49 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 087d2a39cdce..482c65b1accf 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -64,16 +64,11 @@ union ks8851_tx_hdr {
 /**
  * struct ks8851_net - KS8851 driver private data
  * @netdev: The network device we're bound to
- * @spidev: The spi device we're bound to.
- * @lock: Lock to ensure that the device is not accessed when busy.
  * @statelock: Lock on this structure for tx list.
  * @mii: The MII state information for the mii calls.
  * @rxctrl: RX settings for @rxctrl_work.
- * @tx_work: Work queue for tx packets
  * @rxctrl_work: Work queue for updating RX mode and multicast lists
  * @txq: Queue of packets for transmission.
- * @spi_msg1: pre-setup SPI transfer with one message, @spi_xfer1.
- * @spi_msg2: pre-setup SPI transfer with two messages, @spi_xfer2.
  * @txh: Space for generating packet TX header in DMA-able data
  * @rxd: Space for receiving SPI data, in DMA-able space.
  * @txd: Space for transmitting SPI data, in DMA-able space.
@@ -87,11 +82,6 @@ union ks8851_tx_hdr {
  * @vdd_io: Optional digital power supply for IO
  * @gpio: Optional reset_n gpio
  *
- * The @lock ensures that the chip is protected when certain operations are
- * in progress. When the read or write packet transfer is in progress, most
- * of the chip registers are not ccessible until the transfer is finished and
- * the DMA has been de-asserted.
- *
  * The @statelock is used to protect information in the structure which may
  * need to be accessed via several sources, such as the network driver layer
  * or one of the work queues.
@@ -102,8 +92,6 @@ union ks8851_tx_hdr {
  */
 struct ks8851_net {
 	struct net_device	*netdev;
-	struct spi_device	*spidev;
-	struct mutex		lock;
 	spinlock_t		statelock;
 
 	union ks8851_tx_hdr	txh ____cacheline_aligned;
@@ -121,22 +109,43 @@ struct ks8851_net {
 	struct mii_if_info	mii;
 	struct ks8851_rxctrl	rxctrl;
 
-	struct work_struct	tx_work;
 	struct work_struct	rxctrl_work;
 
 	struct sk_buff_head	txq;
 
-	struct spi_message	spi_msg1;
-	struct spi_message	spi_msg2;
-	struct spi_transfer	spi_xfer1;
-	struct spi_transfer	spi_xfer2[2];
-
 	struct eeprom_93cx6	eeprom;
 	struct regulator	*vdd_reg;
 	struct regulator	*vdd_io;
 	int			gpio;
 };
 
+/**
+ * struct ks8851_net_spi - KS8851 SPI driver private data
+ * @ks8851: KS8851 driver common private data
+ * @lock: Lock to ensure that the device is not accessed when busy.
+ * @tx_work: Work queue for tx packets
+ * @spidev: The spi device we're bound to.
+ * @spi_msg1: pre-setup SPI transfer with one message, @spi_xfer1.
+ * @spi_msg2: pre-setup SPI transfer with two messages, @spi_xfer2.
+ *
+ * The @lock ensures that the chip is protected when certain operations are
+ * in progress. When the read or write packet transfer is in progress, most
+ * of the chip registers are not ccessible until the transfer is finished and
+ * the DMA has been de-asserted.
+ */
+struct ks8851_net_spi {
+	struct ks8851_net	ks8851;
+	struct mutex		lock;
+	struct work_struct	tx_work;
+	struct spi_device	*spidev;
+	struct spi_message	spi_msg1;
+	struct spi_message	spi_msg2;
+	struct spi_transfer	spi_xfer1;
+	struct spi_transfer	spi_xfer2[2];
+};
+
+#define to_ks8851_spi(ks) container_of((ks), struct ks8851_net_spi, ks8851)
+
 static int msg_enable;
 
 /* SPI frame opcodes */
@@ -160,7 +169,9 @@ static int msg_enable;
  */
 static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
 {
-	mutex_lock(&ks->lock);
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	mutex_lock(&kss->lock);
 }
 
 /**
@@ -172,7 +183,9 @@ static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
  */
 static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
 {
-	mutex_unlock(&ks->lock);
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	mutex_unlock(&kss->lock);
 }
 
 /* SPI register read/write calls.
@@ -192,8 +205,9 @@ static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
  */
 static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 {
-	struct spi_transfer *xfer = &ks->spi_xfer1;
-	struct spi_message *msg = &ks->spi_msg1;
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = &kss->spi_xfer1;
+	struct spi_message *msg = &kss->spi_msg1;
 	__le16 txb[2];
 	int ret;
 
@@ -204,7 +218,7 @@ static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 	xfer->rx_buf = NULL;
 	xfer->len = 4;
 
-	ret = spi_sync(ks->spidev, msg);
+	ret = spi_sync(kss->spidev, msg);
 	if (ret < 0)
 		netdev_err(ks->netdev, "spi_sync() failed\n");
 }
@@ -222,6 +236,7 @@ static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 			 u8 *rxb, unsigned rxl)
 {
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 	struct spi_transfer *xfer;
 	struct spi_message *msg;
 	__le16 *txb = (__le16 *)ks->txd;
@@ -230,9 +245,9 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 
 	txb[0] = cpu_to_le16(op | KS_SPIOP_RD);
 
-	if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) {
-		msg = &ks->spi_msg2;
-		xfer = ks->spi_xfer2;
+	if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) {
+		msg = &kss->spi_msg2;
+		xfer = kss->spi_xfer2;
 
 		xfer->tx_buf = txb;
 		xfer->rx_buf = NULL;
@@ -243,18 +258,18 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 		xfer->rx_buf = trx;
 		xfer->len = rxl;
 	} else {
-		msg = &ks->spi_msg1;
-		xfer = &ks->spi_xfer1;
+		msg = &kss->spi_msg1;
+		xfer = &kss->spi_xfer1;
 
 		xfer->tx_buf = txb;
 		xfer->rx_buf = trx;
 		xfer->len = rxl + 2;
 	}
 
-	ret = spi_sync(ks->spidev, msg);
+	ret = spi_sync(kss->spidev, msg);
 	if (ret < 0)
 		netdev_err(ks->netdev, "read: spi_sync() failed\n");
-	else if (ks->spidev->master->flags & SPI_MASTER_HALF_DUPLEX)
+	else if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX)
 		memcpy(rxb, trx, rxl);
 	else
 		memcpy(rxb, trx + 2, rxl);
@@ -424,8 +439,9 @@ static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
  */
 static void ks8851_rdfifo(struct ks8851_net *ks, u8 *buff, unsigned len)
 {
-	struct spi_transfer *xfer = ks->spi_xfer2;
-	struct spi_message *msg = &ks->spi_msg2;
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = kss->spi_xfer2;
+	struct spi_message *msg = &kss->spi_msg2;
 	u8 txb[1];
 	int ret;
 
@@ -444,7 +460,7 @@ static void ks8851_rdfifo(struct ks8851_net *ks, u8 *buff, unsigned len)
 	xfer->tx_buf = NULL;
 	xfer->len = len;
 
-	ret = spi_sync(ks->spidev, msg);
+	ret = spi_sync(kss->spidev, msg);
 	if (ret < 0)
 		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
 }
@@ -678,8 +694,9 @@ static inline unsigned calc_txlen(unsigned len)
  */
 static void ks8851_wrpkt(struct ks8851_net *ks, struct sk_buff *txp, bool irq)
 {
-	struct spi_transfer *xfer = ks->spi_xfer2;
-	struct spi_message *msg = &ks->spi_msg2;
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = kss->spi_xfer2;
+	struct spi_message *msg = &kss->spi_msg2;
 	unsigned fid = 0;
 	int ret;
 
@@ -706,7 +723,7 @@ static void ks8851_wrpkt(struct ks8851_net *ks, struct sk_buff *txp, bool irq)
 	xfer->rx_buf = NULL;
 	xfer->len = ALIGN(txp->len, 4);
 
-	ret = spi_sync(ks->spidev, msg);
+	ret = spi_sync(kss->spidev, msg);
 	if (ret < 0)
 		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
 }
@@ -735,10 +752,15 @@ static void ks8851_done_tx(struct ks8851_net *ks, struct sk_buff *txb)
  */
 static void ks8851_tx_work(struct work_struct *work)
 {
-	struct ks8851_net *ks = container_of(work, struct ks8851_net, tx_work);
+	struct ks8851_net_spi *kss;
+	struct ks8851_net *ks;
 	unsigned long flags;
 	struct sk_buff *txb;
-	bool last = skb_queue_empty(&ks->txq);
+	bool last;
+
+	kss = container_of(work, struct ks8851_net_spi, tx_work);
+	ks = &kss->ks8851;
+	last = skb_queue_empty(&ks->txq);
 
 	ks8851_lock(ks, &flags);
 
@@ -858,8 +880,11 @@ static int ks8851_net_open(struct net_device *dev)
 static int ks8851_net_stop(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
+	struct ks8851_net_spi *kss;
 	unsigned long flags;
 
+	kss = to_ks8851_spi(ks);
+
 	netif_info(ks, ifdown, dev, "shutting down\n");
 
 	netif_stop_queue(dev);
@@ -871,7 +896,7 @@ static int ks8851_net_stop(struct net_device *dev)
 	ks8851_unlock(ks, &flags);
 
 	/* stop any outstanding work */
-	flush_work(&ks->tx_work);
+	flush_work(&kss->tx_work);
 	flush_work(&ks->rxctrl_work);
 
 	ks8851_lock(ks, &flags);
@@ -919,6 +944,9 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
 	struct ks8851_net *ks = netdev_priv(dev);
 	unsigned needed = calc_txlen(skb->len);
 	netdev_tx_t ret = NETDEV_TX_OK;
+	struct ks8851_net_spi *kss;
+
+	kss = to_ks8851_spi(ks);
 
 	netif_dbg(ks, tx_queued, ks->netdev,
 		  "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data);
@@ -934,7 +962,7 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
 	}
 
 	spin_unlock(&ks->statelock);
-	schedule_work(&ks->tx_work);
+	schedule_work(&kss->tx_work);
 
 	return ret;
 }
@@ -1406,22 +1434,24 @@ static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume);
 static int ks8851_probe(struct spi_device *spi)
 {
 	struct device *dev = &spi->dev;
+	struct ks8851_net_spi *kss;
 	struct net_device *netdev;
 	struct ks8851_net *ks;
 	int ret;
 	unsigned cider;
 	int gpio;
 
-	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net));
+	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi));
 	if (!netdev)
 		return -ENOMEM;
 
 	spi->bits_per_word = 8;
 
 	ks = netdev_priv(netdev);
+	kss = to_ks8851_spi(ks);
 
 	ks->netdev = netdev;
-	ks->spidev = spi;
+	kss->spidev = spi;
 	ks->tx_space = 6144;
 
 	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
@@ -1467,20 +1497,20 @@ static int ks8851_probe(struct spi_device *spi)
 		gpio_set_value(gpio, 1);
 	}
 
-	mutex_init(&ks->lock);
+	mutex_init(&kss->lock);
 	spin_lock_init(&ks->statelock);
 
-	INIT_WORK(&ks->tx_work, ks8851_tx_work);
+	INIT_WORK(&kss->tx_work, ks8851_tx_work);
 	INIT_WORK(&ks->rxctrl_work, ks8851_rxctrl_work);
 
 	/* initialise pre-made spi transfer messages */
 
-	spi_message_init(&ks->spi_msg1);
-	spi_message_add_tail(&ks->spi_xfer1, &ks->spi_msg1);
+	spi_message_init(&kss->spi_msg1);
+	spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1);
 
-	spi_message_init(&ks->spi_msg2);
-	spi_message_add_tail(&ks->spi_xfer2[0], &ks->spi_msg2);
-	spi_message_add_tail(&ks->spi_xfer2[1], &ks->spi_msg2);
+	spi_message_init(&kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2);
 
 	/* setup EEPROM state */
 
-- 
cgit v1.2.3-59-g8ed1b


From 24be72632c68e616c23ae01b08722d38242e552b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:40 +0200
Subject: net: ks8851: Split out SPI specific code from probe() and remove()

Factor out common code into ks8851_probe_common() and
ks8851_remove_common() to permit both SPI and parallel
bus driver variants to use the common code path for
both probing and removal.

There should be no functional change.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 86 ++++++++++++++++++++----------------
 1 file changed, 48 insertions(+), 38 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 482c65b1accf..4283ba5bee81 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -1431,27 +1431,15 @@ static int ks8851_resume(struct device *dev)
 
 static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume);
 
-static int ks8851_probe(struct spi_device *spi)
+static int ks8851_probe_common(struct net_device *netdev, struct device *dev,
+			       int msg_en)
 {
-	struct device *dev = &spi->dev;
-	struct ks8851_net_spi *kss;
-	struct net_device *netdev;
-	struct ks8851_net *ks;
-	int ret;
+	struct ks8851_net *ks = netdev_priv(netdev);
 	unsigned cider;
 	int gpio;
-
-	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi));
-	if (!netdev)
-		return -ENOMEM;
-
-	spi->bits_per_word = 8;
-
-	ks = netdev_priv(netdev);
-	kss = to_ks8851_spi(ks);
+	int ret;
 
 	ks->netdev = netdev;
-	kss->spidev = spi;
 	ks->tx_space = 6144;
 
 	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
@@ -1497,23 +1485,11 @@ static int ks8851_probe(struct spi_device *spi)
 		gpio_set_value(gpio, 1);
 	}
 
-	mutex_init(&kss->lock);
 	spin_lock_init(&ks->statelock);
 
-	INIT_WORK(&kss->tx_work, ks8851_tx_work);
 	INIT_WORK(&ks->rxctrl_work, ks8851_rxctrl_work);
 
-	/* initialise pre-made spi transfer messages */
-
-	spi_message_init(&kss->spi_msg1);
-	spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1);
-
-	spi_message_init(&kss->spi_msg2);
-	spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2);
-	spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2);
-
 	/* setup EEPROM state */
-
 	ks->eeprom.data = ks;
 	ks->eeprom.width = PCI_EEPROM_WIDTH_93C46;
 	ks->eeprom.register_read = ks8851_eeprom_regread;
@@ -1527,12 +1503,12 @@ static int ks8851_probe(struct spi_device *spi)
 	ks->mii.mdio_read	= ks8851_phy_read;
 	ks->mii.mdio_write	= ks8851_phy_write;
 
-	dev_info(dev, "message enable is %d\n", msg_enable);
+	dev_info(dev, "message enable is %d\n", msg_en);
 
 	/* set the default message enable */
-	ks->msg_enable = netif_msg_init(msg_enable, (NETIF_MSG_DRV |
-						     NETIF_MSG_PROBE |
-						     NETIF_MSG_LINK));
+	ks->msg_enable = netif_msg_init(msg_en, NETIF_MSG_DRV |
+						NETIF_MSG_PROBE |
+						NETIF_MSG_LINK);
 
 	skb_queue_head_init(&ks->txq);
 
@@ -1544,7 +1520,6 @@ static int ks8851_probe(struct spi_device *spi)
 	netif_carrier_off(ks->netdev);
 	netdev->if_port = IF_PORT_100BASET;
 	netdev->netdev_ops = &ks8851_netdev_ops;
-	netdev->irq = spi->irq;
 
 	/* issue a global soft reset to reset the device. */
 	ks8851_soft_reset(ks, GRR_GSR);
@@ -1586,12 +1561,9 @@ err_reg_io:
 	return ret;
 }
 
-static int ks8851_remove(struct spi_device *spi)
+static int ks8851_remove_common(struct device *dev)
 {
-	struct device *dev = &spi->dev;
-	struct ks8851_net *priv;
-
-	priv = dev_get_drvdata(dev);
+	struct ks8851_net *priv = dev_get_drvdata(dev);
 
 	if (netif_msg_drv(priv))
 		dev_info(dev, "remove\n");
@@ -1605,6 +1577,44 @@ static int ks8851_remove(struct spi_device *spi)
 	return 0;
 }
 
+static int ks8851_probe(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct ks8851_net_spi *kss;
+	struct net_device *netdev;
+	struct ks8851_net *ks;
+
+	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi));
+	if (!netdev)
+		return -ENOMEM;
+
+	spi->bits_per_word = 8;
+
+	ks = netdev_priv(netdev);
+	kss = to_ks8851_spi(ks);
+
+	kss->spidev = spi;
+	mutex_init(&kss->lock);
+	INIT_WORK(&kss->tx_work, ks8851_tx_work);
+
+	/* initialise pre-made spi transfer messages */
+	spi_message_init(&kss->spi_msg1);
+	spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1);
+
+	spi_message_init(&kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2);
+
+	netdev->irq = spi->irq;
+
+	return ks8851_probe_common(netdev, dev, msg_enable);
+}
+
+static int ks8851_remove(struct spi_device *spi)
+{
+	return ks8851_remove_common(&spi->dev);
+}
+
 static const struct of_device_id ks8851_match_table[] = {
 	{ .compatible = "micrel,ks8851" },
 	{ }
-- 
cgit v1.2.3-59-g8ed1b


From 144ad36c3d3b53045a2b3f334bd735f469cb8ea8 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:41 +0200
Subject: net: ks8851: Factor out TX work flush function

While the SPI version of the KS8851 requires a TX worker thread to pump
data via SPI, the parallel bus version can write data into the TX FIFO
directly in .ndo_start_xmit, as the parallel bus access is much faster
and does not sleep. Factor out this TX work flush part, so it can be
overridden by the parallel bus driver.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 16 ++++++++++++----
 1 file changed, 12 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 4283ba5bee81..458c86903ac0 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -781,6 +781,17 @@ static void ks8851_tx_work(struct work_struct *work)
 	ks8851_unlock(ks, &flags);
 }
 
+/**
+ * ks8851_flush_tx_work - flush outstanding TX work
+ * @ks: The device state
+ */
+static void ks8851_flush_tx_work(struct ks8851_net *ks)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	flush_work(&kss->tx_work);
+}
+
 /**
  * ks8851_net_open - open network device
  * @dev: The network device being opened.
@@ -880,11 +891,8 @@ static int ks8851_net_open(struct net_device *dev)
 static int ks8851_net_stop(struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
-	struct ks8851_net_spi *kss;
 	unsigned long flags;
 
-	kss = to_ks8851_spi(ks);
-
 	netif_info(ks, ifdown, dev, "shutting down\n");
 
 	netif_stop_queue(dev);
@@ -896,7 +904,7 @@ static int ks8851_net_stop(struct net_device *dev)
 	ks8851_unlock(ks, &flags);
 
 	/* stop any outstanding work */
-	flush_work(&kss->tx_work);
+	ks8851_flush_tx_work(ks);
 	flush_work(&ks->rxctrl_work);
 
 	ks8851_lock(ks, &flags);
-- 
cgit v1.2.3-59-g8ed1b


From d2a1c643a00e881de3ceae7ebe70e90f791dbb22 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:42 +0200
Subject: net: ks8851: Permit overriding interrupt enable register

The parallel bus variant does not need to use the TX interrupt at all,
as it writes the TX FIFO directly from within .ndo_start_xmit. Permit the
drivers to configure the interrupt enable bits.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index 458c86903ac0..baf424f9893b 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -859,17 +859,8 @@ static int ks8851_net_open(struct net_device *dev)
 	ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
 
 	/* clear then enable interrupts */
-
-#define STD_IRQ (IRQ_LCI |	/* Link Change */	\
-		 IRQ_TXI |	/* TX done */		\
-		 IRQ_RXI |	/* RX done */		\
-		 IRQ_SPIBEI |	/* SPI bus error */	\
-		 IRQ_TXPSI |	/* TX process stop */	\
-		 IRQ_RXPSI)	/* RX process stop */
-
-	ks->rc_ier = STD_IRQ;
-	ks8851_wrreg16(ks, KS_ISR, STD_IRQ);
-	ks8851_wrreg16(ks, KS_IER, STD_IRQ);
+	ks8851_wrreg16(ks, KS_ISR, ks->rc_ier);
+	ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
 
 	netif_start_queue(ks->netdev);
 
@@ -1599,6 +1590,15 @@ static int ks8851_probe(struct spi_device *spi)
 	spi->bits_per_word = 8;
 
 	ks = netdev_priv(netdev);
+
+#define STD_IRQ (IRQ_LCI |	/* Link Change */	\
+		 IRQ_TXI |	/* TX done */		\
+		 IRQ_RXI |	/* RX done */		\
+		 IRQ_SPIBEI |	/* SPI bus error */	\
+		 IRQ_TXPSI |	/* TX process stop */	\
+		 IRQ_RXPSI)	/* RX process stop */
+	ks->rc_ier = STD_IRQ;
+
 	kss = to_ks8851_spi(ks);
 
 	kss->spidev = spi;
-- 
cgit v1.2.3-59-g8ed1b


From 7a552c850c4581d698b8d47d9d03196fe572f32d Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:43 +0200
Subject: net: ks8851: Implement register, FIFO, lock accessor callbacks

The register and FIFO accessors are bus specific, so is locking.
Implement callbacks so that each variant of the KS8851 can implement
matching accessors and locking, and use the rest of the common code.

Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851.c | 178 ++++++++++++++++++++++++++++++-----
 1 file changed, 156 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
index baf424f9893b..1fa907d5dd5b 100644
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ b/drivers/net/ethernet/micrel/ks8851.c
@@ -81,6 +81,15 @@ union ks8851_tx_hdr {
  * @vdd_reg:	Optional regulator supplying the chip
  * @vdd_io: Optional digital power supply for IO
  * @gpio: Optional reset_n gpio
+ * @lock: Bus access lock callback
+ * @unlock: Bus access unlock callback
+ * @rdreg16: 16bit register read callback
+ * @wrreg16: 16bit register write callback
+ * @rdfifo: FIFO read callback
+ * @wrfifo: FIFO write callback
+ * @start_xmit: start_xmit() implementation callback
+ * @rx_skb: rx_skb() implementation callback
+ * @flush_tx_work: flush_tx_work() implementation callback
  *
  * The @statelock is used to protect information in the structure which may
  * need to be accessed via several sources, such as the network driver layer
@@ -117,6 +126,24 @@ struct ks8851_net {
 	struct regulator	*vdd_reg;
 	struct regulator	*vdd_io;
 	int			gpio;
+
+	void			(*lock)(struct ks8851_net *ks,
+					unsigned long *flags);
+	void			(*unlock)(struct ks8851_net *ks,
+					  unsigned long *flags);
+	unsigned int		(*rdreg16)(struct ks8851_net *ks,
+					   unsigned int reg);
+	void			(*wrreg16)(struct ks8851_net *ks,
+					   unsigned int reg, unsigned int val);
+	void			(*rdfifo)(struct ks8851_net *ks, u8 *buff,
+					  unsigned int len);
+	void			(*wrfifo)(struct ks8851_net *ks,
+					  struct sk_buff *txp, bool irq);
+	netdev_tx_t		(*start_xmit)(struct sk_buff *skb,
+					      struct net_device *dev);
+	void			(*rx_skb)(struct ks8851_net *ks,
+					  struct sk_buff *skb);
+	void			(*flush_tx_work)(struct ks8851_net *ks);
 };
 
 /**
@@ -161,13 +188,13 @@ static int msg_enable;
 #define MK_OP(_byteen, _reg) (BYTE_EN(_byteen) | (_reg)  << (8+2) | (_reg) >> 6)
 
 /**
- * ks8851_lock - register access lock
+ * ks8851_lock_spi - register access lock for SPI
  * @ks: The chip state
  * @flags: Spinlock flags
  *
  * Claim chip register access lock
  */
-static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
+static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 
@@ -175,19 +202,43 @@ static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
 }
 
 /**
- * ks8851_unlock - register access unlock
+ * ks8851_unlock_spi - register access unlock for SPI
  * @ks: The chip state
  * @flags: Spinlock flags
  *
  * Release chip register access lock
  */
-static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
+static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 
 	mutex_unlock(&kss->lock);
 }
 
+/**
+ * ks8851_lock - register access lock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Claim chip register access lock
+ */
+static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
+{
+	ks->lock(ks, flags);
+}
+
+/**
+ * ks8851_unlock - register access unlock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Release chip register access lock
+ */
+static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
+{
+	ks->unlock(ks, flags);
+}
+
 /* SPI register read/write calls.
  *
  * All these calls issue SPI transactions to access the chip's registers. They
@@ -196,14 +247,15 @@ static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
  */
 
 /**
- * ks8851_wrreg16 - write 16bit register value to chip
+ * ks8851_wrreg16_spi - write 16bit register value to chip via SPI
  * @ks: The chip state
  * @reg: The register address
  * @val: The value to write
  *
  * Issue a write to put the value @val into the register specified in @reg.
  */
-static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
+static void ks8851_wrreg16_spi(struct ks8851_net *ks, unsigned int reg,
+			       unsigned int val)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 	struct spi_transfer *xfer = &kss->spi_xfer1;
@@ -223,6 +275,20 @@ static void ks8851_wrreg16(struct ks8851_net *ks, unsigned reg, unsigned val)
 		netdev_err(ks->netdev, "spi_sync() failed\n");
 }
 
+/**
+ * ks8851_wrreg16 - write 16bit register value to chip
+ * @ks: The chip state
+ * @reg: The register address
+ * @val: The value to write
+ *
+ * Issue a write to put the value @val into the register specified in @reg.
+ */
+static void ks8851_wrreg16(struct ks8851_net *ks, unsigned int reg,
+			   unsigned int val)
+{
+	ks->wrreg16(ks, reg, val);
+}
+
 /**
  * ks8851_rdreg - issue read register command and return the data
  * @ks: The device state
@@ -276,13 +342,14 @@ static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
 }
 
 /**
- * ks8851_rdreg16 - read 16 bit register from device
+ * ks8851_rdreg16_spi - read 16 bit register from device via SPI
  * @ks: The chip information
  * @reg: The register address
  *
  * Read a 16bit register from the chip, returning the result
 */
-static unsigned ks8851_rdreg16(struct ks8851_net *ks, unsigned reg)
+static unsigned int ks8851_rdreg16_spi(struct ks8851_net *ks,
+				       unsigned int reg)
 {
 	__le16 rx = 0;
 
@@ -290,6 +357,19 @@ static unsigned ks8851_rdreg16(struct ks8851_net *ks, unsigned reg)
 	return le16_to_cpu(rx);
 }
 
+/**
+ * ks8851_rdreg16 - read 16 bit register from device
+ * @ks: The chip information
+ * @reg: The register address
+ *
+ * Read a 16bit register from the chip, returning the result
+ */
+static unsigned int ks8851_rdreg16(struct ks8851_net *ks,
+				   unsigned int reg)
+{
+	return ks->rdreg16(ks, reg);
+}
+
 /**
  * ks8851_soft_reset - issue one of the soft reset to the device
  * @ks: The device state.
@@ -429,7 +509,7 @@ static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
 }
 
 /**
- * ks8851_rdfifo - read data from the receive fifo
+ * ks8851_rdfifo_spi - read data from the receive fifo via SPI
  * @ks: The device state.
  * @buff: The buffer address
  * @len: The length of the data to read
@@ -437,7 +517,8 @@ static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
  * Issue an RXQ FIFO read command and read the @len amount of data from
  * the FIFO into the buffer specified by @buff.
  */
-static void ks8851_rdfifo(struct ks8851_net *ks, u8 *buff, unsigned len)
+static void ks8851_rdfifo_spi(struct ks8851_net *ks, u8 *buff,
+			      unsigned int len)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 	struct spi_transfer *xfer = kss->spi_xfer2;
@@ -482,14 +563,25 @@ static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
 }
 
 /**
- * ks8851_rx_skb - receive skbuff
+ * ks8851_rx_skb_spi - receive skbuff for SPI
+ * @ks: The device state
  * @skb: The skbuff
  */
-static void ks8851_rx_skb(struct sk_buff *skb)
+static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
 {
 	netif_rx_ni(skb);
 }
 
+/**
+ * ks8851_rx_skb - receive skbuff
+ * @ks: The device state
+ * @skb: The skbuff
+ */
+static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
+{
+	ks->rx_skb(ks, skb);
+}
+
 /**
  * ks8851_rx_pkts - receive packets from the host
  * @ks: The device information.
@@ -552,13 +644,13 @@ static void ks8851_rx_pkts(struct ks8851_net *ks)
 
 				rxpkt = skb_put(skb, rxlen) - 8;
 
-				ks8851_rdfifo(ks, rxpkt, rxalign + 8);
+				ks->rdfifo(ks, rxpkt, rxalign + 8);
 
 				if (netif_msg_pktdata(ks))
 					ks8851_dbg_dumpkkt(ks, rxpkt);
 
 				skb->protocol = eth_type_trans(skb, ks->netdev);
-				ks8851_rx_skb(skb);
+				ks8851_rx_skb(ks, skb);
 
 				ks->netdev->stats.rx_packets++;
 				ks->netdev->stats.rx_bytes += rxlen;
@@ -682,7 +774,7 @@ static inline unsigned calc_txlen(unsigned len)
 }
 
 /**
- * ks8851_wrpkt - write packet to TX FIFO
+ * ks8851_wrpkt_spi - write packet to TX FIFO via SPI
  * @ks: The device state.
  * @txp: The sk_buff to transmit.
  * @irq: IRQ on completion of the packet.
@@ -692,7 +784,8 @@ static inline unsigned calc_txlen(unsigned len)
  * needs, such as IRQ on completion. Send the header and the packet data to
  * the device.
  */
-static void ks8851_wrpkt(struct ks8851_net *ks, struct sk_buff *txp, bool irq)
+static void ks8851_wrpkt_spi(struct ks8851_net *ks, struct sk_buff *txp,
+			     bool irq)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 	struct spi_transfer *xfer = kss->spi_xfer2;
@@ -770,7 +863,7 @@ static void ks8851_tx_work(struct work_struct *work)
 
 		if (txb != NULL) {
 			ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
-			ks8851_wrpkt(ks, txb, last);
+			ks->wrfifo(ks, txb, last);
 			ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
 			ks8851_wrreg16(ks, KS_TXQCR, TXQCR_METFE);
 
@@ -782,16 +875,26 @@ static void ks8851_tx_work(struct work_struct *work)
 }
 
 /**
- * ks8851_flush_tx_work - flush outstanding TX work
+ * ks8851_flush_tx_work_spi - flush outstanding TX work for SPI
  * @ks: The device state
  */
-static void ks8851_flush_tx_work(struct ks8851_net *ks)
+static void ks8851_flush_tx_work_spi(struct ks8851_net *ks)
 {
 	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
 
 	flush_work(&kss->tx_work);
 }
 
+/**
+ * ks8851_flush_tx_work - flush outstanding TX work
+ * @ks: The device state
+ */
+static void ks8851_flush_tx_work(struct ks8851_net *ks)
+{
+	if (ks->flush_tx_work)
+		ks->flush_tx_work(ks);
+}
+
 /**
  * ks8851_net_open - open network device
  * @dev: The network device being opened.
@@ -925,7 +1028,7 @@ static int ks8851_net_stop(struct net_device *dev)
 }
 
 /**
- * ks8851_start_xmit - transmit packet
+ * ks8851_start_xmit_spi - transmit packet using SPI
  * @skb: The buffer to transmit
  * @dev: The device used to transmit the packet.
  *
@@ -937,8 +1040,8 @@ static int ks8851_net_stop(struct net_device *dev)
  * and secondly so we can round up more than one packet to transmit which
  * means we can try and avoid generating too many transmit done interrupts.
  */
-static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
-				     struct net_device *dev)
+static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb,
+					 struct net_device *dev)
 {
 	struct ks8851_net *ks = netdev_priv(dev);
 	unsigned needed = calc_txlen(skb->len);
@@ -966,6 +1069,27 @@ static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
 	return ret;
 }
 
+/**
+ * ks8851_start_xmit - transmit packet
+ * @skb: The buffer to transmit
+ * @dev: The device used to transmit the packet.
+ *
+ * Called by the network layer to transmit the @skb. Queue the packet for
+ * the device and schedule the necessary work to transmit the packet when
+ * it is free.
+ *
+ * We do this to firstly avoid sleeping with the network device locked,
+ * and secondly so we can round up more than one packet to transmit which
+ * means we can try and avoid generating too many transmit done interrupts.
+ */
+static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	return ks->start_xmit(skb, dev);
+}
+
 /**
  * ks8851_rxctrl_work - work handler to change rx mode
  * @work: The work structure this belongs to.
@@ -1591,6 +1715,16 @@ static int ks8851_probe(struct spi_device *spi)
 
 	ks = netdev_priv(netdev);
 
+	ks->lock = ks8851_lock_spi;
+	ks->unlock = ks8851_unlock_spi;
+	ks->rdreg16 = ks8851_rdreg16_spi;
+	ks->wrreg16 = ks8851_wrreg16_spi;
+	ks->rdfifo = ks8851_rdfifo_spi;
+	ks->wrfifo = ks8851_wrpkt_spi;
+	ks->start_xmit = ks8851_start_xmit_spi;
+	ks->rx_skb = ks8851_rx_skb_spi;
+	ks->flush_tx_work = ks8851_flush_tx_work_spi;
+
 #define STD_IRQ (IRQ_LCI |	/* Link Change */	\
 		 IRQ_TXI |	/* TX done */		\
 		 IRQ_RXI |	/* RX done */		\
-- 
cgit v1.2.3-59-g8ed1b


From b07f987a8d773c5c7dd5e5714e0468729abf132c Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:44 +0200
Subject: net: ks8851: Separate SPI operations into separate file

Pull all the SPI bus specific code into a separate file, so that it is
not mixed with the common code. Rename ks8851.c to ks8851_common.c.
ks8851_common.c is now linked with ks8851_spi.c, so it can call the
accessors in ks8851_spi.c without any pointer indirection.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/Makefile        |    1 +
 drivers/net/ethernet/micrel/ks8851.c        | 1783 ---------------------------
 drivers/net/ethernet/micrel/ks8851.h        |  149 +++
 drivers/net/ethernet/micrel/ks8851_common.c | 1193 ++++++++++++++++++
 drivers/net/ethernet/micrel/ks8851_spi.c    |  485 ++++++++
 5 files changed, 1828 insertions(+), 1783 deletions(-)
 delete mode 100644 drivers/net/ethernet/micrel/ks8851.c
 create mode 100644 drivers/net/ethernet/micrel/ks8851_common.c
 create mode 100644 drivers/net/ethernet/micrel/ks8851_spi.c

diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile
index 6d8ac5527aef..c7a4725c2e95 100644
--- a/drivers/net/ethernet/micrel/Makefile
+++ b/drivers/net/ethernet/micrel/Makefile
@@ -5,5 +5,6 @@
 
 obj-$(CONFIG_KS8842) += ks8842.o
 obj-$(CONFIG_KS8851) += ks8851.o
+ks8851-objs = ks8851_common.o ks8851_spi.o
 obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o
 obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o
diff --git a/drivers/net/ethernet/micrel/ks8851.c b/drivers/net/ethernet/micrel/ks8851.c
deleted file mode 100644
index 1fa907d5dd5b..000000000000
--- a/drivers/net/ethernet/micrel/ks8851.c
+++ /dev/null
@@ -1,1783 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/* drivers/net/ethernet/micrel/ks8851.c
- *
- * Copyright 2009 Simtec Electronics
- *	http://www.simtec.co.uk/
- *	Ben Dooks <ben@simtec.co.uk>
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#define DEBUG
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/cache.h>
-#include <linux/crc32.h>
-#include <linux/mii.h>
-#include <linux/eeprom_93cx6.h>
-#include <linux/regulator/consumer.h>
-
-#include <linux/spi/spi.h>
-#include <linux/gpio.h>
-#include <linux/of_gpio.h>
-#include <linux/of_net.h>
-
-#include "ks8851.h"
-
-/**
- * struct ks8851_rxctrl - KS8851 driver rx control
- * @mchash: Multicast hash-table data.
- * @rxcr1: KS_RXCR1 register setting
- * @rxcr2: KS_RXCR2 register setting
- *
- * Representation of the settings needs to control the receive filtering
- * such as the multicast hash-filter and the receive register settings. This
- * is used to make the job of working out if the receive settings change and
- * then issuing the new settings to the worker that will send the necessary
- * commands.
- */
-struct ks8851_rxctrl {
-	u16	mchash[4];
-	u16	rxcr1;
-	u16	rxcr2;
-};
-
-/**
- * union ks8851_tx_hdr - tx header data
- * @txb: The header as bytes
- * @txw: The header as 16bit, little-endian words
- *
- * A dual representation of the tx header data to allow
- * access to individual bytes, and to allow 16bit accesses
- * with 16bit alignment.
- */
-union ks8851_tx_hdr {
-	u8	txb[6];
-	__le16	txw[3];
-};
-
-/**
- * struct ks8851_net - KS8851 driver private data
- * @netdev: The network device we're bound to
- * @statelock: Lock on this structure for tx list.
- * @mii: The MII state information for the mii calls.
- * @rxctrl: RX settings for @rxctrl_work.
- * @rxctrl_work: Work queue for updating RX mode and multicast lists
- * @txq: Queue of packets for transmission.
- * @txh: Space for generating packet TX header in DMA-able data
- * @rxd: Space for receiving SPI data, in DMA-able space.
- * @txd: Space for transmitting SPI data, in DMA-able space.
- * @msg_enable: The message flags controlling driver output (see ethtool).
- * @fid: Incrementing frame id tag.
- * @rc_ier: Cached copy of KS_IER.
- * @rc_ccr: Cached copy of KS_CCR.
- * @rc_rxqcr: Cached copy of KS_RXQCR.
- * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM.
- * @vdd_reg:	Optional regulator supplying the chip
- * @vdd_io: Optional digital power supply for IO
- * @gpio: Optional reset_n gpio
- * @lock: Bus access lock callback
- * @unlock: Bus access unlock callback
- * @rdreg16: 16bit register read callback
- * @wrreg16: 16bit register write callback
- * @rdfifo: FIFO read callback
- * @wrfifo: FIFO write callback
- * @start_xmit: start_xmit() implementation callback
- * @rx_skb: rx_skb() implementation callback
- * @flush_tx_work: flush_tx_work() implementation callback
- *
- * The @statelock is used to protect information in the structure which may
- * need to be accessed via several sources, such as the network driver layer
- * or one of the work queues.
- *
- * We align the buffers we may use for rx/tx to ensure that if the SPI driver
- * wants to DMA map them, it will not have any problems with data the driver
- * modifies.
- */
-struct ks8851_net {
-	struct net_device	*netdev;
-	spinlock_t		statelock;
-
-	union ks8851_tx_hdr	txh ____cacheline_aligned;
-	u8			rxd[8];
-	u8			txd[8];
-
-	u32			msg_enable ____cacheline_aligned;
-	u16			tx_space;
-	u8			fid;
-
-	u16			rc_ier;
-	u16			rc_rxqcr;
-	u16			rc_ccr;
-
-	struct mii_if_info	mii;
-	struct ks8851_rxctrl	rxctrl;
-
-	struct work_struct	rxctrl_work;
-
-	struct sk_buff_head	txq;
-
-	struct eeprom_93cx6	eeprom;
-	struct regulator	*vdd_reg;
-	struct regulator	*vdd_io;
-	int			gpio;
-
-	void			(*lock)(struct ks8851_net *ks,
-					unsigned long *flags);
-	void			(*unlock)(struct ks8851_net *ks,
-					  unsigned long *flags);
-	unsigned int		(*rdreg16)(struct ks8851_net *ks,
-					   unsigned int reg);
-	void			(*wrreg16)(struct ks8851_net *ks,
-					   unsigned int reg, unsigned int val);
-	void			(*rdfifo)(struct ks8851_net *ks, u8 *buff,
-					  unsigned int len);
-	void			(*wrfifo)(struct ks8851_net *ks,
-					  struct sk_buff *txp, bool irq);
-	netdev_tx_t		(*start_xmit)(struct sk_buff *skb,
-					      struct net_device *dev);
-	void			(*rx_skb)(struct ks8851_net *ks,
-					  struct sk_buff *skb);
-	void			(*flush_tx_work)(struct ks8851_net *ks);
-};
-
-/**
- * struct ks8851_net_spi - KS8851 SPI driver private data
- * @ks8851: KS8851 driver common private data
- * @lock: Lock to ensure that the device is not accessed when busy.
- * @tx_work: Work queue for tx packets
- * @spidev: The spi device we're bound to.
- * @spi_msg1: pre-setup SPI transfer with one message, @spi_xfer1.
- * @spi_msg2: pre-setup SPI transfer with two messages, @spi_xfer2.
- *
- * The @lock ensures that the chip is protected when certain operations are
- * in progress. When the read or write packet transfer is in progress, most
- * of the chip registers are not ccessible until the transfer is finished and
- * the DMA has been de-asserted.
- */
-struct ks8851_net_spi {
-	struct ks8851_net	ks8851;
-	struct mutex		lock;
-	struct work_struct	tx_work;
-	struct spi_device	*spidev;
-	struct spi_message	spi_msg1;
-	struct spi_message	spi_msg2;
-	struct spi_transfer	spi_xfer1;
-	struct spi_transfer	spi_xfer2[2];
-};
-
-#define to_ks8851_spi(ks) container_of((ks), struct ks8851_net_spi, ks8851)
-
-static int msg_enable;
-
-/* SPI frame opcodes */
-#define KS_SPIOP_RD	(0x00)
-#define KS_SPIOP_WR	(0x40)
-#define KS_SPIOP_RXFIFO	(0x80)
-#define KS_SPIOP_TXFIFO	(0xC0)
-
-/* shift for byte-enable data */
-#define BYTE_EN(_x)	((_x) << 2)
-
-/* turn register number and byte-enable mask into data for start of packet */
-#define MK_OP(_byteen, _reg) (BYTE_EN(_byteen) | (_reg)  << (8+2) | (_reg) >> 6)
-
-/**
- * ks8851_lock_spi - register access lock for SPI
- * @ks: The chip state
- * @flags: Spinlock flags
- *
- * Claim chip register access lock
- */
-static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-
-	mutex_lock(&kss->lock);
-}
-
-/**
- * ks8851_unlock_spi - register access unlock for SPI
- * @ks: The chip state
- * @flags: Spinlock flags
- *
- * Release chip register access lock
- */
-static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-
-	mutex_unlock(&kss->lock);
-}
-
-/**
- * ks8851_lock - register access lock
- * @ks: The chip state
- * @flags: Spinlock flags
- *
- * Claim chip register access lock
- */
-static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
-{
-	ks->lock(ks, flags);
-}
-
-/**
- * ks8851_unlock - register access unlock
- * @ks: The chip state
- * @flags: Spinlock flags
- *
- * Release chip register access lock
- */
-static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
-{
-	ks->unlock(ks, flags);
-}
-
-/* SPI register read/write calls.
- *
- * All these calls issue SPI transactions to access the chip's registers. They
- * all require that the necessary lock is held to prevent accesses when the
- * chip is busy transferring packet data (RX/TX FIFO accesses).
- */
-
-/**
- * ks8851_wrreg16_spi - write 16bit register value to chip via SPI
- * @ks: The chip state
- * @reg: The register address
- * @val: The value to write
- *
- * Issue a write to put the value @val into the register specified in @reg.
- */
-static void ks8851_wrreg16_spi(struct ks8851_net *ks, unsigned int reg,
-			       unsigned int val)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-	struct spi_transfer *xfer = &kss->spi_xfer1;
-	struct spi_message *msg = &kss->spi_msg1;
-	__le16 txb[2];
-	int ret;
-
-	txb[0] = cpu_to_le16(MK_OP(reg & 2 ? 0xC : 0x03, reg) | KS_SPIOP_WR);
-	txb[1] = cpu_to_le16(val);
-
-	xfer->tx_buf = txb;
-	xfer->rx_buf = NULL;
-	xfer->len = 4;
-
-	ret = spi_sync(kss->spidev, msg);
-	if (ret < 0)
-		netdev_err(ks->netdev, "spi_sync() failed\n");
-}
-
-/**
- * ks8851_wrreg16 - write 16bit register value to chip
- * @ks: The chip state
- * @reg: The register address
- * @val: The value to write
- *
- * Issue a write to put the value @val into the register specified in @reg.
- */
-static void ks8851_wrreg16(struct ks8851_net *ks, unsigned int reg,
-			   unsigned int val)
-{
-	ks->wrreg16(ks, reg, val);
-}
-
-/**
- * ks8851_rdreg - issue read register command and return the data
- * @ks: The device state
- * @op: The register address and byte enables in message format.
- * @rxb: The RX buffer to return the result into
- * @rxl: The length of data expected.
- *
- * This is the low level read call that issues the necessary spi message(s)
- * to read data from the register specified in @op.
- */
-static void ks8851_rdreg(struct ks8851_net *ks, unsigned op,
-			 u8 *rxb, unsigned rxl)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-	struct spi_transfer *xfer;
-	struct spi_message *msg;
-	__le16 *txb = (__le16 *)ks->txd;
-	u8 *trx = ks->rxd;
-	int ret;
-
-	txb[0] = cpu_to_le16(op | KS_SPIOP_RD);
-
-	if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) {
-		msg = &kss->spi_msg2;
-		xfer = kss->spi_xfer2;
-
-		xfer->tx_buf = txb;
-		xfer->rx_buf = NULL;
-		xfer->len = 2;
-
-		xfer++;
-		xfer->tx_buf = NULL;
-		xfer->rx_buf = trx;
-		xfer->len = rxl;
-	} else {
-		msg = &kss->spi_msg1;
-		xfer = &kss->spi_xfer1;
-
-		xfer->tx_buf = txb;
-		xfer->rx_buf = trx;
-		xfer->len = rxl + 2;
-	}
-
-	ret = spi_sync(kss->spidev, msg);
-	if (ret < 0)
-		netdev_err(ks->netdev, "read: spi_sync() failed\n");
-	else if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX)
-		memcpy(rxb, trx, rxl);
-	else
-		memcpy(rxb, trx + 2, rxl);
-}
-
-/**
- * ks8851_rdreg16_spi - read 16 bit register from device via SPI
- * @ks: The chip information
- * @reg: The register address
- *
- * Read a 16bit register from the chip, returning the result
-*/
-static unsigned int ks8851_rdreg16_spi(struct ks8851_net *ks,
-				       unsigned int reg)
-{
-	__le16 rx = 0;
-
-	ks8851_rdreg(ks, MK_OP(reg & 2 ? 0xC : 0x3, reg), (u8 *)&rx, 2);
-	return le16_to_cpu(rx);
-}
-
-/**
- * ks8851_rdreg16 - read 16 bit register from device
- * @ks: The chip information
- * @reg: The register address
- *
- * Read a 16bit register from the chip, returning the result
- */
-static unsigned int ks8851_rdreg16(struct ks8851_net *ks,
-				   unsigned int reg)
-{
-	return ks->rdreg16(ks, reg);
-}
-
-/**
- * ks8851_soft_reset - issue one of the soft reset to the device
- * @ks: The device state.
- * @op: The bit(s) to set in the GRR
- *
- * Issue the relevant soft-reset command to the device's GRR register
- * specified by @op.
- *
- * Note, the delays are in there as a caution to ensure that the reset
- * has time to take effect and then complete. Since the datasheet does
- * not currently specify the exact sequence, we have chosen something
- * that seems to work with our device.
- */
-static void ks8851_soft_reset(struct ks8851_net *ks, unsigned op)
-{
-	ks8851_wrreg16(ks, KS_GRR, op);
-	mdelay(1);	/* wait a short time to effect reset */
-	ks8851_wrreg16(ks, KS_GRR, 0);
-	mdelay(1);	/* wait for condition to clear */
-}
-
-/**
- * ks8851_set_powermode - set power mode of the device
- * @ks: The device state
- * @pwrmode: The power mode value to write to KS_PMECR.
- *
- * Change the power mode of the chip.
- */
-static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode)
-{
-	unsigned pmecr;
-
-	netif_dbg(ks, hw, ks->netdev, "setting power mode %d\n", pwrmode);
-
-	pmecr = ks8851_rdreg16(ks, KS_PMECR);
-	pmecr &= ~PMECR_PM_MASK;
-	pmecr |= pwrmode;
-
-	ks8851_wrreg16(ks, KS_PMECR, pmecr);
-}
-
-/**
- * ks8851_write_mac_addr - write mac address to device registers
- * @dev: The network device
- *
- * Update the KS8851 MAC address registers from the address in @dev.
- *
- * This call assumes that the chip is not running, so there is no need to
- * shutdown the RXQ process whilst setting this.
-*/
-static int ks8851_write_mac_addr(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-	u16 val;
-	int i;
-
-	ks8851_lock(ks, &flags);
-
-	/*
-	 * Wake up chip in case it was powered off when stopped; otherwise,
-	 * the first write to the MAC address does not take effect.
-	 */
-	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
-
-	for (i = 0; i < ETH_ALEN; i += 2) {
-		val = (dev->dev_addr[i] << 8) | dev->dev_addr[i + 1];
-		ks8851_wrreg16(ks, KS_MAR(i), val);
-	}
-
-	if (!netif_running(dev))
-		ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
-
-	ks8851_unlock(ks, &flags);
-
-	return 0;
-}
-
-/**
- * ks8851_read_mac_addr - read mac address from device registers
- * @dev: The network device
- *
- * Update our copy of the KS8851 MAC address from the registers of @dev.
-*/
-static void ks8851_read_mac_addr(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-	u16 reg;
-	int i;
-
-	ks8851_lock(ks, &flags);
-
-	for (i = 0; i < ETH_ALEN; i += 2) {
-		reg = ks8851_rdreg16(ks, KS_MAR(i));
-		dev->dev_addr[i] = reg >> 8;
-		dev->dev_addr[i + 1] = reg & 0xff;
-	}
-
-	ks8851_unlock(ks, &flags);
-}
-
-/**
- * ks8851_init_mac - initialise the mac address
- * @ks: The device structure
- * @np: The device node pointer
- *
- * Get or create the initial mac address for the device and then set that
- * into the station address register. A mac address supplied in the device
- * tree takes precedence. Otherwise, if there is an EEPROM present, then
- * we try that. If no valid mac address is found we use eth_random_addr()
- * to create a new one.
- */
-static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
-{
-	struct net_device *dev = ks->netdev;
-	const u8 *mac_addr;
-
-	mac_addr = of_get_mac_address(np);
-	if (!IS_ERR(mac_addr)) {
-		ether_addr_copy(dev->dev_addr, mac_addr);
-		ks8851_write_mac_addr(dev);
-		return;
-	}
-
-	if (ks->rc_ccr & CCR_EEPROM) {
-		ks8851_read_mac_addr(dev);
-		if (is_valid_ether_addr(dev->dev_addr))
-			return;
-
-		netdev_err(ks->netdev, "invalid mac address read %pM\n",
-				dev->dev_addr);
-	}
-
-	eth_hw_addr_random(dev);
-	ks8851_write_mac_addr(dev);
-}
-
-/**
- * ks8851_rdfifo_spi - read data from the receive fifo via SPI
- * @ks: The device state.
- * @buff: The buffer address
- * @len: The length of the data to read
- *
- * Issue an RXQ FIFO read command and read the @len amount of data from
- * the FIFO into the buffer specified by @buff.
- */
-static void ks8851_rdfifo_spi(struct ks8851_net *ks, u8 *buff,
-			      unsigned int len)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-	struct spi_transfer *xfer = kss->spi_xfer2;
-	struct spi_message *msg = &kss->spi_msg2;
-	u8 txb[1];
-	int ret;
-
-	netif_dbg(ks, rx_status, ks->netdev,
-		  "%s: %d@%p\n", __func__, len, buff);
-
-	/* set the operation we're issuing */
-	txb[0] = KS_SPIOP_RXFIFO;
-
-	xfer->tx_buf = txb;
-	xfer->rx_buf = NULL;
-	xfer->len = 1;
-
-	xfer++;
-	xfer->rx_buf = buff;
-	xfer->tx_buf = NULL;
-	xfer->len = len;
-
-	ret = spi_sync(kss->spidev, msg);
-	if (ret < 0)
-		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
-}
-
-/**
- * ks8851_dbg_dumpkkt - dump initial packet contents to debug
- * @ks: The device state
- * @rxpkt: The data for the received packet
- *
- * Dump the initial data from the packet to dev_dbg().
-*/
-static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
-{
-	netdev_dbg(ks->netdev,
-		   "pkt %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
-		   rxpkt[4], rxpkt[5], rxpkt[6], rxpkt[7],
-		   rxpkt[8], rxpkt[9], rxpkt[10], rxpkt[11],
-		   rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
-}
-
-/**
- * ks8851_rx_skb_spi - receive skbuff for SPI
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
-{
-	netif_rx_ni(skb);
-}
-
-/**
- * ks8851_rx_skb - receive skbuff
- * @ks: The device state
- * @skb: The skbuff
- */
-static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
-{
-	ks->rx_skb(ks, skb);
-}
-
-/**
- * ks8851_rx_pkts - receive packets from the host
- * @ks: The device information.
- *
- * This is called from the IRQ work queue when the system detects that there
- * are packets in the receive queue. Find out how many packets there are and
- * read them from the FIFO.
- */
-static void ks8851_rx_pkts(struct ks8851_net *ks)
-{
-	struct sk_buff *skb;
-	unsigned rxfc;
-	unsigned rxlen;
-	unsigned rxstat;
-	u8 *rxpkt;
-
-	rxfc = (ks8851_rdreg16(ks, KS_RXFCTR) >> 8) & 0xff;
-
-	netif_dbg(ks, rx_status, ks->netdev,
-		  "%s: %d packets\n", __func__, rxfc);
-
-	/* Currently we're issuing a read per packet, but we could possibly
-	 * improve the code by issuing a single read, getting the receive
-	 * header, allocating the packet and then reading the packet data
-	 * out in one go.
-	 *
-	 * This form of operation would require us to hold the SPI bus'
-	 * chipselect low during the entie transaction to avoid any
-	 * reset to the data stream coming from the chip.
-	 */
-
-	for (; rxfc != 0; rxfc--) {
-		rxstat = ks8851_rdreg16(ks, KS_RXFHSR);
-		rxlen = ks8851_rdreg16(ks, KS_RXFHBCR) & RXFHBCR_CNT_MASK;
-
-		netif_dbg(ks, rx_status, ks->netdev,
-			  "rx: stat 0x%04x, len 0x%04x\n", rxstat, rxlen);
-
-		/* the length of the packet includes the 32bit CRC */
-
-		/* set dma read address */
-		ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00);
-
-		/* start DMA access */
-		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
-
-		if (rxlen > 4) {
-			unsigned int rxalign;
-
-			rxlen -= 4;
-			rxalign = ALIGN(rxlen, 4);
-			skb = netdev_alloc_skb_ip_align(ks->netdev, rxalign);
-			if (skb) {
-
-				/* 4 bytes of status header + 4 bytes of
-				 * garbage: we put them before ethernet
-				 * header, so that they are copied,
-				 * but ignored.
-				 */
-
-				rxpkt = skb_put(skb, rxlen) - 8;
-
-				ks->rdfifo(ks, rxpkt, rxalign + 8);
-
-				if (netif_msg_pktdata(ks))
-					ks8851_dbg_dumpkkt(ks, rxpkt);
-
-				skb->protocol = eth_type_trans(skb, ks->netdev);
-				ks8851_rx_skb(ks, skb);
-
-				ks->netdev->stats.rx_packets++;
-				ks->netdev->stats.rx_bytes += rxlen;
-			}
-		}
-
-		/* end DMA access and dequeue packet */
-		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF);
-	}
-}
-
-/**
- * ks8851_irq - IRQ handler for dealing with interrupt requests
- * @irq: IRQ number
- * @_ks: cookie
- *
- * This handler is invoked when the IRQ line asserts to find out what happened.
- * As we cannot allow ourselves to sleep in HARDIRQ context, this handler runs
- * in thread context.
- *
- * Read the interrupt status, work out what needs to be done and then clear
- * any of the interrupts that are not needed.
- */
-static irqreturn_t ks8851_irq(int irq, void *_ks)
-{
-	struct ks8851_net *ks = _ks;
-	unsigned handled = 0;
-	unsigned long flags;
-	unsigned int status;
-
-	ks8851_lock(ks, &flags);
-
-	status = ks8851_rdreg16(ks, KS_ISR);
-
-	netif_dbg(ks, intr, ks->netdev,
-		  "%s: status 0x%04x\n", __func__, status);
-
-	if (status & IRQ_LCI)
-		handled |= IRQ_LCI;
-
-	if (status & IRQ_LDI) {
-		u16 pmecr = ks8851_rdreg16(ks, KS_PMECR);
-		pmecr &= ~PMECR_WKEVT_MASK;
-		ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK);
-
-		handled |= IRQ_LDI;
-	}
-
-	if (status & IRQ_RXPSI)
-		handled |= IRQ_RXPSI;
-
-	if (status & IRQ_TXI) {
-		handled |= IRQ_TXI;
-
-		/* no lock here, tx queue should have been stopped */
-
-		/* update our idea of how much tx space is available to the
-		 * system */
-		ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
-
-		netif_dbg(ks, intr, ks->netdev,
-			  "%s: txspace %d\n", __func__, ks->tx_space);
-	}
-
-	if (status & IRQ_RXI)
-		handled |= IRQ_RXI;
-
-	if (status & IRQ_SPIBEI) {
-		netdev_err(ks->netdev, "%s: spi bus error\n", __func__);
-		handled |= IRQ_SPIBEI;
-	}
-
-	ks8851_wrreg16(ks, KS_ISR, handled);
-
-	if (status & IRQ_RXI) {
-		/* the datasheet says to disable the rx interrupt during
-		 * packet read-out, however we're masking the interrupt
-		 * from the device so do not bother masking just the RX
-		 * from the device. */
-
-		ks8851_rx_pkts(ks);
-	}
-
-	/* if something stopped the rx process, probably due to wanting
-	 * to change the rx settings, then do something about restarting
-	 * it. */
-	if (status & IRQ_RXPSI) {
-		struct ks8851_rxctrl *rxc = &ks->rxctrl;
-
-		/* update the multicast hash table */
-		ks8851_wrreg16(ks, KS_MAHTR0, rxc->mchash[0]);
-		ks8851_wrreg16(ks, KS_MAHTR1, rxc->mchash[1]);
-		ks8851_wrreg16(ks, KS_MAHTR2, rxc->mchash[2]);
-		ks8851_wrreg16(ks, KS_MAHTR3, rxc->mchash[3]);
-
-		ks8851_wrreg16(ks, KS_RXCR2, rxc->rxcr2);
-		ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1);
-	}
-
-	ks8851_unlock(ks, &flags);
-
-	if (status & IRQ_LCI)
-		mii_check_link(&ks->mii);
-
-	if (status & IRQ_TXI)
-		netif_wake_queue(ks->netdev);
-
-	return IRQ_HANDLED;
-}
-
-/**
- * calc_txlen - calculate size of message to send packet
- * @len: Length of data
- *
- * Returns the size of the TXFIFO message needed to send
- * this packet.
- */
-static inline unsigned calc_txlen(unsigned len)
-{
-	return ALIGN(len + 4, 4);
-}
-
-/**
- * ks8851_wrpkt_spi - write packet to TX FIFO via SPI
- * @ks: The device state.
- * @txp: The sk_buff to transmit.
- * @irq: IRQ on completion of the packet.
- *
- * Send the @txp to the chip. This means creating the relevant packet header
- * specifying the length of the packet and the other information the chip
- * needs, such as IRQ on completion. Send the header and the packet data to
- * the device.
- */
-static void ks8851_wrpkt_spi(struct ks8851_net *ks, struct sk_buff *txp,
-			     bool irq)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-	struct spi_transfer *xfer = kss->spi_xfer2;
-	struct spi_message *msg = &kss->spi_msg2;
-	unsigned fid = 0;
-	int ret;
-
-	netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n",
-		  __func__, txp, txp->len, txp->data, irq);
-
-	fid = ks->fid++;
-	fid &= TXFR_TXFID_MASK;
-
-	if (irq)
-		fid |= TXFR_TXIC;	/* irq on completion */
-
-	/* start header at txb[1] to align txw entries */
-	ks->txh.txb[1] = KS_SPIOP_TXFIFO;
-	ks->txh.txw[1] = cpu_to_le16(fid);
-	ks->txh.txw[2] = cpu_to_le16(txp->len);
-
-	xfer->tx_buf = &ks->txh.txb[1];
-	xfer->rx_buf = NULL;
-	xfer->len = 5;
-
-	xfer++;
-	xfer->tx_buf = txp->data;
-	xfer->rx_buf = NULL;
-	xfer->len = ALIGN(txp->len, 4);
-
-	ret = spi_sync(kss->spidev, msg);
-	if (ret < 0)
-		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
-}
-
-/**
- * ks8851_done_tx - update and then free skbuff after transmitting
- * @ks: The device state
- * @txb: The buffer transmitted
- */
-static void ks8851_done_tx(struct ks8851_net *ks, struct sk_buff *txb)
-{
-	struct net_device *dev = ks->netdev;
-
-	dev->stats.tx_bytes += txb->len;
-	dev->stats.tx_packets++;
-
-	dev_kfree_skb(txb);
-}
-
-/**
- * ks8851_tx_work - process tx packet(s)
- * @work: The work strucutre what was scheduled.
- *
- * This is called when a number of packets have been scheduled for
- * transmission and need to be sent to the device.
- */
-static void ks8851_tx_work(struct work_struct *work)
-{
-	struct ks8851_net_spi *kss;
-	struct ks8851_net *ks;
-	unsigned long flags;
-	struct sk_buff *txb;
-	bool last;
-
-	kss = container_of(work, struct ks8851_net_spi, tx_work);
-	ks = &kss->ks8851;
-	last = skb_queue_empty(&ks->txq);
-
-	ks8851_lock(ks, &flags);
-
-	while (!last) {
-		txb = skb_dequeue(&ks->txq);
-		last = skb_queue_empty(&ks->txq);
-
-		if (txb != NULL) {
-			ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
-			ks->wrfifo(ks, txb, last);
-			ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
-			ks8851_wrreg16(ks, KS_TXQCR, TXQCR_METFE);
-
-			ks8851_done_tx(ks, txb);
-		}
-	}
-
-	ks8851_unlock(ks, &flags);
-}
-
-/**
- * ks8851_flush_tx_work_spi - flush outstanding TX work for SPI
- * @ks: The device state
- */
-static void ks8851_flush_tx_work_spi(struct ks8851_net *ks)
-{
-	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
-
-	flush_work(&kss->tx_work);
-}
-
-/**
- * ks8851_flush_tx_work - flush outstanding TX work
- * @ks: The device state
- */
-static void ks8851_flush_tx_work(struct ks8851_net *ks)
-{
-	if (ks->flush_tx_work)
-		ks->flush_tx_work(ks);
-}
-
-/**
- * ks8851_net_open - open network device
- * @dev: The network device being opened.
- *
- * Called when the network device is marked active, such as a user executing
- * 'ifconfig up' on the device.
- */
-static int ks8851_net_open(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-	int ret;
-
-	ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
-				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
-				   dev->name, ks);
-	if (ret < 0) {
-		netdev_err(dev, "failed to get irq\n");
-		return ret;
-	}
-
-	/* lock the card, even if we may not actually be doing anything
-	 * else at the moment */
-	ks8851_lock(ks, &flags);
-
-	netif_dbg(ks, ifup, ks->netdev, "opening\n");
-
-	/* bring chip out of any power saving mode it was in */
-	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
-
-	/* issue a soft reset to the RX/TX QMU to put it into a known
-	 * state. */
-	ks8851_soft_reset(ks, GRR_QMU);
-
-	/* setup transmission parameters */
-
-	ks8851_wrreg16(ks, KS_TXCR, (TXCR_TXE | /* enable transmit process */
-				     TXCR_TXPE | /* pad to min length */
-				     TXCR_TXCRC | /* add CRC */
-				     TXCR_TXFCE)); /* enable flow control */
-
-	/* auto-increment tx data, reset tx pointer */
-	ks8851_wrreg16(ks, KS_TXFDPR, TXFDPR_TXFPAI);
-
-	/* setup receiver control */
-
-	ks8851_wrreg16(ks, KS_RXCR1, (RXCR1_RXPAFMA | /*  from mac filter */
-				      RXCR1_RXFCE | /* enable flow control */
-				      RXCR1_RXBE | /* broadcast enable */
-				      RXCR1_RXUE | /* unicast enable */
-				      RXCR1_RXE)); /* enable rx block */
-
-	/* transfer entire frames out in one go */
-	ks8851_wrreg16(ks, KS_RXCR2, RXCR2_SRDBL_FRAME);
-
-	/* set receive counter timeouts */
-	ks8851_wrreg16(ks, KS_RXDTTR, 1000); /* 1ms after first frame to IRQ */
-	ks8851_wrreg16(ks, KS_RXDBCTR, 4096); /* >4Kbytes in buffer to IRQ */
-	ks8851_wrreg16(ks, KS_RXFCTR, 10);  /* 10 frames to IRQ */
-
-	ks->rc_rxqcr = (RXQCR_RXFCTE |  /* IRQ on frame count exceeded */
-			RXQCR_RXDBCTE | /* IRQ on byte count exceeded */
-			RXQCR_RXDTTE);  /* IRQ on time exceeded */
-
-	ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
-
-	/* clear then enable interrupts */
-	ks8851_wrreg16(ks, KS_ISR, ks->rc_ier);
-	ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
-
-	netif_start_queue(ks->netdev);
-
-	netif_dbg(ks, ifup, ks->netdev, "network device up\n");
-
-	ks8851_unlock(ks, &flags);
-	mii_check_link(&ks->mii);
-	return 0;
-}
-
-/**
- * ks8851_net_stop - close network device
- * @dev: The device being closed.
- *
- * Called to close down a network device which has been active. Cancell any
- * work, shutdown the RX and TX process and then place the chip into a low
- * power state whilst it is not being used.
- */
-static int ks8851_net_stop(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-
-	netif_info(ks, ifdown, dev, "shutting down\n");
-
-	netif_stop_queue(dev);
-
-	ks8851_lock(ks, &flags);
-	/* turn off the IRQs and ack any outstanding */
-	ks8851_wrreg16(ks, KS_IER, 0x0000);
-	ks8851_wrreg16(ks, KS_ISR, 0xffff);
-	ks8851_unlock(ks, &flags);
-
-	/* stop any outstanding work */
-	ks8851_flush_tx_work(ks);
-	flush_work(&ks->rxctrl_work);
-
-	ks8851_lock(ks, &flags);
-	/* shutdown RX process */
-	ks8851_wrreg16(ks, KS_RXCR1, 0x0000);
-
-	/* shutdown TX process */
-	ks8851_wrreg16(ks, KS_TXCR, 0x0000);
-
-	/* set powermode to soft power down to save power */
-	ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
-	ks8851_unlock(ks, &flags);
-
-	/* ensure any queued tx buffers are dumped */
-	while (!skb_queue_empty(&ks->txq)) {
-		struct sk_buff *txb = skb_dequeue(&ks->txq);
-
-		netif_dbg(ks, ifdown, ks->netdev,
-			  "%s: freeing txb %p\n", __func__, txb);
-
-		dev_kfree_skb(txb);
-	}
-
-	free_irq(dev->irq, ks);
-
-	return 0;
-}
-
-/**
- * ks8851_start_xmit_spi - transmit packet using SPI
- * @skb: The buffer to transmit
- * @dev: The device used to transmit the packet.
- *
- * Called by the network layer to transmit the @skb. Queue the packet for
- * the device and schedule the necessary work to transmit the packet when
- * it is free.
- *
- * We do this to firstly avoid sleeping with the network device locked,
- * and secondly so we can round up more than one packet to transmit which
- * means we can try and avoid generating too many transmit done interrupts.
- */
-static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb,
-					 struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned needed = calc_txlen(skb->len);
-	netdev_tx_t ret = NETDEV_TX_OK;
-	struct ks8851_net_spi *kss;
-
-	kss = to_ks8851_spi(ks);
-
-	netif_dbg(ks, tx_queued, ks->netdev,
-		  "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data);
-
-	spin_lock(&ks->statelock);
-
-	if (needed > ks->tx_space) {
-		netif_stop_queue(dev);
-		ret = NETDEV_TX_BUSY;
-	} else {
-		ks->tx_space -= needed;
-		skb_queue_tail(&ks->txq, skb);
-	}
-
-	spin_unlock(&ks->statelock);
-	schedule_work(&kss->tx_work);
-
-	return ret;
-}
-
-/**
- * ks8851_start_xmit - transmit packet
- * @skb: The buffer to transmit
- * @dev: The device used to transmit the packet.
- *
- * Called by the network layer to transmit the @skb. Queue the packet for
- * the device and schedule the necessary work to transmit the packet when
- * it is free.
- *
- * We do this to firstly avoid sleeping with the network device locked,
- * and secondly so we can round up more than one packet to transmit which
- * means we can try and avoid generating too many transmit done interrupts.
- */
-static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
-				     struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-
-	return ks->start_xmit(skb, dev);
-}
-
-/**
- * ks8851_rxctrl_work - work handler to change rx mode
- * @work: The work structure this belongs to.
- *
- * Lock the device and issue the necessary changes to the receive mode from
- * the network device layer. This is done so that we can do this without
- * having to sleep whilst holding the network device lock.
- *
- * Since the recommendation from Micrel is that the RXQ is shutdown whilst the
- * receive parameters are programmed, we issue a write to disable the RXQ and
- * then wait for the interrupt handler to be triggered once the RXQ shutdown is
- * complete. The interrupt handler then writes the new values into the chip.
- */
-static void ks8851_rxctrl_work(struct work_struct *work)
-{
-	struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work);
-	unsigned long flags;
-
-	ks8851_lock(ks, &flags);
-
-	/* need to shutdown RXQ before modifying filter parameters */
-	ks8851_wrreg16(ks, KS_RXCR1, 0x00);
-
-	ks8851_unlock(ks, &flags);
-}
-
-static void ks8851_set_rx_mode(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	struct ks8851_rxctrl rxctrl;
-
-	memset(&rxctrl, 0, sizeof(rxctrl));
-
-	if (dev->flags & IFF_PROMISC) {
-		/* interface to receive everything */
-
-		rxctrl.rxcr1 = RXCR1_RXAE | RXCR1_RXINVF;
-	} else if (dev->flags & IFF_ALLMULTI) {
-		/* accept all multicast packets */
-
-		rxctrl.rxcr1 = (RXCR1_RXME | RXCR1_RXAE |
-				RXCR1_RXPAFMA | RXCR1_RXMAFMA);
-	} else if (dev->flags & IFF_MULTICAST && !netdev_mc_empty(dev)) {
-		struct netdev_hw_addr *ha;
-		u32 crc;
-
-		/* accept some multicast */
-
-		netdev_for_each_mc_addr(ha, dev) {
-			crc = ether_crc(ETH_ALEN, ha->addr);
-			crc >>= (32 - 6);  /* get top six bits */
-
-			rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf));
-		}
-
-		rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXPAFMA;
-	} else {
-		/* just accept broadcast / unicast */
-		rxctrl.rxcr1 = RXCR1_RXPAFMA;
-	}
-
-	rxctrl.rxcr1 |= (RXCR1_RXUE | /* unicast enable */
-			 RXCR1_RXBE | /* broadcast enable */
-			 RXCR1_RXE | /* RX process enable */
-			 RXCR1_RXFCE); /* enable flow control */
-
-	rxctrl.rxcr2 |= RXCR2_SRDBL_FRAME;
-
-	/* schedule work to do the actual set of the data if needed */
-
-	spin_lock(&ks->statelock);
-
-	if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) {
-		memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl));
-		schedule_work(&ks->rxctrl_work);
-	}
-
-	spin_unlock(&ks->statelock);
-}
-
-static int ks8851_set_mac_address(struct net_device *dev, void *addr)
-{
-	struct sockaddr *sa = addr;
-
-	if (netif_running(dev))
-		return -EBUSY;
-
-	if (!is_valid_ether_addr(sa->sa_data))
-		return -EADDRNOTAVAIL;
-
-	memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN);
-	return ks8851_write_mac_addr(dev);
-}
-
-static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-
-	if (!netif_running(dev))
-		return -EINVAL;
-
-	return generic_mii_ioctl(&ks->mii, if_mii(req), cmd, NULL);
-}
-
-static const struct net_device_ops ks8851_netdev_ops = {
-	.ndo_open		= ks8851_net_open,
-	.ndo_stop		= ks8851_net_stop,
-	.ndo_do_ioctl		= ks8851_net_ioctl,
-	.ndo_start_xmit		= ks8851_start_xmit,
-	.ndo_set_mac_address	= ks8851_set_mac_address,
-	.ndo_set_rx_mode	= ks8851_set_rx_mode,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-/* ethtool support */
-
-static void ks8851_get_drvinfo(struct net_device *dev,
-			       struct ethtool_drvinfo *di)
-{
-	strlcpy(di->driver, "KS8851", sizeof(di->driver));
-	strlcpy(di->version, "1.00", sizeof(di->version));
-	strlcpy(di->bus_info, dev_name(dev->dev.parent), sizeof(di->bus_info));
-}
-
-static u32 ks8851_get_msglevel(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	return ks->msg_enable;
-}
-
-static void ks8851_set_msglevel(struct net_device *dev, u32 to)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	ks->msg_enable = to;
-}
-
-static int ks8851_get_link_ksettings(struct net_device *dev,
-				     struct ethtool_link_ksettings *cmd)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-
-	mii_ethtool_get_link_ksettings(&ks->mii, cmd);
-
-	return 0;
-}
-
-static int ks8851_set_link_ksettings(struct net_device *dev,
-				     const struct ethtool_link_ksettings *cmd)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	return mii_ethtool_set_link_ksettings(&ks->mii, cmd);
-}
-
-static u32 ks8851_get_link(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	return mii_link_ok(&ks->mii);
-}
-
-static int ks8851_nway_reset(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	return mii_nway_restart(&ks->mii);
-}
-
-/* EEPROM support */
-
-static void ks8851_eeprom_regread(struct eeprom_93cx6 *ee)
-{
-	struct ks8851_net *ks = ee->data;
-	unsigned val;
-
-	val = ks8851_rdreg16(ks, KS_EEPCR);
-
-	ee->reg_data_out = (val & EEPCR_EESB) ? 1 : 0;
-	ee->reg_data_clock = (val & EEPCR_EESCK) ? 1 : 0;
-	ee->reg_chip_select = (val & EEPCR_EECS) ? 1 : 0;
-}
-
-static void ks8851_eeprom_regwrite(struct eeprom_93cx6 *ee)
-{
-	struct ks8851_net *ks = ee->data;
-	unsigned val = EEPCR_EESA;	/* default - eeprom access on */
-
-	if (ee->drive_data)
-		val |= EEPCR_EESRWA;
-	if (ee->reg_data_in)
-		val |= EEPCR_EEDO;
-	if (ee->reg_data_clock)
-		val |= EEPCR_EESCK;
-	if (ee->reg_chip_select)
-		val |= EEPCR_EECS;
-
-	ks8851_wrreg16(ks, KS_EEPCR, val);
-}
-
-/**
- * ks8851_eeprom_claim - claim device EEPROM and activate the interface
- * @ks: The network device state.
- *
- * Check for the presence of an EEPROM, and then activate software access
- * to the device.
- */
-static int ks8851_eeprom_claim(struct ks8851_net *ks)
-{
-	/* start with clock low, cs high */
-	ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS);
-	return 0;
-}
-
-/**
- * ks8851_eeprom_release - release the EEPROM interface
- * @ks: The device state
- *
- * Release the software access to the device EEPROM
- */
-static void ks8851_eeprom_release(struct ks8851_net *ks)
-{
-	unsigned val = ks8851_rdreg16(ks, KS_EEPCR);
-
-	ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA);
-}
-
-#define KS_EEPROM_MAGIC (0x00008851)
-
-static int ks8851_set_eeprom(struct net_device *dev,
-			     struct ethtool_eeprom *ee, u8 *data)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	int offset = ee->offset;
-	unsigned long flags;
-	int len = ee->len;
-	u16 tmp;
-
-	/* currently only support byte writing */
-	if (len != 1)
-		return -EINVAL;
-
-	if (ee->magic != KS_EEPROM_MAGIC)
-		return -EINVAL;
-
-	if (!(ks->rc_ccr & CCR_EEPROM))
-		return -ENOENT;
-
-	ks8851_lock(ks, &flags);
-
-	ks8851_eeprom_claim(ks);
-
-	eeprom_93cx6_wren(&ks->eeprom, true);
-
-	/* ethtool currently only supports writing bytes, which means
-	 * we have to read/modify/write our 16bit EEPROMs */
-
-	eeprom_93cx6_read(&ks->eeprom, offset/2, &tmp);
-
-	if (offset & 1) {
-		tmp &= 0xff;
-		tmp |= *data << 8;
-	} else {
-		tmp &= 0xff00;
-		tmp |= *data;
-	}
-
-	eeprom_93cx6_write(&ks->eeprom, offset/2, tmp);
-	eeprom_93cx6_wren(&ks->eeprom, false);
-
-	ks8851_eeprom_release(ks);
-	ks8851_unlock(ks, &flags);
-
-	return 0;
-}
-
-static int ks8851_get_eeprom(struct net_device *dev,
-			     struct ethtool_eeprom *ee, u8 *data)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	int offset = ee->offset;
-	unsigned long flags;
-	int len = ee->len;
-
-	/* must be 2 byte aligned */
-	if (len & 1 || offset & 1)
-		return -EINVAL;
-
-	if (!(ks->rc_ccr & CCR_EEPROM))
-		return -ENOENT;
-
-	ks8851_lock(ks, &flags);
-
-	ks8851_eeprom_claim(ks);
-
-	ee->magic = KS_EEPROM_MAGIC;
-
-	eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2);
-	ks8851_eeprom_release(ks);
-	ks8851_unlock(ks, &flags);
-
-	return 0;
-}
-
-static int ks8851_get_eeprom_len(struct net_device *dev)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-
-	/* currently, we assume it is an 93C46 attached, so return 128 */
-	return ks->rc_ccr & CCR_EEPROM ? 128 : 0;
-}
-
-static const struct ethtool_ops ks8851_ethtool_ops = {
-	.get_drvinfo	= ks8851_get_drvinfo,
-	.get_msglevel	= ks8851_get_msglevel,
-	.set_msglevel	= ks8851_set_msglevel,
-	.get_link	= ks8851_get_link,
-	.nway_reset	= ks8851_nway_reset,
-	.get_eeprom_len	= ks8851_get_eeprom_len,
-	.get_eeprom	= ks8851_get_eeprom,
-	.set_eeprom	= ks8851_set_eeprom,
-	.get_link_ksettings = ks8851_get_link_ksettings,
-	.set_link_ksettings = ks8851_set_link_ksettings,
-};
-
-/* MII interface controls */
-
-/**
- * ks8851_phy_reg - convert MII register into a KS8851 register
- * @reg: MII register number.
- *
- * Return the KS8851 register number for the corresponding MII PHY register
- * if possible. Return zero if the MII register has no direct mapping to the
- * KS8851 register set.
- */
-static int ks8851_phy_reg(int reg)
-{
-	switch (reg) {
-	case MII_BMCR:
-		return KS_P1MBCR;
-	case MII_BMSR:
-		return KS_P1MBSR;
-	case MII_PHYSID1:
-		return KS_PHY1ILR;
-	case MII_PHYSID2:
-		return KS_PHY1IHR;
-	case MII_ADVERTISE:
-		return KS_P1ANAR;
-	case MII_LPA:
-		return KS_P1ANLPR;
-	}
-
-	return 0x0;
-}
-
-/**
- * ks8851_phy_read - MII interface PHY register read.
- * @dev: The network device the PHY is on.
- * @phy_addr: Address of PHY (ignored as we only have one)
- * @reg: The register to read.
- *
- * This call reads data from the PHY register specified in @reg. Since the
- * device does not support all the MII registers, the non-existent values
- * are always returned as zero.
- *
- * We return zero for unsupported registers as the MII code does not check
- * the value returned for any error status, and simply returns it to the
- * caller. The mii-tool that the driver was tested with takes any -ve error
- * as real PHY capabilities, thus displaying incorrect data to the user.
- */
-static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-	int ksreg;
-	int result;
-
-	ksreg = ks8851_phy_reg(reg);
-	if (!ksreg)
-		return 0x0;	/* no error return allowed, so use zero */
-
-	ks8851_lock(ks, &flags);
-	result = ks8851_rdreg16(ks, ksreg);
-	ks8851_unlock(ks, &flags);
-
-	return result;
-}
-
-static void ks8851_phy_write(struct net_device *dev,
-			     int phy, int reg, int value)
-{
-	struct ks8851_net *ks = netdev_priv(dev);
-	unsigned long flags;
-	int ksreg;
-
-	ksreg = ks8851_phy_reg(reg);
-	if (ksreg) {
-		ks8851_lock(ks, &flags);
-		ks8851_wrreg16(ks, ksreg, value);
-		ks8851_unlock(ks, &flags);
-	}
-}
-
-/**
- * ks8851_read_selftest - read the selftest memory info.
- * @ks: The device state
- *
- * Read and check the TX/RX memory selftest information.
- */
-static int ks8851_read_selftest(struct ks8851_net *ks)
-{
-	unsigned both_done = MBIR_TXMBF | MBIR_RXMBF;
-	int ret = 0;
-	unsigned rd;
-
-	rd = ks8851_rdreg16(ks, KS_MBIR);
-
-	if ((rd & both_done) != both_done) {
-		netdev_warn(ks->netdev, "Memory selftest not finished\n");
-		return 0;
-	}
-
-	if (rd & MBIR_TXMBFA) {
-		netdev_err(ks->netdev, "TX memory selftest fail\n");
-		ret |= 1;
-	}
-
-	if (rd & MBIR_RXMBFA) {
-		netdev_err(ks->netdev, "RX memory selftest fail\n");
-		ret |= 2;
-	}
-
-	return 0;
-}
-
-/* driver bus management functions */
-
-#ifdef CONFIG_PM_SLEEP
-
-static int ks8851_suspend(struct device *dev)
-{
-	struct ks8851_net *ks = dev_get_drvdata(dev);
-	struct net_device *netdev = ks->netdev;
-
-	if (netif_running(netdev)) {
-		netif_device_detach(netdev);
-		ks8851_net_stop(netdev);
-	}
-
-	return 0;
-}
-
-static int ks8851_resume(struct device *dev)
-{
-	struct ks8851_net *ks = dev_get_drvdata(dev);
-	struct net_device *netdev = ks->netdev;
-
-	if (netif_running(netdev)) {
-		ks8851_net_open(netdev);
-		netif_device_attach(netdev);
-	}
-
-	return 0;
-}
-#endif
-
-static SIMPLE_DEV_PM_OPS(ks8851_pm_ops, ks8851_suspend, ks8851_resume);
-
-static int ks8851_probe_common(struct net_device *netdev, struct device *dev,
-			       int msg_en)
-{
-	struct ks8851_net *ks = netdev_priv(netdev);
-	unsigned cider;
-	int gpio;
-	int ret;
-
-	ks->netdev = netdev;
-	ks->tx_space = 6144;
-
-	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
-	if (gpio == -EPROBE_DEFER)
-		return gpio;
-
-	ks->gpio = gpio;
-	if (gpio_is_valid(gpio)) {
-		ret = devm_gpio_request_one(dev, gpio,
-					    GPIOF_OUT_INIT_LOW, "ks8851_rst_n");
-		if (ret) {
-			dev_err(dev, "reset gpio request failed\n");
-			return ret;
-		}
-	}
-
-	ks->vdd_io = devm_regulator_get(dev, "vdd-io");
-	if (IS_ERR(ks->vdd_io)) {
-		ret = PTR_ERR(ks->vdd_io);
-		goto err_reg_io;
-	}
-
-	ret = regulator_enable(ks->vdd_io);
-	if (ret) {
-		dev_err(dev, "regulator vdd_io enable fail: %d\n", ret);
-		goto err_reg_io;
-	}
-
-	ks->vdd_reg = devm_regulator_get(dev, "vdd");
-	if (IS_ERR(ks->vdd_reg)) {
-		ret = PTR_ERR(ks->vdd_reg);
-		goto err_reg;
-	}
-
-	ret = regulator_enable(ks->vdd_reg);
-	if (ret) {
-		dev_err(dev, "regulator vdd enable fail: %d\n", ret);
-		goto err_reg;
-	}
-
-	if (gpio_is_valid(gpio)) {
-		usleep_range(10000, 11000);
-		gpio_set_value(gpio, 1);
-	}
-
-	spin_lock_init(&ks->statelock);
-
-	INIT_WORK(&ks->rxctrl_work, ks8851_rxctrl_work);
-
-	/* setup EEPROM state */
-	ks->eeprom.data = ks;
-	ks->eeprom.width = PCI_EEPROM_WIDTH_93C46;
-	ks->eeprom.register_read = ks8851_eeprom_regread;
-	ks->eeprom.register_write = ks8851_eeprom_regwrite;
-
-	/* setup mii state */
-	ks->mii.dev		= netdev;
-	ks->mii.phy_id		= 1,
-	ks->mii.phy_id_mask	= 1;
-	ks->mii.reg_num_mask	= 0xf;
-	ks->mii.mdio_read	= ks8851_phy_read;
-	ks->mii.mdio_write	= ks8851_phy_write;
-
-	dev_info(dev, "message enable is %d\n", msg_en);
-
-	/* set the default message enable */
-	ks->msg_enable = netif_msg_init(msg_en, NETIF_MSG_DRV |
-						NETIF_MSG_PROBE |
-						NETIF_MSG_LINK);
-
-	skb_queue_head_init(&ks->txq);
-
-	netdev->ethtool_ops = &ks8851_ethtool_ops;
-	SET_NETDEV_DEV(netdev, dev);
-
-	dev_set_drvdata(dev, ks);
-
-	netif_carrier_off(ks->netdev);
-	netdev->if_port = IF_PORT_100BASET;
-	netdev->netdev_ops = &ks8851_netdev_ops;
-
-	/* issue a global soft reset to reset the device. */
-	ks8851_soft_reset(ks, GRR_GSR);
-
-	/* simple check for a valid chip being connected to the bus */
-	cider = ks8851_rdreg16(ks, KS_CIDER);
-	if ((cider & ~CIDER_REV_MASK) != CIDER_ID) {
-		dev_err(dev, "failed to read device ID\n");
-		ret = -ENODEV;
-		goto err_id;
-	}
-
-	/* cache the contents of the CCR register for EEPROM, etc. */
-	ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR);
-
-	ks8851_read_selftest(ks);
-	ks8851_init_mac(ks, dev->of_node);
-
-	ret = register_netdev(netdev);
-	if (ret) {
-		dev_err(dev, "failed to register network device\n");
-		goto err_netdev;
-	}
-
-	netdev_info(netdev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n",
-		    CIDER_REV_GET(cider), netdev->dev_addr, netdev->irq,
-		    ks->rc_ccr & CCR_EEPROM ? "has" : "no");
-
-	return 0;
-
-err_netdev:
-err_id:
-	if (gpio_is_valid(gpio))
-		gpio_set_value(gpio, 0);
-	regulator_disable(ks->vdd_reg);
-err_reg:
-	regulator_disable(ks->vdd_io);
-err_reg_io:
-	return ret;
-}
-
-static int ks8851_remove_common(struct device *dev)
-{
-	struct ks8851_net *priv = dev_get_drvdata(dev);
-
-	if (netif_msg_drv(priv))
-		dev_info(dev, "remove\n");
-
-	unregister_netdev(priv->netdev);
-	if (gpio_is_valid(priv->gpio))
-		gpio_set_value(priv->gpio, 0);
-	regulator_disable(priv->vdd_reg);
-	regulator_disable(priv->vdd_io);
-
-	return 0;
-}
-
-static int ks8851_probe(struct spi_device *spi)
-{
-	struct device *dev = &spi->dev;
-	struct ks8851_net_spi *kss;
-	struct net_device *netdev;
-	struct ks8851_net *ks;
-
-	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi));
-	if (!netdev)
-		return -ENOMEM;
-
-	spi->bits_per_word = 8;
-
-	ks = netdev_priv(netdev);
-
-	ks->lock = ks8851_lock_spi;
-	ks->unlock = ks8851_unlock_spi;
-	ks->rdreg16 = ks8851_rdreg16_spi;
-	ks->wrreg16 = ks8851_wrreg16_spi;
-	ks->rdfifo = ks8851_rdfifo_spi;
-	ks->wrfifo = ks8851_wrpkt_spi;
-	ks->start_xmit = ks8851_start_xmit_spi;
-	ks->rx_skb = ks8851_rx_skb_spi;
-	ks->flush_tx_work = ks8851_flush_tx_work_spi;
-
-#define STD_IRQ (IRQ_LCI |	/* Link Change */	\
-		 IRQ_TXI |	/* TX done */		\
-		 IRQ_RXI |	/* RX done */		\
-		 IRQ_SPIBEI |	/* SPI bus error */	\
-		 IRQ_TXPSI |	/* TX process stop */	\
-		 IRQ_RXPSI)	/* RX process stop */
-	ks->rc_ier = STD_IRQ;
-
-	kss = to_ks8851_spi(ks);
-
-	kss->spidev = spi;
-	mutex_init(&kss->lock);
-	INIT_WORK(&kss->tx_work, ks8851_tx_work);
-
-	/* initialise pre-made spi transfer messages */
-	spi_message_init(&kss->spi_msg1);
-	spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1);
-
-	spi_message_init(&kss->spi_msg2);
-	spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2);
-	spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2);
-
-	netdev->irq = spi->irq;
-
-	return ks8851_probe_common(netdev, dev, msg_enable);
-}
-
-static int ks8851_remove(struct spi_device *spi)
-{
-	return ks8851_remove_common(&spi->dev);
-}
-
-static const struct of_device_id ks8851_match_table[] = {
-	{ .compatible = "micrel,ks8851" },
-	{ }
-};
-MODULE_DEVICE_TABLE(of, ks8851_match_table);
-
-static struct spi_driver ks8851_driver = {
-	.driver = {
-		.name = "ks8851",
-		.of_match_table = ks8851_match_table,
-		.pm = &ks8851_pm_ops,
-	},
-	.probe = ks8851_probe,
-	.remove = ks8851_remove,
-};
-module_spi_driver(ks8851_driver);
-
-MODULE_DESCRIPTION("KS8851 Network driver");
-MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
-MODULE_LICENSE("GPL");
-
-module_param_named(message, msg_enable, int, 0);
-MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)");
-MODULE_ALIAS("spi:ks8851");
diff --git a/drivers/net/ethernet/micrel/ks8851.h b/drivers/net/ethernet/micrel/ks8851.h
index f210d18a10b5..2b319e451121 100644
--- a/drivers/net/ethernet/micrel/ks8851.h
+++ b/drivers/net/ethernet/micrel/ks8851.h
@@ -7,6 +7,11 @@
  * KS8851 register definitions
 */
 
+#ifndef __KS8851_H__
+#define __KS8851_H__
+
+#include <linux/eeprom_93cx6.h>
+
 #define KS_CCR					0x08
 #define CCR_LE					(1 << 10)   /* KSZ8851-16MLL */
 #define CCR_EEPROM				(1 << 9)
@@ -300,3 +305,147 @@
 #define TXFR_TXIC				(1 << 15)
 #define TXFR_TXFID_MASK				(0x3f << 0)
 #define TXFR_TXFID_SHIFT			(0)
+
+/**
+ * struct ks8851_rxctrl - KS8851 driver rx control
+ * @mchash: Multicast hash-table data.
+ * @rxcr1: KS_RXCR1 register setting
+ * @rxcr2: KS_RXCR2 register setting
+ *
+ * Representation of the settings needed to control the receive filtering
+ * such as the multicast hash-filter and the receive register settings. This
+ * is used to make the job of working out if the receive settings change and
+ * then issuing the new settings to the worker that will send the necessary
+ * commands.
+ */
+struct ks8851_rxctrl {
+	u16	mchash[4];
+	u16	rxcr1;
+	u16	rxcr2;
+};
+
+/**
+ * union ks8851_tx_hdr - tx header data
+ * @txb: The header as bytes
+ * @txw: The header as 16bit, little-endian words
+ *
+ * A dual representation of the tx header data to allow
+ * access to individual bytes, and to allow 16bit accesses
+ * with 16bit alignment.
+ */
+union ks8851_tx_hdr {
+	u8	txb[6];
+	__le16	txw[3];
+};
+
+/**
+ * struct ks8851_net - KS8851 driver private data
+ * @netdev: The network device we're bound to
+ * @statelock: Lock on this structure for tx list.
+ * @mii: The MII state information for the mii calls.
+ * @rxctrl: RX settings for @rxctrl_work.
+ * @rxctrl_work: Work queue for updating RX mode and multicast lists
+ * @txq: Queue of packets for transmission.
+ * @txh: Space for generating packet TX header in DMA-able data
+ * @rxd: Space for receiving SPI data, in DMA-able space.
+ * @txd: Space for transmitting SPI data, in DMA-able space.
+ * @msg_enable: The message flags controlling driver output (see ethtool).
+ * @tx_space: Cached free TX buffer space (refreshed from KS_TXMIR on TX IRQ).
+ * @fid: Incrementing frame id tag.
+ * @rc_ier: Cached copy of KS_IER.
+ * @rc_ccr: Cached copy of KS_CCR.
+ * @rc_rxqcr: Cached copy of KS_RXQCR.
+ * @eeprom: 93CX6 EEPROM state for accessing on-board EEPROM.
+ * @vdd_reg: Optional regulator supplying the chip
+ * @vdd_io: Optional digital power supply for IO
+ * @gpio: Optional reset_n gpio
+ * @lock: Bus access lock callback
+ * @unlock: Bus access unlock callback
+ * @rdreg16: 16bit register read callback
+ * @wrreg16: 16bit register write callback
+ * @rdfifo: FIFO read callback
+ * @wrfifo: FIFO write callback
+ * @start_xmit: start_xmit() implementation callback
+ * @rx_skb: rx_skb() implementation callback
+ * @flush_tx_work: flush_tx_work() implementation callback
+ *
+ * The @statelock is used to protect information in the structure which may
+ * need to be accessed via several sources, such as the network driver layer
+ * or one of the work queues.
+ *
+ * We align the rx/tx buffers so that if the SPI driver wants to DMA map them,
+ * it will not have any problems with data the driver modifies.
+ */
+struct ks8851_net {
+	struct net_device	*netdev;
+	spinlock_t		statelock;
+
+	union ks8851_tx_hdr	txh ____cacheline_aligned;
+	u8			rxd[8];
+	u8			txd[8];
+
+	u32			msg_enable ____cacheline_aligned;
+	u16			tx_space;
+	u8			fid;
+
+	u16			rc_ier;
+	u16			rc_rxqcr;
+	u16			rc_ccr;
+
+	struct mii_if_info	mii;
+	struct ks8851_rxctrl	rxctrl;
+
+	struct work_struct	rxctrl_work;
+
+	struct sk_buff_head	txq;
+
+	struct eeprom_93cx6	eeprom;
+	struct regulator	*vdd_reg;
+	struct regulator	*vdd_io;
+	int			gpio;
+
+	void			(*lock)(struct ks8851_net *ks,
+					unsigned long *flags);
+	void			(*unlock)(struct ks8851_net *ks,
+					  unsigned long *flags);
+	unsigned int		(*rdreg16)(struct ks8851_net *ks,
+					   unsigned int reg);
+	void			(*wrreg16)(struct ks8851_net *ks,
+					   unsigned int reg, unsigned int val);
+	void			(*rdfifo)(struct ks8851_net *ks, u8 *buff,
+					  unsigned int len);
+	void			(*wrfifo)(struct ks8851_net *ks,
+					  struct sk_buff *txp, bool irq);
+	netdev_tx_t		(*start_xmit)(struct sk_buff *skb,
+					      struct net_device *dev);
+	void			(*rx_skb)(struct ks8851_net *ks,
+					  struct sk_buff *skb);
+	void			(*flush_tx_work)(struct ks8851_net *ks);
+};
+
+int ks8851_probe_common(struct net_device *netdev, struct device *dev,
+			int msg_en);
+int ks8851_remove_common(struct device *dev);
+int ks8851_suspend(struct device *dev);
+int ks8851_resume(struct device *dev);
+
+static __maybe_unused SIMPLE_DEV_PM_OPS(ks8851_pm_ops,
+					ks8851_suspend, ks8851_resume);
+
+/**
+ * ks8851_done_tx - update and then free skbuff after transmitting
+ * @ks: The device state
+ * @txb: The buffer transmitted
+ */
+static void __maybe_unused ks8851_done_tx(struct ks8851_net *ks,
+					  struct sk_buff *txb)
+{
+	struct net_device *dev = ks->netdev;
+
+	dev->stats.tx_bytes += txb->len;
+	dev->stats.tx_packets++;
+
+	dev_kfree_skb(txb);
+}
+
+#endif /* __KS8851_H__ */
diff --git a/drivers/net/ethernet/micrel/ks8851_common.c b/drivers/net/ethernet/micrel/ks8851_common.c
new file mode 100644
index 000000000000..d65872172229
--- /dev/null
+++ b/drivers/net/ethernet/micrel/ks8851_common.c
@@ -0,0 +1,1193 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* drivers/net/ethernet/micrel/ks8851_common.c
+ *
+ * Copyright 2009 Simtec Electronics
+ *	http://www.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/cache.h>
+#include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/of_net.h>
+
+#include "ks8851.h"
+
+/**
+ * ks8851_lock - register access lock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Claim chip register access lock
+ */
+static void ks8851_lock(struct ks8851_net *ks, unsigned long *flags)
+{
+	ks->lock(ks, flags);
+}
+
+/**
+ * ks8851_unlock - register access unlock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Release chip register access lock
+ */
+static void ks8851_unlock(struct ks8851_net *ks, unsigned long *flags)
+{
+	ks->unlock(ks, flags);
+}
+
+/**
+ * ks8851_wrreg16 - write 16bit register value to chip
+ * @ks: The chip state
+ * @reg: The register address
+ * @val: The value to write
+ *
+ * Issue a write to put the value @val into the register specified in @reg.
+ */
+static void ks8851_wrreg16(struct ks8851_net *ks, unsigned int reg,
+			   unsigned int val)
+{
+	ks->wrreg16(ks, reg, val);
+}
+
+/**
+ * ks8851_rdreg16 - read 16 bit register from device
+ * @ks: The chip information
+ * @reg: The register address
+ *
+ * Read a 16bit register from the chip, returning the result
+ */
+static unsigned int ks8851_rdreg16(struct ks8851_net *ks,
+				   unsigned int reg)
+{
+	return ks->rdreg16(ks, reg);
+}
+
+/**
+ * ks8851_soft_reset - issue one of the soft reset to the device
+ * @ks: The device state.
+ * @op: The bit(s) to set in the GRR
+ *
+ * Issue the relevant soft-reset command to the device's GRR register
+ * specified by @op.
+ *
+ * Note, the delays are in there as a caution to ensure that the reset
+ * has time to take effect and then complete. Since the datasheet does
+ * not currently specify the exact sequence, we have chosen something
+ * that seems to work with our device.
+ */
+static void ks8851_soft_reset(struct ks8851_net *ks, unsigned op)
+{
+	ks8851_wrreg16(ks, KS_GRR, op);
+	mdelay(1);	/* wait a short time to effect reset */
+	ks8851_wrreg16(ks, KS_GRR, 0);
+	mdelay(1);	/* wait for condition to clear */
+}
+
+/**
+ * ks8851_set_powermode - set power mode of the device
+ * @ks: The device state
+ * @pwrmode: The power mode value to write to KS_PMECR.
+ *
+ * Change the power mode of the chip.
+ */
+static void ks8851_set_powermode(struct ks8851_net *ks, unsigned pwrmode)
+{
+	unsigned pmecr;
+
+	netif_dbg(ks, hw, ks->netdev, "setting power mode %d\n", pwrmode);
+
+	pmecr = ks8851_rdreg16(ks, KS_PMECR);
+	pmecr &= ~PMECR_PM_MASK;
+	pmecr |= pwrmode;
+
+	ks8851_wrreg16(ks, KS_PMECR, pmecr);
+}
+
+/**
+ * ks8851_write_mac_addr - write mac address to device registers
+ * @dev: The network device
+ *
+ * Update the KS8851 MAC address registers from the address in @dev.
+ *
+ * This call assumes that the chip is not running, so there is no need to
+ * shutdown the RXQ process whilst setting this.
+ */
+static int ks8851_write_mac_addr(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+	u16 val;
+	int i;
+
+	ks8851_lock(ks, &flags);
+
+	/*
+	 * Wake up chip in case it was powered off when stopped; otherwise,
+	 * the first write to the MAC address does not take effect.
+	 */
+	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
+
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		val = (dev->dev_addr[i] << 8) | dev->dev_addr[i + 1];
+		ks8851_wrreg16(ks, KS_MAR(i), val);
+	}
+
+	if (!netif_running(dev))
+		ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
+
+	ks8851_unlock(ks, &flags);
+
+	return 0;
+}
+
+/**
+ * ks8851_read_mac_addr - read mac address from device registers
+ * @dev: The network device
+ *
+ * Update our copy of the KS8851 MAC address from the registers of @dev.
+ */
+static void ks8851_read_mac_addr(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+	u16 reg;
+	int i;
+
+	ks8851_lock(ks, &flags);
+
+	for (i = 0; i < ETH_ALEN; i += 2) {
+		reg = ks8851_rdreg16(ks, KS_MAR(i));
+		dev->dev_addr[i] = reg >> 8;
+		dev->dev_addr[i + 1] = reg & 0xff;
+	}
+
+	ks8851_unlock(ks, &flags);
+}
+
+/**
+ * ks8851_init_mac - initialise the mac address
+ * @ks: The device structure
+ * @np: The device node pointer
+ *
+ * Get or create the initial mac address for the device and then set that
+ * into the station address register. A mac address supplied in the device
+ * tree takes precedence. Otherwise, if there is an EEPROM present, then
+ * we try that. If no valid mac address is found we use eth_hw_addr_random()
+ * to create a new one.
+ */
+static void ks8851_init_mac(struct ks8851_net *ks, struct device_node *np)
+{
+	struct net_device *dev = ks->netdev;
+	const u8 *mac_addr;
+
+	mac_addr = of_get_mac_address(np);
+	if (!IS_ERR(mac_addr)) {
+		ether_addr_copy(dev->dev_addr, mac_addr);
+		ks8851_write_mac_addr(dev);
+		return;
+	}
+
+	if (ks->rc_ccr & CCR_EEPROM) {
+		ks8851_read_mac_addr(dev);
+		if (is_valid_ether_addr(dev->dev_addr))
+			return;
+
+		netdev_err(ks->netdev, "invalid mac address read %pM\n",
+				dev->dev_addr);
+	}
+
+	eth_hw_addr_random(dev);
+	ks8851_write_mac_addr(dev);
+}
+
+/**
+ * ks8851_dbg_dumpkkt - dump initial packet contents to debug
+ * @ks: The device state
+ * @rxpkt: The data for the received packet
+ *
+ * Dump the initial data from the packet to netdev_dbg().
+ */
+static void ks8851_dbg_dumpkkt(struct ks8851_net *ks, u8 *rxpkt)
+{
+	netdev_dbg(ks->netdev,
+		   "pkt %02x%02x%02x%02x %02x%02x%02x%02x %02x%02x%02x%02x\n",
+		   rxpkt[4], rxpkt[5], rxpkt[6], rxpkt[7],
+		   rxpkt[8], rxpkt[9], rxpkt[10], rxpkt[11],
+		   rxpkt[12], rxpkt[13], rxpkt[14], rxpkt[15]);
+}
+
+/**
+ * ks8851_rx_skb - receive skbuff
+ * @ks: The device state.
+ * @skb: The skbuff
+ */
+static void ks8851_rx_skb(struct ks8851_net *ks, struct sk_buff *skb)
+{
+	ks->rx_skb(ks, skb);
+}
+
+/**
+ * ks8851_rx_pkts - receive packets from the device
+ * @ks: The device information.
+ *
+ * This is called from the IRQ work queue when the system detects that there
+ * are packets in the receive queue. Find out how many packets there are and
+ * read them from the FIFO.
+ */
+static void ks8851_rx_pkts(struct ks8851_net *ks)
+{
+	struct sk_buff *skb;
+	unsigned rxfc;
+	unsigned rxlen;
+	unsigned rxstat;
+	u8 *rxpkt;
+
+	rxfc = (ks8851_rdreg16(ks, KS_RXFCTR) >> 8) & 0xff;
+
+	netif_dbg(ks, rx_status, ks->netdev,
+		  "%s: %d packets\n", __func__, rxfc);
+
+	/* Currently we're issuing a read per packet, but we could possibly
+	 * improve the code by issuing a single read, getting the receive
+	 * header, allocating the packet and then reading the packet data
+	 * out in one go.
+	 *
+	 * This form of operation would require us to hold the SPI bus'
+	 * chipselect low during the entire transaction to avoid any
+	 * reset to the data stream coming from the chip.
+	 */
+
+	for (; rxfc != 0; rxfc--) {
+		rxstat = ks8851_rdreg16(ks, KS_RXFHSR);
+		rxlen = ks8851_rdreg16(ks, KS_RXFHBCR) & RXFHBCR_CNT_MASK;
+
+		netif_dbg(ks, rx_status, ks->netdev,
+			  "rx: stat 0x%04x, len 0x%04x\n", rxstat, rxlen);
+
+		/* the length of the packet includes the 32bit CRC */
+
+		/* set dma read address */
+		ks8851_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI | 0x00);
+
+		/* start DMA access */
+		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
+
+		if (rxlen > 4) {
+			unsigned int rxalign;
+
+			rxlen -= 4;
+			rxalign = ALIGN(rxlen, 4);
+			skb = netdev_alloc_skb_ip_align(ks->netdev, rxalign);
+			if (skb) {
+
+				/* 4 bytes of status header + 4 bytes of
+				 * garbage: we put them before ethernet
+				 * header, so that they are copied,
+				 * but ignored.
+				 */
+
+				rxpkt = skb_put(skb, rxlen) - 8;
+
+				ks->rdfifo(ks, rxpkt, rxalign + 8);
+
+				if (netif_msg_pktdata(ks))
+					ks8851_dbg_dumpkkt(ks, rxpkt);
+
+				skb->protocol = eth_type_trans(skb, ks->netdev);
+				ks8851_rx_skb(ks, skb);
+
+				ks->netdev->stats.rx_packets++;
+				ks->netdev->stats.rx_bytes += rxlen;
+			}
+		}
+
+		/* end DMA access and dequeue packet */
+		ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_RRXEF);
+	}
+}
+
+/**
+ * ks8851_irq - IRQ handler for dealing with interrupt requests
+ * @irq: IRQ number
+ * @_ks: cookie
+ *
+ * This handler is invoked when the IRQ line asserts to find out what happened.
+ * As we cannot allow ourselves to sleep in HARDIRQ context, this handler runs
+ * in thread context.
+ *
+ * Read the interrupt status, work out what needs to be done and then clear
+ * any of the interrupts that are not needed.
+ */
+static irqreturn_t ks8851_irq(int irq, void *_ks)
+{
+	struct ks8851_net *ks = _ks;
+	unsigned handled = 0;
+	unsigned long flags;
+	unsigned int status;
+
+	ks8851_lock(ks, &flags);
+
+	status = ks8851_rdreg16(ks, KS_ISR);
+
+	netif_dbg(ks, intr, ks->netdev,
+		  "%s: status 0x%04x\n", __func__, status);
+
+	if (status & IRQ_LCI)
+		handled |= IRQ_LCI;
+
+	if (status & IRQ_LDI) {
+		u16 pmecr = ks8851_rdreg16(ks, KS_PMECR);
+		pmecr &= ~PMECR_WKEVT_MASK;
+		ks8851_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK);
+
+		handled |= IRQ_LDI;
+	}
+
+	if (status & IRQ_RXPSI)
+		handled |= IRQ_RXPSI;
+
+	if (status & IRQ_TXI) {
+		handled |= IRQ_TXI;
+
+		/* no lock here, tx queue should have been stopped */
+
+		/* update our idea of how much tx space is available to the
+		 * system */
+		ks->tx_space = ks8851_rdreg16(ks, KS_TXMIR);
+
+		netif_dbg(ks, intr, ks->netdev,
+			  "%s: txspace %d\n", __func__, ks->tx_space);
+	}
+
+	if (status & IRQ_RXI)
+		handled |= IRQ_RXI;
+
+	if (status & IRQ_SPIBEI) {
+		netdev_err(ks->netdev, "%s: spi bus error\n", __func__);
+		handled |= IRQ_SPIBEI;
+	}
+
+	ks8851_wrreg16(ks, KS_ISR, handled);
+
+	if (status & IRQ_RXI) {
+		/* the datasheet says to disable the rx interrupt during
+		 * packet read-out, however we're masking the interrupt
+		 * from the device so do not bother masking just the RX
+		 * from the device. */
+
+		ks8851_rx_pkts(ks);
+	}
+
+	/* if something stopped the rx process, probably due to wanting
+	 * to change the rx settings, then do something about restarting
+	 * it. */
+	if (status & IRQ_RXPSI) {
+		struct ks8851_rxctrl *rxc = &ks->rxctrl;
+
+		/* update the multicast hash table */
+		ks8851_wrreg16(ks, KS_MAHTR0, rxc->mchash[0]);
+		ks8851_wrreg16(ks, KS_MAHTR1, rxc->mchash[1]);
+		ks8851_wrreg16(ks, KS_MAHTR2, rxc->mchash[2]);
+		ks8851_wrreg16(ks, KS_MAHTR3, rxc->mchash[3]);
+
+		ks8851_wrreg16(ks, KS_RXCR2, rxc->rxcr2);
+		ks8851_wrreg16(ks, KS_RXCR1, rxc->rxcr1);
+	}
+
+	ks8851_unlock(ks, &flags);
+
+	if (status & IRQ_LCI)
+		mii_check_link(&ks->mii);
+
+	if (status & IRQ_TXI)
+		netif_wake_queue(ks->netdev);
+
+	return IRQ_HANDLED;
+}
+
+/**
+ * ks8851_flush_tx_work - flush outstanding TX work
+ * @ks: The device state
+ */
+static void ks8851_flush_tx_work(struct ks8851_net *ks)
+{
+	if (ks->flush_tx_work)
+		ks->flush_tx_work(ks);
+}
+
+/**
+ * ks8851_net_open - open network device
+ * @dev: The network device being opened.
+ *
+ * Called when the network device is marked active, such as a user executing
+ * 'ifconfig up' on the device.
+ */
+static int ks8851_net_open(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+	int ret;
+
+	ret = request_threaded_irq(dev->irq, NULL, ks8851_irq,
+				   IRQF_TRIGGER_LOW | IRQF_ONESHOT,
+				   dev->name, ks);
+	if (ret < 0) {
+		netdev_err(dev, "failed to get irq\n");
+		return ret;
+	}
+
+	/* lock the card, even if we may not actually be doing anything
+	 * else at the moment */
+	ks8851_lock(ks, &flags);
+
+	netif_dbg(ks, ifup, ks->netdev, "opening\n");
+
+	/* bring chip out of any power saving mode it was in */
+	ks8851_set_powermode(ks, PMECR_PM_NORMAL);
+
+	/* issue a soft reset to the RX/TX QMU to put it into a known
+	 * state. */
+	ks8851_soft_reset(ks, GRR_QMU);
+
+	/* setup transmission parameters */
+
+	ks8851_wrreg16(ks, KS_TXCR, (TXCR_TXE | /* enable transmit process */
+				     TXCR_TXPE | /* pad to min length */
+				     TXCR_TXCRC | /* add CRC */
+				     TXCR_TXFCE)); /* enable flow control */
+
+	/* auto-increment tx data, reset tx pointer */
+	ks8851_wrreg16(ks, KS_TXFDPR, TXFDPR_TXFPAI);
+
+	/* setup receiver control */
+
+	ks8851_wrreg16(ks, KS_RXCR1, (RXCR1_RXPAFMA | /*  from mac filter */
+				      RXCR1_RXFCE | /* enable flow control */
+				      RXCR1_RXBE | /* broadcast enable */
+				      RXCR1_RXUE | /* unicast enable */
+				      RXCR1_RXE)); /* enable rx block */
+
+	/* transfer entire frames out in one go */
+	ks8851_wrreg16(ks, KS_RXCR2, RXCR2_SRDBL_FRAME);
+
+	/* set receive counter timeouts */
+	ks8851_wrreg16(ks, KS_RXDTTR, 1000); /* 1ms after first frame to IRQ */
+	ks8851_wrreg16(ks, KS_RXDBCTR, 4096); /* >4Kbytes in buffer to IRQ */
+	ks8851_wrreg16(ks, KS_RXFCTR, 10);  /* 10 frames to IRQ */
+
+	ks->rc_rxqcr = (RXQCR_RXFCTE |  /* IRQ on frame count exceeded */
+			RXQCR_RXDBCTE | /* IRQ on byte count exceeded */
+			RXQCR_RXDTTE);  /* IRQ on time exceeded */
+
+	ks8851_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
+
+	/* clear then enable interrupts */
+	ks8851_wrreg16(ks, KS_ISR, ks->rc_ier);
+	ks8851_wrreg16(ks, KS_IER, ks->rc_ier);
+
+	netif_start_queue(ks->netdev);
+
+	netif_dbg(ks, ifup, ks->netdev, "network device up\n");
+
+	ks8851_unlock(ks, &flags);
+	mii_check_link(&ks->mii);
+	return 0;
+}
+
+/**
+ * ks8851_net_stop - close network device
+ * @dev: The device being closed.
+ *
+ * Called to close down a network device which has been active. Cancel any
+ * work, shutdown the RX and TX process and then place the chip into a low
+ * power state whilst it is not being used.
+ */
+static int ks8851_net_stop(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+
+	netif_info(ks, ifdown, dev, "shutting down\n");
+
+	netif_stop_queue(dev);
+
+	ks8851_lock(ks, &flags);
+	/* turn off the IRQs and ack any outstanding */
+	ks8851_wrreg16(ks, KS_IER, 0x0000);
+	ks8851_wrreg16(ks, KS_ISR, 0xffff);
+	ks8851_unlock(ks, &flags);
+
+	/* stop any outstanding work */
+	ks8851_flush_tx_work(ks);
+	flush_work(&ks->rxctrl_work);
+
+	ks8851_lock(ks, &flags);
+	/* shutdown RX process */
+	ks8851_wrreg16(ks, KS_RXCR1, 0x0000);
+
+	/* shutdown TX process */
+	ks8851_wrreg16(ks, KS_TXCR, 0x0000);
+
+	/* set powermode to soft power down to save power */
+	ks8851_set_powermode(ks, PMECR_PM_SOFTDOWN);
+	ks8851_unlock(ks, &flags);
+
+	/* ensure any queued tx buffers are dumped */
+	while (!skb_queue_empty(&ks->txq)) {
+		struct sk_buff *txb = skb_dequeue(&ks->txq);
+
+		netif_dbg(ks, ifdown, ks->netdev,
+			  "%s: freeing txb %p\n", __func__, txb);
+
+		dev_kfree_skb(txb);
+	}
+
+	free_irq(dev->irq, ks);
+
+	return 0;
+}
+
+/**
+ * ks8851_start_xmit - transmit packet
+ * @skb: The buffer to transmit
+ * @dev: The device used to transmit the packet.
+ *
+ * Called by the network layer to transmit the @skb. Queue the packet for
+ * the device and schedule the necessary work to transmit the packet when
+ * it is free.
+ *
+ * We do this to firstly avoid sleeping with the network device locked,
+ * and secondly so we can round up more than one packet to transmit which
+ * means we can try and avoid generating too many transmit done interrupts.
+ */
+static netdev_tx_t ks8851_start_xmit(struct sk_buff *skb,
+				     struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	return ks->start_xmit(skb, dev);
+}
+
+/**
+ * ks8851_rxctrl_work - work handler to change rx mode
+ * @work: The work structure this belongs to.
+ *
+ * Lock the device and issue the necessary changes to the receive mode from
+ * the network device layer. This is done so that we can do this without
+ * having to sleep whilst holding the network device lock.
+ *
+ * Since the recommendation from Micrel is that the RXQ is shutdown whilst the
+ * receive parameters are programmed, we issue a write to disable the RXQ and
+ * then wait for the interrupt handler to be triggered once the RXQ shutdown is
+ * complete. The interrupt handler then writes the new values into the chip.
+ */
+static void ks8851_rxctrl_work(struct work_struct *work)
+{
+	struct ks8851_net *ks = container_of(work, struct ks8851_net, rxctrl_work);
+	unsigned long flags;
+
+	ks8851_lock(ks, &flags);
+
+	/* need to shutdown RXQ before modifying filter parameters */
+	ks8851_wrreg16(ks, KS_RXCR1, 0x00);
+
+	ks8851_unlock(ks, &flags);
+}
+
+static void ks8851_set_rx_mode(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	struct ks8851_rxctrl rxctrl;
+
+	memset(&rxctrl, 0, sizeof(rxctrl));
+
+	if (dev->flags & IFF_PROMISC) {
+		/* interface to receive everything */
+
+		rxctrl.rxcr1 = RXCR1_RXAE | RXCR1_RXINVF;
+	} else if (dev->flags & IFF_ALLMULTI) {
+		/* accept all multicast packets */
+
+		rxctrl.rxcr1 = (RXCR1_RXME | RXCR1_RXAE |
+				RXCR1_RXPAFMA | RXCR1_RXMAFMA);
+	} else if (dev->flags & IFF_MULTICAST && !netdev_mc_empty(dev)) {
+		struct netdev_hw_addr *ha;
+		u32 crc;
+
+		/* accept some multicast */
+
+		netdev_for_each_mc_addr(ha, dev) {
+			crc = ether_crc(ETH_ALEN, ha->addr);
+			crc >>= (32 - 6);  /* get top six bits */
+
+			rxctrl.mchash[crc >> 4] |= (1 << (crc & 0xf));
+		}
+
+		rxctrl.rxcr1 = RXCR1_RXME | RXCR1_RXPAFMA;
+	} else {
+		/* just accept broadcast / unicast */
+		rxctrl.rxcr1 = RXCR1_RXPAFMA;
+	}
+
+	rxctrl.rxcr1 |= (RXCR1_RXUE | /* unicast enable */
+			 RXCR1_RXBE | /* broadcast enable */
+			 RXCR1_RXE | /* RX process enable */
+			 RXCR1_RXFCE); /* enable flow control */
+
+	rxctrl.rxcr2 |= RXCR2_SRDBL_FRAME;
+
+	/* schedule work to do the actual set of the data if needed */
+
+	spin_lock(&ks->statelock);
+
+	if (memcmp(&rxctrl, &ks->rxctrl, sizeof(rxctrl)) != 0) {
+		memcpy(&ks->rxctrl, &rxctrl, sizeof(ks->rxctrl));
+		schedule_work(&ks->rxctrl_work);
+	}
+
+	spin_unlock(&ks->statelock);
+}
+
+static int ks8851_set_mac_address(struct net_device *dev, void *addr)
+{
+	struct sockaddr *sa = addr;
+
+	if (netif_running(dev))
+		return -EBUSY;
+
+	if (!is_valid_ether_addr(sa->sa_data))
+		return -EADDRNOTAVAIL;
+
+	memcpy(dev->dev_addr, sa->sa_data, ETH_ALEN);
+	return ks8851_write_mac_addr(dev);
+}
+
+static int ks8851_net_ioctl(struct net_device *dev, struct ifreq *req, int cmd)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	if (!netif_running(dev))
+		return -EINVAL;
+
+	return generic_mii_ioctl(&ks->mii, if_mii(req), cmd, NULL);
+}
+
+static const struct net_device_ops ks8851_netdev_ops = {
+	.ndo_open		= ks8851_net_open,
+	.ndo_stop		= ks8851_net_stop,
+	.ndo_do_ioctl		= ks8851_net_ioctl,
+	.ndo_start_xmit		= ks8851_start_xmit,
+	.ndo_set_mac_address	= ks8851_set_mac_address,
+	.ndo_set_rx_mode	= ks8851_set_rx_mode,
+	.ndo_validate_addr	= eth_validate_addr,
+};
+
+/* ethtool support */
+
+static void ks8851_get_drvinfo(struct net_device *dev,
+			       struct ethtool_drvinfo *di)
+{
+	strlcpy(di->driver, "KS8851", sizeof(di->driver));
+	strlcpy(di->version, "1.00", sizeof(di->version));
+	strlcpy(di->bus_info, dev_name(dev->dev.parent), sizeof(di->bus_info));
+}
+
+static u32 ks8851_get_msglevel(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	return ks->msg_enable;
+}
+
+static void ks8851_set_msglevel(struct net_device *dev, u32 to)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	ks->msg_enable = to;
+}
+
+static int ks8851_get_link_ksettings(struct net_device *dev,
+				     struct ethtool_link_ksettings *cmd)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	mii_ethtool_get_link_ksettings(&ks->mii, cmd);
+
+	return 0;
+}
+
+static int ks8851_set_link_ksettings(struct net_device *dev,
+				     const struct ethtool_link_ksettings *cmd)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	return mii_ethtool_set_link_ksettings(&ks->mii, cmd);
+}
+
+static u32 ks8851_get_link(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	return mii_link_ok(&ks->mii);
+}
+
+static int ks8851_nway_reset(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	return mii_nway_restart(&ks->mii);
+}
+
+/* EEPROM support */
+
+static void ks8851_eeprom_regread(struct eeprom_93cx6 *ee)
+{
+	struct ks8851_net *ks = ee->data;
+	unsigned val;
+
+	val = ks8851_rdreg16(ks, KS_EEPCR);
+
+	ee->reg_data_out = (val & EEPCR_EESB) ? 1 : 0;
+	ee->reg_data_clock = (val & EEPCR_EESCK) ? 1 : 0;
+	ee->reg_chip_select = (val & EEPCR_EECS) ? 1 : 0;
+}
+
+static void ks8851_eeprom_regwrite(struct eeprom_93cx6 *ee)
+{
+	struct ks8851_net *ks = ee->data;
+	unsigned val = EEPCR_EESA;	/* default - eeprom access on */
+
+	if (ee->drive_data)
+		val |= EEPCR_EESRWA;
+	if (ee->reg_data_in)
+		val |= EEPCR_EEDO;
+	if (ee->reg_data_clock)
+		val |= EEPCR_EESCK;
+	if (ee->reg_chip_select)
+		val |= EEPCR_EECS;
+
+	ks8851_wrreg16(ks, KS_EEPCR, val);
+}
+
+/**
+ * ks8851_eeprom_claim - claim device EEPROM and activate the interface
+ * @ks: The network device state.
+ *
+ * Check for the presence of an EEPROM, and then activate software access
+ * to the device.
+ */
+static int ks8851_eeprom_claim(struct ks8851_net *ks)
+{
+	/* start with clock low, cs high */
+	ks8851_wrreg16(ks, KS_EEPCR, EEPCR_EESA | EEPCR_EECS);
+	return 0;
+}
+
+/**
+ * ks8851_eeprom_release - release the EEPROM interface
+ * @ks: The device state
+ *
+ * Release the software access to the device EEPROM
+ */
+static void ks8851_eeprom_release(struct ks8851_net *ks)
+{
+	unsigned val = ks8851_rdreg16(ks, KS_EEPCR);
+
+	ks8851_wrreg16(ks, KS_EEPCR, val & ~EEPCR_EESA);
+}
+
+#define KS_EEPROM_MAGIC (0x00008851)
+
+static int ks8851_set_eeprom(struct net_device *dev,
+			     struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	int offset = ee->offset;
+	unsigned long flags;
+	int len = ee->len;
+	u16 tmp;
+
+	/* currently only support byte writing */
+	if (len != 1)
+		return -EINVAL;
+
+	if (ee->magic != KS_EEPROM_MAGIC)
+		return -EINVAL;
+
+	if (!(ks->rc_ccr & CCR_EEPROM))
+		return -ENOENT;
+
+	ks8851_lock(ks, &flags);
+
+	ks8851_eeprom_claim(ks);
+
+	eeprom_93cx6_wren(&ks->eeprom, true);
+
+	/* ethtool currently only supports writing bytes, which means
+	 * we have to read/modify/write our 16bit EEPROMs */
+
+	eeprom_93cx6_read(&ks->eeprom, offset/2, &tmp);
+
+	if (offset & 1) {
+		tmp &= 0xff;
+		tmp |= *data << 8;
+	} else {
+		tmp &= 0xff00;
+		tmp |= *data;
+	}
+
+	eeprom_93cx6_write(&ks->eeprom, offset/2, tmp);
+	eeprom_93cx6_wren(&ks->eeprom, false);
+
+	ks8851_eeprom_release(ks);
+	ks8851_unlock(ks, &flags);
+
+	return 0;
+}
+
+static int ks8851_get_eeprom(struct net_device *dev,
+			     struct ethtool_eeprom *ee, u8 *data)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	int offset = ee->offset;
+	unsigned long flags;
+	int len = ee->len;
+
+	/* must be 2 byte aligned */
+	if (len & 1 || offset & 1)
+		return -EINVAL;
+
+	if (!(ks->rc_ccr & CCR_EEPROM))
+		return -ENOENT;
+
+	ks8851_lock(ks, &flags);
+
+	ks8851_eeprom_claim(ks);
+
+	ee->magic = KS_EEPROM_MAGIC;
+
+	eeprom_93cx6_multiread(&ks->eeprom, offset/2, (__le16 *)data, len/2);
+	ks8851_eeprom_release(ks);
+	ks8851_unlock(ks, &flags);
+
+	return 0;
+}
+
+static int ks8851_get_eeprom_len(struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+
+	/* currently, we assume it is an 93C46 attached, so return 128 */
+	return ks->rc_ccr & CCR_EEPROM ? 128 : 0;
+}
+
+static const struct ethtool_ops ks8851_ethtool_ops = {
+	.get_drvinfo	= ks8851_get_drvinfo,
+	.get_msglevel	= ks8851_get_msglevel,
+	.set_msglevel	= ks8851_set_msglevel,
+	.get_link	= ks8851_get_link,
+	.nway_reset	= ks8851_nway_reset,
+	.get_eeprom_len	= ks8851_get_eeprom_len,
+	.get_eeprom	= ks8851_get_eeprom,
+	.set_eeprom	= ks8851_set_eeprom,
+	.get_link_ksettings = ks8851_get_link_ksettings,
+	.set_link_ksettings = ks8851_set_link_ksettings,
+};
+
+/* MII interface controls */
+
+/**
+ * ks8851_phy_reg - convert MII register into a KS8851 register
+ * @reg: MII register number.
+ *
+ * Return the KS8851 register number for the corresponding MII PHY register
+ * if possible. Return zero if the MII register has no direct mapping to the
+ * KS8851 register set.
+ */
+static int ks8851_phy_reg(int reg)
+{
+	switch (reg) {
+	case MII_BMCR:
+		return KS_P1MBCR;
+	case MII_BMSR:
+		return KS_P1MBSR;
+	case MII_PHYSID1:
+		return KS_PHY1ILR;
+	case MII_PHYSID2:
+		return KS_PHY1IHR;
+	case MII_ADVERTISE:
+		return KS_P1ANAR;
+	case MII_LPA:
+		return KS_P1ANLPR;
+	}
+
+	return 0x0;
+}
+
+/**
+ * ks8851_phy_read - MII interface PHY register read.
+ * @dev: The network device the PHY is on.
+ * @phy_addr: Address of PHY (ignored as we only have one)
+ * @reg: The register to read.
+ *
+ * This call reads data from the PHY register specified in @reg. Since the
+ * device does not support all the MII registers, the non-existent values
+ * are always returned as zero.
+ *
+ * We return zero for unsupported registers as the MII code does not check
+ * the value returned for any error status, and simply returns it to the
+ * caller. The mii-tool that the driver was tested with takes any -ve error
+ * as real PHY capabilities, thus displaying incorrect data to the user.
+ */
+static int ks8851_phy_read(struct net_device *dev, int phy_addr, int reg)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+	int ksreg;
+	int result;
+
+	ksreg = ks8851_phy_reg(reg);
+	if (!ksreg)
+		return 0x0;	/* no error return allowed, so use zero */
+
+	ks8851_lock(ks, &flags);
+	result = ks8851_rdreg16(ks, ksreg);
+	ks8851_unlock(ks, &flags);
+
+	return result;
+}
+
+static void ks8851_phy_write(struct net_device *dev,
+			     int phy, int reg, int value)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	unsigned long flags;
+	int ksreg;
+
+	ksreg = ks8851_phy_reg(reg);
+	if (ksreg) {
+		ks8851_lock(ks, &flags);
+		ks8851_wrreg16(ks, ksreg, value);
+		ks8851_unlock(ks, &flags);
+	}
+}
+
+/**
+ * ks8851_read_selftest - read the selftest memory info.
+ * @ks: The device state
+ *
+ * Read and check the TX/RX memory selftest information.
+ *
+ * Return: 0 on pass or unfinished test, else bit 0 = TX fail, bit 1 = RX fail.
+ */
+static int ks8851_read_selftest(struct ks8851_net *ks)
+{
+	unsigned both_done = MBIR_TXMBF | MBIR_RXMBF;
+	int ret = 0;
+	unsigned rd;
+
+	rd = ks8851_rdreg16(ks, KS_MBIR);
+	if ((rd & both_done) != both_done) {
+		netdev_warn(ks->netdev, "Memory selftest not finished\n");
+		return 0;
+	}
+
+	if (rd & MBIR_TXMBFA) {
+		netdev_err(ks->netdev, "TX memory selftest fail\n");
+		ret |= 1;
+	}
+	if (rd & MBIR_RXMBFA) {
+		netdev_err(ks->netdev, "RX memory selftest fail\n");
+		ret |= 2;
+	}
+
+	return ret;
+}
+
+/* driver bus management functions */
+
+#ifdef CONFIG_PM_SLEEP
+
+int ks8851_suspend(struct device *dev)
+{
+	struct ks8851_net *ks = dev_get_drvdata(dev);
+	struct net_device *netdev = ks->netdev;
+
+	if (netif_running(netdev)) {
+		netif_device_detach(netdev);
+		ks8851_net_stop(netdev);
+	}
+
+	return 0;
+}
+
+int ks8851_resume(struct device *dev)
+{
+	struct ks8851_net *ks = dev_get_drvdata(dev);
+	struct net_device *netdev = ks->netdev;
+
+	if (netif_running(netdev)) {
+		ks8851_net_open(netdev);
+		netif_device_attach(netdev);
+	}
+
+	return 0;
+}
+#endif
+
+/* Bus-independent probe: power up, identify the chip, register the netdev */
+int ks8851_probe_common(struct net_device *netdev, struct device *dev,
+			int msg_en)
+{
+	struct ks8851_net *ks = netdev_priv(netdev);
+	unsigned cider;
+	int gpio, ret;
+
+	ks->netdev = netdev;
+	ks->tx_space = 6144;
+
+	gpio = of_get_named_gpio_flags(dev->of_node, "reset-gpios", 0, NULL);
+	if (gpio == -EPROBE_DEFER)
+		return gpio;
+
+	ks->gpio = gpio;
+	if (gpio_is_valid(gpio)) {
+		ret = devm_gpio_request_one(dev, gpio,
+					    GPIOF_OUT_INIT_LOW, "ks8851_rst_n");
+		if (ret) {
+			dev_err(dev, "reset gpio request failed\n");
+			return ret;
+		}
+	}
+
+	ks->vdd_io = devm_regulator_get(dev, "vdd-io");
+	if (IS_ERR(ks->vdd_io)) {
+		ret = PTR_ERR(ks->vdd_io);
+		goto err_reg_io;
+	}
+
+	ret = regulator_enable(ks->vdd_io);
+	if (ret) {
+		dev_err(dev, "regulator vdd_io enable fail: %d\n", ret);
+		goto err_reg_io;
+	}
+
+	ks->vdd_reg = devm_regulator_get(dev, "vdd");
+	if (IS_ERR(ks->vdd_reg)) {
+		ret = PTR_ERR(ks->vdd_reg);
+		goto err_reg;
+	}
+
+	ret = regulator_enable(ks->vdd_reg);
+	if (ret) {
+		dev_err(dev, "regulator vdd enable fail: %d\n", ret);
+		goto err_reg;
+	}
+
+	if (gpio_is_valid(gpio)) {
+		usleep_range(10000, 11000);
+		gpio_set_value(gpio, 1);
+	}
+
+	spin_lock_init(&ks->statelock);
+
+	INIT_WORK(&ks->rxctrl_work, ks8851_rxctrl_work);
+
+	/* setup EEPROM state */
+	ks->eeprom.data = ks;
+	ks->eeprom.width = PCI_EEPROM_WIDTH_93C46;
+	ks->eeprom.register_read = ks8851_eeprom_regread;
+	ks->eeprom.register_write = ks8851_eeprom_regwrite;
+
+	/* setup mii state */
+	ks->mii.dev		= netdev;
+	ks->mii.phy_id		= 1;
+	ks->mii.phy_id_mask	= 1;
+	ks->mii.reg_num_mask	= 0xf;
+	ks->mii.mdio_read	= ks8851_phy_read;
+	ks->mii.mdio_write	= ks8851_phy_write;
+
+	dev_info(dev, "message enable is %d\n", msg_en);
+
+	/* set the default message enable */
+	ks->msg_enable = netif_msg_init(msg_en, NETIF_MSG_DRV |
+						NETIF_MSG_PROBE |
+						NETIF_MSG_LINK);
+
+	skb_queue_head_init(&ks->txq);
+
+	netdev->ethtool_ops = &ks8851_ethtool_ops;
+	SET_NETDEV_DEV(netdev, dev);
+
+	dev_set_drvdata(dev, ks);
+
+	netif_carrier_off(ks->netdev);
+	netdev->if_port = IF_PORT_100BASET;
+	netdev->netdev_ops = &ks8851_netdev_ops;
+
+	/* issue a global soft reset to reset the device. */
+	ks8851_soft_reset(ks, GRR_GSR);
+
+	/* simple check for a valid chip being connected to the bus */
+	cider = ks8851_rdreg16(ks, KS_CIDER);
+	if ((cider & ~CIDER_REV_MASK) != CIDER_ID) {
+		dev_err(dev, "failed to read device ID\n");
+		ret = -ENODEV;
+		goto err_id;
+	}
+
+	/* cache the contents of the CCR register for EEPROM, etc. */
+	ks->rc_ccr = ks8851_rdreg16(ks, KS_CCR);
+
+	ks8851_read_selftest(ks);
+	ks8851_init_mac(ks, dev->of_node);
+
+	ret = register_netdev(netdev);
+	if (ret) {
+		dev_err(dev, "failed to register network device\n");
+		goto err_netdev;
+	}
+
+	netdev_info(netdev, "revision %d, MAC %pM, IRQ %d, %s EEPROM\n",
+		    CIDER_REV_GET(cider), netdev->dev_addr, netdev->irq,
+		    ks->rc_ccr & CCR_EEPROM ? "has" : "no");
+
+	return 0;
+
+err_netdev:
+err_id:
+	if (gpio_is_valid(gpio))
+		gpio_set_value(gpio, 0);
+	regulator_disable(ks->vdd_reg);
+err_reg:
+	regulator_disable(ks->vdd_io);
+err_reg_io:
+	return ret;
+}
+
+int ks8851_remove_common(struct device *dev)
+{
+	struct ks8851_net *priv = dev_get_drvdata(dev);
+
+	if (netif_msg_drv(priv))
+		dev_info(dev, "remove\n");
+
+	unregister_netdev(priv->netdev);
+	if (gpio_is_valid(priv->gpio))
+		gpio_set_value(priv->gpio, 0);
+	regulator_disable(priv->vdd_reg);
+	regulator_disable(priv->vdd_io);
+
+	return 0;
+}
diff --git a/drivers/net/ethernet/micrel/ks8851_spi.c b/drivers/net/ethernet/micrel/ks8851_spi.c
new file mode 100644
index 000000000000..4ec7f1615977
--- /dev/null
+++ b/drivers/net/ethernet/micrel/ks8851_spi.c
@@ -0,0 +1,485 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* drivers/net/ethernet/micrel/ks8851_spi.c
+ *
+ * Copyright 2009 Simtec Electronics
+ *	http://www.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/cache.h>
+#include <linux/crc32.h>
+#include <linux/mii.h>
+#include <linux/regulator/consumer.h>
+
+#include <linux/spi/spi.h>
+#include <linux/gpio.h>
+#include <linux/of_gpio.h>
+#include <linux/of_net.h>
+
+#include "ks8851.h"
+
+static int msg_enable;
+
+/**
+ * struct ks8851_net_spi - KS8851 SPI driver private data
+ * @lock: Lock to ensure that the device is not accessed when busy.
+ * @tx_work: Work queue for tx packets
+ * @ks8851: KS8851 driver common private data
+ * @spidev: The spi device we're bound to.
+ * @spi_msg1: pre-setup SPI message with one transfer, @spi_xfer1.
+ * @spi_msg2: pre-setup SPI message with two transfers, @spi_xfer2.
+ * @spi_xfer1: @spi_msg1 SPI transfer structure
+ * @spi_xfer2: @spi_msg2 SPI transfer structure
+ *
+ * The @lock ensures that the chip is protected when certain operations are
+ * in progress. When the read or write packet transfer is in progress, most
+ * of the chip registers are not accessible until the transfer is finished and
+ * the DMA has been de-asserted.
+ */
+struct ks8851_net_spi {
+	struct ks8851_net	ks8851;
+	struct mutex		lock;
+	struct work_struct	tx_work;
+	struct spi_device	*spidev;
+	struct spi_message	spi_msg1;
+	struct spi_message	spi_msg2;
+	struct spi_transfer	spi_xfer1;
+	struct spi_transfer	spi_xfer2[2];
+};
+
+#define to_ks8851_spi(ks) container_of((ks), struct ks8851_net_spi, ks8851)
+
+/* SPI frame opcodes */
+#define KS_SPIOP_RD	0x00
+#define KS_SPIOP_WR	0x40
+#define KS_SPIOP_RXFIFO	0x80
+#define KS_SPIOP_TXFIFO	0xC0
+
+/* shift for byte-enable data */
+#define BYTE_EN(_x)	((_x) << 2)
+
+/* turn register number and byte-enable mask into data for start of packet */
+#define MK_OP(_byteen, _reg)	\
+	(BYTE_EN(_byteen) | (_reg) << (8 + 2) | (_reg) >> 6)
+
+/**
+ * ks8851_lock_spi - register access lock
+ * @ks: The chip state
+ * @flags: Spinlock flags (unused here; the SPI variant locks a mutex)
+ *
+ * Claim chip register access lock
+ */
+static void ks8851_lock_spi(struct ks8851_net *ks, unsigned long *flags)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	mutex_lock(&kss->lock);
+}
+
+/**
+ * ks8851_unlock_spi - register access unlock
+ * @ks: The chip state
+ * @flags: Spinlock flags (unused here; the SPI variant unlocks a mutex)
+ *
+ * Release chip register access lock
+ */
+static void ks8851_unlock_spi(struct ks8851_net *ks, unsigned long *flags)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	mutex_unlock(&kss->lock);
+}
+
+/* SPI register read/write calls.
+ *
+ * All these calls issue SPI transactions to access the chip's registers. They
+ * all require that the necessary lock is held to prevent accesses when the
+ * chip is busy transferring packet data (RX/TX FIFO accesses).
+ */
+
+/**
+ * ks8851_wrreg16_spi - write 16bit register value to chip via SPI
+ * @ks: The chip state
+ * @reg: The register address
+ * @val: The value to write
+ *
+ * Issue a write to put the value @val into the register specified in @reg.
+ */
+static void ks8851_wrreg16_spi(struct ks8851_net *ks, unsigned int reg,
+			       unsigned int val)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = &kss->spi_xfer1;
+	struct spi_message *msg = &kss->spi_msg1;
+	__le16 txb[2];
+	int ret;
+
+	txb[0] = cpu_to_le16(MK_OP(reg & 2 ? 0xC : 0x03, reg) | KS_SPIOP_WR);
+	txb[1] = cpu_to_le16(val);
+
+	xfer->tx_buf = txb;
+	xfer->rx_buf = NULL;
+	xfer->len = 4;
+
+	ret = spi_sync(kss->spidev, msg);
+	if (ret < 0)
+		netdev_err(ks->netdev, "spi_sync() failed\n");
+}
+
+/**
+ * ks8851_rdreg - issue read register command and return the data
+ * @ks: The device state
+ * @op: The register address and byte enables in message format.
+ * @rxb: The RX buffer to return the result into
+ * @rxl: The length of data expected.
+ *
+ * This is the low level read call that issues the necessary spi message(s)
+ * to read data from the register specified in @op.
+ */
+static void ks8851_rdreg(struct ks8851_net *ks, unsigned int op,
+			 u8 *rxb, unsigned int rxl)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer;
+	struct spi_message *msg;
+	__le16 *txb = (__le16 *)ks->txd;
+	u8 *trx = ks->rxd;
+	int ret;
+
+	txb[0] = cpu_to_le16(op | KS_SPIOP_RD);
+
+	if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX) {
+		msg = &kss->spi_msg2;
+		xfer = kss->spi_xfer2;
+
+		xfer->tx_buf = txb;
+		xfer->rx_buf = NULL;
+		xfer->len = 2;
+
+		xfer++;
+		xfer->tx_buf = NULL;
+		xfer->rx_buf = trx;
+		xfer->len = rxl;
+	} else {
+		msg = &kss->spi_msg1;
+		xfer = &kss->spi_xfer1;
+
+		xfer->tx_buf = txb;
+		xfer->rx_buf = trx;
+		xfer->len = rxl + 2;
+	}
+
+	ret = spi_sync(kss->spidev, msg);
+	if (ret < 0)
+		netdev_err(ks->netdev, "read: spi_sync() failed\n");
+	else if (kss->spidev->master->flags & SPI_MASTER_HALF_DUPLEX)
+		memcpy(rxb, trx, rxl);
+	else
+		memcpy(rxb, trx + 2, rxl);
+}
+
+/**
+ * ks8851_rdreg16_spi - read 16 bit register from device via SPI
+ * @ks: The chip information
+ * @reg: The register address
+ *
+ * Read a 16bit register from the chip, returning the result
+ */
+static unsigned int ks8851_rdreg16_spi(struct ks8851_net *ks, unsigned int reg)
+{
+	__le16 rx = 0;
+
+	ks8851_rdreg(ks, MK_OP(reg & 2 ? 0xC : 0x3, reg), (u8 *)&rx, 2);
+	return le16_to_cpu(rx);
+}
+
+/**
+ * ks8851_rdfifo_spi - read data from the receive fifo via SPI
+ * @ks: The device state.
+ * @buff: The buffer address
+ * @len: The length of the data to read
+ *
+ * Issue an RXQ FIFO read command and read the @len amount of data from
+ * the FIFO into the buffer specified by @buff.
+ */
+static void ks8851_rdfifo_spi(struct ks8851_net *ks, u8 *buff, unsigned int len)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = kss->spi_xfer2;
+	struct spi_message *msg = &kss->spi_msg2;
+	u8 txb[1];
+	int ret;
+
+	netif_dbg(ks, rx_status, ks->netdev,
+		  "%s: %d@%p\n", __func__, len, buff);
+
+	/* set the operation we're issuing */
+	txb[0] = KS_SPIOP_RXFIFO;
+
+	xfer->tx_buf = txb;
+	xfer->rx_buf = NULL;
+	xfer->len = 1;
+
+	xfer++;
+	xfer->rx_buf = buff;
+	xfer->tx_buf = NULL;
+	xfer->len = len;
+
+	ret = spi_sync(kss->spidev, msg);
+	if (ret < 0)
+		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
+}
+
+/**
+ * ks8851_wrfifo_spi - write packet to TX FIFO via SPI
+ * @ks: The device state.
+ * @txp: The sk_buff to transmit.
+ * @irq: IRQ on completion of the packet.
+ *
+ * Send the @txp to the chip. This means creating the relevant packet header
+ * specifying the length of the packet and the other information the chip
+ * needs, such as IRQ on completion. Send the header and the packet data to
+ * the device.
+ */
+static void ks8851_wrfifo_spi(struct ks8851_net *ks, struct sk_buff *txp,
+			      bool irq)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+	struct spi_transfer *xfer = kss->spi_xfer2;
+	struct spi_message *msg = &kss->spi_msg2;
+	unsigned int fid = 0;
+	int ret;
+
+	netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n",
+		  __func__, txp, txp->len, txp->data, irq);
+
+	fid = ks->fid++;
+	fid &= TXFR_TXFID_MASK;
+
+	if (irq)
+		fid |= TXFR_TXIC;	/* irq on completion */
+
+	/* start header at txb[1] to align txw entries */
+	ks->txh.txb[1] = KS_SPIOP_TXFIFO;
+	ks->txh.txw[1] = cpu_to_le16(fid);
+	ks->txh.txw[2] = cpu_to_le16(txp->len);
+
+	xfer->tx_buf = &ks->txh.txb[1];
+	xfer->rx_buf = NULL;
+	xfer->len = 5;
+
+	xfer++;
+	xfer->tx_buf = txp->data;
+	xfer->rx_buf = NULL;
+	xfer->len = ALIGN(txp->len, 4);
+
+	ret = spi_sync(kss->spidev, msg);
+	if (ret < 0)
+		netdev_err(ks->netdev, "%s: spi_sync() failed\n", __func__);
+}
+
+/**
+ * ks8851_rx_skb_spi - receive skbuff
+ * @ks: The device state
+ * @skb: The skbuff
+ */
+static void ks8851_rx_skb_spi(struct ks8851_net *ks, struct sk_buff *skb)
+{
+	netif_rx_ni(skb);
+}
+
+/**
+ * ks8851_tx_work - process tx packet(s)
+ * @work: The work structure that was scheduled.
+ *
+ * This is called when a number of packets have been scheduled for
+ * transmission and need to be sent to the device.
+ */
+static void ks8851_tx_work(struct work_struct *work)
+{
+	struct ks8851_net_spi *kss;
+	struct ks8851_net *ks;
+	unsigned long flags;
+	struct sk_buff *txb;
+	bool last;
+
+	kss = container_of(work, struct ks8851_net_spi, tx_work);
+	ks = &kss->ks8851;
+	last = skb_queue_empty(&ks->txq);
+
+	ks8851_lock_spi(ks, &flags);
+
+	while (!last) {
+		txb = skb_dequeue(&ks->txq);
+		last = skb_queue_empty(&ks->txq);
+
+		if (txb) {
+			ks8851_wrreg16_spi(ks, KS_RXQCR,
+					   ks->rc_rxqcr | RXQCR_SDA);
+			ks8851_wrfifo_spi(ks, txb, last);
+			ks8851_wrreg16_spi(ks, KS_RXQCR, ks->rc_rxqcr);
+			ks8851_wrreg16_spi(ks, KS_TXQCR, TXQCR_METFE);
+
+			ks8851_done_tx(ks, txb);
+		}
+	}
+
+	ks8851_unlock_spi(ks, &flags);
+}
+
+/**
+ * ks8851_flush_tx_work_spi - flush outstanding TX work
+ * @ks: The device state
+ */
+static void ks8851_flush_tx_work_spi(struct ks8851_net *ks)
+{
+	struct ks8851_net_spi *kss = to_ks8851_spi(ks);
+
+	flush_work(&kss->tx_work);
+}
+
+/**
+ * calc_txlen - calculate size of message to send packet
+ * @len: Length of data
+ *
+ * Returns the size of the TXFIFO message needed to send
+ * this packet.
+ */
+static unsigned int calc_txlen(unsigned int len)
+{
+	return ALIGN(len + 4, 4);
+}
+
+/**
+ * ks8851_start_xmit_spi - transmit packet using SPI
+ * @skb: The buffer to transmit
+ * @dev: The device used to transmit the packet.
+ *
+ * Called by the network layer to transmit the @skb. Queue the packet for
+ * the device and schedule the necessary work to transmit the packet when
+ * it is free.
+ *
+ * We do this to firstly avoid sleeping with the network device locked,
+ * and secondly so we can round up more than one packet to transmit which
+ * means we can try and avoid generating too many transmit done interrupts.
+ */
+static netdev_tx_t ks8851_start_xmit_spi(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	unsigned int needed = calc_txlen(skb->len);
+	struct ks8851_net *ks = netdev_priv(dev);
+	netdev_tx_t ret = NETDEV_TX_OK;
+	struct ks8851_net_spi *kss;
+
+	kss = to_ks8851_spi(ks);
+
+	netif_dbg(ks, tx_queued, ks->netdev,
+		  "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data);
+
+	spin_lock(&ks->statelock);
+
+	if (needed > ks->tx_space) {
+		netif_stop_queue(dev);
+		ret = NETDEV_TX_BUSY;
+	} else {
+		ks->tx_space -= needed;
+		skb_queue_tail(&ks->txq, skb);
+	}
+
+	spin_unlock(&ks->statelock);
+	schedule_work(&kss->tx_work);
+
+	return ret;
+}
+
+static int ks8851_probe_spi(struct spi_device *spi)
+{
+	struct device *dev = &spi->dev;
+	struct ks8851_net_spi *kss;
+	struct net_device *netdev;
+	struct ks8851_net *ks;
+
+	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_spi));
+	if (!netdev)
+		return -ENOMEM;
+
+	spi->bits_per_word = 8;
+
+	ks = netdev_priv(netdev);
+
+	ks->lock = ks8851_lock_spi;
+	ks->unlock = ks8851_unlock_spi;
+	ks->rdreg16 = ks8851_rdreg16_spi;
+	ks->wrreg16 = ks8851_wrreg16_spi;
+	ks->rdfifo = ks8851_rdfifo_spi;
+	ks->wrfifo = ks8851_wrfifo_spi;
+	ks->start_xmit = ks8851_start_xmit_spi;
+	ks->rx_skb = ks8851_rx_skb_spi;
+	ks->flush_tx_work = ks8851_flush_tx_work_spi;
+
+#define STD_IRQ (IRQ_LCI |	/* Link Change */	\
+		 IRQ_TXI |	/* TX done */		\
+		 IRQ_RXI |	/* RX done */		\
+		 IRQ_SPIBEI |	/* SPI bus error */	\
+		 IRQ_TXPSI |	/* TX process stop */	\
+		 IRQ_RXPSI)	/* RX process stop */
+	ks->rc_ier = STD_IRQ;
+
+	kss = to_ks8851_spi(ks);
+
+	kss->spidev = spi;
+	mutex_init(&kss->lock);
+	INIT_WORK(&kss->tx_work, ks8851_tx_work);
+
+	/* initialise pre-made spi transfer messages */
+	spi_message_init(&kss->spi_msg1);
+	spi_message_add_tail(&kss->spi_xfer1, &kss->spi_msg1);
+
+	spi_message_init(&kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[0], &kss->spi_msg2);
+	spi_message_add_tail(&kss->spi_xfer2[1], &kss->spi_msg2);
+
+	netdev->irq = spi->irq;
+
+	return ks8851_probe_common(netdev, dev, msg_enable);
+}
+
+static int ks8851_remove_spi(struct spi_device *spi)
+{
+	return ks8851_remove_common(&spi->dev);
+}
+
+static const struct of_device_id ks8851_match_table[] = {
+	{ .compatible = "micrel,ks8851" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ks8851_match_table);
+
+static struct spi_driver ks8851_driver = {
+	.driver = {
+		.name = "ks8851",
+		.of_match_table = ks8851_match_table,
+		.pm = &ks8851_pm_ops,
+	},
+	.probe = ks8851_probe_spi,
+	.remove = ks8851_remove_spi,
+};
+module_spi_driver(ks8851_driver);
+
+MODULE_DESCRIPTION("KS8851 Network driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL");
+
+module_param_named(message, msg_enable, int, 0);
+MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)");
+MODULE_ALIAS("spi:ks8851");
-- 
cgit v1.2.3-59-g8ed1b


From 797047f875b5463719cc70ba213eb691d453c946 Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:45 +0200
Subject: net: ks8851: Implement Parallel bus operations

Implement accessors for KS8851-16MLL/MLLI/MLLU parallel bus variant of
the KS8851. This is based off the ks8851_mll.c , which is a driver for
exactly the same hardware, however the ks8851.c code is much higher
quality. Hence, this patch pulls out the relevant information from the
ks8851_mll.c on how to access the bus, but uses the common ks8851.c
code. To make this patch reviewable, instead of rewriting ks8851_mll.c,
ks8851_mll.c is removed in a separate subsequent patch.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/Kconfig      |   2 +
 drivers/net/ethernet/micrel/Makefile     |   1 +
 drivers/net/ethernet/micrel/ks8851_par.c | 357 +++++++++++++++++++++++++++++++
 3 files changed, 360 insertions(+)
 create mode 100644 drivers/net/ethernet/micrel/ks8851_par.c

diff --git a/drivers/net/ethernet/micrel/Kconfig b/drivers/net/ethernet/micrel/Kconfig
index b9c4d48e28e4..09f35209d43d 100644
--- a/drivers/net/ethernet/micrel/Kconfig
+++ b/drivers/net/ethernet/micrel/Kconfig
@@ -38,6 +38,8 @@ config KS8851_MLL
 	tristate "Micrel KS8851 MLL"
 	depends on HAS_IOMEM
 	select MII
+	select CRC32
+	select EEPROM_93CX6
 	---help---
 	  This platform driver is for Micrel KS8851 Address/data bus
 	  multiplexed network chip.
diff --git a/drivers/net/ethernet/micrel/Makefile b/drivers/net/ethernet/micrel/Makefile
index c7a4725c2e95..5cc00d22c708 100644
--- a/drivers/net/ethernet/micrel/Makefile
+++ b/drivers/net/ethernet/micrel/Makefile
@@ -7,4 +7,5 @@ obj-$(CONFIG_KS8842) += ks8842.o
 obj-$(CONFIG_KS8851) += ks8851.o
 ks8851-objs = ks8851_common.o ks8851_spi.o
 obj-$(CONFIG_KS8851_MLL) += ks8851_mll.o
+ks8851_mll-objs = ks8851_common.o ks8851_par.o
 obj-$(CONFIG_KSZ884X_PCI) += ksz884x.o
diff --git a/drivers/net/ethernet/micrel/ks8851_par.c b/drivers/net/ethernet/micrel/ks8851_par.c
new file mode 100644
index 000000000000..3bab0cb2b1a5
--- /dev/null
+++ b/drivers/net/ethernet/micrel/ks8851_par.c
@@ -0,0 +1,357 @@
+// SPDX-License-Identifier: GPL-2.0-only
+/* drivers/net/ethernet/micrel/ks8851_par.c
+ *
+ * Copyright 2009 Simtec Electronics
+ *	http://www.simtec.co.uk/
+ *	Ben Dooks <ben@simtec.co.uk>
+ */
+
+#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
+
+#define DEBUG
+
+#include <linux/interrupt.h>
+#include <linux/module.h>
+#include <linux/kernel.h>
+#include <linux/netdevice.h>
+#include <linux/etherdevice.h>
+#include <linux/ethtool.h>
+#include <linux/iopoll.h>
+#include <linux/mii.h>
+
+#include <linux/platform_device.h>
+#include <linux/of_net.h>
+
+#include "ks8851.h"
+
+static int msg_enable;
+
+#define BE3             0x8000      /* Byte Enable 3 */
+#define BE2             0x4000      /* Byte Enable 2 */
+#define BE1             0x2000      /* Byte Enable 1 */
+#define BE0             0x1000      /* Byte Enable 0 */
+
+/**
+ * struct ks8851_net_par - KS8851 Parallel driver private data
+ * @ks8851: KS8851 driver common private data
+ * @lock: Lock to ensure that the device is not accessed when busy.
+ * @hw_addr	: start address of data register.
+ * @hw_addr_cmd	: start address of command register.
+ * @cmd_reg_cache	: command register cached.
+ *
+ * The @lock ensures that the chip is protected when certain operations are
+ * in progress. When the read or write packet transfer is in progress, most
+ * of the chip registers are not accessible until the transfer is finished
+ * and the DMA has been de-asserted.
+ */
+struct ks8851_net_par {
+	struct ks8851_net	ks8851;
+	spinlock_t		lock;
+	void __iomem		*hw_addr;
+	void __iomem		*hw_addr_cmd;
+	u16			cmd_reg_cache;
+};
+
+#define to_ks8851_par(ks) container_of((ks), struct ks8851_net_par, ks8851)
+
+/**
+ * ks8851_lock_par - register access lock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Claim chip register access lock
+ */
+static void ks8851_lock_par(struct ks8851_net *ks, unsigned long *flags)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+
+	spin_lock_irqsave(&ksp->lock, *flags);
+}
+
+/**
+ * ks8851_unlock_par - register access unlock
+ * @ks: The chip state
+ * @flags: Spinlock flags
+ *
+ * Release chip register access lock
+ */
+static void ks8851_unlock_par(struct ks8851_net *ks, unsigned long *flags)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+
+	spin_unlock_irqrestore(&ksp->lock, *flags);
+}
+
+/**
+ * ks_check_endian - Check whether endianness of the bus is correct
+ * @ks	  : The chip information
+ *
+ * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit
+ * bus. To maintain optimum performance, the bus endianness should be set
+ * such that it matches the endianness of the CPU.
+ */
+static int ks_check_endian(struct ks8851_net *ks)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+	u16 cider;
+
+	/*
+	 * Read CIDER register first, however read it the "wrong" way around.
+	 * If the endian strap on the KS8851-16MLL is incorrect and the chip
+	 * is operating in different endianness than the CPU, then the meaning
+	 * of BE[3:0] byte-enable bits is also swapped such that:
+	 *    BE[3,2,1,0] becomes BE[1,0,3,2]
+	 *
+	 * Luckily for us, the byte-enable bits are the top four MSbits of
+	 * the address register and the CIDER register is at offset 0xc0.
+	 * Hence, by reading address 0xc0c0, which is not impacted by endian
+	 * swapping, we assert either BE[3:2] or BE[1:0] while reading the
+	 * CIDER register.
+	 *
+	 * If the bus configuration is correct, reading 0xc0c0 asserts
+	 * BE[3:2] and this read returns 0x0000, because to read register
+	 * with bottom two LSbits of address set to 0, BE[1:0] must be
+	 * asserted.
+	 *
+	 * If the bus configuration is NOT correct, reading 0xc0c0 asserts
+	 * BE[1:0] and this read returns non-zero 0x8872 value.
+	 */
+	iowrite16(BE3 | BE2 | KS_CIDER, ksp->hw_addr_cmd);
+	cider = ioread16(ksp->hw_addr);
+	if (!cider)
+		return 0;
+
+	netdev_err(ks->netdev, "incorrect EESK endian strap setting\n");
+
+	return -EINVAL;
+}
+
+/**
+ * ks8851_wrreg16_par - write 16bit register value to chip
+ * @ks: The chip state
+ * @reg: The register address
+ * @val: The value to write
+ *
+ * Issue a write to put the value @val into the register specified in @reg.
+ */
+static void ks8851_wrreg16_par(struct ks8851_net *ks, unsigned int reg,
+			       unsigned int val)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+
+	ksp->cmd_reg_cache = (u16)reg | ((BE1 | BE0) << (reg & 0x02));
+	iowrite16(ksp->cmd_reg_cache, ksp->hw_addr_cmd);
+	iowrite16(val, ksp->hw_addr);
+}
+
+/**
+ * ks8851_rdreg16_par - read 16 bit register from chip
+ * @ks: The chip information
+ * @reg: The register address
+ *
+ * Read a 16bit register from the chip, returning the result
+ */
+static unsigned int ks8851_rdreg16_par(struct ks8851_net *ks, unsigned int reg)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+
+	ksp->cmd_reg_cache = (u16)reg | ((BE1 | BE0) << (reg & 0x02));
+	iowrite16(ksp->cmd_reg_cache, ksp->hw_addr_cmd);
+	return ioread16(ksp->hw_addr);
+}
+
+/**
+ * ks8851_rdfifo_par - read data from the receive fifo
+ * @ks: The device state.
+ * @buff: The buffer address
+ * @len: The length of the data to read
+ *
+ * Issue an RXQ FIFO read command and read the @len amount of data from
+ * the FIFO into the buffer specified by @buff.
+ */
+static void ks8851_rdfifo_par(struct ks8851_net *ks, u8 *buff, unsigned int len)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+
+	netif_dbg(ks, rx_status, ks->netdev,
+		  "%s: %d@%p\n", __func__, len, buff);
+
+	ioread16_rep(ksp->hw_addr, (u16 *)buff + 1, len / 2);
+}
+
+/**
+ * ks8851_wrfifo_par - write packet to TX FIFO
+ * @ks: The device state.
+ * @txp: The sk_buff to transmit.
+ * @irq: IRQ on completion of the packet.
+ *
+ * Send the @txp to the chip. This means creating the relevant packet header
+ * specifying the length of the packet and the other information the chip
+ * needs, such as IRQ on completion. Send the header and the packet data to
+ * the device.
+ */
+static void ks8851_wrfifo_par(struct ks8851_net *ks, struct sk_buff *txp,
+			      bool irq)
+{
+	struct ks8851_net_par *ksp = to_ks8851_par(ks);
+	unsigned int len = ALIGN(txp->len, 4);
+	unsigned int fid = 0;
+
+	netif_dbg(ks, tx_queued, ks->netdev, "%s: skb %p, %d@%p, irq %d\n",
+		  __func__, txp, txp->len, txp->data, irq);
+
+	fid = ks->fid++;
+	fid &= TXFR_TXFID_MASK;
+
+	if (irq)
+		fid |= TXFR_TXIC;	/* irq on completion */
+
+	iowrite16(fid, ksp->hw_addr);
+	iowrite16(txp->len, ksp->hw_addr);
+
+	iowrite16_rep(ksp->hw_addr, txp->data, len / 2);
+}
+
+/**
+ * ks8851_rx_skb_par - receive skbuff
+ * @ks: The device state.
+ * @skb: The skbuff
+ */
+static void ks8851_rx_skb_par(struct ks8851_net *ks, struct sk_buff *skb)
+{
+	netif_rx(skb);
+}
+
+static unsigned int ks8851_rdreg16_par_txqcr(struct ks8851_net *ks)
+{
+	return ks8851_rdreg16_par(ks, KS_TXQCR);
+}
+
+/**
+ * ks8851_start_xmit_par - transmit packet
+ * @skb: The buffer to transmit
+ * @dev: The device used to transmit the packet.
+ *
+ * Called by the network layer to transmit the @skb. With the chip lock held,
+ * copy the packet into the TX FIFO and wait for the chip to enqueue it.
+ * Returns NETDEV_TX_BUSY, leaving @skb untouched, if the FIFO lacks space.
+ *
+ * Once the packet is in the FIFO the @skb is handed to ks8851_done_tx(),
+ * which is expected to release it. An enqueue timeout is therefore counted
+ * in tx_errors and NETDEV_TX_OK is returned, so the stack never requeues it.
+ */
+static netdev_tx_t ks8851_start_xmit_par(struct sk_buff *skb,
+					 struct net_device *dev)
+{
+	struct ks8851_net *ks = netdev_priv(dev);
+	netdev_tx_t ret = NETDEV_TX_OK;
+	unsigned long flags;
+	unsigned int txqcr;
+	u16 txmir;
+	int err;
+
+	netif_dbg(ks, tx_queued, ks->netdev,
+		  "%s: skb %p, %d@%p\n", __func__, skb, skb->len, skb->data);
+
+	ks8851_lock_par(ks, &flags);
+
+	txmir = ks8851_rdreg16_par(ks, KS_TXMIR) & 0x1fff;
+
+	if (likely(txmir >= skb->len + 12)) {
+		ks8851_wrreg16_par(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
+		ks8851_wrfifo_par(ks, skb, false);
+		ks8851_wrreg16_par(ks, KS_RXQCR, ks->rc_rxqcr);
+		ks8851_wrreg16_par(ks, KS_TXQCR, TXQCR_METFE);
+
+		err = readx_poll_timeout_atomic(ks8851_rdreg16_par_txqcr, ks,
+						txqcr, !(txqcr & TXQCR_METFE),
+						5, 1000000);
+		/* skb is released below: never report TX_BUSY here */
+		if (err)
+			dev->stats.tx_errors++;
+		ks8851_done_tx(ks, skb);
+	} else {
+		ret = NETDEV_TX_BUSY;
+	}
+
+	ks8851_unlock_par(ks, &flags);
+
+	return ret;
+}
+
+static int ks8851_probe_par(struct platform_device *pdev)
+{
+	struct device *dev = &pdev->dev;
+	struct ks8851_net_par *ksp;
+	struct net_device *netdev;
+	struct ks8851_net *ks;
+	int ret;
+
+	netdev = devm_alloc_etherdev(dev, sizeof(struct ks8851_net_par));
+	if (!netdev)
+		return -ENOMEM;
+
+	ks = netdev_priv(netdev);
+	ks->lock = ks8851_lock_par;
+	ks->unlock = ks8851_unlock_par;
+	ks->rdreg16 = ks8851_rdreg16_par;
+	ks->wrreg16 = ks8851_wrreg16_par;
+	ks->rdfifo = ks8851_rdfifo_par;
+	ks->wrfifo = ks8851_wrfifo_par;
+	ks->start_xmit = ks8851_start_xmit_par;
+	ks->rx_skb = ks8851_rx_skb_par;
+
+#define STD_IRQ (IRQ_LCI |	/* Link Change */	\
+		 IRQ_RXI |	/* RX done */		\
+		 IRQ_RXPSI)	/* RX process stop */
+	ks->rc_ier = STD_IRQ;
+
+	ksp = to_ks8851_par(ks);
+	spin_lock_init(&ksp->lock);
+
+	ksp->hw_addr = devm_platform_ioremap_resource(pdev, 0);
+	if (IS_ERR(ksp->hw_addr))
+		return PTR_ERR(ksp->hw_addr);
+	ksp->hw_addr_cmd = devm_platform_ioremap_resource(pdev, 1);
+	if (IS_ERR(ksp->hw_addr_cmd))
+		return PTR_ERR(ksp->hw_addr_cmd);
+
+	ret = ks_check_endian(ks);
+	if (ret)
+		return ret;
+
+	netdev->irq = platform_get_irq(pdev, 0); /* negative errno on failure */
+	if (netdev->irq < 0)
+		return netdev->irq;
+
+	return ks8851_probe_common(netdev, dev, msg_enable);
+}
+
+static int ks8851_remove_par(struct platform_device *pdev)
+{
+	return ks8851_remove_common(&pdev->dev);
+}
+
+static const struct of_device_id ks8851_match_table[] = {
+	{ .compatible = "micrel,ks8851-mll" },
+	{ }
+};
+MODULE_DEVICE_TABLE(of, ks8851_match_table);
+
+static struct platform_driver ks8851_driver = {
+	.driver = {
+		.name = "ks8851",
+		.of_match_table = ks8851_match_table,
+		.pm = &ks8851_pm_ops,
+	},
+	.probe = ks8851_probe_par,
+	.remove = ks8851_remove_par,
+};
+module_platform_driver(ks8851_driver);
+
+MODULE_DESCRIPTION("KS8851 Network driver");
+MODULE_AUTHOR("Ben Dooks <ben@simtec.co.uk>");
+MODULE_LICENSE("GPL");
+
+module_param_named(message, msg_enable, int, 0);
+MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)");
-- 
cgit v1.2.3-59-g8ed1b


From 72628da6d63413299a6369e71f5c97f0edea6a8b Mon Sep 17 00:00:00 2001
From: Marek Vasut <marex@denx.de>
Date: Fri, 29 May 2020 00:21:46 +0200
Subject: net: ks8851: Remove ks8851_mll.c

ks8851_mll.c is replaced by ks8851_par.c, which uses the common code
from ks8851.c, just like ks8851_spi.c. Remove this old ad-hoc driver.

Signed-off-by: Marek Vasut <marex@denx.de>
Cc: David S. Miller <davem@davemloft.net>
Cc: Lukas Wunner <lukas@wunner.de>
Cc: Petr Stetiar <ynezz@true.cz>
Cc: YueHaibing <yuehaibing@huawei.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/micrel/ks8851_mll.c | 1393 ------------------------------
 1 file changed, 1393 deletions(-)
 delete mode 100644 drivers/net/ethernet/micrel/ks8851_mll.c

diff --git a/drivers/net/ethernet/micrel/ks8851_mll.c b/drivers/net/ethernet/micrel/ks8851_mll.c
deleted file mode 100644
index 45cc840d8e2e..000000000000
--- a/drivers/net/ethernet/micrel/ks8851_mll.c
+++ /dev/null
@@ -1,1393 +0,0 @@
-// SPDX-License-Identifier: GPL-2.0-only
-/**
- * drivers/net/ethernet/micrel/ks8851_mll.c
- * Copyright (c) 2009 Micrel Inc.
- */
-
-/* Supports:
- * KS8851 16bit MLL chip from Micrel Inc.
- */
-
-#define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
-
-#include <linux/interrupt.h>
-#include <linux/module.h>
-#include <linux/kernel.h>
-#include <linux/netdevice.h>
-#include <linux/etherdevice.h>
-#include <linux/ethtool.h>
-#include <linux/cache.h>
-#include <linux/crc32.h>
-#include <linux/crc32poly.h>
-#include <linux/mii.h>
-#include <linux/platform_device.h>
-#include <linux/delay.h>
-#include <linux/slab.h>
-#include <linux/ks8851_mll.h>
-#include <linux/of.h>
-#include <linux/of_device.h>
-#include <linux/of_net.h>
-
-#include "ks8851.h"
-
-#define	DRV_NAME	"ks8851_mll"
-
-static u8 KS_DEFAULT_MAC_ADDRESS[] = { 0x00, 0x10, 0xA1, 0x86, 0x95, 0x11 };
-#define MAX_RECV_FRAMES			255
-#define MAX_BUF_SIZE			2048
-#define TX_BUF_SIZE			2000
-#define RX_BUF_SIZE			2000
-
-#define RXCR1_FILTER_MASK    		(RXCR1_RXINVF | RXCR1_RXAE | \
-					 RXCR1_RXMAFMA | RXCR1_RXPAFMA)
-#define RXQCR_CMD_CNTL                	(RXQCR_RXFCTE|RXQCR_ADRFE)
-
-#define	ENUM_BUS_NONE			0
-#define	ENUM_BUS_8BIT			1
-#define	ENUM_BUS_16BIT			2
-#define	ENUM_BUS_32BIT			3
-
-#define MAX_MCAST_LST			32
-#define HW_MCAST_SIZE			8
-
-/**
- * union ks_tx_hdr - tx header data
- * @txb: The header as bytes
- * @txw: The header as 16bit, little-endian words
- *
- * A dual representation of the tx header data to allow
- * access to individual bytes, and to allow 16bit accesses
- * with 16bit alignment.
- */
-union ks_tx_hdr {
-	u8      txb[4];
-	__le16  txw[2];
-};
-
-/**
- * struct ks_net - KS8851 driver private data
- * @net_device 	: The network device we're bound to
- * @hw_addr	: start address of data register.
- * @hw_addr_cmd	: start address of command register.
- * @txh    	: temporaly buffer to save status/length.
- * @lock	: Lock to ensure that the device is not accessed when busy.
- * @pdev	: Pointer to platform device.
- * @mii		: The MII state information for the mii calls.
- * @frame_head_info   	: frame header information for multi-pkt rx.
- * @statelock	: Lock on this structure for tx list.
- * @msg_enable	: The message flags controlling driver output (see ethtool).
- * @frame_cnt  	: number of frames received.
- * @bus_width  	: i/o bus width.
- * @rc_rxqcr	: Cached copy of KS_RXQCR.
- * @rc_txcr	: Cached copy of KS_TXCR.
- * @rc_ier	: Cached copy of KS_IER.
- * @sharedbus  	: Multipex(addr and data bus) mode indicator.
- * @cmd_reg_cache	: command register cached.
- * @cmd_reg_cache_int	: command register cached. Used in the irq handler.
- * @promiscuous	: promiscuous mode indicator.
- * @all_mcast  	: mutlicast indicator.
- * @mcast_lst_size   	: size of multicast list.
- * @mcast_lst    	: multicast list.
- * @mcast_bits    	: multicast enabed.
- * @mac_addr   		: MAC address assigned to this device.
- * @fid    		: frame id.
- * @extra_byte    	: number of extra byte prepended rx pkt.
- * @enabled    		: indicator this device works.
- *
- * The @lock ensures that the chip is protected when certain operations are
- * in progress. When the read or write packet transfer is in progress, most
- * of the chip registers are not accessible until the transfer is finished and
- * the DMA has been de-asserted.
- *
- * The @statelock is used to protect information in the structure which may
- * need to be accessed via several sources, such as the network driver layer
- * or one of the work queues.
- *
- */
-
-/* Receive multiplex framer header info */
-struct type_frame_head {
-	u16	sts;         /* Frame status */
-	u16	len;         /* Byte count */
-};
-
-struct ks_net {
-	struct net_device	*netdev;
-	void __iomem    	*hw_addr;
-	void __iomem    	*hw_addr_cmd;
-	union ks_tx_hdr		txh ____cacheline_aligned;
-	struct mutex      	lock; /* spinlock to be interrupt safe */
-	struct platform_device *pdev;
-	struct mii_if_info	mii;
-	struct type_frame_head	*frame_head_info;
-	spinlock_t		statelock;
-	u32			msg_enable;
-	u32			frame_cnt;
-	int			bus_width;
-
-	u16			rc_rxqcr;
-	u16			rc_txcr;
-	u16			rc_ier;
-	u16			sharedbus;
-	u16			cmd_reg_cache;
-	u16			cmd_reg_cache_int;
-	u16			promiscuous;
-	u16			all_mcast;
-	u16			mcast_lst_size;
-	u8			mcast_lst[MAX_MCAST_LST][ETH_ALEN];
-	u8			mcast_bits[HW_MCAST_SIZE];
-	u8			mac_addr[6];
-	u8                      fid;
-	u8			extra_byte;
-	u8			enabled;
-};
-
-static int msg_enable;
-
-#define BE3             0x8000      /* Byte Enable 3 */
-#define BE2             0x4000      /* Byte Enable 2 */
-#define BE1             0x2000      /* Byte Enable 1 */
-#define BE0             0x1000      /* Byte Enable 0 */
-
-/* register read/write calls.
- *
- * All these calls issue transactions to access the chip's registers. They
- * all require that the necessary lock is held to prevent accesses when the
- * chip is busy transferring packet data (RX/TX FIFO accesses).
- */
-
-/**
- * ks_check_endian - Check whether endianness of the bus is correct
- * @ks	  : The chip information
- *
- * The KS8851-16MLL EESK pin allows selecting the endianness of the 16bit
- * bus. To maintain optimum performance, the bus endianness should be set
- * such that it matches the endianness of the CPU.
- */
-
-static int ks_check_endian(struct ks_net *ks)
-{
-	u16 cider;
-
-	/*
-	 * Read CIDER register first, however read it the "wrong" way around.
-	 * If the endian strap on the KS8851-16MLL in incorrect and the chip
-	 * is operating in different endianness than the CPU, then the meaning
-	 * of BE[3:0] byte-enable bits is also swapped such that:
-	 *    BE[3,2,1,0] becomes BE[1,0,3,2]
-	 *
-	 * Luckily for us, the byte-enable bits are the top four MSbits of
-	 * the address register and the CIDER register is at offset 0xc0.
-	 * Hence, by reading address 0xc0c0, which is not impacted by endian
-	 * swapping, we assert either BE[3:2] or BE[1:0] while reading the
-	 * CIDER register.
-	 *
-	 * If the bus configuration is correct, reading 0xc0c0 asserts
-	 * BE[3:2] and this read returns 0x0000, because to read register
-	 * with bottom two LSbits of address set to 0, BE[1:0] must be
-	 * asserted.
-	 *
-	 * If the bus configuration is NOT correct, reading 0xc0c0 asserts
-	 * BE[1:0] and this read returns non-zero 0x8872 value.
-	 */
-	iowrite16(BE3 | BE2 | KS_CIDER, ks->hw_addr_cmd);
-	cider = ioread16(ks->hw_addr);
-	if (!cider)
-		return 0;
-
-	netdev_err(ks->netdev, "incorrect EESK endian strap setting\n");
-
-	return -EINVAL;
-}
-
-/**
- * ks_rdreg16 - read 16 bit register from device
- * @ks	  : The chip information
- * @offset: The register address
- *
- * Read a 16bit register from the chip, returning the result
- */
-
-static u16 ks_rdreg16(struct ks_net *ks, int offset)
-{
-	ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
-	iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
-	return ioread16(ks->hw_addr);
-}
-
-/**
- * ks_wrreg16 - write 16bit register value to chip
- * @ks: The chip information
- * @offset: The register address
- * @value: The value to write
- *
- */
-
-static void ks_wrreg16(struct ks_net *ks, int offset, u16 value)
-{
-	ks->cmd_reg_cache = (u16)offset | ((BE1 | BE0) << (offset & 0x02));
-	iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
-	iowrite16(value, ks->hw_addr);
-}
-
-/**
- * ks_inblk - read a block of data from QMU. This is called after sudo DMA mode enabled.
- * @ks: The chip state
- * @wptr: buffer address to save data
- * @len: length in byte to read
- *
- */
-static inline void ks_inblk(struct ks_net *ks, u16 *wptr, u32 len)
-{
-	len >>= 1;
-	while (len--)
-		*wptr++ = (u16)ioread16(ks->hw_addr);
-}
-
-/**
- * ks_outblk - write data to QMU. This is called after sudo DMA mode enabled.
- * @ks: The chip information
- * @wptr: buffer address
- * @len: length in byte to write
- *
- */
-static inline void ks_outblk(struct ks_net *ks, u16 *wptr, u32 len)
-{
-	len >>= 1;
-	while (len--)
-		iowrite16(*wptr++, ks->hw_addr);
-}
-
-static void ks_disable_int(struct ks_net *ks)
-{
-	ks_wrreg16(ks, KS_IER, 0x0000);
-}  /* ks_disable_int */
-
-static void ks_enable_int(struct ks_net *ks)
-{
-	ks_wrreg16(ks, KS_IER, ks->rc_ier);
-}  /* ks_enable_int */
-
-/**
- * ks_tx_fifo_space - return the available hardware buffer size.
- * @ks: The chip information
- *
- */
-static inline u16 ks_tx_fifo_space(struct ks_net *ks)
-{
-	return ks_rdreg16(ks, KS_TXMIR) & 0x1fff;
-}
-
-/**
- * ks_save_cmd_reg - save the command register from the cache.
- * @ks: The chip information
- *
- */
-static inline void ks_save_cmd_reg(struct ks_net *ks)
-{
-	/*ks8851 MLL has a bug to read back the command register.
-	* So rely on software to save the content of command register.
-	*/
-	ks->cmd_reg_cache_int = ks->cmd_reg_cache;
-}
-
-/**
- * ks_restore_cmd_reg - restore the command register from the cache and
- * 	write to hardware register.
- * @ks: The chip information
- *
- */
-static inline void ks_restore_cmd_reg(struct ks_net *ks)
-{
-	ks->cmd_reg_cache = ks->cmd_reg_cache_int;
-	iowrite16(ks->cmd_reg_cache, ks->hw_addr_cmd);
-}
-
-/**
- * ks_set_powermode - set power mode of the device
- * @ks: The chip information
- * @pwrmode: The power mode value to write to KS_PMECR.
- *
- * Change the power mode of the chip.
- */
-static void ks_set_powermode(struct ks_net *ks, unsigned pwrmode)
-{
-	unsigned pmecr;
-
-	netif_dbg(ks, hw, ks->netdev, "setting power mode %d\n", pwrmode);
-
-	ks_rdreg16(ks, KS_GRR);
-	pmecr = ks_rdreg16(ks, KS_PMECR);
-	pmecr &= ~PMECR_PM_MASK;
-	pmecr |= pwrmode;
-
-	ks_wrreg16(ks, KS_PMECR, pmecr);
-}
-
-/**
- * ks_read_config - read chip configuration of bus width.
- * @ks: The chip information
- *
- */
-static void ks_read_config(struct ks_net *ks)
-{
-	u16 reg_data = 0;
-
-	/* Regardless of bus width, 8 bit read should always work.*/
-	reg_data = ks_rdreg16(ks, KS_CCR);
-
-	/* addr/data bus are multiplexed */
-	ks->sharedbus = (reg_data & CCR_SHARED) == CCR_SHARED;
-
-	/* There are garbage data when reading data from QMU,
-	depending on bus-width.
-	*/
-
-	if (reg_data & CCR_8BIT) {
-		ks->bus_width = ENUM_BUS_8BIT;
-		ks->extra_byte = 1;
-	} else if (reg_data & CCR_16BIT) {
-		ks->bus_width = ENUM_BUS_16BIT;
-		ks->extra_byte = 2;
-	} else {
-		ks->bus_width = ENUM_BUS_32BIT;
-		ks->extra_byte = 4;
-	}
-}
-
-/**
- * ks_soft_reset - issue one of the soft reset to the device
- * @ks: The device state.
- * @op: The bit(s) to set in the GRR
- *
- * Issue the relevant soft-reset command to the device's GRR register
- * specified by @op.
- *
- * Note, the delays are in there as a caution to ensure that the reset
- * has time to take effect and then complete. Since the datasheet does
- * not currently specify the exact sequence, we have chosen something
- * that seems to work with our device.
- */
-static void ks_soft_reset(struct ks_net *ks, unsigned op)
-{
-	/* Disable interrupt first */
-	ks_wrreg16(ks, KS_IER, 0x0000);
-	ks_wrreg16(ks, KS_GRR, op);
-	mdelay(10);	/* wait a short time to effect reset */
-	ks_wrreg16(ks, KS_GRR, 0);
-	mdelay(1);	/* wait for condition to clear */
-}
-
-
-static void ks_enable_qmu(struct ks_net *ks)
-{
-	u16 w;
-
-	w = ks_rdreg16(ks, KS_TXCR);
-	/* Enables QMU Transmit (TXCR). */
-	ks_wrreg16(ks, KS_TXCR, w | TXCR_TXE);
-
-	/*
-	 * RX Frame Count Threshold Enable and Auto-Dequeue RXQ Frame
-	 * Enable
-	 */
-
-	w = ks_rdreg16(ks, KS_RXQCR);
-	ks_wrreg16(ks, KS_RXQCR, w | RXQCR_RXFCTE);
-
-	/* Enables QMU Receive (RXCR1). */
-	w = ks_rdreg16(ks, KS_RXCR1);
-	ks_wrreg16(ks, KS_RXCR1, w | RXCR1_RXE);
-	ks->enabled = true;
-}  /* ks_enable_qmu */
-
-static void ks_disable_qmu(struct ks_net *ks)
-{
-	u16	w;
-
-	w = ks_rdreg16(ks, KS_TXCR);
-
-	/* Disables QMU Transmit (TXCR). */
-	w  &= ~TXCR_TXE;
-	ks_wrreg16(ks, KS_TXCR, w);
-
-	/* Disables QMU Receive (RXCR1). */
-	w = ks_rdreg16(ks, KS_RXCR1);
-	w &= ~RXCR1_RXE ;
-	ks_wrreg16(ks, KS_RXCR1, w);
-
-	ks->enabled = false;
-
-}  /* ks_disable_qmu */
-
-/**
- * ks_read_qmu - read 1 pkt data from the QMU.
- * @ks: The chip information
- * @buf: buffer address to save 1 pkt
- * @len: Pkt length
- * Here is the sequence to read 1 pkt:
- *	1. set sudo DMA mode
- *	2. read prepend data
- *	3. read pkt data
- *	4. reset sudo DMA Mode
- */
-static inline void ks_read_qmu(struct ks_net *ks, u16 *buf, u32 len)
-{
-	u32 r =  ks->extra_byte & 0x1 ;
-	u32 w = ks->extra_byte - r;
-
-	/* 1. set sudo DMA mode */
-	ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
-	ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
-
-	/* 2. read prepend data */
-	/**
-	 * read 4 + extra bytes and discard them.
-	 * extra bytes for dummy, 2 for status, 2 for len
-	 */
-
-	/* use likely(r) for 8 bit access for performance */
-	if (unlikely(r))
-		ioread8(ks->hw_addr);
-	ks_inblk(ks, buf, w + 2 + 2);
-
-	/* 3. read pkt data */
-	ks_inblk(ks, buf, ALIGN(len, 4));
-
-	/* 4. reset sudo DMA Mode */
-	ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
-}
-
-/**
- * ks_rcv - read multiple pkts data from the QMU.
- * @ks: The chip information
- * @netdev: The network device being opened.
- *
- * Read all of header information before reading pkt content.
- * It is not allowed only port of pkts in QMU after issuing
- * interrupt ack.
- */
-static void ks_rcv(struct ks_net *ks, struct net_device *netdev)
-{
-	u32	i;
-	struct type_frame_head *frame_hdr = ks->frame_head_info;
-	struct sk_buff *skb;
-
-	ks->frame_cnt = ks_rdreg16(ks, KS_RXFCTR) >> 8;
-
-	/* read all header information */
-	for (i = 0; i < ks->frame_cnt; i++) {
-		/* Checking Received packet status */
-		frame_hdr->sts = ks_rdreg16(ks, KS_RXFHSR);
-		/* Get packet len from hardware */
-		frame_hdr->len = ks_rdreg16(ks, KS_RXFHBCR);
-		frame_hdr++;
-	}
-
-	frame_hdr = ks->frame_head_info;
-	while (ks->frame_cnt--) {
-		if (unlikely(!(frame_hdr->sts & RXFSHR_RXFV) ||
-			     frame_hdr->len >= RX_BUF_SIZE ||
-			     frame_hdr->len <= 0)) {
-
-			/* discard an invalid packet */
-			ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF));
-			netdev->stats.rx_dropped++;
-			if (!(frame_hdr->sts & RXFSHR_RXFV))
-				netdev->stats.rx_frame_errors++;
-			else
-				netdev->stats.rx_length_errors++;
-			frame_hdr++;
-			continue;
-		}
-
-		skb = netdev_alloc_skb(netdev, frame_hdr->len + 16);
-		if (likely(skb)) {
-			skb_reserve(skb, 2);
-			/* read data block including CRC 4 bytes */
-			ks_read_qmu(ks, (u16 *)skb->data, frame_hdr->len);
-			skb_put(skb, frame_hdr->len - 4);
-			skb->protocol = eth_type_trans(skb, netdev);
-			netif_rx(skb);
-			/* exclude CRC size */
-			netdev->stats.rx_bytes += frame_hdr->len - 4;
-			netdev->stats.rx_packets++;
-		} else {
-			ks_wrreg16(ks, KS_RXQCR, (ks->rc_rxqcr | RXQCR_RRXEF));
-			netdev->stats.rx_dropped++;
-		}
-		frame_hdr++;
-	}
-}
-
-/**
- * ks_update_link_status - link status update.
- * @netdev: The network device being opened.
- * @ks: The chip information
- *
- */
-
-static void ks_update_link_status(struct net_device *netdev, struct ks_net *ks)
-{
-	/* check the status of the link */
-	u32 link_up_status;
-	if (ks_rdreg16(ks, KS_P1SR) & P1SR_LINK_GOOD) {
-		netif_carrier_on(netdev);
-		link_up_status = true;
-	} else {
-		netif_carrier_off(netdev);
-		link_up_status = false;
-	}
-	netif_dbg(ks, link, ks->netdev,
-		  "%s: %s\n", __func__, link_up_status ? "UP" : "DOWN");
-}
-
-/**
- * ks_irq - device interrupt handler
- * @irq: Interrupt number passed from the IRQ handler.
- * @pw: The private word passed to register_irq(), our struct ks_net.
- *
- * This is the handler invoked to find out what happened
- *
- * Read the interrupt status, work out what needs to be done and then clear
- * any of the interrupts that are not needed.
- */
-
-static irqreturn_t ks_irq(int irq, void *pw)
-{
-	struct net_device *netdev = pw;
-	struct ks_net *ks = netdev_priv(netdev);
-	unsigned long flags;
-	u16 status;
-
-	spin_lock_irqsave(&ks->statelock, flags);
-	/*this should be the first in IRQ handler */
-	ks_save_cmd_reg(ks);
-
-	status = ks_rdreg16(ks, KS_ISR);
-	if (unlikely(!status)) {
-		ks_restore_cmd_reg(ks);
-		spin_unlock_irqrestore(&ks->statelock, flags);
-		return IRQ_NONE;
-	}
-
-	ks_wrreg16(ks, KS_ISR, status);
-
-	if (likely(status & IRQ_RXI))
-		ks_rcv(ks, netdev);
-
-	if (unlikely(status & IRQ_LCI))
-		ks_update_link_status(netdev, ks);
-
-	if (unlikely(status & IRQ_TXI))
-		netif_wake_queue(netdev);
-
-	if (unlikely(status & IRQ_LDI)) {
-
-		u16 pmecr = ks_rdreg16(ks, KS_PMECR);
-		pmecr &= ~PMECR_WKEVT_MASK;
-		ks_wrreg16(ks, KS_PMECR, pmecr | PMECR_WKEVT_LINK);
-	}
-
-	if (unlikely(status & IRQ_RXOI))
-		ks->netdev->stats.rx_over_errors++;
-	/* this should be the last in IRQ handler*/
-	ks_restore_cmd_reg(ks);
-	spin_unlock_irqrestore(&ks->statelock, flags);
-	return IRQ_HANDLED;
-}
-
-
-/**
- * ks_net_open - open network device
- * @netdev: The network device being opened.
- *
- * Called when the network device is marked active, such as a user executing
- * 'ifconfig up' on the device.
- */
-static int ks_net_open(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	int err;
-
-#define	KS_INT_FLAGS	IRQF_TRIGGER_LOW
-	/* lock the card, even if we may not actually do anything
-	 * else at the moment.
-	 */
-
-	netif_dbg(ks, ifup, ks->netdev, "%s - entry\n", __func__);
-
-	/* reset the HW */
-	err = request_irq(netdev->irq, ks_irq, KS_INT_FLAGS, DRV_NAME, netdev);
-
-	if (err) {
-		pr_err("Failed to request IRQ: %d: %d\n", netdev->irq, err);
-		return err;
-	}
-
-	/* wake up powermode to normal mode */
-	ks_set_powermode(ks, PMECR_PM_NORMAL);
-	mdelay(1);	/* wait for normal mode to take effect */
-
-	ks_wrreg16(ks, KS_ISR, 0xffff);
-	ks_enable_int(ks);
-	ks_enable_qmu(ks);
-	netif_start_queue(ks->netdev);
-
-	netif_dbg(ks, ifup, ks->netdev, "network device up\n");
-
-	return 0;
-}
-
-/**
- * ks_net_stop - close network device
- * @netdev: The device being closed.
- *
- * Called to close down a network device which has been active. Cancell any
- * work, shutdown the RX and TX process and then place the chip into a low
- * power state whilst it is not being used.
- */
-static int ks_net_stop(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-
-	netif_info(ks, ifdown, netdev, "shutting down\n");
-
-	netif_stop_queue(netdev);
-
-	mutex_lock(&ks->lock);
-
-	/* turn off the IRQs and ack any outstanding */
-	ks_wrreg16(ks, KS_IER, 0x0000);
-	ks_wrreg16(ks, KS_ISR, 0xffff);
-
-	/* shutdown RX/TX QMU */
-	ks_disable_qmu(ks);
-	ks_disable_int(ks);
-
-	/* set powermode to soft power down to save power */
-	ks_set_powermode(ks, PMECR_PM_SOFTDOWN);
-	free_irq(netdev->irq, netdev);
-	mutex_unlock(&ks->lock);
-	return 0;
-}
-
-
-/**
- * ks_write_qmu - write 1 pkt data to the QMU.
- * @ks: The chip information
- * @pdata: buffer address to save 1 pkt
- * @len: Pkt length in byte
- * Here is the sequence to write 1 pkt:
- *	1. set sudo DMA mode
- *	2. write status/length
- *	3. write pkt data
- *	4. reset sudo DMA Mode
- *	5. reset sudo DMA mode
- *	6. Wait until pkt is out
- */
-static void ks_write_qmu(struct ks_net *ks, u8 *pdata, u16 len)
-{
-	/* start header at txb[0] to align txw entries */
-	ks->txh.txw[0] = 0;
-	ks->txh.txw[1] = cpu_to_le16(len);
-
-	/* 1. set sudo-DMA mode */
-	ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr | RXQCR_SDA);
-	/* 2. write status/lenth info */
-	ks_outblk(ks, ks->txh.txw, 4);
-	/* 3. write pkt data */
-	ks_outblk(ks, (u16 *)pdata, ALIGN(len, 4));
-	/* 4. reset sudo-DMA mode */
-	ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
-	/* 5. Enqueue Tx(move the pkt from TX buffer into TXQ) */
-	ks_wrreg16(ks, KS_TXQCR, TXQCR_METFE);
-	/* 6. wait until TXQCR_METFE is auto-cleared */
-	while (ks_rdreg16(ks, KS_TXQCR) & TXQCR_METFE)
-		;
-}
-
-/**
- * ks_start_xmit - transmit packet
- * @skb		: The buffer to transmit
- * @netdev	: The device used to transmit the packet.
- *
- * Called by the network layer to transmit the @skb.
- * spin_lock_irqsave is required because tx and rx should be mutual exclusive.
- * So while tx is in-progress, prevent IRQ interrupt from happenning.
- */
-static netdev_tx_t ks_start_xmit(struct sk_buff *skb, struct net_device *netdev)
-{
-	netdev_tx_t retv = NETDEV_TX_OK;
-	struct ks_net *ks = netdev_priv(netdev);
-	unsigned long flags;
-
-	spin_lock_irqsave(&ks->statelock, flags);
-
-	/* Extra space are required:
-	*  4 byte for alignment, 4 for status/length, 4 for CRC
-	*/
-
-	if (likely(ks_tx_fifo_space(ks) >= skb->len + 12)) {
-		ks_write_qmu(ks, skb->data, skb->len);
-		/* add tx statistics */
-		netdev->stats.tx_bytes += skb->len;
-		netdev->stats.tx_packets++;
-		dev_kfree_skb(skb);
-	} else
-		retv = NETDEV_TX_BUSY;
-	spin_unlock_irqrestore(&ks->statelock, flags);
-	return retv;
-}
-
-/**
- * ks_start_rx - ready to serve pkts
- * @ks		: The chip information
- *
- */
-static void ks_start_rx(struct ks_net *ks)
-{
-	u16 cntl;
-
-	/* Enables QMU Receive (RXCR1). */
-	cntl = ks_rdreg16(ks, KS_RXCR1);
-	cntl |= RXCR1_RXE ;
-	ks_wrreg16(ks, KS_RXCR1, cntl);
-}  /* ks_start_rx */
-
-/**
- * ks_stop_rx - stop to serve pkts
- * @ks		: The chip information
- *
- */
-static void ks_stop_rx(struct ks_net *ks)
-{
-	u16 cntl;
-
-	/* Disables QMU Receive (RXCR1). */
-	cntl = ks_rdreg16(ks, KS_RXCR1);
-	cntl &= ~RXCR1_RXE ;
-	ks_wrreg16(ks, KS_RXCR1, cntl);
-
-}  /* ks_stop_rx */
-
-static unsigned long const ethernet_polynomial = CRC32_POLY_BE;
-
-static unsigned long ether_gen_crc(int length, u8 *data)
-{
-	long crc = -1;
-	while (--length >= 0) {
-		u8 current_octet = *data++;
-		int bit;
-
-		for (bit = 0; bit < 8; bit++, current_octet >>= 1) {
-			crc = (crc << 1) ^
-				((crc < 0) ^ (current_octet & 1) ?
-			ethernet_polynomial : 0);
-		}
-	}
-	return (unsigned long)crc;
-}  /* ether_gen_crc */
-
-/**
-* ks_set_grpaddr - set multicast information
-* @ks : The chip information
-*/
-
-static void ks_set_grpaddr(struct ks_net *ks)
-{
-	u8	i;
-	u32	index, position, value;
-
-	memset(ks->mcast_bits, 0, sizeof(u8) * HW_MCAST_SIZE);
-
-	for (i = 0; i < ks->mcast_lst_size; i++) {
-		position = (ether_gen_crc(6, ks->mcast_lst[i]) >> 26) & 0x3f;
-		index = position >> 3;
-		value = 1 << (position & 7);
-		ks->mcast_bits[index] |= (u8)value;
-	}
-
-	for (i  = 0; i < HW_MCAST_SIZE; i++) {
-		if (i & 1) {
-			ks_wrreg16(ks, (u16)((KS_MAHTR0 + i) & ~1),
-				(ks->mcast_bits[i] << 8) |
-				ks->mcast_bits[i - 1]);
-		}
-	}
-}  /* ks_set_grpaddr */
-
-/**
-* ks_clear_mcast - clear multicast information
-*
-* @ks : The chip information
-* This routine removes all mcast addresses set in the hardware.
-*/
-
-static void ks_clear_mcast(struct ks_net *ks)
-{
-	u16	i, mcast_size;
-	for (i = 0; i < HW_MCAST_SIZE; i++)
-		ks->mcast_bits[i] = 0;
-
-	mcast_size = HW_MCAST_SIZE >> 2;
-	for (i = 0; i < mcast_size; i++)
-		ks_wrreg16(ks, KS_MAHTR0 + (2*i), 0);
-}
-
-static void ks_set_promis(struct ks_net *ks, u16 promiscuous_mode)
-{
-	u16		cntl;
-	ks->promiscuous = promiscuous_mode;
-	ks_stop_rx(ks);  /* Stop receiving for reconfiguration */
-	cntl = ks_rdreg16(ks, KS_RXCR1);
-
-	cntl &= ~RXCR1_FILTER_MASK;
-	if (promiscuous_mode)
-		/* Enable Promiscuous mode */
-		cntl |= RXCR1_RXAE | RXCR1_RXINVF;
-	else
-		/* Disable Promiscuous mode (default normal mode) */
-		cntl |= RXCR1_RXPAFMA;
-
-	ks_wrreg16(ks, KS_RXCR1, cntl);
-
-	if (ks->enabled)
-		ks_start_rx(ks);
-
-}  /* ks_set_promis */
-
-static void ks_set_mcast(struct ks_net *ks, u16 mcast)
-{
-	u16	cntl;
-
-	ks->all_mcast = mcast;
-	ks_stop_rx(ks);  /* Stop receiving for reconfiguration */
-	cntl = ks_rdreg16(ks, KS_RXCR1);
-	cntl &= ~RXCR1_FILTER_MASK;
-	if (mcast)
-		/* Enable "Perfect with Multicast address passed mode" */
-		cntl |= (RXCR1_RXAE | RXCR1_RXMAFMA | RXCR1_RXPAFMA);
-	else
-		/**
-		 * Disable "Perfect with Multicast address passed
-		 * mode" (normal mode).
-		 */
-		cntl |= RXCR1_RXPAFMA;
-
-	ks_wrreg16(ks, KS_RXCR1, cntl);
-
-	if (ks->enabled)
-		ks_start_rx(ks);
-}  /* ks_set_mcast */
-
-static void ks_set_rx_mode(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	struct netdev_hw_addr *ha;
-
-	/* Turn on/off promiscuous mode. */
-	if ((netdev->flags & IFF_PROMISC) == IFF_PROMISC)
-		ks_set_promis(ks,
-			(u16)((netdev->flags & IFF_PROMISC) == IFF_PROMISC));
-	/* Turn on/off all mcast mode. */
-	else if ((netdev->flags & IFF_ALLMULTI) == IFF_ALLMULTI)
-		ks_set_mcast(ks,
-			(u16)((netdev->flags & IFF_ALLMULTI) == IFF_ALLMULTI));
-	else
-		ks_set_promis(ks, false);
-
-	if ((netdev->flags & IFF_MULTICAST) && netdev_mc_count(netdev)) {
-		if (netdev_mc_count(netdev) <= MAX_MCAST_LST) {
-			int i = 0;
-
-			netdev_for_each_mc_addr(ha, netdev) {
-				if (i >= MAX_MCAST_LST)
-					break;
-				memcpy(ks->mcast_lst[i++], ha->addr, ETH_ALEN);
-			}
-			ks->mcast_lst_size = (u8)i;
-			ks_set_grpaddr(ks);
-		} else {
-			/**
-			 * List too big to support so
-			 * turn on all mcast mode.
-			 */
-			ks->mcast_lst_size = MAX_MCAST_LST;
-			ks_set_mcast(ks, true);
-		}
-	} else {
-		ks->mcast_lst_size = 0;
-		ks_clear_mcast(ks);
-	}
-} /* ks_set_rx_mode */
-
-static void ks_set_mac(struct ks_net *ks, u8 *data)
-{
-	u16 *pw = (u16 *)data;
-	u16 w, u;
-
-	ks_stop_rx(ks);  /* Stop receiving for reconfiguration */
-
-	u = *pw++;
-	w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF);
-	ks_wrreg16(ks, KS_MARH, w);
-
-	u = *pw++;
-	w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF);
-	ks_wrreg16(ks, KS_MARM, w);
-
-	u = *pw;
-	w = ((u & 0xFF) << 8) | ((u >> 8) & 0xFF);
-	ks_wrreg16(ks, KS_MARL, w);
-
-	memcpy(ks->mac_addr, data, ETH_ALEN);
-
-	if (ks->enabled)
-		ks_start_rx(ks);
-}
-
-static int ks_set_mac_address(struct net_device *netdev, void *paddr)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	struct sockaddr *addr = paddr;
-	u8 *da;
-
-	memcpy(netdev->dev_addr, addr->sa_data, netdev->addr_len);
-
-	da = (u8 *)netdev->dev_addr;
-
-	ks_set_mac(ks, da);
-	return 0;
-}
-
-static int ks_net_ioctl(struct net_device *netdev, struct ifreq *req, int cmd)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-
-	if (!netif_running(netdev))
-		return -EINVAL;
-
-	return generic_mii_ioctl(&ks->mii, if_mii(req), cmd, NULL);
-}
-
-static const struct net_device_ops ks_netdev_ops = {
-	.ndo_open		= ks_net_open,
-	.ndo_stop		= ks_net_stop,
-	.ndo_do_ioctl		= ks_net_ioctl,
-	.ndo_start_xmit		= ks_start_xmit,
-	.ndo_set_mac_address	= ks_set_mac_address,
-	.ndo_set_rx_mode	= ks_set_rx_mode,
-	.ndo_validate_addr	= eth_validate_addr,
-};
-
-/* ethtool support */
-
-static void ks_get_drvinfo(struct net_device *netdev,
-			       struct ethtool_drvinfo *di)
-{
-	strlcpy(di->driver, DRV_NAME, sizeof(di->driver));
-	strlcpy(di->version, "1.00", sizeof(di->version));
-	strlcpy(di->bus_info, dev_name(netdev->dev.parent),
-		sizeof(di->bus_info));
-}
-
-static u32 ks_get_msglevel(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	return ks->msg_enable;
-}
-
-static void ks_set_msglevel(struct net_device *netdev, u32 to)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	ks->msg_enable = to;
-}
-
-static int ks_get_link_ksettings(struct net_device *netdev,
-				 struct ethtool_link_ksettings *cmd)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-
-	mii_ethtool_get_link_ksettings(&ks->mii, cmd);
-
-	return 0;
-}
-
-static int ks_set_link_ksettings(struct net_device *netdev,
-				 const struct ethtool_link_ksettings *cmd)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	return mii_ethtool_set_link_ksettings(&ks->mii, cmd);
-}
-
-static u32 ks_get_link(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	return mii_link_ok(&ks->mii);
-}
-
-static int ks_nway_reset(struct net_device *netdev)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	return mii_nway_restart(&ks->mii);
-}
-
-static const struct ethtool_ops ks_ethtool_ops = {
-	.get_drvinfo	= ks_get_drvinfo,
-	.get_msglevel	= ks_get_msglevel,
-	.set_msglevel	= ks_set_msglevel,
-	.get_link	= ks_get_link,
-	.nway_reset	= ks_nway_reset,
-	.get_link_ksettings = ks_get_link_ksettings,
-	.set_link_ksettings = ks_set_link_ksettings,
-};
-
-/* MII interface controls */
-
-/**
- * ks_phy_reg - convert MII register into a KS8851 register
- * @reg: MII register number.
- *
- * Return the KS8851 register number for the corresponding MII PHY register
- * if possible. Return zero if the MII register has no direct mapping to the
- * KS8851 register set.
- */
-static int ks_phy_reg(int reg)
-{
-	switch (reg) {
-	case MII_BMCR:
-		return KS_P1MBCR;
-	case MII_BMSR:
-		return KS_P1MBSR;
-	case MII_PHYSID1:
-		return KS_PHY1ILR;
-	case MII_PHYSID2:
-		return KS_PHY1IHR;
-	case MII_ADVERTISE:
-		return KS_P1ANAR;
-	case MII_LPA:
-		return KS_P1ANLPR;
-	}
-
-	return 0x0;
-}
-
-/**
- * ks_phy_read - MII interface PHY register read.
- * @netdev: The network device the PHY is on.
- * @phy_addr: Address of PHY (ignored as we only have one)
- * @reg: The register to read.
- *
- * This call reads data from the PHY register specified in @reg. Since the
- * device does not support all the MII registers, the non-existent values
- * are always returned as zero.
- *
- * We return zero for unsupported registers as the MII code does not check
- * the value returned for any error status, and simply returns it to the
- * caller. The mii-tool that the driver was tested with takes any -ve error
- * as real PHY capabilities, thus displaying incorrect data to the user.
- */
-static int ks_phy_read(struct net_device *netdev, int phy_addr, int reg)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	int ksreg;
-	int result;
-
-	ksreg = ks_phy_reg(reg);
-	if (!ksreg)
-		return 0x0;	/* no error return allowed, so use zero */
-
-	mutex_lock(&ks->lock);
-	result = ks_rdreg16(ks, ksreg);
-	mutex_unlock(&ks->lock);
-
-	return result;
-}
-
-static void ks_phy_write(struct net_device *netdev,
-			     int phy, int reg, int value)
-{
-	struct ks_net *ks = netdev_priv(netdev);
-	int ksreg;
-
-	ksreg = ks_phy_reg(reg);
-	if (ksreg) {
-		mutex_lock(&ks->lock);
-		ks_wrreg16(ks, ksreg, value);
-		mutex_unlock(&ks->lock);
-	}
-}
-
-/**
- * ks_read_selftest - read the selftest memory info.
- * @ks: The device state
- *
- * Read and check the TX/RX memory selftest information.
- */
-static int ks_read_selftest(struct ks_net *ks)
-{
-	unsigned both_done = MBIR_TXMBF | MBIR_RXMBF;
-	int ret = 0;
-	unsigned rd;
-
-	rd = ks_rdreg16(ks, KS_MBIR);
-
-	if ((rd & both_done) != both_done) {
-		netdev_warn(ks->netdev, "Memory selftest not finished\n");
-		return 0;
-	}
-
-	if (rd & MBIR_TXMBFA) {
-		netdev_err(ks->netdev, "TX memory selftest fails\n");
-		ret |= 1;
-	}
-
-	if (rd & MBIR_RXMBFA) {
-		netdev_err(ks->netdev, "RX memory selftest fails\n");
-		ret |= 2;
-	}
-
-	netdev_info(ks->netdev, "the selftest passes\n");
-	return ret;
-}
-
-static void ks_setup(struct ks_net *ks)
-{
-	u16	w;
-
-	/**
-	 * Configure QMU Transmit
-	 */
-
-	/* Setup Transmit Frame Data Pointer Auto-Increment (TXFDPR) */
-	ks_wrreg16(ks, KS_TXFDPR, TXFDPR_TXFPAI);
-
-	/* Setup Receive Frame Data Pointer Auto-Increment */
-	ks_wrreg16(ks, KS_RXFDPR, RXFDPR_RXFPAI);
-
-	/* Setup Receive Frame Threshold - 1 frame (RXFCTFC) */
-	ks_wrreg16(ks, KS_RXFCTR, 1 & RXFCTR_RXFCT_MASK);
-
-	/* Setup RxQ Command Control (RXQCR) */
-	ks->rc_rxqcr = RXQCR_CMD_CNTL;
-	ks_wrreg16(ks, KS_RXQCR, ks->rc_rxqcr);
-
-	/**
-	 * set the force mode to half duplex, default is full duplex
-	 *  because if the auto-negotiation fails, most switch uses
-	 *  half-duplex.
-	 */
-
-	w = ks_rdreg16(ks, KS_P1MBCR);
-	w &= ~BMCR_FULLDPLX;
-	ks_wrreg16(ks, KS_P1MBCR, w);
-
-	w = TXCR_TXFCE | TXCR_TXPE | TXCR_TXCRC | TXCR_TCGIP;
-	ks_wrreg16(ks, KS_TXCR, w);
-
-	w = RXCR1_RXFCE | RXCR1_RXBE | RXCR1_RXUE | RXCR1_RXME | RXCR1_RXIPFCC;
-
-	if (ks->promiscuous)         /* bPromiscuous */
-		w |= (RXCR1_RXAE | RXCR1_RXINVF);
-	else if (ks->all_mcast) /* Multicast address passed mode */
-		w |= (RXCR1_RXAE | RXCR1_RXMAFMA | RXCR1_RXPAFMA);
-	else                                   /* Normal mode */
-		w |= RXCR1_RXPAFMA;
-
-	ks_wrreg16(ks, KS_RXCR1, w);
-}  /*ks_setup */
-
-
-static void ks_setup_int(struct ks_net *ks)
-{
-	ks->rc_ier = 0x00;
-	/* Clear the interrupts status of the hardware. */
-	ks_wrreg16(ks, KS_ISR, 0xffff);
-
-	/* Enables the interrupts of the hardware. */
-	ks->rc_ier = (IRQ_LCI | IRQ_TXI | IRQ_RXI);
-}  /* ks_setup_int */
-
-static int ks_hw_init(struct ks_net *ks)
-{
-#define	MHEADER_SIZE	(sizeof(struct type_frame_head) * MAX_RECV_FRAMES)
-	ks->promiscuous = 0;
-	ks->all_mcast = 0;
-	ks->mcast_lst_size = 0;
-
-	ks->frame_head_info = devm_kmalloc(&ks->pdev->dev, MHEADER_SIZE,
-					   GFP_KERNEL);
-	if (!ks->frame_head_info)
-		return false;
-
-	ks_set_mac(ks, KS_DEFAULT_MAC_ADDRESS);
-	return true;
-}
-
-#if defined(CONFIG_OF)
-static const struct of_device_id ks8851_ml_dt_ids[] = {
-	{ .compatible = "micrel,ks8851-mll" },
-	{ /* sentinel */ }
-};
-MODULE_DEVICE_TABLE(of, ks8851_ml_dt_ids);
-#endif
-
-static int ks8851_probe(struct platform_device *pdev)
-{
-	int err;
-	struct net_device *netdev;
-	struct ks_net *ks;
-	u16 id, data;
-	const char *mac;
-
-	netdev = alloc_etherdev(sizeof(struct ks_net));
-	if (!netdev)
-		return -ENOMEM;
-
-	SET_NETDEV_DEV(netdev, &pdev->dev);
-
-	ks = netdev_priv(netdev);
-	ks->netdev = netdev;
-
-	ks->hw_addr = devm_platform_ioremap_resource(pdev, 0);
-	if (IS_ERR(ks->hw_addr)) {
-		err = PTR_ERR(ks->hw_addr);
-		goto err_free;
-	}
-
-	ks->hw_addr_cmd = devm_platform_ioremap_resource(pdev, 1);
-	if (IS_ERR(ks->hw_addr_cmd)) {
-		err = PTR_ERR(ks->hw_addr_cmd);
-		goto err_free;
-	}
-
-	err = ks_check_endian(ks);
-	if (err)
-		goto err_free;
-
-	netdev->irq = platform_get_irq(pdev, 0);
-
-	if ((int)netdev->irq < 0) {
-		err = netdev->irq;
-		goto err_free;
-	}
-
-	ks->pdev = pdev;
-
-	mutex_init(&ks->lock);
-	spin_lock_init(&ks->statelock);
-
-	netdev->netdev_ops = &ks_netdev_ops;
-	netdev->ethtool_ops = &ks_ethtool_ops;
-
-	/* setup mii state */
-	ks->mii.dev             = netdev;
-	ks->mii.phy_id          = 1,
-	ks->mii.phy_id_mask     = 1;
-	ks->mii.reg_num_mask    = 0xf;
-	ks->mii.mdio_read       = ks_phy_read;
-	ks->mii.mdio_write      = ks_phy_write;
-
-	netdev_info(netdev, "message enable is %d\n", msg_enable);
-	/* set the default message enable */
-	ks->msg_enable = netif_msg_init(msg_enable, (NETIF_MSG_DRV |
-						     NETIF_MSG_PROBE |
-						     NETIF_MSG_LINK));
-	ks_read_config(ks);
-
-	/* simple check for a valid chip being connected to the bus */
-	if ((ks_rdreg16(ks, KS_CIDER) & ~CIDER_REV_MASK) != CIDER_ID) {
-		netdev_err(netdev, "failed to read device ID\n");
-		err = -ENODEV;
-		goto err_free;
-	}
-
-	if (ks_read_selftest(ks)) {
-		netdev_err(netdev, "failed to read device ID\n");
-		err = -ENODEV;
-		goto err_free;
-	}
-
-	err = register_netdev(netdev);
-	if (err)
-		goto err_free;
-
-	platform_set_drvdata(pdev, netdev);
-
-	ks_soft_reset(ks, GRR_GSR);
-	ks_hw_init(ks);
-	ks_disable_qmu(ks);
-	ks_setup(ks);
-	ks_setup_int(ks);
-
-	data = ks_rdreg16(ks, KS_OBCR);
-	ks_wrreg16(ks, KS_OBCR, data | OBCR_ODS_16mA);
-
-	/* overwriting the default MAC address */
-	if (pdev->dev.of_node) {
-		mac = of_get_mac_address(pdev->dev.of_node);
-		if (!IS_ERR(mac))
-			ether_addr_copy(ks->mac_addr, mac);
-	} else {
-		struct ks8851_mll_platform_data *pdata;
-
-		pdata = dev_get_platdata(&pdev->dev);
-		if (!pdata) {
-			netdev_err(netdev, "No platform data\n");
-			err = -ENODEV;
-			goto err_pdata;
-		}
-		memcpy(ks->mac_addr, pdata->mac_addr, ETH_ALEN);
-	}
-	if (!is_valid_ether_addr(ks->mac_addr)) {
-		/* Use random MAC address if none passed */
-		eth_random_addr(ks->mac_addr);
-		netdev_info(netdev, "Using random mac address\n");
-	}
-	netdev_info(netdev, "Mac address is: %pM\n", ks->mac_addr);
-
-	memcpy(netdev->dev_addr, ks->mac_addr, ETH_ALEN);
-
-	ks_set_mac(ks, netdev->dev_addr);
-
-	id = ks_rdreg16(ks, KS_CIDER);
-
-	netdev_info(netdev, "Found chip, family: 0x%x, id: 0x%x, rev: 0x%x\n",
-		    (id >> 8) & 0xff, (id >> 4) & 0xf, (id >> 1) & 0x7);
-	return 0;
-
-err_pdata:
-	unregister_netdev(netdev);
-err_free:
-	free_netdev(netdev);
-	return err;
-}
-
-static int ks8851_remove(struct platform_device *pdev)
-{
-	struct net_device *netdev = platform_get_drvdata(pdev);
-
-	unregister_netdev(netdev);
-	free_netdev(netdev);
-	return 0;
-
-}
-
-static struct platform_driver ks8851_platform_driver = {
-	.driver = {
-		.name = DRV_NAME,
-		.of_match_table	= of_match_ptr(ks8851_ml_dt_ids),
-	},
-	.probe = ks8851_probe,
-	.remove = ks8851_remove,
-};
-
-module_platform_driver(ks8851_platform_driver);
-
-MODULE_DESCRIPTION("KS8851 MLL Network driver");
-MODULE_AUTHOR("David Choi <david.choi@micrel.com>");
-MODULE_LICENSE("GPL");
-module_param_named(message, msg_enable, int, 0);
-MODULE_PARM_DESC(message, "Message verbosity level (0=none, 31=all)");
-
-- 
cgit v1.2.3-59-g8ed1b


From 2421ee24777e9f7effc4b6db29276eced7ca2114 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:08 +0800
Subject: net: hns3: remove an unnecessary 'goto' in hclge_init_ae_dev()

Remove the redundant 'goto' and return -ENOMEM directly, when
allocating memory for 'hdev' fails in hclge_init_ae_dev().

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7c9f2ba1f272..0e36f037f69c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -9928,10 +9928,8 @@ static int hclge_init_ae_dev(struct hnae3_ae_dev *ae_dev)
 	int ret;
 
 	hdev = devm_kzalloc(&pdev->dev, sizeof(*hdev), GFP_KERNEL);
-	if (!hdev) {
-		ret = -ENOMEM;
-		goto out;
-	}
+	if (!hdev)
+		return -ENOMEM;
 
 	hdev->pdev = pdev;
 	hdev->ae_dev = ae_dev;
-- 
cgit v1.2.3-59-g8ed1b


From 9516352150c0cb896b3de3997b1dfe43fb96d8a5 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:09 +0800
Subject: net: hns3: add a missing mutex destroy in hclge_init_ae_dev()

Add a mutex destroy call in hclge_init_ae_dev() when it fails.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 0e36f037f69c..7d5c304bd6b1 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10108,6 +10108,7 @@ err_pci_uninit:
 	pci_release_regions(pdev);
 	pci_disable_device(pdev);
 out:
+	mutex_destroy(&hdev->vport_lock);
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 9f5a9816065f92683fd5f23cd8ec98719f20144f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:10 +0800
Subject: net: hns3: refactor hclge_config_tso()

Since parameters 'tso_mss_min' and 'tso_mss_max' only indicate
the minimum and maximum MSS, the hnae3_set_field() calls are
meaningless, remove them and change the type of these two
parameters to u16.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 17 ++++-------------
 1 file changed, 4 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 7d5c304bd6b1..35e5cb87f48c 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1429,26 +1429,17 @@ static int hclge_configure(struct hclge_dev *hdev)
 	return ret;
 }
 
-static int hclge_config_tso(struct hclge_dev *hdev, unsigned int tso_mss_min,
-			    unsigned int tso_mss_max)
+static int hclge_config_tso(struct hclge_dev *hdev, u16 tso_mss_min,
+			    u16 tso_mss_max)
 {
 	struct hclge_cfg_tso_status_cmd *req;
 	struct hclge_desc desc;
-	u16 tso_mss;
 
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_TSO_GENERIC_CONFIG, false);
 
 	req = (struct hclge_cfg_tso_status_cmd *)desc.data;
-
-	tso_mss = 0;
-	hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
-			HCLGE_TSO_MSS_MIN_S, tso_mss_min);
-	req->tso_mss_min = cpu_to_le16(tso_mss);
-
-	tso_mss = 0;
-	hnae3_set_field(tso_mss, HCLGE_TSO_MSS_MIN_M,
-			HCLGE_TSO_MSS_MIN_S, tso_mss_max);
-	req->tso_mss_max = cpu_to_le16(tso_mss);
+	req->tso_mss_min = cpu_to_le16(tso_mss_min);
+	req->tso_mss_max = cpu_to_le16(tso_mss_max);
 
 	return hclge_cmd_send(&hdev->hw, &desc, 1);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 5caa039f320d023fb2a40c8c7ededfca3ce85501 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:11 +0800
Subject: net: hns3: refactor hclge_query_bd_num_cmd_send()

In order to improve code maintainability and readability, rewrite
the process of BDs' initialization in hclge_query_bd_num_cmd_send().

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 21 ++++++++++++---------
 1 file changed, 12 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 35e5cb87f48c..e9b0e1c00970 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -10723,16 +10723,19 @@ static int hclge_get_64_bit_regs(struct hclge_dev *hdev, u32 regs_num,
 
 int hclge_query_bd_num_cmd_send(struct hclge_dev *hdev, struct hclge_desc *desc)
 {
-	/*prepare 4 commands to query DFX BD number*/
-	hclge_cmd_setup_basic_desc(&desc[0], HCLGE_OPC_DFX_BD_NUM, true);
-	desc[0].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-	hclge_cmd_setup_basic_desc(&desc[1], HCLGE_OPC_DFX_BD_NUM, true);
-	desc[1].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-	hclge_cmd_setup_basic_desc(&desc[2], HCLGE_OPC_DFX_BD_NUM, true);
-	desc[2].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
-	hclge_cmd_setup_basic_desc(&desc[3], HCLGE_OPC_DFX_BD_NUM, true);
+	int i;
+
+	/* initialize command BD except the last one */
+	for (i = 0; i < HCLGE_GET_DFX_REG_TYPE_CNT - 1; i++) {
+		hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM,
+					   true);
+		desc[i].flag |= cpu_to_le16(HCLGE_CMD_FLAG_NEXT);
+	}
+
+	/* initialize the last command BD */
+	hclge_cmd_setup_basic_desc(&desc[i], HCLGE_OPC_DFX_BD_NUM, true);
 
-	return hclge_cmd_send(&hdev->hw, desc, 4);
+	return hclge_cmd_send(&hdev->hw, desc, HCLGE_GET_DFX_REG_TYPE_CNT);
 }
 
 static int hclge_get_dfx_reg_bd_num(struct hclge_dev *hdev,
-- 
cgit v1.2.3-59-g8ed1b


From 639d84d0c4281e6d8814bb2cc230bfe7ccf5019d Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:12 +0800
Subject: net: hns3: modify an incorrect type in struct
 hclge_cfg_gro_status_cmd

Modify field .gro_en in struct hclge_cfg_gro_status_cmd to u8
according to the UM; otherwise, it will overwrite the reserved
byte, which may be used for other purposes.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h  | 4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
index e3bab8f3847f..463f29151ef0 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.h
@@ -884,8 +884,8 @@ struct hclge_cfg_tso_status_cmd {
 
 #define HCLGE_GRO_EN_B		0
 struct hclge_cfg_gro_status_cmd {
-	__le16 gro_en;
-	u8 rsv[22];
+	u8 gro_en;
+	u8 rsv[23];
 };
 
 #define HCLGE_TSO_MSS_MIN	256
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index e9b0e1c00970..1e4f28518d69 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1456,7 +1456,7 @@ static int hclge_config_gro(struct hclge_dev *hdev, bool en)
 	hclge_cmd_setup_basic_desc(&desc, HCLGE_OPC_GRO_GENERIC_CONFIG, false);
 	req = (struct hclge_cfg_gro_status_cmd *)desc.data;
 
-	req->gro_en = cpu_to_le16(en ? 1 : 0);
+	req->gro_en = en ? 1 : 0;
 
 	ret = hclge_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From fb9e44d63dc33b455a50b772a37faf43e793da91 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:13 +0800
Subject: net: hns3: modify an incorrect type in struct
 hclgevf_cfg_gro_status_cmd

Modify field .gro_en in struct hclgevf_cfg_gro_status_cmd to u8
according to the UM; otherwise, it will overwrite the reserved
byte, which may be used for other purposes.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h  | 4 ++--
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
index f830eef02e5c..40d6e602ab51 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.h
@@ -161,8 +161,8 @@ struct hclgevf_query_res_cmd {
 
 #define HCLGEVF_GRO_EN_B               0
 struct hclgevf_cfg_gro_status_cmd {
-	__le16 gro_en;
-	u8 rsv[22];
+	u8 gro_en;
+	u8 rsv[23];
 };
 
 #define HCLGEVF_RSS_DEFAULT_OUTPORT_B	4
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index 59fcb80671c8..be5789bb5ddd 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -2403,7 +2403,7 @@ static int hclgevf_config_gro(struct hclgevf_dev *hdev, bool en)
 				     false);
 	req = (struct hclgevf_cfg_gro_status_cmd *)desc.data;
 
-	req->gro_en = cpu_to_le16(en ? 1 : 0);
+	req->gro_en = en ? 1 : 0;
 
 	ret = hclgevf_cmd_send(&hdev->hw, &desc, 1);
 	if (ret)
-- 
cgit v1.2.3-59-g8ed1b


From 5e86178dcead4941fcdadc963f31ed4e859e58ce Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:14 +0800
Subject: net: hns3: remove some unused fields in struct hns3_nic_priv

Remove some fields which are defined in struct hns3_nic_priv
but not used, and remove the related definitions of struct
hns3_udp_tunnel and enum hns3_udp_tnl_type.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.h | 22 ----------------------
 1 file changed, 22 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
index 60f82ad89957..66cd4395f781 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.h
@@ -469,21 +469,8 @@ struct hns3_enet_tqp_vector {
 	unsigned long last_jiffies;
 } ____cacheline_internodealigned_in_smp;
 
-enum hns3_udp_tnl_type {
-	HNS3_UDP_TNL_VXLAN,
-	HNS3_UDP_TNL_GENEVE,
-	HNS3_UDP_TNL_MAX,
-};
-
-struct hns3_udp_tunnel {
-	u16 dst_port;
-	int used;
-};
-
 struct hns3_nic_priv {
 	struct hnae3_handle *ae_handle;
-	u32 enet_ver;
-	u32 port_id;
 	struct net_device *netdev;
 	struct device *dev;
 
@@ -495,19 +482,10 @@ struct hns3_nic_priv {
 	struct hns3_enet_tqp_vector *tqp_vector;
 	u16 vector_num;
 
-	/* The most recently read link state */
-	int link;
 	u64 tx_timeout_count;
 
 	unsigned long state;
 
-	struct timer_list service_timer;
-
-	struct work_struct service_task;
-
-	struct notifier_block notifier_block;
-	/* Vxlan/Geneve information */
-	struct hns3_udp_tunnel udp_tnl[HNS3_UDP_TNL_MAX];
 	struct hns3_enet_coalesce tx_coal;
 	struct hns3_enet_coalesce rx_coal;
 };
-- 
cgit v1.2.3-59-g8ed1b


From c496299e0677fe8c12af8fd233783df1c8aa9c4e Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:15 +0800
Subject: net: hns3: remove unused HNAE3_RESTORE_CLIENT in enum
 hnae3_reset_notify_type

Remove HNAE3_RESTORE_CLIENT which is not needed now.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 7506cabaa16e..0a4aac44efa5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -145,7 +145,6 @@ enum hnae3_reset_notify_type {
 	HNAE3_DOWN_CLIENT,
 	HNAE3_INIT_CLIENT,
 	HNAE3_UNINIT_CLIENT,
-	HNAE3_RESTORE_CLIENT,
 };
 
 enum hnae3_hw_error_type {
-- 
cgit v1.2.3-59-g8ed1b


From 4828b5766a69e93ca76b15f820c97f03ebd3a48c Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:16 +0800
Subject: net: hns3: remove unused struct hnae3_unic_private_info

Since field .uinfo in struct hnae3_handle is never used,
remove it and its structure definition.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hnae3.h | 11 -----------
 1 file changed, 11 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hnae3.h b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
index 0a4aac44efa5..d041cac9a487 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hnae3.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hnae3.h
@@ -621,16 +621,6 @@ struct hnae3_roce_private_info {
 	unsigned long state;
 };
 
-struct hnae3_unic_private_info {
-	struct net_device *netdev;
-	u16 rx_buf_len;
-	u16 num_tx_desc;
-	u16 num_rx_desc;
-
-	u16 num_tqps;	/* total number of tqps in this handle */
-	struct hnae3_queue **tqp;  /* array base of all TQPs of this instance */
-};
-
 #define HNAE3_SUPPORT_APP_LOOPBACK    BIT(0)
 #define HNAE3_SUPPORT_PHY_LOOPBACK    BIT(1)
 #define HNAE3_SUPPORT_SERDES_SERIAL_LOOPBACK	BIT(2)
@@ -656,7 +646,6 @@ struct hnae3_handle {
 	union {
 		struct net_device *netdev; /* first member */
 		struct hnae3_knic_private_info kinfo;
-		struct hnae3_unic_private_info uinfo;
 		struct hnae3_roce_private_info rinfo;
 	};
 
-- 
cgit v1.2.3-59-g8ed1b


From 9cee2e8d303940a413d20c5d275bdaf418b09b17 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:17 +0800
Subject: net: hns3: remove two duplicated register macros in hclgevf_main.h

HCLGEVF_CMDQ_INTR_SRC_REG and HCLGEVF_CMDQ_INTR_STS_REG are the same
as HCLGEVF_VECTOR0_CMDQ_SRC_REG and HCLGEVF_VECTOR0_CMDQ_STAT_REG,
so replace the former with the latter, and rename macro
HCLGEVF_VECTOR0_CMDQ_STAT_REG to HCLGEVF_VECTOR0_CMDQ_STATE_REG,
since 'stat' is not an abbreviation of 'state'.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 6 +++---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 4 +---
 2 files changed, 4 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index be5789bb5ddd..a8c0e79901f5 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -46,7 +46,7 @@ static const u32 cmdq_reg_addr_list[] = {HCLGEVF_CMDQ_TX_ADDR_L_REG,
 					 HCLGEVF_CMDQ_RX_TAIL_REG,
 					 HCLGEVF_CMDQ_RX_HEAD_REG,
 					 HCLGEVF_VECTOR0_CMDQ_SRC_REG,
-					 HCLGEVF_CMDQ_INTR_STS_REG,
+					 HCLGEVF_VECTOR0_CMDQ_STATE_REG,
 					 HCLGEVF_CMDQ_INTR_EN_REG,
 					 HCLGEVF_CMDQ_INTR_GEN_REG};
 
@@ -1826,7 +1826,7 @@ static void hclgevf_dump_rst_info(struct hclgevf_dev *hdev)
 	dev_info(&hdev->pdev->dev, "vector0 interrupt enable status: 0x%x\n",
 		 hclgevf_read_dev(&hdev->hw, HCLGEVF_MISC_VECTOR_REG_BASE));
 	dev_info(&hdev->pdev->dev, "vector0 interrupt status: 0x%x\n",
-		 hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STAT_REG));
+		 hclgevf_read_dev(&hdev->hw, HCLGEVF_VECTOR0_CMDQ_STATE_REG));
 	dev_info(&hdev->pdev->dev, "handshake status: 0x%x\n",
 		 hclgevf_read_dev(&hdev->hw, HCLGEVF_CMDQ_TX_DEPTH_REG));
 	dev_info(&hdev->pdev->dev, "function reset status: 0x%x\n",
@@ -2250,7 +2250,7 @@ static enum hclgevf_evt_cause hclgevf_check_evt_cause(struct hclgevf_dev *hdev,
 
 	/* fetch the events from their corresponding regs */
 	cmdq_stat_reg = hclgevf_read_dev(&hdev->hw,
-					 HCLGEVF_VECTOR0_CMDQ_STAT_REG);
+					 HCLGEVF_VECTOR0_CMDQ_STATE_REG);
 
 	if (BIT(HCLGEVF_VECTOR0_RST_INT_B) & cmdq_stat_reg) {
 		rst_ing_reg = hclgevf_read_dev(&hdev->hw, HCLGEVF_RST_ING);
diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index f19583c4bc9b..738de124cfc4 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -42,8 +42,6 @@
 #define HCLGEVF_CMDQ_RX_DEPTH_REG		0x27020
 #define HCLGEVF_CMDQ_RX_TAIL_REG		0x27024
 #define HCLGEVF_CMDQ_RX_HEAD_REG		0x27028
-#define HCLGEVF_CMDQ_INTR_SRC_REG		0x27100
-#define HCLGEVF_CMDQ_INTR_STS_REG		0x27104
 #define HCLGEVF_CMDQ_INTR_EN_REG		0x27108
 #define HCLGEVF_CMDQ_INTR_GEN_REG		0x2710C
 
@@ -88,7 +86,7 @@
 /* Vector0 interrupt CMDQ event source register(RW) */
 #define HCLGEVF_VECTOR0_CMDQ_SRC_REG	0x27100
 /* Vector0 interrupt CMDQ event status register(RO) */
-#define HCLGEVF_VECTOR0_CMDQ_STAT_REG	0x27104
+#define HCLGEVF_VECTOR0_CMDQ_STATE_REG	0x27104
 /* CMDQ register bits for RX event(=MBX event) */
 #define HCLGEVF_VECTOR0_RX_CMDQ_INT_B	1
 /* RST register bits for RESET event */
-- 
cgit v1.2.3-59-g8ed1b


From 7c6643cac0ed78395ec10fe5b3b279e61b0ee51f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:18 +0800
Subject: net: hns3: remove some unused fields in struct hclge_dev

Remove some fields in struct hclge_dev which have not been used.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
index 913c4f677404..46e6e0fef3ba 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.h
@@ -771,12 +771,6 @@ struct hclge_dev {
 	u16 num_roce_msi;	/* Num of roce vectors for this PF */
 	int roce_base_vector;
 
-	u16 pending_udp_bitmap;
-
-	u16 rx_itr_default;
-	u16 tx_itr_default;
-
-	u16 adminq_work_limit; /* Num of admin receive queue desc to process */
 	unsigned long service_timer_period;
 	unsigned long service_timer_previous;
 	struct timer_list reset_timer;
-- 
cgit v1.2.3-59-g8ed1b


From ead38a8537bf87228917f23c2131c7a020fe0951 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Thu, 28 May 2020 21:48:19 +0800
Subject: net: hns3: print out speed info when parsing speed fails

When calling hclge_parse_speed() fails, printing out the speed is
helpful for debugging.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
index 1e4f28518d69..96bfad52630d 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_main.c
@@ -1387,7 +1387,8 @@ static int hclge_configure(struct hclge_dev *hdev)
 
 	ret = hclge_parse_speed(cfg.default_speed, &hdev->hw.mac.speed);
 	if (ret) {
-		dev_err(&hdev->pdev->dev, "Get wrong speed ret=%d.\n", ret);
+		dev_err(&hdev->pdev->dev, "failed to parse speed %u, ret = %d\n",
+			cfg.default_speed, ret);
 		return ret;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From c28481a88cb38a3e1b7b533f53eb82e5e34f7597 Mon Sep 17 00:00:00 2001
From: Takashi Iwai <tiwai@suse.de>
Date: Wed, 11 Mar 2020 09:37:40 +0100
Subject: i40e: Use scnprintf() for avoiding potential buffer overflow

Since snprintf() returns the would-be-output size instead of the
actual output size, the succeeding calls may go beyond the given
buffer limit.  Fix it by replacing with scnprintf().

Signed-off-by: Takashi Iwai <tiwai@suse.de>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_main.c | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_main.c b/drivers/net/ethernet/intel/i40e/i40e_main.c
index ea7395b391e5..5d807c8004f8 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_main.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_main.c
@@ -14486,29 +14486,29 @@ static void i40e_print_features(struct i40e_pf *pf)
 
 	i = snprintf(buf, INFO_STRING_LEN, "Features: PF-id[%d]", hw->pf_id);
 #ifdef CONFIG_PCI_IOV
-	i += snprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
+	i += scnprintf(&buf[i], REMAIN(i), " VFs: %d", pf->num_req_vfs);
 #endif
-	i += snprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
+	i += scnprintf(&buf[i], REMAIN(i), " VSIs: %d QP: %d",
 		      pf->hw.func_caps.num_vsis,
 		      pf->vsi[pf->lan_vsi]->num_queue_pairs);
 	if (pf->flags & I40E_FLAG_RSS_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " RSS");
+		i += scnprintf(&buf[i], REMAIN(i), " RSS");
 	if (pf->flags & I40E_FLAG_FD_ATR_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " FD_ATR");
+		i += scnprintf(&buf[i], REMAIN(i), " FD_ATR");
 	if (pf->flags & I40E_FLAG_FD_SB_ENABLED) {
-		i += snprintf(&buf[i], REMAIN(i), " FD_SB");
-		i += snprintf(&buf[i], REMAIN(i), " NTUPLE");
+		i += scnprintf(&buf[i], REMAIN(i), " FD_SB");
+		i += scnprintf(&buf[i], REMAIN(i), " NTUPLE");
 	}
 	if (pf->flags & I40E_FLAG_DCB_CAPABLE)
-		i += snprintf(&buf[i], REMAIN(i), " DCB");
-	i += snprintf(&buf[i], REMAIN(i), " VxLAN");
-	i += snprintf(&buf[i], REMAIN(i), " Geneve");
+		i += scnprintf(&buf[i], REMAIN(i), " DCB");
+	i += scnprintf(&buf[i], REMAIN(i), " VxLAN");
+	i += scnprintf(&buf[i], REMAIN(i), " Geneve");
 	if (pf->flags & I40E_FLAG_PTP)
-		i += snprintf(&buf[i], REMAIN(i), " PTP");
+		i += scnprintf(&buf[i], REMAIN(i), " PTP");
 	if (pf->flags & I40E_FLAG_VEB_MODE_ENABLED)
-		i += snprintf(&buf[i], REMAIN(i), " VEB");
+		i += scnprintf(&buf[i], REMAIN(i), " VEB");
 	else
-		i += snprintf(&buf[i], REMAIN(i), " VEPA");
+		i += scnprintf(&buf[i], REMAIN(i), " VEPA");
 
 	dev_info(&pf->pdev->dev, "%s\n", buf);
 	kfree(buf);
-- 
cgit v1.2.3-59-g8ed1b


From e92c0e0235c204a4787d186b541b33814a393d7f Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Mon, 16 Mar 2020 11:16:38 +0100
Subject: i40e: trivial fixup of comments in i40e_xsk.c
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The comment above i40e_run_xdp_zc() was clearly copy-pasted from
function i40e_xsk_umem_setup, which is just above.

Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Acked-by: Björn Töpel <bjorn.topel@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_xsk.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_xsk.c b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
index f3953744c505..7276580cbe64 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_xsk.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_xsk.c
@@ -139,8 +139,6 @@ int i40e_xsk_umem_setup(struct i40e_vsi *vsi, struct xdp_umem *umem,
  * @rx_ring: Rx ring
  * @xdp: xdp_buff used as input to the XDP program
  *
- * This function enables or disables a UMEM to a certain ring.
- *
  * Returns any of I40E_XDP_{PASS, CONSUMED, TX, REDIR}
  **/
 static int i40e_run_xdp_zc(struct i40e_ring *rx_ring, struct xdp_buff *xdp)
@@ -224,7 +222,7 @@ no_buffers:
 }
 
 /**
- * i40e_construct_skb_zc - Create skbufff from zero-copy Rx buffer
+ * i40e_construct_skb_zc - Create skbuff from zero-copy Rx buffer
  * @rx_ring: Rx ring
  * @xdp: xdp_buff
  *
-- 
cgit v1.2.3-59-g8ed1b


From 3b70683fc4d68f5d915d9dc7e5ba72c732c7315c Mon Sep 17 00:00:00 2001
From: Xie XiuQi <xiexiuqi@huawei.com>
Date: Tue, 5 May 2020 10:45:21 +0800
Subject: ixgbe: fix signed-integer-overflow warning

UBSAN reports this warning; fix it by adding an unsigned suffix.

UBSAN: signed-integer-overflow in
drivers/net/ethernet/intel/ixgbe/ixgbe_common.c:2246:26
65535 * 65537 cannot be represented in type 'int'
CPU: 21 PID: 7 Comm: kworker/u256:0 Not tainted 5.7.0-rc3-debug+ #39
Hardware name: Huawei TaiShan 2280 V2/BC82AMDC, BIOS 2280-V2 03/27/2020
Workqueue: ixgbe ixgbe_service_task [ixgbe]
Call trace:
 dump_backtrace+0x0/0x3f0
 show_stack+0x28/0x38
 dump_stack+0x154/0x1e4
 ubsan_epilogue+0x18/0x60
 handle_overflow+0xf8/0x148
 __ubsan_handle_mul_overflow+0x34/0x48
 ixgbe_fc_enable_generic+0x4d0/0x590 [ixgbe]
 ixgbe_service_task+0xc20/0x1f78 [ixgbe]
 process_one_work+0x8f0/0xf18
 worker_thread+0x430/0x6d0
 kthread+0x218/0x238
 ret_from_fork+0x10/0x18

Reported-by: Hulk Robot <hulkci@huawei.com>
Signed-off-by: Xie XiuQi <xiexiuqi@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 0bd1294ba517..39c5e6fdb72c 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -2243,7 +2243,7 @@ s32 ixgbe_fc_enable_generic(struct ixgbe_hw *hw)
 	}
 
 	/* Configure pause time (2 TCs per register) */
-	reg = hw->fc.pause_time * 0x00010001;
+	reg = hw->fc.pause_time * 0x00010001U;
 	for (i = 0; i < (MAX_TRAFFIC_CLASS / 2); i++)
 		IXGBE_WRITE_REG(hw, IXGBE_FCTTV(i), reg);
 
-- 
cgit v1.2.3-59-g8ed1b


From 85c41c5b16ee5a4939a22ec833c6a76753e3d428 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:41:57 +0800
Subject: ixgbe: Remove conversion to bool in
 ixgbe_device_supports_autoneg_fc()

No need to convert '==' expression to bool. This fixes the following
coccicheck warning:

drivers/net/ethernet/intel/ixgbe/ixgbe_common.c:68:11-16: WARNING:
conversion to bool not needed here

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_common.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
index 39c5e6fdb72c..17357a12cbdc 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_common.c
@@ -64,8 +64,7 @@ bool ixgbe_device_supports_autoneg_fc(struct ixgbe_hw *hw)
 			hw->mac.ops.check_link(hw, &speed, &link_up, false);
 			/* if link is down, assume supported */
 			if (link_up)
-				supported = speed == IXGBE_LINK_SPEED_1GB_FULL ?
-				true : false;
+				supported = speed == IXGBE_LINK_SPEED_1GB_FULL;
 			else
 				supported = true;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From c2d77e598b1b7170a091b25e32710a0a9f9e4169 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Tue, 5 May 2020 15:43:37 +0800
Subject: ixgbe: Use true, false for bool variable in __ixgbe_enable_sriov()

Fix the following coccicheck warning:

drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c:105:2-38: WARNING:
Assignment of 0/1 to bool variable

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
index 537dfff585e0..d05a5690e66b 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_sriov.c
@@ -102,7 +102,7 @@ static int __ixgbe_enable_sriov(struct ixgbe_adapter *adapter,
 		 * indirection table and RSS hash key with PF therefore
 		 * we want to disable the querying by default.
 		 */
-		adapter->vfinfo[i].rss_query_enabled = 0;
+		adapter->vfinfo[i].rss_query_enabled = false;
 
 		/* Untrust all VFs */
 		adapter->vfinfo[i].trusted = false;
-- 
cgit v1.2.3-59-g8ed1b


From f2d9f294120fddec48e38e50d420c1d0a247661d Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Tue, 5 May 2020 16:35:54 +0800
Subject: ixgbe: Remove unused inline function ixgbe_irq_disable_queues

commit b5f69ccf6765 ("ixgbe: avoid bringing rings up/down as macvlans are added/removed")
left this function behind; remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c | 29 ---------------------------
 1 file changed, 29 deletions(-)

diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index 45fc7ce1a543..a59c166f794f 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2973,35 +2973,6 @@ static inline void ixgbe_irq_enable_queues(struct ixgbe_adapter *adapter,
 	/* skip the flush */
 }
 
-static inline void ixgbe_irq_disable_queues(struct ixgbe_adapter *adapter,
-					    u64 qmask)
-{
-	u32 mask;
-	struct ixgbe_hw *hw = &adapter->hw;
-
-	switch (hw->mac.type) {
-	case ixgbe_mac_82598EB:
-		mask = (IXGBE_EIMS_RTX_QUEUE & qmask);
-		IXGBE_WRITE_REG(hw, IXGBE_EIMC, mask);
-		break;
-	case ixgbe_mac_82599EB:
-	case ixgbe_mac_X540:
-	case ixgbe_mac_X550:
-	case ixgbe_mac_X550EM_x:
-	case ixgbe_mac_x550em_a:
-		mask = (qmask & 0xFFFFFFFF);
-		if (mask)
-			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(0), mask);
-		mask = (qmask >> 32);
-		if (mask)
-			IXGBE_WRITE_REG(hw, IXGBE_EIMC_EX(1), mask);
-		break;
-	default:
-		break;
-	}
-	/* skip the flush */
-}
-
 /**
  * ixgbe_irq_enable - Enable default interrupt generation settings
  * @adapter: board private structure
-- 
cgit v1.2.3-59-g8ed1b


From 49c65e95f331201d431386dcb7c652bf02b306d1 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Thu, 7 May 2020 19:09:15 +0800
Subject: igb: make igb_set_fc_watermarks() return void

This function always returns 0 now, so we can make it return void to
simplify the code. This fixes the following coccicheck warning:

drivers/net/ethernet/intel/igb/e1000_mac.c:728:5-12: Unneeded variable:
"ret_val". Return "0" on line 751

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igb/e1000_mac.c | 9 +++------
 1 file changed, 3 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/igb/e1000_mac.c b/drivers/net/ethernet/intel/igb/e1000_mac.c
index 79ee0a747260..3254737c07a3 100644
--- a/drivers/net/ethernet/intel/igb/e1000_mac.c
+++ b/drivers/net/ethernet/intel/igb/e1000_mac.c
@@ -12,7 +12,7 @@
 #include "igb.h"
 
 static s32 igb_set_default_fc(struct e1000_hw *hw);
-static s32 igb_set_fc_watermarks(struct e1000_hw *hw);
+static void igb_set_fc_watermarks(struct e1000_hw *hw);
 
 /**
  *  igb_get_bus_info_pcie - Get PCIe bus information
@@ -687,7 +687,7 @@ s32 igb_setup_link(struct e1000_hw *hw)
 
 	wr32(E1000_FCTTV, hw->fc.pause_time);
 
-	ret_val = igb_set_fc_watermarks(hw);
+	igb_set_fc_watermarks(hw);
 
 out:
 
@@ -723,9 +723,8 @@ void igb_config_collision_dist(struct e1000_hw *hw)
  *  flow control XON frame transmission is enabled, then set XON frame
  *  tansmission as well.
  **/
-static s32 igb_set_fc_watermarks(struct e1000_hw *hw)
+static void igb_set_fc_watermarks(struct e1000_hw *hw)
 {
-	s32 ret_val = 0;
 	u32 fcrtl = 0, fcrth = 0;
 
 	/* Set the flow control receive threshold registers.  Normally,
@@ -747,8 +746,6 @@ static s32 igb_set_fc_watermarks(struct e1000_hw *hw)
 	}
 	wr32(E1000_FCRTL, fcrtl);
 	wr32(E1000_FCRTH, fcrth);
-
-	return ret_val;
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From 2c3076f5ed3c17e06c2e09a442035906c99311b2 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Sun, 10 May 2020 18:52:00 +0300
Subject: igc: Remove unused flags

The transmit underrun, late collision and excess collision flags are
not in use. This patch removes these flags.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_defines.h | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_defines.h b/drivers/net/ethernet/intel/igc/igc_defines.h
index 3d8d40d6fa3f..186deb1d9375 100644
--- a/drivers/net/ethernet/intel/igc/igc_defines.h
+++ b/drivers/net/ethernet/intel/igc/igc_defines.h
@@ -269,13 +269,9 @@
 #define IGC_TXD_CMD_DEXT	0x20000000 /* Desc extension (0 = legacy) */
 #define IGC_TXD_CMD_VLE		0x40000000 /* Add VLAN tag */
 #define IGC_TXD_STAT_DD		0x00000001 /* Descriptor Done */
-#define IGC_TXD_STAT_EC		0x00000002 /* Excess Collisions */
-#define IGC_TXD_STAT_LC		0x00000004 /* Late Collisions */
-#define IGC_TXD_STAT_TU		0x00000008 /* Transmit underrun */
 #define IGC_TXD_CMD_TCP		0x01000000 /* TCP packet */
 #define IGC_TXD_CMD_IP		0x02000000 /* IP packet */
 #define IGC_TXD_CMD_TSE		0x04000000 /* TCP Seg enable */
-#define IGC_TXD_STAT_TC		0x00000004 /* Tx Underrun */
 #define IGC_TXD_EXTCMD_TSTAMP	0x00000010 /* IEEE1588 Timestamp packet */
 
 /* IPSec Encrypt Enable */
-- 
cgit v1.2.3-59-g8ed1b


From 3d3e9b6b6a878d01c04629eae8787de132056533 Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 12 May 2020 10:35:52 -0700
Subject: igc: Reject NFC rules with multiple matches

The way Rx queue assignment based on mac address, Ethertype and VLAN
priority filtering operates in I225 doesn't allow us to properly support
NFC rules with multiple matches.

Consider the following example which assigns to queue 2 frames matching
the address MACADDR *and* Ethertype ETYPE.

$ ethtool -N eth0 flow-type ether dst <MACADDR> proto <ETYPE> queue 2

When such a rule is applied, we have 2 unwanted behaviors:

    1) Any frame matching MACADDR will be assigned to queue 2,
       regardless of the ETYPE value.

    2) Any accepted frame whose Ethertype equals ETYPE, no matter
       the mac address, will be assigned to queue 2 as well.

In current code, multiple-match filters are accepted by the driver, even
though it doesn't support them properly. This patch adds a check for
multiple-match rules in igc_ethtool_is_nfc_rule_valid() so they are
rejected.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_ethtool.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_ethtool.c b/drivers/net/ethernet/intel/igc/igc_ethtool.c
index 946e775e34ae..a938ec8db681 100644
--- a/drivers/net/ethernet/intel/igc/igc_ethtool.c
+++ b/drivers/net/ethernet/intel/igc/igc_ethtool.c
@@ -1222,8 +1222,8 @@ static void igc_ethtool_init_nfc_rule(struct igc_nfc_rule *rule,
  * @adapter: Pointer to adapter
  * @rule: Rule under evaluation
  *
- * Rules with both destination and source MAC addresses are considered invalid
- * since the driver doesn't support them.
+ * The driver doesn't support rules with multiple matches so if more than
+ * one bit in filter flags is set, @rule is considered invalid.
  *
  * Also, if there is already another rule with the same filter in a different
  * location, @rule is considered invalid.
@@ -1244,9 +1244,8 @@ static int igc_ethtool_check_nfc_rule(struct igc_adapter *adapter,
 		return -EINVAL;
 	}
 
-	if (flags & IGC_FILTER_FLAG_DST_MAC_ADDR &&
-	    flags & IGC_FILTER_FLAG_SRC_MAC_ADDR) {
-		netdev_dbg(dev, "Filters with both dst and src are not supported\n");
+	if (flags & (flags - 1)) {
+		netdev_dbg(dev, "Rule with multiple matches not supported\n");
 		return -EOPNOTSUPP;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From e087d3bbc4bfb1458b28f77caa0eed092f632b2b Mon Sep 17 00:00:00 2001
From: Andre Guedes <andre.guedes@intel.com>
Date: Tue, 12 May 2020 10:35:53 -0700
Subject: igc: Fix IGC_MAX_RXNFC_RULES

IGC supports a total of 32 rules. 16 MAC address based, 8 VLAN priority
based, and 8 Ethertype based. This patch fixes IGC_MAX_RXNFC_RULES
accordingly.

Signed-off-by: Andre Guedes <andre.guedes@intel.com>
Acked-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/igc/igc.h b/drivers/net/ethernet/intel/igc/igc.h
index 14f9edaaaf83..5dbc5a156626 100644
--- a/drivers/net/ethernet/intel/igc/igc.h
+++ b/drivers/net/ethernet/intel/igc/igc.h
@@ -457,7 +457,10 @@ struct igc_nfc_rule {
 	u16 action;
 };
 
-#define IGC_MAX_RXNFC_RULES		16
+/* IGC supports a total of 32 NFC rules: 16 MAC address based,, 8 VLAN priority
+ * based, and 8 ethertype based.
+ */
+#define IGC_MAX_RXNFC_RULES		32
 
 /* igc_desc_unused - calculate if we have unused descriptors */
 static inline u16 igc_desc_unused(const struct igc_ring *ring)
-- 
cgit v1.2.3-59-g8ed1b


From d601afcae2febc49665008e9a79e701248d56c50 Mon Sep 17 00:00:00 2001
From: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Date: Fri, 15 May 2020 13:31:27 +0900
Subject: e1000e: Relax condition to trigger reset for ME workaround

It's an error if the value of the RX/TX tail descriptor does not match
what was written. The error condition is true regardless of the
duration of the interference from ME. But the driver only performs the
reset if E1000_ICH_FWSM_PCIM2PCI_COUNT (2000) iterations of 50us delay
have transpired. The extra condition can lead to inconsistency between
the state of the hardware and the state expected by the driver.

Fix this by dropping the check for number of delay iterations.

While at it, also make __ew32_prepare() static as it's not used
anywhere else.

CC: stable <stable@vger.kernel.org>
Signed-off-by: Punit Agrawal <punit1.agrawal@toshiba.co.jp>
Reviewed-by: Alexander Duyck <alexander.h.duyck@linux.intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000e/e1000.h  |  1 -
 drivers/net/ethernet/intel/e1000e/netdev.c | 12 +++++-------
 2 files changed, 5 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/e1000e/e1000.h b/drivers/net/ethernet/intel/e1000e/e1000.h
index 37a2314d3e6b..944abd5eae11 100644
--- a/drivers/net/ethernet/intel/e1000e/e1000.h
+++ b/drivers/net/ethernet/intel/e1000e/e1000.h
@@ -576,7 +576,6 @@ static inline u32 __er32(struct e1000_hw *hw, unsigned long reg)
 
 #define er32(reg)	__er32(hw, E1000_##reg)
 
-s32 __ew32_prepare(struct e1000_hw *hw);
 void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val);
 
 #define ew32(reg, val)	__ew32(hw, E1000_##reg, (val))
diff --git a/drivers/net/ethernet/intel/e1000e/netdev.c b/drivers/net/ethernet/intel/e1000e/netdev.c
index 32f23a15ff64..444532292588 100644
--- a/drivers/net/ethernet/intel/e1000e/netdev.c
+++ b/drivers/net/ethernet/intel/e1000e/netdev.c
@@ -158,14 +158,12 @@ static bool e1000e_check_me(u16 device_id)
  * has bit 24 set while ME is accessing MAC CSR registers, wait if it is set
  * and try again a number of times.
  **/
-s32 __ew32_prepare(struct e1000_hw *hw)
+static void __ew32_prepare(struct e1000_hw *hw)
 {
 	s32 i = E1000_ICH_FWSM_PCIM2PCI_COUNT;
 
 	while ((er32(FWSM) & E1000_ICH_FWSM_PCIM2PCI) && --i)
 		udelay(50);
-
-	return i;
 }
 
 void __ew32(struct e1000_hw *hw, unsigned long reg, u32 val)
@@ -646,11 +644,11 @@ static void e1000e_update_rdt_wa(struct e1000_ring *rx_ring, unsigned int i)
 {
 	struct e1000_adapter *adapter = rx_ring->adapter;
 	struct e1000_hw *hw = &adapter->hw;
-	s32 ret_val = __ew32_prepare(hw);
 
+	__ew32_prepare(hw);
 	writel(i, rx_ring->tail);
 
-	if (unlikely(!ret_val && (i != readl(rx_ring->tail)))) {
+	if (unlikely(i != readl(rx_ring->tail))) {
 		u32 rctl = er32(RCTL);
 
 		ew32(RCTL, rctl & ~E1000_RCTL_EN);
@@ -663,11 +661,11 @@ static void e1000e_update_tdt_wa(struct e1000_ring *tx_ring, unsigned int i)
 {
 	struct e1000_adapter *adapter = tx_ring->adapter;
 	struct e1000_hw *hw = &adapter->hw;
-	s32 ret_val = __ew32_prepare(hw);
 
+	__ew32_prepare(hw);
 	writel(i, tx_ring->tail);
 
-	if (unlikely(!ret_val && (i != readl(tx_ring->tail)))) {
+	if (unlikely(i != readl(tx_ring->tail))) {
 		u32 tctl = er32(TCTL);
 
 		ew32(TCTL, tctl & ~E1000_TCTL_EN);
-- 
cgit v1.2.3-59-g8ed1b


From 3f6023f77ad044a1a3e1b57ccaaff79432910a57 Mon Sep 17 00:00:00 2001
From: Jason Yan <yanaijie@huawei.com>
Date: Wed, 6 May 2020 14:18:35 +0800
Subject: i40e: Make i40e_shutdown_adminq() return void

Fix the following coccicheck warning:

drivers/net/ethernet/intel/i40e/i40e_adminq.c:699:13-21: Unneeded
variable: "ret_code". Return "0" on line 710

Signed-off-by: Jason Yan <yanaijie@huawei.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/i40e/i40e_adminq.c    | 6 +-----
 drivers/net/ethernet/intel/i40e/i40e_prototype.h | 2 +-
 2 files changed, 2 insertions(+), 6 deletions(-)

diff --git a/drivers/net/ethernet/intel/i40e/i40e_adminq.c b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
index 37514a75f928..6a089848c857 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_adminq.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_adminq.c
@@ -694,10 +694,8 @@ init_adminq_exit:
  *  i40e_shutdown_adminq - shutdown routine for the Admin Queue
  *  @hw: pointer to the hardware structure
  **/
-i40e_status i40e_shutdown_adminq(struct i40e_hw *hw)
+void i40e_shutdown_adminq(struct i40e_hw *hw)
 {
-	i40e_status ret_code = 0;
-
 	if (i40e_check_asq_alive(hw))
 		i40e_aq_queue_shutdown(hw, true);
 
@@ -706,8 +704,6 @@ i40e_status i40e_shutdown_adminq(struct i40e_hw *hw)
 
 	if (hw->nvm_buff.va)
 		i40e_free_virt_mem(hw, &hw->nvm_buff);
-
-	return ret_code;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/i40e/i40e_prototype.h b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
index bbb478f09093..5c1378641b3b 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_prototype.h
+++ b/drivers/net/ethernet/intel/i40e/i40e_prototype.h
@@ -17,7 +17,7 @@
 
 /* adminq functions */
 i40e_status i40e_init_adminq(struct i40e_hw *hw);
-i40e_status i40e_shutdown_adminq(struct i40e_hw *hw);
+void i40e_shutdown_adminq(struct i40e_hw *hw);
 void i40e_adminq_init_ring_data(struct i40e_hw *hw);
 i40e_status i40e_clean_arq_element(struct i40e_hw *hw,
 					     struct i40e_arq_event_info *e,
-- 
cgit v1.2.3-59-g8ed1b


From 758b51e1e71e38257dfcb753edaf07d417611786 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Tue, 19 May 2020 17:36:34 +0300
Subject: igc: Remove symbol error counter

According to the i225 datasheet, the symbol error counter is not
applicable to the i225 device.
This patch removes this counter.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.c  | 1 -
 drivers/net/ethernet/intel/igc/igc_main.c | 1 -
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 89445ab02a98..9de70a24cb9e 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -235,7 +235,6 @@ out:
 void igc_clear_hw_cntrs_base(struct igc_hw *hw)
 {
 	rd32(IGC_CRCERRS);
-	rd32(IGC_SYMERRS);
 	rd32(IGC_MPC);
 	rd32(IGC_SCC);
 	rd32(IGC_ECOL);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 97d26991c87e..662f06a647e6 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3701,7 +3701,6 @@ void igc_update_stats(struct igc_adapter *adapter)
 	adapter->stats.prc511 += rd32(IGC_PRC511);
 	adapter->stats.prc1023 += rd32(IGC_PRC1023);
 	adapter->stats.prc1522 += rd32(IGC_PRC1522);
-	adapter->stats.symerrs += rd32(IGC_SYMERRS);
 	adapter->stats.sec += rd32(IGC_SEC);
 
 	mpc = rd32(IGC_MPC);
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 7f999cfc9b39..a3e4ec922948 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -127,7 +127,6 @@
 /* Statistics Register Descriptions */
 #define IGC_CRCERRS	0x04000  /* CRC Error Count - R/clr */
 #define IGC_ALGNERRC	0x04004  /* Alignment Error Count - R/clr */
-#define IGC_SYMERRS	0x04008  /* Symbol Error Count - R/clr */
 #define IGC_RXERRC	0x0400C  /* Receive Error Count - R/clr */
 #define IGC_MPC		0x04010  /* Missed Packet Count - R/clr */
 #define IGC_SCC		0x04014  /* Single Collision Count - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From 51c657b42f58fcf061dfd6d01df26ff1701ae72c Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Tue, 19 May 2020 17:55:42 +0300
Subject: igc: Add Receive Error Counter

The receive error counter reflects the total number of non-filtered
packets received with errors. This includes: CRC error,
symbol error, Rx data error and carrier extend error.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.c  | 1 +
 drivers/net/ethernet/intel/igc/igc_main.c | 1 +
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 +
 3 files changed, 3 insertions(+)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index 9de70a24cb9e..a5a087e1ac02 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -241,6 +241,7 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw)
 	rd32(IGC_MCC);
 	rd32(IGC_LATECOL);
 	rd32(IGC_COLC);
+	rd32(IGC_RERC);
 	rd32(IGC_DC);
 	rd32(IGC_SEC);
 	rd32(IGC_RLEC);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index 662f06a647e6..e0c45ffa12c4 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3740,6 +3740,7 @@ void igc_update_stats(struct igc_adapter *adapter)
 
 	adapter->stats.tpt += rd32(IGC_TPT);
 	adapter->stats.colc += rd32(IGC_COLC);
+	adapter->stats.colc += rd32(IGC_RERC);
 
 	adapter->stats.algnerrc += rd32(IGC_ALGNERRC);
 
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index a3e4ec922948..7ac3b611708c 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -134,6 +134,7 @@
 #define IGC_MCC		0x0401C  /* Multiple Collision Count - R/clr */
 #define IGC_LATECOL	0x04020  /* Late Collision Count - R/clr */
 #define IGC_COLC	0x04028  /* Collision Count - R/clr */
+#define IGC_RERC	0x0402C  /* Receive Error Count - R/clr */
 #define IGC_DC		0x04030  /* Defer Count - R/clr */
 #define IGC_TNCRS	0x04034  /* Tx-No CRS - R/clr */
 #define IGC_SEC		0x04038  /* Sequence Error Count - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From e2d0f2031effc8b08a162e6db64d2c97da4cf9f5 Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Thu, 28 May 2020 10:11:11 +0300
Subject: igc: Remove Sequence Error Counter

According to the i225 datasheet, the sequence error counter is not
applicable to the i225 device.
This patch removes this counter.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.c  | 1 -
 drivers/net/ethernet/intel/igc/igc_main.c | 1 -
 drivers/net/ethernet/intel/igc/igc_regs.h | 1 -
 3 files changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index a5a087e1ac02..fb496617e8e1 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -243,7 +243,6 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw)
 	rd32(IGC_COLC);
 	rd32(IGC_RERC);
 	rd32(IGC_DC);
-	rd32(IGC_SEC);
 	rd32(IGC_RLEC);
 	rd32(IGC_XONRXC);
 	rd32(IGC_XONTXC);
diff --git a/drivers/net/ethernet/intel/igc/igc_main.c b/drivers/net/ethernet/intel/igc/igc_main.c
index e0c45ffa12c4..43fcabb5c023 100644
--- a/drivers/net/ethernet/intel/igc/igc_main.c
+++ b/drivers/net/ethernet/intel/igc/igc_main.c
@@ -3701,7 +3701,6 @@ void igc_update_stats(struct igc_adapter *adapter)
 	adapter->stats.prc511 += rd32(IGC_PRC511);
 	adapter->stats.prc1023 += rd32(IGC_PRC1023);
 	adapter->stats.prc1522 += rd32(IGC_PRC1522);
-	adapter->stats.sec += rd32(IGC_SEC);
 
 	mpc = rd32(IGC_MPC);
 	adapter->stats.mpc += mpc;
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 7ac3b611708c..2b7a877dadac 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -137,7 +137,6 @@
 #define IGC_RERC	0x0402C  /* Receive Error Count - R/clr */
 #define IGC_DC		0x04030  /* Defer Count - R/clr */
 #define IGC_TNCRS	0x04034  /* Tx-No CRS - R/clr */
-#define IGC_SEC		0x04038  /* Sequence Error Count - R/clr */
 #define IGC_CEXTERR	0x0403C  /* Carrier Extension Error Count - R/clr */
 #define IGC_RLEC	0x04040  /* Receive Length Error Count - R/clr */
 #define IGC_XONRXC	0x04048  /* XON Rx Count - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From 480b7a5a3fdb99afaf9a59681616bc70c1fbfe2f Mon Sep 17 00:00:00 2001
From: Sasha Neftin <sasha.neftin@intel.com>
Date: Thu, 28 May 2020 10:25:21 +0300
Subject: igc: Fix wrong register name

According to the i225 datasheet, this register address is
used by the Host Transmit Discarded Packet by MAC counter
and not by the Carrier Extension Error counter, which is not applicable.
This patch fixes the wrong definition.

Signed-off-by: Sasha Neftin <sasha.neftin@intel.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/igc/igc_mac.c  | 2 +-
 drivers/net/ethernet/intel/igc/igc_regs.h | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/intel/igc/igc_mac.c b/drivers/net/ethernet/intel/igc/igc_mac.c
index fb496617e8e1..410aeb01de5c 100644
--- a/drivers/net/ethernet/intel/igc/igc_mac.c
+++ b/drivers/net/ethernet/intel/igc/igc_mac.c
@@ -287,7 +287,7 @@ void igc_clear_hw_cntrs_base(struct igc_hw *hw)
 	rd32(IGC_ALGNERRC);
 	rd32(IGC_RXERRC);
 	rd32(IGC_TNCRS);
-	rd32(IGC_CEXTERR);
+	rd32(IGC_HTDPMC);
 	rd32(IGC_TSCTC);
 	rd32(IGC_TSCTFC);
 
diff --git a/drivers/net/ethernet/intel/igc/igc_regs.h b/drivers/net/ethernet/intel/igc/igc_regs.h
index 2b7a877dadac..232e82dec62e 100644
--- a/drivers/net/ethernet/intel/igc/igc_regs.h
+++ b/drivers/net/ethernet/intel/igc/igc_regs.h
@@ -137,7 +137,7 @@
 #define IGC_RERC	0x0402C  /* Receive Error Count - R/clr */
 #define IGC_DC		0x04030  /* Defer Count - R/clr */
 #define IGC_TNCRS	0x04034  /* Tx-No CRS - R/clr */
-#define IGC_CEXTERR	0x0403C  /* Carrier Extension Error Count - R/clr */
+#define IGC_HTDPMC	0x0403C  /* Host Transmit Discarded by MAC - R/clr */
 #define IGC_RLEC	0x04040  /* Receive Length Error Count - R/clr */
 #define IGC_XONRXC	0x04048  /* XON Rx Count - R/clr */
 #define IGC_XONTXC	0x0404C  /* XON Tx Count - R/clr */
-- 
cgit v1.2.3-59-g8ed1b


From 6a3faa4d7e013af13fa0230f7537640dcb3abc38 Mon Sep 17 00:00:00 2001
From: Hari <harichandrakanthan@gmail.com>
Date: Sat, 23 May 2020 18:43:26 +0530
Subject: e1000: Fix typo in the comment

The comment contained a doubled "the". Changed it to a single "the".

Signed-off-by: Hari <harichandrakanthan@gmail.com>
Tested-by: Aaron Brown <aaron.f.brown@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/e1000/e1000_hw.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/e1000/e1000_hw.c b/drivers/net/ethernet/intel/e1000/e1000_hw.c
index 48428d6a00be..623e516a9630 100644
--- a/drivers/net/ethernet/intel/e1000/e1000_hw.c
+++ b/drivers/net/ethernet/intel/e1000/e1000_hw.c
@@ -3960,7 +3960,7 @@ static s32 e1000_do_read_eeprom(struct e1000_hw *hw, u16 offset, u16 words,
  * @hw: Struct containing variables accessed by shared code
  *
  * Reads the first 64 16 bit words of the EEPROM and sums the values read.
- * If the the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
+ * If the sum of the 64 16 bit words is 0xBABA, the EEPROM's checksum is
  * valid.
  */
 s32 e1000_validate_eeprom_checksum(struct e1000_hw *hw)
-- 
cgit v1.2.3-59-g8ed1b


From a224883cc97f0c476f1d1460ac8716bf0768070a Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Fri, 29 May 2020 09:39:21 +0300
Subject: iwlwifi: set NO_HE if the regulatory domain forbids it

If the firmware's regulatory domain forbids HE operation, set it
in the cfg80211 regdomain.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.c3e50c36c628.I991bfa662c0ef35de5be9eaf5b78ef190b67cb56@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
index d91a8e8349e6..ee410417761d 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-nvm-parse.c
@@ -240,6 +240,7 @@ enum iwl_nvm_channel_flags {
  * @REG_CAPA_40MHZ_FORBIDDEN: 11n channel with a width of 40Mhz is forbidden
  *	for this regulatory domain (valid only in 5Ghz).
  * @REG_CAPA_DC_HIGH_ENABLED: DC HIGH allowed.
+ * @REG_CAPA_11AX_DISABLED: 11ax is forbidden for this regulatory domain.
  */
 enum iwl_reg_capa_flags {
 	REG_CAPA_BF_CCD_LOW_BAND	= BIT(0),
@@ -250,6 +251,7 @@ enum iwl_reg_capa_flags {
 	REG_CAPA_MCS_9_ALLOWED		= BIT(5),
 	REG_CAPA_40MHZ_FORBIDDEN	= BIT(7),
 	REG_CAPA_DC_HIGH_ENABLED	= BIT(9),
+	REG_CAPA_11AX_DISABLED		= BIT(10),
 };
 
 static inline void iwl_nvm_print_channel_flags(struct device *dev, u32 level,
@@ -1115,6 +1117,9 @@ static u32 iwl_nvm_get_regdom_bw_flags(const u16 *nvm_chan,
 			flags |= NL80211_RRF_NO_160MHZ;
 	}
 
+	if (cap_flags & REG_CAPA_11AX_DISABLED)
+		flags |= NL80211_RRF_NO_HE;
+
 	return flags;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 771db3a10361ef67d59c00098a442be4a8395861 Mon Sep 17 00:00:00 2001
From: Haim Dreyfuss <haim.dreyfuss@intel.com>
Date: Fri, 29 May 2020 09:39:22 +0300
Subject: iwlwifi: pcie: don't count on the FW to set persistence mode

Apparently the FW can't set the persistence in all flows. Don't count
on the FW setting it in AX210 devices or above either to avoid
potential resets on resume.

Signed-off-by: Haim Dreyfuss <haim.dreyfuss@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.5405db448555.Ie3c110932ebbd5b6aca99938a5e0a1e4dfbaa848@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c | 12 +++---------
 1 file changed, 3 insertions(+), 9 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 8ccfc7cc7348..3bcbc2967c88 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -5,10 +5,9 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2007 - 2015 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2015, 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -28,10 +27,9 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2005 - 2015 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2007 - 2015, 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -1495,14 +1493,10 @@ static int iwl_trans_pcie_d3_suspend(struct iwl_trans *trans, bool test,
 	int ret;
 	struct iwl_trans_pcie *trans_pcie =  IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	/*
-	 * Family IWL_DEVICE_FAMILY_AX210 and above persist mode is set by FW.
-	 */
-	if (!reset && trans->trans_cfg->device_family < IWL_DEVICE_FAMILY_AX210) {
+	if (!reset)
 		/* Enable persistence mode to avoid reset */
 		iwl_set_bit(trans, CSR_HW_IF_CONFIG_REG,
 			    CSR_HW_IF_CONFIG_REG_PERSIST_MODE);
-	}
 
 	if (trans->trans_cfg->device_family >= IWL_DEVICE_FAMILY_AX210) {
 		iwl_write_umac_prph(trans, UREG_DOORBELL_TO_ISR6,
-- 
cgit v1.2.3-59-g8ed1b


From fcac70029ccf36c5157b8f37817eff332e0145a7 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 29 May 2020 09:39:23 +0300
Subject: iwlwifi: pcie: keep trans instead of trans_pcie in iwl_txq

We used both the trans and the trans_pcie structures in
iwl_txq, so we can keep the trans structure instead.  This
helps with the refactoring of txq code out of pcie.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.1f826d34339e.I23182a59bfbe089a1f659742d6fee6f64d2ed08c@changeid
---
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 4 ++--
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c       | 8 ++++----
 2 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index b76c0396335a..3950f5784a15 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -332,7 +332,7 @@ struct iwl_pcie_first_tb_buf {
  * @entries: transmit entries (driver state)
  * @lock: queue lock
  * @stuck_timer: timer that fires if queue gets stuck
- * @trans_pcie: pointer back to transport (for timer)
+ * @trans: pointer back to transport (for timer)
  * @need_update: indicates need to update read/write index
  * @ampdu: true if this queue is an ampdu queue for an specific RA/TID
  * @wd_timeout: queue watchdog timeout (jiffies) - per queue
@@ -371,7 +371,7 @@ struct iwl_txq {
 	spinlock_t lock;
 	unsigned long frozen_expiry_remainder;
 	struct timer_list stuck_timer;
-	struct iwl_trans_pcie *trans_pcie;
+	struct iwl_trans *trans;
 	bool need_update;
 	bool frozen;
 	bool ampdu;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 9ff78bca460b..757cf4e9de33 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -183,8 +183,7 @@ void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr)
 static void iwl_pcie_txq_stuck_timer(struct timer_list *t)
 {
 	struct iwl_txq *txq = from_timer(txq, t, stuck_timer);
-	struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
-	struct iwl_trans *trans = iwl_trans_pcie_get_trans(trans_pcie);
+	struct iwl_trans *trans = txq->trans;
 
 	spin_lock(&txq->lock);
 	/* check if triggered erroneously */
@@ -535,7 +534,7 @@ int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq,
 		tfd_sz = trans_pcie->tfd_size * slots_num;
 
 	timer_setup(&txq->stuck_timer, iwl_pcie_txq_stuck_timer, 0);
-	txq->trans_pcie = trans_pcie;
+	txq->trans = trans;
 
 	txq->n_window = slots_num;
 
@@ -2129,7 +2128,8 @@ static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
 				   u16 tb1_len)
 {
 	struct iwl_tx_cmd *tx_cmd = (void *)dev_cmd->payload;
-	struct iwl_trans_pcie *trans_pcie = txq->trans_pcie;
+	struct iwl_trans_pcie *trans_pcie =
+		IWL_TRANS_GET_PCIE_TRANS(txq->trans);
 	struct ieee80211_hdr *hdr = (void *)skb->data;
 	unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
 	unsigned int mss = skb_shinfo(skb)->gso_size;
-- 
cgit v1.2.3-59-g8ed1b


From 9db93491f29eb4a4a68c72783dd6f078bdd94302 Mon Sep 17 00:00:00 2001
From: Gil Adam <gil.adam@intel.com>
Date: Fri, 29 May 2020 09:39:24 +0300
Subject: iwlwifi: acpi: support device specific method (DSM)

ACPI Device Specific Method (DSM) allows standardized feature
configuration through the ACPI interface without the namespace
pollution of the usual mechanism (ACPI method for each feature).
Add generic function for evaluating DSM objects and function for
evaluating a DSM with no arguments and a single int return value.
Also implement the required backport for UUID.

Signed-off-by: Gil Adam <gil.adam@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.c3242ff3ba5c.Icb48c8d61bede5dda7ef267bff10e4798e9dc77b@changeid
---
 drivers/net/wireless/intel/iwlwifi/fw/acpi.c | 99 ++++++++++++++++++++++++----
 drivers/net/wireless/intel/iwlwifi/fw/acpi.h | 22 +++++++
 2 files changed, 110 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
index e2184ba4d8b5..dc769b580431 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.c
@@ -58,44 +58,121 @@
  *
  *****************************************************************************/
 
+#include <linux/uuid.h>
 #include "iwl-drv.h"
 #include "iwl-debug.h"
 #include "acpi.h"
 #include "fw/runtime.h"
 
-void *iwl_acpi_get_object(struct device *dev, acpi_string method)
+static const guid_t intel_wifi_guid = GUID_INIT(0xF21202BF, 0x8F78, 0x4DC6,
+						0xA5, 0xB3, 0x1F, 0x73,
+						0x8E, 0x28, 0x5A, 0xDE);
+
+static int iwl_acpi_get_handle(struct device *dev, acpi_string method,
+			       acpi_handle *ret_handle)
 {
 	acpi_handle root_handle;
-	acpi_handle handle;
-	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
 	acpi_status status;
 
 	root_handle = ACPI_HANDLE(dev);
 	if (!root_handle) {
 		IWL_DEBUG_DEV_RADIO(dev,
-				    "Could not retrieve root port ACPI handle\n");
-		return ERR_PTR(-ENOENT);
+				    "ACPI: Could not retrieve root port handle\n");
+		return -ENOENT;
 	}
 
-	/* Get the method's handle */
-	status = acpi_get_handle(root_handle, method, &handle);
+	status = acpi_get_handle(root_handle, method, ret_handle);
 	if (ACPI_FAILURE(status)) {
-		IWL_DEBUG_DEV_RADIO(dev, "%s method not found\n", method);
-		return ERR_PTR(-ENOENT);
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: %s method not found\n", method);
+		return -ENOENT;
 	}
+	return 0;
+}
+
+void *iwl_acpi_get_object(struct device *dev, acpi_string method)
+{
+	struct acpi_buffer buf = {ACPI_ALLOCATE_BUFFER, NULL};
+	acpi_handle handle;
+	acpi_status status;
+	int ret;
+
+	ret = iwl_acpi_get_handle(dev, method, &handle);
+	if (ret)
+		return ERR_PTR(-ENOENT);
 
 	/* Call the method with no arguments */
 	status = acpi_evaluate_object(handle, NULL, NULL, &buf);
 	if (ACPI_FAILURE(status)) {
-		IWL_DEBUG_DEV_RADIO(dev, "%s invocation failed (0x%x)\n",
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: %s method invocation failed (status: 0x%x)\n",
 				    method, status);
 		return ERR_PTR(-ENOENT);
 	}
-
 	return buf.pointer;
 }
 IWL_EXPORT_SYMBOL(iwl_acpi_get_object);
 
+/**
+* Generic function for evaluating a method defined in the device specific
+* method (DSM) interface. The returned acpi object must be freed by calling
+* function.
+*/
+void *iwl_acpi_get_dsm_object(struct device *dev, int rev, int func,
+			      union acpi_object *args)
+{
+	union acpi_object *obj;
+
+	obj = acpi_evaluate_dsm(ACPI_HANDLE(dev), &intel_wifi_guid, rev, func,
+				args);
+	if (!obj) {
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: DSM method invocation failed (rev: %d, func:%d)\n",
+				    rev, func);
+		return ERR_PTR(-ENOENT);
+	}
+	return obj;
+}
+
+/**
+ * Evaluate a DSM with no arguments and a single u8 return value (inside a
+ * buffer object), verify and return that value.
+ */
+int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func)
+{
+	union acpi_object *obj;
+	int ret;
+
+	obj = iwl_acpi_get_dsm_object(dev, rev, func, NULL);
+	if (IS_ERR(obj))
+		return -ENOENT;
+
+	if (obj->type != ACPI_TYPE_BUFFER) {
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: DSM method did not return a valid object, type=%d\n",
+				    obj->type);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	if (obj->buffer.length != sizeof(u8)) {
+		IWL_DEBUG_DEV_RADIO(dev,
+				    "ACPI: DSM method returned invalid buffer, length=%d\n",
+				    obj->buffer.length);
+		ret = -EINVAL;
+		goto out;
+	}
+
+	ret = obj->buffer.pointer[0];
+	IWL_DEBUG_DEV_RADIO(dev,
+			    "ACPI: DSM method evaluated: func=%d, ret=%d\n",
+			    func, ret);
+out:
+	ACPI_FREE(obj);
+	return ret;
+}
+IWL_EXPORT_SYMBOL(iwl_acpi_get_dsm_u8);
+
 union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 					 union acpi_object *data,
 					 int data_size, int *tbl_rev)
diff --git a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
index 6a646dc524e1..0ada9eddb8b1 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/acpi.h
@@ -127,12 +127,23 @@ struct iwl_geo_profile {
 	u8 values[ACPI_GEO_TABLE_SIZE];
 };
 
+enum iwl_dsm_funcs_rev_0 {
+	DSM_FUNC_QUERY = 0,
+	DSM_FUNC_DISABLE_SRD = 1,
+	DSM_FUNC_ENABLE_INDONESIA_5G2 = 2,
+};
+
 #ifdef CONFIG_ACPI
 
 struct iwl_fw_runtime;
 
 void *iwl_acpi_get_object(struct device *dev, acpi_string method);
 
+void *iwl_acpi_get_dsm_object(struct device *dev, int rev, int func,
+			      union acpi_object *args);
+
+int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func);
+
 union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 					 union acpi_object *data,
 					 int data_size, int *tbl_rev);
@@ -192,6 +203,17 @@ static inline void *iwl_acpi_get_object(struct device *dev, acpi_string method)
 	return ERR_PTR(-ENOENT);
 }
 
+static inline void *iwl_acpi_get_dsm_object(struct device *dev, int rev,
+					    int func, union acpi_object *args)
+{
+	return ERR_PTR(-ENOENT);
+}
+
+static inline int iwl_acpi_get_dsm_u8(struct device *dev, int rev, int func)
+{
+	return -ENOENT;
+}
+
 static inline union acpi_object *iwl_acpi_get_wifi_pkg(struct device *dev,
 						       union acpi_object *data,
 						       int data_size,
-- 
cgit v1.2.3-59-g8ed1b


From f5b1cb2e615f57a6bf00d600b1f31e9f8058daa5 Mon Sep 17 00:00:00 2001
From: Gil Adam <gil.adam@intel.com>
Date: Fri, 29 May 2020 09:39:25 +0300
Subject: iwlwifi: acpi: evaluate dsm to enable 5.2 bands in Indonesia

Evaluate the appropriate DSM from ACPI to enable 5.15,5.35 GHz
bands in Indonesia. If enabled send LARI_CONFIG_CHANGE cmd to fw.

Signed-off-by: Gil Adam <gil.adam@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.f549b75bfdac.Iac74a6ffe45aff887cea13ee1d31b100ca11e249@changeid
---
 .../net/wireless/intel/iwlwifi/fw/api/nvm-reg.h    | 34 ++++++++++++++++-
 drivers/net/wireless/intel/iwlwifi/mvm/fw.c        | 43 ++++++++++++++++++++++
 2 files changed, 75 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
index 2d230a7893c2..fd719c37428c 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/nvm-reg.h
@@ -8,7 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(C) 2018 - 2019 Intel Corporation
+ * Copyright(C) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2015 Intel Mobile Communications GmbH
  * Copyright(c) 2016 - 2017 Intel Deutschland GmbH
- * Copyright(C) 2018 - 2019 Intel Corporation
+ * Copyright(C) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -74,6 +74,11 @@ enum iwl_regulatory_and_nvm_subcmd_ids {
 	 */
 	NVM_ACCESS_COMPLETE = 0x0,
 
+	/**
+	 * @LARI_CONFIG_CHANGE: &struct iwl_lari_config_change_cmd
+	 */
+	LARI_CONFIG_CHANGE = 0x1,
+
 	/**
 	 * @NVM_GET_INFO:
 	 * Command is &struct iwl_nvm_get_info,
@@ -446,4 +451,29 @@ struct iwl_tas_config_cmd {
 	__le32 black_list_size;
 	__le32 black_list_array[IWL_TAS_BLACK_LIST_MAX];
 } __packed; /* TAS_CONFIG_CMD_API_S_VER_2 */
+
+/**
+ * enum iwl_lari_config_masks - bit masks for the various LARI config operations
+ * @LARI_CONFIG_DISABLE_11AC_UKRAINE_MSK: disable 11ac in ukraine
+ * @LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK: ETSI 5.8GHz SRD passive scan
+ * @LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK: ETSI 5.8GHz SRD disabled
+ * @LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK: enable 5.15/5.35GHz bands in
+ * 	Indonesia
+ */
+enum iwl_lari_config_masks {
+	LARI_CONFIG_DISABLE_11AC_UKRAINE_MSK		= BIT(0),
+	LARI_CONFIG_CHANGE_ETSI_TO_PASSIVE_MSK		= BIT(1),
+	LARI_CONFIG_CHANGE_ETSI_TO_DISABLED_MSK		= BIT(2),
+	LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK		= BIT(3),
+};
+
+/**
+ * struct iwl_lari_config_change_cmd - change LARI configuration
+ * @config_bitmap: bit map of the config commands. each bit will trigger a
+ * different predefined FW config operation
+ */
+struct iwl_lari_config_change_cmd {
+	__le32 config_bitmap;
+} __packed; /* LARI_CHANGE_CONF_CMD_S_VER_1 */
+
 #endif /* __iwl_fw_api_nvm_reg_h__ */
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
index 5e8d3f8c3d86..95a613537047 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/fw.c
@@ -988,6 +988,44 @@ static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
 	if (ret < 0)
 		IWL_DEBUG_RADIO(mvm, "failed to send TAS_CONFIG (%d)\n", ret);
 }
+
+static bool iwl_mvm_eval_dsm_indonesia_5g2(struct iwl_mvm *mvm)
+{
+	int ret = iwl_acpi_get_dsm_u8((&mvm->fwrt)->dev, 0,
+				      DSM_FUNC_ENABLE_INDONESIA_5G2);
+
+	IWL_DEBUG_RADIO(mvm,
+			"Evaluated DSM function ENABLE_INDONESIA_5G2, ret=%d\n",
+			ret);
+
+	return ret == 1;
+}
+
+static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
+{
+	int ret;
+	struct iwl_lari_config_change_cmd cmd = {};
+
+	if (iwl_mvm_eval_dsm_indonesia_5g2(mvm))
+		cmd.config_bitmap |=
+			cpu_to_le32(LARI_CONFIG_ENABLE_5G2_IN_INDONESIA_MSK);
+
+	/* apply more config masks here */
+
+	if (cmd.config_bitmap) {
+		IWL_DEBUG_RADIO(mvm,
+				"sending LARI_CONFIG_CHANGE, config_bitmap=0x%x\n",
+				le32_to_cpu(cmd.config_bitmap));
+		ret = iwl_mvm_send_cmd_pdu(mvm,
+					   WIDE_ID(REGULATORY_AND_NVM_GROUP,
+						   LARI_CONFIG_CHANGE),
+					   0, sizeof(cmd), &cmd);
+		if (ret < 0)
+			IWL_DEBUG_RADIO(mvm,
+					"Failed to send LARI_CONFIG_CHANGE (%d)\n",
+					ret);
+	}
+}
 #else /* CONFIG_ACPI */
 
 inline int iwl_mvm_sar_select_profile(struct iwl_mvm *mvm,
@@ -1019,6 +1057,10 @@ static int iwl_mvm_ppag_init(struct iwl_mvm *mvm)
 static void iwl_mvm_tas_init(struct iwl_mvm *mvm)
 {
 }
+
+static void iwl_mvm_lari_cfg(struct iwl_mvm *mvm)
+{
+}
 #endif /* CONFIG_ACPI */
 
 void iwl_mvm_send_recovery_cmd(struct iwl_mvm *mvm, u32 flags)
@@ -1293,6 +1335,7 @@ int iwl_mvm_up(struct iwl_mvm *mvm)
 	if (ret)
 		goto error;
 
+	iwl_mvm_lari_cfg(mvm);
 	/*
 	 * RTNL is not taken during Ct-kill, but we don't need to scan/Tx
 	 * anyway, so don't init MCC.
-- 
cgit v1.2.3-59-g8ed1b


From 61576240558afce5abb8f391a88a6dfcf25ca7a1 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 29 May 2020 09:39:26 +0300
Subject: iwlwifi: pcie: gen3: indicate 8k/12k RB size to device

Newer firmware versions will parse a few extra bits in the
context info to be able to determine whether we are using
bigger than 4k RBs, indicate 8k/12k to them if we actually
use those (e.g. for sniffer based on the module parameter).

Signed-off-by: Johannes Berg <johannes.berg@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.f83f994572ca.Ibcfd66c3f9b69e68a53b3b2df8331ffb225db655@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h | 12 ++++++++++--
 drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c   |  8 ++++++++
 2 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
index ebea99189ca9..9d7a04833cd0 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-context-info-gen3.h
@@ -5,7 +5,7 @@
  *
  * GPL LICENSE SUMMARY
  *
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018, 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -18,7 +18,7 @@
  *
  * BSD LICENSE
  *
- * Copyright(c) 2018 Intel Corporation
+ * Copyright(c) 2018, 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -93,6 +93,11 @@ enum iwl_prph_scratch_mtr_format {
  * @IWL_PRPH_SCRATCH_MTR_FORMAT: a mask for the size of the tfd.
  *	There are 4 optional values: 0: 16 bit, 1: 32 bit, 2: 64 bit,
  *	3: 256 bit.
+ * @IWL_PRPH_SCRATCH_RB_SIZE_EXT_MASK: RB size full information, ignored
+ *	by older firmware versions, so set IWL_PRPH_SCRATCH_RB_SIZE_4K
+ *	appropriately; use the below values for this.
+ * @IWL_PRPH_SCRATCH_RB_SIZE_EXT_8K: 8kB RB size
+ * @IWL_PRPH_SCRATCH_RB_SIZE_EXT_12K: 12kB RB size
  */
 enum iwl_prph_scratch_flags {
 	IWL_PRPH_SCRATCH_EARLY_DEBUG_EN		= BIT(4),
@@ -103,6 +108,9 @@ enum iwl_prph_scratch_flags {
 	IWL_PRPH_SCRATCH_RB_SIZE_4K		= BIT(16),
 	IWL_PRPH_SCRATCH_MTR_MODE		= BIT(17),
 	IWL_PRPH_SCRATCH_MTR_FORMAT		= BIT(18) | BIT(19),
+	IWL_PRPH_SCRATCH_RB_SIZE_EXT_MASK	= 0xf << 20,
+	IWL_PRPH_SCRATCH_RB_SIZE_EXT_8K		= 8 << 20,
+	IWL_PRPH_SCRATCH_RB_SIZE_EXT_12K	= 9 << 20,
 };
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index b6a5921a63c3..dcd81ee1f773 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -138,9 +138,17 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 	case IWL_AMSDU_2K:
 		break;
 	case IWL_AMSDU_4K:
+		control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_4K;
+		break;
 	case IWL_AMSDU_8K:
+		control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_4K;
+		/* if firmware supports the ext size, tell it */
+		control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_EXT_8K;
+		break;
 	case IWL_AMSDU_12K:
 		control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_4K;
+		/* if firmware supports the ext size, tell it */
+		control_flags |= IWL_PRPH_SCRATCH_RB_SIZE_EXT_12K;
 		break;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 4807e73685f1d52e9143977ee1763b9d050daef3 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 29 May 2020 09:39:27 +0300
Subject: iwlwifi: move iwl_txq and substructures to a common trans header

The txq code is not directly related to the PCIe transport, so move the
structures it uses to the common iwl-trans.h header.

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.d9d0082b8369.I8298f6e83804c1ea99217a79d95d23ef68b184d4@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-trans.h     | 107 +++++++++++++++++++++
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h | 107 ---------------------
 2 files changed, 107 insertions(+), 107 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index bba527b339b5..57361b27351e 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -795,6 +795,113 @@ struct iwl_trans_debug {
 	u32 domains_bitmap;
 };
 
+struct iwl_dma_ptr {
+	dma_addr_t dma;
+	void *addr;
+	size_t size;
+};
+
+struct iwl_cmd_meta {
+	/* only for SYNC commands, iff the reply skb is wanted */
+	struct iwl_host_cmd *source;
+	u32 flags;
+	u32 tbs;
+};
+
+/*
+ * The FH will write back to the first TB only, so we need to copy some data
+ * into the buffer regardless of whether it should be mapped or not.
+ * This indicates how big the first TB must be to include the scratch buffer
+ * and the assigned PN.
+ * Since PN location is 8 bytes at offset 12, it's 20 now.
+ * If we make it bigger then allocations will be bigger and copy slower, so
+ * that's probably not useful.
+ */
+#define IWL_FIRST_TB_SIZE	20
+#define IWL_FIRST_TB_SIZE_ALIGN ALIGN(IWL_FIRST_TB_SIZE, 64)
+
+struct iwl_pcie_txq_entry {
+	void *cmd;
+	struct sk_buff *skb;
+	/* buffer to free after command completes */
+	const void *free_buf;
+	struct iwl_cmd_meta meta;
+};
+
+struct iwl_pcie_first_tb_buf {
+	u8 buf[IWL_FIRST_TB_SIZE_ALIGN];
+};
+
+/**
+ * struct iwl_txq - Tx Queue for DMA
+ * @q: generic Rx/Tx queue descriptor
+ * @tfds: transmit frame descriptors (DMA memory)
+ * @first_tb_bufs: start of command headers, including scratch buffers, for
+ *	the writeback -- this is DMA memory and an array holding one buffer
+ *	for each command on the queue
+ * @first_tb_dma: DMA address for the first_tb_bufs start
+ * @entries: transmit entries (driver state)
+ * @lock: queue lock
+ * @stuck_timer: timer that fires if queue gets stuck
+ * @trans: pointer back to transport (for timer)
+ * @need_update: indicates need to update read/write index
+ * @ampdu: true if this queue is an ampdu queue for an specific RA/TID
+ * @wd_timeout: queue watchdog timeout (jiffies) - per queue
+ * @frozen: tx stuck queue timer is frozen
+ * @frozen_expiry_remainder: remember how long until the timer fires
+ * @bc_tbl: byte count table of the queue (relevant only for gen2 transport)
+ * @write_ptr: 1-st empty entry (index) host_w
+ * @read_ptr: last used entry (index) host_r
+ * @dma_addr:  physical addr for BD's
+ * @n_window: safe queue window
+ * @id: queue id
+ * @low_mark: low watermark, resume queue if free space more than this
+ * @high_mark: high watermark, stop queue if free space less than this
+ *
+ * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
+ * descriptors) and required locking structures.
+ *
+ * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
+ * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
+ * there might be HW changes in the future). For the normal TX
+ * queues, n_window, which is the size of the software queue data
+ * is also 256; however, for the command queue, n_window is only
+ * 32 since we don't need so many commands pending. Since the HW
+ * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256.
+ * This means that we end up with the following:
+ *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
+ *  SW entries:           | 0      | ... | 31          |
+ * where N is a number between 0 and 7. This means that the SW
+ * data is a window overlayed over the HW queue.
+ */
+struct iwl_txq {
+	void *tfds;
+	struct iwl_pcie_first_tb_buf *first_tb_bufs;
+	dma_addr_t first_tb_dma;
+	struct iwl_pcie_txq_entry *entries;
+	/* lock for syncing changes on the queue */
+	spinlock_t lock;
+	unsigned long frozen_expiry_remainder;
+	struct timer_list stuck_timer;
+	struct iwl_trans *trans;
+	bool need_update;
+	bool frozen;
+	bool ampdu;
+	int block;
+	unsigned long wd_timeout;
+	struct sk_buff_head overflow_q;
+	struct iwl_dma_ptr bc_tbl;
+
+	int write_ptr;
+	int read_ptr;
+	dma_addr_t dma_addr;
+	int n_window;
+	u32 id;
+	int low_mark;
+	int high_mark;
+
+	bool overflow_tx;
+};
 /**
  * struct iwl_trans - transport common data
  *
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 3950f5784a15..3c6a119aede4 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -246,12 +246,6 @@ struct iwl_rb_allocator {
 	struct work_struct rx_alloc;
 };
 
-struct iwl_dma_ptr {
-	dma_addr_t dma;
-	void *addr;
-	size_t size;
-};
-
 /**
  * iwl_queue_inc_wrap - increment queue index, wrap back to beginning
  * @index -- current index
@@ -290,107 +284,6 @@ static inline int iwl_queue_dec_wrap(struct iwl_trans *trans, int index)
 		(trans->trans_cfg->base_params->max_tfd_queue_size - 1);
 }
 
-struct iwl_cmd_meta {
-	/* only for SYNC commands, iff the reply skb is wanted */
-	struct iwl_host_cmd *source;
-	u32 flags;
-	u32 tbs;
-};
-
-/*
- * The FH will write back to the first TB only, so we need to copy some data
- * into the buffer regardless of whether it should be mapped or not.
- * This indicates how big the first TB must be to include the scratch buffer
- * and the assigned PN.
- * Since PN location is 8 bytes at offset 12, it's 20 now.
- * If we make it bigger then allocations will be bigger and copy slower, so
- * that's probably not useful.
- */
-#define IWL_FIRST_TB_SIZE	20
-#define IWL_FIRST_TB_SIZE_ALIGN ALIGN(IWL_FIRST_TB_SIZE, 64)
-
-struct iwl_pcie_txq_entry {
-	void *cmd;
-	struct sk_buff *skb;
-	/* buffer to free after command completes */
-	const void *free_buf;
-	struct iwl_cmd_meta meta;
-};
-
-struct iwl_pcie_first_tb_buf {
-	u8 buf[IWL_FIRST_TB_SIZE_ALIGN];
-};
-
-/**
- * struct iwl_txq - Tx Queue for DMA
- * @q: generic Rx/Tx queue descriptor
- * @tfds: transmit frame descriptors (DMA memory)
- * @first_tb_bufs: start of command headers, including scratch buffers, for
- *	the writeback -- this is DMA memory and an array holding one buffer
- *	for each command on the queue
- * @first_tb_dma: DMA address for the first_tb_bufs start
- * @entries: transmit entries (driver state)
- * @lock: queue lock
- * @stuck_timer: timer that fires if queue gets stuck
- * @trans: pointer back to transport (for timer)
- * @need_update: indicates need to update read/write index
- * @ampdu: true if this queue is an ampdu queue for an specific RA/TID
- * @wd_timeout: queue watchdog timeout (jiffies) - per queue
- * @frozen: tx stuck queue timer is frozen
- * @frozen_expiry_remainder: remember how long until the timer fires
- * @bc_tbl: byte count table of the queue (relevant only for gen2 transport)
- * @write_ptr: 1-st empty entry (index) host_w
- * @read_ptr: last used entry (index) host_r
- * @dma_addr:  physical addr for BD's
- * @n_window: safe queue window
- * @id: queue id
- * @low_mark: low watermark, resume queue if free space more than this
- * @high_mark: high watermark, stop queue if free space less than this
- *
- * A Tx queue consists of circular buffer of BDs (a.k.a. TFDs, transmit frame
- * descriptors) and required locking structures.
- *
- * Note the difference between TFD_QUEUE_SIZE_MAX and n_window: the hardware
- * always assumes 256 descriptors, so TFD_QUEUE_SIZE_MAX is always 256 (unless
- * there might be HW changes in the future). For the normal TX
- * queues, n_window, which is the size of the software queue data
- * is also 256; however, for the command queue, n_window is only
- * 32 since we don't need so many commands pending. Since the HW
- * still uses 256 BDs for DMA though, TFD_QUEUE_SIZE_MAX stays 256.
- * This means that we end up with the following:
- *  HW entries: | 0 | ... | N * 32 | ... | N * 32 + 31 | ... | 255 |
- *  SW entries:           | 0      | ... | 31          |
- * where N is a number between 0 and 7. This means that the SW
- * data is a window overlayed over the HW queue.
- */
-struct iwl_txq {
-	void *tfds;
-	struct iwl_pcie_first_tb_buf *first_tb_bufs;
-	dma_addr_t first_tb_dma;
-	struct iwl_pcie_txq_entry *entries;
-	spinlock_t lock;
-	unsigned long frozen_expiry_remainder;
-	struct timer_list stuck_timer;
-	struct iwl_trans *trans;
-	bool need_update;
-	bool frozen;
-	bool ampdu;
-	int block;
-	unsigned long wd_timeout;
-	struct sk_buff_head overflow_q;
-	struct iwl_dma_ptr bc_tbl;
-
-	int write_ptr;
-	int read_ptr;
-	dma_addr_t dma_addr;
-	int n_window;
-	u32 id;
-	int low_mark;
-	int high_mark;
-
-	bool overflow_tx;
-};
-
 static inline dma_addr_t
 iwl_pcie_get_first_tb_dma(struct iwl_txq *txq, int idx)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 4f4822b7cd5ab818a62815cc8eeca495cf8ec872 Mon Sep 17 00:00:00 2001
From: Mordechay Goodstein <mordechay.goodstein@intel.com>
Date: Fri, 29 May 2020 09:39:28 +0300
Subject: iwlwifi: move txq-specific from trans_pcie to common trans

We don't want to have txq code in the PCIe transport code, so move all
the relevant elements to a new iwl_trans_txqs structure and store it in
iwl_trans.

spatch

@ replace_pcie @
struct iwl_trans_pcie *trans_pcie;
@@

(
-trans_pcie->queue_stopped
+trans->txqs.queue_stopped
|
-trans_pcie->queue_used
+trans->txqs.queue_used
|
-trans_pcie->txq
+trans->txqs.txq
|
-trans_pcie->txq
+trans->txqs.txq
|
-trans_pcie->cmd_queue
+trans->txqs.cmd.q_id
|
-trans_pcie->cmd_fifo
+trans->txqs.cmd.fifo
|
-trans_pcie->cmd_q_wdg_timeout
+trans->txqs.cmd.wdg_timeout
)

// clean all new unused variables
@ depends on replace_pcie @
type T;
identifier i;
expression E;
@@
- T i = E;
 ... when != i

Signed-off-by: Mordechay Goodstein <mordechay.goodstein@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.a428d3c9d66f.Ie04ae55f33954636a39c98e7ae1e739c0507435b@changeid
---
 drivers/net/wireless/intel/iwlwifi/iwl-trans.h     |  21 ++++
 .../wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c   |   2 +-
 .../net/wireless/intel/iwlwifi/pcie/ctxt-info.c    |   6 +-
 drivers/net/wireless/intel/iwlwifi/pcie/internal.h |  14 +--
 drivers/net/wireless/intel/iwlwifi/pcie/rx.c       |   6 +-
 .../net/wireless/intel/iwlwifi/pcie/trans-gen2.c   |  11 +-
 drivers/net/wireless/intel/iwlwifi/pcie/trans.c    |  35 +++----
 drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c  |  70 ++++++-------
 drivers/net/wireless/intel/iwlwifi/pcie/tx.c       | 112 ++++++++++-----------
 9 files changed, 138 insertions(+), 139 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
index 57361b27351e..a301e2484cdb 100644
--- a/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
+++ b/drivers/net/wireless/intel/iwlwifi/iwl-trans.h
@@ -902,6 +902,25 @@ struct iwl_txq {
 
 	bool overflow_tx;
 };
+
+/**
+ * struct iwl_trans_txqs - transport tx queues data
+ *
+ * @queue_used - bit mask of used queues
+ * @queue_stopped - bit mask of stopped queues
+ */
+struct iwl_trans_txqs {
+	unsigned long queue_used[BITS_TO_LONGS(IWL_MAX_TVQM_QUEUES)];
+	unsigned long queue_stopped[BITS_TO_LONGS(IWL_MAX_TVQM_QUEUES)];
+	struct iwl_txq *txq[IWL_MAX_TVQM_QUEUES];
+	struct {
+		u8 fifo;
+		u8 q_id;
+		unsigned int wdg_timeout;
+	} cmd;
+
+};
+
 /**
  * struct iwl_trans - transport common data
  *
@@ -935,6 +954,7 @@ struct iwl_txq {
  * @system_pm_mode: the system-wide power management mode in use.
  *	This mode is set dynamically, depending on the WoWLAN values
  *	configured from the userspace at runtime.
+ * @iwl_trans_txqs: transport tx queues data.
  */
 struct iwl_trans {
 	const struct iwl_trans_ops *ops;
@@ -982,6 +1002,7 @@ struct iwl_trans {
 	enum iwl_plat_pm_mode system_pm_mode;
 
 	const char *name;
+	struct iwl_trans_txqs txqs;
 
 	/* pointer to trans specific struct */
 	/*Ensure that this pointer will always be aligned to sizeof pointer */
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
index dcd81ee1f773..1ab136600415 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info-gen3.c
@@ -221,7 +221,7 @@ int iwl_pcie_ctxt_info_gen3_init(struct iwl_trans *trans,
 	ctxt_info_gen3->tr_idx_arr_size =
 		cpu_to_le16(IWL_NUM_OF_TRANSFER_RINGS);
 	ctxt_info_gen3->mtr_base_addr =
-		cpu_to_le64(trans_pcie->txq[trans_pcie->cmd_queue]->dma_addr);
+		cpu_to_le64(trans->txqs.txq[trans->txqs.cmd.q_id]->dma_addr);
 	ctxt_info_gen3->mcr_base_addr =
 		cpu_to_le64(trans_pcie->rxq->used_bd_dma);
 	ctxt_info_gen3->mtr_size =
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
index b65405009d02..23abfbd096b0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/ctxt-info.c
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,7 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -263,7 +263,7 @@ int iwl_pcie_ctxt_info_init(struct iwl_trans *trans,
 
 	/* initialize TX command queue */
 	ctxt_info->hcmd_cfg.cmd_queue_addr =
-		cpu_to_le64(trans_pcie->txq[trans_pcie->cmd_queue]->dma_addr);
+		cpu_to_le64(trans->txqs.txq[trans->txqs.cmd.q_id]->dma_addr);
 	ctxt_info->hcmd_cfg.cmd_queue_size =
 		TFD_QUEUE_CB_SIZE(IWL_CMD_QUEUE_SIZE);
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
index 3c6a119aede4..55808ba10d27 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/internal.h
@@ -454,9 +454,6 @@ struct iwl_trans_pcie {
 	struct dma_pool *bc_pool;
 
 	struct iwl_txq *txq_memory;
-	struct iwl_txq *txq[IWL_MAX_TVQM_QUEUES];
-	unsigned long queue_used[BITS_TO_LONGS(IWL_MAX_TVQM_QUEUES)];
-	unsigned long queue_stopped[BITS_TO_LONGS(IWL_MAX_TVQM_QUEUES)];
 
 	/* PCI bus related data */
 	struct pci_dev *pci_dev;
@@ -470,10 +467,7 @@ struct iwl_trans_pcie {
 
 	u8 page_offs, dev_cmd_offs;
 
-	u8 cmd_queue;
 	u8 def_rx_queue;
-	u8 cmd_fifo;
-	unsigned int cmd_q_wdg_timeout;
 	u8 n_no_reclaim_cmds;
 	u8 no_reclaim_cmds[MAX_NO_RECLAIM_CMDS];
 	u8 max_tbs;
@@ -876,9 +870,7 @@ void iwl_pcie_handle_rfkill_irq(struct iwl_trans *trans);
 static inline void iwl_wake_queue(struct iwl_trans *trans,
 				  struct iwl_txq *txq)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
-	if (test_and_clear_bit(txq->id, trans_pcie->queue_stopped)) {
+	if (test_and_clear_bit(txq->id, trans->txqs.queue_stopped)) {
 		IWL_DEBUG_TX_QUEUES(trans, "Wake hwq %d\n", txq->id);
 		iwl_op_mode_queue_not_full(trans->op_mode, txq->id);
 	}
@@ -887,9 +879,7 @@ static inline void iwl_wake_queue(struct iwl_trans *trans,
 static inline void iwl_stop_queue(struct iwl_trans *trans,
 				  struct iwl_txq *txq)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
-	if (!test_and_set_bit(txq->id, trans_pcie->queue_stopped)) {
+	if (!test_and_set_bit(txq->id, trans->txqs.queue_stopped)) {
 		iwl_op_mode_queue_full(trans->op_mode, txq->id);
 		IWL_DEBUG_TX_QUEUES(trans, "Stop hwq %d\n", txq->id);
 	} else
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
index 72d1cf27e6a4..24cb1b1f21f0 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/rx.c
@@ -1284,7 +1284,7 @@ static void iwl_pcie_rx_handle_rb(struct iwl_trans *trans,
 				int i)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	bool page_stolen = false;
 	int max_len = trans_pcie->rx_buf_bytes;
 	u32 offset = 0;
@@ -1671,9 +1671,9 @@ static void iwl_pcie_irq_handle_error(struct iwl_trans *trans)
 	}
 
 	for (i = 0; i < trans->trans_cfg->base_params->num_of_queues; i++) {
-		if (!trans_pcie->txq[i])
+		if (!trans->txqs.txq[i])
 			continue;
-		del_timer(&trans_pcie->txq[i]->stuck_timer);
+		del_timer(&trans->txqs.txq[i]->stuck_timer);
 	}
 
 	/* The STATUS_FW_ERROR bit is set in this function. This must happen
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
index 19a2c72081ab..97c9e9c87436 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans-gen2.c
@@ -6,7 +6,7 @@
  * GPL LICENSE SUMMARY
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -20,7 +20,7 @@
  * BSD LICENSE
  *
  * Copyright(c) 2017 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -245,7 +245,7 @@ static int iwl_pcie_gen2_nic_init(struct iwl_trans *trans)
 		return -ENOMEM;
 
 	/* Allocate or reset and init all Tx and Command queues */
-	if (iwl_pcie_gen2_tx_init(trans, trans_pcie->cmd_queue, queue_size))
+	if (iwl_pcie_gen2_tx_init(trans, trans->txqs.cmd.q_id, queue_size))
 		return -ENOMEM;
 
 	/* enable shadow regs in HW */
@@ -262,8 +262,9 @@ void iwl_trans_pcie_gen2_fw_alive(struct iwl_trans *trans, u32 scd_addr)
 	iwl_pcie_reset_ict(trans);
 
 	/* make sure all queue are not stopped/used */
-	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_stopped, 0,
+	       sizeof(trans->txqs.queue_stopped));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	/* now that we got alive we can free the fw image & the context info.
 	 * paging memory cannot be freed included since FW will still use it
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
index 3bcbc2967c88..e5160d620868 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/trans.c
@@ -1904,9 +1904,9 @@ static void iwl_trans_pcie_configure(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	trans_pcie->cmd_queue = trans_cfg->cmd_queue;
-	trans_pcie->cmd_fifo = trans_cfg->cmd_fifo;
-	trans_pcie->cmd_q_wdg_timeout = trans_cfg->cmd_q_wdg_timeout;
+	trans->txqs.cmd.q_id = trans_cfg->cmd_queue;
+	trans->txqs.cmd.fifo = trans_cfg->cmd_fifo;
+	trans->txqs.cmd.wdg_timeout = trans_cfg->cmd_q_wdg_timeout;
 	if (WARN_ON(trans_cfg->n_no_reclaim_cmds > MAX_NO_RECLAIM_CMDS))
 		trans_pcie->n_no_reclaim_cmds = 0;
 	else
@@ -2199,11 +2199,10 @@ static void iwl_trans_pcie_freeze_txq_timer(struct iwl_trans *trans,
 					    unsigned long txqs,
 					    bool freeze)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int queue;
 
 	for_each_set_bit(queue, &txqs, BITS_PER_LONG) {
-		struct iwl_txq *txq = trans_pcie->txq[queue];
+		struct iwl_txq *txq = trans->txqs.txq[queue];
 		unsigned long now;
 
 		spin_lock_bh(&txq->lock);
@@ -2251,13 +2250,12 @@ next_queue:
 
 static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int i;
 
 	for (i = 0; i < trans->trans_cfg->base_params->num_of_queues; i++) {
-		struct iwl_txq *txq = trans_pcie->txq[i];
+		struct iwl_txq *txq = trans->txqs.txq[i];
 
-		if (i == trans_pcie->cmd_queue)
+		if (i == trans->txqs.cmd.q_id)
 			continue;
 
 		spin_lock_bh(&txq->lock);
@@ -2326,7 +2324,6 @@ static int iwl_trans_pcie_rxq_dma_data(struct iwl_trans *trans, int queue,
 
 static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, int txq_idx)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
 	unsigned long now = jiffies;
 	bool overflow_tx;
@@ -2336,11 +2333,11 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, int txq_idx)
 	if (test_bit(STATUS_TRANS_DEAD, &trans->status))
 		return -ENODEV;
 
-	if (!test_bit(txq_idx, trans_pcie->queue_used))
+	if (!test_bit(txq_idx, trans->txqs.queue_used))
 		return -EINVAL;
 
 	IWL_DEBUG_TX_QUEUES(trans, "Emptying queue %d...\n", txq_idx);
-	txq = trans_pcie->txq[txq_idx];
+	txq = trans->txqs.txq[txq_idx];
 
 	spin_lock_bh(&txq->lock);
 	overflow_tx = txq->overflow_tx ||
@@ -2388,7 +2385,6 @@ static int iwl_trans_pcie_wait_txq_empty(struct iwl_trans *trans, int txq_idx)
 
 static int iwl_trans_pcie_wait_txqs_empty(struct iwl_trans *trans, u32 txq_bm)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int cnt;
 	int ret = 0;
 
@@ -2397,9 +2393,9 @@ static int iwl_trans_pcie_wait_txqs_empty(struct iwl_trans *trans, u32 txq_bm)
 	     cnt < trans->trans_cfg->base_params->num_of_queues;
 	     cnt++) {
 
-		if (cnt == trans_pcie->cmd_queue)
+		if (cnt == trans->txqs.cmd.q_id)
 			continue;
-		if (!test_bit(cnt, trans_pcie->queue_used))
+		if (!test_bit(cnt, trans->txqs.queue_used))
 			continue;
 		if (!(BIT(cnt) & txq_bm))
 			continue;
@@ -2573,13 +2569,12 @@ static int iwl_dbgfs_tx_queue_seq_show(struct seq_file *seq, void *v)
 	struct iwl_dbgfs_tx_queue_priv *priv = seq->private;
 	struct iwl_dbgfs_tx_queue_state *state = v;
 	struct iwl_trans *trans = priv->trans;
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[state->pos];
+	struct iwl_txq *txq = trans->txqs.txq[state->pos];
 
 	seq_printf(seq, "hwq %.3u: used=%d stopped=%d ",
 		   (unsigned int)state->pos,
-		   !!test_bit(state->pos, trans_pcie->queue_used),
-		   !!test_bit(state->pos, trans_pcie->queue_stopped));
+		   !!test_bit(state->pos, trans->txqs.queue_used),
+		   !!test_bit(state->pos, trans->txqs.queue_stopped));
 	if (txq)
 		seq_printf(seq,
 			   "read=%u write=%u need_update=%d frozen=%d n_window=%d ampdu=%d",
@@ -2589,7 +2584,7 @@ static int iwl_dbgfs_tx_queue_seq_show(struct seq_file *seq, void *v)
 	else
 		seq_puts(seq, "(unallocated)");
 
-	if (state->pos == trans_pcie->cmd_queue)
+	if (state->pos == trans->txqs.cmd.q_id)
 		seq_puts(seq, " (HCMD)");
 	seq_puts(seq, "\n");
 
@@ -3265,7 +3260,7 @@ static struct iwl_trans_dump_data
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_fw_error_dump_data *data;
-	struct iwl_txq *cmdq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *cmdq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	struct iwl_fw_error_dump_txcmd *txcmd;
 	struct iwl_trans_dump_data *dump_data;
 	u32 len, num_rbs = 0, monitor_len = 0;
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
index bb55563bba68..7fc7542535d8 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx-gen2.c
@@ -64,7 +64,6 @@
  */
 void iwl_pcie_gen2_tx_stop(struct iwl_trans *trans)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int txq_id;
 
 	/*
@@ -72,12 +71,13 @@ void iwl_pcie_gen2_tx_stop(struct iwl_trans *trans)
 	 * queues. This happens when we have an rfkill interrupt.
 	 * Since we stop Tx altogether - mark the queues as stopped.
 	 */
-	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_stopped, 0,
+	       sizeof(trans->txqs.queue_stopped));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	/* Unmap DMA from host system and free skb's */
-	for (txq_id = 0; txq_id < ARRAY_SIZE(trans_pcie->txq); txq_id++) {
-		if (!trans_pcie->txq[txq_id])
+	for (txq_id = 0; txq_id < ARRAY_SIZE(trans->txqs.txq); txq_id++) {
+		if (!trans->txqs.txq[txq_id])
 			continue;
 		iwl_pcie_gen2_txq_unmap(trans, txq_id);
 	}
@@ -716,7 +716,7 @@ int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_cmd_meta *out_meta;
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	u16 cmd_len;
 	int idx;
 	void *tfd;
@@ -725,7 +725,7 @@ int iwl_trans_pcie_gen2_tx(struct iwl_trans *trans, struct sk_buff *skb,
 		      "queue %d out of range", txq_id))
 		return -EINVAL;
 
-	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
+	if (WARN_ONCE(!test_bit(txq_id, trans->txqs.queue_used),
 		      "TX on unused queue %d\n", txq_id))
 		return -EINVAL;
 
@@ -819,7 +819,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 				      struct iwl_host_cmd *cmd)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	struct iwl_device_cmd *out_cmd;
 	struct iwl_cmd_meta *out_meta;
 	unsigned long flags;
@@ -931,7 +931,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 		cpu_to_le16(cmd_size - sizeof(struct iwl_cmd_header_wide));
 	out_cmd->hdr_wide.reserved = 0;
 	out_cmd->hdr_wide.sequence =
-		cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
+		cpu_to_le16(QUEUE_TO_SEQ(trans->txqs.cmd.q_id) |
 					 INDEX_TO_SEQ(txq->write_ptr));
 
 	cmd_pos = sizeof(struct iwl_cmd_header_wide);
@@ -979,7 +979,7 @@ static int iwl_pcie_gen2_enqueue_hcmd(struct iwl_trans *trans,
 		     "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
 		     iwl_get_cmd_string(trans, cmd->id), group_id,
 		     out_cmd->hdr.cmd, le16_to_cpu(out_cmd->hdr.sequence),
-		     cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
+		     cmd_size, txq->write_ptr, idx, trans->txqs.cmd.q_id);
 
 	/* start the TFD with the minimum copy bytes */
 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
@@ -1056,7 +1056,7 @@ static int iwl_pcie_gen2_send_hcmd_sync(struct iwl_trans *trans,
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	const char *cmd_str = iwl_get_cmd_string(trans, cmd->id);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	int cmd_idx;
 	int ret;
 
@@ -1175,14 +1175,14 @@ int iwl_trans_pcie_gen2_send_hcmd(struct iwl_trans *trans,
 void iwl_pcie_gen2_txq_unmap(struct iwl_trans *trans, int txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 
 	spin_lock_bh(&txq->lock);
 	while (txq->write_ptr != txq->read_ptr) {
 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
 				   txq_id, txq->read_ptr);
 
-		if (txq_id != trans_pcie->cmd_queue) {
+		if (txq_id != trans->txqs.cmd.q_id) {
 			int idx = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
 			struct sk_buff *skb = txq->entries[idx].skb;
 
@@ -1240,7 +1240,6 @@ void iwl_pcie_gen2_txq_free_memory(struct iwl_trans *trans,
  */
 static void iwl_pcie_gen2_txq_free(struct iwl_trans *trans, int txq_id)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *txq;
 	int i;
 
@@ -1248,7 +1247,7 @@ static void iwl_pcie_gen2_txq_free(struct iwl_trans *trans, int txq_id)
 		      "queue %d out of range", txq_id))
 		return;
 
-	txq = trans_pcie->txq[txq_id];
+	txq = trans->txqs.txq[txq_id];
 
 	if (WARN_ON(!txq))
 		return;
@@ -1256,7 +1255,7 @@ static void iwl_pcie_gen2_txq_free(struct iwl_trans *trans, int txq_id)
 	iwl_pcie_gen2_txq_unmap(trans, txq_id);
 
 	/* De-alloc array of command/tx buffers */
-	if (txq_id == trans_pcie->cmd_queue)
+	if (txq_id == trans->txqs.cmd.q_id)
 		for (i = 0; i < txq->n_window; i++) {
 			kzfree(txq->entries[i].cmd);
 			kzfree(txq->entries[i].free_buf);
@@ -1265,9 +1264,9 @@ static void iwl_pcie_gen2_txq_free(struct iwl_trans *trans, int txq_id)
 
 	iwl_pcie_gen2_txq_free_memory(trans, txq);
 
-	trans_pcie->txq[txq_id] = NULL;
+	trans->txqs.txq[txq_id] = NULL;
 
-	clear_bit(txq_id, trans_pcie->queue_used);
+	clear_bit(txq_id, trans->txqs.queue_used);
 }
 
 int iwl_trans_pcie_dyn_txq_alloc_dma(struct iwl_trans *trans,
@@ -1327,7 +1326,6 @@ int iwl_trans_pcie_txq_alloc_response(struct iwl_trans *trans,
 				      struct iwl_txq *txq,
 				      struct iwl_host_cmd *hcmd)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_tx_queue_cfg_rsp *rsp;
 	int ret, qid;
 	u32 wr_ptr;
@@ -1342,20 +1340,20 @@ int iwl_trans_pcie_txq_alloc_response(struct iwl_trans *trans,
 	qid = le16_to_cpu(rsp->queue_number);
 	wr_ptr = le16_to_cpu(rsp->write_pointer);
 
-	if (qid >= ARRAY_SIZE(trans_pcie->txq)) {
+	if (qid >= ARRAY_SIZE(trans->txqs.txq)) {
 		WARN_ONCE(1, "queue index %d unsupported", qid);
 		ret = -EIO;
 		goto error_free_resp;
 	}
 
-	if (test_and_set_bit(qid, trans_pcie->queue_used)) {
+	if (test_and_set_bit(qid, trans->txqs.queue_used)) {
 		WARN_ONCE(1, "queue %d already used", qid);
 		ret = -EIO;
 		goto error_free_resp;
 	}
 
 	txq->id = qid;
-	trans_pcie->txq[qid] = txq;
+	trans->txqs.txq[qid] = txq;
 	wr_ptr &= (trans->trans_cfg->base_params->max_tfd_queue_size - 1);
 
 	/* Place first TFD at index corresponding to start sequence number */
@@ -1413,8 +1411,6 @@ error:
 
 void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-
 	if (WARN(queue >= IWL_MAX_TVQM_QUEUES,
 		 "queue %d out of range", queue))
 		return;
@@ -1425,7 +1421,7 @@ void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue)
 	 * allow the op_mode to call txq_disable after it already called
 	 * stop_device.
 	 */
-	if (!test_and_clear_bit(queue, trans_pcie->queue_used)) {
+	if (!test_and_clear_bit(queue, trans->txqs.queue_used)) {
 		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
 			  "queue %d not used", queue);
 		return;
@@ -1433,22 +1429,21 @@ void iwl_trans_pcie_dyn_txq_free(struct iwl_trans *trans, int queue)
 
 	iwl_pcie_gen2_txq_unmap(trans, queue);
 
-	iwl_pcie_gen2_txq_free_memory(trans, trans_pcie->txq[queue]);
-	trans_pcie->txq[queue] = NULL;
+	iwl_pcie_gen2_txq_free_memory(trans, trans->txqs.txq[queue]);
+	trans->txqs.txq[queue] = NULL;
 
 	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", queue);
 }
 
 void iwl_pcie_gen2_tx_free(struct iwl_trans *trans)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int i;
 
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	/* Free all TX queues */
-	for (i = 0; i < ARRAY_SIZE(trans_pcie->txq); i++) {
-		if (!trans_pcie->txq[i])
+	for (i = 0; i < ARRAY_SIZE(trans->txqs.txq); i++) {
+		if (!trans->txqs.txq[i])
 			continue;
 
 		iwl_pcie_gen2_txq_free(trans, i);
@@ -1457,35 +1452,34 @@ void iwl_pcie_gen2_tx_free(struct iwl_trans *trans)
 
 int iwl_pcie_gen2_tx_init(struct iwl_trans *trans, int txq_id, int queue_size)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	struct iwl_txq *queue;
 	int ret;
 
 	/* alloc and init the tx queue */
-	if (!trans_pcie->txq[txq_id]) {
+	if (!trans->txqs.txq[txq_id]) {
 		queue = kzalloc(sizeof(*queue), GFP_KERNEL);
 		if (!queue) {
 			IWL_ERR(trans, "Not enough memory for tx queue\n");
 			return -ENOMEM;
 		}
-		trans_pcie->txq[txq_id] = queue;
+		trans->txqs.txq[txq_id] = queue;
 		ret = iwl_pcie_txq_alloc(trans, queue, queue_size, true);
 		if (ret) {
 			IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
 			goto error;
 		}
 	} else {
-		queue = trans_pcie->txq[txq_id];
+		queue = trans->txqs.txq[txq_id];
 	}
 
 	ret = iwl_pcie_txq_init(trans, queue, queue_size,
-				(txq_id == trans_pcie->cmd_queue));
+				(txq_id == trans->txqs.cmd.q_id));
 	if (ret) {
 		IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
 		goto error;
 	}
-	trans_pcie->txq[txq_id]->id = txq_id;
-	set_bit(txq_id, trans_pcie->queue_used);
+	trans->txqs.txq[txq_id]->id = txq_id;
+	set_bit(txq_id, trans->txqs.queue_used);
 
 	return 0;
 
diff --git a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
index 757cf4e9de33..5c6c3fa0d29f 100644
--- a/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
+++ b/drivers/net/wireless/intel/iwlwifi/pcie/tx.c
@@ -261,7 +261,7 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
 
 	WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
 
-	if (txq_id != trans_pcie->cmd_queue)
+	if (txq_id != trans->txqs.cmd.q_id)
 		sta_id = tx_cmd->sta_id;
 
 	bc_ent = cpu_to_le16(1 | (sta_id << 12));
@@ -279,7 +279,6 @@ static void iwl_pcie_txq_inval_byte_cnt_tbl(struct iwl_trans *trans,
 static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
 				    struct iwl_txq *txq)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	u32 reg = 0;
 	int txq_id = txq->id;
 
@@ -292,7 +291,7 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
 	 * 3. there is a chance that the NIC is asleep
 	 */
 	if (!trans->trans_cfg->base_params->shadow_reg_enable &&
-	    txq_id != trans_pcie->cmd_queue &&
+	    txq_id != trans->txqs.cmd.q_id &&
 	    test_bit(STATUS_TPOWER_PMI, &trans->status)) {
 		/*
 		 * wake up nic if it's powered down ...
@@ -323,13 +322,12 @@ static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
 
 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 	int i;
 
 	for (i = 0; i < trans->trans_cfg->base_params->num_of_queues; i++) {
-		struct iwl_txq *txq = trans_pcie->txq[i];
+		struct iwl_txq *txq = trans->txqs.txq[i];
 
-		if (!test_bit(i, trans_pcie->queue_used))
+		if (!test_bit(i, trans->txqs.queue_used))
 			continue;
 
 		spin_lock_bh(&txq->lock);
@@ -660,14 +658,14 @@ static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 
 	spin_lock_bh(&txq->lock);
 	while (txq->write_ptr != txq->read_ptr) {
 		IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
 				   txq_id, txq->read_ptr);
 
-		if (txq_id != trans_pcie->cmd_queue) {
+		if (txq_id != trans->txqs.cmd.q_id) {
 			struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
 
 			if (WARN_ON_ONCE(!skb))
@@ -682,7 +680,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 			unsigned long flags;
 
 			spin_lock_irqsave(&trans_pcie->reg_lock, flags);
-			if (txq_id == trans_pcie->cmd_queue)
+			if (txq_id == trans->txqs.cmd.q_id)
 				iwl_pcie_clear_cmd_in_flight(trans);
 			spin_unlock_irqrestore(&trans_pcie->reg_lock, flags);
 		}
@@ -711,7 +709,7 @@ static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
 static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	struct device *dev = trans->dev;
 	int i;
 
@@ -721,7 +719,7 @@ static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
 	iwl_pcie_txq_unmap(trans, txq_id);
 
 	/* De-alloc array of command/tx buffers */
-	if (txq_id == trans_pcie->cmd_queue)
+	if (txq_id == trans->txqs.cmd.q_id)
 		for (i = 0; i < txq->n_window; i++) {
 			kzfree(txq->entries[i].cmd);
 			kzfree(txq->entries[i].free_buf);
@@ -760,8 +758,9 @@ void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
 				SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(u32);
 
 	/* make sure all queue are not stopped/used */
-	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_stopped, 0,
+	       sizeof(trans->txqs.queue_stopped));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	trans_pcie->scd_base_addr =
 		iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
@@ -783,9 +782,9 @@ void iwl_pcie_tx_start(struct iwl_trans *trans, u32 scd_base_addr)
 	if (trans->trans_cfg->base_params->scd_chain_ext_wa)
 		iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
 
-	iwl_trans_ac_txq_enable(trans, trans_pcie->cmd_queue,
-				trans_pcie->cmd_fifo,
-				trans_pcie->cmd_q_wdg_timeout);
+	iwl_trans_ac_txq_enable(trans, trans->txqs.cmd.q_id,
+				trans->txqs.cmd.fifo,
+				trans->txqs.cmd.wdg_timeout);
 
 	/* Activate all Tx DMA/FIFO channels */
 	iwl_scd_activate_fifos(trans);
@@ -821,7 +820,7 @@ void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
 
 	for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
 	     txq_id++) {
-		struct iwl_txq *txq = trans_pcie->txq[txq_id];
+		struct iwl_txq *txq = trans->txqs.txq[txq_id];
 		if (trans->trans_cfg->use_tfh)
 			iwl_write_direct64(trans,
 					   FH_MEM_CBBC_QUEUE(trans, txq_id),
@@ -897,8 +896,9 @@ int iwl_pcie_tx_stop(struct iwl_trans *trans)
 	 * queues. This happens when we have an rfkill interrupt.
 	 * Since we stop Tx altogether - mark the queues as stopped.
 	 */
-	memset(trans_pcie->queue_stopped, 0, sizeof(trans_pcie->queue_stopped));
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_stopped, 0,
+	       sizeof(trans->txqs.queue_stopped));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	/* This can happen: start_hw, stop_device */
 	if (!trans_pcie->txq_memory)
@@ -922,7 +922,7 @@ void iwl_pcie_tx_free(struct iwl_trans *trans)
 	int txq_id;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
 
-	memset(trans_pcie->queue_used, 0, sizeof(trans_pcie->queue_used));
+	memset(trans->txqs.queue_used, 0, sizeof(trans->txqs.queue_used));
 
 	/* Tx queues */
 	if (trans_pcie->txq_memory) {
@@ -930,7 +930,7 @@ void iwl_pcie_tx_free(struct iwl_trans *trans)
 		     txq_id < trans->trans_cfg->base_params->num_of_queues;
 		     txq_id++) {
 			iwl_pcie_txq_free(trans, txq_id);
-			trans_pcie->txq[txq_id] = NULL;
+			trans->txqs.txq[txq_id] = NULL;
 		}
 	}
 
@@ -991,7 +991,7 @@ static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
 	/* Alloc and init all Tx queues, including the command queue (#4/#9) */
 	for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
 	     txq_id++) {
-		bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
+		bool cmd_queue = (txq_id == trans->txqs.cmd.q_id);
 
 		if (cmd_queue)
 			slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
@@ -999,14 +999,14 @@ static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
 		else
 			slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
 					  trans->cfg->min_256_ba_txq_size);
-		trans_pcie->txq[txq_id] = &trans_pcie->txq_memory[txq_id];
-		ret = iwl_pcie_txq_alloc(trans, trans_pcie->txq[txq_id],
+		trans->txqs.txq[txq_id] = &trans_pcie->txq_memory[txq_id];
+		ret = iwl_pcie_txq_alloc(trans, trans->txqs.txq[txq_id],
 					 slots_num, cmd_queue);
 		if (ret) {
 			IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
 			goto error;
 		}
-		trans_pcie->txq[txq_id]->id = txq_id;
+		trans->txqs.txq[txq_id]->id = txq_id;
 	}
 
 	return 0;
@@ -1045,7 +1045,7 @@ int iwl_pcie_tx_init(struct iwl_trans *trans)
 	/* Alloc and init all Tx queues, including the command queue (#4/#9) */
 	for (txq_id = 0; txq_id < trans->trans_cfg->base_params->num_of_queues;
 	     txq_id++) {
-		bool cmd_queue = (txq_id == trans_pcie->cmd_queue);
+		bool cmd_queue = (txq_id == trans->txqs.cmd.q_id);
 
 		if (cmd_queue)
 			slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
@@ -1053,7 +1053,7 @@ int iwl_pcie_tx_init(struct iwl_trans *trans)
 		else
 			slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
 					  trans->cfg->min_256_ba_txq_size);
-		ret = iwl_pcie_txq_init(trans, trans_pcie->txq[txq_id],
+		ret = iwl_pcie_txq_init(trans, trans->txqs.txq[txq_id],
 					slots_num, cmd_queue);
 		if (ret) {
 			IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
@@ -1067,7 +1067,7 @@ int iwl_pcie_tx_init(struct iwl_trans *trans)
 		 * Circular buffer (TFD queue in DRAM) physical base address
 		 */
 		iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
-				   trans_pcie->txq[txq_id]->dma_addr >> 8);
+				   trans->txqs.txq[txq_id]->dma_addr >> 8);
 	}
 
 	iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
@@ -1112,18 +1112,18 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 			    struct sk_buff_head *skbs)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	int tfd_num = iwl_pcie_get_cmd_index(txq, ssn);
 	int read_ptr = iwl_pcie_get_cmd_index(txq, txq->read_ptr);
 	int last_to_free;
 
 	/* This function is not meant to release cmd queue*/
-	if (WARN_ON(txq_id == trans_pcie->cmd_queue))
+	if (WARN_ON(txq_id == trans->txqs.cmd.q_id))
 		return;
 
 	spin_lock_bh(&txq->lock);
 
-	if (!test_bit(txq_id, trans_pcie->queue_used)) {
+	if (!test_bit(txq_id, trans->txqs.queue_used)) {
 		IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
 				    txq_id, ssn);
 		goto out;
@@ -1175,7 +1175,7 @@ void iwl_trans_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
 	iwl_pcie_txq_progress(txq);
 
 	if (iwl_queue_space(trans, txq) > txq->low_mark &&
-	    test_bit(txq_id, trans_pcie->queue_stopped)) {
+	    test_bit(txq_id, trans->txqs.queue_stopped)) {
 		struct sk_buff_head overflow_skbs;
 
 		__skb_queue_head_init(&overflow_skbs);
@@ -1228,8 +1228,7 @@ out:
 /* Set wr_ptr of specific device and txq  */
 void iwl_trans_pcie_set_q_ptrs(struct iwl_trans *trans, int txq_id, int ptr)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 
 	spin_lock_bh(&txq->lock);
 
@@ -1289,7 +1288,7 @@ static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	unsigned long flags;
 	int nfreed = 0;
 	u16 r;
@@ -1301,7 +1300,7 @@ static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
 
 	if (idx >= trans->trans_cfg->base_params->max_tfd_queue_size ||
 	    (!iwl_queue_used(txq, idx))) {
-		WARN_ONCE(test_bit(txq_id, trans_pcie->queue_used),
+		WARN_ONCE(test_bit(txq_id, trans->txqs.queue_used),
 			  "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
 			  __func__, txq_id, idx,
 			  trans->trans_cfg->base_params->max_tfd_queue_size,
@@ -1363,11 +1362,11 @@ bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 			       unsigned int wdg_timeout)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 	int fifo = -1;
 	bool scd_bug = false;
 
-	if (test_and_set_bit(txq_id, trans_pcie->queue_used))
+	if (test_and_set_bit(txq_id, trans->txqs.queue_used))
 		WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
 
 	txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
@@ -1376,7 +1375,7 @@ bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 		fifo = cfg->fifo;
 
 		/* Disable the scheduler prior configuring the cmd queue */
-		if (txq_id == trans_pcie->cmd_queue &&
+		if (txq_id == trans->txqs.cmd.q_id &&
 		    trans_pcie->scd_set_active)
 			iwl_scd_enable_set_active(trans, 0);
 
@@ -1384,7 +1383,7 @@ bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 		iwl_scd_txq_set_inactive(trans, txq_id);
 
 		/* Set this queue as a chain-building queue unless it is CMD */
-		if (txq_id != trans_pcie->cmd_queue)
+		if (txq_id != trans->txqs.cmd.q_id)
 			iwl_scd_txq_set_chain(trans, txq_id);
 
 		if (cfg->aggregate) {
@@ -1454,7 +1453,7 @@ bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 			       SCD_QUEUE_STTS_REG_MSK);
 
 		/* enable the scheduler for this queue (only) */
-		if (txq_id == trans_pcie->cmd_queue &&
+		if (txq_id == trans->txqs.cmd.q_id &&
 		    trans_pcie->scd_set_active)
 			iwl_scd_enable_set_active(trans, BIT(txq_id));
 
@@ -1473,8 +1472,7 @@ bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
 					bool shared_mode)
 {
-	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[txq_id];
+	struct iwl_txq *txq = trans->txqs.txq[txq_id];
 
 	txq->ampdu = !shared_mode;
 }
@@ -1487,8 +1485,8 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 			SCD_TX_STTS_QUEUE_OFFSET(txq_id);
 	static const u32 zero_val[4] = {};
 
-	trans_pcie->txq[txq_id]->frozen_expiry_remainder = 0;
-	trans_pcie->txq[txq_id]->frozen = false;
+	trans->txqs.txq[txq_id]->frozen_expiry_remainder = 0;
+	trans->txqs.txq[txq_id]->frozen = false;
 
 	/*
 	 * Upon HW Rfkill - we stop the device, and then stop the queues
@@ -1496,7 +1494,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 	 * allow the op_mode to call txq_disable after it already called
 	 * stop_device.
 	 */
-	if (!test_and_clear_bit(txq_id, trans_pcie->queue_used)) {
+	if (!test_and_clear_bit(txq_id, trans->txqs.queue_used)) {
 		WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
 			  "queue %d not used", txq_id);
 		return;
@@ -1510,7 +1508,7 @@ void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
 	}
 
 	iwl_pcie_txq_unmap(trans, txq_id);
-	trans_pcie->txq[txq_id]->ampdu = false;
+	trans->txqs.txq[txq_id]->ampdu = false;
 
 	IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", txq_id);
 }
@@ -1530,7 +1528,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 				 struct iwl_host_cmd *cmd)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	struct iwl_device_cmd *out_cmd;
 	struct iwl_cmd_meta *out_meta;
 	unsigned long flags;
@@ -1656,7 +1654,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 				    sizeof(struct iwl_cmd_header_wide));
 		out_cmd->hdr_wide.reserved = 0;
 		out_cmd->hdr_wide.sequence =
-			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
+			cpu_to_le16(QUEUE_TO_SEQ(trans->txqs.cmd.q_id) |
 						 INDEX_TO_SEQ(txq->write_ptr));
 
 		cmd_pos = sizeof(struct iwl_cmd_header_wide);
@@ -1664,7 +1662,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 	} else {
 		out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
 		out_cmd->hdr.sequence =
-			cpu_to_le16(QUEUE_TO_SEQ(trans_pcie->cmd_queue) |
+			cpu_to_le16(QUEUE_TO_SEQ(trans->txqs.cmd.q_id) |
 						 INDEX_TO_SEQ(txq->write_ptr));
 		out_cmd->hdr.group_id = 0;
 
@@ -1715,7 +1713,7 @@ static int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
 		     iwl_get_cmd_string(trans, cmd->id),
 		     group_id, out_cmd->hdr.cmd,
 		     le16_to_cpu(out_cmd->hdr.sequence),
-		     cmd_size, txq->write_ptr, idx, trans_pcie->cmd_queue);
+		     cmd_size, txq->write_ptr, idx, trans->txqs.cmd.q_id);
 
 	/* start the TFD with the minimum copy bytes */
 	tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
@@ -1815,14 +1813,14 @@ void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
 	struct iwl_device_cmd *cmd;
 	struct iwl_cmd_meta *meta;
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 
 	/* If a Tx command is being handled and it isn't in the actual
 	 * command queue then there a command routing bug has been introduced
 	 * in the queue management code. */
-	if (WARN(txq_id != trans_pcie->cmd_queue,
+	if (WARN(txq_id != trans->txqs.cmd.q_id,
 		 "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d\n",
-		 txq_id, trans_pcie->cmd_queue, sequence, txq->read_ptr,
+		 txq_id, trans->txqs.cmd.q_id, sequence, txq->read_ptr,
 		 txq->write_ptr)) {
 		iwl_print_hex_error(trans, pkt, 32);
 		return;
@@ -1894,7 +1892,7 @@ static int iwl_pcie_send_hcmd_sync(struct iwl_trans *trans,
 				   struct iwl_host_cmd *cmd)
 {
 	struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
-	struct iwl_txq *txq = trans_pcie->txq[trans_pcie->cmd_queue];
+	struct iwl_txq *txq = trans->txqs.txq[trans->txqs.cmd.q_id];
 	int cmd_idx;
 	int ret;
 
@@ -2332,9 +2330,9 @@ int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
 	u16 wifi_seq;
 	bool amsdu;
 
-	txq = trans_pcie->txq[txq_id];
+	txq = trans->txqs.txq[txq_id];
 
-	if (WARN_ONCE(!test_bit(txq_id, trans_pcie->queue_used),
+	if (WARN_ONCE(!test_bit(txq_id, trans->txqs.queue_used),
 		      "TX on unused queue %d\n", txq_id))
 		return -EINVAL;
 
-- 
cgit v1.2.3-59-g8ed1b


From f327236df2afc8c3c711e7e070f122c26974f4da Mon Sep 17 00:00:00 2001
From: Sharon <sara.sharon@intel.com>
Date: Fri, 29 May 2020 09:39:29 +0300
Subject: iwlwifi: mvm: fix aux station leak

When mvm is initialized we allocate the aux station along with its aux queue.
We later free the station memory when the driver is stopped, but we
never free the queue's memory, which causes a leak.

Add a proper de-initialization of the station.

Signed-off-by: Sharon <sara.sharon@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.0121c5be55e9.Id7516fbb3482131d0c9dfb51ff20b226617ddb49@changeid
---
 drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c |  5 ++---
 drivers/net/wireless/intel/iwlwifi/mvm/sta.c      | 18 +++++++++++++-----
 drivers/net/wireless/intel/iwlwifi/mvm/sta.h      |  6 +++---
 3 files changed, 18 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
index 10df77ab1a77..77916231ff7d 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/mac80211.c
@@ -1208,14 +1208,13 @@ void __iwl_mvm_mac_stop(struct iwl_mvm *mvm)
 	 */
 	flush_work(&mvm->roc_done_wk);
 
+	iwl_mvm_rm_aux_sta(mvm);
+
 	iwl_mvm_stop_device(mvm);
 
 	iwl_mvm_async_handlers_purge(mvm);
 	/* async_handlers_list is empty and will stay empty: HW is stopped */
 
-	/* the fw is stopped, the aux sta is dead: clean up driver state */
-	iwl_mvm_del_aux_sta(mvm);
-
 	/*
 	 * Clear IN_HW_RESTART and HW_RESTART_REQUESTED flag when stopping the
 	 * hw (as restart_complete() won't be called in this case) and mac80211
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
index 44d4720b7629..fee01cbbd3ac 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.c
@@ -2093,16 +2093,24 @@ int iwl_mvm_rm_snif_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif)
 	return ret;
 }
 
-void iwl_mvm_dealloc_snif_sta(struct iwl_mvm *mvm)
+int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm)
 {
-	iwl_mvm_dealloc_int_sta(mvm, &mvm->snif_sta);
-}
+	int ret;
 
-void iwl_mvm_del_aux_sta(struct iwl_mvm *mvm)
-{
 	lockdep_assert_held(&mvm->mutex);
 
+	iwl_mvm_disable_txq(mvm, NULL, mvm->aux_queue, IWL_MAX_TID_COUNT, 0);
+	ret = iwl_mvm_rm_sta_common(mvm, mvm->aux_sta.sta_id);
+	if (ret)
+		IWL_WARN(mvm, "Failed sending remove station\n");
 	iwl_mvm_dealloc_int_sta(mvm, &mvm->aux_sta);
+
+	return ret;
+}
+
+void iwl_mvm_dealloc_snif_sta(struct iwl_mvm *mvm)
+{
+	iwl_mvm_dealloc_int_sta(mvm, &mvm->snif_sta);
 }
 
 /*
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
index 8d70093847cb..da2d1ac01229 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/sta.h
@@ -8,7 +8,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  *
  * This program is free software; you can redistribute it and/or modify
  * it under the terms of version 2 of the GNU General Public License as
@@ -31,7 +31,7 @@
  * Copyright(c) 2012 - 2014 Intel Corporation. All rights reserved.
  * Copyright(c) 2013 - 2014 Intel Mobile Communications GmbH
  * Copyright(c) 2015 - 2016 Intel Deutschland GmbH
- * Copyright(c) 2018 - 2019 Intel Corporation
+ * Copyright(c) 2018 - 2020 Intel Corporation
  * All rights reserved.
  *
  * Redistribution and use in source and binary forms, with or without
@@ -541,7 +541,7 @@ int iwl_mvm_sta_tx_agg(struct iwl_mvm *mvm, struct ieee80211_sta *sta,
 		       int tid, u8 queue, bool start);
 
 int iwl_mvm_add_aux_sta(struct iwl_mvm *mvm);
-void iwl_mvm_del_aux_sta(struct iwl_mvm *mvm);
+int iwl_mvm_rm_aux_sta(struct iwl_mvm *mvm);
 
 int iwl_mvm_alloc_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
 int iwl_mvm_send_add_bcast_sta(struct iwl_mvm *mvm, struct ieee80211_vif *vif);
-- 
cgit v1.2.3-59-g8ed1b


From 018971b11ab407c8d48c075ad38d2917587e97ab Mon Sep 17 00:00:00 2001
From: Avraham Stern <avraham.stern@intel.com>
Date: Fri, 29 May 2020 09:39:30 +0300
Subject: iwlwifi: mvm: add support for range request version 10

Range request version 10 keeps the same command size as version 9
but uses 2 reserved fields for the responder beacon interval and
station id (if one exists).
For now, since the beacon interval of unassociated APs is unknown, use
a value of 100 TUs, which is a common value for many APs.

While at it, remove the definition for CCMP_256 cipher, since this
is not supported.

Signed-off-by: Avraham Stern <avraham.stern@intel.com>
Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.b7ccdad0805f.I59ea7f773caed85a66c61401066ae169008442e6@changeid
---
 .../net/wireless/intel/iwlwifi/fw/api/location.h   | 14 ++++----
 .../net/wireless/intel/iwlwifi/mvm/ftm-initiator.c | 42 +++++++++++++++++++---
 2 files changed, 45 insertions(+), 11 deletions(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
index 7ffad19d80fd..1df2e497fabf 100644
--- a/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
+++ b/drivers/net/wireless/intel/iwlwifi/fw/api/location.h
@@ -550,13 +550,11 @@ struct iwl_tof_range_req_ap_entry_v4 {
 /**
  * enum iwl_location_cipher - location cipher selection
  * @IWL_LOCATION_CIPHER_CCMP_128: CCMP 128
- * @IWL_LOCATION_CIPHER_CCMP_256: CCMP 256
  * @IWL_LOCATION_CIPHER_GCMP_128: GCMP 128
  * @IWL_LOCATION_CIPHER_GCMP_256: GCMP 256
  */
 enum iwl_location_cipher {
 	IWL_LOCATION_CIPHER_CCMP_128,
-	IWL_LOCATION_CIPHER_CCMP_256,
 	IWL_LOCATION_CIPHER_GCMP_128,
 	IWL_LOCATION_CIPHER_GCMP_256,
 };
@@ -577,7 +575,8 @@ enum iwl_location_cipher {
  * @samples_per_burst: the number of FTMs pairs in single Burst (1-31);
  * @num_of_bursts: Recommended value to be sent to the AP. 2s Exponent of
  *	the number of measurement iterations (min 2^0 = 1, max 2^14)
- * @reserved: For alignment and future use
+ * @sta_id: the station id of the AP. Only relevant when associated to the AP,
+ *	otherwise should be set to &IWL_MVM_INVALID_STA.
  * @cipher: pairwise cipher suite for secured measurement.
  *          &enum iwl_location_cipher.
  * @hltk: HLTK to be used for secured 11az measurement
@@ -586,7 +585,8 @@ enum iwl_location_cipher {
  *         If &IWL_INITIATOR_AP_FLAGS_USE_CALIB is set, the fw will use the
  *         calibration value that corresponds to the rx bandwidth of the FTM
  *         frame.
- * @reserved2: For alignment and future use.
+ * @beacon_interval: beacon interval of the AP in TUs. Only required if
+ *	&IWL_INITIATOR_AP_FLAGS_TB is set.
  */
 struct iwl_tof_range_req_ap_entry {
 	__le32 initiator_ap_flags;
@@ -598,13 +598,13 @@ struct iwl_tof_range_req_ap_entry {
 	__le16 burst_period;
 	u8 samples_per_burst;
 	u8 num_of_bursts;
-	u8 reserved;
+	u8 sta_id;
 	u8 cipher;
 	u8 hltk[HLTK_11AZ_LEN];
 	u8 tk[TK_11AZ_LEN];
 	__le16 calib[IWL_TOF_BW_NUM];
-	__le16 reserved2;
-} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_5 */
+	__le16 beacon_interval;
+} __packed; /* LOCATION_RANGE_REQ_AP_ENTRY_CMD_API_S_VER_6 */
 
 /**
  * enum iwl_tof_response_mode
diff --git a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
index aaa7dd1788b1..5ca45915cf7c 100644
--- a/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
+++ b/drivers/net/wireless/intel/iwlwifi/mvm/ftm-initiator.c
@@ -391,9 +391,27 @@ iwl_mvm_ftm_put_target_v3(struct iwl_mvm *mvm,
 }
 
 static int
-iwl_mvm_ftm_put_target(struct iwl_mvm *mvm,
+iwl_mvm_ftm_put_target_v4(struct iwl_mvm *mvm,
+			  struct cfg80211_pmsr_request_peer *peer,
+			  struct iwl_tof_range_req_ap_entry_v4 *target)
+{
+	int ret;
+
+	ret = iwl_mvm_ftm_target_chandef_v2(mvm, peer, &target->channel_num,
+					    &target->format_bw,
+					    &target->ctrl_ch_position);
+	if (ret)
+		return ret;
+
+	iwl_mvm_ftm_put_target_common(mvm, peer, (void *)target);
+
+	return 0;
+}
+
+static int
+iwl_mvm_ftm_put_target(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 		       struct cfg80211_pmsr_request_peer *peer,
-		       struct iwl_tof_range_req_ap_entry_v4 *target)
+		       struct iwl_tof_range_req_ap_entry *target)
 {
 	int ret;
 
@@ -405,6 +423,20 @@ iwl_mvm_ftm_put_target(struct iwl_mvm *mvm,
 
 	iwl_mvm_ftm_put_target_common(mvm, peer, (void *)target);
 
+	if (vif->bss_conf.assoc &&
+	    !memcmp(peer->addr, vif->bss_conf.bssid, ETH_ALEN)) {
+		struct iwl_mvm_vif *mvmvif = iwl_mvm_vif_from_mac80211(vif);
+
+		target->sta_id = mvmvif->ap_sta_id;
+	} else {
+		target->sta_id = IWL_MVM_INVALID_STA;
+	}
+
+	/*
+	 * TODO: Beacon interval is currently unknown, so use the common value
+	 * of 100 TUs.
+	 */
+	target->beacon_interval = cpu_to_le16(100);
 	return 0;
 }
 
@@ -496,7 +528,7 @@ static int iwl_mvm_ftm_start_v8(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 	for (i = 0; i < cmd.num_of_ap; i++) {
 		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
 
-		err = iwl_mvm_ftm_put_target(mvm, peer, &cmd.ap[i]);
+		err = iwl_mvm_ftm_put_target_v4(mvm, peer, &cmd.ap[i]);
 		if (err)
 			return err;
 	}
@@ -521,8 +553,9 @@ static int iwl_mvm_ftm_start_v9(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 	for (i = 0; i < cmd.num_of_ap; i++) {
 		struct cfg80211_pmsr_request_peer *peer = &req->peers[i];
+		struct iwl_tof_range_req_ap_entry *target = &cmd.ap[i];
 
-		err = iwl_mvm_ftm_put_target(mvm, peer, (void *)&cmd.ap[i]);
+		err = iwl_mvm_ftm_put_target(mvm, vif, peer, target);
 		if (err)
 			return err;
 	}
@@ -548,6 +581,7 @@ int iwl_mvm_ftm_start(struct iwl_mvm *mvm, struct ieee80211_vif *vif,
 
 		switch (cmd_ver) {
 		case 9:
+		case 10:
 			err = iwl_mvm_ftm_start_v9(mvm, vif, req);
 			break;
 		case 8:
-- 
cgit v1.2.3-59-g8ed1b


From e6d4318c049574dcfa040725903add2790cfbd7b Mon Sep 17 00:00:00 2001
From: Luca Coelho <luciano.coelho@intel.com>
Date: Fri, 29 May 2020 09:39:31 +0300
Subject: iwlwifi: bump FW API to 56 for AX devices

Start supporting API version 56 for AX devices.

Signed-off-by: Luca Coelho <luciano.coelho@intel.com>
Link: https://lore.kernel.org/r/iwlwifi.20200529092401.aabbc5b472ee.I88cb2c3d2d07e62eac3671335ff1fb80b73c5839@changeid
---
 drivers/net/wireless/intel/iwlwifi/cfg/22000.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
index 1daa653bcb99..efe427049a6e 100644
--- a/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
+++ b/drivers/net/wireless/intel/iwlwifi/cfg/22000.c
@@ -57,7 +57,7 @@
 #include "iwl-prph.h"
 
 /* Highest firmware API version supported */
-#define IWL_22000_UCODE_API_MAX	55
+#define IWL_22000_UCODE_API_MAX	56
 
 /* Lowest firmware API version supported */
 #define IWL_22000_UCODE_API_MIN	39
-- 
cgit v1.2.3-59-g8ed1b


From feac90d756c03b03b83fabe83571bd88ecc96b78 Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijuhu@codeaurora.org>
Date: Fri, 29 May 2020 04:31:07 +0800
Subject: Bluetooth: hci_qca: Fix suspend/resume functionality failure

The @dev parameter of qca_suspend()/qca_resume() represents a
serdev_device, but it is mistaken for an hci_dev, which causes
subsequent unexpected memory accesses.

Fix by taking @dev as serdev_device.

Fixes: 41d5b25fed0 ("Bluetooth: hci_qca: add PM support")
Signed-off-by: Zijun Hu <zijuhu@codeaurora.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index e4a68238fcb9..adcbe00a2275 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1977,8 +1977,9 @@ static void qca_serdev_remove(struct serdev_device *serdev)
 
 static int __maybe_unused qca_suspend(struct device *dev)
 {
-	struct hci_dev *hdev = container_of(dev, struct hci_dev, dev);
-	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct serdev_device *serdev = to_serdev_device(dev);
+	struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+	struct hci_uart *hu = &qcadev->serdev_hu;
 	struct qca_data *qca = hu->priv;
 	unsigned long flags;
 	int ret = 0;
@@ -2057,8 +2058,9 @@ error:
 
 static int __maybe_unused qca_resume(struct device *dev)
 {
-	struct hci_dev *hdev = container_of(dev, struct hci_dev, dev);
-	struct hci_uart *hu = hci_get_drvdata(hdev);
+	struct serdev_device *serdev = to_serdev_device(dev);
+	struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+	struct hci_uart *hu = &qcadev->serdev_hu;
 	struct qca_data *qca = hu->priv;
 
 	clear_bit(QCA_SUSPENDING, &qca->flags);
-- 
cgit v1.2.3-59-g8ed1b


From 4803c54ca24923a30664bea2a7772db6e7303c51 Mon Sep 17 00:00:00 2001
From: Chuhong Yuan <hslester96@gmail.com>
Date: Fri, 29 May 2020 10:27:26 +0800
Subject: Bluetooth: btmtkuart: Improve exception handling in btmtkuart_probe()

Calls to clk_disable_unprepare() and hci_free_dev() were missing
from the error handling paths.
Add the missing calls together with the corresponding
jump targets.

Fixes: 055825614c6b ("Bluetooth: btmtkuart: add an implementation for clock osc property")
Signed-off-by: Chuhong Yuan <hslester96@gmail.com>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmtkuart.c | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
index e11169ad8247..8a81fbca5c9d 100644
--- a/drivers/bluetooth/btmtkuart.c
+++ b/drivers/bluetooth/btmtkuart.c
@@ -1015,7 +1015,7 @@ static int btmtkuart_probe(struct serdev_device *serdev)
 	if (btmtkuart_is_standalone(bdev)) {
 		err = clk_prepare_enable(bdev->osc);
 		if (err < 0)
-			return err;
+			goto err_hci_free_dev;
 
 		if (bdev->boot) {
 			gpiod_set_value_cansleep(bdev->boot, 1);
@@ -1028,10 +1028,8 @@ static int btmtkuart_probe(struct serdev_device *serdev)
 
 		/* Power on */
 		err = regulator_enable(bdev->vcc);
-		if (err < 0) {
-			clk_disable_unprepare(bdev->osc);
-			return err;
-		}
+		if (err < 0)
+			goto err_clk_disable_unprepare;
 
 		/* Reset if the reset-gpios is available otherwise the board
 		 * -level design should be guaranteed.
@@ -1063,7 +1061,6 @@ static int btmtkuart_probe(struct serdev_device *serdev)
 	err = hci_register_dev(hdev);
 	if (err < 0) {
 		dev_err(&serdev->dev, "Can't register HCI device\n");
-		hci_free_dev(hdev);
 		goto err_regulator_disable;
 	}
 
@@ -1072,6 +1069,11 @@ static int btmtkuart_probe(struct serdev_device *serdev)
 err_regulator_disable:
 	if (btmtkuart_is_standalone(bdev))
 		regulator_disable(bdev->vcc);
+err_clk_disable_unprepare:
+	if (btmtkuart_is_standalone(bdev))
+		clk_disable_unprepare(bdev->osc);
+err_hci_free_dev:
+	hci_free_dev(hdev);
 
 	return err;
 }
-- 
cgit v1.2.3-59-g8ed1b


From e6da0edc24eecef2f6964d92fa9044e1821deace Mon Sep 17 00:00:00 2001
From: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Date: Thu, 28 May 2020 14:35:12 +0200
Subject: Bluetooth: Acquire sk_lock.slock without disabling interrupts

There was a lockdep report which led to commit
   fad003b6c8e3d ("Bluetooth: Fix inconsistent lock state with RFCOMM")

Lockdep noticed that `sk->sk_lock.slock' was acquired without disabling
the softirq while the lock was also used in softirq context.
Unfortunately the solution back then was to disable interrupts before
acquiring the lock, which did, however, make lockdep happy.
It would have been enough to simply disable the softirq. Disabling
interrupts before acquiring a spinlock_t is not allowed on PREEMPT_RT
because these locks are converted to 'sleeping' spinlocks.

Use spin_lock_bh() in order to acquire the `sk_lock.slock'.

Reported-by: Luis Claudio R. Goncalves <lclaudio@uudg.org>
Reported-by: kbuild test robot <lkp@intel.com> [missing unlock]
Signed-off-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 net/bluetooth/rfcomm/sock.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/net/bluetooth/rfcomm/sock.c b/net/bluetooth/rfcomm/sock.c
index b4eaf21360ef..df14eebe80da 100644
--- a/net/bluetooth/rfcomm/sock.c
+++ b/net/bluetooth/rfcomm/sock.c
@@ -64,15 +64,13 @@ static void rfcomm_sk_data_ready(struct rfcomm_dlc *d, struct sk_buff *skb)
 static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
 {
 	struct sock *sk = d->owner, *parent;
-	unsigned long flags;
 
 	if (!sk)
 		return;
 
 	BT_DBG("dlc %p state %ld err %d", d, d->state, err);
 
-	local_irq_save(flags);
-	bh_lock_sock(sk);
+	spin_lock_bh(&sk->sk_lock.slock);
 
 	if (err)
 		sk->sk_err = err;
@@ -93,8 +91,7 @@ static void rfcomm_sk_state_change(struct rfcomm_dlc *d, int err)
 		sk->sk_state_change(sk);
 	}
 
-	bh_unlock_sock(sk);
-	local_irq_restore(flags);
+	spin_unlock_bh(&sk->sk_lock.slock);
 
 	if (parent && sock_flag(sk, SOCK_ZAPPED)) {
 		/* We have to drop DLC lock here, otherwise
-- 
cgit v1.2.3-59-g8ed1b


From 7e7bbddd029b644f00f0ffbfbc485ed71977d0d5 Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijuhu@codeaurora.org>
Date: Fri, 29 May 2020 21:56:57 +0800
Subject: Bluetooth: hci_qca: Fix qca6390 enable failure after warm reboot

A warm reboot cannot reset the qca6390 controller because it lacks a
controllable power supply, which causes the firmware download to fail
during enable.

Fix this by sending the VSC EDL_SOC_RESET to reset the qca6390 from
the newly added device shutdown implementation.

Signed-off-by: Zijun Hu <zijuhu@codeaurora.org>
Tested-by: Zijun Hu <zijuhu@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 33 +++++++++++++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index adcbe00a2275..aa957d749d6f 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -1975,6 +1975,38 @@ static void qca_serdev_remove(struct serdev_device *serdev)
 	hci_uart_unregister_device(&qcadev->serdev_hu);
 }
 
+static void qca_serdev_shutdown(struct device *dev)
+{
+	int ret;
+	int timeout = msecs_to_jiffies(CMD_TRANS_TIMEOUT_MS);
+	struct serdev_device *serdev = to_serdev_device(dev);
+	struct qca_serdev *qcadev = serdev_device_get_drvdata(serdev);
+	const u8 ibs_wake_cmd[] = { 0xFD };
+	const u8 edl_reset_soc_cmd[] = { 0x01, 0x00, 0xFC, 0x01, 0x05 };
+
+	if (qcadev->btsoc_type == QCA_QCA6390) {
+		serdev_device_write_flush(serdev);
+		ret = serdev_device_write_buf(serdev, ibs_wake_cmd,
+					      sizeof(ibs_wake_cmd));
+		if (ret < 0) {
+			BT_ERR("QCA send IBS_WAKE_IND error: %d", ret);
+			return;
+		}
+		serdev_device_wait_until_sent(serdev, timeout);
+		usleep_range(8000, 10000);
+
+		serdev_device_write_flush(serdev);
+		ret = serdev_device_write_buf(serdev, edl_reset_soc_cmd,
+					      sizeof(edl_reset_soc_cmd));
+		if (ret < 0) {
+			BT_ERR("QCA send EDL_RESET_REQ error: %d", ret);
+			return;
+		}
+		serdev_device_wait_until_sent(serdev, timeout);
+		usleep_range(8000, 10000);
+	}
+}
+
 static int __maybe_unused qca_suspend(struct device *dev)
 {
 	struct serdev_device *serdev = to_serdev_device(dev);
@@ -2102,6 +2134,7 @@ static struct serdev_device_driver qca_serdev_driver = {
 		.name = "hci_uart_qca",
 		.of_match_table = of_match_ptr(qca_bluetooth_of_match),
 		.acpi_match_table = ACPI_PTR(qca_bluetooth_acpi_match),
+		.shutdown = qca_serdev_shutdown,
 		.pm = &qca_pm_ops,
 	},
 };
-- 
cgit v1.2.3-59-g8ed1b


From b6b15e20421fefae9f78274f9fef80bc97bf5d5c Mon Sep 17 00:00:00 2001
From: Rui Salvaterra <rsalvaterra@gmail.com>
Date: Mon, 25 May 2020 14:49:07 +0100
Subject: rt2800: enable MFP support unconditionally

This gives us WPA3 support out of the box without having to manually disable
hardware crypto. The driver will fall back to software crypto if the connection
requires management frame protection.

Suggested-by: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Rui Salvaterra <rsalvaterra@gmail.com>
Acked-by: Stanislaw Gruszka <stf_xl@wp.pl>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200525134906.1672-1-rsalvaterra@gmail.com
---
 drivers/net/wireless/ralink/rt2x00/rt2800lib.c | 4 +---
 drivers/net/wireless/ralink/rt2x00/rt2x00mac.c | 3 ++-
 2 files changed, 3 insertions(+), 4 deletions(-)

diff --git a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
index 6beac1f74e7c..a779fe771a55 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2800lib.c
@@ -9971,9 +9971,7 @@ static int rt2800_probe_hw_mode(struct rt2x00_dev *rt2x00dev)
 	if (!rt2x00_is_usb(rt2x00dev))
 		ieee80211_hw_set(rt2x00dev->hw, HOST_BROADCAST_PS_BUFFERING);
 
-	/* Set MFP if HW crypto is disabled. */
-	if (rt2800_hwcrypt_disabled(rt2x00dev))
-		ieee80211_hw_set(rt2x00dev->hw, MFP_CAPABLE);
+	ieee80211_hw_set(rt2x00dev->hw, MFP_CAPABLE);
 
 	SET_IEEE80211_DEV(rt2x00dev->hw, rt2x00dev->dev);
 	SET_IEEE80211_PERM_ADDR(rt2x00dev->hw,
diff --git a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
index 32efbc8e9f92..2f68a31072ae 100644
--- a/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
+++ b/drivers/net/wireless/ralink/rt2x00/rt2x00mac.c
@@ -468,7 +468,8 @@ int rt2x00mac_set_key(struct ieee80211_hw *hw, enum set_key_cmd cmd,
 	if (!test_bit(DEVICE_STATE_PRESENT, &rt2x00dev->flags))
 		return 0;
 
-	if (!rt2x00_has_cap_hw_crypto(rt2x00dev))
+	/* The hardware can't do MFP */
+	if (!rt2x00_has_cap_hw_crypto(rt2x00dev) || (sta && sta->mfp))
 		return -EOPNOTSUPP;
 
 	/*
-- 
cgit v1.2.3-59-g8ed1b


From 3a8855d8cfcb944032ce0eecba05b2d0e93f4fb1 Mon Sep 17 00:00:00 2001
From: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Date: Wed, 20 May 2020 16:08:00 +0300
Subject: MAINTAINERS: update qtnfmac maintainers

I am leaving Quantenna, so I will no longer have access to firmware and
hardware. Meanwhile I plan to participate in reviewing qtnfmac patches
for a while until my firmware knowledge becomes completely obsolete.
Adding myself as a reviewer using my personal email address.

Signed-off-by: Sergey Matyukevich <sergey.matyukevich.os@quantenna.com>
Signed-off-by: Igor Mitsyanko <igor.mitsyanko.os@quantenna.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520130800.1902-1-sergey.matyukevich.os@quantenna.com
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 9f338ed0d9ab..5d81c002232a 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -14066,7 +14066,7 @@ F:	drivers/net/wireless/ath/wcn36xx/
 
 QUANTENNA QTNFMAC WIRELESS DRIVER
 M:	Igor Mitsyanko <imitsyanko@quantenna.com>
-M:	Sergey Matyukevich <smatyukevich@quantenna.com>
+R:	Sergey Matyukevich <geomatsi@gmail.com>
 L:	linux-wireless@vger.kernel.org
 S:	Maintained
 F:	drivers/net/wireless/quantenna
-- 
cgit v1.2.3-59-g8ed1b


From dba5a189bf6169dc3e7462df17fb2aee5ca37e90 Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Wed, 20 May 2020 13:53:50 +0800
Subject: Revert "rtw88: no need to set registers for SDIO"

This reverts commit 07d0f5534935e2daf63a4e1012af13d68e089fed.

For rtw88 driver, the SDIO is going to be supported, so there is
no need to remove the SDIO related power sequence settings. And
while the power sequence parser will pass in the mask of the HCI,
the SDIO part will not be used to set registers accordingly.

Moreover, the power sequence table is released as a whole package,
so the next time if we are going to update, the SDIO settings will
be overwritten. So, revert this now.

Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520055350.23328-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 40 +++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822b.c | 95 +++++++++++++++++++++++++++
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 20 ++++++
 3 files changed, 155 insertions(+)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 8641ea645c4b..7422baf2d41b 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2093,6 +2093,16 @@ static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(7), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x004A,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK,
@@ -2103,6 +2113,11 @@ static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), 0},
+	{0x0023,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), 0},
 	{0x0301,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_PCI_MSK,
@@ -2310,6 +2325,11 @@ static const struct rtw_pwr_seq_cmd trans_act_to_lps_8723d[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0093,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
 	{0x0553,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -2389,6 +2409,11 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8723d[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8723d[] = {
+	{0x0007,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x20},
 	{0x0005,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -2409,6 +2434,21 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8723d[] = {
 	 RTW_PWR_INTF_USB_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(0), 1},
+	{0x0023,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), 0},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822b.c b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
index 6abcdf4070a2..e49bdd76ab9a 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822b.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822b.c
@@ -1551,6 +1551,16 @@ static void rtw8822b_bf_config_bfee(struct rtw_dev *rtwdev, struct rtw_vif *vif,
 }
 
 static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8822b[] = {
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x004A,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK,
@@ -1679,6 +1689,11 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_act_8822b[] = {
 	 RTW_PWR_INTF_ALL_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, 0xFF, 0x0c},
+	{0x0068,
+	 RTW_PWR_CUT_C_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
 	{0x0029,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1707,6 +1722,11 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_act_8822b[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822b[] = {
+	{0x0003,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(2), 0},
 	{0x0093,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1775,6 +1795,11 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822b[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
 	{0x0007,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -1795,6 +1820,46 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
 	 RTW_PWR_INTF_USB_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0067,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(5), 0},
+	{0x0067,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), 0},
+	{0x004F,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0067,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0046,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(6), BIT(6)},
+	{0x0067,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(2), 0},
+	{0x0046,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
+	{0x0062,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(4), BIT(4)},
 	{0x0081,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -1805,11 +1870,41 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822b[] = {
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(3) | BIT(4), BIT(3)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), 0},
 	{0x0090,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_PCI_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(1), 0},
+	{0x0044,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0},
+	{0x0040,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x90},
+	{0x0041,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x00},
+	{0x0042,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, 0xFF, 0x04},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index fe995bb4e43e..5e4cc57dbd7c 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -3563,6 +3563,16 @@ static void rtw8822c_pwr_track(struct rtw_dev *rtwdev)
 }
 
 static const struct rtw_pwr_seq_cmd trans_carddis_to_cardemu_8822c[] = {
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), 0},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_POLLING, BIT(1), BIT(1)},
 	{0x002E,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
@@ -3773,6 +3783,11 @@ static const struct rtw_pwr_seq_cmd trans_act_to_cardemu_8822c[] = {
 };
 
 static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822c[] = {
+	{0x0005,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_MAC,
+	 RTW_PWR_CMD_WRITE, BIT(7), BIT(7)},
 	{0x0007,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_USB_MSK | RTW_PWR_INTF_SDIO_MSK,
@@ -3818,6 +3833,11 @@ static const struct rtw_pwr_seq_cmd trans_cardemu_to_carddis_8822c[] = {
 	 RTW_PWR_INTF_PCI_MSK,
 	 RTW_PWR_ADDR_MAC,
 	 RTW_PWR_CMD_WRITE, BIT(2), BIT(2)},
+	{0x0086,
+	 RTW_PWR_CUT_ALL_MSK,
+	 RTW_PWR_INTF_SDIO_MSK,
+	 RTW_PWR_ADDR_SDIO,
+	 RTW_PWR_CMD_WRITE, BIT(0), BIT(0)},
 	{0xFFFF,
 	 RTW_PWR_CUT_ALL_MSK,
 	 RTW_PWR_INTF_ALL_MSK,
-- 
cgit v1.2.3-59-g8ed1b


From 75d057bda1fbca6ade21378aa45db712e5f7d962 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 26 May 2020 10:59:08 -0500
Subject: b43: Fix connection problem with WPA3

Since the driver was first introduced into the kernel, it has only
handled the ciphers associated with WEP, WPA, and WPA2. It fails with
WPA3: even though mac80211 can handle those additional ciphers in software,
b43 did not report that it could handle them. By setting MFP_CAPABLE using
ieee80211_hw_set(), the problem is fixed.

With this change, b43 will handle the ciphers it knows in hardware,
and let mac80211 handle the others in software. It is not necessary to
use the module parameter NOHWCRYPT to turn hardware encryption off.
Although this change essentially eliminates that module parameter,
I am choosing to keep it for cases where the hardware is broken,
and software encryption is required for all ciphers.

Reported-and-tested-by: Rui Salvaterra <rsalvaterra@gmail.com>
Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200526155909.5807-2-Larry.Finger@lwfinger.net
---
 drivers/net/wireless/broadcom/b43/main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/broadcom/b43/main.c b/drivers/net/wireless/broadcom/b43/main.c
index 39da1a4c30ac..3ad94dad2d89 100644
--- a/drivers/net/wireless/broadcom/b43/main.c
+++ b/drivers/net/wireless/broadcom/b43/main.c
@@ -5569,7 +5569,7 @@ static struct b43_wl *b43_wireless_init(struct b43_bus_dev *dev)
 	/* fill hw info */
 	ieee80211_hw_set(hw, RX_INCLUDES_FCS);
 	ieee80211_hw_set(hw, SIGNAL_DBM);
-
+	ieee80211_hw_set(hw, MFP_CAPABLE);
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
 		BIT(NL80211_IFTYPE_MESH_POINT) |
-- 
cgit v1.2.3-59-g8ed1b


From 6a29d134c04a8acebb7a95251acea7ad7abba106 Mon Sep 17 00:00:00 2001
From: Larry Finger <Larry.Finger@lwfinger.net>
Date: Tue, 26 May 2020 10:59:09 -0500
Subject: b43_legacy: Fix connection problem with WPA3

Since the driver was first introduced into the kernel, it has only
handled the ciphers associated with WEP, WPA, and WPA2. It fails with
WPA3: even though mac80211 can handle those additional ciphers in software,
b43legacy did not report that it could handle them. By setting MFP_CAPABLE using
ieee80211_hw_set(), the problem is fixed.

With this change, b43legacy will handle the ciphers it knows in hardware,
and let mac80211 handle the others in software. It is not necessary to
use the module parameter NOHWCRYPT to turn hardware encryption off.
Although this change essentially eliminates that module parameter,
I am choosing to keep it for cases where the hardware is broken,
and software encryption is required for all ciphers.

Signed-off-by: Larry Finger <Larry.Finger@lwfinger.net>
Cc: Stable <stable@vger.kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200526155909.5807-3-Larry.Finger@lwfinger.net
---
 drivers/net/wireless/broadcom/b43legacy/main.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/broadcom/b43legacy/main.c b/drivers/net/wireless/broadcom/b43legacy/main.c
index 8b6b657c4b85..5208a39fd6f7 100644
--- a/drivers/net/wireless/broadcom/b43legacy/main.c
+++ b/drivers/net/wireless/broadcom/b43legacy/main.c
@@ -3801,6 +3801,7 @@ static int b43legacy_wireless_init(struct ssb_device *dev)
 	/* fill hw info */
 	ieee80211_hw_set(hw, RX_INCLUDES_FCS);
 	ieee80211_hw_set(hw, SIGNAL_DBM);
+	ieee80211_hw_set(hw, MFP_CAPABLE); /* Allow WPA3 in software */
 
 	hw->wiphy->interface_modes =
 		BIT(NL80211_IFTYPE_AP) |
-- 
cgit v1.2.3-59-g8ed1b


From 83cee4e625f8344a55b197e2bf9713088e571375 Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Wed, 20 May 2020 14:54:10 +0200
Subject: cw1200: Remove local sdio VENDOR and DEVICE id definitions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

They are already present in linux/mmc/sdio_ids.h.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520125410.31757-1-pali@kernel.org
---
 drivers/net/wireless/st/cw1200/cw1200_sdio.c | 9 +--------
 1 file changed, 1 insertion(+), 8 deletions(-)

diff --git a/drivers/net/wireless/st/cw1200/cw1200_sdio.c b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
index 43e012073dbf..b65ec14136c7 100644
--- a/drivers/net/wireless/st/cw1200/cw1200_sdio.c
+++ b/drivers/net/wireless/st/cw1200/cw1200_sdio.c
@@ -14,6 +14,7 @@
 #include <linux/mmc/sdio_func.h>
 #include <linux/mmc/card.h>
 #include <linux/mmc/sdio.h>
+#include <linux/mmc/sdio_ids.h>
 #include <net/mac80211.h>
 
 #include "cw1200.h"
@@ -48,14 +49,6 @@ struct hwbus_priv {
 	const struct cw1200_platform_data_sdio *pdata;
 };
 
-#ifndef SDIO_VENDOR_ID_STE
-#define SDIO_VENDOR_ID_STE		0x0020
-#endif
-
-#ifndef SDIO_DEVICE_ID_STE_CW1200
-#define SDIO_DEVICE_ID_STE_CW1200	0x2280
-#endif
-
 static const struct sdio_device_id cw1200_sdio_ids[] = {
 	{ SDIO_DEVICE(SDIO_VENDOR_ID_STE, SDIO_DEVICE_ID_STE_CW1200) },
 	{ /* end: all zeroes */			},
-- 
cgit v1.2.3-59-g8ed1b


From 729ef6b614a14043355c3e35569c62e08e1b9629 Mon Sep 17 00:00:00 2001
From: Pascal Terjan <pterjan@google.com>
Date: Sat, 23 May 2020 22:26:28 +0100
Subject: libertas: Use shared constant for rfc1042 header

This is one of the 9 drivers redefining rfc1042_header.

Signed-off-by: Pascal Terjan <pterjan@google.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200523212628.31526-1-pterjan@google.com
---
 drivers/net/wireless/marvell/libertas/rx.c | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/drivers/net/wireless/marvell/libertas/rx.c b/drivers/net/wireless/marvell/libertas/rx.c
index 58a1fc433b73..f28aa09d1f9e 100644
--- a/drivers/net/wireless/marvell/libertas/rx.c
+++ b/drivers/net/wireless/marvell/libertas/rx.c
@@ -62,9 +62,6 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb)
 	struct rxpd *p_rx_pd;
 	int hdrchop;
 	struct ethhdr *p_ethhdr;
-	static const u8 rfc1042_eth_hdr[] = {
-		0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00
-	};
 
 	BUG_ON(!skb);
 
@@ -102,7 +99,7 @@ int lbs_process_rxed_packet(struct lbs_private *priv, struct sk_buff *skb)
 		sizeof(p_rx_pkt->eth803_hdr.src_addr));
 
 	if (memcmp(&p_rx_pkt->rfc1042_hdr,
-		   rfc1042_eth_hdr, sizeof(rfc1042_eth_hdr)) == 0) {
+		   rfc1042_header, sizeof(rfc1042_header)) == 0) {
 		/*
 		 *  Replace the 803 header and rfc1042 header (llc/snap) with an
 		 *    EthernetII header, keep the src/dst and snap_type (ethertype)
-- 
cgit v1.2.3-59-g8ed1b


From e78e5d18c65362a0e6ca383fad88ef950293fc2c Mon Sep 17 00:00:00 2001
From: Pascal Terjan <pterjan@google.com>
Date: Sat, 23 May 2020 22:27:35 +0100
Subject: atmel: Use shared constant for rfc1042 header

This is one of the 9 drivers redefining rfc1042_header.

Signed-off-by: Pascal Terjan <pterjan@google.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200523212735.32364-1-pterjan@google.com
---
 drivers/net/wireless/atmel/atmel.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/wireless/atmel/atmel.c b/drivers/net/wireless/atmel/atmel.c
index 74538085cfb7..d5875836068c 100644
--- a/drivers/net/wireless/atmel/atmel.c
+++ b/drivers/net/wireless/atmel/atmel.c
@@ -798,7 +798,6 @@ static void tx_update_descriptor(struct atmel_private *priv, int is_bcast,
 
 static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 {
-	static const u8 SNAP_RFC1024[6] = { 0xaa, 0xaa, 0x03, 0x00, 0x00, 0x00 };
 	struct atmel_private *priv = netdev_priv(dev);
 	struct ieee80211_hdr header;
 	unsigned long flags;
@@ -853,7 +852,7 @@ static netdev_tx_t start_tx(struct sk_buff *skb, struct net_device *dev)
 	}
 
 	if (priv->use_wpa)
-		memcpy(&header.addr4, SNAP_RFC1024, ETH_ALEN);
+		memcpy(&header.addr4, rfc1042_header, ETH_ALEN);
 
 	header.frame_control = cpu_to_le16(frame_ctl);
 	/* Copy the wireless header into the card */
-- 
cgit v1.2.3-59-g8ed1b


From 9604617e998b49f7695fea1479ed82421ef8c9f0 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 20 May 2020 20:42:38 +0800
Subject: wlcore: fix runtime pm imbalance in wl1271_tx_work

There are two error handling paths in this function. When
wlcore_tx_work_locked() returns an error code, we should
decrease the runtime PM usage counter the same way as the
error handling path beginning from pm_runtime_get_sync().

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520124241.9931-1-dinghao.liu@zju.edu.cn
---
 drivers/net/wireless/ti/wlcore/tx.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/wireless/ti/wlcore/tx.c b/drivers/net/wireless/ti/wlcore/tx.c
index 90e56d4c3df3..e20e18cd04ae 100644
--- a/drivers/net/wireless/ti/wlcore/tx.c
+++ b/drivers/net/wireless/ti/wlcore/tx.c
@@ -863,6 +863,7 @@ void wl1271_tx_work(struct work_struct *work)
 
 	ret = wlcore_tx_work_locked(wl);
 	if (ret < 0) {
+		pm_runtime_put_noidle(wl->dev);
 		wl12xx_queue_recovery_work(wl);
 		goto out;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 282a04bf1d8029eb98585cb5db3fd70fe8bc91f7 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 20 May 2020 20:46:47 +0800
Subject: wlcore: fix runtime pm imbalance in wlcore_regdomain_config

pm_runtime_get_sync() increments the runtime PM usage counter even
when the call returns an error code. Thus a pairing decrement is needed
on the error handling path to keep the counter balanced.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520124649.10848-1-dinghao.liu@zju.edu.cn
---
 drivers/net/wireless/ti/wlcore/main.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 4421fc656b1c..fa4ced9864c0 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -3665,8 +3665,10 @@ void wlcore_regdomain_config(struct wl1271 *wl)
 		goto out;
 
 	ret = pm_runtime_get_sync(wl->dev);
-	if (ret < 0)
+	if (ret < 0) {
+		pm_runtime_put_autosuspend(wl->dev);
 		goto out;
+	}
 
 	ret = wlcore_cmd_regdomain_config_locked(wl);
 	if (ret < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 3e69ed2b52fd0eeb1e812e20a667316d913e6a97 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 20 May 2020 20:57:22 +0800
Subject: wlcore: fix runtime pm imbalance in wl1271_op_suspend

When wl1271_configure_suspend() returns an error code,
a pairing runtime PM usage counter decrement is needed to
keep the counter balanced.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520125724.12832-1-dinghao.liu@zju.edu.cn
---
 drivers/net/wireless/ti/wlcore/main.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index fa4ced9864c0..bf6698fc1389 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -1746,9 +1746,7 @@ static int __maybe_unused wl1271_op_suspend(struct ieee80211_hw *hw,
 
 		ret = wl1271_configure_suspend(wl, wlvif, wow);
 		if (ret < 0) {
-			mutex_unlock(&wl->mutex);
-			wl1271_warning("couldn't prepare device to suspend");
-			return ret;
+			goto out_sleep;
 		}
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 53df5271f2397706be85c3892246e3e726113902 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Wed, 20 May 2020 21:08:04 +0800
Subject: wlcore: fix runtime pm imbalance in __wl1271_op_remove_interface

When wl12xx_cmd_role_disable() returns an error code,
a pairing runtime PM usage counter decrement is needed to
keep the counter balanced.

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200520130806.14789-1-dinghao.liu@zju.edu.cn
---
 drivers/net/wireless/ti/wlcore/main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index bf6698fc1389..0dcad4949889 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -2696,12 +2696,16 @@ static void __wl1271_op_remove_interface(struct wl1271 *wl,
 
 		if (!wlcore_is_p2p_mgmt(wlvif)) {
 			ret = wl12xx_cmd_role_disable(wl, &wlvif->role_id);
-			if (ret < 0)
+			if (ret < 0) {
+				pm_runtime_put_noidle(wl->dev);
 				goto deinit;
+			}
 		} else {
 			ret = wl12xx_cmd_role_disable(wl, &wlvif->dev_role_id);
-			if (ret < 0)
+			if (ret < 0) {
+				pm_runtime_put_noidle(wl->dev);
 				goto deinit;
+			}
 		}
 
 		pm_runtime_mark_last_busy(wl->dev);
-- 
cgit v1.2.3-59-g8ed1b


From efad661168c7f4c309f17b773ba2017b85348e9b Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 29 May 2020 10:50:07 +0800
Subject: rtw88: coex: 8723d: set antenna control owner

Without setting antenna control owner, the WiFi could be disconnected if
the BT has traffic. Because the antenna is switched to BT side for its
traffic, and the WiFi will have no chance to transfer data. Set control
owner to prevent WiFi disconnect issue.

Fixes: f5df1a8b4376 ("rtw88: 8723d: Add 8723DE to Kconfig and Makefile")
Tested-by: You-Sheng Yang <vicamo.yang@canonical.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529025009.2468-2-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.c     | 12 ++++++++++--
 drivers/net/wireless/realtek/rtw88/main.h     |  1 +
 drivers/net/wireless/realtek/rtw88/rtw8723d.c |  5 +++++
 drivers/net/wireless/realtek/rtw88/rtw8723d.h |  1 +
 4 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index 924dccd5d146..aa1f726d0966 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -751,10 +751,18 @@ EXPORT_SYMBOL(rtw_coex_write_indirect_reg);
 
 static void rtw_coex_coex_ctrl_owner(struct rtw_dev *rtwdev, bool wifi_control)
 {
-	if (wifi_control)
+	struct rtw_chip_info *chip = rtwdev->chip;
+	const struct rtw_hw_reg *btg_reg = chip->btg_reg;
+
+	if (wifi_control) {
 		rtw_write32_set(rtwdev, REG_SYS_SDIO_CTRL, BIT_LTE_MUX_CTRL_PATH);
-	else
+		if (btg_reg)
+			rtw_write8_set(rtwdev, btg_reg->addr, btg_reg->mask);
+	} else {
 		rtw_write32_clr(rtwdev, REG_SYS_SDIO_CTRL, BIT_LTE_MUX_CTRL_PATH);
+		if (btg_reg)
+			rtw_write8_clr(rtwdev, btg_reg->addr, btg_reg->mask);
+	}
 }
 
 static void rtw_coex_set_gnt_bt(struct rtw_dev *rtwdev, u8 state)
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 7ee09c008cd4..2ae424869f8b 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1174,6 +1174,7 @@ struct rtw_chip_info {
 	const struct coex_rf_para *wl_rf_para_tx;
 	const struct coex_rf_para *wl_rf_para_rx;
 	const struct coex_5g_afh_map *afh_5g;
+	const struct rtw_hw_reg *btg_reg;
 	const struct rtw_reg_domain *coex_info_hw_regs;
 };
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 7422baf2d41b..4d88ba8406c7 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2068,6 +2068,10 @@ static const u8 wl_rssi_step_8723d[] = {60, 50, 44, 30};
 static const u8 bt_rssi_step_8723d[] = {30, 30, 30, 30};
 static const struct coex_5g_afh_map afh_5g_8723d[] = { {0, 0, 0} };
 
+static const struct rtw_hw_reg btg_reg_8723d = {
+	.addr = REG_BTG_SEL, .mask = BIT_MASK_BTG_WL,
+};
+
 /* wl_tx_dec_power, bt_tx_dec_power, wl_rx_gain, bt_rx_lna_constrain */
 static const struct coex_rf_para rf_para_tx_8723d[] = {
 	{0, 0, false, 7},  /* for normal */
@@ -2734,6 +2738,7 @@ struct rtw_chip_info rtw8723d_hw_spec = {
 	.bt_afh_span_bw40 = 0x30,
 	.afh_5g_num = ARRAY_SIZE(afh_5g_8723d),
 	.afh_5g = afh_5g_8723d,
+	.btg_reg = &btg_reg_8723d,
 
 	.coex_info_hw_regs_num = ARRAY_SIZE(coex_info_hw_regs_8723d),
 	.coex_info_hw_regs = coex_info_hw_regs_8723d,
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.h b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
index 31b8ed9ee652..7894d321cd7e 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.h
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.h
@@ -145,6 +145,7 @@ static inline s32 iqk_mult(s32 x, s32 y, s32 *ext)
 
 #define REG_GPIO_INTM		0x0048
 #define REG_BTG_SEL		0x0067
+#define BIT_MASK_BTG_WL		BIT(7)
 #define REG_LTECOEX_PATH_CONTROL	0x0070
 #define REG_LTECOEX_CTRL	0x07c0
 #define REG_LTECOEX_WRITE_DATA	0x07c4
-- 
cgit v1.2.3-59-g8ed1b


From 2647d2827f2af6e054742f687cb21efaccab44f0 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 29 May 2020 10:50:08 +0800
Subject: rtw88: coex: 8723d: handle BT inquiry cases

The coex mechanism used to give BT higher priority and more time to
transfer data during BT inquiry-page, which leads to poor WiFi performance.
WiFi traffic should be taken into consideration. If the WiFi has heavy
traffic, use another parameter to make sure WiFi has more chance to TX/RX,
while still guaranteeing the priority of BT for inquiry. If the WiFi isn't
busy (connected or not), set proper parameters to fix the original ones.

Fixes: f5df1a8b4376 ("rtw88: 8723d: Add 8723DE to Kconfig and Makefile")
Tested-by: You-Sheng Yang <vicamo.yang@canonical.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529025009.2468-3-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/coex.c     | 9 ++++++---
 drivers/net/wireless/realtek/rtw88/rtw8723d.c | 5 +++--
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/coex.c b/drivers/net/wireless/realtek/rtw88/coex.c
index aa1f726d0966..cbf3d503df1c 100644
--- a/drivers/net/wireless/realtek/rtw88/coex.c
+++ b/drivers/net/wireless/realtek/rtw88/coex.c
@@ -1354,12 +1354,15 @@ static void rtw_coex_action_bt_inquiry(struct rtw_dev *rtwdev)
 				tdma_case = 108;
 			else
 				tdma_case = 109;
+		} else if (coex_stat->wl_gl_busy) {
+			table_case = 114;
+			tdma_case = 121;
 		} else if (coex_stat->wl_connected) {
-			table_case = 101;
-			tdma_case = 110;
-		} else {
 			table_case = 100;
 			tdma_case = 100;
+		} else {
+			table_case = 101;
+			tdma_case = 100;
 		}
 	}
 
diff --git a/drivers/net/wireless/realtek/rtw88/rtw8723d.c b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
index 4d88ba8406c7..4700195c8eef 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8723d.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8723d.c
@@ -2040,7 +2040,7 @@ static const struct coex_tdma_para tdma_sant_8723d[] = {
 
 /* Non-Shared-Antenna TDMA */
 static const struct coex_tdma_para tdma_nsant_8723d[] = {
-	{ {0x00, 0x00, 0x00, 0x40, 0x00} }, /* case-100 */
+	{ {0x00, 0x00, 0x00, 0x40, 0x01} }, /* case-100 */
 	{ {0x61, 0x45, 0x03, 0x11, 0x11} }, /* case-101 */
 	{ {0x61, 0x3a, 0x03, 0x11, 0x11} },
 	{ {0x61, 0x30, 0x03, 0x11, 0x11} },
@@ -2060,7 +2060,8 @@ static const struct coex_tdma_para tdma_nsant_8723d[] = {
 	{ {0x51, 0x3a, 0x03, 0x10, 0x50} },
 	{ {0x51, 0x30, 0x03, 0x10, 0x50} },
 	{ {0x51, 0x20, 0x03, 0x10, 0x50} },
-	{ {0x51, 0x10, 0x03, 0x10, 0x50} }
+	{ {0x51, 0x10, 0x03, 0x10, 0x50} }, /* case-120 */
+	{ {0x51, 0x08, 0x03, 0x10, 0x50} },
 };
 
 /* rssi in percentage % (dbm = % - 100) */
-- 
cgit v1.2.3-59-g8ed1b


From 7a242fb69821ea428b89e381de63624abea68568 Mon Sep 17 00:00:00 2001
From: Ping-Ke Shih <pkshih@realtek.com>
Date: Fri, 29 May 2020 10:50:09 +0800
Subject: rtw88: fix EAPOL 4-way failure by finishing IQK earlier

Connecting to an AP with WPA2 security may fail. The IQK
and the EAPOL 4-way handshake may overlap because the
driver does IQK right after assoc success.

For 802.11n devices, the IQK is done in the driver and it
could require more than 100ms to complete. During IQK, any
TX/RX events are paused. So if the EAPOL 4-way handshake
started before IQK finished, then the 1/4 and 2/4 part of
the handshake could be dropped. The AP will then issue
deauth with reason IEEE8021X_FAILED (23).

To resolve this, move IQK routine into managed TX prepare
(ieee80211_ops::mgd_prepare_tx()). The callback is called
before the managed frames (auth/assoc) are sent. This will
make sure that the IQK is completed before the handshake
starts. But don't do IQK during scanning because doing it
on each channel will take too long.

For 802.11ac devices, the IQK is done in firmware and it
takes less time to complete. Therefore we don't see a
failure during the EAPOL 4-way handshake. But it is still
worth moving the IQK into ieee80211_ops::mgd_prepare_tx().

Fixes: f5df1a8b4376 ("rtw88: 8723d: Add 8723DE to Kconfig and Makefile")
Tested-by: You-Sheng Yang <vicamo.yang@canonical.com>
Signed-off-by: Ping-Ke Shih <pkshih@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529025009.2468-4-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/mac80211.c |  3 +--
 drivers/net/wireless/realtek/rtw88/main.c     | 17 +++++++++++++++++
 drivers/net/wireless/realtek/rtw88/main.h     |  3 +++
 3 files changed, 21 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/mac80211.c b/drivers/net/wireless/realtek/rtw88/mac80211.c
index 98d2ac22f6f6..c412bc54efde 100644
--- a/drivers/net/wireless/realtek/rtw88/mac80211.c
+++ b/drivers/net/wireless/realtek/rtw88/mac80211.c
@@ -341,13 +341,11 @@ static void rtw_ops_bss_info_changed(struct ieee80211_hw *hw,
 	rtw_leave_lps_deep(rtwdev);
 
 	if (changed & BSS_CHANGED_ASSOC) {
-		struct rtw_chip_info *chip = rtwdev->chip;
 		enum rtw_net_type net_type;
 
 		if (conf->assoc) {
 			rtw_coex_connect_notify(rtwdev, COEX_ASSOCIATE_FINISH);
 			net_type = RTW_NET_MGD_LINKED;
-			chip->ops->phy_calibration(rtwdev);
 
 			rtwvif->aid = conf->aid;
 			rtw_fw_download_rsvd_page(rtwdev);
@@ -663,6 +661,7 @@ static void rtw_ops_mgd_prepare_tx(struct ieee80211_hw *hw,
 	mutex_lock(&rtwdev->mutex);
 	rtw_leave_lps_deep(rtwdev);
 	rtw_coex_connect_notify(rtwdev, COEX_ASSOCIATE_START);
+	rtw_chip_prepare_tx(rtwdev);
 	mutex_unlock(&rtwdev->mutex);
 }
 
diff --git a/drivers/net/wireless/realtek/rtw88/main.c b/drivers/net/wireless/realtek/rtw88/main.c
index f88a7d2370aa..0eefafc51c62 100644
--- a/drivers/net/wireless/realtek/rtw88/main.c
+++ b/drivers/net/wireless/realtek/rtw88/main.c
@@ -408,6 +408,23 @@ void rtw_set_channel(struct rtw_dev *rtwdev)
 	}
 
 	rtw_phy_set_tx_power_level(rtwdev, center_chan);
+
+	/* if the channel isn't set for scanning, we will do RF calibration
+	 * in ieee80211_ops::mgd_prepare_tx(). Performing the calibration
+	 * during scanning on each channel takes too long.
+	 */
+	if (!test_bit(RTW_FLAG_SCANNING, rtwdev->flags))
+		rtwdev->need_rfk = true;
+}
+
+void rtw_chip_prepare_tx(struct rtw_dev *rtwdev)
+{
+	struct rtw_chip_info *chip = rtwdev->chip;
+
+	if (rtwdev->need_rfk) {
+		rtwdev->need_rfk = false;
+		chip->ops->phy_calibration(rtwdev);
+	}
 }
 
 static void rtw_vif_write_addr(struct rtw_dev *rtwdev, u32 start, u8 *addr)
diff --git a/drivers/net/wireless/realtek/rtw88/main.h b/drivers/net/wireless/realtek/rtw88/main.h
index 2ae424869f8b..0841f5fa4bf2 100644
--- a/drivers/net/wireless/realtek/rtw88/main.h
+++ b/drivers/net/wireless/realtek/rtw88/main.h
@@ -1720,6 +1720,8 @@ struct rtw_dev {
 	struct rtw_fw_state wow_fw;
 	struct rtw_wow_param wow;
 
+	bool need_rfk;
+
 	/* hci related data, must be last */
 	u8 priv[] __aligned(sizeof(void *));
 };
@@ -1793,6 +1795,7 @@ void rtw_restore_reg(struct rtw_dev *rtwdev,
 		     struct rtw_backup_info *bckp, u32 num);
 void rtw_desc_to_mcsrate(u16 rate, u8 *mcs, u8 *nss);
 void rtw_set_channel(struct rtw_dev *rtwdev);
+void rtw_chip_prepare_tx(struct rtw_dev *rtwdev);
 void rtw_vif_port_config(struct rtw_dev *rtwdev, struct rtw_vif *rtwvif,
 			 u32 config);
 void rtw_tx_report_purge_timer(struct timer_list *t);
-- 
cgit v1.2.3-59-g8ed1b


From 4e1a341580f2e51f7d1c992b50c28a6c4a242f7f Mon Sep 17 00:00:00 2001
From: Yan-Hsuan Chuang <yhchuang@realtek.com>
Date: Fri, 22 May 2020 11:55:21 +0800
Subject: rtw88: 8822c: fix missing brace warning for old compilers

For older versions of gcc, the array = {0}; will cause warnings:

drivers/net/wireless/realtek/rtw88/rtw8822c.c: In function 'rtw8822c_power_trim':
>> drivers/net/wireless/realtek/rtw88/rtw8822c.c:1039:2: warning:
>> missing braces around initializer [-Wmissing-braces]
s8 bb_gain[2][8] = {0};
^
drivers/net/wireless/realtek/rtw88/rtw8822c.c:1039:2: warning: (near
initialization for 'bb_gain[0]') [-Wmissing-braces]

Fixes: 5ad4d8957b69 ("rtw88: set power trim according to efuse PG values")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Acked-by: Sebastian Andrzej Siewior <bigeasy@linutronix.de>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200522035521.12295-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 5e4cc57dbd7c..8d65a9684af3 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1037,7 +1037,7 @@ static void rtw8822c_set_power_trim(struct rtw_dev *rtwdev, s8 bb_gain[2][8])
 static void rtw8822c_power_trim(struct rtw_dev *rtwdev)
 {
 	u8 pg_pwr = 0xff, i, path, idx;
-	s8 bb_gain[2][8] = {0};
+	s8 bb_gain[2][8] = {};
 	u16 rf_efuse_2g[3] = {PPG_2GL_TXAB, PPG_2GM_TXAB, PPG_2GH_TXAB};
 	u16 rf_efuse_5g[2][5] = {{PPG_5GL1_TXA, PPG_5GL2_TXA, PPG_5GM1_TXA,
 				  PPG_5GM2_TXA, PPG_5GH1_TXA},
-- 
cgit v1.2.3-59-g8ed1b


From 7967af8de39d4572da2d15dfa12b1b08810d60a9 Mon Sep 17 00:00:00 2001
From: Chien-Hsun Liao <ben.liao@realtek.com>
Date: Fri, 22 May 2020 17:12:34 +0800
Subject: rtw88: 8822c: remove CCK TX setting when switch channel

The CCK TX setting applied when switching channels fixes CCK to
path A only, so if the antenna is configured to path B
(e.g. iw phy set antenna 0x2 0x3 "TX B/RX AB"), then the CCK
packets can never be delivered to the air if only path B is
connected with an antenna (they can possibly be transmitted
through path A, but as path B is configured, that is not the
expected behavior).

This can also solve the racing issue of CCK TX setting between
driver and firmware. The CCK TX setting in driver should be
removed. Otherwise, the CCK TX setting would be wrong when the
racing occurs.

Fixes: 297bcf8222f2 ("rtw88: add support for set/get antennas")
Signed-off-by: Chien-Hsun Liao <ben.liao@realtek.com>
Signed-off-by: Yan-Hsuan Chuang <yhchuang@realtek.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200522091234.24495-1-yhchuang@realtek.com
---
 drivers/net/wireless/realtek/rtw88/rtw8822c.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/wireless/realtek/rtw88/rtw8822c.c b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
index 8d65a9684af3..c3d72ef611c6 100644
--- a/drivers/net/wireless/realtek/rtw88/rtw8822c.c
+++ b/drivers/net/wireless/realtek/rtw88/rtw8822c.c
@@ -1496,7 +1496,6 @@ static void rtw8822c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
 {
 	if (IS_CH_2G_BAND(channel)) {
 		rtw_write32_clr(rtwdev, REG_BGCTRL, BITS_RX_IQ_WEIGHT);
-		rtw_write32_mask(rtwdev, REG_RXCCKSEL, 0xf0000000, 0x8);
 		rtw_write32_set(rtwdev, REG_TXF4, BIT(20));
 		rtw_write32_clr(rtwdev, REG_CCK_CHECK, BIT_CHECK_CCK_EN);
 		rtw_write32_clr(rtwdev, REG_CCKTXONLY, BIT_BB_CCK_CHECK_EN);
@@ -1564,7 +1563,6 @@ static void rtw8822c_set_channel_bb(struct rtw_dev *rtwdev, u8 channel, u8 bw,
 		rtw_write32_set(rtwdev, REG_CCK_CHECK, BIT_CHECK_CCK_EN);
 		rtw_write32_set(rtwdev, REG_BGCTRL, BITS_RX_IQ_WEIGHT);
 		rtw_write32_clr(rtwdev, REG_TXF4, BIT(20));
-		rtw_write32_mask(rtwdev, REG_RXCCKSEL, 0xf0000000, 0x0);
 		rtw_write32_mask(rtwdev, REG_CCAMSK, 0x3F000000, 0x22);
 		rtw_write32_mask(rtwdev, REG_TXDFIR0, 0x70, 0x3);
 		if (IS_CH_5G_BAND_1(channel) || IS_CH_5G_BAND_2(channel)) {
-- 
cgit v1.2.3-59-g8ed1b


From da74b6933b3ba27c88fa0b7ccbd019e4f41ebfd4 Mon Sep 17 00:00:00 2001
From: Dinghao Liu <dinghao.liu@zju.edu.cn>
Date: Fri, 22 May 2020 12:49:04 +0800
Subject: wlcore: fix runtime pm imbalance in wlcore_irq_locked

When wlcore_fw_status() returns an error code, a pairing
runtime PM usage counter decrement is needed to keep the
counter balanced. It's the same for all error paths after
wlcore_fw_status().

Signed-off-by: Dinghao Liu <dinghao.liu@zju.edu.cn>
Acked-by: Tony Lindgren <tony@atomide.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200522044906.29564-1-dinghao.liu@zju.edu.cn
---
 drivers/net/wireless/ti/wlcore/main.c | 17 +++++++++--------
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/ti/wlcore/main.c b/drivers/net/wireless/ti/wlcore/main.c
index 0dcad4949889..de6c8a7589ca 100644
--- a/drivers/net/wireless/ti/wlcore/main.c
+++ b/drivers/net/wireless/ti/wlcore/main.c
@@ -548,7 +548,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 
 		ret = wlcore_fw_status(wl, wl->fw_status);
 		if (ret < 0)
-			goto out;
+			goto err_ret;
 
 		wlcore_hw_tx_immediate_compl(wl);
 
@@ -565,7 +565,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 			ret = -EIO;
 
 			/* restarting the chip. ignore any other interrupt. */
-			goto out;
+			goto err_ret;
 		}
 
 		if (unlikely(intr & WL1271_ACX_SW_INTR_WATCHDOG)) {
@@ -575,7 +575,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 			ret = -EIO;
 
 			/* restarting the chip. ignore any other interrupt. */
-			goto out;
+			goto err_ret;
 		}
 
 		if (likely(intr & WL1271_ACX_INTR_DATA)) {
@@ -583,7 +583,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 
 			ret = wlcore_rx(wl, wl->fw_status);
 			if (ret < 0)
-				goto out;
+				goto err_ret;
 
 			/* Check if any tx blocks were freed */
 			spin_lock_irqsave(&wl->wl_lock, flags);
@@ -596,7 +596,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 				 */
 				ret = wlcore_tx_work_locked(wl);
 				if (ret < 0)
-					goto out;
+					goto err_ret;
 			} else {
 				spin_unlock_irqrestore(&wl->wl_lock, flags);
 			}
@@ -604,7 +604,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 			/* check for tx results */
 			ret = wlcore_hw_tx_delayed_compl(wl);
 			if (ret < 0)
-				goto out;
+				goto err_ret;
 
 			/* Make sure the deferred queues don't get too long */
 			defer_count = skb_queue_len(&wl->deferred_tx_queue) +
@@ -617,14 +617,14 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 			wl1271_debug(DEBUG_IRQ, "WL1271_ACX_INTR_EVENT_A");
 			ret = wl1271_event_handle(wl, 0);
 			if (ret < 0)
-				goto out;
+				goto err_ret;
 		}
 
 		if (intr & WL1271_ACX_INTR_EVENT_B) {
 			wl1271_debug(DEBUG_IRQ, "WL1271_ACX_INTR_EVENT_B");
 			ret = wl1271_event_handle(wl, 1);
 			if (ret < 0)
-				goto out;
+				goto err_ret;
 		}
 
 		if (intr & WL1271_ACX_INTR_INIT_COMPLETE)
@@ -635,6 +635,7 @@ static int wlcore_irq_locked(struct wl1271 *wl)
 			wl1271_debug(DEBUG_IRQ, "WL1271_ACX_INTR_HW_AVAILABLE");
 	}
 
+err_ret:
 	pm_runtime_mark_last_busy(wl->dev);
 	pm_runtime_put_autosuspend(wl->dev);
 
-- 
cgit v1.2.3-59-g8ed1b


From 2a7621ded321dfd70b5349bbfcd1af9e9df1f197 Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Thu, 28 May 2020 22:49:34 -0500
Subject: brcmfmac: set F2 blocksize for 4373

Set F2 blocksize to 256 bytes for 4373. It fixes DMA error while having
UDP bi-directional traffic. Also use a defined F1 MesBusyCtrl value.

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529034938.124533-2-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c | 14 +++++++++++++-
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c   |  4 ++--
 2 files changed, 15 insertions(+), 3 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index 22a17ae09e94..bb3196cba683 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -43,6 +43,7 @@
 
 #define SDIO_FUNC1_BLOCKSIZE		64
 #define SDIO_FUNC2_BLOCKSIZE		512
+#define SDIO_4373_FUNC2_BLOCKSIZE	256
 #define SDIO_4359_FUNC2_BLOCKSIZE	256
 /* Maximum milliseconds to wait for F2 to come up */
 #define SDIO_WAIT_F2RDY	3000
@@ -910,13 +911,24 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 		sdio_release_host(sdiodev->func1);
 		goto out;
 	}
-	if (sdiodev->func2->device == SDIO_DEVICE_ID_BROADCOM_4359)
+	switch (sdiodev->func2->device) {
+	case SDIO_DEVICE_ID_CYPRESS_4373:
+		f2_blksz = SDIO_4373_FUNC2_BLOCKSIZE;
+		break;
+	case SDIO_DEVICE_ID_BROADCOM_4359:
 		f2_blksz = SDIO_4359_FUNC2_BLOCKSIZE;
+		break;
+	default:
+		break;
+	}
+
 	ret = sdio_set_block_size(sdiodev->func2, f2_blksz);
 	if (ret) {
 		brcmf_err("Failed to set F2 blocksize\n");
 		sdio_release_host(sdiodev->func1);
 		goto out;
+	} else {
+		brcmf_dbg(SDIO, "set F2 blocksize to %d\n", f2_blksz);
 	}
 
 	/* increase F2 timeout */
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index ce6f15284277..dce22cd2279d 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -41,6 +41,7 @@
 /* watermark expressed in number of words */
 #define DEFAULT_F2_WATERMARK    0x8
 #define CY_4373_F2_WATERMARK    0x40
+#define CY_4373_F1_MESBUSYCTRL  (CY_4373_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_43012_F2_WATERMARK    0x60
 #define CY_4359_F2_WATERMARK	0x40
 #define CY_4359_F1_MESBUSYCTRL	(CY_4359_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
@@ -4195,8 +4196,7 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
 					   &err);
 			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
-					   CY_4373_F2_WATERMARK |
-					   SBSDIO_MESBUSYCTRL_ENAB, &err);
+					   CY_4373_F1_MESBUSYCTRL, &err);
 			break;
 		case SDIO_DEVICE_ID_CYPRESS_43012:
 			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
-- 
cgit v1.2.3-59-g8ed1b


From 528158a8d4522bed5a35bb1a942638a79bca6acd Mon Sep 17 00:00:00 2001
From: Double Lo <double.lo@cypress.com>
Date: Thu, 28 May 2020 22:49:35 -0500
Subject: brcmfmac: fix 4339 CRC error under SDIO 3.0 SDR104 mode

This patch fixes a 4339 CRC error seen while running a throughput test
together with a suspend/resume test script.

The continuous failure messages before system crash:
brcmfmac: brcmf_sdiod_sglist_rw: CMD53 sg block read failed -84
brcmfmac: brcmf_sdio_rxglom: glom read of 25600 bytes failed: -5
brcmfmac: brcmf_sdio_rxfail: abort command, terminate frame
brcmfmac: brcmf_sdiod_sglist_rw: CMD53 sg block read failed -84
brcmfmac: brcmf_sdio_rxglom: glom read of 24576 bytes failed: -5
brcmfmac: brcmf_sdio_rxfail: abort command, terminate frame

Signed-off-by: Double Lo <double.lo@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529034938.124533-3-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index dce22cd2279d..491b635e72b1 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -43,6 +43,10 @@
 #define CY_4373_F2_WATERMARK    0x40
 #define CY_4373_F1_MESBUSYCTRL  (CY_4373_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_43012_F2_WATERMARK    0x60
+#define CY_4339_F2_WATERMARK    48
+#define CY_4339_MES_WATERMARK	80
+#define CY_4339_MESBUSYCTRL	(CY_4339_MES_WATERMARK | \
+				 SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_4359_F2_WATERMARK	0x40
 #define CY_4359_F1_MESBUSYCTRL	(CY_4359_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
 
@@ -4209,6 +4213,19 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
 					   &err);
 			break;
+		case SDIO_DEVICE_ID_BROADCOM_4339:
+			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes for 4339\n",
+				  CY_4339_F2_WATERMARK);
+			brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
+					   CY_4339_F2_WATERMARK, &err);
+			devctl = brcmf_sdiod_readb(sdiod, SBSDIO_DEVICE_CTL,
+						   &err);
+			devctl |= SBSDIO_DEVCTL_F2WM_ENAB;
+			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
+					   &err);
+			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
+					   CY_4339_MESBUSYCTRL, &err);
+			break;
 		case SDIO_DEVICE_ID_BROADCOM_4359:
 			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
 				  CY_4359_F2_WATERMARK);
-- 
cgit v1.2.3-59-g8ed1b


From 2bee41270f3bafe475dd2cfe28fcc3f1ed6ab6ee Mon Sep 17 00:00:00 2001
From: Frank Kao <frank.kao@cypress.com>
Date: Thu, 28 May 2020 22:49:36 -0500
Subject: brcmfmac: set F2 blocksize and watermark for 4354/4356 SDIO

Set F2 blocksize to 256 bytes and watermark to 0x40 for 4354/4356 SDIO.
Also enable and configure F1 MesBusyCtrl. It would resolve random driver
crash issue.

Signed-off-by: Frank Kao <frank.kao@cypress.com>
Signed-off-by: Chi-Hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529034938.124533-4-chi-hsien.lin@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c    |  8 ++++++--
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c  | 16 ++++++++++------
 2 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
index bb3196cba683..b1a66320ba54 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/bcmsdh.c
@@ -44,7 +44,7 @@
 #define SDIO_FUNC1_BLOCKSIZE		64
 #define SDIO_FUNC2_BLOCKSIZE		512
 #define SDIO_4373_FUNC2_BLOCKSIZE	256
-#define SDIO_4359_FUNC2_BLOCKSIZE	256
+#define SDIO_435X_FUNC2_BLOCKSIZE	256
 /* Maximum milliseconds to wait for F2 to come up */
 #define SDIO_WAIT_F2RDY	3000
 
@@ -916,7 +916,11 @@ static int brcmf_sdiod_probe(struct brcmf_sdio_dev *sdiodev)
 		f2_blksz = SDIO_4373_FUNC2_BLOCKSIZE;
 		break;
 	case SDIO_DEVICE_ID_BROADCOM_4359:
-		f2_blksz = SDIO_4359_FUNC2_BLOCKSIZE;
+		/* fallthrough */
+	case SDIO_DEVICE_ID_BROADCOM_4354:
+		/* fallthrough */
+	case SDIO_DEVICE_ID_BROADCOM_4356:
+		f2_blksz = SDIO_435X_FUNC2_BLOCKSIZE;
 		break;
 	default:
 		break;
diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 491b635e72b1..037a4efef924 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -47,9 +47,9 @@
 #define CY_4339_MES_WATERMARK	80
 #define CY_4339_MESBUSYCTRL	(CY_4339_MES_WATERMARK | \
 				 SBSDIO_MESBUSYCTRL_ENAB)
-#define CY_4359_F2_WATERMARK	0x40
-#define CY_4359_F1_MESBUSYCTRL	(CY_4359_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
-
+#define CY_435X_F2_WATERMARK	0x40
+#define CY_435X_F1_MESBUSYCTRL	(CY_435X_F2_WATERMARK | \
+				 SBSDIO_MESBUSYCTRL_ENAB)
 #ifdef DEBUG
 
 #define BRCMF_TRAP_INFO_SIZE	80
@@ -4227,17 +4227,21 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 					   CY_4339_MESBUSYCTRL, &err);
 			break;
 		case SDIO_DEVICE_ID_BROADCOM_4359:
+			/* fallthrough */
+		case SDIO_DEVICE_ID_BROADCOM_4354:
+			/* fallthrough */
+		case SDIO_DEVICE_ID_BROADCOM_4356:
 			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes\n",
-				  CY_4359_F2_WATERMARK);
+				  CY_435X_F2_WATERMARK);
 			brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
-					   CY_4359_F2_WATERMARK, &err);
+					   CY_435X_F2_WATERMARK, &err);
 			devctl = brcmf_sdiod_readb(sdiod, SBSDIO_DEVICE_CTL,
 						   &err);
 			devctl |= SBSDIO_DEVCTL_F2WM_ENAB;
 			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
 					   &err);
 			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
-					   CY_4359_F1_MESBUSYCTRL, &err);
+					   CY_435X_F1_MESBUSYCTRL, &err);
 			break;
 		default:
 			brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
-- 
cgit v1.2.3-59-g8ed1b


From df18c257bd6a3fe1906a9c87bcb0bb30cf87ef64 Mon Sep 17 00:00:00 2001
From: Wright Feng <wright.feng@cypress.com>
Date: Thu, 28 May 2020 22:49:37 -0500
Subject: brcmfmac: fix 43455 CRC error under SDIO 3.0 SDR104 mode

This patch fixes 43455 CRC error while running throughput test with
suspend/resume stress test.

The continuous failure messages before system crash:
brcmfmac: brcmf_sdiod_sglist_rw: CMD53 sg block read failed -84
brcmfmac: brcmf_sdio_rxglom: glom read of 25600 bytes failed: -5
brcmfmac: brcmf_sdio_rxfail: abort command, terminate frame
brcmfmac: brcmf_sdiod_sglist_rw: CMD53 sg block read failed -84
brcmfmac: brcmf_sdio_rxglom: glom read of 24576 bytes failed: -5
brcmfmac: brcmf_sdio_rxfail: abort command, terminate frame

Signed-off-by: Wright Feng <wright.feng@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529034938.124533-5-chi-hsien.lin@cypress.com
---
 .../net/wireless/broadcom/brcm80211/brcmfmac/sdio.c    | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 037a4efef924..58d9f0b90ad3 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -47,9 +47,14 @@
 #define CY_4339_MES_WATERMARK	80
 #define CY_4339_MESBUSYCTRL	(CY_4339_MES_WATERMARK | \
 				 SBSDIO_MESBUSYCTRL_ENAB)
+#define CY_43455_F2_WATERMARK	0x60
+#define CY_43455_MES_WATERMARK	0x50
+#define CY_43455_MESBUSYCTRL	(CY_43455_MES_WATERMARK | \
+				 SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_435X_F2_WATERMARK	0x40
 #define CY_435X_F1_MESBUSYCTRL	(CY_435X_F2_WATERMARK | \
 				 SBSDIO_MESBUSYCTRL_ENAB)
+
 #ifdef DEBUG
 
 #define BRCMF_TRAP_INFO_SIZE	80
@@ -4226,6 +4231,19 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
 					   CY_4339_MESBUSYCTRL, &err);
 			break;
+		case SDIO_DEVICE_ID_BROADCOM_43455:
+			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes for 43455\n",
+				  CY_43455_F2_WATERMARK);
+			brcmf_sdiod_writeb(sdiod, SBSDIO_WATERMARK,
+					   CY_43455_F2_WATERMARK, &err);
+			devctl = brcmf_sdiod_readb(sdiod, SBSDIO_DEVICE_CTL,
+						   &err);
+			devctl |= SBSDIO_DEVCTL_F2WM_ENAB;
+			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
+					   &err);
+			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
+					   CY_43455_MESBUSYCTRL, &err);
+			break;
 		case SDIO_DEVICE_ID_BROADCOM_4359:
 			/* fallthrough */
 		case SDIO_DEVICE_ID_BROADCOM_4354:
-- 
cgit v1.2.3-59-g8ed1b


From 113a57a400a2d4974448760464f6443d52a1e498 Mon Sep 17 00:00:00 2001
From: Double Lo <double.lo@cypress.com>
Date: Thu, 28 May 2020 22:49:38 -0500
Subject: brcmfmac: 43012 Update MES Watermark

Set MES watermark size to 0x50 for 43012. It fixes SDIO bus hang issue
when running at high throughput.

Signed-off-by: Double Lo <double.lo@cypress.com>
Signed-off-by: Chi-hsien Lin <chi-hsien.lin@cypress.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200529034938.124533-6-chi-hsien.lin@cypress.com
---
 drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
index 58d9f0b90ad3..760b7737e745 100644
--- a/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
+++ b/drivers/net/wireless/broadcom/brcm80211/brcmfmac/sdio.c
@@ -43,6 +43,9 @@
 #define CY_4373_F2_WATERMARK    0x40
 #define CY_4373_F1_MESBUSYCTRL  (CY_4373_F2_WATERMARK | SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_43012_F2_WATERMARK    0x60
+#define CY_43012_MES_WATERMARK  0x50
+#define CY_43012_MESBUSYCTRL    (CY_43012_MES_WATERMARK | \
+				 SBSDIO_MESBUSYCTRL_ENAB)
 #define CY_4339_F2_WATERMARK    48
 #define CY_4339_MES_WATERMARK	80
 #define CY_4339_MESBUSYCTRL	(CY_4339_MES_WATERMARK | \
@@ -4217,6 +4220,8 @@ static void brcmf_sdio_firmware_callback(struct device *dev, int err,
 			devctl |= SBSDIO_DEVCTL_F2WM_ENAB;
 			brcmf_sdiod_writeb(sdiod, SBSDIO_DEVICE_CTL, devctl,
 					   &err);
+			brcmf_sdiod_writeb(sdiod, SBSDIO_FUNC1_MESBUSYCTRL,
+					   CY_43012_MESBUSYCTRL, &err);
 			break;
 		case SDIO_DEVICE_ID_BROADCOM_4339:
 			brcmf_dbg(INFO, "set F2 watermark to 0x%x*4 bytes for 4339\n",
-- 
cgit v1.2.3-59-g8ed1b


From 11e7a91994c29da96d847f676be023da6a2c1359 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Wed, 27 May 2020 21:48:30 +0300
Subject: airo: Fix read overflows sending packets

The problem is that we always copy a minimum of ETH_ZLEN (60) bytes from
skb->data even when skb->len is less than ETH_ZLEN so it leads to a read
overflow.

The fix is to pad skb->data to at least ETH_ZLEN bytes.

Cc: <stable@vger.kernel.org>
Reported-by: Hu Jiahui <kirin.say@gmail.com>
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200527184830.GA1164846@mwanda
---
 drivers/net/wireless/cisco/airo.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/wireless/cisco/airo.c b/drivers/net/wireless/cisco/airo.c
index 8363f91df7ea..827bb6d74815 100644
--- a/drivers/net/wireless/cisco/airo.c
+++ b/drivers/net/wireless/cisco/airo.c
@@ -1925,6 +1925,10 @@ static netdev_tx_t mpi_start_xmit(struct sk_buff *skb,
 		airo_print_err(dev->name, "%s: skb == NULL!",__func__);
 		return NETDEV_TX_OK;
 	}
+	if (skb_padto(skb, ETH_ZLEN)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
 	npacks = skb_queue_len (&ai->txq);
 
 	if (npacks >= MAXTXQ - 1) {
@@ -2127,6 +2131,10 @@ static netdev_tx_t airo_start_xmit(struct sk_buff *skb,
 		airo_print_err(dev->name, "%s: skb == NULL!", __func__);
 		return NETDEV_TX_OK;
 	}
+	if (skb_padto(skb, ETH_ZLEN)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
 
 	/* Find a vacant FID */
 	for( i = 0; i < MAX_FIDS / 2 && (fids[i] & 0xffff0000); i++ );
@@ -2201,6 +2209,10 @@ static netdev_tx_t airo_start_xmit11(struct sk_buff *skb,
 		airo_print_err(dev->name, "%s: skb == NULL!", __func__);
 		return NETDEV_TX_OK;
 	}
+	if (skb_padto(skb, ETH_ZLEN)) {
+		dev->stats.tx_dropped++;
+		return NETDEV_TX_OK;
+	}
 
 	/* Find a vacant FID */
 	for( i = MAX_FIDS / 2; i < MAX_FIDS && (fids[i] & 0xffff0000); i++ );
-- 
cgit v1.2.3-59-g8ed1b


From 86cffb2c0a59d72f66060253e123154416618fa2 Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Thu, 21 May 2020 14:34:44 +0200
Subject: mwifiex: Parse all API_VER_ID properties
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

During initialization of SD8997 wifi chip kernel prints warnings:

  mwifiex_sdio mmc0:0001:1: Unknown api_id: 3
  mwifiex_sdio mmc0:0001:1: Unknown api_id: 4

This patch adds support for parsing all api ids provided by SD8997
firmware.

Signed-off-by: Pali Rohár <pali@kernel.org>
Acked-by: Ganapathi Bhat <ganapathi.bhat@nxp.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200521123444.28957-1-pali@kernel.org
---
 drivers/net/wireless/marvell/mwifiex/cmdevt.c | 17 +++++++++++++++--
 drivers/net/wireless/marvell/mwifiex/fw.h     |  2 ++
 2 files changed, 17 insertions(+), 2 deletions(-)

diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 7e4b8cd52605..589cc5eb12a2 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -1581,8 +1581,21 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
 					adapter->fw_api_ver =
 							api_rev->major_ver;
 					mwifiex_dbg(adapter, INFO,
-						    "Firmware api version %d\n",
-						    adapter->fw_api_ver);
+						    "Firmware api version %d.%d\n",
+						    adapter->fw_api_ver,
+						    api_rev->minor_ver);
+					break;
+				case UAP_FW_API_VER_ID:
+					mwifiex_dbg(adapter, INFO,
+						    "uAP api version %d.%d\n",
+						    api_rev->major_ver,
+						    api_rev->minor_ver);
+					break;
+				case CHANRPT_API_VER_ID:
+					mwifiex_dbg(adapter, INFO,
+						    "channel report api version %d.%d\n",
+						    api_rev->major_ver,
+						    api_rev->minor_ver);
 					break;
 				default:
 					mwifiex_dbg(adapter, FATAL,
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index a415d73a73e6..6f86f5b96fc9 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -1052,6 +1052,8 @@ struct host_cmd_ds_802_11_ps_mode_enh {
 enum API_VER_ID {
 	KEY_API_VER_ID = 1,
 	FW_API_VER_ID = 2,
+	UAP_FW_API_VER_ID = 3,
+	CHANRPT_API_VER_ID = 4,
 };
 
 struct hw_spec_api_rev {
-- 
cgit v1.2.3-59-g8ed1b


From 982d7287f8dad2d5e1c57dc84aca83128e787666 Mon Sep 17 00:00:00 2001
From: Pali Rohár <pali@kernel.org>
Date: Thu, 21 May 2020 14:35:59 +0200
Subject: mwifiex: Add support for NL80211_ATTR_MAX_AP_ASSOC_STA
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

SD8997 firmware sends TLV_TYPE_MAX_CONN with struct hw_spec_max_conn to
inform kernel about maximum number of p2p connections and stations in AP
mode.

During initialization of SD8997 wifi chip kernel prints warning:

  mwifiex_sdio mmc0:0001:1: Unknown GET_HW_SPEC TLV type: 0x217

This patch adds support for parsing TLV_TYPE_MAX_CONN (0x217) and sets
appropriate cfg80211 member 'max_ap_assoc_sta' from retrieved structure.

It allows userspace to retrieve NL80211_ATTR_MAX_AP_ASSOC_STA attribute.

Signed-off-by: Pali Rohár <pali@kernel.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200521123559.29028-1-pali@kernel.org
---
 drivers/net/wireless/marvell/mwifiex/cfg80211.c |  5 +++++
 drivers/net/wireless/marvell/mwifiex/cmdevt.c   | 12 ++++++++++++
 drivers/net/wireless/marvell/mwifiex/fw.h       |  8 ++++++++
 drivers/net/wireless/marvell/mwifiex/main.h     |  1 +
 4 files changed, 26 insertions(+)

diff --git a/drivers/net/wireless/marvell/mwifiex/cfg80211.c b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
index 97813ac291ae..4e4f59c17ded 100644
--- a/drivers/net/wireless/marvell/mwifiex/cfg80211.c
+++ b/drivers/net/wireless/marvell/mwifiex/cfg80211.c
@@ -4335,6 +4335,11 @@ int mwifiex_register_cfg80211(struct mwifiex_adapter *adapter)
 		wiphy->iface_combinations = &mwifiex_iface_comb_ap_sta;
 	wiphy->n_iface_combinations = 1;
 
+	if (adapter->max_sta_conn > adapter->max_p2p_conn)
+		wiphy->max_ap_assoc_sta = adapter->max_sta_conn;
+	else
+		wiphy->max_ap_assoc_sta = adapter->max_p2p_conn;
+
 	/* Initialize cipher suits */
 	wiphy->cipher_suites = mwifiex_cipher_suites;
 	wiphy->n_cipher_suites = ARRAY_SIZE(mwifiex_cipher_suites);
diff --git a/drivers/net/wireless/marvell/mwifiex/cmdevt.c b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
index 589cc5eb12a2..d068b9075c32 100644
--- a/drivers/net/wireless/marvell/mwifiex/cmdevt.c
+++ b/drivers/net/wireless/marvell/mwifiex/cmdevt.c
@@ -1495,6 +1495,7 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
 	struct mwifiex_adapter *adapter = priv->adapter;
 	struct mwifiex_ie_types_header *tlv;
 	struct hw_spec_api_rev *api_rev;
+	struct hw_spec_max_conn *max_conn;
 	u16 resp_size, api_id;
 	int i, left_len, parsed_len = 0;
 
@@ -1604,6 +1605,17 @@ int mwifiex_ret_get_hw_spec(struct mwifiex_private *priv,
 					break;
 				}
 				break;
+			case TLV_TYPE_MAX_CONN:
+				max_conn = (struct hw_spec_max_conn *)tlv;
+				adapter->max_p2p_conn = max_conn->max_p2p_conn;
+				adapter->max_sta_conn = max_conn->max_sta_conn;
+				mwifiex_dbg(adapter, INFO,
+					    "max p2p connections: %u\n",
+					    adapter->max_p2p_conn);
+				mwifiex_dbg(adapter, INFO,
+					    "max sta connections: %u\n",
+					    adapter->max_sta_conn);
+				break;
 			default:
 				mwifiex_dbg(adapter, FATAL,
 					    "Unknown GET_HW_SPEC TLV type: %#x\n",
diff --git a/drivers/net/wireless/marvell/mwifiex/fw.h b/drivers/net/wireless/marvell/mwifiex/fw.h
index 6f86f5b96fc9..8047e307892e 100644
--- a/drivers/net/wireless/marvell/mwifiex/fw.h
+++ b/drivers/net/wireless/marvell/mwifiex/fw.h
@@ -220,6 +220,7 @@ enum MWIFIEX_802_11_PRIVACY_FILTER {
 #define TLV_TYPE_BSS_MODE           (PROPRIETARY_TLV_BASE_ID + 206)
 #define TLV_TYPE_RANDOM_MAC         (PROPRIETARY_TLV_BASE_ID + 236)
 #define TLV_TYPE_CHAN_ATTR_CFG      (PROPRIETARY_TLV_BASE_ID + 237)
+#define TLV_TYPE_MAX_CONN           (PROPRIETARY_TLV_BASE_ID + 279)
 
 #define MWIFIEX_TX_DATA_BUF_SIZE_2K        2048
 
@@ -2388,4 +2389,11 @@ struct mwifiex_opt_sleep_confirm {
 	__le16 action;
 	__le16 resp_ctrl;
 } __packed;
+
+struct hw_spec_max_conn {
+	struct mwifiex_ie_types_header header;
+	u8 max_p2p_conn;
+	u8 max_sta_conn;
+} __packed;
+
 #endif /* !_MWIFIEX_FW_H_ */
diff --git a/drivers/net/wireless/marvell/mwifiex/main.h b/drivers/net/wireless/marvell/mwifiex/main.h
index afaffc325452..5923c5c14c8d 100644
--- a/drivers/net/wireless/marvell/mwifiex/main.h
+++ b/drivers/net/wireless/marvell/mwifiex/main.h
@@ -1022,6 +1022,7 @@ struct mwifiex_adapter {
 	bool ext_scan;
 	u8 fw_api_ver;
 	u8 key_api_major_ver, key_api_minor_ver;
+	u8 max_p2p_conn, max_sta_conn;
 	struct memory_type_mapping *mem_type_mapping_tbl;
 	u8 num_mem_types;
 	bool scan_chan_gap_enabled;
-- 
cgit v1.2.3-59-g8ed1b


From 36432797641ff0013be9252eecf7ad1ba73171a2 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Thu, 28 May 2020 19:53:52 -0700
Subject: vmxnet3: use correct hdr reference when packet is encapsulated

Commit dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload
support") added support for encapsulation offload. However, while
preparing the inner TSO packet, it uses a reference to the outer IP
headers.

This patch fixes this issue by using the correct reference for the
inner headers.

Fixes: dacce2be3312 ("vmxnet3: add geneve and vxlan tunnel offload support")
Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_drv.c | 31 +++++++++++++++++++++++--------
 1 file changed, 23 insertions(+), 8 deletions(-)

diff --git a/drivers/net/vmxnet3/vmxnet3_drv.c b/drivers/net/vmxnet3/vmxnet3_drv.c
index 3d07ce6cb706..ca395f9679d0 100644
--- a/drivers/net/vmxnet3/vmxnet3_drv.c
+++ b/drivers/net/vmxnet3/vmxnet3_drv.c
@@ -859,14 +859,29 @@ vmxnet3_parse_hdr(struct sk_buff *skb, struct vmxnet3_tx_queue *tq,
 			 */
 			ctx->l4_offset = skb_checksum_start_offset(skb);
 
-			if (ctx->ipv4) {
-				const struct iphdr *iph = ip_hdr(skb);
+			if (VMXNET3_VERSION_GE_4(adapter) &&
+			    skb->encapsulation) {
+				struct iphdr *iph = inner_ip_hdr(skb);
+
+				if (iph->version == 4) {
+					protocol = iph->protocol;
+				} else {
+					const struct ipv6hdr *ipv6h;
 
-				protocol = iph->protocol;
-			} else if (ctx->ipv6) {
-				const struct ipv6hdr *ipv6h = ipv6_hdr(skb);
+					ipv6h = inner_ipv6_hdr(skb);
+					protocol = ipv6h->nexthdr;
+				}
+			} else {
+				if (ctx->ipv4) {
+					const struct iphdr *iph = ip_hdr(skb);
 
-				protocol = ipv6h->nexthdr;
+					protocol = iph->protocol;
+				} else if (ctx->ipv6) {
+					const struct ipv6hdr *ipv6h;
+
+					ipv6h = ipv6_hdr(skb);
+					protocol = ipv6h->nexthdr;
+				}
 			}
 
 			switch (protocol) {
@@ -946,11 +961,11 @@ vmxnet3_prepare_inner_tso(struct sk_buff *skb,
 	struct tcphdr *tcph = inner_tcp_hdr(skb);
 	struct iphdr *iph = inner_ip_hdr(skb);
 
-	if (ctx->ipv4) {
+	if (iph->version == 4) {
 		iph->check = 0;
 		tcph->check = ~csum_tcpudp_magic(iph->saddr, iph->daddr, 0,
 						 IPPROTO_TCP, 0);
-	} else if (ctx->ipv6) {
+	} else {
 		struct ipv6hdr *iph = inner_ipv6_hdr(skb);
 
 		tcph->check = ~csum_ipv6_magic(&iph->saddr, &iph->daddr, 0,
-- 
cgit v1.2.3-59-g8ed1b


From 09b547a7996e77d4141de27c1657d7c3adcb478e Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Fri, 29 May 2020 10:26:48 +0200
Subject: net: ethernet: mtk-star-emac: remove unused variable

The desc pointer is set but not used. Remove it.

Reported-by: kbuild test robot <lkp@intel.com>
Fixes: 8c7bd5a454ff ("net: ethernet: mtk-star-emac: new driver")
Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 8596ca0e60eb..7df35872c107 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -746,15 +746,12 @@ mtk_star_ring_free_skbs(struct mtk_star_priv *priv, struct mtk_star_ring *ring,
 					   struct mtk_star_ring_desc_data *))
 {
 	struct mtk_star_ring_desc_data desc_data;
-	struct mtk_star_ring_desc *desc;
 	int i;
 
 	for (i = 0; i < MTK_STAR_RING_NUM_DESCS; i++) {
 		if (!ring->dma_addrs[i])
 			continue;
 
-		desc = &ring->descs[i];
-
 		desc_data.dma_addr = ring->dma_addrs[i];
 		desc_data.skb = ring->skbs[i];
 
-- 
cgit v1.2.3-59-g8ed1b


From 2684bda34786b6ae0944a4bacb7f59b5955979a8 Mon Sep 17 00:00:00 2001
From: Antoine Tenart <antoine.tenart@bootlin.com>
Date: Fri, 29 May 2020 11:49:09 +0200
Subject: net: phy: mscc: fix PHYs using the vsc8574_probe

PHYs using the vsc8574_probe fail to be initialized and their
config_init returns -EIO, leading to errors like:
"could not attach PHY: -5".

This is because when the conversion of the MSCC PHY driver to use the
shared PHY package helpers was done, the base address retrieval and the
base PHY read and write helpers in the driver were modified. In
particular, the base address retrieval logic was moved from the
config_init to the probe. But the vsc8574_probe was forgotten. This
patch fixes it.

Fixes: deb04e9c0ff2 ("net: phy: mscc: use phy_package_shared")
Signed-off-by: Antoine Tenart <antoine.tenart@bootlin.com>
Reviewed-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/mscc/mscc_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/phy/mscc/mscc_main.c b/drivers/net/phy/mscc/mscc_main.c
index 550acf547ced..7ed0285206d0 100644
--- a/drivers/net/phy/mscc/mscc_main.c
+++ b/drivers/net/phy/mscc/mscc_main.c
@@ -1977,6 +1977,10 @@ static int vsc8574_probe(struct phy_device *phydev)
 
 	phydev->priv = vsc8531;
 
+	vsc8584_get_base_addr(phydev);
+	devm_phy_package_join(&phydev->mdio.dev, phydev,
+			      vsc8531->base_addr, 0);
+
 	vsc8531->nleds = 4;
 	vsc8531->supp_led_modes = VSC8584_SUPP_LED_MODES;
 	vsc8531->hw_stats = vsc8584_hw_stats;
-- 
cgit v1.2.3-59-g8ed1b


From 830f5ce266ce79e18f0026c36c99319b1bc08e1b Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Fri, 29 May 2020 13:02:07 +0300
Subject: net: phy: marvell: unlock after phy_select_page() failure

We need to call phy_restore_page() even if phy_select_page() fails.
Otherwise we are holding the phy_lock_mdio_bus() lock.  This requirement
is documented at the start of the phy_select_page() function.

Fixes: a618e86da91d ("net : phy: marvell: Speedup TDR data retrieval by only changing page once")
Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/marvell.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/phy/marvell.c b/drivers/net/phy/marvell.c
index 2c04e3b2b285..4ea226566cec 100644
--- a/drivers/net/phy/marvell.c
+++ b/drivers/net/phy/marvell.c
@@ -1867,7 +1867,7 @@ static int marvell_vct5_amplitude_graph(struct phy_device *phydev)
 	 */
 	page = phy_select_page(phydev, MII_MARVELL_VCT5_PAGE);
 	if (page < 0)
-		return page;
+		goto restore_page;
 
 	for (distance = priv->first;
 	     distance <= priv->last;
-- 
cgit v1.2.3-59-g8ed1b


From 40ef92c6ec09bd8aaffccfa41a715d1df5625f95 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 29 May 2020 14:09:40 +0200
Subject: sctp: add sctp_sock_set_nodelay

Add a helper to directly set the SCTP_NODELAY sockopt from kernel space
without going through a fake uaccess.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c       | 10 ++--------
 include/net/sctp/sctp.h |  7 +++++++
 2 files changed, 9 insertions(+), 8 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 69333728d871..9f1c3cdc9d65 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -914,7 +914,6 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port)
 static void sctp_connect_to_sock(struct connection *con)
 {
 	struct sockaddr_storage daddr;
-	int one = 1;
 	int result;
 	int addr_len;
 	struct socket *sock;
@@ -961,8 +960,7 @@ static void sctp_connect_to_sock(struct connection *con)
 	log_print("connecting to %d", con->nodeid);
 
 	/* Turn off Nagle's algorithm */
-	kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
-			  sizeof(one));
+	sctp_sock_set_nodelay(sock->sk);
 
 	/*
 	 * Make sock->ops->connect() function return in specified time,
@@ -1176,7 +1174,6 @@ static int sctp_listen_for_all(void)
 	struct socket *sock = NULL;
 	int result = -EINVAL;
 	struct connection *con = nodeid2con(0, GFP_NOFS);
-	int one = 1;
 
 	if (!con)
 		return -ENOMEM;
@@ -1191,10 +1188,7 @@ static int sctp_listen_for_all(void)
 	}
 
 	sock_set_rcvbuf(sock->sk, NEEDED_RMEM);
-	result = kernel_setsockopt(sock, SOL_SCTP, SCTP_NODELAY, (char *)&one,
-				   sizeof(one));
-	if (result < 0)
-		log_print("Could not set SCTP NODELAY error %d\n", result);
+	sctp_sock_set_nodelay(sock->sk);
 
 	write_lock_bh(&sock->sk->sk_callback_lock);
 	/* Init con struct */
diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h
index 3ab5c6bbb90b..f8bcb75bb044 100644
--- a/include/net/sctp/sctp.h
+++ b/include/net/sctp/sctp.h
@@ -615,4 +615,11 @@ static inline bool sctp_newsk_ready(const struct sock *sk)
 	return sock_flag(sk, SOCK_DEAD) || sk->sk_socket;
 }
 
+static inline void sctp_sock_set_nodelay(struct sock *sk)
+{
+	lock_sock(sk);
+	sctp_sk(sk)->nodelay = true;
+	release_sock(sk);
+}
+
 #endif /* __net_sctp_h__ */
-- 
cgit v1.2.3-59-g8ed1b


From 05bfd3661448a46db3a258b316160d34cf0a1317 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 29 May 2020 14:09:41 +0200
Subject: sctp: refactor sctp_setsockopt_bindx

Split out a sctp_setsockopt_bindx_kernel that takes a kernel pointer
to the sockaddr and make sctp_setsockopt_bindx a small wrapper around
it.  This prepares for adding a new bind_add proto op.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sctp/socket.c | 61 +++++++++++++++++++++++++------------------------------
 1 file changed, 28 insertions(+), 33 deletions(-)

diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 827a9903ee28..6e745ac3c4a5 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -972,23 +972,22 @@ int sctp_asconf_mgmt(struct sctp_sock *sp, struct sctp_sockaddr_entry *addrw)
  * it.
  *
  * sk        The sk of the socket
- * addrs     The pointer to the addresses in user land
+ * addrs     The pointer to the addresses
  * addrssize Size of the addrs buffer
  * op        Operation to perform (add or remove, see the flags of
  *           sctp_bindx)
  *
  * Returns 0 if ok, <0 errno code on error.
  */
-static int sctp_setsockopt_bindx(struct sock *sk,
-				 struct sockaddr __user *addrs,
-				 int addrs_size, int op)
+static int sctp_setsockopt_bindx_kernel(struct sock *sk,
+					struct sockaddr *addrs, int addrs_size,
+					int op)
 {
-	struct sockaddr *kaddrs;
 	int err;
 	int addrcnt = 0;
 	int walk_size = 0;
 	struct sockaddr *sa_addr;
-	void *addr_buf;
+	void *addr_buf = addrs;
 	struct sctp_af *af;
 
 	pr_debug("%s: sk:%p addrs:%p addrs_size:%d opt:%d\n",
@@ -997,17 +996,10 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 	if (unlikely(addrs_size <= 0))
 		return -EINVAL;
 
-	kaddrs = memdup_user(addrs, addrs_size);
-	if (IS_ERR(kaddrs))
-		return PTR_ERR(kaddrs);
-
 	/* Walk through the addrs buffer and count the number of addresses. */
-	addr_buf = kaddrs;
 	while (walk_size < addrs_size) {
-		if (walk_size + sizeof(sa_family_t) > addrs_size) {
-			kfree(kaddrs);
+		if (walk_size + sizeof(sa_family_t) > addrs_size)
 			return -EINVAL;
-		}
 
 		sa_addr = addr_buf;
 		af = sctp_get_af_specific(sa_addr->sa_family);
@@ -1015,10 +1007,8 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 		/* If the address family is not supported or if this address
 		 * causes the address buffer to overflow return EINVAL.
 		 */
-		if (!af || (walk_size + af->sockaddr_len) > addrs_size) {
-			kfree(kaddrs);
+		if (!af || (walk_size + af->sockaddr_len) > addrs_size)
 			return -EINVAL;
-		}
 		addrcnt++;
 		addr_buf += af->sockaddr_len;
 		walk_size += af->sockaddr_len;
@@ -1029,31 +1019,36 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 	case SCTP_BINDX_ADD_ADDR:
 		/* Allow security module to validate bindx addresses. */
 		err = security_sctp_bind_connect(sk, SCTP_SOCKOPT_BINDX_ADD,
-						 (struct sockaddr *)kaddrs,
-						 addrs_size);
+						 addrs, addrs_size);
 		if (err)
-			goto out;
-		err = sctp_bindx_add(sk, kaddrs, addrcnt);
+			return err;
+		err = sctp_bindx_add(sk, addrs, addrcnt);
 		if (err)
-			goto out;
-		err = sctp_send_asconf_add_ip(sk, kaddrs, addrcnt);
-		break;
-
+			return err;
+		return sctp_send_asconf_add_ip(sk, addrs, addrcnt);
 	case SCTP_BINDX_REM_ADDR:
-		err = sctp_bindx_rem(sk, kaddrs, addrcnt);
+		err = sctp_bindx_rem(sk, addrs, addrcnt);
 		if (err)
-			goto out;
-		err = sctp_send_asconf_del_ip(sk, kaddrs, addrcnt);
-		break;
+			return err;
+		return sctp_send_asconf_del_ip(sk, addrs, addrcnt);
 
 	default:
-		err = -EINVAL;
-		break;
+		return -EINVAL;
 	}
+}
 
-out:
-	kfree(kaddrs);
+static int sctp_setsockopt_bindx(struct sock *sk,
+				 struct sockaddr __user *addrs,
+				 int addrs_size, int op)
+{
+	struct sockaddr *kaddrs;
+	int err;
 
+	kaddrs = memdup_user(addrs, addrs_size);
+	if (IS_ERR(kaddrs))
+		return PTR_ERR(kaddrs);
+	err = sctp_setsockopt_bindx_kernel(sk, kaddrs, addrs_size, op);
+	kfree(kaddrs);
 	return err;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From c0425a4249e9d313eec5f81c0bde8a286ebf9a63 Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 29 May 2020 14:09:42 +0200
Subject: net: add a new bind_add method

The SCTP protocol allows binding multiple addresses to a socket.  That
feature is currently only exposed as a socket option.  Add a bind_add
method to struct proto that allows binding additional addresses, and
switch the dlm code to use the method instead of going through the
socket option from kernel space.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Acked-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 fs/dlm/lowcomms.c  |  9 +++------
 include/net/sock.h |  6 +++++-
 net/core/sock.c    |  8 ++++++++
 net/sctp/socket.c  | 14 ++++++++++++++
 4 files changed, 30 insertions(+), 7 deletions(-)

diff --git a/fs/dlm/lowcomms.c b/fs/dlm/lowcomms.c
index 9f1c3cdc9d65..3543a8fec907 100644
--- a/fs/dlm/lowcomms.c
+++ b/fs/dlm/lowcomms.c
@@ -882,6 +882,7 @@ static void writequeue_entry_complete(struct writequeue_entry *e, int completed)
 static int sctp_bind_addrs(struct connection *con, uint16_t port)
 {
 	struct sockaddr_storage localaddr;
+	struct sockaddr *addr = (struct sockaddr *)&localaddr;
 	int i, addr_len, result = 0;
 
 	for (i = 0; i < dlm_local_count; i++) {
@@ -889,13 +890,9 @@ static int sctp_bind_addrs(struct connection *con, uint16_t port)
 		make_sockaddr(&localaddr, port, &addr_len);
 
 		if (!i)
-			result = kernel_bind(con->sock,
-					     (struct sockaddr *)&localaddr,
-					     addr_len);
+			result = kernel_bind(con->sock, addr, addr_len);
 		else
-			result = kernel_setsockopt(con->sock, SOL_SCTP,
-						   SCTP_SOCKOPT_BINDX_ADD,
-						   (char *)&localaddr, addr_len);
+			result = sock_bind_add(con->sock->sk, addr, addr_len);
 
 		if (result < 0) {
 			log_print("Can't bind to %d addr number %d, %d.\n",
diff --git a/include/net/sock.h b/include/net/sock.h
index d994daa418ec..6e9f713a7860 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -1156,7 +1156,9 @@ struct proto {
 	int			(*sendpage)(struct sock *sk, struct page *page,
 					int offset, size_t size, int flags);
 	int			(*bind)(struct sock *sk,
-					struct sockaddr *uaddr, int addr_len);
+					struct sockaddr *addr, int addr_len);
+	int			(*bind_add)(struct sock *sk,
+					struct sockaddr *addr, int addr_len);
 
 	int			(*backlog_rcv) (struct sock *sk,
 						struct sk_buff *skb);
@@ -2698,4 +2700,6 @@ void sock_set_reuseaddr(struct sock *sk);
 void sock_set_reuseport(struct sock *sk);
 void sock_set_sndtimeo(struct sock *sk, s64 secs);
 
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len);
+
 #endif	/* _SOCK_H */
diff --git a/net/core/sock.c b/net/core/sock.c
index 2ca3425b519c..61ec573221a6 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -3712,3 +3712,11 @@ bool sk_busy_loop_end(void *p, unsigned long start_time)
 }
 EXPORT_SYMBOL(sk_busy_loop_end);
 #endif /* CONFIG_NET_RX_BUSY_POLL */
+
+int sock_bind_add(struct sock *sk, struct sockaddr *addr, int addr_len)
+{
+	if (!sk->sk_prot->bind_add)
+		return -EOPNOTSUPP;
+	return sk->sk_prot->bind_add(sk, addr, addr_len);
+}
+EXPORT_SYMBOL(sock_bind_add);
diff --git a/net/sctp/socket.c b/net/sctp/socket.c
index 6e745ac3c4a5..d57e1a002ffc 100644
--- a/net/sctp/socket.c
+++ b/net/sctp/socket.c
@@ -1052,6 +1052,18 @@ static int sctp_setsockopt_bindx(struct sock *sk,
 	return err;
 }
 
+static int sctp_bind_add(struct sock *sk, struct sockaddr *addrs,
+		int addrlen)
+{
+	int err;
+
+	lock_sock(sk);
+	err = sctp_setsockopt_bindx_kernel(sk, addrs, addrlen,
+					   SCTP_BINDX_ADD_ADDR);
+	release_sock(sk);
+	return err;
+}
+
 static int sctp_connect_new_asoc(struct sctp_endpoint *ep,
 				 const union sctp_addr *daddr,
 				 const struct sctp_initmsg *init,
@@ -9620,6 +9632,7 @@ struct proto sctp_prot = {
 	.sendmsg     =	sctp_sendmsg,
 	.recvmsg     =	sctp_recvmsg,
 	.bind        =	sctp_bind,
+	.bind_add    =  sctp_bind_add,
 	.backlog_rcv =	sctp_backlog_rcv,
 	.hash        =	sctp_hash,
 	.unhash      =	sctp_unhash,
@@ -9662,6 +9675,7 @@ struct proto sctpv6_prot = {
 	.sendmsg	= sctp_sendmsg,
 	.recvmsg	= sctp_recvmsg,
 	.bind		= sctp_bind,
+	.bind_add	= sctp_bind_add,
 	.backlog_rcv	= sctp_backlog_rcv,
 	.hash		= sctp_hash,
 	.unhash		= sctp_unhash,
-- 
cgit v1.2.3-59-g8ed1b


From 5a892ff2facb4548c17c05931ed899038a0da63e Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Fri, 29 May 2020 14:09:43 +0200
Subject: net: remove kernel_setsockopt

No users left.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Reviewed-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/linux/net.h |  2 --
 net/socket.c        | 31 -------------------------------
 2 files changed, 33 deletions(-)

diff --git a/include/linux/net.h b/include/linux/net.h
index 74ef5d7315f7..e10f378194a5 100644
--- a/include/linux/net.h
+++ b/include/linux/net.h
@@ -303,8 +303,6 @@ int kernel_connect(struct socket *sock, struct sockaddr *addr, int addrlen,
 		   int flags);
 int kernel_getsockname(struct socket *sock, struct sockaddr *addr);
 int kernel_getpeername(struct socket *sock, struct sockaddr *addr);
-int kernel_setsockopt(struct socket *sock, int level, int optname, char *optval,
-		      unsigned int optlen);
 int kernel_sendpage(struct socket *sock, struct page *page, int offset,
 		    size_t size, int flags);
 int kernel_sendpage_locked(struct sock *sk, struct page *page, int offset,
diff --git a/net/socket.c b/net/socket.c
index 81a98b6cbd08..976426d03f09 100644
--- a/net/socket.c
+++ b/net/socket.c
@@ -3624,37 +3624,6 @@ int kernel_getpeername(struct socket *sock, struct sockaddr *addr)
 }
 EXPORT_SYMBOL(kernel_getpeername);
 
-/**
- *	kernel_setsockopt - set a socket option (kernel space)
- *	@sock: socket
- *	@level: API level (SOL_SOCKET, ...)
- *	@optname: option tag
- *	@optval: option value
- *	@optlen: option length
- *
- *	Returns 0 or an error.
- */
-
-int kernel_setsockopt(struct socket *sock, int level, int optname,
-			char *optval, unsigned int optlen)
-{
-	mm_segment_t oldfs = get_fs();
-	char __user *uoptval;
-	int err;
-
-	uoptval = (char __user __force *) optval;
-
-	set_fs(KERNEL_DS);
-	if (level == SOL_SOCKET)
-		err = sock_setsockopt(sock, level, optname, uoptval, optlen);
-	else
-		err = sock->ops->setsockopt(sock, level, optname, uoptval,
-					    optlen);
-	set_fs(oldfs);
-	return err;
-}
-EXPORT_SYMBOL(kernel_setsockopt);
-
 /**
  *	kernel_sendpage - send a &page through a socket (kernel space)
  *	@sock: socket
-- 
cgit v1.2.3-59-g8ed1b


From 04198499b23f9d73a127a17b8576c9266c8f6f9b Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 27 May 2020 19:41:34 +0300
Subject: net: dsa: tag_8021q: stop restoring VLANs from bridge

Right now, our only tag_8021q user, sja1105, has the ability to restore
bridge VLANs on its own, so this logic is unnecessary.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/dsa/tag_8021q.c | 61 +----------------------------------------------------
 1 file changed, 1 insertion(+), 60 deletions(-)

diff --git a/net/dsa/tag_8021q.c b/net/dsa/tag_8021q.c
index 3052da668156..780b2a15ac9b 100644
--- a/net/dsa/tag_8021q.c
+++ b/net/dsa/tag_8021q.c
@@ -140,34 +140,6 @@ bool vid_is_dsa_8021q(u16 vid)
 }
 EXPORT_SYMBOL_GPL(vid_is_dsa_8021q);
 
-static int dsa_8021q_restore_pvid(struct dsa_switch *ds, int port)
-{
-	struct bridge_vlan_info vinfo;
-	struct net_device *slave;
-	u16 pvid;
-	int err;
-
-	if (!dsa_is_user_port(ds, port))
-		return 0;
-
-	slave = dsa_to_port(ds, port)->slave;
-
-	err = br_vlan_get_pvid(slave, &pvid);
-	if (!pvid || err < 0)
-		/* There is no pvid on the bridge for this port, which is
-		 * perfectly valid. Nothing to restore, bye-bye!
-		 */
-		return 0;
-
-	err = br_vlan_get_info(slave, pvid, &vinfo);
-	if (err < 0) {
-		dev_err(ds->dev, "Couldn't determine PVID attributes\n");
-		return err;
-	}
-
-	return dsa_port_vid_add(dsa_to_port(ds, port), pvid, vinfo.flags);
-}
-
 /* If @enabled is true, installs @vid with @flags into the switch port's HW
  * filter.
  * If @enabled is false, deletes @vid (ignores @flags) from the port. Had the
@@ -178,39 +150,11 @@ static int dsa_8021q_vid_apply(struct dsa_switch *ds, int port, u16 vid,
 			       u16 flags, bool enabled)
 {
 	struct dsa_port *dp = dsa_to_port(ds, port);
-	struct bridge_vlan_info vinfo;
-	int err;
 
 	if (enabled)
 		return dsa_port_vid_add(dp, vid, flags);
 
-	err = dsa_port_vid_del(dp, vid);
-	if (err < 0)
-		return err;
-
-	/* Nothing to restore from the bridge for a non-user port.
-	 * The CPU port VLANs are restored implicitly with the user ports,
-	 * similar to how the bridge does in dsa_slave_vlan_add and
-	 * dsa_slave_vlan_del.
-	 */
-	if (!dsa_is_user_port(ds, port))
-		return 0;
-
-	err = br_vlan_get_info(dp->slave, vid, &vinfo);
-	/* Couldn't determine bridge attributes for this vid,
-	 * it means the bridge had not configured it.
-	 */
-	if (err < 0)
-		return 0;
-
-	/* Restore the VID from the bridge */
-	err = dsa_port_vid_add(dp, vid, vinfo.flags);
-	if (err < 0)
-		return err;
-
-	vinfo.flags &= ~BRIDGE_VLAN_INFO_PVID;
-
-	return dsa_port_vid_add(dp->cpu_dp, vid, vinfo.flags);
+	return dsa_port_vid_del(dp, vid);
 }
 
 /* RX VLAN tagging (left) and TX VLAN tagging (right) setup shown for a single
@@ -329,9 +273,6 @@ int dsa_port_setup_8021q_tagging(struct dsa_switch *ds, int port, bool enabled)
 		return err;
 	}
 
-	if (!enabled)
-		err = dsa_8021q_restore_pvid(ds, port);
-
 	return err;
 }
 EXPORT_SYMBOL_GPL(dsa_port_setup_8021q_tagging);
-- 
cgit v1.2.3-59-g8ed1b


From aef31718a923338aff610abef41114a9c0fd37ea Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Wed, 27 May 2020 20:20:38 +0300
Subject: net: dsa: sja1105: avoid invalid state in sja1105_vlan_filtering

Suppose there are 2 switches, spi/spi2.0 and spi/spi2.1, in a cross-chip
setup, both under the same VLAN-filtering bridge, both in the
SJA1105_VLAN_BEST_EFFORT state.

If we try to change the VLAN state of one of the switches (to
SJA1105_VLAN_FILTERING_FULL) we get the following error:

devlink dev param set spi/spi2.1 name best_effort_vlan_filtering value
false cmode runtime
[   38.325683] sja1105 spi2.1: Not allowed to overcommit frame memory.
               L2 memory partitions and VL memory partitions share the
               same space. The sum of all 16 memory partitions is not
               allowed to be larger than 929 128-byte blocks (or 910
               with retagging). Please adjust
               l2-forwarding-parameters-table.part_spc and/or
               vl-forwarding-parameters-table.partspc.
[   38.356803] sja1105 spi2.1: Invalid config, cannot upload

This is because the spi/spi2.1 switch doesn't support tagging anymore in
the SJA1105_VLAN_FILTERING_FULL state, so it doesn't need to have any
retagging rules defined. Great, so it can use more frame memory
(retagging consumes extra memory).

But the built-in low-level static config checker from the sja1105 driver
says "not so fast, you've increased the frame memory to non-retagging
values, but you still kept the retagging rules in the static config".

So we need to rebuild the VLAN table immediately before re-uploading the
static config, an operation which will take care, based on the new VLAN
state, of removing the retagging rules.

Fixes: 3f01c91aab92 ("net: dsa: sja1105: implement VLAN retagging for dsa_8021q sub-VLANs")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_main.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/dsa/sja1105/sja1105_main.c b/drivers/net/dsa/sja1105/sja1105_main.c
index 36ab527449e6..789b288cc78b 100644
--- a/drivers/net/dsa/sja1105/sja1105_main.c
+++ b/drivers/net/dsa/sja1105/sja1105_main.c
@@ -2746,6 +2746,10 @@ static int sja1105_vlan_filtering(struct dsa_switch *ds, int port, bool enabled)
 
 	sja1105_frame_memory_partitioning(priv);
 
+	rc = sja1105_build_vlan_table(priv, false);
+	if (rc)
+		return rc;
+
 	rc = sja1105_static_config_reload(priv, SJA1105_VLAN_FILTERING);
 	if (rc)
 		dev_err(ds->dev, "Failed to change VLAN Ethertype\n");
-- 
cgit v1.2.3-59-g8ed1b


From 8298a419a0064c6cd6c84a4a45de294a3eff0832 Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 28 May 2020 07:43:59 +0000
Subject: tipc: remove set but not used variable 'prev'

Fixes gcc '-Wunused-but-set-variable' warning:

net/tipc/msg.c: In function 'tipc_msg_append':
net/tipc/msg.c:215:24: warning:
 variable 'prev' set but not used [-Wunused-but-set-variable]

commit 0a3e060f340d ("tipc: add test for Nagle algorithm effectiveness")
left behind this, remove it.

Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/msg.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/net/tipc/msg.c b/net/tipc/msg.c
index 23809039dda1..c0afcd627c5e 100644
--- a/net/tipc/msg.c
+++ b/net/tipc/msg.c
@@ -212,7 +212,7 @@ err:
 int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
 		    int mss, struct sk_buff_head *txq)
 {
-	struct sk_buff *skb, *prev;
+	struct sk_buff *skb;
 	int accounted, total, curr;
 	int mlen, cpy, rem = dlen;
 	struct tipc_msg *hdr;
@@ -223,7 +223,6 @@ int tipc_msg_append(struct tipc_msg *_hdr, struct msghdr *m, int dlen,
 
 	while (rem) {
 		if (!skb || skb->len >= mss) {
-			prev = skb;
 			skb = tipc_buf_acquire(mss, GFP_KERNEL);
 			if (unlikely(!skb))
 				return -ENOMEM;
-- 
cgit v1.2.3-59-g8ed1b


From 139df98bdfef62868df3d626d27f1db9edd9830f Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Thu, 28 May 2020 16:26:23 +0800
Subject: stmmac: platform: add "snps, dwmac-5.10a" IP compatible string

Add the "snps,dwmac-5.10a" compatible string for the 5.10a IP version,
which avoids having to define some plat data in the glue layer.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
index bcda49dcf619..f32317fa75c8 100644
--- a/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
+++ b/drivers/net/ethernet/stmicro/stmmac/stmmac_platform.c
@@ -507,7 +507,8 @@ stmmac_probe_config_dt(struct platform_device *pdev, const char **mac)
 
 	if (of_device_is_compatible(np, "snps,dwmac-4.00") ||
 	    of_device_is_compatible(np, "snps,dwmac-4.10a") ||
-	    of_device_is_compatible(np, "snps,dwmac-4.20a")) {
+	    of_device_is_compatible(np, "snps,dwmac-4.20a") ||
+	    of_device_is_compatible(np, "snps,dwmac-5.10a")) {
 		plat->has_gmac4 = 1;
 		plat->has_gmac = 0;
 		plat->pmt = 1;
-- 
cgit v1.2.3-59-g8ed1b


From 94abdad6974a5e108d921df2c38e35cae6179bb2 Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Thu, 28 May 2020 16:26:24 +0800
Subject: net: ethernet: dwmac: add ethernet glue logic for NXP imx8 chip

NXP imx8 family like imx8mp/imx8dxl chips support Synopsys MAC 5.10a IP.
This patch adds settings for NXP imx8 glue layer:
- clocks
- dwmac address width
- phy interface mode selection
- adjust rgmii txclk rate

v2:
- adjust code sequences in order to have reverse christmas
  tree local variable ordering.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/stmicro/stmmac/Kconfig     |  13 +
 drivers/net/ethernet/stmicro/stmmac/Makefile    |   1 +
 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c | 315 ++++++++++++++++++++++++
 3 files changed, 329 insertions(+)
 create mode 100644 drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c

diff --git a/drivers/net/ethernet/stmicro/stmmac/Kconfig b/drivers/net/ethernet/stmicro/stmmac/Kconfig
index b46f8d2ae6d7..36bd2e18f23b 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Kconfig
+++ b/drivers/net/ethernet/stmicro/stmmac/Kconfig
@@ -196,6 +196,19 @@ config DWMAC_SUN8I
 	  This selects Allwinner SoC glue layer support for the
 	  stmmac device driver. This driver is used for H3/A83T/A64
 	  EMAC ethernet controller.
+
+config DWMAC_IMX8
+	tristate "NXP IMX8 DWMAC support"
+	default ARCH_MXC
+	depends on OF && (ARCH_MXC || COMPILE_TEST)
+	select MFD_SYSCON
+	---help---
+	  Support for ethernet controller on NXP i.MX8 SOCs.
+
+	  This selects NXP SoC glue layer support for the stmmac
+	  device driver. This driver is used for i.MX8 series like
+	  iMX8MP/iMX8DXL GMAC ethernet controller.
+
 endif
 
 config DWMAC_INTEL
diff --git a/drivers/net/ethernet/stmicro/stmmac/Makefile b/drivers/net/ethernet/stmicro/stmmac/Makefile
index f9d024d6b69b..295615ab36a7 100644
--- a/drivers/net/ethernet/stmicro/stmmac/Makefile
+++ b/drivers/net/ethernet/stmicro/stmmac/Makefile
@@ -27,6 +27,7 @@ obj-$(CONFIG_DWMAC_SUNXI)	+= dwmac-sunxi.o
 obj-$(CONFIG_DWMAC_SUN8I)	+= dwmac-sun8i.o
 obj-$(CONFIG_DWMAC_DWC_QOS_ETH)	+= dwmac-dwc-qos-eth.o
 obj-$(CONFIG_DWMAC_GENERIC)	+= dwmac-generic.o
+obj-$(CONFIG_DWMAC_IMX8)	+= dwmac-imx.o
 stmmac-platform-objs:= stmmac_platform.o
 dwmac-altr-socfpga-objs := altr_tse_pcs.o dwmac-socfpga.o
 
diff --git a/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
new file mode 100644
index 000000000000..5010af7dab4a
--- /dev/null
+++ b/drivers/net/ethernet/stmicro/stmmac/dwmac-imx.c
@@ -0,0 +1,315 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * dwmac-imx.c - DWMAC Specific Glue layer for NXP imx8
+ *
+ * Copyright 2020 NXP
+ *
+ */
+
+#include <linux/clk.h>
+#include <linux/gpio/consumer.h>
+#include <linux/kernel.h>
+#include <linux/mfd/syscon.h>
+#include <linux/module.h>
+#include <linux/of.h>
+#include <linux/of_device.h>
+#include <linux/of_net.h>
+#include <linux/phy.h>
+#include <linux/platform_device.h>
+#include <linux/pm_wakeirq.h>
+#include <linux/regmap.h>
+#include <linux/slab.h>
+#include <linux/stmmac.h>
+
+#include "stmmac_platform.h"
+
+#define GPR_ENET_QOS_INTF_MODE_MASK	GENMASK(21, 16)	/* spans all bits below */
+#define GPR_ENET_QOS_INTF_SEL_MII	(0x0 << 16)
+#define GPR_ENET_QOS_INTF_SEL_RMII	(0x4 << 16)
+#define GPR_ENET_QOS_INTF_SEL_RGMII	(0x1 << 16)
+#define GPR_ENET_QOS_CLK_GEN_EN		(0x1 << 19)	/* set for every supported mode */
+#define GPR_ENET_QOS_CLK_TX_CLK_SEL	(0x1 << 20)	/* RMII without rmii_refclk_ext */
+#define GPR_ENET_QOS_RGMII_EN		(0x1 << 21)	/* RGMII modes only */
+
+struct imx_dwmac_ops {
+	u32 addr_width;			/* bit count given to DMA_BIT_MASK() in probe */
+	bool mac_rgmii_txclk_auto_adj;	/* true: imx_dwmac_fix_speed() is a no-op */
+	/* Optional hook, called from imx_dwmac_init() when non-NULL */
+	int (*set_intf_mode)(struct plat_stmmacenet_data *plat_dat);
+};
+
+struct imx_priv_data {
+	struct device *dev;
+	struct clk *clk_tx;		/* "tx" clock */
+	struct clk *clk_mem;		/* "mem" clock; NULL unless fsl,imx8dxl */
+	struct regmap *intf_regmap;	/* syscon behind the "intf_mode" phandle */
+	u32 intf_reg_off;		/* second cell of "intf_mode" */
+	bool rmii_refclk_ext;		/* "snps,rmii_refclk_ext" present in DT */
+
+	const struct imx_dwmac_ops *ops;	/* OF match data */
+	struct plat_stmmacenet_data *plat_dat;
+};
+
+/* Program the GPR interface-select bits to match plat_dat->interface. */
+static int imx8mp_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	struct imx_priv_data *dwmac = plat_dat->bsp_priv;
+	int val;
+
+	switch (plat_dat->interface) {
+	case PHY_INTERFACE_MODE_MII:
+		val = GPR_ENET_QOS_INTF_SEL_MII;
+		break;
+	case PHY_INTERFACE_MODE_RMII:
+		val = GPR_ENET_QOS_INTF_SEL_RMII;
+		val |= (dwmac->rmii_refclk_ext ? 0 : GPR_ENET_QOS_CLK_TX_CLK_SEL);
+		break;
+	case PHY_INTERFACE_MODE_RGMII:
+	case PHY_INTERFACE_MODE_RGMII_ID:
+	case PHY_INTERFACE_MODE_RGMII_RXID:
+	case PHY_INTERFACE_MODE_RGMII_TXID:
+		val = GPR_ENET_QOS_INTF_SEL_RGMII | GPR_ENET_QOS_RGMII_EN;
+		break;
+	default:
+		pr_debug("imx dwmac doesn't support %d interface\n",
+			 plat_dat->interface);
+		return -EINVAL;
+	}
+
+	val |= GPR_ENET_QOS_CLK_GEN_EN;
+	return regmap_update_bits(dwmac->intf_regmap, dwmac->intf_reg_off,
+				  GPR_ENET_QOS_INTF_MODE_MASK, val);
+}
+
+/*
+ * Placeholder for i.MX8DXL: touches no register and always reports success.
+ */
+static int imx8dxl_set_intf_mode(struct plat_stmmacenet_data *plat_dat)
+{
+	/* TBD: depends on imx8dxl scu interfaces to be upstreamed */
+	return 0;
+}
+
+/*
+ * Enable the mem and tx clocks, then run the optional set_intf_mode hook.
+ */
+static int imx_dwmac_init(struct platform_device *pdev, void *priv)
+{
+	struct imx_priv_data *dwmac = priv;
+	int ret;
+
+	ret = clk_prepare_enable(dwmac->clk_mem);
+	if (ret) {
+		dev_err(&pdev->dev, "mem clock enable failed\n");
+		return ret;
+	}
+
+	ret = clk_prepare_enable(dwmac->clk_tx);
+	if (ret) {
+		dev_err(&pdev->dev, "tx clock enable failed\n");
+		goto err_disable_mem;
+	}
+
+	if (dwmac->ops->set_intf_mode) {
+		ret = dwmac->ops->set_intf_mode(dwmac->plat_dat);
+		if (ret)
+			goto err_disable_tx;
+	}
+
+	return 0;
+
+err_disable_tx:
+	clk_disable_unprepare(dwmac->clk_tx);
+err_disable_mem:
+	clk_disable_unprepare(dwmac->clk_mem);
+	return ret;
+}
+
+/* Counterpart of imx_dwmac_init(): disable the tx and mem clocks. */
+static void imx_dwmac_exit(struct platform_device *pdev, void *priv)
+{
+	struct imx_priv_data *dwmac = priv;
+
+	clk_disable_unprepare(dwmac->clk_tx);
+	clk_disable_unprepare(dwmac->clk_mem);
+}
+
+/* Set the tx clock rate for @speed unless the rate is not ours to manage. */
+static void imx_dwmac_fix_speed(void *priv, unsigned int speed)
+{
+	struct imx_priv_data *dwmac = priv;
+	unsigned long rate;
+
+	if (dwmac->ops->mac_rgmii_txclk_auto_adj)
+		return;
+
+	/* RMII and MII links are skipped as well */
+	if (dwmac->plat_dat->interface == PHY_INTERFACE_MODE_RMII ||
+	    dwmac->plat_dat->interface == PHY_INTERFACE_MODE_MII)
+		return;
+
+	/* Pick the tx clock rate for the link speed */
+	switch (speed) {
+	case SPEED_1000:
+		rate = 125000000;
+		break;
+	case SPEED_100:
+		rate = 25000000;
+		break;
+	case SPEED_10:
+		rate = 2500000;
+		break;
+	default:
+		dev_err(dwmac->dev, "invalid speed %u\n", speed);
+		return;
+	}
+
+	if (clk_set_rate(dwmac->clk_tx, rate) < 0)
+		dev_err(dwmac->dev, "failed to set tx rate %lu\n", rate);
+}
+
+/* Fetch the clocks and the "intf_mode" syscon reference from the DT node. */
+static int
+imx_dwmac_parse_dt(struct imx_priv_data *dwmac, struct device *dev)
+{
+	struct device_node *np = dev->of_node;
+	int err = 0;
+
+	if (of_property_read_bool(np, "snps,rmii_refclk_ext"))
+		dwmac->rmii_refclk_ext = true;
+
+	dwmac->clk_tx = devm_clk_get(dev, "tx");
+	if (IS_ERR(dwmac->clk_tx)) {
+		dev_err(dev, "failed to get tx clock\n");
+		return PTR_ERR(dwmac->clk_tx);
+	}
+
+	dwmac->clk_mem = NULL;
+	if (of_machine_is_compatible("fsl,imx8dxl")) {
+		dwmac->clk_mem = devm_clk_get(dev, "mem");
+		if (IS_ERR(dwmac->clk_mem)) {
+			dev_err(dev, "failed to get mem clock\n");
+			return PTR_ERR(dwmac->clk_mem);
+		}
+	}
+
+	if (of_machine_is_compatible("fsl,imx8mp")) {
+		/* Per the binding doc, "intf_mode" is required on i.MX8MP and
+		 * optional on i.MX8DXL.
+		 */
+		dwmac->intf_regmap = syscon_regmap_lookup_by_phandle(np, "intf_mode");
+		if (IS_ERR(dwmac->intf_regmap))
+			return PTR_ERR(dwmac->intf_regmap);
+
+		err = of_property_read_u32_index(np, "intf_mode", 1, &dwmac->intf_reg_off);
+		if (err) {
+			dev_err(dev, "Can't get intf mode reg offset (%d)\n", err);
+			return err;
+		}
+	}
+
+	return err;
+}
+
+/* Bind the i.MX8 glue: parse DT, set the DMA mask, enable clocks, probe stmmac. */
+static int imx_dwmac_probe(struct platform_device *pdev)
+{
+	struct plat_stmmacenet_data *plat_dat;
+	struct stmmac_resources stmmac_res;
+	struct imx_priv_data *dwmac;
+	int ret;
+
+	ret = stmmac_get_platform_resources(pdev, &stmmac_res);
+	if (ret)
+		return ret;
+
+	dwmac = devm_kzalloc(&pdev->dev, sizeof(*dwmac), GFP_KERNEL);
+	if (!dwmac)
+		return -ENOMEM;	/* PTR_ERR(NULL) is 0 and would report success */
+
+	plat_dat = stmmac_probe_config_dt(pdev, &stmmac_res.mac);
+	if (IS_ERR(plat_dat))
+		return PTR_ERR(plat_dat);
+
+	dwmac->ops = of_device_get_match_data(&pdev->dev);
+	if (!dwmac->ops) {
+		dev_err(&pdev->dev, "failed to get match data\n");
+		ret = -EINVAL;
+		goto err_match_data;
+	}
+
+	dwmac->dev = &pdev->dev;
+
+	ret = imx_dwmac_parse_dt(dwmac, &pdev->dev);
+	if (ret) {
+		dev_err(&pdev->dev, "failed to parse OF data\n");
+		goto err_parse_dt;
+	}
+
+	ret = dma_set_mask_and_coherent(&pdev->dev,
+					DMA_BIT_MASK(dwmac->ops->addr_width));
+	if (ret) {
+		dev_err(&pdev->dev, "DMA mask set failed\n");
+		goto err_dma_mask;
+	}
+
+	plat_dat->init = imx_dwmac_init;
+	plat_dat->exit = imx_dwmac_exit;
+	plat_dat->fix_mac_speed = imx_dwmac_fix_speed;
+	plat_dat->bsp_priv = dwmac;
+	dwmac->plat_dat = plat_dat;
+
+	ret = imx_dwmac_init(pdev, dwmac);
+	if (ret)
+		goto err_dwmac_init;
+
+	ret = stmmac_dvr_probe(&pdev->dev, plat_dat, &stmmac_res);
+	if (ret)
+		goto err_drv_probe;
+
+	return 0;
+
+err_drv_probe:
+	imx_dwmac_exit(pdev, plat_dat->bsp_priv);
+	/* imx_dwmac_init() disables its clocks itself when it fails */
+err_dwmac_init:
+err_dma_mask:
+err_parse_dt:
+err_match_data:
+	stmmac_remove_config_dt(pdev, plat_dat);
+	return ret;
+}
+
+static const struct imx_dwmac_ops imx8mp_dwmac_data = {
+	.addr_width = 34,
+	.mac_rgmii_txclk_auto_adj = false,	/* driver retunes clk_tx per speed */
+	.set_intf_mode = imx8mp_set_intf_mode,
+};
+
+static const struct imx_dwmac_ops imx8dxl_dwmac_data = {
+	.addr_width = 32,
+	.mac_rgmii_txclk_auto_adj = true,	/* imx_dwmac_fix_speed() returns early */
+	.set_intf_mode = imx8dxl_set_intf_mode,
+};
+
+static const struct of_device_id imx_dwmac_match[] = {
+	{ .compatible = "nxp,imx8mp-dwmac-eqos", .data = &imx8mp_dwmac_data },
+	{ .compatible = "nxp,imx8dxl-dwmac-eqos", .data = &imx8dxl_dwmac_data },
+	{ /* sentinel */ }
+};
+MODULE_DEVICE_TABLE(of, imx_dwmac_match);
+
+static struct platform_driver imx_dwmac_driver = {
+	.probe	= imx_dwmac_probe,
+	.remove	= stmmac_pltfr_remove,
+	.driver	= {
+		.name		= "imx-dwmac",
+		.pm		= &stmmac_pltfr_pm_ops,
+		.of_match_table	= imx_dwmac_match,
+	},
+};
+module_platform_driver(imx_dwmac_driver);
+
+MODULE_AUTHOR("NXP");
+MODULE_DESCRIPTION("NXP imx8 DWMAC Specific Glue layer");
+MODULE_LICENSE("GPL v2");
-- 
cgit v1.2.3-59-g8ed1b


From 29884aa6806ce27460281d0ec1819683261e89cd Mon Sep 17 00:00:00 2001
From: Fugang Duan <fugang.duan@nxp.com>
Date: Thu, 28 May 2020 16:26:25 +0800
Subject: dt-bindings: net: imx-dwmac: Add NXP imx8 DWMAC glue layer

Add description for NXP imx8 families like imx8mp/imx8dxl
that integrate the Synopsys gmac IP version 5.10a.

Signed-off-by: Fugang Duan <fugang.duan@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/imx-dwmac.txt          | 56 ++++++++++++++++++++++
 1 file changed, 56 insertions(+)
 create mode 100644 Documentation/devicetree/bindings/net/imx-dwmac.txt

diff --git a/Documentation/devicetree/bindings/net/imx-dwmac.txt b/Documentation/devicetree/bindings/net/imx-dwmac.txt
new file mode 100644
index 000000000000..921d522fe8d7
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/imx-dwmac.txt
@@ -0,0 +1,56 @@
+IMX8 glue layer controller, NXP imx8 families support Synopsys MAC 5.10a IP.
+
+This file documents platform glue layer for IMX.
+Please see stmmac.txt for the other unchanged properties.
+
+The device node has following properties.
+
+Required properties:
+- compatible:  Should be "nxp,imx8mp-dwmac-eqos" to select glue layer
+	       and "snps,dwmac-5.10a" to select IP version.
+- clocks: Must contain a phandle for each entry in clock-names.
+- clock-names: Should be "stmmaceth" for the host clock.
+	       Should be "pclk" for the MAC apb clock.
+	       Should be "ptp_ref" for the MAC timer clock.
+	       Should be "tx" for the MAC RGMII TX clock:
+	       Should be "mem" for EQOS MEM clock.
+		- "mem" clock is required for imx8dxl platform.
+		- "mem" clock is not required for imx8mp platform.
+- interrupt-names: Should contain a list of interrupt names corresponding to
+		   the interrupts in the interrupts property, if available.
+		   Should be "macirq" for the main MAC IRQ
+		   Should be "eth_wake_irq" for the interrupt which wakes up the system
+- intf_mode: Should be phandle/offset pair. The phandle to the syscon node which
+	     encompasses the GPR register, and the offset of the GPR register.
+		- required for imx8mp platform.
+		- is optional for imx8dxl platform.
+
+Optional properties:
+- intf_mode: is optional for imx8dxl platform.
+- snps,rmii_refclk_ext: to select RMII reference clock from external.
+
+Example:
+	eqos: ethernet@30bf0000 {
+		compatible = "nxp,imx8mp-dwmac-eqos", "snps,dwmac-5.10a";
+		reg = <0x30bf0000 0x10000>;
+		interrupts = <GIC_SPI 134 IRQ_TYPE_LEVEL_HIGH>,
+			     <GIC_SPI 135 IRQ_TYPE_LEVEL_HIGH>;
+		interrupt-names = "eth_wake_irq", "macirq";
+		clocks = <&clk IMX8MP_CLK_ENET_QOS_ROOT>,
+			 <&clk IMX8MP_CLK_QOS_ENET_ROOT>,
+			 <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
+			 <&clk IMX8MP_CLK_ENET_QOS>;
+		clock-names = "stmmaceth", "pclk", "ptp_ref", "tx";
+		assigned-clocks = <&clk IMX8MP_CLK_ENET_AXI>,
+				  <&clk IMX8MP_CLK_ENET_QOS_TIMER>,
+				  <&clk IMX8MP_CLK_ENET_QOS>;
+		assigned-clock-parents = <&clk IMX8MP_SYS_PLL1_266M>,
+					 <&clk IMX8MP_SYS_PLL2_100M>,
+					 <&clk IMX8MP_SYS_PLL2_125M>;
+		assigned-clock-rates = <0>, <100000000>, <125000000>;
+		nvmem-cells = <&eth_mac0>;
+		nvmem-cell-names = "mac-address";
+		nvmem_macaddr_swap;
+		intf_mode = <&gpr 0x4>;
+		status = "disabled";
+	};
-- 
cgit v1.2.3-59-g8ed1b


From 91f3fd1124e002f970ef21c2459bf2ba4ac080ee Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 May 2020 15:59:02 +0200
Subject: dt-bindings: net: rename the bindings document for MediaTek STAR EMAC

The driver itself was renamed before getting merged into mainline, but
the binding document kept the old name. This makes both names consistent.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Acked-by: Rob Herring <robh@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../devicetree/bindings/net/mediatek,eth-mac.yaml  | 89 ----------------------
 .../bindings/net/mediatek,star-emac.yaml           | 89 ++++++++++++++++++++++
 2 files changed, 89 insertions(+), 89 deletions(-)
 delete mode 100644 Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
 create mode 100644 Documentation/devicetree/bindings/net/mediatek,star-emac.yaml

diff --git a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml b/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
deleted file mode 100644
index f85d91a9d6e5..000000000000
--- a/Documentation/devicetree/bindings/net/mediatek,eth-mac.yaml
+++ /dev/null
@@ -1,89 +0,0 @@
-# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
-%YAML 1.2
----
-$id: http://devicetree.org/schemas/net/mediatek,eth-mac.yaml#
-$schema: http://devicetree.org/meta-schemas/core.yaml#
-
-title: MediaTek STAR Ethernet MAC Controller
-
-maintainers:
-  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
-
-description:
-  This Ethernet MAC is used on the MT8* family of SoCs from MediaTek.
-  It's compliant with 802.3 standards and supports half- and full-duplex
-  modes with flow-control as well as CRC offloading and VLAN tags.
-
-allOf:
-  - $ref: "ethernet-controller.yaml#"
-
-properties:
-  compatible:
-    enum:
-      - mediatek,mt8516-eth
-      - mediatek,mt8518-eth
-      - mediatek,mt8175-eth
-
-  reg:
-    maxItems: 1
-
-  interrupts:
-    maxItems: 1
-
-  clocks:
-    minItems: 3
-    maxItems: 3
-
-  clock-names:
-    additionalItems: false
-    items:
-      - const: core
-      - const: reg
-      - const: trans
-
-  mediatek,pericfg:
-    $ref: /schemas/types.yaml#definitions/phandle
-    description:
-      Phandle to the device containing the PERICFG register range. This is used
-      to control the MII mode.
-
-  mdio:
-    type: object
-    description:
-      Creates and registers an MDIO bus.
-
-required:
-  - compatible
-  - reg
-  - interrupts
-  - clocks
-  - clock-names
-  - mediatek,pericfg
-  - phy-handle
-
-examples:
-  - |
-    #include <dt-bindings/interrupt-controller/arm-gic.h>
-    #include <dt-bindings/clock/mt8516-clk.h>
-
-    ethernet: ethernet@11180000 {
-        compatible = "mediatek,mt8516-eth";
-        reg = <0x11180000 0x1000>;
-        mediatek,pericfg = <&pericfg>;
-        interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>;
-        clocks = <&topckgen CLK_TOP_RG_ETH>,
-                 <&topckgen CLK_TOP_66M_ETH>,
-                 <&topckgen CLK_TOP_133M_ETH>;
-        clock-names = "core", "reg", "trans";
-        phy-handle = <&eth_phy>;
-        phy-mode = "rmii";
-
-        mdio {
-            #address-cells = <1>;
-            #size-cells = <0>;
-
-            eth_phy: ethernet-phy@0 {
-                reg = <0>;
-            };
-        };
-    };
diff --git a/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
new file mode 100644
index 000000000000..aea88e621792
--- /dev/null
+++ b/Documentation/devicetree/bindings/net/mediatek,star-emac.yaml
@@ -0,0 +1,89 @@
+# SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause)
+%YAML 1.2
+---
+$id: http://devicetree.org/schemas/net/mediatek,star-emac.yaml#
+$schema: http://devicetree.org/meta-schemas/core.yaml#
+
+title: MediaTek STAR Ethernet MAC Controller
+
+maintainers:
+  - Bartosz Golaszewski <bgolaszewski@baylibre.com>
+
+description:
+  This Ethernet MAC is used on the MT8* family of SoCs from MediaTek.
+  It's compliant with 802.3 standards and supports half- and full-duplex
+  modes with flow-control as well as CRC offloading and VLAN tags.
+
+allOf:
+  - $ref: "ethernet-controller.yaml#"
+
+properties:
+  compatible:
+    enum:
+      - mediatek,mt8516-eth
+      - mediatek,mt8518-eth
+      - mediatek,mt8175-eth
+
+  reg:
+    maxItems: 1
+
+  interrupts:
+    maxItems: 1
+
+  clocks:
+    minItems: 3
+    maxItems: 3
+
+  clock-names:
+    additionalItems: false
+    items:
+      - const: core
+      - const: reg
+      - const: trans
+
+  mediatek,pericfg:
+    $ref: /schemas/types.yaml#definitions/phandle
+    description:
+      Phandle to the device containing the PERICFG register range. This is used
+      to control the MII mode.
+
+  mdio:
+    type: object
+    description:
+      Creates and registers an MDIO bus.
+
+required:
+  - compatible
+  - reg
+  - interrupts
+  - clocks
+  - clock-names
+  - mediatek,pericfg
+  - phy-handle
+
+examples:
+  - |
+    #include <dt-bindings/interrupt-controller/arm-gic.h>
+    #include <dt-bindings/clock/mt8516-clk.h>
+
+    ethernet: ethernet@11180000 {
+        compatible = "mediatek,mt8516-eth";
+        reg = <0x11180000 0x1000>;
+        mediatek,pericfg = <&pericfg>;
+        interrupts = <GIC_SPI 111 IRQ_TYPE_LEVEL_LOW>;
+        clocks = <&topckgen CLK_TOP_RG_ETH>,
+                 <&topckgen CLK_TOP_66M_ETH>,
+                 <&topckgen CLK_TOP_133M_ETH>;
+        clock-names = "core", "reg", "trans";
+        phy-handle = <&eth_phy>;
+        phy-mode = "rmii";
+
+        mdio {
+            #address-cells = <1>;
+            #size-cells = <0>;
+
+            eth_phy: ethernet-phy@0 {
+                reg = <0>;
+            };
+        };
+    };
-- 
cgit v1.2.3-59-g8ed1b


From f0b37fa613989dacbaba57010681218ed91e989b Mon Sep 17 00:00:00 2001
From: Louis Peens <louis.peens@netronome.com>
Date: Thu, 28 May 2020 16:18:46 +0200
Subject: nfp: flower: fix incorrect flag assignment

A previous refactoring missed some locations: the flags were renamed
but not moved from the previous flower_ext_feats to the new flower_en_feats
variable. This led to the FLOW_MERGE and LAG features not being enabled.

Fixes: e09303d3c4d9 ("nfp: flower: renaming of feature bits")
Signed-off-by: Louis Peens <louis.peens@netronome.com>
Signed-off-by: Simon Horman <simon.horman@netronome.com>
Acked-by: Jakub Kicinski <kuba@kernel.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index d054553c75e0..ca7032d22196 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -708,7 +708,7 @@ static int nfp_flower_sync_feature_bits(struct nfp_app *app)
 	err = nfp_rtsym_write_le(app->pf->rtbl,
 				 "_abi_flower_balance_sync_enable", 1);
 	if (!err) {
-		app_priv->flower_ext_feats |= NFP_FL_ENABLE_LAG;
+		app_priv->flower_en_feats |= NFP_FL_ENABLE_LAG;
 		nfp_flower_lag_init(&app_priv->nfp_lag);
 	} else if (err == -ENOENT) {
 		nfp_warn(app->cpp, "LAG not supported by FW.\n");
@@ -721,7 +721,7 @@ static int nfp_flower_sync_feature_bits(struct nfp_app *app)
 		err = nfp_rtsym_write_le(app->pf->rtbl,
 					 "_abi_flower_merge_hint_enable", 1);
 		if (!err) {
-			app_priv->flower_ext_feats |= NFP_FL_ENABLE_FLOW_MERGE;
+			app_priv->flower_en_feats |= NFP_FL_ENABLE_FLOW_MERGE;
 			nfp_flower_internal_port_init(app_priv);
 		} else if (err == -ENOENT) {
 			nfp_warn(app->cpp,
@@ -840,7 +840,7 @@ static int nfp_flower_init(struct nfp_app *app)
 	return 0;
 
 err_cleanup:
-	if (app_priv->flower_ext_feats & NFP_FL_ENABLE_LAG)
+	if (app_priv->flower_en_feats & NFP_FL_ENABLE_LAG)
 		nfp_flower_lag_cleanup(&app_priv->nfp_lag);
 	nfp_flower_metadata_cleanup(app);
 err_free_app_priv:
-- 
cgit v1.2.3-59-g8ed1b


From fd55199d3b762f9555c66a1bc4bf6eac3901fc2f Mon Sep 17 00:00:00 2001
From: Andrew Lunn <andrew@lunn.ch>
Date: Thu, 28 May 2020 23:43:24 +0200
Subject: net: ethtool: cabletest: Make ethnl_act_cable_test_tdr_cfg static

kbuild test robot is reporting:
net/ethtool/cabletest.c:230:5: warning: no previous prototype for
 'ethnl_act_cable_test_tdr_cfg' [-Wmissing-prototypes]

Mark the function as static.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ethtool/cabletest.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/net/ethtool/cabletest.c b/net/ethtool/cabletest.c
index 9991688d7d1d..7b7a0456c15c 100644
--- a/net/ethtool/cabletest.c
+++ b/net/ethtool/cabletest.c
@@ -227,9 +227,9 @@ cable_test_tdr_act_policy[ETHTOOL_A_CABLE_TEST_TDR_MAX + 1] = {
 };
 
 /* CABLE_TEST_TDR_ACT */
-int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
-				 struct genl_info *info,
-				 struct phy_tdr_config *cfg)
+static int ethnl_act_cable_test_tdr_cfg(const struct nlattr *nest,
+					struct genl_info *info,
+					struct phy_tdr_config *cfg)
 {
 	struct nlattr *tb[ETHTOOL_A_CABLE_TEST_TDR_CFG_MAX + 1];
 	int ret;
-- 
cgit v1.2.3-59-g8ed1b


From bc183dec08f9cb177cf5206a010b7a9e7b22e567 Mon Sep 17 00:00:00 2001
From: Florian Westphal <fw@strlen.de>
Date: Fri, 29 May 2020 00:01:52 +0200
Subject: tcp: tcp_init_buffer_space can be static

As of commit 98fa6271cfcb
("tcp: refactor setting the initial congestion window") this is called
only from tcp_input.c, so it can be static.

Signed-off-by: Florian Westphal <fw@strlen.de>
Reviewed-by: Eric Dumazet <edumazet@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/tcp.h    | 1 -
 net/ipv4/tcp_input.c | 2 +-
 2 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/include/net/tcp.h b/include/net/tcp.h
index 66e4b8331850..bca761ffa25f 100644
--- a/include/net/tcp.h
+++ b/include/net/tcp.h
@@ -662,7 +662,6 @@ void tcp_initialize_rcv_mss(struct sock *sk);
 int tcp_mtu_to_mss(struct sock *sk, int pmtu);
 int tcp_mss_to_mtu(struct sock *sk, int mss);
 void tcp_mtup_init(struct sock *sk);
-void tcp_init_buffer_space(struct sock *sk);
 
 static inline void tcp_bound_rto(const struct sock *sk)
 {
diff --git a/net/ipv4/tcp_input.c b/net/ipv4/tcp_input.c
index ad90102f5dfb..83330a6cb242 100644
--- a/net/ipv4/tcp_input.c
+++ b/net/ipv4/tcp_input.c
@@ -437,7 +437,7 @@ static void tcp_grow_window(struct sock *sk, const struct sk_buff *skb)
 /* 3. Try to fixup all. It is made immediately after connection enters
  *    established state.
  */
-void tcp_init_buffer_space(struct sock *sk)
+static void tcp_init_buffer_space(struct sock *sk)
 {
 	int tcp_app_win = sock_net(sk)->ipv4.sysctl_tcp_app_win;
 	struct tcp_sock *tp = tcp_sk(sk);
-- 
cgit v1.2.3-59-g8ed1b


From 56e2287b4110fbb81456a346b1d4c12bee7cf044 Mon Sep 17 00:00:00 2001
From: Jesper Dangaard Brouer <brouer@redhat.com>
Date: Wed, 27 May 2020 15:44:09 +0200
Subject: mlx5: fix xdp data_meta setup in mlx5e_fill_xdp_buff

The helper function xdp_set_data_meta_invalid() must be called after
setting xdp->data as it depends on it.

The bug was introduced in the cited patch below, and causes the kernel
to crash when using BPF helper bpf_xdp_adjust_head() on mlx5 driver.

Fixes: 39d6443c8daf ("mlx5, xsk: Migrate to new MEM_TYPE_XSK_BUFF_POOL")
Reported-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Jesper Dangaard Brouer <brouer@redhat.com>
Tested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_rx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
index 6b3c82da199c..dbb1c6323967 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rx.c
@@ -1056,8 +1056,8 @@ static void mlx5e_fill_xdp_buff(struct mlx5e_rq *rq, void *va, u16 headroom,
 				u32 len, struct xdp_buff *xdp)
 {
 	xdp->data_hard_start = va;
-	xdp_set_data_meta_invalid(xdp);
 	xdp->data = va + headroom;
+	xdp_set_data_meta_invalid(xdp);
 	xdp->data_end = xdp->data + len;
 	xdp->rxq = &rq->xdp_rxq;
 	xdp->frame_sz = rq->buff.frame0_sz;
-- 
cgit v1.2.3-59-g8ed1b


From 2950d1d64fd035726b4b060313f931ed52e3615f Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Tue, 26 May 2020 22:09:09 -0700
Subject: net/mlx5: Kconfig: Fix spelling typo

"mdoe"->"mode"

Fixes: d956873f908c ("net/mlx5e: Introduce kconfig var for TC support")
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reported-by: Marcelo Ricardo Leitner <marcelo.leitner@gmail.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/Kconfig | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
index 4256d59eca2b..b6ffd1622cfd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
+++ b/drivers/net/ethernet/mellanox/mlx5/core/Kconfig
@@ -84,7 +84,7 @@ config MLX5_CLS_ACT
 	default y
 	help
 	  mlx5 ConnectX offloads support for TC classifier action (NET_CLS_ACT),
-	  works in both native NIC mdoe and Switchdev SRIOV mode.
+	  works in both native NIC mode and Switchdev SRIOV mode.
 	  Actions get attached to a Hardware offloaded classifiers and are
 	  invoked after a successful classification. Actions are used to
 	  overwrite the classification result, instantly drop or redirect and/or
-- 
cgit v1.2.3-59-g8ed1b


From 2861904697de587c5b92c65a18a44ca3bbfd00ed Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Wed, 27 May 2020 00:50:22 -0700
Subject: net/mlx5e: Don't use err uninitialized in mlx5e_attach_decap

Clang warns:

drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:3712:6: warning:
variable 'err' is used uninitialized whenever 'if' condition is false
[-Wsometimes-uninitialized]
        if (IS_ERR(d->pkt_reformat)) {
            ^~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:3718:6: note:
uninitialized use occurs here
        if (err)
            ^~~
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:3712:2: note: remove the
'if' if its condition is always true
        if (IS_ERR(d->pkt_reformat)) {
        ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~
drivers/net/ethernet/mellanox/mlx5/core/en_tc.c:3670:9: note: initialize
the variable 'err' to silence this warning
        int err;
               ^
                = 0
1 warning generated.

It is not wrong, err is only ever initialized in if statements but this
one is not in one. Initialize err to 0 to fix this.

Fixes: 14e6b038afa0 ("net/mlx5e: Add support for hw decapsulation of MPLS over UDP")
Link: https://github.com/ClangBuiltLinux/linux/issues/1037
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index 0f119c08b835..ac19a61c5cbc 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -3667,7 +3667,7 @@ static int mlx5e_attach_decap(struct mlx5e_priv *priv,
 	struct mlx5e_decap_entry *d;
 	struct mlx5e_decap_key key;
 	uintptr_t hash_key;
-	int err;
+	int err = 0;
 
 	parse_attr = attr->parse_attr;
 	if (sizeof(parse_attr->eth) > MLX5_CAP_ESW(priv->mdev, max_encap_header_size)) {
-- 
cgit v1.2.3-59-g8ed1b


From e1167e16114f78f948078749aa1608a785f65807 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 28 Apr 2020 23:23:47 +0200
Subject: net/mlx5: reduce stack usage in qp_read_field

Moving the mlx5_ifc_query_qp_out_bits structure on the stack was a bit
excessive and now causes the compiler to complain on 32-bit architectures:

drivers/net/ethernet/mellanox/mlx5/core/debugfs.c: In function 'qp_read_field':
drivers/net/ethernet/mellanox/mlx5/core/debugfs.c:274:1: error: the frame size of 1104 bytes is larger than 1024 bytes [-Werror=frame-larger-than=]

Revert the previous patch partially to use dynamic allocation as
the code did before. Unfortunately there is no good error handling
in case the allocation fails.

Fixes: 57a6c5e992f5 ("net/mlx5: Replace hand written QP context struct with automatic getters")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Saeed Mahameed <saeedm@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c | 12 +++++++++---
 1 file changed, 9 insertions(+), 3 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index 6409090b3ec5..d2d57213511b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -202,18 +202,23 @@ void mlx5_cq_debugfs_cleanup(struct mlx5_core_dev *dev)
 static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 			 int index, int *is_str)
 {
-	u32 out[MLX5_ST_SZ_BYTES(query_qp_out)] = {};
+	int outlen = MLX5_ST_SZ_BYTES(query_qp_out);
 	u32 in[MLX5_ST_SZ_DW(query_qp_in)] = {};
 	u64 param = 0;
+	u32 *out;
 	int state;
 	u32 *qpc;
 	int err;
 
+	out = kzalloc(outlen, GFP_KERNEL);
+	if (!out)
+		return 0;
+
 	MLX5_SET(query_qp_in, in, opcode, MLX5_CMD_OP_QUERY_QP);
 	MLX5_SET(query_qp_in, in, qpn, qp->qpn);
 	err = mlx5_cmd_exec_inout(dev, query_qp, in, out);
 	if (err)
-		return 0;
+		goto out;
 
 	*is_str = 0;
 
@@ -269,7 +274,8 @@ static u64 qp_read_field(struct mlx5_core_dev *dev, struct mlx5_core_qp *qp,
 		param = MLX5_GET(qpc, qpc, remote_qpn);
 		break;
 	}
-
+out:
+	kfree(out);
 	return param;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 86ae579cefffe18cb08928505d90fbc87367e8f5 Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 27 May 2020 08:35:03 +0300
Subject: net: Make mpls_entry_encode() available for generic users

Move mpls_entry_encode() from net/mpls/internal.h to include/net/mpls.h
and make it available for other users. Specifically, hardware drivers that
offload MPLS can benefit from that.

Suggested-by: Jakub Kicinski <kuba@kernel.org>
Suggested-by: David Ahern <dsahern@gmail.com>
Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/net/mpls.h  | 17 +++++++++++++++++
 net/mpls/internal.h | 11 -----------
 2 files changed, 17 insertions(+), 11 deletions(-)

diff --git a/include/net/mpls.h b/include/net/mpls.h
index ccaf238e8ea7..0bb7944e7b08 100644
--- a/include/net/mpls.h
+++ b/include/net/mpls.h
@@ -8,6 +8,7 @@
 
 #include <linux/if_ether.h>
 #include <linux/netdevice.h>
+#include <linux/mpls.h>
 
 #define MPLS_HLEN 4
 
@@ -25,4 +26,20 @@ static inline struct mpls_shim_hdr *mpls_hdr(const struct sk_buff *skb)
 {
 	return (struct mpls_shim_hdr *)skb_network_header(skb);
 }
+
+static inline struct mpls_shim_hdr mpls_entry_encode(u32 label,
+						     unsigned int ttl,
+						     unsigned int tc,
+						     bool bos)
+{
+	struct mpls_shim_hdr result;
+
+	result.label_stack_entry =
+		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
+			    (tc << MPLS_LS_TC_SHIFT) |
+			    (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
+			    (ttl << MPLS_LS_TTL_SHIFT));
+	return result;
+}
+
 #endif
diff --git a/net/mpls/internal.h b/net/mpls/internal.h
index 0e9aa94adc07..838cdfc10e47 100644
--- a/net/mpls/internal.h
+++ b/net/mpls/internal.h
@@ -172,17 +172,6 @@ struct mpls_route { /* next hop label forwarding entry */
 
 #define endfor_nexthops(rt) }
 
-static inline struct mpls_shim_hdr mpls_entry_encode(u32 label, unsigned ttl, unsigned tc, bool bos)
-{
-	struct mpls_shim_hdr result;
-	result.label_stack_entry =
-		cpu_to_be32((label << MPLS_LS_LABEL_SHIFT) |
-			    (tc << MPLS_LS_TC_SHIFT) |
-			    (bos ? (1 << MPLS_LS_S_SHIFT) : 0) |
-			    (ttl << MPLS_LS_TTL_SHIFT));
-	return result;
-}
-
 static inline struct mpls_entry_decoded mpls_entry_decode(struct mpls_shim_hdr *hdr)
 {
 	struct mpls_entry_decoded result;
-- 
cgit v1.2.3-59-g8ed1b


From f7e3ac424a2b3fc6fb3b490106cd874adb62ae4a Mon Sep 17 00:00:00 2001
From: Eli Cohen <eli@mellanox.com>
Date: Wed, 27 May 2020 08:38:03 +0300
Subject: net/mlx5e: Use generic API to build MPLS label

Make use of generic API mpls_entry_encode() to build mpls label and get
rid of local function.

Signed-off-by: Eli Cohen <eli@mellanox.com>
Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 .../ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c | 20 +++-----------------
 1 file changed, 3 insertions(+), 17 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
index b4a3c96d34fd..1f9526244222 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/tc_tun_mplsoudp.c
@@ -25,35 +25,21 @@ static int init_encap_attr(struct net_device *tunnel_dev,
 	return 0;
 }
 
-static inline __be32 mpls_label_id_field(__be32 label, u8 tos, u8 ttl)
-{
-	u32 res;
-
-	/* mpls label is 32 bits long and construction as follows:
-	 * 20 bits label
-	 * 3 bits tos
-	 * 1 bit bottom of stack. Since we support only one label, this bit is
-	 *       always set.
-	 * 8 bits TTL
-	 */
-	res = be32_to_cpu(label) << 12 | 1 << 8 | (tos & 7) <<  9 | ttl;
-	return cpu_to_be32(res);
-}
-
 static int generate_ip_tun_hdr(char buf[],
 			       __u8 *ip_proto,
 			       struct mlx5e_encap_entry *r)
 {
 	const struct ip_tunnel_key *tun_key = &r->tun_info->key;
-	__be32 tun_id = tunnel_id_to_key32(tun_key->tun_id);
 	struct udphdr *udp = (struct udphdr *)(buf);
 	struct mpls_shim_hdr *mpls;
+	u32 tun_id;
 
+	tun_id = be32_to_cpu(tunnel_id_to_key32(tun_key->tun_id));
 	mpls = (struct mpls_shim_hdr *)(udp + 1);
 	*ip_proto = IPPROTO_UDP;
 
 	udp->dest = tun_key->tp_dst;
-	mpls->label_stack_entry = mpls_label_id_field(tun_id, tun_key->tos, tun_key->ttl);
+	*mpls = mpls_entry_encode(tun_id, tun_key->ttl, tun_key->tos, true);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 618f88c4c40a9621a3105f3ff957a91a148e7d94 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2020 01:02:08 -0700
Subject: net/mlx5: DR: Fix incorrect type in argument

HW spec objects should receive a void ptr to work on, the MLX5_SET/GET
macro will know how to handle it.

No need to provide explicit or wrong pointer type in this case.

warning: incorrect type in argument 1 (different base types)
    expected unsigned long long const [usertype] *sw_action
    got restricted __be64 [usertype] *[assigned] sw_action

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
index 554811de4c9d..df1363a34a42 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_action.c
@@ -1662,7 +1662,7 @@ dr_action_modify_check_field_limitation(struct mlx5dr_action *action,
 }
 
 static bool
-dr_action_modify_check_is_ttl_modify(const u64 *sw_action)
+dr_action_modify_check_is_ttl_modify(const void *sw_action)
 {
 	u16 sw_field = MLX5_GET(set_action_in, sw_action, field);
 
-- 
cgit v1.2.3-59-g8ed1b


From c2ba2c2287698bac36bf71e5c4f3be423371bee0 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2020 01:11:37 -0700
Subject: net/mlx5: DR: Fix cast to restricted __be32

raw_ip actual type is __be32 and not u32.
Fix that and get rid of the warning.

drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c:906:31:
warning: cast to restricted __be32

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 78c884911ceb..470895016693 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -869,7 +869,7 @@ static void dr_ste_copy_mask_misc(char *mask, struct mlx5dr_match_misc *spec)
 
 static void dr_ste_copy_mask_spec(char *mask, struct mlx5dr_match_spec *spec)
 {
-	u32 raw_ip[4];
+	__be32 raw_ip[4];
 
 	spec->smac_47_16 = MLX5_GET(fte_match_set_lyr_2_4, mask, smac_47_16);
 
-- 
cgit v1.2.3-59-g8ed1b


From 9ff2e92c466dc1aa4d970e5027dfd66b1f32b7bc Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2020 01:14:31 -0700
Subject: net/mlx5: DR: Fix incorrect type in return expression

dr_ste_crc32_calc() calculates crc32 and should return it in HW format.
It is being used to calculate a u32 index, hence we force the return value
to u32 to avoid the sparse warning:

drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c:115:16:
warning: incorrect type in return expression (different base types)
    expected unsigned int
    got restricted __be32 [usertype]

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
index 470895016693..00c2f598f034 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/steering/dr_ste.c
@@ -112,7 +112,7 @@ static u32 dr_ste_crc32_calc(const void *input_data, size_t length)
 {
 	u32 crc = crc32(0, input_data, length);
 
-	return htonl(crc);
+	return (__force u32)htonl(crc);
 }
 
 u32 mlx5dr_ste_calc_hash_index(u8 *hw_ste_p, struct mlx5dr_ste_htbl *htbl)
-- 
cgit v1.2.3-59-g8ed1b


From 2553f421f44f4db7579f202b79b69046b579c7b5 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 27 May 2020 23:16:02 -0700
Subject: net/mlx5: cmd: Fix memset with byte count warning

Fix sparse warning:
drivers/net/ethernet/mellanox/mlx5/core/cmd.c:1949:15:
warning: memset with byte count of 271720

mlx5_cmd_stats array is too big to be held inline in mlx5_cmd.
Allocate it separately.

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/cmd.c     | 20 ++++++++++++++------
 drivers/net/ethernet/mellanox/mlx5/core/debugfs.c |  2 +-
 include/linux/mlx5/driver.h                       |  2 +-
 3 files changed, 16 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
index eca159e8e123..1d91a0d0ab1d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/cmd.c
@@ -1072,7 +1072,7 @@ static int mlx5_cmd_invoke(struct mlx5_core_dev *dev, struct mlx5_cmd_msg *in,
 
 	ds = ent->ts2 - ent->ts1;
 	op = MLX5_GET(mbox_in, in->first.data, opcode);
-	if (op < ARRAY_SIZE(cmd->stats)) {
+	if (op < MLX5_CMD_OP_MAX) {
 		stats = &cmd->stats[op];
 		spin_lock_irq(&stats->lock);
 		stats->sum += ds;
@@ -1551,7 +1551,7 @@ static void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool force
 
 			if (ent->callback) {
 				ds = ent->ts2 - ent->ts1;
-				if (ent->op < ARRAY_SIZE(cmd->stats)) {
+				if (ent->op < MLX5_CMD_OP_MAX) {
 					stats = &cmd->stats[ent->op];
 					spin_lock_irqsave(&stats->lock, flags);
 					stats->sum += ds;
@@ -1960,10 +1960,16 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 		return -EINVAL;
 	}
 
-	cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0);
-	if (!cmd->pool)
+	cmd->stats = kvzalloc(MLX5_CMD_OP_MAX * sizeof(*cmd->stats), GFP_KERNEL);
+	if (!cmd->stats)
 		return -ENOMEM;
 
+	cmd->pool = dma_pool_create("mlx5_cmd", dev->device, size, align, 0);
+	if (!cmd->pool) {
+		err = -ENOMEM;
+		goto dma_pool_err;
+	}
+
 	err = alloc_cmd_page(dev, cmd);
 	if (err)
 		goto err_free_pool;
@@ -1999,7 +2005,7 @@ int mlx5_cmd_init(struct mlx5_core_dev *dev)
 
 	spin_lock_init(&cmd->alloc_lock);
 	spin_lock_init(&cmd->token_lock);
-	for (i = 0; i < ARRAY_SIZE(cmd->stats); i++)
+	for (i = 0; i < MLX5_CMD_OP_MAX; i++)
 		spin_lock_init(&cmd->stats[i].lock);
 
 	sema_init(&cmd->sem, cmd->max_reg_cmds);
@@ -2046,7 +2052,8 @@ err_free_page:
 
 err_free_pool:
 	dma_pool_destroy(cmd->pool);
-
+dma_pool_err:
+	kvfree(cmd->stats);
 	return err;
 }
 EXPORT_SYMBOL(mlx5_cmd_init);
@@ -2060,6 +2067,7 @@ void mlx5_cmd_cleanup(struct mlx5_core_dev *dev)
 	destroy_msg_cache(dev);
 	free_cmd_page(dev, cmd);
 	dma_pool_destroy(cmd->pool);
+	kvfree(cmd->stats);
 }
 EXPORT_SYMBOL(mlx5_cmd_cleanup);
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
index d2d57213511b..07c8d9811bc8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/debugfs.c
@@ -171,7 +171,7 @@ void mlx5_cmdif_debugfs_init(struct mlx5_core_dev *dev)
 	cmd = &dev->priv.cmdif_debugfs;
 	*cmd = debugfs_create_dir("commands", dev->priv.dbg_root);
 
-	for (i = 0; i < ARRAY_SIZE(dev->cmd.stats); i++) {
+	for (i = 0; i < MLX5_CMD_OP_MAX; i++) {
 		stats = &dev->cmd.stats[i];
 		namep = mlx5_command_str(i);
 		if (strcmp(namep, "unknown command opcode")) {
diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h
index 6aa6bbd60559..13c0e4556eda 100644
--- a/include/linux/mlx5/driver.h
+++ b/include/linux/mlx5/driver.h
@@ -298,7 +298,7 @@ struct mlx5_cmd {
 	struct mlx5_cmd_debug dbg;
 	struct cmd_msg_cache cache[MLX5_NUM_COMMAND_CACHES];
 	int checksum_disabled;
-	struct mlx5_cmd_stats stats[MLX5_CMD_OP_MAX];
+	struct mlx5_cmd_stats *stats;
 };
 
 struct mlx5_port_caps {
-- 
cgit v1.2.3-59-g8ed1b


From aee3e9c457f172870bdb87e675faf6c4528190b1 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2020 18:42:40 -0700
Subject: net/mlx5: Accel: fpga tls fix cast to __be64 and incorrect argument
 types

tls handle and rcd_sn are actually big endian and not in host format.
Fix that.

Fix the following sparse warnings:
drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c:177:21:
warning: cast to restricted __be64

drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c:178:52:
warning: incorrect type in argument 2 (different base types)
    expected unsigned int [usertype] handle
    got restricted __be32 [usertype] handle

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c    | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h    | 8 ++++----
 drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c | 2 +-
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c     | 4 ++--
 drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h     | 4 ++--
 5 files changed, 11 insertions(+), 11 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
index cab708af3422..cbf3d76c05a8 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.c
@@ -56,8 +56,8 @@ void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
 	mlx5_fpga_tls_del_flow(mdev, swid, GFP_KERNEL, direction_sx);
 }
 
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
-			     u64 rcd_sn)
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
+			     u32 seq, __be64 rcd_sn)
 {
 	return mlx5_fpga_tls_resync_rx(mdev, handle, seq, rcd_sn);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
index e09bc3858d57..aefea467f7b3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/accel/tls.h
@@ -109,8 +109,8 @@ int mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
 			    bool direction_sx);
 void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
 			     bool direction_sx);
-int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
-			     u64 rcd_sn);
+int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
+			     u32 seq, __be64 rcd_sn);
 bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev);
 u32 mlx5_accel_tls_device_caps(struct mlx5_core_dev *mdev);
 int mlx5_accel_tls_init(struct mlx5_core_dev *mdev);
@@ -125,8 +125,8 @@ mlx5_accel_tls_add_flow(struct mlx5_core_dev *mdev, void *flow,
 			bool direction_sx) { return -ENOTSUPP; }
 static inline void mlx5_accel_tls_del_flow(struct mlx5_core_dev *mdev, u32 swid,
 					   bool direction_sx) { }
-static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle,
-					   u32 seq, u64 rcd_sn) { return 0; }
+static inline int mlx5_accel_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
+					   u32 seq, __be64 rcd_sn) { return 0; }
 static inline bool mlx5_accel_is_tls_device(struct mlx5_core_dev *mdev)
 {
 	return mlx5_accel_is_ktls_device(mdev);
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
index c27e9a609d51..1fbb5a90cb38 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_accel/tls.c
@@ -167,7 +167,7 @@ static int mlx5e_tls_resync(struct net_device *netdev, struct sock *sk,
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct mlx5e_priv *priv = netdev_priv(netdev);
 	struct mlx5e_tls_offload_context_rx *rx_ctx;
-	u64 rcd_sn = *(u64 *)rcd_sn_data;
+	__be64 rcd_sn = *(__be64 *)rcd_sn_data;
 
 	if (WARN_ON_ONCE(direction != TLS_OFFLOAD_CTX_DIR_RX))
 		return -EINVAL;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
index 22a2ef111514..29b7339ebfa3 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.c
@@ -194,8 +194,8 @@ static void mlx5_fpga_tls_flow_to_cmd(void *flow, void *cmd)
 		 MLX5_GET(tls_flow, flow, direction_sx));
 }
 
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
-			    u64 rcd_sn)
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
+			    u32 seq, __be64 rcd_sn)
 {
 	struct mlx5_fpga_dma_buf *buf;
 	int size = sizeof(*buf) + MLX5_TLS_COMMAND_SIZE;
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
index 3b2e37bf76fe..5714cf391d1b 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/fpga/tls.h
@@ -68,7 +68,7 @@ static inline u32 mlx5_fpga_tls_device_caps(struct mlx5_core_dev *mdev)
 	return mdev->fpga->tls->caps;
 }
 
-int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, u32 handle, u32 seq,
-			    u64 rcd_sn);
+int mlx5_fpga_tls_resync_rx(struct mlx5_core_dev *mdev, __be32 handle,
+			    u32 seq, __be64 rcd_sn);
 
 #endif /* __MLX5_FPGA_TLS_H__ */
-- 
cgit v1.2.3-59-g8ed1b


From 44345c4c130ee3df9b9fbc366d59ab3ac707d7f8 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 29 May 2020 00:47:12 -0700
Subject: net/mlx5: IPSec: Fix incorrect type for spi

spi is __be32, fix that.

Fixes sparse warning:
drivers/net/ethernet/mellanox/mlx5/core/accel/ipsec.c:74:64
warning: incorrect type

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 include/linux/mlx5/accel.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/include/linux/mlx5/accel.h b/include/linux/mlx5/accel.h
index b919d143a9a6..96ebaa94a92e 100644
--- a/include/linux/mlx5/accel.h
+++ b/include/linux/mlx5/accel.h
@@ -76,7 +76,7 @@ struct aes_gcm_keymat {
 struct mlx5_accel_esp_xfrm_attrs {
 	enum mlx5_accel_esp_action action;
 	u32   esn;
-	u32   spi;
+	__be32 spi;
 	u32   seq;
 	u32   tfc_pad;
 	u32   flags;
-- 
cgit v1.2.3-59-g8ed1b


From c51323ee7ab4132c80db198b7d0956fef957e6ab Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Wed, 27 May 2020 23:41:03 -0700
Subject: net/mlx5e: en_tc: Fix incorrect type in initializer warnings

Fix some trivial warnings of the type:
warning: incorrect type in initializer (different base types)

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index ac19a61c5cbc..e866f209f252 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -1873,7 +1873,7 @@ enc_opts_is_dont_care_or_full_match(struct mlx5e_priv *priv,
 		    memchr_inv(opt->opt_data, 0, opt->length * 4)) {
 			*dont_care = false;
 
-			if (opt->opt_class != U16_MAX ||
+			if (opt->opt_class != htons(U16_MAX) ||
 			    opt->type != U8_MAX) {
 				NL_SET_ERR_MSG(extack,
 					       "Partial match of tunnel options in chain > 0 isn't supported");
-- 
cgit v1.2.3-59-g8ed1b


From 58ff18e12c9b3bb860b32e9cac4dc8e12aec2695 Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Thu, 28 May 2020 00:22:12 -0700
Subject: net/mlx5e: en_tc: Fix cast to restricted __be32 warning

Fixes sparse warnings:
warning: cast to restricted __be32
warning: restricted __be32 degrades to integer

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
Reviewed-by: Mark Bloch <markb@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_tc.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
index e866f209f252..3ce177c24d52 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_tc.c
@@ -210,8 +210,8 @@ mlx5e_tc_match_to_reg_match(struct mlx5_flow_spec *spec,
 	fmask = headers_c + soffset;
 	fval = headers_v + soffset;
 
-	mask = cpu_to_be32(mask) >> (32 - (match_len * 8));
-	data = cpu_to_be32(data) >> (32 - (match_len * 8));
+	mask = (__force u32)(cpu_to_be32(mask)) >> (32 - (match_len * 8));
+	data = (__force u32)(cpu_to_be32(data)) >> (32 - (match_len * 8));
 
 	memcpy(fmask, &mask, match_len);
 	memcpy(fval, &data, match_len);
@@ -2815,10 +2815,10 @@ static int offload_pedit_fields(struct mlx5e_priv *priv,
 			continue;
 
 		if (f->field_bsize == 32) {
-			mask_be32 = (__be32)mask;
+			mask_be32 = (__force __be32)(mask);
 			mask = (__force unsigned long)cpu_to_le32(be32_to_cpu(mask_be32));
 		} else if (f->field_bsize == 16) {
-			mask_be32 = (__be32)mask;
+			mask_be32 = (__force __be32)(mask);
 			mask_be16 = *(__be16 *)&mask_be32;
 			mask = (__force unsigned long)cpu_to_le16(be16_to_cpu(mask_be16));
 		}
-- 
cgit v1.2.3-59-g8ed1b


From eb24387183d37f2f4f456654ef92679b1556f8df Mon Sep 17 00:00:00 2001
From: Saeed Mahameed <saeedm@mellanox.com>
Date: Fri, 29 May 2020 00:36:10 -0700
Subject: net/mlx5e: Make mlx5e_dcbnl_ops static

Fix sparse warning:
drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c:988:29:
error: symbol 'mlx5e_dcbnl_ops' was not declared. Should it be static?

Signed-off-by: Saeed Mahameed <saeedm@mellanox.com>
---
 drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
index ec7b332d74c2..bc102d094bbd 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_dcbnl.c
@@ -985,7 +985,7 @@ static int mlx5e_dcbnl_setbuffer(struct net_device *dev,
 	return err;
 }
 
-const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
+static const struct dcbnl_rtnl_ops mlx5e_dcbnl_ops = {
 	.ieee_getets	= mlx5e_dcbnl_ieee_getets,
 	.ieee_setets	= mlx5e_dcbnl_ieee_setets,
 	.ieee_getmaxrate = mlx5e_dcbnl_ieee_getmaxrate,
-- 
cgit v1.2.3-59-g8ed1b


From c01c320d24ac42802ee5e6db5342477d64a23e8f Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Sun, 26 Apr 2020 11:47:32 +0300
Subject: ath9k: Set RX filter based to allow broadcast Action frame RX

Advertise support for multicast frame registration and update the RX
filter based on the recently added FIF_MCAST_ACTION to allow broadcast
Action frames to be received. This is needed for Device Provisioning
Protocol (DPP) use cases that use broadcast Public Action frames.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426084733.7889-1-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath9k/init.c | 2 ++
 drivers/net/wireless/ath/ath9k/main.c | 1 +
 drivers/net/wireless/ath/ath9k/recv.c | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/init.c b/drivers/net/wireless/ath/ath9k/init.c
index 17c318902cb8..289a2444d534 100644
--- a/drivers/net/wireless/ath/ath9k/init.c
+++ b/drivers/net/wireless/ath/ath9k/init.c
@@ -1012,6 +1012,8 @@ static void ath9k_set_hw_capab(struct ath_softc *sc, struct ieee80211_hw *hw)
 
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_AIRTIME_FAIRNESS);
+	wiphy_ext_feature_set(hw->wiphy,
+			      NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS);
 }
 
 int ath9k_init_device(u16 devid, struct ath_softc *sc,
diff --git a/drivers/net/wireless/ath/ath9k/main.c b/drivers/net/wireless/ath/ath9k/main.c
index 457e9b0d21ca..a47f6e978095 100644
--- a/drivers/net/wireless/ath/ath9k/main.c
+++ b/drivers/net/wireless/ath/ath9k/main.c
@@ -1476,6 +1476,7 @@ static int ath9k_config(struct ieee80211_hw *hw, u32 changed)
 	FIF_OTHER_BSS |				\
 	FIF_BCN_PRBRESP_PROMISC |		\
 	FIF_PROBE_REQ |				\
+	FIF_MCAST_ACTION |			\
 	FIF_FCSFAIL)
 
 /* FIXME: sc->sc_full_reset ? */
diff --git a/drivers/net/wireless/ath/ath9k/recv.c b/drivers/net/wireless/ath/ath9k/recv.c
index 06e660858766..0c0624a3b40d 100644
--- a/drivers/net/wireless/ath/ath9k/recv.c
+++ b/drivers/net/wireless/ath/ath9k/recv.c
@@ -413,7 +413,8 @@ u32 ath_calcrxfilter(struct ath_softc *sc)
 	if (sc->cur_chandef.width != NL80211_CHAN_WIDTH_20_NOHT)
 		rfilt |= ATH9K_RX_FILTER_COMP_BAR;
 
-	if (sc->cur_chan->nvifs > 1 || (sc->cur_chan->rxfilter & FIF_OTHER_BSS)) {
+	if (sc->cur_chan->nvifs > 1 ||
+	    (sc->cur_chan->rxfilter & (FIF_OTHER_BSS | FIF_MCAST_ACTION))) {
 		/* This is needed for older chips */
 		if (sc->sc_ah->hw_version.macVersion <= AR_SREV_VERSION_9160)
 			rfilt |= ATH9K_RX_FILTER_PROM;
-- 
cgit v1.2.3-59-g8ed1b


From 2a9311b5d39ab7e60e612d75fd08c78e358caf4d Mon Sep 17 00:00:00 2001
From: Jouni Malinen <jouni@codeaurora.org>
Date: Sun, 26 Apr 2020 11:47:33 +0300
Subject: ath9k_htc: Set RX filter based to allow broadcast Action frame RX

Advertise support for multicast frame registration and update the RX
filter based on the recently added FIF_MCAST_ACTION to allow broadcast
Action frames to be received. This is needed for Device Provisioning
Protocol (DPP) use cases that use broadcast Public Action frames.

Signed-off-by: Jouni Malinen <jouni@codeaurora.org>
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200426084733.7889-2-jouni@codeaurora.org
---
 drivers/net/wireless/ath/ath9k/htc_drv_init.c | 2 ++
 drivers/net/wireless/ath/ath9k/htc_drv_main.c | 1 +
 drivers/net/wireless/ath/ath9k/htc_drv_txrx.c | 3 ++-
 3 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_init.c b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
index 40a065028ebe..1d6ad8d46607 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_init.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_init.c
@@ -780,6 +780,8 @@ static void ath9k_set_hw_capab(struct ath9k_htc_priv *priv,
 	SET_IEEE80211_PERM_ADDR(hw, common->macaddr);
 
 	wiphy_ext_feature_set(hw->wiphy, NL80211_EXT_FEATURE_CQM_RSSI_LIST);
+	wiphy_ext_feature_set(hw->wiphy,
+			      NL80211_EXT_FEATURE_MULTICAST_REGISTRATIONS);
 }
 
 static int ath9k_init_firmware_version(struct ath9k_htc_priv *priv)
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_main.c b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
index 791f6633667c..2b7832b1c800 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_main.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_main.c
@@ -1251,6 +1251,7 @@ out:
 	FIF_OTHER_BSS |				\
 	FIF_BCN_PRBRESP_PROMISC |		\
 	FIF_PROBE_REQ |				\
+	FIF_MCAST_ACTION |			\
 	FIF_FCSFAIL)
 
 static void ath9k_htc_configure_filter(struct ieee80211_hw *hw,
diff --git a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
index 118e5550b10c..b353995bdd45 100644
--- a/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
+++ b/drivers/net/wireless/ath/ath9k/htc_drv_txrx.c
@@ -893,7 +893,8 @@ u32 ath9k_htc_calcrxfilter(struct ath9k_htc_priv *priv)
 	if (priv->rxfilter & FIF_PSPOLL)
 		rfilt |= ATH9K_RX_FILTER_PSPOLL;
 
-	if (priv->nvifs > 1 || priv->rxfilter & FIF_OTHER_BSS)
+	if (priv->nvifs > 1 ||
+	    priv->rxfilter & (FIF_OTHER_BSS | FIF_MCAST_ACTION))
 		rfilt |= ATH9K_RX_FILTER_MCAST_BCAST_ALL;
 
 	return rfilt;
-- 
cgit v1.2.3-59-g8ed1b


From 23cc6bb5a2e6a9220c075824c8f68a91a633b547 Mon Sep 17 00:00:00 2001
From: Douglas Anderson <dianders@chromium.org>
Date: Thu, 28 May 2020 12:21:09 -0700
Subject: ath10k: Remove ath10k_qmi_register_service_notifier() declaration

The ath10k/qmi.h header file contains a declaration for the function
ath10k_qmi_register_service_notifier().  This function doesn't exist.
Remove the declaration.

This patch is a no-op and was just found by code inspection.

Fixes: ba94c753ccb4 ("ath10k: add QMI message handshake for wcn3990 client")
Signed-off-by: Douglas Anderson <dianders@chromium.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/20200528122105.1.I31937dce728b441fd72cbe23447bc4710fd56ddb@changeid
---
 drivers/net/wireless/ath/ath10k/qmi.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath10k/qmi.h b/drivers/net/wireless/ath/ath10k/qmi.h
index 16190511318d..89464239fe96 100644
--- a/drivers/net/wireless/ath/ath10k/qmi.h
+++ b/drivers/net/wireless/ath/ath10k/qmi.h
@@ -115,7 +115,6 @@ int ath10k_qmi_wlan_enable(struct ath10k *ar,
 			   enum wlfw_driver_mode_enum_v01 mode,
 			   const char *version);
 int ath10k_qmi_wlan_disable(struct ath10k *ar);
-int ath10k_qmi_register_service_notifier(struct notifier_block *nb);
 int ath10k_qmi_init(struct ath10k *ar, u32 msa_size);
 int ath10k_qmi_deinit(struct ath10k *ar);
 int ath10k_qmi_set_fw_log_mode(struct ath10k *ar, u8 fw_log_mode);
-- 
cgit v1.2.3-59-g8ed1b


From 9529cba988b74091b3a67916b1c119f5b189b8b6 Mon Sep 17 00:00:00 2001
From: Muna Sinada <msinada@codeaurora.org>
Date: Thu, 28 May 2020 15:54:43 -0700
Subject: ath11k: reset trigger frame MAC padding duration

The value was 3, which is a reserved value. Corrected to maintain fw
defaults.

Signed-off-by: Muna Sinada <msinada@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1590706483-27609-1-git-send-email-msinada@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 1a7e5817e5c8..00e5aac3deea 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3562,7 +3562,7 @@ static int ath11k_mac_copy_he_cap(struct ath11k *ar,
 		memcpy(he_cap_elem->phy_cap_info, band_cap->he_cap_phy_info,
 		       sizeof(he_cap_elem->phy_cap_info));
 
-		he_cap_elem->mac_cap_info[1] |=
+		he_cap_elem->mac_cap_info[1] &=
 			IEEE80211_HE_MAC_CAP1_TF_MAC_PAD_DUR_MASK;
 		he_cap_elem->phy_cap_info[4] &=
 			~IEEE80211_HE_PHY_CAP4_BEAMFORMEE_MAX_STS_UNDER_80MHZ_MASK;
-- 
cgit v1.2.3-59-g8ed1b


From 37b76986ebd72aae2c613b94b805e67eaea558b6 Mon Sep 17 00:00:00 2001
From: Muna Sinada <msinada@codeaurora.org>
Date: Thu, 28 May 2020 16:10:17 -0700
Subject: ath11k: clear DCM max constellation tx value

According to the 11ax spec draft 4.0, the DCM Max Constellation Tx
field should be set to "Reserved" for an AP, therefore the field is
cleared.

Signed-off-by: Muna Sinada <msinada@codeaurora.org>
Signed-off-by: Kalle Valo <kvalo@codeaurora.org>
Link: https://lore.kernel.org/r/1590707417-29672-1-git-send-email-msinada@codeaurora.org
---
 drivers/net/wireless/ath/ath11k/mac.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/wireless/ath/ath11k/mac.c b/drivers/net/wireless/ath/ath11k/mac.c
index 00e5aac3deea..2836a0f197ab 100644
--- a/drivers/net/wireless/ath/ath11k/mac.c
+++ b/drivers/net/wireless/ath/ath11k/mac.c
@@ -3578,6 +3578,8 @@ static int ath11k_mac_copy_he_cap(struct ath11k *ar,
 
 		switch (i) {
 		case NL80211_IFTYPE_AP:
+			he_cap_elem->phy_cap_info[3] &=
+				~IEEE80211_HE_PHY_CAP3_DCM_MAX_CONST_TX_MASK;
 			he_cap_elem->phy_cap_info[9] |=
 				IEEE80211_HE_PHY_CAP9_RX_1024_QAM_LESS_THAN_242_TONE_RU;
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 1f4982ef56f794101cae7ec0fa3b7605f78bd25f Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:27 +0800
Subject: net: hns3: fix a print format issue in hclge_mac_mdio_config()

Use %d to print int variable 'ret' in hclge_mac_mdio_config().

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
index 696c5ae922e3..e89820702540 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_mdio.c
@@ -155,7 +155,7 @@ int hclge_mac_mdio_config(struct hclge_dev *hdev)
 	ret = mdiobus_register(mdio_bus);
 	if (ret) {
 		dev_err(mdio_bus->parent,
-			"Failed to register MDIO bus ret = %#x\n", ret);
+			"failed to register MDIO bus, ret = %d\n", ret);
 		return ret;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From d62805087e7fbbd7582403b972dd75581256e585 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:28 +0800
Subject: net: hns3: remove an unused macro hclge_is_csq

Macro hclge_is_csq defined in hclge_cmd.c is not used,
so remove it.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
index 64a1d0bdd7d1..1d6c328bd9fb 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3pf/hclge_cmd.c
@@ -11,8 +11,6 @@
 #include "hnae3.h"
 #include "hclge_main.h"
 
-#define hclge_is_csq(ring) ((ring)->flag & HCLGE_TYPE_CSQ)
-
 #define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
 
 static int hclge_ring_space(struct hclge_cmq_ring *ring)
-- 
cgit v1.2.3-59-g8ed1b


From fc68aed15648c70db0377a6abd2b34ec328dd12a Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:29 +0800
Subject: net: hns3: remove two unused macros in hclgevf_cmd.c

Macros hclgevf_ring_to_dma_dir and hclgevf_is_csq are defined in
hclgevf_cmd.c but not used, so remove them.

Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
index f38d236ebf4f..fec65239a3c8 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_cmd.c
@@ -11,9 +11,6 @@
 #include "hclgevf_main.h"
 #include "hnae3.h"
 
-#define hclgevf_is_csq(ring) ((ring)->flag & HCLGEVF_TYPE_CSQ)
-#define hclgevf_ring_to_dma_dir(ring) (hclgevf_is_csq(ring) ? \
-					DMA_TO_DEVICE : DMA_FROM_DEVICE)
 #define cmq_ring_to_dev(ring)   (&(ring)->dev->pdev->dev)
 
 static int hclgevf_ring_space(struct hclgevf_cmq_ring *ring)
-- 
cgit v1.2.3-59-g8ed1b


From ec4d9392207aad5db32cc518c4a5c8b7f1057fa1 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:30 +0800
Subject: net: hns3: fix an incorrect comment for num_tqps in struct
 hclgevf_dev

struct hclgevf_dev stands for VF device, its field num_tqps
indicates the number of VF's task queue pairs, so the comment
is incorrect, replace 'PF' with 'VF'.

Reported-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
index 738de124cfc4..c1fac8920ae3 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.h
@@ -278,7 +278,7 @@ struct hclgevf_dev {
 	struct semaphore reset_sem;	/* protect reset process */
 
 	u32 fw_version;
-	u16 num_tqps;		/* num task queue pairs of this PF */
+	u16 num_tqps;		/* num task queue pairs of this VF */
 
 	u16 alloc_rss_size;	/* allocated RSS task queue */
 	u16 rss_size_max;	/* HW defined max RSS task queue */
-- 
cgit v1.2.3-59-g8ed1b


From 2adb8187e5439e5066c9893586e5079e89f9060a Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:31 +0800
Subject: net: hns3: fix two coding style issues in hclgevf_main.c

Remove a redundant blank line in hclgevf_cmd_set_promisc_mode(),
and fix a reverse xmas tree coding style issue in
hclgevf_set_rss_tc_mode().

Reported-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
index a8c0e79901f5..1b9578d0bd80 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3vf/hclgevf_main.c
@@ -669,8 +669,8 @@ static int hclgevf_set_rss_tc_mode(struct hclgevf_dev *hdev,  u16 rss_size)
 	u16 tc_size[HCLGEVF_MAX_TC_NUM];
 	struct hclgevf_desc desc;
 	u16 roundup_size;
-	int status;
 	unsigned int i;
+	int status;
 
 	req = (struct hclgevf_rss_tc_mode_cmd *)desc.data;
 
@@ -1143,7 +1143,6 @@ static int hclgevf_cmd_set_promisc_mode(struct hclgevf_dev *hdev,
 	send_msg.en_mc = en_mc_pmc ? 1 : 0;
 
 	ret = hclgevf_send_mbx_msg(hdev, &send_msg, false, NULL, 0);
-
 	if (ret)
 		dev_err(&hdev->pdev->dev,
 			"Set promisc mode fail, status is %d.\n", ret);
-- 
cgit v1.2.3-59-g8ed1b


From 996aade998ac0e9f6f0bf09531c32f1106d9d559 Mon Sep 17 00:00:00 2001
From: Huazhong Tan <tanhuazhong@huawei.com>
Date: Sat, 30 May 2020 09:08:32 +0800
Subject: net: hns3: remove some unused codes in hns3_nic_set_features()

NETIF_F_HW_VLAN_CTAG_FILTER is not set in netdev->hw_features for
the HNS3 driver, so the handler of NETIF_F_HW_VLAN_CTAG_FILTER
in hns3_nic_set_features() won't be called; remove it.

Reported-by: Jian Shen <shenjian15@huawei.com>
Signed-off-by: Huazhong Tan <tanhuazhong@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/hisilicon/hns3/hns3_enet.c | 6 ------
 1 file changed, 6 deletions(-)

diff --git a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
index 9fe40c7773b4..b14f2abc2425 100644
--- a/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
+++ b/drivers/net/ethernet/hisilicon/hns3/hns3_enet.c
@@ -1544,12 +1544,6 @@ static int hns3_nic_set_features(struct net_device *netdev,
 			return ret;
 	}
 
-	if ((changed & NETIF_F_HW_VLAN_CTAG_FILTER) &&
-	    h->ae_algo->ops->enable_vlan_filter) {
-		enable = !!(features & NETIF_F_HW_VLAN_CTAG_FILTER);
-		h->ae_algo->ops->enable_vlan_filter(h, enable);
-	}
-
 	if ((changed & NETIF_F_HW_VLAN_CTAG_RX) &&
 	    h->ae_algo->ops->enable_hw_strip_rxvtag) {
 		enable = !!(features & NETIF_F_HW_VLAN_CTAG_RX);
-- 
cgit v1.2.3-59-g8ed1b


From 53bd63afbd659017d20dfb7ac3a53ceb8cbb338a Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 30 May 2020 13:29:52 +0300
Subject: net: dsa: sja1105: suppress -Wmissing-prototypes in
 sja1105_static_config.c

Newer compilers complain with W=1 builds that there are non-static
functions defined in sja1105_static_config.c that don't have a
prototype, because their prototype is defined in sja1105.h which this
translation unit does not include.

I don't entirely understand what is the point of these warnings, since
in principle there's nothing wrong with that. But let's move the
prototypes to a header file that _is_ included by
sja1105_static_config.c, since that will make these warnings go away.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105.h               | 18 ------------------
 drivers/net/dsa/sja1105/sja1105_static_config.h | 18 ++++++++++++++++++
 2 files changed, 18 insertions(+), 18 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105.h b/drivers/net/dsa/sja1105/sja1105.h
index cb3c81a49fbc..29ed21687295 100644
--- a/drivers/net/dsa/sja1105/sja1105.h
+++ b/drivers/net/dsa/sja1105/sja1105.h
@@ -323,24 +323,6 @@ int sja1105pqrs_fdb_add(struct dsa_switch *ds, int port,
 int sja1105pqrs_fdb_del(struct dsa_switch *ds, int port,
 			const unsigned char *addr, u16 vid);
 
-/* Common implementations for the static and dynamic configs */
-size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
-					   enum packing_op op);
-size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr,
-					   enum packing_op op);
-size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr,
-					 enum packing_op op);
-size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
-					 enum packing_op op);
-size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
-				       enum packing_op op);
-size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
-					    enum packing_op op);
-size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr,
-					    enum packing_op op);
-size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
-				       enum packing_op op);
-
 /* From sja1105_flower.c */
 int sja1105_cls_flower_del(struct dsa_switch *ds, int port,
 			   struct flow_cls_offload *cls, bool ingress);
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 9b62b9b5549d..8279f4f31eff 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -430,4 +430,22 @@ void sja1105_unpack(const void *buf, u64 *val, int start, int end, size_t len);
 void sja1105_packing(void *buf, u64 *val, int start, int end,
 		     size_t len, enum packing_op op);
 
+/* Common implementations for the static and dynamic configs */
+size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op);
+size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+					   enum packing_op op);
+size_t sja1105et_l2_lookup_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
+size_t sja1105_vlan_lookup_entry_packing(void *buf, void *entry_ptr,
+					 enum packing_op op);
+size_t sja1105_retagging_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
+size_t sja1105pqrs_mac_config_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op);
+size_t sja1105pqrs_avb_params_entry_packing(void *buf, void *entry_ptr,
+					    enum packing_op op);
+size_t sja1105_vl_lookup_entry_packing(void *buf, void *entry_ptr,
+				       enum packing_op op);
+
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 99b981f431323e31c279ee7aee20a4c501a1e89d Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sat, 30 May 2020 13:29:53 +0300
Subject: net: dsa: sja1105: fix port mirroring for P/Q/R/S

The dynamic configuration interface for the General Params and the L2
Lookup Params tables was copy-pasted between E/T devices and P/Q/R/S
devices. Nonetheless, these interfaces are bitwise different.

The driver is using dynamic reconfiguration of the General Parameters
table for the port mirroring feature, which was therefore broken on
P/Q/R/S.

Note that this patch can't be backported easily very far to stable trees
(since it conflicts with some other development done since the
introduction of the driver). So the Fixes: tag is purely informational.

Fixes: 8aa9ebccae87 ("net: dsa: Introduce driver for NXP SJA1105 5-port L2 switch")
Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_dynamic_config.c | 50 +++++++++++++++++++-----
 drivers/net/dsa/sja1105/sja1105_static_config.c  | 10 ++---
 drivers/net/dsa/sja1105/sja1105_static_config.h  |  4 ++
 3 files changed, 48 insertions(+), 16 deletions(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
index 7516f2ffdd4e..4471eeccc293 100644
--- a/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_dynamic_config.c
@@ -127,9 +127,15 @@
 #define SJA1105ET_SIZE_L2_LOOKUP_PARAMS_DYN_CMD			\
 	SJA1105_SIZE_DYN_CMD
 
+#define SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_DYN_CMD		\
+	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY)
+
 #define SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD			\
 	SJA1105_SIZE_DYN_CMD
 
+#define SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD			\
+	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY)
+
 #define SJA1105PQRS_SIZE_AVB_PARAMS_DYN_CMD			\
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_AVB_PARAMS_ENTRY)
 
@@ -143,7 +149,7 @@
 	(SJA1105_SIZE_DYN_CMD + SJA1105PQRS_SIZE_CBS_ENTRY)
 
 #define SJA1105_MAX_DYN_CMD_SIZE				\
-	SJA1105PQRS_SIZE_MAC_CONFIG_DYN_CMD
+	SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD
 
 struct sja1105_dyn_cmd {
 	bool search;
@@ -500,6 +506,18 @@ sja1105et_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
 	return 0;
 }
 
+static void
+sja1105pqrs_l2_lookup_params_cmd_packing(void *buf,
+					 struct sja1105_dyn_cmd *cmd,
+					 enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 30, 30, size, op);
+}
+
 static void
 sja1105et_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				     enum packing_op op)
@@ -522,6 +540,18 @@ sja1105et_general_params_entry_packing(void *buf, void *entry_ptr,
 	return 0;
 }
 
+static void
+sja1105pqrs_general_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
+				       enum packing_op op)
+{
+	u8 *p = buf + SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY;
+	const int size = SJA1105_SIZE_DYN_CMD;
+
+	sja1105_packing(p, &cmd->valid,   31, 31, size, op);
+	sja1105_packing(p, &cmd->errors,  30, 30, size, op);
+	sja1105_packing(p, &cmd->rdwrset, 28, 28, size, op);
+}
+
 static void
 sja1105pqrs_avb_params_cmd_packing(void *buf, struct sja1105_dyn_cmd *cmd,
 				   enum packing_op op)
@@ -761,12 +791,12 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 	[BLK_IDX_SCHEDULE_ENTRY_POINTS_PARAMS] = {0},
 	[BLK_IDX_VL_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_L2_LOOKUP_PARAMS] = {
-		.entry_packing = sja1105et_l2_lookup_params_entry_packing,
-		.cmd_packing = sja1105et_l2_lookup_params_cmd_packing,
+		.entry_packing = sja1105pqrs_l2_lookup_params_entry_packing,
+		.cmd_packing = sja1105pqrs_l2_lookup_params_cmd_packing,
 		.max_entry_count = SJA1105_MAX_L2_LOOKUP_PARAMS_COUNT,
 		.access = (OP_READ | OP_WRITE),
-		.packed_size = SJA1105ET_SIZE_L2_LOOKUP_PARAMS_DYN_CMD,
-		.addr = 0x38,
+		.packed_size = SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_DYN_CMD,
+		.addr = 0x54,
 	},
 	[BLK_IDX_L2_FORWARDING_PARAMS] = {0},
 	[BLK_IDX_AVB_PARAMS] = {
@@ -778,12 +808,12 @@ struct sja1105_dynamic_table_ops sja1105pqrs_dyn_ops[BLK_IDX_MAX_DYN] = {
 		.addr = 0x8003,
 	},
 	[BLK_IDX_GENERAL_PARAMS] = {
-		.entry_packing = sja1105et_general_params_entry_packing,
-		.cmd_packing = sja1105et_general_params_cmd_packing,
+		.entry_packing = sja1105pqrs_general_params_entry_packing,
+		.cmd_packing = sja1105pqrs_general_params_cmd_packing,
 		.max_entry_count = SJA1105_MAX_GENERAL_PARAMS_COUNT,
-		.access = OP_WRITE,
-		.packed_size = SJA1105ET_SIZE_GENERAL_PARAMS_DYN_CMD,
-		.addr = 0x34,
+		.access = (OP_READ | OP_WRITE),
+		.packed_size = SJA1105PQRS_SIZE_GENERAL_PARAMS_DYN_CMD,
+		.addr = 0x3B,
 	},
 	[BLK_IDX_RETAGGING] = {
 		.entry_packing = sja1105_retagging_entry_packing,
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.c b/drivers/net/dsa/sja1105/sja1105_static_config.c
index 780aca034cdc..ff3fe471efc2 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.c
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.c
@@ -146,9 +146,8 @@ static size_t sja1105et_general_params_entry_packing(void *buf, void *entry_ptr,
 /* TPID and TPID2 are intentionally reversed so that semantic
  * compatibility with E/T is kept.
  */
-static size_t
-sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
-					 enum packing_op op)
+size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
+						enum packing_op op)
 {
 	const size_t size = SJA1105PQRS_SIZE_GENERAL_PARAMS_ENTRY;
 	struct sja1105_general_params_entry *entry = entry_ptr;
@@ -228,9 +227,8 @@ sja1105et_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
 	return size;
 }
 
-static size_t
-sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
-					   enum packing_op op)
+size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op)
 {
 	const size_t size = SJA1105PQRS_SIZE_L2_LOOKUP_PARAMS_ENTRY;
 	struct sja1105_l2_lookup_params_entry *entry = entry_ptr;
diff --git a/drivers/net/dsa/sja1105/sja1105_static_config.h b/drivers/net/dsa/sja1105/sja1105_static_config.h
index 8279f4f31eff..ee0f10062763 100644
--- a/drivers/net/dsa/sja1105/sja1105_static_config.h
+++ b/drivers/net/dsa/sja1105/sja1105_static_config.h
@@ -431,6 +431,10 @@ void sja1105_packing(void *buf, u64 *val, int start, int end,
 		     size_t len, enum packing_op op);
 
 /* Common implementations for the static and dynamic configs */
+size_t sja1105pqrs_general_params_entry_packing(void *buf, void *entry_ptr,
+						enum packing_op op);
+size_t sja1105pqrs_l2_lookup_params_entry_packing(void *buf, void *entry_ptr,
+						  enum packing_op op);
 size_t sja1105_l2_forwarding_entry_packing(void *buf, void *entry_ptr,
 					   enum packing_op op);
 size_t sja1105pqrs_l2_lookup_entry_packing(void *buf, void *entry_ptr,
-- 
cgit v1.2.3-59-g8ed1b


From b8ded9de8db34dd209a3dece94cf54fc414e78f7 Mon Sep 17 00:00:00 2001
From: Karsten Graul <kgraul@linux.ibm.com>
Date: Sat, 30 May 2020 16:42:37 +0200
Subject: net/smc: pre-fetch send buffer outside of send_lock

Pre-fetch send buffer for the CDC validation message before entering the
send_lock. Without that the send call might fail with -EBUSY because
there are no free buffers and waiting for buffers is not possible under
send_lock.

Signed-off-by: Karsten Graul <kgraul@linux.ibm.com>
Reviewed-by: Ursula Braun <ubraun@linux.ibm.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/smc/smc_cdc.c  | 10 +++-------
 net/smc/smc_cdc.h  |  4 +++-
 net/smc/smc_core.c | 18 +++++++++++++++---
 3 files changed, 21 insertions(+), 11 deletions(-)

diff --git a/net/smc/smc_cdc.c b/net/smc/smc_cdc.c
index b2b85e1be72c..a47e8855e045 100644
--- a/net/smc/smc_cdc.c
+++ b/net/smc/smc_cdc.c
@@ -116,19 +116,15 @@ int smc_cdc_msg_send(struct smc_connection *conn,
 }
 
 /* send a validation msg indicating the move of a conn to an other QP link */
-int smcr_cdc_msg_send_validation(struct smc_connection *conn)
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+				 struct smc_cdc_tx_pend *pend,
+				 struct smc_wr_buf *wr_buf)
 {
 	struct smc_host_cdc_msg *local = &conn->local_tx_ctrl;
 	struct smc_link *link = conn->lnk;
-	struct smc_cdc_tx_pend *pend;
-	struct smc_wr_buf *wr_buf;
 	struct smc_cdc_msg *peer;
 	int rc;
 
-	rc = smc_cdc_get_free_slot(conn, link, &wr_buf, NULL, &pend);
-	if (rc)
-		return rc;
-
 	peer = (struct smc_cdc_msg *)wr_buf;
 	peer->common.type = local->common.type;
 	peer->len = local->len;
diff --git a/net/smc/smc_cdc.h b/net/smc/smc_cdc.h
index 2ddcc5fb5ceb..0a0a89abd38b 100644
--- a/net/smc/smc_cdc.h
+++ b/net/smc/smc_cdc.h
@@ -296,7 +296,9 @@ int smc_cdc_msg_send(struct smc_connection *conn, struct smc_wr_buf *wr_buf,
 		     struct smc_cdc_tx_pend *pend);
 int smc_cdc_get_slot_and_msg_send(struct smc_connection *conn);
 int smcd_cdc_msg_send(struct smc_connection *conn);
-int smcr_cdc_msg_send_validation(struct smc_connection *conn);
+int smcr_cdc_msg_send_validation(struct smc_connection *conn,
+				 struct smc_cdc_tx_pend *pend,
+				 struct smc_wr_buf *wr_buf);
 int smc_cdc_init(void) __init;
 void smcd_cdc_rx_init(struct smc_connection *conn);
 
diff --git a/net/smc/smc_core.c b/net/smc/smc_core.c
index 65de700e1f17..7964a21e5e6f 100644
--- a/net/smc/smc_core.c
+++ b/net/smc/smc_core.c
@@ -483,7 +483,8 @@ static int smc_write_space(struct smc_connection *conn)
 	return space;
 }
 
-static int smc_switch_cursor(struct smc_sock *smc)
+static int smc_switch_cursor(struct smc_sock *smc, struct smc_cdc_tx_pend *pend,
+			     struct smc_wr_buf *wr_buf)
 {
 	struct smc_connection *conn = &smc->conn;
 	union smc_host_cursor cons, fin;
@@ -520,11 +521,14 @@ static int smc_switch_cursor(struct smc_sock *smc)
 
 	if (smc->sk.sk_state != SMC_INIT &&
 	    smc->sk.sk_state != SMC_CLOSED) {
-		rc = smcr_cdc_msg_send_validation(conn);
+		rc = smcr_cdc_msg_send_validation(conn, pend, wr_buf);
 		if (!rc) {
 			schedule_delayed_work(&conn->tx_work, 0);
 			smc->sk.sk_data_ready(&smc->sk);
 		}
+	} else {
+		smc_wr_tx_put_slot(conn->lnk,
+				   (struct smc_wr_tx_pend_priv *)pend);
 	}
 	return rc;
 }
@@ -533,7 +537,9 @@ struct smc_link *smc_switch_conns(struct smc_link_group *lgr,
 				  struct smc_link *from_lnk, bool is_dev_err)
 {
 	struct smc_link *to_lnk = NULL;
+	struct smc_cdc_tx_pend *pend;
 	struct smc_connection *conn;
+	struct smc_wr_buf *wr_buf;
 	struct smc_sock *smc;
 	struct rb_node *node;
 	int i, rc = 0;
@@ -582,10 +588,16 @@ again:
 		}
 		sock_hold(&smc->sk);
 		read_unlock_bh(&lgr->conns_lock);
+		/* pre-fetch buffer outside of send_lock, might sleep */
+		rc = smc_cdc_get_free_slot(conn, to_lnk, &wr_buf, NULL, &pend);
+		if (rc) {
+			smcr_link_down_cond_sched(to_lnk);
+			return NULL;
+		}
 		/* avoid race with smcr_tx_sndbuf_nonempty() */
 		spin_lock_bh(&conn->send_lock);
 		conn->lnk = to_lnk;
-		rc = smc_switch_cursor(smc);
+		rc = smc_switch_cursor(smc, pend, wr_buf);
 		spin_unlock_bh(&conn->send_lock);
 		sock_put(&smc->sk);
 		if (rc) {
-- 
cgit v1.2.3-59-g8ed1b


From 27dc36aefc73ce50a485c9d32c33b18832289203 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:54:36 +0200
Subject: r8169: change driver data type

Change driver private data type to struct rtl8169_private * to avoid
some overhead.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 60 +++++++++++++------------------
 1 file changed, 25 insertions(+), 35 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index d672ae77c644..810398ef7186 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4816,15 +4816,13 @@ rtl8169_get_stats64(struct net_device *dev, struct rtnl_link_stats64 *stats)
 	pm_runtime_put_noidle(&pdev->dev);
 }
 
-static void rtl8169_net_suspend(struct net_device *dev)
+static void rtl8169_net_suspend(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
-	if (!netif_running(dev))
+	if (!netif_running(tp->dev))
 		return;
 
 	phy_stop(tp->phydev);
-	netif_device_detach(dev);
+	netif_device_detach(tp->dev);
 
 	rtl_lock_work(tp);
 	napi_disable(&tp->napi);
@@ -4840,20 +4838,17 @@ static void rtl8169_net_suspend(struct net_device *dev)
 
 static int rtl8169_suspend(struct device *device)
 {
-	struct net_device *dev = dev_get_drvdata(device);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = dev_get_drvdata(device);
 
-	rtl8169_net_suspend(dev);
+	rtl8169_net_suspend(tp);
 	clk_disable_unprepare(tp->clk);
 
 	return 0;
 }
 
-static void __rtl8169_resume(struct net_device *dev)
+static void __rtl8169_resume(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
-
-	netif_device_attach(dev);
+	netif_device_attach(tp->dev);
 
 	rtl_pll_power_up(tp);
 	rtl8169_init_phy(tp);
@@ -4869,23 +4864,21 @@ static void __rtl8169_resume(struct net_device *dev)
 
 static int rtl8169_resume(struct device *device)
 {
-	struct net_device *dev = dev_get_drvdata(device);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = dev_get_drvdata(device);
 
-	rtl_rar_set(tp, dev->dev_addr);
+	rtl_rar_set(tp, tp->dev->dev_addr);
 
 	clk_prepare_enable(tp->clk);
 
-	if (netif_running(dev))
-		__rtl8169_resume(dev);
+	if (netif_running(tp->dev))
+		__rtl8169_resume(tp);
 
 	return 0;
 }
 
 static int rtl8169_runtime_suspend(struct device *device)
 {
-	struct net_device *dev = dev_get_drvdata(device);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = dev_get_drvdata(device);
 
 	if (!tp->TxDescArray)
 		return 0;
@@ -4894,7 +4887,7 @@ static int rtl8169_runtime_suspend(struct device *device)
 	__rtl8169_set_wol(tp, WAKE_ANY);
 	rtl_unlock_work(tp);
 
-	rtl8169_net_suspend(dev);
+	rtl8169_net_suspend(tp);
 
 	/* Update counters before going runtime suspend */
 	rtl8169_update_counters(tp);
@@ -4904,10 +4897,9 @@ static int rtl8169_runtime_suspend(struct device *device)
 
 static int rtl8169_runtime_resume(struct device *device)
 {
-	struct net_device *dev = dev_get_drvdata(device);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = dev_get_drvdata(device);
 
-	rtl_rar_set(tp, dev->dev_addr);
+	rtl_rar_set(tp, tp->dev->dev_addr);
 
 	if (!tp->TxDescArray)
 		return 0;
@@ -4916,16 +4908,16 @@ static int rtl8169_runtime_resume(struct device *device)
 	__rtl8169_set_wol(tp, tp->saved_wolopts);
 	rtl_unlock_work(tp);
 
-	__rtl8169_resume(dev);
+	__rtl8169_resume(tp);
 
 	return 0;
 }
 
 static int rtl8169_runtime_idle(struct device *device)
 {
-	struct net_device *dev = dev_get_drvdata(device);
+	struct rtl8169_private *tp = dev_get_drvdata(device);
 
-	if (!netif_running(dev) || !netif_carrier_ok(dev))
+	if (!netif_running(tp->dev) || !netif_carrier_ok(tp->dev))
 		pm_schedule_suspend(device, 10000);
 
 	return -EBUSY;
@@ -4970,13 +4962,12 @@ static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 
 static void rtl_shutdown(struct pci_dev *pdev)
 {
-	struct net_device *dev = pci_get_drvdata(pdev);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = pci_get_drvdata(pdev);
 
-	rtl8169_net_suspend(dev);
+	rtl8169_net_suspend(tp);
 
 	/* Restore original MAC address */
-	rtl_rar_set(tp, dev->perm_addr);
+	rtl_rar_set(tp, tp->dev->perm_addr);
 
 	rtl8169_hw_reset(tp);
 
@@ -4993,13 +4984,12 @@ static void rtl_shutdown(struct pci_dev *pdev)
 
 static void rtl_remove_one(struct pci_dev *pdev)
 {
-	struct net_device *dev = pci_get_drvdata(pdev);
-	struct rtl8169_private *tp = netdev_priv(dev);
+	struct rtl8169_private *tp = pci_get_drvdata(pdev);
 
 	if (pci_dev_run_wake(pdev))
 		pm_runtime_get_noresume(&pdev->dev);
 
-	unregister_netdev(dev);
+	unregister_netdev(tp->dev);
 
 	if (r8168_check_dash(tp))
 		rtl8168_driver_stop(tp);
@@ -5007,7 +4997,7 @@ static void rtl_remove_one(struct pci_dev *pdev)
 	rtl_release_firmware(tp);
 
 	/* restore original MAC address */
-	rtl_rar_set(tp, dev->perm_addr);
+	rtl_rar_set(tp, tp->dev->perm_addr);
 }
 
 static const struct net_device_ops rtl_netdev_ops = {
@@ -5446,7 +5436,7 @@ static int rtl_init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 	if (!tp->counters)
 		return -ENOMEM;
 
-	pci_set_drvdata(pdev, dev);
+	pci_set_drvdata(pdev, tp);
 
 	rc = r8169_mdio_register(tp);
 	if (rc)
-- 
cgit v1.2.3-59-g8ed1b


From 01bd753d039553cf63830823d1dcc8a864174afc Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:55:30 +0200
Subject: r8169: enable WAKE_PHY as only WoL source when runtime-suspending

We go to runtime-suspend a few seconds after cable removal. As the cable
is removed, "physical link up" is the only meaningful WoL source.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 810398ef7186..6fcd35ac8110 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4884,7 +4884,7 @@ static int rtl8169_runtime_suspend(struct device *device)
 		return 0;
 
 	rtl_lock_work(tp);
-	__rtl8169_set_wol(tp, WAKE_ANY);
+	__rtl8169_set_wol(tp, WAKE_PHY);
 	rtl_unlock_work(tp);
 
 	rtl8169_net_suspend(tp);
-- 
cgit v1.2.3-59-g8ed1b


From 9fdd50c579802adad09028eba86e5a7ef7c9b738 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:56:14 +0200
Subject: r8169: don't reset tx ring indexes in rtl8169_tx_clear

In places where the indexes have to be reset, we call
rtl8169_init_ring_indexes() anyway after rtl8169_tx_clear().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 6fcd35ac8110..43652c450892 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -3955,7 +3955,6 @@ static void rtl8169_tx_clear_range(struct rtl8169_private *tp, u32 start,
 static void rtl8169_tx_clear(struct rtl8169_private *tp)
 {
 	rtl8169_tx_clear_range(tp, tp->dirty_tx, NUM_TX_DESC);
-	tp->cur_tx = tp->dirty_tx = 0;
 	netdev_reset_queue(tp->dev);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From bac75d8565e800f7a4494d8c873e38dce33d6079 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:57:10 +0200
Subject: r8169: move some calls to rtl8169_hw_reset

Move calls that are needed before and after calling rtl8169_hw_reset()
into this function. This requires moving the function within the code.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 75 +++++++++++++++----------------
 1 file changed, 36 insertions(+), 39 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 43652c450892..5f3c50fb0647 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -2530,36 +2530,6 @@ static void rtl_enable_rxdvgate(struct rtl8169_private *tp)
 	rtl_wait_txrx_fifo_empty(tp);
 }
 
-static void rtl8169_hw_reset(struct rtl8169_private *tp)
-{
-	/* Disable interrupts */
-	rtl8169_irq_mask_and_ack(tp);
-
-	rtl_rx_close(tp);
-
-	switch (tp->mac_version) {
-	case RTL_GIGA_MAC_VER_27:
-	case RTL_GIGA_MAC_VER_28:
-	case RTL_GIGA_MAC_VER_31:
-		rtl_loop_wait_low(tp, &rtl_npq_cond, 20, 2000);
-		break;
-	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
-		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
-		rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
-		break;
-	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
-		rtl_enable_rxdvgate(tp);
-		fsleep(2000);
-		break;
-	default:
-		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
-		udelay(100);
-		break;
-	}
-
-	rtl_hw_reset(tp);
-}
-
 static void rtl_set_tx_config_registers(struct rtl8169_private *tp)
 {
 	u32 val = TX_DMA_BURST << TxDMAShift |
@@ -3958,6 +3928,42 @@ static void rtl8169_tx_clear(struct rtl8169_private *tp)
 	netdev_reset_queue(tp->dev);
 }
 
+static void rtl8169_hw_reset(struct rtl8169_private *tp)
+{
+	/* Give a racing hard_start_xmit a few cycles to complete. */
+	synchronize_rcu();
+
+	/* Disable interrupts */
+	rtl8169_irq_mask_and_ack(tp);
+
+	rtl_rx_close(tp);
+
+	switch (tp->mac_version) {
+	case RTL_GIGA_MAC_VER_27:
+	case RTL_GIGA_MAC_VER_28:
+	case RTL_GIGA_MAC_VER_31:
+		rtl_loop_wait_low(tp, &rtl_npq_cond, 20, 2000);
+		break;
+	case RTL_GIGA_MAC_VER_34 ... RTL_GIGA_MAC_VER_38:
+		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
+		rtl_loop_wait_high(tp, &rtl_txcfg_empty_cond, 100, 666);
+		break;
+	case RTL_GIGA_MAC_VER_40 ... RTL_GIGA_MAC_VER_61:
+		rtl_enable_rxdvgate(tp);
+		fsleep(2000);
+		break;
+	default:
+		RTL_W8(tp, ChipCmd, RTL_R8(tp, ChipCmd) | StopReq);
+		fsleep(100);
+		break;
+	}
+
+	rtl_hw_reset(tp);
+
+	rtl8169_tx_clear(tp);
+	rtl8169_init_ring_indexes(tp);
+}
+
 static void rtl_reset_work(struct rtl8169_private *tp)
 {
 	struct net_device *dev = tp->dev;
@@ -3965,16 +3971,12 @@ static void rtl_reset_work(struct rtl8169_private *tp)
 
 	napi_disable(&tp->napi);
 	netif_stop_queue(dev);
-	synchronize_rcu();
 
 	rtl8169_hw_reset(tp);
 
 	for (i = 0; i < NUM_RX_DESC; i++)
 		rtl8169_mark_to_asic(tp->RxDescArray + i);
 
-	rtl8169_tx_clear(tp);
-	rtl8169_init_ring_indexes(tp);
-
 	napi_enable(&tp->napi);
 	rtl_hw_start(tp);
 	netif_wake_queue(dev);
@@ -4636,11 +4638,6 @@ static void rtl8169_down(struct net_device *dev)
 
 	rtl8169_hw_reset(tp);
 
-	/* Give a racing hard_start_xmit a few cycles to complete. */
-	synchronize_rcu();
-
-	rtl8169_tx_clear(tp);
-
 	rtl8169_rx_clear(tp);
 
 	rtl_pll_power_down(tp);
-- 
cgit v1.2.3-59-g8ed1b


From 8ac8e8c64b539a548a5f22d6b21f999eea38b0ee Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:58:35 +0200
Subject: r8169: make rtl8169_down central chip quiesce function

Functionality for quiescing the chip is spread across different
functions currently. Move it to rtl8169_down().

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 33 +++++++++++--------------------
 1 file changed, 11 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index 5f3c50fb0647..fd93377f961f 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4627,20 +4627,21 @@ static int r8169_phy_connect(struct rtl8169_private *tp)
 	return 0;
 }
 
-static void rtl8169_down(struct net_device *dev)
+static void rtl8169_down(struct rtl8169_private *tp)
 {
-	struct rtl8169_private *tp = netdev_priv(dev);
+	rtl_lock_work(tp);
 
-	phy_stop(tp->phydev);
+	/* Clear all task flags */
+	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
 
+	phy_stop(tp->phydev);
 	napi_disable(&tp->napi);
-	netif_stop_queue(dev);
 
 	rtl8169_hw_reset(tp);
 
-	rtl8169_rx_clear(tp);
-
 	rtl_pll_power_down(tp);
+
+	rtl_unlock_work(tp);
 }
 
 static int rtl8169_close(struct net_device *dev)
@@ -4653,12 +4654,9 @@ static int rtl8169_close(struct net_device *dev)
 	/* Update counters before going down */
 	rtl8169_update_counters(tp);
 
-	rtl_lock_work(tp);
-	/* Clear all task flags */
-	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
-
-	rtl8169_down(dev);
-	rtl_unlock_work(tp);
+	netif_stop_queue(dev);
+	rtl8169_down(tp);
+	rtl8169_rx_clear(tp);
 
 	cancel_work_sync(&tp->wk.work);
 
@@ -4817,17 +4815,8 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
 	if (!netif_running(tp->dev))
 		return;
 
-	phy_stop(tp->phydev);
 	netif_device_detach(tp->dev);
-
-	rtl_lock_work(tp);
-	napi_disable(&tp->napi);
-	/* Clear all task flags */
-	bitmap_zero(tp->wk.flags, RTL_FLAG_MAX);
-
-	rtl_unlock_work(tp);
-
-	rtl_pll_power_down(tp);
+	rtl8169_down(tp);
 }
 
 #ifdef CONFIG_PM
-- 
cgit v1.2.3-59-g8ed1b


From 67ee63ef2b15fcb0cb692010083592672d18f0a8 Mon Sep 17 00:00:00 2001
From: Heiner Kallweit <hkallweit1@gmail.com>
Date: Sat, 30 May 2020 23:59:58 +0200
Subject: r8169: improve handling power management ops

Simplify handling the power management callbacks.

Signed-off-by: Heiner Kallweit <hkallweit1@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/realtek/r8169_main.c | 28 +++++++++-------------------
 1 file changed, 9 insertions(+), 19 deletions(-)

diff --git a/drivers/net/ethernet/realtek/r8169_main.c b/drivers/net/ethernet/realtek/r8169_main.c
index fd93377f961f..4d2ec9742cee 100644
--- a/drivers/net/ethernet/realtek/r8169_main.c
+++ b/drivers/net/ethernet/realtek/r8169_main.c
@@ -4821,7 +4821,7 @@ static void rtl8169_net_suspend(struct rtl8169_private *tp)
 
 #ifdef CONFIG_PM
 
-static int rtl8169_suspend(struct device *device)
+static int __maybe_unused rtl8169_suspend(struct device *device)
 {
 	struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4847,7 +4847,7 @@ static void __rtl8169_resume(struct rtl8169_private *tp)
 	rtl_unlock_work(tp);
 }
 
-static int rtl8169_resume(struct device *device)
+static int __maybe_unused rtl8169_resume(struct device *device)
 {
 	struct rtl8169_private *tp = dev_get_drvdata(device);
 
@@ -4909,24 +4909,12 @@ static int rtl8169_runtime_idle(struct device *device)
 }
 
 static const struct dev_pm_ops rtl8169_pm_ops = {
-	.suspend		= rtl8169_suspend,
-	.resume			= rtl8169_resume,
-	.freeze			= rtl8169_suspend,
-	.thaw			= rtl8169_resume,
-	.poweroff		= rtl8169_suspend,
-	.restore		= rtl8169_resume,
-	.runtime_suspend	= rtl8169_runtime_suspend,
-	.runtime_resume		= rtl8169_runtime_resume,
-	.runtime_idle		= rtl8169_runtime_idle,
+	SET_SYSTEM_SLEEP_PM_OPS(rtl8169_suspend, rtl8169_resume)
+	SET_RUNTIME_PM_OPS(rtl8169_runtime_suspend, rtl8169_runtime_resume,
+			   rtl8169_runtime_idle)
 };
 
-#define RTL8169_PM_OPS	(&rtl8169_pm_ops)
-
-#else /* !CONFIG_PM */
-
-#define RTL8169_PM_OPS	NULL
-
-#endif /* !CONFIG_PM */
+#endif /* CONFIG_PM */
 
 static void rtl_wol_shutdown_quirk(struct rtl8169_private *tp)
 {
@@ -5458,7 +5446,9 @@ static struct pci_driver rtl8169_pci_driver = {
 	.probe		= rtl_init_one,
 	.remove		= rtl_remove_one,
 	.shutdown	= rtl_shutdown,
-	.driver.pm	= RTL8169_PM_OPS,
+#ifdef CONFIG_PM
+	.driver.pm	= &rtl8169_pm_ops,
+#endif
 };
 
 module_pci_driver(rtl8169_pci_driver);
-- 
cgit v1.2.3-59-g8ed1b


From 72b4868211a85d040c42444620f2197bb0094ac8 Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 28 May 2020 22:12:35 -0700
Subject: vxlan: add check to prevent use of remote ip attributes with
 NDA_NH_ID

NDA_NH_ID represents a remote ip or a group of remote ips.
It allows use of nexthop groups in lieu of a remote ip or a
list of remote ips supported by the fdb api.

Current code ignores the other remote ip attrs when NDA_NH_ID is
specified. In the spirit of strict checking, this commit adds a
check to explicitly return an error on incorrect usage.

Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index a0015cdedfaf..fe606c688855 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -1196,6 +1196,10 @@ static int vxlan_fdb_parse(struct nlattr *tb[], struct vxlan_dev *vxlan,
 	struct net *net = dev_net(vxlan->dev);
 	int err;
 
+	if (tb[NDA_NH_ID] && (tb[NDA_DST] || tb[NDA_VNI] || tb[NDA_IFINDEX] ||
+	    tb[NDA_PORT]))
+		return -EINVAL;
+
 	if (tb[NDA_DST]) {
 		err = vxlan_nla_get_addr(ip, tb[NDA_DST]);
 		if (err)
-- 
cgit v1.2.3-59-g8ed1b


From 79472fe873dd307101f9b1dbfe0fedebae42219a Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Thu, 28 May 2020 22:12:36 -0700
Subject: vxlan: few locking fixes in nexthop event handler

- remove fdb from nh_list before the rcu grace period
- protect fdb->vdev with rcu
- hold spin lock before destroying fdb

Fixes: c7cdbe2efc40 ("vxlan: support for nexthop notifiers")
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Reviewed-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 32 +++++++++++++++++++++++++-------
 1 file changed, 25 insertions(+), 7 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index fe606c688855..39bc10a7fd2e 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -81,7 +81,7 @@ struct vxlan_fdb {
 	u16		  flags;	/* see ndm_flags and below */
 	struct list_head  nh_list;
 	struct nexthop __rcu *nh;
-	struct vxlan_dev  *vdev;
+	struct vxlan_dev  __rcu *vdev;
 };
 
 #define NTF_VXLAN_ADDED_BY_USER 0x100
@@ -837,7 +837,7 @@ static struct vxlan_fdb *vxlan_fdb_alloc(struct vxlan_dev *vxlan, const u8 *mac,
 	f->updated = f->used = jiffies;
 	f->vni = src_vni;
 	f->nh = NULL;
-	f->vdev = vxlan;
+	RCU_INIT_POINTER(f->vdev, vxlan);
 	INIT_LIST_HEAD(&f->nh_list);
 	INIT_LIST_HEAD(&f->remotes);
 	memcpy(f->eth_addr, mac, ETH_ALEN);
@@ -963,7 +963,7 @@ static void __vxlan_fdb_free(struct vxlan_fdb *f)
 	nh = rcu_dereference_raw(f->nh);
 	if (nh) {
 		rcu_assign_pointer(f->nh, NULL);
-		list_del_rcu(&f->nh_list);
+		rcu_assign_pointer(f->vdev, NULL);
 		nexthop_put(nh);
 	}
 
@@ -1000,7 +1000,7 @@ static void vxlan_fdb_destroy(struct vxlan_dev *vxlan, struct vxlan_fdb *f,
 	}
 
 	hlist_del_rcu(&f->hlist);
-	f->vdev = NULL;
+	list_del_rcu(&f->nh_list);
 	call_rcu(&f->rcu, vxlan_fdb_free);
 }
 
@@ -4615,17 +4615,35 @@ static struct notifier_block vxlan_switchdev_notifier_block __read_mostly = {
 	.notifier_call = vxlan_switchdev_event,
 };
 
+static void vxlan_fdb_nh_flush(struct nexthop *nh)
+{
+	struct vxlan_fdb *fdb;
+	struct vxlan_dev *vxlan;
+	u32 hash_index;
+
+	rcu_read_lock();
+	list_for_each_entry_rcu(fdb, &nh->fdb_list, nh_list) {
+		vxlan = rcu_dereference(fdb->vdev);
+		WARN_ON(!vxlan);
+		hash_index = fdb_head_index(vxlan, fdb->eth_addr,
+					    vxlan->default_dst.remote_vni);
+		spin_lock_bh(&vxlan->hash_lock[hash_index]);
+		if (!hlist_unhashed(&fdb->hlist))
+			vxlan_fdb_destroy(vxlan, fdb, false, false);
+		spin_unlock_bh(&vxlan->hash_lock[hash_index]);
+	}
+	rcu_read_unlock();
+}
+
 static int vxlan_nexthop_event(struct notifier_block *nb,
 			       unsigned long event, void *ptr)
 {
 	struct nexthop *nh = ptr;
-	struct vxlan_fdb *fdb, *tmp;
 
 	if (!nh || event != NEXTHOP_EVENT_DEL)
 		return NOTIFY_DONE;
 
-	list_for_each_entry_safe(fdb, tmp, &nh->fdb_list, nh_list)
-		vxlan_fdb_destroy(fdb->vdev, fdb, false, false);
+	vxlan_fdb_nh_flush(nh);
 
 	return NOTIFY_DONE;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 1c0522b4a2e143fa6e55e4bd2308415c81184ec7 Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 29 May 2020 14:16:53 +0300
Subject: selftests: forwarding: mirror_lib: Use mausezahn

Using ping in tests is error-prone, because ping is too smart. On a
flaky system (notably in a simulator), when packets don't come quickly
enough, more pings are sent, and that throws off counters. Instead use
mausezahn to generate ICMP echo request packets. That allows us to
send them in quicker succession as well, because the reason the ping
was made slow in the first place was to make the tests work on
simulated systems.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/mirror_lib.sh | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/forwarding/mirror_lib.sh b/tools/testing/selftests/net/forwarding/mirror_lib.sh
index 00797597fcf5..c33bfd7ba214 100644
--- a/tools/testing/selftests/net/forwarding/mirror_lib.sh
+++ b/tools/testing/selftests/net/forwarding/mirror_lib.sh
@@ -29,11 +29,9 @@ mirror_test()
 	local pref=$1; shift
 	local expect=$1; shift
 
-	local ping_timeout=$((PING_TIMEOUT * 5))
 	local t0=$(tc_rule_stats_get $dev $pref)
-	ip vrf exec $vrf_name \
-	   ${PING} ${sip:+-I $sip} $dip -c 10 -i 0.5 -w $ping_timeout \
-		   &> /dev/null
+	$MZ $vrf_name ${sip:+-A $sip} -B $dip -a own -b bc -q \
+	    -c 10 -d 100ms -t icmp type=8
 	sleep 0.5
 	local t1=$(tc_rule_stats_get $dev $pref)
 	local delta=$((t1 - t0))
-- 
cgit v1.2.3-59-g8ed1b


From 3ed97037f063b9130b56991f55f346597d27440d Mon Sep 17 00:00:00 2001
From: Petr Machata <petrm@mellanox.com>
Date: Fri, 29 May 2020 14:16:54 +0300
Subject: selftests: forwarding: pedit_dsfield: Check counter value

A missing stats_update callback was recently added to act_pedit. Now that
iproute2 supports JSON dumping for pedit, extend the pedit_dsfield selftest
with a check that would have caught the fact that the callback was missing.

Signed-off-by: Petr Machata <petrm@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/forwarding/pedit_dsfield.sh | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
index 1181d647f6a7..55eeacf59241 100755
--- a/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
+++ b/tools/testing/selftests/net/forwarding/pedit_dsfield.sh
@@ -132,7 +132,12 @@ do_test_pedit_dsfield_common()
 	local pkts
 	pkts=$(busywait "$TC_HIT_TIMEOUT" until_counter_is ">= 10" \
 			tc_rule_handle_stats_get "dev $h2 ingress" 101)
-	check_err $? "Expected to get 10 packets, but got $pkts."
+	check_err $? "Expected to get 10 packets on test probe, but got $pkts."
+
+	pkts=$(tc_rule_handle_stats_get "$pedit_locus" 101)
+	((pkts >= 10))
+	check_err $? "Expected to get 10 packets on pedit rule, but got $pkts."
+
 	log_test "$pedit_locus pedit $pedit_action"
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 9b23203c32ee02cd316e39ba3ec243e0f7bf56de Mon Sep 17 00:00:00 2001
From: Geert Uytterhoeven <geert+renesas@glider.be>
Date: Fri, 29 May 2020 14:25:40 +0200
Subject: ravb: Mask PHY mode to avoid inserting delays twice

Until recently, the Micrel KSZ9031 PHY driver ignored any PHY mode
("RGMII-*ID") settings, but used the hardware defaults, augmented by
explicit configuration of individual skew values using the "*-skew-ps"
DT properties.  The lack of PHY mode support was compensated by the
EtherAVB MAC driver, which configures TX and/or RX internal delay
itself, based on the PHY mode.

However, now the KSZ9031 driver has gained PHY mode support, delays may
be configured twice, causing regressions.  E.g. on the Renesas
Salvator-X board with R-Car M3-W ES1.0, TX performance dropped from ca.
400 Mbps to 0.1-0.3 Mbps, as measured by nuttcp.

As internal delay configuration supported by the KSZ9031 PHY is too
limited for some use cases, the ability to configure MAC internal delay
is deemed useful and necessary.  Hence a proper fix would involve
splitting internal delay configuration in two parts, one for the PHY,
and one for the MAC.  However, this would require adding new DT
properties, thus breaking DTB backwards-compatibility.

Hence fix the regression in a backwards-compatible way, by letting
the EtherAVB driver mask the PHY mode when it has inserted a delay, to
avoid the PHY driver adding a second delay.  This also fixes messages
like:

    Micrel KSZ9031 Gigabit PHY e6800000.ethernet-ffffffff:00: *-skew-ps values should be used only with phy-mode = "rgmii"

as the PHY no longer sees the original RGMII-*ID mode.

Solving the issue by splitting configuration in two parts can be handled
in future patches, and would require retaining a backwards-compatibility
mode anyway.

Fixes: bcf3440c6dd78bfe ("net: phy: micrel: add phy-mode support for the KSZ9031 PHY")
Signed-off-by: Geert Uytterhoeven <geert+renesas@glider.be>
Reviewed-by: Andrew Lunn <andrew@lunn.ch>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/renesas/ravb_main.c | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/renesas/ravb_main.c b/drivers/net/ethernet/renesas/ravb_main.c
index 067ad25553b9..a442bcf64b9c 100644
--- a/drivers/net/ethernet/renesas/ravb_main.c
+++ b/drivers/net/ethernet/renesas/ravb_main.c
@@ -1014,6 +1014,7 @@ static int ravb_phy_init(struct net_device *ndev)
 	struct ravb_private *priv = netdev_priv(ndev);
 	struct phy_device *phydev;
 	struct device_node *pn;
+	phy_interface_t iface;
 	int err;
 
 	priv->link = 0;
@@ -1032,8 +1033,13 @@ static int ravb_phy_init(struct net_device *ndev)
 		}
 		pn = of_node_get(np);
 	}
-	phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0,
-				priv->phy_interface);
+
+	iface = priv->phy_interface;
+	if (priv->chip_id != RCAR_GEN2 && phy_interface_mode_is_rgmii(iface)) {
+		/* ravb_set_delay_mode() takes care of internal delay mode */
+		iface = PHY_INTERFACE_MODE_RGMII;
+	}
+	phydev = of_phy_connect(ndev, pn, ravb_adjust_link, 0, iface);
 	of_node_put(pn);
 	if (!phydev) {
 		netdev_err(ndev, "failed to connect PHY\n");
-- 
cgit v1.2.3-59-g8ed1b


From b0c19ed6088ab41dd2a727b60594b7297c15d6ce Mon Sep 17 00:00:00 2001
From: Toke Høiland-Jørgensen <toke@redhat.com>
Date: Fri, 29 May 2020 14:43:44 +0200
Subject: sch_cake: Take advantage of skb->hash where appropriate
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

While the other fq-based qdiscs take advantage of skb->hash and don't
recompute it if it is already set, sch_cake does not.

This was a deliberate choice because sch_cake hashes various parts of the
packet header to support its advanced flow isolation modes. However,
foregoing the use of skb->hash entirely loses a few important benefits:

- When skb->hash is set by hardware, a few CPU cycles can be saved by not
  hashing again in software.

- Tunnel encapsulations will generally preserve the value of skb->hash from
  before the encapsulation, which allows flow-based qdiscs to distinguish
  between flows even though the outer packet header no longer has flow
  information.

It turns out that we can preserve these desirable properties in many cases,
while still supporting the advanced flow isolation properties of sch_cake.
This patch does so by reusing the skb->hash value as the flow_hash part of
the hashing procedure in cake_hash() only in the following conditions:

- If the skb->hash is marked as covering the flow headers (skb->l4_hash is
  set)

AND

- NAT header rewriting is either disabled, or did not change any values
  used for hashing. The latter is important to match local-origin packets
  such as those of a tunnel endpoint.

The immediate motivation for fixing this was the recent patch to WireGuard
to preserve the skb->hash on encapsulation. As such, this is also what I
tested against; with this patch, added latency under load for competing
flows drops from ~8 ms to sub-1ms on an RRUL test over a WireGuard tunnel
going through a virtual link shaped to 1Gbps using sch_cake. This matches
the results we saw with a similar setup using sch_fq_codel when testing the
WireGuard patch.

Fixes: 046f6fd5daef ("sched: Add Common Applications Kept Enhanced (cake) qdisc")
Signed-off-by: Toke Høiland-Jørgensen <toke@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/sch_cake.c | 65 +++++++++++++++++++++++++++++++++++++++++-----------
 1 file changed, 51 insertions(+), 14 deletions(-)

diff --git a/net/sched/sch_cake.c b/net/sched/sch_cake.c
index 1496e87cd07b..60f8ae578819 100644
--- a/net/sched/sch_cake.c
+++ b/net/sched/sch_cake.c
@@ -584,26 +584,48 @@ static bool cobalt_should_drop(struct cobalt_vars *vars,
 	return drop;
 }
 
-static void cake_update_flowkeys(struct flow_keys *keys,
+static bool cake_update_flowkeys(struct flow_keys *keys,
 				 const struct sk_buff *skb)
 {
 #if IS_ENABLED(CONFIG_NF_CONNTRACK)
 	struct nf_conntrack_tuple tuple = {};
-	bool rev = !skb->_nfct;
+	bool rev = !skb->_nfct, upd = false;
+	__be32 ip;
 
 	if (tc_skb_protocol(skb) != htons(ETH_P_IP))
-		return;
+		return false;
 
 	if (!nf_ct_get_tuple_skb(&tuple, skb))
-		return;
+		return false;
 
-	keys->addrs.v4addrs.src = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
-	keys->addrs.v4addrs.dst = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+	ip = rev ? tuple.dst.u3.ip : tuple.src.u3.ip;
+	if (ip != keys->addrs.v4addrs.src) {
+		keys->addrs.v4addrs.src = ip;
+		upd = true;
+	}
+	ip = rev ? tuple.src.u3.ip : tuple.dst.u3.ip;
+	if (ip != keys->addrs.v4addrs.dst) {
+		keys->addrs.v4addrs.dst = ip;
+		upd = true;
+	}
 
 	if (keys->ports.ports) {
-		keys->ports.src = rev ? tuple.dst.u.all : tuple.src.u.all;
-		keys->ports.dst = rev ? tuple.src.u.all : tuple.dst.u.all;
+		__be16 port;
+
+		port = rev ? tuple.dst.u.all : tuple.src.u.all;
+		if (port != keys->ports.src) {
+			keys->ports.src = port;
+			upd = true;
+		}
+		port = rev ? tuple.src.u.all : tuple.dst.u.all;
+		if (port != keys->ports.dst) {
+			keys->ports.dst = port;
+			upd = true;
+		}
 	}
+	return upd;
+#else
+	return false;
 #endif
 }
 
@@ -624,23 +646,36 @@ static bool cake_ddst(int flow_mode)
 static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 		     int flow_mode, u16 flow_override, u16 host_override)
 {
+	bool hash_flows = (!flow_override && !!(flow_mode & CAKE_FLOW_FLOWS));
+	bool hash_hosts = (!host_override && !!(flow_mode & CAKE_FLOW_HOSTS));
+	bool nat_enabled = !!(flow_mode & CAKE_FLOW_NAT_FLAG);
 	u32 flow_hash = 0, srchost_hash = 0, dsthost_hash = 0;
 	u16 reduced_hash, srchost_idx, dsthost_idx;
 	struct flow_keys keys, host_keys;
+	bool use_skbhash = skb->l4_hash;
 
 	if (unlikely(flow_mode == CAKE_FLOW_NONE))
 		return 0;
 
-	/* If both overrides are set we can skip packet dissection entirely */
-	if ((flow_override || !(flow_mode & CAKE_FLOW_FLOWS)) &&
-	    (host_override || !(flow_mode & CAKE_FLOW_HOSTS)))
+	/* If both overrides are set, or we can use the SKB hash and nat mode is
+	 * disabled, we can skip packet dissection entirely. If nat mode is
+	 * enabled there's another check below after doing the conntrack lookup.
+	 */
+	if ((!hash_flows || (use_skbhash && !nat_enabled)) && !hash_hosts)
 		goto skip_hash;
 
 	skb_flow_dissect_flow_keys(skb, &keys,
 				   FLOW_DISSECTOR_F_STOP_AT_FLOW_LABEL);
 
-	if (flow_mode & CAKE_FLOW_NAT_FLAG)
-		cake_update_flowkeys(&keys, skb);
+	/* Don't use the SKB hash if we change the lookup keys from conntrack */
+	if (nat_enabled && cake_update_flowkeys(&keys, skb))
+		use_skbhash = false;
+
+	/* If we can still use the SKB hash and don't need the host hash, we can
+	 * skip the rest of the hashing procedure
+	 */
+	if (use_skbhash && !hash_hosts)
+		goto skip_hash;
 
 	/* flow_hash_from_keys() sorts the addresses by value, so we have
 	 * to preserve their order in a separate data structure to treat
@@ -679,12 +714,14 @@ static u32 cake_hash(struct cake_tin_data *q, const struct sk_buff *skb,
 	/* This *must* be after the above switch, since as a
 	 * side-effect it sorts the src and dst addresses.
 	 */
-	if (flow_mode & CAKE_FLOW_FLOWS)
+	if (hash_flows && !use_skbhash)
 		flow_hash = flow_hash_from_keys(&keys);
 
 skip_hash:
 	if (flow_override)
 		flow_hash = flow_override - 1;
+	else if (use_skbhash)
+		flow_hash = skb->hash;
 	if (host_override) {
 		dsthost_hash = host_override - 1;
 		srchost_hash = host_override - 1;
-- 
cgit v1.2.3-59-g8ed1b


From 39884604b11692158ce0c559fc603510b96f8c2e Mon Sep 17 00:00:00 2001
From: Paolo Abeni <pabeni@redhat.com>
Date: Fri, 29 May 2020 17:49:18 +0200
Subject: mptcp: fix NULL ptr dereference in MP_JOIN error path

When token lookup on MP_JOIN 3rd ack fails, the server
socket closes the incoming child with a reset. Such a socket
has the 'is_mptcp' flag set, but no msk socket associated
- due to the failed lookup.

While crafting the reset packet mptcp_established_options_mp()
will try to dereference the child's master socket, causing
a NULL ptr dereference.

This change addresses the issue with explicit fallback to
TCP in such error path.

Fixes: 729cd6436f35 ("mptcp: cope better with MP_JOIN failure")
Signed-off-by: Paolo Abeni <pabeni@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/mptcp/subflow.c | 20 ++++++++++++++++----
 1 file changed, 16 insertions(+), 4 deletions(-)

diff --git a/net/mptcp/subflow.c b/net/mptcp/subflow.c
index f3c06b8af92d..493b98a0825c 100644
--- a/net/mptcp/subflow.c
+++ b/net/mptcp/subflow.c
@@ -413,6 +413,20 @@ static void subflow_ulp_fallback(struct sock *sk,
 	tcp_sk(sk)->is_mptcp = 0;
 }
 
+static void subflow_drop_ctx(struct sock *ssk)
+{
+	struct mptcp_subflow_context *ctx = mptcp_subflow_ctx(ssk);
+
+	if (!ctx)
+		return;
+
+	subflow_ulp_fallback(ssk, ctx);
+	if (ctx->conn)
+		sock_put(ctx->conn);
+
+	kfree_rcu(ctx, rcu);
+}
+
 static struct sock *subflow_syn_recv_sock(const struct sock *sk,
 					  struct sk_buff *skb,
 					  struct request_sock *req,
@@ -485,10 +499,7 @@ create_child:
 			if (fallback_is_fatal)
 				goto dispose_child;
 
-			if (ctx) {
-				subflow_ulp_fallback(child, ctx);
-				kfree_rcu(ctx, rcu);
-			}
+			subflow_drop_ctx(child);
 			goto out;
 		}
 
@@ -537,6 +548,7 @@ out:
 	return child;
 
 dispose_child:
+	subflow_drop_ctx(child);
 	tcp_rsk(req)->drop_req = true;
 	tcp_send_active_reset(child, GFP_ATOMIC);
 	inet_csk_prepare_for_destroy_sock(child);
-- 
cgit v1.2.3-59-g8ed1b


From 5e9cf0f0a3e98992184442de24253fe1b9c40f2e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Fri, 29 May 2020 14:04:27 +0200
Subject: cfg80211: fix 6 GHz frequencies to kHz

The updates to change to kHz frequencies and the 6 GHz
additions evidently overlapped (or rather, I didn't see
it when applying the latter), so the 6 GHz is broken.
Fix this.

Fixes: 934f4c7dd3a5 ("cfg80211: express channels with a KHz component")
Link: https://lore.kernel.org/r/20200529140425.1bf824f6911b.I4a1174916b8f5965af4366999eb9ffc7a0347470@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index df75e58eca5d..5b3b0d1222a2 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -94,7 +94,7 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
 	case NL80211_BAND_6GHZ:
 		/* see 802.11ax D4.1 27.3.22.2 */
 		if (chan <= 253)
-			return 5940 + chan * 5;
+			return MHZ_TO_KHZ(5940 + chan * 5);
 		break;
 	case NL80211_BAND_60GHZ:
 		if (chan < 7)
-- 
cgit v1.2.3-59-g8ed1b


From d1a1646c0de7b0083d42a1ada72a3ec243bfcc6d Mon Sep 17 00:00:00 2001
From: Arend Van Spriel <arend.vanspriel@broadcom.com>
Date: Fri, 29 May 2020 11:41:43 +0200
Subject: cfg80211: adapt to new channelization of the 6GHz band

The 6GHz band does not have regulatory approval yet, but things are
moving forward. However, that has led to a change in the channelization
of the 6GHz band which has been accepted in the 11ax specification. It
also fixes a missing MHZ_TO_KHZ() macro for 6GHz channels while at it.

This change is primarily thrown in to discuss how to deal with it.
I noticed ath11k adding 6G support with old channelization and ditto
for iw. It probably involves changes in hostapd as well.

Cc: Pradeep Kumar Chitrapu <pradeepc@codeaurora.org>
Cc: Jouni Malinen <jouni@w1.fi>
Signed-off-by: Arend van Spriel <arend.vanspriel@broadcom.com>
Link: https://lore.kernel.org/r/edf07cdd-ad15-4012-3afd-d8b961a80b69@broadcom.com
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/util.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/wireless/util.c b/net/wireless/util.c
index 5b3b0d1222a2..a27d4f45fb5f 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -92,9 +92,11 @@ u32 ieee80211_channel_to_freq_khz(int chan, enum nl80211_band band)
 			return MHZ_TO_KHZ(5000 + chan * 5);
 		break;
 	case NL80211_BAND_6GHZ:
-		/* see 802.11ax D4.1 27.3.22.2 */
+		/* see 802.11ax D6.1 27.3.23.2 */
+		if (chan == 2)
+			return MHZ_TO_KHZ(5935);
 		if (chan <= 253)
-			return MHZ_TO_KHZ(5940 + chan * 5);
+			return MHZ_TO_KHZ(5950 + chan * 5);
 		break;
 	case NL80211_BAND_60GHZ:
 		if (chan < 7)
-- 
cgit v1.2.3-59-g8ed1b


From 0e47901d78f0b91901f845c2fc575ae48d8ed395 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:24 +0200
Subject: nl80211: really allow client-only BIGTK support

My previous commit here was wrong, it didn't check the new
flag in two necessary places, so things didn't work. Fix that.

Fixes: 155d7c733807 ("nl80211: allow client-only BIGTK support")
Link: https://lore.kernel.org/r/20200528213443.993f108e96ca.I0086ae42d672379380d04ac5effb2f3d5135731b@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/sme.c  | 7 +++++--
 net/wireless/util.c | 4 +++-
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/net/wireless/sme.c b/net/wireless/sme.c
index 3554c0d951f4..15595cf401de 100644
--- a/net/wireless/sme.c
+++ b/net/wireless/sme.c
@@ -5,7 +5,7 @@
  * (for nl80211's connect() and wext)
  *
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2009   Intel Corporation. All rights reserved.
+ * Copyright (C) 2009, 2020 Intel Corporation. All rights reserved.
  * Copyright 2017	Intel Deutschland GmbH
  */
 
@@ -1118,7 +1118,10 @@ void __cfg80211_disconnected(struct net_device *dev, const u8 *ie,
 
 		if (wiphy_ext_feature_isset(
 			    wdev->wiphy,
-			    NL80211_EXT_FEATURE_BEACON_PROTECTION))
+			    NL80211_EXT_FEATURE_BEACON_PROTECTION) ||
+		    wiphy_ext_feature_isset(
+			    wdev->wiphy,
+			    NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
 			max_key_idx = 7;
 		for (i = 0; i <= max_key_idx; i++)
 			rdev_del_key(rdev, dev, i, false, NULL);
diff --git a/net/wireless/util.c b/net/wireless/util.c
index a27d4f45fb5f..4d3b76f94f55 100644
--- a/net/wireless/util.c
+++ b/net/wireless/util.c
@@ -242,7 +242,9 @@ int cfg80211_validate_key_settings(struct cfg80211_registered_device *rdev,
 	int max_key_idx = 5;
 
 	if (wiphy_ext_feature_isset(&rdev->wiphy,
-				    NL80211_EXT_FEATURE_BEACON_PROTECTION))
+				    NL80211_EXT_FEATURE_BEACON_PROTECTION) ||
+	    wiphy_ext_feature_isset(&rdev->wiphy,
+				    NL80211_EXT_FEATURE_BEACON_PROTECTION_CLIENT))
 		max_key_idx = 7;
 	if (key_idx < 0 || key_idx > max_key_idx)
 		return -EINVAL;
-- 
cgit v1.2.3-59-g8ed1b


From afbc9c9e8bfe71e8bd12a8c01bedd969fbab8f0e Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:25 +0200
Subject: cfg80211: add a helper to identify 6 GHz PSCs

This allows identifying whether or not a channel is a PSC
(preferred scanning channel).

Link: https://lore.kernel.org/r/20200528213443.414363ecf62c.Ic15e681a0e249eab7350a06ceb582cca8bb9a080@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index e2dbc9c02ef3..a38653358885 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -5277,6 +5277,21 @@ ieee80211_get_channel(struct wiphy *wiphy, int freq)
 	return ieee80211_get_channel_khz(wiphy, MHZ_TO_KHZ(freq));
 }
 
+/**
+ * cfg80211_channel_is_psc - Check if the channel is a 6 GHz PSC
+ * @chan: control channel to check
+ *
+ * The Preferred Scanning Channels (PSC) are defined in
+ * Draft IEEE P802.11ax/D5.0, 26.17.2.3.3
+ */
+static inline bool cfg80211_channel_is_psc(struct ieee80211_channel *chan)
+{
+	if (chan->band != NL80211_BAND_6GHZ)
+		return false;
+
+	return ieee80211_frequency_to_channel(chan->center_freq) % 16 == 5;
+}
+
 /**
  * ieee80211_get_response_rate - get basic rate for a given rate
  *
-- 
cgit v1.2.3-59-g8ed1b


From 372b38ea5911fc2500f0291b00140e80a26c0e36 Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Thu, 28 May 2020 21:34:26 +0200
Subject: ieee80211: definitions for reduced neighbor reports

Add the necessary definitions to parse reduced neighbor
report elements.

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
[change struct name, remove IEEE80211_MIN_AP_NEIGHBOR_INFO_SIZE]
Link: https://lore.kernel.org/r/20200528213443.4f9154461c06.I518d9898ad982f838112ea9ca14a20d6bbb16394@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 0320ca4c7d28..c29184bf9416 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2754,6 +2754,8 @@ enum ieee80211_eid {
 	WLAN_EID_QUIET_CHANNEL = 198,
 	WLAN_EID_OPMODE_NOTIF = 199,
 
+	WLAN_EID_REDUCED_NEIGHBOR_REPORT = 201,
+
 	WLAN_EID_S1G_BCN_COMPAT = 213,
 	WLAN_EID_S1G_SHORT_BCN_INTERVAL = 214,
 	WLAN_EID_S1G_CAPABILITIES = 217,
@@ -3675,4 +3677,30 @@ static inline bool for_each_element_completed(const struct element *element,
 #define WLAN_RSNX_CAPA_PROTECTED_TWT BIT(4)
 #define WLAN_RSNX_CAPA_SAE_H2E BIT(5)
 
+/*
+ * reduced neighbor report, based on Draft P802.11ax_D5.0,
+ * section 9.4.2.170
+ */
+#define IEEE80211_AP_INFO_TBTT_HDR_TYPE				0x03
+#define IEEE80211_AP_INFO_TBTT_HDR_FILTERED			0x04
+#define IEEE80211_AP_INFO_TBTT_HDR_COLOC			0x08
+#define IEEE80211_AP_INFO_TBTT_HDR_COUNT			0xF0
+#define IEEE80211_TBTT_INFO_OFFSET_BSSID_BSS_PARAM		8
+#define IEEE80211_TBTT_INFO_OFFSET_BSSID_SSSID_BSS_PARAM	12
+
+#define IEEE80211_RNR_TBTT_PARAMS_OCT_RECOMMENDED		0x01
+#define IEEE80211_RNR_TBTT_PARAMS_SAME_SSID			0x02
+#define IEEE80211_RNR_TBTT_PARAMS_MULTI_BSSID			0x04
+#define IEEE80211_RNR_TBTT_PARAMS_TRANSMITTED_BSSID		0x08
+#define IEEE80211_RNR_TBTT_PARAMS_COLOC_ESS			0x10
+#define IEEE80211_RNR_TBTT_PARAMS_PROBE_ACTIVE			0x20
+#define IEEE80211_RNR_TBTT_PARAMS_COLOC_AP			0x40
+
+struct ieee80211_neighbor_ap_info {
+       u8 tbtt_info_hdr;
+       u8 tbtt_info_len;
+       u8 op_class;
+       u8 channel;
+} __packed;
+
 #endif /* LINUX_IEEE80211_H */
-- 
cgit v1.2.3-59-g8ed1b


From 821273a5a502eebaae005557907d122d1e9b7b98 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:27 +0200
Subject: ieee80211: add code to obtain and parse 6 GHz operation field

Add some code to obtain and parse the 6 GHz operation field
inside the HE operation element.

While at it, fix the required length using sizeof() the new
struct, which is 5 instead of 4 now.

Link: https://lore.kernel.org/r/20200528213443.42ca72c45ca9.Id74bc1b03da9ea6574f9bc70deeb60dfc1634359@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 52 ++++++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 51 insertions(+), 1 deletion(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index c29184bf9416..2bd9e757167d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2209,6 +2209,28 @@ ieee80211_he_ppe_size(u8 ppe_thres_hdr, const u8 *phy_cap_info)
 #define IEEE80211_HE_OPERATION_PARTIAL_BSS_COLOR		0x40000000
 #define IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED		0x80000000
 
+/**
+ * struct ieee80211_he_6ghz_oper - HE 6 GHz operation Information field
+ * @primary: primary channel
+ * @control: control flags
+ * @ccfs0: channel center frequency segment 0
+ * @ccfs1: channel center frequency segment 1
+ * @minrate: minimum rate (in 1 Mbps units)
+ */
+struct ieee80211_he_6ghz_oper {
+	u8 primary;
+#define IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH	0x3
+#define		IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ	0
+#define		IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ	1
+#define		IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ	2
+#define		IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ	3
+#define IEEE80211_HE_6GHZ_OPER_CTRL_DUP_BEACON	0x4
+	u8 control;
+	u8 ccfs0;
+	u8 ccfs1;
+	u8 minrate;
+} __packed;
+
 /*
  * ieee80211_he_oper_size - calculate 802.11ax HE Operations IE size
  * @he_oper_ie: byte data of the He Operations IE, stating from the byte
@@ -2235,7 +2257,7 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 	if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS)
 		oper_len++;
 	if (he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO)
-		oper_len += 4;
+		oper_len += sizeof(struct ieee80211_he_6ghz_oper);
 
 	/* Add the first byte (extension ID) to the total length */
 	oper_len++;
@@ -2243,6 +2265,34 @@ ieee80211_he_oper_size(const u8 *he_oper_ie)
 	return oper_len;
 }
 
+/**
+ * ieee80211_he_6ghz_oper - obtain 6 GHz operation field
+ * @he_oper: HE operation element (must be pre-validated for size),
+ *	may be %NULL
+ *
+ * Return: the 6 GHz operation field, or %NULL if @he_oper is %NULL or lacks it
+ */
+static inline const struct ieee80211_he_6ghz_oper *
+ieee80211_he_6ghz_oper(const struct ieee80211_he_operation *he_oper)
+{
+	const u8 *ret;
+	u32 he_oper_params;
+
+	if (!he_oper)
+		return NULL;
+
+	ret = (const void *)&he_oper->optional;
+	he_oper_params = le32_to_cpu(he_oper->he_oper_params);
+	if (!(he_oper_params & IEEE80211_HE_OPERATION_6GHZ_OP_INFO))
+		return NULL;
+	if (he_oper_params & IEEE80211_HE_OPERATION_VHT_OPER_INFO)
+		ret += 3;
+	if (he_oper_params & IEEE80211_HE_OPERATION_CO_HOSTED_BSS)
+		ret++;
+
+	return (void *)ret;
+}
+
 /* HE Spatial Reuse defines */
 #define IEEE80211_HE_SPR_NON_SRG_OFFSET_PRESENT			0x4
 #define IEEE80211_HE_SPR_SRG_INFORMATION_PRESENT		0x8
-- 
cgit v1.2.3-59-g8ed1b


From 8b30808d9be4183fab17f0b0e68eea88c94ff15a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:28 +0200
Subject: ieee80211: add HE ext EIDs and 6 GHz capability defines

Add the HE extended element IDs and the definitions for the
HE 6 GHz band capabilities element, from Draft 5.0.

Link: https://lore.kernel.org/r/20200528213443.1a6689fe093f.Ifdc5400fb01779351354daf38663ebeea03c9ad9@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h | 28 ++++++++++++++++++++++++++++
 1 file changed, 28 insertions(+)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 2bd9e757167d..9580dfd9e2d1 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -2839,9 +2839,19 @@ enum ieee80211_eid_ext {
 	WLAN_EID_EXT_UORA = 37,
 	WLAN_EID_EXT_HE_MU_EDCA = 38,
 	WLAN_EID_EXT_HE_SPR = 39,
+	WLAN_EID_EXT_NDP_FEEDBACK_REPORT_PARAMSET = 41,
+	WLAN_EID_EXT_BSS_COLOR_CHG_ANN = 42,
+	WLAN_EID_EXT_QUIET_TIME_PERIOD_SETUP = 43,
+	WLAN_EID_EXT_ESS_REPORT = 45,
+	WLAN_EID_EXT_OPS = 46,
+	WLAN_EID_EXT_HE_BSS_LOAD = 47,
 	WLAN_EID_EXT_MAX_CHANNEL_SWITCH_TIME = 52,
 	WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION = 55,
 	WLAN_EID_EXT_NON_INHERITANCE = 56,
+	WLAN_EID_EXT_KNOWN_BSSID = 57,
+	WLAN_EID_EXT_SHORT_SSID_LIST = 58,
+	WLAN_EID_EXT_HE_6GHZ_CAPA = 59,
+	WLAN_EID_EXT_UL_MU_POWER_CAPA = 60,
 };
 
 /* Action category code */
@@ -3384,6 +3394,24 @@ struct ieee80211_tspec_ie {
 	__le16 medium_time;
 } __packed;
 
+struct ieee80211_he_6ghz_capa {
+	/* uses IEEE80211_HE_6GHZ_CAP_* below */
+	__le16 capa;
+} __packed;
+
+/* HE 6 GHz band capabilities */
+/* uses enum ieee80211_min_mpdu_spacing values */
+#define IEEE80211_HE_6GHZ_CAP_MIN_MPDU_START	0x0007
+/* uses enum ieee80211_vht_max_ampdu_length_exp values */
+#define IEEE80211_HE_6GHZ_CAP_MAX_AMPDU_LEN_EXP	0x0038
+/* uses IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_* values */
+#define IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN	0x00c0
+/* WLAN_HT_CAP_SM_PS_* values */
+#define IEEE80211_HE_6GHZ_CAP_SM_PS		0x0600
+#define IEEE80211_HE_6GHZ_CAP_RD_RESPONDER	0x0800
+#define IEEE80211_HE_6GHZ_CAP_RX_ANTPAT_CONS	0x1000
+#define IEEE80211_HE_6GHZ_CAP_TX_ANTPAT_CONS	0x2000
+
 /**
  * ieee80211_get_qos_ctl - get pointer to qos control bytes
  * @hdr: the frame
-- 
cgit v1.2.3-59-g8ed1b


From 43e64bf301fd8c54f0082d91c6ffd4de861baf96 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Thu, 28 May 2020 21:34:29 +0200
Subject: cfg80211: handle 6 GHz capability of new station

Handle 6 GHz HE capability while adding new station. It will be used
later in mac80211 station processing.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1589399105-25472-2-git-send-email-rmanohar@codeaurora.org
[handle nl80211_set_station, require WME,
 remove NL80211_HE_6GHZ_CAPABILITY_LEN]
Link: https://lore.kernel.org/r/20200528213443.b6b711fd4312.Ic9b97d57b6c4f2b28d4b2d23d2849d8bc20bd8cc@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       |  2 ++
 include/uapi/linux/nl80211.h |  5 +++++
 net/wireless/nl80211.c       | 18 +++++++++++++++++-
 3 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index a38653358885..da734ea71b5a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -1238,6 +1238,7 @@ struct sta_txpwr {
  * @he_capa_len: the length of the HE capabilities
  * @airtime_weight: airtime scheduler weight for this station
  * @txpwr: transmit power for an associated station
+ * @he_6ghz_capa: HE 6 GHz Band capabilities of station
  */
 struct station_parameters {
 	const u8 *supported_rates;
@@ -1270,6 +1271,7 @@ struct station_parameters {
 	u8 he_capa_len;
 	u16 airtime_weight;
 	struct sta_txpwr txpwr;
+	const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index c14666b75e57..e42ae429383e 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -2502,6 +2502,9 @@ enum nl80211_commands {
  *	first channel segment specified in %NL80211_ATTR_CENTER_FREQ1.
  * @NL80211_ATTR_SCAN_FREQ_KHZ: nested attribute with KHz frequencies
  *
+ * @NL80211_ATTR_HE_6GHZ_CAPABILITY: HE 6 GHz Band Capability element (from
+ *	association request when used with NL80211_CMD_NEW_STATION).
+ *
  * @NUM_NL80211_ATTR: total number of nl80211_attrs available
  * @NL80211_ATTR_MAX: highest attribute number currently defined
  * @__NL80211_ATTR_AFTER_LAST: internal use
@@ -2982,6 +2985,8 @@ enum nl80211_attrs {
 	NL80211_ATTR_CENTER_FREQ1_OFFSET,
 	NL80211_ATTR_SCAN_FREQ_KHZ,
 
+	NL80211_ATTR_HE_6GHZ_CAPABILITY,
+
 	/* add attributes here, update the policy in nl80211.c */
 
 	__NL80211_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 22c4d13e28cb..bf8bd8268cb7 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -654,6 +654,10 @@ static const struct nla_policy nl80211_policy[NUM_NL80211_ATTR] = {
 	[NL80211_ATTR_RECEIVE_MULTICAST] = { .type = NLA_FLAG },
 	[NL80211_ATTR_WIPHY_FREQ_OFFSET] = NLA_POLICY_RANGE(NLA_U32, 0, 999),
 	[NL80211_ATTR_SCAN_FREQ_KHZ] = { .type = NLA_NESTED },
+	[NL80211_ATTR_HE_6GHZ_CAPABILITY] = {
+		.type = NLA_EXACT_LEN,
+		.len = sizeof(struct ieee80211_he_6ghz_capa),
+	},
 };
 
 /* policy for the key attributes */
@@ -5989,6 +5993,10 @@ static int nl80211_set_station(struct sk_buff *skb, struct genl_info *info)
 			nla_get_u8(info->attrs[NL80211_ATTR_OPMODE_NOTIF]);
 	}
 
+	if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
+		params.he_6ghz_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
+
 	if (info->attrs[NL80211_ATTR_AIRTIME_WEIGHT])
 		params.airtime_weight =
 			nla_get_u16(info->attrs[NL80211_ATTR_AIRTIME_WEIGHT]);
@@ -6123,6 +6131,10 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 			return -EINVAL;
 	}
 
+	if (info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY])
+		params.he_6ghz_capa =
+			nla_data(info->attrs[NL80211_ATTR_HE_6GHZ_CAPABILITY]);
+
 	if (info->attrs[NL80211_ATTR_OPMODE_NOTIF]) {
 		params.opmode_notif_used = true;
 		params.opmode_notif =
@@ -6167,10 +6179,14 @@ static int nl80211_new_station(struct sk_buff *skb, struct genl_info *info)
 		params.vht_capa = NULL;
 
 		/* HE requires WME */
-		if (params.he_capa_len)
+		if (params.he_capa_len || params.he_6ghz_capa)
 			return -EINVAL;
 	}
 
+	/* Ensure that HT/VHT capabilities are not set for 6 GHz HE STA */
+	if (params.he_6ghz_capa && (params.ht_capa || params.vht_capa))
+		return -EINVAL;
+
 	/* When you run into this, adjust the code below for the new flag */
 	BUILD_BUG_ON(NL80211_STA_FLAG_MAX != 7);
 
-- 
cgit v1.2.3-59-g8ed1b


From a6cf28e05f0b3bda6ff0c58100324ac91aec6027 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Thu, 28 May 2020 21:34:30 +0200
Subject: mac80211: add HE 6 GHz Band Capabilities into parse extension

Handle 6 GHz band capability element parsing for association.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1589399105-25472-4-git-send-email-rmanohar@codeaurora.org
[some renaming to be in line with previous patches]
Link: https://lore.kernel.org/r/20200528213443.a13d7a0b85b0.Ia07584da4fc77aa77c4cc563248d2ce4234ffe5d@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h | 1 +
 net/mac80211/util.c        | 4 ++++
 2 files changed, 5 insertions(+)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index b7935f3d000d..dac016636d12 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1494,6 +1494,7 @@ struct ieee802_11_elems {
 	const struct ieee80211_he_operation *he_operation;
 	const struct ieee80211_he_spr *he_spr;
 	const struct ieee80211_mu_edca_param_set *mu_edca_param_set;
+	const struct ieee80211_he_6ghz_capa *he_6ghz_capa;
 	const u8 *uora_element;
 	const u8 *mesh_id;
 	const u8 *peering;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 20436c86b9bf..5d2c5ae8aadb 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -936,6 +936,10 @@ static void ieee80211_parse_extension_element(u32 *crc,
 		    len >= ieee80211_he_spr_size(data))
 			elems->he_spr = data;
 		break;
+	case WLAN_EID_EXT_HE_6GHZ_CAPA:
+		if (len == sizeof(*elems->he_6ghz_capa))
+			elems->he_6ghz_capa = data;
+		break;
 	}
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 223952177296c34d9c8de9cde33204caffe55725 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:31 +0200
Subject: cfg80211: add and expose HE 6 GHz band capabilities

These capabilities cover what would otherwise be transported
in HT/VHT capabilities, but only a subset thereof that is
actually needed on 6 GHz with HE already present. Expose the
capabilities to userspace; drivers are expected to set them,
as using the 6 GHz band (currently) requires HE capability.

Link: https://lore.kernel.org/r/20200528213443.244cd5cb9db8.Icd8c773277a88c837e7e3af1d4d1013cc3b66543@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 3 +++
 include/uapi/linux/nl80211.h | 3 +++
 net/wireless/nl80211.c       | 9 ++++++++-
 3 files changed, 14 insertions(+), 1 deletion(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index da734ea71b5a..9b76be3d561a 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -354,10 +354,13 @@ struct ieee80211_sta_he_cap {
  *
  * @types_mask: interface types mask
  * @he_cap: holds the HE capabilities
+ * @he_6ghz_capa: HE 6 GHz capabilities, must be filled in for a
+ *	6 GHz band channel (and 0 may be a valid value).
  */
 struct ieee80211_sband_iftype_data {
 	u16 types_mask;
 	struct ieee80211_sta_he_cap he_cap;
+	struct ieee80211_he_6ghz_capa he_6ghz_capa;
 };
 
 /**
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index e42ae429383e..5b350d032fa3 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -3565,6 +3565,8 @@ enum nl80211_mpath_info {
  *     defined in HE capabilities IE
  * @NL80211_BAND_IFTYPE_ATTR_MAX: highest band HE capability attribute currently
  *     defined
+ * @NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA: HE 6GHz band capabilities (__le16),
+ *	given for all 6 GHz band channels
  * @__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST: internal use
  */
 enum nl80211_band_iftype_attr {
@@ -3575,6 +3577,7 @@ enum nl80211_band_iftype_attr {
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PHY,
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_MCS_SET,
 	NL80211_BAND_IFTYPE_ATTR_HE_CAP_PPE,
+	NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
 
 	/* keep last */
 	__NL80211_BAND_IFTYPE_ATTR_AFTER_LAST,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index bf8bd8268cb7..3a24e6add13e 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -1562,6 +1562,7 @@ static int nl80211_send_coalesce(struct sk_buff *msg,
 
 static int
 nl80211_send_iftype_data(struct sk_buff *msg,
+			 const struct ieee80211_supported_band *sband,
 			 const struct ieee80211_sband_iftype_data *iftdata)
 {
 	const struct ieee80211_sta_he_cap *he_cap = &iftdata->he_cap;
@@ -1585,6 +1586,12 @@ nl80211_send_iftype_data(struct sk_buff *msg,
 			return -ENOBUFS;
 	}
 
+	if (sband->band == NL80211_BAND_6GHZ &&
+	    nla_put(msg, NL80211_BAND_IFTYPE_ATTR_HE_6GHZ_CAPA,
+		    sizeof(iftdata->he_6ghz_capa),
+		    &iftdata->he_6ghz_capa))
+		return -ENOBUFS;
+
 	return 0;
 }
 
@@ -1633,7 +1640,7 @@ static int nl80211_send_band_rateinfo(struct sk_buff *msg,
 			if (!iftdata)
 				return -ENOBUFS;
 
-			err = nl80211_send_iftype_data(msg,
+			err = nl80211_send_iftype_data(msg, sband,
 						       &sband->iftype_data[i]);
 			if (err)
 				return err;
-- 
cgit v1.2.3-59-g8ed1b


From 24a2042cb22fdfc7feef0df9622f0d9d71b8ced1 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Thu, 28 May 2020 21:34:32 +0200
Subject: mac80211: add HE 6 GHz Band Capability element

Construct HE 6 GHz band capability element (IEEE 802.11ax/D6.0,
9.4.2.261) for association request and mesh beacon. The 6 GHz
capability information is passed by driver through iftypes caps.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1589399105-25472-7-git-send-email-rmanohar@codeaurora.org
[handle SMPS, adjust for previous patches, reserve SKB space properly,
 change to handle SKB directly]
Link: https://lore.kernel.org/r/20200528213443.643aa8101111.I3f9747c1147480f65445f13eda5c4a5ed4e86757@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  2 ++
 net/mac80211/mesh.c        |  9 +++++++++
 net/mac80211/mesh.h        |  2 ++
 net/mac80211/mesh_plink.c  |  4 +++-
 net/mac80211/mlme.c        |  3 +++
 net/mac80211/util.c        | 46 ++++++++++++++++++++++++++++++++++++++++++++++
 6 files changed, 65 insertions(+), 1 deletion(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index dac016636d12..344ea828e806 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2177,6 +2177,8 @@ u8 ieee80211_ie_len_he_cap(struct ieee80211_sub_if_data *sdata, u8 iftype);
 u8 *ieee80211_ie_build_he_cap(u8 *pos,
 			      const struct ieee80211_sta_he_cap *he_cap,
 			      u8 *end);
+void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+				    struct sk_buff *skb);
 u8 *ieee80211_ie_build_he_oper(u8 *pos);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5930d07b1e43..5e8d72bdbb98 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -587,6 +587,13 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
 	return 0;
 }
 
+int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
+			    struct sk_buff *skb)
+{
+	ieee80211_ie_build_he_6ghz_cap(sdata, skb);
+	return 0;
+}
+
 static void ieee80211_mesh_path_timer(struct timer_list *t)
 {
 	struct ieee80211_sub_if_data *sdata =
@@ -766,6 +773,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		   2 + sizeof(struct ieee80211_vht_operation) +
 		   ie_len_he_cap +
 		   2 + 1 + sizeof(struct ieee80211_he_operation) +
+		   2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 		   ifmsh->ie_len;
 
 	bcn = kzalloc(sizeof(*bcn) + head_len + tail_len, GFP_KERNEL);
@@ -885,6 +893,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 	    mesh_add_vht_oper_ie(sdata, skb) ||
 	    mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
 	    mesh_add_he_oper_ie(sdata, skb) ||
+	    mesh_add_he_6ghz_cap_ie(sdata, skb) ||
 	    mesh_add_vendor_ies(sdata, skb))
 		goto out_free;
 
diff --git a/net/mac80211/mesh.h b/net/mac80211/mesh.h
index 953f720754e8..40492d1bd8fd 100644
--- a/net/mac80211/mesh.h
+++ b/net/mac80211/mesh.h
@@ -222,6 +222,8 @@ int mesh_add_he_cap_ie(struct ieee80211_sub_if_data *sdata,
 		       struct sk_buff *skb, u8 ie_len);
 int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
 			struct sk_buff *skb);
+int mesh_add_he_6ghz_cap_ie(struct ieee80211_sub_if_data *sdata,
+			    struct sk_buff *skb);
 void mesh_rmc_free(struct ieee80211_sub_if_data *sdata);
 int mesh_rmc_init(struct ieee80211_sub_if_data *sdata);
 void ieee80211s_init(void);
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 737c5f4dbf52..3aca89c97f36 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -238,6 +238,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 			    2 + sizeof(struct ieee80211_vht_operation) +
 			    ie_len_he_cap +
 			    2 + 1 + sizeof(struct ieee80211_he_operation) +
+			    2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 			    2 + 8 + /* peering IE */
 			    sdata->u.mesh.ie_len);
 	if (!skb)
@@ -328,7 +329,8 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 		    mesh_add_vht_cap_ie(sdata, skb) ||
 		    mesh_add_vht_oper_ie(sdata, skb) ||
 		    mesh_add_he_cap_ie(sdata, skb, ie_len_he_cap) ||
-		    mesh_add_he_oper_ie(sdata, skb))
+		    mesh_add_he_oper_ie(sdata, skb) ||
+		    mesh_add_he_6ghz_cap_ie(sdata, skb))
 			goto free;
 	}
 
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index a259b4487b60..f6ddce646f18 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -658,6 +658,8 @@ static void ieee80211_add_he_ie(struct ieee80211_sub_if_data *sdata,
 				      he_cap->he_cap_elem.phy_cap_info);
 	pos = skb_put(skb, he_cap_size);
 	ieee80211_ie_build_he_cap(pos, he_cap, pos + he_cap_size);
+
+	ieee80211_ie_build_he_6ghz_cap(sdata, skb);
 }
 
 static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
@@ -731,6 +733,7 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			2 + 1 + sizeof(struct ieee80211_he_cap_elem) + /* HE */
 				sizeof(struct ieee80211_he_mcs_nss_supp) +
 				IEEE80211_HE_PPE_THRES_MAX_LEN +
+			2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 			assoc_data->ie_len + /* extra IEs */
 			(assoc_data->fils_kek_len ? 16 /* AES-SIV */ : 0) +
 			9, /* WMM */
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 5d2c5ae8aadb..048b38546a56 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -2839,6 +2839,52 @@ end:
 	return pos;
 }
 
+void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
+				    struct sk_buff *skb)
+{
+	struct ieee80211_supported_band *sband;
+	const struct ieee80211_sband_iftype_data *iftd;
+	enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+	u8 *pos;
+	u16 cap;
+
+	sband = ieee80211_get_sband(sdata);
+	if (!sband)
+		return;
+
+	iftd = ieee80211_get_sband_iftype_data(sband, iftype);
+	if (WARN_ON(!iftd))
+		return;
+
+	cap = le16_to_cpu(iftd->he_6ghz_capa.capa);
+	cap &= ~IEEE80211_HE_6GHZ_CAP_SM_PS;
+
+	switch (sdata->smps_mode) {
+	case IEEE80211_SMPS_AUTOMATIC:
+	case IEEE80211_SMPS_NUM_MODES:
+		WARN_ON(1);
+		/* fall through */
+	case IEEE80211_SMPS_OFF:
+		cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DISABLED,
+				       IEEE80211_HE_6GHZ_CAP_SM_PS);
+		break;
+	case IEEE80211_SMPS_STATIC:
+		cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_STATIC,
+				       IEEE80211_HE_6GHZ_CAP_SM_PS);
+		break;
+	case IEEE80211_SMPS_DYNAMIC:
+		cap |= u16_encode_bits(WLAN_HT_CAP_SM_PS_DYNAMIC,
+				       IEEE80211_HE_6GHZ_CAP_SM_PS);
+		break;
+	}
+
+	pos = skb_put(skb, 2 + 1 + sizeof(cap));
+	*pos++ = WLAN_EID_EXTENSION;
+	*pos++ = 1 + sizeof(cap);
+	*pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
+	put_unaligned_le16(cap, pos);
+}
+
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
 			       const struct cfg80211_chan_def *chandef,
 			       u16 prot_mode, bool rifs_mode)
-- 
cgit v1.2.3-59-g8ed1b


From d1b7524b3ea140e552658485d4e9dce5ee2953e1 Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Thu, 28 May 2020 21:34:33 +0200
Subject: mac80211: build HE operation with 6 GHz oper information

Add 6 GHz operation information (IEEE 802.11ax/D6.0, Figure 9-787k)
while building HE operation element for non-HE AP. This field is used to
determine channel information in the absence of HT/VHT IEs.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1589399105-25472-8-git-send-email-rmanohar@codeaurora.org
[fix skb allocation size]
Link: https://lore.kernel.org/r/20200528193455.76796-1-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/mesh.c        | 12 ++++++---
 net/mac80211/mesh_plink.c  |  1 +
 net/mac80211/util.c        | 65 +++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 72 insertions(+), 8 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 344ea828e806..9f874ce500f6 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2179,7 +2179,7 @@ u8 *ieee80211_ie_build_he_cap(u8 *pos,
 			      u8 *end);
 void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
 				    struct sk_buff *skb);
-u8 *ieee80211_ie_build_he_oper(u8 *pos);
+u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef);
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
 			     const u8 *srates, int srates_len, u32 *rates);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5e8d72bdbb98..5f3d45474db6 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -565,6 +565,7 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
 {
 	const struct ieee80211_sta_he_cap *he_cap;
 	struct ieee80211_supported_band *sband;
+	u32 len;
 	u8 *pos;
 
 	sband = ieee80211_get_sband(sdata);
@@ -578,11 +579,15 @@ int mesh_add_he_oper_ie(struct ieee80211_sub_if_data *sdata,
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_10)
 		return 0;
 
-	if (skb_tailroom(skb) < 2 + 1 + sizeof(struct ieee80211_he_operation))
+	len = 2 + 1 + sizeof(struct ieee80211_he_operation);
+	if (sdata->vif.bss_conf.chandef.chan->band == NL80211_BAND_6GHZ)
+		len += sizeof(struct ieee80211_he_6ghz_oper);
+
+	if (skb_tailroom(skb) < len)
 		return -ENOMEM;
 
-	pos = skb_put(skb, 2 + 1 + sizeof(struct ieee80211_he_operation));
-	ieee80211_ie_build_he_oper(pos);
+	pos = skb_put(skb, len);
+	ieee80211_ie_build_he_oper(pos, &sdata->vif.bss_conf.chandef);
 
 	return 0;
 }
@@ -773,6 +778,7 @@ ieee80211_mesh_build_beacon(struct ieee80211_if_mesh *ifmsh)
 		   2 + sizeof(struct ieee80211_vht_operation) +
 		   ie_len_he_cap +
 		   2 + 1 + sizeof(struct ieee80211_he_operation) +
+			   sizeof(struct ieee80211_he_6ghz_oper) +
 		   2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 		   ifmsh->ie_len;
 
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index 3aca89c97f36..fbbfc5d4a51c 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -238,6 +238,7 @@ static int mesh_plink_frame_tx(struct ieee80211_sub_if_data *sdata,
 			    2 + sizeof(struct ieee80211_vht_operation) +
 			    ie_len_he_cap +
 			    2 + 1 + sizeof(struct ieee80211_he_operation) +
+				    sizeof(struct ieee80211_he_6ghz_oper) +
 			    2 + 1 + sizeof(struct ieee80211_he_6ghz_capa) +
 			    2 + 8 + /* peering IE */
 			    sdata->u.mesh.ie_len);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 048b38546a56..87dd003dbdf2 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3008,13 +3008,18 @@ u8 *ieee80211_ie_build_vht_oper(u8 *pos, struct ieee80211_sta_vht_cap *vht_cap,
 	return pos + sizeof(struct ieee80211_vht_operation);
 }
 
-u8 *ieee80211_ie_build_he_oper(u8 *pos)
+u8 *ieee80211_ie_build_he_oper(u8 *pos, struct cfg80211_chan_def *chandef)
 {
 	struct ieee80211_he_operation *he_oper;
+	struct ieee80211_he_6ghz_oper *he_6ghz_op;
 	u32 he_oper_params;
+	u8 ie_len = 1 + sizeof(struct ieee80211_he_operation);
+
+	if (chandef->chan->band == NL80211_BAND_6GHZ)
+		ie_len += sizeof(struct ieee80211_he_6ghz_oper);
 
 	*pos++ = WLAN_EID_EXTENSION;
-	*pos++ = 1 + sizeof(struct ieee80211_he_operation);
+	*pos++ = ie_len;
 	*pos++ = WLAN_EID_EXT_HE_OPERATION;
 
 	he_oper_params = 0;
@@ -3024,16 +3029,68 @@ u8 *ieee80211_ie_build_he_oper(u8 *pos)
 				IEEE80211_HE_OPERATION_ER_SU_DISABLE);
 	he_oper_params |= u32_encode_bits(1,
 				IEEE80211_HE_OPERATION_BSS_COLOR_DISABLED);
+	if (chandef->chan->band == NL80211_BAND_6GHZ)
+		he_oper_params |= u32_encode_bits(1,
+				IEEE80211_HE_OPERATION_6GHZ_OP_INFO);
 
 	he_oper = (struct ieee80211_he_operation *)pos;
 	he_oper->he_oper_params = cpu_to_le32(he_oper_params);
 
 	/* don't require special HE peer rates */
 	he_oper->he_mcs_nss_set = cpu_to_le16(0xffff);
+	pos += sizeof(struct ieee80211_he_operation);
 
-	/* TODO add VHT operational and 6GHz operational subelement? */
+	if (chandef->chan->band != NL80211_BAND_6GHZ)
+		goto out;
 
-	return pos + sizeof(struct ieee80211_vht_operation);
+	/* TODO add VHT operational */
+	he_6ghz_op = (struct ieee80211_he_6ghz_oper *)pos;
+	he_6ghz_op->minrate = 6; /* 6 Mbps */
+	he_6ghz_op->primary =
+		ieee80211_frequency_to_channel(chandef->chan->center_freq);
+	he_6ghz_op->ccfs0 =
+		ieee80211_frequency_to_channel(chandef->center_freq1);
+	if (chandef->center_freq2)
+		he_6ghz_op->ccfs1 =
+			ieee80211_frequency_to_channel(chandef->center_freq2);
+	else
+		he_6ghz_op->ccfs1 = 0;
+
+	switch (chandef->width) {
+	case NL80211_CHAN_WIDTH_160:
+		/* Convert 160 MHz channel width to new style as interop
+		 * workaround.
+		 */
+		he_6ghz_op->control =
+			IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+		he_6ghz_op->ccfs1 = he_6ghz_op->ccfs0;
+		if (chandef->chan->center_freq < chandef->center_freq1)
+			he_6ghz_op->ccfs0 -= 8;
+		else
+			he_6ghz_op->ccfs0 += 8;
+		fallthrough;
+	case NL80211_CHAN_WIDTH_80P80:
+		he_6ghz_op->control =
+			IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_80:
+		he_6ghz_op->control =
+			IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ;
+		break;
+	case NL80211_CHAN_WIDTH_40:
+		he_6ghz_op->control =
+			IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ;
+		break;
+	default:
+		he_6ghz_op->control =
+			IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ;
+		break;
+	}
+
+	pos += sizeof(struct ieee80211_he_6ghz_oper);
+
+out:
+	return pos;
 }
 
 bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
-- 
cgit v1.2.3-59-g8ed1b


From 607ca9ea3462719e256b60b24286f984e0d48c9b Mon Sep 17 00:00:00 2001
From: Rajkumar Manoharan <rmanohar@codeaurora.org>
Date: Thu, 28 May 2020 21:34:34 +0200
Subject: mac80211: do not allow HT/VHT IEs in 6 GHz mesh mode

As HT/VHT elements are not allowed in the 6 GHz band, do not include
them in mesh beacon template formation.

Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/1589399105-25472-9-git-send-email-rmanohar@codeaurora.org
Link: https://lore.kernel.org/r/20200528193455.76796-2-johannes@sipsolutions.net
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mesh.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 5f3d45474db6..79e0a90982dd 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -415,6 +415,10 @@ int mesh_add_ht_cap_ie(struct ieee80211_sub_if_data *sdata,
 	if (!sband)
 		return -EINVAL;
 
+	/* HT not allowed in 6 GHz */
+	if (sband->band == NL80211_BAND_6GHZ)
+		return 0;
+
 	if (!sband->ht_cap.ht_supported ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -452,6 +456,10 @@ int mesh_add_ht_oper_ie(struct ieee80211_sub_if_data *sdata,
 	sband = local->hw.wiphy->bands[channel->band];
 	ht_cap = &sband->ht_cap;
 
+	/* HT not allowed in 6 GHz */
+	if (sband->band == NL80211_BAND_6GHZ)
+		return 0;
+
 	if (!ht_cap->ht_supported ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -479,6 +487,10 @@ int mesh_add_vht_cap_ie(struct ieee80211_sub_if_data *sdata,
 	if (!sband)
 		return -EINVAL;
 
+	/* VHT not allowed in 6 GHz */
+	if (sband->band == NL80211_BAND_6GHZ)
+		return 0;
+
 	if (!sband->vht_cap.vht_supported ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
@@ -516,6 +528,10 @@ int mesh_add_vht_oper_ie(struct ieee80211_sub_if_data *sdata,
 	sband = local->hw.wiphy->bands[channel->band];
 	vht_cap = &sband->vht_cap;
 
+	/* VHT not allowed in 6 GHz */
+	if (sband->band == NL80211_BAND_6GHZ)
+		return 0;
+
 	if (!vht_cap->vht_supported ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_20_NOHT ||
 	    sdata->vif.bss_conf.chandef.width == NL80211_CHAN_WIDTH_5 ||
-- 
cgit v1.2.3-59-g8ed1b


From 2a333a0db24e37daa2e4eb9a542c07deda44ca5a Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:35 +0200
Subject: mac80211: avoid using ext NSS high BW if not supported

If the AP advertises inconsistent data, namely it has CCFS1 or CCFS2,
but doesn't advertise support for 160/80+80 bandwidth or "Extended NSS
BW Support", then we cannot use any MCSes in the higher bandwidth.
Thus, avoid connecting with higher bandwidth since it's less efficient
that way.

Link: https://lore.kernel.org/r/20200528213443.0e55d40c3ccc.I6fd0b4708ebd087e5e46466c3e91f6efbcbef668@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ibss.c        | 11 +++++++++--
 net/mac80211/ieee80211_i.h |  6 +++++-
 net/mac80211/mesh.c        | 16 ++++++++++++---
 net/mac80211/mlme.c        | 25 +++++++++++++++++------
 net/mac80211/scan.c        |  6 ++++++
 net/mac80211/spectmgmt.c   |  4 +++-
 net/mac80211/util.c        | 49 ++++++++++++++++++++++++++++++++++++++++++----
 7 files changed, 100 insertions(+), 17 deletions(-)

diff --git a/net/mac80211/ibss.c b/net/mac80211/ibss.c
index 2479cd48fed0..81d26fef41e9 100644
--- a/net/mac80211/ibss.c
+++ b/net/mac80211/ibss.c
@@ -9,7 +9,7 @@
  * Copyright 2009, Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright(c) 2016 Intel Deutschland GmbH
- * Copyright(c) 2018-2019 Intel Corporation
+ * Copyright(c) 2018-2020 Intel Corporation
  */
 
 #include <linux/delay.h>
@@ -781,6 +781,7 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	enum nl80211_channel_type ch_type;
 	int err;
 	u32 sta_flags;
+	u32 vht_cap_info = 0;
 
 	sdata_assert_lock(sdata);
 
@@ -798,9 +799,13 @@ ieee80211_ibss_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		break;
 	}
 
+	if (elems->vht_cap_elem)
+		vht_cap_info = le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+
 	memset(&params, 0, sizeof(params));
 	err = ieee80211_parse_ch_switch_ie(sdata, elems,
 					   ifibss->chandef.chan->band,
+					   vht_cap_info,
 					   sta_flags, ifibss->bssid, &csa_ie);
 	/* can't switch to destination channel, fail */
 	if (err < 0)
@@ -1060,8 +1065,10 @@ static void ieee80211_update_sta_info(struct ieee80211_sub_if_data *sdata,
 			/* we both use VHT */
 			struct ieee80211_vht_cap cap_ie;
 			struct ieee80211_sta_vht_cap cap = sta->sta.vht_cap;
+			u32 vht_cap_info =
+				le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
 
-			ieee80211_chandef_vht_oper(&local->hw,
+			ieee80211_chandef_vht_oper(&local->hw, vht_cap_info,
 						   elems->vht_operation,
 						   elems->ht_operation,
 						   &chandef);
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 9f874ce500f6..0cc584574976 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -111,6 +111,8 @@ struct ieee80211_bss {
 	size_t supp_rates_len;
 	struct ieee80211_rate *beacon_rate;
 
+	u32 vht_cap_info;
+
 	/*
 	 * During association, we save an ERP value from a probe response so
 	 * that we can feed ERP info to the driver when handling the
@@ -1915,6 +1917,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
  * @sdata: the sdata of the interface which has received the frame
  * @elems: parsed 802.11 elements received with the frame
  * @current_band: indicates the current band
+ * @vht_cap_info: VHT capabilities of the transmitter
  * @sta_flags: contains information about own capabilities and restrictions
  *	to decide which channel switch announcements can be accepted. Only the
  *	following subset of &enum ieee80211_sta_flags are evaluated:
@@ -1929,6 +1932,7 @@ void ieee80211_process_measurement_req(struct ieee80211_sub_if_data *sdata,
 int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 				 struct ieee802_11_elems *elems,
 				 enum nl80211_band current_band,
+				 u32 vht_cap_info,
 				 u32 sta_flags, u8 *bssid,
 				 struct ieee80211_csa_ie *csa_ie);
 
@@ -2194,7 +2198,7 @@ u8 *ieee80211_add_wmm_info_ie(u8 *buf, u8 qosinfo);
 /* channel management */
 bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
 			       struct cfg80211_chan_def *chandef);
-bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
 				const struct ieee80211_vht_operation *oper,
 				const struct ieee80211_ht_operation *htop,
 				struct cfg80211_chan_def *chandef);
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 79e0a90982dd..696d6fb322e6 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -1,7 +1,7 @@
 // SPDX-License-Identifier: GPL-2.0-only
 /*
  * Copyright (c) 2008, 2009 open80211s Ltd.
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  * Authors:    Luis Carlos Cobo <luisca@cozybit.com>
  * 	       Javier Cardona <javier@cozybit.com>
  */
@@ -63,6 +63,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	u32 basic_rates = 0;
 	struct cfg80211_chan_def sta_chan_def;
 	struct ieee80211_supported_band *sband;
+	u32 vht_cap_info = 0;
 
 	/*
 	 * As support for each feature is added, check for matching
@@ -96,7 +97,11 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	cfg80211_chandef_create(&sta_chan_def, sdata->vif.bss_conf.chandef.chan,
 				NL80211_CHAN_NO_HT);
 	ieee80211_chandef_ht_oper(ie->ht_operation, &sta_chan_def);
-	ieee80211_chandef_vht_oper(&sdata->local->hw,
+
+	if (ie->vht_cap_elem)
+		vht_cap_info = le32_to_cpu(ie->vht_cap_elem->vht_cap_info);
+
+	ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
 				   ie->vht_operation, ie->ht_operation,
 				   &sta_chan_def);
 
@@ -1076,7 +1081,7 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_if_mesh *ifmsh = &sdata->u.mesh;
 	struct ieee80211_supported_band *sband;
 	int err;
-	u32 sta_flags;
+	u32 sta_flags, vht_cap_info = 0;
 
 	sdata_assert_lock(sdata);
 
@@ -1099,8 +1104,13 @@ ieee80211_mesh_process_chnswitch(struct ieee80211_sub_if_data *sdata,
 		break;
 	}
 
+	if (elems->vht_cap_elem)
+		vht_cap_info =
+			le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+
 	memset(&params, 0, sizeof(params));
 	err = ieee80211_parse_ch_switch_ie(sdata, elems, sband->band,
+					   vht_cap_info,
 					   sta_flags, sdata->vif.addr,
 					   &csa_ie);
 	if (err < 0)
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index f6ddce646f18..1f9c48414c85 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -145,6 +145,7 @@ static u32
 ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			     struct ieee80211_supported_band *sband,
 			     struct ieee80211_channel *channel,
+			     u32 vht_cap_info,
 			     const struct ieee80211_ht_operation *ht_oper,
 			     const struct ieee80211_vht_operation *vht_oper,
 			     const struct ieee80211_he_operation *he_oper,
@@ -223,7 +224,7 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 		memcpy(&he_oper_vht_cap, he_oper->optional, 3);
 		he_oper_vht_cap.basic_mcs_set = cpu_to_le16(0);
 
-		if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+		if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
 						&he_oper_vht_cap, ht_oper,
 						&vht_chandef)) {
 			if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HE))
@@ -232,8 +233,10 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 			ret = IEEE80211_STA_DISABLE_HE;
 			goto out;
 		}
-	} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw, vht_oper,
-					       ht_oper, &vht_chandef)) {
+	} else if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+					       vht_cap_info,
+					       vht_oper, ht_oper,
+					       &vht_chandef)) {
 		if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 			sdata_info(sdata,
 				   "AP VHT information is invalid, disable VHT\n");
@@ -329,6 +332,7 @@ out:
 static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 			       struct sta_info *sta,
 			       const struct ieee80211_ht_cap *ht_cap,
+			       const struct ieee80211_vht_cap *vht_cap,
 			       const struct ieee80211_ht_operation *ht_oper,
 			       const struct ieee80211_vht_operation *vht_oper,
 			       const struct ieee80211_he_operation *he_oper,
@@ -343,6 +347,7 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 	u16 ht_opmode;
 	u32 flags;
 	enum ieee80211_sta_rx_bandwidth new_sta_bw;
+	u32 vht_cap_info = 0;
 	int ret;
 
 	/* if HT was/is disabled, don't track any bandwidth changes */
@@ -371,8 +376,11 @@ static int ieee80211_config_bw(struct ieee80211_sub_if_data *sdata,
 		sdata->vif.bss_conf.ht_operation_mode = ht_opmode;
 	}
 
+	if (vht_cap)
+		vht_cap_info = le32_to_cpu(vht_cap->vht_cap_info);
+
 	/* calculate new channel (type) based on HT/VHT/HE operation IEs */
-	flags = ieee80211_determine_chantype(sdata, sband, chan,
+	flags = ieee80211_determine_chantype(sdata, sband, chan, vht_cap_info,
 					     ht_oper, vht_oper, he_oper,
 					     &chandef, true);
 
@@ -1327,6 +1335,7 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 	enum nl80211_band current_band;
 	struct ieee80211_csa_ie csa_ie;
 	struct ieee80211_channel_switch ch_switch;
+	struct ieee80211_bss *bss;
 	int res;
 
 	sdata_assert_lock(sdata);
@@ -1338,7 +1347,9 @@ ieee80211_sta_process_chanswitch(struct ieee80211_sub_if_data *sdata,
 		return;
 
 	current_band = cbss->channel->band;
+	bss = (void *)cbss->priv;
 	res = ieee80211_parse_ch_switch_ie(sdata, elems, current_band,
+					   bss->vht_cap_info,
 					   ifmgd->flags,
 					   ifmgd->associated->bssid, &csa_ie);
 
@@ -4097,8 +4108,8 @@ static void ieee80211_rx_mgmt_beacon(struct ieee80211_sub_if_data *sdata,
 
 	changed |= ieee80211_recalc_twt_req(sdata, sta, &elems);
 
-	if (ieee80211_config_bw(sdata, sta,
-				elems.ht_cap_elem, elems.ht_operation,
+	if (ieee80211_config_bw(sdata, sta, elems.ht_cap_elem,
+				elems.vht_cap_elem, elems.ht_operation,
 				elems.vht_operation, elems.he_operation,
 				bssid, &changed)) {
 		mutex_unlock(&local->sta_mtx);
@@ -4815,6 +4826,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_he_operation *he_oper = NULL;
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
+	struct ieee80211_bss *bss = (void *)cbss->priv;
 	int ret;
 	u32 i;
 	bool have_80mhz;
@@ -4913,6 +4925,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
 	ifmgd->flags |= ieee80211_determine_chantype(sdata, sband,
 						     cbss->channel,
+						     bss->vht_cap_info,
 						     ht_oper, vht_oper, he_oper,
 						     &chandef, false);
 
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index 5db15996524f..d0c2e8012118 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -132,6 +132,12 @@ ieee80211_update_bss_from_elems(struct ieee80211_local *local,
 			bss->beacon_rate =
 				&sband->bitrates[rx_status->rate_idx];
 	}
+
+	if (elems->vht_cap_elem)
+		bss->vht_cap_info =
+			le32_to_cpu(elems->vht_cap_elem->vht_cap_info);
+	else
+		bss->vht_cap_info = 0;
 }
 
 struct ieee80211_bss *
diff --git a/net/mac80211/spectmgmt.c b/net/mac80211/spectmgmt.c
index 5fe2b645912f..ae1cb2c68722 100644
--- a/net/mac80211/spectmgmt.c
+++ b/net/mac80211/spectmgmt.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2008, Intel Corporation
  * Copyright 2008, Johannes Berg <johannes@sipsolutions.net>
- * Copyright (C) 2018        Intel Corporation
+ * Copyright (C) 2018, 2020 Intel Corporation
  */
 
 #include <linux/ieee80211.h>
@@ -22,6 +22,7 @@
 int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 				 struct ieee802_11_elems *elems,
 				 enum nl80211_band current_band,
+				 u32 vht_cap_info,
 				 u32 sta_flags, u8 *bssid,
 				 struct ieee80211_csa_ie *csa_ie)
 {
@@ -150,6 +151,7 @@ int ieee80211_parse_ch_switch_ie(struct ieee80211_sub_if_data *sdata,
 
 		/* ignore if parsing fails */
 		if (!ieee80211_chandef_vht_oper(&sdata->local->hw,
+						vht_cap_info,
 						&vht_oper, &ht_oper,
 						&new_vht_chandef))
 			new_vht_chandef.chan = NULL;
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index 87dd003dbdf2..e6104829fa1c 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3120,7 +3120,7 @@ bool ieee80211_chandef_ht_oper(const struct ieee80211_ht_operation *ht_oper,
 	return true;
 }
 
-bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
+bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
 				const struct ieee80211_vht_operation *oper,
 				const struct ieee80211_ht_operation *htop,
 				struct cfg80211_chan_def *chandef)
@@ -3132,6 +3132,10 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
 	u32 vht_cap;
 	bool support_80_80 = false;
 	bool support_160 = false;
+	u8 ext_nss_bw_supp = u32_get_bits(vht_cap_info,
+					  IEEE80211_VHT_CAP_EXT_NSS_BW_MASK);
+	u8 supp_chwidth = u32_get_bits(vht_cap_info,
+				       IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK);
 
 	if (!oper || !htop)
 		return false;
@@ -3151,11 +3155,48 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw,
 				IEEE80211_HT_OP_MODE_CCFS2_MASK)
 			>> IEEE80211_HT_OP_MODE_CCFS2_SHIFT;
 
-	/* when parsing (and we know how to) CCFS1 and CCFS2 are equivalent */
 	ccf0 = ccfs0;
-	ccf1 = ccfs1;
-	if (!ccfs1 && ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+
+	/* if not supported, parse as though we didn't understand it */
+	if (!ieee80211_hw_check(hw, SUPPORTS_VHT_EXT_NSS_BW))
+		ext_nss_bw_supp = 0;
+
+	/*
+	 * Cf. IEEE 802.11 Table 9-250
+	 *
+	 * We really just consider that because it's inefficient to connect
+	 * at a higher bandwidth than we'll actually be able to use.
+	 */
+	switch ((supp_chwidth << 4) | ext_nss_bw_supp) {
+	default:
+	case 0x00:
+		ccf1 = 0;
+		support_160 = false;
+		support_80_80 = false;
+		break;
+	case 0x01:
+		support_80_80 = false;
+		/* fall through */
+	case 0x02:
+	case 0x03:
 		ccf1 = ccfs2;
+		break;
+	case 0x10:
+		ccf1 = ccfs1;
+		break;
+	case 0x11:
+	case 0x12:
+		if (!ccfs1)
+			ccf1 = ccfs2;
+		else
+			ccf1 = ccfs1;
+		break;
+	case 0x13:
+	case 0x20:
+	case 0x23:
+		ccf1 = ccfs1;
+		break;
+	}
 
 	cf0 = ieee80211_channel_to_frequency(ccf0, chandef->chan->band);
 	cf1 = ieee80211_channel_to_frequency(ccf1, chandef->chan->band);
-- 
cgit v1.2.3-59-g8ed1b


From 57fa5e85d53ce51e0cb06a7f320b79377d0fbe5f Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:36 +0200
Subject: mac80211: determine chandef from HE 6 GHz operation

Support connecting to HE 6 GHz APs and mesh networks on 6 GHz,
where the HT/VHT information is missing but instead the HE 6 GHz
band capability is present, and the 6 GHz Operation information
field is used to encode the channel configuration instead of the
HT/VHT operation elements.

Also add some other bits needed to connect to 6 GHz networks.

Link: https://lore.kernel.org/r/1589399105-25472-10-git-send-email-rmanohar@codeaurora.org
Co-developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/20200528213443.25687d2695bc.I3f9747c1147480f65445f13eda5c4a5ed4e86757@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/ieee80211_i.h |   3 ++
 net/mac80211/mesh.c        |   1 +
 net/mac80211/mlme.c        |  69 +++++++++++++++++++++--------
 net/mac80211/util.c        | 106 +++++++++++++++++++++++++++++++++++++++++++++
 4 files changed, 160 insertions(+), 19 deletions(-)

diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 0cc584574976..6cac5bf7cba3 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2202,6 +2202,9 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
 				const struct ieee80211_vht_operation *oper,
 				const struct ieee80211_ht_operation *htop,
 				struct cfg80211_chan_def *chandef);
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+				    const struct ieee80211_he_operation *he_oper,
+				    struct cfg80211_chan_def *chandef);
 u32 ieee80211_chandef_downgrade(struct cfg80211_chan_def *c);
 
 int __must_check
diff --git a/net/mac80211/mesh.c b/net/mac80211/mesh.c
index 696d6fb322e6..5f1ca25b6c97 100644
--- a/net/mac80211/mesh.c
+++ b/net/mac80211/mesh.c
@@ -104,6 +104,7 @@ bool mesh_matches_local(struct ieee80211_sub_if_data *sdata,
 	ieee80211_chandef_vht_oper(&sdata->local->hw, vht_cap_info,
 				   ie->vht_operation, ie->ht_operation,
 				   &sta_chan_def);
+	ieee80211_chandef_he_6ghz_oper(sdata, ie->he_operation, &sta_chan_def);
 
 	if (!cfg80211_chandef_compatible(&sdata->vif.bss_conf.chandef,
 					 &sta_chan_def))
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 1f9c48414c85..bc558d1d20fc 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -156,15 +156,24 @@ ieee80211_determine_chantype(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_sta_ht_cap sta_ht_cap;
 	u32 ht_cfreq, ret;
 
-	memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
-	ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
-
 	memset(chandef, 0, sizeof(struct cfg80211_chan_def));
 	chandef->chan = channel;
 	chandef->width = NL80211_CHAN_WIDTH_20_NOHT;
 	chandef->center_freq1 = channel->center_freq;
 	chandef->freq1_offset = channel->freq_offset;
 
+	if (channel->band == NL80211_BAND_6GHZ) {
+		if (!ieee80211_chandef_he_6ghz_oper(sdata, he_oper, chandef))
+			ret = IEEE80211_STA_DISABLE_HT |
+			      IEEE80211_STA_DISABLE_VHT |
+			      IEEE80211_STA_DISABLE_HE;
+		vht_chandef = *chandef;
+		goto out;
+	}
+
+	memcpy(&sta_ht_cap, &sband->ht_cap, sizeof(sta_ht_cap));
+	ieee80211_apply_htcap_overrides(sdata, &sta_ht_cap);
+
 	if (!ht_oper || !sta_ht_cap.ht_supported) {
 		ret = IEEE80211_STA_DISABLE_HT |
 		      IEEE80211_STA_DISABLE_VHT |
@@ -914,7 +923,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 			 !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)))
 		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
+	if (sband->band != NL80211_BAND_6GHZ &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_HT))
 		ieee80211_add_ht_ie(sdata, skb, assoc_data->ap_ht_param,
 				    sband, chan, sdata->smps_mode);
 
@@ -968,7 +978,8 @@ static void ieee80211_send_assoc(struct ieee80211_sub_if_data *sdata)
 		offset = noffset;
 	}
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
+	if (sband->band != NL80211_BAND_6GHZ &&
+	    !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT))
 		ieee80211_add_vht_ie(sdata, skb, sband,
 				     &assoc_data->ap_vht_cap);
 
@@ -3248,6 +3259,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	struct ieee80211_bss_conf *bss_conf = &sdata->vif.bss_conf;
 	const struct cfg80211_bss_ies *bss_ies = NULL;
 	struct ieee80211_mgd_assoc_data *assoc_data = ifmgd->assoc_data;
+	bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
 	u32 changed = 0;
 	int err;
 	bool ret;
@@ -3289,11 +3301,12 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	 * 2G/3G/4G wifi routers, reported models include the "Onda PN51T",
 	 * "Vodafone PocketWiFi 2", "ZTE MF60" and a similar T-Mobile device.
 	 */
-	if ((assoc_data->wmm && !elems->wmm_param) ||
-	    (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
-	     (!elems->ht_cap_elem || !elems->ht_operation)) ||
-	    (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
-	     (!elems->vht_cap_elem || !elems->vht_operation))) {
+	if (!is_6ghz &&
+	    ((assoc_data->wmm && !elems->wmm_param) ||
+	     (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+	      (!elems->ht_cap_elem || !elems->ht_operation)) ||
+	     (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+	      (!elems->vht_cap_elem || !elems->vht_operation)))) {
 		const struct cfg80211_bss_ies *ies;
 		struct ieee802_11_elems bss_elems;
 
@@ -3351,7 +3364,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 	 * We previously checked these in the beacon/probe response, so
 	 * they should be present here. This is just a safety net.
 	 */
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
+	if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HT) &&
 	    (!elems->wmm_param || !elems->ht_cap_elem || !elems->ht_operation)) {
 		sdata_info(sdata,
 			   "HT AP is missing WMM params or HT capability/operation\n");
@@ -3359,7 +3372,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
+	if (!is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) &&
 	    (!elems->vht_cap_elem || !elems->vht_operation)) {
 		sdata_info(sdata,
 			   "VHT AP is missing VHT capability/operation\n");
@@ -3367,6 +3380,14 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		goto out;
 	}
 
+	if (is_6ghz && !(ifmgd->flags & IEEE80211_STA_DISABLE_HE) &&
+	    !elems->he_6ghz_capa) {
+		sdata_info(sdata,
+			   "HE 6 GHz AP is missing HE 6 GHz band capability\n");
+		ret = false;
+		goto out;
+	}
+
 	mutex_lock(&sdata->local->sta_mtx);
 	/*
 	 * station info was already allocated and inserted before
@@ -4826,6 +4847,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 	const struct ieee80211_he_operation *he_oper = NULL;
 	struct ieee80211_supported_band *sband;
 	struct cfg80211_chan_def chandef;
+	bool is_6ghz = cbss->channel->band == NL80211_BAND_6GHZ;
 	struct ieee80211_bss *bss = (void *)cbss->priv;
 	int ret;
 	u32 i;
@@ -4838,21 +4860,23 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 			  IEEE80211_STA_DISABLE_160MHZ);
 
 	/* disable HT/VHT/HE if we don't support them */
-	if (!sband->ht_cap.ht_supported) {
+	if (!sband->ht_cap.ht_supported && !is_6ghz) {
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
 		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 	}
 
-	if (!sband->vht_cap.vht_supported)
+	if (!sband->vht_cap.vht_supported && !is_6ghz) {
 		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
+	}
 
 	if (!ieee80211_get_he_sta_cap(sband))
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HE;
 
 	rcu_read_lock();
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT)) {
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_HT) && !is_6ghz) {
 		const u8 *ht_oper_ie, *ht_cap_ie;
 
 		ht_oper_ie = ieee80211_bss_get_ie(cbss, WLAN_EID_HT_OPERATION);
@@ -4869,7 +4893,7 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 		}
 	}
 
-	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT)) {
+	if (!(ifmgd->flags & IEEE80211_STA_DISABLE_VHT) && !is_6ghz) {
 		const u8 *vht_oper_ie, *vht_cap;
 
 		vht_oper_ie = ieee80211_bss_get_ie(cbss,
@@ -4934,6 +4958,11 @@ static int ieee80211_prep_channel(struct ieee80211_sub_if_data *sdata,
 
 	rcu_read_unlock();
 
+	if (ifmgd->flags & IEEE80211_STA_DISABLE_HE && is_6ghz) {
+		sdata_info(sdata, "Rejecting non-HE 6/7 GHz connection");
+		return -EINVAL;
+	}
+
 	/* will change later if needed */
 	sdata->smps_mode = IEEE80211_SMPS_OFF;
 
@@ -5315,6 +5344,7 @@ int ieee80211_mgd_auth(struct ieee80211_sub_if_data *sdata,
 int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 			struct cfg80211_assoc_request *req)
 {
+	bool is_6ghz = req->bss->channel->band == NL80211_BAND_6GHZ;
 	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_if_managed *ifmgd = &sdata->u.mgd;
 	struct ieee80211_bss *bss = (void *)req->bss->priv;
@@ -5457,14 +5487,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 	if (ht_ie && ht_ie[1] >= sizeof(struct ieee80211_ht_operation))
 		assoc_data->ap_ht_param =
 			((struct ieee80211_ht_operation *)(ht_ie + 2))->ht_param;
-	else
+	else if (!is_6ghz)
 		ifmgd->flags |= IEEE80211_STA_DISABLE_HT;
 	vht_ie = ieee80211_bss_get_ie(req->bss, WLAN_EID_VHT_CAPABILITY);
 	if (vht_ie && vht_ie[1] >= sizeof(struct ieee80211_vht_cap))
 		memcpy(&assoc_data->ap_vht_cap, vht_ie + 2,
 		       sizeof(struct ieee80211_vht_cap));
-	else
-		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT;
+	else if (!is_6ghz)
+		ifmgd->flags |= IEEE80211_STA_DISABLE_VHT |
+				IEEE80211_STA_DISABLE_HE;
 	rcu_read_unlock();
 
 	if (WARN((sdata->vif.driver_flags & IEEE80211_VIF_SUPPORTS_UAPSD) &&
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index e6104829fa1c..cbe24d303f0d 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -3244,6 +3244,112 @@ bool ieee80211_chandef_vht_oper(struct ieee80211_hw *hw, u32 vht_cap_info,
 	return true;
 }
 
+bool ieee80211_chandef_he_6ghz_oper(struct ieee80211_sub_if_data *sdata,
+				    const struct ieee80211_he_operation *he_oper,
+				    struct cfg80211_chan_def *chandef)
+{
+	struct ieee80211_local *local = sdata->local;
+	struct ieee80211_supported_band *sband;
+	enum nl80211_iftype iftype = ieee80211_vif_type_p2p(&sdata->vif);
+	const struct ieee80211_sta_he_cap *he_cap;
+	struct cfg80211_chan_def he_chandef = *chandef;
+	const struct ieee80211_he_6ghz_oper *he_6ghz_oper;
+	bool support_80_80, support_160;
+	u8 he_phy_cap;
+	u32 freq;
+
+	if (chandef->chan->band != NL80211_BAND_6GHZ)
+		return true;
+
+	sband = local->hw.wiphy->bands[NL80211_BAND_6GHZ];
+
+	he_cap = ieee80211_get_he_iftype_cap(sband, iftype);
+	if (!he_cap) {
+		sdata_info(sdata, "Missing iftype sband data/HE cap");
+		return false;
+	}
+
+	he_phy_cap = he_cap->he_cap_elem.phy_cap_info[0];
+	support_160 =
+		he_phy_cap &
+		IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_160MHZ_IN_5G;
+	support_80_80 =
+		he_phy_cap &
+		IEEE80211_HE_PHY_CAP0_CHANNEL_WIDTH_SET_80PLUS80_MHZ_IN_5G;
+
+	if (!he_oper) {
+		sdata_info(sdata,
+			   "HE is not advertised on (on %d MHz), expect issues\n",
+			   chandef->chan->center_freq);
+		return false;
+	}
+
+	he_6ghz_oper = ieee80211_he_6ghz_oper(he_oper);
+
+	if (!he_6ghz_oper) {
+		sdata_info(sdata,
+			   "HE 6GHz operation missing (on %d MHz), expect issues\n",
+			   chandef->chan->center_freq);
+		return false;
+	}
+
+	freq = ieee80211_channel_to_frequency(he_6ghz_oper->primary,
+					      NL80211_BAND_6GHZ);
+	he_chandef.chan = ieee80211_get_channel(sdata->local->hw.wiphy, freq);
+
+	switch (u8_get_bits(he_6ghz_oper->control,
+			    IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH)) {
+	case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_20MHZ:
+		he_chandef.width = NL80211_CHAN_WIDTH_20;
+		break;
+	case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_40MHZ:
+		he_chandef.width = NL80211_CHAN_WIDTH_40;
+		break;
+	case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_80MHZ:
+		he_chandef.width = NL80211_CHAN_WIDTH_80;
+		break;
+	case IEEE80211_HE_6GHZ_OPER_CTRL_CHANWIDTH_160MHZ:
+		he_chandef.width = NL80211_CHAN_WIDTH_80;
+		if (!he_6ghz_oper->ccfs1)
+			break;
+		if (abs(he_6ghz_oper->ccfs1 - he_6ghz_oper->ccfs0) == 8) {
+			if (support_160)
+				he_chandef.width = NL80211_CHAN_WIDTH_160;
+		} else {
+			if (support_80_80)
+				he_chandef.width = NL80211_CHAN_WIDTH_80P80;
+		}
+		break;
+	}
+
+	if (he_chandef.width == NL80211_CHAN_WIDTH_160) {
+		he_chandef.center_freq1 =
+			ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+						       NL80211_BAND_6GHZ);
+	} else {
+		he_chandef.center_freq1 =
+			ieee80211_channel_to_frequency(he_6ghz_oper->ccfs0,
+						       NL80211_BAND_6GHZ);
+		he_chandef.center_freq2 =
+			ieee80211_channel_to_frequency(he_6ghz_oper->ccfs1,
+						       NL80211_BAND_6GHZ);
+	}
+
+	if (!cfg80211_chandef_valid(&he_chandef)) {
+		sdata_info(sdata,
+			   "HE 6GHz operation resulted in invalid chandef: %d MHz/%d/%d MHz/%d MHz\n",
+			   he_chandef.chan ? he_chandef.chan->center_freq : 0,
+			   he_chandef.width,
+			   he_chandef.center_freq1,
+			   he_chandef.center_freq2);
+		return false;
+	}
+
+	*chandef = he_chandef;
+
+	return true;
+}
+
 int ieee80211_parse_bitrates(struct cfg80211_chan_def *chandef,
 			     const struct ieee80211_supported_band *sband,
 			     const u8 *srates, int srates_len, u32 *rates)
-- 
cgit v1.2.3-59-g8ed1b


From 3b3ec3d52e8f72ec8c40477b96f23440a89000be Mon Sep 17 00:00:00 2001
From: Shaul Triebitz <shaul.triebitz@intel.com>
Date: Thu, 28 May 2020 21:34:37 +0200
Subject: mac80211: check the correct bit for EMA AP

An AP supporting EMA (Enhanced Multi-BSSID advertisement) should set
bit 83 in the extended capabilities IE (9.4.2.26 in the 802.11ax D5 spec).
So the *3rd* bit of the 10th byte should be checked.
Also, in one place, the wrong byte was checked.
(cfg80211_find_ie returns a pointer to the beginning of the IE,
 so the data really starts at ie[2], so the 10th byte
 should be ie[12]. To avoid this confusion, use cfg80211_find_elem
 instead).

Signed-off-by: Shaul Triebitz <shaul.triebitz@intel.com>
Link: https://lore.kernel.org/r/20200528213443.4316121fa2a3.I9745582f8d41ad8e689dac0fefcd70b276d7c1ea@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/linux/ieee80211.h |  2 +-
 net/mac80211/mlme.c       | 18 +++++++++---------
 2 files changed, 10 insertions(+), 10 deletions(-)

diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h
index 9580dfd9e2d1..1ecfd19f836d 100644
--- a/include/linux/ieee80211.h
+++ b/include/linux/ieee80211.h
@@ -3082,7 +3082,7 @@ enum ieee80211_tdls_actioncode {
 #define WLAN_EXT_CAPA10_OBSS_NARROW_BW_RU_TOLERANCE_SUPPORT BIT(7)
 
 /* Defines support for enhanced multi-bssid advertisement*/
-#define WLAN_EXT_CAPA11_EMA_SUPPORT	BIT(1)
+#define WLAN_EXT_CAPA11_EMA_SUPPORT	BIT(3)
 
 /* TDLS specific payload type in the LLC/SNAP header */
 #define WLAN_TDLS_SNAP_RFTYPE	0x2
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index bc558d1d20fc..c534cd1bb9cd 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -5596,7 +5596,7 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 		assoc_data->timeout_started = true;
 		assoc_data->need_beacon = true;
 	} else if (beacon_ies) {
-		const u8 *ie;
+		const struct element *elem;
 		u8 dtim_count = 0;
 
 		ieee80211_get_dtim(beacon_ies, &dtim_count,
@@ -5613,15 +5613,15 @@ int ieee80211_mgd_assoc(struct ieee80211_sub_if_data *sdata,
 			sdata->vif.bss_conf.sync_dtim_count = dtim_count;
 		}
 
-		ie = cfg80211_find_ext_ie(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
-					  beacon_ies->data, beacon_ies->len);
-		if (ie && ie[1] >= 3)
-			sdata->vif.bss_conf.profile_periodicity = ie[4];
+		elem = cfg80211_find_ext_elem(WLAN_EID_EXT_MULTIPLE_BSSID_CONFIGURATION,
+					      beacon_ies->data, beacon_ies->len);
+		if (elem && elem->datalen >= 3)
+			sdata->vif.bss_conf.profile_periodicity = elem->data[2];
 
-		ie = cfg80211_find_ie(WLAN_EID_EXT_CAPABILITY,
-				      beacon_ies->data, beacon_ies->len);
-		if (ie && ie[1] >= 11 &&
-		    (ie[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
+		elem = cfg80211_find_elem(WLAN_EID_EXT_CAPABILITY,
+					  beacon_ies->data, beacon_ies->len);
+		if (elem && elem->datalen >= 11 &&
+		    (elem->data[10] & WLAN_EXT_CAPA11_EMA_SUPPORT))
 			sdata->vif.bss_conf.ema_ap = true;
 	} else {
 		assoc_data->timeout = jiffies;
-- 
cgit v1.2.3-59-g8ed1b


From 1bb9a8a4c81d0305c511a0919cd30ebfa91915ae Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:38 +0200
Subject: mac80211: use HE 6 GHz band capability and pass it to the driver

In order to handle 6 GHz AP side, take the HE 6 GHz band capability
data and pass it to the driver (which needs it for A-MPDU spacing
and A-MPDU length).

Link: https://lore.kernel.org/r/1589399105-25472-6-git-send-email-rmanohar@codeaurora.org
Co-developed-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Signed-off-by: Rajkumar Manoharan <rmanohar@codeaurora.org>
Link: https://lore.kernel.org/r/20200528213443.784e4890d82f.I5f1230d5ab27e84e7bbe88e3645b24ea15a0c146@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/mac80211.h     |  4 +++-
 net/mac80211/cfg.c         |  4 +++-
 net/mac80211/he.c          | 48 ++++++++++++++++++++++++++++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  1 +
 net/mac80211/mesh_plink.c  |  4 +++-
 net/mac80211/mlme.c        |  1 +
 6 files changed, 59 insertions(+), 3 deletions(-)

diff --git a/include/net/mac80211.h b/include/net/mac80211.h
index 7cb712427df1..11d5610d2ad5 100644
--- a/include/net/mac80211.h
+++ b/include/net/mac80211.h
@@ -7,7 +7,7 @@
  * Copyright 2007-2010	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright (C) 2015 - 2017 Intel Deutschland GmbH
- * Copyright (C) 2018 - 2019 Intel Corporation
+ * Copyright (C) 2018 - 2020 Intel Corporation
  */
 
 #ifndef MAC80211_H
@@ -1977,6 +1977,7 @@ struct ieee80211_sta_txpwr {
  * @ht_cap: HT capabilities of this STA; restricted to our own capabilities
  * @vht_cap: VHT capabilities of this STA; restricted to our own capabilities
  * @he_cap: HE capabilities of this STA
+ * @he_6ghz_capa: on 6 GHz, holds the HE 6 GHz band capabilities
  * @max_rx_aggregation_subframes: maximal amount of frames in a single AMPDU
  *	that this station is allowed to transmit to us.
  *	Can be modified by driver.
@@ -2016,6 +2017,7 @@ struct ieee80211_sta {
 	struct ieee80211_sta_ht_cap ht_cap;
 	struct ieee80211_sta_vht_cap vht_cap;
 	struct ieee80211_sta_he_cap he_cap;
+	struct ieee80211_he_6ghz_capa he_6ghz_capa;
 	u16 max_rx_aggregation_subframes;
 	bool wme;
 	u8 uapsd_queues;
diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 06a2b7640a9d..90a07d075fdb 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -1520,7 +1520,9 @@ static int sta_apply_parameters(struct ieee80211_local *local,
 	if (params->he_capa)
 		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
 						  (void *)params->he_capa,
-						  params->he_capa_len, sta);
+						  params->he_capa_len,
+						  (void *)params->he_6ghz_capa,
+						  sta);
 
 	if (params->opmode_notif_used) {
 		/* returned value is only needed for rc update, but the
diff --git a/net/mac80211/he.c b/net/mac80211/he.c
index f520552b22be..cc26f239838b 100644
--- a/net/mac80211/he.c
+++ b/net/mac80211/he.c
@@ -8,10 +8,55 @@
 
 #include "ieee80211_i.h"
 
+static void
+ieee80211_update_from_he_6ghz_capa(const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
+				   struct sta_info *sta)
+{
+	enum ieee80211_smps_mode smps_mode;
+
+	if (sta->sdata->vif.type == NL80211_IFTYPE_AP ||
+	    sta->sdata->vif.type == NL80211_IFTYPE_AP_VLAN) {
+		switch (le16_get_bits(he_6ghz_capa->capa,
+				      IEEE80211_HE_6GHZ_CAP_SM_PS)) {
+		case WLAN_HT_CAP_SM_PS_INVALID:
+		case WLAN_HT_CAP_SM_PS_STATIC:
+			smps_mode = IEEE80211_SMPS_STATIC;
+			break;
+		case WLAN_HT_CAP_SM_PS_DYNAMIC:
+			smps_mode = IEEE80211_SMPS_DYNAMIC;
+			break;
+		case WLAN_HT_CAP_SM_PS_DISABLED:
+			smps_mode = IEEE80211_SMPS_OFF;
+			break;
+		}
+
+		sta->sta.smps_mode = smps_mode;
+	} else {
+		sta->sta.smps_mode = IEEE80211_SMPS_OFF;
+	}
+
+	switch (le16_get_bits(he_6ghz_capa->capa,
+			      IEEE80211_HE_6GHZ_CAP_MAX_MPDU_LEN)) {
+	case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_11454:
+		sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_11454;
+		break;
+	case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_7991:
+		sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_7991;
+		break;
+	case IEEE80211_VHT_CAP_MAX_MPDU_LENGTH_3895:
+	default:
+		sta->sta.max_amsdu_len = IEEE80211_MAX_MPDU_LEN_VHT_3895;
+		break;
+	}
+
+	sta->sta.he_6ghz_capa = *he_6ghz_capa;
+}
+
 void
 ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_supported_band *sband,
 				  const u8 *he_cap_ie, u8 he_cap_len,
+				  const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
 				  struct sta_info *sta)
 {
 	struct ieee80211_sta_he_cap *he_cap = &sta->sta.he_cap;
@@ -53,6 +98,9 @@ ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 
 	sta->cur_max_bandwidth = ieee80211_sta_cap_rx_bw(sta);
 	sta->sta.bandwidth = ieee80211_sta_cur_vht_bw(sta);
+
+	if (sband->band == NL80211_BAND_6GHZ && he_6ghz_capa)
+		ieee80211_update_from_he_6ghz_capa(he_6ghz_capa, sta);
 }
 
 void
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 6cac5bf7cba3..24dc1fd57000 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -1899,6 +1899,7 @@ void
 ieee80211_he_cap_ie_to_sta_he_cap(struct ieee80211_sub_if_data *sdata,
 				  struct ieee80211_supported_band *sband,
 				  const u8 *he_cap_ie, u8 he_cap_len,
+				  const struct ieee80211_he_6ghz_capa *he_6ghz_capa,
 				  struct sta_info *sta);
 void
 ieee80211_he_spr_ie_to_bss_conf(struct ieee80211_vif *vif,
diff --git a/net/mac80211/mesh_plink.c b/net/mac80211/mesh_plink.c
index fbbfc5d4a51c..798e4b6b383f 100644
--- a/net/mac80211/mesh_plink.c
+++ b/net/mac80211/mesh_plink.c
@@ -444,7 +444,9 @@ static void mesh_sta_info_init(struct ieee80211_sub_if_data *sdata,
 					    elems->vht_cap_elem, sta);
 
 	ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband, elems->he_cap,
-					  elems->he_cap_len, sta);
+					  elems->he_cap_len,
+					  elems->he_6ghz_capa,
+					  sta);
 
 	if (bw != sta->sta.bandwidth)
 		changed |= IEEE80211_RC_BW_CHANGED;
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index c534cd1bb9cd..8a37089e86bb 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -3430,6 +3430,7 @@ static bool ieee80211_assoc_success(struct ieee80211_sub_if_data *sdata,
 		ieee80211_he_cap_ie_to_sta_he_cap(sdata, sband,
 						  elems->he_cap,
 						  elems->he_cap_len,
+						  elems->he_6ghz_capa,
 						  sta);
 
 		bss_conf->he_support = sta->sta.he_cap.has_he;
-- 
cgit v1.2.3-59-g8ed1b


From 2ad2274c58ee2dcaf9ccde5c63ff30f59b138f77 Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 28 May 2020 21:34:39 +0200
Subject: mac80211: Add HE 6GHz capabilities element to probe request

On 6 GHz, the HE 6 GHz capabilities element should be added to probe
requests; do that.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
[add commit message]
Link: https://lore.kernel.org/r/20200528213443.8ee764f0cde0.I2b0c66b60e11818c97c9803e04a6a197c6376243@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h     | 20 ++++++++++++++++++++
 net/mac80211/ieee80211_i.h |  2 +-
 net/mac80211/scan.c        | 17 +++++++++--------
 net/mac80211/util.c        | 36 ++++++++++++++++++++++++++++--------
 4 files changed, 58 insertions(+), 17 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 9b76be3d561a..95b55eea2afb 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -512,6 +512,26 @@ ieee80211_get_he_sta_cap(const struct ieee80211_supported_band *sband)
 	return ieee80211_get_he_iftype_cap(sband, NL80211_IFTYPE_STATION);
 }
 
+/**
+ * ieee80211_get_he_6ghz_capa - return HE 6 GHz capabilities
+ * @sband: the sband to search for the STA on
+ * @iftype: the iftype to search for
+ *
+ * Return: the 6GHz capabilities
+ */
+static inline __le16
+ieee80211_get_he_6ghz_capa(const struct ieee80211_supported_band *sband,
+			   enum nl80211_iftype iftype)
+{
+	const struct ieee80211_sband_iftype_data *data =
+		ieee80211_get_sband_iftype_data(sband, iftype);
+
+	if (WARN_ON(!data || !data->he_cap.has_he))
+		return 0;
+
+	return data->he_6ghz_capa.capa;
+}
+
 /**
  * wiphy_read_of_freq_limits - read frequency limits from device tree
  *
diff --git a/net/mac80211/ieee80211_i.h b/net/mac80211/ieee80211_i.h
index 24dc1fd57000..ec1a71ac65f2 100644
--- a/net/mac80211/ieee80211_i.h
+++ b/net/mac80211/ieee80211_i.h
@@ -2144,7 +2144,7 @@ enum {
 	IEEE80211_PROBE_FLAG_RANDOM_SN		= BIT(2),
 };
 
-int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
 			     size_t buffer_len,
 			     struct ieee80211_scan_ies *ie_desc,
 			     const u8 *ie, size_t ie_len,
diff --git a/net/mac80211/scan.c b/net/mac80211/scan.c
index d0c2e8012118..ad90bbe57457 100644
--- a/net/mac80211/scan.c
+++ b/net/mac80211/scan.c
@@ -313,8 +313,9 @@ ieee80211_prepare_scan_chandef(struct cfg80211_chan_def *chandef,
 }
 
 /* return false if no more work */
-static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
+static bool ieee80211_prep_hw_scan(struct ieee80211_sub_if_data *sdata)
 {
+	struct ieee80211_local *local = sdata->local;
 	struct cfg80211_scan_request *req;
 	struct cfg80211_chan_def chandef;
 	u8 bands_used = 0;
@@ -361,7 +362,7 @@ static bool ieee80211_prep_hw_scan(struct ieee80211_local *local)
 	if (req->flags & NL80211_SCAN_FLAG_MIN_PREQ_CONTENT)
 		flags |= IEEE80211_PROBE_FLAG_MIN_CONTENT;
 
-	ielen = ieee80211_build_preq_ies(local,
+	ielen = ieee80211_build_preq_ies(sdata,
 					 (u8 *)local->hw_scan_req->req.ie,
 					 local->hw_scan_ies_bufsize,
 					 &local->hw_scan_req->ies,
@@ -401,9 +402,12 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 	if (WARN_ON(!local->scan_req))
 		return;
 
+	scan_sdata = rcu_dereference_protected(local->scan_sdata,
+					       lockdep_is_held(&local->mtx));
+
 	if (hw_scan && !aborted &&
 	    !ieee80211_hw_check(&local->hw, SINGLE_SCAN_ON_ALL_BANDS) &&
-	    ieee80211_prep_hw_scan(local)) {
+	    ieee80211_prep_hw_scan(scan_sdata)) {
 		int rc;
 
 		rc = drv_hw_scan(local,
@@ -432,9 +436,6 @@ static void __ieee80211_scan_completed(struct ieee80211_hw *hw, bool aborted)
 		cfg80211_scan_done(scan_req, &local->scan_info);
 	}
 	RCU_INIT_POINTER(local->scan_req, NULL);
-
-	scan_sdata = rcu_dereference_protected(local->scan_sdata,
-					       lockdep_is_held(&local->mtx));
 	RCU_INIT_POINTER(local->scan_sdata, NULL);
 
 	local->scanning = 0;
@@ -776,7 +777,7 @@ static int __ieee80211_start_scan(struct ieee80211_sub_if_data *sdata,
 	ieee80211_recalc_idle(local);
 
 	if (hw_scan) {
-		WARN_ON(!ieee80211_prep_hw_scan(local));
+		WARN_ON(!ieee80211_prep_hw_scan(sdata));
 		rc = drv_hw_scan(local, sdata, local->hw_scan_req);
 	} else {
 		rc = ieee80211_start_sw_scan(local, sdata);
@@ -1274,7 +1275,7 @@ int __ieee80211_request_sched_scan_start(struct ieee80211_sub_if_data *sdata,
 
 	ieee80211_prepare_scan_chandef(&chandef, req->scan_width);
 
-	ieee80211_build_preq_ies(local, ie, num_bands * iebufsz,
+	ieee80211_build_preq_ies(sdata, ie, num_bands * iebufsz,
 				 &sched_scan_ies, req->ie,
 				 req->ie_len, bands_used, rate_masks, &chandef,
 				 flags);
diff --git a/net/mac80211/util.c b/net/mac80211/util.c
index cbe24d303f0d..21c94094a699 100644
--- a/net/mac80211/util.c
+++ b/net/mac80211/util.c
@@ -1663,7 +1663,20 @@ void ieee80211_send_deauth_disassoc(struct ieee80211_sub_if_data *sdata,
 	}
 }
 
-static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
+static u8 *ieee80211_write_he_6ghz_cap(u8 *pos, __le16 cap, u8 *end)
+{
+	if ((end - pos) < 5)
+		return pos;
+
+	*pos++ = WLAN_EID_EXTENSION;
+	*pos++ = 1 + sizeof(cap);
+	*pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
+	memcpy(pos, &cap, sizeof(cap));
+
+	return pos + 2;
+}
+
+static int ieee80211_build_preq_ies_band(struct ieee80211_sub_if_data *sdata,
 					 u8 *buffer, size_t buffer_len,
 					 const u8 *ie, size_t ie_len,
 					 enum nl80211_band band,
@@ -1671,6 +1684,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 					 struct cfg80211_chan_def *chandef,
 					 size_t *offset, u32 flags)
 {
+	struct ieee80211_local *local = sdata->local;
 	struct ieee80211_supported_band *sband;
 	const struct ieee80211_sta_he_cap *he_cap;
 	u8 *pos = buffer, *end = buffer + buffer_len;
@@ -1848,6 +1862,14 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 		pos = ieee80211_ie_build_he_cap(pos, he_cap, end);
 		if (!pos)
 			goto out_err;
+
+		if (sband->band == NL80211_BAND_6GHZ) {
+			enum nl80211_iftype iftype =
+				ieee80211_vif_type_p2p(&sdata->vif);
+			__le16 cap = ieee80211_get_he_6ghz_capa(sband, iftype);
+
+			pos = ieee80211_write_he_6ghz_cap(pos, cap, end);
+		}
 	}
 
 	/*
@@ -1862,7 +1884,7 @@ static int ieee80211_build_preq_ies_band(struct ieee80211_local *local,
 	return pos - buffer;
 }
 
-int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
+int ieee80211_build_preq_ies(struct ieee80211_sub_if_data *sdata, u8 *buffer,
 			     size_t buffer_len,
 			     struct ieee80211_scan_ies *ie_desc,
 			     const u8 *ie, size_t ie_len,
@@ -1877,7 +1899,7 @@ int ieee80211_build_preq_ies(struct ieee80211_local *local, u8 *buffer,
 
 	for (i = 0; i < NUM_NL80211_BANDS; i++) {
 		if (bands_used & BIT(i)) {
-			pos += ieee80211_build_preq_ies_band(local,
+			pos += ieee80211_build_preq_ies_band(sdata,
 							     buffer + pos,
 							     buffer_len - pos,
 							     ie, ie_len, i,
@@ -1939,7 +1961,7 @@ struct sk_buff *ieee80211_build_probe_req(struct ieee80211_sub_if_data *sdata,
 		return NULL;
 
 	rate_masks[chan->band] = ratemask;
-	ies_len = ieee80211_build_preq_ies(local, skb_tail_pointer(skb),
+	ies_len = ieee80211_build_preq_ies(sdata, skb_tail_pointer(skb),
 					   skb_tailroom(skb), &dummy_ie_desc,
 					   ie, ie_len, BIT(chan->band),
 					   rate_masks, &chandef, flags);
@@ -2879,10 +2901,8 @@ void ieee80211_ie_build_he_6ghz_cap(struct ieee80211_sub_if_data *sdata,
 	}
 
 	pos = skb_put(skb, 2 + 1 + sizeof(cap));
-	*pos++ = WLAN_EID_EXTENSION;
-	*pos++ = 1 + sizeof(cap);
-	*pos++ = WLAN_EID_EXT_HE_6GHZ_CAPA;
-	put_unaligned_le16(cap, pos);
+	ieee80211_write_he_6ghz_cap(pos, cpu_to_le16(cap),
+				    pos + 2 + 1 + sizeof(cap));
 }
 
 u8 *ieee80211_ie_build_ht_oper(u8 *pos, struct ieee80211_sta_ht_cap *ht_cap,
-- 
cgit v1.2.3-59-g8ed1b


From ba8f6a037f790147438173029799f54c9d3065f2 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:40 +0200
Subject: cfg80211: treat 6 GHz channels as valid regardless of capability

If a 6 GHz channel exists, then we can probably safely assume that
the device actually supports it, and then it should support most
bandwidths.

This will probably need to be extended to check the interface type
and then dig into the HE capabilities for that though, to have the
correct bandwidth check.

Link: https://lore.kernel.org/r/20200528213443.d4864ef52e92.I82f09b2b14a56413ce20376d09967fe954a033eb@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/chan.c | 22 +++++++++++++++-------
 1 file changed, 15 insertions(+), 7 deletions(-)

diff --git a/net/wireless/chan.c b/net/wireless/chan.c
index e111c08daa0e..cddf92c5d09e 100644
--- a/net/wireless/chan.c
+++ b/net/wireless/chan.c
@@ -6,7 +6,7 @@
  *
  * Copyright 2009	Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
- * Copyright 2018       Intel Corporation
+ * Copyright 2018-2020	Intel Corporation
  */
 
 #include <linux/export.h>
@@ -919,7 +919,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		width = 10;
 		break;
 	case NL80211_CHAN_WIDTH_20:
-		if (!ht_cap->ht_supported)
+		if (!ht_cap->ht_supported &&
+		    chandef->chan->band != NL80211_BAND_6GHZ)
 			return false;
 		/* fall through */
 	case NL80211_CHAN_WIDTH_20_NOHT:
@@ -928,6 +929,8 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		break;
 	case NL80211_CHAN_WIDTH_40:
 		width = 40;
+		if (chandef->chan->band == NL80211_BAND_6GHZ)
+			break;
 		if (!ht_cap->ht_supported)
 			return false;
 		if (!(ht_cap->cap & IEEE80211_HT_CAP_SUP_WIDTH_20_40) ||
@@ -942,24 +945,29 @@ bool cfg80211_chandef_usable(struct wiphy *wiphy,
 		break;
 	case NL80211_CHAN_WIDTH_80P80:
 		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
-		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
+		if (chandef->chan->band != NL80211_BAND_6GHZ &&
+		    cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
 		/* fall through */
 	case NL80211_CHAN_WIDTH_80:
-		if (!vht_cap->vht_supported)
-			return false;
 		prohibited_flags |= IEEE80211_CHAN_NO_80MHZ;
 		width = 80;
+		if (chandef->chan->band == NL80211_BAND_6GHZ)
+			break;
+		if (!vht_cap->vht_supported)
+			return false;
 		break;
 	case NL80211_CHAN_WIDTH_160:
+		prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
+		width = 160;
+		if (chandef->chan->band == NL80211_BAND_6GHZ)
+			break;
 		if (!vht_cap->vht_supported)
 			return false;
 		cap = vht_cap->cap & IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_MASK;
 		if (cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160MHZ &&
 		    cap != IEEE80211_VHT_CAP_SUPP_CHAN_WIDTH_160_80PLUS80MHZ)
 			return false;
-		prohibited_flags |= IEEE80211_CHAN_NO_160MHZ;
-		width = 160;
 		break;
 	default:
 		WARN_ON_ONCE(1);
-- 
cgit v1.2.3-59-g8ed1b


From 461ce35d5535c1479384f67fcf4bfc3f3610edca Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:41 +0200
Subject: cfg80211: reject HT/VHT capabilities on 6 GHz band

On the 6 GHz band, HE should be used, but without any direct HT/VHT
capabilities; instead, the HE 6 GHz band capabilities will capture
the relevant information. Reject HT/VHT capabilities here.

Link: https://lore.kernel.org/r/20200528213443.bfe89c35459a.Ibba5e066fa0087fd49d13cfee89d196ea0c68ae2@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index b795f363d004..1651f86db6ca 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -5,7 +5,7 @@
  * Copyright 2006-2010		Johannes Berg <johannes@sipsolutions.net>
  * Copyright 2013-2014  Intel Mobile Communications GmbH
  * Copyright 2015-2017	Intel Deutschland GmbH
- * Copyright (C) 2018-2019 Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 
 #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt
@@ -807,6 +807,11 @@ int wiphy_register(struct wiphy *wiphy)
 			    !sband->n_bitrates))
 			return -EINVAL;
 
+		if (WARN_ON(band == NL80211_BAND_6GHZ &&
+			    (sband->ht_cap.ht_supported ||
+			     sband->vht_cap.vht_supported)))
+			return -EINVAL;
+
 		/*
 		 * Since cfg80211_disable_40mhz_24ghz is global, we can
 		 * modify the sband's ht data even if the driver uses a
-- 
cgit v1.2.3-59-g8ed1b


From f438136528482f98535889c9a6f99bbacdd92870 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:42 +0200
Subject: cfg80211: require HE capabilities for 6 GHz band

On the 6 GHz band, HE capabilities must be available for all of
the interface types; otherwise we shouldn't use 6 GHz. Check
this.

Link: https://lore.kernel.org/r/20200528213443.5881cb3c8c4a.I583b54172f91f98d44af64a16c5826fe458cbb27@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/wireless/core.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/net/wireless/core.c b/net/wireless/core.c
index 1651f86db6ca..5b6714460490 100644
--- a/net/wireless/core.c
+++ b/net/wireless/core.c
@@ -791,6 +791,7 @@ int wiphy_register(struct wiphy *wiphy)
 	/* sanity check supported bands/channels */
 	for (band = 0; band < NUM_NL80211_BANDS; band++) {
 		u16 types = 0;
+		bool have_he = false;
 
 		sband = wiphy->bands[band];
 		if (!sband)
@@ -859,8 +860,17 @@ int wiphy_register(struct wiphy *wiphy)
 				return -EINVAL;
 
 			types |= iftd->types_mask;
+
+			if (i == 0)
+				have_he = iftd->he_cap.has_he;
+			else
+				have_he = have_he &&
+					  iftd->he_cap.has_he;
 		}
 
+		if (WARN_ON(!have_he && band == NL80211_BAND_6GHZ))
+			return -EINVAL;
+
 		have_band = true;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 93382a0d119b3ab95e3ebca51ea15aa87187b493 Mon Sep 17 00:00:00 2001
From: Johannes Berg <johannes.berg@intel.com>
Date: Thu, 28 May 2020 21:34:44 +0200
Subject: mac80211: accept aggregation sessions on 6 GHz

On 6 GHz, stations don't have ht_supported set, but they can
still do aggregation since they must have HE; allow that.

Link: https://lore.kernel.org/r/20200528213443.776d3c891b64.Ifa099d450617b50c691832b3c4aa08959fab520a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/agg-rx.c | 5 +++--
 net/mac80211/agg-tx.c | 3 ++-
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/net/mac80211/agg-rx.c b/net/mac80211/agg-rx.c
index 4d1c335e06e5..7f245e9f114c 100644
--- a/net/mac80211/agg-rx.c
+++ b/net/mac80211/agg-rx.c
@@ -9,7 +9,7 @@
  * Copyright 2007, Michael Wu <flamingice@sourmilk.net>
  * Copyright 2007-2010, Intel Corporation
  * Copyright(c) 2015-2017 Intel Deutschland GmbH
- * Copyright (C) 2018        Intel Corporation
+ * Copyright (C) 2018-2020 Intel Corporation
  */
 
 /**
@@ -292,7 +292,8 @@ void ___ieee80211_start_rx_ba_session(struct sta_info *sta,
 		goto end;
 	}
 
-	if (!sta->sta.ht_cap.ht_supported) {
+	if (!sta->sta.ht_cap.ht_supported &&
+	    sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ) {
 		ht_dbg(sta->sdata,
 		       "STA %pM erroneously requests BA session on tid %d w/o QoS\n",
 		       sta->sta.addr, tid);
diff --git a/net/mac80211/agg-tx.c b/net/mac80211/agg-tx.c
index c2d5f512526d..b37c8a983d88 100644
--- a/net/mac80211/agg-tx.c
+++ b/net/mac80211/agg-tx.c
@@ -593,7 +593,8 @@ int ieee80211_start_tx_ba_session(struct ieee80211_sta *pubsta, u16 tid,
 		 "Requested to start BA session on reserved tid=%d", tid))
 		return -EINVAL;
 
-	if (!pubsta->ht_cap.ht_supported)
+	if (!pubsta->ht_cap.ht_supported &&
+	    sta->sdata->vif.bss_conf.chandef.chan->band != NL80211_BAND_6GHZ)
 		return -EINVAL;
 
 	if (WARN_ON_ONCE(!local->ops->ampdu_action))
-- 
cgit v1.2.3-59-g8ed1b


From 6fcb56ce0f9036b08d1c3e35ff93b100d499771b Mon Sep 17 00:00:00 2001
From: Ilan Peer <ilan.peer@intel.com>
Date: Thu, 28 May 2020 21:34:45 +0200
Subject: mac80211: Consider 6 GHz band when handling power constraint

Treat it like the 5 GHz band.

Signed-off-by: Ilan Peer <ilan.peer@intel.com>
Link: https://lore.kernel.org/r/20200528213443.889e5c9dd006.Id8ed3bb8000ba8738be5df05639415eb2e23c61a@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/mlme.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index 8a37089e86bb..d7bffd640ed3 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -1533,6 +1533,7 @@ ieee80211_find_80211h_pwr_constr(struct ieee80211_sub_if_data *sdata,
 		chan_increment = 1;
 		break;
 	case NL80211_BAND_5GHZ:
+	case NL80211_BAND_6GHZ:
 		chan_increment = 4;
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 07c12d618f06c8876fe12a53217d92b32d7baf07 Mon Sep 17 00:00:00 2001
From: Tova Mussai <tova.mussai@intel.com>
Date: Thu, 28 May 2020 21:34:46 +0200
Subject: mac80211: set short_slot for 6 GHz band

Set short slot also for 6 GHz band, just like 5 GHz.

Signed-off-by: Tova Mussai <tova.mussai@intel.com>
Link: https://lore.kernel.org/r/20200528213443.75f38e6f5efd.I272fbae402b03123f04e9ae69204eeab960c70cd@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 net/mac80211/cfg.c  | 3 ++-
 net/mac80211/mlme.c | 3 ++-
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/net/mac80211/cfg.c b/net/mac80211/cfg.c
index 90a07d075fdb..9b360544ad6f 100644
--- a/net/mac80211/cfg.c
+++ b/net/mac80211/cfg.c
@@ -2198,7 +2198,8 @@ static int ieee80211_change_bss(struct wiphy *wiphy,
 	}
 
 	if (!sdata->vif.bss_conf.use_short_slot &&
-	    sband->band == NL80211_BAND_5GHZ) {
+	    (sband->band == NL80211_BAND_5GHZ ||
+	     sband->band == NL80211_BAND_6GHZ)) {
 		sdata->vif.bss_conf.use_short_slot = true;
 		changed |= BSS_CHANGED_ERP_SLOT;
 	}
diff --git a/net/mac80211/mlme.c b/net/mac80211/mlme.c
index d7bffd640ed3..5820ef02a587 100644
--- a/net/mac80211/mlme.c
+++ b/net/mac80211/mlme.c
@@ -2171,7 +2171,8 @@ static u32 ieee80211_handle_bss_capability(struct ieee80211_sub_if_data *sdata,
 	}
 
 	use_short_slot = !!(capab & WLAN_CAPABILITY_SHORT_SLOT_TIME);
-	if (sband->band == NL80211_BAND_5GHZ)
+	if (sband->band == NL80211_BAND_5GHZ ||
+	    sband->band == NL80211_BAND_6GHZ)
 		use_short_slot = true;
 
 	if (use_protection != bss_conf->use_cts_prot) {
-- 
cgit v1.2.3-59-g8ed1b


From 093a48d2aa4b74db3134b61d7b7a061dbe79177b Mon Sep 17 00:00:00 2001
From: Nathan Errera <nathan.errera@intel.com>
Date: Thu, 28 May 2020 21:22:38 +0200
Subject: cfg80211: support bigger kek/kck key length

With some newer AKMs, the KCK and KEK are bigger, so allow that
if the driver advertises support for it. In addition, add a new
attribute for the AKM so we can use it for offloaded rekeying.

Signed-off-by: Nathan Errera <nathan.errera@intel.com>
[reword commit message]
Link: https://lore.kernel.org/r/20200528212237.5eb58b00a5d1.I61b09d77c4f382e8d58a05dcca78096e99a6bc15@changeid
Signed-off-by: Johannes Berg <johannes.berg@intel.com>
---
 include/net/cfg80211.h       | 12 +++++++++---
 include/uapi/linux/nl80211.h |  4 ++++
 net/wireless/nl80211.c       | 23 +++++++++++++++++++----
 3 files changed, 32 insertions(+), 7 deletions(-)

diff --git a/include/net/cfg80211.h b/include/net/cfg80211.h
index 95b55eea2afb..b58ad1a3f695 100644
--- a/include/net/cfg80211.h
+++ b/include/net/cfg80211.h
@@ -2936,12 +2936,17 @@ struct cfg80211_wowlan_wakeup {
 
 /**
  * struct cfg80211_gtk_rekey_data - rekey data
- * @kek: key encryption key (NL80211_KEK_LEN bytes)
- * @kck: key confirmation key (NL80211_KCK_LEN bytes)
+ * @kek: key encryption key (@kek_len bytes)
+ * @kck: key confirmation key (@kck_len bytes)
  * @replay_ctr: replay counter (NL80211_REPLAY_CTR_LEN bytes)
+ * @kek_len: length of kek
+ * @kck_len: length of kck
+ * @akm: akm (oui, id)
  */
 struct cfg80211_gtk_rekey_data {
 	const u8 *kek, *kck, *replay_ctr;
+	u32 akm;
+	u8 kek_len, kck_len;
 };
 
 /**
@@ -4166,9 +4171,10 @@ struct cfg80211_ops {
  *	beaconing mode (AP, IBSS, Mesh, ...).
  * @WIPHY_FLAG_HAS_STATIC_WEP: The device supports static WEP key installation
  *	before connection.
+ * @WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK: The device supports bigger kek and kck keys
  */
 enum wiphy_flags {
-	/* use hole at 0 */
+	WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK		= BIT(0),
 	/* use hole at 1 */
 	/* use hole at 2 */
 	WIPHY_FLAG_NETNS_OK			= BIT(3),
diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h
index 5b350d032fa3..dad8c8f8581f 100644
--- a/include/uapi/linux/nl80211.h
+++ b/include/uapi/linux/nl80211.h
@@ -5396,6 +5396,8 @@ enum plink_actions {
 
 #define NL80211_KCK_LEN			16
 #define NL80211_KEK_LEN			16
+#define NL80211_KCK_EXT_LEN		24
+#define NL80211_KEK_EXT_LEN		32
 #define NL80211_REPLAY_CTR_LEN		8
 
 /**
@@ -5404,6 +5406,7 @@ enum plink_actions {
  * @NL80211_REKEY_DATA_KEK: key encryption key (binary)
  * @NL80211_REKEY_DATA_KCK: key confirmation key (binary)
  * @NL80211_REKEY_DATA_REPLAY_CTR: replay counter (binary)
+ * @NL80211_REKEY_DATA_AKM: AKM data (OUI, suite type)
  * @NUM_NL80211_REKEY_DATA: number of rekey attributes (internal)
  * @MAX_NL80211_REKEY_DATA: highest rekey attribute (internal)
  */
@@ -5412,6 +5415,7 @@ enum nl80211_rekey_data {
 	NL80211_REKEY_DATA_KEK,
 	NL80211_REKEY_DATA_KCK,
 	NL80211_REKEY_DATA_REPLAY_CTR,
+	NL80211_REKEY_DATA_AKM,
 
 	/* keep last */
 	NUM_NL80211_REKEY_DATA,
diff --git a/net/wireless/nl80211.c b/net/wireless/nl80211.c
index 3a24e6add13e..263ae395ad44 100644
--- a/net/wireless/nl80211.c
+++ b/net/wireless/nl80211.c
@@ -730,9 +730,16 @@ nl80211_coalesce_policy[NUM_NL80211_ATTR_COALESCE_RULE] = {
 /* policy for GTK rekey offload attributes */
 static const struct nla_policy
 nl80211_rekey_policy[NUM_NL80211_REKEY_DATA] = {
-	[NL80211_REKEY_DATA_KEK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KEK_LEN),
-	[NL80211_REKEY_DATA_KCK] = NLA_POLICY_EXACT_LEN_WARN(NL80211_KCK_LEN),
+	[NL80211_REKEY_DATA_KEK] = {
+		.type = NLA_BINARY,
+		.len = NL80211_KEK_EXT_LEN
+	},
+	[NL80211_REKEY_DATA_KCK] = {
+		.type = NLA_BINARY,
+		.len = NL80211_KCK_EXT_LEN
+	},
 	[NL80211_REKEY_DATA_REPLAY_CTR] = NLA_POLICY_EXACT_LEN_WARN(NL80211_REPLAY_CTR_LEN),
+	[NL80211_REKEY_DATA_AKM] = { .type = NLA_U32 },
 };
 
 static const struct nla_policy
@@ -12347,14 +12354,22 @@ static int nl80211_set_rekey_data(struct sk_buff *skb, struct genl_info *info)
 		return -EINVAL;
 	if (nla_len(tb[NL80211_REKEY_DATA_REPLAY_CTR]) != NL80211_REPLAY_CTR_LEN)
 		return -ERANGE;
-	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN)
+	if (nla_len(tb[NL80211_REKEY_DATA_KEK]) != NL80211_KEK_LEN &&
+	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
+	      nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KEK_EXT_LEN))
 		return -ERANGE;
-	if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN)
+	if (nla_len(tb[NL80211_REKEY_DATA_KCK]) != NL80211_KCK_LEN &&
+	    !(rdev->wiphy.flags & WIPHY_FLAG_SUPPORTS_EXT_KEK_KCK &&
+	      nla_len(tb[NL80211_REKEY_DATA_KEK]) == NL80211_KCK_EXT_LEN))
 		return -ERANGE;
 
 	rekey_data.kek = nla_data(tb[NL80211_REKEY_DATA_KEK]);
 	rekey_data.kck = nla_data(tb[NL80211_REKEY_DATA_KCK]);
 	rekey_data.replay_ctr = nla_data(tb[NL80211_REKEY_DATA_REPLAY_CTR]);
+	rekey_data.kek_len = nla_len(tb[NL80211_REKEY_DATA_KEK]);
+	rekey_data.kck_len = nla_len(tb[NL80211_REKEY_DATA_KCK]);
+	if (tb[NL80211_REKEY_DATA_AKM])
+		rekey_data.akm = nla_get_u32(tb[NL80211_REKEY_DATA_AKM]);
 
 	wdev_lock(wdev);
 	if (!wdev->current_bss) {
-- 
cgit v1.2.3-59-g8ed1b


From f109603a4be0578a8145c8ae7d6e10b0b5ab6df4 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:17 -0700
Subject: ice: allow host to clear administratively set VF MAC

Currently a user is not allowed to clear a VF's administratively set MAC
on the PF. Fix this by allowing an all zero MAC address via "ip link set
${pf_eth} vf ${vf_id} mac 00:00:00:00:00:00".

An example use case for this would be issuing a "virsh shutdown"
command on a VM. The call to iproute mentioned above is part of this flow.
Without this change the driver incorrectly rejects clearing the VF's
administratively set MAC and prints unhelpful log messages.

Also, improve the comments surrounding this change.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 22 ++++++++++++++--------
 1 file changed, 14 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index a126e7c7663d..9550501f9279 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -3904,7 +3904,7 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 	if (ice_validate_vf_id(pf, vf_id))
 		return -EINVAL;
 
-	if (is_zero_ether_addr(mac) || is_multicast_ether_addr(mac)) {
+	if (is_multicast_ether_addr(mac)) {
 		netdev_err(netdev, "%pM not a valid unicast address\n", mac);
 		return -EINVAL;
 	}
@@ -3924,15 +3924,21 @@ int ice_set_vf_mac(struct net_device *netdev, int vf_id, u8 *mac)
 		return -EINVAL;
 	}
 
-	/* copy MAC into dflt_lan_addr and trigger a VF reset. The reset
-	 * flow will use the updated dflt_lan_addr and add a MAC filter
-	 * using ice_add_mac. Also set pf_set_mac to indicate that the PF has
-	 * set the MAC address for this VF.
+	/* VF is notified of its new MAC via the PF's response to the
+	 * VIRTCHNL_OP_GET_VF_RESOURCES message after the VF has been reset
 	 */
 	ether_addr_copy(vf->dflt_lan_addr.addr, mac);
-	vf->pf_set_mac = true;
-	netdev_info(netdev, "MAC on VF %d set to %pM. VF driver will be reinitialized\n",
-		    vf_id, mac);
+	if (is_zero_ether_addr(mac)) {
+		/* VF will send VIRTCHNL_OP_ADD_ETH_ADDR message with its MAC */
+		vf->pf_set_mac = false;
+		netdev_info(netdev, "Removing MAC on VF %d. VF driver will be reinitialized\n",
+			    vf->vf_id);
+	} else {
+		/* PF will add MAC rule for the VF */
+		vf->pf_set_mac = true;
+		netdev_info(netdev, "Setting MAC %pM on VF %d. VF driver will be reinitialized\n",
+			    mac, vf_id);
+	}
 
 	ice_vc_reset_vf(vf);
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From c1636a6e8a5e10190bd31ae085f9e8f8c5bc50a0 Mon Sep 17 00:00:00 2001
From: Paul Greenwalt <paul.greenwalt@intel.com>
Date: Fri, 15 May 2020 17:51:18 -0700
Subject: ice: support adding 16 unicast/multicast filter on untrusted VF

Allow untrusted VF to add 16 unicast/multicast filters. VF uses 1 filter
for the default/perm_addr/LAA MAC, 1 for broadcast, and 16 additional
unicast/multicast filters.

Signed-off-by: Paul Greenwalt <paul.greenwalt@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
index 0adff89a6749..67aa9110fdd1 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.h
@@ -7,7 +7,10 @@
 
 /* Restrict number of MAC Addr and VLAN that non-trusted VF can programmed */
 #define ICE_MAX_VLAN_PER_VF		8
-#define ICE_MAX_MACADDR_PER_VF		12
+/* MAC filters: 1 is reserved for the VF's default/perm_addr/LAA MAC, 1 for
+ * broadcast, and 16 for additional unicast/multicast filters
+ */
+#define ICE_MAX_MACADDR_PER_VF		18
 
 /* Malicious Driver Detection */
 #define ICE_DFLT_NUM_INVAL_MSGS_ALLOWED		10
-- 
cgit v1.2.3-59-g8ed1b


From 2bb19d6e077190bafbfd50f3793333af3f07a7b1 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:51:19 -0700
Subject: ice: Fix transmit for all software offloaded VLANs

Currently the driver does not recognize when there is an 802.1AD VLAN
tag right after the dmac/smac (outermost VLAN tag). If any DCB map is
applied and/or DCB is enabled, this causes the hardware to insert a
VLAN 0 tag after the 802.1AD VLAN tag that is already in the packet.
Fix this by preventing VLAN tag 0 from being added when any VLAN is
already present, either after dmac/smac (software offloaded) or in the
skb (hardware offloaded).

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_dcb_lib.c | 28 +++++++----------
 drivers/net/ethernet/intel/ice/ice_dcb_lib.h |  2 +-
 drivers/net/ethernet/intel/ice/ice_txrx.c    | 45 +++++++---------------------
 3 files changed, 21 insertions(+), 54 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
index 3c7f604c0c49..979af197f8a3 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.c
@@ -791,39 +791,31 @@ void ice_update_dcb_stats(struct ice_pf *pf)
  * ice_tx_prepare_vlan_flags_dcb - prepare VLAN tagging for DCB
  * @tx_ring: ring to send buffer on
  * @first: pointer to struct ice_tx_buf
+ *
+ * This should not be called if the outer VLAN is software offloaded as the VLAN
+ * tag will already be configured with the correct ID and priority bits
  */
-int
+void
 ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
 			      struct ice_tx_buf *first)
 {
 	struct sk_buff *skb = first->skb;
 
 	if (!test_bit(ICE_FLAG_DCB_ENA, tx_ring->vsi->back->flags))
-		return 0;
+		return;
 
 	/* Insert 802.1p priority into VLAN header */
-	if ((first->tx_flags & (ICE_TX_FLAGS_HW_VLAN | ICE_TX_FLAGS_SW_VLAN)) ||
+	if ((first->tx_flags & ICE_TX_FLAGS_HW_VLAN) ||
 	    skb->priority != TC_PRIO_CONTROL) {
 		first->tx_flags &= ~ICE_TX_FLAGS_VLAN_PR_M;
 		/* Mask the lower 3 bits to set the 802.1p priority */
 		first->tx_flags |= (skb->priority & 0x7) <<
 				   ICE_TX_FLAGS_VLAN_PR_S;
-		if (first->tx_flags & ICE_TX_FLAGS_SW_VLAN) {
-			struct vlan_ethhdr *vhdr;
-			int rc;
-
-			rc = skb_cow_head(skb, 0);
-			if (rc < 0)
-				return rc;
-			vhdr = (struct vlan_ethhdr *)skb->data;
-			vhdr->h_vlan_TCI = htons(first->tx_flags >>
-						 ICE_TX_FLAGS_VLAN_S);
-		} else {
-			first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
-		}
+		/* if this is not already set it means a VLAN 0 + priority needs
+		 * to be offloaded
+		 */
+		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
 	}
-
-	return 0;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
index 7c42324494d2..323238669572 100644
--- a/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_dcb_lib.h
@@ -27,7 +27,7 @@ void ice_pf_dcb_recfg(struct ice_pf *pf);
 void ice_vsi_cfg_dcb_rings(struct ice_vsi *vsi);
 int ice_init_pf_dcb(struct ice_pf *pf, bool locked);
 void ice_update_dcb_stats(struct ice_pf *pf);
-int
+void
 ice_tx_prepare_vlan_flags_dcb(struct ice_ring *tx_ring,
 			      struct ice_tx_buf *first);
 void
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx.c b/drivers/net/ethernet/intel/ice/ice_txrx.c
index cda7e05bd8ae..abdb137c8bb7 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx.c
@@ -2053,49 +2053,25 @@ int ice_tx_csum(struct ice_tx_buf *first, struct ice_tx_offload_params *off)
  *
  * Checks the skb and set up correspondingly several generic transmit flags
  * related to VLAN tagging for the HW, such as VLAN, DCB, etc.
- *
- * Returns error code indicate the frame should be dropped upon error and the
- * otherwise returns 0 to indicate the flags has been set properly.
  */
-static int
+static void
 ice_tx_prepare_vlan_flags(struct ice_ring *tx_ring, struct ice_tx_buf *first)
 {
 	struct sk_buff *skb = first->skb;
-	__be16 protocol = skb->protocol;
-
-	if (protocol == htons(ETH_P_8021Q) &&
-	    !(tx_ring->netdev->features & NETIF_F_HW_VLAN_CTAG_TX)) {
-		/* when HW VLAN acceleration is turned off by the user the
-		 * stack sets the protocol to 8021q so that the driver
-		 * can take any steps required to support the SW only
-		 * VLAN handling. In our case the driver doesn't need
-		 * to take any further steps so just set the protocol
-		 * to the encapsulated ethertype.
-		 */
-		skb->protocol = vlan_get_protocol(skb);
-		return 0;
-	}
 
-	/* if we have a HW VLAN tag being added, default to the HW one */
+	/* nothing left to do, software offloaded VLAN */
+	if (!skb_vlan_tag_present(skb) && eth_type_vlan(skb->protocol))
+		return;
+
+	/* currently, we always assume 802.1Q for VLAN insertion as VLAN
+	 * insertion for 802.1AD is not supported
+	 */
 	if (skb_vlan_tag_present(skb)) {
 		first->tx_flags |= skb_vlan_tag_get(skb) << ICE_TX_FLAGS_VLAN_S;
 		first->tx_flags |= ICE_TX_FLAGS_HW_VLAN;
-	} else if (protocol == htons(ETH_P_8021Q)) {
-		struct vlan_hdr *vhdr, _vhdr;
-
-		/* for SW VLAN, check the next protocol and store the tag */
-		vhdr = (struct vlan_hdr *)skb_header_pointer(skb, ETH_HLEN,
-							     sizeof(_vhdr),
-							     &_vhdr);
-		if (!vhdr)
-			return -EINVAL;
-
-		first->tx_flags |= ntohs(vhdr->h_vlan_TCI) <<
-				   ICE_TX_FLAGS_VLAN_S;
-		first->tx_flags |= ICE_TX_FLAGS_SW_VLAN;
 	}
 
-	return ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
+	ice_tx_prepare_vlan_flags_dcb(tx_ring, first);
 }
 
 /**
@@ -2403,8 +2379,7 @@ ice_xmit_frame_ring(struct sk_buff *skb, struct ice_ring *tx_ring)
 	first->tx_flags = 0;
 
 	/* prepare the VLAN tagging flags for Tx */
-	if (ice_tx_prepare_vlan_flags(tx_ring, first))
-		goto out_drop;
+	ice_tx_prepare_vlan_flags(tx_ring, first);
 
 	/* set up TSO offload */
 	tso = ice_tso(first, &offload);
-- 
cgit v1.2.3-59-g8ed1b


From c9a12d6d2091175fe2dc1707dd40d6ad781414fe Mon Sep 17 00:00:00 2001
From: Dan Nowlin <dan.nowlin@intel.com>
Date: Fri, 15 May 2020 17:51:20 -0700
Subject: ice: Increase timeout after PFR

To allow for resets during package download, increase the timeout period
after performing a PFR. The time waited is the global config lock
timeout plus the normal PFSWR timeout.

Signed-off-by: Dan Nowlin <dan.nowlin@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_common.c | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index 8c73e161829d..d4a31c734326 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -964,7 +964,12 @@ static enum ice_status ice_pf_reset(struct ice_hw *hw)
 
 	wr32(hw, PFGEN_CTRL, (reg | PFGEN_CTRL_PFSWR_M));
 
-	for (cnt = 0; cnt < ICE_PF_RESET_WAIT_COUNT; cnt++) {
+	/* Wait for the PFR to complete. The wait time is the global config lock
+	 * timeout plus the PFR timeout which will account for a possible reset
+	 * that is occurring during a download package operation.
+	 */
+	for (cnt = 0; cnt < ICE_GLOBAL_CFG_LOCK_TIMEOUT +
+	     ICE_PF_RESET_WAIT_COUNT; cnt++) {
 		reg = rd32(hw, PFGEN_CTRL);
 		if (!(reg & PFGEN_CTRL_PFSWR_M))
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From bff185e2406e10d2608857171537645714bea1f4 Mon Sep 17 00:00:00 2001
From: Chinh T Cao <chinh.t.cao@intel.com>
Date: Fri, 15 May 2020 17:51:21 -0700
Subject: ice: Update ICE_PHY_TYPE_HIGH_MAX_INDEX value

Currently, only 5 PHY_SPEEDs are supported for phy_type_high, so
adjust the value of ICE_PHY_TYPE_HIGH_MAX_INDEX to 5.

Signed-off-by: Chinh T Cao <chinh.t.cao@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index f04c338fb6e0..50040c5c55ec 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -974,7 +974,7 @@ struct ice_aqc_get_phy_caps {
 #define ICE_PHY_TYPE_HIGH_100G_CAUI2		BIT_ULL(2)
 #define ICE_PHY_TYPE_HIGH_100G_AUI2_AOC_ACC	BIT_ULL(3)
 #define ICE_PHY_TYPE_HIGH_100G_AUI2		BIT_ULL(4)
-#define ICE_PHY_TYPE_HIGH_MAX_INDEX		19
+#define ICE_PHY_TYPE_HIGH_MAX_INDEX		5
 
 struct ice_aqc_get_phy_caps_data {
 	__le64 phy_type_low; /* Use values from ICE_PHY_TYPE_LOW_* */
-- 
cgit v1.2.3-59-g8ed1b


From cf0bf41dd6cb1d5461c71d2159c7c062fff3c8fd Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:54:58 -0700
Subject: ice: Reset VF for all port VLAN changes from host

Currently the PF is modifying the VF's port VLAN on the fly when
configured via iproute. This is okay for most cases, but if the VF
already has guest VLANs configured the PF has to remove all of those
filters so only VLAN tagged traffic that matches the port VLAN will
pass. Instead of adding functionality to track which guest VLANs have
been added, just reset the VF each time port VLAN parameters are
modified.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 42 +++---------------------
 1 file changed, 5 insertions(+), 37 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 9550501f9279..2916cfb9d032 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -3295,7 +3295,6 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 		     __be16 vlan_proto)
 {
 	struct ice_pf *pf = ice_netdev_to_pf(netdev);
-	struct ice_vsi *vsi;
 	struct device *dev;
 	struct ice_vf *vf;
 	u16 vlanprio;
@@ -3317,8 +3316,6 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 	}
 
 	vf = &pf->vf[vf_id];
-	vsi = pf->vsi[vf->lan_vsi_idx];
-
 	ret = ice_check_vf_ready_for_cfg(vf);
 	if (ret)
 		return ret;
@@ -3331,44 +3328,15 @@ ice_set_vf_port_vlan(struct net_device *netdev, int vf_id, u16 vlan_id, u8 qos,
 		return 0;
 	}
 
-	if (vlan_id || qos) {
-		/* remove VLAN 0 filter set by default when transitioning from
-		 * no port VLAN to a port VLAN. No change to old port VLAN on
-		 * failure.
-		 */
-		ret = ice_vsi_kill_vlan(vsi, 0);
-		if (ret)
-			return ret;
-		ret = ice_vsi_manage_pvid(vsi, vlanprio, true);
-		if (ret)
-			return ret;
-	} else {
-		/* add VLAN 0 filter back when transitioning from port VLAN to
-		 * no port VLAN. No change to old port VLAN on failure.
-		 */
-		ret = ice_vsi_add_vlan(vsi, 0, ICE_FWD_TO_VSI);
-		if (ret)
-			return ret;
-		ret = ice_vsi_manage_pvid(vsi, 0, false);
-		if (ret)
-			return ret;
-	}
+	vf->port_vlan_info = vlanprio;
 
-	if (vlan_id) {
+	if (vf->port_vlan_info)
 		dev_info(dev, "Setting VLAN %d, QoS 0x%x on VF %d\n",
 			 vlan_id, qos, vf_id);
+	else
+		dev_info(dev, "Clearing port VLAN on VF %d\n", vf_id);
 
-		/* add VLAN filter for the port VLAN */
-		ret = ice_vsi_add_vlan(vsi, vlan_id, ICE_FWD_TO_VSI);
-		if (ret)
-			return ret;
-	}
-	/* remove old port VLAN filter with valid VLAN ID or QoS fields */
-	if (vf->port_vlan_info)
-		ice_vsi_kill_vlan(vsi, vf->port_vlan_info & VLAN_VID_MASK);
-
-	/* keep port VLAN information persistent on resets */
-	vf->port_vlan_info = le16_to_cpu(vsi->info.pvid);
+	ice_vc_reset_vf(vf);
 
 	return 0;
 }
-- 
cgit v1.2.3-59-g8ed1b


From 401ce33b32812a8fde6789588416d8c5b232138f Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:54:59 -0700
Subject: ice: Always clear QRXFLXP_CNTXT before writing new value

Always clear the previous value in QRXFLXP_CNTXT before writing a new
value. This will make it so re-used queues will not accidentally take the
previously configured settings.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_base.c | 33 +++++++++++--------------------
 drivers/net/ethernet/intel/ice/ice_lib.c  | 26 ++++++++++++++++++++++++
 drivers/net/ethernet/intel/ice/ice_lib.h  |  3 +++
 3 files changed, 40 insertions(+), 22 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_base.c b/drivers/net/ethernet/intel/ice/ice_base.c
index a174911d8994..d620d26d42ed 100644
--- a/drivers/net/ethernet/intel/ice/ice_base.c
+++ b/drivers/net/ethernet/intel/ice/ice_base.c
@@ -3,6 +3,7 @@
 
 #include <net/xdp_sock_drv.h>
 #include "ice_base.h"
+#include "ice_lib.h"
 #include "ice_dcb_lib.h"
 
 /**
@@ -288,7 +289,6 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	u32 rxdid = ICE_RXDID_FLEX_NIC;
 	struct ice_rlan_ctx rlan_ctx;
 	struct ice_hw *hw;
-	u32 regval;
 	u16 pf_q;
 	int err;
 
@@ -385,27 +385,16 @@ int ice_setup_rx_ctx(struct ice_ring *ring)
 	/* Rx queue threshold in units of 64 */
 	rlan_ctx.lrxqthresh = 1;
 
-	 /* Enable Flexible Descriptors in the queue context which
-	  * allows this driver to select a specific receive descriptor format
-	  */
-	regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
-	if (vsi->type != ICE_VSI_VF) {
-		regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
-			QRXFLXP_CNTXT_RXDID_IDX_M;
-
-		/* increasing context priority to pick up profile ID;
-		 * default is 0x01; setting to 0x03 to ensure profile
-		 * is programming if prev context is of same priority
-		 */
-		regval |= (0x03 << QRXFLXP_CNTXT_RXDID_PRIO_S) &
-			QRXFLXP_CNTXT_RXDID_PRIO_M;
-
-	} else {
-		regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
-			    QRXFLXP_CNTXT_RXDID_PRIO_M |
-			    QRXFLXP_CNTXT_TS_M);
-	}
-	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+	/* Enable Flexible Descriptors in the queue context which
+	 * allows this driver to select a specific receive descriptor format
+	 * increasing context priority to pick up profile ID; default is 0x01;
+	 * setting to 0x03 to ensure profile is programming if prev context is
+	 * of same priority
+	 */
+	if (vsi->type != ICE_VSI_VF)
+		ice_write_qrxflxp_cntxt(hw, pf_q, rxdid, 0x3);
+	else
+		ice_write_qrxflxp_cntxt(hw, pf_q, ICE_RXDID_LEGACY_1, 0x3);
 
 	/* Absolute queue number out of 2K needs to be passed */
 	err = ice_write_rxq_ctx(hw, &rlan_ctx, pf_q);
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index 89e8e4f7f56f..ecc04a696e50 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -1595,6 +1595,32 @@ void ice_vsi_cfg_frame_size(struct ice_vsi *vsi)
 	}
 }
 
+/**
+ * ice_write_qrxflxp_cntxt - write/configure QRXFLXP_CNTXT register
+ * @hw: HW pointer
+ * @pf_q: index of the Rx queue in the PF's queue space
+ * @rxdid: flexible descriptor RXDID
+ * @prio: priority for the RXDID for this queue
+ */
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio)
+{
+	int regval = rd32(hw, QRXFLXP_CNTXT(pf_q));
+
+	/* clear any previous values */
+	regval &= ~(QRXFLXP_CNTXT_RXDID_IDX_M |
+		    QRXFLXP_CNTXT_RXDID_PRIO_M |
+		    QRXFLXP_CNTXT_TS_M);
+
+	regval |= (rxdid << QRXFLXP_CNTXT_RXDID_IDX_S) &
+		QRXFLXP_CNTXT_RXDID_IDX_M;
+
+	regval |= (prio << QRXFLXP_CNTXT_RXDID_PRIO_S) &
+		QRXFLXP_CNTXT_RXDID_PRIO_M;
+
+	wr32(hw, QRXFLXP_CNTXT(pf_q), regval);
+}
+
 /**
  * ice_vsi_cfg_rxqs - Configure the VSI for Rx
  * @vsi: the VSI being configured
diff --git a/drivers/net/ethernet/intel/ice/ice_lib.h b/drivers/net/ethernet/intel/ice/ice_lib.h
index 076e635e0c9f..d80e6afa4511 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.h
+++ b/drivers/net/ethernet/intel/ice/ice_lib.h
@@ -74,6 +74,9 @@ int ice_vsi_rebuild(struct ice_vsi *vsi, bool init_vsi);
 
 bool ice_is_reset_in_progress(unsigned long *state);
 
+void
+ice_write_qrxflxp_cntxt(struct ice_hw *hw, u16 pf_q, u32 rxdid, u32 prio);
+
 void ice_vsi_put_qs(struct ice_vsi *vsi);
 
 void ice_vsi_dis_irq(struct ice_vsi *vsi);
-- 
cgit v1.2.3-59-g8ed1b


From 765dd7a1827c687b782e6ab3dd6daf4d13a4780f Mon Sep 17 00:00:00 2001
From: Jesse Brandeburg <jesse.brandeburg@intel.com>
Date: Fri, 15 May 2020 17:55:00 -0700
Subject: ice: Fix inability to set channels when down

Currently the driver prevents a user from doing
modprobe ice
ethtool -L eth0 combined 5
ip link set eth0 up

The ethtool command fails, because the driver is checking to see if the
interface is down before allowing the get_channels to proceed (even for
a set_channels).

Remove this check and allow the user to configure the interface
before bringing it up, which is a much better usability case.

Fixes: 87324e747fde ("ice: Implement ethtool ops for channels")
Signed-off-by: Jesse Brandeburg <jesse.brandeburg@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool.c | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool.c b/drivers/net/ethernet/intel/ice/ice_ethtool.c
index fd1849155d85..68c38004a088 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool.c
@@ -3189,10 +3189,6 @@ ice_get_channels(struct net_device *dev, struct ethtool_channels *ch)
 	struct ice_vsi *vsi = np->vsi;
 	struct ice_pf *pf = vsi->back;
 
-	/* check to see if VSI is active */
-	if (test_bit(__ICE_DOWN, vsi->state))
-		return;
-
 	/* report maximum channels */
 	ch->max_rx = ice_get_max_rxq(pf);
 	ch->max_tx = ice_get_max_txq(pf);
-- 
cgit v1.2.3-59-g8ed1b


From 7dcc0fb8f64913c783c7c1f06065c47e39b19794 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:55:01 -0700
Subject: ice: Allow VF to request reset as soon as it's initialized

A VF driver has the ability to request reset via VIRTCHNL_OP_RESET_VF.
This is a required step in VF driver load. Currently, the PF is only
allowing a VF to request reset using this method after the VF has
already communicated resources via VIRTCHNL_OP_GET_VF_RESOURCES.
However, this is incorrect because the VF can request reset before
requesting resources. Fix this by allowing the VF to request a reset
once it has been initialized.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
index 2916cfb9d032..16a2f2526ccc 100644
--- a/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
+++ b/drivers/net/ethernet/intel/ice/ice_virtchnl_pf.c
@@ -2014,7 +2014,7 @@ err:
  */
 static void ice_vc_reset_vf_msg(struct ice_vf *vf)
 {
-	if (test_bit(ICE_VF_STATE_ACTIVE, vf->vf_states))
+	if (test_bit(ICE_VF_STATE_INIT, vf->vf_states))
 		ice_reset_vf(vf, false);
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From ebb462dc21eae79bed5b050afb225534992bd1f0 Mon Sep 17 00:00:00 2001
From: Bruce Allan <bruce.w.allan@intel.com>
Date: Fri, 15 May 2020 17:55:02 -0700
Subject: ice: fix function signature style format

Where possible, cuddle multiple lines of function signatures to be
consistent throughout the code.

Signed-off-by: Bruce Allan <bruce.w.allan@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_controlq.c |  3 +--
 drivers/net/ethernet/intel/ice/ice_sched.c    | 12 ++++--------
 drivers/net/ethernet/intel/ice/ice_switch.c   |  9 +++------
 3 files changed, 8 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_controlq.c b/drivers/net/ethernet/intel/ice/ice_controlq.c
index 479a74efc536..1e18021aa073 100644
--- a/drivers/net/ethernet/intel/ice/ice_controlq.c
+++ b/drivers/net/ethernet/intel/ice/ice_controlq.c
@@ -769,8 +769,7 @@ enum ice_status ice_create_all_ctrlq(struct ice_hw *hw)
  *
  * Destroys the send and receive queue locks for a given control queue.
  */
-static void
-ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
+static void ice_destroy_ctrlq_locks(struct ice_ctl_q_info *cq)
 {
 	mutex_destroy(&cq->sq_lock);
 	mutex_destroy(&cq->rq_lock);
diff --git a/drivers/net/ethernet/intel/ice/ice_sched.c b/drivers/net/ethernet/intel/ice/ice_sched.c
index d63acd2fcf79..0475134295e4 100644
--- a/drivers/net/ethernet/intel/ice/ice_sched.c
+++ b/drivers/net/ethernet/intel/ice/ice_sched.c
@@ -1714,8 +1714,7 @@ ice_sched_cfg_vsi(struct ice_port_info *pi, u16 vsi_handle, u8 tc, u16 maxqs,
  * This function removes single aggregator VSI info entry from
  * aggregator list.
  */
-static void
-ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
+static void ice_sched_rm_agg_vsi_info(struct ice_port_info *pi, u16 vsi_handle)
 {
 	struct ice_sched_agg_info *agg_info;
 	struct ice_sched_agg_info *atmp;
@@ -1947,8 +1946,7 @@ ice_sched_cfg_node_bw_alloc(struct ice_hw *hw, struct ice_sched_node *node,
  *
  * Save or clear CIR bandwidth (BW) in the passed param bw_t_info.
  */
-static void
-ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+static void ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
 {
 	if (bw == ICE_SCHED_DFLT_BW) {
 		clear_bit(ICE_BW_TYPE_CIR, bw_t_info->bw_t_bitmap);
@@ -1967,8 +1965,7 @@ ice_set_clear_cir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
  *
  * Save or clear EIR bandwidth (BW) in the passed param bw_t_info.
  */
-static void
-ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+static void ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
 {
 	if (bw == ICE_SCHED_DFLT_BW) {
 		clear_bit(ICE_BW_TYPE_EIR, bw_t_info->bw_t_bitmap);
@@ -1993,8 +1990,7 @@ ice_set_clear_eir_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
  *
  * Save or clear shared bandwidth (BW) in the passed param bw_t_info.
  */
-static void
-ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
+static void ice_set_clear_shared_bw(struct ice_bw_type_info *bw_t_info, u32 bw)
 {
 	if (bw == ICE_SCHED_DFLT_BW) {
 		clear_bit(ICE_BW_TYPE_SHARED, bw_t_info->bw_t_bitmap);
diff --git a/drivers/net/ethernet/intel/ice/ice_switch.c b/drivers/net/ethernet/intel/ice/ice_switch.c
index 0156b73df1b1..ff7d16ac693e 100644
--- a/drivers/net/ethernet/intel/ice/ice_switch.c
+++ b/drivers/net/ethernet/intel/ice/ice_switch.c
@@ -1612,8 +1612,7 @@ exit:
  * check for duplicates in this case, removing duplicates from a given
  * list should be taken care of in the caller of this function.
  */
-enum ice_status
-ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
+enum ice_status ice_add_mac(struct ice_hw *hw, struct list_head *m_list)
 {
 	struct ice_aqc_sw_rules_elem *s_rule, *r_iter;
 	struct ice_fltr_list_entry *m_list_itr;
@@ -1914,8 +1913,7 @@ exit:
  * @hw: pointer to the hardware structure
  * @v_list: list of VLAN entries and forwarding information
  */
-enum ice_status
-ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
+enum ice_status ice_add_vlan(struct ice_hw *hw, struct list_head *v_list)
 {
 	struct ice_fltr_list_entry *v_list_itr;
 
@@ -2145,8 +2143,7 @@ ice_find_ucast_rule_entry(struct ice_hw *hw, u8 recp_id,
  * the entries passed into m_list were added previously. It will not attempt to
  * do a partial remove of entries that were found.
  */
-enum ice_status
-ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
+enum ice_status ice_remove_mac(struct ice_hw *hw, struct list_head *m_list)
 {
 	struct ice_fltr_list_entry *list_itr, *tmp;
 	struct mutex *rule_lock; /* Lock to protect filter rule list */
-- 
cgit v1.2.3-59-g8ed1b


From 1a9c561aa35534a03c0aa51c7fb1485731202a7c Mon Sep 17 00:00:00 2001
From: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Date: Fri, 15 May 2020 17:55:03 -0700
Subject: ice: fix PCI device serial number to be lowercase values

Commit ceb2f00707f9 ("ice: Use pci_get_dsn()") changed the code to
use a new function to get the Device Serial Number. It also changed
the case of the filename for loading a package on a specific NIC
from lowercase to uppercase. Change the filename back to
lowercase since that is what we specified.

Fixes: ceb2f00707f9 ("ice: Use pci_get_dsn()")
Signed-off-by: Paul M Stillwell Jr <paul.m.stillwell.jr@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_main.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index bbf92d2f1ac1..cb72ff32a29b 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -3248,7 +3248,7 @@ static char *ice_get_opt_fw_name(struct ice_pf *pf)
 	if (!opt_fw_filename)
 		return NULL;
 
-	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llX.pkg",
+	snprintf(opt_fw_filename, NAME_MAX, "%sice-%016llx.pkg",
 		 ICE_DDP_PKG_PATH, dsn);
 
 	return opt_fw_filename;
-- 
cgit v1.2.3-59-g8ed1b


From a039f6fcba452fba5973798f4c641eee1ef770a1 Mon Sep 17 00:00:00 2001
From: Brett Creeley <brett.creeley@intel.com>
Date: Fri, 15 May 2020 17:55:04 -0700
Subject: ice: Use coalesce values from q_vector 0 when increasing q_vectors

Currently when a VSI is built (i.e. reset, set channels, etc.)
the coalesce settings will be preserved in most cases. However, when the
number of q_vectors is increased the settings for the new q_vectors
will be set to the driver defaults of AIM on, Rx/Tx ITR 50, and INTRL 0.
This is causing issues with how the ethtool layer gets the current
coalesce settings since it only uses q_vector 0. So, assume that the user
set the coalesce settings globally (i.e. ethtool -C eth0) and use q_vector
0's settings for all of the new q_vectors.

Signed-off-by: Brett Creeley <brett.creeley@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_lib.c | 14 ++++++--------
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_lib.c b/drivers/net/ethernet/intel/ice/ice_lib.c
index ecc04a696e50..28b46cc9f5cb 100644
--- a/drivers/net/ethernet/intel/ice/ice_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_lib.c
@@ -2707,15 +2707,13 @@ ice_vsi_rebuild_set_coalesce(struct ice_vsi *vsi,
 		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
 						&coalesce[i]);
 
-	for (; i < vsi->num_q_vectors; i++) {
-		struct ice_coalesce_stored coalesce_dflt = {
-			.itr_tx = ICE_DFLT_TX_ITR,
-			.itr_rx = ICE_DFLT_RX_ITR,
-			.intrl = 0
-		};
+	/* number of q_vectors increased, so assume coalesce settings were
+	 * changed globally (i.e. ethtool -C eth0 instead of per-queue) and use
+	 * the previous settings from q_vector 0 for all of the new q_vectors
+	 */
+	for (; i < vsi->num_q_vectors; i++)
 		ice_vsi_rebuild_update_coalesce(vsi->q_vectors[i],
-						&coalesce_dflt);
-	}
+						&coalesce[0]);
 }
 
 /**
-- 
cgit v1.2.3-59-g8ed1b


From d5329be9907723a646f8874388cdaaccba988b4d Mon Sep 17 00:00:00 2001
From: Henry Tieman <henry.w.tieman@intel.com>
Date: Fri, 15 May 2020 17:55:05 -0700
Subject: ice: fix aRFS after flow director delete

The logic for adding back perfect flows after a flow director filter is
deleted was missing. The code now adds perfect flows back into the HW
tables after filter delete.

Signed-off-by: Henry Tieman <henry.w.tieman@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c | 27 ++++++++++++++++++++++-
 1 file changed, 26 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
index 42803fc0ed18..d7430ce6af26 100644
--- a/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
+++ b/drivers/net/ethernet/intel/ice/ice_ethtool_fdir.c
@@ -1363,6 +1363,31 @@ release_lock:
 	mutex_unlock(&hw->fdir_fltr_lock);
 }
 
+/**
+ * ice_fdir_do_rem_flow - delete flow and possibly add perfect flow
+ * @pf: PF structure
+ * @flow_type: FDir flow type to release
+ */
+static void
+ice_fdir_do_rem_flow(struct ice_pf *pf, enum ice_fltr_ptype flow_type)
+{
+	struct ice_hw *hw = &pf->hw;
+	bool need_perfect = false;
+
+	if (flow_type == ICE_FLTR_PTYPE_NONF_IPV4_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV4_UDP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_TCP ||
+	    flow_type == ICE_FLTR_PTYPE_NONF_IPV6_UDP)
+		need_perfect = true;
+
+	if (need_perfect && test_bit(flow_type, hw->fdir_perfect_fltr))
+		return;
+
+	ice_fdir_rem_flow(hw, ICE_BLK_FD, flow_type);
+	if (need_perfect)
+		ice_create_init_fdir_rule(pf, flow_type);
+}
+
 /**
  * ice_fdir_update_list_entry - add or delete a filter from the filter list
  * @pf: PF structure
@@ -1393,7 +1418,7 @@ ice_fdir_update_list_entry(struct ice_pf *pf, struct ice_fdir_fltr *input,
 			/* we just deleted the last filter of flow_type so we
 			 * should also delete the HW filter info.
 			 */
-			ice_fdir_rem_flow(hw, ICE_BLK_FD, old_fltr->flow_type);
+			ice_fdir_do_rem_flow(pf, old_fltr->flow_type);
 		list_del(&old_fltr->fltr_node);
 		devm_kfree(ice_hw_to_dev(hw), old_fltr);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From b5e19a642b7ed3d9e6de746957226a7ae726d226 Mon Sep 17 00:00:00 2001
From: Chinh T Cao <chinh.t.cao@intel.com>
Date: Fri, 15 May 2020 17:55:06 -0700
Subject: ice: Ignore EMODE when setting PHY config

When setting the PHY cfg (CQ cmd 0x0601), if the firmware responds
with an EMODE error, software will ignore the error as it simply
means that manageability (ex: BMC) is in control of the link and that
the new setting may not be applied.

Signed-off-by: Chinh T Cao <chinh.t.cao@intel.com>
Signed-off-by: Tony Nguyen <anthony.l.nguyen@intel.com>
Tested-by: Andrew Bowers <andrewx.bowers@intel.com>
Signed-off-by: Jeff Kirsher <jeffrey.t.kirsher@intel.com>
---
 drivers/net/ethernet/intel/ice/ice_adminq_cmd.h | 1 +
 drivers/net/ethernet/intel/ice/ice_common.c     | 7 ++++++-
 drivers/net/ethernet/intel/ice/ice_main.c       | 2 ++
 3 files changed, 9 insertions(+), 1 deletion(-)

diff --git a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
index 50040c5c55ec..92f82f2a8af4 100644
--- a/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
+++ b/drivers/net/ethernet/intel/ice/ice_adminq_cmd.h
@@ -1826,6 +1826,7 @@ enum ice_aq_err {
 	ICE_AQ_RC_EINVAL	= 14, /* Invalid argument */
 	ICE_AQ_RC_ENOSPC	= 16, /* No space left or allocation failure */
 	ICE_AQ_RC_ENOSYS	= 17, /* Function not implemented */
+	ICE_AQ_RC_EMODE		= 21, /* Op not allowed in current dev mode */
 	ICE_AQ_RC_ENOSEC	= 24, /* Missing security manifest */
 	ICE_AQ_RC_EBADSIG	= 25, /* Bad RSA signature */
 	ICE_AQ_RC_ESVN		= 26, /* SVN number prohibits this package */
diff --git a/drivers/net/ethernet/intel/ice/ice_common.c b/drivers/net/ethernet/intel/ice/ice_common.c
index d4a31c734326..bce0e1281168 100644
--- a/drivers/net/ethernet/intel/ice/ice_common.c
+++ b/drivers/net/ethernet/intel/ice/ice_common.c
@@ -2232,6 +2232,7 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
 		   struct ice_aqc_set_phy_cfg_data *cfg, struct ice_sq_cd *cd)
 {
 	struct ice_aq_desc desc;
+	enum ice_status status;
 
 	if (!cfg)
 		return ICE_ERR_PARAM;
@@ -2260,7 +2261,11 @@ ice_aq_set_phy_cfg(struct ice_hw *hw, u8 lport,
 	ice_debug(hw, ICE_DBG_LINK, "eeer_value = 0x%x\n", cfg->eeer_value);
 	ice_debug(hw, ICE_DBG_LINK, "link_fec_opt = 0x%x\n", cfg->link_fec_opt);
 
-	return ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+	status = ice_aq_send_cmd(hw, &desc, cfg, sizeof(*cfg), cd);
+	if (hw->adminq.sq_last_status == ICE_AQ_RC_EMODE)
+		status = 0;
+
+	return status;
 }
 
 /**
diff --git a/drivers/net/ethernet/intel/ice/ice_main.c b/drivers/net/ethernet/intel/ice/ice_main.c
index cb72ff32a29b..082825e3cb39 100644
--- a/drivers/net/ethernet/intel/ice/ice_main.c
+++ b/drivers/net/ethernet/intel/ice/ice_main.c
@@ -5159,6 +5159,8 @@ const char *ice_aq_str(enum ice_aq_err aq_err)
 		return "ICE_AQ_RC_ENOSPC";
 	case ICE_AQ_RC_ENOSYS:
 		return "ICE_AQ_RC_ENOSYS";
+	case ICE_AQ_RC_EMODE:
+		return "ICE_AQ_RC_EMODE";
 	case ICE_AQ_RC_ENOSEC:
 		return "ICE_AQ_RC_ENOSEC";
 	case ICE_AQ_RC_EBADSIG:
-- 
cgit v1.2.3-59-g8ed1b


From 4942857b015ede4fab8b262931244a3c1006a2a6 Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijuhu@codeaurora.org>
Date: Fri, 29 May 2020 22:46:13 +0800
Subject: Bluetooth: hci_qca: Improve controller ID info log level

The controller ID info obtained via VSC EDL_PATCH_GETVER is very
important, so raise its log level from DEBUG to INFO.

Signed-off-by: Zijun Hu <zijuhu@codeaurora.org>
Reviewed-by: Matthias Kaehlcke <mka@chromium.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btqca.c | 14 +++++++++-----
 1 file changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/bluetooth/btqca.c b/drivers/bluetooth/btqca.c
index 3ea866d44568..c5984966f315 100644
--- a/drivers/bluetooth/btqca.c
+++ b/drivers/bluetooth/btqca.c
@@ -74,17 +74,21 @@ int qca_read_soc_version(struct hci_dev *hdev, u32 *soc_version,
 
 	ver = (struct qca_btsoc_version *)(edl->data);
 
-	BT_DBG("%s: Product:0x%08x", hdev->name, le32_to_cpu(ver->product_id));
-	BT_DBG("%s: Patch  :0x%08x", hdev->name, le16_to_cpu(ver->patch_ver));
-	BT_DBG("%s: ROM    :0x%08x", hdev->name, le16_to_cpu(ver->rom_ver));
-	BT_DBG("%s: SOC    :0x%08x", hdev->name, le32_to_cpu(ver->soc_id));
+	bt_dev_info(hdev, "QCA Product ID   :0x%08x",
+		    le32_to_cpu(ver->product_id));
+	bt_dev_info(hdev, "QCA SOC Version  :0x%08x",
+		    le32_to_cpu(ver->soc_id));
+	bt_dev_info(hdev, "QCA ROM Version  :0x%08x",
+		    le16_to_cpu(ver->rom_ver));
+	bt_dev_info(hdev, "QCA Patch Version:0x%08x",
+		    le16_to_cpu(ver->patch_ver));
 
 	/* QCA chipset version can be decided by patch and SoC
 	 * version, combination with upper 2 bytes from SoC
 	 * and lower 2 bytes from patch will be used.
 	 */
 	*soc_version = (le32_to_cpu(ver->soc_id) << 16) |
-			(le16_to_cpu(ver->rom_ver) & 0x0000ffff);
+		       (le16_to_cpu(ver->rom_ver) & 0x0000ffff);
 	if (*soc_version == 0)
 		err = -EILSEQ;
 
-- 
cgit v1.2.3-59-g8ed1b


From d3a0fe6b0988241c64ef4f6a1045423cc79a612a Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijuhu@codeaurora.org>
Date: Fri, 29 May 2020 23:58:56 +0800
Subject: Bluetooth: btmtkuart: Use serdev_device_write_buf() instead of
 serdev_device_write()

serdev_device_write() is not appropriate here because
serdev_device_write_wakeup() is not used to release the completion held
by the former in the @write_wakeup member of struct serdev_device_ops.

Fix by using serdev_device_write_buf() instead of serdev_device_write().

Signed-off-by: Zijun Hu <zijuhu@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/btmtkuart.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/bluetooth/btmtkuart.c b/drivers/bluetooth/btmtkuart.c
index 8a81fbca5c9d..6c40bc75fb5b 100644
--- a/drivers/bluetooth/btmtkuart.c
+++ b/drivers/bluetooth/btmtkuart.c
@@ -695,8 +695,7 @@ static int btmtkuart_change_baudrate(struct hci_dev *hdev)
 
 	/* Send a dummy byte 0xff to activate the new baudrate */
 	param = 0xff;
-	err = serdev_device_write(bdev->serdev, &param, sizeof(param),
-				  MAX_SCHEDULE_TIMEOUT);
+	err = serdev_device_write_buf(bdev->serdev, &param, sizeof(param));
 	if (err < 0 || err < sizeof(param))
 		return err;
 
-- 
cgit v1.2.3-59-g8ed1b


From e5aeebddfc312ea7bb55dfe6c7264e71a3b43992 Mon Sep 17 00:00:00 2001
From: Zijun Hu <zijuhu@codeaurora.org>
Date: Fri, 29 May 2020 22:38:31 +0800
Subject: Bluetooth: hci_qca: Fix QCA6390 memdump failure

QCA6390 memdump VSEs sometimes arrive at the Bluetooth driver
with a wrong sequence number, as illustrated below:
frame # in dec: frame data in hex
1396: ff fd 01 08 74 05 00 37 8f 14
1397: ff fd 01 08 75 05 00 ff bf 38
1414: ff fd 01 08 86 05 00 fb 5e 4b
1399: ff fd 01 08 77 05 00 f3 44 0a
1400: ff fd 01 08 78 05 00 ca f7 41
This is mistaken for the controller missing packets, which results
in a page fault after writing past the end of the allocated memdump buffer.

Fix this by skipping the sequence number check for QCA6390 and
checking buffer space before writing.

Signed-off-by: Zijun Hu <zijuhu@codeaurora.org>
Tested-by: Zijun Hu <zijuhu@codeaurora.org>
Signed-off-by: Marcel Holtmann <marcel@holtmann.org>
---
 drivers/bluetooth/hci_qca.c | 54 +++++++++++++++++++++++++++++++++++++--------
 1 file changed, 45 insertions(+), 9 deletions(-)

diff --git a/drivers/bluetooth/hci_qca.c b/drivers/bluetooth/hci_qca.c
index aa957d749d6f..81c3c38baba1 100644
--- a/drivers/bluetooth/hci_qca.c
+++ b/drivers/bluetooth/hci_qca.c
@@ -114,6 +114,7 @@ struct qca_memdump_data {
 	char *memdump_buf_tail;
 	u32 current_seq_no;
 	u32 received_dump;
+	u32 ram_dump_size;
 };
 
 struct qca_memdump_event_hdr {
@@ -976,6 +977,8 @@ static void qca_controller_memdump(struct work_struct *work)
 	char nullBuff[QCA_DUMP_PACKET_SIZE] = { 0 };
 	u16 seq_no;
 	u32 dump_size;
+	u32 rx_size;
+	enum qca_btsoc_type soc_type = qca_soc_type(hu);
 
 	while ((skb = skb_dequeue(&qca->rx_memdump_q))) {
 
@@ -1025,10 +1028,12 @@ static void qca_controller_memdump(struct work_struct *work)
 				    dump_size);
 			queue_delayed_work(qca->workqueue,
 					   &qca->ctrl_memdump_timeout,
-					msecs_to_jiffies(MEMDUMP_TIMEOUT_MS));
+					   msecs_to_jiffies(MEMDUMP_TIMEOUT_MS)
+					  );
 
 			skb_pull(skb, sizeof(dump_size));
 			memdump_buf = vmalloc(dump_size);
+			qca_memdump->ram_dump_size = dump_size;
 			qca_memdump->memdump_buf_head = memdump_buf;
 			qca_memdump->memdump_buf_tail = memdump_buf;
 		}
@@ -1051,26 +1056,57 @@ static void qca_controller_memdump(struct work_struct *work)
 		 * the controller. In such cases let us store the dummy
 		 * packets in the buffer.
 		 */
+		/* For QCA6390, the controller does not lose packets, but
+		 * the sequence number field of a packet sometimes has
+		 * erroneous bits, so skip this check for missing packets.
+		 */
 		while ((seq_no > qca_memdump->current_seq_no + 1) &&
-			seq_no != QCA_LAST_SEQUENCE_NUM) {
+		       (soc_type != QCA_QCA6390) &&
+		       seq_no != QCA_LAST_SEQUENCE_NUM) {
 			bt_dev_err(hu->hdev, "QCA controller missed packet:%d",
 				   qca_memdump->current_seq_no);
+			rx_size = qca_memdump->received_dump;
+			rx_size += QCA_DUMP_PACKET_SIZE;
+			if (rx_size > qca_memdump->ram_dump_size) {
+				bt_dev_err(hu->hdev,
+					   "QCA memdump received %d, no space for missed packet",
+					   qca_memdump->received_dump);
+				break;
+			}
 			memcpy(memdump_buf, nullBuff, QCA_DUMP_PACKET_SIZE);
 			memdump_buf = memdump_buf + QCA_DUMP_PACKET_SIZE;
 			qca_memdump->received_dump += QCA_DUMP_PACKET_SIZE;
 			qca_memdump->current_seq_no++;
 		}
 
-		memcpy(memdump_buf, (unsigned char *) skb->data, skb->len);
-		memdump_buf = memdump_buf + skb->len;
-		qca_memdump->memdump_buf_tail = memdump_buf;
-		qca_memdump->current_seq_no = seq_no + 1;
-		qca_memdump->received_dump += skb->len;
+		rx_size = qca_memdump->received_dump + skb->len;
+		if (rx_size <= qca_memdump->ram_dump_size) {
+			if ((seq_no != QCA_LAST_SEQUENCE_NUM) &&
+			    (seq_no != qca_memdump->current_seq_no))
+				bt_dev_err(hu->hdev,
+					   "QCA memdump unexpected packet %d",
+					   seq_no);
+			bt_dev_dbg(hu->hdev,
+				   "QCA memdump packet %d with length %d",
+				   seq_no, skb->len);
+			memcpy(memdump_buf, (unsigned char *)skb->data,
+			       skb->len);
+			memdump_buf = memdump_buf + skb->len;
+			qca_memdump->memdump_buf_tail = memdump_buf;
+			qca_memdump->current_seq_no = seq_no + 1;
+			qca_memdump->received_dump += skb->len;
+		} else {
+			bt_dev_err(hu->hdev,
+				   "QCA memdump received %d, no space for packet %d",
+				   qca_memdump->received_dump, seq_no);
+		}
 		qca->qca_memdump = qca_memdump;
 		kfree_skb(skb);
 		if (seq_no == QCA_LAST_SEQUENCE_NUM) {
-			bt_dev_info(hu->hdev, "QCA writing crash dump of size %d bytes",
-				   qca_memdump->received_dump);
+			bt_dev_info(hu->hdev,
+				    "QCA memdump Done, received %d, total %d",
+				    qca_memdump->received_dump,
+				    qca_memdump->ram_dump_size);
 			memdump_buf = qca_memdump->memdump_buf_head;
 			dev_coredumpv(&hu->serdev->dev, memdump_buf,
 				      qca_memdump->received_dump, GFP_KERNEL);
-- 
cgit v1.2.3-59-g8ed1b


From dafe2078a75af1abe4780313ef8dd8491ba8598f Mon Sep 17 00:00:00 2001
From: Patrick Eigensatz <patrickeigensatz@gmail.com>
Date: Mon, 1 Jun 2020 13:12:01 +0200
Subject: ipv4: nexthop: Fix deadcode issue by performing a proper NULL check

After allocating the spare nexthop group it should be tested for kzalloc()
returning NULL, instead the already used nexthop group (which cannot be
NULL at this point) had been tested so far.

Additionally, if kzalloc() fails, return ERR_PTR(-ENOMEM) instead of NULL.

Coverity-id: 1463885
Reported-by: Coverity <scan-admin@coverity.com>
Signed-off-by: Patrick Eigensatz <patrickeigensatz@gmail.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv4/nexthop.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/net/ipv4/nexthop.c b/net/ipv4/nexthop.c
index ebafa5ed91ac..400a9f89ebdb 100644
--- a/net/ipv4/nexthop.c
+++ b/net/ipv4/nexthop.c
@@ -1185,10 +1185,10 @@ static struct nexthop *nexthop_create_group(struct net *net,
 
 	/* spare group used for removals */
 	nhg->spare = nexthop_grp_alloc(num_nh);
-	if (!nhg) {
+	if (!nhg->spare) {
 		kfree(nhg);
 		kfree(nh);
-		return NULL;
+		return ERR_PTR(-ENOMEM);
 	}
 	nhg->spare->spare = nhg;
 
-- 
cgit v1.2.3-59-g8ed1b


From 53fc685243bd6fb90d90305cea54598b78d3cbfc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 1 Jun 2020 15:58:54 +0300
Subject: bridge: Avoid infinite loop when suppressing NS messages with invalid
 options

When neighbor suppression is enabled the bridge device might reply to
Neighbor Solicitation (NS) messages on behalf of remote hosts.

In case the NS message includes the "Source link-layer address" option
[1], the bridge device will use the specified address as the link-layer
destination address in its reply.

To avoid an infinite loop, break out of the options parsing loop when
encountering an option with length zero and disregard the NS message.

This is consistent with the IPv6 ndisc code and RFC 4861 which states
that "Nodes MUST silently discard an ND packet that contains an option
with length zero" [2].

[1] https://tools.ietf.org/html/rfc4861#section-4.3
[2] https://tools.ietf.org/html/rfc4861#section-4.6

Fixes: ed842faeb2bd ("bridge: suppress nd pkts on BR_NEIGH_SUPPRESS ports")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reported-by: Alla Segal <allas@mellanox.com>
Tested-by: Alla Segal <allas@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/bridge/br_arp_nd_proxy.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/net/bridge/br_arp_nd_proxy.c b/net/bridge/br_arp_nd_proxy.c
index 37908561a64b..b18cdf03edb3 100644
--- a/net/bridge/br_arp_nd_proxy.c
+++ b/net/bridge/br_arp_nd_proxy.c
@@ -276,6 +276,10 @@ static void br_nd_send(struct net_bridge *br, struct net_bridge_port *p,
 	ns_olen = request->len - (skb_network_offset(request) +
 				  sizeof(struct ipv6hdr)) - sizeof(*ns);
 	for (i = 0; i < ns_olen - 1; i += (ns->opt[i + 1] << 3)) {
+		if (!ns->opt[i + 1]) {
+			kfree_skb(reply);
+			return;
+		}
 		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
 			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From 8066e6b449e050675df48e7c4b16c29f00507ff0 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Mon, 1 Jun 2020 15:58:55 +0300
Subject: vxlan: Avoid infinite loop when suppressing NS messages with invalid
 options

When proxy mode is enabled the vxlan device might reply to Neighbor
Solicitation (NS) messages on behalf of remote hosts.

In case the NS message includes the "Source link-layer address" option
[1], the vxlan device will use the specified address as the link-layer
destination address in its reply.

To avoid an infinite loop, break out of the options parsing loop when
encountering an option with length zero and disregard the NS message.

This is consistent with the IPv6 ndisc code and RFC 4861 which states
that "Nodes MUST silently discard an ND packet that contains an option
with length zero" [2].

[1] https://tools.ietf.org/html/rfc4861#section-4.3
[2] https://tools.ietf.org/html/rfc4861#section-4.6

Fixes: 4b29dba9c085 ("vxlan: fix nonfunctional neigh_reduce()")
Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Acked-by: Nikolay Aleksandrov <nikolay@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index 39bc10a7fd2e..d5906b41cdae 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -2092,6 +2092,10 @@ static struct sk_buff *vxlan_na_create(struct sk_buff *request,
 	ns_olen = request->len - skb_network_offset(request) -
 		sizeof(struct ipv6hdr) - sizeof(*ns);
 	for (i = 0; i < ns_olen-1; i += (ns->opt[i+1]<<3)) {
+		if (!ns->opt[i + 1]) {
+			kfree_skb(reply);
+			return NULL;
+		}
 		if (ns->opt[i] == ND_OPT_SOURCE_LL_ADDR) {
 			daddr = ns->opt + i + sizeof(struct nd_opt_hdr);
 			break;
-- 
cgit v1.2.3-59-g8ed1b


From bda6752f3de99e9d765638b89aacfb11c07cee06 Mon Sep 17 00:00:00 2001
From: Dan Carpenter <dan.carpenter@oracle.com>
Date: Thu, 28 May 2020 15:49:57 +0300
Subject: cxgb4: cleanup error code in setup_sge_queues_uld()

The caller doesn't care about the error codes, they only check for zero
vs non-zero.  Still, it's better to preserve the negative error codes
from alloc_uld_rxqs() instead of changing it to 1.  We can also return
directly if there is a failure.

Signed-off-by: Dan Carpenter <dan.carpenter@oracle.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 6b1d3df4b9ba..9e3c6b36cde8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -174,13 +174,14 @@ static int
 setup_sge_queues_uld(struct adapter *adap, unsigned int uld_type, bool lro)
 {
 	struct sge_uld_rxq_info *rxq_info = adap->sge.uld_rxq_info[uld_type];
-	int i, ret = 0;
+	int i, ret;
 
-	ret = !(!alloc_uld_rxqs(adap, rxq_info, lro));
+	ret = alloc_uld_rxqs(adap, rxq_info, lro);
+	if (ret)
+		return ret;
 
 	/* Tell uP to route control queue completions to rdma rspq */
-	if (adap->flags & CXGB4_FULL_INIT_DONE &&
-	    !ret && uld_type == CXGB4_ULD_RDMA) {
+	if (adap->flags & CXGB4_FULL_INIT_DONE && uld_type == CXGB4_ULD_RDMA) {
 		struct sge *s = &adap->sge;
 		unsigned int cmplqid;
 		u32 param, cmdop;
-- 
cgit v1.2.3-59-g8ed1b


From bfad978116c2aa3b693701059923de4561196f9b Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 May 2020 17:45:02 +0200
Subject: regmap: provide helpers for simple bit operations

In many instances regmap_update_bits() is used for simple bit setting
and clearing. In these cases the last argument is redundant and we can
hide it with a static inline function.

This adds three new helpers for simple bit operations: set_bits,
clear_bits and test_bits (the last one defined as a regular function).

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/base/regmap/regmap.c | 22 ++++++++++++++++++++++
 include/linux/regmap.h       | 36 ++++++++++++++++++++++++++++++++++++
 2 files changed, 58 insertions(+)

diff --git a/drivers/base/regmap/regmap.c b/drivers/base/regmap/regmap.c
index 59f911e57719..4ad5c5adc0a3 100644
--- a/drivers/base/regmap/regmap.c
+++ b/drivers/base/regmap/regmap.c
@@ -2936,6 +2936,28 @@ int regmap_update_bits_base(struct regmap *map, unsigned int reg,
 }
 EXPORT_SYMBOL_GPL(regmap_update_bits_base);
 
+/**
+ * regmap_test_bits() - Check if all specified bits are set in a register.
+ *
+ * @map: Register map to operate on
+ * @reg: Register to read from
+ * @bits: Bits to test
+ *
+ * Returns the regmap_read() error code if the underlying read fails, 0 if at
+ * least one of the tested bits is not set and 1 if all tested bits are set.
+ */
+int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits)
+{
+	unsigned int val, ret;
+
+	ret = regmap_read(map, reg, &val);
+	if (ret)
+		return ret;
+
+	return (val & bits) == bits;
+}
+EXPORT_SYMBOL_GPL(regmap_test_bits);
+
 void regmap_async_complete_cb(struct regmap_async *async, int ret)
 {
 	struct regmap *map = async->map;
diff --git a/include/linux/regmap.h b/include/linux/regmap.h
index 40b07168fd8e..ddf0baff195d 100644
--- a/include/linux/regmap.h
+++ b/include/linux/regmap.h
@@ -1111,6 +1111,21 @@ bool regmap_reg_in_ranges(unsigned int reg,
 			  const struct regmap_range *ranges,
 			  unsigned int nranges);
 
+static inline int regmap_set_bits(struct regmap *map,
+				  unsigned int reg, unsigned int bits)
+{
+	return regmap_update_bits_base(map, reg, bits, bits,
+				       NULL, false, false);
+}
+
+static inline int regmap_clear_bits(struct regmap *map,
+				    unsigned int reg, unsigned int bits)
+{
+	return regmap_update_bits_base(map, reg, bits, 0, NULL, false, false);
+}
+
+int regmap_test_bits(struct regmap *map, unsigned int reg, unsigned int bits);
+
 /**
  * struct reg_field - Description of an register field
  *
@@ -1410,6 +1425,27 @@ static inline int regmap_update_bits_base(struct regmap *map, unsigned int reg,
 	return -EINVAL;
 }
 
+static inline int regmap_set_bits(struct regmap *map,
+				  unsigned int reg, unsigned int bits)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
+static inline int regmap_clear_bits(struct regmap *map,
+				    unsigned int reg, unsigned int bits)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
+static inline int regmap_test_bits(struct regmap *map,
+				   unsigned int reg, unsigned int bits)
+{
+	WARN_ONCE(1, "regmap API is disabled");
+	return -EINVAL;
+}
+
 static inline int regmap_field_update_bits_base(struct regmap_field *field,
 					unsigned int mask, unsigned int val,
 					bool *change, bool async, bool force)
-- 
cgit v1.2.3-59-g8ed1b


From 240f1ae40c659713a53f8fa5e4899e7b7350bfed Mon Sep 17 00:00:00 2001
From: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Date: Thu, 28 May 2020 17:45:03 +0200
Subject: net: ethernet: mtk-star-emac: use regmap bitops

Shrink the code visually by replacing regmap_update_bits() with
appropriate regmap bit operations where applicable.

Signed-off-by: Bartosz Golaszewski <bgolaszewski@baylibre.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mediatek/mtk_star_emac.c | 80 ++++++++++++---------------
 1 file changed, 35 insertions(+), 45 deletions(-)

diff --git a/drivers/net/ethernet/mediatek/mtk_star_emac.c b/drivers/net/ethernet/mediatek/mtk_star_emac.c
index 7df35872c107..f1ace4fec19f 100644
--- a/drivers/net/ethernet/mediatek/mtk_star_emac.c
+++ b/drivers/net/ethernet/mediatek/mtk_star_emac.c
@@ -413,8 +413,8 @@ static void mtk_star_dma_unmap_tx(struct mtk_star_priv *priv,
 
 static void mtk_star_nic_disable_pd(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG,
-			   MTK_STAR_BIT_MAC_CFG_NIC_PD, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_MAC_CFG,
+			  MTK_STAR_BIT_MAC_CFG_NIC_PD);
 }
 
 /* Unmask the three interrupts we care about, mask all others. */
@@ -434,41 +434,38 @@ static void mtk_star_intr_disable(struct mtk_star_priv *priv)
 
 static void mtk_star_intr_enable_tx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_BIT_INT_STS_TNTC, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			  MTK_STAR_BIT_INT_STS_TNTC);
 }
 
 static void mtk_star_intr_enable_rx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_BIT_INT_STS_FNRC, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			  MTK_STAR_BIT_INT_STS_FNRC);
 }
 
 static void mtk_star_intr_enable_stats(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_REG_INT_STS_MIB_CNT_TH, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			  MTK_STAR_REG_INT_STS_MIB_CNT_TH);
 }
 
 static void mtk_star_intr_disable_tx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_BIT_INT_STS_TNTC,
-			   MTK_STAR_BIT_INT_STS_TNTC);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			MTK_STAR_BIT_INT_STS_TNTC);
 }
 
 static void mtk_star_intr_disable_rx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_BIT_INT_STS_FNRC,
-			   MTK_STAR_BIT_INT_STS_FNRC);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			MTK_STAR_BIT_INT_STS_FNRC);
 }
 
 static void mtk_star_intr_disable_stats(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_INT_MASK,
-			   MTK_STAR_REG_INT_STS_MIB_CNT_TH,
-			   MTK_STAR_REG_INT_STS_MIB_CNT_TH);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_INT_MASK,
+			MTK_STAR_REG_INT_STS_MIB_CNT_TH);
 }
 
 static unsigned int mtk_star_intr_read(struct mtk_star_priv *priv)
@@ -524,12 +521,10 @@ static void mtk_star_dma_init(struct mtk_star_priv *priv)
 
 static void mtk_star_dma_start(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
-			   MTK_STAR_BIT_TX_DMA_CTRL_START,
-			   MTK_STAR_BIT_TX_DMA_CTRL_START);
-	regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
-			   MTK_STAR_BIT_RX_DMA_CTRL_START,
-			   MTK_STAR_BIT_RX_DMA_CTRL_START);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
+			MTK_STAR_BIT_TX_DMA_CTRL_START);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
+			MTK_STAR_BIT_RX_DMA_CTRL_START);
 }
 
 static void mtk_star_dma_stop(struct mtk_star_priv *priv)
@@ -553,16 +548,14 @@ static void mtk_star_dma_disable(struct mtk_star_priv *priv)
 
 static void mtk_star_dma_resume_rx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
-			   MTK_STAR_BIT_RX_DMA_CTRL_RESUME,
-			   MTK_STAR_BIT_RX_DMA_CTRL_RESUME);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_RX_DMA_CTRL,
+			MTK_STAR_BIT_RX_DMA_CTRL_RESUME);
 }
 
 static void mtk_star_dma_resume_tx(struct mtk_star_priv *priv)
 {
-	regmap_update_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
-			   MTK_STAR_BIT_TX_DMA_CTRL_RESUME,
-			   MTK_STAR_BIT_TX_DMA_CTRL_RESUME);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_TX_DMA_CTRL,
+			MTK_STAR_BIT_TX_DMA_CTRL_RESUME);
 }
 
 static void mtk_star_set_mac_addr(struct net_device *ndev)
@@ -842,8 +835,8 @@ static int mtk_star_hash_wait_ok(struct mtk_star_priv *priv)
 		return ret;
 
 	/* Check the BIST_OK bit. */
-	regmap_read(priv->regs, MTK_STAR_REG_HASH_CTRL, &val);
-	if (!(val & MTK_STAR_BIT_HASH_CTRL_BIST_OK))
+	if (!regmap_test_bits(priv->regs, MTK_STAR_REG_HASH_CTRL,
+			      MTK_STAR_BIT_HASH_CTRL_BIST_OK))
 		return -EIO;
 
 	return 0;
@@ -877,12 +870,10 @@ static int mtk_star_reset_hash_table(struct mtk_star_priv *priv)
 	if (ret)
 		return ret;
 
-	regmap_update_bits(priv->regs, MTK_STAR_REG_HASH_CTRL,
-			   MTK_STAR_BIT_HASH_CTRL_BIST_EN,
-			   MTK_STAR_BIT_HASH_CTRL_BIST_EN);
-	regmap_update_bits(priv->regs, MTK_STAR_REG_TEST1,
-			   MTK_STAR_BIT_TEST1_RST_HASH_MBIST,
-			   MTK_STAR_BIT_TEST1_RST_HASH_MBIST);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_HASH_CTRL,
+			MTK_STAR_BIT_HASH_CTRL_BIST_EN);
+	regmap_set_bits(priv->regs, MTK_STAR_REG_TEST1,
+			MTK_STAR_BIT_TEST1_RST_HASH_MBIST);
 
 	return mtk_star_hash_wait_ok(priv);
 }
@@ -1013,13 +1004,13 @@ static int mtk_star_enable(struct net_device *ndev)
 		return ret;
 
 	/* Setup the hashing algorithm */
-	regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
-			   MTK_STAR_BIT_ARL_CFG_HASH_ALG |
-			   MTK_STAR_BIT_ARL_CFG_MISC_MODE, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
+			  MTK_STAR_BIT_ARL_CFG_HASH_ALG |
+			  MTK_STAR_BIT_ARL_CFG_MISC_MODE);
 
 	/* Don't strip VLAN tags */
-	regmap_update_bits(priv->regs, MTK_STAR_REG_MAC_CFG,
-			   MTK_STAR_BIT_MAC_CFG_VLAN_STRIP, 0);
+	regmap_clear_bits(priv->regs, MTK_STAR_REG_MAC_CFG,
+			  MTK_STAR_BIT_MAC_CFG_VLAN_STRIP);
 
 	/* Setup DMA */
 	mtk_star_dma_init(priv);
@@ -1201,9 +1192,8 @@ static void mtk_star_set_rx_mode(struct net_device *ndev)
 	int ret;
 
 	if (ndev->flags & IFF_PROMISC) {
-		regmap_update_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
-				   MTK_STAR_BIT_ARL_CFG_MISC_MODE,
-				   MTK_STAR_BIT_ARL_CFG_MISC_MODE);
+		regmap_set_bits(priv->regs, MTK_STAR_REG_ARL_CFG,
+				MTK_STAR_BIT_ARL_CFG_MISC_MODE);
 	} else if (netdev_mc_count(ndev) > MTK_STAR_HASHTABLE_MC_LIMIT ||
 		   ndev->flags & IFF_ALLMULTI) {
 		for (i = 0; i < MTK_STAR_HASHTABLE_SIZE_MAX; i++) {
-- 
cgit v1.2.3-59-g8ed1b


From a01c245438c59a44f3ece8440046c78675fa8b0b Mon Sep 17 00:00:00 2001
From: Davide Caratti <dcaratti@redhat.com>
Date: Fri, 29 May 2020 00:05:32 +0200
Subject: net/sched: fix a couple of splats in the error path of
 tcf_gate_init()

When trying to configure TC 'act_gate' rules with invalid control actions,
the following splat can be observed:

 general protection fault, probably for non-canonical address 0xdffffc0000000002: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000010-0x0000000000000017]
 CPU: 1 PID: 2143 Comm: tc Not tainted 5.7.0-rc6+ #168
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 RIP: 0010:hrtimer_active+0x56/0x290
 [...]
  Call Trace:
  hrtimer_try_to_cancel+0x6d/0x330
  hrtimer_cancel+0x11/0x20
  tcf_gate_cleanup+0x15/0x30 [act_gate]
  tcf_action_cleanup+0x58/0x170
  __tcf_action_put+0xb0/0xe0
  __tcf_idr_release+0x68/0x90
  tcf_gate_init+0x7c7/0x19a0 [act_gate]
  tcf_action_init_1+0x60f/0x960
  tcf_action_init+0x157/0x2a0
  tcf_action_add+0xd9/0x2f0
  tc_ctl_action+0x2a3/0x39d
  rtnetlink_rcv_msg+0x5f3/0x920
  netlink_rcv_skb+0x121/0x350
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x714/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5b4/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x9a/0x370
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

This is caused by hrtimer_cancel() running before hrtimer_init(). Fix it by
ensuring that hrtimer_cancel() is called only if clockid is valid and the
timer has been initialized. After fixing this splat, the same error path
causes another problem:

 general protection fault, probably for non-canonical address 0xdffffc0000000000: 0000 [#1] SMP KASAN NOPTI
 KASAN: null-ptr-deref in range [0x0000000000000000-0x0000000000000007]
 CPU: 1 PID: 980 Comm: tc Not tainted 5.7.0-rc6+ #168
 Hardware name: Red Hat KVM, BIOS 1.11.1-4.module+el8.1.0+4066+0f1aadab 04/01/2014
 RIP: 0010:release_entry_list+0x4a/0x240 [act_gate]
 [...]
 Call Trace:
  tcf_action_cleanup+0x58/0x170
  __tcf_action_put+0xb0/0xe0
  __tcf_idr_release+0x68/0x90
  tcf_gate_init+0x7ab/0x19a0 [act_gate]
  tcf_action_init_1+0x60f/0x960
  tcf_action_init+0x157/0x2a0
  tcf_action_add+0xd9/0x2f0
  tc_ctl_action+0x2a3/0x39d
  rtnetlink_rcv_msg+0x5f3/0x920
  netlink_rcv_skb+0x121/0x350
  netlink_unicast+0x439/0x630
  netlink_sendmsg+0x714/0xbf0
  sock_sendmsg+0xe2/0x110
  ____sys_sendmsg+0x5b4/0x890
  ___sys_sendmsg+0xe9/0x160
  __sys_sendmsg+0xd3/0x170
  do_syscall_64+0x9a/0x370
  entry_SYSCALL_64_after_hwframe+0x44/0xa9

The problem is similar: tcf_action_cleanup() was trying to release a list
without initializing it first. Ensure that INIT_LIST_HEAD() is called for
every newly created 'act_gate' action, same as what was done to 'act_ife'
with commit 44c23d71599f ("net/sched: act_ife: initalize ife->metalist
earlier").

Fixes: a51c328df310 ("net: qos: introduce a gate control flow action")
CC: Ivan Vecera <ivecera@redhat.com>
Signed-off-by: Davide Caratti <dcaratti@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/act_gate.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/net/sched/act_gate.c b/net/sched/act_gate.c
index 35fc48795541..9c628591f452 100644
--- a/net/sched/act_gate.c
+++ b/net/sched/act_gate.c
@@ -331,6 +331,10 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 		tcf_idr_release(*a, bind);
 		return -EEXIST;
 	}
+	if (ret == ACT_P_CREATED) {
+		to_gate(*a)->param.tcfg_clockid = -1;
+		INIT_LIST_HEAD(&(to_gate(*a)->param.entries));
+	}
 
 	if (tb[TCA_GATE_PRIORITY])
 		prio = nla_get_s32(tb[TCA_GATE_PRIORITY]);
@@ -377,7 +381,6 @@ static int tcf_gate_init(struct net *net, struct nlattr *nla,
 			goto chain_put;
 	}
 
-	INIT_LIST_HEAD(&p->entries);
 	if (tb[TCA_GATE_ENTRY_LIST]) {
 		err = parse_gate_list(tb[TCA_GATE_ENTRY_LIST], p, extack);
 		if (err < 0)
@@ -449,9 +452,9 @@ static void tcf_gate_cleanup(struct tc_action *a)
 	struct tcf_gate *gact = to_gate(a);
 	struct tcf_gate_params *p;
 
-	hrtimer_cancel(&gact->hitimer);
-
 	p = &gact->param;
+	if (p->tcfg_clockid != -1)
+		hrtimer_cancel(&gact->hitimer);
 
 	release_entry_list(&p->entries);
 }
-- 
cgit v1.2.3-59-g8ed1b


From a8284c6899cf7321abbd258d970a9442978b0a4f Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:34 +0200
Subject: netfilter: nf_flowtable: expose nf_flow_table_gc_cleanup()

This function schedules the flow teardown state and it forces a gc run.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/netfilter/nf_flow_table.h | 2 ++
 net/netfilter/nf_flow_table_core.c    | 6 +++---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/include/net/netfilter/nf_flow_table.h b/include/net/netfilter/nf_flow_table.h
index c54a7f707e50..d7338bfd7b0f 100644
--- a/include/net/netfilter/nf_flow_table.h
+++ b/include/net/netfilter/nf_flow_table.h
@@ -175,6 +175,8 @@ void flow_offload_refresh(struct nf_flowtable *flow_table,
 
 struct flow_offload_tuple_rhash *flow_offload_lookup(struct nf_flowtable *flow_table,
 						     struct flow_offload_tuple *tuple);
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+			      struct net_device *dev);
 void nf_flow_table_cleanup(struct net_device *dev);
 
 int nf_flow_table_init(struct nf_flowtable *flow_table);
diff --git a/net/netfilter/nf_flow_table_core.c b/net/netfilter/nf_flow_table_core.c
index 42da6e337276..6a3034f84ab6 100644
--- a/net/netfilter/nf_flow_table_core.c
+++ b/net/netfilter/nf_flow_table_core.c
@@ -588,8 +588,8 @@ static void nf_flow_table_do_cleanup(struct flow_offload *flow, void *data)
 		flow_offload_teardown(flow);
 }
 
-static void nf_flow_table_iterate_cleanup(struct nf_flowtable *flowtable,
-					  struct net_device *dev)
+void nf_flow_table_gc_cleanup(struct nf_flowtable *flowtable,
+			      struct net_device *dev)
 {
 	nf_flow_table_iterate(flowtable, nf_flow_table_do_cleanup, dev);
 	flush_delayed_work(&flowtable->gc_work);
@@ -602,7 +602,7 @@ void nf_flow_table_cleanup(struct net_device *dev)
 
 	mutex_lock(&flowtable_lock);
 	list_for_each_entry(flowtable, &flowtables, list)
-		nf_flow_table_iterate_cleanup(flowtable, dev);
+		nf_flow_table_gc_cleanup(flowtable, dev);
 	mutex_unlock(&flowtable_lock);
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_cleanup);
-- 
cgit v1.2.3-59-g8ed1b


From 1fac52da5942c58dd3e337fd7c5a550925ca752e Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:35 +0200
Subject: net: flow_offload: consolidate indirect flow_block infrastructure

Tunnel devices provide no dev->netdev_ops->ndo_setup_tc(...) interface.
The tunnel device and route control plane does not provide an obvious
way to relate tunnel and physical devices.

This patch allows drivers to register a tunnel device offload handler
for the tc and netfilter frontends through flow_indr_dev_register() and
flow_indr_dev_unregister().

The frontend calls flow_indr_dev_setup_offload() that iterates over the
list of drivers that are offering tunnel device hardware offload
support and it sets up the flow block for this tunnel device.

If the driver module is removed, the indirect flow_block ends up with a
stale callback reference. The module removal path triggers the
dev_shutdown() path to remove the qdisc and the flow_blocks for the
physical devices. However, this is not useful for tunnel devices, where
the relation between the physical and the tunnel device is not explicit.

This patch introduces a cleanup callback that is invoked when the driver
module is removed to clean up the tunnel device flow_block. This patch
defines struct flow_block_indr and it uses it from flow_block_cb to
store the information that the front-end requires to perform the
flow_block_cb cleanup on module removal.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h |  19 ++++++
 net/core/flow_offload.c    | 157 +++++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 176 insertions(+)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 95d633785ef9..5493282348fa 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -443,6 +443,16 @@ enum tc_setup_type;
 typedef int flow_setup_cb_t(enum tc_setup_type type, void *type_data,
 			    void *cb_priv);
 
+struct flow_block_cb;
+
+struct flow_block_indr {
+	struct list_head		list;
+	struct net_device		*dev;
+	enum flow_block_binder_type	binder_type;
+	void				*data;
+	void				(*cleanup)(struct flow_block_cb *block_cb);
+};
+
 struct flow_block_cb {
 	struct list_head	driver_list;
 	struct list_head	list;
@@ -450,6 +460,7 @@ struct flow_block_cb {
 	void			*cb_ident;
 	void			*cb_priv;
 	void			(*release)(void *cb_priv);
+	struct flow_block_indr	indr;
 	unsigned int		refcnt;
 };
 
@@ -523,6 +534,14 @@ static inline void flow_block_init(struct flow_block *flow_block)
 typedef int flow_indr_block_bind_cb_t(struct net_device *dev, void *cb_priv,
 				      enum tc_setup_type type, void *type_data);
 
+int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv);
+void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
+			      flow_setup_cb_t *setup_cb);
+int flow_indr_dev_setup_offload(struct net_device *dev,
+				enum tc_setup_type type, void *data,
+				struct flow_block_offload *bo,
+				void (*cleanup)(struct flow_block_cb *block_cb));
+
 typedef void flow_indr_block_cmd_t(struct net_device *dev,
 				   flow_indr_block_bind_cb_t *cb, void *cb_priv,
 				   enum flow_block_command command);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index e64941c526b1..8cd7da2586ae 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -317,6 +317,163 @@ int flow_block_cb_setup_simple(struct flow_block_offload *f,
 }
 EXPORT_SYMBOL(flow_block_cb_setup_simple);
 
+static DEFINE_MUTEX(flow_indr_block_lock);
+static LIST_HEAD(flow_block_indr_list);
+static LIST_HEAD(flow_block_indr_dev_list);
+
+struct flow_indr_dev {
+	struct list_head		list;
+	flow_indr_block_bind_cb_t	*cb;
+	void				*cb_priv;
+	refcount_t			refcnt;
+	struct rcu_head			rcu;
+};
+
+static struct flow_indr_dev *flow_indr_dev_alloc(flow_indr_block_bind_cb_t *cb,
+						 void *cb_priv)
+{
+	struct flow_indr_dev *indr_dev;
+
+	indr_dev = kmalloc(sizeof(*indr_dev), GFP_KERNEL);
+	if (!indr_dev)
+		return NULL;
+
+	indr_dev->cb		= cb;
+	indr_dev->cb_priv	= cb_priv;
+	refcount_set(&indr_dev->refcnt, 1);
+
+	return indr_dev;
+}
+
+int flow_indr_dev_register(flow_indr_block_bind_cb_t *cb, void *cb_priv)
+{
+	struct flow_indr_dev *indr_dev;
+
+	mutex_lock(&flow_indr_block_lock);
+	list_for_each_entry(indr_dev, &flow_block_indr_dev_list, list) {
+		if (indr_dev->cb == cb &&
+		    indr_dev->cb_priv == cb_priv) {
+			refcount_inc(&indr_dev->refcnt);
+			mutex_unlock(&flow_indr_block_lock);
+			return 0;
+		}
+	}
+
+	indr_dev = flow_indr_dev_alloc(cb, cb_priv);
+	if (!indr_dev) {
+		mutex_unlock(&flow_indr_block_lock);
+		return -ENOMEM;
+	}
+
+	list_add(&indr_dev->list, &flow_block_indr_dev_list);
+	mutex_unlock(&flow_indr_block_lock);
+
+	return 0;
+}
+EXPORT_SYMBOL(flow_indr_dev_register);
+
+static void __flow_block_indr_cleanup(flow_setup_cb_t *setup_cb, void *cb_priv,
+				      struct list_head *cleanup_list)
+{
+	struct flow_block_cb *this, *next;
+
+	list_for_each_entry_safe(this, next, &flow_block_indr_list, indr.list) {
+		if (this->cb == setup_cb &&
+		    this->cb_priv == cb_priv) {
+			list_move(&this->indr.list, cleanup_list);
+			return;
+		}
+	}
+}
+
+static void flow_block_indr_notify(struct list_head *cleanup_list)
+{
+	struct flow_block_cb *this, *next;
+
+	list_for_each_entry_safe(this, next, cleanup_list, indr.list) {
+		list_del(&this->indr.list);
+		this->indr.cleanup(this);
+	}
+}
+
+void flow_indr_dev_unregister(flow_indr_block_bind_cb_t *cb, void *cb_priv,
+			      flow_setup_cb_t *setup_cb)
+{
+	struct flow_indr_dev *this, *next, *indr_dev = NULL;
+	LIST_HEAD(cleanup_list);
+
+	mutex_lock(&flow_indr_block_lock);
+	list_for_each_entry_safe(this, next, &flow_block_indr_dev_list, list) {
+		if (this->cb == cb &&
+		    this->cb_priv == cb_priv &&
+		    refcount_dec_and_test(&this->refcnt)) {
+			indr_dev = this;
+			list_del(&indr_dev->list);
+			break;
+		}
+	}
+
+	if (!indr_dev) {
+		mutex_unlock(&flow_indr_block_lock);
+		return;
+	}
+
+	__flow_block_indr_cleanup(setup_cb, cb_priv, &cleanup_list);
+	mutex_unlock(&flow_indr_block_lock);
+
+	flow_block_indr_notify(&cleanup_list);
+	kfree(indr_dev);
+}
+EXPORT_SYMBOL(flow_indr_dev_unregister);
+
+static void flow_block_indr_init(struct flow_block_cb *flow_block,
+				 struct flow_block_offload *bo,
+				 struct net_device *dev, void *data,
+				 void (*cleanup)(struct flow_block_cb *block_cb))
+{
+	flow_block->indr.binder_type = bo->binder_type;
+	flow_block->indr.data = data;
+	flow_block->indr.dev = dev;
+	flow_block->indr.cleanup = cleanup;
+}
+
+static void __flow_block_indr_binding(struct flow_block_offload *bo,
+				      struct net_device *dev, void *data,
+				      void (*cleanup)(struct flow_block_cb *block_cb))
+{
+	struct flow_block_cb *block_cb;
+
+	list_for_each_entry(block_cb, &bo->cb_list, list) {
+		switch (bo->command) {
+		case FLOW_BLOCK_BIND:
+			flow_block_indr_init(block_cb, bo, dev, data, cleanup);
+			list_add(&block_cb->indr.list, &flow_block_indr_list);
+			break;
+		case FLOW_BLOCK_UNBIND:
+			list_del(&block_cb->indr.list);
+			break;
+		}
+	}
+}
+
+int flow_indr_dev_setup_offload(struct net_device *dev,
+				enum tc_setup_type type, void *data,
+				struct flow_block_offload *bo,
+				void (*cleanup)(struct flow_block_cb *block_cb))
+{
+	struct flow_indr_dev *this;
+
+	mutex_lock(&flow_indr_block_lock);
+	list_for_each_entry(this, &flow_block_indr_dev_list, list)
+		this->cb(dev, this->cb_priv, type, bo);
+
+	__flow_block_indr_binding(bo, dev, data, cleanup);
+	mutex_unlock(&flow_indr_block_lock);
+
+	return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
+}
+EXPORT_SYMBOL(flow_indr_dev_setup_offload);
+
 static LIST_HEAD(block_cb_list);
 
 static struct rhashtable indr_setup_block_ht;
-- 
cgit v1.2.3-59-g8ed1b


From 324a823b9962a0f290c40fb6314926d434193276 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:36 +0200
Subject: net: cls_api: add tcf_block_offload_init()

Add a helper function to initialize the flow_block_offload structure.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_api.c | 26 +++++++++++++++++++-------
 1 file changed, 19 insertions(+), 7 deletions(-)

diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 752d608f4442..c5a2f16097b6 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -693,6 +693,22 @@ static void tc_indr_block_get_and_cmd(struct net_device *dev,
 	tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
 }
 
+static void tcf_block_offload_init(struct flow_block_offload *bo,
+				   struct net_device *dev,
+				   enum flow_block_command command,
+				   enum flow_block_binder_type binder_type,
+				   struct flow_block *flow_block,
+				   bool shared, struct netlink_ext_ack *extack)
+{
+	bo->net = dev_net(dev);
+	bo->command = command;
+	bo->binder_type = binder_type;
+	bo->block = flow_block;
+	bo->block_shared = shared;
+	bo->extack = extack;
+	INIT_LIST_HEAD(&bo->cb_list);
+}
+
 static void tc_indr_block_call(struct tcf_block *block,
 			       struct net_device *dev,
 			       struct tcf_block_ext_info *ei,
@@ -727,13 +743,9 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 	struct flow_block_offload bo = {};
 	int err;
 
-	bo.net = dev_net(dev);
-	bo.command = command;
-	bo.binder_type = ei->binder_type;
-	bo.block = &block->flow_block;
-	bo.block_shared = tcf_block_shared(block);
-	bo.extack = extack;
-	INIT_LIST_HEAD(&bo.cb_list);
+	tcf_block_offload_init(&bo, dev, command, ei->binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       extack);
 
 	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
 	if (err < 0) {
-- 
cgit v1.2.3-59-g8ed1b


From 0fdcf78d59737939ea449b512d02c3733a22c8e1 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:37 +0200
Subject: net: use flow_indr_dev_setup_offload()

Update existing frontends to use flow_indr_dev_setup_offload().

This new function must be called if ->ndo_setup_tc is unset to deal
with tunnel devices.

If there is no driver that is subscribed to new tunnel device
flow_block bindings, then this function bails out with EOPNOTSUPP.

If the driver module is removed, the ->cleanup() callback removes the
entries that belong to this tunnel device. This cleanup procedure is
triggered when the device unregisters the tunnel device offload handler.

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/netfilter/nf_flow_table_offload.c | 19 +++++++++---
 net/netfilter/nf_tables_offload.c     | 28 ++++++++++++++---
 net/sched/cls_api.c                   | 58 +++++++++++++++++------------------
 3 files changed, 67 insertions(+), 38 deletions(-)

diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 2ff4087007a6..01cfa02c43bd 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -942,6 +942,18 @@ static void nf_flow_table_block_offload_init(struct flow_block_offload *bo,
 	INIT_LIST_HEAD(&bo->cb_list);
 }
 
+static void nf_flow_table_indr_cleanup(struct flow_block_cb *block_cb)
+{
+	struct nf_flowtable *flowtable = block_cb->indr.data;
+	struct net_device *dev = block_cb->indr.dev;
+
+	nf_flow_table_gc_cleanup(flowtable, dev);
+	down_write(&flowtable->flow_block_lock);
+	list_del(&block_cb->list);
+	flow_block_cb_free(block_cb);
+	up_write(&flowtable->flow_block_lock);
+}
+
 static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
 					  struct nf_flowtable *flowtable,
 					  struct net_device *dev,
@@ -950,12 +962,9 @@ static int nf_flow_table_indr_offload_cmd(struct flow_block_offload *bo,
 {
 	nf_flow_table_block_offload_init(bo, dev_net(dev), cmd, flowtable,
 					 extack);
-	flow_indr_block_call(dev, bo, cmd, TC_SETUP_FT);
 
-	if (list_empty(&bo->cb_list))
-		return -EOPNOTSUPP;
-
-	return 0;
+	return flow_indr_dev_setup_offload(dev, TC_SETUP_FT, flowtable, bo,
+					   nf_flow_table_indr_cleanup);
 }
 
 static int nf_flow_table_offload_cmd(struct flow_block_offload *bo,
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 954bccb7f32a..1960f11477e8 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -304,21 +304,41 @@ static void nft_indr_block_ing_cmd(struct net_device *dev,
 	nft_block_setup(chain, &bo, cmd);
 }
 
-static int nft_indr_block_offload_cmd(struct nft_base_chain *chain,
+static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
+{
+	struct nft_base_chain *basechain = block_cb->indr.data;
+	struct net_device *dev = block_cb->indr.dev;
+	struct netlink_ext_ack extack = {};
+	struct net *net = dev_net(dev);
+	struct flow_block_offload bo;
+
+	nft_flow_block_offload_init(&bo, dev_net(dev), FLOW_BLOCK_UNBIND,
+				    basechain, &extack);
+	mutex_lock(&net->nft.commit_mutex);
+	list_move(&block_cb->list, &bo.cb_list);
+	nft_flow_offload_unbind(&bo, basechain);
+	mutex_unlock(&net->nft.commit_mutex);
+}
+
+static int nft_indr_block_offload_cmd(struct nft_base_chain *basechain,
 				      struct net_device *dev,
 				      enum flow_block_command cmd)
 {
 	struct netlink_ext_ack extack = {};
 	struct flow_block_offload bo;
+	int err;
 
-	nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
+	nft_flow_block_offload_init(&bo, dev_net(dev), cmd, basechain, &extack);
 
-	flow_indr_block_call(dev, &bo, cmd, TC_SETUP_BLOCK);
+	err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, basechain, &bo,
+					  nft_indr_block_cleanup);
+	if (err < 0)
+		return err;
 
 	if (list_empty(&bo.cb_list))
 		return -EOPNOTSUPP;
 
-	return nft_block_setup(chain, &bo, cmd);
+	return nft_block_setup(basechain, &bo, cmd);
 }
 
 #define FLOW_SETUP_BLOCK TC_SETUP_BLOCK
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index c5a2f16097b6..760e51d852f5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -709,24 +709,26 @@ static void tcf_block_offload_init(struct flow_block_offload *bo,
 	INIT_LIST_HEAD(&bo->cb_list);
 }
 
-static void tc_indr_block_call(struct tcf_block *block,
-			       struct net_device *dev,
-			       struct tcf_block_ext_info *ei,
-			       enum flow_block_command command,
-			       struct netlink_ext_ack *extack)
+static void tcf_block_unbind(struct tcf_block *block,
+			     struct flow_block_offload *bo);
+
+static void tc_block_indr_cleanup(struct flow_block_cb *block_cb)
 {
-	struct flow_block_offload bo = {
-		.command	= command,
-		.binder_type	= ei->binder_type,
-		.net		= dev_net(dev),
-		.block		= &block->flow_block,
-		.block_shared	= tcf_block_shared(block),
-		.extack		= extack,
-	};
-	INIT_LIST_HEAD(&bo.cb_list);
+	struct tcf_block *block = block_cb->indr.data;
+	struct net_device *dev = block_cb->indr.dev;
+	struct netlink_ext_ack extack = {};
+	struct flow_block_offload bo;
 
-	flow_indr_block_call(dev, &bo, command, TC_SETUP_BLOCK);
-	tcf_block_setup(block, &bo);
+	tcf_block_offload_init(&bo, dev, FLOW_BLOCK_UNBIND,
+			       block_cb->indr.binder_type,
+			       &block->flow_block, tcf_block_shared(block),
+			       &extack);
+	down_write(&block->cb_lock);
+	list_move(&block_cb->list, &bo.cb_list);
+	up_write(&block->cb_lock);
+	rtnl_lock();
+	tcf_block_unbind(block, &bo);
+	rtnl_unlock();
 }
 
 static bool tcf_block_offload_in_use(struct tcf_block *block)
@@ -747,7 +749,12 @@ static int tcf_block_offload_cmd(struct tcf_block *block,
 			       &block->flow_block, tcf_block_shared(block),
 			       extack);
 
-	err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	if (dev->netdev_ops->ndo_setup_tc)
+		err = dev->netdev_ops->ndo_setup_tc(dev, TC_SETUP_BLOCK, &bo);
+	else
+		err = flow_indr_dev_setup_offload(dev, TC_SETUP_BLOCK, block,
+						  &bo, tc_block_indr_cleanup);
+
 	if (err < 0) {
 		if (err != -EOPNOTSUPP)
 			NL_SET_ERR_MSG(extack, "Driver ndo_setup_tc failed");
@@ -765,13 +772,13 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	int err;
 
 	down_write(&block->cb_lock);
-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_inc;
 
 	/* If tc offload feature is disabled and the block we try to bind
 	 * to already has some offloaded filters, forbid to bind.
 	 */
-	if (!tc_can_offload(dev) && tcf_block_offload_in_use(block)) {
+	if (dev->netdev_ops->ndo_setup_tc &&
+	    !tc_can_offload(dev) &&
+	    tcf_block_offload_in_use(block)) {
 		NL_SET_ERR_MSG(extack, "Bind to offloaded block failed as dev has offload disabled");
 		err = -EOPNOTSUPP;
 		goto err_unlock;
@@ -783,18 +790,15 @@ static int tcf_block_offload_bind(struct tcf_block *block, struct Qdisc *q,
 	if (err)
 		goto err_unlock;
 
-	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
 	up_write(&block->cb_lock);
 	return 0;
 
 no_offload_dev_inc:
-	if (tcf_block_offload_in_use(block)) {
-		err = -EOPNOTSUPP;
+	if (tcf_block_offload_in_use(block))
 		goto err_unlock;
-	}
+
 	err = 0;
 	block->nooffloaddevcnt++;
-	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_BIND, extack);
 err_unlock:
 	up_write(&block->cb_lock);
 	return err;
@@ -807,10 +811,6 @@ static void tcf_block_offload_unbind(struct tcf_block *block, struct Qdisc *q,
 	int err;
 
 	down_write(&block->cb_lock);
-	tc_indr_block_call(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
-
-	if (!dev->netdev_ops->ndo_setup_tc)
-		goto no_offload_dev_dec;
 	err = tcf_block_offload_cmd(block, dev, ei, FLOW_BLOCK_UNBIND, NULL);
 	if (err == -EOPNOTSUPP)
 		goto no_offload_dev_dec;
-- 
cgit v1.2.3-59-g8ed1b


From 9eabd188716b2c53d8b9d23e969c6c17049f0fcc Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:38 +0200
Subject: mlx5: update indirect block support

Register ndo callback via flow_indr_dev_register() and
flow_indr_dev_unregister().

No need for mlx5e_rep_indr_clean_block_privs() since flow_block_cb_free()
already releases the internal mapping via ->release callback, which in
this case is mlx5e_rep_indr_tc_block_unbind().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.c    | 81 +++-------------------
 .../net/ethernet/mellanox/mlx5/core/en/rep/tc.h    |  4 --
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.c   |  1 -
 drivers/net/ethernet/mellanox/mlx5/core/en_rep.h   |  5 --
 4 files changed, 8 insertions(+), 83 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
index c609a5e50ebc..80713123de5c 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.c
@@ -306,20 +306,6 @@ mlx5e_rep_indr_block_priv_lookup(struct mlx5e_rep_priv *rpriv,
 	return NULL;
 }
 
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
-					    struct net_device *netdev);
-
-void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv)
-{
-	struct mlx5e_rep_indr_block_priv *cb_priv, *temp;
-	struct list_head *head = &rpriv->uplink_priv.tc_indr_block_priv_list;
-
-	list_for_each_entry_safe(cb_priv, temp, head, list) {
-		mlx5e_rep_indr_unregister_block(rpriv, cb_priv->netdev);
-		kfree(cb_priv);
-	}
-}
-
 static int
 mlx5e_rep_indr_offload(struct net_device *netdev,
 		       struct flow_cls_offload *flower,
@@ -423,9 +409,14 @@ mlx5e_rep_indr_setup_block(struct net_device *netdev,
 			   struct flow_block_offload *f,
 			   flow_setup_cb_t *setup_cb)
 {
+	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
 	struct mlx5e_rep_indr_block_priv *indr_priv;
 	struct flow_block_cb *block_cb;
 
+	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
+	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
+		return -EOPNOTSUPP;
+
 	if (f->binder_type != FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS)
 		return -EOPNOTSUPP;
 
@@ -492,76 +483,20 @@ int mlx5e_rep_indr_setup_cb(struct net_device *netdev, void *cb_priv,
 	}
 }
 
-static int mlx5e_rep_indr_register_block(struct mlx5e_rep_priv *rpriv,
-					 struct net_device *netdev)
-{
-	int err;
-
-	err = __flow_indr_block_cb_register(netdev, rpriv,
-					    mlx5e_rep_indr_setup_cb,
-					    rpriv);
-	if (err) {
-		struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-
-		mlx5_core_err(priv->mdev, "Failed to register remote block notifier for %s err=%d\n",
-			      netdev_name(netdev), err);
-	}
-	return err;
-}
-
-static void mlx5e_rep_indr_unregister_block(struct mlx5e_rep_priv *rpriv,
-					    struct net_device *netdev)
-{
-	__flow_indr_block_cb_unregister(netdev, mlx5e_rep_indr_setup_cb,
-					rpriv);
-}
-
-static int mlx5e_nic_rep_netdevice_event(struct notifier_block *nb,
-					 unsigned long event, void *ptr)
-{
-	struct mlx5e_rep_priv *rpriv = container_of(nb, struct mlx5e_rep_priv,
-						     uplink_priv.netdevice_nb);
-	struct mlx5e_priv *priv = netdev_priv(rpriv->netdev);
-	struct net_device *netdev = netdev_notifier_info_to_dev(ptr);
-
-	if (!mlx5e_tc_tun_device_to_offload(priv, netdev) &&
-	    !(is_vlan_dev(netdev) && vlan_dev_real_dev(netdev) == rpriv->netdev))
-		return NOTIFY_OK;
-
-	switch (event) {
-	case NETDEV_REGISTER:
-		mlx5e_rep_indr_register_block(rpriv, netdev);
-		break;
-	case NETDEV_UNREGISTER:
-		mlx5e_rep_indr_unregister_block(rpriv, netdev);
-		break;
-	}
-	return NOTIFY_OK;
-}
-
 int mlx5e_rep_tc_netdevice_event_register(struct mlx5e_rep_priv *rpriv)
 {
 	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-	int err;
 
 	/* init indirect block notifications */
 	INIT_LIST_HEAD(&uplink_priv->tc_indr_block_priv_list);
 
-	uplink_priv->netdevice_nb.notifier_call = mlx5e_nic_rep_netdevice_event;
-	err = register_netdevice_notifier_dev_net(rpriv->netdev,
-						  &uplink_priv->netdevice_nb,
-						  &uplink_priv->netdevice_nn);
-	return err;
+	return flow_indr_dev_register(mlx5e_rep_indr_setup_cb, rpriv);
 }
 
 void mlx5e_rep_tc_netdevice_event_unregister(struct mlx5e_rep_priv *rpriv)
 {
-	struct mlx5_rep_uplink_priv *uplink_priv = &rpriv->uplink_priv;
-
-	/* clean indirect TC block notifications */
-	unregister_netdevice_notifier_dev_net(rpriv->netdev,
-					      &uplink_priv->netdevice_nb,
-					      &uplink_priv->netdevice_nn);
+	flow_indr_dev_unregister(mlx5e_rep_indr_setup_cb, rpriv,
+				 mlx5e_rep_indr_setup_tc_cb);
 }
 
 #if IS_ENABLED(CONFIG_NET_TC_SKB_EXT)
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
index 86f92abf2fdd..fdf9702c2d7d 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/rep/tc.h
@@ -33,7 +33,6 @@ void mlx5e_rep_encap_entry_detach(struct mlx5e_priv *priv,
 
 int mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		       void *type_data);
-void mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv);
 
 bool mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
 			     struct sk_buff *skb,
@@ -65,9 +64,6 @@ static inline int
 mlx5e_rep_setup_tc(struct net_device *dev, enum tc_setup_type type,
 		   void *type_data) { return -EOPNOTSUPP; }
 
-static inline void
-mlx5e_rep_indr_clean_block_privs(struct mlx5e_rep_priv *rpriv) {}
-
 struct mlx5e_tc_update_priv;
 static inline bool
 mlx5e_rep_tc_update_skb(struct mlx5_cqe64 *cqe,
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
index af89a4803c7d..006807e04eda 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.c
@@ -1018,7 +1018,6 @@ destroy_tises:
 static void mlx5e_cleanup_uplink_rep_tx(struct mlx5e_rep_priv *rpriv)
 {
 	mlx5e_rep_tc_netdevice_event_unregister(rpriv);
-	mlx5e_rep_indr_clean_block_privs(rpriv);
 	mlx5e_rep_bond_cleanup(rpriv);
 	mlx5e_rep_tc_cleanup(rpriv);
 }
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
index da9f1686d525..1d5669801484 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en_rep.h
@@ -69,13 +69,8 @@ struct mlx5_rep_uplink_priv {
 	 * tc_indr_block_cb_priv_list is used to lookup indirect callback
 	 * private data
 	 *
-	 * netdevice_nb is the netdev events notifier - used to register
-	 * tunnel devices for block events
-	 *
 	 */
 	struct list_head	    tc_indr_block_priv_list;
-	struct notifier_block	    netdevice_nb;
-	struct netdev_net_notifier  netdevice_nn;
 
 	struct mlx5_tun_entropy tun_entropy;
 
-- 
cgit v1.2.3-59-g8ed1b


From 50c1b1c9385fbb35c25b27608e00bcf89368e8ba Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:39 +0200
Subject: nfp: update indirect block support

Register ndo callback via flow_indr_dev_register() and
flow_indr_dev_unregister().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/netronome/nfp/flower/main.c   | 11 ++++---
 drivers/net/ethernet/netronome/nfp/flower/main.h   |  7 +++--
 .../net/ethernet/netronome/nfp/flower/offload.c    | 35 ++++------------------
 3 files changed, 17 insertions(+), 36 deletions(-)

diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.c b/drivers/net/ethernet/netronome/nfp/flower/main.c
index ca7032d22196..c39327677a7d 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.c
@@ -830,6 +830,10 @@ static int nfp_flower_init(struct nfp_app *app)
 	if (err)
 		goto err_cleanup;
 
+	err = flow_indr_dev_register(nfp_flower_indr_setup_tc_cb, app);
+	if (err)
+		goto err_cleanup;
+
 	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
 		nfp_flower_qos_init(app);
 
@@ -856,6 +860,9 @@ static void nfp_flower_clean(struct nfp_app *app)
 	skb_queue_purge(&app_priv->cmsg_skbs_low);
 	flush_work(&app_priv->cmsg_work);
 
+	flow_indr_dev_unregister(nfp_flower_indr_setup_tc_cb, app,
+				 nfp_flower_setup_indr_block_cb);
+
 	if (app_priv->flower_ext_feats & NFP_FL_FEATS_VF_RLIM)
 		nfp_flower_qos_cleanup(app);
 
@@ -959,10 +966,6 @@ nfp_flower_netdev_event(struct nfp_app *app, struct net_device *netdev,
 			return ret;
 	}
 
-	ret = nfp_flower_reg_indir_block_handler(app, netdev, event);
-	if (ret & NOTIFY_STOP_MASK)
-		return ret;
-
 	ret = nfp_flower_internal_port_event_handler(app, netdev, event);
 	if (ret & NOTIFY_STOP_MASK)
 		return ret;
diff --git a/drivers/net/ethernet/netronome/nfp/flower/main.h b/drivers/net/ethernet/netronome/nfp/flower/main.h
index 59abea2a39ad..6c3dc3baf387 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/main.h
+++ b/drivers/net/ethernet/netronome/nfp/flower/main.h
@@ -458,9 +458,10 @@ void nfp_flower_qos_cleanup(struct nfp_app *app);
 int nfp_flower_setup_qos_offload(struct nfp_app *app, struct net_device *netdev,
 				 struct tc_cls_matchall_offload *flow);
 void nfp_flower_stats_rlim_reply(struct nfp_app *app, struct sk_buff *skb);
-int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
-				       struct net_device *netdev,
-				       unsigned long event);
+int nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
+				enum tc_setup_type type, void *type_data);
+int nfp_flower_setup_indr_block_cb(enum tc_setup_type type, void *type_data,
+				   void *cb_priv);
 
 void
 __nfp_flower_non_repr_priv_get(struct nfp_flower_non_repr_priv *non_repr_priv);
diff --git a/drivers/net/ethernet/netronome/nfp/flower/offload.c b/drivers/net/ethernet/netronome/nfp/flower/offload.c
index 6b60771ccb19..695d24b9dd92 100644
--- a/drivers/net/ethernet/netronome/nfp/flower/offload.c
+++ b/drivers/net/ethernet/netronome/nfp/flower/offload.c
@@ -1619,8 +1619,8 @@ nfp_flower_indr_block_cb_priv_lookup(struct nfp_app *app,
 	return NULL;
 }
 
-static int nfp_flower_setup_indr_block_cb(enum tc_setup_type type,
-					  void *type_data, void *cb_priv)
+int nfp_flower_setup_indr_block_cb(enum tc_setup_type type,
+				   void *type_data, void *cb_priv)
 {
 	struct nfp_flower_indr_block_cb_priv *priv = cb_priv;
 	struct flow_cls_offload *flower = type_data;
@@ -1708,10 +1708,13 @@ nfp_flower_setup_indr_tc_block(struct net_device *netdev, struct nfp_app *app,
 	return 0;
 }
 
-static int
+int
 nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
 			    enum tc_setup_type type, void *type_data)
 {
+	if (!nfp_fl_is_netdev_to_offload(netdev))
+		return -EOPNOTSUPP;
+
 	switch (type) {
 	case TC_SETUP_BLOCK:
 		return nfp_flower_setup_indr_tc_block(netdev, cb_priv,
@@ -1720,29 +1723,3 @@ nfp_flower_indr_setup_tc_cb(struct net_device *netdev, void *cb_priv,
 		return -EOPNOTSUPP;
 	}
 }
-
-int nfp_flower_reg_indir_block_handler(struct nfp_app *app,
-				       struct net_device *netdev,
-				       unsigned long event)
-{
-	int err;
-
-	if (!nfp_fl_is_netdev_to_offload(netdev))
-		return NOTIFY_OK;
-
-	if (event == NETDEV_REGISTER) {
-		err = __flow_indr_block_cb_register(netdev, app,
-						    nfp_flower_indr_setup_tc_cb,
-						    app);
-		if (err)
-			nfp_flower_cmsg_warn(app,
-					     "Indirect block reg failed - %s\n",
-					     netdev->name);
-	} else if (event == NETDEV_UNREGISTER) {
-		__flow_indr_block_cb_unregister(netdev,
-						nfp_flower_indr_setup_tc_cb,
-						app);
-	}
-
-	return NOTIFY_OK;
-}
-- 
cgit v1.2.3-59-g8ed1b


From e445e30cf7e6d68566db775ce186cbe63ef286e9 Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:40 +0200
Subject: bnxt_tc: update indirect block support

Register ndo callback via flow_indr_dev_register() and
flow_indr_dev_unregister().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/broadcom/bnxt/bnxt.h    |  1 -
 drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c | 51 +++++++---------------------
 2 files changed, 12 insertions(+), 40 deletions(-)

diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt.h b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
index 5c562e0aac67..9e173d74b72a 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt.h
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt.h
@@ -1870,7 +1870,6 @@ struct bnxt {
 	u8			dsn[8];
 	struct bnxt_tc_info	*tc_info;
 	struct list_head	tc_indr_block_list;
-	struct notifier_block	tc_netdev_nb;
 	struct dentry		*debugfs_pdev;
 	struct device		*hwmon_dev;
 };
diff --git a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
index 782ea0771221..0eef4f5e4a46 100644
--- a/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
+++ b/drivers/net/ethernet/broadcom/bnxt/bnxt_tc.c
@@ -1939,53 +1939,25 @@ static int bnxt_tc_setup_indr_block(struct net_device *netdev, struct bnxt *bp,
 	return 0;
 }
 
-static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
-				 enum tc_setup_type type, void *type_data)
-{
-	switch (type) {
-	case TC_SETUP_BLOCK:
-		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
-	default:
-		return -EOPNOTSUPP;
-	}
-}
-
 static bool bnxt_is_netdev_indr_offload(struct net_device *netdev)
 {
 	return netif_is_vxlan(netdev);
 }
 
-static int bnxt_tc_indr_block_event(struct notifier_block *nb,
-				    unsigned long event, void *ptr)
+static int bnxt_tc_setup_indr_cb(struct net_device *netdev, void *cb_priv,
+				 enum tc_setup_type type, void *type_data)
 {
-	struct net_device *netdev;
-	struct bnxt *bp;
-	int rc;
-
-	netdev = netdev_notifier_info_to_dev(ptr);
 	if (!bnxt_is_netdev_indr_offload(netdev))
-		return NOTIFY_OK;
-
-	bp = container_of(nb, struct bnxt, tc_netdev_nb);
+		return -EOPNOTSUPP;
 
-	switch (event) {
-	case NETDEV_REGISTER:
-		rc = __flow_indr_block_cb_register(netdev, bp,
-						   bnxt_tc_setup_indr_cb,
-						   bp);
-		if (rc)
-			netdev_info(bp->dev,
-				    "Failed to register indirect blk: dev: %s\n",
-				    netdev->name);
-		break;
-	case NETDEV_UNREGISTER:
-		__flow_indr_block_cb_unregister(netdev,
-						bnxt_tc_setup_indr_cb,
-						bp);
+	switch (type) {
+	case TC_SETUP_BLOCK:
+		return bnxt_tc_setup_indr_block(netdev, cb_priv, type_data);
+	default:
 		break;
 	}
 
-	return NOTIFY_DONE;
+	return -EOPNOTSUPP;
 }
 
 static const struct rhashtable_params bnxt_tc_flow_ht_params = {
@@ -2074,8 +2046,8 @@ int bnxt_init_tc(struct bnxt *bp)
 
 	/* init indirect block notifications */
 	INIT_LIST_HEAD(&bp->tc_indr_block_list);
-	bp->tc_netdev_nb.notifier_call = bnxt_tc_indr_block_event;
-	rc = register_netdevice_notifier(&bp->tc_netdev_nb);
+
+	rc = flow_indr_dev_register(bnxt_tc_setup_indr_cb, bp);
 	if (!rc)
 		return 0;
 
@@ -2101,7 +2073,8 @@ void bnxt_shutdown_tc(struct bnxt *bp)
 	if (!bnxt_tc_flower_enabled(bp))
 		return;
 
-	unregister_netdevice_notifier(&bp->tc_netdev_nb);
+	flow_indr_dev_unregister(bnxt_tc_setup_indr_cb, bp,
+				 bnxt_tc_setup_indr_block_cb);
 	rhashtable_destroy(&tc_info->flow_table);
 	rhashtable_destroy(&tc_info->l2_table);
 	rhashtable_destroy(&tc_info->decap_l2_table);
-- 
cgit v1.2.3-59-g8ed1b


From 709ffbe19b777e8fc952e2fdcfd8e6f50c8ef08c Mon Sep 17 00:00:00 2001
From: Pablo Neira Ayuso <pablo@netfilter.org>
Date: Fri, 29 May 2020 02:25:41 +0200
Subject: net: remove indirect block netdev event registration

Drivers do not register to netdev events to set up indirect blocks
anymore. Remove __flow_indr_block_cb_register() and
__flow_indr_block_cb_unregister().

The frontends set up the callbacks through flow_indr_dev_setup_offload().

Signed-off-by: Pablo Neira Ayuso <pablo@netfilter.org>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/flow_offload.h            |   9 --
 net/core/flow_offload.c               | 238 ----------------------------------
 net/netfilter/nf_flow_table_offload.c |  66 ----------
 net/netfilter/nf_tables_offload.c     |  53 +-------
 net/sched/cls_api.c                   |  79 -----------
 5 files changed, 1 insertion(+), 444 deletions(-)

diff --git a/include/net/flow_offload.h b/include/net/flow_offload.h
index 5493282348fa..69e13c8b6b3a 100644
--- a/include/net/flow_offload.h
+++ b/include/net/flow_offload.h
@@ -546,15 +546,6 @@ typedef void flow_indr_block_cmd_t(struct net_device *dev,
 				   flow_indr_block_bind_cb_t *cb, void *cb_priv,
 				   enum flow_block_command command);
 
-struct flow_indr_block_entry {
-	flow_indr_block_cmd_t *cb;
-	struct list_head	list;
-};
-
-void flow_indr_add_block_cb(struct flow_indr_block_entry *entry);
-
-void flow_indr_del_block_cb(struct flow_indr_block_entry *entry);
-
 int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
 				  flow_indr_block_bind_cb_t *cb,
 				  void *cb_ident);
diff --git a/net/core/flow_offload.c b/net/core/flow_offload.c
index 8cd7da2586ae..0cfc35e6be28 100644
--- a/net/core/flow_offload.c
+++ b/net/core/flow_offload.c
@@ -473,241 +473,3 @@ int flow_indr_dev_setup_offload(struct net_device *dev,
 	return list_empty(&bo->cb_list) ? -EOPNOTSUPP : 0;
 }
 EXPORT_SYMBOL(flow_indr_dev_setup_offload);
-
-static LIST_HEAD(block_cb_list);
-
-static struct rhashtable indr_setup_block_ht;
-
-struct flow_indr_block_cb {
-	struct list_head list;
-	void *cb_priv;
-	flow_indr_block_bind_cb_t *cb;
-	void *cb_ident;
-};
-
-struct flow_indr_block_dev {
-	struct rhash_head ht_node;
-	struct net_device *dev;
-	unsigned int refcnt;
-	struct list_head cb_list;
-};
-
-static const struct rhashtable_params flow_indr_setup_block_ht_params = {
-	.key_offset	= offsetof(struct flow_indr_block_dev, dev),
-	.head_offset	= offsetof(struct flow_indr_block_dev, ht_node),
-	.key_len	= sizeof(struct net_device *),
-};
-
-static struct flow_indr_block_dev *
-flow_indr_block_dev_lookup(struct net_device *dev)
-{
-	return rhashtable_lookup_fast(&indr_setup_block_ht, &dev,
-				      flow_indr_setup_block_ht_params);
-}
-
-static struct flow_indr_block_dev *
-flow_indr_block_dev_get(struct net_device *dev)
-{
-	struct flow_indr_block_dev *indr_dev;
-
-	indr_dev = flow_indr_block_dev_lookup(dev);
-	if (indr_dev)
-		goto inc_ref;
-
-	indr_dev = kzalloc(sizeof(*indr_dev), GFP_KERNEL);
-	if (!indr_dev)
-		return NULL;
-
-	INIT_LIST_HEAD(&indr_dev->cb_list);
-	indr_dev->dev = dev;
-	if (rhashtable_insert_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-				   flow_indr_setup_block_ht_params)) {
-		kfree(indr_dev);
-		return NULL;
-	}
-
-inc_ref:
-	indr_dev->refcnt++;
-	return indr_dev;
-}
-
-static void flow_indr_block_dev_put(struct flow_indr_block_dev *indr_dev)
-{
-	if (--indr_dev->refcnt)
-		return;
-
-	rhashtable_remove_fast(&indr_setup_block_ht, &indr_dev->ht_node,
-			       flow_indr_setup_block_ht_params);
-	kfree(indr_dev);
-}
-
-static struct flow_indr_block_cb *
-flow_indr_block_cb_lookup(struct flow_indr_block_dev *indr_dev,
-			  flow_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct flow_indr_block_cb *indr_block_cb;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		if (indr_block_cb->cb == cb &&
-		    indr_block_cb->cb_ident == cb_ident)
-			return indr_block_cb;
-	return NULL;
-}
-
-static struct flow_indr_block_cb *
-flow_indr_block_cb_add(struct flow_indr_block_dev *indr_dev, void *cb_priv,
-		       flow_indr_block_bind_cb_t *cb, void *cb_ident)
-{
-	struct flow_indr_block_cb *indr_block_cb;
-
-	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (indr_block_cb)
-		return ERR_PTR(-EEXIST);
-
-	indr_block_cb = kzalloc(sizeof(*indr_block_cb), GFP_KERNEL);
-	if (!indr_block_cb)
-		return ERR_PTR(-ENOMEM);
-
-	indr_block_cb->cb_priv = cb_priv;
-	indr_block_cb->cb = cb;
-	indr_block_cb->cb_ident = cb_ident;
-	list_add(&indr_block_cb->list, &indr_dev->cb_list);
-
-	return indr_block_cb;
-}
-
-static void flow_indr_block_cb_del(struct flow_indr_block_cb *indr_block_cb)
-{
-	list_del(&indr_block_cb->list);
-	kfree(indr_block_cb);
-}
-
-static DEFINE_MUTEX(flow_indr_block_cb_lock);
-
-static void flow_block_cmd(struct net_device *dev,
-			   flow_indr_block_bind_cb_t *cb, void *cb_priv,
-			   enum flow_block_command command)
-{
-	struct flow_indr_block_entry *entry;
-
-	mutex_lock(&flow_indr_block_cb_lock);
-	list_for_each_entry(entry, &block_cb_list, list) {
-		entry->cb(dev, cb, cb_priv, command);
-	}
-	mutex_unlock(&flow_indr_block_cb_lock);
-}
-
-int __flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				  flow_indr_block_bind_cb_t *cb,
-				  void *cb_ident)
-{
-	struct flow_indr_block_cb *indr_block_cb;
-	struct flow_indr_block_dev *indr_dev;
-	int err;
-
-	indr_dev = flow_indr_block_dev_get(dev);
-	if (!indr_dev)
-		return -ENOMEM;
-
-	indr_block_cb = flow_indr_block_cb_add(indr_dev, cb_priv, cb, cb_ident);
-	err = PTR_ERR_OR_ZERO(indr_block_cb);
-	if (err)
-		goto err_dev_put;
-
-	flow_block_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
-		       FLOW_BLOCK_BIND);
-
-	return 0;
-
-err_dev_put:
-	flow_indr_block_dev_put(indr_dev);
-	return err;
-}
-EXPORT_SYMBOL_GPL(__flow_indr_block_cb_register);
-
-int flow_indr_block_cb_register(struct net_device *dev, void *cb_priv,
-				flow_indr_block_bind_cb_t *cb,
-				void *cb_ident)
-{
-	int err;
-
-	rtnl_lock();
-	err = __flow_indr_block_cb_register(dev, cb_priv, cb, cb_ident);
-	rtnl_unlock();
-
-	return err;
-}
-EXPORT_SYMBOL_GPL(flow_indr_block_cb_register);
-
-void __flow_indr_block_cb_unregister(struct net_device *dev,
-				     flow_indr_block_bind_cb_t *cb,
-				     void *cb_ident)
-{
-	struct flow_indr_block_cb *indr_block_cb;
-	struct flow_indr_block_dev *indr_dev;
-
-	indr_dev = flow_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
-
-	indr_block_cb = flow_indr_block_cb_lookup(indr_dev, cb, cb_ident);
-	if (!indr_block_cb)
-		return;
-
-	flow_block_cmd(dev, indr_block_cb->cb, indr_block_cb->cb_priv,
-		       FLOW_BLOCK_UNBIND);
-
-	flow_indr_block_cb_del(indr_block_cb);
-	flow_indr_block_dev_put(indr_dev);
-}
-EXPORT_SYMBOL_GPL(__flow_indr_block_cb_unregister);
-
-void flow_indr_block_cb_unregister(struct net_device *dev,
-				   flow_indr_block_bind_cb_t *cb,
-				   void *cb_ident)
-{
-	rtnl_lock();
-	__flow_indr_block_cb_unregister(dev, cb, cb_ident);
-	rtnl_unlock();
-}
-EXPORT_SYMBOL_GPL(flow_indr_block_cb_unregister);
-
-void flow_indr_block_call(struct net_device *dev,
-			  struct flow_block_offload *bo,
-			  enum flow_block_command command,
-			  enum tc_setup_type type)
-{
-	struct flow_indr_block_cb *indr_block_cb;
-	struct flow_indr_block_dev *indr_dev;
-
-	indr_dev = flow_indr_block_dev_lookup(dev);
-	if (!indr_dev)
-		return;
-
-	list_for_each_entry(indr_block_cb, &indr_dev->cb_list, list)
-		indr_block_cb->cb(dev, indr_block_cb->cb_priv, type, bo);
-}
-EXPORT_SYMBOL_GPL(flow_indr_block_call);
-
-void flow_indr_add_block_cb(struct flow_indr_block_entry *entry)
-{
-	mutex_lock(&flow_indr_block_cb_lock);
-	list_add_tail(&entry->list, &block_cb_list);
-	mutex_unlock(&flow_indr_block_cb_lock);
-}
-EXPORT_SYMBOL_GPL(flow_indr_add_block_cb);
-
-void flow_indr_del_block_cb(struct flow_indr_block_entry *entry)
-{
-	mutex_lock(&flow_indr_block_cb_lock);
-	list_del(&entry->list);
-	mutex_unlock(&flow_indr_block_cb_lock);
-}
-EXPORT_SYMBOL_GPL(flow_indr_del_block_cb);
-
-static int __init init_flow_indr_rhashtable(void)
-{
-	return rhashtable_init(&indr_setup_block_ht,
-			       &flow_indr_setup_block_ht_params);
-}
-subsys_initcall(init_flow_indr_rhashtable);
diff --git a/net/netfilter/nf_flow_table_offload.c b/net/netfilter/nf_flow_table_offload.c
index 01cfa02c43bd..62651e6683f6 100644
--- a/net/netfilter/nf_flow_table_offload.c
+++ b/net/netfilter/nf_flow_table_offload.c
@@ -1008,69 +1008,6 @@ int nf_flow_table_offload_setup(struct nf_flowtable *flowtable,
 }
 EXPORT_SYMBOL_GPL(nf_flow_table_offload_setup);
 
-static void nf_flow_table_indr_block_ing_cmd(struct net_device *dev,
-					     struct nf_flowtable *flowtable,
-					     flow_indr_block_bind_cb_t *cb,
-					     void *cb_priv,
-					     enum flow_block_command cmd)
-{
-	struct netlink_ext_ack extack = {};
-	struct flow_block_offload bo;
-
-	if (!flowtable)
-		return;
-
-	nf_flow_table_block_offload_init(&bo, dev_net(dev), cmd, flowtable,
-					 &extack);
-
-	cb(dev, cb_priv, TC_SETUP_FT, &bo);
-
-	nf_flow_table_block_setup(flowtable, &bo, cmd);
-}
-
-static void nf_flow_table_indr_block_cb_cmd(struct nf_flowtable *flowtable,
-					    struct net_device *dev,
-					    flow_indr_block_bind_cb_t *cb,
-					    void *cb_priv,
-					    enum flow_block_command cmd)
-{
-	if (!(flowtable->flags & NF_FLOWTABLE_HW_OFFLOAD))
-		return;
-
-	nf_flow_table_indr_block_ing_cmd(dev, flowtable, cb, cb_priv, cmd);
-}
-
-static void nf_flow_table_indr_block_cb(struct net_device *dev,
-					flow_indr_block_bind_cb_t *cb,
-					void *cb_priv,
-					enum flow_block_command cmd)
-{
-	struct net *net = dev_net(dev);
-	struct nft_flowtable *nft_ft;
-	struct nft_table *table;
-	struct nft_hook *hook;
-
-	mutex_lock(&net->nft.commit_mutex);
-	list_for_each_entry(table, &net->nft.tables, list) {
-		list_for_each_entry(nft_ft, &table->flowtables, list) {
-			list_for_each_entry(hook, &nft_ft->hook_list, list) {
-				if (hook->ops.dev != dev)
-					continue;
-
-				nf_flow_table_indr_block_cb_cmd(&nft_ft->data,
-								dev, cb,
-								cb_priv, cmd);
-			}
-		}
-	}
-	mutex_unlock(&net->nft.commit_mutex);
-}
-
-static struct flow_indr_block_entry block_ing_entry = {
-	.cb	= nf_flow_table_indr_block_cb,
-	.list	= LIST_HEAD_INIT(block_ing_entry.list),
-};
-
 int nf_flow_table_offload_init(void)
 {
 	nf_flow_offload_wq  = alloc_workqueue("nf_flow_table_offload",
@@ -1078,13 +1015,10 @@ int nf_flow_table_offload_init(void)
 	if (!nf_flow_offload_wq)
 		return -ENOMEM;
 
-	flow_indr_add_block_cb(&block_ing_entry);
-
 	return 0;
 }
 
 void nf_flow_table_offload_exit(void)
 {
-	flow_indr_del_block_cb(&block_ing_entry);
 	destroy_workqueue(nf_flow_offload_wq);
 }
diff --git a/net/netfilter/nf_tables_offload.c b/net/netfilter/nf_tables_offload.c
index 1960f11477e8..185fc82c99aa 100644
--- a/net/netfilter/nf_tables_offload.c
+++ b/net/netfilter/nf_tables_offload.c
@@ -285,25 +285,6 @@ static int nft_block_offload_cmd(struct nft_base_chain *chain,
 	return nft_block_setup(chain, &bo, cmd);
 }
 
-static void nft_indr_block_ing_cmd(struct net_device *dev,
-				   struct nft_base_chain *chain,
-				   flow_indr_block_bind_cb_t *cb,
-				   void *cb_priv,
-				   enum flow_block_command cmd)
-{
-	struct netlink_ext_ack extack = {};
-	struct flow_block_offload bo;
-
-	if (!chain)
-		return;
-
-	nft_flow_block_offload_init(&bo, dev_net(dev), cmd, chain, &extack);
-
-	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
-
-	nft_block_setup(chain, &bo, cmd);
-}
-
 static void nft_indr_block_cleanup(struct flow_block_cb *block_cb)
 {
 	struct nft_base_chain *basechain = block_cb->indr.data;
@@ -575,24 +556,6 @@ static struct nft_chain *__nft_offload_get_chain(struct net_device *dev)
 	return NULL;
 }
 
-static void nft_indr_block_cb(struct net_device *dev,
-			      flow_indr_block_bind_cb_t *cb, void *cb_priv,
-			      enum flow_block_command cmd)
-{
-	struct net *net = dev_net(dev);
-	struct nft_chain *chain;
-
-	mutex_lock(&net->nft.commit_mutex);
-	chain = __nft_offload_get_chain(dev);
-	if (chain && chain->flags & NFT_CHAIN_HW_OFFLOAD) {
-		struct nft_base_chain *basechain;
-
-		basechain = nft_base_chain(chain);
-		nft_indr_block_ing_cmd(dev, basechain, cb, cb_priv, cmd);
-	}
-	mutex_unlock(&net->nft.commit_mutex);
-}
-
 static int nft_offload_netdev_event(struct notifier_block *this,
 				    unsigned long event, void *ptr)
 {
@@ -614,30 +577,16 @@ static int nft_offload_netdev_event(struct notifier_block *this,
 	return NOTIFY_DONE;
 }
 
-static struct flow_indr_block_entry block_ing_entry = {
-	.cb	= nft_indr_block_cb,
-	.list	= LIST_HEAD_INIT(block_ing_entry.list),
-};
-
 static struct notifier_block nft_offload_netdev_notifier = {
 	.notifier_call	= nft_offload_netdev_event,
 };
 
 int nft_offload_init(void)
 {
-	int err;
-
-	err = register_netdevice_notifier(&nft_offload_netdev_notifier);
-	if (err < 0)
-		return err;
-
-	flow_indr_add_block_cb(&block_ing_entry);
-
-	return 0;
+	return register_netdevice_notifier(&nft_offload_netdev_notifier);
 }
 
 void nft_offload_exit(void)
 {
-	flow_indr_del_block_cb(&block_ing_entry);
 	unregister_netdevice_notifier(&nft_offload_netdev_notifier);
 }
diff --git a/net/sched/cls_api.c b/net/sched/cls_api.c
index 760e51d852f5..a00a203b2ef5 100644
--- a/net/sched/cls_api.c
+++ b/net/sched/cls_api.c
@@ -621,78 +621,6 @@ static void tcf_chain_flush(struct tcf_chain *chain, bool rtnl_held)
 static int tcf_block_setup(struct tcf_block *block,
 			   struct flow_block_offload *bo);
 
-static void tc_indr_block_cmd(struct net_device *dev, struct tcf_block *block,
-			      flow_indr_block_bind_cb_t *cb, void *cb_priv,
-			      enum flow_block_command command, bool ingress)
-{
-	struct flow_block_offload bo = {
-		.command	= command,
-		.binder_type	= ingress ?
-				  FLOW_BLOCK_BINDER_TYPE_CLSACT_INGRESS :
-				  FLOW_BLOCK_BINDER_TYPE_CLSACT_EGRESS,
-		.net		= dev_net(dev),
-		.block_shared	= tcf_block_non_null_shared(block),
-	};
-	INIT_LIST_HEAD(&bo.cb_list);
-
-	if (!block)
-		return;
-
-	bo.block = &block->flow_block;
-
-	down_write(&block->cb_lock);
-	cb(dev, cb_priv, TC_SETUP_BLOCK, &bo);
-
-	tcf_block_setup(block, &bo);
-	up_write(&block->cb_lock);
-}
-
-static struct tcf_block *tc_dev_block(struct net_device *dev, bool ingress)
-{
-	const struct Qdisc_class_ops *cops;
-	const struct Qdisc_ops *ops;
-	struct Qdisc *qdisc;
-
-	if (!dev_ingress_queue(dev))
-		return NULL;
-
-	qdisc = dev_ingress_queue(dev)->qdisc_sleeping;
-	if (!qdisc)
-		return NULL;
-
-	ops = qdisc->ops;
-	if (!ops)
-		return NULL;
-
-	if (!ingress && !strcmp("ingress", ops->id))
-		return NULL;
-
-	cops = ops->cl_ops;
-	if (!cops)
-		return NULL;
-
-	if (!cops->tcf_block)
-		return NULL;
-
-	return cops->tcf_block(qdisc,
-			       ingress ? TC_H_MIN_INGRESS : TC_H_MIN_EGRESS,
-			       NULL);
-}
-
-static void tc_indr_block_get_and_cmd(struct net_device *dev,
-				      flow_indr_block_bind_cb_t *cb,
-				      void *cb_priv,
-				      enum flow_block_command command)
-{
-	struct tcf_block *block;
-
-	block = tc_dev_block(dev, true);
-	tc_indr_block_cmd(dev, block, cb, cb_priv, command, true);
-
-	block = tc_dev_block(dev, false);
-	tc_indr_block_cmd(dev, block, cb, cb_priv, command, false);
-}
-
 static void tcf_block_offload_init(struct flow_block_offload *bo,
 				   struct net_device *dev,
 				   enum flow_block_command command,
@@ -3836,11 +3764,6 @@ static struct pernet_operations tcf_net_ops = {
 	.size = sizeof(struct tcf_net),
 };
 
-static struct flow_indr_block_entry block_entry = {
-	.cb = tc_indr_block_get_and_cmd,
-	.list = LIST_HEAD_INIT(block_entry.list),
-};
-
 static int __init tc_filter_init(void)
 {
 	int err;
@@ -3853,8 +3776,6 @@ static int __init tc_filter_init(void)
 	if (err)
 		goto err_register_pernet_subsys;
 
-	flow_indr_add_block_cb(&block_entry);
-
 	rtnl_register(PF_UNSPEC, RTM_NEWTFILTER, tc_new_tfilter, NULL,
 		      RTNL_FLAG_DOIT_UNLOCKED);
 	rtnl_register(PF_UNSPEC, RTM_DELTFILTER, tc_del_tfilter, NULL,
-- 
cgit v1.2.3-59-g8ed1b


From 0c34bb598c510e070160029f34efeeb217000f8d Mon Sep 17 00:00:00 2001
From: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Date: Fri, 29 May 2020 14:17:10 +0200
Subject: net: octeon: mgmt: Repair filling of RX ring

The removal of mips_swiotlb_ops exposed a problem in the octeon_mgmt Ethernet
driver. mips_swiotlb_ops had an mb() after most of the operations, and the
removal of the ops broke the receive functionality of the driver.
My code inspection has shown no place other than
octeon_mgmt_rx_fill_ring() where an explicit barrier is obviously
missing. That function, however, has to make sure that "ringing the
bell" doesn't happen before the RX ring entry is really written.

The patch has been successfully tested on Octeon II.

Fixes: a999933db9ed ("MIPS: remove mips_swiotlb_ops")
Cc: stable@vger.kernel.org
Signed-off-by: Alexander Sverdlin <alexander.sverdlin@nokia.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/cavium/octeon/octeon_mgmt.c | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
index 9d868403d86c..cbaa1924afbe 100644
--- a/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
+++ b/drivers/net/ethernet/cavium/octeon/octeon_mgmt.c
@@ -234,6 +234,11 @@ static void octeon_mgmt_rx_fill_ring(struct net_device *netdev)
 
 		/* Put it in the ring.  */
 		p->rx_ring[p->rx_next_fill] = re.d64;
+		/* Make sure there is no reorder of filling the ring and ringing
+		 * the bell
+		 */
+		wmb();
+
 		dma_sync_single_for_device(p->dev, p->rx_ring_handle,
 					   ring_size_to_bytes(OCTEON_MGMT_RX_RING_SIZE),
 					   DMA_BIDIRECTIONAL);
-- 
cgit v1.2.3-59-g8ed1b


From a74d19ba7c41b6c1e424ef4fb7d4600f43ff75e5 Mon Sep 17 00:00:00 2001
From: Liu Xiang <liuxiang_1999@126.com>
Date: Fri, 29 May 2020 23:24:56 +0800
Subject: net: fec: disable correct clk in the err path of fec_enet_clk_enable

When enabling clk_ref fails, clk_ptp should be disabled rather than
clk_ref itself.

Signed-off-by: Liu Xiang <liuxiang_1999@126.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/fec_main.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/drivers/net/ethernet/freescale/fec_main.c b/drivers/net/ethernet/freescale/fec_main.c
index 4acb91dce5fc..2d0d313ee7c5 100644
--- a/drivers/net/ethernet/freescale/fec_main.c
+++ b/drivers/net/ethernet/freescale/fec_main.c
@@ -1981,8 +1981,12 @@ static int fec_enet_clk_enable(struct net_device *ndev, bool enable)
 	return 0;
 
 failed_clk_ref:
-	if (fep->clk_ref)
-		clk_disable_unprepare(fep->clk_ref);
+	if (fep->clk_ptp) {
+		mutex_lock(&fep->ptp_clk_mutex);
+		clk_disable_unprepare(fep->clk_ptp);
+		fep->ptp_clk_on = false;
+		mutex_unlock(&fep->ptp_clk_mutex);
+	}
 failed_clk_ptp:
 	if (fep->clk_enet_out)
 		clk_disable_unprepare(fep->clk_enet_out);
-- 
cgit v1.2.3-59-g8ed1b


From 678eb199cc9df3bf1cb12fb2da22768b8d1b6bf3 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:36 +0300
Subject: devlink: Create dedicated trap group for layer 3 exceptions

Packets that hit exceptions during layer 3 forwarding must be trapped to
the CPU for the control plane to function properly. Create a dedicated
group for them, so that user space could choose to assign a different
policer for them.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 7 +++++--
 include/net/devlink.h                             | 3 +++
 net/core/devlink.c                                | 1 +
 3 files changed, 9 insertions(+), 2 deletions(-)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index fe089acb7783..4ca241e70064 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -277,8 +277,11 @@ narrow. The description of these groups must be added to the following table:
      - Contains packet traps for packets that were dropped by the device during
        layer 2 forwarding (i.e., bridge)
    * - ``l3_drops``
-     - Contains packet traps for packets that were dropped by the device or hit
-       an exception (e.g., TTL error) during layer 3 forwarding
+     - Contains packet traps for packets that were dropped by the device during
+       layer 3 forwarding
+   * - ``l3_exceptions``
+     - Contains packet traps for packets that hit an exception (e.g., TTL
+       error) during layer 3 forwarding
    * - ``buffer_drops``
      - Contains packet traps for packets that were dropped by the device due to
        an enqueue decision
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 8ffc1b5cd89b..851388c9d795 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -657,6 +657,7 @@ enum devlink_trap_generic_id {
 enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_L2_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_L3_DROPS,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_L3_EXCEPTIONS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS,
@@ -730,6 +731,8 @@ enum devlink_trap_group_generic_id {
 	"l2_drops"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_DROPS \
 	"l3_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_L3_EXCEPTIONS \
+	"l3_exceptions"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_BUFFER_DROPS \
 	"buffer_drops"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_TUNNEL_DROPS \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 7b76e5fffc10..d9fff7083f02 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8505,6 +8505,7 @@ static const struct devlink_trap devlink_trap_generic[] = {
 static const struct devlink_trap_group devlink_trap_group_generic[] = {
 	DEVLINK_TRAP_GROUP(L2_DROPS),
 	DEVLINK_TRAP_GROUP(L3_DROPS),
+	DEVLINK_TRAP_GROUP(L3_EXCEPTIONS),
 	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
 	DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
 	DEVLINK_TRAP_GROUP(ACL_DROPS),
-- 
cgit v1.2.3-59-g8ed1b


From 1e292f5c11c1e2ef38f416b62c5d616f5768057f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:37 +0300
Subject: mlxsw: spectrum_trap: Move layer 3 exceptions to exceptions trap
 group

The layer 3 exceptions are still subject to the same trap policer, so
nothing changes, but user space can choose to assign a different one.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h          |  1 +
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 40 +++++++++++++---------
 2 files changed, 25 insertions(+), 16 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 38fa7304af0c..030d6f9766d2 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5552,6 +5552,7 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DUMMY,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L2_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
+	MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_EXCEPTIONS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
 
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index f4b812276a5a..dc2217f1a07f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -212,6 +212,11 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_DISCARDS,
 		.priority = 0,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_L3_EXCEPTIONS,
+		.priority = 2,
+	},
 	{
 		.group = DEVLINK_TRAP_GROUP_GENERIC(TUNNEL_DROPS, 1),
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_TUNNEL_DISCARDS,
@@ -332,56 +337,59 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_DROPS),
+		.trap = MLXSW_SP_TRAP_EXCEPTION(MTU_ERROR, L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(MTUERROR, L3_EXCEPTIONS,
 					       TRAP_TO_CPU),
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+		.trap = MLXSW_SP_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(TTLERROR, L3_EXCEPTIONS,
 					       TRAP_TO_CPU),
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_EXCEPTION(RPF, L3_DROPS),
+		.trap = MLXSW_SP_TRAP_EXCEPTION(RPF, L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(RPF, L3_DISCARDS, TRAP_TO_CPU),
+			MLXSW_SP_RXL_EXCEPTION(RPF, L3_EXCEPTIONS, TRAP_TO_CPU),
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_DROPS),
+		.trap = MLXSW_SP_TRAP_EXCEPTION(REJECT_ROUTE, L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(RTR_INGRESS1, L3_EXCEPTIONS,
 					       TRAP_TO_CPU),
 		},
 	},
 	{
-		.trap = MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH, L3_DROPS),
+		.trap = MLXSW_SP_TRAP_EXCEPTION(UNRESOLVED_NEIGH,
+						L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV4, L3_EXCEPTIONS,
 					       TRAP_TO_CPU),
-			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(HOST_MISS_IPV6, L3_EXCEPTIONS,
 					       TRAP_TO_CPU),
-			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER3, L3_EXCEPTIONS,
 					       TRAP_EXCEPTION_TO_CPU),
 		},
 	},
 	{
 		.trap = MLXSW_SP_TRAP_EXCEPTION(IPV4_LPM_UNICAST_MISS,
-						L3_DROPS),
+						L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM4,
+					       L3_EXCEPTIONS,
 					       TRAP_EXCEPTION_TO_CPU),
 		},
 	},
 	{
 		.trap = MLXSW_SP_TRAP_EXCEPTION(IPV6_LPM_UNICAST_MISS,
-						L3_DROPS),
+						L3_EXCEPTIONS),
 		.listeners_arr = {
-			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6, L3_DISCARDS,
+			MLXSW_SP_RXL_EXCEPTION(DISCARD_ROUTER_LPM6,
+					       L3_EXCEPTIONS,
 					       TRAP_EXCEPTION_TO_CPU),
 		},
 	},
-- 
cgit v1.2.3-59-g8ed1b


From 85176f19f5ff579c8c1676b4c170a6535b782584 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:38 +0300
Subject: netdevsim: Move layer 3 exceptions to exceptions trap group

The layer 3 exceptions are still subject to the same trap policer, so
nothing changes, but user space can choose to assign a different one.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index dc3ff0e20944..09d947eff980 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -458,6 +458,7 @@ static const struct devlink_trap_policer nsim_trap_policers_arr[] = {
 static const struct devlink_trap_group nsim_trap_groups_arr[] = {
 	DEVLINK_TRAP_GROUP_GENERIC(L2_DROPS, 0),
 	DEVLINK_TRAP_GROUP_GENERIC(L3_DROPS, 1),
+	DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1),
 	DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 2),
 	DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 3),
 };
@@ -471,7 +472,7 @@ static const struct devlink_trap nsim_traps_arr[] = {
 	NSIM_TRAP_DROP(PORT_LOOPBACK_FILTER, L2_DROPS),
 	NSIM_TRAP_DRIVER_EXCEPTION(FID_MISS, L2_DROPS),
 	NSIM_TRAP_DROP(BLACKHOLE_ROUTE, L3_DROPS),
-	NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_DROPS),
+	NSIM_TRAP_EXCEPTION(TTL_ERROR, L3_EXCEPTIONS),
 	NSIM_TRAP_DROP(TAIL_DROP, BUFFER_DROPS),
 	NSIM_TRAP_DROP_EXT(INGRESS_FLOW_ACTION_DROP, ACL_DROPS,
 			   DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
-- 
cgit v1.2.3-59-g8ed1b


From 9eefeabed6f831018c15bd7e17d34967ee34d9dd Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:39 +0300
Subject: devlink: Add 'mirror' trap action

The action is used by control traps such as IGMP query. The packet is
flooded by the device, but also trapped to the CPU in order for the
software bridge to mark the receiving port as a multicast router port.
Such packets are marked with 'skb->offload_fwd_mark = 1' in order to
prevent the software bridge from flooding them again.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 2 ++
 include/uapi/linux/devlink.h                      | 3 +++
 net/core/devlink.c                                | 3 ++-
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 4ca241e70064..5b97327caefc 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -108,6 +108,8 @@ The ``devlink-trap`` mechanism supports the following packet trap actions:
   * ``trap``: The sole copy of the packet is sent to the CPU.
   * ``drop``: The packet is dropped by the underlying device and a copy is not
     sent to the CPU.
+  * ``mirror``: The packet is forwarded by the underlying device and a copy is
+    sent to the CPU.
 
 Generic Packet Traps
 ====================
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 1ae90e06c06d..16305932a950 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -233,10 +233,13 @@ enum {
  * @DEVLINK_TRAP_ACTION_DROP: Packet is dropped by the device and a copy is not
  *                            sent to the CPU.
  * @DEVLINK_TRAP_ACTION_TRAP: The sole copy of the packet is sent to the CPU.
+ * @DEVLINK_TRAP_ACTION_MIRROR: Packet is forwarded by the device and a copy is
+ *                              sent to the CPU.
  */
 enum devlink_trap_action {
 	DEVLINK_TRAP_ACTION_DROP,
 	DEVLINK_TRAP_ACTION_TRAP,
+	DEVLINK_TRAP_ACTION_MIRROR,
 };
 
 /**
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d9fff7083f02..d6298917b077 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -5869,7 +5869,8 @@ devlink_trap_action_get_from_info(struct genl_info *info,
 	val = nla_get_u8(info->attrs[DEVLINK_ATTR_TRAP_ACTION]);
 	switch (val) {
 	case DEVLINK_TRAP_ACTION_DROP: /* fall-through */
-	case DEVLINK_TRAP_ACTION_TRAP:
+	case DEVLINK_TRAP_ACTION_TRAP: /* fall-through */
+	case DEVLINK_TRAP_ACTION_MIRROR:
 		*p_trap_action = val;
 		break;
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From 30a4e9a29ab9aadfe6c5386ae4aa396b1d2556c2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:40 +0300
Subject: devlink: Add 'control' trap type

This type is used for traps that trap control packets such as ARP
request and IGMP query to the CPU.

Do not report such packets to the kernel's drop monitor as they were not
dropped by the device nor encountered an exception during forwarding.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 8 +++++++-
 include/uapi/linux/devlink.h                      | 6 ++++++
 net/core/devlink.c                                | 7 +++++++
 3 files changed, 20 insertions(+), 1 deletion(-)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 5b97327caefc..6c293cfa23ee 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -55,7 +55,7 @@ The following diagram provides a general overview of ``devlink-trap``::
                           |                |
                           +-------^--------+
                                   |
-                                  |
+                                  | Non-control traps
                                   |
                              +----+----+
                              |         |      Kernel's Rx path
@@ -97,6 +97,12 @@ The ``devlink-trap`` mechanism supports the following packet trap types:
     processed by ``devlink`` and injected to the kernel's Rx path. Changing the
     action of such traps is not allowed, as it can easily break the control
     plane.
+  * ``control``: Trapped packets were trapped by the device because these are
+    control packets required for the correct functioning of the control plane.
+    For example, ARP request and IGMP query packets. Packets are injected to
+    the kernel's Rx path, but not reported to the kernel's drop monitor.
+    Changing the action of such traps is not allowed, as it can easily break
+    the control plane.
 
 .. _Trap-Actions:
 
diff --git a/include/uapi/linux/devlink.h b/include/uapi/linux/devlink.h
index 16305932a950..08563e6a424d 100644
--- a/include/uapi/linux/devlink.h
+++ b/include/uapi/linux/devlink.h
@@ -253,10 +253,16 @@ enum devlink_trap_action {
  *                               control plane for resolution. Trapped packets
  *                               are processed by devlink and injected to
  *                               the kernel's Rx path.
+ * @DEVLINK_TRAP_TYPE_CONTROL: Packet was trapped because it is required for
+ *                             the correct functioning of the control plane.
+ *                             For example, an ARP request packet. Trapped
+ *                             packets are injected to the kernel's Rx path,
+ *                             but not reported to drop monitor.
  */
 enum devlink_trap_type {
 	DEVLINK_TRAP_TYPE_DROP,
 	DEVLINK_TRAP_TYPE_EXCEPTION,
+	DEVLINK_TRAP_TYPE_CONTROL,
 };
 
 enum {
diff --git a/net/core/devlink.c b/net/core/devlink.c
index d6298917b077..47c28e0f848f 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8847,6 +8847,13 @@ void devlink_trap_report(struct devlink *devlink, struct sk_buff *skb,
 	devlink_trap_stats_update(trap_item->stats, skb->len);
 	devlink_trap_stats_update(trap_item->group_item->stats, skb->len);
 
+	/* Control packets were not dropped by the device or encountered an
+	 * exception during forwarding and therefore should not be reported to
+	 * the kernel's drop monitor.
+	 */
+	if (trap_item->trap->type == DEVLINK_TRAP_TYPE_CONTROL)
+		return;
+
 	devlink_trap_report_metadata_fill(&hw_metadata, trap_item,
 					  in_devlink_port, fa_cookie);
 	net_dm_hw_report(skb, &hw_metadata);
-- 
cgit v1.2.3-59-g8ed1b


From 515eac677fe119433c2a466443bef95c10c550cc Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:41 +0300
Subject: devlink: Add layer 2 control packet traps

Add layer 2 control packet traps such as STP and IGMP query, so that
capable device drivers could register them with devlink. Add
documentation for every added packet trap and packet trap group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 45 +++++++++++++++++++++
 include/net/devlink.h                             | 48 +++++++++++++++++++++++
 net/core/devlink.c                                | 16 ++++++++
 3 files changed, 109 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 6c293cfa23ee..e9fc3c9d7d7a 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -252,6 +252,42 @@ be added to the following table:
    * - ``egress_flow_action_drop``
      - ``drop``
      - Traps packets dropped during processing of egress flow action drop
+   * - ``stp``
+     - ``control``
+     - Traps STP packets
+   * - ``lacp``
+     - ``control``
+     - Traps LACP packets
+   * - ``lldp``
+     - ``control``
+     - Traps LLDP packets
+   * - ``igmp_query``
+     - ``control``
+     - Traps IGMP Membership Query packets
+   * - ``igmp_v1_report``
+     - ``control``
+     - Traps IGMP Version 1 Membership Report packets
+   * - ``igmp_v2_report``
+     - ``control``
+     - Traps IGMP Version 2 Membership Report packets
+   * - ``igmp_v3_report``
+     - ``control``
+     - Traps IGMP Version 3 Membership Report packets
+   * - ``igmp_v2_leave``
+     - ``control``
+     - Traps IGMP Version 2 Leave Group packets
+   * - ``mld_query``
+     - ``control``
+     - Traps MLD Multicast Listener Query packets
+   * - ``mld_v1_report``
+     - ``control``
+     - Traps MLD Version 1 Multicast Listener Report packets
+   * - ``mld_v2_report``
+     - ``control``
+     - Traps MLD Version 2 Multicast Listener Report packets
+   * - ``mld_v1_done``
+     - ``control``
+     - Traps MLD Version 1 Multicast Listener Done packets
 
 Driver-specific Packet Traps
 ============================
@@ -299,6 +335,15 @@ narrow. The description of these groups must be added to the following table:
    * - ``acl_drops``
      - Contains packet traps for packets that were dropped by the device during
        ACL processing
+   * - ``stp``
+     - Contains packet traps for STP packets
+   * - ``lacp``
+     - Contains packet traps for LACP packets
+   * - ``lldp``
+     - Contains packet traps for LLDP packets
+   * - ``mc_snooping``
+     - Contains packet traps for IGMP and MLD packets required for multicast
+       snooping
 
 Packet Trap Policers
 ====================
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 851388c9d795..c0061542ad65 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -645,6 +645,18 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_OVERLAY_SMAC_MC,
 	DEVLINK_TRAP_GENERIC_ID_INGRESS_FLOW_ACTION_DROP,
 	DEVLINK_TRAP_GENERIC_ID_EGRESS_FLOW_ACTION_DROP,
+	DEVLINK_TRAP_GENERIC_ID_STP,
+	DEVLINK_TRAP_GENERIC_ID_LACP,
+	DEVLINK_TRAP_GENERIC_ID_LLDP,
+	DEVLINK_TRAP_GENERIC_ID_IGMP_QUERY,
+	DEVLINK_TRAP_GENERIC_ID_IGMP_V1_REPORT,
+	DEVLINK_TRAP_GENERIC_ID_IGMP_V2_REPORT,
+	DEVLINK_TRAP_GENERIC_ID_IGMP_V3_REPORT,
+	DEVLINK_TRAP_GENERIC_ID_IGMP_V2_LEAVE,
+	DEVLINK_TRAP_GENERIC_ID_MLD_QUERY,
+	DEVLINK_TRAP_GENERIC_ID_MLD_V1_REPORT,
+	DEVLINK_TRAP_GENERIC_ID_MLD_V2_REPORT,
+	DEVLINK_TRAP_GENERIC_ID_MLD_V1_DONE,
 
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -661,6 +673,10 @@ enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_BUFFER_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_TUNNEL_DROPS,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_DROPS,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_STP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_LACP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_LLDP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_MC_SNOOPING,
 
 	/* Add new generic trap group IDs above */
 	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -726,6 +742,30 @@ enum devlink_trap_group_generic_id {
 	"ingress_flow_action_drop"
 #define DEVLINK_TRAP_GENERIC_NAME_EGRESS_FLOW_ACTION_DROP \
 	"egress_flow_action_drop"
+#define DEVLINK_TRAP_GENERIC_NAME_STP \
+	"stp"
+#define DEVLINK_TRAP_GENERIC_NAME_LACP \
+	"lacp"
+#define DEVLINK_TRAP_GENERIC_NAME_LLDP \
+	"lldp"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_QUERY \
+	"igmp_query"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V1_REPORT \
+	"igmp_v1_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V2_REPORT \
+	"igmp_v2_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V3_REPORT \
+	"igmp_v3_report"
+#define DEVLINK_TRAP_GENERIC_NAME_IGMP_V2_LEAVE \
+	"igmp_v2_leave"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_QUERY \
+	"mld_query"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V1_REPORT \
+	"mld_v1_report"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V2_REPORT \
+	"mld_v2_report"
+#define DEVLINK_TRAP_GENERIC_NAME_MLD_V1_DONE \
+	"mld_v1_done"
 
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
 	"l2_drops"
@@ -739,6 +779,14 @@ enum devlink_trap_group_generic_id {
 	"tunnel_drops"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_DROPS \
 	"acl_drops"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_STP \
+	"stp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LACP \
+	"lacp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LLDP \
+	"lldp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_MC_SNOOPING  \
+	"mc_snooping"
 
 #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id,	      \
 			     _metadata_cap)				      \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index 47c28e0f848f..c91ef1b5f738 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8495,6 +8495,18 @@ static const struct devlink_trap devlink_trap_generic[] = {
 	DEVLINK_TRAP(OVERLAY_SMAC_MC, DROP),
 	DEVLINK_TRAP(INGRESS_FLOW_ACTION_DROP, DROP),
 	DEVLINK_TRAP(EGRESS_FLOW_ACTION_DROP, DROP),
+	DEVLINK_TRAP(STP, CONTROL),
+	DEVLINK_TRAP(LACP, CONTROL),
+	DEVLINK_TRAP(LLDP, CONTROL),
+	DEVLINK_TRAP(IGMP_QUERY, CONTROL),
+	DEVLINK_TRAP(IGMP_V1_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V2_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V3_REPORT, CONTROL),
+	DEVLINK_TRAP(IGMP_V2_LEAVE, CONTROL),
+	DEVLINK_TRAP(MLD_QUERY, CONTROL),
+	DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
+	DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
+	DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
 };
 
 #define DEVLINK_TRAP_GROUP(_id)						      \
@@ -8510,6 +8522,10 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
 	DEVLINK_TRAP_GROUP(BUFFER_DROPS),
 	DEVLINK_TRAP_GROUP(TUNNEL_DROPS),
 	DEVLINK_TRAP_GROUP(ACL_DROPS),
+	DEVLINK_TRAP_GROUP(STP),
+	DEVLINK_TRAP_GROUP(LACP),
+	DEVLINK_TRAP_GROUP(LLDP),
+	DEVLINK_TRAP_GROUP(MC_SNOOPING),
 };
 
 static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-- 
cgit v1.2.3-59-g8ed1b


From d77cfd162a346259222d0207a95bf1a0cc0c2520 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:42 +0300
Subject: devlink: Add layer 3 control packet traps

Add layer 3 control packet traps such as ARP and DHCP, so that capable
device drivers could register them with devlink. Add documentation for
every added packet trap and packet trap group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 143 ++++++++++++++++++++++
 include/net/devlink.h                             | 126 +++++++++++++++++++
 net/core/devlink.c                                |  42 +++++++
 3 files changed, 311 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index e9fc3c9d7d7a..621b634b16be 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -288,6 +288,115 @@ be added to the following table:
    * - ``mld_v1_done``
      - ``control``
      - Traps MLD Version 1 Multicast Listener Done packets
+   * - ``ipv4_dhcp``
+     - ``control``
+     - Traps IPv4 DHCP packets
+   * - ``ipv6_dhcp``
+     - ``control``
+     - Traps IPv6 DHCP packets
+   * - ``arp_request``
+     - ``control``
+     - Traps ARP request packets
+   * - ``arp_response``
+     - ``control``
+     - Traps ARP response packets
+   * - ``arp_overlay``
+     - ``control``
+     - Traps NVE-decapsulated ARP packets that reached the overlay network.
+       This is required, for example, when the address that needs to be
+       resolved is a local address
+   * - ``ipv6_neigh_solicit``
+     - ``control``
+     - Traps IPv6 Neighbour Solicitation packets
+   * - ``ipv6_neigh_advert``
+     - ``control``
+     - Traps IPv6 Neighbour Advertisement packets
+   * - ``ipv4_bfd``
+     - ``control``
+     - Traps IPv4 BFD packets
+   * - ``ipv6_bfd``
+     - ``control``
+     - Traps IPv6 BFD packets
+   * - ``ipv4_ospf``
+     - ``control``
+     - Traps IPv4 OSPF packets
+   * - ``ipv6_ospf``
+     - ``control``
+     - Traps IPv6 OSPF packets
+   * - ``ipv4_bgp``
+     - ``control``
+     - Traps IPv4 BGP packets
+   * - ``ipv6_bgp``
+     - ``control``
+     - Traps IPv6 BGP packets
+   * - ``ipv4_vrrp``
+     - ``control``
+     - Traps IPv4 VRRP packets
+   * - ``ipv6_vrrp``
+     - ``control``
+     - Traps IPv6 VRRP packets
+   * - ``ipv4_pim``
+     - ``control``
+     - Traps IPv4 PIM packets
+   * - ``ipv6_pim``
+     - ``control``
+     - Traps IPv6 PIM packets
+   * - ``uc_loopback``
+     - ``control``
+     - Traps unicast packets that need to be routed through the same layer 3
+       interface from which they were received. Such packets are routed by the
+       kernel, but also cause it to potentially generate ICMP redirect packets
+   * - ``local_route``
+     - ``control``
+     - Traps unicast packets that hit a local route and need to be locally
+       delivered
+   * - ``external_route``
+     - ``control``
+     - Traps packets that should be routed through an external interface (e.g.,
+       management interface) that does not belong to the same device (e.g.,
+       switch ASIC) as the ingress interface
+   * - ``ipv6_uc_dip_link_local_scope``
+     - ``control``
+     - Traps unicast IPv6 packets that need to be routed and have a destination
+       IP address with a link-local scope (i.e., fe80::/10). The trap allows
+       device drivers to avoid programming link-local routes, but still receive
+       packets for local delivery
+   * - ``ipv6_dip_all_nodes``
+     - ``control``
+     - Traps IPv6 packets whose destination IP address is the "All Nodes
+       Address" (i.e., ff02::1)
+   * - ``ipv6_dip_all_routers``
+     - ``control``
+     - Traps IPv6 packets whose destination IP address is the "All Routers
+       Address" (i.e., ff02::2)
+   * - ``ipv6_router_solicit``
+     - ``control``
+     - Traps IPv6 Router Solicitation packets
+   * - ``ipv6_router_advert``
+     - ``control``
+     - Traps IPv6 Router Advertisement packets
+   * - ``ipv6_redirect``
+     - ``control``
+     - Traps IPv6 Redirect Message packets
+   * - ``ipv4_router_alert``
+     - ``control``
+     - Traps IPv4 packets that need to be routed and include the Router Alert
+       option. Such packets need to be locally delivered to raw sockets that
+       have the IP_ROUTER_ALERT socket option set
+   * - ``ipv6_router_alert``
+     - ``control``
+     - Traps IPv6 packets that need to be routed and include the Router Alert
+       option in their Hop-by-Hop extension header. Such packets need to be
+       locally delivered to raw sockets that have the IPV6_ROUTER_ALERT socket
+       option set
+   * - ``ptp_event``
+     - ``control``
+     - Traps PTP time-critical event messages (Sync, Delay_req, Pdelay_Req and
+       Pdelay_Resp)
+   * - ``ptp_general``
+     - ``control``
+     - Traps PTP general messages (Announce, Follow_Up, Delay_Resp,
+       Pdelay_Resp_Follow_Up, management and signaling)
 
 Driver-specific Packet Traps
 ============================
@@ -344,6 +453,40 @@ narrow. The description of these groups must be added to the following table:
    * - ``mc_snooping``
      - Contains packet traps for IGMP and MLD packets required for multicast
        snooping
+   * - ``dhcp``
+     - Contains packet traps for DHCP packets
+   * - ``neigh_discovery``
+     - Contains packet traps for neighbour discovery packets (e.g., ARP, IPv6
+       ND)
+   * - ``bfd``
+     - Contains packet traps for BFD packets
+   * - ``ospf``
+     - Contains packet traps for OSPF packets
+   * - ``bgp``
+     - Contains packet traps for BGP packets
+   * - ``vrrp``
+     - Contains packet traps for VRRP packets
+   * - ``pim``
+     - Contains packet traps for PIM packets
+   * - ``uc_loopback``
+     - Contains a packet trap for unicast loopback packets (i.e.,
+       ``uc_loopback``). This trap is singled-out because in cases such as
+       one-armed router it will be constantly triggered. To limit the impact on
+       the CPU usage, a packet trap policer with a low rate can be bound to the
+       group without affecting other traps
+   * - ``local_delivery``
+     - Contains packet traps for packets that should be locally delivered after
+       routing, but do not match more specific packet traps (e.g.,
+       ``ipv4_bgp``)
+   * - ``ipv6``
+     - Contains packet traps for various IPv6 control packets (e.g., Router
+       Advertisements)
+   * - ``ptp_event``
+     - Contains packet traps for PTP time-critical event messages (Sync,
+       Delay_req, Pdelay_Req and Pdelay_Resp)
+   * - ``ptp_general``
+     - Contains packet traps for PTP general messages (Announce, Follow_Up,
+       Delay_Resp, Pdelay_Resp_Follow_Up, management and signaling)
 
 Packet Trap Policers
 ====================
diff --git a/include/net/devlink.h b/include/net/devlink.h
index c0061542ad65..05a45dea976b 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -657,6 +657,36 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_MLD_V1_REPORT,
 	DEVLINK_TRAP_GENERIC_ID_MLD_V2_REPORT,
 	DEVLINK_TRAP_GENERIC_ID_MLD_V1_DONE,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_DHCP,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_DHCP,
+	DEVLINK_TRAP_GENERIC_ID_ARP_REQUEST,
+	DEVLINK_TRAP_GENERIC_ID_ARP_RESPONSE,
+	DEVLINK_TRAP_GENERIC_ID_ARP_OVERLAY,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_NEIGH_SOLICIT,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_NEIGH_ADVERT,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_BFD,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_BFD,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_OSPF,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_OSPF,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_BGP,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_BGP,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_VRRP,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_VRRP,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_PIM,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_PIM,
+	DEVLINK_TRAP_GENERIC_ID_UC_LB,
+	DEVLINK_TRAP_GENERIC_ID_LOCAL_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_EXTERNAL_ROUTE,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_UC_DIP_LINK_LOCAL_SCOPE,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_DIP_ALL_NODES,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_DIP_ALL_ROUTERS,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_SOLICIT,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_ADVERT,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_REDIRECT,
+	DEVLINK_TRAP_GENERIC_ID_IPV4_ROUTER_ALERT,
+	DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_ALERT,
+	DEVLINK_TRAP_GENERIC_ID_PTP_EVENT,
+	DEVLINK_TRAP_GENERIC_ID_PTP_GENERAL,
 
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -677,6 +707,18 @@ enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_LACP,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_LLDP,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_MC_SNOOPING,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_DHCP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_NEIGH_DISCOVERY,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_BFD,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_OSPF,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_BGP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_VRRP,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_PIM,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_UC_LB,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_LOCAL_DELIVERY,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_IPV6,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_EVENT,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL,
 
 	/* Add new generic trap group IDs above */
 	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -766,6 +808,66 @@ enum devlink_trap_group_generic_id {
 	"mld_v2_report"
 #define DEVLINK_TRAP_GENERIC_NAME_MLD_V1_DONE \
 	"mld_v1_done"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_DHCP \
+	"ipv4_dhcp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DHCP \
+	"ipv6_dhcp"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_REQUEST \
+	"arp_request"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_RESPONSE \
+	"arp_response"
+#define DEVLINK_TRAP_GENERIC_NAME_ARP_OVERLAY \
+	"arp_overlay"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_NEIGH_SOLICIT \
+	"ipv6_neigh_solicit"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_NEIGH_ADVERT \
+	"ipv6_neigh_advert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_BFD \
+	"ipv4_bfd"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_BFD \
+	"ipv6_bfd"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_OSPF \
+	"ipv4_ospf"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_OSPF \
+	"ipv6_ospf"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_BGP \
+	"ipv4_bgp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_BGP \
+	"ipv6_bgp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_VRRP \
+	"ipv4_vrrp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_VRRP \
+	"ipv6_vrrp"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_PIM \
+	"ipv4_pim"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_PIM \
+	"ipv6_pim"
+#define DEVLINK_TRAP_GENERIC_NAME_UC_LB \
+	"uc_loopback"
+#define DEVLINK_TRAP_GENERIC_NAME_LOCAL_ROUTE \
+	"local_route"
+#define DEVLINK_TRAP_GENERIC_NAME_EXTERNAL_ROUTE \
+	"external_route"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_UC_DIP_LINK_LOCAL_SCOPE \
+	"ipv6_uc_dip_link_local_scope"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DIP_ALL_NODES \
+	"ipv6_dip_all_nodes"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_DIP_ALL_ROUTERS \
+	"ipv6_dip_all_routers"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_SOLICIT \
+	"ipv6_router_solicit"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_ADVERT \
+	"ipv6_router_advert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_REDIRECT \
+	"ipv6_redirect"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV4_ROUTER_ALERT \
+	"ipv4_router_alert"
+#define DEVLINK_TRAP_GENERIC_NAME_IPV6_ROUTER_ALERT \
+	"ipv6_router_alert"
+#define DEVLINK_TRAP_GENERIC_NAME_PTP_EVENT \
+	"ptp_event"
+#define DEVLINK_TRAP_GENERIC_NAME_PTP_GENERAL \
+	"ptp_general"
 
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
 	"l2_drops"
@@ -787,6 +889,30 @@ enum devlink_trap_group_generic_id {
 	"lldp"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_MC_SNOOPING  \
 	"mc_snooping"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_DHCP \
+	"dhcp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_NEIGH_DISCOVERY \
+	"neigh_discovery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BFD \
+	"bfd"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_OSPF \
+	"ospf"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_BGP \
+	"bgp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_VRRP \
+	"vrrp"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PIM \
+	"pim"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_UC_LB \
+	"uc_loopback"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_LOCAL_DELIVERY \
+	"local_delivery"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_IPV6 \
+	"ipv6"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_EVENT \
+	"ptp_event"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_GENERAL \
+	"ptp_general"
 
 #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id,	      \
 			     _metadata_cap)				      \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index c91ef1b5f738..f32854c3d0e7 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8507,6 +8507,36 @@ static const struct devlink_trap devlink_trap_generic[] = {
 	DEVLINK_TRAP(MLD_V1_REPORT, CONTROL),
 	DEVLINK_TRAP(MLD_V2_REPORT, CONTROL),
 	DEVLINK_TRAP(MLD_V1_DONE, CONTROL),
+	DEVLINK_TRAP(IPV4_DHCP, CONTROL),
+	DEVLINK_TRAP(IPV6_DHCP, CONTROL),
+	DEVLINK_TRAP(ARP_REQUEST, CONTROL),
+	DEVLINK_TRAP(ARP_RESPONSE, CONTROL),
+	DEVLINK_TRAP(ARP_OVERLAY, CONTROL),
+	DEVLINK_TRAP(IPV6_NEIGH_SOLICIT, CONTROL),
+	DEVLINK_TRAP(IPV6_NEIGH_ADVERT, CONTROL),
+	DEVLINK_TRAP(IPV4_BFD, CONTROL),
+	DEVLINK_TRAP(IPV6_BFD, CONTROL),
+	DEVLINK_TRAP(IPV4_OSPF, CONTROL),
+	DEVLINK_TRAP(IPV6_OSPF, CONTROL),
+	DEVLINK_TRAP(IPV4_BGP, CONTROL),
+	DEVLINK_TRAP(IPV6_BGP, CONTROL),
+	DEVLINK_TRAP(IPV4_VRRP, CONTROL),
+	DEVLINK_TRAP(IPV6_VRRP, CONTROL),
+	DEVLINK_TRAP(IPV4_PIM, CONTROL),
+	DEVLINK_TRAP(IPV6_PIM, CONTROL),
+	DEVLINK_TRAP(UC_LB, CONTROL),
+	DEVLINK_TRAP(LOCAL_ROUTE, CONTROL),
+	DEVLINK_TRAP(EXTERNAL_ROUTE, CONTROL),
+	DEVLINK_TRAP(IPV6_UC_DIP_LINK_LOCAL_SCOPE, CONTROL),
+	DEVLINK_TRAP(IPV6_DIP_ALL_NODES, CONTROL),
+	DEVLINK_TRAP(IPV6_DIP_ALL_ROUTERS, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_SOLICIT, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_ADVERT, CONTROL),
+	DEVLINK_TRAP(IPV6_REDIRECT, CONTROL),
+	DEVLINK_TRAP(IPV4_ROUTER_ALERT, CONTROL),
+	DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
+	DEVLINK_TRAP(PTP_EVENT, CONTROL),
+	DEVLINK_TRAP(PTP_GENERAL, CONTROL),
 };
 
 #define DEVLINK_TRAP_GROUP(_id)						      \
@@ -8526,6 +8556,18 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
 	DEVLINK_TRAP_GROUP(LACP),
 	DEVLINK_TRAP_GROUP(LLDP),
 	DEVLINK_TRAP_GROUP(MC_SNOOPING),
+	DEVLINK_TRAP_GROUP(DHCP),
+	DEVLINK_TRAP_GROUP(NEIGH_DISCOVERY),
+	DEVLINK_TRAP_GROUP(BFD),
+	DEVLINK_TRAP_GROUP(OSPF),
+	DEVLINK_TRAP_GROUP(BGP),
+	DEVLINK_TRAP_GROUP(VRRP),
+	DEVLINK_TRAP_GROUP(PIM),
+	DEVLINK_TRAP_GROUP(UC_LB),
+	DEVLINK_TRAP_GROUP(LOCAL_DELIVERY),
+	DEVLINK_TRAP_GROUP(IPV6),
+	DEVLINK_TRAP_GROUP(PTP_EVENT),
+	DEVLINK_TRAP_GROUP(PTP_GENERAL),
 };
 
 static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-- 
cgit v1.2.3-59-g8ed1b


From 5eb18a2b6c11bf165271644ef1ab812b10659c8f Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:43 +0300
Subject: devlink: Add ACL control packet traps

Add packet traps for packets that are sampled / trapped by ACLs, so that
capable drivers could register them with devlink. Add documentation for
every added packet trap and packet trap group.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 Documentation/networking/devlink/devlink-trap.rst | 14 ++++++++++++++
 include/net/devlink.h                             | 12 ++++++++++++
 net/core/devlink.c                                |  4 ++++
 3 files changed, 30 insertions(+)

diff --git a/Documentation/networking/devlink/devlink-trap.rst b/Documentation/networking/devlink/devlink-trap.rst
index 621b634b16be..1e3f3ffee248 100644
--- a/Documentation/networking/devlink/devlink-trap.rst
+++ b/Documentation/networking/devlink/devlink-trap.rst
@@ -397,6 +397,14 @@ be added to the following table:
      - ``control``
      - Traps PTP general messages (Announce, Follow_Up, Delay_Resp,
        Pdelay_Resp_Follow_Up, management and signaling)
+   * - ``flow_action_sample``
+     - ``control``
+     - Traps packets sampled during processing of flow action sample (e.g., via
+       tc's sample action)
+   * - ``flow_action_trap``
+     - ``control``
+     - Traps packets logged during processing of flow action trap (e.g., via
+       tc's trap action)
 
 Driver-specific Packet Traps
 ============================
@@ -487,6 +495,12 @@ narrow. The description of these groups must be added to the following table:
    * - ``ptp_general``
      - Contains packet traps for PTP general messages (Announce, Follow_Up,
        Delay_Resp, Pdelay_Resp_Follow_Up, management and signaling)
+   * - ``acl_sample``
+     - Contains packet traps for packets that were sampled by the device during
+       ACL processing
+   * - ``acl_trap``
+     - Contains packet traps for packets that were trapped (logged) by the
+       device during ACL processing
 
 Packet Trap Policers
 ====================
diff --git a/include/net/devlink.h b/include/net/devlink.h
index 05a45dea976b..1df6dfec26c2 100644
--- a/include/net/devlink.h
+++ b/include/net/devlink.h
@@ -687,6 +687,8 @@ enum devlink_trap_generic_id {
 	DEVLINK_TRAP_GENERIC_ID_IPV6_ROUTER_ALERT,
 	DEVLINK_TRAP_GENERIC_ID_PTP_EVENT,
 	DEVLINK_TRAP_GENERIC_ID_PTP_GENERAL,
+	DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_SAMPLE,
+	DEVLINK_TRAP_GENERIC_ID_FLOW_ACTION_TRAP,
 
 	/* Add new generic trap IDs above */
 	__DEVLINK_TRAP_GENERIC_ID_MAX,
@@ -719,6 +721,8 @@ enum devlink_trap_group_generic_id {
 	DEVLINK_TRAP_GROUP_GENERIC_ID_IPV6,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_EVENT,
 	DEVLINK_TRAP_GROUP_GENERIC_ID_PTP_GENERAL,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_SAMPLE,
+	DEVLINK_TRAP_GROUP_GENERIC_ID_ACL_TRAP,
 
 	/* Add new generic trap group IDs above */
 	__DEVLINK_TRAP_GROUP_GENERIC_ID_MAX,
@@ -868,6 +872,10 @@ enum devlink_trap_group_generic_id {
 	"ptp_event"
 #define DEVLINK_TRAP_GENERIC_NAME_PTP_GENERAL \
 	"ptp_general"
+#define DEVLINK_TRAP_GENERIC_NAME_FLOW_ACTION_SAMPLE \
+	"flow_action_sample"
+#define DEVLINK_TRAP_GENERIC_NAME_FLOW_ACTION_TRAP \
+	"flow_action_trap"
 
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_L2_DROPS \
 	"l2_drops"
@@ -913,6 +921,10 @@ enum devlink_trap_group_generic_id {
 	"ptp_event"
 #define DEVLINK_TRAP_GROUP_GENERIC_NAME_PTP_GENERAL \
 	"ptp_general"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_SAMPLE \
+	"acl_sample"
+#define DEVLINK_TRAP_GROUP_GENERIC_NAME_ACL_TRAP \
+	"acl_trap"
 
 #define DEVLINK_TRAP_GENERIC(_type, _init_action, _id, _group_id,	      \
 			     _metadata_cap)				      \
diff --git a/net/core/devlink.c b/net/core/devlink.c
index f32854c3d0e7..2cafbc808b09 100644
--- a/net/core/devlink.c
+++ b/net/core/devlink.c
@@ -8537,6 +8537,8 @@ static const struct devlink_trap devlink_trap_generic[] = {
 	DEVLINK_TRAP(IPV6_ROUTER_ALERT, CONTROL),
 	DEVLINK_TRAP(PTP_EVENT, CONTROL),
 	DEVLINK_TRAP(PTP_GENERAL, CONTROL),
+	DEVLINK_TRAP(FLOW_ACTION_SAMPLE, CONTROL),
+	DEVLINK_TRAP(FLOW_ACTION_TRAP, CONTROL),
 };
 
 #define DEVLINK_TRAP_GROUP(_id)						      \
@@ -8568,6 +8570,8 @@ static const struct devlink_trap_group devlink_trap_group_generic[] = {
 	DEVLINK_TRAP_GROUP(IPV6),
 	DEVLINK_TRAP_GROUP(PTP_EVENT),
 	DEVLINK_TRAP_GROUP(PTP_GENERAL),
+	DEVLINK_TRAP_GROUP(ACL_SAMPLE),
+	DEVLINK_TRAP_GROUP(ACL_TRAP),
 };
 
 static int devlink_trap_generic_verify(const struct devlink_trap *trap)
-- 
cgit v1.2.3-59-g8ed1b


From 1897936744f0ab366102170d7c76bfc8f7aeb2ba Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:44 +0300
Subject: netdevsim: Register control traps

Register two control traps with devlink. The existing selftest at
tools/testing/selftests/drivers/net/netdevsim/devlink_trap.sh iterates
over all registered traps and checks that the action of non-drop traps
cannot be changed. Up until now only exception traps were tested; now
control traps will be tested as well.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/netdevsim/dev.c | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/drivers/net/netdevsim/dev.c b/drivers/net/netdevsim/dev.c
index 09d947eff980..ec6b6f7818ac 100644
--- a/drivers/net/netdevsim/dev.c
+++ b/drivers/net/netdevsim/dev.c
@@ -431,6 +431,10 @@ enum {
 	DEVLINK_TRAP_GENERIC(EXCEPTION, TRAP, _id,			      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
 			     NSIM_TRAP_METADATA)
+#define NSIM_TRAP_CONTROL(_id, _group_id, _action)			      \
+	DEVLINK_TRAP_GENERIC(CONTROL, _action, _id,			      \
+			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			     NSIM_TRAP_METADATA)
 #define NSIM_TRAP_DRIVER_EXCEPTION(_id, _group_id)			      \
 	DEVLINK_TRAP_DRIVER(EXCEPTION, TRAP, NSIM_TRAP_ID_##_id,	      \
 			    NSIM_TRAP_NAME_##_id,			      \
@@ -461,6 +465,7 @@ static const struct devlink_trap_group nsim_trap_groups_arr[] = {
 	DEVLINK_TRAP_GROUP_GENERIC(L3_EXCEPTIONS, 1),
 	DEVLINK_TRAP_GROUP_GENERIC(BUFFER_DROPS, 2),
 	DEVLINK_TRAP_GROUP_GENERIC(ACL_DROPS, 3),
+	DEVLINK_TRAP_GROUP_GENERIC(MC_SNOOPING, 3),
 };
 
 static const struct devlink_trap nsim_traps_arr[] = {
@@ -478,6 +483,8 @@ static const struct devlink_trap nsim_traps_arr[] = {
 			   DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
 	NSIM_TRAP_DROP_EXT(EGRESS_FLOW_ACTION_DROP, ACL_DROPS,
 			   DEVLINK_TRAP_METADATA_TYPE_F_FA_COOKIE),
+	NSIM_TRAP_CONTROL(IGMP_QUERY, MC_SNOOPING, MIRROR),
+	NSIM_TRAP_CONTROL(IGMP_V1_REPORT, MC_SNOOPING, TRAP),
 };
 
 #define NSIM_TRAP_L4_DATA_LEN 100
-- 
cgit v1.2.3-59-g8ed1b


From 45b1c87313cd2ab2843edd4e6467e3d6458e0c68 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:45 +0300
Subject: mlxsw: spectrum_trap: Factor out common Rx listener function

We currently have an Rx listener function for exception traps that marks
received skbs with 'offload_fwd_mark' and injects them to the kernel's
Rx path. The marking is done because all these exceptions occur during
L3 forwarding, after the packets were potentially flooded at L2.

A subsequent patch will add support for control traps. Packets received
via some of these control traps need different handling:

1. Packets might not need to be marked with 'offload_fwd_mark'. For
   example, if the packet was trapped before L2 forwarding

2. Packets might not need to be injected to the kernel's Rx path. For
   example, sampled packets are reported to user space via the psample
   module

Factor out a common Rx listener function that only reports trapped
packets to devlink. Call it from mlxsw_sp_rx_no_mark_listener() and
mlxsw_sp_rx_mark_listener(), which inject the packets into the kernel's
Rx path without and with the marking, respectively.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 29 ++++++++++++++++++----
 1 file changed, 24 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index dc2217f1a07f..206751963a4f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -125,8 +125,8 @@ static void mlxsw_sp_rx_acl_drop_listener(struct sk_buff *skb, u8 local_port,
 	consume_skb(skb);
 }
 
-static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
-					   void *trap_ctx)
+static int __mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u8 local_port,
+					  void *trap_ctx)
 {
 	struct devlink_port *in_devlink_port;
 	struct mlxsw_sp_port *mlxsw_sp_port;
@@ -139,7 +139,7 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
 
 	err = mlxsw_sp_rx_listener(mlxsw_sp, skb, local_port, mlxsw_sp_port);
 	if (err)
-		return;
+		return err;
 
 	devlink = priv_to_devlink(mlxsw_sp->core);
 	in_devlink_port = mlxsw_core_port_devlink_port_get(mlxsw_sp->core,
@@ -147,10 +147,29 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
 	skb_push(skb, ETH_HLEN);
 	devlink_trap_report(devlink, skb, trap_ctx, in_devlink_port, NULL);
 	skb_pull(skb, ETH_HLEN);
-	skb->offload_fwd_mark = 1;
+
+	return 0;
+}
+
+static void mlxsw_sp_rx_no_mark_listener(struct sk_buff *skb, u8 local_port,
+					 void *trap_ctx)
+{
+	int err;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
 	netif_receive_skb(skb);
 }
 
+static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u8 local_port,
+				      void *trap_ctx)
+{
+	skb->offload_fwd_mark = 1;
+	mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+}
+
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
@@ -183,7 +202,7 @@ static void mlxsw_sp_rx_exception_listener(struct sk_buff *skb, u8 local_port,
 		      SET_FW_DEFAULT, SP_##_dis_group_id)
 
 #define MLXSW_SP_RXL_EXCEPTION(_id, _group_id, _action)			      \
-	MLXSW_RXL(mlxsw_sp_rx_exception_listener, _id,			      \
+	MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id,			      \
 		   _action, false, SP_##_group_id, SET_FW_DEFAULT)
 
 #define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst)			      \
-- 
cgit v1.2.3-59-g8ed1b


From 39c10350cfc8ce23faae651877171e354b9006d4 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:46 +0300
Subject: mlxsw: spectrum_trap: Register layer 2 control traps

In a similar fashion to other traps, register layer 2 control traps with
devlink.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  37 +----
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |   2 +
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 151 +++++++++++++++++++++
 3 files changed, 159 insertions(+), 31 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index c598ae9ed106..74925826a2cb 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4022,6 +4022,12 @@ static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
 }
 
+void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			  u8 local_port)
+{
+	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
 	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
 		  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -4041,26 +4047,9 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	/* Events */
 	MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE),
 	/* L2 traps */
-	MLXSW_SP_RXL_NO_MARK(STP, TRAP_TO_CPU, STP, true),
-	MLXSW_SP_RXL_NO_MARK(LACP, TRAP_TO_CPU, LACP, true),
-	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, LLDP, TRAP_TO_CPU,
-		  false, SP_LLDP, DISCARD),
-	MLXSW_SP_RXL_MARK(IGMP_QUERY, MIRROR_TO_CPU, MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, TRAP_TO_CPU, MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, TRAP_TO_CPU, MC_SNOOPING, false),
 	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, FID_MISS, false),
-	MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY, MIRROR_TO_CPU,
-			  MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT, TRAP_TO_CPU,
-			     MC_SNOOPING, false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE, TRAP_TO_CPU, MC_SNOOPING,
-			     false),
-	MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT, TRAP_TO_CPU,
-			     MC_SNOOPING, false),
 	/* L3 traps */
 	MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
 	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
@@ -4149,9 +4138,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 	for (i = 0; i < max_cpu_policers; i++) {
 		is_bytes = false;
 		switch (i) {
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
@@ -4159,10 +4145,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 			rate = 128;
 			burst_size = 7;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
-			rate = 16 * 1024;
-			burst_size = 10;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
@@ -4225,9 +4207,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 	for (i = 0; i < max_trap_groups; i++) {
 		policer_id = i;
 		switch (i) {
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_STP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
@@ -4241,10 +4220,6 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 			priority = 4;
 			tc = 4;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING:
-			priority = 3;
-			tc = 3;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 147a5634244b..9d4dfb22cb7a 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -451,6 +451,8 @@ extern struct notifier_block mlxsw_sp_switchdev_notifier;
 /* spectrum.c */
 void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
 				       u8 local_port, void *priv);
+void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			  u8 local_port);
 int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 206751963a4f..32b77d5a917d 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -170,6 +170,23 @@ static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
 }
 
+static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
+				     void *trap_ctx)
+{
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	int err;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	/* The PTP handler expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port);
+}
+
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
@@ -191,6 +208,11 @@ static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u8 local_port,
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
 			     MLXSW_SP_TRAP_METADATA)
 
+#define MLXSW_SP_TRAP_CONTROL(_id, _group_id, _action)			      \
+	DEVLINK_TRAP_GENERIC(CONTROL, _action, _id,			      \
+			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
+			     MLXSW_SP_TRAP_METADATA)
+
 #define MLXSW_SP_RXL_DISCARD(_id, _group_id)				      \
 	MLXSW_RXL_DIS(mlxsw_sp_rx_drop_listener, DISCARD_##_id,		      \
 		      TRAP_EXCEPTION_TO_CPU, false, SP_##_group_id,	      \
@@ -205,6 +227,14 @@ static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u8 local_port,
 	MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id,			      \
 		   _action, false, SP_##_group_id, SET_FW_DEFAULT)
 
+#define MLXSW_SP_RXL_NO_MARK(_id, _group_id, _action, _is_ctrl)		      \
+	MLXSW_RXL(mlxsw_sp_rx_no_mark_listener, _id, _action,		      \
+		  _is_ctrl, SP_##_group_id, DISCARD)
+
+#define MLXSW_SP_RXL_MARK(_id, _group_id, _action, _is_ctrl)		      \
+	MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id, _action, _is_ctrl,	      \
+		  SP_##_group_id, DISCARD)
+
 #define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst)			      \
 	DEVLINK_TRAP_POLICER(_id, _rate, _burst,			      \
 			     MLXSW_REG_QPCR_HIGHEST_CIR,		      \
@@ -218,6 +248,18 @@ mlxsw_sp_trap_policer_items_arr[] = {
 	{
 		.policer = MLXSW_SP_TRAP_POLICER(1, 10 * 1024, 128),
 	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(2, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(3, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(4, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 128),
+	},
 };
 
 static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
@@ -246,6 +288,26 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_ACL_DISCARDS,
 		.priority = 0,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(STP, 2),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_STP,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(LACP, 3),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LACP,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(LLDP, 4),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LLDP,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(MC_SNOOPING, 5),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING,
+		.priority = 3,
+	},
 };
 
 static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
@@ -466,6 +528,95 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 						 DUMMY),
 		},
 	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(STP, STP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(STP, STP, TRAP_TO_CPU, true),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(LACP, LACP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(LACP, LACP, TRAP_TO_CPU, true),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(LLDP, LLDP, TRAP),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_ptp_listener, LLDP, TRAP_TO_CPU,
+				  false, SP_LLDP, DISCARD),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IGMP_QUERY, MC_SNOOPING, MIRROR),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IGMP_QUERY, MC_SNOOPING,
+					  MIRROR_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IGMP_V1_REPORT, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IGMP_V1_REPORT, MC_SNOOPING,
+					     TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IGMP_V2_REPORT, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IGMP_V2_REPORT, MC_SNOOPING,
+					     TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IGMP_V3_REPORT, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IGMP_V3_REPORT, MC_SNOOPING,
+					     TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IGMP_V2_LEAVE, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IGMP_V2_LEAVE, MC_SNOOPING,
+					     TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(MLD_QUERY, MC_SNOOPING, MIRROR),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_MLDV12_LISTENER_QUERY,
+					  MC_SNOOPING, MIRROR_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(MLD_V1_REPORT, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_REPORT,
+					     MC_SNOOPING, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(MLD_V2_REPORT, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IPV6_MLDV2_LISTENER_REPORT,
+					     MC_SNOOPING, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(MLD_V1_DONE, MC_SNOOPING,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(IPV6_MLDV1_LISTENER_DONE,
+					     MC_SNOOPING, TRAP_TO_CPU, false),
+		},
+	},
 };
 
 static struct mlxsw_sp_trap_policer_item *
-- 
cgit v1.2.3-59-g8ed1b


From 8110668ecd9a9e704b9b412302e76a9c6e1f4ce2 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:47 +0300
Subject: mlxsw: spectrum_trap: Register layer 3 control traps

In a similar fashion to layer 2 control traps, register layer 3 control
traps with devlink.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/reg.h          |   1 -
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     |  93 ------
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 318 +++++++++++++++++++++
 3 files changed, 318 insertions(+), 94 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/reg.h b/drivers/net/ethernet/mellanox/mlxsw/reg.h
index 030d6f9766d2..fcb88d4271bf 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/reg.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/reg.h
@@ -5536,7 +5536,6 @@ enum mlxsw_reg_htgt_trap_group {
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP,
-	MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
 	MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 74925826a2cb..8daeae1384da 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -4014,14 +4014,6 @@ out:
 	consume_skb(skb);
 }
 
-static void mlxsw_sp_rx_listener_ptp(struct sk_buff *skb, u8 local_port,
-				     void *priv)
-{
-	struct mlxsw_sp *mlxsw_sp = priv;
-
-	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
-}
-
 void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 			  u8 local_port)
 {
@@ -4047,43 +4039,13 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	/* Events */
 	MLXSW_SP_EVENTL(mlxsw_sp_pude_event_func, PUDE),
 	/* L2 traps */
-	MLXSW_SP_RXL_MARK(ARPBC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_MARK(ARPUC, MIRROR_TO_CPU, NEIGH_DISCOVERY, false),
 	MLXSW_SP_RXL_NO_MARK(FID_MISS, TRAP_TO_CPU, FID_MISS, false),
 	/* L3 traps */
-	MLXSW_SP_RXL_L3_MARK(LBERROR, MIRROR_TO_CPU, LBERROR, false),
-	MLXSW_SP_RXL_MARK(IP2ME, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_UNSPECIFIED_ADDRESS, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, TRAP_TO_CPU, IP2ME, false),
 	MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_SRC, TRAP_TO_CPU, ROUTER_EXP, false),
-	MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, TRAP_TO_CPU, IPV6, false),
-	MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, TRAP_TO_CPU, IPV6,
-			  false),
-	MLXSW_SP_RXL_MARK(IPV4_OSPF, TRAP_TO_CPU, OSPF, false),
-	MLXSW_SP_RXL_MARK(IPV6_OSPF, TRAP_TO_CPU, OSPF, false),
-	MLXSW_SP_RXL_MARK(IPV4_DHCP, TRAP_TO_CPU, DHCP, false),
-	MLXSW_SP_RXL_MARK(IPV6_DHCP, TRAP_TO_CPU, DHCP, false),
-	MLXSW_SP_RXL_MARK(RTR_INGRESS0, TRAP_TO_CPU, REMOTE_ROUTE, false),
-	MLXSW_SP_RXL_MARK(IPV4_BGP, TRAP_TO_CPU, BGP, false),
-	MLXSW_SP_RXL_MARK(IPV6_BGP, TRAP_TO_CPU, BGP, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, TRAP_TO_CPU, IPV6,
-			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, TRAP_TO_CPU, IPV6,
-			  false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION, TRAP_TO_CPU,
-			  NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISEMENT, TRAP_TO_CPU,
-			  NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, TRAP_TO_CPU, IPV6, false),
 	MLXSW_SP_RXL_MARK(IPV6_MC_LINK_LOCAL_DEST, TRAP_TO_CPU, ROUTER_EXP,
 			  false),
-	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, TRAP_TO_CPU, IP2ME, false),
-	MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, TRAP_TO_CPU, IP2ME, false),
-	MLXSW_SP_RXL_MARK(IPV4_VRRP, TRAP_TO_CPU, VRRP, false),
-	MLXSW_SP_RXL_MARK(IPV6_VRRP, TRAP_TO_CPU, VRRP, false),
-	MLXSW_SP_RXL_MARK(IPV4_BFD, TRAP_TO_CPU, BFD, false),
-	MLXSW_SP_RXL_MARK(IPV6_BFD, TRAP_TO_CPU, BFD, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_SIP_CLASS_E, FORWARD,
 			     ROUTER_EXP, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_MC_DMAC, FORWARD,
@@ -4098,18 +4060,10 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 	/* ACL trap */
 	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, FLOW_LOGGING, false),
 	/* Multicast Router Traps */
-	MLXSW_SP_RXL_MARK(IPV4_PIM, TRAP_TO_CPU, PIM, false),
-	MLXSW_SP_RXL_MARK(IPV6_PIM, TRAP_TO_CPU, PIM, false),
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
 	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
 	/* NVE traps */
 	MLXSW_SP_RXL_MARK(NVE_ENCAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY, false),
-	MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, TRAP_TO_CPU, NEIGH_DISCOVERY,
-			     false),
-	/* PTP traps */
-	MLXSW_RXL(mlxsw_sp_rx_listener_ptp, PTP0, TRAP_TO_CPU,
-		  false, SP_PTP0, DISCARD),
-	MLXSW_SP_RXL_NO_MARK(PTP1, TRAP_TO_CPU, PTP1, false),
 };
 
 static const struct mlxsw_listener mlxsw_sp1_listener[] = {
@@ -4138,41 +4092,13 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 	for (i = 0; i < max_cpu_policers; i++) {
 		is_bytes = false;
 		switch (i) {
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
-			rate = 128;
-			burst_size = 7;
-			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			rate = 1024;
 			burst_size = 7;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
-			rate = 24 * 1024;
-			burst_size = 12;
-			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
-			rate = 19 * 1024;
-			burst_size = 12;
-			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
-			rate = 360;
-			burst_size = 7;
-			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD:
-			rate = 20 * 1024;
-			burst_size = 10;
-			break;
 		default:
 			continue;
 		}
@@ -4207,36 +4133,17 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 	for (i = 0; i < max_trap_groups; i++) {
 		policer_id = i;
 		switch (i) {
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD:
-			priority = 5;
-			tc = 5;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP:
 			priority = 4;
 			tc = 4;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP:
-			priority = 2;
-			tc = 2;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_REMOTE_ROUTE:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			priority = 1;
 			tc = 1;
 			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR:
 			priority = 0;
 			tc = 0;
 			break;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 32b77d5a917d..148a35b7f4f8 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -170,6 +170,14 @@ static void mlxsw_sp_rx_mark_listener(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
 }
 
+static void mlxsw_sp_rx_l3_mark_listener(struct sk_buff *skb, u8 local_port,
+					 void *trap_ctx)
+{
+	skb->offload_l3_fwd_mark = 1;
+	skb->offload_fwd_mark = 1;
+	mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+}
+
 static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
 				     void *trap_ctx)
 {
@@ -235,6 +243,10 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
 	MLXSW_RXL(mlxsw_sp_rx_mark_listener, _id, _action, _is_ctrl,	      \
 		  SP_##_group_id, DISCARD)
 
+#define MLXSW_SP_RXL_L3_MARK(_id, _group_id, _action, _is_ctrl)		      \
+	MLXSW_RXL(mlxsw_sp_rx_l3_mark_listener, _id, _action, _is_ctrl,	      \
+		  SP_##_group_id, DISCARD)
+
 #define MLXSW_SP_TRAP_POLICER(_id, _rate, _burst)			      \
 	DEVLINK_TRAP_POLICER(_id, _rate, _burst,			      \
 			     MLXSW_REG_QPCR_HIGHEST_CIR,		      \
@@ -260,6 +272,42 @@ mlxsw_sp_trap_policer_items_arr[] = {
 	{
 		.policer = MLXSW_SP_TRAP_POLICER(5, 16 * 1024, 128),
 	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(6, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(7, 1024, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(8, 20 * 1024, 1024),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(9, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(10, 1024, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(11, 360, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(12, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(13, 128, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(14, 1024, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(15, 1024, 128),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(16, 24 * 1024, 4096),
+	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 4096),
+	},
 };
 
 static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
@@ -308,6 +356,66 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_MC_SNOOPING,
 		.priority = 3,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(DHCP, 6),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_DHCP,
+		.priority = 2,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(NEIGH_DISCOVERY, 7),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_NEIGH_DISCOVERY,
+		.priority = 2,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(BFD, 8),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_BFD,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(OSPF, 9),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_OSPF,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(BGP, 10),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_BGP,
+		.priority = 4,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(VRRP, 11),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_VRRP,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(PIM, 12),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PIM,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(UC_LB, 13),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_LBERROR,
+		.priority = 0,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(LOCAL_DELIVERY, 14),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IP2ME,
+		.priority = 2,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(IPV6, 15),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_IPV6,
+		.priority = 2,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(PTP_EVENT, 16),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP0,
+		.priority = 5,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(PTP_GENERAL, 17),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
+		.priority = 2,
+	},
 };
 
 static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
@@ -617,6 +725,216 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 					     MC_SNOOPING, TRAP_TO_CPU, false),
 		},
 	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_DHCP, DHCP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_DHCP, DHCP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_DHCP, DHCP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_DHCP, DHCP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(ARP_REQUEST, NEIGH_DISCOVERY,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(ARPBC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
+					  false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(ARP_RESPONSE, NEIGH_DISCOVERY,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(ARPUC, NEIGH_DISCOVERY, MIRROR_TO_CPU,
+					  false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(ARP_OVERLAY, NEIGH_DISCOVERY,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(NVE_DECAP_ARP, NEIGH_DISCOVERY,
+					     TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_NEIGH_SOLICIT,
+					      NEIGH_DISCOVERY, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_SOLICITATION,
+					  NEIGH_DISCOVERY, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_NEIGH_ADVERT,
+					      NEIGH_DISCOVERY, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(L3_IPV6_NEIGHBOR_ADVERTISEMENT,
+					  NEIGH_DISCOVERY, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_BFD, BFD, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_BFD, BFD, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_BFD, BFD, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_BFD, BFD, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_OSPF, OSPF, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_OSPF, OSPF, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_OSPF, OSPF, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_OSPF, OSPF, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_BGP, BGP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_BGP, BGP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_BGP, BGP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_BGP, BGP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_VRRP, VRRP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_VRRP, VRRP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_VRRP, VRRP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_VRRP, VRRP, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_PIM, PIM, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV4_PIM, PIM, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_PIM, PIM, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_PIM, PIM, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(UC_LB, UC_LB, MIRROR),
+		.listeners_arr = {
+			MLXSW_SP_RXL_L3_MARK(LBERROR, LBERROR, MIRROR_TO_CPU,
+					     false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(LOCAL_ROUTE, LOCAL_DELIVERY,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IP2ME, IP2ME, TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(EXTERNAL_ROUTE, LOCAL_DELIVERY,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(RTR_INGRESS0, IP2ME, TRAP_TO_CPU,
+					  false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_UC_DIP_LINK_LOCAL_SCOPE,
+					      LOCAL_DELIVERY, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_LINK_LOCAL_DEST, IP2ME,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV4_ROUTER_ALERT, LOCAL_DELIVERY,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV4, IP2ME, TRAP_TO_CPU,
+					  false),
+		},
+	},
+	{
+		/* IPV6_ROUTER_ALERT is defined in uAPI as 22, but it is not
+		 * used in this file, so undefine it.
+		 */
+		#undef IPV6_ROUTER_ALERT
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_ALERT, LOCAL_DELIVERY,
+					      TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(ROUTER_ALERT_IPV6, IP2ME, TRAP_TO_CPU,
+					  false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_DIP_ALL_NODES, IPV6, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_ALL_NODES_LINK, IPV6,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_DIP_ALL_ROUTERS, IPV6, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(IPV6_ALL_ROUTERS_LINK, IPV6,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_SOLICIT, IPV6, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_SOLICITATION, IPV6,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_ROUTER_ADVERT, IPV6, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(L3_IPV6_ROUTER_ADVERTISEMENT, IPV6,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(IPV6_REDIRECT, IPV6, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_MARK(L3_IPV6_REDIRECTION, IPV6,
+					  TRAP_TO_CPU, false),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(PTP_EVENT, PTP_EVENT, TRAP),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_ptp_listener, PTP0, TRAP_TO_CPU,
+				  false, SP_PTP0, DISCARD),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(PTP_GENERAL, PTP_GENERAL, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(PTP1, PTP1, TRAP_TO_CPU, false),
+		},
+	},
 };
 
 static struct mlxsw_sp_trap_policer_item *
-- 
cgit v1.2.3-59-g8ed1b


From 88e2774961d7854628fa9c403166c4162cebc12e Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:48 +0300
Subject: mlxsw: spectrum_trap: Register ACL control traps

In a similar fashion to other control traps, register ACL control traps
with devlink.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Reviewed-by: Jiri Pirko <jiri@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/mellanox/mlxsw/spectrum.c     | 31 ++++-----------
 drivers/net/ethernet/mellanox/mlxsw/spectrum.h     |  2 +
 .../net/ethernet/mellanox/mlxsw/spectrum_trap.c    | 45 ++++++++++++++++++++++
 3 files changed, 55 insertions(+), 23 deletions(-)

diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
index 8daeae1384da..5ffa32b75e5f 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.c
@@ -3987,10 +3987,15 @@ static void mlxsw_sp_rx_listener_l3_mark_func(struct sk_buff *skb,
 	return mlxsw_sp_rx_listener_no_mark_func(skb, local_port, priv);
 }
 
-static void mlxsw_sp_rx_listener_sample_func(struct sk_buff *skb, u8 local_port,
-					     void *priv)
+void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			  u8 local_port)
+{
+	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
+}
+
+void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			     u8 local_port)
 {
-	struct mlxsw_sp *mlxsw_sp = priv;
 	struct mlxsw_sp_port *mlxsw_sp_port = mlxsw_sp->ports[local_port];
 	struct mlxsw_sp_port_sample *sample;
 	u32 size;
@@ -4014,12 +4019,6 @@ out:
 	consume_skb(skb);
 }
 
-void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
-			  u8 local_port)
-{
-	mlxsw_sp->ptp_ops->receive(mlxsw_sp, skb, local_port);
-}
-
 #define MLXSW_SP_RXL_NO_MARK(_trap_id, _action, _trap_group, _is_ctrl)	\
 	MLXSW_RXL(mlxsw_sp_rx_listener_no_mark_func, _trap_id, _action,	\
 		  _is_ctrl, SP_##_trap_group, DISCARD)
@@ -4054,11 +4053,6 @@ static const struct mlxsw_listener mlxsw_sp_listener[] = {
 			     ROUTER_EXP, false),
 	MLXSW_SP_RXL_NO_MARK(DISCARD_ING_ROUTER_DIP_LINK_LOCAL, FORWARD,
 			     ROUTER_EXP, false),
-	/* PKT Sample trap */
-	MLXSW_RXL(mlxsw_sp_rx_listener_sample_func, PKT_SAMPLE, MIRROR_TO_CPU,
-		  false, SP_PKT_SAMPLE, DISCARD),
-	/* ACL trap */
-	MLXSW_SP_RXL_NO_MARK(ACL0, TRAP_TO_CPU, FLOW_LOGGING, false),
 	/* Multicast Router Traps */
 	MLXSW_SP_RXL_MARK(ACL1, TRAP_TO_CPU, MULTICAST, false),
 	MLXSW_SP_RXL_L3_MARK(ACL2, TRAP_TO_CPU, MULTICAST, false),
@@ -4094,7 +4088,6 @@ static int mlxsw_sp_cpu_policers_set(struct mlxsw_core *mlxsw_core)
 		switch (i) {
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			rate = 1024;
 			burst_size = 7;
@@ -4133,20 +4126,12 @@ static int mlxsw_sp_trap_groups_set(struct mlxsw_core *mlxsw_core)
 	for (i = 0; i < max_trap_groups; i++) {
 		policer_id = i;
 		switch (i) {
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING:
-			priority = 4;
-			tc = 4;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_ROUTER_EXP:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_MULTICAST:
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_FID_MISS:
 			priority = 1;
 			tc = 1;
 			break;
-		case MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE:
-			priority = 0;
-			tc = 0;
-			break;
 		case MLXSW_REG_HTGT_TRAP_GROUP_SP_EVENT:
 			priority = MLXSW_REG_HTGT_DEFAULT_PRIORITY;
 			tc = MLXSW_REG_HTGT_DEFAULT_TC;
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
index 9d4dfb22cb7a..6f96ca50c9ba 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum.h
@@ -453,6 +453,8 @@ void mlxsw_sp_rx_listener_no_mark_func(struct sk_buff *skb,
 				       u8 local_port, void *priv);
 void mlxsw_sp_ptp_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
 			  u8 local_port);
+void mlxsw_sp_sample_receive(struct mlxsw_sp *mlxsw_sp, struct sk_buff *skb,
+			     u8 local_port);
 int mlxsw_sp_port_speed_get(struct mlxsw_sp_port *mlxsw_sp_port, u32 *speed);
 int mlxsw_sp_port_ets_set(struct mlxsw_sp_port *mlxsw_sp_port,
 			  enum mlxsw_reg_qeec_hr hr, u8 index, u8 next_index,
diff --git a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
index 148a35b7f4f8..157a42c63066 100644
--- a/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
+++ b/drivers/net/ethernet/mellanox/mlxsw/spectrum_trap.c
@@ -195,6 +195,23 @@ static void mlxsw_sp_rx_ptp_listener(struct sk_buff *skb, u8 local_port,
 	mlxsw_sp_ptp_receive(mlxsw_sp, skb, local_port);
 }
 
+static void mlxsw_sp_rx_sample_listener(struct sk_buff *skb, u8 local_port,
+					void *trap_ctx)
+{
+	struct mlxsw_sp *mlxsw_sp = devlink_trap_ctx_priv(trap_ctx);
+	int err;
+
+	err = __mlxsw_sp_rx_no_mark_listener(skb, local_port, trap_ctx);
+	if (err)
+		return;
+
+	/* The sample handler expects skb->data to point to the start of the
+	 * Ethernet header.
+	 */
+	skb_push(skb, ETH_HLEN);
+	mlxsw_sp_sample_receive(mlxsw_sp, skb, local_port);
+}
+
 #define MLXSW_SP_TRAP_DROP(_id, _group_id)				      \
 	DEVLINK_TRAP_GENERIC(DROP, DROP, _id,				      \
 			     DEVLINK_TRAP_GROUP_GENERIC_ID_##_group_id,	      \
@@ -308,6 +325,9 @@ mlxsw_sp_trap_policer_items_arr[] = {
 	{
 		.policer = MLXSW_SP_TRAP_POLICER(17, 19 * 1024, 4096),
 	},
+	{
+		.policer = MLXSW_SP_TRAP_POLICER(18, 1024, 128),
+	},
 };
 
 static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
@@ -416,6 +436,16 @@ static const struct mlxsw_sp_trap_group_item mlxsw_sp_trap_group_items_arr[] = {
 		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PTP1,
 		.priority = 2,
 	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_SAMPLE, 0),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_PKT_SAMPLE,
+		.priority = 0,
+	},
+	{
+		.group = DEVLINK_TRAP_GROUP_GENERIC(ACL_TRAP, 18),
+		.hw_group_id = MLXSW_REG_HTGT_TRAP_GROUP_SP_FLOW_LOGGING,
+		.priority = 4,
+	},
 };
 
 static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
@@ -935,6 +965,21 @@ static const struct mlxsw_sp_trap_item mlxsw_sp_trap_items_arr[] = {
 			MLXSW_SP_RXL_NO_MARK(PTP1, PTP1, TRAP_TO_CPU, false),
 		},
 	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_SAMPLE, ACL_SAMPLE,
+					      MIRROR),
+		.listeners_arr = {
+			MLXSW_RXL(mlxsw_sp_rx_sample_listener, PKT_SAMPLE,
+				  MIRROR_TO_CPU, false, SP_PKT_SAMPLE, DISCARD),
+		},
+	},
+	{
+		.trap = MLXSW_SP_TRAP_CONTROL(FLOW_ACTION_TRAP, ACL_TRAP, TRAP),
+		.listeners_arr = {
+			MLXSW_SP_RXL_NO_MARK(ACL0, FLOW_LOGGING, TRAP_TO_CPU,
+					     false),
+		},
+	},
 };
 
 static struct mlxsw_sp_trap_policer_item *
-- 
cgit v1.2.3-59-g8ed1b


From 9959b389779a9e688d1a9272eed6377d999d8739 Mon Sep 17 00:00:00 2001
From: Ido Schimmel <idosch@mellanox.com>
Date: Fri, 29 May 2020 21:36:49 +0300
Subject: selftests: mlxsw: Add test for control packets

Generate packets matching the various control traps and check that the
traps' stats increase accordingly.

Signed-off-by: Ido Schimmel <idosch@mellanox.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../drivers/net/mlxsw/devlink_trap_control.sh      | 688 +++++++++++++++++++++
 .../selftests/net/forwarding/devlink_lib.sh        |  23 +
 2 files changed, 711 insertions(+)
 create mode 100755 tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh

diff --git a/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
new file mode 100755
index 000000000000..a37273473c1b
--- /dev/null
+++ b/tools/testing/selftests/drivers/net/mlxsw/devlink_trap_control.sh
@@ -0,0 +1,688 @@
+#!/bin/bash
+# SPDX-License-Identifier: GPL-2.0
+#
+# Test devlink-trap control trap functionality over mlxsw. Each registered
+# control packet trap is tested to make sure it is triggered under the right
+# conditions.
+#
+# +---------------------------------+
+# | H1 (vrf)                        |
+# |    + $h1                        |
+# |    | 192.0.2.1/24               |
+# |    | 2001:db8:1::1/64           |
+# |    |                            |
+# |    |  default via 192.0.2.2     |
+# |    |  default via 2001:db8:1::2 |
+# +----|----------------------------+
+#      |
+# +----|----------------------------------------------------------------------+
+# | SW |                                                                      |
+# |    + $rp1                                                                 |
+# |        192.0.2.2/24                                                       |
+# |        2001:db8:1::2/64                                                   |
+# |                                                                           |
+# |        2001:db8:2::2/64                                                   |
+# |        198.51.100.2/24                                                    |
+# |    + $rp2                                                                 |
+# |    |                                                                      |
+# +----|----------------------------------------------------------------------+
+#      |
+# +----|----------------------------+
+# |    |  default via 198.51.100.2  |
+# |    |  default via 2001:db8:2::2 |
+# |    |                            |
+# |    | 2001:db8:2::1/64           |
+# |    | 198.51.100.1/24            |
+# |    + $h2                        |
+# | H2 (vrf)                        |
+# +---------------------------------+
+
+lib_dir=$(dirname $0)/../../../net/forwarding
+
+ALL_TESTS="
+	stp_test
+	lacp_test
+	lldp_test
+	igmp_query_test
+	igmp_v1_report_test
+	igmp_v2_report_test
+	igmp_v3_report_test
+	igmp_v2_leave_test
+	mld_query_test
+	mld_v1_report_test
+	mld_v2_report_test
+	mld_v1_done_test
+	ipv4_dhcp_test
+	ipv6_dhcp_test
+	arp_request_test
+	arp_response_test
+	ipv6_neigh_solicit_test
+	ipv6_neigh_advert_test
+	ipv4_bfd_test
+	ipv6_bfd_test
+	ipv4_ospf_test
+	ipv6_ospf_test
+	ipv4_bgp_test
+	ipv6_bgp_test
+	ipv4_vrrp_test
+	ipv6_vrrp_test
+	ipv4_pim_test
+	ipv6_pim_test
+	uc_loopback_test
+	local_route_test
+	external_route_test
+	ipv6_uc_dip_link_local_scope_test
+	ipv4_router_alert_test
+	ipv6_router_alert_test
+	ipv6_dip_all_nodes_test
+	ipv6_dip_all_routers_test
+	ipv6_router_solicit_test
+	ipv6_router_advert_test
+	ipv6_redirect_test
+	ptp_event_test
+	ptp_general_test
+	flow_action_sample_test
+	flow_action_trap_test
+"
+NUM_NETIFS=4
+source $lib_dir/lib.sh
+source $lib_dir/devlink_lib.sh
+
+h1_create()
+{
+	simple_if_init $h1 192.0.2.1/24 2001:db8:1::1/64
+
+	ip -4 route add default vrf v$h1 nexthop via 192.0.2.2
+	ip -6 route add default vrf v$h1 nexthop via 2001:db8:1::2
+}
+
+h1_destroy()
+{
+	ip -6 route del default vrf v$h1 nexthop via 2001:db8:1::2
+	ip -4 route del default vrf v$h1 nexthop via 192.0.2.2
+
+	simple_if_fini $h1 192.0.2.1/24 2001:db8:1::1/64
+}
+
+h2_create()
+{
+	simple_if_init $h2 198.51.100.1/24 2001:db8:2::1/64
+
+	ip -4 route add default vrf v$h2 nexthop via 198.51.100.2
+	ip -6 route add default vrf v$h2 nexthop via 2001:db8:2::2
+}
+
+h2_destroy()
+{
+	ip -6 route del default vrf v$h2 nexthop via 2001:db8:2::2
+	ip -4 route del default vrf v$h2 nexthop via 198.51.100.2
+
+	simple_if_fini $h2 198.51.100.1/24 2001:db8:2::1/64
+}
+
+router_create()
+{
+	ip link set dev $rp1 up
+	ip link set dev $rp2 up
+
+	__addr_add_del $rp1 add 192.0.2.2/24 2001:db8:1::2/64
+	__addr_add_del $rp2 add 198.51.100.2/24 2001:db8:2::2/64
+}
+
+router_destroy()
+{
+	__addr_add_del $rp2 del 198.51.100.2/24 2001:db8:2::2/64
+	__addr_add_del $rp1 del 192.0.2.2/24 2001:db8:1::2/64
+
+	ip link set dev $rp2 down
+	ip link set dev $rp1 down
+}
+
+setup_prepare()
+{
+	h1=${NETIFS[p1]}
+	rp1=${NETIFS[p2]}
+
+	rp2=${NETIFS[p3]}
+	h2=${NETIFS[p4]}
+
+	vrf_prepare
+	forwarding_enable
+
+	h1_create
+	h2_create
+	router_create
+}
+
+cleanup()
+{
+	pre_cleanup
+
+	router_destroy
+	h2_destroy
+	h1_destroy
+
+	forwarding_restore
+	vrf_cleanup
+}
+
+stp_test()
+{
+	devlink_trap_stats_test "STP" "stp" $MZ $h1 -c 1 -t bpdu -q
+}
+
+lacp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:02:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:09:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lacp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LACP" "lacp" $MZ $h1 -c 1 \
+		$(lacp_payload_get $h1mac) -p 100 -q
+}
+
+lldp_payload_get()
+{
+	local source_mac=$1; shift
+	local p
+
+	p=$(:
+		)"01:80:C2:00:00:0E:"$(       : ETH daddr
+		)"$source_mac:"$(             : ETH saddr
+		)"88:CC:"$(                   : ETH type
+		)
+	echo $p
+}
+
+lldp_test()
+{
+	local h1mac=$(mac_get $h1)
+
+	devlink_trap_stats_test "LLDP" "lldp" $MZ $h1 -c 1 \
+		$(lldp_payload_get $h1mac) -p 100 -q
+}
+
+igmp_query_test()
+{
+	# IGMP (IP Protocol 2) Membership Query (Type 0x11)
+	devlink_trap_stats_test "IGMP Membership Query" "igmp_query" \
+		$MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=11 -p 100 -q
+}
+
+igmp_v1_report_test()
+{
+	# IGMP (IP Protocol 2) Version 1 Membership Report (Type 0x12)
+	devlink_trap_stats_test "IGMP Version 1 Membership Report" \
+		"igmp_v1_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=12 -p 100 -q
+}
+
+igmp_v2_report_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Membership Report (Type 0x16)
+	devlink_trap_stats_test "IGMP Version 2 Membership Report" \
+		"igmp_v2_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=16 -p 100 -q
+}
+
+igmp_v3_report_test()
+{
+	# IGMP (IP Protocol 2) Version 3 Membership Report (Type 0x22)
+	devlink_trap_stats_test "IGMP Version 3 Membership Report" \
+		"igmp_v3_report" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:01 \
+		-A 192.0.2.1 -B 224.0.0.1 -t ip proto=2,p=22 -p 100 -q
+}
+
+igmp_v2_leave_test()
+{
+	# IGMP (IP Protocol 2) Version 2 Leave Group (Type 0x17)
+	devlink_trap_stats_test "IGMP Version 2 Leave Group" \
+		"igmp_v2_leave" $MZ $h1 -c 1 -a own -b 01:00:5E:00:00:02 \
+		-A 192.0.2.1 -B 224.0.0.2 -t ip proto=2,p=17 -p 100 -q
+}
+
+mld_payload_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"3A:"$(			: Next Header - ICMPv6
+		)"00:"$(			: Hdr Ext Len
+		)"00:00:00:00:00:00:"$(		: Options and Padding
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+mld_query_test()
+{
+	# MLD Multicast Listener Query (Type 130)
+	devlink_trap_stats_test "MLD Multicast Listener Query" "mld_query" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 130) -p 100 -q
+}
+
+mld_v1_report_test()
+{
+	# MLD Version 1 Multicast Listener Report (Type 131)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Report" \
+		"mld_v1_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 131) -p 100 -q
+}
+
+mld_v2_report_test()
+{
+	# MLD Version 2 Multicast Listener Report (Type 143)
+	devlink_trap_stats_test "MLD Version 2 Multicast Listener Report" \
+		"mld_v2_report" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 143) -p 100 -q
+}
+
+mld_v1_done_test()
+{
+	# MLD Version 1 Multicast Listener Done (Type 132)
+	devlink_trap_stats_test "MLD Version 1 Multicast Listener Done" \
+		"mld_v1_done" $MZ $h1 -6 -c 1 -A fe80::1 -B ff02::16 \
+		-t ip hop=1,next=0,payload=$(mld_payload_get 132) -p 100 -q
+}
+
+ipv4_dhcp_test()
+{
+	devlink_trap_stats_test "IPv4 DHCP Port 67" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b bcast -A 0.0.0.0 -B 255.255.255.255 \
+		-t udp sp=68,dp=67 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 DHCP Port 68" "ipv4_dhcp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -A 192.0.2.1 \
+		-B 255.255.255.255 -t udp sp=67,dp=68 -p 100 -q
+}
+
+ipv6_dhcp_test()
+{
+	devlink_trap_stats_test "IPv6 DHCP Port 547" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=546,dp=547 \
+		-p 100 -q
+
+	devlink_trap_stats_test "IPv6 DHCP Port 546" "ipv6_dhcp" \
+		$MZ $h1 -6 -c 1 -A fe80::1 -B ff02::1:2 -t udp sp=547,dp=546 \
+		-p 100 -q
+}
+
+arp_request_test()
+{
+	devlink_trap_stats_test "ARP Request" "arp_request" \
+		$MZ $h1 -c 1 -a own -b bcast -t arp request -p 100 -q
+}
+
+arp_response_test()
+{
+	devlink_trap_stats_test "ARP Response" "arp_response" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) -t arp reply -p 100 -q
+}
+
+icmpv6_header_get()
+{
+	local type=$1; shift
+	local p
+
+	type=$(printf "%x" $type)
+	p=$(:
+		)"$type:"$(			: ICMPv6.type
+		)"00:"$(			: ICMPv6.code
+		)"00:"$(			: ICMPv6.checksum
+		)
+	echo $p
+}
+
+ipv6_neigh_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Solicitation" \
+		"ipv6_neigh_solicit" $MZ $h1 -6 -c 1 \
+		-A fe80::1 -B ff02::1:ff00:02 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 135) -p 100 -q
+}
+
+ipv6_neigh_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Neighbour Advertisement" \
+		"ipv6_neigh_advert" $MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 136) -p 100 -q
+}
+
+ipv4_bfd_test()
+{
+	devlink_trap_stats_test "IPv4 BFD Control - Port 3784" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 BFD Echo - Port 3785" "ipv4_bfd" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv6_bfd_test()
+{
+	devlink_trap_stats_test "IPv6 BFD Control - Port 3784" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3784 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 BFD Echo - Port 3785" "ipv6_bfd" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t udp sp=49153,dp=3785 -p 100 -q
+}
+
+ipv4_ospf_test()
+{
+	devlink_trap_stats_test "IPv4 OSPF - Multicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:05 \
+		-A 192.0.2.1 -B 224.0.0.5 -t ip proto=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 OSPF - Unicast" "ipv4_ospf" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=89 -p 100 -q
+}
+
+ipv6_ospf_test()
+{
+	devlink_trap_stats_test "IPv6 OSPF - Multicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:05 \
+		-A fe80::1 -B ff02::5 -t ip next=89 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 OSPF - Unicast" "ipv6_ospf" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 -t ip next=89 -p 100 -q
+}
+
+ipv4_bgp_test()
+{
+	devlink_trap_stats_test "IPv4 BGP" "ipv4_bgp" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t tcp sp=54321,dp=179,flags=rst \
+		-p 100 -q
+}
+
+ipv6_bgp_test()
+{
+	devlink_trap_stats_test "IPv6 BGP" "ipv6_bgp" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::2 \
+		-t tcp sp=54321,dp=179,flags=rst -p 100 -q
+}
+
+ipv4_vrrp_test()
+{
+	devlink_trap_stats_test "IPv4 VRRP" "ipv4_vrrp" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:12 \
+		-A 192.0.2.1 -B 224.0.0.18 -t ip proto=112 -p 100 -q
+}
+
+ipv6_vrrp_test()
+{
+	devlink_trap_stats_test "IPv6 VRRP" "ipv6_vrrp" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:12 \
+		-A fe80::1 -B ff02::12 -t ip next=112 -p 100 -q
+}
+
+ipv4_pim_test()
+{
+	devlink_trap_stats_test "IPv4 PIM - Multicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:00:0d \
+		-A 192.0.2.1 -B 224.0.0.13 -t ip proto=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv4 PIM - Unicast" "ipv4_pim" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.2 -t ip proto=103 -p 100 -q
+}
+
+ipv6_pim_test()
+{
+	devlink_trap_stats_test "IPv6 PIM - Multicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:0d \
+		-A fe80::1 -B ff02::d -t ip next=103 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 PIM - Unicast" "ipv6_pim" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 -t ip next=103 -p 100 -q
+}
+
+uc_loopback_test()
+{
+	# Add neighbours to the fake destination IPs, so that the packets are
+	# routed in the device and not trapped due to an unresolved neighbour
+	# exception.
+	ip -4 neigh add 192.0.2.3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+	ip -6 neigh add 2001:db8:1::3 lladdr 00:11:22:33:44:55 nud permanent \
+		dev $rp1
+
+	devlink_trap_stats_test "IPv4 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 192.0.2.3 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Unicast Loopback" "uc_loopback" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 neigh del 2001:db8:1::3 dev $rp1
+	ip -4 neigh del 192.0.2.3 dev $rp1
+}
+
+local_route_test()
+{
+	# Use a fake source IP to prevent the trap from being triggered twice
+	# when the router sends back a port unreachable message.
+	devlink_trap_stats_test "IPv4 Local Route" "local_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.3 -B 192.0.2.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 Local Route" "local_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::3 -B 2001:db8:1::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+}
+
+external_route_test()
+{
+	# Add a dummy device through which the incoming packets should be
+	# routed.
+	ip link add name dummy10 up type dummy
+	ip address add 203.0.113.1/24 dev dummy10
+	ip -6 address add 2001:db8:10::1/64 dev dummy10
+
+	devlink_trap_stats_test "IPv4 External Route" "external_route" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 203.0.113.2 -t udp sp=54321,dp=12345 -p 100 -q
+
+	devlink_trap_stats_test "IPv6 External Route" "external_route" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:10::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 address del 2001:db8:10::1/64 dev dummy10
+	ip address del 203.0.113.1/24 dev dummy10
+	ip link del dev dummy10
+}
+
+ipv6_uc_dip_link_local_scope_test()
+{
+	# Add a dummy link-local prefix route to allow the packet to be routed.
+	ip -6 route add fe80:1::/64 dev $rp2
+
+	devlink_trap_stats_test \
+		"IPv6 Unicast Destination IP With Link-Local Scope" \
+		"ipv6_uc_dip_link_local_scope" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B fe80:1::2 -t udp sp=54321,dp=12345 \
+		-p 100 -q
+
+	ip -6 route del fe80:1::/64 dev $rp2
+}
+
+ipv4_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv4#Options
+	p=$(:
+		)"94:"$(			: Option Number
+		)"04:"$(			: Option Length
+		)"00:00:"$(			: Option Data
+		)
+	echo $p
+}
+
+ipv4_router_alert_test()
+{
+	devlink_trap_stats_test "IPv4 Router Alert" "ipv4_router_alert" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.3 \
+		-t ip option=$(ipv4_router_alert_get) -p 100 -q
+}
+
+ipv6_router_alert_get()
+{
+	local p
+
+	# https://en.wikipedia.org/wiki/IPv6_packet#Hop-by-hop_options_and_destination_options
+	# https://tools.ietf.org/html/rfc2711#section-2.1
+	p=$(:
+		)"11:"$(			: Next Header - UDP
+		)"00:"$(			: Hdr Ext Len
+		)"05:02:00:00:00:00:"$(		: Option Data
+		)
+	echo $p
+}
+
+ipv6_router_alert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Alert" "ipv6_router_alert" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A 2001:db8:1::1 -B 2001:db8:1::3 \
+		-t ip next=0,payload=$(ipv6_router_alert_get) -p 100 -q
+}
+
+ipv6_dip_all_nodes_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Nodes Address\"" \
+		"ipv6_dip_all_nodes" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A 2001:db8:1::1 -B ff02::1 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_dip_all_routers_test()
+{
+	devlink_trap_stats_test "IPv6 Destination IP \"All Routers Address\"" \
+		"ipv6_dip_all_routers" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A 2001:db8:1::1 -B ff02::2 -t udp sp=12345,dp=54321 -p 100 -q
+}
+
+ipv6_router_solicit_test()
+{
+	devlink_trap_stats_test "IPv6 Router Solicitation" \
+		"ipv6_router_solicit" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:02 \
+		-A fe80::1 -B ff02::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 133) -p 100 -q
+}
+
+ipv6_router_advert_test()
+{
+	devlink_trap_stats_test "IPv6 Router Advertisement" \
+		"ipv6_router_advert" \
+		$MZ $h1 -6 -c 1 -a own -b 33:33:00:00:00:01 \
+		-A fe80::1 -B ff02::1 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 134) -p 100 -q
+}
+
+ipv6_redirect_test()
+{
+	devlink_trap_stats_test "IPv6 Redirect Message" \
+		"ipv6_redirect" \
+		$MZ $h1 -6 -c 1 -a own -b $(mac_get $rp1) \
+		-A fe80::1 -B 2001:db8:1::2 \
+		-t ip hop=1,next=58,payload=$(icmpv6_header_get 137) -p 100 -q
+}
+
+ptp_event_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Sync (0)
+	devlink_trap_stats_test "PTP Time-Critical Event Message" "ptp_event" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=319,payload=10 -p 100 -q
+}
+
+ptp_general_test()
+{
+	# PTP is only supported on Spectrum-1, for now.
+	[[ "$DEVLINK_VIDDID" != "15b3:cb84" ]] && return
+
+	# PTP Announce (b)
+	devlink_trap_stats_test "PTP General Message" "ptp_general" \
+		$MZ $h1 -c 1 -a own -b 01:00:5e:00:01:81 \
+		-A 192.0.2.1 -B 224.0.1.129 \
+		-t udp sp=12345,dp=320,payload=1b -p 100 -q
+}
+
+flow_action_sample_test()
+{
+	# Install a filter that samples every incoming packet.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto all pref 1 handle 101 matchall \
+		skip_sw action sample rate 1 group 1
+
+	devlink_trap_stats_test "Flow Sampling" "flow_action_sample" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto all pref 1 handle 101 matchall
+	tc qdisc del dev $rp1 clsact
+}
+
+flow_action_trap_test()
+{
+	# Install a filter that traps a specific flow.
+	tc qdisc add dev $rp1 clsact
+	tc filter add dev $rp1 ingress proto ip pref 1 handle 101 flower \
+		skip_sw ip_proto udp src_port 12345 dst_port 54321 action trap
+
+	devlink_trap_stats_test "Flow Trapping (Logging)" "flow_action_trap" \
+		$MZ $h1 -c 1 -a own -b $(mac_get $rp1) \
+		-A 192.0.2.1 -B 198.51.100.1 -t udp sp=12345,dp=54321 -p 100 -q
+
+	tc filter del dev $rp1 ingress proto ip pref 1 handle 101 flower
+	tc qdisc del dev $rp1 clsact
+}
+
+trap cleanup EXIT
+
+setup_prepare
+setup_wait
+
+tests_run
+
+exit $EXIT_STATUS
diff --git a/tools/testing/selftests/net/forwarding/devlink_lib.sh b/tools/testing/selftests/net/forwarding/devlink_lib.sh
index e27236109235..f0e6be4c09e9 100644
--- a/tools/testing/selftests/net/forwarding/devlink_lib.sh
+++ b/tools/testing/selftests/net/forwarding/devlink_lib.sh
@@ -423,6 +423,29 @@ devlink_trap_drop_cleanup()
 	tc filter del dev $dev egress protocol $proto pref $pref handle $handle flower
 }
 
+devlink_trap_stats_test()
+{
+	local test_name=$1; shift
+	local trap_name=$1; shift
+	local send_one="$@"
+	local t0_packets
+	local t1_packets
+
+	RET=0
+
+	t0_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	$send_one && sleep 1
+
+	t1_packets=$(devlink_trap_rx_packets_get $trap_name)
+
+	if [[ $t1_packets -eq $t0_packets ]]; then
+		check_err 1 "Trap stats did not increase"
+	fi
+
+	log_test "$test_name"
+}
+
 devlink_trap_policers_num_get()
 {
 	devlink -j -p trap policer show | jq '.[]["'$DEVLINK_DEV'"] | length'
-- 
cgit v1.2.3-59-g8ed1b


From 6f197fb63850b26ef8f70f1bfe5900e377910a5a Mon Sep 17 00:00:00 2001
From: Roelof Berg <rberg@berg-solutions.de>
Date: Fri, 29 May 2020 21:30:02 +0200
Subject: lan743x: Added fixed link and RGMII support

Microchip lan7431 is frequently connected to a phy. However, it
can also be directly connected to a MII remote peer without
any phy in between. For supporting such a phyless hardware setup
in Linux we utilized phylib, which supports a fixed-link
configuration via the device tree. And we added support for
defining the connection type R/GMII in the device tree.

New behavior:
-------------
. The automatic speed and duplex detection of the lan743x silicon
  between MAC and PHY is disabled. Instead, phylib is used, as in
  other typical Linux drivers. Using phylib allows fixed-link
  parameters to be specified in the device tree.

. The device tree entry phy-connection-type is supported now with
  the modes RGMII or (G)MII (default).

Development state:
------------------
. Tested with fixed-phy configurations. Not yet tested in normal
  configurations with phy. Microchip kindly offered testing
  as soon as the Corona measures allow this.

. All review findings of Andrew Lunn are included

Example:
--------
&pcie {
	status = "okay";

	host@0 {
		reg = <0 0 0 0 0>;

		#address-cells = <3>;
		#size-cells = <2>;

		ethernet@0 {
			compatible = "weyland-yutani,noscom1", "microchip,lan743x";
			status = "okay";
			reg = <0 0 0 0 0>;
			phy-connection-type = "rgmii";

			fixed-link {
				speed = <100>;
				full-duplex;
			};
		};
	};
};

Signed-off-by: Roelof Berg <rberg@berg-solutions.de>
Signed-off-by: Andrew Lunn <andrew@lunn.ch>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/microchip/lan743x_ethtool.c |  4 +-
 drivers/net/ethernet/microchip/lan743x_main.c    | 81 +++++++++++++++++++++---
 drivers/net/ethernet/microchip/lan743x_main.h    |  6 ++
 drivers/net/ethernet/microchip/lan743x_ptp.c     |  2 +-
 4 files changed, 80 insertions(+), 13 deletions(-)

diff --git a/drivers/net/ethernet/microchip/lan743x_ethtool.c b/drivers/net/ethernet/microchip/lan743x_ethtool.c
index 3a0b289d9771..c533d06fbe3a 100644
--- a/drivers/net/ethernet/microchip/lan743x_ethtool.c
+++ b/drivers/net/ethernet/microchip/lan743x_ethtool.c
@@ -2,11 +2,11 @@
 /* Copyright (C) 2018 Microchip Technology Inc. */
 
 #include <linux/netdevice.h>
-#include "lan743x_main.h"
-#include "lan743x_ethtool.h"
 #include <linux/net_tstamp.h>
 #include <linux/pci.h>
 #include <linux/phy.h>
+#include "lan743x_main.h"
+#include "lan743x_ethtool.h"
 
 /* eeprom */
 #define LAN743X_EEPROM_MAGIC		    (0x74A5)
diff --git a/drivers/net/ethernet/microchip/lan743x_main.c b/drivers/net/ethernet/microchip/lan743x_main.c
index a43140f7b5eb..36624e3c633b 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.c
+++ b/drivers/net/ethernet/microchip/lan743x_main.c
@@ -8,7 +8,10 @@
 #include <linux/crc32.h>
 #include <linux/microchipphy.h>
 #include <linux/net_tstamp.h>
+#include <linux/of_mdio.h>
+#include <linux/of_net.h>
 #include <linux/phy.h>
+#include <linux/phy_fixed.h>
 #include <linux/rtnetlink.h>
 #include <linux/iopoll.h>
 #include <linux/crc16.h>
@@ -798,9 +801,9 @@ static int lan743x_mac_init(struct lan743x_adapter *adapter)
 
 	netdev = adapter->netdev;
 
-	/* setup auto duplex, and speed detection */
+	/* disable auto duplex, and speed detection. Phylib does that */
 	data = lan743x_csr_read(adapter, MAC_CR);
-	data |= MAC_CR_ADD_ | MAC_CR_ASD_;
+	data &= ~(MAC_CR_ADD_ | MAC_CR_ASD_);
 	data |= MAC_CR_CNTR_RST_;
 	lan743x_csr_write(adapter, MAC_CR, data);
 
@@ -946,6 +949,7 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
 {
 	struct lan743x_adapter *adapter = netdev_priv(netdev);
 	struct phy_device *phydev = netdev->phydev;
+	u32 data;
 
 	phy_print_status(phydev);
 	if (phydev->state == PHY_RUNNING) {
@@ -953,6 +957,39 @@ static void lan743x_phy_link_status_change(struct net_device *netdev)
 		int remote_advertisement = 0;
 		int local_advertisement = 0;
 
+		data = lan743x_csr_read(adapter, MAC_CR);
+
+		/* set interface mode */
+		if (phy_interface_mode_is_rgmii(adapter->phy_mode))
+			/* RGMII */
+			data &= ~MAC_CR_MII_EN_;
+		else
+			/* GMII */
+			data |= MAC_CR_MII_EN_;
+
+		/* set duplex mode */
+		if (phydev->duplex)
+			data |= MAC_CR_DPX_;
+		else
+			data &= ~MAC_CR_DPX_;
+
+		/* set bus speed */
+		switch (phydev->speed) {
+		case SPEED_10:
+			data &= ~MAC_CR_CFG_H_;
+			data &= ~MAC_CR_CFG_L_;
+		break;
+		case SPEED_100:
+			data &= ~MAC_CR_CFG_H_;
+			data |= MAC_CR_CFG_L_;
+		break;
+		case SPEED_1000:
+			data |= MAC_CR_CFG_H_;
+			data |= MAC_CR_CFG_L_;
+		break;
+		}
+		lan743x_csr_write(adapter, MAC_CR, data);
+
 		memset(&ksettings, 0, sizeof(ksettings));
 		phy_ethtool_get_link_ksettings(netdev, &ksettings);
 		local_advertisement =
@@ -980,20 +1017,44 @@ static void lan743x_phy_close(struct lan743x_adapter *adapter)
 static int lan743x_phy_open(struct lan743x_adapter *adapter)
 {
 	struct lan743x_phy *phy = &adapter->phy;
+	struct device_node *phynode;
 	struct phy_device *phydev;
 	struct net_device *netdev;
 	int ret = -EIO;
 
 	netdev = adapter->netdev;
-	phydev = phy_find_first(adapter->mdiobus);
-	if (!phydev)
-		goto return_error;
+	phynode = of_node_get(adapter->pdev->dev.of_node);
+	adapter->phy_mode = PHY_INTERFACE_MODE_GMII;
+
+	if (phynode) {
+		of_get_phy_mode(phynode, &adapter->phy_mode);
+
+		if (of_phy_is_fixed_link(phynode)) {
+			ret = of_phy_register_fixed_link(phynode);
+			if (ret) {
+				netdev_err(netdev,
+					   "cannot register fixed PHY\n");
+				of_node_put(phynode);
+				goto return_error;
+			}
+		}
+		phydev = of_phy_connect(netdev, phynode,
+					lan743x_phy_link_status_change, 0,
+					adapter->phy_mode);
+		of_node_put(phynode);
+		if (!phydev)
+			goto return_error;
+	} else {
+		phydev = phy_find_first(adapter->mdiobus);
+		if (!phydev)
+			goto return_error;
 
-	ret = phy_connect_direct(netdev, phydev,
-				 lan743x_phy_link_status_change,
-				 PHY_INTERFACE_MODE_GMII);
-	if (ret)
-		goto return_error;
+		ret = phy_connect_direct(netdev, phydev,
+					 lan743x_phy_link_status_change,
+					 adapter->phy_mode);
+		if (ret)
+			goto return_error;
+	}
 
 	/* MAC doesn't support 1000T Half */
 	phy_remove_link_mode(phydev, ETHTOOL_LINK_MODE_1000baseT_Half_BIT);
diff --git a/drivers/net/ethernet/microchip/lan743x_main.h b/drivers/net/ethernet/microchip/lan743x_main.h
index 3b02eeae5f45..c61a40411317 100644
--- a/drivers/net/ethernet/microchip/lan743x_main.h
+++ b/drivers/net/ethernet/microchip/lan743x_main.h
@@ -4,6 +4,7 @@
 #ifndef _LAN743X_H
 #define _LAN743X_H
 
+#include <linux/phy.h>
 #include "lan743x_ptp.h"
 
 #define DRIVER_AUTHOR   "Bryan Whitehead <Bryan.Whitehead@microchip.com>"
@@ -104,10 +105,14 @@
 	((value << 0) & FCT_FLOW_CTL_ON_THRESHOLD_)
 
 #define MAC_CR				(0x100)
+#define MAC_CR_MII_EN_			BIT(19)
 #define MAC_CR_EEE_EN_			BIT(17)
 #define MAC_CR_ADD_			BIT(12)
 #define MAC_CR_ASD_			BIT(11)
 #define MAC_CR_CNTR_RST_		BIT(5)
+#define MAC_CR_DPX_			BIT(3)
+#define MAC_CR_CFG_H_			BIT(2)
+#define MAC_CR_CFG_L_			BIT(1)
 #define MAC_CR_RST_			BIT(0)
 
 #define MAC_RX				(0x104)
@@ -698,6 +703,7 @@ struct lan743x_rx {
 struct lan743x_adapter {
 	struct net_device       *netdev;
 	struct mii_bus		*mdiobus;
+	phy_interface_t		phy_mode;
 	int                     msg_enable;
 #ifdef CONFIG_PM
 	u32			wolopts;
diff --git a/drivers/net/ethernet/microchip/lan743x_ptp.c b/drivers/net/ethernet/microchip/lan743x_ptp.c
index 9399f6a98748..ab6d719d40f0 100644
--- a/drivers/net/ethernet/microchip/lan743x_ptp.c
+++ b/drivers/net/ethernet/microchip/lan743x_ptp.c
@@ -2,12 +2,12 @@
 /* Copyright (C) 2018 Microchip Technology Inc. */
 
 #include <linux/netdevice.h>
-#include "lan743x_main.h"
 
 #include <linux/ptp_clock_kernel.h>
 #include <linux/module.h>
 #include <linux/pci.h>
 #include <linux/net_tstamp.h>
+#include "lan743x_main.h"
 
 #include "lan743x_ptp.h"
 
-- 
cgit v1.2.3-59-g8ed1b


From 0af413bd3e2de73bcf0742ed556be4af83c71964 Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Fri, 29 May 2020 22:13:58 +0200
Subject: flow_dissector: work around stack frame size warning

The fl_flow_key structure is around 500 bytes, so having two of them
on the stack in one function now exceeds the warning limit after an
otherwise correct change:

net/sched/cls_flower.c:298:12: error: stack frame size of 1056 bytes in function 'fl_classify' [-Werror,-Wframe-larger-than=]

I suspect the fl_classify function could be reworked to only have one
of them on the stack and modify it in place, but I could not work out
how to do that.

As a somewhat hacky workaround, move one of them into an out-of-line
function to reduce its scope. This does not necessarily reduce the stack
usage of the outer function, but at least the second copy is removed
from the stack during most of it and does not add up to whatever is
called from there.

I now see 552 bytes of stack usage for fl_classify(), plus 528 bytes
for fl_mask_lookup().

Fixes: 58cff782cc55 ("flow_dissector: Parse multiple MPLS Label Stack Entries")
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Cong Wang <xiyou.wangcong@gmail.com>
Acked-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 17 ++++++++---------
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 96f5999281e0..030896eadd11 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -272,14 +272,16 @@ static struct cls_fl_filter *fl_lookup_range(struct fl_flow_mask *mask,
 	return NULL;
 }
 
-static struct cls_fl_filter *fl_lookup(struct fl_flow_mask *mask,
-				       struct fl_flow_key *mkey,
-				       struct fl_flow_key *key)
+static noinline_for_stack
+struct cls_fl_filter *fl_mask_lookup(struct fl_flow_mask *mask, struct fl_flow_key *key)
 {
+	struct fl_flow_key mkey;
+
+	fl_set_masked_key(&mkey, key, mask);
 	if ((mask->flags & TCA_FLOWER_MASK_FLAGS_RANGE))
-		return fl_lookup_range(mask, mkey, key);
+		return fl_lookup_range(mask, &mkey, key);
 
-	return __fl_lookup(mask, mkey);
+	return __fl_lookup(mask, &mkey);
 }
 
 static u16 fl_ct_info_to_flower_map[] = {
@@ -299,7 +301,6 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 		       struct tcf_result *res)
 {
 	struct cls_fl_head *head = rcu_dereference_bh(tp->root);
-	struct fl_flow_key skb_mkey;
 	struct fl_flow_key skb_key;
 	struct fl_flow_mask *mask;
 	struct cls_fl_filter *f;
@@ -319,9 +320,7 @@ static int fl_classify(struct sk_buff *skb, const struct tcf_proto *tp,
 				    ARRAY_SIZE(fl_ct_info_to_flower_map));
 		skb_flow_dissect(skb, &mask->dissector, &skb_key, 0);
 
-		fl_set_masked_key(&skb_mkey, &skb_key, mask);
-
-		f = fl_lookup(mask, &skb_mkey, &skb_key);
+		f = fl_mask_lookup(mask, &skb_key);
 		if (f && !tc_skip_sw(f->flags)) {
 			*res = f->res;
 			return tcf_exts_exec(skb, &f->exts, res);
-- 
cgit v1.2.3-59-g8ed1b


From 3e1c6846b9e108740ef8a37be80314053f5dd52a Mon Sep 17 00:00:00 2001
From: Jia-Ju Bai <baijiaju1990@gmail.com>
Date: Sat, 30 May 2020 10:41:50 +0800
Subject: net: vmxnet3: fix possible buffer overflow caused by bad DMA value in
 vmxnet3_get_rss()

The value adapter->rss_conf is stored in DMA memory, and it is assigned
to rssConf, so rssConf->indTableSize can be modified at anytime by
malicious hardware. Because rssConf->indTableSize is assigned to n,
buffer overflow may occur when the code "rssConf->indTable[n]" is
executed.

To fix this possible bug, n is checked before it is used as an index.

Signed-off-by: Jia-Ju Bai <baijiaju1990@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index bfdda0f34b97..6acaafe169de 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -954,6 +954,8 @@ vmxnet3_get_rss(struct net_device *netdev, u32 *p, u8 *key, u8 *hfunc)
 		*hfunc = ETH_RSS_HASH_TOP;
 	if (!p)
 		return 0;
+	if (n > UPT1_RSS_MAX_IND_TABLE_SIZE)
+		return 0;
 	while (n--)
 		p[n] = rssConf->indTable[n];
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 7e89ed8ab3f74e0746d3ea80537d7a06b0e27732 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sat, 30 May 2020 18:09:46 +0000
Subject: bridge: mrp: Update MRP frame type

Replace u16/u32 with be16/be32 in the MRP frame types.
This fixes sparse warnings like:
warning: cast to restricted __be16

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/uapi/linux/mrp_bridge.h | 22 +++++++++++-----------
 1 file changed, 11 insertions(+), 11 deletions(-)

diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
index 2600cdf5a284..bcad42128d62 100644
--- a/include/uapi/linux/mrp_bridge.h
+++ b/include/uapi/linux/mrp_bridge.h
@@ -55,30 +55,30 @@ struct br_mrp_end_hdr {
 };
 
 struct br_mrp_common_hdr {
-	__u16 seq_id;
+	__be16 seq_id;
 	__u8 domain[MRP_DOMAIN_UUID_LENGTH];
 };
 
 struct br_mrp_ring_test_hdr {
-	__u16 prio;
+	__be16 prio;
 	__u8 sa[ETH_ALEN];
-	__u16 port_role;
-	__u16 state;
-	__u16 transitions;
-	__u32 timestamp;
+	__be16 port_role;
+	__be16 state;
+	__be16 transitions;
+	__be32 timestamp;
 };
 
 struct br_mrp_ring_topo_hdr {
-	__u16 prio;
+	__be16 prio;
 	__u8 sa[ETH_ALEN];
-	__u16 interval;
+	__be16 interval;
 };
 
 struct br_mrp_ring_link_hdr {
 	__u8 sa[ETH_ALEN];
-	__u16 port_role;
-	__u16 interval;
-	__u16 blocked;
+	__be16 port_role;
+	__be16 interval;
+	__be16 blocked;
 };
 
 #endif
-- 
cgit v1.2.3-59-g8ed1b


From 4b3a61b030d1131dcf3633a276158a3d0a435a47 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sat, 30 May 2020 18:09:47 +0000
Subject: bridge: mrp: Set the priority of MRP instance

Each MRP instance has a priority; a lower value means a higher priority.
The priority of the MRP instance is stored in the MRP_Test frame, so that
all the MRP nodes in the ring can see the other nodes' priorities.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h        | 1 +
 include/uapi/linux/if_bridge.h | 2 ++
 net/bridge/br_mrp.c            | 3 ++-
 net/bridge/br_mrp_netlink.c    | 5 +++++
 net/bridge/br_mrp_switchdev.c  | 1 +
 net/bridge/br_private_mrp.h    | 1 +
 6 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index db519957e134..f82ef4c45f5e 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -116,6 +116,7 @@ struct switchdev_obj_mrp {
 	struct net_device *p_port;
 	struct net_device *s_port;
 	u32 ring_id;
+	u16 prio;
 };
 
 #define SWITCHDEV_OBJ_MRP(OBJ) \
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 5a43eb86c93b..0162c1370ecb 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -176,6 +176,7 @@ enum {
 	IFLA_BRIDGE_MRP_INSTANCE_RING_ID,
 	IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX,
 	IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX,
+	IFLA_BRIDGE_MRP_INSTANCE_PRIO,
 	__IFLA_BRIDGE_MRP_INSTANCE_MAX,
 };
 
@@ -230,6 +231,7 @@ struct br_mrp_instance {
 	__u32 ring_id;
 	__u32 p_ifindex;
 	__u32 s_ifindex;
+	__u16 prio;
 };
 
 struct br_mrp_ring_state {
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index 8ea59504ef47..f8fd037219fe 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -147,7 +147,7 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
 	br_mrp_skb_tlv(skb, BR_MRP_TLV_HEADER_RING_TEST, sizeof(*hdr));
 	hdr = skb_put(skb, sizeof(*hdr));
 
-	hdr->prio = cpu_to_be16(MRP_DEFAULT_PRIO);
+	hdr->prio = cpu_to_be16(mrp->prio);
 	ether_addr_copy(hdr->sa, p->br->dev->dev_addr);
 	hdr->port_role = cpu_to_be16(port_role);
 	hdr->state = cpu_to_be16(mrp->ring_state);
@@ -290,6 +290,7 @@ int br_mrp_add(struct net_bridge *br, struct br_mrp_instance *instance)
 		return -ENOMEM;
 
 	mrp->ring_id = instance->ring_id;
+	mrp->prio = instance->prio;
 
 	p = br_mrp_get_port(br, instance->p_ifindex);
 	spin_lock_bh(&br->lock);
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index d9de780d2ce0..8cb67d9ca44e 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -22,6 +22,7 @@ br_mrp_instance_policy[IFLA_BRIDGE_MRP_INSTANCE_MAX + 1] = {
 	[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]	= { .type = NLA_U32 },
 	[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]	= { .type = NLA_U32 },
 	[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_INSTANCE_PRIO]		= { .type = NLA_U16 },
 };
 
 static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr,
@@ -49,6 +50,10 @@ static int br_mrp_instance_parse(struct net_bridge *br, struct nlattr *attr,
 	inst.ring_id = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_RING_ID]);
 	inst.p_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_P_IFINDEX]);
 	inst.s_ifindex = nla_get_u32(tb[IFLA_BRIDGE_MRP_INSTANCE_S_IFINDEX]);
+	inst.prio = MRP_DEFAULT_PRIO;
+
+	if (tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO])
+		inst.prio = nla_get_u16(tb[IFLA_BRIDGE_MRP_INSTANCE_PRIO]);
 
 	if (cmd == RTM_SETLINK)
 		return br_mrp_add(br, &inst);
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
index 51cb1d5a24b4..3a776043bf80 100644
--- a/net/bridge/br_mrp_switchdev.c
+++ b/net/bridge/br_mrp_switchdev.c
@@ -12,6 +12,7 @@ int br_mrp_switchdev_add(struct net_bridge *br, struct br_mrp *mrp)
 		.p_port = rtnl_dereference(mrp->p_port)->dev,
 		.s_port = rtnl_dereference(mrp->s_port)->dev,
 		.ring_id = mrp->ring_id,
+		.prio = mrp->prio,
 	};
 	int err;
 
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index a0f53cc3ab85..558941ce2366 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -14,6 +14,7 @@ struct br_mrp {
 	struct net_bridge_port __rcu	*s_port;
 
 	u32				ring_id;
+	u16				prio;
 
 	enum br_mrp_ring_role_type	ring_role;
 	u8				ring_role_offloaded;
-- 
cgit v1.2.3-59-g8ed1b


From c6676e7d62cfb5cb7c1c5320a26f3634a11afdb0 Mon Sep 17 00:00:00 2001
From: Horatiu Vultur <horatiu.vultur@microchip.com>
Date: Sat, 30 May 2020 18:09:48 +0000
Subject: bridge: mrp: Add support for role MRA

A node that has the MRA role can behave as either MRM or MRC.

Initially it starts as MRM and sends MRP_Test frames on both ring ports.
If it detects MRP_Test frames sent by another MRM, it checks whether
these frames have a lower priority than its own. If so, it sends
MRP_Nack frames to notify the other node that it needs to stop sending
MRP_Test frames.
If it receives an MRP_Nack frame, it stops sending MRP_Test frames and
starts to behave as an MRC, but it continues to monitor the MRP_Test
frames sent by the MRM. If at some point the MRM stops sending MRP_Test
frames, it takes the MRM role and starts sending MRP_Test frames
itself.

Signed-off-by: Horatiu Vultur <horatiu.vultur@microchip.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 include/net/switchdev.h         |   1 +
 include/uapi/linux/if_bridge.h  |   2 +
 include/uapi/linux/mrp_bridge.h |  38 ++++++++++++
 net/bridge/br_mrp.c             | 125 ++++++++++++++++++++++++++++++++++------
 net/bridge/br_mrp_netlink.c     |   6 ++
 net/bridge/br_mrp_switchdev.c   |   4 +-
 net/bridge/br_private_mrp.h     |   4 +-
 7 files changed, 159 insertions(+), 21 deletions(-)

diff --git a/include/net/switchdev.h b/include/net/switchdev.h
index f82ef4c45f5e..b8c059b4e06d 100644
--- a/include/net/switchdev.h
+++ b/include/net/switchdev.h
@@ -130,6 +130,7 @@ struct switchdev_obj_ring_test_mrp {
 	u8 max_miss;
 	u32 ring_id;
 	u32 period;
+	bool monitor;
 };
 
 #define SWITCHDEV_OBJ_RING_TEST_MRP(OBJ) \
diff --git a/include/uapi/linux/if_bridge.h b/include/uapi/linux/if_bridge.h
index 0162c1370ecb..caa6914a3e53 100644
--- a/include/uapi/linux/if_bridge.h
+++ b/include/uapi/linux/if_bridge.h
@@ -222,6 +222,7 @@ enum {
 	IFLA_BRIDGE_MRP_START_TEST_INTERVAL,
 	IFLA_BRIDGE_MRP_START_TEST_MAX_MISS,
 	IFLA_BRIDGE_MRP_START_TEST_PERIOD,
+	IFLA_BRIDGE_MRP_START_TEST_MONITOR,
 	__IFLA_BRIDGE_MRP_START_TEST_MAX,
 };
 
@@ -249,6 +250,7 @@ struct br_mrp_start_test {
 	__u32 interval;
 	__u32 max_miss;
 	__u32 period;
+	__u32 monitor;
 };
 
 struct bridge_stp_xstats {
diff --git a/include/uapi/linux/mrp_bridge.h b/include/uapi/linux/mrp_bridge.h
index bcad42128d62..84f15f48a7cb 100644
--- a/include/uapi/linux/mrp_bridge.h
+++ b/include/uapi/linux/mrp_bridge.h
@@ -11,11 +11,14 @@
 #define MRP_DOMAIN_UUID_LENGTH		16
 #define MRP_VERSION			1
 #define MRP_FRAME_PRIO			7
+#define MRP_OUI_LENGTH			3
+#define MRP_MANUFACTURE_DATA_LENGTH	2
 
 enum br_mrp_ring_role_type {
 	BR_MRP_RING_ROLE_DISABLED,
 	BR_MRP_RING_ROLE_MRC,
 	BR_MRP_RING_ROLE_MRM,
+	BR_MRP_RING_ROLE_MRA,
 };
 
 enum br_mrp_ring_state_type {
@@ -43,6 +46,13 @@ enum br_mrp_tlv_header_type {
 	BR_MRP_TLV_HEADER_RING_TOPO = 0x3,
 	BR_MRP_TLV_HEADER_RING_LINK_DOWN = 0x4,
 	BR_MRP_TLV_HEADER_RING_LINK_UP = 0x5,
+	BR_MRP_TLV_HEADER_OPTION = 0x7f,
+};
+
+enum br_mrp_sub_tlv_header_type {
+	BR_MRP_SUB_TLV_HEADER_TEST_MGR_NACK = 0x1,
+	BR_MRP_SUB_TLV_HEADER_TEST_PROPAGATE = 0x2,
+	BR_MRP_SUB_TLV_HEADER_TEST_AUTO_MGR = 0x3,
 };
 
 struct br_mrp_tlv_hdr {
@@ -50,6 +60,11 @@ struct br_mrp_tlv_hdr {
 	__u8 length;
 };
 
+struct br_mrp_sub_tlv_hdr {
+	__u8 type;
+	__u8 length;
+};
+
 struct br_mrp_end_hdr {
 	struct br_mrp_tlv_hdr hdr;
 };
@@ -81,4 +96,27 @@ struct br_mrp_ring_link_hdr {
 	__be16 blocked;
 };
 
+struct br_mrp_sub_opt_hdr {
+	__u8 type;
+	__u8 manufacture_data[MRP_MANUFACTURE_DATA_LENGTH];
+};
+
+struct br_mrp_test_mgr_nack_hdr {
+	__be16 prio;
+	__u8 sa[ETH_ALEN];
+	__be16 other_prio;
+	__u8 other_sa[ETH_ALEN];
+};
+
+struct br_mrp_test_prop_hdr {
+	__be16 prio;
+	__u8 sa[ETH_ALEN];
+	__be16 other_prio;
+	__u8 other_sa[ETH_ALEN];
+};
+
+struct br_mrp_oui_hdr {
+	__u8 oui[MRP_OUI_LENGTH];
+};
+
 #endif
diff --git a/net/bridge/br_mrp.c b/net/bridge/br_mrp.c
index f8fd037219fe..24986ec7d38c 100644
--- a/net/bridge/br_mrp.c
+++ b/net/bridge/br_mrp.c
@@ -160,6 +160,16 @@ static struct sk_buff *br_mrp_alloc_test_skb(struct br_mrp *mrp,
 	return skb;
 }
 
+/* This function is continuously called in the following cases:
+ * - when node role is MRM, in this case test_monitor is always set to false
+ *   because it needs to notify the userspace that the ring is open and needs to
+ *   send MRP_Test frames
+ * - when node role is MRA, there are 2 subcases:
+ *     - when MRA behaves as MRM, in this case is similar with MRM role
+ *     - when MRA behaves as MRC, in this case test_monitor is set to true,
+ *       because it needs to detect when it stops seeing MRP_Test frames
+ *       from MRM node but it doesn't need to send MRP_Test frames.
+ */
 static void br_mrp_test_work_expired(struct work_struct *work)
 {
 	struct delayed_work *del_work = to_delayed_work(work);
@@ -177,8 +187,14 @@ static void br_mrp_test_work_expired(struct work_struct *work)
 		/* Notify that the ring is open only if the ring state is
 		 * closed, otherwise it would continue to notify at every
 		 * interval.
+		 * Also notify that the ring is open when the node has the
+		 * role MRA and behaves as MRC. The reason is that the
+		 * userspace needs to know when the MRM stopped sending
+		 * MRP_Test frames so that the current node can try to take
+		 * the role of a MRM.
 		 */
-		if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED)
+		if (mrp->ring_state == BR_MRP_RING_STATE_CLOSED ||
+		    mrp->test_monitor)
 			notify_open = true;
 	}
 
@@ -186,12 +202,15 @@ static void br_mrp_test_work_expired(struct work_struct *work)
 
 	p = rcu_dereference(mrp->p_port);
 	if (p) {
-		skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_PRIMARY);
-		if (!skb)
-			goto out;
-
-		skb_reset_network_header(skb);
-		dev_queue_xmit(skb);
+		if (!mrp->test_monitor) {
+			skb = br_mrp_alloc_test_skb(mrp, p,
+						    BR_MRP_PORT_ROLE_PRIMARY);
+			if (!skb)
+				goto out;
+
+			skb_reset_network_header(skb);
+			dev_queue_xmit(skb);
+		}
 
 		if (notify_open && !mrp->ring_role_offloaded)
 			br_mrp_port_open(p->dev, true);
@@ -199,12 +218,15 @@ static void br_mrp_test_work_expired(struct work_struct *work)
 
 	p = rcu_dereference(mrp->s_port);
 	if (p) {
-		skb = br_mrp_alloc_test_skb(mrp, p, BR_MRP_PORT_ROLE_SECONDARY);
-		if (!skb)
-			goto out;
-
-		skb_reset_network_header(skb);
-		dev_queue_xmit(skb);
+		if (!mrp->test_monitor) {
+			skb = br_mrp_alloc_test_skb(mrp, p,
+						    BR_MRP_PORT_ROLE_SECONDARY);
+			if (!skb)
+				goto out;
+
+			skb_reset_network_header(skb);
+			dev_queue_xmit(skb);
+		}
 
 		if (notify_open && !mrp->ring_role_offloaded)
 			br_mrp_port_open(p->dev, true);
@@ -227,7 +249,7 @@ static void br_mrp_del_impl(struct net_bridge *br, struct br_mrp *mrp)
 
 	/* Stop sending MRP_Test frames */
 	cancel_delayed_work_sync(&mrp->test_work);
-	br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0);
+	br_mrp_switchdev_send_ring_test(br, mrp, 0, 0, 0, 0);
 
 	br_mrp_switchdev_del(br, mrp);
 
@@ -452,8 +474,8 @@ int br_mrp_set_ring_role(struct net_bridge *br,
 	return 0;
 }
 
-/* Start to generate MRP test frames, the frames are generated by HW and if it
- * fails, they are generated by the SW.
+/* Start to generate or monitor MRP test frames, the frames are generated by
+ * HW and if it fails, they are generated by the SW.
  * note: already called with rtnl_lock
  */
 int br_mrp_start_test(struct net_bridge *br,
@@ -464,16 +486,18 @@ int br_mrp_start_test(struct net_bridge *br,
 	if (!mrp)
 		return -EINVAL;
 
-	/* Try to push it to the HW and if it fails then continue to generate in
-	 * SW and if that also fails then return error
+	/* Try to push it to the HW and if it fails then continue with SW
+	 * implementation and if that also fails then return error.
 	 */
 	if (!br_mrp_switchdev_send_ring_test(br, mrp, test->interval,
-					     test->max_miss, test->period))
+					     test->max_miss, test->period,
+					     test->monitor))
 		return 0;
 
 	mrp->test_interval = test->interval;
 	mrp->test_end = jiffies + usecs_to_jiffies(test->period);
 	mrp->test_max_miss = test->max_miss;
+	mrp->test_monitor = test->monitor;
 	mrp->test_count_miss = 0;
 	queue_delayed_work(system_wq, &mrp->test_work,
 			   usecs_to_jiffies(test->interval));
@@ -510,6 +534,57 @@ static void br_mrp_mrm_process(struct br_mrp *mrp, struct net_bridge_port *port,
 		br_mrp_port_open(port->dev, false);
 }
 
+/* Determine if the test hdr has a better priority than the node */
+static bool br_mrp_test_better_than_own(struct br_mrp *mrp,
+					struct net_bridge *br,
+					const struct br_mrp_ring_test_hdr *hdr)
+{
+	u16 prio = be16_to_cpu(hdr->prio);
+
+	if (prio < mrp->prio ||
+	    (prio == mrp->prio &&
+	    ether_addr_to_u64(hdr->sa) < ether_addr_to_u64(br->dev->dev_addr)))
+		return true;
+
+	return false;
+}
+
+/* Process only MRP_Test frames. All the other MRP frames are processed by
+ * the userspace application.
+ * note: already called with rcu_read_lock
+ */
+static void br_mrp_mra_process(struct br_mrp *mrp, struct net_bridge *br,
+			       struct net_bridge_port *port,
+			       struct sk_buff *skb)
+{
+	const struct br_mrp_ring_test_hdr *test_hdr;
+	struct br_mrp_ring_test_hdr _test_hdr;
+	const struct br_mrp_tlv_hdr *hdr;
+	struct br_mrp_tlv_hdr _hdr;
+
+	/* Each MRP header starts with a version field which is 16 bits.
+	 * Therefore skip the version and get directly the TLV header.
+	 */
+	hdr = skb_header_pointer(skb, sizeof(uint16_t), sizeof(_hdr), &_hdr);
+	if (!hdr)
+		return;
+
+	if (hdr->type != BR_MRP_TLV_HEADER_RING_TEST)
+		return;
+
+	test_hdr = skb_header_pointer(skb, sizeof(uint16_t) + sizeof(_hdr),
+				      sizeof(_test_hdr), &_test_hdr);
+	if (!test_hdr)
+		return;
+
+	/* Only frames that have a better priority than the node will
+	 * clear the miss counter because otherwise the node will need to behave
+	 * as MRM.
+	 */
+	if (br_mrp_test_better_than_own(mrp, br, test_hdr))
+		mrp->test_count_miss = 0;
+}
+
 /* This will just forward the frame to the other mrp ring port(MRC role) or will
  * not do anything.
  * note: already called with rcu_read_lock
@@ -546,6 +621,18 @@ static int br_mrp_rcv(struct net_bridge_port *p,
 		return 1;
 	}
 
+	/* If the role is MRA then don't forward the frames if it behaves as
+	 * MRM node
+	 */
+	if (mrp->ring_role == BR_MRP_RING_ROLE_MRA) {
+		if (!mrp->test_monitor) {
+			br_mrp_mrm_process(mrp, p, skb);
+			return 1;
+		}
+
+		br_mrp_mra_process(mrp, br, p, skb);
+	}
+
 	/* Clone the frame and forward it on the other MRP port */
 	nskb = skb_clone(skb, GFP_ATOMIC);
 	if (!nskb)
diff --git a/net/bridge/br_mrp_netlink.c b/net/bridge/br_mrp_netlink.c
index 8cb67d9ca44e..34b3a8776991 100644
--- a/net/bridge/br_mrp_netlink.c
+++ b/net/bridge/br_mrp_netlink.c
@@ -196,6 +196,7 @@ br_mrp_start_test_policy[IFLA_BRIDGE_MRP_START_TEST_MAX + 1] = {
 	[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]	= { .type = NLA_U32 },
 	[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]	= { .type = NLA_U32 },
 	[IFLA_BRIDGE_MRP_START_TEST_PERIOD]	= { .type = NLA_U32 },
+	[IFLA_BRIDGE_MRP_START_TEST_MONITOR]	= { .type = NLA_U32 },
 };
 
 static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr,
@@ -225,6 +226,11 @@ static int br_mrp_start_test_parse(struct net_bridge *br, struct nlattr *attr,
 	test.interval = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_INTERVAL]);
 	test.max_miss = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MAX_MISS]);
 	test.period = nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_PERIOD]);
+	test.monitor = false;
+
+	if (tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR])
+		test.monitor =
+			nla_get_u32(tb[IFLA_BRIDGE_MRP_START_TEST_MONITOR]);
 
 	return br_mrp_start_test(br, &test);
 }
diff --git a/net/bridge/br_mrp_switchdev.c b/net/bridge/br_mrp_switchdev.c
index 3a776043bf80..0da68a0da4b5 100644
--- a/net/bridge/br_mrp_switchdev.c
+++ b/net/bridge/br_mrp_switchdev.c
@@ -65,7 +65,8 @@ int br_mrp_switchdev_set_ring_role(struct net_bridge *br,
 
 int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
 				    struct br_mrp *mrp, u32 interval,
-				    u8 max_miss, u32 period)
+				    u8 max_miss, u32 period,
+				    bool monitor)
 {
 	struct switchdev_obj_ring_test_mrp test = {
 		.obj.orig_dev = br->dev,
@@ -74,6 +75,7 @@ int br_mrp_switchdev_send_ring_test(struct net_bridge *br,
 		.max_miss = max_miss,
 		.ring_id = mrp->ring_id,
 		.period = period,
+		.monitor = monitor,
 	};
 	int err;
 
diff --git a/net/bridge/br_private_mrp.h b/net/bridge/br_private_mrp.h
index 558941ce2366..33b255e38ffe 100644
--- a/net/bridge/br_private_mrp.h
+++ b/net/bridge/br_private_mrp.h
@@ -26,6 +26,7 @@ struct br_mrp {
 	unsigned long			test_end;
 	u32				test_count_miss;
 	u32				test_max_miss;
+	bool				test_monitor;
 
 	u32				seq_id;
 
@@ -52,7 +53,8 @@ int br_mrp_switchdev_set_ring_role(struct net_bridge *br, struct br_mrp *mrp,
 int br_mrp_switchdev_set_ring_state(struct net_bridge *br, struct br_mrp *mrp,
 				    enum br_mrp_ring_state_type state);
 int br_mrp_switchdev_send_ring_test(struct net_bridge *br, struct br_mrp *mrp,
-				    u32 interval, u8 max_miss, u32 period);
+				    u32 interval, u8 max_miss, u32 period,
+				    bool monitor);
 int br_mrp_port_switchdev_set_state(struct net_bridge_port *p,
 				    enum br_mrp_port_state_type state);
 int br_mrp_port_switchdev_set_role(struct net_bridge_port *p,
-- 
cgit v1.2.3-59-g8ed1b


From 4e4f4ce6abf5f6a8df0561776d3a790d60d519d0 Mon Sep 17 00:00:00 2001
From: Guillaume Nault <gnault@redhat.com>
Date: Sat, 30 May 2020 20:49:56 +0200
Subject: cls_flower: remove mpls_opts_policy
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Compiling with W=1 gives the following warning:
net/sched/cls_flower.c:731:1: warning: ‘mpls_opts_policy’ defined but not used [-Wunused-const-variable=]

The TCA_FLOWER_KEY_MPLS_OPTS contains a list of
TCA_FLOWER_KEY_MPLS_OPTS_LSE. Therefore, the attributes all have the
same type and we can't parse the list with nla_parse*() and have the
attributes validated automatically using an nla_policy.

fl_set_key_mpls_opts() properly verifies that all attributes in the
list are TCA_FLOWER_KEY_MPLS_OPTS_LSE. Then fl_set_key_mpls_lse()
uses nla_parse_nested() on all these attributes, thus verifying that
they have the NLA_F_NESTED flag. So we can safely drop the
mpls_opts_policy.

Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Guillaume Nault <gnault@redhat.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/sched/cls_flower.c | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/net/sched/cls_flower.c b/net/sched/cls_flower.c
index 030896eadd11..b2da37286082 100644
--- a/net/sched/cls_flower.c
+++ b/net/sched/cls_flower.c
@@ -726,11 +726,6 @@ erspan_opt_policy[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_MAX + 1] = {
 	[TCA_FLOWER_KEY_ENC_OPT_ERSPAN_HWID]       = { .type = NLA_U8 },
 };
 
-static const struct nla_policy
-mpls_opts_policy[TCA_FLOWER_KEY_MPLS_OPTS_MAX + 1] = {
-	[TCA_FLOWER_KEY_MPLS_OPTS_LSE]    = { .type = NLA_NESTED },
-};
-
 static const struct nla_policy
 mpls_stack_entry_policy[TCA_FLOWER_KEY_MPLS_OPT_LSE_MAX + 1] = {
 	[TCA_FLOWER_KEY_MPLS_OPT_LSE_DEPTH]    = { .type = NLA_U8 },
-- 
cgit v1.2.3-59-g8ed1b


From 96aa1b22bd6bb9fccf62f6261f390ed6f3e7967f Mon Sep 17 00:00:00 2001
From: Willem de Bruijn <willemb@google.com>
Date: Sat, 30 May 2020 15:41:31 -0400
Subject: tun: correct header offsets in napi frags mode

Tun in IFF_NAPI_FRAGS mode calls napi_gro_frags. Unlike netif_rx and
netif_gro_receive, this expects skb->data to point to the mac layer.

But skb_probe_transport_header, __skb_get_hash_symmetric, and
xdp_do_generic in tun_get_user need skb->data to point to the network
header. Flow dissection also needs skb->protocol set, so
eth_type_trans has to be called.

Ensure the link layer header lies in the linear area, as eth_type_trans
pulls ETH_HLEN. Then take the same code paths for frags as for non-frags.
Push the link layer header back just before calling napi_gro_frags.

By pulling up to ETH_HLEN from frag0 into linear, this disables the
frag0 optimization in the special case when IFF_NAPI_FRAGS is used
with zero length iov[0] (and thus empty skb->linear).

Fixes: 90e33d459407 ("tun: enable napi_gro_frags() for TUN/TAP driver")
Signed-off-by: Willem de Bruijn <willemb@google.com>
Acked-by: Petar Penkov <ppenkov@google.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/tun.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index c54f967e2c66..b0ab882c021e 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1872,8 +1872,11 @@ drop:
 		skb->dev = tun->dev;
 		break;
 	case IFF_TAP:
-		if (!frags)
-			skb->protocol = eth_type_trans(skb, tun->dev);
+		if (frags && !pskb_may_pull(skb, ETH_HLEN)) {
+			err = -ENOMEM;
+			goto drop;
+		}
+		skb->protocol = eth_type_trans(skb, tun->dev);
 		break;
 	}
 
@@ -1930,9 +1933,12 @@ drop:
 	}
 
 	if (frags) {
+		u32 headlen;
+
 		/* Exercise flow dissector code path. */
-		u32 headlen = eth_get_headlen(tun->dev, skb->data,
-					      skb_headlen(skb));
+		skb_push(skb, ETH_HLEN);
+		headlen = eth_get_headlen(tun->dev, skb->data,
+					  skb_headlen(skb));
 
 		if (unlikely(headlen > skb_headlen(skb))) {
 			this_cpu_inc(tun->pcpu_stats->rx_dropped);
-- 
cgit v1.2.3-59-g8ed1b


From 3190ca3b5f51a0e471ee3f04c898401c81b00385 Mon Sep 17 00:00:00 2001
From: Michael Walle <michael@walle.cc>
Date: Sat, 30 May 2020 22:34:04 +0200
Subject: net: phy: broadcom: don't export RDB/legacy access methods

Don't export the __bcm_phy_enable_rdb_access() and
__bcm_phy_enable_legacy_access() functions. They aren't used outside this
module, and no prototype was ever provided for them.
Just make them static for now.

Fixes: 11ecf8c55b91 ("net: phy: broadcom: add cable test support")
Reported-by: kbuild test robot <lkp@intel.com>
Signed-off-by: Michael Walle <michael@walle.cc>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/phy/bcm-phy-lib.c | 6 ++----
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/drivers/net/phy/bcm-phy-lib.c b/drivers/net/phy/bcm-phy-lib.c
index cb92786e3ded..ef6825b30323 100644
--- a/drivers/net/phy/bcm-phy-lib.c
+++ b/drivers/net/phy/bcm-phy-lib.c
@@ -583,18 +583,16 @@ int bcm_phy_enable_jumbo(struct phy_device *phydev)
 }
 EXPORT_SYMBOL_GPL(bcm_phy_enable_jumbo);
 
-int __bcm_phy_enable_rdb_access(struct phy_device *phydev)
+static int __bcm_phy_enable_rdb_access(struct phy_device *phydev)
 {
 	return __bcm_phy_write_exp(phydev, BCM54XX_EXP_REG7E, 0);
 }
-EXPORT_SYMBOL_GPL(__bcm_phy_enable_rdb_access);
 
-int __bcm_phy_enable_legacy_access(struct phy_device *phydev)
+static int __bcm_phy_enable_legacy_access(struct phy_device *phydev)
 {
 	return __bcm_phy_write_rdb(phydev, BCM54XX_RDB_REG0087,
 				   BCM54XX_ACCESS_MODE_LEGACY_EN);
 }
-EXPORT_SYMBOL_GPL(__bcm_phy_enable_legacy_access);
 
 static int _bcm_phy_cable_test_start(struct phy_device *phydev, bool is_rdb)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 685e39eaf4b5bf68167c799fe683e26cdc43a5ea Mon Sep 17 00:00:00 2001
From: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Date: Sun, 31 May 2020 00:08:08 +0300
Subject: dpaa2-eth: Add support for Rx traffic classes

The firmware reserves for each DPNI a number of RX frame queues
equal to the number of configured flows x number of configured
traffic classes.

Current driver configuration directs all incoming traffic to
FQs corresponding to TC0, leaving all other priority levels unused.

Start adding support for multiple ingress traffic classes, by
configuring the FQs associated with all priority levels, not just
TC0. All settings that are per-TC, such as those related to
hashing and flow steering, are also updated.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c   |  7 ++-
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   | 70 +++++++++++++++-------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h   |  4 +-
 .../net/ethernet/freescale/dpaa2/dpaa2-ethtool.c   | 19 ++++--
 4 files changed, 68 insertions(+), 32 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
index 0a31e4268dfb..c453a23045c1 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-debugfs.c
@@ -81,8 +81,8 @@ static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset)
 	int i, err;
 
 	seq_printf(file, "FQ stats for %s:\n", priv->net_dev->name);
-	seq_printf(file, "%s%16s%16s%16s%16s\n",
-		   "VFQID", "CPU", "Type", "Frames", "Pending frames");
+	seq_printf(file, "%s%16s%16s%16s%16s%16s\n",
+		   "VFQID", "CPU", "TC", "Type", "Frames", "Pending frames");
 
 	for (i = 0; i <  priv->num_fqs; i++) {
 		fq = &priv->fq[i];
@@ -90,9 +90,10 @@ static int dpaa2_dbg_fqs_show(struct seq_file *file, void *offset)
 		if (err)
 			fcnt = 0;
 
-		seq_printf(file, "%5d%16d%16s%16llu%16u\n",
+		seq_printf(file, "%5d%16d%16d%16s%16llu%16u\n",
 			   fq->fqid,
 			   fq->target_cpu,
+			   fq->tc,
 			   fq_type_to_str(fq),
 			   fq->stats.frames,
 			   fcnt);
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index fe3806d54630..01263e247d39 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1290,6 +1290,7 @@ static void disable_ch_napi(struct dpaa2_eth_priv *priv)
 static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
 {
 	struct dpni_taildrop td = {0};
+	struct dpaa2_eth_fq *fq;
 	int i, err;
 
 	if (priv->rx_td_enabled == enable)
@@ -1299,11 +1300,12 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
 	td.threshold = DPAA2_ETH_TAILDROP_THRESH;
 
 	for (i = 0; i < priv->num_fqs; i++) {
-		if (priv->fq[i].type != DPAA2_RX_FQ)
+		fq = &priv->fq[i];
+		if (fq->type != DPAA2_RX_FQ)
 			continue;
 		err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token,
-					DPNI_CP_QUEUE, DPNI_QUEUE_RX, 0,
-					priv->fq[i].flowid, &td);
+					DPNI_CP_QUEUE, DPNI_QUEUE_RX,
+					fq->tc, fq->flowid, &td);
 		if (err) {
 			netdev_err(priv->net_dev,
 				   "dpni_set_taildrop() failed\n");
@@ -2407,7 +2409,7 @@ static void set_fq_affinity(struct dpaa2_eth_priv *priv)
 
 static void setup_fqs(struct dpaa2_eth_priv *priv)
 {
-	int i;
+	int i, j;
 
 	/* We have one TxConf FQ per Tx flow.
 	 * The number of Tx and Rx queues is the same.
@@ -2419,10 +2421,13 @@ static void setup_fqs(struct dpaa2_eth_priv *priv)
 		priv->fq[priv->num_fqs++].flowid = (u16)i;
 	}
 
-	for (i = 0; i < dpaa2_eth_queue_count(priv); i++) {
-		priv->fq[priv->num_fqs].type = DPAA2_RX_FQ;
-		priv->fq[priv->num_fqs].consume = dpaa2_eth_rx;
-		priv->fq[priv->num_fqs++].flowid = (u16)i;
+	for (j = 0; j < dpaa2_eth_tc_count(priv); j++) {
+		for (i = 0; i < dpaa2_eth_queue_count(priv); i++) {
+			priv->fq[priv->num_fqs].type = DPAA2_RX_FQ;
+			priv->fq[priv->num_fqs].consume = dpaa2_eth_rx;
+			priv->fq[priv->num_fqs].tc = (u8)j;
+			priv->fq[priv->num_fqs++].flowid = (u16)i;
+		}
 	}
 
 	/* For each FQ, decide on which core to process incoming frames */
@@ -2789,7 +2794,7 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
 	int err;
 
 	err = dpni_get_queue(priv->mc_io, 0, priv->mc_token,
-			     DPNI_QUEUE_RX, 0, fq->flowid, &queue, &qid);
+			     DPNI_QUEUE_RX, fq->tc, fq->flowid, &queue, &qid);
 	if (err) {
 		dev_err(dev, "dpni_get_queue(RX) failed\n");
 		return err;
@@ -2802,7 +2807,7 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
 	queue.destination.priority = 1;
 	queue.user_context = (u64)(uintptr_t)fq;
 	err = dpni_set_queue(priv->mc_io, 0, priv->mc_token,
-			     DPNI_QUEUE_RX, 0, fq->flowid,
+			     DPNI_QUEUE_RX, fq->tc, fq->flowid,
 			     DPNI_QUEUE_OPT_USER_CTX | DPNI_QUEUE_OPT_DEST,
 			     &queue);
 	if (err) {
@@ -2811,6 +2816,10 @@ static int setup_rx_flow(struct dpaa2_eth_priv *priv,
 	}
 
 	/* xdp_rxq setup */
+	/* only once for each channel */
+	if (fq->tc > 0)
+		return 0;
+
 	err = xdp_rxq_info_reg(&fq->channel->xdp_rxq, priv->net_dev,
 			       fq->flowid);
 	if (err) {
@@ -2948,7 +2957,7 @@ static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 {
 	struct device *dev = priv->net_dev->dev.parent;
 	struct dpni_rx_tc_dist_cfg dist_cfg;
-	int err;
+	int i, err = 0;
 
 	memset(&dist_cfg, 0, sizeof(dist_cfg));
 
@@ -2956,9 +2965,14 @@ static int config_legacy_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
 	dist_cfg.dist_mode = DPNI_DIST_MODE_HASH;
 
-	err = dpni_set_rx_tc_dist(priv->mc_io, 0, priv->mc_token, 0, &dist_cfg);
-	if (err)
-		dev_err(dev, "dpni_set_rx_tc_dist failed\n");
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		err = dpni_set_rx_tc_dist(priv->mc_io, 0, priv->mc_token,
+					  i, &dist_cfg);
+		if (err) {
+			dev_err(dev, "dpni_set_rx_tc_dist failed\n");
+			break;
+		}
+	}
 
 	return err;
 }
@@ -2968,7 +2982,7 @@ static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 {
 	struct device *dev = priv->net_dev->dev.parent;
 	struct dpni_rx_dist_cfg dist_cfg;
-	int err;
+	int i, err = 0;
 
 	memset(&dist_cfg, 0, sizeof(dist_cfg));
 
@@ -2976,9 +2990,15 @@ static int config_hash_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
 	dist_cfg.enable = 1;
 
-	err = dpni_set_rx_hash_dist(priv->mc_io, 0, priv->mc_token, &dist_cfg);
-	if (err)
-		dev_err(dev, "dpni_set_rx_hash_dist failed\n");
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		dist_cfg.tc = i;
+		err = dpni_set_rx_hash_dist(priv->mc_io, 0, priv->mc_token,
+					    &dist_cfg);
+		if (err) {
+			dev_err(dev, "dpni_set_rx_hash_dist failed\n");
+			break;
+		}
+	}
 
 	return err;
 }
@@ -2988,7 +3008,7 @@ static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 {
 	struct device *dev = priv->net_dev->dev.parent;
 	struct dpni_rx_dist_cfg dist_cfg;
-	int err;
+	int i, err = 0;
 
 	memset(&dist_cfg, 0, sizeof(dist_cfg));
 
@@ -2996,9 +3016,15 @@ static int config_cls_key(struct dpaa2_eth_priv *priv, dma_addr_t key)
 	dist_cfg.dist_size = dpaa2_eth_queue_count(priv);
 	dist_cfg.enable = 1;
 
-	err = dpni_set_rx_fs_dist(priv->mc_io, 0, priv->mc_token, &dist_cfg);
-	if (err)
-		dev_err(dev, "dpni_set_rx_fs_dist failed\n");
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		dist_cfg.tc = i;
+		err = dpni_set_rx_fs_dist(priv->mc_io, 0, priv->mc_token,
+					  &dist_cfg);
+		if (err) {
+			dev_err(dev, "dpni_set_rx_fs_dist failed\n");
+			break;
+		}
+	}
 
 	return err;
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 0581fbf1f98c..580ad5fd7bd8 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -294,7 +294,9 @@ struct dpaa2_eth_ch_stats {
 
 /* Maximum number of queues associated with a DPNI */
 #define DPAA2_ETH_MAX_TCS		8
-#define DPAA2_ETH_MAX_RX_QUEUES		16
+#define DPAA2_ETH_MAX_RX_QUEUES_PER_TC	16
+#define DPAA2_ETH_MAX_RX_QUEUES		\
+	(DPAA2_ETH_MAX_RX_QUEUES_PER_TC * DPAA2_ETH_MAX_TCS)
 #define DPAA2_ETH_MAX_TX_QUEUES		16
 #define DPAA2_ETH_MAX_QUEUES		(DPAA2_ETH_MAX_RX_QUEUES + \
 					DPAA2_ETH_MAX_TX_QUEUES)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 049afd1d6252..8bf169783bea 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -547,7 +547,7 @@ static int do_cls_rule(struct net_device *net_dev,
 	dma_addr_t key_iova;
 	u64 fields = 0;
 	void *key_buf;
-	int err;
+	int i, err;
 
 	if (fs->ring_cookie != RX_CLS_FLOW_DISC &&
 	    fs->ring_cookie >= dpaa2_eth_queue_count(priv))
@@ -607,11 +607,18 @@ static int do_cls_rule(struct net_device *net_dev,
 			fs_act.options |= DPNI_FS_OPT_DISCARD;
 		else
 			fs_act.flow_id = fs->ring_cookie;
-		err = dpni_add_fs_entry(priv->mc_io, 0, priv->mc_token, 0,
-					fs->location, &rule_cfg, &fs_act);
-	} else {
-		err = dpni_remove_fs_entry(priv->mc_io, 0, priv->mc_token, 0,
-					   &rule_cfg);
+	}
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		if (add)
+			err = dpni_add_fs_entry(priv->mc_io, 0, priv->mc_token,
+						i, fs->location, &rule_cfg,
+						&fs_act);
+		else
+			err = dpni_remove_fs_entry(priv->mc_io, 0,
+						   priv->mc_token, i,
+						   &rule_cfg);
+		if (err)
+			break;
 	}
 
 	dma_unmap_single(dev, key_iova, rule_cfg.key_size * 2, DMA_TO_DEVICE);
-- 
cgit v1.2.3-59-g8ed1b


From 6aa90fe2d96745b63d4ccc74c0c37b90d31b699e Mon Sep 17 00:00:00 2001
From: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Date: Sun, 31 May 2020 00:08:09 +0300
Subject: dpaa2-eth: Distribute ingress frames based on VLAN prio

Configure static ingress classification based on VLAN PCP field.
If the DPNI doesn't have enough traffic classes to accommodate all
priority levels, the lowest ones end up on TC 0 (default on miss).

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 116 ++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h |   1 +
 drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h  |  34 ++++++
 drivers/net/ethernet/freescale/dpaa2/dpni.c      | 131 +++++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpni.h      |  36 +++++++
 5 files changed, 318 insertions(+)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 01263e247d39..3bf5df92ecfa 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -2696,6 +2696,118 @@ out_err:
 	priv->enqueue = dpaa2_eth_enqueue_qd;
 }
 
+/* Configure ingress classification based on VLAN PCP */
+static int set_vlan_qos(struct dpaa2_eth_priv *priv)
+{
+	struct device *dev = priv->net_dev->dev.parent;
+	struct dpkg_profile_cfg kg_cfg = {0};
+	struct dpni_qos_tbl_cfg qos_cfg = {0};
+	struct dpni_rule_cfg key_params;
+	void *dma_mem, *key, *mask;
+	u8 key_size = 2;	/* VLAN TCI field */
+	int i, pcp, err;
+
+	/* VLAN-based classification only makes sense if we have multiple
+	 * traffic classes.
+	 * Also, we need to extract just the 3-bit PCP field from the VLAN
+	 * header and we can only do that by using a mask
+	 */
+	if (dpaa2_eth_tc_count(priv) == 1 || !dpaa2_eth_fs_mask_enabled(priv)) {
+		dev_dbg(dev, "VLAN-based QoS classification not supported\n");
+		return -EOPNOTSUPP;
+	}
+
+	dma_mem = kzalloc(DPAA2_CLASSIFIER_DMA_SIZE, GFP_KERNEL);
+	if (!dma_mem)
+		return -ENOMEM;
+
+	kg_cfg.num_extracts = 1;
+	kg_cfg.extracts[0].type = DPKG_EXTRACT_FROM_HDR;
+	kg_cfg.extracts[0].extract.from_hdr.prot = NET_PROT_VLAN;
+	kg_cfg.extracts[0].extract.from_hdr.type = DPKG_FULL_FIELD;
+	kg_cfg.extracts[0].extract.from_hdr.field = NH_FLD_VLAN_TCI;
+
+	err = dpni_prepare_key_cfg(&kg_cfg, dma_mem);
+	if (err) {
+		dev_err(dev, "dpni_prepare_key_cfg failed\n");
+		goto out_free_tbl;
+	}
+
+	/* set QoS table */
+	qos_cfg.default_tc = 0;
+	qos_cfg.discard_on_miss = 0;
+	qos_cfg.key_cfg_iova = dma_map_single(dev, dma_mem,
+					      DPAA2_CLASSIFIER_DMA_SIZE,
+					      DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, qos_cfg.key_cfg_iova)) {
+		dev_err(dev, "QoS table DMA mapping failed\n");
+		err = -ENOMEM;
+		goto out_free_tbl;
+	}
+
+	err = dpni_set_qos_table(priv->mc_io, 0, priv->mc_token, &qos_cfg);
+	if (err) {
+		dev_err(dev, "dpni_set_qos_table failed\n");
+		goto out_unmap_tbl;
+	}
+
+	/* Add QoS table entries */
+	key = kzalloc(key_size * 2, GFP_KERNEL);
+	if (!key) {
+		err = -ENOMEM;
+		goto out_unmap_tbl;
+	}
+	mask = key + key_size;
+	*(__be16 *)mask = cpu_to_be16(VLAN_PRIO_MASK);
+
+	key_params.key_iova = dma_map_single(dev, key, key_size * 2,
+					     DMA_TO_DEVICE);
+	if (dma_mapping_error(dev, key_params.key_iova)) {
+		dev_err(dev, "Qos table entry DMA mapping failed\n");
+		err = -ENOMEM;
+		goto out_free_key;
+	}
+
+	key_params.mask_iova = key_params.key_iova + key_size;
+	key_params.key_size = key_size;
+
+	/* We add rules for PCP-based distribution starting with highest
+	 * priority (VLAN PCP = 7). If this DPNI doesn't have enough traffic
+	 * classes to accommodate all priority levels, the lowest ones end up
+	 * on TC 0 which was configured as default
+	 */
+	for (i = dpaa2_eth_tc_count(priv) - 1, pcp = 7; i >= 0; i--, pcp--) {
+		*(__be16 *)key = cpu_to_be16(pcp << VLAN_PRIO_SHIFT);
+		dma_sync_single_for_device(dev, key_params.key_iova,
+					   key_size * 2, DMA_TO_DEVICE);
+
+		err = dpni_add_qos_entry(priv->mc_io, 0, priv->mc_token,
+					 &key_params, i, i);
+		if (err) {
+			dev_err(dev, "dpni_add_qos_entry failed\n");
+			dpni_clear_qos_table(priv->mc_io, 0, priv->mc_token);
+			goto out_unmap_key;
+		}
+	}
+
+	priv->vlan_cls_enabled = true;
+
+	/* Table and key memory is not persistent, clean everything up after
+	 * configuration is finished
+	 */
+out_unmap_key:
+	dma_unmap_single(dev, key_params.key_iova, key_size * 2, DMA_TO_DEVICE);
+out_free_key:
+	kfree(key);
+out_unmap_tbl:
+	dma_unmap_single(dev, qos_cfg.key_cfg_iova, DPAA2_CLASSIFIER_DMA_SIZE,
+			 DMA_TO_DEVICE);
+out_free_tbl:
+	kfree(dma_mem);
+
+	return err;
+}
+
 /* Configure the DPNI object this interface is associated with */
 static int setup_dpni(struct fsl_mc_device *ls_dev)
 {
@@ -2758,6 +2870,10 @@ static int setup_dpni(struct fsl_mc_device *ls_dev)
 			goto close;
 	}
 
+	err = set_vlan_qos(priv);
+	if (err && err != -EOPNOTSUPP)
+		goto close;
+
 	priv->cls_rules = devm_kzalloc(dev, sizeof(struct dpaa2_eth_cls_rule) *
 				       dpaa2_eth_fs_count(priv), GFP_KERNEL);
 	if (!priv->cls_rules) {
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 580ad5fd7bd8..7856f69bcf36 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -427,6 +427,7 @@ struct dpaa2_eth_priv {
 	u64 rx_cls_fields;
 	struct dpaa2_eth_cls_rule *cls_rules;
 	u8 rx_cls_enabled;
+	u8 vlan_cls_enabled;
 	struct bpf_prog *xdp_prog;
 #ifdef CONFIG_DEBUG_FS
 	struct dpaa2_debugfs dbg;
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
index d9b6918807af..0048e856f85e 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -59,6 +59,10 @@
 
 #define DPNI_CMDID_SET_RX_TC_DIST			DPNI_CMD(0x235)
 
+#define DPNI_CMDID_SET_QOS_TBL				DPNI_CMD(0x240)
+#define DPNI_CMDID_ADD_QOS_ENT				DPNI_CMD(0x241)
+#define DPNI_CMDID_REMOVE_QOS_ENT			DPNI_CMD(0x242)
+#define DPNI_CMDID_CLR_QOS_TBL				DPNI_CMD(0x243)
 #define DPNI_CMDID_ADD_FS_ENT				DPNI_CMD(0x244)
 #define DPNI_CMDID_REMOVE_FS_ENT			DPNI_CMD(0x245)
 #define DPNI_CMDID_CLR_FS_ENT				DPNI_CMD(0x246)
@@ -567,4 +571,34 @@ struct dpni_cmd_remove_fs_entry {
 	__le64 mask_iova;
 };
 
+#define DPNI_DISCARD_ON_MISS_SHIFT	0
+#define DPNI_DISCARD_ON_MISS_SIZE	1
+
+struct dpni_cmd_set_qos_table {
+	__le32 pad;
+	u8 default_tc;
+	/* only the LSB */
+	u8 discard_on_miss;
+	__le16 pad1[21];
+	__le64 key_cfg_iova;
+};
+
+struct dpni_cmd_add_qos_entry {
+	__le16 pad;
+	u8 tc_id;
+	u8 key_size;
+	__le16 index;
+	__le16 pad1;
+	__le64 key_iova;
+	__le64 mask_iova;
+};
+
+struct dpni_cmd_remove_qos_entry {
+	u8 pad[3];
+	u8 key_size;
+	__le32 pad1;
+	__le64 key_iova;
+	__le64 mask_iova;
+};
+
 #endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index dd54e6953aeb..78fa325407ca 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1786,3 +1786,134 @@ int dpni_remove_fs_entry(struct fsl_mc_io *mc_io,
 	/* send command to mc*/
 	return mc_send_command(mc_io, &cmd);
 }
+
+/**
+ * dpni_set_qos_table() - Set QoS mapping table
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	QoS table configuration
+ *
+ * This function and all QoS-related functions require that
+ *'max_tcs > 1' was set at DPNI creation.
+ *
+ * warning: Before calling this function, call dpkg_prepare_key_cfg() to
+ *			prepare the key_cfg_iova parameter
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_set_qos_table(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       const struct dpni_qos_tbl_cfg *cfg)
+{
+	struct dpni_cmd_set_qos_table *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_SET_QOS_TBL,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_set_qos_table *)cmd.params;
+	cmd_params->default_tc = cfg->default_tc;
+	cmd_params->key_cfg_iova = cpu_to_le64(cfg->key_cfg_iova);
+	dpni_set_field(cmd_params->discard_on_miss, DISCARD_ON_MISS,
+		       cfg->discard_on_miss);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_add_qos_entry() - Add QoS mapping entry (to select a traffic class)
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	QoS rule to add
+ * @tc_id:	Traffic class selection (0-7)
+ * @index:	Location in the QoS table where to insert the entry.
+ *		Only relevant if MASKING is enabled for QoS classification on
+ *		this DPNI, it is ignored for exact match.
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_add_qos_entry(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       const struct dpni_rule_cfg *cfg,
+		       u8 tc_id,
+		       u16 index)
+{
+	struct dpni_cmd_add_qos_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_ADD_QOS_ENT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_add_qos_entry *)cmd.params;
+	cmd_params->tc_id = tc_id;
+	cmd_params->key_size = cfg->key_size;
+	cmd_params->index = cpu_to_le16(index);
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	cmd_params->mask_iova = cpu_to_le64(cfg->mask_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_remove_qos_entry() - Remove QoS mapping entry
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @cfg:	QoS rule to remove
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_remove_qos_entry(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  const struct dpni_rule_cfg *cfg)
+{
+	struct dpni_cmd_remove_qos_entry *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_REMOVE_QOS_ENT,
+					  cmd_flags,
+					  token);
+	cmd_params = (struct dpni_cmd_remove_qos_entry *)cmd.params;
+	cmd_params->key_size = cfg->key_size;
+	cmd_params->key_iova = cpu_to_le64(cfg->key_iova);
+	cmd_params->mask_iova = cpu_to_le64(cfg->mask_iova);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
+/**
+ * dpni_clear_qos_table() - Clear all QoS mapping entries
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ *
+ * Following this function call, all frames are directed to
+ * the default traffic class (0)
+ *
+ * Return:	'0' on Success; Error code otherwise.
+ */
+int dpni_clear_qos_table(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token)
+{
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header = mc_encode_cmd_header(DPNI_CMDID_CLR_QOS_TBL,
+					  cmd_flags,
+					  token);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index ee0711d06b3a..8c7ac20bf1a7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -715,6 +715,26 @@ int dpni_set_rx_hash_dist(struct fsl_mc_io *mc_io,
 			  u16 token,
 			  const struct dpni_rx_dist_cfg *cfg);
 
+/**
+ * struct dpni_qos_tbl_cfg - Structure representing QOS table configuration
+ * @key_cfg_iova: I/O virtual address of 256 bytes DMA-able memory filled with
+ *		key extractions to be used as the QoS criteria by calling
+ *		dpkg_prepare_key_cfg()
+ * @discard_on_miss: Set to '1' to discard frames in case of no match (miss);
+ *		'0' to use the 'default_tc' in such cases
+ * @default_tc: Used in case of no-match and 'discard_on_miss'= 0
+ */
+struct dpni_qos_tbl_cfg {
+	u64 key_cfg_iova;
+	int discard_on_miss;
+	u8 default_tc;
+};
+
+int dpni_set_qos_table(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       const struct dpni_qos_tbl_cfg *cfg);
+
 /**
  * enum dpni_dest - DPNI destination types
  * @DPNI_DEST_NONE: Unassigned destination; The queue is set in parked mode and
@@ -961,6 +981,22 @@ int dpni_remove_fs_entry(struct fsl_mc_io *mc_io,
 			 u8 tc_id,
 			 const struct dpni_rule_cfg *cfg);
 
+int dpni_add_qos_entry(struct fsl_mc_io *mc_io,
+		       u32 cmd_flags,
+		       u16 token,
+		       const struct dpni_rule_cfg *cfg,
+		       u8 tc_id,
+		       u16 index);
+
+int dpni_remove_qos_entry(struct fsl_mc_io *mc_io,
+			  u32 cmd_flags,
+			  u16 token,
+			  const struct dpni_rule_cfg *cfg);
+
+int dpni_clear_qos_table(struct fsl_mc_io *mc_io,
+			 u32 cmd_flags,
+			 u16 token);
+
 int dpni_get_api_version(struct fsl_mc_io *mc_io,
 			 u32 cmd_flags,
 			 u16 *major_ver,
-- 
cgit v1.2.3-59-g8ed1b


From ad054f265401d8279837a916e9b5a5aee2a1749d Mon Sep 17 00:00:00 2001
From: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Date: Sun, 31 May 2020 00:08:10 +0300
Subject: dpaa2-eth: Add helper functions

Add convenient helper functions that determine whether Rx/Tx pause
frames are enabled based on link state flags received from firmware.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c     |  3 +--
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h     | 11 +++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c |  5 ++---
 3 files changed, 14 insertions(+), 5 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 3bf5df92ecfa..c16c8ea3a174 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1333,8 +1333,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
 	 * Rx FQ taildrop configuration as well. We configure taildrop
 	 * only when pause frame generation is disabled.
 	 */
-	tx_pause = !!(state.options & DPNI_LINK_OPT_PAUSE) ^
-		   !!(state.options & DPNI_LINK_OPT_ASYM_PAUSE);
+	tx_pause = dpaa2_eth_tx_pause_enabled(state.options);
 	dpaa2_eth_set_rx_taildrop(priv, !tx_pause);
 
 	/* When we manage the MAC/PHY using phylink there is no need
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 7856f69bcf36..6384f6a23349 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -510,6 +510,17 @@ enum dpaa2_eth_rx_dist {
 	(dpaa2_eth_cmp_dpni_ver((priv), DPNI_PAUSE_VER_MAJOR,	\
 				DPNI_PAUSE_VER_MINOR) >= 0)
 
+static inline bool dpaa2_eth_tx_pause_enabled(u64 link_options)
+{
+	return !!(link_options & DPNI_LINK_OPT_PAUSE) ^
+	       !!(link_options & DPNI_LINK_OPT_ASYM_PAUSE);
+}
+
+static inline bool dpaa2_eth_rx_pause_enabled(u64 link_options)
+{
+	return !!(link_options & DPNI_LINK_OPT_PAUSE);
+}
+
 static inline
 unsigned int dpaa2_eth_needed_headroom(struct dpaa2_eth_priv *priv,
 				       struct sk_buff *skb)
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
index 8bf169783bea..e88269fe3de7 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-ethtool.c
@@ -130,9 +130,8 @@ static void dpaa2_eth_get_pauseparam(struct net_device *net_dev,
 		return;
 	}
 
-	pause->rx_pause = !!(link_options & DPNI_LINK_OPT_PAUSE);
-	pause->tx_pause = pause->rx_pause ^
-			  !!(link_options & DPNI_LINK_OPT_ASYM_PAUSE);
+	pause->rx_pause = dpaa2_eth_rx_pause_enabled(link_options);
+	pause->tx_pause = dpaa2_eth_tx_pause_enabled(link_options);
 	pause->autoneg = AUTONEG_DISABLE;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From 2c8d1c8d7d62dfedab97927c22e9421f0d72de8e Mon Sep 17 00:00:00 2001
From: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Date: Sun, 31 May 2020 00:08:11 +0300
Subject: dpaa2-eth: Add congestion group taildrop

The increase in number of ingress frame queues means we now risk
depleting the buffer pool before the FQ taildrop kicks in.

Congestion group taildrop allows us to control the number of frames that
can accumulate on a group of Rx frame queues belonging to the same
traffic class.  This setting coexists with the frame queue based
taildrop: whichever limit gets hit first triggers the frame drop.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c | 35 ++++++++++++++++++------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 13 +++++++--
 2 files changed, 38 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index c16c8ea3a174..04eff6308c72 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1287,17 +1287,20 @@ static void disable_ch_napi(struct dpaa2_eth_priv *priv)
 	}
 }
 
-static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
+static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
+				      bool tx_pause)
 {
 	struct dpni_taildrop td = {0};
 	struct dpaa2_eth_fq *fq;
 	int i, err;
 
-	if (priv->rx_td_enabled == enable)
+	td.enable = !tx_pause;
+	if (priv->rx_td_enabled == td.enable)
 		return;
 
-	td.enable = enable;
-	td.threshold = DPAA2_ETH_TAILDROP_THRESH;
+	/* FQ taildrop: threshold is in bytes, per frame queue */
+	td.threshold = DPAA2_ETH_FQ_TAILDROP_THRESH;
+	td.units = DPNI_CONGESTION_UNIT_BYTES;
 
 	for (i = 0; i < priv->num_fqs; i++) {
 		fq = &priv->fq[i];
@@ -1308,12 +1311,28 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv, bool enable)
 					fq->tc, fq->flowid, &td);
 		if (err) {
 			netdev_err(priv->net_dev,
-				   "dpni_set_taildrop() failed\n");
-			break;
+				   "dpni_set_taildrop(FQ) failed\n");
+			return;
+		}
+	}
+
+	/* Congestion group taildrop: threshold is in frames, per group
+	 * of FQs belonging to the same traffic class
+	 */
+	td.threshold = DPAA2_ETH_CG_TAILDROP_THRESH(priv);
+	td.units = DPNI_CONGESTION_UNIT_FRAMES;
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		err = dpni_set_taildrop(priv->mc_io, 0, priv->mc_token,
+					DPNI_CP_GROUP, DPNI_QUEUE_RX,
+					i, 0, &td);
+		if (err) {
+			netdev_err(priv->net_dev,
+				   "dpni_set_taildrop(CG) failed\n");
+			return;
 		}
 	}
 
-	priv->rx_td_enabled = enable;
+	priv->rx_td_enabled = td.enable;
 }
 
 static int link_state_update(struct dpaa2_eth_priv *priv)
@@ -1334,7 +1353,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
 	 * only when pause frame generation is disabled.
 	 */
 	tx_pause = dpaa2_eth_tx_pause_enabled(state.options);
-	dpaa2_eth_set_rx_taildrop(priv, !tx_pause);
+	dpaa2_eth_set_rx_taildrop(priv, tx_pause);
 
 	/* When we manage the MAC/PHY using phylink there is no need
 	 * to manually update the netif_carrier.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 6384f6a23349..184d5d83e497 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -40,7 +40,7 @@
  * frames in the Rx queues (length of the current frame is not
  * taken into account when making the taildrop decision)
  */
-#define DPAA2_ETH_TAILDROP_THRESH	(64 * 1024)
+#define DPAA2_ETH_FQ_TAILDROP_THRESH	(64 * 1024)
 
 /* Maximum number of Tx confirmation frames to be processed
  * in a single NAPI call
@@ -52,11 +52,20 @@
  * how many 64B frames fit inside the taildrop threshold and add a margin
  * to accommodate the buffer refill delay.
  */
-#define DPAA2_ETH_MAX_FRAMES_PER_QUEUE	(DPAA2_ETH_TAILDROP_THRESH / 64)
+#define DPAA2_ETH_MAX_FRAMES_PER_QUEUE	(DPAA2_ETH_FQ_TAILDROP_THRESH / 64)
 #define DPAA2_ETH_NUM_BUFS		(DPAA2_ETH_MAX_FRAMES_PER_QUEUE + 256)
 #define DPAA2_ETH_REFILL_THRESH \
 	(DPAA2_ETH_NUM_BUFS - DPAA2_ETH_BUFS_PER_CMD)
 
+/* Congestion group taildrop threshold: number of frames allowed to accumulate
+ * at any moment in a group of Rx queues belonging to the same traffic class.
+ * Choose value such that we don't risk depleting the buffer pool before the
+ * taildrop kicks in
+ */
+#define DPAA2_ETH_CG_TAILDROP_THRESH(priv)				\
+	(DPAA2_ETH_MAX_FRAMES_PER_QUEUE * dpaa2_eth_queue_count(priv) /	\
+	 dpaa2_eth_tc_count(priv))
+
 /* Maximum number of buffers that can be acquired/released through a single
  * QBMan command
  */
-- 
cgit v1.2.3-59-g8ed1b


From 3f8b826d705fc6f0f0602fcbe6ee3b646ed3316e Mon Sep 17 00:00:00 2001
From: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Date: Sun, 31 May 2020 00:08:12 +0300
Subject: dpaa2-eth: Update FQ taildrop threshold and buffer pool count

Now that we have congestion group taildrop configured at all
times, we can afford to increase the frame queue taildrop
threshold; this will ensure a better response when receiving
bursts of large-sized frames.

Also decouple the buffer pool count from the Rx FQ taildrop
threshold, as the above change would increase it too much. Instead,
keep the old count as a hardcoded value.

With the new limits, we try to ensure that:
* we allow enough leeway for large frame bursts (by buffering
enough of them in queues to avoid heavy dropping in case of
bursty traffic, but when overall ingress bandwidth is manageable)
* allow pending frames to be evenly spread between ingress FQs,
regardless of frame size
* avoid dropping frames due to the buffer pool being empty; this
is not a bad behaviour per se, but system overall response is
more linear and predictable when frames are dropped at frame
queue/group level.

Signed-off-by: Ioana Radulescu <ruxandra.radulescu@nxp.com>
Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 184d5d83e497..02c0eea69a23 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -36,24 +36,24 @@
 /* Convert L3 MTU to L2 MFL */
 #define DPAA2_ETH_L2_MAX_FRM(mtu)	((mtu) + VLAN_ETH_HLEN)
 
-/* Set the taildrop threshold (in bytes) to allow the enqueue of several jumbo
- * frames in the Rx queues (length of the current frame is not
- * taken into account when making the taildrop decision)
+/* Set the taildrop threshold (in bytes) to allow the enqueue of a large
+ * enough number of jumbo frames in the Rx queues (length of the current
+ * frame is not taken into account when making the taildrop decision)
  */
-#define DPAA2_ETH_FQ_TAILDROP_THRESH	(64 * 1024)
+#define DPAA2_ETH_FQ_TAILDROP_THRESH	(1024 * 1024)
 
 /* Maximum number of Tx confirmation frames to be processed
  * in a single NAPI call
  */
 #define DPAA2_ETH_TXCONF_PER_NAPI	256
 
-/* Buffer quota per queue. Must be large enough such that for minimum sized
- * frames taildrop kicks in before the bpool gets depleted, so we compute
- * how many 64B frames fit inside the taildrop threshold and add a margin
- * to accommodate the buffer refill delay.
+/* Buffer quota per channel. We want to keep in check number of ingress frames
+ * in flight: for small sized frames, congestion group taildrop may kick in
+ * first; for large sizes, Rx FQ taildrop threshold will ensure only a
+ * reasonable number of frames will be pending at any given time.
+ * Ingress frame drop due to buffer pool depletion should be a corner case only
  */
-#define DPAA2_ETH_MAX_FRAMES_PER_QUEUE	(DPAA2_ETH_FQ_TAILDROP_THRESH / 64)
-#define DPAA2_ETH_NUM_BUFS		(DPAA2_ETH_MAX_FRAMES_PER_QUEUE + 256)
+#define DPAA2_ETH_NUM_BUFS		1280
 #define DPAA2_ETH_REFILL_THRESH \
 	(DPAA2_ETH_NUM_BUFS - DPAA2_ETH_BUFS_PER_CMD)
 
@@ -63,8 +63,7 @@
  * taildrop kicks in
  */
 #define DPAA2_ETH_CG_TAILDROP_THRESH(priv)				\
-	(DPAA2_ETH_MAX_FRAMES_PER_QUEUE * dpaa2_eth_queue_count(priv) /	\
-	 dpaa2_eth_tc_count(priv))
+	(1024 * dpaa2_eth_queue_count(priv) / dpaa2_eth_tc_count(priv))
 
 /* Maximum number of buffers that can be acquired/released through a single
  * QBMan command
-- 
cgit v1.2.3-59-g8ed1b


From f395b69f40f580491ef56f2395a98e3189baa53c Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sun, 31 May 2020 00:08:13 +0300
Subject: dpaa2-eth: Add PFC support through DCB ops

Add support in dpaa2-eth for PFC (Priority Flow Control)
through the DCB ops.

Instruct the hardware to respond to received PFC frames.
Current firmware doesn't allow us to selectively enable PFC
on the Rx side for some priorities only, so we will react to
all incoming PFC frames (and stop transmitting on the traffic
classes specified in the frame).

Also, configure the hardware to generate PFC frames based on Rx
congestion notifications. When a certain number of frames accumulate in
the ingress queues corresponding to a traffic class, priority flow
control frames are generated for that TC.

The number of PFC traffic classes available can be queried through
lldptool. Which of those traffic classes have PFC enabled is controlled
through the same dcbnl_rtnl_ops callbacks.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/freescale/dpaa2/Kconfig       |  10 ++
 drivers/net/ethernet/freescale/dpaa2/Makefile      |   1 +
 .../net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c   | 146 +++++++++++++++++++++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   |   9 ++
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h   |  18 +++
 drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h    |  25 ++++
 drivers/net/ethernet/freescale/dpaa2/dpni.c        |  46 +++++++
 drivers/net/ethernet/freescale/dpaa2/dpni.h        |  61 +++++++++
 8 files changed, 316 insertions(+)
 create mode 100644 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c

diff --git a/drivers/net/ethernet/freescale/dpaa2/Kconfig b/drivers/net/ethernet/freescale/dpaa2/Kconfig
index c6fb8e4021ac..feea797cde02 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Kconfig
+++ b/drivers/net/ethernet/freescale/dpaa2/Kconfig
@@ -9,6 +9,16 @@ config FSL_DPAA2_ETH
 	  The driver manages network objects discovered on the Freescale
 	  MC bus.
 
+if FSL_DPAA2_ETH
+config FSL_DPAA2_ETH_DCB
+	bool "Data Center Bridging (DCB) Support"
+	default n
+	depends on DCB
+	help
+	  Enable Priority-Based Flow Control (PFC) support for DPAA2 Ethernet
+	  devices.
+endif
+
 config FSL_DPAA2_PTP_CLOCK
 	tristate "Freescale DPAA2 PTP Clock"
 	depends on FSL_DPAA2_ETH && PTP_1588_CLOCK_QORIQ
diff --git a/drivers/net/ethernet/freescale/dpaa2/Makefile b/drivers/net/ethernet/freescale/dpaa2/Makefile
index 69184ca3b7b9..6e7f33c956bf 100644
--- a/drivers/net/ethernet/freescale/dpaa2/Makefile
+++ b/drivers/net/ethernet/freescale/dpaa2/Makefile
@@ -7,6 +7,7 @@ obj-$(CONFIG_FSL_DPAA2_ETH)		+= fsl-dpaa2-eth.o
 obj-$(CONFIG_FSL_DPAA2_PTP_CLOCK)	+= fsl-dpaa2-ptp.o
 
 fsl-dpaa2-eth-objs	:= dpaa2-eth.o dpaa2-ethtool.o dpni.o dpaa2-mac.o dpmac.o
+fsl-dpaa2-eth-${CONFIG_FSL_DPAA2_ETH_DCB} += dpaa2-eth-dcb.o
 fsl-dpaa2-eth-${CONFIG_DEBUG_FS} += dpaa2-eth-debugfs.o
 fsl-dpaa2-ptp-objs	:= dpaa2-ptp.o dprtc.o
 
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
new file mode 100644
index 000000000000..7ee07872af4d
--- /dev/null
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
@@ -0,0 +1,146 @@
+// SPDX-License-Identifier: (GPL-2.0+ OR BSD-3-Clause)
+/* Copyright 2020 NXP */
+
+#include "dpaa2-eth.h"
+
+static int dpaa2_eth_dcbnl_ieee_getpfc(struct net_device *net_dev,
+				       struct ieee_pfc *pfc)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	if (!(priv->link_state.options & DPNI_LINK_OPT_PFC_PAUSE))
+		return 0;
+
+	memcpy(pfc, &priv->pfc, sizeof(priv->pfc));
+	pfc->pfc_cap = dpaa2_eth_tc_count(priv);
+
+	return 0;
+}
+
+static inline bool is_prio_enabled(u8 pfc_en, u8 tc)
+{
+	return !!(pfc_en & (1 << tc));
+}
+
+static int set_pfc_cn(struct dpaa2_eth_priv *priv, u8 pfc_en)
+{
+	struct dpni_congestion_notification_cfg cfg = {0};
+	int i, err;
+
+	cfg.notification_mode = DPNI_CONG_OPT_FLOW_CONTROL;
+	cfg.units = DPNI_CONGESTION_UNIT_FRAMES;
+	cfg.message_iova = 0ULL;
+	cfg.message_ctx = 0ULL;
+
+	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
+		if (is_prio_enabled(pfc_en, i)) {
+			cfg.threshold_entry = DPAA2_ETH_CN_THRESH_ENTRY(priv);
+			cfg.threshold_exit = DPAA2_ETH_CN_THRESH_EXIT(priv);
+		} else {
+			/* For priorities not set in the pfc_en mask, we leave
+			 * the congestion thresholds at zero, which effectively
+			 * disables generation of PFC frames for them
+			 */
+			cfg.threshold_entry = 0;
+			cfg.threshold_exit = 0;
+		}
+
+		err = dpni_set_congestion_notification(priv->mc_io, 0,
+						       priv->mc_token,
+						       DPNI_QUEUE_RX, i, &cfg);
+		if (err) {
+			netdev_err(priv->net_dev,
+				   "dpni_set_congestion_notification failed\n");
+			return err;
+		}
+	}
+
+	return 0;
+}
+
+static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev,
+				       struct ieee_pfc *pfc)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+	struct dpni_link_cfg link_cfg = {0};
+	int err;
+
+	if (pfc->mbc || pfc->delay)
+		return -EOPNOTSUPP;
+
+	/* If same PFC enabled mask, nothing to do */
+	if (priv->pfc.pfc_en == pfc->pfc_en)
+		return 0;
+
+	/* We allow PFC configuration even if it won't have any effect until
+	 * general pause frames are enabled
+	 */
+	if (!dpaa2_eth_rx_pause_enabled(priv->link_state.options) ||
+	    !dpaa2_eth_tx_pause_enabled(priv->link_state.options))
+		netdev_warn(net_dev, "Pause support must be enabled in order for PFC to work!\n");
+
+	link_cfg.rate = priv->link_state.rate;
+	link_cfg.options = priv->link_state.options;
+	if (pfc->pfc_en)
+		link_cfg.options |= DPNI_LINK_OPT_PFC_PAUSE;
+	else
+		link_cfg.options &= ~DPNI_LINK_OPT_PFC_PAUSE;
+	err = dpni_set_link_cfg(priv->mc_io, 0, priv->mc_token, &link_cfg);
+	if (err) {
+		netdev_err(net_dev, "dpni_set_link_cfg failed\n");
+		return err;
+	}
+
+	/* Configure congestion notifications for the enabled priorities */
+	err = set_pfc_cn(priv, pfc->pfc_en);
+	if (err)
+		return err;
+
+	memcpy(&priv->pfc, pfc, sizeof(priv->pfc));
+
+	return 0;
+}
+
+static u8 dpaa2_eth_dcbnl_getdcbx(struct net_device *net_dev)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	return priv->dcbx_mode;
+}
+
+static u8 dpaa2_eth_dcbnl_setdcbx(struct net_device *net_dev, u8 mode)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	return (mode != (priv->dcbx_mode)) ? 1 : 0;
+}
+
+static u8 dpaa2_eth_dcbnl_getcap(struct net_device *net_dev, int capid, u8 *cap)
+{
+	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
+
+	switch (capid) {
+	case DCB_CAP_ATTR_PFC:
+		*cap = true;
+		break;
+	case DCB_CAP_ATTR_PFC_TCS:
+		*cap = 1 << (dpaa2_eth_tc_count(priv) - 1);
+		break;
+	case DCB_CAP_ATTR_DCBX:
+		*cap = priv->dcbx_mode;
+		break;
+	default:
+		*cap = false;
+		break;
+	}
+
+	return 0;
+}
+
+const struct dcbnl_rtnl_ops dpaa2_eth_dcbnl_ops = {
+	.ieee_getpfc	= dpaa2_eth_dcbnl_ieee_getpfc,
+	.ieee_setpfc	= dpaa2_eth_dcbnl_ieee_setpfc,
+	.getdcbx	= dpaa2_eth_dcbnl_getdcbx,
+	.setdcbx	= dpaa2_eth_dcbnl_setdcbx,
+	.getcap		= dpaa2_eth_dcbnl_getcap,
+};
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index 04eff6308c72..cde9d0e2dd6d 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -3844,6 +3844,15 @@ static int dpaa2_eth_probe(struct fsl_mc_device *dpni_dev)
 	if (err)
 		goto err_alloc_rings;
 
+#ifdef CONFIG_FSL_DPAA2_ETH_DCB
+	if (dpaa2_eth_has_pause_support(priv) && priv->vlan_cls_enabled) {
+		priv->dcbx_mode = DCB_CAP_DCBX_HOST | DCB_CAP_DCBX_VER_IEEE;
+		net_dev->dcbnl_ops = &dpaa2_eth_dcbnl_ops;
+	} else {
+		dev_dbg(dev, "PFC not supported\n");
+	}
+#endif
+
 	err = setup_irqs(dpni_dev);
 	if (err) {
 		netdev_warn(net_dev, "Failed to set link interrupt, fall back to polling\n");
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 02c0eea69a23..31b7b9b52da0 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -6,6 +6,7 @@
 #ifndef __DPAA2_ETH_H
 #define __DPAA2_ETH_H
 
+#include <linux/dcbnl.h>
 #include <linux/netdevice.h>
 #include <linux/if_vlan.h>
 #include <linux/fsl/mc.h>
@@ -65,6 +66,17 @@
 #define DPAA2_ETH_CG_TAILDROP_THRESH(priv)				\
 	(1024 * dpaa2_eth_queue_count(priv) / dpaa2_eth_tc_count(priv))
 
+/* Congestion group notification threshold: when this many frames accumulate
+ * on the Rx queues belonging to the same TC, the MAC is instructed to send
+ * PFC frames for that TC.
+ * When number of pending frames drops below exit threshold transmission of
+ * PFC frames is stopped.
+ */
+#define DPAA2_ETH_CN_THRESH_ENTRY(priv) \
+	(DPAA2_ETH_CG_TAILDROP_THRESH(priv) / 2)
+#define DPAA2_ETH_CN_THRESH_EXIT(priv) \
+	(DPAA2_ETH_CN_THRESH_ENTRY(priv) * 3 / 4)
+
 /* Maximum number of buffers that can be acquired/released through a single
  * QBMan command
  */
@@ -436,6 +448,10 @@ struct dpaa2_eth_priv {
 	struct dpaa2_eth_cls_rule *cls_rules;
 	u8 rx_cls_enabled;
 	u8 vlan_cls_enabled;
+#ifdef CONFIG_FSL_DPAA2_ETH_DCB
+	u8 dcbx_mode;
+	struct ieee_pfc pfc;
+#endif
 	struct bpf_prog *xdp_prog;
 #ifdef CONFIG_DEBUG_FS
 	struct dpaa2_debugfs dbg;
@@ -568,4 +584,6 @@ int dpaa2_eth_cls_key_size(u64 key);
 int dpaa2_eth_cls_fld_off(int prot, int field);
 void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields);
 
+extern const struct dcbnl_rtnl_ops dpaa2_eth_dcbnl_ops;
+
 #endif	/* __DPAA2_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
index 0048e856f85e..fd069f67be9b 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni-cmd.h
@@ -601,4 +601,29 @@ struct dpni_cmd_remove_qos_entry {
 	__le64 mask_iova;
 };
 
+#define DPNI_DEST_TYPE_SHIFT		0
+#define DPNI_DEST_TYPE_SIZE		4
+#define DPNI_CONG_UNITS_SHIFT		4
+#define DPNI_CONG_UNITS_SIZE		2
+
+struct dpni_cmd_set_congestion_notification {
+	/* cmd word 0 */
+	u8 qtype;
+	u8 tc;
+	u8 pad[6];
+	/* cmd word 1 */
+	__le32 dest_id;
+	__le16 notification_mode;
+	u8 dest_priority;
+	/* from LSB: dest_type: 4 units:2 */
+	u8 type_units;
+	/* cmd word 2 */
+	__le64 message_iova;
+	/* cmd word 3 */
+	__le64 message_ctx;
+	/* cmd word 4 */
+	__le32 threshold_entry;
+	__le32 threshold_exit;
+};
+
 #endif /* _FSL_DPNI_CMD_H */
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.c b/drivers/net/ethernet/freescale/dpaa2/dpni.c
index 78fa325407ca..6b479ba66465 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.c
@@ -1354,6 +1354,52 @@ int dpni_set_rx_tc_dist(struct fsl_mc_io *mc_io,
 	return mc_send_command(mc_io, &cmd);
 }
 
+/**
+ * dpni_set_congestion_notification() - Set traffic class congestion
+ *					notification configuration
+ * @mc_io:	Pointer to MC portal's I/O object
+ * @cmd_flags:	Command flags; one or more of 'MC_CMD_FLAG_'
+ * @token:	Token of DPNI object
+ * @qtype:	Type of queue - Rx, Tx and Tx confirm types are supported
+ * @tc_id:	Traffic class selection (0-7)
+ * @cfg:	Congestion notification configuration
+ *
+ * Return:	'0' on Success; error code otherwise.
+ */
+int dpni_set_congestion_notification(
+			struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			enum dpni_queue_type qtype,
+			u8 tc_id,
+			const struct dpni_congestion_notification_cfg *cfg)
+{
+	struct dpni_cmd_set_congestion_notification *cmd_params;
+	struct fsl_mc_command cmd = { 0 };
+
+	/* prepare command */
+	cmd.header =
+		mc_encode_cmd_header(DPNI_CMDID_SET_CONGESTION_NOTIFICATION,
+				     cmd_flags,
+				     token);
+	cmd_params = (struct dpni_cmd_set_congestion_notification *)cmd.params;
+	cmd_params->qtype = qtype;
+	cmd_params->tc = tc_id;
+	cmd_params->dest_id = cpu_to_le32(cfg->dest_cfg.dest_id);
+	cmd_params->notification_mode = cpu_to_le16(cfg->notification_mode);
+	cmd_params->dest_priority = cfg->dest_cfg.priority;
+	dpni_set_field(cmd_params->type_units, DEST_TYPE,
+		       cfg->dest_cfg.dest_type);
+	dpni_set_field(cmd_params->type_units, CONG_UNITS, cfg->units);
+	cmd_params->message_iova = cpu_to_le64(cfg->message_iova);
+	cmd_params->message_ctx = cpu_to_le64(cfg->message_ctx);
+	cmd_params->threshold_entry = cpu_to_le32(cfg->threshold_entry);
+	cmd_params->threshold_exit = cpu_to_le32(cfg->threshold_exit);
+
+	/* send command to mc*/
+	return mc_send_command(mc_io, &cmd);
+}
+
 /**
  * dpni_set_queue() - Set queue parameters
  * @mc_io:	Pointer to MC portal's I/O object
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpni.h b/drivers/net/ethernet/freescale/dpaa2/dpni.h
index 8c7ac20bf1a7..e874d8084142 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpni.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpni.h
@@ -513,6 +513,11 @@ int dpni_get_statistics(struct fsl_mc_io	*mc_io,
  */
 #define DPNI_LINK_OPT_ASYM_PAUSE	0x0000000000000008ULL
 
+/**
+ * Enable priority flow control pause frames
+ */
+#define DPNI_LINK_OPT_PFC_PAUSE		0x0000000000000010ULL
+
 /**
  * struct - Structure representing DPNI link configuration
  * @rate: Rate
@@ -877,6 +882,62 @@ enum dpni_congestion_point {
 	DPNI_CP_GROUP,
 };
 
+/**
+ * struct dpni_dest_cfg - Structure representing DPNI destination parameters
+ * @dest_type:	Destination type
+ * @dest_id:	Either DPIO ID or DPCON ID, depending on the destination type
+ * @priority:	Priority selection within the DPIO or DPCON channel; valid
+ *		values are 0-1 or 0-7, depending on the number of priorities
+ *		in that channel; not relevant for 'DPNI_DEST_NONE' option
+ */
+struct dpni_dest_cfg {
+	enum dpni_dest dest_type;
+	int dest_id;
+	u8 priority;
+};
+
+/* DPNI congestion options */
+
+/**
+ * This congestion will trigger flow control or priority flow control.
+ * This will have effect only if flow control is enabled with
+ * dpni_set_link_cfg().
+ */
+#define DPNI_CONG_OPT_FLOW_CONTROL		0x00000040
+
+/**
+ * struct dpni_congestion_notification_cfg - congestion notification
+ *					configuration
+ * @units: Units type
+ * @threshold_entry: Above this threshold we enter a congestion state.
+ *		set it to '0' to disable it
+ * @threshold_exit: Below this threshold we exit the congestion state.
+ * @message_ctx: The context that will be part of the CSCN message
+ * @message_iova: I/O virtual address (must be in DMA-able memory),
+ *		must be 16B aligned; valid only if 'DPNI_CONG_OPT_WRITE_MEM_<X>'
+ *		is contained in 'options'
+ * @dest_cfg: CSCN can be sent to either DPIO or DPCON WQ channel
+ * @notification_mode: Mask of available options; use 'DPNI_CONG_OPT_<X>' values
+ */
+
+struct dpni_congestion_notification_cfg {
+	enum dpni_congestion_unit units;
+	u32 threshold_entry;
+	u32 threshold_exit;
+	u64 message_ctx;
+	u64 message_iova;
+	struct dpni_dest_cfg dest_cfg;
+	u16 notification_mode;
+};
+
+int dpni_set_congestion_notification(
+			struct fsl_mc_io *mc_io,
+			u32 cmd_flags,
+			u16 token,
+			enum dpni_queue_type qtype,
+			u8 tc_id,
+			const struct dpni_congestion_notification_cfg *cfg);
+
 /**
  * struct dpni_taildrop - Structure representing the taildrop
  * @enable:	Indicates whether the taildrop is active or not.
-- 
cgit v1.2.3-59-g8ed1b


From 07beb1651adcd324f4d91584d5cab75d5882a9c2 Mon Sep 17 00:00:00 2001
From: Ioana Ciornei <ioana.ciornei@nxp.com>
Date: Sun, 31 May 2020 00:08:14 +0300
Subject: dpaa2-eth: Keep congestion group taildrop enabled when PFC on

Leave congestion group taildrop enabled for all traffic classes
when PFC is enabled. Notification threshold is low enough such
that it will be hit first and this also ensures that FQs on
traffic classes which are not PFC enabled won't drain the buffer
pool.

FQ taildrop threshold is kept disabled as long as any form of
flow control is on. Since FQ taildrop works with bytes, not number
of frames, we can't guarantee it will not interfere with the
congestion notification mechanism for all frame sizes.

Signed-off-by: Ioana Ciornei <ioana.ciornei@nxp.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 .../net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c   |  8 ++++--
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c   | 29 ++++++++++++++++------
 drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h   |  7 +++++-
 3 files changed, 34 insertions(+), 10 deletions(-)

diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
index 7ee07872af4d..83dee575c2fa 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth-dcb.c
@@ -63,6 +63,7 @@ static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev,
 {
 	struct dpaa2_eth_priv *priv = netdev_priv(net_dev);
 	struct dpni_link_cfg link_cfg = {0};
+	bool tx_pause;
 	int err;
 
 	if (pfc->mbc || pfc->delay)
@@ -75,8 +76,8 @@ static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev,
 	/* We allow PFC configuration even if it won't have any effect until
 	 * general pause frames are enabled
 	 */
-	if (!dpaa2_eth_rx_pause_enabled(priv->link_state.options) ||
-	    !dpaa2_eth_tx_pause_enabled(priv->link_state.options))
+	tx_pause = dpaa2_eth_tx_pause_enabled(priv->link_state.options);
+	if (!dpaa2_eth_rx_pause_enabled(priv->link_state.options) || !tx_pause)
 		netdev_warn(net_dev, "Pause support must be enabled in order for PFC to work!\n");
 
 	link_cfg.rate = priv->link_state.rate;
@@ -97,6 +98,9 @@ static int dpaa2_eth_dcbnl_ieee_setpfc(struct net_device *net_dev,
 		return err;
 
 	memcpy(&priv->pfc, pfc, sizeof(priv->pfc));
+	priv->pfc_enabled = !!pfc->pfc_en;
+
+	dpaa2_eth_set_rx_taildrop(priv, tx_pause, priv->pfc_enabled);
 
 	return 0;
 }
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
index cde9d0e2dd6d..8fb48de5d18c 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.c
@@ -1287,18 +1287,22 @@ static void disable_ch_napi(struct dpaa2_eth_priv *priv)
 	}
 }
 
-static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
-				      bool tx_pause)
+void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
+			       bool tx_pause, bool pfc)
 {
 	struct dpni_taildrop td = {0};
 	struct dpaa2_eth_fq *fq;
 	int i, err;
 
+	/* FQ taildrop: threshold is in bytes, per frame queue. Enabled if
+	 * flow control is disabled (as it might interfere with either the
+	 * buffer pool depletion trigger for pause frames or with the group
+	 * congestion trigger for PFC frames)
+	 */
 	td.enable = !tx_pause;
-	if (priv->rx_td_enabled == td.enable)
-		return;
+	if (priv->rx_fqtd_enabled == td.enable)
+		goto set_cgtd;
 
-	/* FQ taildrop: threshold is in bytes, per frame queue */
 	td.threshold = DPAA2_ETH_FQ_TAILDROP_THRESH;
 	td.units = DPNI_CONGESTION_UNIT_BYTES;
 
@@ -1316,9 +1320,20 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
 		}
 	}
 
+	priv->rx_fqtd_enabled = td.enable;
+
+set_cgtd:
 	/* Congestion group taildrop: threshold is in frames, per group
 	 * of FQs belonging to the same traffic class
+	 * Enabled if general Tx pause disabled or if PFCs are enabled
+	 * (congestion group threshold for PFC generation is lower than the
+	 * CG taildrop threshold, so it won't interfere with it; we also
+	 * want frames in non-PFC enabled traffic classes to be kept in check)
 	 */
+	td.enable = !tx_pause || (tx_pause && pfc);
+	if (priv->rx_cgtd_enabled == td.enable)
+		return;
+
 	td.threshold = DPAA2_ETH_CG_TAILDROP_THRESH(priv);
 	td.units = DPNI_CONGESTION_UNIT_FRAMES;
 	for (i = 0; i < dpaa2_eth_tc_count(priv); i++) {
@@ -1332,7 +1347,7 @@ static void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
 		}
 	}
 
-	priv->rx_td_enabled = td.enable;
+	priv->rx_cgtd_enabled = td.enable;
 }
 
 static int link_state_update(struct dpaa2_eth_priv *priv)
@@ -1353,7 +1368,7 @@ static int link_state_update(struct dpaa2_eth_priv *priv)
 	 * only when pause frame generation is disabled.
 	 */
 	tx_pause = dpaa2_eth_tx_pause_enabled(state.options);
-	dpaa2_eth_set_rx_taildrop(priv, tx_pause);
+	dpaa2_eth_set_rx_taildrop(priv, tx_pause, priv->pfc_enabled);
 
 	/* When we manage the MAC/PHY using phylink there is no need
 	 * to manually update the netif_carrier.
diff --git a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
index 31b7b9b52da0..2d7ada0f0dbd 100644
--- a/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
+++ b/drivers/net/ethernet/freescale/dpaa2/dpaa2-eth.h
@@ -436,7 +436,8 @@ struct dpaa2_eth_priv {
 	struct dpaa2_eth_drv_stats __percpu *percpu_extras;
 
 	u16 mc_token;
-	u8 rx_td_enabled;
+	u8 rx_fqtd_enabled;
+	u8 rx_cgtd_enabled;
 
 	struct dpni_link_state link_state;
 	bool do_link_poll;
@@ -448,6 +449,7 @@ struct dpaa2_eth_priv {
 	struct dpaa2_eth_cls_rule *cls_rules;
 	u8 rx_cls_enabled;
 	u8 vlan_cls_enabled;
+	u8 pfc_enabled;
 #ifdef CONFIG_FSL_DPAA2_ETH_DCB
 	u8 dcbx_mode;
 	struct ieee_pfc pfc;
@@ -584,6 +586,9 @@ int dpaa2_eth_cls_key_size(u64 key);
 int dpaa2_eth_cls_fld_off(int prot, int field);
 void dpaa2_eth_cls_trim_rule(void *key_mem, u64 fields);
 
+void dpaa2_eth_set_rx_taildrop(struct dpaa2_eth_priv *priv,
+			       bool tx_pause, bool pfc);
+
 extern const struct dcbnl_rtnl_ops dpaa2_eth_dcbnl_ops;
 
 #endif	/* __DPAA2_H */
-- 
cgit v1.2.3-59-g8ed1b


From 547ce4cfb34cdecfa0ee19c29a5510329a7ac802 Mon Sep 17 00:00:00 2001
From: Al Viro <viro@zeniv.linux.org.uk>
Date: Sun, 31 May 2020 02:06:55 +0100
Subject: switch cmsghdr_from_user_compat_to_kern() to copy_from_user()

no point getting compat_cmsghdr field-by-field

Signed-off-by: Al Viro <viro@zeniv.linux.org.uk>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/compat.c | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/net/compat.c b/net/compat.c
index afd7b444e0bf..5e3041a2c37d 100644
--- a/net/compat.c
+++ b/net/compat.c
@@ -183,20 +183,21 @@ int cmsghdr_from_user_compat_to_kern(struct msghdr *kmsg, struct sock *sk,
 	memset(kcmsg, 0, kcmlen);
 	ucmsg = CMSG_COMPAT_FIRSTHDR(kmsg);
 	while (ucmsg != NULL) {
-		if (__get_user(ucmlen, &ucmsg->cmsg_len))
+		struct compat_cmsghdr cmsg;
+		if (copy_from_user(&cmsg, ucmsg, sizeof(cmsg)))
 			goto Efault;
-		if (!CMSG_COMPAT_OK(ucmlen, ucmsg, kmsg))
+		if (!CMSG_COMPAT_OK(cmsg.cmsg_len, ucmsg, kmsg))
 			goto Einval;
-		tmp = ((ucmlen - sizeof(*ucmsg)) + sizeof(struct cmsghdr));
+		tmp = ((cmsg.cmsg_len - sizeof(*ucmsg)) + sizeof(struct cmsghdr));
 		if ((char *)kcmsg_base + kcmlen - (char *)kcmsg < CMSG_ALIGN(tmp))
 			goto Einval;
 		kcmsg->cmsg_len = tmp;
+		kcmsg->cmsg_level = cmsg.cmsg_level;
+		kcmsg->cmsg_type = cmsg.cmsg_type;
 		tmp = CMSG_ALIGN(tmp);
-		if (__get_user(kcmsg->cmsg_level, &ucmsg->cmsg_level) ||
-		    __get_user(kcmsg->cmsg_type, &ucmsg->cmsg_type) ||
-		    copy_from_user(CMSG_DATA(kcmsg),
+		if (copy_from_user(CMSG_DATA(kcmsg),
 				   CMSG_COMPAT_DATA(ucmsg),
-				   (ucmlen - sizeof(*ucmsg))))
+				   (cmsg.cmsg_len - sizeof(*ucmsg))))
 			goto Efault;
 
 		/* Advance. */
-- 
cgit v1.2.3-59-g8ed1b


From 03eaeda7806dcafb221a66939fcec9748619d16a Mon Sep 17 00:00:00 2001
From: Roopa Prabhu <roopa@cumulusnetworks.com>
Date: Sat, 30 May 2020 22:17:20 -0700
Subject: vxlan: fix dereference of nexthop group in nexthop update path

Fix the dereference of the nexthop group in the fdb nexthop group
update validation path: read nh->nh_grp through rtnl_dereference()
before checking its mpath flag.

Fixes: 1274e1cc4226 ("vxlan: ecmp support for mac fdb entries")
Reported-by: Ido Schimmel <idosch@idosch.org>
Suggested-by: Ido Schimmel <idosch@idosch.org>
Signed-off-by: Roopa Prabhu <roopa@cumulusnetworks.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vxlan.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/net/vxlan.c b/drivers/net/vxlan.c
index d5906b41cdae..5bb448ae6c9c 100644
--- a/drivers/net/vxlan.c
+++ b/drivers/net/vxlan.c
@@ -881,13 +881,13 @@ static int vxlan_fdb_nh_update(struct vxlan_dev *vxlan, struct vxlan_fdb *fdb,
 			goto err_inval;
 		}
 
-		if (!nh->is_group || !nh->nh_grp->mpath) {
+		nhg = rtnl_dereference(nh->nh_grp);
+		if (!nh->is_group || !nhg->mpath) {
 			NL_SET_ERR_MSG(extack, "Nexthop is not a multipath group");
 			goto err_inval;
 		}
 
 		/* check nexthop group family */
-		nhg = rtnl_dereference(nh->nh_grp);
 		switch (vxlan->default_dst.remote_ip.sa.sa_family) {
 		case AF_INET:
 			if (!nhg->has_v4) {
-- 
cgit v1.2.3-59-g8ed1b


From eae9d3c0167df840e821317040efcf0ca6789cb9 Mon Sep 17 00:00:00 2001
From: Vladimir Oltean <vladimir.oltean@nxp.com>
Date: Sun, 31 May 2020 21:25:51 +0300
Subject: net: dsa: sja1105: suppress -Wmissing-prototypes in sja1105_vl.c

Newer C compilers are complaining about the fact that there are no
function prototypes in sja1105_vl.c for the non-static functions.
Give them what they want.

Signed-off-by: Vladimir Oltean <vladimir.oltean@nxp.com>
Reviewed-by: Florian Fainelli <f.fainelli@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/dsa/sja1105/sja1105_vl.c | 2 +-
 drivers/net/dsa/sja1105/sja1105_vl.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/drivers/net/dsa/sja1105/sja1105_vl.c b/drivers/net/dsa/sja1105/sja1105_vl.c
index f37611885376..bdfd6c4e190d 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.c
+++ b/drivers/net/dsa/sja1105/sja1105_vl.c
@@ -3,7 +3,7 @@
  */
 #include <net/tc_act/tc_gate.h>
 #include <linux/dsa/8021q.h>
-#include "sja1105.h"
+#include "sja1105_vl.h"
 
 #define SJA1105_SIZE_VL_STATUS			8
 
diff --git a/drivers/net/dsa/sja1105/sja1105_vl.h b/drivers/net/dsa/sja1105/sja1105_vl.h
index 323fa0535af7..173d78963fed 100644
--- a/drivers/net/dsa/sja1105/sja1105_vl.h
+++ b/drivers/net/dsa/sja1105/sja1105_vl.h
@@ -4,6 +4,8 @@
 #ifndef _SJA1105_VL_H
 #define _SJA1105_VL_H
 
+#include "sja1105.h"
+
 #if IS_ENABLED(CONFIG_NET_DSA_SJA1105_VL)
 
 int sja1105_vl_redirect(struct sja1105_private *priv, int port,
-- 
cgit v1.2.3-59-g8ed1b


From 90040351a832acf862c8f1855c29411303d23755 Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Sat, 23 May 2020 02:07:51 +0100
Subject: tools, bpftool: Clean subcommand help messages

This is a clean-up for the formatting of the do_help functions for
bpftool's subcommands. The following fixes are included:

- Do not use argv[-2] for "iter" help message, as the help is shown by
  default if no "iter" action is selected, resulting in messages looking
  like "./bpftool bpftool pin...".

- Do not print unused HELP_SPEC_PROGRAM in help message for "bpftool
  link".

- Andrii used argument indexing to avoid having multiple occurrences of
  bin_name and argv[-2] in the fprintf() for the help message, for
  "bpftool gen" and "bpftool link". Let's reuse this for all other help
  functions. We can remove up to thirty arguments for the "bpftool map"
  help message.

- Harmonise all functions, e.g. use ending quotes-comma on a separate
  line.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200523010751.23465-1-quentin@isovalent.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf.c        |  8 ++++----
 tools/bpf/bpftool/cgroup.c     | 14 ++++++--------
 tools/bpf/bpftool/feature.c    |  6 +++---
 tools/bpf/bpftool/gen.c        |  6 +++---
 tools/bpf/bpftool/iter.c       |  8 ++++----
 tools/bpf/bpftool/link.c       |  1 -
 tools/bpf/bpftool/map.c        | 41 ++++++++++++++++++-----------------------
 tools/bpf/bpftool/net.c        | 12 ++++++------
 tools/bpf/bpftool/perf.c       |  2 +-
 tools/bpf/bpftool/prog.c       | 27 ++++++++++++---------------
 tools/bpf/bpftool/struct_ops.c | 15 +++++++--------
 11 files changed, 64 insertions(+), 76 deletions(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index 41a1346934a1..c134666591a6 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -951,9 +951,9 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s btf { show | list } [id BTF_ID]\n"
-		"       %s btf dump BTF_SRC [format FORMAT]\n"
-		"       %s btf help\n"
+		"Usage: %1$s %2$s { show | list } [id BTF_ID]\n"
+		"       %1$s %2$s dump BTF_SRC [format FORMAT]\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       BTF_SRC := { id BTF_ID | prog PROG | map MAP [{key | value | kv | all}] | file FILE }\n"
 		"       FORMAT  := { raw | c }\n"
@@ -961,7 +961,7 @@ static int do_help(int argc, char **argv)
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
-		bin_name, bin_name, bin_name);
+		bin_name, "btf");
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/cgroup.c b/tools/bpf/bpftool/cgroup.c
index 27931db421d8..d901cc1b904a 100644
--- a/tools/bpf/bpftool/cgroup.c
+++ b/tools/bpf/bpftool/cgroup.c
@@ -491,20 +491,18 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s { show | list } CGROUP [**effective**]\n"
-		"       %s %s tree [CGROUP_ROOT] [**effective**]\n"
-		"       %s %s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
-		"       %s %s detach CGROUP ATTACH_TYPE PROG\n"
-		"       %s %s help\n"
+		"Usage: %1$s %2$s { show | list } CGROUP [**effective**]\n"
+		"       %1$s %2$s tree [CGROUP_ROOT] [**effective**]\n"
+		"       %1$s %2$s attach CGROUP ATTACH_TYPE PROG [ATTACH_FLAGS]\n"
+		"       %1$s %2$s detach CGROUP ATTACH_TYPE PROG\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		HELP_SPEC_ATTACH_TYPES "\n"
 		"       " HELP_SPEC_ATTACH_FLAGS "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
-		bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2]);
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index 1b73e63274b5..f05e9e57b593 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -937,12 +937,12 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
-		"       %s %s help\n"
+		"Usage: %1$s %2$s probe [COMPONENT] [full] [unprivileged] [macros [prefix PREFIX]]\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       COMPONENT := { kernel | dev NAME }\n"
 		"",
-		bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2]);
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/gen.c b/tools/bpf/bpftool/gen.c
index 0e5f0236cc76..a3c4bb86c05a 100644
--- a/tools/bpf/bpftool/gen.c
+++ b/tools/bpf/bpftool/gen.c
@@ -586,12 +586,12 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %1$s gen skeleton FILE\n"
-		"       %1$s gen help\n"
+		"Usage: %1$s %2$s skeleton FILE\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
-		bin_name);
+		bin_name, "gen");
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/iter.c b/tools/bpf/bpftool/iter.c
index eb5987a0c3b6..33240fcc6319 100644
--- a/tools/bpf/bpftool/iter.c
+++ b/tools/bpf/bpftool/iter.c
@@ -68,10 +68,10 @@ close_obj:
 static int do_help(int argc, char **argv)
 {
 	fprintf(stderr,
-		"Usage: %s %s pin OBJ PATH\n"
-		"       %s %s help\n"
-		"\n",
-		bin_name, argv[-2], bin_name, argv[-2]);
+		"Usage: %1$s %2$s pin OBJ PATH\n"
+		"       %1$s %2$s help\n"
+		"",
+		bin_name, "iter");
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index b6a0b35c78ae..670a561dc31b 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -312,7 +312,6 @@ static int do_help(int argc, char **argv)
 		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_LINK "\n"
-		"       " HELP_SPEC_PROGRAM "\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
 		bin_name, argv[-2]);
diff --git a/tools/bpf/bpftool/map.c b/tools/bpf/bpftool/map.c
index 85cbe9a19170..c5fac8068ba1 100644
--- a/tools/bpf/bpftool/map.c
+++ b/tools/bpf/bpftool/map.c
@@ -1561,24 +1561,24 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s { show | list }   [MAP]\n"
-		"       %s %s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
-		"                              entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
-		"                              [dev NAME]\n"
-		"       %s %s dump       MAP\n"
-		"       %s %s update     MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
-		"       %s %s lookup     MAP [key DATA]\n"
-		"       %s %s getnext    MAP [key DATA]\n"
-		"       %s %s delete     MAP  key DATA\n"
-		"       %s %s pin        MAP  FILE\n"
-		"       %s %s event_pipe MAP [cpu N index M]\n"
-		"       %s %s peek       MAP\n"
-		"       %s %s push       MAP value VALUE\n"
-		"       %s %s pop        MAP\n"
-		"       %s %s enqueue    MAP value VALUE\n"
-		"       %s %s dequeue    MAP\n"
-		"       %s %s freeze     MAP\n"
-		"       %s %s help\n"
+		"Usage: %1$s %2$s { show | list }   [MAP]\n"
+		"       %1$s %2$s create     FILE type TYPE key KEY_SIZE value VALUE_SIZE \\\n"
+		"                                  entries MAX_ENTRIES name NAME [flags FLAGS] \\\n"
+		"                                  [dev NAME]\n"
+		"       %1$s %2$s dump       MAP\n"
+		"       %1$s %2$s update     MAP [key DATA] [value VALUE] [UPDATE_FLAGS]\n"
+		"       %1$s %2$s lookup     MAP [key DATA]\n"
+		"       %1$s %2$s getnext    MAP [key DATA]\n"
+		"       %1$s %2$s delete     MAP  key DATA\n"
+		"       %1$s %2$s pin        MAP  FILE\n"
+		"       %1$s %2$s event_pipe MAP [cpu N index M]\n"
+		"       %1$s %2$s peek       MAP\n"
+		"       %1$s %2$s push       MAP value VALUE\n"
+		"       %1$s %2$s pop        MAP\n"
+		"       %1$s %2$s enqueue    MAP value VALUE\n"
+		"       %1$s %2$s dequeue    MAP\n"
+		"       %1$s %2$s freeze     MAP\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_MAP "\n"
 		"       DATA := { [hex] BYTES }\n"
@@ -1593,11 +1593,6 @@ static int do_help(int argc, char **argv)
 		"                 queue | stack | sk_storage | struct_ops }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
 		bin_name, argv[-2]);
 
 	return 0;
diff --git a/tools/bpf/bpftool/net.c b/tools/bpf/bpftool/net.c
index c5e3895b7c8b..56c3a2bae3ef 100644
--- a/tools/bpf/bpftool/net.c
+++ b/tools/bpf/bpftool/net.c
@@ -458,10 +458,10 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s { show | list } [dev <devname>]\n"
-		"       %s %s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
-		"       %s %s detach ATTACH_TYPE dev <devname>\n"
-		"       %s %s help\n"
+		"Usage: %1$s %2$s { show | list } [dev <devname>]\n"
+		"       %1$s %2$s attach ATTACH_TYPE PROG dev <devname> [ overwrite ]\n"
+		"       %1$s %2$s detach ATTACH_TYPE dev <devname>\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_PROGRAM "\n"
 		"       ATTACH_TYPE := { xdp | xdpgeneric | xdpdrv | xdpoffload }\n"
@@ -470,8 +470,8 @@ static int do_help(int argc, char **argv)
 		"      For progs attached to cgroups, use \"bpftool cgroup\"\n"
 		"      to dump program attachments. For program types\n"
 		"      sk_{filter,skb,msg,reuseport} and lwt/seg6, please\n"
-		"      consult iproute2.\n",
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
+		"      consult iproute2.\n"
+		"",
 		bin_name, argv[-2]);
 
 	return 0;
diff --git a/tools/bpf/bpftool/perf.c b/tools/bpf/bpftool/perf.c
index 3341aa14acda..ad23934819c7 100644
--- a/tools/bpf/bpftool/perf.c
+++ b/tools/bpf/bpftool/perf.c
@@ -231,7 +231,7 @@ static int do_show(int argc, char **argv)
 static int do_help(int argc, char **argv)
 {
 	fprintf(stderr,
-		"Usage: %s %s { show | list | help }\n"
+		"Usage: %1$s %2$s { show | list | help }\n"
 		"",
 		bin_name, argv[-2]);
 
diff --git a/tools/bpf/bpftool/prog.c b/tools/bpf/bpftool/prog.c
index 245f941fdbcf..a5eff83496f2 100644
--- a/tools/bpf/bpftool/prog.c
+++ b/tools/bpf/bpftool/prog.c
@@ -1984,24 +1984,24 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s { show | list } [PROG]\n"
-		"       %s %s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
-		"       %s %s dump jited  PROG [{ file FILE | opcodes | linum }]\n"
-		"       %s %s pin   PROG FILE\n"
-		"       %s %s { load | loadall } OBJ  PATH \\\n"
+		"Usage: %1$s %2$s { show | list } [PROG]\n"
+		"       %1$s %2$s dump xlated PROG [{ file FILE | opcodes | visual | linum }]\n"
+		"       %1$s %2$s dump jited  PROG [{ file FILE | opcodes | linum }]\n"
+		"       %1$s %2$s pin   PROG FILE\n"
+		"       %1$s %2$s { load | loadall } OBJ  PATH \\\n"
 		"                         [type TYPE] [dev NAME] \\\n"
 		"                         [map { idx IDX | name NAME } MAP]\\\n"
 		"                         [pinmaps MAP_DIR]\n"
-		"       %s %s attach PROG ATTACH_TYPE [MAP]\n"
-		"       %s %s detach PROG ATTACH_TYPE [MAP]\n"
-		"       %s %s run PROG \\\n"
+		"       %1$s %2$s attach PROG ATTACH_TYPE [MAP]\n"
+		"       %1$s %2$s detach PROG ATTACH_TYPE [MAP]\n"
+		"       %1$s %2$s run PROG \\\n"
 		"                         data_in FILE \\\n"
 		"                         [data_out FILE [data_size_out L]] \\\n"
 		"                         [ctx_in FILE [ctx_out FILE [ctx_size_out M]]] \\\n"
 		"                         [repeat N]\n"
-		"       %s %s profile PROG [duration DURATION] METRICs\n"
-		"       %s %s tracelog\n"
-		"       %s %s help\n"
+		"       %1$s %2$s profile PROG [duration DURATION] METRICs\n"
+		"       %1$s %2$s tracelog\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       " HELP_SPEC_MAP "\n"
 		"       " HELP_SPEC_PROGRAM "\n"
@@ -2022,10 +2022,7 @@ static int do_help(int argc, char **argv)
 		"       METRIC := { cycles | instructions | l1d_loads | llc_misses }\n"
 		"       " HELP_SPEC_OPTIONS "\n"
 		"",
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2]);
+		bin_name, argv[-2]);
 
 	return 0;
 }
diff --git a/tools/bpf/bpftool/struct_ops.c b/tools/bpf/bpftool/struct_ops.c
index e17738479edc..b58b91f62ffb 100644
--- a/tools/bpf/bpftool/struct_ops.c
+++ b/tools/bpf/bpftool/struct_ops.c
@@ -566,16 +566,15 @@ static int do_help(int argc, char **argv)
 	}
 
 	fprintf(stderr,
-		"Usage: %s %s { show | list } [STRUCT_OPS_MAP]\n"
-		"       %s %s dump [STRUCT_OPS_MAP]\n"
-		"       %s %s register OBJ\n"
-		"       %s %s unregister STRUCT_OPS_MAP\n"
-		"       %s %s help\n"
+		"Usage: %1$s %2$s { show | list } [STRUCT_OPS_MAP]\n"
+		"       %1$s %2$s dump [STRUCT_OPS_MAP]\n"
+		"       %1$s %2$s register OBJ\n"
+		"       %1$s %2$s unregister STRUCT_OPS_MAP\n"
+		"       %1$s %2$s help\n"
 		"\n"
 		"       OPTIONS := { {-j|--json} [{-p|--pretty}] }\n"
-		"       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n",
-		bin_name, argv[-2], bin_name, argv[-2],
-		bin_name, argv[-2], bin_name, argv[-2],
+		"       STRUCT_OPS_MAP := [ id STRUCT_OPS_MAP_ID | name STRUCT_OPS_MAP_NAME ]\n"
+		"",
 		bin_name, argv[-2]);
 
 	return 0;
-- 
cgit v1.2.3-59-g8ed1b


From 73a4f0407e67cdfdf55dd94f573ed4ee2d0d62fe Mon Sep 17 00:00:00 2001
From: Quentin Monnet <quentin@isovalent.com>
Date: Sat, 23 May 2020 02:02:47 +0100
Subject: tools, bpftool: Make capability check account for new BPF caps

Following the introduction of CAP_BPF, and the switch from CAP_SYS_ADMIN
to other capabilities for various BPF features, update the capability
checks (and potentially, drops) in bpftool for feature probes. Because
bpftool and/or the system might not know of CAP_BPF yet, some caution is
necessary:

- If compiled and run on a system with CAP_BPF, check CAP_BPF,
  CAP_SYS_ADMIN, CAP_PERFMON, CAP_NET_ADMIN.

- Guard against CAP_BPF being undefined, to allow compiling bpftool from
  latest sources on older systems. If the system where feature probes
  are run does not know of CAP_BPF, stop checking after CAP_SYS_ADMIN,
  as this should be the only capability required for all the BPF
  probing.

- If compiled from latest sources on a system without CAP_BPF, but later
  executed on a newer system with CAP_BPF knowledge, then we only test
  CAP_SYS_ADMIN. Some probes may fail if the bpftool process has
  CAP_SYS_ADMIN but misses the other capabilities. The alternative would
  be to redefine the value for CAP_BPF in bpftool, but this does not
  look clean, and the case sounds relatively rare anyway.

Note that libcap offers a cap_to_name() function to retrieve the name of
a given capability (e.g. "cap_sys_admin"). We do not use it because
deriving the names from the macros looks simpler than using
cap_to_name() (doing a strdup() on the string) + cap_free() + handling
the case of failed allocations, when we just want to use the name of the
capability in an error message.

The checks when compiling without libcap (i.e. root versus non-root) are
unchanged.

v2:
- Do not allocate cap_list dynamically.
- Drop BPF-related capabilities when running with "unprivileged", even
  if we didn't have the full set in the first place (in v1, we would
  skip dropping them in that case).
- Keep track of what capabilities we have, print the names of the
  missing ones for privileged probing.
- Attempt to drop only the capabilities we actually have.
- Rename a couple variables.

Signed-off-by: Quentin Monnet <quentin@isovalent.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200523010247.20654-1-quentin@isovalent.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/feature.c | 85 +++++++++++++++++++++++++++++++++++----------
 1 file changed, 66 insertions(+), 19 deletions(-)

diff --git a/tools/bpf/bpftool/feature.c b/tools/bpf/bpftool/feature.c
index f05e9e57b593..768bf77df886 100644
--- a/tools/bpf/bpftool/feature.c
+++ b/tools/bpf/bpftool/feature.c
@@ -758,11 +758,29 @@ static void section_misc(const char *define_prefix, __u32 ifindex)
 	print_end_section();
 }
 
+#ifdef USE_LIBCAP
+#define capability(c) { c, false, #c }
+#define capability_msg(a, i) a[i].set ? "" : a[i].name, a[i].set ? "" : ", "
+#endif
+
 static int handle_perms(void)
 {
 #ifdef USE_LIBCAP
-	cap_value_t cap_list[1] = { CAP_SYS_ADMIN };
-	bool has_sys_admin_cap = false;
+	struct {
+		cap_value_t cap;
+		bool set;
+		char name[14];	/* strlen("CAP_SYS_ADMIN") */
+	} bpf_caps[] = {
+		capability(CAP_SYS_ADMIN),
+#ifdef CAP_BPF
+		capability(CAP_BPF),
+		capability(CAP_NET_ADMIN),
+		capability(CAP_PERFMON),
+#endif
+	};
+	cap_value_t cap_list[ARRAY_SIZE(bpf_caps)];
+	unsigned int i, nb_bpf_caps = 0;
+	bool cap_sys_admin_only = true;
 	cap_flag_value_t val;
 	int res = -1;
 	cap_t caps;
@@ -774,35 +792,64 @@ static int handle_perms(void)
 		return -1;
 	}
 
-	if (cap_get_flag(caps, CAP_SYS_ADMIN, CAP_EFFECTIVE, &val)) {
-		p_err("bug: failed to retrieve CAP_SYS_ADMIN status");
-		goto exit_free;
-	}
-	if (val == CAP_SET)
-		has_sys_admin_cap = true;
+#ifdef CAP_BPF
+	if (CAP_IS_SUPPORTED(CAP_BPF))
+		cap_sys_admin_only = false;
+#endif
 
-	if (!run_as_unprivileged && !has_sys_admin_cap) {
-		p_err("full feature probing requires CAP_SYS_ADMIN, run as root or use 'unprivileged'");
-		goto exit_free;
+	for (i = 0; i < ARRAY_SIZE(bpf_caps); i++) {
+		const char *cap_name = bpf_caps[i].name;
+		cap_value_t cap = bpf_caps[i].cap;
+
+		if (cap_get_flag(caps, cap, CAP_EFFECTIVE, &val)) {
+			p_err("bug: failed to retrieve %s status: %s", cap_name,
+			      strerror(errno));
+			goto exit_free;
+		}
+
+		if (val == CAP_SET) {
+			bpf_caps[i].set = true;
+			cap_list[nb_bpf_caps++] = cap;
+		}
+
+		if (cap_sys_admin_only)
+			/* System does not know about CAP_BPF, meaning that
+			 * CAP_SYS_ADMIN is the only capability required. We
+			 * just checked it, break.
+			 */
+			break;
 	}
 
-	if ((run_as_unprivileged && !has_sys_admin_cap) ||
-	    (!run_as_unprivileged && has_sys_admin_cap)) {
+	if ((run_as_unprivileged && !nb_bpf_caps) ||
+	    (!run_as_unprivileged && nb_bpf_caps == ARRAY_SIZE(bpf_caps)) ||
+	    (!run_as_unprivileged && cap_sys_admin_only && nb_bpf_caps)) {
 		/* We are all good, exit now */
 		res = 0;
 		goto exit_free;
 	}
 
-	/* if (run_as_unprivileged && has_sys_admin_cap), drop CAP_SYS_ADMIN */
+	if (!run_as_unprivileged) {
+		if (cap_sys_admin_only)
+			p_err("missing %s, required for full feature probing; run as root or use 'unprivileged'",
+			      bpf_caps[0].name);
+		else
+			p_err("missing %s%s%s%s%s%s%s%srequired for full feature probing; run as root or use 'unprivileged'",
+			      capability_msg(bpf_caps, 0),
+			      capability_msg(bpf_caps, 1),
+			      capability_msg(bpf_caps, 2),
+			      capability_msg(bpf_caps, 3));
+		goto exit_free;
+	}
 
-	if (cap_set_flag(caps, CAP_EFFECTIVE, ARRAY_SIZE(cap_list), cap_list,
+	/* if (run_as_unprivileged && nb_bpf_caps > 0), drop capabilities. */
+	if (cap_set_flag(caps, CAP_EFFECTIVE, nb_bpf_caps, cap_list,
 			 CAP_CLEAR)) {
-		p_err("bug: failed to clear CAP_SYS_ADMIN from capabilities");
+		p_err("bug: failed to clear capabilities: %s", strerror(errno));
 		goto exit_free;
 	}
 
 	if (cap_set_proc(caps)) {
-		p_err("failed to drop CAP_SYS_ADMIN: %s", strerror(errno));
+		p_err("failed to drop capabilities: %s", strerror(errno));
 		goto exit_free;
 	}
 
@@ -817,7 +864,7 @@ exit_free:
 
 	return res;
 #else
-	/* Detection assumes user has sufficient privileges (CAP_SYS_ADMIN).
+	/* Detection assumes user has specific privileges.
 	 * We do not use libpcap so let's approximate, and restrict usage to
 	 * root user only.
 	 */
@@ -901,7 +948,7 @@ static int do_probe(int argc, char **argv)
 		}
 	}
 
-	/* Full feature detection requires CAP_SYS_ADMIN privilege.
+	/* Full feature detection requires specific privileges.
 	 * Let's approximate, and warn if user is not root.
 	 */
 	if (handle_perms())
-- 
cgit v1.2.3-59-g8ed1b


From dc3ca5cf3e0be9fb73f4691247367d76a22bf30b Mon Sep 17 00:00:00 2001
From: Tobias Klauser <tklauser@distanz.ch>
Date: Mon, 25 May 2020 15:54:21 +0200
Subject: tools, bpftool: Print correct error message when failing to load BTF

btf__parse_raw and btf__parse_elf return negative error numbers wrapped
in an ERR_PTR, so the extracted value needs to be negated before passing
them to strerror which expects a positive error number.

Before:
  Error: failed to load BTF from .../vmlinux: Unknown error -2

After:
  Error: failed to load BTF from .../vmlinux: No such file or directory

Signed-off-by: Tobias Klauser <tklauser@distanz.ch>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200525135421.4154-1-tklauser@distanz.ch
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/bpf/bpftool/btf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/bpf/bpftool/btf.c b/tools/bpf/bpftool/btf.c
index c134666591a6..faac8189b285 100644
--- a/tools/bpf/bpftool/btf.c
+++ b/tools/bpf/bpftool/btf.c
@@ -553,7 +553,7 @@ static int do_dump(int argc, char **argv)
 			btf = btf__parse_elf(*argv, NULL);
 
 		if (IS_ERR(btf)) {
-			err = PTR_ERR(btf);
+			err = -PTR_ERR(btf);
 			btf = NULL;
 			p_err("failed to load BTF from %s: %s",
 			      *argv, strerror(err));
-- 
cgit v1.2.3-59-g8ed1b


From fe537393b5795ecbe5746eec0e16124bc998a594 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Mon, 25 May 2020 14:29:28 +0200
Subject: bpf: Fix returned error sign when link doesn't support updates

System calls encode returned errors as negative values. Fix a typo that
breaks this convention for bpf(LINK_UPDATE) when bpf_link doesn't support
update operation.

Fixes: f9d041271cf4 ("bpf: Refactor bpf_link update handling")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200525122928.1164495-1-jakub@cloudflare.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index aaa29fb6f363..d13b804ff045 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3924,7 +3924,7 @@ static int link_update(union bpf_attr *attr)
 	if (link->ops->update_prog)
 		ret = link->ops->update_prog(link, new_prog, old_prog);
 	else
-		ret = EINVAL;
+		ret = -EINVAL;
 
 out_put_progs:
 	if (old_prog)
-- 
cgit v1.2.3-59-g8ed1b


From 2b983b407a3a1f47f7d8595245066854ff352c65 Mon Sep 17 00:00:00 2001
From: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Date: Mon, 25 May 2020 16:15:53 +0200
Subject: MAINTAINERS: Adjust entry in XDP SOCKETS to actual file name

Commit 2b43470add8c ("xsk: Introduce AF_XDP buffer allocation API") added a
new header file include/net/xsk_buff_pool.h, but commit 28bee21dc04b
("MAINTAINERS, xsk: Update AF_XDP section after moves/adds") added a file
entry referring to include/net/xsk_buffer_pool.h.

Hence, ./scripts/get_maintainer.pl --self-test=patterns complains:

  warning: no file matches  F:  include/net/xsk_buffer_pool.h

Adjust the entry in XDP SOCKETS to the actual file name.

Signed-off-by: Lukas Bulwahn <lukas.bulwahn@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200525141553.7035-1-lukas.bulwahn@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 MAINTAINERS | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/MAINTAINERS b/MAINTAINERS
index 5d81c002232a..66d1a3f10102 100644
--- a/MAINTAINERS
+++ b/MAINTAINERS
@@ -18456,7 +18456,7 @@ L:	netdev@vger.kernel.org
 L:	bpf@vger.kernel.org
 S:	Maintained
 F:	include/net/xdp_sock*
-F:	include/net/xsk_buffer_pool.h
+F:	include/net/xsk_buff_pool.h
 F:	include/uapi/linux/if_xdp.h
 F:	net/xdp/
 F:	samples/bpf/xdpsock*
-- 
cgit v1.2.3-59-g8ed1b


From 272d51af32890632134845ddf35318c11da20c7b Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Tue, 26 May 2020 11:21:42 +0200
Subject: libbpf: Add API to consume the perf ring buffer content

This new API, perf_buffer__consume, can be used as follows:

- When you have a perf ring where wakeup_events is higher than 1,
  and you have remaining data in the rings you would like to pull
  out on exit (or maybe based on a timeout).

- For low latency cases where you burn a CPU that constantly polls
  the queues.

Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159048487929.89441.7465713173442594608.stgit@ebuild
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c   | 19 +++++++++++++++++++
 tools/lib/bpf/libbpf.h   |  1 +
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 21 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index fa04cbe547ed..5d60de6fd818 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8456,6 +8456,25 @@ int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms)
 	return cnt < 0 ? -errno : cnt;
 }
 
+int perf_buffer__consume(struct perf_buffer *pb)
+{
+	int i, err;
+
+	for (i = 0; i < pb->cpu_cnt; i++) {
+		struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
+
+		if (!cpu_buf)
+			continue;
+
+		err = perf_buffer__process_records(pb, cpu_buf);
+		if (err) {
+			pr_warn("error while processing records: %d\n", err);
+			return err;
+		}
+	}
+	return 0;
+}
+
 struct bpf_prog_info_array_desc {
 	int	array_offset;	/* e.g. offset of jited_prog_insns */
 	int	count_offset;	/* e.g. offset of jited_prog_len */
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8ea69558f0a8..1e2e399a5f2c 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -533,6 +533,7 @@ perf_buffer__new_raw(int map_fd, size_t page_cnt,
 
 LIBBPF_API void perf_buffer__free(struct perf_buffer *pb);
 LIBBPF_API int perf_buffer__poll(struct perf_buffer *pb, int timeout_ms);
+LIBBPF_API int perf_buffer__consume(struct perf_buffer *pb);
 
 typedef enum bpf_perf_event_ret
 	(*bpf_perf_event_print_t)(struct perf_event_header *hdr,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 0133d469d30b..381a7342ecfc 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -262,4 +262,5 @@ LIBBPF_0.0.9 {
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 		bpf_program__attach_iter;
+		perf_buffer__consume;
 } LIBBPF_0.0.8;
-- 
cgit v1.2.3-59-g8ed1b


From 93581359e7aeb11358018f2e3a737776d1e899ae Mon Sep 17 00:00:00 2001
From: Nikolay Borisov <nborisov@suse.com>
Date: Tue, 26 May 2020 20:46:12 +0300
Subject: libbpf: Install headers as part of make install

Currently, 'make install' installs only the pkg-config file and the
library binaries. For consistency, also install the headers as part of
'make install'.

Signed-off-by: Nikolay Borisov <nborisov@suse.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200526174612.5447-1-nborisov@suse.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Makefile | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index aee7f1a83c77..d02c4d910aad 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -264,7 +264,7 @@ install_pkgconfig: $(PC_FILE)
 	$(call QUIET_INSTALL, $(PC_FILE)) \
 		$(call do_install,$(PC_FILE),$(libdir_SQ)/pkgconfig,644)
 
-install: install_lib install_pkgconfig
+install: install_lib install_pkgconfig install_headers
 
 ### Cleaning rules
 
-- 
cgit v1.2.3-59-g8ed1b


From 0142dddcbe965450338076c486d0d757b3184352 Mon Sep 17 00:00:00 2001
From: Chris Packham <chris.packham@alliedtelesis.co.nz>
Date: Tue, 26 May 2020 11:00:24 +1200
Subject: bpf: Fix spelling in comment explaining ARG1 in ___bpf_prog_run

Change 'handeled' to 'handled'.

Signed-off-by: Chris Packham <chris.packham@alliedtelesis.co.nz>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200525230025.14470-1-chris.packham@alliedtelesis.co.nz
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/core.c b/kernel/bpf/core.c
index c40ff4cf9880..af52ca658c73 100644
--- a/kernel/bpf/core.c
+++ b/kernel/bpf/core.c
@@ -1543,7 +1543,7 @@ select_insn:
 
 		/* ARG1 at this point is guaranteed to point to CTX from
 		 * the verifier side due to the fact that the tail call is
-		 * handeled like a helper, that is, bpf_tail_call_proto,
+		 * handled like a helper, that is, bpf_tail_call_proto,
 		 * where arg1_type is ARG_PTR_TO_CTX.
 		 */
 		insn = prog->insnsi;
-- 
cgit v1.2.3-59-g8ed1b


From 55983299b7ea94d714c19cdfd8d969ba86e0d7e9 Mon Sep 17 00:00:00 2001
From: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Date: Mon, 25 May 2020 09:18:46 +0300
Subject: libbpf: Use .so dynamic symbols for abi check

Since dynamic symbols are used for dynamic linking it makes sense to
use them (readelf --dyn-syms) for abi check.

Found with some configuration on powerpc where linker puts
local *.plt_call.* symbols into .so.

Signed-off-by: Yauheni Kaliuta <yauheni.kaliuta@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/20200525061846.16524-1-yauheni.kaliuta@redhat.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Makefile | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/tools/lib/bpf/Makefile b/tools/lib/bpf/Makefile
index d02c4d910aad..bf8ed134cb8a 100644
--- a/tools/lib/bpf/Makefile
+++ b/tools/lib/bpf/Makefile
@@ -151,7 +151,7 @@ GLOBAL_SYM_COUNT = $(shell readelf -s --wide $(BPF_IN_SHARED) | \
 			   sed 's/\[.*\]//' | \
 			   awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}' | \
 			   sort -u | wc -l)
-VERSIONED_SYM_COUNT = $(shell readelf -s --wide $(OUTPUT)libbpf.so | \
+VERSIONED_SYM_COUNT = $(shell readelf --dyn-syms --wide $(OUTPUT)libbpf.so | \
 			      grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 | sort -u | wc -l)
 
 CMD_TARGETS = $(LIB_TARGET) $(PC_FILE)
@@ -218,7 +218,7 @@ check_abi: $(OUTPUT)libbpf.so
 		    sed 's/\[.*\]//' |					 \
 		    awk '/GLOBAL/ && /DEFAULT/ && !/UND/ {print $$NF}'|  \
 		    sort -u > $(OUTPUT)libbpf_global_syms.tmp;		 \
-		readelf -s --wide $(OUTPUT)libbpf.so |			 \
+		readelf --dyn-syms --wide $(OUTPUT)libbpf.so |		 \
 		    grep -Eo '[^ ]+@LIBBPF_' | cut -d@ -f1 |		 \
 		    sort -u > $(OUTPUT)libbpf_versioned_syms.tmp; 	 \
 		diff -u $(OUTPUT)libbpf_global_syms.tmp			 \
-- 
cgit v1.2.3-59-g8ed1b


From abe3cac8706bffeda3ebc06e4a9fa6e9cadacf26 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 24 May 2020 09:50:33 -0700
Subject: bpf, sk_msg: Add some generic helpers that may be useful from sk_msg

Add these generic helpers that may be useful to use from sk_msg programs.
The helpers do not depend on ctx so we can simply add them here,

 BPF_FUNC_perf_event_output
 BPF_FUNC_get_current_uid_gid
 BPF_FUNC_get_current_pid_tgid
 BPF_FUNC_get_current_cgroup_id
 BPF_FUNC_get_current_ancestor_cgroup_id
 BPF_FUNC_get_cgroup_classid

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/159033903373.12355.15489763099696629346.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/filter.c | 16 ++++++++++++++++
 1 file changed, 16 insertions(+)

diff --git a/net/core/filter.c b/net/core/filter.c
index bd2853d23b50..c3b496a19748 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6443,6 +6443,22 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_msg_push_data_proto;
 	case BPF_FUNC_msg_pop_data:
 		return &bpf_msg_pop_data_proto;
+	case BPF_FUNC_perf_event_output:
+		return &bpf_event_output_data_proto;
+	case BPF_FUNC_get_current_uid_gid:
+		return &bpf_get_current_uid_gid_proto;
+	case BPF_FUNC_get_current_pid_tgid:
+		return &bpf_get_current_pid_tgid_proto;
+#ifdef CONFIG_CGROUPS
+	case BPF_FUNC_get_current_cgroup_id:
+		return &bpf_get_current_cgroup_id_proto;
+	case BPF_FUNC_get_current_ancestor_cgroup_id:
+		return &bpf_get_current_ancestor_cgroup_id_proto;
+#endif
+#ifdef CONFIG_CGROUP_NET_CLASSID
+	case BPF_FUNC_get_cgroup_classid:
+		return &bpf_get_cgroup_classid_curr_proto;
+#endif
 	default:
 		return bpf_base_func_proto(func_id);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From f470378c7562a2818b45ed11c98973f2b89eedd3 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 24 May 2020 09:50:55 -0700
Subject: bpf: Extend bpf_base_func_proto helpers with probe_* and
 *current_task*

Often it is useful when applying policy to know something about the
task. If the administrator has CAP_SYS_ADMIN rights then they can
use kprobe + networking hook and link the two programs together to
accomplish this. However, this is a bit clunky and also means we have
to call both the network program and kprobe program when we could just
use a single program and avoid passing metadata through sk_msg/skb->cb,
socket, maps, etc.

To accomplish this add probe_* helpers to bpf_base_func_proto programs
guarded by a perfmon_capable() check. New supported helpers are the
following,

 BPF_FUNC_get_current_task
 BPF_FUNC_probe_read_user
 BPF_FUNC_probe_read_kernel
 BPF_FUNC_probe_read_user_str
 BPF_FUNC_probe_read_kernel_str

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/159033905529.12355.4368381069655254932.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/helpers.c     | 24 ++++++++++++++++++++++++
 kernel/trace/bpf_trace.c | 10 +++++-----
 2 files changed, 29 insertions(+), 5 deletions(-)

diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index 886949fdcece..bb4fb634275e 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -601,6 +601,12 @@ const struct bpf_func_proto bpf_event_output_data_proto =  {
 	.arg5_type      = ARG_CONST_SIZE_OR_ZERO,
 };
 
+const struct bpf_func_proto bpf_get_current_task_proto __weak;
+const struct bpf_func_proto bpf_probe_read_user_proto __weak;
+const struct bpf_func_proto bpf_probe_read_user_str_proto __weak;
+const struct bpf_func_proto bpf_probe_read_kernel_proto __weak;
+const struct bpf_func_proto bpf_probe_read_kernel_str_proto __weak;
+
 const struct bpf_func_proto *
 bpf_base_func_proto(enum bpf_func_id func_id)
 {
@@ -647,6 +653,24 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return bpf_get_trace_printk_proto();
 	case BPF_FUNC_jiffies64:
 		return &bpf_jiffies64_proto;
+	default:
+		break;
+	}
+
+	if (!perfmon_capable())
+		return NULL;
+
+	switch (func_id) {
+	case BPF_FUNC_get_current_task:
+		return &bpf_get_current_task_proto;
+	case BPF_FUNC_probe_read_user:
+		return &bpf_probe_read_user_proto;
+	case BPF_FUNC_probe_read_kernel:
+		return &bpf_probe_read_kernel_proto;
+	case BPF_FUNC_probe_read_user_str:
+		return &bpf_probe_read_user_str_proto;
+	case BPF_FUNC_probe_read_kernel_str:
+		return &bpf_probe_read_kernel_str_proto;
 	default:
 		return NULL;
 	}
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 9531f54d0a3a..187cd6995bbb 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -147,7 +147,7 @@ BPF_CALL_3(bpf_probe_read_user, void *, dst, u32, size,
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_user_proto = {
+const struct bpf_func_proto bpf_probe_read_user_proto = {
 	.func		= bpf_probe_read_user,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -167,7 +167,7 @@ BPF_CALL_3(bpf_probe_read_user_str, void *, dst, u32, size,
 	return ret;
 }
 
-static const struct bpf_func_proto bpf_probe_read_user_str_proto = {
+const struct bpf_func_proto bpf_probe_read_user_str_proto = {
 	.func		= bpf_probe_read_user_str,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -198,7 +198,7 @@ BPF_CALL_3(bpf_probe_read_kernel, void *, dst, u32, size,
 	return bpf_probe_read_kernel_common(dst, size, unsafe_ptr, false);
 }
 
-static const struct bpf_func_proto bpf_probe_read_kernel_proto = {
+const struct bpf_func_proto bpf_probe_read_kernel_proto = {
 	.func		= bpf_probe_read_kernel,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -253,7 +253,7 @@ BPF_CALL_3(bpf_probe_read_kernel_str, void *, dst, u32, size,
 	return bpf_probe_read_kernel_str_common(dst, size, unsafe_ptr, false);
 }
 
-static const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
+const struct bpf_func_proto bpf_probe_read_kernel_str_proto = {
 	.func		= bpf_probe_read_kernel_str,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
@@ -907,7 +907,7 @@ BPF_CALL_0(bpf_get_current_task)
 	return (long) current;
 }
 
-static const struct bpf_func_proto bpf_get_current_task_proto = {
+const struct bpf_func_proto bpf_get_current_task_proto = {
 	.func		= bpf_get_current_task,
 	.gpl_only	= true,
 	.ret_type	= RET_INTEGER,
-- 
cgit v1.2.3-59-g8ed1b


From 13d70f5a5ecff367db2fb18ed4ebe433eab8a74c Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 24 May 2020 09:51:15 -0700
Subject: bpf, sk_msg: Add get socket storage helpers

Add helpers to use local socket storage.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Link: https://lore.kernel.org/bpf/159033907577.12355.14740125020572756560.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h       |  2 ++
 net/core/filter.c              | 15 +++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 3 files changed, 19 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 97e1fd19ff58..54b93f8b49b8 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3645,6 +3645,8 @@ struct sk_msg_md {
 	__u32 remote_port;	/* Stored in network byte order */
 	__u32 local_port;	/* stored in host byte order */
 	__u32 size;		/* Total size of sk_msg */
+
+	__bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
 };
 
 struct sk_reuseport_md {
diff --git a/net/core/filter.c b/net/core/filter.c
index c3b496a19748..a6fc23447f12 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6449,6 +6449,10 @@ sk_msg_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_get_current_uid_gid_proto;
 	case BPF_FUNC_get_current_pid_tgid:
 		return &bpf_get_current_pid_tgid_proto;
+	case BPF_FUNC_sk_storage_get:
+		return &bpf_sk_storage_get_proto;
+	case BPF_FUNC_sk_storage_delete:
+		return &bpf_sk_storage_delete_proto;
 #ifdef CONFIG_CGROUPS
 	case BPF_FUNC_get_current_cgroup_id:
 		return &bpf_get_current_cgroup_id_proto;
@@ -7273,6 +7277,11 @@ static bool sk_msg_is_valid_access(int off, int size,
 		if (size != sizeof(__u64))
 			return false;
 		break;
+	case offsetof(struct sk_msg_md, sk):
+		if (size != sizeof(__u64))
+			return false;
+		info->reg_type = PTR_TO_SOCKET;
+		break;
 	case bpf_ctx_range(struct sk_msg_md, family):
 	case bpf_ctx_range(struct sk_msg_md, remote_ip4):
 	case bpf_ctx_range(struct sk_msg_md, local_ip4):
@@ -8609,6 +8618,12 @@ static u32 sk_msg_convert_ctx_access(enum bpf_access_type type,
 				      si->dst_reg, si->src_reg,
 				      offsetof(struct sk_msg_sg, size));
 		break;
+
+	case offsetof(struct sk_msg_md, sk):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct sk_msg, sk),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct sk_msg, sk));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 97e1fd19ff58..54b93f8b49b8 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3645,6 +3645,8 @@ struct sk_msg_md {
 	__u32 remote_port;	/* Stored in network byte order */
 	__u32 local_port;	/* stored in host byte order */
 	__u32 size;		/* Total size of sk_msg */
+
+	__bpf_md_ptr(struct bpf_sock *, sk); /* current socket */
 };
 
 struct sk_reuseport_md {
-- 
cgit v1.2.3-59-g8ed1b


From 1d9c037a898b3c0344cfe5064ba6c482bf9b46b0 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 24 May 2020 09:51:36 -0700
Subject: bpf, selftests: Add sk_msg helpers load and attach test

The test itself is not particularly useful but it encodes a common
pattern we have.

Namely do a sk storage lookup then depending on data here decide if
we need to do more work or alternatively allow packet to PASS. Then
if we need to do more work consult task_struct for more information
about the running task. Finally based on this additional information
drop or pass the data. In this case the suspicious check is not so
realistic but it encodes the general pattern and uses the helpers
so we test the workflow.

This is a load test to ensure verifier correctly handles this case.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159033909665.12355.6166415847337547879.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/sockmap_basic.c       | 35 ++++++++++++++++
 .../selftests/bpf/progs/test_skmsg_load_helpers.c  | 47 ++++++++++++++++++++++
 2 files changed, 82 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c

diff --git a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
index aa43e0bd210c..96e7b7f84c65 100644
--- a/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
+++ b/tools/testing/selftests/bpf/prog_tests/sockmap_basic.c
@@ -1,7 +1,9 @@
 // SPDX-License-Identifier: GPL-2.0
 // Copyright (c) 2020 Cloudflare
+#include <error.h>
 
 #include "test_progs.h"
+#include "test_skmsg_load_helpers.skel.h"
 
 #define TCP_REPAIR		19	/* TCP sock is under repair right now */
 
@@ -70,10 +72,43 @@ out:
 	close(s);
 }
 
+static void test_skmsg_helpers(enum bpf_map_type map_type)
+{
+	struct test_skmsg_load_helpers *skel;
+	int err, map, verdict;
+
+	skel = test_skmsg_load_helpers__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_skmsg_load_helpers__open_and_load");
+		return;
+	}
+
+	verdict = bpf_program__fd(skel->progs.prog_msg_verdict);
+	map = bpf_map__fd(skel->maps.sock_map);
+
+	err = bpf_prog_attach(verdict, map, BPF_SK_MSG_VERDICT, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach");
+		goto out;
+	}
+
+	err = bpf_prog_detach2(verdict, map, BPF_SK_MSG_VERDICT);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_detach2");
+		goto out;
+	}
+out:
+	test_skmsg_load_helpers__destroy(skel);
+}
+
 void test_sockmap_basic(void)
 {
 	if (test__start_subtest("sockmap create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKMAP);
 	if (test__start_subtest("sockhash create_update_free"))
 		test_sockmap_create_update_free(BPF_MAP_TYPE_SOCKHASH);
+	if (test__start_subtest("sockmap sk_msg load helpers"))
+		test_skmsg_helpers(BPF_MAP_TYPE_SOCKMAP);
+	if (test__start_subtest("sockhash sk_msg load helpers"))
+		test_skmsg_helpers(BPF_MAP_TYPE_SOCKHASH);
 }
diff --git a/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
new file mode 100644
index 000000000000..45e8fc75a739
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skmsg_load_helpers.c
@@ -0,0 +1,47 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Isovalent, Inc.
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKMAP);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_map SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SOCKHASH);
+	__uint(max_entries, 2);
+	__type(key, __u32);
+	__type(value, __u64);
+} sock_hash SEC(".maps");
+
+struct {
+	__uint(type, BPF_MAP_TYPE_SK_STORAGE);
+	__uint(map_flags, BPF_F_NO_PREALLOC);
+	__type(key, __u32);
+	__type(value, __u64);
+} socket_storage SEC(".maps");
+
+SEC("sk_msg")
+int prog_msg_verdict(struct sk_msg_md *msg)
+{
+	struct task_struct *task = (struct task_struct *)bpf_get_current_task();
+	int verdict = SK_PASS;
+	__u32 pid, tpid;
+	__u64 *sk_stg;
+
+	pid = bpf_get_current_pid_tgid() >> 32;
+	sk_stg = bpf_sk_storage_get(&socket_storage, msg->sk, 0, BPF_SK_STORAGE_GET_F_CREATE);
+	if (!sk_stg)
+		return SK_DROP;
+	*sk_stg = pid;
+	bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+	if (pid != tpid)
+		verdict = SK_DROP;
+	bpf_sk_storage_delete(&socket_storage, (void *)msg->sk);
+	return verdict;
+}
+
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From ee103e9f1544e04ecd1db5eb5e9eb9a8b8698879 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Sun, 24 May 2020 09:51:57 -0700
Subject: bpf, selftests: Test probe_* helpers from SCHED_CLS

Let's test using probe* in SCHED_CLS network programs as well just
to be sure these keep working. It's cheap to add the extra test
and provides a second context to test outside of sk_msg after
we generalized probe* helpers to all networking types.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Yonghong Song <yhs@fb.com>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159033911685.12355.15951980509828906214.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../testing/selftests/bpf/prog_tests/skb_helpers.c | 30 ++++++++++++++++++++++
 .../testing/selftests/bpf/progs/test_skb_helpers.c | 28 ++++++++++++++++++++
 2 files changed, 58 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/skb_helpers.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_skb_helpers.c

diff --git a/tools/testing/selftests/bpf/prog_tests/skb_helpers.c b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
new file mode 100644
index 000000000000..f302ad84a298
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/skb_helpers.c
@@ -0,0 +1,30 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <test_progs.h>
+#include <network_helpers.h>
+
+void test_skb_helpers(void)
+{
+	struct __sk_buff skb = {
+		.wire_len = 100,
+		.gso_segs = 8,
+		.gso_size = 10,
+	};
+	struct bpf_prog_test_run_attr tattr = {
+		.data_in = &pkt_v4,
+		.data_size_in = sizeof(pkt_v4),
+		.ctx_in = &skb,
+		.ctx_size_in = sizeof(skb),
+		.ctx_out = &skb,
+		.ctx_size_out = sizeof(skb),
+	};
+	struct bpf_object *obj;
+	int err;
+
+	err = bpf_prog_load("./test_skb_helpers.o", BPF_PROG_TYPE_SCHED_CLS, &obj,
+			    &tattr.prog_fd);
+	if (CHECK_ATTR(err, "load", "err %d errno %d\n", err, errno))
+		return;
+	err = bpf_prog_test_run_xattr(&tattr);
+	CHECK_ATTR(err, "len", "err %d errno %d\n", err, errno);
+	bpf_object__close(obj);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_skb_helpers.c b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
new file mode 100644
index 000000000000..bb3fbf1a29e3
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_skb_helpers.c
@@ -0,0 +1,28 @@
+// SPDX-License-Identifier: GPL-2.0-only
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+#include <bpf/bpf_endian.h>
+
+#define TEST_COMM_LEN 16
+
+struct {
+	__uint(type, BPF_MAP_TYPE_CGROUP_ARRAY);
+	__uint(max_entries, 1);
+	__type(key, u32);
+	__type(value, u32);
+} cgroup_map SEC(".maps");
+
+char _license[] SEC("license") = "GPL";
+
+SEC("classifier/test_skb_helpers")
+int test_skb_helpers(struct __sk_buff *skb)
+{
+	struct task_struct *task;
+	char comm[TEST_COMM_LEN];
+	__u32 tpid;
+
+	task = (struct task_struct *)bpf_get_current_task();
+	bpf_probe_read_kernel(&tpid , sizeof(tpid), &task->tgid);
+	bpf_probe_read_kernel_str(&comm, sizeof(comm), &task->comm);
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 601b05ca6edb0422bf6ce313fbfd55ec7bbbc0fd Mon Sep 17 00:00:00 2001
From: Eelco Chaudron <echaudro@redhat.com>
Date: Wed, 27 May 2020 10:42:00 +0200
Subject: libbpf: Fix perf_buffer__free() API for sparse allocs

In case the cpu_bufs are sparsely allocated, they are not all
freed. These changes will fix this.

Fixes: fb84b8224655 ("libbpf: add perf buffer API")
Signed-off-by: Eelco Chaudron <echaudro@redhat.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Acked-by: Andrii Nakryiko <andriin@fb.com>
Link: https://lore.kernel.org/bpf/159056888305.330763.9684536967379110349.stgit@ebuild
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 5d60de6fd818..74d967619dcf 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -8137,9 +8137,12 @@ void perf_buffer__free(struct perf_buffer *pb)
 	if (!pb)
 		return;
 	if (pb->cpu_bufs) {
-		for (i = 0; i < pb->cpu_cnt && pb->cpu_bufs[i]; i++) {
+		for (i = 0; i < pb->cpu_cnt; i++) {
 			struct perf_cpu_buf *cpu_buf = pb->cpu_bufs[i];
 
+			if (!cpu_buf)
+				continue;
+
 			bpf_map_delete_elem(pb->map_fd, &cpu_buf->map_key);
 			perf_buffer__free_cpu_buf(pb, cpu_buf);
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 204fb0413a92342d31f3e2557db0bb5babed586c Mon Sep 17 00:00:00 2001
From: Anton Protopopov <a.s.protopopov@gmail.com>
Date: Wed, 27 May 2020 18:56:56 +0000
Subject: selftests/bpf: Fix a typo in test_maps

Trivial fix to a typo in the test_map_wronly test: "read" -> "write"

Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200527185700.14658-2-a.s.protopopov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index c6766b2cff85..f717acc0c68d 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1410,7 +1410,7 @@ static void test_map_wronly(void)
 	fd = bpf_create_map(BPF_MAP_TYPE_HASH, sizeof(key), sizeof(value),
 			    MAP_SIZE, map_flags | BPF_F_WRONLY);
 	if (fd < 0) {
-		printf("Failed to create map for read only test '%s'!\n",
+		printf("Failed to create map for write only test '%s'!\n",
 		       strerror(errno));
 		exit(1);
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 36ef9a2d3f764a37cf3d8e619bfebf5c99c070a0 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <a.s.protopopov@gmail.com>
Date: Wed, 27 May 2020 18:56:57 +0000
Subject: selftests/bpf: Cleanup some file descriptors in test_maps

The test_map_rdonly and test_map_wronly tests should close file descriptors
which they open.

Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200527185700.14658-3-a.s.protopopov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index f717acc0c68d..46cf2c232964 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1401,6 +1401,8 @@ static void test_map_rdonly(void)
 	/* Check that key=2 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
+
+	close(fd);
 }
 
 static void test_map_wronly(void)
@@ -1423,6 +1425,8 @@ static void test_map_wronly(void)
 	/* Check that key=2 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
+
+	close(fd);
 }
 
 static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
-- 
cgit v1.2.3-59-g8ed1b


From efbc3b8fe1e6259777670aadf931500545073c6c Mon Sep 17 00:00:00 2001
From: Anton Protopopov <a.s.protopopov@gmail.com>
Date: Wed, 27 May 2020 18:56:58 +0000
Subject: selftests/bpf: Cleanup comments in test_maps

Make comments inside the test_map_rdonly and test_map_wronly tests
consistent with logic.

Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200527185700.14658-4-a.s.protopopov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 46cf2c232964..08d63948514a 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1394,11 +1394,11 @@ static void test_map_rdonly(void)
 
 	key = 1;
 	value = 1234;
-	/* Insert key=1 element. */
+	/* Try to insert key=1 element. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == -1 &&
 	       errno == EPERM);
 
-	/* Check that key=2 is not found. */
+	/* Check that key=1 is not found. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == ENOENT);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == ENOENT);
 
@@ -1422,7 +1422,7 @@ static void test_map_wronly(void)
 	/* Insert key=1 element. */
 	assert(bpf_map_update_elem(fd, &key, &value, BPF_ANY) == 0);
 
-	/* Check that key=2 is not found. */
+	/* Check that reading elements and keys from the map is not allowed. */
 	assert(bpf_map_lookup_elem(fd, &key, &value) == -1 && errno == EPERM);
 	assert(bpf_map_get_next_key(fd, &key, &value) == -1 && errno == EPERM);
 
-- 
cgit v1.2.3-59-g8ed1b


From 1ea0f9120c8ce105ca181b070561df5cbd6bc049 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <a.s.protopopov@gmail.com>
Date: Wed, 27 May 2020 18:56:59 +0000
Subject: bpf: Fix map permissions check

The map_lookup_and_delete_elem() function should check for both FMODE_CAN_WRITE
and FMODE_CAN_READ permissions because it returns a map element to user space.

Fixes: bd513cd08f10 ("bpf: add MAP_LOOKUP_AND_DELETE_ELEM syscall")
Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200527185700.14658-5-a.s.protopopov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index d13b804ff045..2c969a9b90d3 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1472,7 +1472,8 @@ static int map_lookup_and_delete_elem(union bpf_attr *attr)
 	map = __bpf_map_get(f);
 	if (IS_ERR(map))
 		return PTR_ERR(map);
-	if (!(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
+	if (!(map_get_sys_perms(map, f) & FMODE_CAN_READ) ||
+	    !(map_get_sys_perms(map, f) & FMODE_CAN_WRITE)) {
 		err = -EPERM;
 		goto err_put;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 457f44363a8894135c85b7a9afd2bd8196db24ab Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 29 May 2020 00:54:20 -0700
Subject: bpf: Implement BPF ring buffer and verifier support for it

This commit adds a new MPSC ring buffer implementation into the BPF ecosystem,
which allows multiple CPUs to submit data to a single shared ring buffer. On
the consumption side, only a single consumer is assumed.

Motivation
----------
There are two distinctive motivators for this work, which are not satisfied by
existing perf buffer, which prompted creation of a new ring buffer
implementation.
  - more efficient memory utilization by sharing ring buffer across CPUs;
  - preserving ordering of events that happen sequentially in time, even
  across multiple CPUs (e.g., fork/exec/exit events for a task).

These two problems are independent, but perf buffer fails to satisfy both.
Both are a result of a choice to have per-CPU perf ring buffer.  Both can be
also solved by having an MPSC implementation of ring buffer. The ordering
problem could technically be solved for perf buffer with some in-kernel
counting, but given the first one requires an MPSC buffer, the same solution
would solve the second problem automatically.

Semantics and APIs
------------------
A single ring buffer is presented to BPF programs as an instance of a BPF map
of type BPF_MAP_TYPE_RINGBUF. Two other alternatives were considered, but
ultimately rejected.

One way would be to, similar to BPF_MAP_TYPE_PERF_EVENT_ARRAY, make
BPF_MAP_TYPE_RINGBUF represent an array of ring buffers, but not enforce
"same CPU only" rule. This would be more familiar interface compatible with
existing perf buffer use in BPF, but would fail if application needed more
advanced logic to lookup ring buffer by arbitrary key. HASH_OF_MAPS addresses
this with current approach. Additionally, given the performance of BPF
ringbuf, many use cases would just opt into a simple single ring buffer shared
among all CPUs, for which current approach would be an overkill.

Another approach could introduce a new concept, alongside BPF map, to
represent generic "container" object, which doesn't necessarily have key/value
interface with lookup/update/delete operations. This approach would add a lot
of extra infrastructure that has to be built for observability and verifier
support. It would also add another concept that BPF developers would have to
familiarize themselves with, new syntax in libbpf, etc. But then would really
provide no additional benefits over the approach of using a map.
BPF_MAP_TYPE_RINGBUF doesn't support lookup/update/delete operations, but
neither do a few other map types (e.g., queue and stack; array doesn't support
delete, etc).

The approach chosen has an advantage of re-using existing BPF map
infrastructure (introspection APIs in kernel, libbpf support, etc), being
familiar concept (no need to teach users a new type of object in BPF program),
and utilizing existing tooling (bpftool). For common scenario of using
a single ring buffer for all CPUs, it's as simple and straightforward, as
would be with a dedicated "container" object. On the other hand, by being
a map, it can be combined with ARRAY_OF_MAPS and HASH_OF_MAPS map-in-maps to
implement a wide variety of topologies, from one ring buffer for each CPU
(e.g., as a replacement for perf buffer use cases), to a complicated
application hashing/sharding of ring buffers (e.g., having a small pool of
ring buffers with hashed task's tgid being a look up key to preserve order,
but reduce contention).

Key and value sizes are enforced to be zero. max_entries is used to specify
the size of ring buffer and has to be a power of 2 value.

There are a bunch of similarities between perf buffer
(BPF_MAP_TYPE_PERF_EVENT_ARRAY) and new BPF ring buffer semantics:
  - variable-length records;
  - if there is no more space left in ring buffer, reservation fails, no
    blocking;
  - memory-mappable data area for user-space applications for ease of
    consumption and high performance;
  - epoll notifications for new incoming data;
  - but still the ability to do busy polling for new data to achieve the
    lowest latency, if necessary.

BPF ringbuf provides two sets of APIs to BPF programs:
  - bpf_ringbuf_output() allows to *copy* data from one place to a ring
    buffer, similarly to bpf_perf_event_output();
  - bpf_ringbuf_reserve()/bpf_ringbuf_commit()/bpf_ringbuf_discard() APIs
    split the whole process into two steps. First, a fixed amount of space is
    reserved. If successful, a pointer to a data inside ring buffer data area
    is returned, which BPF programs can use similarly to a data inside
    array/hash maps. Once ready, this piece of memory is either committed or
    discarded. Discard is similar to commit, but makes consumer ignore the
    record.

bpf_ringbuf_output() has disadvantage of incurring extra memory copy, because
record has to be prepared in some other place first. But it allows to submit
records of the length that's not known to verifier beforehand. It also closely
matches bpf_perf_event_output(), so will simplify migration significantly.

bpf_ringbuf_reserve() avoids the extra copy of memory by providing a memory
pointer directly to ring buffer memory. In a lot of cases records are larger
than BPF stack space allows, so many programs have to use extra per-CPU array as
a temporary heap for preparing sample. bpf_ringbuf_reserve() avoids this need
completely. But in exchange, it only allows a known constant size of memory to
be reserved, such that verifier can verify that BPF program can't access
memory outside its reserved record space. bpf_ringbuf_output(), while slightly
slower due to extra memory copy, covers some use cases that are not suitable
for bpf_ringbuf_reserve().

The difference between commit and discard is very small. Discard just marks
a record as discarded, and such records are supposed to be ignored by consumer
code. Discard is useful for some advanced use-cases, such as ensuring
all-or-nothing multi-record submission, or emulating temporary malloc()/free()
within single BPF program invocation.

Each reserved record is tracked by verifier through existing
reference-tracking logic, similar to socket ref-tracking. It is thus
impossible to reserve a record, but forget to submit (or discard) it.

bpf_ringbuf_query() helper allows to query various properties of ring buffer.
Currently 4 are supported:
  - BPF_RB_AVAIL_DATA returns amount of unconsumed data in ring buffer;
  - BPF_RB_RING_SIZE returns the size of ring buffer;
  - BPF_RB_CONS_POS/BPF_RB_PROD_POS returns current logical position of
    consumer/producer, respectively.
Returned values are momentary snapshots of ring buffer state and could be
off by the time helper returns, so this should be used only for
debugging/reporting reasons or for implementing various heuristics, that take
into account highly-changeable nature of some of those characteristics.

One such heuristic might involve more fine-grained control over poll/epoll
notifications about new data availability in ring buffer. Together with
BPF_RB_NO_WAKEUP/BPF_RB_FORCE_WAKEUP flags for output/commit/discard helpers,
it allows BPF program a high degree of control and, e.g., more efficient
batched notifications. Default self-balancing strategy, though, should be
adequate for most applications and will work reliably and efficiently already.

Design and implementation
-------------------------
This reserve/commit schema allows a natural way for multiple producers, either
on different CPUs or even on the same CPU/in the same BPF program, to reserve
independent records and work with them without blocking other producers. This
means that if BPF program was interrupted by another BPF program sharing the
same ring buffer, they will both get a record reserved (provided there is
enough space left) and can work with it and submit it independently. This
applies to NMI context as well, except that due to using a spinlock during
reservation, in NMI context, bpf_ringbuf_reserve() might fail to get a lock,
in which case reservation will fail even if ring buffer is not full.

The ring buffer itself internally is implemented as a power-of-2 sized
circular buffer, with two logical and ever-increasing counters (which might
wrap around on 32-bit architectures, that's not a problem):
  - consumer counter shows up to which logical position consumer consumed the
    data;
  - producer counter denotes amount of data reserved by all producers.

Each time a record is reserved, producer that "owns" the record will
successfully advance producer counter. At that point, data is still not yet
ready to be consumed, though. Each record has 8 byte header, which contains
the length of reserved record, as well as two extra bits: busy bit to denote
that record is still being worked on, and discard bit, which might be set at
commit time if record is discarded. In the latter case, consumer is supposed
to skip the record and move on to the next one. Record header also encodes
record's relative offset from the beginning of ring buffer data area (in
pages). This allows bpf_ringbuf_commit()/bpf_ringbuf_discard() to accept only
the pointer to the record itself, without requiring also the pointer to ring
buffer itself. Ring buffer memory location will be restored from record
metadata header. This significantly simplifies verifier, as well as improving
API usability.

Producer counter increments are serialized under spinlock, so there is
a strict ordering between reservations. Commits, on the other hand, are
completely lockless and independent. All records become available to consumer
in the order of reservations, but only after all previous records were
already committed. It is thus possible for slow producers to temporarily hold
off submitted records, that were reserved later.

Reservation/commit/consumer protocol is verified by litmus tests in
Documentation/litmus-test/bpf-rb.

One interesting implementation bit, that significantly simplifies (and thus
speeds up as well) implementation of both producers and consumers is how data
area is mapped twice contiguously back-to-back in the virtual memory. This
allows to not take any special measures for samples that have to wrap around
at the end of the circular buffer data area, because the next page after the
last data page would be first data page again, and thus the sample will still
appear completely contiguous in virtual memory. See comment and a simple ASCII
diagram showing this visually in bpf_ringbuf_area_alloc().

Another feature that distinguishes BPF ringbuf from perf ring buffer is
self-pacing notifications of new data availability.
bpf_ringbuf_commit() implementation will send a notification of new record
being available after commit only if consumer has already caught up right up
to the record being committed. If not, consumer still has to catch up and thus
will see new data anyways without needing an extra poll notification.
Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c) show that
this allows to achieve a very high throughput without having to resort to
tricks like "notify only every Nth sample", which are necessary with perf
buffer. For extreme cases, when BPF program wants more manual control of
notifications, commit/discard/output helpers accept BPF_RB_NO_WAKEUP and
BPF_RB_FORCE_WAKEUP flags, which give full control over notifications of data
availability, but require extra caution and diligence in using this API.

Comparison to alternatives
--------------------------
Before considering implementing BPF ring buffer from scratch existing
alternatives in kernel were evaluated, but didn't seem to meet the needs. They
largely fell into few categories:
  - per-CPU buffers (perf, ftrace, etc), which don't satisfy two motivations
    outlined above (ordering and memory consumption);
  - linked list-based implementations; while some were multi-producer designs,
    consuming these from user-space would be very complicated and most
    probably not performant; memory-mapping contiguous piece of memory is
    simpler and more performant for user-space consumers;
  - io_uring is SPSC, but also requires fixed-sized elements. Naively turning
    SPSC queue into MPSC w/ lock would have subpar performance compared to
    locked reserve + lockless commit, as with BPF ring buffer. Fixed sized
    elements would be too limiting for BPF programs, given existing BPF
    programs heavily rely on variable-sized perf buffer already;
  - specialized implementations (like a new printk ring buffer, [0]) with lots
    of printk-specific limitations and implications, that didn't seem to fit
    well for intended use with BPF programs.

  [0] https://lwn.net/Articles/779550/

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200529075424.3139988-2-andriin@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h                                |  13 +
 include/linux/bpf_types.h                          |   1 +
 include/linux/bpf_verifier.h                       |   4 +
 include/uapi/linux/bpf.h                           |  84 +++-
 kernel/bpf/Makefile                                |   2 +-
 kernel/bpf/helpers.c                               |  10 +
 kernel/bpf/ringbuf.c                               | 501 +++++++++++++++++++++
 kernel/bpf/syscall.c                               |  12 +
 kernel/bpf/verifier.c                              | 195 ++++++--
 kernel/trace/bpf_trace.c                           |  10 +
 tools/include/uapi/linux/bpf.h                     |  84 +++-
 tools/testing/selftests/bpf/verifier/and.c         |   4 +-
 .../testing/selftests/bpf/verifier/array_access.c  |   4 +-
 tools/testing/selftests/bpf/verifier/bounds.c      |   6 +-
 tools/testing/selftests/bpf/verifier/calls.c       |   2 +-
 .../selftests/bpf/verifier/direct_value_access.c   |   4 +-
 .../selftests/bpf/verifier/helper_access_var_len.c |   2 +-
 .../selftests/bpf/verifier/helper_value_access.c   |   6 +-
 .../selftests/bpf/verifier/value_ptr_arith.c       |   8 +-
 19 files changed, 882 insertions(+), 70 deletions(-)
 create mode 100644 kernel/bpf/ringbuf.c

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index efe8836b5c48..e5884f7f801c 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -90,6 +90,8 @@ struct bpf_map_ops {
 	int (*map_direct_value_meta)(const struct bpf_map *map,
 				     u64 imm, u32 *off);
 	int (*map_mmap)(struct bpf_map *map, struct vm_area_struct *vma);
+	__poll_t (*map_poll)(struct bpf_map *map, struct file *filp,
+			     struct poll_table_struct *pts);
 };
 
 struct bpf_map_memory {
@@ -244,6 +246,9 @@ enum bpf_arg_type {
 	ARG_PTR_TO_LONG,	/* pointer to long */
 	ARG_PTR_TO_SOCKET,	/* pointer to bpf_sock (fullsock) */
 	ARG_PTR_TO_BTF_ID,	/* pointer to in-kernel struct */
+	ARG_PTR_TO_ALLOC_MEM,	/* pointer to dynamically allocated memory */
+	ARG_PTR_TO_ALLOC_MEM_OR_NULL,	/* pointer to dynamically allocated memory or NULL */
+	ARG_CONST_ALLOC_SIZE_OR_ZERO,	/* number of allocated bytes requested */
 };
 
 /* type of values returned from helper functions */
@@ -255,6 +260,7 @@ enum bpf_return_type {
 	RET_PTR_TO_SOCKET_OR_NULL,	/* returns a pointer to a socket or NULL */
 	RET_PTR_TO_TCP_SOCK_OR_NULL,	/* returns a pointer to a tcp_sock or NULL */
 	RET_PTR_TO_SOCK_COMMON_OR_NULL,	/* returns a pointer to a sock_common or NULL */
+	RET_PTR_TO_ALLOC_MEM_OR_NULL,	/* returns a pointer to dynamically allocated memory or NULL */
 };
 
 /* eBPF function prototype used by verifier to allow BPF_CALLs from eBPF programs
@@ -322,6 +328,8 @@ enum bpf_reg_type {
 	PTR_TO_XDP_SOCK,	 /* reg points to struct xdp_sock */
 	PTR_TO_BTF_ID,		 /* reg points to kernel struct */
 	PTR_TO_BTF_ID_OR_NULL,	 /* reg points to kernel struct or NULL */
+	PTR_TO_MEM,		 /* reg points to valid memory region */
+	PTR_TO_MEM_OR_NULL,	 /* reg points to valid memory region or NULL */
 };
 
 /* The information passed from prog-specific *_is_valid_access
@@ -1611,6 +1619,11 @@ extern const struct bpf_func_proto bpf_tcp_sock_proto;
 extern const struct bpf_func_proto bpf_jiffies64_proto;
 extern const struct bpf_func_proto bpf_get_ns_current_pid_tgid_proto;
 extern const struct bpf_func_proto bpf_event_output_data_proto;
+extern const struct bpf_func_proto bpf_ringbuf_output_proto;
+extern const struct bpf_func_proto bpf_ringbuf_reserve_proto;
+extern const struct bpf_func_proto bpf_ringbuf_submit_proto;
+extern const struct bpf_func_proto bpf_ringbuf_discard_proto;
+extern const struct bpf_func_proto bpf_ringbuf_query_proto;
 
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index 29d22752fc87..fa8e1b552acd 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -118,6 +118,7 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_STACK, stack_map_ops)
 #if defined(CONFIG_BPF_JIT)
 BPF_MAP_TYPE(BPF_MAP_TYPE_STRUCT_OPS, bpf_struct_ops_map_ops)
 #endif
+BPF_MAP_TYPE(BPF_MAP_TYPE_RINGBUF, ringbuf_map_ops)
 
 BPF_LINK_TYPE(BPF_LINK_TYPE_RAW_TRACEPOINT, raw_tracepoint)
 BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h
index ea833087e853..ca08db4ffb5f 100644
--- a/include/linux/bpf_verifier.h
+++ b/include/linux/bpf_verifier.h
@@ -54,6 +54,8 @@ struct bpf_reg_state {
 
 		u32 btf_id; /* for PTR_TO_BTF_ID */
 
+		u32 mem_size; /* for PTR_TO_MEM | PTR_TO_MEM_OR_NULL */
+
 		/* Max size from any of the above. */
 		unsigned long raw;
 	};
@@ -63,6 +65,8 @@ struct bpf_reg_state {
 	 * offset, so they can share range knowledge.
 	 * For PTR_TO_MAP_VALUE_OR_NULL this is used to share which map value we
 	 * came from, when one is tested for != NULL.
+	 * For PTR_TO_MEM_OR_NULL this is used to identify memory allocation
+	 * for the purpose of tracking that it's freed.
 	 * For PTR_TO_SOCKET this is used to share which pointers retain the
 	 * same reference to the socket, to determine proper reference freeing.
 	 */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 54b93f8b49b8..974ca6e948e3 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -147,6 +147,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_SK_STORAGE,
 	BPF_MAP_TYPE_DEVMAP_HASH,
 	BPF_MAP_TYPE_STRUCT_OPS,
+	BPF_MAP_TYPE_RINGBUF,
 };
 
 /* Note that tracing related programs such as
@@ -3157,6 +3158,59 @@ union bpf_attr {
  *		**bpf_sk_cgroup_id**\ ().
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * 	Description
+ * 		Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		0, on success;
+ * 		< 0, on error.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ * 	Description
+ * 		Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * 	Return
+ * 		Valid pointer with *size* bytes of memory available; NULL,
+ * 		otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ * 	Description
+ * 		Submit reserved ring buffer sample, pointed to by *data*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ * 	Description
+ * 		Discard reserved ring buffer sample, pointed to by *data*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		Nothing. Always succeeds.
+ *
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ *	Description
+ *		Query various characteristics of provided ring buffer. What
+ *		exactly is queried is determined by *flags*:
+ *		  - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
+ *		  - BPF_RB_RING_SIZE - the size of ring buffer;
+ *		  - BPF_RB_CONS_POS - consumer position (can wrap around);
+ *		  - BPF_RB_PROD_POS - producer(s) position (can wrap around);
+ *		Data returned is just a momentary snapshot of actual values
+ *		and could be inaccurate, so this facility should be used to
+ *		power heuristics and for reporting, not to make 100% correct
+ *		calculation.
+ *	Return
+ *		Requested value, or 0, if flags are not recognized.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3288,7 +3342,12 @@ union bpf_attr {
 	FN(seq_printf),			\
 	FN(seq_write),			\
 	FN(sk_cgroup_id),		\
-	FN(sk_ancestor_cgroup_id),
+	FN(sk_ancestor_cgroup_id),	\
+	FN(ringbuf_output),		\
+	FN(ringbuf_reserve),		\
+	FN(ringbuf_submit),		\
+	FN(ringbuf_discard),		\
+	FN(ringbuf_query),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3398,6 +3457,29 @@ enum {
 	BPF_F_GET_BRANCH_RECORDS_SIZE	= (1ULL << 0),
 };
 
+/* BPF_FUNC_ringbuf_submit, BPF_FUNC_ringbuf_discard, and
+ * BPF_FUNC_ringbuf_output flags.
+ */
+enum {
+	BPF_RB_NO_WAKEUP		= (1ULL << 0),
+	BPF_RB_FORCE_WAKEUP		= (1ULL << 1),
+};
+
+/* BPF_FUNC_ringbuf_query flags */
+enum {
+	BPF_RB_AVAIL_DATA = 0,
+	BPF_RB_RING_SIZE = 1,
+	BPF_RB_CONS_POS = 2,
+	BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+	BPF_RINGBUF_BUSY_BIT		= (1U << 31),
+	BPF_RINGBUF_DISCARD_BIT		= (1U << 30),
+	BPF_RINGBUF_HDR_SZ		= 8,
+};
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 375b933010dd..8fca02f64811 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -4,7 +4,7 @@ CFLAGS_core.o += $(call cc-disable-warning, override-init)
 
 obj-$(CONFIG_BPF_SYSCALL) += syscall.o verifier.o inode.o helpers.o tnum.o bpf_iter.o map_iter.o task_iter.o
 obj-$(CONFIG_BPF_SYSCALL) += hashtab.o arraymap.o percpu_freelist.o bpf_lru_list.o lpm_trie.o map_in_map.o
-obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o
+obj-$(CONFIG_BPF_SYSCALL) += local_storage.o queue_stack_maps.o ringbuf.o
 obj-$(CONFIG_BPF_SYSCALL) += disasm.o
 obj-$(CONFIG_BPF_JIT) += trampoline.o
 obj-$(CONFIG_BPF_SYSCALL) += btf.o
diff --git a/kernel/bpf/helpers.c b/kernel/bpf/helpers.c
index bb4fb634275e..be43ab3e619f 100644
--- a/kernel/bpf/helpers.c
+++ b/kernel/bpf/helpers.c
@@ -635,6 +635,16 @@ bpf_base_func_proto(enum bpf_func_id func_id)
 		return &bpf_ktime_get_ns_proto;
 	case BPF_FUNC_ktime_get_boot_ns:
 		return &bpf_ktime_get_boot_ns_proto;
+	case BPF_FUNC_ringbuf_output:
+		return &bpf_ringbuf_output_proto;
+	case BPF_FUNC_ringbuf_reserve:
+		return &bpf_ringbuf_reserve_proto;
+	case BPF_FUNC_ringbuf_submit:
+		return &bpf_ringbuf_submit_proto;
+	case BPF_FUNC_ringbuf_discard:
+		return &bpf_ringbuf_discard_proto;
+	case BPF_FUNC_ringbuf_query:
+		return &bpf_ringbuf_query_proto;
 	default:
 		break;
 	}
diff --git a/kernel/bpf/ringbuf.c b/kernel/bpf/ringbuf.c
new file mode 100644
index 000000000000..180414bb0d3e
--- /dev/null
+++ b/kernel/bpf/ringbuf.c
@@ -0,0 +1,501 @@
+#include <linux/bpf.h>
+#include <linux/btf.h>
+#include <linux/err.h>
+#include <linux/irq_work.h>
+#include <linux/slab.h>
+#include <linux/filter.h>
+#include <linux/mm.h>
+#include <linux/vmalloc.h>
+#include <linux/wait.h>
+#include <linux/poll.h>
+#include <uapi/linux/btf.h>
+
+#define RINGBUF_CREATE_FLAG_MASK (BPF_F_NUMA_NODE)
+
+/* non-mmap()'able part of bpf_ringbuf (everything up to consumer page) */
+#define RINGBUF_PGOFF \
+	(offsetof(struct bpf_ringbuf, consumer_pos) >> PAGE_SHIFT)
+/* consumer page and producer page */
+#define RINGBUF_POS_PAGES 2
+
+#define RINGBUF_MAX_RECORD_SZ (UINT_MAX/4)
+
+/* Maximum size of ring buffer area is limited by 32-bit page offset within
+ * record header, counted in pages. Reserve 8 bits for extensibility, and take
+ * into account few extra pages for consumer/producer pages and
+ * non-mmap()'able parts. This gives 64GB limit, which seems plenty for single
+ * ring buffer.
+ */
+#define RINGBUF_MAX_DATA_SZ \
+	(((1ULL << 24) - RINGBUF_POS_PAGES - RINGBUF_PGOFF) * PAGE_SIZE)
+
+struct bpf_ringbuf {
+	wait_queue_head_t waitq;
+	struct irq_work work;
+	u64 mask;
+	struct page **pages;
+	int nr_pages;
+	spinlock_t spinlock ____cacheline_aligned_in_smp;
+	/* Consumer and producer counters are put into separate pages to allow
+	 * mapping consumer page as r/w, but restrict producer page to r/o.
+	 * This protects producer position from being modified by user-space
+	 * application and ruining in-kernel position tracking.
+	 */
+	unsigned long consumer_pos __aligned(PAGE_SIZE);
+	unsigned long producer_pos __aligned(PAGE_SIZE);
+	char data[] __aligned(PAGE_SIZE);
+};
+
+struct bpf_ringbuf_map {
+	struct bpf_map map;
+	struct bpf_map_memory memory;
+	struct bpf_ringbuf *rb;
+};
+
+/* 8-byte ring buffer record header structure */
+struct bpf_ringbuf_hdr {
+	u32 len;
+	u32 pg_off;
+};
+
+static struct bpf_ringbuf *bpf_ringbuf_area_alloc(size_t data_sz, int numa_node)
+{
+	const gfp_t flags = GFP_KERNEL | __GFP_RETRY_MAYFAIL | __GFP_NOWARN |
+			    __GFP_ZERO;
+	int nr_meta_pages = RINGBUF_PGOFF + RINGBUF_POS_PAGES;
+	int nr_data_pages = data_sz >> PAGE_SHIFT;
+	int nr_pages = nr_meta_pages + nr_data_pages;
+	struct page **pages, *page;
+	struct bpf_ringbuf *rb;
+	size_t array_size;
+	int i;
+
+	/* Each data page is mapped twice to allow "virtual"
+	 * continuous read of samples wrapping around the end of ring
+	 * buffer area:
+	 * ------------------------------------------------------
+	 * | meta pages |  real data pages  |  same data pages  |
+	 * ------------------------------------------------------
+	 * |            | 1 2 3 4 5 6 7 8 9 | 1 2 3 4 5 6 7 8 9 |
+	 * ------------------------------------------------------
+	 * |            | TA             DA | TA             DA |
+	 * ------------------------------------------------------
+	 *                               ^^^^^^^
+	 *                                  |
+	 * Here, no need to worry about special handling of wrapped-around
+	 * data due to double-mapped data pages. This works both in kernel and
+	 * when mmap()'ed in user-space, simplifying both kernel and
+	 * user-space implementations significantly.
+	 */
+	array_size = (nr_meta_pages + 2 * nr_data_pages) * sizeof(*pages);
+	if (array_size > PAGE_SIZE)
+		pages = vmalloc_node(array_size, numa_node);
+	else
+		pages = kmalloc_node(array_size, flags, numa_node);
+	if (!pages)
+		return NULL;
+
+	for (i = 0; i < nr_pages; i++) {
+		page = alloc_pages_node(numa_node, flags, 0);
+		if (!page) {
+			nr_pages = i;
+			goto err_free_pages;
+		}
+		pages[i] = page;
+		if (i >= nr_meta_pages)
+			pages[nr_data_pages + i] = page;
+	}
+
+	rb = vmap(pages, nr_meta_pages + 2 * nr_data_pages,
+		  VM_ALLOC | VM_USERMAP, PAGE_KERNEL);
+	if (rb) {
+		rb->pages = pages;
+		rb->nr_pages = nr_pages;
+		return rb;
+	}
+
+err_free_pages:
+	for (i = 0; i < nr_pages; i++)
+		__free_page(pages[i]);
+	kvfree(pages);
+	return NULL;
+}
+
+static void bpf_ringbuf_notify(struct irq_work *work)
+{
+	struct bpf_ringbuf *rb = container_of(work, struct bpf_ringbuf, work);
+
+	wake_up_all(&rb->waitq);
+}
+
+static struct bpf_ringbuf *bpf_ringbuf_alloc(size_t data_sz, int numa_node)
+{
+	struct bpf_ringbuf *rb;
+
+	if (!data_sz || !PAGE_ALIGNED(data_sz))
+		return ERR_PTR(-EINVAL);
+
+#ifdef CONFIG_64BIT
+	/* on 32-bit arch, it's impossible to overflow record's hdr->pgoff */
+	if (data_sz > RINGBUF_MAX_DATA_SZ)
+		return ERR_PTR(-E2BIG);
+#endif
+
+	rb = bpf_ringbuf_area_alloc(data_sz, numa_node);
+	if (!rb)
+		return ERR_PTR(-ENOMEM);
+
+	spin_lock_init(&rb->spinlock);
+	init_waitqueue_head(&rb->waitq);
+	init_irq_work(&rb->work, bpf_ringbuf_notify);
+
+	rb->mask = data_sz - 1;
+	rb->consumer_pos = 0;
+	rb->producer_pos = 0;
+
+	return rb;
+}
+
+static struct bpf_map *ringbuf_map_alloc(union bpf_attr *attr)
+{
+	struct bpf_ringbuf_map *rb_map;
+	u64 cost;
+	int err;
+
+	if (attr->map_flags & ~RINGBUF_CREATE_FLAG_MASK)
+		return ERR_PTR(-EINVAL);
+
+	if (attr->key_size || attr->value_size ||
+	    attr->max_entries == 0 || !PAGE_ALIGNED(attr->max_entries))
+		return ERR_PTR(-EINVAL);
+
+	rb_map = kzalloc(sizeof(*rb_map), GFP_USER);
+	if (!rb_map)
+		return ERR_PTR(-ENOMEM);
+
+	bpf_map_init_from_attr(&rb_map->map, attr);
+
+	cost = sizeof(struct bpf_ringbuf_map) +
+	       sizeof(struct bpf_ringbuf) +
+	       attr->max_entries;
+	err = bpf_map_charge_init(&rb_map->map.memory, cost);
+	if (err)
+		goto err_free_map;
+
+	rb_map->rb = bpf_ringbuf_alloc(attr->max_entries, rb_map->map.numa_node);
+	if (IS_ERR(rb_map->rb)) {
+		err = PTR_ERR(rb_map->rb);
+		goto err_uncharge;
+	}
+
+	return &rb_map->map;
+
+err_uncharge:
+	bpf_map_charge_finish(&rb_map->map.memory);
+err_free_map:
+	kfree(rb_map);
+	return ERR_PTR(err);
+}
+
+static void bpf_ringbuf_free(struct bpf_ringbuf *rb)
+{
+	/* copy pages pointer and nr_pages to local variable, as we are going
+	 * to unmap rb itself with vunmap() below
+	 */
+	struct page **pages = rb->pages;
+	int i, nr_pages = rb->nr_pages;
+
+	vunmap(rb);
+	for (i = 0; i < nr_pages; i++)
+		__free_page(pages[i]);
+	kvfree(pages);
+}
+
+static void ringbuf_map_free(struct bpf_map *map)
+{
+	struct bpf_ringbuf_map *rb_map;
+
+	/* at this point bpf_prog->aux->refcnt == 0 and this map->refcnt == 0,
+	 * so the programs (can be more than one that used this map) were
+	 * disconnected from events. Wait for outstanding critical sections in
+	 * these programs to complete
+	 */
+	synchronize_rcu();
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	bpf_ringbuf_free(rb_map->rb);
+	kfree(rb_map);
+}
+
+static void *ringbuf_map_lookup_elem(struct bpf_map *map, void *key)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
+static int ringbuf_map_update_elem(struct bpf_map *map, void *key, void *value,
+				   u64 flags)
+{
+	return -ENOTSUPP;
+}
+
+static int ringbuf_map_delete_elem(struct bpf_map *map, void *key)
+{
+	return -ENOTSUPP;
+}
+
+static int ringbuf_map_get_next_key(struct bpf_map *map, void *key,
+				    void *next_key)
+{
+	return -ENOTSUPP;
+}
+
+static size_t bpf_ringbuf_mmap_page_cnt(const struct bpf_ringbuf *rb)
+{
+	size_t data_pages = (rb->mask + 1) >> PAGE_SHIFT;
+
+	/* consumer page + producer page + 2 x data pages */
+	return RINGBUF_POS_PAGES + 2 * data_pages;
+}
+
+static int ringbuf_map_mmap(struct bpf_map *map, struct vm_area_struct *vma)
+{
+	struct bpf_ringbuf_map *rb_map;
+	size_t mmap_sz;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	mmap_sz = bpf_ringbuf_mmap_page_cnt(rb_map->rb) << PAGE_SHIFT;
+
+	if (vma->vm_pgoff * PAGE_SIZE + (vma->vm_end - vma->vm_start) > mmap_sz)
+		return -EINVAL;
+
+	return remap_vmalloc_range(vma, rb_map->rb,
+				   vma->vm_pgoff + RINGBUF_PGOFF);
+}
+
+static unsigned long ringbuf_avail_data_sz(struct bpf_ringbuf *rb)
+{
+	unsigned long cons_pos, prod_pos;
+
+	cons_pos = smp_load_acquire(&rb->consumer_pos);
+	prod_pos = smp_load_acquire(&rb->producer_pos);
+	return prod_pos - cons_pos;
+}
+
+static __poll_t ringbuf_map_poll(struct bpf_map *map, struct file *filp,
+				 struct poll_table_struct *pts)
+{
+	struct bpf_ringbuf_map *rb_map;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	poll_wait(filp, &rb_map->rb->waitq, pts);
+
+	if (ringbuf_avail_data_sz(rb_map->rb))
+		return EPOLLIN | EPOLLRDNORM;
+	return 0;
+}
+
+const struct bpf_map_ops ringbuf_map_ops = {
+	.map_alloc = ringbuf_map_alloc,
+	.map_free = ringbuf_map_free,
+	.map_mmap = ringbuf_map_mmap,
+	.map_poll = ringbuf_map_poll,
+	.map_lookup_elem = ringbuf_map_lookup_elem,
+	.map_update_elem = ringbuf_map_update_elem,
+	.map_delete_elem = ringbuf_map_delete_elem,
+	.map_get_next_key = ringbuf_map_get_next_key,
+};
+
+/* Given pointer to ring buffer record metadata and struct bpf_ringbuf itself,
+ * calculate offset from record metadata to ring buffer in pages, rounded
+ * down. This page offset is stored as part of record metadata and allows to
+ * restore struct bpf_ringbuf * from record pointer. This page offset is
+ * stored at offset 4 of record metadata header.
+ */
+static size_t bpf_ringbuf_rec_pg_off(struct bpf_ringbuf *rb,
+				     struct bpf_ringbuf_hdr *hdr)
+{
+	return ((void *)hdr - (void *)rb) >> PAGE_SHIFT;
+}
+
+/* Given pointer to ring buffer record header, restore pointer to struct
+ * bpf_ringbuf itself by using page offset stored at offset 4
+ */
+static struct bpf_ringbuf *
+bpf_ringbuf_restore_from_rec(struct bpf_ringbuf_hdr *hdr)
+{
+	unsigned long addr = (unsigned long)(void *)hdr;
+	unsigned long off = (unsigned long)hdr->pg_off << PAGE_SHIFT;
+
+	return (void*)((addr & PAGE_MASK) - off);
+}
+
+static void *__bpf_ringbuf_reserve(struct bpf_ringbuf *rb, u64 size)
+{
+	unsigned long cons_pos, prod_pos, new_prod_pos, flags;
+	u32 len, pg_off;
+	struct bpf_ringbuf_hdr *hdr;
+
+	if (unlikely(size > RINGBUF_MAX_RECORD_SZ))
+		return NULL;
+
+	len = round_up(size + BPF_RINGBUF_HDR_SZ, 8);
+	cons_pos = smp_load_acquire(&rb->consumer_pos);
+
+	if (in_nmi()) {
+		if (!spin_trylock_irqsave(&rb->spinlock, flags))
+			return NULL;
+	} else {
+		spin_lock_irqsave(&rb->spinlock, flags);
+	}
+
+	prod_pos = rb->producer_pos;
+	new_prod_pos = prod_pos + len;
+
+	/* check for out of ringbuf space by ensuring producer position
+	 * doesn't advance more than (ringbuf_size - 1) ahead
+	 */
+	if (new_prod_pos - cons_pos > rb->mask) {
+		spin_unlock_irqrestore(&rb->spinlock, flags);
+		return NULL;
+	}
+
+	hdr = (void *)rb->data + (prod_pos & rb->mask);
+	pg_off = bpf_ringbuf_rec_pg_off(rb, hdr);
+	hdr->len = size | BPF_RINGBUF_BUSY_BIT;
+	hdr->pg_off = pg_off;
+
+	/* pairs with consumer's smp_load_acquire() */
+	smp_store_release(&rb->producer_pos, new_prod_pos);
+
+	spin_unlock_irqrestore(&rb->spinlock, flags);
+
+	return (void *)hdr + BPF_RINGBUF_HDR_SZ;
+}
+
+BPF_CALL_3(bpf_ringbuf_reserve, struct bpf_map *, map, u64, size, u64, flags)
+{
+	struct bpf_ringbuf_map *rb_map;
+
+	if (unlikely(flags))
+		return 0;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	return (unsigned long)__bpf_ringbuf_reserve(rb_map->rb, size);
+}
+
+const struct bpf_func_proto bpf_ringbuf_reserve_proto = {
+	.func		= bpf_ringbuf_reserve,
+	.ret_type	= RET_PTR_TO_ALLOC_MEM_OR_NULL,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_CONST_ALLOC_SIZE_OR_ZERO,
+	.arg3_type	= ARG_ANYTHING,
+};
+
+static void bpf_ringbuf_commit(void *sample, u64 flags, bool discard)
+{
+	unsigned long rec_pos, cons_pos;
+	struct bpf_ringbuf_hdr *hdr;
+	struct bpf_ringbuf *rb;
+	u32 new_len;
+
+	hdr = sample - BPF_RINGBUF_HDR_SZ;
+	rb = bpf_ringbuf_restore_from_rec(hdr);
+	new_len = hdr->len ^ BPF_RINGBUF_BUSY_BIT;
+	if (discard)
+		new_len |= BPF_RINGBUF_DISCARD_BIT;
+
+	/* update record header with correct final size prefix */
+	xchg(&hdr->len, new_len);
+
+	/* if consumer caught up and is waiting for our record, notify about
+	 * new data availability
+	 */
+	rec_pos = (void *)hdr - (void *)rb->data;
+	cons_pos = smp_load_acquire(&rb->consumer_pos) & rb->mask;
+
+	if (flags & BPF_RB_FORCE_WAKEUP)
+		irq_work_queue(&rb->work);
+	else if (cons_pos == rec_pos && !(flags & BPF_RB_NO_WAKEUP))
+		irq_work_queue(&rb->work);
+}
+
+BPF_CALL_2(bpf_ringbuf_submit, void *, sample, u64, flags)
+{
+	bpf_ringbuf_commit(sample, flags, false /* discard */);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_submit_proto = {
+	.func		= bpf_ringbuf_submit,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_ALLOC_MEM,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_ringbuf_discard, void *, sample, u64, flags)
+{
+	bpf_ringbuf_commit(sample, flags, true /* discard */);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_discard_proto = {
+	.func		= bpf_ringbuf_discard,
+	.ret_type	= RET_VOID,
+	.arg1_type	= ARG_PTR_TO_ALLOC_MEM,
+	.arg2_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_4(bpf_ringbuf_output, struct bpf_map *, map, void *, data, u64, size,
+	   u64, flags)
+{
+	struct bpf_ringbuf_map *rb_map;
+	void *rec;
+
+	if (unlikely(flags & ~(BPF_RB_NO_WAKEUP | BPF_RB_FORCE_WAKEUP)))
+		return -EINVAL;
+
+	rb_map = container_of(map, struct bpf_ringbuf_map, map);
+	rec = __bpf_ringbuf_reserve(rb_map->rb, size);
+	if (!rec)
+		return -EAGAIN;
+
+	memcpy(rec, data, size);
+	bpf_ringbuf_commit(rec, flags, false /* discard */);
+	return 0;
+}
+
+const struct bpf_func_proto bpf_ringbuf_output_proto = {
+	.func		= bpf_ringbuf_output,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_PTR_TO_MEM,
+	.arg3_type	= ARG_CONST_SIZE_OR_ZERO,
+	.arg4_type	= ARG_ANYTHING,
+};
+
+BPF_CALL_2(bpf_ringbuf_query, struct bpf_map *, map, u64, flags)
+{
+	struct bpf_ringbuf *rb;
+
+	rb = container_of(map, struct bpf_ringbuf_map, map)->rb;
+
+	switch (flags) {
+	case BPF_RB_AVAIL_DATA:
+		return ringbuf_avail_data_sz(rb);
+	case BPF_RB_RING_SIZE:
+		return rb->mask + 1;
+	case BPF_RB_CONS_POS:
+		return smp_load_acquire(&rb->consumer_pos);
+	case BPF_RB_PROD_POS:
+		return smp_load_acquire(&rb->producer_pos);
+	default:
+		return 0;
+	}
+}
+
+const struct bpf_func_proto bpf_ringbuf_query_proto = {
+	.func		= bpf_ringbuf_query,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_CONST_MAP_PTR,
+	.arg2_type	= ARG_ANYTHING,
+};
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 2c969a9b90d3..9de3540fa90c 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -26,6 +26,7 @@
 #include <linux/audit.h>
 #include <uapi/linux/btf.h>
 #include <linux/bpf_lsm.h>
+#include <linux/poll.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -662,6 +663,16 @@ out:
 	return err;
 }
 
+static __poll_t bpf_map_poll(struct file *filp, struct poll_table_struct *pts)
+{
+	struct bpf_map *map = filp->private_data;
+
+	if (map->ops->map_poll)	/* callback is optional per map ops */
+		return map->ops->map_poll(map, filp, pts);
+
+	return EPOLLERR;	/* this map's ops provide no map_poll */
+}
+
 const struct file_operations bpf_map_fops = {
 #ifdef CONFIG_PROC_FS
 	.show_fdinfo	= bpf_map_show_fdinfo,
@@ -670,6 +681,7 @@ const struct file_operations bpf_map_fops = {
 	.read		= bpf_dummy_read,
 	.write		= bpf_dummy_write,
 	.mmap		= bpf_map_mmap,
+	.poll		= bpf_map_poll,
 };
 
 int bpf_map_new_fd(struct bpf_map *map, int flags)
diff --git a/kernel/bpf/verifier.c b/kernel/bpf/verifier.c
index 6d725a26f66e..5c7bbaac81ef 100644
--- a/kernel/bpf/verifier.c
+++ b/kernel/bpf/verifier.c
@@ -233,6 +233,7 @@ struct bpf_call_arg_meta {
 	bool pkt_access;
 	int regno;
 	int access_size;
+	int mem_size;
 	u64 msize_max_value;
 	int ref_obj_id;
 	int func_id;
@@ -408,7 +409,8 @@ static bool reg_type_may_be_null(enum bpf_reg_type type)
 	       type == PTR_TO_SOCKET_OR_NULL ||
 	       type == PTR_TO_SOCK_COMMON_OR_NULL ||
 	       type == PTR_TO_TCP_SOCK_OR_NULL ||
-	       type == PTR_TO_BTF_ID_OR_NULL;
+	       type == PTR_TO_BTF_ID_OR_NULL ||
+	       type == PTR_TO_MEM_OR_NULL;
 }
 
 static bool reg_may_point_to_spin_lock(const struct bpf_reg_state *reg)
@@ -422,7 +424,9 @@ static bool reg_type_may_be_refcounted_or_null(enum bpf_reg_type type)
 	return type == PTR_TO_SOCKET ||
 		type == PTR_TO_SOCKET_OR_NULL ||
 		type == PTR_TO_TCP_SOCK ||
-		type == PTR_TO_TCP_SOCK_OR_NULL;
+		type == PTR_TO_TCP_SOCK_OR_NULL ||
+		type == PTR_TO_MEM ||
+		type == PTR_TO_MEM_OR_NULL;
 }
 
 static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
@@ -436,7 +440,9 @@ static bool arg_type_may_be_refcounted(enum bpf_arg_type type)
  */
 static bool is_release_function(enum bpf_func_id func_id)
 {
-	return func_id == BPF_FUNC_sk_release;
+	return func_id == BPF_FUNC_sk_release ||
+	       func_id == BPF_FUNC_ringbuf_submit ||
+	       func_id == BPF_FUNC_ringbuf_discard;
 }
 
 static bool may_be_acquire_function(enum bpf_func_id func_id)
@@ -444,7 +450,8 @@ static bool may_be_acquire_function(enum bpf_func_id func_id)
 	return func_id == BPF_FUNC_sk_lookup_tcp ||
 		func_id == BPF_FUNC_sk_lookup_udp ||
 		func_id == BPF_FUNC_skc_lookup_tcp ||
-		func_id == BPF_FUNC_map_lookup_elem;
+		func_id == BPF_FUNC_map_lookup_elem ||
+	        func_id == BPF_FUNC_ringbuf_reserve;
 }
 
 static bool is_acquire_function(enum bpf_func_id func_id,
@@ -454,7 +461,8 @@ static bool is_acquire_function(enum bpf_func_id func_id,
 
 	if (func_id == BPF_FUNC_sk_lookup_tcp ||
 	    func_id == BPF_FUNC_sk_lookup_udp ||
-	    func_id == BPF_FUNC_skc_lookup_tcp)
+	    func_id == BPF_FUNC_skc_lookup_tcp ||
+	    func_id == BPF_FUNC_ringbuf_reserve)
 		return true;
 
 	if (func_id == BPF_FUNC_map_lookup_elem &&
@@ -494,6 +502,8 @@ static const char * const reg_type_str[] = {
 	[PTR_TO_XDP_SOCK]	= "xdp_sock",
 	[PTR_TO_BTF_ID]		= "ptr_",
 	[PTR_TO_BTF_ID_OR_NULL]	= "ptr_or_null_",
+	[PTR_TO_MEM]		= "mem",
+	[PTR_TO_MEM_OR_NULL]	= "mem_or_null",
 };
 
 static char slot_type_char[] = {
@@ -2468,32 +2478,49 @@ static int check_map_access_type(struct bpf_verifier_env *env, u32 regno,
 	return 0;
 }
 
-/* check read/write into map element returned by bpf_map_lookup_elem() */
-static int __check_map_access(struct bpf_verifier_env *env, u32 regno, int off,
-			      int size, bool zero_size_allowed)
+/* check read/write into memory region (e.g., map value, ringbuf sample, etc) */
+static int __check_mem_access(struct bpf_verifier_env *env, int regno,
+			      int off, int size, u32 mem_size,
+			      bool zero_size_allowed)
 {
-	struct bpf_reg_state *regs = cur_regs(env);
-	struct bpf_map *map = regs[regno].map_ptr;
+	bool size_ok = size > 0 || (size == 0 && zero_size_allowed);
+	struct bpf_reg_state *reg;
+
+	if (off >= 0 && size_ok && (u64)off + size <= mem_size)
+		return 0;	/* [off, off + size) lies within [0, mem_size] */
 
-	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
-	    off + size > map->value_size) {
+	reg = &cur_regs(env)[regno];
+	switch (reg->type) {	/* only selects the wording of the diagnostic */
+	case PTR_TO_MAP_VALUE:
 		verbose(env, "invalid access to map value, value_size=%d off=%d size=%d\n",
-			map->value_size, off, size);
-		return -EACCES;
+			mem_size, off, size);
+		break;
+	case PTR_TO_PACKET:
+	case PTR_TO_PACKET_META:
+	case PTR_TO_PACKET_END:
+		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
+			off, size, regno, reg->id, reg->off, mem_size); /* R%d(..off=) is the register's own off */
+		break;
+	case PTR_TO_MEM:
+	default:
+		verbose(env, "invalid access to memory, mem_size=%u off=%d size=%d\n",
+			mem_size, off, size);
 	}
-	return 0;
+
+	return -EACCES;	/* every path reaching here has logged a reason */
 }
 
-/* check read/write into a map element with possible variable offset */
-static int check_map_access(struct bpf_verifier_env *env, u32 regno,
-			    int off, int size, bool zero_size_allowed)
+/* check read/write into a memory region with possible variable offset */
+static int check_mem_region_access(struct bpf_verifier_env *env, u32 regno,
+				   int off, int size, u32 mem_size,
+				   bool zero_size_allowed)
 {
 	struct bpf_verifier_state *vstate = env->cur_state;
 	struct bpf_func_state *state = vstate->frame[vstate->curframe];
 	struct bpf_reg_state *reg = &state->regs[regno];
 	int err;
 
-	/* We may have adjusted the register to this map value, so we
+	/* We may have adjusted the register pointing to memory region, so we
 	 * need to try adding each of min_value and max_value to off
 	 * to make sure our theoretical access will be safe.
 	 */
@@ -2514,10 +2541,10 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 			regno);
 		return -EACCES;
 	}
-	err = __check_map_access(env, regno, reg->smin_value + off, size,
-				 zero_size_allowed);
+	err = __check_mem_access(env, regno, reg->smin_value + off, size,
+				 mem_size, zero_size_allowed);
 	if (err) {
-		verbose(env, "R%d min value is outside of the array range\n",
+		verbose(env, "R%d min value is outside of the allowed memory range\n",
 			regno);
 		return err;
 	}
@@ -2527,18 +2554,38 @@ static int check_map_access(struct bpf_verifier_env *env, u32 regno,
 	 * If reg->umax_value + off could overflow, treat that as unbounded too.
 	 */
 	if (reg->umax_value >= BPF_MAX_VAR_OFF) {
-		verbose(env, "R%d unbounded memory access, make sure to bounds check any array access into a map\n",
+		verbose(env, "R%d unbounded memory access, make sure to bounds check any such access\n",
 			regno);
 		return -EACCES;
 	}
-	err = __check_map_access(env, regno, reg->umax_value + off, size,
-				 zero_size_allowed);
-	if (err)
-		verbose(env, "R%d max value is outside of the array range\n",
+	err = __check_mem_access(env, regno, reg->umax_value + off, size,
+				 mem_size, zero_size_allowed);
+	if (err) {
+		verbose(env, "R%d max value is outside of the allowed memory range\n",
 			regno);
+		return err;
+	}
+
+	return 0;
+}
 
-	if (map_value_has_spin_lock(reg->map_ptr)) {
-		u32 lock = reg->map_ptr->spin_lock_off;
+/* check read/write into a map element with possible variable offset */
+static int check_map_access(struct bpf_verifier_env *env, u32 regno,
+			    int off, int size, bool zero_size_allowed)
+{
+	struct bpf_verifier_state *vstate = env->cur_state;
+	struct bpf_func_state *state = vstate->frame[vstate->curframe];
+	struct bpf_reg_state *reg = &state->regs[regno];
+	struct bpf_map *map = reg->map_ptr;
+	int err;
+
+	err = check_mem_region_access(env, regno, off, size, map->value_size,
+				      zero_size_allowed);
+	if (err)
+		return err;
+
+	if (map_value_has_spin_lock(map)) {
+		u32 lock = map->spin_lock_off;
 
 		/* if any part of struct bpf_spin_lock can be touched by
 		 * load/store reject this program.
@@ -2596,21 +2643,6 @@ static bool may_access_direct_pkt_data(struct bpf_verifier_env *env,
 	}
 }
 
-static int __check_packet_access(struct bpf_verifier_env *env, u32 regno,
-				 int off, int size, bool zero_size_allowed)
-{
-	struct bpf_reg_state *regs = cur_regs(env);
-	struct bpf_reg_state *reg = &regs[regno];
-
-	if (off < 0 || size < 0 || (size == 0 && !zero_size_allowed) ||
-	    (u64)off + size > reg->range) {
-		verbose(env, "invalid access to packet, off=%d size=%d, R%d(id=%d,off=%d,r=%d)\n",
-			off, size, regno, reg->id, reg->off, reg->range);
-		return -EACCES;
-	}
-	return 0;
-}
-
 static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 			       int size, bool zero_size_allowed)
 {
@@ -2631,16 +2663,17 @@ static int check_packet_access(struct bpf_verifier_env *env, u32 regno, int off,
 			regno);
 		return -EACCES;
 	}
-	err = __check_packet_access(env, regno, off, size, zero_size_allowed);
+	err = __check_mem_access(env, regno, off, size, reg->range,
+				 zero_size_allowed);
 	if (err) {
 		verbose(env, "R%d offset is outside of the packet\n", regno);
 		return err;
 	}
 
-	/* __check_packet_access has made sure "off + size - 1" is within u16.
+	/* __check_mem_access has made sure "off + size - 1" is within u16.
 	 * reg->umax_value can't be bigger than MAX_PACKET_OFF which is 0xffff,
 	 * otherwise find_good_pkt_pointers would have refused to set range info
-	 * that __check_packet_access would have rejected this pkt access.
+	 * that __check_mem_access would have rejected this pkt access.
 	 * Therefore, "off + reg->umax_value + size - 1" won't overflow u32.
 	 */
 	env->prog->aux->max_pkt_offset =
@@ -3220,6 +3253,16 @@ static int check_mem_access(struct bpf_verifier_env *env, int insn_idx, u32 regn
 				mark_reg_unknown(env, regs, value_regno);
 			}
 		}
+	} else if (reg->type == PTR_TO_MEM) {
+		if (t == BPF_WRITE && value_regno >= 0 &&
+		    is_pointer_value(env, value_regno)) {
+			verbose(env, "R%d leaks addr into mem\n", value_regno);
+			return -EACCES;
+		}
+		err = check_mem_region_access(env, regno, off, size,
+					      reg->mem_size, false);
+		if (!err && t == BPF_READ && value_regno >= 0)
+			mark_reg_unknown(env, regs, value_regno);
 	} else if (reg->type == PTR_TO_CTX) {
 		enum bpf_reg_type reg_type = SCALAR_VALUE;
 		u32 btf_id = 0;
@@ -3557,6 +3600,10 @@ static int check_helper_mem_access(struct bpf_verifier_env *env, int regno,
 			return -EACCES;
 		return check_map_access(env, regno, reg->off, access_size,
 					zero_size_allowed);
+	case PTR_TO_MEM:
+		return check_mem_region_access(env, regno, reg->off,
+					       access_size, reg->mem_size,
+					       zero_size_allowed);
 	default: /* scalar_value|ptr_to_stack or invalid ptr */
 		return check_stack_boundary(env, regno, access_size,
 					    zero_size_allowed, meta);
@@ -3661,6 +3708,17 @@ static bool arg_type_is_mem_size(enum bpf_arg_type type)
 	       type == ARG_CONST_SIZE_OR_ZERO;
 }
 
+static bool arg_type_is_alloc_mem_ptr(enum bpf_arg_type type)
+{
+	return type == ARG_PTR_TO_ALLOC_MEM ||
+	       type == ARG_PTR_TO_ALLOC_MEM_OR_NULL;	/* variant that also accepts a NULL register */
+}
+
+static bool arg_type_is_alloc_size(enum bpf_arg_type type)
+{
+	return type == ARG_CONST_ALLOC_SIZE_OR_ZERO;	/* the only alloc-size arg type checked here */
+}
+
 static bool arg_type_is_int_ptr(enum bpf_arg_type type)
 {
 	return type == ARG_PTR_TO_INT ||
@@ -3720,7 +3778,8 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 			 type != expected_type)
 			goto err_type;
 	} else if (arg_type == ARG_CONST_SIZE ||
-		   arg_type == ARG_CONST_SIZE_OR_ZERO) {
+		   arg_type == ARG_CONST_SIZE_OR_ZERO ||
+		   arg_type == ARG_CONST_ALLOC_SIZE_OR_ZERO) {
 		expected_type = SCALAR_VALUE;
 		if (type != expected_type)
 			goto err_type;
@@ -3791,13 +3850,29 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 		 * happens during stack boundary checking.
 		 */
 		if (register_is_null(reg) &&
-		    arg_type == ARG_PTR_TO_MEM_OR_NULL)
+		    (arg_type == ARG_PTR_TO_MEM_OR_NULL ||
+		     arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL))
 			/* final test in check_stack_boundary() */;
 		else if (!type_is_pkt_pointer(type) &&
 			 type != PTR_TO_MAP_VALUE &&
+			 type != PTR_TO_MEM &&
 			 type != expected_type)
 			goto err_type;
 		meta->raw_mode = arg_type == ARG_PTR_TO_UNINIT_MEM;
+	} else if (arg_type_is_alloc_mem_ptr(arg_type)) {
+		expected_type = PTR_TO_MEM;
+		if (register_is_null(reg) &&
+		    arg_type == ARG_PTR_TO_ALLOC_MEM_OR_NULL)
+			/* final test in check_stack_boundary() */;
+		else if (type != expected_type)
+			goto err_type;
+		if (meta->ref_obj_id) {
+			verbose(env, "verifier internal error: more than one arg with ref_obj_id R%d %u %u\n",
+				regno, reg->ref_obj_id,
+				meta->ref_obj_id);
+			return -EFAULT;
+		}
+		meta->ref_obj_id = reg->ref_obj_id;
 	} else if (arg_type_is_int_ptr(arg_type)) {
 		expected_type = PTR_TO_STACK;
 		if (!type_is_pkt_pointer(type) &&
@@ -3893,6 +3968,13 @@ static int check_func_arg(struct bpf_verifier_env *env, u32 regno,
 					      zero_size_allowed, meta);
 		if (!err)
 			err = mark_chain_precision(env, regno);
+	} else if (arg_type_is_alloc_size(arg_type)) {
+		if (!tnum_is_const(reg->var_off)) {
+			verbose(env, "R%d unbounded size, use 'var &= const' or 'if (var < const)'\n",
+				regno);
+			return -EACCES;
+		}
+		meta->mem_size = reg->var_off.value;
 	} else if (arg_type_is_int_ptr(arg_type)) {
 		int size = int_ptr_type_to_size(arg_type);
 
@@ -3929,6 +4011,14 @@ static int check_map_func_compatibility(struct bpf_verifier_env *env,
 		    func_id != BPF_FUNC_xdp_output)
 			goto error;
 		break;
+	case BPF_MAP_TYPE_RINGBUF:
+		if (func_id != BPF_FUNC_ringbuf_output &&
+		    func_id != BPF_FUNC_ringbuf_reserve &&
+		    func_id != BPF_FUNC_ringbuf_submit &&
+		    func_id != BPF_FUNC_ringbuf_discard &&
+		    func_id != BPF_FUNC_ringbuf_query)
+			goto error;
+		break;
 	case BPF_MAP_TYPE_STACK_TRACE:
 		if (func_id != BPF_FUNC_get_stackid)
 			goto error;
@@ -4655,6 +4745,11 @@ static int check_helper_call(struct bpf_verifier_env *env, int func_id, int insn
 		mark_reg_known_zero(env, regs, BPF_REG_0);
 		regs[BPF_REG_0].type = PTR_TO_TCP_SOCK_OR_NULL;
 		regs[BPF_REG_0].id = ++env->id_gen;
+	} else if (fn->ret_type == RET_PTR_TO_ALLOC_MEM_OR_NULL) {
+		mark_reg_known_zero(env, regs, BPF_REG_0);
+		regs[BPF_REG_0].type = PTR_TO_MEM_OR_NULL;
+		regs[BPF_REG_0].id = ++env->id_gen;
+		regs[BPF_REG_0].mem_size = meta.mem_size;
 	} else {
 		verbose(env, "unknown return type %d of func %s#%d\n",
 			fn->ret_type, func_id_name(func_id), func_id);
@@ -6611,6 +6706,8 @@ static void mark_ptr_or_null_reg(struct bpf_func_state *state,
 			reg->type = PTR_TO_TCP_SOCK;
 		} else if (reg->type == PTR_TO_BTF_ID_OR_NULL) {
 			reg->type = PTR_TO_BTF_ID;
+		} else if (reg->type == PTR_TO_MEM_OR_NULL) {
+			reg->type = PTR_TO_MEM;
 		}
 		if (is_null) {
 			/* We don't need id and ref_obj_id from this point
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 187cd6995bbb..3767d34114c0 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1088,6 +1088,16 @@ bpf_tracing_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_perf_event_read_value_proto;
 	case BPF_FUNC_get_ns_current_pid_tgid:
 		return &bpf_get_ns_current_pid_tgid_proto;
+	case BPF_FUNC_ringbuf_output:
+		return &bpf_ringbuf_output_proto;
+	case BPF_FUNC_ringbuf_reserve:
+		return &bpf_ringbuf_reserve_proto;
+	case BPF_FUNC_ringbuf_submit:
+		return &bpf_ringbuf_submit_proto;
+	case BPF_FUNC_ringbuf_discard:
+		return &bpf_ringbuf_discard_proto;
+	case BPF_FUNC_ringbuf_query:
+		return &bpf_ringbuf_query_proto;
 	default:
 		return NULL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 54b93f8b49b8..974ca6e948e3 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -147,6 +147,7 @@ enum bpf_map_type {
 	BPF_MAP_TYPE_SK_STORAGE,
 	BPF_MAP_TYPE_DEVMAP_HASH,
 	BPF_MAP_TYPE_STRUCT_OPS,
+	BPF_MAP_TYPE_RINGBUF,
 };
 
 /* Note that tracing related programs such as
@@ -3157,6 +3158,59 @@ union bpf_attr {
  *		**bpf_sk_cgroup_id**\ ().
  *	Return
  *		The id is returned or 0 in case the id could not be retrieved.
+ *
+ * int bpf_ringbuf_output(void *ringbuf, void *data, u64 size, u64 flags)
+ * 	Description
+ * 		Copy *size* bytes from *data* into a ring buffer *ringbuf*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		0, on success;
+ * 		< 0, on error.
+ *
+ * void *bpf_ringbuf_reserve(void *ringbuf, u64 size, u64 flags)
+ * 	Description
+ * 		Reserve *size* bytes of payload in a ring buffer *ringbuf*.
+ * 	Return
+ * 		Valid pointer with *size* bytes of memory available; NULL,
+ * 		otherwise.
+ *
+ * void bpf_ringbuf_submit(void *data, u64 flags)
+ * 	Description
+ * 		Submit reserved ring buffer sample, pointed to by *data*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		Nothing. Always succeeds.
+ *
+ * void bpf_ringbuf_discard(void *data, u64 flags)
+ * 	Description
+ * 		Discard reserved ring buffer sample, pointed to by *data*.
+ * 		If BPF_RB_NO_WAKEUP is specified in *flags*, no notification of
+ * 		new data availability is sent.
+ * 		If BPF_RB_FORCE_WAKEUP is specified in *flags*, notification of
+ * 		new data availability is sent unconditionally.
+ * 	Return
+ * 		Nothing. Always succeeds.
+ *
+ * u64 bpf_ringbuf_query(void *ringbuf, u64 flags)
+ *	Description
+ *		Query various characteristics of provided ring buffer. What
+ *		exactly is queried is determined by *flags*:
+ *		  - BPF_RB_AVAIL_DATA - amount of data not yet consumed;
+ *		  - BPF_RB_RING_SIZE - the size of ring buffer;
+ *		  - BPF_RB_CONS_POS - consumer position (can wrap around);
+ *		  - BPF_RB_PROD_POS - producer(s) position (can wrap around);
+ *		Data returned is just a momentary snapshot of actual values
+ *		and could be inaccurate, so this facility should be used to
+ *		power heuristics and for reporting, not to make 100% correct
+ *		calculation.
+ *	Return
+ *		Requested value, or 0, if flags are not recognized.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3288,7 +3342,12 @@ union bpf_attr {
 	FN(seq_printf),			\
 	FN(seq_write),			\
 	FN(sk_cgroup_id),		\
-	FN(sk_ancestor_cgroup_id),
+	FN(sk_ancestor_cgroup_id),	\
+	FN(ringbuf_output),		\
+	FN(ringbuf_reserve),		\
+	FN(ringbuf_submit),		\
+	FN(ringbuf_discard),		\
+	FN(ringbuf_query),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3398,6 +3457,29 @@ enum {
 	BPF_F_GET_BRANCH_RECORDS_SIZE	= (1ULL << 0),
 };
 
+/* BPF_FUNC_ringbuf_submit, BPF_FUNC_ringbuf_discard, and
+ * BPF_FUNC_ringbuf_output flags.
+ */
+enum {
+	BPF_RB_NO_WAKEUP		= (1ULL << 0),
+	BPF_RB_FORCE_WAKEUP		= (1ULL << 1),
+};
+
+/* BPF_FUNC_bpf_ringbuf_query flags */
+enum {
+	BPF_RB_AVAIL_DATA = 0,
+	BPF_RB_RING_SIZE = 1,
+	BPF_RB_CONS_POS = 2,
+	BPF_RB_PROD_POS = 3,
+};
+
+/* BPF ring buffer constants */
+enum {
+	BPF_RINGBUF_BUSY_BIT		= (1U << 31),
+	BPF_RINGBUF_DISCARD_BIT		= (1U << 30),
+	BPF_RINGBUF_HDR_SZ		= 8,
+};
+
 /* Mode for BPF_FUNC_skb_adjust_room helper. */
 enum bpf_adj_room_mode {
 	BPF_ADJ_ROOM_NET,
diff --git a/tools/testing/selftests/bpf/verifier/and.c b/tools/testing/selftests/bpf/verifier/and.c
index e0fad1548737..d781bc86e100 100644
--- a/tools/testing/selftests/bpf/verifier/and.c
+++ b/tools/testing/selftests/bpf/verifier/and.c
@@ -15,7 +15,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -44,7 +44,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/array_access.c b/tools/testing/selftests/bpf/verifier/array_access.c
index f3c33e128709..1c4b1939f5a8 100644
--- a/tools/testing/selftests/bpf/verifier/array_access.c
+++ b/tools/testing/selftests/bpf/verifier/array_access.c
@@ -117,7 +117,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -137,7 +137,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R0 unbounded memory access, make sure to bounds check any array access into a map",
+	.errstr = "R0 unbounded memory access, make sure to bounds check any such access",
 	.result = REJECT,
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
diff --git a/tools/testing/selftests/bpf/verifier/bounds.c b/tools/testing/selftests/bpf/verifier/bounds.c
index 58f4aa593b1b..4d6645f2874c 100644
--- a/tools/testing/selftests/bpf/verifier/bounds.c
+++ b/tools/testing/selftests/bpf/verifier/bounds.c
@@ -20,7 +20,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT,
 },
 {
@@ -146,7 +146,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result = REJECT
 },
 {
@@ -354,7 +354,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
-	.errstr = "R0 max value is outside of the array range",
+	.errstr = "R0 max value is outside of the allowed memory range",
 	.result = REJECT
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/calls.c b/tools/testing/selftests/bpf/verifier/calls.c
index 7629a0cebb9b..94258c6b5235 100644
--- a/tools/testing/selftests/bpf/verifier/calls.c
+++ b/tools/testing/selftests/bpf/verifier/calls.c
@@ -105,7 +105,7 @@
 	.prog_type = BPF_PROG_TYPE_SCHED_CLS,
 	.fixup_map_hash_8b = { 16 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 },
 {
 	"calls: overlapping caller/callee",
diff --git a/tools/testing/selftests/bpf/verifier/direct_value_access.c b/tools/testing/selftests/bpf/verifier/direct_value_access.c
index b9fb28e8e224..988f46a1a4c7 100644
--- a/tools/testing/selftests/bpf/verifier/direct_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/direct_value_access.c
@@ -68,7 +68,7 @@
 	},
 	.fixup_map_array_48b = { 1 },
 	.result = REJECT,
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 },
 {
 	"direct map access, write test 7",
@@ -220,7 +220,7 @@
 	},
 	.fixup_map_array_small = { 1 },
 	.result = REJECT,
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 },
 {
 	"direct map access, write test 19",
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 67ab12410050..5a605ae131a9 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -318,7 +318,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 4 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 7572e403ddb9..961f28139b96 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -280,7 +280,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -415,7 +415,7 @@
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
-	.errstr = "R1 min value is outside of the array range",
+	.errstr = "R1 min value is outside of the allowed memory range",
 	.result = REJECT,
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
@@ -926,7 +926,7 @@
 	},
 	.fixup_map_hash_16b = { 3, 10 },
 	.result = REJECT,
-	.errstr = "R2 unbounded memory access, make sure to bounds check any array access into a map",
+	.errstr = "R2 unbounded memory access, make sure to bounds check any such access",
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 },
 {
diff --git a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
index a53d99cebd9f..97ee658e1242 100644
--- a/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
+++ b/tools/testing/selftests/bpf/verifier/value_ptr_arith.c
@@ -50,7 +50,7 @@
 	.fixup_map_array_48b = { 8 },
 	.result = ACCEPT,
 	.result_unpriv = REJECT,
-	.errstr_unpriv = "R0 min value is outside of the array range",
+	.errstr_unpriv = "R0 min value is outside of the allowed memory range",
 	.retval = 1,
 },
 {
@@ -325,7 +325,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 	.result_unpriv = REJECT,
 	.errstr_unpriv = "R0 pointer arithmetic of map value goes out of range",
 },
@@ -601,7 +601,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R1 max value is outside of the array range",
+	.errstr = "R1 max value is outside of the allowed memory range",
 	.errstr_unpriv = "R1 pointer arithmetic of map value goes out of range",
 	.flags = F_NEEDS_EFFICIENT_UNALIGNED_ACCESS,
 },
@@ -726,7 +726,7 @@
 	},
 	.fixup_map_array_48b = { 3 },
 	.result = REJECT,
-	.errstr = "R0 min value is outside of the array range",
+	.errstr = "R0 min value is outside of the allowed memory range",
 },
 {
 	"map access: value_ptr -= known scalar, 2",
-- 
cgit v1.2.3-59-g8ed1b


From bf99c936f9478a05d51e9f101f90de70bee9a89c Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 29 May 2020 00:54:21 -0700
Subject: libbpf: Add BPF ring buffer support

Declaring and instantiating BPF ring buffer doesn't require any changes to
libbpf, as it's just another type of maps. So using existing BTF-defined maps
syntax with __uint(type, BPF_MAP_TYPE_RINGBUF) and __uint(max_entries,
<size-of-ring-buf>) is all that's necessary to create and use BPF ring buffer.

This patch adds BPF ring buffer consumer to libbpf. It is very similar to
perf_buffer implementation in terms of API, but also attempts to fix some
minor problems and inconveniences with existing perf_buffer API.

ring_buffer supports both the single ring buffer use case (using just
ring_buffer__new()) and adding more ring buffers, each with its own
callback and context. This allows efficiently polling and consuming
multiple, potentially completely independent, ring buffers using a single
epoll instance.

The latter is actually a problem in practice for applications
that are using multiple sets of perf buffers. They have to create multiple
instances for struct perf_buffer and poll them independently or in a loop,
each approach having its own problems (e.g., inability to use a common poll
timeout). struct ring_buffer eliminates this problem by aggregating many
independent ring buffer instances under the single "ring buffer manager".

Second, perf_buffer's callback can't return error, so applications that need
to stop polling due to error in data or data signalling the end, have to use
extra mechanisms to signal that polling has to stop. ring_buffer's callback
can return error, which will be passed through back to user code and can be
acted upon appropriately.

Two APIs allow to consume ring buffer data:
  - ring_buffer__poll(), which will wait for data availability notification
    and will consume data only from reported ring buffer(s); this API allows
    to efficiently use resources by reading data only when it becomes
    available;
  - ring_buffer__consume(), will attempt to read new records regardless of
    data availability notification sub-system. This API is useful for cases
    when lowest latency is required, at the expense of burning CPU resources.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200529075424.3139988-3-andriin@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/Build           |   2 +-
 tools/lib/bpf/libbpf.h        |  21 ++++
 tools/lib/bpf/libbpf.map      |   5 +
 tools/lib/bpf/libbpf_probes.c |   5 +
 tools/lib/bpf/ringbuf.c       | 285 ++++++++++++++++++++++++++++++++++++++++++
 5 files changed, 317 insertions(+), 1 deletion(-)
 create mode 100644 tools/lib/bpf/ringbuf.c

diff --git a/tools/lib/bpf/Build b/tools/lib/bpf/Build
index e3962cfbc9a6..190366d05588 100644
--- a/tools/lib/bpf/Build
+++ b/tools/lib/bpf/Build
@@ -1,3 +1,3 @@
 libbpf-y := libbpf.o bpf.o nlattr.o btf.o libbpf_errno.o str_error.o \
 	    netlink.o bpf_prog_linfo.o libbpf_probes.o xsk.o hashmap.o \
-	    btf_dump.o
+	    btf_dump.o ringbuf.o
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 1e2e399a5f2c..8528a02d5af8 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -478,6 +478,27 @@ LIBBPF_API int bpf_get_link_xdp_id(int ifindex, __u32 *prog_id, __u32 flags);
 LIBBPF_API int bpf_get_link_xdp_info(int ifindex, struct xdp_link_info *info,
 				     size_t info_size, __u32 flags);
 
+/* Ring buffer APIs */
+struct ring_buffer;
+
+typedef int (*ring_buffer_sample_fn)(void *ctx, void *data, size_t size);
+
+struct ring_buffer_opts {
+	size_t sz; /* size of this struct, for forward/backward compatibility */
+};
+
+#define ring_buffer_opts__last_field sz
+
+LIBBPF_API struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+		 const struct ring_buffer_opts *opts);
+LIBBPF_API void ring_buffer__free(struct ring_buffer *rb);
+LIBBPF_API int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+				ring_buffer_sample_fn sample_cb, void *ctx);
+LIBBPF_API int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms);
+LIBBPF_API int ring_buffer__consume(struct ring_buffer *rb);
+
+/* Perf buffer APIs */
 struct perf_buffer;
 
 typedef void (*perf_buffer_sample_fn)(void *ctx, int cpu,
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index 381a7342ecfc..c18860200abb 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -263,4 +263,9 @@ LIBBPF_0.0.9 {
 		bpf_link_get_next_id;
 		bpf_program__attach_iter;
 		perf_buffer__consume;
+		ring_buffer__add;
+		ring_buffer__consume;
+		ring_buffer__free;
+		ring_buffer__new;
+		ring_buffer__poll;
 } LIBBPF_0.0.8;
diff --git a/tools/lib/bpf/libbpf_probes.c b/tools/lib/bpf/libbpf_probes.c
index 2c92059c0c90..10cd8d1891f5 100644
--- a/tools/lib/bpf/libbpf_probes.c
+++ b/tools/lib/bpf/libbpf_probes.c
@@ -238,6 +238,11 @@ bool bpf_probe_map_type(enum bpf_map_type map_type, __u32 ifindex)
 		if (btf_fd < 0)
 			return false;
 		break;
+	case BPF_MAP_TYPE_RINGBUF:
+		key_size = 0;
+		value_size = 0;
+		max_entries = 4096;
+		break;
 	case BPF_MAP_TYPE_UNSPEC:
 	case BPF_MAP_TYPE_HASH:
 	case BPF_MAP_TYPE_ARRAY:
diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
new file mode 100644
index 000000000000..bc10fa1d43c7
--- /dev/null
+++ b/tools/lib/bpf/ringbuf.c
@@ -0,0 +1,285 @@
+// SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
+/*
+ * Ring buffer operations.
+ *
+ * Copyright (C) 2020 Facebook, Inc.
+ */
+#include <stdlib.h>
+#include <stdio.h>
+#include <errno.h>
+#include <unistd.h>
+#include <linux/err.h>
+#include <linux/bpf.h>
+#include <asm/barrier.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <tools/libc_compat.h>
+
+#include "libbpf.h"
+#include "libbpf_internal.h"
+#include "bpf.h"
+
+/* make sure libbpf doesn't use kernel-only integer typedefs */
+#pragma GCC poison u8 u16 u32 u64 s8 s16 s32 s64
+
+/* Per-map state of one registered BPF_MAP_TYPE_RINGBUF */
+struct ring {
+	/* invoked for every record that is not marked as discarded */
+	ring_buffer_sample_fn sample_cb;
+	/* opaque user pointer handed to sample_cb as its first argument */
+	void *ctx;
+	/* start of the record area: one page past producer_pos */
+	void *data;
+	/* read-write mapping of the map's first page (offset 0) */
+	unsigned long *consumer_pos;
+	/* read-only mapping that starts at offset page_size of the map */
+	unsigned long *producer_pos;
+	/* max_entries - 1; used to wrap positions into the record area */
+	unsigned long mask;
+	int map_fd;
+};
+
+/* Manager for a set of rings that are polled through one epoll instance */
+struct ring_buffer {
+	/* epoll event array; grown in step with rings, also used as the
+	 * output buffer of epoll_wait()
+	 */
+	struct epoll_event *events;
+	/* registered rings; only the first ring_cnt entries are valid */
+	struct ring *rings;
+	/* result of getpagesize(), cached at creation time */
+	size_t page_size;
+	int epoll_fd;
+	int ring_cnt;
+};
+
+/* Drop whichever of the two mappings of a ring exist and reset the pointers,
+ * so that calling this again on the same ring is harmless.
+ */
+static void ringbuf_unmap_ring(struct ring_buffer *rb, struct ring *r)
+{
+	size_t prod_sz;
+
+	if (r->consumer_pos)
+		munmap(r->consumer_pos, rb->page_size);
+	r->consumer_pos = NULL;
+
+	if (r->producer_pos) {
+		/* producer page followed by the double-mapped data area */
+		prod_sz = rb->page_size + 2 * (r->mask + 1);
+		munmap(r->producer_pos, prod_sz);
+	}
+	r->producer_pos = NULL;
+}
+
+/* Add extra RINGBUF maps to this ring buffer manager.
+ *
+ * Verifies that map_fd refers to a BPF_MAP_TYPE_RINGBUF map, maps its
+ * consumer page (read-write) and its producer page plus data area
+ * (read-only), and registers the map fd with the manager's epoll instance.
+ *
+ * Returns 0 on success or a negative error code. On failure the ring is not
+ * counted in rb->ring_cnt and any mapping created here is undone.
+ */
+int ring_buffer__add(struct ring_buffer *rb, int map_fd,
+		     ring_buffer_sample_fn sample_cb, void *ctx)
+{
+	struct bpf_map_info info;
+	__u32 len = sizeof(info);
+	struct epoll_event *e;
+	struct ring *r;
+	__u64 mmap_sz;
+	void *tmp;
+	int err;
+
+	memset(&info, 0, sizeof(info));
+
+	err = bpf_obj_get_info_by_fd(map_fd, &info, &len);
+	if (err) {
+		err = -errno;
+		pr_warn("ringbuf: failed to get map info for fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	if (info.type != BPF_MAP_TYPE_RINGBUF) {
+		pr_warn("ringbuf: map fd=%d is not BPF_MAP_TYPE_RINGBUF\n",
+			map_fd);
+		return -EINVAL;
+	}
+
+	/* Size of the producer page plus the double-mapped data area.
+	 * max_entries is 32-bit, so doubling it has to happen in 64 bits or
+	 * it wraps around for a 2GB ring; also refuse sizes that do not fit
+	 * into size_t.
+	 */
+	mmap_sz = rb->page_size + 2 * (__u64)info.max_entries;
+	if (mmap_sz != (__u64)(size_t)mmap_sz) {
+		pr_warn("ringbuf: ring buffer size (%u) is too big\n",
+			info.max_entries);
+		return -E2BIG;
+	}
+
+	tmp = reallocarray(rb->rings, rb->ring_cnt + 1, sizeof(*rb->rings));
+	if (!tmp)
+		return -ENOMEM;
+	rb->rings = tmp;
+
+	tmp = reallocarray(rb->events, rb->ring_cnt + 1, sizeof(*rb->events));
+	if (!tmp)
+		return -ENOMEM;
+	rb->events = tmp;
+
+	r = &rb->rings[rb->ring_cnt];
+	memset(r, 0, sizeof(*r));
+
+	r->map_fd = map_fd;
+	r->sample_cb = sample_cb;
+	r->ctx = ctx;
+	r->mask = info.max_entries - 1;
+
+	/* Map writable consumer page */
+	tmp = mmap(NULL, rb->page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		err = -errno;
+		pr_warn("ringbuf: failed to mmap consumer page for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. We map twice as big
+	 * data size to allow simple reading of samples that wrap around the
+	 * end of a ring buffer. See kernel implementation for details.
+	 */
+	tmp = mmap(NULL, (size_t)mmap_sz, PROT_READ, MAP_SHARED, map_fd,
+		   rb->page_size);
+	if (tmp == MAP_FAILED) {
+		/* save errno before munmap() gets a chance to change it */
+		err = -errno;
+		ringbuf_unmap_ring(rb, r);
+		pr_warn("ringbuf: failed to mmap data pages for map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+	r->producer_pos = tmp;
+	r->data = tmp + rb->page_size;
+
+	e = &rb->events[rb->ring_cnt];
+	memset(e, 0, sizeof(*e));
+
+	e->events = EPOLLIN;
+	/* data.fd carries the index into rb->rings, not a file descriptor */
+	e->data.fd = rb->ring_cnt;
+	if (epoll_ctl(rb->epoll_fd, EPOLL_CTL_ADD, map_fd, e) < 0) {
+		err = -errno;
+		ringbuf_unmap_ring(rb, r);
+		pr_warn("ringbuf: failed to epoll add map fd=%d: %d\n",
+			map_fd, err);
+		return err;
+	}
+
+	rb->ring_cnt++;
+	return 0;
+}
+
+/* Unmap every registered ring, close the epoll fd and free the manager.
+ * A NULL manager is accepted and ignored.
+ */
+void ring_buffer__free(struct ring_buffer *rb)
+{
+	int idx = 0;
+
+	if (rb == NULL)
+		return;
+
+	while (idx < rb->ring_cnt) {
+		ringbuf_unmap_ring(rb, &rb->rings[idx]);
+		idx++;
+	}
+
+	if (rb->epoll_fd >= 0)
+		close(rb->epoll_fd);
+
+	free(rb->events);
+	free(rb->rings);
+	free(rb);
+}
+
+/* Allocate a ring buffer manager, create its epoll instance and register
+ * map_fd as the first ring. Returns NULL if opts is invalid or if any step
+ * fails; everything set up so far is released in that case.
+ */
+struct ring_buffer *
+ring_buffer__new(int map_fd, ring_buffer_sample_fn sample_cb, void *ctx,
+		 const struct ring_buffer_opts *opts)
+{
+	struct ring_buffer *rb;
+	int err;
+
+	if (!OPTS_VALID(opts, ring_buffer_opts))
+		return NULL;
+
+	rb = calloc(1, sizeof(*rb));
+	if (!rb)
+		return NULL;
+
+	rb->page_size = getpagesize();
+	rb->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+
+	if (rb->epoll_fd >= 0) {
+		if (ring_buffer__add(rb, map_fd, sample_cb, ctx) == 0)
+			return rb;
+	} else {
+		err = -errno;
+		pr_warn("ringbuf: failed to create epoll instance: %d\n", err);
+	}
+
+	/* shared failure path */
+	ring_buffer__free(rb);
+	return NULL;
+}
+
+/* Space taken up by a record whose header length word is len: the payload
+ * length with the two top flag bits (busy and discard) masked off, plus the
+ * header, rounded up to a multiple of 8 bytes.
+ */
+static inline int roundup_len(__u32 len)
+{
+	__u32 total = (len & 0x3fffffffU) + BPF_RINGBUF_HDR_SZ;
+
+	return (total + 7) & ~7U;
+}
+
+/* Consume all records currently available in one ring.
+ *
+ * Walks from the consumer position towards the producer position and calls
+ * the ring's sample_cb for every record that is not marked as discarded. The
+ * new consumer position is stored after each record. The walk stops at the
+ * first record that still has the busy bit set, and the outer loop re-reads
+ * the producer position until a pass makes no progress.
+ *
+ * Returns the number of records passed to the callback, or the callback's
+ * return value as soon as it returns non-zero.
+ */
+static int ringbuf_process_ring(struct ring* r)
+{
+	int *len_ptr, len, err, cnt = 0;
+	unsigned long cons_pos, prod_pos;
+	bool got_new_data;
+	void *sample;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	do {
+		got_new_data = false;
+		prod_pos = smp_load_acquire(r->producer_pos);
+		while (cons_pos < prod_pos) {
+			/* record header sits at the masked consumer offset */
+			len_ptr = r->data + (cons_pos & r->mask);
+			len = smp_load_acquire(len_ptr);
+
+			/* sample not committed yet, bail out for now */
+			if (len & BPF_RINGBUF_BUSY_BIT)
+				goto done;
+
+			got_new_data = true;
+			/* advance past header + payload, 8-byte aligned */
+			cons_pos += roundup_len(len);
+
+			if ((len & BPF_RINGBUF_DISCARD_BIT) == 0) {
+				sample = (void *)len_ptr + BPF_RINGBUF_HDR_SZ;
+				err = r->sample_cb(r->ctx, sample, len);
+				if (err) {
+					/* update consumer pos and bail out */
+					smp_store_release(r->consumer_pos,
+							  cons_pos);
+					return err;
+				}
+				cnt++;
+			}
+
+			/* publish progress after every record */
+			smp_store_release(r->consumer_pos, cons_pos);
+		}
+	} while (got_new_data);
+done:
+	return cnt;
+}
+
+/* Drain every registered ring in registration order, without touching epoll.
+ * Returns the total number of records consumed across all rings, or the
+ * first negative value produced while processing a ring.
+ */
+int ring_buffer__consume(struct ring_buffer *rb)
+{
+	int total = 0, idx = 0, n;
+
+	while (idx < rb->ring_cnt) {
+		n = ringbuf_process_ring(&rb->rings[idx]);
+		if (n < 0)
+			return n;
+		total += n;
+		idx++;
+	}
+	return total;
+}
+
+/* Poll for available data and consume records, if any are available.
+ *
+ * Waits on the manager's epoll instance for up to timeout_ms milliseconds
+ * and then processes every ring that epoll reported as ready.
+ *
+ * Returns the number of records consumed, -errno if epoll_wait() fails, or
+ * the negative value returned by a registered callback.
+ */
+int ring_buffer__poll(struct ring_buffer *rb, int timeout_ms)
+{
+	int i, cnt, err, res = 0;
+
+	cnt = epoll_wait(rb->epoll_fd, rb->events, rb->ring_cnt, timeout_ms);
+	if (cnt < 0)
+		return -errno;
+
+	for (i = 0; i < cnt; i++) {
+		/* data.fd holds the ring index stored at registration time */
+		__u32 ring_id = rb->events[i].data.fd;
+		struct ring *ring = &rb->rings[ring_id];
+
+		err = ringbuf_process_ring(ring);
+		if (err < 0)
+			return err;
+		/* count consumed records, not the number of ready rings */
+		res += err;
+	}
+	return res;
+}
-- 
cgit v1.2.3-59-g8ed1b


From cb1c9ddd552520abd49031d47397c6e95bad882e Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 29 May 2020 00:54:22 -0700
Subject: selftests/bpf: Add BPF ringbuf selftests

Both singleton BPF ringbuf and BPF ringbuf with map-in-map use cases are tested.
Also reserve+submit/discards and output variants of API are validated.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200529075424.3139988-4-andriin@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c   | 211 +++++++++++++++++++++
 .../selftests/bpf/prog_tests/ringbuf_multi.c       | 102 ++++++++++
 tools/testing/selftests/bpf/progs/test_ringbuf.c   |  78 ++++++++
 .../selftests/bpf/progs/test_ringbuf_multi.c       |  77 ++++++++
 4 files changed, 468 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/ringbuf.c
 create mode 100644 tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_ringbuf_multi.c

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
new file mode 100644
index 000000000000..bb8541f240e2
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -0,0 +1,211 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <linux/compiler.h>
+#include <asm/barrier.h>
+#include <test_progs.h>
+#include <sys/mman.h>
+#include <sys/epoll.h>
+#include <time.h>
+#include <sched.h>
+#include <signal.h>
+#include <pthread.h>
+#include <sys/sysinfo.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include "test_ringbuf.skel.h"
+
+#define EDONE 7777
+
+static int duration = 0;
+
+/* Record payload as read by process_sample(), which casts the callback's
+ * data pointer to this type.
+ */
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+static int sample_cnt;
+
+/* Ring buffer callback: counts every sample, validates the value of the
+ * samples with seq 0 and seq 1, and returns -EDONE for seq 1.
+ */
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	struct sample *s = data;
+	int ret = 0;
+
+	sample_cnt++;
+
+	if (s->seq == 0) {
+		CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+		      333L, s->value);
+	} else if (s->seq == 1) {
+		CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+		      777L, s->value);
+		ret = -EDONE;
+	}
+	/* we don't care about the rest */
+
+	return ret;
+}
+
+static struct test_ringbuf *skel;
+static struct ring_buffer *ringbuf;
+
+/* Reset the BPF-side counters and issue two getpgid() syscalls, the first
+ * with value 333 and the second with value 777.
+ */
+static void trigger_samples(void)
+{
+	skel->bss->dropped = 0;
+	skel->bss->total = 0;
+	skel->bss->discarded = 0;
+
+	/* trigger exactly two samples */
+	skel->bss->value = 333;
+	syscall(__NR_getpgid);
+	skel->bss->value = 777;
+	syscall(__NR_getpgid);
+}
+
+/* Thread body: poll the global ringbuf with the timeout passed in through
+ * the thread argument and hand the poll result back as the thread's return
+ * value.
+ */
+static void *poll_thread(void *input)
+{
+	long timeout_ms = (long)input;
+	long res;
+
+	res = ring_buffer__poll(ringbuf, timeout_ms);
+	return (void *)res;
+}
+
+/* End-to-end test of a single BPF ringbuf: checks the positions reported by
+ * the BPF side, consumption through ring_buffer__poll(), and the effect of
+ * the BPF_RB_NO_WAKEUP / BPF_RB_FORCE_WAKEUP flags on a poller blocked in a
+ * background thread.
+ */
+void test_ringbuf(void)
+{
+	const size_t rec_sz = BPF_RINGBUF_HDR_SZ + sizeof(struct sample);
+	pthread_t thread;
+	long bg_ret = -1;
+	int err;
+
+	skel = test_ringbuf__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+		return;
+
+	/* only trigger BPF program for current process */
+	skel->bss->pid = getpid();
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf),
+				   process_sample, NULL, NULL);
+	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+		goto cleanup;
+
+	err = test_ringbuf__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+		goto cleanup;
+
+	trigger_samples();
+
+	/* 2 submitted + 1 discarded records */
+	CHECK(skel->bss->avail_data != 3 * rec_sz,
+	      "err_avail_size", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->avail_data);
+	CHECK(skel->bss->ring_size != 4096,
+	      "err_ring_size", "exp %ld, got %ld\n",
+	      4096L, skel->bss->ring_size);
+	CHECK(skel->bss->cons_pos != 0,
+	      "err_cons_pos", "exp %ld, got %ld\n",
+	      0L, skel->bss->cons_pos);
+	CHECK(skel->bss->prod_pos != 3 * rec_sz,
+	      "err_prod_pos", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->prod_pos);
+
+	/* poll for samples */
+	err = ring_buffer__poll(ringbuf, -1);
+
+	/* -EDONE is used as an indicator that we are done */
+	if (CHECK(err != -EDONE, "err_done", "done err: %d\n", err))
+		goto cleanup;
+
+	/* we expect extra polling to return nothing */
+	err = ring_buffer__poll(ringbuf, 0);
+	if (CHECK(err != 0, "extra_samples", "poll result: %d\n", err))
+		goto cleanup;
+
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	/* now validate consumer position is updated and returned */
+	trigger_samples();
+	CHECK(skel->bss->cons_pos != 3 * rec_sz,
+	      "err_cons_pos", "exp %ld, got %ld\n",
+	      3L * rec_sz, skel->bss->cons_pos);
+	err = ring_buffer__poll(ringbuf, -1);
+	CHECK(err <= 0, "poll_err", "err %d\n", err);
+
+	/* start poll in background w/ long timeout */
+	err = pthread_create(&thread, NULL, poll_thread, (void *)(long)10000);
+	if (CHECK(err, "bg_poll", "pthread_create failed: %d\n", err))
+		goto cleanup;
+
+	/* turn off notifications now */
+	skel->bss->flags = BPF_RB_NO_WAKEUP;
+
+	/* give background thread a bit of a time */
+	usleep(50000);
+	trigger_samples();
+	/* sleeping arbitrarily is bad, but no better way to know that
+	 * epoll_wait() **DID NOT** unblock in background thread
+	 */
+	usleep(50000);
+	/* background poll should still be blocked */
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+		goto cleanup;
+
+	/* BPF side did everything right */
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	/* clear flags to return to "adaptive" notification mode */
+	skel->bss->flags = 0;
+
+	/* produce new samples, no notification should be triggered, because
+	 * consumer is now behind
+	 */
+	trigger_samples();
+
+	/* background poll should still be blocked */
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err != EBUSY, "try_join", "err %d\n", err))
+		goto cleanup;
+
+	/* now force notifications */
+	skel->bss->flags = BPF_RB_FORCE_WAKEUP;
+	sample_cnt = 0;
+	trigger_samples();
+
+	/* now we should get a pending notification */
+	usleep(50000);
+	err = pthread_tryjoin_np(thread, (void **)&bg_ret);
+	if (CHECK(err, "join_bg", "err %d\n", err))
+		goto cleanup;
+
+	/* The background poll must have succeeded and consumed something.
+	 * Only the sign is checked here; the exact number of samples seen by
+	 * the callback is validated through sample_cnt right below.
+	 */
+	if (CHECK(bg_ret <= 0, "bg_ret", "epoll_wait result: %ld\n", bg_ret))
+		goto cleanup;
+
+	/* 3 rounds, 2 samples each */
+	CHECK(sample_cnt != 6, "wrong_sample_cnt",
+	      "expected to see %d samples, got %d\n", 6, sample_cnt);
+
+	/* BPF side did everything right */
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+	CHECK(skel->bss->discarded != 1, "err_discarded", "exp %ld, got %ld\n",
+	      1L, skel->bss->discarded);
+
+	test_ringbuf__detach(skel);
+cleanup:
+	ring_buffer__free(ringbuf);
+	test_ringbuf__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
new file mode 100644
index 000000000000..78e450609803
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf_multi.c
@@ -0,0 +1,102 @@
+// SPDX-License-Identifier: GPL-2.0
+#define _GNU_SOURCE
+#include <test_progs.h>
+#include <sys/epoll.h>
+#include "test_ringbuf_multi.skel.h"
+
+static int duration = 0;
+
+/* Record payload as read by process_sample(), which casts the callback's
+ * data pointer to this type.
+ */
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+/* Ring buffer callback shared by both rings; ctx carries the ring number the
+ * callback was registered with. Sample seq 0 must arrive on ring 1 with value
+ * 333 and seq 1 on ring 2 with value 777; any other seq is reported as a
+ * failure and makes the callback return -1.
+ */
+static int process_sample(void *ctx, void *data, size_t len)
+{
+	struct sample *s = data;
+	int ring = (unsigned long)ctx;
+
+	if (s->seq == 0) {
+		CHECK(ring != 1, "sample1_ring", "exp %d, got %d\n", 1, ring);
+		CHECK(s->value != 333, "sample1_value", "exp %ld, got %ld\n",
+		      333L, s->value);
+		return 0;
+	}
+
+	if (s->seq == 1) {
+		CHECK(ring != 2, "sample2_ring", "exp %d, got %d\n", 2, ring);
+		CHECK(s->value != 777, "sample2_value", "exp %ld, got %ld\n",
+		      777L, s->value);
+		return 0;
+	}
+
+	CHECK(true, "extra_sample", "unexpected sample seq %d, val %ld\n",
+	      s->seq, s->value);
+	return -1;
+}
+
+/* Test one ring buffer manager serving two BPF ringbuf maps that the BPF
+ * program selects through a map-in-map array.
+ */
+void test_ringbuf_multi(void)
+{
+	struct test_ringbuf_multi *skel;
+	struct ring_buffer *ringbuf;
+	int err;
+
+	skel = test_ringbuf_multi__open_and_load();
+	if (CHECK(!skel, "skel_open_load", "skeleton open&load failed\n"))
+		return;
+
+	/* only trigger BPF program for current process */
+	skel->bss->pid = getpid();
+
+	ringbuf = ring_buffer__new(bpf_map__fd(skel->maps.ringbuf1),
+				   process_sample, (void *)(long)1, NULL);
+	if (CHECK(!ringbuf, "ringbuf_create", "failed to create ringbuf\n"))
+		goto cleanup;
+
+	err = ring_buffer__add(ringbuf, bpf_map__fd(skel->maps.ringbuf2),
+			      process_sample, (void *)(long)2);
+	if (CHECK(err, "ringbuf_add", "failed to add another ring\n"))
+		goto cleanup;
+
+	err = test_ringbuf_multi__attach(skel);
+	if (CHECK(err, "skel_attach", "skeleton attachment failed: %d\n", err))
+		goto cleanup;
+
+	/* trigger few samples, some will be skipped */
+	skel->bss->target_ring = 0;
+	skel->bss->value = 333;
+	syscall(__NR_getpgid);
+
+	/* skipped, no ringbuf in slot 1 */
+	skel->bss->target_ring = 1;
+	skel->bss->value = 555;
+	syscall(__NR_getpgid);
+
+	skel->bss->target_ring = 2;
+	skel->bss->value = 777;
+	syscall(__NR_getpgid);
+
+	/* Poll for samples; each of the two rings holds one record. Only a
+	 * positive result is required here: every sample is validated in
+	 * process_sample() and the BPF-side counters are checked below.
+	 */
+	err = ring_buffer__poll(ringbuf, -1);
+	if (CHECK(err <= 0, "poll_res", "expected records from 2 rings, got %d\n", err))
+		goto cleanup;
+
+	/* expect extra polling to return nothing */
+	err = ring_buffer__poll(ringbuf, 0);
+	if (CHECK(err < 0, "extra_samples", "poll result: %d\n", err))
+		goto cleanup;
+
+	CHECK(skel->bss->dropped != 0, "err_dropped", "exp %ld, got %ld\n",
+	      0L, skel->bss->dropped);
+	CHECK(skel->bss->skipped != 1, "err_skipped", "exp %ld, got %ld\n",
+	      1L, skel->bss->skipped);
+	CHECK(skel->bss->total != 2, "err_total", "exp %ld, got %ld\n",
+	      2L, skel->bss->total);
+
+cleanup:
+	ring_buffer__free(ringbuf);
+	test_ringbuf_multi__destroy(skel);
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf.c b/tools/testing/selftests/bpf/progs/test_ringbuf.c
new file mode 100644
index 000000000000..8ba9959b036b
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf.c
@@ -0,0 +1,78 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Record written into the ring buffer by test_ringbuf() below */
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+/* Ring buffer map under test; max_entries is 1 << 12 = 4096 */
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf SEC(".maps");
+
+/* inputs */
+int pid = 0;
+long value = 0;
+long flags = 0;
+
+/* outputs */
+long total = 0;
+long discarded = 0;
+long dropped = 0;
+
+long avail_data = 0;
+long ring_size = 0;
+long cons_pos = 0;
+long prod_pos = 0;
+
+/* inner state */
+long seq = 0;
+
+/* Runs on every getpgid() syscall entry and does nothing unless the caller's
+ * tgid equals the pid input.
+ *
+ * Reserves one sample and fills it in. Samples with an even sequence number
+ * are submitted directly; for odd ones the reserved sample is copied into a
+ * new record with bpf_ringbuf_output() and the reservation is discarded.
+ * Afterwards the ring buffer's state is queried into the output variables.
+ *
+ * Returns 1 if the reservation failed, 0 otherwise.
+ */
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+	struct sample *sample;
+
+	if (cur_pid != pid)
+		return 0;
+
+	sample = bpf_ringbuf_reserve(&ringbuf, sizeof(*sample), 0);
+	if (!sample) {
+		__sync_fetch_and_add(&dropped, 1);
+		return 1;
+	}
+
+	sample->pid = pid;
+	bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+	sample->value = value;
+
+	sample->seq = seq++;
+	__sync_fetch_and_add(&total, 1);
+
+	if (sample->seq & 1) {
+		/* copy from reserved sample to a new one... */
+		bpf_ringbuf_output(&ringbuf, sample, sizeof(*sample), flags);
+		/* ...and then discard reserved sample */
+		bpf_ringbuf_discard(sample, flags);
+		__sync_fetch_and_add(&discarded, 1);
+	} else {
+		bpf_ringbuf_submit(sample, flags);
+	}
+
+	avail_data = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	ring_size = bpf_ringbuf_query(&ringbuf, BPF_RB_RING_SIZE);
+	cons_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_CONS_POS);
+	prod_pos = bpf_ringbuf_query(&ringbuf, BPF_RB_PROD_POS);
+
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
new file mode 100644
index 000000000000..edf3b6953533
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_ringbuf_multi.c
@@ -0,0 +1,77 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+/* Record written into the selected ring buffer by test_ringbuf() below */
+struct sample {
+	int pid;
+	int seq;
+	long value;
+	char comm[16];
+};
+
+/* Two ring buffer maps with identical definitions */
+struct ringbuf_map {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+	__uint(max_entries, 1 << 12);
+} ringbuf1 SEC(".maps"),
+  ringbuf2 SEC(".maps");
+
+/* Array of maps with four slots; only slots 0 and 2 are initialized here,
+ * with ringbuf1 and ringbuf2 respectively.
+ */
+struct {
+	__uint(type, BPF_MAP_TYPE_ARRAY_OF_MAPS);
+	__uint(max_entries, 4);
+	__type(key, int);
+	__array(values, struct ringbuf_map);
+} ringbuf_arr SEC(".maps") = {
+	.values = {
+		[0] = &ringbuf1,
+		[2] = &ringbuf2,
+	},
+};
+
+/* inputs */
+int pid = 0;
+int target_ring = 0;
+long value = 0;
+
+/* outputs */
+long total = 0;
+long dropped = 0;
+long skipped = 0;
+
+/* Runs on every getpgid() syscall entry and does nothing unless the caller's
+ * tgid equals the pid input.
+ *
+ * Looks up the ring buffer stored in slot target_ring of ringbuf_arr and
+ * submits one sample to it. A failed lookup is counted in skipped and a
+ * failed reservation in dropped; both cases return 1. Returns 0 otherwise.
+ */
+SEC("tp/syscalls/sys_enter_getpgid")
+int test_ringbuf(void *ctx)
+{
+	int cur_pid = bpf_get_current_pid_tgid() >> 32;
+	struct sample *sample;
+	void *rb;
+
+	if (cur_pid != pid)
+		return 0;
+
+	rb = bpf_map_lookup_elem(&ringbuf_arr, &target_ring);
+	if (!rb) {
+		skipped += 1;
+		return 1;
+	}
+
+	sample = bpf_ringbuf_reserve(rb, sizeof(*sample), 0);
+	if (!sample) {
+		dropped += 1;
+		return 1;
+	}
+
+	sample->pid = pid;
+	bpf_get_current_comm(sample->comm, sizeof(sample->comm));
+	sample->value = value;
+
+	/* the running total doubles as the sample's sequence number */
+	sample->seq = total;
+	total += 1;
+
+	bpf_ringbuf_submit(sample, 0);
+
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From c97099b0f22722be7d0f290278a26d297cc4b7ca Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 29 May 2020 00:54:23 -0700
Subject: bpf: Add BPF ringbuf and perf buffer benchmarks
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Extend bench framework with ability to have benchmark-provided child argument
parser for custom benchmark-specific parameters. This makes bench generic code
modular and independent from any specific benchmark.

Also implement a set of benchmarks for new BPF ring buffer and existing perf
buffer. 4 benchmarks were implemented: 2 variations for each of BPF ringbuf
and perfbuf:
  - rb-libbpf utilizes stock libbpf ring_buffer manager for reading data;
  - rb-custom implements custom ring buffer setup and reading code, to
    eliminate overheads inherent in generic libbpf code due to callback
    functions and the need to update consumer position after each consumed
    record, instead of batching updates (due to pessimistic assumption that
    user callback might take long time and thus could unnecessarily hold ring
    buffer space for too long);
  - pb-libbpf uses stock libbpf perf_buffer code with all the default
    settings, though uses higher-performance raw event callback to minimize
    unnecessary overhead;
  - pb-custom implements its own custom consumer code to minimize any possible
    overhead of generic libbpf implementation and indirect function calls.

All of the tests support the default mode, in which no data notifications are
skipped, as well as sampled mode (with --rb-sampled flag), which triggers epoll
notifications less frequently and reduces overhead. As will be shown, this mode
is especially critical for perf buffer, which suffers from high overhead of
wakeups in kernel.

Otherwise, all benchmarks generate a batch of records in a similar way, by
using an fentry/sys_getpgid BPF program, which pushes a bunch of records in
a tight loop and records number of successful and dropped samples. Each record
is a small 8-byte integer, to minimize the effect of memory copying with
bpf_perf_event_output() and bpf_ringbuf_output().

Benchmarks that have only one producer implement optional back-to-back mode,
in which record production and consumption is alternating on the same CPU.
This is the highest-throughput happy case, showing ultimate performance
achievable with either BPF ringbuf or perfbuf.

All the below scenarios are implemented in a script in
benchs/run_bench_ringbufs.sh. Tests were performed on 28-core/56-thread
Intel Xeon CPU E5-2680 v4 @ 2.40GHz CPU.

Single-producer, parallel producer
==================================
rb-libbpf            12.054 ± 0.320M/s (drops 0.000 ± 0.000M/s)
rb-custom            8.158 ± 0.118M/s (drops 0.001 ± 0.003M/s)
pb-libbpf            0.931 ± 0.007M/s (drops 0.000 ± 0.000M/s)
pb-custom            0.965 ± 0.003M/s (drops 0.000 ± 0.000M/s)

Single-producer, parallel producer, sampled notification
========================================================
rb-libbpf            11.563 ± 0.067M/s (drops 0.000 ± 0.000M/s)
rb-custom            15.895 ± 0.076M/s (drops 0.000 ± 0.000M/s)
pb-libbpf            9.889 ± 0.032M/s (drops 0.000 ± 0.000M/s)
pb-custom            9.866 ± 0.028M/s (drops 0.000 ± 0.000M/s)

Single producer on one CPU, consumer on another one, both running at full
speed. Curiously, rb-libbpf has higher throughput than objectively faster (due
to more lightweight consumer code path) rb-custom. It appears that faster
consumer causes kernel to send notifications more frequently, because consumer
appears to be caught up more frequently. Performance of perfbuf suffers from
default "no sampling" policy and huge overhead that causes.

In sampled mode, rb-custom is winning very significantly eliminating too
frequent in-kernel wakeups, the gain appears to be more than 2x.

Perf buffer achieves even more impressive wins, compared to stock perfbuf
settings, with 10x improvements in throughput with 1:500 sampling rate. The
trade-off is that with sampling, application might not get next X events until
X+1st arrives, which is not always acceptable. With steady influx of events,
though, this shouldn't be a problem.

Overall, single-producer performance of ring buffers seems to be better no
matter the sampled/non-sampled modes, but it especially beats ring buffer
without sampling due to its adaptive notification approach.

Single-producer, back-to-back mode
==================================
rb-libbpf            15.507 ± 0.247M/s (drops 0.000 ± 0.000M/s)
rb-libbpf-sampled    14.692 ± 0.195M/s (drops 0.000 ± 0.000M/s)
rb-custom            21.449 ± 0.157M/s (drops 0.000 ± 0.000M/s)
rb-custom-sampled    20.024 ± 0.386M/s (drops 0.000 ± 0.000M/s)
pb-libbpf            1.601 ± 0.015M/s (drops 0.000 ± 0.000M/s)
pb-libbpf-sampled    8.545 ± 0.064M/s (drops 0.000 ± 0.000M/s)
pb-custom            1.607 ± 0.022M/s (drops 0.000 ± 0.000M/s)
pb-custom-sampled    8.988 ± 0.144M/s (drops 0.000 ± 0.000M/s)

Here we test a back-to-back mode, which is arguably best-case scenario both
for BPF ringbuf and perfbuf, because there is no contention and for ringbuf
also no excessive notification, because consumer appears to be behind after
the first record. For ringbuf, custom consumer code clearly wins with 21.5 vs
16 million records per second exchanged between producer and consumer. Sampled
mode actually hurts a bit due to slightly slower producer logic (it needs to
fetch amount of data available to decide whether to skip or force notification).

Perfbuf with wakeup sampling gets 5.5x throughput increase, compared to
no-sampling version. There also doesn't seem to be noticeable overhead from
generic libbpf handling code.

Perfbuf back-to-back, effect of sample rate
===========================================
pb-sampled-1         1.035 ± 0.012M/s (drops 0.000 ± 0.000M/s)
pb-sampled-5         3.476 ± 0.087M/s (drops 0.000 ± 0.000M/s)
pb-sampled-10        5.094 ± 0.136M/s (drops 0.000 ± 0.000M/s)
pb-sampled-25        7.118 ± 0.153M/s (drops 0.000 ± 0.000M/s)
pb-sampled-50        8.169 ± 0.156M/s (drops 0.000 ± 0.000M/s)
pb-sampled-100       8.887 ± 0.136M/s (drops 0.000 ± 0.000M/s)
pb-sampled-250       9.180 ± 0.209M/s (drops 0.000 ± 0.000M/s)
pb-sampled-500       9.353 ± 0.281M/s (drops 0.000 ± 0.000M/s)
pb-sampled-1000      9.411 ± 0.217M/s (drops 0.000 ± 0.000M/s)
pb-sampled-2000      9.464 ± 0.167M/s (drops 0.000 ± 0.000M/s)
pb-sampled-3000      9.575 ± 0.273M/s (drops 0.000 ± 0.000M/s)

This benchmark shows the effect of event sampling for perfbuf. Back-to-back
mode for highest throughput. Just doing every 5th record notification gives
3.5x speed up. 250-500 appears to be the point of diminishing return, with
almost 9x speed up. Most benchmarks use 500 as the default sampling for pb-raw
and pb-custom.

Ringbuf back-to-back, effect of sample rate
===========================================
rb-sampled-1         1.106 ± 0.010M/s (drops 0.000 ± 0.000M/s)
rb-sampled-5         4.746 ± 0.149M/s (drops 0.000 ± 0.000M/s)
rb-sampled-10        7.706 ± 0.164M/s (drops 0.000 ± 0.000M/s)
rb-sampled-25        12.893 ± 0.273M/s (drops 0.000 ± 0.000M/s)
rb-sampled-50        15.961 ± 0.361M/s (drops 0.000 ± 0.000M/s)
rb-sampled-100       18.203 ± 0.445M/s (drops 0.000 ± 0.000M/s)
rb-sampled-250       19.962 ± 0.786M/s (drops 0.000 ± 0.000M/s)
rb-sampled-500       20.881 ± 0.551M/s (drops 0.000 ± 0.000M/s)
rb-sampled-1000      21.317 ± 0.532M/s (drops 0.000 ± 0.000M/s)
rb-sampled-2000      21.331 ± 0.535M/s (drops 0.000 ± 0.000M/s)
rb-sampled-3000      21.688 ± 0.392M/s (drops 0.000 ± 0.000M/s)

Similar benchmark for ring buffer also shows a great advantage (in terms of
throughput) of skipping notifications. Skipping every 5th one gives 4x boost.
Also similar to perfbuf case, 250-500 seems to be the point of diminishing
returns, giving roughly 20x better results.

Keep in mind, for this test, notifications are controlled manually with
BPF_RB_NO_WAKEUP and BPF_RB_FORCE_WAKEUP. As can be seen from previous
benchmarks, adaptive notifications based on consumer's positions provides same
(or even slightly better due to simpler load generator on BPF side) benefits in
favorable back-to-back scenario. An overzealous and fast consumer, which is
almost always caught up, will make throughput numbers smaller. That's the case
when manual notification control might prove to be extremely beneficial.

Ringbuf back-to-back, reserve+commit vs output
==============================================
reserve              22.819 ± 0.503M/s (drops 0.000 ± 0.000M/s)
output               18.906 ± 0.433M/s (drops 0.000 ± 0.000M/s)

Ringbuf sampled, reserve+commit vs output
=========================================
reserve-sampled      15.350 ± 0.132M/s (drops 0.000 ± 0.000M/s)
output-sampled       14.195 ± 0.144M/s (drops 0.000 ± 0.000M/s)

BPF ringbuf supports two sets of APIs with various usability and performance
tradeoffs: bpf_ringbuf_reserve()+bpf_ringbuf_submit() vs bpf_ringbuf_output().
This benchmark clearly shows superiority of reserve+commit approach, despite
using a small 8-byte record size.

Single-producer, consumer/producer competing on the same CPU, low batch count
=============================================================================
rb-libbpf            3.045 ± 0.020M/s (drops 3.536 ± 0.148M/s)
rb-custom            3.055 ± 0.022M/s (drops 3.893 ± 0.066M/s)
pb-libbpf            1.393 ± 0.024M/s (drops 0.000 ± 0.000M/s)
pb-custom            1.407 ± 0.016M/s (drops 0.000 ± 0.000M/s)

This benchmark shows one of the worst-case scenarios, in which producer and
consumer do not coordinate *and* fight for the same CPU. No batch count and
sampling settings were able to eliminate drops for ringbuf; the producer is
just too fast for the consumer to keep up. But ringbuf and perfbuf are still
able to pass through quite a lot of messages, which is more than enough for
a lot of applications.

Ringbuf, multi-producer contention
==================================
rb-libbpf nr_prod 1  10.916 ± 0.399M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 2  4.931 ± 0.030M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 3  4.880 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 4  3.926 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 8  4.011 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 12 3.967 ± 0.016M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 16 2.604 ± 0.030M/s (drops 0.001 ± 0.002M/s)
rb-libbpf nr_prod 20 2.233 ± 0.003M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 24 2.085 ± 0.015M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 28 2.055 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 32 1.962 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 36 2.089 ± 0.005M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 40 2.118 ± 0.006M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 44 2.105 ± 0.004M/s (drops 0.000 ± 0.000M/s)
rb-libbpf nr_prod 48 2.120 ± 0.058M/s (drops 0.000 ± 0.001M/s)
rb-libbpf nr_prod 52 2.074 ± 0.024M/s (drops 0.007 ± 0.014M/s)

Ringbuf uses a very short-duration spinlock during reservation phase, to check
a few invariants, increment producer count and set record header. This is the
biggest point of contention for ringbuf implementation. This benchmark
evaluates the effect of multiple competing writers on overall throughput of
a single shared ringbuffer.

Overall throughput drops almost 2x when going from single to two
highly-contended producers, gradually dropping with additional competing
producers.  Performance drop stabilizes at around 20 producers and hovers
around 2 million records/s even with 50+ fighting producers, which is a 5x
drop compared to the non-contended case. A good implementation in the kernel
helps maintain decent performance here.

Note that in the intended real-world scenarios, it's not expected to get even
close to such high levels of contention. But if contention becomes a problem,
there is always an option of sharding a few ring buffers across a set of
CPUs.

Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200529075424.3139988-5-andriin@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/Makefile               |   5 +-
 tools/testing/selftests/bpf/bench.c                |  16 +
 .../testing/selftests/bpf/benchs/bench_ringbufs.c  | 566 +++++++++++++++++++++
 .../selftests/bpf/benchs/run_bench_ringbufs.sh     |  75 +++
 tools/testing/selftests/bpf/progs/perfbuf_bench.c  |  33 ++
 tools/testing/selftests/bpf/progs/ringbuf_bench.c  |  60 +++
 6 files changed, 754 insertions(+), 1 deletion(-)
 create mode 100644 tools/testing/selftests/bpf/benchs/bench_ringbufs.c
 create mode 100755 tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
 create mode 100644 tools/testing/selftests/bpf/progs/perfbuf_bench.c
 create mode 100644 tools/testing/selftests/bpf/progs/ringbuf_bench.c

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index e716e931d0c9..3ce548eff8a8 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -413,12 +413,15 @@ $(OUTPUT)/bench_%.o: benchs/bench_%.c bench.h
 	$(CC) $(CFLAGS) -c $(filter %.c,$^) $(LDLIBS) -o $@
 $(OUTPUT)/bench_rename.o: $(OUTPUT)/test_overhead.skel.h
 $(OUTPUT)/bench_trigger.o: $(OUTPUT)/trigger_bench.skel.h
+$(OUTPUT)/bench_ringbufs.o: $(OUTPUT)/ringbuf_bench.skel.h \
+			    $(OUTPUT)/perfbuf_bench.skel.h
 $(OUTPUT)/bench.o: bench.h testing_helpers.h
 $(OUTPUT)/bench: LDLIBS += -lm
 $(OUTPUT)/bench: $(OUTPUT)/bench.o $(OUTPUT)/testing_helpers.o \
 		 $(OUTPUT)/bench_count.o \
 		 $(OUTPUT)/bench_rename.o \
-		 $(OUTPUT)/bench_trigger.o
+		 $(OUTPUT)/bench_trigger.o \
+		 $(OUTPUT)/bench_ringbufs.o
 	$(call msg,BINARY,,$@)
 	$(CC) $(LDFLAGS) -o $@ $(filter %.a %.o,$^) $(LDLIBS)
 
diff --git a/tools/testing/selftests/bpf/bench.c b/tools/testing/selftests/bpf/bench.c
index 14390689ef90..944ad4721c83 100644
--- a/tools/testing/selftests/bpf/bench.c
+++ b/tools/testing/selftests/bpf/bench.c
@@ -130,6 +130,13 @@ static const struct argp_option opts[] = {
 	{},
 };
 
+extern struct argp bench_ringbufs_argp;
+
+static const struct argp_child bench_parsers[] = {
+	{ &bench_ringbufs_argp, 0, "Ring buffers benchmark", 0 },
+	{},
+};
+
 static error_t parse_arg(int key, char *arg, struct argp_state *state)
 {
 	static int pos_args;
@@ -208,6 +215,7 @@ static void parse_cmdline_args(int argc, char **argv)
 		.options = opts,
 		.parser = parse_arg,
 		.doc = argp_program_doc,
+		.children = bench_parsers,
 	};
 	if (argp_parse(&argp, argc, argv, 0, NULL, NULL))
 		exit(1);
@@ -310,6 +318,10 @@ extern const struct bench bench_trig_rawtp;
 extern const struct bench bench_trig_kprobe;
 extern const struct bench bench_trig_fentry;
 extern const struct bench bench_trig_fmodret;
+extern const struct bench bench_rb_libbpf;
+extern const struct bench bench_rb_custom;
+extern const struct bench bench_pb_libbpf;
+extern const struct bench bench_pb_custom;
 
 static const struct bench *benchs[] = {
 	&bench_count_global,
@@ -327,6 +339,10 @@ static const struct bench *benchs[] = {
 	&bench_trig_kprobe,
 	&bench_trig_fentry,
 	&bench_trig_fmodret,
+	&bench_rb_libbpf,
+	&bench_rb_custom,
+	&bench_pb_libbpf,
+	&bench_pb_custom,
 };
 
 static void setup_benchmark()
diff --git a/tools/testing/selftests/bpf/benchs/bench_ringbufs.c b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
new file mode 100644
index 000000000000..da87c7f31891
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/bench_ringbufs.c
@@ -0,0 +1,566 @@
+// SPDX-License-Identifier: GPL-2.0
+/* Copyright (c) 2020 Facebook */
+#include <asm/barrier.h>
+#include <linux/perf_event.h>
+#include <linux/ring_buffer.h>
+#include <sys/epoll.h>
+#include <sys/mman.h>
+#include <argp.h>
+#include <stdlib.h>
+#include "bench.h"
+#include "ringbuf_bench.skel.h"
+#include "perfbuf_bench.skel.h"
+
+static struct {
+	bool back2back;
+	int batch_cnt;
+	bool sampled;
+	int sample_rate;
+	int ringbuf_sz; /* per-ringbuf, in bytes */
+	bool ringbuf_use_output; /* use slower output API */
+	int perfbuf_sz; /* per-CPU size, in pages */
+} args = {
+	.back2back = false,
+	.batch_cnt = 500,
+	.sampled = false,
+	.sample_rate = 500,
+	.ringbuf_sz = 512 * 1024,
+	.ringbuf_use_output = false,
+	.perfbuf_sz = 128,
+};
+
+enum {
+	ARG_RB_BACK2BACK = 2000,
+	ARG_RB_USE_OUTPUT = 2001,
+	ARG_RB_BATCH_CNT = 2002,
+	ARG_RB_SAMPLED = 2003,
+	ARG_RB_SAMPLE_RATE = 2004,
+};
+
+static const struct argp_option opts[] = {
+	{ "rb-b2b", ARG_RB_BACK2BACK, NULL, 0, "Back-to-back mode"},
+	{ "rb-use-output", ARG_RB_USE_OUTPUT, NULL, 0, "Use bpf_ringbuf_output() instead of bpf_ringbuf_reserve()"},
+	{ "rb-batch-cnt", ARG_RB_BATCH_CNT, "CNT", 0, "Set BPF-side record batch count"},
+	{ "rb-sampled", ARG_RB_SAMPLED, NULL, 0, "Notification sampling"},
+	{ "rb-sample-rate", ARG_RB_SAMPLE_RATE, "RATE", 0, "Notification sample rate"},
+	{},
+};
+
+static error_t parse_arg(int key, char *arg, struct argp_state *state)
+{
+	switch (key) { /* key is one of the ARG_RB_* option ids */
+	case ARG_RB_BACK2BACK:
+		args.back2back = true;
+		break;
+	case ARG_RB_USE_OUTPUT:
+		args.ringbuf_use_output = true;
+		break;
+	case ARG_RB_BATCH_CNT:
+		args.batch_cnt = strtol(arg, NULL, 10);
+		if (args.batch_cnt < 0) {
+			fprintf(stderr, "Invalid batch count.\n");
+			argp_usage(state);
+		}
+		break;
+	case ARG_RB_SAMPLED:
+		args.sampled = true;
+		break;
+	case ARG_RB_SAMPLE_RATE:
+		args.sample_rate = strtol(arg, NULL, 10);
+		if (args.sample_rate < 0) {
+			fprintf(stderr, "Invalid perfbuf sample rate.\n");
+			argp_usage(state);
+		}
+		break;
+	default: /* not ours: let other parsers handle it */
+		return ARGP_ERR_UNKNOWN;
+	}
+	return 0;
+}
+
+/* exported into benchmark runner */
+const struct argp bench_ringbufs_argp = {
+	.options = opts,
+	.parser = parse_arg,
+};
+
+/* RINGBUF-LIBBPF benchmark */
+
+static struct counter buf_hits;
+
+static inline void bufs_trigger_batch()
+{
+	(void)syscall(__NR_getpgid);
+}
+
+static void bufs_validate()
+{
+	if (env.consumer_cnt != 1) {
+		fprintf(stderr, "rb-libbpf benchmark doesn't support multi-consumer!\n");
+		exit(1);
+	}
+
+	if (args.back2back && env.producer_cnt > 1) {
+		fprintf(stderr, "back-to-back mode makes sense only for single-producer case!\n");
+		exit(1);
+	}
+}
+
+static void *bufs_sample_producer(void *input)
+{
+	if (args.back2back) {
+		/* initial batch to get everything started */
+		bufs_trigger_batch();
+		return NULL;
+	}
+
+	while (true)
+		bufs_trigger_batch();
+	return NULL;
+}
+
+static struct ringbuf_libbpf_ctx {
+	struct ringbuf_bench *skel;
+	struct ring_buffer *ringbuf;
+} ringbuf_libbpf_ctx;
+
+static void ringbuf_libbpf_measure(struct bench_res *res)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct ringbuf_bench *ringbuf_setup_skeleton()
+{
+	struct ringbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = ringbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+	skel->rodata->use_output = args.ringbuf_use_output ? 1 : 0;
+
+	if (args.sampled)
+		/* record data + header take 16 bytes */
+		skel->rodata->wakeup_data_size = args.sample_rate * 16;
+
+	bpf_map__resize(skel->maps.ringbuf, args.ringbuf_sz);
+
+	if (ringbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static int buf_process_sample(void *ctx, void *data, size_t len)
+{
+	atomic_inc(&buf_hits.value);
+	return 0;
+}
+
+static void ringbuf_libbpf_setup()
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+	struct bpf_link *link;
+
+	ctx->skel = ringbuf_setup_skeleton();
+	ctx->ringbuf = ring_buffer__new(bpf_map__fd(ctx->skel->maps.ringbuf),
+					buf_process_sample, NULL, NULL);
+	if (!ctx->ringbuf) {
+		fprintf(stderr, "failed to create ringbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program!\n");
+		exit(1);
+	}
+}
+
+static void *ringbuf_libbpf_consumer(void *input)
+{
+	struct ringbuf_libbpf_ctx *ctx = &ringbuf_libbpf_ctx;
+
+	while (ring_buffer__poll(ctx->ringbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return NULL;
+}
+
+/* RINGBUF-CUSTOM benchmark */
+struct ringbuf_custom {
+	__u64 *consumer_pos;
+	__u64 *producer_pos;
+	__u64 mask;
+	void *data;
+	int map_fd;
+};
+
+static struct ringbuf_custom_ctx {
+	struct ringbuf_bench *skel;
+	struct ringbuf_custom ringbuf;
+	int epoll_fd;
+	struct epoll_event event;
+} ringbuf_custom_ctx;
+
+static void ringbuf_custom_measure(struct bench_res *res)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static void ringbuf_custom_setup()
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	const size_t page_size = getpagesize();
+	struct bpf_link *link;
+	struct ringbuf_custom *r;
+	void *tmp;
+	int err;
+
+	ctx->skel = ringbuf_setup_skeleton();
+
+	ctx->epoll_fd = epoll_create1(EPOLL_CLOEXEC);
+	if (ctx->epoll_fd < 0) {
+		fprintf(stderr, "failed to create epoll fd: %d\n", -errno);
+		exit(1);
+	}
+
+	r = &ctx->ringbuf;
+	r->map_fd = bpf_map__fd(ctx->skel->maps.ringbuf);
+	r->mask = args.ringbuf_sz - 1;
+
+	/* Map writable consumer page */
+	tmp = mmap(NULL, page_size, PROT_READ | PROT_WRITE, MAP_SHARED,
+		   r->map_fd, 0);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap consumer page: %d\n", -errno);
+		exit(1);
+	}
+	r->consumer_pos = tmp;
+
+	/* Map read-only producer page and data pages. */
+	tmp = mmap(NULL, page_size + 2 * args.ringbuf_sz, PROT_READ, MAP_SHARED,
+		   r->map_fd, page_size);
+	if (tmp == MAP_FAILED) {
+		fprintf(stderr, "failed to mmap data pages: %d\n", -errno);
+		exit(1);
+	}
+	r->producer_pos = tmp;
+	r->data = tmp + page_size;
+
+	ctx->event.events = EPOLLIN;
+	err = epoll_ctl(ctx->epoll_fd, EPOLL_CTL_ADD, r->map_fd, &ctx->event);
+	if (err < 0) {
+		fprintf(stderr, "failed to epoll add ringbuf: %d\n", -errno);
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_ringbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+#define RINGBUF_BUSY_BIT (1U << 31) /* unsigned: 1 << 31 overflows int */
+#define RINGBUF_DISCARD_BIT (1U << 30)
+#define RINGBUF_META_LEN 8 /* added to record length in roundup_len() */
+
+static inline int roundup_len(__u32 len)
+{
+	/* clear out top 2 bits */
+	len <<= 2;
+	len >>= 2;
+	/* add length prefix */
+	len += RINGBUF_META_LEN;
+	/* round up to 8 byte alignment */
+	return (len + 7) / 8 * 8;
+}
+
+static void ringbuf_custom_process_ring(struct ringbuf_custom *r)
+{
+	unsigned long cons_pos, prod_pos;
+	int *len_ptr, len;
+	bool got_new_data;
+
+	cons_pos = smp_load_acquire(r->consumer_pos);
+	while (true) {
+		got_new_data = false;
+		prod_pos = smp_load_acquire(r->producer_pos);
+		while (cons_pos < prod_pos) {
+			len_ptr = r->data + (cons_pos & r->mask);
+			len = smp_load_acquire(len_ptr);
+
+			/* sample not committed yet, bail out for now */
+			if (len & RINGBUF_BUSY_BIT)
+				return;
+
+			got_new_data = true;
+			cons_pos += roundup_len(len);
+
+			atomic_inc(&buf_hits.value);
+		}
+		if (got_new_data)
+			smp_store_release(r->consumer_pos, cons_pos);
+		else
+			break;
+	};
+}
+
+static void *ringbuf_custom_consumer(void *input)
+{
+	struct ringbuf_custom_ctx *ctx = &ringbuf_custom_ctx;
+	int cnt;
+
+	do {
+		if (args.back2back) /* consumer itself triggers the next batch */
+			bufs_trigger_batch();
+		cnt = epoll_wait(ctx->epoll_fd, &ctx->event, 1, -1);
+		if (cnt > 0)
+			ringbuf_custom_process_ring(&ctx->ringbuf);
+	} while (cnt >= 0); /* leave the loop only on epoll_wait() error */
+	fprintf(stderr, "ringbuf polling failed!\n");
+	return NULL;
+}
+
+/* PERFBUF-LIBBPF benchmark */
+static struct perfbuf_libbpf_ctx {
+	struct perfbuf_bench *skel;
+	struct perf_buffer *perfbuf;
+} perfbuf_libbpf_ctx;
+
+static void perfbuf_measure(struct bench_res *res)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	res->hits = atomic_swap(&buf_hits.value, 0);
+	res->drops = atomic_swap(&ctx->skel->bss->dropped, 0);
+}
+
+static struct perfbuf_bench *perfbuf_setup_skeleton()
+{
+	struct perfbuf_bench *skel;
+
+	setup_libbpf();
+
+	skel = perfbuf_bench__open();
+	if (!skel) {
+		fprintf(stderr, "failed to open skeleton\n");
+		exit(1);
+	}
+
+	skel->rodata->batch_cnt = args.batch_cnt;
+
+	if (perfbuf_bench__load(skel)) {
+		fprintf(stderr, "failed to load skeleton\n");
+		exit(1);
+	}
+
+	return skel;
+}
+
+static enum bpf_perf_event_ret
+perfbuf_process_sample_raw(void *input_ctx, int cpu,
+			   struct perf_event_header *e)
+{
+	switch (e->type) {
+	case PERF_RECORD_SAMPLE:
+		atomic_inc(&buf_hits.value);
+		break;
+	case PERF_RECORD_LOST:
+		break;
+	default:
+		return LIBBPF_PERF_EVENT_ERROR;
+	}
+	return LIBBPF_PERF_EVENT_CONT;
+}
+
+static void perfbuf_libbpf_setup()
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_event_attr attr;
+	struct perf_buffer_raw_opts pb_opts = {
+		.event_cb = perfbuf_process_sample_raw,
+		.ctx = (void *)(long)0,
+		.attr = &attr,
+	};
+	struct bpf_link *link;
+
+	ctx->skel = perfbuf_setup_skeleton();
+
+	memset(&attr, 0, sizeof(attr));
+	attr.config = PERF_COUNT_SW_BPF_OUTPUT;
+	attr.type = PERF_TYPE_SOFTWARE;
+	attr.sample_type = PERF_SAMPLE_RAW;
+	/* notify only every Nth sample */
+	if (args.sampled) {
+		attr.sample_period = args.sample_rate;
+		attr.wakeup_events = args.sample_rate;
+	} else {
+		attr.sample_period = 1;
+		attr.wakeup_events = 1;
+	}
+
+	if (args.sample_rate > args.batch_cnt) {
+		fprintf(stderr, "sample rate %d is too high for given batch count %d\n",
+			args.sample_rate, args.batch_cnt);
+		exit(1);
+	}
+
+	ctx->perfbuf = perf_buffer__new_raw(bpf_map__fd(ctx->skel->maps.perfbuf),
+					    args.perfbuf_sz, &pb_opts);
+	if (!ctx->perfbuf) {
+		fprintf(stderr, "failed to create perfbuf\n");
+		exit(1);
+	}
+
+	link = bpf_program__attach(ctx->skel->progs.bench_perfbuf);
+	if (IS_ERR(link)) {
+		fprintf(stderr, "failed to attach program\n");
+		exit(1);
+	}
+}
+
+static void *perfbuf_libbpf_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+
+	while (perf_buffer__poll(ctx->perfbuf, -1) >= 0) {
+		if (args.back2back)
+			bufs_trigger_batch();
+	}
+	fprintf(stderr, "perfbuf polling failed!\n");
+	return NULL;
+}
+
+/* PERFBUF-CUSTOM benchmark */
+
+/* copies of internal libbpf definitions */
+struct perf_cpu_buf {
+	struct perf_buffer *pb;
+	void *base; /* mmap()'ed memory */
+	void *buf; /* for reconstructing segmented data */
+	size_t buf_size;
+	int fd;
+	int cpu;
+	int map_key;
+};
+
+struct perf_buffer {
+	perf_buffer_event_fn event_cb;
+	perf_buffer_sample_fn sample_cb;
+	perf_buffer_lost_fn lost_cb;
+	void *ctx; /* passed into callbacks */
+
+	size_t page_size;
+	size_t mmap_size;
+	struct perf_cpu_buf **cpu_bufs;
+	struct epoll_event *events;
+	int cpu_cnt; /* number of allocated CPU buffers */
+	int epoll_fd; /* perf event FD */
+	int map_fd; /* BPF_MAP_TYPE_PERF_EVENT_ARRAY BPF map FD */
+};
+
+static void *perfbuf_custom_consumer(void *input)
+{
+	struct perfbuf_libbpf_ctx *ctx = &perfbuf_libbpf_ctx;
+	struct perf_buffer *pb = ctx->perfbuf;
+	struct perf_cpu_buf *cpu_buf;
+	struct perf_event_mmap_page *header;
+	size_t mmap_mask = pb->mmap_size - 1;
+	struct perf_event_header *ehdr;
+	__u64 data_head, data_tail;
+	size_t ehdr_size;
+	void *base;
+	int i, cnt;
+
+	while (true) {
+		if (args.back2back)
+			bufs_trigger_batch();
+		cnt = epoll_wait(pb->epoll_fd, pb->events, pb->cpu_cnt, -1);
+		if (cnt <= 0) {
+			fprintf(stderr, "perf epoll failed: %d\n", -errno);
+			exit(1);
+		}
+
+		for (i = 0; i < cnt; ++i) {
+			cpu_buf = pb->events[i].data.ptr;
+			header = cpu_buf->base;
+			base = ((void *)header) + pb->page_size;
+
+			data_head = ring_buffer_read_head(header);
+			data_tail = header->data_tail;
+			while (data_head != data_tail) {
+				ehdr = base + (data_tail & mmap_mask);
+				ehdr_size = ehdr->size;
+
+				if (ehdr->type == PERF_RECORD_SAMPLE)
+					atomic_inc(&buf_hits.value);
+
+				data_tail += ehdr_size;
+			}
+			ring_buffer_write_tail(header, data_tail);
+		}
+	}
+	return NULL;
+}
+
+const struct bench bench_rb_libbpf = {
+	.name = "rb-libbpf",
+	.validate = bufs_validate,
+	.setup = ringbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_libbpf_consumer,
+	.measure = ringbuf_libbpf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_rb_custom = {
+	.name = "rb-custom",
+	.validate = bufs_validate,
+	.setup = ringbuf_custom_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = ringbuf_custom_consumer,
+	.measure = ringbuf_custom_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_libbpf = {
+	.name = "pb-libbpf",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_libbpf_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
+const struct bench bench_pb_custom = {
+	.name = "pb-custom",
+	.validate = bufs_validate,
+	.setup = perfbuf_libbpf_setup,
+	.producer_thread = bufs_sample_producer,
+	.consumer_thread = perfbuf_custom_consumer,
+	.measure = perfbuf_measure,
+	.report_progress = hits_drops_report_progress,
+	.report_final = hits_drops_report_final,
+};
+
diff --git a/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
new file mode 100755
index 000000000000..af4aa04caba6
--- /dev/null
+++ b/tools/testing/selftests/bpf/benchs/run_bench_ringbufs.sh
@@ -0,0 +1,75 @@
+#!/bin/bash
+
+set -eufo pipefail
+
+RUN_BENCH="sudo ./bench -w3 -d10 -a"
+
+function hits()
+{
+	echo "$*" | sed -E "s/.*hits\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function drops()
+{
+	echo "$*" | sed -E "s/.*drops\s+([0-9]+\.[0-9]+ ± [0-9]+\.[0-9]+M\/s).*/\1/"
+}
+
+function header()
+{
+	local len=${#1}
+
+	printf "\n%s\n" "$1"
+	for i in $(seq 1 $len); do printf '='; done
+	printf '\n'
+}
+
+function summarize()
+{
+	bench="$1"
+	summary=$(echo $2 | tail -n1)
+	printf "%-20s %s (drops %s)\n" "$bench" "$(hits $summary)" "$(drops $summary)"
+}
+
+header "Single-producer, parallel producer"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH $b)"
+done
+
+header "Single-producer, parallel producer, sampled notification"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-sampled $b)"
+done
+
+header "Single-producer, back-to-back mode"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-b2b $b)"
+	summarize $b-sampled "$($RUN_BENCH --rb-sampled --rb-b2b $b)"
+done
+
+header "Ringbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "rb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b rb-custom)"
+done
+header "Perfbuf back-to-back, effect of sample rate"
+for b in 1 5 10 25 50 100 250 500 1000 2000 3000; do
+	summarize "pb-sampled-$b" "$($RUN_BENCH --rb-b2b --rb-batch-cnt $b --rb-sampled --rb-sample-rate $b pb-custom)"
+done
+
+header "Ringbuf back-to-back, reserve+commit vs output"
+summarize "reserve" "$($RUN_BENCH --rb-b2b                 rb-custom)"
+summarize "output"  "$($RUN_BENCH --rb-b2b --rb-use-output rb-custom)"
+
+header "Ringbuf sampled, reserve+commit vs output"
+summarize "reserve-sampled" "$($RUN_BENCH --rb-sampled                 rb-custom)"
+summarize "output-sampled"  "$($RUN_BENCH --rb-sampled --rb-use-output rb-custom)"
+
+header "Single-producer, consumer/producer competing on the same CPU, low batch count"
+for b in rb-libbpf rb-custom pb-libbpf pb-custom; do
+	summarize $b "$($RUN_BENCH --rb-batch-cnt 1 --rb-sample-rate 1 --prod-affinity 0 --cons-affinity 0 $b)"
+done
+
+header "Ringbuf, multi-producer contention"
+for b in 1 2 3 4 8 12 16 20 24 28 32 36 40 44 48 52; do
+	summarize "rb-libbpf nr_prod $b" "$($RUN_BENCH -p$b --rb-batch-cnt 50 rb-libbpf)"
+done
+
diff --git a/tools/testing/selftests/bpf/progs/perfbuf_bench.c b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
new file mode 100644
index 000000000000..e5ab4836a641
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/perfbuf_bench.c
@@ -0,0 +1,33 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_PERF_EVENT_ARRAY);
+	__uint(value_size, sizeof(int));
+	__uint(key_size, sizeof(int));
+} perfbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_perfbuf(void *ctx)
+{
+	__u64 *sample;
+	int i;
+
+	for (i = 0; i < batch_cnt; i++) {
+		if (bpf_perf_event_output(ctx, &perfbuf, BPF_F_CURRENT_CPU,
+					  &sample_val, sizeof(sample_val)))
+			__sync_add_and_fetch(&dropped, 1);
+	}
+	return 0;
+}
diff --git a/tools/testing/selftests/bpf/progs/ringbuf_bench.c b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
new file mode 100644
index 000000000000..123607d314d6
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/ringbuf_bench.c
@@ -0,0 +1,60 @@
+// SPDX-License-Identifier: GPL-2.0
+// Copyright (c) 2020 Facebook
+
+#include <linux/bpf.h>
+#include <stdint.h>
+#include <bpf/bpf_helpers.h>
+
+char _license[] SEC("license") = "GPL";
+
+struct {
+	__uint(type, BPF_MAP_TYPE_RINGBUF);
+} ringbuf SEC(".maps");
+
+const volatile int batch_cnt = 0;
+const volatile long use_output = 0;
+
+long sample_val = 42;
+long dropped __attribute__((aligned(128))) = 0;
+
+const volatile long wakeup_data_size = 0;
+
+static __always_inline long get_flags()
+{
+	long sz;
+
+	if (!wakeup_data_size)
+		return 0;
+
+	sz = bpf_ringbuf_query(&ringbuf, BPF_RB_AVAIL_DATA);
+	return sz >= wakeup_data_size ? BPF_RB_FORCE_WAKEUP : BPF_RB_NO_WAKEUP;
+}
+
+SEC("fentry/__x64_sys_getpgid")
+int bench_ringbuf(void *ctx)
+{
+	long *sample, flags;
+	int i;
+
+	if (!use_output) {
+		for (i = 0; i < batch_cnt; i++) {
+			sample = bpf_ringbuf_reserve(&ringbuf,
+					             sizeof(sample_val), 0);
+			if (!sample) {
+				__sync_add_and_fetch(&dropped, 1);
+			} else {
+				*sample = sample_val;
+				flags = get_flags();
+				bpf_ringbuf_submit(sample, flags);
+			}
+		}
+	} else {
+		for (i = 0; i < batch_cnt; i++) {
+			flags = get_flags();
+			if (bpf_ringbuf_output(&ringbuf, &sample_val,
+					       sizeof(sample_val), flags))
+				__sync_add_and_fetch(&dropped, 1);
+		}
+	}
+	return 0;
+}
-- 
cgit v1.2.3-59-g8ed1b


From 97abb2b396821f21c21cee2d537bb4e0a0eef31b Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Fri, 29 May 2020 00:54:24 -0700
Subject: docs/bpf: Add BPF ring buffer design notes

Add the commit description from patch #1 as stand-alone documentation under
Documentation/bpf, as it might be a more convenient format in the long term.

Suggested-by: Stanislav Fomichev <sdf@google.com>
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200529075424.3139988-6-andriin@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 Documentation/bpf/ringbuf.rst | 209 ++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 209 insertions(+)
 create mode 100644 Documentation/bpf/ringbuf.rst

diff --git a/Documentation/bpf/ringbuf.rst b/Documentation/bpf/ringbuf.rst
new file mode 100644
index 000000000000..75f943f0009d
--- /dev/null
+++ b/Documentation/bpf/ringbuf.rst
@@ -0,0 +1,209 @@
+===============
+BPF ring buffer
+===============
+
+This document describes BPF ring buffer design, API, and implementation details.
+
+.. contents::
+    :local:
+    :depth: 2
+
+Motivation
+----------
+
+There are two distinctive motivators for this work, which are not satisfied by
+existing perf buffer, which prompted creation of a new ring buffer
+implementation.
+
+- more efficient memory utilization by sharing ring buffer across CPUs;
+- preserving ordering of events that happen sequentially in time, even across
+  multiple CPUs (e.g., fork/exec/exit events for a task).
+
+These two problems are independent, but perf buffer fails to satisfy both.
+Both are a result of a choice to have per-CPU perf ring buffer.  Both can be
+also solved by having an MPSC implementation of ring buffer. The ordering
+problem could technically be solved for perf buffer with some in-kernel
+counting, but given the first one requires an MPSC buffer, the same solution
+would solve the second problem automatically.
+
+Semantics and APIs
+------------------
+
+Single ring buffer is presented to BPF programs as an instance of BPF map of
+type ``BPF_MAP_TYPE_RINGBUF``. Two other alternatives were considered, but
+ultimately rejected.
+
+One way would be to, similar to ``BPF_MAP_TYPE_PERF_EVENT_ARRAY``, make
+``BPF_MAP_TYPE_RINGBUF`` represent an array of ring buffers, but not
+enforce "same CPU only" rule. This would be more familiar interface compatible
+with existing perf buffer use in BPF, but would fail if application needed more
+advanced logic to lookup ring buffer by arbitrary key.
+``BPF_MAP_TYPE_HASH_OF_MAPS`` addresses this with current approach.
+Additionally, given the performance of BPF ringbuf, many use cases would just
+opt into a simple single ring buffer shared among all CPUs, for which current
+approach would be an overkill.
+
+Another approach could introduce a new concept, alongside BPF map, to represent
+generic "container" object, which doesn't necessarily have key/value interface
+with lookup/update/delete operations. This approach would add a lot of extra
+infrastructure that has to be built for observability and verifier support. It
+would also add another concept that BPF developers would have to familiarize
+themselves with, new syntax in libbpf, etc. But then would really provide no
+additional benefits over the approach of using a map.  ``BPF_MAP_TYPE_RINGBUF``
+doesn't support lookup/update/delete operations, but neither do a few other map
+types (e.g., queue and stack; array doesn't support delete, etc).
+
+The approach chosen has an advantage of re-using existing BPF map
+infrastructure (introspection APIs in kernel, libbpf support, etc), being
+familiar concept (no need to teach users a new type of object in BPF program),
+and utilizing existing tooling (bpftool). For common scenario of using a single
+ring buffer for all CPUs, it's as simple and straightforward, as would be with
+a dedicated "container" object. On the other hand, by being a map, it can be
+combined with ``ARRAY_OF_MAPS`` and ``HASH_OF_MAPS`` map-in-maps to implement
+a wide variety of topologies, from one ring buffer for each CPU (e.g., as
+a replacement for perf buffer use cases), to a complicated application
+hashing/sharding of ring buffers (e.g., having a small pool of ring buffers
+with hashed task's tgid being a look up key to preserve order, but reduce
+contention).
+
+Key and value sizes are enforced to be zero. ``max_entries`` is used to specify
+the size of ring buffer and has to be a power of 2 value.
+
+There are a bunch of similarities between perf buffer
+(``BPF_MAP_TYPE_PERF_EVENT_ARRAY``) and new BPF ring buffer semantics:
+
+- variable-length records;
+- if there is no more space left in ring buffer, reservation fails, no
+  blocking;
+- memory-mappable data area for user-space applications for ease of
+  consumption and high performance;
+- epoll notifications for new incoming data;
+- but still the ability to do busy polling for new data to achieve the
+  lowest latency, if necessary.
+
+BPF ringbuf provides two sets of APIs to BPF programs:
+
+- ``bpf_ringbuf_output()`` allows to *copy* data from one place to a ring
+  buffer, similarly to ``bpf_perf_event_output()``;
+- ``bpf_ringbuf_reserve()``/``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()``
+  APIs split the whole process into two steps. First, a fixed amount of space
+  is reserved. If successful, a pointer to a data inside ring buffer data
+  area is returned, which BPF programs can use similarly to a data inside
+  array/hash maps. Once ready, this piece of memory is either committed or
+  discarded. Discard is similar to commit, but makes consumer ignore the
+  record.
+
+``bpf_ringbuf_output()`` has disadvantage of incurring extra memory copy,
+because record has to be prepared in some other place first. But it allows to
+submit records of the length that's not known to verifier beforehand. It also
+closely matches ``bpf_perf_event_output()``, so will simplify migration
+significantly.
+
+``bpf_ringbuf_reserve()`` avoids the extra copy of memory by providing a memory
+pointer directly to ring buffer memory. In a lot of cases records are larger
+than BPF stack space allows, so many programs have to use extra per-CPU array as
+a temporary heap for preparing sample. bpf_ringbuf_reserve() avoids this need
+completely. But in exchange, it only allows a known constant size of memory to
+be reserved, such that verifier can verify that BPF program can't access memory
+outside its reserved record space. bpf_ringbuf_output(), while slightly slower
+due to extra memory copy, covers some use cases that are not suitable for
+``bpf_ringbuf_reserve()``.
+
+The difference between commit and discard is very small. Discard just marks
+a record as discarded, and such records are supposed to be ignored by consumer
+code. Discard is useful for some advanced use-cases, such as ensuring
+all-or-nothing multi-record submission, or emulating temporary
+``malloc()``/``free()`` within single BPF program invocation.
+
+Each reserved record is tracked by verifier through existing
+reference-tracking logic, similar to socket ref-tracking. It is thus
+impossible to reserve a record, but forget to submit (or discard) it.
+
+``bpf_ringbuf_query()`` helper allows to query various properties of ring
+buffer.  Currently 4 are supported:
+
+- ``BPF_RB_AVAIL_DATA`` returns amount of unconsumed data in ring buffer;
+- ``BPF_RB_RING_SIZE`` returns the size of ring buffer;
+- ``BPF_RB_CONS_POS``/``BPF_RB_PROD_POS`` returns current logical position
+  of consumer/producer, respectively.
+
+Returned values are momentary snapshots of ring buffer state and could be
+off by the time helper returns, so this should be used only for
+debugging/reporting reasons or for implementing various heuristics, that take
+into account highly-changeable nature of some of those characteristics.
+
+One such heuristic might involve more fine-grained control over poll/epoll
+notifications about new data availability in ring buffer. Together with
+``BPF_RB_NO_WAKEUP``/``BPF_RB_FORCE_WAKEUP`` flags for output/commit/discard
+helpers, it allows BPF program a high degree of control and, e.g., more
+efficient batched notifications. Default self-balancing strategy, though,
+should be adequate for most applications and will work reliably and efficiently
+already.
+
+Design and Implementation
+-------------------------
+
+This reserve/commit schema allows a natural way for multiple producers, either
+on different CPUs or even on the same CPU/in the same BPF program, to reserve
+independent records and work with them without blocking other producers. This
+means that if BPF program was interrupted by another BPF program sharing the
+same ring buffer, they will both get a record reserved (provided there is
+enough space left) and can work with it and submit it independently. This
+applies to NMI context as well, except that due to using a spinlock during
+reservation, in NMI context, ``bpf_ringbuf_reserve()`` might fail to get
+a lock, in which case reservation will fail even if ring buffer is not full.
+
+The ring buffer itself internally is implemented as a power-of-2 sized
+circular buffer, with two logical and ever-increasing counters (which might
+wrap around on 32-bit architectures, that's not a problem):
+
+- consumer counter shows up to which logical position consumer consumed the
+  data;
+- producer counter denotes amount of data reserved by all producers.
+
+Each time a record is reserved, producer that "owns" the record will
+successfully advance producer counter. At that point, data is still not yet
+ready to be consumed, though. Each record has 8 byte header, which contains the
+length of reserved record, as well as two extra bits: busy bit to denote that
+record is still being worked on, and discard bit, which might be set at commit
+time if record is discarded. In the latter case, consumer is supposed to skip
+the record and move on to the next one. Record header also encodes record's
+relative offset from the beginning of ring buffer data area (in pages). This
+allows ``bpf_ringbuf_commit()``/``bpf_ringbuf_discard()`` to accept only the
+pointer to the record itself, without requiring also the pointer to ring buffer
+itself. Ring buffer memory location will be restored from record metadata
+header. This significantly simplifies verifier, as well as improving API
+usability.
+
+Producer counter increments are serialized under spinlock, so there is
+a strict ordering between reservations. Commits, on the other hand, are
+completely lockless and independent. All records become available to consumer
+in the order of reservations, but only after all previous records were
+already committed. It is thus possible for slow producers to temporarily hold
+off submitted records, that were reserved later.
+
+Reservation/commit/consumer protocol is verified by litmus tests in
+Documentation/litmus_tests/bpf-rb/_.
+
+One interesting implementation bit, that significantly simplifies (and thus
+speeds up as well) implementation of both producers and consumers is how data
+area is mapped twice contiguously back-to-back in the virtual memory. This
+allows to not take any special measures for samples that have to wrap around
+at the end of the circular buffer data area, because the next page after the
+last data page would be first data page again, and thus the sample will still
+appear completely contiguous in virtual memory. See comment and a simple ASCII
+diagram showing this visually in ``bpf_ringbuf_area_alloc()``.
+
+Another feature that distinguishes BPF ringbuf from perf ring buffer is
+self-pacing notifications of new data availability.
+``bpf_ringbuf_commit()`` implementation will send a notification of new record
+being available after commit only if consumer has already caught up right up to
+the record being committed. If not, consumer still has to catch up and thus
+will see new data anyways without needing an extra poll notification.
+Benchmarks (see tools/testing/selftests/bpf/benchs/bench_ringbuf.c_) show that
+this allows to achieve a very high throughput without having to resort to
+tricks like "notify only every Nth sample", which are necessary with perf
+buffer. For extreme cases, when BPF program wants more manual control of
+notifications, commit/discard/output helpers accept ``BPF_RB_NO_WAKEUP`` and
+``BPF_RB_FORCE_WAKEUP`` flags, which give full control over notifications of
+data availability, but require extra caution and diligence in using this API.
-- 
cgit v1.2.3-59-g8ed1b


From 43dd115b1fffdd8d2c4cc15659c00b2a1addbc43 Mon Sep 17 00:00:00 2001
From: Anton Protopopov <a.s.protopopov@gmail.com>
Date: Wed, 27 May 2020 18:57:00 +0000
Subject: selftests/bpf: Add tests for write-only stacks/queues

For write-only stacks and queues bpf_map_update_elem should be allowed, but
bpf_map_lookup_elem and bpf_map_lookup_and_delete_elem should fail with EPERM.

Signed-off-by: Anton Protopopov <a.s.protopopov@gmail.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200527185700.14658-6-a.s.protopopov@gmail.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/test_maps.c | 40 ++++++++++++++++++++++++++++++++-
 1 file changed, 39 insertions(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/test_maps.c b/tools/testing/selftests/bpf/test_maps.c
index 08d63948514a..6a12a0e01e07 100644
--- a/tools/testing/selftests/bpf/test_maps.c
+++ b/tools/testing/selftests/bpf/test_maps.c
@@ -1405,7 +1405,7 @@ static void test_map_rdonly(void)
 	close(fd);
 }
 
-static void test_map_wronly(void)
+static void test_map_wronly_hash(void)
 {
 	int fd, key = 0, value = 0;
 
@@ -1429,6 +1429,44 @@ static void test_map_wronly(void)
 	close(fd);
 }
 
+static void test_map_wronly_stack_or_queue(enum bpf_map_type map_type)
+{
+	int fd, value = 0;
+
+	assert(map_type == BPF_MAP_TYPE_QUEUE ||
+	       map_type == BPF_MAP_TYPE_STACK);
+	fd = bpf_create_map(map_type, 0, sizeof(value), MAP_SIZE,
+			    map_flags | BPF_F_WRONLY);
+	/* Stack/Queue maps do not support BPF_F_NO_PREALLOC */
+	if (map_flags & BPF_F_NO_PREALLOC) {
+		assert(fd < 0 && errno == EINVAL);
+		return;
+	}
+	if (fd < 0) {
+		printf("Failed to create map '%s'!\n", strerror(errno));
+		exit(1);
+	}
+
+	value = 1234;
+	assert(bpf_map_update_elem(fd, NULL, &value, BPF_ANY) == 0);
+
+	/* Peek element should fail */
+	assert(bpf_map_lookup_elem(fd, NULL, &value) == -1 && errno == EPERM);
+
+	/* Pop element should fail */
+	assert(bpf_map_lookup_and_delete_elem(fd, NULL, &value) == -1 &&
+	       errno == EPERM);
+
+	close(fd);
+}
+
+static void test_map_wronly(void)
+{
+	test_map_wronly_hash();
+	test_map_wronly_stack_or_queue(BPF_MAP_TYPE_STACK);
+	test_map_wronly_stack_or_queue(BPF_MAP_TYPE_QUEUE);
+}
+
 static void prepare_reuseport_grp(int type, int map_fd, size_t map_elem_size,
 				  __s64 *fds64, __u64 *sk_cookies,
 				  unsigned int n)
-- 
cgit v1.2.3-59-g8ed1b


From c3c16f2ea6d20159903cf93afbb1155f3d8348d5 Mon Sep 17 00:00:00 2001
From: Amritha Nambiar <amritha.nambiar@intel.com>
Date: Tue, 26 May 2020 17:34:36 -0700
Subject: bpf: Add rx_queue_mapping to bpf_sock

Add "rx_queue_mapping" to bpf_sock. This gives read access for the
existing field (sk_rx_queue_mapping) of struct sock from bpf_sock.
Semantics for the bpf_sock rx_queue_mapping access are similar to
sk_rx_queue_get(), i.e., the value NO_QUEUE_MAPPING is not allowed
and -1 is returned in that case. This is useful for transmit queue
selection based on the received queue index which is cached in the
socket in the receive path.

v3: Addressed review comments to add usecase in patch description,
    and fixed default value for rx_queue_mapping.
v2: fixed build error for CONFIG_XPS wrapping, reported by
    kbuild test robot <lkp@intel.com>

Signed-off-by: Amritha Nambiar <amritha.nambiar@intel.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/uapi/linux/bpf.h |  1 +
 net/core/filter.c        | 18 ++++++++++++++++++
 2 files changed, 19 insertions(+)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 974ca6e948e3..630432c5c292 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3612,6 +3612,7 @@ struct bpf_sock {
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
+	__s32 rx_queue_mapping;
 };
 
 struct bpf_tcp_sock {
diff --git a/net/core/filter.c b/net/core/filter.c
index a6fc23447f12..0008b029d644 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -6849,6 +6849,7 @@ bool bpf_sock_is_valid_access(int off, int size, enum bpf_access_type type,
 	case offsetof(struct bpf_sock, protocol):
 	case offsetof(struct bpf_sock, dst_port):
 	case offsetof(struct bpf_sock, src_port):
+	case offsetof(struct bpf_sock, rx_queue_mapping):
 	case bpf_ctx_range(struct bpf_sock, src_ip4):
 	case bpf_ctx_range_till(struct bpf_sock, src_ip6[0], src_ip6[3]):
 	case bpf_ctx_range(struct bpf_sock, dst_ip4):
@@ -7897,6 +7898,23 @@ u32 bpf_sock_convert_ctx_access(enum bpf_access_type type,
 						    skc_state),
 				       target_size));
 		break;
+	case offsetof(struct bpf_sock, rx_queue_mapping):
+#ifdef CONFIG_XPS
+		*insn++ = BPF_LDX_MEM(
+			BPF_FIELD_SIZEOF(struct sock, sk_rx_queue_mapping),
+			si->dst_reg, si->src_reg,
+			bpf_target_off(struct sock, sk_rx_queue_mapping,
+				       sizeof_field(struct sock,
+						    sk_rx_queue_mapping),
+				       target_size));
+		*insn++ = BPF_JMP_IMM(BPF_JNE, si->dst_reg, NO_QUEUE_MAPPING,
+				      1);
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
+#else
+		*insn++ = BPF_MOV64_IMM(si->dst_reg, -1);
+		*target_size = 2;
+#endif
+		break;
 	}
 
 	return insn - insn_buf;
-- 
cgit v1.2.3-59-g8ed1b


From 7f1c04269fe7b3293dea38ea65da4fd6614d6f80 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Fri, 29 May 2020 16:07:12 -0600
Subject: devmap: Formalize map value as a named struct
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add 'struct bpf_devmap_val' to formalize the expected values that can
be passed in for a DEVMAP. Update devmap code to use the struct.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200529220716.75383-2-dsahern@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/devmap.c | 45 ++++++++++++++++++++++++++++-----------------
 1 file changed, 28 insertions(+), 17 deletions(-)

diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a51d9fb7a359..a1459de0914e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -60,12 +60,18 @@ struct xdp_dev_bulk_queue {
 	unsigned int count;
 };
 
+/* DEVMAP values */
+struct bpf_devmap_val {
+	u32 ifindex;   /* device index */
+};
+
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
 	struct rcu_head rcu;
 	unsigned int idx;
+	struct bpf_devmap_val val;
 };
 
 struct bpf_dtab {
@@ -472,18 +478,15 @@ int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 static void *dev_map_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_lookup_elem(map, *(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
 
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void *dev_map_hash_lookup_elem(struct bpf_map *map, void *key)
 {
 	struct bpf_dtab_netdev *obj = __dev_map_hash_lookup_elem(map,
 								*(u32 *)key);
-	struct net_device *dev = obj ? obj->dev : NULL;
-
-	return dev ? &dev->ifindex : NULL;
+	return obj ? &obj->val : NULL;
 }
 
 static void __dev_map_entry_free(struct rcu_head *rcu)
@@ -541,7 +544,7 @@ static int dev_map_hash_delete_elem(struct bpf_map *map, void *key)
 
 static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_dtab *dtab,
-						    u32 ifindex,
+						    struct bpf_devmap_val *val,
 						    unsigned int idx)
 {
 	struct bpf_dtab_netdev *dev;
@@ -551,16 +554,18 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 	if (!dev)
 		return ERR_PTR(-ENOMEM);
 
-	dev->dev = dev_get_by_index(net, ifindex);
-	if (!dev->dev) {
-		kfree(dev);
-		return ERR_PTR(-EINVAL);
-	}
+	dev->dev = dev_get_by_index(net, val->ifindex);
+	if (!dev->dev)
+		goto err_out;
 
 	dev->idx = idx;
 	dev->dtab = dtab;
+	dev->val.ifindex = val->ifindex;
 
 	return dev;
+err_out:
+	kfree(dev);
+	return ERR_PTR(-EINVAL);
 }
 
 static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
@@ -568,7 +573,7 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
+	struct bpf_devmap_val val = { };
 	u32 i = *(u32 *)key;
 
 	if (unlikely(map_flags > BPF_EXIST))
@@ -578,10 +583,13 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 	if (unlikely(map_flags == BPF_NOEXIST))
 		return -EEXIST;
 
-	if (!ifindex) {
+	/* already verified value_size <= sizeof val */
+	memcpy(&val, value, map->value_size);
+
+	if (!val.ifindex) {
 		dev = NULL;
 	} else {
-		dev = __dev_map_alloc_node(net, dtab, ifindex, i);
+		dev = __dev_map_alloc_node(net, dtab, &val, i);
 		if (IS_ERR(dev))
 			return PTR_ERR(dev);
 	}
@@ -609,12 +617,15 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
 	struct bpf_dtab_netdev *dev, *old_dev;
-	u32 ifindex = *(u32 *)value;
+	struct bpf_devmap_val val = { };
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;
 
-	if (unlikely(map_flags > BPF_EXIST || !ifindex))
+	/* already verified value_size <= sizeof val */
+	memcpy(&val, value, map->value_size);
+
+	if (unlikely(map_flags > BPF_EXIST || !val.ifindex))
 		return -EINVAL;
 
 	spin_lock_irqsave(&dtab->index_lock, flags);
@@ -623,7 +634,7 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 	if (old_dev && (map_flags & BPF_NOEXIST))
 		goto out_err;
 
-	dev = __dev_map_alloc_node(net, dtab, ifindex, idx);
+	dev = __dev_map_alloc_node(net, dtab, &val, idx);
 	if (IS_ERR(dev)) {
 		err = PTR_ERR(dev);
 		goto out_err;
-- 
cgit v1.2.3-59-g8ed1b


From b36e62eb85215a60916f910070f6d494b4f3e73a Mon Sep 17 00:00:00 2001
From: Yonghong Song <yhs@fb.com>
Date: Thu, 28 May 2020 17:48:10 -0700
Subject: bpf: Use strncpy_from_unsafe_strict() in bpf_seq_printf() helper

In bpf_seq_printf() helper, when user specified a "%s" in the
format string, strncpy_from_unsafe() is used to read the actual string
to a buffer. The string could be a format string or a string in
the kernel data structure. It is really unlikely that the string
will reside in the user memory.

This is different from Commit b2a5212fb634 ("bpf: Restrict bpf_trace_printk()'s %s
usage and add %pks, %pus specifier") which still used
strncpy_from_unsafe() for "%s" to preserve the old behavior.

If in the future, bpf_seq_printf() indeed needs to read user
memory, we can implement "%pus" format string.

Based on discussion in [1], if the intent is to read kernel memory,
strncpy_from_unsafe_strict() should be used. So this patch
changed to use strncpy_from_unsafe_strict().

[1]: https://lore.kernel.org/bpf/20200521152301.2587579-1-hch@lst.de/T/

Fixes: 492e639f0c22 ("bpf: Add bpf_seq_printf and bpf_seq_write helpers")
Signed-off-by: Yonghong Song <yhs@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Cc: Christoph Hellwig <hch@lst.de>
Link: https://lore.kernel.org/bpf/20200529004810.3352219-1-yhs@fb.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/trace/bpf_trace.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index 3767d34114c0..b6c24be5ff53 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -585,9 +585,9 @@ BPF_CALL_5(bpf_seq_printf, struct seq_file *, m, char *, fmt, u32, fmt_size,
 				goto out;
 			}
 
-			err = strncpy_from_unsafe(bufs->buf[memcpy_cnt],
-						  (void *) (long) args[fmt_cnt],
-						  MAX_SEQ_PRINTF_STR_LEN);
+			err = strncpy_from_unsafe_strict(bufs->buf[memcpy_cnt],
+							 (void *) (long) args[fmt_cnt],
+							 MAX_SEQ_PRINTF_STR_LEN);
 			if (err < 0)
 				bufs->buf[memcpy_cnt][0] = '\0';
 			params[fmt_cnt] = (u64)(long)bufs->buf[memcpy_cnt];
-- 
cgit v1.2.3-59-g8ed1b


From fbee97feed9b3e4acdf9590e1f6b4a2eefecfffe Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Fri, 29 May 2020 16:07:13 -0600
Subject: bpf: Add support to attach bpf program to a devmap entry
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add BPF_XDP_DEVMAP attach type for use with programs associated with a
DEVMAP entry.

Allow DEVMAPs to associate a program with a device entry by adding
a bpf_prog.fd to 'struct bpf_devmap_val'. Values read show the program
id, so the fd and id are a union. bpf programs can get access to the
struct via vmlinux.h.

The program associated with the fd must have type XDP with expected
attach type BPF_XDP_DEVMAP. When a program is associated with a device
index, the program is run on an XDP_REDIRECT and before the buffer is
added to the per-cpu queue. At this point rxq data is still valid; the
next patch adds tx device information allowing the program to see both
ingress and egress device indices.

XDP generic is skb based and XDP programs do not work with skb's. Block
the use case by walking maps used by a program that is to be attached
via xdpgeneric and fail if any of them are DEVMAP / DEVMAP_HASH with
a value size other than 4 bytes (i.e., entries that can carry a program fd).

Block attach of BPF_XDP_DEVMAP programs to devices.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200529220716.75383-3-dsahern@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/bpf.h            |  5 +++
 include/uapi/linux/bpf.h       |  1 +
 kernel/bpf/devmap.c            | 88 ++++++++++++++++++++++++++++++++++++++++--
 net/core/dev.c                 | 18 +++++++++
 tools/include/uapi/linux/bpf.h |  1 +
 5 files changed, 109 insertions(+), 4 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e5884f7f801c..e042311f991f 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1250,6 +1250,7 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 		    struct net_device *dev_rx);
 int dev_map_generic_redirect(struct bpf_dtab_netdev *dst, struct sk_buff *skb,
 			     struct bpf_prog *xdp_prog);
+bool dev_map_can_have_prog(struct bpf_map *map);
 
 struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key);
 void __cpu_map_flush(void);
@@ -1363,6 +1364,10 @@ static inline struct net_device  *__dev_map_hash_lookup_elem(struct bpf_map *map
 {
 	return NULL;
 }
+static inline bool dev_map_can_have_prog(struct bpf_map *map)
+{
+	return false;
+}
 
 static inline void __dev_flush(void)
 {
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 630432c5c292..f1e364d69007 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -225,6 +225,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_XDP_DEVMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index a1459de0914e..0089d56617ec 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -63,12 +63,17 @@ struct xdp_dev_bulk_queue {
 /* DEVMAP values */
 struct bpf_devmap_val {
 	u32 ifindex;   /* device index */
+	union {
+		int fd;  /* prog fd on map write */
+		u32 id;  /* prog id on map read */
+	} bpf_prog;
 };
 
 struct bpf_dtab_netdev {
 	struct net_device *dev; /* must be first member, due to tracepoint */
 	struct hlist_node index_hlist;
 	struct bpf_dtab *dtab;
+	struct bpf_prog *xdp_prog;
 	struct rcu_head rcu;
 	unsigned int idx;
 	struct bpf_devmap_val val;
@@ -111,12 +116,18 @@ static inline struct hlist_head *dev_map_index_hash(struct bpf_dtab *dtab,
 
 static int dev_map_init_map(struct bpf_dtab *dtab, union bpf_attr *attr)
 {
+	u32 valsize = attr->value_size;
 	u64 cost = 0;
 	int err;
 
-	/* check sanity of attributes */
+	/* check sanity of attributes. 2 value sizes supported:
+	 * 4 bytes: ifindex
+	 * 8 bytes: ifindex + prog fd
+	 */
 	if (attr->max_entries == 0 || attr->key_size != 4 ||
-	    attr->value_size != 4 || attr->map_flags & ~DEV_CREATE_FLAG_MASK)
+	    (valsize != offsetofend(struct bpf_devmap_val, ifindex) &&
+	     valsize != offsetofend(struct bpf_devmap_val, bpf_prog.fd)) ||
+	    attr->map_flags & ~DEV_CREATE_FLAG_MASK)
 		return -EINVAL;
 
 	/* Lookup returns a pointer straight to dev->ifindex, so make sure the
@@ -223,6 +234,8 @@ static void dev_map_free(struct bpf_map *map)
 
 			hlist_for_each_entry_safe(dev, next, head, index_hlist) {
 				hlist_del_rcu(&dev->index_hlist);
+				if (dev->xdp_prog)
+					bpf_prog_put(dev->xdp_prog);
 				dev_put(dev->dev);
 				kfree(dev);
 			}
@@ -237,6 +250,8 @@ static void dev_map_free(struct bpf_map *map)
 			if (!dev)
 				continue;
 
+			if (dev->xdp_prog)
+				bpf_prog_put(dev->xdp_prog);
 			dev_put(dev->dev);
 			kfree(dev);
 		}
@@ -323,6 +338,16 @@ static int dev_map_hash_get_next_key(struct bpf_map *map, void *key,
 	return -ENOENT;
 }
 
+bool dev_map_can_have_prog(struct bpf_map *map)
+{
+	if ((map->map_type == BPF_MAP_TYPE_DEVMAP ||
+	     map->map_type == BPF_MAP_TYPE_DEVMAP_HASH) &&
+	    map->value_size != offsetofend(struct bpf_devmap_val, ifindex))
+		return true;
+
+	return false;
+}
+
 static int bq_xmit_all(struct xdp_dev_bulk_queue *bq, u32 flags)
 {
 	struct net_device *dev = bq->dev;
@@ -447,6 +472,30 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	return bq_enqueue(dev, xdpf, dev_rx);
 }
 
+static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
+					 struct xdp_buff *xdp,
+					 struct bpf_prog *xdp_prog)
+{
+	u32 act;
+
+	act = bpf_prog_run_xdp(xdp_prog, xdp);
+	switch (act) {
+	case XDP_PASS:
+		return xdp;
+	case XDP_DROP:
+		break;
+	default:
+		bpf_warn_invalid_xdp_action(act);
+		fallthrough;
+	case XDP_ABORTED:
+		trace_xdp_exception(dev, xdp_prog, act);
+		break;
+	}
+
+	xdp_return_buff(xdp);
+	return NULL;
+}
+
 int dev_xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 		    struct net_device *dev_rx)
 {
@@ -458,6 +507,11 @@ int dev_map_enqueue(struct bpf_dtab_netdev *dst, struct xdp_buff *xdp,
 {
 	struct net_device *dev = dst->dev;
 
+	if (dst->xdp_prog) {
+		xdp = dev_map_run_prog(dev, xdp, dst->xdp_prog);
+		if (!xdp)
+			return 0;
+	}
 	return __xdp_enqueue(dev, xdp, dev_rx);
 }
 
@@ -494,6 +548,8 @@ static void __dev_map_entry_free(struct rcu_head *rcu)
 	struct bpf_dtab_netdev *dev;
 
 	dev = container_of(rcu, struct bpf_dtab_netdev, rcu);
+	if (dev->xdp_prog)
+		bpf_prog_put(dev->xdp_prog);
 	dev_put(dev->dev);
 	kfree(dev);
 }
@@ -547,6 +603,7 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 						    struct bpf_devmap_val *val,
 						    unsigned int idx)
 {
+	struct bpf_prog *prog = NULL;
 	struct bpf_dtab_netdev *dev;
 
 	dev = kmalloc_node(sizeof(*dev), GFP_ATOMIC | __GFP_NOWARN,
@@ -558,11 +615,31 @@ static struct bpf_dtab_netdev *__dev_map_alloc_node(struct net *net,
 	if (!dev->dev)
 		goto err_out;
 
+	if (val->bpf_prog.fd >= 0) {
+		prog = bpf_prog_get_type_dev(val->bpf_prog.fd,
+					     BPF_PROG_TYPE_XDP, false);
+		if (IS_ERR(prog))
+			goto err_put_dev;
+		if (prog->expected_attach_type != BPF_XDP_DEVMAP)
+			goto err_put_prog;
+	}
+
 	dev->idx = idx;
 	dev->dtab = dtab;
+	if (prog) {
+		dev->xdp_prog = prog;
+		dev->val.bpf_prog.id = prog->aux->id;
+	} else {
+		dev->xdp_prog = NULL;
+		dev->val.bpf_prog.id = 0;
+	}
 	dev->val.ifindex = val->ifindex;
 
 	return dev;
+err_put_prog:
+	bpf_prog_put(prog);
+err_put_dev:
+	dev_put(dev->dev);
 err_out:
 	kfree(dev);
 	return ERR_PTR(-EINVAL);
@@ -572,8 +649,8 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 				 void *key, void *value, u64 map_flags)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
-	struct bpf_devmap_val val = { };
 	u32 i = *(u32 *)key;
 
 	if (unlikely(map_flags > BPF_EXIST))
@@ -588,6 +665,9 @@ static int __dev_map_update_elem(struct net *net, struct bpf_map *map,
 
 	if (!val.ifindex) {
 		dev = NULL;
+		/* can not specify fd if ifindex is 0 */
+		if (val.bpf_prog.fd != -1)
+			return -EINVAL;
 	} else {
 		dev = __dev_map_alloc_node(net, dtab, &val, i);
 		if (IS_ERR(dev))
@@ -616,8 +696,8 @@ static int __dev_map_hash_update_elem(struct net *net, struct bpf_map *map,
 				     void *key, void *value, u64 map_flags)
 {
 	struct bpf_dtab *dtab = container_of(map, struct bpf_dtab, map);
+	struct bpf_devmap_val val = { .bpf_prog.fd = -1 };
 	struct bpf_dtab_netdev *dev, *old_dev;
-	struct bpf_devmap_val val = { };
 	u32 idx = *(u32 *)key;
 	unsigned long flags;
 	int err = -EEXIST;
diff --git a/net/core/dev.c b/net/core/dev.c
index ae37586f6ee8..10684833f864 100644
--- a/net/core/dev.c
+++ b/net/core/dev.c
@@ -5420,6 +5420,18 @@ static int generic_xdp_install(struct net_device *dev, struct netdev_bpf *xdp)
 	struct bpf_prog *new = xdp->prog;
 	int ret = 0;
 
+	if (new) {
+		u32 i;
+
+		/* generic XDP does not work with DEVMAPs that can
+		 * have a bpf_prog installed on an entry
+		 */
+		for (i = 0; i < new->aux->used_map_cnt; i++) {
+			if (dev_map_can_have_prog(new->aux->used_maps[i]))
+				return -EINVAL;
+		}
+	}
+
 	switch (xdp->command) {
 	case XDP_SETUP_PROG:
 		rcu_assign_pointer(dev->xdp_prog, new);
@@ -8835,6 +8847,12 @@ int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack,
 			return -EINVAL;
 		}
 
+		if (prog->expected_attach_type == BPF_XDP_DEVMAP) {
+			NL_SET_ERR_MSG(extack, "BPF_XDP_DEVMAP programs can not be attached to a device");
+			bpf_prog_put(prog);
+			return -EINVAL;
+		}
+
 		/* prog->aux->id may be 0 for orphaned device-bound progs */
 		if (prog->aux->id && prog->aux->id == prog_id) {
 			bpf_prog_put(prog);
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 974ca6e948e3..65d7717bce2f 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -225,6 +225,7 @@ enum bpf_attach_type {
 	BPF_CGROUP_INET6_GETPEERNAME,
 	BPF_CGROUP_INET4_GETSOCKNAME,
 	BPF_CGROUP_INET6_GETSOCKNAME,
+	BPF_XDP_DEVMAP,
 	__MAX_BPF_ATTACH_TYPE
 };
 
-- 
cgit v1.2.3-59-g8ed1b


From 64b59025c15b244c0954cf52b24fbabfcf5ed8f6 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Fri, 29 May 2020 16:07:14 -0600
Subject: xdp: Add xdp_txq_info to xdp_buff
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add xdp_txq_info as the Tx counterpart to xdp_rxq_info. At the
moment only the device is added. Other fields (queue_index)
can be added as use cases arise.

>From a UAPI perspective, add egress_ifindex to xdp context for
bpf programs to see the Tx device.

Update the verifier to only allow accesses to egress_ifindex by
XDP programs with BPF_XDP_DEVMAP expected attach type.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200529220716.75383-4-dsahern@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/net/xdp.h              |  5 +++++
 include/uapi/linux/bpf.h       |  2 ++
 kernel/bpf/devmap.c            |  3 +++
 net/core/filter.c              | 17 +++++++++++++++++
 tools/include/uapi/linux/bpf.h |  2 ++
 5 files changed, 29 insertions(+)

diff --git a/include/net/xdp.h b/include/net/xdp.h
index 90f11760bd12..d54022959491 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -61,12 +61,17 @@ struct xdp_rxq_info {
 	struct xdp_mem_info mem;
 } ____cacheline_aligned; /* perf critical, avoid false-sharing */
 
+struct xdp_txq_info {
+	struct net_device *dev;
+};
+
 struct xdp_buff {
 	void *data;
 	void *data_end;
 	void *data_meta;
 	void *data_hard_start;
 	struct xdp_rxq_info *rxq;
+	struct xdp_txq_info *txq;
 	u32 frame_sz; /* frame size to deduce data_hard_end/reserved tailroom*/
 };
 
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f1e364d69007..f862a58fb567 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3707,6 +3707,8 @@ struct xdp_md {
 	/* Below access go through struct xdp_rxq_info */
 	__u32 ingress_ifindex; /* rxq->dev->ifindex */
 	__u32 rx_queue_index;  /* rxq->queue_index  */
+
+	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
 enum sk_action {
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index 0089d56617ec..c04fb1c72f5e 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -476,8 +476,11 @@ static struct xdp_buff *dev_map_run_prog(struct net_device *dev,
 					 struct xdp_buff *xdp,
 					 struct bpf_prog *xdp_prog)
 {
+	struct xdp_txq_info txq = { .dev = dev };
 	u32 act;
 
+	xdp->txq = &txq;
+
 	act = bpf_prog_run_xdp(xdp_prog, xdp);
 	switch (act) {
 	case XDP_PASS:
diff --git a/net/core/filter.c b/net/core/filter.c
index 0008b029d644..85ff827aab73 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -7015,6 +7015,13 @@ static bool xdp_is_valid_access(int off, int size,
 				const struct bpf_prog *prog,
 				struct bpf_insn_access_aux *info)
 {
+	if (prog->expected_attach_type != BPF_XDP_DEVMAP) {
+		switch (off) {
+		case offsetof(struct xdp_md, egress_ifindex):
+			return false;
+		}
+	}
+
 	if (type == BPF_WRITE) {
 		if (bpf_prog_is_dev_bound(prog->aux)) {
 			switch (off) {
@@ -7985,6 +7992,16 @@ static u32 xdp_convert_ctx_access(enum bpf_access_type type,
 				      offsetof(struct xdp_rxq_info,
 					       queue_index));
 		break;
+	case offsetof(struct xdp_md, egress_ifindex):
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_buff, txq),
+				      si->dst_reg, si->src_reg,
+				      offsetof(struct xdp_buff, txq));
+		*insn++ = BPF_LDX_MEM(BPF_FIELD_SIZEOF(struct xdp_txq_info, dev),
+				      si->dst_reg, si->dst_reg,
+				      offsetof(struct xdp_txq_info, dev));
+		*insn++ = BPF_LDX_MEM(BPF_W, si->dst_reg, si->dst_reg,
+				      offsetof(struct net_device, ifindex));
+		break;
 	}
 
 	return insn - insn_buf;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 65d7717bce2f..f74bc4a2385e 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3706,6 +3706,8 @@ struct xdp_md {
 	/* Below access go through struct xdp_rxq_info */
 	__u32 ingress_ifindex; /* rxq->dev->ifindex */
 	__u32 rx_queue_index;  /* rxq->queue_index  */
+
+	__u32 egress_ifindex;  /* txq->dev->ifindex */
 };
 
 enum sk_action {
-- 
cgit v1.2.3-59-g8ed1b


From 2778797037a658be71a6c55b54700bf58ba21eb7 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Fri, 29 May 2020 16:07:15 -0600
Subject: libbpf: Add SEC name for xdp programs attached to device map
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Support SEC("xdp_devmap*") as a short cut for loading the program with
type BPF_PROG_TYPE_XDP and expected attach type BPF_XDP_DEVMAP.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200529220716.75383-5-dsahern@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/lib/bpf/libbpf.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 74d967619dcf..85d4f1c5fc52 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -6657,6 +6657,8 @@ static const struct bpf_sec_def section_defs[] = {
 		.expected_attach_type = BPF_TRACE_ITER,
 		.is_attach_btf = true,
 		.attach_fn = attach_iter),
+	BPF_EAPROG_SEC("xdp_devmap",		BPF_PROG_TYPE_XDP,
+						BPF_XDP_DEVMAP),
 	BPF_PROG_SEC("xdp",			BPF_PROG_TYPE_XDP),
 	BPF_PROG_SEC("perf_event",		BPF_PROG_TYPE_PERF_EVENT),
 	BPF_PROG_SEC("lwt_in",			BPF_PROG_TYPE_LWT_IN),
-- 
cgit v1.2.3-59-g8ed1b


From ca2f5f21dbbd5e3a00cd3e97f728aa2ca0b2e011 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 29 May 2020 16:06:41 -0700
Subject: bpf: Refactor sockmap redirect code so its easy to reuse

We will shortly need this block of code to be callable from tls
context, so let's refactor the redirect logic so it's easy to reuse.
This also cleans up the switch stmt so we have fewer fallthrough cases.

No logic changes are intended.

Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Jakub Sitnicki <jakub@cloudflare.com>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/159079360110.5745.7024009076049029819.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 net/core/skmsg.c | 55 ++++++++++++++++++++++++++++++++++---------------------
 1 file changed, 34 insertions(+), 21 deletions(-)

diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index c479372f2cd2..9d72f71e9b47 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -682,13 +682,43 @@ static struct sk_psock *sk_psock_from_strp(struct strparser *strp)
 	return container_of(parser, struct sk_psock, parser);
 }
 
-static void sk_psock_verdict_apply(struct sk_psock *psock,
-				   struct sk_buff *skb, int verdict)
+static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
 {
 	struct sk_psock *psock_other;
 	struct sock *sk_other;
 	bool ingress;
 
+	sk_other = tcp_skb_bpf_redirect_fetch(skb);
+	if (unlikely(!sk_other)) {
+		kfree_skb(skb);
+		return;
+	}
+	psock_other = sk_psock(sk_other);
+	if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
+	    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED)) {
+		kfree_skb(skb);
+		return;
+	}
+
+	ingress = tcp_skb_bpf_ingress(skb);
+	if ((!ingress && sock_writeable(sk_other)) ||
+	    (ingress &&
+	     atomic_read(&sk_other->sk_rmem_alloc) <=
+	     sk_other->sk_rcvbuf)) {
+		if (!ingress)
+			skb_set_owner_w(skb, sk_other);
+		skb_queue_tail(&psock_other->ingress_skb, skb);
+		schedule_work(&psock_other->work);
+	} else {
+		kfree_skb(skb);
+	}
+}
+
+static void sk_psock_verdict_apply(struct sk_psock *psock,
+				   struct sk_buff *skb, int verdict)
+{
+	struct sock *sk_other;
+
 	switch (verdict) {
 	case __SK_PASS:
 		sk_other = psock->sk;
@@ -707,25 +737,8 @@ static void sk_psock_verdict_apply(struct sk_psock *psock,
 		}
 		goto out_free;
 	case __SK_REDIRECT:
-		sk_other = tcp_skb_bpf_redirect_fetch(skb);
-		if (unlikely(!sk_other))
-			goto out_free;
-		psock_other = sk_psock(sk_other);
-		if (!psock_other || sock_flag(sk_other, SOCK_DEAD) ||
-		    !sk_psock_test_state(psock_other, SK_PSOCK_TX_ENABLED))
-			goto out_free;
-		ingress = tcp_skb_bpf_ingress(skb);
-		if ((!ingress && sock_writeable(sk_other)) ||
-		    (ingress &&
-		     atomic_read(&sk_other->sk_rmem_alloc) <=
-		     sk_other->sk_rcvbuf)) {
-			if (!ingress)
-				skb_set_owner_w(skb, sk_other);
-			skb_queue_tail(&psock_other->ingress_skb, skb);
-			schedule_work(&psock_other->work);
-			break;
-		}
-		/* fall-through */
+		sk_psock_skb_redirect(psock, skb);
+		break;
 	case __SK_DROP:
 		/* fall-through */
 	default:
-- 
cgit v1.2.3-59-g8ed1b


From d39aec79e5923bf984df991ffe51d4a2b7a9e746 Mon Sep 17 00:00:00 2001
From: David Ahern <dsahern@kernel.org>
Date: Fri, 29 May 2020 16:07:16 -0600
Subject: selftest: Add tests for XDP programs in devmap entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add tests to verify the ability to add an XDP program to an
entry in a DEVMAP.

Add negative tests to show DEVMAP programs can not be
attached to devices as a normal XDP program, and accesses
to egress_ifindex require BPF_XDP_DEVMAP attach type.

Signed-off-by: David Ahern <dsahern@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Toke Høiland-Jørgensen <toke@redhat.com>
Link: https://lore.kernel.org/bpf/20200529220716.75383-6-dsahern@kernel.org
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/prog_tests/xdp_devmap_attach.c   | 97 ++++++++++++++++++++++
 .../selftests/bpf/progs/test_xdp_devmap_helpers.c  | 22 +++++
 .../bpf/progs/test_xdp_with_devmap_helpers.c       | 44 ++++++++++
 3 files changed, 163 insertions(+)
 create mode 100644 tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
 create mode 100644 tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c

diff --git a/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
new file mode 100644
index 000000000000..d19dbd668f6a
--- /dev/null
+++ b/tools/testing/selftests/bpf/prog_tests/xdp_devmap_attach.c
@@ -0,0 +1,97 @@
+// SPDX-License-Identifier: GPL-2.0
+#include <uapi/linux/bpf.h>
+#include <linux/if_link.h>
+#include <test_progs.h>
+
+#include "test_xdp_devmap_helpers.skel.h"
+#include "test_xdp_with_devmap_helpers.skel.h"
+
+#define IFINDEX_LO 1
+
+struct bpf_devmap_val {
+	u32 ifindex;   /* device index */
+	union {
+		int fd;  /* prog fd on map write */
+		u32 id;  /* prog id on map read */
+	} bpf_prog;
+};
+
+void test_xdp_with_devmap_helpers(void)
+{
+	struct test_xdp_with_devmap_helpers *skel;
+	struct bpf_prog_info info = {};
+	struct bpf_devmap_val val = {
+		.ifindex = IFINDEX_LO,
+	};
+	__u32 len = sizeof(info);
+	__u32 duration = 0, idx = 0;
+	int err, dm_fd, map_fd;
+
+
+	skel = test_xdp_with_devmap_helpers__open_and_load();
+	if (CHECK_FAIL(!skel)) {
+		perror("test_xdp_with_devmap_helpers__open_and_load");
+		return;
+	}
+
+	/* can not attach program with DEVMAPs that allow programs
+	 * as xdp generic
+	 */
+	dm_fd = bpf_program__fd(skel->progs.xdp_redir_prog);
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Generic attach of program with 8-byte devmap",
+	      "should have failed\n");
+
+	dm_fd = bpf_program__fd(skel->progs.xdp_dummy_dm);
+	map_fd = bpf_map__fd(skel->maps.dm_ports);
+	err = bpf_obj_get_info_by_fd(dm_fd, &info, &len);
+	if (CHECK_FAIL(err))
+		goto out_close;
+
+	val.bpf_prog.fd = dm_fd;
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err, "Add program to devmap entry",
+	      "err %d errno %d\n", err, errno);
+
+	err = bpf_map_lookup_elem(map_fd, &idx, &val);
+	CHECK(err, "Read devmap entry", "err %d errno %d\n", err, errno);
+	CHECK(info.id != val.bpf_prog.id, "Expected program id in devmap entry",
+	      "expected %u read %u\n", info.id, val.bpf_prog.id);
+
+	/* can not attach BPF_XDP_DEVMAP program to a device */
+	err = bpf_set_link_xdp_fd(IFINDEX_LO, dm_fd, XDP_FLAGS_SKB_MODE);
+	CHECK(err == 0, "Attach of BPF_XDP_DEVMAP program",
+	      "should have failed\n");
+
+	val.ifindex = 1;
+	val.bpf_prog.fd = bpf_program__fd(skel->progs.xdp_dummy_prog);
+	err = bpf_map_update_elem(map_fd, &idx, &val, 0);
+	CHECK(err == 0, "Add non-BPF_XDP_DEVMAP program to devmap entry",
+	      "should have failed\n");
+
+out_close:
+	test_xdp_with_devmap_helpers__destroy(skel);
+}
+
+void test_neg_xdp_devmap_helpers(void)
+{
+	struct test_xdp_devmap_helpers *skel;
+	__u32 duration = 0;
+
+	skel = test_xdp_devmap_helpers__open_and_load();
+	if (CHECK(skel,
+		  "Load of XDP program accessing egress ifindex without attach type",
+		  "should have failed\n")) {
+		test_xdp_devmap_helpers__destroy(skel);
+	}
+}
+
+
+void test_xdp_devmap_attach(void)
+{
+	if (test__start_subtest("DEVMAP with programs in entries"))
+		test_xdp_with_devmap_helpers();
+
+	if (test__start_subtest("Verifier check of DEVMAP programs"))
+		test_neg_xdp_devmap_helpers();
+}
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
new file mode 100644
index 000000000000..e5c0f131c8a7
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_devmap_helpers.c
@@ -0,0 +1,22 @@
+// SPDX-License-Identifier: GPL-2.0
+/* fails to load without expected_attach_type = BPF_XDP_DEVMAP
+ * because of access to egress_ifindex
+ */
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+SEC("xdp_dm_log")
+int xdpdm_devlog(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	unsigned int len = data_end - data;
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+	return XDP_PASS;
+}
+
+char _license[] SEC("license") = "GPL";
diff --git a/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
new file mode 100644
index 000000000000..deef0e050863
--- /dev/null
+++ b/tools/testing/selftests/bpf/progs/test_xdp_with_devmap_helpers.c
@@ -0,0 +1,44 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include "vmlinux.h"
+#include <bpf/bpf_helpers.h>
+
+struct {
+	__uint(type, BPF_MAP_TYPE_DEVMAP);
+	__uint(key_size, sizeof(__u32));
+	__uint(value_size, sizeof(struct bpf_devmap_val));
+	__uint(max_entries, 4);
+} dm_ports SEC(".maps");
+
+SEC("xdp_redir")
+int xdp_redir_prog(struct xdp_md *ctx)
+{
+	return bpf_redirect_map(&dm_ports, 1, 0);
+}
+
+/* invalid program on DEVMAP entry;
+ * SEC name means expected attach type not set
+ */
+SEC("xdp_dummy")
+int xdp_dummy_prog(struct xdp_md *ctx)
+{
+	return XDP_PASS;
+}
+
+/* valid program on DEVMAP entry via SEC name;
+ * has access to egress and ingress ifindex
+ */
+SEC("xdp_devmap")
+int xdp_dummy_dm(struct xdp_md *ctx)
+{
+	char fmt[] = "devmap redirect: dev %u -> dev %u len %u\n";
+	void *data_end = (void *)(long)ctx->data_end;
+	void *data = (void *)(long)ctx->data;
+	unsigned int len = data_end - data;
+
+	bpf_trace_printk(fmt, sizeof(fmt),
+			 ctx->ingress_ifindex, ctx->egress_ifindex, len);
+
+	return XDP_PASS;
+}
+char _license[] SEC("license") = "GPL";
-- 
cgit v1.2.3-59-g8ed1b


From e91de6afa81c10e9f855c5695eb9a53168d96b73 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 29 May 2020 16:06:59 -0700
Subject: bpf: Fix running sk_skb program types with ktls

KTLS uses a stream parser to collect TLS messages and send them to
the upper layer tls receive handler. This ensures the tls receiver
has a full TLS header to parse when it is run. However, when a
socket has BPF_SK_SKB_STREAM_VERDICT program attached before KTLS
is enabled we end up with two stream parsers running on the same
socket.

The result is that both try to run on the same socket. First the KTLS
stream parser runs and calls read_sock(), which will call
tcp_read_sock(), which in turn calls tcp_rcv_skb(). This dequeues the
skb from the sk_receive_queue. When this is done, the KTLS code then
calls the data_ready() callback which, because we stacked KTLS on top
of the bpf stream verdict program, has been replaced with
sk_psock_start_strp(). This will in turn kick the stream parser again
and eventually do the same thing KTLS did above, calling into
tcp_rcv_skb() and dequeuing an skb from the sk_receive_queue.

At this point the data stream is broken. Part of the stream was
handled by the KTLS side, while some other bytes may have been handled
by the BPF side. Generally this results in either missing data
or, more likely, a "Bad Message" complaint from the kTLS receive
handler, as the BPF program steals some bytes meant to be in a
TLS header and/or the TLS header length is no longer correct.

We've already broken the idealized model where we can stack ULPs
in any order with generic callbacks on the TX side to handle this.
So in this patch we do the same thing but for the RX side. We add
a sk_psock_strp_enabled() helper so TLS can learn that a BPF verdict
program is running, and add a tls_sw_has_ctx_rx() helper so the BPF
side can learn there is a TLS ULP on the socket.

Then on the BPF side we omit calling our stream parser to avoid
breaking the data stream for the KTLS receiver. Then on the
KTLS side we call BPF_SK_SKB_STREAM_VERDICT once the KTLS
receiver is done with the packet but before it posts the
msg to userspace. This gives us symmetry between the TX and
RX halves and IMO makes it usable again. On the TX side we
process packets in this order: BPF -> TLS -> TCP, and on
the receive side in the reverse order: TCP -> TLS -> BPF.

Discovered while testing OpenSSL 3.0 Alpha2.0 release.

Fixes: d829e9c4112b5 ("tls: convert to generic sk_msg interface")
Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/159079361946.5745.605854335665044485.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 include/linux/skmsg.h |  8 ++++++++
 include/net/tls.h     |  9 +++++++++
 net/core/skmsg.c      | 43 ++++++++++++++++++++++++++++++++++++++++---
 net/tls/tls_sw.c      | 20 ++++++++++++++++++--
 4 files changed, 75 insertions(+), 5 deletions(-)

diff --git a/include/linux/skmsg.h b/include/linux/skmsg.h
index ad31c9fb7158..08674cd14d5a 100644
--- a/include/linux/skmsg.h
+++ b/include/linux/skmsg.h
@@ -437,4 +437,12 @@ static inline void psock_progs_drop(struct sk_psock_progs *progs)
 	psock_set_prog(&progs->skb_verdict, NULL);
 }
 
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb);
+
+static inline bool sk_psock_strp_enabled(struct sk_psock *psock)
+{
+	if (!psock)
+		return false;
+	return psock->parser.enabled;
+}
 #endif /* _LINUX_SKMSG_H */
diff --git a/include/net/tls.h b/include/net/tls.h
index 3e7b44cae0d9..3212d3c214a9 100644
--- a/include/net/tls.h
+++ b/include/net/tls.h
@@ -571,6 +571,15 @@ static inline bool tls_sw_has_ctx_tx(const struct sock *sk)
 	return !!tls_sw_ctx_tx(ctx);
 }
 
+static inline bool tls_sw_has_ctx_rx(const struct sock *sk)
+{
+	struct tls_context *ctx = tls_get_ctx(sk);
+
+	if (!ctx)
+		return false;
+	return !!tls_sw_ctx_rx(ctx);
+}
+
 void tls_sw_write_space(struct sock *sk, struct tls_context *ctx);
 void tls_device_write_space(struct sock *sk, struct tls_context *ctx);
 
diff --git a/net/core/skmsg.c b/net/core/skmsg.c
index 9d72f71e9b47..351afbf6bfba 100644
--- a/net/core/skmsg.c
+++ b/net/core/skmsg.c
@@ -7,6 +7,7 @@
 
 #include <net/sock.h>
 #include <net/tcp.h>
+#include <net/tls.h>
 
 static bool sk_msg_try_coalesce_ok(struct sk_msg *msg, int elem_first_coalesce)
 {
@@ -714,6 +715,38 @@ static void sk_psock_skb_redirect(struct sk_psock *psock, struct sk_buff *skb)
 	}
 }
 
+static void sk_psock_tls_verdict_apply(struct sk_psock *psock,
+				       struct sk_buff *skb, int verdict)
+{
+	switch (verdict) {
+	case __SK_REDIRECT:
+		sk_psock_skb_redirect(psock, skb);
+		break;
+	case __SK_PASS:
+	case __SK_DROP:
+	default:
+		break;
+	}
+}
+
+int sk_psock_tls_strp_read(struct sk_psock *psock, struct sk_buff *skb)
+{
+	struct bpf_prog *prog;
+	int ret = __SK_PASS;
+
+	rcu_read_lock();
+	prog = READ_ONCE(psock->progs.skb_verdict);
+	if (likely(prog)) {
+		tcp_skb_bpf_redirect_clear(skb);
+		ret = sk_psock_bpf_run(psock, prog, skb);
+		ret = sk_psock_map_verd(ret, tcp_skb_bpf_redirect_fetch(skb));
+	}
+	rcu_read_unlock();
+	sk_psock_tls_verdict_apply(psock, skb, ret);
+	return ret;
+}
+EXPORT_SYMBOL_GPL(sk_psock_tls_strp_read);
+
 static void sk_psock_verdict_apply(struct sk_psock *psock,
 				   struct sk_buff *skb, int verdict)
 {
@@ -792,9 +825,13 @@ static void sk_psock_strp_data_ready(struct sock *sk)
 	rcu_read_lock();
 	psock = sk_psock(sk);
 	if (likely(psock)) {
-		write_lock_bh(&sk->sk_callback_lock);
-		strp_data_ready(&psock->parser.strp);
-		write_unlock_bh(&sk->sk_callback_lock);
+		if (tls_sw_has_ctx_rx(sk)) {
+			psock->parser.saved_data_ready(sk);
+		} else {
+			write_lock_bh(&sk->sk_callback_lock);
+			strp_data_ready(&psock->parser.strp);
+			write_unlock_bh(&sk->sk_callback_lock);
+		}
 	}
 	rcu_read_unlock();
 }
diff --git a/net/tls/tls_sw.c b/net/tls/tls_sw.c
index 8c2763eb6aae..24f64bc0de18 100644
--- a/net/tls/tls_sw.c
+++ b/net/tls/tls_sw.c
@@ -1742,6 +1742,7 @@ int tls_sw_recvmsg(struct sock *sk,
 	long timeo;
 	bool is_kvec = iov_iter_is_kvec(&msg->msg_iter);
 	bool is_peek = flags & MSG_PEEK;
+	bool bpf_strp_enabled;
 	int num_async = 0;
 	int pending;
 
@@ -1752,6 +1753,7 @@ int tls_sw_recvmsg(struct sock *sk,
 
 	psock = sk_psock_get(sk);
 	lock_sock(sk);
+	bpf_strp_enabled = sk_psock_strp_enabled(psock);
 
 	/* Process pending decrypted records. It must be non-zero-copy */
 	err = process_rx_list(ctx, msg, &control, &cmsg, 0, len, false,
@@ -1805,11 +1807,12 @@ int tls_sw_recvmsg(struct sock *sk,
 
 		if (to_decrypt <= len && !is_kvec && !is_peek &&
 		    ctx->control == TLS_RECORD_TYPE_DATA &&
-		    prot->version != TLS_1_3_VERSION)
+		    prot->version != TLS_1_3_VERSION &&
+		    !bpf_strp_enabled)
 			zc = true;
 
 		/* Do not use async mode if record is non-data */
-		if (ctx->control == TLS_RECORD_TYPE_DATA)
+		if (ctx->control == TLS_RECORD_TYPE_DATA && !bpf_strp_enabled)
 			async_capable = ctx->async_capable;
 		else
 			async_capable = false;
@@ -1859,6 +1862,19 @@ int tls_sw_recvmsg(struct sock *sk,
 			goto pick_next_record;
 
 		if (!zc) {
+			if (bpf_strp_enabled) {
+				err = sk_psock_tls_strp_read(psock, skb);
+				if (err != __SK_PASS) {
+					rxm->offset = rxm->offset + rxm->full_len;
+					rxm->full_len = 0;
+					if (err == __SK_DROP)
+						consume_skb(skb);
+					ctx->recv_pkt = NULL;
+					__strp_unpause(&ctx->strp);
+					continue;
+				}
+			}
+
 			if (rxm->full_len > len) {
 				retain_skb = true;
 				chunk = len;
-- 
cgit v1.2.3-59-g8ed1b


From 463bac5f1ca79fcd964bf50426eab024fb4dd8a4 Mon Sep 17 00:00:00 2001
From: John Fastabend <john.fastabend@gmail.com>
Date: Fri, 29 May 2020 16:07:19 -0700
Subject: bpf, selftests: Add test for ktls with skb bpf ingress policy

This adds a test for bpf ingress policy. To ensure data writes happen
as expected with extra TLS headers, we run these tests with data
verification enabled by default. This will test that received packets
have "PASS" stamped into the front of the payload.

Signed-off-by: John Fastabend <john.fastabend@gmail.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/159079363965.5745.3390806911628980210.stgit@john-Precision-5820-Tower
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 .../selftests/bpf/progs/test_sockmap_kern.h        |  46 +++++-
 tools/testing/selftests/bpf/test_sockmap.c         | 163 ++++++++++++++++++---
 2 files changed, 187 insertions(+), 22 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
index a443d3637db3..057036ca1111 100644
--- a/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
+++ b/tools/testing/selftests/bpf/progs/test_sockmap_kern.h
@@ -79,11 +79,18 @@ struct {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_ARRAY);
-	__uint(max_entries, 1);
+	__uint(max_entries, 2);
 	__type(key, int);
 	__type(value, int);
 } sock_skb_opts SEC(".maps");
 
+struct {
+	__uint(type, TEST_MAP_TYPE);
+	__uint(max_entries, 20);
+	__uint(key_size, sizeof(int));
+	__uint(value_size, sizeof(int));
+} tls_sock_map SEC(".maps");
+
 SEC("sk_skb1")
 int bpf_prog1(struct __sk_buff *skb)
 {
@@ -118,6 +125,43 @@ int bpf_prog2(struct __sk_buff *skb)
 
 }
 
+SEC("sk_skb3")
+int bpf_prog3(struct __sk_buff *skb)
+{
+	const int one = 1;
+	int err, *f, ret = SK_PASS;
+	void *data_end;
+	char *c;
+
+	err = bpf_skb_pull_data(skb, 19);
+	if (err)
+		goto tls_out;
+
+	c = (char *)(long)skb->data;
+	data_end = (void *)(long)skb->data_end;
+
+	if (c + 18 < data_end)
+		memcpy(&c[13], "PASS", 4);
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f) {
+		__u64 flags = 0;
+
+		ret = 0;
+		flags = *f;
+#ifdef SOCKMAP
+		return bpf_sk_redirect_map(skb, &tls_sock_map, ret, flags);
+#else
+		return bpf_sk_redirect_hash(skb, &tls_sock_map, &ret, flags);
+#endif
+	}
+
+	f = bpf_map_lookup_elem(&sock_skb_opts, &one);
+	if (f && *f)
+		ret = SK_DROP;
+tls_out:
+	return ret;
+}
+
 SEC("sockops")
 int bpf_sockmap(struct bpf_sock_ops *skops)
 {
diff --git a/tools/testing/selftests/bpf/test_sockmap.c b/tools/testing/selftests/bpf/test_sockmap.c
index c80643828b82..37695fc8096a 100644
--- a/tools/testing/selftests/bpf/test_sockmap.c
+++ b/tools/testing/selftests/bpf/test_sockmap.c
@@ -63,8 +63,8 @@ int s1, s2, c1, c2, p1, p2;
 int test_cnt;
 int passed;
 int failed;
-int map_fd[8];
-struct bpf_map *maps[8];
+int map_fd[9];
+struct bpf_map *maps[9];
 int prog_fd[11];
 
 int txmsg_pass;
@@ -79,7 +79,10 @@ int txmsg_end_push;
 int txmsg_start_pop;
 int txmsg_pop;
 int txmsg_ingress;
-int txmsg_skb;
+int txmsg_redir_skb;
+int txmsg_ktls_skb;
+int txmsg_ktls_skb_drop;
+int txmsg_ktls_skb_redir;
 int ktls;
 int peek_flag;
 
@@ -104,7 +107,7 @@ static const struct option long_options[] = {
 	{"txmsg_start_pop",  required_argument,	NULL, 'w'},
 	{"txmsg_pop",	     required_argument,	NULL, 'x'},
 	{"txmsg_ingress", no_argument,		&txmsg_ingress, 1 },
-	{"txmsg_skb", no_argument,		&txmsg_skb, 1 },
+	{"txmsg_redir_skb", no_argument,	&txmsg_redir_skb, 1 },
 	{"ktls", no_argument,			&ktls, 1 },
 	{"peek", no_argument,			&peek_flag, 1 },
 	{"whitelist", required_argument,	NULL, 'n' },
@@ -169,7 +172,8 @@ static void test_reset(void)
 	txmsg_start_push = txmsg_end_push = 0;
 	txmsg_pass = txmsg_drop = txmsg_redir = 0;
 	txmsg_apply = txmsg_cork = 0;
-	txmsg_ingress = txmsg_skb = 0;
+	txmsg_ingress = txmsg_redir_skb = 0;
+	txmsg_ktls_skb = txmsg_ktls_skb_drop = txmsg_ktls_skb_redir = 0;
 }
 
 static int test_start_subtest(const struct _test *t, struct sockmap_options *o)
@@ -502,14 +506,41 @@ unwind_iov:
 
 static int msg_verify_data(struct msghdr *msg, int size, int chunk_sz)
 {
-	int i, j, bytes_cnt = 0;
+	int i, j = 0, bytes_cnt = 0;
 	unsigned char k = 0;
 
 	for (i = 0; i < msg->msg_iovlen; i++) {
 		unsigned char *d = msg->msg_iov[i].iov_base;
 
-		for (j = 0;
-		     j < msg->msg_iov[i].iov_len && size; j++) {
+		/* Special case test for skb ingress + ktls */
+		if (i == 0 && txmsg_ktls_skb) {
+			if (msg->msg_iov[i].iov_len < 4)
+				return -EIO;
+			if (txmsg_ktls_skb_redir) {
+				if (memcmp(&d[13], "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected redirect ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[13], d[14], d[15], d[16]);
+					return -EIO;
+				}
+				d[13] = 0;
+				d[14] = 1;
+				d[15] = 2;
+				d[16] = 3;
+				j = 13;
+			} else if (txmsg_ktls_skb) {
+				if (memcmp(d, "PASS", 4) != 0) {
+					fprintf(stderr,
+						"detected ktls_skb data error with skb ingress update @iov[%i]:%i \"%02x %02x %02x %02x\" != \"PASS\"\n", i, 0, d[0], d[1], d[2], d[3]);
+					return -EIO;
+				}
+				d[0] = 0;
+				d[1] = 1;
+				d[2] = 2;
+				d[3] = 3;
+			}
+		}
+
+		for (; j < msg->msg_iov[i].iov_len && size; j++) {
 			if (d[j] != k++) {
 				fprintf(stderr,
 					"detected data corruption @iov[%i]:%i %02x != %02x, %02x ?= %02x\n",
@@ -724,7 +755,7 @@ static int sendmsg_test(struct sockmap_options *opt)
 	rxpid = fork();
 	if (rxpid == 0) {
 		iov_buf -= (txmsg_pop - txmsg_start_pop + 1);
-		if (opt->drop_expected)
+		if (opt->drop_expected || txmsg_ktls_skb_drop)
 			_exit(0);
 
 		if (!iov_buf) /* zero bytes sent case */
@@ -911,8 +942,28 @@ static int run_options(struct sockmap_options *options, int cg_fd,  int test)
 		return err;
 	}
 
+	/* Attach programs to TLS sockmap */
+	if (txmsg_ktls_skb) {
+		err = bpf_prog_attach(prog_fd[0], map_fd[8],
+					BPF_SK_SKB_STREAM_PARSER, 0);
+		if (err) {
+			fprintf(stderr,
+				"ERROR: bpf_prog_attach (TLS sockmap %i->%i): %d (%s)\n",
+				prog_fd[0], map_fd[8], err, strerror(errno));
+			return err;
+		}
+
+		err = bpf_prog_attach(prog_fd[2], map_fd[8],
+				      BPF_SK_SKB_STREAM_VERDICT, 0);
+		if (err) {
+			fprintf(stderr, "ERROR: bpf_prog_attach (TLS sockmap): %d (%s)\n",
+				err, strerror(errno));
+			return err;
+		}
+	}
+
 	/* Attach to cgroups */
-	err = bpf_prog_attach(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
+	err = bpf_prog_attach(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS, 0);
 	if (err) {
 		fprintf(stderr, "ERROR: bpf_prog_attach (groups): %d (%s)\n",
 			err, strerror(errno));
@@ -928,15 +979,15 @@ run:
 
 	/* Attach txmsg program to sockmap */
 	if (txmsg_pass)
-		tx_prog_fd = prog_fd[3];
-	else if (txmsg_redir)
 		tx_prog_fd = prog_fd[4];
-	else if (txmsg_apply)
+	else if (txmsg_redir)
 		tx_prog_fd = prog_fd[5];
-	else if (txmsg_cork)
+	else if (txmsg_apply)
 		tx_prog_fd = prog_fd[6];
-	else if (txmsg_drop)
+	else if (txmsg_cork)
 		tx_prog_fd = prog_fd[7];
+	else if (txmsg_drop)
+		tx_prog_fd = prog_fd[8];
 	else
 		tx_prog_fd = 0;
 
@@ -1108,7 +1159,35 @@ run:
 			}
 		}
 
-		if (txmsg_skb) {
+		if (txmsg_ktls_skb) {
+			int ingress = BPF_F_INGRESS;
+
+			i = 0;
+			err = bpf_map_update_elem(map_fd[8], &i, &p2, BPF_ANY);
+			if (err) {
+				fprintf(stderr,
+					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
+					err, strerror(errno));
+			}
+
+			if (txmsg_ktls_skb_redir) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7],
+							  &i, &ingress, BPF_ANY);
+				if (err) {
+					fprintf(stderr,
+						"ERROR: bpf_map_update_elem (txmsg_ingress): %d (%s)\n",
+						err, strerror(errno));
+				}
+			}
+
+			if (txmsg_ktls_skb_drop) {
+				i = 1;
+				err = bpf_map_update_elem(map_fd[7], &i, &i, BPF_ANY);
+			}
+		}
+
+		if (txmsg_redir_skb) {
 			int skb_fd = (test == SENDMSG || test == SENDPAGE) ?
 					p2 : p1;
 			int ingress = BPF_F_INGRESS;
@@ -1123,8 +1202,7 @@ run:
 			}
 
 			i = 3;
-			err = bpf_map_update_elem(map_fd[0],
-						  &i, &skb_fd, BPF_ANY);
+			err = bpf_map_update_elem(map_fd[0], &i, &skb_fd, BPF_ANY);
 			if (err) {
 				fprintf(stderr,
 					"ERROR: bpf_map_update_elem (c1 sockmap): %d (%s)\n",
@@ -1158,9 +1236,12 @@ run:
 		fprintf(stderr, "unknown test\n");
 out:
 	/* Detatch and zero all the maps */
-	bpf_prog_detach2(prog_fd[2], cg_fd, BPF_CGROUP_SOCK_OPS);
+	bpf_prog_detach2(prog_fd[3], cg_fd, BPF_CGROUP_SOCK_OPS);
 	bpf_prog_detach2(prog_fd[0], map_fd[0], BPF_SK_SKB_STREAM_PARSER);
 	bpf_prog_detach2(prog_fd[1], map_fd[0], BPF_SK_SKB_STREAM_VERDICT);
+	bpf_prog_detach2(prog_fd[0], map_fd[8], BPF_SK_SKB_STREAM_PARSER);
+	bpf_prog_detach2(prog_fd[2], map_fd[8], BPF_SK_SKB_STREAM_VERDICT);
+
 	if (tx_prog_fd >= 0)
 		bpf_prog_detach2(tx_prog_fd, map_fd[1], BPF_SK_MSG_VERDICT);
 
@@ -1229,8 +1310,10 @@ static void test_options(char *options)
 	}
 	if (txmsg_ingress)
 		strncat(options, "ingress,", OPTSTRING);
-	if (txmsg_skb)
-		strncat(options, "skb,", OPTSTRING);
+	if (txmsg_redir_skb)
+		strncat(options, "redir_skb,", OPTSTRING);
+	if (txmsg_ktls_skb)
+		strncat(options, "ktls_skb,", OPTSTRING);
 	if (ktls)
 		strncat(options, "ktls,", OPTSTRING);
 	if (peek_flag)
@@ -1362,6 +1445,40 @@ static void test_txmsg_ingress_redir(int cgrp, struct sockmap_options *opt)
 	test_send(opt, cgrp);
 }
 
+static void test_txmsg_skb(int cgrp, struct sockmap_options *opt)
+{
+	bool data = opt->data_test;
+	int k = ktls;
+
+	opt->data_test = true;
+	ktls = 1;
+
+	txmsg_pass = txmsg_drop = 0;
+	txmsg_ingress = txmsg_redir = 0;
+	txmsg_ktls_skb = 1;
+	txmsg_pass = 1;
+
+	/* Using data verification so ensure iov layout is
+	 * expected from test receiver side. e.g. has enough
+	 * bytes to write test code.
+	 */
+	opt->iov_length = 100;
+	opt->iov_count = 1;
+	opt->rate = 1;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 1;
+	test_exec(cgrp, opt);
+
+	txmsg_ktls_skb_drop = 0;
+	txmsg_ktls_skb_redir = 1;
+	test_exec(cgrp, opt);
+
+	opt->data_test = data;
+	ktls = k;
+}
+
+
 /* Test cork with hung data. This tests poor usage patterns where
  * cork can leave data on the ring if user program is buggy and
  * doesn't flush them somehow. They do take some time however
@@ -1542,11 +1659,13 @@ char *map_names[] = {
 	"sock_bytes",
 	"sock_redir_flags",
 	"sock_skb_opts",
+	"tls_sock_map",
 };
 
 int prog_attach_type[] = {
 	BPF_SK_SKB_STREAM_PARSER,
 	BPF_SK_SKB_STREAM_VERDICT,
+	BPF_SK_SKB_STREAM_VERDICT,
 	BPF_CGROUP_SOCK_OPS,
 	BPF_SK_MSG_VERDICT,
 	BPF_SK_MSG_VERDICT,
@@ -1558,6 +1677,7 @@ int prog_attach_type[] = {
 };
 
 int prog_type[] = {
+	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SK_SKB,
 	BPF_PROG_TYPE_SOCK_OPS,
@@ -1620,6 +1740,7 @@ struct _test test[] = {
 	{"txmsg test redirect", test_txmsg_redir},
 	{"txmsg test drop", test_txmsg_drop},
 	{"txmsg test ingress redirect", test_txmsg_ingress_redir},
+	{"txmsg test skb", test_txmsg_skb},
 	{"txmsg test apply", test_txmsg_apply},
 	{"txmsg test cork", test_txmsg_cork},
 	{"txmsg test hanging corks", test_txmsg_cork_hangs},
-- 
cgit v1.2.3-59-g8ed1b


From df8fe57c071c58f355d0a4985ecd2fcaf99b050f Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Mon, 1 Jun 2020 14:13:52 -0700
Subject: tools/bpf: sync bpf.h

Sync bpf.h into tools/include/uapi/

Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/include/uapi/linux/bpf.h | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f74bc4a2385e..f862a58fb567 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3613,6 +3613,7 @@ struct bpf_sock {
 	__u32 dst_ip4;
 	__u32 dst_ip6[4];
 	__u32 state;
+	__s32 rx_queue_mapping;
 };
 
 struct bpf_tcp_sock {
-- 
cgit v1.2.3-59-g8ed1b


From bb2359f4dbe98e8863b4e885fc09269ef4682ec3 Mon Sep 17 00:00:00 2001
From: Denis Efremov <efremov@linux.com>
Date: Mon, 1 Jun 2020 19:28:14 +0300
Subject: bpf: Change kvfree to kfree in generic_map_lookup_batch()

buf_prevkey in generic_map_lookup_batch() is allocated with
kmalloc(). It's safe to free it with kfree().

Fixes: cb4d03ab499d ("bpf: Add generic support for lookup batch op")
Signed-off-by: Denis Efremov <efremov@linux.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200601162814.17426-1-efremov@linux.com
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 kernel/bpf/syscall.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index 9de3540fa90c..e83b0818b529 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -1399,7 +1399,7 @@ int generic_map_lookup_batch(struct bpf_map *map,
 
 	buf = kmalloc(map->key_size + value_size, GFP_USER | __GFP_NOWARN);
 	if (!buf) {
-		kvfree(buf_prevkey);
+		kfree(buf_prevkey);
 		return -ENOMEM;
 	}
 
-- 
cgit v1.2.3-59-g8ed1b


From 8ea204c2b658eaef55b4716fde469fb66c589a3d Mon Sep 17 00:00:00 2001
From: Ferenc Fejes <fejes@inf.elte.hu>
Date: Sat, 30 May 2020 23:09:00 +0200
Subject: net: Make locking in sock_bindtoindex optional

sock_bindtoindex is intended for kernel-wide usage; however, it
locks the socket regardless of the context. This modification
relaxes that behavior: the socket is locked only when
sock_bindtoindex is called with lock_sk = true.

The modification is applied to all users of sock_bindtoindex.

Signed-off-by: Ferenc Fejes <fejes@inf.elte.hu>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/bee6355da40d9e991b2f2d12b67d55ebb5f5b207.1590871065.git.fejes@inf.elte.hu
---
 include/net/sock.h        |  2 +-
 net/core/sock.c           | 10 ++++++----
 net/ipv4/udp_tunnel.c     |  2 +-
 net/ipv6/ip6_udp_tunnel.c |  2 +-
 4 files changed, 9 insertions(+), 7 deletions(-)

diff --git a/include/net/sock.h b/include/net/sock.h
index 6e9f713a7860..c53cc42b5ab9 100644
--- a/include/net/sock.h
+++ b/include/net/sock.h
@@ -2690,7 +2690,7 @@ static inline bool sk_dev_equal_l3scope(struct sock *sk, int dif)
 
 void sock_def_readable(struct sock *sk);
 
-int sock_bindtoindex(struct sock *sk, int ifindex);
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk);
 void sock_enable_timestamps(struct sock *sk);
 void sock_no_linger(struct sock *sk);
 void sock_set_keepalive(struct sock *sk);
diff --git a/net/core/sock.c b/net/core/sock.c
index 61ec573221a6..6c4acf1f0220 100644
--- a/net/core/sock.c
+++ b/net/core/sock.c
@@ -594,13 +594,15 @@ out:
 	return ret;
 }
 
-int sock_bindtoindex(struct sock *sk, int ifindex)
+int sock_bindtoindex(struct sock *sk, int ifindex, bool lock_sk)
 {
 	int ret;
 
-	lock_sock(sk);
+	if (lock_sk)
+		lock_sock(sk);
 	ret = sock_bindtoindex_locked(sk, ifindex);
-	release_sock(sk);
+	if (lock_sk)
+		release_sock(sk);
 
 	return ret;
 }
@@ -646,7 +648,7 @@ static int sock_setbindtodevice(struct sock *sk, char __user *optval,
 			goto out;
 	}
 
-	return sock_bindtoindex(sk, index);
+	return sock_bindtoindex(sk, index, true);
 out:
 #endif
 
diff --git a/net/ipv4/udp_tunnel.c b/net/ipv4/udp_tunnel.c
index 2158e8bddf41..3eecba0874aa 100644
--- a/net/ipv4/udp_tunnel.c
+++ b/net/ipv4/udp_tunnel.c
@@ -22,7 +22,7 @@ int udp_sock_create4(struct net *net, struct udp_port_cfg *cfg,
 		goto error;
 
 	if (cfg->bind_ifindex) {
-		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
 		if (err < 0)
 			goto error;
 	}
diff --git a/net/ipv6/ip6_udp_tunnel.c b/net/ipv6/ip6_udp_tunnel.c
index 2e0ad1bc84a8..cdc4d4ee2420 100644
--- a/net/ipv6/ip6_udp_tunnel.c
+++ b/net/ipv6/ip6_udp_tunnel.c
@@ -30,7 +30,7 @@ int udp_sock_create6(struct net *net, struct udp_port_cfg *cfg,
 			goto error;
 	}
 	if (cfg->bind_ifindex) {
-		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex);
+		err = sock_bindtoindex(sock->sk, cfg->bind_ifindex, true);
 		if (err < 0)
 			goto error;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 70c58997c1e864c96dfdf072572047303db8f42a Mon Sep 17 00:00:00 2001
From: Ferenc Fejes <fejes@inf.elte.hu>
Date: Sat, 30 May 2020 23:09:01 +0200
Subject: bpf: Allow SO_BINDTODEVICE opt in bpf_setsockopt

Extend the sockopts supported by bpf_setsockopt with
SO_BINDTODEVICE. We call sock_bindtoindex with the parameter
lock_sk = false in this context because we already own
the socket.

Signed-off-by: Ferenc Fejes <fejes@inf.elte.hu>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/4149e304867b8d5a606a305bc59e29b063e51f49.1590871065.git.fejes@inf.elte.hu
---
 net/core/filter.c | 28 +++++++++++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/net/core/filter.c b/net/core/filter.c
index 85ff827aab73..ae82bcb03124 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -4248,6 +4248,9 @@ static const struct bpf_func_proto bpf_get_socket_uid_proto = {
 static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 			   char *optval, int optlen, u32 flags)
 {
+	char devname[IFNAMSIZ];
+	struct net *net;
+	int ifindex;
 	int ret = 0;
 	int val;
 
@@ -4257,7 +4260,7 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 	sock_owned_by_me(sk);
 
 	if (level == SOL_SOCKET) {
-		if (optlen != sizeof(int))
+		if (optlen != sizeof(int) && optname != SO_BINDTODEVICE)
 			return -EINVAL;
 		val = *((int *)optval);
 
@@ -4298,6 +4301,29 @@ static int _bpf_setsockopt(struct sock *sk, int level, int optname,
 				sk_dst_reset(sk);
 			}
 			break;
+		case SO_BINDTODEVICE:
+			ret = -ENOPROTOOPT;
+#ifdef CONFIG_NETDEVICES
+			optlen = min_t(long, optlen, IFNAMSIZ - 1);
+			strncpy(devname, optval, optlen);
+			devname[optlen] = 0;
+
+			ifindex = 0;
+			if (devname[0] != '\0') {
+				struct net_device *dev;
+
+				ret = -ENODEV;
+
+				net = sock_net(sk);
+				dev = dev_get_by_name(net, devname);
+				if (!dev)
+					break;
+				ifindex = dev->ifindex;
+				dev_put(dev);
+			}
+			ret = sock_bindtoindex(sk, ifindex, false);
+#endif
+			break;
 		default:
 			ret = -EINVAL;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 9c441fe4c06a553ad770b6f21616327a3badf793 Mon Sep 17 00:00:00 2001
From: Ferenc Fejes <fejes@inf.elte.hu>
Date: Sat, 30 May 2020 23:09:02 +0200
Subject: selftests/bpf: Add test for SO_BINDTODEVICE opt of bpf_setsockopt

This test is intended to verify that the SO_BINDTODEVICE option works in
bpf_setsockopt. Because we are already at the SOL_SOCKET level in this
connect bpf prog, it is safe to verify the sanity at the beginning of
the connect_v4_prog by calling the bind_to_device test helper.

The testing environment is already created by the test_sock_addr.sh
script, so this test assumes that two netdevices already exist in
the system: a veth pair with names test_sock_addr1 and test_sock_addr2.
The test will try to bind the socket to those devices first.
Then the test assumes there is no netdevice with the "nonexistent_dev"
name, so bpf_setsockopt will give us an ENODEV error.
At the end the test removes the device binding from the socket
by binding it to an empty name.

Signed-off-by: Ferenc Fejes <fejes@inf.elte.hu>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/3f055b8e45c65639c5c73d0b4b6c589e60b86f15.1590871065.git.fejes@inf.elte.hu
---
 tools/testing/selftests/bpf/progs/connect4_prog.c | 33 +++++++++++++++++++++++
 1 file changed, 33 insertions(+)

diff --git a/tools/testing/selftests/bpf/progs/connect4_prog.c b/tools/testing/selftests/bpf/progs/connect4_prog.c
index c2c85c31cffd..1ab2c5eba86c 100644
--- a/tools/testing/selftests/bpf/progs/connect4_prog.c
+++ b/tools/testing/selftests/bpf/progs/connect4_prog.c
@@ -9,6 +9,8 @@
 #include <linux/in6.h>
 #include <sys/socket.h>
 #include <netinet/tcp.h>
+#include <linux/if.h>
+#include <errno.h>
 
 #include <bpf/bpf_helpers.h>
 #include <bpf/bpf_endian.h>
@@ -21,6 +23,10 @@
 #define TCP_CA_NAME_MAX 16
 #endif
 
+#ifndef IFNAMSIZ
+#define IFNAMSIZ 16
+#endif
+
 int _version SEC("version") = 1;
 
 __attribute__ ((noinline))
@@ -75,6 +81,29 @@ static __inline int set_cc(struct bpf_sock_addr *ctx)
 	return 0;
 }
 
+static __inline int bind_to_device(struct bpf_sock_addr *ctx)
+{
+	char veth1[IFNAMSIZ] = "test_sock_addr1";
+	char veth2[IFNAMSIZ] = "test_sock_addr2";
+	char missing[IFNAMSIZ] = "nonexistent_dev";
+	char del_bind[IFNAMSIZ] = "";
+
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth1, sizeof(veth1)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&veth2, sizeof(veth2)))
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&missing, sizeof(missing)) != -ENODEV)
+		return 1;
+	if (bpf_setsockopt(ctx, SOL_SOCKET, SO_BINDTODEVICE,
+				&del_bind, sizeof(del_bind)))
+		return 1;
+
+	return 0;
+}
+
 SEC("cgroup/connect4")
 int connect_v4_prog(struct bpf_sock_addr *ctx)
 {
@@ -88,6 +117,10 @@ int connect_v4_prog(struct bpf_sock_addr *ctx)
 	tuple.ipv4.daddr = bpf_htonl(DST_REWRITE_IP4);
 	tuple.ipv4.dport = bpf_htons(DST_REWRITE_PORT4);
 
+	/* Bind to device and unbind it. */
+	if (bind_to_device(ctx))
+		return 0;
+
 	if (ctx->type != SOCK_STREAM && ctx->type != SOCK_DGRAM)
 		return 0;
 	else if (ctx->type == SOCK_STREAM)
-- 
cgit v1.2.3-59-g8ed1b


From fc37987265b5979129a72c672b20245119768fb8 Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 28 May 2020 22:47:28 +0200
Subject: xdp: Introduce xdp_convert_frame_to_buff utility routine

Introduce the xdp_convert_frame_to_buff utility routine to initialize
xdp_buff fields from the xdp_frame ones. Rely on
xdp_convert_frame_to_buff in the veth xdp code.

Suggested-by: Jesper Dangaard Brouer <brouer@redhat.com>
Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/87acf133073c4b2d4cbb8097e8c2480c0a0fac32.1590698295.git.lorenzo@kernel.org
---
 drivers/net/veth.c |  6 +-----
 include/net/xdp.h  | 10 ++++++++++
 2 files changed, 11 insertions(+), 5 deletions(-)

diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index b586d2fa5551..fb5c17361f64 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -575,11 +575,7 @@ static struct sk_buff *veth_xdp_rcv_one(struct veth_rq *rq,
 		struct xdp_buff xdp;
 		u32 act;
 
-		xdp.data_hard_start = hard_start;
-		xdp.data = frame->data;
-		xdp.data_end = frame->data + frame->len;
-		xdp.data_meta = frame->data - frame->metasize;
-		xdp.frame_sz = frame->frame_sz;
+		xdp_convert_frame_to_buff(frame, &xdp);
 		xdp.rxq = &rq->xdp_rxq;
 
 		act = bpf_prog_run_xdp(xdp_prog, &xdp);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index d54022959491..db5c2c687f48 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -111,6 +111,16 @@ void xdp_warn(const char *msg, const char *func, const int line);
 
 struct xdp_frame *xdp_convert_zc_to_xdp_frame(struct xdp_buff *xdp);
 
+static inline
+void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
+{
+	xdp->data_hard_start = frame->data - frame->headroom - sizeof(*frame);
+	xdp->data = frame->data;
+	xdp->data_end = frame->data + frame->len;
+	xdp->data_meta = frame->data - frame->metasize;
+	xdp->frame_sz = frame->frame_sz;
+}
+
 /* Convert xdp_buff to xdp_frame */
 static inline
 struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
-- 
cgit v1.2.3-59-g8ed1b


From 1b698fa5d8ef958007c455e316aa44c37ab3c5fb Mon Sep 17 00:00:00 2001
From: Lorenzo Bianconi <lorenzo@kernel.org>
Date: Thu, 28 May 2020 22:47:29 +0200
Subject: xdp: Rename convert_to_xdp_frame in xdp_convert_buff_to_frame

In order to use the standard 'xdp' prefix, rename the
convert_to_xdp_frame utility routine to xdp_convert_buff_to_frame
and replace all the occurrences.

Signed-off-by: Lorenzo Bianconi <lorenzo@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Jesper Dangaard Brouer <brouer@redhat.com>
Link: https://lore.kernel.org/bpf/6344f739be0d1a08ab2b9607584c4d5478c8c083.1590698295.git.lorenzo@kernel.org
---
 drivers/net/ethernet/amazon/ena/ena_netdev.c     |  2 +-
 drivers/net/ethernet/intel/i40e/i40e_txrx.c      |  2 +-
 drivers/net/ethernet/intel/ice/ice_txrx_lib.c    |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_main.c    |  2 +-
 drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c     |  2 +-
 drivers/net/ethernet/marvell/mvneta.c            |  2 +-
 drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c | 10 +++++-----
 drivers/net/ethernet/sfc/rx.c                    |  2 +-
 drivers/net/ethernet/socionext/netsec.c          |  2 +-
 drivers/net/ethernet/ti/cpsw_priv.c              |  2 +-
 drivers/net/tun.c                                |  2 +-
 drivers/net/veth.c                               |  2 +-
 drivers/net/virtio_net.c                         |  4 ++--
 include/net/xdp.h                                |  2 +-
 kernel/bpf/cpumap.c                              |  2 +-
 kernel/bpf/devmap.c                              |  2 +-
 16 files changed, 21 insertions(+), 21 deletions(-)

diff --git a/drivers/net/ethernet/amazon/ena/ena_netdev.c b/drivers/net/ethernet/amazon/ena/ena_netdev.c
index 46865d5bd7e7..a0af74c93971 100644
--- a/drivers/net/ethernet/amazon/ena/ena_netdev.c
+++ b/drivers/net/ethernet/amazon/ena/ena_netdev.c
@@ -263,7 +263,7 @@ static int ena_xdp_tx_map_buff(struct ena_ring *xdp_ring,
 	dma_addr_t dma = 0;
 	u32 size;
 
-	tx_info->xdpf = convert_to_xdp_frame(xdp);
+	tx_info->xdpf = xdp_convert_buff_to_frame(xdp);
 	size = tx_info->xdpf->len;
 	ena_buf = tx_info->bufs;
 
diff --git a/drivers/net/ethernet/intel/i40e/i40e_txrx.c b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
index f613782f2f56..f9555c847f73 100644
--- a/drivers/net/ethernet/intel/i40e/i40e_txrx.c
+++ b/drivers/net/ethernet/intel/i40e/i40e_txrx.c
@@ -2167,7 +2167,7 @@ static int i40e_xmit_xdp_ring(struct xdp_frame *xdpf,
 
 int i40e_xmit_xdp_tx_ring(struct xdp_buff *xdp, struct i40e_ring *xdp_ring)
 {
-	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!xdpf))
 		return I40E_XDP_CONSUMED;
diff --git a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
index ab2031b1c635..02b12736ea80 100644
--- a/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
+++ b/drivers/net/ethernet/intel/ice/ice_txrx_lib.c
@@ -254,7 +254,7 @@ int ice_xmit_xdp_ring(void *data, u16 size, struct ice_ring *xdp_ring)
  */
 int ice_xmit_xdp_buff(struct xdp_buff *xdp, struct ice_ring *xdp_ring)
 {
-	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!xdpf))
 		return ICE_XDP_CONSUMED;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
index a59c166f794f..f162b8b8f345 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_main.c
@@ -2215,7 +2215,7 @@ static struct sk_buff *ixgbe_run_xdp(struct ixgbe_adapter *adapter,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		xdpf = convert_to_xdp_frame(xdp);
+		xdpf = xdp_convert_buff_to_frame(xdp);
 		if (unlikely(!xdpf)) {
 			result = IXGBE_XDP_CONSUMED;
 			break;
diff --git a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
index 86add9fbd36c..be9d2a8da515 100644
--- a/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
+++ b/drivers/net/ethernet/intel/ixgbe/ixgbe_xsk.c
@@ -107,7 +107,7 @@ static int ixgbe_run_xdp_zc(struct ixgbe_adapter *adapter,
 	case XDP_PASS:
 		break;
 	case XDP_TX:
-		xdpf = convert_to_xdp_frame(xdp);
+		xdpf = xdp_convert_buff_to_frame(xdp);
 		if (unlikely(!xdpf)) {
 			result = IXGBE_XDP_CONSUMED;
 			break;
diff --git a/drivers/net/ethernet/marvell/mvneta.c b/drivers/net/ethernet/marvell/mvneta.c
index 15e42a7f8a86..011cd26953d9 100644
--- a/drivers/net/ethernet/marvell/mvneta.c
+++ b/drivers/net/ethernet/marvell/mvneta.c
@@ -2073,7 +2073,7 @@ mvneta_xdp_xmit_back(struct mvneta_port *pp, struct xdp_buff *xdp)
 	int cpu;
 	u32 ret;
 
-	xdpf = convert_to_xdp_frame(xdp);
+	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return MVNETA_XDP_DROPPED;
 
diff --git a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
index 3bea1d4be53b..c9d308e91965 100644
--- a/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
+++ b/drivers/net/ethernet/mellanox/mlx5/core/en/xdp.c
@@ -64,7 +64,7 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 	struct xdp_frame *xdpf;
 	dma_addr_t dma_addr;
 
-	xdpf = convert_to_xdp_frame(xdp);
+	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return false;
 
@@ -97,10 +97,10 @@ mlx5e_xmit_xdp_buff(struct mlx5e_xdpsq *sq, struct mlx5e_rq *rq,
 		xdpi.frame.xdpf     = xdpf;
 		xdpi.frame.dma_addr = dma_addr;
 	} else {
-		/* Driver assumes that convert_to_xdp_frame returns an xdp_frame
-		 * that points to the same memory region as the original
-		 * xdp_buff. It allows to map the memory only once and to use
-		 * the DMA_BIDIRECTIONAL mode.
+		/* Driver assumes that xdp_convert_buff_to_frame returns
+		 * an xdp_frame that points to the same memory region as
+		 * the original xdp_buff. It allows to map the memory only
+		 * once and to use the DMA_BIDIRECTIONAL mode.
 		 */
 
 		xdpi.mode = MLX5E_XDP_XMIT_MODE_PAGE;
diff --git a/drivers/net/ethernet/sfc/rx.c b/drivers/net/ethernet/sfc/rx.c
index 68c47a8c71df..c01916cff507 100644
--- a/drivers/net/ethernet/sfc/rx.c
+++ b/drivers/net/ethernet/sfc/rx.c
@@ -329,7 +329,7 @@ static bool efx_do_xdp(struct efx_nic *efx, struct efx_channel *channel,
 
 	case XDP_TX:
 		/* Buffer ownership passes to tx on success. */
-		xdpf = convert_to_xdp_frame(&xdp);
+		xdpf = xdp_convert_buff_to_frame(&xdp);
 		err = efx_xdp_tx_buffers(efx, 1, &xdpf, true);
 		if (unlikely(err != 1)) {
 			efx_free_rx_buffers(rx_queue, rx_buf, 1);
diff --git a/drivers/net/ethernet/socionext/netsec.c b/drivers/net/ethernet/socionext/netsec.c
index e1f4be4b3d69..328bc38848bb 100644
--- a/drivers/net/ethernet/socionext/netsec.c
+++ b/drivers/net/ethernet/socionext/netsec.c
@@ -867,7 +867,7 @@ static u32 netsec_xdp_queue_one(struct netsec_priv *priv,
 static u32 netsec_xdp_xmit_back(struct netsec_priv *priv, struct xdp_buff *xdp)
 {
 	struct netsec_desc_ring *tx_ring = &priv->desc_ring[NETSEC_RING_TX];
-	struct xdp_frame *xdpf = convert_to_xdp_frame(xdp);
+	struct xdp_frame *xdpf = xdp_convert_buff_to_frame(xdp);
 	u32 ret;
 
 	if (unlikely(!xdpf))
diff --git a/drivers/net/ethernet/ti/cpsw_priv.c b/drivers/net/ethernet/ti/cpsw_priv.c
index d940628bff8d..a399f3659346 100644
--- a/drivers/net/ethernet/ti/cpsw_priv.c
+++ b/drivers/net/ethernet/ti/cpsw_priv.c
@@ -1355,7 +1355,7 @@ int cpsw_run_xdp(struct cpsw_priv *priv, int ch, struct xdp_buff *xdp,
 		ret = CPSW_XDP_PASS;
 		break;
 	case XDP_TX:
-		xdpf = convert_to_xdp_frame(xdp);
+		xdpf = xdp_convert_buff_to_frame(xdp);
 		if (unlikely(!xdpf))
 			goto drop;
 
diff --git a/drivers/net/tun.c b/drivers/net/tun.c
index b0ab882c021e..858b012074bd 100644
--- a/drivers/net/tun.c
+++ b/drivers/net/tun.c
@@ -1295,7 +1295,7 @@ resample:
 
 static int tun_xdp_tx(struct net_device *dev, struct xdp_buff *xdp)
 {
-	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
diff --git a/drivers/net/veth.c b/drivers/net/veth.c
index fb5c17361f64..b594f03eeddb 100644
--- a/drivers/net/veth.c
+++ b/drivers/net/veth.c
@@ -541,7 +541,7 @@ out:
 static int veth_xdp_tx(struct veth_rq *rq, struct xdp_buff *xdp,
 		       struct veth_xdp_tx_bq *bq)
 {
-	struct xdp_frame *frame = convert_to_xdp_frame(xdp);
+	struct xdp_frame *frame = xdp_convert_buff_to_frame(xdp);
 
 	if (unlikely(!frame))
 		return -EOVERFLOW;
diff --git a/drivers/net/virtio_net.c b/drivers/net/virtio_net.c
index b6951aa76295..ba38765dc490 100644
--- a/drivers/net/virtio_net.c
+++ b/drivers/net/virtio_net.c
@@ -703,7 +703,7 @@ static struct sk_buff *receive_small(struct net_device *dev,
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
-			xdpf = convert_to_xdp_frame(&xdp);
+			xdpf = xdp_convert_buff_to_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
@@ -892,7 +892,7 @@ static struct sk_buff *receive_mergeable(struct net_device *dev,
 			break;
 		case XDP_TX:
 			stats->xdp_tx++;
-			xdpf = convert_to_xdp_frame(&xdp);
+			xdpf = xdp_convert_buff_to_frame(&xdp);
 			if (unlikely(!xdpf))
 				goto err_xdp;
 			err = virtnet_xdp_xmit(dev, 1, &xdpf, 0);
diff --git a/include/net/xdp.h b/include/net/xdp.h
index db5c2c687f48..609f819ed08b 100644
--- a/include/net/xdp.h
+++ b/include/net/xdp.h
@@ -123,7 +123,7 @@ void xdp_convert_frame_to_buff(struct xdp_frame *frame, struct xdp_buff *xdp)
 
 /* Convert xdp_buff to xdp_frame */
 static inline
-struct xdp_frame *convert_to_xdp_frame(struct xdp_buff *xdp)
+struct xdp_frame *xdp_convert_buff_to_frame(struct xdp_buff *xdp)
 {
 	struct xdp_frame *xdp_frame;
 	int metasize;
diff --git a/kernel/bpf/cpumap.c b/kernel/bpf/cpumap.c
index 8b85bfddfac7..27595fc6da56 100644
--- a/kernel/bpf/cpumap.c
+++ b/kernel/bpf/cpumap.c
@@ -621,7 +621,7 @@ int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp,
 {
 	struct xdp_frame *xdpf;
 
-	xdpf = convert_to_xdp_frame(xdp);
+	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
diff --git a/kernel/bpf/devmap.c b/kernel/bpf/devmap.c
index c04fb1c72f5e..854b09beb16b 100644
--- a/kernel/bpf/devmap.c
+++ b/kernel/bpf/devmap.c
@@ -465,7 +465,7 @@ static inline int __xdp_enqueue(struct net_device *dev, struct xdp_buff *xdp,
 	if (unlikely(err))
 		return err;
 
-	xdpf = convert_to_xdp_frame(xdp);
+	xdpf = xdp_convert_buff_to_frame(xdp);
 	if (unlikely(!xdpf))
 		return -EOVERFLOW;
 
-- 
cgit v1.2.3-59-g8ed1b


From 958a3f2d2aff896ae2a622878e456114f4a4cd15 Mon Sep 17 00:00:00 2001
From: Jiri Olsa <jolsa@kernel.org>
Date: Sun, 31 May 2020 17:42:55 +0200
Subject: bpf: Use tracing helpers for lsm programs

Currently lsm uses bpf_tracing_func_proto helpers which do
not include stack trace or perf event output. It's useful
to have those for bpftrace lsm support [1].

Use tracing_prog_func_proto helpers for lsm programs.

[1] https://github.com/iovisor/bpftrace/pull/1347

Signed-off-by: Jiri Olsa <jolsa@kernel.org>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: KP Singh <kpsingh@google.com>
Link: https://lore.kernel.org/bpf/20200531154255.896551-1-jolsa@kernel.org
---
 include/linux/bpf.h      | 3 +++
 kernel/bpf/bpf_lsm.c     | 2 +-
 kernel/trace/bpf_trace.c | 2 +-
 3 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf.h b/include/linux/bpf.h
index e042311f991f..07052d44bca1 100644
--- a/include/linux/bpf.h
+++ b/include/linux/bpf.h
@@ -1633,6 +1633,9 @@ extern const struct bpf_func_proto bpf_ringbuf_query_proto;
 const struct bpf_func_proto *bpf_tracing_func_proto(
 	enum bpf_func_id func_id, const struct bpf_prog *prog);
 
+const struct bpf_func_proto *tracing_prog_func_proto(
+  enum bpf_func_id func_id, const struct bpf_prog *prog);
+
 /* Shared helpers among cBPF and eBPF. */
 void bpf_user_rnd_init_once(void);
 u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5);
diff --git a/kernel/bpf/bpf_lsm.c b/kernel/bpf/bpf_lsm.c
index 19636703b24e..fb278144e9fd 100644
--- a/kernel/bpf/bpf_lsm.c
+++ b/kernel/bpf/bpf_lsm.c
@@ -49,6 +49,6 @@ const struct bpf_prog_ops lsm_prog_ops = {
 };
 
 const struct bpf_verifier_ops lsm_verifier_ops = {
-	.get_func_proto = bpf_tracing_func_proto,
+	.get_func_proto = tracing_prog_func_proto,
 	.is_valid_access = btf_ctx_access,
 };
diff --git a/kernel/trace/bpf_trace.c b/kernel/trace/bpf_trace.c
index b6c24be5ff53..c41186417d93 100644
--- a/kernel/trace/bpf_trace.c
+++ b/kernel/trace/bpf_trace.c
@@ -1467,7 +1467,7 @@ raw_tp_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 	}
 }
 
-static const struct bpf_func_proto *
+const struct bpf_func_proto *
 tracing_prog_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 {
 	switch (func_id) {
-- 
cgit v1.2.3-59-g8ed1b


From febeb6dff7beafcaf89521f6c8ff7b0adac08d54 Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jun 2020 13:26:01 -0700
Subject: libbpf: Add _GNU_SOURCE for reallocarray to ringbuf.c

On systems with recent enough glibc, reallocarray compat won't kick in, so
reallocarray() itself has to come from stdlib.h include. But _GNU_SOURCE is
necessary to enable it. So add it.

Fixes: bf99c936f947 ("libbpf: Add BPF ring buffer support")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200601202601.2139477-1-andriin@fb.com
---
 tools/lib/bpf/ringbuf.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/tools/lib/bpf/ringbuf.c b/tools/lib/bpf/ringbuf.c
index bc10fa1d43c7..4fc6c6cbb4eb 100644
--- a/tools/lib/bpf/ringbuf.c
+++ b/tools/lib/bpf/ringbuf.c
@@ -4,6 +4,9 @@
  *
  * Copyright (C) 2020 Facebook, Inc.
  */
+#ifndef _GNU_SOURCE
+#define _GNU_SOURCE
+#endif
 #include <stdlib.h>
 #include <stdio.h>
 #include <errno.h>
-- 
cgit v1.2.3-59-g8ed1b


From 171526f6fee84de0c39e2b7aa7e666ba0bbfd173 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:35 +0200
Subject: flow_dissector: Pull locking up from prog attach callback

Split out the part of attach callback that happens with attach/detach lock
acquired. This structures the prog attach callback in a way that opens up
doors for moving the locking out of flow_dissector and into generic
callbacks for attaching/detaching progs to netns in subsequent patches.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-2-jakub@cloudflare.com
---
 net/core/flow_dissector.c | 40 ++++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 0aeb33572feb..b64a44a083fd 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -109,15 +109,10 @@ int skb_flow_dissector_prog_query(const union bpf_attr *attr,
 	return 0;
 }
 
-int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
-				       struct bpf_prog *prog)
+static int flow_dissector_bpf_prog_attach(struct net *net,
+					  struct bpf_prog *prog)
 {
 	struct bpf_prog *attached;
-	struct net *net;
-	int ret = 0;
-
-	net = current->nsproxy->net_ns;
-	mutex_lock(&flow_dissector_mutex);
 
 	if (net == &init_net) {
 		/* BPF flow dissector in the root namespace overrides
@@ -130,33 +125,38 @@ int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
 		for_each_net(ns) {
 			if (ns == &init_net)
 				continue;
-			if (rcu_access_pointer(ns->flow_dissector_prog)) {
-				ret = -EEXIST;
-				goto out;
-			}
+			if (rcu_access_pointer(ns->flow_dissector_prog))
+				return -EEXIST;
 		}
 	} else {
 		/* Make sure root flow dissector is not attached
 		 * when attaching to the non-root namespace.
 		 */
-		if (rcu_access_pointer(init_net.flow_dissector_prog)) {
-			ret = -EEXIST;
-			goto out;
-		}
+		if (rcu_access_pointer(init_net.flow_dissector_prog))
+			return -EEXIST;
 	}
 
 	attached = rcu_dereference_protected(net->flow_dissector_prog,
 					     lockdep_is_held(&flow_dissector_mutex));
-	if (attached == prog) {
+	if (attached == prog)
 		/* The same program cannot be attached twice */
-		ret = -EINVAL;
-		goto out;
-	}
+		return -EINVAL;
+
 	rcu_assign_pointer(net->flow_dissector_prog, prog);
 	if (attached)
 		bpf_prog_put(attached);
-out:
+	return 0;
+}
+
+int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
+				       struct bpf_prog *prog)
+{
+	int ret;
+
+	mutex_lock(&flow_dissector_mutex);
+	ret = flow_dissector_bpf_prog_attach(current->nsproxy->net_ns, prog);
 	mutex_unlock(&flow_dissector_mutex);
+
 	return ret;
 }
 
-- 
cgit v1.2.3-59-g8ed1b


From a3fd7ceee05431d2c51ed86c6cae015d236a51f0 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:36 +0200
Subject: net: Introduce netns_bpf for BPF programs attached to netns

In order to:

 (1) attach more than one BPF program type to netns, or
 (2) support attaching BPF programs to netns with bpf_link, or
 (3) support multi-prog attach points for netns

we will need to keep more state per netns than a single pointer like we
have now for BPF flow dissector program.

Prepare for the above by extracting netns_bpf that is part of struct net,
for storing all state related to BPF programs attached to netns.

Turn flow dissector callbacks for querying/attaching/detaching a program
into generic ones that operate on netns_bpf. Next patch will move the
generic callbacks into their own module.

This is similar to how it is organized for cgroup with cgroup_bpf.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Cc: Stanislav Fomichev <sdf@google.com>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-3-jakub@cloudflare.com
---
 include/linux/bpf-netns.h   |  56 +++++++++++++++++++++++
 include/linux/skbuff.h      |  26 -----------
 include/net/net_namespace.h |   4 +-
 include/net/netns/bpf.h     |  17 +++++++
 kernel/bpf/syscall.c        |   7 +--
 net/core/flow_dissector.c   | 105 +++++++++++++++++++++++++++++---------------
 6 files changed, 149 insertions(+), 66 deletions(-)
 create mode 100644 include/linux/bpf-netns.h
 create mode 100644 include/net/netns/bpf.h

diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h
new file mode 100644
index 000000000000..f3aec3d79824
--- /dev/null
+++ b/include/linux/bpf-netns.h
@@ -0,0 +1,56 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+#ifndef _BPF_NETNS_H
+#define _BPF_NETNS_H
+
+#include <linux/mutex.h>
+#include <uapi/linux/bpf.h>
+
+enum netns_bpf_attach_type {
+	NETNS_BPF_INVALID = -1,
+	NETNS_BPF_FLOW_DISSECTOR = 0,
+	MAX_NETNS_BPF_ATTACH_TYPE
+};
+
+static inline enum netns_bpf_attach_type
+to_netns_bpf_attach_type(enum bpf_attach_type attach_type)
+{
+	switch (attach_type) {
+	case BPF_FLOW_DISSECTOR:
+		return NETNS_BPF_FLOW_DISSECTOR;
+	default:
+		return NETNS_BPF_INVALID;
+	}
+}
+
+/* Protects updates to netns_bpf */
+extern struct mutex netns_bpf_mutex;
+
+union bpf_attr;
+struct bpf_prog;
+
+#ifdef CONFIG_NET
+int netns_bpf_prog_query(const union bpf_attr *attr,
+			 union bpf_attr __user *uattr);
+int netns_bpf_prog_attach(const union bpf_attr *attr,
+			  struct bpf_prog *prog);
+int netns_bpf_prog_detach(const union bpf_attr *attr);
+#else
+static inline int netns_bpf_prog_query(const union bpf_attr *attr,
+				       union bpf_attr __user *uattr)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int netns_bpf_prog_attach(const union bpf_attr *attr,
+					struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
+
+static inline int netns_bpf_prog_detach(const union bpf_attr *attr)
+{
+	return -EOPNOTSUPP;
+}
+#endif
+
+#endif /* _BPF_NETNS_H */
diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index 531843952809..a0d5c2760103 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -1283,32 +1283,6 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 			     const struct flow_dissector_key *key,
 			     unsigned int key_count);
 
-#ifdef CONFIG_NET
-int skb_flow_dissector_prog_query(const union bpf_attr *attr,
-				  union bpf_attr __user *uattr);
-int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
-				       struct bpf_prog *prog);
-
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr);
-#else
-static inline int skb_flow_dissector_prog_query(const union bpf_attr *attr,
-						union bpf_attr __user *uattr)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
-						     struct bpf_prog *prog)
-{
-	return -EOPNOTSUPP;
-}
-
-static inline int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
-{
-	return -EOPNOTSUPP;
-}
-#endif
-
 struct bpf_flow_dissector;
 bool bpf_flow_dissect(struct bpf_prog *prog, struct bpf_flow_dissector *ctx,
 		      __be16 proto, int nhoff, int hlen, unsigned int flags);
diff --git a/include/net/net_namespace.h b/include/net/net_namespace.h
index 8e001e049497..2ee5901bec7a 100644
--- a/include/net/net_namespace.h
+++ b/include/net/net_namespace.h
@@ -33,6 +33,7 @@
 #include <net/netns/mpls.h>
 #include <net/netns/can.h>
 #include <net/netns/xdp.h>
+#include <net/netns/bpf.h>
 #include <linux/ns_common.h>
 #include <linux/idr.h>
 #include <linux/skbuff.h>
@@ -162,7 +163,8 @@ struct net {
 #endif
 	struct net_generic __rcu	*gen;
 
-	struct bpf_prog __rcu	*flow_dissector_prog;
+	/* Used to store attached BPF programs */
+	struct netns_bpf	bpf;
 
 	/* Note : following structs are cache line aligned */
 #ifdef CONFIG_XFRM
diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h
new file mode 100644
index 000000000000..a858d1c5b166
--- /dev/null
+++ b/include/net/netns/bpf.h
@@ -0,0 +1,17 @@
+/* SPDX-License-Identifier: GPL-2.0 */
+/*
+ * BPF programs attached to network namespace
+ */
+
+#ifndef __NETNS_BPF_H__
+#define __NETNS_BPF_H__
+
+#include <linux/bpf-netns.h>
+
+struct bpf_prog;
+
+struct netns_bpf {
+	struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
+};
+
+#endif /* __NETNS_BPF_H__ */
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index e83b0818b529..c77ab9c76f7b 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -27,6 +27,7 @@
 #include <uapi/linux/btf.h>
 #include <linux/bpf_lsm.h>
 #include <linux/poll.h>
+#include <linux/bpf-netns.h>
 
 #define IS_FD_ARRAY(map) ((map)->map_type == BPF_MAP_TYPE_PERF_EVENT_ARRAY || \
 			  (map)->map_type == BPF_MAP_TYPE_CGROUP_ARRAY || \
@@ -2868,7 +2869,7 @@ static int bpf_prog_attach(const union bpf_attr *attr)
 		ret = lirc_prog_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
-		ret = skb_flow_dissector_bpf_prog_attach(attr, prog);
+		ret = netns_bpf_prog_attach(attr, prog);
 		break;
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_CGROUP_SKB:
@@ -2908,7 +2909,7 @@ static int bpf_prog_detach(const union bpf_attr *attr)
 	case BPF_PROG_TYPE_FLOW_DISSECTOR:
 		if (!capable(CAP_NET_ADMIN))
 			return -EPERM;
-		return skb_flow_dissector_bpf_prog_detach(attr);
+		return netns_bpf_prog_detach(attr);
 	case BPF_PROG_TYPE_CGROUP_DEVICE:
 	case BPF_PROG_TYPE_CGROUP_SKB:
 	case BPF_PROG_TYPE_CGROUP_SOCK:
@@ -2961,7 +2962,7 @@ static int bpf_prog_query(const union bpf_attr *attr,
 	case BPF_LIRC_MODE2:
 		return lirc_prog_query(attr, uattr);
 	case BPF_FLOW_DISSECTOR:
-		return skb_flow_dissector_prog_query(attr, uattr);
+		return netns_bpf_prog_query(attr, uattr);
 	default:
 		return -EINVAL;
 	}
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index b64a44a083fd..6c1b8e43d611 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -31,8 +31,10 @@
 #include <net/netfilter/nf_conntrack_core.h>
 #include <net/netfilter/nf_conntrack_labels.h>
 #endif
+#include <linux/bpf-netns.h>
 
-static DEFINE_MUTEX(flow_dissector_mutex);
+/* Protects updates to netns_bpf */
+DEFINE_MUTEX(netns_bpf_mutex);
 
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
@@ -70,23 +72,28 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
-int skb_flow_dissector_prog_query(const union bpf_attr *attr,
-				  union bpf_attr __user *uattr)
+int netns_bpf_prog_query(const union bpf_attr *attr,
+			 union bpf_attr __user *uattr)
 {
 	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
 	u32 prog_id, prog_cnt = 0, flags = 0;
+	enum netns_bpf_attach_type type;
 	struct bpf_prog *attached;
 	struct net *net;
 
 	if (attr->query.query_flags)
 		return -EINVAL;
 
+	type = to_netns_bpf_attach_type(attr->query.attach_type);
+	if (type < 0)
+		return -EINVAL;
+
 	net = get_net_ns_by_fd(attr->query.target_fd);
 	if (IS_ERR(net))
 		return PTR_ERR(net);
 
 	rcu_read_lock();
-	attached = rcu_dereference(net->flow_dissector_prog);
+	attached = rcu_dereference(net->bpf.progs[type]);
 	if (attached) {
 		prog_cnt = 1;
 		prog_id = attached->aux->id;
@@ -112,6 +119,7 @@ int skb_flow_dissector_prog_query(const union bpf_attr *attr,
 static int flow_dissector_bpf_prog_attach(struct net *net,
 					  struct bpf_prog *prog)
 {
+	enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
 	struct bpf_prog *attached;
 
 	if (net == &init_net) {
@@ -125,74 +133,97 @@ static int flow_dissector_bpf_prog_attach(struct net *net,
 		for_each_net(ns) {
 			if (ns == &init_net)
 				continue;
-			if (rcu_access_pointer(ns->flow_dissector_prog))
+			if (rcu_access_pointer(ns->bpf.progs[type]))
 				return -EEXIST;
 		}
 	} else {
 		/* Make sure root flow dissector is not attached
 		 * when attaching to the non-root namespace.
 		 */
-		if (rcu_access_pointer(init_net.flow_dissector_prog))
+		if (rcu_access_pointer(init_net.bpf.progs[type]))
 			return -EEXIST;
 	}
 
-	attached = rcu_dereference_protected(net->flow_dissector_prog,
-					     lockdep_is_held(&flow_dissector_mutex));
+	attached = rcu_dereference_protected(net->bpf.progs[type],
+					     lockdep_is_held(&netns_bpf_mutex));
 	if (attached == prog)
 		/* The same program cannot be attached twice */
 		return -EINVAL;
 
-	rcu_assign_pointer(net->flow_dissector_prog, prog);
+	rcu_assign_pointer(net->bpf.progs[type], prog);
 	if (attached)
 		bpf_prog_put(attached);
 	return 0;
 }
 
-int skb_flow_dissector_bpf_prog_attach(const union bpf_attr *attr,
-				       struct bpf_prog *prog)
+int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 {
+	enum netns_bpf_attach_type type;
+	struct net *net;
 	int ret;
 
-	mutex_lock(&flow_dissector_mutex);
-	ret = flow_dissector_bpf_prog_attach(current->nsproxy->net_ns, prog);
-	mutex_unlock(&flow_dissector_mutex);
+	type = to_netns_bpf_attach_type(attr->attach_type);
+	if (type < 0)
+		return -EINVAL;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&netns_bpf_mutex);
+	switch (type) {
+	case NETNS_BPF_FLOW_DISSECTOR:
+		ret = flow_dissector_bpf_prog_attach(net, prog);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&netns_bpf_mutex);
 
 	return ret;
 }
 
-static int flow_dissector_bpf_prog_detach(struct net *net)
+/* Must be called with netns_bpf_mutex held. */
+static int __netns_bpf_prog_detach(struct net *net,
+				   enum netns_bpf_attach_type type)
 {
 	struct bpf_prog *attached;
 
-	mutex_lock(&flow_dissector_mutex);
-	attached = rcu_dereference_protected(net->flow_dissector_prog,
-					     lockdep_is_held(&flow_dissector_mutex));
-	if (!attached) {
-		mutex_unlock(&flow_dissector_mutex);
+	attached = rcu_dereference_protected(net->bpf.progs[type],
+					     lockdep_is_held(&netns_bpf_mutex));
+	if (!attached)
 		return -ENOENT;
-	}
-	RCU_INIT_POINTER(net->flow_dissector_prog, NULL);
+	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
 	bpf_prog_put(attached);
-	mutex_unlock(&flow_dissector_mutex);
 	return 0;
 }
 
-int skb_flow_dissector_bpf_prog_detach(const union bpf_attr *attr)
+int netns_bpf_prog_detach(const union bpf_attr *attr)
 {
-	return flow_dissector_bpf_prog_detach(current->nsproxy->net_ns);
+	enum netns_bpf_attach_type type;
+	int ret;
+
+	type = to_netns_bpf_attach_type(attr->attach_type);
+	if (type < 0)
+		return -EINVAL;
+
+	mutex_lock(&netns_bpf_mutex);
+	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
+	mutex_unlock(&netns_bpf_mutex);
+
+	return ret;
 }
 
-static void __net_exit flow_dissector_pernet_pre_exit(struct net *net)
+static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
 {
-	/* We're not racing with attach/detach because there are no
-	 * references to netns left when pre_exit gets called.
-	 */
-	if (rcu_access_pointer(net->flow_dissector_prog))
-		flow_dissector_bpf_prog_detach(net);
+	enum netns_bpf_attach_type type;
+
+	mutex_lock(&netns_bpf_mutex);
+	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
+		__netns_bpf_prog_detach(net, type);
+	mutex_unlock(&netns_bpf_mutex);
 }
 
-static struct pernet_operations flow_dissector_pernet_ops __net_initdata = {
-	.pre_exit = flow_dissector_pernet_pre_exit,
+static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
+	.pre_exit = netns_bpf_pernet_pre_exit,
 };
 
 /**
@@ -1044,11 +1075,13 @@ bool __skb_flow_dissect(const struct net *net,
 
 	WARN_ON_ONCE(!net);
 	if (net) {
+		enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
+
 		rcu_read_lock();
-		attached = rcu_dereference(init_net.flow_dissector_prog);
+		attached = rcu_dereference(init_net.bpf.progs[type]);
 
 		if (!attached)
-			attached = rcu_dereference(net->flow_dissector_prog);
+			attached = rcu_dereference(net->bpf.progs[type]);
 
 		if (attached) {
 			struct bpf_flow_keys flow_keys;
@@ -1870,6 +1903,6 @@ static int __init init_default_flow_dissectors(void)
 				flow_keys_basic_dissector_keys,
 				ARRAY_SIZE(flow_keys_basic_dissector_keys));
 
-	return register_pernet_subsys(&flow_dissector_pernet_ops);
+	return register_pernet_subsys(&netns_bpf_pernet_ops);
 }
 core_initcall(init_default_flow_dissectors);
-- 
cgit v1.2.3-59-g8ed1b


From b27f7bb590ba835b32ef122389db158e44cfda1e Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:37 +0200
Subject: flow_dissector: Move out netns_bpf prog callbacks

Move functions to manage BPF programs attached to netns that are not
specific to flow dissector to a dedicated module named
bpf/net_namespace.c.

The set of functions will grow with the addition of bpf_link support for
netns attached programs. This patch prepares ground by creating a place
for it.

This is a code move with no functional changes intended.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-4-jakub@cloudflare.com
---
 include/net/flow_dissector.h |   6 ++
 kernel/bpf/Makefile          |   1 +
 kernel/bpf/net_namespace.c   | 133 +++++++++++++++++++++++++++++++++++++++++++
 net/core/flow_dissector.c    | 125 ++--------------------------------------
 4 files changed, 144 insertions(+), 121 deletions(-)
 create mode 100644 kernel/bpf/net_namespace.c

diff --git a/include/net/flow_dissector.h b/include/net/flow_dissector.h
index 4fb1a69c6ecf..a7eba43fe4e4 100644
--- a/include/net/flow_dissector.h
+++ b/include/net/flow_dissector.h
@@ -8,6 +8,8 @@
 #include <linux/string.h>
 #include <uapi/linux/if_ether.h>
 
+struct bpf_prog;
+struct net;
 struct sk_buff;
 
 /**
@@ -369,4 +371,8 @@ flow_dissector_init_keys(struct flow_dissector_key_control *key_control,
 	memset(key_basic, 0, sizeof(*key_basic));
 }
 
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog);
+#endif /* CONFIG_BPF_SYSCALL */
+
 #endif
diff --git a/kernel/bpf/Makefile b/kernel/bpf/Makefile
index 8fca02f64811..1131a921e1a6 100644
--- a/kernel/bpf/Makefile
+++ b/kernel/bpf/Makefile
@@ -13,6 +13,7 @@ ifeq ($(CONFIG_NET),y)
 obj-$(CONFIG_BPF_SYSCALL) += devmap.o
 obj-$(CONFIG_BPF_SYSCALL) += cpumap.o
 obj-$(CONFIG_BPF_SYSCALL) += offload.o
+obj-$(CONFIG_BPF_SYSCALL) += net_namespace.o
 endif
 ifeq ($(CONFIG_PERF_EVENTS),y)
 obj-$(CONFIG_BPF_SYSCALL) += stackmap.o
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
new file mode 100644
index 000000000000..b37d81450c3a
--- /dev/null
+++ b/kernel/bpf/net_namespace.c
@@ -0,0 +1,133 @@
+// SPDX-License-Identifier: GPL-2.0
+
+#include <linux/bpf.h>
+#include <linux/filter.h>
+#include <net/net_namespace.h>
+
+/*
+ * Functions to manage BPF programs attached to netns
+ */
+
+/* Protects updates to netns_bpf */
+DEFINE_MUTEX(netns_bpf_mutex);
+
+int netns_bpf_prog_query(const union bpf_attr *attr,
+			 union bpf_attr __user *uattr)
+{
+	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
+	u32 prog_id, prog_cnt = 0, flags = 0;
+	enum netns_bpf_attach_type type;
+	struct bpf_prog *attached;
+	struct net *net;
+
+	if (attr->query.query_flags)
+		return -EINVAL;
+
+	type = to_netns_bpf_attach_type(attr->query.attach_type);
+	if (type < 0)
+		return -EINVAL;
+
+	net = get_net_ns_by_fd(attr->query.target_fd);
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	rcu_read_lock();
+	attached = rcu_dereference(net->bpf.progs[type]);
+	if (attached) {
+		prog_cnt = 1;
+		prog_id = attached->aux->id;
+	}
+	rcu_read_unlock();
+
+	put_net(net);
+
+	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
+		return -EFAULT;
+	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
+		return -EFAULT;
+
+	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
+		return 0;
+
+	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
+		return -EFAULT;
+
+	return 0;
+}
+
+int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	enum netns_bpf_attach_type type;
+	struct net *net;
+	int ret;
+
+	type = to_netns_bpf_attach_type(attr->attach_type);
+	if (type < 0)
+		return -EINVAL;
+
+	net = current->nsproxy->net_ns;
+	mutex_lock(&netns_bpf_mutex);
+	switch (type) {
+	case NETNS_BPF_FLOW_DISSECTOR:
+		ret = flow_dissector_bpf_prog_attach(net, prog);
+		break;
+	default:
+		ret = -EINVAL;
+		break;
+	}
+	mutex_unlock(&netns_bpf_mutex);
+
+	return ret;
+}
+
+/* Must be called with netns_bpf_mutex held. */
+static int __netns_bpf_prog_detach(struct net *net,
+				   enum netns_bpf_attach_type type)
+{
+	struct bpf_prog *attached;
+
+	attached = rcu_dereference_protected(net->bpf.progs[type],
+					     lockdep_is_held(&netns_bpf_mutex));
+	if (!attached)
+		return -ENOENT;
+	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
+	bpf_prog_put(attached);
+	return 0;
+}
+
+int netns_bpf_prog_detach(const union bpf_attr *attr)
+{
+	enum netns_bpf_attach_type type;
+	int ret;
+
+	type = to_netns_bpf_attach_type(attr->attach_type);
+	if (type < 0)
+		return -EINVAL;
+
+	mutex_lock(&netns_bpf_mutex);
+	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
+	mutex_unlock(&netns_bpf_mutex);
+
+	return ret;
+}
+
+static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
+{
+	enum netns_bpf_attach_type type;
+
+	mutex_lock(&netns_bpf_mutex);
+	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
+		__netns_bpf_prog_detach(net, type);
+	mutex_unlock(&netns_bpf_mutex);
+}
+
+static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
+	.pre_exit = netns_bpf_pernet_pre_exit,
+};
+
+static int __init netns_bpf_init(void)
+{
+	return register_pernet_subsys(&netns_bpf_pernet_ops);
+}
+
+subsys_initcall(netns_bpf_init);
diff --git a/net/core/flow_dissector.c b/net/core/flow_dissector.c
index 6c1b8e43d611..d02df0b6d0d9 100644
--- a/net/core/flow_dissector.c
+++ b/net/core/flow_dissector.c
@@ -33,9 +33,6 @@
 #endif
 #include <linux/bpf-netns.h>
 
-/* Protects updates to netns_bpf */
-DEFINE_MUTEX(netns_bpf_mutex);
-
 static void dissector_set_key(struct flow_dissector *flow_dissector,
 			      enum flow_dissector_key_id key_id)
 {
@@ -72,52 +69,8 @@ void skb_flow_dissector_init(struct flow_dissector *flow_dissector,
 }
 EXPORT_SYMBOL(skb_flow_dissector_init);
 
-int netns_bpf_prog_query(const union bpf_attr *attr,
-			 union bpf_attr __user *uattr)
-{
-	__u32 __user *prog_ids = u64_to_user_ptr(attr->query.prog_ids);
-	u32 prog_id, prog_cnt = 0, flags = 0;
-	enum netns_bpf_attach_type type;
-	struct bpf_prog *attached;
-	struct net *net;
-
-	if (attr->query.query_flags)
-		return -EINVAL;
-
-	type = to_netns_bpf_attach_type(attr->query.attach_type);
-	if (type < 0)
-		return -EINVAL;
-
-	net = get_net_ns_by_fd(attr->query.target_fd);
-	if (IS_ERR(net))
-		return PTR_ERR(net);
-
-	rcu_read_lock();
-	attached = rcu_dereference(net->bpf.progs[type]);
-	if (attached) {
-		prog_cnt = 1;
-		prog_id = attached->aux->id;
-	}
-	rcu_read_unlock();
-
-	put_net(net);
-
-	if (copy_to_user(&uattr->query.attach_flags, &flags, sizeof(flags)))
-		return -EFAULT;
-	if (copy_to_user(&uattr->query.prog_cnt, &prog_cnt, sizeof(prog_cnt)))
-		return -EFAULT;
-
-	if (!attr->query.prog_cnt || !prog_ids || !prog_cnt)
-		return 0;
-
-	if (copy_to_user(prog_ids, &prog_id, sizeof(u32)))
-		return -EFAULT;
-
-	return 0;
-}
-
-static int flow_dissector_bpf_prog_attach(struct net *net,
-					  struct bpf_prog *prog)
+#ifdef CONFIG_BPF_SYSCALL
+int flow_dissector_bpf_prog_attach(struct net *net, struct bpf_prog *prog)
 {
 	enum netns_bpf_attach_type type = NETNS_BPF_FLOW_DISSECTOR;
 	struct bpf_prog *attached;
@@ -155,76 +108,7 @@ static int flow_dissector_bpf_prog_attach(struct net *net,
 		bpf_prog_put(attached);
 	return 0;
 }
-
-int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
-{
-	enum netns_bpf_attach_type type;
-	struct net *net;
-	int ret;
-
-	type = to_netns_bpf_attach_type(attr->attach_type);
-	if (type < 0)
-		return -EINVAL;
-
-	net = current->nsproxy->net_ns;
-	mutex_lock(&netns_bpf_mutex);
-	switch (type) {
-	case NETNS_BPF_FLOW_DISSECTOR:
-		ret = flow_dissector_bpf_prog_attach(net, prog);
-		break;
-	default:
-		ret = -EINVAL;
-		break;
-	}
-	mutex_unlock(&netns_bpf_mutex);
-
-	return ret;
-}
-
-/* Must be called with netns_bpf_mutex held. */
-static int __netns_bpf_prog_detach(struct net *net,
-				   enum netns_bpf_attach_type type)
-{
-	struct bpf_prog *attached;
-
-	attached = rcu_dereference_protected(net->bpf.progs[type],
-					     lockdep_is_held(&netns_bpf_mutex));
-	if (!attached)
-		return -ENOENT;
-	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
-	bpf_prog_put(attached);
-	return 0;
-}
-
-int netns_bpf_prog_detach(const union bpf_attr *attr)
-{
-	enum netns_bpf_attach_type type;
-	int ret;
-
-	type = to_netns_bpf_attach_type(attr->attach_type);
-	if (type < 0)
-		return -EINVAL;
-
-	mutex_lock(&netns_bpf_mutex);
-	ret = __netns_bpf_prog_detach(current->nsproxy->net_ns, type);
-	mutex_unlock(&netns_bpf_mutex);
-
-	return ret;
-}
-
-static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
-{
-	enum netns_bpf_attach_type type;
-
-	mutex_lock(&netns_bpf_mutex);
-	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
-		__netns_bpf_prog_detach(net, type);
-	mutex_unlock(&netns_bpf_mutex);
-}
-
-static struct pernet_operations netns_bpf_pernet_ops __net_initdata = {
-	.pre_exit = netns_bpf_pernet_pre_exit,
-};
+#endif /* CONFIG_BPF_SYSCALL */
 
 /**
  * __skb_flow_get_ports - extract the upper layer ports and return them
@@ -1902,7 +1786,6 @@ static int __init init_default_flow_dissectors(void)
 	skb_flow_dissector_init(&flow_keys_basic_dissector,
 				flow_keys_basic_dissector_keys,
 				ARRAY_SIZE(flow_keys_basic_dissector_keys));
-
-	return register_pernet_subsys(&netns_bpf_pernet_ops);
+	return 0;
 }
 core_initcall(init_default_flow_dissectors);
-- 
cgit v1.2.3-59-g8ed1b


From 7f045a49fee04b5662cbdeaf0838f9322ae8c63a Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:38 +0200
Subject: bpf: Add link-based BPF program attachment to network namespace

Extend bpf() syscall subcommands that operate on bpf_link, that is
LINK_CREATE, LINK_UPDATE, OBJ_GET_INFO, to accept attach types tied to
network namespaces (only flow dissector at the moment).

Link-based and prog-based attachment can be used interchangeably, but only
one can exist at a time. Attempts to attach a link when a prog is already
attached directly, and the other way around, will be met with -EEXIST.
Attempts to detach a program when link exists result in -EINVAL.

Attachment of multiple links of the same attach type to one netns is not
supported for now, with the intention to lift the restriction when a
use-case presents itself. Because of that, link create returns -E2BIG when
trying to create another netns link while one already exists.

Link-based attachments to netns don't keep a netns alive by holding a ref
to it. Instead links get auto-detached from netns when the latter is being
destroyed, using a pernet pre_exit callback.

When auto-detached, the link lives in a defunct state as long as there are
open FDs for it. -ENOLINK is returned if a user tries to update a defunct
link.

Because bpf_link to netns doesn't hold a ref to struct net, special care is
taken when releasing, updating, or filling link info. The netns might be
getting torn down when any of these link operations are in progress. That
is why auto-detach and update/release/fill_info are synchronized by the
same mutex. Also, link ops have to always check if auto-detach has not
happened yet and if netns is still alive (refcnt > 0).

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-5-jakub@cloudflare.com
---
 include/linux/bpf-netns.h      |   8 ++
 include/linux/bpf_types.h      |   3 +
 include/net/netns/bpf.h        |   1 +
 include/uapi/linux/bpf.h       |   5 +
 kernel/bpf/net_namespace.c     | 244 ++++++++++++++++++++++++++++++++++++++++-
 kernel/bpf/syscall.c           |   3 +
 tools/include/uapi/linux/bpf.h |   5 +
 7 files changed, 267 insertions(+), 2 deletions(-)

diff --git a/include/linux/bpf-netns.h b/include/linux/bpf-netns.h
index f3aec3d79824..4052d649f36d 100644
--- a/include/linux/bpf-netns.h
+++ b/include/linux/bpf-netns.h
@@ -34,6 +34,8 @@ int netns_bpf_prog_query(const union bpf_attr *attr,
 int netns_bpf_prog_attach(const union bpf_attr *attr,
 			  struct bpf_prog *prog);
 int netns_bpf_prog_detach(const union bpf_attr *attr);
+int netns_bpf_link_create(const union bpf_attr *attr,
+			  struct bpf_prog *prog);
 #else
 static inline int netns_bpf_prog_query(const union bpf_attr *attr,
 				       union bpf_attr __user *uattr)
@@ -51,6 +53,12 @@ static inline int netns_bpf_prog_detach(const union bpf_attr *attr)
 {
 	return -EOPNOTSUPP;
 }
+
+static inline int netns_bpf_link_create(const union bpf_attr *attr,
+					struct bpf_prog *prog)
+{
+	return -EOPNOTSUPP;
+}
 #endif
 
 #endif /* _BPF_NETNS_H */
diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h
index fa8e1b552acd..a18ae82a298a 100644
--- a/include/linux/bpf_types.h
+++ b/include/linux/bpf_types.h
@@ -126,3 +126,6 @@ BPF_LINK_TYPE(BPF_LINK_TYPE_TRACING, tracing)
 BPF_LINK_TYPE(BPF_LINK_TYPE_CGROUP, cgroup)
 #endif
 BPF_LINK_TYPE(BPF_LINK_TYPE_ITER, iter)
+#ifdef CONFIG_NET
+BPF_LINK_TYPE(BPF_LINK_TYPE_NETNS, netns)
+#endif
diff --git a/include/net/netns/bpf.h b/include/net/netns/bpf.h
index a858d1c5b166..a8dce2a380c8 100644
--- a/include/net/netns/bpf.h
+++ b/include/net/netns/bpf.h
@@ -12,6 +12,7 @@ struct bpf_prog;
 
 struct netns_bpf {
 	struct bpf_prog __rcu *progs[MAX_NETNS_BPF_ATTACH_TYPE];
+	struct bpf_link *links[MAX_NETNS_BPF_ATTACH_TYPE];
 };
 
 #endif /* __NETNS_BPF_H__ */
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index f862a58fb567..b9ed9f14f2a2 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -237,6 +237,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
 	BPF_LINK_TYPE_ITER = 4,
+	BPF_LINK_TYPE_NETNS = 5,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -3839,6 +3840,10 @@ struct bpf_link_info {
 			__u64 cgroup_id;
 			__u32 attach_type;
 		} cgroup;
+		struct  {
+			__u32 netns_ino;
+			__u32 attach_type;
+		} netns;
 	};
 } __attribute__((aligned(8)));
 
diff --git a/kernel/bpf/net_namespace.c b/kernel/bpf/net_namespace.c
index b37d81450c3a..78cf061f8179 100644
--- a/kernel/bpf/net_namespace.c
+++ b/kernel/bpf/net_namespace.c
@@ -8,9 +8,140 @@
  * Functions to manage BPF programs attached to netns
  */
 
+struct bpf_netns_link {
+	struct bpf_link	link;
+	enum bpf_attach_type type;
+	enum netns_bpf_attach_type netns_type;
+
+	/* We don't hold a ref to net in order to auto-detach the link
+	 * when netns is going away. Instead we rely on pernet
+	 * pre_exit callback to clear this pointer. Must be accessed
+	 * with netns_bpf_mutex held.
+	 */
+	struct net *net;
+};
+
 /* Protects updates to netns_bpf */
 DEFINE_MUTEX(netns_bpf_mutex);
 
+/* Must be called with netns_bpf_mutex held. */
+static void __net_exit bpf_netns_link_auto_detach(struct bpf_link *link)
+{
+	struct bpf_netns_link *net_link =
+		container_of(link, struct bpf_netns_link, link);
+
+	net_link->net = NULL;
+}
+
+static void bpf_netns_link_release(struct bpf_link *link)
+{
+	struct bpf_netns_link *net_link =
+		container_of(link, struct bpf_netns_link, link);
+	enum netns_bpf_attach_type type = net_link->netns_type;
+	struct net *net;
+
+	/* Link auto-detached by dying netns. */
+	if (!net_link->net)
+		return;
+
+	mutex_lock(&netns_bpf_mutex);
+
+	/* Recheck after potential sleep. We can race with cleanup_net
+	 * here, but if we see a non-NULL struct net pointer pre_exit
+	 * has not happened yet and will block on netns_bpf_mutex.
+	 */
+	net = net_link->net;
+	if (!net)
+		goto out_unlock;
+
+	net->bpf.links[type] = NULL;
+	RCU_INIT_POINTER(net->bpf.progs[type], NULL);
+
+out_unlock:
+	mutex_unlock(&netns_bpf_mutex);
+}
+
+static void bpf_netns_link_dealloc(struct bpf_link *link)
+{
+	struct bpf_netns_link *net_link =
+		container_of(link, struct bpf_netns_link, link);
+
+	kfree(net_link);
+}
+
+static int bpf_netns_link_update_prog(struct bpf_link *link,
+				      struct bpf_prog *new_prog,
+				      struct bpf_prog *old_prog)
+{
+	struct bpf_netns_link *net_link =
+		container_of(link, struct bpf_netns_link, link);
+	enum netns_bpf_attach_type type = net_link->netns_type;
+	struct net *net;
+	int ret = 0;
+
+	if (old_prog && old_prog != link->prog)
+		return -EPERM;
+	if (new_prog->type != link->prog->type)
+		return -EINVAL;
+
+	mutex_lock(&netns_bpf_mutex);
+
+	net = net_link->net;
+	if (!net || !check_net(net)) {
+		/* Link auto-detached or netns dying */
+		ret = -ENOLINK;
+		goto out_unlock;
+	}
+
+	old_prog = xchg(&link->prog, new_prog);
+	rcu_assign_pointer(net->bpf.progs[type], new_prog);
+	bpf_prog_put(old_prog);
+
+out_unlock:
+	mutex_unlock(&netns_bpf_mutex);
+	return ret;
+}
+
+static int bpf_netns_link_fill_info(const struct bpf_link *link,
+				    struct bpf_link_info *info)
+{
+	const struct bpf_netns_link *net_link =
+		container_of(link, struct bpf_netns_link, link);
+	unsigned int inum = 0;
+	struct net *net;
+
+	mutex_lock(&netns_bpf_mutex);
+	net = net_link->net;
+	if (net && check_net(net))
+		inum = net->ns.inum;
+	mutex_unlock(&netns_bpf_mutex);
+
+	info->netns.netns_ino = inum;
+	info->netns.attach_type = net_link->type;
+	return 0;
+}
+
+static void bpf_netns_link_show_fdinfo(const struct bpf_link *link,
+				       struct seq_file *seq)
+{
+	struct bpf_link_info info = {};
+
+	bpf_netns_link_fill_info(link, &info);
+	seq_printf(seq,
+		   "netns_ino:\t%u\n"
+		   "attach_type:\t%u\n",
+		   info.netns.netns_ino,
+		   info.netns.attach_type);
+}
+
+static const struct bpf_link_ops bpf_netns_link_ops = {
+	.release = bpf_netns_link_release,
+	.dealloc = bpf_netns_link_dealloc,
+	.update_prog = bpf_netns_link_update_prog,
+	.fill_link_info = bpf_netns_link_fill_info,
+	.show_fdinfo = bpf_netns_link_show_fdinfo,
+};
+
 int netns_bpf_prog_query(const union bpf_attr *attr,
 			 union bpf_attr __user *uattr)
 {
@@ -67,6 +198,13 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 
 	net = current->nsproxy->net_ns;
 	mutex_lock(&netns_bpf_mutex);
+
+	/* Attaching prog directly is not compatible with links */
+	if (net->bpf.links[type]) {
+		ret = -EEXIST;
+		goto out_unlock;
+	}
+
 	switch (type) {
 	case NETNS_BPF_FLOW_DISSECTOR:
 		ret = flow_dissector_bpf_prog_attach(net, prog);
@@ -75,6 +213,7 @@ int netns_bpf_prog_attach(const union bpf_attr *attr, struct bpf_prog *prog)
 		ret = -EINVAL;
 		break;
 	}
+out_unlock:
 	mutex_unlock(&netns_bpf_mutex);
 
 	return ret;
@@ -86,6 +225,10 @@ static int __netns_bpf_prog_detach(struct net *net,
 {
 	struct bpf_prog *attached;
 
+	/* Progs attached via links cannot be detached */
+	if (net->bpf.links[type])
+		return -EINVAL;
+
 	attached = rcu_dereference_protected(net->bpf.progs[type],
 					     lockdep_is_held(&netns_bpf_mutex));
 	if (!attached)
@@ -111,13 +254,110 @@ int netns_bpf_prog_detach(const union bpf_attr *attr)
 	return ret;
 }
 
+static int netns_bpf_link_attach(struct net *net, struct bpf_link *link,
+				 enum netns_bpf_attach_type type)
+{
+	struct bpf_prog *prog;
+	int err;
+
+	mutex_lock(&netns_bpf_mutex);
+
+	/* Allow attaching only one prog or link for now */
+	if (net->bpf.links[type]) {
+		err = -E2BIG;
+		goto out_unlock;
+	}
+	/* Links are not compatible with attaching prog directly */
+	prog = rcu_dereference_protected(net->bpf.progs[type],
+					 lockdep_is_held(&netns_bpf_mutex));
+	if (prog) {
+		err = -EEXIST;
+		goto out_unlock;
+	}
+
+	switch (type) {
+	case NETNS_BPF_FLOW_DISSECTOR:
+		err = flow_dissector_bpf_prog_attach(net, link->prog);
+		break;
+	default:
+		err = -EINVAL;
+		break;
+	}
+	if (err)
+		goto out_unlock;
+
+	net->bpf.links[type] = link;
+
+out_unlock:
+	mutex_unlock(&netns_bpf_mutex);
+	return err;
+}
+
+int netns_bpf_link_create(const union bpf_attr *attr, struct bpf_prog *prog)
+{
+	enum netns_bpf_attach_type netns_type;
+	struct bpf_link_primer link_primer;
+	struct bpf_netns_link *net_link;
+	enum bpf_attach_type type;
+	struct net *net;
+	int err;
+
+	if (attr->link_create.flags)
+		return -EINVAL;
+
+	type = attr->link_create.attach_type;
+	netns_type = to_netns_bpf_attach_type(type);
+	if (netns_type < 0)
+		return -EINVAL;
+
+	net = get_net_ns_by_fd(attr->link_create.target_fd);
+	if (IS_ERR(net))
+		return PTR_ERR(net);
+
+	net_link = kzalloc(sizeof(*net_link), GFP_USER);
+	if (!net_link) {
+		err = -ENOMEM;
+		goto out_put_net;
+	}
+	bpf_link_init(&net_link->link, BPF_LINK_TYPE_NETNS,
+		      &bpf_netns_link_ops, prog);
+	net_link->net = net;
+	net_link->type = type;
+	net_link->netns_type = netns_type;
+
+	err = bpf_link_prime(&net_link->link, &link_primer);
+	if (err) {
+		kfree(net_link);
+		goto out_put_net;
+	}
+
+	err = netns_bpf_link_attach(net, &net_link->link, netns_type);
+	if (err) {
+		bpf_link_cleanup(&link_primer);
+		goto out_put_net;
+	}
+
+	put_net(net);
+	return bpf_link_settle(&link_primer);
+
+out_put_net:
+	put_net(net);
+	return err;
+}
+
 static void __net_exit netns_bpf_pernet_pre_exit(struct net *net)
 {
 	enum netns_bpf_attach_type type;
+	struct bpf_link *link;
 
 	mutex_lock(&netns_bpf_mutex);
-	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++)
-		__netns_bpf_prog_detach(net, type);
+	for (type = 0; type < MAX_NETNS_BPF_ATTACH_TYPE; type++) {
+		link = net->bpf.links[type];
+		if (link)
+			bpf_netns_link_auto_detach(link);
+		else
+			__netns_bpf_prog_detach(net, type);
+	}
 	mutex_unlock(&netns_bpf_mutex);
 }
 
diff --git a/kernel/bpf/syscall.c b/kernel/bpf/syscall.c
index c77ab9c76f7b..e14a842d7e0d 100644
--- a/kernel/bpf/syscall.c
+++ b/kernel/bpf/syscall.c
@@ -3887,6 +3887,9 @@ static int link_create(union bpf_attr *attr)
 	case BPF_PROG_TYPE_TRACING:
 		ret = tracing_bpf_link_attach(attr, prog);
 		break;
+	case BPF_PROG_TYPE_FLOW_DISSECTOR:
+		ret = netns_bpf_link_create(attr, prog);
+		break;
 	default:
 		ret = -EINVAL;
 	}
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index f862a58fb567..b9ed9f14f2a2 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -237,6 +237,7 @@ enum bpf_link_type {
 	BPF_LINK_TYPE_TRACING = 2,
 	BPF_LINK_TYPE_CGROUP = 3,
 	BPF_LINK_TYPE_ITER = 4,
+	BPF_LINK_TYPE_NETNS = 5,
 
 	MAX_BPF_LINK_TYPE,
 };
@@ -3839,6 +3840,10 @@ struct bpf_link_info {
 			__u64 cgroup_id;
 			__u32 attach_type;
 		} cgroup;
+		struct  {
+			__u32 netns_ino;
+			__u32 attach_type;
+		} netns;
 	};
 } __attribute__((aligned(8)));
 
-- 
cgit v1.2.3-59-g8ed1b


From 0c047ecbb7bab4c1d2136f5f04bb47a66a9a12b8 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:39 +0200
Subject: bpf, cgroup: Return ENOLINK for auto-detached links on update

Failure to update a bpf_link because it has been auto-detached by a dying
cgroup currently results in an EINVAL error, even though the arguments
passed to the bpf() syscall are not wrong.

bpf_links attaching to netns in this case will return ENOLINK, which
carries the message that the link is no longer attached to anything.

Change cgroup bpf_links to do the same to keep the uAPI errors consistent.

Fixes: 0c991ebc8c69 ("bpf: Implement bpf_prog replacement for an active bpf_cgroup_link")
Suggested-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-6-jakub@cloudflare.com
---
 kernel/bpf/cgroup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/bpf/cgroup.c b/kernel/bpf/cgroup.c
index 5c0e964105ac..fdf7836750a3 100644
--- a/kernel/bpf/cgroup.c
+++ b/kernel/bpf/cgroup.c
@@ -595,7 +595,7 @@ static int cgroup_bpf_replace(struct bpf_link *link, struct bpf_prog *new_prog,
 	mutex_lock(&cgroup_mutex);
 	/* link might have been auto-released by dying cgroup, so fail */
 	if (!cg_link->cgroup) {
-		ret = -EINVAL;
+		ret = -ENOLINK;
 		goto out_unlock;
 	}
 	if (old_prog && link->prog != old_prog) {
-- 
cgit v1.2.3-59-g8ed1b


From d60d81acc2c180e33244857e35ef60072573b000 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:40 +0200
Subject: libbpf: Add support for bpf_link-based netns attachment

Add bpf_program__attach_netns(), which uses the LINK_CREATE subcommand to
create an FD-based kernel bpf_link for attach types tied to a network
namespace, that is, BPF_FLOW_DISSECTOR for the moment.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-7-jakub@cloudflare.com
---
 tools/lib/bpf/libbpf.c   | 23 ++++++++++++++++++-----
 tools/lib/bpf/libbpf.h   |  2 ++
 tools/lib/bpf/libbpf.map |  1 +
 3 files changed, 21 insertions(+), 5 deletions(-)

diff --git a/tools/lib/bpf/libbpf.c b/tools/lib/bpf/libbpf.c
index 85d4f1c5fc52..7f01be2b88b8 100644
--- a/tools/lib/bpf/libbpf.c
+++ b/tools/lib/bpf/libbpf.c
@@ -7896,8 +7896,9 @@ static struct bpf_link *attach_iter(const struct bpf_sec_def *sec,
 	return bpf_program__attach_iter(prog, NULL);
 }
 
-struct bpf_link *
-bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+static struct bpf_link *
+bpf_program__attach_fd(struct bpf_program *prog, int target_fd,
+		       const char *target_name)
 {
 	enum bpf_attach_type attach_type;
 	char errmsg[STRERR_BUFSIZE];
@@ -7917,12 +7918,12 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 	link->detach = &bpf_link__detach_fd;
 
 	attach_type = bpf_program__get_expected_attach_type(prog);
-	link_fd = bpf_link_create(prog_fd, cgroup_fd, attach_type, NULL);
+	link_fd = bpf_link_create(prog_fd, target_fd, attach_type, NULL);
 	if (link_fd < 0) {
 		link_fd = -errno;
 		free(link);
-		pr_warn("program '%s': failed to attach to cgroup: %s\n",
-			bpf_program__title(prog, false),
+		pr_warn("program '%s': failed to attach to %s: %s\n",
+			bpf_program__title(prog, false), target_name,
 			libbpf_strerror_r(link_fd, errmsg, sizeof(errmsg)));
 		return ERR_PTR(link_fd);
 	}
@@ -7930,6 +7931,18 @@ bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
 	return link;
 }
 
+struct bpf_link *
+bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd)
+{
+	return bpf_program__attach_fd(prog, cgroup_fd, "cgroup");
+}
+
+struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd)
+{
+	return bpf_program__attach_fd(prog, netns_fd, "netns");
+}
+
 struct bpf_link *
 bpf_program__attach_iter(struct bpf_program *prog,
 			 const struct bpf_iter_attach_opts *opts)
diff --git a/tools/lib/bpf/libbpf.h b/tools/lib/bpf/libbpf.h
index 8528a02d5af8..334437af3014 100644
--- a/tools/lib/bpf/libbpf.h
+++ b/tools/lib/bpf/libbpf.h
@@ -253,6 +253,8 @@ LIBBPF_API struct bpf_link *
 bpf_program__attach_lsm(struct bpf_program *prog);
 LIBBPF_API struct bpf_link *
 bpf_program__attach_cgroup(struct bpf_program *prog, int cgroup_fd);
+LIBBPF_API struct bpf_link *
+bpf_program__attach_netns(struct bpf_program *prog, int netns_fd);
 
 struct bpf_map;
 
diff --git a/tools/lib/bpf/libbpf.map b/tools/lib/bpf/libbpf.map
index c18860200abb..f732c77b7ed0 100644
--- a/tools/lib/bpf/libbpf.map
+++ b/tools/lib/bpf/libbpf.map
@@ -262,6 +262,7 @@ LIBBPF_0.0.9 {
 		bpf_link_get_fd_by_id;
 		bpf_link_get_next_id;
 		bpf_program__attach_iter;
+		bpf_program__attach_netns;
 		perf_buffer__consume;
 		ring_buffer__add;
 		ring_buffer__consume;
-- 
cgit v1.2.3-59-g8ed1b


From be6e19818ba626eb1b354490aee40a2cfc1a219f Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:41 +0200
Subject: bpftool: Extract helpers for showing link attach type

Code for printing link attach_type is duplicated in a couple of places, and
likely will be duplicated for future link types as well. Create helpers to
prevent duplication.

Suggested-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-8-jakub@cloudflare.com
---
 tools/bpf/bpftool/link.c | 44 ++++++++++++++++++++++----------------------
 1 file changed, 22 insertions(+), 22 deletions(-)

diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 670a561dc31b..1ff416eff3d7 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -62,6 +62,15 @@ show_link_header_json(struct bpf_link_info *info, json_writer_t *wtr)
 	jsonw_uint_field(json_wtr, "prog_id", info->prog_id);
 }
 
+static void show_link_attach_type_json(__u32 attach_type, json_writer_t *wtr)
+{
+	if (attach_type < ARRAY_SIZE(attach_type_name))
+		jsonw_string_field(wtr, "attach_type",
+				   attach_type_name[attach_type]);
+	else
+		jsonw_uint_field(wtr, "attach_type", attach_type);
+}
+
 static int get_prog_info(int prog_id, struct bpf_prog_info *info)
 {
 	__u32 len = sizeof(*info);
@@ -105,22 +114,13 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 			jsonw_uint_field(json_wtr, "prog_type",
 					 prog_info.type);
 
-		if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
-			jsonw_string_field(json_wtr, "attach_type",
-			       attach_type_name[info->tracing.attach_type]);
-		else
-			jsonw_uint_field(json_wtr, "attach_type",
-					 info->tracing.attach_type);
+		show_link_attach_type_json(info->tracing.attach_type,
+					   json_wtr);
 		break;
 	case BPF_LINK_TYPE_CGROUP:
 		jsonw_lluint_field(json_wtr, "cgroup_id",
 				   info->cgroup.cgroup_id);
-		if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
-			jsonw_string_field(json_wtr, "attach_type",
-			       attach_type_name[info->cgroup.attach_type]);
-		else
-			jsonw_uint_field(json_wtr, "attach_type",
-					 info->cgroup.attach_type);
+		show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
 		break;
 	default:
 		break;
@@ -153,6 +153,14 @@ static void show_link_header_plain(struct bpf_link_info *info)
 	printf("prog %u  ", info->prog_id);
 }
 
+static void show_link_attach_type_plain(__u32 attach_type)
+{
+	if (attach_type < ARRAY_SIZE(attach_type_name))
+		printf("attach_type %s  ", attach_type_name[attach_type]);
+	else
+		printf("attach_type %u  ", attach_type);
+}
+
 static int show_link_close_plain(int fd, struct bpf_link_info *info)
 {
 	struct bpf_prog_info prog_info;
@@ -176,19 +184,11 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 		else
 			printf("\n\tprog_type %u  ", prog_info.type);
 
-		if (info->tracing.attach_type < ARRAY_SIZE(attach_type_name))
-			printf("attach_type %s  ",
-			       attach_type_name[info->tracing.attach_type]);
-		else
-			printf("attach_type %u  ", info->tracing.attach_type);
+		show_link_attach_type_plain(info->tracing.attach_type);
 		break;
 	case BPF_LINK_TYPE_CGROUP:
 		printf("\n\tcgroup_id %zu  ", (size_t)info->cgroup.cgroup_id);
-		if (info->cgroup.attach_type < ARRAY_SIZE(attach_type_name))
-			printf("attach_type %s  ",
-			       attach_type_name[info->cgroup.attach_type]);
-		else
-			printf("attach_type %u  ", info->cgroup.attach_type);
+		show_link_attach_type_plain(info->cgroup.attach_type);
 		break;
 	default:
 		break;
-- 
cgit v1.2.3-59-g8ed1b


From e948947a6e111b3d4bbe538105ee2f3611e032ad Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:42 +0200
Subject: bpftool: Support link show for netns-attached links

Make `bpftool link show` aware of the new link type, that is, links attached
to a netns. When listing netns-attached links, display the netns inode number
as the link's identifier, along with the link attach type.

Sample session:

  # readlink /proc/self/ns/net
  net:[4026532251]
  # bpftool prog show
  357: flow_dissector  tag a04f5eef06a7f555  gpl
          loaded_at 2020-05-30T16:53:51+0200  uid 0
          xlated 16B  jited 37B  memlock 4096B
  358: flow_dissector  tag a04f5eef06a7f555  gpl
          loaded_at 2020-05-30T16:53:51+0200  uid 0
          xlated 16B  jited 37B  memlock 4096B
  # bpftool link show
  108: netns  prog 357
          netns_ino 4026532251  attach_type flow_dissector
  # bpftool link -jp show
  [{
          "id": 108,
          "type": "netns",
          "prog_id": 357,
          "netns_ino": 4026532251,
          "attach_type": "flow_dissector"
      }
  ]

  (... after netns is gone ...)

  # bpftool link show
  108: netns  prog 357
          netns_ino 0  attach_type flow_dissector
  # bpftool link -jp show
  [{
          "id": 108,
          "type": "netns",
          "prog_id": 357,
          "netns_ino": 0,
          "attach_type": "flow_dissector"
      }
  ]

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-9-jakub@cloudflare.com
---
 tools/bpf/bpftool/link.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/tools/bpf/bpftool/link.c b/tools/bpf/bpftool/link.c
index 1ff416eff3d7..fca57ee8fafe 100644
--- a/tools/bpf/bpftool/link.c
+++ b/tools/bpf/bpftool/link.c
@@ -17,6 +17,7 @@ static const char * const link_type_name[] = {
 	[BPF_LINK_TYPE_TRACING]			= "tracing",
 	[BPF_LINK_TYPE_CGROUP]			= "cgroup",
 	[BPF_LINK_TYPE_ITER]			= "iter",
+	[BPF_LINK_TYPE_NETNS]			= "netns",
 };
 
 static int link_parse_fd(int *argc, char ***argv)
@@ -122,6 +123,11 @@ static int show_link_close_json(int fd, struct bpf_link_info *info)
 				   info->cgroup.cgroup_id);
 		show_link_attach_type_json(info->cgroup.attach_type, json_wtr);
 		break;
+	case BPF_LINK_TYPE_NETNS:
+		jsonw_uint_field(json_wtr, "netns_ino",
+				 info->netns.netns_ino);
+		show_link_attach_type_json(info->netns.attach_type, json_wtr);
+		break;
 	default:
 		break;
 	}
@@ -190,6 +196,10 @@ static int show_link_close_plain(int fd, struct bpf_link_info *info)
 		printf("\n\tcgroup_id %zu  ", (size_t)info->cgroup.cgroup_id);
 		show_link_attach_type_plain(info->cgroup.attach_type);
 		break;
+	case BPF_LINK_TYPE_NETNS:
+		printf("\n\tnetns_ino %u  ", info->netns.netns_ino);
+		show_link_attach_type_plain(info->netns.attach_type);
+		break;
 	default:
 		break;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 1f043f87bb595bbe6c7e6b291d115284840a6c33 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:43 +0200
Subject: selftests/bpf: Add tests for attaching bpf_link to netns

Extend the existing test case for flow dissector attaching to cover:

 - link creation,
 - link updates,
 - link info querying,
 - mixing links with direct prog attachment.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-10-jakub@cloudflare.com
---
 .../bpf/prog_tests/flow_dissector_reattach.c       | 588 +++++++++++++++++++--
 1 file changed, 551 insertions(+), 37 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
index 1f51ba66b98b..15cb554a66d8 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector_reattach.c
@@ -11,6 +11,7 @@
 #include <fcntl.h>
 #include <sched.h>
 #include <stdbool.h>
+#include <sys/stat.h>
 #include <unistd.h>
 
 #include <linux/bpf.h>
@@ -18,21 +19,30 @@
 
 #include "test_progs.h"
 
-static bool is_attached(int netns)
+static int init_net = -1;
+
+static __u32 query_attached_prog_id(int netns)
 {
-	__u32 cnt;
+	__u32 prog_ids[1] = {};
+	__u32 prog_cnt = ARRAY_SIZE(prog_ids);
 	int err;
 
-	err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL, NULL, &cnt);
+	err = bpf_prog_query(netns, BPF_FLOW_DISSECTOR, 0, NULL,
+			     prog_ids, &prog_cnt);
 	if (CHECK_FAIL(err)) {
 		perror("bpf_prog_query");
-		return true; /* fail-safe */
+		return 0;
 	}
 
-	return cnt > 0;
+	return prog_cnt == 1 ? prog_ids[0] : 0;
+}
+
+static bool prog_is_attached(int netns)
+{
+	return query_attached_prog_id(netns) > 0;
 }
 
-static int load_prog(void)
+static int load_prog(enum bpf_prog_type type)
 {
 	struct bpf_insn prog[] = {
 		BPF_MOV64_IMM(BPF_REG_0, BPF_OK),
@@ -40,61 +50,566 @@ static int load_prog(void)
 	};
 	int fd;
 
-	fd = bpf_load_program(BPF_PROG_TYPE_FLOW_DISSECTOR, prog,
-			      ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
+	fd = bpf_load_program(type, prog, ARRAY_SIZE(prog), "GPL", 0, NULL, 0);
 	if (CHECK_FAIL(fd < 0))
 		perror("bpf_load_program");
 
 	return fd;
 }
 
-static void do_flow_dissector_reattach(void)
+static __u32 query_prog_id(int prog)
 {
-	int prog_fd[2] = { -1, -1 };
+	struct bpf_prog_info info = {};
+	__u32 info_len = sizeof(info);
 	int err;
 
-	prog_fd[0] = load_prog();
-	if (prog_fd[0] < 0)
-		return;
+	err = bpf_obj_get_info_by_fd(prog, &info, &info_len);
+	if (CHECK_FAIL(err || info_len != sizeof(info))) {
+		perror("bpf_obj_get_info_by_fd");
+		return 0;
+	}
 
-	prog_fd[1] = load_prog();
-	if (prog_fd[1] < 0)
-		goto out_close;
+	return info.id;
+}
+
+static int unshare_net(int old_net)
+{
+	int err, new_net;
 
-	err = bpf_prog_attach(prog_fd[0], 0, BPF_FLOW_DISSECTOR, 0);
+	err = unshare(CLONE_NEWNET);
 	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach-0");
-		goto out_close;
+		perror("unshare(CLONE_NEWNET)");
+		return -1;
+	}
+	new_net = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK_FAIL(new_net < 0)) {
+		perror("open(/proc/self/ns/net)");
+		setns(old_net, CLONE_NEWNET);
+		return -1;
 	}
+	return new_net;
+}
+
+static void test_prog_attach_prog_attach(int netns, int prog1, int prog2)
+{
+	int err;
+
+	err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
 
 	/* Expect success when attaching a different program */
-	err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+	err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
 	if (CHECK_FAIL(err)) {
-		perror("bpf_prog_attach-1");
+		perror("bpf_prog_attach(prog2) #1");
 		goto out_detach;
 	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
 
 	/* Expect failure when attaching the same program twice */
-	err = bpf_prog_attach(prog_fd[1], 0, BPF_FLOW_DISSECTOR, 0);
+	err = bpf_prog_attach(prog2, 0, BPF_FLOW_DISSECTOR, 0);
 	if (CHECK_FAIL(!err || errno != EINVAL))
-		perror("bpf_prog_attach-2");
+		perror("bpf_prog_attach(prog2) #2");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
 
 out_detach:
 	err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
 	if (CHECK_FAIL(err))
 		perror("bpf_prog_detach");
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_link_create(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int link1, link2;
+
+	link1 = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link1 < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure creating link when another link exists */
+	errno = 0;
+	link2 = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link2 != -1 || errno != E2BIG))
+		perror("bpf_link_create(prog2) expected E2BIG");
+	if (link2 != -1)
+		close(link2);
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link1);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_link_create(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	err = bpf_prog_attach(prog1, -1, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure creating link when prog attached */
+	errno = 0;
+	link = bpf_link_create(prog2, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link != -1 || errno != EEXIST))
+		perror("bpf_link_create(prog2) expected EEXIST");
+	if (link != -1)
+		close(link);
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(err))
+		perror("bpf_prog_detach");
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_attach(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure attaching prog when link exists */
+	errno = 0;
+	err = bpf_prog_attach(prog2, -1, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(!err || errno != EEXIST))
+		perror("bpf_prog_attach(prog2) expected EEXIST");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_prog_detach(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure detaching prog when link exists */
+	errno = 0;
+	err = bpf_prog_detach(-1, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_prog_detach expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_prog_attach_detach_query(int netns, int prog1, int prog2)
+{
+	int err;
+
+	err = bpf_prog_attach(prog1, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_attach(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	err = bpf_prog_detach(0, BPF_FLOW_DISSECTOR);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_prog_detach");
+		return;
+	}
+
+	/* Expect no prog attached after successful detach */
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_create_close_query(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, opts);
+	int link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(link);
+	/* Expect no prog attached after closing last link FD */
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_no_old_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect success replacing the prog when old prog not specified */
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err))
+		perror("bpf_link_update");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_replace_old_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
 
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect success F_REPLACE and old prog specified to succeed */
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = prog1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err))
+		perror("bpf_link_update");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog2));
+
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_opts(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail w/ old prog FD but w/o F_REPLACE*/
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = prog1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL)) {
+		perror("bpf_link_update expected EINVAL");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail on old prog FD mismatch */
+	errno = 0;
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = prog2;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EPERM)) {
+		perror("bpf_link_update expected EPERM");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail for invalid old prog FD */
+	errno = 0;
+	update_opts.flags = BPF_F_REPLACE;
+	update_opts.old_prog_fd = -1;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EBADF)) {
+		perror("bpf_link_update expected EBADF");
+		goto out_close;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect update to fail with invalid flags */
+	errno = 0;
+	update_opts.flags = BPF_F_ALLOW_MULTI;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_link_update expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+out_close:
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_invalid_prog(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link, prog3;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	/* Expect failure when new prog FD is not valid */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, -1, &update_opts);
+	if (CHECK_FAIL(!err || errno != EBADF)) {
+		perror("bpf_link_update expected EBADF");
+		goto out_close_link;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	prog3 = load_prog(BPF_PROG_TYPE_SOCKET_FILTER);
+	if (prog3 < 0)
+		goto out_close_link;
+
+	/* Expect failure when new prog FD type doesn't match */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog3, &update_opts);
+	if (CHECK_FAIL(!err || errno != EINVAL))
+		perror("bpf_link_update expected EINVAL");
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(prog3);
+out_close_link:
+	close(link);
+	CHECK_FAIL(prog_is_attached(netns));
+}
+
+static void test_link_update_netns_gone(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	int err, link, old_net;
+
+	old_net = netns;
+	netns = unshare_net(old_net);
+	if (netns < 0)
+		return;
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		return;
+	}
+	CHECK_FAIL(query_attached_prog_id(netns) != query_prog_id(prog1));
+
+	close(netns);
+	err = setns(old_net, CLONE_NEWNET);
+	if (CHECK_FAIL(err)) {
+		perror("setns(CLONE_NEWNET)");
+		close(link);
+		return;
+	}
+
+	/* Expect failure when netns destroyed */
+	errno = 0;
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(!err || errno != ENOLINK))
+		perror("bpf_link_update");
+
+	close(link);
+}
+
+static void test_link_get_info(int netns, int prog1, int prog2)
+{
+	DECLARE_LIBBPF_OPTS(bpf_link_create_opts, create_opts);
+	DECLARE_LIBBPF_OPTS(bpf_link_update_opts, update_opts);
+	struct bpf_link_info info = {};
+	struct stat netns_stat = {};
+	__u32 info_len, link_id;
+	int err, link, old_net;
+
+	old_net = netns;
+	netns = unshare_net(old_net);
+	if (netns < 0)
+		return;
+
+	err = fstat(netns, &netns_stat);
+	if (CHECK_FAIL(err)) {
+		perror("stat(netns)");
+		goto out_resetns;
+	}
+
+	link = bpf_link_create(prog1, netns, BPF_FLOW_DISSECTOR, &create_opts);
+	if (CHECK_FAIL(link < 0)) {
+		perror("bpf_link_create(prog1)");
+		goto out_resetns;
+	}
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect link info to be sane and match prog and netns details */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id == 0);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog1));
+	CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+	update_opts.flags = 0;
+	update_opts.old_prog_fd = 0;
+	err = bpf_link_update(link, prog2, &update_opts);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_link_update(prog2)");
+		goto out_unlink;
+	}
+
+	link_id = info.id;
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect no info change after update except in prog id */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id != link_id);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+	CHECK_FAIL(info.netns.netns_ino != netns_stat.st_ino);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+	/* Leave netns link is attached to and close last FD to it */
+	err = setns(old_net, CLONE_NEWNET);
+	if (CHECK_FAIL(err)) {
+		perror("setns(NEWNET)");
+		goto out_unlink;
+	}
+	close(netns);
+	old_net = -1;
+	netns = -1;
+
+	info_len = sizeof(info);
+	err = bpf_obj_get_info_by_fd(link, &info, &info_len);
+	if (CHECK_FAIL(err)) {
+		perror("bpf_obj_get_info");
+		goto out_unlink;
+	}
+	CHECK_FAIL(info_len != sizeof(info));
+
+	/* Expect netns_ino to change to 0 */
+	CHECK_FAIL(info.type != BPF_LINK_TYPE_NETNS);
+	CHECK_FAIL(info.id != link_id);
+	CHECK_FAIL(info.prog_id != query_prog_id(prog2));
+	CHECK_FAIL(info.netns.netns_ino != 0);
+	CHECK_FAIL(info.netns.attach_type != BPF_FLOW_DISSECTOR);
+
+out_unlink:
+	close(link);
+out_resetns:
+	if (old_net != -1)
+		setns(old_net, CLONE_NEWNET);
+	if (netns != -1)
+		close(netns);
+}
+
+static void run_tests(int netns)
+{
+	struct test {
+		const char *test_name;
+		void (*test_func)(int netns, int prog1, int prog2);
+	} tests[] = {
+		{ "prog attach, prog attach",
+		  test_prog_attach_prog_attach },
+		{ "link create, link create",
+		  test_link_create_link_create },
+		{ "prog attach, link create",
+		  test_prog_attach_link_create },
+		{ "link create, prog attach",
+		  test_link_create_prog_attach },
+		{ "link create, prog detach",
+		  test_link_create_prog_detach },
+		{ "prog attach, detach, query",
+		  test_prog_attach_detach_query },
+		{ "link create, close, query",
+		  test_link_create_close_query },
+		{ "link update no old prog",
+		  test_link_update_no_old_prog },
+		{ "link update with replace old prog",
+		  test_link_update_replace_old_prog },
+		{ "link update invalid opts",
+		  test_link_update_invalid_opts },
+		{ "link update invalid prog",
+		  test_link_update_invalid_prog },
+		{ "link update netns gone",
+		  test_link_update_netns_gone },
+		{ "link get info",
+		  test_link_get_info },
+	};
+	int i, progs[2] = { -1, -1 };
+	char test_name[80];
+
+	for (i = 0; i < ARRAY_SIZE(progs); i++) {
+		progs[i] = load_prog(BPF_PROG_TYPE_FLOW_DISSECTOR);
+		if (progs[i] < 0)
+			goto out_close;
+	}
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		snprintf(test_name, sizeof(test_name),
+			 "flow dissector %s%s",
+			 tests[i].test_name,
+			 netns == init_net ? " (init_net)" : "");
+		if (test__start_subtest(test_name))
+			tests[i].test_func(netns, progs[0], progs[1]);
+	}
 out_close:
-	close(prog_fd[1]);
-	close(prog_fd[0]);
+	for (i = 0; i < ARRAY_SIZE(progs); i++) {
+		if (progs[i] != -1)
+			CHECK_FAIL(close(progs[i]));
+	}
 }
 
 void test_flow_dissector_reattach(void)
 {
-	int init_net, self_net, err;
+	int err, new_net, saved_net;
 
-	self_net = open("/proc/self/ns/net", O_RDONLY);
-	if (CHECK_FAIL(self_net < 0)) {
+	saved_net = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK_FAIL(saved_net < 0)) {
 		perror("open(/proc/self/ns/net");
 		return;
 	}
@@ -111,30 +626,29 @@ void test_flow_dissector_reattach(void)
 		goto out_close;
 	}
 
-	if (is_attached(init_net)) {
+	if (prog_is_attached(init_net)) {
 		test__skip();
 		printf("Can't test with flow dissector attached to init_net\n");
 		goto out_setns;
 	}
 
 	/* First run tests in root network namespace */
-	do_flow_dissector_reattach();
+	run_tests(init_net);
 
 	/* Then repeat tests in a non-root namespace */
-	err = unshare(CLONE_NEWNET);
-	if (CHECK_FAIL(err)) {
-		perror("unshare(CLONE_NEWNET)");
+	new_net = unshare_net(init_net);
+	if (new_net < 0)
 		goto out_setns;
-	}
-	do_flow_dissector_reattach();
+	run_tests(new_net);
+	close(new_net);
 
 out_setns:
 	/* Move back to netns we started in. */
-	err = setns(self_net, CLONE_NEWNET);
+	err = setns(saved_net, CLONE_NEWNET);
 	if (CHECK_FAIL(err))
 		perror("setns(/proc/self/ns/net)");
 
 out_close:
 	close(init_net);
-	close(self_net);
+	close(saved_net);
 }
-- 
cgit v1.2.3-59-g8ed1b


From b8215dce7dfd817ca38807f55165bf502146cd68 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:44 +0200
Subject: selftests/bpf, flow_dissector: Close TAP device FD after the test

test_flow_dissector leaves a TAP device after it's finished, potentially
interfering with other tests that will run after it. Fix it by closing the
TAP descriptor on cleanup.

Fixes: 0905beec9f52 ("selftests/bpf: run flow dissector tests in skb-less mode")
Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-11-jakub@cloudflare.com
---
 tools/testing/selftests/bpf/prog_tests/flow_dissector.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index 2301c4d3ecec..ef5aab2f60b5 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -524,6 +524,7 @@ void test_flow_dissector(void)
 		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
 	}
 
+	close(tap_fd);
 	bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
 	bpf_object__close(obj);
 }
-- 
cgit v1.2.3-59-g8ed1b


From b4b8a3bf9ef0fbbf343b624d68ea328dd4edd5c4 Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:45 +0200
Subject: selftests/bpf: Convert test_flow_dissector to use BPF skeleton

Switch flow dissector test setup from custom BPF object loader to BPF
skeleton to save boilerplate and prepare for testing higher-level API for
attaching flow dissector with bpf_link.

To avoid depending on program order in the BPF object when populating the
flow dissector PROG_ARRAY map, change the program section names to contain
the program index into the map. This follows the example set by tailcall
tests.

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-12-jakub@cloudflare.com
---
 .../selftests/bpf/prog_tests/flow_dissector.c      | 50 +++++++++++++++++++---
 tools/testing/selftests/bpf/progs/bpf_flow.c       | 20 ++++-----
 2 files changed, 55 insertions(+), 15 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index ef5aab2f60b5..b6370c0b3b7a 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -6,6 +6,8 @@
 #include <linux/if_tun.h>
 #include <sys/uio.h>
 
+#include "bpf_flow.skel.h"
+
 #ifndef IP_MF
 #define IP_MF 0x2000
 #endif
@@ -444,17 +446,54 @@ static int ifup(const char *ifname)
 	return 0;
 }
 
+static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
+{
+	int i, err, map_fd, prog_fd;
+	struct bpf_program *prog;
+	char prog_name[32];
+
+	map_fd = bpf_map__fd(prog_array);
+	if (map_fd < 0)
+		return -1;
+
+	for (i = 0; i < bpf_map__def(prog_array)->max_entries; i++) {
+		snprintf(prog_name, sizeof(prog_name), "flow_dissector/%i", i);
+
+		prog = bpf_object__find_program_by_title(obj, prog_name);
+		if (!prog)
+			return -1;
+
+		prog_fd = bpf_program__fd(prog);
+		if (prog_fd < 0)
+			return -1;
+
+		err = bpf_map_update_elem(map_fd, &i, &prog_fd, BPF_ANY);
+		if (err)
+			return -1;
+	}
+	return 0;
+}
+
 void test_flow_dissector(void)
 {
 	int i, err, prog_fd, keys_fd = -1, tap_fd;
-	struct bpf_object *obj;
+	struct bpf_flow *skel;
 	__u32 duration = 0;
 
-	err = bpf_flow_load(&obj, "./bpf_flow.o", "flow_dissector",
-			    "jmp_table", "last_dissection", &prog_fd, &keys_fd);
-	if (CHECK_FAIL(err))
+	skel = bpf_flow__open_and_load();
+	if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
 		return;
 
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+		goto out_destroy_skel;
+	keys_fd = bpf_map__fd(skel->maps.last_dissection);
+	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+		goto out_destroy_skel;
+	err = init_prog_array(skel->obj, skel->maps.jmp_table);
+	if (CHECK(err, "init_prog_array", "err %d\n", err))
+		goto out_destroy_skel;
+
 	for (i = 0; i < ARRAY_SIZE(tests); i++) {
 		struct bpf_flow_keys flow_keys;
 		struct bpf_prog_test_run_attr tattr = {
@@ -526,5 +565,6 @@ void test_flow_dissector(void)
 
 	close(tap_fd);
 	bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
-	bpf_object__close(obj);
+out_destroy_skel:
+	bpf_flow__destroy(skel);
 }
diff --git a/tools/testing/selftests/bpf/progs/bpf_flow.c b/tools/testing/selftests/bpf/progs/bpf_flow.c
index 9941f0ba471e..de6de9221518 100644
--- a/tools/testing/selftests/bpf/progs/bpf_flow.c
+++ b/tools/testing/selftests/bpf/progs/bpf_flow.c
@@ -20,20 +20,20 @@
 #include <bpf/bpf_endian.h>
 
 int _version SEC("version") = 1;
-#define PROG(F) SEC(#F) int bpf_func_##F
+#define PROG(F) PROG_(F, _##F)
+#define PROG_(NUM, NAME) SEC("flow_dissector/"#NUM) int bpf_func##NAME
 
 /* These are the identifiers of the BPF programs that will be used in tail
  * calls. Name is limited to 16 characters, with the terminating character and
  * bpf_func_ above, we have only 6 to work with, anything after will be cropped.
  */
-enum {
-	IP,
-	IPV6,
-	IPV6OP,	/* Destination/Hop-by-Hop Options IPv6 Extension header */
-	IPV6FR,	/* Fragmentation IPv6 Extension Header */
-	MPLS,
-	VLAN,
-};
+#define IP		0
+#define IPV6		1
+#define IPV6OP		2 /* Destination/Hop-by-Hop Options IPv6 Ext. Header */
+#define IPV6FR		3 /* Fragmentation IPv6 Extension Header */
+#define MPLS		4
+#define VLAN		5
+#define MAX_PROG	6
 
 #define IP_MF		0x2000
 #define IP_OFFSET	0x1FFF
@@ -59,7 +59,7 @@ struct frag_hdr {
 
 struct {
 	__uint(type, BPF_MAP_TYPE_PROG_ARRAY);
-	__uint(max_entries, 8);
+	__uint(max_entries, MAX_PROG);
 	__uint(key_size, sizeof(__u32));
 	__uint(value_size, sizeof(__u32));
 } jmp_table SEC(".maps");
-- 
cgit v1.2.3-59-g8ed1b


From 06716e04a043aa5e010f952a823ad038054b0e5c Mon Sep 17 00:00:00 2001
From: Jakub Sitnicki <jakub@cloudflare.com>
Date: Sun, 31 May 2020 10:28:46 +0200
Subject: selftests/bpf: Extend test_flow_dissector to cover link creation

Extend the existing flow_dissector test case to run tests once using direct
prog attachments, and then for the second time using indirect attachment
via link.

The intention is to exercise the newly added high-level API for attaching
programs to network namespace with links (bpf_program__attach_netns).

Signed-off-by: Jakub Sitnicki <jakub@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Link: https://lore.kernel.org/bpf/20200531082846.2117903-13-jakub@cloudflare.com
---
 .../selftests/bpf/prog_tests/flow_dissector.c      | 115 +++++++++++++++------
 1 file changed, 82 insertions(+), 33 deletions(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
index b6370c0b3b7a..ea14e3ece812 100644
--- a/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
+++ b/tools/testing/selftests/bpf/prog_tests/flow_dissector.c
@@ -103,6 +103,7 @@ struct test {
 
 #define VLAN_HLEN	4
 
+static __u32 duration;
 struct test tests[] = {
 	{
 		.name = "ipv4",
@@ -474,11 +475,87 @@ static int init_prog_array(struct bpf_object *obj, struct bpf_map *prog_array)
 	return 0;
 }
 
+static void run_tests_skb_less(int tap_fd, struct bpf_map *keys)
+{
+	int i, err, keys_fd;
+
+	keys_fd = bpf_map__fd(keys);
+	if (CHECK(keys_fd < 0, "bpf_map__fd", "err %d\n", keys_fd))
+		return;
+
+	for (i = 0; i < ARRAY_SIZE(tests); i++) {
+		/* Keep in sync with 'flags' from eth_get_headlen. */
+		__u32 eth_get_headlen_flags =
+			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
+		struct bpf_prog_test_run_attr tattr = {};
+		struct bpf_flow_keys flow_keys = {};
+		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
+			    tests[i].keys.dport;
+
+		/* For skb-less case we can't pass input flags; run
+		 * only the tests that have a matching set of flags.
+		 */
+
+		if (tests[i].flags != eth_get_headlen_flags)
+			continue;
+
+		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
+		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
+
+		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
+		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
+
+		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
+		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
+
+		err = bpf_map_delete_elem(keys_fd, &key);
+		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
+	}
+}
+
+static void test_skb_less_prog_attach(struct bpf_flow *skel, int tap_fd)
+{
+	int err, prog_fd;
+
+	prog_fd = bpf_program__fd(skel->progs._dissect);
+	if (CHECK(prog_fd < 0, "bpf_program__fd", "err %d\n", prog_fd))
+		return;
+
+	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
+	if (CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno))
+		return;
+
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+	err = bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
+	CHECK(err, "bpf_prog_detach", "err %d errno %d\n", err, errno);
+}
+
+static void test_skb_less_link_create(struct bpf_flow *skel, int tap_fd)
+{
+	struct bpf_link *link;
+	int err, net_fd;
+
+	net_fd = open("/proc/self/ns/net", O_RDONLY);
+	if (CHECK(net_fd < 0, "open(/proc/self/ns/net)", "err %d\n", errno))
+		return;
+
+	link = bpf_program__attach_netns(skel->progs._dissect, net_fd);
+	if (CHECK(IS_ERR(link), "attach_netns", "err %ld\n", PTR_ERR(link)))
+		goto out_close;
+
+	run_tests_skb_less(tap_fd, skel->maps.last_dissection);
+
+	err = bpf_link__destroy(link);
+	CHECK(err, "bpf_link__destroy", "err %d\n", err);
+out_close:
+	close(net_fd);
+}
+
 void test_flow_dissector(void)
 {
 	int i, err, prog_fd, keys_fd = -1, tap_fd;
 	struct bpf_flow *skel;
-	__u32 duration = 0;
 
 	skel = bpf_flow__open_and_load();
 	if (CHECK(!skel, "skel", "failed to open/load skeleton\n"))
@@ -526,45 +603,17 @@ void test_flow_dissector(void)
 	 * via BPF map in this case.
 	 */
 
-	err = bpf_prog_attach(prog_fd, 0, BPF_FLOW_DISSECTOR, 0);
-	CHECK(err, "bpf_prog_attach", "err %d errno %d\n", err, errno);
-
 	tap_fd = create_tap("tap0");
 	CHECK(tap_fd < 0, "create_tap", "tap_fd %d errno %d\n", tap_fd, errno);
 	err = ifup("tap0");
 	CHECK(err, "ifup", "err %d errno %d\n", err, errno);
 
-	for (i = 0; i < ARRAY_SIZE(tests); i++) {
-		/* Keep in sync with 'flags' from eth_get_headlen. */
-		__u32 eth_get_headlen_flags =
-			BPF_FLOW_DISSECTOR_F_PARSE_1ST_FRAG;
-		struct bpf_prog_test_run_attr tattr = {};
-		struct bpf_flow_keys flow_keys = {};
-		__u32 key = (__u32)(tests[i].keys.sport) << 16 |
-			    tests[i].keys.dport;
-
-		/* For skb-less case we can't pass input flags; run
-		 * only the tests that have a matching set of flags.
-		 */
-
-		if (tests[i].flags != eth_get_headlen_flags)
-			continue;
-
-		err = tx_tap(tap_fd, &tests[i].pkt, sizeof(tests[i].pkt));
-		CHECK(err < 0, "tx_tap", "err %d errno %d\n", err, errno);
-
-		err = bpf_map_lookup_elem(keys_fd, &key, &flow_keys);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_lookup_elem %d\n", err);
-
-		CHECK_ATTR(err, tests[i].name, "skb-less err %d\n", err);
-		CHECK_FLOW_KEYS(tests[i].name, flow_keys, tests[i].keys);
-
-		err = bpf_map_delete_elem(keys_fd, &key);
-		CHECK_ATTR(err, tests[i].name, "bpf_map_delete_elem %d\n", err);
-	}
+	/* Test direct prog attachment */
+	test_skb_less_prog_attach(skel, tap_fd);
+	/* Test indirect prog attachment via link */
+	test_skb_less_link_create(skel, tap_fd);
 
 	close(tap_fd);
-	bpf_prog_detach(prog_fd, BPF_FLOW_DISSECTOR);
 out_destroy_skel:
 	bpf_flow__destroy(skel);
 }
-- 
cgit v1.2.3-59-g8ed1b


From 4c21daae3dbc9f8536cc18e6e53627821fa2c90c Mon Sep 17 00:00:00 2001
From: YueHaibing <yuehaibing@huawei.com>
Date: Thu, 28 May 2020 22:34:07 +0800
Subject: tipc: Fix NULL pointer dereference in __tipc_sendstream()

tipc_sendstream() may send a zero-length packet. In that case
tipc_msg_append() does not allocate an skb, skb_peek_tail() returns NULL,
and msg_set_ack_required() triggers a NULL pointer dereference.

Reported-by: syzbot+8eac6d030e7807c21d32@syzkaller.appspotmail.com
Fixes: 0a3e060f340d ("tipc: add test for Nagle algorithm effectiveness")
Signed-off-by: YueHaibing <yuehaibing@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/socket.c | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/net/tipc/socket.c b/net/tipc/socket.c
index 3734cdbedc9c..26123f4177fd 100644
--- a/net/tipc/socket.c
+++ b/net/tipc/socket.c
@@ -1588,8 +1588,12 @@ static int __tipc_sendstream(struct socket *sock, struct msghdr *m, size_t dlen)
 				tsk->pkt_cnt += skb_queue_len(txq);
 			} else {
 				skb = skb_peek_tail(txq);
-				msg_set_ack_required(buf_msg(skb));
-				tsk->expect_ack = true;
+				if (skb) {
+					msg_set_ack_required(buf_msg(skb));
+					tsk->expect_ack = true;
+				} else {
+					tsk->expect_ack = false;
+				}
 				tsk->msg_acc = 0;
 				tsk->pkt_cnt = 0;
 			}
-- 
cgit v1.2.3-59-g8ed1b


From 79a1f0ccdbb4ad700590f61b00525b390cb53905 Mon Sep 17 00:00:00 2001
From: Hangbin Liu <liuhangbin@gmail.com>
Date: Mon, 1 Jun 2020 11:55:03 +0800
Subject: ipv6: fix IPV6_ADDRFORM operation logic

Socket option IPV6_ADDRFORM supports UDP/UDPLITE and TCP at present.
Previously the checking logic looked like:
if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE)
	do_some_check;
else if (sk->sk_protocol != IPPROTO_TCP)
	break;

After commit b6f6118901d1 ("ipv6: restrict IPV6_ADDRFORM operation"), TCP
was blocked as the logic changed to:
if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE)
	do_some_check;
else if (sk->sk_protocol == IPPROTO_TCP)
	do_some_check;
	break;
else
	break;

Then after commit 82c9ae440857 ("ipv6: fix restrict IPV6_ADDRFORM operation")
UDP/UDPLITE were blocked as the logic changed to:
if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE)
	do_some_check;
if (sk->sk_protocol == IPPROTO_TCP)
	do_some_check;

if (sk->sk_protocol != IPPROTO_TCP)
	break;

Fix it by using Eric's code and simply removing the break in the TCP check,
so that the logic looks like:
if (sk->sk_protocol == IPPROTO_UDP || sk->sk_protocol == IPPROTO_UDPLITE)
	do_some_check;
else if (sk->sk_protocol == IPPROTO_TCP)
	do_some_check;
else
	break;

Fixes: 82c9ae440857 ("ipv6: fix restrict IPV6_ADDRFORM operation")
Signed-off-by: Hangbin Liu <liuhangbin@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/ipv6/ipv6_sockglue.c | 13 +++++++------
 1 file changed, 7 insertions(+), 6 deletions(-)

diff --git a/net/ipv6/ipv6_sockglue.c b/net/ipv6/ipv6_sockglue.c
index adbfed6adf11..2c843ff5e3a9 100644
--- a/net/ipv6/ipv6_sockglue.c
+++ b/net/ipv6/ipv6_sockglue.c
@@ -218,14 +218,15 @@ static int do_ipv6_setsockopt(struct sock *sk, int level, int optname,
 					retv = -EBUSY;
 					break;
 				}
-			}
-			if (sk->sk_protocol == IPPROTO_TCP &&
-			    sk->sk_prot != &tcpv6_prot) {
-				retv = -EBUSY;
+			} else if (sk->sk_protocol == IPPROTO_TCP) {
+				if (sk->sk_prot != &tcpv6_prot) {
+					retv = -EBUSY;
+					break;
+				}
+			} else {
 				break;
 			}
-			if (sk->sk_protocol != IPPROTO_TCP)
-				break;
+
 			if (sk->sk_state != TCP_ESTABLISHED) {
 				retv = -ENOTCONN;
 				break;
-- 
cgit v1.2.3-59-g8ed1b


From a3ac249a1ab57552cb2a63e70556ee87610a591d Mon Sep 17 00:00:00 2001
From: Rohit Maheshwari <rohitm@chelsio.com>
Date: Mon, 1 Jun 2020 13:08:29 +0530
Subject: cxgb4/chcr: Enable ktls settings at run time

The current design enables the ktls setting from the start, which is not
efficient. Now the feature will be enabled when the user requests
TLS offload on any interface.

v1->v2:
- taking ULD module refcount till any single connection exists.
- taking rtnl_lock() before clearing tls_devops.

v2->v3:
- cxgb4 is now registering to tlsdev_ops.
- module refcount inc/dec in chcr.
- refcount is only for connections.
- removed new code from cxgb_set_feature().

v3->v4:
- fixed warning message.

Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_core.c                 | 23 +++---
 drivers/crypto/chelsio/chcr_core.h                 | 10 ++-
 drivers/crypto/chelsio/chcr_ktls.c                 | 59 ++++-----------
 drivers/crypto/chelsio/chcr_ktls.h                 |  9 ++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4.h         |  4 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c |  2 +
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c    | 85 +++++++++++++++++++++-
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c     | 71 ++++++++++++------
 drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h     |  7 ++
 drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h      | 10 ++-
 10 files changed, 195 insertions(+), 85 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_core.c b/drivers/crypto/chelsio/chcr_core.c
index ffd4ec0c7374..bd8dac806e7a 100644
--- a/drivers/crypto/chelsio/chcr_core.c
+++ b/drivers/crypto/chelsio/chcr_core.c
@@ -33,6 +33,13 @@ static int cpl_fw6_pld_handler(struct adapter *adap, unsigned char *input);
 static void *chcr_uld_add(const struct cxgb4_lld_info *lld);
 static int chcr_uld_state_change(void *handle, enum cxgb4_state state);
 
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+static const struct tlsdev_ops chcr_ktls_ops = {
+	.tls_dev_add = chcr_ktls_dev_add,
+	.tls_dev_del = chcr_ktls_dev_del,
+};
+#endif
+
 #ifdef CONFIG_CHELSIO_IPSEC_INLINE
 static void update_netdev_features(void);
 #endif /* CONFIG_CHELSIO_IPSEC_INLINE */
@@ -56,6 +63,9 @@ static struct cxgb4_uld_info chcr_uld_info = {
 #if defined(CONFIG_CHELSIO_IPSEC_INLINE) || defined(CONFIG_CHELSIO_TLS_DEVICE)
 	.tx_handler = chcr_uld_tx_handler,
 #endif /* CONFIG_CHELSIO_IPSEC_INLINE || CONFIG_CHELSIO_TLS_DEVICE */
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+	.tlsdev_ops = &chcr_ktls_ops,
+#endif
 };
 
 static void detach_work_fn(struct work_struct *work)
@@ -207,11 +217,6 @@ static void *chcr_uld_add(const struct cxgb4_lld_info *lld)
 	}
 	u_ctx->lldi = *lld;
 	chcr_dev_init(u_ctx);
-
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-	if (lld->ulp_crypto & ULP_CRYPTO_KTLS_INLINE)
-		chcr_enable_ktls(padap(&u_ctx->dev));
-#endif
 out:
 	return u_ctx;
 }
@@ -348,20 +353,12 @@ static void __exit chcr_crypto_exit(void)
 	list_for_each_entry_safe(u_ctx, tmp, &drv_data.act_dev, entry) {
 		adap = padap(&u_ctx->dev);
 		memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats));
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-		if (u_ctx->lldi.ulp_crypto & ULP_CRYPTO_KTLS_INLINE)
-			chcr_disable_ktls(adap);
-#endif
 		list_del(&u_ctx->entry);
 		kfree(u_ctx);
 	}
 	list_for_each_entry_safe(u_ctx, tmp, &drv_data.inact_dev, entry) {
 		adap = padap(&u_ctx->dev);
 		memset(&adap->chcr_stats, 0, sizeof(adap->chcr_stats));
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-		if (u_ctx->lldi.ulp_crypto & ULP_CRYPTO_KTLS_INLINE)
-			chcr_disable_ktls(adap);
-#endif
 		list_del(&u_ctx->entry);
 		kfree(u_ctx);
 	}
diff --git a/drivers/crypto/chelsio/chcr_core.h b/drivers/crypto/chelsio/chcr_core.h
index 2c09672e00a4..67d77abd6775 100644
--- a/drivers/crypto/chelsio/chcr_core.h
+++ b/drivers/crypto/chelsio/chcr_core.h
@@ -37,6 +37,7 @@
 #define __CHCR_CORE_H__
 
 #include <crypto/algapi.h>
+#include <net/tls.h>
 #include "t4_hw.h"
 #include "cxgb4.h"
 #include "t4_msg.h"
@@ -223,10 +224,15 @@ int chcr_handle_resp(struct crypto_async_request *req, unsigned char *input,
 int chcr_ipsec_xmit(struct sk_buff *skb, struct net_device *dev);
 void chcr_add_xfrmops(const struct cxgb4_lld_info *lld);
 #ifdef CONFIG_CHELSIO_TLS_DEVICE
-void chcr_enable_ktls(struct adapter *adap);
-void chcr_disable_ktls(struct adapter *adap);
 int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, unsigned char *input);
 int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input);
 int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev);
+extern int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
+			     enum tls_offload_ctx_dir direction,
+			     struct tls_crypto_info *crypto_info,
+			     u32 start_offload_tcp_sn);
+extern void chcr_ktls_dev_del(struct net_device *netdev,
+			      struct tls_context *tls_ctx,
+			      enum tls_offload_ctx_dir direction);
 #endif
 #endif /* __CHCR_CORE_H__ */
diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c
index 43d9e2420110..f55b87152166 100644
--- a/drivers/crypto/chelsio/chcr_ktls.c
+++ b/drivers/crypto/chelsio/chcr_ktls.c
@@ -373,9 +373,9 @@ static int chcr_ktls_mark_tcb_close(struct chcr_ktls_info *tx_info)
  * @tls_cts - tls context.
  * @direction - TX/RX crypto direction
  */
-static void chcr_ktls_dev_del(struct net_device *netdev,
-			      struct tls_context *tls_ctx,
-			      enum tls_offload_ctx_dir direction)
+void chcr_ktls_dev_del(struct net_device *netdev,
+		       struct tls_context *tls_ctx,
+		       enum tls_offload_ctx_dir direction)
 {
 	struct chcr_ktls_ofld_ctx_tx *tx_ctx =
 				chcr_get_ktls_tx_context(tls_ctx);
@@ -411,6 +411,8 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
 	atomic64_inc(&tx_info->adap->chcr_stats.ktls_tx_connection_close);
 	kvfree(tx_info);
 	tx_ctx->chcr_info = NULL;
+	/* release module refcount */
+	module_put(THIS_MODULE);
 }
 
 /*
@@ -422,10 +424,10 @@ static void chcr_ktls_dev_del(struct net_device *netdev,
  * @direction - TX/RX crypto direction
  * return: SUCCESS/FAILURE.
  */
-static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
-			     enum tls_offload_ctx_dir direction,
-			     struct tls_crypto_info *crypto_info,
-			     u32 start_offload_tcp_sn)
+int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
+		      enum tls_offload_ctx_dir direction,
+		      struct tls_crypto_info *crypto_info,
+		      u32 start_offload_tcp_sn)
 {
 	struct tls_context *tls_ctx = tls_get_ctx(sk);
 	struct chcr_ktls_ofld_ctx_tx *tx_ctx;
@@ -528,6 +530,12 @@ static int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 	if (ret)
 		goto out2;
 
+	/* Driver shouldn't be removed until any single connection exists */
+	if (!try_module_get(THIS_MODULE)) {
+		ret = -EINVAL;
+		goto out2;
+	}
+
 	atomic64_inc(&adap->chcr_stats.ktls_tx_connection_open);
 	return 0;
 out2:
@@ -537,43 +545,6 @@ out:
 	return ret;
 }
 
-static const struct tlsdev_ops chcr_ktls_ops = {
-	.tls_dev_add = chcr_ktls_dev_add,
-	.tls_dev_del = chcr_ktls_dev_del,
-};
-
-/*
- * chcr_enable_ktls:  add NETIF_F_HW_TLS_TX flag in all the ports.
- */
-void chcr_enable_ktls(struct adapter *adap)
-{
-	struct net_device *netdev;
-	int i;
-
-	for_each_port(adap, i) {
-		netdev = adap->port[i];
-		netdev->features |= NETIF_F_HW_TLS_TX;
-		netdev->hw_features |= NETIF_F_HW_TLS_TX;
-		netdev->tlsdev_ops = &chcr_ktls_ops;
-	}
-}
-
-/*
- * chcr_disable_ktls:  remove NETIF_F_HW_TLS_TX flag from all the ports.
- */
-void chcr_disable_ktls(struct adapter *adap)
-{
-	struct net_device *netdev;
-	int i;
-
-	for_each_port(adap, i) {
-		netdev = adap->port[i];
-		netdev->features &= ~NETIF_F_HW_TLS_TX;
-		netdev->hw_features &= ~NETIF_F_HW_TLS_TX;
-		netdev->tlsdev_ops = NULL;
-	}
-}
-
 /*
  * chcr_init_tcb_fields:  Initialize tcb fields to handle TCP seq number
  *			  handling.
diff --git a/drivers/crypto/chelsio/chcr_ktls.h b/drivers/crypto/chelsio/chcr_ktls.h
index 5a7ae2ca446e..5cbd84b1da05 100644
--- a/drivers/crypto/chelsio/chcr_ktls.h
+++ b/drivers/crypto/chelsio/chcr_ktls.h
@@ -89,10 +89,15 @@ static inline int chcr_get_first_rx_qid(struct adapter *adap)
 	return u_ctx->lldi.rxq_ids[0];
 }
 
-void chcr_enable_ktls(struct adapter *adap);
-void chcr_disable_ktls(struct adapter *adap);
 int chcr_ktls_cpl_act_open_rpl(struct adapter *adap, unsigned char *input);
 int chcr_ktls_cpl_set_tcb_rpl(struct adapter *adap, unsigned char *input);
 int chcr_ktls_xmit(struct sk_buff *skb, struct net_device *dev);
+int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
+		      enum tls_offload_ctx_dir direction,
+		      struct tls_crypto_info *crypto_info,
+		      u32 start_offload_tcp_sn);
+void chcr_ktls_dev_del(struct net_device *netdev,
+		       struct tls_context *tls_ctx,
+		       enum tls_offload_ctx_dir direction);
 #endif /* CONFIG_CHELSIO_TLS_DEVICE */
 #endif /* __CHCR_KTLS_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
index 5a41801acb6a..cf69c6edcfec 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4.h
@@ -1099,6 +1099,7 @@ struct adapter {
 
 	/* TC u32 offload */
 	struct cxgb4_tc_u32_table *tc_u32;
+	struct chcr_ktls chcr_ktls;
 	struct chcr_stats_debug chcr_stats;
 
 	/* TC flower offload */
@@ -2060,4 +2061,7 @@ int cxgb_open(struct net_device *dev);
 int cxgb_close(struct net_device *dev);
 void cxgb4_enable_rx(struct adapter *adap, struct sge_rspq *q);
 void cxgb4_quiesce_rx(struct sge_rspq *q);
+#ifdef CONFIG_CHELSIO_TLS_DEVICE
+int cxgb4_set_ktls_feature(struct adapter *adap, bool enable);
+#endif
 #endif /* __CXGB4_H__ */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
index c3dd50b45c48..41315712deb8 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_debugfs.c
@@ -3491,6 +3491,8 @@ static int chcr_stats_show(struct seq_file *seq, void *v)
 		   atomic_read(&adap->chcr_stats.tls_key));
 #ifdef CONFIG_CHELSIO_TLS_DEVICE
 	seq_puts(seq, "\nChelsio KTLS Crypto Accelerator Stats\n");
+	seq_printf(seq, "Tx TLS offload refcount:          %20u\n",
+		   refcount_read(&adap->chcr_ktls.ktls_refcount));
 	seq_printf(seq, "Tx HW offload contexts added:     %20llu\n",
 		   atomic64_read(&adap->chcr_stats.ktls_tx_ctx));
 	seq_printf(seq, "Tx connection created:            %20llu\n",
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
index 7a0414f379be..854b1717a70d 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_main.c
@@ -66,6 +66,9 @@
 #include <linux/crash_dump.h>
 #include <net/udp_tunnel.h>
 #include <net/xfrm.h>
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+#include <net/tls.h>
+#endif
 
 #include "cxgb4.h"
 #include "cxgb4_filter.h"
@@ -6064,6 +6067,79 @@ static int cxgb4_iov_configure(struct pci_dev *pdev, int num_vfs)
 }
 #endif /* CONFIG_PCI_IOV */
 
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+
+static int cxgb4_ktls_dev_add(struct net_device *netdev, struct sock *sk,
+			      enum tls_offload_ctx_dir direction,
+			      struct tls_crypto_info *crypto_info,
+			      u32 tcp_sn)
+{
+	struct adapter *adap = netdev2adap(netdev);
+	int ret = 0;
+
+	mutex_lock(&uld_mutex);
+	if (!adap->uld[CXGB4_ULD_CRYPTO].handle) {
+		dev_err(adap->pdev_dev, "chcr driver is not loaded\n");
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	if (!adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops) {
+		dev_err(adap->pdev_dev,
+			"chcr driver has no registered tlsdev_ops()\n");
+		ret = -EOPNOTSUPP;
+		goto out_unlock;
+	}
+
+	ret = cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_ENABLE);
+	if (ret)
+		goto out_unlock;
+
+	ret = adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops->tls_dev_add(netdev, sk,
+								  direction,
+								  crypto_info,
+								  tcp_sn);
+	/* if there is a failure, clear the refcount */
+	if (ret)
+		cxgb4_set_ktls_feature(adap,
+				       FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
+out_unlock:
+	mutex_unlock(&uld_mutex);
+	return ret;
+}
+
+static void cxgb4_ktls_dev_del(struct net_device *netdev,
+			       struct tls_context *tls_ctx,
+			       enum tls_offload_ctx_dir direction)
+{
+	struct adapter *adap = netdev2adap(netdev);
+
+	mutex_lock(&uld_mutex);
+	if (!adap->uld[CXGB4_ULD_CRYPTO].handle) {
+		dev_err(adap->pdev_dev, "chcr driver is not loaded\n");
+		goto out_unlock;
+	}
+
+	if (!adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops) {
+		dev_err(adap->pdev_dev,
+			"chcr driver has no registered tlsdev_ops\n");
+		goto out_unlock;
+	}
+
+	adap->uld[CXGB4_ULD_CRYPTO].tlsdev_ops->tls_dev_del(netdev, tls_ctx,
+							    direction);
+	cxgb4_set_ktls_feature(adap, FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE);
+
+out_unlock:
+	mutex_unlock(&uld_mutex);
+}
+
+static const struct tlsdev_ops cxgb4_ktls_ops = {
+	.tls_dev_add = cxgb4_ktls_dev_add,
+	.tls_dev_del = cxgb4_ktls_dev_del,
+};
+#endif /* CONFIG_CHELSIO_TLS_DEVICE */
+
 static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 {
 	struct net_device *netdev;
@@ -6313,7 +6389,14 @@ static int init_one(struct pci_dev *pdev, const struct pci_device_id *ent)
 			netdev->hw_features |= NETIF_F_HIGHDMA;
 		netdev->features |= netdev->hw_features;
 		netdev->vlan_features = netdev->features & VLAN_FEAT;
-
+#if defined(CONFIG_CHELSIO_TLS_DEVICE)
+		if (pi->adapter->params.crypto & FW_CAPS_CONFIG_TLS_HW) {
+			netdev->hw_features |= NETIF_F_HW_TLS_TX;
+			netdev->tlsdev_ops = &cxgb4_ktls_ops;
+			/* initialize the refcount */
+			refcount_set(&pi->adapter->chcr_ktls.ktls_refcount, 0);
+		}
+#endif
 		netdev->priv_flags |= IFF_UNICAST_FLT;
 
 		/* MTU range: 81 - 9600 */
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
index 9e3c6b36cde8..0307e9c69a47 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.c
@@ -663,22 +663,64 @@ static int uld_attach(struct adapter *adap, unsigned int uld)
 	return 0;
 }
 
+static bool cxgb4_uld_in_use(struct adapter *adap)
+{
+	const struct tid_info *t = &adap->tids;
+
+	return (atomic_read(&t->conns_in_use) || t->stids_in_use);
+}
+
 #ifdef CONFIG_CHELSIO_TLS_DEVICE
 /* cxgb4_set_ktls_feature: request FW to enable/disable ktls settings.
  * @adap: adapter info
  * @enable: 1 to enable / 0 to disable ktls settings.
  */
-static void cxgb4_set_ktls_feature(struct adapter *adap, bool enable)
+int cxgb4_set_ktls_feature(struct adapter *adap, bool enable)
 {
-	u32 params = (FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
-		      FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_KTLS_TX_HW) |
-		      FW_PARAMS_PARAM_Y_V(enable));
 	int ret = 0;
+	u32 params =
+		FW_PARAMS_MNEM_V(FW_PARAMS_MNEM_DEV) |
+		FW_PARAMS_PARAM_X_V(FW_PARAMS_PARAM_DEV_KTLS_HW) |
+		FW_PARAMS_PARAM_Y_V(enable) |
+		FW_PARAMS_PARAM_Z_V(FW_PARAMS_PARAM_DEV_KTLS_HW_USER_ENABLE);
+
+	if (enable) {
+		if (!refcount_read(&adap->chcr_ktls.ktls_refcount)) {
+			/* At this moment if ULD connection are up means, other
+			 * ULD is/are already active, return failure.
+			 */
+			if (cxgb4_uld_in_use(adap)) {
+				dev_warn(adap->pdev_dev,
+					 "ULD connections (tid/stid) active. Can't enable kTLS\n");
+				return -EINVAL;
+			}
+			ret = t4_set_params(adap, adap->mbox, adap->pf,
+					    0, 1, &params, &params);
+			if (ret)
+				return ret;
+			refcount_set(&adap->chcr_ktls.ktls_refcount, 1);
+			pr_info("kTLS has been enabled. Restrictions placed on ULD support\n");
+		} else {
+			/* ktls settings already up, just increment refcount. */
+			refcount_inc(&adap->chcr_ktls.ktls_refcount);
+		}
+	} else {
+		/* return failure if refcount is already 0. */
+		if (!refcount_read(&adap->chcr_ktls.ktls_refcount))
+			return -EINVAL;
+		/* decrement refcount and test, if 0, disable ktls feature,
+		 * else return command success.
+		 */
+		if (refcount_dec_and_test(&adap->chcr_ktls.ktls_refcount)) {
+			ret = t4_set_params(adap, adap->mbox, adap->pf,
+					    0, 1, &params, &params);
+			if (ret)
+				return ret;
+			pr_info("kTLS is disabled. Restrictions on ULD support removed\n");
+		}
+	}
 
-	ret = t4_set_params(adap, adap->mbox, adap->pf, 0, 1, &params, &params);
-	/* if fw returns failure, clear the ktls flag */
-	if (ret)
-		adap->params.crypto &= ~ULP_CRYPTO_KTLS_INLINE;
+	return ret;
 }
 #endif
 
@@ -706,12 +748,6 @@ static void cxgb4_uld_alloc_resources(struct adapter *adap,
 	}
 	if (adap->flags & CXGB4_FULL_INIT_DONE)
 		enable_rx_uld(adap, type);
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-	/* send mbox to enable ktls related settings. */
-	if (type == CXGB4_ULD_CRYPTO &&
-	    (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW))
-		cxgb4_set_ktls_feature(adap, 1);
-#endif
 	if (adap->uld[type].add)
 		goto free_irq;
 	ret = setup_sge_txq_uld(adap, type, p);
@@ -805,13 +841,6 @@ int cxgb4_unregister_uld(enum cxgb4_uld type)
 			continue;
 
 		cxgb4_shutdown_uld_adapter(adap, type);
-
-#ifdef CONFIG_CHELSIO_TLS_DEVICE
-		/* send mbox to disable ktls related settings. */
-		if (type == CXGB4_ULD_CRYPTO &&
-		    (adap->params.crypto & FW_CAPS_CONFIG_TX_TLS_HW))
-			cxgb4_set_ktls_feature(adap, 0);
-#endif
 	}
 
 	list_for_each_entry_safe(uld_entry, tmp, &uld_list, list_node) {
diff --git a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
index 085fa1424f9a..dbce99b209d6 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/cxgb4_uld.h
@@ -268,6 +268,10 @@ struct filter_ctx {
 	u32 tid;			/* to store tid */
 };
 
+struct chcr_ktls {
+	refcount_t ktls_refcount;
+};
+
 struct ch_filter_specification;
 
 int cxgb4_get_free_ftid(struct net_device *dev, u8 family, bool hash_en,
@@ -464,6 +468,9 @@ struct cxgb4_uld_info {
 			      struct napi_struct *napi);
 	void (*lro_flush)(struct t4_lro_mgr *);
 	int (*tx_handler)(struct sk_buff *skb, struct net_device *dev);
+#if IS_ENABLED(CONFIG_TLS_DEVICE)
+	const struct tlsdev_ops *tlsdev_ops;
+#endif
 };
 
 void cxgb4_uld_enable(struct adapter *adap);
diff --git a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
index 68fe734b9b37..0a326c054707 100644
--- a/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
+++ b/drivers/net/ethernet/chelsio/cxgb4/t4fw_api.h
@@ -1205,7 +1205,7 @@ enum fw_caps_config_crypto {
 	FW_CAPS_CONFIG_CRYPTO_LOOKASIDE = 0x00000001,
 	FW_CAPS_CONFIG_TLS_INLINE = 0x00000002,
 	FW_CAPS_CONFIG_IPSEC_INLINE = 0x00000004,
-	FW_CAPS_CONFIG_TX_TLS_HW = 0x00000008,
+	FW_CAPS_CONFIG_TLS_HW = 0x00000008,
 };
 
 enum fw_caps_config_fcoe {
@@ -1329,7 +1329,7 @@ enum fw_params_param_dev {
 	FW_PARAMS_PARAM_DEV_DBQ_TIMERTICK = 0x2A,
 	FW_PARAMS_PARAM_DEV_NUM_TM_CLASS = 0x2B,
 	FW_PARAMS_PARAM_DEV_FILTER = 0x2E,
-	FW_PARAMS_PARAM_DEV_KTLS_TX_HW = 0x31,
+	FW_PARAMS_PARAM_DEV_KTLS_HW = 0x31,
 };
 
 /*
@@ -1412,6 +1412,12 @@ enum fw_params_param_dmaq {
 	FW_PARAMS_PARAM_DMAQ_CONM_CTXT = 0x20,
 };
 
+enum fw_params_param_dev_ktls_hw {
+	FW_PARAMS_PARAM_DEV_KTLS_HW_DISABLE      = 0x00,
+	FW_PARAMS_PARAM_DEV_KTLS_HW_ENABLE       = 0x01,
+	FW_PARAMS_PARAM_DEV_KTLS_HW_USER_ENABLE  = 0x01,
+};
+
 enum fw_params_param_dev_phyfw {
 	FW_PARAMS_PARAM_DEV_PHYFW_DOWNLOAD = 0x00,
 	FW_PARAMS_PARAM_DEV_PHYFW_VERSION = 0x01,
-- 
cgit v1.2.3-59-g8ed1b


From 76d7728db724466490c2c3dd4f84c3357f550615 Mon Sep 17 00:00:00 2001
From: Rohit Maheshwari <rohitm@chelsio.com>
Date: Mon, 1 Jun 2020 19:33:32 +0530
Subject: crypto/chcr: IPV6 code needs to be in CONFIG_IPV6

Error messages seen while building kernel with CONFIG_IPV6
disabled.

Signed-off-by: Rohit Maheshwari <rohitm@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_ktls.c | 48 ++++++++++++++++++++++++++------------
 1 file changed, 33 insertions(+), 15 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_ktls.c b/drivers/crypto/chelsio/chcr_ktls.c
index f55b87152166..91dee616d15e 100644
--- a/drivers/crypto/chelsio/chcr_ktls.c
+++ b/drivers/crypto/chelsio/chcr_ktls.c
@@ -221,6 +221,7 @@ static int chcr_ktls_act_open_req(struct sock *sk,
 	return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te);
 }
 
+#if IS_ENABLED(CONFIG_IPV6)
 /*
  * chcr_ktls_act_open_req6: creates TCB entry for ipv6 connection.
  * @sk - tcp socket.
@@ -270,6 +271,7 @@ static int chcr_ktls_act_open_req6(struct sock *sk,
 
 	return cxgb4_l2t_send(tx_info->netdev, skb, tx_info->l2te);
 }
+#endif /* #if IS_ENABLED(CONFIG_IPV6) */
 
 /*
  * chcr_setup_connection:  create a TCB entry so that TP will form tcp packets.
@@ -290,20 +292,26 @@ static int chcr_setup_connection(struct sock *sk,
 	tx_info->atid = atid;
 	tx_info->ip_family = sk->sk_family;
 
-	if (sk->sk_family == AF_INET ||
-	    (sk->sk_family == AF_INET6 && !sk->sk_ipv6only &&
-	     ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED)) {
+	if (sk->sk_family == AF_INET) {
 		tx_info->ip_family = AF_INET;
 		ret = chcr_ktls_act_open_req(sk, tx_info, atid);
+#if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		tx_info->ip_family = AF_INET6;
-		ret =
-		cxgb4_clip_get(tx_info->netdev,
-			       (const u32 *)&sk->sk_v6_rcv_saddr.in6_u.u6_addr8,
-			       1);
-		if (ret)
-			goto out;
-		ret = chcr_ktls_act_open_req6(sk, tx_info, atid);
+		if (!sk->sk_ipv6only &&
+		    ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED) {
+			tx_info->ip_family = AF_INET;
+			ret = chcr_ktls_act_open_req(sk, tx_info, atid);
+		} else {
+			tx_info->ip_family = AF_INET6;
+			ret = cxgb4_clip_get(tx_info->netdev,
+					     (const u32 *)
+					     &sk->sk_v6_rcv_saddr.s6_addr,
+					     1);
+			if (ret)
+				goto out;
+			ret = chcr_ktls_act_open_req6(sk, tx_info, atid);
+		}
+#endif
 	}
 
 	/* if return type is NET_XMIT_CN, msg will be sent but delayed, mark ret
@@ -394,11 +402,13 @@ void chcr_ktls_dev_del(struct net_device *netdev,
 	if (tx_info->l2te)
 		cxgb4_l2t_release(tx_info->l2te);
 
+#if IS_ENABLED(CONFIG_IPV6)
 	/* clear clip entry */
 	if (tx_info->ip_family == AF_INET6)
 		cxgb4_clip_release(netdev,
 				   (const u32 *)&sk->sk_v6_daddr.in6_u.u6_addr8,
 				   1);
+#endif
 
 	/* clear tid */
 	if (tx_info->tid != -1) {
@@ -491,12 +501,16 @@ int chcr_ktls_dev_add(struct net_device *netdev, struct sock *sk,
 		goto out2;
 
 	/* get peer ip */
-	if (sk->sk_family == AF_INET ||
-	    (sk->sk_family == AF_INET6 && !sk->sk_ipv6only &&
-	     ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED)) {
+	if (sk->sk_family == AF_INET) {
 		memcpy(daaddr, &sk->sk_daddr, 4);
+#if IS_ENABLED(CONFIG_IPV6)
 	} else {
-		memcpy(daaddr, sk->sk_v6_daddr.in6_u.u6_addr8, 16);
+		if (!sk->sk_ipv6only &&
+		    ipv6_addr_type(&sk->sk_v6_daddr) == IPV6_ADDR_MAPPED)
+			memcpy(daaddr, &sk->sk_daddr, 4);
+		else
+			memcpy(daaddr, sk->sk_v6_daddr.in6_u.u6_addr8, 16);
+#endif
 	}
 
 	/* get the l2t index */
@@ -903,7 +917,9 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
 	struct fw_eth_tx_pkt_wr *wr;
 	struct cpl_tx_pkt_core *cpl;
 	u32 ctrl, iplen, maclen;
+#if IS_ENABLED(CONFIG_IPV6)
 	struct ipv6hdr *ip6;
+#endif
 	unsigned int ndesc;
 	struct tcphdr *tcp;
 	int len16, pktlen;
@@ -958,9 +974,11 @@ chcr_ktls_write_tcp_options(struct chcr_ktls_info *tx_info, struct sk_buff *skb,
 		/* we need to correct ip header len */
 		ip = (struct iphdr *)(buf + maclen);
 		ip->tot_len = htons(pktlen - maclen);
+#if IS_ENABLED(CONFIG_IPV6)
 	} else {
 		ip6 = (struct ipv6hdr *)(buf + maclen);
 		ip6->payload_len = htons(pktlen - maclen - iplen);
+#endif
 	}
 	/* now take care of the tcp header, if fin is not set then clear push
 	 * bit as well, and if fin is set, it will be sent at the last so we
-- 
cgit v1.2.3-59-g8ed1b


From f3b140ad8575b74c78be50f34079e1cf43b0302d Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Mon, 1 Jun 2020 23:11:58 +0530
Subject: Crypto/chcr: Fixes compilation warnings

This patch fixes the compilation warnings displayed by sparse tool for
chcr driver.

V1->V2

Avoid type casting by using get_unaligned_be32() and
put_unaligned_be16/32() functions.

The key which comes from the stack is a u8 byte stream, so we store it in
an unsigned char array (ablkctx->key). The function get_aes_decrypt_key()
is used to calculate the reverse round key for decryption. For this
operation the key has to be divided into 4-byte words, so to extract 4 bytes
from a u8 byte stream and store them in a u32 variable, get_unaligned_be32()
is used. Similarly, for copying the key back from the u32 variable to the
original u8 key stream, put_unaligned_be32() is used.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c  | 10 ++++------
 drivers/crypto/chelsio/chcr_ipsec.c |  2 +-
 2 files changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index b8c1c4dd3ef0..94cf04e5aacf 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -256,7 +256,7 @@ static void get_aes_decrypt_key(unsigned char *dec_key,
 		return;
 	}
 	for (i = 0; i < nk; i++)
-		w_ring[i] = be32_to_cpu(*(u32 *)&key[4 * i]);
+		w_ring[i] = get_unaligned_be32(&key[i * 4]);
 
 	i = 0;
 	temp = w_ring[nk - 1];
@@ -275,7 +275,7 @@ static void get_aes_decrypt_key(unsigned char *dec_key,
 	}
 	i--;
 	for (k = 0, j = i % nk; k < nk; k++) {
-		*((u32 *)dec_key + k) = htonl(w_ring[j]);
+		put_unaligned_be32(w_ring[j], &dec_key[k * 4]);
 		j--;
 		if (j < 0)
 			j += nk;
@@ -2926,8 +2926,7 @@ static int ccm_format_packet(struct aead_request *req,
 		memcpy(ivptr, req->iv, 16);
 	}
 	if (assoclen)
-		*((unsigned short *)(reqctx->scratch_pad + 16)) =
-				htons(assoclen);
+		put_unaligned_be16(assoclen, &reqctx->scratch_pad[16]);
 
 	rc = generate_b0(req, ivptr, op_type);
 	/* zero the ctr value */
@@ -3201,8 +3200,7 @@ static struct sk_buff *create_gcm_wr(struct aead_request *req,
 	} else {
 		memcpy(ivptr, req->iv, GCM_AES_IV_SIZE);
 	}
-	*((unsigned int *)(ivptr + 12)) = htonl(0x01);
-
+	put_unaligned_be32(0x01, &ivptr[12]);
 	ulptx = (struct ulptx_sgl *)(ivptr + 16);
 
 	chcr_add_aead_dst_ent(req, phys_cpl, qid);
diff --git a/drivers/crypto/chelsio/chcr_ipsec.c b/drivers/crypto/chelsio/chcr_ipsec.c
index d25689837b26..3a10f51ad6fd 100644
--- a/drivers/crypto/chelsio/chcr_ipsec.c
+++ b/drivers/crypto/chelsio/chcr_ipsec.c
@@ -403,7 +403,7 @@ inline void *copy_esn_pktxt(struct sk_buff *skb,
 	xo = xfrm_offload(skb);
 
 	aadiv->spi = (esphdr->spi);
-	seqlo = htonl(esphdr->seq_no);
+	seqlo = ntohl(esphdr->seq_no);
 	seqno = cpu_to_be64(seqlo + ((u64)xo->seq.hi << 32));
 	memcpy(aadiv->seq_no, &seqno, 8);
 	iv = skb_transport_header(skb) + sizeof(struct ip_esp_hdr);
-- 
cgit v1.2.3-59-g8ed1b


From 055be6865dea6743b090d1c55c8d21a5e01df201 Mon Sep 17 00:00:00 2001
From: Ayush Sawal <ayush.sawal@chelsio.com>
Date: Mon, 1 Jun 2020 23:11:59 +0530
Subject: Crypto/chcr: Fixes a coccinelle check error

This fixes an error observed after running the coccinelle check.
drivers/crypto/chelsio/chcr_algo.c:1462:5-8: Unneeded variable:
"err". Return "0" on line 1480

This line was missed in commit 567be3a5d227 ("crypto:
chelsio - Use multiple txq/rxq per tfm to process the requests").

Fixes: 567be3a5d227 ("crypto: chelsio - Use multiple txq/rxq per tfm to process the requests")

V1->V2
-Modified subject.

Signed-off-by: Ayush Sawal <ayush.sawal@chelsio.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chcr_algo.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/crypto/chelsio/chcr_algo.c b/drivers/crypto/chelsio/chcr_algo.c
index 94cf04e5aacf..2080b2ec6639 100644
--- a/drivers/crypto/chelsio/chcr_algo.c
+++ b/drivers/crypto/chelsio/chcr_algo.c
@@ -1464,6 +1464,7 @@ static int chcr_device_init(struct chcr_context *ctx)
 	if (!ctx->dev) {
 		u_ctx = assign_chcr_device();
 		if (!u_ctx) {
+			err = -ENXIO;
 			pr_err("chcr device assignment fails\n");
 			goto out;
 		}
-- 
cgit v1.2.3-59-g8ed1b


From 6abde0b241224347cd88e2ae75902e07f55c42cb Mon Sep 17 00:00:00 2001
From: Vinay Kumar Yadav <vinay.yadav@chelsio.com>
Date: Tue, 2 Jun 2020 00:07:05 +0530
Subject: crypto/chtls: IPv6 support for inline TLS

Extends support to IPv6 for Inline TLS server.

Signed-off-by: Vinay Kumar Yadav <vinay.yadav@chelsio.com>

v1->v2:
- cc'd tcp folks.

v2->v3:
- changed EXPORT_SYMBOL() to EXPORT_SYMBOL_GPL()

Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/crypto/chelsio/chtls/chtls_cm.c   | 195 ++++++++++++++++++++++++------
 drivers/crypto/chelsio/chtls/chtls_cm.h   |   1 +
 drivers/crypto/chelsio/chtls/chtls_main.c |  14 ++-
 net/ipv6/tcp_ipv6.c                       |   1 +
 4 files changed, 168 insertions(+), 43 deletions(-)

diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.c b/drivers/crypto/chelsio/chtls/chtls_cm.c
index d5720a859443..9a642c79a657 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.c
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.c
@@ -18,13 +18,20 @@
 #include <linux/kallsyms.h>
 #include <linux/kprobes.h>
 #include <linux/if_vlan.h>
+#include <linux/ipv6.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
+#include <net/ip6_route.h>
 #include <net/inet_common.h>
 #include <net/tcp.h>
 #include <net/dst.h>
 #include <net/tls.h>
+#include <net/addrconf.h>
+#include <net/secure_seq.h>
 
 #include "chtls.h"
 #include "chtls_cm.h"
+#include "clip_tbl.h"
 
 /*
  * State transitions and actions for close.  Note that if we are in SYN_SENT
@@ -82,15 +89,36 @@ static void chtls_sock_release(struct kref *ref)
 	kfree(csk);
 }
 
-static struct net_device *chtls_ipv4_netdev(struct chtls_dev *cdev,
+static struct net_device *chtls_find_netdev(struct chtls_dev *cdev,
 					    struct sock *sk)
 {
 	struct net_device *ndev = cdev->ports[0];
+	struct net_device *temp;
+	int addr_type;
+
+	switch (sk->sk_family) {
+	case PF_INET:
+		if (likely(!inet_sk(sk)->inet_rcv_saddr))
+			return ndev;
+		ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);
+		break;
+	case PF_INET6:
+		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+		if (likely(addr_type == IPV6_ADDR_ANY))
+			return ndev;
+
+	for_each_netdev_rcu(&init_net, temp) {
+		if (ipv6_chk_addr(&init_net, (struct in6_addr *)
+				  &sk->sk_v6_rcv_saddr, temp, 1)) {
+			ndev = temp;
+			break;
+		}
+	}
+	break;
+	default:
+		return NULL;
+	}
 
-	if (likely(!inet_sk(sk)->inet_rcv_saddr))
-		return ndev;
-
-	ndev = ip_dev_find(&init_net, inet_sk(sk)->inet_rcv_saddr);
 	if (!ndev)
 		return NULL;
 
@@ -446,7 +474,10 @@ void chtls_destroy_sock(struct sock *sk)
 	free_tls_keyid(sk);
 	kref_put(&csk->kref, chtls_sock_release);
 	csk->cdev = NULL;
-	sk->sk_prot = &tcp_prot;
+	if (sk->sk_family == AF_INET)
+		sk->sk_prot = &tcp_prot;
+	else
+		sk->sk_prot = &tcpv6_prot;
 	sk->sk_prot->destroy(sk);
 }
 
@@ -473,7 +504,8 @@ static void chtls_disconnect_acceptq(struct sock *listen_sk)
 	while (*pprev) {
 		struct request_sock *req = *pprev;
 
-		if (req->rsk_ops == &chtls_rsk_ops) {
+		if (req->rsk_ops == &chtls_rsk_ops ||
+		    req->rsk_ops == &chtls_rsk_opsv6) {
 			struct sock *child = req->sk;
 
 			*pprev = req->dl_next;
@@ -600,14 +632,13 @@ int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
 	struct listen_ctx *ctx;
 	struct adapter *adap;
 	struct port_info *pi;
+	bool clip_valid;
 	int stid;
 	int ret;
 
-	if (sk->sk_family != PF_INET)
-		return -EAGAIN;
-
+	clip_valid = false;
 	rcu_read_lock();
-	ndev = chtls_ipv4_netdev(cdev, sk);
+	ndev = chtls_find_netdev(cdev, sk);
 	rcu_read_unlock();
 	if (!ndev)
 		return -EBADF;
@@ -638,16 +669,35 @@ int chtls_listen_start(struct chtls_dev *cdev, struct sock *sk)
 	if (!listen_hash_add(cdev, sk, stid))
 		goto free_stid;
 
-	ret = cxgb4_create_server(ndev, stid,
-				  inet_sk(sk)->inet_rcv_saddr,
-				  inet_sk(sk)->inet_sport, 0,
-				  cdev->lldi->rxq_ids[0]);
+	if (sk->sk_family == PF_INET) {
+		ret = cxgb4_create_server(ndev, stid,
+					  inet_sk(sk)->inet_rcv_saddr,
+					  inet_sk(sk)->inet_sport, 0,
+					  cdev->lldi->rxq_ids[0]);
+	} else {
+		int addr_type;
+
+		addr_type = ipv6_addr_type(&sk->sk_v6_rcv_saddr);
+		if (addr_type != IPV6_ADDR_ANY) {
+			ret = cxgb4_clip_get(ndev, (const u32 *)
+					     &sk->sk_v6_rcv_saddr, 1);
+			if (ret)
+				goto del_hash;
+			clip_valid = true;
+		}
+		ret = cxgb4_create_server6(ndev, stid,
+					   &sk->sk_v6_rcv_saddr,
+					   inet_sk(sk)->inet_sport,
+					   cdev->lldi->rxq_ids[0]);
+	}
 	if (ret > 0)
 		ret = net_xmit_errno(ret);
 	if (ret)
 		goto del_hash;
 	return 0;
 del_hash:
+	if (clip_valid)
+		cxgb4_clip_release(ndev, (const u32 *)&sk->sk_v6_rcv_saddr, 1);
 	listen_hash_del(cdev, sk);
 free_stid:
 	cxgb4_free_stid(cdev->tids, stid, sk->sk_family);
@@ -661,6 +711,8 @@ free_ctx:
 void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
 {
 	struct listen_ctx *listen_ctx;
+	struct chtls_sock *csk;
+	int addr_type = 0;
 	int stid;
 
 	stid = listen_hash_del(cdev, sk);
@@ -671,7 +723,16 @@ void chtls_listen_stop(struct chtls_dev *cdev, struct sock *sk)
 	chtls_reset_synq(listen_ctx);
 
 	cxgb4_remove_server(cdev->lldi->ports[0], stid,
-			    cdev->lldi->rxq_ids[0], 0);
+			    cdev->lldi->rxq_ids[0], sk->sk_family == PF_INET6);
+
+	if (sk->sk_family == PF_INET6) {
+		csk = rcu_dereference_sk_user_data(sk);
+		addr_type = ipv6_addr_type((const struct in6_addr *)
+					  &sk->sk_v6_rcv_saddr);
+		if (addr_type != IPV6_ADDR_ANY)
+			cxgb4_clip_release(csk->egress_dev, (const u32 *)
+					   &sk->sk_v6_rcv_saddr, 1);
+	}
 	chtls_disconnect_acceptq(sk);
 }
 
@@ -880,7 +941,10 @@ static unsigned int chtls_select_mss(const struct chtls_sock *csk,
 	tp = tcp_sk(sk);
 	tcpoptsz = 0;
 
-	iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
+	if (sk->sk_family == AF_INET6)
+		iphdrsz = sizeof(struct ipv6hdr) + sizeof(struct tcphdr);
+	else
+		iphdrsz = sizeof(struct iphdr) + sizeof(struct tcphdr);
 	if (req->tcpopt.tstamp)
 		tcpoptsz += round_up(TCPOLEN_TIMESTAMP, 4);
 
@@ -1045,11 +1109,29 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	if (!newsk)
 		goto free_oreq;
 
-	dst = inet_csk_route_child_sock(lsk, newsk, oreq);
-	if (!dst)
-		goto free_sk;
+	if (lsk->sk_family == AF_INET) {
+		dst = inet_csk_route_child_sock(lsk, newsk, oreq);
+		if (!dst)
+			goto free_sk;
 
-	n = dst_neigh_lookup(dst, &iph->saddr);
+		n = dst_neigh_lookup(dst, &iph->saddr);
+	} else {
+		const struct ipv6hdr *ip6h;
+		struct flowi6 fl6;
+
+		ip6h = (const struct ipv6hdr *)network_hdr;
+		memset(&fl6, 0, sizeof(fl6));
+		fl6.flowi6_proto = IPPROTO_TCP;
+		fl6.saddr = ip6h->daddr;
+		fl6.daddr = ip6h->saddr;
+		fl6.fl6_dport = inet_rsk(oreq)->ir_rmt_port;
+		fl6.fl6_sport = htons(inet_rsk(oreq)->ir_num);
+		security_req_classify_flow(oreq, flowi6_to_flowi(&fl6));
+		dst = ip6_dst_lookup_flow(sock_net(lsk), lsk, &fl6, NULL);
+		if (IS_ERR(dst))
+			goto free_sk;
+		n = dst_neigh_lookup(dst, &ip6h->saddr);
+	}
 	if (!n)
 		goto free_sk;
 
@@ -1072,9 +1154,28 @@ static struct sock *chtls_recv_sock(struct sock *lsk,
 	tp = tcp_sk(newsk);
 	newinet = inet_sk(newsk);
 
-	newinet->inet_daddr = iph->saddr;
-	newinet->inet_rcv_saddr = iph->daddr;
-	newinet->inet_saddr = iph->daddr;
+	if (iph->version == 0x4) {
+		newinet->inet_daddr = iph->saddr;
+		newinet->inet_rcv_saddr = iph->daddr;
+		newinet->inet_saddr = iph->daddr;
+	} else {
+		struct tcp6_sock *newtcp6sk = (struct tcp6_sock *)newsk;
+		struct inet_request_sock *treq = inet_rsk(oreq);
+		struct ipv6_pinfo *newnp = inet6_sk(newsk);
+		struct ipv6_pinfo *np = inet6_sk(lsk);
+
+		inet_sk(newsk)->pinet6 = &newtcp6sk->inet6;
+		memcpy(newnp, np, sizeof(struct ipv6_pinfo));
+		newsk->sk_v6_daddr = treq->ir_v6_rmt_addr;
+		newsk->sk_v6_rcv_saddr = treq->ir_v6_loc_addr;
+		inet6_sk(newsk)->saddr = treq->ir_v6_loc_addr;
+		newnp->ipv6_fl_list = NULL;
+		newnp->pktoptions = NULL;
+		newsk->sk_bound_dev_if = treq->ir_iif;
+		newinet->inet_opt = NULL;
+		newinet->inet_daddr = LOOPBACK4_IPV6;
+		newinet->inet_saddr = LOOPBACK4_IPV6;
+	}
 
 	oreq->ts_recent = PASS_OPEN_TID_G(ntohl(req->tos_stid));
 	sk_setup_caps(newsk, dst);
@@ -1156,6 +1257,7 @@ static void chtls_pass_accept_request(struct sock *sk,
 	struct sk_buff *reply_skb;
 	struct chtls_sock *csk;
 	struct chtls_dev *cdev;
+	struct ipv6hdr *ip6h;
 	struct tcphdr *tcph;
 	struct sock *newsk;
 	struct ethhdr *eh;
@@ -1196,37 +1298,50 @@ static void chtls_pass_accept_request(struct sock *sk,
 	if (sk_acceptq_is_full(sk))
 		goto reject;
 
-	oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
-	if (!oreq)
-		goto reject;
-
-	oreq->rsk_rcv_wnd = 0;
-	oreq->rsk_window_clamp = 0;
-	oreq->cookie_ts = 0;
-	oreq->mss = 0;
-	oreq->ts_recent = 0;
 
 	eth_hdr_len = T6_ETH_HDR_LEN_G(ntohl(req->hdr_len));
 	if (eth_hdr_len == ETH_HLEN) {
 		eh = (struct ethhdr *)(req + 1);
 		iph = (struct iphdr *)(eh + 1);
+		ip6h = (struct ipv6hdr *)(eh + 1);
 		network_hdr = (void *)(eh + 1);
 	} else {
 		vlan_eh = (struct vlan_ethhdr *)(req + 1);
 		iph = (struct iphdr *)(vlan_eh + 1);
+		ip6h = (struct ipv6hdr *)(vlan_eh + 1);
 		network_hdr = (void *)(vlan_eh + 1);
 	}
-	if (iph->version != 0x4)
-		goto free_oreq;
 
-	tcph = (struct tcphdr *)(iph + 1);
-	skb_set_network_header(skb, (void *)iph - (void *)req);
+	if (iph->version == 0x4) {
+		tcph = (struct tcphdr *)(iph + 1);
+		skb_set_network_header(skb, (void *)iph - (void *)req);
+		oreq = inet_reqsk_alloc(&chtls_rsk_ops, sk, true);
+	} else {
+		tcph = (struct tcphdr *)(ip6h + 1);
+		skb_set_network_header(skb, (void *)ip6h - (void *)req);
+		oreq = inet_reqsk_alloc(&chtls_rsk_opsv6, sk, false);
+	}
+
+	if (!oreq)
+		goto reject;
+
+	oreq->rsk_rcv_wnd = 0;
+	oreq->rsk_window_clamp = 0;
+	oreq->cookie_ts = 0;
+	oreq->mss = 0;
+	oreq->ts_recent = 0;
 
 	tcp_rsk(oreq)->tfo_listener = false;
 	tcp_rsk(oreq)->rcv_isn = ntohl(tcph->seq);
 	chtls_set_req_port(oreq, tcph->source, tcph->dest);
-	chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
-	ip_dsfield = ipv4_get_dsfield(iph);
+	if (iph->version == 0x4) {
+		chtls_set_req_addr(oreq, iph->daddr, iph->saddr);
+		ip_dsfield = ipv4_get_dsfield(iph);
+	} else {
+		inet_rsk(oreq)->ir_v6_rmt_addr = ipv6_hdr(skb)->saddr;
+		inet_rsk(oreq)->ir_v6_loc_addr = ipv6_hdr(skb)->daddr;
+		ip_dsfield = ipv6_get_dsfield(ipv6_hdr(skb));
+	}
 	if (req->tcpopt.wsf <= 14 &&
 	    sock_net(sk)->ipv4.sysctl_tcp_window_scaling) {
 		inet_rsk(oreq)->wscale_ok = 1;
@@ -1243,7 +1358,7 @@ static void chtls_pass_accept_request(struct sock *sk,
 
 	newsk = chtls_recv_sock(sk, oreq, network_hdr, req, cdev);
 	if (!newsk)
-		goto reject;
+		goto free_oreq;
 
 	if (chtls_get_module(newsk))
 		goto reject;
diff --git a/drivers/crypto/chelsio/chtls/chtls_cm.h b/drivers/crypto/chelsio/chtls/chtls_cm.h
index 3fac0c74a41f..47ba81e42f5d 100644
--- a/drivers/crypto/chelsio/chtls/chtls_cm.h
+++ b/drivers/crypto/chelsio/chtls/chtls_cm.h
@@ -79,6 +79,7 @@ enum {
 
 typedef void (*defer_handler_t)(struct chtls_dev *dev, struct sk_buff *skb);
 extern struct request_sock_ops chtls_rsk_ops;
+extern struct request_sock_ops chtls_rsk_opsv6;
 
 struct deferred_skb_cb {
 	defer_handler_t handler;
diff --git a/drivers/crypto/chelsio/chtls/chtls_main.c b/drivers/crypto/chelsio/chtls/chtls_main.c
index 2110d0893bc7..7dfffdde9593 100644
--- a/drivers/crypto/chelsio/chtls/chtls_main.c
+++ b/drivers/crypto/chelsio/chtls/chtls_main.c
@@ -13,6 +13,8 @@
 #include <linux/net.h>
 #include <linux/ip.h>
 #include <linux/tcp.h>
+#include <net/ipv6.h>
+#include <net/transp_v6.h>
 #include <net/tcp.h>
 #include <net/tls.h>
 
@@ -30,8 +32,8 @@ static DEFINE_MUTEX(cdev_mutex);
 
 static DEFINE_MUTEX(notify_mutex);
 static RAW_NOTIFIER_HEAD(listen_notify_list);
-static struct proto chtls_cpl_prot;
-struct request_sock_ops chtls_rsk_ops;
+static struct proto chtls_cpl_prot, chtls_cpl_protv6;
+struct request_sock_ops chtls_rsk_ops, chtls_rsk_opsv6;
 static uint send_page_order = (14 - PAGE_SHIFT < 0) ? 0 : 14 - PAGE_SHIFT;
 
 static void register_listen_notifier(struct notifier_block *nb)
@@ -586,7 +588,10 @@ static struct cxgb4_uld_info chtls_uld_info = {
 
 void chtls_install_cpl_ops(struct sock *sk)
 {
-	sk->sk_prot = &chtls_cpl_prot;
+	if (sk->sk_family == AF_INET)
+		sk->sk_prot = &chtls_cpl_prot;
+	else
+		sk->sk_prot = &chtls_cpl_protv6;
 }
 
 static void __init chtls_init_ulp_ops(void)
@@ -603,6 +608,9 @@ static void __init chtls_init_ulp_ops(void)
 	chtls_cpl_prot.recvmsg		= chtls_recvmsg;
 	chtls_cpl_prot.setsockopt	= chtls_setsockopt;
 	chtls_cpl_prot.getsockopt	= chtls_getsockopt;
+	chtls_cpl_protv6		= chtls_cpl_prot;
+	chtls_init_rsk_ops(&chtls_cpl_protv6, &chtls_rsk_opsv6,
+			   &tcpv6_prot, PF_INET6);
 }
 
 static int __init chtls_register(void)
diff --git a/net/ipv6/tcp_ipv6.c b/net/ipv6/tcp_ipv6.c
index b7415ca75c2d..f67d45ff00b4 100644
--- a/net/ipv6/tcp_ipv6.c
+++ b/net/ipv6/tcp_ipv6.c
@@ -2121,6 +2121,7 @@ struct proto tcpv6_prot = {
 #endif
 	.diag_destroy		= tcp_abort,
 };
+EXPORT_SYMBOL_GPL(tcpv6_prot);
 
 /* thinking of making this const? Don't.
  * early_demux can change based on sysctl.
-- 
cgit v1.2.3-59-g8ed1b


From efd7ed0f5f2d07ccbb1853c5d46656cdfa1371fb Mon Sep 17 00:00:00 2001
From: Jules Irenge <jbi.octave@gmail.com>
Date: Mon, 1 Jun 2020 19:45:52 +0100
Subject: sfc: add missing annotation for efx_ef10_try_update_nic_stats_vf()

Sparse reports a warning at efx_ef10_try_update_nic_stats_vf()
warning: context imbalance in efx_ef10_try_update_nic_stats_vf()
	- unexpected unlock
The root cause is the missing annotation at
efx_ef10_try_update_nic_stats_vf().
Add the missing __must_hold(&efx->stats_lock) annotation.

Signed-off-by: Jules Irenge <jbi.octave@gmail.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/sfc/ef10.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/net/ethernet/sfc/ef10.c b/drivers/net/ethernet/sfc/ef10.c
index 964c5e842cec..4b0e3695a71a 100644
--- a/drivers/net/ethernet/sfc/ef10.c
+++ b/drivers/net/ethernet/sfc/ef10.c
@@ -1819,6 +1819,7 @@ static size_t efx_ef10_update_stats_pf(struct efx_nic *efx, u64 *full_stats,
 }
 
 static int efx_ef10_try_update_nic_stats_vf(struct efx_nic *efx)
+	__must_hold(&efx->stats_lock)
 {
 	MCDI_DECLARE_BUF(inbuf, MC_CMD_MAC_STATS_IN_LEN);
 	struct efx_ef10_nic_data *nic_data = efx->nic_data;
-- 
cgit v1.2.3-59-g8ed1b


From 836e66c218f355ec01ba57671c85abf32961dcea Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 2 Jun 2020 16:58:32 +0200
Subject: bpf: Fix up bpf_skb_adjust_room helper's skb csum setting

Lorenz recently reported:

  In our TC classifier cls_redirect [0], we use the following sequence of
  helper calls to decapsulate a GUE (basically IP + UDP + custom header)
  encapsulated packet:

    bpf_skb_adjust_room(skb, -encap_len, BPF_ADJ_ROOM_MAC, BPF_F_ADJ_ROOM_FIXED_GSO)
    bpf_redirect(skb->ifindex, BPF_F_INGRESS)

  It seems like some checksums of the inner headers are not validated in
  this case. For example, a TCP SYN packet with invalid TCP checksum is
  still accepted by the network stack and elicits a SYN ACK. [...]

  That is, we receive the following packet from the driver:

    | ETH | IP | UDP | GUE | IP | TCP |
    skb->ip_summed == CHECKSUM_UNNECESSARY

  ip_summed is CHECKSUM_UNNECESSARY because our NICs do rx checksum offloading.
  On this packet we run skb_adjust_room_mac(-encap_len), and get the following:

    | ETH | IP | TCP |
    skb->ip_summed == CHECKSUM_UNNECESSARY

  Note that ip_summed is still CHECKSUM_UNNECESSARY. After bpf_redirect()'ing
  into the ingress, we end up in tcp_v4_rcv(). There, skb_checksum_init() is
  turned into a no-op due to CHECKSUM_UNNECESSARY.

The bpf_skb_adjust_room() helper is not aware of protocol specifics. Internally,
it handles the CHECKSUM_COMPLETE case via skb_postpull_rcsum(), but that does
not cover CHECKSUM_UNNECESSARY. In this case skb->csum_level of the original
skb prior to bpf_skb_adjust_room() call was 0, that is, covering UDP. Right now
there is no way to adjust the skb->csum_level. NICs that have checksum offload
disabled (CHECKSUM_NONE) or that support CHECKSUM_COMPLETE are not affected.

Use a safe default for CHECKSUM_UNNECESSARY by resetting to CHECKSUM_NONE and
add a flag to the helper called BPF_F_ADJ_ROOM_NO_CSUM_RESET that allows users
to opt out. Opting out is useful for the case where we don't remove/add
full protocol headers, or for the case where a user wants to adjust the csum
level manually e.g. through bpf_csum_level() helper that is added in subsequent
patch.

The bpf_skb_proto_{4_to_6,6_to_4}() for NAT64/46 translation from the BPF
bpf_skb_change_proto() helper uses bpf_skb_net_hdr_{push,pop}() pair internally
as well but doesn't change layers, only transitions from v4 to v6 and vice
versa, therefore no adaptation is required there.

  [0] https://lore.kernel.org/bpf/20200424185556.7358-1-lmb@cloudflare.com/

Fixes: 2be7e212d541 ("bpf: add bpf_skb_adjust_room helper")
Reported-by: Lorenz Bauer <lmb@cloudflare.com>
Reported-by: Alan Maguire <alan.maguire@oracle.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Lorenz Bauer <lmb@cloudflare.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Link: https://lore.kernel.org/bpf/CACAyw9-uU_52esMd1JjuA80fRPHJv5vsSg8GnfW3t_qDU4aVKQ@mail.gmail.com/
Link: https://lore.kernel.org/bpf/11a90472e7cce83e76ddbfce81fdfce7bfc68808.1591108731.git.daniel@iogearbox.net
---
 include/linux/skbuff.h         | 8 ++++++++
 include/uapi/linux/bpf.h       | 8 ++++++++
 net/core/filter.c              | 8 ++++++--
 tools/include/uapi/linux/bpf.h | 8 ++++++++
 4 files changed, 30 insertions(+), 2 deletions(-)

diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h
index a0d5c2760103..0c0377fc00c2 100644
--- a/include/linux/skbuff.h
+++ b/include/linux/skbuff.h
@@ -3919,6 +3919,14 @@ static inline void __skb_incr_checksum_unnecessary(struct sk_buff *skb)
 	}
 }
 
+static inline void __skb_reset_checksum_unnecessary(struct sk_buff *skb)
+{
+	if (skb->ip_summed == CHECKSUM_UNNECESSARY) {
+		skb->ip_summed = CHECKSUM_NONE;
+		skb->csum_level = 0;
+	}
+}
+
 /* Check if we need to perform checksum complete validation.
  *
  * Returns true if checksum complete is needed, false otherwise
diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index b9ed9f14f2a2..3ba2bbbed80c 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -1635,6 +1635,13 @@ union bpf_attr {
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
  *
+ * 		By default, the helper will reset any offloaded checksum
+ * 		indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * 		by the following flag:
+ *
+ * 		* **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * 		  checksum data of the skb to CHECKSUM_NONE.
+ *
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -3433,6 +3440,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
+	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
 };
 
 enum {
diff --git a/net/core/filter.c b/net/core/filter.c
index ae82bcb03124..278dcc0af961 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -3113,7 +3113,8 @@ static int bpf_skb_net_shrink(struct sk_buff *skb, u32 off, u32 len_diff,
 {
 	int ret;
 
-	if (flags & ~BPF_F_ADJ_ROOM_FIXED_GSO)
+	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_FIXED_GSO |
+			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
 		return -EINVAL;
 
 	if (skb_is_gso(skb) && !skb_is_gso_tcp(skb)) {
@@ -3163,7 +3164,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 	u32 off;
 	int ret;
 
-	if (unlikely(flags & ~BPF_F_ADJ_ROOM_MASK))
+	if (unlikely(flags & ~(BPF_F_ADJ_ROOM_MASK |
+			       BPF_F_ADJ_ROOM_NO_CSUM_RESET)))
 		return -EINVAL;
 	if (unlikely(len_diff_abs > 0xfffU))
 		return -EFAULT;
@@ -3191,6 +3193,8 @@ BPF_CALL_4(bpf_skb_adjust_room, struct sk_buff *, skb, s32, len_diff,
 
 	ret = shrink ? bpf_skb_net_shrink(skb, off, len_diff_abs, flags) :
 		       bpf_skb_net_grow(skb, off, len_diff_abs, flags);
+	if (!ret && !(flags & BPF_F_ADJ_ROOM_NO_CSUM_RESET))
+		__skb_reset_checksum_unnecessary(skb);
 
 	bpf_compute_data_pointers(skb);
 	return ret;
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index b9ed9f14f2a2..3ba2bbbed80c 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -1635,6 +1635,13 @@ union bpf_attr {
  * 		Grow or shrink the room for data in the packet associated to
  * 		*skb* by *len_diff*, and according to the selected *mode*.
  *
+ * 		By default, the helper will reset any offloaded checksum
+ * 		indicator of the skb to CHECKSUM_NONE. This can be avoided
+ * 		by the following flag:
+ *
+ * 		* **BPF_F_ADJ_ROOM_NO_CSUM_RESET**: Do not reset offloaded
+ * 		  checksum data of the skb to CHECKSUM_NONE.
+ *
  *		There are two supported modes at this time:
  *
  *		* **BPF_ADJ_ROOM_MAC**: Adjust room at the mac layer
@@ -3433,6 +3440,7 @@ enum {
 	BPF_F_ADJ_ROOM_ENCAP_L3_IPV6	= (1ULL << 2),
 	BPF_F_ADJ_ROOM_ENCAP_L4_GRE	= (1ULL << 3),
 	BPF_F_ADJ_ROOM_ENCAP_L4_UDP	= (1ULL << 4),
+	BPF_F_ADJ_ROOM_NO_CSUM_RESET	= (1ULL << 5),
 };
 
 enum {
-- 
cgit v1.2.3-59-g8ed1b


From 7cdec54f9713256bb170873a1fc5c75c9127c9d2 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 2 Jun 2020 16:58:33 +0200
Subject: bpf: Add csum_level helper for fixing up csum levels

Add a bpf_csum_level() helper which BPF programs can use in combination
with bpf_skb_adjust_room() when they pass in BPF_F_ADJ_ROOM_NO_CSUM_RESET
flag to the latter to avoid falling back to CHECKSUM_NONE.

The bpf_csum_level() helper allows adjusting the skb->csum_level of
CHECKSUM_UNNECESSARY skbs via BPF_CSUM_LEVEL_{INC,DEC}, which call
__skb_{incr,decr}_checksum_unnecessary() on the skb. The helper also supports
BPF_CSUM_LEVEL_RESET, which sets the skb's csum to CHECKSUM_NONE, as well as
BPF_CSUM_LEVEL_QUERY to just return the current level. Without this helper,
there is no other way to adjust the skb->csum_level. I did not add an extra
dummy flags argument, as there is plenty of free bitspace in the level argument
itself if ever needed in the future.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Alan Maguire <alan.maguire@oracle.com>
Acked-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/279ae3717cb3d03c0ffeb511493c93c450a01e1a.1591108731.git.daniel@iogearbox.net
---
 include/uapi/linux/bpf.h       | 43 +++++++++++++++++++++++++++++++++++++++++-
 net/core/filter.c              | 38 +++++++++++++++++++++++++++++++++++++
 tools/include/uapi/linux/bpf.h | 43 +++++++++++++++++++++++++++++++++++++++++-
 3 files changed, 122 insertions(+), 2 deletions(-)

diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h
index 3ba2bbbed80c..c65b374a5090 100644
--- a/include/uapi/linux/bpf.h
+++ b/include/uapi/linux/bpf.h
@@ -3220,6 +3220,38 @@ union bpf_attr {
  *		calculation.
  *	Return
  *		Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * 	Description
+ * 		Change the skb's checksum level by one layer up or down, or
+ * 		reset it entirely to none in order to have the stack perform
+ * 		checksum validation. The level is applicable to the following
+ * 		protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * 		| ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * 		through **bpf_skb_adjust_room**\ () helper with passing in
+ * 		**BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * 		to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * 		the UDP header is removed. Similarly, an encap of the latter
+ * 		into the former could be accompanied by a helper call to
+ * 		**bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * 		skb is still intended to be processed in higher layers of the
+ * 		stack instead of just egressing at tc.
+ *
+ * 		There are four supported level settings at this time:
+ *
+ * 		* **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * 		  sets CHECKSUM_NONE to force checksum validation by the stack.
+ * 		* **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * 		  skb->csum_level.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure. In the
+ * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * 		is returned or the error code -EACCES in case the skb is not
+ * 		subject to CHECKSUM_UNNECESSARY.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3356,7 +3388,8 @@ union bpf_attr {
 	FN(ringbuf_reserve),		\
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
-	FN(ringbuf_query),
+	FN(ringbuf_query),		\
+	FN(csum_level),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3433,6 +3466,14 @@ enum {
 	BPF_F_CURRENT_NETNS		= (-1L),
 };
 
+/* BPF_FUNC_csum_level level values. */
+enum {
+	BPF_CSUM_LEVEL_QUERY,
+	BPF_CSUM_LEVEL_INC,
+	BPF_CSUM_LEVEL_DEC,
+	BPF_CSUM_LEVEL_RESET,
+};
+
 /* BPF_FUNC_skb_adjust_room flags. */
 enum {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
diff --git a/net/core/filter.c b/net/core/filter.c
index 278dcc0af961..d01a244b5087 100644
--- a/net/core/filter.c
+++ b/net/core/filter.c
@@ -2015,6 +2015,40 @@ static const struct bpf_func_proto bpf_csum_update_proto = {
 	.arg2_type	= ARG_ANYTHING,
 };
 
+BPF_CALL_2(bpf_csum_level, struct sk_buff *, skb, u64, level)
+{
+	/* The interface is to be used in combination with bpf_skb_adjust_room()
+	 * for encap/decap of packet headers when BPF_F_ADJ_ROOM_NO_CSUM_RESET
+	 * is passed as flags, for example.
+	 */
+	switch (level) {
+	case BPF_CSUM_LEVEL_INC:
+		__skb_incr_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_DEC:
+		__skb_decr_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_RESET:
+		__skb_reset_checksum_unnecessary(skb);
+		break;
+	case BPF_CSUM_LEVEL_QUERY:
+		return skb->ip_summed == CHECKSUM_UNNECESSARY ?
+		       skb->csum_level : -EACCES;
+	default:
+		return -EINVAL;
+	}
+
+	return 0;
+}
+
+static const struct bpf_func_proto bpf_csum_level_proto = {
+	.func		= bpf_csum_level,
+	.gpl_only	= false,
+	.ret_type	= RET_INTEGER,
+	.arg1_type	= ARG_PTR_TO_CTX,
+	.arg2_type	= ARG_ANYTHING,
+};
+
 static inline int __bpf_rx_skb(struct net_device *dev, struct sk_buff *skb)
 {
 	return dev_forward_skb(dev, skb);
@@ -6280,6 +6314,8 @@ tc_cls_act_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_csum_diff_proto;
 	case BPF_FUNC_csum_update:
 		return &bpf_csum_update_proto;
+	case BPF_FUNC_csum_level:
+		return &bpf_csum_level_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
@@ -6613,6 +6649,8 @@ lwt_xmit_func_proto(enum bpf_func_id func_id, const struct bpf_prog *prog)
 		return &bpf_skb_store_bytes_proto;
 	case BPF_FUNC_csum_update:
 		return &bpf_csum_update_proto;
+	case BPF_FUNC_csum_level:
+		return &bpf_csum_level_proto;
 	case BPF_FUNC_l3_csum_replace:
 		return &bpf_l3_csum_replace_proto;
 	case BPF_FUNC_l4_csum_replace:
diff --git a/tools/include/uapi/linux/bpf.h b/tools/include/uapi/linux/bpf.h
index 3ba2bbbed80c..c65b374a5090 100644
--- a/tools/include/uapi/linux/bpf.h
+++ b/tools/include/uapi/linux/bpf.h
@@ -3220,6 +3220,38 @@ union bpf_attr {
  *		calculation.
  *	Return
  *		Requested value, or 0, if flags are not recognized.
+ *
+ * int bpf_csum_level(struct sk_buff *skb, u64 level)
+ * 	Description
+ * 		Change the skb's checksum level by one layer up or down, or
+ * 		reset it entirely to none in order to have the stack perform
+ * 		checksum validation. The level is applicable to the following
+ * 		protocols: TCP, UDP, GRE, SCTP, FCOE. For example, a decap of
+ * 		| ETH | IP | UDP | GUE | IP | TCP | into | ETH | IP | TCP |
+ * 		through **bpf_skb_adjust_room**\ () helper with passing in
+ * 		**BPF_F_ADJ_ROOM_NO_CSUM_RESET** flag would require one call
+ * 		to **bpf_csum_level**\ () with **BPF_CSUM_LEVEL_DEC** since
+ * 		the UDP header is removed. Similarly, an encap of the latter
+ * 		into the former could be accompanied by a helper call to
+ * 		**bpf_csum_level**\ () with **BPF_CSUM_LEVEL_INC** if the
+ * 		skb is still intended to be processed in higher layers of the
+ * 		stack instead of just egressing at tc.
+ *
+ * 		There are four supported level settings at this time:
+ *
+ * 		* **BPF_CSUM_LEVEL_INC**: Increases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_DEC**: Decreases skb->csum_level for skbs
+ * 		  with CHECKSUM_UNNECESSARY.
+ * 		* **BPF_CSUM_LEVEL_RESET**: Resets skb->csum_level to 0 and
+ * 		  sets CHECKSUM_NONE to force checksum validation by the stack.
+ * 		* **BPF_CSUM_LEVEL_QUERY**: No-op, returns the current
+ * 		  skb->csum_level.
+ * 	Return
+ * 		0 on success, or a negative error in case of failure. In the
+ * 		case of **BPF_CSUM_LEVEL_QUERY**, the current skb->csum_level
+ * 		is returned or the error code -EACCES in case the skb is not
+ * 		subject to CHECKSUM_UNNECESSARY.
  */
 #define __BPF_FUNC_MAPPER(FN)		\
 	FN(unspec),			\
@@ -3356,7 +3388,8 @@ union bpf_attr {
 	FN(ringbuf_reserve),		\
 	FN(ringbuf_submit),		\
 	FN(ringbuf_discard),		\
-	FN(ringbuf_query),
+	FN(ringbuf_query),		\
+	FN(csum_level),
 
 /* integer value in 'imm' field of BPF_CALL instruction selects which helper
  * function eBPF program intends to call
@@ -3433,6 +3466,14 @@ enum {
 	BPF_F_CURRENT_NETNS		= (-1L),
 };
 
+/* BPF_FUNC_csum_level level values. */
+enum {
+	BPF_CSUM_LEVEL_QUERY,
+	BPF_CSUM_LEVEL_INC,
+	BPF_CSUM_LEVEL_DEC,
+	BPF_CSUM_LEVEL_RESET,
+};
+
 /* BPF_FUNC_skb_adjust_room flags. */
 enum {
 	BPF_F_ADJ_ROOM_FIXED_GSO	= (1ULL << 0),
-- 
cgit v1.2.3-59-g8ed1b


From c4ba153b6501fa7ccfdc7e57946fb1d6011e36e8 Mon Sep 17 00:00:00 2001
From: Daniel Borkmann <daniel@iogearbox.net>
Date: Tue, 2 Jun 2020 16:58:34 +0200
Subject: bpf, selftests: Adapt cls_redirect to call csum_level helper

Adapt the bpf_skb_adjust_room() calls to pass in the
BPF_F_ADJ_ROOM_NO_CSUM_RESET flag and use the new bpf_csum_level() helper to
inc/dec the checksum level by one after the encap/decap.

Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Reviewed-by: Lorenz Bauer <lmb@cloudflare.com>
Link: https://lore.kernel.org/bpf/e7458f10e3f3d795307cbc5ad870112671d9c6f7.1591108731.git.daniel@iogearbox.net
---
 tools/testing/selftests/bpf/progs/test_cls_redirect.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

diff --git a/tools/testing/selftests/bpf/progs/test_cls_redirect.c b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
index 1668b993eb86..f0b72e86bee5 100644
--- a/tools/testing/selftests/bpf/progs/test_cls_redirect.c
+++ b/tools/testing/selftests/bpf/progs/test_cls_redirect.c
@@ -380,9 +380,10 @@ static ret_t accept_locally(struct __sk_buff *skb, encap_headers_t *encap)
 	}
 
 	if (bpf_skb_adjust_room(skb, -encap_overhead, BPF_ADJ_ROOM_MAC,
-				BPF_F_ADJ_ROOM_FIXED_GSO)) {
+				BPF_F_ADJ_ROOM_FIXED_GSO |
+				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+	    bpf_csum_level(skb, BPF_CSUM_LEVEL_DEC))
 		return TC_ACT_SHOT;
-	}
 
 	return bpf_redirect(skb->ifindex, BPF_F_INGRESS);
 }
@@ -472,7 +473,9 @@ static ret_t forward_with_gre(struct __sk_buff *skb, encap_headers_t *encap,
 	}
 
 	if (bpf_skb_adjust_room(skb, delta, BPF_ADJ_ROOM_NET,
-				BPF_F_ADJ_ROOM_FIXED_GSO)) {
+				BPF_F_ADJ_ROOM_FIXED_GSO |
+				BPF_F_ADJ_ROOM_NO_CSUM_RESET) ||
+	    bpf_csum_level(skb, BPF_CSUM_LEVEL_INC)) {
 		metrics->errors_total_encap_adjust_failed++;
 		return TC_ACT_SHOT;
 	}
-- 
cgit v1.2.3-59-g8ed1b


From 9a5f25ad30e5bb40a2e0c61c991594d3e6529c0a Mon Sep 17 00:00:00 2001
From: Andrii Nakryiko <andriin@fb.com>
Date: Mon, 1 Jun 2020 22:03:49 -0700
Subject: selftests/bpf: Fix sample_cnt shared between two threads

Make sample_cnt volatile to fix a possible selftest failure caused by compiler
optimizations preventing the latest sample_cnt value from being visible to the
main thread. sample_cnt is incremented in a background thread, which is then
joined into the main thread, so in terms of memory visibility the sample_cnt
update is ok. But because it's not volatile, the compiler might make
optimizations that prevent the main thread from seeing the latest updated
value. Fix this by marking the global variable volatile.

Fixes: cb1c9ddd5525 ("selftests/bpf: Add BPF ringbuf selftests")
Signed-off-by: Andrii Nakryiko <andriin@fb.com>
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
Acked-by: Song Liu <songliubraving@fb.com>
Link: https://lore.kernel.org/bpf/20200602050349.215037-1-andriin@fb.com
---
 tools/testing/selftests/bpf/prog_tests/ringbuf.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/prog_tests/ringbuf.c b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
index bb8541f240e2..2bba908dfa63 100644
--- a/tools/testing/selftests/bpf/prog_tests/ringbuf.c
+++ b/tools/testing/selftests/bpf/prog_tests/ringbuf.c
@@ -25,7 +25,7 @@ struct sample {
 	char comm[16];
 };
 
-static int sample_cnt;
+static volatile int sample_cnt;
 
 static int process_sample(void *ctx, void *data, size_t len)
 {
-- 
cgit v1.2.3-59-g8ed1b


From 7cec0b927142f510a1fac88033017616cce44c26 Mon Sep 17 00:00:00 2001
From: Alexei Starovoitov <ast@kernel.org>
Date: Tue, 2 Jun 2020 11:57:43 -0700
Subject: selftests/bpf: Fix verifier test

Adjust verifier test due to addition of new field.

Fixes: c3c16f2ea6d2 ("bpf: Add rx_queue_mapping to bpf_sock")
Signed-off-by: Alexei Starovoitov <ast@kernel.org>
---
 tools/testing/selftests/bpf/verifier/sock.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/testing/selftests/bpf/verifier/sock.c b/tools/testing/selftests/bpf/verifier/sock.c
index 0bc51ad9e0fb..b1aac2641498 100644
--- a/tools/testing/selftests/bpf/verifier/sock.c
+++ b/tools/testing/selftests/bpf/verifier/sock.c
@@ -222,7 +222,7 @@
 	BPF_JMP_IMM(BPF_JNE, BPF_REG_0, 0, 2),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
-	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, state)),
+	BPF_LDX_MEM(BPF_W, BPF_REG_0, BPF_REG_0, offsetofend(struct bpf_sock, rx_queue_mapping)),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
-- 
cgit v1.2.3-59-g8ed1b


From effe5be17706167ee968fa28afe40dec9c6f71db Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 2 Jun 2020 19:43:39 +0200
Subject: s390/bpf: Maintain 8-byte stack alignment

Certain kernel functions (e.g. get_vtimer/set_vtimer) cause a kernel
panic when the stack is not 8-byte aligned. Currently JITed BPF programs
may trigger this by allocating stack frames with non-rounded sizes and
then being interrupted. Fix by rounding fp->aux->stack_depth up to 8 bytes.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200602174339.2501066-1-iii@linux.ibm.com
---
 arch/s390/net/bpf_jit_comp.c | 19 ++++++++++---------
 1 file changed, 10 insertions(+), 9 deletions(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 8d2134136290..0f37a1b635f8 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -594,7 +594,7 @@ static void bpf_jit_epilogue(struct bpf_jit *jit, u32 stack_depth)
  * stack space for the large switch statement.
  */
 static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
-				 int i, bool extra_pass)
+				 int i, bool extra_pass, u32 stack_depth)
 {
 	struct bpf_insn *insn = &fp->insnsi[i];
 	u32 dst_reg = insn->dst_reg;
@@ -1207,7 +1207,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		 */
 
 		if (jit->seen & SEEN_STACK)
-			off = STK_OFF_TCCNT + STK_OFF + fp->aux->stack_depth;
+			off = STK_OFF_TCCNT + STK_OFF + stack_depth;
 		else
 			off = STK_OFF_TCCNT;
 		/* lhi %w0,1 */
@@ -1249,7 +1249,7 @@ static noinline int bpf_jit_insn(struct bpf_jit *jit, struct bpf_prog *fp,
 		/*
 		 * Restore registers before calling function
 		 */
-		save_restore_regs(jit, REGS_RESTORE, fp->aux->stack_depth);
+		save_restore_regs(jit, REGS_RESTORE, stack_depth);
 
 		/*
 		 * goto *(prog->bpf_func + tail_call_start);
@@ -1519,7 +1519,7 @@ static int bpf_set_addr(struct bpf_jit *jit, int i)
  * Compile eBPF program into s390x code
  */
 static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
-			bool extra_pass)
+			bool extra_pass, u32 stack_depth)
 {
 	int i, insn_count, lit32_size, lit64_size;
 
@@ -1527,18 +1527,18 @@ static int bpf_jit_prog(struct bpf_jit *jit, struct bpf_prog *fp,
 	jit->lit64 = jit->lit64_start;
 	jit->prg = 0;
 
-	bpf_jit_prologue(jit, fp->aux->stack_depth);
+	bpf_jit_prologue(jit, stack_depth);
 	if (bpf_set_addr(jit, 0) < 0)
 		return -1;
 	for (i = 0; i < fp->len; i += insn_count) {
-		insn_count = bpf_jit_insn(jit, fp, i, extra_pass);
+		insn_count = bpf_jit_insn(jit, fp, i, extra_pass, stack_depth);
 		if (insn_count < 0)
 			return -1;
 		/* Next instruction address */
 		if (bpf_set_addr(jit, i + insn_count) < 0)
 			return -1;
 	}
-	bpf_jit_epilogue(jit, fp->aux->stack_depth);
+	bpf_jit_epilogue(jit, stack_depth);
 
 	lit32_size = jit->lit32 - jit->lit32_start;
 	lit64_size = jit->lit64 - jit->lit64_start;
@@ -1569,6 +1569,7 @@ struct s390_jit_data {
  */
 struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 {
+	u32 stack_depth = round_up(fp->aux->stack_depth, 8);
 	struct bpf_prog *tmp, *orig_fp = fp;
 	struct bpf_binary_header *header;
 	struct s390_jit_data *jit_data;
@@ -1621,7 +1622,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 	 *   - 3:   Calculate program size and addrs arrray
 	 */
 	for (pass = 1; pass <= 3; pass++) {
-		if (bpf_jit_prog(&jit, fp, extra_pass)) {
+		if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
 			fp = orig_fp;
 			goto free_addrs;
 		}
@@ -1635,7 +1636,7 @@ struct bpf_prog *bpf_int_jit_compile(struct bpf_prog *fp)
 		goto free_addrs;
 	}
 skip_init_ctx:
-	if (bpf_jit_prog(&jit, fp, extra_pass)) {
+	if (bpf_jit_prog(&jit, fp, extra_pass, stack_depth)) {
 		bpf_jit_binary_free(header);
 		fp = orig_fp;
 		goto free_addrs;
-- 
cgit v1.2.3-59-g8ed1b


From 33d21f18204cb33b43ca6c78c8180949f6dc7227 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 2 Jun 2020 19:45:55 +0200
Subject: s390/bpf: Use bcr 0,%0 as tail call nop filler

Currently used 0x0000 filler confuses bfd disassembler, making bpftool
prog dump xlated output nearly useless. Fix by using a real instruction.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200602174555.2501389-1-iii@linux.ibm.com
---
 arch/s390/net/bpf_jit_comp.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/arch/s390/net/bpf_jit_comp.c b/arch/s390/net/bpf_jit_comp.c
index 0f37a1b635f8..f4242b894cf2 100644
--- a/arch/s390/net/bpf_jit_comp.c
+++ b/arch/s390/net/bpf_jit_comp.c
@@ -503,7 +503,8 @@ static void bpf_jit_prologue(struct bpf_jit *jit, u32 stack_depth)
 	} else {
 		/* j tail_call_start: NOP if no tail calls are used */
 		EMIT4_PCREL(0xa7f40000, 6);
-		_EMIT2(0);
+		/* bcr 0,%0 */
+		EMIT2(0x0700, 0, REG_0);
 	}
 	/* Tail calls have to skip above initialization */
 	jit->tail_call_start = jit->prg;
-- 
cgit v1.2.3-59-g8ed1b


From 9bc499befeef07a4d79f4924bfca05634ad8fc97 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 2 Jun 2020 19:44:48 +0200
Subject: bpf, selftests: Use bpf_probe_read_kernel

Since commit 0ebeea8ca8a4 ("bpf: Restrict bpf_probe_read{, str}() only to
archs where they work") 44 verifier tests fail on s390 due to not having
bpf_probe_read anymore. Fix by using bpf_probe_read_kernel.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200602174448.2501214-1-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/verifier/const_or.c    |  8 ++--
 .../selftests/bpf/verifier/helper_access_var_len.c | 44 +++++++++++-----------
 .../selftests/bpf/verifier/helper_value_access.c   | 36 +++++++++---------
 tools/testing/selftests/bpf/verifier/precise.c     |  8 ++--
 4 files changed, 48 insertions(+), 48 deletions(-)

diff --git a/tools/testing/selftests/bpf/verifier/const_or.c b/tools/testing/selftests/bpf/verifier/const_or.c
index 84446dfc7c1d..6c214c58e8d4 100644
--- a/tools/testing/selftests/bpf/verifier/const_or.c
+++ b/tools/testing/selftests/bpf/verifier/const_or.c
@@ -6,7 +6,7 @@
 	BPF_MOV64_IMM(BPF_REG_2, 34),
 	BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 13),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -20,7 +20,7 @@
 	BPF_MOV64_IMM(BPF_REG_2, 34),
 	BPF_ALU64_IMM(BPF_OR, BPF_REG_2, 24),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid stack type R1 off=-48 access_size=58",
@@ -36,7 +36,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 13),
 	BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -51,7 +51,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 24),
 	BPF_ALU64_REG(BPF_OR, BPF_REG_2, BPF_REG_4),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid stack type R1 off=-48 access_size=58",
diff --git a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
index 5a605ae131a9..87c4e7900083 100644
--- a/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
+++ b/tools/testing/selftests/bpf/verifier/helper_access_var_len.c
@@ -19,7 +19,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -36,7 +36,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 64),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "invalid indirect read from stack off -64+0 size 64",
@@ -55,7 +55,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -84,7 +84,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -112,7 +112,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -132,7 +132,7 @@
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 3),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -152,7 +152,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -171,7 +171,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -190,7 +190,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 64, 3),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -208,7 +208,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_1, -128),
 	BPF_JMP_IMM(BPF_JSGT, BPF_REG_2, 64, 3),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -233,7 +233,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -259,7 +259,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -286,7 +286,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -313,7 +313,7 @@
 	BPF_MOV64_IMM(BPF_REG_4, 0),
 	BPF_JMP_REG(BPF_JSGE, BPF_REG_4, BPF_REG_2, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_MOV64_IMM(BPF_REG_0, 0),
 	BPF_EXIT_INSN(),
 	},
@@ -468,7 +468,7 @@
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "R1 type=inv expected=fp",
@@ -481,7 +481,7 @@
 	BPF_MOV64_IMM(BPF_REG_1, 0),
 	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.errstr = "R1 type=inv expected=fp",
@@ -495,7 +495,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.result = ACCEPT,
@@ -513,7 +513,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, 0),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -534,7 +534,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_10),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -554,7 +554,7 @@
 	BPF_LDX_MEM(BPF_DW, BPF_REG_2, BPF_REG_0, 0),
 	BPF_JMP_IMM(BPF_JGT, BPF_REG_2, 8, 2),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_8b = { 3 },
@@ -580,7 +580,7 @@
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 63),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_EXIT_INSN(),
 	},
@@ -607,7 +607,7 @@
 	BPF_ALU64_IMM(BPF_AND, BPF_REG_2, 32),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_2, 32),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_LDX_MEM(BPF_DW, BPF_REG_1, BPF_REG_10, -16),
 	BPF_EXIT_INSN(),
 	},
diff --git a/tools/testing/selftests/bpf/verifier/helper_value_access.c b/tools/testing/selftests/bpf/verifier/helper_value_access.c
index 961f28139b96..1c7882ddfa63 100644
--- a/tools/testing/selftests/bpf/verifier/helper_value_access.c
+++ b/tools/testing/selftests/bpf/verifier/helper_value_access.c
@@ -10,7 +10,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -29,7 +29,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -67,7 +67,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, sizeof(struct test_val) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -87,7 +87,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_0),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -109,7 +109,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) -	offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -129,7 +129,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -170,7 +170,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -191,7 +191,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -212,7 +212,7 @@
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -235,7 +235,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -256,7 +256,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -300,7 +300,7 @@
 		      sizeof(struct test_val) -
 		      offsetof(struct test_val, foo) + 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -322,7 +322,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -344,7 +344,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, -1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -368,7 +368,7 @@
 	BPF_MOV64_IMM(BPF_REG_2,
 		      sizeof(struct test_val) - offsetof(struct test_val, foo)),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -390,7 +390,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -433,7 +433,7 @@
 	BPF_ALU64_REG(BPF_ADD, BPF_REG_1, BPF_REG_3),
 	BPF_MOV64_IMM(BPF_REG_2, 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
@@ -458,7 +458,7 @@
 		      sizeof(struct test_val) -
 		      offsetof(struct test_val, foo) + 1),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.fixup_map_hash_48b = { 3 },
diff --git a/tools/testing/selftests/bpf/verifier/precise.c b/tools/testing/selftests/bpf/verifier/precise.c
index 02151f8c940f..6dc8003ffc70 100644
--- a/tools/testing/selftests/bpf/verifier/precise.c
+++ b/tools/testing/selftests/bpf/verifier/precise.c
@@ -31,14 +31,14 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
 	.fixup_map_array_48b = { 1 },
 	.result = VERBOSE_ACCEPT,
 	.errstr =
-	"26: (85) call bpf_probe_read#4\
+	"26: (85) call bpf_probe_read_kernel#113\
 	last_idx 26 first_idx 20\
 	regs=4 stack=0 before 25\
 	regs=4 stack=0 before 24\
@@ -91,7 +91,7 @@
 	BPF_MOV64_REG(BPF_REG_1, BPF_REG_FP),
 	BPF_ALU64_IMM(BPF_ADD, BPF_REG_1, -8),
 	BPF_MOV64_IMM(BPF_REG_3, 0),
-	BPF_EMIT_CALL(BPF_FUNC_probe_read),
+	BPF_EMIT_CALL(BPF_FUNC_probe_read_kernel),
 	BPF_EXIT_INSN(),
 	},
 	.prog_type = BPF_PROG_TYPE_TRACEPOINT,
@@ -99,7 +99,7 @@
 	.result = VERBOSE_ACCEPT,
 	.flags = BPF_F_TEST_STATE_FREQ,
 	.errstr =
-	"26: (85) call bpf_probe_read#4\
+	"26: (85) call bpf_probe_read_kernel#113\
 	last_idx 26 first_idx 22\
 	regs=4 stack=0 before 25\
 	regs=4 stack=0 before 24\
-- 
cgit v1.2.3-59-g8ed1b


From d70a6be1e2ab98f13688e4a529b326e8e11230d0 Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 2 Jun 2020 19:56:48 +0200
Subject: tools/bpf: Don't use $(COMPILE.c)

When using make kselftest TARGETS=bpf, tools/bpf is built with
MAKEFLAGS=rR, which causes $(COMPILE.c) to be undefined, which in turn
causes the build to fail with

  CC       kselftest/bpf/tools/build/bpftool/map_perf_ring.o
/bin/sh: 1: -MMD: not found

Fix by using $(CC) $(CFLAGS) -c instead of $(COMPILE.c).

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200602175649.2501580-2-iii@linux.ibm.com
---
 tools/bpf/Makefile         | 6 +++---
 tools/bpf/bpftool/Makefile | 8 ++++----
 2 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/tools/bpf/Makefile b/tools/bpf/Makefile
index f897eeeb0b4f..77472e28c8fd 100644
--- a/tools/bpf/Makefile
+++ b/tools/bpf/Makefile
@@ -64,12 +64,12 @@ $(OUTPUT)%.lex.c: $(srctree)/tools/bpf/%.l
 	$(QUIET_FLEX)$(LEX) -o $@ $<
 
 $(OUTPUT)%.o: $(srctree)/tools/bpf/%.c
-	$(QUIET_CC)$(COMPILE.c) -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
 
 $(OUTPUT)%.yacc.o: $(OUTPUT)%.yacc.c
-	$(QUIET_CC)$(COMPILE.c) -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
 $(OUTPUT)%.lex.o: $(OUTPUT)%.lex.c
-	$(QUIET_CC)$(COMPILE.c) -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -o $@ $<
 
 PROGS = $(OUTPUT)bpf_jit_disasm $(OUTPUT)bpf_dbg $(OUTPUT)bpf_asm
 
diff --git a/tools/bpf/bpftool/Makefile b/tools/bpf/bpftool/Makefile
index 2759f9cc3289..9e85f101be85 100644
--- a/tools/bpf/bpftool/Makefile
+++ b/tools/bpf/bpftool/Makefile
@@ -126,7 +126,7 @@ else
 endif
 
 $(OUTPUT)_prog.o: prog.c
-	$(QUIET_CC)$(COMPILE.c) -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -DBPFTOOL_WITHOUT_SKELETONS -o $@ $<
 
 $(OUTPUT)_bpftool: $(_OBJS) $(LIBBPF)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(_OBJS) $(LIBS)
@@ -141,10 +141,10 @@ profiler.skel.h: $(OUTPUT)_bpftool skeleton/profiler.bpf.o
 	$(QUIET_GEN)$(OUTPUT)./_bpftool gen skeleton skeleton/profiler.bpf.o > $@
 
 $(OUTPUT)prog.o: prog.c profiler.skel.h
-	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)disasm.o: $(srctree)/kernel/bpf/disasm.c
-	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 $(OUTPUT)feature.o: | zdep
 
@@ -152,7 +152,7 @@ $(OUTPUT)bpftool: $(__OBJS) $(LIBBPF)
 	$(QUIET_LINK)$(CC) $(CFLAGS) $(LDFLAGS) -o $@ $(__OBJS) $(LIBS)
 
 $(OUTPUT)%.o: %.c
-	$(QUIET_CC)$(COMPILE.c) -MMD -o $@ $<
+	$(QUIET_CC)$(CC) $(CFLAGS) -c -MMD -o $@ $<
 
 clean: $(LIBBPF)-clean
 	$(call QUIET_CLEAN, bpftool)
-- 
cgit v1.2.3-59-g8ed1b


From e7ad28e6fdbffa2b9b1bd376431fb81a5403bcfd Mon Sep 17 00:00:00 2001
From: Ilya Leoshkevich <iii@linux.ibm.com>
Date: Tue, 2 Jun 2020 19:56:49 +0200
Subject: selftests/bpf: Add a default $(CXX) value

When using make kselftest TARGETS=bpf, tools/bpf is built with
MAKEFLAGS=rR, which causes $(CXX) to be undefined, which in turn causes
the build to fail with

  CXX      test_cpp
/bin/sh: 2: g: not found

Fix by adding a default $(CXX) value, like tools/build/feature/Makefile
already does.

Signed-off-by: Ilya Leoshkevich <iii@linux.ibm.com>
Signed-off-by: Daniel Borkmann <daniel@iogearbox.net>
Link: https://lore.kernel.org/bpf/20200602175649.2501580-3-iii@linux.ibm.com
---
 tools/testing/selftests/bpf/Makefile | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/tools/testing/selftests/bpf/Makefile b/tools/testing/selftests/bpf/Makefile
index 3ce548eff8a8..22aaec74ea0a 100644
--- a/tools/testing/selftests/bpf/Makefile
+++ b/tools/testing/selftests/bpf/Makefile
@@ -2,6 +2,8 @@
 include ../../../../scripts/Kbuild.include
 include ../../../scripts/Makefile.arch
 
+CXX ?= $(CROSS_COMPILE)g++
+
 CURDIR := $(abspath .)
 TOOLSDIR := $(abspath ../../..)
 LIBDIR := $(TOOLSDIR)/lib
-- 
cgit v1.2.3-59-g8ed1b


From 2eed5a8b614bc0197b29da7b21a78d2c564a7098 Mon Sep 17 00:00:00 2001
From: Luo bin <luobin9@huawei.com>
Date: Tue, 2 Jun 2020 08:40:32 +0800
Subject: hinic: add set_channels ethtool_ops support

add support to change TX/RX queue number with "ethtool -L combined".

V5 -> V6: remove check for carrier in hinic_xmit_frame
V4 -> V5: change time zone in patch header
V3 -> V4: update date in patch header
V2 -> V3: remove check for zero channels->combined_count
V1 -> V2: update commit message("ethtool -L" to "ethtool -L combined")
V0 -> V1: remove check for channels->tx_count/rx_count/other_count

Signed-off-by: Luo bin <luobin9@huawei.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/ethernet/huawei/hinic/hinic_ethtool.c | 40 ++++++++++++++++++-----
 drivers/net/ethernet/huawei/hinic/hinic_main.c    |  2 +-
 2 files changed, 33 insertions(+), 9 deletions(-)

diff --git a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
index ace18d258049..efb02e03e7da 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_ethtool.c
@@ -619,14 +619,37 @@ static void hinic_get_channels(struct net_device *netdev,
 	struct hinic_dev *nic_dev = netdev_priv(netdev);
 	struct hinic_hwdev *hwdev = nic_dev->hwdev;
 
-	channels->max_rx = hwdev->nic_cap.max_qps;
-	channels->max_tx = hwdev->nic_cap.max_qps;
-	channels->max_other = 0;
-	channels->max_combined = 0;
-	channels->rx_count = hinic_hwdev_num_qps(hwdev);
-	channels->tx_count = hinic_hwdev_num_qps(hwdev);
-	channels->other_count = 0;
-	channels->combined_count = 0;
+	channels->max_combined = nic_dev->max_qps;
+	channels->combined_count = hinic_hwdev_num_qps(hwdev);
+}
+
+static int hinic_set_channels(struct net_device *netdev,
+			      struct ethtool_channels *channels)
+{
+	struct hinic_dev *nic_dev = netdev_priv(netdev);
+	unsigned int count = channels->combined_count;
+	int err;
+
+	netif_info(nic_dev, drv, netdev, "Set max combined queue number from %d to %d\n",
+		   hinic_hwdev_num_qps(nic_dev->hwdev), count);
+
+	if (netif_running(netdev)) {
+		netif_info(nic_dev, drv, netdev, "Restarting netdev\n");
+		hinic_close(netdev);
+
+		nic_dev->hwdev->nic_cap.num_qps = count;
+
+		err = hinic_open(netdev);
+		if (err) {
+			netif_err(nic_dev, drv, netdev,
+				  "Failed to open netdev\n");
+			return -EFAULT;
+		}
+	} else {
+		nic_dev->hwdev->nic_cap.num_qps = count;
+	}
+
+	return 0;
 }
 
 static int hinic_get_rss_hash_opts(struct hinic_dev *nic_dev,
@@ -1219,6 +1242,7 @@ static const struct ethtool_ops hinic_ethtool_ops = {
 	.get_ringparam = hinic_get_ringparam,
 	.set_ringparam = hinic_set_ringparam,
 	.get_channels = hinic_get_channels,
+	.set_channels = hinic_set_channels,
 	.get_rxnfc = hinic_get_rxnfc,
 	.set_rxnfc = hinic_set_rxnfc,
 	.get_rxfh_key_size = hinic_get_rxfh_key_size,
diff --git a/drivers/net/ethernet/huawei/hinic/hinic_main.c b/drivers/net/ethernet/huawei/hinic/hinic_main.c
index c8ab129a7ae8..e9e6f4c9309a 100644
--- a/drivers/net/ethernet/huawei/hinic/hinic_main.c
+++ b/drivers/net/ethernet/huawei/hinic/hinic_main.c
@@ -326,7 +326,6 @@ static void hinic_enable_rss(struct hinic_dev *nic_dev)
 	int i, node, err = 0;
 	u16 num_cpus = 0;
 
-	nic_dev->max_qps = hinic_hwdev_max_num_qps(hwdev);
 	if (nic_dev->max_qps <= 1) {
 		nic_dev->flags &= ~HINIC_RSS_ENABLE;
 		nic_dev->rss_limit = nic_dev->max_qps;
@@ -1031,6 +1030,7 @@ static int nic_dev_init(struct pci_dev *pdev)
 	nic_dev->rq_depth = HINIC_RQ_DEPTH;
 	nic_dev->sriov_info.hwdev = hwdev;
 	nic_dev->sriov_info.pdev = pdev;
+	nic_dev->max_qps = num_qps;
 
 	sema_init(&nic_dev->mgmt_lock, 1);
 
-- 
cgit v1.2.3-59-g8ed1b


From 11e877b2a8cfd282a1b81f9d4c594b900889a5d8 Mon Sep 17 00:00:00 2001
From: Ronak Doshi <doshir@vmware.com>
Date: Mon, 1 Jun 2020 20:02:39 -0700
Subject: vmxnet3: allow rx flow hash ops only when rss is enabled

It makes sense to allow changes to get/set rx flow hash callback only
when rss is enabled. This patch restricts get_rss_hash_opts and
set_rss_hash_opts methods to allow querying and configuring different
Rx flow hash configurations only when rss is enabled.

Signed-off-by: Ronak Doshi <doshir@vmware.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/vmxnet3/vmxnet3_ethtool.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/net/vmxnet3/vmxnet3_ethtool.c b/drivers/net/vmxnet3/vmxnet3_ethtool.c
index 6acaafe169de..def27afa1c69 100644
--- a/drivers/net/vmxnet3/vmxnet3_ethtool.c
+++ b/drivers/net/vmxnet3/vmxnet3_ethtool.c
@@ -899,6 +899,12 @@ vmxnet3_get_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info,
 			err = -EOPNOTSUPP;
 			break;
 		}
+#ifdef VMXNET3_RSS
+		if (!adapter->rss) {
+			err = -EOPNOTSUPP;
+			break;
+		}
+#endif
 		err = vmxnet3_get_rss_hash_opts(adapter, info);
 		break;
 	default:
@@ -919,6 +925,12 @@ vmxnet3_set_rxnfc(struct net_device *netdev, struct ethtool_rxnfc *info)
 		err = -EOPNOTSUPP;
 		goto done;
 	}
+#ifdef VMXNET3_RSS
+	if (!adapter->rss) {
+		err = -EOPNOTSUPP;
+		goto done;
+	}
+#endif
 
 	switch (info->cmd) {
 	case ETHTOOL_SRXFH:
-- 
cgit v1.2.3-59-g8ed1b


From 049fa17f7ae6b0971ad41b761479962facafea4b Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 2 Jun 2020 11:46:40 +0700
Subject: Revert "tipc: Fix potential tipc_node refcnt leak in tipc_rcv"

This reverts commit de058420767df21e2b6b0f3bb36d1616fb962032.

There is no actual tipc_node refcnt leak as stated in the above commit.
The refcnt is hold carefully for the case of an asynchronous decryption
(i.e. -EINPROGRESS/-EBUSY and skb = NULL is returned), so that the node
object cannot be freed in the meantime. The counter will be re-balanced
when the operation's callback arrives with the decrypted buffer if any.
In other cases, e.g. synchronous crypto, the counter will be decreased
immediately when it is done.

Now with that commit, a kernel panic will occur when there is no node
found (i.e. n = NULL) in the 'tipc_rcv()' or a premature release of the
node object.

This commit solves the issues by reverting the said commit, but keeping
the one valid case where 'skb_linearize()' fails.

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Tested-by: Hoang Le <hoang.h.le@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/node.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/tipc/node.c b/net/tipc/node.c
index 0312fb181d94..a4c2816c3746 100644
--- a/net/tipc/node.c
+++ b/net/tipc/node.c
@@ -2038,7 +2038,6 @@ void tipc_rcv(struct net *net, struct sk_buff *skb, struct tipc_bearer *b)
 		n = tipc_node_find_by_id(net, ehdr->id);
 	}
 	tipc_crypto_rcv(net, (n) ? n->crypto_rx : NULL, &skb, b);
-	tipc_node_put(n);
 	if (!skb)
 		return;
 
-- 
cgit v1.2.3-59-g8ed1b


From a275727b1899d14d33d6c8c70f303b73f01cdbc6 Mon Sep 17 00:00:00 2001
From: Tuong Lien <tuong.t.lien@dektech.com.au>
Date: Tue, 2 Jun 2020 11:46:41 +0700
Subject: Revert "tipc: Fix potential tipc_aead refcnt leak in tipc_crypto_rcv"

This reverts commit 441870ee4240cf67b5d3ab8e16216a9ff42eb5d6.

Like the previous patch in this series, we revert the above commit that
causes similar issues with the 'aead' object.

Acked-by: Jon Maloy <jmaloy@redhat.com>
Signed-off-by: Tuong Lien <tuong.t.lien@dektech.com.au>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 net/tipc/crypto.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/net/tipc/crypto.c b/net/tipc/crypto.c
index 8c47ded2edb6..c8c47fc72653 100644
--- a/net/tipc/crypto.c
+++ b/net/tipc/crypto.c
@@ -1712,7 +1712,6 @@ exit:
 	case -EBUSY:
 		this_cpu_inc(stats->stat[STAT_ASYNC]);
 		*skb = NULL;
-		tipc_aead_put(aead);
 		return rc;
 	default:
 		this_cpu_inc(stats->stat[STAT_NOK]);
-- 
cgit v1.2.3-59-g8ed1b


From e8224bfe77293494626f6eec1884fee7b87d0ced Mon Sep 17 00:00:00 2001
From: Vasily Averin <vvs@virtuozzo.com>
Date: Tue, 2 Jun 2020 15:55:26 +0300
Subject: net_failover: fixed rollback in net_failover_open()

found by smatch:
drivers/net/net_failover.c:65 net_failover_open() error:
 we previously assumed 'primary_dev' could be null (see line 43)

Fixes: cfc80d9a1163 ("net: Introduce net_failover driver")
Signed-off-by: Vasily Averin <vvs@virtuozzo.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 drivers/net/net_failover.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/drivers/net/net_failover.c b/drivers/net/net_failover.c
index b16a1221d19b..fb182bec8f06 100644
--- a/drivers/net/net_failover.c
+++ b/drivers/net/net_failover.c
@@ -61,7 +61,8 @@ static int net_failover_open(struct net_device *dev)
 	return 0;
 
 err_standby_open:
-	dev_close(primary_dev);
+	if (primary_dev)
+		dev_close(primary_dev);
 err_primary_open:
 	netif_tx_disable(dev);
 	return err;
-- 
cgit v1.2.3-59-g8ed1b


From 065fcfd49763ec71ae345bb5c5a74f961031e70e Mon Sep 17 00:00:00 2001
From: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Date: Tue, 2 Jun 2020 15:38:37 -0300
Subject: selftests: net: ip_defrag: ignore EPERM

When running with conntrack rules, the dropped overlap fragments may cause
EPERM to be returned to sendto. Instead of completely failing, just ignore
those errors and continue. If this causes packets with overlap fragments to
be dropped as expected, that is okay. And if it causes packets that are
expected to be received to be dropped, which should not happen, it will be
detected as failure.

Signed-off-by: Thadeu Lima de Souza Cascardo <cascardo@canonical.com>
Signed-off-by: David S. Miller <davem@davemloft.net>
---
 tools/testing/selftests/net/ip_defrag.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/tools/testing/selftests/net/ip_defrag.c b/tools/testing/selftests/net/ip_defrag.c
index c0c9ecb891e1..f9ed749fd8c7 100644
--- a/tools/testing/selftests/net/ip_defrag.c
+++ b/tools/testing/selftests/net/ip_defrag.c
@@ -192,9 +192,9 @@ static void send_fragment(int fd_raw, struct sockaddr *addr, socklen_t alen,
 	}
 
 	res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
-	if (res < 0)
+	if (res < 0 && errno != EPERM)
 		error(1, errno, "send_fragment");
-	if (res != frag_len)
+	if (res >= 0 && res != frag_len)
 		error(1, 0, "send_fragment: %d vs %d", res, frag_len);
 
 	frag_counter++;
@@ -313,9 +313,9 @@ static void send_udp_frags(int fd_raw, struct sockaddr *addr,
 			iphdr->ip_len = htons(frag_len);
 		}
 		res = sendto(fd_raw, ip_frame, frag_len, 0, addr, alen);
-		if (res < 0)
+		if (res < 0 && errno != EPERM)
 			error(1, errno, "sendto overlap: %d", frag_len);
-		if (res != frag_len)
+		if (res >= 0 && res != frag_len)
 			error(1, 0, "sendto overlap: %d vs %d", (int)res, frag_len);
 		frag_counter++;
 	}
-- 
cgit v1.2.3-59-g8ed1b